pax_global_header00006660000000000000000000000064150776464670014540gustar00rootroot0000000000000052 comment=4028c3c38afb97c3e86dd8d77cdf48d86be3f11c starpu-1.4.9+dfsg/000077500000000000000000000000001507764646700140105ustar00rootroot00000000000000starpu-1.4.9+dfsg/.gitlab-ci.yml000066400000000000000000000101731507764646700164460ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # --- stages: - build - coverage - check - analyze - deploy - new-release - set-release - release default: interruptible: true .main_template: rules: - if: ($CI_PIPELINE_SOURCE != "push") && ($CI_PIPELINE_SOURCE != "trigger") # to download all the artifacts https://gitlab.inria.fr/starpu/starpu/-/jobs/artifacts/master/download?job=build # https://gitlab.inria.fr/starpu/starpu/-/jobs/artifacts/master/raw/starpu-1.3.99.tar.gz?job=build # https://gitlab.inria.fr/starpu/starpu/-/jobs/artifacts/master/raw/starpu.pdf?job=build # https://docs.gitlab.com/ee/ci/pipelines/job_artifacts.html#downloading-artifacts build: extends: .main_template stage: build image: registry.gitlab.inria.fr/starpu/starpu-docker/ci-debian12 tags: ['ci.inria.fr', 'linux', 'large'] script: - ./contrib/gitlab/build.sh artifacts: paths: - starpu*.tar.gz - starpu*.pdf .check_template: extends: .main_template stage: check needs: [build] dependencies: - build script: - ./contrib/gitlab/deploy.sh check_ci: extends: .check_template parallel: matrix: - NODE: [centos72amd64, macosx] tags: - ${NODE} script: - ./contrib/gitlab/deploy.sh check: extends: .check_template parallel: matrix: - NODE: [debian12, debian10, ubuntu1604, ubuntu1804, ubuntu2004] image: registry.gitlab.inria.fr/starpu/starpu-docker/ci-${NODE} tags: ['ci.inria.fr', 'linux', 'large'] check_simgrid: extends: .check_template image: registry.gitlab.inria.fr/starpu/starpu-docker/ci-debian10 tags: ['ci.inria.fr', 'linux', 'large'] script: - ./contrib/gitlab/simgrid.sh .analyze_template: extends: .main_template stage: analyze needs: [build] dependencies: - build analyze_coverity: extends: .analyze_template tags: - starpu - linux - coverity script: - ./contrib/gitlab/coverity.sh .deploy_template: extends: .main_template stage: deploy needs: [check] dependencies: - check deploy_chameleon: extends: .deploy_template image: registry.gitlab.inria.fr/solverstack/chameleon tags: ['ci.inria.fr', 'linux', 'large'] script: - ./contrib/gitlab/chameleon.sh new-release: stage: new-release only: - tags tags: - starpu-release script: - ./contrib/releases/new-release.sh timeout: 3h set-release: stage: set-release dependencies: - new-release only: - tags tags: - starpu-release script: - ./contrib/releases/set-release.sh artifacts: paths: - release-cli.txt release: stage: release image: registry.gitlab.com/gitlab-org/release-cli:latest dependencies: - set-release only: - tags tags: - ci.inria.fr script: - cat release-cli.txt - eval "$(cat release-cli.txt)" .coverage: extends: .main_template stage: coverage .coverage_artifacts: before_script: - rm -rf ./artifacts artifacts: when: always paths: - artifacts coverage: extends: [.coverage, .coverage_artifacts] timeout: 2h variables: RUNNER_SCRIPT_TIMEOUT: 118m RUNNER_AFTER_SCRIPT_TIMEOUT: 2m tags: ['starpu', 'node_gpu'] script: - ./contrib/gitlab/run_profile.sh coverage after_script: - ./contrib/gitlab/clean_profile.sh coverage coverage_update: extends: .coverage timeout: 2h variables: RUNNER_SCRIPT_TIMEOUT: 118m RUNNER_AFTER_SCRIPT_TIMEOUT: 2m dependencies: - coverage needs: - coverage tags: ['starpu', 'node_gpu'] script: - ./contrib/gitlab/upload.sh ./artifacts starpu-1.4.9+dfsg/AUTHORS000066400000000000000000000064531507764646700150700ustar00rootroot00000000000000Authors Augonnet Cédric, Université de Bordeaux, Aumage Olivier, Inria, Furmento Nathalie, CNRS, Thibault Samuel, Université de Bordeaux, Contributors Archipoff Simon, Université de Bordeaux, Beauchamp Guillaume, Inria, Bramas Berenger, Inria, Buttari Alfredo, Enseeiht, Cassagne Adrien, Inria, Clet-Ortega Jérôme, Inria, Cojean Terry, Université de Bordeaux, Collin Nicolas, Inria, Coti Camille, UQAM Danjean Vincent, University Grenoble Alpes, Denis Alexandre, Inria, Eyraud-Dubois Lionel, Inria, Flint Clément, Inria, Fuentes Mathis, ATOS, Guermouche Amina, Télécom SudParis, Guilbaud Adrien, Inria, He Kun, Inria, Henry Sylvain, Université de Bordeaux, Hugo Andra, Université de Bordeaux/Inria, Jego Antoine, Enseeiht, Juhoor Mehdi, Université de Bordeaux, Juven Alexis, Inria, Keryell-Even Maël, Inria, Khorsi Yanis, Inria, Kuhn Matthieu, ATOS, Lambert Thibaut, Inria, Leria Erwan, Université de Bordeaux, Lizé Benoît, Airbus, Lucas Gwenolé, University of Bordeaux, Makni Mariem, Inria, Nakov Stojce, Inria, Namyst Raymond, Université de Bordeaux, Nesi Lucas Leandro, Federal University of Rio Grande do Sul (UFRGS), Pablo Joris, Inria, Paillat Ludovic, Inria, Pasqualinotto Damien, Université de Bordeaux, Pinto Vinicius Garcia, Pitoiset Samuel, Inria, Point Gérald, CNRS, Quôc-Dinh Nguyen, IT Sud-Paris, Roelandt Cyril, Inria, Sakka Chiheb, Inria, Salingue Corentin, Inria, Schnorr Lucas Mello, Federal University of Rio Grande do Sul (UFRGS), Sergent Marc, CEA/Inria, Simonet Anthony, Université de Bordeaux, Stanisic Luka, Inria, Subervie Bérangère, Inria, Swartvagher Philippe, Inria, Tessier François, Université de Bordeaux, Videau Brice, University Grenoble Alpes, Villeveygoux Leo, Université de Bordeaux, Virouleau Philippe, Inria, Wacrenier Pierre-André, Université de Bordeaux, starpu-1.4.9+dfsg/COPYING.LGPL000066400000000000000000000636371507764646700156170ustar00rootroot00000000000000 GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the library `Frob' (a library for tweaking knobs) written by James Random Hacker. , 1 April 1990 Ty Coon, President of Vice That's all there is to it! starpu-1.4.9+dfsg/ChangeLog000066400000000000000000002232411507764646700155660ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # StarPU 1.4.9 ============================================== Small changes: * Support STARPU_WORKERS_GETBIND in the case where only one thread per core is provided by the OS * When using STARPU_WORKERS_GETBIND, fix counting nusedpus * Fix each public API file by including starpu.h before the inclusion guard * Fix make install on systems with a dist-packages python directory StarPU 1.4.8 ============================================== Small features: * Add png curve generation to starpu_perfmodel_plot * Add STARPU_MPI_THREAD_MULTIPLE_SEND environment variable to enable parallel sending with MPI. * Add starpu_tag_clear * Add starpu_cublasLt_init/shutdown/get_local_handle helpers. StarPU 1.4.7 ============================================== Small changes: * Fix simgrid version of examples/mult StarPU 1.4.6 ============================================== Small features: * Add FXT option -use-task-color to propagate the specified task color to the contexts StarPU 1.4.5 ============================================== * Do not link libstarpu against libnvidia-ml StarPU 1.4.4 ============================================== Small changes: * Fix build system for StarPU Python interface StarPU 1.4.3 ============================================== Small features: * Add starpu_data_partition_readonly_downgrade_submit(). Small changes: * StarPUPY no longer requires python modules joblib and cloudpickle to be mandatory StarPU 1.4.2 ============================================== Small features: * New functions starpu_mpi_data_cpy() and starpu_mpi_recv_prio() * New functions starpu_bind_thread_on_worker(), starpu_bind_thread_on_main(), starpu_bind_thread_on_cpu(), and starpu_cpu_os_index() * New macro STARPU_CUSOLVER_REPORT_ERROR StarPU 1.4.1 ============================================== Small features: * Add starpu_mpi_tags_{allocate,free} functions to manage mpi data tags in distributed memory. Changes: * Fix StarPUPY when not using asyncio: we can use concurrent.futures instead. * Add STARPU_CODELET_PROFILING environment variable to disable codelet task counting for applications to be able to have const codelets. * In performance bounds, take into account the standard deviation to get the "expected" upper bound, in terms of expected optimistic deviation from the average, rather than the average. Small changes: * Fix function starpu_mpi_wait_for_all() * Fix building atomic functions with llvm on 32bit systems. * SOCL: Fix missing CL_CALLBACK for various callback functions * Update prologue function names for parallel workers StarPU 1.4.0 ============================================== New features: * Add a starpu_mpi_task_submit-oriented way of submitting MPI tasks with functions starpu_mpi_task_exchange_data_before_execution() and starpu_mpi_task_exchange_data_after_execution() * Possibility to specify different directories to store performance model files with new variable STARPU_PERF_MODEL_PATH * Checkpoint mechanism for MPI applications * Transaction support * OpenMP LLVM support * Driver for HIP-based GPUs. * Fault tolerance support with starpu_task_ft_failed(). * Julia programming interface. * Add get_max_size method to data interfaces for applications using data with variable size to express their maximal potential size. * New offline tool to draw graph showing elapsed time between sent or received data and their use by tasks * Add 4D tensor data interface. * New sched_tasks.rec trace file which monitors task scheduling push/pop actions * New STARPU_MPI_MEM_THROTTLE environment variable to throttle mpi submission according to memory use. * New number_events.data trace file which monitors number of events in trace files. This file can be parsed by the new script starpu_fxt_number_events_to_names.py to convert event keys to event names. * New STARPU_PER_WORKER perfmodel. * Add energy accounting in the simgrid mode: starpu_energy_use() and starpu_energy_used(). * New function starpu_mpi_get_thread_cpuid() to know where is bound the MPI thread. * New function starpu_get_pu_os_index() to convert logical index of a PU to its OS index. * New function starpu_get_hwloc_topology() to get the hwloc topology used by StarPU. * Add a task prefetch level, to improve retaining data in accelerators so we can make prefetch more aggressive. * Add starpu_data_dup_ro(). * Add starpu_data_release_to() and starpu_data_release_to_on_node(). * Add profiling based on papi performance counters. * Add an experimental python interface (not actually parallel yet) * Add task submission file+line in traces. * Add papi- and nvml-based energy measurement. * Add starpu_mpi_datatype_node_register and starpu_mpi_interface_datatype_node_register which will be needed for MPI/NUMA/GPUDirect. * Add peek_data interface method. * Add support of dynamic broadcasts when StarPU-MPI is used with NewMadeleine. * New STARPU_MPI_RECV_WAIT_FINALIZE environment variable to wait communication library completely releases the handle to unlock tasks (instead of just releasing the write lock). Only for NewMadeleine. * Add STARPU_MPI_REDUX * New StarPU Java Bindings * Add starpu_data_query_status2 function. * Add starpu_data_evict_from_node function. * Add a StarPU Eclipse Plugin * Add support for Maxeler FPGA accelerators. * Add 4D tensors filters. * Add n-dimension data interface and filters. * New STARPU_FXT_EVENTS environment variable to select at runtime which event categories has to be recorded. * Add support of mpi_sync_clocks for more precise distributed traces. * Add more worker states in STARPU_PROFILING: callback, waiting, scheduling. * Support for hierarchical tasks * Support mapping memory between CPU RAM and GPU RAM, instead of copying data. * New function starpu_get_memory_location_bitmap() and register in traces on which NUMA node are buffers used for MPI or tasks. * TCP/IP-based master-slave support. * Set STARPU_WORKERS_GETBIND to 1 by default, to inherit CPU binding from the job scheduler. * Add starpu_{vector,matrix,block,tensor,ndim}_filter_pick_variable. * New operator for data interfaces pack_meta(), unpack_meta() and free_meta() which are used in master slave mode for data interfaces with a dynamic content. * Add CUSOLVER support. * Add STARPU_NOFOOTPRINT data access flag. Small features: * New function starpu_mpi_comm_register() to store the size and the rank of the given communicator (update functions starpu_mpi_comm_rank() and starpu_mpi_comm_size() to no longer call directly the mpi functions) * New configure option --with-check-cflags to define flags for C, CXX and Fortran compilers * FxT is now automatically enabled at build-time, but not enabled at run-time by default any more, STARPU_FXT_TRACE needs to be explicitly set to 1 to enable FxT trace recording. * Deprecate starpu_free() and add new function starpu_free_noflag() to specify allocated size. * Reuse matrix tiles that have different shapes but same allocation size. * Add starpu_task_create_sync * Add ram_colind/rowptr to csr and bcsr data interfaces. This allows to make starpu_bcsr_filter_vertical_block work on several memory nodes. * Add cuda0 and cuda1 example drivers. * New STARPU_EXPECTED_TRANSFER_TIME_WRITEBACK environment variable to tune transfer estimation times. * Add tool starpu_config to display the configuration StarPU was compiled with * Possibility to enable data locality enforcement when choosing a worker to run a task implementation * New function starpu_data_partition_clean_node() to specify node on which to gather data * Move to the public API some scheduler utility functions * New variable STARPU_SCHED_LIB to dynamically load a new scheduling policy * Enable GPUDirect when MPI supports it. * Install a module file in lib/modules * New function starpu_worker_wait_for_initialisation() which waits for all workers to be initialised * Add in the public API the codelet starpu_codelet_nop which has an empty function defined for all drivers * Add starpu_task_expected_length_average and starpu_task_expected_energy_average. * Add STARPU_SIMGRID_TASK_PUSH_COST environment variable. * Add starpu_memory_nodes_get_count_by_kind and starpu_memory_node_get_ids_by_type. * Add STARPU_MPI_REDUX_ARITY_THRESHOLD to tune the type of tree used in distributed-memory reduction patterns that are automatically detected. * New function starpu_data_set_reduction_methods_with_args() to specify arguments to pass to the reduction and init tasks Changes: * The redux codelet should expose the STARPU_COMMUTE flag, since StarPU actually uses commutability. * Rename STARPU_COMM_STATS environment variable to STARPU_MPI_STATS * Function starpu_data_lookup has been removed, it is now up to the calling code to manage a ptr-to-handle reverse lookup table when needed. * Cluster is renamed in parallel worker but keep the old API as deprecated * Removed pop_every_task scheduler method, unused since long. Small changes: * starpu_mpi_task_insert() returns -ENODEV if no worker is available on the node which is to execute the codelet (the other nodes do not return -ENODEV) * Add a synthetic energy efficiency testcase. * Make reduction methods want the commute flag. * Delete old MIC driver code * Rename - starpu_conf::sched_policy_init to starpu_conf::sched_policy_callback and - starpu_sched_ctx_get_sched_policy_init() to starpu_sched_ctx_get_sched_policy_callback() as the callback function may not only be used for init purposes * Change the default value for configure option --enable-maxcpus to auto. it allows StarPU to automatically use the number of CPUs on the build machine. * New option --worker for tool starpu_machine_display to only display workers of a specific type * Remove the unused and untested mpi_ms_funcs field. * The home_node parameter of the register_data_handle method is turned from unsigned to int, to explicit that it may be -1. * Value 0 for STARPU_MPI_NDETACHED_SEND and STARPU_MPI_NREADY_PROCESS will now disable their behaviour. * Distributed-memory reduction patterns are automatically wrapped-up if the user do not call starpu_mpi_redux_data() * Remove starpu_data_pointer_is_inside(). StarPU 1.3.12 ==================================================================== Small changes: * Add starpu_data_deinitialize and starpu_data_deinitialize_submit StarPU 1.3.11 ==================================================================== Small changes: * Fix building with cuda 12 StarPU 1.3.10 ==================================================================== Small features: * Add starpu_worker_get_current_task_exp_end. Small changes: * Change the default value for configure option --enable-maxcpus to auto. it allows StarPU to automatically use the number of CPUs on the build machine. StarPU 1.3.9 ==================================================================== Small changes: * Add missing interface macros for BCSR data interface StarPU 1.3.8 ==================================================================== Small features: * A codelet can now define a callback function pointer which will be automatically called when the task does not define itself a callback function, in that case, it can still be called from the task callback function. * New STARPU_WORKERS_COREID, STARPU_MAIN_THREAD_COREID and STARPU_MPI_THREAD_COREID environment variables to bind threads to cores instead of hyperthreads. * New STARPU_TASK_PROGRESS environment variable to show task progression. * Add STARPU_SIMGRID environment variable guard against native builds. * Add starpu_cuda_get_nvmldev function. * New configure option --with-check-cflags to define flags for C, CXX and Fortran compilers * Add starpu_sched_tree_deinitialize function. * Add STARPU_SCHED_SORTED_ABOVE and STARPU_SCHED_SORTED_BELOW environment variables. * Add STARPU_SCHED_SIMPLE_PRE_DECISION. * Add starpu_bcsr_filter_canonical_block_get_nchildren. * Add unregister_data_handle handle ops. StarPU 1.3.7 ==================================================================== Small changes: * Simgrid: bug fix for setting network/weight-S to 0.0 StarPU 1.3.6 (git revision fb9fbed81410d9f0ebbff5bdad1352df4705efe8) ==================================================================== Small features: * New STARPU_BACKOFF_MIN and STARPU_BACKOFF_MAX environment variables to the exponential backoff limits of the number of cycles to pause while drivers are spinning. * Add STARPU_DISPLAY_BINDINGS environment variable and starpu_display_bindings() function to display all bindings on the machine by calling hwloc-ps * New function starpu_get_pu_os_index() to convert logical index of a PU to its OS index. * New function starpu_get_hwloc_topology() to get the hwloc topology used by StarPU. StarPU 1.3.5 (git revision 5f7458799f548026fab357b18541bb462dde2b53) ==================================================================== Small features: * New environment variable STARPU_FXT_SUFFIX to set the filename in which to save the fxt trace * New option -d for starpu_fxt_tool to specify in which directory to generate files Small changes: * Move MPI cache functions into the public API * Add STARPU_MPI_NOBIND environment variable. StarPU 1.3.4 (git revision c37a5d024cd997596da41f765557c58099baf896) ==================================================================== Small features: * New environment variables STARPU_BUS_STATS_FILE and STARPU_WORKER_STATS_FILE to specify files in which to display statistics about data transfers and workers. * Add starpu_bcsr_filter_vertical_block filtering function. * Add starpu_interface_copy2d, 3d, and 4d to easily request data copies from data interfaces. * Move optimized cuda 2d copy from interfaces to new starpu_cuda_copy2d_async_sync and starpu_cuda_copy3d_async_sync, and use them from starpu_interface_copy2d and 3d. * New function starpu_task_watchdog_set_hook to specify a function to be called when the watchdog is raised * Add STARPU_LIMIT_CPU_NUMA_MEM environment variable. * Add STARPU_WORKERS_GETBIND environment variable. * Add STARPU_SCHED_SIMPLE_DECIDE_ALWAYS modular scheduler flag. * And STARPU_LIMIT_BANDWIDTH environment variable. * Add field starpu_conf::precedence_over_environment_variables to ignore environment variables when parameters are set directly in starpu_conf * Add starpu_data_get_coordinates_array * MPI: new functions starpu_mpi_interface_datatype_register() and starpu_mpi_interface_datatype_unregister() which take a enum starpu_data_interface_id instead of a starpu_data_handle_t * New script starpu_env to set up StarPU environment variables Small changes: * New configure option --disable-build-doc-pdf StarPU 1.3.3 (git revision 11afc5b007fe1ab1c729b55b47a5a98ef7f3cfad) ==================================================================== New features: * New semantic for starpu_task_insert() and alike parameters STARPU_CALLBACK_ARG, STARPU_PROLOGUE_CALLBACK_ARG, and STARPU_PROLOGUE_CALLBACK_POP_ARG which set respectively starpu_task::callback_arg_free, starpu_task::prologue_callback_arg_free and starpu_task::prologue_callback_pop_arg_free to 1 when used. New parameters STARPU_CALLBACK_ARG_NFREE, STARPU_CALLBACK_WITH_ARG_NFREE, STARPU_PROLOGUE_CALLBACK_ARG_NFREE, and STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE which set the corresponding fields of starpu_task to 0. * starpufft: Support 3D. * New modular-eager-prio scheduler. * Add 'ready' heuristic to modular schedulers. * New modular-heteroprio scheduler. * Add STARPU_TASK_SCHED_DATA * Add support for staging schedulers. * New modular-heteroprio-heft scheduler. * New dmdap "data-aware performance model (priority)" scheduler Changes: * Modification in the Native Fortran interface of the functions fstarpu_mpi_task_insert, fstarpu_mpi_task_build and fstarpu_mpi_task_post_build to only take 1 parameter being the MPI communicator, the codelet and the various parameters for the task. Small features: * New starpu_task_insert() and alike parameter STARPU_TASK_WORKERIDS allowing to set the fields starpu_task::workerids_len and starpu_task::workerids * New starpu_task_insert() and alike parameters STARPU_SEQUENTIAL_CONSISTENCY, STARPU_TASK_NO_SUBMITORDER and STARPU_TASK_PROFILING_INFO * New function starpu_create_callback_task() which creates and submits an empty task with the specified callback * Use the S4U interface of Simgrid instead of xbt and MSG. Small changes: * Default modular worker queues to 2 tasks unless it's an heft scheduler * Separate out STATUS_SLEEPING_SCHEDULING state from STATUS_SLEEPING state When running the scheduler while being idle, workers do not go in the STATUS_SCHEDULING state, so that that time is considered as idle time instead of overhead. StarPU 1.3.2 (git revision af22a20fc00a37addf3cc6506305f89feed940b0) ==================================================================== Small changes: * Improve OpenMP support to detect the environment is valid before launching OpenMP * Delete old code (drivers gordon, scc, starpu-top, and plugin gcc) and update authors file accordingly * Add Heteroprio documentation (including a simple example) * Add a progression hook, to be called when workers are idle, which is used in the NewMadeleine implementation of StarPU-MPI to ensure communications progress. StarPU 1.3.1 (git revision 01949488b4f8e6fe26d2c200293b8aae5876b038) ==================================================================== Small features: * Add starpu_filter_nparts_compute_chunk_size_and_offset helper. * Add starpu_bcsr_filter_canonical_block_child_ops. Small changes: * Improve detection of NVML availability. Do not only check the library is available, also check the compiled code can be run. StarPU 1.3.0 (git revision 24ca83c6dbb102e1cfc41db3bb21c49662067062) ==================================================================== New features: * New scheduler 'heteroprio' with heterogeneous priorities * Support priorities for data transfers. * Add support for multiple linear regression performance models - Bump performance model file format version to 45. * Add MPI Master-Slave support to use the cores of remote nodes. Use the --enable-mpi-master-slave option to activate it. * Add STARPU_CUDA_THREAD_PER_DEV environment variable to support driving all GPUs from only one thread when almost all kernels are asynchronous. * Add starpu_replay tool to replay tasks.rec files with Simgrid. * Add experimental support of NUMA nodes. Use STARPU_USE_NUMA to activate it. * Add a new set of functions to make Out-of-Core based on HDF5 Library. * Add a new implementation of StarPU-MPI on top of NewMadeleine * Add optional callbacks to notify an external resource manager about workers going to sleep and waking up * Add implicit support for asynchronous partition planning. This means one does not need to call starpu_data_partition_submit() etc. explicitly any more, StarPU will make the appropriate calls as needed. * Add starpu_task_notify_ready_soon_register() to be notified when it is determined when a task will be ready an estimated amount of time from now. * New StarPU-MPI initialization function (starpu_mpi_init_conf()) which allows StarPU-MPI to manage reserving a core for the MPI thread, or merging it with CPU driver 0. * Add possibility to delay the termination of a task with the functions starpu_task_end_dep_add() which specifies the number of calls to the function starpu_task_end_dep_release() needed to trigger the task termination, or with starpu_task_declare_end_deps_array() and starpu_task_declare_end_deps() to just declare termination dependencies between tasks. * Add possibility to define the sequential consistency at the task level for each handle used by the task. * Add STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_CPU, and STARPU_SPECIFIC_NODE_SLOW as generic values for codelet specific memory nodes which can be used instead of exact node numbers. * Add starpu_get_next_bindid() and starpu_bind_thread_on() to allow binding an application-started thread on a free core. Use it in StarPU-MPI to automatically bind the MPI thread on an available core. * Add STARPU_RESERVE_NCPU environment variable and starpu_config::reserve_ncpus field to make StarPU use a few cores less. * Add STARPU_MAIN_THREAD_BIND environment variable to make StarPU reserve a core for the main thread. * New StarPU-RM resource management module to share processor cores and accelerator devices with other parallel runtime systems. Use --enable-starpurm option to activate it. * New schedulers modular-gemm, modular-pheft, modular-prandom and modular-prandom-prio * Add STARPU_MATRIX_SET_NX/NY/LD and STARPU_VECTOR_SET_NX to change a matrix tile or vector size without reallocating the buffer. * Application can change the allocation used by StarPU with starpu_malloc_set_hooks() * XML output for starpu_perfmodel_display and starpu_perfmodel_dump_xml() function Small features: * Scheduling contexts may now be associated a user data pointer at creation time, that can later be recalled through starpu_sched_ctx_get_user_data(). * New environment variables STARPU_SIMGRID_TASK_SUBMIT_COST and STARPU_SIMGRID_FETCHING_INPUT_COST to simulate the cost of task submission and data fetching in simgrid mode. This provides more accurate simgrid predictions, especially for the beginning of the execution and regarding data transfers. * New environment variable STARPU_SIMGRID_SCHED_COST to take into account the time to perform scheduling when running in SimGrid mode. * New configure option --enable-mpi-pedantic-isend (disabled by default) to acquire data in STARPU_RW (instead of STARPU_R) before performing MPI_Isend() call * New function starpu_worker_display_names() to display the names of all the workers of a specified type. * Arbiters now support concurrent read access. * Add a field starpu_task::where similar to starpu_codelet::where which allows to restrict where to execute a task. Also add STARPU_TASK_WHERE to be used when calling starpu_task_insert(). * Add SubmitOrder trace field. * Add workerids and workerids_len task fields. * Add priority management to StarPU-MPI. Can be disabled with the STARPU_MPI_PRIORITIES environment variable. * Add STARPU_MAIN_THREAD_CPUID and STARPU_MPI_THREAD_CPUID environment variables. * Add disk to disk copy functions and support asynchronous full read/write in disk backends. * New starpu_task_insert() parameter STARPU_CL_ARGS_NFREE which allows to set codelet parameters but without freeing them. * New starpu_task_insert() parameter STARPU_TASK_DEPS_ARRAY which allows to declare task dependencies similarly to starpu_task_declare_deps_array() * Add dependency backward information in debugging mode for gdb's starpu-print-task * Add sched_data field in starpu_task structure. * New starpu_fxt_tool option -label-deps to label dependencies on the output graph * New environment variable STARPU_GENERATE_TRACE_OPTIONS to specify fxt options (to be used with STARPU_GENERATE_TRACE) * New function starpu_task_set() similar as starpu_task_build() but with a task object given as the first parameter * New functions starpu_data_partition_submit_sequential_consistency() and starpu_data_unpartition_submit_sequential_consistency() * Add a new value STARPU_TASK_SYNCHRONOUS to be used in starpu_task_insert() to define if the task is (or not) synchronous * Add memory states events in the traces. * Add starpu_sched_component_estimated_end_min_add() to fix termination estimations in modular schedulers. * New function starpu_data_partition_not_automatic() to disable the automatic partitioning of a data handle for which a asynchronous plan has previously been submitted * Add starpu_task_declare_deps() * New function starpu_data_unpartition_submit_sequential_consistency_cb() to specify a callback for the task submitting the unpartitioning * New tool starpu_mpi_comm_trace.py to draw heatmap of MPI communications * Support for ARM performance libraries * Add functionality to disable signal catching either through field starpu_conf::catch_signals or through the environment variable STARPU_CATCH_SIGNALS * Support for OpenMP Taskloop directive * Optional data interface init function (used by the vector and matrix interfaces) Changes: * Vastly improve simgrid simulation time. * Switch default scheduler to lws. * Add "to" parameter to pull_task and can_push methods of components. * Deprecate starpu_data_interface_ops::handle_to_pointer interface operation in favor of new starpu_data_interface_ops::to_pointer operation. * Sort data access requests by priority. * Cluster support is disabled by default, unless the configure option --enable-cluster is specified * For unpack operations, move the memory deallocation from starpu_data_unpack() to the interface function starpu_data_interface_ops::unpack_data(). Pack and unpack functions of predefined interfaces use public API starpu_malloc_on_node_flags() and starpu_free_on_node_flags() to allocate and de-allocate memory Small changes: * Use asynchronous transfers for task data fetches with were not prefetched. * Allow to call starpu_sched_ctx_set_policy_data on the main scheduler context * Function starpu_is_initialized() is moved to the public API. * Fix code to allow to submit tasks to empty contexts * STARPU_COMM_STATS also displays the bandwidth * Update data interfaces implementations to only use public API StarPU 1.2.11 (git revision xxx) ==================================================================== Small features: * Add starpu_tag_notify_restart_from_apps(). StarPU 1.2.10 (git revision beb6ac9cc07dc9ae1c838a38d11ed2dae3775996) ==================================================================== Small features: * New script starpu_env to set up StarPU environment variables * New configure option --disable-build-doc-pdf StarPU 1.2.9 (git revision 3aca8da3138a99e93d7f93905d2543bd6f1ea1df) ==================================================================== Small changes: * Add STARPU_SIMGRID_TRANSFER_COST environment variable to easily disable data transfer costs. * New dmdap "data-aware performance model (priority)" scheduler * Modification in the Native Fortran interface of the functions fstarpu_mpi_task_insert, fstarpu_mpi_task_build and fstarpu_mpi_task_post_build to only take 1 parameter being the MPI communicator, the codelet and the various parameters for the task. StarPU 1.2.8 (git revision f66374c9ad39aefb7cf5dfc31f9ab3d756bcdc3c) ==================================================================== Small features: * Minor fixes StarPU 1.2.7 (git revision 07cb7533c22958a76351bec002955f0e2818c530) ==================================================================== Small features: * Add STARPU_HWLOC_INPUT environment variable to save initialization time. * Add starpu_data_set/get_ooc_flag. * Use starpu_mpi_tag_t (int64_t) for MPI communication tag StarPU 1.2.6 (git revision 23049adea01837479f309a75c002dacd16eb34ad) ==================================================================== Small changes: * Fix crash for lws scheduler * Avoid making hwloc load PCI topology when CUDA is not enabled StarPU 1.2.5 (git revision 22f32916916d158e3420033aa160854d1dd341bd) ==================================================================== Small features: * Add a new value STARPU_TASK_COLOR to be used in starpu_task_insert() to pick up the color of a task in dag.dot * Add starpu_data_pointer_is_inside(). Changes: * Do not export -lcuda -lcudart -lOpenCL in *starpu*.pc. StarPU 1.2.4 (git revision 255cf98175ef462749780f30bfed21452b74b594) ==================================================================== Small features: * Catch of signals SIGINT and SIGSEGV to dump fxt trace files. * New configure option --disable-icc to disable the compilation of specific ICC examples * Add starpu_codelet_pack_arg_init, starpu_codelet_pack_arg, starpu_codelet_pack_arg_fini for more fine-grain packing capabilities. * Add starpu_task_insert_data_make_room, starpu_task_insert_data_process_arg, starpu_task_insert_data_process_array_arg, starpu_task_insert_data_process_mode_array_arg * Do not show internal tasks in fxt dag by default. Allow to hide acquisitions too. * Add a way to choose the dag.dot colors. StarPU 1.2.3 (git revision 586ba6452a8eef99f275c891ce08933ae542c6c2) ==================================================================== New features: * Add per-node MPI data. Small features: * When debug is enabled, starpu data accessors first check the validity of the data interface type * Print disk bus performances when STARPU_BUS_STATS is set * Add starpu_vector_filter_list_long filter. * Data interfaces now define a name through the struct starpu_data_interface_ops * StarPU-MPI : - allow predefined data interface not to define a mpi datatype and to be exchanged through pack/unpack operations - New function starpu_mpi_comm_get_attr() which allows to return the value of the attribute STARPU_MPI_TAG_UB, i.e the upper bound for tag value. - New configure option enable-mpi-verbose to manage the display of extra MPI debug messages. * Add STARPU_WATCHDOG_DELAY environment variable. * Add a 'waiting' worker status * Allow new value 'extra' for configure option --enable-verbose Small changes: * Add data_unregister event in traces * StarPU-MPI - push detached requests at the back of the testing list, so they are tested last since they will most probably finish latest * Automatically initialize handles on data acquisition when reduction methods are provided, and make sure a handle is initialized before trying to read it. StarPU 1.2.2 (git revision a0b01437b7b91f33fb3ca36bdea35271cad34464) =================================================================== New features: * Add starpu_data_acquire_try and starpu_data_acquire_on_node_try. * Add NVCC_CC environment variable. * Add -no-flops and -no-events options to starpu_fxt_tool to make traces lighter * Add starpu_cusparse_init/shutdown/get_local_handle for proper CUDA overlapping with cusparse. * Allow precise debugging by setting STARPU_TASK_BREAK_ON_PUSH, STARPU_TASK_BREAK_ON_SCHED, STARPU_TASK_BREAK_ON_POP, and STARPU_TASK_BREAK_ON_EXEC environment variables, with the job_id of a task. StarPU will raise SIGTRAP when the task is being scheduled, pushed, or popped by the scheduler. Small features: * New function starpu_worker_get_job_id(struct starpu_task *task) which returns the job identifier for a given task * Show package/numa topology in starpu_machine_display * MPI: Add mpi communications in dag.dot * Add STARPU_PERF_MODEL_HOMOGENEOUS_CPU environment variable to allow having one perfmodel per CPU core * Add starpu_perfmodel_arch_comb_fetch function. * Add starpu_mpi_get_data_on_all_nodes_detached function. Small changes: * Output generated through STARPU_MPI_COMM has been modified to allow easier automated checking * MPI: Fix reactivity of the beginning of the application, when a lot of ready requests have to be processed at the same time, we want to poll the pending requests from time to time. * MPI: Fix gantt chart for starpu_mpi_irecv: it should use the termination time of the request, not the submission time. * MPI: Modify output generated through STARPU_MPI_COMM to allow easier automated checking * MPI: enable more tests in simgrid mode * Use assumed-size instead of assumed-shape arrays for native fortran API, for better backward compatibility. * Fix odd ordering of CPU workers on CPUs due to GPUs stealing some cores StarPU 1.2.1 (git revision 473acaec8a1fb4f4c73d8b868e4f044b736b41ea) ==================================================================== New features: * Add starpu_fxt_trace_user_event_string. * Add starpu_tasks_rec_complete tool to add estimation times in tasks.rec files. * Add STARPU_FXT_TRACE environment variable. * Add starpu_data_set_user_data and starpu_data_get_user_data. * Add STARPU_MPI_FAKE_SIZE and STARPU_MPI_FAKE_RANK to allow simulating execution of just one MPI node. * Add STARPU_PERF_MODEL_HOMOGENEOUS_CUDA/OPENCL/MIC/SCC to share performance models between devices, making calibration much faster. * Add modular-heft-prio scheduler. * Add starpu_cublas_get_local_handle helper. * Add starpu_data_set_name, starpu_data_set_coordinates_array, and starpu_data_set_coordinates to describe data, and starpu_iteration_push and starpu_iteration_pop to describe tasks, for better offline traces analysis. * New function starpu_bus_print_filenames() to display filenames storing bandwidth/affinity/latency information, available through tools/starpu_machine_display -i * Add support for Ayudame version 2.x debugging library. * Add starpu_sched_ctx_get_workers_list_raw, much less costly than starpu_sched_ctx_get_workers_list * Add starpu_task_get_name and use it to warn about dmda etc. using a dumb policy when calibration is not finished * MPI: Add functions to test for cached values Changes: * Fix performance regression of lws for small tasks. * Improve native Fortran support for StarPU Small changes: * Fix type of data home node to allow users to pass -1 to define temporary data * Fix compatibility with simgrid 3.14 StarPU 1.2.0 (git revision 5a86e9b61cd01b7797e18956283cc6ea22adfe11) ==================================================================== New features: * MIC Xeon Phi support * SCC support * New function starpu_sched_ctx_exec_parallel_code to execute a parallel code on the workers of the given scheduler context * MPI: - New internal communication system : a unique tag called is now used for all communications, and a system of hashmaps on each node which stores pending receives has been implemented. Every message is now coupled with an envelope, sent before the corresponding data, which allows the receiver to allocate data correctly, and to submit the matching receive of the envelope. - New function starpu_mpi_irecv_detached_sequential_consistency which allows to enable or disable the sequential consistency for the given data handle (sequential consistency will be enabled or disabled based on the value of the function parameter and the value of the sequential consistency defined for the given data) - New functions starpu_mpi_task_build() and starpu_mpi_task_post_build() - New flag STARPU_NODE_SELECTION_POLICY to specify a policy for selecting a node to execute the codelet when several nodes own data in W mode. - New selection node policies can be un/registered with the functions starpu_mpi_node_selection_register_policy() and starpu_mpi_node_selection_unregister_policy() - New environment variable STARPU_MPI_COMM which enables basic tracing of communications. - New function starpu_mpi_init_comm() which allows to specify a MPI communicator. * New STARPU_COMMUTE flag which can be passed along STARPU_W or STARPU_RW to let starpu commute write accesses. * Out-of-core support, through registration of disk areas as additional memory nodes. It can be enabled programmatically or through the STARPU_DISK_SWAP* environment variables. * Reclaiming is now periodically done before memory becomes full. This can be controlled through the STARPU_*_AVAILABLE_MEM environment variables. * New hierarchical schedulers which allow the user to easily build its own scheduler, by coding itself each "box" it wants, or by combining existing boxes in StarPU to build it. Hierarchical schedulers have very interesting scalability properties. * Add STARPU_CUDA_ASYNC and STARPU_OPENCL_ASYNC flags to allow asynchronous CUDA and OpenCL kernel execution. * Add STARPU_CUDA_PIPELINE and STARPU_OPENCL_PIPELINE to specify how many asynchronous tasks are submitted in advance on CUDA and OpenCL devices. Setting the value to 0 forces a synchronous execution of all tasks. * Add CUDA concurrent kernel execution support through the STARPU_NWORKER_PER_CUDA environment variable. * Add CUDA and OpenCL kernel submission pipelining, to overlap costs and allow concurrent kernel execution on Fermi cards. * New locality work stealing scheduler (lws). * Add STARPU_VARIABLE_NBUFFERS to be set in cl.nbuffers, and nbuffers and modes field to the task structure, which permit to define codelets taking a variable number of data. * Add support for implementing OpenMP runtimes on top of StarPU * New performance model format to better represent parallel tasks. Used to provide estimations for the execution times of the parallel tasks on scheduling contexts or combined workers. * starpu_data_idle_prefetch_on_node and starpu_idle_prefetch_task_input_on_node allow to queue prefetches to be done only when the bus is idle. * Make starpu_data_prefetch_on_node not forcibly flush data out, introduce starpu_data_fetch_on_node for that. * Add data access arbiters, to improve parallelism of concurrent data accesses, notably with STARPU_COMMUTE. * Anticipative writeback, to flush dirty data asynchronously before the GPU device is full. Disabled by default. Use STARPU_MINIMUM_CLEAN_BUFFERS and STARPU_TARGET_CLEAN_BUFFERS to enable it. * Add starpu_data_wont_use to advise that a piece of data will not be used in the close future. * Enable anticipative writeback by default. * New scheduler 'dmdasd' that considers priority when deciding on which worker to schedule * Add the capability to define specific MPI datatypes for StarPU user-defined interfaces. * Add tasks.rec trace output to make scheduling analysis easier. * Add Fortran 90 module and example using it * New StarPU-MPI gdb debug functions * Generate animated html trace of modular schedulers. * Add asynchronous partition planning. It only supports coherency through the home node of data for now. * Add STARPU_MALLOC_SIMULATION_FOLDED flag to save memory when simulating. * Include application threads in the trace. * Add starpu_task_get_task_scheduled_succs to get successors of a task. * Add graph inspection facility for schedulers. * New STARPU_LOCALITY flag to mark data which should be taken into account by schedulers for improving locality. * Experimental support for data locality in ws and lws. * Add a preliminary framework for native Fortran support for StarPU Small features: * Tasks can now have a name (via the field const char *name of struct starpu_task) * New functions starpu_data_acquire_cb_sequential_consistency() and starpu_data_acquire_on_node_cb_sequential_consistency() which allows to enable or disable sequential consistency * New configure option --enable-fxt-lock which enables additional trace events focused on locks behaviour during the execution * Functions starpu_insert_task and starpu_mpi_insert_task are renamed in starpu_task_insert and starpu_mpi_task_insert. Old names are kept to avoid breaking old codes. * New configure option --enable-calibration-heuristic which allows the user to set the maximum authorized deviation of the history-based calibrator. * Allow application to provide the task footprint itself. * New function starpu_sched_ctx_display_workers() to display worker information belonging to a given scheduler context * The option --enable-verbose can be called with --enable-verbose=extra to increase the verbosity * Add codelet size, footprint and tag id in the paje trace. * Add STARPU_TAG_ONLY, to specify a tag for traces without making StarPU manage the tag. * On Linux x86, spinlocks now block after a hundred tries. This avoids typical 10ms pauses when the application thread tries to submit tasks. * New function char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type) * Improve static scheduling by adding support for specifying the task execution order. * Add starpu_worker_can_execute_task_impl and starpu_worker_can_execute_task_first_impl to optimize getting the working implementations * Add STARPU_MALLOC_NORECLAIM flag to allocate without running a reclaim if the node is out of memory. * New flag STARPU_DATA_MODE_ARRAY for the function family starpu_task_insert to allow to define a array of data handles along with their access modes. * New configure option --enable-new-check to enable new testcases which are known to fail * Add starpu_memory_allocate and _deallocate to let the application declare its own allocation to the reclaiming engine. * Add STARPU_SIMGRID_CUDA_MALLOC_COST and STARPU_SIMGRID_CUDA_QUEUE_COST to disable CUDA costs simulation in simgrid mode. * Add starpu_task_get_task_succs to get the list of children of a given task. * Add starpu_malloc_on_node_flags, starpu_free_on_node_flags, and starpu_malloc_on_node_set_default_flags to control the allocation flags used for allocations done by starpu. * Ranges can be provided in STARPU_WORKERS_CPUID * Add starpu_fxt_autostart_profiling to be able to avoid autostart. * Add arch_cost_function perfmodel function field. * Add STARPU_TASK_BREAK_ON_SCHED, STARPU_TASK_BREAK_ON_PUSH, and STARPU_TASK_BREAK_ON_POP environment variables to debug schedulers. * Add starpu_sched_display tool. * Add starpu_memory_pin and starpu_memory_unpin to pin memory allocated another way than starpu_malloc. * Add STARPU_NOWHERE to create synchronization tasks with data. * Document how to switch between different views of the same data. * Add STARPU_NAME to specify a task name from a starpu_task_insert call. * Add configure option to disable fortran --disable-fortran * Add configure option to give path for smpirun executable --with-smpirun * Add configure option to disable the build of tests --disable-build-tests * Add starpu-all-tasks debugging support * New function void starpu_opencl_load_program_source_malloc(const char *source_file_name, char **located_file_name, char **located_dir_name, char **opencl_program_source) which allocates the pointers located_file_name, located_dir_name and opencl_program_source. * Add submit_hook and do_schedule scheduler methods. * Add starpu_sleep. * Add starpu_task_list_ismember. * Add _starpu_fifo_pop_this_task. * Add STARPU_MAX_MEMORY_USE environment variable. * Add starpu_worker_get_id_check(). * New function starpu_mpi_wait_for_all(MPI_Comm comm) that allows to wait until all StarPU tasks and communications for the given communicator are completed. * New function starpu_codelet_unpack_args_and_copyleft() which allows to copy in a new buffer values which have not been unpacked by the current call * Add STARPU_CODELET_SIMGRID_EXECUTE flag. * Add STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT flag. * Add STARPU_CL_ARGS flag to starpu_task_insert() and starpu_mpi_task_insert() functions call Changes: * Data interfaces (variable, vector, matrix and block) now define pack und unpack functions * StarPU-MPI: Fix for being able to receive data which have not yet been registered by the application (i.e it did not call starpu_data_set_tag(), data are received as a raw memory) * StarPU-MPI: Fix for being able to receive data with the same tag from several nodes (see mpi/tests/gather.c) * Remove the long-deprecated cost_model fields and task->buffers field. * Fix complexity of implicit task/data dependency, from quadratic to linear. Small changes: * Rename function starpu_trace_user_event() as starpu_fxt_trace_user_event() * "power" is renamed into "energy" wherever it applies, notably energy consumption performance models * Update starpu_task_build() to set starpu_task::cl_arg_free to 1 if some arguments of type ::STARPU_VALUE are given. * Simplify performance model loading API * Better semantic for environment variables STARPU_NMIC and STARPU_NMICDEVS, the number of devices and the number of cores. STARPU_NMIC will be the number of devices, and STARPU_NMICCORES will be the number of cores per device. StarPU 1.1.8 (git revision f7b7abe9f86361cbc96f2b51c6ad7336b7d1d628) ==================================================================== The scheduling context release Small changes: * Fix compatibility with simgrid 3.14 * Fix lock ordering for memory reclaiming StarPU 1.1.7 (git revision 341044b67809892cf4a388e482766beb50256907) ==================================================================== The scheduling context release Small changes: * Fix type of data home node to allow users to pass -1 to define temporary data StarPU 1.1.6 (git revision cdffbd5f5447e4d076d659232b3deb14f3c20da6) ==================================================================== The scheduling context release Small features: * Add starpu_task_get_task_succs to get the list of children of a given task. * Ranges can be provided in STARPU_WORKERS_CPUID Small changes: * Various fixes for MacOS and windows systems StarPU 1.1.5 (git revision 20469c6f3e7ecd6c0568c8e4e4b5b652598308d8xxx) ======================================================================= The scheduling context release New features: * Add starpu_memory_pin and starpu_memory_unpin to pin memory allocated another way than starpu_malloc. * Add starpu_task_wait_for_n_submitted() and STARPU_LIMIT_MAX_NSUBMITTED_TASKS/STARPU_LIMIT_MIN_NSUBMITTED_TASKS to easily control the number of submitted tasks by making task submission block. * Add STARPU_NOWHERE to create synchronization tasks with data. * Document how to switch between different views of the same data. * Add Fortran 90 module and example using it StarPU 1.1.4 (git revision 2a3d30b28d6d099d271134a786335acdbb3931a3) ==================================================================== The scheduling context release New features: * Fix and actually enable the cache allocation. * Enable allocation cache in main RAM when STARPU_LIMIT_CPU_MEM is set by the user. * New MPI functions starpu_mpi_issend and starpu_mpi_issend_detached to send data using a synchronous and non-blocking mode (internally uses MPI_Issend) * New data access mode flag STARPU_SSEND to be set when calling starpu_mpi_insert_task to specify the data has to be sent using a synchronous and non-blocking mode * New environment variable STARPU_PERF_MODEL_DIR which can be set to specify a directory where to store performance model files in. When unset, the files are stored in $STARPU_HOME/.starpu/sampling * MPI: - New function starpu_mpi_data_register_comm to register a data with another communicator than MPI_COMM_WORLD - New functions starpu_mpi_data_set_rank() and starpu_mpi_data_set_tag() which call starpu_mpi_data_register_comm() Small features: * Add starpu_memory_wait_available() to wait for a given size to become available on a given node. * New environment variable STARPU_RAND_SEED to set the seed used for random numbers. * New function starpu_mpi_cache_set() to enable or disable the communication cache at runtime * Add starpu_paje_sort which sorts Pajé traces. Changes: * Fix complexity of implicit task/data dependency, from quadratic to linear. StarPU 1.1.3 (git revision 11afc5b007fe1ab1c729b55b47a5a98ef7f3cfad) ==================================================================== The scheduling context release New features: * One can register an existing on-GPU buffer to be used by a handle. * Add the starpu_paje_summary statistics tool. * Enable gpu-gpu transfers for matrices. * Let interfaces declare which transfers they allow with the can_copy method. Small changes: * Lock performance model files while writing and reading them to avoid issues on parallel launches, MPI runs notably. * Lots of build fixes for icc on Windows. StarPU 1.1.2 (git revision d14c550798630bbc4f3da2b07d793c47e3018f02) ==================================================================== The scheduling context release New features: * The reduction init codelet is automatically used to initialize temporary buffers. * Traces now include a "scheduling" state, to show the overhead of the scheduler. * Add STARPU_CALIBRATE_MINIMUM environment variable to specify the minimum number of calibration measurements. * Add STARPU_TRACE_BUFFER_SIZE environment variable to specify the size of the trace buffer. StarPU 1.1.1 (git revision dab2e51117fac5bef767f3a6b7677abb2147d2f2) ==================================================================== The scheduling context release New features: * MPI: - New variable STARPU_MPI_CACHE_STATS to print statistics on cache holding received data. - New function starpu_mpi_data_register() which sets the rank and tag of a data, and also allows to automatically clear the MPI communication cache when unregistering the data. It should be called instead of both calling starpu_data_set_tag() and starpu_data_set_rank() * Use streams for all CUDA transfers, even initiated by CPUs. * Add paje traces statistics tools. * Use streams for GPUA->GPUB and GPUB->GPUA transfers. Small features: * New STARPU_EXECUTE_ON_WORKER flag to specify the worker on which to execute the task. * New STARPU_DISABLE_PINNING environment variable to disable host memory pinning. * New STARPU_DISABLE_KERNELS environment variable to disable actual kernel execution. * New starpu_memory_get_total function to get the size of a memory node. * New starpu_parallel_task_barrier_init_n function to let a scheduler decide a set of workers without going through combined workers. Changes: * Fix simgrid execution. * Rename starpu_get_nready_tasks_of_sched_ctx to starpu_sched_ctx_get_nready_tasks * Rename starpu_get_nready_flops_of_sched_ctx to starpu_sched_ctx_get_nready_flops * New functions starpu_pause() and starpu_resume() * New codelet specific_nodes field to specify explicit target nodes for data. * StarPU-MPI: Fix overzealous allocation of memory. * Interfaces: Allow interface implementation to change pointers at will, in unpack notably. Small changes: * Use big fat abortions when one tries to make a task or callback sleep, instead of just returning EDEADLCK which few people will test * By default, StarPU FFT examples are not compiled and checked, the configure option --enable-starpufft-examples needs to be specified to change this behaviour. StarPU 1.1.0 (git revision 3c4bc72ccef30e767680cad3d749c4e9010d4476) ==================================================================== The scheduling context release New features: * OpenGL interoperability support. * Capability to store compiled OpenCL kernels on the file system * Capability to load compiled OpenCL kernels * Performance models measurements can now be provided explicitly by applications. * Capability to emit communication statistics when running MPI code * Add starpu_data_unregister_submit, starpu_data_acquire_on_node and starpu_data_invalidate_submit * New functionality to wrapper starpu_insert_task to pass a array of data_handles via the parameter STARPU_DATA_ARRAY * Enable GPU-GPU direct transfers. * GCC plug-in - Add `registered' attribute - A new pass was added that warns about the use of possibly unregistered memory buffers. * SOCL - Manual mapping of commands on specific devices is now possible - SOCL does not require StarPU CPU tasks anymore. CPU workers are automatically disabled to enhance performance of OpenCL CPU devices * New interface: COO matrix. * Data interfaces: The pack operation of user-defined data interface defines a new parameter count which should be set to the size of the buffer created by the packing of the data. * MPI: - Communication statistics for MPI can only be enabled at execution time by defining the environment variable STARPU_COMM_STATS - Communication cache mechanism is enabled by default, and can only be disabled at execution time by setting the environment variable STARPU_MPI_CACHE to 0. - Initialisation functions starpu_mpi_initialize_extended() and starpu_mpi_initialize() have been made deprecated. One should now use starpu_mpi_init(int *, char ***, int). The last parameter indicates if MPI should be initialised. - Collective detached operations have new parameters, a callback function and a argument. This is to be consistent with the detached point-to-point communications. - When exchanging user-defined data interfaces, the size of the data is the size returned by the pack operation, i.e data with dynamic size can now be exchanged with StarPU-MPI. * Add experimental simgrid support, to simulate execution with various number of CPUs, GPUs, amount of memory, etc. * Add support for OpenCL simulators (which provide simulated execution time) * Add support for Temanejo, a task graph debugger * Theoretical bound lp output now includes data transfer time. * Update OpenCL driver to only enable CPU devices (the environment variable STARPU_OPENCL_ONLY_ON_CPUS must be set to a positive value when executing an application) * Add Scheduling contexts to separate computation resources - Scheduling policies take into account the set of resources corresponding to the context it belongs to - Add support to dynamically change scheduling contexts (Create and Delete a context, Add Workers to a context, Remove workers from a context) - Add support to indicate to which contexts the tasks are submitted * Add the Hypervisor to manage the Scheduling Contexts automatically - The Contexts can be registered to the Hypervisor - Only the registered contexts are managed by the Hypervisor - The Hypervisor can detect the initial distribution of resources of a context and constructs it consequently (the cost of execution is required) - Several policies can adapt dynamically the distribution of resources in contexts if the initial one was not appropriate - Add a platform to implement new policies of redistribution of resources * Implement a memory manager which checks the global amount of memory available on devices, and checks there is enough memory before doing an allocation on the device. * Discard environment variable STARPU_LIMIT_GPU_MEM and define instead STARPU_LIMIT_CUDA_MEM and STARPU_LIMIT_OPENCL_MEM * Introduce new variables STARPU_LIMIT_CUDA_devid_MEM and STARPU_LIMIT_OPENCL_devid_MEM to limit memory per specific device * Introduce new variable STARPU_LIMIT_CPU_MEM to limit memory for the CPU devices * New function starpu_malloc_flags to define a memory allocation with constraints based on the following values: - STARPU_MALLOC_PINNED specifies memory should be pinned - STARPU_MALLOC_COUNT specifies the memory allocation should be in the limits defined by the environment variables STARPU_LIMIT_xxx (see above). When no memory is left, starpu_malloc_flag tries to reclaim memory from StarPU and returns -ENOMEM on failure. * starpu_malloc calls starpu_malloc_flags with a value of flag set to STARPU_MALLOC_PINNED * Define new function starpu_free_flags similarly to starpu_malloc_flags * Define new public API starpu_pthread which is similar to the pthread API. It is provided with 2 implementations: a pthread one and a Simgrid one. Applications using StarPU and wishing to use the Simgrid StarPU features should use it. * Allow to have a dynamically allocated number of buffers per task, and so overwrite the value defined --enable-maxbuffers=XXX * Performance models files are now stored in a directory whose name include the version of the performance model format. The version number is also written in the file itself. When updating the format, the internal variable _STARPU_PERFMODEL_VERSION should be updated. It is then possible to switch easily between different versions of StarPU having different performance model formats. * Tasks can now define a optional prologue callback which is executed on the host when the task becomes ready for execution, before getting scheduled. * Small CUDA allocations (<= 4MiB) are now batched to avoid the huge cudaMalloc overhead. * Prefetching is now done for all schedulers when it can be done whatever the scheduling decision. * Add a watchdog which permits to easily trigger a crash when StarPU gets stuck. * Document how to migrate data over MPI. * New function starpu_wakeup_worker() to be used by schedulers to wake up a single worker (instead of all workers) when submitting a single task. * The functions starpu_sched_set/get_min/max_priority set/get the priorities of the current scheduling context, i.e the one which was set by a call to starpu_sched_ctx_set_context() or the initial context if the function has not been called yet. * Fix for properly dealing with NAN on windows systems Small features: * Add starpu_worker_get_by_type and starpu_worker_get_by_devid * Add starpu_fxt_stop_profiling/starpu_fxt_start_profiling which permits to pause trace recording. * Add trace_buffer_size configuration field to permit to specify the tracing buffer size. * Add starpu_codelet_profile and starpu_codelet_histo_profile, tools which draw the profile of a codelet. * File STARPU-REVISION --- containing the SVN revision number from which StarPU was compiled --- is installed in the share/doc/starpu directory * starpu_perfmodel_plot can now directly draw GFlops curves. * New configure option --enable-mpi-progression-hook to enable the activity polling method for StarPU-MPI. * Permit to disable sequential consistency for a given task. * New macro STARPU_RELEASE_VERSION * New function starpu_get_version() to return as 3 integers the release version of StarPU. * Enable by default data allocation cache * New function starpu_perfmodel_directory() to print directory storing performance models. Available through the new option -d of the tool starpu_perfmodel_display * New batch files to execute StarPU applications under Microsoft Visual Studio (They are installed in path_to_starpu/bin/msvc)/ * Add cl_arg_free, callback_arg_free, prologue_callback_arg_free fields to enable automatic free(cl_arg); free(callback_arg); free(prologue_callback_arg) on task destroy. * New function starpu_task_build * New configure options --with-simgrid-dir --with-simgrid-include-dir and --with-simgrid-lib-dir to specify the location of the SimGrid library Changes: * Rename all filter functions to follow the pattern starpu_DATATYPE_filter_FILTERTYPE. The script tools/dev/rename_filter.sh is provided to update your existing applications to use new filters function names. * Renaming of diverse functions and datatypes. The script tools/dev/rename.sh is provided to update your existing applications to use the new names. It is also possible to compile with the pkg-config package starpu-1.0 to keep using the old names. It is however recommended to update your code and to use the package starpu-1.1. * Fix the block filter functions. * Fix StarPU-MPI on Darwin. * The FxT code can now be used on systems other than Linux. * Keep only one hashtable implementation common/uthash.h * The cache of starpu_mpi_insert_task is fixed and thus now enabled by default. * Improve starpu_machine_display output. * Standardize objects name in the performance model API * SOCL - Virtual SOCL device has been removed - Automatic scheduling still available with command queues not assigned to any device - Remove modified OpenCL headers. ICD is now the only supported way to use SOCL. - SOCL test suite is only run when environment variable SOCL_OCL_LIB_OPENCL is defined. It should contain the location of the libOpenCL.so file of the OCL ICD implementation. * Fix main memory leak on multiple unregister/re-register. * Improve hwloc detection by configure * Cell: - It is no longer possible to enable the cell support via the gordon driver - Data interfaces no longer define functions to copy to and from SPU devices - Codelet no longer define pointer for Gordon implementations - Gordon workers are no longer enabled - Gordon performance models are no longer enabled * Fix data transfer arrows in paje traces * The "heft" scheduler no longer exists. Users should now pick "dmda" instead. * StarPU can now use poti to generate paje traces. * Rename scheduling policy "parallel greedy" to "parallel eager" * starpu_scheduler.h is no longer automatically included by starpu.h, it has to be manually included when needed * New batch files to run StarPU applications with Microsoft Visual C * Add examples/release/Makefile to test StarPU examples against an installed version of StarPU. That can also be used to test examples using a previous API. * Tutorial is installed in ${docdir}/tutorial * Schedulers eager_central_policy, dm and dmda no longer erroneously respect priorities. dmdas has to be used to respect priorities. * StarPU-MPI: Fix potential bug for user-defined datatypes. As MPI can reorder messages, we need to make sure the sending of the size of the data has been completed. * Documentation is now generated through doxygen. * Modification of perfmodels output format for future improvements. * Fix for properly dealing with NAN on windows systems * Function starpu_sched_ctx_create() now takes a variable argument list to define the scheduler to be used, and the minimum and maximum priority values * The functions starpu_sched_set/get_min/max_priority set/get the priorities of the current scheduling context, i.e the one which was set by a call to starpu_sched_ctx_set_context() or the initial context if the function was not called yet. * MPI: Fix of the livelock issue discovered while executing applications on a CPU+GPU cluster of machines by adding a maximum trylock threshold before a blocking lock. Small changes: * STARPU_NCPU should now be used instead of STARPU_NCPUS. STARPU_NCPUS is still available for compatibility reasons. * include/starpu.h includes all include/starpu_*.h files, applications therefore only need to have #include * Active task wait is now included in blocked time. * Fix GCC plugin linking issues starting with GCC 4.7. * Fix forcing calibration of never-calibrated archs. * CUDA applications are no longer compiled with the "-arch sm_13" option. It is specifically added to applications which need it. * Explicitly name the non-sleeping-non-running time "Overhead", and use another color in vite traces. * Use C99 variadic macro support, not GNU. * Fix performance regression: dmda queues were inadvertently made LIFOs in r9611. StarPU 1.0.3 (git revision 25f8b3a7b13050e99bf1725ca6f52cfd62e7a861) ==================================================================== Changes: * Several bug fixes in the build system * Bug fixes in source code for non-Linux systems * Fix generating FXT traces bigger than 64MiB. * Improve ENODEV error detections in StarPU FFT StarPU 1.0.2 (git revision 6f95de279d6d796a39debe8d6c5493b3bdbe0c37) ==================================================================== Changes: * Add starpu_block_shadow_filter_func_vector and an example. * Add tag dependency in trace-generated DAG. * Fix CPU binding for optimized CPU-GPU transfers. * Fix parallel tasks CPU binding and combined worker generation. * Fix generating FXT traces bigger than 64MiB. StarPU 1.0.1 (git revision 97ea6e15a273e23e4ddabf491b0f9481373ca01a) ==================================================================== Changes: * hwloc support. Warn users when hwloc is not found on the system and produce error when not explicitly disabled. * Several bug fixes * GCC plug-in - Add `#pragma starpu release' - Fix bug when using `acquire' pragma with function parameters - Slightly improve test suite coverage - Relax the GCC version check * Update SOCL to use new API * Documentation improvement. StarPU 1.0.0 (git revision d3ad9ca318ec9acfeaf8eb7d8a018b09e4722292) ==================================================================== The extensions-again release New features: * Add SOCL, an OpenCL interface on top of StarPU. * Add a gcc plugin to extend the C interface with pragmas which allows to easily define codelets and issue tasks. * Add reduction mode to starpu_mpi_insert_task. * A new multi-format interface permits to use different binary formats on CPUs & GPUs, the conversion functions being provided by the application and called by StarPU as needed (and as less as possible). * Deprecate cost_model, and introduce cost_function, which is provided with the whole task structure, the target arch and implementation number. * Permit the application to provide its own size base for performance models. * Applications can provide several implementations of a codelet for the same architecture. * Add a StarPU-Top feedback and steering interface. * Permit to specify MPI tags for more efficient starpu_mpi_insert_task Changes: * Fix several memory leaks and race conditions * Make environment variables take precedence over the configuration passed to starpu_init() * Libtool interface versioning has been included in libraries names (libstarpu-1.0.so, libstarpumpi-1.0.so, libstarpufft-1.0.so, libsocl-1.0.so) * Install headers under $includedir/starpu/1.0. * Make where field for struct starpu_codelet optional. When unset, its value will be automatically set based on the availability of the different XXX_funcs fields of the codelet. * Define access modes for data handles into starpu_codelet and no longer in starpu_task. Hence mark (struct starpu_task).buffers as deprecated, and add (struct starpu_task).handles and (struct starpu_codelet).modes * Fields xxx_func of struct starpu_codelet are made deprecated. One should use fields xxx_funcs instead. * Some types were renamed for consistency. when using pkg-config libstarpu, starpu_deprecated_api.h is automatically included (after starpu.h) to keep compatibility with existing software. Other changes are mentioned below, compatibility is also preserved for them. To port code to use new names (this is not mandatory), the tools/dev/rename.sh script can be used, and pkg-config starpu-1.0 should be used. * The communication cost in the heft and dmda scheduling strategies now take into account the contention brought by the number of GPUs. This changes the meaning of the beta factor, whose default 1.0 value should now be good enough in most case. Small features: * Allow users to disable asynchronous data transfers between CPUs and GPUs. * Update OpenCL driver to enable CPU devices (the environment variable STARPU_OPENCL_ON_CPUS must be set to a positive value when executing an application) * struct starpu_data_interface_ops --- operations on a data interface --- define a new function pointer allocate_new_data which creates a new data interface of the given type based on an existing handle * Add a field named magic to struct starpu_task which is set when initialising the task. starpu_task_submit will fail if the field does not have the right value. This will hence avoid submitting tasks which have not been properly initialised. * Add a hook function pre_exec_hook in struct starpu_sched_policy. The function is meant to be called in drivers. Schedulers can use it to be notified when a task is about being computed. * Add codelet execution time statistics plot. * Add bus speed in starpu_machine_display. * Add a STARPU_DATA_ACQUIRE_CB which permits to inline the code to be done. * Add gdb functions. * Add complex support to LU example. * Permit to use the same data several times in write mode in the parameters of the same task. Small changes: * Increase default value for STARPU_MAXCPUS -- Maximum number of CPUs supported -- to 64. * Add man pages for some of the tools * Add C++ application example in examples/cpp/ * Add an OpenMP fork-join example. * Documentation improvement. StarPU 0.9 (git revision 12bba8528fc0d85367d885cddc383ba54efca464) ================================================================== The extensions release * Provide the STARPU_REDUX data access mode * Externalize the scheduler API. * Add theoretical bound computation * Add the void interface * Add power consumption optimization * Add parallel task support * Add starpu_mpi_insert_task * Add profiling information interface. * Add STARPU_LIMIT_GPU_MEM environment variable. * OpenCL fixes * MPI fixes * Improve optimization documentation * Upgrade to hwloc 1.1 interface * Add fortran example * Add mandelbrot OpenCL example * Add cg example * Add stencil MPI example * Initial support for CUDA4 StarPU 0.4 (git revision ad8d8be3619f211f228c141282d7d504646fc2a6) ================================================================== The API strengthening release * Major API improvements - Provide the STARPU_SCRATCH data access mode - Rework data filter interface - Rework data interface structure - A script that automatically renames old functions to accommodate with the new API is available from https://scm.gforge.inria.fr/svn/starpu/scripts/renaming (login: anonsvn, password: anonsvn) * Implement dependencies between task directly (eg. without tags) * Implicit data-driven task dependencies simplifies the design of data-parallel algorithms * Add dynamic profiling capabilities - Provide per-task feedback - Provide per-worker feedback - Provide feedback about memory transfers * Provide a library to help accelerating MPI applications * Improve data transfers overhead prediction - Transparently benchmark buses to generate performance models - Bind accelerator-controlling threads with respect to NUMA locality * Improve StarPU's portability - Add OpenCL support - Add support for Windows StarPU 0.2.901 aka 0.3-rc1 (git revision 991f2abb772c17c3d45bbcf27f46197652e6a3ef) ================================================================================== The asynchronous heterogeneous multi-accelerator release * Many API changes and code cleanups - Implement starpu_worker_get_id - Implement starpu_worker_get_name - Implement starpu_worker_get_type - Implement starpu_worker_get_count - Implement starpu_display_codelet_stats - Implement starpu_data_prefetch_on_node - Expose the starpu_data_set_wt_mask function * Support nvidia (heterogeneous) multi-GPU * Add the data request mechanism - All data transfers use data requests now - Implement asynchronous data transfers - Implement prefetch mechanism - Chain data requests to support GPU->RAM->GPU transfers * Make it possible to bypass the scheduler and to assign a task to a specific worker * Support restartable tasks to reinstanciate dependencies task graphs * Improve performance prediction - Model data transfer overhead - One model is created for each accelerator * Support for CUDA's driver API is deprecated * The STARPU_WORKERS_CUDAID and STARPU_WORKERS_CPUID env. variables make it possible to specify where to bind the workers * Use the hwloc library to detect the actual number of cores StarPU 0.2.0 (git revision 73e989f0783e10815aff394f80242760c4ed098c) ==================================================================== The Stabilizing-the-Basics release * Various API cleanups * Mac OS X is supported now * Add dynamic code loading facilities onto Cell's SPUs * Improve performance analysis/feedback tools * Application can interact with StarPU tasks - The application may access/modify data managed by the DSM - The application may wait for the termination of a (set of) task(s) * An initial documentation is added * More examples are supplied StarPU 0.1.0 (git revision 911869a96b40c74eb92b30a43d3e08bf445d8078) ==================================================================== First release. Status: * Only supports Linux platforms yet * Supported architectures - multicore CPUs - NVIDIA GPUs (with CUDA 2.x) - experimental Cell/BE support Changes: * Scheduling facilities - run-time selection of the scheduling policy - basic auto-tuning facilities * Software-based DSM - transparent data coherency management - High-level expressive interface # Local Variables: # mode: text # coding: utf-8 # ispell-local-dictionary: "american" # End: starpu-1.4.9+dfsg/INSTALL000066400000000000000000000152431507764646700150460ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # Contents ========= * Installing StarPU on a Unix machine * Installing StarPU on Windows Installing StarPU on a Unix machine ------------------------------------ $ ./autogen.sh # If running the SVN version $ ./configure --prefix= $ make $ make install Installing StarPU on Windows ---------------------------- If you are building from a tarball downloaded from the website, you can skip the cygwin part. 1. Install cygwin http://cygwin.com/install.html Make sure the following packages are available: - (Devel)/subversion - (Devel)/libtool - (Devel)/gcc - (Devel)/make - your favorite editor (vi, emacs, ...) - (Devel)/gdb - (Archive)/zip - (Devel)/pkg-config 2. Install mingw http://www.mingw.org/ 3. Install hwloc (not mandatory, but strongly recommended) http://www.open-mpi.org/projects/hwloc Be careful which version you are installing. Even if your machine runs windows 64 bits, if you are running a 32 bits mingw (check the output of the command uname -a), you will need to install the 32 bits version of hwloc. 4. Install Microsoft Visual C++ Studio Express http://www.microsoft.com/express/Downloads Add in your path the following directories. (adjusting where necessary for the Installation location according to VC version and on 64 and 32bit Windows versions) On cygwin, with Visual C++ 2010 e.g.; export PATH="/cygdrive/c/Program Files (x86)/Microsoft Visual Studio 10.0/Common7/IDE":$PATH export PATH="/cygdrive/c/Program Files (x86)/Microsoft Visual Studio 10.0/VC/bin":$PATH On MingW, with Visual C++ 2010, e.g.; export PATH="/c/Program Files (x86)/Microsoft Visual Studio 10.0/Common7/IDE":$PATH export PATH="/c/Program Files (x86)/Microsoft Visual Studio 10.0/VC/bin":$PATH Try to call , and without any option to make sure these dump their help output with a series of options, otherwise no .def or .lib file will be produced. 5. Install GPU Drivers (not mandatory) 5.1 Install Cuda http://developer.nvidia.com/object/cuda_3_2_downloads.html You need to install at least the CUDA toolkit. libtool is not able to find the libraries automatically, you need to make some copies: copy c:\cuda\lib\cuda.lib c:\cuda\lib\libcuda.lib copy c:\cuda\lib\cudart.lib c:\cuda\lib\libcudart.lib copy c:\cuda\lib\cublas.lib c:\cuda\lib\libcublas.lib copy c:\cuda\lib\cufft.lib c:\cuda\lib\libcufft.lib copy c:\cuda\lib\OpenCL.lib c:\cuda\lib\libOpenCL.lib (and if the version of your CUDA driver is >= 3.2) copy c:\cuda\lib\curand.lib c:\cuda\lib\libcurand.lib Add the CUDA bin directory in your path export PATH=/cygdrive/c/CUDA/bin:$PATH Since we build code using CUDA headers with gcc instead of Visual studio, a fix is needed: c:\cuda\include\host_defines.h has a bogus CUDARTAPI definition which makes linking fail completely. Replace the first occurrence of #define CUDARTAPI with #ifdef _WIN32 #define CUDARTAPI __stdcall #else #define CUDARTAPI #endif While at it, you can also comment the __cdecl definition to avoid spurious warnings. 5.2 Install OpenCL http://developer.nvidia.com/object/opencl-download.html You need to download the NVIDIA Drivers for your version of Windows. Executing the file will extract all files in a given directory. The the driver installation will start, it will fail if no compatibles drivers can be found on your system. Anyway, you should copy the *.dl_ files from the directory (extraction path) in the bin directory of the CUDA installation directory (the directory should be v3.2/bin/) 5.3 Install MsCompress http://gnuwin32.sourceforge.net/packages/mscompress.htm Go in the CUDA bin directory, uncompress .dl_ files and rename them in .dll files cp /cygdrive/c/NVIDIA/DisplayDriver/190.89/International/*.dl_ . for i in *.dl_ ; do /cygdrive/c/Program\ Files/GnuWin32/bin/msexpand.exe $i ; mv ${i%_} ${i%_}l ; done If you are building from a tarball downloaded from the website, you can skip the autogen.sh part. 6. Start autogen.sh from cygwin cd starpu-trunk ./autogen.sh 7. Start a MinGW shell /cygdrive/c/MinGW/msys/1.0/bin/sh.exe --login -i 8. Configure, make, install from MinGW If you have a non-english version of windows, use export LANG=C else libtool has troubles parsing the translated output of the toolchain. cd starpu-trunk mkdir build cd build ../configure --prefix=$PWD/target \ --with-hwloc= \ --with-cuda-dir= \ --with-cuda-lib-dir=/lib/Win32 \ --with-opencl-dir= --disable-build-doc --disable-build-examples --enable-quick-check make make check # not necessary but well advised make install The option --disable-build-doc is necessary if you do not have a working TeX binary installed as it is needed by texi2dvi to build the documentation. To fasten the compilation process, the option --disable-build-examples may also be used to disable the compilation of the applications in the examples directory. Only the applications in the test directory will be build. Also convert a couple of files to CRLF: sed -e 's/$/'$'\015'/ < README > $prefix/README.txt sed -e 's/$/'$'\015'/ < AUTHORS > $prefix/AUTHORS.txt sed -e 's/$/'$'\015'/ < COPYING.LGPL > $prefix/COPYING.LGPL.txt 9. If you want your StarPU installation to be standalone, you need to copy the DLL files from hwloc, Cuda, and OpenCL into the StarPU installation bin directory, as well as MinGW/bin/libpthread*dll cp /bin/*dll target/bin cp /bin/*dll target/bin cp /cygdrive/c/MinGW/bin/libpthread*dll target/bin and set the StarPU bin directory in your path. export PATH=/bin:$PATH starpu-1.4.9+dfsg/Makefile.am000066400000000000000000000171031507764646700160460ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2017-2017 Guillaume Beauchamp # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # ACLOCAL_AMFLAGS=-I m4 CLEANFILES = *.gcno *.gcda *.linkinfo SUBDIRS = if STARPU_USE_MIN_DGELS SUBDIRS += min-dgels endif SUBDIRS += src SUBDIRS += tools if STARPU_BUILD_TESTS SUBDIRS += tests endif SUBDIRS += doc if STARPU_USE_MPI SUBDIRS += mpi endif if STARPU_BUBBLE SUBDIRS += bubble endif if STARPU_BUILD_EXAMPLES SUBDIRS += examples endif if STARPU_BUILD_SOCL SUBDIRS += socl endif if STARPU_BUILD_STARPUFFT SUBDIRS += starpufft endif if STARPU_BUILD_STARPURM SUBDIRS += starpurm endif if STARPU_OPENMP_LLVM SUBDIRS += starpu_openmp_llvm endif if STARPU_BUILD_STARPUPY if STARPU_USE_CPU SUBDIRS += starpupy else if STARPU_USE_MPI_MASTER_SLAVE SUBDIRS += starpupy else if STARPU_USE_TCPIP_MASTER_SLAVE SUBDIRS += starpupy endif endif endif endif if STARPU_BUILD_SC_HYPERVISOR SUBDIRS += sc_hypervisor endif if STARPU_USE_JULIA SUBDIRS += julia endif if STARPU_BUILD_ECLIPSE_PLUGIN SUBDIRS += eclipse-plugin endif pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = packages/libstarpu.pc packages/starpu-1.0.pc packages/starpu-1.1.pc packages/starpu-1.2.pc packages/starpu-1.3.pc packages/starpu-1.4.pc versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) versinclude_HEADERS = \ include/starpu.h \ include/starpu_helper.h \ include/starpu_bitmap.h \ include/starpu_data_filters.h \ include/starpu_data_interfaces.h \ include/starpu_worker.h \ include/starpu_task.h \ include/starpu_task_dep.h \ include/starpu_task_bundle.h \ include/starpu_task_list.h \ include/starpu_task_util.h \ include/starpu_data.h \ include/starpu_perfmodel.h \ include/starpu_util.h \ include/starpu_fxt.h \ include/starpu_cuda.h \ include/starpu_hip.h \ include/starpu_opencl.h \ include/starpu_max_fpga.h \ include/starpu_openmp.h \ include/starpu_sink.h \ include/starpu_expert.h \ include/starpu_profiling.h \ include/starpu_profiling_tool.h \ include/starpu_bound.h \ include/starpu_scheduler.h \ include/schedulers/starpu_heteroprio.h \ include/starpu_sched_component.h \ include/starpu_sched_ctx.h \ include/starpu_sched_ctx_hypervisor.h \ include/starpu_deprecated_api.h \ include/starpu_hash.h \ include/starpu_rand.h \ include/starpu_disk.h \ include/starpu_cublas.h \ include/starpu_cublas_v2.h \ include/starpu_cublasLt.h \ include/starpu_cusolver.h \ include/starpu_cusparse.h \ include/starpu_hipblas.h \ include/starpu_driver.h \ include/starpu_stdlib.h \ include/starpu_thread.h \ include/starpu_thread_util.h \ include/starpu_tree.h \ include/starpu_simgrid_wrap.h \ include/starpu_mod.f90 \ include/fstarpu_mod.f90 \ include/starpu_parallel_worker.h \ include/starpu_perf_monitoring.h \ include/starpu_perf_steering.h \ include/schedulers/starpu_scheduler_toolbox.h if STARPU_OPENMP_LLVM versinclude_HEADERS += \ include/omp.h endif nodist_versinclude_HEADERS = \ include/starpu_config.h noinst_HEADERS = \ include/pthread_win32/pthread.h \ include/pthread_win32/semaphore.h if STARPU_DEVEL all-local: @if $(GREP) -r sys/time.h $$( find $(srcdir)/examples $(srcdir)/tests $(srcdir)/src $(srcdir)/mpi/src $(srcdir)/include -name \*.[ch] -a \! -name starpu_util.h -a \! -name timer.h -a \! -name loader.c ) ; \ then \ echo "Please do not include sys/time, it is not available on Windows, include starpu_util.h and use starpu_timing_now() instead" ; \ false ; \ fi @if $(GREP) -re '\' $$( find $(srcdir)/src $(srcdir)/mpi/src $(srcdir)/include -name \*.[ch] -a \! -name starpu_util.h -a \! -name utils.c -a \! -name simgrid.h) ; \ then \ echo "Please do not use getenv, use starpu_getenv instead, which catches unsafe uses"; \ false ; \ fi # we count the number of files which include unistd.h # we count the number of files which properly include unistd.h i.e by first detecting if it's available # and then we check both numbers are the same ...a @UNISTD_ALL_LINES=$(shell $(GREP) -B1 -rs "^#include " $(srcdir)/src/ $(srcdir)/include/ $(srcdir)/mpi/src $(srcdir)/mpi/include |$(GREP) -v dolib|$(GREP) -v -e "--" | tr '\012' '@' | $(SED) 's/unistd.h>@/unistd.h>\n/g' | wc -l | tr -d ' ') ;\ UNISTD_CORRECT_LINES=$(shell $(GREP) -B1 -rs "^#include " $(srcdir)/src/ $(srcdir)/include/ $(srcdir)/mpi/src $(srcdir)/mpi/include |$(GREP) -v dolib|$(GREP) -v -e "--" | tr '\012' '@' | $(SED) 's/unistd.h>@/unistd.h>\n/g' | $(GREP) '#ifdef .*HAVE_UNISTD_H.*:#include ' | wc -l | tr -d ' ') ;\ if test $$UNISTD_ALL_LINES -ne $$UNISTD_CORRECT_LINES ; \ then \ echo "Please do not unconditionally include unistd.h, it is not available on Windows, include config.h and test for HAVE_UNISTD_H" ; \ false ; \ fi endif if STARPU_HAVE_WINDOWS txtdir = ${prefix} else txtdir = ${docdir} endif txt_DATA = AUTHORS COPYING.LGPL README.md README.dev STARPU-REVISION EXTRA_DIST = autogen.sh AUTHORS COPYING.LGPL README.md README.dev STARPU-VERSION STARPU-REVISION EXTRA_DIST += .gitlab-ci.yml EXTRA_DIST += contrib/ci.inria.fr/Jenkinsfile-basic EXTRA_DIST += contrib/ci.inria.fr/Jenkinsfile-bsd EXTRA_DIST += contrib/ci.inria.fr/Jenkinsfile-windows EXTRA_DIST += contrib/ci.inria.fr/job-0-tarball.sh EXTRA_DIST += contrib/ci.inria.fr/job-1-build-windows.sh EXTRA_DIST += contrib/ci.inria.fr/job-1-check.sh EXTRA_DIST += contrib/ci.inria.fr/job-1-check-windows.bat EXTRA_DIST += contrib/gitlab/build.sh EXTRA_DIST += contrib/gitlab/deploy.sh EXTRA_DIST += contrib/gitlab/coverity.sh EXTRA_DIST += contrib/gitlab/simgrid.sh EXTRA_DIST += contrib/gitlab/chameleon.sh moduledir = ${libdir}/modules module_DATA = packages/starpu-1.4 DISTCLEANFILES = STARPU-REVISION include ./make/starpu-subdirtests.mk ctags-local: cd $(top_srcdir) ; $(CTAGS) -R -I LIST_TYPE $(SED) -i $(top_srcdir)/tags -e '/^[^ ]* [^ ]* /d' -e '/^[^ ]*$$/d' # Cyclomatic complexity reports. # The pmccabe tool, see . PMCCABE = pmccabe VC_URL = "https://gitlab.inria.fr/starpu/starpu/-/blob/master/%FILENAME%" # Generate a cyclomatic complexity report. Note that examples and tests are # excluded because they're not particularly relevant, and more importantly # they all have a function called `main', which clobbers the report. cyclomatic-complexity.html: $(PMCCABE) \ `find \( -name examples -o -name tests -o -path ./tools/dev/experimental \) -prune -o -name \*.c` \ | sort -nr \ | $(AWK) -f ${top_srcdir}/build-aux/pmccabe2html \ -v lang=html -v name="$(PACKAGE_NAME)" \ -v vcurl=$(VC_URL) \ -v url="$(PACKAGE_URL)" \ -v css=${top_srcdir}/build-aux/pmccabe.css \ -v cut_dir=${top_srcdir}/ \ > $@-tmp mv $@-tmp $@ starpu-1.4.9+dfsg/Makefile.in000066400000000000000000001645221507764646700160670ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_USE_MIN_DGELS_TRUE@am__append_1 = min-dgels @STARPU_BUILD_TESTS_TRUE@am__append_2 = tests @STARPU_USE_MPI_TRUE@am__append_3 = mpi @STARPU_BUBBLE_TRUE@am__append_4 = bubble @STARPU_BUILD_EXAMPLES_TRUE@am__append_5 = examples @STARPU_BUILD_SOCL_TRUE@am__append_6 = socl @STARPU_BUILD_STARPUFFT_TRUE@am__append_7 = starpufft @STARPU_BUILD_STARPURM_TRUE@am__append_8 = starpurm @STARPU_OPENMP_LLVM_TRUE@am__append_9 = starpu_openmp_llvm @STARPU_BUILD_STARPUPY_TRUE@@STARPU_USE_CPU_TRUE@am__append_10 = starpupy @STARPU_BUILD_STARPUPY_TRUE@@STARPU_USE_CPU_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_11 = starpupy @STARPU_BUILD_STARPUPY_TRUE@@STARPU_USE_CPU_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_12 = starpupy @STARPU_BUILD_SC_HYPERVISOR_TRUE@am__append_13 = sc_hypervisor @STARPU_USE_JULIA_TRUE@am__append_14 = julia @STARPU_BUILD_ECLIPSE_PLUGIN_TRUE@am__append_15 = eclipse-plugin @STARPU_OPENMP_LLVM_TRUE@am__append_16 = \ @STARPU_OPENMP_LLVM_TRUE@ include/omp.h subdir = . ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \ $(am__configure_deps) $(noinst_HEADERS) \ $(am__versinclude_HEADERS_DIST) $(am__DIST_COMMON) am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ configure.lineno config.status.lineno mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = tests/regression/regression.sh \ tests/regression/profiles tests/regression/profiles.build.only \ socl/vendors/socl.icd socl/vendors/install/socl.icd \ packages/libstarpu.pc packages/starpu-1.0.pc \ packages/starpu-1.1.pc packages/starpu-1.2.pc \ packages/starpu-1.3.pc packages/starpu-1.4.pc \ packages/starpu-1.3 packages/starpu-1.4 \ mpi/packages/libstarpumpi.pc mpi/packages/starpumpi-1.0.pc \ mpi/packages/starpumpi-1.1.pc mpi/packages/starpumpi-1.2.pc \ mpi/packages/starpumpi-1.3.pc mpi/packages/starpumpi-1.4.pc \ starpufft/packages/libstarpufft.pc \ starpufft/packages/starpufft-1.0.pc \ starpufft/packages/starpufft-1.1.pc \ starpufft/packages/starpufft-1.2.pc \ starpufft/packages/starpufft-1.3.pc \ starpufft/packages/starpufft-1.4.pc \ starpurm/packages/starpurm-1.3.pc \ starpurm/packages/starpurm-1.4.pc tools/msvc/starpu_var.bat \ min-dgels/Makefile eclipse-plugin/examples/hello/.cproject CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(moduledir)" "$(DESTDIR)$(pkgconfigdir)" \ "$(DESTDIR)$(txtdir)" "$(DESTDIR)$(versincludedir)" \ "$(DESTDIR)$(versincludedir)" DATA = $(module_DATA) $(pkgconfig_DATA) $(txt_DATA) am__versinclude_HEADERS_DIST = include/starpu.h \ include/starpu_helper.h include/starpu_bitmap.h \ include/starpu_data_filters.h include/starpu_data_interfaces.h \ include/starpu_worker.h include/starpu_task.h \ include/starpu_task_dep.h include/starpu_task_bundle.h \ include/starpu_task_list.h include/starpu_task_util.h \ include/starpu_data.h include/starpu_perfmodel.h \ include/starpu_util.h include/starpu_fxt.h \ include/starpu_cuda.h include/starpu_hip.h \ include/starpu_opencl.h include/starpu_max_fpga.h \ include/starpu_openmp.h include/starpu_sink.h \ include/starpu_expert.h include/starpu_profiling.h \ include/starpu_profiling_tool.h include/starpu_bound.h \ include/starpu_scheduler.h \ include/schedulers/starpu_heteroprio.h \ include/starpu_sched_component.h include/starpu_sched_ctx.h \ include/starpu_sched_ctx_hypervisor.h \ include/starpu_deprecated_api.h include/starpu_hash.h \ include/starpu_rand.h include/starpu_disk.h \ include/starpu_cublas.h include/starpu_cublas_v2.h \ include/starpu_cublasLt.h include/starpu_cusolver.h \ include/starpu_cusparse.h include/starpu_hipblas.h \ include/starpu_driver.h include/starpu_stdlib.h \ include/starpu_thread.h include/starpu_thread_util.h \ include/starpu_tree.h include/starpu_simgrid_wrap.h \ include/starpu_mod.f90 include/fstarpu_mod.f90 \ include/starpu_parallel_worker.h \ include/starpu_perf_monitoring.h \ include/starpu_perf_steering.h \ include/schedulers/starpu_scheduler_toolbox.h include/omp.h HEADERS = $(nodist_versinclude_HEADERS) $(noinst_HEADERS) \ $(versinclude_HEADERS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ cscope distdir distdir-am dist dist-all distcheck am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = min-dgels src tools tests doc mpi bubble examples socl \ starpufft starpurm starpu_openmp_llvm starpupy sc_hypervisor \ julia eclipse-plugin am__DIST_COMMON = $(srcdir)/./make/starpu-subdirtests.mk \ $(srcdir)/Makefile.in $(top_srcdir)/build-aux/ar-lib \ $(top_srcdir)/build-aux/compile \ $(top_srcdir)/build-aux/config.guess \ $(top_srcdir)/build-aux/config.sub \ $(top_srcdir)/build-aux/install-sh \ $(top_srcdir)/build-aux/ltmain.sh \ $(top_srcdir)/build-aux/missing \ $(top_srcdir)/eclipse-plugin/examples/hello/.cproject.in \ $(top_srcdir)/include/starpu_config.h.in \ $(top_srcdir)/min-dgels/Makefile.in \ $(top_srcdir)/mpi/packages/libstarpumpi.pc.in \ $(top_srcdir)/mpi/packages/starpumpi-1.0.pc.in \ $(top_srcdir)/mpi/packages/starpumpi-1.1.pc.in \ $(top_srcdir)/mpi/packages/starpumpi-1.2.pc.in \ $(top_srcdir)/mpi/packages/starpumpi-1.3.pc.in \ $(top_srcdir)/mpi/packages/starpumpi-1.4.pc.in \ $(top_srcdir)/packages/libstarpu.pc.in \ $(top_srcdir)/packages/starpu-1.0.pc.in \ $(top_srcdir)/packages/starpu-1.1.pc.in \ $(top_srcdir)/packages/starpu-1.2.pc.in \ $(top_srcdir)/packages/starpu-1.3.in \ $(top_srcdir)/packages/starpu-1.3.pc.in \ $(top_srcdir)/packages/starpu-1.4.in \ $(top_srcdir)/packages/starpu-1.4.pc.in \ $(top_srcdir)/socl/vendors/install/socl.icd.in \ $(top_srcdir)/socl/vendors/socl.icd.in \ $(top_srcdir)/src/common/config-src-build.h.in \ $(top_srcdir)/src/common/config.h.in \ $(top_srcdir)/starpufft/packages/libstarpufft.pc.in \ $(top_srcdir)/starpufft/packages/starpufft-1.0.pc.in \ $(top_srcdir)/starpufft/packages/starpufft-1.1.pc.in \ $(top_srcdir)/starpufft/packages/starpufft-1.2.pc.in \ $(top_srcdir)/starpufft/packages/starpufft-1.3.pc.in \ $(top_srcdir)/starpufft/packages/starpufft-1.4.pc.in \ $(top_srcdir)/starpurm/include/starpurm_config.h.in \ $(top_srcdir)/starpurm/packages/starpurm-1.3.pc.in \ $(top_srcdir)/starpurm/packages/starpurm-1.4.pc.in \ $(top_srcdir)/tests/regression/profiles.build.only.in \ $(top_srcdir)/tests/regression/profiles.in \ $(top_srcdir)/tests/regression/regression.sh.in \ $(top_srcdir)/tools/msvc/starpu_var.bat.in AUTHORS ChangeLog \ INSTALL README.md TODO build-aux/ar-lib build-aux/compile \ build-aux/config.guess build-aux/config.sub \ build-aux/install-sh build-aux/ltmain.sh build-aux/missing DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) distdir = $(PACKAGE)-$(VERSION) top_distdir = $(distdir) am__remove_distdir = \ if test -d "$(distdir)"; then \ find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \ && rm -rf "$(distdir)" \ || { sleep 5 && rm -rf "$(distdir)"; }; \ else :; fi am__post_remove_distdir = $(am__remove_distdir) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" DIST_ARCHIVES = $(distdir).tar.gz GZIP_ENV = --best DIST_TARGETS = dist-gzip # Exists only to be overridden by the user if desired. AM_DISTCHECK_DVI_TARGET = dvi distuninstallcheck_listfiles = find . -type f -print am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' distcleancheck_listfiles = find . -type f -print pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2017-2017 Guillaume Beauchamp # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # ACLOCAL_AMFLAGS = -I m4 CLEANFILES = *.gcno *.gcda *.linkinfo SUBDIRS = $(am__append_1) src tools $(am__append_2) doc \ $(am__append_3) $(am__append_4) $(am__append_5) \ $(am__append_6) $(am__append_7) $(am__append_8) \ $(am__append_9) $(am__append_10) $(am__append_11) \ $(am__append_12) $(am__append_13) $(am__append_14) \ $(am__append_15) pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = packages/libstarpu.pc packages/starpu-1.0.pc packages/starpu-1.1.pc packages/starpu-1.2.pc packages/starpu-1.3.pc packages/starpu-1.4.pc versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) versinclude_HEADERS = include/starpu.h include/starpu_helper.h \ include/starpu_bitmap.h include/starpu_data_filters.h \ include/starpu_data_interfaces.h include/starpu_worker.h \ include/starpu_task.h include/starpu_task_dep.h \ include/starpu_task_bundle.h include/starpu_task_list.h \ include/starpu_task_util.h include/starpu_data.h \ include/starpu_perfmodel.h include/starpu_util.h \ include/starpu_fxt.h include/starpu_cuda.h \ include/starpu_hip.h include/starpu_opencl.h \ include/starpu_max_fpga.h include/starpu_openmp.h \ include/starpu_sink.h include/starpu_expert.h \ include/starpu_profiling.h include/starpu_profiling_tool.h \ include/starpu_bound.h include/starpu_scheduler.h \ include/schedulers/starpu_heteroprio.h \ include/starpu_sched_component.h include/starpu_sched_ctx.h \ include/starpu_sched_ctx_hypervisor.h \ include/starpu_deprecated_api.h include/starpu_hash.h \ include/starpu_rand.h include/starpu_disk.h \ include/starpu_cublas.h include/starpu_cublas_v2.h \ include/starpu_cublasLt.h include/starpu_cusolver.h \ include/starpu_cusparse.h include/starpu_hipblas.h \ include/starpu_driver.h include/starpu_stdlib.h \ include/starpu_thread.h include/starpu_thread_util.h \ include/starpu_tree.h include/starpu_simgrid_wrap.h \ include/starpu_mod.f90 include/fstarpu_mod.f90 \ include/starpu_parallel_worker.h \ include/starpu_perf_monitoring.h \ include/starpu_perf_steering.h \ include/schedulers/starpu_scheduler_toolbox.h $(am__append_16) nodist_versinclude_HEADERS = \ include/starpu_config.h noinst_HEADERS = \ include/pthread_win32/pthread.h \ include/pthread_win32/semaphore.h @STARPU_HAVE_WINDOWS_FALSE@txtdir = ${docdir} @STARPU_HAVE_WINDOWS_TRUE@txtdir = ${prefix} txt_DATA = AUTHORS COPYING.LGPL README.md README.dev STARPU-REVISION EXTRA_DIST = autogen.sh AUTHORS COPYING.LGPL README.md README.dev \ STARPU-VERSION STARPU-REVISION .gitlab-ci.yml \ contrib/ci.inria.fr/Jenkinsfile-basic \ contrib/ci.inria.fr/Jenkinsfile-bsd \ contrib/ci.inria.fr/Jenkinsfile-windows \ contrib/ci.inria.fr/job-0-tarball.sh \ contrib/ci.inria.fr/job-1-build-windows.sh \ contrib/ci.inria.fr/job-1-check.sh \ contrib/ci.inria.fr/job-1-check-windows.bat \ contrib/gitlab/build.sh contrib/gitlab/deploy.sh \ contrib/gitlab/coverity.sh contrib/gitlab/simgrid.sh \ contrib/gitlab/chameleon.sh moduledir = ${libdir}/modules module_DATA = packages/starpu-1.4 DISTCLEANFILES = STARPU-REVISION # Cyclomatic complexity reports. # The pmccabe tool, see . PMCCABE = pmccabe VC_URL = "https://gitlab.inria.fr/starpu/starpu/-/blob/master/%FILENAME%" all: all-recursive .SUFFIXES: am--refresh: Makefile @: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/./make/starpu-subdirtests.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ echo ' cd $(srcdir) && $(AUTOMAKE) --foreign'; \ $(am__cd) $(srcdir) && $(AUTOMAKE) --foreign \ && exit 0; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ echo ' $(SHELL) ./config.status'; \ $(SHELL) ./config.status;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles);; \ esac; $(srcdir)/./make/starpu-subdirtests.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) $(SHELL) ./config.status --recheck $(top_srcdir)/configure: $(am__configure_deps) $(am__cd) $(srcdir) && $(AUTOCONF) $(ACLOCAL_M4): $(am__aclocal_m4_deps) $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) $(am__aclocal_m4_deps): src/common/config.h: src/common/stamp-h1 @test -f $@ || rm -f src/common/stamp-h1 @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) src/common/stamp-h1 src/common/stamp-h1: $(top_srcdir)/src/common/config.h.in $(top_builddir)/config.status @rm -f src/common/stamp-h1 cd $(top_builddir) && $(SHELL) ./config.status src/common/config.h $(top_srcdir)/src/common/config.h.in: $(am__configure_deps) ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) rm -f src/common/stamp-h1 touch $@ src/common/config-src-build.h: src/common/stamp-h2 @test -f $@ || rm -f src/common/stamp-h2 @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) src/common/stamp-h2 src/common/stamp-h2: $(top_srcdir)/src/common/config-src-build.h.in $(top_builddir)/config.status @rm -f src/common/stamp-h2 cd $(top_builddir) && $(SHELL) ./config.status src/common/config-src-build.h include/starpu_config.h: include/stamp-h3 @test -f $@ || rm -f include/stamp-h3 @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) include/stamp-h3 include/stamp-h3: $(top_srcdir)/include/starpu_config.h.in $(top_builddir)/config.status @rm -f include/stamp-h3 cd $(top_builddir) && $(SHELL) ./config.status include/starpu_config.h starpurm/include/starpurm_config.h: starpurm/include/stamp-h4 @test -f $@ || rm -f starpurm/include/stamp-h4 @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) starpurm/include/stamp-h4 starpurm/include/stamp-h4: $(top_srcdir)/starpurm/include/starpurm_config.h.in $(top_builddir)/config.status @rm -f starpurm/include/stamp-h4 cd $(top_builddir) && $(SHELL) ./config.status starpurm/include/starpurm_config.h distclean-hdr: -rm -f src/common/config.h src/common/stamp-h1 src/common/config-src-build.h src/common/stamp-h2 include/starpu_config.h include/stamp-h3 starpurm/include/starpurm_config.h starpurm/include/stamp-h4 tests/regression/regression.sh: $(top_builddir)/config.status $(top_srcdir)/tests/regression/regression.sh.in cd $(top_builddir) && $(SHELL) ./config.status $@ tests/regression/profiles: $(top_builddir)/config.status $(top_srcdir)/tests/regression/profiles.in cd $(top_builddir) && $(SHELL) ./config.status $@ tests/regression/profiles.build.only: $(top_builddir)/config.status $(top_srcdir)/tests/regression/profiles.build.only.in cd $(top_builddir) && $(SHELL) ./config.status $@ socl/vendors/socl.icd: $(top_builddir)/config.status $(top_srcdir)/socl/vendors/socl.icd.in cd $(top_builddir) && $(SHELL) ./config.status $@ socl/vendors/install/socl.icd: $(top_builddir)/config.status $(top_srcdir)/socl/vendors/install/socl.icd.in cd $(top_builddir) && $(SHELL) ./config.status $@ packages/libstarpu.pc: $(top_builddir)/config.status $(top_srcdir)/packages/libstarpu.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ packages/starpu-1.0.pc: $(top_builddir)/config.status $(top_srcdir)/packages/starpu-1.0.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ packages/starpu-1.1.pc: $(top_builddir)/config.status $(top_srcdir)/packages/starpu-1.1.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ packages/starpu-1.2.pc: $(top_builddir)/config.status $(top_srcdir)/packages/starpu-1.2.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ packages/starpu-1.3.pc: $(top_builddir)/config.status $(top_srcdir)/packages/starpu-1.3.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ packages/starpu-1.4.pc: $(top_builddir)/config.status $(top_srcdir)/packages/starpu-1.4.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ packages/starpu-1.3: $(top_builddir)/config.status $(top_srcdir)/packages/starpu-1.3.in cd $(top_builddir) && $(SHELL) ./config.status $@ packages/starpu-1.4: $(top_builddir)/config.status $(top_srcdir)/packages/starpu-1.4.in cd $(top_builddir) && $(SHELL) ./config.status $@ mpi/packages/libstarpumpi.pc: $(top_builddir)/config.status $(top_srcdir)/mpi/packages/libstarpumpi.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ mpi/packages/starpumpi-1.0.pc: $(top_builddir)/config.status $(top_srcdir)/mpi/packages/starpumpi-1.0.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ mpi/packages/starpumpi-1.1.pc: $(top_builddir)/config.status $(top_srcdir)/mpi/packages/starpumpi-1.1.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ mpi/packages/starpumpi-1.2.pc: $(top_builddir)/config.status $(top_srcdir)/mpi/packages/starpumpi-1.2.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ mpi/packages/starpumpi-1.3.pc: $(top_builddir)/config.status $(top_srcdir)/mpi/packages/starpumpi-1.3.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ mpi/packages/starpumpi-1.4.pc: $(top_builddir)/config.status $(top_srcdir)/mpi/packages/starpumpi-1.4.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ starpufft/packages/libstarpufft.pc: $(top_builddir)/config.status $(top_srcdir)/starpufft/packages/libstarpufft.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ starpufft/packages/starpufft-1.0.pc: $(top_builddir)/config.status $(top_srcdir)/starpufft/packages/starpufft-1.0.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ starpufft/packages/starpufft-1.1.pc: $(top_builddir)/config.status $(top_srcdir)/starpufft/packages/starpufft-1.1.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ starpufft/packages/starpufft-1.2.pc: $(top_builddir)/config.status $(top_srcdir)/starpufft/packages/starpufft-1.2.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ starpufft/packages/starpufft-1.3.pc: $(top_builddir)/config.status $(top_srcdir)/starpufft/packages/starpufft-1.3.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ starpufft/packages/starpufft-1.4.pc: $(top_builddir)/config.status $(top_srcdir)/starpufft/packages/starpufft-1.4.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ starpurm/packages/starpurm-1.3.pc: $(top_builddir)/config.status $(top_srcdir)/starpurm/packages/starpurm-1.3.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ starpurm/packages/starpurm-1.4.pc: $(top_builddir)/config.status $(top_srcdir)/starpurm/packages/starpurm-1.4.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ tools/msvc/starpu_var.bat: $(top_builddir)/config.status $(top_srcdir)/tools/msvc/starpu_var.bat.in cd $(top_builddir) && $(SHELL) ./config.status $@ min-dgels/Makefile: $(top_builddir)/config.status $(top_srcdir)/min-dgels/Makefile.in cd $(top_builddir) && $(SHELL) ./config.status $@ eclipse-plugin/examples/hello/.cproject: $(top_builddir)/config.status $(top_srcdir)/eclipse-plugin/examples/hello/.cproject.in cd $(top_builddir) && $(SHELL) ./config.status $@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs distclean-libtool: -rm -f libtool config.lt install-moduleDATA: $(module_DATA) @$(NORMAL_INSTALL) @list='$(module_DATA)'; test -n "$(moduledir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(moduledir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(moduledir)" || exit $$?; \ done uninstall-moduleDATA: @$(NORMAL_UNINSTALL) @list='$(module_DATA)'; test -n "$(moduledir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(moduledir)'; $(am__uninstall_files_from_dir) install-pkgconfigDATA: $(pkgconfig_DATA) @$(NORMAL_INSTALL) @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \ done uninstall-pkgconfigDATA: @$(NORMAL_UNINSTALL) @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir) install-txtDATA: $(txt_DATA) @$(NORMAL_INSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ done uninstall-txtDATA: @$(NORMAL_UNINSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) install-nodist_versincludeHEADERS: $(nodist_versinclude_HEADERS) @$(NORMAL_INSTALL) @list='$(nodist_versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(versincludedir)'"; \ $(MKDIR_P) "$(DESTDIR)$(versincludedir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(versincludedir)'"; \ $(INSTALL_HEADER) $$files "$(DESTDIR)$(versincludedir)" || exit $$?; \ done uninstall-nodist_versincludeHEADERS: @$(NORMAL_UNINSTALL) @list='$(nodist_versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(versincludedir)'; $(am__uninstall_files_from_dir) install-versincludeHEADERS: $(versinclude_HEADERS) @$(NORMAL_INSTALL) @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(versincludedir)'"; \ $(MKDIR_P) "$(DESTDIR)$(versincludedir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(versincludedir)'"; \ $(INSTALL_HEADER) $$files "$(DESTDIR)$(versincludedir)" || exit $$?; \ done uninstall-versincludeHEADERS: @$(NORMAL_UNINSTALL) @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(versincludedir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscope: cscope.files test ! -s cscope.files \ || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS) clean-cscope: -rm -f cscope.files cscope.files: clean-cscope cscopelist cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags -rm -f cscope.out cscope.in.out cscope.po.out cscope.files distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) $(am__remove_distdir) test -d "$(distdir)" || mkdir "$(distdir)" @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done -test -n "$(am__skip_mode_fix)" \ || find "$(distdir)" -type d ! -perm -755 \ -exec chmod u+rwx,go+rx {} \; -o \ ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ || chmod -R a+r "$(distdir)" dist-gzip: distdir tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz $(am__post_remove_distdir) dist-bzip2: distdir tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 $(am__post_remove_distdir) dist-lzip: distdir tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz $(am__post_remove_distdir) dist-xz: distdir tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz $(am__post_remove_distdir) dist-zstd: distdir tardir=$(distdir) && $(am__tar) | zstd -c $${ZSTD_CLEVEL-$${ZSTD_OPT--19}} >$(distdir).tar.zst $(am__post_remove_distdir) dist-tarZ: distdir @echo WARNING: "Support for distribution archives compressed with" \ "legacy program 'compress' is deprecated." >&2 @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z $(am__post_remove_distdir) dist-shar: distdir @echo WARNING: "Support for shar distribution archives is" \ "deprecated." >&2 @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz $(am__post_remove_distdir) dist-zip: distdir -rm -f $(distdir).zip zip -rq $(distdir).zip $(distdir) $(am__post_remove_distdir) dist dist-all: $(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:' $(am__post_remove_distdir) # This target untars the dist file and tries a VPATH configuration. Then # it guarantees that the distribution is self-contained by making another # tarfile. distcheck: dist case '$(DIST_ARCHIVES)' in \ *.tar.gz*) \ eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\ *.tar.bz2*) \ bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ *.tar.lz*) \ lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ *.tar.xz*) \ xz -dc $(distdir).tar.xz | $(am__untar) ;;\ *.tar.Z*) \ uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ *.shar.gz*) \ eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\ *.zip*) \ unzip $(distdir).zip ;;\ *.tar.zst*) \ zstd -dc $(distdir).tar.zst | $(am__untar) ;;\ esac chmod -R a-w $(distdir) chmod u+w $(distdir) mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst chmod a-w $(distdir) test -d $(distdir)/_build || exit 0; \ dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ && am__cwd=`pwd` \ && $(am__cd) $(distdir)/_build/sub \ && ../../configure \ $(AM_DISTCHECK_CONFIGURE_FLAGS) \ $(DISTCHECK_CONFIGURE_FLAGS) \ --srcdir=../.. --prefix="$$dc_install_base" \ && $(MAKE) $(AM_MAKEFLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) $(AM_DISTCHECK_DVI_TARGET) \ && $(MAKE) $(AM_MAKEFLAGS) check \ && $(MAKE) $(AM_MAKEFLAGS) install \ && $(MAKE) $(AM_MAKEFLAGS) installcheck \ && $(MAKE) $(AM_MAKEFLAGS) uninstall \ && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ distuninstallcheck \ && chmod -R a-w "$$dc_install_base" \ && ({ \ (cd ../.. && umask 077 && mkdir "$$dc_destdir") \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ } || { rm -rf "$$dc_destdir"; exit 1; }) \ && rm -rf "$$dc_destdir" \ && $(MAKE) $(AM_MAKEFLAGS) dist \ && rm -rf $(DIST_ARCHIVES) \ && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ && cd "$$am__cwd" \ || exit 1 $(am__post_remove_distdir) @(echo "$(distdir) archives ready for distribution: "; \ list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' distuninstallcheck: @test -n '$(distuninstallcheck_dir)' || { \ echo 'ERROR: trying to run $@ with an empty' \ '$$(distuninstallcheck_dir)' >&2; \ exit 1; \ }; \ $(am__cd) '$(distuninstallcheck_dir)' || { \ echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ exit 1; \ }; \ test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ || { echo "ERROR: files left after uninstall:" ; \ if test -n "$(DESTDIR)"; then \ echo " (check DESTDIR support)"; \ fi ; \ $(distuninstallcheck_listfiles) ; \ exit 1; } >&2 distcleancheck: distclean @if test '$(srcdir)' = . ; then \ echo "ERROR: distcleancheck can only run from a VPATH build" ; \ exit 1 ; \ fi @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ || { echo "ERROR: files left in build directory after distclean:" ; \ $(distcleancheck_listfiles) ; \ exit 1; } >&2 check-am: all-am check: check-recursive @STARPU_DEVEL_FALSE@all-local: all-am: Makefile $(DATA) $(HEADERS) all-local installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(moduledir)" "$(DESTDIR)$(pkgconfigdir)" "$(DESTDIR)$(txtdir)" "$(DESTDIR)$(versincludedir)" "$(DESTDIR)$(versincludedir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -f Makefile distclean-am: clean-am distclean-generic distclean-hdr \ distclean-libtool distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-moduleDATA install-nodist_versincludeHEADERS \ install-pkgconfigDATA install-txtDATA \ install-versincludeHEADERS install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -rf $(top_srcdir)/autom4te.cache -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-moduleDATA uninstall-nodist_versincludeHEADERS \ uninstall-pkgconfigDATA uninstall-txtDATA \ uninstall-versincludeHEADERS .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \ am--refresh check check-am clean clean-cscope clean-generic \ clean-libtool cscope cscopelist-am ctags ctags-am dist \ dist-all dist-bzip2 dist-gzip dist-lzip dist-shar dist-tarZ \ dist-xz dist-zip dist-zstd distcheck distclean \ distclean-generic distclean-hdr distclean-libtool \ distclean-tags distcleancheck distdir distuninstallcheck dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-moduleDATA \ install-nodist_versincludeHEADERS install-pdf install-pdf-am \ install-pkgconfigDATA install-ps install-ps-am install-strip \ install-txtDATA install-versincludeHEADERS installcheck \ installcheck-am installdirs installdirs-am maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ uninstall-am uninstall-moduleDATA \ uninstall-nodist_versincludeHEADERS uninstall-pkgconfigDATA \ uninstall-txtDATA uninstall-versincludeHEADERS .PRECIOUS: Makefile @STARPU_DEVEL_TRUE@all-local: @STARPU_DEVEL_TRUE@ @if $(GREP) -r sys/time.h $$( find $(srcdir)/examples $(srcdir)/tests $(srcdir)/src $(srcdir)/mpi/src $(srcdir)/include -name \*.[ch] -a \! -name starpu_util.h -a \! -name timer.h -a \! -name loader.c ) ; \ @STARPU_DEVEL_TRUE@ then \ @STARPU_DEVEL_TRUE@ echo "Please do not include sys/time, it is not available on Windows, include starpu_util.h and use starpu_timing_now() instead" ; \ @STARPU_DEVEL_TRUE@ false ; \ @STARPU_DEVEL_TRUE@ fi @STARPU_DEVEL_TRUE@ @if $(GREP) -re '\' $$( find $(srcdir)/src $(srcdir)/mpi/src $(srcdir)/include -name \*.[ch] -a \! -name starpu_util.h -a \! -name utils.c -a \! -name simgrid.h) ; \ @STARPU_DEVEL_TRUE@ then \ @STARPU_DEVEL_TRUE@ echo "Please do not use getenv, use starpu_getenv instead, which catches unsafe uses"; \ @STARPU_DEVEL_TRUE@ false ; \ @STARPU_DEVEL_TRUE@ fi # we count the number of files which include unistd.h # we count the number of files which properly include unistd.h i.e by first detecting if it's available # and then we check both numbers are the same ...a @STARPU_DEVEL_TRUE@ @UNISTD_ALL_LINES=$(shell $(GREP) -B1 -rs "^#include " $(srcdir)/src/ $(srcdir)/include/ $(srcdir)/mpi/src $(srcdir)/mpi/include |$(GREP) -v dolib|$(GREP) -v -e "--" | tr '\012' '@' | $(SED) 's/unistd.h>@/unistd.h>\n/g' | wc -l | tr -d ' ') ;\ @STARPU_DEVEL_TRUE@ UNISTD_CORRECT_LINES=$(shell $(GREP) -B1 -rs "^#include " $(srcdir)/src/ $(srcdir)/include/ $(srcdir)/mpi/src $(srcdir)/mpi/include |$(GREP) -v dolib|$(GREP) -v -e "--" | tr '\012' '@' | $(SED) 's/unistd.h>@/unistd.h>\n/g' | $(GREP) '#ifdef .*HAVE_UNISTD_H.*:#include ' | wc -l | tr -d ' ') ;\ @STARPU_DEVEL_TRUE@ if test $$UNISTD_ALL_LINES -ne $$UNISTD_CORRECT_LINES ; \ @STARPU_DEVEL_TRUE@ then \ @STARPU_DEVEL_TRUE@ echo "Please do not unconditionally include unistd.h, it is not available on Windows, include config.h and test for HAVE_UNISTD_H" ; \ @STARPU_DEVEL_TRUE@ false ; \ @STARPU_DEVEL_TRUE@ fi # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # recheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i recheck || RET=1 ; \ done ; \ exit $$RET showcheckfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET ctags-local: cd $(top_srcdir) ; $(CTAGS) -R -I LIST_TYPE $(SED) -i $(top_srcdir)/tags -e '/^[^ ]* [^ ]* /d' -e '/^[^ ]*$$/d' # Generate a cyclomatic complexity report. Note that examples and tests are # excluded because they're not particularly relevant, and more importantly # they all have a function called `main', which clobbers the report. cyclomatic-complexity.html: $(PMCCABE) \ `find \( -name examples -o -name tests -o -path ./tools/dev/experimental \) -prune -o -name \*.c` \ | sort -nr \ | $(AWK) -f ${top_srcdir}/build-aux/pmccabe2html \ -v lang=html -v name="$(PACKAGE_NAME)" \ -v vcurl=$(VC_URL) \ -v url="$(PACKAGE_URL)" \ -v css=${top_srcdir}/build-aux/pmccabe.css \ -v cut_dir=${top_srcdir}/ \ > $@-tmp mv $@-tmp $@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/README.dev000066400000000000000000000145211507764646700154500ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # Contents ======== - Directory structure - Developer Warnings - Naming Conventions - Coding Style - Error handling - Makefile.am - Writing a new driver Directory structure ------------------- The directory structure is as follows: - src : internal source for StarPU - include : public API - tests : unitary tests - examples : examples using StarPU - doc : documentation for StarPU - tools : tools for StarPU StarPU extensions have their own directory (src/include/tests/examples) structure: - mpi : The MPI support - socl : the StarPU OpenCL-compatible interface - sc_hypervisor : The Scheduling Context Hypervisor - starpufft : The FFT support - eclipse-plugin : The Eclipse Plugin - starpupy : The StarPU Python Interface - starpurm : The StarPU Resource Manager Some directories contain only build system details: - build-aux - m4 - autom4te.cache Developer Warnings ------------------ They are enabled only if the STARPU_DEVEL environment variable is defined to a non-empty value, when calling configure. Tests ----- Please do try make check, at least with ./configure --enable-quick-check If a test fails, you can run it specifically again with make check TESTS=the_test You can also re-run only the failing tests with make recheck Naming Conventions ------------------ * Prefix names of public objects (types, functions, etc.) with "starpu" * Prefix names of internal objects (types, functions, etc.) with "_starpu" * Names for qualified types (struct, union, enum) do not end with _t, _s or similar. Use _t only for typedef types, such as opaque public types, e.g typedef struct _starpu_data_state* starpu_data_handle_t; or typedef uint64_t starpu_tag_t; * When a variable can only take a finite set of values, use an enum type instead of defining macros for each of the values. Coding Style ------------ * Curly braces always go on a new line Error handling -------------- * Use STARPU_ABORT() for catastrophic errors, from which StarPU will never recover. switch (node_kind) { case STARPU_CPU_RAM: do_stg(); break; ... default: /* We cannot be here */ STARPU_ABORT(); } * Use STARPU_ASSERT() to run checks that are very likely to succeed, but still are useful for debugging purposes. It should be OK to disable them with --enable-fast. STARPU_ASSERT(j->terminated != 0) * Use STARPU_ASSERT_MSG() to run checks that might not succeed, and notably due to application programming error. The additional message parameter should guide the programmer into fixing their error. Documentation ------------- When adding a feature, we want four kinds of documentation: * Announcing the feature in ChangeLog. * At least one working example in examples/, or at least a working test in tests/. Ideally enough examples and tests to cover all the various features. * A section in the Doxygen documentation, that explains in which case the feature is useful and how to use it, and points to the abovementioned example/test. It should cover all aspects of the feature, so programmers don't have to look into the .h file or reference documentation to discover features. It however does not need to dive into all details, that can be provided in the next documentation. * Doxygen comments along the declarations in the .h file. These should document each macro, enum, function, function parameter, flag, etc. And refer to the abovementioned section so that somebody who finds some function/macro/etc. can easily know what that is all about. Makefile.am ----------- Dependency libraries are appended to LIBS. Only real LDFLAGS such as -no-undefined go to LDFLAGS. If a program foo needs more libraries, it can put then in foo_LDADD. (No, AM_LDADD does not exist) All install rules must use $(DESTDIR) so that ./configure --prefix=/usr && make && make install DESTDIR=/tmp/foobar can properly work, as it is used by distributions. That can easily checked by *not* running it as root. Writing a new driver -------------------- Writing a new driver is essentially: - Creating an src/drivers/yourdriver/ and adding it to src/Makefile.am You can pick up src/drivers/cuda/driver_cuda0.c as an example of very basic driver which should be relatively easy to get working. Once you have it working you can try to get inspiration from src/drivers/cuda/driver_cuda1.c to implement asynchronous data and kernel execution. - Adding fields in struct starpu_conf and struct starpu_codelet. - Adding cases in src/core/task.c, look for _CUDA for an example. - Adding initialization calls in src/core/topology.c, look for _CUDA for an example. - Adding cases in src/core/worker.c, look for _CUDA for an example. - Adding the case in src/datawizard/reduction.c, look for _CUDA for an example. - There are a few "Driver porters" notes in the code. - TODO: task & bus performance model For now the simplest is not to implement performance models. We'll rework the support to make it very generic. - Other places can be extended to add features: asynchronous data transfers, energy measurement, multiformat, memory mapping Adding a new FXT state ---------------------- This consists in: - Adding a code number in src/common/fxt.h - Adding the callable runtime macro in src/common/fxt.h - Calling these macros in the wanted place in the runtime - Adding a paje state in states_list src/debug/traces/starpu_fxt.c and in src/debug/traces/starpu_paje.c - Adding the management of the code in _starpu_fxt_parse_new_file, usually calling a function that does the actual paje state addition (a push/pop pair or two state sets) A simple example can be found in 28740e7a91a2 ("Add a Parallel sync state"). starpu-1.4.9+dfsg/README.md000066400000000000000000000140551507764646700152740ustar00rootroot00000000000000 # StarPU: A Unified Runtime System for Heterogeneous Multicore Architectures ## What is StarPU? StarPU is a runtime system that offers support for heterogeneous multicore machines. While many efforts are devoted to design efficient computation kernels for those architectures (e.g. to implement BLAS kernels on GPUs), StarPU not only takes care of offloading such kernels (and implementing data coherency across the machine), but it also makes sure the kernels are executed as efficiently as possible. ## What StarPU is not StarPU is not a new language, and it does not extend existing languages either. StarPU does not help to write computation kernels. ## (How) Could StarPU help me? While StarPU will not make it easier to write computation kernels, it does simplify their actual offloading as StarPU handle most low level aspects transparently. Obviously, it is crucial to have efficient kernels, but it must be noted that the way those kernels are mapped and scheduled onto the computational resources also affect the overall performance to a great extent. StarPU is especially helpful when considering multiple heterogeneous processing resources: statically mapping and synchronizing tasks in such a heterogeneous environment is already very difficult, making it in a portable way is virtually impossible. On the other hand, the scheduling capabilities of StarPU makes it possible to easily exploit all processors at the same time while taking advantage of their specificities in a portable fashion. ## Requirements * `make` * `gcc` (version >= 4.1) * if `CUDA` support is enabled * `CUDA` (version >= 2.2) * `CUBLAS` (version >= 2.2) * if `OpenCL` support is enabled * `AMD` SDK >= 2.3 if `AMD` driver is used * `CUDA` >= 3.2 if `NVIDIA` driver is used * extra requirements for the `git` version (we usually use the Debian testing versions) * `autoconf` (version >= 2.60) * `automake` * `makeinfo` * `libtool` (version >= 2) Remark: It is strongly recommended that you also install the hwloc library before installing StarPU. This permits StarPU to actually map the processing units according to the machine topology. For more details on hwloc, see http://www.open-mpi.org/projects/hwloc/ . ## Getting StarPU StarPU is available on https://gitlab.inria.fr/starpu/starpu The GIT repository access can be checked out with the following command. ```shell $ git clone https://gitlab.inria.fr/starpu/starpu.git ``` ## Building and Installing ### For git version only Please skip this step if you are building from a tarball. ```shell $ ./autogen.sh ``` ### For all versions ```shell $ mkdir build && cd build $ ../configure $ make $ make install ``` ### Windows build StarPU can be built using MinGW or Cygwin. To avoid the cygwin dependency, we provide MinGW-built binaries. The build process produces `libstarpu.dll`, `libstarpu.def`, and `libstarpu.lib`, which should be enough to use it from e.g. Microsoft Visual Studio. Update the video drivers to the latest stable release available for your hardware. Old ATI drivers (< 2.3) contain bugs that cause OpenCL support in StarPU to hang or exhibit incorrect behaviour. For details on the Windows build process, see the [INSTALL](https://gitlab.inria.fr/starpu/starpu/-/blob/master/INSTALL) file. ## Running StarPU Applications on Microsoft Visual C Batch files are provided to run StarPU applications under Microsoft Visual C. They are installed in `path_to_starpu/bin/msvc`. To execute a StarPU application, you first need to set the environment variable `STARPU_PATH`. ```shell c:\....> cd c:\cygwin\home\ci\starpu\ c:\....> set STARPU_PATH=c:\cygwin\home\ci\starpu\ c:\....> cd bin\msvc c:\....> starpu_open.bat starpu_simple.c ``` The batch script will run Microsoft Visual C with a basic project file to run the given application. The batch script `starpu_clean.bat` can be used to delete all compilation generated files. The batch script `starpu_exec.bat` can be used to compile and execute a StarPU application from the command prompt. ```shell c:\....> cd c:\cygwin\home\ci\starpu\ c:\....> set STARPU_PATH=c:\cygwin\home\ci\starpu\ c:\....> cd bin\msvc c:\....> starpu_exec.bat ..\..\..\..\examples\basic_examples\hello_world.c MSVC StarPU Execution ... /out:hello_world.exe ... Hello world (params = {1, 2.00000}) Callback function got argument 0000042 c:\....> ``` ## Documentation Doxygen documentation is available in `doc/doxygen`. If the doxygen tools are available on the machine, pdf and html documentation can be generated by running ```shell $ make -C doc ``` The [documentation for the latest StarPU release](https://files.inria.fr/starpu/doc/html/) is available, as well as the [documentation for the StarPU master branch](https://files.inria.fr/starpu/testing/master/doc/html/). ## Trying Some examples ready to run are installed into `$prefix/lib/starpu/{examples,mpi}` ## Upgrade To upgrade your source code from older version (there were quite a few renamings), use the `tools/dev/rename.sh` script. ## Contribute Contributions are welcome! Both on the [main StarPU repository](https://gitlab.inria.fr/starpu/starpu) and on the [github StarPU mirror](https://github.com/starpu-runtime/starpu) Please see [our contribution page](https://starpu.gitlabpages.inria.fr/involved.html) for details. ## Contact For any questions regarding StarPU, please contact the starpu-devel mailing-list at starpu-devel@inria.fr or browse [the StarPU website](https://starpu.gitlabpages.inria.fr/). starpu-1.4.9+dfsg/STARPU-REVISION000066400000000000000000000001031507764646700161770ustar00rootroot000000000000000088e791ca295ec0482b835ff083c4777da8e2e5 (HEAD, tag: starpu-1.4.9) starpu-1.4.9+dfsg/STARPU-VERSION000066400000000000000000000066501507764646700161030ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. # Versioning (SONAMEs) for StarPU libraries. # http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html#Updating-version-info # Here are a set of rules to help you update your library version information: # Start with version information of ‘0:0:0’ for each libtool library. # Update the version information only immediately before a public # release of your software. More frequent updates are unnecessary, and # only guarantee that the current interface number gets larger faster. # - If the library source code has changed at all since the last # update, then increment revision (‘c:r:a’ becomes ‘c:r+1:a’). # - If any interfaces have been added, removed, or changed since the # last update, increment current, and set revision to 0. # - If any interfaces have been added since the last public release, # then increment age. # - If any interfaces have been removed or changed since the last # public release, then set age to 0. change # This is the tarball version, major.minor STARPU_EFFECTIVE_VERSION=1.4 # Note for StarPU 1.1: we have changed ABI # Note for StarPU 1.2: reset everything to 0:0:0 # Libtool interface versioning (info "(libtool) Versioning"). LIBSTARPU_INTERFACE_CURRENT=8 # increment upon ABI change LIBSTARPU_INTERFACE_REVISION=0 # increment upon implementation change LIBSTARPU_INTERFACE_AGE=0 # set to CURRENT - PREVIOUS interface LIBSTARPUFFT_INTERFACE_CURRENT=1 # increment upon ABI change LIBSTARPUFFT_INTERFACE_REVISION=0 # increment upon implementation change LIBSTARPUFFT_INTERFACE_AGE=0 # set to CURRENT - PREVIOUS interface LIBSTARPUMPI_INTERFACE_CURRENT=3 # increment upon ABI change LIBSTARPUMPI_INTERFACE_REVISION=2 # increment upon implementation change LIBSTARPUMPI_INTERFACE_AGE=0 # set to CURRENT - PREVIOUS interface LIBSOCL_INTERFACE_CURRENT=1 # increment upon ABI change LIBSOCL_INTERFACE_REVISION=2 # increment upon implementation change LIBSOCL_INTERFACE_AGE=0 # set to CURRENT - PREVIOUS interface LIBSTARPURM_INTERFACE_CURRENT=1 # increment upon ABI change LIBSTARPURM_INTERFACE_REVISION=0 # increment upon implementation change LIBSTARPURM_INTERFACE_AGE=0 # set to CURRENT - PREVIOUS interface LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT=1 # increment upon ABI change LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION=0 # increment upon implementation change LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE=0 # set to CURRENT - PREVIOUS interface LIBSTARPUJULIA_INTERFACE_CURRENT=1 # increment upon ABI change LIBSTARPUJULIA_INTERFACE_REVISION=0 # increment upon implementation change LIBSTARPUJULIA_INTERFACE_AGE=0 # set to CURRENT - PREVIOUS interface starpu-1.4.9+dfsg/TODO000066400000000000000000000020311507764646700144740ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # Moving access modes for data handles from struct starpu_task to struct starpu_codelet ===================================================================================== TODO list - Make struct starpu_data_descr private (or not, as it can still be used in tests and examples) - When cost_model is provided, but not cost_function, need to rebuild a struct starpu_data_descr starpu-1.4.9+dfsg/aclocal.m4000066400000000000000000001271051507764646700156560ustar00rootroot00000000000000# generated automatically by aclocal 1.16.5 -*- Autoconf -*- # Copyright (C) 1996-2021 Free Software Foundation, Inc. # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) m4_ifndef([AC_AUTOCONF_VERSION], [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.71],, [m4_warning([this file was generated for autoconf 2.71. You have another version of autoconf. It may work, but is not guaranteed to. If you have problems, you may need to regenerate the build system entirely. To do so, use the procedure documented by the package, typically 'autoreconf'.])]) # Copyright (C) 2002-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_AUTOMAKE_VERSION(VERSION) # ---------------------------- # Automake X.Y traces this macro to ensure aclocal.m4 has been # generated from the m4 files accompanying Automake X.Y. # (This private macro should not be called outside this file.) AC_DEFUN([AM_AUTOMAKE_VERSION], [am__api_version='1.16' dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to dnl require some minimum version. Point them to the right macro. m4_if([$1], [1.16.5], [], [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl ]) # _AM_AUTOCONF_VERSION(VERSION) # ----------------------------- # aclocal traces this macro to find the Autoconf version. # This is a private macro too. Using m4_define simplifies # the logic in aclocal, which can simply ignore this definition. m4_define([_AM_AUTOCONF_VERSION], []) # AM_SET_CURRENT_AUTOMAKE_VERSION # ------------------------------- # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], [AM_AUTOMAKE_VERSION([1.16.5])dnl m4_ifndef([AC_AUTOCONF_VERSION], [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) # Copyright (C) 2011-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_PROG_AR([ACT-IF-FAIL]) # ------------------------- # Try to determine the archiver interface, and trigger the ar-lib wrapper # if it is needed. If the detection of archiver interface fails, run # ACT-IF-FAIL (default is to abort configure with a proper error message). AC_DEFUN([AM_PROG_AR], [AC_BEFORE([$0], [LT_INIT])dnl AC_BEFORE([$0], [AC_PROG_LIBTOOL])dnl AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl AC_REQUIRE_AUX_FILE([ar-lib])dnl AC_CHECK_TOOLS([AR], [ar lib "link -lib"], [false]) : ${AR=ar} AC_CACHE_CHECK([the archiver ($AR) interface], [am_cv_ar_interface], [AC_LANG_PUSH([C]) am_cv_ar_interface=ar AC_COMPILE_IFELSE([AC_LANG_SOURCE([[int some_variable = 0;]])], [am_ar_try='$AR cru libconftest.a conftest.$ac_objext >&AS_MESSAGE_LOG_FD' AC_TRY_EVAL([am_ar_try]) if test "$ac_status" -eq 0; then am_cv_ar_interface=ar else am_ar_try='$AR -NOLOGO -OUT:conftest.lib conftest.$ac_objext >&AS_MESSAGE_LOG_FD' AC_TRY_EVAL([am_ar_try]) if test "$ac_status" -eq 0; then am_cv_ar_interface=lib else am_cv_ar_interface=unknown fi fi rm -f conftest.lib libconftest.a ]) AC_LANG_POP([C])]) case $am_cv_ar_interface in ar) ;; lib) # Microsoft lib, so override with the ar-lib wrapper script. # FIXME: It is wrong to rewrite AR. # But if we don't then we get into trouble of one sort or another. # A longer-term fix would be to have automake use am__AR in this case, # and then we could set am__AR="$am_aux_dir/ar-lib \$(AR)" or something # similar. AR="$am_aux_dir/ar-lib $AR" ;; unknown) m4_default([$1], [AC_MSG_ERROR([could not determine $AR interface])]) ;; esac AC_SUBST([AR])dnl ]) # AM_AUX_DIR_EXPAND -*- Autoconf -*- # Copyright (C) 2001-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets # $ac_aux_dir to '$srcdir/foo'. In other projects, it is set to # '$srcdir', '$srcdir/..', or '$srcdir/../..'. # # Of course, Automake must honor this variable whenever it calls a # tool from the auxiliary directory. The problem is that $srcdir (and # therefore $ac_aux_dir as well) can be either absolute or relative, # depending on how configure is run. This is pretty annoying, since # it makes $ac_aux_dir quite unusable in subdirectories: in the top # source directory, any form will work fine, but in subdirectories a # relative path needs to be adjusted first. # # $ac_aux_dir/missing # fails when called from a subdirectory if $ac_aux_dir is relative # $top_srcdir/$ac_aux_dir/missing # fails if $ac_aux_dir is absolute, # fails when called from a subdirectory in a VPATH build with # a relative $ac_aux_dir # # The reason of the latter failure is that $top_srcdir and $ac_aux_dir # are both prefixed by $srcdir. In an in-source build this is usually # harmless because $srcdir is '.', but things will broke when you # start a VPATH build or use an absolute $srcdir. # # So we could use something similar to $top_srcdir/$ac_aux_dir/missing, # iff we strip the leading $srcdir from $ac_aux_dir. That would be: # am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` # and then we would define $MISSING as # MISSING="\${SHELL} $am_aux_dir/missing" # This will work as long as MISSING is not called from configure, because # unfortunately $(top_srcdir) has no meaning in configure. # However there are other variables, like CC, which are often used in # configure, and could therefore not use this "fixed" $ac_aux_dir. # # Another solution, used here, is to always expand $ac_aux_dir to an # absolute PATH. The drawback is that using absolute paths prevent a # configured tree to be moved without reconfiguration. AC_DEFUN([AM_AUX_DIR_EXPAND], [AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl # Expand $ac_aux_dir to an absolute path. am_aux_dir=`cd "$ac_aux_dir" && pwd` ]) # AM_CONDITIONAL -*- Autoconf -*- # Copyright (C) 1997-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_CONDITIONAL(NAME, SHELL-CONDITION) # ------------------------------------- # Define a conditional. AC_DEFUN([AM_CONDITIONAL], [AC_PREREQ([2.52])dnl m4_if([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl AC_SUBST([$1_TRUE])dnl AC_SUBST([$1_FALSE])dnl _AM_SUBST_NOTMAKE([$1_TRUE])dnl _AM_SUBST_NOTMAKE([$1_FALSE])dnl m4_define([_AM_COND_VALUE_$1], [$2])dnl if $2; then $1_TRUE= $1_FALSE='#' else $1_TRUE='#' $1_FALSE= fi AC_CONFIG_COMMANDS_PRE( [if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then AC_MSG_ERROR([[conditional "$1" was never defined. Usually this means the macro was only invoked conditionally.]]) fi])]) # Copyright (C) 1999-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be # written in clear, in which case automake, when reading aclocal.m4, # will think it sees a *use*, and therefore will trigger all it's # C support machinery. Also note that it means that autoscan, seeing # CC etc. in the Makefile, will ask for an AC_PROG_CC use... # _AM_DEPENDENCIES(NAME) # ---------------------- # See how the compiler implements dependency checking. # NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GJC". # We try a few techniques and use that to set a single cache variable. # # We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was # modified to invoke _AM_DEPENDENCIES(CC); we would have a circular # dependency, and given that the user is not expected to run this macro, # just rely on AC_PROG_CC. AC_DEFUN([_AM_DEPENDENCIES], [AC_REQUIRE([AM_SET_DEPDIR])dnl AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl AC_REQUIRE([AM_MAKE_INCLUDE])dnl AC_REQUIRE([AM_DEP_TRACK])dnl m4_if([$1], [CC], [depcc="$CC" am_compiler_list=], [$1], [CXX], [depcc="$CXX" am_compiler_list=], [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'], [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'], [$1], [UPC], [depcc="$UPC" am_compiler_list=], [$1], [GCJ], [depcc="$GCJ" am_compiler_list='gcc3 gcc'], [depcc="$$1" am_compiler_list=]) AC_CACHE_CHECK([dependency style of $depcc], [am_cv_$1_dependencies_compiler_type], [if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named 'D' -- because '-MD' means "put the output # in D". rm -rf conftest.dir mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. mkdir sub am_cv_$1_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` fi am__universal=false m4_case([$1], [CC], [case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac], [CXX], [case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac]) for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. # # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with # Solaris 10 /bin/sh. echo '/* dummy */' > sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf # We check with '-c' and '-o' for the sake of the "dashmstdout" # mode. It turns out that the SunPro C++ compiler does not properly # handle '-M -o', and we need to detect this. Also, some Intel # versions had trouble with output in subdirs. am__obj=sub/conftest.${OBJEXT-o} am__minus_obj="-o $am__obj" case $depmode in gcc) # This depmode causes a compiler race in universal mode. test "$am__universal" = false || continue ;; nosideeffect) # After this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested. if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; msvc7 | msvc7msys | msvisualcpp | msvcmsys) # This compiler won't grok '-c -o', but also, the minuso test has # not run yet. These depmodes are late enough in the game, and # so weak that their functioning should not be impacted. am__obj=conftest.${OBJEXT-o} am__minus_obj= ;; none) break ;; esac if depmode=$depmode \ source=sub/conftest.c object=$am__obj \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep $am__obj sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. # When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_$1_dependencies_compiler_type=$depmode break fi fi done cd .. rm -rf conftest.dir else am_cv_$1_dependencies_compiler_type=none fi ]) AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) AM_CONDITIONAL([am__fastdep$1], [ test "x$enable_dependency_tracking" != xno \ && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) ]) # AM_SET_DEPDIR # ------------- # Choose a directory name for dependency files. # This macro is AC_REQUIREd in _AM_DEPENDENCIES. AC_DEFUN([AM_SET_DEPDIR], [AC_REQUIRE([AM_SET_LEADING_DOT])dnl AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl ]) # AM_DEP_TRACK # ------------ AC_DEFUN([AM_DEP_TRACK], [AC_ARG_ENABLE([dependency-tracking], [dnl AS_HELP_STRING( [--enable-dependency-tracking], [do not reject slow dependency extractors]) AS_HELP_STRING( [--disable-dependency-tracking], [speeds up one-time build])]) if test "x$enable_dependency_tracking" != xno; then am_depcomp="$ac_aux_dir/depcomp" AMDEPBACKSLASH='\' am__nodep='_no' fi AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) AC_SUBST([AMDEPBACKSLASH])dnl _AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl AC_SUBST([am__nodep])dnl _AM_SUBST_NOTMAKE([am__nodep])dnl ]) # Generate code to set up dependency tracking. -*- Autoconf -*- # Copyright (C) 1999-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # _AM_OUTPUT_DEPENDENCY_COMMANDS # ------------------------------ AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], [{ # Older Autoconf quotes --file arguments for eval, but not when files # are listed without --file. Let's play safe and only enable the eval # if we detect the quoting. # TODO: see whether this extra hack can be removed once we start # requiring Autoconf 2.70 or later. AS_CASE([$CONFIG_FILES], [*\'*], [eval set x "$CONFIG_FILES"], [*], [set x $CONFIG_FILES]) shift # Used to flag and report bootstrapping failures. am_rc=0 for am_mf do # Strip MF so we end up with the name of the file. am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'` # Check whether this is an Automake generated Makefile which includes # dependency-tracking related rules and includes. # Grep'ing the whole file directly is not great: AIX grep has a line # limit of 2048, but all sed's we know have understand at least 4000. sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ || continue am_dirpart=`AS_DIRNAME(["$am_mf"])` am_filepart=`AS_BASENAME(["$am_mf"])` AM_RUN_LOG([cd "$am_dirpart" \ && sed -e '/# am--include-marker/d' "$am_filepart" \ | $MAKE -f - am--depfiles]) || am_rc=$? done if test $am_rc -ne 0; then AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments for automatic dependency tracking. If GNU make was not used, consider re-running the configure script with MAKE="gmake" (or whatever is necessary). You can also try re-running configure with the '--disable-dependency-tracking' option to at least be able to build the package (albeit without support for automatic dependency tracking).]) fi AS_UNSET([am_dirpart]) AS_UNSET([am_filepart]) AS_UNSET([am_mf]) AS_UNSET([am_rc]) rm -f conftest-deps.mk } ])# _AM_OUTPUT_DEPENDENCY_COMMANDS # AM_OUTPUT_DEPENDENCY_COMMANDS # ----------------------------- # This macro should only be invoked once -- use via AC_REQUIRE. # # This code is only required when automatic dependency tracking is enabled. # This creates each '.Po' and '.Plo' makefile fragment that we'll need in # order to bootstrap the dependency handling code. AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], [AC_CONFIG_COMMANDS([depfiles], [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], [AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])]) # Do all the work for Automake. -*- Autoconf -*- # Copyright (C) 1996-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This macro actually does too much. Some checks are only needed if # your package does certain things. But this isn't really a big deal. dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O. m4_define([AC_PROG_CC], m4_defn([AC_PROG_CC]) [_AM_PROG_CC_C_O ]) # AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) # AM_INIT_AUTOMAKE([OPTIONS]) # ----------------------------------------------- # The call with PACKAGE and VERSION arguments is the old style # call (pre autoconf-2.50), which is being phased out. PACKAGE # and VERSION should now be passed to AC_INIT and removed from # the call to AM_INIT_AUTOMAKE. # We support both call styles for the transition. After # the next Automake release, Autoconf can make the AC_INIT # arguments mandatory, and then we can depend on a new Autoconf # release and drop the old call support. AC_DEFUN([AM_INIT_AUTOMAKE], [AC_PREREQ([2.65])dnl m4_ifdef([_$0_ALREADY_INIT], [m4_fatal([$0 expanded multiple times ]m4_defn([_$0_ALREADY_INIT]))], [m4_define([_$0_ALREADY_INIT], m4_expansion_stack)])dnl dnl Autoconf wants to disallow AM_ names. We explicitly allow dnl the ones we care about. m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl AC_REQUIRE([AC_PROG_INSTALL])dnl if test "`cd $srcdir && pwd`" != "`pwd`"; then # Use -I$(srcdir) only when $(srcdir) != ., so that make's output # is not polluted with repeated "-I." AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl # test to see if srcdir already configured if test -f $srcdir/config.status; then AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) fi fi # test whether we have cygpath if test -z "$CYGPATH_W"; then if (cygpath --version) >/dev/null 2>/dev/null; then CYGPATH_W='cygpath -w' else CYGPATH_W=echo fi fi AC_SUBST([CYGPATH_W]) # Define the identity of the package. dnl Distinguish between old-style and new-style calls. m4_ifval([$2], [AC_DIAGNOSE([obsolete], [$0: two- and three-arguments forms are deprecated.]) m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl AC_SUBST([PACKAGE], [$1])dnl AC_SUBST([VERSION], [$2])], [_AM_SET_OPTIONS([$1])dnl dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. m4_if( m4_ifset([AC_PACKAGE_NAME], [ok]):m4_ifset([AC_PACKAGE_VERSION], [ok]), [ok:ok],, [m4_fatal([AC_INIT should be called with package and version arguments])])dnl AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl _AM_IF_OPTION([no-define],, [AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package]) AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl # Some tools Automake needs. AC_REQUIRE([AM_SANITY_CHECK])dnl AC_REQUIRE([AC_ARG_PROGRAM])dnl AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}]) AM_MISSING_PROG([AUTOCONF], [autoconf]) AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}]) AM_MISSING_PROG([AUTOHEADER], [autoheader]) AM_MISSING_PROG([MAKEINFO], [makeinfo]) AC_REQUIRE([AM_PROG_INSTALL_SH])dnl AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl AC_REQUIRE([AC_PROG_MKDIR_P])dnl # For better backward compatibility. To be removed once Automake 1.9.x # dies out for good. For more background, see: # # AC_SUBST([mkdir_p], ['$(MKDIR_P)']) # We need awk for the "check" target (and possibly the TAP driver). The # system "awk" is bad on some platforms. AC_REQUIRE([AC_PROG_AWK])dnl AC_REQUIRE([AC_PROG_MAKE_SET])dnl AC_REQUIRE([AM_SET_LEADING_DOT])dnl _AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], [_AM_PROG_TAR([v7])])]) _AM_IF_OPTION([no-dependencies],, [AC_PROVIDE_IFELSE([AC_PROG_CC], [_AM_DEPENDENCIES([CC])], [m4_define([AC_PROG_CC], m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl AC_PROVIDE_IFELSE([AC_PROG_CXX], [_AM_DEPENDENCIES([CXX])], [m4_define([AC_PROG_CXX], m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl AC_PROVIDE_IFELSE([AC_PROG_OBJC], [_AM_DEPENDENCIES([OBJC])], [m4_define([AC_PROG_OBJC], m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl AC_PROVIDE_IFELSE([AC_PROG_OBJCXX], [_AM_DEPENDENCIES([OBJCXX])], [m4_define([AC_PROG_OBJCXX], m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl ]) # Variables for tags utilities; see am/tags.am if test -z "$CTAGS"; then CTAGS=ctags fi AC_SUBST([CTAGS]) if test -z "$ETAGS"; then ETAGS=etags fi AC_SUBST([ETAGS]) if test -z "$CSCOPE"; then CSCOPE=cscope fi AC_SUBST([CSCOPE]) AC_REQUIRE([AM_SILENT_RULES])dnl dnl The testsuite driver may need to know about EXEEXT, so add the dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below. AC_CONFIG_COMMANDS_PRE(dnl [m4_provide_if([_AM_COMPILER_EXEEXT], [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl # POSIX will say in a future version that running "rm -f" with no argument # is OK; and we want to be able to make that assumption in our Makefile # recipes. So use an aggressive probe to check that the usage we want is # actually supported "in the wild" to an acceptable degree. # See automake bug#10828. # To make any issue more visible, cause the running configure to be aborted # by default if the 'rm' program in use doesn't match our expectations; the # user can still override this though. if rm -f && rm -fr && rm -rf; then : OK; else cat >&2 <<'END' Oops! Your 'rm' program seems unable to run without file operands specified on the command line, even when the '-f' option is present. This is contrary to the behaviour of most rm programs out there, and not conforming with the upcoming POSIX standard: Please tell bug-automake@gnu.org about your system, including the value of your $PATH and any error possibly output before this message. This can help us improve future automake versions. END if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then echo 'Configuration will proceed anyway, since you have set the' >&2 echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 echo >&2 else cat >&2 <<'END' Aborting the configuration process, to ensure you take notice of the issue. You can download and install GNU coreutils to get an 'rm' implementation that behaves properly: . If you want to complete the configuration process using your problematic 'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM to "yes", and re-run configure. END AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) fi fi dnl The trailing newline in this macro's definition is deliberate, for dnl backward compatibility and to allow trailing 'dnl'-style comments dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841. ]) dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further dnl mangled by Autoconf and run in a shell conditional statement. m4_define([_AC_COMPILER_EXEEXT], m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) # When config.status generates a header, we must update the stamp-h file. # This file resides in the same directory as the config header # that is generated. The stamp files are numbered to have different names. # Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the # loop where config.status creates the headers, so we can generate # our stamp files there. AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], [# Compute $1's index in $config_headers. _am_arg=$1 _am_stamp_count=1 for _am_header in $config_headers :; do case $_am_header in $_am_arg | $_am_arg:* ) break ;; * ) _am_stamp_count=`expr $_am_stamp_count + 1` ;; esac done echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) # Copyright (C) 2001-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_PROG_INSTALL_SH # ------------------ # Define $install_sh. AC_DEFUN([AM_PROG_INSTALL_SH], [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl if test x"${install_sh+set}" != xset; then case $am_aux_dir in *\ * | *\ *) install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; *) install_sh="\${SHELL} $am_aux_dir/install-sh" esac fi AC_SUBST([install_sh])]) # Copyright (C) 2003-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # Check whether the underlying file-system supports filenames # with a leading dot. For instance MS-DOS doesn't. AC_DEFUN([AM_SET_LEADING_DOT], [rm -rf .tst 2>/dev/null mkdir .tst 2>/dev/null if test -d .tst; then am__leading_dot=. else am__leading_dot=_ fi rmdir .tst 2>/dev/null AC_SUBST([am__leading_dot])]) # Check to see how 'make' treats includes. -*- Autoconf -*- # Copyright (C) 2001-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_MAKE_INCLUDE() # ----------------- # Check whether make has an 'include' directive that can support all # the idioms we need for our automatic dependency tracking code. AC_DEFUN([AM_MAKE_INCLUDE], [AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive]) cat > confinc.mk << 'END' am__doit: @echo this is the am__doit target >confinc.out .PHONY: am__doit END am__include="#" am__quote= # BSD make does it like this. echo '.include "confinc.mk" # ignored' > confmf.BSD # Other make implementations (GNU, Solaris 10, AIX) do it like this. echo 'include confinc.mk # ignored' > confmf.GNU _am_result=no for s in GNU BSD; do AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out]) AS_CASE([$?:`cat confinc.out 2>/dev/null`], ['0:this is the am__doit target'], [AS_CASE([$s], [BSD], [am__include='.include' am__quote='"'], [am__include='include' am__quote=''])]) if test "$am__include" != "#"; then _am_result="yes ($s style)" break fi done rm -f confinc.* confmf.* AC_MSG_RESULT([${_am_result}]) AC_SUBST([am__include])]) AC_SUBST([am__quote])]) # Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- # Copyright (C) 1997-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_MISSING_PROG(NAME, PROGRAM) # ------------------------------ AC_DEFUN([AM_MISSING_PROG], [AC_REQUIRE([AM_MISSING_HAS_RUN]) $1=${$1-"${am_missing_run}$2"} AC_SUBST($1)]) # AM_MISSING_HAS_RUN # ------------------ # Define MISSING if not defined so far and test if it is modern enough. # If it is, set am_missing_run to use it, otherwise, to nothing. AC_DEFUN([AM_MISSING_HAS_RUN], [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl AC_REQUIRE_AUX_FILE([missing])dnl if test x"${MISSING+set}" != xset; then MISSING="\${SHELL} '$am_aux_dir/missing'" fi # Use eval to expand $SHELL if eval "$MISSING --is-lightweight"; then am_missing_run="$MISSING " else am_missing_run= AC_MSG_WARN(['missing' script is too old or missing]) fi ]) # Helper functions for option handling. -*- Autoconf -*- # Copyright (C) 2001-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # _AM_MANGLE_OPTION(NAME) # ----------------------- AC_DEFUN([_AM_MANGLE_OPTION], [[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) # _AM_SET_OPTION(NAME) # -------------------- # Set option NAME. Presently that only means defining a flag for this option. AC_DEFUN([_AM_SET_OPTION], [m4_define(_AM_MANGLE_OPTION([$1]), [1])]) # _AM_SET_OPTIONS(OPTIONS) # ------------------------ # OPTIONS is a space-separated list of Automake options. AC_DEFUN([_AM_SET_OPTIONS], [m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) # _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) # ------------------------------------------- # Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. AC_DEFUN([_AM_IF_OPTION], [m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) # Copyright (C) 1999-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # _AM_PROG_CC_C_O # --------------- # Like AC_PROG_CC_C_O, but changed for automake. We rewrite AC_PROG_CC # to automatically call this. AC_DEFUN([_AM_PROG_CC_C_O], [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl AC_REQUIRE_AUX_FILE([compile])dnl AC_LANG_PUSH([C])dnl AC_CACHE_CHECK( [whether $CC understands -c and -o together], [am_cv_prog_cc_c_o], [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])]) # Make sure it works both with $CC and with simple cc. # Following AC_PROG_CC_C_O, we do the test twice because some # compilers refuse to overwrite an existing .o file with -o, # though they will create one. am_cv_prog_cc_c_o=yes for am_i in 1 2; do if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \ && test -f conftest2.$ac_objext; then : OK else am_cv_prog_cc_c_o=no break fi done rm -f core conftest* unset am_i]) if test "$am_cv_prog_cc_c_o" != yes; then # Losing compiler, so override with the script. # FIXME: It is wrong to rewrite CC. # But if we don't then we get into trouble of one sort or another. # A longer-term fix would be to have automake use am__CC in this case, # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" CC="$am_aux_dir/compile $CC" fi AC_LANG_POP([C])]) # For backward compatibility. AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])]) # Copyright (C) 2001-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_RUN_LOG(COMMAND) # ------------------- # Run COMMAND, save the exit status in ac_status, and log it. # (This has been adapted from Autoconf's _AC_RUN_LOG macro.) AC_DEFUN([AM_RUN_LOG], [{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD (exit $ac_status); }]) # Check to make sure that the build environment is sane. -*- Autoconf -*- # Copyright (C) 1996-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_SANITY_CHECK # --------------- AC_DEFUN([AM_SANITY_CHECK], [AC_MSG_CHECKING([whether build environment is sane]) # Reject unsafe characters in $srcdir or the absolute working directory # name. Accept space and tab only in the latter. am_lf=' ' case `pwd` in *[[\\\"\#\$\&\'\`$am_lf]]*) AC_MSG_ERROR([unsafe absolute working directory name]);; esac case $srcdir in *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);; esac # Do 'set' in a subshell so we don't clobber the current shell's # arguments. Must try -L first in case configure is actually a # symlink; some systems play weird games with the mod time of symlinks # (eg FreeBSD returns the mod time of the symlink's containing # directory). if ( am_has_slept=no for am_try in 1 2; do echo "timestamp, slept: $am_has_slept" > conftest.file set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` if test "$[*]" = "X"; then # -L didn't work. set X `ls -t "$srcdir/configure" conftest.file` fi if test "$[*]" != "X $srcdir/configure conftest.file" \ && test "$[*]" != "X conftest.file $srcdir/configure"; then # If neither matched, then we have a broken ls. This can happen # if, for instance, CONFIG_SHELL is bash and it inherits a # broken ls alias from the environment. This has actually # happened. Such a system could not be considered "sane". AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken alias in your environment]) fi if test "$[2]" = conftest.file || test $am_try -eq 2; then break fi # Just in case. sleep 1 am_has_slept=yes done test "$[2]" = conftest.file ) then # Ok. : else AC_MSG_ERROR([newly created file is older than distributed files! Check your system clock]) fi AC_MSG_RESULT([yes]) # If we didn't sleep, we still need to ensure time stamps of config.status and # generated files are strictly newer. am_sleep_pid= if grep 'slept: no' conftest.file >/dev/null 2>&1; then ( sleep 1 ) & am_sleep_pid=$! fi AC_CONFIG_COMMANDS_PRE( [AC_MSG_CHECKING([that generated files are newer than configure]) if test -n "$am_sleep_pid"; then # Hide warnings about reused PIDs. wait $am_sleep_pid 2>/dev/null fi AC_MSG_RESULT([done])]) rm -f conftest.file ]) # Copyright (C) 2009-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_SILENT_RULES([DEFAULT]) # -------------------------- # Enable less verbose build rules; with the default set to DEFAULT # ("yes" being less verbose, "no" or empty being verbose). AC_DEFUN([AM_SILENT_RULES], [AC_ARG_ENABLE([silent-rules], [dnl AS_HELP_STRING( [--enable-silent-rules], [less verbose build output (undo: "make V=1")]) AS_HELP_STRING( [--disable-silent-rules], [verbose build output (undo: "make V=0")])dnl ]) case $enable_silent_rules in @%:@ ((( yes) AM_DEFAULT_VERBOSITY=0;; no) AM_DEFAULT_VERBOSITY=1;; *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);; esac dnl dnl A few 'make' implementations (e.g., NonStop OS and NextStep) dnl do not support nested variable expansions. dnl See automake bug#9928 and bug#10237. am_make=${MAKE-make} AC_CACHE_CHECK([whether $am_make supports nested variables], [am_cv_make_support_nested_variables], [if AS_ECHO([['TRUE=$(BAR$(V)) BAR0=false BAR1=true V=1 am__doit: @$(TRUE) .PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then am_cv_make_support_nested_variables=yes else am_cv_make_support_nested_variables=no fi]) if test $am_cv_make_support_nested_variables = yes; then dnl Using '$V' instead of '$(V)' breaks IRIX make. AM_V='$(V)' AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' else AM_V=$AM_DEFAULT_VERBOSITY AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY fi AC_SUBST([AM_V])dnl AM_SUBST_NOTMAKE([AM_V])dnl AC_SUBST([AM_DEFAULT_V])dnl AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl AC_SUBST([AM_DEFAULT_VERBOSITY])dnl AM_BACKSLASH='\' AC_SUBST([AM_BACKSLASH])dnl _AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl ]) # Copyright (C) 2001-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # AM_PROG_INSTALL_STRIP # --------------------- # One issue with vendor 'install' (even GNU) is that you can't # specify the program used to strip binaries. This is especially # annoying in cross-compiling environments, where the build's strip # is unlikely to handle the host's binaries. # Fortunately install-sh will honor a STRIPPROG variable, so we # always use install-sh in "make install-strip", and initialize # STRIPPROG with the value of the STRIP variable (set by the user). AC_DEFUN([AM_PROG_INSTALL_STRIP], [AC_REQUIRE([AM_PROG_INSTALL_SH])dnl # Installed binaries are usually stripped using 'strip' when the user # run "make install-strip". However 'strip' might not be the right # tool to use in cross-compilation environments, therefore Automake # will honor the 'STRIP' environment variable to overrule this program. dnl Don't test for $cross_compiling = yes, because it might be 'maybe'. if test "$cross_compiling" != no; then AC_CHECK_TOOL([STRIP], [strip], :) fi INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" AC_SUBST([INSTALL_STRIP_PROGRAM])]) # Copyright (C) 2006-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # _AM_SUBST_NOTMAKE(VARIABLE) # --------------------------- # Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in. # This macro is traced by Automake. AC_DEFUN([_AM_SUBST_NOTMAKE]) # AM_SUBST_NOTMAKE(VARIABLE) # -------------------------- # Public sister of _AM_SUBST_NOTMAKE. AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)]) # Check how to create a tarball. -*- Autoconf -*- # Copyright (C) 2004-2021 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # _AM_PROG_TAR(FORMAT) # -------------------- # Check how to create a tarball in format FORMAT. # FORMAT should be one of 'v7', 'ustar', or 'pax'. # # Substitute a variable $(am__tar) that is a command # writing to stdout a FORMAT-tarball containing the directory # $tardir. # tardir=directory && $(am__tar) > result.tar # # Substitute a variable $(am__untar) that extract such # a tarball read from stdin. # $(am__untar) < result.tar # AC_DEFUN([_AM_PROG_TAR], [# Always define AMTAR for backward compatibility. Yes, it's still used # in the wild :-( We should find a proper way to deprecate it ... AC_SUBST([AMTAR], ['$${TAR-tar}']) # We'll loop over all known methods to create a tar archive until one works. _am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' m4_if([$1], [v7], [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'], [m4_case([$1], [ustar], [# The POSIX 1988 'ustar' format is defined with fixed-size fields. # There is notably a 21 bits limit for the UID and the GID. In fact, # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343 # and bug#13588). am_max_uid=2097151 # 2^21 - 1 am_max_gid=$am_max_uid # The $UID and $GID variables are not portable, so we need to resort # to the POSIX-mandated id(1) utility. Errors in the 'id' calls # below are definitely unexpected, so allow the users to see them # (that is, avoid stderr redirection). am_uid=`id -u || echo unknown` am_gid=`id -g || echo unknown` AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format]) if test $am_uid -le $am_max_uid; then AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no]) _am_tools=none fi AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format]) if test $am_gid -le $am_max_gid; then AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no]) _am_tools=none fi], [pax], [], [m4_fatal([Unknown tar format])]) AC_MSG_CHECKING([how to create a $1 tar archive]) # Go ahead even if we have the value already cached. We do so because we # need to set the values for the 'am__tar' and 'am__untar' variables. _am_tools=${am_cv_prog_tar_$1-$_am_tools} for _am_tool in $_am_tools; do case $_am_tool in gnutar) for _am_tar in tar gnutar gtar; do AM_RUN_LOG([$_am_tar --version]) && break done am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' am__untar="$_am_tar -xf -" ;; plaintar) # Must skip GNU tar: if it does not support --format= it doesn't create # ustar tarball either. (tar --version) >/dev/null 2>&1 && continue am__tar='tar chf - "$$tardir"' am__tar_='tar chf - "$tardir"' am__untar='tar xf -' ;; pax) am__tar='pax -L -x $1 -w "$$tardir"' am__tar_='pax -L -x $1 -w "$tardir"' am__untar='pax -r' ;; cpio) am__tar='find "$$tardir" -print | cpio -o -H $1 -L' am__tar_='find "$tardir" -print | cpio -o -H $1 -L' am__untar='cpio -i -H $1 -d' ;; none) am__tar=false am__tar_=false am__untar=false ;; esac # If the value was cached, stop now. We just wanted to have am__tar # and am__untar set. test -n "${am_cv_prog_tar_$1}" && break # tar/untar a dummy directory, and stop if the command works. rm -rf conftest.dir mkdir conftest.dir echo GrepMe > conftest.dir/file AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) rm -rf conftest.dir if test -s conftest.tar; then AM_RUN_LOG([$am__untar /dev/null 2>&1 && break fi done rm -rf conftest.dir AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) AC_MSG_RESULT([$am_cv_prog_tar_$1])]) AC_SUBST([am__tar]) AC_SUBST([am__untar]) ]) # _AM_PROG_TAR m4_include([m4/acinclude.m4]) m4_include([m4/ax_cxx_compile_stdcxx.m4]) m4_include([m4/ax_dlb_callback_arg.m4]) m4_include([m4/libs.m4]) m4_include([m4/libtool.m4]) m4_include([m4/ltoptions.m4]) m4_include([m4/ltsugar.m4]) m4_include([m4/ltversion.m4]) m4_include([m4/lt~obsolete.m4]) m4_include([m4/pkg.m4]) starpu-1.4.9+dfsg/autogen.sh000077500000000000000000000017111507764646700160110ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if ! libtool --version > /dev/null then # Perhaps we are on a Mac if ! glibtool --version > /dev/null then echo "GNU Libtool is missing, please install it and fix the PATH to it." exit 1 else export LIBTOOL=glibtool export LIBTOOLIZE=glibtoolize fi fi autoreconf -ivf -I m4 starpu-1.4.9+dfsg/bubble/000077500000000000000000000000001507764646700152435ustar00rootroot00000000000000starpu-1.4.9+dfsg/bubble/Makefile.am000066400000000000000000000013551507764646700173030ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-subdirtests.mk SUBDIRS = SUBDIRS += tests starpu-1.4.9+dfsg/bubble/Makefile.in000066400000000000000000000662341507764646700173230ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ subdir = bubble ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/make/starpu-subdirtests.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = tests all: all-recursive .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign bubble/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign bubble/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile installdirs: installdirs-recursive installdirs-am: install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ check-am clean clean-generic clean-libtool cscopelist-am ctags \ ctags-am distclean distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ installdirs-am maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ps ps-am tags tags-am uninstall uninstall-am .PRECIOUS: Makefile # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # recheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i recheck || RET=1 ; \ done ; \ exit $$RET showcheckfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/bubble/tests/000077500000000000000000000000001507764646700164055ustar00rootroot00000000000000starpu-1.4.9+dfsg/bubble/tests/Makefile.am000066400000000000000000000040661507764646700204470ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2019-2023 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk include $(top_srcdir)/make/starpu-loader.mk AM_CFLAGS += $(APP_CFLAGS) AM_CXXFLAGS += $(APP_CXXFLAGS) AM_FFLAGS += $(APP_FFLAGS) AM_FCFLAGS += $(APP_FCFLAGS) AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS) AM_CPPFLAGS += -I$(top_srcdir)/bubble/include AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) LIBS += $(HWLOC_LIBS) LIBS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) #LIBS += ../src/libstarpububble-@STARPU_EFFECTIVE_VERSION@.la BUILT_SOURCES = if STARPU_USE_OPENCL nobase_STARPU_OPENCL_DATA_DATA = endif EXTRA_DIST = \ basic/basic.h CLEANFILES = *.gcno *.gcda *.linkinfo core starpu_idle_microsec.log ##################################### # What to install and what to check # ##################################### if STARPU_HAVE_WINDOWS check_PROGRAMS = $(myPROGRAMS) else check_PROGRAMS = $(LOADER) $(myPROGRAMS) endif TESTS = $(myPROGRAMS) myPROGRAMS = if !STARPU_SIMGRID if STARPU_BUBBLE myPROGRAMS += \ basic/b \ basic/bb \ basic/btb \ basic/btb_func \ basic/bbt \ basic/btt \ basic/bbtt \ basic/tbbt \ basic/tbtbt \ basic/brt \ basic/brbtt \ basic/sync \ basic/gemm_dag \ basic/b2t \ basic/brec \ basic/brec_level \ basic/read \ basic/tbrbtt \ vector/vector endif endif noinst_PROGRAMS += $(myPROGRAMS) starpu-1.4.9+dfsg/bubble/tests/Makefile.in000066400000000000000000002246621507764646700204660ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ noinst_PROGRAMS = $(am__EXEEXT_3) $(am__EXEEXT_2) # Make tests run through mpiexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec # switch off local socket usage #MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 @STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader @STARPU_HAVE_WINDOWS_FALSE@check_PROGRAMS = $(am__EXEEXT_2) @STARPU_HAVE_WINDOWS_TRUE@check_PROGRAMS = $(am__EXEEXT_2) TESTS = $(am__EXEEXT_2) @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@am__append_8 = \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/b \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/bb \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/btb \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/btb_func \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/bbt \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/btt \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/bbtt \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/tbbt \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/tbtbt \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brt \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brbtt \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/sync \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/gemm_dag \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/b2t \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brec \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brec_level \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/read \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/tbrbtt \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ vector/vector subdir = bubble/tests ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_1 = \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/b$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/bb$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/btb$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/btb_func$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/bbt$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/btt$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/bbtt$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/tbbt$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/tbtbt$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brt$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brbtt$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/sync$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/gemm_dag$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/b2t$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brec$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/brec_level$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/read$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ basic/tbrbtt$(EXEEXT) \ @STARPU_BUBBLE_TRUE@@STARPU_SIMGRID_FALSE@ vector/vector$(EXEEXT) am__EXEEXT_2 = $(am__EXEEXT_1) @STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_3 = loader$(EXEEXT) PROGRAMS = $(noinst_PROGRAMS) basic_b_SOURCES = basic/b.c am__dirstamp = $(am__leading_dot)dirstamp basic_b_OBJECTS = basic/b.$(OBJEXT) basic_b_LDADD = $(LDADD) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = basic_b2t_SOURCES = basic/b2t.c basic_b2t_OBJECTS = basic/b2t.$(OBJEXT) basic_b2t_LDADD = $(LDADD) basic_bb_SOURCES = basic/bb.c basic_bb_OBJECTS = basic/bb.$(OBJEXT) basic_bb_LDADD = $(LDADD) basic_bbt_SOURCES = basic/bbt.c basic_bbt_OBJECTS = basic/bbt.$(OBJEXT) basic_bbt_LDADD = $(LDADD) basic_bbtt_SOURCES = basic/bbtt.c basic_bbtt_OBJECTS = basic/bbtt.$(OBJEXT) basic_bbtt_LDADD = $(LDADD) basic_brbtt_SOURCES = basic/brbtt.c basic_brbtt_OBJECTS = basic/brbtt.$(OBJEXT) basic_brbtt_LDADD = $(LDADD) basic_brec_SOURCES = basic/brec.c basic_brec_OBJECTS = basic/brec.$(OBJEXT) basic_brec_LDADD = $(LDADD) basic_brec_level_SOURCES = basic/brec_level.c basic_brec_level_OBJECTS = basic/brec_level.$(OBJEXT) basic_brec_level_LDADD = $(LDADD) basic_brt_SOURCES = basic/brt.c basic_brt_OBJECTS = basic/brt.$(OBJEXT) basic_brt_LDADD = $(LDADD) basic_btb_SOURCES = basic/btb.c basic_btb_OBJECTS = basic/btb.$(OBJEXT) basic_btb_LDADD = $(LDADD) basic_btb_func_SOURCES = basic/btb_func.c basic_btb_func_OBJECTS = basic/btb_func.$(OBJEXT) basic_btb_func_LDADD = $(LDADD) basic_btt_SOURCES = basic/btt.c basic_btt_OBJECTS = basic/btt.$(OBJEXT) basic_btt_LDADD = $(LDADD) basic_gemm_dag_SOURCES = basic/gemm_dag.c basic_gemm_dag_OBJECTS = basic/gemm_dag.$(OBJEXT) basic_gemm_dag_LDADD = $(LDADD) basic_read_SOURCES = basic/read.c basic_read_OBJECTS = basic/read.$(OBJEXT) basic_read_LDADD = $(LDADD) basic_sync_SOURCES = basic/sync.c basic_sync_OBJECTS = basic/sync.$(OBJEXT) basic_sync_LDADD = $(LDADD) basic_tbbt_SOURCES = basic/tbbt.c basic_tbbt_OBJECTS = basic/tbbt.$(OBJEXT) basic_tbbt_LDADD = $(LDADD) basic_tbrbtt_SOURCES = basic/tbrbtt.c basic_tbrbtt_OBJECTS = basic/tbrbtt.$(OBJEXT) basic_tbrbtt_LDADD = $(LDADD) basic_tbtbt_SOURCES = basic/tbtbt.c basic_tbtbt_OBJECTS = basic/tbtbt.$(OBJEXT) basic_tbtbt_LDADD = $(LDADD) loader_SOURCES = loader.c loader_OBJECTS = loader-loader.$(OBJEXT) loader_LDADD = $(LDADD) vector_vector_SOURCES = vector/vector.c vector_vector_OBJECTS = vector/vector.$(OBJEXT) vector_vector_LDADD = $(LDADD) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po \ basic/$(DEPDIR)/b.Po basic/$(DEPDIR)/b2t.Po \ basic/$(DEPDIR)/bb.Po basic/$(DEPDIR)/bbt.Po \ basic/$(DEPDIR)/bbtt.Po basic/$(DEPDIR)/brbtt.Po \ basic/$(DEPDIR)/brec.Po basic/$(DEPDIR)/brec_level.Po \ basic/$(DEPDIR)/brt.Po basic/$(DEPDIR)/btb.Po \ basic/$(DEPDIR)/btb_func.Po basic/$(DEPDIR)/btt.Po \ basic/$(DEPDIR)/gemm_dag.Po basic/$(DEPDIR)/read.Po \ basic/$(DEPDIR)/sync.Po basic/$(DEPDIR)/tbbt.Po \ basic/$(DEPDIR)/tbrbtt.Po basic/$(DEPDIR)/tbtbt.Po \ vector/$(DEPDIR)/vector.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = basic/b.c basic/b2t.c basic/bb.c basic/bbt.c basic/bbtt.c \ basic/brbtt.c basic/brec.c basic/brec_level.c basic/brt.c \ basic/btb.c basic/btb_func.c basic/btt.c basic/gemm_dag.c \ basic/read.c basic/sync.c basic/tbbt.c basic/tbrbtt.c \ basic/tbtbt.c loader.c vector/vector.c DIST_SOURCES = basic/b.c basic/b2t.c basic/bb.c basic/bbt.c \ basic/bbtt.c basic/brbtt.c basic/brec.c basic/brec_level.c \ basic/brt.c basic/btb.c basic/btb_func.c basic/btt.c \ basic/gemm_dag.c basic/read.c basic/sync.c basic/tbbt.c \ basic/tbrbtt.c basic/tbtbt.c loader.c vector/vector.c am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(STARPU_OPENCL_DATAdir)" DATA = $(nobase_STARPU_OPENCL_DATA_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) AM_RECURSIVE_TARGETS = check recheck TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-loader.mk \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ $(STARPU_EXPORTED_LIBS) $(HWLOC_LIBS) $(STARPU_OPENCL_LDFLAGS) \ $(STARPU_CUDA_LDFLAGS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = $(am__append_4) $(am__append_6) LAUNCHER = $(am__append_3) $(am__append_5) # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2019-2023 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(APP_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) $(APP_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) $(APP_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) $(APP_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile # When GNU parallel is available and -j is passed to make, run tests through # parallel, using a "starpu" semaphore. # Also make test shell scripts run its tests through parallel, using a # "substarpu" semaphore. This brings some overload, but only one level. @HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') @STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 @STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ @STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) @STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" @STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) AM_TESTS_FD_REDIRECT = 9>&2 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src \ -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS) \ -I$(top_srcdir)/bubble/include AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ #LIBS += ../src/libstarpububble-@STARPU_EFFECTIVE_VERSION@.la BUILT_SOURCES = @STARPU_USE_OPENCL_TRUE@nobase_STARPU_OPENCL_DATA_DATA = EXTRA_DIST = \ basic/basic.h CLEANFILES = *.gcno *.gcda *.linkinfo core starpu_idle_microsec.log myPROGRAMS = $(am__append_8) all: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) all-am .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign bubble/tests/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign bubble/tests/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-checkPROGRAMS: @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list clean-noinstPROGRAMS: @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list basic/$(am__dirstamp): @$(MKDIR_P) basic @: > basic/$(am__dirstamp) basic/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) basic/$(DEPDIR) @: > basic/$(DEPDIR)/$(am__dirstamp) basic/b.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/b$(EXEEXT): $(basic_b_OBJECTS) $(basic_b_DEPENDENCIES) $(EXTRA_basic_b_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/b$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_b_OBJECTS) $(basic_b_LDADD) $(LIBS) basic/b2t.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/b2t$(EXEEXT): $(basic_b2t_OBJECTS) $(basic_b2t_DEPENDENCIES) $(EXTRA_basic_b2t_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/b2t$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_b2t_OBJECTS) $(basic_b2t_LDADD) $(LIBS) basic/bb.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/bb$(EXEEXT): $(basic_bb_OBJECTS) $(basic_bb_DEPENDENCIES) $(EXTRA_basic_bb_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/bb$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_bb_OBJECTS) $(basic_bb_LDADD) $(LIBS) basic/bbt.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/bbt$(EXEEXT): $(basic_bbt_OBJECTS) $(basic_bbt_DEPENDENCIES) $(EXTRA_basic_bbt_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/bbt$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_bbt_OBJECTS) $(basic_bbt_LDADD) $(LIBS) basic/bbtt.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/bbtt$(EXEEXT): $(basic_bbtt_OBJECTS) $(basic_bbtt_DEPENDENCIES) $(EXTRA_basic_bbtt_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/bbtt$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_bbtt_OBJECTS) $(basic_bbtt_LDADD) $(LIBS) basic/brbtt.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/brbtt$(EXEEXT): $(basic_brbtt_OBJECTS) $(basic_brbtt_DEPENDENCIES) $(EXTRA_basic_brbtt_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/brbtt$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_brbtt_OBJECTS) $(basic_brbtt_LDADD) $(LIBS) basic/brec.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/brec$(EXEEXT): $(basic_brec_OBJECTS) $(basic_brec_DEPENDENCIES) $(EXTRA_basic_brec_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/brec$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_brec_OBJECTS) $(basic_brec_LDADD) $(LIBS) basic/brec_level.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/brec_level$(EXEEXT): $(basic_brec_level_OBJECTS) $(basic_brec_level_DEPENDENCIES) $(EXTRA_basic_brec_level_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/brec_level$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_brec_level_OBJECTS) $(basic_brec_level_LDADD) $(LIBS) basic/brt.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/brt$(EXEEXT): $(basic_brt_OBJECTS) $(basic_brt_DEPENDENCIES) $(EXTRA_basic_brt_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/brt$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_brt_OBJECTS) $(basic_brt_LDADD) $(LIBS) basic/btb.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/btb$(EXEEXT): $(basic_btb_OBJECTS) $(basic_btb_DEPENDENCIES) $(EXTRA_basic_btb_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/btb$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_btb_OBJECTS) $(basic_btb_LDADD) $(LIBS) basic/btb_func.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/btb_func$(EXEEXT): $(basic_btb_func_OBJECTS) $(basic_btb_func_DEPENDENCIES) $(EXTRA_basic_btb_func_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/btb_func$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_btb_func_OBJECTS) $(basic_btb_func_LDADD) $(LIBS) basic/btt.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/btt$(EXEEXT): $(basic_btt_OBJECTS) $(basic_btt_DEPENDENCIES) $(EXTRA_basic_btt_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/btt$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_btt_OBJECTS) $(basic_btt_LDADD) $(LIBS) basic/gemm_dag.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/gemm_dag$(EXEEXT): $(basic_gemm_dag_OBJECTS) $(basic_gemm_dag_DEPENDENCIES) $(EXTRA_basic_gemm_dag_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/gemm_dag$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_gemm_dag_OBJECTS) $(basic_gemm_dag_LDADD) $(LIBS) basic/read.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/read$(EXEEXT): $(basic_read_OBJECTS) $(basic_read_DEPENDENCIES) $(EXTRA_basic_read_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/read$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_read_OBJECTS) $(basic_read_LDADD) $(LIBS) basic/sync.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/sync$(EXEEXT): $(basic_sync_OBJECTS) $(basic_sync_DEPENDENCIES) $(EXTRA_basic_sync_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/sync$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_sync_OBJECTS) $(basic_sync_LDADD) $(LIBS) basic/tbbt.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/tbbt$(EXEEXT): $(basic_tbbt_OBJECTS) $(basic_tbbt_DEPENDENCIES) $(EXTRA_basic_tbbt_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/tbbt$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_tbbt_OBJECTS) $(basic_tbbt_LDADD) $(LIBS) basic/tbrbtt.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/tbrbtt$(EXEEXT): $(basic_tbrbtt_OBJECTS) $(basic_tbrbtt_DEPENDENCIES) $(EXTRA_basic_tbrbtt_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/tbrbtt$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_tbrbtt_OBJECTS) $(basic_tbrbtt_LDADD) $(LIBS) basic/tbtbt.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/tbtbt$(EXEEXT): $(basic_tbtbt_OBJECTS) $(basic_tbtbt_DEPENDENCIES) $(EXTRA_basic_tbtbt_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/tbtbt$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_tbtbt_OBJECTS) $(basic_tbtbt_LDADD) $(LIBS) loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) @rm -f loader$(EXEEXT) $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) vector/$(am__dirstamp): @$(MKDIR_P) vector @: > vector/$(am__dirstamp) vector/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) vector/$(DEPDIR) @: > vector/$(DEPDIR)/$(am__dirstamp) vector/vector.$(OBJEXT): vector/$(am__dirstamp) \ vector/$(DEPDIR)/$(am__dirstamp) vector/vector$(EXEEXT): $(vector_vector_OBJECTS) $(vector_vector_DEPENDENCIES) $(EXTRA_vector_vector_DEPENDENCIES) vector/$(am__dirstamp) @rm -f vector/vector$(EXEEXT) $(AM_V_CCLD)$(LINK) $(vector_vector_OBJECTS) $(vector_vector_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) -rm -f basic/*.$(OBJEXT) -rm -f vector/*.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/b.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/b2t.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/bb.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/bbt.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/bbtt.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/brbtt.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/brec.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/brec_level.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/brt.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/btb.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/btb_func.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/btt.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/gemm_dag.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/read.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/sync.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/tbbt.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/tbrbtt.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/tbtbt.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@vector/$(DEPDIR)/vector.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< loader-loader.o: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c loader-loader.obj: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs -rm -rf basic/.libs basic/_libs -rm -rf vector/.libs vector/_libs install-nobase_STARPU_OPENCL_DATADATA: $(nobase_STARPU_OPENCL_DATA_DATA) @$(NORMAL_INSTALL) @list='$(nobase_STARPU_OPENCL_DATA_DATA)'; test -n "$(STARPU_OPENCL_DATAdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_OPENCL_DATAdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(STARPU_OPENCL_DATAdir)" || exit 1; \ fi; \ $(am__nobase_list) | while read dir files; do \ xfiles=; for file in $$files; do \ if test -f "$$file"; then xfiles="$$xfiles $$file"; \ else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ test -z "$$xfiles" || { \ test "x$$dir" = x. || { \ echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir'"; \ $(MKDIR_P) "$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir"; }; \ echo " $(INSTALL_DATA) $$xfiles '$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir'"; \ $(INSTALL_DATA) $$xfiles "$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir" || exit $$?; }; \ done uninstall-nobase_STARPU_OPENCL_DATADATA: @$(NORMAL_UNINSTALL) @list='$(nobase_STARPU_OPENCL_DATA_DATA)'; test -n "$(STARPU_OPENCL_DATAdir)" || list=; \ $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ dir='$(DESTDIR)$(STARPU_OPENCL_DATAdir)'; $(am__uninstall_files_from_dir) ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-am TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-am CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-am cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: $(check_PROGRAMS) @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all $(check_PROGRAMS) @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? basic/b.log: basic/b$(EXEEXT) @p='basic/b$(EXEEXT)'; \ b='basic/b'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/bb.log: basic/bb$(EXEEXT) @p='basic/bb$(EXEEXT)'; \ b='basic/bb'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/btb.log: basic/btb$(EXEEXT) @p='basic/btb$(EXEEXT)'; \ b='basic/btb'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/btb_func.log: basic/btb_func$(EXEEXT) @p='basic/btb_func$(EXEEXT)'; \ b='basic/btb_func'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/bbt.log: basic/bbt$(EXEEXT) @p='basic/bbt$(EXEEXT)'; \ b='basic/bbt'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/btt.log: basic/btt$(EXEEXT) @p='basic/btt$(EXEEXT)'; \ b='basic/btt'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/bbtt.log: basic/bbtt$(EXEEXT) @p='basic/bbtt$(EXEEXT)'; \ b='basic/bbtt'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/tbbt.log: basic/tbbt$(EXEEXT) @p='basic/tbbt$(EXEEXT)'; \ b='basic/tbbt'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/tbtbt.log: basic/tbtbt$(EXEEXT) @p='basic/tbtbt$(EXEEXT)'; \ b='basic/tbtbt'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/brt.log: basic/brt$(EXEEXT) @p='basic/brt$(EXEEXT)'; \ b='basic/brt'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/brbtt.log: basic/brbtt$(EXEEXT) @p='basic/brbtt$(EXEEXT)'; \ b='basic/brbtt'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/sync.log: basic/sync$(EXEEXT) @p='basic/sync$(EXEEXT)'; \ b='basic/sync'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/gemm_dag.log: basic/gemm_dag$(EXEEXT) @p='basic/gemm_dag$(EXEEXT)'; \ b='basic/gemm_dag'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/b2t.log: basic/b2t$(EXEEXT) @p='basic/b2t$(EXEEXT)'; \ b='basic/b2t'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/brec.log: basic/brec$(EXEEXT) @p='basic/brec$(EXEEXT)'; \ b='basic/brec'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/brec_level.log: basic/brec_level$(EXEEXT) @p='basic/brec_level$(EXEEXT)'; \ b='basic/brec_level'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/read.log: basic/read$(EXEEXT) @p='basic/read$(EXEEXT)'; \ b='basic/read'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic/tbrbtt.log: basic/tbrbtt$(EXEEXT) @p='basic/tbrbtt$(EXEEXT)'; \ b='basic/tbrbtt'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) vector/vector.log: vector/vector$(EXEEXT) @p='vector/vector$(EXEEXT)'; \ b='vector/vector'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) check-am all-am: Makefile $(PROGRAMS) $(DATA) installdirs: for dir in "$(DESTDIR)$(STARPU_OPENCL_DATAdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-am install-exec: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -rm -f basic/$(DEPDIR)/$(am__dirstamp) -rm -f basic/$(am__dirstamp) -rm -f vector/$(DEPDIR)/$(am__dirstamp) -rm -f vector/$(am__dirstamp) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) clean: clean-am clean-am: clean-checkPROGRAMS clean-generic clean-libtool \ clean-noinstPROGRAMS mostlyclean-am distclean: distclean-am -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f basic/$(DEPDIR)/b.Po -rm -f basic/$(DEPDIR)/b2t.Po -rm -f basic/$(DEPDIR)/bb.Po -rm -f basic/$(DEPDIR)/bbt.Po -rm -f basic/$(DEPDIR)/bbtt.Po -rm -f basic/$(DEPDIR)/brbtt.Po -rm -f basic/$(DEPDIR)/brec.Po -rm -f basic/$(DEPDIR)/brec_level.Po -rm -f basic/$(DEPDIR)/brt.Po -rm -f basic/$(DEPDIR)/btb.Po -rm -f basic/$(DEPDIR)/btb_func.Po -rm -f basic/$(DEPDIR)/btt.Po -rm -f basic/$(DEPDIR)/gemm_dag.Po -rm -f basic/$(DEPDIR)/read.Po -rm -f basic/$(DEPDIR)/sync.Po -rm -f basic/$(DEPDIR)/tbbt.Po -rm -f basic/$(DEPDIR)/tbrbtt.Po -rm -f basic/$(DEPDIR)/tbtbt.Po -rm -f vector/$(DEPDIR)/vector.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-nobase_STARPU_OPENCL_DATADATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f basic/$(DEPDIR)/b.Po -rm -f basic/$(DEPDIR)/b2t.Po -rm -f basic/$(DEPDIR)/bb.Po -rm -f basic/$(DEPDIR)/bbt.Po -rm -f basic/$(DEPDIR)/bbtt.Po -rm -f basic/$(DEPDIR)/brbtt.Po -rm -f basic/$(DEPDIR)/brec.Po -rm -f basic/$(DEPDIR)/brec_level.Po -rm -f basic/$(DEPDIR)/brt.Po -rm -f basic/$(DEPDIR)/btb.Po -rm -f basic/$(DEPDIR)/btb_func.Po -rm -f basic/$(DEPDIR)/btt.Po -rm -f basic/$(DEPDIR)/gemm_dag.Po -rm -f basic/$(DEPDIR)/read.Po -rm -f basic/$(DEPDIR)/sync.Po -rm -f basic/$(DEPDIR)/tbbt.Po -rm -f basic/$(DEPDIR)/tbrbtt.Po -rm -f basic/$(DEPDIR)/tbtbt.Po -rm -f vector/$(DEPDIR)/vector.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-nobase_STARPU_OPENCL_DATADATA .MAKE: all check check-am install install-am install-exec \ install-strip .PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ check-am clean clean-checkPROGRAMS clean-generic clean-libtool \ clean-noinstPROGRAMS cscopelist-am ctags ctags-am distclean \ distclean-compile distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-nobase_STARPU_OPENCL_DATADATA install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ recheck tags tags-am uninstall uninstall-am \ uninstall-nobase_STARPU_OPENCL_DATADATA .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # # Test loading goes through a lot of launchers: # # - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. # either mpirun or starpu_tcpipexec # # - $(LOADER), i.e. tests/loader, is then called to implement timeout, running # gdb, etc. But if it detects that the test is a .sh script, it just executes # it # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # to run the program through e.g. valgrind.sh # # When the program is a shell script, additionally: # # - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) # # - $(MS_LAUNCHER) is called to run the test through starpu_msexec # # - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program # through it. # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # export LAUNCHER @HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL export MS_LAUNCHER LAUNCHER ?= MS_LAUNCHER ?= @STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr export LSAN_OPTIONS export TSAN_OPTIONS # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/bubble/tests/basic/000077500000000000000000000000001507764646700174665ustar00rootroot00000000000000starpu-1.4.9+dfsg/bubble/tests/basic/b.c000066400000000000000000000035121507764646700200540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2019-2019 Gwenole Lucas * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "basic.h" int main(int argv, char **argc) { int ret, i; int v[SIZE]; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_shutdown(); return 77; } for (i=0; i #include "basic.h" #define check_binary_task(x,y) x+=y struct starpu_codelet sub_data_chain_codelet = { .cpu_funcs = {sub_data_func}, .nbuffers = 2, .name = "sub_data_chain_cl" }; void bubble_chain_gen_dag(struct starpu_task *t, void *arg) { FPRINTF(stderr, "Hello i am a bubble\n"); int i; starpu_data_handle_t *subdata = (starpu_data_handle_t *)arg; for(i=0 ; i 0; return 1; } void bubble_gen_dag(struct starpu_task *t, void *arg) { FPRINTF(stderr, "Hello i am a bubble\n"); int i; starpu_data_handle_t *subdata = (starpu_data_handle_t *)arg; for(i=0 ; i #include "basic.h" int main(int argv, char **argc) { int ret, i; int v[SIZE]; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_shutdown(); return 77; } for (i=0; i #include "basic.h" int main(int argv, char **argc) { int ret, i; int v[SIZE]; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_shutdown(); return 77; } for (i=0; i #include "basic.h" int main(int argv, char **argc) { int ret, i; int v[SIZE]; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_shutdown(); return 77; } for (i=0; i #define PARTS 2 #define SIZE 8 #include "basic.h" void rec2_bubble_gen_dag(struct starpu_task *t, void *arg) { int i; starpu_data_handle_t *subdata = (starpu_data_handle_t *)arg; FPRINTF(stderr, "Hello i am a bubble\n"); for(i=0 ; i #define PARTS 1 #define SIZE 25 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = PARTS }; void sub_data_func(void *buffers[], void *arg) { int *v = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); int nx = STARPU_VECTOR_GET_NX(buffers[0]); int i; for(i=0 ; i #define PARTS 2 #define SIZE 24 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = PARTS }; void sub_data_read_func(void *buffers[], void *arg) { } void sub_data_func(void *buffers[], void *arg) { int *v = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); int nx = STARPU_VECTOR_GET_NX(buffers[0]); int i; for(i=0 ; i #define PARTS 2 #define SIZE 8 #include "basic.h" void rec2_bubble_gen_dag(struct starpu_task *t, void *arg) { int i; starpu_data_handle_t *subdata = (starpu_data_handle_t *)arg; FPRINTF(stderr, "Hello i am a bubble\n"); for(i=0 ; i #include "basic.h" int main(int argv, char **argc) { int ret, i; int v[SIZE]; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_shutdown(); return 77; } for (i=0; i #include "basic.h" struct starpu_codelet my_codelet; void my_task_func(void *buffers[], void *arg) { int *v = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); int nx = STARPU_VECTOR_GET_NX(buffers[0]); int i; print_vector(v, nx, "task"); for(i=0 ; i #include "basic.h" int main(int argv, char **argc) { int ret, i; int v[SIZE]; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_shutdown(); return 77; } for (i=0; i #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define PARTS 4 #define SIZE 16 #define SYNC 0 struct bubble_arg { starpu_data_handle_t *A; starpu_data_handle_t *B; starpu_data_handle_t *C; starpu_data_handle_t *subA; starpu_data_handle_t *subB; starpu_data_handle_t *subC; }; struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = PARTS }; void scam_func(void *buffers[], void *arg) { assert(0); } void real_func(void *buffers[], void *arg) { int *A = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); int nx = STARPU_VECTOR_GET_NX(buffers[0]); int i; for (i=0; iA))) && // (starpu_data_get_nb_children_async(*(b->B))) && // (starpu_data_get_nb_children_async(*(b->C)))) // return 1; // else // return 0; } void insert_dag(starpu_data_handle_t *A, starpu_data_handle_t *B, starpu_data_handle_t *C, starpu_data_handle_t *subA, starpu_data_handle_t *subB, starpu_data_handle_t *subC, struct starpu_task *t); void bubble_gen_dag_func(struct starpu_task *t, void *arg) { struct bubble_arg *b_a = (struct bubble_arg*)arg; starpu_data_handle_t *subhandlesA = b_a->subA; starpu_data_handle_t *subhandlesB = b_a->subB; starpu_data_handle_t *subhandlesC = b_a->subC; free(b_a); insert_dag(subhandlesA, subhandlesB, subhandlesC, NULL, NULL, NULL, t); } void insert_dag(starpu_data_handle_t *A, starpu_data_handle_t *B, starpu_data_handle_t *C, starpu_data_handle_t *subA, starpu_data_handle_t *subB, starpu_data_handle_t *subC, struct starpu_task *t) { int ret, i; for (i=0; iA = A; b_a->B = B; b_a->C = C; b_a->subA = subA; b_a->subB = subB; b_a->subC = subC; name = "bubble"; } /* insert bubble on handle */ /* printf("[INSERT] first - %s - %d\n", name, i); */ ret = starpu_task_insert(&gemm_codelet, STARPU_R, handleA1, STARPU_R, handleB1, STARPU_RW, handleC, STARPU_BUBBLE_FUNC, is_bubble, STARPU_BUBBLE_FUNC_ARG, b_a, STARPU_BUBBLE_GEN_DAG_FUNC, bubble_gen_dag_func, STARPU_BUBBLE_GEN_DAG_FUNC_ARG, b_a, STARPU_BUBBLE_PARENT, t, STARPU_TASK_SYNCHRONOUS, SYNC, STARPU_NAME, name, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); name = "task_lvl0"; if (t) { name = "task_lvl1"; } /* printf("[INSERT] second - %s - %d\n", name, i); */ ret = starpu_task_insert(&gemm_codelet, STARPU_R, handleA2, STARPU_R, handleB2, STARPU_RW, handleC, STARPU_BUBBLE_FUNC, is_bubble, STARPU_BUBBLE_FUNC_ARG, NULL, STARPU_BUBBLE_GEN_DAG_FUNC, bubble_gen_dag_func, STARPU_BUBBLE_GEN_DAG_FUNC_ARG, b_a, STARPU_BUBBLE_PARENT, t, STARPU_TASK_SYNCHRONOUS, SYNC, STARPU_NAME, name, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } } void init_handles(int *data, starpu_data_handle_t *handles, starpu_data_handle_t *subhandles) { int i,j; for (i=0; i #include "basic.h" struct starpu_codelet sub_data_chain_codelet = { .cpu_funcs = {sub_data_func}, .nbuffers = 1, .name = "sub_data_chain_cl" }; void bubble_chain_gen_dag(struct starpu_task *t, void *arg) { FPRINTF(stderr, "Hello i am a bubble\n"); starpu_data_handle_t *subdata = (starpu_data_handle_t *)arg; int i; for(i=0 ; i #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define PARTS 2 #define SIZE 16 struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = PARTS }; void scam_func(void *buffers[], void *arg) { assert(0); } void real_func(void *buffers[], void *arg) { int *A = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); int nx = STARPU_VECTOR_GET_NX(buffers[0]); int i; for (i=0; i #include "basic.h" int main(int argv, char **argc) { int ret, i; int v[SIZE]; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_shutdown(); return 77; } for (i=0; i #include "basic.h" #define LENGTH 16 #define NPARTS 2 struct handle_partition { starpu_data_handle_t handle; starpu_data_handle_t *sub; starpu_data_handle_t *sub0; starpu_data_handle_t *sub1; }; struct starpu_data_filter filter = { .filter_func = starpu_vector_filter_block, .nchildren = NPARTS }; void task_2arg_func(void *buffers[], void *arg) { int *v1 = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); int *v2 = (int*)STARPU_VECTOR_GET_PTR(buffers[1]); int nx = STARPU_VECTOR_GET_NX(buffers[0]); int i; print_vector(v1, nx, "task"); for(i=0 ; isub0[1], STARPU_RW, handles->sub1[0], STARPU_NAME, "Task", 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&task_2arg_codelet, STARPU_R, handles->sub0[1], STARPU_RW, handles->sub1[1], STARPU_NAME, "Task", 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } struct starpu_codelet bubble_2arg_codelet = { .cpu_funcs = {bubble_func}, .bubble_func = is_bubble, .bubble_gen_dag_func = bubble_2arg_gen_dag, .nbuffers = 2 }; void bubble_1arg_gen_dag(struct starpu_task *t, void *arg) { FPRINTF(stderr, "Bubble level 1\n"); struct handle_partition *handles = (struct handle_partition*)arg; int ret = starpu_task_insert(&bubble_2arg_codelet, STARPU_R, handles->sub[0], STARPU_RW, handles->sub[1], STARPU_NAME, "BubbleLvl2", STARPU_BUBBLE_GEN_DAG_FUNC_ARG, handles, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } struct starpu_codelet bubble_1arg_codelet = { .cpu_funcs = {bubble_func}, .bubble_func = is_bubble, .bubble_gen_dag_func = bubble_1arg_gen_dag, .nbuffers = 1 }; int main(int argv, char **argc) { int ret, i; int v[LENGTH]; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_shutdown(); return 77; } for (i=0; i #include "basic.h" int main(int argv, char **argc) { int ret, i; int v[SIZE]; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_shutdown(); return 77; } for (i=0; i #include #include #include #include #include #include #include #include #include #include #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) #include #else #include #endif #ifdef STARPU_QUICK_CHECK /* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s add some extra times for tests which run with all schedulers */ #define DEFAULT_TIMEOUT 100 #elif !defined(STARPU_LONG_CHECK) /* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ #define DEFAULT_TIMEOUT 300 #else /* Long checks can be very long */ #define DEFAULT_TIMEOUT 1000 #endif #define AUTOTEST_SKIPPED_TEST 77 static pid_t child_pid = 0; static int timeout; #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) static int mygettimeofday(struct timeval *tv, void *tz) { if (tv) { FILETIME ft; unsigned long long res; GetSystemTimeAsFileTime(&ft); /* 100-nanosecond intervals since January 1, 1601 */ res = ft.dwHighDateTime; res <<= 32; res |= ft.dwLowDateTime; res /= 10; /* Now we have microseconds */ res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; /* Now we are based on epoch */ tv->tv_sec = res / 1000000ULL; tv->tv_usec = res % 1000000ULL; } } #else #define mygettimeofday(tv,tz) gettimeofday(tv,tz) #endif #ifdef STARPU_GDB_PATH static int try_launch_gdb(const char *exe, const char *core) { # define GDB_COMMANDS \ "-ex", "py-list", \ "-ex", "starpu-tasks", \ "-ex", "starpu-workers", \ "-ex", "starpu-print-datas-summary", \ "-ex", "starpu-memusage", \ "-ex", "starpu-print-archs", \ "-ex", "starpu-print-registered-models", \ "-ex", "bt full", \ "-ex", "py-bt", \ "-ex", "thread apply all bt full", \ "-ex", "thread apply all py-bt", \ int err; pid_t pid; struct stat st; const char *top_builddir; char *gdb; err = stat(core, &st); if (err != 0) { fprintf(stderr, "while looking for core file of %s: %s: %m\n", exe, core); return -1; } if (!(st.st_mode & S_IFREG)) { fprintf(stderr, "%s: not a regular file\n", core); return -1; } top_builddir = getenv("top_builddir"); pid = fork(); switch (pid) { case 0: /* kid */ if (top_builddir != NULL) { /* Run gdb with Libtool. */ gdb = alloca(strlen(top_builddir) + sizeof("/libtool") + 1); strcpy(gdb, top_builddir); strcat(gdb, "/libtool"); err = execl(gdb, "gdb", "--mode=execute", STARPU_GDB_PATH, "--batch", GDB_COMMANDS exe, core, NULL); } else { /* Run gdb directly */ gdb = STARPU_GDB_PATH; err = execl(gdb, "gdb", "--batch", GDB_COMMANDS exe, core, NULL); } if (err != 0) { fprintf(stderr, "while launching `%s': %m\n", gdb); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); break; case -1: fprintf(stderr, "fork: %m\n"); return -1; default: /* parent */ { pid_t who; int status; who = waitpid(pid, &status, 0); if (who != pid) fprintf(stderr, "while waiting for gdb " "process %d: %m\n", pid); } } return 0; # undef GDB_COMMANDS } #endif /* STARPU_GDB_PATH */ static void launch_gdb(const char *exe) { #ifdef STARPU_GDB_PATH char s[32]; snprintf(s, sizeof(s), "core.%d", child_pid); if (try_launch_gdb(exe, s) < 0) try_launch_gdb(exe, "core"); #endif /* STARPU_GDB_PATH */ } static char *test_name; static void test_cleaner(int sig) { pid_t child_gid; int status; (void) sig; // send signal to all loader family members fprintf(stderr, "[error] test %s has been blocked for %d seconds. Mark it as failed\n", test_name, timeout); child_gid = getpgid(child_pid); kill(-child_gid, SIGQUIT); waitpid(child_pid, &status, 0); launch_gdb(test_name); raise(SIGALRM); exit(EXIT_FAILURE); } static void forwardsig(int sig) { pid_t child_gid; child_gid = getpgid(child_pid); kill(-child_gid, sig); } static int _decode(char **src, char *motif, const char *value) { char *found; found = strstr(*src, motif); if (found == NULL) return 0; char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); strncpy(new_src, *src, found - *src); strcat(new_src, value); strcat(new_src, found+strlen(motif)); *src = new_src; return 1; } static void decode(char **src, char *motif, const char *value) { if (*src) { if (strstr(*src, motif) && value == NULL) { fprintf(stderr, "error: $%s undefined\n", motif); exit(EXIT_FAILURE); } int d = _decode(src, motif, value); while (d) d = _decode(src, motif, value); } } int main(int argc, char *argv[]) { int child_exit_status; char *test_args; char *launcher; char *launcher_args; char *libtool; char *cflags; const char *top_builddir = getenv("top_builddir"); struct sigaction sa; int ret; struct timeval start; struct timeval end; double timing; int x=1; int asan = 0, lsan = 0, tsan = 0, usan = 0; (void) argc; test_args = NULL; timeout = 0; launcher=getenv("STARPU_CHECK_LAUNCHER"); launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); cflags = getenv("CFLAGS"); if (cflags) { if (strstr(cflags, "-fsanitize=address")) asan = 1; if (strstr(cflags, "-fsanitize=leak")) lsan = 1; if (strstr(cflags, "-fsanitize=thread")) tsan = 1; if (strstr(cflags, "-fsanitize=undefined")) usan = 1; } if (argv[x] && strcmp(argv[x], "-t") == 0) { timeout = strtol(argv[x+1], NULL, 10); x += 2; } else if (getenv("STARPU_TIMEOUT_ENV")) { /* get user-defined iter_max value */ timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); } else if (timeout <= 0) { timeout = DEFAULT_TIMEOUT; if ((launcher && strstr(launcher, "valgrind")) || (launcher && strstr(launcher, "helgrind")) || tsan) timeout *= 20; if (asan || usan || lsan || (launcher && strstr(launcher, "compute-sanitizer"))) timeout *= 5; if (timeout > 1750) timeout = 1750; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG timeout *= 20; #endif #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE /* compare values between the 2 values of timeout */ if (getenv("MPIEXEC_TIMEOUT")) { int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); if (mpiexec_timeout != timeout) fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); } #endif if (argv[x] && strcmp(argv[x], "-p") == 0) { test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); x += 3; } else { test_name = argv[x]; x += 1; } if (!test_name) { fprintf(stderr, "[error] Need name of program to start\n"); exit(EXIT_FAILURE); } size_t len = strlen(test_name); if (len >= 3 && test_name[len-3] == '.' && test_name[len-2] == 's' && test_name[len-1] == 'h') { /* This is a shell script, don't run ourself on bash, but make * the script call us for each program invocation */ char *launch = NULL; if (top_builddir == NULL) // this may fail if .libs is in the directory path setenv("STARPU_LAUNCH", argv[0], 1); else { launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); strcpy(launch, top_builddir); strcat(launch, "/tests/loader"); setenv("STARPU_LAUNCH", launch, 1); } execvp(test_name, argv+x-1); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); free(launch); exit(EXIT_FAILURE); } if (strstr(test_name, "spmv/dw_block_spmv")) { test_args = (char *) calloc(512, sizeof(char)); snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); } else if (strstr(test_name, "starpu_perfmodel_display")) { if (x >= argc) test_args = strdup("-l"); } else if (strstr(test_name, "starpu_perfmodel_plot")) { if (x >= argc) test_args = strdup("-l"); } /* get launcher program */ if (launcher_args) launcher_args=strdup(launcher_args); if (top_builddir == NULL) { fprintf(stderr, "warning: $top_builddir undefined, " "so $STARPU_CHECK_LAUNCHER ignored\n"); launcher = NULL; launcher_args = NULL; libtool = NULL; } else { libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); strcpy(libtool, top_builddir); strcat(libtool, "/libtool"); } if (launcher) { const char *top_srcdir = getenv("top_srcdir"); decode(&launcher, "@top_srcdir@", top_srcdir); decode(&launcher_args, "@top_srcdir@", top_srcdir); } setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); /* set SIGALARM handler */ sa.sa_flags = SA_RESETHAND | SA_NODEFER; sigemptyset(&sa.sa_mask); sa.sa_handler = test_cleaner; if (-1 == sigaction(SIGALRM, &sa, NULL)) perror("sigaction"); signal(SIGINT, forwardsig); signal(SIGHUP, forwardsig); signal(SIGPIPE, forwardsig); signal(SIGTERM, forwardsig); child_pid = fork(); if (child_pid == 0) { char *launcher_argv[100]; int i=0; setpgid(0, 0); /* "Launchers" such as Valgrind need to be inserted * after the Libtool-generated wrapper scripts, hence * this special-case. */ if (launcher && top_builddir != NULL) { launcher_argv[i++] = libtool; launcher_argv[i++] = "--mode=execute"; launcher_argv[i++] = launcher; if (launcher_args) { launcher_argv[i++] = strtok(launcher_args, " "); while (launcher_argv[i-1]) { launcher_argv[i++] = strtok(NULL, " "); } } } launcher_argv[i++] = test_name; if (test_args) launcher_argv[i++] = test_args; else while (argv[x]) { launcher_argv[i++] = argv[x++]; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG launcher_argv[i++] = "--cfg=contexts/factory:thread"; #endif #endif launcher_argv[i++] = NULL; execvp(*launcher_argv, launcher_argv); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); exit(EXIT_FAILURE); } if (child_pid == -1) { fprintf(stderr, "[error] fork. test marked as failed\n"); exit(EXIT_FAILURE); } free(test_args); free(libtool); ret = EXIT_SUCCESS; gettimeofday(&start, NULL); alarm(timeout); if (child_pid == waitpid(child_pid, &child_exit_status, 0)) { if (WIFEXITED(child_exit_status)) { int status = WEXITSTATUS(child_exit_status); if (status == EXIT_SUCCESS) { alarm(0); } else { if (status != AUTOTEST_SKIPPED_TEST) fprintf(stdout, "`%s' exited with return code %d\n", test_name, status); ret = status; } } else if (WIFSIGNALED(child_exit_status)) { fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", test_name, WTERMSIG(child_exit_status)); launch_gdb(test_name); ret = EXIT_FAILURE; } else { fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", test_name); ret = EXIT_FAILURE; } } gettimeofday(&end, NULL); timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); return ret; } starpu-1.4.9+dfsg/bubble/tests/vector/000077500000000000000000000000001507764646700177075ustar00rootroot00000000000000starpu-1.4.9+dfsg/bubble/tests/vector/vector.c000066400000000000000000000114561507764646700213640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2019-2019 Gwenole Lucas * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define X 6 #define SLICES 2 #define NITER 20 #define TYPE int #define PTYPE "%3d" #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void func_cpu(void *descr[], void *_args) { (void) _args; int x; int nx = STARPU_VECTOR_GET_NX(descr[0]); TYPE *v = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); for(x=0 ; x. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # This file is maintained in Automake, please report # bugs to or send patches to # . # func_error message func_error () { echo "$me: $1" 1>&2 exit 1 } file_conv= # func_file_conv build_file # Convert a $build file to $host form and store it in $file # Currently only supports Windows hosts. func_file_conv () { file=$1 case $file in / | /[!/]*) # absolute file, and not a UNC file if test -z "$file_conv"; then # lazily determine how to convert abs files case `uname -s` in MINGW*) file_conv=mingw ;; CYGWIN* | MSYS*) file_conv=cygwin ;; *) file_conv=wine ;; esac fi case $file_conv in mingw) file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` ;; cygwin | msys) file=`cygpath -m "$file" || echo "$file"` ;; wine) file=`winepath -w "$file" || echo "$file"` ;; esac ;; esac } # func_at_file at_file operation archive # Iterate over all members in AT_FILE performing OPERATION on ARCHIVE # for each of them. # When interpreting the content of the @FILE, do NOT use func_file_conv, # since the user would need to supply preconverted file names to # binutils ar, at least for MinGW. func_at_file () { operation=$2 archive=$3 at_file_contents=`cat "$1"` eval set x "$at_file_contents" shift for member do $AR -NOLOGO $operation:"$member" "$archive" || exit $? done } case $1 in '') func_error "no command. Try '$0 --help' for more information." ;; -h | --h*) cat <. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # This file is maintained in Automake, please report # bugs to or send patches to # . nl=' ' # We need space, tab and new line, in precisely that order. Quoting is # there to prevent tools from complaining about whitespace usage. IFS=" "" $nl" file_conv= # func_file_conv build_file lazy # Convert a $build file to $host form and store it in $file # Currently only supports Windows hosts. If the determined conversion # type is listed in (the comma separated) LAZY, no conversion will # take place. func_file_conv () { file=$1 case $file in / | /[!/]*) # absolute file, and not a UNC file if test -z "$file_conv"; then # lazily determine how to convert abs files case `uname -s` in MINGW*) file_conv=mingw ;; CYGWIN* | MSYS*) file_conv=cygwin ;; *) file_conv=wine ;; esac fi case $file_conv/,$2, in *,$file_conv,*) ;; mingw/*) file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` ;; cygwin/* | msys/*) file=`cygpath -m "$file" || echo "$file"` ;; wine/*) file=`winepath -w "$file" || echo "$file"` ;; esac ;; esac } # func_cl_dashL linkdir # Make cl look for libraries in LINKDIR func_cl_dashL () { func_file_conv "$1" if test -z "$lib_path"; then lib_path=$file else lib_path="$lib_path;$file" fi linker_opts="$linker_opts -LIBPATH:$file" } # func_cl_dashl library # Do a library search-path lookup for cl func_cl_dashl () { lib=$1 found=no save_IFS=$IFS IFS=';' for dir in $lib_path $LIB do IFS=$save_IFS if $shared && test -f "$dir/$lib.dll.lib"; then found=yes lib=$dir/$lib.dll.lib break fi if test -f "$dir/$lib.lib"; then found=yes lib=$dir/$lib.lib break fi if test -f "$dir/lib$lib.a"; then found=yes lib=$dir/lib$lib.a break fi done IFS=$save_IFS if test "$found" != yes; then lib=$lib.lib fi } # func_cl_wrapper cl arg... # Adjust compile command to suit cl func_cl_wrapper () { # Assume a capable shell lib_path= shared=: linker_opts= for arg do if test -n "$eat"; then eat= else case $1 in -o) # configure might choose to run compile as 'compile cc -o foo foo.c'. eat=1 case $2 in *.o | *.[oO][bB][jJ]) func_file_conv "$2" set x "$@" -Fo"$file" shift ;; *) func_file_conv "$2" set x "$@" -Fe"$file" shift ;; esac ;; -I) eat=1 func_file_conv "$2" mingw set x "$@" -I"$file" shift ;; -I*) func_file_conv "${1#-I}" mingw set x "$@" -I"$file" shift ;; -l) eat=1 func_cl_dashl "$2" set x "$@" "$lib" shift ;; -l*) func_cl_dashl "${1#-l}" set x "$@" "$lib" shift ;; -L) eat=1 func_cl_dashL "$2" ;; -L*) func_cl_dashL "${1#-L}" ;; -static) shared=false ;; -Wl,*) arg=${1#-Wl,} save_ifs="$IFS"; IFS=',' for flag in $arg; do IFS="$save_ifs" linker_opts="$linker_opts $flag" done IFS="$save_ifs" ;; -Xlinker) eat=1 linker_opts="$linker_opts $2" ;; -*) set x "$@" "$1" shift ;; *.cc | *.CC | *.cxx | *.CXX | *.[cC]++) func_file_conv "$1" set x "$@" -Tp"$file" shift ;; *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO]) func_file_conv "$1" mingw set x "$@" "$file" shift ;; *) set x "$@" "$1" shift ;; esac fi shift done if test -n "$linker_opts"; then linker_opts="-link$linker_opts" fi exec "$@" $linker_opts exit 1 } eat= case $1 in '') echo "$0: No command. Try '$0 --help' for more information." 1>&2 exit 1; ;; -h | --h*) cat <<\EOF Usage: compile [--help] [--version] PROGRAM [ARGS] Wrapper for compilers which do not understand '-c -o'. Remove '-o dest.o' from ARGS, run PROGRAM with the remaining arguments, and rename the output as expected. If you are trying to build a whole package this is not the right script to run: please start by reading the file 'INSTALL'. Report bugs to . EOF exit $? ;; -v | --v*) echo "compile $scriptversion" exit $? ;; cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \ icl | *[/\\]icl | icl.exe | *[/\\]icl.exe ) func_cl_wrapper "$@" # Doesn't return... ;; esac ofile= cfile= for arg do if test -n "$eat"; then eat= else case $1 in -o) # configure might choose to run compile as 'compile cc -o foo foo.c'. # So we strip '-o arg' only if arg is an object. eat=1 case $2 in *.o | *.obj) ofile=$2 ;; *) set x "$@" -o "$2" shift ;; esac ;; *.c) cfile=$1 set x "$@" "$1" shift ;; *) set x "$@" "$1" shift ;; esac fi shift done if test -z "$ofile" || test -z "$cfile"; then # If no '-o' option was seen then we might have been invoked from a # pattern rule where we don't need one. That is ok -- this is a # normal compilation that the losing compiler can handle. If no # '.c' file was seen then we are probably linking. That is also # ok. exec "$@" fi # Name of file we expect compiler to create. cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'` # Create the lock directory. # Note: use '[/\\:.-]' here to ensure that we don't use the same name # that we are using for the .o file. Also, base the name on the expected # object file name, since that is what matters with a parallel build. lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d while true; do if mkdir "$lockdir" >/dev/null 2>&1; then break fi sleep 1 done # FIXME: race condition here if user kills between mkdir and trap. trap "rmdir '$lockdir'; exit 1" 1 2 15 # Run the compile. "$@" ret=$? if test -f "$cofile"; then test "$cofile" = "$ofile" || mv "$cofile" "$ofile" elif test -f "${cofile}bj"; then test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" fi rmdir "$lockdir" exit $ret # Local Variables: # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'before-save-hook 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC0" # time-stamp-end: "; # UTC" # End: starpu-1.4.9+dfsg/build-aux/config.guess000077500000000000000000001405121507764646700202250ustar00rootroot00000000000000#! /bin/sh # Attempt to guess a canonical system name. # Copyright 1992-2022 Free Software Foundation, Inc. # shellcheck disable=SC2006,SC2268 # see below for rationale timestamp='2022-01-09' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, see . # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that # program. This Exception is an additional permission under section 7 # of the GNU General Public License, version 3 ("GPLv3"). # # Originally written by Per Bothner; maintained since 2000 by Ben Elliston. # # You can get the latest version of this script from: # https://git.savannah.gnu.org/cgit/config.git/plain/config.guess # # Please send patches to . # The "shellcheck disable" line above the timestamp inhibits complaints # about features and limitations of the classic Bourne shell that were # superseded or lifted in POSIX. However, this script identifies a wide # variety of pre-POSIX systems that do not have POSIX shells at all, and # even some reasonably current systems (Solaris 10 as case-in-point) still # have a pre-POSIX /bin/sh. me=`echo "$0" | sed -e 's,.*/,,'` usage="\ Usage: $0 [OPTION] Output the configuration name of the system \`$me' is run on. Options: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit Report bugs and patches to ." version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. Copyright 1992-2022 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." help=" Try \`$me --help' for more information." # Parse command line while test $# -gt 0 ; do case $1 in --time-stamp | --time* | -t ) echo "$timestamp" ; exit ;; --version | -v ) echo "$version" ; exit ;; --help | --h* | -h ) echo "$usage"; exit ;; -- ) # Stop option processing shift; break ;; - ) # Use stdin as input. break ;; -* ) echo "$me: invalid option $1$help" >&2 exit 1 ;; * ) break ;; esac done if test $# != 0; then echo "$me: too many arguments$help" >&2 exit 1 fi # Just in case it came from the environment. GUESS= # CC_FOR_BUILD -- compiler used by this script. Note that the use of a # compiler to aid in system detection is discouraged as it requires # temporary files to be created and, as you can see below, it is a # headache to deal with in a portable fashion. # Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still # use `HOST_CC' if defined, but it is deprecated. # Portable tmp directory creation inspired by the Autoconf team. tmp= # shellcheck disable=SC2172 trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15 set_cc_for_build() { # prevent multiple calls if $tmp is already set test "$tmp" && return 0 : "${TMPDIR=/tmp}" # shellcheck disable=SC2039,SC3028 { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } || { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } || { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } dummy=$tmp/dummy case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in ,,) echo "int x;" > "$dummy.c" for driver in cc gcc c89 c99 ; do if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then CC_FOR_BUILD=$driver break fi done if test x"$CC_FOR_BUILD" = x ; then CC_FOR_BUILD=no_compiler_found fi ;; ,,*) CC_FOR_BUILD=$CC ;; ,*,*) CC_FOR_BUILD=$HOST_CC ;; esac } # This is needed to find uname on a Pyramid OSx when run in the BSD universe. # (ghazi@noc.rutgers.edu 1994-08-24) if test -f /.attbin/uname ; then PATH=$PATH:/.attbin ; export PATH fi UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown case $UNAME_SYSTEM in Linux|GNU|GNU/*) LIBC=unknown set_cc_for_build cat <<-EOF > "$dummy.c" #include #if defined(__UCLIBC__) LIBC=uclibc #elif defined(__dietlibc__) LIBC=dietlibc #elif defined(__GLIBC__) LIBC=gnu #else #include /* First heuristic to detect musl libc. */ #ifdef __DEFINED_va_list LIBC=musl #endif #endif EOF cc_set_libc=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'` eval "$cc_set_libc" # Second heuristic to detect musl libc. if [ "$LIBC" = unknown ] && command -v ldd >/dev/null && ldd --version 2>&1 | grep -q ^musl; then LIBC=musl fi # If the system lacks a compiler, then just pick glibc. # We could probably try harder. if [ "$LIBC" = unknown ]; then LIBC=gnu fi ;; esac # Note: order is significant - the case branches are not exclusive. case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in *:NetBSD:*:*) # NetBSD (nbsd) targets should (where applicable) match one or # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently # switched to ELF, *-*-netbsd* would select the old # object file format. This provides both forward # compatibility and a consistent mechanism for selecting the # object file format. # # Note: NetBSD doesn't particularly care about the vendor # portion of the name. We always set it to "unknown". UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ /sbin/sysctl -n hw.machine_arch 2>/dev/null || \ /usr/sbin/sysctl -n hw.machine_arch 2>/dev/null || \ echo unknown)` case $UNAME_MACHINE_ARCH in aarch64eb) machine=aarch64_be-unknown ;; armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; earmv*) arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'` endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'` machine=${arch}${endian}-unknown ;; *) machine=$UNAME_MACHINE_ARCH-unknown ;; esac # The Operating System including object format, if it has switched # to ELF recently (or will in the future) and ABI. case $UNAME_MACHINE_ARCH in earm*) os=netbsdelf ;; arm*|i386|m68k|ns32k|sh3*|sparc|vax) set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ELF__ then # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). # Return netbsd for either. FIX? os=netbsd else os=netbsdelf fi ;; *) os=netbsd ;; esac # Determine ABI tags. case $UNAME_MACHINE_ARCH in earm*) expr='s/^earmv[0-9]/-eabi/;s/eb$//' abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"` ;; esac # The OS release # Debian GNU/NetBSD machines have a different userland, and # thus, need a distinct triplet. However, they do not need # kernel version information, so it can be replaced with a # suitable tag, in the style of linux-gnu. case $UNAME_VERSION in Debian*) release='-gnu' ;; *) release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2` ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. GUESS=$machine-${os}${release}${abi-} ;; *:Bitrig:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` GUESS=$UNAME_MACHINE_ARCH-unknown-bitrig$UNAME_RELEASE ;; *:OpenBSD:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` GUESS=$UNAME_MACHINE_ARCH-unknown-openbsd$UNAME_RELEASE ;; *:SecBSD:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/SecBSD.//'` GUESS=$UNAME_MACHINE_ARCH-unknown-secbsd$UNAME_RELEASE ;; *:LibertyBSD:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` GUESS=$UNAME_MACHINE_ARCH-unknown-libertybsd$UNAME_RELEASE ;; *:MidnightBSD:*:*) GUESS=$UNAME_MACHINE-unknown-midnightbsd$UNAME_RELEASE ;; *:ekkoBSD:*:*) GUESS=$UNAME_MACHINE-unknown-ekkobsd$UNAME_RELEASE ;; *:SolidBSD:*:*) GUESS=$UNAME_MACHINE-unknown-solidbsd$UNAME_RELEASE ;; *:OS108:*:*) GUESS=$UNAME_MACHINE-unknown-os108_$UNAME_RELEASE ;; macppc:MirBSD:*:*) GUESS=powerpc-unknown-mirbsd$UNAME_RELEASE ;; *:MirBSD:*:*) GUESS=$UNAME_MACHINE-unknown-mirbsd$UNAME_RELEASE ;; *:Sortix:*:*) GUESS=$UNAME_MACHINE-unknown-sortix ;; *:Twizzler:*:*) GUESS=$UNAME_MACHINE-unknown-twizzler ;; *:Redox:*:*) GUESS=$UNAME_MACHINE-unknown-redox ;; mips:OSF1:*.*) GUESS=mips-dec-osf1 ;; alpha:OSF1:*:*) # Reset EXIT trap before exiting to avoid spurious non-zero exit code. trap '' 0 case $UNAME_RELEASE in *4.0) UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` ;; *5.*) UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` ;; esac # According to Compaq, /usr/sbin/psrinfo has been available on # OSF/1 and Tru64 systems produced since 1995. I hope that # covers most systems running today. This code pipes the CPU # types through head -n 1, so we only detect the type of CPU 0. ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` case $ALPHA_CPU_TYPE in "EV4 (21064)") UNAME_MACHINE=alpha ;; "EV4.5 (21064)") UNAME_MACHINE=alpha ;; "LCA4 (21066/21068)") UNAME_MACHINE=alpha ;; "EV5 (21164)") UNAME_MACHINE=alphaev5 ;; "EV5.6 (21164A)") UNAME_MACHINE=alphaev56 ;; "EV5.6 (21164PC)") UNAME_MACHINE=alphapca56 ;; "EV5.7 (21164PC)") UNAME_MACHINE=alphapca57 ;; "EV6 (21264)") UNAME_MACHINE=alphaev6 ;; "EV6.7 (21264A)") UNAME_MACHINE=alphaev67 ;; "EV6.8CB (21264C)") UNAME_MACHINE=alphaev68 ;; "EV6.8AL (21264B)") UNAME_MACHINE=alphaev68 ;; "EV6.8CX (21264D)") UNAME_MACHINE=alphaev68 ;; "EV6.9A (21264/EV69A)") UNAME_MACHINE=alphaev69 ;; "EV7 (21364)") UNAME_MACHINE=alphaev7 ;; "EV7.9 (21364A)") UNAME_MACHINE=alphaev79 ;; esac # A Pn.n version is a patched version. # A Vn.n version is a released version. # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. OSF_REL=`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` GUESS=$UNAME_MACHINE-dec-osf$OSF_REL ;; Amiga*:UNIX_System_V:4.0:*) GUESS=m68k-unknown-sysv4 ;; *:[Aa]miga[Oo][Ss]:*:*) GUESS=$UNAME_MACHINE-unknown-amigaos ;; *:[Mm]orph[Oo][Ss]:*:*) GUESS=$UNAME_MACHINE-unknown-morphos ;; *:OS/390:*:*) GUESS=i370-ibm-openedition ;; *:z/VM:*:*) GUESS=s390-ibm-zvmoe ;; *:OS400:*:*) GUESS=powerpc-ibm-os400 ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) GUESS=arm-acorn-riscix$UNAME_RELEASE ;; arm*:riscos:*:*|arm*:RISCOS:*:*) GUESS=arm-unknown-riscos ;; SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) GUESS=hppa1.1-hitachi-hiuxmpp ;; Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. case `(/bin/universe) 2>/dev/null` in att) GUESS=pyramid-pyramid-sysv3 ;; *) GUESS=pyramid-pyramid-bsd ;; esac ;; NILE*:*:*:dcosx) GUESS=pyramid-pyramid-svr4 ;; DRS?6000:unix:4.0:6*) GUESS=sparc-icl-nx6 ;; DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) case `/usr/bin/uname -p` in sparc) GUESS=sparc-icl-nx7 ;; esac ;; s390x:SunOS:*:*) SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` GUESS=$UNAME_MACHINE-ibm-solaris2$SUN_REL ;; sun4H:SunOS:5.*:*) SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` GUESS=sparc-hal-solaris2$SUN_REL ;; sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` GUESS=sparc-sun-solaris2$SUN_REL ;; i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) GUESS=i386-pc-auroraux$UNAME_RELEASE ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) set_cc_for_build SUN_ARCH=i386 # If there is a compiler, see if it is configured for 64-bit objects. # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. # This test works for both compilers. if test "$CC_FOR_BUILD" != no_compiler_found; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ (CCOPTS="" $CC_FOR_BUILD -m64 -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then SUN_ARCH=x86_64 fi fi SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` GUESS=$SUN_ARCH-pc-solaris2$SUN_REL ;; sun4*:SunOS:6*:*) # According to config.sub, this is the proper way to canonicalize # SunOS6. Hard to guess exactly what SunOS6 will be like, but # it's likely to be more like Solaris than SunOS4. SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` GUESS=sparc-sun-solaris3$SUN_REL ;; sun4*:SunOS:*:*) case `/usr/bin/arch -k` in Series*|S4*) UNAME_RELEASE=`uname -v` ;; esac # Japanese Language versions have a version number like `4.1.3-JL'. SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/'` GUESS=sparc-sun-sunos$SUN_REL ;; sun3*:SunOS:*:*) GUESS=m68k-sun-sunos$UNAME_RELEASE ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 case `/bin/arch` in sun3) GUESS=m68k-sun-sunos$UNAME_RELEASE ;; sun4) GUESS=sparc-sun-sunos$UNAME_RELEASE ;; esac ;; aushp:SunOS:*:*) GUESS=sparc-auspex-sunos$UNAME_RELEASE ;; # The situation for MiNT is a little confusing. The machine name # can be virtually everything (everything which is not # "atarist" or "atariste" at least should have a processor # > m68000). The system name ranges from "MiNT" over "FreeMiNT" # to the lowercase version "mint" (or "freemint"). Finally # the system name "TOS" denotes a system which is actually not # MiNT. But MiNT is downward compatible to TOS, so this should # be no problem. atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) GUESS=m68k-atari-mint$UNAME_RELEASE ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) GUESS=m68k-atari-mint$UNAME_RELEASE ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) GUESS=m68k-atari-mint$UNAME_RELEASE ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) GUESS=m68k-milan-mint$UNAME_RELEASE ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) GUESS=m68k-hades-mint$UNAME_RELEASE ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) GUESS=m68k-unknown-mint$UNAME_RELEASE ;; m68k:machten:*:*) GUESS=m68k-apple-machten$UNAME_RELEASE ;; powerpc:machten:*:*) GUESS=powerpc-apple-machten$UNAME_RELEASE ;; RISC*:Mach:*:*) GUESS=mips-dec-mach_bsd4.3 ;; RISC*:ULTRIX:*:*) GUESS=mips-dec-ultrix$UNAME_RELEASE ;; VAX*:ULTRIX*:*:*) GUESS=vax-dec-ultrix$UNAME_RELEASE ;; 2020:CLIX:*:* | 2430:CLIX:*:*) GUESS=clipper-intergraph-clix$UNAME_RELEASE ;; mips:*:*:UMIPS | mips:*:*:RISCos) set_cc_for_build sed 's/^ //' << EOF > "$dummy.c" #ifdef __cplusplus #include /* for printf() prototype */ int main (int argc, char *argv[]) { #else int main (argc, argv) int argc; char *argv[]; { #endif #if defined (host_mips) && defined (MIPSEB) #if defined (SYSTYPE_SYSV) printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_SVR4) printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); #endif #endif exit (-1); } EOF $CC_FOR_BUILD -o "$dummy" "$dummy.c" && dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` && SYSTEM_NAME=`"$dummy" "$dummyarg"` && { echo "$SYSTEM_NAME"; exit; } GUESS=mips-mips-riscos$UNAME_RELEASE ;; Motorola:PowerMAX_OS:*:*) GUESS=powerpc-motorola-powermax ;; Motorola:*:4.3:PL8-*) GUESS=powerpc-harris-powermax ;; Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) GUESS=powerpc-harris-powermax ;; Night_Hawk:Power_UNIX:*:*) GUESS=powerpc-harris-powerunix ;; m88k:CX/UX:7*:*) GUESS=m88k-harris-cxux7 ;; m88k:*:4*:R4*) GUESS=m88k-motorola-sysv4 ;; m88k:*:3*:R3*) GUESS=m88k-motorola-sysv3 ;; AViiON:dgux:*:*) # DG/UX returns AViiON for all architectures UNAME_PROCESSOR=`/usr/bin/uname -p` if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110 then if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \ test "$TARGET_BINARY_INTERFACE"x = x then GUESS=m88k-dg-dgux$UNAME_RELEASE else GUESS=m88k-dg-dguxbcs$UNAME_RELEASE fi else GUESS=i586-dg-dgux$UNAME_RELEASE fi ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) GUESS=m88k-dolphin-sysv3 ;; M88*:*:R3*:*) # Delta 88k system running SVR3 GUESS=m88k-motorola-sysv3 ;; XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) GUESS=m88k-tektronix-sysv3 ;; Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) GUESS=m68k-tektronix-bsd ;; *:IRIX*:*:*) IRIX_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/g'` GUESS=mips-sgi-irix$IRIX_REL ;; ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. GUESS=romp-ibm-aix # uname -m gives an 8 hex-code CPU id ;; # Note that: echo "'`uname -s`'" gives 'AIX ' i*86:AIX:*:*) GUESS=i386-ibm-aix ;; ia64:AIX:*:*) if test -x /usr/bin/oslevel ; then IBM_REV=`/usr/bin/oslevel` else IBM_REV=$UNAME_VERSION.$UNAME_RELEASE fi GUESS=$UNAME_MACHINE-ibm-aix$IBM_REV ;; *:AIX:2:3) if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then set_cc_for_build sed 's/^ //' << EOF > "$dummy.c" #include main() { if (!__power_pc()) exit(1); puts("powerpc-ibm-aix3.2.5"); exit(0); } EOF if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` then GUESS=$SYSTEM_NAME else GUESS=rs6000-ibm-aix3.2.5 fi elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then GUESS=rs6000-ibm-aix3.2.4 else GUESS=rs6000-ibm-aix3.2 fi ;; *:AIX:*:[4567]) IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 else IBM_ARCH=powerpc fi if test -x /usr/bin/lslpp ; then IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | \ awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` else IBM_REV=$UNAME_VERSION.$UNAME_RELEASE fi GUESS=$IBM_ARCH-ibm-aix$IBM_REV ;; *:AIX:*:*) GUESS=rs6000-ibm-aix ;; ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) GUESS=romp-ibm-bsd4.4 ;; ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and GUESS=romp-ibm-bsd$UNAME_RELEASE # 4.3 with uname added to ;; # report: romp-ibm BSD 4.3 *:BOSX:*:*) GUESS=rs6000-bull-bosx ;; DPX/2?00:B.O.S.:*:*) GUESS=m68k-bull-sysv3 ;; 9000/[34]??:4.3bsd:1.*:*) GUESS=m68k-hp-bsd ;; hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) GUESS=m68k-hp-bsd4.4 ;; 9000/[34678]??:HP-UX:*:*) HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'` case $UNAME_MACHINE in 9000/31?) HP_ARCH=m68000 ;; 9000/[34]??) HP_ARCH=m68k ;; 9000/[678][0-9][0-9]) if test -x /usr/bin/getconf; then sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` case $sc_cpu_version in 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 case $sc_kernel_bits in 32) HP_ARCH=hppa2.0n ;; 64) HP_ARCH=hppa2.0w ;; '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 esac ;; esac fi if test "$HP_ARCH" = ""; then set_cc_for_build sed 's/^ //' << EOF > "$dummy.c" #define _HPUX_SOURCE #include #include int main () { #if defined(_SC_KERNEL_BITS) long bits = sysconf(_SC_KERNEL_BITS); #endif long cpu = sysconf (_SC_CPU_VERSION); switch (cpu) { case CPU_PA_RISC1_0: puts ("hppa1.0"); break; case CPU_PA_RISC1_1: puts ("hppa1.1"); break; case CPU_PA_RISC2_0: #if defined(_SC_KERNEL_BITS) switch (bits) { case 64: puts ("hppa2.0w"); break; case 32: puts ("hppa2.0n"); break; default: puts ("hppa2.0"); break; } break; #else /* !defined(_SC_KERNEL_BITS) */ puts ("hppa2.0"); break; #endif default: puts ("hppa1.0"); break; } exit (0); } EOF (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"` test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac if test "$HP_ARCH" = hppa2.0w then set_cc_for_build # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler # generating 64-bit code. GNU and HP use different nomenclature: # # $ CC_FOR_BUILD=cc ./config.guess # => hppa2.0w-hp-hpux11.23 # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess # => hppa64-hp-hpux11.23 if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | grep -q __LP64__ then HP_ARCH=hppa2.0w else HP_ARCH=hppa64 fi fi GUESS=$HP_ARCH-hp-hpux$HPUX_REV ;; ia64:HP-UX:*:*) HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'` GUESS=ia64-hp-hpux$HPUX_REV ;; 3050*:HI-UX:*:*) set_cc_for_build sed 's/^ //' << EOF > "$dummy.c" #include int main () { long cpu = sysconf (_SC_CPU_VERSION); /* The order matters, because CPU_IS_HP_MC68K erroneously returns true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct results, however. */ if (CPU_IS_PA_RISC (cpu)) { switch (cpu) { case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; default: puts ("hppa-hitachi-hiuxwe2"); break; } } else if (CPU_IS_HP_MC68K (cpu)) puts ("m68k-hitachi-hiuxwe2"); else puts ("unknown-hitachi-hiuxwe2"); exit (0); } EOF $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` && { echo "$SYSTEM_NAME"; exit; } GUESS=unknown-hitachi-hiuxwe2 ;; 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) GUESS=hppa1.1-hp-bsd ;; 9000/8??:4.3bsd:*:*) GUESS=hppa1.0-hp-bsd ;; *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) GUESS=hppa1.0-hp-mpeix ;; hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) GUESS=hppa1.1-hp-osf ;; hp8??:OSF1:*:*) GUESS=hppa1.0-hp-osf ;; i*86:OSF1:*:*) if test -x /usr/sbin/sysversion ; then GUESS=$UNAME_MACHINE-unknown-osf1mk else GUESS=$UNAME_MACHINE-unknown-osf1 fi ;; parisc*:Lites*:*:*) GUESS=hppa1.1-hp-lites ;; C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) GUESS=c1-convex-bsd ;; C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi exit ;; C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) GUESS=c34-convex-bsd ;; C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) GUESS=c38-convex-bsd ;; C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) GUESS=c4-convex-bsd ;; CRAY*Y-MP:*:*:*) CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` GUESS=ymp-cray-unicos$CRAY_REL ;; CRAY*[A-Z]90:*:*:*) echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ -e 's/\.[^.]*$/.X/' exit ;; CRAY*TS:*:*:*) CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` GUESS=t90-cray-unicos$CRAY_REL ;; CRAY*T3E:*:*:*) CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` GUESS=alphaev5-cray-unicosmk$CRAY_REL ;; CRAY*SV1:*:*:*) CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` GUESS=sv1-cray-unicos$CRAY_REL ;; *:UNICOS/mp:*:*) CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` GUESS=craynv-cray-unicosmp$CRAY_REL ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'` GUESS=${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL} ;; 5000:UNIX_System_V:4.*:*) FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` GUESS=sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL} ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) GUESS=$UNAME_MACHINE-pc-bsdi$UNAME_RELEASE ;; sparc*:BSD/OS:*:*) GUESS=sparc-unknown-bsdi$UNAME_RELEASE ;; *:BSD/OS:*:*) GUESS=$UNAME_MACHINE-unknown-bsdi$UNAME_RELEASE ;; arm:FreeBSD:*:*) UNAME_PROCESSOR=`uname -p` set_cc_for_build if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_PCS_VFP then FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabi else FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabihf fi ;; *:FreeBSD:*:*) UNAME_PROCESSOR=`/usr/bin/uname -p` case $UNAME_PROCESSOR in amd64) UNAME_PROCESSOR=x86_64 ;; i386) UNAME_PROCESSOR=i586 ;; esac FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL ;; i*:CYGWIN*:*) GUESS=$UNAME_MACHINE-pc-cygwin ;; *:MINGW64*:*) GUESS=$UNAME_MACHINE-pc-mingw64 ;; *:MINGW*:*) GUESS=$UNAME_MACHINE-pc-mingw32 ;; *:MSYS*:*) GUESS=$UNAME_MACHINE-pc-msys ;; i*:PW*:*) GUESS=$UNAME_MACHINE-pc-pw32 ;; *:SerenityOS:*:*) GUESS=$UNAME_MACHINE-pc-serenity ;; *:Interix*:*) case $UNAME_MACHINE in x86) GUESS=i586-pc-interix$UNAME_RELEASE ;; authenticamd | genuineintel | EM64T) GUESS=x86_64-unknown-interix$UNAME_RELEASE ;; IA64) GUESS=ia64-unknown-interix$UNAME_RELEASE ;; esac ;; i*:UWIN*:*) GUESS=$UNAME_MACHINE-pc-uwin ;; amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) GUESS=x86_64-pc-cygwin ;; prep*:SunOS:5.*:*) SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` GUESS=powerpcle-unknown-solaris2$SUN_REL ;; *:GNU:*:*) # the GNU system GNU_ARCH=`echo "$UNAME_MACHINE" | sed -e 's,[-/].*$,,'` GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's,/.*$,,'` GUESS=$GNU_ARCH-unknown-$LIBC$GNU_REL ;; *:GNU/*:*:*) # other systems with GNU libc and userland GNU_SYS=`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"` GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` GUESS=$UNAME_MACHINE-unknown-$GNU_SYS$GNU_REL-$LIBC ;; *:Minix:*:*) GUESS=$UNAME_MACHINE-unknown-minix ;; aarch64:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; aarch64_be:Linux:*:*) UNAME_MACHINE=aarch64_be GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; alpha:Linux:*:*) case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null` in EV5) UNAME_MACHINE=alphaev5 ;; EV56) UNAME_MACHINE=alphaev56 ;; PCA56) UNAME_MACHINE=alphapca56 ;; PCA57) UNAME_MACHINE=alphapca56 ;; EV6) UNAME_MACHINE=alphaev6 ;; EV67) UNAME_MACHINE=alphaev67 ;; EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers /bin/sh | grep -q ld.so.1 if test "$?" = 0 ; then LIBC=gnulibc1 ; fi GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; arc:Linux:*:* | arceb:Linux:*:* | arc32:Linux:*:* | arc64:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; arm*:Linux:*:*) set_cc_for_build if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_EABI__ then GUESS=$UNAME_MACHINE-unknown-linux-$LIBC else if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_PCS_VFP then GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabi else GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabihf fi fi ;; avr32*:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; cris:Linux:*:*) GUESS=$UNAME_MACHINE-axis-linux-$LIBC ;; crisv32:Linux:*:*) GUESS=$UNAME_MACHINE-axis-linux-$LIBC ;; e2k:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; frv:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; hexagon:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; i*86:Linux:*:*) GUESS=$UNAME_MACHINE-pc-linux-$LIBC ;; ia64:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; k1om:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; m32r*:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; m68*:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; mips:Linux:*:* | mips64:Linux:*:*) set_cc_for_build IS_GLIBC=0 test x"${LIBC}" = xgnu && IS_GLIBC=1 sed 's/^ //' << EOF > "$dummy.c" #undef CPU #undef mips #undef mipsel #undef mips64 #undef mips64el #if ${IS_GLIBC} && defined(_ABI64) LIBCABI=gnuabi64 #else #if ${IS_GLIBC} && defined(_ABIN32) LIBCABI=gnuabin32 #else LIBCABI=${LIBC} #endif #endif #if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 CPU=mipsisa64r6 #else #if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 CPU=mipsisa32r6 #else #if defined(__mips64) CPU=mips64 #else CPU=mips #endif #endif #endif #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) MIPS_ENDIAN=el #else #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) MIPS_ENDIAN= #else MIPS_ENDIAN= #endif #endif EOF cc_set_vars=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI'` eval "$cc_set_vars" test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; } ;; mips64el:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; openrisc*:Linux:*:*) GUESS=or1k-unknown-linux-$LIBC ;; or32:Linux:*:* | or1k*:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; padre:Linux:*:*) GUESS=sparc-unknown-linux-$LIBC ;; parisc64:Linux:*:* | hppa64:Linux:*:*) GUESS=hppa64-unknown-linux-$LIBC ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in PA7*) GUESS=hppa1.1-unknown-linux-$LIBC ;; PA8*) GUESS=hppa2.0-unknown-linux-$LIBC ;; *) GUESS=hppa-unknown-linux-$LIBC ;; esac ;; ppc64:Linux:*:*) GUESS=powerpc64-unknown-linux-$LIBC ;; ppc:Linux:*:*) GUESS=powerpc-unknown-linux-$LIBC ;; ppc64le:Linux:*:*) GUESS=powerpc64le-unknown-linux-$LIBC ;; ppcle:Linux:*:*) GUESS=powerpcle-unknown-linux-$LIBC ;; riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | riscv64be:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; s390:Linux:*:* | s390x:Linux:*:*) GUESS=$UNAME_MACHINE-ibm-linux-$LIBC ;; sh64*:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; sh*:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; sparc:Linux:*:* | sparc64:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; tile*:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; vax:Linux:*:*) GUESS=$UNAME_MACHINE-dec-linux-$LIBC ;; x86_64:Linux:*:*) set_cc_for_build LIBCABI=$LIBC if test "$CC_FOR_BUILD" != no_compiler_found; then if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \ (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_X32 >/dev/null then LIBCABI=${LIBC}x32 fi fi GUESS=$UNAME_MACHINE-pc-linux-$LIBCABI ;; xtensa*:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. # earlier versions are messed up and put the nodename in both # sysname and nodename. GUESS=i386-sequent-sysv4 ;; i*86:UNIX_SV:4.2MP:2.*) # Unixware is an offshoot of SVR4, but it has its own version # number series starting with 2... # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. # Use sysv4.2uw... so that sysv4* matches it. GUESS=$UNAME_MACHINE-pc-sysv4.2uw$UNAME_VERSION ;; i*86:OS/2:*:*) # If we were able to find `uname', then EMX Unix compatibility # is probably installed. GUESS=$UNAME_MACHINE-pc-os2-emx ;; i*86:XTS-300:*:STOP) GUESS=$UNAME_MACHINE-unknown-stop ;; i*86:atheos:*:*) GUESS=$UNAME_MACHINE-unknown-atheos ;; i*86:syllable:*:*) GUESS=$UNAME_MACHINE-pc-syllable ;; i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) GUESS=i386-unknown-lynxos$UNAME_RELEASE ;; i*86:*DOS:*:*) GUESS=$UNAME_MACHINE-pc-msdosdjgpp ;; i*86:*:4.*:*) UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'` if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then GUESS=$UNAME_MACHINE-univel-sysv$UNAME_REL else GUESS=$UNAME_MACHINE-pc-sysv$UNAME_REL fi ;; i*86:*:5:[678]*) # UnixWare 7.x, OpenUNIX and OpenServer 6. case `/bin/uname -X | grep "^Machine"` in *486*) UNAME_MACHINE=i486 ;; *Pentium) UNAME_MACHINE=i586 ;; *Pent*|*Celeron) UNAME_MACHINE=i686 ;; esac GUESS=$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} ;; i*86:*:3.2:*) if test -f /usr/options/cb.name; then UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ && UNAME_MACHINE=i586 (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ && UNAME_MACHINE=i686 (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ && UNAME_MACHINE=i686 GUESS=$UNAME_MACHINE-pc-sco$UNAME_REL else GUESS=$UNAME_MACHINE-pc-sysv32 fi ;; pc:*:*:*) # Left here for compatibility: # uname -m prints for DJGPP always 'pc', but it prints nothing about # the processor, so we play safe by assuming i586. # Note: whatever this is, it MUST be the same as what config.sub # prints for the "djgpp" host, or else GDB configure will decide that # this is a cross-build. GUESS=i586-pc-msdosdjgpp ;; Intel:Mach:3*:*) GUESS=i386-pc-mach3 ;; paragon:*:*:*) GUESS=i860-intel-osf1 ;; i860:*:4.*:*) # i860-SVR4 if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then GUESS=i860-stardent-sysv$UNAME_RELEASE # Stardent Vistra i860-SVR4 else # Add other i860-SVR4 vendors below as they are discovered. GUESS=i860-unknown-sysv$UNAME_RELEASE # Unknown i860-SVR4 fi ;; mini*:CTIX:SYS*5:*) # "miniframe" GUESS=m68010-convergent-sysv ;; mc68k:UNIX:SYSTEM5:3.51m) GUESS=m68k-convergent-sysv ;; M680?0:D-NIX:5.3:*) GUESS=m68k-diab-dnix ;; M68*:*:R3V[5678]*:*) test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) OS_REL='' test -r /etc/.relid \ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4; exit; } ;; NCR*:*:4.2:* | MPRAS*:*:4.2:*) OS_REL='.3' test -r /etc/.relid \ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) GUESS=m68k-unknown-lynxos$UNAME_RELEASE ;; mc68030:UNIX_System_V:4.*:*) GUESS=m68k-atari-sysv4 ;; TSUNAMI:LynxOS:2.*:*) GUESS=sparc-unknown-lynxos$UNAME_RELEASE ;; rs6000:LynxOS:2.*:*) GUESS=rs6000-unknown-lynxos$UNAME_RELEASE ;; PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) GUESS=powerpc-unknown-lynxos$UNAME_RELEASE ;; SM[BE]S:UNIX_SV:*:*) GUESS=mips-dde-sysv$UNAME_RELEASE ;; RM*:ReliantUNIX-*:*:*) GUESS=mips-sni-sysv4 ;; RM*:SINIX-*:*:*) GUESS=mips-sni-sysv4 ;; *:SINIX-*:*:*) if uname -p 2>/dev/null >/dev/null ; then UNAME_MACHINE=`(uname -p) 2>/dev/null` GUESS=$UNAME_MACHINE-sni-sysv4 else GUESS=ns32k-sni-sysv fi ;; PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort # says GUESS=i586-unisys-sysv4 ;; *:UNIX_System_V:4*:FTX*) # From Gerald Hewes . # How about differentiating between stratus architectures? -djm GUESS=hppa1.1-stratus-sysv4 ;; *:*:*:FTX*) # From seanf@swdc.stratus.com. GUESS=i860-stratus-sysv4 ;; i*86:VOS:*:*) # From Paul.Green@stratus.com. GUESS=$UNAME_MACHINE-stratus-vos ;; *:VOS:*:*) # From Paul.Green@stratus.com. GUESS=hppa1.1-stratus-vos ;; mc68*:A/UX:*:*) GUESS=m68k-apple-aux$UNAME_RELEASE ;; news*:NEWS-OS:6*:*) GUESS=mips-sony-newsos6 ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) if test -d /usr/nec; then GUESS=mips-nec-sysv$UNAME_RELEASE else GUESS=mips-unknown-sysv$UNAME_RELEASE fi ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. GUESS=powerpc-be-beos ;; BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. GUESS=powerpc-apple-beos ;; BePC:BeOS:*:*) # BeOS running on Intel PC compatible. GUESS=i586-pc-beos ;; BePC:Haiku:*:*) # Haiku running on Intel PC compatible. GUESS=i586-pc-haiku ;; x86_64:Haiku:*:*) GUESS=x86_64-unknown-haiku ;; SX-4:SUPER-UX:*:*) GUESS=sx4-nec-superux$UNAME_RELEASE ;; SX-5:SUPER-UX:*:*) GUESS=sx5-nec-superux$UNAME_RELEASE ;; SX-6:SUPER-UX:*:*) GUESS=sx6-nec-superux$UNAME_RELEASE ;; SX-7:SUPER-UX:*:*) GUESS=sx7-nec-superux$UNAME_RELEASE ;; SX-8:SUPER-UX:*:*) GUESS=sx8-nec-superux$UNAME_RELEASE ;; SX-8R:SUPER-UX:*:*) GUESS=sx8r-nec-superux$UNAME_RELEASE ;; SX-ACE:SUPER-UX:*:*) GUESS=sxace-nec-superux$UNAME_RELEASE ;; Power*:Rhapsody:*:*) GUESS=powerpc-apple-rhapsody$UNAME_RELEASE ;; *:Rhapsody:*:*) GUESS=$UNAME_MACHINE-apple-rhapsody$UNAME_RELEASE ;; arm64:Darwin:*:*) GUESS=aarch64-apple-darwin$UNAME_RELEASE ;; *:Darwin:*:*) UNAME_PROCESSOR=`uname -p` case $UNAME_PROCESSOR in unknown) UNAME_PROCESSOR=powerpc ;; esac if command -v xcode-select > /dev/null 2> /dev/null && \ ! xcode-select --print-path > /dev/null 2> /dev/null ; then # Avoid executing cc if there is no toolchain installed as # cc will be a stub that puts up a graphical alert # prompting the user to install developer tools. CC_FOR_BUILD=no_compiler_found else set_cc_for_build fi if test "$CC_FOR_BUILD" != no_compiler_found; then if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then case $UNAME_PROCESSOR in i386) UNAME_PROCESSOR=x86_64 ;; powerpc) UNAME_PROCESSOR=powerpc64 ;; esac fi # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_PPC >/dev/null then UNAME_PROCESSOR=powerpc fi elif test "$UNAME_PROCESSOR" = i386 ; then # uname -m returns i386 or x86_64 UNAME_PROCESSOR=$UNAME_MACHINE fi GUESS=$UNAME_PROCESSOR-apple-darwin$UNAME_RELEASE ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` if test "$UNAME_PROCESSOR" = x86; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi GUESS=$UNAME_PROCESSOR-$UNAME_MACHINE-nto-qnx$UNAME_RELEASE ;; *:QNX:*:4*) GUESS=i386-pc-qnx ;; NEO-*:NONSTOP_KERNEL:*:*) GUESS=neo-tandem-nsk$UNAME_RELEASE ;; NSE-*:NONSTOP_KERNEL:*:*) GUESS=nse-tandem-nsk$UNAME_RELEASE ;; NSR-*:NONSTOP_KERNEL:*:*) GUESS=nsr-tandem-nsk$UNAME_RELEASE ;; NSV-*:NONSTOP_KERNEL:*:*) GUESS=nsv-tandem-nsk$UNAME_RELEASE ;; NSX-*:NONSTOP_KERNEL:*:*) GUESS=nsx-tandem-nsk$UNAME_RELEASE ;; *:NonStop-UX:*:*) GUESS=mips-compaq-nonstopux ;; BS2000:POSIX*:*:*) GUESS=bs2000-siemens-sysv ;; DS/*:UNIX_System_V:*:*) GUESS=$UNAME_MACHINE-$UNAME_SYSTEM-$UNAME_RELEASE ;; *:Plan9:*:*) # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. if test "${cputype-}" = 386; then UNAME_MACHINE=i386 elif test "x${cputype-}" != x; then UNAME_MACHINE=$cputype fi GUESS=$UNAME_MACHINE-unknown-plan9 ;; *:TOPS-10:*:*) GUESS=pdp10-unknown-tops10 ;; *:TENEX:*:*) GUESS=pdp10-unknown-tenex ;; KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) GUESS=pdp10-dec-tops20 ;; XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) GUESS=pdp10-xkl-tops20 ;; *:TOPS-20:*:*) GUESS=pdp10-unknown-tops20 ;; *:ITS:*:*) GUESS=pdp10-unknown-its ;; SEI:*:*:SEIUX) GUESS=mips-sei-seiux$UNAME_RELEASE ;; *:DragonFly:*:*) DRAGONFLY_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` GUESS=$UNAME_MACHINE-unknown-dragonfly$DRAGONFLY_REL ;; *:*VMS:*:*) UNAME_MACHINE=`(uname -p) 2>/dev/null` case $UNAME_MACHINE in A*) GUESS=alpha-dec-vms ;; I*) GUESS=ia64-dec-vms ;; V*) GUESS=vax-dec-vms ;; esac ;; *:XENIX:*:SysV) GUESS=i386-pc-xenix ;; i*86:skyos:*:*) SKYOS_REL=`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'` GUESS=$UNAME_MACHINE-pc-skyos$SKYOS_REL ;; i*86:rdos:*:*) GUESS=$UNAME_MACHINE-pc-rdos ;; i*86:Fiwix:*:*) GUESS=$UNAME_MACHINE-pc-fiwix ;; *:AROS:*:*) GUESS=$UNAME_MACHINE-unknown-aros ;; x86_64:VMkernel:*:*) GUESS=$UNAME_MACHINE-unknown-esx ;; amd64:Isilon\ OneFS:*:*) GUESS=x86_64-unknown-onefs ;; *:Unleashed:*:*) GUESS=$UNAME_MACHINE-unknown-unleashed$UNAME_RELEASE ;; esac # Do we have a guess based on uname results? if test "x$GUESS" != x; then echo "$GUESS" exit fi # No uname command or uname output not recognized. set_cc_for_build cat > "$dummy.c" < #include #endif #if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) #if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) #include #if defined(_SIZE_T_) || defined(SIGLOST) #include #endif #endif #endif main () { #if defined (sony) #if defined (MIPSEB) /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, I don't know.... */ printf ("mips-sony-bsd\n"); exit (0); #else #include printf ("m68k-sony-newsos%s\n", #ifdef NEWSOS4 "4" #else "" #endif ); exit (0); #endif #endif #if defined (NeXT) #if !defined (__ARCHITECTURE__) #define __ARCHITECTURE__ "m68k" #endif int version; version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; if (version < 4) printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); else printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); exit (0); #endif #if defined (MULTIMAX) || defined (n16) #if defined (UMAXV) printf ("ns32k-encore-sysv\n"); exit (0); #else #if defined (CMU) printf ("ns32k-encore-mach\n"); exit (0); #else printf ("ns32k-encore-bsd\n"); exit (0); #endif #endif #endif #if defined (__386BSD__) printf ("i386-pc-bsd\n"); exit (0); #endif #if defined (sequent) #if defined (i386) printf ("i386-sequent-dynix\n"); exit (0); #endif #if defined (ns32000) printf ("ns32k-sequent-dynix\n"); exit (0); #endif #endif #if defined (_SEQUENT_) struct utsname un; uname(&un); if (strncmp(un.version, "V2", 2) == 0) { printf ("i386-sequent-ptx2\n"); exit (0); } if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */ printf ("i386-sequent-ptx1\n"); exit (0); } printf ("i386-sequent-ptx\n"); exit (0); #endif #if defined (vax) #if !defined (ultrix) #include #if defined (BSD) #if BSD == 43 printf ("vax-dec-bsd4.3\n"); exit (0); #else #if BSD == 199006 printf ("vax-dec-bsd4.3reno\n"); exit (0); #else printf ("vax-dec-bsd\n"); exit (0); #endif #endif #else printf ("vax-dec-bsd\n"); exit (0); #endif #else #if defined(_SIZE_T_) || defined(SIGLOST) struct utsname un; uname (&un); printf ("vax-dec-ultrix%s\n", un.release); exit (0); #else printf ("vax-dec-ultrix\n"); exit (0); #endif #endif #endif #if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) #if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) #if defined(_SIZE_T_) || defined(SIGLOST) struct utsname *un; uname (&un); printf ("mips-dec-ultrix%s\n", un.release); exit (0); #else printf ("mips-dec-ultrix\n"); exit (0); #endif #endif #endif #if defined (alliant) && defined (i860) printf ("i860-alliant-bsd\n"); exit (0); #endif exit (1); } EOF $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=`"$dummy"` && { echo "$SYSTEM_NAME"; exit; } # Apollos put the system type in the environment. test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; } echo "$0: unable to guess system type" >&2 case $UNAME_MACHINE:$UNAME_SYSTEM in mips:Linux | mips64:Linux) # If we got here on MIPS GNU/Linux, output extra information. cat >&2 <&2 <&2 </dev/null || echo unknown` uname -r = `(uname -r) 2>/dev/null || echo unknown` uname -s = `(uname -s) 2>/dev/null || echo unknown` uname -v = `(uname -v) 2>/dev/null || echo unknown` /usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` /bin/uname -X = `(/bin/uname -X) 2>/dev/null` hostinfo = `(hostinfo) 2>/dev/null` /bin/universe = `(/bin/universe) 2>/dev/null` /usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` /bin/arch = `(/bin/arch) 2>/dev/null` /usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` UNAME_MACHINE = "$UNAME_MACHINE" UNAME_RELEASE = "$UNAME_RELEASE" UNAME_SYSTEM = "$UNAME_SYSTEM" UNAME_VERSION = "$UNAME_VERSION" EOF fi exit 1 # Local variables: # eval: (add-hook 'before-save-hook 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" # End: starpu-1.4.9+dfsg/build-aux/config.sub000077500000000000000000001051161507764646700176710ustar00rootroot00000000000000#! /bin/sh # Configuration validation subroutine script. # Copyright 1992-2022 Free Software Foundation, Inc. # shellcheck disable=SC2006,SC2268 # see below for rationale timestamp='2022-01-03' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, see . # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that # program. This Exception is an additional permission under section 7 # of the GNU General Public License, version 3 ("GPLv3"). # Please send patches to . # # Configuration subroutine to validate and canonicalize a configuration type. # Supply the specified configuration type as an argument. # If it is invalid, we print an error message on stderr and exit with code 1. # Otherwise, we print the canonical config type on stdout and succeed. # You can get the latest version of this script from: # https://git.savannah.gnu.org/cgit/config.git/plain/config.sub # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases # that are meaningful with *any* GNU software. # Each package is responsible for reporting which valid configurations # it does not support. The user should be able to distinguish # a failure to support a valid configuration from a meaningless # configuration. # The goal of this file is to map all the various variations of a given # machine specification into a single specification in the form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM # or in some cases, the newer four-part form: # CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM # It is wrong to echo any other type of specification. # The "shellcheck disable" line above the timestamp inhibits complaints # about features and limitations of the classic Bourne shell that were # superseded or lifted in POSIX. However, this script identifies a wide # variety of pre-POSIX systems that do not have POSIX shells at all, and # even some reasonably current systems (Solaris 10 as case-in-point) still # have a pre-POSIX /bin/sh. me=`echo "$0" | sed -e 's,.*/,,'` usage="\ Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS Canonicalize a configuration name. Options: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit Report bugs and patches to ." version="\ GNU config.sub ($timestamp) Copyright 1992-2022 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." help=" Try \`$me --help' for more information." # Parse command line while test $# -gt 0 ; do case $1 in --time-stamp | --time* | -t ) echo "$timestamp" ; exit ;; --version | -v ) echo "$version" ; exit ;; --help | --h* | -h ) echo "$usage"; exit ;; -- ) # Stop option processing shift; break ;; - ) # Use stdin as input. break ;; -* ) echo "$me: invalid option $1$help" >&2 exit 1 ;; *local*) # First pass through any local machine types. echo "$1" exit ;; * ) break ;; esac done case $# in 0) echo "$me: missing argument$help" >&2 exit 1;; 1) ;; *) echo "$me: too many arguments$help" >&2 exit 1;; esac # Split fields of configuration type # shellcheck disable=SC2162 saved_IFS=$IFS IFS="-" read field1 field2 field3 field4 <&2 exit 1 ;; *-*-*-*) basic_machine=$field1-$field2 basic_os=$field3-$field4 ;; *-*-*) # Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two # parts maybe_os=$field2-$field3 case $maybe_os in nto-qnx* | linux-* | uclinux-uclibc* \ | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ | storm-chaos* | os2-emx* | rtmk-nova*) basic_machine=$field1 basic_os=$maybe_os ;; android-linux) basic_machine=$field1-unknown basic_os=linux-android ;; *) basic_machine=$field1-$field2 basic_os=$field3 ;; esac ;; *-*) # A lone config we happen to match not fitting any pattern case $field1-$field2 in decstation-3100) basic_machine=mips-dec basic_os= ;; *-*) # Second component is usually, but not always the OS case $field2 in # Prevent following clause from handling this valid os sun*os*) basic_machine=$field1 basic_os=$field2 ;; zephyr*) basic_machine=$field1-unknown basic_os=$field2 ;; # Manufacturers dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \ | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \ | unicom* | ibm* | next | hp | isi* | apollo | altos* \ | convergent* | ncr* | news | 32* | 3600* | 3100* \ | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \ | ultra | tti* | harris | dolphin | highlevel | gould \ | cbm | ns | masscomp | apple | axis | knuth | cray \ | microblaze* | sim | cisco \ | oki | wec | wrs | winbond) basic_machine=$field1-$field2 basic_os= ;; *) basic_machine=$field1 basic_os=$field2 ;; esac ;; esac ;; *) # Convert single-component short-hands not valid as part of # multi-component configurations. case $field1 in 386bsd) basic_machine=i386-pc basic_os=bsd ;; a29khif) basic_machine=a29k-amd basic_os=udi ;; adobe68k) basic_machine=m68010-adobe basic_os=scout ;; alliant) basic_machine=fx80-alliant basic_os= ;; altos | altos3068) basic_machine=m68k-altos basic_os= ;; am29k) basic_machine=a29k-none basic_os=bsd ;; amdahl) basic_machine=580-amdahl basic_os=sysv ;; amiga) basic_machine=m68k-unknown basic_os= ;; amigaos | amigados) basic_machine=m68k-unknown basic_os=amigaos ;; amigaunix | amix) basic_machine=m68k-unknown basic_os=sysv4 ;; apollo68) basic_machine=m68k-apollo basic_os=sysv ;; apollo68bsd) basic_machine=m68k-apollo basic_os=bsd ;; aros) basic_machine=i386-pc basic_os=aros ;; aux) basic_machine=m68k-apple basic_os=aux ;; balance) basic_machine=ns32k-sequent basic_os=dynix ;; blackfin) basic_machine=bfin-unknown basic_os=linux ;; cegcc) basic_machine=arm-unknown basic_os=cegcc ;; convex-c1) basic_machine=c1-convex basic_os=bsd ;; convex-c2) basic_machine=c2-convex basic_os=bsd ;; convex-c32) basic_machine=c32-convex basic_os=bsd ;; convex-c34) basic_machine=c34-convex basic_os=bsd ;; convex-c38) basic_machine=c38-convex basic_os=bsd ;; cray) basic_machine=j90-cray basic_os=unicos ;; crds | unos) basic_machine=m68k-crds basic_os= ;; da30) basic_machine=m68k-da30 basic_os= ;; decstation | pmax | pmin | dec3100 | decstatn) basic_machine=mips-dec basic_os= ;; delta88) basic_machine=m88k-motorola basic_os=sysv3 ;; dicos) basic_machine=i686-pc basic_os=dicos ;; djgpp) basic_machine=i586-pc basic_os=msdosdjgpp ;; ebmon29k) basic_machine=a29k-amd basic_os=ebmon ;; es1800 | OSE68k | ose68k | ose | OSE) basic_machine=m68k-ericsson basic_os=ose ;; gmicro) basic_machine=tron-gmicro basic_os=sysv ;; go32) basic_machine=i386-pc basic_os=go32 ;; h8300hms) basic_machine=h8300-hitachi basic_os=hms ;; h8300xray) basic_machine=h8300-hitachi basic_os=xray ;; h8500hms) basic_machine=h8500-hitachi basic_os=hms ;; harris) basic_machine=m88k-harris basic_os=sysv3 ;; hp300 | hp300hpux) basic_machine=m68k-hp basic_os=hpux ;; hp300bsd) basic_machine=m68k-hp basic_os=bsd ;; hppaosf) basic_machine=hppa1.1-hp basic_os=osf ;; hppro) basic_machine=hppa1.1-hp basic_os=proelf ;; i386mach) basic_machine=i386-mach basic_os=mach ;; isi68 | isi) basic_machine=m68k-isi basic_os=sysv ;; m68knommu) basic_machine=m68k-unknown basic_os=linux ;; magnum | m3230) basic_machine=mips-mips basic_os=sysv ;; merlin) basic_machine=ns32k-utek basic_os=sysv ;; mingw64) basic_machine=x86_64-pc basic_os=mingw64 ;; mingw32) basic_machine=i686-pc basic_os=mingw32 ;; mingw32ce) basic_machine=arm-unknown basic_os=mingw32ce ;; monitor) basic_machine=m68k-rom68k basic_os=coff ;; morphos) basic_machine=powerpc-unknown basic_os=morphos ;; moxiebox) basic_machine=moxie-unknown basic_os=moxiebox ;; msdos) basic_machine=i386-pc basic_os=msdos ;; msys) basic_machine=i686-pc basic_os=msys ;; mvs) basic_machine=i370-ibm basic_os=mvs ;; nacl) basic_machine=le32-unknown basic_os=nacl ;; ncr3000) basic_machine=i486-ncr basic_os=sysv4 ;; netbsd386) basic_machine=i386-pc basic_os=netbsd ;; netwinder) basic_machine=armv4l-rebel basic_os=linux ;; news | news700 | news800 | news900) basic_machine=m68k-sony basic_os=newsos ;; news1000) basic_machine=m68030-sony basic_os=newsos ;; necv70) basic_machine=v70-nec basic_os=sysv ;; nh3000) basic_machine=m68k-harris basic_os=cxux ;; nh[45]000) basic_machine=m88k-harris basic_os=cxux ;; nindy960) basic_machine=i960-intel basic_os=nindy ;; mon960) basic_machine=i960-intel basic_os=mon960 ;; nonstopux) basic_machine=mips-compaq basic_os=nonstopux ;; os400) basic_machine=powerpc-ibm basic_os=os400 ;; OSE68000 | ose68000) basic_machine=m68000-ericsson basic_os=ose ;; os68k) basic_machine=m68k-none basic_os=os68k ;; paragon) basic_machine=i860-intel basic_os=osf ;; parisc) basic_machine=hppa-unknown basic_os=linux ;; psp) basic_machine=mipsallegrexel-sony basic_os=psp ;; pw32) basic_machine=i586-unknown basic_os=pw32 ;; rdos | rdos64) basic_machine=x86_64-pc basic_os=rdos ;; rdos32) basic_machine=i386-pc basic_os=rdos ;; rom68k) basic_machine=m68k-rom68k basic_os=coff ;; sa29200) basic_machine=a29k-amd basic_os=udi ;; sei) basic_machine=mips-sei basic_os=seiux ;; sequent) basic_machine=i386-sequent basic_os= ;; sps7) basic_machine=m68k-bull basic_os=sysv2 ;; st2000) basic_machine=m68k-tandem basic_os= ;; stratus) basic_machine=i860-stratus basic_os=sysv4 ;; sun2) basic_machine=m68000-sun basic_os= ;; sun2os3) basic_machine=m68000-sun basic_os=sunos3 ;; sun2os4) basic_machine=m68000-sun basic_os=sunos4 ;; sun3) basic_machine=m68k-sun basic_os= ;; sun3os3) basic_machine=m68k-sun basic_os=sunos3 ;; sun3os4) basic_machine=m68k-sun basic_os=sunos4 ;; sun4) basic_machine=sparc-sun basic_os= ;; sun4os3) basic_machine=sparc-sun basic_os=sunos3 ;; sun4os4) basic_machine=sparc-sun basic_os=sunos4 ;; sun4sol2) basic_machine=sparc-sun basic_os=solaris2 ;; sun386 | sun386i | roadrunner) basic_machine=i386-sun basic_os= ;; sv1) basic_machine=sv1-cray basic_os=unicos ;; symmetry) basic_machine=i386-sequent basic_os=dynix ;; t3e) basic_machine=alphaev5-cray basic_os=unicos ;; t90) basic_machine=t90-cray basic_os=unicos ;; toad1) basic_machine=pdp10-xkl basic_os=tops20 ;; tpf) basic_machine=s390x-ibm basic_os=tpf ;; udi29k) basic_machine=a29k-amd basic_os=udi ;; ultra3) basic_machine=a29k-nyu basic_os=sym1 ;; v810 | necv810) basic_machine=v810-nec basic_os=none ;; vaxv) basic_machine=vax-dec basic_os=sysv ;; vms) basic_machine=vax-dec basic_os=vms ;; vsta) basic_machine=i386-pc basic_os=vsta ;; vxworks960) basic_machine=i960-wrs basic_os=vxworks ;; vxworks68) basic_machine=m68k-wrs basic_os=vxworks ;; vxworks29k) basic_machine=a29k-wrs basic_os=vxworks ;; xbox) basic_machine=i686-pc basic_os=mingw32 ;; ymp) basic_machine=ymp-cray basic_os=unicos ;; *) basic_machine=$1 basic_os= ;; esac ;; esac # Decode 1-component or ad-hoc basic machines case $basic_machine in # Here we handle the default manufacturer of certain CPU types. It is in # some cases the only manufacturer, in others, it is the most popular. w89k) cpu=hppa1.1 vendor=winbond ;; op50n) cpu=hppa1.1 vendor=oki ;; op60c) cpu=hppa1.1 vendor=oki ;; ibm*) cpu=i370 vendor=ibm ;; orion105) cpu=clipper vendor=highlevel ;; mac | mpw | mac-mpw) cpu=m68k vendor=apple ;; pmac | pmac-mpw) cpu=powerpc vendor=apple ;; # Recognize the various machine names and aliases which stand # for a CPU type and a company and sometimes even an OS. 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) cpu=m68000 vendor=att ;; 3b*) cpu=we32k vendor=att ;; bluegene*) cpu=powerpc vendor=ibm basic_os=cnk ;; decsystem10* | dec10*) cpu=pdp10 vendor=dec basic_os=tops10 ;; decsystem20* | dec20*) cpu=pdp10 vendor=dec basic_os=tops20 ;; delta | 3300 | motorola-3300 | motorola-delta \ | 3300-motorola | delta-motorola) cpu=m68k vendor=motorola ;; dpx2*) cpu=m68k vendor=bull basic_os=sysv3 ;; encore | umax | mmax) cpu=ns32k vendor=encore ;; elxsi) cpu=elxsi vendor=elxsi basic_os=${basic_os:-bsd} ;; fx2800) cpu=i860 vendor=alliant ;; genix) cpu=ns32k vendor=ns ;; h3050r* | hiux*) cpu=hppa1.1 vendor=hitachi basic_os=hiuxwe2 ;; hp3k9[0-9][0-9] | hp9[0-9][0-9]) cpu=hppa1.0 vendor=hp ;; hp9k2[0-9][0-9] | hp9k31[0-9]) cpu=m68000 vendor=hp ;; hp9k3[2-9][0-9]) cpu=m68k vendor=hp ;; hp9k6[0-9][0-9] | hp6[0-9][0-9]) cpu=hppa1.0 vendor=hp ;; hp9k7[0-79][0-9] | hp7[0-79][0-9]) cpu=hppa1.1 vendor=hp ;; hp9k78[0-9] | hp78[0-9]) # FIXME: really hppa2.0-hp cpu=hppa1.1 vendor=hp ;; hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) # FIXME: really hppa2.0-hp cpu=hppa1.1 vendor=hp ;; hp9k8[0-9][13679] | hp8[0-9][13679]) cpu=hppa1.1 vendor=hp ;; hp9k8[0-9][0-9] | hp8[0-9][0-9]) cpu=hppa1.0 vendor=hp ;; i*86v32) cpu=`echo "$1" | sed -e 's/86.*/86/'` vendor=pc basic_os=sysv32 ;; i*86v4*) cpu=`echo "$1" | sed -e 's/86.*/86/'` vendor=pc basic_os=sysv4 ;; i*86v) cpu=`echo "$1" | sed -e 's/86.*/86/'` vendor=pc basic_os=sysv ;; i*86sol2) cpu=`echo "$1" | sed -e 's/86.*/86/'` vendor=pc basic_os=solaris2 ;; j90 | j90-cray) cpu=j90 vendor=cray basic_os=${basic_os:-unicos} ;; iris | iris4d) cpu=mips vendor=sgi case $basic_os in irix*) ;; *) basic_os=irix4 ;; esac ;; miniframe) cpu=m68000 vendor=convergent ;; *mint | mint[0-9]* | *MiNT | *MiNT[0-9]*) cpu=m68k vendor=atari basic_os=mint ;; news-3600 | risc-news) cpu=mips vendor=sony basic_os=newsos ;; next | m*-next) cpu=m68k vendor=next case $basic_os in openstep*) ;; nextstep*) ;; ns2*) basic_os=nextstep2 ;; *) basic_os=nextstep3 ;; esac ;; np1) cpu=np1 vendor=gould ;; op50n-* | op60c-*) cpu=hppa1.1 vendor=oki basic_os=proelf ;; pa-hitachi) cpu=hppa1.1 vendor=hitachi basic_os=hiuxwe2 ;; pbd) cpu=sparc vendor=tti ;; pbb) cpu=m68k vendor=tti ;; pc532) cpu=ns32k vendor=pc532 ;; pn) cpu=pn vendor=gould ;; power) cpu=power vendor=ibm ;; ps2) cpu=i386 vendor=ibm ;; rm[46]00) cpu=mips vendor=siemens ;; rtpc | rtpc-*) cpu=romp vendor=ibm ;; sde) cpu=mipsisa32 vendor=sde basic_os=${basic_os:-elf} ;; simso-wrs) cpu=sparclite vendor=wrs basic_os=vxworks ;; tower | tower-32) cpu=m68k vendor=ncr ;; vpp*|vx|vx-*) cpu=f301 vendor=fujitsu ;; w65) cpu=w65 vendor=wdc ;; w89k-*) cpu=hppa1.1 vendor=winbond basic_os=proelf ;; none) cpu=none vendor=none ;; leon|leon[3-9]) cpu=sparc vendor=$basic_machine ;; leon-*|leon[3-9]-*) cpu=sparc vendor=`echo "$basic_machine" | sed 's/-.*//'` ;; *-*) # shellcheck disable=SC2162 saved_IFS=$IFS IFS="-" read cpu vendor <&2 exit 1 ;; esac ;; esac # Here we canonicalize certain aliases for manufacturers. case $vendor in digital*) vendor=dec ;; commodore*) vendor=cbm ;; *) ;; esac # Decode manufacturer-specific aliases for certain operating systems. if test x$basic_os != x then # First recognize some ad-hoc cases, or perhaps split kernel-os, or else just # set os. case $basic_os in gnu/linux*) kernel=linux os=`echo "$basic_os" | sed -e 's|gnu/linux|gnu|'` ;; os2-emx) kernel=os2 os=`echo "$basic_os" | sed -e 's|os2-emx|emx|'` ;; nto-qnx*) kernel=nto os=`echo "$basic_os" | sed -e 's|nto-qnx|qnx|'` ;; *-*) # shellcheck disable=SC2162 saved_IFS=$IFS IFS="-" read kernel os <&2 exit 1 ;; esac # As a final step for OS-related things, validate the OS-kernel combination # (given a valid OS), if there is a kernel. case $kernel-$os in linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* \ | linux-musl* | linux-relibc* | linux-uclibc* ) ;; uclinux-uclibc* ) ;; -dietlibc* | -newlib* | -musl* | -relibc* | -uclibc* ) # These are just libc implementations, not actual OSes, and thus # require a kernel. echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2 exit 1 ;; kfreebsd*-gnu* | kopensolaris*-gnu*) ;; vxworks-simlinux | vxworks-simwindows | vxworks-spe) ;; nto-qnx*) ;; os2-emx) ;; *-eabi* | *-gnueabi*) ;; -*) # Blank kernel with real OS is always fine. ;; *-*) echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2 exit 1 ;; esac # Here we handle the case where we know the os, and the CPU type, but not the # manufacturer. We pick the logical manufacturer. case $vendor in unknown) case $cpu-$os in *-riscix*) vendor=acorn ;; *-sunos*) vendor=sun ;; *-cnk* | *-aix*) vendor=ibm ;; *-beos*) vendor=be ;; *-hpux*) vendor=hp ;; *-mpeix*) vendor=hp ;; *-hiux*) vendor=hitachi ;; *-unos*) vendor=crds ;; *-dgux*) vendor=dg ;; *-luna*) vendor=omron ;; *-genix*) vendor=ns ;; *-clix*) vendor=intergraph ;; *-mvs* | *-opened*) vendor=ibm ;; *-os400*) vendor=ibm ;; s390-* | s390x-*) vendor=ibm ;; *-ptx*) vendor=sequent ;; *-tpf*) vendor=ibm ;; *-vxsim* | *-vxworks* | *-windiss*) vendor=wrs ;; *-aux*) vendor=apple ;; *-hms*) vendor=hitachi ;; *-mpw* | *-macos*) vendor=apple ;; *-*mint | *-mint[0-9]* | *-*MiNT | *-MiNT[0-9]*) vendor=atari ;; *-vos*) vendor=stratus ;; esac ;; esac echo "$cpu-$vendor-${kernel:+$kernel-}$os" exit # Local variables: # eval: (add-hook 'before-save-hook 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" # End: starpu-1.4.9+dfsg/build-aux/depcomp000077500000000000000000000560201507764646700172620ustar00rootroot00000000000000#! /bin/sh # depcomp - compile a program generating dependencies as side-effects scriptversion=2018-03-07.03; # UTC # Copyright (C) 1999-2021 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # Originally written by Alexandre Oliva . case $1 in '') echo "$0: No command. Try '$0 --help' for more information." 1>&2 exit 1; ;; -h | --h*) cat <<\EOF Usage: depcomp [--help] [--version] PROGRAM [ARGS] Run PROGRAMS ARGS to compile a file, generating dependencies as side-effects. Environment variables: depmode Dependency tracking mode. source Source file read by 'PROGRAMS ARGS'. object Object file output by 'PROGRAMS ARGS'. DEPDIR directory where to store dependencies. depfile Dependency file to output. tmpdepfile Temporary file to use when outputting dependencies. libtool Whether libtool is used (yes/no). Report bugs to . EOF exit $? ;; -v | --v*) echo "depcomp $scriptversion" exit $? ;; esac # Get the directory component of the given path, and save it in the # global variables '$dir'. Note that this directory component will # be either empty or ending with a '/' character. This is deliberate. set_dir_from () { case $1 in */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;; *) dir=;; esac } # Get the suffix-stripped basename of the given path, and save it the # global variable '$base'. set_base_from () { base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'` } # If no dependency file was actually created by the compiler invocation, # we still have to create a dummy depfile, to avoid errors with the # Makefile "include basename.Plo" scheme. make_dummy_depfile () { echo "#dummy" > "$depfile" } # Factor out some common post-processing of the generated depfile. # Requires the auxiliary global variable '$tmpdepfile' to be set. aix_post_process_depfile () { # If the compiler actually managed to produce a dependency file, # post-process it. if test -f "$tmpdepfile"; then # Each line is of the form 'foo.o: dependency.h'. # Do two passes, one to just change these to # $object: dependency.h # and one to simply output # dependency.h: # which is needed to avoid the deleted-header problem. { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" } > "$depfile" rm -f "$tmpdepfile" else make_dummy_depfile fi } # A tabulation character. tab=' ' # A newline character. nl=' ' # Character ranges might be problematic outside the C locale. # These definitions help. upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ lower=abcdefghijklmnopqrstuvwxyz digits=0123456789 alpha=${upper}${lower} if test -z "$depmode" || test -z "$source" || test -z "$object"; then echo "depcomp: Variables source, object and depmode must be set" 1>&2 exit 1 fi # Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. depfile=${depfile-`echo "$object" | sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} rm -f "$tmpdepfile" # Avoid interferences from the environment. gccflag= dashmflag= # Some modes work just like other modes, but use different flags. We # parameterize here, but still list the modes in the big case below, # to make depend.m4 easier to write. Note that we *cannot* use a case # here, because this file can only contain one case statement. if test "$depmode" = hp; then # HP compiler uses -M and no extra arg. gccflag=-M depmode=gcc fi if test "$depmode" = dashXmstdout; then # This is just like dashmstdout with a different argument. dashmflag=-xM depmode=dashmstdout fi cygpath_u="cygpath -u -f -" if test "$depmode" = msvcmsys; then # This is just like msvisualcpp but w/o cygpath translation. # Just convert the backslash-escaped backslashes to single forward # slashes to satisfy depend.m4 cygpath_u='sed s,\\\\,/,g' depmode=msvisualcpp fi if test "$depmode" = msvc7msys; then # This is just like msvc7 but w/o cygpath translation. # Just convert the backslash-escaped backslashes to single forward # slashes to satisfy depend.m4 cygpath_u='sed s,\\\\,/,g' depmode=msvc7 fi if test "$depmode" = xlc; then # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information. gccflag=-qmakedep=gcc,-MF depmode=gcc fi case "$depmode" in gcc3) ## gcc 3 implements dependency tracking that does exactly what ## we want. Yay! Note: for some reason libtool 1.4 doesn't like ## it if -MD -MP comes after the -MF stuff. Hmm. ## Unfortunately, FreeBSD c89 acceptance of flags depends upon ## the command line argument order; so add the flags where they ## appear in depend2.am. Note that the slowdown incurred here ## affects only configure: in makefiles, %FASTDEP% shortcuts this. for arg do case $arg in -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; *) set fnord "$@" "$arg" ;; esac shift # fnord shift # $arg done "$@" stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi mv "$tmpdepfile" "$depfile" ;; gcc) ## Note that this doesn't just cater to obsosete pre-3.x GCC compilers. ## but also to in-use compilers like IMB xlc/xlC and the HP C compiler. ## (see the conditional assignment to $gccflag above). ## There are various ways to get dependency output from gcc. Here's ## why we pick this rather obscure method: ## - Don't want to use -MD because we'd like the dependencies to end ## up in a subdir. Having to rename by hand is ugly. ## (We might end up doing this anyway to support other compilers.) ## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like ## -MM, not -M (despite what the docs say). Also, it might not be ## supported by the other compilers which use the 'gcc' depmode. ## - Using -M directly means running the compiler twice (even worse ## than renaming). if test -z "$gccflag"; then gccflag=-MD, fi "$@" -Wp,"$gccflag$tmpdepfile" stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" echo "$object : \\" > "$depfile" # The second -e expression handles DOS-style file names with drive # letters. sed -e 's/^[^:]*: / /' \ -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" ## This next piece of magic avoids the "deleted header file" problem. ## The problem is that when a header file which appears in a .P file ## is deleted, the dependency causes make to die (because there is ## typically no way to rebuild the header). We avoid this by adding ## dummy dependencies for each header file. Too bad gcc doesn't do ## this for us directly. ## Some versions of gcc put a space before the ':'. On the theory ## that the space means something, we add a space to the output as ## well. hp depmode also adds that space, but also prefixes the VPATH ## to the object. Take care to not repeat it in the output. ## Some versions of the HPUX 10.20 sed can't process this invocation ## correctly. Breaking it into two sed invocations is a workaround. tr ' ' "$nl" < "$tmpdepfile" \ | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; hp) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; sgi) if test "$libtool" = yes; then "$@" "-Wp,-MDupdate,$tmpdepfile" else "$@" -MDupdate "$tmpdepfile" fi stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files echo "$object : \\" > "$depfile" # Clip off the initial element (the dependent). Don't try to be # clever and replace this with sed code, as IRIX sed won't handle # lines with more than a fixed number of characters (4096 in # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; # the IRIX cc adds comments like '#:fec' to the end of the # dependency line. tr ' ' "$nl" < "$tmpdepfile" \ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \ | tr "$nl" ' ' >> "$depfile" echo >> "$depfile" # The second pass generates a dummy entry for each header file. tr ' ' "$nl" < "$tmpdepfile" \ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ >> "$depfile" else make_dummy_depfile fi rm -f "$tmpdepfile" ;; xlc) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; aix) # The C for AIX Compiler uses -M and outputs the dependencies # in a .u file. In older versions, this file always lives in the # current directory. Also, the AIX compiler puts '$object:' at the # start of each line; $object doesn't have directory information. # Version 6 uses the directory in both cases. set_dir_from "$object" set_base_from "$object" if test "$libtool" = yes; then tmpdepfile1=$dir$base.u tmpdepfile2=$base.u tmpdepfile3=$dir.libs/$base.u "$@" -Wc,-M else tmpdepfile1=$dir$base.u tmpdepfile2=$dir$base.u tmpdepfile3=$dir$base.u "$@" -M fi stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" do test -f "$tmpdepfile" && break done aix_post_process_depfile ;; tcc) # tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26 # FIXME: That version still under development at the moment of writing. # Make that this statement remains true also for stable, released # versions. # It will wrap lines (doesn't matter whether long or short) with a # trailing '\', as in: # # foo.o : \ # foo.c \ # foo.h \ # # It will put a trailing '\' even on the last line, and will use leading # spaces rather than leading tabs (at least since its commit 0394caf7 # "Emit spaces for -MD"). "$@" -MD -MF "$tmpdepfile" stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'. # We have to change lines of the first kind to '$object: \'. sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile" # And for each line of the second kind, we have to emit a 'dep.h:' # dummy dependency, to avoid the deleted-header problem. sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile" rm -f "$tmpdepfile" ;; ## The order of this option in the case statement is important, since the ## shell code in configure will try each of these formats in the order ## listed in this file. A plain '-MD' option would be understood by many ## compilers, so we must ensure this comes after the gcc and icc options. pgcc) # Portland's C compiler understands '-MD'. # Will always output deps to 'file.d' where file is the root name of the # source file under compilation, even if file resides in a subdirectory. # The object file name does not affect the name of the '.d' file. # pgcc 10.2 will output # foo.o: sub/foo.c sub/foo.h # and will wrap long lines using '\' : # foo.o: sub/foo.c ... \ # sub/foo.h ... \ # ... set_dir_from "$object" # Use the source, not the object, to determine the base name, since # that's sadly what pgcc will do too. set_base_from "$source" tmpdepfile=$base.d # For projects that build the same source file twice into different object # files, the pgcc approach of using the *source* file root name can cause # problems in parallel builds. Use a locking strategy to avoid stomping on # the same $tmpdepfile. lockdir=$base.d-lock trap " echo '$0: caught signal, cleaning up...' >&2 rmdir '$lockdir' exit 1 " 1 2 13 15 numtries=100 i=$numtries while test $i -gt 0; do # mkdir is a portable test-and-set. if mkdir "$lockdir" 2>/dev/null; then # This process acquired the lock. "$@" -MD stat=$? # Release the lock. rmdir "$lockdir" break else # If the lock is being held by a different process, wait # until the winning process is done or we timeout. while test -d "$lockdir" && test $i -gt 0; do sleep 1 i=`expr $i - 1` done fi i=`expr $i - 1` done trap - 1 2 13 15 if test $i -le 0; then echo "$0: failed to acquire lock after $numtries attempts" >&2 echo "$0: check lockdir '$lockdir'" >&2 exit 1 fi if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" # Each line is of the form `foo.o: dependent.h', # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. # Do two passes, one to just change these to # `$object: dependent.h' and one to simply `dependent.h:'. sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" # Some versions of the HPUX 10.20 sed can't process this invocation # correctly. Breaking it into two sed invocations is a workaround. sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \ | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; hp2) # The "hp" stanza above does not work with aCC (C++) and HP's ia64 # compilers, which have integrated preprocessors. The correct option # to use with these is +Maked; it writes dependencies to a file named # 'foo.d', which lands next to the object file, wherever that # happens to be. # Much of this is similar to the tru64 case; see comments there. set_dir_from "$object" set_base_from "$object" if test "$libtool" = yes; then tmpdepfile1=$dir$base.d tmpdepfile2=$dir.libs/$base.d "$@" -Wc,+Maked else tmpdepfile1=$dir$base.d tmpdepfile2=$dir$base.d "$@" +Maked fi stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile1" "$tmpdepfile2" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" do test -f "$tmpdepfile" && break done if test -f "$tmpdepfile"; then sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile" # Add 'dependent.h:' lines. sed -ne '2,${ s/^ *// s/ \\*$// s/$/:/ p }' "$tmpdepfile" >> "$depfile" else make_dummy_depfile fi rm -f "$tmpdepfile" "$tmpdepfile2" ;; tru64) # The Tru64 compiler uses -MD to generate dependencies as a side # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'. # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put # dependencies in 'foo.d' instead, so we check for that too. # Subdirectories are respected. set_dir_from "$object" set_base_from "$object" if test "$libtool" = yes; then # Libtool generates 2 separate objects for the 2 libraries. These # two compilations output dependencies in $dir.libs/$base.o.d and # in $dir$base.o.d. We have to check for both files, because # one of the two compilations can be disabled. We should prefer # $dir$base.o.d over $dir.libs/$base.o.d because the latter is # automatically cleaned when .libs/ is deleted, while ignoring # the former would cause a distcleancheck panic. tmpdepfile1=$dir$base.o.d # libtool 1.5 tmpdepfile2=$dir.libs/$base.o.d # Likewise. tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504 "$@" -Wc,-MD else tmpdepfile1=$dir$base.d tmpdepfile2=$dir$base.d tmpdepfile3=$dir$base.d "$@" -MD fi stat=$? if test $stat -ne 0; then rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" do test -f "$tmpdepfile" && break done # Same post-processing that is required for AIX mode. aix_post_process_depfile ;; msvc7) if test "$libtool" = yes; then showIncludes=-Wc,-showIncludes else showIncludes=-showIncludes fi "$@" $showIncludes > "$tmpdepfile" stat=$? grep -v '^Note: including file: ' "$tmpdepfile" if test $stat -ne 0; then rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" echo "$object : \\" > "$depfile" # The first sed program below extracts the file names and escapes # backslashes for cygpath. The second sed program outputs the file # name when reading, but also accumulates all include files in the # hold buffer in order to output them again at the end. This only # works with sed implementations that can handle large buffers. sed < "$tmpdepfile" -n ' /^Note: including file: *\(.*\)/ { s//\1/ s/\\/\\\\/g p }' | $cygpath_u | sort -u | sed -n ' s/ /\\ /g s/\(.*\)/'"$tab"'\1 \\/p s/.\(.*\) \\/\1:/ H $ { s/.*/'"$tab"'/ G p }' >> "$depfile" echo >> "$depfile" # make sure the fragment doesn't end with a backslash rm -f "$tmpdepfile" ;; msvc7msys) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; #nosideeffect) # This comment above is used by automake to tell side-effect # dependency tracking mechanisms from slower ones. dashmstdout) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout, regardless of -o. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi # Remove '-o $object'. IFS=" " for arg do case $arg in -o) shift ;; $object) shift ;; *) set fnord "$@" "$arg" shift # fnord shift # $arg ;; esac done test -z "$dashmflag" && dashmflag=-M # Require at least two characters before searching for ':' # in the target name. This is to cope with DOS-style filenames: # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise. "$@" $dashmflag | sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile" rm -f "$depfile" cat < "$tmpdepfile" > "$depfile" # Some versions of the HPUX 10.20 sed can't process this sed invocation # correctly. Breaking it into two sed invocations is a workaround. tr ' ' "$nl" < "$tmpdepfile" \ | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; dashXmstdout) # This case only exists to satisfy depend.m4. It is never actually # run, as this mode is specially recognized in the preamble. exit 1 ;; makedepend) "$@" || exit $? # Remove any Libtool call if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi # X makedepend shift cleared=no eat=no for arg do case $cleared in no) set ""; shift cleared=yes ;; esac if test $eat = yes; then eat=no continue fi case "$arg" in -D*|-I*) set fnord "$@" "$arg"; shift ;; # Strip any option that makedepend may not understand. Remove # the object too, otherwise makedepend will parse it as a source file. -arch) eat=yes ;; -*|$object) ;; *) set fnord "$@" "$arg"; shift ;; esac done obj_suffix=`echo "$object" | sed 's/^.*\././'` touch "$tmpdepfile" ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" rm -f "$depfile" # makedepend may prepend the VPATH from the source file name to the object. # No need to regex-escape $object, excess matching of '.' is harmless. sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" # Some versions of the HPUX 10.20 sed can't process the last invocation # correctly. Breaking it into two sed invocations is a workaround. sed '1,2d' "$tmpdepfile" \ | tr ' ' "$nl" \ | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" "$tmpdepfile".bak ;; cpp) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi # Remove '-o $object'. IFS=" " for arg do case $arg in -o) shift ;; $object) shift ;; *) set fnord "$@" "$arg" shift # fnord shift # $arg ;; esac done "$@" -E \ | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ | sed '$ s: \\$::' > "$tmpdepfile" rm -f "$depfile" echo "$object : \\" > "$depfile" cat < "$tmpdepfile" >> "$depfile" sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; msvisualcpp) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi IFS=" " for arg do case "$arg" in -o) shift ;; $object) shift ;; "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") set fnord "$@" shift shift ;; *) set fnord "$@" "$arg" shift shift ;; esac done "$@" -E 2>/dev/null | sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" rm -f "$depfile" echo "$object : \\" > "$depfile" sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile" echo "$tab" >> "$depfile" sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" rm -f "$tmpdepfile" ;; msvcmsys) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; none) exec "$@" ;; *) echo "Unknown depmode $depmode" 1>&2 exit 1 ;; esac exit 0 # Local Variables: # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'before-save-hook 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC0" # time-stamp-end: "; # UTC" # End: starpu-1.4.9+dfsg/build-aux/install-sh000077500000000000000000000357761507764646700177300ustar00rootroot00000000000000#!/bin/sh # install - install a program, script, or datafile scriptversion=2020-11-14.01; # UTC # This originates from X11R5 (mit/util/scripts/install.sh), which was # later released in X11R6 (xc/config/util/install.sh) with the # following copyright and license. # # Copyright (C) 1994 X Consortium # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to # deal in the Software without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or # sell copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN # AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- # TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # # Except as contained in this notice, the name of the X Consortium shall not # be used in advertising or otherwise to promote the sale, use or other deal- # ings in this Software without prior written authorization from the X Consor- # tium. # # # FSF changes to this file are in the public domain. # # Calling this script install-sh is preferred over install.sh, to prevent # 'make' implicit rules from creating a file called install from it # when there is no Makefile. # # This script is compatible with the BSD install script, but was written # from scratch. tab=' ' nl=' ' IFS=" $tab$nl" # Set DOITPROG to "echo" to test this script. doit=${DOITPROG-} doit_exec=${doit:-exec} # Put in absolute file names if you don't have them in your path; # or use environment vars. chgrpprog=${CHGRPPROG-chgrp} chmodprog=${CHMODPROG-chmod} chownprog=${CHOWNPROG-chown} cmpprog=${CMPPROG-cmp} cpprog=${CPPROG-cp} mkdirprog=${MKDIRPROG-mkdir} mvprog=${MVPROG-mv} rmprog=${RMPROG-rm} stripprog=${STRIPPROG-strip} posix_mkdir= # Desired mode of installed file. mode=0755 # Create dirs (including intermediate dirs) using mode 755. # This is like GNU 'install' as of coreutils 8.32 (2020). mkdir_umask=22 backupsuffix= chgrpcmd= chmodcmd=$chmodprog chowncmd= mvcmd=$mvprog rmcmd="$rmprog -f" stripcmd= src= dst= dir_arg= dst_arg= copy_on_change=false is_target_a_directory=possibly usage="\ Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE or: $0 [OPTION]... SRCFILES... DIRECTORY or: $0 [OPTION]... -t DIRECTORY SRCFILES... or: $0 [OPTION]... -d DIRECTORIES... In the 1st form, copy SRCFILE to DSTFILE. In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. In the 4th, create DIRECTORIES. Options: --help display this help and exit. --version display version info and exit. -c (ignored) -C install only if different (preserve data modification time) -d create directories instead of installing files. -g GROUP $chgrpprog installed files to GROUP. -m MODE $chmodprog installed files to MODE. -o USER $chownprog installed files to USER. -p pass -p to $cpprog. -s $stripprog installed files. -S SUFFIX attempt to back up existing files, with suffix SUFFIX. -t DIRECTORY install into DIRECTORY. -T report an error if DSTFILE is a directory. Environment variables override the default commands: CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG By default, rm is invoked with -f; when overridden with RMPROG, it's up to you to specify -f if you want it. If -S is not specified, no backups are attempted. Email bug reports to bug-automake@gnu.org. Automake home page: https://www.gnu.org/software/automake/ " while test $# -ne 0; do case $1 in -c) ;; -C) copy_on_change=true;; -d) dir_arg=true;; -g) chgrpcmd="$chgrpprog $2" shift;; --help) echo "$usage"; exit $?;; -m) mode=$2 case $mode in *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*) echo "$0: invalid mode: $mode" >&2 exit 1;; esac shift;; -o) chowncmd="$chownprog $2" shift;; -p) cpprog="$cpprog -p";; -s) stripcmd=$stripprog;; -S) backupsuffix="$2" shift;; -t) is_target_a_directory=always dst_arg=$2 # Protect names problematic for 'test' and other utilities. case $dst_arg in -* | [=\(\)!]) dst_arg=./$dst_arg;; esac shift;; -T) is_target_a_directory=never;; --version) echo "$0 $scriptversion"; exit $?;; --) shift break;; -*) echo "$0: invalid option: $1" >&2 exit 1;; *) break;; esac shift done # We allow the use of options -d and -T together, by making -d # take the precedence; this is for compatibility with GNU install. if test -n "$dir_arg"; then if test -n "$dst_arg"; then echo "$0: target directory not allowed when installing a directory." >&2 exit 1 fi fi if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then # When -d is used, all remaining arguments are directories to create. # When -t is used, the destination is already specified. # Otherwise, the last argument is the destination. Remove it from $@. for arg do if test -n "$dst_arg"; then # $@ is not empty: it contains at least $arg. set fnord "$@" "$dst_arg" shift # fnord fi shift # arg dst_arg=$arg # Protect names problematic for 'test' and other utilities. case $dst_arg in -* | [=\(\)!]) dst_arg=./$dst_arg;; esac done fi if test $# -eq 0; then if test -z "$dir_arg"; then echo "$0: no input file specified." >&2 exit 1 fi # It's OK to call 'install-sh -d' without argument. # This can happen when creating conditional directories. exit 0 fi if test -z "$dir_arg"; then if test $# -gt 1 || test "$is_target_a_directory" = always; then if test ! -d "$dst_arg"; then echo "$0: $dst_arg: Is not a directory." >&2 exit 1 fi fi fi if test -z "$dir_arg"; then do_exit='(exit $ret); exit $ret' trap "ret=129; $do_exit" 1 trap "ret=130; $do_exit" 2 trap "ret=141; $do_exit" 13 trap "ret=143; $do_exit" 15 # Set umask so as not to create temps with too-generous modes. # However, 'strip' requires both read and write access to temps. case $mode in # Optimize common cases. *644) cp_umask=133;; *755) cp_umask=22;; *[0-7]) if test -z "$stripcmd"; then u_plus_rw= else u_plus_rw='% 200' fi cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; *) if test -z "$stripcmd"; then u_plus_rw= else u_plus_rw=,u+rw fi cp_umask=$mode$u_plus_rw;; esac fi for src do # Protect names problematic for 'test' and other utilities. case $src in -* | [=\(\)!]) src=./$src;; esac if test -n "$dir_arg"; then dst=$src dstdir=$dst test -d "$dstdir" dstdir_status=$? # Don't chown directories that already exist. if test $dstdir_status = 0; then chowncmd="" fi else # Waiting for this to be detected by the "$cpprog $src $dsttmp" command # might cause directories to be created, which would be especially bad # if $src (and thus $dsttmp) contains '*'. if test ! -f "$src" && test ! -d "$src"; then echo "$0: $src does not exist." >&2 exit 1 fi if test -z "$dst_arg"; then echo "$0: no destination specified." >&2 exit 1 fi dst=$dst_arg # If destination is a directory, append the input filename. if test -d "$dst"; then if test "$is_target_a_directory" = never; then echo "$0: $dst_arg: Is a directory" >&2 exit 1 fi dstdir=$dst dstbase=`basename "$src"` case $dst in */) dst=$dst$dstbase;; *) dst=$dst/$dstbase;; esac dstdir_status=0 else dstdir=`dirname "$dst"` test -d "$dstdir" dstdir_status=$? fi fi case $dstdir in */) dstdirslash=$dstdir;; *) dstdirslash=$dstdir/;; esac obsolete_mkdir_used=false if test $dstdir_status != 0; then case $posix_mkdir in '') # With -d, create the new directory with the user-specified mode. # Otherwise, rely on $mkdir_umask. if test -n "$dir_arg"; then mkdir_mode=-m$mode else mkdir_mode= fi posix_mkdir=false # The $RANDOM variable is not portable (e.g., dash). Use it # here however when possible just to lower collision chance. tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ trap ' ret=$? rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null exit $ret ' 0 # Because "mkdir -p" follows existing symlinks and we likely work # directly in world-writeable /tmp, make sure that the '$tmpdir' # directory is successfully created first before we actually test # 'mkdir -p'. if (umask $mkdir_umask && $mkdirprog $mkdir_mode "$tmpdir" && exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1 then if test -z "$dir_arg" || { # Check for POSIX incompatibilities with -m. # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or # other-writable bit of parent directory when it shouldn't. # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. test_tmpdir="$tmpdir/a" ls_ld_tmpdir=`ls -ld "$test_tmpdir"` case $ls_ld_tmpdir in d????-?r-*) different_mode=700;; d????-?--*) different_mode=755;; *) false;; esac && $mkdirprog -m$different_mode -p -- "$test_tmpdir" && { ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"` test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" } } then posix_mkdir=: fi rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" else # Remove any dirs left behind by ancient mkdir implementations. rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null fi trap '' 0;; esac if $posix_mkdir && ( umask $mkdir_umask && $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" ) then : else # mkdir does not conform to POSIX, # or it failed possibly due to a race condition. Create the # directory the slow way, step by step, checking for races as we go. case $dstdir in /*) prefix='/';; [-=\(\)!]*) prefix='./';; *) prefix='';; esac oIFS=$IFS IFS=/ set -f set fnord $dstdir shift set +f IFS=$oIFS prefixes= for d do test X"$d" = X && continue prefix=$prefix$d if test -d "$prefix"; then prefixes= else if $posix_mkdir; then (umask $mkdir_umask && $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break # Don't fail if two instances are running concurrently. test -d "$prefix" || exit 1 else case $prefix in *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; *) qprefix=$prefix;; esac prefixes="$prefixes '$qprefix'" fi fi prefix=$prefix/ done if test -n "$prefixes"; then # Don't fail if two instances are running concurrently. (umask $mkdir_umask && eval "\$doit_exec \$mkdirprog $prefixes") || test -d "$dstdir" || exit 1 obsolete_mkdir_used=true fi fi fi if test -n "$dir_arg"; then { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 else # Make a couple of temp file names in the proper directory. dsttmp=${dstdirslash}_inst.$$_ rmtmp=${dstdirslash}_rm.$$_ # Trap to clean up those temp files at exit. trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 # Copy the file name to the temp name. (umask $cp_umask && { test -z "$stripcmd" || { # Create $dsttmp read-write so that cp doesn't create it read-only, # which would cause strip to fail. if test -z "$doit"; then : >"$dsttmp" # No need to fork-exec 'touch'. else $doit touch "$dsttmp" fi } } && $doit_exec $cpprog "$src" "$dsttmp") && # and set any options; do chmod last to preserve setuid bits. # # If any of these fail, we abort the whole thing. If we want to # ignore errors from any of these, just make sure not to ignore # errors from the above "$doit $cpprog $src $dsttmp" command. # { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && # If -C, don't bother to copy if it wouldn't change the file. if $copy_on_change && old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && set -f && set X $old && old=:$2:$4:$5:$6 && set X $new && new=:$2:$4:$5:$6 && set +f && test "$old" = "$new" && $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 then rm -f "$dsttmp" else # If $backupsuffix is set, and the file being installed # already exists, attempt a backup. Don't worry if it fails, # e.g., if mv doesn't support -f. if test -n "$backupsuffix" && test -f "$dst"; then $doit $mvcmd -f "$dst" "$dst$backupsuffix" 2>/dev/null fi # Rename the file to the real destination. $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || # The rename failed, perhaps because mv can't rename something else # to itself, or perhaps because mv is so ancient that it does not # support -f. { # Now remove or move aside any old file at destination location. # We try this two ways since rm can't unlink itself on some # systems and the destination file might be busy for other # reasons. In this case, the final cleanup might fail but the new # file should still install successfully. { test ! -f "$dst" || $doit $rmcmd "$dst" 2>/dev/null || { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && { $doit $rmcmd "$rmtmp" 2>/dev/null; :; } } || { echo "$0: cannot unlink or rename $dst" >&2 (exit 1); exit 1 } } && # Now rename the file to the real destination. $doit $mvcmd "$dsttmp" "$dst" } fi || exit 1 trap '' 0 fi done # Local variables: # eval: (add-hook 'before-save-hook 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC0" # time-stamp-end: "; # UTC" # End: starpu-1.4.9+dfsg/build-aux/ltmain.sh000077500000000000000000012132011507764646700175250ustar00rootroot00000000000000#! /usr/bin/env sh ## DO NOT EDIT - This file generated from ./build-aux/ltmain.in ## by inline-source v2019-02-19.15 # libtool (GNU libtool) 2.4.7 # Provide generalized library-building support services. # Written by Gordon Matzigkeit , 1996 # Copyright (C) 1996-2019, 2021-2022 Free Software Foundation, Inc. # This is free software; see the source for copying conditions. There is NO # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # GNU Libtool is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # As a special exception to the GNU General Public License, # if you distribute this file as part of a program or library that # is built using GNU Libtool, you may include this file under the # same distribution terms that you use for the rest of that program. # # GNU Libtool is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . PROGRAM=libtool PACKAGE=libtool VERSION="2.4.7 Debian-2.4.7-5" package_revision=2.4.7 ## ------ ## ## Usage. ## ## ------ ## # Run './libtool --help' for help with using this script from the # command line. ## ------------------------------- ## ## User overridable command paths. ## ## ------------------------------- ## # After configure completes, it has a better idea of some of the # shell tools we need than the defaults used by the functions shared # with bootstrap, so set those here where they can still be over- # ridden by the user, but otherwise take precedence. : ${AUTOCONF="autoconf"} : ${AUTOMAKE="automake"} ## -------------------------- ## ## Source external libraries. ## ## -------------------------- ## # Much of our low-level functionality needs to be sourced from external # libraries, which are installed to $pkgauxdir. # Set a version string for this script. scriptversion=2019-02-19.15; # UTC # General shell script boiler plate, and helper functions. # Written by Gary V. Vaughan, 2004 # This is free software. There is NO warranty; not even for # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Copyright (C) 2004-2019, 2021 Bootstrap Authors # # This file is dual licensed under the terms of the MIT license # , and GPL version 2 or later # . You must apply one of # these licenses when using or redistributing this software or any of # the files within it. See the URLs above, or the file `LICENSE` # included in the Bootstrap distribution for the full license texts. # Please report bugs or propose patches to: # ## ------ ## ## Usage. ## ## ------ ## # Evaluate this file near the top of your script to gain access to # the functions and variables defined here: # # . `echo "$0" | ${SED-sed} 's|[^/]*$||'`/build-aux/funclib.sh # # If you need to override any of the default environment variable # settings, do that before evaluating this file. ## -------------------- ## ## Shell normalisation. ## ## -------------------- ## # Some shells need a little help to be as Bourne compatible as possible. # Before doing anything else, make sure all that help has been provided! DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac fi # NLS nuisances: We save the old values in case they are required later. _G_user_locale= _G_safe_locale= for _G_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES do eval "if test set = \"\${$_G_var+set}\"; then save_$_G_var=\$$_G_var $_G_var=C export $_G_var _G_user_locale=\"$_G_var=\\\$save_\$_G_var; \$_G_user_locale\" _G_safe_locale=\"$_G_var=C; \$_G_safe_locale\" fi" done # These NLS vars are set unconditionally (bootstrap issue #24). Unset those # in case the environment reset is needed later and the $save_* variant is not # defined (see the code above). LC_ALL=C LANGUAGE=C export LANGUAGE LC_ALL # Make sure IFS has a sensible default sp=' ' nl=' ' IFS="$sp $nl" # There are apparently some retarded systems that use ';' as a PATH separator! if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # func_unset VAR # -------------- # Portably unset VAR. # In some shells, an 'unset VAR' statement leaves a non-zero return # status if VAR is already unset, which might be problematic if the # statement is used at the end of a function (thus poisoning its return # value) or when 'set -e' is active (causing even a spurious abort of # the script in this case). func_unset () { { eval $1=; (eval unset $1) >/dev/null 2>&1 && eval unset $1 || : ; } } # Make sure CDPATH doesn't cause `cd` commands to output the target dir. func_unset CDPATH # Make sure ${,E,F}GREP behave sanely. func_unset GREP_OPTIONS ## ------------------------- ## ## Locate command utilities. ## ## ------------------------- ## # func_executable_p FILE # ---------------------- # Check that FILE is an executable regular file. func_executable_p () { test -f "$1" && test -x "$1" } # func_path_progs PROGS_LIST CHECK_FUNC [PATH] # -------------------------------------------- # Search for either a program that responds to --version with output # containing "GNU", or else returned by CHECK_FUNC otherwise, by # trying all the directories in PATH with each of the elements of # PROGS_LIST. # # CHECK_FUNC should accept the path to a candidate program, and # set $func_check_prog_result if it truncates its output less than # $_G_path_prog_max characters. func_path_progs () { _G_progs_list=$1 _G_check_func=$2 _G_PATH=${3-"$PATH"} _G_path_prog_max=0 _G_path_prog_found=false _G_save_IFS=$IFS; IFS=${PATH_SEPARATOR-:} for _G_dir in $_G_PATH; do IFS=$_G_save_IFS test -z "$_G_dir" && _G_dir=. for _G_prog_name in $_G_progs_list; do for _exeext in '' .EXE; do _G_path_prog=$_G_dir/$_G_prog_name$_exeext func_executable_p "$_G_path_prog" || continue case `"$_G_path_prog" --version 2>&1` in *GNU*) func_path_progs_result=$_G_path_prog _G_path_prog_found=: ;; *) $_G_check_func $_G_path_prog func_path_progs_result=$func_check_prog_result ;; esac $_G_path_prog_found && break 3 done done done IFS=$_G_save_IFS test -z "$func_path_progs_result" && { echo "no acceptable sed could be found in \$PATH" >&2 exit 1 } } # We want to be able to use the functions in this file before configure # has figured out where the best binaries are kept, which means we have # to search for them ourselves - except when the results are already set # where we skip the searches. # Unless the user overrides by setting SED, search the path for either GNU # sed, or the sed that truncates its output the least. test -z "$SED" && { _G_sed_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ for _G_i in 1 2 3 4 5 6 7; do _G_sed_script=$_G_sed_script$nl$_G_sed_script done echo "$_G_sed_script" 2>/dev/null | sed 99q >conftest.sed _G_sed_script= func_check_prog_sed () { _G_path_prog=$1 _G_count=0 printf 0123456789 >conftest.in while : do cat conftest.in conftest.in >conftest.tmp mv conftest.tmp conftest.in cp conftest.in conftest.nl echo '' >> conftest.nl "$_G_path_prog" -f conftest.sed conftest.out 2>/dev/null || break diff conftest.out conftest.nl >/dev/null 2>&1 || break _G_count=`expr $_G_count + 1` if test "$_G_count" -gt "$_G_path_prog_max"; then # Best one so far, save it but keep looking for a better one func_check_prog_result=$_G_path_prog _G_path_prog_max=$_G_count fi # 10*(2^10) chars as input seems more than enough test 10 -lt "$_G_count" && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out } func_path_progs "sed gsed" func_check_prog_sed "$PATH:/usr/xpg4/bin" rm -f conftest.sed SED=$func_path_progs_result } # Unless the user overrides by setting GREP, search the path for either GNU # grep, or the grep that truncates its output the least. test -z "$GREP" && { func_check_prog_grep () { _G_path_prog=$1 _G_count=0 _G_path_prog_max=0 printf 0123456789 >conftest.in while : do cat conftest.in conftest.in >conftest.tmp mv conftest.tmp conftest.in cp conftest.in conftest.nl echo 'GREP' >> conftest.nl "$_G_path_prog" -e 'GREP$' -e '-(cannot match)-' conftest.out 2>/dev/null || break diff conftest.out conftest.nl >/dev/null 2>&1 || break _G_count=`expr $_G_count + 1` if test "$_G_count" -gt "$_G_path_prog_max"; then # Best one so far, save it but keep looking for a better one func_check_prog_result=$_G_path_prog _G_path_prog_max=$_G_count fi # 10*(2^10) chars as input seems more than enough test 10 -lt "$_G_count" && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out } func_path_progs "grep ggrep" func_check_prog_grep "$PATH:/usr/xpg4/bin" GREP=$func_path_progs_result } ## ------------------------------- ## ## User overridable command paths. ## ## ------------------------------- ## # All uppercase variable names are used for environment variables. These # variables can be overridden by the user before calling a script that # uses them if a suitable command of that name is not already available # in the command search PATH. : ${CP="cp -f"} : ${ECHO="printf %s\n"} : ${EGREP="$GREP -E"} : ${FGREP="$GREP -F"} : ${LN_S="ln -s"} : ${MAKE="make"} : ${MKDIR="mkdir"} : ${MV="mv -f"} : ${RM="rm -f"} : ${SHELL="${CONFIG_SHELL-/bin/sh}"} ## -------------------- ## ## Useful sed snippets. ## ## -------------------- ## sed_dirname='s|/[^/]*$||' sed_basename='s|^.*/||' # Sed substitution that helps us do robust quoting. It backslashifies # metacharacters that are still active within double-quoted strings. sed_quote_subst='s|\([`"$\\]\)|\\\1|g' # Same as above, but do not quote variable references. sed_double_quote_subst='s/\(["`\\]\)/\\\1/g' # Sed substitution that turns a string into a regex matching for the # string literally. sed_make_literal_regex='s|[].[^$\\*\/]|\\&|g' # Sed substitution that converts a w32 file name or path # that contains forward slashes, into one that contains # (escaped) backslashes. A very naive implementation. sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g' # Re-'\' parameter expansions in output of sed_double_quote_subst that # were '\'-ed in input to the same. If an odd number of '\' preceded a # '$' in input to sed_double_quote_subst, that '$' was protected from # expansion. Since each input '\' is now two '\'s, look for any number # of runs of four '\'s followed by two '\'s and then a '$'. '\' that '$'. _G_bs='\\' _G_bs2='\\\\' _G_bs4='\\\\\\\\' _G_dollar='\$' sed_double_backslash="\ s/$_G_bs4/&\\ /g s/^$_G_bs2$_G_dollar/$_G_bs&/ s/\\([^$_G_bs]\\)$_G_bs2$_G_dollar/\\1$_G_bs2$_G_bs$_G_dollar/g s/\n//g" # require_check_ifs_backslash # --------------------------- # Check if we can use backslash as IFS='\' separator, and set # $check_ifs_backshlash_broken to ':' or 'false'. require_check_ifs_backslash=func_require_check_ifs_backslash func_require_check_ifs_backslash () { _G_save_IFS=$IFS IFS='\' _G_check_ifs_backshlash='a\\b' for _G_i in $_G_check_ifs_backshlash do case $_G_i in a) check_ifs_backshlash_broken=false ;; '') break ;; *) check_ifs_backshlash_broken=: break ;; esac done IFS=$_G_save_IFS require_check_ifs_backslash=: } ## ----------------- ## ## Global variables. ## ## ----------------- ## # Except for the global variables explicitly listed below, the following # functions in the '^func_' namespace, and the '^require_' namespace # variables initialised in the 'Resource management' section, sourcing # this file will not pollute your global namespace with anything # else. There's no portable way to scope variables in Bourne shell # though, so actually running these functions will sometimes place # results into a variable named after the function, and often use # temporary variables in the '^_G_' namespace. If you are careful to # avoid using those namespaces casually in your sourcing script, things # should continue to work as you expect. And, of course, you can freely # overwrite any of the functions or variables defined here before # calling anything to customize them. EXIT_SUCCESS=0 EXIT_FAILURE=1 EXIT_MISMATCH=63 # $? = 63 is used to indicate version mismatch to missing. EXIT_SKIP=77 # $? = 77 is used to indicate a skipped test to automake. # Allow overriding, eg assuming that you follow the convention of # putting '$debug_cmd' at the start of all your functions, you can get # bash to show function call trace with: # # debug_cmd='echo "${FUNCNAME[0]} $*" >&2' bash your-script-name debug_cmd=${debug_cmd-":"} exit_cmd=: # By convention, finish your script with: # # exit $exit_status # # so that you can set exit_status to non-zero if you want to indicate # something went wrong during execution without actually bailing out at # the point of failure. exit_status=$EXIT_SUCCESS # Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh # is ksh but when the shell is invoked as "sh" and the current value of # the _XPG environment variable is not equal to 1 (one), the special # positional parameter $0, within a function call, is the name of the # function. progpath=$0 # The name of this program. progname=`$ECHO "$progpath" |$SED "$sed_basename"` # Make sure we have an absolute progpath for reexecution: case $progpath in [\\/]*|[A-Za-z]:\\*) ;; *[\\/]*) progdir=`$ECHO "$progpath" |$SED "$sed_dirname"` progdir=`cd "$progdir" && pwd` progpath=$progdir/$progname ;; *) _G_IFS=$IFS IFS=${PATH_SEPARATOR-:} for progdir in $PATH; do IFS=$_G_IFS test -x "$progdir/$progname" && break done IFS=$_G_IFS test -n "$progdir" || progdir=`pwd` progpath=$progdir/$progname ;; esac ## ----------------- ## ## Standard options. ## ## ----------------- ## # The following options affect the operation of the functions defined # below, and should be set appropriately depending on run-time para- # meters passed on the command line. opt_dry_run=false opt_quiet=false opt_verbose=false # Categories 'all' and 'none' are always available. Append any others # you will pass as the first argument to func_warning from your own # code. warning_categories= # By default, display warnings according to 'opt_warning_types'. Set # 'warning_func' to ':' to elide all warnings, or func_fatal_error to # treat the next displayed warning as a fatal error. warning_func=func_warn_and_continue # Set to 'all' to display all warnings, 'none' to suppress all # warnings, or a space delimited list of some subset of # 'warning_categories' to display only the listed warnings. opt_warning_types=all ## -------------------- ## ## Resource management. ## ## -------------------- ## # This section contains definitions for functions that each ensure a # particular resource (a file, or a non-empty configuration variable for # example) is available, and if appropriate to extract default values # from pertinent package files. Call them using their associated # 'require_*' variable to ensure that they are executed, at most, once. # # It's entirely deliberate that calling these functions can set # variables that don't obey the namespace limitations obeyed by the rest # of this file, in order that that they be as useful as possible to # callers. # require_term_colors # ------------------- # Allow display of bold text on terminals that support it. require_term_colors=func_require_term_colors func_require_term_colors () { $debug_cmd test -t 1 && { # COLORTERM and USE_ANSI_COLORS environment variables take # precedence, because most terminfo databases neglect to describe # whether color sequences are supported. test -n "${COLORTERM+set}" && : ${USE_ANSI_COLORS="1"} if test 1 = "$USE_ANSI_COLORS"; then # Standard ANSI escape sequences tc_reset='' tc_bold=''; tc_standout='' tc_red=''; tc_green='' tc_blue=''; tc_cyan='' else # Otherwise trust the terminfo database after all. test -n "`tput sgr0 2>/dev/null`" && { tc_reset=`tput sgr0` test -n "`tput bold 2>/dev/null`" && tc_bold=`tput bold` tc_standout=$tc_bold test -n "`tput smso 2>/dev/null`" && tc_standout=`tput smso` test -n "`tput setaf 1 2>/dev/null`" && tc_red=`tput setaf 1` test -n "`tput setaf 2 2>/dev/null`" && tc_green=`tput setaf 2` test -n "`tput setaf 4 2>/dev/null`" && tc_blue=`tput setaf 4` test -n "`tput setaf 5 2>/dev/null`" && tc_cyan=`tput setaf 5` } fi } require_term_colors=: } ## ----------------- ## ## Function library. ## ## ----------------- ## # This section contains a variety of useful functions to call in your # scripts. Take note of the portable wrappers for features provided by # some modern shells, which will fall back to slower equivalents on # less featureful shells. # func_append VAR VALUE # --------------------- # Append VALUE onto the existing contents of VAR. # We should try to minimise forks, especially on Windows where they are # unreasonably slow, so skip the feature probes when bash or zsh are # being used: if test set = "${BASH_VERSION+set}${ZSH_VERSION+set}"; then : ${_G_HAVE_ARITH_OP="yes"} : ${_G_HAVE_XSI_OPS="yes"} # The += operator was introduced in bash 3.1 case $BASH_VERSION in [12].* | 3.0 | 3.0*) ;; *) : ${_G_HAVE_PLUSEQ_OP="yes"} ;; esac fi # _G_HAVE_PLUSEQ_OP # Can be empty, in which case the shell is probed, "yes" if += is # useable or anything else if it does not work. test -z "$_G_HAVE_PLUSEQ_OP" \ && (eval 'x=a; x+=" b"; test "a b" = "$x"') 2>/dev/null \ && _G_HAVE_PLUSEQ_OP=yes if test yes = "$_G_HAVE_PLUSEQ_OP" then # This is an XSI compatible shell, allowing a faster implementation... eval 'func_append () { $debug_cmd eval "$1+=\$2" }' else # ...otherwise fall back to using expr, which is often a shell builtin. func_append () { $debug_cmd eval "$1=\$$1\$2" } fi # func_append_quoted VAR VALUE # ---------------------------- # Quote VALUE and append to the end of shell variable VAR, separated # by a space. if test yes = "$_G_HAVE_PLUSEQ_OP"; then eval 'func_append_quoted () { $debug_cmd func_quote_arg pretty "$2" eval "$1+=\\ \$func_quote_arg_result" }' else func_append_quoted () { $debug_cmd func_quote_arg pretty "$2" eval "$1=\$$1\\ \$func_quote_arg_result" } fi # func_append_uniq VAR VALUE # -------------------------- # Append unique VALUE onto the existing contents of VAR, assuming # entries are delimited by the first character of VALUE. For example: # # func_append_uniq options " --another-option option-argument" # # will only append to $options if " --another-option option-argument " # is not already present somewhere in $options already (note spaces at # each end implied by leading space in second argument). func_append_uniq () { $debug_cmd eval _G_current_value='`$ECHO $'$1'`' _G_delim=`expr "$2" : '\(.\)'` case $_G_delim$_G_current_value$_G_delim in *"$2$_G_delim"*) ;; *) func_append "$@" ;; esac } # func_arith TERM... # ------------------ # Set func_arith_result to the result of evaluating TERMs. test -z "$_G_HAVE_ARITH_OP" \ && (eval 'test 2 = $(( 1 + 1 ))') 2>/dev/null \ && _G_HAVE_ARITH_OP=yes if test yes = "$_G_HAVE_ARITH_OP"; then eval 'func_arith () { $debug_cmd func_arith_result=$(( $* )) }' else func_arith () { $debug_cmd func_arith_result=`expr "$@"` } fi # func_basename FILE # ------------------ # Set func_basename_result to FILE with everything up to and including # the last / stripped. if test yes = "$_G_HAVE_XSI_OPS"; then # If this shell supports suffix pattern removal, then use it to avoid # forking. Hide the definitions single quotes in case the shell chokes # on unsupported syntax... _b='func_basename_result=${1##*/}' _d='case $1 in */*) func_dirname_result=${1%/*}$2 ;; * ) func_dirname_result=$3 ;; esac' else # ...otherwise fall back to using sed. _b='func_basename_result=`$ECHO "$1" |$SED "$sed_basename"`' _d='func_dirname_result=`$ECHO "$1" |$SED "$sed_dirname"` if test "X$func_dirname_result" = "X$1"; then func_dirname_result=$3 else func_append func_dirname_result "$2" fi' fi eval 'func_basename () { $debug_cmd '"$_b"' }' # func_dirname FILE APPEND NONDIR_REPLACEMENT # ------------------------------------------- # Compute the dirname of FILE. If nonempty, add APPEND to the result, # otherwise set result to NONDIR_REPLACEMENT. eval 'func_dirname () { $debug_cmd '"$_d"' }' # func_dirname_and_basename FILE APPEND NONDIR_REPLACEMENT # -------------------------------------------------------- # Perform func_basename and func_dirname in a single function # call: # dirname: Compute the dirname of FILE. If nonempty, # add APPEND to the result, otherwise set result # to NONDIR_REPLACEMENT. # value returned in "$func_dirname_result" # basename: Compute filename of FILE. # value retuned in "$func_basename_result" # For efficiency, we do not delegate to the functions above but instead # duplicate the functionality here. eval 'func_dirname_and_basename () { $debug_cmd '"$_b"' '"$_d"' }' # func_echo ARG... # ---------------- # Echo program name prefixed message. func_echo () { $debug_cmd _G_message=$* func_echo_IFS=$IFS IFS=$nl for _G_line in $_G_message; do IFS=$func_echo_IFS $ECHO "$progname: $_G_line" done IFS=$func_echo_IFS } # func_echo_all ARG... # -------------------- # Invoke $ECHO with all args, space-separated. func_echo_all () { $ECHO "$*" } # func_echo_infix_1 INFIX ARG... # ------------------------------ # Echo program name, followed by INFIX on the first line, with any # additional lines not showing INFIX. func_echo_infix_1 () { $debug_cmd $require_term_colors _G_infix=$1; shift _G_indent=$_G_infix _G_prefix="$progname: $_G_infix: " _G_message=$* # Strip color escape sequences before counting printable length for _G_tc in "$tc_reset" "$tc_bold" "$tc_standout" "$tc_red" "$tc_green" "$tc_blue" "$tc_cyan" do test -n "$_G_tc" && { _G_esc_tc=`$ECHO "$_G_tc" | $SED "$sed_make_literal_regex"` _G_indent=`$ECHO "$_G_indent" | $SED "s|$_G_esc_tc||g"` } done _G_indent="$progname: "`echo "$_G_indent" | $SED 's|.| |g'`" " ## exclude from sc_prohibit_nested_quotes func_echo_infix_1_IFS=$IFS IFS=$nl for _G_line in $_G_message; do IFS=$func_echo_infix_1_IFS $ECHO "$_G_prefix$tc_bold$_G_line$tc_reset" >&2 _G_prefix=$_G_indent done IFS=$func_echo_infix_1_IFS } # func_error ARG... # ----------------- # Echo program name prefixed message to standard error. func_error () { $debug_cmd $require_term_colors func_echo_infix_1 " $tc_standout${tc_red}error$tc_reset" "$*" >&2 } # func_fatal_error ARG... # ----------------------- # Echo program name prefixed message to standard error, and exit. func_fatal_error () { $debug_cmd func_error "$*" exit $EXIT_FAILURE } # func_grep EXPRESSION FILENAME # ----------------------------- # Check whether EXPRESSION matches any line of FILENAME, without output. func_grep () { $debug_cmd $GREP "$1" "$2" >/dev/null 2>&1 } # func_len STRING # --------------- # Set func_len_result to the length of STRING. STRING may not # start with a hyphen. test -z "$_G_HAVE_XSI_OPS" \ && (eval 'x=a/b/c; test 5aa/bb/cc = "${#x}${x%%/*}${x%/*}${x#*/}${x##*/}"') 2>/dev/null \ && _G_HAVE_XSI_OPS=yes if test yes = "$_G_HAVE_XSI_OPS"; then eval 'func_len () { $debug_cmd func_len_result=${#1} }' else func_len () { $debug_cmd func_len_result=`expr "$1" : ".*" 2>/dev/null || echo $max_cmd_len` } fi # func_mkdir_p DIRECTORY-PATH # --------------------------- # Make sure the entire path to DIRECTORY-PATH is available. func_mkdir_p () { $debug_cmd _G_directory_path=$1 _G_dir_list= if test -n "$_G_directory_path" && test : != "$opt_dry_run"; then # Protect directory names starting with '-' case $_G_directory_path in -*) _G_directory_path=./$_G_directory_path ;; esac # While some portion of DIR does not yet exist... while test ! -d "$_G_directory_path"; do # ...make a list in topmost first order. Use a colon delimited # list incase some portion of path contains whitespace. _G_dir_list=$_G_directory_path:$_G_dir_list # If the last portion added has no slash in it, the list is done case $_G_directory_path in */*) ;; *) break ;; esac # ...otherwise throw away the child directory and loop _G_directory_path=`$ECHO "$_G_directory_path" | $SED -e "$sed_dirname"` done _G_dir_list=`$ECHO "$_G_dir_list" | $SED 's|:*$||'` func_mkdir_p_IFS=$IFS; IFS=: for _G_dir in $_G_dir_list; do IFS=$func_mkdir_p_IFS # mkdir can fail with a 'File exist' error if two processes # try to create one of the directories concurrently. Don't # stop in that case! $MKDIR "$_G_dir" 2>/dev/null || : done IFS=$func_mkdir_p_IFS # Bail out if we (or some other process) failed to create a directory. test -d "$_G_directory_path" || \ func_fatal_error "Failed to create '$1'" fi } # func_mktempdir [BASENAME] # ------------------------- # Make a temporary directory that won't clash with other running # libtool processes, and avoids race conditions if possible. If # given, BASENAME is the basename for that directory. func_mktempdir () { $debug_cmd _G_template=${TMPDIR-/tmp}/${1-$progname} if test : = "$opt_dry_run"; then # Return a directory name, but don't create it in dry-run mode _G_tmpdir=$_G_template-$$ else # If mktemp works, use that first and foremost _G_tmpdir=`mktemp -d "$_G_template-XXXXXXXX" 2>/dev/null` if test ! -d "$_G_tmpdir"; then # Failing that, at least try and use $RANDOM to avoid a race _G_tmpdir=$_G_template-${RANDOM-0}$$ func_mktempdir_umask=`umask` umask 0077 $MKDIR "$_G_tmpdir" umask $func_mktempdir_umask fi # If we're not in dry-run mode, bomb out on failure test -d "$_G_tmpdir" || \ func_fatal_error "cannot create temporary directory '$_G_tmpdir'" fi $ECHO "$_G_tmpdir" } # func_normal_abspath PATH # ------------------------ # Remove doubled-up and trailing slashes, "." path components, # and cancel out any ".." path components in PATH after making # it an absolute path. func_normal_abspath () { $debug_cmd # These SED scripts presuppose an absolute path with a trailing slash. _G_pathcar='s|^/\([^/]*\).*$|\1|' _G_pathcdr='s|^/[^/]*||' _G_removedotparts=':dotsl s|/\./|/|g t dotsl s|/\.$|/|' _G_collapseslashes='s|/\{1,\}|/|g' _G_finalslash='s|/*$|/|' # Start from root dir and reassemble the path. func_normal_abspath_result= func_normal_abspath_tpath=$1 func_normal_abspath_altnamespace= case $func_normal_abspath_tpath in "") # Empty path, that just means $cwd. func_stripname '' '/' "`pwd`" func_normal_abspath_result=$func_stripname_result return ;; # The next three entries are used to spot a run of precisely # two leading slashes without using negated character classes; # we take advantage of case's first-match behaviour. ///*) # Unusual form of absolute path, do nothing. ;; //*) # Not necessarily an ordinary path; POSIX reserves leading '//' # and for example Cygwin uses it to access remote file shares # over CIFS/SMB, so we conserve a leading double slash if found. func_normal_abspath_altnamespace=/ ;; /*) # Absolute path, do nothing. ;; *) # Relative path, prepend $cwd. func_normal_abspath_tpath=`pwd`/$func_normal_abspath_tpath ;; esac # Cancel out all the simple stuff to save iterations. We also want # the path to end with a slash for ease of parsing, so make sure # there is one (and only one) here. func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ -e "$_G_removedotparts" -e "$_G_collapseslashes" -e "$_G_finalslash"` while :; do # Processed it all yet? if test / = "$func_normal_abspath_tpath"; then # If we ascended to the root using ".." the result may be empty now. if test -z "$func_normal_abspath_result"; then func_normal_abspath_result=/ fi break fi func_normal_abspath_tcomponent=`$ECHO "$func_normal_abspath_tpath" | $SED \ -e "$_G_pathcar"` func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ -e "$_G_pathcdr"` # Figure out what to do with it case $func_normal_abspath_tcomponent in "") # Trailing empty path component, ignore it. ;; ..) # Parent dir; strip last assembled component from result. func_dirname "$func_normal_abspath_result" func_normal_abspath_result=$func_dirname_result ;; *) # Actual path component, append it. func_append func_normal_abspath_result "/$func_normal_abspath_tcomponent" ;; esac done # Restore leading double-slash if one was found on entry. func_normal_abspath_result=$func_normal_abspath_altnamespace$func_normal_abspath_result } # func_notquiet ARG... # -------------------- # Echo program name prefixed message only when not in quiet mode. func_notquiet () { $debug_cmd $opt_quiet || func_echo ${1+"$@"} # A bug in bash halts the script if the last line of a function # fails when set -e is in force, so we need another command to # work around that: : } # func_relative_path SRCDIR DSTDIR # -------------------------------- # Set func_relative_path_result to the relative path from SRCDIR to DSTDIR. func_relative_path () { $debug_cmd func_relative_path_result= func_normal_abspath "$1" func_relative_path_tlibdir=$func_normal_abspath_result func_normal_abspath "$2" func_relative_path_tbindir=$func_normal_abspath_result # Ascend the tree starting from libdir while :; do # check if we have found a prefix of bindir case $func_relative_path_tbindir in $func_relative_path_tlibdir) # found an exact match func_relative_path_tcancelled= break ;; $func_relative_path_tlibdir*) # found a matching prefix func_stripname "$func_relative_path_tlibdir" '' "$func_relative_path_tbindir" func_relative_path_tcancelled=$func_stripname_result if test -z "$func_relative_path_result"; then func_relative_path_result=. fi break ;; *) func_dirname $func_relative_path_tlibdir func_relative_path_tlibdir=$func_dirname_result if test -z "$func_relative_path_tlibdir"; then # Have to descend all the way to the root! func_relative_path_result=../$func_relative_path_result func_relative_path_tcancelled=$func_relative_path_tbindir break fi func_relative_path_result=../$func_relative_path_result ;; esac done # Now calculate path; take care to avoid doubling-up slashes. func_stripname '' '/' "$func_relative_path_result" func_relative_path_result=$func_stripname_result func_stripname '/' '/' "$func_relative_path_tcancelled" if test -n "$func_stripname_result"; then func_append func_relative_path_result "/$func_stripname_result" fi # Normalisation. If bindir is libdir, return '.' else relative path. if test -n "$func_relative_path_result"; then func_stripname './' '' "$func_relative_path_result" func_relative_path_result=$func_stripname_result fi test -n "$func_relative_path_result" || func_relative_path_result=. : } # func_quote_portable EVAL ARG # ---------------------------- # Internal function to portably implement func_quote_arg. Note that we still # keep attention to performance here so we as much as possible try to avoid # calling sed binary (so far O(N) complexity as long as func_append is O(1)). func_quote_portable () { $debug_cmd $require_check_ifs_backslash func_quote_portable_result=$2 # one-time-loop (easy break) while true do if $1; then func_quote_portable_result=`$ECHO "$2" | $SED \ -e "$sed_double_quote_subst" -e "$sed_double_backslash"` break fi # Quote for eval. case $func_quote_portable_result in *[\\\`\"\$]*) # Fallback to sed for $func_check_bs_ifs_broken=:, or when the string # contains the shell wildcard characters. case $check_ifs_backshlash_broken$func_quote_portable_result in :*|*[\[\*\?]*) func_quote_portable_result=`$ECHO "$func_quote_portable_result" \ | $SED "$sed_quote_subst"` break ;; esac func_quote_portable_old_IFS=$IFS for _G_char in '\' '`' '"' '$' do # STATE($1) PREV($2) SEPARATOR($3) set start "" "" func_quote_portable_result=dummy"$_G_char$func_quote_portable_result$_G_char"dummy IFS=$_G_char for _G_part in $func_quote_portable_result do case $1 in quote) func_append func_quote_portable_result "$3$2" set quote "$_G_part" "\\$_G_char" ;; start) set first "" "" func_quote_portable_result= ;; first) set quote "$_G_part" "" ;; esac done done IFS=$func_quote_portable_old_IFS ;; *) ;; esac break done func_quote_portable_unquoted_result=$func_quote_portable_result case $func_quote_portable_result in # double-quote args containing shell metacharacters to delay # word splitting, command substitution and variable expansion # for a subsequent eval. # many bourne shells cannot handle close brackets correctly # in scan sets, so we specify it separately. *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") func_quote_portable_result=\"$func_quote_portable_result\" ;; esac } # func_quotefast_eval ARG # ----------------------- # Quote one ARG (internal). This is equivalent to 'func_quote_arg eval ARG', # but optimized for speed. Result is stored in $func_quotefast_eval. if test xyes = `(x=; printf -v x %q yes; echo x"$x") 2>/dev/null`; then printf -v _GL_test_printf_tilde %q '~' if test '\~' = "$_GL_test_printf_tilde"; then func_quotefast_eval () { printf -v func_quotefast_eval_result %q "$1" } else # Broken older Bash implementations. Make those faster too if possible. func_quotefast_eval () { case $1 in '~'*) func_quote_portable false "$1" func_quotefast_eval_result=$func_quote_portable_result ;; *) printf -v func_quotefast_eval_result %q "$1" ;; esac } fi else func_quotefast_eval () { func_quote_portable false "$1" func_quotefast_eval_result=$func_quote_portable_result } fi # func_quote_arg MODEs ARG # ------------------------ # Quote one ARG to be evaled later. MODEs argument may contain zero or more # specifiers listed below separated by ',' character. This function returns two # values: # i) func_quote_arg_result # double-quoted (when needed), suitable for a subsequent eval # ii) func_quote_arg_unquoted_result # has all characters that are still active within double # quotes backslashified. Available only if 'unquoted' is specified. # # Available modes: # ---------------- # 'eval' (default) # - escape shell special characters # 'expand' # - the same as 'eval'; but do not quote variable references # 'pretty' # - request aesthetic output, i.e. '"a b"' instead of 'a\ b'. This might # be used later in func_quote to get output like: 'echo "a b"' instead # of 'echo a\ b'. This is slower than default on some shells. # 'unquoted' # - produce also $func_quote_arg_unquoted_result which does not contain # wrapping double-quotes. # # Examples for 'func_quote_arg pretty,unquoted string': # # string | *_result | *_unquoted_result # ------------+-----------------------+------------------- # " | \" | \" # a b | "a b" | a b # "a b" | "\"a b\"" | \"a b\" # * | "*" | * # z="${x-$y}" | "z=\"\${x-\$y}\"" | z=\"\${x-\$y}\" # # Examples for 'func_quote_arg pretty,unquoted,expand string': # # string | *_result | *_unquoted_result # --------------+---------------------+-------------------- # z="${x-$y}" | "z=\"${x-$y}\"" | z=\"${x-$y}\" func_quote_arg () { _G_quote_expand=false case ,$1, in *,expand,*) _G_quote_expand=: ;; esac case ,$1, in *,pretty,*|*,expand,*|*,unquoted,*) func_quote_portable $_G_quote_expand "$2" func_quote_arg_result=$func_quote_portable_result func_quote_arg_unquoted_result=$func_quote_portable_unquoted_result ;; *) # Faster quote-for-eval for some shells. func_quotefast_eval "$2" func_quote_arg_result=$func_quotefast_eval_result ;; esac } # func_quote MODEs ARGs... # ------------------------ # Quote all ARGs to be evaled later and join them into single command. See # func_quote_arg's description for more info. func_quote () { $debug_cmd _G_func_quote_mode=$1 ; shift func_quote_result= while test 0 -lt $#; do func_quote_arg "$_G_func_quote_mode" "$1" if test -n "$func_quote_result"; then func_append func_quote_result " $func_quote_arg_result" else func_append func_quote_result "$func_quote_arg_result" fi shift done } # func_stripname PREFIX SUFFIX NAME # --------------------------------- # strip PREFIX and SUFFIX from NAME, and store in func_stripname_result. # PREFIX and SUFFIX must not contain globbing or regex special # characters, hashes, percent signs, but SUFFIX may contain a leading # dot (in which case that matches only a dot). if test yes = "$_G_HAVE_XSI_OPS"; then eval 'func_stripname () { $debug_cmd # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are # positional parameters, so assign one to ordinary variable first. func_stripname_result=$3 func_stripname_result=${func_stripname_result#"$1"} func_stripname_result=${func_stripname_result%"$2"} }' else func_stripname () { $debug_cmd case $2 in .*) func_stripname_result=`$ECHO "$3" | $SED -e "s%^$1%%" -e "s%\\\\$2\$%%"`;; *) func_stripname_result=`$ECHO "$3" | $SED -e "s%^$1%%" -e "s%$2\$%%"`;; esac } fi # func_show_eval CMD [FAIL_EXP] # ----------------------------- # Unless opt_quiet is true, then output CMD. Then, if opt_dryrun is # not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP # is given, then evaluate it. func_show_eval () { $debug_cmd _G_cmd=$1 _G_fail_exp=${2-':'} func_quote_arg pretty,expand "$_G_cmd" eval "func_notquiet $func_quote_arg_result" $opt_dry_run || { eval "$_G_cmd" _G_status=$? if test 0 -ne "$_G_status"; then eval "(exit $_G_status); $_G_fail_exp" fi } } # func_show_eval_locale CMD [FAIL_EXP] # ------------------------------------ # Unless opt_quiet is true, then output CMD. Then, if opt_dryrun is # not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP # is given, then evaluate it. Use the saved locale for evaluation. func_show_eval_locale () { $debug_cmd _G_cmd=$1 _G_fail_exp=${2-':'} $opt_quiet || { func_quote_arg expand,pretty "$_G_cmd" eval "func_echo $func_quote_arg_result" } $opt_dry_run || { eval "$_G_user_locale $_G_cmd" _G_status=$? eval "$_G_safe_locale" if test 0 -ne "$_G_status"; then eval "(exit $_G_status); $_G_fail_exp" fi } } # func_tr_sh # ---------- # Turn $1 into a string suitable for a shell variable name. # Result is stored in $func_tr_sh_result. All characters # not in the set a-zA-Z0-9_ are replaced with '_'. Further, # if $1 begins with a digit, a '_' is prepended as well. func_tr_sh () { $debug_cmd case $1 in [0-9]* | *[!a-zA-Z0-9_]*) func_tr_sh_result=`$ECHO "$1" | $SED -e 's/^\([0-9]\)/_\1/' -e 's/[^a-zA-Z0-9_]/_/g'` ;; * ) func_tr_sh_result=$1 ;; esac } # func_verbose ARG... # ------------------- # Echo program name prefixed message in verbose mode only. func_verbose () { $debug_cmd $opt_verbose && func_echo "$*" : } # func_warn_and_continue ARG... # ----------------------------- # Echo program name prefixed warning message to standard error. func_warn_and_continue () { $debug_cmd $require_term_colors func_echo_infix_1 "${tc_red}warning$tc_reset" "$*" >&2 } # func_warning CATEGORY ARG... # ---------------------------- # Echo program name prefixed warning message to standard error. Warning # messages can be filtered according to CATEGORY, where this function # elides messages where CATEGORY is not listed in the global variable # 'opt_warning_types'. func_warning () { $debug_cmd # CATEGORY must be in the warning_categories list! case " $warning_categories " in *" $1 "*) ;; *) func_internal_error "invalid warning category '$1'" ;; esac _G_category=$1 shift case " $opt_warning_types " in *" $_G_category "*) $warning_func ${1+"$@"} ;; esac } # func_sort_ver VER1 VER2 # ----------------------- # 'sort -V' is not generally available. # Note this deviates from the version comparison in automake # in that it treats 1.5 < 1.5.0, and treats 1.4.4a < 1.4-p3a # but this should suffice as we won't be specifying old # version formats or redundant trailing .0 in bootstrap.conf. # If we did want full compatibility then we should probably # use m4_version_compare from autoconf. func_sort_ver () { $debug_cmd printf '%s\n%s\n' "$1" "$2" \ | sort -t. -k 1,1n -k 2,2n -k 3,3n -k 4,4n -k 5,5n -k 6,6n -k 7,7n -k 8,8n -k 9,9n } # func_lt_ver PREV CURR # --------------------- # Return true if PREV and CURR are in the correct order according to # func_sort_ver, otherwise false. Use it like this: # # func_lt_ver "$prev_ver" "$proposed_ver" || func_fatal_error "..." func_lt_ver () { $debug_cmd test "x$1" = x`func_sort_ver "$1" "$2" | $SED 1q` } # Local variables: # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'before-save-hook 'time-stamp) # time-stamp-pattern: "10/scriptversion=%:y-%02m-%02d.%02H; # UTC" # time-stamp-time-zone: "UTC" # End: #! /bin/sh # A portable, pluggable option parser for Bourne shell. # Written by Gary V. Vaughan, 2010 # This is free software. There is NO warranty; not even for # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Copyright (C) 2010-2019, 2021 Bootstrap Authors # # This file is dual licensed under the terms of the MIT license # , and GPL version 2 or later # . You must apply one of # these licenses when using or redistributing this software or any of # the files within it. See the URLs above, or the file `LICENSE` # included in the Bootstrap distribution for the full license texts. # Please report bugs or propose patches to: # # Set a version string for this script. scriptversion=2019-02-19.15; # UTC ## ------ ## ## Usage. ## ## ------ ## # This file is a library for parsing options in your shell scripts along # with assorted other useful supporting features that you can make use # of too. # # For the simplest scripts you might need only: # # #!/bin/sh # . relative/path/to/funclib.sh # . relative/path/to/options-parser # scriptversion=1.0 # func_options ${1+"$@"} # eval set dummy "$func_options_result"; shift # ...rest of your script... # # In order for the '--version' option to work, you will need to have a # suitably formatted comment like the one at the top of this file # starting with '# Written by ' and ending with '# Copyright'. # # For '-h' and '--help' to work, you will also need a one line # description of your script's purpose in a comment directly above the # '# Written by ' line, like the one at the top of this file. # # The default options also support '--debug', which will turn on shell # execution tracing (see the comment above debug_cmd below for another # use), and '--verbose' and the func_verbose function to allow your script # to display verbose messages only when your user has specified # '--verbose'. # # After sourcing this file, you can plug in processing for additional # options by amending the variables from the 'Configuration' section # below, and following the instructions in the 'Option parsing' # section further down. ## -------------- ## ## Configuration. ## ## -------------- ## # You should override these variables in your script after sourcing this # file so that they reflect the customisations you have added to the # option parser. # The usage line for option parsing errors and the start of '-h' and # '--help' output messages. You can embed shell variables for delayed # expansion at the time the message is displayed, but you will need to # quote other shell meta-characters carefully to prevent them being # expanded when the contents are evaled. usage='$progpath [OPTION]...' # Short help message in response to '-h' and '--help'. Add to this or # override it after sourcing this library to reflect the full set of # options your script accepts. usage_message="\ --debug enable verbose shell tracing -W, --warnings=CATEGORY report the warnings falling in CATEGORY [all] -v, --verbose verbosely report processing --version print version information and exit -h, --help print short or long help message and exit " # Additional text appended to 'usage_message' in response to '--help'. long_help_message=" Warning categories include: 'all' show all warnings 'none' turn off all the warnings 'error' warnings are treated as fatal errors" # Help message printed before fatal option parsing errors. fatal_help="Try '\$progname --help' for more information." ## ------------------------- ## ## Hook function management. ## ## ------------------------- ## # This section contains functions for adding, removing, and running hooks # in the main code. A hook is just a list of function names that can be # run in order later on. # func_hookable FUNC_NAME # ----------------------- # Declare that FUNC_NAME will run hooks added with # 'func_add_hook FUNC_NAME ...'. func_hookable () { $debug_cmd func_append hookable_fns " $1" } # func_add_hook FUNC_NAME HOOK_FUNC # --------------------------------- # Request that FUNC_NAME call HOOK_FUNC before it returns. FUNC_NAME must # first have been declared "hookable" by a call to 'func_hookable'. func_add_hook () { $debug_cmd case " $hookable_fns " in *" $1 "*) ;; *) func_fatal_error "'$1' does not accept hook functions." ;; esac eval func_append ${1}_hooks '" $2"' } # func_remove_hook FUNC_NAME HOOK_FUNC # ------------------------------------ # Remove HOOK_FUNC from the list of hook functions to be called by # FUNC_NAME. func_remove_hook () { $debug_cmd eval ${1}_hooks='`$ECHO "\$'$1'_hooks" |$SED "s| '$2'||"`' } # func_propagate_result FUNC_NAME_A FUNC_NAME_B # --------------------------------------------- # If the *_result variable of FUNC_NAME_A _is set_, assign its value to # *_result variable of FUNC_NAME_B. func_propagate_result () { $debug_cmd func_propagate_result_result=: if eval "test \"\${${1}_result+set}\" = set" then eval "${2}_result=\$${1}_result" else func_propagate_result_result=false fi } # func_run_hooks FUNC_NAME [ARG]... # --------------------------------- # Run all hook functions registered to FUNC_NAME. # It's assumed that the list of hook functions contains nothing more # than a whitespace-delimited list of legal shell function names, and # no effort is wasted trying to catch shell meta-characters or preserve # whitespace. func_run_hooks () { $debug_cmd _G_rc_run_hooks=false case " $hookable_fns " in *" $1 "*) ;; *) func_fatal_error "'$1' does not support hook functions." ;; esac eval _G_hook_fns=\$$1_hooks; shift for _G_hook in $_G_hook_fns; do func_unset "${_G_hook}_result" eval $_G_hook '${1+"$@"}' func_propagate_result $_G_hook func_run_hooks if $func_propagate_result_result; then eval set dummy "$func_run_hooks_result"; shift fi done } ## --------------- ## ## Option parsing. ## ## --------------- ## # In order to add your own option parsing hooks, you must accept the # full positional parameter list from your hook function. You may remove # or edit any options that you action, and then pass back the remaining # unprocessed options in '_result', escaped # suitably for 'eval'. # # The '_result' variable is automatically unset # before your hook gets called; for best performance, only set the # *_result variable when necessary (i.e. don't call the 'func_quote' # function unnecessarily because it can be an expensive operation on some # machines). # # Like this: # # my_options_prep () # { # $debug_cmd # # # Extend the existing usage message. # usage_message=$usage_message' # -s, --silent don'\''t print informational messages # ' # # No change in '$@' (ignored completely by this hook). Leave # # my_options_prep_result variable intact. # } # func_add_hook func_options_prep my_options_prep # # # my_silent_option () # { # $debug_cmd # # args_changed=false # # # Note that, for efficiency, we parse as many options as we can # # recognise in a loop before passing the remainder back to the # # caller on the first unrecognised argument we encounter. # while test $# -gt 0; do # opt=$1; shift # case $opt in # --silent|-s) opt_silent=: # args_changed=: # ;; # # Separate non-argument short options: # -s*) func_split_short_opt "$_G_opt" # set dummy "$func_split_short_opt_name" \ # "-$func_split_short_opt_arg" ${1+"$@"} # shift # args_changed=: # ;; # *) # Make sure the first unrecognised option "$_G_opt" # # is added back to "$@" in case we need it later, # # if $args_changed was set to 'true'. # set dummy "$_G_opt" ${1+"$@"}; shift; break ;; # esac # done # # # Only call 'func_quote' here if we processed at least one argument. # if $args_changed; then # func_quote eval ${1+"$@"} # my_silent_option_result=$func_quote_result # fi # } # func_add_hook func_parse_options my_silent_option # # # my_option_validation () # { # $debug_cmd # # $opt_silent && $opt_verbose && func_fatal_help "\ # '--silent' and '--verbose' options are mutually exclusive." # } # func_add_hook func_validate_options my_option_validation # # You'll also need to manually amend $usage_message to reflect the extra # options you parse. It's preferable to append if you can, so that # multiple option parsing hooks can be added safely. # func_options_finish [ARG]... # ---------------------------- # Finishing the option parse loop (call 'func_options' hooks ATM). func_options_finish () { $debug_cmd func_run_hooks func_options ${1+"$@"} func_propagate_result func_run_hooks func_options_finish } # func_options [ARG]... # --------------------- # All the functions called inside func_options are hookable. See the # individual implementations for details. func_hookable func_options func_options () { $debug_cmd _G_options_quoted=false for my_func in options_prep parse_options validate_options options_finish do func_unset func_${my_func}_result func_unset func_run_hooks_result eval func_$my_func '${1+"$@"}' func_propagate_result func_$my_func func_options if $func_propagate_result_result; then eval set dummy "$func_options_result"; shift _G_options_quoted=: fi done $_G_options_quoted || { # As we (func_options) are top-level options-parser function and # nobody quoted "$@" for us yet, we need to do it explicitly for # caller. func_quote eval ${1+"$@"} func_options_result=$func_quote_result } } # func_options_prep [ARG]... # -------------------------- # All initialisations required before starting the option parse loop. # Note that when calling hook functions, we pass through the list of # positional parameters. If a hook function modifies that list, and # needs to propagate that back to rest of this script, then the complete # modified list must be put in 'func_run_hooks_result' before returning. func_hookable func_options_prep func_options_prep () { $debug_cmd # Option defaults: opt_verbose=false opt_warning_types= func_run_hooks func_options_prep ${1+"$@"} func_propagate_result func_run_hooks func_options_prep } # func_parse_options [ARG]... # --------------------------- # The main option parsing loop. func_hookable func_parse_options func_parse_options () { $debug_cmd _G_parse_options_requote=false # this just eases exit handling while test $# -gt 0; do # Defer to hook functions for initial option parsing, so they # get priority in the event of reusing an option name. func_run_hooks func_parse_options ${1+"$@"} func_propagate_result func_run_hooks func_parse_options if $func_propagate_result_result; then eval set dummy "$func_parse_options_result"; shift # Even though we may have changed "$@", we passed the "$@" array # down into the hook and it quoted it for us (because we are in # this if-branch). No need to quote it again. _G_parse_options_requote=false fi # Break out of the loop if we already parsed every option. test $# -gt 0 || break # We expect that one of the options parsed in this function matches # and thus we remove _G_opt from "$@" and need to re-quote. _G_match_parse_options=: _G_opt=$1 shift case $_G_opt in --debug|-x) debug_cmd='set -x' func_echo "enabling shell trace mode" >&2 $debug_cmd ;; --no-warnings|--no-warning|--no-warn) set dummy --warnings none ${1+"$@"} shift ;; --warnings|--warning|-W) if test $# = 0 && func_missing_arg $_G_opt; then _G_parse_options_requote=: break fi case " $warning_categories $1" in *" $1 "*) # trailing space prevents matching last $1 above func_append_uniq opt_warning_types " $1" ;; *all) opt_warning_types=$warning_categories ;; *none) opt_warning_types=none warning_func=: ;; *error) opt_warning_types=$warning_categories warning_func=func_fatal_error ;; *) func_fatal_error \ "unsupported warning category: '$1'" ;; esac shift ;; --verbose|-v) opt_verbose=: ;; --version) func_version ;; -\?|-h) func_usage ;; --help) func_help ;; # Separate optargs to long options (plugins may need this): --*=*) func_split_equals "$_G_opt" set dummy "$func_split_equals_lhs" \ "$func_split_equals_rhs" ${1+"$@"} shift ;; # Separate optargs to short options: -W*) func_split_short_opt "$_G_opt" set dummy "$func_split_short_opt_name" \ "$func_split_short_opt_arg" ${1+"$@"} shift ;; # Separate non-argument short options: -\?*|-h*|-v*|-x*) func_split_short_opt "$_G_opt" set dummy "$func_split_short_opt_name" \ "-$func_split_short_opt_arg" ${1+"$@"} shift ;; --) _G_parse_options_requote=: ; break ;; -*) func_fatal_help "unrecognised option: '$_G_opt'" ;; *) set dummy "$_G_opt" ${1+"$@"}; shift _G_match_parse_options=false break ;; esac if $_G_match_parse_options; then _G_parse_options_requote=: fi done if $_G_parse_options_requote; then # save modified positional parameters for caller func_quote eval ${1+"$@"} func_parse_options_result=$func_quote_result fi } # func_validate_options [ARG]... # ------------------------------ # Perform any sanity checks on option settings and/or unconsumed # arguments. func_hookable func_validate_options func_validate_options () { $debug_cmd # Display all warnings if -W was not given. test -n "$opt_warning_types" || opt_warning_types=" $warning_categories" func_run_hooks func_validate_options ${1+"$@"} func_propagate_result func_run_hooks func_validate_options # Bail if the options were screwed! $exit_cmd $EXIT_FAILURE } ## ----------------- ## ## Helper functions. ## ## ----------------- ## # This section contains the helper functions used by the rest of the # hookable option parser framework in ascii-betical order. # func_fatal_help ARG... # ---------------------- # Echo program name prefixed message to standard error, followed by # a help hint, and exit. func_fatal_help () { $debug_cmd eval \$ECHO \""Usage: $usage"\" eval \$ECHO \""$fatal_help"\" func_error ${1+"$@"} exit $EXIT_FAILURE } # func_help # --------- # Echo long help message to standard output and exit. func_help () { $debug_cmd func_usage_message $ECHO "$long_help_message" exit 0 } # func_missing_arg ARGNAME # ------------------------ # Echo program name prefixed message to standard error and set global # exit_cmd. func_missing_arg () { $debug_cmd func_error "Missing argument for '$1'." exit_cmd=exit } # func_split_equals STRING # ------------------------ # Set func_split_equals_lhs and func_split_equals_rhs shell variables # after splitting STRING at the '=' sign. test -z "$_G_HAVE_XSI_OPS" \ && (eval 'x=a/b/c; test 5aa/bb/cc = "${#x}${x%%/*}${x%/*}${x#*/}${x##*/}"') 2>/dev/null \ && _G_HAVE_XSI_OPS=yes if test yes = "$_G_HAVE_XSI_OPS" then # This is an XSI compatible shell, allowing a faster implementation... eval 'func_split_equals () { $debug_cmd func_split_equals_lhs=${1%%=*} func_split_equals_rhs=${1#*=} if test "x$func_split_equals_lhs" = "x$1"; then func_split_equals_rhs= fi }' else # ...otherwise fall back to using expr, which is often a shell builtin. func_split_equals () { $debug_cmd func_split_equals_lhs=`expr "x$1" : 'x\([^=]*\)'` func_split_equals_rhs= test "x$func_split_equals_lhs=" = "x$1" \ || func_split_equals_rhs=`expr "x$1" : 'x[^=]*=\(.*\)$'` } fi #func_split_equals # func_split_short_opt SHORTOPT # ----------------------------- # Set func_split_short_opt_name and func_split_short_opt_arg shell # variables after splitting SHORTOPT after the 2nd character. if test yes = "$_G_HAVE_XSI_OPS" then # This is an XSI compatible shell, allowing a faster implementation... eval 'func_split_short_opt () { $debug_cmd func_split_short_opt_arg=${1#??} func_split_short_opt_name=${1%"$func_split_short_opt_arg"} }' else # ...otherwise fall back to using expr, which is often a shell builtin. func_split_short_opt () { $debug_cmd func_split_short_opt_name=`expr "x$1" : 'x\(-.\)'` func_split_short_opt_arg=`expr "x$1" : 'x-.\(.*\)$'` } fi #func_split_short_opt # func_usage # ---------- # Echo short help message to standard output and exit. func_usage () { $debug_cmd func_usage_message $ECHO "Run '$progname --help |${PAGER-more}' for full usage" exit 0 } # func_usage_message # ------------------ # Echo short help message to standard output. func_usage_message () { $debug_cmd eval \$ECHO \""Usage: $usage"\" echo $SED -n 's|^# || /^Written by/{ x;p;x } h /^Written by/q' < "$progpath" echo eval \$ECHO \""$usage_message"\" } # func_version # ------------ # Echo version message to standard output and exit. # The version message is extracted from the calling file's header # comments, with leading '# ' stripped: # 1. First display the progname and version # 2. Followed by the header comment line matching /^# Written by / # 3. Then a blank line followed by the first following line matching # /^# Copyright / # 4. Immediately followed by any lines between the previous matches, # except lines preceding the intervening completely blank line. # For example, see the header comments of this file. func_version () { $debug_cmd printf '%s\n' "$progname $scriptversion" $SED -n ' /^# Written by /!b s|^# ||; p; n :fwd2blnk /./ { n b fwd2blnk } p; n :holdwrnt s|^# || s|^# *$|| /^Copyright /!{ /./H n b holdwrnt } s|\((C)\)[ 0-9,-]*[ ,-]\([1-9][0-9]* \)|\1 \2| G s|\(\n\)\n*|\1|g p; q' < "$progpath" exit $? } # Local variables: # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'before-save-hook 'time-stamp) # time-stamp-pattern: "30/scriptversion=%:y-%02m-%02d.%02H; # UTC" # time-stamp-time-zone: "UTC" # End: # Set a version string. scriptversion='(GNU libtool) 2.4.7' # func_echo ARG... # ---------------- # Libtool also displays the current mode in messages, so override # funclib.sh func_echo with this custom definition. func_echo () { $debug_cmd _G_message=$* func_echo_IFS=$IFS IFS=$nl for _G_line in $_G_message; do IFS=$func_echo_IFS $ECHO "$progname${opt_mode+: $opt_mode}: $_G_line" done IFS=$func_echo_IFS } # func_warning ARG... # ------------------- # Libtool warnings are not categorized, so override funclib.sh # func_warning with this simpler definition. func_warning () { $debug_cmd $warning_func ${1+"$@"} } ## ---------------- ## ## Options parsing. ## ## ---------------- ## # Hook in the functions to make sure our own options are parsed during # the option parsing loop. usage='$progpath [OPTION]... [MODE-ARG]...' # Short help message in response to '-h'. usage_message="Options: --config show all configuration variables --debug enable verbose shell tracing -n, --dry-run display commands without modifying any files --features display basic configuration information and exit --mode=MODE use operation mode MODE --no-warnings equivalent to '-Wnone' --preserve-dup-deps don't remove duplicate dependency libraries --quiet, --silent don't print informational messages --tag=TAG use configuration variables from tag TAG -v, --verbose print more informational messages than default --version print version information -W, --warnings=CATEGORY report the warnings falling in CATEGORY [all] -h, --help, --help-all print short, long, or detailed help message " # Additional text appended to 'usage_message' in response to '--help'. func_help () { $debug_cmd func_usage_message $ECHO "$long_help_message MODE must be one of the following: clean remove files from the build directory compile compile a source file into a libtool object execute automatically set library path, then run a program finish complete the installation of libtool libraries install install libraries or executables link create a library or an executable uninstall remove libraries from an installed directory MODE-ARGS vary depending on the MODE. When passed as first option, '--mode=MODE' may be abbreviated as 'MODE' or a unique abbreviation of that. Try '$progname --help --mode=MODE' for a more detailed description of MODE. When reporting a bug, please describe a test case to reproduce it and include the following information: host-triplet: $host shell: $SHELL compiler: $LTCC compiler flags: $LTCFLAGS linker: $LD (gnu? $with_gnu_ld) version: $progname $scriptversion Debian-2.4.7-5 automake: `($AUTOMAKE --version) 2>/dev/null |$SED 1q` autoconf: `($AUTOCONF --version) 2>/dev/null |$SED 1q` Report bugs to . GNU libtool home page: . General help using GNU software: ." exit 0 } # func_lo2o OBJECT-NAME # --------------------- # Transform OBJECT-NAME from a '.lo' suffix to the platform specific # object suffix. lo2o=s/\\.lo\$/.$objext/ o2lo=s/\\.$objext\$/.lo/ if test yes = "$_G_HAVE_XSI_OPS"; then eval 'func_lo2o () { case $1 in *.lo) func_lo2o_result=${1%.lo}.$objext ;; * ) func_lo2o_result=$1 ;; esac }' # func_xform LIBOBJ-OR-SOURCE # --------------------------- # Transform LIBOBJ-OR-SOURCE from a '.o' or '.c' (or otherwise) # suffix to a '.lo' libtool-object suffix. eval 'func_xform () { func_xform_result=${1%.*}.lo }' else # ...otherwise fall back to using sed. func_lo2o () { func_lo2o_result=`$ECHO "$1" | $SED "$lo2o"` } func_xform () { func_xform_result=`$ECHO "$1" | $SED 's|\.[^.]*$|.lo|'` } fi # func_fatal_configuration ARG... # ------------------------------- # Echo program name prefixed message to standard error, followed by # a configuration failure hint, and exit. func_fatal_configuration () { func_fatal_error ${1+"$@"} \ "See the $PACKAGE documentation for more information." \ "Fatal configuration error." } # func_config # ----------- # Display the configuration for all the tags in this script. func_config () { re_begincf='^# ### BEGIN LIBTOOL' re_endcf='^# ### END LIBTOOL' # Default configuration. $SED "1,/$re_begincf CONFIG/d;/$re_endcf CONFIG/,\$d" < "$progpath" # Now print the configurations for the tags. for tagname in $taglist; do $SED -n "/$re_begincf TAG CONFIG: $tagname\$/,/$re_endcf TAG CONFIG: $tagname\$/p" < "$progpath" done exit $? } # func_features # ------------- # Display the features supported by this script. func_features () { echo "host: $host" if test yes = "$build_libtool_libs"; then echo "enable shared libraries" else echo "disable shared libraries" fi if test yes = "$build_old_libs"; then echo "enable static libraries" else echo "disable static libraries" fi exit $? } # func_enable_tag TAGNAME # ----------------------- # Verify that TAGNAME is valid, and either flag an error and exit, or # enable the TAGNAME tag. We also add TAGNAME to the global $taglist # variable here. func_enable_tag () { # Global variable: tagname=$1 re_begincf="^# ### BEGIN LIBTOOL TAG CONFIG: $tagname\$" re_endcf="^# ### END LIBTOOL TAG CONFIG: $tagname\$" sed_extractcf=/$re_begincf/,/$re_endcf/p # Validate tagname. case $tagname in *[!-_A-Za-z0-9,/]*) func_fatal_error "invalid tag name: $tagname" ;; esac # Don't test for the "default" C tag, as we know it's # there but not specially marked. case $tagname in CC) ;; *) if $GREP "$re_begincf" "$progpath" >/dev/null 2>&1; then taglist="$taglist $tagname" # Evaluate the configuration. Be careful to quote the path # and the sed script, to avoid splitting on whitespace, but # also don't use non-portable quotes within backquotes within # quotes we have to do it in 2 steps: extractedcf=`$SED -n -e "$sed_extractcf" < "$progpath"` eval "$extractedcf" else func_error "ignoring unknown tag $tagname" fi ;; esac } # func_check_version_match # ------------------------ # Ensure that we are using m4 macros, and libtool script from the same # release of libtool. func_check_version_match () { if test "$package_revision" != "$macro_revision"; then if test "$VERSION" != "$macro_version"; then if test -z "$macro_version"; then cat >&2 <<_LT_EOF $progname: Version mismatch error. This is $PACKAGE $VERSION, but the $progname: definition of this LT_INIT comes from an older release. $progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION $progname: and run autoconf again. _LT_EOF else cat >&2 <<_LT_EOF $progname: Version mismatch error. This is $PACKAGE $VERSION, but the $progname: definition of this LT_INIT comes from $PACKAGE $macro_version. $progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION $progname: and run autoconf again. _LT_EOF fi else cat >&2 <<_LT_EOF $progname: Version mismatch error. This is $PACKAGE $VERSION, revision $package_revision, $progname: but the definition of this LT_INIT comes from revision $macro_revision. $progname: You should recreate aclocal.m4 with macros from revision $package_revision $progname: of $PACKAGE $VERSION and run autoconf again. _LT_EOF fi exit $EXIT_MISMATCH fi } # libtool_options_prep [ARG]... # ----------------------------- # Preparation for options parsed by libtool. libtool_options_prep () { $debug_mode # Option defaults: opt_config=false opt_dlopen= opt_dry_run=false opt_help=false opt_mode= opt_preserve_dup_deps=false opt_quiet=false nonopt= preserve_args= _G_rc_lt_options_prep=: _G_rc_lt_options_prep=: # Shorthand for --mode=foo, only valid as the first argument case $1 in clean|clea|cle|cl) shift; set dummy --mode clean ${1+"$@"}; shift ;; compile|compil|compi|comp|com|co|c) shift; set dummy --mode compile ${1+"$@"}; shift ;; execute|execut|execu|exec|exe|ex|e) shift; set dummy --mode execute ${1+"$@"}; shift ;; finish|finis|fini|fin|fi|f) shift; set dummy --mode finish ${1+"$@"}; shift ;; install|instal|insta|inst|ins|in|i) shift; set dummy --mode install ${1+"$@"}; shift ;; link|lin|li|l) shift; set dummy --mode link ${1+"$@"}; shift ;; uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u) shift; set dummy --mode uninstall ${1+"$@"}; shift ;; *) _G_rc_lt_options_prep=false ;; esac if $_G_rc_lt_options_prep; then # Pass back the list of options. func_quote eval ${1+"$@"} libtool_options_prep_result=$func_quote_result fi } func_add_hook func_options_prep libtool_options_prep # libtool_parse_options [ARG]... # --------------------------------- # Provide handling for libtool specific options. libtool_parse_options () { $debug_cmd _G_rc_lt_parse_options=false # Perform our own loop to consume as many options as possible in # each iteration. while test $# -gt 0; do _G_match_lt_parse_options=: _G_opt=$1 shift case $_G_opt in --dry-run|--dryrun|-n) opt_dry_run=: ;; --config) func_config ;; --dlopen|-dlopen) opt_dlopen="${opt_dlopen+$opt_dlopen }$1" shift ;; --preserve-dup-deps) opt_preserve_dup_deps=: ;; --features) func_features ;; --finish) set dummy --mode finish ${1+"$@"}; shift ;; --help) opt_help=: ;; --help-all) opt_help=': help-all' ;; --mode) test $# = 0 && func_missing_arg $_G_opt && break opt_mode=$1 case $1 in # Valid mode arguments: clean|compile|execute|finish|install|link|relink|uninstall) ;; # Catch anything else as an error *) func_error "invalid argument for $_G_opt" exit_cmd=exit break ;; esac shift ;; --no-silent|--no-quiet) opt_quiet=false func_append preserve_args " $_G_opt" ;; --no-warnings|--no-warning|--no-warn) opt_warning=false func_append preserve_args " $_G_opt" ;; --no-verbose) opt_verbose=false func_append preserve_args " $_G_opt" ;; --silent|--quiet) opt_quiet=: opt_verbose=false func_append preserve_args " $_G_opt" ;; --tag) test $# = 0 && func_missing_arg $_G_opt && break opt_tag=$1 func_append preserve_args " $_G_opt $1" func_enable_tag "$1" shift ;; --verbose|-v) opt_quiet=false opt_verbose=: func_append preserve_args " $_G_opt" ;; # An option not handled by this hook function: *) set dummy "$_G_opt" ${1+"$@"} ; shift _G_match_lt_parse_options=false break ;; esac $_G_match_lt_parse_options && _G_rc_lt_parse_options=: done if $_G_rc_lt_parse_options; then # save modified positional parameters for caller func_quote eval ${1+"$@"} libtool_parse_options_result=$func_quote_result fi } func_add_hook func_parse_options libtool_parse_options # libtool_validate_options [ARG]... # --------------------------------- # Perform any sanity checks on option settings and/or unconsumed # arguments. libtool_validate_options () { # save first non-option argument if test 0 -lt $#; then nonopt=$1 shift fi # preserve --debug test : = "$debug_cmd" || func_append preserve_args " --debug" case $host in # Solaris2 added to fix http://debbugs.gnu.org/cgi/bugreport.cgi?bug=16452 # see also: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59788 *cygwin* | *mingw* | *pw32* | *cegcc* | *solaris2* | *os2*) # don't eliminate duplications in $postdeps and $predeps opt_duplicate_compiler_generated_deps=: ;; *) opt_duplicate_compiler_generated_deps=$opt_preserve_dup_deps ;; esac $opt_help || { # Sanity checks first: func_check_version_match test yes != "$build_libtool_libs" \ && test yes != "$build_old_libs" \ && func_fatal_configuration "not configured to build any kind of library" # Darwin sucks eval std_shrext=\"$shrext_cmds\" # Only execute mode is allowed to have -dlopen flags. if test -n "$opt_dlopen" && test execute != "$opt_mode"; then func_error "unrecognized option '-dlopen'" $ECHO "$help" 1>&2 exit $EXIT_FAILURE fi # Change the help message to a mode-specific one. generic_help=$help help="Try '$progname --help --mode=$opt_mode' for more information." } # Pass back the unparsed argument list func_quote eval ${1+"$@"} libtool_validate_options_result=$func_quote_result } func_add_hook func_validate_options libtool_validate_options # Process options as early as possible so that --help and --version # can return quickly. func_options ${1+"$@"} eval set dummy "$func_options_result"; shift ## ----------- ## ## Main. ## ## ----------- ## magic='%%%MAGIC variable%%%' magic_exe='%%%MAGIC EXE variable%%%' # Global variables. extracted_archives= extracted_serial=0 # If this variable is set in any of the actions, the command in it # will be execed at the end. This prevents here-documents from being # left over by shells. exec_cmd= # A function that is used when there is no print builtin or printf. func_fallback_echo () { eval 'cat <<_LTECHO_EOF $1 _LTECHO_EOF' } # func_generated_by_libtool # True iff stdin has been generated by Libtool. This function is only # a basic sanity check; it will hardly flush out determined imposters. func_generated_by_libtool_p () { $GREP "^# Generated by .*$PACKAGE" > /dev/null 2>&1 } # func_lalib_p file # True iff FILE is a libtool '.la' library or '.lo' object file. # This function is only a basic sanity check; it will hardly flush out # determined imposters. func_lalib_p () { test -f "$1" && $SED -e 4q "$1" 2>/dev/null | func_generated_by_libtool_p } # func_lalib_unsafe_p file # True iff FILE is a libtool '.la' library or '.lo' object file. # This function implements the same check as func_lalib_p without # resorting to external programs. To this end, it redirects stdin and # closes it afterwards, without saving the original file descriptor. # As a safety measure, use it only where a negative result would be # fatal anyway. Works if 'file' does not exist. func_lalib_unsafe_p () { lalib_p=no if test -f "$1" && test -r "$1" && exec 5<&0 <"$1"; then for lalib_p_l in 1 2 3 4 do read lalib_p_line case $lalib_p_line in \#\ Generated\ by\ *$PACKAGE* ) lalib_p=yes; break;; esac done exec 0<&5 5<&- fi test yes = "$lalib_p" } # func_ltwrapper_script_p file # True iff FILE is a libtool wrapper script # This function is only a basic sanity check; it will hardly flush out # determined imposters. func_ltwrapper_script_p () { test -f "$1" && $lt_truncate_bin < "$1" 2>/dev/null | func_generated_by_libtool_p } # func_ltwrapper_executable_p file # True iff FILE is a libtool wrapper executable # This function is only a basic sanity check; it will hardly flush out # determined imposters. func_ltwrapper_executable_p () { func_ltwrapper_exec_suffix= case $1 in *.exe) ;; *) func_ltwrapper_exec_suffix=.exe ;; esac $GREP "$magic_exe" "$1$func_ltwrapper_exec_suffix" >/dev/null 2>&1 } # func_ltwrapper_scriptname file # Assumes file is an ltwrapper_executable # uses $file to determine the appropriate filename for a # temporary ltwrapper_script. func_ltwrapper_scriptname () { func_dirname_and_basename "$1" "" "." func_stripname '' '.exe' "$func_basename_result" func_ltwrapper_scriptname_result=$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper } # func_ltwrapper_p file # True iff FILE is a libtool wrapper script or wrapper executable # This function is only a basic sanity check; it will hardly flush out # determined imposters. func_ltwrapper_p () { func_ltwrapper_script_p "$1" || func_ltwrapper_executable_p "$1" } # func_execute_cmds commands fail_cmd # Execute tilde-delimited COMMANDS. # If FAIL_CMD is given, eval that upon failure. # FAIL_CMD may read-access the current command in variable CMD! func_execute_cmds () { $debug_cmd save_ifs=$IFS; IFS='~' for cmd in $1; do IFS=$sp$nl eval cmd=\"$cmd\" IFS=$save_ifs func_show_eval "$cmd" "${2-:}" done IFS=$save_ifs } # func_source file # Source FILE, adding directory component if necessary. # Note that it is not necessary on cygwin/mingw to append a dot to # FILE even if both FILE and FILE.exe exist: automatic-append-.exe # behavior happens only for exec(3), not for open(2)! Also, sourcing # 'FILE.' does not work on cygwin managed mounts. func_source () { $debug_cmd case $1 in */* | *\\*) . "$1" ;; *) . "./$1" ;; esac } # func_resolve_sysroot PATH # Replace a leading = in PATH with a sysroot. Store the result into # func_resolve_sysroot_result func_resolve_sysroot () { func_resolve_sysroot_result=$1 case $func_resolve_sysroot_result in =*) func_stripname '=' '' "$func_resolve_sysroot_result" func_resolve_sysroot_result=$lt_sysroot$func_stripname_result ;; esac } # func_replace_sysroot PATH # If PATH begins with the sysroot, replace it with = and # store the result into func_replace_sysroot_result. func_replace_sysroot () { case $lt_sysroot:$1 in ?*:"$lt_sysroot"*) func_stripname "$lt_sysroot" '' "$1" func_replace_sysroot_result='='$func_stripname_result ;; *) # Including no sysroot. func_replace_sysroot_result=$1 ;; esac } # func_infer_tag arg # Infer tagged configuration to use if any are available and # if one wasn't chosen via the "--tag" command line option. # Only attempt this if the compiler in the base compile # command doesn't match the default compiler. # arg is usually of the form 'gcc ...' func_infer_tag () { $debug_cmd if test -n "$available_tags" && test -z "$tagname"; then CC_quoted= for arg in $CC; do func_append_quoted CC_quoted "$arg" done CC_expanded=`func_echo_all $CC` CC_quoted_expanded=`func_echo_all $CC_quoted` case $@ in # Blanks in the command may have been stripped by the calling shell, # but not from the CC environment variable when configure was run. " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) ;; # Blanks at the start of $base_compile will cause this to fail # if we don't check for them as well. *) for z in $available_tags; do if $GREP "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then # Evaluate the configuration. eval "`$SED -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`" CC_quoted= for arg in $CC; do # Double-quote args containing other shell metacharacters. func_append_quoted CC_quoted "$arg" done CC_expanded=`func_echo_all $CC` CC_quoted_expanded=`func_echo_all $CC_quoted` case "$@ " in " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) # The compiler in the base compile command matches # the one in the tagged configuration. # Assume this is the tagged configuration we want. tagname=$z break ;; esac fi done # If $tagname still isn't set, then no tagged configuration # was found and let the user know that the "--tag" command # line option must be used. if test -z "$tagname"; then func_echo "unable to infer tagged configuration" func_fatal_error "specify a tag with '--tag'" # else # func_verbose "using $tagname tagged configuration" fi ;; esac fi } # func_write_libtool_object output_name pic_name nonpic_name # Create a libtool object file (analogous to a ".la" file), # but don't create it if we're doing a dry run. func_write_libtool_object () { write_libobj=$1 if test yes = "$build_libtool_libs"; then write_lobj=\'$2\' else write_lobj=none fi if test yes = "$build_old_libs"; then write_oldobj=\'$3\' else write_oldobj=none fi $opt_dry_run || { cat >${write_libobj}T </dev/null` if test "$?" -eq 0 && test -n "$func_convert_core_file_wine_to_w32_tmp"; then func_convert_core_file_wine_to_w32_result=`$ECHO "$func_convert_core_file_wine_to_w32_tmp" | $SED -e "$sed_naive_backslashify"` else func_convert_core_file_wine_to_w32_result= fi fi } # end: func_convert_core_file_wine_to_w32 # func_convert_core_path_wine_to_w32 ARG # Helper function used by path conversion functions when $build is *nix, and # $host is mingw, cygwin, or some other w32 environment. Relies on a correctly # configured wine environment available, with the winepath program in $build's # $PATH. Assumes ARG has no leading or trailing path separator characters. # # ARG is path to be converted from $build format to win32. # Result is available in $func_convert_core_path_wine_to_w32_result. # Unconvertible file (directory) names in ARG are skipped; if no directory names # are convertible, then the result may be empty. func_convert_core_path_wine_to_w32 () { $debug_cmd # unfortunately, winepath doesn't convert paths, only file names func_convert_core_path_wine_to_w32_result= if test -n "$1"; then oldIFS=$IFS IFS=: for func_convert_core_path_wine_to_w32_f in $1; do IFS=$oldIFS func_convert_core_file_wine_to_w32 "$func_convert_core_path_wine_to_w32_f" if test -n "$func_convert_core_file_wine_to_w32_result"; then if test -z "$func_convert_core_path_wine_to_w32_result"; then func_convert_core_path_wine_to_w32_result=$func_convert_core_file_wine_to_w32_result else func_append func_convert_core_path_wine_to_w32_result ";$func_convert_core_file_wine_to_w32_result" fi fi done IFS=$oldIFS fi } # end: func_convert_core_path_wine_to_w32 # func_cygpath ARGS... # Wrapper around calling the cygpath program via LT_CYGPATH. This is used when # when (1) $build is *nix and Cygwin is hosted via a wine environment; or (2) # $build is MSYS and $host is Cygwin, or (3) $build is Cygwin. In case (1) or # (2), returns the Cygwin file name or path in func_cygpath_result (input # file name or path is assumed to be in w32 format, as previously converted # from $build's *nix or MSYS format). In case (3), returns the w32 file name # or path in func_cygpath_result (input file name or path is assumed to be in # Cygwin format). Returns an empty string on error. # # ARGS are passed to cygpath, with the last one being the file name or path to # be converted. # # Specify the absolute *nix (or w32) name to cygpath in the LT_CYGPATH # environment variable; do not put it in $PATH. func_cygpath () { $debug_cmd if test -n "$LT_CYGPATH" && test -f "$LT_CYGPATH"; then func_cygpath_result=`$LT_CYGPATH "$@" 2>/dev/null` if test "$?" -ne 0; then # on failure, ensure result is empty func_cygpath_result= fi else func_cygpath_result= func_error "LT_CYGPATH is empty or specifies non-existent file: '$LT_CYGPATH'" fi } #end: func_cygpath # func_convert_core_msys_to_w32 ARG # Convert file name or path ARG from MSYS format to w32 format. Return # result in func_convert_core_msys_to_w32_result. func_convert_core_msys_to_w32 () { $debug_cmd # awkward: cmd appends spaces to result func_convert_core_msys_to_w32_result=`( cmd //c echo "$1" ) 2>/dev/null | $SED -e 's/[ ]*$//' -e "$sed_naive_backslashify"` } #end: func_convert_core_msys_to_w32 # func_convert_file_check ARG1 ARG2 # Verify that ARG1 (a file name in $build format) was converted to $host # format in ARG2. Otherwise, emit an error message, but continue (resetting # func_to_host_file_result to ARG1). func_convert_file_check () { $debug_cmd if test -z "$2" && test -n "$1"; then func_error "Could not determine host file name corresponding to" func_error " '$1'" func_error "Continuing, but uninstalled executables may not work." # Fallback: func_to_host_file_result=$1 fi } # end func_convert_file_check # func_convert_path_check FROM_PATHSEP TO_PATHSEP FROM_PATH TO_PATH # Verify that FROM_PATH (a path in $build format) was converted to $host # format in TO_PATH. Otherwise, emit an error message, but continue, resetting # func_to_host_file_result to a simplistic fallback value (see below). func_convert_path_check () { $debug_cmd if test -z "$4" && test -n "$3"; then func_error "Could not determine the host path corresponding to" func_error " '$3'" func_error "Continuing, but uninstalled executables may not work." # Fallback. This is a deliberately simplistic "conversion" and # should not be "improved". See libtool.info. if test "x$1" != "x$2"; then lt_replace_pathsep_chars="s|$1|$2|g" func_to_host_path_result=`echo "$3" | $SED -e "$lt_replace_pathsep_chars"` else func_to_host_path_result=$3 fi fi } # end func_convert_path_check # func_convert_path_front_back_pathsep FRONTPAT BACKPAT REPL ORIG # Modifies func_to_host_path_result by prepending REPL if ORIG matches FRONTPAT # and appending REPL if ORIG matches BACKPAT. func_convert_path_front_back_pathsep () { $debug_cmd case $4 in $1 ) func_to_host_path_result=$3$func_to_host_path_result ;; esac case $4 in $2 ) func_append func_to_host_path_result "$3" ;; esac } # end func_convert_path_front_back_pathsep ################################################## # $build to $host FILE NAME CONVERSION FUNCTIONS # ################################################## # invoked via '$to_host_file_cmd ARG' # # In each case, ARG is the path to be converted from $build to $host format. # Result will be available in $func_to_host_file_result. # func_to_host_file ARG # Converts the file name ARG from $build format to $host format. Return result # in func_to_host_file_result. func_to_host_file () { $debug_cmd $to_host_file_cmd "$1" } # end func_to_host_file # func_to_tool_file ARG LAZY # converts the file name ARG from $build format to toolchain format. Return # result in func_to_tool_file_result. If the conversion in use is listed # in (the comma separated) LAZY, no conversion takes place. func_to_tool_file () { $debug_cmd case ,$2, in *,"$to_tool_file_cmd",*) func_to_tool_file_result=$1 ;; *) $to_tool_file_cmd "$1" func_to_tool_file_result=$func_to_host_file_result ;; esac } # end func_to_tool_file # func_convert_file_noop ARG # Copy ARG to func_to_host_file_result. func_convert_file_noop () { func_to_host_file_result=$1 } # end func_convert_file_noop # func_convert_file_msys_to_w32 ARG # Convert file name ARG from (mingw) MSYS to (mingw) w32 format; automatic # conversion to w32 is not available inside the cwrapper. Returns result in # func_to_host_file_result. func_convert_file_msys_to_w32 () { $debug_cmd func_to_host_file_result=$1 if test -n "$1"; then func_convert_core_msys_to_w32 "$1" func_to_host_file_result=$func_convert_core_msys_to_w32_result fi func_convert_file_check "$1" "$func_to_host_file_result" } # end func_convert_file_msys_to_w32 # func_convert_file_cygwin_to_w32 ARG # Convert file name ARG from Cygwin to w32 format. Returns result in # func_to_host_file_result. func_convert_file_cygwin_to_w32 () { $debug_cmd func_to_host_file_result=$1 if test -n "$1"; then # because $build is cygwin, we call "the" cygpath in $PATH; no need to use # LT_CYGPATH in this case. func_to_host_file_result=`cygpath -m "$1"` fi func_convert_file_check "$1" "$func_to_host_file_result" } # end func_convert_file_cygwin_to_w32 # func_convert_file_nix_to_w32 ARG # Convert file name ARG from *nix to w32 format. Requires a wine environment # and a working winepath. Returns result in func_to_host_file_result. func_convert_file_nix_to_w32 () { $debug_cmd func_to_host_file_result=$1 if test -n "$1"; then func_convert_core_file_wine_to_w32 "$1" func_to_host_file_result=$func_convert_core_file_wine_to_w32_result fi func_convert_file_check "$1" "$func_to_host_file_result" } # end func_convert_file_nix_to_w32 # func_convert_file_msys_to_cygwin ARG # Convert file name ARG from MSYS to Cygwin format. Requires LT_CYGPATH set. # Returns result in func_to_host_file_result. func_convert_file_msys_to_cygwin () { $debug_cmd func_to_host_file_result=$1 if test -n "$1"; then func_convert_core_msys_to_w32 "$1" func_cygpath -u "$func_convert_core_msys_to_w32_result" func_to_host_file_result=$func_cygpath_result fi func_convert_file_check "$1" "$func_to_host_file_result" } # end func_convert_file_msys_to_cygwin # func_convert_file_nix_to_cygwin ARG # Convert file name ARG from *nix to Cygwin format. Requires Cygwin installed # in a wine environment, working winepath, and LT_CYGPATH set. Returns result # in func_to_host_file_result. func_convert_file_nix_to_cygwin () { $debug_cmd func_to_host_file_result=$1 if test -n "$1"; then # convert from *nix to w32, then use cygpath to convert from w32 to cygwin. func_convert_core_file_wine_to_w32 "$1" func_cygpath -u "$func_convert_core_file_wine_to_w32_result" func_to_host_file_result=$func_cygpath_result fi func_convert_file_check "$1" "$func_to_host_file_result" } # end func_convert_file_nix_to_cygwin ############################################# # $build to $host PATH CONVERSION FUNCTIONS # ############################################# # invoked via '$to_host_path_cmd ARG' # # In each case, ARG is the path to be converted from $build to $host format. # The result will be available in $func_to_host_path_result. # # Path separators are also converted from $build format to $host format. If # ARG begins or ends with a path separator character, it is preserved (but # converted to $host format) on output. # # All path conversion functions are named using the following convention: # file name conversion function : func_convert_file_X_to_Y () # path conversion function : func_convert_path_X_to_Y () # where, for any given $build/$host combination the 'X_to_Y' value is the # same. If conversion functions are added for new $build/$host combinations, # the two new functions must follow this pattern, or func_init_to_host_path_cmd # will break. # func_init_to_host_path_cmd # Ensures that function "pointer" variable $to_host_path_cmd is set to the # appropriate value, based on the value of $to_host_file_cmd. to_host_path_cmd= func_init_to_host_path_cmd () { $debug_cmd if test -z "$to_host_path_cmd"; then func_stripname 'func_convert_file_' '' "$to_host_file_cmd" to_host_path_cmd=func_convert_path_$func_stripname_result fi } # func_to_host_path ARG # Converts the path ARG from $build format to $host format. Return result # in func_to_host_path_result. func_to_host_path () { $debug_cmd func_init_to_host_path_cmd $to_host_path_cmd "$1" } # end func_to_host_path # func_convert_path_noop ARG # Copy ARG to func_to_host_path_result. func_convert_path_noop () { func_to_host_path_result=$1 } # end func_convert_path_noop # func_convert_path_msys_to_w32 ARG # Convert path ARG from (mingw) MSYS to (mingw) w32 format; automatic # conversion to w32 is not available inside the cwrapper. Returns result in # func_to_host_path_result. func_convert_path_msys_to_w32 () { $debug_cmd func_to_host_path_result=$1 if test -n "$1"; then # Remove leading and trailing path separator characters from ARG. MSYS # behavior is inconsistent here; cygpath turns them into '.;' and ';.'; # and winepath ignores them completely. func_stripname : : "$1" func_to_host_path_tmp1=$func_stripname_result func_convert_core_msys_to_w32 "$func_to_host_path_tmp1" func_to_host_path_result=$func_convert_core_msys_to_w32_result func_convert_path_check : ";" \ "$func_to_host_path_tmp1" "$func_to_host_path_result" func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" fi } # end func_convert_path_msys_to_w32 # func_convert_path_cygwin_to_w32 ARG # Convert path ARG from Cygwin to w32 format. Returns result in # func_to_host_file_result. func_convert_path_cygwin_to_w32 () { $debug_cmd func_to_host_path_result=$1 if test -n "$1"; then # See func_convert_path_msys_to_w32: func_stripname : : "$1" func_to_host_path_tmp1=$func_stripname_result func_to_host_path_result=`cygpath -m -p "$func_to_host_path_tmp1"` func_convert_path_check : ";" \ "$func_to_host_path_tmp1" "$func_to_host_path_result" func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" fi } # end func_convert_path_cygwin_to_w32 # func_convert_path_nix_to_w32 ARG # Convert path ARG from *nix to w32 format. Requires a wine environment and # a working winepath. Returns result in func_to_host_file_result. func_convert_path_nix_to_w32 () { $debug_cmd func_to_host_path_result=$1 if test -n "$1"; then # See func_convert_path_msys_to_w32: func_stripname : : "$1" func_to_host_path_tmp1=$func_stripname_result func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1" func_to_host_path_result=$func_convert_core_path_wine_to_w32_result func_convert_path_check : ";" \ "$func_to_host_path_tmp1" "$func_to_host_path_result" func_convert_path_front_back_pathsep ":*" "*:" ";" "$1" fi } # end func_convert_path_nix_to_w32 # func_convert_path_msys_to_cygwin ARG # Convert path ARG from MSYS to Cygwin format. Requires LT_CYGPATH set. # Returns result in func_to_host_file_result. func_convert_path_msys_to_cygwin () { $debug_cmd func_to_host_path_result=$1 if test -n "$1"; then # See func_convert_path_msys_to_w32: func_stripname : : "$1" func_to_host_path_tmp1=$func_stripname_result func_convert_core_msys_to_w32 "$func_to_host_path_tmp1" func_cygpath -u -p "$func_convert_core_msys_to_w32_result" func_to_host_path_result=$func_cygpath_result func_convert_path_check : : \ "$func_to_host_path_tmp1" "$func_to_host_path_result" func_convert_path_front_back_pathsep ":*" "*:" : "$1" fi } # end func_convert_path_msys_to_cygwin # func_convert_path_nix_to_cygwin ARG # Convert path ARG from *nix to Cygwin format. Requires Cygwin installed in a # a wine environment, working winepath, and LT_CYGPATH set. Returns result in # func_to_host_file_result. func_convert_path_nix_to_cygwin () { $debug_cmd func_to_host_path_result=$1 if test -n "$1"; then # Remove leading and trailing path separator characters from # ARG. msys behavior is inconsistent here, cygpath turns them # into '.;' and ';.', and winepath ignores them completely. func_stripname : : "$1" func_to_host_path_tmp1=$func_stripname_result func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1" func_cygpath -u -p "$func_convert_core_path_wine_to_w32_result" func_to_host_path_result=$func_cygpath_result func_convert_path_check : : \ "$func_to_host_path_tmp1" "$func_to_host_path_result" func_convert_path_front_back_pathsep ":*" "*:" : "$1" fi } # end func_convert_path_nix_to_cygwin # func_dll_def_p FILE # True iff FILE is a Windows DLL '.def' file. # Keep in sync with _LT_DLL_DEF_P in libtool.m4 func_dll_def_p () { $debug_cmd func_dll_def_p_tmp=`$SED -n \ -e 's/^[ ]*//' \ -e '/^\(;.*\)*$/d' \ -e 's/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p' \ -e q \ "$1"` test DEF = "$func_dll_def_p_tmp" } # func_mode_compile arg... func_mode_compile () { $debug_cmd # Get the compilation command and the source file. base_compile= srcfile=$nonopt # always keep a non-empty value in "srcfile" suppress_opt=yes suppress_output= arg_mode=normal libobj= later= pie_flag= for arg do case $arg_mode in arg ) # do not "continue". Instead, add this to base_compile lastarg=$arg arg_mode=normal ;; target ) libobj=$arg arg_mode=normal continue ;; normal ) # Accept any command-line options. case $arg in -o) test -n "$libobj" && \ func_fatal_error "you cannot specify '-o' more than once" arg_mode=target continue ;; -pie | -fpie | -fPIE) func_append pie_flag " $arg" continue ;; -shared | -static | -prefer-pic | -prefer-non-pic) func_append later " $arg" continue ;; -no-suppress) suppress_opt=no continue ;; -Xcompiler) arg_mode=arg # the next one goes into the "base_compile" arg list continue # The current "srcfile" will either be retained or ;; # replaced later. I would guess that would be a bug. -Wc,*) func_stripname '-Wc,' '' "$arg" args=$func_stripname_result lastarg= save_ifs=$IFS; IFS=, for arg in $args; do IFS=$save_ifs func_append_quoted lastarg "$arg" done IFS=$save_ifs func_stripname ' ' '' "$lastarg" lastarg=$func_stripname_result # Add the arguments to base_compile. func_append base_compile " $lastarg" continue ;; *) # Accept the current argument as the source file. # The previous "srcfile" becomes the current argument. # lastarg=$srcfile srcfile=$arg ;; esac # case $arg ;; esac # case $arg_mode # Aesthetically quote the previous argument. func_append_quoted base_compile "$lastarg" done # for arg case $arg_mode in arg) func_fatal_error "you must specify an argument for -Xcompile" ;; target) func_fatal_error "you must specify a target with '-o'" ;; *) # Get the name of the library object. test -z "$libobj" && { func_basename "$srcfile" libobj=$func_basename_result } ;; esac # Recognize several different file suffixes. # If the user specifies -o file.o, it is replaced with file.lo case $libobj in *.[cCFSifmso] | \ *.ada | *.adb | *.ads | *.asm | \ *.c++ | *.cc | *.ii | *.class | *.cpp | *.cxx | \ *.[fF][09]? | *.for | *.java | *.go | *.obj | *.sx | *.cu | *.cup) func_xform "$libobj" libobj=$func_xform_result ;; esac case $libobj in *.lo) func_lo2o "$libobj"; obj=$func_lo2o_result ;; *) func_fatal_error "cannot determine name of library object from '$libobj'" ;; esac func_infer_tag $base_compile for arg in $later; do case $arg in -shared) test yes = "$build_libtool_libs" \ || func_fatal_configuration "cannot build a shared library" build_old_libs=no continue ;; -static) build_libtool_libs=no build_old_libs=yes continue ;; -prefer-pic) pic_mode=yes continue ;; -prefer-non-pic) pic_mode=no continue ;; esac done func_quote_arg pretty "$libobj" test "X$libobj" != "X$func_quote_arg_result" \ && $ECHO "X$libobj" | $GREP '[]~#^*{};<>?"'"'"' &()|`$[]' \ && func_warning "libobj name '$libobj' may not contain shell special characters." func_dirname_and_basename "$obj" "/" "" objname=$func_basename_result xdir=$func_dirname_result lobj=$xdir$objdir/$objname test -z "$base_compile" && \ func_fatal_help "you must specify a compilation command" # Delete any leftover library objects. if test yes = "$build_old_libs"; then removelist="$obj $lobj $libobj ${libobj}T" else removelist="$lobj $libobj ${libobj}T" fi # On Cygwin there's no "real" PIC flag so we must build both object types case $host_os in cygwin* | mingw* | pw32* | os2* | cegcc*) pic_mode=default ;; esac if test no = "$pic_mode" && test pass_all != "$deplibs_check_method"; then # non-PIC code in shared libraries is not supported pic_mode=default fi # Calculate the filename of the output object if compiler does # not support -o with -c if test no = "$compiler_c_o"; then output_obj=`$ECHO "$srcfile" | $SED 's%^.*/%%; s%\.[^.]*$%%'`.$objext lockfile=$output_obj.lock else output_obj= need_locks=no lockfile= fi # Lock this critical section if it is needed # We use this script file to make the link, it avoids creating a new file if test yes = "$need_locks"; then until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do func_echo "Waiting for $lockfile to be removed" sleep 2 done elif test warn = "$need_locks"; then if test -f "$lockfile"; then $ECHO "\ *** ERROR, $lockfile exists and contains: `cat $lockfile 2>/dev/null` This indicates that another process is trying to use the same temporary object file, and libtool could not work around it because your compiler does not support '-c' and '-o' together. If you repeat this compilation, it may succeed, by chance, but you had better avoid parallel builds (make -j) in this platform, or get a better compiler." $opt_dry_run || $RM $removelist exit $EXIT_FAILURE fi func_append removelist " $output_obj" $ECHO "$srcfile" > "$lockfile" fi $opt_dry_run || $RM $removelist func_append removelist " $lockfile" trap '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' 1 2 15 func_to_tool_file "$srcfile" func_convert_file_msys_to_w32 srcfile=$func_to_tool_file_result func_quote_arg pretty "$srcfile" qsrcfile=$func_quote_arg_result # Only build a PIC object if we are building libtool libraries. if test yes = "$build_libtool_libs"; then # Without this assignment, base_compile gets emptied. fbsd_hideous_sh_bug=$base_compile if test no != "$pic_mode"; then command="$base_compile $qsrcfile $pic_flag" else # Don't build PIC code command="$base_compile $qsrcfile" fi func_mkdir_p "$xdir$objdir" if test -z "$output_obj"; then # Place PIC objects in $objdir func_append command " -o $lobj" fi func_show_eval_locale "$command" \ 'test -n "$output_obj" && $RM $removelist; exit $EXIT_FAILURE' if test warn = "$need_locks" && test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then $ECHO "\ *** ERROR, $lockfile contains: `cat $lockfile 2>/dev/null` but it should contain: $srcfile This indicates that another process is trying to use the same temporary object file, and libtool could not work around it because your compiler does not support '-c' and '-o' together. If you repeat this compilation, it may succeed, by chance, but you had better avoid parallel builds (make -j) in this platform, or get a better compiler." $opt_dry_run || $RM $removelist exit $EXIT_FAILURE fi # Just move the object if needed, then go on to compile the next one if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then func_show_eval '$MV "$output_obj" "$lobj"' \ 'error=$?; $opt_dry_run || $RM $removelist; exit $error' fi # Allow error messages only from the first compilation. if test yes = "$suppress_opt"; then suppress_output=' >/dev/null 2>&1' fi fi # Only build a position-dependent object if we build old libraries. if test yes = "$build_old_libs"; then if test yes != "$pic_mode"; then # Don't build PIC code command="$base_compile $qsrcfile$pie_flag" else command="$base_compile $qsrcfile $pic_flag" fi if test yes = "$compiler_c_o"; then func_append command " -o $obj" fi # Suppress compiler output if we already did a PIC compilation. func_append command "$suppress_output" func_show_eval_locale "$command" \ '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' if test warn = "$need_locks" && test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then $ECHO "\ *** ERROR, $lockfile contains: `cat $lockfile 2>/dev/null` but it should contain: $srcfile This indicates that another process is trying to use the same temporary object file, and libtool could not work around it because your compiler does not support '-c' and '-o' together. If you repeat this compilation, it may succeed, by chance, but you had better avoid parallel builds (make -j) in this platform, or get a better compiler." $opt_dry_run || $RM $removelist exit $EXIT_FAILURE fi # Just move the object if needed if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then func_show_eval '$MV "$output_obj" "$obj"' \ 'error=$?; $opt_dry_run || $RM $removelist; exit $error' fi fi $opt_dry_run || { func_write_libtool_object "$libobj" "$objdir/$objname" "$objname" # Unlock the critical section if it was locked if test no != "$need_locks"; then removelist=$lockfile $RM "$lockfile" fi } exit $EXIT_SUCCESS } $opt_help || { test compile = "$opt_mode" && func_mode_compile ${1+"$@"} } func_mode_help () { # We need to display help for each of the modes. case $opt_mode in "") # Generic help is extracted from the usage comments # at the start of this file. func_help ;; clean) $ECHO \ "Usage: $progname [OPTION]... --mode=clean RM [RM-OPTION]... FILE... Remove files from the build directory. RM is the name of the program to use to delete files associated with each FILE (typically '/bin/rm'). RM-OPTIONS are options (such as '-f') to be passed to RM. If FILE is a libtool library, object or program, all the files associated with it are deleted. Otherwise, only FILE itself is deleted using RM." ;; compile) $ECHO \ "Usage: $progname [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE Compile a source file into a libtool library object. This mode accepts the following additional options: -o OUTPUT-FILE set the output file name to OUTPUT-FILE -no-suppress do not suppress compiler output for multiple passes -prefer-pic try to build PIC objects only -prefer-non-pic try to build non-PIC objects only -shared do not build a '.o' file suitable for static linking -static only build a '.o' file suitable for static linking -Wc,FLAG -Xcompiler FLAG pass FLAG directly to the compiler COMPILE-COMMAND is a command to be used in creating a 'standard' object file from the given SOURCEFILE. The output file name is determined by removing the directory component from SOURCEFILE, then substituting the C source code suffix '.c' with the library object suffix, '.lo'." ;; execute) $ECHO \ "Usage: $progname [OPTION]... --mode=execute COMMAND [ARGS]... Automatically set library path, then run a program. This mode accepts the following additional options: -dlopen FILE add the directory containing FILE to the library path This mode sets the library path environment variable according to '-dlopen' flags. If any of the ARGS are libtool executable wrappers, then they are translated into their corresponding uninstalled binary, and any of their required library directories are added to the library path. Then, COMMAND is executed, with ARGS as arguments." ;; finish) $ECHO \ "Usage: $progname [OPTION]... --mode=finish [LIBDIR]... Complete the installation of libtool libraries. Each LIBDIR is a directory that contains libtool libraries. The commands that this mode executes may require superuser privileges. Use the '--dry-run' option if you just want to see what would be executed." ;; install) $ECHO \ "Usage: $progname [OPTION]... --mode=install INSTALL-COMMAND... Install executables or libraries. INSTALL-COMMAND is the installation command. The first component should be either the 'install' or 'cp' program. The following components of INSTALL-COMMAND are treated specially: -inst-prefix-dir PREFIX-DIR Use PREFIX-DIR as a staging area for installation The rest of the components are interpreted as arguments to that command (only BSD-compatible install options are recognized)." ;; link) $ECHO \ "Usage: $progname [OPTION]... --mode=link LINK-COMMAND... Link object files or libraries together to form another library, or to create an executable program. LINK-COMMAND is a command using the C compiler that you would use to create a program from several object files. The following components of LINK-COMMAND are treated specially: -all-static do not do any dynamic linking at all -avoid-version do not add a version suffix if possible -bindir BINDIR specify path to binaries directory (for systems where libraries must be found in the PATH setting at runtime) -dlopen FILE '-dlpreopen' FILE if it cannot be dlopened at runtime -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3) -export-symbols SYMFILE try to export only the symbols listed in SYMFILE -export-symbols-regex REGEX try to export only the symbols matching REGEX -LLIBDIR search LIBDIR for required installed libraries -lNAME OUTPUT-FILE requires the installed library libNAME -module build a library that can dlopened -no-fast-install disable the fast-install mode -no-install link a not-installable executable -no-undefined declare that a library does not refer to external symbols -o OUTPUT-FILE create OUTPUT-FILE from the specified objects -objectlist FILE use a list of object files found in FILE to specify objects -os2dllname NAME force a short DLL name on OS/2 (no effect on other OSes) -precious-files-regex REGEX don't remove output files matching REGEX -release RELEASE specify package release information -rpath LIBDIR the created library will eventually be installed in LIBDIR -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries -shared only do dynamic linking of libtool libraries -shrext SUFFIX override the standard shared library file extension -static do not do any dynamic linking of uninstalled libtool libraries -static-libtool-libs do not do any dynamic linking of libtool libraries -version-info CURRENT[:REVISION[:AGE]] specify library version info [each variable defaults to 0] -weak LIBNAME declare that the target provides the LIBNAME interface -Wc,FLAG -Xcompiler FLAG pass linker-specific FLAG directly to the compiler -Wa,FLAG -Xassembler FLAG pass linker-specific FLAG directly to the assembler -Wl,FLAG -Xlinker FLAG pass linker-specific FLAG directly to the linker -XCClinker FLAG pass link-specific FLAG to the compiler driver (CC) All other options (arguments beginning with '-') are ignored. Every other argument is treated as a filename. Files ending in '.la' are treated as uninstalled libtool libraries, other files are standard or library object files. If the OUTPUT-FILE ends in '.la', then a libtool library is created, only library objects ('.lo' files) may be specified, and '-rpath' is required, except when creating a convenience library. If OUTPUT-FILE ends in '.a' or '.lib', then a standard library is created using 'ar' and 'ranlib', or on Windows using 'lib'. If OUTPUT-FILE ends in '.lo' or '.$objext', then a reloadable object file is created, otherwise an executable program is created." ;; uninstall) $ECHO \ "Usage: $progname [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE... Remove libraries from an installation directory. RM is the name of the program to use to delete files associated with each FILE (typically '/bin/rm'). RM-OPTIONS are options (such as '-f') to be passed to RM. If FILE is a libtool library, all the files associated with it are deleted. Otherwise, only FILE itself is deleted using RM." ;; *) func_fatal_help "invalid operation mode '$opt_mode'" ;; esac echo $ECHO "Try '$progname --help' for more information about other modes." } # Now that we've collected a possible --mode arg, show help if necessary if $opt_help; then if test : = "$opt_help"; then func_mode_help else { func_help noexit for opt_mode in compile link execute install finish uninstall clean; do func_mode_help done } | $SED -n '1p; 2,$s/^Usage:/ or: /p' { func_help noexit for opt_mode in compile link execute install finish uninstall clean; do echo func_mode_help done } | $SED '1d /^When reporting/,/^Report/{ H d } $x /information about other modes/d /more detailed .*MODE/d s/^Usage:.*--mode=\([^ ]*\) .*/Description of \1 mode:/' fi exit $? fi # func_mode_execute arg... func_mode_execute () { $debug_cmd # The first argument is the command name. cmd=$nonopt test -z "$cmd" && \ func_fatal_help "you must specify a COMMAND" # Handle -dlopen flags immediately. for file in $opt_dlopen; do test -f "$file" \ || func_fatal_help "'$file' is not a file" dir= case $file in *.la) func_resolve_sysroot "$file" file=$func_resolve_sysroot_result # Check to see that this really is a libtool archive. func_lalib_unsafe_p "$file" \ || func_fatal_help "'$lib' is not a valid libtool archive" # Read the libtool library. dlname= library_names= func_source "$file" # Skip this library if it cannot be dlopened. if test -z "$dlname"; then # Warn if it was a shared library. test -n "$library_names" && \ func_warning "'$file' was not linked with '-export-dynamic'" continue fi func_dirname "$file" "" "." dir=$func_dirname_result if test -f "$dir/$objdir/$dlname"; then func_append dir "/$objdir" else if test ! -f "$dir/$dlname"; then func_fatal_error "cannot find '$dlname' in '$dir' or '$dir/$objdir'" fi fi ;; *.lo) # Just add the directory containing the .lo file. func_dirname "$file" "" "." dir=$func_dirname_result ;; *) func_warning "'-dlopen' is ignored for non-libtool libraries and objects" continue ;; esac # Get the absolute pathname. absdir=`cd "$dir" && pwd` test -n "$absdir" && dir=$absdir # Now add the directory to shlibpath_var. if eval "test -z \"\$$shlibpath_var\""; then eval "$shlibpath_var=\"\$dir\"" else eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\"" fi done # This variable tells wrapper scripts just to set shlibpath_var # rather than running their programs. libtool_execute_magic=$magic # Check if any of the arguments is a wrapper script. args= for file do case $file in -* | *.la | *.lo ) ;; *) # Do a test to see if this is really a libtool program. if func_ltwrapper_script_p "$file"; then func_source "$file" # Transform arg to wrapped name. file=$progdir/$program elif func_ltwrapper_executable_p "$file"; then func_ltwrapper_scriptname "$file" func_source "$func_ltwrapper_scriptname_result" # Transform arg to wrapped name. file=$progdir/$program fi ;; esac # Quote arguments (to preserve shell metacharacters). func_append_quoted args "$file" done if $opt_dry_run; then # Display what would be done. if test -n "$shlibpath_var"; then eval "\$ECHO \"\$shlibpath_var=\$$shlibpath_var\"" echo "export $shlibpath_var" fi $ECHO "$cmd$args" exit $EXIT_SUCCESS else if test -n "$shlibpath_var"; then # Export the shlibpath_var. eval "export $shlibpath_var" fi # Restore saved environment variables for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES do eval "if test \"\${save_$lt_var+set}\" = set; then $lt_var=\$save_$lt_var; export $lt_var else $lt_unset $lt_var fi" done # Now prepare to actually exec the command. exec_cmd=\$cmd$args fi } test execute = "$opt_mode" && func_mode_execute ${1+"$@"} # func_mode_finish arg... func_mode_finish () { $debug_cmd libs= libdirs= admincmds= for opt in "$nonopt" ${1+"$@"} do if test -d "$opt"; then func_append libdirs " $opt" elif test -f "$opt"; then if func_lalib_unsafe_p "$opt"; then func_append libs " $opt" else func_warning "'$opt' is not a valid libtool archive" fi else func_fatal_error "invalid argument '$opt'" fi done if test -n "$libs"; then if test -n "$lt_sysroot"; then sysroot_regex=`$ECHO "$lt_sysroot" | $SED "$sed_make_literal_regex"` sysroot_cmd="s/\([ ']\)$sysroot_regex/\1/g;" else sysroot_cmd= fi # Remove sysroot references if $opt_dry_run; then for lib in $libs; do echo "removing references to $lt_sysroot and '=' prefixes from $lib" done else tmpdir=`func_mktempdir` for lib in $libs; do $SED -e "$sysroot_cmd s/\([ ']-[LR]\)=/\1/g; s/\([ ']\)=/\1/g" $lib \ > $tmpdir/tmp-la mv -f $tmpdir/tmp-la $lib done ${RM}r "$tmpdir" fi fi if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then for libdir in $libdirs; do if test -n "$finish_cmds"; then # Do each command in the finish commands. func_execute_cmds "$finish_cmds" 'admincmds="$admincmds '"$cmd"'"' fi if test -n "$finish_eval"; then # Do the single finish_eval. eval cmds=\"$finish_eval\" $opt_dry_run || eval "$cmds" || func_append admincmds " $cmds" fi done fi # Exit here if they wanted silent mode. $opt_quiet && exit $EXIT_SUCCESS if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then echo "----------------------------------------------------------------------" echo "Libraries have been installed in:" for libdir in $libdirs; do $ECHO " $libdir" done echo echo "If you ever happen to want to link against installed libraries" echo "in a given directory, LIBDIR, you must either use libtool, and" echo "specify the full pathname of the library, or use the '-LLIBDIR'" echo "flag during linking and do at least one of the following:" if test -n "$shlibpath_var"; then echo " - add LIBDIR to the '$shlibpath_var' environment variable" echo " during execution" fi if test -n "$runpath_var"; then echo " - add LIBDIR to the '$runpath_var' environment variable" echo " during linking" fi if test -n "$hardcode_libdir_flag_spec"; then libdir=LIBDIR eval flag=\"$hardcode_libdir_flag_spec\" $ECHO " - use the '$flag' linker flag" fi if test -n "$admincmds"; then $ECHO " - have your system administrator run these commands:$admincmds" fi if test -f /etc/ld.so.conf; then echo " - have your system administrator add LIBDIR to '/etc/ld.so.conf'" fi echo echo "See any operating system documentation about shared libraries for" case $host in solaris2.[6789]|solaris2.1[0-9]) echo "more information, such as the ld(1), crle(1) and ld.so(8) manual" echo "pages." ;; *) echo "more information, such as the ld(1) and ld.so(8) manual pages." ;; esac echo "----------------------------------------------------------------------" fi exit $EXIT_SUCCESS } test finish = "$opt_mode" && func_mode_finish ${1+"$@"} # func_mode_install arg... func_mode_install () { $debug_cmd # There may be an optional sh(1) argument at the beginning of # install_prog (especially on Windows NT). if test "$SHELL" = "$nonopt" || test /bin/sh = "$nonopt" || # Allow the use of GNU shtool's install command. case $nonopt in *shtool*) :;; *) false;; esac then # Aesthetically quote it. func_quote_arg pretty "$nonopt" install_prog="$func_quote_arg_result " arg=$1 shift else install_prog= arg=$nonopt fi # The real first argument should be the name of the installation program. # Aesthetically quote it. func_quote_arg pretty "$arg" func_append install_prog "$func_quote_arg_result" install_shared_prog=$install_prog case " $install_prog " in *[\\\ /]cp\ *) install_cp=: ;; *) install_cp=false ;; esac # We need to accept at least all the BSD install flags. dest= files= opts= prev= install_type= isdir=false stripme= no_mode=: for arg do arg2= if test -n "$dest"; then func_append files " $dest" dest=$arg continue fi case $arg in -d) isdir=: ;; -f) if $install_cp; then :; else prev=$arg fi ;; -g | -m | -o) prev=$arg ;; -s) stripme=" -s" continue ;; -*) ;; *) # If the previous option needed an argument, then skip it. if test -n "$prev"; then if test X-m = "X$prev" && test -n "$install_override_mode"; then arg2=$install_override_mode no_mode=false fi prev= else dest=$arg continue fi ;; esac # Aesthetically quote the argument. func_quote_arg pretty "$arg" func_append install_prog " $func_quote_arg_result" if test -n "$arg2"; then func_quote_arg pretty "$arg2" fi func_append install_shared_prog " $func_quote_arg_result" done test -z "$install_prog" && \ func_fatal_help "you must specify an install program" test -n "$prev" && \ func_fatal_help "the '$prev' option requires an argument" if test -n "$install_override_mode" && $no_mode; then if $install_cp; then :; else func_quote_arg pretty "$install_override_mode" func_append install_shared_prog " -m $func_quote_arg_result" fi fi if test -z "$files"; then if test -z "$dest"; then func_fatal_help "no file or destination specified" else func_fatal_help "you must specify a destination" fi fi # Strip any trailing slash from the destination. func_stripname '' '/' "$dest" dest=$func_stripname_result # Check to see that the destination is a directory. test -d "$dest" && isdir=: if $isdir; then destdir=$dest destname= else func_dirname_and_basename "$dest" "" "." destdir=$func_dirname_result destname=$func_basename_result # Not a directory, so check to see that there is only one file specified. set dummy $files; shift test "$#" -gt 1 && \ func_fatal_help "'$dest' is not a directory" fi case $destdir in [\\/]* | [A-Za-z]:[\\/]*) ;; *) for file in $files; do case $file in *.lo) ;; *) func_fatal_help "'$destdir' must be an absolute directory name" ;; esac done ;; esac # This variable tells wrapper scripts just to set variables rather # than running their programs. libtool_install_magic=$magic staticlibs= future_libdirs= current_libdirs= for file in $files; do # Do each installation. case $file in *.$libext) # Do the static libraries later. func_append staticlibs " $file" ;; *.la) func_resolve_sysroot "$file" file=$func_resolve_sysroot_result # Check to see that this really is a libtool archive. func_lalib_unsafe_p "$file" \ || func_fatal_help "'$file' is not a valid libtool archive" library_names= old_library= relink_command= func_source "$file" # Add the libdir to current_libdirs if it is the destination. if test "X$destdir" = "X$libdir"; then case "$current_libdirs " in *" $libdir "*) ;; *) func_append current_libdirs " $libdir" ;; esac else # Note the libdir as a future libdir. case "$future_libdirs " in *" $libdir "*) ;; *) func_append future_libdirs " $libdir" ;; esac fi func_dirname "$file" "/" "" dir=$func_dirname_result func_append dir "$objdir" if test -n "$relink_command"; then # Determine the prefix the user has applied to our future dir. inst_prefix_dir=`$ECHO "$destdir" | $SED -e "s%$libdir\$%%"` # Don't allow the user to place us outside of our expected # location b/c this prevents finding dependent libraries that # are installed to the same prefix. # At present, this check doesn't affect windows .dll's that # are installed into $libdir/../bin (currently, that works fine) # but it's something to keep an eye on. test "$inst_prefix_dir" = "$destdir" && \ func_fatal_error "error: cannot install '$file' to a directory not ending in $libdir" if test -n "$inst_prefix_dir"; then # Stick the inst_prefix_dir data into the link command. relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"` else relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%%"` fi func_warning "relinking '$file'" func_show_eval "$relink_command" \ 'func_fatal_error "error: relink '\''$file'\'' with the above command before installing it"' fi # See the names of the shared library. set dummy $library_names; shift if test -n "$1"; then realname=$1 shift srcname=$realname test -n "$relink_command" && srcname=${realname}T # Install the shared library and build the symlinks. func_show_eval "$install_shared_prog $dir/$srcname $destdir/$realname" \ 'exit $?' tstripme=$stripme case $host_os in cygwin* | mingw* | pw32* | cegcc*) case $realname in *.dll.a) tstripme= ;; esac ;; os2*) case $realname in *_dll.a) tstripme= ;; esac ;; esac if test -n "$tstripme" && test -n "$striplib"; then func_show_eval "$striplib $destdir/$realname" 'exit $?' fi if test "$#" -gt 0; then # Delete the old symlinks, and create new ones. # Try 'ln -sf' first, because the 'ln' binary might depend on # the symlink we replace! Solaris /bin/ln does not understand -f, # so we also need to try rm && ln -s. for linkname do test "$linkname" != "$realname" \ && func_show_eval "(cd $destdir && { $LN_S -f $realname $linkname || { $RM $linkname && $LN_S $realname $linkname; }; })" done fi # Do each command in the postinstall commands. lib=$destdir/$realname func_execute_cmds "$postinstall_cmds" 'exit $?' fi # Install the pseudo-library for information purposes. func_basename "$file" name=$func_basename_result instname=$dir/${name}i func_show_eval "$install_prog $instname $destdir/$name" 'exit $?' # Maybe install the static library, too. test -n "$old_library" && func_append staticlibs " $dir/$old_library" ;; *.lo) # Install (i.e. copy) a libtool object. # Figure out destination file name, if it wasn't already specified. if test -n "$destname"; then destfile=$destdir/$destname else func_basename "$file" destfile=$func_basename_result destfile=$destdir/$destfile fi # Deduce the name of the destination old-style object file. case $destfile in *.lo) func_lo2o "$destfile" staticdest=$func_lo2o_result ;; *.$objext) staticdest=$destfile destfile= ;; *) func_fatal_help "cannot copy a libtool object to '$destfile'" ;; esac # Install the libtool object if requested. test -n "$destfile" && \ func_show_eval "$install_prog $file $destfile" 'exit $?' # Install the old object if enabled. if test yes = "$build_old_libs"; then # Deduce the name of the old-style object file. func_lo2o "$file" staticobj=$func_lo2o_result func_show_eval "$install_prog \$staticobj \$staticdest" 'exit $?' fi exit $EXIT_SUCCESS ;; *) # Figure out destination file name, if it wasn't already specified. if test -n "$destname"; then destfile=$destdir/$destname else func_basename "$file" destfile=$func_basename_result destfile=$destdir/$destfile fi # If the file is missing, and there is a .exe on the end, strip it # because it is most likely a libtool script we actually want to # install stripped_ext= case $file in *.exe) if test ! -f "$file"; then func_stripname '' '.exe' "$file" file=$func_stripname_result stripped_ext=.exe fi ;; esac # Do a test to see if this is really a libtool program. case $host in *cygwin* | *mingw*) if func_ltwrapper_executable_p "$file"; then func_ltwrapper_scriptname "$file" wrapper=$func_ltwrapper_scriptname_result else func_stripname '' '.exe' "$file" wrapper=$func_stripname_result fi ;; *) wrapper=$file ;; esac if func_ltwrapper_script_p "$wrapper"; then notinst_deplibs= relink_command= func_source "$wrapper" # Check the variables that should have been set. test -z "$generated_by_libtool_version" && \ func_fatal_error "invalid libtool wrapper script '$wrapper'" finalize=: for lib in $notinst_deplibs; do # Check to see that each library is installed. libdir= if test -f "$lib"; then func_source "$lib" fi libfile=$libdir/`$ECHO "$lib" | $SED 's%^.*/%%g'` if test -n "$libdir" && test ! -f "$libfile"; then func_warning "'$lib' has not been installed in '$libdir'" finalize=false fi done relink_command= func_source "$wrapper" outputname= if test no = "$fast_install" && test -n "$relink_command"; then $opt_dry_run || { if $finalize; then tmpdir=`func_mktempdir` func_basename "$file$stripped_ext" file=$func_basename_result outputname=$tmpdir/$file # Replace the output file specification. relink_command=`$ECHO "$relink_command" | $SED 's%@OUTPUT@%'"$outputname"'%g'` $opt_quiet || { func_quote_arg expand,pretty "$relink_command" eval "func_echo $func_quote_arg_result" } if eval "$relink_command"; then : else func_error "error: relink '$file' with the above command before installing it" $opt_dry_run || ${RM}r "$tmpdir" continue fi file=$outputname else func_warning "cannot relink '$file'" fi } else # Install the binary that we compiled earlier. file=`$ECHO "$file$stripped_ext" | $SED "s%\([^/]*\)$%$objdir/\1%"` fi fi # remove .exe since cygwin /usr/bin/install will append another # one anyway case $install_prog,$host in */usr/bin/install*,*cygwin*) case $file:$destfile in *.exe:*.exe) # this is ok ;; *.exe:*) destfile=$destfile.exe ;; *:*.exe) func_stripname '' '.exe' "$destfile" destfile=$func_stripname_result ;; esac ;; esac func_show_eval "$install_prog\$stripme \$file \$destfile" 'exit $?' $opt_dry_run || if test -n "$outputname"; then ${RM}r "$tmpdir" fi ;; esac done for file in $staticlibs; do func_basename "$file" name=$func_basename_result # Set up the ranlib parameters. oldlib=$destdir/$name func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 tool_oldlib=$func_to_tool_file_result func_show_eval "$install_prog \$file \$oldlib" 'exit $?' if test -n "$stripme" && test -n "$old_striplib"; then func_show_eval "$old_striplib $tool_oldlib" 'exit $?' fi # Do each command in the postinstall commands. func_execute_cmds "$old_postinstall_cmds" 'exit $?' done test -n "$future_libdirs" && \ func_warning "remember to run '$progname --finish$future_libdirs'" if test -n "$current_libdirs"; then # Maybe just do a dry run. $opt_dry_run && current_libdirs=" -n$current_libdirs" exec_cmd='$SHELL "$progpath" $preserve_args --finish$current_libdirs' else exit $EXIT_SUCCESS fi } test install = "$opt_mode" && func_mode_install ${1+"$@"} # func_generate_dlsyms outputname originator pic_p # Extract symbols from dlprefiles and create ${outputname}S.o with # a dlpreopen symbol table. func_generate_dlsyms () { $debug_cmd my_outputname=$1 my_originator=$2 my_pic_p=${3-false} my_prefix=`$ECHO "$my_originator" | $SED 's%[^a-zA-Z0-9]%_%g'` my_dlsyms= if test -n "$dlfiles$dlprefiles" || test no != "$dlself"; then if test -n "$NM" && test -n "$global_symbol_pipe"; then my_dlsyms=${my_outputname}S.c else func_error "not configured to extract global symbols from dlpreopened files" fi fi if test -n "$my_dlsyms"; then case $my_dlsyms in "") ;; *.c) # Discover the nlist of each of the dlfiles. nlist=$output_objdir/$my_outputname.nm func_show_eval "$RM $nlist ${nlist}S ${nlist}T" # Parse the name list into a source file. func_verbose "creating $output_objdir/$my_dlsyms" $opt_dry_run || $ECHO > "$output_objdir/$my_dlsyms" "\ /* $my_dlsyms - symbol resolution table for '$my_outputname' dlsym emulation. */ /* Generated by $PROGRAM (GNU $PACKAGE) $VERSION */ #ifdef __cplusplus extern \"C\" { #endif #if defined __GNUC__ && (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ > 4)) #pragma GCC diagnostic ignored \"-Wstrict-prototypes\" #endif /* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ #if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE /* DATA imports from DLLs on WIN32 can't be const, because runtime relocations are performed -- see ld's documentation on pseudo-relocs. */ # define LT_DLSYM_CONST #elif defined __osf__ /* This system does not cope well with relocations in const data. */ # define LT_DLSYM_CONST #else # define LT_DLSYM_CONST const #endif #define STREQ(s1, s2) (strcmp ((s1), (s2)) == 0) /* External symbol declarations for the compiler. */\ " if test yes = "$dlself"; then func_verbose "generating symbol list for '$output'" $opt_dry_run || echo ': @PROGRAM@ ' > "$nlist" # Add our own program objects to the symbol list. progfiles=`$ECHO "$objs$old_deplibs" | $SP2NL | $SED "$lo2o" | $NL2SP` for progfile in $progfiles; do func_to_tool_file "$progfile" func_convert_file_msys_to_w32 func_verbose "extracting global C symbols from '$func_to_tool_file_result'" $opt_dry_run || eval "$NM $func_to_tool_file_result | $global_symbol_pipe >> '$nlist'" done if test -n "$exclude_expsyms"; then $opt_dry_run || { eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T' eval '$MV "$nlist"T "$nlist"' } fi if test -n "$export_symbols_regex"; then $opt_dry_run || { eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T' eval '$MV "$nlist"T "$nlist"' } fi # Prepare the list of exported symbols if test -z "$export_symbols"; then export_symbols=$output_objdir/$outputname.exp $opt_dry_run || { $RM $export_symbols eval "$SED -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"' case $host in *cygwin* | *mingw* | *cegcc* ) eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"' ;; esac } else $opt_dry_run || { eval "$SED -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"' eval '$GREP -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T' eval '$MV "$nlist"T "$nlist"' case $host in *cygwin* | *mingw* | *cegcc* ) eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' eval 'cat "$nlist" >> "$output_objdir/$outputname.def"' ;; esac } fi fi for dlprefile in $dlprefiles; do func_verbose "extracting global C symbols from '$dlprefile'" func_basename "$dlprefile" name=$func_basename_result case $host in *cygwin* | *mingw* | *cegcc* ) # if an import library, we need to obtain dlname if func_win32_import_lib_p "$dlprefile"; then func_tr_sh "$dlprefile" eval "curr_lafile=\$libfile_$func_tr_sh_result" dlprefile_dlbasename= if test -n "$curr_lafile" && func_lalib_p "$curr_lafile"; then # Use subshell, to avoid clobbering current variable values dlprefile_dlname=`source "$curr_lafile" && echo "$dlname"` if test -n "$dlprefile_dlname"; then func_basename "$dlprefile_dlname" dlprefile_dlbasename=$func_basename_result else # no lafile. user explicitly requested -dlpreopen . $sharedlib_from_linklib_cmd "$dlprefile" dlprefile_dlbasename=$sharedlib_from_linklib_result fi fi $opt_dry_run || { if test -n "$dlprefile_dlbasename"; then eval '$ECHO ": $dlprefile_dlbasename" >> "$nlist"' else func_warning "Could not compute DLL name from $name" eval '$ECHO ": $name " >> "$nlist"' fi func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe | $SED -e '/I __imp/d' -e 's/I __nm_/D /;s/_nm__//' >> '$nlist'" } else # not an import lib $opt_dry_run || { eval '$ECHO ": $name " >> "$nlist"' func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" } fi ;; *) $opt_dry_run || { eval '$ECHO ": $name " >> "$nlist"' func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" } ;; esac done $opt_dry_run || { # Make sure we have at least an empty file. test -f "$nlist" || : > "$nlist" if test -n "$exclude_expsyms"; then $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T $MV "$nlist"T "$nlist" fi # Try sorting and uniquifying the output. if $GREP -v "^: " < "$nlist" | if sort -k 3 /dev/null 2>&1; then sort -k 3 else sort +2 fi | uniq > "$nlist"S; then : else $GREP -v "^: " < "$nlist" > "$nlist"S fi if test -f "$nlist"S; then eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$my_dlsyms"' else echo '/* NONE */' >> "$output_objdir/$my_dlsyms" fi func_show_eval '$RM "${nlist}I"' if test -n "$global_symbol_to_import"; then eval "$global_symbol_to_import"' < "$nlist"S > "$nlist"I' fi echo >> "$output_objdir/$my_dlsyms" "\ /* The mapping between symbol names and symbols. */ typedef struct { const char *name; void *address; } lt_dlsymlist; extern LT_DLSYM_CONST lt_dlsymlist lt_${my_prefix}_LTX_preloaded_symbols[];\ " if test -s "$nlist"I; then echo >> "$output_objdir/$my_dlsyms" "\ static void lt_syminit(void) { LT_DLSYM_CONST lt_dlsymlist *symbol = lt_${my_prefix}_LTX_preloaded_symbols; for (; symbol->name; ++symbol) {" $SED 's/.*/ if (STREQ (symbol->name, \"&\")) symbol->address = (void *) \&&;/' < "$nlist"I >> "$output_objdir/$my_dlsyms" echo >> "$output_objdir/$my_dlsyms" "\ } }" fi echo >> "$output_objdir/$my_dlsyms" "\ LT_DLSYM_CONST lt_dlsymlist lt_${my_prefix}_LTX_preloaded_symbols[] = { {\"$my_originator\", (void *) 0}," if test -s "$nlist"I; then echo >> "$output_objdir/$my_dlsyms" "\ {\"@INIT@\", (void *) <_syminit}," fi case $need_lib_prefix in no) eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$my_dlsyms" ;; *) eval "$global_symbol_to_c_name_address_lib_prefix" < "$nlist" >> "$output_objdir/$my_dlsyms" ;; esac echo >> "$output_objdir/$my_dlsyms" "\ {0, (void *) 0} }; /* This works around a problem in FreeBSD linker */ #ifdef FREEBSD_WORKAROUND static const void *lt_preloaded_setup() { return lt_${my_prefix}_LTX_preloaded_symbols; } #endif #ifdef __cplusplus } #endif\ " } # !$opt_dry_run pic_flag_for_symtable= case "$compile_command " in *" -static "*) ;; *) case $host in # compiling the symbol table file with pic_flag works around # a FreeBSD bug that causes programs to crash when -lm is # linked before any other PIC object. But we must not use # pic_flag when linking with -static. The problem exists in # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1. *-*-freebsd2.*|*-*-freebsd3.0*|*-*-freebsdelf3.0*) pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND" ;; *-*-hpux*) pic_flag_for_symtable=" $pic_flag" ;; *) $my_pic_p && pic_flag_for_symtable=" $pic_flag" ;; esac ;; esac symtab_cflags= for arg in $LTCFLAGS; do case $arg in -pie | -fpie | -fPIE) ;; *) func_append symtab_cflags " $arg" ;; esac done # Now compile the dynamic symbol file. func_show_eval '(cd $output_objdir && $LTCC$symtab_cflags -c$no_builtin_flag$pic_flag_for_symtable "$my_dlsyms")' 'exit $?' # Clean up the generated files. func_show_eval '$RM "$output_objdir/$my_dlsyms" "$nlist" "${nlist}S" "${nlist}T" "${nlist}I"' # Transform the symbol file into the correct name. symfileobj=$output_objdir/${my_outputname}S.$objext case $host in *cygwin* | *mingw* | *cegcc* ) if test -f "$output_objdir/$my_outputname.def"; then compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` else compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` fi ;; *) compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` ;; esac ;; *) func_fatal_error "unknown suffix for '$my_dlsyms'" ;; esac else # We keep going just in case the user didn't refer to # lt_preloaded_symbols. The linker will fail if global_symbol_pipe # really was required. # Nullify the symbol file. compile_command=`$ECHO "$compile_command" | $SED "s% @SYMFILE@%%"` finalize_command=`$ECHO "$finalize_command" | $SED "s% @SYMFILE@%%"` fi } # func_cygming_gnu_implib_p ARG # This predicate returns with zero status (TRUE) if # ARG is a GNU/binutils-style import library. Returns # with nonzero status (FALSE) otherwise. func_cygming_gnu_implib_p () { $debug_cmd func_to_tool_file "$1" func_convert_file_msys_to_w32 func_cygming_gnu_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $EGREP ' (_head_[A-Za-z0-9_]+_[ad]l*|[A-Za-z0-9_]+_[ad]l*_iname)$'` test -n "$func_cygming_gnu_implib_tmp" } # func_cygming_ms_implib_p ARG # This predicate returns with zero status (TRUE) if # ARG is an MS-style import library. Returns # with nonzero status (FALSE) otherwise. func_cygming_ms_implib_p () { $debug_cmd func_to_tool_file "$1" func_convert_file_msys_to_w32 func_cygming_ms_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $GREP '_NULL_IMPORT_DESCRIPTOR'` test -n "$func_cygming_ms_implib_tmp" } # func_win32_libid arg # return the library type of file 'arg' # # Need a lot of goo to handle *both* DLLs and import libs # Has to be a shell function in order to 'eat' the argument # that is supplied when $file_magic_command is called. # Despite the name, also deal with 64 bit binaries. func_win32_libid () { $debug_cmd win32_libid_type=unknown win32_fileres=`file -L $1 2>/dev/null` case $win32_fileres in *ar\ archive\ import\ library*) # definitely import win32_libid_type="x86 archive import" ;; *ar\ archive*) # could be an import, or static # Keep the egrep pattern in sync with the one in _LT_CHECK_MAGIC_METHOD. if eval $OBJDUMP -f $1 | $SED -e '10q' 2>/dev/null | $EGREP 'file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' >/dev/null; then case $nm_interface in "MS dumpbin") if func_cygming_ms_implib_p "$1" || func_cygming_gnu_implib_p "$1" then win32_nmres=import else win32_nmres= fi ;; *) func_to_tool_file "$1" func_convert_file_msys_to_w32 win32_nmres=`eval $NM -f posix -A \"$func_to_tool_file_result\" | $SED -n -e ' 1,100{ / I /{ s|.*|import| p q } }'` ;; esac case $win32_nmres in import*) win32_libid_type="x86 archive import";; *) win32_libid_type="x86 archive static";; esac fi ;; *DLL*) win32_libid_type="x86 DLL" ;; *executable*) # but shell scripts are "executable" too... case $win32_fileres in *MS\ Windows\ PE\ Intel*) win32_libid_type="x86 DLL" ;; esac ;; esac $ECHO "$win32_libid_type" } # func_cygming_dll_for_implib ARG # # Platform-specific function to extract the # name of the DLL associated with the specified # import library ARG. # Invoked by eval'ing the libtool variable # $sharedlib_from_linklib_cmd # Result is available in the variable # $sharedlib_from_linklib_result func_cygming_dll_for_implib () { $debug_cmd sharedlib_from_linklib_result=`$DLLTOOL --identify-strict --identify "$1"` } # func_cygming_dll_for_implib_fallback_core SECTION_NAME LIBNAMEs # # The is the core of a fallback implementation of a # platform-specific function to extract the name of the # DLL associated with the specified import library LIBNAME. # # SECTION_NAME is either .idata$6 or .idata$7, depending # on the platform and compiler that created the implib. # # Echos the name of the DLL associated with the # specified import library. func_cygming_dll_for_implib_fallback_core () { $debug_cmd match_literal=`$ECHO "$1" | $SED "$sed_make_literal_regex"` $OBJDUMP -s --section "$1" "$2" 2>/dev/null | $SED '/^Contents of section '"$match_literal"':/{ # Place marker at beginning of archive member dllname section s/.*/====MARK====/ p d } # These lines can sometimes be longer than 43 characters, but # are always uninteresting /:[ ]*file format pe[i]\{,1\}-/d /^In archive [^:]*:/d # Ensure marker is printed /^====MARK====/p # Remove all lines with less than 43 characters /^.\{43\}/!d # From remaining lines, remove first 43 characters s/^.\{43\}//' | $SED -n ' # Join marker and all lines until next marker into a single line /^====MARK====/ b para H $ b para b :para x s/\n//g # Remove the marker s/^====MARK====// # Remove trailing dots and whitespace s/[\. \t]*$// # Print /./p' | # we now have a list, one entry per line, of the stringified # contents of the appropriate section of all members of the # archive that possess that section. Heuristic: eliminate # all those that have a first or second character that is # a '.' (that is, objdump's representation of an unprintable # character.) This should work for all archives with less than # 0x302f exports -- but will fail for DLLs whose name actually # begins with a literal '.' or a single character followed by # a '.'. # # Of those that remain, print the first one. $SED -e '/^\./d;/^.\./d;q' } # func_cygming_dll_for_implib_fallback ARG # Platform-specific function to extract the # name of the DLL associated with the specified # import library ARG. # # This fallback implementation is for use when $DLLTOOL # does not support the --identify-strict option. # Invoked by eval'ing the libtool variable # $sharedlib_from_linklib_cmd # Result is available in the variable # $sharedlib_from_linklib_result func_cygming_dll_for_implib_fallback () { $debug_cmd if func_cygming_gnu_implib_p "$1"; then # binutils import library sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$7' "$1"` elif func_cygming_ms_implib_p "$1"; then # ms-generated import library sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$6' "$1"` else # unknown sharedlib_from_linklib_result= fi } # func_extract_an_archive dir oldlib func_extract_an_archive () { $debug_cmd f_ex_an_ar_dir=$1; shift f_ex_an_ar_oldlib=$1 if test yes = "$lock_old_archive_extraction"; then lockfile=$f_ex_an_ar_oldlib.lock until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do func_echo "Waiting for $lockfile to be removed" sleep 2 done fi func_show_eval "(cd \$f_ex_an_ar_dir && $AR x \"\$f_ex_an_ar_oldlib\")" \ 'stat=$?; rm -f "$lockfile"; exit $stat' if test yes = "$lock_old_archive_extraction"; then $opt_dry_run || rm -f "$lockfile" fi if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then : else func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib" fi } # func_extract_archives gentop oldlib ... func_extract_archives () { $debug_cmd my_gentop=$1; shift my_oldlibs=${1+"$@"} my_oldobjs= my_xlib= my_xabs= my_xdir= for my_xlib in $my_oldlibs; do # Extract the objects. case $my_xlib in [\\/]* | [A-Za-z]:[\\/]*) my_xabs=$my_xlib ;; *) my_xabs=`pwd`"/$my_xlib" ;; esac func_basename "$my_xlib" my_xlib=$func_basename_result my_xlib_u=$my_xlib while :; do case " $extracted_archives " in *" $my_xlib_u "*) func_arith $extracted_serial + 1 extracted_serial=$func_arith_result my_xlib_u=lt$extracted_serial-$my_xlib ;; *) break ;; esac done extracted_archives="$extracted_archives $my_xlib_u" my_xdir=$my_gentop/$my_xlib_u func_mkdir_p "$my_xdir" case $host in *-darwin*) func_verbose "Extracting $my_xabs" # Do not bother doing anything if just a dry run $opt_dry_run || { darwin_orig_dir=`pwd` cd $my_xdir || exit $? darwin_archive=$my_xabs darwin_curdir=`pwd` func_basename "$darwin_archive" darwin_base_archive=$func_basename_result darwin_arches=`$LIPO -info "$darwin_archive" 2>/dev/null | $GREP Architectures 2>/dev/null || true` if test -n "$darwin_arches"; then darwin_arches=`$ECHO "$darwin_arches" | $SED -e 's/.*are://'` darwin_arch= func_verbose "$darwin_base_archive has multiple architectures $darwin_arches" for darwin_arch in $darwin_arches; do func_mkdir_p "unfat-$$/$darwin_base_archive-$darwin_arch" $LIPO -thin $darwin_arch -output "unfat-$$/$darwin_base_archive-$darwin_arch/$darwin_base_archive" "$darwin_archive" cd "unfat-$$/$darwin_base_archive-$darwin_arch" func_extract_an_archive "`pwd`" "$darwin_base_archive" cd "$darwin_curdir" $RM "unfat-$$/$darwin_base_archive-$darwin_arch/$darwin_base_archive" done # $darwin_arches ## Okay now we've a bunch of thin objects, gotta fatten them up :) darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print | $SED -e "$sed_basename" | sort -u` darwin_file= darwin_files= for darwin_file in $darwin_filelist; do darwin_files=`find unfat-$$ -name $darwin_file -print | sort | $NL2SP` $LIPO -create -output "$darwin_file" $darwin_files done # $darwin_filelist $RM -rf unfat-$$ cd "$darwin_orig_dir" else cd $darwin_orig_dir func_extract_an_archive "$my_xdir" "$my_xabs" fi # $darwin_arches } # !$opt_dry_run ;; *) func_extract_an_archive "$my_xdir" "$my_xabs" ;; esac my_oldobjs="$my_oldobjs "`find $my_xdir -name \*.$objext -print -o -name \*.lo -print | sort | $NL2SP` done func_extract_archives_result=$my_oldobjs } # func_emit_wrapper [arg=no] # # Emit a libtool wrapper script on stdout. # Don't directly open a file because we may want to # incorporate the script contents within a cygwin/mingw # wrapper executable. Must ONLY be called from within # func_mode_link because it depends on a number of variables # set therein. # # ARG is the value that the WRAPPER_SCRIPT_BELONGS_IN_OBJDIR # variable will take. If 'yes', then the emitted script # will assume that the directory where it is stored is # the $objdir directory. This is a cygwin/mingw-specific # behavior. func_emit_wrapper () { func_emit_wrapper_arg1=${1-no} $ECHO "\ #! $SHELL # $output - temporary wrapper script for $objdir/$outputname # Generated by $PROGRAM (GNU $PACKAGE) $VERSION # # The $output program cannot be directly executed until all the libtool # libraries that it depends on are installed. # # This wrapper script should never be moved out of the build directory. # If it is, it will not operate correctly. # Sed substitution that helps us do robust quoting. It backslashifies # metacharacters that are still active within double-quoted strings. sed_quote_subst='$sed_quote_subst' # Be Bourne compatible if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on \${1+\"\$@\"}, which # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST else case \`(set -o) 2>/dev/null\` in *posix*) set -o posix;; esac fi BIN_SH=xpg4; export BIN_SH # for Tru64 DUALCASE=1; export DUALCASE # for MKS sh # The HP-UX ksh and POSIX shell print the target directory to stdout # if CDPATH is set. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH relink_command=\"$relink_command\" # This environment variable determines our operation mode. if test \"\$libtool_install_magic\" = \"$magic\"; then # install mode needs the following variables: generated_by_libtool_version='$macro_version' notinst_deplibs='$notinst_deplibs' else # When we are sourced in execute mode, \$file and \$ECHO are already set. if test \"\$libtool_execute_magic\" != \"$magic\"; then file=\"\$0\"" func_quote_arg pretty "$ECHO" qECHO=$func_quote_arg_result $ECHO "\ # A function that is used when there is no print builtin or printf. func_fallback_echo () { eval 'cat <<_LTECHO_EOF \$1 _LTECHO_EOF' } ECHO=$qECHO fi # Very basic option parsing. These options are (a) specific to # the libtool wrapper, (b) are identical between the wrapper # /script/ and the wrapper /executable/ that is used only on # windows platforms, and (c) all begin with the string "--lt-" # (application programs are unlikely to have options that match # this pattern). # # There are only two supported options: --lt-debug and # --lt-dump-script. There is, deliberately, no --lt-help. # # The first argument to this parsing function should be the # script's $0 value, followed by "$@". lt_option_debug= func_parse_lt_options () { lt_script_arg0=\$0 shift for lt_opt do case \"\$lt_opt\" in --lt-debug) lt_option_debug=1 ;; --lt-dump-script) lt_dump_D=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%/[^/]*$%%'\` test \"X\$lt_dump_D\" = \"X\$lt_script_arg0\" && lt_dump_D=. lt_dump_F=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%^.*/%%'\` cat \"\$lt_dump_D/\$lt_dump_F\" exit 0 ;; --lt-*) \$ECHO \"Unrecognized --lt- option: '\$lt_opt'\" 1>&2 exit 1 ;; esac done # Print the debug banner immediately: if test -n \"\$lt_option_debug\"; then echo \"$outputname:$output:\$LINENO: libtool wrapper (GNU $PACKAGE) $VERSION\" 1>&2 fi } # Used when --lt-debug. Prints its arguments to stdout # (redirection is the responsibility of the caller) func_lt_dump_args () { lt_dump_args_N=1; for lt_arg do \$ECHO \"$outputname:$output:\$LINENO: newargv[\$lt_dump_args_N]: \$lt_arg\" lt_dump_args_N=\`expr \$lt_dump_args_N + 1\` done } # Core function for launching the target application func_exec_program_core () { " case $host in # Backslashes separate directories on plain windows *-*-mingw | *-*-os2* | *-cegcc*) $ECHO "\ if test -n \"\$lt_option_debug\"; then \$ECHO \"$outputname:$output:\$LINENO: newargv[0]: \$progdir\\\\\$program\" 1>&2 func_lt_dump_args \${1+\"\$@\"} 1>&2 fi exec \"\$progdir\\\\\$program\" \${1+\"\$@\"} " ;; *) $ECHO "\ if test -n \"\$lt_option_debug\"; then \$ECHO \"$outputname:$output:\$LINENO: newargv[0]: \$progdir/\$program\" 1>&2 func_lt_dump_args \${1+\"\$@\"} 1>&2 fi exec \"\$progdir/\$program\" \${1+\"\$@\"} " ;; esac $ECHO "\ \$ECHO \"\$0: cannot exec \$program \$*\" 1>&2 exit 1 } # A function to encapsulate launching the target application # Strips options in the --lt-* namespace from \$@ and # launches target application with the remaining arguments. func_exec_program () { case \" \$* \" in *\\ --lt-*) for lt_wr_arg do case \$lt_wr_arg in --lt-*) ;; *) set x \"\$@\" \"\$lt_wr_arg\"; shift;; esac shift done ;; esac func_exec_program_core \${1+\"\$@\"} } # Parse options func_parse_lt_options \"\$0\" \${1+\"\$@\"} # Find the directory that this script lives in. thisdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*$%%'\` test \"x\$thisdir\" = \"x\$file\" && thisdir=. # Follow symbolic links until we get to the real thisdir. file=\`ls -ld \"\$file\" | $SED -n 's/.*-> //p'\` while test -n \"\$file\"; do destdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*\$%%'\` # If there was a directory component, then change thisdir. if test \"x\$destdir\" != \"x\$file\"; then case \"\$destdir\" in [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;; *) thisdir=\"\$thisdir/\$destdir\" ;; esac fi file=\`\$ECHO \"\$file\" | $SED 's%^.*/%%'\` file=\`ls -ld \"\$thisdir/\$file\" | $SED -n 's/.*-> //p'\` done # Usually 'no', except on cygwin/mingw when embedded into # the cwrapper. WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=$func_emit_wrapper_arg1 if test \"\$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR\" = \"yes\"; then # special case for '.' if test \"\$thisdir\" = \".\"; then thisdir=\`pwd\` fi # remove .libs from thisdir case \"\$thisdir\" in *[\\\\/]$objdir ) thisdir=\`\$ECHO \"\$thisdir\" | $SED 's%[\\\\/][^\\\\/]*$%%'\` ;; $objdir ) thisdir=. ;; esac fi # Try to get the absolute directory name. absdir=\`cd \"\$thisdir\" && pwd\` test -n \"\$absdir\" && thisdir=\"\$absdir\" " if test yes = "$fast_install"; then $ECHO "\ program=lt-'$outputname'$exeext progdir=\"\$thisdir/$objdir\" if test ! -f \"\$progdir/\$program\" || { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | $SED 1q\`; \\ test \"X\$file\" != \"X\$progdir/\$program\"; }; then file=\"\$\$-\$program\" if test ! -d \"\$progdir\"; then $MKDIR \"\$progdir\" else $RM \"\$progdir/\$file\" fi" $ECHO "\ # relink executable if necessary if test -n \"\$relink_command\"; then if relink_command_output=\`eval \$relink_command 2>&1\`; then : else \$ECHO \"\$relink_command_output\" >&2 $RM \"\$progdir/\$file\" exit 1 fi fi $MV \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || { $RM \"\$progdir/\$program\"; $MV \"\$progdir/\$file\" \"\$progdir/\$program\"; } $RM \"\$progdir/\$file\" fi" else $ECHO "\ program='$outputname' progdir=\"\$thisdir/$objdir\" " fi $ECHO "\ if test -f \"\$progdir/\$program\"; then" # fixup the dll searchpath if we need to. # # Fix the DLL searchpath if we need to. Do this before prepending # to shlibpath, because on Windows, both are PATH and uninstalled # libraries must come first. if test -n "$dllsearchpath"; then $ECHO "\ # Add the dll search path components to the executable PATH PATH=$dllsearchpath:\$PATH " fi # Export our shlibpath_var if we have one. if test yes = "$shlibpath_overrides_runpath" && test -n "$shlibpath_var" && test -n "$temp_rpath"; then $ECHO "\ # Add our own library path to $shlibpath_var $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" # Some systems cannot cope with colon-terminated $shlibpath_var # The second colon is a workaround for a bug in BeOS R4 sed $shlibpath_var=\`\$ECHO \"\$$shlibpath_var\" | $SED 's/::*\$//'\` export $shlibpath_var " fi $ECHO "\ if test \"\$libtool_execute_magic\" != \"$magic\"; then # Run the actual program with our arguments. func_exec_program \${1+\"\$@\"} fi else # The program doesn't exist. \$ECHO \"\$0: error: '\$progdir/\$program' does not exist\" 1>&2 \$ECHO \"This script is just a wrapper for \$program.\" 1>&2 \$ECHO \"See the $PACKAGE documentation for more information.\" 1>&2 exit 1 fi fi\ " } # func_emit_cwrapperexe_src # emit the source code for a wrapper executable on stdout # Must ONLY be called from within func_mode_link because # it depends on a number of variable set therein. func_emit_cwrapperexe_src () { cat < #include #ifdef _MSC_VER # include # include # include #else # include # include # ifdef __CYGWIN__ # include # endif #endif #include #include #include #include #include #include #include #include #define STREQ(s1, s2) (strcmp ((s1), (s2)) == 0) /* declarations of non-ANSI functions */ #if defined __MINGW32__ # ifdef __STRICT_ANSI__ int _putenv (const char *); # endif #elif defined __CYGWIN__ # ifdef __STRICT_ANSI__ char *realpath (const char *, char *); int putenv (char *); int setenv (const char *, const char *, int); # endif /* #elif defined other_platform || defined ... */ #endif /* portability defines, excluding path handling macros */ #if defined _MSC_VER # define setmode _setmode # define stat _stat # define chmod _chmod # define getcwd _getcwd # define putenv _putenv # define S_IXUSR _S_IEXEC #elif defined __MINGW32__ # define setmode _setmode # define stat _stat # define chmod _chmod # define getcwd _getcwd # define putenv _putenv #elif defined __CYGWIN__ # define HAVE_SETENV # define FOPEN_WB "wb" /* #elif defined other platforms ... */ #endif #if defined PATH_MAX # define LT_PATHMAX PATH_MAX #elif defined MAXPATHLEN # define LT_PATHMAX MAXPATHLEN #else # define LT_PATHMAX 1024 #endif #ifndef S_IXOTH # define S_IXOTH 0 #endif #ifndef S_IXGRP # define S_IXGRP 0 #endif /* path handling portability macros */ #ifndef DIR_SEPARATOR # define DIR_SEPARATOR '/' # define PATH_SEPARATOR ':' #endif #if defined _WIN32 || defined __MSDOS__ || defined __DJGPP__ || \ defined __OS2__ # define HAVE_DOS_BASED_FILE_SYSTEM # define FOPEN_WB "wb" # ifndef DIR_SEPARATOR_2 # define DIR_SEPARATOR_2 '\\' # endif # ifndef PATH_SEPARATOR_2 # define PATH_SEPARATOR_2 ';' # endif #endif #ifndef DIR_SEPARATOR_2 # define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR) #else /* DIR_SEPARATOR_2 */ # define IS_DIR_SEPARATOR(ch) \ (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2)) #endif /* DIR_SEPARATOR_2 */ #ifndef PATH_SEPARATOR_2 # define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR) #else /* PATH_SEPARATOR_2 */ # define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2) #endif /* PATH_SEPARATOR_2 */ #ifndef FOPEN_WB # define FOPEN_WB "w" #endif #ifndef _O_BINARY # define _O_BINARY 0 #endif #define XMALLOC(type, num) ((type *) xmalloc ((num) * sizeof(type))) #define XFREE(stale) do { \ if (stale) { free (stale); stale = 0; } \ } while (0) #if defined LT_DEBUGWRAPPER static int lt_debug = 1; #else static int lt_debug = 0; #endif const char *program_name = "libtool-wrapper"; /* in case xstrdup fails */ void *xmalloc (size_t num); char *xstrdup (const char *string); const char *base_name (const char *name); char *find_executable (const char *wrapper); char *chase_symlinks (const char *pathspec); int make_executable (const char *path); int check_executable (const char *path); char *strendzap (char *str, const char *pat); void lt_debugprintf (const char *file, int line, const char *fmt, ...); void lt_fatal (const char *file, int line, const char *message, ...); static const char *nonnull (const char *s); static const char *nonempty (const char *s); void lt_setenv (const char *name, const char *value); char *lt_extend_str (const char *orig_value, const char *add, int to_end); void lt_update_exe_path (const char *name, const char *value); void lt_update_lib_path (const char *name, const char *value); char **prepare_spawn (char **argv); void lt_dump_script (FILE *f); EOF cat <= 0) && (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) return 1; else return 0; } int make_executable (const char *path) { int rval = 0; struct stat st; lt_debugprintf (__FILE__, __LINE__, "(make_executable): %s\n", nonempty (path)); if ((!path) || (!*path)) return 0; if (stat (path, &st) >= 0) { rval = chmod (path, st.st_mode | S_IXOTH | S_IXGRP | S_IXUSR); } return rval; } /* Searches for the full path of the wrapper. Returns newly allocated full path name if found, NULL otherwise Does not chase symlinks, even on platforms that support them. */ char * find_executable (const char *wrapper) { int has_slash = 0; const char *p; const char *p_next; /* static buffer for getcwd */ char tmp[LT_PATHMAX + 1]; size_t tmp_len; char *concat_name; lt_debugprintf (__FILE__, __LINE__, "(find_executable): %s\n", nonempty (wrapper)); if ((wrapper == NULL) || (*wrapper == '\0')) return NULL; /* Absolute path? */ #if defined HAVE_DOS_BASED_FILE_SYSTEM if (isalpha ((unsigned char) wrapper[0]) && wrapper[1] == ':') { concat_name = xstrdup (wrapper); if (check_executable (concat_name)) return concat_name; XFREE (concat_name); } else { #endif if (IS_DIR_SEPARATOR (wrapper[0])) { concat_name = xstrdup (wrapper); if (check_executable (concat_name)) return concat_name; XFREE (concat_name); } #if defined HAVE_DOS_BASED_FILE_SYSTEM } #endif for (p = wrapper; *p; p++) if (*p == '/') { has_slash = 1; break; } if (!has_slash) { /* no slashes; search PATH */ const char *path = getenv ("PATH"); if (path != NULL) { for (p = path; *p; p = p_next) { const char *q; size_t p_len; for (q = p; *q; q++) if (IS_PATH_SEPARATOR (*q)) break; p_len = (size_t) (q - p); p_next = (*q == '\0' ? q : q + 1); if (p_len == 0) { /* empty path: current directory */ if (getcwd (tmp, LT_PATHMAX) == NULL) lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", nonnull (strerror (errno))); tmp_len = strlen (tmp); concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); memcpy (concat_name, tmp, tmp_len); concat_name[tmp_len] = '/'; strcpy (concat_name + tmp_len + 1, wrapper); } else { concat_name = XMALLOC (char, p_len + 1 + strlen (wrapper) + 1); memcpy (concat_name, p, p_len); concat_name[p_len] = '/'; strcpy (concat_name + p_len + 1, wrapper); } if (check_executable (concat_name)) return concat_name; XFREE (concat_name); } } /* not found in PATH; assume curdir */ } /* Relative path | not found in path: prepend cwd */ if (getcwd (tmp, LT_PATHMAX) == NULL) lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", nonnull (strerror (errno))); tmp_len = strlen (tmp); concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); memcpy (concat_name, tmp, tmp_len); concat_name[tmp_len] = '/'; strcpy (concat_name + tmp_len + 1, wrapper); if (check_executable (concat_name)) return concat_name; XFREE (concat_name); return NULL; } char * chase_symlinks (const char *pathspec) { #ifndef S_ISLNK return xstrdup (pathspec); #else char buf[LT_PATHMAX]; struct stat s; char *tmp_pathspec = xstrdup (pathspec); char *p; int has_symlinks = 0; while (strlen (tmp_pathspec) && !has_symlinks) { lt_debugprintf (__FILE__, __LINE__, "checking path component for symlinks: %s\n", tmp_pathspec); if (lstat (tmp_pathspec, &s) == 0) { if (S_ISLNK (s.st_mode) != 0) { has_symlinks = 1; break; } /* search backwards for last DIR_SEPARATOR */ p = tmp_pathspec + strlen (tmp_pathspec) - 1; while ((p > tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) p--; if ((p == tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) { /* no more DIR_SEPARATORS left */ break; } *p = '\0'; } else { lt_fatal (__FILE__, __LINE__, "error accessing file \"%s\": %s", tmp_pathspec, nonnull (strerror (errno))); } } XFREE (tmp_pathspec); if (!has_symlinks) { return xstrdup (pathspec); } tmp_pathspec = realpath (pathspec, buf); if (tmp_pathspec == 0) { lt_fatal (__FILE__, __LINE__, "could not follow symlinks for %s", pathspec); } return xstrdup (tmp_pathspec); #endif } char * strendzap (char *str, const char *pat) { size_t len, patlen; assert (str != NULL); assert (pat != NULL); len = strlen (str); patlen = strlen (pat); if (patlen <= len) { str += len - patlen; if (STREQ (str, pat)) *str = '\0'; } return str; } void lt_debugprintf (const char *file, int line, const char *fmt, ...) { va_list args; if (lt_debug) { (void) fprintf (stderr, "%s:%s:%d: ", program_name, file, line); va_start (args, fmt); (void) vfprintf (stderr, fmt, args); va_end (args); } } static void lt_error_core (int exit_status, const char *file, int line, const char *mode, const char *message, va_list ap) { fprintf (stderr, "%s:%s:%d: %s: ", program_name, file, line, mode); vfprintf (stderr, message, ap); fprintf (stderr, ".\n"); if (exit_status >= 0) exit (exit_status); } void lt_fatal (const char *file, int line, const char *message, ...) { va_list ap; va_start (ap, message); lt_error_core (EXIT_FAILURE, file, line, "FATAL", message, ap); va_end (ap); } static const char * nonnull (const char *s) { return s ? s : "(null)"; } static const char * nonempty (const char *s) { return (s && !*s) ? "(empty)" : nonnull (s); } void lt_setenv (const char *name, const char *value) { lt_debugprintf (__FILE__, __LINE__, "(lt_setenv) setting '%s' to '%s'\n", nonnull (name), nonnull (value)); { #ifdef HAVE_SETENV /* always make a copy, for consistency with !HAVE_SETENV */ char *str = xstrdup (value); setenv (name, str, 1); #else size_t len = strlen (name) + 1 + strlen (value) + 1; char *str = XMALLOC (char, len); sprintf (str, "%s=%s", name, value); if (putenv (str) != EXIT_SUCCESS) { XFREE (str); } #endif } } char * lt_extend_str (const char *orig_value, const char *add, int to_end) { char *new_value; if (orig_value && *orig_value) { size_t orig_value_len = strlen (orig_value); size_t add_len = strlen (add); new_value = XMALLOC (char, add_len + orig_value_len + 1); if (to_end) { strcpy (new_value, orig_value); strcpy (new_value + orig_value_len, add); } else { strcpy (new_value, add); strcpy (new_value + add_len, orig_value); } } else { new_value = xstrdup (add); } return new_value; } void lt_update_exe_path (const char *name, const char *value) { lt_debugprintf (__FILE__, __LINE__, "(lt_update_exe_path) modifying '%s' by prepending '%s'\n", nonnull (name), nonnull (value)); if (name && *name && value && *value) { char *new_value = lt_extend_str (getenv (name), value, 0); /* some systems can't cope with a ':'-terminated path #' */ size_t len = strlen (new_value); while ((len > 0) && IS_PATH_SEPARATOR (new_value[len-1])) { new_value[--len] = '\0'; } lt_setenv (name, new_value); XFREE (new_value); } } void lt_update_lib_path (const char *name, const char *value) { lt_debugprintf (__FILE__, __LINE__, "(lt_update_lib_path) modifying '%s' by prepending '%s'\n", nonnull (name), nonnull (value)); if (name && *name && value && *value) { char *new_value = lt_extend_str (getenv (name), value, 0); lt_setenv (name, new_value); XFREE (new_value); } } EOF case $host_os in mingw*) cat <<"EOF" /* Prepares an argument vector before calling spawn(). Note that spawn() does not by itself call the command interpreter (getenv ("COMSPEC") != NULL ? getenv ("COMSPEC") : ({ OSVERSIONINFO v; v.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); GetVersionEx(&v); v.dwPlatformId == VER_PLATFORM_WIN32_NT; }) ? "cmd.exe" : "command.com"). Instead it simply concatenates the arguments, separated by ' ', and calls CreateProcess(). We must quote the arguments since Win32 CreateProcess() interprets characters like ' ', '\t', '\\', '"' (but not '<' and '>') in a special way: - Space and tab are interpreted as delimiters. They are not treated as delimiters if they are surrounded by double quotes: "...". - Unescaped double quotes are removed from the input. Their only effect is that within double quotes, space and tab are treated like normal characters. - Backslashes not followed by double quotes are not special. - But 2*n+1 backslashes followed by a double quote become n backslashes followed by a double quote (n >= 0): \" -> " \\\" -> \" \\\\\" -> \\" */ #define SHELL_SPECIAL_CHARS "\"\\ \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" #define SHELL_SPACE_CHARS " \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" char ** prepare_spawn (char **argv) { size_t argc; char **new_argv; size_t i; /* Count number of arguments. */ for (argc = 0; argv[argc] != NULL; argc++) ; /* Allocate new argument vector. */ new_argv = XMALLOC (char *, argc + 1); /* Put quoted arguments into the new argument vector. */ for (i = 0; i < argc; i++) { const char *string = argv[i]; if (string[0] == '\0') new_argv[i] = xstrdup ("\"\""); else if (strpbrk (string, SHELL_SPECIAL_CHARS) != NULL) { int quote_around = (strpbrk (string, SHELL_SPACE_CHARS) != NULL); size_t length; unsigned int backslashes; const char *s; char *quoted_string; char *p; length = 0; backslashes = 0; if (quote_around) length++; for (s = string; *s != '\0'; s++) { char c = *s; if (c == '"') length += backslashes + 1; length++; if (c == '\\') backslashes++; else backslashes = 0; } if (quote_around) length += backslashes + 1; quoted_string = XMALLOC (char, length + 1); p = quoted_string; backslashes = 0; if (quote_around) *p++ = '"'; for (s = string; *s != '\0'; s++) { char c = *s; if (c == '"') { unsigned int j; for (j = backslashes + 1; j > 0; j--) *p++ = '\\'; } *p++ = c; if (c == '\\') backslashes++; else backslashes = 0; } if (quote_around) { unsigned int j; for (j = backslashes; j > 0; j--) *p++ = '\\'; *p++ = '"'; } *p = '\0'; new_argv[i] = quoted_string; } else new_argv[i] = (char *) string; } new_argv[argc] = NULL; return new_argv; } EOF ;; esac cat <<"EOF" void lt_dump_script (FILE* f) { EOF func_emit_wrapper yes | $SED -n -e ' s/^\(.\{79\}\)\(..*\)/\1\ \2/ h s/\([\\"]\)/\\\1/g s/$/\\n/ s/\([^\n]*\).*/ fputs ("\1", f);/p g D' cat <<"EOF" } EOF } # end: func_emit_cwrapperexe_src # func_win32_import_lib_p ARG # True if ARG is an import lib, as indicated by $file_magic_cmd func_win32_import_lib_p () { $debug_cmd case `eval $file_magic_cmd \"\$1\" 2>/dev/null | $SED -e 10q` in *import*) : ;; *) false ;; esac } # func_suncc_cstd_abi # !!ONLY CALL THIS FOR SUN CC AFTER $compile_command IS FULLY EXPANDED!! # Several compiler flags select an ABI that is incompatible with the # Cstd library. Avoid specifying it if any are in CXXFLAGS. func_suncc_cstd_abi () { $debug_cmd case " $compile_command " in *" -compat=g "*|*\ -std=c++[0-9][0-9]\ *|*" -library=stdcxx4 "*|*" -library=stlport4 "*) suncc_use_cstd_abi=no ;; *) suncc_use_cstd_abi=yes ;; esac } # func_mode_link arg... func_mode_link () { $debug_cmd case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) # It is impossible to link a dll without this setting, and # we shouldn't force the makefile maintainer to figure out # what system we are compiling for in order to pass an extra # flag for every libtool invocation. # allow_undefined=no # FIXME: Unfortunately, there are problems with the above when trying # to make a dll that has undefined symbols, in which case not # even a static library is built. For now, we need to specify # -no-undefined on the libtool link line when we can be certain # that all symbols are satisfied, otherwise we get a static library. allow_undefined=yes ;; *) allow_undefined=yes ;; esac libtool_args=$nonopt base_compile="$nonopt $@" compile_command=$nonopt finalize_command=$nonopt compile_rpath= finalize_rpath= compile_shlibpath= finalize_shlibpath= convenience= old_convenience= deplibs= old_deplibs= compiler_flags= linker_flags= dllsearchpath= lib_search_path=`pwd` inst_prefix_dir= new_inherited_linker_flags= avoid_version=no bindir= dlfiles= dlprefiles= dlself=no export_dynamic=no export_symbols= export_symbols_regex= generated= libobjs= ltlibs= module=no no_install=no objs= os2dllname= non_pic_objects= precious_files_regex= prefer_static_libs=no preload=false prev= prevarg= release= rpath= xrpath= perm_rpath= temp_rpath= thread_safe=no vinfo= vinfo_number=no weak_libs= single_module=$wl-single_module func_infer_tag $base_compile # We need to know -static, to get the right output filenames. for arg do case $arg in -shared) test yes != "$build_libtool_libs" \ && func_fatal_configuration "cannot build a shared library" build_old_libs=no break ;; -all-static | -static | -static-libtool-libs) case $arg in -all-static) if test yes = "$build_libtool_libs" && test -z "$link_static_flag"; then func_warning "complete static linking is impossible in this configuration" fi if test -n "$link_static_flag"; then dlopen_self=$dlopen_self_static fi prefer_static_libs=yes ;; -static) if test -z "$pic_flag" && test -n "$link_static_flag"; then dlopen_self=$dlopen_self_static fi prefer_static_libs=built ;; -static-libtool-libs) if test -z "$pic_flag" && test -n "$link_static_flag"; then dlopen_self=$dlopen_self_static fi prefer_static_libs=yes ;; esac build_libtool_libs=no build_old_libs=yes break ;; esac done # See if our shared archives depend on static archives. test -n "$old_archive_from_new_cmds" && build_old_libs=yes # Go through the arguments, transforming them on the way. while test "$#" -gt 0; do arg=$1 shift func_quote_arg pretty,unquoted "$arg" qarg=$func_quote_arg_unquoted_result func_append libtool_args " $func_quote_arg_result" # If the previous option needs an argument, assign it. if test -n "$prev"; then case $prev in output) func_append compile_command " @OUTPUT@" func_append finalize_command " @OUTPUT@" ;; esac case $prev in bindir) bindir=$arg prev= continue ;; dlfiles|dlprefiles) $preload || { # Add the symbol object into the linking commands. func_append compile_command " @SYMFILE@" func_append finalize_command " @SYMFILE@" preload=: } case $arg in *.la | *.lo) ;; # We handle these cases below. force) if test no = "$dlself"; then dlself=needless export_dynamic=yes fi prev= continue ;; self) if test dlprefiles = "$prev"; then dlself=yes elif test dlfiles = "$prev" && test yes != "$dlopen_self"; then dlself=yes else dlself=needless export_dynamic=yes fi prev= continue ;; *) if test dlfiles = "$prev"; then func_append dlfiles " $arg" else func_append dlprefiles " $arg" fi prev= continue ;; esac ;; expsyms) export_symbols=$arg test -f "$arg" \ || func_fatal_error "symbol file '$arg' does not exist" prev= continue ;; expsyms_regex) export_symbols_regex=$arg prev= continue ;; framework) case $host in *-*-darwin*) case "$deplibs " in *" $qarg.ltframework "*) ;; *) func_append deplibs " $qarg.ltframework" # this is fixed later ;; esac ;; esac prev= continue ;; inst_prefix) inst_prefix_dir=$arg prev= continue ;; mllvm) # Clang does not use LLVM to link, so we can simply discard any # '-mllvm $arg' options when doing the link step. prev= continue ;; objectlist) if test -f "$arg"; then save_arg=$arg moreargs= for fil in `cat "$save_arg"` do # func_append moreargs " $fil" arg=$fil # A libtool-controlled object. # Check to see that this really is a libtool object. if func_lalib_unsafe_p "$arg"; then pic_object= non_pic_object= # Read the .lo file func_source "$arg" if test -z "$pic_object" || test -z "$non_pic_object" || test none = "$pic_object" && test none = "$non_pic_object"; then func_fatal_error "cannot find name of object for '$arg'" fi # Extract subdirectory from the argument. func_dirname "$arg" "/" "" xdir=$func_dirname_result if test none != "$pic_object"; then # Prepend the subdirectory the object is found in. pic_object=$xdir$pic_object if test dlfiles = "$prev"; then if test yes = "$build_libtool_libs" && test yes = "$dlopen_support"; then func_append dlfiles " $pic_object" prev= continue else # If libtool objects are unsupported, then we need to preload. prev=dlprefiles fi fi # CHECK ME: I think I busted this. -Ossama if test dlprefiles = "$prev"; then # Preload the old-style object. func_append dlprefiles " $pic_object" prev= fi # A PIC object. func_append libobjs " $pic_object" arg=$pic_object fi # Non-PIC object. if test none != "$non_pic_object"; then # Prepend the subdirectory the object is found in. non_pic_object=$xdir$non_pic_object # A standard non-PIC object func_append non_pic_objects " $non_pic_object" if test -z "$pic_object" || test none = "$pic_object"; then arg=$non_pic_object fi else # If the PIC object exists, use it instead. # $xdir was prepended to $pic_object above. non_pic_object=$pic_object func_append non_pic_objects " $non_pic_object" fi else # Only an error if not doing a dry-run. if $opt_dry_run; then # Extract subdirectory from the argument. func_dirname "$arg" "/" "" xdir=$func_dirname_result func_lo2o "$arg" pic_object=$xdir$objdir/$func_lo2o_result non_pic_object=$xdir$func_lo2o_result func_append libobjs " $pic_object" func_append non_pic_objects " $non_pic_object" else func_fatal_error "'$arg' is not a valid libtool object" fi fi done else func_fatal_error "link input file '$arg' does not exist" fi arg=$save_arg prev= continue ;; os2dllname) os2dllname=$arg prev= continue ;; precious_regex) precious_files_regex=$arg prev= continue ;; release) release=-$arg prev= continue ;; rpath | xrpath) # We need an absolute path. case $arg in [\\/]* | [A-Za-z]:[\\/]*) ;; *) func_fatal_error "only absolute run-paths are allowed" ;; esac if test rpath = "$prev"; then case "$rpath " in *" $arg "*) ;; *) func_append rpath " $arg" ;; esac else case "$xrpath " in *" $arg "*) ;; *) func_append xrpath " $arg" ;; esac fi prev= continue ;; shrext) shrext_cmds=$arg prev= continue ;; weak) func_append weak_libs " $arg" prev= continue ;; xassembler) func_append compiler_flags " -Xassembler $qarg" prev= func_append compile_command " -Xassembler $qarg" func_append finalize_command " -Xassembler $qarg" continue ;; xcclinker) func_append linker_flags " $qarg" func_append compiler_flags " $qarg" prev= func_append compile_command " $qarg" func_append finalize_command " $qarg" continue ;; xcompiler) func_append compiler_flags " $qarg" prev= func_append compile_command " $qarg" func_append finalize_command " $qarg" continue ;; xlinker) func_append linker_flags " $qarg" func_append compiler_flags " $wl$qarg" prev= func_append compile_command " $wl$qarg" func_append finalize_command " $wl$qarg" continue ;; *) eval "$prev=\"\$arg\"" prev= continue ;; esac fi # test -n "$prev" prevarg=$arg case $arg in -all-static) if test -n "$link_static_flag"; then # See comment for -static flag below, for more details. func_append compile_command " $link_static_flag" func_append finalize_command " $link_static_flag" fi continue ;; -allow-undefined) # FIXME: remove this flag sometime in the future. func_fatal_error "'-allow-undefined' must not be used because it is the default" ;; -avoid-version) avoid_version=yes continue ;; -bindir) prev=bindir continue ;; -dlopen) prev=dlfiles continue ;; -dlpreopen) prev=dlprefiles continue ;; -export-dynamic) export_dynamic=yes continue ;; -export-symbols | -export-symbols-regex) if test -n "$export_symbols" || test -n "$export_symbols_regex"; then func_fatal_error "more than one -exported-symbols argument is not allowed" fi if test X-export-symbols = "X$arg"; then prev=expsyms else prev=expsyms_regex fi continue ;; -framework) prev=framework continue ;; -inst-prefix-dir) prev=inst_prefix continue ;; # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:* # so, if we see these flags be careful not to treat them like -L -L[A-Z][A-Z]*:*) case $with_gcc/$host in no/*-*-irix* | /*-*-irix*) func_append compile_command " $arg" func_append finalize_command " $arg" ;; esac continue ;; -L*) func_stripname "-L" '' "$arg" if test -z "$func_stripname_result"; then if test "$#" -gt 0; then func_fatal_error "require no space between '-L' and '$1'" else func_fatal_error "need path for '-L' option" fi fi func_resolve_sysroot "$func_stripname_result" dir=$func_resolve_sysroot_result # We need an absolute path. case $dir in [\\/]* | [A-Za-z]:[\\/]*) ;; *) absdir=`cd "$dir" && pwd` test -z "$absdir" && \ func_fatal_error "cannot determine absolute directory name of '$dir'" dir=$absdir ;; esac case "$deplibs " in *" -L$dir "* | *" $arg "*) # Will only happen for absolute or sysroot arguments ;; *) # Preserve sysroot, but never include relative directories case $dir in [\\/]* | [A-Za-z]:[\\/]* | =*) func_append deplibs " $arg" ;; *) func_append deplibs " -L$dir" ;; esac func_append lib_search_path " $dir" ;; esac case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) testbindir=`$ECHO "$dir" | $SED 's*/lib$*/bin*'` case :$dllsearchpath: in *":$dir:"*) ;; ::) dllsearchpath=$dir;; *) func_append dllsearchpath ":$dir";; esac case :$dllsearchpath: in *":$testbindir:"*) ;; ::) dllsearchpath=$testbindir;; *) func_append dllsearchpath ":$testbindir";; esac ;; esac continue ;; -l*) if test X-lc = "X$arg" || test X-lm = "X$arg"; then case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos* | *-cegcc* | *-*-haiku*) # These systems don't actually have a C or math library (as such) continue ;; *-*-os2*) # These systems don't actually have a C library (as such) test X-lc = "X$arg" && continue ;; *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig* | *-*-midnightbsd*) # Do not include libc due to us having libc/libc_r. test X-lc = "X$arg" && continue ;; *-*-rhapsody* | *-*-darwin1.[012]) # Rhapsody C and math libraries are in the System framework func_append deplibs " System.ltframework" continue ;; *-*-sco3.2v5* | *-*-sco5v6*) # Causes problems with __ctype test X-lc = "X$arg" && continue ;; *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) # Compiler inserts libc in the correct place for threads to work test X-lc = "X$arg" && continue ;; esac elif test X-lc_r = "X$arg"; then case $host in *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig* | *-*-midnightbsd*) # Do not include libc_r directly, use -pthread flag. continue ;; esac fi func_append deplibs " $arg" continue ;; -mllvm) prev=mllvm continue ;; -module) module=yes continue ;; # Tru64 UNIX uses -model [arg] to determine the layout of C++ # classes, name mangling, and exception handling. # Darwin uses the -arch flag to determine output architecture. -model|-arch|-isysroot|--sysroot) func_append compiler_flags " $arg" func_append compile_command " $arg" func_append finalize_command " $arg" prev=xcompiler continue ;; # Solaris ld rejects as of 11.4. Refer to Oracle bug 22985199. -pthread) case $host in *solaris2*) ;; *) case "$new_inherited_linker_flags " in *" $arg "*) ;; * ) func_append new_inherited_linker_flags " $arg" ;; esac ;; esac continue ;; -mt|-mthreads|-kthread|-Kthread|-pthreads|--thread-safe \ |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) func_append compiler_flags " $arg" func_append compile_command " $arg" func_append finalize_command " $arg" case "$new_inherited_linker_flags " in *" $arg "*) ;; * ) func_append new_inherited_linker_flags " $arg" ;; esac continue ;; -multi_module) single_module=$wl-multi_module continue ;; -no-fast-install) fast_install=no continue ;; -no-install) case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*) # The PATH hackery in wrapper scripts is required on Windows # and Darwin in order for the loader to find any dlls it needs. func_warning "'-no-install' is ignored for $host" func_warning "assuming '-no-fast-install' instead" fast_install=no ;; *) no_install=yes ;; esac continue ;; -no-undefined) allow_undefined=no continue ;; -objectlist) prev=objectlist continue ;; -os2dllname) prev=os2dllname continue ;; -o) prev=output ;; -precious-files-regex) prev=precious_regex continue ;; -release) prev=release continue ;; -rpath) prev=rpath continue ;; -R) prev=xrpath continue ;; -R*) func_stripname '-R' '' "$arg" dir=$func_stripname_result # We need an absolute path. case $dir in [\\/]* | [A-Za-z]:[\\/]*) ;; =*) func_stripname '=' '' "$dir" dir=$lt_sysroot$func_stripname_result ;; *) func_fatal_error "only absolute run-paths are allowed" ;; esac case "$xrpath " in *" $dir "*) ;; *) func_append xrpath " $dir" ;; esac continue ;; -shared) # The effects of -shared are defined in a previous loop. continue ;; -shrext) prev=shrext continue ;; -static | -static-libtool-libs) # The effects of -static are defined in a previous loop. # We used to do the same as -all-static on platforms that # didn't have a PIC flag, but the assumption that the effects # would be equivalent was wrong. It would break on at least # Digital Unix and AIX. continue ;; -thread-safe) thread_safe=yes continue ;; -version-info) prev=vinfo continue ;; -version-number) prev=vinfo vinfo_number=yes continue ;; -weak) prev=weak continue ;; -Wc,*) func_stripname '-Wc,' '' "$arg" args=$func_stripname_result arg= save_ifs=$IFS; IFS=, for flag in $args; do IFS=$save_ifs func_quote_arg pretty "$flag" func_append arg " $func_quote_arg_result" func_append compiler_flags " $func_quote_arg_result" done IFS=$save_ifs func_stripname ' ' '' "$arg" arg=$func_stripname_result ;; -Wl,*) func_stripname '-Wl,' '' "$arg" args=$func_stripname_result arg= save_ifs=$IFS; IFS=, for flag in $args; do IFS=$save_ifs func_quote_arg pretty "$flag" func_append arg " $wl$func_quote_arg_result" func_append compiler_flags " $wl$func_quote_arg_result" func_append linker_flags " $func_quote_arg_result" done IFS=$save_ifs func_stripname ' ' '' "$arg" arg=$func_stripname_result ;; -Xassembler) prev=xassembler continue ;; -Xcompiler) prev=xcompiler continue ;; -Xlinker) prev=xlinker continue ;; -XCClinker) prev=xcclinker continue ;; # -msg_* for osf cc -msg_*) func_quote_arg pretty "$arg" arg=$func_quote_arg_result ;; # Flags to be passed through unchanged, with rationale: # -64, -mips[0-9] enable 64-bit mode for the SGI compiler # -r[0-9][0-9]* specify processor for the SGI compiler # -xarch=*, -xtarget=* enable 64-bit mode for the Sun compiler # +DA*, +DD* enable 64-bit mode for the HP compiler # -q* compiler args for the IBM compiler # -m*, -t[45]*, -txscale* architecture-specific flags for GCC # -F/path path to uninstalled frameworks, gcc on darwin # -p, -pg, --coverage, -fprofile-* profiling flags for GCC # -fstack-protector* stack protector flags for GCC # @file GCC response files # -tp=* Portland pgcc target processor selection # --sysroot=* for sysroot support # -O*, -g*, -flto*, -fwhopr*, -fuse-linker-plugin GCC link-time optimization # -specs=* GCC specs files # -stdlib=* select c++ std lib with clang # -fsanitize=* Clang/GCC memory and address sanitizer # -fuse-ld=* Linker select flags for GCC # -static-* direct GCC to link specific libraries statically # -fcilkplus Cilk Plus language extension features for C/C++ # -Wa,* Pass flags directly to the assembler -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \ -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \ -O*|-g*|-flto*|-fwhopr*|-fuse-linker-plugin|-fstack-protector*|-stdlib=*| \ -specs=*|-fsanitize=*|-fuse-ld=*|-static-*|-fcilkplus|-Wa,*) func_quote_arg pretty "$arg" arg=$func_quote_arg_result func_append compile_command " $arg" func_append finalize_command " $arg" func_append compiler_flags " $arg" continue ;; -Z*) if test os2 = "`expr $host : '.*\(os2\)'`"; then # OS/2 uses -Zxxx to specify OS/2-specific options compiler_flags="$compiler_flags $arg" func_append compile_command " $arg" func_append finalize_command " $arg" case $arg in -Zlinker | -Zstack) prev=xcompiler ;; esac continue else # Otherwise treat like 'Some other compiler flag' below func_quote_arg pretty "$arg" arg=$func_quote_arg_result fi ;; # Some other compiler flag. -* | +*) func_quote_arg pretty "$arg" arg=$func_quote_arg_result ;; *.$objext) # A standard object. func_append objs " $arg" ;; *.lo) # A libtool-controlled object. # Check to see that this really is a libtool object. if func_lalib_unsafe_p "$arg"; then pic_object= non_pic_object= # Read the .lo file func_source "$arg" if test -z "$pic_object" || test -z "$non_pic_object" || test none = "$pic_object" && test none = "$non_pic_object"; then func_fatal_error "cannot find name of object for '$arg'" fi # Extract subdirectory from the argument. func_dirname "$arg" "/" "" xdir=$func_dirname_result test none = "$pic_object" || { # Prepend the subdirectory the object is found in. pic_object=$xdir$pic_object if test dlfiles = "$prev"; then if test yes = "$build_libtool_libs" && test yes = "$dlopen_support"; then func_append dlfiles " $pic_object" prev= continue else # If libtool objects are unsupported, then we need to preload. prev=dlprefiles fi fi # CHECK ME: I think I busted this. -Ossama if test dlprefiles = "$prev"; then # Preload the old-style object. func_append dlprefiles " $pic_object" prev= fi # A PIC object. func_append libobjs " $pic_object" arg=$pic_object } # Non-PIC object. if test none != "$non_pic_object"; then # Prepend the subdirectory the object is found in. non_pic_object=$xdir$non_pic_object # A standard non-PIC object func_append non_pic_objects " $non_pic_object" if test -z "$pic_object" || test none = "$pic_object"; then arg=$non_pic_object fi else # If the PIC object exists, use it instead. # $xdir was prepended to $pic_object above. non_pic_object=$pic_object func_append non_pic_objects " $non_pic_object" fi else # Only an error if not doing a dry-run. if $opt_dry_run; then # Extract subdirectory from the argument. func_dirname "$arg" "/" "" xdir=$func_dirname_result func_lo2o "$arg" pic_object=$xdir$objdir/$func_lo2o_result non_pic_object=$xdir$func_lo2o_result func_append libobjs " $pic_object" func_append non_pic_objects " $non_pic_object" else func_fatal_error "'$arg' is not a valid libtool object" fi fi ;; *.$libext) # An archive. func_append deplibs " $arg" func_append old_deplibs " $arg" continue ;; *.la) # A libtool-controlled library. func_resolve_sysroot "$arg" if test dlfiles = "$prev"; then # This library was specified with -dlopen. func_append dlfiles " $func_resolve_sysroot_result" prev= elif test dlprefiles = "$prev"; then # The library was specified with -dlpreopen. func_append dlprefiles " $func_resolve_sysroot_result" prev= else func_append deplibs " $func_resolve_sysroot_result" fi continue ;; # Some other compiler argument. *) # Unknown arguments in both finalize_command and compile_command need # to be aesthetically quoted because they are evaled later. func_quote_arg pretty "$arg" arg=$func_quote_arg_result ;; esac # arg # Now actually substitute the argument into the commands. if test -n "$arg"; then func_append compile_command " $arg" func_append finalize_command " $arg" fi done # argument parsing loop test -n "$prev" && \ func_fatal_help "the '$prevarg' option requires an argument" if test yes = "$export_dynamic" && test -n "$export_dynamic_flag_spec"; then eval arg=\"$export_dynamic_flag_spec\" func_append compile_command " $arg" func_append finalize_command " $arg" fi oldlibs= # calculate the name of the file, without its directory func_basename "$output" outputname=$func_basename_result libobjs_save=$libobjs if test -n "$shlibpath_var"; then # get the directories listed in $shlibpath_var eval shlib_search_path=\`\$ECHO \"\$$shlibpath_var\" \| \$SED \'s/:/ /g\'\` else shlib_search_path= fi eval sys_lib_search_path=\"$sys_lib_search_path_spec\" eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" # Definition is injected by LT_CONFIG during libtool generation. func_munge_path_list sys_lib_dlsearch_path "$LT_SYS_LIBRARY_PATH" func_dirname "$output" "/" "" output_objdir=$func_dirname_result$objdir func_to_tool_file "$output_objdir/" tool_output_objdir=$func_to_tool_file_result # Create the object directory. func_mkdir_p "$output_objdir" # Determine the type of output case $output in "") func_fatal_help "you must specify an output file" ;; *.$libext) linkmode=oldlib ;; *.lo | *.$objext) linkmode=obj ;; *.la) linkmode=lib ;; *) linkmode=prog ;; # Anything else should be a program. esac specialdeplibs= libs= # Find all interdependent deplibs by searching for libraries # that are linked more than once (e.g. -la -lb -la) for deplib in $deplibs; do if $opt_preserve_dup_deps; then case "$libs " in *" $deplib "*) func_append specialdeplibs " $deplib" ;; esac fi func_append libs " $deplib" done if test lib = "$linkmode"; then libs="$predeps $libs $compiler_lib_search_path $postdeps" # Compute libraries that are listed more than once in $predeps # $postdeps and mark them as special (i.e., whose duplicates are # not to be eliminated). pre_post_deps= if $opt_duplicate_compiler_generated_deps; then for pre_post_dep in $predeps $postdeps; do case "$pre_post_deps " in *" $pre_post_dep "*) func_append specialdeplibs " $pre_post_deps" ;; esac func_append pre_post_deps " $pre_post_dep" done fi pre_post_deps= fi deplibs= newdependency_libs= newlib_search_path= need_relink=no # whether we're linking any uninstalled libtool libraries notinst_deplibs= # not-installed libtool libraries notinst_path= # paths that contain not-installed libtool libraries case $linkmode in lib) passes="conv dlpreopen link" for file in $dlfiles $dlprefiles; do case $file in *.la) ;; *) func_fatal_help "libraries can '-dlopen' only libtool libraries: $file" ;; esac done ;; prog) compile_deplibs= finalize_deplibs= alldeplibs=false newdlfiles= newdlprefiles= passes="conv scan dlopen dlpreopen link" ;; *) passes="conv" ;; esac for pass in $passes; do # The preopen pass in lib mode reverses $deplibs; put it back here # so that -L comes before libs that need it for instance... if test lib,link = "$linkmode,$pass"; then ## FIXME: Find the place where the list is rebuilt in the wrong ## order, and fix it there properly tmp_deplibs= for deplib in $deplibs; do tmp_deplibs="$deplib $tmp_deplibs" done deplibs=$tmp_deplibs fi if test lib,link = "$linkmode,$pass" || test prog,scan = "$linkmode,$pass"; then libs=$deplibs deplibs= fi if test prog = "$linkmode"; then case $pass in dlopen) libs=$dlfiles ;; dlpreopen) libs=$dlprefiles ;; link) libs="$deplibs %DEPLIBS%" test "X$link_all_deplibs" != Xno && libs="$libs $dependency_libs" ;; esac fi if test lib,dlpreopen = "$linkmode,$pass"; then # Collect and forward deplibs of preopened libtool libs for lib in $dlprefiles; do # Ignore non-libtool-libs dependency_libs= func_resolve_sysroot "$lib" case $lib in *.la) func_source "$func_resolve_sysroot_result" ;; esac # Collect preopened libtool deplibs, except any this library # has declared as weak libs for deplib in $dependency_libs; do func_basename "$deplib" deplib_base=$func_basename_result case " $weak_libs " in *" $deplib_base "*) ;; *) func_append deplibs " $deplib" ;; esac done done libs=$dlprefiles fi if test dlopen = "$pass"; then # Collect dlpreopened libraries save_deplibs=$deplibs deplibs= fi for deplib in $libs; do lib= found=false case $deplib in -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) if test prog,link = "$linkmode,$pass"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else func_append compiler_flags " $deplib" if test lib = "$linkmode"; then case "$new_inherited_linker_flags " in *" $deplib "*) ;; * ) func_append new_inherited_linker_flags " $deplib" ;; esac fi fi continue ;; -l*) if test lib != "$linkmode" && test prog != "$linkmode"; then func_warning "'-l' is ignored for archives/objects" continue fi func_stripname '-l' '' "$deplib" name=$func_stripname_result if test lib = "$linkmode"; then searchdirs="$newlib_search_path $lib_search_path $compiler_lib_search_dirs $sys_lib_search_path $shlib_search_path" else searchdirs="$newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path" fi for searchdir in $searchdirs; do for search_ext in .la $std_shrext .so .a; do # Search the libtool library lib=$searchdir/lib$name$search_ext if test -f "$lib"; then if test .la = "$search_ext"; then found=: else found=false fi break 2 fi done done if $found; then # deplib is a libtool library # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib, # We need to do some special things here, and not later. if test yes = "$allow_libtool_libs_with_static_runtimes"; then case " $predeps $postdeps " in *" $deplib "*) if func_lalib_p "$lib"; then library_names= old_library= func_source "$lib" for l in $old_library $library_names; do ll=$l done if test "X$ll" = "X$old_library"; then # only static version available found=false func_dirname "$lib" "" "." ladir=$func_dirname_result lib=$ladir/$old_library if test prog,link = "$linkmode,$pass"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else deplibs="$deplib $deplibs" test lib = "$linkmode" && newdependency_libs="$deplib $newdependency_libs" fi continue fi fi ;; *) ;; esac fi else # deplib doesn't seem to be a libtool library if test prog,link = "$linkmode,$pass"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else deplibs="$deplib $deplibs" test lib = "$linkmode" && newdependency_libs="$deplib $newdependency_libs" fi continue fi ;; # -l *.ltframework) if test prog,link = "$linkmode,$pass"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else deplibs="$deplib $deplibs" if test lib = "$linkmode"; then case "$new_inherited_linker_flags " in *" $deplib "*) ;; * ) func_append new_inherited_linker_flags " $deplib" ;; esac fi fi continue ;; -L*) case $linkmode in lib) deplibs="$deplib $deplibs" test conv = "$pass" && continue newdependency_libs="$deplib $newdependency_libs" func_stripname '-L' '' "$deplib" func_resolve_sysroot "$func_stripname_result" func_append newlib_search_path " $func_resolve_sysroot_result" ;; prog) if test conv = "$pass"; then deplibs="$deplib $deplibs" continue fi if test scan = "$pass"; then deplibs="$deplib $deplibs" else compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" fi func_stripname '-L' '' "$deplib" func_resolve_sysroot "$func_stripname_result" func_append newlib_search_path " $func_resolve_sysroot_result" ;; *) func_warning "'-L' is ignored for archives/objects" ;; esac # linkmode continue ;; # -L -R*) if test link = "$pass"; then func_stripname '-R' '' "$deplib" func_resolve_sysroot "$func_stripname_result" dir=$func_resolve_sysroot_result # Make sure the xrpath contains only unique directories. case "$xrpath " in *" $dir "*) ;; *) func_append xrpath " $dir" ;; esac fi deplibs="$deplib $deplibs" continue ;; *.la) func_resolve_sysroot "$deplib" lib=$func_resolve_sysroot_result ;; *.$libext) if test conv = "$pass"; then deplibs="$deplib $deplibs" continue fi case $linkmode in lib) # Linking convenience modules into shared libraries is allowed, # but linking other static libraries is non-portable. case " $dlpreconveniencelibs " in *" $deplib "*) ;; *) valid_a_lib=false case $deplibs_check_method in match_pattern*) set dummy $deplibs_check_method; shift match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` if eval "\$ECHO \"$deplib\"" 2>/dev/null | $SED 10q \ | $EGREP "$match_pattern_regex" > /dev/null; then valid_a_lib=: fi ;; pass_all) valid_a_lib=: ;; esac if $valid_a_lib; then echo $ECHO "*** Warning: Linking the shared library $output against the" $ECHO "*** static library $deplib is not portable!" deplibs="$deplib $deplibs" else echo $ECHO "*** Warning: Trying to link with static lib archive $deplib." echo "*** I have the capability to make that library automatically link in when" echo "*** you link to this library. But I can only do this if you have a" echo "*** shared version of the library, which you do not appear to have" echo "*** because the file extensions .$libext of this argument makes me believe" echo "*** that it is just a static archive that I should not use here." fi ;; esac continue ;; prog) if test link != "$pass"; then deplibs="$deplib $deplibs" else compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" fi continue ;; esac # linkmode ;; # *.$libext *.lo | *.$objext) if test conv = "$pass"; then deplibs="$deplib $deplibs" elif test prog = "$linkmode"; then if test dlpreopen = "$pass" || test yes != "$dlopen_support" || test no = "$build_libtool_libs"; then # If there is no dlopen support or we're linking statically, # we need to preload. func_append newdlprefiles " $deplib" compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else func_append newdlfiles " $deplib" fi fi continue ;; %DEPLIBS%) alldeplibs=: continue ;; esac # case $deplib $found || test -f "$lib" \ || func_fatal_error "cannot find the library '$lib' or unhandled argument '$deplib'" # Check to see that this really is a libtool archive. func_lalib_unsafe_p "$lib" \ || func_fatal_error "'$lib' is not a valid libtool archive" func_dirname "$lib" "" "." ladir=$func_dirname_result dlname= dlopen= dlpreopen= libdir= library_names= old_library= inherited_linker_flags= # If the library was installed with an old release of libtool, # it will not redefine variables installed, or shouldnotlink installed=yes shouldnotlink=no avoidtemprpath= # Read the .la file func_source "$lib" # Convert "-framework foo" to "foo.ltframework" if test -n "$inherited_linker_flags"; then tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'` for tmp_inherited_linker_flag in $tmp_inherited_linker_flags; do case " $new_inherited_linker_flags " in *" $tmp_inherited_linker_flag "*) ;; *) func_append new_inherited_linker_flags " $tmp_inherited_linker_flag";; esac done fi dependency_libs=`$ECHO " $dependency_libs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` if test lib,link = "$linkmode,$pass" || test prog,scan = "$linkmode,$pass" || { test prog != "$linkmode" && test lib != "$linkmode"; }; then test -n "$dlopen" && func_append dlfiles " $dlopen" test -n "$dlpreopen" && func_append dlprefiles " $dlpreopen" fi if test conv = "$pass"; then # Only check for convenience libraries deplibs="$lib $deplibs" if test -z "$libdir"; then if test -z "$old_library"; then func_fatal_error "cannot find name of link library for '$lib'" fi # It is a libtool convenience library, so add in its objects. func_append convenience " $ladir/$objdir/$old_library" func_append old_convenience " $ladir/$objdir/$old_library" tmp_libs= for deplib in $dependency_libs; do deplibs="$deplib $deplibs" if $opt_preserve_dup_deps; then case "$tmp_libs " in *" $deplib "*) func_append specialdeplibs " $deplib" ;; esac fi func_append tmp_libs " $deplib" done elif test prog != "$linkmode" && test lib != "$linkmode"; then func_fatal_error "'$lib' is not a convenience library" fi continue fi # $pass = conv # Get the name of the library we link against. linklib= if test -n "$old_library" && { test yes = "$prefer_static_libs" || test built,no = "$prefer_static_libs,$installed"; }; then linklib=$old_library else for l in $old_library $library_names; do linklib=$l done fi if test -z "$linklib"; then func_fatal_error "cannot find name of link library for '$lib'" fi # This library was specified with -dlopen. if test dlopen = "$pass"; then test -z "$libdir" \ && func_fatal_error "cannot -dlopen a convenience library: '$lib'" if test -z "$dlname" || test yes != "$dlopen_support" || test no = "$build_libtool_libs" then # If there is no dlname, no dlopen support or we're linking # statically, we need to preload. We also need to preload any # dependent libraries so libltdl's deplib preloader doesn't # bomb out in the load deplibs phase. func_append dlprefiles " $lib $dependency_libs" else func_append newdlfiles " $lib" fi continue fi # $pass = dlopen # We need an absolute path. case $ladir in [\\/]* | [A-Za-z]:[\\/]*) abs_ladir=$ladir ;; *) abs_ladir=`cd "$ladir" && pwd` if test -z "$abs_ladir"; then func_warning "cannot determine absolute directory name of '$ladir'" func_warning "passing it literally to the linker, although it might fail" abs_ladir=$ladir fi ;; esac func_basename "$lib" laname=$func_basename_result # Find the relevant object directory and library name. if test yes = "$installed"; then if test ! -f "$lt_sysroot$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then func_warning "library '$lib' was moved." dir=$ladir absdir=$abs_ladir libdir=$abs_ladir else dir=$lt_sysroot$libdir absdir=$lt_sysroot$libdir fi test yes = "$hardcode_automatic" && avoidtemprpath=yes else if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then dir=$ladir absdir=$abs_ladir # Remove this search path later func_append notinst_path " $abs_ladir" else dir=$ladir/$objdir absdir=$abs_ladir/$objdir # Remove this search path later func_append notinst_path " $abs_ladir" fi fi # $installed = yes func_stripname 'lib' '.la' "$laname" name=$func_stripname_result # This library was specified with -dlpreopen. if test dlpreopen = "$pass"; then if test -z "$libdir" && test prog = "$linkmode"; then func_fatal_error "only libraries may -dlpreopen a convenience library: '$lib'" fi case $host in # special handling for platforms with PE-DLLs. *cygwin* | *mingw* | *cegcc* ) # Linker will automatically link against shared library if both # static and shared are present. Therefore, ensure we extract # symbols from the import library if a shared library is present # (otherwise, the dlopen module name will be incorrect). We do # this by putting the import library name into $newdlprefiles. # We recover the dlopen module name by 'saving' the la file # name in a special purpose variable, and (later) extracting the # dlname from the la file. if test -n "$dlname"; then func_tr_sh "$dir/$linklib" eval "libfile_$func_tr_sh_result=\$abs_ladir/\$laname" func_append newdlprefiles " $dir/$linklib" else func_append newdlprefiles " $dir/$old_library" # Keep a list of preopened convenience libraries to check # that they are being used correctly in the link pass. test -z "$libdir" && \ func_append dlpreconveniencelibs " $dir/$old_library" fi ;; * ) # Prefer using a static library (so that no silly _DYNAMIC symbols # are required to link). if test -n "$old_library"; then func_append newdlprefiles " $dir/$old_library" # Keep a list of preopened convenience libraries to check # that they are being used correctly in the link pass. test -z "$libdir" && \ func_append dlpreconveniencelibs " $dir/$old_library" # Otherwise, use the dlname, so that lt_dlopen finds it. elif test -n "$dlname"; then func_append newdlprefiles " $dir/$dlname" else func_append newdlprefiles " $dir/$linklib" fi ;; esac fi # $pass = dlpreopen if test -z "$libdir"; then # Link the convenience library if test lib = "$linkmode"; then deplibs="$dir/$old_library $deplibs" elif test prog,link = "$linkmode,$pass"; then compile_deplibs="$dir/$old_library $compile_deplibs" finalize_deplibs="$dir/$old_library $finalize_deplibs" else deplibs="$lib $deplibs" # used for prog,scan pass fi continue fi if test prog = "$linkmode" && test link != "$pass"; then func_append newlib_search_path " $ladir" deplibs="$lib $deplibs" linkalldeplibs=false if test no != "$link_all_deplibs" || test -z "$library_names" || test no = "$build_libtool_libs"; then linkalldeplibs=: fi tmp_libs= for deplib in $dependency_libs; do case $deplib in -L*) func_stripname '-L' '' "$deplib" func_resolve_sysroot "$func_stripname_result" func_append newlib_search_path " $func_resolve_sysroot_result" ;; esac # Need to link against all dependency_libs? if $linkalldeplibs; then deplibs="$deplib $deplibs" else # Need to hardcode shared library paths # or/and link against static libraries newdependency_libs="$deplib $newdependency_libs" fi if $opt_preserve_dup_deps; then case "$tmp_libs " in *" $deplib "*) func_append specialdeplibs " $deplib" ;; esac fi func_append tmp_libs " $deplib" done # for deplib continue fi # $linkmode = prog... if test prog,link = "$linkmode,$pass"; then if test -n "$library_names" && { { test no = "$prefer_static_libs" || test built,yes = "$prefer_static_libs,$installed"; } || test -z "$old_library"; }; then # We need to hardcode the library path if test -n "$shlibpath_var" && test -z "$avoidtemprpath"; then # Make sure the rpath contains only unique directories. case $temp_rpath: in *"$absdir:"*) ;; *) func_append temp_rpath "$absdir:" ;; esac fi # Hardcode the library path. # Skip directories that are in the system default run-time # search path. case " $sys_lib_dlsearch_path " in *" $absdir "*) ;; *) case "$compile_rpath " in *" $absdir "*) ;; *) func_append compile_rpath " $absdir" ;; esac ;; esac case " $sys_lib_dlsearch_path " in *" $libdir "*) ;; *) case "$finalize_rpath " in *" $libdir "*) ;; *) func_append finalize_rpath " $libdir" ;; esac ;; esac fi # $linkmode,$pass = prog,link... if $alldeplibs && { test pass_all = "$deplibs_check_method" || { test yes = "$build_libtool_libs" && test -n "$library_names"; }; }; then # We only need to search for static libraries continue fi fi link_static=no # Whether the deplib will be linked statically use_static_libs=$prefer_static_libs if test built = "$use_static_libs" && test yes = "$installed"; then use_static_libs=no fi if test -n "$library_names" && { test no = "$use_static_libs" || test -z "$old_library"; }; then case $host in *cygwin* | *mingw* | *cegcc* | *os2*) # No point in relinking DLLs because paths are not encoded func_append notinst_deplibs " $lib" need_relink=no ;; *) if test no = "$installed"; then func_append notinst_deplibs " $lib" need_relink=yes fi ;; esac # This is a shared library # Warn about portability, can't link against -module's on some # systems (darwin). Don't bleat about dlopened modules though! dlopenmodule= for dlpremoduletest in $dlprefiles; do if test "X$dlpremoduletest" = "X$lib"; then dlopenmodule=$dlpremoduletest break fi done if test -z "$dlopenmodule" && test yes = "$shouldnotlink" && test link = "$pass"; then echo if test prog = "$linkmode"; then $ECHO "*** Warning: Linking the executable $output against the loadable module" else $ECHO "*** Warning: Linking the shared library $output against the loadable module" fi $ECHO "*** $linklib is not portable!" fi if test lib = "$linkmode" && test yes = "$hardcode_into_libs"; then # Hardcode the library path. # Skip directories that are in the system default run-time # search path. case " $sys_lib_dlsearch_path " in *" $absdir "*) ;; *) case "$compile_rpath " in *" $absdir "*) ;; *) func_append compile_rpath " $absdir" ;; esac ;; esac case " $sys_lib_dlsearch_path " in *" $libdir "*) ;; *) case "$finalize_rpath " in *" $libdir "*) ;; *) func_append finalize_rpath " $libdir" ;; esac ;; esac fi if test -n "$old_archive_from_expsyms_cmds"; then # figure out the soname set dummy $library_names shift realname=$1 shift libname=`eval "\\$ECHO \"$libname_spec\""` # use dlname if we got it. it's perfectly good, no? if test -n "$dlname"; then soname=$dlname elif test -n "$soname_spec"; then # bleh windows case $host in *cygwin* | mingw* | *cegcc* | *os2*) func_arith $current - $age major=$func_arith_result versuffix=-$major ;; esac eval soname=\"$soname_spec\" else soname=$realname fi # Make a new name for the extract_expsyms_cmds to use soroot=$soname func_basename "$soroot" soname=$func_basename_result func_stripname 'lib' '.dll' "$soname" newlib=libimp-$func_stripname_result.a # If the library has no export list, then create one now if test -f "$output_objdir/$soname-def"; then : else func_verbose "extracting exported symbol list from '$soname'" func_execute_cmds "$extract_expsyms_cmds" 'exit $?' fi # Create $newlib if test -f "$output_objdir/$newlib"; then :; else func_verbose "generating import library for '$soname'" func_execute_cmds "$old_archive_from_expsyms_cmds" 'exit $?' fi # make sure the library variables are pointing to the new library dir=$output_objdir linklib=$newlib fi # test -n "$old_archive_from_expsyms_cmds" if test prog = "$linkmode" || test relink != "$opt_mode"; then add_shlibpath= add_dir= add= lib_linked=yes case $hardcode_action in immediate | unsupported) if test no = "$hardcode_direct"; then add=$dir/$linklib case $host in *-*-sco3.2v5.0.[024]*) add_dir=-L$dir ;; *-*-sysv4*uw2*) add_dir=-L$dir ;; *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \ *-*-unixware7*) add_dir=-L$dir ;; *-*-darwin* ) # if the lib is a (non-dlopened) module then we cannot # link against it, someone is ignoring the earlier warnings if /usr/bin/file -L $add 2> /dev/null | $GREP ": [^:]* bundle" >/dev/null; then if test "X$dlopenmodule" != "X$lib"; then $ECHO "*** Warning: lib $linklib is a module, not a shared library" if test -z "$old_library"; then echo echo "*** And there doesn't seem to be a static archive available" echo "*** The link will probably fail, sorry" else add=$dir/$old_library fi elif test -n "$old_library"; then add=$dir/$old_library fi fi esac elif test no = "$hardcode_minus_L"; then case $host in *-*-sunos*) add_shlibpath=$dir ;; esac add_dir=-L$dir add=-l$name elif test no = "$hardcode_shlibpath_var"; then add_shlibpath=$dir add=-l$name else lib_linked=no fi ;; relink) if test yes = "$hardcode_direct" && test no = "$hardcode_direct_absolute"; then add=$dir/$linklib elif test yes = "$hardcode_minus_L"; then add_dir=-L$absdir # Try looking first in the location we're being installed to. if test -n "$inst_prefix_dir"; then case $libdir in [\\/]*) func_append add_dir " -L$inst_prefix_dir$libdir" ;; esac fi add=-l$name elif test yes = "$hardcode_shlibpath_var"; then add_shlibpath=$dir add=-l$name else lib_linked=no fi ;; *) lib_linked=no ;; esac if test yes != "$lib_linked"; then func_fatal_configuration "unsupported hardcode properties" fi if test -n "$add_shlibpath"; then case :$compile_shlibpath: in *":$add_shlibpath:"*) ;; *) func_append compile_shlibpath "$add_shlibpath:" ;; esac fi if test prog = "$linkmode"; then test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" test -n "$add" && compile_deplibs="$add $compile_deplibs" else test -n "$add_dir" && deplibs="$add_dir $deplibs" test -n "$add" && deplibs="$add $deplibs" if test yes != "$hardcode_direct" && test yes != "$hardcode_minus_L" && test yes = "$hardcode_shlibpath_var"; then case :$finalize_shlibpath: in *":$libdir:"*) ;; *) func_append finalize_shlibpath "$libdir:" ;; esac fi fi fi if test prog = "$linkmode" || test relink = "$opt_mode"; then add_shlibpath= add_dir= add= # Finalize command for both is simple: just hardcode it. if test yes = "$hardcode_direct" && test no = "$hardcode_direct_absolute"; then add=$libdir/$linklib elif test yes = "$hardcode_minus_L"; then add_dir=-L$libdir add=-l$name elif test yes = "$hardcode_shlibpath_var"; then case :$finalize_shlibpath: in *":$libdir:"*) ;; *) func_append finalize_shlibpath "$libdir:" ;; esac add=-l$name elif test yes = "$hardcode_automatic"; then if test -n "$inst_prefix_dir" && test -f "$inst_prefix_dir$libdir/$linklib"; then add=$inst_prefix_dir$libdir/$linklib else add=$libdir/$linklib fi else # We cannot seem to hardcode it, guess we'll fake it. add_dir=-L$libdir # Try looking first in the location we're being installed to. if test -n "$inst_prefix_dir"; then case $libdir in [\\/]*) func_append add_dir " -L$inst_prefix_dir$libdir" ;; esac fi add=-l$name fi if test prog = "$linkmode"; then test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs" test -n "$add" && finalize_deplibs="$add $finalize_deplibs" else test -n "$add_dir" && deplibs="$add_dir $deplibs" test -n "$add" && deplibs="$add $deplibs" fi fi elif test prog = "$linkmode"; then # Here we assume that one of hardcode_direct or hardcode_minus_L # is not unsupported. This is valid on all known static and # shared platforms. if test unsupported != "$hardcode_direct"; then test -n "$old_library" && linklib=$old_library compile_deplibs="$dir/$linklib $compile_deplibs" finalize_deplibs="$dir/$linklib $finalize_deplibs" else compile_deplibs="-l$name -L$dir $compile_deplibs" finalize_deplibs="-l$name -L$dir $finalize_deplibs" fi elif test yes = "$build_libtool_libs"; then # Not a shared library if test pass_all != "$deplibs_check_method"; then # We're trying link a shared library against a static one # but the system doesn't support it. # Just print a warning and add the library to dependency_libs so # that the program can be linked against the static library. echo $ECHO "*** Warning: This system cannot link to static lib archive $lib." echo "*** I have the capability to make that library automatically link in when" echo "*** you link to this library. But I can only do this if you have a" echo "*** shared version of the library, which you do not appear to have." if test yes = "$module"; then echo "*** But as you try to build a module library, libtool will still create " echo "*** a static module, that should work as long as the dlopening application" echo "*** is linked with the -dlopen flag to resolve symbols at runtime." if test -z "$global_symbol_pipe"; then echo echo "*** However, this would only work if libtool was able to extract symbol" echo "*** lists from a program, using 'nm' or equivalent, but libtool could" echo "*** not find such a program. So, this module is probably useless." echo "*** 'nm' from GNU binutils and a full rebuild may help." fi if test no = "$build_old_libs"; then build_libtool_libs=module build_old_libs=yes else build_libtool_libs=no fi fi else deplibs="$dir/$old_library $deplibs" link_static=yes fi fi # link shared/static library? if test lib = "$linkmode"; then if test -n "$dependency_libs" && { test yes != "$hardcode_into_libs" || test yes = "$build_old_libs" || test yes = "$link_static"; }; then # Extract -R from dependency_libs temp_deplibs= for libdir in $dependency_libs; do case $libdir in -R*) func_stripname '-R' '' "$libdir" temp_xrpath=$func_stripname_result case " $xrpath " in *" $temp_xrpath "*) ;; *) func_append xrpath " $temp_xrpath";; esac;; *) func_append temp_deplibs " $libdir";; esac done dependency_libs=$temp_deplibs fi func_append newlib_search_path " $absdir" # Link against this library test no = "$link_static" && newdependency_libs="$abs_ladir/$laname $newdependency_libs" # ... and its dependency_libs tmp_libs= for deplib in $dependency_libs; do newdependency_libs="$deplib $newdependency_libs" case $deplib in -L*) func_stripname '-L' '' "$deplib" func_resolve_sysroot "$func_stripname_result";; *) func_resolve_sysroot "$deplib" ;; esac if $opt_preserve_dup_deps; then case "$tmp_libs " in *" $func_resolve_sysroot_result "*) func_append specialdeplibs " $func_resolve_sysroot_result" ;; esac fi func_append tmp_libs " $func_resolve_sysroot_result" done if test no != "$link_all_deplibs"; then # Add the search paths of all dependency libraries for deplib in $dependency_libs; do path= case $deplib in -L*) path=$deplib ;; *.la) func_resolve_sysroot "$deplib" deplib=$func_resolve_sysroot_result func_dirname "$deplib" "" "." dir=$func_dirname_result # We need an absolute path. case $dir in [\\/]* | [A-Za-z]:[\\/]*) absdir=$dir ;; *) absdir=`cd "$dir" && pwd` if test -z "$absdir"; then func_warning "cannot determine absolute directory name of '$dir'" absdir=$dir fi ;; esac if $GREP "^installed=no" $deplib > /dev/null; then case $host in *-*-darwin*) depdepl= eval deplibrary_names=`$SED -n -e 's/^library_names=\(.*\)$/\1/p' $deplib` if test -n "$deplibrary_names"; then for tmp in $deplibrary_names; do depdepl=$tmp done if test -f "$absdir/$objdir/$depdepl"; then depdepl=$absdir/$objdir/$depdepl darwin_install_name=`$OTOOL -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` if test -z "$darwin_install_name"; then darwin_install_name=`$OTOOL64 -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` fi func_append compiler_flags " $wl-dylib_file $wl$darwin_install_name:$depdepl" func_append linker_flags " -dylib_file $darwin_install_name:$depdepl" path= fi fi ;; *) path=-L$absdir/$objdir ;; esac else eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` test -z "$libdir" && \ func_fatal_error "'$deplib' is not a valid libtool archive" test "$absdir" != "$libdir" && \ func_warning "'$deplib' seems to be moved" path=-L$absdir fi ;; esac case " $deplibs " in *" $path "*) ;; *) deplibs="$path $deplibs" ;; esac done fi # link_all_deplibs != no fi # linkmode = lib done # for deplib in $libs if test link = "$pass"; then if test prog = "$linkmode"; then compile_deplibs="$new_inherited_linker_flags $compile_deplibs" finalize_deplibs="$new_inherited_linker_flags $finalize_deplibs" else compiler_flags="$compiler_flags "`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` fi fi dependency_libs=$newdependency_libs if test dlpreopen = "$pass"; then # Link the dlpreopened libraries before other libraries for deplib in $save_deplibs; do deplibs="$deplib $deplibs" done fi if test dlopen != "$pass"; then test conv = "$pass" || { # Make sure lib_search_path contains only unique directories. lib_search_path= for dir in $newlib_search_path; do case "$lib_search_path " in *" $dir "*) ;; *) func_append lib_search_path " $dir" ;; esac done newlib_search_path= } if test prog,link = "$linkmode,$pass"; then vars="compile_deplibs finalize_deplibs" else vars=deplibs fi for var in $vars dependency_libs; do # Add libraries to $var in reverse order eval tmp_libs=\"\$$var\" new_libs= for deplib in $tmp_libs; do # FIXME: Pedantically, this is the right thing to do, so # that some nasty dependency loop isn't accidentally # broken: #new_libs="$deplib $new_libs" # Pragmatically, this seems to cause very few problems in # practice: case $deplib in -L*) new_libs="$deplib $new_libs" ;; -R*) ;; *) # And here is the reason: when a library appears more # than once as an explicit dependence of a library, or # is implicitly linked in more than once by the # compiler, it is considered special, and multiple # occurrences thereof are not removed. Compare this # with having the same library being listed as a # dependency of multiple other libraries: in this case, # we know (pedantically, we assume) the library does not # need to be listed more than once, so we keep only the # last copy. This is not always right, but it is rare # enough that we require users that really mean to play # such unportable linking tricks to link the library # using -Wl,-lname, so that libtool does not consider it # for duplicate removal. case " $specialdeplibs " in *" $deplib "*) new_libs="$deplib $new_libs" ;; *) case " $new_libs " in *" $deplib "*) ;; *) new_libs="$deplib $new_libs" ;; esac ;; esac ;; esac done tmp_libs= for deplib in $new_libs; do case $deplib in -L*) case " $tmp_libs " in *" $deplib "*) ;; *) func_append tmp_libs " $deplib" ;; esac ;; *) func_append tmp_libs " $deplib" ;; esac done eval $var=\"$tmp_libs\" done # for var fi # Add Sun CC postdeps if required: test CXX = "$tagname" && { case $host_os in linux*) case `$CC -V 2>&1 | $SED 5q` in *Sun\ C*) # Sun C++ 5.9 func_suncc_cstd_abi if test no != "$suncc_use_cstd_abi"; then func_append postdeps ' -library=Cstd -library=Crun' fi ;; esac ;; solaris*) func_cc_basename "$CC" case $func_cc_basename_result in CC* | sunCC*) func_suncc_cstd_abi if test no != "$suncc_use_cstd_abi"; then func_append postdeps ' -library=Cstd -library=Crun' fi ;; esac ;; esac } # Last step: remove runtime libs from dependency_libs # (they stay in deplibs) tmp_libs= for i in $dependency_libs; do case " $predeps $postdeps $compiler_lib_search_path " in *" $i "*) i= ;; esac if test -n "$i"; then func_append tmp_libs " $i" fi done dependency_libs=$tmp_libs done # for pass if test prog = "$linkmode"; then dlfiles=$newdlfiles fi if test prog = "$linkmode" || test lib = "$linkmode"; then dlprefiles=$newdlprefiles fi case $linkmode in oldlib) if test -n "$dlfiles$dlprefiles" || test no != "$dlself"; then func_warning "'-dlopen' is ignored for archives" fi case " $deplibs" in *\ -l* | *\ -L*) func_warning "'-l' and '-L' are ignored for archives" ;; esac test -n "$rpath" && \ func_warning "'-rpath' is ignored for archives" test -n "$xrpath" && \ func_warning "'-R' is ignored for archives" test -n "$vinfo" && \ func_warning "'-version-info/-version-number' is ignored for archives" test -n "$release" && \ func_warning "'-release' is ignored for archives" test -n "$export_symbols$export_symbols_regex" && \ func_warning "'-export-symbols' is ignored for archives" # Now set the variables for building old libraries. build_libtool_libs=no oldlibs=$output func_append objs "$old_deplibs" ;; lib) # Make sure we only generate libraries of the form 'libNAME.la'. case $outputname in lib*) func_stripname 'lib' '.la' "$outputname" name=$func_stripname_result eval shared_ext=\"$shrext_cmds\" eval libname=\"$libname_spec\" ;; *) test no = "$module" \ && func_fatal_help "libtool library '$output' must begin with 'lib'" if test no != "$need_lib_prefix"; then # Add the "lib" prefix for modules if required func_stripname '' '.la' "$outputname" name=$func_stripname_result eval shared_ext=\"$shrext_cmds\" eval libname=\"$libname_spec\" else func_stripname '' '.la' "$outputname" libname=$func_stripname_result fi ;; esac if test -n "$objs"; then if test pass_all != "$deplibs_check_method"; then func_fatal_error "cannot build libtool library '$output' from non-libtool objects on this host:$objs" else echo $ECHO "*** Warning: Linking the shared library $output against the non-libtool" $ECHO "*** objects $objs is not portable!" func_append libobjs " $objs" fi fi test no = "$dlself" \ || func_warning "'-dlopen self' is ignored for libtool libraries" set dummy $rpath shift test 1 -lt "$#" \ && func_warning "ignoring multiple '-rpath's for a libtool library" install_libdir=$1 oldlibs= if test -z "$rpath"; then if test yes = "$build_libtool_libs"; then # Building a libtool convenience library. # Some compilers have problems with a '.al' extension so # convenience libraries should have the same extension an # archive normally would. oldlibs="$output_objdir/$libname.$libext $oldlibs" build_libtool_libs=convenience build_old_libs=yes fi test -n "$vinfo" && \ func_warning "'-version-info/-version-number' is ignored for convenience libraries" test -n "$release" && \ func_warning "'-release' is ignored for convenience libraries" else # Parse the version information argument. save_ifs=$IFS; IFS=: set dummy $vinfo 0 0 0 shift IFS=$save_ifs test -n "$7" && \ func_fatal_help "too many parameters to '-version-info'" # convert absolute version numbers to libtool ages # this retains compatibility with .la files and attempts # to make the code below a bit more comprehensible case $vinfo_number in yes) number_major=$1 number_minor=$2 number_revision=$3 # # There are really only two kinds -- those that # use the current revision as the major version # and those that subtract age and use age as # a minor version. But, then there is irix # that has an extra 1 added just for fun # case $version_type in # correct linux to gnu/linux during the next big refactor darwin|freebsd-elf|linux|midnightbsd-elf|osf|windows|none) func_arith $number_major + $number_minor current=$func_arith_result age=$number_minor revision=$number_revision ;; freebsd-aout|qnx|sunos) current=$number_major revision=$number_minor age=0 ;; irix|nonstopux) func_arith $number_major + $number_minor current=$func_arith_result age=$number_minor revision=$number_minor lt_irix_increment=no ;; *) func_fatal_configuration "$modename: unknown library version type '$version_type'" ;; esac ;; no) current=$1 revision=$2 age=$3 ;; esac # Check that each of the things are valid numbers. case $current in 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; *) func_error "CURRENT '$current' must be a nonnegative integer" func_fatal_error "'$vinfo' is not valid version information" ;; esac case $revision in 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; *) func_error "REVISION '$revision' must be a nonnegative integer" func_fatal_error "'$vinfo' is not valid version information" ;; esac case $age in 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; *) func_error "AGE '$age' must be a nonnegative integer" func_fatal_error "'$vinfo' is not valid version information" ;; esac if test "$age" -gt "$current"; then func_error "AGE '$age' is greater than the current interface number '$current'" func_fatal_error "'$vinfo' is not valid version information" fi # Calculate the version variables. major= versuffix= verstring= case $version_type in none) ;; darwin) # Like Linux, but with the current version available in # verstring for coding it into the library header func_arith $current - $age major=.$func_arith_result versuffix=$major.$age.$revision # Darwin ld doesn't like 0 for these options... func_arith $current + 1 minor_current=$func_arith_result xlcverstring="$wl-compatibility_version $wl$minor_current $wl-current_version $wl$minor_current.$revision" verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" # On Darwin other compilers case $CC in nagfor*) verstring="$wl-compatibility_version $wl$minor_current $wl-current_version $wl$minor_current.$revision" ;; *) verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" ;; esac ;; freebsd-aout) major=.$current versuffix=.$current.$revision ;; freebsd-elf | midnightbsd-elf) func_arith $current - $age major=.$func_arith_result versuffix=$major.$age.$revision ;; irix | nonstopux) if test no = "$lt_irix_increment"; then func_arith $current - $age else func_arith $current - $age + 1 fi major=$func_arith_result case $version_type in nonstopux) verstring_prefix=nonstopux ;; *) verstring_prefix=sgi ;; esac verstring=$verstring_prefix$major.$revision # Add in all the interfaces that we are compatible with. loop=$revision while test 0 -ne "$loop"; do func_arith $revision - $loop iface=$func_arith_result func_arith $loop - 1 loop=$func_arith_result verstring=$verstring_prefix$major.$iface:$verstring done # Before this point, $major must not contain '.'. major=.$major versuffix=$major.$revision ;; linux) # correct to gnu/linux during the next big refactor func_arith $current - $age major=.$func_arith_result versuffix=$major.$age.$revision ;; osf) func_arith $current - $age major=.$func_arith_result versuffix=.$current.$age.$revision verstring=$current.$age.$revision # Add in all the interfaces that we are compatible with. loop=$age while test 0 -ne "$loop"; do func_arith $current - $loop iface=$func_arith_result func_arith $loop - 1 loop=$func_arith_result verstring=$verstring:$iface.0 done # Make executables depend on our current version. func_append verstring ":$current.0" ;; qnx) major=.$current versuffix=.$current ;; sco) major=.$current versuffix=.$current ;; sunos) major=.$current versuffix=.$current.$revision ;; windows) # Use '-' rather than '.', since we only want one # extension on DOS 8.3 file systems. func_arith $current - $age major=$func_arith_result versuffix=-$major ;; *) func_fatal_configuration "unknown library version type '$version_type'" ;; esac # Clear the version info if we defaulted, and they specified a release. if test -z "$vinfo" && test -n "$release"; then major= case $version_type in darwin) # we can't check for "0.0" in archive_cmds due to quoting # problems, so we reset it completely verstring= ;; *) verstring=0.0 ;; esac if test no = "$need_version"; then versuffix= else versuffix=.0.0 fi fi # Remove version info from name if versioning should be avoided if test yes,no = "$avoid_version,$need_version"; then major= versuffix= verstring= fi # Check to see if the archive will have undefined symbols. if test yes = "$allow_undefined"; then if test unsupported = "$allow_undefined_flag"; then if test yes = "$build_old_libs"; then func_warning "undefined symbols not allowed in $host shared libraries; building static only" build_libtool_libs=no else func_fatal_error "can't build $host shared library unless -no-undefined is specified" fi fi else # Don't allow undefined symbols. allow_undefined_flag=$no_undefined_flag fi fi func_generate_dlsyms "$libname" "$libname" : func_append libobjs " $symfileobj" test " " = "$libobjs" && libobjs= if test relink != "$opt_mode"; then # Remove our outputs, but don't remove object files since they # may have been created when compiling PIC objects. removelist= tempremovelist=`$ECHO "$output_objdir/*"` for p in $tempremovelist; do case $p in *.$objext | *.gcno) ;; $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/$libname$release.*) if test -n "$precious_files_regex"; then if $ECHO "$p" | $EGREP -e "$precious_files_regex" >/dev/null 2>&1 then continue fi fi func_append removelist " $p" ;; *) ;; esac done test -n "$removelist" && \ func_show_eval "${RM}r \$removelist" fi # Now set the variables for building old libraries. if test yes = "$build_old_libs" && test convenience != "$build_libtool_libs"; then func_append oldlibs " $output_objdir/$libname.$libext" # Transform .lo files to .o files. oldobjs="$objs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.$libext$/d; $lo2o" | $NL2SP` fi # Eliminate all temporary directories. #for path in $notinst_path; do # lib_search_path=`$ECHO "$lib_search_path " | $SED "s% $path % %g"` # deplibs=`$ECHO "$deplibs " | $SED "s% -L$path % %g"` # dependency_libs=`$ECHO "$dependency_libs " | $SED "s% -L$path % %g"` #done if test -n "$xrpath"; then # If the user specified any rpath flags, then add them. temp_xrpath= for libdir in $xrpath; do func_replace_sysroot "$libdir" func_append temp_xrpath " -R$func_replace_sysroot_result" case "$finalize_rpath " in *" $libdir "*) ;; *) func_append finalize_rpath " $libdir" ;; esac done if test yes != "$hardcode_into_libs" || test yes = "$build_old_libs"; then dependency_libs="$temp_xrpath $dependency_libs" fi fi # Make sure dlfiles contains only unique files that won't be dlpreopened old_dlfiles=$dlfiles dlfiles= for lib in $old_dlfiles; do case " $dlprefiles $dlfiles " in *" $lib "*) ;; *) func_append dlfiles " $lib" ;; esac done # Make sure dlprefiles contains only unique files old_dlprefiles=$dlprefiles dlprefiles= for lib in $old_dlprefiles; do case "$dlprefiles " in *" $lib "*) ;; *) func_append dlprefiles " $lib" ;; esac done if test yes = "$build_libtool_libs"; then if test -n "$rpath"; then case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc* | *-*-haiku*) # these systems don't actually have a c library (as such)! ;; *-*-rhapsody* | *-*-darwin1.[012]) # Rhapsody C library is in the System framework func_append deplibs " System.ltframework" ;; *-*-netbsd*) # Don't link with libc until the a.out ld.so is fixed. ;; *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-midnightbsd*) # Do not include libc due to us having libc/libc_r. ;; *-*-sco3.2v5* | *-*-sco5v6*) # Causes problems with __ctype ;; *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) # Compiler inserts libc in the correct place for threads to work ;; *) # Add libc to deplibs on all other systems if necessary. if test yes = "$build_libtool_need_lc"; then func_append deplibs " -lc" fi ;; esac fi # Transform deplibs into only deplibs that can be linked in shared. name_save=$name libname_save=$libname release_save=$release versuffix_save=$versuffix major_save=$major # I'm not sure if I'm treating the release correctly. I think # release should show up in the -l (ie -lgmp5) so we don't want to # add it in twice. Is that correct? release= versuffix= major= newdeplibs= droppeddeps=no case $deplibs_check_method in pass_all) # Don't check for shared/static. Everything works. # This might be a little naive. We might want to check # whether the library exists or not. But this is on # osf3 & osf4 and I'm not really sure... Just # implementing what was already the behavior. newdeplibs=$deplibs ;; test_compile) # This code stresses the "libraries are programs" paradigm to its # limits. Maybe even breaks it. We compile a program, linking it # against the deplibs as a proxy for the library. Then we can check # whether they linked in statically or dynamically with ldd. $opt_dry_run || $RM conftest.c cat > conftest.c </dev/null` $nocaseglob else potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null` fi for potent_lib in $potential_libs; do # Follow soft links. if ls -lLd "$potent_lib" 2>/dev/null | $GREP " -> " >/dev/null; then continue fi # The statement above tries to avoid entering an # endless loop below, in case of cyclic links. # We might still enter an endless loop, since a link # loop can be closed while we follow links, # but so what? potlib=$potent_lib while test -h "$potlib" 2>/dev/null; do potliblink=`ls -ld $potlib | $SED 's/.* -> //'` case $potliblink in [\\/]* | [A-Za-z]:[\\/]*) potlib=$potliblink;; *) potlib=`$ECHO "$potlib" | $SED 's|[^/]*$||'`"$potliblink";; esac done if eval $file_magic_cmd \"\$potlib\" 2>/dev/null | $SED -e 10q | $EGREP "$file_magic_regex" > /dev/null; then func_append newdeplibs " $a_deplib" a_deplib= break 2 fi done done fi if test -n "$a_deplib"; then droppeddeps=yes echo $ECHO "*** Warning: linker path does not have real file for library $a_deplib." echo "*** I have the capability to make that library automatically link in when" echo "*** you link to this library. But I can only do this if you have a" echo "*** shared version of the library, which you do not appear to have" echo "*** because I did check the linker path looking for a file starting" if test -z "$potlib"; then $ECHO "*** with $libname but no candidates were found. (...for file magic test)" else $ECHO "*** with $libname and none of the candidates passed a file format test" $ECHO "*** using a file magic. Last file checked: $potlib" fi fi ;; *) # Add a -L argument. func_append newdeplibs " $a_deplib" ;; esac done # Gone through all deplibs. ;; match_pattern*) set dummy $deplibs_check_method; shift match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` for a_deplib in $deplibs; do case $a_deplib in -l*) func_stripname -l '' "$a_deplib" name=$func_stripname_result if test yes = "$allow_libtool_libs_with_static_runtimes"; then case " $predeps $postdeps " in *" $a_deplib "*) func_append newdeplibs " $a_deplib" a_deplib= ;; esac fi if test -n "$a_deplib"; then libname=`eval "\\$ECHO \"$libname_spec\""` for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do potential_libs=`ls $i/$libname[.-]* 2>/dev/null` for potent_lib in $potential_libs; do potlib=$potent_lib # see symlink-check above in file_magic test if eval "\$ECHO \"$potent_lib\"" 2>/dev/null | $SED 10q | \ $EGREP "$match_pattern_regex" > /dev/null; then func_append newdeplibs " $a_deplib" a_deplib= break 2 fi done done fi if test -n "$a_deplib"; then droppeddeps=yes echo $ECHO "*** Warning: linker path does not have real file for library $a_deplib." echo "*** I have the capability to make that library automatically link in when" echo "*** you link to this library. But I can only do this if you have a" echo "*** shared version of the library, which you do not appear to have" echo "*** because I did check the linker path looking for a file starting" if test -z "$potlib"; then $ECHO "*** with $libname but no candidates were found. (...for regex pattern test)" else $ECHO "*** with $libname and none of the candidates passed a file format test" $ECHO "*** using a regex pattern. Last file checked: $potlib" fi fi ;; *) # Add a -L argument. func_append newdeplibs " $a_deplib" ;; esac done # Gone through all deplibs. ;; none | unknown | *) newdeplibs= tmp_deplibs=`$ECHO " $deplibs" | $SED 's/ -lc$//; s/ -[LR][^ ]*//g'` if test yes = "$allow_libtool_libs_with_static_runtimes"; then for i in $predeps $postdeps; do # can't use Xsed below, because $i might contain '/' tmp_deplibs=`$ECHO " $tmp_deplibs" | $SED "s|$i||"` done fi case $tmp_deplibs in *[!\ \ ]*) echo if test none = "$deplibs_check_method"; then echo "*** Warning: inter-library dependencies are not supported in this platform." else echo "*** Warning: inter-library dependencies are not known to be supported." fi echo "*** All declared inter-library dependencies are being dropped." droppeddeps=yes ;; esac ;; esac versuffix=$versuffix_save major=$major_save release=$release_save libname=$libname_save name=$name_save case $host in *-*-rhapsody* | *-*-darwin1.[012]) # On Rhapsody replace the C library with the System framework newdeplibs=`$ECHO " $newdeplibs" | $SED 's/ -lc / System.ltframework /'` ;; esac if test yes = "$droppeddeps"; then if test yes = "$module"; then echo echo "*** Warning: libtool could not satisfy all declared inter-library" $ECHO "*** dependencies of module $libname. Therefore, libtool will create" echo "*** a static module, that should work as long as the dlopening" echo "*** application is linked with the -dlopen flag." if test -z "$global_symbol_pipe"; then echo echo "*** However, this would only work if libtool was able to extract symbol" echo "*** lists from a program, using 'nm' or equivalent, but libtool could" echo "*** not find such a program. So, this module is probably useless." echo "*** 'nm' from GNU binutils and a full rebuild may help." fi if test no = "$build_old_libs"; then oldlibs=$output_objdir/$libname.$libext build_libtool_libs=module build_old_libs=yes else build_libtool_libs=no fi else echo "*** The inter-library dependencies that have been dropped here will be" echo "*** automatically added whenever a program is linked with this library" echo "*** or is declared to -dlopen it." if test no = "$allow_undefined"; then echo echo "*** Since this library must not contain undefined symbols," echo "*** because either the platform does not support them or" echo "*** it was explicitly requested with -no-undefined," echo "*** libtool will only create a static version of it." if test no = "$build_old_libs"; then oldlibs=$output_objdir/$libname.$libext build_libtool_libs=module build_old_libs=yes else build_libtool_libs=no fi fi fi fi # Done checking deplibs! deplibs=$newdeplibs fi # Time to change all our "foo.ltframework" stuff back to "-framework foo" case $host in *-*-darwin*) newdeplibs=`$ECHO " $newdeplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` new_inherited_linker_flags=`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` deplibs=`$ECHO " $deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` ;; esac # move library search paths that coincide with paths to not yet # installed libraries to the beginning of the library search list new_libs= for path in $notinst_path; do case " $new_libs " in *" -L$path/$objdir "*) ;; *) case " $deplibs " in *" -L$path/$objdir "*) func_append new_libs " -L$path/$objdir" ;; esac ;; esac done for deplib in $deplibs; do case $deplib in -L*) case " $new_libs " in *" $deplib "*) ;; *) func_append new_libs " $deplib" ;; esac ;; *) func_append new_libs " $deplib" ;; esac done deplibs=$new_libs # All the library-specific variables (install_libdir is set above). library_names= old_library= dlname= # Test again, we may have decided not to build it any more if test yes = "$build_libtool_libs"; then # Remove $wl instances when linking with ld. # FIXME: should test the right _cmds variable. case $archive_cmds in *\$LD\ *) wl= ;; esac if test yes = "$hardcode_into_libs"; then # Hardcode the library paths hardcode_libdirs= dep_rpath= rpath=$finalize_rpath test relink = "$opt_mode" || rpath=$compile_rpath$rpath for libdir in $rpath; do if test -n "$hardcode_libdir_flag_spec"; then if test -n "$hardcode_libdir_separator"; then func_replace_sysroot "$libdir" libdir=$func_replace_sysroot_result if test -z "$hardcode_libdirs"; then hardcode_libdirs=$libdir else # Just accumulate the unique libdirs. case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) ;; *) func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" ;; esac fi else eval flag=\"$hardcode_libdir_flag_spec\" func_append dep_rpath " $flag" fi elif test -n "$runpath_var"; then case "$perm_rpath " in *" $libdir "*) ;; *) func_append perm_rpath " $libdir" ;; esac fi done # Substitute the hardcoded libdirs into the rpath. if test -n "$hardcode_libdir_separator" && test -n "$hardcode_libdirs"; then libdir=$hardcode_libdirs eval "dep_rpath=\"$hardcode_libdir_flag_spec\"" fi if test -n "$runpath_var" && test -n "$perm_rpath"; then # We should set the runpath_var. rpath= for dir in $perm_rpath; do func_append rpath "$dir:" done eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" fi test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" fi shlibpath=$finalize_shlibpath test relink = "$opt_mode" || shlibpath=$compile_shlibpath$shlibpath if test -n "$shlibpath"; then eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" fi # Get the real and link names of the library. eval shared_ext=\"$shrext_cmds\" eval library_names=\"$library_names_spec\" set dummy $library_names shift realname=$1 shift if test -n "$soname_spec"; then eval soname=\"$soname_spec\" else soname=$realname fi if test -z "$dlname"; then dlname=$soname fi lib=$output_objdir/$realname linknames= for link do func_append linknames " $link" done # Use standard objects if they are pic test -z "$pic_flag" && libobjs=`$ECHO "$libobjs" | $SP2NL | $SED "$lo2o" | $NL2SP` test "X$libobjs" = "X " && libobjs= delfiles= if test -n "$export_symbols" && test -n "$include_expsyms"; then $opt_dry_run || cp "$export_symbols" "$output_objdir/$libname.uexp" export_symbols=$output_objdir/$libname.uexp func_append delfiles " $export_symbols" fi orig_export_symbols= case $host_os in cygwin* | mingw* | cegcc*) if test -n "$export_symbols" && test -z "$export_symbols_regex"; then # exporting using user supplied symfile func_dll_def_p "$export_symbols" || { # and it's NOT already a .def file. Must figure out # which of the given symbols are data symbols and tag # them as such. So, trigger use of export_symbols_cmds. # export_symbols gets reassigned inside the "prepare # the list of exported symbols" if statement, so the # include_expsyms logic still works. orig_export_symbols=$export_symbols export_symbols= always_export_symbols=yes } fi ;; esac # Prepare the list of exported symbols if test -z "$export_symbols"; then if test yes = "$always_export_symbols" || test -n "$export_symbols_regex"; then func_verbose "generating symbol list for '$libname.la'" export_symbols=$output_objdir/$libname.exp $opt_dry_run || $RM $export_symbols cmds=$export_symbols_cmds save_ifs=$IFS; IFS='~' for cmd1 in $cmds; do IFS=$save_ifs # Take the normal branch if the nm_file_list_spec branch # doesn't work or if tool conversion is not needed. case $nm_file_list_spec~$to_tool_file_cmd in *~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*) try_normal_branch=yes eval cmd=\"$cmd1\" func_len " $cmd" len=$func_len_result ;; *) try_normal_branch=no ;; esac if test yes = "$try_normal_branch" \ && { test "$len" -lt "$max_cmd_len" \ || test "$max_cmd_len" -le -1; } then func_show_eval "$cmd" 'exit $?' skipped_export=false elif test -n "$nm_file_list_spec"; then func_basename "$output" output_la=$func_basename_result save_libobjs=$libobjs save_output=$output output=$output_objdir/$output_la.nm func_to_tool_file "$output" libobjs=$nm_file_list_spec$func_to_tool_file_result func_append delfiles " $output" func_verbose "creating $NM input file list: $output" for obj in $save_libobjs; do func_to_tool_file "$obj" $ECHO "$func_to_tool_file_result" done > "$output" eval cmd=\"$cmd1\" func_show_eval "$cmd" 'exit $?' output=$save_output libobjs=$save_libobjs skipped_export=false else # The command line is too long to execute in one step. func_verbose "using reloadable object file for export list..." skipped_export=: # Break out early, otherwise skipped_export may be # set to false by a later but shorter cmd. break fi done IFS=$save_ifs if test -n "$export_symbols_regex" && test : != "$skipped_export"; then func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' func_show_eval '$MV "${export_symbols}T" "$export_symbols"' fi fi fi if test -n "$export_symbols" && test -n "$include_expsyms"; then tmp_export_symbols=$export_symbols test -n "$orig_export_symbols" && tmp_export_symbols=$orig_export_symbols $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' fi if test : != "$skipped_export" && test -n "$orig_export_symbols"; then # The given exports_symbols file has to be filtered, so filter it. func_verbose "filter symbol list for '$libname.la' to tag DATA exports" # FIXME: $output_objdir/$libname.filter potentially contains lots of # 's' commands, which not all seds can handle. GNU sed should be fine # though. Also, the filter scales superlinearly with the number of # global variables. join(1) would be nice here, but unfortunately # isn't a blessed tool. $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter func_append delfiles " $export_symbols $output_objdir/$libname.filter" export_symbols=$output_objdir/$libname.def $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols fi tmp_deplibs= for test_deplib in $deplibs; do case " $convenience " in *" $test_deplib "*) ;; *) func_append tmp_deplibs " $test_deplib" ;; esac done deplibs=$tmp_deplibs if test -n "$convenience"; then if test -n "$whole_archive_flag_spec" && test yes = "$compiler_needs_object" && test -z "$libobjs"; then # extract the archives, so we have objects to list. # TODO: could optimize this to just extract one archive. whole_archive_flag_spec= fi if test -n "$whole_archive_flag_spec"; then save_libobjs=$libobjs eval libobjs=\"\$libobjs $whole_archive_flag_spec\" test "X$libobjs" = "X " && libobjs= else gentop=$output_objdir/${outputname}x func_append generated " $gentop" func_extract_archives $gentop $convenience func_append libobjs " $func_extract_archives_result" test "X$libobjs" = "X " && libobjs= fi fi if test yes = "$thread_safe" && test -n "$thread_safe_flag_spec"; then eval flag=\"$thread_safe_flag_spec\" func_append linker_flags " $flag" fi # Make a backup of the uninstalled library when relinking if test relink = "$opt_mode"; then $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}U && $MV $realname ${realname}U)' || exit $? fi # Do each of the archive commands. if test yes = "$module" && test -n "$module_cmds"; then if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then eval test_cmds=\"$module_expsym_cmds\" cmds=$module_expsym_cmds else eval test_cmds=\"$module_cmds\" cmds=$module_cmds fi else if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then eval test_cmds=\"$archive_expsym_cmds\" cmds=$archive_expsym_cmds else eval test_cmds=\"$archive_cmds\" cmds=$archive_cmds fi fi if test : != "$skipped_export" && func_len " $test_cmds" && len=$func_len_result && test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then : else # The command line is too long to link in one step, link piecewise # or, if using GNU ld and skipped_export is not :, use a linker # script. # Save the value of $output and $libobjs because we want to # use them later. If we have whole_archive_flag_spec, we # want to use save_libobjs as it was before # whole_archive_flag_spec was expanded, because we can't # assume the linker understands whole_archive_flag_spec. # This may have to be revisited, in case too many # convenience libraries get linked in and end up exceeding # the spec. if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then save_libobjs=$libobjs fi save_output=$output func_basename "$output" output_la=$func_basename_result # Clear the reloadable object creation command queue and # initialize k to one. test_cmds= concat_cmds= objlist= last_robj= k=1 if test -n "$save_libobjs" && test : != "$skipped_export" && test yes = "$with_gnu_ld"; then output=$output_objdir/$output_la.lnkscript func_verbose "creating GNU ld script: $output" echo 'INPUT (' > $output for obj in $save_libobjs do func_to_tool_file "$obj" $ECHO "$func_to_tool_file_result" >> $output done echo ')' >> $output func_append delfiles " $output" func_to_tool_file "$output" output=$func_to_tool_file_result elif test -n "$save_libobjs" && test : != "$skipped_export" && test -n "$file_list_spec"; then output=$output_objdir/$output_la.lnk func_verbose "creating linker input file list: $output" : > $output set x $save_libobjs shift firstobj= if test yes = "$compiler_needs_object"; then firstobj="$1 " shift fi for obj do func_to_tool_file "$obj" $ECHO "$func_to_tool_file_result" >> $output done func_append delfiles " $output" func_to_tool_file "$output" output=$firstobj\"$file_list_spec$func_to_tool_file_result\" else if test -n "$save_libobjs"; then func_verbose "creating reloadable object files..." output=$output_objdir/$output_la-$k.$objext eval test_cmds=\"$reload_cmds\" func_len " $test_cmds" len0=$func_len_result len=$len0 # Loop over the list of objects to be linked. for obj in $save_libobjs do func_len " $obj" func_arith $len + $func_len_result len=$func_arith_result if test -z "$objlist" || test "$len" -lt "$max_cmd_len"; then func_append objlist " $obj" else # The command $test_cmds is almost too long, add a # command to the queue. if test 1 -eq "$k"; then # The first file doesn't have a previous command to add. reload_objs=$objlist eval concat_cmds=\"$reload_cmds\" else # All subsequent reloadable object files will link in # the last one created. reload_objs="$objlist $last_robj" eval concat_cmds=\"\$concat_cmds~$reload_cmds~\$RM $last_robj\" fi last_robj=$output_objdir/$output_la-$k.$objext func_arith $k + 1 k=$func_arith_result output=$output_objdir/$output_la-$k.$objext objlist=" $obj" func_len " $last_robj" func_arith $len0 + $func_len_result len=$func_arith_result fi done # Handle the remaining objects by creating one last # reloadable object file. All subsequent reloadable object # files will link in the last one created. test -z "$concat_cmds" || concat_cmds=$concat_cmds~ reload_objs="$objlist $last_robj" eval concat_cmds=\"\$concat_cmds$reload_cmds\" if test -n "$last_robj"; then eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\" fi func_append delfiles " $output" else output= fi ${skipped_export-false} && { func_verbose "generating symbol list for '$libname.la'" export_symbols=$output_objdir/$libname.exp $opt_dry_run || $RM $export_symbols libobjs=$output # Append the command to create the export file. test -z "$concat_cmds" || concat_cmds=$concat_cmds~ eval concat_cmds=\"\$concat_cmds$export_symbols_cmds\" if test -n "$last_robj"; then eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\" fi } test -n "$save_libobjs" && func_verbose "creating a temporary reloadable object file: $output" # Loop through the commands generated above and execute them. save_ifs=$IFS; IFS='~' for cmd in $concat_cmds; do IFS=$save_ifs $opt_quiet || { func_quote_arg expand,pretty "$cmd" eval "func_echo $func_quote_arg_result" } $opt_dry_run || eval "$cmd" || { lt_exit=$? # Restore the uninstalled library and exit if test relink = "$opt_mode"; then ( cd "$output_objdir" && \ $RM "${realname}T" && \ $MV "${realname}U" "$realname" ) fi exit $lt_exit } done IFS=$save_ifs if test -n "$export_symbols_regex" && ${skipped_export-false}; then func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' func_show_eval '$MV "${export_symbols}T" "$export_symbols"' fi fi ${skipped_export-false} && { if test -n "$export_symbols" && test -n "$include_expsyms"; then tmp_export_symbols=$export_symbols test -n "$orig_export_symbols" && tmp_export_symbols=$orig_export_symbols $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' fi if test -n "$orig_export_symbols"; then # The given exports_symbols file has to be filtered, so filter it. func_verbose "filter symbol list for '$libname.la' to tag DATA exports" # FIXME: $output_objdir/$libname.filter potentially contains lots of # 's' commands, which not all seds can handle. GNU sed should be fine # though. Also, the filter scales superlinearly with the number of # global variables. join(1) would be nice here, but unfortunately # isn't a blessed tool. $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter func_append delfiles " $export_symbols $output_objdir/$libname.filter" export_symbols=$output_objdir/$libname.def $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols fi } libobjs=$output # Restore the value of output. output=$save_output if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then eval libobjs=\"\$libobjs $whole_archive_flag_spec\" test "X$libobjs" = "X " && libobjs= fi # Expand the library linking commands again to reset the # value of $libobjs for piecewise linking. # Do each of the archive commands. if test yes = "$module" && test -n "$module_cmds"; then if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then cmds=$module_expsym_cmds else cmds=$module_cmds fi else if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then cmds=$archive_expsym_cmds else cmds=$archive_cmds fi fi fi if test -n "$delfiles"; then # Append the command to remove temporary files to $cmds. eval cmds=\"\$cmds~\$RM $delfiles\" fi # Add any objects from preloaded convenience libraries if test -n "$dlprefiles"; then gentop=$output_objdir/${outputname}x func_append generated " $gentop" func_extract_archives $gentop $dlprefiles func_append libobjs " $func_extract_archives_result" test "X$libobjs" = "X " && libobjs= fi save_ifs=$IFS; IFS='~' for cmd in $cmds; do IFS=$sp$nl eval cmd=\"$cmd\" IFS=$save_ifs $opt_quiet || { func_quote_arg expand,pretty "$cmd" eval "func_echo $func_quote_arg_result" } $opt_dry_run || eval "$cmd" || { lt_exit=$? # Restore the uninstalled library and exit if test relink = "$opt_mode"; then ( cd "$output_objdir" && \ $RM "${realname}T" && \ $MV "${realname}U" "$realname" ) fi exit $lt_exit } done IFS=$save_ifs # Restore the uninstalled library and exit if test relink = "$opt_mode"; then $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}T && $MV $realname ${realname}T && $MV ${realname}U $realname)' || exit $? if test -n "$convenience"; then if test -z "$whole_archive_flag_spec"; then func_show_eval '${RM}r "$gentop"' fi fi exit $EXIT_SUCCESS fi # Create links to the real library. for linkname in $linknames; do if test "$realname" != "$linkname"; then func_show_eval '(cd "$output_objdir" && $RM "$linkname" && $LN_S "$realname" "$linkname")' 'exit $?' fi done # If -module or -export-dynamic was specified, set the dlname. if test yes = "$module" || test yes = "$export_dynamic"; then # On all known operating systems, these are identical. dlname=$soname fi fi ;; obj) if test -n "$dlfiles$dlprefiles" || test no != "$dlself"; then func_warning "'-dlopen' is ignored for objects" fi case " $deplibs" in *\ -l* | *\ -L*) func_warning "'-l' and '-L' are ignored for objects" ;; esac test -n "$rpath" && \ func_warning "'-rpath' is ignored for objects" test -n "$xrpath" && \ func_warning "'-R' is ignored for objects" test -n "$vinfo" && \ func_warning "'-version-info' is ignored for objects" test -n "$release" && \ func_warning "'-release' is ignored for objects" case $output in *.lo) test -n "$objs$old_deplibs" && \ func_fatal_error "cannot build library object '$output' from non-libtool objects" libobj=$output func_lo2o "$libobj" obj=$func_lo2o_result ;; *) libobj= obj=$output ;; esac # Delete the old objects. $opt_dry_run || $RM $obj $libobj # Objects from convenience libraries. This assumes # single-version convenience libraries. Whenever we create # different ones for PIC/non-PIC, this we'll have to duplicate # the extraction. reload_conv_objs= gentop= # if reload_cmds runs $LD directly, get rid of -Wl from # whole_archive_flag_spec and hope we can get by with turning comma # into space. case $reload_cmds in *\$LD[\ \$]*) wl= ;; esac if test -n "$convenience"; then if test -n "$whole_archive_flag_spec"; then eval tmp_whole_archive_flags=\"$whole_archive_flag_spec\" test -n "$wl" || tmp_whole_archive_flags=`$ECHO "$tmp_whole_archive_flags" | $SED 's|,| |g'` reload_conv_objs=$reload_objs\ $tmp_whole_archive_flags else gentop=$output_objdir/${obj}x func_append generated " $gentop" func_extract_archives $gentop $convenience reload_conv_objs="$reload_objs $func_extract_archives_result" fi fi # If we're not building shared, we need to use non_pic_objs test yes = "$build_libtool_libs" || libobjs=$non_pic_objects # Create the old-style object. reload_objs=$objs$old_deplibs' '`$ECHO "$libobjs" | $SP2NL | $SED "/\.$libext$/d; /\.lib$/d; $lo2o" | $NL2SP`' '$reload_conv_objs output=$obj func_execute_cmds "$reload_cmds" 'exit $?' # Exit if we aren't doing a library object file. if test -z "$libobj"; then if test -n "$gentop"; then func_show_eval '${RM}r "$gentop"' fi exit $EXIT_SUCCESS fi test yes = "$build_libtool_libs" || { if test -n "$gentop"; then func_show_eval '${RM}r "$gentop"' fi # Create an invalid libtool object if no PIC, so that we don't # accidentally link it into a program. # $show "echo timestamp > $libobj" # $opt_dry_run || eval "echo timestamp > $libobj" || exit $? exit $EXIT_SUCCESS } if test -n "$pic_flag" || test default != "$pic_mode"; then # Only do commands if we really have different PIC objects. reload_objs="$libobjs $reload_conv_objs" output=$libobj func_execute_cmds "$reload_cmds" 'exit $?' fi if test -n "$gentop"; then func_show_eval '${RM}r "$gentop"' fi exit $EXIT_SUCCESS ;; prog) case $host in *cygwin*) func_stripname '' '.exe' "$output" output=$func_stripname_result.exe;; esac test -n "$vinfo" && \ func_warning "'-version-info' is ignored for programs" test -n "$release" && \ func_warning "'-release' is ignored for programs" $preload \ && test unknown,unknown,unknown = "$dlopen_support,$dlopen_self,$dlopen_self_static" \ && func_warning "'LT_INIT([dlopen])' not used. Assuming no dlopen support." case $host in *-*-rhapsody* | *-*-darwin1.[012]) # On Rhapsody replace the C library is the System framework compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's/ -lc / System.ltframework /'` finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's/ -lc / System.ltframework /'` ;; esac case $host in *-*-darwin*) # Don't allow lazy linking, it breaks C++ global constructors # But is supposedly fixed on 10.4 or later (yay!). if test CXX = "$tagname"; then case ${MACOSX_DEPLOYMENT_TARGET-10.0} in 10.[0123]) func_append compile_command " $wl-bind_at_load" func_append finalize_command " $wl-bind_at_load" ;; esac fi # Time to change all our "foo.ltframework" stuff back to "-framework foo" compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` ;; esac # move library search paths that coincide with paths to not yet # installed libraries to the beginning of the library search list new_libs= for path in $notinst_path; do case " $new_libs " in *" -L$path/$objdir "*) ;; *) case " $compile_deplibs " in *" -L$path/$objdir "*) func_append new_libs " -L$path/$objdir" ;; esac ;; esac done for deplib in $compile_deplibs; do case $deplib in -L*) case " $new_libs " in *" $deplib "*) ;; *) func_append new_libs " $deplib" ;; esac ;; *) func_append new_libs " $deplib" ;; esac done compile_deplibs=$new_libs func_append compile_command " $compile_deplibs" func_append finalize_command " $finalize_deplibs" if test -n "$rpath$xrpath"; then # If the user specified any rpath flags, then add them. for libdir in $rpath $xrpath; do # This is the magic to use -rpath. case "$finalize_rpath " in *" $libdir "*) ;; *) func_append finalize_rpath " $libdir" ;; esac done fi # Now hardcode the library paths rpath= hardcode_libdirs= for libdir in $compile_rpath $finalize_rpath; do if test -n "$hardcode_libdir_flag_spec"; then if test -n "$hardcode_libdir_separator"; then if test -z "$hardcode_libdirs"; then hardcode_libdirs=$libdir else # Just accumulate the unique libdirs. case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) ;; *) func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" ;; esac fi else eval flag=\"$hardcode_libdir_flag_spec\" func_append rpath " $flag" fi elif test -n "$runpath_var"; then case "$perm_rpath " in *" $libdir "*) ;; *) func_append perm_rpath " $libdir" ;; esac fi case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) testbindir=`$ECHO "$libdir" | $SED -e 's*/lib$*/bin*'` case :$dllsearchpath: in *":$libdir:"*) ;; ::) dllsearchpath=$libdir;; *) func_append dllsearchpath ":$libdir";; esac case :$dllsearchpath: in *":$testbindir:"*) ;; ::) dllsearchpath=$testbindir;; *) func_append dllsearchpath ":$testbindir";; esac ;; esac done # Substitute the hardcoded libdirs into the rpath. if test -n "$hardcode_libdir_separator" && test -n "$hardcode_libdirs"; then libdir=$hardcode_libdirs eval rpath=\" $hardcode_libdir_flag_spec\" fi compile_rpath=$rpath rpath= hardcode_libdirs= for libdir in $finalize_rpath; do if test -n "$hardcode_libdir_flag_spec"; then if test -n "$hardcode_libdir_separator"; then if test -z "$hardcode_libdirs"; then hardcode_libdirs=$libdir else # Just accumulate the unique libdirs. case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) ;; *) func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" ;; esac fi else eval flag=\"$hardcode_libdir_flag_spec\" func_append rpath " $flag" fi elif test -n "$runpath_var"; then case "$finalize_perm_rpath " in *" $libdir "*) ;; *) func_append finalize_perm_rpath " $libdir" ;; esac fi done # Substitute the hardcoded libdirs into the rpath. if test -n "$hardcode_libdir_separator" && test -n "$hardcode_libdirs"; then libdir=$hardcode_libdirs eval rpath=\" $hardcode_libdir_flag_spec\" fi finalize_rpath=$rpath if test -n "$libobjs" && test yes = "$build_old_libs"; then # Transform all the library objects into standard objects. compile_command=`$ECHO "$compile_command" | $SP2NL | $SED "$lo2o" | $NL2SP` finalize_command=`$ECHO "$finalize_command" | $SP2NL | $SED "$lo2o" | $NL2SP` fi func_generate_dlsyms "$outputname" "@PROGRAM@" false # template prelinking step if test -n "$prelink_cmds"; then func_execute_cmds "$prelink_cmds" 'exit $?' fi wrappers_required=: case $host in *cegcc* | *mingw32ce*) # Disable wrappers for cegcc and mingw32ce hosts, we are cross compiling anyway. wrappers_required=false ;; *cygwin* | *mingw* ) test yes = "$build_libtool_libs" || wrappers_required=false ;; *) if test no = "$need_relink" || test yes != "$build_libtool_libs"; then wrappers_required=false fi ;; esac $wrappers_required || { # Replace the output file specification. compile_command=`$ECHO "$compile_command" | $SED 's%@OUTPUT@%'"$output"'%g'` link_command=$compile_command$compile_rpath # We have no uninstalled library dependencies, so finalize right now. exit_status=0 func_show_eval "$link_command" 'exit_status=$?' if test -n "$postlink_cmds"; then func_to_tool_file "$output" postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` func_execute_cmds "$postlink_cmds" 'exit $?' fi # Delete the generated files. if test -f "$output_objdir/${outputname}S.$objext"; then func_show_eval '$RM "$output_objdir/${outputname}S.$objext"' fi exit $exit_status } if test -n "$compile_shlibpath$finalize_shlibpath"; then compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" fi if test -n "$finalize_shlibpath"; then finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" fi compile_var= finalize_var= if test -n "$runpath_var"; then if test -n "$perm_rpath"; then # We should set the runpath_var. rpath= for dir in $perm_rpath; do func_append rpath "$dir:" done compile_var="$runpath_var=\"$rpath\$$runpath_var\" " fi if test -n "$finalize_perm_rpath"; then # We should set the runpath_var. rpath= for dir in $finalize_perm_rpath; do func_append rpath "$dir:" done finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " fi fi if test yes = "$no_install"; then # We don't need to create a wrapper script. link_command=$compile_var$compile_command$compile_rpath # Replace the output file specification. link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output"'%g'` # Delete the old output file. $opt_dry_run || $RM $output # Link the executable and exit func_show_eval "$link_command" 'exit $?' if test -n "$postlink_cmds"; then func_to_tool_file "$output" postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` func_execute_cmds "$postlink_cmds" 'exit $?' fi exit $EXIT_SUCCESS fi case $hardcode_action,$fast_install in relink,*) # Fast installation is not supported link_command=$compile_var$compile_command$compile_rpath relink_command=$finalize_var$finalize_command$finalize_rpath func_warning "this platform does not like uninstalled shared libraries" func_warning "'$output' will be relinked during installation" ;; *,yes) link_command=$finalize_var$compile_command$finalize_rpath relink_command=`$ECHO "$compile_var$compile_command$compile_rpath" | $SED 's%@OUTPUT@%\$progdir/\$file%g'` ;; *,no) link_command=$compile_var$compile_command$compile_rpath relink_command=$finalize_var$finalize_command$finalize_rpath ;; *,needless) link_command=$finalize_var$compile_command$finalize_rpath relink_command= ;; esac # Replace the output file specification. link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` # Delete the old output files. $opt_dry_run || $RM $output $output_objdir/$outputname $output_objdir/lt-$outputname func_show_eval "$link_command" 'exit $?' if test -n "$postlink_cmds"; then func_to_tool_file "$output_objdir/$outputname" postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` func_execute_cmds "$postlink_cmds" 'exit $?' fi # Now create the wrapper script. func_verbose "creating $output" # Quote the relink command for shipping. if test -n "$relink_command"; then # Preserve any variables that may affect compiler behavior for var in $variables_saved_for_relink; do if eval test -z \"\${$var+set}\"; then relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" elif eval var_value=\$$var; test -z "$var_value"; then relink_command="$var=; export $var; $relink_command" else func_quote_arg pretty "$var_value" relink_command="$var=$func_quote_arg_result; export $var; $relink_command" fi done func_quote eval cd "`pwd`" func_quote_arg pretty,unquoted "($func_quote_result; $relink_command)" relink_command=$func_quote_arg_unquoted_result fi # Only actually do things if not in dry run mode. $opt_dry_run || { # win32 will think the script is a binary if it has # a .exe suffix, so we strip it off here. case $output in *.exe) func_stripname '' '.exe' "$output" output=$func_stripname_result ;; esac # test for cygwin because mv fails w/o .exe extensions case $host in *cygwin*) exeext=.exe func_stripname '' '.exe' "$outputname" outputname=$func_stripname_result ;; *) exeext= ;; esac case $host in *cygwin* | *mingw* ) func_dirname_and_basename "$output" "" "." output_name=$func_basename_result output_path=$func_dirname_result cwrappersource=$output_path/$objdir/lt-$output_name.c cwrapper=$output_path/$output_name.exe $RM $cwrappersource $cwrapper trap "$RM $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15 func_emit_cwrapperexe_src > $cwrappersource # The wrapper executable is built using the $host compiler, # because it contains $host paths and files. If cross- # compiling, it, like the target executable, must be # executed on the $host or under an emulation environment. $opt_dry_run || { $LTCC $LTCFLAGS -o $cwrapper $cwrappersource $STRIP $cwrapper } # Now, create the wrapper script for func_source use: func_ltwrapper_scriptname $cwrapper $RM $func_ltwrapper_scriptname_result trap "$RM $func_ltwrapper_scriptname_result; exit $EXIT_FAILURE" 1 2 15 $opt_dry_run || { # note: this script will not be executed, so do not chmod. if test "x$build" = "x$host"; then $cwrapper --lt-dump-script > $func_ltwrapper_scriptname_result else func_emit_wrapper no > $func_ltwrapper_scriptname_result fi } ;; * ) $RM $output trap "$RM $output; exit $EXIT_FAILURE" 1 2 15 func_emit_wrapper no > $output chmod +x $output ;; esac } exit $EXIT_SUCCESS ;; esac # See if we need to build an old-fashioned archive. for oldlib in $oldlibs; do case $build_libtool_libs in convenience) oldobjs="$libobjs_save $symfileobj" addlibs=$convenience build_libtool_libs=no ;; module) oldobjs=$libobjs_save addlibs=$old_convenience build_libtool_libs=no ;; *) oldobjs="$old_deplibs $non_pic_objects" $preload && test -f "$symfileobj" \ && func_append oldobjs " $symfileobj" addlibs=$old_convenience ;; esac if test -n "$addlibs"; then gentop=$output_objdir/${outputname}x func_append generated " $gentop" func_extract_archives $gentop $addlibs func_append oldobjs " $func_extract_archives_result" fi # Do each command in the archive commands. if test -n "$old_archive_from_new_cmds" && test yes = "$build_libtool_libs"; then cmds=$old_archive_from_new_cmds else # Add any objects from preloaded convenience libraries if test -n "$dlprefiles"; then gentop=$output_objdir/${outputname}x func_append generated " $gentop" func_extract_archives $gentop $dlprefiles func_append oldobjs " $func_extract_archives_result" fi # POSIX demands no paths to be encoded in archives. We have # to avoid creating archives with duplicate basenames if we # might have to extract them afterwards, e.g., when creating a # static archive out of a convenience library, or when linking # the entirety of a libtool archive into another (currently # not supported by libtool). if (for obj in $oldobjs do func_basename "$obj" $ECHO "$func_basename_result" done | sort | sort -uc >/dev/null 2>&1); then : else echo "copying selected object files to avoid basename conflicts..." gentop=$output_objdir/${outputname}x func_append generated " $gentop" func_mkdir_p "$gentop" save_oldobjs=$oldobjs oldobjs= counter=1 for obj in $save_oldobjs do func_basename "$obj" objbase=$func_basename_result case " $oldobjs " in " ") oldobjs=$obj ;; *[\ /]"$objbase "*) while :; do # Make sure we don't pick an alternate name that also # overlaps. newobj=lt$counter-$objbase func_arith $counter + 1 counter=$func_arith_result case " $oldobjs " in *[\ /]"$newobj "*) ;; *) if test ! -f "$gentop/$newobj"; then break; fi ;; esac done func_show_eval "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj" func_append oldobjs " $gentop/$newobj" ;; *) func_append oldobjs " $obj" ;; esac done fi func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 tool_oldlib=$func_to_tool_file_result eval cmds=\"$old_archive_cmds\" func_len " $cmds" len=$func_len_result if test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then cmds=$old_archive_cmds elif test -n "$archiver_list_spec"; then func_verbose "using command file archive linking..." for obj in $oldobjs do func_to_tool_file "$obj" $ECHO "$func_to_tool_file_result" done > $output_objdir/$libname.libcmd func_to_tool_file "$output_objdir/$libname.libcmd" oldobjs=" $archiver_list_spec$func_to_tool_file_result" cmds=$old_archive_cmds else # the command line is too long to link in one step, link in parts func_verbose "using piecewise archive linking..." save_RANLIB=$RANLIB RANLIB=: objlist= concat_cmds= save_oldobjs=$oldobjs oldobjs= # Is there a better way of finding the last object in the list? for obj in $save_oldobjs do last_oldobj=$obj done eval test_cmds=\"$old_archive_cmds\" func_len " $test_cmds" len0=$func_len_result len=$len0 for obj in $save_oldobjs do func_len " $obj" func_arith $len + $func_len_result len=$func_arith_result func_append objlist " $obj" if test "$len" -lt "$max_cmd_len"; then : else # the above command should be used before it gets too long oldobjs=$objlist if test "$obj" = "$last_oldobj"; then RANLIB=$save_RANLIB fi test -z "$concat_cmds" || concat_cmds=$concat_cmds~ eval concat_cmds=\"\$concat_cmds$old_archive_cmds\" objlist= len=$len0 fi done RANLIB=$save_RANLIB oldobjs=$objlist if test -z "$oldobjs"; then eval cmds=\"\$concat_cmds\" else eval cmds=\"\$concat_cmds~\$old_archive_cmds\" fi fi fi func_execute_cmds "$cmds" 'exit $?' done test -n "$generated" && \ func_show_eval "${RM}r$generated" # Now create the libtool archive. case $output in *.la) old_library= test yes = "$build_old_libs" && old_library=$libname.$libext func_verbose "creating $output" # Preserve any variables that may affect compiler behavior for var in $variables_saved_for_relink; do if eval test -z \"\${$var+set}\"; then relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" elif eval var_value=\$$var; test -z "$var_value"; then relink_command="$var=; export $var; $relink_command" else func_quote_arg pretty,unquoted "$var_value" relink_command="$var=$func_quote_arg_unquoted_result; export $var; $relink_command" fi done # Quote the link command for shipping. func_quote eval cd "`pwd`" relink_command="($func_quote_result; $SHELL \"$progpath\" $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)" func_quote_arg pretty,unquoted "$relink_command" relink_command=$func_quote_arg_unquoted_result if test yes = "$hardcode_automatic"; then relink_command= fi # Only create the output if not a dry run. $opt_dry_run || { for installed in no yes; do if test yes = "$installed"; then if test -z "$install_libdir"; then break fi output=$output_objdir/${outputname}i # Replace all uninstalled libtool libraries with the installed ones newdependency_libs= for deplib in $dependency_libs; do case $deplib in *.la) func_basename "$deplib" name=$func_basename_result func_resolve_sysroot "$deplib" eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $func_resolve_sysroot_result` test -z "$libdir" && \ func_fatal_error "'$deplib' is not a valid libtool archive" func_append newdependency_libs " ${lt_sysroot:+=}$libdir/$name" ;; -L*) func_stripname -L '' "$deplib" func_replace_sysroot "$func_stripname_result" func_append newdependency_libs " -L$func_replace_sysroot_result" ;; -R*) func_stripname -R '' "$deplib" func_replace_sysroot "$func_stripname_result" func_append newdependency_libs " -R$func_replace_sysroot_result" ;; *) func_append newdependency_libs " $deplib" ;; esac done dependency_libs=$newdependency_libs newdlfiles= for lib in $dlfiles; do case $lib in *.la) func_basename "$lib" name=$func_basename_result eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $lib` test -z "$libdir" && \ func_fatal_error "'$lib' is not a valid libtool archive" func_append newdlfiles " ${lt_sysroot:+=}$libdir/$name" ;; *) func_append newdlfiles " $lib" ;; esac done dlfiles=$newdlfiles newdlprefiles= for lib in $dlprefiles; do case $lib in *.la) # Only pass preopened files to the pseudo-archive (for # eventual linking with the app. that links it) if we # didn't already link the preopened objects directly into # the library: func_basename "$lib" name=$func_basename_result eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $lib` test -z "$libdir" && \ func_fatal_error "'$lib' is not a valid libtool archive" func_append newdlprefiles " ${lt_sysroot:+=}$libdir/$name" ;; esac done dlprefiles=$newdlprefiles else newdlfiles= for lib in $dlfiles; do case $lib in [\\/]* | [A-Za-z]:[\\/]*) abs=$lib ;; *) abs=`pwd`"/$lib" ;; esac func_append newdlfiles " $abs" done dlfiles=$newdlfiles newdlprefiles= for lib in $dlprefiles; do case $lib in [\\/]* | [A-Za-z]:[\\/]*) abs=$lib ;; *) abs=`pwd`"/$lib" ;; esac func_append newdlprefiles " $abs" done dlprefiles=$newdlprefiles fi $RM $output # place dlname in correct position for cygwin # In fact, it would be nice if we could use this code for all target # systems that can't hard-code library paths into their executables # and that have no shared library path variable independent of PATH, # but it turns out we can't easily determine that from inspecting # libtool variables, so we have to hard-code the OSs to which it # applies here; at the moment, that means platforms that use the PE # object format with DLL files. See the long comment at the top of # tests/bindir.at for full details. tdlname=$dlname case $host,$output,$installed,$module,$dlname in *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll | *cegcc*,*lai,yes,no,*.dll) # If a -bindir argument was supplied, place the dll there. if test -n "$bindir"; then func_relative_path "$install_libdir" "$bindir" tdlname=$func_relative_path_result/$dlname else # Otherwise fall back on heuristic. tdlname=../bin/$dlname fi ;; esac $ECHO > $output "\ # $outputname - a libtool library file # Generated by $PROGRAM (GNU $PACKAGE) $VERSION # # Please DO NOT delete this file! # It is necessary for linking the library. # The name that we can dlopen(3). dlname='$tdlname' # Names of this library. library_names='$library_names' # The name of the static archive. old_library='$old_library' # Linker flags that cannot go in dependency_libs. inherited_linker_flags='$new_inherited_linker_flags' # Libraries that this one depends upon. dependency_libs='$dependency_libs' # Names of additional weak libraries provided by this library weak_library_names='$weak_libs' # Version information for $libname. current=$current age=$age revision=$revision # Is this an already installed library? installed=$installed # Should we warn about portability when linking against -modules? shouldnotlink=$module # Files to dlopen/dlpreopen dlopen='$dlfiles' dlpreopen='$dlprefiles' # Directory that this library needs to be installed in: libdir='$install_libdir'" if test no,yes = "$installed,$need_relink"; then $ECHO >> $output "\ relink_command=\"$relink_command\"" fi done } # Do a symbolic link so that the libtool archive can be found in # LD_LIBRARY_PATH before the program is installed. func_show_eval '( cd "$output_objdir" && $RM "$outputname" && $LN_S "../$outputname" "$outputname" )' 'exit $?' ;; esac exit $EXIT_SUCCESS } if test link = "$opt_mode" || test relink = "$opt_mode"; then func_mode_link ${1+"$@"} fi # func_mode_uninstall arg... func_mode_uninstall () { $debug_cmd RM=$nonopt files= rmforce=false exit_status=0 # This variable tells wrapper scripts just to set variables rather # than running their programs. libtool_install_magic=$magic for arg do case $arg in -f) func_append RM " $arg"; rmforce=: ;; -*) func_append RM " $arg" ;; *) func_append files " $arg" ;; esac done test -z "$RM" && \ func_fatal_help "you must specify an RM program" rmdirs= for file in $files; do func_dirname "$file" "" "." dir=$func_dirname_result if test . = "$dir"; then odir=$objdir else odir=$dir/$objdir fi func_basename "$file" name=$func_basename_result test uninstall = "$opt_mode" && odir=$dir # Remember odir for removal later, being careful to avoid duplicates if test clean = "$opt_mode"; then case " $rmdirs " in *" $odir "*) ;; *) func_append rmdirs " $odir" ;; esac fi # Don't error if the file doesn't exist and rm -f was used. if { test -L "$file"; } >/dev/null 2>&1 || { test -h "$file"; } >/dev/null 2>&1 || test -f "$file"; then : elif test -d "$file"; then exit_status=1 continue elif $rmforce; then continue fi rmfiles=$file case $name in *.la) # Possibly a libtool archive, so verify it. if func_lalib_p "$file"; then func_source $dir/$name # Delete the libtool libraries and symlinks. for n in $library_names; do func_append rmfiles " $odir/$n" done test -n "$old_library" && func_append rmfiles " $odir/$old_library" case $opt_mode in clean) case " $library_names " in *" $dlname "*) ;; *) test -n "$dlname" && func_append rmfiles " $odir/$dlname" ;; esac test -n "$libdir" && func_append rmfiles " $odir/$name $odir/${name}i" ;; uninstall) if test -n "$library_names"; then # Do each command in the postuninstall commands. func_execute_cmds "$postuninstall_cmds" '$rmforce || exit_status=1' fi if test -n "$old_library"; then # Do each command in the old_postuninstall commands. func_execute_cmds "$old_postuninstall_cmds" '$rmforce || exit_status=1' fi # FIXME: should reinstall the best remaining shared library. ;; esac fi ;; *.lo) # Possibly a libtool object, so verify it. if func_lalib_p "$file"; then # Read the .lo file func_source $dir/$name # Add PIC object to the list of files to remove. if test -n "$pic_object" && test none != "$pic_object"; then func_append rmfiles " $dir/$pic_object" fi # Add non-PIC object to the list of files to remove. if test -n "$non_pic_object" && test none != "$non_pic_object"; then func_append rmfiles " $dir/$non_pic_object" fi fi ;; *) if test clean = "$opt_mode"; then noexename=$name case $file in *.exe) func_stripname '' '.exe' "$file" file=$func_stripname_result func_stripname '' '.exe' "$name" noexename=$func_stripname_result # $file with .exe has already been added to rmfiles, # add $file without .exe func_append rmfiles " $file" ;; esac # Do a test to see if this is a libtool program. if func_ltwrapper_p "$file"; then if func_ltwrapper_executable_p "$file"; then func_ltwrapper_scriptname "$file" relink_command= func_source $func_ltwrapper_scriptname_result func_append rmfiles " $func_ltwrapper_scriptname_result" else relink_command= func_source $dir/$noexename fi # note $name still contains .exe if it was in $file originally # as does the version of $file that was added into $rmfiles func_append rmfiles " $odir/$name $odir/${name}S.$objext" if test yes = "$fast_install" && test -n "$relink_command"; then func_append rmfiles " $odir/lt-$name" fi if test "X$noexename" != "X$name"; then func_append rmfiles " $odir/lt-$noexename.c" fi fi fi ;; esac func_show_eval "$RM $rmfiles" 'exit_status=1' done # Try to remove the $objdir's in the directories where we deleted files for dir in $rmdirs; do if test -d "$dir"; then func_show_eval "rmdir $dir >/dev/null 2>&1" fi done exit $exit_status } if test uninstall = "$opt_mode" || test clean = "$opt_mode"; then func_mode_uninstall ${1+"$@"} fi test -z "$opt_mode" && { help=$generic_help func_fatal_help "you must specify a MODE" } test -z "$exec_cmd" && \ func_fatal_help "invalid operation mode '$opt_mode'" if test -n "$exec_cmd"; then eval exec "$exec_cmd" exit $EXIT_FAILURE fi exit $exit_status # The TAGs below are defined such that we never get into a situation # where we disable both kinds of libraries. Given conflicting # choices, we go for a static library, that is the most portable, # since we can't tell whether shared libraries were disabled because # the user asked for that or because the platform doesn't support # them. This is particularly important on AIX, because we don't # support having both static and shared libraries enabled at the same # time on that platform, so we default to a shared-only configuration. # If a disable-shared tag is given, we'll fallback to a static-only # configuration. But we'll never go from static-only to shared-only. # ### BEGIN LIBTOOL TAG CONFIG: disable-shared build_libtool_libs=no build_old_libs=yes # ### END LIBTOOL TAG CONFIG: disable-shared # ### BEGIN LIBTOOL TAG CONFIG: disable-static build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac` # ### END LIBTOOL TAG CONFIG: disable-static # Local Variables: # mode:shell-script # sh-indentation:2 # End: starpu-1.4.9+dfsg/build-aux/missing000077500000000000000000000153361507764646700173110ustar00rootroot00000000000000#! /bin/sh # Common wrapper for a few potentially missing GNU programs. scriptversion=2018-03-07.03; # UTC # Copyright (C) 1996-2021 Free Software Foundation, Inc. # Originally written by Fran,cois Pinard , 1996. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. if test $# -eq 0; then echo 1>&2 "Try '$0 --help' for more information" exit 1 fi case $1 in --is-lightweight) # Used by our autoconf macros to check whether the available missing # script is modern enough. exit 0 ;; --run) # Back-compat with the calling convention used by older automake. shift ;; -h|--h|--he|--hel|--help) echo "\ $0 [OPTION]... PROGRAM [ARGUMENT]... Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due to PROGRAM being missing or too old. Options: -h, --help display this help and exit -v, --version output version information and exit Supported PROGRAM values: aclocal autoconf autoheader autom4te automake makeinfo bison yacc flex lex help2man Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and 'g' are ignored when checking the name. Send bug reports to ." exit $? ;; -v|--v|--ve|--ver|--vers|--versi|--versio|--version) echo "missing $scriptversion (GNU Automake)" exit $? ;; -*) echo 1>&2 "$0: unknown '$1' option" echo 1>&2 "Try '$0 --help' for more information" exit 1 ;; esac # Run the given program, remember its exit status. "$@"; st=$? # If it succeeded, we are done. test $st -eq 0 && exit 0 # Also exit now if we it failed (or wasn't found), and '--version' was # passed; such an option is passed most likely to detect whether the # program is present and works. case $2 in --version|--help) exit $st;; esac # Exit code 63 means version mismatch. This often happens when the user # tries to use an ancient version of a tool on a file that requires a # minimum version. if test $st -eq 63; then msg="probably too old" elif test $st -eq 127; then # Program was missing. msg="missing on your system" else # Program was found and executed, but failed. Give up. exit $st fi perl_URL=https://www.perl.org/ flex_URL=https://github.com/westes/flex gnu_software_URL=https://www.gnu.org/software program_details () { case $1 in aclocal|automake) echo "The '$1' program is part of the GNU Automake package:" echo "<$gnu_software_URL/automake>" echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" echo "<$gnu_software_URL/autoconf>" echo "<$gnu_software_URL/m4/>" echo "<$perl_URL>" ;; autoconf|autom4te|autoheader) echo "The '$1' program is part of the GNU Autoconf package:" echo "<$gnu_software_URL/autoconf/>" echo "It also requires GNU m4 and Perl in order to run:" echo "<$gnu_software_URL/m4/>" echo "<$perl_URL>" ;; esac } give_advice () { # Normalize program name to check for. normalized_program=`echo "$1" | sed ' s/^gnu-//; t s/^gnu//; t s/^g//; t'` printf '%s\n' "'$1' is $msg." configure_deps="'configure.ac' or m4 files included by 'configure.ac'" case $normalized_program in autoconf*) echo "You should only need it if you modified 'configure.ac'," echo "or m4 files included by it." program_details 'autoconf' ;; autoheader*) echo "You should only need it if you modified 'acconfig.h' or" echo "$configure_deps." program_details 'autoheader' ;; automake*) echo "You should only need it if you modified 'Makefile.am' or" echo "$configure_deps." program_details 'automake' ;; aclocal*) echo "You should only need it if you modified 'acinclude.m4' or" echo "$configure_deps." program_details 'aclocal' ;; autom4te*) echo "You might have modified some maintainer files that require" echo "the 'autom4te' program to be rebuilt." program_details 'autom4te' ;; bison*|yacc*) echo "You should only need it if you modified a '.y' file." echo "You may want to install the GNU Bison package:" echo "<$gnu_software_URL/bison/>" ;; lex*|flex*) echo "You should only need it if you modified a '.l' file." echo "You may want to install the Fast Lexical Analyzer package:" echo "<$flex_URL>" ;; help2man*) echo "You should only need it if you modified a dependency" \ "of a man page." echo "You may want to install the GNU Help2man package:" echo "<$gnu_software_URL/help2man/>" ;; makeinfo*) echo "You should only need it if you modified a '.texi' file, or" echo "any other file indirectly affecting the aspect of the manual." echo "You might want to install the Texinfo package:" echo "<$gnu_software_URL/texinfo/>" echo "The spurious makeinfo call might also be the consequence of" echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might" echo "want to install GNU make:" echo "<$gnu_software_URL/make/>" ;; *) echo "You might have modified some files without having the proper" echo "tools for further handling them. Check the 'README' file, it" echo "often tells you about the needed prerequisites for installing" echo "this package. You may also peek at any GNU archive site, in" echo "case some other package contains this missing '$1' program." ;; esac } give_advice "$1" | sed -e '1s/^/WARNING: /' \ -e '2,$s/^/ /' >&2 # Propagate the correct exit status (expected to be 127 for a program # not found, 63 for a program that failed due to version mismatch). exit $st # Local variables: # eval: (add-hook 'before-save-hook 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC0" # time-stamp-end: "; # UTC" # End: starpu-1.4.9+dfsg/build-aux/test-driver000077500000000000000000000114171507764646700201040ustar00rootroot00000000000000#! /bin/sh # test-driver - basic testsuite driver script. scriptversion=2018-03-07.03; # UTC # Copyright (C) 2011-2021 Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # This file is maintained in Automake, please report # bugs to or send patches to # . # Make unconditional expansion of undefined variables an error. This # helps a lot in preventing typo-related bugs. set -u usage_error () { echo "$0: $*" >&2 print_usage >&2 exit 2 } print_usage () { cat <"$log_file" "$@" >>"$log_file" 2>&1 estatus=$? if test $enable_hard_errors = no && test $estatus -eq 99; then tweaked_estatus=1 else tweaked_estatus=$estatus fi case $tweaked_estatus:$expect_failure in 0:yes) col=$red res=XPASS recheck=yes gcopy=yes;; 0:*) col=$grn res=PASS recheck=no gcopy=no;; 77:*) col=$blu res=SKIP recheck=no gcopy=yes;; 99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;; *:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;; *:*) col=$red res=FAIL recheck=yes gcopy=yes;; esac # Report the test outcome and exit status in the logs, so that one can # know whether the test passed or failed simply by looking at the '.log' # file, without the need of also peaking into the corresponding '.trs' # file (automake bug#11814). echo "$res $test_name (exit status: $estatus)" >>"$log_file" # Report outcome to console. echo "${col}${res}${std}: $test_name" # Register the test result, and other relevant metadata. echo ":test-result: $res" > $trs_file echo ":global-test-result: $res" >> $trs_file echo ":recheck: $recheck" >> $trs_file echo ":copy-in-global-log: $gcopy" >> $trs_file # Local Variables: # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'before-save-hook 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC0" # time-stamp-end: "; # UTC" # End: starpu-1.4.9+dfsg/configure000077500000000000000000057672161507764646700157470ustar00rootroot00000000000000#! /bin/sh # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.71 for StarPU 1.4.9. # # Report bugs to . # # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, # Inc. # # # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. ## -------------------- ## ## M4sh Initialization. ## ## -------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh as_nop=: if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else $as_nop case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi # Reset variables that may have inherited troublesome values from # the environment. # IFS needs to be set, to space, tab, and newline, in precisely that order. # (If _AS_PATH_WALK were called with IFS unset, it would have the # side effect of setting IFS to empty, thus disabling word splitting.) # Quoting is to prevent editors from complaining about space-tab. as_nl=' ' export as_nl IFS=" "" $as_nl" PS1='$ ' PS2='> ' PS4='+ ' # Ensure predictable behavior from utilities with locale-dependent output. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # We cannot yet rely on "unset" to work, but we need these variables # to be unset--not just set to an empty or harmless value--now, to # avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct # also avoids known problems related to "unset" and subshell syntax # in other old shells (e.g. bash 2.01 and pdksh 5.2.14). for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH do eval test \${$as_var+y} \ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : done # Ensure that fds 0, 1, and 2 are open. if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi if (exec 3>&2) ; then :; else exec 2>/dev/null; fi # The user is always right. if ${PATH_SEPARATOR+false} :; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac test -r "$as_dir$0" && as_myself=$as_dir$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi # Use a proper internal environment variable to ensure we don't fall # into an infinite loop, continuously re-executing ourselves. if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then _as_can_reexec=no; export _as_can_reexec; # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also # works around shells that cannot unset nonexistent variables. # Preserve -v and -x to the replacement shell. BASH_ENV=/dev/null ENV=/dev/null (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV case $- in # (((( *v*x* | *x*v* ) as_opts=-vx ;; *v* ) as_opts=-v ;; *x* ) as_opts=-x ;; * ) as_opts= ;; esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 exit 255 fi # We don't want this to propagate to other subprocesses. { _as_can_reexec=; unset _as_can_reexec;} if test "x$CONFIG_SHELL" = x; then as_bourne_compatible="as_nop=: if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST else \$as_nop case \`(set -o) 2>/dev/null\` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi " as_required="as_fn_return () { (exit \$1); } as_fn_success () { as_fn_return 0; } as_fn_failure () { as_fn_return 1; } as_fn_ret_success () { return 0; } as_fn_ret_failure () { return 1; } exitcode=0 as_fn_success || { exitcode=1; echo as_fn_success failed.; } as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } if ( set x; as_fn_ret_success y && test x = \"\$1\" ) then : else \$as_nop exitcode=1; echo positional parameters were not saved. fi test x\$exitcode = x0 || exit 1 blah=\$(echo \$(echo blah)) test x\"\$blah\" = xblah || exit 1 test -x / || exit 1" as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 test \$(( 1 + 1 )) = 2 || exit 1 test -n \"\${ZSH_VERSION+set}\${BASH_VERSION+set}\" || ( ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO PATH=/empty FPATH=/empty; export PATH FPATH test \"X\`printf %s \$ECHO\`\" = \"X\$ECHO\" \\ || test \"X\`print -r -- \$ECHO\`\" = \"X\$ECHO\" ) || exit 1" if (eval "$as_required") 2>/dev/null then : as_have_required=yes else $as_nop as_have_required=no fi if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null then : else $as_nop as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_found=false for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac as_found=: case $as_dir in #( /*) for as_base in sh bash ksh sh5; do # Try only shells that exist, to save several forks. as_shell=$as_dir$as_base if { test -f "$as_shell" || test -f "$as_shell.exe"; } && as_run=a "$as_shell" -c "$as_bourne_compatible""$as_required" 2>/dev/null then : CONFIG_SHELL=$as_shell as_have_required=yes if as_run=a "$as_shell" -c "$as_bourne_compatible""$as_suggested" 2>/dev/null then : break 2 fi fi done;; esac as_found=false done IFS=$as_save_IFS if $as_found then : else $as_nop if { test -f "$SHELL" || test -f "$SHELL.exe"; } && as_run=a "$SHELL" -c "$as_bourne_compatible""$as_required" 2>/dev/null then : CONFIG_SHELL=$SHELL as_have_required=yes fi fi if test "x$CONFIG_SHELL" != x then : export CONFIG_SHELL # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also # works around shells that cannot unset nonexistent variables. # Preserve -v and -x to the replacement shell. BASH_ENV=/dev/null ENV=/dev/null (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV case $- in # (((( *v*x* | *x*v* ) as_opts=-vx ;; *v* ) as_opts=-v ;; *x* ) as_opts=-x ;; * ) as_opts= ;; esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 exit 255 fi if test x$as_have_required = xno then : printf "%s\n" "$0: This script requires a shell more modern than all" printf "%s\n" "$0: the shells that I found on your system." if test ${ZSH_VERSION+y} ; then printf "%s\n" "$0: In particular, zsh $ZSH_VERSION has bugs and should" printf "%s\n" "$0: be upgraded to zsh 4.3.4 or later." else printf "%s\n" "$0: Please tell bug-autoconf@gnu.org and $0: starpu-devel@inria.fr about your system, including any $0: error possibly output before this message. Then install $0: a modern shell, or manually run the script under such a $0: shell if you do have one." fi exit 1 fi fi fi SHELL=${CONFIG_SHELL-/bin/sh} export SHELL # Unset more variables known to interfere with behavior of common tools. CLICOLOR_FORCE= GREP_OPTIONS= unset CLICOLOR_FORCE GREP_OPTIONS ## --------------------- ## ## M4sh Shell Functions. ## ## --------------------- ## # as_fn_unset VAR # --------------- # Portably unset VAR. as_fn_unset () { { eval $1=; unset $1;} } as_unset=as_fn_unset # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. as_fn_set_status () { return $1 } # as_fn_set_status # as_fn_exit STATUS # ----------------- # Exit the shell with STATUS, even in a "trap 0" or "set -e" context. as_fn_exit () { set +e as_fn_set_status $1 exit $1 } # as_fn_exit # as_fn_nop # --------- # Do nothing but, unlike ":", preserve the value of $?. as_fn_nop () { return $? } as_nop=as_fn_nop # as_fn_mkdir_p # ------------- # Create "$as_dir" as a directory, including parents if necessary. as_fn_mkdir_p () { case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || eval $as_mkdir_p || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || printf "%s\n" X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" } # as_fn_mkdir_p # as_fn_executable_p FILE # ----------------------- # Test if FILE is an executable regular file. as_fn_executable_p () { test -f "$1" && test -x "$1" } # as_fn_executable_p # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null then : eval 'as_fn_append () { eval $1+=\$2 }' else $as_nop as_fn_append () { eval $1=\$$1\$2 } fi # as_fn_append # as_fn_arith ARG... # ------------------ # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null then : eval 'as_fn_arith () { as_val=$(( $* )) }' else $as_nop as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` } fi # as_fn_arith # as_fn_nop # --------- # Do nothing but, unlike ":", preserve the value of $?. as_fn_nop () { return $? } as_nop=as_fn_nop # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the # script with STATUS, using 1 if that was 0. as_fn_error () { as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi printf "%s\n" "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || printf "%s\n" X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits as_lineno_1=$LINENO as_lineno_1a=$LINENO as_lineno_2=$LINENO as_lineno_2a=$LINENO eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-) sed -n ' p /[$]LINENO/= ' <$as_myself | sed ' s/[$]LINENO.*/&-/ t lineno b :lineno N :loop s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ t loop s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || { printf "%s\n" "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } # If we had to re-execute with $CONFIG_SHELL, we're ensured to have # already done that, so ensure we don't try to do so again and fall # in an infinite loop. This has already happened in practice. _as_can_reexec=no; export _as_can_reexec # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensitive to this). . "./$as_me.lineno" # Exit status is that of the last command. exit } # Determine whether it's possible to make 'echo' print without a newline. # These variables are no longer used directly by Autoconf, but are AC_SUBSTed # for compatibility with existing Makefiles. ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) case `echo 'xy\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. xy) ECHO_C='\c';; *) echo `echo ksh88 bug on AIX 6.1` > /dev/null ECHO_T=' ';; esac;; *) ECHO_N='-n';; esac # For backward compatibility with old third-party macros, we provide # the shell variables $as_echo and $as_echo_n. New code should use # AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. as_echo='printf %s\n' as_echo_n='printf %s' rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -pR'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -pR' fi else as_ln_s='cp -pR' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null if mkdir -p . 2>/dev/null; then as_mkdir_p='mkdir -p "$as_dir"' else test -d ./-p && rmdir ./-p as_mkdir_p=false fi as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" SHELL=${CONFIG_SHELL-/bin/sh} test -n "$DJDIR" || exec 7<&0 &1 # Name of the host. # hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, # so uname gets run too. ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` # # Initializations. # ac_default_prefix=/usr/local ac_clean_files= ac_config_libobj_dir=. LIBOBJS= cross_compiling=no subdirs= MFLAGS= MAKEFLAGS= # Identity of this package. PACKAGE_NAME='StarPU' PACKAGE_TARNAME='starpu' PACKAGE_VERSION='1.4.9' PACKAGE_STRING='StarPU 1.4.9' PACKAGE_BUGREPORT='starpu-devel@inria.fr' PACKAGE_URL='http://gitlab.inria.fr/starpu/starpu' ac_unique_file="include/starpu.h" # Factoring default headers for most tests. ac_includes_default="\ #include #ifdef HAVE_STDIO_H # include #endif #ifdef HAVE_STDLIB_H # include #endif #ifdef HAVE_STRING_H # include #endif #ifdef HAVE_INTTYPES_H # include #endif #ifdef HAVE_STDINT_H # include #endif #ifdef HAVE_STRINGS_H # include #endif #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_STAT_H # include #endif #ifdef HAVE_UNISTD_H # include #endif" ac_header_c_list= ac_func_c_list= ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS LIBOBJS STARPU_SANITIZE_FALSE STARPU_SANITIZE_TRUE SOCL_VENDORS LIBSTARPU_LINK STARPUPY_EXTRA_LINK_ARGS STARPU_EXPORTED_LIBS LIBSTARPU_LDFLAGS STARPU_NVCC_H_CPPFLAGS STARPU_H_CPPFLAGS STARPU_OPTION_LIBS STARPU_MODULE_LIBS STARPU_LIB_PATH STARPU_INCLUDE_PATH ECLIPSE STARPU_BUILD_ECLIPSE_PLUGIN_FALSE STARPU_BUILD_ECLIPSE_PLUGIN_TRUE eclipsepath JULIA STARPU_USE_JULIA_FALSE STARPU_USE_JULIA_TRUE juliapath DOC_GENERATE_LATEX STARPU_AVAILABLE_DOC_PDF_FALSE STARPU_AVAILABLE_DOC_PDF_TRUE STARPU_BUILD_DOC_PDF_FALSE STARPU_BUILD_DOC_PDF_TRUE STARPU_AVAILABLE_DOC_FALSE STARPU_AVAILABLE_DOC_TRUE STARPU_BUILD_DOC_FALSE STARPU_BUILD_DOC_TRUE epstopdfcommand pdflatexcommand doxygencommand STARPU_STARPUPY_NUMPY_FALSE STARPU_STARPUPY_NUMPY_TRUE STARPU_BUILD_STARPUPY_FALSE STARPU_BUILD_STARPUPY_TRUE PYTHON_SETUP_OPTIONS PYTHON_NUMPY_DIR PYTHON_VERSION PYTHON STARPU_BUILD_STARPURM_EXAMPLES_FALSE STARPU_BUILD_STARPURM_EXAMPLES_TRUE STARPU_BUILD_STARPURM_FALSE STARPU_BUILD_STARPURM_TRUE STARPURM_HAVE_DLB_FALSE STARPURM_HAVE_DLB_TRUE DLB_LIBS DLB_CFLAGS STARPU_HAVE_AM111_FALSE STARPU_HAVE_AM111_TRUE STARPU_HAVE_HELP2MAN_FALSE STARPU_HAVE_HELP2MAN_TRUE HELP2MAN STARPU_HAVE_ICC_FALSE STARPU_HAVE_ICC_TRUE ICC ICC_ARGS STARPU_HAVE_F77_H_FALSE STARPU_HAVE_F77_H_TRUE STARPU_HAVE_F77_H HWLOC_REQUIRES STARPU_HWLOC_HAVE_TOPOLOGY_DUP_FALSE STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE STARPU_HAVE_HWLOC STARPU_HAVE_HWLOC_FALSE STARPU_HAVE_HWLOC_TRUE HWLOC_LIBS HWLOC_CFLAGS STARPU_BUILD_STARPUFFT_EXAMPLES_FALSE STARPU_BUILD_STARPUFFT_EXAMPLES_TRUE STARPU_BUILD_STARPUFFT_FALSE STARPU_BUILD_STARPUFFT_TRUE STARPU_HAVE_FFTWL_FALSE STARPU_HAVE_FFTWL_TRUE HAVE_FFTWFL FFTWL_LIBS FFTWL_CFLAGS STARPU_HAVE_FFTWF_FALSE STARPU_HAVE_FFTWF_TRUE STARPU_HAVE_FFTWF FFTWF_LIBS FFTWF_CFLAGS STARPU_HAVE_FFTW_FALSE STARPU_HAVE_FFTW_TRUE STARPU_HAVE_FFTW FFTW_LIBS FFTW_CFLAGS STARPU_USE_MIN_DGELS_FALSE STARPU_USE_MIN_DGELS_TRUE DGELS_LIBS STARPU_LAPACK_LDFLAGS BLAS_LIB STARPU_NO_BLAS_LIB_FALSE STARPU_NO_BLAS_LIB_TRUE STARPU_SYSTEM_BLAS_LIB_FALSE STARPU_SYSTEM_BLAS_LIB_TRUE STARPU_MKL_BLAS_LIB_FALSE STARPU_MKL_BLAS_LIB_TRUE STARPU_GOTO_BLAS_LIB_FALSE STARPU_GOTO_BLAS_LIB_TRUE STARPU_ATLAS_BLAS_LIB_FALSE STARPU_ATLAS_BLAS_LIB_TRUE STARPU_HAVE_CBLAS_SGEMV_FALSE STARPU_HAVE_CBLAS_SGEMV_TRUE STARPU_HAVE_LIBLAPACK_FALSE STARPU_HAVE_LIBLAPACK_TRUE STARPU_LIBLAPACK_LDFLAGS STARPU_HAVE_CBLAS_H_FALSE STARPU_HAVE_CBLAS_H_TRUE BLAS_LIBS BLAS_OPENBLAS_LIBS BLAS_OPENBLAS_CFLAGS STARPU_OPENBLAS STARPU_OPENBLAS_LDFLAGS OPENBLAS_LIBS OPENBLAS_CFLAGS ATLASDIR STARPU_BLAS_LDFLAGS GOTODIR STARPU_HAVE_X11_FALSE STARPU_HAVE_X11_TRUE X_EXTRA_LIBS X_LIBS X_PRE_LIBS X_CFLAGS XMKMF STARPU_HAVE_OPENGL_FALSE STARPU_HAVE_OPENGL_TRUE STARPU_OPENGL_RENDER STARPU_OPENGL_RENDER_LDFLAGS STARPU_BUILD_EXAMPLES_FALSE STARPU_BUILD_EXAMPLES_TRUE STARPU_BUILD_TESTS_FALSE STARPU_BUILD_TESTS_TRUE GDB SOCL_OCL_LIB_OPENCL_DIR STARPU_USE_SOCL_FALSE STARPU_USE_SOCL_TRUE STARPU_BUILD_SOCL_FALSE STARPU_BUILD_SOCL_TRUE STARPU_HAVE_OPENMP_FALSE STARPU_HAVE_OPENMP_TRUE STARPU_OPENMP_FALSE STARPU_OPENMP_TRUE STARPU_OPENMP_LLVM_FALSE STARPU_OPENMP_LLVM_TRUE STARPU_PARALLEL_WORKER_FALSE STARPU_PARALLEL_WORKER_TRUE OPENMP_CFLAGS pkglibdir GLOBAL_AM_FCFLAGS GLOBAL_AM_FFLAGS GLOBAL_AM_CXXFLAGS GLOBAL_AM_CFLAGS STARPU_DEVEL_FALSE STARPU_DEVEL_TRUE STARPU_EXPORT_DYNAMIC STARPU_USE_MP_FALSE STARPU_USE_MP_TRUE STARPU_HAVE_LEVELDB_FALSE STARPU_HAVE_LEVELDB_TRUE STARPU_LEVELDB_LDFLAGS STARPU_FXT_EVENT_DEFINES STARPU_USE_AYUDAME2_FALSE STARPU_USE_AYUDAME2_TRUE STARPU_USE_AYUDAME1_FALSE STARPU_USE_AYUDAME1_TRUE STARPU_GLPK_LDFLAGS STARPU_PERF_DEBUG PAPI_LIBS PAPI_CFLAGS STARPU_USE_FXT_FALSE STARPU_USE_FXT_TRUE STARPU_USE_FXT POTI_LIBS POTI_CFLAGS FXT_LDFLAGS FXT_LIBS FXT_CFLAGS FXTDIR STARPU_COVERITY_FALSE STARPU_COVERITY_TRUE STARPU_COVERAGE_ENABLED_FALSE STARPU_COVERAGE_ENABLED_TRUE COVERAGE STARPU_HAVE_MPIFORT_FALSE STARPU_HAVE_MPIFORT_TRUE STARPU_HAVE_F77_FALSE STARPU_HAVE_F77_TRUE STARPU_HAVE_FC_FALSE STARPU_HAVE_FC_TRUE MPIFORT mpifort_path STARPU_USE_MAX_FPGA_FALSE STARPU_USE_MAX_FPGA_TRUE STARPU_USE_MAX_FPGA SLIC_CONFIG STARPU_OPENCL_LDFLAGS STARPU_OPENCL_CPPFLAGS STARPU_OPENCL_DATAdir STARPU_USE_OPENCL_FALSE STARPU_USE_OPENCL_TRUE STARPU_USE_OPENCL HIPCCFLAGS STARPU_USE_HIP_FALSE STARPU_USE_HIP_TRUE STARPU_HIP_CPPFLAGS STARPU_HIP_LDFLAGS HIPCC STARPU_USE_HIPBLAS_FALSE STARPU_USE_HIPBLAS_TRUE STARPU_USE_HIPBLAS HIPCONFIG NVCCFLAGS NVCC_CC STARPU_USE_CUDA1_FALSE STARPU_USE_CUDA1_TRUE STARPU_USE_CUDA0_FALSE STARPU_USE_CUDA0_TRUE STARPU_CUDA_CPPFLAGS STARPU_CUFFT_LDFLAGS STARPU_CUDA_LDFLAGS STARPU_CURAND_LDFLAGS STARPU_HAVE_CUFFTDOUBLECOMPLEX_FALSE STARPU_HAVE_CUFFTDOUBLECOMPLEX_TRUE STARPU_HAVE_MAGMA_FALSE STARPU_HAVE_MAGMA_TRUE STARPU_HAVE_MAGMA MAGMA_LIBS MAGMA_CFLAGS CC_OR_NVCC STARPU_CUDA_FORTRAN_LDFLAGS STARPU_USE_CUDA_FALSE STARPU_USE_CUDA_TRUE STARPU_USE_CUDA NVCC STARPU_USE_CPU_FALSE STARPU_USE_CPU_TRUE STARPU_USE_CPU STARPU_SC_HYPERVISOR_DEBUG_FALSE STARPU_SC_HYPERVISOR_DEBUG_TRUE STARPU_SC_HYPERVISOR_DEBUG STARPU_USE_SC_HYPERVISOR_FALSE STARPU_USE_SC_HYPERVISOR_TRUE STARPU_BUILD_SC_HYPERVISOR_FALSE STARPU_BUILD_SC_HYPERVISOR_TRUE STARPU_SC_HYPERVISOR STARPU_LIBNUMA_LDFLAGS STARPU_HAVE_HDF5_FALSE STARPU_HAVE_HDF5_TRUE STARPU_HDF5_LDFLAGS STARPU_NEW_CHECK_FALSE STARPU_NEW_CHECK_TRUE STARPU_LONG_CHECK_FALSE STARPU_LONG_CHECK_TRUE STARPU_QUICK_CHECK_FALSE STARPU_QUICK_CHECK_TRUE STARPU_SRC_DIR STARPU_BUILD_DIR STARPU_MS_LIB_ARCH STARPU_OPENBSD_SYS_FALSE STARPU_OPENBSD_SYS_TRUE STARPU_HAVE_DARWIN_FALSE STARPU_HAVE_DARWIN_TRUE STARPU_LINUX_SYS_FALSE STARPU_LINUX_SYS_TRUE STARPU_HAVE_WINDOWS_FALSE STARPU_HAVE_WINDOWS_TRUE STARPU_HAVE_MS_LIB_FALSE STARPU_HAVE_MS_LIB_TRUE STARPU_MS_LIB hwloccalccommand MPICC_LDFLAGS MPIEXEC_ARGS STARPU_USE_MPI_FT_STATS_FALSE STARPU_USE_MPI_FT_STATS_TRUE STARPU_USE_MPI_FT_FALSE STARPU_USE_MPI_FT_TRUE STARPU_USE_MPI_FALSE STARPU_USE_MPI_TRUE STARPU_USE_MPI_NMAD_FALSE STARPU_USE_MPI_NMAD_TRUE STARPU_USE_MPI_MPI_FALSE STARPU_USE_MPI_MPI_TRUE STARPU_MPI_SYNC_CLOCKS_FALSE STARPU_MPI_SYNC_CLOCKS_TRUE MPI_SYNC_CLOCKS_LIBS MPI_SYNC_CLOCKS_CFLAGS STARPU_MPI_CHECK_FALSE STARPU_MPI_CHECK_TRUE STARPU_USE_TCPIP_MASTER_SLAVE_FALSE STARPU_USE_TCPIP_MASTER_SLAVE_TRUE STARPU_USE_MPI_MASTER_SLAVE_FALSE STARPU_USE_MPI_MASTER_SLAVE_TRUE NMAD_LIBS NMAD_CFLAGS CC_OR_MPICC STARPU_MPI_MINIMAL_TESTS_FALSE STARPU_MPI_MINIMAL_TESTS_TRUE MPIEXEC mpiexec_path MPICXX mpicxx_path MPICC mpicc_path STARPU_CROSS_COMPILING_FALSE STARPU_CROSS_COMPILING_TRUE gitcommand REALBASH CXXCPP LT_SYS_LIBRARY_PATH OTOOL64 OTOOL LIPO NMEDIT DSYMUTIL MANIFEST_TOOL RANLIB FILECMD NM ac_ct_DUMPBIN DUMPBIN LD FGREP LIBTOOL OBJDUMP DLLTOOL AS STARPU_HAVE_CXX11_FALSE STARPU_HAVE_CXX11_TRUE STARPU_HAVE_CXX11 HAVE_CXX11 SIMGRID_LDFLAGS STARPU_SIMGRID_FALSE STARPU_SIMGRID_TRUE STARPU_SIMGRID_MC_FALSE STARPU_SIMGRID_MC_TRUE SIMGRID_MC SIMGRID_LIBS SIMGRID_CFLAGS STARPU_BUBBLE_FALSE STARPU_BUBBLE_TRUE APP_FCFLAGS APP_FFLAGS APP_CXXFLAGS APP_CFLAGS PKG_CONFIG HAVE_PARALLEL_FALSE HAVE_PARALLEL_TRUE PARALLEL PROG_CLANG PROG_FIND PROG_DATE PROG_STAT EGREP GREP ac_ct_FC FCFLAGS FC ac_ct_F77 FFLAGS F77 LN_S SED CPP am__fastdepCXX_FALSE am__fastdepCXX_TRUE CXXDEPMODE ac_ct_CXX CXXFLAGS CXX am__fastdepCC_FALSE am__fastdepCC_TRUE CCDEPMODE am__nodep AMDEPBACKSLASH AMDEP_FALSE AMDEP_TRUE am__include DEPDIR OBJEXT EXEEXT ac_ct_CC CPPFLAGS LDFLAGS CFLAGS CC ac_ct_AR AR AM_BACKSLASH AM_DEFAULT_VERBOSITY AM_DEFAULT_V AM_V CSCOPE ETAGS CTAGS am__untar am__tar AMTAR am__leading_dot SET_MAKE AWK mkdir_p MKDIR_P INSTALL_STRIP_PROGRAM STRIP install_sh MAKEINFO AUTOHEADER AUTOMAKE AUTOCONF ACLOCAL VERSION PACKAGE CYGPATH_W am__isrc INSTALL_DATA INSTALL_SCRIPT INSTALL_PROGRAM target_os target_vendor target_cpu target host_os host_vendor host_cpu host build_os build_vendor build_cpu build LIBSTARPUJULIA_INTERFACE_AGE LIBSTARPUJULIA_INTERFACE_REVISION LIBSTARPUJULIA_INTERFACE_CURRENT LIBSOCL_INTERFACE_AGE LIBSOCL_INTERFACE_REVISION LIBSOCL_INTERFACE_CURRENT LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT LIBSTARPURM_INTERFACE_AGE LIBSTARPURM_INTERFACE_REVISION LIBSTARPURM_INTERFACE_CURRENT LIBSTARPUFFT_INTERFACE_AGE LIBSTARPUFFT_INTERFACE_REVISION LIBSTARPUFFT_INTERFACE_CURRENT LIBSTARPUMPI_INTERFACE_AGE LIBSTARPUMPI_INTERFACE_REVISION LIBSTARPUMPI_INTERFACE_CURRENT LIBSTARPU_INTERFACE_AGE LIBSTARPU_INTERFACE_REVISION LIBSTARPU_INTERFACE_CURRENT STARPU_EFFECTIVE_VERSION STARPU_RELEASE_VERSION STARPU_MINOR_VERSION STARPU_MAJOR_VERSION target_alias host_alias build_alias LIBS ECHO_T ECHO_N ECHO_C DEFS mandir localedir libdir psdir pdfdir dvidir htmldir infodir docdir oldincludedir includedir runstatedir localstatedir sharedstatedir sysconfdir datadir datarootdir libexecdir sbindir bindir program_transform_name prefix exec_prefix PACKAGE_URL PACKAGE_BUGREPORT PACKAGE_STRING PACKAGE_VERSION PACKAGE_TARNAME PACKAGE_NAME PATH_SEPARATOR SHELL am__quote' ac_subst_files='' ac_user_opts=' enable_option_checking enable_silent_rules enable_dependency_tracking enable_simgrid enable_starpupy enable_prof_tool enable_bubble enable_bubble_verbose enable_opencl_simulator with_simgrid_dir with_simgrid_include_dir with_simgrid_lib_dir enable_simgrid_mc enable_blocking_drivers enable_worker_callbacks enable_shared enable_static with_pic enable_fast_install with_aix_soname with_gnu_ld with_sysroot enable_libtool_lock with_mpicc with_mpicxx with_smpirun with_mpiexec enable_mpi enable_mpi_minimal_tests enable_nmad enable_mpi_master_slave enable_maxmpidev enable_tcpip_master_slave enable_maxtcpipdev enable_mpi_pedantic_isend enable_mpi_check enable_mpi_ft enable_mpi_ft_stats with_mpiexec_args enable_mpi_verbose enable_maxnumanodes enable_native_winthreads enable_default_drand48 enable_quick_check enable_long_check enable_new_check enable_valgrind enable_hdf5 with_hdf5_include_dir with_hdf5_lib_dir enable_max_sched_ctxs enable_sc_hypervisor enable_sc_hypervisor_debug enable_maxcpus enable_cpu enable_maxcudadev enable_cuda with_cuda_dir with_cuda_include_dir with_cuda_lib_dir enable_cuda_memcpy_peer enable_cuda_map enable_cuda0 enable_cuda1 enable_maxhipdev enable_hip with_hipblas enable_hip_memcpy_peer enable_maxopencldev enable_opencl with_opencl_dir with_opencl_include_dir with_opencl_lib_dir enable_maxmaxfpgadev enable_max_fpga enable_asynchronous_copy enable_asynchronous_cuda_copy enable_asynchronous_opencl_copy enable_asynchronous_mpi_master_slave_copy enable_asynchronous_tcpip_master_slave_copy enable_asynchronous_max_fpga_copy enable_fortran with_mpifort enable_debug enable_spinlock_check enable_fstack_protector_all enable_gdb enable_full_gdb_information enable_fast enable_verbose enable_coverage enable_coverity enable_fxt with_fxt enable_poti enable_fxt_lock enable_papi enable_perf_debug enable_model_debug enable_memory_stats enable_glpk with_ayudame1_include_dir with_ayudame2_include_dir enable_ayudame1 enable_ayudame2 enable_data_locality_enforce enable_maxbuffers enable_fxt_max_files enable_maxnodes enable_allocation_cache with_perf_model_dir enable_maximplementations enable_leveldb enable_calibration_heuristic enable_export_dynamic with_check_flags enable_parallel_worker enable_openmp enable_openmp_llvm enable_socl enable_build_tests enable_build_examples enable_opengl_render with_x enable_blas_lib with_goto_dir with_atlas_dir with_mkl_cflags with_mkl_ldflags with_armpl_cflags with_armpl_ldflags enable_mlr enable_mlr_system_blas enable_starpufft enable_starpufft_examples with_hwloc with_icc with_icc_args enable_icc enable_starpurm enable_starpurm_verbose enable_dlb with_dlb_include_dir with_dlb_lib_dir enable_starpurm_dlb_verbose enable_starpurm_examples enable_build_doc enable_build_doc_pdf enable_julia enable_eclipse_plugin ' ac_precious_vars='build_alias host_alias target_alias CC CFLAGS LDFLAGS LIBS CPPFLAGS CXX CXXFLAGS CCC CPP F77 FFLAGS FC FCFLAGS PKG_CONFIG SIMGRID_CFLAGS SIMGRID_LIBS LT_SYS_LIBRARY_PATH CXXCPP NMAD_CFLAGS NMAD_LIBS MPI_SYNC_CLOCKS_CFLAGS MPI_SYNC_CLOCKS_LIBS STARPU_MS_LIB MAGMA_CFLAGS MAGMA_LIBS NVCC NVCC_CC NVCCFLAGS HIPCCFLAGS FXT_CFLAGS FXT_LIBS FXT_LDFLAGS POTI_CFLAGS POTI_LIBS PAPI_CFLAGS PAPI_LIBS XMKMF OPENBLAS_CFLAGS OPENBLAS_LIBS BLAS_OPENBLAS_CFLAGS BLAS_OPENBLAS_LIBS BLAS_LIBS DGELS_LIBS FFTW_CFLAGS FFTW_LIBS FFTWF_CFLAGS FFTWF_LIBS FFTWL_CFLAGS FFTWL_LIBS HWLOC_CFLAGS HWLOC_LIBS PYTHON' # Initialize some variables set by options. ac_init_help= ac_init_version=false ac_unrecognized_opts= ac_unrecognized_sep= # The variables have the same names as the options, with # dashes changed to underlines. cache_file=/dev/null exec_prefix=NONE no_create= no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= verbose= x_includes=NONE x_libraries=NONE # Installation directory options. # These are left unexpanded so users can "make install exec_prefix=/foo" # and all the variables that are supposed to be based on exec_prefix # by default will actually change. # Use braces instead of parens because sh, perl, etc. also accept them. # (The list follows the same order as the GNU Coding Standards.) bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datarootdir='${prefix}/share' datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' runstatedir='${localstatedir}/run' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' infodir='${datarootdir}/info' htmldir='${docdir}' dvidir='${docdir}' pdfdir='${docdir}' psdir='${docdir}' libdir='${exec_prefix}/lib' localedir='${datarootdir}/locale' mandir='${datarootdir}/man' ac_prev= ac_dashdash= for ac_option do # If the previous option needs an argument, assign it. if test -n "$ac_prev"; then eval $ac_prev=\$ac_option ac_prev= continue fi case $ac_option in *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; *=) ac_optarg= ;; *) ac_optarg=yes ;; esac case $ac_dashdash$ac_option in --) ac_dashdash=yes ;; -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir=$ac_optarg ;; -build | --build | --buil | --bui | --bu) ac_prev=build_alias ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build_alias=$ac_optarg ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file=$ac_optarg ;; --config-cache | -C) cache_file=config.cache ;; -datadir | --datadir | --datadi | --datad) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=*) datadir=$ac_optarg ;; -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ | --dataroo | --dataro | --datar) ac_prev=datarootdir ;; -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) datarootdir=$ac_optarg ;; -disable-* | --disable-*) ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid feature name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=no ;; -docdir | --docdir | --docdi | --doc | --do) ac_prev=docdir ;; -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) docdir=$ac_optarg ;; -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) ac_prev=dvidir ;; -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) dvidir=$ac_optarg ;; -enable-* | --enable-*) ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid feature name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=\$ac_optarg ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix=$ac_optarg ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. with_gas=yes ;; -help | --help | --hel | --he | -h) ac_init_help=long ;; -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) ac_init_help=recursive ;; -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) ac_init_help=short ;; -host | --host | --hos | --ho) ac_prev=host_alias ;; -host=* | --host=* | --hos=* | --ho=*) host_alias=$ac_optarg ;; -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) ac_prev=htmldir ;; -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ | --ht=*) htmldir=$ac_optarg ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir=$ac_optarg ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir=$ac_optarg ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir=$ac_optarg ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir=$ac_optarg ;; -localedir | --localedir | --localedi | --localed | --locale) ac_prev=localedir ;; -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) localedir=$ac_optarg ;; -localstatedir | --localstatedir | --localstatedi | --localstated \ | --localstate | --localstat | --localsta | --localst | --locals) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) localstatedir=$ac_optarg ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) mandir=$ac_optarg ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c | -n) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir=$ac_optarg ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix=$ac_optarg ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix=$ac_optarg ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) program_suffix=$ac_optarg ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | --program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | --program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name=$ac_optarg ;; -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) ac_prev=pdfdir ;; -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) pdfdir=$ac_optarg ;; -psdir | --psdir | --psdi | --psd | --ps) ac_prev=psdir ;; -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) psdir=$ac_optarg ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -runstatedir | --runstatedir | --runstatedi | --runstated \ | --runstate | --runstat | --runsta | --runst | --runs \ | --run | --ru | --r) ac_prev=runstatedir ;; -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ | --run=* | --ru=* | --r=*) runstatedir=$ac_optarg ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir=$ac_optarg ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir=$ac_optarg ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site=$ac_optarg ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir=$ac_optarg ;; -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) sysconfdir=$ac_optarg ;; -target | --target | --targe | --targ | --tar | --ta | --t) ac_prev=target_alias ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target_alias=$ac_optarg ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers | -V) ac_init_version=: ;; -with-* | --with-*) ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid package name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=\$ac_optarg ;; -without-* | --without-*) ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid package name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=no ;; --x) # Obsolete; use --with-x. with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes=$ac_optarg ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries=$ac_optarg ;; -*) as_fn_error $? "unrecognized option: \`$ac_option' Try \`$0 --help' for more information" ;; *=*) ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` # Reject names that are not valid shell variable names. case $ac_envvar in #( '' | [0-9]* | *[!_$as_cr_alnum]* ) as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; esac eval $ac_envvar=\$ac_optarg export $ac_envvar ;; *) # FIXME: should be removed in autoconf 3.0. printf "%s\n" "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && printf "%s\n" "$as_me: WARNING: invalid host type: $ac_option" >&2 : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" ;; esac done if test -n "$ac_prev"; then ac_option=--`echo $ac_prev | sed 's/_/-/g'` as_fn_error $? "missing argument to $ac_option" fi if test -n "$ac_unrecognized_opts"; then case $enable_option_checking in no) ;; fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; *) printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; esac fi # Check all directory arguments for consistency. for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ datadir sysconfdir sharedstatedir localstatedir includedir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ libdir localedir mandir runstatedir do eval ac_val=\$$ac_var # Remove trailing slashes. case $ac_val in */ ) ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` eval $ac_var=\$ac_val;; esac # Be sure to have absolute directory names. case $ac_val in [\\/$]* | ?:[\\/]* ) continue;; NONE | '' ) case $ac_var in *prefix ) continue;; esac;; esac as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" done # There might be people who depend on the old broken behavior: `$host' # used to hold the argument of --host etc. # FIXME: To remove some day. build=$build_alias host=$host_alias target=$target_alias # FIXME: To remove some day. if test "x$host_alias" != x; then if test "x$build_alias" = x; then cross_compiling=maybe elif test "x$build_alias" != "x$host_alias"; then cross_compiling=yes fi fi ac_tool_prefix= test -n "$host_alias" && ac_tool_prefix=$host_alias- test "$silent" = yes && exec 6>/dev/null ac_pwd=`pwd` && test -n "$ac_pwd" && ac_ls_di=`ls -di .` && ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || as_fn_error $? "working directory cannot be determined" test "X$ac_ls_di" = "X$ac_pwd_ls_di" || as_fn_error $? "pwd does not report name of working directory" # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes # Try the directory containing this script, then the parent directory. ac_confdir=`$as_dirname -- "$as_myself" || $as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_myself" : 'X\(//\)[^/]' \| \ X"$as_myself" : 'X\(//\)$' \| \ X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || printf "%s\n" X"$as_myself" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` srcdir=$ac_confdir if test ! -r "$srcdir/$ac_unique_file"; then srcdir=.. fi else ac_srcdir_defaulted=no fi if test ! -r "$srcdir/$ac_unique_file"; then test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" fi ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" ac_abs_confdir=`( cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" pwd)` # When building in place, set srcdir=. if test "$ac_abs_confdir" = "$ac_pwd"; then srcdir=. fi # Remove unnecessary trailing slashes from srcdir. # Double slashes in file names in object file debugging info # mess up M-x gdb in Emacs. case $srcdir in */) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; esac for ac_var in $ac_precious_vars; do eval ac_env_${ac_var}_set=\${${ac_var}+set} eval ac_env_${ac_var}_value=\$${ac_var} eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} eval ac_cv_env_${ac_var}_value=\$${ac_var} done # # Report the --help message. # if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF \`configure' configures StarPU 1.4.9 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... To assign environment variables (e.g., CC, CFLAGS...), specify them as VAR=VALUE. See below for descriptions of some of the useful variables. Defaults for the options are specified in brackets. Configuration: -h, --help display this help and exit --help=short display options specific to this package --help=recursive display the short help of all the included packages -V, --version display version information and exit -q, --quiet, --silent do not print \`checking ...' messages --cache-file=FILE cache test results in FILE [disabled] -C, --config-cache alias for \`--cache-file=config.cache' -n, --no-create do not create output files --srcdir=DIR find the sources in DIR [configure dir or \`..'] Installation directories: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [PREFIX] By default, \`make install' will install all the files in \`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify an installation prefix other than \`$ac_default_prefix' using \`--prefix', for instance \`--prefix=\$HOME'. For better control, use the options below. Fine tuning of the installation directories: --bindir=DIR user executables [EPREFIX/bin] --sbindir=DIR system admin executables [EPREFIX/sbin] --libexecdir=DIR program executables [EPREFIX/libexec] --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] --datadir=DIR read-only architecture-independent data [DATAROOTDIR] --infodir=DIR info documentation [DATAROOTDIR/info] --localedir=DIR locale-dependent data [DATAROOTDIR/locale] --mandir=DIR man documentation [DATAROOTDIR/man] --docdir=DIR documentation root [DATAROOTDIR/doc/starpu] --htmldir=DIR html documentation [DOCDIR] --dvidir=DIR dvi documentation [DOCDIR] --pdfdir=DIR pdf documentation [DOCDIR] --psdir=DIR ps documentation [DOCDIR] _ACEOF cat <<\_ACEOF Program names: --program-prefix=PREFIX prepend PREFIX to installed program names --program-suffix=SUFFIX append SUFFIX to installed program names --program-transform-name=PROGRAM run sed PROGRAM on installed program names X features: --x-includes=DIR X include files are in DIR --x-libraries=DIR X library files are in DIR System types: --build=BUILD configure for building on BUILD [guessed] --host=HOST cross-compile to build programs to run on HOST [BUILD] --target=TARGET configure for building compilers for TARGET [HOST] _ACEOF fi if test -n "$ac_init_help"; then case $ac_init_help in short | recursive ) echo "Configuration of StarPU 1.4.9:";; esac cat <<\_ACEOF Optional Features: --disable-option-checking ignore unrecognized --enable/--with options --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --enable-silent-rules less verbose build output (undo: "make V=1") --disable-silent-rules verbose build output (undo: "make V=0") --enable-dependency-tracking do not reject slow dependency extractors --disable-dependency-tracking speeds up one-time build --enable-simgrid Enable simulating execution in simgrid --enable-starpupy enable StarPU python interface --enable-prof-tool enable profiling tool --enable-bubble build the hierarchical dags (a.k.a bubble) support --enable-bubble-verbose display verbose bubble messages --enable-opencl-simulator Enable the use of an OpenCL simulator --enable-simgrid-mc Enable using Model Checker of simgrid --enable-blocking-drivers enable blocking drivers --enable-worker-callbacks enable worker callbacks --enable-shared[=PKGS] build shared libraries [default=yes] --enable-static[=PKGS] build static libraries [default=yes] --enable-fast-install[=PKGS] optimize for fast installation [default=yes] --disable-libtool-lock avoid locking (might break parallel builds) --disable-mpi Disable StarPU MPI library generation --enable-mpi-minimal-tests Only enable a subset of MPI tests --enable-nmad Enable StarPU MPI library generation using the new madeleine backend --enable-mpi-master-slave Enable StarPU to run with the master-slave mode --enable-maxmpidev= maximum number of MPI master-slave devices --enable-tcpip-master-slave Enable StarPU to run with the master-slave mode --enable-maxtcpipdev= maximum number of TCP/IP master-slave devices --enable-mpi-pedantic-isend Prevent StarPU MPI from reading buffers while being sent over MPI --enable-mpi-check Enable execution of MPI testcases --enable-mpi-ft Enable failure tolerance mechanisms provided by StarPU --enable-mpi-ft-stats Enable stats for failure tolerance mechanisms --enable-mpi-verbose display MPI verbose debug messages (--enable-mpi-verbose=extra increase the verbosity) --enable-maxnumanodes= maximum number of NUMA nodes --enable-native-winthreads Use native windows threads instead of pthread --disable-default-drand48 Do not use the default version of drand48 --enable-quick-check Lower default values for the testcases run by make check to allow a faster execution --enable-long-check Enable some exhaustive checks which take a really long time --enable-new-check Enable new and known-to-fail testcases --disable-valgrind Do not check the availability of valgrind.h and helgrind.h --enable-hdf5 enable HDF5 support --enable-max-sched-ctxs= maximum number of sched_ctxs --enable-sc-hypervisor enable resizing contexts (experimental) --enable-sc-hypervisor-debug enable debug for resizing contexts (experimental) --enable-maxcpus= maximum number of CPUs --disable-cpu do not use the CPU(s) --enable-maxcudadev= maximum number of CUDA devices --disable-cuda do not use CUDA device(s) --disable-cuda-memcpy-peer do not allow peer transfers when using CUDA 4.0 --disable-cuda-map do not allow CUDA memory mapping when available --enable-cuda0 Enable the minimal-support CUDA driver (only for testing) --enable-cuda0 Enable the small-support CUDA driver (only for testing) --enable-maxhipdev= maximum number of HIP devices --enable-hip Enable the minimal-support HIP driver (only for testing) --disable-hip-memcpy-peer if you want to disable peer transfers when using hip --enable-maxopencldev= maximum number of OPENCL devices --disable-opencl do not use OpenCL device(s) --enable-maxmaxfpgadev= maximum number of Maxeler FPGA devices --disable-max-fpga disable support for Maxeler FPGA --disable-asynchronous-copy disable asynchronous copy between CPU and GPU --disable-asynchronous-cuda-copy disable asynchronous copy between CPU and CUDA devices --disable-asynchronous-opencl-copy disable asynchronous copy between CPU and OPENCL devices --disable-asynchronous-mpi-master-slave-copy disable asynchronous copy between MPI Master and MPI Slave devices --disable-asynchronous-tcpip-master-slave-copy disable asynchronous copy between TCP/IP Master and TCP/IP Slave devices --disable-asynchronous-max-fpga-copy disable asynchronous copy between CPU and Maxeler FPGA devices --disable-fortran disable build of fortran examples --enable-debug enable debug mode --enable-spinlock-check enable spinlock check --disable-fstack-protector-all disable GCC option -fstack-protector-all --disable-gdb disable gdb information --disable-full-gdb-information disable full gdb information --enable-fast do not enforce assertions --enable-verbose display verbose debug messages (--enable-verbose=extra increase the verbosity) --enable-coverage enable coverage checking --enable-coverity enable coverity mode --disable-fxt disable FxT trace mechanisms --enable-poti Enable the use of the POTI library to generate Paje traces --enable-fxt-lock enable additional locking systems FxT traces --disable-papi disable using papi --enable-perf-debug enable performance debugging through gprof --enable-model-debug enable performance model debugging --enable-memory-stats enable memory stats --disable-glpk disable using glpk for bound computation --disable-ayudame1 Do not use Ayudame lib version 1 --disable-ayudame2 Do not use Ayudame lib version 2 --enable-data-locality-enforce disable data locality enforcement --enable-maxbuffers= maximum number of buffers per task --enable-fxt-max-files= maximum number of mpi nodes for traces --enable-maxnodes= maximum number of memory nodes per MPI rank --disable-allocation-cache disable data allocation cache --enable-maximplementations= maximum number of implementations --enable-leveldb Enable linking with LevelDB if available --enable-calibration-heuristic= Define the maximum authorized deviation of StarPU history-based calibrator. --disable-export-dynamic Prevent the linker from adding all symbols to the dynamic symbol table --enable-parallel-worker build the parallel worker support --disable-openmp do not use OpenMP --enable-openmp-llvm build the OpenMP LLVM runtime support --enable-openmp build the OpenMP runtime support --enable-socl build the OpenCL interface (experimental) --disable-build-tests disable building of tests --disable-build-examples disable building of examples --enable-opengl-render enable OpenGL rendering of some examples --enable-blas-lib=blaslibname: none default: no BLAS lib is used atlas: use ATLAS library goto: use GotoBLAS library mkl: use MKL library (you may need to set specific CFLAGS and LDFLAGS with --with-mkl-cflags and --with-mkl-ldflags) --enable-mlr Enable multiple linear regression models --enable-mlr-system-blas Make the multiple linear regression models use the system BLAS instead of min-dgels --disable-starpufft Disable build of StarPU-FFT --enable-starpufft-examples enable build of StarPU FFT examples --enable-icc Enable the compilation of specific ICC examples --enable-starpurm enable resource management support --enable-starpurm-verbose display resource management verbose debug messages --enable-dlb enable DLB support --enable-starpurm-dlb-verbose display resource management verbose debug messages --enable-starpurm-examples enable build of StarPU Resource Manager examples --disable-build-doc disable building of documentation --enable-build-doc-pdf enable building of PDF documentation --enable-julia enable the Julia extension --enable-eclipse-plugin Build the Eclipse plugin Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-simgrid-dir= specify SimGrid installation directory --with-simgrid-include-dir= specify where SimGrid headers are installed --with-simgrid-lib-dir= specify where SimGrid libraries are installed --with-pic[=PKGS] try to use only PIC/non-PIC objects [default=use both] --with-aix-soname=aix|svr4|both shared library versioning (aka "SONAME") variant to provide on AIX, [default=aix]. --with-gnu-ld assume the C compiler uses GNU ld [default=no] --with-sysroot[=DIR] Search for dependent libraries within DIR (or the compiler's sysroot if not specified). --with-mpicc= Name or path of the mpicc compiler --with-mpicxx= Name or path of the mpicxx/mpic++ compiler --with-smpirun= Name or path of the smpirun helper --with-mpiexec= Name or path of mpiexec --with-mpiexec-args= Arguments for mpiexec --with-hdf5-include-dir= specify where HDF5 headers are installed --with-hdf5-lib-dir= specify where HDF5 libraries are installed --with-cuda-dir= specify CUDA installation directory --with-cuda-include-dir= specify where CUDA headers are installed --with-cuda-lib-dir= specify where CUDA libraries are installed --with-hipblas= specify where hipblas is installed --with-opencl-dir= specify OpenCL installation directory --with-opencl-include-dir= specify where OpenCL headers are installed --with-opencl-lib-dir= specify where OpenCL libraries are installed --with-mpifort= Name or path of the mpifort compiler --with-fxt= specify FxT installation directory --with-ayudame1-include-dir= specify where Ayudame version 1 headers are installed --with-ayudame2-include-dir= specify where Ayudame version 2 headers are installed --with-perf-model-dir= specify where performance models should be stored --with-check-flags Specify flags for C and Fortran compilers --with-x use the X Window System --with-goto-dir= specify GotoBLAS lib location --with-atlas-dir= specify ATLAS lib location --with-mkl-cflags specify MKL compilation flags --with-mkl-ldflags specify MKL linking flags --with-armpl-cflags specify ARMPL compilation flags --with-armpl-ldflags specify ARMPL linking flags --without-hwloc Disable hwloc (enabled by default) --with-icc= Name or path of the icc compiler --with-icc-args= Arguments for icc --with-dlb-include-dir= specify where DLB headers are installed --with-dlb-lib-dir= specify where DLB libraries are installed Some influential environment variables: CC C compiler command CFLAGS C compiler flags LDFLAGS linker flags, e.g. -L if you have libraries in a nonstandard directory LIBS libraries to pass to the linker, e.g. -l CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if you have headers in a nonstandard directory CXX C++ compiler command CXXFLAGS C++ compiler flags CPP C preprocessor F77 Fortran 77 compiler command FFLAGS Fortran 77 compiler flags FC Fortran compiler command FCFLAGS Fortran compiler flags PKG_CONFIG path to pkg-config utility SIMGRID_CFLAGS C compiler flags for SIMGRID, overriding pkg-config SIMGRID_LIBS linker flags for SIMGRID, overriding pkg-config LT_SYS_LIBRARY_PATH User-defined run-time library search path. CXXCPP C++ preprocessor NMAD_CFLAGS C compiler flags for NMAD, overriding pkg-config NMAD_LIBS linker flags for NMAD, overriding pkg-config MPI_SYNC_CLOCKS_CFLAGS C compiler flags for MPI_SYNC_CLOCKS, overriding pkg-config MPI_SYNC_CLOCKS_LIBS linker flags for MPI_SYNC_CLOCKS, overriding pkg-config STARPU_MS_LIB Path to Microsoft's Visual Studio `lib' tool MAGMA_CFLAGS C compiler flags for MAGMA, overriding pkg-config MAGMA_LIBS linker flags for MAGMA, overriding pkg-config NVCC CUDA compiler NVCC_CC C compiler for CUDA compiler NVCCFLAGS CUDA compiler flags HIPCCFLAGS HIP compiler flags FXT_CFLAGS C compiler flags for FXT, overriding pkg-config FXT_LIBS linker flags for FXT, overriding pkg-config FXT_LDFLAGS POTI_CFLAGS C compiler flags for POTI, overriding pkg-config POTI_LIBS linker flags for POTI, overriding pkg-config PAPI_CFLAGS C compiler flags for PAPI, overriding pkg-config PAPI_LIBS linker flags for PAPI, overriding pkg-config XMKMF Path to xmkmf, Makefile generator for X Window System OPENBLAS_CFLAGS C compiler flags for OPENBLAS, overriding pkg-config OPENBLAS_LIBS linker flags for OPENBLAS, overriding pkg-config BLAS_OPENBLAS_CFLAGS C compiler flags for BLAS_OPENBLAS, overriding pkg-config BLAS_OPENBLAS_LIBS linker flags for BLAS_OPENBLAS, overriding pkg-config BLAS_LIBS linker flags for blas DGELS_LIBS linker flags for lapack dgels FFTW_CFLAGS C compiler flags for FFTW, overriding pkg-config FFTW_LIBS linker flags for FFTW, overriding pkg-config FFTWF_CFLAGS C compiler flags for FFTWF, overriding pkg-config FFTWF_LIBS linker flags for FFTWF, overriding pkg-config FFTWL_CFLAGS C compiler flags for FFTWL, overriding pkg-config FFTWL_LIBS linker flags for FFTWL, overriding pkg-config HWLOC_CFLAGS C compiler flags for HWLOC, overriding pkg-config HWLOC_LIBS linker flags for HWLOC, overriding pkg-config PYTHON Python3 interpreter Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. Report bugs to . StarPU home page: . _ACEOF ac_status=$? fi if test "$ac_init_help" = "recursive"; then # If there are subdirs, report their specific --help. for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue test -d "$ac_dir" || { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || continue ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix cd "$ac_dir" || { ac_status=$?; continue; } # Check for configure.gnu first; this name is used for a wrapper for # Metaconfig's "Configure" on case-insensitive file systems. if test -f "$ac_srcdir/configure.gnu"; then echo && $SHELL "$ac_srcdir/configure.gnu" --help=recursive elif test -f "$ac_srcdir/configure"; then echo && $SHELL "$ac_srcdir/configure" --help=recursive else printf "%s\n" "$as_me: WARNING: no configuration information is in $ac_dir" >&2 fi || ac_status=$? cd "$ac_pwd" || { ac_status=$?; break; } done fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF StarPU configure 1.4.9 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF exit fi ## ------------------------ ## ## Autoconf initialization. ## ## ------------------------ ## # ac_fn_c_try_compile LINENO # -------------------------- # Try to compile conftest.$ac_ext, and return whether this succeeded. ac_fn_c_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext conftest.beam if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext then : ac_retval=0 else $as_nop printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_compile # ac_fn_cxx_try_compile LINENO # ---------------------------- # Try to compile conftest.$ac_ext, and return whether this succeeded. ac_fn_cxx_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext conftest.beam if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_cxx_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext then : ac_retval=0 else $as_nop printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_cxx_try_compile # ac_fn_c_try_cpp LINENO # ---------------------- # Try to preprocess conftest.$ac_ext, and return whether this succeeded. ac_fn_c_try_cpp () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack if { { ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } > conftest.i && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err } then : ac_retval=0 else $as_nop printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_cpp # ac_fn_f77_try_compile LINENO # ---------------------------- # Try to compile conftest.$ac_ext, and return whether this succeeded. ac_fn_f77_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext conftest.beam if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_f77_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext then : ac_retval=0 else $as_nop printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_f77_try_compile # ac_fn_fc_try_compile LINENO # --------------------------- # Try to compile conftest.$ac_ext, and return whether this succeeded. ac_fn_fc_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext conftest.beam if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_fc_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext then : ac_retval=0 else $as_nop printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_fc_try_compile # ac_fn_c_try_link LINENO # ----------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. ac_fn_c_try_link () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || test -x conftest$ac_exeext } then : ac_retval=0 else $as_nop printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would # interfere with the next link command; also delete a directory that is # left behind by Apple's compiler. We do this before executing the actions. rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_link # ac_fn_cxx_try_link LINENO # ------------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. ac_fn_cxx_try_link () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_cxx_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || test -x conftest$ac_exeext } then : ac_retval=0 else $as_nop printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would # interfere with the next link command; also delete a directory that is # left behind by Apple's compiler. We do this before executing the actions. rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_cxx_try_link # ac_fn_f77_try_link LINENO # ------------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. ac_fn_f77_try_link () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_f77_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || test -x conftest$ac_exeext } then : ac_retval=0 else $as_nop printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would # interfere with the next link command; also delete a directory that is # left behind by Apple's compiler. We do this before executing the actions. rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_f77_try_link # ac_fn_fc_try_link LINENO # ------------------------ # Try to link conftest.$ac_ext, and return whether this succeeded. ac_fn_fc_try_link () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_fc_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || test -x conftest$ac_exeext } then : ac_retval=0 else $as_nop printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would # interfere with the next link command; also delete a directory that is # left behind by Apple's compiler. We do this before executing the actions. rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_fc_try_link # ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES # ------------------------------------------------------- # Tests whether HEADER exists and can be compiled using the include files in # INCLUDES, setting the cache variable VAR accordingly. ac_fn_c_check_header_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 printf %s "checking for $2... " >&6; } if eval test \${$3+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 #include <$2> _ACEOF if ac_fn_c_try_compile "$LINENO" then : eval "$3=yes" else $as_nop eval "$3=no" fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi eval ac_res=\$$3 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 printf "%s\n" "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_header_compile # ac_fn_c_check_func LINENO FUNC VAR # ---------------------------------- # Tests whether FUNC exists, setting the cache variable VAR accordingly ac_fn_c_check_func () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 printf %s "checking for $2... " >&6; } if eval test \${$3+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Define $2 to an innocuous variant, in case declares $2. For example, HP-UX 11i declares gettimeofday. */ #define $2 innocuous_$2 /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $2 (); below. */ #include #undef $2 /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char $2 (); /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. Some functions are actually named something starting with __ and the normal name is an alias. */ #if defined __stub_$2 || defined __stub___$2 choke me #endif int main (void) { return $2 (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : eval "$3=yes" else $as_nop eval "$3=no" fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi eval ac_res=\$$3 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 printf "%s\n" "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_func # ac_fn_c_try_run LINENO # ---------------------- # Try to run conftest.$ac_ext, and return whether this succeeded. Assumes that # executables *can* be run. ac_fn_c_try_run () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; } then : ac_retval=0 else $as_nop printf "%s\n" "$as_me: program exited with status $ac_status" >&5 printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=$ac_status fi rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_run # ac_fn_c_check_type LINENO TYPE VAR INCLUDES # ------------------------------------------- # Tests whether TYPE exists after having included INCLUDES, setting cache # variable VAR accordingly. ac_fn_c_check_type () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 printf %s "checking for $2... " >&6; } if eval test \${$3+y} then : printf %s "(cached) " >&6 else $as_nop eval "$3=no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 int main (void) { if (sizeof ($2)) return 0; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 int main (void) { if (sizeof (($2))) return 0; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : else $as_nop eval "$3=yes" fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi eval ac_res=\$$3 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 printf "%s\n" "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_type # ac_fn_check_decl LINENO SYMBOL VAR INCLUDES EXTRA-OPTIONS FLAG-VAR # ------------------------------------------------------------------ # Tests whether SYMBOL is declared in INCLUDES, setting cache variable VAR # accordingly. Pass EXTRA-OPTIONS to the compiler, using FLAG-VAR. ac_fn_check_decl () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack as_decl_name=`echo $2|sed 's/ *(.*//'` { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is declared" >&5 printf %s "checking whether $as_decl_name is declared... " >&6; } if eval test \${$3+y} then : printf %s "(cached) " >&6 else $as_nop as_decl_use=`echo $2|sed -e 's/(/((/' -e 's/)/) 0&/' -e 's/,/) 0& (/g'` eval ac_save_FLAGS=\$$6 as_fn_append $6 " $5" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 int main (void) { #ifndef $as_decl_name #ifdef __cplusplus (void) $as_decl_use; #else (void) $as_decl_name; #endif #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : eval "$3=yes" else $as_nop eval "$3=no" fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext eval $6=\$ac_save_FLAGS fi eval ac_res=\$$3 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 printf "%s\n" "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_check_decl # ac_fn_cxx_try_cpp LINENO # ------------------------ # Try to preprocess conftest.$ac_ext, and return whether this succeeded. ac_fn_cxx_try_cpp () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack if { { ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } > conftest.i && { test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || test ! -s conftest.err } then : ac_retval=0 else $as_nop printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_cxx_try_cpp # ac_fn_c_compute_int LINENO EXPR VAR INCLUDES # -------------------------------------------- # Tries to find the compile-time value of EXPR in a program that includes # INCLUDES, setting VAR accordingly. Returns whether the value could be # computed ac_fn_c_compute_int () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack if test "$cross_compiling" = yes; then # Depending upon the size, compute the lo and hi bounds. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 int main (void) { static int test_array [1 - 2 * !(($2) >= 0)]; test_array [0] = 0; return test_array [0]; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_lo=0 ac_mid=0 while :; do cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 int main (void) { static int test_array [1 - 2 * !(($2) <= $ac_mid)]; test_array [0] = 0; return test_array [0]; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_hi=$ac_mid; break else $as_nop as_fn_arith $ac_mid + 1 && ac_lo=$as_val if test $ac_lo -le $ac_mid; then ac_lo= ac_hi= break fi as_fn_arith 2 '*' $ac_mid + 1 && ac_mid=$as_val fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext done else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 int main (void) { static int test_array [1 - 2 * !(($2) < 0)]; test_array [0] = 0; return test_array [0]; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_hi=-1 ac_mid=-1 while :; do cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 int main (void) { static int test_array [1 - 2 * !(($2) >= $ac_mid)]; test_array [0] = 0; return test_array [0]; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_lo=$ac_mid; break else $as_nop as_fn_arith '(' $ac_mid ')' - 1 && ac_hi=$as_val if test $ac_mid -le $ac_hi; then ac_lo= ac_hi= break fi as_fn_arith 2 '*' $ac_mid && ac_mid=$as_val fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext done else $as_nop ac_lo= ac_hi= fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext # Binary search between lo and hi bounds. while test "x$ac_lo" != "x$ac_hi"; do as_fn_arith '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo && ac_mid=$as_val cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 int main (void) { static int test_array [1 - 2 * !(($2) <= $ac_mid)]; test_array [0] = 0; return test_array [0]; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_hi=$ac_mid else $as_nop as_fn_arith '(' $ac_mid ')' + 1 && ac_lo=$as_val fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext done case $ac_lo in #(( ?*) eval "$3=\$ac_lo"; ac_retval=0 ;; '') ac_retval=1 ;; esac else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 static long int longval (void) { return $2; } static unsigned long int ulongval (void) { return $2; } #include #include int main (void) { FILE *f = fopen ("conftest.val", "w"); if (! f) return 1; if (($2) < 0) { long int i = longval (); if (i != ($2)) return 1; fprintf (f, "%ld", i); } else { unsigned long int i = ulongval (); if (i != ($2)) return 1; fprintf (f, "%lu", i); } /* Do not output a trailing newline, as this causes \r\n confusion on some platforms. */ return ferror (f) || fclose (f) != 0; ; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO" then : echo >>conftest.val; read $3 &5 printf %s "checking for $2.$3... " >&6; } if eval test \${$4+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $5 int main (void) { static $2 ac_aggr; if (ac_aggr.$3) return 0; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : eval "$4=yes" else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $5 int main (void) { static $2 ac_aggr; if (sizeof ac_aggr.$3) return 0; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : eval "$4=yes" else $as_nop eval "$4=no" fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi eval ac_res=\$$4 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 printf "%s\n" "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_member # ac_fn_cxx_check_header_compile LINENO HEADER VAR INCLUDES # --------------------------------------------------------- # Tests whether HEADER exists and can be compiled using the include files in # INCLUDES, setting the cache variable VAR accordingly. ac_fn_cxx_check_header_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 printf %s "checking for $2... " >&6; } if eval test \${$3+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 #include <$2> _ACEOF if ac_fn_cxx_try_compile "$LINENO" then : eval "$3=yes" else $as_nop eval "$3=no" fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi eval ac_res=\$$3 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 printf "%s\n" "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_cxx_check_header_compile ac_configure_args_raw= for ac_arg do case $ac_arg in *\'*) ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac as_fn_append ac_configure_args_raw " '$ac_arg'" done case $ac_configure_args_raw in *$as_nl*) ac_safe_unquote= ;; *) ac_unsafe_z='|&;<>()$`\\"*?[ '' ' # This string ends in space, tab. ac_unsafe_a="$ac_unsafe_z#~" ac_safe_unquote="s/ '\\([^$ac_unsafe_a][^$ac_unsafe_z]*\\)'/ \\1/g" ac_configure_args_raw=` printf "%s\n" "$ac_configure_args_raw" | sed "$ac_safe_unquote"`;; esac cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. It was created by StarPU $as_me 1.4.9, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw _ACEOF exec 5>>config.log { cat <<_ASUNAME ## --------- ## ## Platform. ## ## --------- ## hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` uname -m = `(uname -m) 2>/dev/null || echo unknown` uname -r = `(uname -r) 2>/dev/null || echo unknown` uname -s = `(uname -s) 2>/dev/null || echo unknown` uname -v = `(uname -v) 2>/dev/null || echo unknown` /usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` /bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` /bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` /usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` /usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` /bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` /usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` /bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` _ASUNAME as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac printf "%s\n" "PATH: $as_dir" done IFS=$as_save_IFS } >&5 cat >&5 <<_ACEOF ## ----------- ## ## Core tests. ## ## ----------- ## _ACEOF # Keep a trace of the command line. # Strip out --no-create and --no-recursion so they do not pile up. # Strip out --silent because we don't want to record it for future runs. # Also quote any args containing shell meta-characters. # Make two passes to allow for proper duplicate-argument suppression. ac_configure_args= ac_configure_args0= ac_configure_args1= ac_must_keep_next=false for ac_pass in 1 2 do for ac_arg do case $ac_arg in -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) continue ;; *\'*) ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; 2) as_fn_append ac_configure_args1 " '$ac_arg'" if test $ac_must_keep_next = true; then ac_must_keep_next=false # Got value, back to normal. else case $ac_arg in *=* | --config-cache | -C | -disable-* | --disable-* \ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ | -with-* | --with-* | -without-* | --without-* | --x) case "$ac_configure_args0 " in "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; esac ;; -* ) ac_must_keep_next=true ;; esac fi as_fn_append ac_configure_args " '$ac_arg'" ;; esac done done { ac_configure_args0=; unset ac_configure_args0;} { ac_configure_args1=; unset ac_configure_args1;} # When interrupted or exit'd, cleanup temporary files, and complete # config.log. We remove comments because anyway the quotes in there # would cause problems or look ugly. # WARNING: Use '\'' to represent an apostrophe within the trap. # WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. trap 'exit_status=$? # Sanitize IFS. IFS=" "" $as_nl" # Save into config.log some information that might help in debugging. { echo printf "%s\n" "## ---------------- ## ## Cache variables. ## ## ---------------- ##" echo # The following way of writing the cache mishandles newlines in values, ( for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) { eval $ac_var=; unset $ac_var;} ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( *${as_nl}ac_space=\ *) sed -n \ "s/'\''/'\''\\\\'\'''\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" ;; #( *) sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) echo printf "%s\n" "## ----------------- ## ## Output variables. ## ## ----------------- ##" echo for ac_var in $ac_subst_vars do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac printf "%s\n" "$ac_var='\''$ac_val'\''" done | sort echo if test -n "$ac_subst_files"; then printf "%s\n" "## ------------------- ## ## File substitutions. ## ## ------------------- ##" echo for ac_var in $ac_subst_files do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac printf "%s\n" "$ac_var='\''$ac_val'\''" done | sort echo fi if test -s confdefs.h; then printf "%s\n" "## ----------- ## ## confdefs.h. ## ## ----------- ##" echo cat confdefs.h echo fi test "$ac_signal" != 0 && printf "%s\n" "$as_me: caught signal $ac_signal" printf "%s\n" "$as_me: exit $exit_status" } >&5 rm -f core *.core core.conftest.* && rm -f -r conftest* confdefs* conf$$* $ac_clean_files && exit $exit_status ' 0 for ac_signal in 1 2 13 15; do trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal done ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -f -r conftest* confdefs.h printf "%s\n" "/* confdefs.h */" > confdefs.h # Predefined preprocessor variables. printf "%s\n" "#define PACKAGE_NAME \"$PACKAGE_NAME\"" >>confdefs.h printf "%s\n" "#define PACKAGE_TARNAME \"$PACKAGE_TARNAME\"" >>confdefs.h printf "%s\n" "#define PACKAGE_VERSION \"$PACKAGE_VERSION\"" >>confdefs.h printf "%s\n" "#define PACKAGE_STRING \"$PACKAGE_STRING\"" >>confdefs.h printf "%s\n" "#define PACKAGE_BUGREPORT \"$PACKAGE_BUGREPORT\"" >>confdefs.h printf "%s\n" "#define PACKAGE_URL \"$PACKAGE_URL\"" >>confdefs.h # Let the site file select an alternate cache file if it wants to. # Prefer an explicitly selected file to automatically selected ones. if test -n "$CONFIG_SITE"; then ac_site_files="$CONFIG_SITE" elif test "x$prefix" != xNONE; then ac_site_files="$prefix/share/config.site $prefix/etc/config.site" else ac_site_files="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" fi for ac_site_file in $ac_site_files do case $ac_site_file in #( */*) : ;; #( *) : ac_site_file=./$ac_site_file ;; esac if test -f "$ac_site_file" && test -r "$ac_site_file"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 printf "%s\n" "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . "$ac_site_file" \ || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "failed to load site script $ac_site_file See \`config.log' for more details" "$LINENO" 5; } fi done if test -r "$cache_file"; then # Some versions of bash will fail to source /dev/null (special files # actually), so we avoid doing that. DJGPP emulates it as a regular file. if test /dev/null != "$cache_file" && test -f "$cache_file"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 printf "%s\n" "$as_me: loading cache $cache_file" >&6;} case $cache_file in [\\/]* | ?:[\\/]* ) . "$cache_file";; *) . "./$cache_file";; esac fi else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 printf "%s\n" "$as_me: creating cache $cache_file" >&6;} >$cache_file fi # Test code for whether the C compiler supports C89 (global declarations) ac_c_conftest_c89_globals=' /* Does the compiler advertise C89 conformance? Do not test the value of __STDC__, because some compilers set it to 0 while being otherwise adequately conformant. */ #if !defined __STDC__ # error "Compiler does not advertise C89 conformance" #endif #include #include struct stat; /* Most of the following tests are stolen from RCS 5.7 src/conf.sh. */ struct buf { int x; }; struct buf * (*rcsopen) (struct buf *, struct stat *, int); static char *e (p, i) char **p; int i; { return p[i]; } static char *f (char * (*g) (char **, int), char **p, ...) { char *s; va_list v; va_start (v,p); s = g (p, va_arg (v,int)); va_end (v); return s; } /* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has function prototypes and stuff, but not \xHH hex character constants. These do not provoke an error unfortunately, instead are silently treated as an "x". The following induces an error, until -std is added to get proper ANSI mode. Curiously \x00 != x always comes out true, for an array size at least. It is necessary to write \x00 == 0 to get something that is true only with -std. */ int osf4_cc_array ['\''\x00'\'' == 0 ? 1 : -1]; /* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters inside strings and character constants. */ #define FOO(x) '\''x'\'' int xlc6_cc_array[FOO(a) == '\''x'\'' ? 1 : -1]; int test (int i, double x); struct s1 {int (*f) (int a);}; struct s2 {int (*f) (double a);}; int pairnames (int, char **, int *(*)(struct buf *, struct stat *, int), int, int);' # Test code for whether the C compiler supports C89 (body of main). ac_c_conftest_c89_main=' ok |= (argc == 0 || f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]); ' # Test code for whether the C compiler supports C99 (global declarations) ac_c_conftest_c99_globals=' // Does the compiler advertise C99 conformance? #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L # error "Compiler does not advertise C99 conformance" #endif #include extern int puts (const char *); extern int printf (const char *, ...); extern int dprintf (int, const char *, ...); extern void *malloc (size_t); // Check varargs macros. These examples are taken from C99 6.10.3.5. // dprintf is used instead of fprintf to avoid needing to declare // FILE and stderr. #define debug(...) dprintf (2, __VA_ARGS__) #define showlist(...) puts (#__VA_ARGS__) #define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__)) static void test_varargs_macros (void) { int x = 1234; int y = 5678; debug ("Flag"); debug ("X = %d\n", x); showlist (The first, second, and third items.); report (x>y, "x is %d but y is %d", x, y); } // Check long long types. #define BIG64 18446744073709551615ull #define BIG32 4294967295ul #define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0) #if !BIG_OK #error "your preprocessor is broken" #endif #if BIG_OK #else #error "your preprocessor is broken" #endif static long long int bignum = -9223372036854775807LL; static unsigned long long int ubignum = BIG64; struct incomplete_array { int datasize; double data[]; }; struct named_init { int number; const wchar_t *name; double average; }; typedef const char *ccp; static inline int test_restrict (ccp restrict text) { // See if C++-style comments work. // Iterate through items via the restricted pointer. // Also check for declarations in for loops. for (unsigned int i = 0; *(text+i) != '\''\0'\''; ++i) continue; return 0; } // Check varargs and va_copy. static bool test_varargs (const char *format, ...) { va_list args; va_start (args, format); va_list args_copy; va_copy (args_copy, args); const char *str = ""; int number = 0; float fnumber = 0; while (*format) { switch (*format++) { case '\''s'\'': // string str = va_arg (args_copy, const char *); break; case '\''d'\'': // int number = va_arg (args_copy, int); break; case '\''f'\'': // float fnumber = va_arg (args_copy, double); break; default: break; } } va_end (args_copy); va_end (args); return *str && number && fnumber; } ' # Test code for whether the C compiler supports C99 (body of main). ac_c_conftest_c99_main=' // Check bool. _Bool success = false; success |= (argc != 0); // Check restrict. if (test_restrict ("String literal") == 0) success = true; char *restrict newvar = "Another string"; // Check varargs. success &= test_varargs ("s, d'\'' f .", "string", 65, 34.234); test_varargs_macros (); // Check flexible array members. struct incomplete_array *ia = malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10)); ia->datasize = 10; for (int i = 0; i < ia->datasize; ++i) ia->data[i] = i * 1.234; // Check named initializers. struct named_init ni = { .number = 34, .name = L"Test wide string", .average = 543.34343, }; ni.number = 58; int dynamic_array[ni.number]; dynamic_array[0] = argv[0][0]; dynamic_array[ni.number - 1] = 543; // work around unused variable warnings ok |= (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == '\''x'\'' || dynamic_array[ni.number - 1] != 543); ' # Test code for whether the C compiler supports C11 (global declarations) ac_c_conftest_c11_globals=' // Does the compiler advertise C11 conformance? #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112L # error "Compiler does not advertise C11 conformance" #endif // Check _Alignas. char _Alignas (double) aligned_as_double; char _Alignas (0) no_special_alignment; extern char aligned_as_int; char _Alignas (0) _Alignas (int) aligned_as_int; // Check _Alignof. enum { int_alignment = _Alignof (int), int_array_alignment = _Alignof (int[100]), char_alignment = _Alignof (char) }; _Static_assert (0 < -_Alignof (int), "_Alignof is signed"); // Check _Noreturn. int _Noreturn does_not_return (void) { for (;;) continue; } // Check _Static_assert. struct test_static_assert { int x; _Static_assert (sizeof (int) <= sizeof (long int), "_Static_assert does not work in struct"); long int y; }; // Check UTF-8 literals. #define u8 syntax error! char const utf8_literal[] = u8"happens to be ASCII" "another string"; // Check duplicate typedefs. typedef long *long_ptr; typedef long int *long_ptr; typedef long_ptr long_ptr; // Anonymous structures and unions -- taken from C11 6.7.2.1 Example 1. struct anonymous { union { struct { int i; int j; }; struct { int k; long int l; } w; }; int m; } v1; ' # Test code for whether the C compiler supports C11 (body of main). ac_c_conftest_c11_main=' _Static_assert ((offsetof (struct anonymous, i) == offsetof (struct anonymous, w.k)), "Anonymous union alignment botch"); v1.i = 2; v1.w.k = 5; ok |= v1.i != 5; ' # Test code for whether the C compiler supports C11 (complete). ac_c_conftest_c11_program="${ac_c_conftest_c89_globals} ${ac_c_conftest_c99_globals} ${ac_c_conftest_c11_globals} int main (int argc, char **argv) { int ok = 0; ${ac_c_conftest_c89_main} ${ac_c_conftest_c99_main} ${ac_c_conftest_c11_main} return ok; } " # Test code for whether the C compiler supports C99 (complete). ac_c_conftest_c99_program="${ac_c_conftest_c89_globals} ${ac_c_conftest_c99_globals} int main (int argc, char **argv) { int ok = 0; ${ac_c_conftest_c89_main} ${ac_c_conftest_c99_main} return ok; } " # Test code for whether the C compiler supports C89 (complete). ac_c_conftest_c89_program="${ac_c_conftest_c89_globals} int main (int argc, char **argv) { int ok = 0; ${ac_c_conftest_c89_main} return ok; } " # Test code for whether the C++ compiler supports C++98 (global declarations) ac_cxx_conftest_cxx98_globals=' // Does the compiler advertise C++98 conformance? #if !defined __cplusplus || __cplusplus < 199711L # error "Compiler does not advertise C++98 conformance" #endif // These inclusions are to reject old compilers that // lack the unsuffixed header files. #include #include // and are *not* freestanding headers in C++98. extern void assert (int); namespace std { extern int strcmp (const char *, const char *); } // Namespaces, exceptions, and templates were all added after "C++ 2.0". using std::exception; using std::strcmp; namespace { void test_exception_syntax() { try { throw "test"; } catch (const char *s) { // Extra parentheses suppress a warning when building autoconf itself, // due to lint rules shared with more typical C programs. assert (!(strcmp) (s, "test")); } } template struct test_template { T const val; explicit test_template(T t) : val(t) {} template T add(U u) { return static_cast(u) + val; } }; } // anonymous namespace ' # Test code for whether the C++ compiler supports C++98 (body of main) ac_cxx_conftest_cxx98_main=' assert (argc); assert (! argv[0]); { test_exception_syntax (); test_template tt (2.0); assert (tt.add (4) == 6.0); assert (true && !false); } ' # Test code for whether the C++ compiler supports C++11 (global declarations) ac_cxx_conftest_cxx11_globals=' // Does the compiler advertise C++ 2011 conformance? #if !defined __cplusplus || __cplusplus < 201103L # error "Compiler does not advertise C++11 conformance" #endif namespace cxx11test { constexpr int get_val() { return 20; } struct testinit { int i; double d; }; class delegate { public: delegate(int n) : n(n) {} delegate(): delegate(2354) {} virtual int getval() { return this->n; }; protected: int n; }; class overridden : public delegate { public: overridden(int n): delegate(n) {} virtual int getval() override final { return this->n * 2; } }; class nocopy { public: nocopy(int i): i(i) {} nocopy() = default; nocopy(const nocopy&) = delete; nocopy & operator=(const nocopy&) = delete; private: int i; }; // for testing lambda expressions template Ret eval(Fn f, Ret v) { return f(v); } // for testing variadic templates and trailing return types template auto sum(V first) -> V { return first; } template auto sum(V first, Args... rest) -> V { return first + sum(rest...); } } ' # Test code for whether the C++ compiler supports C++11 (body of main) ac_cxx_conftest_cxx11_main=' { // Test auto and decltype auto a1 = 6538; auto a2 = 48573953.4; auto a3 = "String literal"; int total = 0; for (auto i = a3; *i; ++i) { total += *i; } decltype(a2) a4 = 34895.034; } { // Test constexpr short sa[cxx11test::get_val()] = { 0 }; } { // Test initializer lists cxx11test::testinit il = { 4323, 435234.23544 }; } { // Test range-based for int array[] = {9, 7, 13, 15, 4, 18, 12, 10, 5, 3, 14, 19, 17, 8, 6, 20, 16, 2, 11, 1}; for (auto &x : array) { x += 23; } } { // Test lambda expressions using cxx11test::eval; assert (eval ([](int x) { return x*2; }, 21) == 42); double d = 2.0; assert (eval ([&](double x) { return d += x; }, 3.0) == 5.0); assert (d == 5.0); assert (eval ([=](double x) mutable { return d += x; }, 4.0) == 9.0); assert (d == 5.0); } { // Test use of variadic templates using cxx11test::sum; auto a = sum(1); auto b = sum(1, 2); auto c = sum(1.0, 2.0, 3.0); } { // Test constructor delegation cxx11test::delegate d1; cxx11test::delegate d2(); cxx11test::delegate d3(45); } { // Test override and final cxx11test::overridden o1(55464); } { // Test nullptr char *c = nullptr; } { // Test template brackets test_template<::test_template> v(test_template(12)); } { // Unicode literals char const *utf8 = u8"UTF-8 string \u2500"; char16_t const *utf16 = u"UTF-8 string \u2500"; char32_t const *utf32 = U"UTF-32 string \u2500"; } ' # Test code for whether the C compiler supports C++11 (complete). ac_cxx_conftest_cxx11_program="${ac_cxx_conftest_cxx98_globals} ${ac_cxx_conftest_cxx11_globals} int main (int argc, char **argv) { int ok = 0; ${ac_cxx_conftest_cxx98_main} ${ac_cxx_conftest_cxx11_main} return ok; } " # Test code for whether the C compiler supports C++98 (complete). ac_cxx_conftest_cxx98_program="${ac_cxx_conftest_cxx98_globals} int main (int argc, char **argv) { int ok = 0; ${ac_cxx_conftest_cxx98_main} return ok; } " as_fn_append ac_header_c_list " stdio.h stdio_h HAVE_STDIO_H" as_fn_append ac_header_c_list " stdlib.h stdlib_h HAVE_STDLIB_H" as_fn_append ac_header_c_list " string.h string_h HAVE_STRING_H" as_fn_append ac_header_c_list " inttypes.h inttypes_h HAVE_INTTYPES_H" as_fn_append ac_header_c_list " stdint.h stdint_h HAVE_STDINT_H" as_fn_append ac_header_c_list " strings.h strings_h HAVE_STRINGS_H" as_fn_append ac_header_c_list " sys/stat.h sys_stat_h HAVE_SYS_STAT_H" as_fn_append ac_header_c_list " sys/types.h sys_types_h HAVE_SYS_TYPES_H" as_fn_append ac_header_c_list " unistd.h unistd_h HAVE_UNISTD_H" as_fn_append ac_header_c_list " sys/param.h sys_param_h HAVE_SYS_PARAM_H" as_fn_append ac_func_c_list " getpagesize HAVE_GETPAGESIZE" # Auxiliary files required by this configure script. ac_aux_files="ltmain.sh compile ar-lib missing install-sh config.guess config.sub" # Locations in which to look for auxiliary files. ac_aux_dir_candidates="${srcdir}/build-aux" # Search for a directory containing all of the required auxiliary files, # $ac_aux_files, from the $PATH-style list $ac_aux_dir_candidates. # If we don't find one directory that contains all the files we need, # we report the set of missing files from the *first* directory in # $ac_aux_dir_candidates and give up. ac_missing_aux_files="" ac_first_candidate=: printf "%s\n" "$as_me:${as_lineno-$LINENO}: looking for aux files: $ac_aux_files" >&5 as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_found=false for as_dir in $ac_aux_dir_candidates do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac as_found=: printf "%s\n" "$as_me:${as_lineno-$LINENO}: trying $as_dir" >&5 ac_aux_dir_found=yes ac_install_sh= for ac_aux in $ac_aux_files do # As a special case, if "install-sh" is required, that requirement # can be satisfied by any of "install-sh", "install.sh", or "shtool", # and $ac_install_sh is set appropriately for whichever one is found. if test x"$ac_aux" = x"install-sh" then if test -f "${as_dir}install-sh"; then printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install-sh found" >&5 ac_install_sh="${as_dir}install-sh -c" elif test -f "${as_dir}install.sh"; then printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install.sh found" >&5 ac_install_sh="${as_dir}install.sh -c" elif test -f "${as_dir}shtool"; then printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}shtool found" >&5 ac_install_sh="${as_dir}shtool install -c" else ac_aux_dir_found=no if $ac_first_candidate; then ac_missing_aux_files="${ac_missing_aux_files} install-sh" else break fi fi else if test -f "${as_dir}${ac_aux}"; then printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}${ac_aux} found" >&5 else ac_aux_dir_found=no if $ac_first_candidate; then ac_missing_aux_files="${ac_missing_aux_files} ${ac_aux}" else break fi fi fi done if test "$ac_aux_dir_found" = yes; then ac_aux_dir="$as_dir" break fi ac_first_candidate=false as_found=false done IFS=$as_save_IFS if $as_found then : else $as_nop as_fn_error $? "cannot find required auxiliary files:$ac_missing_aux_files" "$LINENO" 5 fi # These three variables are undocumented and unsupported, # and are intended to be withdrawn in a future Autoconf release. # They can cause serious problems if a builder's source tree is in a directory # whose full name contains unusual characters. if test -f "${ac_aux_dir}config.guess"; then ac_config_guess="$SHELL ${ac_aux_dir}config.guess" fi if test -f "${ac_aux_dir}config.sub"; then ac_config_sub="$SHELL ${ac_aux_dir}config.sub" fi if test -f "$ac_aux_dir/configure"; then ac_configure="$SHELL ${ac_aux_dir}configure" fi # Check that the precious variables saved in the cache have kept the same # value. ac_cache_corrupted=false for ac_var in $ac_precious_vars; do eval ac_old_set=\$ac_cv_env_${ac_var}_set eval ac_new_set=\$ac_env_${ac_var}_set eval ac_old_val=\$ac_cv_env_${ac_var}_value eval ac_new_val=\$ac_env_${ac_var}_value case $ac_old_set,$ac_new_set in set,) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 printf "%s\n" "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} ac_cache_corrupted=: ;; ,set) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 printf "%s\n" "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_cache_corrupted=: ;; ,);; *) if test "x$ac_old_val" != "x$ac_new_val"; then # differences in whitespace do not lead to failure. ac_old_val_w=`echo x $ac_old_val` ac_new_val_w=`echo x $ac_new_val` if test "$ac_old_val_w" != "$ac_new_val_w"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 printf "%s\n" "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} ac_cache_corrupted=: else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 printf "%s\n" "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} eval $ac_var=\$ac_old_val fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 printf "%s\n" "$as_me: former value: \`$ac_old_val'" >&2;} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 printf "%s\n" "$as_me: current value: \`$ac_new_val'" >&2;} fi;; esac # Pass precious variables to config.status. if test "$ac_new_set" = set; then case $ac_new_val in *\'*) ac_arg=$ac_var=`printf "%s\n" "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; *) ac_arg=$ac_var=$ac_new_val ;; esac case " $ac_configure_args " in *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. *) as_fn_append ac_configure_args " '$ac_arg'" ;; esac fi done if $ac_cache_corrupted; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 printf "%s\n" "$as_me: error: changes in the environment can compromise the build" >&2;} as_fn_error $? "run \`${MAKE-make} distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 fi ## -------------------- ## ## Main body of script. ## ## -------------------- ## ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu # libtool doesn't actually properly manage a space in the workdir case `pwd` in *[\\\"\#\$\&\'\`$am_lf\ \ ]*) as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;; esac STARPU_MAJOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 1`" STARPU_MINOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 2`" STARPU_RELEASE_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 3`" STARPU_RELEASE_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 3| sed 's/rc.*//'`" printf "%s\n" "#define STARPU_MAJOR_VERSION $STARPU_MAJOR_VERSION" >>confdefs.h printf "%s\n" "#define STARPU_MINOR_VERSION $STARPU_MINOR_VERSION" >>confdefs.h printf "%s\n" "#define STARPU_RELEASE_VERSION $STARPU_RELEASE_VERSION" >>confdefs.h . "$srcdir/STARPU-VERSION" # Make sure we can run config.sub. $SHELL "${ac_aux_dir}config.sub" sun4 >/dev/null 2>&1 || as_fn_error $? "cannot run $SHELL ${ac_aux_dir}config.sub" "$LINENO" 5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 printf %s "checking build system type... " >&6; } if test ${ac_cv_build+y} then : printf %s "(cached) " >&6 else $as_nop ac_build_alias=$build_alias test "x$ac_build_alias" = x && ac_build_alias=`$SHELL "${ac_aux_dir}config.guess"` test "x$ac_build_alias" = x && as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 ac_cv_build=`$SHELL "${ac_aux_dir}config.sub" $ac_build_alias` || as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $ac_build_alias failed" "$LINENO" 5 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 printf "%s\n" "$ac_cv_build" >&6; } case $ac_cv_build in *-*-*) ;; *) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;; esac build=$ac_cv_build ac_save_IFS=$IFS; IFS='-' set x $ac_cv_build shift build_cpu=$1 build_vendor=$2 shift; shift # Remember, the first character of IFS is used to create $*, # except with old shells: build_os=$* IFS=$ac_save_IFS case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 printf %s "checking host system type... " >&6; } if test ${ac_cv_host+y} then : printf %s "(cached) " >&6 else $as_nop if test "x$host_alias" = x; then ac_cv_host=$ac_cv_build else ac_cv_host=`$SHELL "${ac_aux_dir}config.sub" $host_alias` || as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $host_alias failed" "$LINENO" 5 fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 printf "%s\n" "$ac_cv_host" >&6; } case $ac_cv_host in *-*-*) ;; *) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;; esac host=$ac_cv_host ac_save_IFS=$IFS; IFS='-' set x $ac_cv_host shift host_cpu=$1 host_vendor=$2 shift; shift # Remember, the first character of IFS is used to create $*, # except with old shells: host_os=$* IFS=$ac_save_IFS case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking target system type" >&5 printf %s "checking target system type... " >&6; } if test ${ac_cv_target+y} then : printf %s "(cached) " >&6 else $as_nop if test "x$target_alias" = x; then ac_cv_target=$ac_cv_host else ac_cv_target=`$SHELL "${ac_aux_dir}config.sub" $target_alias` || as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $target_alias failed" "$LINENO" 5 fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_target" >&5 printf "%s\n" "$ac_cv_target" >&6; } case $ac_cv_target in *-*-*) ;; *) as_fn_error $? "invalid value of canonical target" "$LINENO" 5;; esac target=$ac_cv_target ac_save_IFS=$IFS; IFS='-' set x $ac_cv_target shift target_cpu=$1 target_vendor=$2 shift; shift # Remember, the first character of IFS is used to create $*, # except with old shells: target_os=$* IFS=$ac_save_IFS case $target_os in *\ *) target_os=`echo "$target_os" | sed 's/ /-/g'`;; esac # The aliases save the names the user supplied, while $host etc. # will get canonicalized. test -n "$target_alias" && test "$program_prefix$program_suffix$program_transform_name" = \ NONENONEs,x,x, && program_prefix=${target_alias}- am__api_version='1.16' # Find a good install program. We prefer a C program (faster), # so one script is as good as another. But avoid the broken or # incompatible versions: # SysV /etc/install, /usr/sbin/install # SunOS /usr/etc/install # IRIX /sbin/install # AIX /bin/install # AmigaOS /C/install, which installs bootblocks on floppy discs # AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag # AFS /usr/afsws/bin/install, which mishandles nonexistent args # SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" # OS/2's system install, which has a completely different semantic # ./install, which can be erroneously created by make from ./install.sh. # Reject install programs that cannot install multiple files. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 printf %s "checking for a BSD-compatible install... " >&6; } if test -z "$INSTALL"; then if test ${ac_cv_path_install+y} then : printf %s "(cached) " >&6 else $as_nop as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac # Account for fact that we put trailing slashes in our PATH walk. case $as_dir in #(( ./ | /[cC]/* | \ /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ /usr/ucb/* ) ;; *) # OSF1 and SCO ODT 3.0 have their own names for install. # Don't use installbsd from OSF since it installs stuff as root # by default. for ac_prog in ginstall scoinst install; do for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_prog$ac_exec_ext"; then if test $ac_prog = install && grep dspmsg "$as_dir$ac_prog$ac_exec_ext" >/dev/null 2>&1; then # AIX install. It has an incompatible calling convention. : elif test $ac_prog = install && grep pwplus "$as_dir$ac_prog$ac_exec_ext" >/dev/null 2>&1; then # program-specific install script used by HP pwplus--don't use. : else rm -rf conftest.one conftest.two conftest.dir echo one > conftest.one echo two > conftest.two mkdir conftest.dir if "$as_dir$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir/" && test -s conftest.one && test -s conftest.two && test -s conftest.dir/conftest.one && test -s conftest.dir/conftest.two then ac_cv_path_install="$as_dir$ac_prog$ac_exec_ext -c" break 3 fi fi fi done done ;; esac done IFS=$as_save_IFS rm -rf conftest.one conftest.two conftest.dir fi if test ${ac_cv_path_install+y}; then INSTALL=$ac_cv_path_install else # As a last resort, use the slow shell script. Don't cache a # value for INSTALL within a source directory, because that will # break other packages using the cache if that directory is # removed, or if the value is a relative name. INSTALL=$ac_install_sh fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 printf "%s\n" "$INSTALL" >&6; } # Use test -z because SunOS4 sh mishandles braces in ${var-val}. # It thinks the first close brace ends the variable substitution. test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 printf %s "checking whether build environment is sane... " >&6; } # Reject unsafe characters in $srcdir or the absolute working directory # name. Accept space and tab only in the latter. am_lf=' ' case `pwd` in *[\\\"\#\$\&\'\`$am_lf]*) as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;; esac case $srcdir in *[\\\"\#\$\&\'\`$am_lf\ \ ]*) as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;; esac # Do 'set' in a subshell so we don't clobber the current shell's # arguments. Must try -L first in case configure is actually a # symlink; some systems play weird games with the mod time of symlinks # (eg FreeBSD returns the mod time of the symlink's containing # directory). if ( am_has_slept=no for am_try in 1 2; do echo "timestamp, slept: $am_has_slept" > conftest.file set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` if test "$*" = "X"; then # -L didn't work. set X `ls -t "$srcdir/configure" conftest.file` fi if test "$*" != "X $srcdir/configure conftest.file" \ && test "$*" != "X conftest.file $srcdir/configure"; then # If neither matched, then we have a broken ls. This can happen # if, for instance, CONFIG_SHELL is bash and it inherits a # broken ls alias from the environment. This has actually # happened. Such a system could not be considered "sane". as_fn_error $? "ls -t appears to fail. Make sure there is not a broken alias in your environment" "$LINENO" 5 fi if test "$2" = conftest.file || test $am_try -eq 2; then break fi # Just in case. sleep 1 am_has_slept=yes done test "$2" = conftest.file ) then # Ok. : else as_fn_error $? "newly created file is older than distributed files! Check your system clock" "$LINENO" 5 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } # If we didn't sleep, we still need to ensure time stamps of config.status and # generated files are strictly newer. am_sleep_pid= if grep 'slept: no' conftest.file >/dev/null 2>&1; then ( sleep 1 ) & am_sleep_pid=$! fi rm -f conftest.file test "$program_prefix" != NONE && program_transform_name="s&^&$program_prefix&;$program_transform_name" # Use a double $ so make ignores it. test "$program_suffix" != NONE && program_transform_name="s&\$&$program_suffix&;$program_transform_name" # Double any \ or $. # By default was `s,x,x', remove it if useless. ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' program_transform_name=`printf "%s\n" "$program_transform_name" | sed "$ac_script"` # Expand $ac_aux_dir to an absolute path. am_aux_dir=`cd "$ac_aux_dir" && pwd` if test x"${MISSING+set}" != xset; then MISSING="\${SHELL} '$am_aux_dir/missing'" fi # Use eval to expand $SHELL if eval "$MISSING --is-lightweight"; then am_missing_run="$MISSING " else am_missing_run= { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5 printf "%s\n" "$as_me: WARNING: 'missing' script is too old or missing" >&2;} fi if test x"${install_sh+set}" != xset; then case $am_aux_dir in *\ * | *\ *) install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; *) install_sh="\${SHELL} $am_aux_dir/install-sh" esac fi # Installed binaries are usually stripped using 'strip' when the user # run "make install-strip". However 'strip' might not be the right # tool to use in cross-compilation environments, therefore Automake # will honor the 'STRIP' environment variable to overrule this program. if test "$cross_compiling" != no; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. set dummy ${ac_tool_prefix}strip; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_STRIP+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$STRIP"; then ac_cv_prog_STRIP="$STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_STRIP="${ac_tool_prefix}strip" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi STRIP=$ac_cv_prog_STRIP if test -n "$STRIP"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 printf "%s\n" "$STRIP" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_STRIP"; then ac_ct_STRIP=$STRIP # Extract the first word of "strip", so it can be a program name with args. set dummy strip; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_STRIP+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_STRIP"; then ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_STRIP="strip" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP if test -n "$ac_ct_STRIP"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 printf "%s\n" "$ac_ct_STRIP" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_STRIP" = x; then STRIP=":" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac STRIP=$ac_ct_STRIP fi else STRIP="$ac_cv_prog_STRIP" fi fi INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a race-free mkdir -p" >&5 printf %s "checking for a race-free mkdir -p... " >&6; } if test -z "$MKDIR_P"; then if test ${ac_cv_path_mkdir+y} then : printf %s "(cached) " >&6 else $as_nop as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_prog in mkdir gmkdir; do for ac_exec_ext in '' $ac_executable_extensions; do as_fn_executable_p "$as_dir$ac_prog$ac_exec_ext" || continue case `"$as_dir$ac_prog$ac_exec_ext" --version 2>&1` in #( 'mkdir ('*'coreutils) '* | \ 'BusyBox '* | \ 'mkdir (fileutils) '4.1*) ac_cv_path_mkdir=$as_dir$ac_prog$ac_exec_ext break 3;; esac done done done IFS=$as_save_IFS fi test -d ./--version && rmdir ./--version if test ${ac_cv_path_mkdir+y}; then MKDIR_P="$ac_cv_path_mkdir -p" else # As a last resort, use the slow shell script. Don't cache a # value for MKDIR_P within a source directory, because that will # break other packages using the cache if that directory is # removed, or if the value is a relative name. MKDIR_P="$ac_install_sh -d" fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 printf "%s\n" "$MKDIR_P" >&6; } for ac_prog in gawk mawk nawk awk do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_AWK+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$AWK"; then ac_cv_prog_AWK="$AWK" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_AWK="$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi AWK=$ac_cv_prog_AWK if test -n "$AWK"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 printf "%s\n" "$AWK" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$AWK" && break done { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 printf %s "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } set x ${MAKE-make} ac_make=`printf "%s\n" "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` if eval test \${ac_cv_prog_make_${ac_make}_set+y} then : printf %s "(cached) " >&6 else $as_nop cat >conftest.make <<\_ACEOF SHELL = /bin/sh all: @echo '@@@%%%=$(MAKE)=@@@%%%' _ACEOF # GNU make sometimes prints "make[1]: Entering ...", which would confuse us. case `${MAKE-make} -f conftest.make 2>/dev/null` in *@@@%%%=?*=@@@%%%*) eval ac_cv_prog_make_${ac_make}_set=yes;; *) eval ac_cv_prog_make_${ac_make}_set=no;; esac rm -f conftest.make fi if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } SET_MAKE= else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } SET_MAKE="MAKE=${MAKE-make}" fi rm -rf .tst 2>/dev/null mkdir .tst 2>/dev/null if test -d .tst; then am__leading_dot=. else am__leading_dot=_ fi rmdir .tst 2>/dev/null # Check whether --enable-silent-rules was given. if test ${enable_silent_rules+y} then : enableval=$enable_silent_rules; fi case $enable_silent_rules in # ((( yes) AM_DEFAULT_VERBOSITY=0;; no) AM_DEFAULT_VERBOSITY=1;; *) AM_DEFAULT_VERBOSITY=1;; esac am_make=${MAKE-make} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 printf %s "checking whether $am_make supports nested variables... " >&6; } if test ${am_cv_make_support_nested_variables+y} then : printf %s "(cached) " >&6 else $as_nop if printf "%s\n" 'TRUE=$(BAR$(V)) BAR0=false BAR1=true V=1 am__doit: @$(TRUE) .PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then am_cv_make_support_nested_variables=yes else am_cv_make_support_nested_variables=no fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 printf "%s\n" "$am_cv_make_support_nested_variables" >&6; } if test $am_cv_make_support_nested_variables = yes; then AM_V='$(V)' AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' else AM_V=$AM_DEFAULT_VERBOSITY AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY fi AM_BACKSLASH='\' if test "`cd $srcdir && pwd`" != "`pwd`"; then # Use -I$(srcdir) only when $(srcdir) != ., so that make's output # is not polluted with repeated "-I." am__isrc=' -I$(srcdir)' # test to see if srcdir already configured if test -f $srcdir/config.status; then as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5 fi fi # test whether we have cygpath if test -z "$CYGPATH_W"; then if (cygpath --version) >/dev/null 2>/dev/null; then CYGPATH_W='cygpath -w' else CYGPATH_W=echo fi fi # Define the identity of the package. PACKAGE='starpu' VERSION='1.4.9' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h printf "%s\n" "#define VERSION \"$VERSION\"" >>confdefs.h # Some tools Automake needs. ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} # For better backward compatibility. To be removed once Automake 1.9.x # dies out for good. For more background, see: # # mkdir_p='$(MKDIR_P)' # We need awk for the "check" target (and possibly the TAP driver). The # system "awk" is bad on some platforms. # Always define AMTAR for backward compatibility. Yes, it's still used # in the wild :-( We should find a proper way to deprecate it ... AMTAR='$${TAR-tar}' # We'll loop over all known methods to create a tar archive until one works. _am_tools='gnutar pax cpio none' { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to create a pax tar archive" >&5 printf %s "checking how to create a pax tar archive... " >&6; } # Go ahead even if we have the value already cached. We do so because we # need to set the values for the 'am__tar' and 'am__untar' variables. _am_tools=${am_cv_prog_tar_pax-$_am_tools} for _am_tool in $_am_tools; do case $_am_tool in gnutar) for _am_tar in tar gnutar gtar; do { echo "$as_me:$LINENO: $_am_tar --version" >&5 ($_am_tar --version) >&5 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && break done am__tar="$_am_tar --format=posix -chf - "'"$$tardir"' am__tar_="$_am_tar --format=posix -chf - "'"$tardir"' am__untar="$_am_tar -xf -" ;; plaintar) # Must skip GNU tar: if it does not support --format= it doesn't create # ustar tarball either. (tar --version) >/dev/null 2>&1 && continue am__tar='tar chf - "$$tardir"' am__tar_='tar chf - "$tardir"' am__untar='tar xf -' ;; pax) am__tar='pax -L -x pax -w "$$tardir"' am__tar_='pax -L -x pax -w "$tardir"' am__untar='pax -r' ;; cpio) am__tar='find "$$tardir" -print | cpio -o -H pax -L' am__tar_='find "$tardir" -print | cpio -o -H pax -L' am__untar='cpio -i -H pax -d' ;; none) am__tar=false am__tar_=false am__untar=false ;; esac # If the value was cached, stop now. We just wanted to have am__tar # and am__untar set. test -n "${am_cv_prog_tar_pax}" && break # tar/untar a dummy directory, and stop if the command works. rm -rf conftest.dir mkdir conftest.dir echo GrepMe > conftest.dir/file { echo "$as_me:$LINENO: tardir=conftest.dir && eval $am__tar_ >conftest.tar" >&5 (tardir=conftest.dir && eval $am__tar_ >conftest.tar) >&5 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } rm -rf conftest.dir if test -s conftest.tar; then { echo "$as_me:$LINENO: $am__untar &5 ($am__untar &5 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } { echo "$as_me:$LINENO: cat conftest.dir/file" >&5 (cat conftest.dir/file) >&5 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } grep GrepMe conftest.dir/file >/dev/null 2>&1 && break fi done rm -rf conftest.dir if test ${am_cv_prog_tar_pax+y} then : printf %s "(cached) " >&6 else $as_nop am_cv_prog_tar_pax=$_am_tool fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_tar_pax" >&5 printf "%s\n" "$am_cv_prog_tar_pax" >&6; } # Variables for tags utilities; see am/tags.am if test -z "$CTAGS"; then CTAGS=ctags fi if test -z "$ETAGS"; then ETAGS=etags fi if test -z "$CSCOPE"; then CSCOPE=cscope fi # POSIX will say in a future version that running "rm -f" with no argument # is OK; and we want to be able to make that assumption in our Makefile # recipes. So use an aggressive probe to check that the usage we want is # actually supported "in the wild" to an acceptable degree. # See automake bug#10828. # To make any issue more visible, cause the running configure to be aborted # by default if the 'rm' program in use doesn't match our expectations; the # user can still override this though. if rm -f && rm -fr && rm -rf; then : OK; else cat >&2 <<'END' Oops! Your 'rm' program seems unable to run without file operands specified on the command line, even when the '-f' option is present. This is contrary to the behaviour of most rm programs out there, and not conforming with the upcoming POSIX standard: Please tell bug-automake@gnu.org about your system, including the value of your $PATH and any error possibly output before this message. This can help us improve future automake versions. END if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then echo 'Configuration will proceed anyway, since you have set the' >&2 echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 echo >&2 else cat >&2 <<'END' Aborting the configuration process, to ensure you take notice of the issue. You can download and install GNU coreutils to get an 'rm' implementation that behaves properly: . If you want to complete the configuration process using your problematic 'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM to "yes", and re-run configure. END as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5 fi fi # Check whether --enable-silent-rules was given. if test ${enable_silent_rules+y} then : enableval=$enable_silent_rules; fi case $enable_silent_rules in # ((( yes) AM_DEFAULT_VERBOSITY=0;; no) AM_DEFAULT_VERBOSITY=1;; *) AM_DEFAULT_VERBOSITY=0;; esac am_make=${MAKE-make} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 printf %s "checking whether $am_make supports nested variables... " >&6; } if test ${am_cv_make_support_nested_variables+y} then : printf %s "(cached) " >&6 else $as_nop if printf "%s\n" 'TRUE=$(BAR$(V)) BAR0=false BAR1=true V=1 am__doit: @$(TRUE) .PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then am_cv_make_support_nested_variables=yes else am_cv_make_support_nested_variables=no fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 printf "%s\n" "$am_cv_make_support_nested_variables" >&6; } if test $am_cv_make_support_nested_variables = yes; then AM_V='$(V)' AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' else AM_V=$AM_DEFAULT_VERBOSITY AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY fi AM_BACKSLASH='\' DEPDIR="${am__leading_dot}deps" ac_config_commands="$ac_config_commands depfiles" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5 printf %s "checking whether ${MAKE-make} supports the include directive... " >&6; } cat > confinc.mk << 'END' am__doit: @echo this is the am__doit target >confinc.out .PHONY: am__doit END am__include="#" am__quote= # BSD make does it like this. echo '.include "confinc.mk" # ignored' > confmf.BSD # Other make implementations (GNU, Solaris 10, AIX) do it like this. echo 'include confinc.mk # ignored' > confmf.GNU _am_result=no for s in GNU BSD; do { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5 (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } case $?:`cat confinc.out 2>/dev/null` in #( '0:this is the am__doit target') : case $s in #( BSD) : am__include='.include' am__quote='"' ;; #( *) : am__include='include' am__quote='' ;; esac ;; #( *) : ;; esac if test "$am__include" != "#"; then _am_result="yes ($s style)" break fi done rm -f confinc.* confmf.* { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5 printf "%s\n" "${_am_result}" >&6; } # Check whether --enable-dependency-tracking was given. if test ${enable_dependency_tracking+y} then : enableval=$enable_dependency_tracking; fi if test "x$enable_dependency_tracking" != xno; then am_depcomp="$ac_aux_dir/depcomp" AMDEPBACKSLASH='\' am__nodep='_no' fi if test "x$enable_dependency_tracking" != xno; then AMDEP_TRUE= AMDEP_FALSE='#' else AMDEP_TRUE='#' AMDEP_FALSE= fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. set dummy ${ac_tool_prefix}gcc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}gcc" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 printf "%s\n" "$CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="gcc" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 printf "%s\n" "$ac_ct_CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi else CC="$ac_cv_prog_CC" fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. set dummy ${ac_tool_prefix}cc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}cc" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 printf "%s\n" "$CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else ac_prog_rejected=no as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then if test "$as_dir$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS if test $ac_prog_rejected = yes; then # We found a bogon in the path, so make sure we never use it. set dummy $ac_cv_prog_CC shift if test $# != 0; then # We chose a different compiler from the bogus one. # However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. shift ac_cv_prog_CC="$as_dir$ac_word${1+' '}$@" fi fi fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 printf "%s\n" "$CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then for ac_prog in cl.exe do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 printf "%s\n" "$CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$CC" && break done fi if test -z "$CC"; then ac_ct_CC=$CC for ac_prog in cl.exe do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 printf "%s\n" "$ac_ct_CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$ac_ct_CC" && break done if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi fi fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}clang", so it can be a program name with args. set dummy ${ac_tool_prefix}clang; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}clang" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 printf "%s\n" "$CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "clang", so it can be a program name with args. set dummy clang; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="clang" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 printf "%s\n" "$ac_ct_CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi else CC="$ac_cv_prog_CC" fi fi test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "no acceptable C compiler found in \$PATH See \`config.log' for more details" "$LINENO" 5; } # Provide some information about the compiler. printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 set X $ac_compile ac_compiler=$2 for ac_option in --version -v -V -qversion -version; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then sed '10a\ ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 printf %s "checking whether the C compiler works... " >&6; } ac_link_default=`printf "%s\n" "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # The possible output files: ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" ac_rmfiles= for ac_file in $ac_files do case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; * ) ac_rmfiles="$ac_rmfiles $ac_file";; esac done rm -f $ac_rmfiles if { { ac_try="$ac_link_default" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link_default") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } then : # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. # So ignore a value of `no', otherwise this would lead to `EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, # so that the user can short-circuit this test for compilers unknown to # Autoconf. for ac_file in $ac_files '' do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; [ab].out ) # We found the default executable, but exeext='' is most # certainly right. break;; *.* ) if test ${ac_cv_exeext+y} && test "$ac_cv_exeext" != no; then :; else ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi # We set ac_cv_exeext here because the later test for it is not # safe: cross compilers may not add the suffix if given an `-o' # argument, so we may need to know it at that point already. # Even if this section looks crufty: it has the advantage of # actually working. break;; * ) break;; esac done test "$ac_cv_exeext" = no && ac_cv_exeext= else $as_nop ac_file='' fi if test -z "$ac_file" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error 77 "C compiler cannot create executables See \`config.log' for more details" "$LINENO" 5; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 printf %s "checking for C compiler default output file name... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 printf "%s\n" "$ac_file" >&6; } ac_exeext=$ac_cv_exeext rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 printf %s "checking for suffix of executables... " >&6; } if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } then : # If both `conftest.exe' and `conftest' are `present' (well, observable) # catch `conftest.exe'. For instance with Cygwin, `ls conftest' will # work properly (i.e., refer to `conftest.exe'), while it won't with # `rm'. for ac_file in conftest.exe conftest conftest.*; do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` break;; * ) break;; esac done else $as_nop { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of executables: cannot compile and link See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest conftest$ac_cv_exeext { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 printf "%s\n" "$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext ac_exeext=$EXEEXT cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main (void) { FILE *f = fopen ("conftest.out", "w"); return ferror (f) || fclose (f) != 0; ; return 0; } _ACEOF ac_clean_files="$ac_clean_files conftest.out" # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 printf %s "checking whether we are cross compiling... " >&6; } if test "$cross_compiling" != yes; then { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } if { ac_try='./conftest$ac_cv_exeext' { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; }; then cross_compiling=no else if test "$cross_compiling" = maybe; then cross_compiling=yes else { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error 77 "cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details" "$LINENO" 5; } fi fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 printf "%s\n" "$cross_compiling" >&6; } rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out ac_clean_files=$ac_clean_files_save { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 printf %s "checking for suffix of object files... " >&6; } if test ${ac_cv_objext+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF rm -f conftest.o conftest.obj if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } then : for ac_file in conftest.o conftest.obj conftest.*; do test -f "$ac_file" || continue; case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` break;; esac done else $as_nop printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of object files: cannot compile See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 printf "%s\n" "$ac_cv_objext" >&6; } OBJEXT=$ac_cv_objext ac_objext=$OBJEXT { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C" >&5 printf %s "checking whether the compiler supports GNU C... " >&6; } if test ${ac_cv_c_compiler_gnu+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_compiler_gnu=yes else $as_nop ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 printf "%s\n" "$ac_cv_c_compiler_gnu" >&6; } ac_compiler_gnu=$ac_cv_c_compiler_gnu if test $ac_compiler_gnu = yes; then GCC=yes else GCC= fi ac_test_CFLAGS=${CFLAGS+y} ac_save_CFLAGS=$CFLAGS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 printf %s "checking whether $CC accepts -g... " >&6; } if test ${ac_cv_prog_cc_g+y} then : printf %s "(cached) " >&6 else $as_nop ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_cv_prog_cc_g=yes else $as_nop CFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : else $as_nop ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_cv_prog_cc_g=yes fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_c_werror_flag=$ac_save_c_werror_flag fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 printf "%s\n" "$ac_cv_prog_cc_g" >&6; } if test $ac_test_CFLAGS; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then CFLAGS="-g -O2" else CFLAGS="-g" fi else if test "$GCC" = yes; then CFLAGS="-O2" else CFLAGS= fi fi ac_prog_cc_stdc=no if test x$ac_prog_cc_stdc = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C11 features" >&5 printf %s "checking for $CC option to enable C11 features... " >&6; } if test ${ac_cv_prog_cc_c11+y} then : printf %s "(cached) " >&6 else $as_nop ac_cv_prog_cc_c11=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $ac_c_conftest_c11_program _ACEOF for ac_arg in '' -std=gnu11 do CC="$ac_save_CC $ac_arg" if ac_fn_c_try_compile "$LINENO" then : ac_cv_prog_cc_c11=$ac_arg fi rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cc_c11" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi if test "x$ac_cv_prog_cc_c11" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } else $as_nop if test "x$ac_cv_prog_cc_c11" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 printf "%s\n" "$ac_cv_prog_cc_c11" >&6; } CC="$CC $ac_cv_prog_cc_c11" fi ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11 ac_prog_cc_stdc=c11 fi fi if test x$ac_prog_cc_stdc = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C99 features" >&5 printf %s "checking for $CC option to enable C99 features... " >&6; } if test ${ac_cv_prog_cc_c99+y} then : printf %s "(cached) " >&6 else $as_nop ac_cv_prog_cc_c99=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $ac_c_conftest_c99_program _ACEOF for ac_arg in '' -std=gnu99 -std=c99 -c99 -qlanglvl=extc1x -qlanglvl=extc99 -AC99 -D_STDC_C99= do CC="$ac_save_CC $ac_arg" if ac_fn_c_try_compile "$LINENO" then : ac_cv_prog_cc_c99=$ac_arg fi rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cc_c99" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi if test "x$ac_cv_prog_cc_c99" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } else $as_nop if test "x$ac_cv_prog_cc_c99" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 printf "%s\n" "$ac_cv_prog_cc_c99" >&6; } CC="$CC $ac_cv_prog_cc_c99" fi ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 ac_prog_cc_stdc=c99 fi fi if test x$ac_prog_cc_stdc = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C89 features" >&5 printf %s "checking for $CC option to enable C89 features... " >&6; } if test ${ac_cv_prog_cc_c89+y} then : printf %s "(cached) " >&6 else $as_nop ac_cv_prog_cc_c89=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $ac_c_conftest_c89_program _ACEOF for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" if ac_fn_c_try_compile "$LINENO" then : ac_cv_prog_cc_c89=$ac_arg fi rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi if test "x$ac_cv_prog_cc_c89" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } else $as_nop if test "x$ac_cv_prog_cc_c89" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 printf "%s\n" "$ac_cv_prog_cc_c89" >&6; } CC="$CC $ac_cv_prog_cc_c89" fi ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 ac_prog_cc_stdc=c89 fi fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 printf %s "checking whether $CC understands -c and -o together... " >&6; } if test ${am_cv_prog_cc_c_o+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF # Make sure it works both with $CC and with simple cc. # Following AC_PROG_CC_C_O, we do the test twice because some # compilers refuse to overwrite an existing .o file with -o, # though they will create one. am_cv_prog_cc_c_o=yes for am_i in 1 2; do if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } \ && test -f conftest2.$ac_objext; then : OK else am_cv_prog_cc_c_o=no break fi done rm -f core conftest* unset am_i fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 printf "%s\n" "$am_cv_prog_cc_c_o" >&6; } if test "$am_cv_prog_cc_c_o" != yes; then # Losing compiler, so override with the script. # FIXME: It is wrong to rewrite CC. # But if we don't then we get into trouble of one sort or another. # A longer-term fix would be to have automake use am__CC in this case, # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" CC="$am_aux_dir/compile $CC" fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu depcc="$CC" am_compiler_list= { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 printf %s "checking dependency style of $depcc... " >&6; } if test ${am_cv_CC_dependencies_compiler_type+y} then : printf %s "(cached) " >&6 else $as_nop if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named 'D' -- because '-MD' means "put the output # in D". rm -rf conftest.dir mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. mkdir sub am_cv_CC_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` fi am__universal=false case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. # # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with # Solaris 10 /bin/sh. echo '/* dummy */' > sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf # We check with '-c' and '-o' for the sake of the "dashmstdout" # mode. It turns out that the SunPro C++ compiler does not properly # handle '-M -o', and we need to detect this. Also, some Intel # versions had trouble with output in subdirs. am__obj=sub/conftest.${OBJEXT-o} am__minus_obj="-o $am__obj" case $depmode in gcc) # This depmode causes a compiler race in universal mode. test "$am__universal" = false || continue ;; nosideeffect) # After this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested. if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; msvc7 | msvc7msys | msvisualcpp | msvcmsys) # This compiler won't grok '-c -o', but also, the minuso test has # not run yet. These depmodes are late enough in the game, and # so weak that their functioning should not be impacted. am__obj=conftest.${OBJEXT-o} am__minus_obj= ;; none) break ;; esac if depmode=$depmode \ source=sub/conftest.c object=$am__obj \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep $am__obj sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. # When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_CC_dependencies_compiler_type=$depmode break fi fi done cd .. rm -rf conftest.dir else am_cv_CC_dependencies_compiler_type=none fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 printf "%s\n" "$am_cv_CC_dependencies_compiler_type" >&6; } CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type if test "x$enable_dependency_tracking" != xno \ && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then am__fastdepCC_TRUE= am__fastdepCC_FALSE='#' else am__fastdepCC_TRUE='#' am__fastdepCC_FALSE= fi if test -n "$ac_tool_prefix"; then for ac_prog in ar lib "link -lib" do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_AR+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$AR"; then ac_cv_prog_AR="$AR" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_AR="$ac_tool_prefix$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi AR=$ac_cv_prog_AR if test -n "$AR"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 printf "%s\n" "$AR" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$AR" && break done fi if test -z "$AR"; then ac_ct_AR=$AR for ac_prog in ar lib "link -lib" do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_AR+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_AR"; then ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_AR="$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_AR=$ac_cv_prog_ac_ct_AR if test -n "$ac_ct_AR"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 printf "%s\n" "$ac_ct_AR" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$ac_ct_AR" && break done if test "x$ac_ct_AR" = x; then AR="false" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac AR=$ac_ct_AR fi fi : ${AR=ar} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking the archiver ($AR) interface" >&5 printf %s "checking the archiver ($AR) interface... " >&6; } if test ${am_cv_ar_interface+y} then : printf %s "(cached) " >&6 else $as_nop ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu am_cv_ar_interface=ar cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int some_variable = 0; _ACEOF if ac_fn_c_try_compile "$LINENO" then : am_ar_try='$AR cru libconftest.a conftest.$ac_objext >&5' { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$am_ar_try\""; } >&5 (eval $am_ar_try) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } if test "$ac_status" -eq 0; then am_cv_ar_interface=ar else am_ar_try='$AR -NOLOGO -OUT:conftest.lib conftest.$ac_objext >&5' { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$am_ar_try\""; } >&5 (eval $am_ar_try) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } if test "$ac_status" -eq 0; then am_cv_ar_interface=lib else am_cv_ar_interface=unknown fi fi rm -f conftest.lib libconftest.a fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_ar_interface" >&5 printf "%s\n" "$am_cv_ar_interface" >&6; } case $am_cv_ar_interface in ar) ;; lib) # Microsoft lib, so override with the ar-lib wrapper script. # FIXME: It is wrong to rewrite AR. # But if we don't then we get into trouble of one sort or another. # A longer-term fix would be to have automake use am__AR in this case, # and then we could set am__AR="$am_aux_dir/ar-lib \$(AR)" or something # similar. AR="$am_aux_dir/ar-lib $AR" ;; unknown) as_fn_error $? "could not determine $AR interface" "$LINENO" 5 ;; esac ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. set dummy ${ac_tool_prefix}gcc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}gcc" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 printf "%s\n" "$CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="gcc" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 printf "%s\n" "$ac_ct_CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi else CC="$ac_cv_prog_CC" fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. set dummy ${ac_tool_prefix}cc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}cc" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 printf "%s\n" "$CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else ac_prog_rejected=no as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then if test "$as_dir$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS if test $ac_prog_rejected = yes; then # We found a bogon in the path, so make sure we never use it. set dummy $ac_cv_prog_CC shift if test $# != 0; then # We chose a different compiler from the bogus one. # However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. shift ac_cv_prog_CC="$as_dir$ac_word${1+' '}$@" fi fi fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 printf "%s\n" "$CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then for ac_prog in cl.exe do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 printf "%s\n" "$CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$CC" && break done fi if test -z "$CC"; then ac_ct_CC=$CC for ac_prog in cl.exe do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 printf "%s\n" "$ac_ct_CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$ac_ct_CC" && break done if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi fi fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}clang", so it can be a program name with args. set dummy ${ac_tool_prefix}clang; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}clang" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 printf "%s\n" "$CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "clang", so it can be a program name with args. set dummy clang; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_CC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="clang" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 printf "%s\n" "$ac_ct_CC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi else CC="$ac_cv_prog_CC" fi fi test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "no acceptable C compiler found in \$PATH See \`config.log' for more details" "$LINENO" 5; } # Provide some information about the compiler. printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 set X $ac_compile ac_compiler=$2 for ac_option in --version -v -V -qversion -version; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then sed '10a\ ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C" >&5 printf %s "checking whether the compiler supports GNU C... " >&6; } if test ${ac_cv_c_compiler_gnu+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_compiler_gnu=yes else $as_nop ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 printf "%s\n" "$ac_cv_c_compiler_gnu" >&6; } ac_compiler_gnu=$ac_cv_c_compiler_gnu if test $ac_compiler_gnu = yes; then GCC=yes else GCC= fi ac_test_CFLAGS=${CFLAGS+y} ac_save_CFLAGS=$CFLAGS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 printf %s "checking whether $CC accepts -g... " >&6; } if test ${ac_cv_prog_cc_g+y} then : printf %s "(cached) " >&6 else $as_nop ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_cv_prog_cc_g=yes else $as_nop CFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : else $as_nop ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_cv_prog_cc_g=yes fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_c_werror_flag=$ac_save_c_werror_flag fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 printf "%s\n" "$ac_cv_prog_cc_g" >&6; } if test $ac_test_CFLAGS; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then CFLAGS="-g -O2" else CFLAGS="-g" fi else if test "$GCC" = yes; then CFLAGS="-O2" else CFLAGS= fi fi ac_prog_cc_stdc=no if test x$ac_prog_cc_stdc = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C11 features" >&5 printf %s "checking for $CC option to enable C11 features... " >&6; } if test ${ac_cv_prog_cc_c11+y} then : printf %s "(cached) " >&6 else $as_nop ac_cv_prog_cc_c11=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $ac_c_conftest_c11_program _ACEOF for ac_arg in '' -std=gnu11 do CC="$ac_save_CC $ac_arg" if ac_fn_c_try_compile "$LINENO" then : ac_cv_prog_cc_c11=$ac_arg fi rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cc_c11" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi if test "x$ac_cv_prog_cc_c11" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } else $as_nop if test "x$ac_cv_prog_cc_c11" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 printf "%s\n" "$ac_cv_prog_cc_c11" >&6; } CC="$CC $ac_cv_prog_cc_c11" fi ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11 ac_prog_cc_stdc=c11 fi fi if test x$ac_prog_cc_stdc = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C99 features" >&5 printf %s "checking for $CC option to enable C99 features... " >&6; } if test ${ac_cv_prog_cc_c99+y} then : printf %s "(cached) " >&6 else $as_nop ac_cv_prog_cc_c99=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $ac_c_conftest_c99_program _ACEOF for ac_arg in '' -std=gnu99 -std=c99 -c99 -qlanglvl=extc1x -qlanglvl=extc99 -AC99 -D_STDC_C99= do CC="$ac_save_CC $ac_arg" if ac_fn_c_try_compile "$LINENO" then : ac_cv_prog_cc_c99=$ac_arg fi rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cc_c99" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi if test "x$ac_cv_prog_cc_c99" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } else $as_nop if test "x$ac_cv_prog_cc_c99" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 printf "%s\n" "$ac_cv_prog_cc_c99" >&6; } CC="$CC $ac_cv_prog_cc_c99" fi ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 ac_prog_cc_stdc=c99 fi fi if test x$ac_prog_cc_stdc = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C89 features" >&5 printf %s "checking for $CC option to enable C89 features... " >&6; } if test ${ac_cv_prog_cc_c89+y} then : printf %s "(cached) " >&6 else $as_nop ac_cv_prog_cc_c89=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $ac_c_conftest_c89_program _ACEOF for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" if ac_fn_c_try_compile "$LINENO" then : ac_cv_prog_cc_c89=$ac_arg fi rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi if test "x$ac_cv_prog_cc_c89" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } else $as_nop if test "x$ac_cv_prog_cc_c89" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 printf "%s\n" "$ac_cv_prog_cc_c89" >&6; } CC="$CC $ac_cv_prog_cc_c89" fi ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 ac_prog_cc_stdc=c89 fi fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 printf %s "checking whether $CC understands -c and -o together... " >&6; } if test ${am_cv_prog_cc_c_o+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF # Make sure it works both with $CC and with simple cc. # Following AC_PROG_CC_C_O, we do the test twice because some # compilers refuse to overwrite an existing .o file with -o, # though they will create one. am_cv_prog_cc_c_o=yes for am_i in 1 2; do if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } \ && test -f conftest2.$ac_objext; then : OK else am_cv_prog_cc_c_o=no break fi done rm -f core conftest* unset am_i fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 printf "%s\n" "$am_cv_prog_cc_c_o" >&6; } if test "$am_cv_prog_cc_c_o" != yes; then # Losing compiler, so override with the script. # FIXME: It is wrong to rewrite CC. # But if we don't then we get into trouble of one sort or another. # A longer-term fix would be to have automake use am__CC in this case, # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" CC="$am_aux_dir/compile $CC" fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu depcc="$CC" am_compiler_list= { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 printf %s "checking dependency style of $depcc... " >&6; } if test ${am_cv_CC_dependencies_compiler_type+y} then : printf %s "(cached) " >&6 else $as_nop if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named 'D' -- because '-MD' means "put the output # in D". rm -rf conftest.dir mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. mkdir sub am_cv_CC_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` fi am__universal=false case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. # # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with # Solaris 10 /bin/sh. echo '/* dummy */' > sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf # We check with '-c' and '-o' for the sake of the "dashmstdout" # mode. It turns out that the SunPro C++ compiler does not properly # handle '-M -o', and we need to detect this. Also, some Intel # versions had trouble with output in subdirs. am__obj=sub/conftest.${OBJEXT-o} am__minus_obj="-o $am__obj" case $depmode in gcc) # This depmode causes a compiler race in universal mode. test "$am__universal" = false || continue ;; nosideeffect) # After this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested. if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; msvc7 | msvc7msys | msvisualcpp | msvcmsys) # This compiler won't grok '-c -o', but also, the minuso test has # not run yet. These depmodes are late enough in the game, and # so weak that their functioning should not be impacted. am__obj=conftest.${OBJEXT-o} am__minus_obj= ;; none) break ;; esac if depmode=$depmode \ source=sub/conftest.c object=$am__obj \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep $am__obj sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. # When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_CC_dependencies_compiler_type=$depmode break fi fi done cd .. rm -rf conftest.dir else am_cv_CC_dependencies_compiler_type=none fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 printf "%s\n" "$am_cv_CC_dependencies_compiler_type" >&6; } CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type if test "x$enable_dependency_tracking" != xno \ && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then am__fastdepCC_TRUE= am__fastdepCC_FALSE='#' else am__fastdepCC_TRUE='#' am__fastdepCC_FALSE= fi ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu if test -z "$CXX"; then if test -n "$CCC"; then CXX=$CCC else if test -n "$ac_tool_prefix"; then for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC clang++ do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_CXX+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$CXX"; then ac_cv_prog_CXX="$CXX" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CXX=$ac_cv_prog_CXX if test -n "$CXX"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 printf "%s\n" "$CXX" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$CXX" && break done fi if test -z "$CXX"; then ac_ct_CXX=$CXX for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC clang++ do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_CXX+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_CXX"; then ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CXX="$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CXX=$ac_cv_prog_ac_ct_CXX if test -n "$ac_ct_CXX"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5 printf "%s\n" "$ac_ct_CXX" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$ac_ct_CXX" && break done if test "x$ac_ct_CXX" = x; then CXX="g++" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CXX=$ac_ct_CXX fi fi fi fi # Provide some information about the compiler. printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5 set X $ac_compile ac_compiler=$2 for ac_option in --version -v -V -qversion; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then sed '10a\ ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C++" >&5 printf %s "checking whether the compiler supports GNU C++... " >&6; } if test ${ac_cv_cxx_compiler_gnu+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO" then : ac_compiler_gnu=yes else $as_nop ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_cv_cxx_compiler_gnu=$ac_compiler_gnu fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5 printf "%s\n" "$ac_cv_cxx_compiler_gnu" >&6; } ac_compiler_gnu=$ac_cv_cxx_compiler_gnu if test $ac_compiler_gnu = yes; then GXX=yes else GXX= fi ac_test_CXXFLAGS=${CXXFLAGS+y} ac_save_CXXFLAGS=$CXXFLAGS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5 printf %s "checking whether $CXX accepts -g... " >&6; } if test ${ac_cv_prog_cxx_g+y} then : printf %s "(cached) " >&6 else $as_nop ac_save_cxx_werror_flag=$ac_cxx_werror_flag ac_cxx_werror_flag=yes ac_cv_prog_cxx_g=no CXXFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO" then : ac_cv_prog_cxx_g=yes else $as_nop CXXFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO" then : else $as_nop ac_cxx_werror_flag=$ac_save_cxx_werror_flag CXXFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO" then : ac_cv_prog_cxx_g=yes fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_cxx_werror_flag=$ac_save_cxx_werror_flag fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5 printf "%s\n" "$ac_cv_prog_cxx_g" >&6; } if test $ac_test_CXXFLAGS; then CXXFLAGS=$ac_save_CXXFLAGS elif test $ac_cv_prog_cxx_g = yes; then if test "$GXX" = yes; then CXXFLAGS="-g -O2" else CXXFLAGS="-g" fi else if test "$GXX" = yes; then CXXFLAGS="-O2" else CXXFLAGS= fi fi ac_prog_cxx_stdcxx=no if test x$ac_prog_cxx_stdcxx = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features" >&5 printf %s "checking for $CXX option to enable C++11 features... " >&6; } if test ${ac_cv_prog_cxx_cxx11+y} then : printf %s "(cached) " >&6 else $as_nop ac_cv_prog_cxx_cxx11=no ac_save_CXX=$CXX cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $ac_cxx_conftest_cxx11_program _ACEOF for ac_arg in '' -std=gnu++11 -std=gnu++0x -std=c++11 -std=c++0x -qlanglvl=extended0x -AA do CXX="$ac_save_CXX $ac_arg" if ac_fn_cxx_try_compile "$LINENO" then : ac_cv_prog_cxx_cxx11=$ac_arg fi rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cxx_cxx11" != "xno" && break done rm -f conftest.$ac_ext CXX=$ac_save_CXX fi if test "x$ac_cv_prog_cxx_cxx11" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } else $as_nop if test "x$ac_cv_prog_cxx_cxx11" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_cxx11" >&5 printf "%s\n" "$ac_cv_prog_cxx_cxx11" >&6; } CXX="$CXX $ac_cv_prog_cxx_cxx11" fi ac_cv_prog_cxx_stdcxx=$ac_cv_prog_cxx_cxx11 ac_prog_cxx_stdcxx=cxx11 fi fi if test x$ac_prog_cxx_stdcxx = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features" >&5 printf %s "checking for $CXX option to enable C++98 features... " >&6; } if test ${ac_cv_prog_cxx_cxx98+y} then : printf %s "(cached) " >&6 else $as_nop ac_cv_prog_cxx_cxx98=no ac_save_CXX=$CXX cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $ac_cxx_conftest_cxx98_program _ACEOF for ac_arg in '' -std=gnu++98 -std=c++98 -qlanglvl=extended -AA do CXX="$ac_save_CXX $ac_arg" if ac_fn_cxx_try_compile "$LINENO" then : ac_cv_prog_cxx_cxx98=$ac_arg fi rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cxx_cxx98" != "xno" && break done rm -f conftest.$ac_ext CXX=$ac_save_CXX fi if test "x$ac_cv_prog_cxx_cxx98" = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 printf "%s\n" "unsupported" >&6; } else $as_nop if test "x$ac_cv_prog_cxx_cxx98" = x then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 printf "%s\n" "none needed" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_cxx98" >&5 printf "%s\n" "$ac_cv_prog_cxx_cxx98" >&6; } CXX="$CXX $ac_cv_prog_cxx_cxx98" fi ac_cv_prog_cxx_stdcxx=$ac_cv_prog_cxx_cxx98 ac_prog_cxx_stdcxx=cxx98 fi fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu depcc="$CXX" am_compiler_list= { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 printf %s "checking dependency style of $depcc... " >&6; } if test ${am_cv_CXX_dependencies_compiler_type+y} then : printf %s "(cached) " >&6 else $as_nop if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named 'D' -- because '-MD' means "put the output # in D". rm -rf conftest.dir mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. mkdir sub am_cv_CXX_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` fi am__universal=false case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. # # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with # Solaris 10 /bin/sh. echo '/* dummy */' > sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf # We check with '-c' and '-o' for the sake of the "dashmstdout" # mode. It turns out that the SunPro C++ compiler does not properly # handle '-M -o', and we need to detect this. Also, some Intel # versions had trouble with output in subdirs. am__obj=sub/conftest.${OBJEXT-o} am__minus_obj="-o $am__obj" case $depmode in gcc) # This depmode causes a compiler race in universal mode. test "$am__universal" = false || continue ;; nosideeffect) # After this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested. if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; msvc7 | msvc7msys | msvisualcpp | msvcmsys) # This compiler won't grok '-c -o', but also, the minuso test has # not run yet. These depmodes are late enough in the game, and # so weak that their functioning should not be impacted. am__obj=conftest.${OBJEXT-o} am__minus_obj= ;; none) break ;; esac if depmode=$depmode \ source=sub/conftest.c object=$am__obj \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep $am__obj sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. # When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_CXX_dependencies_compiler_type=$depmode break fi fi done cd .. rm -rf conftest.dir else am_cv_CXX_dependencies_compiler_type=none fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_CXX_dependencies_compiler_type" >&5 printf "%s\n" "$am_cv_CXX_dependencies_compiler_type" >&6; } CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type if test "x$enable_dependency_tracking" != xno \ && test "$am_cv_CXX_dependencies_compiler_type" = gcc3; then am__fastdepCXX_TRUE= am__fastdepCXX_FALSE='#' else am__fastdepCXX_TRUE='#' am__fastdepCXX_FALSE= fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 printf %s "checking how to run the C preprocessor... " >&6; } # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then if test ${ac_cv_prog_CPP+y} then : printf %s "(cached) " >&6 else $as_nop # Double quotes because $CC needs to be expanded for CPP in "$CC -E" "$CC -E -traditional-cpp" cpp /lib/cpp do ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include Syntax error _ACEOF if ac_fn_c_try_cpp "$LINENO" then : else $as_nop # Broken: fails on valid input. continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_c_try_cpp "$LINENO" then : # Broken: success on invalid input. continue else $as_nop # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok then : break fi done ac_cv_prog_CPP=$CPP fi CPP=$ac_cv_prog_CPP else ac_cv_prog_CPP=$CPP fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 printf "%s\n" "$CPP" >&6; } ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include Syntax error _ACEOF if ac_fn_c_try_cpp "$LINENO" then : else $as_nop # Broken: fails on valid input. continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_c_try_cpp "$LINENO" then : # Broken: success on invalid input. continue else $as_nop # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok then : else $as_nop { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details" "$LINENO" 5; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 printf %s "checking for a sed that does not truncate output... " >&6; } if test ${ac_cv_path_SED+y} then : printf %s "(cached) " >&6 else $as_nop ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ for ac_i in 1 2 3 4 5 6 7; do ac_script="$ac_script$as_nl$ac_script" done echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed { ac_script=; unset ac_script;} if test -z "$SED"; then ac_path_SED_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_prog in sed gsed do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_SED="$as_dir$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_SED" || continue # Check for GNU ac_path_SED and select it if it is found. # Check for GNU $ac_path_SED case `"$ac_path_SED" --version 2>&1` in *GNU*) ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;; *) ac_count=0 printf %s 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" printf "%s\n" '' >> "conftest.nl" "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_SED_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_SED="$ac_path_SED" ac_path_SED_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_SED_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_SED"; then as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5 fi else ac_cv_path_SED=$SED fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 printf "%s\n" "$ac_cv_path_SED" >&6; } SED="$ac_cv_path_SED" rm -f conftest.sed { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5 printf %s "checking whether ln -s works... " >&6; } LN_S=$as_ln_s if test "$LN_S" = "ln -s"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5 printf "%s\n" "no, using $LN_S" >&6; } fi ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu if test -n "$ac_tool_prefix"; then for ac_prog in g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77 xlf90 f90 pgf90 pghpf epcf90 gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn nagfor do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_F77+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$F77"; then ac_cv_prog_F77="$F77" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_F77="$ac_tool_prefix$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi F77=$ac_cv_prog_F77 if test -n "$F77"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $F77" >&5 printf "%s\n" "$F77" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$F77" && break done fi if test -z "$F77"; then ac_ct_F77=$F77 for ac_prog in g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77 xlf90 f90 pgf90 pghpf epcf90 gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn nagfor do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_F77+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_F77"; then ac_cv_prog_ac_ct_F77="$ac_ct_F77" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_F77="$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_F77=$ac_cv_prog_ac_ct_F77 if test -n "$ac_ct_F77"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_F77" >&5 printf "%s\n" "$ac_ct_F77" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$ac_ct_F77" && break done if test "x$ac_ct_F77" = x; then F77="" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac F77=$ac_ct_F77 fi fi # Provide some information about the compiler. printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for Fortran 77 compiler version" >&5 set X $ac_compile ac_compiler=$2 for ac_option in --version -v -V -qversion; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then sed '10a\ ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done rm -f a.out # If we don't use `.F' as extension, the preprocessor is not run on the # input file. (Note that this only needs to work for GNU compilers.) ac_save_ext=$ac_ext ac_ext=F { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU Fortran 77" >&5 printf %s "checking whether the compiler supports GNU Fortran 77... " >&6; } if test ${ac_cv_f77_compiler_gnu+y} then : printf %s "(cached) " >&6 else $as_nop cat > conftest.$ac_ext <<_ACEOF program main #ifndef __GNUC__ choke me #endif end _ACEOF if ac_fn_f77_try_compile "$LINENO" then : ac_compiler_gnu=yes else $as_nop ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_cv_f77_compiler_gnu=$ac_compiler_gnu fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_f77_compiler_gnu" >&5 printf "%s\n" "$ac_cv_f77_compiler_gnu" >&6; } ac_compiler_gnu=$ac_cv_f77_compiler_gnu ac_ext=$ac_save_ext ac_test_FFLAGS=${FFLAGS+y} ac_save_FFLAGS=$FFLAGS FFLAGS= { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $F77 accepts -g" >&5 printf %s "checking whether $F77 accepts -g... " >&6; } if test ${ac_cv_prog_f77_g+y} then : printf %s "(cached) " >&6 else $as_nop FFLAGS=-g cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_compile "$LINENO" then : ac_cv_prog_f77_g=yes else $as_nop ac_cv_prog_f77_g=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_f77_g" >&5 printf "%s\n" "$ac_cv_prog_f77_g" >&6; } if test $ac_test_FFLAGS; then FFLAGS=$ac_save_FFLAGS elif test $ac_cv_prog_f77_g = yes; then if test "x$ac_cv_f77_compiler_gnu" = xyes; then FFLAGS="-g -O2" else FFLAGS="-g" fi else if test "x$ac_cv_f77_compiler_gnu" = xyes; then FFLAGS="-O2" else FFLAGS= fi fi if test $ac_compiler_gnu = yes; then G77=yes else G77= fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu if test -n "$ac_tool_prefix"; then for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn nagfor xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77 do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_FC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$FC"; then ac_cv_prog_FC="$FC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_FC="$ac_tool_prefix$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi FC=$ac_cv_prog_FC if test -n "$FC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $FC" >&5 printf "%s\n" "$FC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$FC" && break done fi if test -z "$FC"; then ac_ct_FC=$FC for ac_prog in gfortran g95 xlf95 f95 fort ifort ifc efc pgfortran pgf95 lf95 ftn nagfor xlf90 f90 pgf90 pghpf epcf90 g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77 do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_FC+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_FC"; then ac_cv_prog_ac_ct_FC="$ac_ct_FC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_FC="$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_FC=$ac_cv_prog_ac_ct_FC if test -n "$ac_ct_FC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_FC" >&5 printf "%s\n" "$ac_ct_FC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$ac_ct_FC" && break done if test "x$ac_ct_FC" = x; then FC="" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac FC=$ac_ct_FC fi fi # Provide some information about the compiler. printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for Fortran compiler version" >&5 set X $ac_compile ac_compiler=$2 for ac_option in --version -v -V -qversion; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then sed '10a\ ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done rm -f a.out # If we don't use `.F' as extension, the preprocessor is not run on the # input file. (Note that this only needs to work for GNU compilers.) ac_save_ext=$ac_ext ac_ext=F { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU Fortran" >&5 printf %s "checking whether the compiler supports GNU Fortran... " >&6; } if test ${ac_cv_fc_compiler_gnu+y} then : printf %s "(cached) " >&6 else $as_nop cat > conftest.$ac_ext <<_ACEOF program main #ifndef __GNUC__ choke me #endif end _ACEOF if ac_fn_fc_try_compile "$LINENO" then : ac_compiler_gnu=yes else $as_nop ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_cv_fc_compiler_gnu=$ac_compiler_gnu fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_fc_compiler_gnu" >&5 printf "%s\n" "$ac_cv_fc_compiler_gnu" >&6; } ac_compiler_gnu=$ac_cv_fc_compiler_gnu ac_ext=$ac_save_ext ac_test_FCFLAGS=${FCFLAGS+y} ac_save_FCFLAGS=$FCFLAGS FCFLAGS= { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $FC accepts -g" >&5 printf %s "checking whether $FC accepts -g... " >&6; } if test ${ac_cv_prog_fc_g+y} then : printf %s "(cached) " >&6 else $as_nop FCFLAGS=-g cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_compile "$LINENO" then : ac_cv_prog_fc_g=yes else $as_nop ac_cv_prog_fc_g=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_fc_g" >&5 printf "%s\n" "$ac_cv_prog_fc_g" >&6; } if test $ac_test_FCFLAGS; then FCFLAGS=$ac_save_FCFLAGS elif test $ac_cv_prog_fc_g = yes; then if test "x$ac_cv_fc_compiler_gnu" = xyes; then FCFLAGS="-g -O2" else FCFLAGS="-g" fi else if test "x$ac_cv_fc_compiler_gnu" = xyes; then FCFLAGS="-O2" else FCFLAGS= fi fi if test $ac_compiler_gnu = yes; then GFC=yes else GFC= fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 printf %s "checking for grep that handles long lines and -e... " >&6; } if test ${ac_cv_path_GREP+y} then : printf %s "(cached) " >&6 else $as_nop if test -z "$GREP"; then ac_path_GREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_prog in grep ggrep do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_GREP="$as_dir$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_GREP" || continue # Check for GNU ac_path_GREP and select it if it is found. # Check for GNU $ac_path_GREP case `"$ac_path_GREP" --version 2>&1` in *GNU*) ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; *) ac_count=0 printf %s 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" printf "%s\n" 'GREP' >> "conftest.nl" "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_GREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_GREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_GREP"; then as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 fi else ac_cv_path_GREP=$GREP fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 printf "%s\n" "$ac_cv_path_GREP" >&6; } GREP="$ac_cv_path_GREP" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 printf %s "checking for egrep... " >&6; } if test ${ac_cv_path_EGREP+y} then : printf %s "(cached) " >&6 else $as_nop if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 then ac_cv_path_EGREP="$GREP -E" else if test -z "$EGREP"; then ac_path_EGREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_prog in egrep do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_EGREP="$as_dir$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_EGREP" || continue # Check for GNU ac_path_EGREP and select it if it is found. # Check for GNU $ac_path_EGREP case `"$ac_path_EGREP" --version 2>&1` in *GNU*) ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; *) ac_count=0 printf %s 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" printf "%s\n" 'EGREP' >> "conftest.nl" "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_EGREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_EGREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_EGREP"; then as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 fi else ac_cv_path_EGREP=$EGREP fi fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 printf "%s\n" "$ac_cv_path_EGREP" >&6; } EGREP="$ac_cv_path_EGREP" for ac_prog in gstat stat do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_PROG_STAT+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$PROG_STAT"; then ac_cv_prog_PROG_STAT="$PROG_STAT" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_PROG_STAT="$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi PROG_STAT=$ac_cv_prog_PROG_STAT if test -n "$PROG_STAT"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PROG_STAT" >&5 printf "%s\n" "$PROG_STAT" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$PROG_STAT" && break done for ac_prog in gdate date do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_PROG_DATE+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$PROG_DATE"; then ac_cv_prog_PROG_DATE="$PROG_DATE" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_PROG_DATE="$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi PROG_DATE=$ac_cv_prog_PROG_DATE if test -n "$PROG_DATE"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PROG_DATE" >&5 printf "%s\n" "$PROG_DATE" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$PROG_DATE" && break done for ac_prog in find do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_PROG_FIND+y} then : printf %s "(cached) " >&6 else $as_nop case $PROG_FIND in [\\/]* | ?:[\\/]*) ac_cv_path_PROG_FIND="$PROG_FIND" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_PROG_FIND="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS ;; esac fi PROG_FIND=$ac_cv_path_PROG_FIND if test -n "$PROG_FIND"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PROG_FIND" >&5 printf "%s\n" "$PROG_FIND" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$PROG_FIND" && break done for ac_prog in clang do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_PROG_CLANG+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$PROG_CLANG"; then ac_cv_prog_PROG_CLANG="$PROG_CLANG" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_PROG_CLANG="$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi PROG_CLANG=$ac_cv_prog_PROG_CLANG if test -n "$PROG_CLANG"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PROG_CLANG" >&5 printf "%s\n" "$PROG_CLANG" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$PROG_CLANG" && break done { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for parallel that supports semaphores with exit code" >&5 printf %s "checking for parallel that supports semaphores with exit code... " >&6; } if test ${ac_cv_path_PARALLEL+y} then : printf %s "(cached) " >&6 else $as_nop if test -z "$PARALLEL"; then ac_path_PARALLEL_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_prog in parallel do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_PARALLEL="$as_dir$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_PARALLEL" || continue parallel --semaphore --id starpu --fg --fg-exit -j 2 exit 42 > /dev/null 2>&1 [ $? = 42 ] && ac_cv_path_PARALLEL=$ac_path_PARALLEL ac_path_PARALLEL_found=: $ac_path_PARALLEL_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_PARALLEL"; then ac_cv_path_PARALLEL=no fi else ac_cv_path_PARALLEL=$PARALLEL fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_PARALLEL" >&5 printf "%s\n" "$ac_cv_path_PARALLEL" >&6; } PARALLEL=$ac_cv_path_PARALLEL if test "x$PARALLEL" != "xno"; then HAVE_PARALLEL_TRUE= HAVE_PARALLEL_FALSE='#' else HAVE_PARALLEL_TRUE='#' HAVE_PARALLEL_FALSE= fi if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args. set dummy ${ac_tool_prefix}pkg-config; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_PKG_CONFIG+y} then : printf %s "(cached) " >&6 else $as_nop case $PKG_CONFIG in [\\/]* | ?:[\\/]*) ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_PKG_CONFIG="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS ;; esac fi PKG_CONFIG=$ac_cv_path_PKG_CONFIG if test -n "$PKG_CONFIG"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5 printf "%s\n" "$PKG_CONFIG" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_path_PKG_CONFIG"; then ac_pt_PKG_CONFIG=$PKG_CONFIG # Extract the first word of "pkg-config", so it can be a program name with args. set dummy pkg-config; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_ac_pt_PKG_CONFIG+y} then : printf %s "(cached) " >&6 else $as_nop case $ac_pt_PKG_CONFIG in [\\/]* | ?:[\\/]*) ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_ac_pt_PKG_CONFIG="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS ;; esac fi ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG if test -n "$ac_pt_PKG_CONFIG"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5 printf "%s\n" "$ac_pt_PKG_CONFIG" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_pt_PKG_CONFIG" = x; then PKG_CONFIG="" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac PKG_CONFIG=$ac_pt_PKG_CONFIG fi else PKG_CONFIG="$ac_cv_path_PKG_CONFIG" fi fi if test -n "$PKG_CONFIG"; then _pkg_min_version=0.9.0 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking pkg-config is at least version $_pkg_min_version" >&5 printf %s "checking pkg-config is at least version $_pkg_min_version... " >&6; } if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } PKG_CONFIG="" fi fi # Check whether --enable-simgrid was given. if test ${enable_simgrid+y} then : enableval=$enable_simgrid; enable_simgrid=$enableval else $as_nop enable_simgrid=no fi if test x$enable_perf_debug = xyes; then enable_shared=no fi default_enable_mpi_check=no if test x$enable_simgrid = xyes ; then default_enable_mpi=no else default_enable_mpi=maybe fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Wno-unused" >&5 printf %s "checking whether C compiler supports -Wno-unused... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-Wno-unused" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : APP_CFLAGS="$APP_CFLAGS -Wno-unused" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wno-unused" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Wno-unused" >&5 printf %s "checking whether MPI C compiler supports -Wno-unused... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : APP_CFLAGS="$APP_CFLAGS -Wno-unused" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Wno-unused" >&5 printf %s "checking whether CXX compiler supports -Wno-unused... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-Wno-unused" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : APP_CXXFLAGS="$APP_CXXFLAGS -Wno-unused" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -Wno-unused" >&5 printf %s "checking whether Fortran 77 compiler supports -Wno-unused... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="-Wno-unused" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : APP_FFLAGS="$APP_FFLAGS -Wno-unused" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -Wno-unused" >&5 printf %s "checking whether Fortran compiler supports -Wno-unused... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="-Wno-unused" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : APP_FCFLAGS="$APP_FCFLAGS -Wno-unused" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -Wno-unused" >&5 printf %s "checking whether MPI Fortran compiler supports -Wno-unused... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : APP_FCFLAGS="$APP_FCFLAGS -Wno-unused" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -Wno-unused-dummy-argument" >&5 printf %s "checking whether Fortran 77 compiler supports -Wno-unused-dummy-argument... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="-Wno-unused-dummy-argument" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : APP_FFLAGS="$APP_FFLAGS -Wno-unused-dummy-argument" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -Wno-unused-dummy-argument" >&5 printf %s "checking whether Fortran compiler supports -Wno-unused-dummy-argument... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="-Wno-unused-dummy-argument" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : APP_FCFLAGS="$APP_FCFLAGS -Wno-unused-dummy-argument" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -Wno-unused-dummy-argument" >&5 printf %s "checking whether MPI Fortran compiler supports -Wno-unused-dummy-argument... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : APP_FCFLAGS="$APP_FCFLAGS -Wno-unused-dummy-argument" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_header= ac_cache= for ac_item in $ac_header_c_list do if test $ac_cache; then ac_fn_c_check_header_compile "$LINENO" $ac_header ac_cv_header_$ac_cache "$ac_includes_default" if eval test \"x\$ac_cv_header_$ac_cache\" = xyes; then printf "%s\n" "#define $ac_item 1" >> confdefs.h fi ac_header= ac_cache= elif test $ac_header; then ac_cache=$ac_item else ac_header=$ac_item fi done if test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes then : printf "%s\n" "#define STDC_HEADERS 1" >>confdefs.h fi ac_func= for ac_item in $ac_func_c_list do if test $ac_func; then ac_fn_c_check_func "$LINENO" $ac_func ac_cv_func_$ac_func if eval test \"x\$ac_cv_func_$ac_func\" = xyes; then echo "#define $ac_item 1" >> confdefs.h fi ac_func= else ac_func=$ac_item fi done { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for working mmap" >&5 printf %s "checking for working mmap... " >&6; } if test ${ac_cv_func_mmap_fixed_mapped+y} then : printf %s "(cached) " >&6 else $as_nop if test "$cross_compiling" = yes then : case "$host_os" in # (( # Guess yes on platforms where we know the result. linux*) ac_cv_func_mmap_fixed_mapped=yes ;; # If we don't know, assume the worst. *) ac_cv_func_mmap_fixed_mapped=no ;; esac else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $ac_includes_default /* malloc might have been renamed as rpl_malloc. */ #undef malloc /* Thanks to Mike Haertel and Jim Avera for this test. Here is a matrix of mmap possibilities: mmap private not fixed mmap private fixed at somewhere currently unmapped mmap private fixed at somewhere already mapped mmap shared not fixed mmap shared fixed at somewhere currently unmapped mmap shared fixed at somewhere already mapped For private mappings, we should verify that changes cannot be read() back from the file, nor mmap's back from the file at a different address. (There have been systems where private was not correctly implemented like the infamous i386 svr4.0, and systems where the VM page cache was not coherent with the file system buffer cache like early versions of FreeBSD and possibly contemporary NetBSD.) For shared mappings, we should conversely verify that changes get propagated back to all the places they're supposed to be. Grep wants private fixed already mapped. The main things grep needs to know about mmap are: * does it exist and is it safe to write into the mmap'd area * how to use it (BSD variants) */ #include #include /* This mess was copied from the GNU getpagesize.h. */ #ifndef HAVE_GETPAGESIZE # ifdef _SC_PAGESIZE # define getpagesize() sysconf(_SC_PAGESIZE) # else /* no _SC_PAGESIZE */ # ifdef HAVE_SYS_PARAM_H # include # ifdef EXEC_PAGESIZE # define getpagesize() EXEC_PAGESIZE # else /* no EXEC_PAGESIZE */ # ifdef NBPG # define getpagesize() NBPG * CLSIZE # ifndef CLSIZE # define CLSIZE 1 # endif /* no CLSIZE */ # else /* no NBPG */ # ifdef NBPC # define getpagesize() NBPC # else /* no NBPC */ # ifdef PAGESIZE # define getpagesize() PAGESIZE # endif /* PAGESIZE */ # endif /* no NBPC */ # endif /* no NBPG */ # endif /* no EXEC_PAGESIZE */ # else /* no HAVE_SYS_PARAM_H */ # define getpagesize() 8192 /* punt totally */ # endif /* no HAVE_SYS_PARAM_H */ # endif /* no _SC_PAGESIZE */ #endif /* no HAVE_GETPAGESIZE */ int main (void) { char *data, *data2, *data3; const char *cdata2; int i, pagesize; int fd, fd2; pagesize = getpagesize (); /* First, make a file with some known garbage in it. */ data = (char *) malloc (pagesize); if (!data) return 1; for (i = 0; i < pagesize; ++i) *(data + i) = rand (); umask (0); fd = creat ("conftest.mmap", 0600); if (fd < 0) return 2; if (write (fd, data, pagesize) != pagesize) return 3; close (fd); /* Next, check that the tail of a page is zero-filled. File must have non-zero length, otherwise we risk SIGBUS for entire page. */ fd2 = open ("conftest.txt", O_RDWR | O_CREAT | O_TRUNC, 0600); if (fd2 < 0) return 4; cdata2 = ""; if (write (fd2, cdata2, 1) != 1) return 5; data2 = (char *) mmap (0, pagesize, PROT_READ | PROT_WRITE, MAP_SHARED, fd2, 0L); if (data2 == MAP_FAILED) return 6; for (i = 0; i < pagesize; ++i) if (*(data2 + i)) return 7; close (fd2); if (munmap (data2, pagesize)) return 8; /* Next, try to mmap the file at a fixed address which already has something else allocated at it. If we can, also make sure that we see the same garbage. */ fd = open ("conftest.mmap", O_RDWR); if (fd < 0) return 9; if (data2 != mmap (data2, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, fd, 0L)) return 10; for (i = 0; i < pagesize; ++i) if (*(data + i) != *(data2 + i)) return 11; /* Finally, make sure that changes to the mapped area do not percolate back to the file as seen by read(). (This is a bug on some variants of i386 svr4.0.) */ for (i = 0; i < pagesize; ++i) *(data2 + i) = *(data2 + i) + 1; data3 = (char *) malloc (pagesize); if (!data3) return 12; if (read (fd, data3, pagesize) != pagesize) return 13; for (i = 0; i < pagesize; ++i) if (*(data + i) != *(data3 + i)) return 14; close (fd); free (data); free (data3); return 0; } _ACEOF if ac_fn_c_try_run "$LINENO" then : ac_cv_func_mmap_fixed_mapped=yes else $as_nop ac_cv_func_mmap_fixed_mapped=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_mmap_fixed_mapped" >&5 printf "%s\n" "$ac_cv_func_mmap_fixed_mapped" >&6; } if test $ac_cv_func_mmap_fixed_mapped = yes; then printf "%s\n" "#define HAVE_MMAP 1" >>confdefs.h fi rm -f conftest.mmap conftest.txt ############################################################################### # # # Forwarded options # # # # Move here options whose values are needed early # # # ############################################################################### # # Check whether --enable-starpupy was given. if test ${enable_starpupy+y} then : enableval=$enable_starpupy; enable_starpupy=$enableval else $as_nop enable_starpupy=maybe fi ############################################################################### # # # Profiling tool support # # # ############################################################################### # Check whether --enable-prof-tool was given. if test ${enable_prof_tool+y} then : enableval=$enable_prof_tool; enable_prof_tool=$enableval else $as_nop enable_prof_tool=yes fi if test x$enable_prof_tool = xyes; then printf "%s\n" "#define STARPU_PROF_TOOL 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for profiling tool support" >&5 printf %s "checking for profiling tool support... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_prof_tool" >&5 printf "%s\n" "$enable_prof_tool" >&6; } ############################################################################### # # # Hierarchical dags support # # # ############################################################################### # Check whether --enable-bubble was given. if test ${enable_bubble+y} then : enableval=$enable_bubble; enable_bubble=$enableval else $as_nop enable_bubble=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for hierarchical dags - a.k.a bubble - support" >&5 printf %s "checking for hierarchical dags - a.k.a bubble - support... " >&6; } if test x$enable_bubble = xyes; then printf "%s\n" "#define STARPU_BUBBLE 1" >>confdefs.h fi if test "x$enable_bubble" = "xyes"; then STARPU_BUBBLE_TRUE= STARPU_BUBBLE_FALSE='#' else STARPU_BUBBLE_TRUE='#' STARPU_BUBBLE_FALSE= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_bubble" >&5 printf "%s\n" "$enable_bubble" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether bubble debug messages should be displayed" >&5 printf %s "checking whether bubble debug messages should be displayed... " >&6; } # Check whether --enable-bubble-verbose was given. if test ${enable_bubble_verbose+y} then : enableval=$enable_bubble_verbose; enable_bubble_verbose=$enableval else $as_nop enable_bubble_verbose=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_bubble_verbose" >&5 printf "%s\n" "$enable_bubble_verbose" >&6; } if test x$enable_bubble_verbose = xextra; then printf "%s\n" "#define STARPU_BUBBLE_VERBOSE 1" >>confdefs.h fi ############################################################################### # # # Drivers # # # ############################################################################### # Check whether --enable-opencl-simulator was given. if test ${enable_opencl_simulator+y} then : enableval=$enable_opencl_simulator; enable_opencl_simulator=$enableval else $as_nop enable_opencl_simulator=no fi if test x$enable_opencl_simulator = xyes; then enable_simgrid=yes printf "%s\n" "#define STARPU_OPENCL_SIMULATOR 1" >>confdefs.h fi # Check whether --with-simgrid-dir was given. if test ${with_simgrid_dir+y} then : withval=$with_simgrid_dir; simgrid_dir="$withval" # in case this was not explicit yet enable_simgrid=yes else $as_nop simgrid_dir=no fi # Check whether --with-simgrid-include-dir was given. if test ${with_simgrid_include_dir+y} then : withval=$with_simgrid_include_dir; simgrid_include_dir="$withval" # in case this was not explicit yet enable_simgrid=yes else $as_nop simgrid_include_dir=no fi # Check whether --with-simgrid-lib-dir was given. if test ${with_simgrid_lib_dir+y} then : withval=$with_simgrid_lib_dir; simgrid_lib_dir="$withval" # in case this was not explicit yet enable_simgrid=yes else $as_nop simgrid_lib_dir=no fi if test x$enable_simgrid = xyes ; then pkg_failed=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for SIMGRID" >&5 printf %s "checking for SIMGRID... " >&6; } if test -n "$PKG_CONFIG"; then if test -n "$SIMGRID_CFLAGS"; then pkg_cv_SIMGRID_CFLAGS="$SIMGRID_CFLAGS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"simgrid\""; } >&5 ($PKG_CONFIG --exists --print-errors "simgrid") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_SIMGRID_CFLAGS=`$PKG_CONFIG --cflags "simgrid" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$SIMGRID_LIBS"; then pkg_cv_SIMGRID_LIBS="$SIMGRID_LIBS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"simgrid\""; } >&5 ($PKG_CONFIG --exists --print-errors "simgrid") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_SIMGRID_LIBS=`$PKG_CONFIG --libs "simgrid" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then SIMGRID_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "simgrid"` else SIMGRID_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "simgrid"` fi # Put the nasty error message in config.log where it belongs echo "$SIMGRID_PKG_ERRORS" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } : elif test $pkg_failed = untried; then : else SIMGRID_CFLAGS=$pkg_cv_SIMGRID_CFLAGS SIMGRID_LIBS=$pkg_cv_SIMGRID_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : fi if test "$simgrid_include_dir" != "no" ; then SIMGRID_CFLAGS="-I$simgrid_include_dir $SIMGRID_CFLAGS" fi if test "$simgrid_lib_dir" != "no" ; then SIMGRID_LIBS="-L$simgrid_lib_dir $SIMGRID_LIBS" fi if test "$simgrid_dir" != "no" ; then SIMGRID_CFLAGS="-I$simgrid_dir/include $SIMGRID_CFLAGS" SIMGRID_LIBS="-L$simgrid_dir/lib $SIMGRID_LIBS" else simgrid_dir="$(pkg-config --variable=prefix simgrid)" fi if test -n "$SIMGRID_CFLAGS" ; then CFLAGS="$SIMGRID_CFLAGS $CFLAGS" CXXFLAGS="$SIMGRID_CFLAGS $CXXFLAGS" NVCCFLAGS="$SIMGRID_CFLAGS $NVCCFLAGS" HIPCCFLAGS="$SIMGRID_CFLAGS $HIPCCFLAGS" fi SAVED_LIBS="${LIBS}" LIBS="$SIMGRID_LIBS $LIBS" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lsimgrid" >&5 printf %s "checking for main in -lsimgrid... " >&6; } if test ${ac_cv_lib_simgrid_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lsimgrid $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_simgrid_main=yes else $as_nop ac_cv_lib_simgrid_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_simgrid_main" >&5 printf "%s\n" "$ac_cv_lib_simgrid_main" >&6; } if test "x$ac_cv_lib_simgrid_main" = xyes then : printf "%s\n" "#define HAVE_LIBSIMGRID 1" >>confdefs.h LIBS="-lsimgrid $LIBS" else $as_nop as_fn_error $? "Simgrid support needs simgrid installed" "$LINENO" 5 fi ac_cv_lib_simgrid=ac_cv_lib_simgrid_main for ac_header in simgrid/msg.h do : ac_fn_c_check_header_compile "$LINENO" "simgrid/msg.h" "ac_cv_header_simgrid_msg_h" "$ac_includes_default" if test "x$ac_cv_header_simgrid_msg_h" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_MSG_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_SIMGRID_MSG_H 1" >>confdefs.h fi done for ac_header in msg/msg.h do : ac_fn_c_check_header_compile "$LINENO" "msg/msg.h" "ac_cv_header_msg_msg_h" "$ac_includes_default" if test "x$ac_cv_header_msg_msg_h" = xyes then : printf "%s\n" "#define HAVE_MSG_MSG_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_MSG_MSG_H 1" >>confdefs.h fi done for ac_header in simgrid/host.h do : ac_fn_c_check_header_compile "$LINENO" "simgrid/host.h" "ac_cv_header_simgrid_host_h" "$ac_includes_default" if test "x$ac_cv_header_simgrid_host_h" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_HOST_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_SIMGRID_HOST_H 1" >>confdefs.h fi done for ac_header in simgrid/link.h do : ac_fn_c_check_header_compile "$LINENO" "simgrid/link.h" "ac_cv_header_simgrid_link_h" "$ac_includes_default" if test "x$ac_cv_header_simgrid_link_h" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_LINK_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_SIMGRID_LINK_H 1" >>confdefs.h fi done for ac_header in xbt/base.h do : ac_fn_c_check_header_compile "$LINENO" "xbt/base.h" "ac_cv_header_xbt_base_h" "$ac_includes_default" if test "x$ac_cv_header_xbt_base_h" = xyes then : printf "%s\n" "#define HAVE_XBT_BASE_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_XBT_BASE_H 1" >>confdefs.h fi done for ac_header in simgrid/version.h do : ac_fn_c_check_header_compile "$LINENO" "simgrid/version.h" "ac_cv_header_simgrid_version_h" " #ifdef STARPU_HAVE_XBT_BASE_H #include #endif " if test "x$ac_cv_header_simgrid_version_h" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_VERSION_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_SIMGRID_VERSION_H 1" >>confdefs.h fi done for ac_header in simgrid/simdag.h do : ac_fn_c_check_header_compile "$LINENO" "simgrid/simdag.h" "ac_cv_header_simgrid_simdag_h" "$ac_includes_default" if test "x$ac_cv_header_simgrid_simdag_h" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_SIMDAG_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_SIMGRID_SIMDAG_H 1" >>confdefs.h fi done for ac_header in xbt/synchro.h do : ac_fn_c_check_header_compile "$LINENO" "xbt/synchro.h" "ac_cv_header_xbt_synchro_h" "$ac_includes_default" if test "x$ac_cv_header_xbt_synchro_h" = xyes then : printf "%s\n" "#define HAVE_XBT_SYNCHRO_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_XBT_SYNCHRO_H 1" >>confdefs.h fi done for ac_header in xbt/config.h do : ac_fn_c_check_header_compile "$LINENO" "xbt/config.h" "ac_cv_header_xbt_config_h" "$ac_includes_default" if test "x$ac_cv_header_xbt_config_h" = xyes then : printf "%s\n" "#define HAVE_XBT_CONFIG_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_XBT_CONFIG_H 1" >>confdefs.h fi done for ac_header in simgrid/actor.h do : ac_fn_c_check_header_compile "$LINENO" "simgrid/actor.h" "ac_cv_header_simgrid_actor_h" "$ac_includes_default" if test "x$ac_cv_header_simgrid_actor_h" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_ACTOR_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_SIMGRID_ACTOR_H 1" >>confdefs.h fi done for ac_header in simgrid/engine.h do : ac_fn_c_check_header_compile "$LINENO" "simgrid/engine.h" "ac_cv_header_simgrid_engine_h" "$ac_includes_default" if test "x$ac_cv_header_simgrid_engine_h" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_ENGINE_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_SIMGRID_ENGINE_H 1" >>confdefs.h fi done for ac_header in simgrid/semaphore.h do : ac_fn_c_check_header_compile "$LINENO" "simgrid/semaphore.h" "ac_cv_header_simgrid_semaphore_h" "$ac_includes_default" if test "x$ac_cv_header_simgrid_semaphore_h" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_SEMAPHORE_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_SIMGRID_SEMAPHORE_H 1" >>confdefs.h fi done for ac_header in simgrid/mutex.h do : ac_fn_c_check_header_compile "$LINENO" "simgrid/mutex.h" "ac_cv_header_simgrid_mutex_h" "$ac_includes_default" if test "x$ac_cv_header_simgrid_mutex_h" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_MUTEX_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_SIMGRID_MUTEX_H 1" >>confdefs.h fi done for ac_header in simgrid/cond.h do : ac_fn_c_check_header_compile "$LINENO" "simgrid/cond.h" "ac_cv_header_simgrid_cond_h" "$ac_includes_default" if test "x$ac_cv_header_simgrid_cond_h" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_COND_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_SIMGRID_COND_H 1" >>confdefs.h fi done for ac_header in simgrid/barrier.h do : ac_fn_c_check_header_compile "$LINENO" "simgrid/barrier.h" "ac_cv_header_simgrid_barrier_h" "$ac_includes_default" if test "x$ac_cv_header_simgrid_barrier_h" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_BARRIER_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_SIMGRID_BARRIER_H 1" >>confdefs.h fi done ac_fn_c_check_header_compile "$LINENO" "simgrid/engine.h" "ac_cv_header_simgrid_engine_h" "$ac_includes_default" if test "x$ac_cv_header_simgrid_engine_h" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_ENGINE_H 1" >>confdefs.h fi for ac_header in simgrid/zone.h do : ac_fn_c_check_header_compile "$LINENO" "simgrid/zone.h" "ac_cv_header_simgrid_zone_h" "$ac_includes_default" if test "x$ac_cv_header_simgrid_zone_h" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_ZONE_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_SIMGRID_ZONE_H 1" >>confdefs.h fi done ac_fn_c_check_type "$LINENO" "smx_actor_t" "ac_cv_type_smx_actor_t" "#include " if test "x$ac_cv_type_smx_actor_t" = xyes then : printf "%s\n" "#define HAVE_SMX_ACTOR_T 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_SMX_ACTOR_T 1" >>confdefs.h fi # Latest functions ac_fn_c_check_func "$LINENO" "MSG_process_attach" "ac_cv_func_MSG_process_attach" if test "x$ac_cv_func_MSG_process_attach" = xyes then : printf "%s\n" "#define HAVE_MSG_PROCESS_ATTACH 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_actor_attach" "ac_cv_func_sg_actor_attach" if test "x$ac_cv_func_sg_actor_attach" = xyes then : printf "%s\n" "#define HAVE_SG_ACTOR_ATTACH 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_actor_attach_pthread" "ac_cv_func_sg_actor_attach_pthread" if test "x$ac_cv_func_sg_actor_attach_pthread" = xyes then : printf "%s\n" "#define HAVE_SG_ACTOR_ATTACH_PTHREAD 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_actor_init" "ac_cv_func_sg_actor_init" if test "x$ac_cv_func_sg_actor_init" = xyes then : printf "%s\n" "#define HAVE_SG_ACTOR_INIT 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_actor_set_stacksize" "ac_cv_func_sg_actor_set_stacksize" if test "x$ac_cv_func_sg_actor_set_stacksize" = xyes then : printf "%s\n" "#define HAVE_SG_ACTOR_SET_STACKSIZE 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_actor_on_exit" "ac_cv_func_sg_actor_on_exit" if test "x$ac_cv_func_sg_actor_on_exit" = xyes then : printf "%s\n" "#define HAVE_SG_ACTOR_ON_EXIT 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "MSG_zone_get_hosts" "ac_cv_func_MSG_zone_get_hosts" if test "x$ac_cv_func_MSG_zone_get_hosts" = xyes then : printf "%s\n" "#define HAVE_MSG_ZONE_GET_HOSTS 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_zone_get_hosts" "ac_cv_func_sg_zone_get_hosts" if test "x$ac_cv_func_sg_zone_get_hosts" = xyes then : printf "%s\n" "#define HAVE_SG_ZONE_GET_HOSTS 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_zone_get_all_hosts" "ac_cv_func_sg_zone_get_all_hosts" if test "x$ac_cv_func_sg_zone_get_all_hosts" = xyes then : printf "%s\n" "#define HAVE_SG_ZONE_GET_ALL_HOSTS 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "MSG_process_self_name" "ac_cv_func_MSG_process_self_name" if test "x$ac_cv_func_MSG_process_self_name" = xyes then : printf "%s\n" "#define HAVE_MSG_PROCESS_SELF_NAME 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "MSG_process_userdata_init" "ac_cv_func_MSG_process_userdata_init" if test "x$ac_cv_func_MSG_process_userdata_init" = xyes then : printf "%s\n" "#define HAVE_MSG_PROCESS_USERDATA_INIT 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_actor_get_data" "ac_cv_func_sg_actor_get_data" if test "x$ac_cv_func_sg_actor_get_data" = xyes then : printf "%s\n" "#define HAVE_SG_ACTOR_GET_DATA 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_actor_set_data" "ac_cv_func_sg_actor_set_data" if test "x$ac_cv_func_sg_actor_set_data" = xyes then : printf "%s\n" "#define HAVE_SG_ACTOR_SET_DATA 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_actor_data" "ac_cv_func_sg_actor_data" if test "x$ac_cv_func_sg_actor_data" = xyes then : printf "%s\n" "#define HAVE_SG_ACTOR_DATA 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "xbt_mutex_try_acquire" "ac_cv_func_xbt_mutex_try_acquire" if test "x$ac_cv_func_xbt_mutex_try_acquire" = xyes then : printf "%s\n" "#define HAVE_XBT_MUTEX_TRY_ACQUIRE 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "smpi_process_set_user_data" "ac_cv_func_smpi_process_set_user_data" if test "x$ac_cv_func_smpi_process_set_user_data" = xyes then : printf "%s\n" "#define HAVE_SMPI_PROCESS_SET_USER_DATA 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "SMPI_thread_create" "ac_cv_func_SMPI_thread_create" if test "x$ac_cv_func_SMPI_thread_create" = xyes then : printf "%s\n" "#define HAVE_SMPI_THREAD_CREATE 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_zone_get_by_name" "ac_cv_func_sg_zone_get_by_name" if test "x$ac_cv_func_sg_zone_get_by_name" = xyes then : printf "%s\n" "#define HAVE_SG_ZONE_GET_BY_NAME 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_link_get_name" "ac_cv_func_sg_link_get_name" if test "x$ac_cv_func_sg_link_get_name" = xyes then : printf "%s\n" "#define HAVE_SG_LINK_GET_NAME 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_link_name" "ac_cv_func_sg_link_name" if test "x$ac_cv_func_sg_link_name" = xyes then : printf "%s\n" "#define HAVE_SG_LINK_NAME 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_link_set_bandwidth" "ac_cv_func_sg_link_set_bandwidth" if test "x$ac_cv_func_sg_link_set_bandwidth" = xyes then : printf "%s\n" "#define HAVE_SG_LINK_SET_BANDWIDTH 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_link_bandwidth_set" "ac_cv_func_sg_link_bandwidth_set" if test "x$ac_cv_func_sg_link_bandwidth_set" = xyes then : printf "%s\n" "#define HAVE_SG_LINK_BANDWIDTH_SET 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_host_get_route" "ac_cv_func_sg_host_get_route" if test "x$ac_cv_func_sg_host_get_route" = xyes then : printf "%s\n" "#define HAVE_SG_HOST_GET_ROUTE 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_host_get_route_links" "ac_cv_func_sg_host_get_route_links" if test "x$ac_cv_func_sg_host_get_route_links" = xyes then : printf "%s\n" "#define HAVE_SG_HOST_GET_ROUTE_LINKS 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_host_route" "ac_cv_func_sg_host_route" if test "x$ac_cv_func_sg_host_route" = xyes then : printf "%s\n" "#define HAVE_SG_HOST_ROUTE 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_host_self" "ac_cv_func_sg_host_self" if test "x$ac_cv_func_sg_host_self" = xyes then : printf "%s\n" "#define HAVE_SG_HOST_SELF 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_host_list" "ac_cv_func_sg_host_list" if test "x$ac_cv_func_sg_host_list" = xyes then : printf "%s\n" "#define HAVE_SG_HOST_LIST 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_host_get_speed" "ac_cv_func_sg_host_get_speed" if test "x$ac_cv_func_sg_host_get_speed" = xyes then : printf "%s\n" "#define HAVE_SG_HOST_GET_SPEED 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_host_speed" "ac_cv_func_sg_host_speed" if test "x$ac_cv_func_sg_host_speed" = xyes then : printf "%s\n" "#define HAVE_SG_HOST_SPEED 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "simcall_process_create" "ac_cv_func_simcall_process_create" if test "x$ac_cv_func_simcall_process_create" = xyes then : printf "%s\n" "#define HAVE_SIMCALL_PROCESS_CREATE 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_config_continue_after_help" "ac_cv_func_sg_config_continue_after_help" if test "x$ac_cv_func_sg_config_continue_after_help" = xyes then : printf "%s\n" "#define HAVE_SG_CONFIG_CONTINUE_AFTER_HELP 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "simgrid_set_maestro" "ac_cv_func_simgrid_set_maestro" if test "x$ac_cv_func_simgrid_set_maestro" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_SET_MAESTRO 1" >>confdefs.h fi for ac_func in simgrid_init do : ac_fn_c_check_func "$LINENO" "simgrid_init" "ac_cv_func_simgrid_init" if test "x$ac_cv_func_simgrid_init" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_INIT 1" >>confdefs.h printf "%s\n" "#define STARPU_SIMGRID_HAVE_SIMGRID_INIT 1" >>confdefs.h fi done for ac_func in xbt_barrier_init do : ac_fn_c_check_func "$LINENO" "xbt_barrier_init" "ac_cv_func_xbt_barrier_init" if test "x$ac_cv_func_xbt_barrier_init" = xyes then : printf "%s\n" "#define HAVE_XBT_BARRIER_INIT 1" >>confdefs.h printf "%s\n" "#define STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT 1" >>confdefs.h fi done ac_fn_c_check_func "$LINENO" "sg_actor_sleep_for" "ac_cv_func_sg_actor_sleep_for" if test "x$ac_cv_func_sg_actor_sleep_for" = xyes then : printf "%s\n" "#define HAVE_SG_ACTOR_SLEEP_FOR 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_actor_self" "ac_cv_func_sg_actor_self" if test "x$ac_cv_func_sg_actor_self" = xyes then : printf "%s\n" "#define HAVE_SG_ACTOR_SELF 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_actor_ref" "ac_cv_func_sg_actor_ref" if test "x$ac_cv_func_sg_actor_ref" = xyes then : printf "%s\n" "#define HAVE_SG_ACTOR_REF 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_host_get_properties" "ac_cv_func_sg_host_get_properties" if test "x$ac_cv_func_sg_host_get_properties" = xyes then : printf "%s\n" "#define HAVE_SG_HOST_GET_PROPERTIES 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_host_get_property_names" "ac_cv_func_sg_host_get_property_names" if test "x$ac_cv_func_sg_host_get_property_names" = xyes then : printf "%s\n" "#define HAVE_SG_HOST_GET_PROPERTY_NAMES 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_host_send_to" "ac_cv_func_sg_host_send_to" if test "x$ac_cv_func_sg_host_send_to" = xyes then : printf "%s\n" "#define HAVE_SG_HOST_SEND_TO 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_host_sendto" "ac_cv_func_sg_host_sendto" if test "x$ac_cv_func_sg_host_sendto" = xyes then : printf "%s\n" "#define HAVE_SG_HOST_SENDTO 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_cfg_set_int" "ac_cv_func_sg_cfg_set_int" if test "x$ac_cv_func_sg_cfg_set_int" = xyes then : printf "%s\n" "#define HAVE_SG_CFG_SET_INT 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_actor_self_execute" "ac_cv_func_sg_actor_self_execute" if test "x$ac_cv_func_sg_actor_self_execute" = xyes then : printf "%s\n" "#define HAVE_SG_ACTOR_SELF_EXECUTE 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sg_actor_execute" "ac_cv_func_sg_actor_execute" if test "x$ac_cv_func_sg_actor_execute" = xyes then : printf "%s\n" "#define HAVE_SG_ACTOR_EXECUTE 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "simgrid_get_clock" "ac_cv_func_simgrid_get_clock" if test "x$ac_cv_func_simgrid_get_clock" = xyes then : printf "%s\n" "#define HAVE_SIMGRID_GET_CLOCK 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC options needed to detect all undeclared functions" >&5 printf %s "checking for $CC options needed to detect all undeclared functions... " >&6; } if test ${ac_cv_c_undeclared_builtin_options+y} then : printf %s "(cached) " >&6 else $as_nop ac_save_CFLAGS=$CFLAGS ac_cv_c_undeclared_builtin_options='cannot detect' for ac_arg in '' -fno-builtin; do CFLAGS="$ac_save_CFLAGS $ac_arg" # This test program should *not* compile successfully. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { (void) strchr; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : else $as_nop # This test program should compile successfully. # No library function is consistently available on # freestanding implementations, so test against a dummy # declaration. Include always-available headers on the # off chance that they somehow elicit warnings. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include #include #include extern void ac_decl (int, char *); int main (void) { (void) ac_decl (0, (char *) 0); (void) ac_decl; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : if test x"$ac_arg" = x then : ac_cv_c_undeclared_builtin_options='none needed' else $as_nop ac_cv_c_undeclared_builtin_options=$ac_arg fi break fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext done CFLAGS=$ac_save_CFLAGS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_undeclared_builtin_options" >&5 printf "%s\n" "$ac_cv_c_undeclared_builtin_options" >&6; } case $ac_cv_c_undeclared_builtin_options in #( 'cannot detect') : { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot make $CC report undeclared builtins See \`config.log' for more details" "$LINENO" 5; } ;; #( 'none needed') : ac_c_undeclared_builtin_options='' ;; #( *) : ac_c_undeclared_builtin_options=$ac_cv_c_undeclared_builtin_options ;; esac ac_fn_check_decl "$LINENO" "smpi_process_set_user_data" "ac_cv_have_decl_smpi_process_set_user_data" "#include " "$ac_c_undeclared_builtin_options" "CFLAGS" if test "x$ac_cv_have_decl_smpi_process_set_user_data" = xyes then : ac_have_decl=1 else $as_nop ac_have_decl=0 fi printf "%s\n" "#define HAVE_DECL_SMPI_PROCESS_SET_USER_DATA $ac_have_decl" >>confdefs.h # Oldies for compatibility with older simgrid ac_fn_c_check_func "$LINENO" "MSG_get_as_by_name" "ac_cv_func_MSG_get_as_by_name" if test "x$ac_cv_func_MSG_get_as_by_name" = xyes then : printf "%s\n" "#define HAVE_MSG_GET_AS_BY_NAME 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "MSG_zone_get_by_name" "ac_cv_func_MSG_zone_get_by_name" if test "x$ac_cv_func_MSG_zone_get_by_name" = xyes then : printf "%s\n" "#define HAVE_MSG_ZONE_GET_BY_NAME 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "MSG_environment_get_routing_root" "ac_cv_func_MSG_environment_get_routing_root" if test "x$ac_cv_func_MSG_environment_get_routing_root" = xyes then : printf "%s\n" "#define HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "MSG_host_get_speed" "ac_cv_func_MSG_host_get_speed" if test "x$ac_cv_func_MSG_host_get_speed" = xyes then : printf "%s\n" "#define HAVE_MSG_HOST_GET_SPEED 1" >>confdefs.h fi LIBS="${SAVED_LIBS}" printf "%s\n" "#define STARPU_SIMGRID 1" >>confdefs.h # We won't bind or detect anything with_hwloc=no # disable mpi checks by default, they require static linking, we don't # want that by default default_enable_mpi_check=no # disable MPI support by default default_enable_mpi=no ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu if test x$enable_shared = xno ; then # When linking statically, libtool does not realize we need libstdc++ for simgrid_cpp.cpp SIMGRID_LIBS="$SIMGRID_LIBS -lstdc++" LIBS="$LIBS -lstdc++" fi SIMGRID_LDFLAGS="$SIMGRID_LIBS -lsimgrid" # Simgrid 3.12 & 3.13 need -std=c++11 to be able to build anything in C++... case \ $CXXFLAGS\ in *\ -std=*\ *) ;; *) # Make sure our C++ compiler can compile simgrid headers SIMGRID_INCLUDES=" #ifdef STARPU_HAVE_SIMGRID_MSG_H #include #include #elif defined(STARPU_HAVE_MSG_MSG_H) #include #endif #ifdef STARPU_HAVE_XBT_BASE_H #include #endif #ifdef STARPU_HAVE_SIMGRID_VERSION_H #include #endif #ifdef STARPU_HAVE_SIMGRID_ZONE_H #include #endif #ifdef STARPU_HAVE_SIMGRID_HOST_H #include #endif #include " cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $SIMGRID_INCLUDES int main (void) { ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO" then : else $as_nop CXXFLAGS="-std=c++11 $CXXFLAGS" fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; esac cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $SIMGRID_INCLUDES #include int main (void) { simgrid::s4u::Engine::on_time_advance_cb([](double delta) { }); ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO" then : printf "%s\n" "#define STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB 1" >>confdefs.h fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu # Check whether --enable-simgrid-mc was given. if test ${enable_simgrid_mc+y} then : enableval=$enable_simgrid_mc; enable_simgrid_mc=$enableval else $as_nop enable_simgrid_mc=no fi if test x$enable_simgrid_mc = xyes ; then printf "%s\n" "#define STARPU_SIMGRID_MC 1" >>confdefs.h # Extract the first word of "simgrid-mc", so it can be a program name with args. set dummy simgrid-mc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_SIMGRID_MC+y} then : printf %s "(cached) " >&6 else $as_nop case $SIMGRID_MC in [\\/]* | ?:[\\/]*) ac_cv_path_SIMGRID_MC="$SIMGRID_MC" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_dummy="$simgrid_dir/bin:$PATH" for as_dir in $as_dummy do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_SIMGRID_MC="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_SIMGRID_MC" && ac_cv_path_SIMGRID_MC="no" ;; esac fi SIMGRID_MC=$ac_cv_path_SIMGRID_MC if test -n "$SIMGRID_MC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $SIMGRID_MC" >&5 printf "%s\n" "$SIMGRID_MC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi LDFLAGS="$LDFLAGS -Wl,-znorelro -Wl,-znoseparate-code" # libsimgrid needs to be linked from binaries themselves for MC to work STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS $SIMGRID_LDFLAGS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifdef STARPU_HAVE_XBT_BASE_H #include #endif #ifdef STARPU_HAVE_SIMGRID_VERSION_H #include #endif #if SIMGRID_VERSION < 33100 #error no mutex support with MC #endif int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : else $as_nop as_fn_error $? "We need simgrid >= 3.31 for MC" "$LINENO" 5 fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi if test x$enable_simgrid_mc = xyes; then STARPU_SIMGRID_MC_TRUE= STARPU_SIMGRID_MC_FALSE='#' else STARPU_SIMGRID_MC_TRUE='#' STARPU_SIMGRID_MC_FALSE= fi if test x$enable_simgrid = xyes; then STARPU_SIMGRID_TRUE= STARPU_SIMGRID_FALSE='#' else STARPU_SIMGRID_TRUE='#' STARPU_SIMGRID_FALSE= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether SimGrid is enabled" >&5 printf %s "checking whether SimGrid is enabled... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_simgrid" >&5 printf "%s\n" "$enable_simgrid" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether blocking drivers should be enabled" >&5 printf %s "checking whether blocking drivers should be enabled... " >&6; } # Check whether --enable-blocking-drivers was given. if test ${enable_blocking_drivers+y} then : enableval=$enable_blocking_drivers; enable_blocking=$enableval else $as_nop enable_blocking=$enable_simgrid fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_blocking" >&5 printf "%s\n" "$enable_blocking" >&6; } if test x$enable_blocking = xno ; then if test x$enable_simgrid = xyes ; then as_fn_error $? "--disable-blocking-drivers cannot be used in simgrid mode" "$LINENO" 5 fi printf "%s\n" "#define STARPU_NON_BLOCKING_DRIVERS 1" >>confdefs.h fi if test x$enable_blocking = xyes ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether worker callbacks should be enabled" >&5 printf %s "checking whether worker callbacks should be enabled... " >&6; } # Check whether --enable-worker-callbacks was given. if test ${enable_worker_callbacks+y} then : enableval=$enable_worker_callbacks; enable_worker_cb=$enableval else $as_nop enable_worker_cb=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_worker_cb" >&5 printf "%s\n" "$enable_worker_cb" >&6; } else # worker sleep/wake-up callbacks only make sense if blocking drivers are enabled enable_worker_cb=no fi if test x$enable_worker_cb = xyes ; then printf "%s\n" "#define STARPU_WORKER_CALLBACKS 1" >>confdefs.h fi ############################################################################### # # # LIBTOOLS # # # ############################################################################### #c++11 detection ax_cxx_compile_cxx11_required=false ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu ac_success=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features by default" >&5 printf %s "checking whether $CXX supports C++11 features by default... " >&6; } if test ${ax_cv_cxx_compile_cxx11+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ // If the compiler admits that it is not ready for C++11, why torture it? // Hopefully, this will speed up the test. #ifndef __cplusplus #error "This is not a C++ compiler" #elif __cplusplus < 201103L #error "This is not a C++11 compiler" #else namespace cxx11 { namespace test_static_assert { template struct check { static_assert(sizeof(int) <= sizeof(T), "not big enough"); }; } namespace test_final_override { struct Base { virtual void f() {} }; struct Derived : public Base { virtual void f() override {} }; } namespace test_double_right_angle_brackets { template < typename T > struct check {}; typedef check single_type; typedef check> double_type; typedef check>> triple_type; typedef check>>> quadruple_type; } namespace test_decltype { int f() { int a = 1; decltype(a) b = 2; return a + b; } } namespace test_type_deduction { template < typename T1, typename T2 > struct is_same { static const bool value = false; }; template < typename T > struct is_same { static const bool value = true; }; template < typename T1, typename T2 > auto add(T1 a1, T2 a2) -> decltype(a1 + a2) { return a1 + a2; } int test(const int c, volatile int v) { static_assert(is_same::value == true, ""); static_assert(is_same::value == false, ""); static_assert(is_same::value == false, ""); auto ac = c; auto av = v; auto sumi = ac + av + 'x'; auto sumf = ac + av + 1.0; static_assert(is_same::value == true, ""); static_assert(is_same::value == true, ""); static_assert(is_same::value == true, ""); static_assert(is_same::value == false, ""); static_assert(is_same::value == true, ""); return (sumf > 0.0) ? sumi : add(c, v); } } namespace test_noexcept { int f() { return 0; } int g() noexcept { return 0; } static_assert(noexcept(f()) == false, ""); static_assert(noexcept(g()) == true, ""); } namespace test_constexpr { template < typename CharT > unsigned long constexpr strlen_c_r(const CharT *const s, const unsigned long acc) noexcept { return *s ? strlen_c_r(s + 1, acc + 1) : acc; } template < typename CharT > unsigned long constexpr strlen_c(const CharT *const s) noexcept { return strlen_c_r(s, 0UL); } static_assert(strlen_c("") == 0UL, ""); static_assert(strlen_c("1") == 1UL, ""); static_assert(strlen_c("example") == 7UL, ""); static_assert(strlen_c("another\0example") == 7UL, ""); } namespace test_rvalue_references { template < int N > struct answer { static constexpr int value = N; }; answer<1> f(int&) { return answer<1>(); } answer<2> f(const int&) { return answer<2>(); } answer<3> f(int&&) { return answer<3>(); } void test() { int i = 0; const int c = 0; static_assert(decltype(f(i))::value == 1, ""); static_assert(decltype(f(c))::value == 2, ""); static_assert(decltype(f(0))::value == 3, ""); } } namespace test_uniform_initialization { struct test { static const int zero {}; static const int one {1}; }; static_assert(test::zero == 0, ""); static_assert(test::one == 1, ""); } namespace test_lambdas { void test1() { auto lambda1 = [](){}; auto lambda2 = lambda1; lambda1(); lambda2(); } int test2() { auto a = [](int i, int j){ return i + j; }(1, 2); auto b = []() -> int { return '0'; }(); auto c = [=](){ return a + b; }(); auto d = [&](){ return c; }(); auto e = [a, &b](int x) mutable { const auto identity = [](int y){ return y; }; for (auto i = 0; i < a; ++i) a += b--; return x + identity(a + b); }(0); return a + b + c + d + e; } int test3() { const auto nullary = [](){ return 0; }; const auto unary = [](int x){ return x; }; using nullary_t = decltype(nullary); using unary_t = decltype(unary); const auto higher1st = [](nullary_t f){ return f(); }; const auto higher2nd = [unary](nullary_t f1){ return [unary, f1](unary_t f2){ return f2(unary(f1())); }; }; return higher1st(nullary) + higher2nd(nullary)(unary); } } namespace test_variadic_templates { template struct sum; template struct sum { static constexpr auto value = N0 + sum::value; }; template <> struct sum<> { static constexpr auto value = 0; }; static_assert(sum<>::value == 0, ""); static_assert(sum<1>::value == 1, ""); static_assert(sum<23>::value == 23, ""); static_assert(sum<1, 2>::value == 3, ""); static_assert(sum<5, 5, 11>::value == 21, ""); static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); } // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function // because of this. namespace test_template_alias_sfinae { struct foo {}; template using member = typename T::member_type; template void func(...) {} template void func(member*) {} void test(); void test() { func(0); } } } // namespace cxx11 #endif // __cplusplus >= 201103L _ACEOF if ac_fn_cxx_try_compile "$LINENO" then : ax_cv_cxx_compile_cxx11=yes else $as_nop ax_cv_cxx_compile_cxx11=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_cxx_compile_cxx11" >&5 printf "%s\n" "$ax_cv_cxx_compile_cxx11" >&6; } if test x$ax_cv_cxx_compile_cxx11 = xyes; then ac_success=yes fi if test x$ac_success = xno; then for switch in -std=c++11 -std=c++0x +std=c++11 "-h std=c++11"; do cachevar=`printf "%s\n" "ax_cv_cxx_compile_cxx11_$switch" | $as_tr_sh` { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++11 features with $switch" >&5 printf %s "checking whether $CXX supports C++11 features with $switch... " >&6; } if eval test \${$cachevar+y} then : printf %s "(cached) " >&6 else $as_nop ac_save_CXX="$CXX" CXX="$CXX $switch" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ // If the compiler admits that it is not ready for C++11, why torture it? // Hopefully, this will speed up the test. #ifndef __cplusplus #error "This is not a C++ compiler" #elif __cplusplus < 201103L #error "This is not a C++11 compiler" #else namespace cxx11 { namespace test_static_assert { template struct check { static_assert(sizeof(int) <= sizeof(T), "not big enough"); }; } namespace test_final_override { struct Base { virtual void f() {} }; struct Derived : public Base { virtual void f() override {} }; } namespace test_double_right_angle_brackets { template < typename T > struct check {}; typedef check single_type; typedef check> double_type; typedef check>> triple_type; typedef check>>> quadruple_type; } namespace test_decltype { int f() { int a = 1; decltype(a) b = 2; return a + b; } } namespace test_type_deduction { template < typename T1, typename T2 > struct is_same { static const bool value = false; }; template < typename T > struct is_same { static const bool value = true; }; template < typename T1, typename T2 > auto add(T1 a1, T2 a2) -> decltype(a1 + a2) { return a1 + a2; } int test(const int c, volatile int v) { static_assert(is_same::value == true, ""); static_assert(is_same::value == false, ""); static_assert(is_same::value == false, ""); auto ac = c; auto av = v; auto sumi = ac + av + 'x'; auto sumf = ac + av + 1.0; static_assert(is_same::value == true, ""); static_assert(is_same::value == true, ""); static_assert(is_same::value == true, ""); static_assert(is_same::value == false, ""); static_assert(is_same::value == true, ""); return (sumf > 0.0) ? sumi : add(c, v); } } namespace test_noexcept { int f() { return 0; } int g() noexcept { return 0; } static_assert(noexcept(f()) == false, ""); static_assert(noexcept(g()) == true, ""); } namespace test_constexpr { template < typename CharT > unsigned long constexpr strlen_c_r(const CharT *const s, const unsigned long acc) noexcept { return *s ? strlen_c_r(s + 1, acc + 1) : acc; } template < typename CharT > unsigned long constexpr strlen_c(const CharT *const s) noexcept { return strlen_c_r(s, 0UL); } static_assert(strlen_c("") == 0UL, ""); static_assert(strlen_c("1") == 1UL, ""); static_assert(strlen_c("example") == 7UL, ""); static_assert(strlen_c("another\0example") == 7UL, ""); } namespace test_rvalue_references { template < int N > struct answer { static constexpr int value = N; }; answer<1> f(int&) { return answer<1>(); } answer<2> f(const int&) { return answer<2>(); } answer<3> f(int&&) { return answer<3>(); } void test() { int i = 0; const int c = 0; static_assert(decltype(f(i))::value == 1, ""); static_assert(decltype(f(c))::value == 2, ""); static_assert(decltype(f(0))::value == 3, ""); } } namespace test_uniform_initialization { struct test { static const int zero {}; static const int one {1}; }; static_assert(test::zero == 0, ""); static_assert(test::one == 1, ""); } namespace test_lambdas { void test1() { auto lambda1 = [](){}; auto lambda2 = lambda1; lambda1(); lambda2(); } int test2() { auto a = [](int i, int j){ return i + j; }(1, 2); auto b = []() -> int { return '0'; }(); auto c = [=](){ return a + b; }(); auto d = [&](){ return c; }(); auto e = [a, &b](int x) mutable { const auto identity = [](int y){ return y; }; for (auto i = 0; i < a; ++i) a += b--; return x + identity(a + b); }(0); return a + b + c + d + e; } int test3() { const auto nullary = [](){ return 0; }; const auto unary = [](int x){ return x; }; using nullary_t = decltype(nullary); using unary_t = decltype(unary); const auto higher1st = [](nullary_t f){ return f(); }; const auto higher2nd = [unary](nullary_t f1){ return [unary, f1](unary_t f2){ return f2(unary(f1())); }; }; return higher1st(nullary) + higher2nd(nullary)(unary); } } namespace test_variadic_templates { template struct sum; template struct sum { static constexpr auto value = N0 + sum::value; }; template <> struct sum<> { static constexpr auto value = 0; }; static_assert(sum<>::value == 0, ""); static_assert(sum<1>::value == 1, ""); static_assert(sum<23>::value == 23, ""); static_assert(sum<1, 2>::value == 3, ""); static_assert(sum<5, 5, 11>::value == 21, ""); static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); } // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function // because of this. namespace test_template_alias_sfinae { struct foo {}; template using member = typename T::member_type; template void func(...) {} template void func(member*) {} void test(); void test() { func(0); } } } // namespace cxx11 #endif // __cplusplus >= 201103L _ACEOF if ac_fn_cxx_try_compile "$LINENO" then : eval $cachevar=yes else $as_nop eval $cachevar=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CXX="$ac_save_CXX" fi eval ac_res=\$$cachevar { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 printf "%s\n" "$ac_res" >&6; } if eval test x\$$cachevar = xyes; then CXX="$CXX $switch" if test -n "$CXXCPP" ; then CXXCPP="$CXXCPP $switch" fi ac_success=yes break fi done fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test x$ax_cxx_compile_cxx11_required = xtrue; then if test x$ac_success = xno; then as_fn_error $? "*** A compiler with support for C++11 language features is required." "$LINENO" 5 fi fi if test x$ac_success = xno; then HAVE_CXX11=0 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: No compiler with C++11 support was found" >&5 printf "%s\n" "$as_me: No compiler with C++11 support was found" >&6;} else HAVE_CXX11=1 printf "%s\n" "#define HAVE_CXX11 1" >>confdefs.h fi STARPU_HAVE_CXX11=$HAVE_CXX11 if test "$HAVE_CXX11" -eq 1; then STARPU_HAVE_CXX11_TRUE= STARPU_HAVE_CXX11_FALSE='#' else STARPU_HAVE_CXX11_TRUE='#' STARPU_HAVE_CXX11_FALSE= fi if test $HAVE_CXX11 -eq 1; then printf "%s\n" "#define STARPU_HAVE_CXX11 1" >>confdefs.h fi case `pwd` in *\ * | *\ *) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5 printf "%s\n" "$as_me: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&2;} ;; esac macro_version='2.4.7' macro_revision='2.4.7' ltmain=$ac_aux_dir/ltmain.sh # Backslashify metacharacters that are still active within # double-quoted strings. sed_quote_subst='s/\(["`$\\]\)/\\\1/g' # Same as above, but do not quote variable references. double_quote_subst='s/\(["`\\]\)/\\\1/g' # Sed substitution to delay expansion of an escaped shell variable in a # double_quote_subst'ed string. delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' # Sed substitution to delay expansion of an escaped single quote. delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' # Sed substitution to avoid accidental globbing in evaled expressions no_glob_subst='s/\*/\\\*/g' ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to print strings" >&5 printf %s "checking how to print strings... " >&6; } # Test print first, because it will be a builtin if present. if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then ECHO='print -r --' elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then ECHO='printf %s\n' else # Use this function as a fallback that always works. func_fallback_echo () { eval 'cat <<_LTECHO_EOF $1 _LTECHO_EOF' } ECHO='func_fallback_echo' fi # func_echo_all arg... # Invoke $ECHO with all args, space-separated. func_echo_all () { $ECHO "" } case $ECHO in printf*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: printf" >&5 printf "%s\n" "printf" >&6; } ;; print*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: print -r" >&5 printf "%s\n" "print -r" >&6; } ;; *) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: cat" >&5 printf "%s\n" "cat" >&6; } ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 printf %s "checking for a sed that does not truncate output... " >&6; } if test ${ac_cv_path_SED+y} then : printf %s "(cached) " >&6 else $as_nop ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ for ac_i in 1 2 3 4 5 6 7; do ac_script="$ac_script$as_nl$ac_script" done echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed { ac_script=; unset ac_script;} if test -z "$SED"; then ac_path_SED_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_prog in sed gsed do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_SED="$as_dir$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_SED" || continue # Check for GNU ac_path_SED and select it if it is found. # Check for GNU $ac_path_SED case `"$ac_path_SED" --version 2>&1` in *GNU*) ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;; *) ac_count=0 printf %s 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" printf "%s\n" '' >> "conftest.nl" "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_SED_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_SED="$ac_path_SED" ac_path_SED_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_SED_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_SED"; then as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5 fi else ac_cv_path_SED=$SED fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 printf "%s\n" "$ac_cv_path_SED" >&6; } SED="$ac_cv_path_SED" rm -f conftest.sed test -z "$SED" && SED=sed Xsed="$SED -e 1s/^X//" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5 printf %s "checking for fgrep... " >&6; } if test ${ac_cv_path_FGREP+y} then : printf %s "(cached) " >&6 else $as_nop if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1 then ac_cv_path_FGREP="$GREP -F" else if test -z "$FGREP"; then ac_path_FGREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_prog in fgrep do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_FGREP="$as_dir$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_FGREP" || continue # Check for GNU ac_path_FGREP and select it if it is found. # Check for GNU $ac_path_FGREP case `"$ac_path_FGREP" --version 2>&1` in *GNU*) ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;; *) ac_count=0 printf %s 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" printf "%s\n" 'FGREP' >> "conftest.nl" "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_FGREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_FGREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_FGREP"; then as_fn_error $? "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 fi else ac_cv_path_FGREP=$FGREP fi fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5 printf "%s\n" "$ac_cv_path_FGREP" >&6; } FGREP="$ac_cv_path_FGREP" test -z "$GREP" && GREP=grep # Check whether --with-gnu-ld was given. if test ${with_gnu_ld+y} then : withval=$with_gnu_ld; test no = "$withval" || with_gnu_ld=yes else $as_nop with_gnu_ld=no fi ac_prog=ld if test yes = "$GCC"; then # Check if gcc -print-prog-name=ld gives a path. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 printf %s "checking for ld used by $CC... " >&6; } case $host in *-*-mingw*) # gcc leaves a trailing carriage return, which upsets mingw ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; *) ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; esac case $ac_prog in # Accept absolute paths. [\\/]* | ?:[\\/]*) re_direlt='/[^/][^/]*/\.\./' # Canonicalize the pathname of ld ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` done test -z "$LD" && LD=$ac_prog ;; "") # If it fails, then pretend we aren't using GCC. ac_prog=ld ;; *) # If it is relative, then search for the first ld in PATH. with_gnu_ld=unknown ;; esac elif test yes = "$with_gnu_ld"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5 printf %s "checking for GNU ld... " >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5 printf %s "checking for non-GNU ld... " >&6; } fi if test ${lt_cv_path_LD+y} then : printf %s "(cached) " >&6 else $as_nop if test -z "$LD"; then lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR for ac_dir in $PATH; do IFS=$lt_save_ifs test -z "$ac_dir" && ac_dir=. if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then lt_cv_path_LD=$ac_dir/$ac_prog # Check to see if the program is GNU ld. I'd rather use --version, # but apparently some variants of GNU ld only accept -v. # Break only if it was the GNU/non-GNU ld that we prefer. case `"$lt_cv_path_LD" -v 2>&1 &5 printf "%s\n" "$LD" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5 printf %s "checking if the linker ($LD) is GNU ld... " >&6; } if test ${lt_cv_prog_gnu_ld+y} then : printf %s "(cached) " >&6 else $as_nop # I'd rather use --version here, but apparently some GNU lds only accept -v. case `$LD -v 2>&1 &5 printf "%s\n" "$lt_cv_prog_gnu_ld" >&6; } with_gnu_ld=$lt_cv_prog_gnu_ld { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5 printf %s "checking for BSD- or MS-compatible name lister (nm)... " >&6; } if test ${lt_cv_path_NM+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$NM"; then # Let the user override the test. lt_cv_path_NM=$NM else lt_nm_to_check=${ac_tool_prefix}nm if test -n "$ac_tool_prefix" && test "$build" = "$host"; then lt_nm_to_check="$lt_nm_to_check nm" fi for lt_tmp_nm in $lt_nm_to_check; do lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do IFS=$lt_save_ifs test -z "$ac_dir" && ac_dir=. tmp_nm=$ac_dir/$lt_tmp_nm if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext"; then # Check to see if the nm accepts a BSD-compat flag. # Adding the 'sed 1q' prevents false positives on HP-UX, which says: # nm: unknown option "B" ignored # Tru64's nm complains that /dev/null is an invalid object file # MSYS converts /dev/null to NUL, MinGW nm treats NUL as empty case $build_os in mingw*) lt_bad_file=conftest.nm/nofile ;; *) lt_bad_file=/dev/null ;; esac case `"$tmp_nm" -B $lt_bad_file 2>&1 | $SED '1q'` in *$lt_bad_file* | *'Invalid file or object type'*) lt_cv_path_NM="$tmp_nm -B" break 2 ;; *) case `"$tmp_nm" -p /dev/null 2>&1 | $SED '1q'` in */dev/null*) lt_cv_path_NM="$tmp_nm -p" break 2 ;; *) lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but continue # so that we can try to find one that supports BSD flags ;; esac ;; esac fi done IFS=$lt_save_ifs done : ${lt_cv_path_NM=no} fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5 printf "%s\n" "$lt_cv_path_NM" >&6; } if test no != "$lt_cv_path_NM"; then NM=$lt_cv_path_NM else # Didn't find any BSD compatible name lister, look for dumpbin. if test -n "$DUMPBIN"; then : # Let the user override the test. else if test -n "$ac_tool_prefix"; then for ac_prog in dumpbin "link -dump" do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_DUMPBIN+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$DUMPBIN"; then ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi DUMPBIN=$ac_cv_prog_DUMPBIN if test -n "$DUMPBIN"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DUMPBIN" >&5 printf "%s\n" "$DUMPBIN" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$DUMPBIN" && break done fi if test -z "$DUMPBIN"; then ac_ct_DUMPBIN=$DUMPBIN for ac_prog in dumpbin "link -dump" do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_DUMPBIN+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_DUMPBIN"; then ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_DUMPBIN="$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN if test -n "$ac_ct_DUMPBIN"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DUMPBIN" >&5 printf "%s\n" "$ac_ct_DUMPBIN" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$ac_ct_DUMPBIN" && break done if test "x$ac_ct_DUMPBIN" = x; then DUMPBIN=":" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac DUMPBIN=$ac_ct_DUMPBIN fi fi case `$DUMPBIN -symbols -headers /dev/null 2>&1 | $SED '1q'` in *COFF*) DUMPBIN="$DUMPBIN -symbols -headers" ;; *) DUMPBIN=: ;; esac fi if test : != "$DUMPBIN"; then NM=$DUMPBIN fi fi test -z "$NM" && NM=nm { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5 printf %s "checking the name lister ($NM) interface... " >&6; } if test ${lt_cv_nm_interface+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_nm_interface="BSD nm" echo "int some_variable = 0;" > conftest.$ac_ext (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5) (eval "$ac_compile" 2>conftest.err) cat conftest.err >&5 (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&5) (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) cat conftest.err >&5 (eval echo "\"\$as_me:$LINENO: output\"" >&5) cat conftest.out >&5 if $GREP 'External.*some_variable' conftest.out > /dev/null; then lt_cv_nm_interface="MS dumpbin" fi rm -f conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5 printf "%s\n" "$lt_cv_nm_interface" >&6; } # find the maximum length of command line arguments { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking the maximum length of command line arguments" >&5 printf %s "checking the maximum length of command line arguments... " >&6; } if test ${lt_cv_sys_max_cmd_len+y} then : printf %s "(cached) " >&6 else $as_nop i=0 teststring=ABCD case $build_os in msdosdjgpp*) # On DJGPP, this test can blow up pretty badly due to problems in libc # (any single argument exceeding 2000 bytes causes a buffer overrun # during glob expansion). Even if it were fixed, the result of this # check would be larger than it should be. lt_cv_sys_max_cmd_len=12288; # 12K is about right ;; gnu*) # Under GNU Hurd, this test is not required because there is # no limit to the length of command line arguments. # Libtool will interpret -1 as no limit whatsoever lt_cv_sys_max_cmd_len=-1; ;; cygwin* | mingw* | cegcc*) # On Win9x/ME, this test blows up -- it succeeds, but takes # about 5 minutes as the teststring grows exponentially. # Worse, since 9x/ME are not pre-emptively multitasking, # you end up with a "frozen" computer, even though with patience # the test eventually succeeds (with a max line length of 256k). # Instead, let's just punt: use the minimum linelength reported by # all of the supported platforms: 8192 (on NT/2K/XP). lt_cv_sys_max_cmd_len=8192; ;; mint*) # On MiNT this can take a long time and run out of memory. lt_cv_sys_max_cmd_len=8192; ;; amigaos*) # On AmigaOS with pdksh, this test takes hours, literally. # So we just punt and use a minimum line length of 8192. lt_cv_sys_max_cmd_len=8192; ;; bitrig* | darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) # This has been around since 386BSD, at least. Likely further. if test -x /sbin/sysctl; then lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` elif test -x /usr/sbin/sysctl; then lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` else lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs fi # And add a safety zone lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` ;; interix*) # We know the value 262144 and hardcode it with a safety zone (like BSD) lt_cv_sys_max_cmd_len=196608 ;; os2*) # The test takes a long time on OS/2. lt_cv_sys_max_cmd_len=8192 ;; osf*) # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not # nice to cause kernel panics so lets avoid the loop below. # First set a reasonable default. lt_cv_sys_max_cmd_len=16384 # if test -x /sbin/sysconfig; then case `/sbin/sysconfig -q proc exec_disable_arg_limit` in *1*) lt_cv_sys_max_cmd_len=-1 ;; esac fi ;; sco3.2v5*) lt_cv_sys_max_cmd_len=102400 ;; sysv5* | sco5v6* | sysv4.2uw2*) kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` if test -n "$kargmax"; then lt_cv_sys_max_cmd_len=`echo $kargmax | $SED 's/.*[ ]//'` else lt_cv_sys_max_cmd_len=32768 fi ;; *) lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` if test -n "$lt_cv_sys_max_cmd_len" && \ test undefined != "$lt_cv_sys_max_cmd_len"; then lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` else # Make teststring a little bigger before we do anything with it. # a 1K string should be a reasonable start. for i in 1 2 3 4 5 6 7 8; do teststring=$teststring$teststring done SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} # If test is not a shell built-in, we'll probably end up computing a # maximum length that is only half of the actual maximum length, but # we can't tell. while { test X`env echo "$teststring$teststring" 2>/dev/null` \ = "X$teststring$teststring"; } >/dev/null 2>&1 && test 17 != "$i" # 1/2 MB should be enough do i=`expr $i + 1` teststring=$teststring$teststring done # Only check the string length outside the loop. lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` teststring= # Add a significant safety factor because C++ compilers can tack on # massive amounts of additional arguments before passing them to the # linker. It appears as though 1/2 is a usable value. lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` fi ;; esac fi if test -n "$lt_cv_sys_max_cmd_len"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sys_max_cmd_len" >&5 printf "%s\n" "$lt_cv_sys_max_cmd_len" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none" >&5 printf "%s\n" "none" >&6; } fi max_cmd_len=$lt_cv_sys_max_cmd_len : ${CP="cp -f"} : ${MV="mv -f"} : ${RM="rm -f"} if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then lt_unset=unset else lt_unset=false fi # test EBCDIC or ASCII case `echo X|tr X '\101'` in A) # ASCII based system # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr lt_SP2NL='tr \040 \012' lt_NL2SP='tr \015\012 \040\040' ;; *) # EBCDIC based system lt_SP2NL='tr \100 \n' lt_NL2SP='tr \r\n \100\100' ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to $host format" >&5 printf %s "checking how to convert $build file names to $host format... " >&6; } if test ${lt_cv_to_host_file_cmd+y} then : printf %s "(cached) " >&6 else $as_nop case $host in *-*-mingw* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 ;; *-*-cygwin* ) lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 ;; * ) # otherwise, assume *nix lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 ;; esac ;; *-*-cygwin* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin ;; *-*-cygwin* ) lt_cv_to_host_file_cmd=func_convert_file_noop ;; * ) # otherwise, assume *nix lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin ;; esac ;; * ) # unhandled hosts (and "normal" native builds) lt_cv_to_host_file_cmd=func_convert_file_noop ;; esac fi to_host_file_cmd=$lt_cv_to_host_file_cmd { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_host_file_cmd" >&5 printf "%s\n" "$lt_cv_to_host_file_cmd" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to toolchain format" >&5 printf %s "checking how to convert $build file names to toolchain format... " >&6; } if test ${lt_cv_to_tool_file_cmd+y} then : printf %s "(cached) " >&6 else $as_nop #assume ordinary cross tools, or native build. lt_cv_to_tool_file_cmd=func_convert_file_noop case $host in *-*-mingw* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 ;; esac ;; esac fi to_tool_file_cmd=$lt_cv_to_tool_file_cmd { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_tool_file_cmd" >&5 printf "%s\n" "$lt_cv_to_tool_file_cmd" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $LD option to reload object files" >&5 printf %s "checking for $LD option to reload object files... " >&6; } if test ${lt_cv_ld_reload_flag+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_ld_reload_flag='-r' fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_reload_flag" >&5 printf "%s\n" "$lt_cv_ld_reload_flag" >&6; } reload_flag=$lt_cv_ld_reload_flag case $reload_flag in "" | " "*) ;; *) reload_flag=" $reload_flag" ;; esac reload_cmds='$LD$reload_flag -o $output$reload_objs' case $host_os in cygwin* | mingw* | pw32* | cegcc*) if test yes != "$GCC"; then reload_cmds=false fi ;; darwin*) if test yes = "$GCC"; then reload_cmds='$LTCC $LTCFLAGS -nostdlib $wl-r -o $output$reload_objs' else reload_cmds='$LD$reload_flag -o $output$reload_objs' fi ;; esac if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}file", so it can be a program name with args. set dummy ${ac_tool_prefix}file; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_FILECMD+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$FILECMD"; then ac_cv_prog_FILECMD="$FILECMD" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_FILECMD="${ac_tool_prefix}file" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi FILECMD=$ac_cv_prog_FILECMD if test -n "$FILECMD"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $FILECMD" >&5 printf "%s\n" "$FILECMD" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_FILECMD"; then ac_ct_FILECMD=$FILECMD # Extract the first word of "file", so it can be a program name with args. set dummy file; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_FILECMD+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_FILECMD"; then ac_cv_prog_ac_ct_FILECMD="$ac_ct_FILECMD" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_FILECMD="file" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_FILECMD=$ac_cv_prog_ac_ct_FILECMD if test -n "$ac_ct_FILECMD"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_FILECMD" >&5 printf "%s\n" "$ac_ct_FILECMD" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_FILECMD" = x; then FILECMD=":" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac FILECMD=$ac_ct_FILECMD fi else FILECMD="$ac_cv_prog_FILECMD" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. set dummy ${ac_tool_prefix}objdump; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_OBJDUMP+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$OBJDUMP"; then ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi OBJDUMP=$ac_cv_prog_OBJDUMP if test -n "$OBJDUMP"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5 printf "%s\n" "$OBJDUMP" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_OBJDUMP"; then ac_ct_OBJDUMP=$OBJDUMP # Extract the first word of "objdump", so it can be a program name with args. set dummy objdump; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_OBJDUMP+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_OBJDUMP"; then ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_OBJDUMP="objdump" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP if test -n "$ac_ct_OBJDUMP"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5 printf "%s\n" "$ac_ct_OBJDUMP" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_OBJDUMP" = x; then OBJDUMP="false" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac OBJDUMP=$ac_ct_OBJDUMP fi else OBJDUMP="$ac_cv_prog_OBJDUMP" fi test -z "$OBJDUMP" && OBJDUMP=objdump { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to recognize dependent libraries" >&5 printf %s "checking how to recognize dependent libraries... " >&6; } if test ${lt_cv_deplibs_check_method+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_file_magic_cmd='$MAGIC_CMD' lt_cv_file_magic_test_file= lt_cv_deplibs_check_method='unknown' # Need to set the preceding variable on all platforms that support # interlibrary dependencies. # 'none' -- dependencies not supported. # 'unknown' -- same as none, but documents that we really don't know. # 'pass_all' -- all dependencies passed with no checks. # 'test_compile' -- check by making test program. # 'file_magic [[regex]]' -- check by looking for files in library path # that responds to the $file_magic_cmd with a given extended regex. # If you have 'file' or equivalent on your system and you're not sure # whether 'pass_all' will *always* work, you probably want this one. case $host_os in aix[4-9]*) lt_cv_deplibs_check_method=pass_all ;; beos*) lt_cv_deplibs_check_method=pass_all ;; bsdi[45]*) lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' lt_cv_file_magic_cmd='$FILECMD -L' lt_cv_file_magic_test_file=/shlib/libc.so ;; cygwin*) # func_win32_libid is a shell function defined in ltmain.sh lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' lt_cv_file_magic_cmd='func_win32_libid' ;; mingw* | pw32*) # Base MSYS/MinGW do not provide the 'file' command needed by # func_win32_libid shell function, so use a weaker test based on 'objdump', # unless we find 'file', for example because we are cross-compiling. if ( file / ) >/dev/null 2>&1; then lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' lt_cv_file_magic_cmd='func_win32_libid' else # Keep this pattern in sync with the one in func_win32_libid. lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' lt_cv_file_magic_cmd='$OBJDUMP -f' fi ;; cegcc*) # use the weaker test based on 'objdump'. See mingw*. lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' lt_cv_file_magic_cmd='$OBJDUMP -f' ;; darwin* | rhapsody*) lt_cv_deplibs_check_method=pass_all ;; freebsd* | dragonfly* | midnightbsd*) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then case $host_cpu in i*86 ) # Not sure whether the presence of OpenBSD here was a mistake. # Let's accept both of them until this is cleared up. lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library' lt_cv_file_magic_cmd=$FILECMD lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` ;; esac else lt_cv_deplibs_check_method=pass_all fi ;; haiku*) lt_cv_deplibs_check_method=pass_all ;; hpux10.20* | hpux11*) lt_cv_file_magic_cmd=$FILECMD case $host_cpu in ia64*) lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - IA64' lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so ;; hppa*64*) lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]' lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl ;; *) lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9]\.[0-9]) shared library' lt_cv_file_magic_test_file=/usr/lib/libc.sl ;; esac ;; interix[3-9]*) # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$' ;; irix5* | irix6* | nonstopux*) case $LD in *-32|*"-32 ") libmagic=32-bit;; *-n32|*"-n32 ") libmagic=N32;; *-64|*"-64 ") libmagic=64-bit;; *) libmagic=never-match;; esac lt_cv_deplibs_check_method=pass_all ;; # This must be glibc/ELF. linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) lt_cv_deplibs_check_method=pass_all ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|_pic\.a)$' fi ;; newos6*) lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)' lt_cv_file_magic_cmd=$FILECMD lt_cv_file_magic_test_file=/usr/lib/libnls.so ;; *nto* | *qnx*) lt_cv_deplibs_check_method=pass_all ;; openbsd* | bitrig*) if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' fi ;; osf3* | osf4* | osf5*) lt_cv_deplibs_check_method=pass_all ;; rdos*) lt_cv_deplibs_check_method=pass_all ;; solaris*) lt_cv_deplibs_check_method=pass_all ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) lt_cv_deplibs_check_method=pass_all ;; sysv4 | sysv4.3*) case $host_vendor in motorola) lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]' lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` ;; ncr) lt_cv_deplibs_check_method=pass_all ;; sequent) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' ;; sni) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib" lt_cv_file_magic_test_file=/lib/libc.so ;; siemens) lt_cv_deplibs_check_method=pass_all ;; pc) lt_cv_deplibs_check_method=pass_all ;; esac ;; tpf*) lt_cv_deplibs_check_method=pass_all ;; os2*) lt_cv_deplibs_check_method=pass_all ;; esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_deplibs_check_method" >&5 printf "%s\n" "$lt_cv_deplibs_check_method" >&6; } file_magic_glob= want_nocaseglob=no if test "$build" = "$host"; then case $host_os in mingw* | pw32*) if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then want_nocaseglob=yes else file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[\1]\/[\1]\/g;/g"` fi ;; esac fi file_magic_cmd=$lt_cv_file_magic_cmd deplibs_check_method=$lt_cv_deplibs_check_method test -z "$deplibs_check_method" && deplibs_check_method=unknown if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. set dummy ${ac_tool_prefix}dlltool; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_DLLTOOL+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$DLLTOOL"; then ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi DLLTOOL=$ac_cv_prog_DLLTOOL if test -n "$DLLTOOL"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5 printf "%s\n" "$DLLTOOL" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_DLLTOOL"; then ac_ct_DLLTOOL=$DLLTOOL # Extract the first word of "dlltool", so it can be a program name with args. set dummy dlltool; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_DLLTOOL+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_DLLTOOL"; then ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_DLLTOOL="dlltool" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL if test -n "$ac_ct_DLLTOOL"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5 printf "%s\n" "$ac_ct_DLLTOOL" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_DLLTOOL" = x; then DLLTOOL="false" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac DLLTOOL=$ac_ct_DLLTOOL fi else DLLTOOL="$ac_cv_prog_DLLTOOL" fi test -z "$DLLTOOL" && DLLTOOL=dlltool { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to associate runtime and link libraries" >&5 printf %s "checking how to associate runtime and link libraries... " >&6; } if test ${lt_cv_sharedlib_from_linklib_cmd+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_sharedlib_from_linklib_cmd='unknown' case $host_os in cygwin* | mingw* | pw32* | cegcc*) # two different shell functions defined in ltmain.sh; # decide which one to use based on capabilities of $DLLTOOL case `$DLLTOOL --help 2>&1` in *--identify-strict*) lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib ;; *) lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback ;; esac ;; *) # fallback: assume linklib IS sharedlib lt_cv_sharedlib_from_linklib_cmd=$ECHO ;; esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sharedlib_from_linklib_cmd" >&5 printf "%s\n" "$lt_cv_sharedlib_from_linklib_cmd" >&6; } sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO if test -n "$ac_tool_prefix"; then for ac_prog in ar do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_AR+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$AR"; then ac_cv_prog_AR="$AR" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_AR="$ac_tool_prefix$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi AR=$ac_cv_prog_AR if test -n "$AR"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 printf "%s\n" "$AR" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$AR" && break done fi if test -z "$AR"; then ac_ct_AR=$AR for ac_prog in ar do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_AR+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_AR"; then ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_AR="$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_AR=$ac_cv_prog_ac_ct_AR if test -n "$ac_ct_AR"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 printf "%s\n" "$ac_ct_AR" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$ac_ct_AR" && break done if test "x$ac_ct_AR" = x; then AR="false" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac AR=$ac_ct_AR fi fi : ${AR=ar} # Use ARFLAGS variable as AR's operation code to sync the variable naming with # Automake. If both AR_FLAGS and ARFLAGS are specified, AR_FLAGS should have # higher priority because thats what people were doing historically (setting # ARFLAGS for automake and AR_FLAGS for libtool). FIXME: Make the AR_FLAGS # variable obsoleted/removed. test ${AR_FLAGS+y} || AR_FLAGS=${ARFLAGS-cr} lt_ar_flags=$AR_FLAGS # Make AR_FLAGS overridable by 'make ARFLAGS='. Don't try to run-time override # by AR_FLAGS because that was never working and AR_FLAGS is about to die. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE support" >&5 printf %s "checking for archiver @FILE support... " >&6; } if test ${lt_cv_ar_at_file+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_ar_at_file=no cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : echo conftest.$ac_objext > conftest.lst lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&5' { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 (eval $lt_ar_try) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } if test 0 -eq "$ac_status"; then # Ensure the archiver fails upon bogus file names. rm -f conftest.$ac_objext libconftest.a { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 (eval $lt_ar_try) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } if test 0 -ne "$ac_status"; then lt_cv_ar_at_file=@ fi fi rm -f conftest.* libconftest.a fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ar_at_file" >&5 printf "%s\n" "$lt_cv_ar_at_file" >&6; } if test no = "$lt_cv_ar_at_file"; then archiver_list_spec= else archiver_list_spec=$lt_cv_ar_at_file fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. set dummy ${ac_tool_prefix}strip; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_STRIP+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$STRIP"; then ac_cv_prog_STRIP="$STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_STRIP="${ac_tool_prefix}strip" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi STRIP=$ac_cv_prog_STRIP if test -n "$STRIP"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 printf "%s\n" "$STRIP" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_STRIP"; then ac_ct_STRIP=$STRIP # Extract the first word of "strip", so it can be a program name with args. set dummy strip; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_STRIP+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_STRIP"; then ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_STRIP="strip" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP if test -n "$ac_ct_STRIP"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 printf "%s\n" "$ac_ct_STRIP" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_STRIP" = x; then STRIP=":" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac STRIP=$ac_ct_STRIP fi else STRIP="$ac_cv_prog_STRIP" fi test -z "$STRIP" && STRIP=: if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. set dummy ${ac_tool_prefix}ranlib; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_RANLIB+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$RANLIB"; then ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi RANLIB=$ac_cv_prog_RANLIB if test -n "$RANLIB"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 printf "%s\n" "$RANLIB" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_RANLIB"; then ac_ct_RANLIB=$RANLIB # Extract the first word of "ranlib", so it can be a program name with args. set dummy ranlib; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_RANLIB+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_RANLIB"; then ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_RANLIB="ranlib" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB if test -n "$ac_ct_RANLIB"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 printf "%s\n" "$ac_ct_RANLIB" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_RANLIB" = x; then RANLIB=":" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac RANLIB=$ac_ct_RANLIB fi else RANLIB="$ac_cv_prog_RANLIB" fi test -z "$RANLIB" && RANLIB=: # Determine commands to create old-style static archives. old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' old_postinstall_cmds='chmod 644 $oldlib' old_postuninstall_cmds= if test -n "$RANLIB"; then case $host_os in bitrig* | openbsd*) old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" ;; *) old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" ;; esac old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" fi case $host_os in darwin*) lock_old_archive_extraction=yes ;; *) lock_old_archive_extraction=no ;; esac # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. compiler=$CC # Check for command to grab the raw symbol name followed by C symbol from nm. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking command to parse $NM output from $compiler object" >&5 printf %s "checking command to parse $NM output from $compiler object... " >&6; } if test ${lt_cv_sys_global_symbol_pipe+y} then : printf %s "(cached) " >&6 else $as_nop # These are sane defaults that work on at least a few old systems. # [They come from Ultrix. What could be older than Ultrix?!! ;)] # Character class describing NM global symbol codes. symcode='[BCDEGRST]' # Regexp to match symbols that can be accessed directly from C. sympat='\([_A-Za-z][_A-Za-z0-9]*\)' # Define system-specific variables. case $host_os in aix*) symcode='[BCDT]' ;; cygwin* | mingw* | pw32* | cegcc*) symcode='[ABCDGISTW]' ;; hpux*) if test ia64 = "$host_cpu"; then symcode='[ABCDEGRST]' fi ;; irix* | nonstopux*) symcode='[BCDEGRST]' ;; osf*) symcode='[BCDEGQRST]' ;; solaris*) symcode='[BDRT]' ;; sco3.2v5*) symcode='[DT]' ;; sysv4.2uw2*) symcode='[DT]' ;; sysv5* | sco5v6* | unixware* | OpenUNIX*) symcode='[ABDT]' ;; sysv4) symcode='[DFNSTU]' ;; esac # If we're using GNU nm, then use its standard symbol codes. case `$NM -V 2>&1` in *GNU* | *'with BFD'*) symcode='[ABCDGIRSTW]' ;; esac if test "$lt_cv_nm_interface" = "MS dumpbin"; then # Gets list of data symbols to import. lt_cv_sys_global_symbol_to_import="$SED -n -e 's/^I .* \(.*\)$/\1/p'" # Adjust the below global symbol transforms to fixup imported variables. lt_cdecl_hook=" -e 's/^I .* \(.*\)$/extern __declspec(dllimport) char \1;/p'" lt_c_name_hook=" -e 's/^I .* \(.*\)$/ {\"\1\", (void *) 0},/p'" lt_c_name_lib_hook="\ -e 's/^I .* \(lib.*\)$/ {\"\1\", (void *) 0},/p'\ -e 's/^I .* \(.*\)$/ {\"lib\1\", (void *) 0},/p'" else # Disable hooks by default. lt_cv_sys_global_symbol_to_import= lt_cdecl_hook= lt_c_name_hook= lt_c_name_lib_hook= fi # Transform an extracted symbol line into a proper C declaration. # Some systems (esp. on ia64) link data and code symbols differently, # so use this general approach. lt_cv_sys_global_symbol_to_cdecl="$SED -n"\ $lt_cdecl_hook\ " -e 's/^T .* \(.*\)$/extern int \1();/p'"\ " -e 's/^$symcode$symcode* .* \(.*\)$/extern char \1;/p'" # Transform an extracted symbol line into symbol name and symbol address lt_cv_sys_global_symbol_to_c_name_address="$SED -n"\ $lt_c_name_hook\ " -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ " -e 's/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/p'" # Transform an extracted symbol line into symbol name with lib prefix and # symbol address. lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="$SED -n"\ $lt_c_name_lib_hook\ " -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ " -e 's/^$symcode$symcode* .* \(lib.*\)$/ {\"\1\", (void *) \&\1},/p'"\ " -e 's/^$symcode$symcode* .* \(.*\)$/ {\"lib\1\", (void *) \&\1},/p'" # Handle CRLF in mingw tool chain opt_cr= case $build_os in mingw*) opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp ;; esac # Try without a prefix underscore, then with it. for ac_symprfx in "" "_"; do # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. symxfrm="\\1 $ac_symprfx\\2 \\2" # Write the raw and C identifiers. if test "$lt_cv_nm_interface" = "MS dumpbin"; then # Fake it for dumpbin and say T for any non-static function, # D for any global variable and I for any imported variable. # Also find C++ and __fastcall symbols from MSVC++ or ICC, # which start with @ or ?. lt_cv_sys_global_symbol_pipe="$AWK '"\ " {last_section=section; section=\$ 3};"\ " /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ " /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ " /^ *Symbol name *: /{split(\$ 0,sn,\":\"); si=substr(sn[2],2)};"\ " /^ *Type *: code/{print \"T\",si,substr(si,length(prfx))};"\ " /^ *Type *: data/{print \"I\",si,substr(si,length(prfx))};"\ " \$ 0!~/External *\|/{next};"\ " / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ " {if(hide[section]) next};"\ " {f=\"D\"}; \$ 0~/\(\).*\|/{f=\"T\"};"\ " {split(\$ 0,a,/\||\r/); split(a[2],s)};"\ " s[1]~/^[@?]/{print f,s[1],s[1]; next};"\ " s[1]~prfx {split(s[1],t,\"@\"); print f,t[1],substr(t[1],length(prfx))}"\ " ' prfx=^$ac_symprfx" else lt_cv_sys_global_symbol_pipe="$SED -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" fi lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | $SED '/ __gnu_lto/d'" # Check to see that the pipe works correctly. pipe_works=no rm -f conftest* cat > conftest.$ac_ext <<_LT_EOF #ifdef __cplusplus extern "C" { #endif char nm_test_var; void nm_test_func(void); void nm_test_func(void){} #ifdef __cplusplus } #endif int main(){nm_test_var='a';nm_test_func();return(0);} _LT_EOF if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then # Now try to grab the symbols. nlist=conftest.nm $ECHO "$as_me:$LINENO: $NM conftest.$ac_objext | $lt_cv_sys_global_symbol_pipe > $nlist" >&5 if eval "$NM" conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist 2>&5 && test -s "$nlist"; then # Try sorting and uniquifying the output. if sort "$nlist" | uniq > "$nlist"T; then mv -f "$nlist"T "$nlist" else rm -f "$nlist"T fi # Make sure that we snagged all the symbols we need. if $GREP ' nm_test_var$' "$nlist" >/dev/null; then if $GREP ' nm_test_func$' "$nlist" >/dev/null; then cat <<_LT_EOF > conftest.$ac_ext /* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ #if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE /* DATA imports from DLLs on WIN32 can't be const, because runtime relocations are performed -- see ld's documentation on pseudo-relocs. */ # define LT_DLSYM_CONST #elif defined __osf__ /* This system does not cope well with relocations in const data. */ # define LT_DLSYM_CONST #else # define LT_DLSYM_CONST const #endif #ifdef __cplusplus extern "C" { #endif _LT_EOF # Now generate the symbol file. eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' cat <<_LT_EOF >> conftest.$ac_ext /* The mapping between symbol names and symbols. */ LT_DLSYM_CONST struct { const char *name; void *address; } lt__PROGRAM__LTX_preloaded_symbols[] = { { "@PROGRAM@", (void *) 0 }, _LT_EOF $SED "s/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext cat <<\_LT_EOF >> conftest.$ac_ext {0, (void *) 0} }; /* This works around a problem in FreeBSD linker */ #ifdef FREEBSD_WORKAROUND static const void *lt_preloaded_setup() { return lt__PROGRAM__LTX_preloaded_symbols; } #endif #ifdef __cplusplus } #endif _LT_EOF # Now try linking the two files. mv conftest.$ac_objext conftstm.$ac_objext lt_globsym_save_LIBS=$LIBS lt_globsym_save_CFLAGS=$CFLAGS LIBS=conftstm.$ac_objext CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag" if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 (eval $ac_link) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && test -s conftest$ac_exeext; then pipe_works=yes fi LIBS=$lt_globsym_save_LIBS CFLAGS=$lt_globsym_save_CFLAGS else echo "cannot find nm_test_func in $nlist" >&5 fi else echo "cannot find nm_test_var in $nlist" >&5 fi else echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5 fi else echo "$progname: failed program was:" >&5 cat conftest.$ac_ext >&5 fi rm -rf conftest* conftst* # Do not use the global_symbol_pipe unless it works. if test yes = "$pipe_works"; then break else lt_cv_sys_global_symbol_pipe= fi done fi if test -z "$lt_cv_sys_global_symbol_pipe"; then lt_cv_sys_global_symbol_to_cdecl= fi if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: failed" >&5 printf "%s\n" "failed" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ok" >&5 printf "%s\n" "ok" >&6; } fi # Response file support. if test "$lt_cv_nm_interface" = "MS dumpbin"; then nm_file_list_spec='@' elif $NM --help 2>/dev/null | grep '[@]FILE' >/dev/null; then nm_file_list_spec='@' fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysroot" >&5 printf %s "checking for sysroot... " >&6; } # Check whether --with-sysroot was given. if test ${with_sysroot+y} then : withval=$with_sysroot; else $as_nop with_sysroot=no fi lt_sysroot= case $with_sysroot in #( yes) if test yes = "$GCC"; then lt_sysroot=`$CC --print-sysroot 2>/dev/null` fi ;; #( /*) lt_sysroot=`echo "$with_sysroot" | $SED -e "$sed_quote_subst"` ;; #( no|'') ;; #( *) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $with_sysroot" >&5 printf "%s\n" "$with_sysroot" >&6; } as_fn_error $? "The sysroot must be an absolute path." "$LINENO" 5 ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${lt_sysroot:-no}" >&5 printf "%s\n" "${lt_sysroot:-no}" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a working dd" >&5 printf %s "checking for a working dd... " >&6; } if test ${ac_cv_path_lt_DD+y} then : printf %s "(cached) " >&6 else $as_nop printf 0123456789abcdef0123456789abcdef >conftest.i cat conftest.i conftest.i >conftest2.i : ${lt_DD:=$DD} if test -z "$lt_DD"; then ac_path_lt_DD_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_prog in dd do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_lt_DD="$as_dir$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_lt_DD" || continue if "$ac_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then cmp -s conftest.i conftest.out \ && ac_cv_path_lt_DD="$ac_path_lt_DD" ac_path_lt_DD_found=: fi $ac_path_lt_DD_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_lt_DD"; then : fi else ac_cv_path_lt_DD=$lt_DD fi rm -f conftest.i conftest2.i conftest.out fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_lt_DD" >&5 printf "%s\n" "$ac_cv_path_lt_DD" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to truncate binary pipes" >&5 printf %s "checking how to truncate binary pipes... " >&6; } if test ${lt_cv_truncate_bin+y} then : printf %s "(cached) " >&6 else $as_nop printf 0123456789abcdef0123456789abcdef >conftest.i cat conftest.i conftest.i >conftest2.i lt_cv_truncate_bin= if "$ac_cv_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then cmp -s conftest.i conftest.out \ && lt_cv_truncate_bin="$ac_cv_path_lt_DD bs=4096 count=1" fi rm -f conftest.i conftest2.i conftest.out test -z "$lt_cv_truncate_bin" && lt_cv_truncate_bin="$SED -e 4q" fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_truncate_bin" >&5 printf "%s\n" "$lt_cv_truncate_bin" >&6; } # Calculate cc_basename. Skip known compiler wrappers and cross-prefix. func_cc_basename () { for cc_temp in $*""; do case $cc_temp in compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; \-*) ;; *) break;; esac done func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` } # Check whether --enable-libtool-lock was given. if test ${enable_libtool_lock+y} then : enableval=$enable_libtool_lock; fi test no = "$enable_libtool_lock" || enable_libtool_lock=yes # Some flags need to be propagated to the compiler or linker for good # libtool support. case $host in ia64-*-hpux*) # Find out what ABI is being produced by ac_compile, and set mode # options accordingly. echo 'int i;' > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then case `$FILECMD conftest.$ac_objext` in *ELF-32*) HPUX_IA64_MODE=32 ;; *ELF-64*) HPUX_IA64_MODE=64 ;; esac fi rm -rf conftest* ;; *-*-irix6*) # Find out what ABI is being produced by ac_compile, and set linker # options accordingly. echo '#line '$LINENO' "configure"' > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then if test yes = "$lt_cv_prog_gnu_ld"; then case `$FILECMD conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -melf32bsmip" ;; *N32*) LD="${LD-ld} -melf32bmipn32" ;; *64-bit*) LD="${LD-ld} -melf64bmip" ;; esac else case `$FILECMD conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -32" ;; *N32*) LD="${LD-ld} -n32" ;; *64-bit*) LD="${LD-ld} -64" ;; esac fi fi rm -rf conftest* ;; mips64*-*linux*) # Find out what ABI is being produced by ac_compile, and set linker # options accordingly. echo '#line '$LINENO' "configure"' > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then emul=elf case `$FILECMD conftest.$ac_objext` in *32-bit*) emul="${emul}32" ;; *64-bit*) emul="${emul}64" ;; esac case `$FILECMD conftest.$ac_objext` in *MSB*) emul="${emul}btsmip" ;; *LSB*) emul="${emul}ltsmip" ;; esac case `$FILECMD conftest.$ac_objext` in *N32*) emul="${emul}n32" ;; esac LD="${LD-ld} -m $emul" fi rm -rf conftest* ;; x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ s390*-*linux*|s390*-*tpf*|sparc*-*linux*) # Find out what ABI is being produced by ac_compile, and set linker # options accordingly. Note that the listed cases only cover the # situations where additional linker options are needed (such as when # doing 32-bit compilation for a host where ld defaults to 64-bit, or # vice versa); the common cases where no linker options are needed do # not appear in the list. echo 'int i;' > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then case `$FILECMD conftest.o` in *32-bit*) case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_i386_fbsd" ;; x86_64-*linux*) case `$FILECMD conftest.o` in *x86-64*) LD="${LD-ld} -m elf32_x86_64" ;; *) LD="${LD-ld} -m elf_i386" ;; esac ;; powerpc64le-*linux*) LD="${LD-ld} -m elf32lppclinux" ;; powerpc64-*linux*) LD="${LD-ld} -m elf32ppclinux" ;; s390x-*linux*) LD="${LD-ld} -m elf_s390" ;; sparc64-*linux*) LD="${LD-ld} -m elf32_sparc" ;; esac ;; *64-bit*) case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_x86_64_fbsd" ;; x86_64-*linux*) LD="${LD-ld} -m elf_x86_64" ;; powerpcle-*linux*) LD="${LD-ld} -m elf64lppc" ;; powerpc-*linux*) LD="${LD-ld} -m elf64ppc" ;; s390*-*linux*|s390*-*tpf*) LD="${LD-ld} -m elf64_s390" ;; sparc*-*linux*) LD="${LD-ld} -m elf64_sparc" ;; esac ;; esac fi rm -rf conftest* ;; *-*-sco3.2v5*) # On SCO OpenServer 5, we need -belf to get full-featured binaries. SAVE_CFLAGS=$CFLAGS CFLAGS="$CFLAGS -belf" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler needs -belf" >&5 printf %s "checking whether the C compiler needs -belf... " >&6; } if test ${lt_cv_cc_needs_belf+y} then : printf %s "(cached) " >&6 else $as_nop ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : lt_cv_cc_needs_belf=yes else $as_nop lt_cv_cc_needs_belf=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_cc_needs_belf" >&5 printf "%s\n" "$lt_cv_cc_needs_belf" >&6; } if test yes != "$lt_cv_cc_needs_belf"; then # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf CFLAGS=$SAVE_CFLAGS fi ;; *-*solaris*) # Find out what ABI is being produced by ac_compile, and set linker # options accordingly. echo 'int i;' > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then case `$FILECMD conftest.o` in *64-bit*) case $lt_cv_prog_gnu_ld in yes*) case $host in i?86-*-solaris*|x86_64-*-solaris*) LD="${LD-ld} -m elf_x86_64" ;; sparc*-*-solaris*) LD="${LD-ld} -m elf64_sparc" ;; esac # GNU ld 2.21 introduced _sol2 emulations. Use them if available. if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then LD=${LD-ld}_sol2 fi ;; *) if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then LD="${LD-ld} -64" fi ;; esac ;; esac fi rm -rf conftest* ;; esac need_locks=$enable_libtool_lock if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}mt", so it can be a program name with args. set dummy ${ac_tool_prefix}mt; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_MANIFEST_TOOL+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$MANIFEST_TOOL"; then ac_cv_prog_MANIFEST_TOOL="$MANIFEST_TOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi MANIFEST_TOOL=$ac_cv_prog_MANIFEST_TOOL if test -n "$MANIFEST_TOOL"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MANIFEST_TOOL" >&5 printf "%s\n" "$MANIFEST_TOOL" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_MANIFEST_TOOL"; then ac_ct_MANIFEST_TOOL=$MANIFEST_TOOL # Extract the first word of "mt", so it can be a program name with args. set dummy mt; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_MANIFEST_TOOL+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_MANIFEST_TOOL"; then ac_cv_prog_ac_ct_MANIFEST_TOOL="$ac_ct_MANIFEST_TOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_MANIFEST_TOOL="mt" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_MANIFEST_TOOL=$ac_cv_prog_ac_ct_MANIFEST_TOOL if test -n "$ac_ct_MANIFEST_TOOL"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MANIFEST_TOOL" >&5 printf "%s\n" "$ac_ct_MANIFEST_TOOL" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_MANIFEST_TOOL" = x; then MANIFEST_TOOL=":" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac MANIFEST_TOOL=$ac_ct_MANIFEST_TOOL fi else MANIFEST_TOOL="$ac_cv_prog_MANIFEST_TOOL" fi test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5 printf %s "checking if $MANIFEST_TOOL is a manifest tool... " >&6; } if test ${lt_cv_path_mainfest_tool+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_path_mainfest_tool=no echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&5 $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out cat conftest.err >&5 if $GREP 'Manifest Tool' conftest.out > /dev/null; then lt_cv_path_mainfest_tool=yes fi rm -f conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_mainfest_tool" >&5 printf "%s\n" "$lt_cv_path_mainfest_tool" >&6; } if test yes != "$lt_cv_path_mainfest_tool"; then MANIFEST_TOOL=: fi case $host_os in rhapsody* | darwin*) if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}dsymutil", so it can be a program name with args. set dummy ${ac_tool_prefix}dsymutil; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_DSYMUTIL+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$DSYMUTIL"; then ac_cv_prog_DSYMUTIL="$DSYMUTIL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi DSYMUTIL=$ac_cv_prog_DSYMUTIL if test -n "$DSYMUTIL"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DSYMUTIL" >&5 printf "%s\n" "$DSYMUTIL" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_DSYMUTIL"; then ac_ct_DSYMUTIL=$DSYMUTIL # Extract the first word of "dsymutil", so it can be a program name with args. set dummy dsymutil; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_DSYMUTIL+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_DSYMUTIL"; then ac_cv_prog_ac_ct_DSYMUTIL="$ac_ct_DSYMUTIL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_DSYMUTIL="dsymutil" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_DSYMUTIL=$ac_cv_prog_ac_ct_DSYMUTIL if test -n "$ac_ct_DSYMUTIL"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DSYMUTIL" >&5 printf "%s\n" "$ac_ct_DSYMUTIL" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_DSYMUTIL" = x; then DSYMUTIL=":" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac DSYMUTIL=$ac_ct_DSYMUTIL fi else DSYMUTIL="$ac_cv_prog_DSYMUTIL" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}nmedit", so it can be a program name with args. set dummy ${ac_tool_prefix}nmedit; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_NMEDIT+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$NMEDIT"; then ac_cv_prog_NMEDIT="$NMEDIT" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi NMEDIT=$ac_cv_prog_NMEDIT if test -n "$NMEDIT"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NMEDIT" >&5 printf "%s\n" "$NMEDIT" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_NMEDIT"; then ac_ct_NMEDIT=$NMEDIT # Extract the first word of "nmedit", so it can be a program name with args. set dummy nmedit; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_NMEDIT+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_NMEDIT"; then ac_cv_prog_ac_ct_NMEDIT="$ac_ct_NMEDIT" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_NMEDIT="nmedit" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT if test -n "$ac_ct_NMEDIT"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NMEDIT" >&5 printf "%s\n" "$ac_ct_NMEDIT" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_NMEDIT" = x; then NMEDIT=":" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac NMEDIT=$ac_ct_NMEDIT fi else NMEDIT="$ac_cv_prog_NMEDIT" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}lipo", so it can be a program name with args. set dummy ${ac_tool_prefix}lipo; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_LIPO+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$LIPO"; then ac_cv_prog_LIPO="$LIPO" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_LIPO="${ac_tool_prefix}lipo" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi LIPO=$ac_cv_prog_LIPO if test -n "$LIPO"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LIPO" >&5 printf "%s\n" "$LIPO" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_LIPO"; then ac_ct_LIPO=$LIPO # Extract the first word of "lipo", so it can be a program name with args. set dummy lipo; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_LIPO+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_LIPO"; then ac_cv_prog_ac_ct_LIPO="$ac_ct_LIPO" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_LIPO="lipo" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_LIPO=$ac_cv_prog_ac_ct_LIPO if test -n "$ac_ct_LIPO"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LIPO" >&5 printf "%s\n" "$ac_ct_LIPO" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_LIPO" = x; then LIPO=":" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac LIPO=$ac_ct_LIPO fi else LIPO="$ac_cv_prog_LIPO" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}otool", so it can be a program name with args. set dummy ${ac_tool_prefix}otool; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_OTOOL+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$OTOOL"; then ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_OTOOL="${ac_tool_prefix}otool" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi OTOOL=$ac_cv_prog_OTOOL if test -n "$OTOOL"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5 printf "%s\n" "$OTOOL" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_OTOOL"; then ac_ct_OTOOL=$OTOOL # Extract the first word of "otool", so it can be a program name with args. set dummy otool; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_OTOOL+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_OTOOL"; then ac_cv_prog_ac_ct_OTOOL="$ac_ct_OTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_OTOOL="otool" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_OTOOL=$ac_cv_prog_ac_ct_OTOOL if test -n "$ac_ct_OTOOL"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL" >&5 printf "%s\n" "$ac_ct_OTOOL" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_OTOOL" = x; then OTOOL=":" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac OTOOL=$ac_ct_OTOOL fi else OTOOL="$ac_cv_prog_OTOOL" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}otool64", so it can be a program name with args. set dummy ${ac_tool_prefix}otool64; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_OTOOL64+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$OTOOL64"; then ac_cv_prog_OTOOL64="$OTOOL64" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi OTOOL64=$ac_cv_prog_OTOOL64 if test -n "$OTOOL64"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OTOOL64" >&5 printf "%s\n" "$OTOOL64" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_OTOOL64"; then ac_ct_OTOOL64=$OTOOL64 # Extract the first word of "otool64", so it can be a program name with args. set dummy otool64; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_OTOOL64+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_OTOOL64"; then ac_cv_prog_ac_ct_OTOOL64="$ac_ct_OTOOL64" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_OTOOL64="otool64" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_OTOOL64=$ac_cv_prog_ac_ct_OTOOL64 if test -n "$ac_ct_OTOOL64"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL64" >&5 printf "%s\n" "$ac_ct_OTOOL64" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_OTOOL64" = x; then OTOOL64=":" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac OTOOL64=$ac_ct_OTOOL64 fi else OTOOL64="$ac_cv_prog_OTOOL64" fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -single_module linker flag" >&5 printf %s "checking for -single_module linker flag... " >&6; } if test ${lt_cv_apple_cc_single_mod+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_apple_cc_single_mod=no if test -z "$LT_MULTI_MODULE"; then # By default we will add the -single_module flag. You can override # by either setting the environment variable LT_MULTI_MODULE # non-empty at configure time, or by adding -multi_module to the # link flags. rm -rf libconftest.dylib* echo "int foo(void){return 1;}" > conftest.c echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ -dynamiclib -Wl,-single_module conftest.c" >&5 $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ -dynamiclib -Wl,-single_module conftest.c 2>conftest.err _lt_result=$? # If there is a non-empty error log, and "single_module" # appears in it, assume the flag caused a linker warning if test -s conftest.err && $GREP single_module conftest.err; then cat conftest.err >&5 # Otherwise, if the output was created with a 0 exit code from # the compiler, it worked. elif test -f libconftest.dylib && test 0 = "$_lt_result"; then lt_cv_apple_cc_single_mod=yes else cat conftest.err >&5 fi rm -rf libconftest.dylib* rm -f conftest.* fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5 printf "%s\n" "$lt_cv_apple_cc_single_mod" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5 printf %s "checking for -exported_symbols_list linker flag... " >&6; } if test ${lt_cv_ld_exported_symbols_list+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_ld_exported_symbols_list=no save_LDFLAGS=$LDFLAGS echo "_main" > conftest.sym LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : lt_cv_ld_exported_symbols_list=yes else $as_nop lt_cv_ld_exported_symbols_list=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LDFLAGS=$save_LDFLAGS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5 printf "%s\n" "$lt_cv_ld_exported_symbols_list" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5 printf %s "checking for -force_load linker flag... " >&6; } if test ${lt_cv_ld_force_load+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_ld_force_load=no cat > conftest.c << _LT_EOF int forced_loaded() { return 2;} _LT_EOF echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&5 $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&5 echo "$AR $AR_FLAGS libconftest.a conftest.o" >&5 $AR $AR_FLAGS libconftest.a conftest.o 2>&5 echo "$RANLIB libconftest.a" >&5 $RANLIB libconftest.a 2>&5 cat > conftest.c << _LT_EOF int main() { return 0;} _LT_EOF echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5 $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err _lt_result=$? if test -s conftest.err && $GREP force_load conftest.err; then cat conftest.err >&5 elif test -f conftest && test 0 = "$_lt_result" && $GREP forced_load conftest >/dev/null 2>&1; then lt_cv_ld_force_load=yes else cat conftest.err >&5 fi rm -f conftest.err libconftest.a conftest conftest.c rm -rf conftest.dSYM fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5 printf "%s\n" "$lt_cv_ld_force_load" >&6; } case $host_os in rhapsody* | darwin1.[012]) _lt_dar_allow_undefined='$wl-undefined ${wl}suppress' ;; darwin1.*) _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; darwin*) case $MACOSX_DEPLOYMENT_TARGET,$host in 10.[012],*|,*powerpc*-darwin[5-8]*) _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; *) _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; esac ;; esac if test yes = "$lt_cv_apple_cc_single_mod"; then _lt_dar_single_mod='$single_module' fi if test yes = "$lt_cv_ld_exported_symbols_list"; then _lt_dar_export_syms=' $wl-exported_symbols_list,$output_objdir/$libname-symbols.expsym' else _lt_dar_export_syms='~$NMEDIT -s $output_objdir/$libname-symbols.expsym $lib' fi if test : != "$DSYMUTIL" && test no = "$lt_cv_ld_force_load"; then _lt_dsymutil='~$DSYMUTIL $lib || :' else _lt_dsymutil= fi ;; esac # func_munge_path_list VARIABLE PATH # ----------------------------------- # VARIABLE is name of variable containing _space_ separated list of # directories to be munged by the contents of PATH, which is string # having a format: # "DIR[:DIR]:" # string "DIR[ DIR]" will be prepended to VARIABLE # ":DIR[:DIR]" # string "DIR[ DIR]" will be appended to VARIABLE # "DIRP[:DIRP]::[DIRA:]DIRA" # string "DIRP[ DIRP]" will be prepended to VARIABLE and string # "DIRA[ DIRA]" will be appended to VARIABLE # "DIR[:DIR]" # VARIABLE will be replaced by "DIR[ DIR]" func_munge_path_list () { case x$2 in x) ;; *:) eval $1=\"`$ECHO $2 | $SED 's/:/ /g'` \$$1\" ;; x:*) eval $1=\"\$$1 `$ECHO $2 | $SED 's/:/ /g'`\" ;; *::*) eval $1=\"\$$1\ `$ECHO $2 | $SED -e 's/.*:://' -e 's/:/ /g'`\" eval $1=\"`$ECHO $2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \$$1\" ;; *) eval $1=\"`$ECHO $2 | $SED 's/:/ /g'`\" ;; esac } ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default " if test "x$ac_cv_header_dlfcn_h" = xyes then : printf "%s\n" "#define HAVE_DLFCN_H 1" >>confdefs.h fi func_stripname_cnf () { case $2 in .*) func_stripname_result=`$ECHO "$3" | $SED "s%^$1%%; s%\\\\$2\$%%"`;; *) func_stripname_result=`$ECHO "$3" | $SED "s%^$1%%; s%$2\$%%"`;; esac } # func_stripname_cnf # Set options enable_win32_dll=yes case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}as", so it can be a program name with args. set dummy ${ac_tool_prefix}as; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_AS+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$AS"; then ac_cv_prog_AS="$AS" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_AS="${ac_tool_prefix}as" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi AS=$ac_cv_prog_AS if test -n "$AS"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AS" >&5 printf "%s\n" "$AS" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_AS"; then ac_ct_AS=$AS # Extract the first word of "as", so it can be a program name with args. set dummy as; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_AS+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_AS"; then ac_cv_prog_ac_ct_AS="$ac_ct_AS" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_AS="as" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_AS=$ac_cv_prog_ac_ct_AS if test -n "$ac_ct_AS"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AS" >&5 printf "%s\n" "$ac_ct_AS" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_AS" = x; then AS="false" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac AS=$ac_ct_AS fi else AS="$ac_cv_prog_AS" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. set dummy ${ac_tool_prefix}dlltool; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_DLLTOOL+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$DLLTOOL"; then ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi DLLTOOL=$ac_cv_prog_DLLTOOL if test -n "$DLLTOOL"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5 printf "%s\n" "$DLLTOOL" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_DLLTOOL"; then ac_ct_DLLTOOL=$DLLTOOL # Extract the first word of "dlltool", so it can be a program name with args. set dummy dlltool; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_DLLTOOL+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_DLLTOOL"; then ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_DLLTOOL="dlltool" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL if test -n "$ac_ct_DLLTOOL"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5 printf "%s\n" "$ac_ct_DLLTOOL" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_DLLTOOL" = x; then DLLTOOL="false" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac DLLTOOL=$ac_ct_DLLTOOL fi else DLLTOOL="$ac_cv_prog_DLLTOOL" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. set dummy ${ac_tool_prefix}objdump; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_OBJDUMP+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$OBJDUMP"; then ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi OBJDUMP=$ac_cv_prog_OBJDUMP if test -n "$OBJDUMP"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5 printf "%s\n" "$OBJDUMP" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test -z "$ac_cv_prog_OBJDUMP"; then ac_ct_OBJDUMP=$OBJDUMP # Extract the first word of "objdump", so it can be a program name with args. set dummy objdump; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_OBJDUMP+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$ac_ct_OBJDUMP"; then ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_OBJDUMP="objdump" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP if test -n "$ac_ct_OBJDUMP"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5 printf "%s\n" "$ac_ct_OBJDUMP" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$ac_ct_OBJDUMP" = x; then OBJDUMP="false" else case $cross_compiling:$ac_tool_warned in yes:) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac OBJDUMP=$ac_ct_OBJDUMP fi else OBJDUMP="$ac_cv_prog_OBJDUMP" fi ;; esac test -z "$AS" && AS=as test -z "$DLLTOOL" && DLLTOOL=dlltool test -z "$OBJDUMP" && OBJDUMP=objdump enable_dlopen=no # Check whether --enable-shared was given. if test ${enable_shared+y} then : enableval=$enable_shared; p=${PACKAGE-default} case $enableval in yes) enable_shared=yes ;; no) enable_shared=no ;; *) enable_shared=no # Look at the argument we got. We use all the common list separators. lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, for pkg in $enableval; do IFS=$lt_save_ifs if test "X$pkg" = "X$p"; then enable_shared=yes fi done IFS=$lt_save_ifs ;; esac else $as_nop enable_shared=yes fi # Check whether --enable-static was given. if test ${enable_static+y} then : enableval=$enable_static; p=${PACKAGE-default} case $enableval in yes) enable_static=yes ;; no) enable_static=no ;; *) enable_static=no # Look at the argument we got. We use all the common list separators. lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, for pkg in $enableval; do IFS=$lt_save_ifs if test "X$pkg" = "X$p"; then enable_static=yes fi done IFS=$lt_save_ifs ;; esac else $as_nop enable_static=yes fi # Check whether --with-pic was given. if test ${with_pic+y} then : withval=$with_pic; lt_p=${PACKAGE-default} case $withval in yes|no) pic_mode=$withval ;; *) pic_mode=default # Look at the argument we got. We use all the common list separators. lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, for lt_pkg in $withval; do IFS=$lt_save_ifs if test "X$lt_pkg" = "X$lt_p"; then pic_mode=yes fi done IFS=$lt_save_ifs ;; esac else $as_nop pic_mode=default fi # Check whether --enable-fast-install was given. if test ${enable_fast_install+y} then : enableval=$enable_fast_install; p=${PACKAGE-default} case $enableval in yes) enable_fast_install=yes ;; no) enable_fast_install=no ;; *) enable_fast_install=no # Look at the argument we got. We use all the common list separators. lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, for pkg in $enableval; do IFS=$lt_save_ifs if test "X$pkg" = "X$p"; then enable_fast_install=yes fi done IFS=$lt_save_ifs ;; esac else $as_nop enable_fast_install=yes fi shared_archive_member_spec= case $host,$enable_shared in power*-*-aix[5-9]*,yes) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking which variant of shared library versioning to provide" >&5 printf %s "checking which variant of shared library versioning to provide... " >&6; } # Check whether --with-aix-soname was given. if test ${with_aix_soname+y} then : withval=$with_aix_soname; case $withval in aix|svr4|both) ;; *) as_fn_error $? "Unknown argument to --with-aix-soname" "$LINENO" 5 ;; esac lt_cv_with_aix_soname=$with_aix_soname else $as_nop if test ${lt_cv_with_aix_soname+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_with_aix_soname=aix fi with_aix_soname=$lt_cv_with_aix_soname fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $with_aix_soname" >&5 printf "%s\n" "$with_aix_soname" >&6; } if test aix != "$with_aix_soname"; then # For the AIX way of multilib, we name the shared archive member # based on the bitwidth used, traditionally 'shr.o' or 'shr_64.o', # and 'shr.imp' or 'shr_64.imp', respectively, for the Import File. # Even when GNU compilers ignore OBJECT_MODE but need '-maix64' flag, # the AIX toolchain works better with OBJECT_MODE set (default 32). if test 64 = "${OBJECT_MODE-32}"; then shared_archive_member_spec=shr_64 else shared_archive_member_spec=shr fi fi ;; *) with_aix_soname=aix ;; esac # This can be used to rebuild libtool when needed LIBTOOL_DEPS=$ltmain # Always use our own libtool. LIBTOOL='$(SHELL) $(top_builddir)/libtool' test -z "$LN_S" && LN_S="ln -s" if test -n "${ZSH_VERSION+set}"; then setopt NO_GLOB_SUBST fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5 printf %s "checking for objdir... " >&6; } if test ${lt_cv_objdir+y} then : printf %s "(cached) " >&6 else $as_nop rm -f .libs 2>/dev/null mkdir .libs 2>/dev/null if test -d .libs; then lt_cv_objdir=.libs else # MS-DOS does not allow filenames that begin with a dot. lt_cv_objdir=_libs fi rmdir .libs 2>/dev/null fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5 printf "%s\n" "$lt_cv_objdir" >&6; } objdir=$lt_cv_objdir printf "%s\n" "#define LT_OBJDIR \"$lt_cv_objdir/\"" >>confdefs.h case $host_os in aix3*) # AIX sometimes has problems with the GCC collect2 program. For some # reason, if we set the COLLECT_NAMES environment variable, the problems # vanish in a puff of smoke. if test set != "${COLLECT_NAMES+set}"; then COLLECT_NAMES= export COLLECT_NAMES fi ;; esac # Global variables: ofile=libtool can_build_shared=yes # All known linkers require a '.a' archive for static linking (except MSVC and # ICC, which need '.lib'). libext=a with_gnu_ld=$lt_cv_prog_gnu_ld old_CC=$CC old_CFLAGS=$CFLAGS # Set sane defaults for various variables test -z "$CC" && CC=cc test -z "$LTCC" && LTCC=$CC test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS test -z "$LD" && LD=ld test -z "$ac_objext" && ac_objext=o func_cc_basename $compiler cc_basename=$func_cc_basename_result # Only perform the check for file, if the check method requires it test -z "$MAGIC_CMD" && MAGIC_CMD=file case $deplibs_check_method in file_magic*) if test "$file_magic_cmd" = '$MAGIC_CMD'; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ${ac_tool_prefix}file" >&5 printf %s "checking for ${ac_tool_prefix}file... " >&6; } if test ${lt_cv_path_MAGIC_CMD+y} then : printf %s "(cached) " >&6 else $as_nop case $MAGIC_CMD in [\\/*] | ?:[\\/]*) lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. ;; *) lt_save_MAGIC_CMD=$MAGIC_CMD lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" for ac_dir in $ac_dummy; do IFS=$lt_save_ifs test -z "$ac_dir" && ac_dir=. if test -f "$ac_dir/${ac_tool_prefix}file"; then lt_cv_path_MAGIC_CMD=$ac_dir/"${ac_tool_prefix}file" if test -n "$file_magic_test_file"; then case $deplibs_check_method in "file_magic "*) file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` MAGIC_CMD=$lt_cv_path_MAGIC_CMD if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | $EGREP "$file_magic_regex" > /dev/null; then : else cat <<_LT_EOF 1>&2 *** Warning: the command libtool uses to detect shared libraries, *** $file_magic_cmd, produces output that libtool cannot recognize. *** The result is that libtool may fail to recognize shared libraries *** as such. This will affect the creation of libtool libraries that *** depend on shared libraries, but programs linked with such libtool *** libraries will work regardless of this problem. Nevertheless, you *** may want to report the problem to your system manager and/or to *** bug-libtool@gnu.org _LT_EOF fi ;; esac fi break fi done IFS=$lt_save_ifs MAGIC_CMD=$lt_save_MAGIC_CMD ;; esac fi MAGIC_CMD=$lt_cv_path_MAGIC_CMD if test -n "$MAGIC_CMD"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 printf "%s\n" "$MAGIC_CMD" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test -z "$lt_cv_path_MAGIC_CMD"; then if test -n "$ac_tool_prefix"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for file" >&5 printf %s "checking for file... " >&6; } if test ${lt_cv_path_MAGIC_CMD+y} then : printf %s "(cached) " >&6 else $as_nop case $MAGIC_CMD in [\\/*] | ?:[\\/]*) lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. ;; *) lt_save_MAGIC_CMD=$MAGIC_CMD lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" for ac_dir in $ac_dummy; do IFS=$lt_save_ifs test -z "$ac_dir" && ac_dir=. if test -f "$ac_dir/file"; then lt_cv_path_MAGIC_CMD=$ac_dir/"file" if test -n "$file_magic_test_file"; then case $deplibs_check_method in "file_magic "*) file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` MAGIC_CMD=$lt_cv_path_MAGIC_CMD if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | $EGREP "$file_magic_regex" > /dev/null; then : else cat <<_LT_EOF 1>&2 *** Warning: the command libtool uses to detect shared libraries, *** $file_magic_cmd, produces output that libtool cannot recognize. *** The result is that libtool may fail to recognize shared libraries *** as such. This will affect the creation of libtool libraries that *** depend on shared libraries, but programs linked with such libtool *** libraries will work regardless of this problem. Nevertheless, you *** may want to report the problem to your system manager and/or to *** bug-libtool@gnu.org _LT_EOF fi ;; esac fi break fi done IFS=$lt_save_ifs MAGIC_CMD=$lt_save_MAGIC_CMD ;; esac fi MAGIC_CMD=$lt_cv_path_MAGIC_CMD if test -n "$MAGIC_CMD"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 printf "%s\n" "$MAGIC_CMD" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi else MAGIC_CMD=: fi fi fi ;; esac # Use C for the default configuration in the libtool script lt_save_CC=$CC ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu # Source file extension for C test sources. ac_ext=c # Object file extension for compiled C test sources. objext=o objext=$objext # Code to be used in simple compile tests lt_simple_compile_test_code="int some_variable = 0;" # Code to be used in simple link tests lt_simple_link_test_code='int main(){return(0);}' # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. compiler=$CC # Save the default compiler, since it gets overwritten when the other # tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. compiler_DEFAULT=$CC # save warnings/boilerplate of simple test code ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" >conftest.$ac_ext eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_compiler_boilerplate=`cat conftest.err` $RM conftest* ac_outfile=conftest.$ac_objext echo "$lt_simple_link_test_code" >conftest.$ac_ext eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_linker_boilerplate=`cat conftest.err` $RM -r conftest* ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... if test -n "$compiler"; then lt_prog_compiler_no_builtin_flag= if test yes = "$GCC"; then case $cc_basename in nvcc*) lt_prog_compiler_no_builtin_flag=' -Xcompiler -fno-builtin' ;; *) lt_prog_compiler_no_builtin_flag=' -fno-builtin' ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -fno-rtti -fno-exceptions" >&5 printf %s "checking if $compiler supports -fno-rtti -fno-exceptions... " >&6; } if test ${lt_cv_prog_compiler_rtti_exceptions+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_rtti_exceptions=no ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-fno-rtti -fno-exceptions" ## exclude from sc_useless_quotes_in_assignment # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. # The option is referenced via a variable to avoid confusing sed. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_rtti_exceptions=yes fi fi $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_rtti_exceptions" >&5 printf "%s\n" "$lt_cv_prog_compiler_rtti_exceptions" >&6; } if test yes = "$lt_cv_prog_compiler_rtti_exceptions"; then lt_prog_compiler_no_builtin_flag="$lt_prog_compiler_no_builtin_flag -fno-rtti -fno-exceptions" else : fi fi lt_prog_compiler_wl= lt_prog_compiler_pic= lt_prog_compiler_static= if test yes = "$GCC"; then lt_prog_compiler_wl='-Wl,' lt_prog_compiler_static='-static' case $host_os in aix*) # All AIX code is PIC. if test ia64 = "$host_cpu"; then # AIX 5 now supports IA64 processor lt_prog_compiler_static='-Bstatic' fi lt_prog_compiler_pic='-fPIC' ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support lt_prog_compiler_pic='-fPIC' ;; m68k) # FIXME: we need at least 68020 code to build shared libraries, but # adding the '-m68020' flag to GCC prevents building anything better, # like '-m68040'. lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4' ;; esac ;; beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. ;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style # (--disable-auto-import) libraries lt_prog_compiler_pic='-DDLL_EXPORT' case $host_os in os2*) lt_prog_compiler_static='$wl-static' ;; esac ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files lt_prog_compiler_pic='-fno-common' ;; haiku*) # PIC is the default for Haiku. # The "-static" flag exists, but is broken. lt_prog_compiler_static= ;; hpux*) # PIC is the default for 64-bit PA HP-UX, but not for 32-bit # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag # sets the default TLS model and affects inlining. case $host_cpu in hppa*64*) # +Z the default ;; *) lt_prog_compiler_pic='-fPIC' ;; esac ;; interix[3-9]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; msdosdjgpp*) # Just because we use GCC doesn't mean we suddenly get shared libraries # on systems that don't support them. lt_prog_compiler_can_build_shared=no enable_shared=no ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. lt_prog_compiler_pic='-fPIC -shared' ;; sysv4*MP*) if test -d /usr/nec; then lt_prog_compiler_pic=-Kconform_pic fi ;; *) lt_prog_compiler_pic='-fPIC' ;; esac case $cc_basename in nvcc*) # Cuda Compiler Driver 2.2 lt_prog_compiler_wl='-Xlinker ' if test -n "$lt_prog_compiler_pic"; then lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic" fi ;; esac else # PORTME Check for flag to pass linker flags through the system compiler. case $host_os in aix*) lt_prog_compiler_wl='-Wl,' if test ia64 = "$host_cpu"; then # AIX 5 now supports IA64 processor lt_prog_compiler_static='-Bstatic' else lt_prog_compiler_static='-bnso -bI:/lib/syscalls.exp' fi ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files lt_prog_compiler_pic='-fno-common' case $cc_basename in nagfor*) # NAG Fortran compiler lt_prog_compiler_wl='-Wl,-Wl,,' lt_prog_compiler_pic='-PIC' lt_prog_compiler_static='-Bstatic' ;; esac ;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). lt_prog_compiler_pic='-DDLL_EXPORT' case $host_os in os2*) lt_prog_compiler_static='$wl-static' ;; esac ;; hpux9* | hpux10* | hpux11*) lt_prog_compiler_wl='-Wl,' # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but # not for PA HP-UX. case $host_cpu in hppa*64*|ia64*) # +Z the default ;; *) lt_prog_compiler_pic='+Z' ;; esac # Is there a better lt_prog_compiler_static that works with the bundled CC? lt_prog_compiler_static='$wl-a ${wl}archive' ;; irix5* | irix6* | nonstopux*) lt_prog_compiler_wl='-Wl,' # PIC (with -KPIC) is the default. lt_prog_compiler_static='-non_shared' ;; linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) case $cc_basename in # old Intel for x86_64, which still supported -KPIC. ecc*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-static' ;; # flang / f18. f95 an alias for gfortran or flang on Debian flang* | f18* | f95*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-fPIC' lt_prog_compiler_static='-static' ;; # icc used to be incompatible with GCC. # ICC 10 doesn't accept -KPIC any more. icc* | ifort*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-fPIC' lt_prog_compiler_static='-static' ;; # Lahey Fortran 8.1. lf95*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='--shared' lt_prog_compiler_static='--static' ;; nagfor*) # NAG Fortran compiler lt_prog_compiler_wl='-Wl,-Wl,,' lt_prog_compiler_pic='-PIC' lt_prog_compiler_static='-Bstatic' ;; tcc*) # Fabrice Bellard et al's Tiny C Compiler lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-fPIC' lt_prog_compiler_static='-static' ;; pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group compilers (*not* the Pentium gcc compiler, # which looks to be a dead project) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-fpic' lt_prog_compiler_static='-Bstatic' ;; ccc*) lt_prog_compiler_wl='-Wl,' # All Alpha code is PIC. lt_prog_compiler_static='-non_shared' ;; xl* | bgxl* | bgf* | mpixl*) # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-qpic' lt_prog_compiler_static='-qstaticlink' ;; *) case `$CC -V 2>&1 | $SED 5q` in *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*) # Sun Fortran 8.3 passes all unrecognized flags to the linker lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' lt_prog_compiler_wl='' ;; *Sun\ F* | *Sun*Fortran*) lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' lt_prog_compiler_wl='-Qoption ld ' ;; *Sun\ C*) # Sun C 5.9 lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' lt_prog_compiler_wl='-Wl,' ;; *Intel*\ [CF]*Compiler*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-fPIC' lt_prog_compiler_static='-static' ;; *Portland\ Group*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-fpic' lt_prog_compiler_static='-Bstatic' ;; esac ;; esac ;; newsos6) lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. lt_prog_compiler_pic='-fPIC -shared' ;; osf3* | osf4* | osf5*) lt_prog_compiler_wl='-Wl,' # All OSF/1 code is PIC. lt_prog_compiler_static='-non_shared' ;; rdos*) lt_prog_compiler_static='-non_shared' ;; solaris*) lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' case $cc_basename in f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) lt_prog_compiler_wl='-Qoption ld ';; *) lt_prog_compiler_wl='-Wl,';; esac ;; sunos4*) lt_prog_compiler_wl='-Qoption ld ' lt_prog_compiler_pic='-PIC' lt_prog_compiler_static='-Bstatic' ;; sysv4 | sysv4.2uw2* | sysv4.3*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' ;; sysv4*MP*) if test -d /usr/nec; then lt_prog_compiler_pic='-Kconform_pic' lt_prog_compiler_static='-Bstatic' fi ;; sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' ;; unicos*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_can_build_shared=no ;; uts4*) lt_prog_compiler_pic='-pic' lt_prog_compiler_static='-Bstatic' ;; *) lt_prog_compiler_can_build_shared=no ;; esac fi case $host_os in # For platforms that do not support PIC, -DPIC is meaningless: *djgpp*) lt_prog_compiler_pic= ;; *) lt_prog_compiler_pic="$lt_prog_compiler_pic -DPIC" ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 printf %s "checking for $compiler option to produce PIC... " >&6; } if test ${lt_cv_prog_compiler_pic+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_pic=$lt_prog_compiler_pic fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic" >&5 printf "%s\n" "$lt_cv_prog_compiler_pic" >&6; } lt_prog_compiler_pic=$lt_cv_prog_compiler_pic # # Check to make sure the PIC flag actually works. # if test -n "$lt_prog_compiler_pic"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5 printf %s "checking if $compiler PIC flag $lt_prog_compiler_pic works... " >&6; } if test ${lt_cv_prog_compiler_pic_works+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_pic_works=no ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="$lt_prog_compiler_pic -DPIC" ## exclude from sc_useless_quotes_in_assignment # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. # The option is referenced via a variable to avoid confusing sed. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_pic_works=yes fi fi $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works" >&5 printf "%s\n" "$lt_cv_prog_compiler_pic_works" >&6; } if test yes = "$lt_cv_prog_compiler_pic_works"; then case $lt_prog_compiler_pic in "" | " "*) ;; *) lt_prog_compiler_pic=" $lt_prog_compiler_pic" ;; esac else lt_prog_compiler_pic= lt_prog_compiler_can_build_shared=no fi fi # # Check to make sure the static flag actually works. # wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 printf %s "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } if test ${lt_cv_prog_compiler_static_works+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_static_works=no save_LDFLAGS=$LDFLAGS LDFLAGS="$LDFLAGS $lt_tmp_static_flag" echo "$lt_simple_link_test_code" > conftest.$ac_ext if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then # The linker can only warn and ignore the option if not recognized # So say no if there are warnings if test -s conftest.err; then # Append any errors to the config.log. cat conftest.err 1>&5 $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_static_works=yes fi else lt_cv_prog_compiler_static_works=yes fi fi $RM -r conftest* LDFLAGS=$save_LDFLAGS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works" >&5 printf "%s\n" "$lt_cv_prog_compiler_static_works" >&6; } if test yes = "$lt_cv_prog_compiler_static_works"; then : else lt_prog_compiler_static= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } if test ${lt_cv_prog_compiler_c_o+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_c_o=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o=yes fi fi chmod u+w . 2>&5 $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. $RM -r conftest $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 printf "%s\n" "$lt_cv_prog_compiler_c_o" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } if test ${lt_cv_prog_compiler_c_o+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_c_o=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o=yes fi fi chmod u+w . 2>&5 $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. $RM -r conftest $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 printf "%s\n" "$lt_cv_prog_compiler_c_o" >&6; } hard_links=nottested if test no = "$lt_cv_prog_compiler_c_o" && test no != "$need_locks"; then # do not overwrite the value of need_locks provided by the user { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 printf %s "checking if we can lock with hard links... " >&6; } hard_links=yes $RM conftest* ln conftest.a conftest.b 2>/dev/null && hard_links=no touch conftest.a ln conftest.a conftest.b 2>&5 || hard_links=no ln conftest.a conftest.b 2>/dev/null && hard_links=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 printf "%s\n" "$hard_links" >&6; } if test no = "$hard_links"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&5 printf "%s\n" "$as_me: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&2;} need_locks=warn fi else need_locks=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 printf %s "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } runpath_var= allow_undefined_flag= always_export_symbols=no archive_cmds= archive_expsym_cmds= compiler_needs_object=no enable_shared_with_static_runtimes=no export_dynamic_flag_spec= export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' hardcode_automatic=no hardcode_direct=no hardcode_direct_absolute=no hardcode_libdir_flag_spec= hardcode_libdir_separator= hardcode_minus_L=no hardcode_shlibpath_var=unsupported inherit_rpath=no link_all_deplibs=unknown module_cmds= module_expsym_cmds= old_archive_from_new_cmds= old_archive_from_expsyms_cmds= thread_safe_flag_spec= whole_archive_flag_spec= # include_expsyms should be a list of space-separated symbols to be *always* # included in the symbol list include_expsyms= # exclude_expsyms can be an extended regexp of symbols to exclude # it will be wrapped by ' (' and ')$', so one must not match beginning or # end of line. Example: 'a|bc|.*d.*' will exclude the symbols 'a' and 'bc', # as well as any symbol that contains 'd'. exclude_expsyms='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out # platforms (ab)use it in PIC code, but their linkers get confused if # the symbol is explicitly referenced. Since portable code cannot # rely on this symbol name, it's probably fine to never include it in # preloaded symbol tables. # Exclude shared library initialization/finalization symbols. extract_expsyms_cmds= case $host_os in cygwin* | mingw* | pw32* | cegcc*) # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using # Microsoft Visual C++ or Intel C++ Compiler. if test yes != "$GCC"; then with_gnu_ld=no fi ;; interix*) # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) with_gnu_ld=yes ;; openbsd* | bitrig*) with_gnu_ld=no ;; linux* | k*bsd*-gnu | gnu*) link_all_deplibs=no ;; esac ld_shlibs=yes # On some targets, GNU ld is compatible enough with the native linker # that we're better off using the native interface for both. lt_use_gnu_ld_interface=no if test yes = "$with_gnu_ld"; then case $host_os in aix*) # The AIX port of GNU ld has always aspired to compatibility # with the native linker. However, as the warning in the GNU ld # block says, versions before 2.19.5* couldn't really create working # shared libraries, regardless of the interface used. case `$LD -v 2>&1` in *\ \(GNU\ Binutils\)\ 2.19.5*) ;; *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;; *\ \(GNU\ Binutils\)\ [3-9]*) ;; *) lt_use_gnu_ld_interface=yes ;; esac ;; *) lt_use_gnu_ld_interface=yes ;; esac fi if test yes = "$lt_use_gnu_ld_interface"; then # If archive_cmds runs LD, not CC, wlarc should be empty wlarc='$wl' # Set some defaults for GNU ld with shared library support. These # are reset later if shared libraries are not supported. Putting them # here allows them to be overridden if necessary. runpath_var=LD_RUN_PATH hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' export_dynamic_flag_spec='$wl--export-dynamic' # ancient GNU ld didn't support --whole-archive et. al. if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then whole_archive_flag_spec=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' else whole_archive_flag_spec= fi supports_anon_versioning=no case `$LD -v | $SED -e 's/([^)]\+)\s\+//' 2>&1` in *GNU\ gold*) supports_anon_versioning=yes ;; *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... *\ 2.11.*) ;; # other 2.11 versions *) supports_anon_versioning=yes ;; esac # See if GNU ld supports shared libraries. case $host_os in aix[3-9]*) # On AIX/PPC, the GNU linker is very broken if test ia64 != "$host_cpu"; then ld_shlibs=no cat <<_LT_EOF 1>&2 *** Warning: the GNU linker, at least up to release 2.19, is reported *** to be unable to reliably create shared libraries on AIX. *** Therefore, libtool is disabling shared libraries support. If you *** really care for shared libraries, you may want to install binutils *** 2.20 or above, or modify your PATH so that a non-GNU linker is found. *** You will then need to restart the configuration process. _LT_EOF fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds='' ;; m68k) archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' hardcode_libdir_flag_spec='-L$libdir' hardcode_minus_L=yes ;; esac ;; beos*) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then allow_undefined_flag=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. FIXME archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' else ld_shlibs=no fi ;; cygwin* | mingw* | pw32* | cegcc*) # _LT_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless, # as there is no search path for DLLs. hardcode_libdir_flag_spec='-L$libdir' export_dynamic_flag_spec='$wl--export-all-symbols' allow_undefined_flag=unsupported always_export_symbols=no enable_shared_with_static_runtimes=yes export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' exclude_expsyms='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file, use it as # is; otherwise, prepend EXPORTS... archive_expsym_cmds='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else ld_shlibs=no fi ;; haiku*) archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' link_all_deplibs=yes ;; os2*) hardcode_libdir_flag_spec='-L$libdir' hardcode_minus_L=yes allow_undefined_flag=unsupported shrext_cmds=.dll archive_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' archive_expsym_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ prefix_cmds="$SED"~ if test EXPORTS = "`$SED 1q $export_symbols`"; then prefix_cmds="$prefix_cmds -e 1d"; fi~ prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' old_archive_From_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' enable_shared_with_static_runtimes=yes file_list_spec='@' ;; interix[3-9]*) hardcode_direct=no hardcode_shlibpath_var=no hardcode_libdir_flag_spec='$wl-rpath,$libdir' export_dynamic_flag_spec='$wl-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' archive_expsym_cmds='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) tmp_diet=no if test linux-dietlibc = "$host_os"; then case $cc_basename in diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) esac fi if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ && test no = "$tmp_diet" then tmp_addflag=' $pic_flag' tmp_sharedflag='-shared' case $cc_basename,$host_cpu in pgcc*) # Portland Group C compiler whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' tmp_addflag=' $pic_flag' ;; pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group f77 and f90 compilers whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' tmp_addflag=' $pic_flag -Mnomain' ;; ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 tmp_addflag=' -i_dynamic' ;; efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 tmp_addflag=' -i_dynamic -nofor_main' ;; ifc* | ifort*) # Intel Fortran compiler tmp_addflag=' -nofor_main' ;; lf95*) # Lahey Fortran 8.1 whole_archive_flag_spec= tmp_sharedflag='--shared' ;; nagfor*) # NAGFOR 5.3 tmp_sharedflag='-Wl,-shared' ;; xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below) tmp_sharedflag='-qmkshrobj' tmp_addflag= ;; nvcc*) # Cuda Compiler Driver 2.2 whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' compiler_needs_object=yes ;; esac case `$CC -V 2>&1 | $SED 5q` in *Sun\ C*) # Sun C 5.9 whole_archive_flag_spec='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' compiler_needs_object=yes tmp_sharedflag='-G' ;; *Sun\ F*) # Sun Fortran 8.3 tmp_sharedflag='-G' ;; esac archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' if test yes = "$supports_anon_versioning"; then archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' fi case $cc_basename in tcc*) hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' export_dynamic_flag_spec='-rdynamic' ;; xlf* | bgf* | bgxlf* | mpixlf*) # IBM XL Fortran 10.1 on PPC cannot create shared libs itself whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive' hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' if test yes = "$supports_anon_versioning"; then archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' fi ;; esac else ld_shlibs=no fi ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' wlarc= else archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' fi ;; solaris*) if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then ld_shlibs=no cat <<_LT_EOF 1>&2 *** Warning: The releases 2.8.* of the GNU linker cannot reliably *** create shared libraries on Solaris systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.9.1 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs=no fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) case `$LD -v 2>&1` in *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) ld_shlibs=no cat <<_LT_EOF 1>&2 *** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot *** reliably create shared libraries on SCO systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.16.91.0.3 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF ;; *) # For security reasons, it is highly recommended that you always # use absolute paths for naming shared libraries, and exclude the # DT_RUNPATH tag from executables and libraries. But doing so # requires that you compile everything twice, which is a pain. if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs=no fi ;; esac ;; sunos4*) archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' wlarc= hardcode_direct=yes hardcode_shlibpath_var=no ;; *) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs=no fi ;; esac if test no = "$ld_shlibs"; then runpath_var= hardcode_libdir_flag_spec= export_dynamic_flag_spec= whole_archive_flag_spec= fi else # PORTME fill in a description of your system's linker (not GNU ld) case $host_os in aix3*) allow_undefined_flag=unsupported always_export_symbols=yes archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' # Note: this linker hardcodes the directories in LIBPATH if there # are no directories specified by -L. hardcode_minus_L=yes if test yes = "$GCC" && test -z "$lt_prog_compiler_static"; then # Neither direct hardcoding nor static linking is supported with a # broken collect2. hardcode_direct=unsupported fi ;; aix[4-9]*) if test ia64 = "$host_cpu"; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag= else # If we're using GNU nm, then we don't want the "-C" option. # -C means demangle to GNU nm, but means don't demangle to AIX nm. # Without the "-l" option, or with the "-B" option, AIX nm treats # weak defined symbols like other global defined symbols, whereas # GNU nm marks them as "W". # While the 'weak' keyword is ignored in the Export File, we need # it in the Import File for the 'aix-soname' feature, so we have # to replace the "-B" option with "-P" for AIX nm. if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' else export_symbols_cmds='`func_echo_all $NM | $SED -e '\''s/B\([^B]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && (substr(\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' fi aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # have runtime linking enabled, and use it for executables. # For shared libraries, we enable/disable runtime linking # depending on the kind of the shared library created - # when "with_aix_soname,aix_use_runtimelinking" is: # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables # "aix,yes" lib.so shared, rtl:yes, for executables # lib.a static archive # "both,no" lib.so.V(shr.o) shared, rtl:yes # lib.a(lib.so.V) shared, rtl:no, for executables # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables # lib.a(lib.so.V) shared, rtl:no # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables # lib.a static archive case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) for ld_flag in $LDFLAGS; do if (test x-brtl = "x$ld_flag" || test x-Wl,-brtl = "x$ld_flag"); then aix_use_runtimelinking=yes break fi done if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then # With aix-soname=svr4, we create the lib.so.V shared archives only, # so we don't have lib.a shared libs to link our executables. # We have to force runtime linking in this case. aix_use_runtimelinking=yes LDFLAGS="$LDFLAGS -Wl,-brtl" fi ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. archive_cmds='' hardcode_direct=yes hardcode_direct_absolute=yes hardcode_libdir_separator=':' link_all_deplibs=yes file_list_spec='$wl-f,' case $with_aix_soname,$aix_use_runtimelinking in aix,*) ;; # traditional, no import file svr4,* | *,yes) # use import file # The Import File defines what to hardcode. hardcode_direct=no hardcode_direct_absolute=no ;; esac if test yes = "$GCC"; then case $host_os in aix4.[012]|aix4.[012].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`$CC -print-prog-name=collect2` if test -f "$collect2name" && strings "$collect2name" | $GREP resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 hardcode_direct=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking hardcode_minus_L=yes hardcode_libdir_flag_spec='-L$libdir' hardcode_libdir_separator= fi ;; esac shared_flag='-shared' if test yes = "$aix_use_runtimelinking"; then shared_flag="$shared_flag "'$wl-G' fi # Need to ensure runtime linking is disabled for the traditional # shared library, or the linker may eventually find shared libraries # /with/ Import File - we do not want to mix them. shared_flag_aix='-shared' shared_flag_svr4='-shared $wl-G' else # not using gcc if test ia64 = "$host_cpu"; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test yes = "$aix_use_runtimelinking"; then shared_flag='$wl-G' else shared_flag='$wl-bM:SRE' fi shared_flag_aix='$wl-bM:SRE' shared_flag_svr4='$wl-G' fi fi export_dynamic_flag_spec='$wl-bexpall' # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to export. always_export_symbols=yes if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. allow_undefined_flag='-berok' # Determine the default libpath from the value encoded in an # empty executable. if test set = "${lt_cv_aix_libpath+set}"; then aix_libpath=$lt_cv_aix_libpath else if test ${lt_cv_aix_libpath_+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }' lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$lt_cv_aix_libpath_"; then lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test -z "$lt_cv_aix_libpath_"; then lt_cv_aix_libpath_=/usr/lib:/lib fi fi aix_libpath=$lt_cv_aix_libpath_ fi hardcode_libdir_flag_spec='$wl-blibpath:$libdir:'"$aix_libpath" archive_expsym_cmds='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag else if test ia64 = "$host_cpu"; then hardcode_libdir_flag_spec='$wl-R $libdir:/usr/lib:/lib' allow_undefined_flag="-z nodefs" archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an # empty executable. if test set = "${lt_cv_aix_libpath+set}"; then aix_libpath=$lt_cv_aix_libpath else if test ${lt_cv_aix_libpath_+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }' lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$lt_cv_aix_libpath_"; then lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test -z "$lt_cv_aix_libpath_"; then lt_cv_aix_libpath_=/usr/lib:/lib fi fi aix_libpath=$lt_cv_aix_libpath_ fi hardcode_libdir_flag_spec='$wl-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. no_undefined_flag=' $wl-bernotok' allow_undefined_flag=' $wl-berok' if test yes = "$with_gnu_ld"; then # We only use this code for GNU lds that support --whole-archive. whole_archive_flag_spec='$wl--whole-archive$convenience $wl--no-whole-archive' else # Exported symbols can be pulled into shared objects from archives whole_archive_flag_spec='$convenience' fi archive_cmds_need_lc=yes archive_expsym_cmds='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' # -brtl affects multiple linker settings, -berok does not and is overridden later compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([, ]\\)%-berok\\1%g"`' if test svr4 != "$with_aix_soname"; then # This is similar to how AIX traditionally builds its shared libraries. archive_expsym_cmds="$archive_expsym_cmds"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' fi if test aix != "$with_aix_soname"; then archive_expsym_cmds="$archive_expsym_cmds"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! $soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' else # used by -dlpreopen to get the symbols archive_expsym_cmds="$archive_expsym_cmds"'~$MV $output_objdir/$realname.d/$soname $output_objdir' fi archive_expsym_cmds="$archive_expsym_cmds"'~$RM -r $output_objdir/$realname.d' fi fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds='' ;; m68k) archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' hardcode_libdir_flag_spec='-L$libdir' hardcode_minus_L=yes ;; esac ;; bsdi[45]*) export_dynamic_flag_spec=-rdynamic ;; cygwin* | mingw* | pw32* | cegcc*) # When not using gcc, we currently assume that we are using # Microsoft Visual C++ or Intel C++ Compiler. # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. case $cc_basename in cl* | icl*) # Native MSVC or ICC hardcode_libdir_flag_spec=' ' allow_undefined_flag=unsupported always_export_symbols=yes file_list_spec='@' # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=.dll # FIXME: Setting linknames here is a bad hack. archive_cmds='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' archive_expsym_cmds='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then cp "$export_symbols" "$output_objdir/$soname.def"; echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; else $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; fi~ $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ linknames=' # The linker will not automatically build a static lib if we build a DLL. # _LT_TAGVAR(old_archive_from_new_cmds, )='true' enable_shared_with_static_runtimes=yes exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' # Don't use ranlib old_postinstall_cmds='chmod 644 $oldlib' postlink_cmds='lt_outputfile="@OUTPUT@"~ lt_tool_outputfile="@TOOL_OUTPUT@"~ case $lt_outputfile in *.exe|*.EXE) ;; *) lt_outputfile=$lt_outputfile.exe lt_tool_outputfile=$lt_tool_outputfile.exe ;; esac~ if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; $RM "$lt_outputfile.manifest"; fi' ;; *) # Assume MSVC and ICC wrapper hardcode_libdir_flag_spec=' ' allow_undefined_flag=unsupported # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=.dll # FIXME: Setting linknames here is a bad hack. archive_cmds='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' # The linker will automatically build a .lib file if we build a DLL. old_archive_from_new_cmds='true' # FIXME: Should let the user specify the lib program. old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs' enable_shared_with_static_runtimes=yes ;; esac ;; darwin* | rhapsody*) archive_cmds_need_lc=no hardcode_direct=no hardcode_automatic=yes hardcode_shlibpath_var=unsupported if test yes = "$lt_cv_ld_force_load"; then whole_archive_flag_spec='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' else whole_archive_flag_spec='' fi link_all_deplibs=yes allow_undefined_flag=$_lt_dar_allow_undefined case $cc_basename in ifort*|nagfor*) _lt_dar_can_shared=yes ;; *) _lt_dar_can_shared=$GCC ;; esac if test yes = "$_lt_dar_can_shared"; then output_verbose_link_cmd=func_echo_all archive_cmds="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" module_cmds="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" archive_expsym_cmds="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" module_expsym_cmds="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" else ld_shlibs=no fi ;; dgux*) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec='-L$libdir' hardcode_shlibpath_var=no ;; # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor # support. Future versions do this automatically, but an explicit c++rt0.o # does not break anything, and helps significantly (at the cost of a little # extra space). freebsd2.2*) archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' hardcode_libdir_flag_spec='-R$libdir' hardcode_direct=yes hardcode_shlibpath_var=no ;; # Unfortunately, older versions of FreeBSD 2 do not have this feature. freebsd2.*) archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=yes hardcode_minus_L=yes hardcode_shlibpath_var=no ;; # FreeBSD 3 and greater uses gcc -shared to do shared libraries. freebsd* | dragonfly* | midnightbsd*) archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec='-R$libdir' hardcode_direct=yes hardcode_shlibpath_var=no ;; hpux9*) if test yes = "$GCC"; then archive_cmds='$RM $output_objdir/$soname~$CC -shared $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' else archive_cmds='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' fi hardcode_libdir_flag_spec='$wl+b $wl$libdir' hardcode_libdir_separator=: hardcode_direct=yes # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L=yes export_dynamic_flag_spec='$wl-E' ;; hpux10*) if test yes,no = "$GCC,$with_gnu_ld"; then archive_cmds='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' fi if test no = "$with_gnu_ld"; then hardcode_libdir_flag_spec='$wl+b $wl$libdir' hardcode_libdir_separator=: hardcode_direct=yes hardcode_direct_absolute=yes export_dynamic_flag_spec='$wl-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L=yes fi ;; hpux11*) if test yes,no = "$GCC,$with_gnu_ld"; then case $host_cpu in hppa*64*) archive_cmds='$CC -shared $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds='$CC -shared $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) archive_cmds='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac else case $host_cpu in hppa*64*) archive_cmds='$CC -b $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) # Older versions of the 11.00 compiler do not understand -b yet # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $CC understands -b" >&5 printf %s "checking if $CC understands -b... " >&6; } if test ${lt_cv_prog_compiler__b+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler__b=no save_LDFLAGS=$LDFLAGS LDFLAGS="$LDFLAGS -b" echo "$lt_simple_link_test_code" > conftest.$ac_ext if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then # The linker can only warn and ignore the option if not recognized # So say no if there are warnings if test -s conftest.err; then # Append any errors to the config.log. cat conftest.err 1>&5 $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler__b=yes fi else lt_cv_prog_compiler__b=yes fi fi $RM -r conftest* LDFLAGS=$save_LDFLAGS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler__b" >&5 printf "%s\n" "$lt_cv_prog_compiler__b" >&6; } if test yes = "$lt_cv_prog_compiler__b"; then archive_cmds='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' fi ;; esac fi if test no = "$with_gnu_ld"; then hardcode_libdir_flag_spec='$wl+b $wl$libdir' hardcode_libdir_separator=: case $host_cpu in hppa*64*|ia64*) hardcode_direct=no hardcode_shlibpath_var=no ;; *) hardcode_direct=yes hardcode_direct_absolute=yes export_dynamic_flag_spec='$wl-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L=yes ;; esac fi ;; irix5* | irix6* | nonstopux*) if test yes = "$GCC"; then archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' # Try to use the -exported_symbol ld option, if it does not # work, assume that -exports_file does not work either and # implicitly export all symbols. # This should be the same for all languages, so no per-tag cache variable. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5 printf %s "checking whether the $host_os linker accepts -exported_symbol... " >&6; } if test ${lt_cv_irix_exported_symbol+y} then : printf %s "(cached) " >&6 else $as_nop save_LDFLAGS=$LDFLAGS LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int foo (void) { return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : lt_cv_irix_exported_symbol=yes else $as_nop lt_cv_irix_exported_symbol=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LDFLAGS=$save_LDFLAGS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 printf "%s\n" "$lt_cv_irix_exported_symbol" >&6; } if test yes = "$lt_cv_irix_exported_symbol"; then archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' fi link_all_deplibs=no else archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' fi archive_cmds_need_lc='no' hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' hardcode_libdir_separator=: inherit_rpath=yes link_all_deplibs=yes ;; linux*) case $cc_basename in tcc*) # Fabrice Bellard et al's Tiny C Compiler ld_shlibs=yes archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' ;; esac ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out else archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF fi hardcode_libdir_flag_spec='-R$libdir' hardcode_direct=yes hardcode_shlibpath_var=no ;; newsos6) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=yes hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' hardcode_libdir_separator=: hardcode_shlibpath_var=no ;; *nto* | *qnx*) ;; openbsd* | bitrig*) if test -f /usr/libexec/ld.so; then hardcode_direct=yes hardcode_shlibpath_var=no hardcode_direct_absolute=yes if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags $wl-retain-symbols-file,$export_symbols' hardcode_libdir_flag_spec='$wl-rpath,$libdir' export_dynamic_flag_spec='$wl-E' else archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec='$wl-rpath,$libdir' fi else ld_shlibs=no fi ;; os2*) hardcode_libdir_flag_spec='-L$libdir' hardcode_minus_L=yes allow_undefined_flag=unsupported shrext_cmds=.dll archive_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' archive_expsym_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ prefix_cmds="$SED"~ if test EXPORTS = "`$SED 1q $export_symbols`"; then prefix_cmds="$prefix_cmds -e 1d"; fi~ prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' old_archive_From_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' enable_shared_with_static_runtimes=yes file_list_spec='@' ;; osf3*) if test yes = "$GCC"; then allow_undefined_flag=' $wl-expect_unresolved $wl\*' archive_cmds='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' else allow_undefined_flag=' -expect_unresolved \*' archive_cmds='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' fi archive_cmds_need_lc='no' hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' hardcode_libdir_separator=: ;; osf4* | osf5*) # as osf3* with the addition of -msym flag if test yes = "$GCC"; then allow_undefined_flag=' $wl-expect_unresolved $wl\*' archive_cmds='$CC -shared$allow_undefined_flag $pic_flag $libobjs $deplibs $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' else allow_undefined_flag=' -expect_unresolved \*' archive_cmds='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' archive_expsym_cmds='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ $CC -shared$allow_undefined_flag $wl-input $wl$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~$RM $lib.exp' # Both c and cxx compiler support -rpath directly hardcode_libdir_flag_spec='-rpath $libdir' fi archive_cmds_need_lc='no' hardcode_libdir_separator=: ;; solaris*) no_undefined_flag=' -z defs' if test yes = "$GCC"; then wlarc='$wl' archive_cmds='$CC -shared $pic_flag $wl-z ${wl}text $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -shared $pic_flag $wl-z ${wl}text $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' else case `$CC -V 2>&1` in *"Compilers 5.0"*) wlarc='' archive_cmds='$LD -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $linker_flags' archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $LD -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' ;; *) wlarc='$wl' archive_cmds='$CC -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' ;; esac fi hardcode_libdir_flag_spec='-R$libdir' hardcode_shlibpath_var=no case $host_os in solaris2.[0-5] | solaris2.[0-5].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands '-z linker_flag'. GCC discards it without '$wl', # but is careful enough not to reorder. # Supported since Solaris 2.6 (maybe 2.5.1?) if test yes = "$GCC"; then whole_archive_flag_spec='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' else whole_archive_flag_spec='-z allextract$convenience -z defaultextract' fi ;; esac link_all_deplibs=yes ;; sunos4*) if test sequent = "$host_vendor"; then # Use $CC to link under sequent, because it throws in some extra .o # files that make .init and .fini sections work. archive_cmds='$CC -G $wl-h $soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' fi hardcode_libdir_flag_spec='-L$libdir' hardcode_direct=yes hardcode_minus_L=yes hardcode_shlibpath_var=no ;; sysv4) case $host_vendor in sni) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=yes # is this really true??? ;; siemens) ## LD is ld it makes a PLAMLIB ## CC just makes a GrossModule. archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' reload_cmds='$CC -r -o $output$reload_objs' hardcode_direct=no ;; motorola) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=no #Motorola manual says yes, but my tests say they lie ;; esac runpath_var='LD_RUN_PATH' hardcode_shlibpath_var=no ;; sysv4.3*) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_shlibpath_var=no export_dynamic_flag_spec='-Bexport' ;; sysv4*MP*) if test -d /usr/nec; then archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_shlibpath_var=no runpath_var=LD_RUN_PATH hardcode_runpath_var=yes ld_shlibs=yes fi ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) no_undefined_flag='$wl-z,text' archive_cmds_need_lc=no hardcode_shlibpath_var=no runpath_var='LD_RUN_PATH' if test yes = "$GCC"; then archive_cmds='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We CANNOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. no_undefined_flag='$wl-z,text' allow_undefined_flag='$wl-z,nodefs' archive_cmds_need_lc=no hardcode_shlibpath_var=no hardcode_libdir_flag_spec='$wl-R,$libdir' hardcode_libdir_separator=':' link_all_deplibs=yes export_dynamic_flag_spec='$wl-Bexport' runpath_var='LD_RUN_PATH' if test yes = "$GCC"; then archive_cmds='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; uts4*) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec='-L$libdir' hardcode_shlibpath_var=no ;; *) ld_shlibs=no ;; esac if test sni = "$host_vendor"; then case $host in sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) export_dynamic_flag_spec='$wl-Blargedynsym' ;; esac fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs" >&5 printf "%s\n" "$ld_shlibs" >&6; } test no = "$ld_shlibs" && can_build_shared=no with_gnu_ld=$with_gnu_ld # # Do we need to explicitly link libc? # case "x$archive_cmds_need_lc" in x|xyes) # Assume -lc should be added archive_cmds_need_lc=yes if test yes,yes = "$GCC,$enable_shared"; then case $archive_cmds in *'~'*) # FIXME: we may have to deal with multi-command sequences. ;; '$CC '*) # Test whether the compiler implicitly links with -lc since on some # systems, -lgcc has to come before -lc. If gcc already passes -lc # to ld, don't add -lc before -lgcc. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 printf %s "checking whether -lc should be explicitly linked in... " >&6; } if test ${lt_cv_archive_cmds_need_lc+y} then : printf %s "(cached) " >&6 else $as_nop $RM conftest* echo "$lt_simple_compile_test_code" > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } 2>conftest.err; then soname=conftest lib=conftest libobjs=conftest.$ac_objext deplibs= wl=$lt_prog_compiler_wl pic_flag=$lt_prog_compiler_pic compiler_flags=-v linker_flags=-v verstring= output_objdir=. libname=conftest lt_save_allow_undefined_flag=$allow_undefined_flag allow_undefined_flag= if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 (eval $archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } then lt_cv_archive_cmds_need_lc=no else lt_cv_archive_cmds_need_lc=yes fi allow_undefined_flag=$lt_save_allow_undefined_flag else cat conftest.err 1>&5 fi $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc" >&5 printf "%s\n" "$lt_cv_archive_cmds_need_lc" >&6; } archive_cmds_need_lc=$lt_cv_archive_cmds_need_lc ;; esac fi ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 printf %s "checking dynamic linker characteristics... " >&6; } if test yes = "$GCC"; then case $host_os in darwin*) lt_awk_arg='/^libraries:/,/LR/' ;; *) lt_awk_arg='/^libraries:/' ;; esac case $host_os in mingw* | cegcc*) lt_sed_strip_eq='s|=\([A-Za-z]:\)|\1|g' ;; *) lt_sed_strip_eq='s|=/|/|g' ;; esac lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` case $lt_search_path_spec in *\;*) # if the path contains ";" then we assume it to be the separator # otherwise default to the standard path separator (i.e. ":") - it is # assumed that no part of a normal pathname contains ";" but that should # okay in the real world where ";" in dirpaths is itself problematic. lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` ;; *) lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` ;; esac # Ok, now we have the path, separated by spaces, we can step through it # and add multilib dir if necessary... lt_tmp_lt_search_path_spec= lt_multi_os_dir=/`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` # ...but if some path component already ends with the multilib dir we assume # that all is fine and trust -print-search-dirs as is (GCC 4.2? or newer). case "$lt_multi_os_dir; $lt_search_path_spec " in "/; "* | "/.; "* | "/./; "* | *"$lt_multi_os_dir "* | *"$lt_multi_os_dir/ "*) lt_multi_os_dir= ;; esac for lt_sys_path in $lt_search_path_spec; do if test -d "$lt_sys_path$lt_multi_os_dir"; then lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path$lt_multi_os_dir" elif test -n "$lt_multi_os_dir"; then test -d "$lt_sys_path" && \ lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" fi done lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' BEGIN {RS = " "; FS = "/|\n";} { lt_foo = ""; lt_count = 0; for (lt_i = NF; lt_i > 0; lt_i--) { if ($lt_i != "" && $lt_i != ".") { if ($lt_i == "..") { lt_count++; } else { if (lt_count == 0) { lt_foo = "/" $lt_i lt_foo; } else { lt_count--; } } } } if (lt_foo != "") { lt_freq[lt_foo]++; } if (lt_freq[lt_foo] == 1) { print lt_foo; } }'` # AWK program above erroneously prepends '/' to C:/dos/paths # for these hosts. case $host_os in mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ $SED 's|/\([A-Za-z]:\)|\1|g'` ;; esac sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` else sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" fi library_names_spec= libname_spec='lib$name' soname_spec= shrext_cmds=.so postinstall_cmds= postuninstall_cmds= finish_cmds= finish_eval= shlibpath_var= shlibpath_overrides_runpath=unknown version_type=none dynamic_linker="$host_os ld.so" sys_lib_dlsearch_path_spec="/lib /usr/lib" need_lib_prefix=unknown hardcode_into_libs=no # when you set need_version to no, make sure it does not cause -set_version # flags to be left without arguments need_version=unknown case $host_os in aix3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname.a' shlibpath_var=LIBPATH # AIX 3 has no versioning support, so we append a major version to the name. soname_spec='$libname$release$shared_ext$major' ;; aix[4-9]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no hardcode_into_libs=yes if test ia64 = "$host_cpu"; then # AIX 5 supports IA64 library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH else # With GCC up to 2.95.x, collect2 would create an import file # for dependence libraries. The import file would start with # the line '#! .'. This would cause the generated library to # depend on '.', always an invalid library. This was fixed in # development snapshots of GCC prior to 3.0. case $host_os in aix4 | aix4.[01] | aix4.[01].*) if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' echo ' yes ' echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then : else can_build_shared=no fi ;; esac # Using Import Files as archive members, it is possible to support # filename-based versioning of shared library archives on AIX. While # this would work for both with and without runtime linking, it will # prevent static linking of such archives. So we do filename-based # shared library versioning with .so extension only, which is used # when both runtime linking and shared linking is enabled. # Unfortunately, runtime linking may impact performance, so we do # not want this to be the default eventually. Also, we use the # versioned .so libs for executables only if there is the -brtl # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. # To allow for filename-based versioning support, we need to create # libNAME.so.V as an archive file, containing: # *) an Import File, referring to the versioned filename of the # archive as well as the shared archive member, telling the # bitwidth (32 or 64) of that shared object, and providing the # list of exported symbols of that shared object, eventually # decorated with the 'weak' keyword # *) the shared object with the F_LOADONLY flag set, to really avoid # it being seen by the linker. # At run time we better use the real file rather than another symlink, # but for link time we create the symlink libNAME.so -> libNAME.so.V case $with_aix_soname,$aix_use_runtimelinking in # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct # soname into executable. Probably we can add versioning support to # collect2, so additional links can be useful in future. aix,yes) # traditional libtool dynamic_linker='AIX unversionable lib.so' # If using run time linking (on AIX 4.2 or later) use lib.so # instead of lib.a to let people know that these are not # typical AIX shared libraries. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' ;; aix,no) # traditional AIX only dynamic_linker='AIX lib.a(lib.so.V)' # We preserve .a as extension for shared libraries through AIX4.2 # and later when we are not doing run time linking. library_names_spec='$libname$release.a $libname.a' soname_spec='$libname$release$shared_ext$major' ;; svr4,*) # full svr4 only dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o)" library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' # We do not specify a path in Import Files, so LIBPATH fires. shlibpath_overrides_runpath=yes ;; *,yes) # both, prefer svr4 dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o), lib.a(lib.so.V)" library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' # unpreferred sharedlib libNAME.a needs extra handling postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' # We do not specify a path in Import Files, so LIBPATH fires. shlibpath_overrides_runpath=yes ;; *,no) # both, prefer aix dynamic_linker="AIX lib.a(lib.so.V), lib.so.V($shared_archive_member_spec.o)" library_names_spec='$libname$release.a $libname.a' soname_spec='$libname$release$shared_ext$major' # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' ;; esac shlibpath_var=LIBPATH fi ;; amigaos*) case $host_cpu in powerpc) # Since July 2007 AmigaOS4 officially supports .so libraries. # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' ;; m68k) library_names_spec='$libname.ixlibrary $libname.a' # Create ${libname}_ixlibrary.a entries in /sys/libs. finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' ;; esac ;; beos*) library_names_spec='$libname$shared_ext' dynamic_linker="$host_os ld.so" shlibpath_var=LIBRARY_PATH ;; bsdi[45]*) version_type=linux # correct to gnu/linux during the next big refactor need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" # the default ld.so.conf also contains /usr/contrib/lib and # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow # libtool to hard-code these into programs ;; cygwin* | mingw* | pw32* | cegcc*) version_type=windows shrext_cmds=.dll need_version=no need_lib_prefix=no case $GCC,$cc_basename in yes,*) # gcc library_names_spec='$libname.dll.a' # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \$file`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname~ if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; fi' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api" ;; mingw* | cegcc*) # MinGW DLLs use traditional 'lib' prefix soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' ;; pw32*) # pw32 DLLs use 'pw' prefix rather than 'lib' library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' ;; esac dynamic_linker='Win32 ld.exe' ;; *,cl* | *,icl*) # Native MSVC or ICC libname_spec='$name' soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' library_names_spec='$libname.dll.lib' case $build_os in mingw*) sys_lib_search_path_spec= lt_save_ifs=$IFS IFS=';' for lt_path in $LIB do IFS=$lt_save_ifs # Let DOS variable expansion print the short 8.3 style file name. lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" done IFS=$lt_save_ifs # Convert to MSYS style. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` ;; cygwin*) # Convert to unix form, then to dos form, then back to unix form # but this time dos style (no spaces!) so that the unix form looks # like /cygdrive/c/PROGRA~1:/cygdr... sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` ;; *) sys_lib_search_path_spec=$LIB if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then # It is most probably a Windows format PATH. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` else sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi # FIXME: find the short name or the path components, as spaces are # common. (e.g. "Program Files" -> "PROGRA~1") ;; esac # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \$file`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes dynamic_linker='Win32 link.exe' ;; *) # Assume MSVC and ICC wrapper library_names_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext $libname.lib' dynamic_linker='Win32 ld.exe' ;; esac # FIXME: first we should search . and the directory the executable is in shlibpath_var=PATH ;; darwin* | rhapsody*) dynamic_linker="$host_os dyld" version_type=darwin need_lib_prefix=no need_version=no library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' soname_spec='$libname$release$major$shared_ext' shlibpath_overrides_runpath=yes shlibpath_var=DYLD_LIBRARY_PATH shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib" sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' ;; dgux*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH ;; freebsd* | dragonfly* | midnightbsd*) # DragonFly does not have aout. When/if they implement a new # versioning mechanism, adjust this. if test -x /usr/bin/objformat; then objformat=`/usr/bin/objformat` else case $host_os in freebsd[23].*) objformat=aout ;; *) objformat=elf ;; esac fi version_type=freebsd-$objformat case $version_type in freebsd-elf*) library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' need_version=no need_lib_prefix=no ;; freebsd-*) library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' need_version=yes ;; esac shlibpath_var=LD_LIBRARY_PATH case $host_os in freebsd2.*) shlibpath_overrides_runpath=yes ;; freebsd3.[01]* | freebsdelf3.[01]*) shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; *) # from 4.6 on, and DragonFly shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; esac ;; haiku*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no dynamic_linker="$host_os runtime_loader" library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LIBRARY_PATH shlibpath_overrides_runpath=no sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' hardcode_into_libs=yes ;; hpux9* | hpux10* | hpux11*) # Give a soname corresponding to the major version so that dld.sl refuses to # link against other versions. version_type=sunos need_lib_prefix=no need_version=no case $host_cpu in ia64*) shrext_cmds='.so' hardcode_into_libs=yes dynamic_linker="$host_os dld.so" shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' if test 32 = "$HPUX_IA64_MODE"; then sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" sys_lib_dlsearch_path_spec=/usr/lib/hpux32 else sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" sys_lib_dlsearch_path_spec=/usr/lib/hpux64 fi ;; hppa*64*) shrext_cmds='.sl' hardcode_into_libs=yes dynamic_linker="$host_os dld.sl" shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; *) shrext_cmds='.sl' dynamic_linker="$host_os dld.sl" shlibpath_var=SHLIB_PATH shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' ;; esac # HP-UX runs *really* slowly unless shared libraries are mode 555, ... postinstall_cmds='chmod 555 $lib' # or fails outright, so override atomically: install_override_mode=555 ;; interix[3-9]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; irix5* | irix6* | nonstopux*) case $host_os in nonstopux*) version_type=nonstopux ;; *) if test yes = "$lt_cv_prog_gnu_ld"; then version_type=linux # correct to gnu/linux during the next big refactor else version_type=irix fi ;; esac need_lib_prefix=no need_version=no soname_spec='$libname$release$shared_ext$major' library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' case $host_os in irix5* | nonstopux*) libsuff= shlibsuff= ;; *) case $LD in # libtool.m4 will add one of these switches to LD *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") libsuff= shlibsuff= libmagic=32-bit;; *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") libsuff=64 shlibsuff=64 libmagic=64-bit;; *) libsuff= shlibsuff= libmagic=never-match;; esac ;; esac shlibpath_var=LD_LIBRARY${shlibsuff}_PATH shlibpath_overrides_runpath=no sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" hardcode_into_libs=yes ;; # No shared lib support for Linux oldld, aout, or coff. linux*oldld* | linux*aout* | linux*coff*) dynamic_linker=no ;; linux*android*) version_type=none # Android doesn't support versioned libraries. need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext' soname_spec='$libname$release$shared_ext' finish_cmds= shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes dynamic_linker='Android linker' # Don't embed -rpath directories since the linker doesn't support them. hardcode_libdir_flag_spec='-L$libdir' ;; # This must be glibc/ELF. linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no # Some binutils ld are patched to set DT_RUNPATH if test ${lt_cv_shlibpath_overrides_runpath+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_shlibpath_overrides_runpath=no save_LDFLAGS=$LDFLAGS save_libdir=$libdir eval "libdir=/foo; wl=\"$lt_prog_compiler_wl\"; \ LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec\"" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null then : lt_cv_shlibpath_overrides_runpath=yes fi fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LDFLAGS=$save_LDFLAGS libdir=$save_libdir fi shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes # Ideally, we could use ldconfig to report *all* directores which are # searched for libraries, however this is still not possible. Aside from not # being certain /sbin/ldconfig is available, command # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, # even though it is searched at run-time. Try to do the best guess by # appending ld.so.conf contents (and includes) to the search path. if test -f /etc/ld.so.conf; then lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" fi # We used to test for /lib/ld.so.1 and disable shared libraries on # powerpc, because MkLinux only supported shared libraries with the # GNU dynamic linker. Since this was broken with cross compilers, # most powerpc-linux boxes support dynamic linking these days and # people can always --disable-shared, the test was removed, and we # assume the GNU/Linux dynamic linker is in use. dynamic_linker='GNU/Linux ld.so' ;; netbsdelf*-gnu) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='NetBSD ld.elf_so' ;; netbsd*) version_type=sunos need_lib_prefix=no need_version=no if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' dynamic_linker='NetBSD (a.out) ld.so' else library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' dynamic_linker='NetBSD ld.elf_so' fi shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; newsos6) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; *nto* | *qnx*) version_type=qnx need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='ldqnx.so' ;; openbsd* | bitrig*) version_type=sunos sys_lib_dlsearch_path_spec=/usr/lib need_lib_prefix=no if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then need_version=no else need_version=yes fi library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; os2*) libname_spec='$name' version_type=windows shrext_cmds=.dll need_version=no need_lib_prefix=no # OS/2 can only load a DLL with a base name of 8 characters or less. soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; v=$($ECHO $release$versuffix | tr -d .-); n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); $ECHO $n$v`$shared_ext' library_names_spec='${libname}_dll.$libext' dynamic_linker='OS/2 ld.exe' shlibpath_var=BEGINLIBPATH sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec postinstall_cmds='base_file=`basename \$file`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname~ if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; fi' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; $ECHO \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' ;; osf3* | osf4* | osf5*) version_type=osf need_lib_prefix=no need_version=no soname_spec='$libname$release$shared_ext$major' library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; rdos*) dynamic_linker=no ;; solaris*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes # ldd complains unless libraries are executable postinstall_cmds='chmod +x $lib' ;; sunos4*) version_type=sunos library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes if test yes = "$with_gnu_ld"; then need_lib_prefix=no fi need_version=yes ;; sysv4 | sysv4.3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH case $host_vendor in sni) shlibpath_overrides_runpath=no need_lib_prefix=no runpath_var=LD_RUN_PATH ;; siemens) need_lib_prefix=no ;; motorola) need_lib_prefix=no need_version=no shlibpath_overrides_runpath=no sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' ;; esac ;; sysv4*MP*) if test -d /usr/nec; then version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' soname_spec='$libname$shared_ext.$major' shlibpath_var=LD_LIBRARY_PATH fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) version_type=sco need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes if test yes = "$with_gnu_ld"; then sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' else sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' case $host_os in sco3.2v5*) sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" ;; esac fi sys_lib_dlsearch_path_spec='/usr/lib' ;; tpf*) # TPF is a cross-target only. Preferred cross-host = GNU/Linux. version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; uts4*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH ;; *) dynamic_linker=no ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 printf "%s\n" "$dynamic_linker" >&6; } test no = "$dynamic_linker" && can_build_shared=no variables_saved_for_relink="PATH $shlibpath_var $runpath_var" if test yes = "$GCC"; then variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" fi if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec fi if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec fi # remember unaugmented sys_lib_dlsearch_path content for libtool script decls... configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec # ... but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" # to be used as default LT_SYS_LIBRARY_PATH value in generated libtool configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 printf %s "checking how to hardcode library paths into programs... " >&6; } hardcode_action= if test -n "$hardcode_libdir_flag_spec" || test -n "$runpath_var" || test yes = "$hardcode_automatic"; then # We can hardcode non-existent directories. if test no != "$hardcode_direct" && # If the only mechanism to avoid hardcoding is shlibpath_var, we # have to relink, otherwise we might link with an installed library # when we should be linking with a yet-to-be-installed one ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, )" && test no != "$hardcode_minus_L"; then # Linking always hardcodes the temporary library directory. hardcode_action=relink else # We can link without hardcoding, and we can hardcode nonexisting dirs. hardcode_action=immediate fi else # We cannot hardcode anything, or else we can only hardcode existing # directories. hardcode_action=unsupported fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hardcode_action" >&5 printf "%s\n" "$hardcode_action" >&6; } if test relink = "$hardcode_action" || test yes = "$inherit_rpath"; then # Fast installation is not supported enable_fast_install=no elif test yes = "$shlibpath_overrides_runpath" || test no = "$enable_shared"; then # Fast installation is not necessary enable_fast_install=needless fi if test yes != "$enable_dlopen"; then enable_dlopen=unknown enable_dlopen_self=unknown enable_dlopen_self_static=unknown else lt_cv_dlopen=no lt_cv_dlopen_libs= case $host_os in beos*) lt_cv_dlopen=load_add_on lt_cv_dlopen_libs= lt_cv_dlopen_self=yes ;; mingw* | pw32* | cegcc*) lt_cv_dlopen=LoadLibrary lt_cv_dlopen_libs= ;; cygwin*) lt_cv_dlopen=dlopen lt_cv_dlopen_libs= ;; darwin*) # if libdl is installed we need to link against it { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 printf %s "checking for dlopen in -ldl... " >&6; } if test ${ac_cv_lib_dl_dlopen+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-ldl $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char dlopen (); int main (void) { return dlopen (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_dl_dlopen=yes else $as_nop ac_cv_lib_dl_dlopen=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 printf "%s\n" "$ac_cv_lib_dl_dlopen" >&6; } if test "x$ac_cv_lib_dl_dlopen" = xyes then : lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl else $as_nop lt_cv_dlopen=dyld lt_cv_dlopen_libs= lt_cv_dlopen_self=yes fi ;; tpf*) # Don't try to run any link tests for TPF. We know it's impossible # because TPF is a cross-compiler, and we know how we open DSOs. lt_cv_dlopen=dlopen lt_cv_dlopen_libs= lt_cv_dlopen_self=no ;; *) ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load" if test "x$ac_cv_func_shl_load" = xyes then : lt_cv_dlopen=shl_load else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5 printf %s "checking for shl_load in -ldld... " >&6; } if test ${ac_cv_lib_dld_shl_load+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-ldld $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char shl_load (); int main (void) { return shl_load (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_dld_shl_load=yes else $as_nop ac_cv_lib_dld_shl_load=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5 printf "%s\n" "$ac_cv_lib_dld_shl_load" >&6; } if test "x$ac_cv_lib_dld_shl_load" = xyes then : lt_cv_dlopen=shl_load lt_cv_dlopen_libs=-ldld else $as_nop ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen" if test "x$ac_cv_func_dlopen" = xyes then : lt_cv_dlopen=dlopen else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 printf %s "checking for dlopen in -ldl... " >&6; } if test ${ac_cv_lib_dl_dlopen+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-ldl $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char dlopen (); int main (void) { return dlopen (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_dl_dlopen=yes else $as_nop ac_cv_lib_dl_dlopen=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 printf "%s\n" "$ac_cv_lib_dl_dlopen" >&6; } if test "x$ac_cv_lib_dl_dlopen" = xyes then : lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5 printf %s "checking for dlopen in -lsvld... " >&6; } if test ${ac_cv_lib_svld_dlopen+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lsvld $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char dlopen (); int main (void) { return dlopen (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_svld_dlopen=yes else $as_nop ac_cv_lib_svld_dlopen=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5 printf "%s\n" "$ac_cv_lib_svld_dlopen" >&6; } if test "x$ac_cv_lib_svld_dlopen" = xyes then : lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-lsvld else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5 printf %s "checking for dld_link in -ldld... " >&6; } if test ${ac_cv_lib_dld_dld_link+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-ldld $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char dld_link (); int main (void) { return dld_link (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_dld_dld_link=yes else $as_nop ac_cv_lib_dld_dld_link=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5 printf "%s\n" "$ac_cv_lib_dld_dld_link" >&6; } if test "x$ac_cv_lib_dld_dld_link" = xyes then : lt_cv_dlopen=dld_link lt_cv_dlopen_libs=-ldld fi fi fi fi fi fi ;; esac if test no = "$lt_cv_dlopen"; then enable_dlopen=no else enable_dlopen=yes fi case $lt_cv_dlopen in dlopen) save_CPPFLAGS=$CPPFLAGS test yes = "$ac_cv_header_dlfcn_h" && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" save_LDFLAGS=$LDFLAGS wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" save_LIBS=$LIBS LIBS="$lt_cv_dlopen_libs $LIBS" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether a program can dlopen itself" >&5 printf %s "checking whether a program can dlopen itself... " >&6; } if test ${lt_cv_dlopen_self+y} then : printf %s "(cached) " >&6 else $as_nop if test yes = "$cross_compiling"; then : lt_cv_dlopen_self=cross else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF #line $LINENO "configure" #include "confdefs.h" #if HAVE_DLFCN_H #include #endif #include #ifdef RTLD_GLOBAL # define LT_DLGLOBAL RTLD_GLOBAL #else # ifdef DL_GLOBAL # define LT_DLGLOBAL DL_GLOBAL # else # define LT_DLGLOBAL 0 # endif #endif /* We may have to define LT_DLLAZY_OR_NOW in the command line if we find out it does not work in some platform. */ #ifndef LT_DLLAZY_OR_NOW # ifdef RTLD_LAZY # define LT_DLLAZY_OR_NOW RTLD_LAZY # else # ifdef DL_LAZY # define LT_DLLAZY_OR_NOW DL_LAZY # else # ifdef RTLD_NOW # define LT_DLLAZY_OR_NOW RTLD_NOW # else # ifdef DL_NOW # define LT_DLLAZY_OR_NOW DL_NOW # else # define LT_DLLAZY_OR_NOW 0 # endif # endif # endif # endif #endif /* When -fvisibility=hidden is used, assume the code has been annotated correspondingly for the symbols needed. */ #if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) int fnord () __attribute__((visibility("default"))); #endif int fnord () { return 42; } int main () { void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); int status = $lt_dlunknown; if (self) { if (dlsym (self,"fnord")) status = $lt_dlno_uscore; else { if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; else puts (dlerror ()); } /* dlclose (self); */ } else puts (dlerror ()); return status; } _LT_EOF if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 (eval $ac_link) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && test -s "conftest$ac_exeext" 2>/dev/null; then (./conftest; exit; ) >&5 2>/dev/null lt_status=$? case x$lt_status in x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;; x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;; x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;; esac else : # compilation failed lt_cv_dlopen_self=no fi fi rm -fr conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self" >&5 printf "%s\n" "$lt_cv_dlopen_self" >&6; } if test yes = "$lt_cv_dlopen_self"; then wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether a statically linked program can dlopen itself" >&5 printf %s "checking whether a statically linked program can dlopen itself... " >&6; } if test ${lt_cv_dlopen_self_static+y} then : printf %s "(cached) " >&6 else $as_nop if test yes = "$cross_compiling"; then : lt_cv_dlopen_self_static=cross else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF #line $LINENO "configure" #include "confdefs.h" #if HAVE_DLFCN_H #include #endif #include #ifdef RTLD_GLOBAL # define LT_DLGLOBAL RTLD_GLOBAL #else # ifdef DL_GLOBAL # define LT_DLGLOBAL DL_GLOBAL # else # define LT_DLGLOBAL 0 # endif #endif /* We may have to define LT_DLLAZY_OR_NOW in the command line if we find out it does not work in some platform. */ #ifndef LT_DLLAZY_OR_NOW # ifdef RTLD_LAZY # define LT_DLLAZY_OR_NOW RTLD_LAZY # else # ifdef DL_LAZY # define LT_DLLAZY_OR_NOW DL_LAZY # else # ifdef RTLD_NOW # define LT_DLLAZY_OR_NOW RTLD_NOW # else # ifdef DL_NOW # define LT_DLLAZY_OR_NOW DL_NOW # else # define LT_DLLAZY_OR_NOW 0 # endif # endif # endif # endif #endif /* When -fvisibility=hidden is used, assume the code has been annotated correspondingly for the symbols needed. */ #if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) int fnord () __attribute__((visibility("default"))); #endif int fnord () { return 42; } int main () { void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); int status = $lt_dlunknown; if (self) { if (dlsym (self,"fnord")) status = $lt_dlno_uscore; else { if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; else puts (dlerror ()); } /* dlclose (self); */ } else puts (dlerror ()); return status; } _LT_EOF if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 (eval $ac_link) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && test -s "conftest$ac_exeext" 2>/dev/null; then (./conftest; exit; ) >&5 2>/dev/null lt_status=$? case x$lt_status in x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;; x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;; x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;; esac else : # compilation failed lt_cv_dlopen_self_static=no fi fi rm -fr conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self_static" >&5 printf "%s\n" "$lt_cv_dlopen_self_static" >&6; } fi CPPFLAGS=$save_CPPFLAGS LDFLAGS=$save_LDFLAGS LIBS=$save_LIBS ;; esac case $lt_cv_dlopen_self in yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; *) enable_dlopen_self=unknown ;; esac case $lt_cv_dlopen_self_static in yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; *) enable_dlopen_self_static=unknown ;; esac fi striplib= old_striplib= { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether stripping libraries is possible" >&5 printf %s "checking whether stripping libraries is possible... " >&6; } if test -z "$STRIP"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } else if $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then old_striplib="$STRIP --strip-debug" striplib="$STRIP --strip-unneeded" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else case $host_os in darwin*) # FIXME - insert some real tests, host_os isn't really good enough striplib="$STRIP -x" old_striplib="$STRIP -S" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } ;; freebsd*) if $STRIP -V 2>&1 | $GREP "elftoolchain" >/dev/null; then old_striplib="$STRIP --strip-debug" striplib="$STRIP --strip-unneeded" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi ;; *) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } ;; esac fi fi # Report what library types will actually be built { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5 printf %s "checking if libtool supports shared libraries... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5 printf "%s\n" "$can_build_shared" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5 printf %s "checking whether to build shared libraries... " >&6; } test no = "$can_build_shared" && enable_shared=no # On AIX, shared libraries and static libraries use the same namespace, and # are all built from PIC. case $host_os in aix3*) test yes = "$enable_shared" && enable_static=no if test -n "$RANLIB"; then archive_cmds="$archive_cmds~\$RANLIB \$lib" postinstall_cmds='$RANLIB $lib' fi ;; aix[4-9]*) if test ia64 != "$host_cpu"; then case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in yes,aix,yes) ;; # shared object as lib.so file only yes,svr4,*) ;; # shared object as lib.so archive member only yes,*) enable_static=no ;; # shared object in lib.a archive as well esac fi ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5 printf "%s\n" "$enable_shared" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5 printf %s "checking whether to build static libraries... " >&6; } # Make sure either enable_shared or enable_static is yes. test yes = "$enable_shared" || enable_static=yes { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5 printf "%s\n" "$enable_static" >&6; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu CC=$lt_save_CC if test -n "$CXX" && ( test no != "$CXX" && ( (test g++ = "$CXX" && `g++ -v >/dev/null 2>&1` ) || (test g++ != "$CXX"))); then ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5 printf %s "checking how to run the C++ preprocessor... " >&6; } if test -z "$CXXCPP"; then if test ${ac_cv_prog_CXXCPP+y} then : printf %s "(cached) " >&6 else $as_nop # Double quotes because $CXX needs to be expanded for CXXCPP in "$CXX -E" cpp /lib/cpp do ac_preproc_ok=false for ac_cxx_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include Syntax error _ACEOF if ac_fn_cxx_try_cpp "$LINENO" then : else $as_nop # Broken: fails on valid input. continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_cxx_try_cpp "$LINENO" then : # Broken: success on invalid input. continue else $as_nop # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok then : break fi done ac_cv_prog_CXXCPP=$CXXCPP fi CXXCPP=$ac_cv_prog_CXXCPP else ac_cv_prog_CXXCPP=$CXXCPP fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CXXCPP" >&5 printf "%s\n" "$CXXCPP" >&6; } ac_preproc_ok=false for ac_cxx_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include Syntax error _ACEOF if ac_fn_cxx_try_cpp "$LINENO" then : else $as_nop # Broken: fails on valid input. continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_cxx_try_cpp "$LINENO" then : # Broken: success on invalid input. continue else $as_nop # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok then : else $as_nop { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "C++ preprocessor \"$CXXCPP\" fails sanity check See \`config.log' for more details" "$LINENO" 5; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu else _lt_caught_CXX_error=yes fi ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu archive_cmds_need_lc_CXX=no allow_undefined_flag_CXX= always_export_symbols_CXX=no archive_expsym_cmds_CXX= compiler_needs_object_CXX=no export_dynamic_flag_spec_CXX= hardcode_direct_CXX=no hardcode_direct_absolute_CXX=no hardcode_libdir_flag_spec_CXX= hardcode_libdir_separator_CXX= hardcode_minus_L_CXX=no hardcode_shlibpath_var_CXX=unsupported hardcode_automatic_CXX=no inherit_rpath_CXX=no module_cmds_CXX= module_expsym_cmds_CXX= link_all_deplibs_CXX=unknown old_archive_cmds_CXX=$old_archive_cmds reload_flag_CXX=$reload_flag reload_cmds_CXX=$reload_cmds no_undefined_flag_CXX= whole_archive_flag_spec_CXX= enable_shared_with_static_runtimes_CXX=no # Source file extension for C++ test sources. ac_ext=cpp # Object file extension for compiled C++ test sources. objext=o objext_CXX=$objext # No sense in running all these tests if we already determined that # the CXX compiler isn't working. Some variables (like enable_shared) # are currently assumed to apply to all compilers on this platform, # and will be corrupted by setting them based on a non-working compiler. if test yes != "$_lt_caught_CXX_error"; then # Code to be used in simple compile tests lt_simple_compile_test_code="int some_variable = 0;" # Code to be used in simple link tests lt_simple_link_test_code='int main(int, char *[]) { return(0); }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. compiler=$CC # save warnings/boilerplate of simple test code ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" >conftest.$ac_ext eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_compiler_boilerplate=`cat conftest.err` $RM conftest* ac_outfile=conftest.$ac_objext echo "$lt_simple_link_test_code" >conftest.$ac_ext eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_linker_boilerplate=`cat conftest.err` $RM -r conftest* # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_CFLAGS=$CFLAGS lt_save_LD=$LD lt_save_GCC=$GCC GCC=$GXX lt_save_with_gnu_ld=$with_gnu_ld lt_save_path_LD=$lt_cv_path_LD if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx else $as_unset lt_cv_prog_gnu_ld fi if test -n "${lt_cv_path_LDCXX+set}"; then lt_cv_path_LD=$lt_cv_path_LDCXX else $as_unset lt_cv_path_LD fi test -z "${LDCXX+set}" || LD=$LDCXX CC=${CXX-"c++"} CFLAGS=$CXXFLAGS compiler=$CC compiler_CXX=$CC func_cc_basename $compiler cc_basename=$func_cc_basename_result if test -n "$compiler"; then # We don't want -fno-exception when compiling C++ code, so set the # no_builtin_flag separately if test yes = "$GXX"; then lt_prog_compiler_no_builtin_flag_CXX=' -fno-builtin' else lt_prog_compiler_no_builtin_flag_CXX= fi if test yes = "$GXX"; then # Set up default GNU C++ configuration # Check whether --with-gnu-ld was given. if test ${with_gnu_ld+y} then : withval=$with_gnu_ld; test no = "$withval" || with_gnu_ld=yes else $as_nop with_gnu_ld=no fi ac_prog=ld if test yes = "$GCC"; then # Check if gcc -print-prog-name=ld gives a path. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 printf %s "checking for ld used by $CC... " >&6; } case $host in *-*-mingw*) # gcc leaves a trailing carriage return, which upsets mingw ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; *) ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; esac case $ac_prog in # Accept absolute paths. [\\/]* | ?:[\\/]*) re_direlt='/[^/][^/]*/\.\./' # Canonicalize the pathname of ld ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` done test -z "$LD" && LD=$ac_prog ;; "") # If it fails, then pretend we aren't using GCC. ac_prog=ld ;; *) # If it is relative, then search for the first ld in PATH. with_gnu_ld=unknown ;; esac elif test yes = "$with_gnu_ld"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5 printf %s "checking for GNU ld... " >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5 printf %s "checking for non-GNU ld... " >&6; } fi if test ${lt_cv_path_LD+y} then : printf %s "(cached) " >&6 else $as_nop if test -z "$LD"; then lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR for ac_dir in $PATH; do IFS=$lt_save_ifs test -z "$ac_dir" && ac_dir=. if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then lt_cv_path_LD=$ac_dir/$ac_prog # Check to see if the program is GNU ld. I'd rather use --version, # but apparently some variants of GNU ld only accept -v. # Break only if it was the GNU/non-GNU ld that we prefer. case `"$lt_cv_path_LD" -v 2>&1 &5 printf "%s\n" "$LD" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5 printf %s "checking if the linker ($LD) is GNU ld... " >&6; } if test ${lt_cv_prog_gnu_ld+y} then : printf %s "(cached) " >&6 else $as_nop # I'd rather use --version here, but apparently some GNU lds only accept -v. case `$LD -v 2>&1 &5 printf "%s\n" "$lt_cv_prog_gnu_ld" >&6; } with_gnu_ld=$lt_cv_prog_gnu_ld # Check if GNU C++ uses GNU ld as the underlying linker, since the # archiving commands below assume that GNU ld is being used. if test yes = "$with_gnu_ld"; then archive_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' hardcode_libdir_flag_spec_CXX='$wl-rpath $wl$libdir' export_dynamic_flag_spec_CXX='$wl--export-dynamic' # If archive_cmds runs LD, not CC, wlarc should be empty # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to # investigate it a little bit more. (MM) wlarc='$wl' # ancient GNU ld didn't support --whole-archive et. al. if eval "`$CC -print-prog-name=ld` --help 2>&1" | $GREP 'no-whole-archive' > /dev/null; then whole_archive_flag_spec_CXX=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' else whole_archive_flag_spec_CXX= fi else with_gnu_ld=no wlarc= # A generic and very simple default shared library creation # command for GNU C++ for the case where it uses the native # linker, instead of GNU ld. If possible, this setting should # overridden to take advantage of the native linker features on # the platform it is being used on. archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' fi # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' else GXX=no with_gnu_ld=no wlarc= fi # PORTME: fill in a description of your system's C++ link characteristics { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 printf %s "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } ld_shlibs_CXX=yes case $host_os in aix3*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; aix[4-9]*) if test ia64 = "$host_cpu"; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag= else aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # have runtime linking enabled, and use it for executables. # For shared libraries, we enable/disable runtime linking # depending on the kind of the shared library created - # when "with_aix_soname,aix_use_runtimelinking" is: # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables # "aix,yes" lib.so shared, rtl:yes, for executables # lib.a static archive # "both,no" lib.so.V(shr.o) shared, rtl:yes # lib.a(lib.so.V) shared, rtl:no, for executables # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables # lib.a(lib.so.V) shared, rtl:no # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables # lib.a static archive case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) for ld_flag in $LDFLAGS; do case $ld_flag in *-brtl*) aix_use_runtimelinking=yes break ;; esac done if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then # With aix-soname=svr4, we create the lib.so.V shared archives only, # so we don't have lib.a shared libs to link our executables. # We have to force runtime linking in this case. aix_use_runtimelinking=yes LDFLAGS="$LDFLAGS -Wl,-brtl" fi ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. archive_cmds_CXX='' hardcode_direct_CXX=yes hardcode_direct_absolute_CXX=yes hardcode_libdir_separator_CXX=':' link_all_deplibs_CXX=yes file_list_spec_CXX='$wl-f,' case $with_aix_soname,$aix_use_runtimelinking in aix,*) ;; # no import file svr4,* | *,yes) # use import file # The Import File defines what to hardcode. hardcode_direct_CXX=no hardcode_direct_absolute_CXX=no ;; esac if test yes = "$GXX"; then case $host_os in aix4.[012]|aix4.[012].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`$CC -print-prog-name=collect2` if test -f "$collect2name" && strings "$collect2name" | $GREP resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 hardcode_direct_CXX=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking hardcode_minus_L_CXX=yes hardcode_libdir_flag_spec_CXX='-L$libdir' hardcode_libdir_separator_CXX= fi esac shared_flag='-shared' if test yes = "$aix_use_runtimelinking"; then shared_flag=$shared_flag' $wl-G' fi # Need to ensure runtime linking is disabled for the traditional # shared library, or the linker may eventually find shared libraries # /with/ Import File - we do not want to mix them. shared_flag_aix='-shared' shared_flag_svr4='-shared $wl-G' else # not using gcc if test ia64 = "$host_cpu"; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test yes = "$aix_use_runtimelinking"; then shared_flag='$wl-G' else shared_flag='$wl-bM:SRE' fi shared_flag_aix='$wl-bM:SRE' shared_flag_svr4='$wl-G' fi fi export_dynamic_flag_spec_CXX='$wl-bexpall' # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to # export. always_export_symbols_CXX=yes if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. # The "-G" linker flag allows undefined symbols. no_undefined_flag_CXX='-bernotok' # Determine the default libpath from the value encoded in an empty # executable. if test set = "${lt_cv_aix_libpath+set}"; then aix_libpath=$lt_cv_aix_libpath else if test ${lt_cv_aix_libpath__CXX+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }' lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$lt_cv_aix_libpath__CXX"; then lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test -z "$lt_cv_aix_libpath__CXX"; then lt_cv_aix_libpath__CXX=/usr/lib:/lib fi fi aix_libpath=$lt_cv_aix_libpath__CXX fi hardcode_libdir_flag_spec_CXX='$wl-blibpath:$libdir:'"$aix_libpath" archive_expsym_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag else if test ia64 = "$host_cpu"; then hardcode_libdir_flag_spec_CXX='$wl-R $libdir:/usr/lib:/lib' allow_undefined_flag_CXX="-z nodefs" archive_expsym_cmds_CXX="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an # empty executable. if test set = "${lt_cv_aix_libpath+set}"; then aix_libpath=$lt_cv_aix_libpath else if test ${lt_cv_aix_libpath__CXX+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }' lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$lt_cv_aix_libpath__CXX"; then lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test -z "$lt_cv_aix_libpath__CXX"; then lt_cv_aix_libpath__CXX=/usr/lib:/lib fi fi aix_libpath=$lt_cv_aix_libpath__CXX fi hardcode_libdir_flag_spec_CXX='$wl-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. no_undefined_flag_CXX=' $wl-bernotok' allow_undefined_flag_CXX=' $wl-berok' if test yes = "$with_gnu_ld"; then # We only use this code for GNU lds that support --whole-archive. whole_archive_flag_spec_CXX='$wl--whole-archive$convenience $wl--no-whole-archive' else # Exported symbols can be pulled into shared objects from archives whole_archive_flag_spec_CXX='$convenience' fi archive_cmds_need_lc_CXX=yes archive_expsym_cmds_CXX='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' # -brtl affects multiple linker settings, -berok does not and is overridden later compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([, ]\\)%-berok\\1%g"`' if test svr4 != "$with_aix_soname"; then # This is similar to how AIX traditionally builds its shared # libraries. Need -bnortl late, we may have -brtl in LDFLAGS. archive_expsym_cmds_CXX="$archive_expsym_cmds_CXX"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' fi if test aix != "$with_aix_soname"; then archive_expsym_cmds_CXX="$archive_expsym_cmds_CXX"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! $soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' else # used by -dlpreopen to get the symbols archive_expsym_cmds_CXX="$archive_expsym_cmds_CXX"'~$MV $output_objdir/$realname.d/$soname $output_objdir' fi archive_expsym_cmds_CXX="$archive_expsym_cmds_CXX"'~$RM -r $output_objdir/$realname.d' fi fi ;; beos*) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then allow_undefined_flag_CXX=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. FIXME archive_cmds_CXX='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' else ld_shlibs_CXX=no fi ;; chorus*) case $cc_basename in *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac ;; cygwin* | mingw* | pw32* | cegcc*) case $GXX,$cc_basename in ,cl* | no,cl* | ,icl* | no,icl*) # Native MSVC or ICC # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. hardcode_libdir_flag_spec_CXX=' ' allow_undefined_flag_CXX=unsupported always_export_symbols_CXX=yes file_list_spec_CXX='@' # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=.dll # FIXME: Setting linknames here is a bad hack. archive_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' archive_expsym_cmds_CXX='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then cp "$export_symbols" "$output_objdir/$soname.def"; echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; else $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; fi~ $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ linknames=' # The linker will not automatically build a static lib if we build a DLL. # _LT_TAGVAR(old_archive_from_new_cmds, CXX)='true' enable_shared_with_static_runtimes_CXX=yes # Don't use ranlib old_postinstall_cmds_CXX='chmod 644 $oldlib' postlink_cmds_CXX='lt_outputfile="@OUTPUT@"~ lt_tool_outputfile="@TOOL_OUTPUT@"~ case $lt_outputfile in *.exe|*.EXE) ;; *) lt_outputfile=$lt_outputfile.exe lt_tool_outputfile=$lt_tool_outputfile.exe ;; esac~ func_to_tool_file "$lt_outputfile"~ if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; $RM "$lt_outputfile.manifest"; fi' ;; *) # g++ # _LT_TAGVAR(hardcode_libdir_flag_spec, CXX) is actually meaningless, # as there is no search path for DLLs. hardcode_libdir_flag_spec_CXX='-L$libdir' export_dynamic_flag_spec_CXX='$wl--export-all-symbols' allow_undefined_flag_CXX=unsupported always_export_symbols_CXX=no enable_shared_with_static_runtimes_CXX=yes if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file, use it as # is; otherwise, prepend EXPORTS... archive_expsym_cmds_CXX='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else ld_shlibs_CXX=no fi ;; esac ;; darwin* | rhapsody*) archive_cmds_need_lc_CXX=no hardcode_direct_CXX=no hardcode_automatic_CXX=yes hardcode_shlibpath_var_CXX=unsupported if test yes = "$lt_cv_ld_force_load"; then whole_archive_flag_spec_CXX='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' else whole_archive_flag_spec_CXX='' fi link_all_deplibs_CXX=yes allow_undefined_flag_CXX=$_lt_dar_allow_undefined case $cc_basename in ifort*|nagfor*) _lt_dar_can_shared=yes ;; *) _lt_dar_can_shared=$GCC ;; esac if test yes = "$_lt_dar_can_shared"; then output_verbose_link_cmd=func_echo_all archive_cmds_CXX="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" module_cmds_CXX="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" archive_expsym_cmds_CXX="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" module_expsym_cmds_CXX="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" if test yes != "$lt_cv_apple_cc_single_mod"; then archive_cmds_CXX="\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dsymutil" archive_expsym_cmds_CXX="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dar_export_syms$_lt_dsymutil" fi else ld_shlibs_CXX=no fi ;; os2*) hardcode_libdir_flag_spec_CXX='-L$libdir' hardcode_minus_L_CXX=yes allow_undefined_flag_CXX=unsupported shrext_cmds=.dll archive_cmds_CXX='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' archive_expsym_cmds_CXX='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ prefix_cmds="$SED"~ if test EXPORTS = "`$SED 1q $export_symbols`"; then prefix_cmds="$prefix_cmds -e 1d"; fi~ prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' old_archive_From_new_cmds_CXX='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' enable_shared_with_static_runtimes_CXX=yes file_list_spec_CXX='@' ;; dgux*) case $cc_basename in ec++*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; ghcx*) # Green Hills C++ Compiler # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac ;; freebsd2.*) # C++ shared libraries reported to be fairly broken before # switch to ELF ld_shlibs_CXX=no ;; freebsd-elf*) archive_cmds_need_lc_CXX=no ;; freebsd* | dragonfly* | midnightbsd*) # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF # conventions ld_shlibs_CXX=yes ;; haiku*) archive_cmds_CXX='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' link_all_deplibs_CXX=yes ;; hpux9*) hardcode_libdir_flag_spec_CXX='$wl+b $wl$libdir' hardcode_libdir_separator_CXX=: export_dynamic_flag_spec_CXX='$wl-E' hardcode_direct_CXX=yes hardcode_minus_L_CXX=yes # Not in the search PATH, # but as the default # location of the library. case $cc_basename in CC*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; aCC*) archive_cmds_CXX='$RM $output_objdir/$soname~$CC -b $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test yes = "$GXX"; then archive_cmds_CXX='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' else # FIXME: insert proper C++ library support ld_shlibs_CXX=no fi ;; esac ;; hpux10*|hpux11*) if test no = "$with_gnu_ld"; then hardcode_libdir_flag_spec_CXX='$wl+b $wl$libdir' hardcode_libdir_separator_CXX=: case $host_cpu in hppa*64*|ia64*) ;; *) export_dynamic_flag_spec_CXX='$wl-E' ;; esac fi case $host_cpu in hppa*64*|ia64*) hardcode_direct_CXX=no hardcode_shlibpath_var_CXX=no ;; *) hardcode_direct_CXX=yes hardcode_direct_absolute_CXX=yes hardcode_minus_L_CXX=yes # Not in the search PATH, # but as the default # location of the library. ;; esac case $cc_basename in CC*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; aCC*) case $host_cpu in hppa*64*) archive_cmds_CXX='$CC -b $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; ia64*) archive_cmds_CXX='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; *) archive_cmds_CXX='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; esac # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test yes = "$GXX"; then if test no = "$with_gnu_ld"; then case $host_cpu in hppa*64*) archive_cmds_CXX='$CC -shared -nostdlib -fPIC $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; ia64*) archive_cmds_CXX='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; *) archive_cmds_CXX='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; esac fi else # FIXME: insert proper C++ library support ld_shlibs_CXX=no fi ;; esac ;; interix[3-9]*) hardcode_direct_CXX=no hardcode_shlibpath_var_CXX=no hardcode_libdir_flag_spec_CXX='$wl-rpath,$libdir' export_dynamic_flag_spec_CXX='$wl-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. archive_cmds_CXX='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' archive_expsym_cmds_CXX='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; irix5* | irix6*) case $cc_basename in CC*) # SGI C++ archive_cmds_CXX='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' # Archives containing C++ object files must be created using # "CC -ar", where "CC" is the IRIX C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. old_archive_cmds_CXX='$CC -ar -WR,-u -o $oldlib $oldobjs' ;; *) if test yes = "$GXX"; then if test no = "$with_gnu_ld"; then archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' else archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` -o $lib' fi fi link_all_deplibs_CXX=yes ;; esac hardcode_libdir_flag_spec_CXX='$wl-rpath $wl$libdir' hardcode_libdir_separator_CXX=: inherit_rpath_CXX=yes ;; linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' archive_expsym_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib $wl-retain-symbols-file,$export_symbols; mv \$templib $lib' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' hardcode_libdir_flag_spec_CXX='$wl-rpath,$libdir' export_dynamic_flag_spec_CXX='$wl--export-dynamic' # Archives containing C++ object files must be created using # "CC -Bstatic", where "CC" is the KAI C++ compiler. old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs' ;; icpc* | ecpc* ) # Intel C++ with_gnu_ld=yes # version 8.0 and above of icpc choke on multiply defined symbols # if we add $predep_objects and $postdep_objects, however 7.1 and # earlier do not add the objects themselves. case `$CC -V 2>&1` in *"Version 7."*) archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' ;; *) # Version 8.0 or newer tmp_idyn= case $host_cpu in ia64*) tmp_idyn=' -i_dynamic';; esac archive_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' ;; esac archive_cmds_need_lc_CXX=no hardcode_libdir_flag_spec_CXX='$wl-rpath,$libdir' export_dynamic_flag_spec_CXX='$wl--export-dynamic' whole_archive_flag_spec_CXX='$wl--whole-archive$convenience $wl--no-whole-archive' ;; pgCC* | pgcpp*) # Portland Group C++ compiler case `$CC -V` in *pgCC\ [1-5].* | *pgcpp\ [1-5].*) prelink_cmds_CXX='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' old_archive_cmds_CXX='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ $RANLIB $oldlib' archive_cmds_CXX='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_CXX='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' ;; *) # Version 6 and above use weak symbols archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' ;; esac hardcode_libdir_flag_spec_CXX='$wl--rpath $wl$libdir' export_dynamic_flag_spec_CXX='$wl--export-dynamic' whole_archive_flag_spec_CXX='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' ;; cxx*) # Compaq C++ archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib $wl-retain-symbols-file $wl$export_symbols' runpath_var=LD_RUN_PATH hardcode_libdir_flag_spec_CXX='-rpath $libdir' hardcode_libdir_separator_CXX=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' ;; xl* | mpixl* | bgxl*) # IBM XL 8.0 on PPC, with GNU ld hardcode_libdir_flag_spec_CXX='$wl-rpath $wl$libdir' export_dynamic_flag_spec_CXX='$wl--export-dynamic' archive_cmds_CXX='$CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' if test yes = "$supports_anon_versioning"; then archive_expsym_cmds_CXX='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' fi ;; *) case `$CC -V 2>&1 | $SED 5q` in *Sun\ C*) # Sun C++ 5.9 no_undefined_flag_CXX=' -zdefs' archive_cmds_CXX='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' archive_expsym_cmds_CXX='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file $wl$export_symbols' hardcode_libdir_flag_spec_CXX='-R$libdir' whole_archive_flag_spec_CXX='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' compiler_needs_object_CXX=yes # Not sure whether something based on # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 # would be better. output_verbose_link_cmd='func_echo_all' # Archives containing C++ object files must be created using # "CC -xar", where "CC" is the Sun C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs' ;; esac ;; esac ;; lynxos*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; m88k*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; mvs*) case $cc_basename in cxx*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac ;; netbsd*) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then archive_cmds_CXX='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' wlarc= hardcode_libdir_flag_spec_CXX='-R$libdir' hardcode_direct_CXX=yes hardcode_shlibpath_var_CXX=no fi # Workaround some broken pre-1.5 toolchains output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' ;; *nto* | *qnx*) ld_shlibs_CXX=yes ;; openbsd* | bitrig*) if test -f /usr/libexec/ld.so; then hardcode_direct_CXX=yes hardcode_shlibpath_var_CXX=no hardcode_direct_absolute_CXX=yes archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' hardcode_libdir_flag_spec_CXX='$wl-rpath,$libdir' if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`"; then archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file,$export_symbols -o $lib' export_dynamic_flag_spec_CXX='$wl-E' whole_archive_flag_spec_CXX=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' fi output_verbose_link_cmd=func_echo_all else ld_shlibs_CXX=no fi ;; osf3* | osf4* | osf5*) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' hardcode_libdir_flag_spec_CXX='$wl-rpath,$libdir' hardcode_libdir_separator_CXX=: # Archives containing C++ object files must be created using # the KAI C++ compiler. case $host in osf3*) old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs' ;; *) old_archive_cmds_CXX='$CC -o $oldlib $oldobjs' ;; esac ;; RCC*) # Rational C++ 2.4.1 # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; cxx*) case $host in osf3*) allow_undefined_flag_CXX=' $wl-expect_unresolved $wl\*' archive_cmds_CXX='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $soname `test -n "$verstring" && func_echo_all "$wl-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' hardcode_libdir_flag_spec_CXX='$wl-rpath $wl$libdir' ;; *) allow_undefined_flag_CXX=' -expect_unresolved \*' archive_cmds_CXX='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' archive_expsym_cmds_CXX='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ echo "-hidden">> $lib.exp~ $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname $wl-input $wl$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~ $RM $lib.exp' hardcode_libdir_flag_spec_CXX='-rpath $libdir' ;; esac hardcode_libdir_separator_CXX=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test yes,no = "$GXX,$with_gnu_ld"; then allow_undefined_flag_CXX=' $wl-expect_unresolved $wl\*' case $host in osf3*) archive_cmds_CXX='$CC -shared -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' ;; *) archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' ;; esac hardcode_libdir_flag_spec_CXX='$wl-rpath $wl$libdir' hardcode_libdir_separator_CXX=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' else # FIXME: insert proper C++ library support ld_shlibs_CXX=no fi ;; esac ;; psos*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; sunos4*) case $cc_basename in CC*) # Sun C++ 4.x # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; lcc*) # Lucid # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac ;; solaris*) case $cc_basename in CC* | sunCC*) # Sun C++ 4.2, 5.x and Centerline C++ archive_cmds_need_lc_CXX=yes no_undefined_flag_CXX=' -zdefs' archive_cmds_CXX='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G$allow_undefined_flag $wl-M $wl$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' hardcode_libdir_flag_spec_CXX='-R$libdir' hardcode_shlibpath_var_CXX=no case $host_os in solaris2.[0-5] | solaris2.[0-5].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands '-z linker_flag'. # Supported since Solaris 2.6 (maybe 2.5.1?) whole_archive_flag_spec_CXX='-z allextract$convenience -z defaultextract' ;; esac link_all_deplibs_CXX=yes output_verbose_link_cmd='func_echo_all' # Archives containing C++ object files must be created using # "CC -xar", where "CC" is the Sun C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs' ;; gcx*) # Green Hills C++ Compiler archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' # The C++ compiler must be used to create the archive. old_archive_cmds_CXX='$CC $LDFLAGS -archive -o $oldlib $oldobjs' ;; *) # GNU C++ compiler with Solaris linker if test yes,no = "$GXX,$with_gnu_ld"; then no_undefined_flag_CXX=' $wl-z ${wl}defs' if $CC --version | $GREP -v '^2\.7' > /dev/null; then archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -shared $pic_flag -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' else # g++ 2.7 appears to require '-G' NOT '-shared' on this # platform. archive_cmds_CXX='$CC -G -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' fi hardcode_libdir_flag_spec_CXX='$wl-R $wl$libdir' case $host_os in solaris2.[0-5] | solaris2.[0-5].*) ;; *) whole_archive_flag_spec_CXX='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' ;; esac fi ;; esac ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) no_undefined_flag_CXX='$wl-z,text' archive_cmds_need_lc_CXX=no hardcode_shlibpath_var_CXX=no runpath_var='LD_RUN_PATH' case $cc_basename in CC*) archive_cmds_CXX='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_CXX='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; *) archive_cmds_CXX='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_CXX='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; esac ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We CANNOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. no_undefined_flag_CXX='$wl-z,text' allow_undefined_flag_CXX='$wl-z,nodefs' archive_cmds_need_lc_CXX=no hardcode_shlibpath_var_CXX=no hardcode_libdir_flag_spec_CXX='$wl-R,$libdir' hardcode_libdir_separator_CXX=':' link_all_deplibs_CXX=yes export_dynamic_flag_spec_CXX='$wl-Bexport' runpath_var='LD_RUN_PATH' case $cc_basename in CC*) archive_cmds_CXX='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_CXX='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' old_archive_cmds_CXX='$CC -Tprelink_objects $oldobjs~ '"$old_archive_cmds_CXX" reload_cmds_CXX='$CC -Tprelink_objects $reload_objs~ '"$reload_cmds_CXX" ;; *) archive_cmds_CXX='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_CXX='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; esac ;; tandem*) case $cc_basename in NCC*) # NonStop-UX NCC 3.20 # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac ;; vxworks*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5 printf "%s\n" "$ld_shlibs_CXX" >&6; } test no = "$ld_shlibs_CXX" && can_build_shared=no GCC_CXX=$GXX LD_CXX=$LD ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... # Dependencies to place before and after the object being linked: predep_objects_CXX= postdep_objects_CXX= predeps_CXX= postdeps_CXX= compiler_lib_search_path_CXX= cat > conftest.$ac_ext <<_LT_EOF class Foo { public: Foo (void) { a = 0; } private: int a; }; _LT_EOF _lt_libdeps_save_CFLAGS=$CFLAGS case "$CC $CFLAGS " in #( *\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; *\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; *\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;; esac if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then # Parse the compiler output and extract the necessary # objects, libraries and library flags. # Sentinel used to keep track of whether or not we are before # the conftest object file. pre_test_object_deps_done=no for p in `eval "$output_verbose_link_cmd"`; do case $prev$p in -L* | -R* | -l*) # Some compilers place space between "-{L,R}" and the path. # Remove the space. if test x-L = "$p" || test x-R = "$p"; then prev=$p continue fi # Expand the sysroot to ease extracting the directories later. if test -z "$prev"; then case $p in -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; esac fi case $p in =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; esac if test no = "$pre_test_object_deps_done"; then case $prev in -L | -R) # Internal compiler library paths should come after those # provided the user. The postdeps already come after the # user supplied libs so there is no need to process them. if test -z "$compiler_lib_search_path_CXX"; then compiler_lib_search_path_CXX=$prev$p else compiler_lib_search_path_CXX="${compiler_lib_search_path_CXX} $prev$p" fi ;; # The "-l" case would never come before the object being # linked, so don't bother handling this case. esac else if test -z "$postdeps_CXX"; then postdeps_CXX=$prev$p else postdeps_CXX="${postdeps_CXX} $prev$p" fi fi prev= ;; *.lto.$objext) ;; # Ignore GCC LTO objects *.$objext) # This assumes that the test object file only shows up # once in the compiler output. if test "$p" = "conftest.$objext"; then pre_test_object_deps_done=yes continue fi if test no = "$pre_test_object_deps_done"; then if test -z "$predep_objects_CXX"; then predep_objects_CXX=$p else predep_objects_CXX="$predep_objects_CXX $p" fi else if test -z "$postdep_objects_CXX"; then postdep_objects_CXX=$p else postdep_objects_CXX="$postdep_objects_CXX $p" fi fi ;; *) ;; # Ignore the rest. esac done # Clean up. rm -f a.out a.exe else echo "libtool.m4: error: problem compiling CXX test program" fi $RM -f confest.$objext CFLAGS=$_lt_libdeps_save_CFLAGS # PORTME: override above test on systems where it is broken case $host_os in interix[3-9]*) # Interix 3.5 installs completely hosed .la files for C++, so rather than # hack all around it, let's just trust "g++" to DTRT. predep_objects_CXX= postdep_objects_CXX= postdeps_CXX= ;; esac case " $postdeps_CXX " in *" -lc "*) archive_cmds_need_lc_CXX=no ;; esac compiler_lib_search_dirs_CXX= if test -n "${compiler_lib_search_path_CXX}"; then compiler_lib_search_dirs_CXX=`echo " ${compiler_lib_search_path_CXX}" | $SED -e 's! -L! !g' -e 's!^ !!'` fi lt_prog_compiler_wl_CXX= lt_prog_compiler_pic_CXX= lt_prog_compiler_static_CXX= # C++ specific cases for pic, static, wl, etc. if test yes = "$GXX"; then lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_static_CXX='-static' case $host_os in aix*) # All AIX code is PIC. if test ia64 = "$host_cpu"; then # AIX 5 now supports IA64 processor lt_prog_compiler_static_CXX='-Bstatic' fi lt_prog_compiler_pic_CXX='-fPIC' ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support lt_prog_compiler_pic_CXX='-fPIC' ;; m68k) # FIXME: we need at least 68020 code to build shared libraries, but # adding the '-m68020' flag to GCC prevents building anything better, # like '-m68040'. lt_prog_compiler_pic_CXX='-m68020 -resident32 -malways-restore-a4' ;; esac ;; beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. ;; mingw* | cygwin* | os2* | pw32* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style # (--disable-auto-import) libraries lt_prog_compiler_pic_CXX='-DDLL_EXPORT' case $host_os in os2*) lt_prog_compiler_static_CXX='$wl-static' ;; esac ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files lt_prog_compiler_pic_CXX='-fno-common' ;; *djgpp*) # DJGPP does not support shared libraries at all lt_prog_compiler_pic_CXX= ;; haiku*) # PIC is the default for Haiku. # The "-static" flag exists, but is broken. lt_prog_compiler_static_CXX= ;; interix[3-9]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; sysv4*MP*) if test -d /usr/nec; then lt_prog_compiler_pic_CXX=-Kconform_pic fi ;; hpux*) # PIC is the default for 64-bit PA HP-UX, but not for 32-bit # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag # sets the default TLS model and affects inlining. case $host_cpu in hppa*64*) ;; *) lt_prog_compiler_pic_CXX='-fPIC' ;; esac ;; *qnx* | *nto*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. lt_prog_compiler_pic_CXX='-fPIC -shared' ;; *) lt_prog_compiler_pic_CXX='-fPIC' ;; esac else case $host_os in aix[4-9]*) # All AIX code is PIC. if test ia64 = "$host_cpu"; then # AIX 5 now supports IA64 processor lt_prog_compiler_static_CXX='-Bstatic' else lt_prog_compiler_static_CXX='-bnso -bI:/lib/syscalls.exp' fi ;; chorus*) case $cc_basename in cxch68*) # Green Hills C++ Compiler # _LT_TAGVAR(lt_prog_compiler_static, CXX)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" ;; esac ;; mingw* | cygwin* | os2* | pw32* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). lt_prog_compiler_pic_CXX='-DDLL_EXPORT' ;; dgux*) case $cc_basename in ec++*) lt_prog_compiler_pic_CXX='-KPIC' ;; ghcx*) # Green Hills C++ Compiler lt_prog_compiler_pic_CXX='-pic' ;; *) ;; esac ;; freebsd* | dragonfly* | midnightbsd*) # FreeBSD uses GNU C++ ;; hpux9* | hpux10* | hpux11*) case $cc_basename in CC*) lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_static_CXX='$wl-a ${wl}archive' if test ia64 != "$host_cpu"; then lt_prog_compiler_pic_CXX='+Z' fi ;; aCC*) lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_static_CXX='$wl-a ${wl}archive' case $host_cpu in hppa*64*|ia64*) # +Z the default ;; *) lt_prog_compiler_pic_CXX='+Z' ;; esac ;; *) ;; esac ;; interix*) # This is c89, which is MS Visual C++ (no shared libs) # Anyone wants to do a port? ;; irix5* | irix6* | nonstopux*) case $cc_basename in CC*) lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_static_CXX='-non_shared' # CC pic flag -KPIC is the default. ;; *) ;; esac ;; linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) case $cc_basename in KCC*) # KAI C++ Compiler lt_prog_compiler_wl_CXX='--backend -Wl,' lt_prog_compiler_pic_CXX='-fPIC' ;; ecpc* ) # old Intel C++ for x86_64, which still supported -KPIC. lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_pic_CXX='-KPIC' lt_prog_compiler_static_CXX='-static' ;; icpc* ) # Intel C++, used to be incompatible with GCC. # ICC 10 doesn't accept -KPIC any more. lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_pic_CXX='-fPIC' lt_prog_compiler_static_CXX='-static' ;; pgCC* | pgcpp*) # Portland Group C++ compiler lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_pic_CXX='-fpic' lt_prog_compiler_static_CXX='-Bstatic' ;; cxx*) # Compaq C++ # Make sure the PIC flag is empty. It appears that all Alpha # Linux and Compaq Tru64 Unix objects are PIC. lt_prog_compiler_pic_CXX= lt_prog_compiler_static_CXX='-non_shared' ;; xlc* | xlC* | bgxl[cC]* | mpixl[cC]*) # IBM XL 8.0, 9.0 on PPC and BlueGene lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_pic_CXX='-qpic' lt_prog_compiler_static_CXX='-qstaticlink' ;; *) case `$CC -V 2>&1 | $SED 5q` in *Sun\ C*) # Sun C++ 5.9 lt_prog_compiler_pic_CXX='-KPIC' lt_prog_compiler_static_CXX='-Bstatic' lt_prog_compiler_wl_CXX='-Qoption ld ' ;; esac ;; esac ;; lynxos*) ;; m88k*) ;; mvs*) case $cc_basename in cxx*) lt_prog_compiler_pic_CXX='-W c,exportall' ;; *) ;; esac ;; netbsd* | netbsdelf*-gnu) ;; *qnx* | *nto*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. lt_prog_compiler_pic_CXX='-fPIC -shared' ;; osf3* | osf4* | osf5*) case $cc_basename in KCC*) lt_prog_compiler_wl_CXX='--backend -Wl,' ;; RCC*) # Rational C++ 2.4.1 lt_prog_compiler_pic_CXX='-pic' ;; cxx*) # Digital/Compaq C++ lt_prog_compiler_wl_CXX='-Wl,' # Make sure the PIC flag is empty. It appears that all Alpha # Linux and Compaq Tru64 Unix objects are PIC. lt_prog_compiler_pic_CXX= lt_prog_compiler_static_CXX='-non_shared' ;; *) ;; esac ;; psos*) ;; solaris*) case $cc_basename in CC* | sunCC*) # Sun C++ 4.2, 5.x and Centerline C++ lt_prog_compiler_pic_CXX='-KPIC' lt_prog_compiler_static_CXX='-Bstatic' lt_prog_compiler_wl_CXX='-Qoption ld ' ;; gcx*) # Green Hills C++ Compiler lt_prog_compiler_pic_CXX='-PIC' ;; *) ;; esac ;; sunos4*) case $cc_basename in CC*) # Sun C++ 4.x lt_prog_compiler_pic_CXX='-pic' lt_prog_compiler_static_CXX='-Bstatic' ;; lcc*) # Lucid lt_prog_compiler_pic_CXX='-pic' ;; *) ;; esac ;; sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) case $cc_basename in CC*) lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_pic_CXX='-KPIC' lt_prog_compiler_static_CXX='-Bstatic' ;; esac ;; tandem*) case $cc_basename in NCC*) # NonStop-UX NCC 3.20 lt_prog_compiler_pic_CXX='-KPIC' ;; *) ;; esac ;; vxworks*) ;; *) lt_prog_compiler_can_build_shared_CXX=no ;; esac fi case $host_os in # For platforms that do not support PIC, -DPIC is meaningless: *djgpp*) lt_prog_compiler_pic_CXX= ;; *) lt_prog_compiler_pic_CXX="$lt_prog_compiler_pic_CXX -DPIC" ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 printf %s "checking for $compiler option to produce PIC... " >&6; } if test ${lt_cv_prog_compiler_pic_CXX+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_pic_CXX=$lt_prog_compiler_pic_CXX fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_CXX" >&5 printf "%s\n" "$lt_cv_prog_compiler_pic_CXX" >&6; } lt_prog_compiler_pic_CXX=$lt_cv_prog_compiler_pic_CXX # # Check to make sure the PIC flag actually works. # if test -n "$lt_prog_compiler_pic_CXX"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works" >&5 printf %s "checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works... " >&6; } if test ${lt_cv_prog_compiler_pic_works_CXX+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_pic_works_CXX=no ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="$lt_prog_compiler_pic_CXX -DPIC" ## exclude from sc_useless_quotes_in_assignment # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. # The option is referenced via a variable to avoid confusing sed. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_pic_works_CXX=yes fi fi $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works_CXX" >&5 printf "%s\n" "$lt_cv_prog_compiler_pic_works_CXX" >&6; } if test yes = "$lt_cv_prog_compiler_pic_works_CXX"; then case $lt_prog_compiler_pic_CXX in "" | " "*) ;; *) lt_prog_compiler_pic_CXX=" $lt_prog_compiler_pic_CXX" ;; esac else lt_prog_compiler_pic_CXX= lt_prog_compiler_can_build_shared_CXX=no fi fi # # Check to make sure the static flag actually works. # wl=$lt_prog_compiler_wl_CXX eval lt_tmp_static_flag=\"$lt_prog_compiler_static_CXX\" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 printf %s "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } if test ${lt_cv_prog_compiler_static_works_CXX+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_static_works_CXX=no save_LDFLAGS=$LDFLAGS LDFLAGS="$LDFLAGS $lt_tmp_static_flag" echo "$lt_simple_link_test_code" > conftest.$ac_ext if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then # The linker can only warn and ignore the option if not recognized # So say no if there are warnings if test -s conftest.err; then # Append any errors to the config.log. cat conftest.err 1>&5 $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_static_works_CXX=yes fi else lt_cv_prog_compiler_static_works_CXX=yes fi fi $RM -r conftest* LDFLAGS=$save_LDFLAGS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works_CXX" >&5 printf "%s\n" "$lt_cv_prog_compiler_static_works_CXX" >&6; } if test yes = "$lt_cv_prog_compiler_static_works_CXX"; then : else lt_prog_compiler_static_CXX= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } if test ${lt_cv_prog_compiler_c_o_CXX+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_c_o_CXX=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o_CXX=yes fi fi chmod u+w . 2>&5 $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. $RM -r conftest $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5 printf "%s\n" "$lt_cv_prog_compiler_c_o_CXX" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } if test ${lt_cv_prog_compiler_c_o_CXX+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_c_o_CXX=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o_CXX=yes fi fi chmod u+w . 2>&5 $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. $RM -r conftest $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5 printf "%s\n" "$lt_cv_prog_compiler_c_o_CXX" >&6; } hard_links=nottested if test no = "$lt_cv_prog_compiler_c_o_CXX" && test no != "$need_locks"; then # do not overwrite the value of need_locks provided by the user { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 printf %s "checking if we can lock with hard links... " >&6; } hard_links=yes $RM conftest* ln conftest.a conftest.b 2>/dev/null && hard_links=no touch conftest.a ln conftest.a conftest.b 2>&5 || hard_links=no ln conftest.a conftest.b 2>/dev/null && hard_links=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 printf "%s\n" "$hard_links" >&6; } if test no = "$hard_links"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&5 printf "%s\n" "$as_me: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&2;} need_locks=warn fi else need_locks=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 printf %s "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' exclude_expsyms_CXX='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' case $host_os in aix[4-9]*) # If we're using GNU nm, then we don't want the "-C" option. # -C means demangle to GNU nm, but means don't demangle to AIX nm. # Without the "-l" option, or with the "-B" option, AIX nm treats # weak defined symbols like other global defined symbols, whereas # GNU nm marks them as "W". # While the 'weak' keyword is ignored in the Export File, we need # it in the Import File for the 'aix-soname' feature, so we have # to replace the "-B" option with "-P" for AIX nm. if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then export_symbols_cmds_CXX='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' else export_symbols_cmds_CXX='`func_echo_all $NM | $SED -e '\''s/B\([^B]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && (substr(\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' fi ;; pw32*) export_symbols_cmds_CXX=$ltdll_cmds ;; cygwin* | mingw* | cegcc*) case $cc_basename in cl* | icl*) exclude_expsyms_CXX='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' ;; *) export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' exclude_expsyms_CXX='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' ;; esac ;; linux* | k*bsd*-gnu | gnu*) link_all_deplibs_CXX=no ;; *) export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5 printf "%s\n" "$ld_shlibs_CXX" >&6; } test no = "$ld_shlibs_CXX" && can_build_shared=no with_gnu_ld_CXX=$with_gnu_ld # # Do we need to explicitly link libc? # case "x$archive_cmds_need_lc_CXX" in x|xyes) # Assume -lc should be added archive_cmds_need_lc_CXX=yes if test yes,yes = "$GCC,$enable_shared"; then case $archive_cmds_CXX in *'~'*) # FIXME: we may have to deal with multi-command sequences. ;; '$CC '*) # Test whether the compiler implicitly links with -lc since on some # systems, -lgcc has to come before -lc. If gcc already passes -lc # to ld, don't add -lc before -lgcc. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 printf %s "checking whether -lc should be explicitly linked in... " >&6; } if test ${lt_cv_archive_cmds_need_lc_CXX+y} then : printf %s "(cached) " >&6 else $as_nop $RM conftest* echo "$lt_simple_compile_test_code" > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } 2>conftest.err; then soname=conftest lib=conftest libobjs=conftest.$ac_objext deplibs= wl=$lt_prog_compiler_wl_CXX pic_flag=$lt_prog_compiler_pic_CXX compiler_flags=-v linker_flags=-v verstring= output_objdir=. libname=conftest lt_save_allow_undefined_flag=$allow_undefined_flag_CXX allow_undefined_flag_CXX= if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 (eval $archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } then lt_cv_archive_cmds_need_lc_CXX=no else lt_cv_archive_cmds_need_lc_CXX=yes fi allow_undefined_flag_CXX=$lt_save_allow_undefined_flag else cat conftest.err 1>&5 fi $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc_CXX" >&5 printf "%s\n" "$lt_cv_archive_cmds_need_lc_CXX" >&6; } archive_cmds_need_lc_CXX=$lt_cv_archive_cmds_need_lc_CXX ;; esac fi ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 printf %s "checking dynamic linker characteristics... " >&6; } library_names_spec= libname_spec='lib$name' soname_spec= shrext_cmds=.so postinstall_cmds= postuninstall_cmds= finish_cmds= finish_eval= shlibpath_var= shlibpath_overrides_runpath=unknown version_type=none dynamic_linker="$host_os ld.so" sys_lib_dlsearch_path_spec="/lib /usr/lib" need_lib_prefix=unknown hardcode_into_libs=no # when you set need_version to no, make sure it does not cause -set_version # flags to be left without arguments need_version=unknown case $host_os in aix3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname.a' shlibpath_var=LIBPATH # AIX 3 has no versioning support, so we append a major version to the name. soname_spec='$libname$release$shared_ext$major' ;; aix[4-9]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no hardcode_into_libs=yes if test ia64 = "$host_cpu"; then # AIX 5 supports IA64 library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH else # With GCC up to 2.95.x, collect2 would create an import file # for dependence libraries. The import file would start with # the line '#! .'. This would cause the generated library to # depend on '.', always an invalid library. This was fixed in # development snapshots of GCC prior to 3.0. case $host_os in aix4 | aix4.[01] | aix4.[01].*) if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' echo ' yes ' echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then : else can_build_shared=no fi ;; esac # Using Import Files as archive members, it is possible to support # filename-based versioning of shared library archives on AIX. While # this would work for both with and without runtime linking, it will # prevent static linking of such archives. So we do filename-based # shared library versioning with .so extension only, which is used # when both runtime linking and shared linking is enabled. # Unfortunately, runtime linking may impact performance, so we do # not want this to be the default eventually. Also, we use the # versioned .so libs for executables only if there is the -brtl # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. # To allow for filename-based versioning support, we need to create # libNAME.so.V as an archive file, containing: # *) an Import File, referring to the versioned filename of the # archive as well as the shared archive member, telling the # bitwidth (32 or 64) of that shared object, and providing the # list of exported symbols of that shared object, eventually # decorated with the 'weak' keyword # *) the shared object with the F_LOADONLY flag set, to really avoid # it being seen by the linker. # At run time we better use the real file rather than another symlink, # but for link time we create the symlink libNAME.so -> libNAME.so.V case $with_aix_soname,$aix_use_runtimelinking in # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct # soname into executable. Probably we can add versioning support to # collect2, so additional links can be useful in future. aix,yes) # traditional libtool dynamic_linker='AIX unversionable lib.so' # If using run time linking (on AIX 4.2 or later) use lib.so # instead of lib.a to let people know that these are not # typical AIX shared libraries. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' ;; aix,no) # traditional AIX only dynamic_linker='AIX lib.a(lib.so.V)' # We preserve .a as extension for shared libraries through AIX4.2 # and later when we are not doing run time linking. library_names_spec='$libname$release.a $libname.a' soname_spec='$libname$release$shared_ext$major' ;; svr4,*) # full svr4 only dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o)" library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' # We do not specify a path in Import Files, so LIBPATH fires. shlibpath_overrides_runpath=yes ;; *,yes) # both, prefer svr4 dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o), lib.a(lib.so.V)" library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' # unpreferred sharedlib libNAME.a needs extra handling postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' # We do not specify a path in Import Files, so LIBPATH fires. shlibpath_overrides_runpath=yes ;; *,no) # both, prefer aix dynamic_linker="AIX lib.a(lib.so.V), lib.so.V($shared_archive_member_spec.o)" library_names_spec='$libname$release.a $libname.a' soname_spec='$libname$release$shared_ext$major' # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' ;; esac shlibpath_var=LIBPATH fi ;; amigaos*) case $host_cpu in powerpc) # Since July 2007 AmigaOS4 officially supports .so libraries. # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' ;; m68k) library_names_spec='$libname.ixlibrary $libname.a' # Create ${libname}_ixlibrary.a entries in /sys/libs. finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' ;; esac ;; beos*) library_names_spec='$libname$shared_ext' dynamic_linker="$host_os ld.so" shlibpath_var=LIBRARY_PATH ;; bsdi[45]*) version_type=linux # correct to gnu/linux during the next big refactor need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" # the default ld.so.conf also contains /usr/contrib/lib and # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow # libtool to hard-code these into programs ;; cygwin* | mingw* | pw32* | cegcc*) version_type=windows shrext_cmds=.dll need_version=no need_lib_prefix=no case $GCC,$cc_basename in yes,*) # gcc library_names_spec='$libname.dll.a' # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \$file`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname~ if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; fi' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' ;; mingw* | cegcc*) # MinGW DLLs use traditional 'lib' prefix soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' ;; pw32*) # pw32 DLLs use 'pw' prefix rather than 'lib' library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' ;; esac dynamic_linker='Win32 ld.exe' ;; *,cl* | *,icl*) # Native MSVC or ICC libname_spec='$name' soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' library_names_spec='$libname.dll.lib' case $build_os in mingw*) sys_lib_search_path_spec= lt_save_ifs=$IFS IFS=';' for lt_path in $LIB do IFS=$lt_save_ifs # Let DOS variable expansion print the short 8.3 style file name. lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" done IFS=$lt_save_ifs # Convert to MSYS style. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` ;; cygwin*) # Convert to unix form, then to dos form, then back to unix form # but this time dos style (no spaces!) so that the unix form looks # like /cygdrive/c/PROGRA~1:/cygdr... sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` ;; *) sys_lib_search_path_spec=$LIB if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then # It is most probably a Windows format PATH. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` else sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi # FIXME: find the short name or the path components, as spaces are # common. (e.g. "Program Files" -> "PROGRA~1") ;; esac # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \$file`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes dynamic_linker='Win32 link.exe' ;; *) # Assume MSVC and ICC wrapper library_names_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext $libname.lib' dynamic_linker='Win32 ld.exe' ;; esac # FIXME: first we should search . and the directory the executable is in shlibpath_var=PATH ;; darwin* | rhapsody*) dynamic_linker="$host_os dyld" version_type=darwin need_lib_prefix=no need_version=no library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' soname_spec='$libname$release$major$shared_ext' shlibpath_overrides_runpath=yes shlibpath_var=DYLD_LIBRARY_PATH shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' ;; dgux*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH ;; freebsd* | dragonfly* | midnightbsd*) # DragonFly does not have aout. When/if they implement a new # versioning mechanism, adjust this. if test -x /usr/bin/objformat; then objformat=`/usr/bin/objformat` else case $host_os in freebsd[23].*) objformat=aout ;; *) objformat=elf ;; esac fi version_type=freebsd-$objformat case $version_type in freebsd-elf*) library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' need_version=no need_lib_prefix=no ;; freebsd-*) library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' need_version=yes ;; esac shlibpath_var=LD_LIBRARY_PATH case $host_os in freebsd2.*) shlibpath_overrides_runpath=yes ;; freebsd3.[01]* | freebsdelf3.[01]*) shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; *) # from 4.6 on, and DragonFly shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; esac ;; haiku*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no dynamic_linker="$host_os runtime_loader" library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LIBRARY_PATH shlibpath_overrides_runpath=no sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' hardcode_into_libs=yes ;; hpux9* | hpux10* | hpux11*) # Give a soname corresponding to the major version so that dld.sl refuses to # link against other versions. version_type=sunos need_lib_prefix=no need_version=no case $host_cpu in ia64*) shrext_cmds='.so' hardcode_into_libs=yes dynamic_linker="$host_os dld.so" shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' if test 32 = "$HPUX_IA64_MODE"; then sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" sys_lib_dlsearch_path_spec=/usr/lib/hpux32 else sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" sys_lib_dlsearch_path_spec=/usr/lib/hpux64 fi ;; hppa*64*) shrext_cmds='.sl' hardcode_into_libs=yes dynamic_linker="$host_os dld.sl" shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; *) shrext_cmds='.sl' dynamic_linker="$host_os dld.sl" shlibpath_var=SHLIB_PATH shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' ;; esac # HP-UX runs *really* slowly unless shared libraries are mode 555, ... postinstall_cmds='chmod 555 $lib' # or fails outright, so override atomically: install_override_mode=555 ;; interix[3-9]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; irix5* | irix6* | nonstopux*) case $host_os in nonstopux*) version_type=nonstopux ;; *) if test yes = "$lt_cv_prog_gnu_ld"; then version_type=linux # correct to gnu/linux during the next big refactor else version_type=irix fi ;; esac need_lib_prefix=no need_version=no soname_spec='$libname$release$shared_ext$major' library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' case $host_os in irix5* | nonstopux*) libsuff= shlibsuff= ;; *) case $LD in # libtool.m4 will add one of these switches to LD *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") libsuff= shlibsuff= libmagic=32-bit;; *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") libsuff=64 shlibsuff=64 libmagic=64-bit;; *) libsuff= shlibsuff= libmagic=never-match;; esac ;; esac shlibpath_var=LD_LIBRARY${shlibsuff}_PATH shlibpath_overrides_runpath=no sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" hardcode_into_libs=yes ;; # No shared lib support for Linux oldld, aout, or coff. linux*oldld* | linux*aout* | linux*coff*) dynamic_linker=no ;; linux*android*) version_type=none # Android doesn't support versioned libraries. need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext' soname_spec='$libname$release$shared_ext' finish_cmds= shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes dynamic_linker='Android linker' # Don't embed -rpath directories since the linker doesn't support them. hardcode_libdir_flag_spec_CXX='-L$libdir' ;; # This must be glibc/ELF. linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no # Some binutils ld are patched to set DT_RUNPATH if test ${lt_cv_shlibpath_overrides_runpath+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_shlibpath_overrides_runpath=no save_LDFLAGS=$LDFLAGS save_libdir=$libdir eval "libdir=/foo; wl=\"$lt_prog_compiler_wl_CXX\"; \ LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec_CXX\"" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null then : lt_cv_shlibpath_overrides_runpath=yes fi fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LDFLAGS=$save_LDFLAGS libdir=$save_libdir fi shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes # Ideally, we could use ldconfig to report *all* directores which are # searched for libraries, however this is still not possible. Aside from not # being certain /sbin/ldconfig is available, command # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, # even though it is searched at run-time. Try to do the best guess by # appending ld.so.conf contents (and includes) to the search path. if test -f /etc/ld.so.conf; then lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" fi # We used to test for /lib/ld.so.1 and disable shared libraries on # powerpc, because MkLinux only supported shared libraries with the # GNU dynamic linker. Since this was broken with cross compilers, # most powerpc-linux boxes support dynamic linking these days and # people can always --disable-shared, the test was removed, and we # assume the GNU/Linux dynamic linker is in use. dynamic_linker='GNU/Linux ld.so' ;; netbsdelf*-gnu) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='NetBSD ld.elf_so' ;; netbsd*) version_type=sunos need_lib_prefix=no need_version=no if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' dynamic_linker='NetBSD (a.out) ld.so' else library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' dynamic_linker='NetBSD ld.elf_so' fi shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; newsos6) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; *nto* | *qnx*) version_type=qnx need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='ldqnx.so' ;; openbsd* | bitrig*) version_type=sunos sys_lib_dlsearch_path_spec=/usr/lib need_lib_prefix=no if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then need_version=no else need_version=yes fi library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; os2*) libname_spec='$name' version_type=windows shrext_cmds=.dll need_version=no need_lib_prefix=no # OS/2 can only load a DLL with a base name of 8 characters or less. soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; v=$($ECHO $release$versuffix | tr -d .-); n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); $ECHO $n$v`$shared_ext' library_names_spec='${libname}_dll.$libext' dynamic_linker='OS/2 ld.exe' shlibpath_var=BEGINLIBPATH sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec postinstall_cmds='base_file=`basename \$file`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname~ if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; fi' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; $ECHO \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' ;; osf3* | osf4* | osf5*) version_type=osf need_lib_prefix=no need_version=no soname_spec='$libname$release$shared_ext$major' library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; rdos*) dynamic_linker=no ;; solaris*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes # ldd complains unless libraries are executable postinstall_cmds='chmod +x $lib' ;; sunos4*) version_type=sunos library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes if test yes = "$with_gnu_ld"; then need_lib_prefix=no fi need_version=yes ;; sysv4 | sysv4.3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH case $host_vendor in sni) shlibpath_overrides_runpath=no need_lib_prefix=no runpath_var=LD_RUN_PATH ;; siemens) need_lib_prefix=no ;; motorola) need_lib_prefix=no need_version=no shlibpath_overrides_runpath=no sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' ;; esac ;; sysv4*MP*) if test -d /usr/nec; then version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' soname_spec='$libname$shared_ext.$major' shlibpath_var=LD_LIBRARY_PATH fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) version_type=sco need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes if test yes = "$with_gnu_ld"; then sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' else sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' case $host_os in sco3.2v5*) sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" ;; esac fi sys_lib_dlsearch_path_spec='/usr/lib' ;; tpf*) # TPF is a cross-target only. Preferred cross-host = GNU/Linux. version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; uts4*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH ;; *) dynamic_linker=no ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 printf "%s\n" "$dynamic_linker" >&6; } test no = "$dynamic_linker" && can_build_shared=no variables_saved_for_relink="PATH $shlibpath_var $runpath_var" if test yes = "$GCC"; then variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" fi if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec fi if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec fi # remember unaugmented sys_lib_dlsearch_path content for libtool script decls... configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec # ... but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" # to be used as default LT_SYS_LIBRARY_PATH value in generated libtool configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 printf %s "checking how to hardcode library paths into programs... " >&6; } hardcode_action_CXX= if test -n "$hardcode_libdir_flag_spec_CXX" || test -n "$runpath_var_CXX" || test yes = "$hardcode_automatic_CXX"; then # We can hardcode non-existent directories. if test no != "$hardcode_direct_CXX" && # If the only mechanism to avoid hardcoding is shlibpath_var, we # have to relink, otherwise we might link with an installed library # when we should be linking with a yet-to-be-installed one ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, CXX)" && test no != "$hardcode_minus_L_CXX"; then # Linking always hardcodes the temporary library directory. hardcode_action_CXX=relink else # We can link without hardcoding, and we can hardcode nonexisting dirs. hardcode_action_CXX=immediate fi else # We cannot hardcode anything, or else we can only hardcode existing # directories. hardcode_action_CXX=unsupported fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hardcode_action_CXX" >&5 printf "%s\n" "$hardcode_action_CXX" >&6; } if test relink = "$hardcode_action_CXX" || test yes = "$inherit_rpath_CXX"; then # Fast installation is not supported enable_fast_install=no elif test yes = "$shlibpath_overrides_runpath" || test no = "$enable_shared"; then # Fast installation is not necessary enable_fast_install=needless fi fi # test -n "$compiler" CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS LDCXX=$LD LD=$lt_save_LD GCC=$lt_save_GCC with_gnu_ld=$lt_save_with_gnu_ld lt_cv_path_LDCXX=$lt_cv_path_LD lt_cv_path_LD=$lt_save_path_LD lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld fi # test yes != "$_lt_caught_CXX_error" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu if test -z "$F77" || test no = "$F77"; then _lt_disable_F77=yes fi archive_cmds_need_lc_F77=no allow_undefined_flag_F77= always_export_symbols_F77=no archive_expsym_cmds_F77= export_dynamic_flag_spec_F77= hardcode_direct_F77=no hardcode_direct_absolute_F77=no hardcode_libdir_flag_spec_F77= hardcode_libdir_separator_F77= hardcode_minus_L_F77=no hardcode_automatic_F77=no inherit_rpath_F77=no module_cmds_F77= module_expsym_cmds_F77= link_all_deplibs_F77=unknown old_archive_cmds_F77=$old_archive_cmds reload_flag_F77=$reload_flag reload_cmds_F77=$reload_cmds no_undefined_flag_F77= whole_archive_flag_spec_F77= enable_shared_with_static_runtimes_F77=no # Source file extension for f77 test sources. ac_ext=f # Object file extension for compiled f77 test sources. objext=o objext_F77=$objext # No sense in running all these tests if we already determined that # the F77 compiler isn't working. Some variables (like enable_shared) # are currently assumed to apply to all compilers on this platform, # and will be corrupted by setting them based on a non-working compiler. if test yes != "$_lt_disable_F77"; then # Code to be used in simple compile tests lt_simple_compile_test_code="\ subroutine t return end " # Code to be used in simple link tests lt_simple_link_test_code="\ program t end " # ltmain only uses $CC for tagged configurations so make sure $CC is set. # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. compiler=$CC # save warnings/boilerplate of simple test code ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" >conftest.$ac_ext eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_compiler_boilerplate=`cat conftest.err` $RM conftest* ac_outfile=conftest.$ac_objext echo "$lt_simple_link_test_code" >conftest.$ac_ext eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_linker_boilerplate=`cat conftest.err` $RM -r conftest* # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_GCC=$GCC lt_save_CFLAGS=$CFLAGS CC=${F77-"f77"} CFLAGS=$FFLAGS compiler=$CC compiler_F77=$CC func_cc_basename $compiler cc_basename=$func_cc_basename_result GCC=$G77 if test -n "$compiler"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5 printf %s "checking if libtool supports shared libraries... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5 printf "%s\n" "$can_build_shared" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5 printf %s "checking whether to build shared libraries... " >&6; } test no = "$can_build_shared" && enable_shared=no # On AIX, shared libraries and static libraries use the same namespace, and # are all built from PIC. case $host_os in aix3*) test yes = "$enable_shared" && enable_static=no if test -n "$RANLIB"; then archive_cmds="$archive_cmds~\$RANLIB \$lib" postinstall_cmds='$RANLIB $lib' fi ;; aix[4-9]*) if test ia64 != "$host_cpu"; then case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in yes,aix,yes) ;; # shared object as lib.so file only yes,svr4,*) ;; # shared object as lib.so archive member only yes,*) enable_static=no ;; # shared object in lib.a archive as well esac fi ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5 printf "%s\n" "$enable_shared" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5 printf %s "checking whether to build static libraries... " >&6; } # Make sure either enable_shared or enable_static is yes. test yes = "$enable_shared" || enable_static=yes { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5 printf "%s\n" "$enable_static" >&6; } GCC_F77=$G77 LD_F77=$LD ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... lt_prog_compiler_wl_F77= lt_prog_compiler_pic_F77= lt_prog_compiler_static_F77= if test yes = "$GCC"; then lt_prog_compiler_wl_F77='-Wl,' lt_prog_compiler_static_F77='-static' case $host_os in aix*) # All AIX code is PIC. if test ia64 = "$host_cpu"; then # AIX 5 now supports IA64 processor lt_prog_compiler_static_F77='-Bstatic' fi lt_prog_compiler_pic_F77='-fPIC' ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support lt_prog_compiler_pic_F77='-fPIC' ;; m68k) # FIXME: we need at least 68020 code to build shared libraries, but # adding the '-m68020' flag to GCC prevents building anything better, # like '-m68040'. lt_prog_compiler_pic_F77='-m68020 -resident32 -malways-restore-a4' ;; esac ;; beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. ;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style # (--disable-auto-import) libraries lt_prog_compiler_pic_F77='-DDLL_EXPORT' case $host_os in os2*) lt_prog_compiler_static_F77='$wl-static' ;; esac ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files lt_prog_compiler_pic_F77='-fno-common' ;; haiku*) # PIC is the default for Haiku. # The "-static" flag exists, but is broken. lt_prog_compiler_static_F77= ;; hpux*) # PIC is the default for 64-bit PA HP-UX, but not for 32-bit # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag # sets the default TLS model and affects inlining. case $host_cpu in hppa*64*) # +Z the default ;; *) lt_prog_compiler_pic_F77='-fPIC' ;; esac ;; interix[3-9]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; msdosdjgpp*) # Just because we use GCC doesn't mean we suddenly get shared libraries # on systems that don't support them. lt_prog_compiler_can_build_shared_F77=no enable_shared=no ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. lt_prog_compiler_pic_F77='-fPIC -shared' ;; sysv4*MP*) if test -d /usr/nec; then lt_prog_compiler_pic_F77=-Kconform_pic fi ;; *) lt_prog_compiler_pic_F77='-fPIC' ;; esac case $cc_basename in nvcc*) # Cuda Compiler Driver 2.2 lt_prog_compiler_wl_F77='-Xlinker ' if test -n "$lt_prog_compiler_pic_F77"; then lt_prog_compiler_pic_F77="-Xcompiler $lt_prog_compiler_pic_F77" fi ;; esac else # PORTME Check for flag to pass linker flags through the system compiler. case $host_os in aix*) lt_prog_compiler_wl_F77='-Wl,' if test ia64 = "$host_cpu"; then # AIX 5 now supports IA64 processor lt_prog_compiler_static_F77='-Bstatic' else lt_prog_compiler_static_F77='-bnso -bI:/lib/syscalls.exp' fi ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files lt_prog_compiler_pic_F77='-fno-common' case $cc_basename in nagfor*) # NAG Fortran compiler lt_prog_compiler_wl_F77='-Wl,-Wl,,' lt_prog_compiler_pic_F77='-PIC' lt_prog_compiler_static_F77='-Bstatic' ;; esac ;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). lt_prog_compiler_pic_F77='-DDLL_EXPORT' case $host_os in os2*) lt_prog_compiler_static_F77='$wl-static' ;; esac ;; hpux9* | hpux10* | hpux11*) lt_prog_compiler_wl_F77='-Wl,' # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but # not for PA HP-UX. case $host_cpu in hppa*64*|ia64*) # +Z the default ;; *) lt_prog_compiler_pic_F77='+Z' ;; esac # Is there a better lt_prog_compiler_static that works with the bundled CC? lt_prog_compiler_static_F77='$wl-a ${wl}archive' ;; irix5* | irix6* | nonstopux*) lt_prog_compiler_wl_F77='-Wl,' # PIC (with -KPIC) is the default. lt_prog_compiler_static_F77='-non_shared' ;; linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) case $cc_basename in # old Intel for x86_64, which still supported -KPIC. ecc*) lt_prog_compiler_wl_F77='-Wl,' lt_prog_compiler_pic_F77='-KPIC' lt_prog_compiler_static_F77='-static' ;; # flang / f18. f95 an alias for gfortran or flang on Debian flang* | f18* | f95*) lt_prog_compiler_wl_F77='-Wl,' lt_prog_compiler_pic_F77='-fPIC' lt_prog_compiler_static_F77='-static' ;; # icc used to be incompatible with GCC. # ICC 10 doesn't accept -KPIC any more. icc* | ifort*) lt_prog_compiler_wl_F77='-Wl,' lt_prog_compiler_pic_F77='-fPIC' lt_prog_compiler_static_F77='-static' ;; # Lahey Fortran 8.1. lf95*) lt_prog_compiler_wl_F77='-Wl,' lt_prog_compiler_pic_F77='--shared' lt_prog_compiler_static_F77='--static' ;; nagfor*) # NAG Fortran compiler lt_prog_compiler_wl_F77='-Wl,-Wl,,' lt_prog_compiler_pic_F77='-PIC' lt_prog_compiler_static_F77='-Bstatic' ;; tcc*) # Fabrice Bellard et al's Tiny C Compiler lt_prog_compiler_wl_F77='-Wl,' lt_prog_compiler_pic_F77='-fPIC' lt_prog_compiler_static_F77='-static' ;; pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group compilers (*not* the Pentium gcc compiler, # which looks to be a dead project) lt_prog_compiler_wl_F77='-Wl,' lt_prog_compiler_pic_F77='-fpic' lt_prog_compiler_static_F77='-Bstatic' ;; ccc*) lt_prog_compiler_wl_F77='-Wl,' # All Alpha code is PIC. lt_prog_compiler_static_F77='-non_shared' ;; xl* | bgxl* | bgf* | mpixl*) # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene lt_prog_compiler_wl_F77='-Wl,' lt_prog_compiler_pic_F77='-qpic' lt_prog_compiler_static_F77='-qstaticlink' ;; *) case `$CC -V 2>&1 | $SED 5q` in *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*) # Sun Fortran 8.3 passes all unrecognized flags to the linker lt_prog_compiler_pic_F77='-KPIC' lt_prog_compiler_static_F77='-Bstatic' lt_prog_compiler_wl_F77='' ;; *Sun\ F* | *Sun*Fortran*) lt_prog_compiler_pic_F77='-KPIC' lt_prog_compiler_static_F77='-Bstatic' lt_prog_compiler_wl_F77='-Qoption ld ' ;; *Sun\ C*) # Sun C 5.9 lt_prog_compiler_pic_F77='-KPIC' lt_prog_compiler_static_F77='-Bstatic' lt_prog_compiler_wl_F77='-Wl,' ;; *Intel*\ [CF]*Compiler*) lt_prog_compiler_wl_F77='-Wl,' lt_prog_compiler_pic_F77='-fPIC' lt_prog_compiler_static_F77='-static' ;; *Portland\ Group*) lt_prog_compiler_wl_F77='-Wl,' lt_prog_compiler_pic_F77='-fpic' lt_prog_compiler_static_F77='-Bstatic' ;; esac ;; esac ;; newsos6) lt_prog_compiler_pic_F77='-KPIC' lt_prog_compiler_static_F77='-Bstatic' ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. lt_prog_compiler_pic_F77='-fPIC -shared' ;; osf3* | osf4* | osf5*) lt_prog_compiler_wl_F77='-Wl,' # All OSF/1 code is PIC. lt_prog_compiler_static_F77='-non_shared' ;; rdos*) lt_prog_compiler_static_F77='-non_shared' ;; solaris*) lt_prog_compiler_pic_F77='-KPIC' lt_prog_compiler_static_F77='-Bstatic' case $cc_basename in f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) lt_prog_compiler_wl_F77='-Qoption ld ';; *) lt_prog_compiler_wl_F77='-Wl,';; esac ;; sunos4*) lt_prog_compiler_wl_F77='-Qoption ld ' lt_prog_compiler_pic_F77='-PIC' lt_prog_compiler_static_F77='-Bstatic' ;; sysv4 | sysv4.2uw2* | sysv4.3*) lt_prog_compiler_wl_F77='-Wl,' lt_prog_compiler_pic_F77='-KPIC' lt_prog_compiler_static_F77='-Bstatic' ;; sysv4*MP*) if test -d /usr/nec; then lt_prog_compiler_pic_F77='-Kconform_pic' lt_prog_compiler_static_F77='-Bstatic' fi ;; sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) lt_prog_compiler_wl_F77='-Wl,' lt_prog_compiler_pic_F77='-KPIC' lt_prog_compiler_static_F77='-Bstatic' ;; unicos*) lt_prog_compiler_wl_F77='-Wl,' lt_prog_compiler_can_build_shared_F77=no ;; uts4*) lt_prog_compiler_pic_F77='-pic' lt_prog_compiler_static_F77='-Bstatic' ;; *) lt_prog_compiler_can_build_shared_F77=no ;; esac fi case $host_os in # For platforms that do not support PIC, -DPIC is meaningless: *djgpp*) lt_prog_compiler_pic_F77= ;; *) lt_prog_compiler_pic_F77="$lt_prog_compiler_pic_F77" ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 printf %s "checking for $compiler option to produce PIC... " >&6; } if test ${lt_cv_prog_compiler_pic_F77+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_pic_F77=$lt_prog_compiler_pic_F77 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_F77" >&5 printf "%s\n" "$lt_cv_prog_compiler_pic_F77" >&6; } lt_prog_compiler_pic_F77=$lt_cv_prog_compiler_pic_F77 # # Check to make sure the PIC flag actually works. # if test -n "$lt_prog_compiler_pic_F77"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_F77 works" >&5 printf %s "checking if $compiler PIC flag $lt_prog_compiler_pic_F77 works... " >&6; } if test ${lt_cv_prog_compiler_pic_works_F77+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_pic_works_F77=no ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="$lt_prog_compiler_pic_F77" ## exclude from sc_useless_quotes_in_assignment # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. # The option is referenced via a variable to avoid confusing sed. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_pic_works_F77=yes fi fi $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works_F77" >&5 printf "%s\n" "$lt_cv_prog_compiler_pic_works_F77" >&6; } if test yes = "$lt_cv_prog_compiler_pic_works_F77"; then case $lt_prog_compiler_pic_F77 in "" | " "*) ;; *) lt_prog_compiler_pic_F77=" $lt_prog_compiler_pic_F77" ;; esac else lt_prog_compiler_pic_F77= lt_prog_compiler_can_build_shared_F77=no fi fi # # Check to make sure the static flag actually works. # wl=$lt_prog_compiler_wl_F77 eval lt_tmp_static_flag=\"$lt_prog_compiler_static_F77\" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 printf %s "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } if test ${lt_cv_prog_compiler_static_works_F77+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_static_works_F77=no save_LDFLAGS=$LDFLAGS LDFLAGS="$LDFLAGS $lt_tmp_static_flag" echo "$lt_simple_link_test_code" > conftest.$ac_ext if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then # The linker can only warn and ignore the option if not recognized # So say no if there are warnings if test -s conftest.err; then # Append any errors to the config.log. cat conftest.err 1>&5 $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_static_works_F77=yes fi else lt_cv_prog_compiler_static_works_F77=yes fi fi $RM -r conftest* LDFLAGS=$save_LDFLAGS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works_F77" >&5 printf "%s\n" "$lt_cv_prog_compiler_static_works_F77" >&6; } if test yes = "$lt_cv_prog_compiler_static_works_F77"; then : else lt_prog_compiler_static_F77= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } if test ${lt_cv_prog_compiler_c_o_F77+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_c_o_F77=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o_F77=yes fi fi chmod u+w . 2>&5 $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. $RM -r conftest $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_F77" >&5 printf "%s\n" "$lt_cv_prog_compiler_c_o_F77" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } if test ${lt_cv_prog_compiler_c_o_F77+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_c_o_F77=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o_F77=yes fi fi chmod u+w . 2>&5 $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. $RM -r conftest $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_F77" >&5 printf "%s\n" "$lt_cv_prog_compiler_c_o_F77" >&6; } hard_links=nottested if test no = "$lt_cv_prog_compiler_c_o_F77" && test no != "$need_locks"; then # do not overwrite the value of need_locks provided by the user { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 printf %s "checking if we can lock with hard links... " >&6; } hard_links=yes $RM conftest* ln conftest.a conftest.b 2>/dev/null && hard_links=no touch conftest.a ln conftest.a conftest.b 2>&5 || hard_links=no ln conftest.a conftest.b 2>/dev/null && hard_links=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 printf "%s\n" "$hard_links" >&6; } if test no = "$hard_links"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&5 printf "%s\n" "$as_me: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&2;} need_locks=warn fi else need_locks=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 printf %s "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } runpath_var= allow_undefined_flag_F77= always_export_symbols_F77=no archive_cmds_F77= archive_expsym_cmds_F77= compiler_needs_object_F77=no enable_shared_with_static_runtimes_F77=no export_dynamic_flag_spec_F77= export_symbols_cmds_F77='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' hardcode_automatic_F77=no hardcode_direct_F77=no hardcode_direct_absolute_F77=no hardcode_libdir_flag_spec_F77= hardcode_libdir_separator_F77= hardcode_minus_L_F77=no hardcode_shlibpath_var_F77=unsupported inherit_rpath_F77=no link_all_deplibs_F77=unknown module_cmds_F77= module_expsym_cmds_F77= old_archive_from_new_cmds_F77= old_archive_from_expsyms_cmds_F77= thread_safe_flag_spec_F77= whole_archive_flag_spec_F77= # include_expsyms should be a list of space-separated symbols to be *always* # included in the symbol list include_expsyms_F77= # exclude_expsyms can be an extended regexp of symbols to exclude # it will be wrapped by ' (' and ')$', so one must not match beginning or # end of line. Example: 'a|bc|.*d.*' will exclude the symbols 'a' and 'bc', # as well as any symbol that contains 'd'. exclude_expsyms_F77='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out # platforms (ab)use it in PIC code, but their linkers get confused if # the symbol is explicitly referenced. Since portable code cannot # rely on this symbol name, it's probably fine to never include it in # preloaded symbol tables. # Exclude shared library initialization/finalization symbols. extract_expsyms_cmds= case $host_os in cygwin* | mingw* | pw32* | cegcc*) # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using # Microsoft Visual C++ or Intel C++ Compiler. if test yes != "$GCC"; then with_gnu_ld=no fi ;; interix*) # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) with_gnu_ld=yes ;; openbsd* | bitrig*) with_gnu_ld=no ;; linux* | k*bsd*-gnu | gnu*) link_all_deplibs_F77=no ;; esac ld_shlibs_F77=yes # On some targets, GNU ld is compatible enough with the native linker # that we're better off using the native interface for both. lt_use_gnu_ld_interface=no if test yes = "$with_gnu_ld"; then case $host_os in aix*) # The AIX port of GNU ld has always aspired to compatibility # with the native linker. However, as the warning in the GNU ld # block says, versions before 2.19.5* couldn't really create working # shared libraries, regardless of the interface used. case `$LD -v 2>&1` in *\ \(GNU\ Binutils\)\ 2.19.5*) ;; *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;; *\ \(GNU\ Binutils\)\ [3-9]*) ;; *) lt_use_gnu_ld_interface=yes ;; esac ;; *) lt_use_gnu_ld_interface=yes ;; esac fi if test yes = "$lt_use_gnu_ld_interface"; then # If archive_cmds runs LD, not CC, wlarc should be empty wlarc='$wl' # Set some defaults for GNU ld with shared library support. These # are reset later if shared libraries are not supported. Putting them # here allows them to be overridden if necessary. runpath_var=LD_RUN_PATH hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' export_dynamic_flag_spec_F77='$wl--export-dynamic' # ancient GNU ld didn't support --whole-archive et. al. if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then whole_archive_flag_spec_F77=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' else whole_archive_flag_spec_F77= fi supports_anon_versioning=no case `$LD -v | $SED -e 's/([^)]\+)\s\+//' 2>&1` in *GNU\ gold*) supports_anon_versioning=yes ;; *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... *\ 2.11.*) ;; # other 2.11 versions *) supports_anon_versioning=yes ;; esac # See if GNU ld supports shared libraries. case $host_os in aix[3-9]*) # On AIX/PPC, the GNU linker is very broken if test ia64 != "$host_cpu"; then ld_shlibs_F77=no cat <<_LT_EOF 1>&2 *** Warning: the GNU linker, at least up to release 2.19, is reported *** to be unable to reliably create shared libraries on AIX. *** Therefore, libtool is disabling shared libraries support. If you *** really care for shared libraries, you may want to install binutils *** 2.20 or above, or modify your PATH so that a non-GNU linker is found. *** You will then need to restart the configuration process. _LT_EOF fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_F77='' ;; m68k) archive_cmds_F77='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' hardcode_libdir_flag_spec_F77='-L$libdir' hardcode_minus_L_F77=yes ;; esac ;; beos*) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then allow_undefined_flag_F77=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. FIXME archive_cmds_F77='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' else ld_shlibs_F77=no fi ;; cygwin* | mingw* | pw32* | cegcc*) # _LT_TAGVAR(hardcode_libdir_flag_spec, F77) is actually meaningless, # as there is no search path for DLLs. hardcode_libdir_flag_spec_F77='-L$libdir' export_dynamic_flag_spec_F77='$wl--export-all-symbols' allow_undefined_flag_F77=unsupported always_export_symbols_F77=no enable_shared_with_static_runtimes_F77=yes export_symbols_cmds_F77='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' exclude_expsyms_F77='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file, use it as # is; otherwise, prepend EXPORTS... archive_expsym_cmds_F77='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else ld_shlibs_F77=no fi ;; haiku*) archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' link_all_deplibs_F77=yes ;; os2*) hardcode_libdir_flag_spec_F77='-L$libdir' hardcode_minus_L_F77=yes allow_undefined_flag_F77=unsupported shrext_cmds=.dll archive_cmds_F77='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' archive_expsym_cmds_F77='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ prefix_cmds="$SED"~ if test EXPORTS = "`$SED 1q $export_symbols`"; then prefix_cmds="$prefix_cmds -e 1d"; fi~ prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' old_archive_From_new_cmds_F77='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' enable_shared_with_static_runtimes_F77=yes file_list_spec_F77='@' ;; interix[3-9]*) hardcode_direct_F77=no hardcode_shlibpath_var_F77=no hardcode_libdir_flag_spec_F77='$wl-rpath,$libdir' export_dynamic_flag_spec_F77='$wl-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' archive_expsym_cmds_F77='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) tmp_diet=no if test linux-dietlibc = "$host_os"; then case $cc_basename in diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) esac fi if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ && test no = "$tmp_diet" then tmp_addflag=' $pic_flag' tmp_sharedflag='-shared' case $cc_basename,$host_cpu in pgcc*) # Portland Group C compiler whole_archive_flag_spec_F77='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' tmp_addflag=' $pic_flag' ;; pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group f77 and f90 compilers whole_archive_flag_spec_F77='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' tmp_addflag=' $pic_flag -Mnomain' ;; ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 tmp_addflag=' -i_dynamic' ;; efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 tmp_addflag=' -i_dynamic -nofor_main' ;; ifc* | ifort*) # Intel Fortran compiler tmp_addflag=' -nofor_main' ;; lf95*) # Lahey Fortran 8.1 whole_archive_flag_spec_F77= tmp_sharedflag='--shared' ;; nagfor*) # NAGFOR 5.3 tmp_sharedflag='-Wl,-shared' ;; xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below) tmp_sharedflag='-qmkshrobj' tmp_addflag= ;; nvcc*) # Cuda Compiler Driver 2.2 whole_archive_flag_spec_F77='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' compiler_needs_object_F77=yes ;; esac case `$CC -V 2>&1 | $SED 5q` in *Sun\ C*) # Sun C 5.9 whole_archive_flag_spec_F77='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' compiler_needs_object_F77=yes tmp_sharedflag='-G' ;; *Sun\ F*) # Sun Fortran 8.3 tmp_sharedflag='-G' ;; esac archive_cmds_F77='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' if test yes = "$supports_anon_versioning"; then archive_expsym_cmds_F77='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' fi case $cc_basename in tcc*) hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' export_dynamic_flag_spec_F77='-rdynamic' ;; xlf* | bgf* | bgxlf* | mpixlf*) # IBM XL Fortran 10.1 on PPC cannot create shared libs itself whole_archive_flag_spec_F77='--whole-archive$convenience --no-whole-archive' hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' archive_cmds_F77='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' if test yes = "$supports_anon_versioning"; then archive_expsym_cmds_F77='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' fi ;; esac else ld_shlibs_F77=no fi ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then archive_cmds_F77='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' wlarc= else archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' fi ;; solaris*) if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then ld_shlibs_F77=no cat <<_LT_EOF 1>&2 *** Warning: The releases 2.8.* of the GNU linker cannot reliably *** create shared libraries on Solaris systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.9.1 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs_F77=no fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) case `$LD -v 2>&1` in *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) ld_shlibs_F77=no cat <<_LT_EOF 1>&2 *** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot *** reliably create shared libraries on SCO systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.16.91.0.3 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF ;; *) # For security reasons, it is highly recommended that you always # use absolute paths for naming shared libraries, and exclude the # DT_RUNPATH tag from executables and libraries. But doing so # requires that you compile everything twice, which is a pain. if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs_F77=no fi ;; esac ;; sunos4*) archive_cmds_F77='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' wlarc= hardcode_direct_F77=yes hardcode_shlibpath_var_F77=no ;; *) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs_F77=no fi ;; esac if test no = "$ld_shlibs_F77"; then runpath_var= hardcode_libdir_flag_spec_F77= export_dynamic_flag_spec_F77= whole_archive_flag_spec_F77= fi else # PORTME fill in a description of your system's linker (not GNU ld) case $host_os in aix3*) allow_undefined_flag_F77=unsupported always_export_symbols_F77=yes archive_expsym_cmds_F77='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' # Note: this linker hardcodes the directories in LIBPATH if there # are no directories specified by -L. hardcode_minus_L_F77=yes if test yes = "$GCC" && test -z "$lt_prog_compiler_static"; then # Neither direct hardcoding nor static linking is supported with a # broken collect2. hardcode_direct_F77=unsupported fi ;; aix[4-9]*) if test ia64 = "$host_cpu"; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag= else # If we're using GNU nm, then we don't want the "-C" option. # -C means demangle to GNU nm, but means don't demangle to AIX nm. # Without the "-l" option, or with the "-B" option, AIX nm treats # weak defined symbols like other global defined symbols, whereas # GNU nm marks them as "W". # While the 'weak' keyword is ignored in the Export File, we need # it in the Import File for the 'aix-soname' feature, so we have # to replace the "-B" option with "-P" for AIX nm. if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then export_symbols_cmds_F77='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' else export_symbols_cmds_F77='`func_echo_all $NM | $SED -e '\''s/B\([^B]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && (substr(\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' fi aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # have runtime linking enabled, and use it for executables. # For shared libraries, we enable/disable runtime linking # depending on the kind of the shared library created - # when "with_aix_soname,aix_use_runtimelinking" is: # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables # "aix,yes" lib.so shared, rtl:yes, for executables # lib.a static archive # "both,no" lib.so.V(shr.o) shared, rtl:yes # lib.a(lib.so.V) shared, rtl:no, for executables # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables # lib.a(lib.so.V) shared, rtl:no # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables # lib.a static archive case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) for ld_flag in $LDFLAGS; do if (test x-brtl = "x$ld_flag" || test x-Wl,-brtl = "x$ld_flag"); then aix_use_runtimelinking=yes break fi done if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then # With aix-soname=svr4, we create the lib.so.V shared archives only, # so we don't have lib.a shared libs to link our executables. # We have to force runtime linking in this case. aix_use_runtimelinking=yes LDFLAGS="$LDFLAGS -Wl,-brtl" fi ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. archive_cmds_F77='' hardcode_direct_F77=yes hardcode_direct_absolute_F77=yes hardcode_libdir_separator_F77=':' link_all_deplibs_F77=yes file_list_spec_F77='$wl-f,' case $with_aix_soname,$aix_use_runtimelinking in aix,*) ;; # traditional, no import file svr4,* | *,yes) # use import file # The Import File defines what to hardcode. hardcode_direct_F77=no hardcode_direct_absolute_F77=no ;; esac if test yes = "$GCC"; then case $host_os in aix4.[012]|aix4.[012].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`$CC -print-prog-name=collect2` if test -f "$collect2name" && strings "$collect2name" | $GREP resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 hardcode_direct_F77=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking hardcode_minus_L_F77=yes hardcode_libdir_flag_spec_F77='-L$libdir' hardcode_libdir_separator_F77= fi ;; esac shared_flag='-shared' if test yes = "$aix_use_runtimelinking"; then shared_flag="$shared_flag "'$wl-G' fi # Need to ensure runtime linking is disabled for the traditional # shared library, or the linker may eventually find shared libraries # /with/ Import File - we do not want to mix them. shared_flag_aix='-shared' shared_flag_svr4='-shared $wl-G' else # not using gcc if test ia64 = "$host_cpu"; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test yes = "$aix_use_runtimelinking"; then shared_flag='$wl-G' else shared_flag='$wl-bM:SRE' fi shared_flag_aix='$wl-bM:SRE' shared_flag_svr4='$wl-G' fi fi export_dynamic_flag_spec_F77='$wl-bexpall' # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to export. always_export_symbols_F77=yes if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. allow_undefined_flag_F77='-berok' # Determine the default libpath from the value encoded in an # empty executable. if test set = "${lt_cv_aix_libpath+set}"; then aix_libpath=$lt_cv_aix_libpath else if test ${lt_cv_aix_libpath__F77+y} then : printf %s "(cached) " >&6 else $as_nop cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }' lt_cv_aix_libpath__F77=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$lt_cv_aix_libpath__F77"; then lt_cv_aix_libpath__F77=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test -z "$lt_cv_aix_libpath__F77"; then lt_cv_aix_libpath__F77=/usr/lib:/lib fi fi aix_libpath=$lt_cv_aix_libpath__F77 fi hardcode_libdir_flag_spec_F77='$wl-blibpath:$libdir:'"$aix_libpath" archive_expsym_cmds_F77='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag else if test ia64 = "$host_cpu"; then hardcode_libdir_flag_spec_F77='$wl-R $libdir:/usr/lib:/lib' allow_undefined_flag_F77="-z nodefs" archive_expsym_cmds_F77="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an # empty executable. if test set = "${lt_cv_aix_libpath+set}"; then aix_libpath=$lt_cv_aix_libpath else if test ${lt_cv_aix_libpath__F77+y} then : printf %s "(cached) " >&6 else $as_nop cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }' lt_cv_aix_libpath__F77=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$lt_cv_aix_libpath__F77"; then lt_cv_aix_libpath__F77=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test -z "$lt_cv_aix_libpath__F77"; then lt_cv_aix_libpath__F77=/usr/lib:/lib fi fi aix_libpath=$lt_cv_aix_libpath__F77 fi hardcode_libdir_flag_spec_F77='$wl-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. no_undefined_flag_F77=' $wl-bernotok' allow_undefined_flag_F77=' $wl-berok' if test yes = "$with_gnu_ld"; then # We only use this code for GNU lds that support --whole-archive. whole_archive_flag_spec_F77='$wl--whole-archive$convenience $wl--no-whole-archive' else # Exported symbols can be pulled into shared objects from archives whole_archive_flag_spec_F77='$convenience' fi archive_cmds_need_lc_F77=yes archive_expsym_cmds_F77='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' # -brtl affects multiple linker settings, -berok does not and is overridden later compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([, ]\\)%-berok\\1%g"`' if test svr4 != "$with_aix_soname"; then # This is similar to how AIX traditionally builds its shared libraries. archive_expsym_cmds_F77="$archive_expsym_cmds_F77"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' fi if test aix != "$with_aix_soname"; then archive_expsym_cmds_F77="$archive_expsym_cmds_F77"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! $soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' else # used by -dlpreopen to get the symbols archive_expsym_cmds_F77="$archive_expsym_cmds_F77"'~$MV $output_objdir/$realname.d/$soname $output_objdir' fi archive_expsym_cmds_F77="$archive_expsym_cmds_F77"'~$RM -r $output_objdir/$realname.d' fi fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_F77='' ;; m68k) archive_cmds_F77='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' hardcode_libdir_flag_spec_F77='-L$libdir' hardcode_minus_L_F77=yes ;; esac ;; bsdi[45]*) export_dynamic_flag_spec_F77=-rdynamic ;; cygwin* | mingw* | pw32* | cegcc*) # When not using gcc, we currently assume that we are using # Microsoft Visual C++ or Intel C++ Compiler. # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. case $cc_basename in cl* | icl*) # Native MSVC or ICC hardcode_libdir_flag_spec_F77=' ' allow_undefined_flag_F77=unsupported always_export_symbols_F77=yes file_list_spec_F77='@' # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=.dll # FIXME: Setting linknames here is a bad hack. archive_cmds_F77='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' archive_expsym_cmds_F77='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then cp "$export_symbols" "$output_objdir/$soname.def"; echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; else $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; fi~ $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ linknames=' # The linker will not automatically build a static lib if we build a DLL. # _LT_TAGVAR(old_archive_from_new_cmds, F77)='true' enable_shared_with_static_runtimes_F77=yes exclude_expsyms_F77='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' export_symbols_cmds_F77='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' # Don't use ranlib old_postinstall_cmds_F77='chmod 644 $oldlib' postlink_cmds_F77='lt_outputfile="@OUTPUT@"~ lt_tool_outputfile="@TOOL_OUTPUT@"~ case $lt_outputfile in *.exe|*.EXE) ;; *) lt_outputfile=$lt_outputfile.exe lt_tool_outputfile=$lt_tool_outputfile.exe ;; esac~ if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; $RM "$lt_outputfile.manifest"; fi' ;; *) # Assume MSVC and ICC wrapper hardcode_libdir_flag_spec_F77=' ' allow_undefined_flag_F77=unsupported # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=.dll # FIXME: Setting linknames here is a bad hack. archive_cmds_F77='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' # The linker will automatically build a .lib file if we build a DLL. old_archive_from_new_cmds_F77='true' # FIXME: Should let the user specify the lib program. old_archive_cmds_F77='lib -OUT:$oldlib$oldobjs$old_deplibs' enable_shared_with_static_runtimes_F77=yes ;; esac ;; darwin* | rhapsody*) archive_cmds_need_lc_F77=no hardcode_direct_F77=no hardcode_automatic_F77=yes hardcode_shlibpath_var_F77=unsupported if test yes = "$lt_cv_ld_force_load"; then whole_archive_flag_spec_F77='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' compiler_needs_object_F77=yes else whole_archive_flag_spec_F77='' fi link_all_deplibs_F77=yes allow_undefined_flag_F77=$_lt_dar_allow_undefined case $cc_basename in ifort*|nagfor*) _lt_dar_can_shared=yes ;; *) _lt_dar_can_shared=$GCC ;; esac if test yes = "$_lt_dar_can_shared"; then output_verbose_link_cmd=func_echo_all archive_cmds_F77="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" module_cmds_F77="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" archive_expsym_cmds_F77="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" module_expsym_cmds_F77="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" else ld_shlibs_F77=no fi ;; dgux*) archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec_F77='-L$libdir' hardcode_shlibpath_var_F77=no ;; # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor # support. Future versions do this automatically, but an explicit c++rt0.o # does not break anything, and helps significantly (at the cost of a little # extra space). freebsd2.2*) archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' hardcode_libdir_flag_spec_F77='-R$libdir' hardcode_direct_F77=yes hardcode_shlibpath_var_F77=no ;; # Unfortunately, older versions of FreeBSD 2 do not have this feature. freebsd2.*) archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' hardcode_direct_F77=yes hardcode_minus_L_F77=yes hardcode_shlibpath_var_F77=no ;; # FreeBSD 3 and greater uses gcc -shared to do shared libraries. freebsd* | dragonfly* | midnightbsd*) archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec_F77='-R$libdir' hardcode_direct_F77=yes hardcode_shlibpath_var_F77=no ;; hpux9*) if test yes = "$GCC"; then archive_cmds_F77='$RM $output_objdir/$soname~$CC -shared $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' else archive_cmds_F77='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' fi hardcode_libdir_flag_spec_F77='$wl+b $wl$libdir' hardcode_libdir_separator_F77=: hardcode_direct_F77=yes # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L_F77=yes export_dynamic_flag_spec_F77='$wl-E' ;; hpux10*) if test yes,no = "$GCC,$with_gnu_ld"; then archive_cmds_F77='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds_F77='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' fi if test no = "$with_gnu_ld"; then hardcode_libdir_flag_spec_F77='$wl+b $wl$libdir' hardcode_libdir_separator_F77=: hardcode_direct_F77=yes hardcode_direct_absolute_F77=yes export_dynamic_flag_spec_F77='$wl-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L_F77=yes fi ;; hpux11*) if test yes,no = "$GCC,$with_gnu_ld"; then case $host_cpu in hppa*64*) archive_cmds_F77='$CC -shared $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds_F77='$CC -shared $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) archive_cmds_F77='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac else case $host_cpu in hppa*64*) archive_cmds_F77='$CC -b $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds_F77='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) archive_cmds_F77='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac fi if test no = "$with_gnu_ld"; then hardcode_libdir_flag_spec_F77='$wl+b $wl$libdir' hardcode_libdir_separator_F77=: case $host_cpu in hppa*64*|ia64*) hardcode_direct_F77=no hardcode_shlibpath_var_F77=no ;; *) hardcode_direct_F77=yes hardcode_direct_absolute_F77=yes export_dynamic_flag_spec_F77='$wl-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L_F77=yes ;; esac fi ;; irix5* | irix6* | nonstopux*) if test yes = "$GCC"; then archive_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' # Try to use the -exported_symbol ld option, if it does not # work, assume that -exports_file does not work either and # implicitly export all symbols. # This should be the same for all languages, so no per-tag cache variable. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5 printf %s "checking whether the $host_os linker accepts -exported_symbol... " >&6; } if test ${lt_cv_irix_exported_symbol+y} then : printf %s "(cached) " >&6 else $as_nop save_LDFLAGS=$LDFLAGS LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" cat > conftest.$ac_ext <<_ACEOF subroutine foo end _ACEOF if ac_fn_f77_try_link "$LINENO" then : lt_cv_irix_exported_symbol=yes else $as_nop lt_cv_irix_exported_symbol=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LDFLAGS=$save_LDFLAGS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 printf "%s\n" "$lt_cv_irix_exported_symbol" >&6; } if test yes = "$lt_cv_irix_exported_symbol"; then archive_expsym_cmds_F77='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' fi link_all_deplibs_F77=no else archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' archive_expsym_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' fi archive_cmds_need_lc_F77='no' hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' hardcode_libdir_separator_F77=: inherit_rpath_F77=yes link_all_deplibs_F77=yes ;; linux*) case $cc_basename in tcc*) # Fabrice Bellard et al's Tiny C Compiler ld_shlibs_F77=yes archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' ;; esac ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out else archive_cmds_F77='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF fi hardcode_libdir_flag_spec_F77='-R$libdir' hardcode_direct_F77=yes hardcode_shlibpath_var_F77=no ;; newsos6) archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct_F77=yes hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' hardcode_libdir_separator_F77=: hardcode_shlibpath_var_F77=no ;; *nto* | *qnx*) ;; openbsd* | bitrig*) if test -f /usr/libexec/ld.so; then hardcode_direct_F77=yes hardcode_shlibpath_var_F77=no hardcode_direct_absolute_F77=yes if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags $wl-retain-symbols-file,$export_symbols' hardcode_libdir_flag_spec_F77='$wl-rpath,$libdir' export_dynamic_flag_spec_F77='$wl-E' else archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec_F77='$wl-rpath,$libdir' fi else ld_shlibs_F77=no fi ;; os2*) hardcode_libdir_flag_spec_F77='-L$libdir' hardcode_minus_L_F77=yes allow_undefined_flag_F77=unsupported shrext_cmds=.dll archive_cmds_F77='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' archive_expsym_cmds_F77='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ prefix_cmds="$SED"~ if test EXPORTS = "`$SED 1q $export_symbols`"; then prefix_cmds="$prefix_cmds -e 1d"; fi~ prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' old_archive_From_new_cmds_F77='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' enable_shared_with_static_runtimes_F77=yes file_list_spec_F77='@' ;; osf3*) if test yes = "$GCC"; then allow_undefined_flag_F77=' $wl-expect_unresolved $wl\*' archive_cmds_F77='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' else allow_undefined_flag_F77=' -expect_unresolved \*' archive_cmds_F77='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' fi archive_cmds_need_lc_F77='no' hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' hardcode_libdir_separator_F77=: ;; osf4* | osf5*) # as osf3* with the addition of -msym flag if test yes = "$GCC"; then allow_undefined_flag_F77=' $wl-expect_unresolved $wl\*' archive_cmds_F77='$CC -shared$allow_undefined_flag $pic_flag $libobjs $deplibs $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' hardcode_libdir_flag_spec_F77='$wl-rpath $wl$libdir' else allow_undefined_flag_F77=' -expect_unresolved \*' archive_cmds_F77='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' archive_expsym_cmds_F77='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ $CC -shared$allow_undefined_flag $wl-input $wl$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~$RM $lib.exp' # Both c and cxx compiler support -rpath directly hardcode_libdir_flag_spec_F77='-rpath $libdir' fi archive_cmds_need_lc_F77='no' hardcode_libdir_separator_F77=: ;; solaris*) no_undefined_flag_F77=' -z defs' if test yes = "$GCC"; then wlarc='$wl' archive_cmds_F77='$CC -shared $pic_flag $wl-z ${wl}text $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_F77='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -shared $pic_flag $wl-z ${wl}text $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' else case `$CC -V 2>&1` in *"Compilers 5.0"*) wlarc='' archive_cmds_F77='$LD -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $linker_flags' archive_expsym_cmds_F77='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $LD -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' ;; *) wlarc='$wl' archive_cmds_F77='$CC -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_F77='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' ;; esac fi hardcode_libdir_flag_spec_F77='-R$libdir' hardcode_shlibpath_var_F77=no case $host_os in solaris2.[0-5] | solaris2.[0-5].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands '-z linker_flag'. GCC discards it without '$wl', # but is careful enough not to reorder. # Supported since Solaris 2.6 (maybe 2.5.1?) if test yes = "$GCC"; then whole_archive_flag_spec_F77='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' else whole_archive_flag_spec_F77='-z allextract$convenience -z defaultextract' fi ;; esac link_all_deplibs_F77=yes ;; sunos4*) if test sequent = "$host_vendor"; then # Use $CC to link under sequent, because it throws in some extra .o # files that make .init and .fini sections work. archive_cmds_F77='$CC -G $wl-h $soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds_F77='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' fi hardcode_libdir_flag_spec_F77='-L$libdir' hardcode_direct_F77=yes hardcode_minus_L_F77=yes hardcode_shlibpath_var_F77=no ;; sysv4) case $host_vendor in sni) archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct_F77=yes # is this really true??? ;; siemens) ## LD is ld it makes a PLAMLIB ## CC just makes a GrossModule. archive_cmds_F77='$LD -G -o $lib $libobjs $deplibs $linker_flags' reload_cmds_F77='$CC -r -o $output$reload_objs' hardcode_direct_F77=no ;; motorola) archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct_F77=no #Motorola manual says yes, but my tests say they lie ;; esac runpath_var='LD_RUN_PATH' hardcode_shlibpath_var_F77=no ;; sysv4.3*) archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_shlibpath_var_F77=no export_dynamic_flag_spec_F77='-Bexport' ;; sysv4*MP*) if test -d /usr/nec; then archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_shlibpath_var_F77=no runpath_var=LD_RUN_PATH hardcode_runpath_var=yes ld_shlibs_F77=yes fi ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) no_undefined_flag_F77='$wl-z,text' archive_cmds_need_lc_F77=no hardcode_shlibpath_var_F77=no runpath_var='LD_RUN_PATH' if test yes = "$GCC"; then archive_cmds_F77='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_F77='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds_F77='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_F77='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We CANNOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. no_undefined_flag_F77='$wl-z,text' allow_undefined_flag_F77='$wl-z,nodefs' archive_cmds_need_lc_F77=no hardcode_shlibpath_var_F77=no hardcode_libdir_flag_spec_F77='$wl-R,$libdir' hardcode_libdir_separator_F77=':' link_all_deplibs_F77=yes export_dynamic_flag_spec_F77='$wl-Bexport' runpath_var='LD_RUN_PATH' if test yes = "$GCC"; then archive_cmds_F77='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_F77='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds_F77='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_F77='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; uts4*) archive_cmds_F77='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec_F77='-L$libdir' hardcode_shlibpath_var_F77=no ;; *) ld_shlibs_F77=no ;; esac if test sni = "$host_vendor"; then case $host in sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) export_dynamic_flag_spec_F77='$wl-Blargedynsym' ;; esac fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_F77" >&5 printf "%s\n" "$ld_shlibs_F77" >&6; } test no = "$ld_shlibs_F77" && can_build_shared=no with_gnu_ld_F77=$with_gnu_ld # # Do we need to explicitly link libc? # case "x$archive_cmds_need_lc_F77" in x|xyes) # Assume -lc should be added archive_cmds_need_lc_F77=yes if test yes,yes = "$GCC,$enable_shared"; then case $archive_cmds_F77 in *'~'*) # FIXME: we may have to deal with multi-command sequences. ;; '$CC '*) # Test whether the compiler implicitly links with -lc since on some # systems, -lgcc has to come before -lc. If gcc already passes -lc # to ld, don't add -lc before -lgcc. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 printf %s "checking whether -lc should be explicitly linked in... " >&6; } if test ${lt_cv_archive_cmds_need_lc_F77+y} then : printf %s "(cached) " >&6 else $as_nop $RM conftest* echo "$lt_simple_compile_test_code" > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } 2>conftest.err; then soname=conftest lib=conftest libobjs=conftest.$ac_objext deplibs= wl=$lt_prog_compiler_wl_F77 pic_flag=$lt_prog_compiler_pic_F77 compiler_flags=-v linker_flags=-v verstring= output_objdir=. libname=conftest lt_save_allow_undefined_flag=$allow_undefined_flag_F77 allow_undefined_flag_F77= if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds_F77 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 (eval $archive_cmds_F77 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } then lt_cv_archive_cmds_need_lc_F77=no else lt_cv_archive_cmds_need_lc_F77=yes fi allow_undefined_flag_F77=$lt_save_allow_undefined_flag else cat conftest.err 1>&5 fi $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc_F77" >&5 printf "%s\n" "$lt_cv_archive_cmds_need_lc_F77" >&6; } archive_cmds_need_lc_F77=$lt_cv_archive_cmds_need_lc_F77 ;; esac fi ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 printf %s "checking dynamic linker characteristics... " >&6; } library_names_spec= libname_spec='lib$name' soname_spec= shrext_cmds=.so postinstall_cmds= postuninstall_cmds= finish_cmds= finish_eval= shlibpath_var= shlibpath_overrides_runpath=unknown version_type=none dynamic_linker="$host_os ld.so" sys_lib_dlsearch_path_spec="/lib /usr/lib" need_lib_prefix=unknown hardcode_into_libs=no # when you set need_version to no, make sure it does not cause -set_version # flags to be left without arguments need_version=unknown case $host_os in aix3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname.a' shlibpath_var=LIBPATH # AIX 3 has no versioning support, so we append a major version to the name. soname_spec='$libname$release$shared_ext$major' ;; aix[4-9]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no hardcode_into_libs=yes if test ia64 = "$host_cpu"; then # AIX 5 supports IA64 library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH else # With GCC up to 2.95.x, collect2 would create an import file # for dependence libraries. The import file would start with # the line '#! .'. This would cause the generated library to # depend on '.', always an invalid library. This was fixed in # development snapshots of GCC prior to 3.0. case $host_os in aix4 | aix4.[01] | aix4.[01].*) if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' echo ' yes ' echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then : else can_build_shared=no fi ;; esac # Using Import Files as archive members, it is possible to support # filename-based versioning of shared library archives on AIX. While # this would work for both with and without runtime linking, it will # prevent static linking of such archives. So we do filename-based # shared library versioning with .so extension only, which is used # when both runtime linking and shared linking is enabled. # Unfortunately, runtime linking may impact performance, so we do # not want this to be the default eventually. Also, we use the # versioned .so libs for executables only if there is the -brtl # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. # To allow for filename-based versioning support, we need to create # libNAME.so.V as an archive file, containing: # *) an Import File, referring to the versioned filename of the # archive as well as the shared archive member, telling the # bitwidth (32 or 64) of that shared object, and providing the # list of exported symbols of that shared object, eventually # decorated with the 'weak' keyword # *) the shared object with the F_LOADONLY flag set, to really avoid # it being seen by the linker. # At run time we better use the real file rather than another symlink, # but for link time we create the symlink libNAME.so -> libNAME.so.V case $with_aix_soname,$aix_use_runtimelinking in # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct # soname into executable. Probably we can add versioning support to # collect2, so additional links can be useful in future. aix,yes) # traditional libtool dynamic_linker='AIX unversionable lib.so' # If using run time linking (on AIX 4.2 or later) use lib.so # instead of lib.a to let people know that these are not # typical AIX shared libraries. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' ;; aix,no) # traditional AIX only dynamic_linker='AIX lib.a(lib.so.V)' # We preserve .a as extension for shared libraries through AIX4.2 # and later when we are not doing run time linking. library_names_spec='$libname$release.a $libname.a' soname_spec='$libname$release$shared_ext$major' ;; svr4,*) # full svr4 only dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o)" library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' # We do not specify a path in Import Files, so LIBPATH fires. shlibpath_overrides_runpath=yes ;; *,yes) # both, prefer svr4 dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o), lib.a(lib.so.V)" library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' # unpreferred sharedlib libNAME.a needs extra handling postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' # We do not specify a path in Import Files, so LIBPATH fires. shlibpath_overrides_runpath=yes ;; *,no) # both, prefer aix dynamic_linker="AIX lib.a(lib.so.V), lib.so.V($shared_archive_member_spec.o)" library_names_spec='$libname$release.a $libname.a' soname_spec='$libname$release$shared_ext$major' # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' ;; esac shlibpath_var=LIBPATH fi ;; amigaos*) case $host_cpu in powerpc) # Since July 2007 AmigaOS4 officially supports .so libraries. # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' ;; m68k) library_names_spec='$libname.ixlibrary $libname.a' # Create ${libname}_ixlibrary.a entries in /sys/libs. finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' ;; esac ;; beos*) library_names_spec='$libname$shared_ext' dynamic_linker="$host_os ld.so" shlibpath_var=LIBRARY_PATH ;; bsdi[45]*) version_type=linux # correct to gnu/linux during the next big refactor need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" # the default ld.so.conf also contains /usr/contrib/lib and # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow # libtool to hard-code these into programs ;; cygwin* | mingw* | pw32* | cegcc*) version_type=windows shrext_cmds=.dll need_version=no need_lib_prefix=no case $GCC,$cc_basename in yes,*) # gcc library_names_spec='$libname.dll.a' # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \$file`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname~ if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; fi' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' ;; mingw* | cegcc*) # MinGW DLLs use traditional 'lib' prefix soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' ;; pw32*) # pw32 DLLs use 'pw' prefix rather than 'lib' library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' ;; esac dynamic_linker='Win32 ld.exe' ;; *,cl* | *,icl*) # Native MSVC or ICC libname_spec='$name' soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' library_names_spec='$libname.dll.lib' case $build_os in mingw*) sys_lib_search_path_spec= lt_save_ifs=$IFS IFS=';' for lt_path in $LIB do IFS=$lt_save_ifs # Let DOS variable expansion print the short 8.3 style file name. lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" done IFS=$lt_save_ifs # Convert to MSYS style. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` ;; cygwin*) # Convert to unix form, then to dos form, then back to unix form # but this time dos style (no spaces!) so that the unix form looks # like /cygdrive/c/PROGRA~1:/cygdr... sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` ;; *) sys_lib_search_path_spec=$LIB if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then # It is most probably a Windows format PATH. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` else sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi # FIXME: find the short name or the path components, as spaces are # common. (e.g. "Program Files" -> "PROGRA~1") ;; esac # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \$file`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes dynamic_linker='Win32 link.exe' ;; *) # Assume MSVC and ICC wrapper library_names_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext $libname.lib' dynamic_linker='Win32 ld.exe' ;; esac # FIXME: first we should search . and the directory the executable is in shlibpath_var=PATH ;; darwin* | rhapsody*) dynamic_linker="$host_os dyld" version_type=darwin need_lib_prefix=no need_version=no library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' soname_spec='$libname$release$major$shared_ext' shlibpath_overrides_runpath=yes shlibpath_var=DYLD_LIBRARY_PATH shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' ;; dgux*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH ;; freebsd* | dragonfly* | midnightbsd*) # DragonFly does not have aout. When/if they implement a new # versioning mechanism, adjust this. if test -x /usr/bin/objformat; then objformat=`/usr/bin/objformat` else case $host_os in freebsd[23].*) objformat=aout ;; *) objformat=elf ;; esac fi version_type=freebsd-$objformat case $version_type in freebsd-elf*) library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' need_version=no need_lib_prefix=no ;; freebsd-*) library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' need_version=yes ;; esac shlibpath_var=LD_LIBRARY_PATH case $host_os in freebsd2.*) shlibpath_overrides_runpath=yes ;; freebsd3.[01]* | freebsdelf3.[01]*) shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; *) # from 4.6 on, and DragonFly shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; esac ;; haiku*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no dynamic_linker="$host_os runtime_loader" library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LIBRARY_PATH shlibpath_overrides_runpath=no sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' hardcode_into_libs=yes ;; hpux9* | hpux10* | hpux11*) # Give a soname corresponding to the major version so that dld.sl refuses to # link against other versions. version_type=sunos need_lib_prefix=no need_version=no case $host_cpu in ia64*) shrext_cmds='.so' hardcode_into_libs=yes dynamic_linker="$host_os dld.so" shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' if test 32 = "$HPUX_IA64_MODE"; then sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" sys_lib_dlsearch_path_spec=/usr/lib/hpux32 else sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" sys_lib_dlsearch_path_spec=/usr/lib/hpux64 fi ;; hppa*64*) shrext_cmds='.sl' hardcode_into_libs=yes dynamic_linker="$host_os dld.sl" shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; *) shrext_cmds='.sl' dynamic_linker="$host_os dld.sl" shlibpath_var=SHLIB_PATH shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' ;; esac # HP-UX runs *really* slowly unless shared libraries are mode 555, ... postinstall_cmds='chmod 555 $lib' # or fails outright, so override atomically: install_override_mode=555 ;; interix[3-9]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; irix5* | irix6* | nonstopux*) case $host_os in nonstopux*) version_type=nonstopux ;; *) if test yes = "$lt_cv_prog_gnu_ld"; then version_type=linux # correct to gnu/linux during the next big refactor else version_type=irix fi ;; esac need_lib_prefix=no need_version=no soname_spec='$libname$release$shared_ext$major' library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' case $host_os in irix5* | nonstopux*) libsuff= shlibsuff= ;; *) case $LD in # libtool.m4 will add one of these switches to LD *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") libsuff= shlibsuff= libmagic=32-bit;; *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") libsuff=64 shlibsuff=64 libmagic=64-bit;; *) libsuff= shlibsuff= libmagic=never-match;; esac ;; esac shlibpath_var=LD_LIBRARY${shlibsuff}_PATH shlibpath_overrides_runpath=no sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" hardcode_into_libs=yes ;; # No shared lib support for Linux oldld, aout, or coff. linux*oldld* | linux*aout* | linux*coff*) dynamic_linker=no ;; linux*android*) version_type=none # Android doesn't support versioned libraries. need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext' soname_spec='$libname$release$shared_ext' finish_cmds= shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes dynamic_linker='Android linker' # Don't embed -rpath directories since the linker doesn't support them. hardcode_libdir_flag_spec_F77='-L$libdir' ;; # This must be glibc/ELF. linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no # Some binutils ld are patched to set DT_RUNPATH if test ${lt_cv_shlibpath_overrides_runpath+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_shlibpath_overrides_runpath=no save_LDFLAGS=$LDFLAGS save_libdir=$libdir eval "libdir=/foo; wl=\"$lt_prog_compiler_wl_F77\"; \ LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec_F77\"" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null then : lt_cv_shlibpath_overrides_runpath=yes fi fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LDFLAGS=$save_LDFLAGS libdir=$save_libdir fi shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes # Ideally, we could use ldconfig to report *all* directores which are # searched for libraries, however this is still not possible. Aside from not # being certain /sbin/ldconfig is available, command # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, # even though it is searched at run-time. Try to do the best guess by # appending ld.so.conf contents (and includes) to the search path. if test -f /etc/ld.so.conf; then lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" fi # We used to test for /lib/ld.so.1 and disable shared libraries on # powerpc, because MkLinux only supported shared libraries with the # GNU dynamic linker. Since this was broken with cross compilers, # most powerpc-linux boxes support dynamic linking these days and # people can always --disable-shared, the test was removed, and we # assume the GNU/Linux dynamic linker is in use. dynamic_linker='GNU/Linux ld.so' ;; netbsdelf*-gnu) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='NetBSD ld.elf_so' ;; netbsd*) version_type=sunos need_lib_prefix=no need_version=no if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' dynamic_linker='NetBSD (a.out) ld.so' else library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' dynamic_linker='NetBSD ld.elf_so' fi shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; newsos6) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; *nto* | *qnx*) version_type=qnx need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='ldqnx.so' ;; openbsd* | bitrig*) version_type=sunos sys_lib_dlsearch_path_spec=/usr/lib need_lib_prefix=no if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then need_version=no else need_version=yes fi library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; os2*) libname_spec='$name' version_type=windows shrext_cmds=.dll need_version=no need_lib_prefix=no # OS/2 can only load a DLL with a base name of 8 characters or less. soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; v=$($ECHO $release$versuffix | tr -d .-); n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); $ECHO $n$v`$shared_ext' library_names_spec='${libname}_dll.$libext' dynamic_linker='OS/2 ld.exe' shlibpath_var=BEGINLIBPATH sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec postinstall_cmds='base_file=`basename \$file`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname~ if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; fi' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; $ECHO \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' ;; osf3* | osf4* | osf5*) version_type=osf need_lib_prefix=no need_version=no soname_spec='$libname$release$shared_ext$major' library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; rdos*) dynamic_linker=no ;; solaris*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes # ldd complains unless libraries are executable postinstall_cmds='chmod +x $lib' ;; sunos4*) version_type=sunos library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes if test yes = "$with_gnu_ld"; then need_lib_prefix=no fi need_version=yes ;; sysv4 | sysv4.3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH case $host_vendor in sni) shlibpath_overrides_runpath=no need_lib_prefix=no runpath_var=LD_RUN_PATH ;; siemens) need_lib_prefix=no ;; motorola) need_lib_prefix=no need_version=no shlibpath_overrides_runpath=no sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' ;; esac ;; sysv4*MP*) if test -d /usr/nec; then version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' soname_spec='$libname$shared_ext.$major' shlibpath_var=LD_LIBRARY_PATH fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) version_type=sco need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes if test yes = "$with_gnu_ld"; then sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' else sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' case $host_os in sco3.2v5*) sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" ;; esac fi sys_lib_dlsearch_path_spec='/usr/lib' ;; tpf*) # TPF is a cross-target only. Preferred cross-host = GNU/Linux. version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; uts4*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH ;; *) dynamic_linker=no ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 printf "%s\n" "$dynamic_linker" >&6; } test no = "$dynamic_linker" && can_build_shared=no variables_saved_for_relink="PATH $shlibpath_var $runpath_var" if test yes = "$GCC"; then variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" fi if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec fi if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec fi # remember unaugmented sys_lib_dlsearch_path content for libtool script decls... configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec # ... but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" # to be used as default LT_SYS_LIBRARY_PATH value in generated libtool configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 printf %s "checking how to hardcode library paths into programs... " >&6; } hardcode_action_F77= if test -n "$hardcode_libdir_flag_spec_F77" || test -n "$runpath_var_F77" || test yes = "$hardcode_automatic_F77"; then # We can hardcode non-existent directories. if test no != "$hardcode_direct_F77" && # If the only mechanism to avoid hardcoding is shlibpath_var, we # have to relink, otherwise we might link with an installed library # when we should be linking with a yet-to-be-installed one ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, F77)" && test no != "$hardcode_minus_L_F77"; then # Linking always hardcodes the temporary library directory. hardcode_action_F77=relink else # We can link without hardcoding, and we can hardcode nonexisting dirs. hardcode_action_F77=immediate fi else # We cannot hardcode anything, or else we can only hardcode existing # directories. hardcode_action_F77=unsupported fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hardcode_action_F77" >&5 printf "%s\n" "$hardcode_action_F77" >&6; } if test relink = "$hardcode_action_F77" || test yes = "$inherit_rpath_F77"; then # Fast installation is not supported enable_fast_install=no elif test yes = "$shlibpath_overrides_runpath" || test no = "$enable_shared"; then # Fast installation is not necessary enable_fast_install=needless fi fi # test -n "$compiler" GCC=$lt_save_GCC CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS fi # test yes != "$_lt_disable_F77" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu if test -z "$FC" || test no = "$FC"; then _lt_disable_FC=yes fi archive_cmds_need_lc_FC=no allow_undefined_flag_FC= always_export_symbols_FC=no archive_expsym_cmds_FC= export_dynamic_flag_spec_FC= hardcode_direct_FC=no hardcode_direct_absolute_FC=no hardcode_libdir_flag_spec_FC= hardcode_libdir_separator_FC= hardcode_minus_L_FC=no hardcode_automatic_FC=no inherit_rpath_FC=no module_cmds_FC= module_expsym_cmds_FC= link_all_deplibs_FC=unknown old_archive_cmds_FC=$old_archive_cmds reload_flag_FC=$reload_flag reload_cmds_FC=$reload_cmds no_undefined_flag_FC= whole_archive_flag_spec_FC= enable_shared_with_static_runtimes_FC=no # Source file extension for fc test sources. ac_ext=${ac_fc_srcext-f} # Object file extension for compiled fc test sources. objext=o objext_FC=$objext # No sense in running all these tests if we already determined that # the FC compiler isn't working. Some variables (like enable_shared) # are currently assumed to apply to all compilers on this platform, # and will be corrupted by setting them based on a non-working compiler. if test yes != "$_lt_disable_FC"; then # Code to be used in simple compile tests lt_simple_compile_test_code="\ subroutine t return end " # Code to be used in simple link tests lt_simple_link_test_code="\ program t end " # ltmain only uses $CC for tagged configurations so make sure $CC is set. # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. compiler=$CC # save warnings/boilerplate of simple test code ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" >conftest.$ac_ext eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_compiler_boilerplate=`cat conftest.err` $RM conftest* ac_outfile=conftest.$ac_objext echo "$lt_simple_link_test_code" >conftest.$ac_ext eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_linker_boilerplate=`cat conftest.err` $RM -r conftest* # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_GCC=$GCC lt_save_CFLAGS=$CFLAGS CC=${FC-"f95"} CFLAGS=$FCFLAGS compiler=$CC GCC=$ac_cv_fc_compiler_gnu compiler_FC=$CC func_cc_basename $compiler cc_basename=$func_cc_basename_result if test -n "$compiler"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5 printf %s "checking if libtool supports shared libraries... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5 printf "%s\n" "$can_build_shared" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5 printf %s "checking whether to build shared libraries... " >&6; } test no = "$can_build_shared" && enable_shared=no # On AIX, shared libraries and static libraries use the same namespace, and # are all built from PIC. case $host_os in aix3*) test yes = "$enable_shared" && enable_static=no if test -n "$RANLIB"; then archive_cmds="$archive_cmds~\$RANLIB \$lib" postinstall_cmds='$RANLIB $lib' fi ;; aix[4-9]*) if test ia64 != "$host_cpu"; then case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in yes,aix,yes) ;; # shared object as lib.so file only yes,svr4,*) ;; # shared object as lib.so archive member only yes,*) enable_static=no ;; # shared object in lib.a archive as well esac fi ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5 printf "%s\n" "$enable_shared" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5 printf %s "checking whether to build static libraries... " >&6; } # Make sure either enable_shared or enable_static is yes. test yes = "$enable_shared" || enable_static=yes { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5 printf "%s\n" "$enable_static" >&6; } GCC_FC=$ac_cv_fc_compiler_gnu LD_FC=$LD ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... # Dependencies to place before and after the object being linked: predep_objects_FC= postdep_objects_FC= predeps_FC= postdeps_FC= compiler_lib_search_path_FC= cat > conftest.$ac_ext <<_LT_EOF subroutine foo implicit none integer a a=0 return end _LT_EOF _lt_libdeps_save_CFLAGS=$CFLAGS case "$CC $CFLAGS " in #( *\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; *\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; *\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;; esac if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then # Parse the compiler output and extract the necessary # objects, libraries and library flags. # Sentinel used to keep track of whether or not we are before # the conftest object file. pre_test_object_deps_done=no for p in `eval "$output_verbose_link_cmd"`; do case $prev$p in -L* | -R* | -l*) # Some compilers place space between "-{L,R}" and the path. # Remove the space. if test x-L = "$p" || test x-R = "$p"; then prev=$p continue fi # Expand the sysroot to ease extracting the directories later. if test -z "$prev"; then case $p in -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; esac fi case $p in =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; esac if test no = "$pre_test_object_deps_done"; then case $prev in -L | -R) # Internal compiler library paths should come after those # provided the user. The postdeps already come after the # user supplied libs so there is no need to process them. if test -z "$compiler_lib_search_path_FC"; then compiler_lib_search_path_FC=$prev$p else compiler_lib_search_path_FC="${compiler_lib_search_path_FC} $prev$p" fi ;; # The "-l" case would never come before the object being # linked, so don't bother handling this case. esac else if test -z "$postdeps_FC"; then postdeps_FC=$prev$p else postdeps_FC="${postdeps_FC} $prev$p" fi fi prev= ;; *.lto.$objext) ;; # Ignore GCC LTO objects *.$objext) # This assumes that the test object file only shows up # once in the compiler output. if test "$p" = "conftest.$objext"; then pre_test_object_deps_done=yes continue fi if test no = "$pre_test_object_deps_done"; then if test -z "$predep_objects_FC"; then predep_objects_FC=$p else predep_objects_FC="$predep_objects_FC $p" fi else if test -z "$postdep_objects_FC"; then postdep_objects_FC=$p else postdep_objects_FC="$postdep_objects_FC $p" fi fi ;; *) ;; # Ignore the rest. esac done # Clean up. rm -f a.out a.exe else echo "libtool.m4: error: problem compiling FC test program" fi $RM -f confest.$objext CFLAGS=$_lt_libdeps_save_CFLAGS # PORTME: override above test on systems where it is broken case " $postdeps_FC " in *" -lc "*) archive_cmds_need_lc_FC=no ;; esac compiler_lib_search_dirs_FC= if test -n "${compiler_lib_search_path_FC}"; then compiler_lib_search_dirs_FC=`echo " ${compiler_lib_search_path_FC}" | $SED -e 's! -L! !g' -e 's!^ !!'` fi lt_prog_compiler_wl_FC= lt_prog_compiler_pic_FC= lt_prog_compiler_static_FC= if test yes = "$GCC"; then lt_prog_compiler_wl_FC='-Wl,' lt_prog_compiler_static_FC='-static' case $host_os in aix*) # All AIX code is PIC. if test ia64 = "$host_cpu"; then # AIX 5 now supports IA64 processor lt_prog_compiler_static_FC='-Bstatic' fi lt_prog_compiler_pic_FC='-fPIC' ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support lt_prog_compiler_pic_FC='-fPIC' ;; m68k) # FIXME: we need at least 68020 code to build shared libraries, but # adding the '-m68020' flag to GCC prevents building anything better, # like '-m68040'. lt_prog_compiler_pic_FC='-m68020 -resident32 -malways-restore-a4' ;; esac ;; beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. ;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style # (--disable-auto-import) libraries lt_prog_compiler_pic_FC='-DDLL_EXPORT' case $host_os in os2*) lt_prog_compiler_static_FC='$wl-static' ;; esac ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files lt_prog_compiler_pic_FC='-fno-common' ;; haiku*) # PIC is the default for Haiku. # The "-static" flag exists, but is broken. lt_prog_compiler_static_FC= ;; hpux*) # PIC is the default for 64-bit PA HP-UX, but not for 32-bit # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag # sets the default TLS model and affects inlining. case $host_cpu in hppa*64*) # +Z the default ;; *) lt_prog_compiler_pic_FC='-fPIC' ;; esac ;; interix[3-9]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; msdosdjgpp*) # Just because we use GCC doesn't mean we suddenly get shared libraries # on systems that don't support them. lt_prog_compiler_can_build_shared_FC=no enable_shared=no ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. lt_prog_compiler_pic_FC='-fPIC -shared' ;; sysv4*MP*) if test -d /usr/nec; then lt_prog_compiler_pic_FC=-Kconform_pic fi ;; *) lt_prog_compiler_pic_FC='-fPIC' ;; esac case $cc_basename in nvcc*) # Cuda Compiler Driver 2.2 lt_prog_compiler_wl_FC='-Xlinker ' if test -n "$lt_prog_compiler_pic_FC"; then lt_prog_compiler_pic_FC="-Xcompiler $lt_prog_compiler_pic_FC" fi ;; esac else # PORTME Check for flag to pass linker flags through the system compiler. case $host_os in aix*) lt_prog_compiler_wl_FC='-Wl,' if test ia64 = "$host_cpu"; then # AIX 5 now supports IA64 processor lt_prog_compiler_static_FC='-Bstatic' else lt_prog_compiler_static_FC='-bnso -bI:/lib/syscalls.exp' fi ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files lt_prog_compiler_pic_FC='-fno-common' case $cc_basename in nagfor*) # NAG Fortran compiler lt_prog_compiler_wl_FC='-Wl,-Wl,,' lt_prog_compiler_pic_FC='-PIC' lt_prog_compiler_static_FC='-Bstatic' ;; esac ;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). lt_prog_compiler_pic_FC='-DDLL_EXPORT' case $host_os in os2*) lt_prog_compiler_static_FC='$wl-static' ;; esac ;; hpux9* | hpux10* | hpux11*) lt_prog_compiler_wl_FC='-Wl,' # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but # not for PA HP-UX. case $host_cpu in hppa*64*|ia64*) # +Z the default ;; *) lt_prog_compiler_pic_FC='+Z' ;; esac # Is there a better lt_prog_compiler_static that works with the bundled CC? lt_prog_compiler_static_FC='$wl-a ${wl}archive' ;; irix5* | irix6* | nonstopux*) lt_prog_compiler_wl_FC='-Wl,' # PIC (with -KPIC) is the default. lt_prog_compiler_static_FC='-non_shared' ;; linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) case $cc_basename in # old Intel for x86_64, which still supported -KPIC. ecc*) lt_prog_compiler_wl_FC='-Wl,' lt_prog_compiler_pic_FC='-KPIC' lt_prog_compiler_static_FC='-static' ;; # flang / f18. f95 an alias for gfortran or flang on Debian flang* | f18* | f95*) lt_prog_compiler_wl_FC='-Wl,' lt_prog_compiler_pic_FC='-fPIC' lt_prog_compiler_static_FC='-static' ;; # icc used to be incompatible with GCC. # ICC 10 doesn't accept -KPIC any more. icc* | ifort*) lt_prog_compiler_wl_FC='-Wl,' lt_prog_compiler_pic_FC='-fPIC' lt_prog_compiler_static_FC='-static' ;; # Lahey Fortran 8.1. lf95*) lt_prog_compiler_wl_FC='-Wl,' lt_prog_compiler_pic_FC='--shared' lt_prog_compiler_static_FC='--static' ;; nagfor*) # NAG Fortran compiler lt_prog_compiler_wl_FC='-Wl,-Wl,,' lt_prog_compiler_pic_FC='-PIC' lt_prog_compiler_static_FC='-Bstatic' ;; tcc*) # Fabrice Bellard et al's Tiny C Compiler lt_prog_compiler_wl_FC='-Wl,' lt_prog_compiler_pic_FC='-fPIC' lt_prog_compiler_static_FC='-static' ;; pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group compilers (*not* the Pentium gcc compiler, # which looks to be a dead project) lt_prog_compiler_wl_FC='-Wl,' lt_prog_compiler_pic_FC='-fpic' lt_prog_compiler_static_FC='-Bstatic' ;; ccc*) lt_prog_compiler_wl_FC='-Wl,' # All Alpha code is PIC. lt_prog_compiler_static_FC='-non_shared' ;; xl* | bgxl* | bgf* | mpixl*) # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene lt_prog_compiler_wl_FC='-Wl,' lt_prog_compiler_pic_FC='-qpic' lt_prog_compiler_static_FC='-qstaticlink' ;; *) case `$CC -V 2>&1 | $SED 5q` in *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*) # Sun Fortran 8.3 passes all unrecognized flags to the linker lt_prog_compiler_pic_FC='-KPIC' lt_prog_compiler_static_FC='-Bstatic' lt_prog_compiler_wl_FC='' ;; *Sun\ F* | *Sun*Fortran*) lt_prog_compiler_pic_FC='-KPIC' lt_prog_compiler_static_FC='-Bstatic' lt_prog_compiler_wl_FC='-Qoption ld ' ;; *Sun\ C*) # Sun C 5.9 lt_prog_compiler_pic_FC='-KPIC' lt_prog_compiler_static_FC='-Bstatic' lt_prog_compiler_wl_FC='-Wl,' ;; *Intel*\ [CF]*Compiler*) lt_prog_compiler_wl_FC='-Wl,' lt_prog_compiler_pic_FC='-fPIC' lt_prog_compiler_static_FC='-static' ;; *Portland\ Group*) lt_prog_compiler_wl_FC='-Wl,' lt_prog_compiler_pic_FC='-fpic' lt_prog_compiler_static_FC='-Bstatic' ;; esac ;; esac ;; newsos6) lt_prog_compiler_pic_FC='-KPIC' lt_prog_compiler_static_FC='-Bstatic' ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. lt_prog_compiler_pic_FC='-fPIC -shared' ;; osf3* | osf4* | osf5*) lt_prog_compiler_wl_FC='-Wl,' # All OSF/1 code is PIC. lt_prog_compiler_static_FC='-non_shared' ;; rdos*) lt_prog_compiler_static_FC='-non_shared' ;; solaris*) lt_prog_compiler_pic_FC='-KPIC' lt_prog_compiler_static_FC='-Bstatic' case $cc_basename in f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) lt_prog_compiler_wl_FC='-Qoption ld ';; *) lt_prog_compiler_wl_FC='-Wl,';; esac ;; sunos4*) lt_prog_compiler_wl_FC='-Qoption ld ' lt_prog_compiler_pic_FC='-PIC' lt_prog_compiler_static_FC='-Bstatic' ;; sysv4 | sysv4.2uw2* | sysv4.3*) lt_prog_compiler_wl_FC='-Wl,' lt_prog_compiler_pic_FC='-KPIC' lt_prog_compiler_static_FC='-Bstatic' ;; sysv4*MP*) if test -d /usr/nec; then lt_prog_compiler_pic_FC='-Kconform_pic' lt_prog_compiler_static_FC='-Bstatic' fi ;; sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) lt_prog_compiler_wl_FC='-Wl,' lt_prog_compiler_pic_FC='-KPIC' lt_prog_compiler_static_FC='-Bstatic' ;; unicos*) lt_prog_compiler_wl_FC='-Wl,' lt_prog_compiler_can_build_shared_FC=no ;; uts4*) lt_prog_compiler_pic_FC='-pic' lt_prog_compiler_static_FC='-Bstatic' ;; *) lt_prog_compiler_can_build_shared_FC=no ;; esac fi case $host_os in # For platforms that do not support PIC, -DPIC is meaningless: *djgpp*) lt_prog_compiler_pic_FC= ;; *) lt_prog_compiler_pic_FC="$lt_prog_compiler_pic_FC" ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 printf %s "checking for $compiler option to produce PIC... " >&6; } if test ${lt_cv_prog_compiler_pic_FC+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_pic_FC=$lt_prog_compiler_pic_FC fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_FC" >&5 printf "%s\n" "$lt_cv_prog_compiler_pic_FC" >&6; } lt_prog_compiler_pic_FC=$lt_cv_prog_compiler_pic_FC # # Check to make sure the PIC flag actually works. # if test -n "$lt_prog_compiler_pic_FC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_FC works" >&5 printf %s "checking if $compiler PIC flag $lt_prog_compiler_pic_FC works... " >&6; } if test ${lt_cv_prog_compiler_pic_works_FC+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_pic_works_FC=no ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="$lt_prog_compiler_pic_FC" ## exclude from sc_useless_quotes_in_assignment # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. # The option is referenced via a variable to avoid confusing sed. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_pic_works_FC=yes fi fi $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works_FC" >&5 printf "%s\n" "$lt_cv_prog_compiler_pic_works_FC" >&6; } if test yes = "$lt_cv_prog_compiler_pic_works_FC"; then case $lt_prog_compiler_pic_FC in "" | " "*) ;; *) lt_prog_compiler_pic_FC=" $lt_prog_compiler_pic_FC" ;; esac else lt_prog_compiler_pic_FC= lt_prog_compiler_can_build_shared_FC=no fi fi # # Check to make sure the static flag actually works. # wl=$lt_prog_compiler_wl_FC eval lt_tmp_static_flag=\"$lt_prog_compiler_static_FC\" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 printf %s "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } if test ${lt_cv_prog_compiler_static_works_FC+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_static_works_FC=no save_LDFLAGS=$LDFLAGS LDFLAGS="$LDFLAGS $lt_tmp_static_flag" echo "$lt_simple_link_test_code" > conftest.$ac_ext if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then # The linker can only warn and ignore the option if not recognized # So say no if there are warnings if test -s conftest.err; then # Append any errors to the config.log. cat conftest.err 1>&5 $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_static_works_FC=yes fi else lt_cv_prog_compiler_static_works_FC=yes fi fi $RM -r conftest* LDFLAGS=$save_LDFLAGS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works_FC" >&5 printf "%s\n" "$lt_cv_prog_compiler_static_works_FC" >&6; } if test yes = "$lt_cv_prog_compiler_static_works_FC"; then : else lt_prog_compiler_static_FC= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } if test ${lt_cv_prog_compiler_c_o_FC+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_c_o_FC=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o_FC=yes fi fi chmod u+w . 2>&5 $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. $RM -r conftest $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_FC" >&5 printf "%s\n" "$lt_cv_prog_compiler_c_o_FC" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } if test ${lt_cv_prog_compiler_c_o_FC+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_prog_compiler_c_o_FC=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o_FC=yes fi fi chmod u+w . 2>&5 $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. $RM -r conftest $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_FC" >&5 printf "%s\n" "$lt_cv_prog_compiler_c_o_FC" >&6; } hard_links=nottested if test no = "$lt_cv_prog_compiler_c_o_FC" && test no != "$need_locks"; then # do not overwrite the value of need_locks provided by the user { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 printf %s "checking if we can lock with hard links... " >&6; } hard_links=yes $RM conftest* ln conftest.a conftest.b 2>/dev/null && hard_links=no touch conftest.a ln conftest.a conftest.b 2>&5 || hard_links=no ln conftest.a conftest.b 2>/dev/null && hard_links=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 printf "%s\n" "$hard_links" >&6; } if test no = "$hard_links"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&5 printf "%s\n" "$as_me: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&2;} need_locks=warn fi else need_locks=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 printf %s "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } runpath_var= allow_undefined_flag_FC= always_export_symbols_FC=no archive_cmds_FC= archive_expsym_cmds_FC= compiler_needs_object_FC=no enable_shared_with_static_runtimes_FC=no export_dynamic_flag_spec_FC= export_symbols_cmds_FC='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' hardcode_automatic_FC=no hardcode_direct_FC=no hardcode_direct_absolute_FC=no hardcode_libdir_flag_spec_FC= hardcode_libdir_separator_FC= hardcode_minus_L_FC=no hardcode_shlibpath_var_FC=unsupported inherit_rpath_FC=no link_all_deplibs_FC=unknown module_cmds_FC= module_expsym_cmds_FC= old_archive_from_new_cmds_FC= old_archive_from_expsyms_cmds_FC= thread_safe_flag_spec_FC= whole_archive_flag_spec_FC= # include_expsyms should be a list of space-separated symbols to be *always* # included in the symbol list include_expsyms_FC= # exclude_expsyms can be an extended regexp of symbols to exclude # it will be wrapped by ' (' and ')$', so one must not match beginning or # end of line. Example: 'a|bc|.*d.*' will exclude the symbols 'a' and 'bc', # as well as any symbol that contains 'd'. exclude_expsyms_FC='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out # platforms (ab)use it in PIC code, but their linkers get confused if # the symbol is explicitly referenced. Since portable code cannot # rely on this symbol name, it's probably fine to never include it in # preloaded symbol tables. # Exclude shared library initialization/finalization symbols. extract_expsyms_cmds= case $host_os in cygwin* | mingw* | pw32* | cegcc*) # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using # Microsoft Visual C++ or Intel C++ Compiler. if test yes != "$GCC"; then with_gnu_ld=no fi ;; interix*) # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) with_gnu_ld=yes ;; openbsd* | bitrig*) with_gnu_ld=no ;; linux* | k*bsd*-gnu | gnu*) link_all_deplibs_FC=no ;; esac ld_shlibs_FC=yes # On some targets, GNU ld is compatible enough with the native linker # that we're better off using the native interface for both. lt_use_gnu_ld_interface=no if test yes = "$with_gnu_ld"; then case $host_os in aix*) # The AIX port of GNU ld has always aspired to compatibility # with the native linker. However, as the warning in the GNU ld # block says, versions before 2.19.5* couldn't really create working # shared libraries, regardless of the interface used. case `$LD -v 2>&1` in *\ \(GNU\ Binutils\)\ 2.19.5*) ;; *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;; *\ \(GNU\ Binutils\)\ [3-9]*) ;; *) lt_use_gnu_ld_interface=yes ;; esac ;; *) lt_use_gnu_ld_interface=yes ;; esac fi if test yes = "$lt_use_gnu_ld_interface"; then # If archive_cmds runs LD, not CC, wlarc should be empty wlarc='$wl' # Set some defaults for GNU ld with shared library support. These # are reset later if shared libraries are not supported. Putting them # here allows them to be overridden if necessary. runpath_var=LD_RUN_PATH hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' export_dynamic_flag_spec_FC='$wl--export-dynamic' # ancient GNU ld didn't support --whole-archive et. al. if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then whole_archive_flag_spec_FC=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' else whole_archive_flag_spec_FC= fi supports_anon_versioning=no case `$LD -v | $SED -e 's/([^)]\+)\s\+//' 2>&1` in *GNU\ gold*) supports_anon_versioning=yes ;; *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... *\ 2.11.*) ;; # other 2.11 versions *) supports_anon_versioning=yes ;; esac # See if GNU ld supports shared libraries. case $host_os in aix[3-9]*) # On AIX/PPC, the GNU linker is very broken if test ia64 != "$host_cpu"; then ld_shlibs_FC=no cat <<_LT_EOF 1>&2 *** Warning: the GNU linker, at least up to release 2.19, is reported *** to be unable to reliably create shared libraries on AIX. *** Therefore, libtool is disabling shared libraries support. If you *** really care for shared libraries, you may want to install binutils *** 2.20 or above, or modify your PATH so that a non-GNU linker is found. *** You will then need to restart the configuration process. _LT_EOF fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support archive_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_FC='' ;; m68k) archive_cmds_FC='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' hardcode_libdir_flag_spec_FC='-L$libdir' hardcode_minus_L_FC=yes ;; esac ;; beos*) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then allow_undefined_flag_FC=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. FIXME archive_cmds_FC='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' else ld_shlibs_FC=no fi ;; cygwin* | mingw* | pw32* | cegcc*) # _LT_TAGVAR(hardcode_libdir_flag_spec, FC) is actually meaningless, # as there is no search path for DLLs. hardcode_libdir_flag_spec_FC='-L$libdir' export_dynamic_flag_spec_FC='$wl--export-all-symbols' allow_undefined_flag_FC=unsupported always_export_symbols_FC=no enable_shared_with_static_runtimes_FC=yes export_symbols_cmds_FC='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' exclude_expsyms_FC='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then archive_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file, use it as # is; otherwise, prepend EXPORTS... archive_expsym_cmds_FC='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else ld_shlibs_FC=no fi ;; haiku*) archive_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' link_all_deplibs_FC=yes ;; os2*) hardcode_libdir_flag_spec_FC='-L$libdir' hardcode_minus_L_FC=yes allow_undefined_flag_FC=unsupported shrext_cmds=.dll archive_cmds_FC='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' archive_expsym_cmds_FC='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ prefix_cmds="$SED"~ if test EXPORTS = "`$SED 1q $export_symbols`"; then prefix_cmds="$prefix_cmds -e 1d"; fi~ prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' old_archive_From_new_cmds_FC='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' enable_shared_with_static_runtimes_FC=yes file_list_spec_FC='@' ;; interix[3-9]*) hardcode_direct_FC=no hardcode_shlibpath_var_FC=no hardcode_libdir_flag_spec_FC='$wl-rpath,$libdir' export_dynamic_flag_spec_FC='$wl-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. archive_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' archive_expsym_cmds_FC='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) tmp_diet=no if test linux-dietlibc = "$host_os"; then case $cc_basename in diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) esac fi if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ && test no = "$tmp_diet" then tmp_addflag=' $pic_flag' tmp_sharedflag='-shared' case $cc_basename,$host_cpu in pgcc*) # Portland Group C compiler whole_archive_flag_spec_FC='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' tmp_addflag=' $pic_flag' ;; pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group f77 and f90 compilers whole_archive_flag_spec_FC='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' tmp_addflag=' $pic_flag -Mnomain' ;; ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 tmp_addflag=' -i_dynamic' ;; efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 tmp_addflag=' -i_dynamic -nofor_main' ;; ifc* | ifort*) # Intel Fortran compiler tmp_addflag=' -nofor_main' ;; lf95*) # Lahey Fortran 8.1 whole_archive_flag_spec_FC= tmp_sharedflag='--shared' ;; nagfor*) # NAGFOR 5.3 tmp_sharedflag='-Wl,-shared' ;; xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below) tmp_sharedflag='-qmkshrobj' tmp_addflag= ;; nvcc*) # Cuda Compiler Driver 2.2 whole_archive_flag_spec_FC='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' compiler_needs_object_FC=yes ;; esac case `$CC -V 2>&1 | $SED 5q` in *Sun\ C*) # Sun C 5.9 whole_archive_flag_spec_FC='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' compiler_needs_object_FC=yes tmp_sharedflag='-G' ;; *Sun\ F*) # Sun Fortran 8.3 tmp_sharedflag='-G' ;; esac archive_cmds_FC='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' if test yes = "$supports_anon_versioning"; then archive_expsym_cmds_FC='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' fi case $cc_basename in tcc*) hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' export_dynamic_flag_spec_FC='-rdynamic' ;; xlf* | bgf* | bgxlf* | mpixlf*) # IBM XL Fortran 10.1 on PPC cannot create shared libs itself whole_archive_flag_spec_FC='--whole-archive$convenience --no-whole-archive' hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' archive_cmds_FC='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' if test yes = "$supports_anon_versioning"; then archive_expsym_cmds_FC='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' fi ;; esac else ld_shlibs_FC=no fi ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then archive_cmds_FC='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' wlarc= else archive_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' fi ;; solaris*) if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then ld_shlibs_FC=no cat <<_LT_EOF 1>&2 *** Warning: The releases 2.8.* of the GNU linker cannot reliably *** create shared libraries on Solaris systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.9.1 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then archive_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs_FC=no fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) case `$LD -v 2>&1` in *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) ld_shlibs_FC=no cat <<_LT_EOF 1>&2 *** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot *** reliably create shared libraries on SCO systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.16.91.0.3 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF ;; *) # For security reasons, it is highly recommended that you always # use absolute paths for naming shared libraries, and exclude the # DT_RUNPATH tag from executables and libraries. But doing so # requires that you compile everything twice, which is a pain. if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' archive_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs_FC=no fi ;; esac ;; sunos4*) archive_cmds_FC='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' wlarc= hardcode_direct_FC=yes hardcode_shlibpath_var_FC=no ;; *) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then archive_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs_FC=no fi ;; esac if test no = "$ld_shlibs_FC"; then runpath_var= hardcode_libdir_flag_spec_FC= export_dynamic_flag_spec_FC= whole_archive_flag_spec_FC= fi else # PORTME fill in a description of your system's linker (not GNU ld) case $host_os in aix3*) allow_undefined_flag_FC=unsupported always_export_symbols_FC=yes archive_expsym_cmds_FC='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' # Note: this linker hardcodes the directories in LIBPATH if there # are no directories specified by -L. hardcode_minus_L_FC=yes if test yes = "$GCC" && test -z "$lt_prog_compiler_static"; then # Neither direct hardcoding nor static linking is supported with a # broken collect2. hardcode_direct_FC=unsupported fi ;; aix[4-9]*) if test ia64 = "$host_cpu"; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag= else # If we're using GNU nm, then we don't want the "-C" option. # -C means demangle to GNU nm, but means don't demangle to AIX nm. # Without the "-l" option, or with the "-B" option, AIX nm treats # weak defined symbols like other global defined symbols, whereas # GNU nm marks them as "W". # While the 'weak' keyword is ignored in the Export File, we need # it in the Import File for the 'aix-soname' feature, so we have # to replace the "-B" option with "-P" for AIX nm. if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then export_symbols_cmds_FC='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' else export_symbols_cmds_FC='`func_echo_all $NM | $SED -e '\''s/B\([^B]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && (substr(\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' fi aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # have runtime linking enabled, and use it for executables. # For shared libraries, we enable/disable runtime linking # depending on the kind of the shared library created - # when "with_aix_soname,aix_use_runtimelinking" is: # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables # "aix,yes" lib.so shared, rtl:yes, for executables # lib.a static archive # "both,no" lib.so.V(shr.o) shared, rtl:yes # lib.a(lib.so.V) shared, rtl:no, for executables # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables # lib.a(lib.so.V) shared, rtl:no # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables # lib.a static archive case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) for ld_flag in $LDFLAGS; do if (test x-brtl = "x$ld_flag" || test x-Wl,-brtl = "x$ld_flag"); then aix_use_runtimelinking=yes break fi done if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then # With aix-soname=svr4, we create the lib.so.V shared archives only, # so we don't have lib.a shared libs to link our executables. # We have to force runtime linking in this case. aix_use_runtimelinking=yes LDFLAGS="$LDFLAGS -Wl,-brtl" fi ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. archive_cmds_FC='' hardcode_direct_FC=yes hardcode_direct_absolute_FC=yes hardcode_libdir_separator_FC=':' link_all_deplibs_FC=yes file_list_spec_FC='$wl-f,' case $with_aix_soname,$aix_use_runtimelinking in aix,*) ;; # traditional, no import file svr4,* | *,yes) # use import file # The Import File defines what to hardcode. hardcode_direct_FC=no hardcode_direct_absolute_FC=no ;; esac if test yes = "$GCC"; then case $host_os in aix4.[012]|aix4.[012].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`$CC -print-prog-name=collect2` if test -f "$collect2name" && strings "$collect2name" | $GREP resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 hardcode_direct_FC=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking hardcode_minus_L_FC=yes hardcode_libdir_flag_spec_FC='-L$libdir' hardcode_libdir_separator_FC= fi ;; esac shared_flag='-shared' if test yes = "$aix_use_runtimelinking"; then shared_flag="$shared_flag "'$wl-G' fi # Need to ensure runtime linking is disabled for the traditional # shared library, or the linker may eventually find shared libraries # /with/ Import File - we do not want to mix them. shared_flag_aix='-shared' shared_flag_svr4='-shared $wl-G' else # not using gcc if test ia64 = "$host_cpu"; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test yes = "$aix_use_runtimelinking"; then shared_flag='$wl-G' else shared_flag='$wl-bM:SRE' fi shared_flag_aix='$wl-bM:SRE' shared_flag_svr4='$wl-G' fi fi export_dynamic_flag_spec_FC='$wl-bexpall' # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to export. always_export_symbols_FC=yes if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. allow_undefined_flag_FC='-berok' # Determine the default libpath from the value encoded in an # empty executable. if test set = "${lt_cv_aix_libpath+set}"; then aix_libpath=$lt_cv_aix_libpath else if test ${lt_cv_aix_libpath__FC+y} then : printf %s "(cached) " >&6 else $as_nop cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }' lt_cv_aix_libpath__FC=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$lt_cv_aix_libpath__FC"; then lt_cv_aix_libpath__FC=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test -z "$lt_cv_aix_libpath__FC"; then lt_cv_aix_libpath__FC=/usr/lib:/lib fi fi aix_libpath=$lt_cv_aix_libpath__FC fi hardcode_libdir_flag_spec_FC='$wl-blibpath:$libdir:'"$aix_libpath" archive_expsym_cmds_FC='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag else if test ia64 = "$host_cpu"; then hardcode_libdir_flag_spec_FC='$wl-R $libdir:/usr/lib:/lib' allow_undefined_flag_FC="-z nodefs" archive_expsym_cmds_FC="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an # empty executable. if test set = "${lt_cv_aix_libpath+set}"; then aix_libpath=$lt_cv_aix_libpath else if test ${lt_cv_aix_libpath__FC+y} then : printf %s "(cached) " >&6 else $as_nop cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }' lt_cv_aix_libpath__FC=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$lt_cv_aix_libpath__FC"; then lt_cv_aix_libpath__FC=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test -z "$lt_cv_aix_libpath__FC"; then lt_cv_aix_libpath__FC=/usr/lib:/lib fi fi aix_libpath=$lt_cv_aix_libpath__FC fi hardcode_libdir_flag_spec_FC='$wl-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. no_undefined_flag_FC=' $wl-bernotok' allow_undefined_flag_FC=' $wl-berok' if test yes = "$with_gnu_ld"; then # We only use this code for GNU lds that support --whole-archive. whole_archive_flag_spec_FC='$wl--whole-archive$convenience $wl--no-whole-archive' else # Exported symbols can be pulled into shared objects from archives whole_archive_flag_spec_FC='$convenience' fi archive_cmds_need_lc_FC=yes archive_expsym_cmds_FC='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' # -brtl affects multiple linker settings, -berok does not and is overridden later compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([, ]\\)%-berok\\1%g"`' if test svr4 != "$with_aix_soname"; then # This is similar to how AIX traditionally builds its shared libraries. archive_expsym_cmds_FC="$archive_expsym_cmds_FC"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' fi if test aix != "$with_aix_soname"; then archive_expsym_cmds_FC="$archive_expsym_cmds_FC"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! $soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' else # used by -dlpreopen to get the symbols archive_expsym_cmds_FC="$archive_expsym_cmds_FC"'~$MV $output_objdir/$realname.d/$soname $output_objdir' fi archive_expsym_cmds_FC="$archive_expsym_cmds_FC"'~$RM -r $output_objdir/$realname.d' fi fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support archive_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' archive_expsym_cmds_FC='' ;; m68k) archive_cmds_FC='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' hardcode_libdir_flag_spec_FC='-L$libdir' hardcode_minus_L_FC=yes ;; esac ;; bsdi[45]*) export_dynamic_flag_spec_FC=-rdynamic ;; cygwin* | mingw* | pw32* | cegcc*) # When not using gcc, we currently assume that we are using # Microsoft Visual C++ or Intel C++ Compiler. # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. case $cc_basename in cl* | icl*) # Native MSVC or ICC hardcode_libdir_flag_spec_FC=' ' allow_undefined_flag_FC=unsupported always_export_symbols_FC=yes file_list_spec_FC='@' # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=.dll # FIXME: Setting linknames here is a bad hack. archive_cmds_FC='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' archive_expsym_cmds_FC='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then cp "$export_symbols" "$output_objdir/$soname.def"; echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; else $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; fi~ $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ linknames=' # The linker will not automatically build a static lib if we build a DLL. # _LT_TAGVAR(old_archive_from_new_cmds, FC)='true' enable_shared_with_static_runtimes_FC=yes exclude_expsyms_FC='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' export_symbols_cmds_FC='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' # Don't use ranlib old_postinstall_cmds_FC='chmod 644 $oldlib' postlink_cmds_FC='lt_outputfile="@OUTPUT@"~ lt_tool_outputfile="@TOOL_OUTPUT@"~ case $lt_outputfile in *.exe|*.EXE) ;; *) lt_outputfile=$lt_outputfile.exe lt_tool_outputfile=$lt_tool_outputfile.exe ;; esac~ if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; $RM "$lt_outputfile.manifest"; fi' ;; *) # Assume MSVC and ICC wrapper hardcode_libdir_flag_spec_FC=' ' allow_undefined_flag_FC=unsupported # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=.dll # FIXME: Setting linknames here is a bad hack. archive_cmds_FC='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' # The linker will automatically build a .lib file if we build a DLL. old_archive_from_new_cmds_FC='true' # FIXME: Should let the user specify the lib program. old_archive_cmds_FC='lib -OUT:$oldlib$oldobjs$old_deplibs' enable_shared_with_static_runtimes_FC=yes ;; esac ;; darwin* | rhapsody*) archive_cmds_need_lc_FC=no hardcode_direct_FC=no hardcode_automatic_FC=yes hardcode_shlibpath_var_FC=unsupported if test yes = "$lt_cv_ld_force_load"; then whole_archive_flag_spec_FC='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' compiler_needs_object_FC=yes else whole_archive_flag_spec_FC='' fi link_all_deplibs_FC=yes allow_undefined_flag_FC=$_lt_dar_allow_undefined case $cc_basename in ifort*|nagfor*) _lt_dar_can_shared=yes ;; *) _lt_dar_can_shared=$GCC ;; esac if test yes = "$_lt_dar_can_shared"; then output_verbose_link_cmd=func_echo_all archive_cmds_FC="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" module_cmds_FC="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" archive_expsym_cmds_FC="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" module_expsym_cmds_FC="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" else ld_shlibs_FC=no fi ;; dgux*) archive_cmds_FC='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec_FC='-L$libdir' hardcode_shlibpath_var_FC=no ;; # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor # support. Future versions do this automatically, but an explicit c++rt0.o # does not break anything, and helps significantly (at the cost of a little # extra space). freebsd2.2*) archive_cmds_FC='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' hardcode_libdir_flag_spec_FC='-R$libdir' hardcode_direct_FC=yes hardcode_shlibpath_var_FC=no ;; # Unfortunately, older versions of FreeBSD 2 do not have this feature. freebsd2.*) archive_cmds_FC='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' hardcode_direct_FC=yes hardcode_minus_L_FC=yes hardcode_shlibpath_var_FC=no ;; # FreeBSD 3 and greater uses gcc -shared to do shared libraries. freebsd* | dragonfly* | midnightbsd*) archive_cmds_FC='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec_FC='-R$libdir' hardcode_direct_FC=yes hardcode_shlibpath_var_FC=no ;; hpux9*) if test yes = "$GCC"; then archive_cmds_FC='$RM $output_objdir/$soname~$CC -shared $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' else archive_cmds_FC='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' fi hardcode_libdir_flag_spec_FC='$wl+b $wl$libdir' hardcode_libdir_separator_FC=: hardcode_direct_FC=yes # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L_FC=yes export_dynamic_flag_spec_FC='$wl-E' ;; hpux10*) if test yes,no = "$GCC,$with_gnu_ld"; then archive_cmds_FC='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds_FC='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' fi if test no = "$with_gnu_ld"; then hardcode_libdir_flag_spec_FC='$wl+b $wl$libdir' hardcode_libdir_separator_FC=: hardcode_direct_FC=yes hardcode_direct_absolute_FC=yes export_dynamic_flag_spec_FC='$wl-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L_FC=yes fi ;; hpux11*) if test yes,no = "$GCC,$with_gnu_ld"; then case $host_cpu in hppa*64*) archive_cmds_FC='$CC -shared $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds_FC='$CC -shared $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) archive_cmds_FC='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac else case $host_cpu in hppa*64*) archive_cmds_FC='$CC -b $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds_FC='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) archive_cmds_FC='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac fi if test no = "$with_gnu_ld"; then hardcode_libdir_flag_spec_FC='$wl+b $wl$libdir' hardcode_libdir_separator_FC=: case $host_cpu in hppa*64*|ia64*) hardcode_direct_FC=no hardcode_shlibpath_var_FC=no ;; *) hardcode_direct_FC=yes hardcode_direct_absolute_FC=yes export_dynamic_flag_spec_FC='$wl-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L_FC=yes ;; esac fi ;; irix5* | irix6* | nonstopux*) if test yes = "$GCC"; then archive_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' # Try to use the -exported_symbol ld option, if it does not # work, assume that -exports_file does not work either and # implicitly export all symbols. # This should be the same for all languages, so no per-tag cache variable. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5 printf %s "checking whether the $host_os linker accepts -exported_symbol... " >&6; } if test ${lt_cv_irix_exported_symbol+y} then : printf %s "(cached) " >&6 else $as_nop save_LDFLAGS=$LDFLAGS LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" cat > conftest.$ac_ext <<_ACEOF subroutine foo end _ACEOF if ac_fn_fc_try_link "$LINENO" then : lt_cv_irix_exported_symbol=yes else $as_nop lt_cv_irix_exported_symbol=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LDFLAGS=$save_LDFLAGS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 printf "%s\n" "$lt_cv_irix_exported_symbol" >&6; } if test yes = "$lt_cv_irix_exported_symbol"; then archive_expsym_cmds_FC='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' fi link_all_deplibs_FC=no else archive_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' archive_expsym_cmds_FC='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' fi archive_cmds_need_lc_FC='no' hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' hardcode_libdir_separator_FC=: inherit_rpath_FC=yes link_all_deplibs_FC=yes ;; linux*) case $cc_basename in tcc*) # Fabrice Bellard et al's Tiny C Compiler ld_shlibs_FC=yes archive_cmds_FC='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' ;; esac ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then archive_cmds_FC='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out else archive_cmds_FC='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF fi hardcode_libdir_flag_spec_FC='-R$libdir' hardcode_direct_FC=yes hardcode_shlibpath_var_FC=no ;; newsos6) archive_cmds_FC='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct_FC=yes hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' hardcode_libdir_separator_FC=: hardcode_shlibpath_var_FC=no ;; *nto* | *qnx*) ;; openbsd* | bitrig*) if test -f /usr/libexec/ld.so; then hardcode_direct_FC=yes hardcode_shlibpath_var_FC=no hardcode_direct_absolute_FC=yes if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then archive_cmds_FC='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_FC='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags $wl-retain-symbols-file,$export_symbols' hardcode_libdir_flag_spec_FC='$wl-rpath,$libdir' export_dynamic_flag_spec_FC='$wl-E' else archive_cmds_FC='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec_FC='$wl-rpath,$libdir' fi else ld_shlibs_FC=no fi ;; os2*) hardcode_libdir_flag_spec_FC='-L$libdir' hardcode_minus_L_FC=yes allow_undefined_flag_FC=unsupported shrext_cmds=.dll archive_cmds_FC='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' archive_expsym_cmds_FC='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ prefix_cmds="$SED"~ if test EXPORTS = "`$SED 1q $export_symbols`"; then prefix_cmds="$prefix_cmds -e 1d"; fi~ prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' old_archive_From_new_cmds_FC='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' enable_shared_with_static_runtimes_FC=yes file_list_spec_FC='@' ;; osf3*) if test yes = "$GCC"; then allow_undefined_flag_FC=' $wl-expect_unresolved $wl\*' archive_cmds_FC='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' else allow_undefined_flag_FC=' -expect_unresolved \*' archive_cmds_FC='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' fi archive_cmds_need_lc_FC='no' hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' hardcode_libdir_separator_FC=: ;; osf4* | osf5*) # as osf3* with the addition of -msym flag if test yes = "$GCC"; then allow_undefined_flag_FC=' $wl-expect_unresolved $wl\*' archive_cmds_FC='$CC -shared$allow_undefined_flag $pic_flag $libobjs $deplibs $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' hardcode_libdir_flag_spec_FC='$wl-rpath $wl$libdir' else allow_undefined_flag_FC=' -expect_unresolved \*' archive_cmds_FC='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' archive_expsym_cmds_FC='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ $CC -shared$allow_undefined_flag $wl-input $wl$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~$RM $lib.exp' # Both c and cxx compiler support -rpath directly hardcode_libdir_flag_spec_FC='-rpath $libdir' fi archive_cmds_need_lc_FC='no' hardcode_libdir_separator_FC=: ;; solaris*) no_undefined_flag_FC=' -z defs' if test yes = "$GCC"; then wlarc='$wl' archive_cmds_FC='$CC -shared $pic_flag $wl-z ${wl}text $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_FC='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -shared $pic_flag $wl-z ${wl}text $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' else case `$CC -V 2>&1` in *"Compilers 5.0"*) wlarc='' archive_cmds_FC='$LD -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $linker_flags' archive_expsym_cmds_FC='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $LD -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' ;; *) wlarc='$wl' archive_cmds_FC='$CC -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_FC='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' ;; esac fi hardcode_libdir_flag_spec_FC='-R$libdir' hardcode_shlibpath_var_FC=no case $host_os in solaris2.[0-5] | solaris2.[0-5].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands '-z linker_flag'. GCC discards it without '$wl', # but is careful enough not to reorder. # Supported since Solaris 2.6 (maybe 2.5.1?) if test yes = "$GCC"; then whole_archive_flag_spec_FC='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' else whole_archive_flag_spec_FC='-z allextract$convenience -z defaultextract' fi ;; esac link_all_deplibs_FC=yes ;; sunos4*) if test sequent = "$host_vendor"; then # Use $CC to link under sequent, because it throws in some extra .o # files that make .init and .fini sections work. archive_cmds_FC='$CC -G $wl-h $soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds_FC='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' fi hardcode_libdir_flag_spec_FC='-L$libdir' hardcode_direct_FC=yes hardcode_minus_L_FC=yes hardcode_shlibpath_var_FC=no ;; sysv4) case $host_vendor in sni) archive_cmds_FC='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct_FC=yes # is this really true??? ;; siemens) ## LD is ld it makes a PLAMLIB ## CC just makes a GrossModule. archive_cmds_FC='$LD -G -o $lib $libobjs $deplibs $linker_flags' reload_cmds_FC='$CC -r -o $output$reload_objs' hardcode_direct_FC=no ;; motorola) archive_cmds_FC='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct_FC=no #Motorola manual says yes, but my tests say they lie ;; esac runpath_var='LD_RUN_PATH' hardcode_shlibpath_var_FC=no ;; sysv4.3*) archive_cmds_FC='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_shlibpath_var_FC=no export_dynamic_flag_spec_FC='-Bexport' ;; sysv4*MP*) if test -d /usr/nec; then archive_cmds_FC='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_shlibpath_var_FC=no runpath_var=LD_RUN_PATH hardcode_runpath_var=yes ld_shlibs_FC=yes fi ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) no_undefined_flag_FC='$wl-z,text' archive_cmds_need_lc_FC=no hardcode_shlibpath_var_FC=no runpath_var='LD_RUN_PATH' if test yes = "$GCC"; then archive_cmds_FC='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_FC='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds_FC='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_FC='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We CANNOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. no_undefined_flag_FC='$wl-z,text' allow_undefined_flag_FC='$wl-z,nodefs' archive_cmds_need_lc_FC=no hardcode_shlibpath_var_FC=no hardcode_libdir_flag_spec_FC='$wl-R,$libdir' hardcode_libdir_separator_FC=':' link_all_deplibs_FC=yes export_dynamic_flag_spec_FC='$wl-Bexport' runpath_var='LD_RUN_PATH' if test yes = "$GCC"; then archive_cmds_FC='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_FC='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds_FC='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_FC='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; uts4*) archive_cmds_FC='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec_FC='-L$libdir' hardcode_shlibpath_var_FC=no ;; *) ld_shlibs_FC=no ;; esac if test sni = "$host_vendor"; then case $host in sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) export_dynamic_flag_spec_FC='$wl-Blargedynsym' ;; esac fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_FC" >&5 printf "%s\n" "$ld_shlibs_FC" >&6; } test no = "$ld_shlibs_FC" && can_build_shared=no with_gnu_ld_FC=$with_gnu_ld # # Do we need to explicitly link libc? # case "x$archive_cmds_need_lc_FC" in x|xyes) # Assume -lc should be added archive_cmds_need_lc_FC=yes if test yes,yes = "$GCC,$enable_shared"; then case $archive_cmds_FC in *'~'*) # FIXME: we may have to deal with multi-command sequences. ;; '$CC '*) # Test whether the compiler implicitly links with -lc since on some # systems, -lgcc has to come before -lc. If gcc already passes -lc # to ld, don't add -lc before -lgcc. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 printf %s "checking whether -lc should be explicitly linked in... " >&6; } if test ${lt_cv_archive_cmds_need_lc_FC+y} then : printf %s "(cached) " >&6 else $as_nop $RM conftest* echo "$lt_simple_compile_test_code" > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } 2>conftest.err; then soname=conftest lib=conftest libobjs=conftest.$ac_objext deplibs= wl=$lt_prog_compiler_wl_FC pic_flag=$lt_prog_compiler_pic_FC compiler_flags=-v linker_flags=-v verstring= output_objdir=. libname=conftest lt_save_allow_undefined_flag=$allow_undefined_flag_FC allow_undefined_flag_FC= if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds_FC 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 (eval $archive_cmds_FC 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } then lt_cv_archive_cmds_need_lc_FC=no else lt_cv_archive_cmds_need_lc_FC=yes fi allow_undefined_flag_FC=$lt_save_allow_undefined_flag else cat conftest.err 1>&5 fi $RM conftest* fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc_FC" >&5 printf "%s\n" "$lt_cv_archive_cmds_need_lc_FC" >&6; } archive_cmds_need_lc_FC=$lt_cv_archive_cmds_need_lc_FC ;; esac fi ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 printf %s "checking dynamic linker characteristics... " >&6; } library_names_spec= libname_spec='lib$name' soname_spec= shrext_cmds=.so postinstall_cmds= postuninstall_cmds= finish_cmds= finish_eval= shlibpath_var= shlibpath_overrides_runpath=unknown version_type=none dynamic_linker="$host_os ld.so" sys_lib_dlsearch_path_spec="/lib /usr/lib" need_lib_prefix=unknown hardcode_into_libs=no # when you set need_version to no, make sure it does not cause -set_version # flags to be left without arguments need_version=unknown case $host_os in aix3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname.a' shlibpath_var=LIBPATH # AIX 3 has no versioning support, so we append a major version to the name. soname_spec='$libname$release$shared_ext$major' ;; aix[4-9]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no hardcode_into_libs=yes if test ia64 = "$host_cpu"; then # AIX 5 supports IA64 library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH else # With GCC up to 2.95.x, collect2 would create an import file # for dependence libraries. The import file would start with # the line '#! .'. This would cause the generated library to # depend on '.', always an invalid library. This was fixed in # development snapshots of GCC prior to 3.0. case $host_os in aix4 | aix4.[01] | aix4.[01].*) if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' echo ' yes ' echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then : else can_build_shared=no fi ;; esac # Using Import Files as archive members, it is possible to support # filename-based versioning of shared library archives on AIX. While # this would work for both with and without runtime linking, it will # prevent static linking of such archives. So we do filename-based # shared library versioning with .so extension only, which is used # when both runtime linking and shared linking is enabled. # Unfortunately, runtime linking may impact performance, so we do # not want this to be the default eventually. Also, we use the # versioned .so libs for executables only if there is the -brtl # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. # To allow for filename-based versioning support, we need to create # libNAME.so.V as an archive file, containing: # *) an Import File, referring to the versioned filename of the # archive as well as the shared archive member, telling the # bitwidth (32 or 64) of that shared object, and providing the # list of exported symbols of that shared object, eventually # decorated with the 'weak' keyword # *) the shared object with the F_LOADONLY flag set, to really avoid # it being seen by the linker. # At run time we better use the real file rather than another symlink, # but for link time we create the symlink libNAME.so -> libNAME.so.V case $with_aix_soname,$aix_use_runtimelinking in # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct # soname into executable. Probably we can add versioning support to # collect2, so additional links can be useful in future. aix,yes) # traditional libtool dynamic_linker='AIX unversionable lib.so' # If using run time linking (on AIX 4.2 or later) use lib.so # instead of lib.a to let people know that these are not # typical AIX shared libraries. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' ;; aix,no) # traditional AIX only dynamic_linker='AIX lib.a(lib.so.V)' # We preserve .a as extension for shared libraries through AIX4.2 # and later when we are not doing run time linking. library_names_spec='$libname$release.a $libname.a' soname_spec='$libname$release$shared_ext$major' ;; svr4,*) # full svr4 only dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o)" library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' # We do not specify a path in Import Files, so LIBPATH fires. shlibpath_overrides_runpath=yes ;; *,yes) # both, prefer svr4 dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o), lib.a(lib.so.V)" library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' # unpreferred sharedlib libNAME.a needs extra handling postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' # We do not specify a path in Import Files, so LIBPATH fires. shlibpath_overrides_runpath=yes ;; *,no) # both, prefer aix dynamic_linker="AIX lib.a(lib.so.V), lib.so.V($shared_archive_member_spec.o)" library_names_spec='$libname$release.a $libname.a' soname_spec='$libname$release$shared_ext$major' # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' ;; esac shlibpath_var=LIBPATH fi ;; amigaos*) case $host_cpu in powerpc) # Since July 2007 AmigaOS4 officially supports .so libraries. # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' ;; m68k) library_names_spec='$libname.ixlibrary $libname.a' # Create ${libname}_ixlibrary.a entries in /sys/libs. finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' ;; esac ;; beos*) library_names_spec='$libname$shared_ext' dynamic_linker="$host_os ld.so" shlibpath_var=LIBRARY_PATH ;; bsdi[45]*) version_type=linux # correct to gnu/linux during the next big refactor need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" # the default ld.so.conf also contains /usr/contrib/lib and # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow # libtool to hard-code these into programs ;; cygwin* | mingw* | pw32* | cegcc*) version_type=windows shrext_cmds=.dll need_version=no need_lib_prefix=no case $GCC,$cc_basename in yes,*) # gcc library_names_spec='$libname.dll.a' # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \$file`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname~ if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; fi' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' ;; mingw* | cegcc*) # MinGW DLLs use traditional 'lib' prefix soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' ;; pw32*) # pw32 DLLs use 'pw' prefix rather than 'lib' library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' ;; esac dynamic_linker='Win32 ld.exe' ;; *,cl* | *,icl*) # Native MSVC or ICC libname_spec='$name' soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' library_names_spec='$libname.dll.lib' case $build_os in mingw*) sys_lib_search_path_spec= lt_save_ifs=$IFS IFS=';' for lt_path in $LIB do IFS=$lt_save_ifs # Let DOS variable expansion print the short 8.3 style file name. lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" done IFS=$lt_save_ifs # Convert to MSYS style. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` ;; cygwin*) # Convert to unix form, then to dos form, then back to unix form # but this time dos style (no spaces!) so that the unix form looks # like /cygdrive/c/PROGRA~1:/cygdr... sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` ;; *) sys_lib_search_path_spec=$LIB if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then # It is most probably a Windows format PATH. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` else sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi # FIXME: find the short name or the path components, as spaces are # common. (e.g. "Program Files" -> "PROGRA~1") ;; esac # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \$file`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes dynamic_linker='Win32 link.exe' ;; *) # Assume MSVC and ICC wrapper library_names_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext $libname.lib' dynamic_linker='Win32 ld.exe' ;; esac # FIXME: first we should search . and the directory the executable is in shlibpath_var=PATH ;; darwin* | rhapsody*) dynamic_linker="$host_os dyld" version_type=darwin need_lib_prefix=no need_version=no library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' soname_spec='$libname$release$major$shared_ext' shlibpath_overrides_runpath=yes shlibpath_var=DYLD_LIBRARY_PATH shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' ;; dgux*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH ;; freebsd* | dragonfly* | midnightbsd*) # DragonFly does not have aout. When/if they implement a new # versioning mechanism, adjust this. if test -x /usr/bin/objformat; then objformat=`/usr/bin/objformat` else case $host_os in freebsd[23].*) objformat=aout ;; *) objformat=elf ;; esac fi version_type=freebsd-$objformat case $version_type in freebsd-elf*) library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' need_version=no need_lib_prefix=no ;; freebsd-*) library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' need_version=yes ;; esac shlibpath_var=LD_LIBRARY_PATH case $host_os in freebsd2.*) shlibpath_overrides_runpath=yes ;; freebsd3.[01]* | freebsdelf3.[01]*) shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; *) # from 4.6 on, and DragonFly shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; esac ;; haiku*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no dynamic_linker="$host_os runtime_loader" library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LIBRARY_PATH shlibpath_overrides_runpath=no sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' hardcode_into_libs=yes ;; hpux9* | hpux10* | hpux11*) # Give a soname corresponding to the major version so that dld.sl refuses to # link against other versions. version_type=sunos need_lib_prefix=no need_version=no case $host_cpu in ia64*) shrext_cmds='.so' hardcode_into_libs=yes dynamic_linker="$host_os dld.so" shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' if test 32 = "$HPUX_IA64_MODE"; then sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" sys_lib_dlsearch_path_spec=/usr/lib/hpux32 else sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" sys_lib_dlsearch_path_spec=/usr/lib/hpux64 fi ;; hppa*64*) shrext_cmds='.sl' hardcode_into_libs=yes dynamic_linker="$host_os dld.sl" shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; *) shrext_cmds='.sl' dynamic_linker="$host_os dld.sl" shlibpath_var=SHLIB_PATH shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' ;; esac # HP-UX runs *really* slowly unless shared libraries are mode 555, ... postinstall_cmds='chmod 555 $lib' # or fails outright, so override atomically: install_override_mode=555 ;; interix[3-9]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; irix5* | irix6* | nonstopux*) case $host_os in nonstopux*) version_type=nonstopux ;; *) if test yes = "$lt_cv_prog_gnu_ld"; then version_type=linux # correct to gnu/linux during the next big refactor else version_type=irix fi ;; esac need_lib_prefix=no need_version=no soname_spec='$libname$release$shared_ext$major' library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' case $host_os in irix5* | nonstopux*) libsuff= shlibsuff= ;; *) case $LD in # libtool.m4 will add one of these switches to LD *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") libsuff= shlibsuff= libmagic=32-bit;; *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") libsuff=64 shlibsuff=64 libmagic=64-bit;; *) libsuff= shlibsuff= libmagic=never-match;; esac ;; esac shlibpath_var=LD_LIBRARY${shlibsuff}_PATH shlibpath_overrides_runpath=no sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" hardcode_into_libs=yes ;; # No shared lib support for Linux oldld, aout, or coff. linux*oldld* | linux*aout* | linux*coff*) dynamic_linker=no ;; linux*android*) version_type=none # Android doesn't support versioned libraries. need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext' soname_spec='$libname$release$shared_ext' finish_cmds= shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes dynamic_linker='Android linker' # Don't embed -rpath directories since the linker doesn't support them. hardcode_libdir_flag_spec_FC='-L$libdir' ;; # This must be glibc/ELF. linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no # Some binutils ld are patched to set DT_RUNPATH if test ${lt_cv_shlibpath_overrides_runpath+y} then : printf %s "(cached) " >&6 else $as_nop lt_cv_shlibpath_overrides_runpath=no save_LDFLAGS=$LDFLAGS save_libdir=$libdir eval "libdir=/foo; wl=\"$lt_prog_compiler_wl_FC\"; \ LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec_FC\"" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null then : lt_cv_shlibpath_overrides_runpath=yes fi fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LDFLAGS=$save_LDFLAGS libdir=$save_libdir fi shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes # Ideally, we could use ldconfig to report *all* directores which are # searched for libraries, however this is still not possible. Aside from not # being certain /sbin/ldconfig is available, command # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, # even though it is searched at run-time. Try to do the best guess by # appending ld.so.conf contents (and includes) to the search path. if test -f /etc/ld.so.conf; then lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" fi # We used to test for /lib/ld.so.1 and disable shared libraries on # powerpc, because MkLinux only supported shared libraries with the # GNU dynamic linker. Since this was broken with cross compilers, # most powerpc-linux boxes support dynamic linking these days and # people can always --disable-shared, the test was removed, and we # assume the GNU/Linux dynamic linker is in use. dynamic_linker='GNU/Linux ld.so' ;; netbsdelf*-gnu) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='NetBSD ld.elf_so' ;; netbsd*) version_type=sunos need_lib_prefix=no need_version=no if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' dynamic_linker='NetBSD (a.out) ld.so' else library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' dynamic_linker='NetBSD ld.elf_so' fi shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; newsos6) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; *nto* | *qnx*) version_type=qnx need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='ldqnx.so' ;; openbsd* | bitrig*) version_type=sunos sys_lib_dlsearch_path_spec=/usr/lib need_lib_prefix=no if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then need_version=no else need_version=yes fi library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; os2*) libname_spec='$name' version_type=windows shrext_cmds=.dll need_version=no need_lib_prefix=no # OS/2 can only load a DLL with a base name of 8 characters or less. soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; v=$($ECHO $release$versuffix | tr -d .-); n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); $ECHO $n$v`$shared_ext' library_names_spec='${libname}_dll.$libext' dynamic_linker='OS/2 ld.exe' shlibpath_var=BEGINLIBPATH sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec postinstall_cmds='base_file=`basename \$file`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname~ if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; fi' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; $ECHO \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' ;; osf3* | osf4* | osf5*) version_type=osf need_lib_prefix=no need_version=no soname_spec='$libname$release$shared_ext$major' library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; rdos*) dynamic_linker=no ;; solaris*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes # ldd complains unless libraries are executable postinstall_cmds='chmod +x $lib' ;; sunos4*) version_type=sunos library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes if test yes = "$with_gnu_ld"; then need_lib_prefix=no fi need_version=yes ;; sysv4 | sysv4.3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH case $host_vendor in sni) shlibpath_overrides_runpath=no need_lib_prefix=no runpath_var=LD_RUN_PATH ;; siemens) need_lib_prefix=no ;; motorola) need_lib_prefix=no need_version=no shlibpath_overrides_runpath=no sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' ;; esac ;; sysv4*MP*) if test -d /usr/nec; then version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' soname_spec='$libname$shared_ext.$major' shlibpath_var=LD_LIBRARY_PATH fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) version_type=sco need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes if test yes = "$with_gnu_ld"; then sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' else sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' case $host_os in sco3.2v5*) sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" ;; esac fi sys_lib_dlsearch_path_spec='/usr/lib' ;; tpf*) # TPF is a cross-target only. Preferred cross-host = GNU/Linux. version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; uts4*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH ;; *) dynamic_linker=no ;; esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 printf "%s\n" "$dynamic_linker" >&6; } test no = "$dynamic_linker" && can_build_shared=no variables_saved_for_relink="PATH $shlibpath_var $runpath_var" if test yes = "$GCC"; then variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" fi if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec fi if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec fi # remember unaugmented sys_lib_dlsearch_path content for libtool script decls... configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec # ... but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" # to be used as default LT_SYS_LIBRARY_PATH value in generated libtool configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 printf %s "checking how to hardcode library paths into programs... " >&6; } hardcode_action_FC= if test -n "$hardcode_libdir_flag_spec_FC" || test -n "$runpath_var_FC" || test yes = "$hardcode_automatic_FC"; then # We can hardcode non-existent directories. if test no != "$hardcode_direct_FC" && # If the only mechanism to avoid hardcoding is shlibpath_var, we # have to relink, otherwise we might link with an installed library # when we should be linking with a yet-to-be-installed one ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, FC)" && test no != "$hardcode_minus_L_FC"; then # Linking always hardcodes the temporary library directory. hardcode_action_FC=relink else # We can link without hardcoding, and we can hardcode nonexisting dirs. hardcode_action_FC=immediate fi else # We cannot hardcode anything, or else we can only hardcode existing # directories. hardcode_action_FC=unsupported fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hardcode_action_FC" >&5 printf "%s\n" "$hardcode_action_FC" >&6; } if test relink = "$hardcode_action_FC" || test yes = "$inherit_rpath_FC"; then # Fast installation is not supported enable_fast_install=no elif test yes = "$shlibpath_overrides_runpath" || test no = "$enable_shared"; then # Fast installation is not necessary enable_fast_install=needless fi fi # test -n "$compiler" GCC=$lt_save_GCC CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS fi # test yes != "$_lt_disable_FC" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_config_commands="$ac_config_commands libtool" # Only expand once: # Autoupdate added the next two lines to ensure that your configure # script's behavior did not change. They are probably safe to remove. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 printf %s "checking for egrep... " >&6; } if test ${ac_cv_path_EGREP+y} then : printf %s "(cached) " >&6 else $as_nop if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 then ac_cv_path_EGREP="$GREP -E" else if test -z "$EGREP"; then ac_path_EGREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_prog in egrep do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_EGREP="$as_dir$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_EGREP" || continue # Check for GNU ac_path_EGREP and select it if it is found. # Check for GNU $ac_path_EGREP case `"$ac_path_EGREP" --version 2>&1` in *GNU*) ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; *) ac_count=0 printf %s 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" printf "%s\n" 'EGREP' >> "conftest.nl" "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_EGREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_EGREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_EGREP"; then as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 fi else ac_cv_path_EGREP=$EGREP fi fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 printf "%s\n" "$ac_cv_path_EGREP" >&6; } EGREP="$ac_cv_path_EGREP" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C/C++ restrict keyword" >&5 printf %s "checking for C/C++ restrict keyword... " >&6; } if test ${ac_cv_c_restrict+y} then : printf %s "(cached) " >&6 else $as_nop ac_cv_c_restrict=no # Put '__restrict__' first, to avoid problems with glibc and non-GCC; see: # https://lists.gnu.org/archive/html/bug-autoconf/2016-02/msg00006.html # Put 'restrict' last, because C++ lacks it. for ac_kw in __restrict__ __restrict _Restrict restrict; do cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ typedef int *int_ptr; int foo (int_ptr $ac_kw ip) { return ip[0]; } int bar (int [$ac_kw]); /* Catch GCC bug 14050. */ int bar (int ip[$ac_kw]) { return ip[0]; } int main (void) { int s[1]; int *$ac_kw t = s; t[0] = 0; return foo (t) + bar (t); ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_cv_c_restrict=$ac_kw fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext test "$ac_cv_c_restrict" != no && break done fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_restrict" >&5 printf "%s\n" "$ac_cv_c_restrict" >&6; } case $ac_cv_c_restrict in restrict) ;; no) printf "%s\n" "#define restrict /**/" >>confdefs.h ;; *) printf "%s\n" "#define restrict $ac_cv_c_restrict" >>confdefs.h ;; esac # Check if bash is available # Extract the first word of "bash", so it can be a program name with args. set dummy bash; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_REALBASH+y} then : printf %s "(cached) " >&6 else $as_nop case $REALBASH in [\\/]* | ?:[\\/]*) ac_cv_path_REALBASH="$REALBASH" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_dummy="/bin:$PATH" for as_dir in $as_dummy do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_REALBASH="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS ;; esac fi REALBASH=$ac_cv_path_REALBASH if test -n "$REALBASH"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $REALBASH" >&5 printf "%s\n" "$REALBASH" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi # Record git version # Extract the first word of "git", so it can be a program name with args. set dummy git; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_gitcommand+y} then : printf %s "(cached) " >&6 else $as_nop case $gitcommand in [\\/]* | ?:[\\/]*) ac_cv_path_gitcommand="$gitcommand" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_gitcommand="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS ;; esac fi gitcommand=$ac_cv_path_gitcommand if test -n "$gitcommand"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $gitcommand" >&5 printf "%s\n" "$gitcommand" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test -f $srcdir/STARPU-REVISION ; then cp $srcdir/STARPU-REVISION . elif test "$gitcommand" = "" ; then echo "unknown" > ./STARPU-REVISION else bdir=$PWD cd $srcdir git log -n 1 --pretty="%H%d" . > $bdir/STARPU-REVISION_tmp cd $bdir if test -s ./STARPU-REVISION_tmp ; then mv ./STARPU-REVISION_tmp ./STARPU-REVISION else echo "unknown" > ./STARPU-REVISION fi fi if test "x$cross_compiling" = "xyes"; then STARPU_CROSS_COMPILING_TRUE= STARPU_CROSS_COMPILING_FALSE='#' else STARPU_CROSS_COMPILING_TRUE='#' STARPU_CROSS_COMPILING_FALSE= fi ############################################################################### # # # MPI compilers # # # ############################################################################### #Check MPICC if test x$enable_simgrid = xyes ; then DEFAULT_MPICC=smpicc else DEFAULT_MPICC=mpicc fi # Check whether --with-mpicc was given. if test ${with_mpicc+y} then : withval=$with_mpicc; DEFAULT_MPICC=$withval fi case $DEFAULT_MPICC in /*) mpicc_path="$DEFAULT_MPICC" ;; *) # Extract the first word of "$DEFAULT_MPICC", so it can be a program name with args. set dummy $DEFAULT_MPICC; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_mpicc_path+y} then : printf %s "(cached) " >&6 else $as_nop case $mpicc_path in [\\/]* | ?:[\\/]*) ac_cv_path_mpicc_path="$mpicc_path" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_dummy="$simgrid_dir/bin:$PATH" for as_dir in $as_dummy do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_mpicc_path="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_mpicc_path" && ac_cv_path_mpicc_path="no" ;; esac fi mpicc_path=$ac_cv_path_mpicc_path if test -n "$mpicc_path"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpicc_path" >&5 printf "%s\n" "$mpicc_path" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi ;; esac # We test if the MPICC compiler exists if test ! -x $mpicc_path; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: The mpicc compiler '$mpicc_path' does not have the execute permission" >&5 printf "%s\n" "The mpicc compiler '$mpicc_path' does not have the execute permission" >&6; } mpicc_path=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether mpicc is available" >&5 printf %s "checking whether mpicc is available... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpicc_path" >&5 printf "%s\n" "$mpicc_path" >&6; } MPICC=$mpicc_path if test x$mpicc_path != xno ; then MPIPATH=$(dirname $mpicc_path):$PATH else MPIPATH=$PATH fi #Check MPICXX/MPIC++ if test x$enable_simgrid = xyes ; then DEFAULT_MPICXX=smpicxx else DEFAULT_MPICXX=mpicxx fi # Check whether --with-mpicxx was given. if test ${with_mpicxx+y} then : withval=$with_mpicxx; DEFAULT_MPICXX=$withval fi case $DEFAULT_MPICXX in /*) mpicxx_path="$DEFAULT_MPICXX" ;; *) # Extract the first word of "$DEFAULT_MPICXX", so it can be a program name with args. set dummy $DEFAULT_MPICXX; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_mpicxx_path+y} then : printf %s "(cached) " >&6 else $as_nop case $mpicxx_path in [\\/]* | ?:[\\/]*) ac_cv_path_mpicxx_path="$mpicxx_path" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $MPIPATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_mpicxx_path="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_mpicxx_path" && ac_cv_path_mpicxx_path="no" ;; esac fi mpicxx_path=$ac_cv_path_mpicxx_path if test -n "$mpicxx_path"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpicxx_path" >&5 printf "%s\n" "$mpicxx_path" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi ;; esac # try with mpic++ if mpicxx was not found if test x$mpicxx_path = xno ; then DEFAULT_MPICXX=mpic++ # Extract the first word of "$DEFAULT_MPICXX", so it can be a program name with args. set dummy $DEFAULT_MPICXX; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_mpicxx_path+y} then : printf %s "(cached) " >&6 else $as_nop case $mpicxx_path in [\\/]* | ?:[\\/]*) ac_cv_path_mpicxx_path="$mpicxx_path" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $MPIPATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_mpicxx_path="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_mpicxx_path" && ac_cv_path_mpicxx_path="no" ;; esac fi mpicxx_path=$ac_cv_path_mpicxx_path if test -n "$mpicxx_path"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpicxx_path" >&5 printf "%s\n" "$mpicxx_path" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi # We test if the MPICXX/MPIC++ compiler exists if test ! -x $mpicxx_path; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: The mpicxx compiler '$mpicxx_path' does not have the execute permission" >&5 printf "%s\n" "The mpicxx compiler '$mpicxx_path' does not have the execute permission" >&6; } mpicxx_path=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether mpicxx is available" >&5 printf %s "checking whether mpicxx is available... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpicxx_path" >&5 printf "%s\n" "$mpicxx_path" >&6; } MPICXX=$mpicxx_path # Check if mpiexec is available if test x$enable_simgrid = xyes ; then DEFAULT_MPIEXEC=smpirun # Check whether --with-smpirun was given. if test ${with_smpirun+y} then : withval=$with_smpirun; DEFAULT_MPIEXEC=$withval fi else DEFAULT_MPIEXEC=mpiexec # Check whether --with-mpiexec was given. if test ${with_mpiexec+y} then : withval=$with_mpiexec; DEFAULT_MPIEXEC=$withval fi fi case $DEFAULT_MPIEXEC in /*) mpiexec_path="$DEFAULT_MPIEXEC" ;; *) # Extract the first word of "$DEFAULT_MPIEXEC", so it can be a program name with args. set dummy $DEFAULT_MPIEXEC; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_mpiexec_path+y} then : printf %s "(cached) " >&6 else $as_nop case $mpiexec_path in [\\/]* | ?:[\\/]*) ac_cv_path_mpiexec_path="$mpiexec_path" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $MPIPATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_mpiexec_path="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_mpiexec_path" && ac_cv_path_mpiexec_path="no" ;; esac fi mpiexec_path=$ac_cv_path_mpiexec_path if test -n "$mpiexec_path"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpiexec_path" >&5 printf "%s\n" "$mpiexec_path" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi esac { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether mpiexec is available" >&5 printf %s "checking whether mpiexec is available... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpiexec_path" >&5 printf "%s\n" "$mpiexec_path" >&6; } # We test if MPIEXEC exists if test ! -x $mpiexec_path; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: The mpiexec script '$mpiexec_path' is not valid" >&5 printf "%s\n" "The mpiexec script '$mpiexec_path' is not valid" >&6; } default_enable_mpi_check=no mpiexec_path="" fi MPIEXEC=$mpiexec_path ############################################################################### # # # MPI # # # ############################################################################### # Check whether --enable-mpi was given. if test ${enable_mpi+y} then : enableval=$enable_mpi; enable_mpi=$enableval else $as_nop enable_mpi=$default_enable_mpi fi if test x$enable_mpi = xmaybe ; then if test -x "$mpicc_path"; then enable_mpi=yes else enable_mpi=no fi fi # in case MPI was explicitly required, but mpicc is not available, this is an error if test x$enable_mpi = xyes ; then if test ! -x "$mpicc_path"; then as_fn_error $? "Compiler MPI '$mpicc_path' not valid" "$LINENO" 5 fi OLD_CC=$CC CC=$mpicc_path cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : printf "%s\n" "#define STARPU_HAVE_MPI_EXT 1" >>confdefs.h fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_fn_c_check_func "$LINENO" "MPI_Comm_create_group" "ac_cv_func_MPI_Comm_create_group" if test "x$ac_cv_func_MPI_Comm_create_group" = xyes then : printf "%s\n" "#define STARPU_HAVE_MPI_COMM_CREATE_GROUP 1" >>confdefs.h fi CC=$OLD_CC fi build_mpi_lib=$enable_mpi # Check whether --enable-mpi-minimal-tests was given. if test ${enable_mpi_minimal_tests+y} then : enableval=$enable_mpi_minimal_tests; enable_mpi_minimal_tests=$enableval else $as_nop enable_mpi_minimal_tests=no fi if test x$enable_mpi_minimal_tests = xyes; then STARPU_MPI_MINIMAL_TESTS_TRUE= STARPU_MPI_MINIMAL_TESTS_FALSE='#' else STARPU_MPI_MINIMAL_TESTS_TRUE='#' STARPU_MPI_MINIMAL_TESTS_FALSE= fi ############################################################################### # # # NEW MADELEINE # # # ############################################################################### # Check whether --enable-nmad was given. if test ${enable_nmad+y} then : enableval=$enable_nmad; enable_nmad=$enableval else $as_nop enable_nmad=no fi build_nmad_lib=no CC_OR_MPICC=$cc_or_mpicc #We can only build StarPU MPI Library if User wants it and MPI is available if test x$enable_mpi = xyes -a x$enable_nmad = xyes ; then build_nmad_lib=yes build_mpi_lib=no pkg_failed=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for NMAD" >&5 printf %s "checking for NMAD... " >&6; } if test -n "$PKG_CONFIG"; then if test -n "$NMAD_CFLAGS"; then pkg_cv_NMAD_CFLAGS="$NMAD_CFLAGS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"nmad\""; } >&5 ($PKG_CONFIG --exists --print-errors "nmad") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_NMAD_CFLAGS=`$PKG_CONFIG --cflags "nmad" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$NMAD_LIBS"; then pkg_cv_NMAD_LIBS="$NMAD_LIBS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"nmad\""; } >&5 ($PKG_CONFIG --exists --print-errors "nmad") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_NMAD_LIBS=`$PKG_CONFIG --libs "nmad" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then NMAD_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "nmad"` else NMAD_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "nmad"` fi # Put the nasty error message in config.log where it belongs echo "$NMAD_PKG_ERRORS" >&5 as_fn_error $? "Package requirements (nmad) were not met: $NMAD_PKG_ERRORS Consider adjusting the PKG_CONFIG_PATH environment variable if you installed software in a non-standard prefix. Alternatively, you may set the environment variables NMAD_CFLAGS and NMAD_LIBS to avoid the need to call pkg-config. See the pkg-config man page for more details. " "$LINENO" 5 elif test $pkg_failed = untried; then { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it is in your PATH or set the PKG_CONFIG environment variable to the full path to pkg-config. Alternatively, you may set the environment variables NMAD_CFLAGS and NMAD_LIBS to avoid the need to call pkg-config. See the pkg-config man page for more details. To get pkg-config, see . See \`config.log' for more details" "$LINENO" 5; } else NMAD_CFLAGS=$pkg_cv_NMAD_CFLAGS NMAD_LIBS=$pkg_cv_NMAD_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } : fi save_LIBS="$LIBS" save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS $NMAD_CFLAGS" LIBS="$LIBS $NMAD_LIBS" ac_fn_c_check_func "$LINENO" "piom_ltask_set_bound_thread_os_indexes" "ac_cv_func_piom_ltask_set_bound_thread_os_indexes" if test "x$ac_cv_func_piom_ltask_set_bound_thread_os_indexes" = xyes then : printf "%s\n" "#define HAVE_PIOM_LTASK_SET_BOUND_THREAD_OS_INDEXES 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "nm_trace_add_synchro_point" "ac_cv_func_nm_trace_add_synchro_point" if test "x$ac_cv_func_nm_trace_add_synchro_point" = xyes then : printf "%s\n" "#define HAVE_NM_TRACE_ADD_SYNCHRO_POINT 1" >>confdefs.h fi CFLAGS="$save_CFLAGS" LIBS="$save_LIBS" else build_nmad_lib=no fi # If MadMPI is used, MadMPI can't be built with PIOman (we don't want communication progression to be done in both StarPU and MadMPI): if test x$enable_mpi = xyes -a x$build_nmad_lib = xno -a ! -z "`$mpicc_path --showme|grep pioman`"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Using MPI backend of StarPU with MadMPI built with PIOman: disabling PIOman's progression." >&5 printf "%s\n" "$as_me: WARNING: Using MPI backend of StarPU with MadMPI built with PIOman: disabling PIOman's progression." >&2;} printf "%s\n" "#define HAVE_PIOMAN 1" >>confdefs.h fi ############################################################################### # # # MPI Master Slave # # # ############################################################################### # Check whether --enable-mpi-master-slave was given. if test ${enable_mpi_master_slave+y} then : enableval=$enable_mpi_master_slave; use_mpi_master_slave=$enableval else $as_nop use_mpi_master_slave=no fi if test x$enable_simgrid = xyes; then if test x$use_mpi_master_slave = xyes; then as_fn_error $? "MPI Master Slave not supported with simgrid" "$LINENO" 5 fi use_mpi_master_slave=no fi # in case it is explicitly required, but mpicc is not available, this is an error if test x$use_mpi_master_slave = xyes -a ! -x "$mpicc_path"; then as_fn_error $? "Compiler MPI '$mpicc_path' not valid" "$LINENO" 5 fi #We can only build MPI Master Slave if User wants it and MPI compiler are available if test x$use_mpi_master_slave = xyes -a x$mpicc_path != xno -a x${mpicxx_path} != xno ; then build_mpi_master_slave=yes else build_mpi_master_slave=no fi #users cannot use both at the same time if test x$build_mpi_master_slave = xyes -a x$enable_mpi = xyes; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: StarPU-MPI and MPI Master-Slave cannot be used at the same time ! Disabling StarPU-MPI..." >&5 printf "%s\n" "$as_me: WARNING: StarPU-MPI and MPI Master-Slave cannot be used at the same time ! Disabling StarPU-MPI..." >&2;} build_mpi_lib=no build_nmad_lib=no enable_mpi=no fi if test x$build_mpi_master_slave = xyes; then printf "%s\n" "#define STARPU_USE_MPI_MASTER_SLAVE 1" >>confdefs.h CC=$mpicc_path CCLD=$mpicc_path CXX=$mpicxx_path CXXLD=mpicxx_path fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the MPI master-slave mode should be enabled" >&5 printf %s "checking whether the MPI master-slave mode should be enabled... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $build_mpi_master_slave" >&5 printf "%s\n" "$build_mpi_master_slave" >&6; } if test x$build_mpi_master_slave = xyes; then STARPU_USE_MPI_MASTER_SLAVE_TRUE= STARPU_USE_MPI_MASTER_SLAVE_FALSE='#' else STARPU_USE_MPI_MASTER_SLAVE_TRUE='#' STARPU_USE_MPI_MASTER_SLAVE_FALSE= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of MPI master-slave devices" >&5 printf %s "checking maximum number of MPI master-slave devices... " >&6; } # Check whether --enable-maxmpidev was given. if test ${enable_maxmpidev+y} then : enableval=$enable_maxmpidev; nmaxmpidev=$enableval else $as_nop if test x$build_mpi_master_slave = xyes; then nmaxmpidev=4 else nmaxmpidev=0 fi fi if test x$nmaxmpidev = x -o x$nmaxmpidev = xyes then as_fn_error $? "The --enable-maxmpidev option needs to be given a number" "$LINENO" 5 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxmpidev" >&5 printf "%s\n" "$nmaxmpidev" >&6; } printf "%s\n" "#define STARPU_MAXMPIDEVS $nmaxmpidev" >>confdefs.h ############################################################################### # # # TCP/IP Master Slave # # # ############################################################################### # Check whether --enable-tcpip-master-slave was given. if test ${enable_tcpip_master_slave+y} then : enableval=$enable_tcpip_master_slave; build_tcpip_master_slave=$enableval else $as_nop build_tcpip_master_slave=no fi if test x$build_tcpip_master_slave = xyes; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlsym in -ldl" >&5 printf %s "checking for dlsym in -ldl... " >&6; } if test ${ac_cv_lib_dl_dlsym+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-ldl $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char dlsym (); int main (void) { return dlsym (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_dl_dlsym=yes else $as_nop ac_cv_lib_dl_dlsym=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlsym" >&5 printf "%s\n" "$ac_cv_lib_dl_dlsym" >&6; } if test "x$ac_cv_lib_dl_dlsym" = xyes then : printf "%s\n" "#define HAVE_LIBDL 1" >>confdefs.h LIBS="-ldl $LIBS" fi printf "%s\n" "#define STARPU_USE_TCPIP_MASTER_SLAVE 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the TCP/IP master-slave mode should be enabled" >&5 printf %s "checking whether the TCP/IP master-slave mode should be enabled... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $build_tcpip_master_slave" >&5 printf "%s\n" "$build_tcpip_master_slave" >&6; } if test x$build_tcpip_master_slave = xyes; then STARPU_USE_TCPIP_MASTER_SLAVE_TRUE= STARPU_USE_TCPIP_MASTER_SLAVE_FALSE='#' else STARPU_USE_TCPIP_MASTER_SLAVE_TRUE='#' STARPU_USE_TCPIP_MASTER_SLAVE_FALSE= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of TCP/IP master-slave devices" >&5 printf %s "checking maximum number of TCP/IP master-slave devices... " >&6; } # Check whether --enable-maxtcpipdev was given. if test ${enable_maxtcpipdev+y} then : enableval=$enable_maxtcpipdev; nmaxtcpipdev=$enableval else $as_nop if test x$build_tcpip_master_slave = xyes; then nmaxtcpipdev=4 else nmaxtcpipdev=0 fi fi if test x$nmaxtcpipdev = x -o x$nmaxtcpipdev = xyes then as_fn_error $? "The --enable-maxtcpipdev option needs to be given a number" "$LINENO" 5 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxtcpipdev" >&5 printf "%s\n" "$nmaxtcpipdev" >&6; } printf "%s\n" "#define STARPU_MAXTCPIPDEVS $nmaxtcpipdev" >>confdefs.h ############################################################################### # # # Miscellaneous things for MPI # # # ############################################################################### # Check whether --enable-mpi-pedantic-isend was given. if test ${enable_mpi_pedantic_isend+y} then : enableval=$enable_mpi_pedantic_isend; enable_mpi_pedantic_isend=$enableval else $as_nop enable_mpi_pedantic_isend=no fi if test x$enable_mpi_pedantic_isend = xyes; then printf "%s\n" "#define STARPU_MPI_PEDANTIC_ISEND 1" >>confdefs.h fi # If the user specifically asks for it, or if we are in a developer checkout, we enable mpi check if test -d "$srcdir/.git" -o -f "$srcdir/.git" ; then default_enable_mpi_check=$enable_mpi fi # Check whether --enable-mpi-check was given. if test ${enable_mpi_check+y} then : enableval=$enable_mpi_check; enable_mpi_check=$enableval else $as_nop enable_mpi_check=$default_enable_mpi_check fi running_mpi_check=no if test x$enable_mpi_check = xyes ; then running_mpi_check=yes if test x$enable_mpi = xno ; then as_fn_error $? "MPI checks requested, but MPI is disabled" "$LINENO" 5 fi fi if test x$enable_mpi_check = xmaybe ; then running_mpi_check=yes fi if test x$enable_mpi_check = xno ; then running_mpi_check=no fi if test x$enable_mpi = xno ; then running_mpi_check=no fi if test x$running_mpi_check = xyes; then STARPU_MPI_CHECK_TRUE= STARPU_MPI_CHECK_FALSE='#' else STARPU_MPI_CHECK_TRUE='#' STARPU_MPI_CHECK_FALSE= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI tests should be run" >&5 printf %s "checking whether MPI tests should be run... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $running_mpi_check" >&5 printf "%s\n" "$running_mpi_check" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the StarPU MPI library should be generated" >&5 printf %s "checking whether the StarPU MPI library should be generated... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $build_mpi_lib" >&5 printf "%s\n" "$build_mpi_lib" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the StarPU MPI nmad library should be generated" >&5 printf %s "checking whether the StarPU MPI nmad library should be generated... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $build_nmad_lib" >&5 printf "%s\n" "$build_nmad_lib" >&6; } if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes ; then printf "%s\n" "#define STARPU_USE_MPI 1" >>confdefs.h if test x$build_mpi_lib = xyes ; then printf "%s\n" "#define STARPU_USE_MPI_MPI 1" >>confdefs.h else printf "%s\n" "#define STARPU_USE_MPI_NMAD 1" >>confdefs.h fi fi if test x$enable_mpi = xyes ; then if test x$enable_simgrid = xyes ; then if test x$enable_shared = xyes ; then as_fn_error $? "MPI with simgrid can not work with shared libraries, if you need the MPI support, then use --disable-shared to fix this, else disable MPI with --disable-mpi" "$LINENO" 5 else CFLAGS="$CFLAGS -fPIC" CXXFLAGS="$CXXFLAGS -fPIC" NVCCFLAGS="$NVCCFLAGS --compiler-options -fPIC" HIPCCFLAGS="$HIPCCFLAGS --compiler-options -fPIC" FFLAGS="$FFLAGS -fPIC" FCLAGS="$FFLAGS -fPIC" fi fi enable_mpi_sync_clocks=no pkg_failed=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for MPI_SYNC_CLOCKS" >&5 printf %s "checking for MPI_SYNC_CLOCKS... " >&6; } if test -n "$PKG_CONFIG"; then if test -n "$MPI_SYNC_CLOCKS_CFLAGS"; then pkg_cv_MPI_SYNC_CLOCKS_CFLAGS="$MPI_SYNC_CLOCKS_CFLAGS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"mpi_sync_clocks\""; } >&5 ($PKG_CONFIG --exists --print-errors "mpi_sync_clocks") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_MPI_SYNC_CLOCKS_CFLAGS=`$PKG_CONFIG --cflags "mpi_sync_clocks" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$MPI_SYNC_CLOCKS_LIBS"; then pkg_cv_MPI_SYNC_CLOCKS_LIBS="$MPI_SYNC_CLOCKS_LIBS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"mpi_sync_clocks\""; } >&5 ($PKG_CONFIG --exists --print-errors "mpi_sync_clocks") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_MPI_SYNC_CLOCKS_LIBS=`$PKG_CONFIG --libs "mpi_sync_clocks" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then MPI_SYNC_CLOCKS_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "mpi_sync_clocks"` else MPI_SYNC_CLOCKS_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "mpi_sync_clocks"` fi # Put the nasty error message in config.log where it belongs echo "$MPI_SYNC_CLOCKS_PKG_ERRORS" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } enable_mpi_sync_clocks=no elif test $pkg_failed = untried; then enable_mpi_sync_clocks=no else MPI_SYNC_CLOCKS_CFLAGS=$pkg_cv_MPI_SYNC_CLOCKS_CFLAGS MPI_SYNC_CLOCKS_LIBS=$pkg_cv_MPI_SYNC_CLOCKS_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } enable_mpi_sync_clocks=yes fi if test x$enable_mpi_sync_clocks = xyes ; then printf "%s\n" "#define STARPU_HAVE_MPI_SYNC_CLOCKS 1" >>confdefs.h fi fi if test x$enable_mpi_sync_clocks = xyes; then STARPU_MPI_SYNC_CLOCKS_TRUE= STARPU_MPI_SYNC_CLOCKS_FALSE='#' else STARPU_MPI_SYNC_CLOCKS_TRUE='#' STARPU_MPI_SYNC_CLOCKS_FALSE= fi if test x$build_mpi_lib = xyes; then STARPU_USE_MPI_MPI_TRUE= STARPU_USE_MPI_MPI_FALSE='#' else STARPU_USE_MPI_MPI_TRUE='#' STARPU_USE_MPI_MPI_FALSE= fi if test x$build_nmad_lib = xyes; then STARPU_USE_MPI_NMAD_TRUE= STARPU_USE_MPI_NMAD_FALSE='#' else STARPU_USE_MPI_NMAD_TRUE='#' STARPU_USE_MPI_NMAD_FALSE= fi if test x$build_nmad_lib = xyes -o x$build_mpi_lib = xyes; then STARPU_USE_MPI_TRUE= STARPU_USE_MPI_FALSE='#' else STARPU_USE_MPI_TRUE='#' STARPU_USE_MPI_FALSE= fi ###### Failure tolerance material ####### default_enable_mpi_ft=no # Check whether --enable-mpi-ft was given. if test ${enable_mpi_ft+y} then : enableval=$enable_mpi_ft; enable_mpi_ft=$enableval else $as_nop enable_mpi_ft=$default_enable_mpi_ft fi default_enable_mpi_ft_stats=no use_mpi_ft_stats=no # Check whether --enable-mpi-ft-stats was given. if test ${enable_mpi_ft_stats+y} then : enableval=$enable_mpi_ft_stats; enable_mpi_ft_stats=$enableval else $as_nop enable_mpi_ft_stats=$default_enable_mpi_ft_stats fi # TODO: Check MPI version to be ULFM if test x$enable_mpi_ft = xyes ; then if test x$build_mpi_lib != xyes ; then as_fn_error $? "Failure tolerance mechanisms only work with a particular MPI implementation: ULFM (OpenMPI based)." "$LINENO" 5 else printf "%s\n" "#define STARPU_USE_MPI_FT 1" >>confdefs.h use_mpi_ft=yes; if test x$enable_mpi_ft_stats = xyes ; then printf "%s\n" "#define STARPU_USE_MPI_FT_STATS 1" >>confdefs.h use_mpi_ft_stats=$enable_mpi_ft_stats; fi fi fi if test x$use_mpi_ft = xyes; then STARPU_USE_MPI_FT_TRUE= STARPU_USE_MPI_FT_FALSE='#' else STARPU_USE_MPI_FT_TRUE='#' STARPU_USE_MPI_FT_FALSE= fi if test x$use_mpi_ft_stats = xyes; then STARPU_USE_MPI_FT_STATS_TRUE= STARPU_USE_MPI_FT_STATS_FALSE='#' else STARPU_USE_MPI_FT_STATS_TRUE='#' STARPU_USE_MPI_FT_STATS_FALSE= fi ###### End of failure tolerance material ###### # Check whether --with-mpiexec-args was given. if test ${with_mpiexec_args+y} then : withval=$with_mpiexec_args; mpiexec_args=$withval fi MPIEXEC_ARGS=$mpiexec_args { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI debug messages should be displayed" >&5 printf %s "checking whether MPI debug messages should be displayed... " >&6; } # Check whether --enable-mpi-verbose was given. if test ${enable_mpi_verbose+y} then : enableval=$enable_mpi_verbose; enable_mpi_verbose=$enableval else $as_nop enable_mpi_verbose=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_mpi_verbose" >&5 printf "%s\n" "$enable_mpi_verbose" >&6; } if test x$enable_mpi_verbose = xyes; then printf "%s\n" "#define STARPU_MPI_VERBOSE 1" >>confdefs.h fi if test x$enable_mpi_verbose = xextra; then printf "%s\n" "#define STARPU_MPI_VERBOSE 1" >>confdefs.h printf "%s\n" "#define STARPU_MPI_EXTRA_VERBOSE 1" >>confdefs.h fi if test x$enable_mpi = xyes -o x$build_mpi_master_slave = xyes ; then cc_or_mpicc=$mpicc_path # For some reason, libtool uses gcc instead of mpicc when linking # libstarpumpi. # On Darwin (and maybe other systems ?) the linker will fail (undefined # references to MPI_*). We manually add the required flags to fix this # issue. # openmpi version MPICC_LDFLAGS=`$mpicc_path --showme:link 2>/dev/null` if test -z "$MPICC_LDFLAGS" then # mpich version MPICC_LDFLAGS=`$mpicc_path -link_info | awk '{$1=""; print}'` fi else cc_or_mpicc=$CC fi CC_OR_MPICC=$cc_or_mpicc ############################################################################### # # # NUMA memory nodes # # # ############################################################################### default_nmaxnumanodes=2 # Extract the first word of "hwloc-calc", so it can be a program name with args. set dummy hwloc-calc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_hwloccalccommand+y} then : printf %s "(cached) " >&6 else $as_nop case $hwloccalccommand in [\\/]* | ?:[\\/]*) ac_cv_path_hwloccalccommand="$hwloccalccommand" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_hwloccalccommand="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS ;; esac fi hwloccalccommand=$ac_cv_path_hwloccalccommand if test -n "$hwloccalccommand"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hwloccalccommand" >&5 printf "%s\n" "$hwloccalccommand" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of NUMA nodes" >&5 printf %s "checking maximum number of NUMA nodes... " >&6; } # Check whether --enable-maxnumanodes was given. if test ${enable_maxnumanodes+y} then : enableval=$enable_maxnumanodes; nmaxnumanodes=$enableval else $as_nop nmaxnumanodes=auto fi if test x$nmaxnumanodes = xauto then if test "$hwloccalccommand" = ""; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: hwloc-calc not available to automatically get the number of NUMA nodes, using the default value: $default_nmaxnumanodes" >&5 printf "%s\n" "$as_me: WARNING: hwloc-calc not available to automatically get the number of NUMA nodes, using the default value: $default_nmaxnumanodes" >&2;} nmaxnumanodes=$default_nmaxnumanodes else nmaxnumanodes=$($hwloccalccommand all -N node 2>/dev/null) if test x$nmaxnumanodes = x; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: hwloc-calc could not get the number of NUMA nodes, using the default value: $default_nmaxnumanodes" >&5 printf "%s\n" "$as_me: WARNING: hwloc-calc could not get the number of NUMA nodes, using the default value: $default_nmaxnumanodes" >&2;} nmaxnumanodes=$default_nmaxnumanodes fi fi fi if test x$nmaxnumanodes = x -o x$nmaxnumanodes = xyes then as_fn_error $? "The --enable-maxnumanodes option needs to be given a number" "$LINENO" 5 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxnumanodes" >&5 printf "%s\n" "$nmaxnumanodes" >&6; } printf "%s\n" "#define STARPU_MAXNUMANODES $nmaxnumanodes" >>confdefs.h ############################################################################### for ac_prog in lib do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_STARPU_MS_LIB+y} then : printf %s "(cached) " >&6 else $as_nop case $STARPU_MS_LIB in [\\/]* | ?:[\\/]*) ac_cv_path_STARPU_MS_LIB="$STARPU_MS_LIB" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_STARPU_MS_LIB="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS ;; esac fi STARPU_MS_LIB=$ac_cv_path_STARPU_MS_LIB if test -n "$STARPU_MS_LIB"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $STARPU_MS_LIB" >&5 printf "%s\n" "$STARPU_MS_LIB" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$STARPU_MS_LIB" && break done if test "x$STARPU_MS_LIB" != "x"; then STARPU_HAVE_MS_LIB_TRUE= STARPU_HAVE_MS_LIB_FALSE='#' else STARPU_HAVE_MS_LIB_TRUE='#' STARPU_HAVE_MS_LIB_FALSE= fi case "$target" in *-*-mingw*|*-*-cygwin*|*-*-msys*) starpu_windows=yes libext=a printf "%s\n" "#define STARPU_HAVE_WINDOWS 1" >>confdefs.h ;; *-*-linux*) starpu_linux=yes printf "%s\n" "#define STARPU_LINUX_SYS 1" >>confdefs.h ;; *-*-openbsd*) starpu_openbsd=yes printf "%s\n" "#define STARPU_OPENBSD_SYS 1" >>confdefs.h ;; *-*darwin*) starpu_darwin=yes printf "%s\n" "#define STARPU_HAVE_DARWIN 1" >>confdefs.h ;; esac if test "x$starpu_windows" = "xyes"; then STARPU_HAVE_WINDOWS_TRUE= STARPU_HAVE_WINDOWS_FALSE='#' else STARPU_HAVE_WINDOWS_TRUE='#' STARPU_HAVE_WINDOWS_FALSE= fi if test "x$starpu_linux" = "xyes"; then STARPU_LINUX_SYS_TRUE= STARPU_LINUX_SYS_FALSE='#' else STARPU_LINUX_SYS_TRUE='#' STARPU_LINUX_SYS_FALSE= fi if test "x$starpu_darwin" = "xyes"; then STARPU_HAVE_DARWIN_TRUE= STARPU_HAVE_DARWIN_FALSE='#' else STARPU_HAVE_DARWIN_TRUE='#' STARPU_HAVE_DARWIN_FALSE= fi if test "x$starpu_openbsd" = "xyes"; then STARPU_OPENBSD_SYS_TRUE= STARPU_OPENBSD_SYS_FALSE='#' else STARPU_OPENBSD_SYS_TRUE='#' STARPU_OPENBSD_SYS_FALSE= fi # on Darwin, GCC targets i386 by default, so we don't have atomic ops # The cast to long int works around a bug in the HP C Compiler # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. # This bug is HP SR number 8606223364. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking size of void *" >&5 printf %s "checking size of void *... " >&6; } if test ${ac_cv_sizeof_void_p+y} then : printf %s "(cached) " >&6 else $as_nop if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (void *))" "ac_cv_sizeof_void_p" "$ac_includes_default" then : else $as_nop if test "$ac_cv_type_void_p" = yes; then { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error 77 "cannot compute sizeof (void *) See \`config.log' for more details" "$LINENO" 5; } else ac_cv_sizeof_void_p=0 fi fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_void_p" >&5 printf "%s\n" "$ac_cv_sizeof_void_p" >&6; } printf "%s\n" "#define SIZEOF_VOID_P $ac_cv_sizeof_void_p" >>confdefs.h SIZEOF_VOID_P=$ac_cv_sizeof_void_p case $SIZEOF_VOID_P in 4) case "$target" in i386-*darwin*) CFLAGS="$CFLAGS -march=i686" ;; esac STARPU_MS_LIB_ARCH=X86 ;; 8) STARPU_MS_LIB_ARCH=X64 ;; esac # This will be useful for program which use CUDA (and .cubin files) which need # some path to the CUDA code at runtime. printf "%s\n" "#define STARPU_BUILD_DIR \"$PWD\"" >>confdefs.h STARPU_BUILD_DIR=$PWD case "${srcdir}" in /*) printf "%s\n" "#define STARPU_SRC_DIR \"$(eval echo ${srcdir})\"" >>confdefs.h STARPU_SRC_DIR="$(eval echo ${srcdir})" ;; *) printf "%s\n" "#define STARPU_SRC_DIR \"$(eval echo $PWD/${srcdir})\"" >>confdefs.h STARPU_SRC_DIR="$(eval echo $PWD/${srcdir})" ;; esac case "$target" in *-*-mingw*|*-*-cygwin*) # Check whether --enable-native-winthreads was given. if test ${enable_native_winthreads+y} then : enableval=$enable_native_winthreads; enable_native_winthreads=$enableval else $as_nop enable_native_winthreads=no fi ;; esac if test x"$enable_native_winthreads" != xyes ; then INCLUDE_PTHREAD_H='#include ' fi for ac_header in unistd.h do : ac_fn_c_check_header_compile "$LINENO" "unistd.h" "ac_cv_header_unistd_h" "$ac_includes_default" if test "x$ac_cv_header_unistd_h" = xyes then : printf "%s\n" "#define HAVE_UNISTD_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_UNISTD_H 1" >>confdefs.h fi done ac_fn_c_check_type "$LINENO" "struct timespec" "ac_cv_type_struct_timespec" " #include #include #ifdef HAVE_UNISTD_H #include #endif #include $INCLUDE_PTHREAD_H " if test "x$ac_cv_type_struct_timespec" = xyes then : printf "%s\n" "#define STARPU_HAVE_STRUCT_TIMESPEC 1" >>confdefs.h fi if test x"$enable_native_winthreads" = xyes ; then CPPFLAGS="$CPPFLAGS -I$STARPU_SRC_DIR/include/pthread_win32" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #define STARPU_CONFIGURE #include int main (void) { pthread_t t; pthread_create(&t, NULL, NULL, NULL); ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : printf "%s\n" "#define STARPU_NATIVE_WINTHREADS 1" >>confdefs.h else $as_nop as_fn_error $? "pthread_create unavailable" "$LINENO" 5 fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5 printf %s "checking for pthread_create in -lpthread... " >&6; } if test ${ac_cv_lib_pthread_pthread_create+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lpthread $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char pthread_create (); int main (void) { return pthread_create (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_pthread_pthread_create=yes else $as_nop ac_cv_lib_pthread_pthread_create=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5 printf "%s\n" "$ac_cv_lib_pthread_pthread_create" >&6; } if test "x$ac_cv_lib_pthread_pthread_create" = xyes then : LIBS="$LIBS -lpthread" STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS -lpthread" fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing sqrt" >&5 printf %s "checking for library containing sqrt... " >&6; } if test ${ac_cv_search_sqrt+y} then : printf %s "(cached) " >&6 else $as_nop ac_func_search_save_LIBS=$LIBS cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char sqrt (); int main (void) { return sqrt (); ; return 0; } _ACEOF for ac_lib in '' m do if test -z "$ac_lib"; then ac_res="none required" else ac_res=-l$ac_lib LIBS="-l$ac_lib $ac_func_search_save_LIBS" fi if ac_fn_c_try_link "$LINENO" then : ac_cv_search_sqrt=$ac_res fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext if test ${ac_cv_search_sqrt+y} then : break fi done if test ${ac_cv_search_sqrt+y} then : else $as_nop ac_cv_search_sqrt=no fi rm conftest.$ac_ext LIBS=$ac_func_search_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_sqrt" >&5 printf "%s\n" "$ac_cv_search_sqrt" >&6; } ac_res=$ac_cv_search_sqrt if test "$ac_res" != no then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" else $as_nop as_fn_error $? "math library unavailable" "$LINENO" 5 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lws2_32" >&5 printf %s "checking for main in -lws2_32... " >&6; } if test ${ac_cv_lib_ws2_32_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lws2_32 $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_ws2_32_main=yes else $as_nop ac_cv_lib_ws2_32_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ws2_32_main" >&5 printf "%s\n" "$ac_cv_lib_ws2_32_main" >&6; } if test "x$ac_cv_lib_ws2_32_main" = xyes then : printf "%s\n" "#define HAVE_LIBWS2_32 1" >>confdefs.h LIBS="-lws2_32 $LIBS" fi ac_cv_lib_ws2_32=ac_cv_lib_ws2_32_main ac_fn_c_check_func "$LINENO" "sysconf" "ac_cv_func_sysconf" if test "x$ac_cv_func_sysconf" = xyes then : printf "%s\n" "#define HAVE_SYSCONF 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "getrlimit" "ac_cv_func_getrlimit" if test "x$ac_cv_func_getrlimit" = xyes then : printf "%s\n" "#define HAVE_GETRLIMIT 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "scandir" "ac_cv_func_scandir" if test "x$ac_cv_func_scandir" = xyes then : printf "%s\n" "#define HAVE_SCANDIR 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "pthread_spin_lock" "ac_cv_func_pthread_spin_lock" if test "x$ac_cv_func_pthread_spin_lock" = xyes then : have_pthread_spin_lock=yes else $as_nop have_pthread_spin_lock=no fi if test x$have_pthread_spin_lock = xyes; then printf "%s\n" "#define HAVE_PTHREAD_SPIN_LOCK 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_PTHREAD_SPIN_LOCK 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "pthread_barrier_init" "ac_cv_func_pthread_barrier_init" if test "x$ac_cv_func_pthread_barrier_init" = xyes then : have_pthread_barrier=yes else $as_nop have_pthread_barrier=no fi if test x$have_pthread_barrier = xyes; then printf "%s\n" "#define STARPU_HAVE_PTHREAD_BARRIER 1" >>confdefs.h fi # yes, that's non portable, but it's still better than sched_setaffinity ac_fn_c_check_func "$LINENO" "pthread_setaffinity_np" "ac_cv_func_pthread_setaffinity_np" if test "x$ac_cv_func_pthread_setaffinity_np" = xyes then : printf "%s\n" "#define HAVE_PTHREAD_SETAFFINITY_NP 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "pthread_setname_np" "ac_cv_func_pthread_setname_np" if test "x$ac_cv_func_pthread_setname_np" = xyes then : have_pthread_setname_np=yes else $as_nop have_pthread_setname_np=no fi if test x$have_pthread_setname_np = xyes; then printf "%s\n" "#define STARPU_HAVE_PTHREAD_SETNAME_NP 1" >>confdefs.h fi if test "x$cross_compiling" = "xno"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PTHREAD_MUTEX_INITIALIZER just zeroes" >&5 printf %s "checking whether PTHREAD_MUTEX_INITIALIZER just zeroes... " >&6; } if test "$cross_compiling" = yes then : { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot run test program while cross compiling See \`config.log' for more details" "$LINENO" 5; } else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main (void) { pthread_mutex_t var = PTHREAD_MUTEX_INITIALIZER; char *p; for (p = (char*) &var; p < (char*) (&var+1); p++) if (*p != 0) return 1; return 0; ; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO" then : printf "%s\n" "#define STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO 1" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PTHREAD_COND_INITIALIZER just zeroes" >&5 printf %s "checking whether PTHREAD_COND_INITIALIZER just zeroes... " >&6; } if test "$cross_compiling" = yes then : { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot run test program while cross compiling See \`config.log' for more details" "$LINENO" 5; } else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main (void) { pthread_cond_t var = PTHREAD_COND_INITIALIZER; char *p; for (p = (char*) &var; p < (char*) (&var+1); p++) if (*p != 0) return 1; return 0; ; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO" then : printf "%s\n" "#define STARPU_PTHREAD_COND_INITIALIZER_ZERO 1" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PTHREAD_RWLOCK_INITIALIZER just zeroes" >&5 printf %s "checking whether PTHREAD_RWLOCK_INITIALIZER just zeroes... " >&6; } if test "$cross_compiling" = yes then : { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot run test program while cross compiling See \`config.log' for more details" "$LINENO" 5; } else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main (void) { pthread_rwlock_t var = PTHREAD_RWLOCK_INITIALIZER; char *p; for (p = (char*) &var; p < (char*) (&var+1); p++) if (*p != 0) return 1; return 0; ; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO" then : printf "%s\n" "#define STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO 1" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi # There is no posix_memalign on Mac OS X, only memalign for ac_func in posix_memalign do : ac_fn_c_check_func "$LINENO" "posix_memalign" "ac_cv_func_posix_memalign" if test "x$ac_cv_func_posix_memalign" = xyes then : printf "%s\n" "#define HAVE_POSIX_MEMALIGN 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_POSIX_MEMALIGN 1" >>confdefs.h fi done for ac_func in memalign do : ac_fn_c_check_func "$LINENO" "memalign" "ac_cv_func_memalign" if test "x$ac_cv_func_memalign" = xyes then : printf "%s\n" "#define HAVE_MEMALIGN 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_MEMALIGN 1" >>confdefs.h fi done # Some systems don't have drand48 ac_fn_c_check_func "$LINENO" "drand48" "ac_cv_func_drand48" if test "x$ac_cv_func_drand48" = xyes then : have_drand48=yes else $as_nop have_drand48=no fi ac_fn_c_check_func "$LINENO" "erand48_r" "ac_cv_func_erand48_r" if test "x$ac_cv_func_erand48_r" = xyes then : have_erand48_r=yes else $as_nop have_erand48_r=no fi # Maybe the user still does not want to use the provided drand48 # Check whether --enable-default-drand48 was given. if test ${enable_default_drand48+y} then : enableval=$enable_default_drand48; enable_default_drand48=$enableval else $as_nop enable_default_drand48=yes fi if test x$have_drand48 = xyes -a x$enable_default_drand48 = xyes ; then printf "%s\n" "#define STARPU_USE_DRAND48 1" >>confdefs.h fi if test x$have_erand48_r = xyes ; then printf "%s\n" "#define STARPU_USE_ERAND48_R 1" >>confdefs.h fi # Some systems do not define strerror_r ac_fn_c_check_func "$LINENO" "strerror_r" "ac_cv_func_strerror_r" if test "x$ac_cv_func_strerror_r" = xyes then : printf "%s\n" "#define STARPU_HAVE_STRERROR_R 1" >>confdefs.h fi # Some systems may not define setenv ac_fn_c_check_func "$LINENO" "setenv" "ac_cv_func_setenv" if test "x$ac_cv_func_setenv" = xyes then : printf "%s\n" "#define STARPU_HAVE_SETENV 1" >>confdefs.h fi # Some systems do not define unsetenv ac_fn_c_check_func "$LINENO" "unsetenv" "ac_cv_func_unsetenv" if test "x$ac_cv_func_unsetenv" = xyes then : printf "%s\n" "#define STARPU_HAVE_UNSETENV 1" >>confdefs.h fi # Some systems do not define nearbyintf... ac_fn_c_check_func "$LINENO" "nearbyintf" "ac_cv_func_nearbyintf" if test "x$ac_cv_func_nearbyintf" = xyes then : printf "%s\n" "#define STARPU_HAVE_NEARBYINTF 1" >>confdefs.h fi # ... but they may define rintf. ac_fn_c_check_func "$LINENO" "rintf" "ac_cv_func_rintf" if test "x$ac_cv_func_rintf" = xyes then : printf "%s\n" "#define STARPU_HAVE_RINTF 1" >>confdefs.h fi # Define quick check # Check whether --enable-quick-check was given. if test ${enable_quick_check+y} then : enableval=$enable_quick_check; enable_quick_check=$enableval else $as_nop enable_quick_check=no fi if test x$enable_quick_check = xyes; then printf "%s\n" "#define STARPU_QUICK_CHECK 1" >>confdefs.h fi if test "x$enable_quick_check" = "xyes"; then STARPU_QUICK_CHECK_TRUE= STARPU_QUICK_CHECK_FALSE='#' else STARPU_QUICK_CHECK_TRUE='#' STARPU_QUICK_CHECK_FALSE= fi # Define long check # Check whether --enable-long-check was given. if test ${enable_long_check+y} then : enableval=$enable_long_check; enable_long_check=$enableval else $as_nop enable_long_check=no fi if test x$enable_long_check = xyes; then printf "%s\n" "#define STARPU_LONG_CHECK 1" >>confdefs.h fi if test "x$enable_long_check" = "xyes"; then STARPU_LONG_CHECK_TRUE= STARPU_LONG_CHECK_FALSE='#' else STARPU_LONG_CHECK_TRUE='#' STARPU_LONG_CHECK_FALSE= fi # Define new check # Check whether --enable-new-check was given. if test ${enable_new_check+y} then : enableval=$enable_new_check; enable_new_check=$enableval else $as_nop enable_new_check=no fi if test x$enable_new_check = xyes; then printf "%s\n" "#define STARPU_NEW_CHECK 1" >>confdefs.h fi if test "x$enable_new_check" = "xyes"; then STARPU_NEW_CHECK_TRUE= STARPU_NEW_CHECK_FALSE='#' else STARPU_NEW_CHECK_TRUE='#' STARPU_NEW_CHECK_FALSE= fi for ac_header in malloc.h do : ac_fn_c_check_header_compile "$LINENO" "malloc.h" "ac_cv_header_malloc_h" "$ac_includes_default" if test "x$ac_cv_header_malloc_h" = xyes then : printf "%s\n" "#define HAVE_MALLOC_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_MALLOC_H 1" >>confdefs.h fi done # Check whether --enable-valgrind was given. if test ${enable_valgrind+y} then : enableval=$enable_valgrind; enable_valgrind=$enableval else $as_nop enable_valgrind=yes fi if test "$enable_valgrind" != "no" ; then for ac_header in valgrind/valgrind.h do : ac_fn_c_check_header_compile "$LINENO" "valgrind/valgrind.h" "ac_cv_header_valgrind_valgrind_h" "$ac_includes_default" if test "x$ac_cv_header_valgrind_valgrind_h" = xyes then : printf "%s\n" "#define HAVE_VALGRIND_VALGRIND_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_VALGRIND_H 1" >>confdefs.h fi done for ac_header in valgrind/memcheck.h do : ac_fn_c_check_header_compile "$LINENO" "valgrind/memcheck.h" "ac_cv_header_valgrind_memcheck_h" "$ac_includes_default" if test "x$ac_cv_header_valgrind_memcheck_h" = xyes then : printf "%s\n" "#define HAVE_VALGRIND_MEMCHECK_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_MEMCHECK_H 1" >>confdefs.h fi done for ac_header in valgrind/helgrind.h do : ac_fn_c_check_header_compile "$LINENO" "valgrind/helgrind.h" "ac_cv_header_valgrind_helgrind_h" "$ac_includes_default" if test "x$ac_cv_header_valgrind_helgrind_h" = xyes then : printf "%s\n" "#define HAVE_VALGRIND_HELGRIND_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_HELGRIND_H 1" >>confdefs.h fi done fi if test "$enable_valgrind" = "full" ; then printf "%s\n" "#define STARPU_VALGRIND_FULL 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "sched_yield" "ac_cv_func_sched_yield" if test "x$ac_cv_func_sched_yield" = xyes then : printf "%s\n" "#define STARPU_HAVE_SCHED_YIELD 1" >>confdefs.h fi ac_fn_c_check_header_compile "$LINENO" "aio.h" "ac_cv_header_aio_h" "$ac_includes_default" if test "x$ac_cv_header_aio_h" = xyes then : printf "%s\n" "#define HAVE_AIO_H 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for aio_read in -lrt" >&5 printf %s "checking for aio_read in -lrt... " >&6; } if test ${ac_cv_lib_rt_aio_read+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lrt $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char aio_read (); int main (void) { return aio_read (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_rt_aio_read=yes else $as_nop ac_cv_lib_rt_aio_read=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rt_aio_read" >&5 printf "%s\n" "$ac_cv_lib_rt_aio_read" >&6; } if test "x$ac_cv_lib_rt_aio_read" = xyes then : printf "%s\n" "#define HAVE_LIBRT 1" >>confdefs.h LIBS="-lrt $LIBS" fi #AC_CHECK_HEADERS([libaio.h]) #AC_CHECK_LIB([aio], [io_setup]) ac_fn_c_check_func "$LINENO" "copy_file_range" "ac_cv_func_copy_file_range" if test "x$ac_cv_func_copy_file_range" = xyes then : printf "%s\n" "#define HAVE_COPY_FILE_RANGE 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "mkostemp" "ac_cv_func_mkostemp" if test "x$ac_cv_func_mkostemp" = xyes then : printf "%s\n" "#define HAVE_MKOSTEMP 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "mkdtemp" "ac_cv_func_mkdtemp" if test "x$ac_cv_func_mkdtemp" = xyes then : printf "%s\n" "#define HAVE_MKDTEMP 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "pread" "ac_cv_func_pread" if test "x$ac_cv_func_pread" = xyes then : printf "%s\n" "#define HAVE_PREAD 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "pwrite" "ac_cv_func_pwrite" if test "x$ac_cv_func_pwrite" = xyes then : printf "%s\n" "#define HAVE_PWRITE 1" >>confdefs.h fi # Depending on the user environment, the hdf5 library may link against some # mpi implementation, and bring surprising runtime behavior. # Check whether --enable-hdf5 was given. if test ${enable_hdf5+y} then : enableval=$enable_hdf5; enable_hdf5=$enableval else $as_nop enable_hdf5=no fi if test "x$enable_hdf5" != xno ; then # Check whether --with-hdf5-include-dir was given. if test ${with_hdf5_include_dir+y} then : withval=$with_hdf5_include_dir; hdf5_include_dir="$withval" else $as_nop hdf5_include_dir="" fi hdf5_inc_dir="/usr/include/hdf5 /usr/include/hdf5/serial ${hdf5_include_dir}" enable_include_hdf5=no for f in $hdf5_inc_dir; do if test -n "$f" ; then SAVED_CPPFLAGS="${CPPFLAGS}" CPPFLAGS="$CPPFLAGS -I$f" ac_fn_c_check_header_compile "$LINENO" "hdf5.h" "ac_cv_header_hdf5_h" "$ac_includes_default" if test "x$ac_cv_header_hdf5_h" = xyes then : printf "%s\n" "#define HAVE_HDF5_H 1" >>confdefs.h fi if test "$ac_cv_header_hdf5_h" = "yes" ; then CPPFLAGS="-I${f} ${SAVED_CFLAGS}" enable_include_hdf5=yes break else CPPFLAGS=${SAVED_CPPFLAGS} fi unset ac_cv_header_hdf5_h fi done # Check whether --with-hdf5-lib-dir was given. if test ${with_hdf5_lib_dir+y} then : withval=$with_hdf5_lib_dir; hdf5_libraries_dir="$withval" else $as_nop hdf5_libraries_dir="" fi hdf5_lib_dir="/usr/lib/x86_64-linux-gnu/hdf5 /usr/lib/x86_64-linux-gnu/hdf5/serial ${hdf5_libraries_dir}" enable_libraries_hdf5=no for f in $hdf5_lib_dir; do if test -n "$f" ; then SAVED_LDFLAGS="${LDFLAGS}" LDFLAGS=-L${f} _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lhdf5" >&5 printf %s "checking for main in -lhdf5... " >&6; } if test ${ac_cv_lib_hdf5_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lhdf5 $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_hdf5_main=yes else $as_nop ac_cv_lib_hdf5_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_hdf5_main" >&5 printf "%s\n" "$ac_cv_lib_hdf5_main" >&6; } if test "x$ac_cv_lib_hdf5_main" = xyes then : printf "%s\n" "#define HAVE_LIBHDF5 1" >>confdefs.h LIBS="-lhdf5 $LIBS" fi STARPU_HDF5_LDFLAGS="$STARPU_HDF5_LDFLAGS $LIBS" LIBS=$_LIBS_SAV if test "$ac_cv_lib_hdf5_main" = "yes" ; then LDFLAGS="-L${f} ${SAVED_LDFLAGS} ${STARPU_HDF5_LDFLAGS}" enable_libraries_hdf5=yes break else LDFLAGS=${SAVED_LDFLAGS} fi unset ac_cv_lib_hdf5_main fi done fi if test "x$enable_libraries_hdf5" = "xyes" -a "x$enable_include_hdf5" = "xyes" -a "x$enable_hdf5" != "xno"; then printf "%s\n" "#define STARPU_HAVE_HDF5 1" >>confdefs.h enable_hdf5=yes else enable_hdf5=no fi if test "x$enable_hdf5" = "xyes"; then STARPU_HAVE_HDF5_TRUE= STARPU_HAVE_HDF5_FALSE='#' else STARPU_HAVE_HDF5_TRUE='#' STARPU_HAVE_HDF5_FALSE= fi # This defines HAVE_SYNC_VAL_COMPARE_AND_SWAP { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_val_compare_and_swap" >&5 printf %s "checking whether the target supports __sync_val_compare_and_swap... " >&6; } if test ${ac_cv_have_sync_val_compare_and_swap+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int foo, bar; int main (void) { bar = __sync_val_compare_and_swap(&foo, 0, 1); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_sync_val_compare_and_swap=yes else $as_nop ac_cv_have_sync_val_compare_and_swap=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_val_compare_and_swap" >&5 printf "%s\n" "$ac_cv_have_sync_val_compare_and_swap" >&6; } if test $ac_cv_have_sync_val_compare_and_swap = yes; then printf "%s\n" "#define STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_val_compare_and_swap_8" >&5 printf %s "checking whether the target supports __sync_val_compare_and_swap_8... " >&6; } if test ${ac_cv_have_sync_val_compare_and_swap_8+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int64_t foo, bar; int main (void) { bar = __sync_val_compare_and_swap(&foo, 0, 1); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_sync_val_compare_and_swap_8=yes else $as_nop ac_cv_have_sync_val_compare_and_swap_8=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_val_compare_and_swap_8" >&5 printf "%s\n" "$ac_cv_have_sync_val_compare_and_swap_8" >&6; } if test $ac_cv_have_sync_val_compare_and_swap_8 = yes; then printf "%s\n" "#define STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP_8 1" >>confdefs.h fi # This defines HAVE_SYNC_BOOL_COMPARE_AND_SWAP { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_bool_compare_and_swap" >&5 printf %s "checking whether the target supports __sync_bool_compare_and_swap... " >&6; } if test ${ac_cv_have_sync_bool_compare_and_swap+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int foo, bar; int main (void) { bar = __sync_bool_compare_and_swap(&foo, 0, 1); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_sync_bool_compare_and_swap=yes else $as_nop ac_cv_have_sync_bool_compare_and_swap=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_bool_compare_and_swap" >&5 printf "%s\n" "$ac_cv_have_sync_bool_compare_and_swap" >&6; } if test $ac_cv_have_sync_bool_compare_and_swap = yes; then printf "%s\n" "#define STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_bool_compare_and_swap_8" >&5 printf %s "checking whether the target supports __sync_bool_compare_and_swap_8... " >&6; } if test ${ac_cv_have_sync_bool_compare_and_swap_8+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int64_t foo, bar; int main (void) { bar = __sync_bool_compare_and_swap(&foo, 0, 1); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_sync_bool_compare_and_swap_8=yes else $as_nop ac_cv_have_sync_bool_compare_and_swap_8=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_bool_compare_and_swap_8" >&5 printf "%s\n" "$ac_cv_have_sync_bool_compare_and_swap_8" >&6; } if test $ac_cv_have_sync_bool_compare_and_swap_8 = yes; then printf "%s\n" "#define STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8 1" >>confdefs.h fi # This defines HAVE_SYNC_FETCH_AND_ADD { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_fetch_and_add" >&5 printf %s "checking whether the target supports __sync_fetch_and_add... " >&6; } if test ${ac_cv_have_sync_fetch_and_add+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int foo, bar; int main (void) { bar = __sync_fetch_and_add(&foo, 1); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_sync_fetch_and_add=yes else $as_nop ac_cv_have_sync_fetch_and_add=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_fetch_and_add" >&5 printf "%s\n" "$ac_cv_have_sync_fetch_and_add" >&6; } if test $ac_cv_have_sync_fetch_and_add = yes; then printf "%s\n" "#define STARPU_HAVE_SYNC_FETCH_AND_ADD 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_fetch_and_add_8" >&5 printf %s "checking whether the target supports __sync_fetch_and_add_8... " >&6; } if test ${ac_cv_have_sync_fetch_and_add_8+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int64_t foo, bar; int main (void) { bar = __sync_fetch_and_add(&foo, 1); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_sync_fetch_and_add_8=yes else $as_nop ac_cv_have_sync_fetch_and_add_8=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_fetch_and_add_8" >&5 printf "%s\n" "$ac_cv_have_sync_fetch_and_add_8" >&6; } if test $ac_cv_have_sync_fetch_and_add_8 = yes; then printf "%s\n" "#define STARPU_HAVE_SYNC_FETCH_AND_ADD_8 1" >>confdefs.h fi # This defines HAVE_SYNC_FETCH_AND_OR { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_fetch_and_or" >&5 printf %s "checking whether the target supports __sync_fetch_and_or... " >&6; } if test ${ac_cv_have_sync_fetch_and_or+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int foo, bar; int main (void) { bar = __sync_fetch_and_or(&foo, 1); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_sync_fetch_and_or=yes else $as_nop ac_cv_have_sync_fetch_and_or=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_fetch_and_or" >&5 printf "%s\n" "$ac_cv_have_sync_fetch_and_or" >&6; } if test $ac_cv_have_sync_fetch_and_or = yes; then printf "%s\n" "#define STARPU_HAVE_SYNC_FETCH_AND_OR 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_fetch_and_or_8" >&5 printf %s "checking whether the target supports __sync_fetch_and_or_8... " >&6; } if test ${ac_cv_have_sync_fetch_and_or_8+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int64_t foo, bar; int main (void) { bar = __sync_fetch_and_or(&foo, 1); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_sync_fetch_and_or_8=yes else $as_nop ac_cv_have_sync_fetch_and_or_8=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_fetch_and_or_8" >&5 printf "%s\n" "$ac_cv_have_sync_fetch_and_or_8" >&6; } if test $ac_cv_have_sync_fetch_and_or_8 = yes; then printf "%s\n" "#define STARPU_HAVE_SYNC_FETCH_AND_OR_8 1" >>confdefs.h fi # This defines HAVE_SYNC_LOCK_TEST_AND_SET { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_lock_test_and_set" >&5 printf %s "checking whether the target supports __sync_lock_test_and_set... " >&6; } if test ${ac_cv_have_sync_lock_test_and_set+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int foo, bar; int main (void) { bar = __sync_lock_test_and_set(&foo, 1); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_sync_lock_test_and_set=yes else $as_nop ac_cv_have_sync_lock_test_and_set=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_lock_test_and_set" >&5 printf "%s\n" "$ac_cv_have_sync_lock_test_and_set" >&6; } if test $ac_cv_have_sync_lock_test_and_set = yes; then printf "%s\n" "#define STARPU_HAVE_SYNC_LOCK_TEST_AND_SET 1" >>confdefs.h fi # This defines HAVE_ATOMIC_COMPARE_EXCHANGE_N { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_compare_exchange_n" >&5 printf %s "checking whether the target supports __atomic_compare_exchange_n... " >&6; } if test ${ac_cv_have_atomic_compare_exchange_n+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int foo, bar, baz; int main (void) { baz = __atomic_compare_exchange_n(&foo, &bar, 1, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_atomic_compare_exchange_n=yes else $as_nop ac_cv_have_atomic_compare_exchange_n=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_compare_exchange_n" >&5 printf "%s\n" "$ac_cv_have_atomic_compare_exchange_n" >&6; } if test $ac_cv_have_atomic_compare_exchange_n = yes; then printf "%s\n" "#define STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_compare_exchange_n_8" >&5 printf %s "checking whether the target supports __atomic_compare_exchange_n_8... " >&6; } if test ${ac_cv_have_atomic_compare_exchange_n_8+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int64_t foo, bar, baz; int main (void) { baz = __atomic_compare_exchange_n(&foo, &bar, 1, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_atomic_compare_exchange_n_8=yes else $as_nop ac_cv_have_atomic_compare_exchange_n_8=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_compare_exchange_n_8" >&5 printf "%s\n" "$ac_cv_have_atomic_compare_exchange_n_8" >&6; } if test $ac_cv_have_atomic_compare_exchange_n_8 = yes; then printf "%s\n" "#define STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N_8 1" >>confdefs.h fi # This defines HAVE_ATOMIC_EXCHANGE_N { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_exchange_n" >&5 printf %s "checking whether the target supports __atomic_exchange_n... " >&6; } if test ${ac_cv_have_atomic_exchange_n+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int foo, bar; int main (void) { bar = __atomic_exchange_n(&foo, 1, __ATOMIC_SEQ_CST); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_atomic_exchange_n=yes else $as_nop ac_cv_have_atomic_exchange_n=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_exchange_n" >&5 printf "%s\n" "$ac_cv_have_atomic_exchange_n" >&6; } if test $ac_cv_have_atomic_exchange_n = yes; then printf "%s\n" "#define STARPU_HAVE_ATOMIC_EXCHANGE_N 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_exchange_n_8" >&5 printf %s "checking whether the target supports __atomic_exchange_n_8... " >&6; } if test ${ac_cv_have_atomic_exchange_n_8+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int64_t foo, bar; int main (void) { bar = __atomic_exchange_n(&foo, 1, __ATOMIC_SEQ_CST); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_atomic_exchange_n_8=yes else $as_nop ac_cv_have_atomic_exchange_n_8=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_exchange_n_8" >&5 printf "%s\n" "$ac_cv_have_atomic_exchange_n_8" >&6; } if test $ac_cv_have_atomic_exchange_n_8 = yes; then printf "%s\n" "#define STARPU_HAVE_ATOMIC_EXCHANGE_N_8 1" >>confdefs.h fi # This defines HAVE_ATOMIC_FETCH_ADD { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_fetch_add" >&5 printf %s "checking whether the target supports __atomic_fetch_add... " >&6; } if test ${ac_cv_have_atomic_fetch_add+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int foo, bar; int main (void) { bar = __atomic_fetch_add(&foo, 1, __ATOMIC_SEQ_CST); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_atomic_fetch_add=yes else $as_nop ac_cv_have_atomic_fetch_add=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_fetch_add" >&5 printf "%s\n" "$ac_cv_have_atomic_fetch_add" >&6; } if test $ac_cv_have_atomic_fetch_add = yes; then printf "%s\n" "#define STARPU_HAVE_ATOMIC_FETCH_ADD 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_fetch_add_8" >&5 printf %s "checking whether the target supports __atomic_fetch_add_8... " >&6; } if test ${ac_cv_have_atomic_fetch_add_8+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int64_t foo, bar; int main (void) { bar = __atomic_fetch_add(&foo, 1, __ATOMIC_SEQ_CST); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_atomic_fetch_add_8=yes else $as_nop ac_cv_have_atomic_fetch_add_8=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_fetch_add_8" >&5 printf "%s\n" "$ac_cv_have_atomic_fetch_add_8" >&6; } if test $ac_cv_have_atomic_fetch_add_8 = yes; then printf "%s\n" "#define STARPU_HAVE_ATOMIC_FETCH_ADD_8 1" >>confdefs.h fi # This defines HAVE_ATOMIC_FETCH_OR { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_fetch_or" >&5 printf %s "checking whether the target supports __atomic_fetch_or... " >&6; } if test ${ac_cv_have_atomic_fetch_or+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int foo, bar; int main (void) { bar = __atomic_fetch_or(&foo, 1, __ATOMIC_SEQ_CST); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_atomic_fetch_or=yes else $as_nop ac_cv_have_atomic_fetch_or=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_fetch_or" >&5 printf "%s\n" "$ac_cv_have_atomic_fetch_or" >&6; } if test $ac_cv_have_atomic_fetch_or = yes; then printf "%s\n" "#define STARPU_HAVE_ATOMIC_FETCH_OR 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_fetch_or_8" >&5 printf %s "checking whether the target supports __atomic_fetch_or_8... " >&6; } if test ${ac_cv_have_atomic_fetch_or_8+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int64_t foo, bar; int main (void) { bar = __atomic_fetch_or(&foo, 1, __ATOMIC_SEQ_CST); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_atomic_fetch_or_8=yes else $as_nop ac_cv_have_atomic_fetch_or_8=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_fetch_or_8" >&5 printf "%s\n" "$ac_cv_have_atomic_fetch_or_8" >&6; } if test $ac_cv_have_atomic_fetch_or_8 = yes; then printf "%s\n" "#define STARPU_HAVE_ATOMIC_FETCH_OR_8 1" >>confdefs.h fi # This defines HAVE_ATOMIC_TEST_AND_SET { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __atomic_test_and_set" >&5 printf %s "checking whether the target supports __atomic_test_and_set... " >&6; } if test ${ac_cv_have_atomic_test_and_set+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int foo, bar; int main (void) { bar = __atomic_test_and_set(&foo, __ATOMIC_SEQ_CST); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_atomic_test_and_set=yes else $as_nop ac_cv_have_atomic_test_and_set=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_atomic_test_and_set" >&5 printf "%s\n" "$ac_cv_have_atomic_test_and_set" >&6; } if test $ac_cv_have_atomic_test_and_set = yes; then printf "%s\n" "#define STARPU_HAVE_ATOMIC_TEST_AND_SET 1" >>confdefs.h fi # This defines HAVE_SYNC_SYNCHRONIZE { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports __sync_synchronize" >&5 printf %s "checking whether the target supports __sync_synchronize... " >&6; } if test ${ac_cv_have_sync_synchronize+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { __sync_synchronize(); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_have_sync_synchronize=yes else $as_nop ac_cv_have_sync_synchronize=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sync_synchronize" >&5 printf "%s\n" "$ac_cv_have_sync_synchronize" >&6; } if test $ac_cv_have_sync_synchronize = yes; then printf "%s\n" "#define STARPU_HAVE_SYNC_SYNCHRONIZE 1" >>confdefs.h fi CPPFLAGS="${CPPFLAGS} -D_GNU_SOURCE " _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing set_mempolicy" >&5 printf %s "checking for library containing set_mempolicy... " >&6; } if test ${ac_cv_search_set_mempolicy+y} then : printf %s "(cached) " >&6 else $as_nop ac_func_search_save_LIBS=$LIBS cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char set_mempolicy (); int main (void) { return set_mempolicy (); ; return 0; } _ACEOF for ac_lib in '' numa do if test -z "$ac_lib"; then ac_res="none required" else ac_res=-l$ac_lib LIBS="-l$ac_lib $ac_func_search_save_LIBS" fi if ac_fn_c_try_link "$LINENO" then : ac_cv_search_set_mempolicy=$ac_res fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext if test ${ac_cv_search_set_mempolicy+y} then : break fi done if test ${ac_cv_search_set_mempolicy+y} then : else $as_nop ac_cv_search_set_mempolicy=no fi rm conftest.$ac_ext LIBS=$ac_func_search_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_set_mempolicy" >&5 printf "%s\n" "$ac_cv_search_set_mempolicy" >&6; } ac_res=$ac_cv_search_set_mempolicy if test "$ac_res" != no then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" enable_libnuma=yes else $as_nop enable_libnuma=no fi STARPU_LIBNUMA_LDFLAGS="$STARPU_LIBNUMA_LDFLAGS $LIBS" LIBS=$_LIBS_SAV { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether libnuma is available" >&5 printf %s "checking whether libnuma is available... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_libnuma" >&5 printf "%s\n" "$enable_libnuma" >&6; } if test x$enable_libnuma = xyes; then printf "%s\n" "#define STARPU_HAVE_LIBNUMA 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether statement expressions are available" >&5 printf %s "checking whether statement expressions are available... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #define maxint(a,b) ({int _a = (a), _b = (b); _a > _b ? _a : _b; }) int main (void) { int x=maxint(12,42); ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : statement_expressions="yes" else $as_nop statement_expressions="no" fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $statement_expressions" >&5 printf "%s\n" "$statement_expressions" >&6; } if test x$statement_expressions = xyes; then printf "%s\n" "#define STARPU_HAVE_STATEMENT_EXPRESSIONS 1" >>confdefs.h fi saved_LIBS="${LIBS}" LIBS="${LIBS} -ldl" STARPU_DLOPEN_LDFLAGS="" for ac_func in dlopen do : ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen" if test "x$ac_cv_func_dlopen" = xyes then : printf "%s\n" "#define HAVE_DLOPEN 1" >>confdefs.h STARPU_DLOPEN_LDFLAGS="-ldl" fi done LIBS="$saved_LIBS" ############################################################################### # # # SCHED_CTX settings # # # ############################################################################### { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of sched_ctxs" >&5 printf %s "checking maximum number of sched_ctxs... " >&6; } # Check whether --enable-max_sched_ctxs was given. if test ${enable_max_sched_ctxs+y} then : enableval=$enable_max_sched_ctxs; max_sched_ctxs=$enableval else $as_nop max_sched_ctxs=10 fi if test x$max_sched_ctxs = x -o x$max_sched_ctxs = xyes then as_fn_error $? "The --enable-max_sched_ctxs option needs to be given a number" "$LINENO" 5 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $max_sched_ctxs" >&5 printf "%s\n" "$max_sched_ctxs" >&6; } printf "%s\n" "#define STARPU_NMAX_SCHED_CTXS $max_sched_ctxs" >>confdefs.h # Check whether --enable-sc_hypervisor was given. if test ${enable_sc_hypervisor+y} then : enableval=$enable_sc_hypervisor; enable_sc_hypervisor="yes" else $as_nop enable_sc_hypervisor="no" fi #for pkgconfig if test "x$enable_sc_hypervisor" = "xyes"; then printf "%s\n" "#define STARPU_USE_SC_HYPERVISOR 1" >>confdefs.h # PKG_CHECK_MODULES([SC_HYPERVISOR], [libsc_hypervisor], [], build_sc_hypervisor="yes") STARPU_SC_HYPERVISOR="-lsc_hypervisor" build_sc_hypervisor="yes" else build_sc_hypervisor="no" fi if test "x$build_sc_hypervisor" = "xyes"; then STARPU_BUILD_SC_HYPERVISOR_TRUE= STARPU_BUILD_SC_HYPERVISOR_FALSE='#' else STARPU_BUILD_SC_HYPERVISOR_TRUE='#' STARPU_BUILD_SC_HYPERVISOR_FALSE= fi if test "x$build_sc_hypervisor" = "xyes"; then STARPU_USE_SC_HYPERVISOR_TRUE= STARPU_USE_SC_HYPERVISOR_FALSE='#' else STARPU_USE_SC_HYPERVISOR_TRUE='#' STARPU_USE_SC_HYPERVISOR_FALSE= fi # Check whether --enable-sc_hypervisor_debug was given. if test ${enable_sc_hypervisor_debug+y} then : enableval=$enable_sc_hypervisor_debug; enable_sc_hypervisor_debug="yes" else $as_nop enable_sc_hypervisor_debug="no" fi STARPU_SC_HYPERVISOR_DEBUG=$enable_sc_hypervisor_debug if test "x$enable_sc_hypervisor_debug" = "xyes"; then STARPU_SC_HYPERVISOR_DEBUG_TRUE= STARPU_SC_HYPERVISOR_DEBUG_FALSE='#' else STARPU_SC_HYPERVISOR_DEBUG_TRUE='#' STARPU_SC_HYPERVISOR_DEBUG_FALSE= fi if test "x$enable_sc_hypervisor_debug" = "xyes"; then printf "%s\n" "#define STARPU_SC_HYPERVISOR_DEBUG 1" >>confdefs.h fi ############################################################################### # # # CPUs settings # # # ############################################################################### { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of CPUs" >&5 printf %s "checking maximum number of CPUs... " >&6; } # Check whether --enable-maxcpus was given. if test ${enable_maxcpus+y} then : enableval=$enable_maxcpus; maxcpus=$enableval else $as_nop maxcpus=auto fi if test x$maxcpus = xauto then confcpu=$(getconf _NPROCESSORS_ONLN 2>/dev/null) if test x$confcpu = x then as_fn_error $? "cannot get the number of CPUS, please specify a numerical value with --enable-maxcpus" "$LINENO" 5 fi maxcpus=2 while test $maxcpus -lt $confcpu do maxcpus=`expr $maxcpus \* 2` done fi if test x$maxcpus = x -o x$maxcpus = xyes then as_fn_error $? "The --enable-maxcpus option needs to be given a number" "$LINENO" 5 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $maxcpus" >&5 printf "%s\n" "$maxcpus" >&6; } printf "%s\n" "#define STARPU_MAXCPUS $maxcpus" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CPUs should be used" >&5 printf %s "checking whether CPUs should be used... " >&6; } # Check whether --enable-cpu was given. if test ${enable_cpu+y} then : enableval=$enable_cpu; enable_cpu=$enableval else $as_nop enable_cpu=yes fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_cpu" >&5 printf "%s\n" "$enable_cpu" >&6; } STARPU_USE_CPU=$enable_cpu if test x$enable_cpu = xyes; then STARPU_USE_CPU_TRUE= STARPU_USE_CPU_FALSE='#' else STARPU_USE_CPU_TRUE='#' STARPU_USE_CPU_FALSE= fi if test x$enable_cpu = xyes; then printf "%s\n" "#define STARPU_USE_CPU 1" >>confdefs.h fi ############################################################################### # # # CUDA settings # # # ############################################################################### { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of CUDA devices" >&5 printf %s "checking maximum number of CUDA devices... " >&6; } # Check whether --enable-maxcudadev was given. if test ${enable_maxcudadev+y} then : enableval=$enable_maxcudadev; nmaxcudadev=$enableval else $as_nop nmaxcudadev=4 fi if test x$nmaxcudadev = x -o x$nmaxcudadev = xyes then as_fn_error $? "The --enable-maxcudadev option needs to be given a number" "$LINENO" 5 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxcudadev" >&5 printf "%s\n" "$nmaxcudadev" >&6; } printf "%s\n" "#define STARPU_MAXCUDADEVS $nmaxcudadev" >>confdefs.h # Check whether --enable-cuda was given. if test ${enable_cuda+y} then : enableval=$enable_cuda; else $as_nop enable_cuda=maybe fi # We don't want to be hit by conflicts between simgrid, boost, and CUDA if test x$enable_simgrid = xyes; then if test x$enable_cuda = xyes; then as_fn_error $? "Building against CUDA should not be enabled with simgrid" "$LINENO" 5 fi enable_cuda=no fi #AC_MSG_CHECKING(whether CUDA is available) # Check whether --with-cuda-dir was given. if test ${with_cuda_dir+y} then : withval=$with_cuda_dir; cuda_dir="$withval" # in case this was not explicit yet enable_cuda=yes else $as_nop cuda_dir=no fi # Check whether --with-cuda-include-dir was given. if test ${with_cuda_include_dir+y} then : withval=$with_cuda_include_dir; cuda_include_dir="$withval" # in case this was not explicit yet enable_cuda=yes else $as_nop cuda_include_dir=no fi # Check whether --with-cuda-lib-dir was given. if test ${with_cuda_lib_dir+y} then : withval=$with_cuda_lib_dir; cuda_lib_dir="$withval" # in case this was not explicit yet enable_cuda=yes else $as_nop cuda_lib_dir=no fi if test x$enable_cuda = xyes -o x$enable_cuda = xmaybe; then __cuda_dir="$cuda_dir" __cuda_include_dir="$cuda_include_dir" __cuda_lib_dir="$cuda_lib_dir" if test -z "$__cuda_lib_dir" ; then __cuda_lib_dir=no fi if test -z "$__cuda_include_dir" ; then __cuda_include_dir=no fi if test -z "$__cuda_dir" ; then __cuda_dir=no fi if test "$__cuda_dir" != "no" ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available in $__cuda_dir" >&5 printf %s "checking whether CUDA is available in $__cuda_dir... " >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available" >&5 printf %s "checking whether CUDA is available... " >&6; } fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } if test "$__cuda_include_dir" = "no" -a "$__cuda_dir" != "no" ; then __cuda_include_dir="$__cuda_dir/include" fi SAVED_CPPFLAGS="$CPPFLAGS" have_valid_cuda=no if test "$__cuda_include_dir" != "no" ; then CPPFLAGS="${CPPFLAGS} -I$__cuda_include_dir" fi ac_fn_c_check_header_compile "$LINENO" "cuda.h" "ac_cv_header_cuda_h" "$ac_includes_default" if test "x$ac_cv_header_cuda_h" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi unset ac_cv_header_cuda_h if test "$have_valid_cuda" = "yes" ; then if test "$__cuda_lib_dir" != "no" ; then __cuda_L="-L${__cuda_lib_dir}" SAVED_LDFLAGS="${LDFLAGS}" STARPU_CUDA_LDFLAGS="${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 printf %s "checking for main in -lcudart... " >&6; } if test ${ac_cv_lib_cudart_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcudart $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cudart_main=yes else $as_nop ac_cv_lib_cudart_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } if test "x$ac_cv_lib_cudart_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cudart=ac_cv_lib_cudart_main unset ac_cv_lib_cudart_main if test "$have_valid_cuda" = yes ; then LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" # we also check that CUBLAS is available { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 printf %s "checking for main in -lcublas... " >&6; } if test ${ac_cv_lib_cublas_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcublas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cublas_main=yes else $as_nop ac_cv_lib_cublas_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } if test "x$ac_cv_lib_cublas_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cublas=ac_cv_lib_cublas_main unset ac_cv_lib_cublas_main fi LDFLAGS="${SAVED_LDFLAGS}" else if test "$__cuda_dir" != "no" ; then for __cuda_libdir in lib64 lib lib/x64 lib/Win32 ; do __cuda_L="-L${__cuda_dir}/${__cuda_libdir}" SAVED_LDFLAGS="${LDFLAGS}" STARPU_CUDA_LDFLAGS="${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 printf %s "checking for main in -lcudart... " >&6; } if test ${ac_cv_lib_cudart_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcudart $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cudart_main=yes else $as_nop ac_cv_lib_cudart_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } if test "x$ac_cv_lib_cudart_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cudart=ac_cv_lib_cudart_main unset ac_cv_lib_cudart_main if test "$have_valid_cuda" = yes ; then LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" # we also check that CUBLAS is available { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 printf %s "checking for main in -lcublas... " >&6; } if test ${ac_cv_lib_cublas_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcublas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cublas_main=yes else $as_nop ac_cv_lib_cublas_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } if test "x$ac_cv_lib_cublas_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cublas=ac_cv_lib_cublas_main unset ac_cv_lib_cublas_main fi LDFLAGS="${SAVED_LDFLAGS}" if test "$have_valid_cuda" = yes ; then break fi done else __cuda_L="" SAVED_LDFLAGS="${LDFLAGS}" STARPU_CUDA_LDFLAGS="${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 printf %s "checking for main in -lcudart... " >&6; } if test ${ac_cv_lib_cudart_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcudart $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cudart_main=yes else $as_nop ac_cv_lib_cudart_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } if test "x$ac_cv_lib_cudart_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cudart=ac_cv_lib_cudart_main unset ac_cv_lib_cudart_main if test "$have_valid_cuda" = yes ; then LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" # we also check that CUBLAS is available { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 printf %s "checking for main in -lcublas... " >&6; } if test ${ac_cv_lib_cublas_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcublas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cublas_main=yes else $as_nop ac_cv_lib_cublas_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } if test "x$ac_cv_lib_cublas_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cublas=ac_cv_lib_cublas_main unset ac_cv_lib_cublas_main fi LDFLAGS="${SAVED_LDFLAGS}" fi fi fi if test "$have_valid_cuda" = "no" ; then CPPFLAGS="${SAVED_CPPFLAGS}" unset STARPU_CUDA_LDFLAGS else if test "$NVCC" = "" ; then # Extract the first word of "nvcc", so it can be a program name with args. set dummy nvcc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_NVCC+y} then : printf %s "(cached) " >&6 else $as_nop case $NVCC in [\\/]* | ?:[\\/]*) ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_dummy="$cuda_dir/bin:$PATH:/usr/local/cuda/bin:/usr/bin:/bin" for as_dir in $as_dummy do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_NVCC="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_NVCC" && ac_cv_path_NVCC="not-found" ;; esac fi NVCC=$ac_cv_path_NVCC if test -n "$NVCC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5 printf "%s\n" "$NVCC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test "x$NVCC" = "xnot-found"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'nvcc' not found, disabling CUDA" >&5 printf "%s\n" "$as_me: WARNING: 'nvcc' not found, disabling CUDA" >&2;} have_valid_cuda=no else # This is for very old cuda, to enable the use of double etc. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports sm_13 architecture" >&5 printf %s "checking whether nvcc supports sm_13 architecture... " >&6; } OLD_NVCCFLAGS="$NVCCFLAGS" NVCCFLAGS="$NVCCFLAGS -arch sm_13" echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 if test $? -eq 0 then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } NVCCFLAGS="$OLD_NVCCFLAGS" fi # This is for recent cuda, which complains if we don't actually set an arch!? { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports -Wno-deprecated-gpu-targets" >&5 printf %s "checking whether nvcc supports -Wno-deprecated-gpu-targets... " >&6; } OLD_NVCCFLAGS="$NVCCFLAGS" NVCCFLAGS="$NVCCFLAGS -Wno-deprecated-gpu-targets" echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 if test $? -eq 0 then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } NVCCFLAGS="$OLD_NVCCFLAGS" fi rm -f cuda_test* fi if test -n "$NVCC_CC"; then NVCCFLAGS="${NVCCFLAGS} -ccbin \${NVCC_CC}" fi if test "$__cuda_include_dir" != "no"; then STARPU_CUDA_CPPFLAGS="-I$__cuda_include_dir" NVCCFLAGS="${NVCCFLAGS} -I$__cuda_include_dir" fi fi if test "$have_valid_cuda" = "no" ; then __cuda_dir="$CUDA_ROOT" __cuda_include_dir="$CUDA_INC_PATH" __cuda_lib_dir="$CUDA_LIB_PATH" if test -z "$__cuda_lib_dir" ; then __cuda_lib_dir=no fi if test -z "$__cuda_include_dir" ; then __cuda_include_dir=no fi if test -z "$__cuda_dir" ; then __cuda_dir=no fi if test "$__cuda_dir" != "no" ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available in $__cuda_dir" >&5 printf %s "checking whether CUDA is available in $__cuda_dir... " >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available" >&5 printf %s "checking whether CUDA is available... " >&6; } fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } if test "$__cuda_include_dir" = "no" -a "$__cuda_dir" != "no" ; then __cuda_include_dir="$__cuda_dir/include" fi SAVED_CPPFLAGS="$CPPFLAGS" have_valid_cuda=no if test "$__cuda_include_dir" != "no" ; then CPPFLAGS="${CPPFLAGS} -I$__cuda_include_dir" fi ac_fn_c_check_header_compile "$LINENO" "cuda.h" "ac_cv_header_cuda_h" "$ac_includes_default" if test "x$ac_cv_header_cuda_h" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi unset ac_cv_header_cuda_h if test "$have_valid_cuda" = "yes" ; then if test "$__cuda_lib_dir" != "no" ; then __cuda_L="-L${__cuda_lib_dir}" SAVED_LDFLAGS="${LDFLAGS}" STARPU_CUDA_LDFLAGS="${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 printf %s "checking for main in -lcudart... " >&6; } if test ${ac_cv_lib_cudart_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcudart $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cudart_main=yes else $as_nop ac_cv_lib_cudart_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } if test "x$ac_cv_lib_cudart_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cudart=ac_cv_lib_cudart_main unset ac_cv_lib_cudart_main if test "$have_valid_cuda" = yes ; then LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" # we also check that CUBLAS is available { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 printf %s "checking for main in -lcublas... " >&6; } if test ${ac_cv_lib_cublas_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcublas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cublas_main=yes else $as_nop ac_cv_lib_cublas_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } if test "x$ac_cv_lib_cublas_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cublas=ac_cv_lib_cublas_main unset ac_cv_lib_cublas_main fi LDFLAGS="${SAVED_LDFLAGS}" else if test "$__cuda_dir" != "no" ; then for __cuda_libdir in lib64 lib lib/x64 lib/Win32 ; do __cuda_L="-L${__cuda_dir}/${__cuda_libdir}" SAVED_LDFLAGS="${LDFLAGS}" STARPU_CUDA_LDFLAGS="${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 printf %s "checking for main in -lcudart... " >&6; } if test ${ac_cv_lib_cudart_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcudart $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cudart_main=yes else $as_nop ac_cv_lib_cudart_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } if test "x$ac_cv_lib_cudart_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cudart=ac_cv_lib_cudart_main unset ac_cv_lib_cudart_main if test "$have_valid_cuda" = yes ; then LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" # we also check that CUBLAS is available { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 printf %s "checking for main in -lcublas... " >&6; } if test ${ac_cv_lib_cublas_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcublas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cublas_main=yes else $as_nop ac_cv_lib_cublas_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } if test "x$ac_cv_lib_cublas_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cublas=ac_cv_lib_cublas_main unset ac_cv_lib_cublas_main fi LDFLAGS="${SAVED_LDFLAGS}" if test "$have_valid_cuda" = yes ; then break fi done else __cuda_L="" SAVED_LDFLAGS="${LDFLAGS}" STARPU_CUDA_LDFLAGS="${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 printf %s "checking for main in -lcudart... " >&6; } if test ${ac_cv_lib_cudart_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcudart $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cudart_main=yes else $as_nop ac_cv_lib_cudart_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } if test "x$ac_cv_lib_cudart_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cudart=ac_cv_lib_cudart_main unset ac_cv_lib_cudart_main if test "$have_valid_cuda" = yes ; then LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" # we also check that CUBLAS is available { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 printf %s "checking for main in -lcublas... " >&6; } if test ${ac_cv_lib_cublas_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcublas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cublas_main=yes else $as_nop ac_cv_lib_cublas_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } if test "x$ac_cv_lib_cublas_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cublas=ac_cv_lib_cublas_main unset ac_cv_lib_cublas_main fi LDFLAGS="${SAVED_LDFLAGS}" fi fi fi if test "$have_valid_cuda" = "no" ; then CPPFLAGS="${SAVED_CPPFLAGS}" unset STARPU_CUDA_LDFLAGS else if test "$NVCC" = "" ; then # Extract the first word of "nvcc", so it can be a program name with args. set dummy nvcc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_NVCC+y} then : printf %s "(cached) " >&6 else $as_nop case $NVCC in [\\/]* | ?:[\\/]*) ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_dummy="$cuda_dir/bin:$PATH:/usr/local/cuda/bin:/usr/bin:/bin" for as_dir in $as_dummy do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_NVCC="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_NVCC" && ac_cv_path_NVCC="not-found" ;; esac fi NVCC=$ac_cv_path_NVCC if test -n "$NVCC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5 printf "%s\n" "$NVCC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test "x$NVCC" = "xnot-found"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'nvcc' not found, disabling CUDA" >&5 printf "%s\n" "$as_me: WARNING: 'nvcc' not found, disabling CUDA" >&2;} have_valid_cuda=no else # This is for very old cuda, to enable the use of double etc. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports sm_13 architecture" >&5 printf %s "checking whether nvcc supports sm_13 architecture... " >&6; } OLD_NVCCFLAGS="$NVCCFLAGS" NVCCFLAGS="$NVCCFLAGS -arch sm_13" echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 if test $? -eq 0 then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } NVCCFLAGS="$OLD_NVCCFLAGS" fi # This is for recent cuda, which complains if we don't actually set an arch!? { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports -Wno-deprecated-gpu-targets" >&5 printf %s "checking whether nvcc supports -Wno-deprecated-gpu-targets... " >&6; } OLD_NVCCFLAGS="$NVCCFLAGS" NVCCFLAGS="$NVCCFLAGS -Wno-deprecated-gpu-targets" echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 if test $? -eq 0 then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } NVCCFLAGS="$OLD_NVCCFLAGS" fi rm -f cuda_test* fi if test -n "$NVCC_CC"; then NVCCFLAGS="${NVCCFLAGS} -ccbin \${NVCC_CC}" fi if test "$__cuda_include_dir" != "no"; then STARPU_CUDA_CPPFLAGS="-I$__cuda_include_dir" NVCCFLAGS="${NVCCFLAGS} -I$__cuda_include_dir" fi fi fi if test "$have_valid_cuda" = "no" ; then if test "$NVCC" = "" ; then # Extract the first word of "nvcc", so it can be a program name with args. set dummy nvcc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_NVCC+y} then : printf %s "(cached) " >&6 else $as_nop case $NVCC in [\\/]* | ?:[\\/]*) ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_dummy="$PATH:/usr/local/cuda/bin" for as_dir in $as_dummy do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_NVCC="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_NVCC" && ac_cv_path_NVCC="not-found" ;; esac fi NVCC=$ac_cv_path_NVCC if test -n "$NVCC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5 printf "%s\n" "$NVCC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test "$NVCC" != not-found ; then CUDA_ROOT="$(dirname $NVCC)/.." # Try to find all of cuda just from the availability of nvcc in PATH __cuda_dir="$CUDA_ROOT" __cuda_include_dir="$CUDA_ROOT/include" __cuda_lib_dir="$CUDA_ROOT/lib" if test -z "$__cuda_lib_dir" ; then __cuda_lib_dir=no fi if test -z "$__cuda_include_dir" ; then __cuda_include_dir=no fi if test -z "$__cuda_dir" ; then __cuda_dir=no fi if test "$__cuda_dir" != "no" ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available in $__cuda_dir" >&5 printf %s "checking whether CUDA is available in $__cuda_dir... " >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available" >&5 printf %s "checking whether CUDA is available... " >&6; } fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } if test "$__cuda_include_dir" = "no" -a "$__cuda_dir" != "no" ; then __cuda_include_dir="$__cuda_dir/include" fi SAVED_CPPFLAGS="$CPPFLAGS" have_valid_cuda=no if test "$__cuda_include_dir" != "no" ; then CPPFLAGS="${CPPFLAGS} -I$__cuda_include_dir" fi ac_fn_c_check_header_compile "$LINENO" "cuda.h" "ac_cv_header_cuda_h" "$ac_includes_default" if test "x$ac_cv_header_cuda_h" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi unset ac_cv_header_cuda_h if test "$have_valid_cuda" = "yes" ; then if test "$__cuda_lib_dir" != "no" ; then __cuda_L="-L${__cuda_lib_dir}" SAVED_LDFLAGS="${LDFLAGS}" STARPU_CUDA_LDFLAGS="${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 printf %s "checking for main in -lcudart... " >&6; } if test ${ac_cv_lib_cudart_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcudart $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cudart_main=yes else $as_nop ac_cv_lib_cudart_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } if test "x$ac_cv_lib_cudart_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cudart=ac_cv_lib_cudart_main unset ac_cv_lib_cudart_main if test "$have_valid_cuda" = yes ; then LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" # we also check that CUBLAS is available { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 printf %s "checking for main in -lcublas... " >&6; } if test ${ac_cv_lib_cublas_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcublas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cublas_main=yes else $as_nop ac_cv_lib_cublas_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } if test "x$ac_cv_lib_cublas_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cublas=ac_cv_lib_cublas_main unset ac_cv_lib_cublas_main fi LDFLAGS="${SAVED_LDFLAGS}" else if test "$__cuda_dir" != "no" ; then for __cuda_libdir in lib64 lib lib/x64 lib/Win32 ; do __cuda_L="-L${__cuda_dir}/${__cuda_libdir}" SAVED_LDFLAGS="${LDFLAGS}" STARPU_CUDA_LDFLAGS="${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 printf %s "checking for main in -lcudart... " >&6; } if test ${ac_cv_lib_cudart_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcudart $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cudart_main=yes else $as_nop ac_cv_lib_cudart_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } if test "x$ac_cv_lib_cudart_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cudart=ac_cv_lib_cudart_main unset ac_cv_lib_cudart_main if test "$have_valid_cuda" = yes ; then LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" # we also check that CUBLAS is available { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 printf %s "checking for main in -lcublas... " >&6; } if test ${ac_cv_lib_cublas_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcublas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cublas_main=yes else $as_nop ac_cv_lib_cublas_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } if test "x$ac_cv_lib_cublas_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cublas=ac_cv_lib_cublas_main unset ac_cv_lib_cublas_main fi LDFLAGS="${SAVED_LDFLAGS}" if test "$have_valid_cuda" = yes ; then break fi done else __cuda_L="" SAVED_LDFLAGS="${LDFLAGS}" STARPU_CUDA_LDFLAGS="${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 printf %s "checking for main in -lcudart... " >&6; } if test ${ac_cv_lib_cudart_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcudart $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cudart_main=yes else $as_nop ac_cv_lib_cudart_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } if test "x$ac_cv_lib_cudart_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cudart=ac_cv_lib_cudart_main unset ac_cv_lib_cudart_main if test "$have_valid_cuda" = yes ; then LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" # we also check that CUBLAS is available { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 printf %s "checking for main in -lcublas... " >&6; } if test ${ac_cv_lib_cublas_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcublas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cublas_main=yes else $as_nop ac_cv_lib_cublas_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } if test "x$ac_cv_lib_cublas_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cublas=ac_cv_lib_cublas_main unset ac_cv_lib_cublas_main fi LDFLAGS="${SAVED_LDFLAGS}" fi fi fi if test "$have_valid_cuda" = "no" ; then CPPFLAGS="${SAVED_CPPFLAGS}" unset STARPU_CUDA_LDFLAGS else if test "$NVCC" = "" ; then # Extract the first word of "nvcc", so it can be a program name with args. set dummy nvcc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_NVCC+y} then : printf %s "(cached) " >&6 else $as_nop case $NVCC in [\\/]* | ?:[\\/]*) ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_dummy="$cuda_dir/bin:$PATH:/usr/local/cuda/bin:/usr/bin:/bin" for as_dir in $as_dummy do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_NVCC="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_NVCC" && ac_cv_path_NVCC="not-found" ;; esac fi NVCC=$ac_cv_path_NVCC if test -n "$NVCC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5 printf "%s\n" "$NVCC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test "x$NVCC" = "xnot-found"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'nvcc' not found, disabling CUDA" >&5 printf "%s\n" "$as_me: WARNING: 'nvcc' not found, disabling CUDA" >&2;} have_valid_cuda=no else # This is for very old cuda, to enable the use of double etc. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports sm_13 architecture" >&5 printf %s "checking whether nvcc supports sm_13 architecture... " >&6; } OLD_NVCCFLAGS="$NVCCFLAGS" NVCCFLAGS="$NVCCFLAGS -arch sm_13" echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 if test $? -eq 0 then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } NVCCFLAGS="$OLD_NVCCFLAGS" fi # This is for recent cuda, which complains if we don't actually set an arch!? { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports -Wno-deprecated-gpu-targets" >&5 printf %s "checking whether nvcc supports -Wno-deprecated-gpu-targets... " >&6; } OLD_NVCCFLAGS="$NVCCFLAGS" NVCCFLAGS="$NVCCFLAGS -Wno-deprecated-gpu-targets" echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 if test $? -eq 0 then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } NVCCFLAGS="$OLD_NVCCFLAGS" fi rm -f cuda_test* fi if test -n "$NVCC_CC"; then NVCCFLAGS="${NVCCFLAGS} -ccbin \${NVCC_CC}" fi if test "$__cuda_include_dir" != "no"; then STARPU_CUDA_CPPFLAGS="-I$__cuda_include_dir" NVCCFLAGS="${NVCCFLAGS} -I$__cuda_include_dir" fi fi cuda_dir=$(dirname $NVCC)/.. else unset NVCC fi fi if test "$have_valid_cuda" = "no" ; then for f in "/usr/local/cuda" "/c/cuda" "/cygdrive/c/cuda" "/opt/cuda" "$CUDA_ROOT" "$CUDA_PATH" "$CUDA_INC_PATH/.." "$CUDA_INC/.." "$CUDA_BIN/.." "$CUDA_SDK/.." "$CUDA_INSTALL_PATH" "$CUDA_TOOLKIT"; do if test -n "$f" ; then __cuda_dir="$f" __cuda_include_dir="no" __cuda_lib_dir="no" if test -z "$__cuda_lib_dir" ; then __cuda_lib_dir=no fi if test -z "$__cuda_include_dir" ; then __cuda_include_dir=no fi if test -z "$__cuda_dir" ; then __cuda_dir=no fi if test "$__cuda_dir" != "no" ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available in $__cuda_dir" >&5 printf %s "checking whether CUDA is available in $__cuda_dir... " >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is available" >&5 printf %s "checking whether CUDA is available... " >&6; } fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } if test "$__cuda_include_dir" = "no" -a "$__cuda_dir" != "no" ; then __cuda_include_dir="$__cuda_dir/include" fi SAVED_CPPFLAGS="$CPPFLAGS" have_valid_cuda=no if test "$__cuda_include_dir" != "no" ; then CPPFLAGS="${CPPFLAGS} -I$__cuda_include_dir" fi ac_fn_c_check_header_compile "$LINENO" "cuda.h" "ac_cv_header_cuda_h" "$ac_includes_default" if test "x$ac_cv_header_cuda_h" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi unset ac_cv_header_cuda_h if test "$have_valid_cuda" = "yes" ; then if test "$__cuda_lib_dir" != "no" ; then __cuda_L="-L${__cuda_lib_dir}" SAVED_LDFLAGS="${LDFLAGS}" STARPU_CUDA_LDFLAGS="${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 printf %s "checking for main in -lcudart... " >&6; } if test ${ac_cv_lib_cudart_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcudart $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cudart_main=yes else $as_nop ac_cv_lib_cudart_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } if test "x$ac_cv_lib_cudart_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cudart=ac_cv_lib_cudart_main unset ac_cv_lib_cudart_main if test "$have_valid_cuda" = yes ; then LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" # we also check that CUBLAS is available { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 printf %s "checking for main in -lcublas... " >&6; } if test ${ac_cv_lib_cublas_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcublas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cublas_main=yes else $as_nop ac_cv_lib_cublas_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } if test "x$ac_cv_lib_cublas_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cublas=ac_cv_lib_cublas_main unset ac_cv_lib_cublas_main fi LDFLAGS="${SAVED_LDFLAGS}" else if test "$__cuda_dir" != "no" ; then for __cuda_libdir in lib64 lib lib/x64 lib/Win32 ; do __cuda_L="-L${__cuda_dir}/${__cuda_libdir}" SAVED_LDFLAGS="${LDFLAGS}" STARPU_CUDA_LDFLAGS="${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 printf %s "checking for main in -lcudart... " >&6; } if test ${ac_cv_lib_cudart_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcudart $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cudart_main=yes else $as_nop ac_cv_lib_cudart_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } if test "x$ac_cv_lib_cudart_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cudart=ac_cv_lib_cudart_main unset ac_cv_lib_cudart_main if test "$have_valid_cuda" = yes ; then LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" # we also check that CUBLAS is available { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 printf %s "checking for main in -lcublas... " >&6; } if test ${ac_cv_lib_cublas_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcublas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cublas_main=yes else $as_nop ac_cv_lib_cublas_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } if test "x$ac_cv_lib_cublas_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cublas=ac_cv_lib_cublas_main unset ac_cv_lib_cublas_main fi LDFLAGS="${SAVED_LDFLAGS}" if test "$have_valid_cuda" = yes ; then break fi done else __cuda_L="" SAVED_LDFLAGS="${LDFLAGS}" STARPU_CUDA_LDFLAGS="${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA library is available in $__cuda_L" >&5 printf %s "checking whether CUDA library is available in $__cuda_L... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcudart" >&5 printf %s "checking for main in -lcudart... " >&6; } if test ${ac_cv_lib_cudart_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcudart $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cudart_main=yes else $as_nop ac_cv_lib_cudart_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_main" >&5 printf "%s\n" "$ac_cv_lib_cudart_main" >&6; } if test "x$ac_cv_lib_cudart_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cudart=ac_cv_lib_cudart_main unset ac_cv_lib_cudart_main if test "$have_valid_cuda" = yes ; then LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" # we also check that CUBLAS is available { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcublas" >&5 printf %s "checking for main in -lcublas... " >&6; } if test ${ac_cv_lib_cublas_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcublas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cublas_main=yes else $as_nop ac_cv_lib_cublas_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublas_main" >&5 printf "%s\n" "$ac_cv_lib_cublas_main" >&6; } if test "x$ac_cv_lib_cublas_main" = xyes then : have_valid_cuda=yes else $as_nop have_valid_cuda=no fi ac_cv_lib_cublas=ac_cv_lib_cublas_main unset ac_cv_lib_cublas_main fi LDFLAGS="${SAVED_LDFLAGS}" fi fi fi if test "$have_valid_cuda" = "no" ; then CPPFLAGS="${SAVED_CPPFLAGS}" unset STARPU_CUDA_LDFLAGS else if test "$NVCC" = "" ; then # Extract the first word of "nvcc", so it can be a program name with args. set dummy nvcc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_NVCC+y} then : printf %s "(cached) " >&6 else $as_nop case $NVCC in [\\/]* | ?:[\\/]*) ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_dummy="$cuda_dir/bin:$PATH:/usr/local/cuda/bin:/usr/bin:/bin" for as_dir in $as_dummy do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_NVCC="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_NVCC" && ac_cv_path_NVCC="not-found" ;; esac fi NVCC=$ac_cv_path_NVCC if test -n "$NVCC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5 printf "%s\n" "$NVCC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi if test "x$NVCC" = "xnot-found"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'nvcc' not found, disabling CUDA" >&5 printf "%s\n" "$as_me: WARNING: 'nvcc' not found, disabling CUDA" >&2;} have_valid_cuda=no else # This is for very old cuda, to enable the use of double etc. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports sm_13 architecture" >&5 printf %s "checking whether nvcc supports sm_13 architecture... " >&6; } OLD_NVCCFLAGS="$NVCCFLAGS" NVCCFLAGS="$NVCCFLAGS -arch sm_13" echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 if test $? -eq 0 then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } NVCCFLAGS="$OLD_NVCCFLAGS" fi # This is for recent cuda, which complains if we don't actually set an arch!? { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvcc supports -Wno-deprecated-gpu-targets" >&5 printf %s "checking whether nvcc supports -Wno-deprecated-gpu-targets... " >&6; } OLD_NVCCFLAGS="$NVCCFLAGS" NVCCFLAGS="$NVCCFLAGS -Wno-deprecated-gpu-targets" echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 if test $? -eq 0 then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } NVCCFLAGS="$OLD_NVCCFLAGS" fi rm -f cuda_test* fi if test -n "$NVCC_CC"; then NVCCFLAGS="${NVCCFLAGS} -ccbin \${NVCC_CC}" fi if test "$__cuda_include_dir" != "no"; then STARPU_CUDA_CPPFLAGS="-I$__cuda_include_dir" NVCCFLAGS="${NVCCFLAGS} -I$__cuda_include_dir" fi fi if test "$have_valid_cuda" = "yes" ; then break fi fi done fi # Check cuda is compatible with the C compiler { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA is working" >&5 printf %s "checking whether CUDA is working... " >&6; } if test "$have_valid_cuda" = "yes" ; then SAVED_CPPFLAGS="${CPPFLAGS}" CPPFLAGS="${CPPFLAGS} ${STARPU_CUDA_CPPFLAGS}" SAVED_LDFLAGS="${LDFLAGS}" LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS} -lcudart" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : if test "$cross_compiling" = yes then : { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot run test program while cross compiling See \`config.log' for more details" "$LINENO" 5; } else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO" then : have_valid_cuda="yes" else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CUDA found and can be compiled, but compiled application can not be run, is the CUDA path missing in LD_LIBRARY_PATH?" >&5 printf "%s\n" "CUDA found and can be compiled, but compiled application can not be run, is the CUDA path missing in LD_LIBRARY_PATH?" >&6; } have_valid_cuda="no" fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi else $as_nop as_fn_error $? "CUDA found, but cuda.h could not be compiled" "$LINENO" 5 have_valid_cuda="no" fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CPPFLAGS="${SAVED_CPPFLAGS}" LDFLAGS="${SAVED_LDFLAGS}" fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $have_valid_cuda" >&5 printf "%s\n" "$have_valid_cuda" >&6; } # in case CUDA was explicitly required, but is not available, this is an error if test x$enable_cuda = xyes -a x$have_valid_cuda = xno; then as_fn_error $? "cannot find CUDA" "$LINENO" 5 fi # now we enable CUDA if and only if a proper setup is available enable_cuda=$have_valid_cuda fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA should be used" >&5 printf %s "checking whether CUDA should be used... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_cuda" >&5 printf "%s\n" "$enable_cuda" >&6; } STARPU_USE_CUDA=$enable_cuda if test x$enable_cuda = xyes; then STARPU_USE_CUDA_TRUE= STARPU_USE_CUDA_FALSE='#' else STARPU_USE_CUDA_TRUE='#' STARPU_USE_CUDA_FALSE= fi cc_or_nvcc=$CC if test x$enable_cuda = xyes; then cc_or_nvcc=$NVCC printf "%s\n" "#define STARPU_USE_CUDA 1" >>confdefs.h # On Darwin, the libstdc++ dependency is not automatically added by nvcc # case "$target" in # *-*darwin*) AC_HAVE_LIBRARY([stdc++], []) ;; # #*-*darwin*) AC_HAVE_LIBRARY([stdc++], [STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lstdc++"]) ;; # esac STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcudart -lcublas" STARPU_CUFFT_LDFLAGS="-lcufft" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu case \ $NVCCFLAGS\ in *\ -std=*\ *) ;; *) SAVED_CXX="$CXX" CXX="$NVCC" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifdef STARPU_HAVE_SIMGRID_MSG_H #include #include #else #include #endif int main (void) { ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO" then : else $as_nop NVCCFLAGS="-std=c++11 $NVCCFLAGS" fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CXX="$SAVED_CXX" esac ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test "$F77" = "gfortran" -o "$FC" = "gfortran" ; then STARPU_CUDA_FORTRAN_LDFLAGS="-lgfortran" fi #in case this is a 64bit setup, we tell nvcc to use a -m64 flag, if missing from existing flags if test x$SIZEOF_VOID_P = x8; then case \ $NVCCFLAGS\ in *\ -m64\ *) ;; *) NVCCFLAGS="${NVCCFLAGS} -m64" ;; esac fi SAVED_CPPFLAGS="${CPPFLAGS}" CPPFLAGS="${CPPFLAGS} ${STARPU_CUDA_CPPFLAGS}" SAVED_LDFLAGS="${LDFLAGS}" LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS}" SAVED_LIBS="${LIBS}" ac_fn_c_check_header_compile "$LINENO" "cuda_gl_interop.h" "ac_cv_header_cuda_gl_interop_h" "$ac_includes_default" if test "x$ac_cv_header_cuda_gl_interop_h" = xyes then : printf "%s\n" "#define HAVE_CUDA_GL_INTEROP_H 1" >>confdefs.h fi for ac_header in cublasLt.h do : ac_fn_c_check_header_compile "$LINENO" "cublasLt.h" "ac_cv_header_cublasLt_h" "$ac_includes_default" if test "x$ac_cv_header_cublasLt_h" = xyes then : printf "%s\n" "#define HAVE_CUBLASLT_H 1" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cublasLtCreate in -lcublasLt" >&5 printf %s "checking for cublasLtCreate in -lcublasLt... " >&6; } if test ${ac_cv_lib_cublasLt_cublasLtCreate+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcublasLt $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char cublasLtCreate (); int main (void) { return cublasLtCreate (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cublasLt_cublasLtCreate=yes else $as_nop ac_cv_lib_cublasLt_cublasLtCreate=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cublasLt_cublasLtCreate" >&5 printf "%s\n" "$ac_cv_lib_cublasLt_cublasLtCreate" >&6; } if test "x$ac_cv_lib_cublasLt_cublasLtCreate" = xyes then : printf "%s\n" "#define STARPU_HAVE_LIBCUBLASLT 1" >>confdefs.h STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcublasLt" fi fi done { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cusparseCreate in -lcusparse" >&5 printf %s "checking for cusparseCreate in -lcusparse... " >&6; } if test ${ac_cv_lib_cusparse_cusparseCreate+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcusparse $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char cusparseCreate (); int main (void) { return cusparseCreate (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cusparse_cusparseCreate=yes else $as_nop ac_cv_lib_cusparse_cusparseCreate=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cusparse_cusparseCreate" >&5 printf "%s\n" "$ac_cv_lib_cusparse_cusparseCreate" >&6; } if test "x$ac_cv_lib_cusparse_cusparseCreate" = xyes then : printf "%s\n" "#define STARPU_HAVE_LIBCUSPARSE 1" >>confdefs.h STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcusparse" fi ac_fn_check_decl "$LINENO" "cusparseSetStream" "ac_cv_have_decl_cusparseSetStream" "#include " "$ac_c_undeclared_builtin_options" "CFLAGS" if test "x$ac_cv_have_decl_cusparseSetStream" = xyes then : ac_have_decl=1 else $as_nop ac_have_decl=0 fi printf "%s\n" "#define HAVE_DECL_CUSPARSESETSTREAM $ac_have_decl" >>confdefs.h # we also check that CuSolver is available { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cusolverDnCreate in -lcusolver" >&5 printf %s "checking for cusolverDnCreate in -lcusolver... " >&6; } if test ${ac_cv_lib_cusolver_cusolverDnCreate+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcusolver $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char cusolverDnCreate (); int main (void) { return cusolverDnCreate (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cusolver_cusolverDnCreate=yes else $as_nop ac_cv_lib_cusolver_cusolverDnCreate=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cusolver_cusolverDnCreate" >&5 printf "%s\n" "$ac_cv_lib_cusolver_cusolverDnCreate" >&6; } if test "x$ac_cv_lib_cusolver_cusolverDnCreate" = xyes then : printf "%s\n" "#define STARPU_HAVE_LIBCUSOLVER 1" >>confdefs.h STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcusolver" fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether nvidia-ml can be used" >&5 printf %s "checking whether nvidia-ml can be used... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main (void) { __typeof__(nvmlInit) *mynvmlInit = nvmlInit; mynvmlInit(); ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : printf "%s\n" "#define STARPU_HAVE_NVML_H 1" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } ac_fn_check_decl "$LINENO" "nvmlDeviceGetTotalEnergyConsumption" "ac_cv_have_decl_nvmlDeviceGetTotalEnergyConsumption" "#include " "$ac_c_undeclared_builtin_options" "CFLAGS" if test "x$ac_cv_have_decl_nvmlDeviceGetTotalEnergyConsumption" = xyes then : ac_have_decl=1 else $as_nop ac_have_decl=0 fi printf "%s\n" "#define HAVE_DECL_NVMLDEVICEGETTOTALENERGYCONSUMPTION $ac_have_decl" >>confdefs.h else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: nvml.h could not be compiled. This will prevent from correct understanding of the machine topology." >&5 printf "%s\n" "$as_me: WARNING: nvml.h could not be compiled. This will prevent from correct understanding of the machine topology." >&2;} NO_NVML="Warning: no nvml.h found" fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CPPFLAGS="${SAVED_CPPFLAGS}" LDFLAGS="${SAVED_LDFLAGS}" LIBS="${SAVED_LIBS}" fi CC_OR_NVCC=$cc_or_nvcc have_magma=no if test x$enable_cuda = xyes; then pkg_failed=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for MAGMA" >&5 printf %s "checking for MAGMA... " >&6; } if test -n "$PKG_CONFIG"; then if test -n "$MAGMA_CFLAGS"; then pkg_cv_MAGMA_CFLAGS="$MAGMA_CFLAGS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"magma\""; } >&5 ($PKG_CONFIG --exists --print-errors "magma") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_MAGMA_CFLAGS=`$PKG_CONFIG --cflags "magma" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$MAGMA_LIBS"; then pkg_cv_MAGMA_LIBS="$MAGMA_LIBS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"magma\""; } >&5 ($PKG_CONFIG --exists --print-errors "magma") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_MAGMA_LIBS=`$PKG_CONFIG --libs "magma" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then MAGMA_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "magma"` else MAGMA_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "magma"` fi # Put the nasty error message in config.log where it belongs echo "$MAGMA_PKG_ERRORS" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } : elif test $pkg_failed = untried; then : else MAGMA_CFLAGS=$pkg_cv_MAGMA_CFLAGS MAGMA_LIBS=$pkg_cv_MAGMA_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } printf "%s\n" "#define STARPU_HAVE_MAGMA 1" >>confdefs.h STARPU_HAVE_MAGMA=1 have_magma=yes fi fi if test x$have_magma = xyes; then STARPU_HAVE_MAGMA_TRUE= STARPU_HAVE_MAGMA_FALSE='#' else STARPU_HAVE_MAGMA_TRUE='#' STARPU_HAVE_MAGMA_FALSE= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MAGMA should be used" >&5 printf %s "checking whether MAGMA should be used... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $have_magma" >&5 printf "%s\n" "$have_magma" >&6; } # cufftDoubleComplex may not be available on an old CUDA setup ac_fn_c_check_type "$LINENO" "cufftDoubleComplex" "ac_cv_type_cufftDoubleComplex" "#include " if test "x$ac_cv_type_cufftDoubleComplex" = xyes then : have_cufftdoublecomplex=yes else $as_nop have_cufftdoublecomplex=no fi if test x$have_cufftdoublecomplex = xyes; then STARPU_HAVE_CUFFTDOUBLECOMPLEX_TRUE= STARPU_HAVE_CUFFTDOUBLECOMPLEX_FALSE='#' else STARPU_HAVE_CUFFTDOUBLECOMPLEX_TRUE='#' STARPU_HAVE_CUFFTDOUBLECOMPLEX_FALSE= fi if test x$have_cufftdoublecomplex = xyes; then printf "%s\n" "#define STARPU_HAVE_CUFFTDOUBLECOMPLEX 1" >>confdefs.h fi # The CURAND library is only available since CUDA 3.2 have_curand=$enable_cuda if test x$enable_cuda = xyes; then SAVED_LDFLAGS="${LDFLAGS}" LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lcurand" >&5 printf %s "checking for main in -lcurand... " >&6; } if test ${ac_cv_lib_curand_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcurand $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_curand_main=yes else $as_nop ac_cv_lib_curand_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_curand_main" >&5 printf "%s\n" "$ac_cv_lib_curand_main" >&6; } if test "x$ac_cv_lib_curand_main" = xyes then : have_curand=yes else $as_nop have_curand=no fi ac_cv_lib_curand=ac_cv_lib_curand_main LDFLAGS="${SAVED_LDFLAGS}" fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CURAND is available" >&5 printf %s "checking whether CURAND is available... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $have_curand" >&5 printf "%s\n" "$have_curand" >&6; } if test x$have_curand = xyes; then printf "%s\n" "#define STARPU_HAVE_CURAND 1" >>confdefs.h STARPU_CURAND_LDFLAGS="$STARPU_CURAND_LDFLAGS -lcurand" fi # Peer transfers are only supported since CUDA 4.0 # Disable them if user explicitly wants to disable them # Check whether --enable-cuda_memcpy_peer was given. if test ${enable_cuda_memcpy_peer+y} then : enableval=$enable_cuda_memcpy_peer; else $as_nop enable_cuda_memcpy_peer=$enable_cuda fi if test x$enable_cuda_memcpy_peer = xyes; then printf "%s\n" "#define STARPU_HAVE_CUDA_MEMCPY_PEER 1" >>confdefs.h fi # Check whether --enable-cuda_map was given. if test ${enable_cuda_map+y} then : enableval=$enable_cuda_map; else $as_nop enable_cuda_map=yes fi if test x$enable_cuda_map = xyes -a x$enable_cuda = xyes ; then SAVED_LDFLAGS="${LDFLAGS}" LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS}" ac_fn_c_check_member "$LINENO" "struct cudaDeviceProp" "canMapHostMemory" "ac_cv_member_struct_cudaDeviceProp_canMapHostMemory" "#include " if test "x$ac_cv_member_struct_cudaDeviceProp_canMapHostMemory" = xyes then : printf "%s\n" "#define STARPU_HAVE_CUDA_CANMAPHOST 1" >>confdefs.h fi ac_fn_c_check_member "$LINENO" "struct cudaDeviceProp" "unifiedAddressing" "ac_cv_member_struct_cudaDeviceProp_unifiedAddressing" "#include " if test "x$ac_cv_member_struct_cudaDeviceProp_unifiedAddressing" = xyes then : printf "%s\n" "#define STARPU_HAVE_CUDA_UNIFIEDADDR 1" >>confdefs.h fi ac_fn_c_check_member "$LINENO" "struct cudaDeviceProp" "managedMemory" "ac_cv_member_struct_cudaDeviceProp_managedMemory" "#include " if test "x$ac_cv_member_struct_cudaDeviceProp_managedMemory" = xyes then : printf "%s\n" "#define STARPU_HAVE_CUDA_MNGMEM 1" >>confdefs.h fi ac_fn_c_check_member "$LINENO" "struct cudaDeviceProp" "pageableMemoryAccess" "ac_cv_member_struct_cudaDeviceProp_pageableMemoryAccess" "#include " if test "x$ac_cv_member_struct_cudaDeviceProp_pageableMemoryAccess" = xyes then : printf "%s\n" "#define STARPU_HAVE_CUDA_PAGEABLEMEM 1" >>confdefs.h fi ac_fn_c_check_member "$LINENO" "struct cudaPointerAttributes" "type" "ac_cv_member_struct_cudaPointerAttributes_type" "#include " if test "x$ac_cv_member_struct_cudaPointerAttributes_type" = xyes then : printf "%s\n" "#define STARPU_HAVE_CUDA_POINTER_TYPE 1" >>confdefs.h fi LDFLAGS="${SAVED_LDFLAGS}" printf "%s\n" "#define STARPU_USE_CUDA_MAP 1" >>confdefs.h fi if test x$enable_cuda = xyes; then # Check whether --enable-cuda0 was given. if test ${enable_cuda0+y} then : enableval=$enable_cuda0; else $as_nop enable_cuda0=no fi if test x$enable_cuda0 = xyes; then printf "%s\n" "#define STARPU_USE_CUDA0 1" >>confdefs.h fi # Check whether --enable-cuda1 was given. if test ${enable_cuda1+y} then : enableval=$enable_cuda1; else $as_nop enable_cuda1=no fi if test x$enable_cuda1 = xyes; then printf "%s\n" "#define STARPU_USE_CUDA1 1" >>confdefs.h fi if test x$starpu_windows != xyes ; then STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lstdc++" fi fi if test x$enable_cuda0 = xyes; then STARPU_USE_CUDA0_TRUE= STARPU_USE_CUDA0_FALSE='#' else STARPU_USE_CUDA0_TRUE='#' STARPU_USE_CUDA0_FALSE= fi if test x$enable_cuda1 = xyes; then STARPU_USE_CUDA1_TRUE= STARPU_USE_CUDA1_FALSE='#' else STARPU_USE_CUDA1_TRUE='#' STARPU_USE_CUDA1_FALSE= fi ############################################################################### # # # HIP settings # # # ############################################################################### { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of HIP devices" >&5 printf %s "checking maximum number of HIP devices... " >&6; } # Check whether --enable-maxhipdev was given. if test ${enable_maxhipdev+y} then : enableval=$enable_maxhipdev; nmaxhipdev=$enableval else $as_nop nmaxhipdev=8 fi if test x$nmaxhipdev = x -o x$nmaxhipdev = xyes then as_fn_error $? "The --enable-maxhipdev option needs to be given a number" "$LINENO" 5 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxhipdev" >&5 printf "%s\n" "$nmaxhipdev" >&6; } printf "%s\n" "#define STARPU_MAXHIPDEVS $nmaxhipdev" >>confdefs.h # Check whether --enable-hip was given. if test ${enable_hip+y} then : enableval=$enable_hip; else $as_nop enable_hip=maybe fi if test x$enable_cuda = xyes; then # hip_runtime.h conflicts with cuda_runtime.h # see https://github.com/ROCm-Developer-Tools/HIP/issues/2703 if test x$enable_hip = xyes ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Disabling HIP as CUDA is enabled, see https://github.com/ROCm-Developer-Tools/HIP/issues/2703" >&5 printf "%s\n" "$as_me: WARNING: Disabling HIP as CUDA is enabled, see https://github.com/ROCm-Developer-Tools/HIP/issues/2703" >&2;} fi enable_hip=no fi if test x$enable_simgrid = xyes; then if test x$enable_hip = xyes; then as_fn_error $? "HIP not supported with simgrid" "$LINENO" 5 fi enable_hip=no fi have_valid_hip=no if test x$enable_hip != xno; then # Extract the first word of "hipconfig", so it can be a program name with args. set dummy hipconfig; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_HIPCONFIG+y} then : printf %s "(cached) " >&6 else $as_nop case $HIPCONFIG in [\\/]* | ?:[\\/]*) ac_cv_path_HIPCONFIG="$HIPCONFIG" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_HIPCONFIG="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_HIPCONFIG" && ac_cv_path_HIPCONFIG="not-found" ;; esac fi HIPCONFIG=$ac_cv_path_HIPCONFIG if test -n "$HIPCONFIG"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $HIPCONFIG" >&5 printf "%s\n" "$HIPCONFIG" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$HIPCONFIG" = "xnot-found"; then if test x$enable_hip = xyes; then as_fn_error $? "'hipconfig' not found for HIP support" "$LINENO" 5 fi have_valid_hip=no else HIP_PLATFORM="$(hipconfig --platform)" HIP_DIR="$(hipconfig --path)" HIP_LIB_DIR="$HIP_DIR/lib" HIP_INCLUDE_DIR="$HIP_DIR/include" STARPU_HIP_CPPFLAGS="$(hipconfig --cpp_config | tr -d '\n') -L$HIP_LIB_DIR" if test "$HIP_PLATFORM" = "nvidia"; then STARPU_HIP_CPPFLAGS="$STARPU_HIP_CPPFLAGS -DSTARPU_HIP_PLATFORM_NVIDIA" fi if test "$HIP_PLATFORM" = "amd"; then STARPU_HIP_CPPFLAGS="$STARPU_HIP_CPPFLAGS -DSTARPU_HIP_PLATFORM_AMD" fi HIP_CLANG_PATH="$(hipconfig --hipclangpath)" have_valid_hip=yes # Check whether --with-hipblas was given. if test ${with_hipblas+y} then : withval=$with_hipblas; custom_hipblas_dir="$withval" fi if test x$custom_hipblas_dir != x; then HIPBLAS_INCLUDE_DIR="$custom_hipblas_dir/include" HIPBLAS_LIB_DIR="$custom_hipblas_dir/lib" STARPU_HIPBLAS_DIRS="-I$HIPBLAS_INCLUDE_DIR -L$HIPBLAS_LIB_DIR" fi HIPCCFLAGS="$HIPCCFLAGS $STARPU_HIP_CPPFLAGS" fi fi if test "$HIP_PLATFORM" = "amd"; then SAVED_LIBS=${LIBS} SAVED_LDFLAGS="${LDFLAGS}" LDFLAGS="${LDFLAGS} -L$HIP_LIB_DIR" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing hipMemGetInfo" >&5 printf %s "checking for library containing hipMemGetInfo... " >&6; } if test ${ac_cv_search_hipMemGetInfo+y} then : printf %s "(cached) " >&6 else $as_nop ac_func_search_save_LIBS=$LIBS cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char hipMemGetInfo (); int main (void) { return hipMemGetInfo (); ; return 0; } _ACEOF for ac_lib in '' amdhip64 do if test -z "$ac_lib"; then ac_res="none required" else ac_res=-l$ac_lib LIBS="-l$ac_lib $ac_func_search_save_LIBS" fi if ac_fn_c_try_link "$LINENO" then : ac_cv_search_hipMemGetInfo=$ac_res fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext if test ${ac_cv_search_hipMemGetInfo+y} then : break fi done if test ${ac_cv_search_hipMemGetInfo+y} then : else $as_nop ac_cv_search_hipMemGetInfo=no fi rm conftest.$ac_ext LIBS=$ac_func_search_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_hipMemGetInfo" >&5 printf "%s\n" "$ac_cv_search_hipMemGetInfo" >&6; } ac_res=$ac_cv_search_hipMemGetInfo if test "$ac_res" != no then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" else $as_nop have_valid_hip=no fi LDFLAGS="${SAVED_LDFLAGS}" LIBS=${SAVED_LIBS} fi if test x$have_valid_hip = xyes; then SAVED_CPPFLAGS="$CPPFLAGS" CPPFLAGS="${CPPFLAGS} $STARPU_HIPBLAS_DIRS $STARPU_HIP_CPPFLAGS " for ac_header in hip/hip_runtime.h hip/hip_runtime_api.h do : as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | $as_tr_sh` ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" if eval test \"x\$"$as_ac_Header"\" = x"yes" then : cat >>confdefs.h <<_ACEOF #define `printf "%s\n" "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF have_valid_hip=yes else $as_nop have_valid_hip=no fi done if test x$custom_hipblas_dir != x; then as_ac_Header=`printf "%s\n" "ac_cv_header_$custom_hipblas_dir/include/hipblas.h" | $as_tr_sh` ac_fn_c_check_header_compile "$LINENO" "$custom_hipblas_dir/include/hipblas.h" "$as_ac_Header" "$ac_includes_default" if eval test \"x\$"$as_ac_Header"\" = x"yes" then : have_valid_hipblas=yes else $as_nop have_valid_hipblas=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'hipblas' custom" >&5 printf "%s\n" "$as_me: WARNING: 'hipblas' custom" >&2;} else ac_fn_c_check_header_compile "$LINENO" "hipblas/hipblas.h" "ac_cv_header_hipblas_hipblas_h" "$ac_includes_default" if test "x$ac_cv_header_hipblas_hipblas_h" = xyes then : have_valid_hipblas=yes else $as_nop have_valid_hipblas=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'hipblas' default" >&5 printf "%s\n" "$as_me: WARNING: 'hipblas' default" >&2;} fi if test x$have_valid_hipblas = xyes; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lhipblas" >&5 printf %s "checking for main in -lhipblas... " >&6; } if test ${ac_cv_lib_hipblas_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lhipblas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_hipblas_main=yes else $as_nop ac_cv_lib_hipblas_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_hipblas_main" >&5 printf "%s\n" "$ac_cv_lib_hipblas_main" >&6; } if test "x$ac_cv_lib_hipblas_main" = xyes then : have_valid_hipblas=yes else $as_nop have_valid_hipblas=no fi ac_cv_lib_hipblas=ac_cv_lib_hipblas_main fi if test "$HIP_PLATFORM" = "amd"; then if test x$have_valid_hipblas = xyes; then for ac_header in rocblas/rocblas.h do : ac_fn_c_check_header_compile "$LINENO" "rocblas/rocblas.h" "ac_cv_header_rocblas_rocblas_h" "$ac_includes_default" if test "x$ac_cv_header_rocblas_rocblas_h" = xyes then : printf "%s\n" "#define HAVE_ROCBLAS_ROCBLAS_H 1" >>confdefs.h have_valid_hipblas=yes else $as_nop have_valid_hipblas=no fi done fi if test x$have_valid_hipblas = xyes; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lrocblas" >&5 printf %s "checking for main in -lrocblas... " >&6; } if test ${ac_cv_lib_rocblas_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lrocblas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_rocblas_main=yes else $as_nop ac_cv_lib_rocblas_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rocblas_main" >&5 printf "%s\n" "$ac_cv_lib_rocblas_main" >&6; } if test "x$ac_cv_lib_rocblas_main" = xyes then : have_valid_hipblas=yes else $as_nop have_valid_hipblas=no fi ac_cv_lib_rocblas=ac_cv_lib_rocblas_main fi fi if test x$have_valid_hipblas = xyes; then printf "%s\n" "#define STARPU_USE_HIPBLAS 1" >>confdefs.h if test x$custom_hipblas_dir != x; then HIPCCFLAGS="$HIPCCFLAGS -I$HIPBLAS_INCLUDE_DIR" STARPU_HIPBLAS_LDFLAGS="-L$HIPBLAS_LIB_DIR" fi STARPU_HIPBLAS_LDFLAGS="$STARPU_HIPBLAS_LDFLAGS -lhipblas" if test "$HIP_PLATFORM" = "amd"; then STARPU_HIPBLAS_LDFLAGS="$STARPU_HIPBLAS_LDFLAGS -lrocblas" fi else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'hipblas' not found, disabling HIP examples" >&5 printf "%s\n" "$as_me: WARNING: 'hipblas' not found, disabling HIP examples" >&2;} fi CPPFLAGS="${SAVED_CPPFLAGS}" fi STARPU_USE_HIPBLAS=$have_valid_hipblas if test x$have_valid_hipblas = xyes; then STARPU_USE_HIPBLAS_TRUE= STARPU_USE_HIPBLAS_FALSE='#' else STARPU_USE_HIPBLAS_TRUE='#' STARPU_USE_HIPBLAS_FALSE= fi if test x$have_valid_hip = xyes; then if test -z "$HIP_DIR"; then have_valid_hip=no fi if test -z "$HIP_LIB_DIR"; then have_valid_hip=no fi if test -z "$HIP_INCLUDE_DIR"; then have_valid_hip=no fi if test "$HIPCC" = ""; then # Extract the first word of "hipcc", so it can be a program name with args. set dummy hipcc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_HIPCC+y} then : printf %s "(cached) " >&6 else $as_nop case $HIPCC in [\\/]* | ?:[\\/]*) ac_cv_path_HIPCC="$HIPCC" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_dummy="$HIP_CLANG_PATH:$PATH:/usr/bin:/bin" for as_dir in $as_dummy do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_HIPCC="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_HIPCC" && ac_cv_path_HIPCC="not-found" ;; esac fi HIPCC=$ac_cv_path_HIPCC if test -n "$HIPCC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $HIPCC" >&5 printf "%s\n" "$HIPCC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi #testing if hipcc is defined, if not => STARPU_USE_HIP undefined if test "x$HIPCC" = "xnot-found"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'hipcc' not found, disabling HIP" >&5 printf "%s\n" "$as_me: WARNING: 'hipcc' not found, disabling HIP" >&2;} have_valid_hip=no fi if test "$HIP_PLATFORM" = "nvidia"; then HIPCCFLAGS="$HIPCCFLAGS --x cu" fi if test "x$have_valid_hip" = xyes; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $HIPCC is working" >&5 printf %s "checking whether $HIPCC is working... " >&6; } rm -f conftest.hip conftest.o touch conftest.hip if $HIPCC $HIPCCFLAGS conftest.hip -o conftest.o -c $STARPU_HIP_CPPFLAGS then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'hipcc' does not work, disabling HIP" >&5 printf "%s\n" "$as_me: WARNING: 'hipcc' does not work, disabling HIP" >&2;} have_valid_hip=no fi fi fi # in case HIP was explicitly required, but is not available, this is an error if test x$enable_hip = xyes -a x$have_valid_hip = xno; then as_fn_error $? "cannot find HIP" "$LINENO" 5 fi # now we enable HIP if and only if a proper setup is available enable_hip=$have_valid_hip if test "x$enable_hip" = xyes; then printf "%s\n" "#define STARPU_USE_HIP 1" >>confdefs.h if test "$HIP_PLATFORM" = "nvidia"; then STARPU_HIP_LDFLAGS="-lcuda -lcudart -lcublas $STARPU_HIPBLAS_LDFLAGS -lstdc++" fi if test "$HIP_PLATFORM" = "amd"; then STARPU_HIP_LDFLAGS="-L$HIP_LIB_DIR -lamdhip64 $STARPU_HIPBLAS_LDFLAGS -lstdc++" fi # Check whether --enable-hip_memcpy_peer was given. if test ${enable_hip_memcpy_peer+y} then : enableval=$enable_hip_memcpy_peer; else $as_nop enable_hip_memcpy_peer=$enable_hip fi if test x$enable_hip_memcpy_peer = xyes; then printf "%s\n" "#define STARPU_HAVE_HIP_MEMCPY_PEER 1" >>confdefs.h fi else STARPU_HIP_LDFLAGS= STARPU_HIP_CPPFLAGS= enable_hip_memcpy_peer=no fi if test x$enable_hip = xyes; then STARPU_USE_HIP_TRUE= STARPU_USE_HIP_FALSE='#' else STARPU_USE_HIP_TRUE='#' STARPU_USE_HIP_FALSE= fi #AC_ARG_VAR([HIPCC_CC], [C compiler for HIP compiler]) ############################################################################### # # # OpenCL settings # # # ############################################################################### { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of OpenCL devices" >&5 printf %s "checking maximum number of OpenCL devices... " >&6; } # Check whether --enable-maxopencldev was given. if test ${enable_maxopencldev+y} then : enableval=$enable_maxopencldev; nmaxopencldev=$enableval else $as_nop nmaxopencldev=8 fi if test x$nmaxopencldev = x -o x$nmaxopencldev = xyes then as_fn_error $? "The --enable-maxopencldev option needs to be given a number" "$LINENO" 5 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxopencldev" >&5 printf "%s\n" "$nmaxopencldev" >&6; } printf "%s\n" "#define STARPU_MAXOPENCLDEVS $nmaxopencldev" >>confdefs.h # Check whether --enable-opencl was given. if test ${enable_opencl+y} then : enableval=$enable_opencl; else $as_nop enable_opencl=maybe fi have_valid_opencl=no #AC_MSG_CHECKING(whether OpenCL is available) # Check whether --with-opencl-dir was given. if test ${with_opencl_dir+y} then : withval=$with_opencl_dir; opencl_dir="$withval" # in case this was not explicit yet enable_opencl=yes else $as_nop opencl_dir=no fi # Check whether --with-opencl-include-dir was given. if test ${with_opencl_include_dir+y} then : withval=$with_opencl_include_dir; opencl_include_dir="$withval" # in case this was not explicit yet enable_opencl=yes else $as_nop opencl_include_dir=no fi # Check whether --with-opencl-lib-dir was given. if test ${with_opencl_lib_dir+y} then : withval=$with_opencl_lib_dir; opencl_lib_dir="$withval" # in case this was not explicit yet enable_opencl=yes else $as_nop opencl_lib_dir=no fi if test x$enable_opencl = xyes -o x$enable_opencl = xmaybe; then case $target in *-*-darwin*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL is available" >&5 printf %s "checking whether OpenCL is available... " >&6; } SAVED_LIBS=$LIBS LIBS="$LIBS -framework OpenCL" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifdef __APPLE_CC__ #include #else #include #endif int main (void) { return clSetKernelArg(0, 0, 0, 0); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } enable_opencl=yes have_valid_opencl=yes STARPU_OPENCL_CPPFLAGS= STARPU_OPENCL_LDFLAGS="-framework OpenCL" else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } enable_opencl=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$SAVED_LIBS ;; *) if test "x$has_opencl_being_checked" != "xyes" ; then __opencl_dir="$opencl_dir" __opencl_include_dir="$opencl_include_dir" __opencl_lib_dir="$opencl_lib_dir" if test "$__opencl_dir" != "no" ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL is available in $__opencl_dir $__opencl_include_dir and $__opencl_lib_dir" >&5 printf %s "checking whether OpenCL is available in $__opencl_dir $__opencl_include_dir and $__opencl_lib_dir... " >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL is available" >&5 printf %s "checking whether OpenCL is available... " >&6; } fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } if test "$__opencl_include_dir" = "no" -a "$__opencl_dir" != "no" ; then __opencl_include_dir="$__opencl_dir/include" fi SAVED_CPPFLAGS="$CPPFLAGS" SAVED_LDFLAGS="${LDFLAGS}" if test "$__opencl_include_dir" != "no" ; then CPPFLAGS="${CPPFLAGS} -I$__opencl_include_dir" fi ac_fn_c_check_header_compile "$LINENO" "CL/cl.h" "ac_cv_header_CL_cl_h" "$ac_includes_default" if test "x$ac_cv_header_CL_cl_h" = xyes then : have_valid_opencl=yes else $as_nop have_valid_opencl=no fi unset ac_cv_header_CL_cl_h if test "$have_valid_opencl" = "yes" ; then if test "$__opencl_lib_dir" != "no"; then LDFLAGS="${SAVED_LDFLAGS} -L$__opencl_lib_dir" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lOpenCL" >&5 printf %s "checking for main in -lOpenCL... " >&6; } if test ${ac_cv_lib_OpenCL_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lOpenCL $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_OpenCL_main=yes else $as_nop ac_cv_lib_OpenCL_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_OpenCL_main" >&5 printf "%s\n" "$ac_cv_lib_OpenCL_main" >&6; } if test "x$ac_cv_lib_OpenCL_main" = xyes then : have_valid_opencl=yes else $as_nop have_valid_opencl=no fi ac_cv_lib_OpenCL=ac_cv_lib_OpenCL_main unset ac_cv_lib_OpenCL_main else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL is available in $__opencl_dir" >&5 printf %s "checking whether OpenCL is available in $__opencl_dir... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lOpenCL" >&5 printf %s "checking for main in -lOpenCL... " >&6; } if test ${ac_cv_lib_OpenCL_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lOpenCL $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_OpenCL_main=yes else $as_nop ac_cv_lib_OpenCL_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_OpenCL_main" >&5 printf "%s\n" "$ac_cv_lib_OpenCL_main" >&6; } if test "x$ac_cv_lib_OpenCL_main" = xyes then : have_valid_opencl=yes else $as_nop have_valid_opencl=no fi ac_cv_lib_OpenCL=ac_cv_lib_OpenCL_main unset ac_cv_lib_OpenCL_main if test "$have_valid_opencl" = "no" -a "$__opencl_dir" != "no" ; then for __cuda_libdir in lib64 lib lib/x86 lib/Win32 ; do __opencl_lib_dir="$__opencl_dir/$__cuda_libdir" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL is available in $__opencl_dir and $__opencl_lib_dir" >&5 printf %s "checking whether OpenCL is available in $__opencl_dir and $__opencl_lib_dir... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } LDFLAGS="${SAVED_LDFLAGS} -L$__opencl_lib_dir" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lOpenCL" >&5 printf %s "checking for main in -lOpenCL... " >&6; } if test ${ac_cv_lib_OpenCL_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lOpenCL $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_OpenCL_main=yes else $as_nop ac_cv_lib_OpenCL_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_OpenCL_main" >&5 printf "%s\n" "$ac_cv_lib_OpenCL_main" >&6; } if test "x$ac_cv_lib_OpenCL_main" = xyes then : have_valid_opencl=yes else $as_nop have_valid_opencl=no fi ac_cv_lib_OpenCL=ac_cv_lib_OpenCL_main unset ac_cv_lib_OpenCL_main if test "$have_valid_opencl" = yes ; then break fi done else LDFLAGS="${SAVED_LDFLAGS}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lOpenCL" >&5 printf %s "checking for main in -lOpenCL... " >&6; } if test ${ac_cv_lib_OpenCL_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lOpenCL $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_OpenCL_main=yes else $as_nop ac_cv_lib_OpenCL_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_OpenCL_main" >&5 printf "%s\n" "$ac_cv_lib_OpenCL_main" >&6; } if test "x$ac_cv_lib_OpenCL_main" = xyes then : have_valid_opencl=yes else $as_nop have_valid_opencl=no fi ac_cv_lib_OpenCL=ac_cv_lib_OpenCL_main unset ac_cv_lib_OpenCL_main fi fi fi if test "$have_valid_opencl" = "yes" -a "$__opencl_include_dir" != "no"; then STARPU_OPENCL_CPPFLAGS="-I$__opencl_include_dir" ac_fn_c_check_header_compile "$LINENO" "CL/cl_ext.h" "ac_cv_header_CL_cl_ext_h" "$ac_includes_default" if test "x$ac_cv_header_CL_cl_ext_h" = xyes then : printf "%s\n" "#define HAVE_CL_CL_EXT_H 1" >>confdefs.h fi fi CPPFLAGS="${SAVED_CPPFLAGS}" LDFLAGS="${SAVED_LDFLAGS}" if test "$have_valid_opencl" = "yes" ; then if test "$__opencl_lib_dir" != "no"; then STARPU_OPENCL_LDFLAGS="-L$__opencl_lib_dir" fi STARPU_OPENCL_LDFLAGS="${STARPU_OPENCL_LDFLAGS} -lOpenCL" fi if test "$have_valid_opencl" = "no" ; then for f in "/usr/local/cuda" "/c/cuda" "/cygdrive/c/cuda" "/opt/cuda" "$CUDA_ROOT" "$CUDA_PATH" "$CUDA_INC_PATH/.." "$CUDA_INSTALL_PATH" "$CUDA_TOOLKIT"; do if test -n "$f" ; then __opencl_dir="$f" __opencl_include_dir="no" __opencl_lib_dir="no" if test "$__opencl_dir" != "no" ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL is available in $__opencl_dir $__opencl_include_dir and $__opencl_lib_dir" >&5 printf %s "checking whether OpenCL is available in $__opencl_dir $__opencl_include_dir and $__opencl_lib_dir... " >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL is available" >&5 printf %s "checking whether OpenCL is available... " >&6; } fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } if test "$__opencl_include_dir" = "no" -a "$__opencl_dir" != "no" ; then __opencl_include_dir="$__opencl_dir/include" fi SAVED_CPPFLAGS="$CPPFLAGS" SAVED_LDFLAGS="${LDFLAGS}" if test "$__opencl_include_dir" != "no" ; then CPPFLAGS="${CPPFLAGS} -I$__opencl_include_dir" fi ac_fn_c_check_header_compile "$LINENO" "CL/cl.h" "ac_cv_header_CL_cl_h" "$ac_includes_default" if test "x$ac_cv_header_CL_cl_h" = xyes then : have_valid_opencl=yes else $as_nop have_valid_opencl=no fi unset ac_cv_header_CL_cl_h if test "$have_valid_opencl" = "yes" ; then if test "$__opencl_lib_dir" != "no"; then LDFLAGS="${SAVED_LDFLAGS} -L$__opencl_lib_dir" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lOpenCL" >&5 printf %s "checking for main in -lOpenCL... " >&6; } if test ${ac_cv_lib_OpenCL_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lOpenCL $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_OpenCL_main=yes else $as_nop ac_cv_lib_OpenCL_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_OpenCL_main" >&5 printf "%s\n" "$ac_cv_lib_OpenCL_main" >&6; } if test "x$ac_cv_lib_OpenCL_main" = xyes then : have_valid_opencl=yes else $as_nop have_valid_opencl=no fi ac_cv_lib_OpenCL=ac_cv_lib_OpenCL_main unset ac_cv_lib_OpenCL_main else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL is available in $__opencl_dir" >&5 printf %s "checking whether OpenCL is available in $__opencl_dir... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lOpenCL" >&5 printf %s "checking for main in -lOpenCL... " >&6; } if test ${ac_cv_lib_OpenCL_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lOpenCL $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_OpenCL_main=yes else $as_nop ac_cv_lib_OpenCL_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_OpenCL_main" >&5 printf "%s\n" "$ac_cv_lib_OpenCL_main" >&6; } if test "x$ac_cv_lib_OpenCL_main" = xyes then : have_valid_opencl=yes else $as_nop have_valid_opencl=no fi ac_cv_lib_OpenCL=ac_cv_lib_OpenCL_main unset ac_cv_lib_OpenCL_main if test "$have_valid_opencl" = "no" -a "$__opencl_dir" != "no" ; then for __cuda_libdir in lib64 lib lib/x86 lib/Win32 ; do __opencl_lib_dir="$__opencl_dir/$__cuda_libdir" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL is available in $__opencl_dir and $__opencl_lib_dir" >&5 printf %s "checking whether OpenCL is available in $__opencl_dir and $__opencl_lib_dir... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 printf "%s\n" "" >&6; } LDFLAGS="${SAVED_LDFLAGS} -L$__opencl_lib_dir" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lOpenCL" >&5 printf %s "checking for main in -lOpenCL... " >&6; } if test ${ac_cv_lib_OpenCL_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lOpenCL $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_OpenCL_main=yes else $as_nop ac_cv_lib_OpenCL_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_OpenCL_main" >&5 printf "%s\n" "$ac_cv_lib_OpenCL_main" >&6; } if test "x$ac_cv_lib_OpenCL_main" = xyes then : have_valid_opencl=yes else $as_nop have_valid_opencl=no fi ac_cv_lib_OpenCL=ac_cv_lib_OpenCL_main unset ac_cv_lib_OpenCL_main if test "$have_valid_opencl" = yes ; then break fi done else LDFLAGS="${SAVED_LDFLAGS}" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lOpenCL" >&5 printf %s "checking for main in -lOpenCL... " >&6; } if test ${ac_cv_lib_OpenCL_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lOpenCL $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_OpenCL_main=yes else $as_nop ac_cv_lib_OpenCL_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_OpenCL_main" >&5 printf "%s\n" "$ac_cv_lib_OpenCL_main" >&6; } if test "x$ac_cv_lib_OpenCL_main" = xyes then : have_valid_opencl=yes else $as_nop have_valid_opencl=no fi ac_cv_lib_OpenCL=ac_cv_lib_OpenCL_main unset ac_cv_lib_OpenCL_main fi fi fi if test "$have_valid_opencl" = "yes" -a "$__opencl_include_dir" != "no"; then STARPU_OPENCL_CPPFLAGS="-I$__opencl_include_dir" ac_fn_c_check_header_compile "$LINENO" "CL/cl_ext.h" "ac_cv_header_CL_cl_ext_h" "$ac_includes_default" if test "x$ac_cv_header_CL_cl_ext_h" = xyes then : printf "%s\n" "#define HAVE_CL_CL_EXT_H 1" >>confdefs.h fi fi CPPFLAGS="${SAVED_CPPFLAGS}" LDFLAGS="${SAVED_LDFLAGS}" if test "$have_valid_opencl" = "yes" ; then if test "$__opencl_lib_dir" != "no"; then STARPU_OPENCL_LDFLAGS="-L$__opencl_lib_dir" fi STARPU_OPENCL_LDFLAGS="${STARPU_OPENCL_LDFLAGS} -lOpenCL" fi if test "$have_valid_opencl" = "yes" ; then break fi fi done fi has_opencl_being_checked=yes fi # in case OpenCL was explicitly required, but is not available, this is an error if test x$enable_opencl = xyes -a x$have_valid_opencl = xno; then as_fn_error $? "cannot find OpenCL" "$LINENO" 5 fi # now we enable OpenCL if and only if a proper setup is available enable_opencl=$have_valid_opencl ;; esac save_LIBS="$LIBS" LIBS="$LIBS $STARPU_OPENCL_LDFLAGS" ac_fn_c_check_func "$LINENO" "clEnqueueMarkerWithWaitList" "ac_cv_func_clEnqueueMarkerWithWaitList" if test "x$ac_cv_func_clEnqueueMarkerWithWaitList" = xyes then : printf "%s\n" "#define HAVE_CLENQUEUEMARKERWITHWAITLIST 1" >>confdefs.h fi LIBS="$save_LIBS" fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenCL should be used" >&5 printf %s "checking whether OpenCL should be used... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_opencl" >&5 printf "%s\n" "$enable_opencl" >&6; } STARPU_USE_OPENCL=$enable_opencl if test x$enable_opencl = xyes; then STARPU_USE_OPENCL_TRUE= STARPU_USE_OPENCL_FALSE='#' else STARPU_USE_OPENCL_TRUE='#' STARPU_USE_OPENCL_FALSE= fi if test x$enable_opencl = xyes ; then printf "%s\n" "#define STARPU_USE_OPENCL 1" >>confdefs.h STARPU_OPENCL_CPPFLAGS="${STARPU_OPENCL_CPPFLAGS} -DSTARPU_OPENCL_DATADIR=\"\\\"${datarootdir}/starpu/opencl\\\"\" -DCL_USE_DEPRECATED_OPENCL_1_1_APIS" STARPU_OPENCL_DATAdir="$(eval echo ${datarootdir}/starpu/opencl/examples)" fi ############################################################################### # # # Maxeler FPGA Settings # # # ############################################################################### #NUMBER OF MAXELER FPGA DEVICES { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of Maxeler FPGA devices" >&5 printf %s "checking maximum number of Maxeler FPGA devices... " >&6; } # Check whether --enable-maxmaxfpgadev was given. if test ${enable_maxmaxfpgadev+y} then : enableval=$enable_maxmaxfpgadev; nmaxmaxfpgadev=$enableval else $as_nop nmaxmaxfpgadev=12 fi if test x$nmaxmaxfpgadev = x -o x$nmaxmaxfpgadev = xyes then as_fn_error $? "The --enable-maxmaxfpgadev option needs to be given a number" "$LINENO" 5 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxmaxfpgadev" >&5 printf "%s\n" "$nmaxmaxfpgadev" >&6; } printf "%s\n" "#define STARPU_MAXMAXFPGADEVS $nmaxmaxfpgadev" >>confdefs.h # Check whether --enable-max-fpga was given. if test ${enable_max_fpga+y} then : enableval=$enable_max_fpga; enable_max_fpga=$enableval else $as_nop enable_max_fpga=maybe fi if test x$enable_simgrid = xyes; then if test x$enable_max_fpga = xyes; then as_fn_error $? "Max fpga not supported with simgrid" "$LINENO" 5 fi enable_max_fpga=no fi if test x$enable_max_fpga != xno; then # Extract the first word of "slic-config", so it can be a program name with args. set dummy slic-config; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_SLIC_CONFIG+y} then : printf %s "(cached) " >&6 else $as_nop case $SLIC_CONFIG in [\\/]* | ?:[\\/]*) ac_cv_path_SLIC_CONFIG="$SLIC_CONFIG" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_SLIC_CONFIG="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_SLIC_CONFIG" && ac_cv_path_SLIC_CONFIG="not-found" ;; esac fi SLIC_CONFIG=$ac_cv_path_SLIC_CONFIG if test -n "$SLIC_CONFIG"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $SLIC_CONFIG" >&5 printf "%s\n" "$SLIC_CONFIG" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$SLIC_CONFIG" = "xnot-found"; then # in case FPGA was explicitly required, but is not available, this is an error if test x$enable_max_fpga = xyes; then as_fn_error $? "'slic-config' not found for Maxeler FPGA support" "$LINENO" 5 fi enable_max_fpga=no else STARPU_MAX_FPGA_CPPFLAGS="`slic-config --cflags | sed s/\'//g | sed "s/-I /-I/"`" STARPU_MAX_FPGA_LDFLAGS="`slic-config --libs | sed s/\'//g | sed "s/-L /-L/" | sed "s/-L /-L/"`" enable_max_fpga=yes fi fi STARPU_USE_MAX_FPGA=$enable_max_fpga if test x$enable_max_fpga = xyes; then STARPU_USE_MAX_FPGA_TRUE= STARPU_USE_MAX_FPGA_FALSE='#' else STARPU_USE_MAX_FPGA_TRUE='#' STARPU_USE_MAX_FPGA_FALSE= fi if test x$enable_max_fpga = xyes; then printf "%s\n" "#define STARPU_USE_MAX_FPGA 1" >>confdefs.h fi ############################################################################### # # # General GPU settings # # # ############################################################################### { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether asynchronous copy should be disabled" >&5 printf %s "checking whether asynchronous copy should be disabled... " >&6; } # Check whether --enable-asynchronous-copy was given. if test ${enable_asynchronous_copy+y} then : enableval=$enable_asynchronous_copy; enable_asynchronous_copy=$enableval else $as_nop enable_asynchronous_copy=yes fi disable_asynchronous_copy=no if test x$enable_asynchronous_copy = xno ; then disable_asynchronous_copy=yes fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $disable_asynchronous_copy" >&5 printf "%s\n" "$disable_asynchronous_copy" >&6; } if test x$disable_asynchronous_copy = xyes ; then printf "%s\n" "#define STARPU_DISABLE_ASYNCHRONOUS_COPY 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether asynchronous CUDA copy should be disabled" >&5 printf %s "checking whether asynchronous CUDA copy should be disabled... " >&6; } # Check whether --enable-asynchronous-cuda-copy was given. if test ${enable_asynchronous_cuda_copy+y} then : enableval=$enable_asynchronous_cuda_copy; enable_asynchronous_cuda_copy=$enableval else $as_nop enable_asynchronous_cuda_copy=yes fi disable_asynchronous_cuda_copy=no if test x$enable_asynchronous_cuda_copy = xno ; then disable_asynchronous_cuda_copy=yes fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $disable_asynchronous_cuda_copy" >&5 printf "%s\n" "$disable_asynchronous_cuda_copy" >&6; } if test x$disable_asynchronous_cuda_copy = xyes ; then printf "%s\n" "#define STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether asynchronous OpenCL copy should be disabled" >&5 printf %s "checking whether asynchronous OpenCL copy should be disabled... " >&6; } # Check whether --enable-asynchronous-opencl-copy was given. if test ${enable_asynchronous_opencl_copy+y} then : enableval=$enable_asynchronous_opencl_copy; enable_asynchronous_opencl_copy=$enableval else $as_nop enable_asynchronous_opencl_copy=yes fi disable_asynchronous_opencl_copy=no if test x$enable_asynchronous_opencl_copy = xno ; then disable_asynchronous_opencl_copy=yes fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $disable_asynchronous_opencl_copy" >&5 printf "%s\n" "$disable_asynchronous_opencl_copy" >&6; } if test x$disable_asynchronous_opencl_copy = xyes ; then printf "%s\n" "#define STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether asynchronous MPI Master Slave copy should be disabled" >&5 printf %s "checking whether asynchronous MPI Master Slave copy should be disabled... " >&6; } # Check whether --enable-asynchronous-mpi-master-slave-copy was given. if test ${enable_asynchronous_mpi_master_slave_copy+y} then : enableval=$enable_asynchronous_mpi_master_slave_copy; enable_asynchronous_mpi_master_slave_copy=$enableval else $as_nop enable_asynchronous_mpi_master_slave_copy=yes fi disable_asynchronous_mpi_master_slave_copy=no if test x$enable_asynchronous_mpi_master_slave_copy = xno ; then disable_asynchronous_mpi_master_slave_copy=yes fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $disable_asynchronous_mpi_master_slave_copy" >&5 printf "%s\n" "$disable_asynchronous_mpi_master_slave_copy" >&6; } if test x$disable_asynchronous_mpi_master_slave_copy = xyes ; then printf "%s\n" "#define STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether asynchronous TCP/IP Master Slave copy should be disabled" >&5 printf %s "checking whether asynchronous TCP/IP Master Slave copy should be disabled... " >&6; } # Check whether --enable-asynchronous-tcpip-master-slave-copy was given. if test ${enable_asynchronous_tcpip_master_slave_copy+y} then : enableval=$enable_asynchronous_tcpip_master_slave_copy; enable_asynchronous_tcpip_master_slave_copy=$enableval else $as_nop enable_asynchronous_tcpip_master_slave_copy=yes fi disable_asynchronous_tcpip_master_slave_copy=no if test x$enable_asynchronous_tcpip_master_slave_copy = xno ; then disable_asynchronous_tcpip_master_slave_copy=yes fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $disable_asynchronous_tcpip_master_slave_copy" >&5 printf "%s\n" "$disable_asynchronous_tcpip_master_slave_copy" >&6; } if test x$disable_asynchronous_tcpip_master_slave_copy = xyes ; then printf "%s\n" "#define STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether asynchronous Maxeler FPGA copy should be disabled" >&5 printf %s "checking whether asynchronous Maxeler FPGA copy should be disabled... " >&6; } # Check whether --enable-asynchronous-max-fpga-copy was given. if test ${enable_asynchronous_max_fpga_copy+y} then : enableval=$enable_asynchronous_max_fpga_copy; enable_asynchronous_max_fpga_copy=$enableval else $as_nop enable_asynchronous_max_fpga_copy=yes fi disable_asynchronous_max_fpga_copy=no if test x$enable_asynchronous_max_fpga_copy = xno ; then disable_asynchronous_max_fpga_copy=yes fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $disable_asynchronous_max_fpga_copy" >&5 printf "%s\n" "$disable_asynchronous_max_fpga_copy" >&6; } if test x$disable_asynchronous_max_fpga_copy = xyes ; then printf "%s\n" "#define STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY 1" >>confdefs.h fi ############################################################################### # # # Fortran # # # ############################################################################### # Check whether --enable-fortran was given. if test ${enable_fortran+y} then : enableval=$enable_fortran; enable_build_fortran_requested=$enableval else $as_nop enable_build_fortran_requested=yes fi use_mpi_fort=no enable_build_fortran=no if test "x$enable_build_fortran_requested" = "xyes" ; then if test "x$FC" != "x"; then if $FC --version|grep -q 'GNU Fortran'; then ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu OLD_FCFLAGS="$FCFLAGS" FCFLAGS="$FCFLAGS -cpp" cat > conftest.$ac_ext <<_ACEOF program main #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 9) #error GFortran too old, version >= 4.9.x needed, Fortran examples will not be built #endif end _ACEOF if ac_fn_fc_try_compile "$LINENO" then : enable_build_fortran="yes" else $as_nop enable_build_fortran="no" fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext FCFLAGS="$OLD_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test "$enable_build_fortran" = "no" ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: GFortran too old, version >= 4.9.x needed, Fortran examples will not be built" >&5 printf "%s\n" "$as_me: WARNING: GFortran too old, version >= 4.9.x needed, Fortran examples will not be built" >&2;} fi else if $FC -V 2>&1|grep -q 'Intel(R) Fortran'; then enable_build_fortran="yes" ifort_fc_version=`$FC -V 2>&1 |head -1|sed 's/.*Version //;s/ Build.*//'` ifort_maj_version=`echo $ifort_fc_version|cut -d. -f1` if test $ifort_maj_version -lt 16; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Intel Fortran compiler $ifort_fc_version too old, version >= 2016.x needed, Fortran examples will not be built" >&5 printf "%s\n" "$as_me: WARNING: Intel Fortran compiler $ifort_fc_version too old, version >= 2016.x needed, Fortran examples will not be built" >&2;} enable_build_fortran="no" fi else if $FC -qversion 2>&1|grep -q 'IBM XL Fortran'; then xlf_fc_version=`$FC -V 2>&1 |tail -1|sed 's/.*Version: //'` { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: IBM Fortran compiler $xlf_fc_version not validated with the native StarPU Fortran API, Fortran examples will not be built" >&5 printf "%s\n" "$as_me: WARNING: IBM Fortran compiler $xlf_fc_version not validated with the native StarPU Fortran API, Fortran examples will not be built" >&2;} enable_build_fortran="no" else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Fortran compiler has not been tested for StarPU native Fortran support" >&5 printf "%s\n" "$as_me: WARNING: Fortran compiler has not been tested for StarPU native Fortran support" >&2;} enable_build_fortran="yes" fi fi fi if $FC -v 2>&1 | grep -q 'Arm C/C++/Fortran Compiler' ; then armflang_version=`$FC -v 2>&1 | head -1 | sed 's/.*version //'` armflang_maj_version=`echo $armflang_version|cut -d. -f1` if test $armflang_maj_version -lt 23 ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: ARM Fortran compiler $armflang_version is not validated with the native StarPU Fortran API, Fortran examples will not be built" >&5 printf "%s\n" "$as_me: WARNING: ARM Fortran compiler $armflang_version is not validated with the native StarPU Fortran API, Fortran examples will not be built" >&2;} enable_build_fortran="no" fi fi if test "x$enable_build_fortran" = "xyes" ; then printf "%s\n" "#define STARPU_HAVE_FC 1" >>confdefs.h if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes -o x$build_mpi_master_slave = xyes ; then #Check MPIFORT if test x$enable_simgrid = xyes ; then DEFAULT_MPIFORT=smpifort else DEFAULT_MPIFORT=mpifort fi # Check whether --with-mpifort was given. if test ${with_mpifort+y} then : withval=$with_mpifort; DEFAULT_MPIFORT=$withval fi case $DEFAULT_MPIFORT in /*) mpifort_path="$DEFAULT_MPIFORT" ;; *) # Extract the first word of "$DEFAULT_MPIFORT", so it can be a program name with args. set dummy $DEFAULT_MPIFORT; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_mpifort_path+y} then : printf %s "(cached) " >&6 else $as_nop case $mpifort_path in [\\/]* | ?:[\\/]*) ac_cv_path_mpifort_path="$mpifort_path" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_dummy="$simgrid_dir/bin:$PATH" for as_dir in $as_dummy do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_mpifort_path="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_mpifort_path" && ac_cv_path_mpifort_path="no" ;; esac fi mpifort_path=$ac_cv_path_mpifort_path if test -n "$mpifort_path"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpifort_path" >&5 printf "%s\n" "$mpifort_path" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi ;; esac # We test if the MPIFORT compiler exists if test ! -x $mpifort_path; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: The mpifort compiler '$mpifort_path' does not have the execute permission" >&5 printf "%s\n" "The mpifort compiler '$mpifort_path' does not have the execute permission" >&6; } mpifort_path=no else OLD_CC=$CC CC=$mpicc_path cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main (void) { return MPI_Comm_f2c(0); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : use_mpi_fort=yes else $as_nop use_mpi_fort=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC=$OLD_CC if test "x$use_mpi_fort" = xyes; then printf "%s\n" "#define HAVE_MPI_COMM_F2C 1" >>confdefs.h fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether mpifort is available" >&5 printf %s "checking whether mpifort is available... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpifort_path" >&5 printf "%s\n" "$mpifort_path" >&6; } MPIFORT=$mpifort_path if test x$mpifort_path != xno ; then MPIPATH=$(dirname $mpifort_path):$PATH else MPIPATH=$PATH fi fi fi fi fi if test "x$enable_build_fortran" = "xyes" ; then if test "x$FC" = "x" ; then enable_build_fortran="no" fi fi #We have MPI C/C++ compiler if test x$build_mpi_master_slave = xyes; then #Check if we can compile fortran cases if test x$use_mpi_fort = xyes ; then F77LD=$mpifort_path FCLD=$mpifort_path F77=$mpifort_path FC=$mpifort_path else enable_build_fortran=no fi fi if test "x$FC" != "x" -a "x$enable_build_fortran" = "xyes"; then STARPU_HAVE_FC_TRUE= STARPU_HAVE_FC_FALSE='#' else STARPU_HAVE_FC_TRUE='#' STARPU_HAVE_FC_FALSE= fi if test "x$F77" != "x" -a "x$enable_build_fortran" = "xyes"; then STARPU_HAVE_F77_TRUE= STARPU_HAVE_F77_FALSE='#' else STARPU_HAVE_F77_TRUE='#' STARPU_HAVE_F77_FALSE= fi if test "x$use_mpi_fort" = "xyes"; then STARPU_HAVE_MPIFORT_TRUE= STARPU_HAVE_MPIFORT_FALSE='#' else STARPU_HAVE_MPIFORT_TRUE='#' STARPU_HAVE_MPIFORT_FALSE= fi ############################################################################### # # # Debug and Performance analysis tools # # # ############################################################################### { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether debug mode should be enabled" >&5 printf %s "checking whether debug mode should be enabled... " >&6; } # Check whether --enable-debug was given. if test ${enable_debug+y} then : enableval=$enable_debug; enable_debug=$enableval else $as_nop enable_debug=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_debug" >&5 printf "%s\n" "$enable_debug" >&6; } # Check whether --enable-spinlock_check was given. if test ${enable_spinlock_check+y} then : enableval=$enable_spinlock_check; enable_spinlock_check=$enableval else $as_nop enable_spinlock_check=no fi # Check whether --enable-fstack-protector-all was given. if test ${enable_fstack_protector_all+y} then : enableval=$enable_fstack_protector_all; enable_fstack_protector_all=$enableval else $as_nop enable_fstack_protector_all=yes fi if test x$enable_debug = xyes; then printf "%s\n" "#define STARPU_DEBUG 1" >>confdefs.h CFLAGS="$CFLAGS -O0" CXXFLAGS="$CXXFLAGS -O0" FFLAGS="$FFLAGS -O0" FCFLAGS="$FCFLAGS -O0" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -fno-optimize-sibling-calls" >&5 printf %s "checking whether C compiler supports -fno-optimize-sibling-calls... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-fno-optimize-sibling-calls" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -fno-optimize-sibling-calls" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -fno-optimize-sibling-calls" >&5 printf %s "checking whether MPI C compiler supports -fno-optimize-sibling-calls... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -fno-optimize-sibling-calls" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -fno-optimize-sibling-calls" >&5 printf %s "checking whether CXX compiler supports -fno-optimize-sibling-calls... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-fno-optimize-sibling-calls" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -fno-optimize-sibling-calls" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -fno-optimize-sibling-calls" >&5 printf %s "checking whether Fortran 77 compiler supports -fno-optimize-sibling-calls... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="-fno-optimize-sibling-calls" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -fno-optimize-sibling-calls" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -fno-optimize-sibling-calls" >&5 printf %s "checking whether Fortran compiler supports -fno-optimize-sibling-calls... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="-fno-optimize-sibling-calls" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -fno-optimize-sibling-calls" >&5 printf %s "checking whether MPI Fortran compiler supports -fno-optimize-sibling-calls... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -fno-optimize-sibling-calls" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu enable_spinlock_check=yes if test x$GCC = xyes; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Og" >&5 printf %s "checking whether C compiler supports -Og... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-Og" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Og" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Og" >&5 printf %s "checking whether MPI C compiler supports -Og... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Og" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Og" >&5 printf %s "checking whether CXX compiler supports -Og... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-Og" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Og" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -Og" >&5 printf %s "checking whether Fortran 77 compiler supports -Og... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="-Og" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -Og" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -Og" >&5 printf %s "checking whether Fortran compiler supports -Og... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="-Og" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -Og" >&5 printf %s "checking whether MPI Fortran compiler supports -Og... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -Og" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test x$starpu_windows != xyes ; then if test x$enable_fstack_protector_all = xyes ; then CFLAGS="$CFLAGS -fstack-protector-all" CXXFLAGS="$CXXFLAGS -fstack-protector-all" FFLAGS="$FFLAGS -fstack-protector-all" FCFLAGS="$FCFLAGS -fstack-protector-all" fi fi fi else CFLAGS="-O3 $CFLAGS" CXXFLAGS="-O3 $CXXFLAGS" FFLAGS="-O3 $FFLAGS" FCFLAGS="-O3 $FCFLAGS" fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether gdb information should be enabled" >&5 printf %s "checking whether gdb information should be enabled... " >&6; } # Check whether --enable-gdb was given. if test ${enable_gdb+y} then : enableval=$enable_gdb; enable_gdb=$enableval else $as_nop enable_gdb=yes fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_gdb" >&5 printf "%s\n" "$enable_gdb" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether full gdb information should be enabled" >&5 printf %s "checking whether full gdb information should be enabled... " >&6; } # Check whether --enable-full-gdb-information was given. if test ${enable_full_gdb_information+y} then : enableval=$enable_full_gdb_information; enable_full_gdb_information=$enableval else $as_nop enable_full_gdb_information=yes fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_full_gdb_information" >&5 printf "%s\n" "$enable_full_gdb_information" >&6; } if test x$enable_gdb = xyes; then if test x$enable_full_gdb_information = xyes -a x$GCC = xyes; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -gdwarf-2" >&5 printf %s "checking whether C compiler supports -gdwarf-2... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-gdwarf-2" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -gdwarf-2" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -gdwarf-2" >&5 printf %s "checking whether MPI C compiler supports -gdwarf-2... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -gdwarf-2" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -gdwarf-2" >&5 printf %s "checking whether CXX compiler supports -gdwarf-2... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-gdwarf-2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -gdwarf-2" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -gdwarf-2" >&5 printf %s "checking whether Fortran 77 compiler supports -gdwarf-2... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="-gdwarf-2" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -gdwarf-2" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -gdwarf-2" >&5 printf %s "checking whether Fortran compiler supports -gdwarf-2... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="-gdwarf-2" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -gdwarf-2" >&5 printf %s "checking whether MPI Fortran compiler supports -gdwarf-2... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -gdwarf-2" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -g3" >&5 printf %s "checking whether C compiler supports -g3... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-g3" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -g3" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -g3" >&5 printf %s "checking whether MPI C compiler supports -g3... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -g3" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -g3" >&5 printf %s "checking whether CXX compiler supports -g3... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-g3" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -g3" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -g3" >&5 printf %s "checking whether Fortran 77 compiler supports -g3... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="-g3" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -g3" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -g3" >&5 printf %s "checking whether Fortran compiler supports -g3... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="-g3" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -g3" >&5 printf %s "checking whether MPI Fortran compiler supports -g3... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -g3" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu NVCCFLAGS="$NVCCFLAGS -g" HIPCCFLAGS="$HIPCCFLAGS -g" else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -g" >&5 printf %s "checking whether C compiler supports -g... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-g" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -g" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -g" >&5 printf %s "checking whether MPI C compiler supports -g... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -g" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -g" >&5 printf %s "checking whether CXX compiler supports -g... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -g" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -g" >&5 printf %s "checking whether Fortran 77 compiler supports -g... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="-g" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -g" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -g" >&5 printf %s "checking whether Fortran compiler supports -g... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="-g" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -g" >&5 printf %s "checking whether MPI Fortran compiler supports -g... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -g" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu NVCCFLAGS="$NVCCFLAGS -g" HIPCCFLAGS="$HIPCCFLAGS -g" fi else CFLAGS="$CFLAGS -g0" CXXFLAGS="$CXXFLAGS -g0" FFLAGS="$FFLAGS -g0" FCFLAGS="$FCFLAGS -g0" LDFLAGS="$LDFLAGS -g0" fi if test x$enable_spinlock_check = xyes; then printf "%s\n" "#define STARPU_SPINLOCK_CHECK 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether extra checks should be performed" >&5 printf %s "checking whether extra checks should be performed... " >&6; } # Check whether --enable-fast was given. if test ${enable_fast+y} then : enableval=$enable_fast; enable_fast=$enableval else $as_nop enable_fast=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_fast" >&5 printf "%s\n" "$enable_fast" >&6; } if test x$enable_fast = xyes; then printf "%s\n" "#define STARPU_NO_ASSERT 1" >>confdefs.h else # fortify gets really enabled only with optimizations, avoid enabling it # when optimizations are not enabled, because with some glibc it # spews a lot of warnings. if test x$enable_debug != xyes; then if test x$GCC = xyes; then CPPFLAGS="-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=1 $CPPFLAGS" fi fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether debug messages should be displayed" >&5 printf %s "checking whether debug messages should be displayed... " >&6; } # Check whether --enable-verbose was given. if test ${enable_verbose+y} then : enableval=$enable_verbose; enable_verbose=$enableval else $as_nop enable_verbose=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_verbose" >&5 printf "%s\n" "$enable_verbose" >&6; } if test x$enable_verbose = xyes; then printf "%s\n" "#define STARPU_VERBOSE 1" >>confdefs.h fi if test x$enable_verbose = xextra; then printf "%s\n" "#define STARPU_VERBOSE 1" >>confdefs.h printf "%s\n" "#define STARPU_EXTRA_VERBOSE 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether coverage testing should be enabled" >&5 printf %s "checking whether coverage testing should be enabled... " >&6; } # Check whether --enable-coverage was given. if test ${enable_coverage+y} then : enableval=$enable_coverage; enable_coverage=$enableval else $as_nop enable_coverage=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_coverage" >&5 printf "%s\n" "$enable_coverage" >&6; } COVERAGE=$enable_coverage if test "x$enable_coverage" = "xyes"; then STARPU_COVERAGE_ENABLED_TRUE= STARPU_COVERAGE_ENABLED_FALSE='#' else STARPU_COVERAGE_ENABLED_TRUE='#' STARPU_COVERAGE_ENABLED_FALSE= fi if test x$enable_coverage = xyes; then CFLAGS="${CFLAGS} --coverage" CXXFLAGS="${CXXFLAGS} --coverage" FFLAGS="${FFLAGS} --coverage" FCFLAGS="${FCFLAGS} --coverage" LDFLAGS="${LDFLAGS} --coverage" LIBS="${LIBS} -lgcov" fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether coverity mode should be enabled" >&5 printf %s "checking whether coverity mode should be enabled... " >&6; } # Check whether --enable-coverity was given. if test ${enable_coverity+y} then : enableval=$enable_coverity; enable_coverity=$enableval else $as_nop enable_coverity=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_coverity" >&5 printf "%s\n" "$enable_coverity" >&6; } if test x$enable_coverity = xyes; then STARPU_COVERITY_TRUE= STARPU_COVERITY_FALSE='#' else STARPU_COVERITY_TRUE='#' STARPU_COVERITY_FALSE= fi if test x$enable_coverity = xyes ; then printf "%s\n" "#define STARPU_COVERITY 1" >>confdefs.h fi # We would need a PIC-compiled libfxt.a for this to work ; that's usually not available. if test x$enable_mpi = xyes -a x$enable_simgrid = xyes -o x$enable_shared = xno -a x$enable_starpupy = xyes ; then default_enable_fxt=no else default_enable_fxt=maybe fi # shall we use FxT to generate trace of the execution ? # Check whether --enable-fxt was given. if test ${enable_fxt+y} then : enableval=$enable_fxt; else $as_nop enable_fxt=$default_enable_fxt fi # Check whether --with-fxt was given. if test ${with_fxt+y} then : withval=$with_fxt; if test x$withval = xno ; then enable_fxt=no else fxt_dir="$withval" use_fxt_from_system=no # in case this was not explicit yet enable_fxt=yes FXTDIR=$fxt_dir fi else $as_nop use_fxt_from_system=yes fxt_dir="" fi if test x$enable_fxt != xno; then if test x$use_fxt_from_system = xno; then save_PKG_CONFIG_PATH="$PKG_CONFIG_PATH" PKG_CONFIG_PATH="$fxt_dir/lib/pkgconfig:$PKG_CONFIG_PATH" pkg_failed=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for FXT" >&5 printf %s "checking for FXT... " >&6; } if test -n "$PKG_CONFIG"; then if test -n "$FXT_CFLAGS"; then pkg_cv_FXT_CFLAGS="$FXT_CFLAGS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fxt\""; } >&5 ($PKG_CONFIG --exists --print-errors "fxt") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_FXT_CFLAGS=`$PKG_CONFIG --cflags "fxt" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$FXT_LIBS"; then pkg_cv_FXT_LIBS="$FXT_LIBS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fxt\""; } >&5 ($PKG_CONFIG --exists --print-errors "fxt") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_FXT_LIBS=`$PKG_CONFIG --libs "fxt" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then FXT_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "fxt"` else FXT_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "fxt"` fi # Put the nasty error message in config.log where it belongs echo "$FXT_PKG_ERRORS" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } have_valid_fxt=yes { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Old FxT without fxt.pc file, hoping link will succeed" >&5 printf "%s\n" "$as_me: WARNING: Old FxT without fxt.pc file, hoping link will succeed" >&2;} FXT_CFLAGS="-I$fxt_dir/include/ " FXT_LDFLAGS="-L$fxt_dir/lib/" FXT_LIBS="-lfxt" elif test $pkg_failed = untried; then have_valid_fxt=yes { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Old FxT without fxt.pc file, hoping link will succeed" >&5 printf "%s\n" "$as_me: WARNING: Old FxT without fxt.pc file, hoping link will succeed" >&2;} FXT_CFLAGS="-I$fxt_dir/include/ " FXT_LDFLAGS="-L$fxt_dir/lib/" FXT_LIBS="-lfxt" else FXT_CFLAGS=$pkg_cv_FXT_CFLAGS FXT_LIBS=$pkg_cv_FXT_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } have_valid_fxt=yes fi PKG_CONFIG_PATH="$save_PKG_CONFIG_PATH" else pkg_failed=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for FXT" >&5 printf %s "checking for FXT... " >&6; } if test -n "$PKG_CONFIG"; then if test -n "$FXT_CFLAGS"; then pkg_cv_FXT_CFLAGS="$FXT_CFLAGS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fxt\""; } >&5 ($PKG_CONFIG --exists --print-errors "fxt") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_FXT_CFLAGS=`$PKG_CONFIG --cflags "fxt" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$FXT_LIBS"; then pkg_cv_FXT_LIBS="$FXT_LIBS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fxt\""; } >&5 ($PKG_CONFIG --exists --print-errors "fxt") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_FXT_LIBS=`$PKG_CONFIG --libs "fxt" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then FXT_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "fxt"` else FXT_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "fxt"` fi # Put the nasty error message in config.log where it belongs echo "$FXT_PKG_ERRORS" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } have_valid_fxt=no elif test $pkg_failed = untried; then have_valid_fxt=no else FXT_CFLAGS=$pkg_cv_FXT_CFLAGS FXT_LIBS=$pkg_cv_FXT_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } have_valid_fxt=yes fi fi if test x$have_valid_fxt = xyes ; then enable_fxt=yes save_LIBS="$LIBS" LIBS="$LIBS $FXT_LIBS" save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS $FXT_LDFLAGS" ac_fn_c_check_func "$LINENO" "fxt_close" "ac_cv_func_fxt_close" if test "x$ac_cv_func_fxt_close" = xyes then : printf "%s\n" "#define HAVE_FXT_CLOSE 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "fxt_blockev_leave" "ac_cv_func_fxt_blockev_leave" if test "x$ac_cv_func_fxt_blockev_leave" = xyes then : printf "%s\n" "#define HAVE_FXT_BLOCKEV_LEAVE 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "enable_fut_flush" "ac_cv_func_enable_fut_flush" if test "x$ac_cv_func_enable_fut_flush" = xyes then : printf "%s\n" "#define HAVE_ENABLE_FUT_FLUSH 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "fut_set_filename" "ac_cv_func_fut_set_filename" if test "x$ac_cv_func_fut_set_filename" = xyes then : printf "%s\n" "#define HAVE_FUT_SET_FILENAME 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "fut_setup_flush_callback" "ac_cv_func_fut_setup_flush_callback" if test "x$ac_cv_func_fut_setup_flush_callback" = xyes then : printf "%s\n" "#define HAVE_FUT_SETUP_FLUSH_CALLBACK 1" >>confdefs.h fi LDFLAGS="$save_LDFLAGS" LIBS="$save_LIBS" save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS $FXT_CFLAGS" ac_fn_check_decl "$LINENO" "enable_fut_flush" "ac_cv_have_decl_enable_fut_flush" "#include " "$ac_c_undeclared_builtin_options" "CFLAGS" if test "x$ac_cv_have_decl_enable_fut_flush" = xyes then : ac_have_decl=1 else $as_nop ac_have_decl=0 fi printf "%s\n" "#define HAVE_DECL_ENABLE_FUT_FLUSH $ac_have_decl" >>confdefs.h ac_fn_check_decl "$LINENO" "fut_set_filename" "ac_cv_have_decl_fut_set_filename" "#include " "$ac_c_undeclared_builtin_options" "CFLAGS" if test "x$ac_cv_have_decl_fut_set_filename" = xyes then : ac_have_decl=1 else $as_nop ac_have_decl=0 fi printf "%s\n" "#define HAVE_DECL_FUT_SET_FILENAME $ac_have_decl" >>confdefs.h ac_fn_check_decl "$LINENO" "fut_setup_flush_callback" "ac_cv_have_decl_fut_setup_flush_callback" "#include " "$ac_c_undeclared_builtin_options" "CFLAGS" if test "x$ac_cv_have_decl_fut_setup_flush_callback" = xyes then : ac_have_decl=1 else $as_nop ac_have_decl=0 fi printf "%s\n" "#define HAVE_DECL_FUT_SETUP_FLUSH_CALLBACK $ac_have_decl" >>confdefs.h CFLAGS="$save_CFLAGS" if test x$enable_simgrid = xyes -a x$enable_shared = xno ; then # simgrid's SMPI needs fxt to be linked in statically for # variable privatization to work FXT_LIBS="$(pkg-config --variable=libdir fxt)/libfxt.a -Wl,--as-needed $(pkg-config --libs --static fxt) -Wl,--no-as-needed" fi ########################################## # Poti is a library to generate paje trace files ########################################## pkg_failed=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for POTI" >&5 printf %s "checking for POTI... " >&6; } if test -n "$PKG_CONFIG"; then if test -n "$POTI_CFLAGS"; then pkg_cv_POTI_CFLAGS="$POTI_CFLAGS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"poti\""; } >&5 ($PKG_CONFIG --exists --print-errors "poti") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_POTI_CFLAGS=`$PKG_CONFIG --cflags "poti" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$POTI_LIBS"; then pkg_cv_POTI_LIBS="$POTI_LIBS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"poti\""; } >&5 ($PKG_CONFIG --exists --print-errors "poti") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_POTI_LIBS=`$PKG_CONFIG --libs "poti" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then POTI_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "poti"` else POTI_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "poti"` fi # Put the nasty error message in config.log where it belongs echo "$POTI_PKG_ERRORS" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } have_valid_poti=no elif test $pkg_failed = untried; then have_valid_poti=no else POTI_CFLAGS=$pkg_cv_POTI_CFLAGS POTI_LIBS=$pkg_cv_POTI_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } have_valid_poti=yes fi # Check whether --enable-poti was given. if test ${enable_poti+y} then : enableval=$enable_poti; enable_poti=$enableval else $as_nop enable_poti=no fi if test x$enable_poti = xyes -a x$have_valid_poti = xyes ; then printf "%s\n" "#define STARPU_HAVE_POTI 1" >>confdefs.h save_LIBS="$LIBS" LIBS="$LIBS $POTI_LIBS" ac_fn_c_check_func "$LINENO" "poti_init_custom" "ac_cv_func_poti_init_custom" if test "x$ac_cv_func_poti_init_custom" = xyes then : printf "%s\n" "#define HAVE_POTI_INIT_CUSTOM 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "poti_user_NewEvent" "ac_cv_func_poti_user_NewEvent" if test "x$ac_cv_func_poti_user_NewEvent" = xyes then : printf "%s\n" "#define HAVE_POTI_USER_NEWEVENT 1" >>confdefs.h fi LIBS="$save_LIBS" FXT_CFLAGS="$FXT_CFLAGS $POTI_CFLAGS" FXT_LIBS="$FXT_LIBS $POTI_LIBS" fi else if test x$enable_fxt = xyes ; then as_fn_error $? "FxT is required but not available" "$LINENO" 5 fi enable_fxt=no fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether FxT traces should be generated" >&5 printf %s "checking whether FxT traces should be generated... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_fxt" >&5 printf "%s\n" "$enable_fxt" >&6; } if test x$enable_fxt = xyes; then printf "%s\n" "#define STARPU_USE_FXT 1" >>confdefs.h printf "%s\n" "#define CONFIG_FUT 1" >>confdefs.h fi STARPU_USE_FXT=$enable_fxt if test x$enable_fxt = xyes; then STARPU_USE_FXT_TRUE= STARPU_USE_FXT_FALSE='#' else STARPU_USE_FXT_TRUE='#' STARPU_USE_FXT_FALSE= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional locking systems FxT traces should be enabled" >&5 printf %s "checking whether additional locking systems FxT traces should be enabled... " >&6; } # Check whether --enable-fxt-lock was given. if test ${enable_fxt_lock+y} then : enableval=$enable_fxt_lock; enable_fxt_lock=$enableval else $as_nop enable_fxt_lock=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_fxt_lock" >&5 printf "%s\n" "$enable_fxt_lock" >&6; } if test x$enable_fxt_lock = xyes; then printf "%s\n" "#define STARPU_FXT_LOCK_TRACES 1" >>confdefs.h fi # Check whether --enable-papi was given. if test ${enable_papi+y} then : enableval=$enable_papi; enable_papi=$enableval else $as_nop enable_papi=yes fi if test x$enable_papi = xyes; then pkg_failed=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for PAPI" >&5 printf %s "checking for PAPI... " >&6; } if test -n "$PKG_CONFIG"; then if test -n "$PAPI_CFLAGS"; then pkg_cv_PAPI_CFLAGS="$PAPI_CFLAGS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"papi\""; } >&5 ($PKG_CONFIG --exists --print-errors "papi") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_PAPI_CFLAGS=`$PKG_CONFIG --cflags "papi" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$PAPI_LIBS"; then pkg_cv_PAPI_LIBS="$PAPI_LIBS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"papi\""; } >&5 ($PKG_CONFIG --exists --print-errors "papi") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_PAPI_LIBS=`$PKG_CONFIG --libs "papi" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then PAPI_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "papi"` else PAPI_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "papi"` fi # Put the nasty error message in config.log where it belongs echo "$PAPI_PKG_ERRORS" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } have_valid_papi=no elif test $pkg_failed = untried; then have_valid_papi=no else PAPI_CFLAGS=$pkg_cv_PAPI_CFLAGS PAPI_LIBS=$pkg_cv_PAPI_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } have_valid_papi=yes fi if test x$have_valid_papi = xyes ; then printf "%s\n" "#define STARPU_PAPI 1" >>confdefs.h STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS $PAPI_LIBS" fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether performance debugging should be enabled" >&5 printf %s "checking whether performance debugging should be enabled... " >&6; } # Check whether --enable-perf-debug was given. if test ${enable_perf_debug+y} then : enableval=$enable_perf_debug; enable_perf_debug=$enableval else $as_nop enable_perf_debug=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_perf_debug" >&5 printf "%s\n" "$enable_perf_debug" >&6; } STARPU_PERF_DEBUG=$enable_perf_debug if test x$enable_perf_debug = xyes; then printf "%s\n" "#define STARPU_PERF_DEBUG 1" >>confdefs.h CPPFLAGS="${CPPFLAGS} -pg " LDFLAGS="${LDFLAGS} -pg " fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether performance model debugging should be enabled" >&5 printf %s "checking whether performance model debugging should be enabled... " >&6; } # Check whether --enable-model-debug was given. if test ${enable_model_debug+y} then : enableval=$enable_model_debug; enable_model_debug=$enableval else $as_nop enable_model_debug=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_model_debug" >&5 printf "%s\n" "$enable_model_debug" >&6; } if test x$enable_model_debug = xyes; then printf "%s\n" "#define STARPU_MODEL_DEBUG 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether memory stats should be displayed" >&5 printf %s "checking whether memory stats should be displayed... " >&6; } # Check whether --enable-memory-stats was given. if test ${enable_memory_stats+y} then : enableval=$enable_memory_stats; enable_memory_stats=$enableval else $as_nop enable_memory_stats=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_memory_stats" >&5 printf "%s\n" "$enable_memory_stats" >&6; } if test x$enable_memory_stats = xyes; then printf "%s\n" "#define STARPU_MEMORY_STATS 1" >>confdefs.h fi # Check whether --enable-glpk was given. if test ${enable_glpk+y} then : enableval=$enable_glpk; enable_glpk=$enableval else $as_nop enable_glpk=yes fi if test x$enable_glpk = xyes; then for ac_header in glpk.h do : ac_fn_c_check_header_compile "$LINENO" "glpk.h" "ac_cv_header_glpk_h" "$ac_includes_default" if test "x$ac_cv_header_glpk_h" = xyes then : printf "%s\n" "#define HAVE_GLPK_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_GLPK_H 1" >>confdefs.h fi done _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lglpk" >&5 printf %s "checking for main in -lglpk... " >&6; } if test ${ac_cv_lib_glpk_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lglpk $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_glpk_main=yes else $as_nop ac_cv_lib_glpk_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_glpk_main" >&5 printf "%s\n" "$ac_cv_lib_glpk_main" >&6; } if test "x$ac_cv_lib_glpk_main" = xyes then : printf "%s\n" "#define HAVE_LIBGLPK 1" >>confdefs.h LIBS="-lglpk $LIBS" fi STARPU_GLPK_LDFLAGS="$STARPU_GLPK_LDFLAGS $LIBS" LIBS=$_LIBS_SAV fi # Check whether --with-ayudame1-include-dir was given. if test ${with_ayudame1_include_dir+y} then : withval=$with_ayudame1_include_dir; ayudame1_include_dir="$withval" if test -n "$ayudame1_include_dir"; then CPPFLAGS="-I$ayudame1_include_dir $CPPFLAGS" fi else $as_nop ayudame1_include_dir=no fi # Check whether --with-ayudame2-include-dir was given. if test ${with_ayudame2_include_dir+y} then : withval=$with_ayudame2_include_dir; ayudame2_include_dir="$withval" if test -n "$ayudame2_include_dir"; then CPPFLAGS="-I$ayudame2_include_dir $CPPFLAGS" fi else $as_nop ayudame2_include_dir=no fi # Ayudame 1 header is capitalized ac_fn_c_check_header_compile "$LINENO" "Ayudame.h" "ac_cv_header_Ayudame_h" "$ac_includes_default" if test "x$ac_cv_header_Ayudame_h" = xyes then : printf "%s\n" "#define HAVE_AYUDAME_H 1" >>confdefs.h fi # Check whether --enable-ayudame1 was given. if test ${enable_ayudame1+y} then : enableval=$enable_ayudame1; enable_ayudame1=$enableval else $as_nop enable_ayudame1=yes fi # Ayudame 2 header is lowercase ac_fn_c_check_header_compile "$LINENO" "ayudame.h" "ac_cv_header_ayudame_h" "$ac_includes_default" if test "x$ac_cv_header_ayudame_h" = xyes then : printf "%s\n" "#define HAVE_AYUDAME_H 1" >>confdefs.h fi # Check whether --enable-ayudame2 was given. if test ${enable_ayudame2+y} then : enableval=$enable_ayudame2; enable_ayudame2=$enableval else $as_nop enable_ayudame2=yes fi if test x$enable_ayudame1 = xyes -a x$ac_cv_header_Ayudame_h = xyes; then printf "%s\n" "#define STARPU_USE_AYUDAME1 1" >>confdefs.h ayu_msg="yes, use version 1" else if test x$enable_ayudame2 = xyes -a x$ac_cv_header_ayudame_h = xyes; then printf "%s\n" "#define STARPU_USE_AYUDAME2 1" >>confdefs.h ayu_msg="yes, use version 2" else ayu_msg="no" fi fi if test "x$enable_ayudame1" = "xyes"; then STARPU_USE_AYUDAME1_TRUE= STARPU_USE_AYUDAME1_FALSE='#' else STARPU_USE_AYUDAME1_TRUE='#' STARPU_USE_AYUDAME1_FALSE= fi if test "x$enable_ayudame2" = "xyes"; then STARPU_USE_AYUDAME2_TRUE= STARPU_USE_AYUDAME2_FALSE='#' else STARPU_USE_AYUDAME2_TRUE='#' STARPU_USE_AYUDAME2_FALSE= fi STARPU_FXT_EVENT_DEFINES="`grep -E '#define\s+_STARPU_(MPI_)?FUT_' ${srcdir}/src/common/fxt.h ${srcdir}/mpi/src/starpu_mpi_fxt.h | grep 0x | grep -v 0x1 | cut -d : -f 2`" # Heteroprio works better if it can store information based on the program's name { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the target supports program_invocation_short_name" >&5 printf %s "checking whether the target supports program_invocation_short_name... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include int main() { printf("%s\n", program_invocation_short_name); return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : printf "%s\n" "#define STARPU_HAVE_PROGRAM_INVOCATION_SHORT_NAME 1" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext ############################################################################### # # # Miscellaneous options for StarPU # # # ############################################################################### { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether data locality should be enforced" >&5 printf %s "checking whether data locality should be enforced... " >&6; } # Check whether --enable-data-locality-enforce was given. if test ${enable_data_locality_enforce+y} then : enableval=$enable_data_locality_enforce; enable_data_locality_enforce=$enableval else $as_nop enable_data_locality_enforce=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_data_locality_enforce" >&5 printf "%s\n" "$enable_data_locality_enforce" >&6; } if test x$enable_data_locality_enforce = xyes ; then printf "%s\n" "#define STARPU_DATA_LOCALITY_ENFORCE 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how many buffers can be manipulated per task" >&5 printf %s "checking how many buffers can be manipulated per task... " >&6; } # Check whether --enable-maxbuffers was given. if test ${enable_maxbuffers+y} then : enableval=$enable_maxbuffers; nmaxbuffers=$enableval else $as_nop nmaxbuffers=8 fi if test x$nmaxbuffers = x -o x$nmaxbuffers = xyes then as_fn_error $? "The --enable-maxbuffers option needs to be given a number" "$LINENO" 5 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxbuffers" >&5 printf "%s\n" "$nmaxbuffers" >&6; } printf "%s\n" "#define STARPU_NMAXBUFS $nmaxbuffers" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how many MPI nodes fxt files can be manipulated when generating traces" >&5 printf %s "checking how many MPI nodes fxt files can be manipulated when generating traces... " >&6; } # Check whether --enable-fxt-max-files was given. if test ${enable_fxt_max_files+y} then : enableval=$enable_fxt_max_files; nmaxfxtfiles=$enableval else $as_nop nmaxfxtfiles=64 fi if test x$nmaxfxtfiles = x -o x$nmaxfxtfiles = xyes then as_fn_error $? "The --enable-maxfxtfiles option needs to be given a number" "$LINENO" 5 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxfxtfiles" >&5 printf "%s\n" "$nmaxfxtfiles" >&6; } printf "%s\n" "#define STARPU_FXT_MAX_FILES $nmaxfxtfiles" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of memory nodes to use per MPI rank" >&5 printf %s "checking maximum number of memory nodes to use per MPI rank... " >&6; } # Check whether --enable-maxnodes was given. if test ${enable_maxnodes+y} then : enableval=$enable_maxnodes; maxnodes=$enableval else $as_nop maxnodes=0 fi if test x$maxnodes = x0 ; then if test x$enable_simgrid = xyes ; then # We need the room for the virtual CUDA/OpenCL devices nodes=`expr 4 + $nmaxcudadev + $nmaxopencldev + 1 + $nmaxmpidev` else # We have one memory node shared by all CPU workers, one node per GPU # we add nodes to use 2 memory disks nodes=`expr $nmaxnumanodes + 2` if test x$enable_cuda = xyes ; then # we could have used nmaxcudadev + 1, but this would certainly give an # odd number. nodes=`expr $nodes + $nmaxcudadev` fi if test x$enable_hip = xyes ; then # we could have used nmaxhipdev + 1, but this would certainly give an # odd number. nodes=`expr $nodes + $nmaxhipdev` fi if test x$enable_opencl = xyes ; then # we could have used nmaxopencldev + 1, but this would certainly give an # odd number. nodes=`expr $nodes + $nmaxopencldev` fi if test x$enable_max_fpga = xyes ; then # we could have used nmaxmaxfpgadev + 1, but this would certainly give an # odd number. nodes=`expr $nodes + $nmaxmaxfpgadev` fi #nmaxmpidev = 0 if mpi master-slave is disabled nodes=`expr $nodes + $nmaxmpidev` #nmaxtcpipdev = 0 if tcpip master-slave is disabled nodes=`expr $nodes + $nmaxtcpipdev` fi # set maxnodes to the next power of 2 greater than nodes maxnodes=1 while test "$maxnodes" -lt "$nodes" do maxnodes=`expr $maxnodes \* 2` done fi if test x$maxnodes = x -o x$maxnodes = xyes then as_fn_error $? "The --enable-maxnodes option needs to be given a number" "$LINENO" 5 fi if test $maxnodes -gt 32 ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Note: the wt_mask feature only supports 32 memory nodes" >&5 printf "%s\n" "$as_me: WARNING: Note: the wt_mask feature only supports 32 memory nodes" >&2;} fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of memory nodes" >&5 printf %s "checking maximum number of memory nodes... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $maxnodes" >&5 printf "%s\n" "$maxnodes" >&6; } printf "%s\n" "#define STARPU_MAXNODES $maxnodes" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether allocation cache should be used" >&5 printf %s "checking whether allocation cache should be used... " >&6; } # Check whether --enable-allocation-cache was given. if test ${enable_allocation_cache+y} then : enableval=$enable_allocation_cache; enable_allocation_cache=$enableval else $as_nop enable_allocation_cache=yes fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_allocation_cache" >&5 printf "%s\n" "$enable_allocation_cache" >&6; } if test x$enable_allocation_cache = xyes; then printf "%s\n" "#define STARPU_USE_ALLOCATION_CACHE 1" >>confdefs.h fi # Check whether --with-perf-model-dir was given. if test ${with_perf_model_dir+y} then : withval=$with_perf_model_dir; if test x$withval = xno; then as_fn_error $? "--without-perf-model-dir is not a valid option" "$LINENO" 5 fi perf_model_dir="$withval" have_explicit_perf_model_dir=yes printf "%s\n" "#define STARPU_PERF_MODEL_DIR \"$perf_model_dir\"" >>confdefs.h else $as_nop # by default, we put the performance models in # $HOME/.starpu/sampling/ have_explicit_perf_model_dir=no perf_model_dir="\$STARPU_HOME/.starpu/sampling/" fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking using explicit performance model location" >&5 printf %s "checking using explicit performance model location... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $have_explicit_perf_model_dir" >&5 printf "%s\n" "$have_explicit_perf_model_dir" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking performance models location" >&5 printf %s "checking performance models location... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $perf_model_dir" >&5 printf "%s\n" "$perf_model_dir" >&6; } # On many multicore CPUs, clock cycles are not synchronized { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for clock_gettime in -lrt" >&5 printf %s "checking for clock_gettime in -lrt... " >&6; } if test ${ac_cv_lib_rt_clock_gettime+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lrt $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char clock_gettime (); int main (void) { return clock_gettime (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_rt_clock_gettime=yes else $as_nop ac_cv_lib_rt_clock_gettime=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rt_clock_gettime" >&5 printf "%s\n" "$ac_cv_lib_rt_clock_gettime" >&6; } if test "x$ac_cv_lib_rt_clock_gettime" = xyes then : printf "%s\n" "#define HAVE_LIBRT 1" >>confdefs.h LIBS="-lrt $LIBS" fi ac_fn_c_check_func "$LINENO" "clock_gettime" "ac_cv_func_clock_gettime" if test "x$ac_cv_func_clock_gettime" = xyes then : printf "%s\n" "#define HAVE_CLOCK_GETTIME 1" >>confdefs.h fi # Compute the maximum number of workers (we round it to 16 for alignment # purposes). if test x$enable_simgrid != xyes; then if test x$enable_cpu != xyes; then maxcpus=0 fi if test x$enable_cuda != xyes; then nmaxcudadev=0 fi if test x$enable_max_fpga != xyes; then nmaxmaxfpgadev=0 fi if test x$enable_opencl != xyes; then nmaxopencldev=0 fi #By default, if we cannot build mpi master-slave nmaxmpidev is set to zero. #But with the multiplication with maxcpus, we need to put it to one. if test x$build_mpi_master_slave != xyes; then nmaxmpidev=1 fi #By default, if we cannot build tcp/ip master-slave nmaxtcpipdev is set to zero. #But with the multiplication with maxcpus, we need to put it to one. if test x$build_tcpip_master_slave != xyes; then nmaxtcpipdev=1 fi fi if test $maxcpus = 0 then nmaxworkers=`expr 16 \* \( \( \( $nmaxmpidev \* 64 \) + $nmaxcudadev + $nmaxhipdev + $nmaxopencldev + $nmaxmaxfpgadev + 15 \) / 16 \) ` elif test $nmaxmpidev = 0 then nmaxworkers=`expr 16 \* \( \( $maxcpus + $nmaxcudadev + $nmaxhipdev + $nmaxopencldev + $nmaxmaxfpgadev + 15 \) / 16 \) ` else nmaxworkers=`expr 16 \* \( \( \( $nmaxmpidev \* $maxcpus \) + $nmaxcudadev + $nmaxhipdev + $nmaxopencldev + $nmaxmaxfpgadev + 15 \) / 16 \) ` fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking Maximum number of workers" >&5 printf %s "checking Maximum number of workers... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxworkers" >&5 printf "%s\n" "$nmaxworkers" >&6; } printf "%s\n" "#define STARPU_NMAXWORKERS $nmaxworkers" >>confdefs.h nmaxdevs=0 if test $nmaxdevs -lt $nmaxcudadev; then nmaxdevs=$nmaxcudadev fi if test $nmaxdevs -lt $nmaxhipdev; then nmaxdevs=$nmaxhipdev fi if test $nmaxdevs -lt $nmaxopencldev; then nmaxdevs=$nmaxopencldev fi if test $nmaxdevs -lt $nmaxmaxfpgadev; then nmaxdevs=$nmaxmaxfpgadev fi if test $nmaxdevs -lt $nmaxmpidev; then nmaxdevs=$nmaxmpidev fi if test $nmaxdevs -lt $nmaxtcpipdev; then nmaxdevs=$nmaxtcpipdev fi printf "%s\n" "#define STARPU_NMAXDEVS $nmaxdevs" >>confdefs.h # Computes the maximum number of combined worker nmaxcombinedworkers=$maxcpus { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking Maximum number of workers combinations" >&5 printf %s "checking Maximum number of workers combinations... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $nmaxcombinedworkers" >&5 printf "%s\n" "$nmaxcombinedworkers" >&6; } printf "%s\n" "#define STARPU_NMAX_COMBINEDWORKERS $nmaxcombinedworkers" >>confdefs.h # Computes the maximum number of implementations per arch { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking maximum number of implementations" >&5 printf %s "checking maximum number of implementations... " >&6; } # Check whether --enable-maximplementations was given. if test ${enable_maximplementations+y} then : enableval=$enable_maximplementations; maximplementations=$enableval else $as_nop maximplementations=4 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $maximplementations" >&5 printf "%s\n" "$maximplementations" >&6; } printf "%s\n" "#define STARPU_MAXIMPLEMENTATIONS $maximplementations" >>confdefs.h if test x$maximplementations = x -o x$maximplementations = xyes then as_fn_error $? "The --enable-maximplementations option needs to be given a number" "$LINENO" 5 fi # Enable LevelDB support if requested and the lib is found # Check whether --enable-leveldb was given. if test ${enable_leveldb+y} then : enableval=$enable_leveldb; enable_leveldb=$enableval else $as_nop enable_leveldb=no fi if test x$enable_leveldb = xyes; then ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu for ac_header in leveldb/db.h do : ac_fn_cxx_check_header_compile "$LINENO" "leveldb/db.h" "ac_cv_header_leveldb_db_h" "$ac_includes_default" if test "x$ac_cv_header_leveldb_db_h" = xyes then : printf "%s\n" "#define HAVE_LEVELDB_DB_H 1" >>confdefs.h printf "%s\n" "#define STARPU_HAVE_LEVELDB 1" >>confdefs.h fi done _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lleveldb" >&5 printf %s "checking for main in -lleveldb... " >&6; } if test ${ac_cv_lib_leveldb_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lleveldb $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ namespace conftest { extern "C" int main (); } int main (void) { return conftest::main (); ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : ac_cv_lib_leveldb_main=yes else $as_nop ac_cv_lib_leveldb_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_leveldb_main" >&5 printf "%s\n" "$ac_cv_lib_leveldb_main" >&6; } if test "x$ac_cv_lib_leveldb_main" = xyes then : printf "%s\n" "#define HAVE_LIBLEVELDB 1" >>confdefs.h LIBS="-lleveldb $LIBS" fi STARPU_LEVELDB_LDFLAGS="$STARPU_LEVELDB_LDFLAGS $LIBS" LIBS=$_LIBS_SAV ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu fi if test "x$enable_leveldb" = "xyes" -a "x$ac_cv_lib_leveldb_main" = "xyes"; then STARPU_HAVE_LEVELDB_TRUE= STARPU_HAVE_LEVELDB_FALSE='#' else STARPU_HAVE_LEVELDB_TRUE='#' STARPU_HAVE_LEVELDB_FALSE= fi # Defines the calibration heuristic for the history-based calibration of StarPU { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking calibration heuristic of history-based StarPU calibrator" >&5 printf %s "checking calibration heuristic of history-based StarPU calibrator... " >&6; } # Check whether --enable-calibration-heuristic was given. if test ${enable_calibration_heuristic+y} then : enableval=$enable_calibration_heuristic; calibration_heuristic=$enableval else $as_nop calibration_heuristic=50 fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $calibration_heuristic" >&5 printf "%s\n" "$calibration_heuristic" >&6; } printf "%s\n" "#define STARPU_HISTORYMAXERROR $calibration_heuristic" >>confdefs.h ############################################################################### # # # MP Common settings # # # ############################################################################### if test x$build_mpi_master_slave = xyes -o x$build_tcpip_master_slave = xyes; then build_master_slave=yes else build_master_slave=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the master-slave mode should be enabled" >&5 printf %s "checking whether the master-slave mode should be enabled... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $build_master_slave" >&5 printf "%s\n" "$build_master_slave" >&6; } if test "x$build_master_slave" = "xyes"; then STARPU_USE_MP_TRUE= STARPU_USE_MP_FALSE='#' else STARPU_USE_MP_TRUE='#' STARPU_USE_MP_FALSE= fi # Check whether --enable-export-dynamic was given. if test ${enable_export_dynamic+y} then : enableval=$enable_export_dynamic; fi if test x$build_master_slave = xyes; then printf "%s\n" "#define STARPU_USE_MP 1" >>confdefs.h if test x$enable_export_dynamic != xno ; then STARPU_EXPORT_DYNAMIC="-rdynamic" fi fi ############################################################################### # # # Flags for C Compiler # # # ############################################################################### { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Wall" >&5 printf %s "checking whether C compiler supports -Wall... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-Wall" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wall" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Wall" >&5 printf %s "checking whether MPI C compiler supports -Wall... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wall" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Wall" >&5 printf %s "checking whether CXX compiler supports -Wall... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-Wall" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Wall" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -Wall" >&5 printf %s "checking whether Fortran 77 compiler supports -Wall... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="-Wall" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -Wall" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -Wall" >&5 printf %s "checking whether Fortran compiler supports -Wall... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="-Wall" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -Wall" >&5 printf %s "checking whether MPI Fortran compiler supports -Wall... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -Wall" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Werror=implicit" >&5 printf %s "checking whether C compiler supports -Werror=implicit... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-Werror=implicit" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Werror=implicit" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Werror=implicit" >&5 printf %s "checking whether MPI C compiler supports -Werror=implicit... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Werror=implicit" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Werror=implicit-function-declaration" >&5 printf %s "checking whether C compiler supports -Werror=implicit-function-declaration... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-Werror=implicit-function-declaration" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Werror=implicit-function-declaration" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Werror=implicit-function-declaration" >&5 printf %s "checking whether MPI C compiler supports -Werror=implicit-function-declaration... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Werror=implicit-function-declaration" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" if test x$enable_perf_debug = xyes; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -no-pie" >&5 printf %s "checking whether C compiler supports -no-pie... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-no-pie" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -no-pie" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -no-pie" >&5 printf %s "checking whether MPI C compiler supports -no-pie... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -no-pie" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -no-pie" >&5 printf %s "checking whether CXX compiler supports -no-pie... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-no-pie" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -no-pie" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -no-pie" >&5 printf %s "checking whether Fortran 77 compiler supports -no-pie... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="-no-pie" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -no-pie" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -no-pie" >&5 printf %s "checking whether Fortran compiler supports -no-pie... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="-no-pie" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -no-pie" >&5 printf %s "checking whether MPI Fortran compiler supports -no-pie... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -no-pie" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -no-PIE" >&5 printf %s "checking whether C compiler supports -no-PIE... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-no-PIE" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -no-PIE" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -no-PIE" >&5 printf %s "checking whether MPI C compiler supports -no-PIE... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -no-PIE" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -no-PIE" >&5 printf %s "checking whether CXX compiler supports -no-PIE... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-no-PIE" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -no-PIE" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -no-PIE" >&5 printf %s "checking whether Fortran 77 compiler supports -no-PIE... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="-no-PIE" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -no-PIE" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -no-PIE" >&5 printf %s "checking whether Fortran compiler supports -no-PIE... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="-no-PIE" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -no-PIE" >&5 printf %s "checking whether MPI Fortran compiler supports -no-PIE... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -no-PIE" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -fno-pie" >&5 printf %s "checking whether C compiler supports -fno-pie... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-fno-pie" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -fno-pie" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -fno-pie" >&5 printf %s "checking whether MPI C compiler supports -fno-pie... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -fno-pie" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -fno-pie" >&5 printf %s "checking whether CXX compiler supports -fno-pie... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-fno-pie" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -fno-pie" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -fno-pie" >&5 printf %s "checking whether Fortran 77 compiler supports -fno-pie... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="-fno-pie" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -fno-pie" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -fno-pie" >&5 printf %s "checking whether Fortran compiler supports -fno-pie... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="-fno-pie" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -fno-pie" >&5 printf %s "checking whether MPI Fortran compiler supports -fno-pie... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -fno-pie" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Wextra" >&5 printf %s "checking whether C compiler supports -Wextra... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-Wextra" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wextra" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Wextra" >&5 printf %s "checking whether MPI C compiler supports -Wextra... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wextra" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Wextra" >&5 printf %s "checking whether CXX compiler supports -Wextra... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-Wextra" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Wextra" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -Wextra" >&5 printf %s "checking whether Fortran 77 compiler supports -Wextra... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="-Wextra" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -Wextra" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -Wextra" >&5 printf %s "checking whether Fortran compiler supports -Wextra... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="-Wextra" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -Wextra" >&5 printf %s "checking whether MPI Fortran compiler supports -Wextra... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -Wextra" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Wunused" >&5 printf %s "checking whether C compiler supports -Wunused... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-Wunused" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wunused" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Wunused" >&5 printf %s "checking whether MPI C compiler supports -Wunused... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wunused" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Wunused" >&5 printf %s "checking whether CXX compiler supports -Wunused... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-Wunused" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Wunused" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -Wunused" >&5 printf %s "checking whether Fortran 77 compiler supports -Wunused... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="-Wunused" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -Wunused" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -Wunused" >&5 printf %s "checking whether Fortran compiler supports -Wunused... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="-Wunused" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -Wunused" >&5 printf %s "checking whether MPI Fortran compiler supports -Wunused... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -Wunused" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Wundef" >&5 printf %s "checking whether C compiler supports -Wundef... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-Wundef" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wundef" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Wundef" >&5 printf %s "checking whether MPI C compiler supports -Wundef... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wundef" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Wundef" >&5 printf %s "checking whether CXX compiler supports -Wundef... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-Wundef" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Wundef" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Wshadow" >&5 printf %s "checking whether C compiler supports -Wshadow... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-Wshadow" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wshadow" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Wshadow" >&5 printf %s "checking whether MPI C compiler supports -Wshadow... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wshadow" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Wshadow" >&5 printf %s "checking whether CXX compiler supports -Wshadow... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-Wshadow" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Wshadow" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -Wshadow" >&5 printf %s "checking whether Fortran 77 compiler supports -Wshadow... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="-Wshadow" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -Wshadow" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -Wshadow" >&5 printf %s "checking whether Fortran compiler supports -Wshadow... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="-Wshadow" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -Wshadow" >&5 printf %s "checking whether MPI Fortran compiler supports -Wshadow... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -Wshadow" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Wpointer-arith" >&5 printf %s "checking whether C compiler supports -Wpointer-arith... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-Wpointer-arith" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wpointer-arith" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Wpointer-arith" >&5 printf %s "checking whether MPI C compiler supports -Wpointer-arith... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Wpointer-arith" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Wpointer-arith" >&5 printf %s "checking whether CXX compiler supports -Wpointer-arith... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-Wpointer-arith" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Wpointer-arith" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test "x$STARPU_DEVEL" != x; then printf "%s\n" "#define STARPU_DEVEL 1" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -Werror=pointer-arith" >&5 printf %s "checking whether C compiler supports -Werror=pointer-arith... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-Werror=pointer-arith" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Werror=pointer-arith" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -Werror=pointer-arith" >&5 printf %s "checking whether MPI C compiler supports -Werror=pointer-arith... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -Werror=pointer-arith" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -Werror=pointer-arith" >&5 printf %s "checking whether CXX compiler supports -Werror=pointer-arith... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-Werror=pointer-arith" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -Werror=pointer-arith" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports -fno-common" >&5 printf %s "checking whether C compiler supports -fno-common... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="-fno-common" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -fno-common" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports -fno-common" >&5 printf %s "checking whether MPI C compiler supports -fno-common... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS -fno-common" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports -fno-common" >&5 printf %s "checking whether CXX compiler supports -fno-common... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="-fno-common" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS -fno-common" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports -fno-common" >&5 printf %s "checking whether Fortran 77 compiler supports -fno-common... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="-fno-common" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS -fno-common" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports -fno-common" >&5 printf %s "checking whether Fortran compiler supports -fno-common... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="-fno-common" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports -fno-common" >&5 printf %s "checking whether MPI Fortran compiler supports -fno-common... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS -fno-common" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu fi if test "x$STARPU_DEVEL" != x; then STARPU_DEVEL_TRUE= STARPU_DEVEL_FALSE='#' else STARPU_DEVEL_TRUE='#' STARPU_DEVEL_FALSE= fi # Same value as Automake's, for use in other places. pkglibdir="\${libdir}/$PACKAGE" # Check whether --with-check-flags was given. if test ${with_check_flags+y} then : withval=$with_check_flags; check_flags=$withval else $as_nop check_flags="" fi if test "x$check_flags" != "x" ; then for xflag in $check_flags do { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler supports $xflag" >&5 printf %s "checking whether C compiler supports $xflag... " >&6; } SAVED_CFLAGS="$CFLAGS" CFLAGS="$xflag" check_mpi="no" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS $xflag" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI C compiler supports $xflag" >&5 printf %s "checking whether MPI C compiler supports $xflag... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS $xflag" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CXX compiler supports $xflag" >&5 printf %s "checking whether CXX compiler supports $xflag... " >&6; } SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="$xflag" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { const char *hello = "Hello World"; ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO" then : GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS $xflag" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CXXFLAGS="$SAVED_CXXFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran 77 compiler supports $xflag" >&5 printf %s "checking whether Fortran 77 compiler supports $xflag... " >&6; } SAVED_FFLAGS="$FFLAGS" FFLAGS="$xflag" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_f77_try_link "$LINENO" then : GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS $xflag" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FFLAGS="$SAVED_FFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Fortran compiler supports $xflag" >&5 printf %s "checking whether Fortran compiler supports $xflag... " >&6; } SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="$xflag" check_mpi="no" cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : check_mpi="yes" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI Fortran compiler supports $xflag" >&5 printf %s "checking whether MPI Fortran compiler supports $xflag... " >&6; } cat > conftest.$ac_ext <<_ACEOF program main end _ACEOF if ac_fn_fc_try_link "$LINENO" then : GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS $xflag" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu done fi ######################################################################## # # # Parallel worker support # # # ######################################################################## default_enable_parallel_worker=yes if test x$starpu_darwin = xyes ; then default_enable_parallel_worker=no fi # Check whether --enable-parallel-worker was given. if test ${enable_parallel_worker+y} then : enableval=$enable_parallel_worker; enable_parallel_worker=$enableval else $as_nop enable_parallel_worker=$default_enable_parallel_worker fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for parallel worker support" >&5 printf %s "checking for parallel worker support... " >&6; } if test x$enable_parallel_worker = xyes; then printf "%s\n" "#define STARPU_PARALLEL_WORKER 1" >>confdefs.h if test -e penmp || test -e mp; then as_fn_error $? "AC_OPENMP clobbers files named 'mp' and 'penmp'. Aborting configure because one of these files already exists." "$LINENO" 5 fi # Check whether --enable-openmp was given. if test ${enable_openmp+y} then : enableval=$enable_openmp; fi OPENMP_CFLAGS= if test "$enable_openmp" != no; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to support OpenMP" >&5 printf %s "checking for $CC option to support OpenMP... " >&6; } if test ${ac_cv_prog_c_openmp+y} then : printf %s "(cached) " >&6 else $as_nop ac_cv_prog_c_openmp='not found' for ac_option in '' -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \ -Popenmp --openmp; do ac_save_CFLAGS=$CFLAGS CFLAGS="$CFLAGS $ac_option" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifndef _OPENMP #error "OpenMP not supported" #endif #include int main (void) { return omp_get_num_threads (); } _ACEOF if ac_fn_c_try_compile "$LINENO" then : cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifndef _OPENMP #error "OpenMP not supported" #endif #include int main (void) { return omp_get_num_threads (); } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_prog_c_openmp=$ac_option else $as_nop ac_cv_prog_c_openmp='unsupported' fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ac_save_CFLAGS if test "$ac_cv_prog_c_openmp" != 'not found'; then break fi done if test "$ac_cv_prog_c_openmp" = 'not found'; then ac_cv_prog_c_openmp='unsupported' elif test "$ac_cv_prog_c_openmp" = ''; then ac_cv_prog_c_openmp='none needed' fi rm -f penmp mp fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_c_openmp" >&5 printf "%s\n" "$ac_cv_prog_c_openmp" >&6; } if test "$ac_cv_prog_c_openmp" != 'unsupported' && \ test "$ac_cv_prog_c_openmp" != 'none needed'; then OPENMP_CFLAGS="$ac_cv_prog_c_openmp" fi fi fi if test "x$enable_parallel_worker" = "xyes"; then STARPU_PARALLEL_WORKER_TRUE= STARPU_PARALLEL_WORKER_FALSE='#' else STARPU_PARALLEL_WORKER_TRUE='#' STARPU_PARALLEL_WORKER_FALSE= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_parallel_worker" >&5 printf "%s\n" "$enable_parallel_worker" >&6; } ############################################################################### # # # OpenMP LLVM runtime support # # # ############################################################################### # Check whether --enable-openmp-llvm was given. if test ${enable_openmp_llvm+y} then : enableval=$enable_openmp_llvm; enable_openmp_llvm=$enableval else $as_nop enable_openmp_llvm=no fi openmp_llvm_msg="" if test x$starpu_windows = xyes ; then enable_openmp_llvm=no openmp_llvm_msg="disabled on windows" fi if test x$enable_simgrid = xyes ; then enable_openmp_llvm=no openmp_llvm_msg="incompatibility with Simgrid support" fi if test x$PROG_CLANG = x ; then enable_openmp_llvm=no openmp_llvm_msg="missing clang" fi if test x$enable_openmp_llvm = xyes; then printf "%s\n" "#define STARPU_OPENMP_LLVM 1" >>confdefs.h # Force activating the generic OpenMP runtime support enable_openmp="yes" fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LLVM OpenMP runtime support" >&5 printf %s "checking for LLVM OpenMP runtime support... " >&6; } if test "x$enable_openmp_llvm" = "xyes"; then STARPU_OPENMP_LLVM_TRUE= STARPU_OPENMP_LLVM_FALSE='#' else STARPU_OPENMP_LLVM_TRUE='#' STARPU_OPENMP_LLVM_FALSE= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_openmp_llvm $openmp_llvm_msg" >&5 printf "%s\n" "$enable_openmp_llvm $openmp_llvm_msg" >&6; } ############################################################################### # # # OpenMP runtime support # # # ############################################################################### # Check whether --enable-openmp was given. if test ${enable_openmp+y} then : enableval=$enable_openmp; enable_openmp=$enableval else $as_nop enable_openmp=yes fi ac_fn_c_check_header_compile "$LINENO" "ucontext.h" "ac_cv_header_ucontext_h" "$ac_includes_default" if test "x$ac_cv_header_ucontext_h" = xyes then : have_valid_ucontext=yes else $as_nop have_valid_ucontext=no fi openmp_msg="" if test x$starpu_windows = xyes ; then enable_openmp=no openmp_msg="disabled on windows" fi if test x$enable_simgrid = xyes ; then enable_openmp=no openmp_msg="incompatibility with Simgrid support" fi if test x$have_valid_ucontext = xno ; then enable_openmp=no openmp_msg="ucontext.h unavailable" fi if test x$enable_openmp = xyes; then printf "%s\n" "#define STARPU_OPENMP 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for OpenMP runtime support" >&5 printf %s "checking for OpenMP runtime support... " >&6; } if test "x$enable_openmp" = "xyes"; then STARPU_OPENMP_TRUE= STARPU_OPENMP_FALSE='#' else STARPU_OPENMP_TRUE='#' STARPU_OPENMP_FALSE= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_openmp $openmp_msg" >&5 printf "%s\n" "$enable_openmp $openmp_msg" >&6; } if test x$enable_simgrid = xno -a -n "$OPENMP_CFLAGS" -a x$starpu_windows != xyes; then STARPU_HAVE_OPENMP_TRUE= STARPU_HAVE_OPENMP_FALSE='#' else STARPU_HAVE_OPENMP_TRUE='#' STARPU_HAVE_OPENMP_FALSE= fi ############################################################################### # # # SOCL interface # # # ############################################################################### # Check whether --enable-socl was given. if test ${enable_socl+y} then : enableval=$enable_socl; enable_socl="$enableval" else $as_nop enable_socl="maybe" fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for SOCL" >&5 printf %s "checking for SOCL... " >&6; } # in case SOCL was explicitly required, but is not available, this is an error if test "x$enable_socl" = "xyes" -a "$have_valid_opencl" = "no" ; then as_fn_error $? "SOCL cannot be enabled without OpenCL" "$LINENO" 5 fi # now we enable SOCL if and only if a proper setup is available if test "x$enable_socl" = "xyes" -o "x$enable_socl" = "xmaybe" ; then build_socl=$have_valid_opencl else build_socl=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $build_socl" >&5 printf "%s\n" "$build_socl" >&6; } if test "x$build_socl" = "xyes"; then STARPU_BUILD_SOCL_TRUE= STARPU_BUILD_SOCL_FALSE='#' else STARPU_BUILD_SOCL_TRUE='#' STARPU_BUILD_SOCL_FALSE= fi if test "x$build_socl" = "xyes"; then STARPU_USE_SOCL_TRUE= STARPU_USE_SOCL_FALSE='#' else STARPU_USE_SOCL_TRUE='#' STARPU_USE_SOCL_FALSE= fi if test "$build_socl" = "yes" ; then ac_fn_c_check_func "$LINENO" "clGetExtensionFunctionAddressForPlatform" "ac_cv_func_clGetExtensionFunctionAddressForPlatform" if test "x$ac_cv_func_clGetExtensionFunctionAddressForPlatform" = xyes then : printf "%s\n" "#define HAVE_CLGETEXTENSIONFUNCTIONADDRESSFORPLATFORM 1" >>confdefs.h fi if test -n "$SOCL_OCL_LIB_OPENCL" -a -f "$SOCL_OCL_LIB_OPENCL" ; then run_socl_check=yes SOCL_OCL_LIB_OPENCL_DIR=$(dirname $SOCL_OCL_LIB_OPENCL) else run_socl_check=no fi else run_socl_check=no fi ############################################################################### # # # Debugging # # # ############################################################################### # Extract the first word of "gdb", so it can be a program name with args. set dummy gdb; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_GDB+y} then : printf %s "(cached) " >&6 else $as_nop case $GDB in [\\/]* | ?:[\\/]*) ac_cv_path_GDB="$GDB" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_GDB="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_GDB" && ac_cv_path_GDB="not-found" ;; esac fi GDB=$ac_cv_path_GDB if test -n "$GDB"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $GDB" >&5 printf "%s\n" "$GDB" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "x$GDB" != "xnot-found"; then printf "%s\n" "#define STARPU_GDB_PATH \"$GDB\"" >>confdefs.h fi ############################################################################### # # # Examples # # # ############################################################################### # Check whether --enable-build-tests was given. if test ${enable_build_tests+y} then : enableval=$enable_build_tests; enable_build_tests=$enableval else $as_nop enable_build_tests=yes fi # check stuff for tests (todo) if test x$enable_build_tests != xno; then STARPU_BUILD_TESTS_TRUE= STARPU_BUILD_TESTS_FALSE='#' else STARPU_BUILD_TESTS_TRUE='#' STARPU_BUILD_TESTS_FALSE= fi # Check whether --enable-build-examples was given. if test ${enable_build_examples+y} then : enableval=$enable_build_examples; enable_build_examples=$enableval else $as_nop enable_build_examples=yes fi # check stuff for examples (todo) if test x$enable_build_examples != xno; then STARPU_BUILD_EXAMPLES_TRUE= STARPU_BUILD_EXAMPLES_FALSE='#' else STARPU_BUILD_EXAMPLES_TRUE='#' STARPU_BUILD_EXAMPLES_FALSE= fi # Check whether --enable-opengl-render was given. if test ${enable_opengl_render+y} then : enableval=$enable_opengl_render; enable_opengl_render=$enableval else $as_nop enable_opengl_render=no fi if test x$enable_opengl_render = xyes; then _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for glutInit in -lglut" >&5 printf %s "checking for glutInit in -lglut... " >&6; } if test ${ac_cv_lib_glut_glutInit+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lglut $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char glutInit (); int main (void) { return glutInit (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_glut_glutInit=yes else $as_nop ac_cv_lib_glut_glutInit=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_glut_glutInit" >&5 printf "%s\n" "$ac_cv_lib_glut_glutInit" >&6; } if test "x$ac_cv_lib_glut_glutInit" = xyes then : printf "%s\n" "#define HAVE_LIBGLUT 1" >>confdefs.h LIBS="-lglut $LIBS" else $as_nop as_fn_error $? "cannot find glut" "$LINENO" 5 fi STARPU_OPENGL_RENDER_LDFLAGS="$STARPU_OPENGL_RENDER_LDFLAGS $LIBS" LIBS=$_LIBS_SAV _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for glXCreateContext in -lGL" >&5 printf %s "checking for glXCreateContext in -lGL... " >&6; } if test ${ac_cv_lib_GL_glXCreateContext+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lGL $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char glXCreateContext (); int main (void) { return glXCreateContext (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_GL_glXCreateContext=yes else $as_nop ac_cv_lib_GL_glXCreateContext=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_GL_glXCreateContext" >&5 printf "%s\n" "$ac_cv_lib_GL_glXCreateContext" >&6; } if test "x$ac_cv_lib_GL_glXCreateContext" = xyes then : printf "%s\n" "#define HAVE_LIBGL 1" >>confdefs.h LIBS="-lGL $LIBS" else $as_nop as_fn_error $? "cannot find GL" "$LINENO" 5 fi STARPU_OPENGL_RENDER_LDFLAGS="$STARPU_OPENGL_RENDER_LDFLAGS $LIBS" LIBS=$_LIBS_SAV _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for gluLookAt in -lGLU" >&5 printf %s "checking for gluLookAt in -lGLU... " >&6; } if test ${ac_cv_lib_GLU_gluLookAt+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lGLU $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char gluLookAt (); int main (void) { return gluLookAt (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_GLU_gluLookAt=yes else $as_nop ac_cv_lib_GLU_gluLookAt=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_GLU_gluLookAt" >&5 printf "%s\n" "$ac_cv_lib_GLU_gluLookAt" >&6; } if test "x$ac_cv_lib_GLU_gluLookAt" = xyes then : printf "%s\n" "#define HAVE_LIBGLU 1" >>confdefs.h LIBS="-lGLU $LIBS" else $as_nop as_fn_error $? "cannot find GLU" "$LINENO" 5 fi STARPU_OPENGL_RENDER_LDFLAGS="$STARPU_OPENGL_RENDER_LDFLAGS $LIBS" LIBS=$_LIBS_SAV printf "%s\n" "#define STARPU_OPENGL_RENDER 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenGL rendering is enabled" >&5 printf %s "checking whether OpenGL rendering is enabled... " >&6; } STARPU_OPENGL_RENDER=$enable_opengl_render { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_opengl_render" >&5 printf "%s\n" "$enable_opengl_render" >&6; } if test "x$enable_opengl_render" = xyes; then STARPU_HAVE_OPENGL_TRUE= STARPU_HAVE_OPENGL_FALSE='#' else STARPU_HAVE_OPENGL_TRUE='#' STARPU_HAVE_OPENGL_FALSE= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for X" >&5 printf %s "checking for X... " >&6; } # Check whether --with-x was given. if test ${with_x+y} then : withval=$with_x; fi # $have_x is `yes', `no', `disabled', or empty when we do not yet know. if test "x$with_x" = xno; then # The user explicitly disabled X. have_x=disabled else case $x_includes,$x_libraries in #( *\'*) as_fn_error $? "cannot use X directory names containing '" "$LINENO" 5;; #( *,NONE | NONE,*) if test ${ac_cv_have_x+y} then : printf %s "(cached) " >&6 else $as_nop # One or both of the vars are not set, and there is no cached value. ac_x_includes=no ac_x_libraries=no # Do we need to do anything special at all? ac_save_LIBS=$LIBS LIBS="-lX11 $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main (void) { XrmInitialize () ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : # We can compile and link X programs with no special options. ac_x_includes= ac_x_libraries= fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS="$ac_save_LIBS" # If that didn't work, only try xmkmf and file system searches # for native compilation. if test x"$ac_x_includes" = xno && test "$cross_compiling" = no then : rm -f -r conftest.dir if mkdir conftest.dir; then cd conftest.dir cat >Imakefile <<'_ACEOF' incroot: @echo incroot='${INCROOT}' usrlibdir: @echo usrlibdir='${USRLIBDIR}' libdir: @echo libdir='${LIBDIR}' _ACEOF if (export CC; ${XMKMF-xmkmf}) >/dev/null 2>/dev/null && test -f Makefile; then # GNU make sometimes prints "make[1]: Entering ...", which would confuse us. for ac_var in incroot usrlibdir libdir; do eval "ac_im_$ac_var=\`\${MAKE-make} $ac_var 2>/dev/null | sed -n 's/^$ac_var=//p'\`" done # Open Windows xmkmf reportedly sets LIBDIR instead of USRLIBDIR. for ac_extension in a so sl dylib la dll; do if test ! -f "$ac_im_usrlibdir/libX11.$ac_extension" && test -f "$ac_im_libdir/libX11.$ac_extension"; then ac_im_usrlibdir=$ac_im_libdir; break fi done # Screen out bogus values from the imake configuration. They are # bogus both because they are the default anyway, and because # using them would break gcc on systems where it needs fixed includes. case $ac_im_incroot in /usr/include) ac_x_includes= ;; *) test -f "$ac_im_incroot/X11/Xos.h" && ac_x_includes=$ac_im_incroot;; esac case $ac_im_usrlibdir in /usr/lib | /usr/lib64 | /lib | /lib64) ;; *) test -d "$ac_im_usrlibdir" && ac_x_libraries=$ac_im_usrlibdir ;; esac fi cd .. rm -f -r conftest.dir fi # Standard set of common directories for X headers. # Check X11 before X11Rn because it is often a symlink to the current release. ac_x_header_dirs=' /usr/X11/include /usr/X11R7/include /usr/X11R6/include /usr/X11R5/include /usr/X11R4/include /usr/include/X11 /usr/include/X11R7 /usr/include/X11R6 /usr/include/X11R5 /usr/include/X11R4 /usr/local/X11/include /usr/local/X11R7/include /usr/local/X11R6/include /usr/local/X11R5/include /usr/local/X11R4/include /usr/local/include/X11 /usr/local/include/X11R7 /usr/local/include/X11R6 /usr/local/include/X11R5 /usr/local/include/X11R4 /opt/X11/include /usr/X386/include /usr/x386/include /usr/XFree86/include/X11 /usr/include /usr/local/include /usr/unsupported/include /usr/athena/include /usr/local/x11r5/include /usr/lpp/Xamples/include /usr/openwin/include /usr/openwin/share/include' if test "$ac_x_includes" = no; then # Guess where to find include files, by looking for Xlib.h. # First, try using that file with no special directory specified. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_c_try_cpp "$LINENO" then : # We can compile using X headers with no special include directory. ac_x_includes= else $as_nop for ac_dir in $ac_x_header_dirs; do if test -r "$ac_dir/X11/Xlib.h"; then ac_x_includes=$ac_dir break fi done fi rm -f conftest.err conftest.i conftest.$ac_ext fi # $ac_x_includes = no if test "$ac_x_libraries" = no; then # Check for the libraries. # See if we find them without any special options. # Don't add to $LIBS permanently. ac_save_LIBS=$LIBS LIBS="-lX11 $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main (void) { XrmInitialize () ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : LIBS=$ac_save_LIBS # We can link X programs with no special library path. ac_x_libraries= else $as_nop LIBS=$ac_save_LIBS for ac_dir in `printf "%s\n" "$ac_x_includes $ac_x_header_dirs" | sed s/include/lib/g` do # Don't even attempt the hair of trying to link an X program! for ac_extension in a so sl dylib la dll; do if test -r "$ac_dir/libX11.$ac_extension"; then ac_x_libraries=$ac_dir break 2 fi done done fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi # $ac_x_libraries = no fi # Record the results. case $ac_x_includes,$ac_x_libraries in #( no,* | *,no | *\'*) : # Didn't find X, or a directory has "'" in its name. ac_cv_have_x="have_x=no" ;; #( *) : # Record where we found X for the cache. ac_cv_have_x="have_x=yes\ ac_x_includes='$ac_x_includes'\ ac_x_libraries='$ac_x_libraries'" ;; esac fi ;; #( *) have_x=yes;; esac eval "$ac_cv_have_x" fi # $with_x != no if test "$have_x" != yes; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $have_x" >&5 printf "%s\n" "$have_x" >&6; } no_x=yes else # If each of the values was on the command line, it overrides each guess. test "x$x_includes" = xNONE && x_includes=$ac_x_includes test "x$x_libraries" = xNONE && x_libraries=$ac_x_libraries # Update the cache value to reflect the command line values. ac_cv_have_x="have_x=yes\ ac_x_includes='$x_includes'\ ac_x_libraries='$x_libraries'" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: libraries $x_libraries, headers $x_includes" >&5 printf "%s\n" "libraries $x_libraries, headers $x_includes" >&6; } fi if test "$no_x" = yes; then # Not all programs may use this symbol, but it does not hurt to define it. printf "%s\n" "#define X_DISPLAY_MISSING 1" >>confdefs.h X_CFLAGS= X_PRE_LIBS= X_LIBS= X_EXTRA_LIBS= else if test -n "$x_includes"; then X_CFLAGS="$X_CFLAGS -I$x_includes" fi # It would also be nice to do this for all -L options, not just this one. if test -n "$x_libraries"; then X_LIBS="$X_LIBS -L$x_libraries" # For Solaris; some versions of Sun CC require a space after -R and # others require no space. Words are not sufficient . . . . { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -R must be followed by a space" >&5 printf %s "checking whether -R must be followed by a space... " >&6; } ac_xsave_LIBS=$LIBS; LIBS="$LIBS -R$x_libraries" ac_xsave_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } X_LIBS="$X_LIBS -R$x_libraries" else $as_nop LIBS="$ac_xsave_LIBS -R $x_libraries" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } X_LIBS="$X_LIBS -R $x_libraries" else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: neither works" >&5 printf "%s\n" "neither works" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext ac_c_werror_flag=$ac_xsave_c_werror_flag LIBS=$ac_xsave_LIBS fi # Check for system-dependent libraries X programs must link with. # Do this before checking for the system-independent R6 libraries # (-lICE), since we may need -lsocket or whatever for X linking. if test "$ISC" = yes; then X_EXTRA_LIBS="$X_EXTRA_LIBS -lnsl_s -linet" else # Martyn Johnson says this is needed for Ultrix, if the X # libraries were built with DECnet support. And Karl Berry says # the Alpha needs dnet_stub (dnet does not exist). ac_xsave_LIBS="$LIBS"; LIBS="$LIBS $X_LIBS -lX11" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char XOpenDisplay (); int main (void) { return XOpenDisplay (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : else $as_nop { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dnet_ntoa in -ldnet" >&5 printf %s "checking for dnet_ntoa in -ldnet... " >&6; } if test ${ac_cv_lib_dnet_dnet_ntoa+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-ldnet $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char dnet_ntoa (); int main (void) { return dnet_ntoa (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_dnet_dnet_ntoa=yes else $as_nop ac_cv_lib_dnet_dnet_ntoa=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dnet_dnet_ntoa" >&5 printf "%s\n" "$ac_cv_lib_dnet_dnet_ntoa" >&6; } if test "x$ac_cv_lib_dnet_dnet_ntoa" = xyes then : X_EXTRA_LIBS="$X_EXTRA_LIBS -ldnet" fi if test $ac_cv_lib_dnet_dnet_ntoa = no; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dnet_ntoa in -ldnet_stub" >&5 printf %s "checking for dnet_ntoa in -ldnet_stub... " >&6; } if test ${ac_cv_lib_dnet_stub_dnet_ntoa+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-ldnet_stub $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char dnet_ntoa (); int main (void) { return dnet_ntoa (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_dnet_stub_dnet_ntoa=yes else $as_nop ac_cv_lib_dnet_stub_dnet_ntoa=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dnet_stub_dnet_ntoa" >&5 printf "%s\n" "$ac_cv_lib_dnet_stub_dnet_ntoa" >&6; } if test "x$ac_cv_lib_dnet_stub_dnet_ntoa" = xyes then : X_EXTRA_LIBS="$X_EXTRA_LIBS -ldnet_stub" fi fi fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS="$ac_xsave_LIBS" # msh@cis.ufl.edu says -lnsl (and -lsocket) are needed for his 386/AT, # to get the SysV transport functions. # Chad R. Larson says the Pyramis MIS-ES running DC/OSx (SVR4) # needs -lnsl. # The nsl library prevents programs from opening the X display # on Irix 5.2, according to T.E. Dickey. # The functions gethostbyname, getservbyname, and inet_addr are # in -lbsd on LynxOS 3.0.1/i386, according to Lars Hecking. ac_fn_c_check_func "$LINENO" "gethostbyname" "ac_cv_func_gethostbyname" if test "x$ac_cv_func_gethostbyname" = xyes then : fi if test $ac_cv_func_gethostbyname = no; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for gethostbyname in -lnsl" >&5 printf %s "checking for gethostbyname in -lnsl... " >&6; } if test ${ac_cv_lib_nsl_gethostbyname+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lnsl $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char gethostbyname (); int main (void) { return gethostbyname (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_nsl_gethostbyname=yes else $as_nop ac_cv_lib_nsl_gethostbyname=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_nsl_gethostbyname" >&5 printf "%s\n" "$ac_cv_lib_nsl_gethostbyname" >&6; } if test "x$ac_cv_lib_nsl_gethostbyname" = xyes then : X_EXTRA_LIBS="$X_EXTRA_LIBS -lnsl" fi if test $ac_cv_lib_nsl_gethostbyname = no; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for gethostbyname in -lbsd" >&5 printf %s "checking for gethostbyname in -lbsd... " >&6; } if test ${ac_cv_lib_bsd_gethostbyname+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lbsd $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char gethostbyname (); int main (void) { return gethostbyname (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_bsd_gethostbyname=yes else $as_nop ac_cv_lib_bsd_gethostbyname=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_bsd_gethostbyname" >&5 printf "%s\n" "$ac_cv_lib_bsd_gethostbyname" >&6; } if test "x$ac_cv_lib_bsd_gethostbyname" = xyes then : X_EXTRA_LIBS="$X_EXTRA_LIBS -lbsd" fi fi fi # lieder@skyler.mavd.honeywell.com says without -lsocket, # socket/setsockopt and other routines are undefined under SCO ODT # 2.0. But -lsocket is broken on IRIX 5.2 (and is not necessary # on later versions), says Simon Leinen: it contains gethostby* # variants that don't use the name server (or something). -lsocket # must be given before -lnsl if both are needed. We assume that # if connect needs -lnsl, so does gethostbyname. ac_fn_c_check_func "$LINENO" "connect" "ac_cv_func_connect" if test "x$ac_cv_func_connect" = xyes then : fi if test $ac_cv_func_connect = no; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for connect in -lsocket" >&5 printf %s "checking for connect in -lsocket... " >&6; } if test ${ac_cv_lib_socket_connect+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lsocket $X_EXTRA_LIBS $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char connect (); int main (void) { return connect (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_socket_connect=yes else $as_nop ac_cv_lib_socket_connect=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_socket_connect" >&5 printf "%s\n" "$ac_cv_lib_socket_connect" >&6; } if test "x$ac_cv_lib_socket_connect" = xyes then : X_EXTRA_LIBS="-lsocket $X_EXTRA_LIBS" fi fi # Guillermo Gomez says -lposix is necessary on A/UX. ac_fn_c_check_func "$LINENO" "remove" "ac_cv_func_remove" if test "x$ac_cv_func_remove" = xyes then : fi if test $ac_cv_func_remove = no; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for remove in -lposix" >&5 printf %s "checking for remove in -lposix... " >&6; } if test ${ac_cv_lib_posix_remove+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lposix $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char remove (); int main (void) { return remove (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_posix_remove=yes else $as_nop ac_cv_lib_posix_remove=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_posix_remove" >&5 printf "%s\n" "$ac_cv_lib_posix_remove" >&6; } if test "x$ac_cv_lib_posix_remove" = xyes then : X_EXTRA_LIBS="$X_EXTRA_LIBS -lposix" fi fi # BSDI BSD/OS 2.1 needs -lipc for XOpenDisplay. ac_fn_c_check_func "$LINENO" "shmat" "ac_cv_func_shmat" if test "x$ac_cv_func_shmat" = xyes then : fi if test $ac_cv_func_shmat = no; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for shmat in -lipc" >&5 printf %s "checking for shmat in -lipc... " >&6; } if test ${ac_cv_lib_ipc_shmat+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lipc $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char shmat (); int main (void) { return shmat (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_ipc_shmat=yes else $as_nop ac_cv_lib_ipc_shmat=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ipc_shmat" >&5 printf "%s\n" "$ac_cv_lib_ipc_shmat" >&6; } if test "x$ac_cv_lib_ipc_shmat" = xyes then : X_EXTRA_LIBS="$X_EXTRA_LIBS -lipc" fi fi fi # Check for libraries that X11R6 Xt/Xaw programs need. ac_save_LDFLAGS=$LDFLAGS test -n "$x_libraries" && LDFLAGS="$LDFLAGS -L$x_libraries" # SM needs ICE to (dynamically) link under SunOS 4.x (so we have to # check for ICE first), but we must link in the order -lSM -lICE or # we get undefined symbols. So assume we have SM if we have ICE. # These have to be linked with before -lX11, unlike the other # libraries we check for below, so use a different variable. # John Interrante, Karl Berry { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for IceConnectionNumber in -lICE" >&5 printf %s "checking for IceConnectionNumber in -lICE... " >&6; } if test ${ac_cv_lib_ICE_IceConnectionNumber+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lICE $X_EXTRA_LIBS $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char IceConnectionNumber (); int main (void) { return IceConnectionNumber (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_ICE_IceConnectionNumber=yes else $as_nop ac_cv_lib_ICE_IceConnectionNumber=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ICE_IceConnectionNumber" >&5 printf "%s\n" "$ac_cv_lib_ICE_IceConnectionNumber" >&6; } if test "x$ac_cv_lib_ICE_IceConnectionNumber" = xyes then : X_PRE_LIBS="$X_PRE_LIBS -lSM -lICE" fi LDFLAGS=$ac_save_LDFLAGS fi if test "x$no_x" != "xyes"; then printf "%s\n" "#define STARPU_HAVE_X11 1" >>confdefs.h fi if test "x$no_x" != "xyes"; then STARPU_HAVE_X11_TRUE= STARPU_HAVE_X11_FALSE='#' else STARPU_HAVE_X11_TRUE='#' STARPU_HAVE_X11_FALSE= fi # In case there are BLAS kernels that are used by the example applications # we may specify which library to use. Note that this is not used for StarPU # itself. blas_lib=maybe # Check whether --enable-blas-lib was given. if test ${enable_blas_lib+y} then : enableval=$enable_blas_lib; if test "x$enableval" = "xatlas" ; then blas_lib=atlas elif test "x$enableval" = "xgoto" ; then blas_lib=goto elif test "x$enableval" = "xopenblas" ; then blas_lib=openblas elif test "x$enableval" = "xnone" ; then blas_lib=none elif test "x$enableval" = "xmkl" ; then blas_lib=mkl elif test "x$enableval" = "xarmpl" ; then blas_lib=armpl elif test x$enableval = xno; then blas_lib=none else echo echo "Error!" echo "Unknown BLAS library" exit -1 fi fi if test x$blas_lib = xmaybe -o x$blas_lib = xgoto; then # Check whether --with-goto-dir was given. if test ${with_goto_dir+y} then : withval=$with_goto_dir; blas_lib=goto gotodir=$withval GOTODIR=$gotodir CPPFLAGS="${CPPFLAGS} -I$gotodir/ " LDFLAGS="${LDFLAGS} -L$gotodir/ " fi if test x$blas_lib = xgoto; then _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lgfortran" >&5 printf %s "checking for main in -lgfortran... " >&6; } if test ${ac_cv_lib_gfortran_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lgfortran $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_gfortran_main=yes else $as_nop ac_cv_lib_gfortran_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gfortran_main" >&5 printf "%s\n" "$ac_cv_lib_gfortran_main" >&6; } if test "x$ac_cv_lib_gfortran_main" = xyes then : printf "%s\n" "#define HAVE_LIBGFORTRAN 1" >>confdefs.h LIBS="-lgfortran $LIBS" fi STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" LIBS=$_LIBS_SAV _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lifcore" >&5 printf %s "checking for main in -lifcore... " >&6; } if test ${ac_cv_lib_ifcore_main+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lifcore $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_ifcore_main=yes else $as_nop ac_cv_lib_ifcore_main=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ifcore_main" >&5 printf "%s\n" "$ac_cv_lib_ifcore_main" >&6; } if test "x$ac_cv_lib_ifcore_main" = xyes then : printf "%s\n" "#define HAVE_LIBIFCORE 1" >>confdefs.h LIBS="-lifcore $LIBS" fi STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" LIBS=$_LIBS_SAV # Perhaps that GotoBLAS2 is available instead (so that we have libgotoblas2.{so,a}) _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sgemm_ in -lgoto2" >&5 printf %s "checking for sgemm_ in -lgoto2... " >&6; } if test ${ac_cv_lib_goto2_sgemm_+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lgoto2 $STARPU_BLAS_LDFLAGS $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char sgemm_ (); int main (void) { return sgemm_ (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_goto2_sgemm_=yes else $as_nop ac_cv_lib_goto2_sgemm_=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_goto2_sgemm_" >&5 printf "%s\n" "$ac_cv_lib_goto2_sgemm_" >&6; } if test "x$ac_cv_lib_goto2_sgemm_" = xyes then : printf "%s\n" "#define HAVE_LIBGOTO2 1" >>confdefs.h LIBS="-lgoto2 $LIBS" else $as_nop havegoto2=no fi STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" LIBS=$_LIBS_SAV if test x$havegoto2 = xno; then _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sgemm_ in -lgoto" >&5 printf %s "checking for sgemm_ in -lgoto... " >&6; } if test ${ac_cv_lib_goto_sgemm_+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lgoto $STARPU_BLAS_LDFLAGS $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char sgemm_ (); int main (void) { return sgemm_ (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_goto_sgemm_=yes else $as_nop ac_cv_lib_goto_sgemm_=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_goto_sgemm_" >&5 printf "%s\n" "$ac_cv_lib_goto_sgemm_" >&6; } if test "x$ac_cv_lib_goto_sgemm_" = xyes then : printf "%s\n" "#define HAVE_LIBGOTO 1" >>confdefs.h LIBS="-lgoto $LIBS" else $as_nop as_fn_error $? "cannot find goto lib" "$LINENO" 5 fi STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" LIBS=$_LIBS_SAV fi printf "%s\n" "#define STARPU_GOTO 1" >>confdefs.h fi fi if test x$blas_lib = xmaybe -o x$blas_lib = xatlas; then # Check whether --with-atlas-dir was given. if test ${with_atlas_dir+y} then : withval=$with_atlas_dir; { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking STARPU_ATLAS location" >&5 printf %s "checking STARPU_ATLAS location... " >&6; } blas_lib=atlas atlasdir=$withval { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $atlasdir" >&5 printf "%s\n" "$atlasdir" >&6; } ATLASDIR=$atlasdir CPPFLAGS="${CPPFLAGS} -I$atlasdir/include/ " LDFLAGS="${LDFLAGS} -L$atlasdir/lib/ " fi if test x$blas_lib = xatlas; then # test whether STARPU_ATLAS is actually available ac_fn_c_check_header_compile "$LINENO" "cblas.h" "ac_cv_header_cblas_h" "$ac_includes_default" if test "x$ac_cv_header_cblas_h" = xyes then : else $as_nop as_fn_error $? "cannot find atlas headers" "$LINENO" 5 fi _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ATL_sgemm in -latlas" >&5 printf %s "checking for ATL_sgemm in -latlas... " >&6; } if test ${ac_cv_lib_atlas_ATL_sgemm+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-latlas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char ATL_sgemm (); int main (void) { return ATL_sgemm (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_atlas_ATL_sgemm=yes else $as_nop ac_cv_lib_atlas_ATL_sgemm=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_atlas_ATL_sgemm" >&5 printf "%s\n" "$ac_cv_lib_atlas_ATL_sgemm" >&6; } if test "x$ac_cv_lib_atlas_ATL_sgemm" = xyes then : printf "%s\n" "#define HAVE_LIBATLAS 1" >>confdefs.h LIBS="-latlas $LIBS" else $as_nop as_fn_error $? "cannot find atlas lib" "$LINENO" 5 fi STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" LIBS=$_LIBS_SAV _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cblas_sgemm in -lcblas" >&5 printf %s "checking for cblas_sgemm in -lcblas... " >&6; } if test ${ac_cv_lib_cblas_cblas_sgemm+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcblas -latlas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char cblas_sgemm (); int main (void) { return cblas_sgemm (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_cblas_cblas_sgemm=yes else $as_nop ac_cv_lib_cblas_cblas_sgemm=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cblas_cblas_sgemm" >&5 printf "%s\n" "$ac_cv_lib_cblas_cblas_sgemm" >&6; } if test "x$ac_cv_lib_cblas_cblas_sgemm" = xyes then : printf "%s\n" "#define HAVE_LIBCBLAS 1" >>confdefs.h LIBS="-lcblas $LIBS" else $as_nop as_fn_error $? "cannot find atlas lib" "$LINENO" 5 fi STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" LIBS=$_LIBS_SAV _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sgemm_ in -lf77blas" >&5 printf %s "checking for sgemm_ in -lf77blas... " >&6; } if test ${ac_cv_lib_f77blas_sgemm_+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lf77blas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char sgemm_ (); int main (void) { return sgemm_ (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_f77blas_sgemm_=yes else $as_nop ac_cv_lib_f77blas_sgemm_=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_f77blas_sgemm_" >&5 printf "%s\n" "$ac_cv_lib_f77blas_sgemm_" >&6; } if test "x$ac_cv_lib_f77blas_sgemm_" = xyes then : printf "%s\n" "#define HAVE_LIBF77BLAS 1" >>confdefs.h LIBS="-lf77blas $LIBS" else $as_nop as_fn_error $? "cannot find f77blas lib" "$LINENO" 5 fi STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" LIBS=$_LIBS_SAV printf "%s\n" "#define STARPU_ATLAS 1" >>confdefs.h fi fi if test x$blas_lib = xmaybe -o x$blas_lib = xopenblas; then pkg_failed=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for OPENBLAS" >&5 printf %s "checking for OPENBLAS... " >&6; } if test -n "$PKG_CONFIG"; then if test -n "$OPENBLAS_CFLAGS"; then pkg_cv_OPENBLAS_CFLAGS="$OPENBLAS_CFLAGS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"openblas\""; } >&5 ($PKG_CONFIG --exists --print-errors "openblas") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_OPENBLAS_CFLAGS=`$PKG_CONFIG --cflags "openblas" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$OPENBLAS_LIBS"; then pkg_cv_OPENBLAS_LIBS="$OPENBLAS_LIBS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"openblas\""; } >&5 ($PKG_CONFIG --exists --print-errors "openblas") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_OPENBLAS_LIBS=`$PKG_CONFIG --libs "openblas" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then OPENBLAS_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "openblas"` else OPENBLAS_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "openblas"` fi # Put the nasty error message in config.log where it belongs echo "$OPENBLAS_PKG_ERRORS" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if test x$blas_lib = xopenblas; then _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cblas_sgemm in -lopenblas" >&5 printf %s "checking for cblas_sgemm in -lopenblas... " >&6; } if test ${ac_cv_lib_openblas_cblas_sgemm+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lopenblas -lopenblas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char cblas_sgemm (); int main (void) { return cblas_sgemm (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_openblas_cblas_sgemm=yes else $as_nop ac_cv_lib_openblas_cblas_sgemm=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_openblas_cblas_sgemm" >&5 printf "%s\n" "$ac_cv_lib_openblas_cblas_sgemm" >&6; } if test "x$ac_cv_lib_openblas_cblas_sgemm" = xyes then : printf "%s\n" "#define HAVE_LIBOPENBLAS 1" >>confdefs.h LIBS="-lopenblas $LIBS" else $as_nop as_fn_error $? "cannot find openblas lib" "$LINENO" 5 fi STARPU_OPENBLAS_LDFLAGS="$STARPU_OPENBLAS_LDFLAGS $LIBS" LIBS=$_LIBS_SAV printf "%s\n" "#define STARPU_OPENBLAS 1" >>confdefs.h STARPU_OPENBLAS=1 fi elif test $pkg_failed = untried; then if test x$blas_lib = xopenblas; then _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cblas_sgemm in -lopenblas" >&5 printf %s "checking for cblas_sgemm in -lopenblas... " >&6; } if test ${ac_cv_lib_openblas_cblas_sgemm+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lopenblas -lopenblas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char cblas_sgemm (); int main (void) { return cblas_sgemm (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_openblas_cblas_sgemm=yes else $as_nop ac_cv_lib_openblas_cblas_sgemm=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_openblas_cblas_sgemm" >&5 printf "%s\n" "$ac_cv_lib_openblas_cblas_sgemm" >&6; } if test "x$ac_cv_lib_openblas_cblas_sgemm" = xyes then : printf "%s\n" "#define HAVE_LIBOPENBLAS 1" >>confdefs.h LIBS="-lopenblas $LIBS" else $as_nop as_fn_error $? "cannot find openblas lib" "$LINENO" 5 fi STARPU_OPENBLAS_LDFLAGS="$STARPU_OPENBLAS_LDFLAGS $LIBS" LIBS=$_LIBS_SAV printf "%s\n" "#define STARPU_OPENBLAS 1" >>confdefs.h STARPU_OPENBLAS=1 fi else OPENBLAS_CFLAGS=$pkg_cv_OPENBLAS_CFLAGS OPENBLAS_LIBS=$pkg_cv_OPENBLAS_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } pkg_failed=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for BLAS_OPENBLAS" >&5 printf %s "checking for BLAS_OPENBLAS... " >&6; } if test -n "$PKG_CONFIG"; then if test -n "$BLAS_OPENBLAS_CFLAGS"; then pkg_cv_BLAS_OPENBLAS_CFLAGS="$BLAS_OPENBLAS_CFLAGS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"blas-openblas\""; } >&5 ($PKG_CONFIG --exists --print-errors "blas-openblas") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_BLAS_OPENBLAS_CFLAGS=`$PKG_CONFIG --cflags "blas-openblas" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$BLAS_OPENBLAS_LIBS"; then pkg_cv_BLAS_OPENBLAS_LIBS="$BLAS_OPENBLAS_LIBS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"blas-openblas\""; } >&5 ($PKG_CONFIG --exists --print-errors "blas-openblas") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_BLAS_OPENBLAS_LIBS=`$PKG_CONFIG --libs "blas-openblas" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then BLAS_OPENBLAS_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "blas-openblas"` else BLAS_OPENBLAS_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "blas-openblas"` fi # Put the nasty error message in config.log where it belongs echo "$BLAS_OPENBLAS_PKG_ERRORS" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if test x$blas_lib = xopenblas; then _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cblas_sgemm in -lblas-openblas" >&5 printf %s "checking for cblas_sgemm in -lblas-openblas... " >&6; } if test ${ac_cv_lib_blas_openblas_cblas_sgemm+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lblas-openblas -lblas-openblas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char cblas_sgemm (); int main (void) { return cblas_sgemm (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_blas_openblas_cblas_sgemm=yes else $as_nop ac_cv_lib_blas_openblas_cblas_sgemm=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_blas_openblas_cblas_sgemm" >&5 printf "%s\n" "$ac_cv_lib_blas_openblas_cblas_sgemm" >&6; } if test "x$ac_cv_lib_blas_openblas_cblas_sgemm" = xyes then : printf "%s\n" "#define HAVE_LIBBLAS_OPENBLAS 1" >>confdefs.h LIBS="-lblas-openblas $LIBS" else $as_nop as_fn_error $? "cannot find blas-openblas lib" "$LINENO" 5 fi STARPU_OPENBLAS_LDFLAGS="$STARPU_OPENBLAS_LDFLAGS $LIBS" LIBS=$_LIBS_SAV printf "%s\n" "#define STARPU_OPENBLAS 1" >>confdefs.h STARPU_OPENBLAS=1 fi elif test $pkg_failed = untried; then if test x$blas_lib = xopenblas; then _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cblas_sgemm in -lblas-openblas" >&5 printf %s "checking for cblas_sgemm in -lblas-openblas... " >&6; } if test ${ac_cv_lib_blas_openblas_cblas_sgemm+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lblas-openblas -lblas-openblas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char cblas_sgemm (); int main (void) { return cblas_sgemm (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_blas_openblas_cblas_sgemm=yes else $as_nop ac_cv_lib_blas_openblas_cblas_sgemm=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_blas_openblas_cblas_sgemm" >&5 printf "%s\n" "$ac_cv_lib_blas_openblas_cblas_sgemm" >&6; } if test "x$ac_cv_lib_blas_openblas_cblas_sgemm" = xyes then : printf "%s\n" "#define HAVE_LIBBLAS_OPENBLAS 1" >>confdefs.h LIBS="-lblas-openblas $LIBS" else $as_nop as_fn_error $? "cannot find blas-openblas lib" "$LINENO" 5 fi STARPU_OPENBLAS_LDFLAGS="$STARPU_OPENBLAS_LDFLAGS $LIBS" LIBS=$_LIBS_SAV printf "%s\n" "#define STARPU_OPENBLAS 1" >>confdefs.h STARPU_OPENBLAS=1 fi else BLAS_OPENBLAS_CFLAGS=$pkg_cv_BLAS_OPENBLAS_CFLAGS BLAS_OPENBLAS_LIBS=$pkg_cv_BLAS_OPENBLAS_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } printf "%s\n" "#define STARPU_OPENBLAS 1" >>confdefs.h STARPU_OPENBLAS=1 CFLAGS="${CFLAGS} ${OPENBLAS_CFLAGS} ${BLAS_OPENBLAS_CFLAGS} " LIBS="${LIBS} ${OPENBLAS_LIBS} ${BLAS_OPENBLAS_LIBS} " blas_lib=openblas fi fi fi if test x$blas_lib = xmaybe -o x$blas_lib = xmkl; then # Should we use MKL ? if test -n "$MKLROOT" ; then CPPFLAGS="${CPPFLAGS} -I$MKLROOT/include" case $host_vendor in *1om) mkl_plat=mic ;; *) mkl_plat=intel64 ;; esac SAVED_LIBS=$LIBS STARPU_BLAS_LDFLAGS="-L$MKLROOT/lib/$mkl_plat -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lm -lpthread -ldl" LIBS="$LIBS $STARPU_BLAS_LDFLAGS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : blas_lib=mkl else $as_nop STARPU_BLAS_LDFLAGS="" fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$SAVED_LIBS fi # Check whether --with-mkl-cflags was given. if test ${with_mkl_cflags+y} then : withval=$with_mkl_cflags; CPPFLAGS="${CPPFLAGS} $withval" blas_lib=mkl fi # Check whether --with-mkl-ldflags was given. if test ${with_mkl_ldflags+y} then : withval=$with_mkl_ldflags; STARPU_BLAS_LDFLAGS="$withval" blas_lib=mkl fi if test x$blas_lib = xmkl; then printf "%s\n" "#define STARPU_MKL 1" >>confdefs.h fi fi if test x$blas_lib = xmaybe -o x$blas_lib = xarmpl; then # Should we use ARMPL ? if test -n "$ARMPL_DIR" ; then CPPFLAGS="${CPPFLAGS} -I$ARMPL_INCLUDES" SAVED_LIBS=$LIBS STARPU_BLAS_LDFLAGS="-L$ARMPL_LIBRARIES -larmpl_lp64 -lgfortran -lm" LIBS="$LIBS $STARPU_BLAS_LDFLAGS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : blas_lib=armpl else $as_nop STARPU_BLAS_LDFLAGS="" fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$SAVED_LIBS fi # Check whether --with-armpl-cflags was given. if test ${with_armpl_cflags+y} then : withval=$with_armpl_cflags; CPPFLAGS="${CPPFLAGS} $withval" blas_lib=armpl fi # Check whether --with-armpl-ldflags was given. if test ${with_armpl_ldflags+y} then : withval=$with_armpl_ldflags; STARPU_BLAS_LDFLAGS="$withval" blas_lib=armpl fi if test x$blas_lib = xarmpl; then printf "%s\n" "#define STARPU_ARMPL 1" >>confdefs.h fi fi if test x$blas_lib = xmaybe; then #perhaps it is possible to use some BLAS lib from the system use_system_blas=no _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing sgemm_" >&5 printf %s "checking for library containing sgemm_... " >&6; } if test ${ac_cv_search_sgemm_+y} then : printf %s "(cached) " >&6 else $as_nop ac_func_search_save_LIBS=$LIBS cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char sgemm_ (); int main (void) { return sgemm_ (); ; return 0; } _ACEOF for ac_lib in '' blas do if test -z "$ac_lib"; then ac_res="none required" else ac_res=-l$ac_lib LIBS="-l$ac_lib $ac_func_search_save_LIBS" fi if ac_fn_c_try_link "$LINENO" then : ac_cv_search_sgemm_=$ac_res fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext if test ${ac_cv_search_sgemm_+y} then : break fi done if test ${ac_cv_search_sgemm_+y} then : else $as_nop ac_cv_search_sgemm_=no fi rm conftest.$ac_ext LIBS=$ac_func_search_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_sgemm_" >&5 printf "%s\n" "$ac_cv_search_sgemm_" >&6; } ac_res=$ac_cv_search_sgemm_ if test "$ac_res" != no then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" use_system_blas=yes fi STARPU_BLAS_LDFLAGS="$STARPU_BLAS_LDFLAGS $LIBS" LIBS=$_LIBS_SAV if test x$use_system_blas = xyes; then printf "%s\n" "#define STARPU_SYSTEM_BLAS 1" >>confdefs.h blas_lib=system elif test x"$BLAS_LIBS" != x; then printf "%s\n" "#define STARPU_SYSTEM_BLAS 1" >>confdefs.h STARPU_BLAS_LDFLAGS="$BLAS_LIBS" blas_lib=system else blas_lib=none fi fi if test x$blas_lib = xsystem; then ac_fn_c_check_header_compile "$LINENO" "cblas.h" "ac_cv_header_cblas_h" "$ac_includes_default" if test "x$ac_cv_header_cblas_h" = xyes then : have_cblas_h=yes else $as_nop have_cblas_h=no fi fi if test x$have_cblas_h = xyes; then STARPU_HAVE_CBLAS_H_TRUE= STARPU_HAVE_CBLAS_H_FALSE='#' else STARPU_HAVE_CBLAS_H_TRUE='#' STARPU_HAVE_CBLAS_H_FALSE= fi if test x$have_cblas_h = xyes; then printf "%s\n" "#define STARPU_HAVE_CBLAS_H 1" >>confdefs.h fi if test x$blas_lib != xnone; then printf "%s\n" "#define STARPU_HAVE_BLAS 1" >>confdefs.h SAVED_LIBS="$LIBS" LIBS="$LIBS -lblas" ac_fn_c_check_func "$LINENO" "cblas_sgemv" "ac_cv_func_cblas_sgemv" if test "x$ac_cv_func_cblas_sgemv" = xyes then : printf "%s\n" "#define HAVE_CBLAS_SGEMV 1" >>confdefs.h fi LIBS="$SAVED_LIBS" _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing dgels_" >&5 printf %s "checking for library containing dgels_... " >&6; } if test ${ac_cv_search_dgels_+y} then : printf %s "(cached) " >&6 else $as_nop ac_func_search_save_LIBS=$LIBS cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char dgels_ (); int main (void) { return dgels_ (); ; return 0; } _ACEOF for ac_lib in '' lapack do if test -z "$ac_lib"; then ac_res="none required" else ac_res=-l$ac_lib LIBS="-l$ac_lib $ac_func_search_save_LIBS" fi if ac_fn_c_try_link "$LINENO" then : ac_cv_search_dgels_=$ac_res fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext if test ${ac_cv_search_dgels_+y} then : break fi done if test ${ac_cv_search_dgels_+y} then : else $as_nop ac_cv_search_dgels_=no fi rm conftest.$ac_ext LIBS=$ac_func_search_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_dgels_" >&5 printf "%s\n" "$ac_cv_search_dgels_" >&6; } ac_res=$ac_cv_search_dgels_ if test "$ac_res" != no then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" enable_liblapack=yes else $as_nop enable_liblapack=no fi STARPU_LIBLAPACK_LDFLAGS="$STARPU_LIBLAPACK_LDFLAGS $LIBS" LIBS=$_LIBS_SAV fi if test x$enable_liblapack = xyes; then STARPU_HAVE_LIBLAPACK_TRUE= STARPU_HAVE_LIBLAPACK_FALSE='#' else STARPU_HAVE_LIBLAPACK_TRUE='#' STARPU_HAVE_LIBLAPACK_FALSE= fi if test x$HAVE_CBLAS_SGEMV = x1; then STARPU_HAVE_CBLAS_SGEMV_TRUE= STARPU_HAVE_CBLAS_SGEMV_FALSE='#' else STARPU_HAVE_CBLAS_SGEMV_TRUE='#' STARPU_HAVE_CBLAS_SGEMV_FALSE= fi if test x$blas_lib = xatlas; then STARPU_ATLAS_BLAS_LIB_TRUE= STARPU_ATLAS_BLAS_LIB_FALSE='#' else STARPU_ATLAS_BLAS_LIB_TRUE='#' STARPU_ATLAS_BLAS_LIB_FALSE= fi if test x$blas_lib = xgoto; then STARPU_GOTO_BLAS_LIB_TRUE= STARPU_GOTO_BLAS_LIB_FALSE='#' else STARPU_GOTO_BLAS_LIB_TRUE='#' STARPU_GOTO_BLAS_LIB_FALSE= fi if test x$blas_lib = xmkl; then STARPU_MKL_BLAS_LIB_TRUE= STARPU_MKL_BLAS_LIB_FALSE='#' else STARPU_MKL_BLAS_LIB_TRUE='#' STARPU_MKL_BLAS_LIB_FALSE= fi if test x$blas_lib = xsystem; then STARPU_SYSTEM_BLAS_LIB_TRUE= STARPU_SYSTEM_BLAS_LIB_FALSE='#' else STARPU_SYSTEM_BLAS_LIB_TRUE='#' STARPU_SYSTEM_BLAS_LIB_FALSE= fi if test x$blas_lib = xnone -a x$enable_simgrid = xno; then STARPU_NO_BLAS_LIB_TRUE= STARPU_NO_BLAS_LIB_FALSE='#' else STARPU_NO_BLAS_LIB_TRUE='#' STARPU_NO_BLAS_LIB_FALSE= fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking which BLAS lib should be used" >&5 printf %s "checking which BLAS lib should be used... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $blas_lib" >&5 printf "%s\n" "$blas_lib" >&6; } BLAS_LIB=$blas_lib ############################################################################### # # # Multiple linear regression # # # ############################################################################### # Check whether --enable-mlr was given. if test ${enable_mlr+y} then : enableval=$enable_mlr; enable_mlr=$enableval else $as_nop enable_mlr=no fi # Check whether --enable-mlr-system-blas was given. if test ${enable_mlr_system_blas+y} then : enableval=$enable_mlr_system_blas; enable_mlr_blas=$enableval else $as_nop enable_mlr_blas=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether multiple linear regression models are disabled" >&5 printf %s "checking whether multiple linear regression models are disabled... " >&6; } if test x$enable_mlr = xyes -a "$starpu_windows" != "yes" ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } install_min_dgels=no support_mlr=yes _LIBS_SAV="$LIBS" LIBS="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing dgels_" >&5 printf %s "checking for library containing dgels_... " >&6; } if test ${ac_cv_search_dgels_+y} then : printf %s "(cached) " >&6 else $as_nop ac_func_search_save_LIBS=$LIBS cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char dgels_ (); int main (void) { return dgels_ (); ; return 0; } _ACEOF for ac_lib in '' lapack do if test -z "$ac_lib"; then ac_res="none required" else ac_res=-l$ac_lib LIBS="-l$ac_lib $ac_func_search_save_LIBS" fi if ac_fn_c_try_link "$LINENO" then : ac_cv_search_dgels_=$ac_res fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext if test ${ac_cv_search_dgels_+y} then : break fi done if test ${ac_cv_search_dgels_+y} then : else $as_nop ac_cv_search_dgels_=no fi rm conftest.$ac_ext LIBS=$ac_func_search_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_dgels_" >&5 printf "%s\n" "$ac_cv_search_dgels_" >&6; } ac_res=$ac_cv_search_dgels_ if test "$ac_res" != no then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" use_system_lapack=yes fi STARPU_LAPACK_LDFLAGS="$STARPU_LAPACK_LDFLAGS $LIBS" LIBS=$_LIBS_SAV if test x$blas_lib = xnone ; then use_system_lapack=no fi if test x$enable_mlr_blas = xyes -a x$use_system_lapack = xyes; then printf "%s\n" "#define STARPU_MLR_MODEL 1" >>confdefs.h LDFLAGS="-llapack $LDFLAGS" else if test x$enable_mlr_blas = xyes -a x$blas_lib = xmkl; then printf "%s\n" "#define STARPU_MLR_MODEL 1" >>confdefs.h else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether min-dgels is linked" >&5 printf %s "checking whether min-dgels is linked... " >&6; } if test x"$DGELS_LIBS" != x; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } printf "%s\n" "#define STARPU_MLR_MODEL 1" >>confdefs.h else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking min-dgels source" >&5 printf %s "checking min-dgels source... " >&6; } if test "${cross_compiling}" != "no" ; then # Cross-compiling is not supported by min-dgels { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } install_min_dgels=no support_mlr=no else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } DGELS_LIBS="-Wl,--start-group $STARPU_BUILD_DIR/min-dgels/build/minlibblas.a $STARPU_BUILD_DIR/min-dgels/build/minlibdgels.a $STARPU_BUILD_DIR/min-dgels/build/minlibf2c.a -Wl,--end-group" printf "%s\n" "#define STARPU_MLR_MODEL 1" >>confdefs.h printf "%s\n" "#define STARPU_BUILT_IN_MIN_DGELS 1" >>confdefs.h install_min_dgels=yes fi fi fi fi else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } install_min_dgels=no support_mlr=no fi if test x$install_min_dgels = xyes; then STARPU_USE_MIN_DGELS_TRUE= STARPU_USE_MIN_DGELS_FALSE='#' else STARPU_USE_MIN_DGELS_TRUE='#' STARPU_USE_MIN_DGELS_FALSE= fi ########################################## # FFT # ########################################## have_fftw=no have_fftwf=no have_fftwl=no fft_support=no # Check whether --enable-starpufft was given. if test ${enable_starpufft+y} then : enableval=$enable_starpufft; enable_starpufft=$enableval else $as_nop enable_starpufft=yes fi pkg_failed=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for FFTW" >&5 printf %s "checking for FFTW... " >&6; } if test -n "$PKG_CONFIG"; then if test -n "$FFTW_CFLAGS"; then pkg_cv_FFTW_CFLAGS="$FFTW_CFLAGS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fftw3\""; } >&5 ($PKG_CONFIG --exists --print-errors "fftw3") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_FFTW_CFLAGS=`$PKG_CONFIG --cflags "fftw3" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$FFTW_LIBS"; then pkg_cv_FFTW_LIBS="$FFTW_LIBS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fftw3\""; } >&5 ($PKG_CONFIG --exists --print-errors "fftw3") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_FFTW_LIBS=`$PKG_CONFIG --libs "fftw3" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then FFTW_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "fftw3"` else FFTW_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "fftw3"` fi # Put the nasty error message in config.log where it belongs echo "$FFTW_PKG_ERRORS" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } : elif test $pkg_failed = untried; then : else FFTW_CFLAGS=$pkg_cv_FFTW_CFLAGS FFTW_LIBS=$pkg_cv_FFTW_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } printf "%s\n" "#define STARPU_HAVE_FFTW 1" >>confdefs.h STARPU_HAVE_FFTW=1 have_fftw=yes fi if test x$have_fftw = xyes; then STARPU_HAVE_FFTW_TRUE= STARPU_HAVE_FFTW_FALSE='#' else STARPU_HAVE_FFTW_TRUE='#' STARPU_HAVE_FFTW_FALSE= fi pkg_failed=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for FFTWF" >&5 printf %s "checking for FFTWF... " >&6; } if test -n "$PKG_CONFIG"; then if test -n "$FFTWF_CFLAGS"; then pkg_cv_FFTWF_CFLAGS="$FFTWF_CFLAGS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fftw3f\""; } >&5 ($PKG_CONFIG --exists --print-errors "fftw3f") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_FFTWF_CFLAGS=`$PKG_CONFIG --cflags "fftw3f" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$FFTWF_LIBS"; then pkg_cv_FFTWF_LIBS="$FFTWF_LIBS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fftw3f\""; } >&5 ($PKG_CONFIG --exists --print-errors "fftw3f") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_FFTWF_LIBS=`$PKG_CONFIG --libs "fftw3f" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then FFTWF_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "fftw3f"` else FFTWF_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "fftw3f"` fi # Put the nasty error message in config.log where it belongs echo "$FFTWF_PKG_ERRORS" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } : elif test $pkg_failed = untried; then : else FFTWF_CFLAGS=$pkg_cv_FFTWF_CFLAGS FFTWF_LIBS=$pkg_cv_FFTWF_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } printf "%s\n" "#define STARPU_HAVE_FFTWF 1" >>confdefs.h STARPU_HAVE_FFTWF=1 have_fftwf=yes fi if test x$have_fftwf = xyes; then STARPU_HAVE_FFTWF_TRUE= STARPU_HAVE_FFTWF_FALSE='#' else STARPU_HAVE_FFTWF_TRUE='#' STARPU_HAVE_FFTWF_FALSE= fi pkg_failed=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for FFTWL" >&5 printf %s "checking for FFTWL... " >&6; } if test -n "$PKG_CONFIG"; then if test -n "$FFTWL_CFLAGS"; then pkg_cv_FFTWL_CFLAGS="$FFTWL_CFLAGS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fftw3l\""; } >&5 ($PKG_CONFIG --exists --print-errors "fftw3l") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_FFTWL_CFLAGS=`$PKG_CONFIG --cflags "fftw3l" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$FFTWL_LIBS"; then pkg_cv_FFTWL_LIBS="$FFTWL_LIBS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"fftw3l\""; } >&5 ($PKG_CONFIG --exists --print-errors "fftw3l") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_FFTWL_LIBS=`$PKG_CONFIG --libs "fftw3l" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then FFTWL_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "fftw3l"` else FFTWL_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "fftw3l"` fi # Put the nasty error message in config.log where it belongs echo "$FFTWL_PKG_ERRORS" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } : elif test $pkg_failed = untried; then : else FFTWL_CFLAGS=$pkg_cv_FFTWL_CFLAGS FFTWL_LIBS=$pkg_cv_FFTWL_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } printf "%s\n" "#define STARPU_HAVE_FFTWL 1" >>confdefs.h HAVE_FFTWFL=1 have_fftwl=yes fi if test x$have_fftwl = xyes; then STARPU_HAVE_FFTWL_TRUE= STARPU_HAVE_FFTWL_FALSE='#' else STARPU_HAVE_FFTWL_TRUE='#' STARPU_HAVE_FFTWL_FALSE= fi if test x$enable_starpufft = xyes -a \( \( x$enable_cpu = xyes -a x$have_fftw = xyes -a x$have_fftwf = xyes \) -o x$have_cufftdoublecomplex = xyes \); then fft_support=yes fi if test x$fft_support = xyes; then STARPU_BUILD_STARPUFFT_TRUE= STARPU_BUILD_STARPUFFT_FALSE='#' else STARPU_BUILD_STARPUFFT_TRUE='#' STARPU_BUILD_STARPUFFT_FALSE= fi # Check whether --enable-starpufft-examples was given. if test ${enable_starpufft_examples+y} then : enableval=$enable_starpufft_examples; enable_starpufft_examples=$enableval else $as_nop enable_starpufft_examples=no fi if test x$enable_starpufft_examples = xyes; then STARPU_BUILD_STARPUFFT_EXAMPLES_TRUE= STARPU_BUILD_STARPUFFT_EXAMPLES_FALSE='#' else STARPU_BUILD_STARPUFFT_EXAMPLES_TRUE='#' STARPU_BUILD_STARPUFFT_EXAMPLES_FALSE= fi ########################################## # hwloc # ########################################## have_valid_hwloc=no SAVED_LIBS="${LIBS}" SAVED_CPPFLAGS="${CPPFLAGS}" SAVED_PKG_CONFIG_PATH="$PKG_CONFIG_PATH" # Check whether --with-hwloc was given. if test ${with_hwloc+y} then : withval=$with_hwloc; if test x$withval != xno; then if test "$withval" = "yes" ; then use_hwloc=yes else # use specified path if test ! -d "$withval" ; then as_fn_error $? "\"Directory specified for hwloc <$withval> does not exist\"" "$LINENO" 5 fi if test -d "$withval/lib64/pkgconfig" ; then export PKG_CONFIG_PATH=$withval/lib64/pkgconfig:$PKG_CONFIG_PATH else if test -d "$withval/lib/pkgconfig" ; then export PKG_CONFIG_PATH=$withval/lib/pkgconfig:$PKG_CONFIG_PATH else as_fn_error $? "\"Hwloc directory <$withval> does not have a subdirectory lib/pkgconfig or lib64/pkgconfig\"" "$LINENO" 5 fi fi use_hwloc=yes fi else use_hwloc=no fi else $as_nop use_hwloc=maybe fi if test "$use_hwloc" != "no" then : pkg_failed=no { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for HWLOC" >&5 printf %s "checking for HWLOC... " >&6; } if test -n "$PKG_CONFIG"; then if test -n "$HWLOC_CFLAGS"; then pkg_cv_HWLOC_CFLAGS="$HWLOC_CFLAGS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"hwloc\""; } >&5 ($PKG_CONFIG --exists --print-errors "hwloc") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_HWLOC_CFLAGS=`$PKG_CONFIG --cflags "hwloc" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$HWLOC_LIBS"; then pkg_cv_HWLOC_LIBS="$HWLOC_LIBS" else if test -n "$PKG_CONFIG" && \ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"hwloc\""; } >&5 ($PKG_CONFIG --exists --print-errors "hwloc") 2>&5 ac_status=$? printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_HWLOC_LIBS=`$PKG_CONFIG --libs "hwloc" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then HWLOC_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "hwloc"` else HWLOC_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "hwloc"` fi # Put the nasty error message in config.log where it belongs echo "$HWLOC_PKG_ERRORS" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } have_valid_hwloc=no elif test $pkg_failed = untried; then have_valid_hwloc=no else HWLOC_CFLAGS=$pkg_cv_HWLOC_CFLAGS HWLOC_LIBS=$pkg_cv_HWLOC_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } have_valid_hwloc=yes fi fi if test "x$have_valid_hwloc" = "xyes"; then STARPU_HAVE_HWLOC_TRUE= STARPU_HAVE_HWLOC_FALSE='#' else STARPU_HAVE_HWLOC_TRUE='#' STARPU_HAVE_HWLOC_FALSE= fi # in case hwloc was explicitly required, but is not available, this is an error if test "$use_hwloc" = "yes" -a "$have_valid_hwloc" = "no" then : as_fn_error $? "cannot find hwloc or pkg-config" "$LINENO" 5 fi # in case hwloc is not available but was not explicitly disabled, this is an error if test "$have_valid_hwloc" = "no" -a "$use_hwloc" != "no" then : as_fn_error $? "libhwloc or pkg-config was not found on your system. If the target machine is hyperthreaded the performance may be impacted a lot. It is strongly recommended to install libhwloc and pkg-config. However, if you really want to use StarPU without enabling libhwloc, please restart configure by specifying the option '--without-hwloc'." "$LINENO" 5 fi LIBS="${HWLOC_LIBS} ${SAVED_LIBS}" CPPFLAGS="${HWLOC_CFLAGS} ${SAVED_CPPFLAGS}" if test "$have_valid_hwloc" = "yes" then : printf "%s\n" "#define STARPU_HAVE_HWLOC 1" >>confdefs.h HWLOC_REQUIRES=hwloc STARPU_HAVE_HWLOC=1 ac_fn_check_decl "$LINENO" "hwloc_cuda_get_device_osdev_by_index" "ac_cv_have_decl_hwloc_cuda_get_device_osdev_by_index" "#include " "$ac_c_undeclared_builtin_options" "CFLAGS" if test "x$ac_cv_have_decl_hwloc_cuda_get_device_osdev_by_index" = xyes then : ac_have_decl=1 else $as_nop ac_have_decl=0 fi printf "%s\n" "#define HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX $ac_have_decl" >>confdefs.h ac_fn_check_decl "$LINENO" "hwloc_hip_get_device_osdev_by_index" "ac_cv_have_decl_hwloc_hip_get_device_osdev_by_index" "#include " "$ac_c_undeclared_builtin_options" "CFLAGS" if test "x$ac_cv_have_decl_hwloc_hip_get_device_osdev_by_index" = xyes then : ac_have_decl=1 else $as_nop ac_have_decl=0 fi printf "%s\n" "#define HAVE_DECL_HWLOC_HIP_GET_DEVICE_OSDEV_BY_INDEX $ac_have_decl" >>confdefs.h ac_fn_check_decl "$LINENO" "hwloc_distances_obj_pair_values" "ac_cv_have_decl_hwloc_distances_obj_pair_values" "#include " "$ac_c_undeclared_builtin_options" "CFLAGS" if test "x$ac_cv_have_decl_hwloc_distances_obj_pair_values" = xyes then : ac_have_decl=1 else $as_nop ac_have_decl=0 fi printf "%s\n" "#define HAVE_DECL_HWLOC_DISTANCES_OBJ_PAIR_VALUES $ac_have_decl" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "hwloc_topology_dup" "ac_cv_func_hwloc_topology_dup" if test "x$ac_cv_func_hwloc_topology_dup" = xyes then : printf "%s\n" "#define HAVE_HWLOC_TOPOLOGY_DUP 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "hwloc_topology_set_components" "ac_cv_func_hwloc_topology_set_components" if test "x$ac_cv_func_hwloc_topology_set_components" = xyes then : printf "%s\n" "#define HAVE_HWLOC_TOPOLOGY_SET_COMPONENTS 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "hwloc_cpukinds_get_nr" "ac_cv_func_hwloc_cpukinds_get_nr" if test "x$ac_cv_func_hwloc_cpukinds_get_nr" = xyes then : printf "%s\n" "#define HAVE_HWLOC_CPUKINDS_GET_NR 1" >>confdefs.h fi ac_fn_c_check_func "$LINENO" "hwloc_get_area_memlocation" "ac_cv_func_hwloc_get_area_memlocation" if test "x$ac_cv_func_hwloc_get_area_memlocation" = xyes then : printf "%s\n" "#define HAVE_HWLOC_GET_AREA_MEMLOCATION 1" >>confdefs.h fi if test $ac_cv_func_hwloc_topology_dup = yes; then STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE= STARPU_HWLOC_HAVE_TOPOLOGY_DUP_FALSE='#' else STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE='#' STARPU_HWLOC_HAVE_TOPOLOGY_DUP_FALSE= fi LIBS="${SAVED_LIBS}" CPPFLAGS="${SAVED_CPPFLAGS}" export PKG_CONFIG_PATH=$SAVED_PKG_CONFIG_PATH { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether hwloc should be used" >&5 printf %s "checking whether hwloc should be used... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $have_valid_hwloc" >&5 printf "%s\n" "$have_valid_hwloc" >&6; } # is the header file f77.h available ? ac_fn_c_check_header_compile "$LINENO" "f77.h" "ac_cv_header_f77_h" "$ac_includes_default" if test "x$ac_cv_header_f77_h" = xyes then : have_f77_h=yes else $as_nop have_f77_h=no fi STARPU_HAVE_F77_H=$have_f77_h if test x$have_f77_h = xyes; then STARPU_HAVE_F77_H_TRUE= STARPU_HAVE_F77_H_FALSE='#' else STARPU_HAVE_F77_H_TRUE='#' STARPU_HAVE_F77_H_FALSE= fi if test x$have_f77_h = xyes; then printf "%s\n" "#define STARPU_HAVE_F77_H 1" >>confdefs.h fi # Check whether --with-icc was given. if test ${with_icc+y} then : withval=$with_icc; icc_path="$withval" else $as_nop icc_path="" fi # Check whether --with-icc-args was given. if test ${with_icc_args+y} then : withval=$with_icc_args; icc_args=$withval fi ICC_ARGS=$icc_args # Check whether --enable-icc was given. if test ${enable_icc+y} then : enableval=$enable_icc; enable_icc=$enableval else $as_nop enable_icc=yes fi ICC="" if test "$enable_icc" = "yes" ; then if test "$icc_path" != "" ; then ICC="$icc_path" else # Check if icc is available # Extract the first word of "icc", so it can be a program name with args. set dummy icc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_ICC+y} then : printf %s "(cached) " >&6 else $as_nop case $ICC in [\\/]* | ?:[\\/]*) ac_cv_path_ICC="$ICC" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_ICC="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS ;; esac fi ICC=$ac_cv_path_ICC if test -n "$ICC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ICC" >&5 printf "%s\n" "$ICC" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi fi fi if test ! -x "$ICC"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: The ICC compiler '$ICC' does not have the execute permission" >&5 printf "%s\n" "The ICC compiler '$ICC' does not have the execute permission" >&6; } enable_icc=no ICC="" fi # If cuda and icc are both available, check they are compatible if test "$enable_cuda" = "yes" -a "$ICC" != ""; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether CUDA and ICC are compatible" >&5 printf %s "checking whether CUDA and ICC are compatible... " >&6; } OLD_CC="$CC" CC="$ICC" OLD_CFLAGS="$CFLAGS" CFLAGS="-I$PWD/include -I$srcdir/include" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else $as_nop ICC="" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CC="$OLD_CC" CFLAGS="$OLD_CFLAGS" fi # Disable ICC on windows if test "x$ICC" != "x" -a "$starpu_windows" = "yes" ; then ICC="" fi if test "x$ICC" != "x"; then printf "%s\n" "#define STARPU_HAVE_ICC 1" >>confdefs.h fi if test "x$ICC" != "x"; then STARPU_HAVE_ICC_TRUE= STARPU_HAVE_ICC_FALSE='#' else STARPU_HAVE_ICC_TRUE='#' STARPU_HAVE_ICC_FALSE= fi # Do not generate manpages for the tools if we do not have help2man for ac_prog in help2man do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_HELP2MAN+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$HELP2MAN"; then ac_cv_prog_HELP2MAN="$HELP2MAN" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_HELP2MAN="$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi HELP2MAN=$ac_cv_prog_HELP2MAN if test -n "$HELP2MAN"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $HELP2MAN" >&5 printf "%s\n" "$HELP2MAN" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$HELP2MAN" && break done # Disable on windows if test "$starpu_windows" = "yes" ; then HELP2MAN="" fi if test "x$HELP2MAN" != "x"; then STARPU_HAVE_HELP2MAN_TRUE= STARPU_HAVE_HELP2MAN_FALSE='#' else STARPU_HAVE_HELP2MAN_TRUE='#' STARPU_HAVE_HELP2MAN_FALSE= fi ac_fn_c_check_member "$LINENO" "struct cudaDeviceProp" "pciDomainID" "ac_cv_member_struct_cudaDeviceProp_pciDomainID" "#include " if test "x$ac_cv_member_struct_cudaDeviceProp_pciDomainID" = xyes then : printf "%s\n" "#define STARPU_HAVE_DOMAINID 1" >>confdefs.h fi ac_fn_c_check_member "$LINENO" "struct cudaDeviceProp" "pciBusID" "ac_cv_member_struct_cudaDeviceProp_pciBusID" "#include " if test "x$ac_cv_member_struct_cudaDeviceProp_pciBusID" = xyes then : printf "%s\n" "#define STARPU_HAVE_BUSID 1" >>confdefs.h fi if true; then STARPU_HAVE_AM111_TRUE= STARPU_HAVE_AM111_FALSE='#' else STARPU_HAVE_AM111_TRUE='#' STARPU_HAVE_AM111_FALSE= fi ########################################## # Resource Manager # ########################################## starpurm_support=no starpurm_dlb_support=no # Check whether --enable-starpurm was given. if test ${enable_starpurm+y} then : enableval=$enable_starpurm; enable_starpurm=$enableval else $as_nop enable_starpurm=no fi if test "x$enable_starpurm" != xno then starpurm_support=yes { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether resource management debug messages should be displayed" >&5 printf %s "checking whether resource management debug messages should be displayed... " >&6; } # Check whether --enable-starpurm-verbose was given. if test ${enable_starpurm_verbose+y} then : enableval=$enable_starpurm_verbose; enable_starpurm_verbose=$enableval else $as_nop enable_starpurm_verbose=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_starpurm_verbose" >&5 printf "%s\n" "$enable_starpurm_verbose" >&6; } if test x$enable_starpurm_verbose = xyes; then printf "%s\n" "#define STARPURM_VERBOSE 1" >>confdefs.h fi # DLB DLB_CFLAGS="" DLB_LIBS="" # Check whether --enable-dlb was given. if test ${enable_dlb+y} then : enableval=$enable_dlb; enable_dlb=$enableval else $as_nop enable_dlb=no fi if test "x$enable_dlb" != xno then # Check whether --with-dlb-include-dir was given. if test ${with_dlb_include_dir+y} then : withval=$with_dlb_include_dir; dlb_inc_dirs="$withval" else $as_nop dlb_inc_dirs="" fi dlb_inc_dirs="${dlb_inc_dirs} /usr/include/dlb" dlb_incdir_found=no for dlb_incdir in $dlb_inc_dirs do if test -n "$dlb_incdir" then SAVED_CPPFLAGS="${CPPFLAGS}" CPPFLAGS=-I${dlb_incdir} ac_fn_c_check_header_compile "$LINENO" "dlb.h" "ac_cv_header_dlb_h" "$ac_includes_default" if test "x$ac_cv_header_dlb_h" = xyes then : printf "%s\n" "#define HAVE_DLB_H 1" >>confdefs.h fi if test "$ac_cv_header_dlb_h" = "yes" then CPPFLAGS="-I$dlb_incdir ${SAVED_CPPFLAGS}" DLB_CFLAGS="-I${dlb_incdir}" dlb_incdir_found=yes break else CPPFLAGS=${SAVED_CPPFLAGS} fi unset ac_cv_header_dlb_h fi done # Check whether --with-dlb-lib-dir was given. if test ${with_dlb_lib_dir+y} then : withval=$with_dlb_lib_dir; dlb_lib_dirs="$withval" else $as_nop dlb_lib_dirs="" fi dlb_lib_dirs="${dlb_lib_dirs} /usr/lib/dlb" dlb_libdir_found=no for dlb_libdir in $dlb_lib_dirs do if test -n "$dlb_libdir" then SAVED_LDFLAGS="${LDFLAGS}" LDFLAGS=-L${dlb_libdir} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for DLB_Init in -ldlb" >&5 printf %s "checking for DLB_Init in -ldlb... " >&6; } if test ${ac_cv_lib_dlb_DLB_Init+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-ldlb $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char DLB_Init (); int main (void) { return DLB_Init (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : ac_cv_lib_dlb_DLB_Init=yes else $as_nop ac_cv_lib_dlb_DLB_Init=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dlb_DLB_Init" >&5 printf "%s\n" "$ac_cv_lib_dlb_DLB_Init" >&6; } if test "x$ac_cv_lib_dlb_DLB_Init" = xyes then : printf "%s\n" "#define HAVE_LIBDLB 1" >>confdefs.h LIBS="-ldlb $LIBS" fi if test "$ac_cv_lib_dlb_DLB_Init" = "yes" then LDFLAGS="-L${dlb_libdir} ${SAVED_LDFLAGS} ${STARPU_DLB_LDFLAGS}" DLB_LIBS="-L${dlb_libdir} -ldlb" dlb_libdir_found=yes break else LDFLAGS=${SAVED_LDFLAGS} fi unset ac_cv_lib_dlb_DLB_Init fi done SAVED_CPPFLAGS="${CPPFLAGS}" SAVED_CFLAGS="${CFLAGS}" SAVED_LDFLAGS="${LDFLAGS}" CPPFLAGS="$HWLOC_CPPFLAGS -D_GNU_SOURCE $CPPFLAGS" CFLAGS="$HWLOC_CFLAGS $CFLAGS" LIBS="$HWLOC_LIBS $LIBS" # check whether libhwloc has a dedicated glibc-sched.h include for conversion with glibc cpusets ac_fn_c_check_header_compile "$LINENO" "hwloc/glibc-sched.h" "ac_cv_header_hwloc_glibc_sched_h" "$ac_includes_default" if test "x$ac_cv_header_hwloc_glibc_sched_h" = xyes then : printf "%s\n" "#define HAVE_HWLOC_GLIBC_SCHED_H 1" >>confdefs.h fi CPPFLAGS="$SAVED_CPPFLAGS" CFLAGS="$SAVED_CFLAGS" LIBS="$SAVED_LIBS" SAVED_CPPFLAGS="${CPPFLAGS}" SAVED_CFLAGS="${CFLAGS}" SAVED_LDFLAGS="${LDFLAGS}" CPPFLAGS="$STARPU_CPPFLAGS $CPPFLAGS" CFLAGS="$STARPU_CFLAGS $CFLAGS" LIBS="$STARPU_LIBS $LIBS" # check if StarPU implements starpu_worker_set_going_to_sleep_callback() if test x$enable_worker_cb = xyes ; then printf "%s\n" "#define STARPURM_STARPU_HAVE_WORKER_CALLBACKS 1" >>confdefs.h fi #AC_CHECK_FUNC([starpu_worker_set_going_to_sleep_callback],AC_DEFINE([STARPURM_STARPU_HAVE_WORKER_CALLBACKS], [1], [Define to 1 if StarPU has support for worker callbacks.])) CPPFLAGS="$SAVED_CPPFLAGS" CFLAGS="$SAVED_CFLAGS" LIBS="$SAVED_LIBS" if test "x$dlb_incdir_found" != "xyes" -o "x$dlb_libdir_found" != "xyes" then enable_dlb=no fi fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether DLB support should be enabled" >&5 printf %s "checking whether DLB support should be enabled... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_dlb" >&5 printf "%s\n" "$enable_dlb" >&6; } if test "x$enable_dlb" != "xno" then printf "%s\n" "#define STARPURM_HAVE_DLB 1" >>confdefs.h starpurm_dlb_support=yes { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether DLB resource management debug messages should be displayed" >&5 printf %s "checking whether DLB resource management debug messages should be displayed... " >&6; } # Check whether --enable-starpurm-dlb-verbose was given. if test ${enable_starpurm_dlb_verbose+y} then : enableval=$enable_starpurm_dlb_verbose; enable_starpurm_dlb_verbose=$enableval else $as_nop enable_starpurm_dlb_verbose=no fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_starpurm_dlb_verbose" >&5 printf "%s\n" "$enable_starpurm_dlb_verbose" >&6; } if test x$enable_starpurm_dlb_verbose = xyes; then printf "%s\n" "#define STARPURM_DLB_VERBOSE 1" >>confdefs.h fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether DLB callbacks expect an user argument" >&5 printf %s "checking whether DLB callbacks expect an user argument... " >&6; } if test ${ac_cv_dlb_callback_arg+y} then : printf %s "(cached) " >&6 else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include dlb_handler_t dlb_handle; void _dlb_callback_disable_cpu(int cpuid, void *arg) { (void)cpuid; (void)arg; } void f(void) { (void)DLB_CallbackSet_sp(dlb_handle, dlb_callback_disable_cpu, (dlb_callback_t)_dlb_callback_disable_cpu, 0); } int main (void) { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO" then : ac_cv_dlb_callback_arg=yes else $as_nop ac_cv_dlb_callback_arg=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_dlb_callback_arg" >&5 printf "%s\n" "$ac_cv_dlb_callback_arg" >&6; } if test $ac_cv_dlb_callback_arg = yes; then printf "%s\n" "#define STARPURM_HAVE_DLB_CALLBACK_ARG 1" >>confdefs.h fi fi fi if test x$starpurm_dlb_support = "xyes"; then STARPURM_HAVE_DLB_TRUE= STARPURM_HAVE_DLB_FALSE='#' else STARPURM_HAVE_DLB_TRUE='#' STARPURM_HAVE_DLB_FALSE= fi if test x$starpurm_support = xyes; then STARPU_BUILD_STARPURM_TRUE= STARPU_BUILD_STARPURM_FALSE='#' else STARPU_BUILD_STARPURM_TRUE='#' STARPU_BUILD_STARPURM_FALSE= fi # Check whether --enable-starpurm-examples was given. if test ${enable_starpurm_examples+y} then : enableval=$enable_starpurm_examples; enable_starpurm_examples=$enableval else $as_nop enable_starpurm_examples=no fi if test x$enable_starpurm_examples = xyes; then STARPU_BUILD_STARPURM_EXAMPLES_TRUE= STARPU_BUILD_STARPURM_EXAMPLES_FALSE='#' else STARPU_BUILD_STARPURM_EXAMPLES_TRUE='#' STARPU_BUILD_STARPURM_EXAMPLES_FALSE= fi ##################################### # StarPUPy # ##################################### starpupy_support=no if test "x$enable_starpupy" != xno then for ac_prog in python3 do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_PYTHON+y} then : printf %s "(cached) " >&6 else $as_nop if test -n "$PYTHON"; then ac_cv_prog_PYTHON="$PYTHON" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_PYTHON="$ac_prog" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi PYTHON=$ac_cv_prog_PYTHON if test -n "$PYTHON"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PYTHON" >&5 printf "%s\n" "$PYTHON" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi test -n "$PYTHON" && break done if test "$ac_cv_prog_PYTHON" = "" then if test "x$enable_starpupy" = xyes ; then as_fn_error $? "python3 missing, cannot build StarPU python interface" "$LINENO" 5 else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: python3 missing, cannot build StarPU python interface" >&5 printf "%s\n" "$as_me: WARNING: python3 missing, cannot build StarPU python interface" >&2;} enable_starpupy=no fi fi fi if test "x$enable_starpupy" != xno then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for python3 version" >&5 printf %s "checking for python3 version... " >&6; } PYTHON_VERSION=$(echo "import sys ; print(str(sys.version_info.major)+\".\"+str(sys.version_info.minor))" | $PYTHON) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PYTHON_VERSION" >&5 printf "%s\n" "$PYTHON_VERSION" >&6; } PYTHON_INCLUDE_DIRS="`$PYTHON -c "from sysconfig import get_paths as gp; print(gp()['include'])"`" SAVED_CPPFLAGS="${CPPFLAGS}" CPPFLAGS="$CPPFLAGS -I$PYTHON_INCLUDE_DIRS" for ac_header in Python.h do : ac_fn_c_check_header_compile "$LINENO" "Python.h" "ac_cv_header_Python_h" "$ac_includes_default" if test "x$ac_cv_header_Python_h" = xyes then : printf "%s\n" "#define HAVE_PYTHON_H 1" >>confdefs.h have_python_h=yes else $as_nop have_python_h=no fi done if test "$have_python_h" = "no" ; then if test "x$enable_starpupy" = xyes ; then as_fn_error $? "Python.h missing, cannot build StarPU python interface (consider installing python-dev)" "$LINENO" 5 else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Python.h missing, cannot build StarPU python interface (consider installing python-dev)" >&5 printf "%s\n" "$as_me: WARNING: Python.h missing, cannot build StarPU python interface (consider installing python-dev)" >&2;} enable_starpupy=no fi fi fi if test "x$enable_starpupy" != xno then as_ac_Lib=`printf "%s\n" "ac_cv_lib_python$PYTHON_VERSION""_PyErr_Print" | $as_tr_sh` { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for PyErr_Print in -lpython$PYTHON_VERSION" >&5 printf %s "checking for PyErr_Print in -lpython$PYTHON_VERSION... " >&6; } if eval test \${$as_ac_Lib+y} then : printf %s "(cached) " >&6 else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lpython$PYTHON_VERSION $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ char PyErr_Print (); int main (void) { return PyErr_Print (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO" then : eval "$as_ac_Lib=yes" else $as_nop eval "$as_ac_Lib=no" fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi eval ac_res=\$$as_ac_Lib { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 printf "%s\n" "$ac_res" >&6; } if eval test \"x\$"$as_ac_Lib"\" = x"yes" then : have_python_lib=yes else $as_nop have_python_lib=no fi if test "$have_python_lib" = "no" ; then if test "x$enable_starpupy" = xyes ; then as_fn_error $? "Python library missing, cannot build StarPU python interface (consider installing python-dev)" "$LINENO" 5 else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Python library missing, cannot build StarPU python interface (consider installing python-dev)" >&5 printf "%s\n" "$as_me: WARNING: Python library missing, cannot build StarPU python interface (consider installing python-dev)" >&2;} enable_starpupy=no fi fi fi if test "x$enable_starpupy" != xno then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for python3 setuptools" >&5 printf %s "checking for python3 setuptools... " >&6; } if $PYTHON -c "import setuptools" ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if test "x$enable_starpupy" = xyes ; then as_fn_error $? "setuptools missing, cannot install StarPU python interface (consider installing python-setuptools)" "$LINENO" 5 else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: setuptools missing, cannot install StarPU python interface (consider installing python-setuptools)" >&5 printf "%s\n" "$as_me: WARNING: setuptools missing, cannot install StarPU python interface (consider installing python-setuptools)" >&2;} enable_starpupy=no fi fi fi if test "x$enable_starpupy" != xno then CPPFLAGS=${SAVED_CPPFLAGS} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for python3 module joblib" >&5 printf %s "checking for python3 module joblib... " >&6; } echo "import joblib" | $PYTHON - 2>/dev/null if test $? -ne 0 ; then joblib_avail=no else joblib_avail=yes fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $joblib_avail" >&5 printf "%s\n" "$joblib_avail" >&6; } if test "$joblib_avail" = "yes" ; then printf "%s\n" "#define STARPU_PYTHON_HAVE_JOBLIB 1" >>confdefs.h else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: python3 module joblib missing, cannot build full StarPU python interface (consider running 'pip3 install joblib')" >&5 printf "%s\n" "$as_me: WARNING: python3 module joblib missing, cannot build full StarPU python interface (consider running 'pip3 install joblib')" >&2;} fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for python3 module cloudpickle" >&5 printf %s "checking for python3 module cloudpickle... " >&6; } echo "import cloudpickle" | $PYTHON - 2>/dev/null if test $? -ne 0 ; then cloudpickle_avail=no else cloudpickle_avail=yes fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cloudpickle_avail" >&5 printf "%s\n" "$cloudpickle_avail" >&6; } if test "$cloudpickle_avail" = "yes" ; then printf "%s\n" "#define STARPU_PYTHON_HAVE_CLOUDPICKLE 1" >>confdefs.h else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: python3 module cloudpickle missing, cannot build full StarPU python interface (consider running 'pip3 install cloudpickle')" >&5 printf "%s\n" "$as_me: WARNING: python3 module cloudpickle missing, cannot build full StarPU python interface (consider running 'pip3 install cloudpickle')" >&2;} fi starpupy_support=yes { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for python3 module numpy" >&5 printf %s "checking for python3 module numpy... " >&6; } echo "import numpy" | $PYTHON - 2>/dev/null if test $? -ne 0 ; then numpy_avail=no else numpy_avail=yes fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $numpy_avail" >&5 printf "%s\n" "$numpy_avail" >&6; } PYTHON_NUMPY_DIR="" if test "$numpy_avail" = "yes" ; then printf "%s\n" "#define STARPU_PYTHON_HAVE_NUMPY 1" >>confdefs.h PYTHON_NUMPY_DIR="`$PYTHON -c "import numpy ; print(numpy.get_include())"`" fi PYTHON_SETUP_OPTIONS="" if test x$enable_debug = xyes ; then PYTHON_SETUP_OPTIONS="--debug" fi fi if test x$starpupy_support = xyes; then STARPU_BUILD_STARPUPY_TRUE= STARPU_BUILD_STARPUPY_FALSE='#' else STARPU_BUILD_STARPUPY_TRUE='#' STARPU_BUILD_STARPUPY_FALSE= fi if test x$numpy_avail = xyes; then STARPU_STARPUPY_NUMPY_TRUE= STARPU_STARPUPY_NUMPY_FALSE='#' else STARPU_STARPUPY_NUMPY_TRUE='#' STARPU_STARPUPY_NUMPY_FALSE= fi ########################################## # Documentation # ########################################## def_enable_build_doc="yes" available_doc="no" if test -d "$srcdir/doc/doxygen/html" ; then def_enable_build_doc="no" available_doc="yes" fi if test "$starpu_darwin" = "yes" ; then def_enable_build_doc="no" fi # Check whether --enable-build-doc was given. if test ${enable_build_doc+y} then : enableval=$enable_build_doc; enable_build_doc=$enableval else $as_nop enable_build_doc=$def_enable_build_doc fi # Check whether --enable-build-doc-pdf was given. if test ${enable_build_doc_pdf+y} then : enableval=$enable_build_doc_pdf; enable_build_doc_pdf=$enableval else $as_nop enable_build_doc_pdf=no fi available_doc_pdf="no" if test -f "$srcdir/doc/doxygen/starpu.pdf" ; then enable_build_doc_pdf="no" available_doc_pdf="yes" fi # Check whether doxygen needed tools are installed # Extract the first word of "doxygen", so it can be a program name with args. set dummy doxygen; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_doxygencommand+y} then : printf %s "(cached) " >&6 else $as_nop case $doxygencommand in [\\/]* | ?:[\\/]*) ac_cv_path_doxygencommand="$doxygencommand" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_doxygencommand="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS ;; esac fi doxygencommand=$ac_cv_path_doxygencommand if test -n "$doxygencommand"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $doxygencommand" >&5 printf "%s\n" "$doxygencommand" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "$doxygencommand" = "" ; then if test "$enable_build_doc_pdf" = "yes" ; then as_fn_error $? "doxygen missing, cannot build documentation PDF" "$LINENO" 5 fi enable_build_doc="no" enable_build_doc_pdf="no" fi # Extract the first word of "pdflatex", so it can be a program name with args. set dummy pdflatex; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_pdflatexcommand+y} then : printf %s "(cached) " >&6 else $as_nop case $pdflatexcommand in [\\/]* | ?:[\\/]*) ac_cv_path_pdflatexcommand="$pdflatexcommand" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_pdflatexcommand="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS ;; esac fi pdflatexcommand=$ac_cv_path_pdflatexcommand if test -n "$pdflatexcommand"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pdflatexcommand" >&5 printf "%s\n" "$pdflatexcommand" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "$pdflatexcommand" = "" ; then if test "$enable_build_doc_pdf" = "yes" ; then as_fn_error $? "pdflatex missing, cannot build documentation PDF" "$LINENO" 5 fi enable_build_doc_pdf="no" fi # Extract the first word of "epstopdf", so it can be a program name with args. set dummy epstopdf; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_epstopdfcommand+y} then : printf %s "(cached) " >&6 else $as_nop case $epstopdfcommand in [\\/]* | ?:[\\/]*) ac_cv_path_epstopdfcommand="$epstopdfcommand" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_epstopdfcommand="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS ;; esac fi epstopdfcommand=$ac_cv_path_epstopdfcommand if test -n "$epstopdfcommand"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $epstopdfcommand" >&5 printf "%s\n" "$epstopdfcommand" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi if test "$epstopdfcommand" = "" ; then if test "$enable_build_doc_pdf" = "yes" ; then as_fn_error $? "epstopdf missing, cannot build documentation PDF" "$LINENO" 5 fi enable_build_doc_pdf="no" fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether HTML documentation should be compiled" >&5 printf %s "checking whether HTML documentation should be compiled... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_build_doc" >&5 printf "%s\n" "$enable_build_doc" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether HTML documentation is available" >&5 printf %s "checking whether HTML documentation is available... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $available_doc" >&5 printf "%s\n" "$available_doc" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PDF documentation should be compiled" >&5 printf %s "checking whether PDF documentation should be compiled... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_build_doc_pdf" >&5 printf "%s\n" "$enable_build_doc_pdf" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PDF documentation is available" >&5 printf %s "checking whether PDF documentation is available... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $available_doc_pdf" >&5 printf "%s\n" "$available_doc_pdf" >&6; } if test x$enable_build_doc != xno; then STARPU_BUILD_DOC_TRUE= STARPU_BUILD_DOC_FALSE='#' else STARPU_BUILD_DOC_TRUE='#' STARPU_BUILD_DOC_FALSE= fi if test x$available_doc != xno; then STARPU_AVAILABLE_DOC_TRUE= STARPU_AVAILABLE_DOC_FALSE='#' else STARPU_AVAILABLE_DOC_TRUE='#' STARPU_AVAILABLE_DOC_FALSE= fi if test x$enable_build_doc_pdf != xno; then STARPU_BUILD_DOC_PDF_TRUE= STARPU_BUILD_DOC_PDF_FALSE='#' else STARPU_BUILD_DOC_PDF_TRUE='#' STARPU_BUILD_DOC_PDF_FALSE= fi if test x$available_doc_pdf != xno; then STARPU_AVAILABLE_DOC_PDF_TRUE= STARPU_AVAILABLE_DOC_PDF_FALSE='#' else STARPU_AVAILABLE_DOC_PDF_TRUE='#' STARPU_AVAILABLE_DOC_PDF_FALSE= fi if test x$enable_build_doc_pdf != xno ; then DOC_GENERATE_LATEX=YES else DOC_GENERATE_LATEX=NO fi ############################################################################### # # # Julia # # # ############################################################################### # Check whether --enable-julia was given. if test ${enable_julia+y} then : enableval=$enable_julia; enable_julia=$enableval else $as_nop enable_julia=no fi if test "$enable_julia" = "yes" ; then # Check whether the julia compiler is available # Extract the first word of "julia", so it can be a program name with args. set dummy julia; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_juliapath+y} then : printf %s "(cached) " >&6 else $as_nop case $juliapath in [\\/]* | ?:[\\/]*) ac_cv_path_juliapath="$juliapath" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_juliapath="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS ;; esac fi juliapath=$ac_cv_path_juliapath if test -n "$juliapath"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $juliapath" >&5 printf "%s\n" "$juliapath" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether julia is available" >&5 printf %s "checking whether julia is available... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $juliapath" >&5 printf "%s\n" "$juliapath" >&6; } if test ! -x "$juliapath" ; then as_fn_error $? "Julia compiler '$juliapath' is not valid" "$LINENO" 5 enable_julia=no fi fi if test "x$enable_julia" = "xyes"; then STARPU_USE_JULIA_TRUE= STARPU_USE_JULIA_FALSE='#' else STARPU_USE_JULIA_TRUE='#' STARPU_USE_JULIA_FALSE= fi JULIA=$juliapath ############################################################################### # # # Eclipse Plugin # # # ############################################################################### # Check whether --enable-eclipse-plugin was given. if test ${enable_eclipse_plugin+y} then : enableval=$enable_eclipse_plugin; enable_eclipse_plugin=$enableval else $as_nop enable_eclipse_plugin=no fi if test "$enable_eclipse_plugin" = "yes" ; then # Extract the first word of "eclipse", so it can be a program name with args. set dummy eclipse; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_path_eclipsepath+y} then : printf %s "(cached) " >&6 else $as_nop case $eclipsepath in [\\/]* | ?:[\\/]*) ac_cv_path_eclipsepath="$eclipsepath" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_path_eclipsepath="$as_dir$ac_word$ac_exec_ext" printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS ;; esac fi eclipsepath=$ac_cv_path_eclipsepath if test -n "$eclipsepath"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $eclipsepath" >&5 printf "%s\n" "$eclipsepath" >&6; } else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether eclipse is available" >&5 printf %s "checking whether eclipse is available... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $eclipsepath" >&5 printf "%s\n" "$eclipsepath" >&6; } if test ! -x "$eclipsepath" ; then as_fn_error $? "Eclipse executable '$eclipsepath' is not valid" "$LINENO" 5 enable_eclipse_plugin=no fi libs=$(for x in starpu-$STARPU_EFFECTIVE_VERSION $(echo $STARPU_EXPORTED_LIBS | sed 's/-l//g') $HWLOC_REQUIRES ; do echo $x ; done) option_libs=$($srcdir/eclipse-plugin/tools/cproject.sh option $libs) module_libs=$($srcdir/eclipse-plugin/tools/cproject.sh module $libs) fi if test "x$enable_eclipse_plugin" = "xyes"; then STARPU_BUILD_ECLIPSE_PLUGIN_TRUE= STARPU_BUILD_ECLIPSE_PLUGIN_FALSE='#' else STARPU_BUILD_ECLIPSE_PLUGIN_TRUE='#' STARPU_BUILD_ECLIPSE_PLUGIN_FALSE= fi ECLIPSE=$eclipsepath STARPU_INCLUDE_PATH=$(eval echo ${includedir}/starpu/$STARPU_EFFECTIVE_VERSION) STARPU_LIB_PATH=$(eval echo ${prefix}/lib) STARPU_MODULE_LIBS="$module_libs" STARPU_OPTION_LIBS="$option_libs" ############################################################################### # # # Final settings # # # ############################################################################### if test x$enable_simgrid = xyes -a \( x$enable_cuda0 = xyes -o x$enable_cuda1 = xyes \) ; then as_fn_error $? "Cuda0 not supported with simgrid" "$LINENO" 5 fi if test x$enable_opencl = xyes -a \( x$enable_cuda0 = xyes -o x$enable_cuda1 = xyes \) ; then as_fn_error $? "Cuda0 not supported with OpenCL" "$LINENO" 5 fi if test x$enable_openmp = xyes -a \( x$enable_cuda0 = xyes -o x$enable_cuda1 = xyes \) ; then as_fn_error $? "Cuda0 not supported with OpenMP" "$LINENO" 5 fi CPPFLAGS="$CPPFLAGS -DSTARPU_SAMPLING_DIR=\"\\\"${datarootdir}/starpu/perfmodels/sampling\\\"\"" STARPU_BASIC_H_CPPFLAGS="$HWLOC_CFLAGS $STARPU_CUDA_CPPFLAGS $STARPU_HIP_CPPFLAGS $STARPU_OPENCL_CPPFLAGS $STARPU_MAX_FPGA_CPPFLAGS $SIMGRID_CFLAGS $PAPI_CFLAGS" # these are the flags needed to compile starpu.h STARPU_H_CPPFLAGS="$STARPU_BASIC_H_CPPFLAGS" STARPU_NVCC_H_CPPFLAGS="$STARPU_BASIC_H_CPPFLAGS" # these are the flags needed for linking libstarpu (and thus also for static linking) LIBSTARPU_LDFLAGS="$STARPU_OPENCL_LDFLAGS $STARPU_CUDA_LDFLAGS $STARPU_HIP_LDFLAGS $HWLOC_LIBS $FXT_LDFLAGS $FXT_LIBS $PAPI_LIBS $STARPU_GLPK_LDFLAGS $STARPU_LEVELDB_LDFLAGS $SIMGRID_LDFLAGS $STARPU_BLAS_LDFLAGS $DGELS_LIBS $STARPU_MAX_FPGA_LDFLAGS $STARPU_DLOPEN_LDFLAGS" # these are the flags needed for linking against libstarpu (because starpu.h makes its includer use pthread_*, simgrid, etc.) if test "x$enable_shared" = xno; then # No .so, so application will unexpectedly have to know which -l to # use. Give them in .pc file. printf "%s\n" "#define STARPU_STATIC_ONLY 1" >>confdefs.h STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS $LDFLAGS $LIBS $LIBSTARPU_LDFLAGS" fi STARPUPY_EXTRA_LINK_ARGS="" if test "x$enable_starpupy" != xno then if test "x$OPENMP_CFLAGS" != "x" then STARPUPY_EXTRA_LINK_ARGS="$STARPUPY_EXTRA_LINK_ARGS '$OPENMP_CFLAGS', " fi for flag in $STARPU_EXPORTED_LIBS do STARPUPY_EXTRA_LINK_ARGS="$STARPUPY_EXTRA_LINK_ARGS '$flag', " done if test x$enable_coverage = xyes; then STARPUPY_EXTRA_LINK_ARGS="$STARPUPY_EXTRA_LINK_ARGS '-lgcov', " fi fi LIBSTARPU_LINK=libstarpu-$STARPU_EFFECTIVE_VERSION.la LIBSTARPU_LINK="$LIBSTARPU_LINK $STARPU_EXPORTED_LIBS" # File configuration ac_config_commands="$ac_config_commands executable-scripts" # Create links to ICD files in build/socl/vendors directory. SOCL will use this # directory as the OCL_ICD_VENDORS directory SOCL_VENDORS="vendors/install/socl.icd" for icd in /etc/OpenCL/vendors/*.icd ; do if test -f $icd ; then if test "$(basename $icd)" != "socl.icd" ; then new_icd=$(basename $icd) ac_config_links="$ac_config_links socl/vendors/$new_icd:$icd" SOCL_VENDORS="$SOCL_VENDORS vendors/$new_icd" fi fi done ac_config_files="$ac_config_files tests/regression/regression.sh tests/regression/profiles tests/regression/profiles.build.only" ac_config_headers="$ac_config_headers src/common/config.h src/common/config-src-build.h include/starpu_config.h starpurm/include/starpurm_config.h" SANITIZE=$(echo $CFLAGS | grep sanitize) if test -n "$SANITIZE"; then STARPU_SANITIZE_TRUE= STARPU_SANITIZE_FALSE='#' else STARPU_SANITIZE_TRUE='#' STARPU_SANITIZE_FALSE= fi ac_config_files="$ac_config_files Makefile src/Makefile tools/Makefile tools/starpu_env tools/starpu_codelet_profile tools/starpu_codelet_histo_profile tools/starpu_mpi_comm_matrix.py tools/starpu_fxt_number_events_to_names.py tools/starpu_workers_activity tools/starpu_paje_draw_histogram tools/starpu_paje_state_stats tools/starpu_paje_summary tools/starpu_config tools/starpu_mlr_analysis tools/starpu_paje_sort tools/starpu_smpirun tools/starpu_tcpipexec socl/Makefile socl/src/Makefile socl/examples/Makefile socl/vendors/socl.icd socl/vendors/install/socl.icd packages/libstarpu.pc packages/starpu-1.0.pc packages/starpu-1.1.pc packages/starpu-1.2.pc packages/starpu-1.3.pc packages/starpu-1.4.pc packages/starpu-1.3 packages/starpu-1.4 mpi/packages/libstarpumpi.pc mpi/packages/starpumpi-1.0.pc mpi/packages/starpumpi-1.1.pc mpi/packages/starpumpi-1.2.pc mpi/packages/starpumpi-1.3.pc mpi/packages/starpumpi-1.4.pc starpufft/Makefile starpufft/src/Makefile starpufft/tests/Makefile starpufft/packages/libstarpufft.pc starpufft/packages/starpufft-1.0.pc starpufft/packages/starpufft-1.1.pc starpufft/packages/starpufft-1.2.pc starpufft/packages/starpufft-1.3.pc starpufft/packages/starpufft-1.4.pc starpurm/Makefile starpurm/src/Makefile starpurm/tests/Makefile starpurm/examples/Makefile starpurm/packages/starpurm-1.3.pc starpurm/packages/starpurm-1.4.pc starpu_openmp_llvm/Makefile starpu_openmp_llvm/src/Makefile starpu_openmp_llvm/examples/Makefile starpupy/src/setup.cfg starpupy/src/setup.py starpupy/Makefile starpupy/src/Makefile starpupy/examples/Makefile starpupy/execute.sh starpupy/benchmark/Makefile examples/Makefile examples/stencil/Makefile tests/Makefile tests/model-checking/Makefile tests/model-checking/starpu-mc.sh mpi/Makefile mpi/src/Makefile mpi/tests/Makefile mpi/examples/Makefile mpi/tools/Makefile mpi/GNUmakefile sc_hypervisor/Makefile sc_hypervisor/src/Makefile sc_hypervisor/examples/Makefile doc/Makefile doc/doxygen/Makefile doc/doxygen/doxygen-config.cfg doc/doxygen/doxygen-config-include.cfg doc/doxygen/doxygen_filter.sh doc/doxygen_dev/Makefile doc/doxygen_dev/doxygen-config.cfg doc/doxygen_dev/doxygen_filter.sh doc/doxygen_dev/doxygen-config-include.cfg doc/doxygen_web_introduction/Makefile doc/doxygen_web_introduction/doxygen-config.cfg doc/doxygen_web_installation/Makefile doc/doxygen_web_installation/doxygen-config.cfg doc/doxygen_web_basics/Makefile doc/doxygen_web_basics/doxygen-config.cfg doc/doxygen_web_applications/Makefile doc/doxygen_web_applications/doxygen-config.cfg doc/doxygen_web_performances/Makefile doc/doxygen_web_performances/doxygen-config.cfg doc/doxygen_web_faq/Makefile doc/doxygen_web_faq/doxygen-config.cfg doc/doxygen_web_languages/Makefile doc/doxygen_web_languages/doxygen-config.cfg doc/doxygen_web_extensions/Makefile doc/doxygen_web_extensions/doxygen-config.cfg tools/msvc/starpu_var.bat min-dgels/Makefile bubble/Makefile bubble/tests/Makefile julia/Makefile julia/src/Makefile julia/src/dynamic_compiler/Makefile julia/examples/Makefile julia/examples/execute.sh eclipse-plugin/Makefile eclipse-plugin/src/Makefile eclipse-plugin/examples/Makefile eclipse-plugin/examples/hello/.cproject" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure # scripts and configure runs, see configure's option --config-cache. # It is not useful on other systems. If it contains results you don't # want to keep, you may remove or edit it. # # config.status only pays attention to the cache file if you give it # the --recheck option to rerun configure. # # `ac_cv_env_foo' variables (set or unset) will be overridden when # loading this file, other *unset* `ac_cv_foo' will be assigned the # following values. _ACEOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. # So, we kill variables containing newlines. # Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. ( for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) { eval $ac_var=; unset $ac_var;} ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space=' '; set) 2>&1` in #( *${as_nl}ac_space=\ *) # `set' does not quote correctly, so add quotes: double-quote # substitution turns \\\\ into \\, and sed turns \\ into \. sed -n \ "s/'/'\\\\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" ;; #( *) # `set' quotes correctly as required by POSIX, so do not add quotes. sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) | sed ' /^ac_cv_env_/b end t clear :clear s/^\([^=]*\)=\(.*[{}].*\)$/test ${\1+y} || &/ t end s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ :end' >>confcache if diff "$cache_file" confcache >/dev/null 2>&1; then :; else if test -w "$cache_file"; then if test "x$cache_file" != "x/dev/null"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 printf "%s\n" "$as_me: updating cache $cache_file" >&6;} if test ! -f "$cache_file" || test -h "$cache_file"; then cat confcache >"$cache_file" else case $cache_file in #( */* | ?:*) mv -f confcache "$cache_file"$$ && mv -f "$cache_file"$$ "$cache_file" ;; #( *) mv -f confcache "$cache_file" ;; esac fi fi else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 printf "%s\n" "$as_me: not updating unwritable cache $cache_file" >&6;} fi fi rm -f confcache test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' DEFS=-DHAVE_CONFIG_H ac_libobjs= ac_ltlibobjs= U= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' ac_i=`printf "%s\n" "$ac_i" | sed "$ac_script"` # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR # will be set to the directory where LIBOBJS objects are built. as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' done LIBOBJS=$ac_libobjs LTLIBOBJS=$ac_ltlibobjs { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 printf %s "checking that generated files are newer than configure... " >&6; } if test -n "$am_sleep_pid"; then # Hide warnings about reused PIDs. wait $am_sleep_pid 2>/dev/null fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: done" >&5 printf "%s\n" "done" >&6; } if test -n "$EXEEXT"; then am__EXEEXT_TRUE= am__EXEEXT_FALSE='#' else am__EXEEXT_TRUE='#' am__EXEEXT_FALSE= fi if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then as_fn_error $? "conditional \"AMDEP\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then as_fn_error $? "conditional \"am__fastdepCC\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then as_fn_error $? "conditional \"am__fastdepCC\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then as_fn_error $? "conditional \"am__fastdepCXX\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${HAVE_PARALLEL_TRUE}" && test -z "${HAVE_PARALLEL_FALSE}"; then as_fn_error $? "conditional \"HAVE_PARALLEL\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_BUBBLE_TRUE}" && test -z "${STARPU_BUBBLE_FALSE}"; then as_fn_error $? "conditional \"STARPU_BUBBLE\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_SIMGRID_MC_TRUE}" && test -z "${STARPU_SIMGRID_MC_FALSE}"; then as_fn_error $? "conditional \"STARPU_SIMGRID_MC\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_SIMGRID_TRUE}" && test -z "${STARPU_SIMGRID_FALSE}"; then as_fn_error $? "conditional \"STARPU_SIMGRID\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_CXX11_TRUE}" && test -z "${STARPU_HAVE_CXX11_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_CXX11\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_CROSS_COMPILING_TRUE}" && test -z "${STARPU_CROSS_COMPILING_FALSE}"; then as_fn_error $? "conditional \"STARPU_CROSS_COMPILING\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_MPI_MINIMAL_TESTS_TRUE}" && test -z "${STARPU_MPI_MINIMAL_TESTS_FALSE}"; then as_fn_error $? "conditional \"STARPU_MPI_MINIMAL_TESTS\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_MPI_MASTER_SLAVE_TRUE}" && test -z "${STARPU_USE_MPI_MASTER_SLAVE_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_MPI_MASTER_SLAVE\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_TCPIP_MASTER_SLAVE_TRUE}" && test -z "${STARPU_USE_TCPIP_MASTER_SLAVE_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_TCPIP_MASTER_SLAVE\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_MPI_CHECK_TRUE}" && test -z "${STARPU_MPI_CHECK_FALSE}"; then as_fn_error $? "conditional \"STARPU_MPI_CHECK\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_MPI_SYNC_CLOCKS_TRUE}" && test -z "${STARPU_MPI_SYNC_CLOCKS_FALSE}"; then as_fn_error $? "conditional \"STARPU_MPI_SYNC_CLOCKS\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_MPI_MPI_TRUE}" && test -z "${STARPU_USE_MPI_MPI_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_MPI_MPI\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_MPI_NMAD_TRUE}" && test -z "${STARPU_USE_MPI_NMAD_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_MPI_NMAD\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_MPI_TRUE}" && test -z "${STARPU_USE_MPI_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_MPI\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_MPI_FT_TRUE}" && test -z "${STARPU_USE_MPI_FT_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_MPI_FT\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_MPI_FT_STATS_TRUE}" && test -z "${STARPU_USE_MPI_FT_STATS_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_MPI_FT_STATS\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_MS_LIB_TRUE}" && test -z "${STARPU_HAVE_MS_LIB_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_MS_LIB\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_WINDOWS_TRUE}" && test -z "${STARPU_HAVE_WINDOWS_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_WINDOWS\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_LINUX_SYS_TRUE}" && test -z "${STARPU_LINUX_SYS_FALSE}"; then as_fn_error $? "conditional \"STARPU_LINUX_SYS\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_DARWIN_TRUE}" && test -z "${STARPU_HAVE_DARWIN_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_DARWIN\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_OPENBSD_SYS_TRUE}" && test -z "${STARPU_OPENBSD_SYS_FALSE}"; then as_fn_error $? "conditional \"STARPU_OPENBSD_SYS\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_QUICK_CHECK_TRUE}" && test -z "${STARPU_QUICK_CHECK_FALSE}"; then as_fn_error $? "conditional \"STARPU_QUICK_CHECK\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_LONG_CHECK_TRUE}" && test -z "${STARPU_LONG_CHECK_FALSE}"; then as_fn_error $? "conditional \"STARPU_LONG_CHECK\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_NEW_CHECK_TRUE}" && test -z "${STARPU_NEW_CHECK_FALSE}"; then as_fn_error $? "conditional \"STARPU_NEW_CHECK\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_HDF5_TRUE}" && test -z "${STARPU_HAVE_HDF5_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_HDF5\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_BUILD_SC_HYPERVISOR_TRUE}" && test -z "${STARPU_BUILD_SC_HYPERVISOR_FALSE}"; then as_fn_error $? "conditional \"STARPU_BUILD_SC_HYPERVISOR\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_SC_HYPERVISOR_TRUE}" && test -z "${STARPU_USE_SC_HYPERVISOR_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_SC_HYPERVISOR\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_SC_HYPERVISOR_DEBUG_TRUE}" && test -z "${STARPU_SC_HYPERVISOR_DEBUG_FALSE}"; then as_fn_error $? "conditional \"STARPU_SC_HYPERVISOR_DEBUG\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_CPU_TRUE}" && test -z "${STARPU_USE_CPU_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_CPU\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_CUDA_TRUE}" && test -z "${STARPU_USE_CUDA_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_CUDA\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_MAGMA_TRUE}" && test -z "${STARPU_HAVE_MAGMA_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_MAGMA\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_CUFFTDOUBLECOMPLEX_TRUE}" && test -z "${STARPU_HAVE_CUFFTDOUBLECOMPLEX_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_CUFFTDOUBLECOMPLEX\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_CUDA0_TRUE}" && test -z "${STARPU_USE_CUDA0_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_CUDA0\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_CUDA1_TRUE}" && test -z "${STARPU_USE_CUDA1_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_CUDA1\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_HIPBLAS_TRUE}" && test -z "${STARPU_USE_HIPBLAS_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_HIPBLAS\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_HIP_TRUE}" && test -z "${STARPU_USE_HIP_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_HIP\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_OPENCL_TRUE}" && test -z "${STARPU_USE_OPENCL_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_OPENCL\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_MAX_FPGA_TRUE}" && test -z "${STARPU_USE_MAX_FPGA_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_MAX_FPGA\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_FC_TRUE}" && test -z "${STARPU_HAVE_FC_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_FC\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_F77_TRUE}" && test -z "${STARPU_HAVE_F77_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_F77\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_MPIFORT_TRUE}" && test -z "${STARPU_HAVE_MPIFORT_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_MPIFORT\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_COVERAGE_ENABLED_TRUE}" && test -z "${STARPU_COVERAGE_ENABLED_FALSE}"; then as_fn_error $? "conditional \"STARPU_COVERAGE_ENABLED\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_COVERITY_TRUE}" && test -z "${STARPU_COVERITY_FALSE}"; then as_fn_error $? "conditional \"STARPU_COVERITY\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_FXT_TRUE}" && test -z "${STARPU_USE_FXT_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_FXT\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_AYUDAME1_TRUE}" && test -z "${STARPU_USE_AYUDAME1_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_AYUDAME1\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_AYUDAME2_TRUE}" && test -z "${STARPU_USE_AYUDAME2_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_AYUDAME2\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_LEVELDB_TRUE}" && test -z "${STARPU_HAVE_LEVELDB_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_LEVELDB\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_MP_TRUE}" && test -z "${STARPU_USE_MP_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_MP\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_DEVEL_TRUE}" && test -z "${STARPU_DEVEL_FALSE}"; then as_fn_error $? "conditional \"STARPU_DEVEL\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_PARALLEL_WORKER_TRUE}" && test -z "${STARPU_PARALLEL_WORKER_FALSE}"; then as_fn_error $? "conditional \"STARPU_PARALLEL_WORKER\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_OPENMP_LLVM_TRUE}" && test -z "${STARPU_OPENMP_LLVM_FALSE}"; then as_fn_error $? "conditional \"STARPU_OPENMP_LLVM\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_OPENMP_TRUE}" && test -z "${STARPU_OPENMP_FALSE}"; then as_fn_error $? "conditional \"STARPU_OPENMP\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_OPENMP_TRUE}" && test -z "${STARPU_HAVE_OPENMP_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_OPENMP\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_BUILD_SOCL_TRUE}" && test -z "${STARPU_BUILD_SOCL_FALSE}"; then as_fn_error $? "conditional \"STARPU_BUILD_SOCL\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_SOCL_TRUE}" && test -z "${STARPU_USE_SOCL_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_SOCL\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_BUILD_TESTS_TRUE}" && test -z "${STARPU_BUILD_TESTS_FALSE}"; then as_fn_error $? "conditional \"STARPU_BUILD_TESTS\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_BUILD_EXAMPLES_TRUE}" && test -z "${STARPU_BUILD_EXAMPLES_FALSE}"; then as_fn_error $? "conditional \"STARPU_BUILD_EXAMPLES\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_OPENGL_TRUE}" && test -z "${STARPU_HAVE_OPENGL_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_OPENGL\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_X11_TRUE}" && test -z "${STARPU_HAVE_X11_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_X11\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_CBLAS_H_TRUE}" && test -z "${STARPU_HAVE_CBLAS_H_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_CBLAS_H\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_LIBLAPACK_TRUE}" && test -z "${STARPU_HAVE_LIBLAPACK_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_LIBLAPACK\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_CBLAS_SGEMV_TRUE}" && test -z "${STARPU_HAVE_CBLAS_SGEMV_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_CBLAS_SGEMV\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_ATLAS_BLAS_LIB_TRUE}" && test -z "${STARPU_ATLAS_BLAS_LIB_FALSE}"; then as_fn_error $? "conditional \"STARPU_ATLAS_BLAS_LIB\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_GOTO_BLAS_LIB_TRUE}" && test -z "${STARPU_GOTO_BLAS_LIB_FALSE}"; then as_fn_error $? "conditional \"STARPU_GOTO_BLAS_LIB\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_MKL_BLAS_LIB_TRUE}" && test -z "${STARPU_MKL_BLAS_LIB_FALSE}"; then as_fn_error $? "conditional \"STARPU_MKL_BLAS_LIB\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_SYSTEM_BLAS_LIB_TRUE}" && test -z "${STARPU_SYSTEM_BLAS_LIB_FALSE}"; then as_fn_error $? "conditional \"STARPU_SYSTEM_BLAS_LIB\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_NO_BLAS_LIB_TRUE}" && test -z "${STARPU_NO_BLAS_LIB_FALSE}"; then as_fn_error $? "conditional \"STARPU_NO_BLAS_LIB\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_MIN_DGELS_TRUE}" && test -z "${STARPU_USE_MIN_DGELS_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_MIN_DGELS\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_FFTW_TRUE}" && test -z "${STARPU_HAVE_FFTW_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_FFTW\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_FFTWF_TRUE}" && test -z "${STARPU_HAVE_FFTWF_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_FFTWF\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_FFTWL_TRUE}" && test -z "${STARPU_HAVE_FFTWL_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_FFTWL\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_BUILD_STARPUFFT_TRUE}" && test -z "${STARPU_BUILD_STARPUFFT_FALSE}"; then as_fn_error $? "conditional \"STARPU_BUILD_STARPUFFT\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_BUILD_STARPUFFT_EXAMPLES_TRUE}" && test -z "${STARPU_BUILD_STARPUFFT_EXAMPLES_FALSE}"; then as_fn_error $? "conditional \"STARPU_BUILD_STARPUFFT_EXAMPLES\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_HWLOC_TRUE}" && test -z "${STARPU_HAVE_HWLOC_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_HWLOC\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE}" && test -z "${STARPU_HWLOC_HAVE_TOPOLOGY_DUP_FALSE}"; then as_fn_error $? "conditional \"STARPU_HWLOC_HAVE_TOPOLOGY_DUP\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_F77_H_TRUE}" && test -z "${STARPU_HAVE_F77_H_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_F77_H\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_ICC_TRUE}" && test -z "${STARPU_HAVE_ICC_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_ICC\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_HELP2MAN_TRUE}" && test -z "${STARPU_HAVE_HELP2MAN_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_HELP2MAN\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_AM111_TRUE}" && test -z "${STARPU_HAVE_AM111_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_AM111\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_HAVE_AM111_TRUE}" && test -z "${STARPU_HAVE_AM111_FALSE}"; then as_fn_error $? "conditional \"STARPU_HAVE_AM111\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPURM_HAVE_DLB_TRUE}" && test -z "${STARPURM_HAVE_DLB_FALSE}"; then as_fn_error $? "conditional \"STARPURM_HAVE_DLB\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_BUILD_STARPURM_TRUE}" && test -z "${STARPU_BUILD_STARPURM_FALSE}"; then as_fn_error $? "conditional \"STARPU_BUILD_STARPURM\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_BUILD_STARPURM_EXAMPLES_TRUE}" && test -z "${STARPU_BUILD_STARPURM_EXAMPLES_FALSE}"; then as_fn_error $? "conditional \"STARPU_BUILD_STARPURM_EXAMPLES\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_BUILD_STARPUPY_TRUE}" && test -z "${STARPU_BUILD_STARPUPY_FALSE}"; then as_fn_error $? "conditional \"STARPU_BUILD_STARPUPY\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_STARPUPY_NUMPY_TRUE}" && test -z "${STARPU_STARPUPY_NUMPY_FALSE}"; then as_fn_error $? "conditional \"STARPU_STARPUPY_NUMPY\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_BUILD_DOC_TRUE}" && test -z "${STARPU_BUILD_DOC_FALSE}"; then as_fn_error $? "conditional \"STARPU_BUILD_DOC\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_AVAILABLE_DOC_TRUE}" && test -z "${STARPU_AVAILABLE_DOC_FALSE}"; then as_fn_error $? "conditional \"STARPU_AVAILABLE_DOC\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_BUILD_DOC_PDF_TRUE}" && test -z "${STARPU_BUILD_DOC_PDF_FALSE}"; then as_fn_error $? "conditional \"STARPU_BUILD_DOC_PDF\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_AVAILABLE_DOC_PDF_TRUE}" && test -z "${STARPU_AVAILABLE_DOC_PDF_FALSE}"; then as_fn_error $? "conditional \"STARPU_AVAILABLE_DOC_PDF\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_USE_JULIA_TRUE}" && test -z "${STARPU_USE_JULIA_FALSE}"; then as_fn_error $? "conditional \"STARPU_USE_JULIA\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_BUILD_ECLIPSE_PLUGIN_TRUE}" && test -z "${STARPU_BUILD_ECLIPSE_PLUGIN_FALSE}"; then as_fn_error $? "conditional \"STARPU_BUILD_ECLIPSE_PLUGIN\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${STARPU_SANITIZE_TRUE}" && test -z "${STARPU_SANITIZE_FALSE}"; then as_fn_error $? "conditional \"STARPU_SANITIZE\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 printf "%s\n" "$as_me: creating $CONFIG_STATUS" >&6;} as_write_fail=0 cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 #! $SHELL # Generated by $as_me. # Run this file to recreate the current configuration. # Compiler output produced by configure, useful for debugging # configure, is in config.log if it exists. debug=false ac_cs_recheck=false ac_cs_silent=false SHELL=\${CONFIG_SHELL-$SHELL} export SHELL _ASEOF cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 ## -------------------- ## ## M4sh Initialization. ## ## -------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh as_nop=: if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else $as_nop case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi # Reset variables that may have inherited troublesome values from # the environment. # IFS needs to be set, to space, tab, and newline, in precisely that order. # (If _AS_PATH_WALK were called with IFS unset, it would have the # side effect of setting IFS to empty, thus disabling word splitting.) # Quoting is to prevent editors from complaining about space-tab. as_nl=' ' export as_nl IFS=" "" $as_nl" PS1='$ ' PS2='> ' PS4='+ ' # Ensure predictable behavior from utilities with locale-dependent output. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # We cannot yet rely on "unset" to work, but we need these variables # to be unset--not just set to an empty or harmless value--now, to # avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct # also avoids known problems related to "unset" and subshell syntax # in other old shells (e.g. bash 2.01 and pdksh 5.2.14). for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH do eval test \${$as_var+y} \ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : done # Ensure that fds 0, 1, and 2 are open. if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi if (exec 3>&2) ; then :; else exec 2>/dev/null; fi # The user is always right. if ${PATH_SEPARATOR+false} :; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS case $as_dir in #((( '') as_dir=./ ;; */) ;; *) as_dir=$as_dir/ ;; esac test -r "$as_dir$0" && as_myself=$as_dir$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the # script with STATUS, using 1 if that was 0. as_fn_error () { as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi printf "%s\n" "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. as_fn_set_status () { return $1 } # as_fn_set_status # as_fn_exit STATUS # ----------------- # Exit the shell with STATUS, even in a "trap 0" or "set -e" context. as_fn_exit () { set +e as_fn_set_status $1 exit $1 } # as_fn_exit # as_fn_unset VAR # --------------- # Portably unset VAR. as_fn_unset () { { eval $1=; unset $1;} } as_unset=as_fn_unset # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null then : eval 'as_fn_append () { eval $1+=\$2 }' else $as_nop as_fn_append () { eval $1=\$$1\$2 } fi # as_fn_append # as_fn_arith ARG... # ------------------ # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null then : eval 'as_fn_arith () { as_val=$(( $* )) }' else $as_nop as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` } fi # as_fn_arith if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || printf "%s\n" X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits # Determine whether it's possible to make 'echo' print without a newline. # These variables are no longer used directly by Autoconf, but are AC_SUBSTed # for compatibility with existing Makefiles. ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) case `echo 'xy\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. xy) ECHO_C='\c';; *) echo `echo ksh88 bug on AIX 6.1` > /dev/null ECHO_T=' ';; esac;; *) ECHO_N='-n';; esac # For backward compatibility with old third-party macros, we provide # the shell variables $as_echo and $as_echo_n. New code should use # AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. as_echo='printf %s\n' as_echo_n='printf %s' rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -pR'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -pR' fi else as_ln_s='cp -pR' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null # as_fn_mkdir_p # ------------- # Create "$as_dir" as a directory, including parents if necessary. as_fn_mkdir_p () { case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || eval $as_mkdir_p || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || printf "%s\n" X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" } # as_fn_mkdir_p if mkdir -p . 2>/dev/null; then as_mkdir_p='mkdir -p "$as_dir"' else test -d ./-p && rmdir ./-p as_mkdir_p=false fi # as_fn_executable_p FILE # ----------------------- # Test if FILE is an executable regular file. as_fn_executable_p () { test -f "$1" && test -x "$1" } # as_fn_executable_p as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" exec 6>&1 ## ----------------------------------- ## ## Main body of $CONFIG_STATUS script. ## ## ----------------------------------- ## _ASEOF test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # Save the log message, to keep $0 and so on meaningful, and to # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" This file was extended by StarPU $as_me 1.4.9, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS CONFIG_LINKS = $CONFIG_LINKS CONFIG_COMMANDS = $CONFIG_COMMANDS $ $0 $@ on `(hostname || uname -n) 2>/dev/null | sed 1q` " _ACEOF case $ac_config_files in *" "*) set x $ac_config_files; shift; ac_config_files=$*;; esac case $ac_config_headers in *" "*) set x $ac_config_headers; shift; ac_config_headers=$*;; esac cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # Files that config.status was made for. config_files="$ac_config_files" config_headers="$ac_config_headers" config_links="$ac_config_links" config_commands="$ac_config_commands" _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ac_cs_usage="\ \`$as_me' instantiates files and other configuration actions from templates according to the current configuration. Unless the files and actions are specified as TAGs, all are instantiated by default. Usage: $0 [OPTION]... [TAG]... -h, --help print this help, then exit -V, --version print version number and configuration settings, then exit --config print configuration, then exit -q, --quiet, --silent do not print progress messages -d, --debug don't remove temporary files --recheck update $as_me by reconfiguring in the same conditions --file=FILE[:TEMPLATE] instantiate the configuration file FILE --header=FILE[:TEMPLATE] instantiate the configuration header FILE Configuration files: $config_files Configuration headers: $config_headers Configuration links: $config_links Configuration commands: $config_commands Report bugs to . StarPU home page: ." _ACEOF ac_cs_config=`printf "%s\n" "$ac_configure_args" | sed "$ac_safe_unquote"` ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\''/g"` cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ StarPU config.status 1.4.9 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" Copyright (C) 2021 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." ac_pwd='$ac_pwd' srcdir='$srcdir' INSTALL='$INSTALL' MKDIR_P='$MKDIR_P' AWK='$AWK' test -n "\$AWK" || AWK=awk _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # The default lists apply if the user does not specify any file. ac_need_defaults=: while test $# != 0 do case $1 in --*=?*) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` ac_shift=: ;; --*=) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg= ac_shift=: ;; *) ac_option=$1 ac_optarg=$2 ac_shift=shift ;; esac case $ac_option in # Handling of the options. -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) printf "%s\n" "$ac_cs_version"; exit ;; --config | --confi | --conf | --con | --co | --c ) printf "%s\n" "$ac_cs_config"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift case $ac_optarg in *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; '') as_fn_error $? "missing file argument" ;; esac as_fn_append CONFIG_FILES " '$ac_optarg'" ac_need_defaults=false;; --header | --heade | --head | --hea ) $ac_shift case $ac_optarg in *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; esac as_fn_append CONFIG_HEADERS " '$ac_optarg'" ac_need_defaults=false;; --he | --h) # Conflict between --help and --header as_fn_error $? "ambiguous option: \`$1' Try \`$0 --help' for more information.";; --help | --hel | -h ) printf "%s\n" "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; # This is an error. -*) as_fn_error $? "unrecognized option: \`$1' Try \`$0 --help' for more information." ;; *) as_fn_append ac_config_targets " $1" ac_need_defaults=false ;; esac shift done ac_configure_extra_args= if $ac_cs_silent; then exec 6>/dev/null ac_configure_extra_args="$ac_configure_extra_args --silent" fi _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 if \$ac_cs_recheck; then set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion shift \printf "%s\n" "running CONFIG_SHELL=$SHELL \$*" >&6 CONFIG_SHELL='$SHELL' export CONFIG_SHELL exec "\$@" fi _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 exec 5>>config.log { echo sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. ## _ASBOX printf "%s\n" "$ac_log" } >&5 _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # # INIT-COMMANDS # AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}" # The HP-UX ksh and POSIX shell print the target directory to stdout # if CDPATH is set. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH sed_quote_subst='$sed_quote_subst' double_quote_subst='$double_quote_subst' delay_variable_subst='$delay_variable_subst' macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`' macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`' AS='`$ECHO "$AS" | $SED "$delay_single_quote_subst"`' DLLTOOL='`$ECHO "$DLLTOOL" | $SED "$delay_single_quote_subst"`' OBJDUMP='`$ECHO "$OBJDUMP" | $SED "$delay_single_quote_subst"`' enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`' enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`' pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`' enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`' shared_archive_member_spec='`$ECHO "$shared_archive_member_spec" | $SED "$delay_single_quote_subst"`' SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`' ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`' PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`' host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`' host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`' host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`' build_alias='`$ECHO "$build_alias" | $SED "$delay_single_quote_subst"`' build='`$ECHO "$build" | $SED "$delay_single_quote_subst"`' build_os='`$ECHO "$build_os" | $SED "$delay_single_quote_subst"`' SED='`$ECHO "$SED" | $SED "$delay_single_quote_subst"`' Xsed='`$ECHO "$Xsed" | $SED "$delay_single_quote_subst"`' GREP='`$ECHO "$GREP" | $SED "$delay_single_quote_subst"`' EGREP='`$ECHO "$EGREP" | $SED "$delay_single_quote_subst"`' FGREP='`$ECHO "$FGREP" | $SED "$delay_single_quote_subst"`' LD='`$ECHO "$LD" | $SED "$delay_single_quote_subst"`' NM='`$ECHO "$NM" | $SED "$delay_single_quote_subst"`' LN_S='`$ECHO "$LN_S" | $SED "$delay_single_quote_subst"`' max_cmd_len='`$ECHO "$max_cmd_len" | $SED "$delay_single_quote_subst"`' ac_objext='`$ECHO "$ac_objext" | $SED "$delay_single_quote_subst"`' exeext='`$ECHO "$exeext" | $SED "$delay_single_quote_subst"`' lt_unset='`$ECHO "$lt_unset" | $SED "$delay_single_quote_subst"`' lt_SP2NL='`$ECHO "$lt_SP2NL" | $SED "$delay_single_quote_subst"`' lt_NL2SP='`$ECHO "$lt_NL2SP" | $SED "$delay_single_quote_subst"`' lt_cv_to_host_file_cmd='`$ECHO "$lt_cv_to_host_file_cmd" | $SED "$delay_single_quote_subst"`' lt_cv_to_tool_file_cmd='`$ECHO "$lt_cv_to_tool_file_cmd" | $SED "$delay_single_quote_subst"`' reload_flag='`$ECHO "$reload_flag" | $SED "$delay_single_quote_subst"`' reload_cmds='`$ECHO "$reload_cmds" | $SED "$delay_single_quote_subst"`' FILECMD='`$ECHO "$FILECMD" | $SED "$delay_single_quote_subst"`' deplibs_check_method='`$ECHO "$deplibs_check_method" | $SED "$delay_single_quote_subst"`' file_magic_cmd='`$ECHO "$file_magic_cmd" | $SED "$delay_single_quote_subst"`' file_magic_glob='`$ECHO "$file_magic_glob" | $SED "$delay_single_quote_subst"`' want_nocaseglob='`$ECHO "$want_nocaseglob" | $SED "$delay_single_quote_subst"`' sharedlib_from_linklib_cmd='`$ECHO "$sharedlib_from_linklib_cmd" | $SED "$delay_single_quote_subst"`' AR='`$ECHO "$AR" | $SED "$delay_single_quote_subst"`' lt_ar_flags='`$ECHO "$lt_ar_flags" | $SED "$delay_single_quote_subst"`' AR_FLAGS='`$ECHO "$AR_FLAGS" | $SED "$delay_single_quote_subst"`' archiver_list_spec='`$ECHO "$archiver_list_spec" | $SED "$delay_single_quote_subst"`' STRIP='`$ECHO "$STRIP" | $SED "$delay_single_quote_subst"`' RANLIB='`$ECHO "$RANLIB" | $SED "$delay_single_quote_subst"`' old_postinstall_cmds='`$ECHO "$old_postinstall_cmds" | $SED "$delay_single_quote_subst"`' old_postuninstall_cmds='`$ECHO "$old_postuninstall_cmds" | $SED "$delay_single_quote_subst"`' old_archive_cmds='`$ECHO "$old_archive_cmds" | $SED "$delay_single_quote_subst"`' lock_old_archive_extraction='`$ECHO "$lock_old_archive_extraction" | $SED "$delay_single_quote_subst"`' CC='`$ECHO "$CC" | $SED "$delay_single_quote_subst"`' CFLAGS='`$ECHO "$CFLAGS" | $SED "$delay_single_quote_subst"`' compiler='`$ECHO "$compiler" | $SED "$delay_single_quote_subst"`' GCC='`$ECHO "$GCC" | $SED "$delay_single_quote_subst"`' lt_cv_sys_global_symbol_pipe='`$ECHO "$lt_cv_sys_global_symbol_pipe" | $SED "$delay_single_quote_subst"`' lt_cv_sys_global_symbol_to_cdecl='`$ECHO "$lt_cv_sys_global_symbol_to_cdecl" | $SED "$delay_single_quote_subst"`' lt_cv_sys_global_symbol_to_import='`$ECHO "$lt_cv_sys_global_symbol_to_import" | $SED "$delay_single_quote_subst"`' lt_cv_sys_global_symbol_to_c_name_address='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address" | $SED "$delay_single_quote_subst"`' lt_cv_sys_global_symbol_to_c_name_address_lib_prefix='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address_lib_prefix" | $SED "$delay_single_quote_subst"`' lt_cv_nm_interface='`$ECHO "$lt_cv_nm_interface" | $SED "$delay_single_quote_subst"`' nm_file_list_spec='`$ECHO "$nm_file_list_spec" | $SED "$delay_single_quote_subst"`' lt_sysroot='`$ECHO "$lt_sysroot" | $SED "$delay_single_quote_subst"`' lt_cv_truncate_bin='`$ECHO "$lt_cv_truncate_bin" | $SED "$delay_single_quote_subst"`' objdir='`$ECHO "$objdir" | $SED "$delay_single_quote_subst"`' MAGIC_CMD='`$ECHO "$MAGIC_CMD" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_no_builtin_flag='`$ECHO "$lt_prog_compiler_no_builtin_flag" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_pic='`$ECHO "$lt_prog_compiler_pic" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_wl='`$ECHO "$lt_prog_compiler_wl" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_static='`$ECHO "$lt_prog_compiler_static" | $SED "$delay_single_quote_subst"`' lt_cv_prog_compiler_c_o='`$ECHO "$lt_cv_prog_compiler_c_o" | $SED "$delay_single_quote_subst"`' need_locks='`$ECHO "$need_locks" | $SED "$delay_single_quote_subst"`' MANIFEST_TOOL='`$ECHO "$MANIFEST_TOOL" | $SED "$delay_single_quote_subst"`' DSYMUTIL='`$ECHO "$DSYMUTIL" | $SED "$delay_single_quote_subst"`' NMEDIT='`$ECHO "$NMEDIT" | $SED "$delay_single_quote_subst"`' LIPO='`$ECHO "$LIPO" | $SED "$delay_single_quote_subst"`' OTOOL='`$ECHO "$OTOOL" | $SED "$delay_single_quote_subst"`' OTOOL64='`$ECHO "$OTOOL64" | $SED "$delay_single_quote_subst"`' libext='`$ECHO "$libext" | $SED "$delay_single_quote_subst"`' shrext_cmds='`$ECHO "$shrext_cmds" | $SED "$delay_single_quote_subst"`' extract_expsyms_cmds='`$ECHO "$extract_expsyms_cmds" | $SED "$delay_single_quote_subst"`' archive_cmds_need_lc='`$ECHO "$archive_cmds_need_lc" | $SED "$delay_single_quote_subst"`' enable_shared_with_static_runtimes='`$ECHO "$enable_shared_with_static_runtimes" | $SED "$delay_single_quote_subst"`' export_dynamic_flag_spec='`$ECHO "$export_dynamic_flag_spec" | $SED "$delay_single_quote_subst"`' whole_archive_flag_spec='`$ECHO "$whole_archive_flag_spec" | $SED "$delay_single_quote_subst"`' compiler_needs_object='`$ECHO "$compiler_needs_object" | $SED "$delay_single_quote_subst"`' old_archive_from_new_cmds='`$ECHO "$old_archive_from_new_cmds" | $SED "$delay_single_quote_subst"`' old_archive_from_expsyms_cmds='`$ECHO "$old_archive_from_expsyms_cmds" | $SED "$delay_single_quote_subst"`' archive_cmds='`$ECHO "$archive_cmds" | $SED "$delay_single_quote_subst"`' archive_expsym_cmds='`$ECHO "$archive_expsym_cmds" | $SED "$delay_single_quote_subst"`' module_cmds='`$ECHO "$module_cmds" | $SED "$delay_single_quote_subst"`' module_expsym_cmds='`$ECHO "$module_expsym_cmds" | $SED "$delay_single_quote_subst"`' with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`' allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`' no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`' hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`' hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED "$delay_single_quote_subst"`' hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`' hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`' hardcode_minus_L='`$ECHO "$hardcode_minus_L" | $SED "$delay_single_quote_subst"`' hardcode_shlibpath_var='`$ECHO "$hardcode_shlibpath_var" | $SED "$delay_single_quote_subst"`' hardcode_automatic='`$ECHO "$hardcode_automatic" | $SED "$delay_single_quote_subst"`' inherit_rpath='`$ECHO "$inherit_rpath" | $SED "$delay_single_quote_subst"`' link_all_deplibs='`$ECHO "$link_all_deplibs" | $SED "$delay_single_quote_subst"`' always_export_symbols='`$ECHO "$always_export_symbols" | $SED "$delay_single_quote_subst"`' export_symbols_cmds='`$ECHO "$export_symbols_cmds" | $SED "$delay_single_quote_subst"`' exclude_expsyms='`$ECHO "$exclude_expsyms" | $SED "$delay_single_quote_subst"`' include_expsyms='`$ECHO "$include_expsyms" | $SED "$delay_single_quote_subst"`' prelink_cmds='`$ECHO "$prelink_cmds" | $SED "$delay_single_quote_subst"`' postlink_cmds='`$ECHO "$postlink_cmds" | $SED "$delay_single_quote_subst"`' file_list_spec='`$ECHO "$file_list_spec" | $SED "$delay_single_quote_subst"`' variables_saved_for_relink='`$ECHO "$variables_saved_for_relink" | $SED "$delay_single_quote_subst"`' need_lib_prefix='`$ECHO "$need_lib_prefix" | $SED "$delay_single_quote_subst"`' need_version='`$ECHO "$need_version" | $SED "$delay_single_quote_subst"`' version_type='`$ECHO "$version_type" | $SED "$delay_single_quote_subst"`' runpath_var='`$ECHO "$runpath_var" | $SED "$delay_single_quote_subst"`' shlibpath_var='`$ECHO "$shlibpath_var" | $SED "$delay_single_quote_subst"`' shlibpath_overrides_runpath='`$ECHO "$shlibpath_overrides_runpath" | $SED "$delay_single_quote_subst"`' libname_spec='`$ECHO "$libname_spec" | $SED "$delay_single_quote_subst"`' library_names_spec='`$ECHO "$library_names_spec" | $SED "$delay_single_quote_subst"`' soname_spec='`$ECHO "$soname_spec" | $SED "$delay_single_quote_subst"`' install_override_mode='`$ECHO "$install_override_mode" | $SED "$delay_single_quote_subst"`' postinstall_cmds='`$ECHO "$postinstall_cmds" | $SED "$delay_single_quote_subst"`' postuninstall_cmds='`$ECHO "$postuninstall_cmds" | $SED "$delay_single_quote_subst"`' finish_cmds='`$ECHO "$finish_cmds" | $SED "$delay_single_quote_subst"`' finish_eval='`$ECHO "$finish_eval" | $SED "$delay_single_quote_subst"`' hardcode_into_libs='`$ECHO "$hardcode_into_libs" | $SED "$delay_single_quote_subst"`' sys_lib_search_path_spec='`$ECHO "$sys_lib_search_path_spec" | $SED "$delay_single_quote_subst"`' configure_time_dlsearch_path='`$ECHO "$configure_time_dlsearch_path" | $SED "$delay_single_quote_subst"`' configure_time_lt_sys_library_path='`$ECHO "$configure_time_lt_sys_library_path" | $SED "$delay_single_quote_subst"`' hardcode_action='`$ECHO "$hardcode_action" | $SED "$delay_single_quote_subst"`' enable_dlopen='`$ECHO "$enable_dlopen" | $SED "$delay_single_quote_subst"`' enable_dlopen_self='`$ECHO "$enable_dlopen_self" | $SED "$delay_single_quote_subst"`' enable_dlopen_self_static='`$ECHO "$enable_dlopen_self_static" | $SED "$delay_single_quote_subst"`' old_striplib='`$ECHO "$old_striplib" | $SED "$delay_single_quote_subst"`' striplib='`$ECHO "$striplib" | $SED "$delay_single_quote_subst"`' compiler_lib_search_dirs='`$ECHO "$compiler_lib_search_dirs" | $SED "$delay_single_quote_subst"`' predep_objects='`$ECHO "$predep_objects" | $SED "$delay_single_quote_subst"`' postdep_objects='`$ECHO "$postdep_objects" | $SED "$delay_single_quote_subst"`' predeps='`$ECHO "$predeps" | $SED "$delay_single_quote_subst"`' postdeps='`$ECHO "$postdeps" | $SED "$delay_single_quote_subst"`' compiler_lib_search_path='`$ECHO "$compiler_lib_search_path" | $SED "$delay_single_quote_subst"`' LD_CXX='`$ECHO "$LD_CXX" | $SED "$delay_single_quote_subst"`' LD_F77='`$ECHO "$LD_F77" | $SED "$delay_single_quote_subst"`' LD_FC='`$ECHO "$LD_FC" | $SED "$delay_single_quote_subst"`' reload_flag_CXX='`$ECHO "$reload_flag_CXX" | $SED "$delay_single_quote_subst"`' reload_flag_F77='`$ECHO "$reload_flag_F77" | $SED "$delay_single_quote_subst"`' reload_flag_FC='`$ECHO "$reload_flag_FC" | $SED "$delay_single_quote_subst"`' reload_cmds_CXX='`$ECHO "$reload_cmds_CXX" | $SED "$delay_single_quote_subst"`' reload_cmds_F77='`$ECHO "$reload_cmds_F77" | $SED "$delay_single_quote_subst"`' reload_cmds_FC='`$ECHO "$reload_cmds_FC" | $SED "$delay_single_quote_subst"`' old_archive_cmds_CXX='`$ECHO "$old_archive_cmds_CXX" | $SED "$delay_single_quote_subst"`' old_archive_cmds_F77='`$ECHO "$old_archive_cmds_F77" | $SED "$delay_single_quote_subst"`' old_archive_cmds_FC='`$ECHO "$old_archive_cmds_FC" | $SED "$delay_single_quote_subst"`' compiler_CXX='`$ECHO "$compiler_CXX" | $SED "$delay_single_quote_subst"`' compiler_F77='`$ECHO "$compiler_F77" | $SED "$delay_single_quote_subst"`' compiler_FC='`$ECHO "$compiler_FC" | $SED "$delay_single_quote_subst"`' GCC_CXX='`$ECHO "$GCC_CXX" | $SED "$delay_single_quote_subst"`' GCC_F77='`$ECHO "$GCC_F77" | $SED "$delay_single_quote_subst"`' GCC_FC='`$ECHO "$GCC_FC" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_no_builtin_flag_CXX='`$ECHO "$lt_prog_compiler_no_builtin_flag_CXX" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_no_builtin_flag_F77='`$ECHO "$lt_prog_compiler_no_builtin_flag_F77" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_no_builtin_flag_FC='`$ECHO "$lt_prog_compiler_no_builtin_flag_FC" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_pic_CXX='`$ECHO "$lt_prog_compiler_pic_CXX" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_pic_F77='`$ECHO "$lt_prog_compiler_pic_F77" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_pic_FC='`$ECHO "$lt_prog_compiler_pic_FC" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_wl_CXX='`$ECHO "$lt_prog_compiler_wl_CXX" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_wl_F77='`$ECHO "$lt_prog_compiler_wl_F77" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_wl_FC='`$ECHO "$lt_prog_compiler_wl_FC" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_static_CXX='`$ECHO "$lt_prog_compiler_static_CXX" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_static_F77='`$ECHO "$lt_prog_compiler_static_F77" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_static_FC='`$ECHO "$lt_prog_compiler_static_FC" | $SED "$delay_single_quote_subst"`' lt_cv_prog_compiler_c_o_CXX='`$ECHO "$lt_cv_prog_compiler_c_o_CXX" | $SED "$delay_single_quote_subst"`' lt_cv_prog_compiler_c_o_F77='`$ECHO "$lt_cv_prog_compiler_c_o_F77" | $SED "$delay_single_quote_subst"`' lt_cv_prog_compiler_c_o_FC='`$ECHO "$lt_cv_prog_compiler_c_o_FC" | $SED "$delay_single_quote_subst"`' archive_cmds_need_lc_CXX='`$ECHO "$archive_cmds_need_lc_CXX" | $SED "$delay_single_quote_subst"`' archive_cmds_need_lc_F77='`$ECHO "$archive_cmds_need_lc_F77" | $SED "$delay_single_quote_subst"`' archive_cmds_need_lc_FC='`$ECHO "$archive_cmds_need_lc_FC" | $SED "$delay_single_quote_subst"`' enable_shared_with_static_runtimes_CXX='`$ECHO "$enable_shared_with_static_runtimes_CXX" | $SED "$delay_single_quote_subst"`' enable_shared_with_static_runtimes_F77='`$ECHO "$enable_shared_with_static_runtimes_F77" | $SED "$delay_single_quote_subst"`' enable_shared_with_static_runtimes_FC='`$ECHO "$enable_shared_with_static_runtimes_FC" | $SED "$delay_single_quote_subst"`' export_dynamic_flag_spec_CXX='`$ECHO "$export_dynamic_flag_spec_CXX" | $SED "$delay_single_quote_subst"`' export_dynamic_flag_spec_F77='`$ECHO "$export_dynamic_flag_spec_F77" | $SED "$delay_single_quote_subst"`' export_dynamic_flag_spec_FC='`$ECHO "$export_dynamic_flag_spec_FC" | $SED "$delay_single_quote_subst"`' whole_archive_flag_spec_CXX='`$ECHO "$whole_archive_flag_spec_CXX" | $SED "$delay_single_quote_subst"`' whole_archive_flag_spec_F77='`$ECHO "$whole_archive_flag_spec_F77" | $SED "$delay_single_quote_subst"`' whole_archive_flag_spec_FC='`$ECHO "$whole_archive_flag_spec_FC" | $SED "$delay_single_quote_subst"`' compiler_needs_object_CXX='`$ECHO "$compiler_needs_object_CXX" | $SED "$delay_single_quote_subst"`' compiler_needs_object_F77='`$ECHO "$compiler_needs_object_F77" | $SED "$delay_single_quote_subst"`' compiler_needs_object_FC='`$ECHO "$compiler_needs_object_FC" | $SED "$delay_single_quote_subst"`' old_archive_from_new_cmds_CXX='`$ECHO "$old_archive_from_new_cmds_CXX" | $SED "$delay_single_quote_subst"`' old_archive_from_new_cmds_F77='`$ECHO "$old_archive_from_new_cmds_F77" | $SED "$delay_single_quote_subst"`' old_archive_from_new_cmds_FC='`$ECHO "$old_archive_from_new_cmds_FC" | $SED "$delay_single_quote_subst"`' old_archive_from_expsyms_cmds_CXX='`$ECHO "$old_archive_from_expsyms_cmds_CXX" | $SED "$delay_single_quote_subst"`' old_archive_from_expsyms_cmds_F77='`$ECHO "$old_archive_from_expsyms_cmds_F77" | $SED "$delay_single_quote_subst"`' old_archive_from_expsyms_cmds_FC='`$ECHO "$old_archive_from_expsyms_cmds_FC" | $SED "$delay_single_quote_subst"`' archive_cmds_CXX='`$ECHO "$archive_cmds_CXX" | $SED "$delay_single_quote_subst"`' archive_cmds_F77='`$ECHO "$archive_cmds_F77" | $SED "$delay_single_quote_subst"`' archive_cmds_FC='`$ECHO "$archive_cmds_FC" | $SED "$delay_single_quote_subst"`' archive_expsym_cmds_CXX='`$ECHO "$archive_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`' archive_expsym_cmds_F77='`$ECHO "$archive_expsym_cmds_F77" | $SED "$delay_single_quote_subst"`' archive_expsym_cmds_FC='`$ECHO "$archive_expsym_cmds_FC" | $SED "$delay_single_quote_subst"`' module_cmds_CXX='`$ECHO "$module_cmds_CXX" | $SED "$delay_single_quote_subst"`' module_cmds_F77='`$ECHO "$module_cmds_F77" | $SED "$delay_single_quote_subst"`' module_cmds_FC='`$ECHO "$module_cmds_FC" | $SED "$delay_single_quote_subst"`' module_expsym_cmds_CXX='`$ECHO "$module_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`' module_expsym_cmds_F77='`$ECHO "$module_expsym_cmds_F77" | $SED "$delay_single_quote_subst"`' module_expsym_cmds_FC='`$ECHO "$module_expsym_cmds_FC" | $SED "$delay_single_quote_subst"`' with_gnu_ld_CXX='`$ECHO "$with_gnu_ld_CXX" | $SED "$delay_single_quote_subst"`' with_gnu_ld_F77='`$ECHO "$with_gnu_ld_F77" | $SED "$delay_single_quote_subst"`' with_gnu_ld_FC='`$ECHO "$with_gnu_ld_FC" | $SED "$delay_single_quote_subst"`' allow_undefined_flag_CXX='`$ECHO "$allow_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`' allow_undefined_flag_F77='`$ECHO "$allow_undefined_flag_F77" | $SED "$delay_single_quote_subst"`' allow_undefined_flag_FC='`$ECHO "$allow_undefined_flag_FC" | $SED "$delay_single_quote_subst"`' no_undefined_flag_CXX='`$ECHO "$no_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`' no_undefined_flag_F77='`$ECHO "$no_undefined_flag_F77" | $SED "$delay_single_quote_subst"`' no_undefined_flag_FC='`$ECHO "$no_undefined_flag_FC" | $SED "$delay_single_quote_subst"`' hardcode_libdir_flag_spec_CXX='`$ECHO "$hardcode_libdir_flag_spec_CXX" | $SED "$delay_single_quote_subst"`' hardcode_libdir_flag_spec_F77='`$ECHO "$hardcode_libdir_flag_spec_F77" | $SED "$delay_single_quote_subst"`' hardcode_libdir_flag_spec_FC='`$ECHO "$hardcode_libdir_flag_spec_FC" | $SED "$delay_single_quote_subst"`' hardcode_libdir_separator_CXX='`$ECHO "$hardcode_libdir_separator_CXX" | $SED "$delay_single_quote_subst"`' hardcode_libdir_separator_F77='`$ECHO "$hardcode_libdir_separator_F77" | $SED "$delay_single_quote_subst"`' hardcode_libdir_separator_FC='`$ECHO "$hardcode_libdir_separator_FC" | $SED "$delay_single_quote_subst"`' hardcode_direct_CXX='`$ECHO "$hardcode_direct_CXX" | $SED "$delay_single_quote_subst"`' hardcode_direct_F77='`$ECHO "$hardcode_direct_F77" | $SED "$delay_single_quote_subst"`' hardcode_direct_FC='`$ECHO "$hardcode_direct_FC" | $SED "$delay_single_quote_subst"`' hardcode_direct_absolute_CXX='`$ECHO "$hardcode_direct_absolute_CXX" | $SED "$delay_single_quote_subst"`' hardcode_direct_absolute_F77='`$ECHO "$hardcode_direct_absolute_F77" | $SED "$delay_single_quote_subst"`' hardcode_direct_absolute_FC='`$ECHO "$hardcode_direct_absolute_FC" | $SED "$delay_single_quote_subst"`' hardcode_minus_L_CXX='`$ECHO "$hardcode_minus_L_CXX" | $SED "$delay_single_quote_subst"`' hardcode_minus_L_F77='`$ECHO "$hardcode_minus_L_F77" | $SED "$delay_single_quote_subst"`' hardcode_minus_L_FC='`$ECHO "$hardcode_minus_L_FC" | $SED "$delay_single_quote_subst"`' hardcode_shlibpath_var_CXX='`$ECHO "$hardcode_shlibpath_var_CXX" | $SED "$delay_single_quote_subst"`' hardcode_shlibpath_var_F77='`$ECHO "$hardcode_shlibpath_var_F77" | $SED "$delay_single_quote_subst"`' hardcode_shlibpath_var_FC='`$ECHO "$hardcode_shlibpath_var_FC" | $SED "$delay_single_quote_subst"`' hardcode_automatic_CXX='`$ECHO "$hardcode_automatic_CXX" | $SED "$delay_single_quote_subst"`' hardcode_automatic_F77='`$ECHO "$hardcode_automatic_F77" | $SED "$delay_single_quote_subst"`' hardcode_automatic_FC='`$ECHO "$hardcode_automatic_FC" | $SED "$delay_single_quote_subst"`' inherit_rpath_CXX='`$ECHO "$inherit_rpath_CXX" | $SED "$delay_single_quote_subst"`' inherit_rpath_F77='`$ECHO "$inherit_rpath_F77" | $SED "$delay_single_quote_subst"`' inherit_rpath_FC='`$ECHO "$inherit_rpath_FC" | $SED "$delay_single_quote_subst"`' link_all_deplibs_CXX='`$ECHO "$link_all_deplibs_CXX" | $SED "$delay_single_quote_subst"`' link_all_deplibs_F77='`$ECHO "$link_all_deplibs_F77" | $SED "$delay_single_quote_subst"`' link_all_deplibs_FC='`$ECHO "$link_all_deplibs_FC" | $SED "$delay_single_quote_subst"`' always_export_symbols_CXX='`$ECHO "$always_export_symbols_CXX" | $SED "$delay_single_quote_subst"`' always_export_symbols_F77='`$ECHO "$always_export_symbols_F77" | $SED "$delay_single_quote_subst"`' always_export_symbols_FC='`$ECHO "$always_export_symbols_FC" | $SED "$delay_single_quote_subst"`' export_symbols_cmds_CXX='`$ECHO "$export_symbols_cmds_CXX" | $SED "$delay_single_quote_subst"`' export_symbols_cmds_F77='`$ECHO "$export_symbols_cmds_F77" | $SED "$delay_single_quote_subst"`' export_symbols_cmds_FC='`$ECHO "$export_symbols_cmds_FC" | $SED "$delay_single_quote_subst"`' exclude_expsyms_CXX='`$ECHO "$exclude_expsyms_CXX" | $SED "$delay_single_quote_subst"`' exclude_expsyms_F77='`$ECHO "$exclude_expsyms_F77" | $SED "$delay_single_quote_subst"`' exclude_expsyms_FC='`$ECHO "$exclude_expsyms_FC" | $SED "$delay_single_quote_subst"`' include_expsyms_CXX='`$ECHO "$include_expsyms_CXX" | $SED "$delay_single_quote_subst"`' include_expsyms_F77='`$ECHO "$include_expsyms_F77" | $SED "$delay_single_quote_subst"`' include_expsyms_FC='`$ECHO "$include_expsyms_FC" | $SED "$delay_single_quote_subst"`' prelink_cmds_CXX='`$ECHO "$prelink_cmds_CXX" | $SED "$delay_single_quote_subst"`' prelink_cmds_F77='`$ECHO "$prelink_cmds_F77" | $SED "$delay_single_quote_subst"`' prelink_cmds_FC='`$ECHO "$prelink_cmds_FC" | $SED "$delay_single_quote_subst"`' postlink_cmds_CXX='`$ECHO "$postlink_cmds_CXX" | $SED "$delay_single_quote_subst"`' postlink_cmds_F77='`$ECHO "$postlink_cmds_F77" | $SED "$delay_single_quote_subst"`' postlink_cmds_FC='`$ECHO "$postlink_cmds_FC" | $SED "$delay_single_quote_subst"`' file_list_spec_CXX='`$ECHO "$file_list_spec_CXX" | $SED "$delay_single_quote_subst"`' file_list_spec_F77='`$ECHO "$file_list_spec_F77" | $SED "$delay_single_quote_subst"`' file_list_spec_FC='`$ECHO "$file_list_spec_FC" | $SED "$delay_single_quote_subst"`' hardcode_action_CXX='`$ECHO "$hardcode_action_CXX" | $SED "$delay_single_quote_subst"`' hardcode_action_F77='`$ECHO "$hardcode_action_F77" | $SED "$delay_single_quote_subst"`' hardcode_action_FC='`$ECHO "$hardcode_action_FC" | $SED "$delay_single_quote_subst"`' compiler_lib_search_dirs_CXX='`$ECHO "$compiler_lib_search_dirs_CXX" | $SED "$delay_single_quote_subst"`' compiler_lib_search_dirs_F77='`$ECHO "$compiler_lib_search_dirs_F77" | $SED "$delay_single_quote_subst"`' compiler_lib_search_dirs_FC='`$ECHO "$compiler_lib_search_dirs_FC" | $SED "$delay_single_quote_subst"`' predep_objects_CXX='`$ECHO "$predep_objects_CXX" | $SED "$delay_single_quote_subst"`' predep_objects_F77='`$ECHO "$predep_objects_F77" | $SED "$delay_single_quote_subst"`' predep_objects_FC='`$ECHO "$predep_objects_FC" | $SED "$delay_single_quote_subst"`' postdep_objects_CXX='`$ECHO "$postdep_objects_CXX" | $SED "$delay_single_quote_subst"`' postdep_objects_F77='`$ECHO "$postdep_objects_F77" | $SED "$delay_single_quote_subst"`' postdep_objects_FC='`$ECHO "$postdep_objects_FC" | $SED "$delay_single_quote_subst"`' predeps_CXX='`$ECHO "$predeps_CXX" | $SED "$delay_single_quote_subst"`' predeps_F77='`$ECHO "$predeps_F77" | $SED "$delay_single_quote_subst"`' predeps_FC='`$ECHO "$predeps_FC" | $SED "$delay_single_quote_subst"`' postdeps_CXX='`$ECHO "$postdeps_CXX" | $SED "$delay_single_quote_subst"`' postdeps_F77='`$ECHO "$postdeps_F77" | $SED "$delay_single_quote_subst"`' postdeps_FC='`$ECHO "$postdeps_FC" | $SED "$delay_single_quote_subst"`' compiler_lib_search_path_CXX='`$ECHO "$compiler_lib_search_path_CXX" | $SED "$delay_single_quote_subst"`' compiler_lib_search_path_F77='`$ECHO "$compiler_lib_search_path_F77" | $SED "$delay_single_quote_subst"`' compiler_lib_search_path_FC='`$ECHO "$compiler_lib_search_path_FC" | $SED "$delay_single_quote_subst"`' LTCC='$LTCC' LTCFLAGS='$LTCFLAGS' compiler='$compiler_DEFAULT' # A function that is used when there is no print builtin or printf. func_fallback_echo () { eval 'cat <<_LTECHO_EOF \$1 _LTECHO_EOF' } # Quote evaled strings. for var in AS \ DLLTOOL \ OBJDUMP \ SHELL \ ECHO \ PATH_SEPARATOR \ SED \ GREP \ EGREP \ FGREP \ LD \ NM \ LN_S \ lt_SP2NL \ lt_NL2SP \ reload_flag \ FILECMD \ deplibs_check_method \ file_magic_cmd \ file_magic_glob \ want_nocaseglob \ sharedlib_from_linklib_cmd \ AR \ archiver_list_spec \ STRIP \ RANLIB \ CC \ CFLAGS \ compiler \ lt_cv_sys_global_symbol_pipe \ lt_cv_sys_global_symbol_to_cdecl \ lt_cv_sys_global_symbol_to_import \ lt_cv_sys_global_symbol_to_c_name_address \ lt_cv_sys_global_symbol_to_c_name_address_lib_prefix \ lt_cv_nm_interface \ nm_file_list_spec \ lt_cv_truncate_bin \ lt_prog_compiler_no_builtin_flag \ lt_prog_compiler_pic \ lt_prog_compiler_wl \ lt_prog_compiler_static \ lt_cv_prog_compiler_c_o \ need_locks \ MANIFEST_TOOL \ DSYMUTIL \ NMEDIT \ LIPO \ OTOOL \ OTOOL64 \ shrext_cmds \ export_dynamic_flag_spec \ whole_archive_flag_spec \ compiler_needs_object \ with_gnu_ld \ allow_undefined_flag \ no_undefined_flag \ hardcode_libdir_flag_spec \ hardcode_libdir_separator \ exclude_expsyms \ include_expsyms \ file_list_spec \ variables_saved_for_relink \ libname_spec \ library_names_spec \ soname_spec \ install_override_mode \ finish_eval \ old_striplib \ striplib \ compiler_lib_search_dirs \ predep_objects \ postdep_objects \ predeps \ postdeps \ compiler_lib_search_path \ LD_CXX \ LD_F77 \ LD_FC \ reload_flag_CXX \ reload_flag_F77 \ reload_flag_FC \ compiler_CXX \ compiler_F77 \ compiler_FC \ lt_prog_compiler_no_builtin_flag_CXX \ lt_prog_compiler_no_builtin_flag_F77 \ lt_prog_compiler_no_builtin_flag_FC \ lt_prog_compiler_pic_CXX \ lt_prog_compiler_pic_F77 \ lt_prog_compiler_pic_FC \ lt_prog_compiler_wl_CXX \ lt_prog_compiler_wl_F77 \ lt_prog_compiler_wl_FC \ lt_prog_compiler_static_CXX \ lt_prog_compiler_static_F77 \ lt_prog_compiler_static_FC \ lt_cv_prog_compiler_c_o_CXX \ lt_cv_prog_compiler_c_o_F77 \ lt_cv_prog_compiler_c_o_FC \ export_dynamic_flag_spec_CXX \ export_dynamic_flag_spec_F77 \ export_dynamic_flag_spec_FC \ whole_archive_flag_spec_CXX \ whole_archive_flag_spec_F77 \ whole_archive_flag_spec_FC \ compiler_needs_object_CXX \ compiler_needs_object_F77 \ compiler_needs_object_FC \ with_gnu_ld_CXX \ with_gnu_ld_F77 \ with_gnu_ld_FC \ allow_undefined_flag_CXX \ allow_undefined_flag_F77 \ allow_undefined_flag_FC \ no_undefined_flag_CXX \ no_undefined_flag_F77 \ no_undefined_flag_FC \ hardcode_libdir_flag_spec_CXX \ hardcode_libdir_flag_spec_F77 \ hardcode_libdir_flag_spec_FC \ hardcode_libdir_separator_CXX \ hardcode_libdir_separator_F77 \ hardcode_libdir_separator_FC \ exclude_expsyms_CXX \ exclude_expsyms_F77 \ exclude_expsyms_FC \ include_expsyms_CXX \ include_expsyms_F77 \ include_expsyms_FC \ file_list_spec_CXX \ file_list_spec_F77 \ file_list_spec_FC \ compiler_lib_search_dirs_CXX \ compiler_lib_search_dirs_F77 \ compiler_lib_search_dirs_FC \ predep_objects_CXX \ predep_objects_F77 \ predep_objects_FC \ postdep_objects_CXX \ postdep_objects_F77 \ postdep_objects_FC \ predeps_CXX \ predeps_F77 \ predeps_FC \ postdeps_CXX \ postdeps_F77 \ postdeps_FC \ compiler_lib_search_path_CXX \ compiler_lib_search_path_F77 \ compiler_lib_search_path_FC; do case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in *[\\\\\\\`\\"\\\$]*) eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes ;; *) eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" ;; esac done # Double-quote double-evaled strings. for var in reload_cmds \ old_postinstall_cmds \ old_postuninstall_cmds \ old_archive_cmds \ extract_expsyms_cmds \ old_archive_from_new_cmds \ old_archive_from_expsyms_cmds \ archive_cmds \ archive_expsym_cmds \ module_cmds \ module_expsym_cmds \ export_symbols_cmds \ prelink_cmds \ postlink_cmds \ postinstall_cmds \ postuninstall_cmds \ finish_cmds \ sys_lib_search_path_spec \ configure_time_dlsearch_path \ configure_time_lt_sys_library_path \ reload_cmds_CXX \ reload_cmds_F77 \ reload_cmds_FC \ old_archive_cmds_CXX \ old_archive_cmds_F77 \ old_archive_cmds_FC \ old_archive_from_new_cmds_CXX \ old_archive_from_new_cmds_F77 \ old_archive_from_new_cmds_FC \ old_archive_from_expsyms_cmds_CXX \ old_archive_from_expsyms_cmds_F77 \ old_archive_from_expsyms_cmds_FC \ archive_cmds_CXX \ archive_cmds_F77 \ archive_cmds_FC \ archive_expsym_cmds_CXX \ archive_expsym_cmds_F77 \ archive_expsym_cmds_FC \ module_cmds_CXX \ module_cmds_F77 \ module_cmds_FC \ module_expsym_cmds_CXX \ module_expsym_cmds_F77 \ module_expsym_cmds_FC \ export_symbols_cmds_CXX \ export_symbols_cmds_F77 \ export_symbols_cmds_FC \ prelink_cmds_CXX \ prelink_cmds_F77 \ prelink_cmds_FC \ postlink_cmds_CXX \ postlink_cmds_F77 \ postlink_cmds_FC; do case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in *[\\\\\\\`\\"\\\$]*) eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes ;; *) eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" ;; esac done ac_aux_dir='$ac_aux_dir' # See if we are running on zsh, and set the options that allow our # commands through without removal of \ escapes INIT. if test -n "\${ZSH_VERSION+set}"; then setopt NO_GLOB_SUBST fi PACKAGE='$PACKAGE' VERSION='$VERSION' RM='$RM' ofile='$ofile' _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # Handling of arguments. for ac_config_target in $ac_config_targets do case $ac_config_target in "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; "executable-scripts") CONFIG_COMMANDS="$CONFIG_COMMANDS executable-scripts" ;; "socl/vendors/$new_icd") CONFIG_LINKS="$CONFIG_LINKS socl/vendors/$new_icd:$icd" ;; "tests/regression/regression.sh") CONFIG_FILES="$CONFIG_FILES tests/regression/regression.sh" ;; "tests/regression/profiles") CONFIG_FILES="$CONFIG_FILES tests/regression/profiles" ;; "tests/regression/profiles.build.only") CONFIG_FILES="$CONFIG_FILES tests/regression/profiles.build.only" ;; "src/common/config.h") CONFIG_HEADERS="$CONFIG_HEADERS src/common/config.h" ;; "src/common/config-src-build.h") CONFIG_HEADERS="$CONFIG_HEADERS src/common/config-src-build.h" ;; "include/starpu_config.h") CONFIG_HEADERS="$CONFIG_HEADERS include/starpu_config.h" ;; "starpurm/include/starpurm_config.h") CONFIG_HEADERS="$CONFIG_HEADERS starpurm/include/starpurm_config.h" ;; "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;; "tools/Makefile") CONFIG_FILES="$CONFIG_FILES tools/Makefile" ;; "tools/starpu_env") CONFIG_FILES="$CONFIG_FILES tools/starpu_env" ;; "tools/starpu_codelet_profile") CONFIG_FILES="$CONFIG_FILES tools/starpu_codelet_profile" ;; "tools/starpu_codelet_histo_profile") CONFIG_FILES="$CONFIG_FILES tools/starpu_codelet_histo_profile" ;; "tools/starpu_mpi_comm_matrix.py") CONFIG_FILES="$CONFIG_FILES tools/starpu_mpi_comm_matrix.py" ;; "tools/starpu_fxt_number_events_to_names.py") CONFIG_FILES="$CONFIG_FILES tools/starpu_fxt_number_events_to_names.py" ;; "tools/starpu_workers_activity") CONFIG_FILES="$CONFIG_FILES tools/starpu_workers_activity" ;; "tools/starpu_paje_draw_histogram") CONFIG_FILES="$CONFIG_FILES tools/starpu_paje_draw_histogram" ;; "tools/starpu_paje_state_stats") CONFIG_FILES="$CONFIG_FILES tools/starpu_paje_state_stats" ;; "tools/starpu_paje_summary") CONFIG_FILES="$CONFIG_FILES tools/starpu_paje_summary" ;; "tools/starpu_config") CONFIG_FILES="$CONFIG_FILES tools/starpu_config" ;; "tools/starpu_mlr_analysis") CONFIG_FILES="$CONFIG_FILES tools/starpu_mlr_analysis" ;; "tools/starpu_paje_sort") CONFIG_FILES="$CONFIG_FILES tools/starpu_paje_sort" ;; "tools/starpu_smpirun") CONFIG_FILES="$CONFIG_FILES tools/starpu_smpirun" ;; "tools/starpu_tcpipexec") CONFIG_FILES="$CONFIG_FILES tools/starpu_tcpipexec" ;; "socl/Makefile") CONFIG_FILES="$CONFIG_FILES socl/Makefile" ;; "socl/src/Makefile") CONFIG_FILES="$CONFIG_FILES socl/src/Makefile" ;; "socl/examples/Makefile") CONFIG_FILES="$CONFIG_FILES socl/examples/Makefile" ;; "socl/vendors/socl.icd") CONFIG_FILES="$CONFIG_FILES socl/vendors/socl.icd" ;; "socl/vendors/install/socl.icd") CONFIG_FILES="$CONFIG_FILES socl/vendors/install/socl.icd" ;; "packages/libstarpu.pc") CONFIG_FILES="$CONFIG_FILES packages/libstarpu.pc" ;; "packages/starpu-1.0.pc") CONFIG_FILES="$CONFIG_FILES packages/starpu-1.0.pc" ;; "packages/starpu-1.1.pc") CONFIG_FILES="$CONFIG_FILES packages/starpu-1.1.pc" ;; "packages/starpu-1.2.pc") CONFIG_FILES="$CONFIG_FILES packages/starpu-1.2.pc" ;; "packages/starpu-1.3.pc") CONFIG_FILES="$CONFIG_FILES packages/starpu-1.3.pc" ;; "packages/starpu-1.4.pc") CONFIG_FILES="$CONFIG_FILES packages/starpu-1.4.pc" ;; "packages/starpu-1.3") CONFIG_FILES="$CONFIG_FILES packages/starpu-1.3" ;; "packages/starpu-1.4") CONFIG_FILES="$CONFIG_FILES packages/starpu-1.4" ;; "mpi/packages/libstarpumpi.pc") CONFIG_FILES="$CONFIG_FILES mpi/packages/libstarpumpi.pc" ;; "mpi/packages/starpumpi-1.0.pc") CONFIG_FILES="$CONFIG_FILES mpi/packages/starpumpi-1.0.pc" ;; "mpi/packages/starpumpi-1.1.pc") CONFIG_FILES="$CONFIG_FILES mpi/packages/starpumpi-1.1.pc" ;; "mpi/packages/starpumpi-1.2.pc") CONFIG_FILES="$CONFIG_FILES mpi/packages/starpumpi-1.2.pc" ;; "mpi/packages/starpumpi-1.3.pc") CONFIG_FILES="$CONFIG_FILES mpi/packages/starpumpi-1.3.pc" ;; "mpi/packages/starpumpi-1.4.pc") CONFIG_FILES="$CONFIG_FILES mpi/packages/starpumpi-1.4.pc" ;; "starpufft/Makefile") CONFIG_FILES="$CONFIG_FILES starpufft/Makefile" ;; "starpufft/src/Makefile") CONFIG_FILES="$CONFIG_FILES starpufft/src/Makefile" ;; "starpufft/tests/Makefile") CONFIG_FILES="$CONFIG_FILES starpufft/tests/Makefile" ;; "starpufft/packages/libstarpufft.pc") CONFIG_FILES="$CONFIG_FILES starpufft/packages/libstarpufft.pc" ;; "starpufft/packages/starpufft-1.0.pc") CONFIG_FILES="$CONFIG_FILES starpufft/packages/starpufft-1.0.pc" ;; "starpufft/packages/starpufft-1.1.pc") CONFIG_FILES="$CONFIG_FILES starpufft/packages/starpufft-1.1.pc" ;; "starpufft/packages/starpufft-1.2.pc") CONFIG_FILES="$CONFIG_FILES starpufft/packages/starpufft-1.2.pc" ;; "starpufft/packages/starpufft-1.3.pc") CONFIG_FILES="$CONFIG_FILES starpufft/packages/starpufft-1.3.pc" ;; "starpufft/packages/starpufft-1.4.pc") CONFIG_FILES="$CONFIG_FILES starpufft/packages/starpufft-1.4.pc" ;; "starpurm/Makefile") CONFIG_FILES="$CONFIG_FILES starpurm/Makefile" ;; "starpurm/src/Makefile") CONFIG_FILES="$CONFIG_FILES starpurm/src/Makefile" ;; "starpurm/tests/Makefile") CONFIG_FILES="$CONFIG_FILES starpurm/tests/Makefile" ;; "starpurm/examples/Makefile") CONFIG_FILES="$CONFIG_FILES starpurm/examples/Makefile" ;; "starpurm/packages/starpurm-1.3.pc") CONFIG_FILES="$CONFIG_FILES starpurm/packages/starpurm-1.3.pc" ;; "starpurm/packages/starpurm-1.4.pc") CONFIG_FILES="$CONFIG_FILES starpurm/packages/starpurm-1.4.pc" ;; "starpu_openmp_llvm/Makefile") CONFIG_FILES="$CONFIG_FILES starpu_openmp_llvm/Makefile" ;; "starpu_openmp_llvm/src/Makefile") CONFIG_FILES="$CONFIG_FILES starpu_openmp_llvm/src/Makefile" ;; "starpu_openmp_llvm/examples/Makefile") CONFIG_FILES="$CONFIG_FILES starpu_openmp_llvm/examples/Makefile" ;; "starpupy/src/setup.cfg") CONFIG_FILES="$CONFIG_FILES starpupy/src/setup.cfg" ;; "starpupy/src/setup.py") CONFIG_FILES="$CONFIG_FILES starpupy/src/setup.py" ;; "starpupy/Makefile") CONFIG_FILES="$CONFIG_FILES starpupy/Makefile" ;; "starpupy/src/Makefile") CONFIG_FILES="$CONFIG_FILES starpupy/src/Makefile" ;; "starpupy/examples/Makefile") CONFIG_FILES="$CONFIG_FILES starpupy/examples/Makefile" ;; "starpupy/execute.sh") CONFIG_FILES="$CONFIG_FILES starpupy/execute.sh" ;; "starpupy/benchmark/Makefile") CONFIG_FILES="$CONFIG_FILES starpupy/benchmark/Makefile" ;; "examples/Makefile") CONFIG_FILES="$CONFIG_FILES examples/Makefile" ;; "examples/stencil/Makefile") CONFIG_FILES="$CONFIG_FILES examples/stencil/Makefile" ;; "tests/Makefile") CONFIG_FILES="$CONFIG_FILES tests/Makefile" ;; "tests/model-checking/Makefile") CONFIG_FILES="$CONFIG_FILES tests/model-checking/Makefile" ;; "tests/model-checking/starpu-mc.sh") CONFIG_FILES="$CONFIG_FILES tests/model-checking/starpu-mc.sh" ;; "mpi/Makefile") CONFIG_FILES="$CONFIG_FILES mpi/Makefile" ;; "mpi/src/Makefile") CONFIG_FILES="$CONFIG_FILES mpi/src/Makefile" ;; "mpi/tests/Makefile") CONFIG_FILES="$CONFIG_FILES mpi/tests/Makefile" ;; "mpi/examples/Makefile") CONFIG_FILES="$CONFIG_FILES mpi/examples/Makefile" ;; "mpi/tools/Makefile") CONFIG_FILES="$CONFIG_FILES mpi/tools/Makefile" ;; "mpi/GNUmakefile") CONFIG_FILES="$CONFIG_FILES mpi/GNUmakefile" ;; "sc_hypervisor/Makefile") CONFIG_FILES="$CONFIG_FILES sc_hypervisor/Makefile" ;; "sc_hypervisor/src/Makefile") CONFIG_FILES="$CONFIG_FILES sc_hypervisor/src/Makefile" ;; "sc_hypervisor/examples/Makefile") CONFIG_FILES="$CONFIG_FILES sc_hypervisor/examples/Makefile" ;; "doc/Makefile") CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;; "doc/doxygen/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen/Makefile" ;; "doc/doxygen/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen/doxygen-config.cfg" ;; "doc/doxygen/doxygen-config-include.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen/doxygen-config-include.cfg" ;; "doc/doxygen/doxygen_filter.sh") CONFIG_FILES="$CONFIG_FILES doc/doxygen/doxygen_filter.sh" ;; "doc/doxygen_dev/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_dev/Makefile" ;; "doc/doxygen_dev/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_dev/doxygen-config.cfg" ;; "doc/doxygen_dev/doxygen_filter.sh") CONFIG_FILES="$CONFIG_FILES doc/doxygen_dev/doxygen_filter.sh" ;; "doc/doxygen_dev/doxygen-config-include.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_dev/doxygen-config-include.cfg" ;; "doc/doxygen_web_introduction/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_introduction/Makefile" ;; "doc/doxygen_web_introduction/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_introduction/doxygen-config.cfg" ;; "doc/doxygen_web_installation/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_installation/Makefile" ;; "doc/doxygen_web_installation/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_installation/doxygen-config.cfg" ;; "doc/doxygen_web_basics/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_basics/Makefile" ;; "doc/doxygen_web_basics/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_basics/doxygen-config.cfg" ;; "doc/doxygen_web_applications/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_applications/Makefile" ;; "doc/doxygen_web_applications/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_applications/doxygen-config.cfg" ;; "doc/doxygen_web_performances/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_performances/Makefile" ;; "doc/doxygen_web_performances/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_performances/doxygen-config.cfg" ;; "doc/doxygen_web_faq/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_faq/Makefile" ;; "doc/doxygen_web_faq/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_faq/doxygen-config.cfg" ;; "doc/doxygen_web_languages/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_languages/Makefile" ;; "doc/doxygen_web_languages/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_languages/doxygen-config.cfg" ;; "doc/doxygen_web_extensions/Makefile") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_extensions/Makefile" ;; "doc/doxygen_web_extensions/doxygen-config.cfg") CONFIG_FILES="$CONFIG_FILES doc/doxygen_web_extensions/doxygen-config.cfg" ;; "tools/msvc/starpu_var.bat") CONFIG_FILES="$CONFIG_FILES tools/msvc/starpu_var.bat" ;; "min-dgels/Makefile") CONFIG_FILES="$CONFIG_FILES min-dgels/Makefile" ;; "bubble/Makefile") CONFIG_FILES="$CONFIG_FILES bubble/Makefile" ;; "bubble/tests/Makefile") CONFIG_FILES="$CONFIG_FILES bubble/tests/Makefile" ;; "julia/Makefile") CONFIG_FILES="$CONFIG_FILES julia/Makefile" ;; "julia/src/Makefile") CONFIG_FILES="$CONFIG_FILES julia/src/Makefile" ;; "julia/src/dynamic_compiler/Makefile") CONFIG_FILES="$CONFIG_FILES julia/src/dynamic_compiler/Makefile" ;; "julia/examples/Makefile") CONFIG_FILES="$CONFIG_FILES julia/examples/Makefile" ;; "julia/examples/execute.sh") CONFIG_FILES="$CONFIG_FILES julia/examples/execute.sh" ;; "eclipse-plugin/Makefile") CONFIG_FILES="$CONFIG_FILES eclipse-plugin/Makefile" ;; "eclipse-plugin/src/Makefile") CONFIG_FILES="$CONFIG_FILES eclipse-plugin/src/Makefile" ;; "eclipse-plugin/examples/Makefile") CONFIG_FILES="$CONFIG_FILES eclipse-plugin/examples/Makefile" ;; "eclipse-plugin/examples/hello/.cproject") CONFIG_FILES="$CONFIG_FILES eclipse-plugin/examples/hello/.cproject" ;; *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac done # If the user did not use the arguments to specify the items to instantiate, # then the envvar interface is used. Set only those that are not. # We use the long form for the default assignment because of an extremely # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then test ${CONFIG_FILES+y} || CONFIG_FILES=$config_files test ${CONFIG_HEADERS+y} || CONFIG_HEADERS=$config_headers test ${CONFIG_LINKS+y} || CONFIG_LINKS=$config_links test ${CONFIG_COMMANDS+y} || CONFIG_COMMANDS=$config_commands fi # Have a temporary directory for convenience. Make it in the build tree # simply because there is no reason against having it here, and in addition, # creating and moving files from /tmp can sometimes cause problems. # Hook for its removal unless debugging. # Note that there is a small window in which the directory will not be cleaned: # after its creation but before its name has been assigned to `$tmp'. $debug || { tmp= ac_tmp= trap 'exit_status=$? : "${ac_tmp:=$tmp}" { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status ' 0 trap 'as_fn_exit 1' 1 2 13 15 } # Create a (secure) tmp directory for tmp files. { tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && test -d "$tmp" } || { tmp=./conf$$-$RANDOM (umask 077 && mkdir "$tmp") } || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 ac_tmp=$tmp # Set up the scripts for CONFIG_FILES section. # No need to generate them if there are no CONFIG_FILES. # This happens for instance with `./config.status config.h'. if test -n "$CONFIG_FILES"; then ac_cr=`echo X | tr X '\015'` # On cygwin, bash can eat \r inside `` if the user requested igncr. # But we know of no other shell where ac_cr would be empty at this # point, so we can use a bashism as a fallback. if test "x$ac_cr" = x; then eval ac_cr=\$\'\\r\' fi ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then ac_cs_awk_cr='\\r' else ac_cs_awk_cr=$ac_cr fi echo 'BEGIN {' >"$ac_tmp/subs1.awk" && _ACEOF { echo "cat >conf$$subs.awk <<_ACEOF" && echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && echo "_ACEOF" } >conf$$subs.sh || as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` ac_delim='%!_!# ' for ac_last_try in false false false false false :; do . ./conf$$subs.sh || as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` if test $ac_delim_n = $ac_delim_num; then break elif $ac_last_try; then as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 else ac_delim="$ac_delim!$ac_delim _$ac_delim!! " fi done rm -f conf$$subs.sh cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && _ACEOF sed -n ' h s/^/S["/; s/!.*/"]=/ p g s/^[^!]*!// :repl t repl s/'"$ac_delim"'$// t delim :nl h s/\(.\{148\}\)..*/\1/ t more1 s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ p n b repl :more1 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t nl :delim h s/\(.\{148\}\)..*/\1/ t more2 s/["\\]/\\&/g; s/^/"/; s/$/"/ p b :more2 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t delim ' >$CONFIG_STATUS || ac_write_fail=1 rm -f conf$$subs.awk cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 _ACAWK cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && for (key in S) S_is_set[key] = 1 FS = "" } { line = $ 0 nfields = split(line, field, "@") substed = 0 len = length(field[1]) for (i = 2; i < nfields; i++) { key = field[i] keylen = length(key) if (S_is_set[key]) { value = S[key] line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) len += length(value) + length(field[++i]) substed = 1 } else len += 1 + keylen } print line } _ACAWK _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" else cat fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 _ACEOF # VPATH may cause trouble with some makes, so we remove sole $(srcdir), # ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and # trailing colons and then remove the whole line if VPATH becomes empty # (actually we leave an empty line to preserve line numbers). if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ h s/// s/^/:/ s/[ ]*$/:/ s/:\$(srcdir):/:/g s/:\${srcdir}:/:/g s/:@srcdir@:/:/g s/^:*// s/:*$// x s/\(=[ ]*\).*/\1/ G s/\n// s/^[^=]*=[ ]*$// }' fi cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 fi # test -n "$CONFIG_FILES" # Set up the scripts for CONFIG_HEADERS section. # No need to generate them if there are no CONFIG_HEADERS. # This happens for instance with `./config.status Makefile'. if test -n "$CONFIG_HEADERS"; then cat >"$ac_tmp/defines.awk" <<\_ACAWK || BEGIN { _ACEOF # Transform confdefs.h into an awk script `defines.awk', embedded as # here-document in config.status, that substitutes the proper values into # config.h.in to produce config.h. # Create a delimiter string that does not exist in confdefs.h, to ease # handling of long lines. ac_delim='%!_!# ' for ac_last_try in false false :; do ac_tt=`sed -n "/$ac_delim/p" confdefs.h` if test -z "$ac_tt"; then break elif $ac_last_try; then as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 else ac_delim="$ac_delim!$ac_delim _$ac_delim!! " fi done # For the awk script, D is an array of macro values keyed by name, # likewise P contains macro parameters if any. Preserve backslash # newline sequences. ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* sed -n ' s/.\{148\}/&'"$ac_delim"'/g t rset :rset s/^[ ]*#[ ]*define[ ][ ]*/ / t def d :def s/\\$// t bsnl s/["\\]/\\&/g s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ D["\1"]=" \3"/p s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p d :bsnl s/["\\]/\\&/g s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ D["\1"]=" \3\\\\\\n"\\/p t cont s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p t cont d :cont n s/.\{148\}/&'"$ac_delim"'/g t clear :clear s/\\$// t bsnlc s/["\\]/\\&/g; s/^/"/; s/$/"/p d :bsnlc s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p b cont ' >$CONFIG_STATUS || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 for (key in D) D_is_set[key] = 1 FS = "" } /^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { line = \$ 0 split(line, arg, " ") if (arg[1] == "#") { defundef = arg[2] mac1 = arg[3] } else { defundef = substr(arg[1], 2) mac1 = arg[2] } split(mac1, mac2, "(") #) macro = mac2[1] prefix = substr(line, 1, index(line, defundef) - 1) if (D_is_set[macro]) { # Preserve the white space surrounding the "#". print prefix "define", macro P[macro] D[macro] next } else { # Replace #undef with comments. This is necessary, for example, # in the case of _POSIX_SOURCE, which is predefined and required # on some systems where configure will not decide to define it. if (defundef == "undef") { print "/*", prefix defundef, macro, "*/" next } } } { print } _ACAWK _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 fi # test -n "$CONFIG_HEADERS" eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :L $CONFIG_LINKS :C $CONFIG_COMMANDS" shift for ac_tag do case $ac_tag in :[FHLC]) ac_mode=$ac_tag; continue;; esac case $ac_mode$ac_tag in :[FHL]*:*);; :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;; :[FH]-) ac_tag=-:-;; :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; esac ac_save_IFS=$IFS IFS=: set x $ac_tag IFS=$ac_save_IFS shift ac_file=$1 shift case $ac_mode in :L) ac_source=$1;; :[FH]) ac_file_inputs= for ac_f do case $ac_f in -) ac_f="$ac_tmp/stdin";; *) # Look for the file first in the build tree, then in the source tree # (if the path is not absolute). The absolute path cannot be DOS-style, # because $ac_f cannot contain `:'. test -f "$ac_f" || case $ac_f in [\\/$]*) false;; *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; esac || as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; esac case $ac_f in *\'*) ac_f=`printf "%s\n" "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac as_fn_append ac_file_inputs " '$ac_f'" done # Let's still pretend it is `configure' which instantiates (i.e., don't # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ configure_input='Generated from '` printf "%s\n" "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' `' by configure.' if test x"$ac_file" != x-; then configure_input="$ac_file. $configure_input" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 printf "%s\n" "$as_me: creating $ac_file" >&6;} fi # Neutralize special characters interpreted by sed in replacement strings. case $configure_input in #( *\&* | *\|* | *\\* ) ac_sed_conf_input=`printf "%s\n" "$configure_input" | sed 's/[\\\\&|]/\\\\&/g'`;; #( *) ac_sed_conf_input=$configure_input;; esac case $ac_tag in *:-:* | *:-) cat >"$ac_tmp/stdin" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; esac ;; esac ac_dir=`$as_dirname -- "$ac_file" || $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || printf "%s\n" X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` as_dir="$ac_dir"; as_fn_mkdir_p ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix case $ac_mode in :F) # # CONFIG_FILE # case $INSTALL in [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; esac ac_MKDIR_P=$MKDIR_P case $MKDIR_P in [\\/$]* | ?:[\\/]* ) ;; */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; esac _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # If the template does not know about datarootdir, expand it. # FIXME: This hack should be removed a few years after 2.60. ac_datarootdir_hack=; ac_datarootdir_seen= ac_sed_dataroot=' /datarootdir/ { p q } /@datadir@/p /@docdir@/p /@infodir@/p /@localedir@/p /@mandir@/p' case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in *datarootdir*) ac_datarootdir_seen=yes;; *@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 printf "%s\n" "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_datarootdir_hack=' s&@datadir@&$datadir&g s&@docdir@&$docdir&g s&@infodir@&$infodir&g s&@localedir@&$localedir&g s&@mandir@&$mandir&g s&\\\${datarootdir}&$datarootdir&g' ;; esac _ACEOF # Neutralize VPATH when `$srcdir' = `.'. # Shell code in configure.ac might set extrasub. # FIXME: do we really want to maintain this feature? cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_sed_extra="$ac_vpsub $extrasub _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 :t /@[a-zA-Z_][a-zA-Z_0-9]*@/!b s|@configure_input@|$ac_sed_conf_input|;t t s&@top_builddir@&$ac_top_builddir_sub&;t t s&@top_build_prefix@&$ac_top_build_prefix&;t t s&@srcdir@&$ac_srcdir&;t t s&@abs_srcdir@&$ac_abs_srcdir&;t t s&@top_srcdir@&$ac_top_srcdir&;t t s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t s&@builddir@&$ac_builddir&;t t s&@abs_builddir@&$ac_abs_builddir&;t t s&@abs_top_builddir@&$ac_abs_top_builddir&;t t s&@INSTALL@&$ac_INSTALL&;t t s&@MKDIR_P@&$ac_MKDIR_P&;t t $ac_datarootdir_hack " eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5 test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ "$ac_tmp/out"`; test -z "$ac_out"; } && { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&5 printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&2;} rm -f "$ac_tmp/stdin" case $ac_file in -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; esac \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; :H) # # CONFIG_HEADER # if test x"$ac_file" != x-; then { printf "%s\n" "/* $configure_input */" >&1 \ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" } >"$ac_tmp/config.h" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 printf "%s\n" "$as_me: $ac_file is unchanged" >&6;} else rm -f "$ac_file" mv "$ac_tmp/config.h" "$ac_file" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 fi else printf "%s\n" "/* $configure_input */" >&1 \ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ || as_fn_error $? "could not create -" "$LINENO" 5 fi # Compute "$ac_file"'s index in $config_headers. _am_arg="$ac_file" _am_stamp_count=1 for _am_header in $config_headers :; do case $_am_header in $_am_arg | $_am_arg:* ) break ;; * ) _am_stamp_count=`expr $_am_stamp_count + 1` ;; esac done echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" || $as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$_am_arg" : 'X\(//\)[^/]' \| \ X"$_am_arg" : 'X\(//\)$' \| \ X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null || printf "%s\n" X"$_am_arg" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'`/stamp-h$_am_stamp_count ;; :L) # # CONFIG_LINK # if test "$ac_source" = "$ac_file" && test "$srcdir" = '.'; then : else # Prefer the file from the source tree if names are identical. if test "$ac_source" = "$ac_file" || test ! -r "$ac_source"; then ac_source=$srcdir/$ac_source fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: linking $ac_source to $ac_file" >&5 printf "%s\n" "$as_me: linking $ac_source to $ac_file" >&6;} if test ! -r "$ac_source"; then as_fn_error $? "$ac_source: file not found" "$LINENO" 5 fi rm -f "$ac_file" # Try a relative symlink, then a hard link, then a copy. case $ac_source in [\\/$]* | ?:[\\/]* ) ac_rel_source=$ac_source ;; *) ac_rel_source=$ac_top_build_prefix$ac_source ;; esac ln -s "$ac_rel_source" "$ac_file" 2>/dev/null || ln "$ac_source" "$ac_file" 2>/dev/null || cp -p "$ac_source" "$ac_file" || as_fn_error $? "cannot link or copy $ac_source to $ac_file" "$LINENO" 5 fi ;; :C) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 printf "%s\n" "$as_me: executing $ac_file commands" >&6;} ;; esac case $ac_file$ac_mode in "depfiles":C) test x"$AMDEP_TRUE" != x"" || { # Older Autoconf quotes --file arguments for eval, but not when files # are listed without --file. Let's play safe and only enable the eval # if we detect the quoting. # TODO: see whether this extra hack can be removed once we start # requiring Autoconf 2.70 or later. case $CONFIG_FILES in #( *\'*) : eval set x "$CONFIG_FILES" ;; #( *) : set x $CONFIG_FILES ;; #( *) : ;; esac shift # Used to flag and report bootstrapping failures. am_rc=0 for am_mf do # Strip MF so we end up with the name of the file. am_mf=`printf "%s\n" "$am_mf" | sed -e 's/:.*$//'` # Check whether this is an Automake generated Makefile which includes # dependency-tracking related rules and includes. # Grep'ing the whole file directly is not great: AIX grep has a line # limit of 2048, but all sed's we know have understand at least 4000. sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ || continue am_dirpart=`$as_dirname -- "$am_mf" || $as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$am_mf" : 'X\(//\)[^/]' \| \ X"$am_mf" : 'X\(//\)$' \| \ X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || printf "%s\n" X"$am_mf" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` am_filepart=`$as_basename -- "$am_mf" || $as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \ X"$am_mf" : 'X\(//\)$' \| \ X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || printf "%s\n" X/"$am_mf" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` { echo "$as_me:$LINENO: cd "$am_dirpart" \ && sed -e '/# am--include-marker/d' "$am_filepart" \ | $MAKE -f - am--depfiles" >&5 (cd "$am_dirpart" \ && sed -e '/# am--include-marker/d' "$am_filepart" \ | $MAKE -f - am--depfiles) >&5 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } || am_rc=$? done if test $am_rc -ne 0; then { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "Something went wrong bootstrapping makefile fragments for automatic dependency tracking. If GNU make was not used, consider re-running the configure script with MAKE=\"gmake\" (or whatever is necessary). You can also try re-running configure with the '--disable-dependency-tracking' option to at least be able to build the package (albeit without support for automatic dependency tracking). See \`config.log' for more details" "$LINENO" 5; } fi { am_dirpart=; unset am_dirpart;} { am_filepart=; unset am_filepart;} { am_mf=; unset am_mf;} { am_rc=; unset am_rc;} rm -f conftest-deps.mk } ;; "libtool":C) # See if we are running on zsh, and set the options that allow our # commands through without removal of \ escapes. if test -n "${ZSH_VERSION+set}"; then setopt NO_GLOB_SUBST fi cfgfile=${ofile}T trap "$RM \"$cfgfile\"; exit 1" 1 2 15 $RM "$cfgfile" cat <<_LT_EOF >> "$cfgfile" #! $SHELL # Generated automatically by $as_me ($PACKAGE) $VERSION # NOTE: Changes made to this file will be lost: look at ltmain.sh. # Provide generalized library-building support services. # Written by Gordon Matzigkeit, 1996 # Copyright (C) 2014 Free Software Foundation, Inc. # This is free software; see the source for copying conditions. There is NO # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # GNU Libtool is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of of the License, or # (at your option) any later version. # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program or library that is built # using GNU Libtool, you may include this file under the same # distribution terms that you use for the rest of that program. # # GNU Libtool is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # The names of the tagged configurations supported by this script. available_tags='CXX F77 FC ' # Configured defaults for sys_lib_dlsearch_path munging. : \${LT_SYS_LIBRARY_PATH="$configure_time_lt_sys_library_path"} # ### BEGIN LIBTOOL CONFIG # Which release of libtool.m4 was used? macro_version=$macro_version macro_revision=$macro_revision # Assembler program. AS=$lt_AS # DLL creation program. DLLTOOL=$lt_DLLTOOL # Object dumper program. OBJDUMP=$lt_OBJDUMP # Whether or not to build shared libraries. build_libtool_libs=$enable_shared # Whether or not to build static libraries. build_old_libs=$enable_static # What type of objects to build. pic_mode=$pic_mode # Whether or not to optimize for fast installation. fast_install=$enable_fast_install # Shared archive member basename,for filename based shared library versioning on AIX. shared_archive_member_spec=$shared_archive_member_spec # Shell to use when invoking shell scripts. SHELL=$lt_SHELL # An echo program that protects backslashes. ECHO=$lt_ECHO # The PATH separator for the build system. PATH_SEPARATOR=$lt_PATH_SEPARATOR # The host system. host_alias=$host_alias host=$host host_os=$host_os # The build system. build_alias=$build_alias build=$build build_os=$build_os # A sed program that does not truncate output. SED=$lt_SED # Sed that helps us avoid accidentally triggering echo(1) options like -n. Xsed="\$SED -e 1s/^X//" # A grep program that handles long lines. GREP=$lt_GREP # An ERE matcher. EGREP=$lt_EGREP # A literal string matcher. FGREP=$lt_FGREP # A BSD- or MS-compatible name lister. NM=$lt_NM # Whether we need soft or hard links. LN_S=$lt_LN_S # What is the maximum length of a command? max_cmd_len=$max_cmd_len # Object file suffix (normally "o"). objext=$ac_objext # Executable file suffix (normally ""). exeext=$exeext # whether the shell understands "unset". lt_unset=$lt_unset # turn spaces into newlines. SP2NL=$lt_lt_SP2NL # turn newlines into spaces. NL2SP=$lt_lt_NL2SP # convert \$build file names to \$host format. to_host_file_cmd=$lt_cv_to_host_file_cmd # convert \$build files to toolchain format. to_tool_file_cmd=$lt_cv_to_tool_file_cmd # A file(cmd) program that detects file types. FILECMD=$lt_FILECMD # Method to check whether dependent libraries are shared objects. deplibs_check_method=$lt_deplibs_check_method # Command to use when deplibs_check_method = "file_magic". file_magic_cmd=$lt_file_magic_cmd # How to find potential files when deplibs_check_method = "file_magic". file_magic_glob=$lt_file_magic_glob # Find potential files using nocaseglob when deplibs_check_method = "file_magic". want_nocaseglob=$lt_want_nocaseglob # Command to associate shared and link libraries. sharedlib_from_linklib_cmd=$lt_sharedlib_from_linklib_cmd # The archiver. AR=$lt_AR # Flags to create an archive (by configure). lt_ar_flags=$lt_ar_flags # Flags to create an archive. AR_FLAGS=\${ARFLAGS-"\$lt_ar_flags"} # How to feed a file listing to the archiver. archiver_list_spec=$lt_archiver_list_spec # A symbol stripping program. STRIP=$lt_STRIP # Commands used to install an old-style archive. RANLIB=$lt_RANLIB old_postinstall_cmds=$lt_old_postinstall_cmds old_postuninstall_cmds=$lt_old_postuninstall_cmds # Whether to use a lock for old archive extraction. lock_old_archive_extraction=$lock_old_archive_extraction # A C compiler. LTCC=$lt_CC # LTCC compiler flags. LTCFLAGS=$lt_CFLAGS # Take the output of nm and produce a listing of raw symbols and C names. global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe # Transform the output of nm in a proper C declaration. global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl # Transform the output of nm into a list of symbols to manually relocate. global_symbol_to_import=$lt_lt_cv_sys_global_symbol_to_import # Transform the output of nm in a C name address pair. global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address # Transform the output of nm in a C name address pair when lib prefix is needed. global_symbol_to_c_name_address_lib_prefix=$lt_lt_cv_sys_global_symbol_to_c_name_address_lib_prefix # The name lister interface. nm_interface=$lt_lt_cv_nm_interface # Specify filename containing input files for \$NM. nm_file_list_spec=$lt_nm_file_list_spec # The root where to search for dependent libraries,and where our libraries should be installed. lt_sysroot=$lt_sysroot # Command to truncate a binary pipe. lt_truncate_bin=$lt_lt_cv_truncate_bin # The name of the directory that contains temporary libtool files. objdir=$objdir # Used to examine libraries when file_magic_cmd begins with "file". MAGIC_CMD=$MAGIC_CMD # Must we lock files when doing compilation? need_locks=$lt_need_locks # Manifest tool. MANIFEST_TOOL=$lt_MANIFEST_TOOL # Tool to manipulate archived DWARF debug symbol files on Mac OS X. DSYMUTIL=$lt_DSYMUTIL # Tool to change global to local symbols on Mac OS X. NMEDIT=$lt_NMEDIT # Tool to manipulate fat objects and archives on Mac OS X. LIPO=$lt_LIPO # ldd/readelf like tool for Mach-O binaries on Mac OS X. OTOOL=$lt_OTOOL # ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4. OTOOL64=$lt_OTOOL64 # Old archive suffix (normally "a"). libext=$libext # Shared library suffix (normally ".so"). shrext_cmds=$lt_shrext_cmds # The commands to extract the exported symbol list from a shared archive. extract_expsyms_cmds=$lt_extract_expsyms_cmds # Variables whose values should be saved in libtool wrapper scripts and # restored at link time. variables_saved_for_relink=$lt_variables_saved_for_relink # Do we need the "lib" prefix for modules? need_lib_prefix=$need_lib_prefix # Do we need a version for libraries? need_version=$need_version # Library versioning type. version_type=$version_type # Shared library runtime path variable. runpath_var=$runpath_var # Shared library path variable. shlibpath_var=$shlibpath_var # Is shlibpath searched before the hard-coded library search path? shlibpath_overrides_runpath=$shlibpath_overrides_runpath # Format of library name prefix. libname_spec=$lt_libname_spec # List of archive names. First name is the real one, the rest are links. # The last name is the one that the linker finds with -lNAME library_names_spec=$lt_library_names_spec # The coded name of the library, if different from the real name. soname_spec=$lt_soname_spec # Permission mode override for installation of shared libraries. install_override_mode=$lt_install_override_mode # Command to use after installation of a shared archive. postinstall_cmds=$lt_postinstall_cmds # Command to use after uninstallation of a shared archive. postuninstall_cmds=$lt_postuninstall_cmds # Commands used to finish a libtool library installation in a directory. finish_cmds=$lt_finish_cmds # As "finish_cmds", except a single script fragment to be evaled but # not shown. finish_eval=$lt_finish_eval # Whether we should hardcode library paths into libraries. hardcode_into_libs=$hardcode_into_libs # Compile-time system search path for libraries. sys_lib_search_path_spec=$lt_sys_lib_search_path_spec # Detected run-time system search path for libraries. sys_lib_dlsearch_path_spec=$lt_configure_time_dlsearch_path # Explicit LT_SYS_LIBRARY_PATH set during ./configure time. configure_time_lt_sys_library_path=$lt_configure_time_lt_sys_library_path # Whether dlopen is supported. dlopen_support=$enable_dlopen # Whether dlopen of programs is supported. dlopen_self=$enable_dlopen_self # Whether dlopen of statically linked programs is supported. dlopen_self_static=$enable_dlopen_self_static # Commands to strip libraries. old_striplib=$lt_old_striplib striplib=$lt_striplib # The linker used to build libraries. LD=$lt_LD # How to create reloadable object files. reload_flag=$lt_reload_flag reload_cmds=$lt_reload_cmds # Commands used to build an old-style archive. old_archive_cmds=$lt_old_archive_cmds # A language specific compiler. CC=$lt_compiler # Is the compiler the GNU compiler? with_gcc=$GCC # Compiler flag to turn off builtin functions. no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag # Additional compiler flags for building library objects. pic_flag=$lt_lt_prog_compiler_pic # How to pass a linker flag through the compiler. wl=$lt_lt_prog_compiler_wl # Compiler flag to prevent dynamic linking. link_static_flag=$lt_lt_prog_compiler_static # Does compiler simultaneously support -c and -o options? compiler_c_o=$lt_lt_cv_prog_compiler_c_o # Whether or not to add -lc for building shared libraries. build_libtool_need_lc=$archive_cmds_need_lc # Whether or not to disallow shared libs when runtime libs are static. allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes # Compiler flag to allow reflexive dlopens. export_dynamic_flag_spec=$lt_export_dynamic_flag_spec # Compiler flag to generate shared objects directly from archives. whole_archive_flag_spec=$lt_whole_archive_flag_spec # Whether the compiler copes with passing no objects directly. compiler_needs_object=$lt_compiler_needs_object # Create an old-style archive from a shared archive. old_archive_from_new_cmds=$lt_old_archive_from_new_cmds # Create a temporary old-style archive to link instead of a shared archive. old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds # Commands used to build a shared archive. archive_cmds=$lt_archive_cmds archive_expsym_cmds=$lt_archive_expsym_cmds # Commands used to build a loadable module if different from building # a shared archive. module_cmds=$lt_module_cmds module_expsym_cmds=$lt_module_expsym_cmds # Whether we are building with GNU ld or not. with_gnu_ld=$lt_with_gnu_ld # Flag that allows shared libraries with undefined symbols to be built. allow_undefined_flag=$lt_allow_undefined_flag # Flag that enforces no undefined symbols. no_undefined_flag=$lt_no_undefined_flag # Flag to hardcode \$libdir into a binary during linking. # This must work even if \$libdir does not exist hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec # Whether we need a single "-rpath" flag with a separated argument. hardcode_libdir_separator=$lt_hardcode_libdir_separator # Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes # DIR into the resulting binary. hardcode_direct=$hardcode_direct # Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes # DIR into the resulting binary and the resulting library dependency is # "absolute",i.e impossible to change by setting \$shlibpath_var if the # library is relocated. hardcode_direct_absolute=$hardcode_direct_absolute # Set to "yes" if using the -LDIR flag during linking hardcodes DIR # into the resulting binary. hardcode_minus_L=$hardcode_minus_L # Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR # into the resulting binary. hardcode_shlibpath_var=$hardcode_shlibpath_var # Set to "yes" if building a shared library automatically hardcodes DIR # into the library and all subsequent libraries and executables linked # against it. hardcode_automatic=$hardcode_automatic # Set to yes if linker adds runtime paths of dependent libraries # to runtime path list. inherit_rpath=$inherit_rpath # Whether libtool must link a program against all its dependency libraries. link_all_deplibs=$link_all_deplibs # Set to "yes" if exported symbols are required. always_export_symbols=$always_export_symbols # The commands to list exported symbols. export_symbols_cmds=$lt_export_symbols_cmds # Symbols that should not be listed in the preloaded symbols. exclude_expsyms=$lt_exclude_expsyms # Symbols that must always be exported. include_expsyms=$lt_include_expsyms # Commands necessary for linking programs (against libraries) with templates. prelink_cmds=$lt_prelink_cmds # Commands necessary for finishing linking programs. postlink_cmds=$lt_postlink_cmds # Specify filename containing input files. file_list_spec=$lt_file_list_spec # How to hardcode a shared library path into an executable. hardcode_action=$hardcode_action # The directories searched by this compiler when creating a shared library. compiler_lib_search_dirs=$lt_compiler_lib_search_dirs # Dependencies to place before and after the objects being linked to # create a shared library. predep_objects=$lt_predep_objects postdep_objects=$lt_postdep_objects predeps=$lt_predeps postdeps=$lt_postdeps # The library search path used internally by the compiler when linking # a shared library. compiler_lib_search_path=$lt_compiler_lib_search_path # ### END LIBTOOL CONFIG _LT_EOF cat <<'_LT_EOF' >> "$cfgfile" # ### BEGIN FUNCTIONS SHARED WITH CONFIGURE # func_munge_path_list VARIABLE PATH # ----------------------------------- # VARIABLE is name of variable containing _space_ separated list of # directories to be munged by the contents of PATH, which is string # having a format: # "DIR[:DIR]:" # string "DIR[ DIR]" will be prepended to VARIABLE # ":DIR[:DIR]" # string "DIR[ DIR]" will be appended to VARIABLE # "DIRP[:DIRP]::[DIRA:]DIRA" # string "DIRP[ DIRP]" will be prepended to VARIABLE and string # "DIRA[ DIRA]" will be appended to VARIABLE # "DIR[:DIR]" # VARIABLE will be replaced by "DIR[ DIR]" func_munge_path_list () { case x$2 in x) ;; *:) eval $1=\"`$ECHO $2 | $SED 's/:/ /g'` \$$1\" ;; x:*) eval $1=\"\$$1 `$ECHO $2 | $SED 's/:/ /g'`\" ;; *::*) eval $1=\"\$$1\ `$ECHO $2 | $SED -e 's/.*:://' -e 's/:/ /g'`\" eval $1=\"`$ECHO $2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \$$1\" ;; *) eval $1=\"`$ECHO $2 | $SED 's/:/ /g'`\" ;; esac } # Calculate cc_basename. Skip known compiler wrappers and cross-prefix. func_cc_basename () { for cc_temp in $*""; do case $cc_temp in compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; \-*) ;; *) break;; esac done func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` } # ### END FUNCTIONS SHARED WITH CONFIGURE _LT_EOF case $host_os in aix3*) cat <<\_LT_EOF >> "$cfgfile" # AIX sometimes has problems with the GCC collect2 program. For some # reason, if we set the COLLECT_NAMES environment variable, the problems # vanish in a puff of smoke. if test set != "${COLLECT_NAMES+set}"; then COLLECT_NAMES= export COLLECT_NAMES fi _LT_EOF ;; esac ltmain=$ac_aux_dir/ltmain.sh # We use sed instead of cat because bash on DJGPP gets confused if # if finds mixed CR/LF and LF-only lines. Since sed operates in # text mode, it properly converts lines to CR/LF. This bash problem # is reportedly fixed, but why not run on old versions too? $SED '$q' "$ltmain" >> "$cfgfile" \ || (rm -f "$cfgfile"; exit 1) mv -f "$cfgfile" "$ofile" || (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") chmod +x "$ofile" cat <<_LT_EOF >> "$ofile" # ### BEGIN LIBTOOL TAG CONFIG: CXX # The linker used to build libraries. LD=$lt_LD_CXX # How to create reloadable object files. reload_flag=$lt_reload_flag_CXX reload_cmds=$lt_reload_cmds_CXX # Commands used to build an old-style archive. old_archive_cmds=$lt_old_archive_cmds_CXX # A language specific compiler. CC=$lt_compiler_CXX # Is the compiler the GNU compiler? with_gcc=$GCC_CXX # Compiler flag to turn off builtin functions. no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_CXX # Additional compiler flags for building library objects. pic_flag=$lt_lt_prog_compiler_pic_CXX # How to pass a linker flag through the compiler. wl=$lt_lt_prog_compiler_wl_CXX # Compiler flag to prevent dynamic linking. link_static_flag=$lt_lt_prog_compiler_static_CXX # Does compiler simultaneously support -c and -o options? compiler_c_o=$lt_lt_cv_prog_compiler_c_o_CXX # Whether or not to add -lc for building shared libraries. build_libtool_need_lc=$archive_cmds_need_lc_CXX # Whether or not to disallow shared libs when runtime libs are static. allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_CXX # Compiler flag to allow reflexive dlopens. export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_CXX # Compiler flag to generate shared objects directly from archives. whole_archive_flag_spec=$lt_whole_archive_flag_spec_CXX # Whether the compiler copes with passing no objects directly. compiler_needs_object=$lt_compiler_needs_object_CXX # Create an old-style archive from a shared archive. old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_CXX # Create a temporary old-style archive to link instead of a shared archive. old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_CXX # Commands used to build a shared archive. archive_cmds=$lt_archive_cmds_CXX archive_expsym_cmds=$lt_archive_expsym_cmds_CXX # Commands used to build a loadable module if different from building # a shared archive. module_cmds=$lt_module_cmds_CXX module_expsym_cmds=$lt_module_expsym_cmds_CXX # Whether we are building with GNU ld or not. with_gnu_ld=$lt_with_gnu_ld_CXX # Flag that allows shared libraries with undefined symbols to be built. allow_undefined_flag=$lt_allow_undefined_flag_CXX # Flag that enforces no undefined symbols. no_undefined_flag=$lt_no_undefined_flag_CXX # Flag to hardcode \$libdir into a binary during linking. # This must work even if \$libdir does not exist hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_CXX # Whether we need a single "-rpath" flag with a separated argument. hardcode_libdir_separator=$lt_hardcode_libdir_separator_CXX # Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes # DIR into the resulting binary. hardcode_direct=$hardcode_direct_CXX # Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes # DIR into the resulting binary and the resulting library dependency is # "absolute",i.e impossible to change by setting \$shlibpath_var if the # library is relocated. hardcode_direct_absolute=$hardcode_direct_absolute_CXX # Set to "yes" if using the -LDIR flag during linking hardcodes DIR # into the resulting binary. hardcode_minus_L=$hardcode_minus_L_CXX # Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR # into the resulting binary. hardcode_shlibpath_var=$hardcode_shlibpath_var_CXX # Set to "yes" if building a shared library automatically hardcodes DIR # into the library and all subsequent libraries and executables linked # against it. hardcode_automatic=$hardcode_automatic_CXX # Set to yes if linker adds runtime paths of dependent libraries # to runtime path list. inherit_rpath=$inherit_rpath_CXX # Whether libtool must link a program against all its dependency libraries. link_all_deplibs=$link_all_deplibs_CXX # Set to "yes" if exported symbols are required. always_export_symbols=$always_export_symbols_CXX # The commands to list exported symbols. export_symbols_cmds=$lt_export_symbols_cmds_CXX # Symbols that should not be listed in the preloaded symbols. exclude_expsyms=$lt_exclude_expsyms_CXX # Symbols that must always be exported. include_expsyms=$lt_include_expsyms_CXX # Commands necessary for linking programs (against libraries) with templates. prelink_cmds=$lt_prelink_cmds_CXX # Commands necessary for finishing linking programs. postlink_cmds=$lt_postlink_cmds_CXX # Specify filename containing input files. file_list_spec=$lt_file_list_spec_CXX # How to hardcode a shared library path into an executable. hardcode_action=$hardcode_action_CXX # The directories searched by this compiler when creating a shared library. compiler_lib_search_dirs=$lt_compiler_lib_search_dirs_CXX # Dependencies to place before and after the objects being linked to # create a shared library. predep_objects=$lt_predep_objects_CXX postdep_objects=$lt_postdep_objects_CXX predeps=$lt_predeps_CXX postdeps=$lt_postdeps_CXX # The library search path used internally by the compiler when linking # a shared library. compiler_lib_search_path=$lt_compiler_lib_search_path_CXX # ### END LIBTOOL TAG CONFIG: CXX _LT_EOF cat <<_LT_EOF >> "$ofile" # ### BEGIN LIBTOOL TAG CONFIG: F77 # The linker used to build libraries. LD=$lt_LD_F77 # How to create reloadable object files. reload_flag=$lt_reload_flag_F77 reload_cmds=$lt_reload_cmds_F77 # Commands used to build an old-style archive. old_archive_cmds=$lt_old_archive_cmds_F77 # A language specific compiler. CC=$lt_compiler_F77 # Is the compiler the GNU compiler? with_gcc=$GCC_F77 # Compiler flag to turn off builtin functions. no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_F77 # Additional compiler flags for building library objects. pic_flag=$lt_lt_prog_compiler_pic_F77 # How to pass a linker flag through the compiler. wl=$lt_lt_prog_compiler_wl_F77 # Compiler flag to prevent dynamic linking. link_static_flag=$lt_lt_prog_compiler_static_F77 # Does compiler simultaneously support -c and -o options? compiler_c_o=$lt_lt_cv_prog_compiler_c_o_F77 # Whether or not to add -lc for building shared libraries. build_libtool_need_lc=$archive_cmds_need_lc_F77 # Whether or not to disallow shared libs when runtime libs are static. allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_F77 # Compiler flag to allow reflexive dlopens. export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_F77 # Compiler flag to generate shared objects directly from archives. whole_archive_flag_spec=$lt_whole_archive_flag_spec_F77 # Whether the compiler copes with passing no objects directly. compiler_needs_object=$lt_compiler_needs_object_F77 # Create an old-style archive from a shared archive. old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_F77 # Create a temporary old-style archive to link instead of a shared archive. old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_F77 # Commands used to build a shared archive. archive_cmds=$lt_archive_cmds_F77 archive_expsym_cmds=$lt_archive_expsym_cmds_F77 # Commands used to build a loadable module if different from building # a shared archive. module_cmds=$lt_module_cmds_F77 module_expsym_cmds=$lt_module_expsym_cmds_F77 # Whether we are building with GNU ld or not. with_gnu_ld=$lt_with_gnu_ld_F77 # Flag that allows shared libraries with undefined symbols to be built. allow_undefined_flag=$lt_allow_undefined_flag_F77 # Flag that enforces no undefined symbols. no_undefined_flag=$lt_no_undefined_flag_F77 # Flag to hardcode \$libdir into a binary during linking. # This must work even if \$libdir does not exist hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_F77 # Whether we need a single "-rpath" flag with a separated argument. hardcode_libdir_separator=$lt_hardcode_libdir_separator_F77 # Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes # DIR into the resulting binary. hardcode_direct=$hardcode_direct_F77 # Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes # DIR into the resulting binary and the resulting library dependency is # "absolute",i.e impossible to change by setting \$shlibpath_var if the # library is relocated. hardcode_direct_absolute=$hardcode_direct_absolute_F77 # Set to "yes" if using the -LDIR flag during linking hardcodes DIR # into the resulting binary. hardcode_minus_L=$hardcode_minus_L_F77 # Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR # into the resulting binary. hardcode_shlibpath_var=$hardcode_shlibpath_var_F77 # Set to "yes" if building a shared library automatically hardcodes DIR # into the library and all subsequent libraries and executables linked # against it. hardcode_automatic=$hardcode_automatic_F77 # Set to yes if linker adds runtime paths of dependent libraries # to runtime path list. inherit_rpath=$inherit_rpath_F77 # Whether libtool must link a program against all its dependency libraries. link_all_deplibs=$link_all_deplibs_F77 # Set to "yes" if exported symbols are required. always_export_symbols=$always_export_symbols_F77 # The commands to list exported symbols. export_symbols_cmds=$lt_export_symbols_cmds_F77 # Symbols that should not be listed in the preloaded symbols. exclude_expsyms=$lt_exclude_expsyms_F77 # Symbols that must always be exported. include_expsyms=$lt_include_expsyms_F77 # Commands necessary for linking programs (against libraries) with templates. prelink_cmds=$lt_prelink_cmds_F77 # Commands necessary for finishing linking programs. postlink_cmds=$lt_postlink_cmds_F77 # Specify filename containing input files. file_list_spec=$lt_file_list_spec_F77 # How to hardcode a shared library path into an executable. hardcode_action=$hardcode_action_F77 # The directories searched by this compiler when creating a shared library. compiler_lib_search_dirs=$lt_compiler_lib_search_dirs_F77 # Dependencies to place before and after the objects being linked to # create a shared library. predep_objects=$lt_predep_objects_F77 postdep_objects=$lt_postdep_objects_F77 predeps=$lt_predeps_F77 postdeps=$lt_postdeps_F77 # The library search path used internally by the compiler when linking # a shared library. compiler_lib_search_path=$lt_compiler_lib_search_path_F77 # ### END LIBTOOL TAG CONFIG: F77 _LT_EOF cat <<_LT_EOF >> "$ofile" # ### BEGIN LIBTOOL TAG CONFIG: FC # The linker used to build libraries. LD=$lt_LD_FC # How to create reloadable object files. reload_flag=$lt_reload_flag_FC reload_cmds=$lt_reload_cmds_FC # Commands used to build an old-style archive. old_archive_cmds=$lt_old_archive_cmds_FC # A language specific compiler. CC=$lt_compiler_FC # Is the compiler the GNU compiler? with_gcc=$GCC_FC # Compiler flag to turn off builtin functions. no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_FC # Additional compiler flags for building library objects. pic_flag=$lt_lt_prog_compiler_pic_FC # How to pass a linker flag through the compiler. wl=$lt_lt_prog_compiler_wl_FC # Compiler flag to prevent dynamic linking. link_static_flag=$lt_lt_prog_compiler_static_FC # Does compiler simultaneously support -c and -o options? compiler_c_o=$lt_lt_cv_prog_compiler_c_o_FC # Whether or not to add -lc for building shared libraries. build_libtool_need_lc=$archive_cmds_need_lc_FC # Whether or not to disallow shared libs when runtime libs are static. allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_FC # Compiler flag to allow reflexive dlopens. export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_FC # Compiler flag to generate shared objects directly from archives. whole_archive_flag_spec=$lt_whole_archive_flag_spec_FC # Whether the compiler copes with passing no objects directly. compiler_needs_object=$lt_compiler_needs_object_FC # Create an old-style archive from a shared archive. old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_FC # Create a temporary old-style archive to link instead of a shared archive. old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_FC # Commands used to build a shared archive. archive_cmds=$lt_archive_cmds_FC archive_expsym_cmds=$lt_archive_expsym_cmds_FC # Commands used to build a loadable module if different from building # a shared archive. module_cmds=$lt_module_cmds_FC module_expsym_cmds=$lt_module_expsym_cmds_FC # Whether we are building with GNU ld or not. with_gnu_ld=$lt_with_gnu_ld_FC # Flag that allows shared libraries with undefined symbols to be built. allow_undefined_flag=$lt_allow_undefined_flag_FC # Flag that enforces no undefined symbols. no_undefined_flag=$lt_no_undefined_flag_FC # Flag to hardcode \$libdir into a binary during linking. # This must work even if \$libdir does not exist hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_FC # Whether we need a single "-rpath" flag with a separated argument. hardcode_libdir_separator=$lt_hardcode_libdir_separator_FC # Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes # DIR into the resulting binary. hardcode_direct=$hardcode_direct_FC # Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes # DIR into the resulting binary and the resulting library dependency is # "absolute",i.e impossible to change by setting \$shlibpath_var if the # library is relocated. hardcode_direct_absolute=$hardcode_direct_absolute_FC # Set to "yes" if using the -LDIR flag during linking hardcodes DIR # into the resulting binary. hardcode_minus_L=$hardcode_minus_L_FC # Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR # into the resulting binary. hardcode_shlibpath_var=$hardcode_shlibpath_var_FC # Set to "yes" if building a shared library automatically hardcodes DIR # into the library and all subsequent libraries and executables linked # against it. hardcode_automatic=$hardcode_automatic_FC # Set to yes if linker adds runtime paths of dependent libraries # to runtime path list. inherit_rpath=$inherit_rpath_FC # Whether libtool must link a program against all its dependency libraries. link_all_deplibs=$link_all_deplibs_FC # Set to "yes" if exported symbols are required. always_export_symbols=$always_export_symbols_FC # The commands to list exported symbols. export_symbols_cmds=$lt_export_symbols_cmds_FC # Symbols that should not be listed in the preloaded symbols. exclude_expsyms=$lt_exclude_expsyms_FC # Symbols that must always be exported. include_expsyms=$lt_include_expsyms_FC # Commands necessary for linking programs (against libraries) with templates. prelink_cmds=$lt_prelink_cmds_FC # Commands necessary for finishing linking programs. postlink_cmds=$lt_postlink_cmds_FC # Specify filename containing input files. file_list_spec=$lt_file_list_spec_FC # How to hardcode a shared library path into an executable. hardcode_action=$hardcode_action_FC # The directories searched by this compiler when creating a shared library. compiler_lib_search_dirs=$lt_compiler_lib_search_dirs_FC # Dependencies to place before and after the objects being linked to # create a shared library. predep_objects=$lt_predep_objects_FC postdep_objects=$lt_postdep_objects_FC predeps=$lt_predeps_FC postdeps=$lt_postdeps_FC # The library search path used internally by the compiler when linking # a shared library. compiler_lib_search_path=$lt_compiler_lib_search_path_FC # ### END LIBTOOL TAG CONFIG: FC _LT_EOF ;; "executable-scripts":C) chmod +x tests/regression/regression.sh chmod +x tests/model-checking/starpu-mc.sh chmod +x tools/starpu_env chmod +x tools/starpu_codelet_profile chmod +x tools/starpu_codelet_histo_profile chmod +x tools/starpu_mpi_comm_matrix.py chmod +x tools/starpu_fxt_number_events_to_names.py chmod +x tools/starpu_workers_activity chmod +x tools/starpu_paje_draw_histogram chmod +x tools/starpu_paje_state_stats chmod +x tools/starpu_paje_summary chmod +x tools/starpu_config chmod +x tools/starpu_mlr_analysis chmod +x tools/starpu_paje_sort chmod +x tools/starpu_smpirun chmod +x tools/starpu_tcpipexec chmod +x doc/doxygen/doxygen_filter.sh chmod +x doc/doxygen_dev/doxygen_filter.sh chmod +x starpupy/execute.sh chmod +x julia/examples/execute.sh for x in \ tests/microbenchs/tasks_data_overhead.sh \ tests/microbenchs/sync_tasks_data_overhead.sh \ tests/microbenchs/async_tasks_data_overhead.sh \ tests/microbenchs/tasks_size_overhead.sh \ tests/microbenchs/tasks_size_overhead_sched.sh \ tests/microbenchs/tasks_size_overhead_scheds.sh \ tests/microbenchs/tasks_size_overhead.gp \ tests/microbenchs/microbench.sh \ tests/microbenchs/parallel_dependent_homogeneous_tasks_data.sh \ tests/microbenchs/parallel_independent_heterogeneous_tasks_data.sh \ tests/microbenchs/parallel_independent_heterogeneous_tasks.sh \ tests/microbenchs/parallel_independent_homogeneous_tasks_data.sh \ tests/microbenchs/parallel_independent_homogeneous_tasks.sh \ tests/microbenchs/parallel_redux_homogeneous_tasks_data.sh \ tests/microbenchs/parallel_redux_heterogeneous_tasks_data.sh \ tests/microbenchs/bandwidth_scheds.sh \ tests/energy/static.sh \ tests/energy/dynamic.sh \ tests/datawizard/locality.sh \ tests/overlap/overlap.sh \ tests/model-checking/prio_list.sh \ tests/model-checking/prio_list2.sh \ tests/model-checking/prio_list3.sh \ tests/model-checking/barrier.sh \ examples/heat/heat.sh \ examples/lu/lu.sh \ examples/cholesky/cholesky.sh \ examples/cholesky/cholesky_julia.sh \ examples/mult/sgemm.sh \ examples/scheduler/schedulers.sh \ examples/scheduler/schedulers_context.sh \ examples/scheduler/libdummy_sched.sh \ examples/profiling_tool/prof.sh \ tools/starpu_paje_draw_histogram.R \ tools/starpu_paje_state_stats.R \ tools/starpu_mlr_analysis.Rmd \ tools/starpu_paje_summary.Rmd \ tools/starpu_trace_state_stats.py \ julia/examples/check_deps/check_deps.sh \ julia/examples/mult/mult_starpu.sh \ julia/examples/mult/perf.sh \ julia/examples/variable/variable.sh \ julia/examples/task_insert_color/task_insert_color.sh \ julia/examples/vector_scal/vector_scal.sh \ julia/examples/mandelbrot/mandelbrot.sh \ julia/examples/callback/callback.sh \ julia/examples/dependency/task_dep.sh \ julia/examples/dependency/tag_dep.sh \ julia/examples/dependency/end_dep.sh \ julia/examples/axpy/axpy.sh \ julia/examples/gemm/gemm.sh \ julia/examples/cholesky/cholesky.sh \ starpupy/benchmark/tasks_size_overhead.sh \ starpupy/benchmark/tasks_size_overhead.gp \ starpupy/benchmark/test_handle_perf.sh \ starpupy/benchmark/test_handle_perf_pickle.sh \ starpupy/examples/starpu_py.sh \ starpupy/examples/starpu_py.concurrent.sh \ starpupy/examples/starpu_py_handle.sh \ starpupy/examples/starpu_py_handle.concurrent.sh \ starpupy/examples/starpu_py_np.sh \ starpupy/examples/starpu_py_np.concurrent.sh \ starpupy/examples/starpu_py_parallel.sh \ starpupy/examples/starpu_py_partition.sh \ starpupy/examples/starpu_py_partition.concurrent.sh \ starpupy/examples/starpu_py_perfmodel.sh \ starpupy/examples/starpu_py_perfmodel.concurrent.sh \ starpupy/examples/starpu_py_numpy.sh \ starpupy/examples/starpu_py_numpy.concurrent.sh \ ; do test -e $x || ( mkdir -p $(dirname $x) && ln -sf $ac_abs_top_srcdir/$x $(dirname $x) ) done for x in tools julia/examples starpufft/tests examples examples/stencil mpi/tests mpi/examples socl/examples bubble/tests starpupy/examples starpu_openmp_llvm/examples \ ; do test -e $x/loader.c || ln -sf $ac_abs_top_srcdir/tests/loader.c $x done sed -i -e '/ STARPU_SRC_DIR /d' -e '/ STARPU_BUILD_DIR /d' src/common/config.h ;; esac done # for ac_tag as_fn_exit 0 _ACEOF ac_clean_files=$ac_clean_files_save test $ac_write_fail = 0 || as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 # configure is writing to config.log, and then calls config.status. # config.status does its own redirection, appending to config.log. # Unfortunately, on DOS this fails, as config.log is still kept open # by configure, so config.status won't be able to write to it; its # output is simply discarded. So we exec the FD to /dev/null, # effectively closing config.log, so it can be properly (re)opened and # appended to by config.status. When coming back to configure, we # need to make the FD available again. if test "$no_create" != yes; then ac_cs_success=: ac_config_status_args= test "$silent" = yes && ac_config_status_args="$ac_config_status_args --quiet" exec 5>/dev/null $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false exec 5>>config.log # Use ||, not &&, to avoid exiting from the if with $? = 1, which # would make configure fail if this is the last instruction. $ac_cs_success || as_fn_exit 1 fi if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: CPUs enabled: $enable_cpu CUDA enabled: $enable_cuda $NO_NVML HIP enabled: $enable_hip OpenCL enabled: $enable_opencl Max FPGA enabled: $enable_max_fpga Compile-time limits (change these with --enable-maxcpus, --enable-maxcudadev, --enable-maxopencldev, --enable-maxmaxfpgadev, --enable-maxnodes, --enable-maxbuffers) (Note these numbers do not represent the number of detected devices, but the maximum number of devices StarPU can manage) Maximum number of CPUs: $maxcpus Maximum number of CUDA devices: $nmaxcudadev Maximum number of HIP devices: $nmaxhipdev Maximum number of OpenCL devices: $nmaxopencldev Maximum number of Maxeler FPGA devices: $nmaxmaxfpgadev Maximum number of MPI master-slave devices: $nmaxmpidev Maximum number of TCP/IP master-slave devices: $nmaxtcpipdev Maximum number of memory nodes: $maxnodes Maximum number of task buffers: $nmaxbuffers CUDA GPU-GPU transfers: $enable_cuda_memcpy_peer CUDA Map: $enable_cuda_map HIP GPU-GPU transfers: $enable_hip_memcpy_peer Allocation cache: $enable_allocation_cache Magma enabled: $have_magma BLAS library: $blas_lib hwloc: $have_valid_hwloc FxT trace enabled: $enable_fxt Documentation HTML: $enable_build_doc Documentation PDF: $enable_build_doc_pdf Examples: $enable_build_examples StarPU Extensions: StarPU MPI enabled: $build_mpi_lib StarPU MPI failure tolerance: $enable_mpi_ft StarPU MPI failure tolerance stats: $use_mpi_ft_stats StarPU MPI(nmad) enabled: $build_nmad_lib MPI test suite: $running_mpi_check Master-Slave MPI enabled: $build_mpi_master_slave Master-Slave TCP/IP enabled: $build_tcpip_master_slave FFT Support: $fft_support Resource Management enabled: $starpurm_support Python Interface enabled: $starpupy_support OpenMP runtime support enabled: $enable_openmp OpenMP LLVM runtime support enabled: $enable_openmp_llvm Parallel Worker support enabled: $enable_parallel_worker SOCL enabled: $build_socl SOCL test suite: $run_socl_check Scheduler Hypervisor: $build_sc_hypervisor simgrid enabled: $enable_simgrid ayudame enabled: $ayu_msg HDF5 enabled: $enable_hdf5 Native fortran support: $enable_build_fortran Native MPI fortran support: $use_mpi_fort Support for multiple linear regression models: $support_mlr Hierarchical dags support: $enable_bubble JULIA enabled: $enable_julia " >&5 printf "%s\n" "$as_me: CPUs enabled: $enable_cpu CUDA enabled: $enable_cuda $NO_NVML HIP enabled: $enable_hip OpenCL enabled: $enable_opencl Max FPGA enabled: $enable_max_fpga Compile-time limits (change these with --enable-maxcpus, --enable-maxcudadev, --enable-maxopencldev, --enable-maxmaxfpgadev, --enable-maxnodes, --enable-maxbuffers) (Note these numbers do not represent the number of detected devices, but the maximum number of devices StarPU can manage) Maximum number of CPUs: $maxcpus Maximum number of CUDA devices: $nmaxcudadev Maximum number of HIP devices: $nmaxhipdev Maximum number of OpenCL devices: $nmaxopencldev Maximum number of Maxeler FPGA devices: $nmaxmaxfpgadev Maximum number of MPI master-slave devices: $nmaxmpidev Maximum number of TCP/IP master-slave devices: $nmaxtcpipdev Maximum number of memory nodes: $maxnodes Maximum number of task buffers: $nmaxbuffers CUDA GPU-GPU transfers: $enable_cuda_memcpy_peer CUDA Map: $enable_cuda_map HIP GPU-GPU transfers: $enable_hip_memcpy_peer Allocation cache: $enable_allocation_cache Magma enabled: $have_magma BLAS library: $blas_lib hwloc: $have_valid_hwloc FxT trace enabled: $enable_fxt Documentation HTML: $enable_build_doc Documentation PDF: $enable_build_doc_pdf Examples: $enable_build_examples StarPU Extensions: StarPU MPI enabled: $build_mpi_lib StarPU MPI failure tolerance: $enable_mpi_ft StarPU MPI failure tolerance stats: $use_mpi_ft_stats StarPU MPI(nmad) enabled: $build_nmad_lib MPI test suite: $running_mpi_check Master-Slave MPI enabled: $build_mpi_master_slave Master-Slave TCP/IP enabled: $build_tcpip_master_slave FFT Support: $fft_support Resource Management enabled: $starpurm_support Python Interface enabled: $starpupy_support OpenMP runtime support enabled: $enable_openmp OpenMP LLVM runtime support enabled: $enable_openmp_llvm Parallel Worker support enabled: $enable_parallel_worker SOCL enabled: $build_socl SOCL test suite: $run_socl_check Scheduler Hypervisor: $build_sc_hypervisor simgrid enabled: $enable_simgrid ayudame enabled: $ayu_msg HDF5 enabled: $enable_hdf5 Native fortran support: $enable_build_fortran Native MPI fortran support: $use_mpi_fort Support for multiple linear regression models: $support_mlr Hierarchical dags support: $enable_bubble JULIA enabled: $enable_julia " >&6;} if test "$build_socl" = "yes" -a "$run_socl_check" = "no" ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: SOCL test suite will not be run as the environment variable SOCL_OCL_LIB_OPENCL is not defined. To run the tests, you need to install the OCL implementation of ICD (https://forge.imag.fr/projects/ocl-icd/ or Debian package ocl-icd-libopencl1) and set the variable SOCL_OCL_LIB_OPENCL to the location of the libOpenCL.so." >&5 printf "%s\n" "$as_me: WARNING: SOCL test suite will not be run as the environment variable SOCL_OCL_LIB_OPENCL is not defined. To run the tests, you need to install the OCL implementation of ICD (https://forge.imag.fr/projects/ocl-icd/ or Debian package ocl-icd-libopencl1) and set the variable SOCL_OCL_LIB_OPENCL to the location of the libOpenCL.so." >&6;} fi if test x"$have_valid_hwloc" = xno -a "$enable_simgrid" = "no" ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: hwloc was not enabled. If the target machine is hyperthreaded the performance may be impacted a lot. It is strongly recommended to install hwloc" >&5 printf "%s\n" "$as_me: WARNING: hwloc was not enabled. If the target machine is hyperthreaded the performance may be impacted a lot. It is strongly recommended to install hwloc" >&6;} fi if test x"$starpu_windows" = xyes -a "x$STARPU_MS_LIB" = "x" ; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: lib was not found, you will not be able to build StarPU applications with Microsoft Visual Studio. Add to your PATH the directories for MSVC, e.g c:\Program Files (x86)\Microsoft Visual Studio 11.0\Common7\IDE; c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin" >&5 printf "%s\n" "$as_me: WARNING: lib was not found, you will not be able to build StarPU applications with Microsoft Visual Studio. Add to your PATH the directories for MSVC, e.g c:\Program Files (x86)\Microsoft Visual Studio 11.0\Common7\IDE; c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin" >&6;} fi starpu-1.4.9+dfsg/configure.ac000066400000000000000000005256541507764646700163170ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2018-2018 Umeà University # Copyright (C) 2018,2020 Federal University of Rio Grande do Sul (UFRGS) # Copyright (C) 2017-2017 Guillaume Beauchamp # Copyright (C) 2013-2013 Thibaut Lambert # Copyright (C) 2011-2011 Télécom Sud Paris # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # AC_INIT([StarPU], [1.4.9], [starpu-devel@inria.fr], [starpu], [http://gitlab.inria.fr/starpu/starpu]) AC_CONFIG_SRCDIR(include/starpu.h) AC_CONFIG_AUX_DIR([build-aux]) # libtool doesn't actually properly manage a space in the workdir case `pwd` in *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) AC_MSG_ERROR([unsafe absolute working directory name]);; esac dnl Versioning. STARPU_MAJOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 1`" STARPU_MINOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 2`" STARPU_RELEASE_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 3`" STARPU_RELEASE_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 3| sed 's/rc.*//'`" dnl we do not want the rcXX in the release version. we would like to use sed -r 's/[a-z]+.*//' to remove any string but the -r option is not portable AC_SUBST([STARPU_MAJOR_VERSION]) AC_SUBST([STARPU_MINOR_VERSION]) AC_SUBST([STARPU_RELEASE_VERSION]) AC_SUBST([STARPU_EFFECTIVE_VERSION]) AC_DEFINE_UNQUOTED([STARPU_MAJOR_VERSION], [$STARPU_MAJOR_VERSION], [Major version number of StarPU.]) AC_DEFINE_UNQUOTED([STARPU_MINOR_VERSION], [$STARPU_MINOR_VERSION], [Minor version number of StarPU.]) AC_DEFINE_UNQUOTED([STARPU_RELEASE_VERSION], [$STARPU_RELEASE_VERSION], [Release version number of StarPU.]) . "$srcdir/STARPU-VERSION" AC_SUBST([LIBSTARPU_INTERFACE_CURRENT]) AC_SUBST([LIBSTARPU_INTERFACE_REVISION]) AC_SUBST([LIBSTARPU_INTERFACE_AGE]) AC_SUBST([LIBSTARPUMPI_INTERFACE_CURRENT]) AC_SUBST([LIBSTARPUMPI_INTERFACE_REVISION]) AC_SUBST([LIBSTARPUMPI_INTERFACE_AGE]) AC_SUBST([LIBSTARPUFFT_INTERFACE_CURRENT]) AC_SUBST([LIBSTARPUFFT_INTERFACE_REVISION]) AC_SUBST([LIBSTARPUFFT_INTERFACE_AGE]) AC_SUBST([LIBSTARPURM_INTERFACE_CURRENT]) AC_SUBST([LIBSTARPURM_INTERFACE_REVISION]) AC_SUBST([LIBSTARPURM_INTERFACE_AGE]) AC_SUBST([LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT]) AC_SUBST([LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION]) AC_SUBST([LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE]) AC_SUBST([LIBSOCL_INTERFACE_CURRENT]) AC_SUBST([LIBSOCL_INTERFACE_REVISION]) AC_SUBST([LIBSOCL_INTERFACE_AGE]) AC_SUBST([LIBSTARPUJULIA_INTERFACE_CURRENT]) AC_SUBST([LIBSTARPUJULIA_INTERFACE_REVISION]) AC_SUBST([LIBSTARPUJULIA_INTERFACE_AGE]) AC_CANONICAL_SYSTEM AM_INIT_AUTOMAKE([1.11 -Wall -Wno-portability foreign silent-rules color-tests parallel-tests subdir-objects tar-pax]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)]) AC_PREREQ(2.64) m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) AC_PROG_CC AM_PROG_CC_C_O AC_PROG_CXX AC_PROG_CPP AC_PROG_SED AC_PROG_LN_S AC_PROG_F77 AC_PROG_FC AC_PROG_GREP AC_PROG_EGREP AC_PROG_INSTALL AC_PROG_MKDIR_P AC_CHECK_PROGS(PROG_STAT,gstat stat) AC_CHECK_PROGS(PROG_DATE,gdate date) AC_PATH_PROGS(PROG_FIND,find) AC_CHECK_PROGS(PROG_CLANG,clang) AC_CACHE_CHECK([for parallel that supports semaphores with exit code], [ac_cv_path_PARALLEL], [AC_PATH_PROGS_FEATURE_CHECK([PARALLEL], [parallel], [[parallel --semaphore --id starpu --fg --fg-exit -j 2 exit 42 > /dev/null 2>&1 [ $? = 42 ] && ac_cv_path_PARALLEL=$ac_path_PARALLEL ac_path_PARALLEL_found=:]], [ac_cv_path_PARALLEL=no])]) AC_SUBST([PARALLEL], [$ac_cv_path_PARALLEL]) AM_CONDITIONAL([HAVE_PARALLEL], [test "x$PARALLEL" != "xno"]) dnl locate pkg-config PKG_PROG_PKG_CONFIG AC_ARG_ENABLE(simgrid, [AS_HELP_STRING([--enable-simgrid], [Enable simulating execution in simgrid])], enable_simgrid=$enableval, enable_simgrid=no) if test x$enable_perf_debug = xyes; then enable_shared=no fi default_enable_mpi_check=no if test x$enable_simgrid = xyes ; then default_enable_mpi=no else default_enable_mpi=maybe fi IS_SUPPORTED_FLAG_VAR(-Wno-unused,APP) IS_SUPPORTED_FFLAG(-Wno-unused-dummy-argument,[APP_FFLAGS="$APP_FFLAGS -Wno-unused-dummy-argument"]) IS_SUPPORTED_FCFLAG(-Wno-unused-dummy-argument,[APP_FCFLAGS="$APP_FCFLAGS -Wno-unused-dummy-argument"]) AC_SUBST(APP_CFLAGS) AC_SUBST(APP_CXXFLAGS) AC_SUBST(APP_FFLAGS) AC_SUBST(APP_FCFLAGS) AC_FUNC_MMAP ############################################################################### # # # Forwarded options # # # # Move here options whose values are needed early # # # ############################################################################### # AC_ARG_ENABLE(starpupy, [AS_HELP_STRING([--enable-starpupy], [enable StarPU python interface])], enable_starpupy=$enableval, enable_starpupy=maybe) ############################################################################### # # # Profiling tool support # # # ############################################################################### AC_ARG_ENABLE(prof-tool, [AS_HELP_STRING([--enable-prof-tool], [enable profiling tool])], enable_prof_tool=$enableval, enable_prof_tool=yes) if test x$enable_prof_tool = xyes; then AC_DEFINE(STARPU_PROF_TOOL, [1], [Define this to enable profiling tool support]) fi AC_MSG_CHECKING([for profiling tool support]) AC_MSG_RESULT($enable_prof_tool) ############################################################################### # # # Hierarchical dags support # # # ############################################################################### AC_ARG_ENABLE(bubble, [AS_HELP_STRING([--enable-bubble], [build the hierarchical dags (a.k.a bubble) support])], enable_bubble=$enableval, enable_bubble=no) AC_MSG_CHECKING([for hierarchical dags - a.k.a bubble - support]) if test x$enable_bubble = xyes; then AC_DEFINE(STARPU_BUBBLE, [1], [Define this to enable hierarchical dags support]) fi AM_CONDITIONAL([STARPU_BUBBLE], [test "x$enable_bubble" = "xyes"]) AC_MSG_RESULT($enable_bubble) AC_MSG_CHECKING(whether bubble debug messages should be displayed) AC_ARG_ENABLE(bubble-verbose, [AS_HELP_STRING([--enable-bubble-verbose], [display verbose bubble messages])], enable_bubble_verbose=$enableval, enable_bubble_verbose=no) AC_MSG_RESULT($enable_bubble_verbose) if test x$enable_bubble_verbose = xextra; then AC_DEFINE(STARPU_BUBBLE_VERBOSE, [1], [display verbose bubble debug messages]) fi ############################################################################### # # # Drivers # # # ############################################################################### AC_ARG_ENABLE(opencl-simulator, [AS_HELP_STRING([--enable-opencl-simulator], [Enable the use of an OpenCL simulator])], enable_opencl_simulator=$enableval, enable_opencl_simulator=no) if test x$enable_opencl_simulator = xyes; then enable_simgrid=yes AC_DEFINE(STARPU_OPENCL_SIMULATOR, [1], [Define this to enable using an OpenCL simulator]) fi AC_ARG_WITH(simgrid-dir, [AS_HELP_STRING([--with-simgrid-dir=], [specify SimGrid installation directory])], [ simgrid_dir="$withval" # in case this was not explicit yet enable_simgrid=yes ], simgrid_dir=no) AC_ARG_WITH(simgrid-include-dir, [AS_HELP_STRING([--with-simgrid-include-dir=], [specify where SimGrid headers are installed])], [ simgrid_include_dir="$withval" # in case this was not explicit yet enable_simgrid=yes ], [simgrid_include_dir=no]) AC_ARG_WITH(simgrid-lib-dir, [AS_HELP_STRING([--with-simgrid-lib-dir=], [specify where SimGrid libraries are installed])], [ simgrid_lib_dir="$withval" # in case this was not explicit yet enable_simgrid=yes ], [simgrid_lib_dir=no]) if test x$enable_simgrid = xyes ; then PKG_CHECK_MODULES([SIMGRID], [simgrid], [], [:]) if test "$simgrid_include_dir" != "no" ; then SIMGRID_CFLAGS="-I$simgrid_include_dir $SIMGRID_CFLAGS" fi if test "$simgrid_lib_dir" != "no" ; then SIMGRID_LIBS="-L$simgrid_lib_dir $SIMGRID_LIBS" fi if test "$simgrid_dir" != "no" ; then SIMGRID_CFLAGS="-I$simgrid_dir/include $SIMGRID_CFLAGS" SIMGRID_LIBS="-L$simgrid_dir/lib $SIMGRID_LIBS" else simgrid_dir="$(pkg-config --variable=prefix simgrid)" fi if test -n "$SIMGRID_CFLAGS" ; then CFLAGS="$SIMGRID_CFLAGS $CFLAGS" CXXFLAGS="$SIMGRID_CFLAGS $CXXFLAGS" NVCCFLAGS="$SIMGRID_CFLAGS $NVCCFLAGS" HIPCCFLAGS="$SIMGRID_CFLAGS $HIPCCFLAGS" fi SAVED_LIBS="${LIBS}" LIBS="$SIMGRID_LIBS $LIBS" AC_HAVE_LIBRARY([simgrid], [], [ AC_MSG_ERROR(Simgrid support needs simgrid installed) ] ) AC_CHECK_HEADERS([simgrid/msg.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_MSG_H], [1], [Define to 1 if you have msg.h in simgrid/.])]) AC_CHECK_HEADERS([msg/msg.h], [AC_DEFINE([STARPU_HAVE_MSG_MSG_H], [1], [Define to 1 if you have msg.h in msg/.])]) AC_CHECK_HEADERS([simgrid/host.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_HOST_H], [1], [Define to 1 if you have host.h in simgrid/.])]) AC_CHECK_HEADERS([simgrid/link.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_LINK_H], [1], [Define to 1 if you have link.h in simgrid/.])]) AC_CHECK_HEADERS([xbt/base.h], [AC_DEFINE([STARPU_HAVE_XBT_BASE_H], [1], [Define to 1 if you have base.h in xbt/.])]) AC_CHECK_HEADERS([simgrid/version.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_VERSION_H], [1], [Define to 1 if you have version.h in simgrid/.])], [], [[ #ifdef STARPU_HAVE_XBT_BASE_H #include #endif ]]) AC_CHECK_HEADERS([simgrid/simdag.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_SIMDAG_H], [1], [Define to 1 if you have simdag.h in simgrid/.])]) AC_CHECK_HEADERS([xbt/synchro.h], [AC_DEFINE([STARPU_HAVE_XBT_SYNCHRO_H], [1], [Define to 1 if you have synchro.h in xbt/.])]) AC_CHECK_HEADERS([xbt/config.h], [AC_DEFINE([STARPU_HAVE_XBT_CONFIG_H], [1], [Define to 1 if you have config.h in xbt/.])]) AC_CHECK_HEADERS([simgrid/actor.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_ACTOR_H], [1], [Define to 1 if you have actor.h in simgrid/.])]) AC_CHECK_HEADERS([simgrid/engine.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_ENGINE_H], [1], [Define to 1 if you have engine.h in simgrid/.])]) AC_CHECK_HEADERS([simgrid/semaphore.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_SEMAPHORE_H], [1], [Define to 1 if you have semaphore.h in simgrid/.])]) AC_CHECK_HEADERS([simgrid/mutex.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_MUTEX_H], [1], [Define to 1 if you have mutex.h in simgrid/.])]) AC_CHECK_HEADERS([simgrid/cond.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_COND_H], [1], [Define to 1 if you have cond.h in simgrid/.])]) AC_CHECK_HEADERS([simgrid/barrier.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_BARRIER_H], [1], [Define to 1 if you have barrier.h in simgrid/.])]) AC_CHECK_HEADERS([simgrid/engine.h]) AC_CHECK_HEADERS([simgrid/zone.h], [AC_DEFINE([STARPU_HAVE_SIMGRID_ZONE_H], [1], [Define to 1 if you have zone.h in simgrid/.])]) AC_CHECK_TYPES([smx_actor_t], [AC_DEFINE([STARPU_HAVE_SMX_ACTOR_T], [1], [Define to 1 if you have the smx_actor_t type.])], [], [[#include ]]) # Latest functions AC_CHECK_FUNCS([MSG_process_attach sg_actor_attach sg_actor_attach_pthread sg_actor_init sg_actor_set_stacksize sg_actor_on_exit MSG_zone_get_hosts sg_zone_get_hosts sg_zone_get_all_hosts MSG_process_self_name MSG_process_userdata_init sg_actor_get_data sg_actor_set_data sg_actor_data]) AC_CHECK_FUNCS([xbt_mutex_try_acquire smpi_process_set_user_data SMPI_thread_create sg_zone_get_by_name sg_link_get_name sg_link_name sg_link_set_bandwidth sg_link_bandwidth_set sg_host_get_route sg_host_get_route_links sg_host_route sg_host_self sg_host_list sg_host_get_speed sg_host_speed simcall_process_create sg_config_continue_after_help]) AC_CHECK_FUNCS([simgrid_set_maestro]) AC_CHECK_FUNCS([simgrid_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_SIMGRID_INIT], [1], [Define to 1 if you have the `simgrid_init' function.])]) AC_CHECK_FUNCS([xbt_barrier_init], [AC_DEFINE([STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT], [1], [Define to 1 if you have the `xbt_barrier_init' function.])]) AC_CHECK_FUNCS([sg_actor_sleep_for sg_actor_self sg_actor_ref sg_host_get_properties sg_host_get_property_names sg_host_send_to sg_host_sendto sg_cfg_set_int sg_actor_self_execute sg_actor_execute simgrid_get_clock]) AC_CHECK_DECLS([smpi_process_set_user_data], [], [], [[#include ]]) # Oldies for compatibility with older simgrid AC_CHECK_FUNCS([MSG_get_as_by_name MSG_zone_get_by_name MSG_environment_get_routing_root MSG_host_get_speed]) LIBS="${SAVED_LIBS}" AC_DEFINE(STARPU_SIMGRID, [1], [Define this to enable simgrid execution]) # We won't bind or detect anything with_hwloc=no # disable mpi checks by default, they require static linking, we don't # want that by default default_enable_mpi_check=no # disable MPI support by default default_enable_mpi=no AC_LANG_PUSH([C++]) if test x$enable_shared = xno ; then # When linking statically, libtool does not realize we need libstdc++ for simgrid_cpp.cpp SIMGRID_LIBS="$SIMGRID_LIBS -lstdc++" LIBS="$LIBS -lstdc++" fi SIMGRID_LDFLAGS="$SIMGRID_LIBS -lsimgrid" # Simgrid 3.12 & 3.13 need -std=c++11 to be able to build anything in C++... case \ $CXXFLAGS\ in *\ -std=*\ *) ;; *) # Make sure our C++ compiler can compile simgrid headers SIMGRID_INCLUDES=" #ifdef STARPU_HAVE_SIMGRID_MSG_H #include #include #elif defined(STARPU_HAVE_MSG_MSG_H) #include #endif #ifdef STARPU_HAVE_XBT_BASE_H #include #endif #ifdef STARPU_HAVE_SIMGRID_VERSION_H #include #endif #ifdef STARPU_HAVE_SIMGRID_ZONE_H #include #endif #ifdef STARPU_HAVE_SIMGRID_HOST_H #include #endif #include " AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[$SIMGRID_INCLUDES]])],, CXXFLAGS="-std=c++11 $CXXFLAGS") ;; esac AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[$SIMGRID_INCLUDES #include ]], [[simgrid::s4u::Engine::on_time_advance_cb([](double delta) { });]] )], AC_DEFINE(STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB, [1], [Define this to 1 when s4u::Engine::on_time_advance_cb is available])) AC_LANG_POP([C++]) AC_ARG_ENABLE(simgrid-mc, [AS_HELP_STRING([--enable-simgrid-mc], [Enable using Model Checker of simgrid])], enable_simgrid_mc=$enableval, enable_simgrid_mc=no) if test x$enable_simgrid_mc = xyes ; then AC_DEFINE(STARPU_SIMGRID_MC, [1], [Define this to enable Model Checker in simgrid execution]) AC_PATH_PROG([SIMGRID_MC], [simgrid-mc], [no], [$simgrid_dir/bin:$PATH]) LDFLAGS="$LDFLAGS -Wl,-znorelro -Wl,-znoseparate-code" # libsimgrid needs to be linked from binaries themselves for MC to work STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS $SIMGRID_LDFLAGS" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ #ifdef STARPU_HAVE_XBT_BASE_H #include #endif #ifdef STARPU_HAVE_SIMGRID_VERSION_H #include #endif #if SIMGRID_VERSION < 33100 #error no mutex support with MC #endif ]])],, AC_MSG_ERROR([We need simgrid >= 3.31 for MC])) fi fi AM_CONDITIONAL(STARPU_SIMGRID_MC, test x$enable_simgrid_mc = xyes) AM_CONDITIONAL(STARPU_SIMGRID, test x$enable_simgrid = xyes) AC_SUBST(SIMGRID_CFLAGS) AC_SUBST(SIMGRID_LDFLAGS) AC_MSG_CHECKING(whether SimGrid is enabled) AC_MSG_RESULT($enable_simgrid) AC_MSG_CHECKING(whether blocking drivers should be enabled) AC_ARG_ENABLE(blocking-drivers, [AS_HELP_STRING([--enable-blocking-drivers], [enable blocking drivers])], enable_blocking=$enableval, enable_blocking=$enable_simgrid) AC_MSG_RESULT($enable_blocking) if test x$enable_blocking = xno ; then if test x$enable_simgrid = xyes ; then AC_MSG_ERROR([--disable-blocking-drivers cannot be used in simgrid mode]) fi AC_DEFINE(STARPU_NON_BLOCKING_DRIVERS, [1], [drivers must progress]) fi if test x$enable_blocking = xyes ; then AC_MSG_CHECKING(whether worker callbacks should be enabled) AC_ARG_ENABLE(worker-callbacks, [AS_HELP_STRING([--enable-worker-callbacks], [enable worker callbacks])], enable_worker_cb=$enableval, enable_worker_cb=no) AC_MSG_RESULT($enable_worker_cb) else # worker sleep/wake-up callbacks only make sense if blocking drivers are enabled enable_worker_cb=no fi if test x$enable_worker_cb = xyes ; then AC_DEFINE(STARPU_WORKER_CALLBACKS, [1], [workers must call callbacks on sleep/wake-up]) fi ############################################################################### # # # LIBTOOLS # # # ############################################################################### #c++11 detection AX_CXX_COMPILE_STDCXX(11,noext,optional) AC_SUBST([STARPU_HAVE_CXX11], $HAVE_CXX11) AM_CONDITIONAL([STARPU_HAVE_CXX11], [test "$HAVE_CXX11" -eq 1]) if test $HAVE_CXX11 -eq 1; then AC_DEFINE(STARPU_HAVE_CXX11, [1], [compiler supports cxx11]) fi LT_PREREQ([2.2]) LT_INIT([win32-dll]) AC_HEADER_STDC AC_C_RESTRICT # Check if bash is available AC_PATH_PROG([REALBASH], [bash], , [/bin:$PATH]) # Record git version AC_PATH_PROG(gitcommand, git) if test -f $srcdir/STARPU-REVISION ; then cp $srcdir/STARPU-REVISION . elif test "$gitcommand" = "" ; then echo "unknown" > ./STARPU-REVISION else bdir=$PWD cd $srcdir git log -n 1 --pretty="%H%d" . > $bdir/STARPU-REVISION_tmp cd $bdir if test -s ./STARPU-REVISION_tmp ; then mv ./STARPU-REVISION_tmp ./STARPU-REVISION else echo "unknown" > ./STARPU-REVISION fi fi AM_CONDITIONAL([STARPU_CROSS_COMPILING], [test "x$cross_compiling" = "xyes"]) ############################################################################### # # # MPI compilers # # # ############################################################################### #Check MPICC if test x$enable_simgrid = xyes ; then DEFAULT_MPICC=smpicc else DEFAULT_MPICC=mpicc fi AC_ARG_WITH(mpicc, [AS_HELP_STRING([--with-mpicc=], [Name or path of the mpicc compiler])], [DEFAULT_MPICC=$withval]) case $DEFAULT_MPICC in /*) mpicc_path="$DEFAULT_MPICC" ;; *) AC_PATH_PROG(mpicc_path, $DEFAULT_MPICC, [no], [$simgrid_dir/bin:$PATH]) ;; esac # We test if the MPICC compiler exists if test ! -x $mpicc_path; then AC_MSG_RESULT(The mpicc compiler '$mpicc_path' does not have the execute permission) mpicc_path=no fi AC_MSG_CHECKING(whether mpicc is available) AC_MSG_RESULT($mpicc_path) AC_SUBST(MPICC, $mpicc_path) if test x$mpicc_path != xno ; then MPIPATH=$(dirname $mpicc_path):$PATH else MPIPATH=$PATH fi #Check MPICXX/MPIC++ if test x$enable_simgrid = xyes ; then DEFAULT_MPICXX=smpicxx else DEFAULT_MPICXX=mpicxx fi AC_ARG_WITH(mpicxx, [AS_HELP_STRING([--with-mpicxx=], [Name or path of the mpicxx/mpic++ compiler])], [DEFAULT_MPICXX=$withval]) case $DEFAULT_MPICXX in /*) mpicxx_path="$DEFAULT_MPICXX" ;; *) AC_PATH_PROG(mpicxx_path, $DEFAULT_MPICXX, [no], [$MPIPATH]) ;; esac # try with mpic++ if mpicxx was not found if test x$mpicxx_path = xno ; then DEFAULT_MPICXX=mpic++ AC_PATH_PROG(mpicxx_path, $DEFAULT_MPICXX, [no], [$MPIPATH]) fi # We test if the MPICXX/MPIC++ compiler exists if test ! -x $mpicxx_path; then AC_MSG_RESULT(The mpicxx compiler '$mpicxx_path' does not have the execute permission) mpicxx_path=no fi AC_MSG_CHECKING(whether mpicxx is available) AC_MSG_RESULT($mpicxx_path) AC_SUBST(MPICXX, $mpicxx_path) # Check if mpiexec is available if test x$enable_simgrid = xyes ; then DEFAULT_MPIEXEC=smpirun AC_ARG_WITH(smpirun, [AS_HELP_STRING([--with-smpirun[=]], [Name or path of the smpirun helper])], [DEFAULT_MPIEXEC=$withval]) else DEFAULT_MPIEXEC=mpiexec AC_ARG_WITH(mpiexec, [AS_HELP_STRING([--with-mpiexec=], [Name or path of mpiexec])], [DEFAULT_MPIEXEC=$withval]) fi case $DEFAULT_MPIEXEC in /*) mpiexec_path="$DEFAULT_MPIEXEC" ;; *) AC_PATH_PROG(mpiexec_path, $DEFAULT_MPIEXEC, [no], [$MPIPATH]) esac AC_MSG_CHECKING(whether mpiexec is available) AC_MSG_RESULT($mpiexec_path) # We test if MPIEXEC exists if test ! -x $mpiexec_path; then AC_MSG_RESULT(The mpiexec script '$mpiexec_path' is not valid) default_enable_mpi_check=no mpiexec_path="" fi AC_SUBST(MPIEXEC,$mpiexec_path) ############################################################################### # # # MPI # # # ############################################################################### AC_ARG_ENABLE(mpi, [AS_HELP_STRING([--disable-mpi], [Disable StarPU MPI library generation])], [enable_mpi=$enableval], [enable_mpi=$default_enable_mpi]) if test x$enable_mpi = xmaybe ; then if test -x "$mpicc_path"; then enable_mpi=yes else enable_mpi=no fi fi # in case MPI was explicitly required, but mpicc is not available, this is an error if test x$enable_mpi = xyes ; then if test ! -x "$mpicc_path"; then AC_MSG_ERROR([Compiler MPI '$mpicc_path' not valid]) fi OLD_CC=$CC CC=$mpicc_path AC_COMPILE_IFELSE( [AC_LANG_PROGRAM([[ #include #include ]],,)], [AC_DEFINE(STARPU_HAVE_MPI_EXT, [1], [ is available])]) AC_CHECK_FUNC([MPI_Comm_create_group], [AC_DEFINE([STARPU_HAVE_MPI_COMM_CREATE_GROUP], [1], [Define to 1 if the function MPI_Comm_create_group is available.])]) CC=$OLD_CC fi build_mpi_lib=$enable_mpi AC_ARG_ENABLE(mpi-minimal-tests, [AS_HELP_STRING([--enable-mpi-minimal-tests], [Only enable a subset of MPI tests])], [enable_mpi_minimal_tests=$enableval], [enable_mpi_minimal_tests=no]) AM_CONDITIONAL([STARPU_MPI_MINIMAL_TESTS], [test x$enable_mpi_minimal_tests = xyes]) ############################################################################### # # # NEW MADELEINE # # # ############################################################################### AC_ARG_ENABLE(nmad, [AS_HELP_STRING([--enable-nmad], [Enable StarPU MPI library generation using the new madeleine backend])], [enable_nmad=$enableval], [enable_nmad=no]) build_nmad_lib=no AC_SUBST(CC_OR_MPICC, $cc_or_mpicc) #We can only build StarPU MPI Library if User wants it and MPI is available if test x$enable_mpi = xyes -a x$enable_nmad = xyes ; then build_nmad_lib=yes build_mpi_lib=no PKG_CHECK_MODULES([NMAD],[nmad]) save_LIBS="$LIBS" save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS $NMAD_CFLAGS" LIBS="$LIBS $NMAD_LIBS" AC_CHECK_FUNCS([piom_ltask_set_bound_thread_os_indexes]) AC_CHECK_FUNCS([nm_trace_add_synchro_point]) CFLAGS="$save_CFLAGS" LIBS="$save_LIBS" else build_nmad_lib=no fi # If MadMPI is used, MadMPI can't be built with PIOman (we don't want communication progression to be done in both StarPU and MadMPI): if test x$enable_mpi = xyes -a x$build_nmad_lib = xno -a ! -z "`$mpicc_path --showme|grep pioman`"; then AC_MSG_WARN([Using MPI backend of StarPU with MadMPI built with PIOman: disabling PIOman's progression.]) AC_DEFINE(HAVE_PIOMAN, [1], [PIOman (from PM2) is available]) fi ############################################################################### # # # MPI Master Slave # # # ############################################################################### AC_ARG_ENABLE(mpi-master-slave, [AS_HELP_STRING([--enable-mpi-master-slave], [Enable StarPU to run with the master-slave mode])], use_mpi_master_slave=$enableval, use_mpi_master_slave=no) if test x$enable_simgrid = xyes; then if test x$use_mpi_master_slave = xyes; then AC_MSG_ERROR([MPI Master Slave not supported with simgrid]) fi use_mpi_master_slave=no fi # in case it is explicitly required, but mpicc is not available, this is an error if test x$use_mpi_master_slave = xyes -a ! -x "$mpicc_path"; then AC_MSG_ERROR([Compiler MPI '$mpicc_path' not valid]) fi #We can only build MPI Master Slave if User wants it and MPI compiler are available if test x$use_mpi_master_slave = xyes -a x$mpicc_path != xno -a x${mpicxx_path} != xno ; then build_mpi_master_slave=yes else build_mpi_master_slave=no fi #users cannot use both at the same time if test x$build_mpi_master_slave = xyes -a x$enable_mpi = xyes; then AC_MSG_WARN(StarPU-MPI and MPI Master-Slave cannot be used at the same time ! Disabling StarPU-MPI...) build_mpi_lib=no build_nmad_lib=no enable_mpi=no fi if test x$build_mpi_master_slave = xyes; then AC_DEFINE(STARPU_USE_MPI_MASTER_SLAVE, [1], [MPI Master Slave support is enabled]) CC=$mpicc_path CCLD=$mpicc_path CXX=$mpicxx_path CXXLD=mpicxx_path fi AC_MSG_CHECKING(whether the MPI master-slave mode should be enabled) AC_MSG_RESULT($build_mpi_master_slave) AM_CONDITIONAL([STARPU_USE_MPI_MASTER_SLAVE], [test x$build_mpi_master_slave = xyes]) AC_MSG_CHECKING(maximum number of MPI master-slave devices) AC_ARG_ENABLE(maxmpidev, [AS_HELP_STRING([--enable-maxmpidev=], [maximum number of MPI master-slave devices])], nmaxmpidev=$enableval, [ if test x$build_mpi_master_slave = xyes; then nmaxmpidev=4 else nmaxmpidev=0 fi ]) if test x$nmaxmpidev = x -o x$nmaxmpidev = xyes then AC_MSG_ERROR([The --enable-maxmpidev option needs to be given a number]) fi AC_MSG_RESULT($nmaxmpidev) AC_DEFINE_UNQUOTED(STARPU_MAXMPIDEVS, [$nmaxmpidev], [maximum number of MPI devices]) ############################################################################### # # # TCP/IP Master Slave # # # ############################################################################### AC_ARG_ENABLE(tcpip-master-slave, [AS_HELP_STRING([--enable-tcpip-master-slave], [Enable StarPU to run with the master-slave mode])], build_tcpip_master_slave=$enableval, build_tcpip_master_slave=no) if test x$build_tcpip_master_slave = xyes; then AC_CHECK_LIB([dl], [dlsym]) AC_DEFINE(STARPU_USE_TCPIP_MASTER_SLAVE, [1], [TCPIP Master Slave support is enabled]) fi AC_MSG_CHECKING(whether the TCP/IP master-slave mode should be enabled) AC_MSG_RESULT($build_tcpip_master_slave) AM_CONDITIONAL([STARPU_USE_TCPIP_MASTER_SLAVE], [test x$build_tcpip_master_slave = xyes]) AC_MSG_CHECKING(maximum number of TCP/IP master-slave devices) AC_ARG_ENABLE(maxtcpipdev, [AS_HELP_STRING([--enable-maxtcpipdev=], [maximum number of TCP/IP master-slave devices])], nmaxtcpipdev=$enableval, [ if test x$build_tcpip_master_slave = xyes; then nmaxtcpipdev=4 else nmaxtcpipdev=0 fi ]) if test x$nmaxtcpipdev = x -o x$nmaxtcpipdev = xyes then AC_MSG_ERROR([The --enable-maxtcpipdev option needs to be given a number]) fi AC_MSG_RESULT($nmaxtcpipdev) AC_DEFINE_UNQUOTED(STARPU_MAXTCPIPDEVS, [$nmaxtcpipdev], [maximum number of TCP/IP devices]) ############################################################################### # # # Miscellaneous things for MPI # # # ############################################################################### AC_ARG_ENABLE(mpi-pedantic-isend, [AS_HELP_STRING([--enable-mpi-pedantic-isend], [Prevent StarPU MPI from reading buffers while being sent over MPI])], enable_mpi_pedantic_isend=$enableval, enable_mpi_pedantic_isend=no) if test x$enable_mpi_pedantic_isend = xyes; then AC_DEFINE(STARPU_MPI_PEDANTIC_ISEND, [1], [enable StarPU MPI pedantic isend]) fi # If the user specifically asks for it, or if we are in a developer checkout, we enable mpi check if test -d "$srcdir/.git" -o -f "$srcdir/.git" ; then default_enable_mpi_check=$enable_mpi fi AC_ARG_ENABLE(mpi-check, AC_HELP_STRING([--enable-mpi-check], [Enable execution of MPI testcases]), [enable_mpi_check=$enableval], [enable_mpi_check=$default_enable_mpi_check]) running_mpi_check=no if test x$enable_mpi_check = xyes ; then running_mpi_check=yes if test x$enable_mpi = xno ; then AC_MSG_ERROR([MPI checks requested, but MPI is disabled]) fi fi if test x$enable_mpi_check = xmaybe ; then running_mpi_check=yes fi if test x$enable_mpi_check = xno ; then running_mpi_check=no fi if test x$enable_mpi = xno ; then running_mpi_check=no fi AM_CONDITIONAL(STARPU_MPI_CHECK, test x$running_mpi_check = xyes) AC_MSG_CHECKING(whether MPI tests should be run) AC_MSG_RESULT($running_mpi_check) AC_MSG_CHECKING(whether the StarPU MPI library should be generated) AC_MSG_RESULT($build_mpi_lib) AC_MSG_CHECKING(whether the StarPU MPI nmad library should be generated) AC_MSG_RESULT($build_nmad_lib) if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes ; then AC_DEFINE(STARPU_USE_MPI,[1],[whether the StarPU MPI library is available]) if test x$build_mpi_lib = xyes ; then AC_DEFINE(STARPU_USE_MPI_MPI,[1],[whether the StarPU MPI library (with a native MPI implementation) is available]) else AC_DEFINE(STARPU_USE_MPI_NMAD,[1],[whether the StarPU MPI library (with a NewMadeleine implementation) is available]) fi fi if test x$enable_mpi = xyes ; then if test x$enable_simgrid = xyes ; then if test x$enable_shared = xyes ; then AC_MSG_ERROR([MPI with simgrid can not work with shared libraries, if you need the MPI support, then use --disable-shared to fix this, else disable MPI with --disable-mpi]) else CFLAGS="$CFLAGS -fPIC" CXXFLAGS="$CXXFLAGS -fPIC" NVCCFLAGS="$NVCCFLAGS --compiler-options -fPIC" HIPCCFLAGS="$HIPCCFLAGS --compiler-options -fPIC" FFLAGS="$FFLAGS -fPIC" FCLAGS="$FFLAGS -fPIC" fi fi enable_mpi_sync_clocks=no PKG_CHECK_MODULES([MPI_SYNC_CLOCKS],[mpi_sync_clocks],[enable_mpi_sync_clocks=yes],[enable_mpi_sync_clocks=no]) if test x$enable_mpi_sync_clocks = xyes ; then AC_DEFINE(STARPU_HAVE_MPI_SYNC_CLOCKS, [1], [Define to 1 if you have mpi_sync_clocks and it is meant to be used]) fi fi AM_CONDITIONAL(STARPU_MPI_SYNC_CLOCKS, test x$enable_mpi_sync_clocks = xyes) AM_CONDITIONAL(STARPU_USE_MPI_MPI, test x$build_mpi_lib = xyes) AM_CONDITIONAL(STARPU_USE_MPI_NMAD, test x$build_nmad_lib = xyes) AM_CONDITIONAL(STARPU_USE_MPI, test x$build_nmad_lib = xyes -o x$build_mpi_lib = xyes) ###### Failure tolerance material ####### default_enable_mpi_ft=no AC_ARG_ENABLE(mpi-ft, AC_HELP_STRING([--enable-mpi-ft], [Enable failure tolerance mechanisms provided by StarPU]), [enable_mpi_ft=$enableval], [enable_mpi_ft=$default_enable_mpi_ft]) default_enable_mpi_ft_stats=no use_mpi_ft_stats=no AC_ARG_ENABLE(mpi-ft-stats, AC_HELP_STRING([--enable-mpi-ft-stats], [Enable stats for failure tolerance mechanisms]), [enable_mpi_ft_stats=$enableval], [enable_mpi_ft_stats=$default_enable_mpi_ft_stats]) # TODO: Check MPI version to be ULFM if test x$enable_mpi_ft = xyes ; then if test x$build_mpi_lib != xyes ; then AC_MSG_ERROR([Failure tolerance mechanisms only work with a particular MPI implementation: ULFM (OpenMPI based).]) else AC_DEFINE(STARPU_USE_MPI_FT, [1], [whether the StarPU MPI failure tolerance mechanisms are requested]) use_mpi_ft=yes; if test x$enable_mpi_ft_stats = xyes ; then AC_DEFINE(STARPU_USE_MPI_FT_STATS, [1], [whether the StarPU MPI failure tolerance mechanisms stats are watched]) use_mpi_ft_stats=$enable_mpi_ft_stats; fi fi fi AM_CONDITIONAL(STARPU_USE_MPI_FT, [test x$use_mpi_ft = xyes]) AM_CONDITIONAL(STARPU_USE_MPI_FT_STATS, [test x$use_mpi_ft_stats = xyes]) ###### End of failure tolerance material ###### AC_ARG_WITH(mpiexec-args, [AS_HELP_STRING([--with-mpiexec-args[=]], [Arguments for mpiexec])], [ mpiexec_args=$withval ]) AC_SUBST(MPIEXEC_ARGS,$mpiexec_args) AC_MSG_CHECKING(whether MPI debug messages should be displayed) AC_ARG_ENABLE(mpi-verbose, [AS_HELP_STRING([--enable-mpi-verbose], [display MPI verbose debug messages (--enable-mpi-verbose=extra increase the verbosity)])], enable_mpi_verbose=$enableval, enable_mpi_verbose=no) AC_MSG_RESULT($enable_mpi_verbose) if test x$enable_mpi_verbose = xyes; then AC_DEFINE(STARPU_MPI_VERBOSE, [1], [display MPI verbose debug messages]) fi if test x$enable_mpi_verbose = xextra; then AC_DEFINE(STARPU_MPI_VERBOSE, [1], [display MPI verbose debug messages]) AC_DEFINE(STARPU_MPI_EXTRA_VERBOSE, [1], [display MPI verbose debug messages]) fi if test x$enable_mpi = xyes -o x$build_mpi_master_slave = xyes ; then cc_or_mpicc=$mpicc_path # For some reason, libtool uses gcc instead of mpicc when linking # libstarpumpi. # On Darwin (and maybe other systems ?) the linker will fail (undefined # references to MPI_*). We manually add the required flags to fix this # issue. # openmpi version MPICC_LDFLAGS=`$mpicc_path --showme:link 2>/dev/null` if test -z "$MPICC_LDFLAGS" then # mpich version MPICC_LDFLAGS=`$mpicc_path -link_info | awk '{$1=""; print}'` fi AC_SUBST(MPICC_LDFLAGS) else cc_or_mpicc=$CC fi AC_SUBST(CC_OR_MPICC, $cc_or_mpicc) ############################################################################### # # # NUMA memory nodes # # # ############################################################################### default_nmaxnumanodes=2 AC_PATH_PROG(hwloccalccommand, hwloc-calc) AC_MSG_CHECKING(maximum number of NUMA nodes) AC_ARG_ENABLE(maxnumanodes, [AS_HELP_STRING([--enable-maxnumanodes=], [maximum number of NUMA nodes])], nmaxnumanodes=$enableval, nmaxnumanodes=auto) if test x$nmaxnumanodes = xauto then if test "$hwloccalccommand" = ""; then AC_MSG_WARN([hwloc-calc not available to automatically get the number of NUMA nodes, using the default value: $default_nmaxnumanodes]) nmaxnumanodes=$default_nmaxnumanodes else nmaxnumanodes=$($hwloccalccommand all -N node 2>/dev/null) if test x$nmaxnumanodes = x; then AC_MSG_WARN([hwloc-calc could not get the number of NUMA nodes, using the default value: $default_nmaxnumanodes]) nmaxnumanodes=$default_nmaxnumanodes fi fi fi if test x$nmaxnumanodes = x -o x$nmaxnumanodes = xyes then AC_MSG_ERROR([The --enable-maxnumanodes option needs to be given a number]) fi AC_MSG_RESULT($nmaxnumanodes) AC_DEFINE_UNQUOTED(STARPU_MAXNUMANODES, [$nmaxnumanodes], [maximum number of NUMA nodes]) ############################################################################### AC_PATH_PROGS([STARPU_MS_LIB], [lib]) AC_ARG_VAR([STARPU_MS_LIB], [Path to Microsoft's Visual Studio `lib' tool]) AM_CONDITIONAL([STARPU_HAVE_MS_LIB], [test "x$STARPU_MS_LIB" != "x"]) case "$target" in *-*-mingw*|*-*-cygwin*|*-*-msys*) starpu_windows=yes libext=a AC_DEFINE(STARPU_HAVE_WINDOWS, [1], [Define this on windows.]) ;; *-*-linux*) starpu_linux=yes AC_DEFINE(STARPU_LINUX_SYS, [1], [Define to 1 on Linux]) ;; *-*-openbsd*) starpu_openbsd=yes AC_DEFINE(STARPU_OPENBSD_SYS, [1], [Define to 1 on OpenBSD systems]) ;; *-*darwin*) starpu_darwin=yes AC_DEFINE(STARPU_HAVE_DARWIN, [1], [Define this on darwin.]) ;; esac AM_CONDITIONAL([STARPU_HAVE_WINDOWS], [test "x$starpu_windows" = "xyes"]) AM_CONDITIONAL([STARPU_LINUX_SYS], [test "x$starpu_linux" = "xyes"]) AM_CONDITIONAL([STARPU_HAVE_DARWIN], [test "x$starpu_darwin" = "xyes"]) AM_CONDITIONAL([STARPU_OPENBSD_SYS], [test "x$starpu_openbsd" = "xyes"]) # on Darwin, GCC targets i386 by default, so we don't have atomic ops AC_CHECK_SIZEOF([void *]) SIZEOF_VOID_P=$ac_cv_sizeof_void_p case $SIZEOF_VOID_P in 4) case "$target" in i386-*darwin*) CFLAGS="$CFLAGS -march=i686" ;; esac STARPU_MS_LIB_ARCH=X86 ;; 8) STARPU_MS_LIB_ARCH=X64 ;; esac AC_SUBST(STARPU_MS_LIB_ARCH) # This will be useful for program which use CUDA (and .cubin files) which need # some path to the CUDA code at runtime. AC_DEFINE_UNQUOTED(STARPU_BUILD_DIR, "$PWD", [location of StarPU build directory]) AC_SUBST(STARPU_BUILD_DIR, $PWD) case "${srcdir}" in /*) AC_DEFINE_UNQUOTED(STARPU_SRC_DIR, "$(eval echo ${srcdir})", [location of StarPU sources]) AC_SUBST(STARPU_SRC_DIR, "$(eval echo ${srcdir})") ;; *) AC_DEFINE_UNQUOTED(STARPU_SRC_DIR, "$(eval echo $PWD/${srcdir})", [location of StarPU sources]) AC_SUBST(STARPU_SRC_DIR, "$(eval echo $PWD/${srcdir})") ;; esac case "$target" in *-*-mingw*|*-*-cygwin*) AC_ARG_ENABLE(native-winthreads, [AS_HELP_STRING([--enable-native-winthreads], [Use native windows threads instead of pthread])], enable_native_winthreads=$enableval, enable_native_winthreads=no) ;; esac if test x"$enable_native_winthreads" != xyes ; then INCLUDE_PTHREAD_H='#include ' fi AC_CHECK_HEADERS([unistd.h], [AC_DEFINE([STARPU_HAVE_UNISTD_H], [1], [Define to 1 if you have the header file.])]) AC_CHECK_TYPE([struct timespec], AC_DEFINE(STARPU_HAVE_STRUCT_TIMESPEC,[1],[struct timespec is defined]), [], [ #include #include #ifdef HAVE_UNISTD_H #include #endif #include $INCLUDE_PTHREAD_H ]) if test x"$enable_native_winthreads" = xyes ; then CPPFLAGS="$CPPFLAGS -I$STARPU_SRC_DIR/include/pthread_win32" AC_COMPILE_IFELSE( [AC_LANG_PROGRAM([[ #define STARPU_CONFIGURE #include ]], [[ pthread_t t; pthread_create(&t, NULL, NULL, NULL); ]])], AC_DEFINE(STARPU_NATIVE_WINTHREADS,[1],[Using native windows threads]), AC_MSG_ERROR([pthread_create unavailable])) else AC_CHECK_LIB([pthread], [pthread_create], [ LIBS="$LIBS -lpthread" STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS -lpthread" ]) fi AC_SEARCH_LIBS([sqrt],[m],,AC_MSG_ERROR([math library unavailable])) AC_HAVE_LIBRARY([ws2_32]) AC_CHECK_FUNCS([sysconf]) AC_CHECK_FUNCS([getrlimit]) AC_CHECK_FUNCS([scandir]) AC_CHECK_FUNC([pthread_spin_lock], have_pthread_spin_lock=yes, have_pthread_spin_lock=no) if test x$have_pthread_spin_lock = xyes; then AC_DEFINE(HAVE_PTHREAD_SPIN_LOCK,[1],[pthread_spin_lock is available]) AC_DEFINE(STARPU_HAVE_PTHREAD_SPIN_LOCK,[1],[pthread_spin_lock is available]) fi AC_CHECK_FUNC([pthread_barrier_init], have_pthread_barrier=yes, have_pthread_barrier=no) if test x$have_pthread_barrier = xyes; then AC_DEFINE(STARPU_HAVE_PTHREAD_BARRIER,[1],[pthread_barrier is available]) fi # yes, that's non portable, but it's still better than sched_setaffinity AC_CHECK_FUNCS(pthread_setaffinity_np) AC_CHECK_FUNC([pthread_setname_np], have_pthread_setname_np=yes, have_pthread_setname_np=no) if test x$have_pthread_setname_np = xyes; then AC_DEFINE(STARPU_HAVE_PTHREAD_SETNAME_NP,[1],[pthread_setname_np is available]) fi if test "x$cross_compiling" = "xno"; then STARPU_INIT_ZERO([[#include ]], pthread_mutex_t, PTHREAD_MUTEX_INITIALIZER) STARPU_INIT_ZERO([[#include ]], pthread_cond_t, PTHREAD_COND_INITIALIZER) STARPU_INIT_ZERO([[#include ]], pthread_rwlock_t, PTHREAD_RWLOCK_INITIALIZER) fi # There is no posix_memalign on Mac OS X, only memalign AC_CHECK_FUNCS([posix_memalign], [AC_DEFINE([STARPU_HAVE_POSIX_MEMALIGN], [1], [Define to 1 if you have the `posix_memalign' function.])]) AC_CHECK_FUNCS([memalign], [AC_DEFINE([STARPU_HAVE_MEMALIGN], [1], [Define to 1 if you have the `memalign' function.])]) # Some systems don't have drand48 AC_CHECK_FUNC([drand48], have_drand48=yes, have_drand48=no) AC_CHECK_FUNC([erand48_r], have_erand48_r=yes, have_erand48_r=no) # Maybe the user still does not want to use the provided drand48 AC_ARG_ENABLE(default-drand48, [AS_HELP_STRING([--disable-default-drand48], [Do not use the default version of drand48])], enable_default_drand48=$enableval, enable_default_drand48=yes) if test x$have_drand48 = xyes -a x$enable_default_drand48 = xyes ; then AC_DEFINE([STARPU_USE_DRAND48], [1], [Define to 1 if drandr48 is available and should be used]) fi if test x$have_erand48_r = xyes ; then AC_DEFINE([STARPU_USE_ERAND48_R], [1], [Define to 1 if erandr48_r is available]) fi # Some systems do not define strerror_r AC_CHECK_FUNC([strerror_r], [AC_DEFINE([STARPU_HAVE_STRERROR_R], [1], [Define to 1 if the function strerro_r is available.])]) # Some systems may not define setenv AC_CHECK_FUNC([setenv], [AC_DEFINE([STARPU_HAVE_SETENV], [1], [Define to 1 if the function setenv is available.])]) # Some systems do not define unsetenv AC_CHECK_FUNC([unsetenv], [AC_DEFINE([STARPU_HAVE_UNSETENV], [1], [Define to 1 if the function unsetenv is available.])]) # Some systems do not define nearbyintf... AC_CHECK_FUNC([nearbyintf], [AC_DEFINE([STARPU_HAVE_NEARBYINTF], [1], [Define to 1 if the function nearbyintf is available.])]) # ... but they may define rintf. AC_CHECK_FUNC([rintf], [AC_DEFINE([STARPU_HAVE_RINTF], [1], [Define to 1 if the function rintf is available.])]) # Define quick check AC_ARG_ENABLE(quick-check, [AS_HELP_STRING([--enable-quick-check], [Lower default values for the testcases run by make check to allow a faster execution])], enable_quick_check=$enableval, enable_quick_check=no) if test x$enable_quick_check = xyes; then AC_DEFINE(STARPU_QUICK_CHECK, [1], [enable quick check]) fi AM_CONDITIONAL([STARPU_QUICK_CHECK], [test "x$enable_quick_check" = "xyes"]) # Define long check AC_ARG_ENABLE(long-check, [AS_HELP_STRING([--enable-long-check], [Enable some exhaustive checks which take a really long time])], enable_long_check=$enableval, enable_long_check=no) if test x$enable_long_check = xyes; then AC_DEFINE(STARPU_LONG_CHECK, [1], [enable long check]) fi AM_CONDITIONAL([STARPU_LONG_CHECK], [test "x$enable_long_check" = "xyes"]) # Define new check AC_ARG_ENABLE(new-check, [AS_HELP_STRING([--enable-new-check], [Enable new and known-to-fail testcases])], enable_new_check=$enableval, enable_new_check=no) if test x$enable_new_check = xyes; then AC_DEFINE(STARPU_NEW_CHECK, [1], [enable new check]) fi AM_CONDITIONAL([STARPU_NEW_CHECK], [test "x$enable_new_check" = "xyes"]) AC_CHECK_HEADERS([malloc.h], [AC_DEFINE([STARPU_HAVE_MALLOC_H], [1], [Define to 1 if you have the header file.])]) AC_ARG_ENABLE(valgrind, [AS_HELP_STRING([--disable-valgrind], [Do not check the availability of valgrind.h and helgrind.h])], enable_valgrind=$enableval, enable_valgrind=yes) if test "$enable_valgrind" != "no" ; then AC_CHECK_HEADERS([valgrind/valgrind.h], [AC_DEFINE([STARPU_HAVE_VALGRIND_H], [1], [Define to 1 if you have the header file.])]) AC_CHECK_HEADERS([valgrind/memcheck.h], [AC_DEFINE([STARPU_HAVE_MEMCHECK_H], [1], [Define to 1 if you have the header file.])]) AC_CHECK_HEADERS([valgrind/helgrind.h], [AC_DEFINE([STARPU_HAVE_HELGRIND_H], [1], [Define to 1 if you have the header file.])]) fi if test "$enable_valgrind" = "full" ; then AC_DEFINE([STARPU_VALGRIND_FULL], [1], [Define to 1 to disable STARPU_SKIP_IF_VALGRIND when running tests.]) fi AC_CHECK_FUNC([sched_yield], [AC_DEFINE([STARPU_HAVE_SCHED_YIELD], [1], [Define to 1 if the function sched_yield is available.])]) AC_CHECK_HEADERS([aio.h]) AC_CHECK_LIB([rt], [aio_read]) #AC_CHECK_HEADERS([libaio.h]) #AC_CHECK_LIB([aio], [io_setup]) AC_CHECK_FUNCS([copy_file_range]) AC_CHECK_FUNCS([mkostemp]) AC_CHECK_FUNCS([mkdtemp]) AC_CHECK_FUNCS([pread pwrite]) # Depending on the user environment, the hdf5 library may link against some # mpi implementation, and bring surprising runtime behavior. AC_ARG_ENABLE(hdf5, [AS_HELP_STRING([--enable-hdf5], [enable HDF5 support])], enable_hdf5=$enableval, enable_hdf5=no) if test "x$enable_hdf5" != xno ; then AC_ARG_WITH(hdf5-include-dir, [AS_HELP_STRING([--with-hdf5-include-dir=], [specify where HDF5 headers are installed])], [ hdf5_include_dir="$withval" ], [hdf5_include_dir=""]) hdf5_inc_dir="/usr/include/hdf5 /usr/include/hdf5/serial ${hdf5_include_dir}" enable_include_hdf5=no for f in $hdf5_inc_dir; do if test -n "$f" ; then SAVED_CPPFLAGS="${CPPFLAGS}" CPPFLAGS="$CPPFLAGS -I$f" AC_CHECK_HEADERS([hdf5.h]) if test "$ac_cv_header_hdf5_h" = "yes" ; then CPPFLAGS="-I${f} ${SAVED_CFLAGS}" enable_include_hdf5=yes break else CPPFLAGS=${SAVED_CPPFLAGS} fi unset ac_cv_header_hdf5_h fi done AC_ARG_WITH(hdf5-lib-dir, [AS_HELP_STRING([--with-hdf5-lib-dir=], [specify where HDF5 libraries are installed])], [ hdf5_libraries_dir="$withval" ], [hdf5_libraries_dir=""]) hdf5_lib_dir="/usr/lib/x86_64-linux-gnu/hdf5 /usr/lib/x86_64-linux-gnu/hdf5/serial ${hdf5_libraries_dir}" enable_libraries_hdf5=no for f in $hdf5_lib_dir; do if test -n "$f" ; then SAVED_LDFLAGS="${LDFLAGS}" LDFLAGS=-L${f} STARPU_HAVE_LIBRARY(HDF5, [hdf5]) if test "$ac_cv_lib_hdf5_main" = "yes" ; then LDFLAGS="-L${f} ${SAVED_LDFLAGS} ${STARPU_HDF5_LDFLAGS}" enable_libraries_hdf5=yes break else LDFLAGS=${SAVED_LDFLAGS} fi unset ac_cv_lib_hdf5_main fi done fi if test "x$enable_libraries_hdf5" = "xyes" -a "x$enable_include_hdf5" = "xyes" -a "x$enable_hdf5" != "xno"; then AC_DEFINE([STARPU_HAVE_HDF5], [1], [Define to 1 if you have the header file.]) enable_hdf5=yes else enable_hdf5=no fi AM_CONDITIONAL(STARPU_HAVE_HDF5, test "x$enable_hdf5" = "xyes") # This defines HAVE_SYNC_VAL_COMPARE_AND_SWAP STARPU_CHECK_SYNC_VAL_COMPARE_AND_SWAP STARPU_CHECK_SYNC_VAL_COMPARE_AND_SWAP_8 # This defines HAVE_SYNC_BOOL_COMPARE_AND_SWAP STARPU_CHECK_SYNC_BOOL_COMPARE_AND_SWAP STARPU_CHECK_SYNC_BOOL_COMPARE_AND_SWAP_8 # This defines HAVE_SYNC_FETCH_AND_ADD STARPU_CHECK_SYNC_FETCH_AND_ADD STARPU_CHECK_SYNC_FETCH_AND_ADD_8 # This defines HAVE_SYNC_FETCH_AND_OR STARPU_CHECK_SYNC_FETCH_AND_OR STARPU_CHECK_SYNC_FETCH_AND_OR_8 # This defines HAVE_SYNC_LOCK_TEST_AND_SET STARPU_CHECK_SYNC_LOCK_TEST_AND_SET # This defines HAVE_ATOMIC_COMPARE_EXCHANGE_N STARPU_CHECK_ATOMIC_COMPARE_EXCHANGE_N STARPU_CHECK_ATOMIC_COMPARE_EXCHANGE_N_8 # This defines HAVE_ATOMIC_EXCHANGE_N STARPU_CHECK_ATOMIC_EXCHANGE_N STARPU_CHECK_ATOMIC_EXCHANGE_N_8 # This defines HAVE_ATOMIC_FETCH_ADD STARPU_CHECK_ATOMIC_FETCH_ADD STARPU_CHECK_ATOMIC_FETCH_ADD_8 # This defines HAVE_ATOMIC_FETCH_OR STARPU_CHECK_ATOMIC_FETCH_OR STARPU_CHECK_ATOMIC_FETCH_OR_8 # This defines HAVE_ATOMIC_TEST_AND_SET STARPU_CHECK_ATOMIC_TEST_AND_SET # This defines HAVE_SYNC_SYNCHRONIZE STARPU_CHECK_SYNC_SYNCHRONIZE CPPFLAGS="${CPPFLAGS} -D_GNU_SOURCE " STARPU_SEARCH_LIBS([LIBNUMA],[set_mempolicy],[numa],[enable_libnuma=yes],[enable_libnuma=no]) AC_MSG_CHECKING(whether libnuma is available) AC_MSG_RESULT($enable_libnuma) if test x$enable_libnuma = xyes; then AC_DEFINE(STARPU_HAVE_LIBNUMA,[1],[libnuma is available]) fi AC_MSG_CHECKING(whether statement expressions are available) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ #define maxint(a,b) ({int _a = (a), _b = (b); _a > _b ? _a : _b; }) ]], [[ int x=maxint(12,42); ]])], [statement_expressions="yes"], [statement_expressions="no"]) AC_MSG_RESULT($statement_expressions) if test x$statement_expressions = xyes; then AC_DEFINE(STARPU_HAVE_STATEMENT_EXPRESSIONS,[1],[statement expressions are available]) fi saved_LIBS="${LIBS}" LIBS="${LIBS} -ldl" STARPU_DLOPEN_LDFLAGS="" AC_CHECK_FUNCS([dlopen], [STARPU_DLOPEN_LDFLAGS="-ldl"]) LIBS="$saved_LIBS" ############################################################################### # # # SCHED_CTX settings # # # ############################################################################### AC_MSG_CHECKING(maximum number of sched_ctxs) AC_ARG_ENABLE(max_sched_ctxs, [AS_HELP_STRING([--enable-max-sched-ctxs=], [maximum number of sched_ctxs])], max_sched_ctxs=$enableval, max_sched_ctxs=10) if test x$max_sched_ctxs = x -o x$max_sched_ctxs = xyes then AC_MSG_ERROR([The --enable-max_sched_ctxs option needs to be given a number]) fi AC_MSG_RESULT($max_sched_ctxs) AC_DEFINE_UNQUOTED(STARPU_NMAX_SCHED_CTXS, [$max_sched_ctxs], [Maximum number of sched_ctxs supported]) AC_ARG_ENABLE([sc_hypervisor], [AS_HELP_STRING([--enable-sc-hypervisor], [enable resizing contexts (experimental)])], [enable_sc_hypervisor="yes"], [enable_sc_hypervisor="no"]) #for pkgconfig AC_SUBST(STARPU_SC_HYPERVISOR) if test "x$enable_sc_hypervisor" = "xyes"; then AC_DEFINE(STARPU_USE_SC_HYPERVISOR, [1], [enable sc_hypervisor lib]) # PKG_CHECK_MODULES([SC_HYPERVISOR], [libsc_hypervisor], [], build_sc_hypervisor="yes") STARPU_SC_HYPERVISOR="-lsc_hypervisor" build_sc_hypervisor="yes" else build_sc_hypervisor="no" fi AM_CONDITIONAL([STARPU_BUILD_SC_HYPERVISOR], [test "x$build_sc_hypervisor" = "xyes"]) AM_CONDITIONAL([STARPU_USE_SC_HYPERVISOR], [test "x$build_sc_hypervisor" = "xyes"]) AC_ARG_ENABLE([sc_hypervisor_debug], [AS_HELP_STRING([--enable-sc-hypervisor-debug], [enable debug for resizing contexts (experimental)])], [enable_sc_hypervisor_debug="yes"], [enable_sc_hypervisor_debug="no"]) AC_SUBST(STARPU_SC_HYPERVISOR_DEBUG, $enable_sc_hypervisor_debug) AM_CONDITIONAL([STARPU_SC_HYPERVISOR_DEBUG], [test "x$enable_sc_hypervisor_debug" = "xyes"]) if test "x$enable_sc_hypervisor_debug" = "xyes"; then AC_DEFINE(STARPU_SC_HYPERVISOR_DEBUG, [1], [enable debug sc_hypervisor]) fi ############################################################################### # # # CPUs settings # # # ############################################################################### AC_MSG_CHECKING(maximum number of CPUs) AC_ARG_ENABLE(maxcpus, [AS_HELP_STRING([--enable-maxcpus=], [maximum number of CPUs])], maxcpus=$enableval, maxcpus=auto) if test x$maxcpus = xauto then confcpu=$(getconf _NPROCESSORS_ONLN 2>/dev/null) if test x$confcpu = x then AC_MSG_ERROR([cannot get the number of CPUS, please specify a numerical value with --enable-maxcpus]) fi maxcpus=2 while test $maxcpus -lt $confcpu do maxcpus=`expr $maxcpus \* 2` done fi if test x$maxcpus = x -o x$maxcpus = xyes then AC_MSG_ERROR([The --enable-maxcpus option needs to be given a number]) fi AC_MSG_RESULT($maxcpus) AC_DEFINE_UNQUOTED(STARPU_MAXCPUS, [$maxcpus], [Maximum number of CPUs supported]) AC_MSG_CHECKING(whether CPUs should be used) AC_ARG_ENABLE(cpu, [AS_HELP_STRING([--disable-cpu], [do not use the CPU(s)])], enable_cpu=$enableval, enable_cpu=yes) AC_MSG_RESULT($enable_cpu) AC_SUBST(STARPU_USE_CPU, $enable_cpu) AM_CONDITIONAL(STARPU_USE_CPU, test x$enable_cpu = xyes) if test x$enable_cpu = xyes; then AC_DEFINE(STARPU_USE_CPU, [1], [CPU driver is activated]) fi ############################################################################### # # # CUDA settings # # # ############################################################################### AC_MSG_CHECKING(maximum number of CUDA devices) AC_ARG_ENABLE(maxcudadev, [AS_HELP_STRING([--enable-maxcudadev=], [maximum number of CUDA devices])], nmaxcudadev=$enableval, nmaxcudadev=4) if test x$nmaxcudadev = x -o x$nmaxcudadev = xyes then AC_MSG_ERROR([The --enable-maxcudadev option needs to be given a number]) fi AC_MSG_RESULT($nmaxcudadev) AC_DEFINE_UNQUOTED(STARPU_MAXCUDADEVS, [$nmaxcudadev], [maximum number of CUDA devices]) AC_ARG_ENABLE(cuda, [AS_HELP_STRING([--disable-cuda], [do not use CUDA device(s)])],, [enable_cuda=maybe]) # We don't want to be hit by conflicts between simgrid, boost, and CUDA if test x$enable_simgrid = xyes; then if test x$enable_cuda = xyes; then AC_MSG_ERROR([Building against CUDA should not be enabled with simgrid]) fi enable_cuda=no fi #AC_MSG_CHECKING(whether CUDA is available) AC_ARG_WITH(cuda-dir, [AS_HELP_STRING([--with-cuda-dir=], [specify CUDA installation directory])], [ cuda_dir="$withval" # in case this was not explicit yet enable_cuda=yes ], cuda_dir=no) AC_ARG_WITH(cuda-include-dir, [AS_HELP_STRING([--with-cuda-include-dir=], [specify where CUDA headers are installed])], [ cuda_include_dir="$withval" # in case this was not explicit yet enable_cuda=yes ], [cuda_include_dir=no]) AC_ARG_WITH(cuda-lib-dir, [AS_HELP_STRING([--with-cuda-lib-dir=], [specify where CUDA libraries are installed])], [ cuda_lib_dir="$withval" # in case this was not explicit yet enable_cuda=yes ], [cuda_lib_dir=no]) AC_DEFUN([STARPU_CHECK_CUDA_L], [ __cuda_L=$1 SAVED_LDFLAGS="${LDFLAGS}" STARPU_CUDA_LDFLAGS="${__cuda_L}" AC_MSG_CHECKING(whether CUDA library is available in $__cuda_L) AC_MSG_RESULT() LDFLAGS="${SAVED_LDFLAGS} ${__cuda_L}" AC_HAVE_LIBRARY([cudart],[have_valid_cuda=yes],[have_valid_cuda=no]) unset ac_cv_lib_cudart_main if test "$have_valid_cuda" = yes ; then LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}" # we also check that CUBLAS is available AC_HAVE_LIBRARY([cublas],[have_valid_cuda=yes],[have_valid_cuda=no]) unset ac_cv_lib_cublas_main fi LDFLAGS="${SAVED_LDFLAGS}" ]) AC_DEFUN([STARPU_CHECK_CUDA], [ __cuda_dir=$1 __cuda_include_dir=$2 __cuda_lib_dir=$3 if test -z "$__cuda_lib_dir" ; then __cuda_lib_dir=no fi if test -z "$__cuda_include_dir" ; then __cuda_include_dir=no fi if test -z "$__cuda_dir" ; then __cuda_dir=no fi if test "$__cuda_dir" != "no" ; then AC_MSG_CHECKING(whether CUDA is available in $__cuda_dir, $__cuda_include_dir and $__cuda_lib_dir) else AC_MSG_CHECKING(whether CUDA is available) fi AC_MSG_RESULT() if test "$__cuda_include_dir" = "no" -a "$__cuda_dir" != "no" ; then __cuda_include_dir="$__cuda_dir/include" fi SAVED_CPPFLAGS="$CPPFLAGS" have_valid_cuda=no if test "$__cuda_include_dir" != "no" ; then CPPFLAGS="${CPPFLAGS} -I$__cuda_include_dir" fi AC_CHECK_HEADER([cuda.h],[have_valid_cuda=yes],[have_valid_cuda=no]) unset ac_cv_header_cuda_h if test "$have_valid_cuda" = "yes" ; then if test "$__cuda_lib_dir" != "no" ; then STARPU_CHECK_CUDA_L("-L${__cuda_lib_dir}") else if test "$__cuda_dir" != "no" ; then for __cuda_libdir in lib64 lib lib/x64 lib/Win32 ; do STARPU_CHECK_CUDA_L("-L${__cuda_dir}/${__cuda_libdir}") if test "$have_valid_cuda" = yes ; then break fi done else STARPU_CHECK_CUDA_L("") fi fi fi if test "$have_valid_cuda" = "no" ; then CPPFLAGS="${SAVED_CPPFLAGS}" unset STARPU_CUDA_LDFLAGS else if test "$NVCC" = "" ; then AC_PATH_PROG([NVCC], [nvcc], [not-found], [$cuda_dir/bin:$PATH:/usr/local/cuda/bin:/usr/bin:/bin]) fi if test "x$NVCC" = "xnot-found"; then AC_MSG_WARN(['nvcc' not found, disabling CUDA]) have_valid_cuda=no else # This is for very old cuda, to enable the use of double etc. AC_MSG_CHECKING(whether nvcc supports sm_13 architecture) OLD_NVCCFLAGS="$NVCCFLAGS" NVCCFLAGS="$NVCCFLAGS -arch sm_13" echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 if test $? -eq 0 then AC_MSG_RESULT(yes) else AC_MSG_RESULT(no) NVCCFLAGS="$OLD_NVCCFLAGS" fi # This is for recent cuda, which complains if we don't actually set an arch!? AC_MSG_CHECKING(whether nvcc supports -Wno-deprecated-gpu-targets) OLD_NVCCFLAGS="$NVCCFLAGS" NVCCFLAGS="$NVCCFLAGS -Wno-deprecated-gpu-targets" echo "int main(int argc, char **argv) { return 0;}" > cuda_test.cu $NVCC $NVCCFLAGS -c cuda_test.cu >/dev/null 2>&1 if test $? -eq 0 then AC_MSG_RESULT(yes) else AC_MSG_RESULT(no) NVCCFLAGS="$OLD_NVCCFLAGS" fi rm -f cuda_test* fi if test -n "$NVCC_CC"; then NVCCFLAGS="${NVCCFLAGS} -ccbin \${NVCC_CC}" fi if test "$__cuda_include_dir" != "no"; then STARPU_CUDA_CPPFLAGS="-I$__cuda_include_dir" NVCCFLAGS="${NVCCFLAGS} -I$__cuda_include_dir" fi fi ]) if test x$enable_cuda = xyes -o x$enable_cuda = xmaybe; then STARPU_CHECK_CUDA("$cuda_dir", "$cuda_include_dir", "$cuda_lib_dir") if test "$have_valid_cuda" = "no" ; then STARPU_CHECK_CUDA("$CUDA_ROOT", "$CUDA_INC_PATH", "$CUDA_LIB_PATH") fi if test "$have_valid_cuda" = "no" ; then if test "$NVCC" = "" ; then AC_PATH_PROG([NVCC], [nvcc], [not-found], [$PATH:/usr/local/cuda/bin]) fi if test "$NVCC" != not-found ; then CUDA_ROOT="$(dirname $NVCC)/.." # Try to find all of cuda just from the availability of nvcc in PATH STARPU_CHECK_CUDA("$CUDA_ROOT", "$CUDA_ROOT/include", "$CUDA_ROOT/lib") cuda_dir=$(dirname $NVCC)/.. else unset NVCC fi fi if test "$have_valid_cuda" = "no" ; then for f in "/usr/local/cuda" "/c/cuda" "/cygdrive/c/cuda" "/opt/cuda" "$CUDA_ROOT" "$CUDA_PATH" "$CUDA_INC_PATH/.." "$CUDA_INC/.." "$CUDA_BIN/.." "$CUDA_SDK/.." "$CUDA_INSTALL_PATH" "$CUDA_TOOLKIT"; do if test -n "$f" ; then STARPU_CHECK_CUDA("$f", "no", "no") if test "$have_valid_cuda" = "yes" ; then break fi fi done fi # Check cuda is compatible with the C compiler AC_MSG_CHECKING(whether CUDA is working) if test "$have_valid_cuda" = "yes" ; then SAVED_CPPFLAGS="${CPPFLAGS}" CPPFLAGS="${CPPFLAGS} ${STARPU_CUDA_CPPFLAGS}" SAVED_LDFLAGS="${LDFLAGS}" LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS} -lcudart" AC_COMPILE_IFELSE([AC_LANG_PROGRAM( [[#include ]], [[]] )], [ AC_RUN_IFELSE([AC_LANG_PROGRAM( [[#include ]], [[]] )], [have_valid_cuda="yes"], [ AC_MSG_RESULT([CUDA found and can be compiled, but compiled application can not be run, is the CUDA path missing in LD_LIBRARY_PATH?]) have_valid_cuda="no" ]) ], [ AC_MSG_ERROR([CUDA found, but cuda.h could not be compiled]) have_valid_cuda="no" ] ) CPPFLAGS="${SAVED_CPPFLAGS}" LDFLAGS="${SAVED_LDFLAGS}" fi AC_MSG_RESULT($have_valid_cuda) # in case CUDA was explicitly required, but is not available, this is an error if test x$enable_cuda = xyes -a x$have_valid_cuda = xno; then AC_MSG_ERROR([cannot find CUDA]) fi # now we enable CUDA if and only if a proper setup is available enable_cuda=$have_valid_cuda fi AC_MSG_CHECKING(whether CUDA should be used) AC_MSG_RESULT($enable_cuda) AC_SUBST(STARPU_USE_CUDA, $enable_cuda) AM_CONDITIONAL(STARPU_USE_CUDA, test x$enable_cuda = xyes) cc_or_nvcc=$CC if test x$enable_cuda = xyes; then cc_or_nvcc=$NVCC AC_DEFINE(STARPU_USE_CUDA, [1], [CUDA support is activated]) # On Darwin, the libstdc++ dependency is not automatically added by nvcc # case "$target" in # *-*darwin*) AC_HAVE_LIBRARY([stdc++], []) ;; # #*-*darwin*) AC_HAVE_LIBRARY([stdc++], [STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lstdc++"]) ;; # esac STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcudart -lcublas" STARPU_CUFFT_LDFLAGS="-lcufft" AC_LANG_PUSH([C++]) case \ $NVCCFLAGS\ in *\ -std=*\ *) ;; *) SAVED_CXX="$CXX" CXX="$NVCC" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ #ifdef STARPU_HAVE_SIMGRID_MSG_H #include #include #else #include #endif ]])],, NVCCFLAGS="-std=c++11 $NVCCFLAGS") CXX="$SAVED_CXX" esac AC_LANG_POP([C++]) if test "$F77" = "gfortran" -o "$FC" = "gfortran" ; then STARPU_CUDA_FORTRAN_LDFLAGS="-lgfortran" AC_SUBST(STARPU_CUDA_FORTRAN_LDFLAGS) fi #in case this is a 64bit setup, we tell nvcc to use a -m64 flag, if missing from existing flags if test x$SIZEOF_VOID_P = x8; then case \ $NVCCFLAGS\ in *\ -m64\ *) ;; *) NVCCFLAGS="${NVCCFLAGS} -m64" ;; esac fi SAVED_CPPFLAGS="${CPPFLAGS}" CPPFLAGS="${CPPFLAGS} ${STARPU_CUDA_CPPFLAGS}" SAVED_LDFLAGS="${LDFLAGS}" LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS}" SAVED_LIBS="${LIBS}" AC_CHECK_HEADERS([cuda_gl_interop.h]) AC_CHECK_HEADERS([cublasLt.h], [ AC_CHECK_LIB([cublasLt], [cublasLtCreate], [AC_DEFINE([STARPU_HAVE_LIBCUBLASLT], [1], [Define to 1 if you have the cublasLt library]) STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcublasLt"]) ]) AC_CHECK_LIB([cusparse], [cusparseCreate], [AC_DEFINE([STARPU_HAVE_LIBCUSPARSE], [1], [Define to 1 if you have the cusparse library]) STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcusparse"]) AC_CHECK_DECLS([cusparseSetStream], [], [], [[#include ]]) # we also check that CuSolver is available AC_CHECK_LIB([cusolver],[cusolverDnCreate], [AC_DEFINE([STARPU_HAVE_LIBCUSOLVER], [1], [Define to 1 if you have the cusolver library]) STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcusolver"]) AC_MSG_CHECKING(whether nvidia-ml can be used) AC_COMPILE_IFELSE([AC_LANG_PROGRAM( [[#include ]], [[ __typeof__(nvmlInit) *mynvmlInit = nvmlInit; mynvmlInit(); ]] )], [ AC_DEFINE([STARPU_HAVE_NVML_H], [1], [Define to 1 if you have the nvml.h header]) AC_MSG_RESULT(yes) AC_CHECK_DECLS([nvmlDeviceGetTotalEnergyConsumption], [], [], [[#include ]]) ], [ AC_MSG_RESULT(no) AC_MSG_WARN([nvml.h could not be compiled. This will prevent from correct understanding of the machine topology.]) NO_NVML="Warning: no nvml.h found" ] ) CPPFLAGS="${SAVED_CPPFLAGS}" LDFLAGS="${SAVED_LDFLAGS}" LIBS="${SAVED_LIBS}" fi AC_SUBST(CC_OR_NVCC, $cc_or_nvcc) have_magma=no if test x$enable_cuda = xyes; then PKG_CHECK_MODULES([MAGMA], [magma], [ AC_DEFINE([STARPU_HAVE_MAGMA], [1], [Define to 1 if you have the MAGMA library.]) AC_SUBST([STARPU_HAVE_MAGMA], [1]) have_magma=yes ], [:]) fi AM_CONDITIONAL(STARPU_HAVE_MAGMA, [test x$have_magma = xyes]) AC_MSG_CHECKING(whether MAGMA should be used) AC_MSG_RESULT($have_magma) # cufftDoubleComplex may not be available on an old CUDA setup AC_CHECK_TYPE(cufftDoubleComplex, [have_cufftdoublecomplex=yes], [have_cufftdoublecomplex=no], [#include ]) AM_CONDITIONAL(STARPU_HAVE_CUFFTDOUBLECOMPLEX, test x$have_cufftdoublecomplex = xyes) if test x$have_cufftdoublecomplex = xyes; then AC_DEFINE(STARPU_HAVE_CUFFTDOUBLECOMPLEX, [1], [cufftDoubleComplex is available]) fi # The CURAND library is only available since CUDA 3.2 have_curand=$enable_cuda if test x$enable_cuda = xyes; then SAVED_LDFLAGS="${LDFLAGS}" LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS}" AC_HAVE_LIBRARY([curand],[have_curand=yes],[have_curand=no]) LDFLAGS="${SAVED_LDFLAGS}" fi AC_MSG_CHECKING(whether CURAND is available) AC_MSG_RESULT($have_curand) if test x$have_curand = xyes; then AC_DEFINE(STARPU_HAVE_CURAND,[1], [CURAND is available]) STARPU_CURAND_LDFLAGS="$STARPU_CURAND_LDFLAGS -lcurand" AC_SUBST(STARPU_CURAND_LDFLAGS) fi # Peer transfers are only supported since CUDA 4.0 # Disable them if user explicitly wants to disable them AC_ARG_ENABLE(cuda_memcpy_peer, [AS_HELP_STRING([--disable-cuda-memcpy-peer], [do not allow peer transfers when using CUDA 4.0])],, [enable_cuda_memcpy_peer=$enable_cuda]) if test x$enable_cuda_memcpy_peer = xyes; then AC_DEFINE(STARPU_HAVE_CUDA_MEMCPY_PEER,[1],[Peer transfers are supported in CUDA]) fi AC_ARG_ENABLE(cuda_map, [AS_HELP_STRING([--disable-cuda-map], [do not allow CUDA memory mapping when available])],, [enable_cuda_map=yes]) if test x$enable_cuda_map = xyes -a x$enable_cuda = xyes ; then SAVED_LDFLAGS="${LDFLAGS}" LDFLAGS="${LDFLAGS} ${STARPU_CUDA_LDFLAGS}" AC_CHECK_MEMBER([struct cudaDeviceProp.canMapHostMemory], AC_DEFINE([STARPU_HAVE_CUDA_CANMAPHOST],[1],[Define to 1 if CUDA device properties include canMapHostMemory]), , [[#include ]]) AC_CHECK_MEMBER([struct cudaDeviceProp.unifiedAddressing], AC_DEFINE([STARPU_HAVE_CUDA_UNIFIEDADDR],[1],[Define to 1 if CUDA device properties include unifiedAddressing]), , [[#include ]]) AC_CHECK_MEMBER([struct cudaDeviceProp.managedMemory], AC_DEFINE([STARPU_HAVE_CUDA_MNGMEM],[1],[Define to 1 if CUDA device properties include managedMemory]), , [[#include ]]) AC_CHECK_MEMBER([struct cudaDeviceProp.pageableMemoryAccess], AC_DEFINE([STARPU_HAVE_CUDA_PAGEABLEMEM],[1],[Define to 1 if CUDA device properties include pageableMemoryAccess]), , [[#include ]]) AC_CHECK_MEMBER([struct cudaPointerAttributes.type], AC_DEFINE([STARPU_HAVE_CUDA_POINTER_TYPE],[1],[Define to 1 if CUDA pointer attributes include a type field instead of old memoryType field]), , [[#include ]]) LDFLAGS="${SAVED_LDFLAGS}" AC_DEFINE(STARPU_USE_CUDA_MAP,[1],[Define to 1 if CUDA Mapped host memory may be used]) fi if test x$enable_cuda = xyes; then AC_ARG_ENABLE(cuda0, [AS_HELP_STRING([--enable-cuda0], [Enable the minimal-support CUDA driver (only for testing)])],, [enable_cuda0=no]) if test x$enable_cuda0 = xyes; then AC_DEFINE(STARPU_USE_CUDA0,[1],[Define to 1 if the CUDA0 driver is to be tested]) fi AC_ARG_ENABLE(cuda1, [AS_HELP_STRING([--enable-cuda0], [Enable the small-support CUDA driver (only for testing)])],, [enable_cuda1=no]) if test x$enable_cuda1 = xyes; then AC_DEFINE(STARPU_USE_CUDA1,[1],[Define to 1 if the CUDA1 driver is to be tested]) fi if test x$starpu_windows != xyes ; then STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lstdc++" fi AC_SUBST(STARPU_CUDA_LDFLAGS) AC_SUBST(STARPU_CUFFT_LDFLAGS) AC_SUBST(STARPU_CUDA_CPPFLAGS) fi AM_CONDITIONAL(STARPU_USE_CUDA0, test x$enable_cuda0 = xyes) AM_CONDITIONAL(STARPU_USE_CUDA1, test x$enable_cuda1 = xyes) AC_ARG_VAR([NVCC], [CUDA compiler]) AC_ARG_VAR([NVCC_CC], [C compiler for CUDA compiler]) AC_ARG_VAR([NVCCFLAGS], [CUDA compiler flags]) ############################################################################### # # # HIP settings # # # ############################################################################### AC_MSG_CHECKING(maximum number of HIP devices) AC_ARG_ENABLE(maxhipdev, [AS_HELP_STRING([--enable-maxhipdev=], [maximum number of HIP devices])], nmaxhipdev=$enableval, nmaxhipdev=8) if test x$nmaxhipdev = x -o x$nmaxhipdev = xyes then AC_MSG_ERROR([The --enable-maxhipdev option needs to be given a number]) fi AC_MSG_RESULT($nmaxhipdev) AC_DEFINE_UNQUOTED(STARPU_MAXHIPDEVS, [$nmaxhipdev], [maximum number of HIP devices]) AC_ARG_ENABLE(hip, [AS_HELP_STRING([--enable-hip], [Enable the minimal-support HIP driver (only for testing)])],, [enable_hip=maybe]) if test x$enable_cuda = xyes; then # hip_runtime.h conflicts with cuda_runtime.h # see https://github.com/ROCm-Developer-Tools/HIP/issues/2703 if test x$enable_hip = xyes ; then AC_MSG_WARN([Disabling HIP as CUDA is enabled, see https://github.com/ROCm-Developer-Tools/HIP/issues/2703]) fi enable_hip=no fi if test x$enable_simgrid = xyes; then if test x$enable_hip = xyes; then AC_MSG_ERROR([HIP not supported with simgrid]) fi enable_hip=no fi have_valid_hip=no if test x$enable_hip != xno; then AC_PATH_PROG([HIPCONFIG], [hipconfig], [not-found]) if test "x$HIPCONFIG" = "xnot-found"; then if test x$enable_hip = xyes; then AC_MSG_ERROR(['hipconfig' not found for HIP support]) fi have_valid_hip=no else HIP_PLATFORM="$(hipconfig --platform)" HIP_DIR="$(hipconfig --path)" HIP_LIB_DIR="$HIP_DIR/lib" HIP_INCLUDE_DIR="$HIP_DIR/include" STARPU_HIP_CPPFLAGS="$(hipconfig --cpp_config | tr -d '\n') -L$HIP_LIB_DIR" if test "$HIP_PLATFORM" = "nvidia"; then STARPU_HIP_CPPFLAGS="$STARPU_HIP_CPPFLAGS -DSTARPU_HIP_PLATFORM_NVIDIA" fi if test "$HIP_PLATFORM" = "amd"; then STARPU_HIP_CPPFLAGS="$STARPU_HIP_CPPFLAGS -DSTARPU_HIP_PLATFORM_AMD" fi HIP_CLANG_PATH="$(hipconfig --hipclangpath)" have_valid_hip=yes AC_ARG_WITH([hipblas], [AS_HELP_STRING([--with-hipblas=], [specify where hipblas is installed])], [custom_hipblas_dir="$withval"], []) if test x$custom_hipblas_dir != x; then HIPBLAS_INCLUDE_DIR="$custom_hipblas_dir/include" HIPBLAS_LIB_DIR="$custom_hipblas_dir/lib" STARPU_HIPBLAS_DIRS="-I$HIPBLAS_INCLUDE_DIR -L$HIPBLAS_LIB_DIR" fi HIPCCFLAGS="$HIPCCFLAGS $STARPU_HIP_CPPFLAGS" fi fi if test "$HIP_PLATFORM" = "amd"; then SAVED_LIBS=${LIBS} SAVED_LDFLAGS="${LDFLAGS}" LDFLAGS="${LDFLAGS} -L$HIP_LIB_DIR" AC_SEARCH_LIBS([hipMemGetInfo],[amdhip64],,have_valid_hip=no) LDFLAGS="${SAVED_LDFLAGS}" LIBS=${SAVED_LIBS} fi if test x$have_valid_hip = xyes; then SAVED_CPPFLAGS="$CPPFLAGS" CPPFLAGS="${CPPFLAGS} $STARPU_HIPBLAS_DIRS $STARPU_HIP_CPPFLAGS " AC_CHECK_HEADERS([hip/hip_runtime.h hip/hip_runtime_api.h],[have_valid_hip=yes],[have_valid_hip=no]) if test x$custom_hipblas_dir != x; then AC_CHECK_HEADER([$custom_hipblas_dir/include/hipblas.h],[have_valid_hipblas=yes],[have_valid_hipblas=no]) AC_MSG_WARN(['hipblas' custom]) else AC_CHECK_HEADER([hipblas/hipblas.h],[have_valid_hipblas=yes],[have_valid_hipblas=no]) AC_MSG_WARN(['hipblas' default]) fi if test x$have_valid_hipblas = xyes; then AC_HAVE_LIBRARY([hipblas],[have_valid_hipblas=yes],[have_valid_hipblas=no]) fi if test "$HIP_PLATFORM" = "amd"; then if test x$have_valid_hipblas = xyes; then AC_CHECK_HEADERS([rocblas/rocblas.h],[have_valid_hipblas=yes],[have_valid_hipblas=no]) fi if test x$have_valid_hipblas = xyes; then AC_HAVE_LIBRARY([rocblas],[have_valid_hipblas=yes],[have_valid_hipblas=no]) fi fi if test x$have_valid_hipblas = xyes; then AC_DEFINE([STARPU_USE_HIPBLAS], [1], [HIPBLAS support is enabled]) if test x$custom_hipblas_dir != x; then HIPCCFLAGS="$HIPCCFLAGS -I$HIPBLAS_INCLUDE_DIR" STARPU_HIPBLAS_LDFLAGS="-L$HIPBLAS_LIB_DIR" fi STARPU_HIPBLAS_LDFLAGS="$STARPU_HIPBLAS_LDFLAGS -lhipblas" if test "$HIP_PLATFORM" = "amd"; then STARPU_HIPBLAS_LDFLAGS="$STARPU_HIPBLAS_LDFLAGS -lrocblas" fi else AC_MSG_WARN(['hipblas' not found, disabling HIP examples]) fi CPPFLAGS="${SAVED_CPPFLAGS}" fi AC_SUBST(STARPU_USE_HIPBLAS, $have_valid_hipblas) AM_CONDITIONAL(STARPU_USE_HIPBLAS, test x$have_valid_hipblas = xyes) if test x$have_valid_hip = xyes; then if test -z "$HIP_DIR"; then have_valid_hip=no fi if test -z "$HIP_LIB_DIR"; then have_valid_hip=no fi if test -z "$HIP_INCLUDE_DIR"; then have_valid_hip=no fi if test "$HIPCC" = ""; then AC_PATH_PROG([HIPCC], [hipcc], [not-found], [$HIP_CLANG_PATH:$PATH:/usr/bin:/bin]) fi #testing if hipcc is defined, if not => STARPU_USE_HIP undefined if test "x$HIPCC" = "xnot-found"; then AC_MSG_WARN(['hipcc' not found, disabling HIP]) have_valid_hip=no fi if test "$HIP_PLATFORM" = "nvidia"; then HIPCCFLAGS="$HIPCCFLAGS --x cu" fi if test "x$have_valid_hip" = xyes; then AC_MSG_CHECKING(whether $HIPCC is working) rm -f conftest.hip conftest.o touch conftest.hip AS_IF([$HIPCC $HIPCCFLAGS conftest.hip -o conftest.o -c $STARPU_HIP_CPPFLAGS], [AC_MSG_RESULT(yes)], [ AC_MSG_RESULT(no) AC_MSG_WARN(['hipcc' does not work, disabling HIP]) have_valid_hip=no ]) fi fi # in case HIP was explicitly required, but is not available, this is an error if test x$enable_hip = xyes -a x$have_valid_hip = xno; then AC_MSG_ERROR([cannot find HIP]) fi # now we enable HIP if and only if a proper setup is available enable_hip=$have_valid_hip if test "x$enable_hip" = xyes; then AC_DEFINE(STARPU_USE_HIP,[1],[Define to 1 if the HIP driver is to be tested]) if test "$HIP_PLATFORM" = "nvidia"; then STARPU_HIP_LDFLAGS="-lcuda -lcudart -lcublas $STARPU_HIPBLAS_LDFLAGS -lstdc++" fi if test "$HIP_PLATFORM" = "amd"; then STARPU_HIP_LDFLAGS="-L$HIP_LIB_DIR -lamdhip64 $STARPU_HIPBLAS_LDFLAGS -lstdc++" fi AC_ARG_ENABLE(hip_memcpy_peer, [AS_HELP_STRING([--disable-hip-memcpy-peer], [if you want to disable peer transfers when using hip])],, [enable_hip_memcpy_peer=$enable_hip]) if test x$enable_hip_memcpy_peer = xyes; then AC_DEFINE(STARPU_HAVE_HIP_MEMCPY_PEER,[1],[Peer transfers are supported in HIP]) fi else STARPU_HIP_LDFLAGS= STARPU_HIP_CPPFLAGS= enable_hip_memcpy_peer=no fi AC_SUBST(STARPU_HIP_LDFLAGS) AC_SUBST(STARPU_HIP_CPPFLAGS) AM_CONDITIONAL(STARPU_USE_HIP, test x$enable_hip = xyes) #AC_ARG_VAR([HIPCC_CC], [C compiler for HIP compiler]) AC_ARG_VAR([HIPCCFLAGS], [HIP compiler flags]) ############################################################################### # # # OpenCL settings # # # ############################################################################### AC_MSG_CHECKING(maximum number of OpenCL devices) AC_ARG_ENABLE(maxopencldev, [AS_HELP_STRING([--enable-maxopencldev=], [maximum number of OPENCL devices])], nmaxopencldev=$enableval, nmaxopencldev=8) if test x$nmaxopencldev = x -o x$nmaxopencldev = xyes then AC_MSG_ERROR([The --enable-maxopencldev option needs to be given a number]) fi AC_MSG_RESULT($nmaxopencldev) AC_DEFINE_UNQUOTED(STARPU_MAXOPENCLDEVS, [$nmaxopencldev], [maximum number of OPENCL devices]) AC_ARG_ENABLE(opencl, [AS_HELP_STRING([--disable-opencl], [do not use OpenCL device(s)])],, [enable_opencl=maybe]) have_valid_opencl=no AC_DEFUN([STARPU_CHECK_OPENCL], [ __opencl_dir=$1 __opencl_include_dir=$2 __opencl_lib_dir=$3 if test "$__opencl_dir" != "no" ; then AC_MSG_CHECKING(whether OpenCL is available in $__opencl_dir $__opencl_include_dir and $__opencl_lib_dir) else AC_MSG_CHECKING(whether OpenCL is available) fi AC_MSG_RESULT() if test "$__opencl_include_dir" = "no" -a "$__opencl_dir" != "no" ; then __opencl_include_dir="$__opencl_dir/include" fi SAVED_CPPFLAGS="$CPPFLAGS" SAVED_LDFLAGS="${LDFLAGS}" if test "$__opencl_include_dir" != "no" ; then CPPFLAGS="${CPPFLAGS} -I$__opencl_include_dir" fi AC_CHECK_HEADER([CL/cl.h],[have_valid_opencl=yes],[have_valid_opencl=no]) unset ac_cv_header_CL_cl_h if test "$have_valid_opencl" = "yes" ; then if test "$__opencl_lib_dir" != "no"; then LDFLAGS="${SAVED_LDFLAGS} -L$__opencl_lib_dir" AC_HAVE_LIBRARY([OpenCL],[have_valid_opencl=yes],[have_valid_opencl=no]) unset ac_cv_lib_OpenCL_main else AC_MSG_CHECKING(whether OpenCL is available in $__opencl_dir) AC_MSG_RESULT() AC_HAVE_LIBRARY([OpenCL],[have_valid_opencl=yes],[have_valid_opencl=no]) unset ac_cv_lib_OpenCL_main if test "$have_valid_opencl" = "no" -a "$__opencl_dir" != "no" ; then for __cuda_libdir in lib64 lib lib/x86 lib/Win32 ; do __opencl_lib_dir="$__opencl_dir/$__cuda_libdir" AC_MSG_CHECKING(whether OpenCL is available in $__opencl_dir and $__opencl_lib_dir) AC_MSG_RESULT() LDFLAGS="${SAVED_LDFLAGS} -L$__opencl_lib_dir" AC_HAVE_LIBRARY([OpenCL],[have_valid_opencl=yes],[have_valid_opencl=no]) unset ac_cv_lib_OpenCL_main if test "$have_valid_opencl" = yes ; then break fi done else LDFLAGS="${SAVED_LDFLAGS}" AC_HAVE_LIBRARY([OpenCL],[have_valid_opencl=yes],[have_valid_opencl=no]) unset ac_cv_lib_OpenCL_main fi fi fi if test "$have_valid_opencl" = "yes" -a "$__opencl_include_dir" != "no"; then STARPU_OPENCL_CPPFLAGS="-I$__opencl_include_dir" AC_CHECK_HEADERS([CL/cl_ext.h]) fi CPPFLAGS="${SAVED_CPPFLAGS}" LDFLAGS="${SAVED_LDFLAGS}" if test "$have_valid_opencl" = "yes" ; then if test "$__opencl_lib_dir" != "no"; then STARPU_OPENCL_LDFLAGS="-L$__opencl_lib_dir" fi STARPU_OPENCL_LDFLAGS="${STARPU_OPENCL_LDFLAGS} -lOpenCL" fi ]) #AC_MSG_CHECKING(whether OpenCL is available) AC_ARG_WITH(opencl-dir, [AS_HELP_STRING([--with-opencl-dir=], [specify OpenCL installation directory])], [ opencl_dir="$withval" # in case this was not explicit yet enable_opencl=yes ], opencl_dir=no) AC_ARG_WITH(opencl-include-dir, [AS_HELP_STRING([--with-opencl-include-dir=], [specify where OpenCL headers are installed])], [ opencl_include_dir="$withval" # in case this was not explicit yet enable_opencl=yes ], [opencl_include_dir=no]) AC_ARG_WITH(opencl-lib-dir, [AS_HELP_STRING([--with-opencl-lib-dir=], [specify where OpenCL libraries are installed])], [ opencl_lib_dir="$withval" # in case this was not explicit yet enable_opencl=yes ], [opencl_lib_dir=no]) AC_DEFUN([STARPU_LOOK_FOR_OPENCL], [ if test "x$has_opencl_being_checked" != "xyes" ; then STARPU_CHECK_OPENCL("$opencl_dir", "$opencl_include_dir", "$opencl_lib_dir") if test "$have_valid_opencl" = "no" ; then for f in "/usr/local/cuda" "/c/cuda" "/cygdrive/c/cuda" "/opt/cuda" "$CUDA_ROOT" "$CUDA_PATH" "$CUDA_INC_PATH/.." "$CUDA_INSTALL_PATH" "$CUDA_TOOLKIT"; do if test -n "$f" ; then STARPU_CHECK_OPENCL("$f", "no", "no") if test "$have_valid_opencl" = "yes" ; then break fi fi done fi has_opencl_being_checked=yes fi ]) if test x$enable_opencl = xyes -o x$enable_opencl = xmaybe; then case $target in *-*-darwin*) AC_MSG_CHECKING(whether OpenCL is available) SAVED_LIBS=$LIBS LIBS="$LIBS -framework OpenCL" AC_LINK_IFELSE( [AC_LANG_PROGRAM([[ #ifdef __APPLE_CC__ #include #else #include #endif ]], [[return clSetKernelArg(0, 0, 0, 0); ]])], [AC_MSG_RESULT(yes) enable_opencl=yes have_valid_opencl=yes STARPU_OPENCL_CPPFLAGS= STARPU_OPENCL_LDFLAGS="-framework OpenCL"], [AC_MSG_RESULT(no) enable_opencl=no]) LIBS=$SAVED_LIBS ;; *) STARPU_LOOK_FOR_OPENCL() # in case OpenCL was explicitly required, but is not available, this is an error if test x$enable_opencl = xyes -a x$have_valid_opencl = xno; then AC_MSG_ERROR([cannot find OpenCL]) fi # now we enable OpenCL if and only if a proper setup is available enable_opencl=$have_valid_opencl ;; esac save_LIBS="$LIBS" LIBS="$LIBS $STARPU_OPENCL_LDFLAGS" AC_CHECK_FUNCS([clEnqueueMarkerWithWaitList]) LIBS="$save_LIBS" fi AC_MSG_CHECKING(whether OpenCL should be used) AC_MSG_RESULT($enable_opencl) AC_SUBST(STARPU_USE_OPENCL, $enable_opencl) AM_CONDITIONAL(STARPU_USE_OPENCL, test x$enable_opencl = xyes) if test x$enable_opencl = xyes ; then AC_DEFINE(STARPU_USE_OPENCL, [1], [OpenCL support is activated]) STARPU_OPENCL_CPPFLAGS="${STARPU_OPENCL_CPPFLAGS} -DSTARPU_OPENCL_DATADIR=\"\\\"${datarootdir}/starpu/opencl\\\"\" -DCL_USE_DEPRECATED_OPENCL_1_1_APIS" AC_SUBST(STARPU_OPENCL_DATAdir, "$(eval echo ${datarootdir}/starpu/opencl/examples)") AC_SUBST(STARPU_OPENCL_CPPFLAGS) AC_SUBST(STARPU_OPENCL_LDFLAGS) fi ############################################################################### # # # Maxeler FPGA Settings # # # ############################################################################### #NUMBER OF MAXELER FPGA DEVICES AC_MSG_CHECKING(maximum number of Maxeler FPGA devices) AC_ARG_ENABLE(maxmaxfpgadev, [AS_HELP_STRING([--enable-maxmaxfpgadev=], [maximum number of Maxeler FPGA devices])], nmaxmaxfpgadev=$enableval, nmaxmaxfpgadev=12) if test x$nmaxmaxfpgadev = x -o x$nmaxmaxfpgadev = xyes then AC_MSG_ERROR([The --enable-maxmaxfpgadev option needs to be given a number]) fi AC_MSG_RESULT($nmaxmaxfpgadev) AC_DEFINE_UNQUOTED(STARPU_MAXMAXFPGADEVS, [$nmaxmaxfpgadev],[maximum number of Maxeler FPGA devices]) AC_ARG_ENABLE([max-fpga], [AS_HELP_STRING([--disable-max-fpga],[disable support for Maxeler FPGA])], [enable_max_fpga=$enableval], [enable_max_fpga=maybe] ) if test x$enable_simgrid = xyes; then if test x$enable_max_fpga = xyes; then AC_MSG_ERROR([Max fpga not supported with simgrid]) fi enable_max_fpga=no fi if test x$enable_max_fpga != xno; then AC_PATH_PROG([SLIC_CONFIG], [slic-config], [not-found]) if test "x$SLIC_CONFIG" = "xnot-found"; then # in case FPGA was explicitly required, but is not available, this is an error if test x$enable_max_fpga = xyes; then AC_MSG_ERROR(['slic-config' not found for Maxeler FPGA support]) fi enable_max_fpga=no else STARPU_MAX_FPGA_CPPFLAGS="`slic-config --cflags | sed s/\'//g | sed "s/-I /-I/"`" STARPU_MAX_FPGA_LDFLAGS="`slic-config --libs | sed s/\'//g | sed "s/-L /-L/" | sed "s/-L /-L/"`" enable_max_fpga=yes fi fi AC_SUBST(STARPU_USE_MAX_FPGA,$enable_max_fpga) AM_CONDITIONAL(STARPU_USE_MAX_FPGA,test x$enable_max_fpga = xyes) if test x$enable_max_fpga = xyes; then AC_DEFINE(STARPU_USE_MAX_FPGA,[1],[Maxeler FPGA support is activated]) fi ############################################################################### # # # General GPU settings # # # ############################################################################### AC_MSG_CHECKING(whether asynchronous copy should be disabled) AC_ARG_ENABLE(asynchronous-copy, [AS_HELP_STRING([--disable-asynchronous-copy], [disable asynchronous copy between CPU and GPU])], enable_asynchronous_copy=$enableval, enable_asynchronous_copy=yes) disable_asynchronous_copy=no if test x$enable_asynchronous_copy = xno ; then disable_asynchronous_copy=yes fi AC_MSG_RESULT($disable_asynchronous_copy) if test x$disable_asynchronous_copy = xyes ; then AC_DEFINE([STARPU_DISABLE_ASYNCHRONOUS_COPY], [1], [Define to 1 to disable asynchronous copy between CPU and GPU devices]) fi AC_MSG_CHECKING(whether asynchronous CUDA copy should be disabled) AC_ARG_ENABLE(asynchronous-cuda-copy, [AS_HELP_STRING([--disable-asynchronous-cuda-copy], [disable asynchronous copy between CPU and CUDA devices])], enable_asynchronous_cuda_copy=$enableval, enable_asynchronous_cuda_copy=yes) disable_asynchronous_cuda_copy=no if test x$enable_asynchronous_cuda_copy = xno ; then disable_asynchronous_cuda_copy=yes fi AC_MSG_RESULT($disable_asynchronous_cuda_copy) if test x$disable_asynchronous_cuda_copy = xyes ; then AC_DEFINE([STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY], [1], [Define to 1 to disable asynchronous copy between CPU and CUDA devices]) fi AC_MSG_CHECKING(whether asynchronous OpenCL copy should be disabled) AC_ARG_ENABLE(asynchronous-opencl-copy, [AS_HELP_STRING([--disable-asynchronous-opencl-copy], [disable asynchronous copy between CPU and OPENCL devices])], enable_asynchronous_opencl_copy=$enableval, enable_asynchronous_opencl_copy=yes) disable_asynchronous_opencl_copy=no if test x$enable_asynchronous_opencl_copy = xno ; then disable_asynchronous_opencl_copy=yes fi AC_MSG_RESULT($disable_asynchronous_opencl_copy) if test x$disable_asynchronous_opencl_copy = xyes ; then AC_DEFINE([STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY], [1], [Define to 1 to disable asynchronous copy between CPU and OpenCL devices]) fi AC_MSG_CHECKING(whether asynchronous MPI Master Slave copy should be disabled) AC_ARG_ENABLE(asynchronous-mpi-master-slave-copy, [AS_HELP_STRING([--disable-asynchronous-mpi-master-slave-copy], [disable asynchronous copy between MPI Master and MPI Slave devices])], enable_asynchronous_mpi_master_slave_copy=$enableval, enable_asynchronous_mpi_master_slave_copy=yes) disable_asynchronous_mpi_master_slave_copy=no if test x$enable_asynchronous_mpi_master_slave_copy = xno ; then disable_asynchronous_mpi_master_slave_copy=yes fi AC_MSG_RESULT($disable_asynchronous_mpi_master_slave_copy) if test x$disable_asynchronous_mpi_master_slave_copy = xyes ; then AC_DEFINE([STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY], [1], [Define to 1 to disable asynchronous copy between MPI Master and MPI Slave devices]) fi AC_MSG_CHECKING(whether asynchronous TCP/IP Master Slave copy should be disabled) AC_ARG_ENABLE(asynchronous-tcpip-master-slave-copy, [AS_HELP_STRING([--disable-asynchronous-tcpip-master-slave-copy], [disable asynchronous copy between TCP/IP Master and TCP/IP Slave devices])], enable_asynchronous_tcpip_master_slave_copy=$enableval, enable_asynchronous_tcpip_master_slave_copy=yes) disable_asynchronous_tcpip_master_slave_copy=no if test x$enable_asynchronous_tcpip_master_slave_copy = xno ; then disable_asynchronous_tcpip_master_slave_copy=yes fi AC_MSG_RESULT($disable_asynchronous_tcpip_master_slave_copy) if test x$disable_asynchronous_tcpip_master_slave_copy = xyes ; then AC_DEFINE([STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY], [1], [Define to 1 to disable asynchronous copy between TCP/IP Master and TCP/IP Slave devices]) fi AC_MSG_CHECKING(whether asynchronous Maxeler FPGA copy should be disabled) AC_ARG_ENABLE(asynchronous-max-fpga-copy, [AS_HELP_STRING([--disable-asynchronous-max-fpga-copy], [disable asynchronous copy between CPU and Maxeler FPGA devices])], enable_asynchronous_max_fpga_copy=$enableval, enable_asynchronous_max_fpga_copy=yes) disable_asynchronous_max_fpga_copy=no if test x$enable_asynchronous_max_fpga_copy = xno ; then disable_asynchronous_max_fpga_copy=yes fi AC_MSG_RESULT($disable_asynchronous_max_fpga_copy) if test x$disable_asynchronous_max_fpga_copy = xyes ; then AC_DEFINE([STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY], [1], [Define to 1 to disable asynchronous copy between CPU and Maxeler FPGA devices]) fi ############################################################################### # # # Fortran # # # ############################################################################### AC_ARG_ENABLE(fortran, [AS_HELP_STRING([--disable-fortran], [disable build of fortran examples])], enable_build_fortran_requested=$enableval, enable_build_fortran_requested=yes) use_mpi_fort=no enable_build_fortran=no if test "x$enable_build_fortran_requested" = "xyes" ; then if test "x$FC" != "x"; then if $FC --version|grep -q 'GNU Fortran'; then AC_LANG_PUSH([Fortran]) OLD_FCFLAGS="$FCFLAGS" FCFLAGS="$FCFLAGS -cpp" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [[ #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 9) #error GFortran too old, version >= 4.9.x needed, Fortran examples will not be built #endif ]] )], [enable_build_fortran="yes"], [enable_build_fortran="no"]) FCFLAGS="$OLD_FCFLAGS" AC_LANG_POP([Fortran]) if test "$enable_build_fortran" = "no" ; then AC_MSG_WARN([GFortran too old, version >= 4.9.x needed, Fortran examples will not be built]) fi else if $FC -V 2>&1|grep -q 'Intel(R) Fortran'; then enable_build_fortran="yes" ifort_fc_version=`$FC -V 2>&1 |head -1|sed 's/.*Version //;s/ Build.*//'` ifort_maj_version=`echo $ifort_fc_version|cut -d. -f1` if test $ifort_maj_version -lt 16; then AC_MSG_WARN([Intel Fortran compiler $ifort_fc_version too old, version >= 2016.x needed, Fortran examples will not be built]) enable_build_fortran="no" fi else if $FC -qversion 2>&1|grep -q 'IBM XL Fortran'; then xlf_fc_version=`$FC -V 2>&1 |tail -1|sed 's/.*Version: //'` AC_MSG_WARN([IBM Fortran compiler $xlf_fc_version not validated with the native StarPU Fortran API, Fortran examples will not be built]) enable_build_fortran="no" else AC_MSG_WARN(Fortran compiler has not been tested for StarPU native Fortran support) enable_build_fortran="yes" fi fi fi if $FC -v 2>&1 | grep -q 'Arm C/C++/Fortran Compiler' ; then armflang_version=`$FC -v 2>&1 | head -1 | sed 's/.*version //'` armflang_maj_version=`echo $armflang_version|cut -d. -f1` if test $armflang_maj_version -lt 23 ; then AC_MSG_WARN([ARM Fortran compiler $armflang_version is not validated with the native StarPU Fortran API, Fortran examples will not be built]) enable_build_fortran="no" fi fi if test "x$enable_build_fortran" = "xyes" ; then AC_DEFINE(STARPU_HAVE_FC, [1], [Define this if a Fortran compiler is available]) if test x$build_mpi_lib = xyes -o x$build_nmad_lib = xyes -o x$build_mpi_master_slave = xyes ; then #Check MPIFORT if test x$enable_simgrid = xyes ; then DEFAULT_MPIFORT=smpifort else DEFAULT_MPIFORT=mpifort fi AC_ARG_WITH(mpifort, [AS_HELP_STRING([--with-mpifort=], [Name or path of the mpifort compiler])], [DEFAULT_MPIFORT=$withval]) case $DEFAULT_MPIFORT in /*) mpifort_path="$DEFAULT_MPIFORT" ;; *) AC_PATH_PROG(mpifort_path, $DEFAULT_MPIFORT, [no], [$simgrid_dir/bin:$PATH]) ;; esac # We test if the MPIFORT compiler exists if test ! -x $mpifort_path; then AC_MSG_RESULT(The mpifort compiler '$mpifort_path' does not have the execute permission) mpifort_path=no else OLD_CC=$CC CC=$mpicc_path AC_LINK_IFELSE( AC_LANG_PROGRAM( [[#include ]], [[AC_LANG_SOURCE([return MPI_Comm_f2c(0);])]] ), [use_mpi_fort=yes], [use_mpi_fort=no] ) CC=$OLD_CC if test "x$use_mpi_fort" = xyes; then AC_DEFINE([HAVE_MPI_COMM_F2C], [1], [Function MPI_Comm_f2c is available]) fi fi AC_MSG_CHECKING(whether mpifort is available) AC_MSG_RESULT($mpifort_path) AC_SUBST(MPIFORT, $mpifort_path) if test x$mpifort_path != xno ; then MPIPATH=$(dirname $mpifort_path):$PATH else MPIPATH=$PATH fi fi fi fi fi if test "x$enable_build_fortran" = "xyes" ; then if test "x$FC" = "x" ; then enable_build_fortran="no" fi fi #We have MPI C/C++ compiler if test x$build_mpi_master_slave = xyes; then #Check if we can compile fortran cases if test x$use_mpi_fort = xyes ; then F77LD=$mpifort_path FCLD=$mpifort_path F77=$mpifort_path FC=$mpifort_path else enable_build_fortran=no fi fi AM_CONDITIONAL([STARPU_HAVE_FC], [test "x$FC" != "x" -a "x$enable_build_fortran" = "xyes"]) AM_CONDITIONAL([STARPU_HAVE_F77], [test "x$F77" != "x" -a "x$enable_build_fortran" = "xyes"]) AM_CONDITIONAL([STARPU_HAVE_MPIFORT], [test "x$use_mpi_fort" = "xyes"]) ############################################################################### # # # Debug and Performance analysis tools # # # ############################################################################### AC_MSG_CHECKING(whether debug mode should be enabled) AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug], [enable debug mode])], enable_debug=$enableval, enable_debug=no) AC_MSG_RESULT($enable_debug) AC_ARG_ENABLE(spinlock_check, [AS_HELP_STRING([--enable-spinlock-check], [enable spinlock check])], enable_spinlock_check=$enableval, enable_spinlock_check=no) AC_ARG_ENABLE(fstack-protector-all, [AS_HELP_STRING([--disable-fstack-protector-all], [disable GCC option -fstack-protector-all])], enable_fstack_protector_all=$enableval, enable_fstack_protector_all=yes) if test x$enable_debug = xyes; then AC_DEFINE(STARPU_DEBUG, [1], [enable debugging statements]) CFLAGS="$CFLAGS -O0" CXXFLAGS="$CXXFLAGS -O0" FFLAGS="$FFLAGS -O0" FCFLAGS="$FCFLAGS -O0" IS_SUPPORTED_FLAG(-fno-optimize-sibling-calls) enable_spinlock_check=yes if test x$GCC = xyes; then IS_SUPPORTED_FLAG(-Og) if test x$starpu_windows != xyes ; then if test x$enable_fstack_protector_all = xyes ; then CFLAGS="$CFLAGS -fstack-protector-all" CXXFLAGS="$CXXFLAGS -fstack-protector-all" FFLAGS="$FFLAGS -fstack-protector-all" FCFLAGS="$FCFLAGS -fstack-protector-all" fi fi fi else CFLAGS="-O3 $CFLAGS" CXXFLAGS="-O3 $CXXFLAGS" FFLAGS="-O3 $FFLAGS" FCFLAGS="-O3 $FCFLAGS" fi AC_MSG_CHECKING(whether gdb information should be enabled) AC_ARG_ENABLE(gdb, [AS_HELP_STRING([--disable-gdb], [disable gdb information])], enable_gdb=$enableval, enable_gdb=yes) AC_MSG_RESULT($enable_gdb) AC_MSG_CHECKING(whether full gdb information should be enabled) AC_ARG_ENABLE(full-gdb-information, [AS_HELP_STRING([--disable-full-gdb-information], [disable full gdb information])], enable_full_gdb_information=$enableval, enable_full_gdb_information=yes) AC_MSG_RESULT($enable_full_gdb_information) if test x$enable_gdb = xyes; then if test x$enable_full_gdb_information = xyes -a x$GCC = xyes; then IS_SUPPORTED_FLAG(-gdwarf-2) IS_SUPPORTED_FLAG(-g3) NVCCFLAGS="$NVCCFLAGS -g" HIPCCFLAGS="$HIPCCFLAGS -g" else IS_SUPPORTED_FLAG(-g) NVCCFLAGS="$NVCCFLAGS -g" HIPCCFLAGS="$HIPCCFLAGS -g" fi else CFLAGS="$CFLAGS -g0" CXXFLAGS="$CXXFLAGS -g0" FFLAGS="$FFLAGS -g0" FCFLAGS="$FCFLAGS -g0" LDFLAGS="$LDFLAGS -g0" fi if test x$enable_spinlock_check = xyes; then AC_DEFINE(STARPU_SPINLOCK_CHECK, [1], [check spinlock use]) fi AC_MSG_CHECKING(whether extra checks should be performed) AC_ARG_ENABLE(fast, [AS_HELP_STRING([--enable-fast], [do not enforce assertions])], enable_fast=$enableval, enable_fast=no) AC_MSG_RESULT($enable_fast) if test x$enable_fast = xyes; then AC_DEFINE(STARPU_NO_ASSERT, [1], [disable assertions]) else # fortify gets really enabled only with optimizations, avoid enabling it # when optimizations are not enabled, because with some glibc it # spews a lot of warnings. if test x$enable_debug != xyes; then if test x$GCC = xyes; then CPPFLAGS="-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=1 $CPPFLAGS" fi fi fi AC_MSG_CHECKING(whether debug messages should be displayed) AC_ARG_ENABLE(verbose, [AS_HELP_STRING([--enable-verbose], [display verbose debug messages (--enable-verbose=extra increase the verbosity)])], enable_verbose=$enableval, enable_verbose=no) AC_MSG_RESULT($enable_verbose) if test x$enable_verbose = xyes; then AC_DEFINE(STARPU_VERBOSE, [1], [display verbose debug messages]) fi if test x$enable_verbose = xextra; then AC_DEFINE(STARPU_VERBOSE, [1], [display verbose debug messages]) AC_DEFINE(STARPU_EXTRA_VERBOSE, [1], [display verbose debug messages]) fi AC_MSG_CHECKING(whether coverage testing should be enabled) AC_ARG_ENABLE(coverage, [AS_HELP_STRING([--enable-coverage], [enable coverage checking])], enable_coverage=$enableval, enable_coverage=no) AC_MSG_RESULT($enable_coverage) AC_SUBST(COVERAGE, $enable_coverage) AM_CONDITIONAL(STARPU_COVERAGE_ENABLED, [test "x$enable_coverage" = "xyes"]) if test x$enable_coverage = xyes; then CFLAGS="${CFLAGS} --coverage" CXXFLAGS="${CXXFLAGS} --coverage" FFLAGS="${FFLAGS} --coverage" FCFLAGS="${FCFLAGS} --coverage" LDFLAGS="${LDFLAGS} --coverage" LIBS="${LIBS} -lgcov" fi AC_MSG_CHECKING(whether coverity mode should be enabled) AC_ARG_ENABLE(coverity, [AS_HELP_STRING([--enable-coverity], [enable coverity mode])], enable_coverity=$enableval, enable_coverity=no) AC_MSG_RESULT($enable_coverity) AM_CONDITIONAL(STARPU_COVERITY, test x$enable_coverity = xyes) if test x$enable_coverity = xyes ; then AC_DEFINE(STARPU_COVERITY, [1], [Define to 1 if you are building with coverity]) fi # We would need a PIC-compiled libfxt.a for this to work ; that's usually not available. if test x$enable_mpi = xyes -a x$enable_simgrid = xyes -o x$enable_shared = xno -a x$enable_starpupy = xyes ; then default_enable_fxt=no else default_enable_fxt=maybe fi # shall we use FxT to generate trace of the execution ? AC_ARG_ENABLE(fxt, [AS_HELP_STRING([--disable-fxt], [disable FxT trace mechanisms])],, [enable_fxt=$default_enable_fxt]) AC_ARG_WITH(fxt, [AS_HELP_STRING([--with-fxt=], [specify FxT installation directory])], [ if test x$withval = xno ; then enable_fxt=no else fxt_dir="$withval" use_fxt_from_system=no # in case this was not explicit yet enable_fxt=yes AC_SUBST(FXTDIR, $fxt_dir) fi ], [ use_fxt_from_system=yes fxt_dir="" ]) if test x$enable_fxt != xno; then if test x$use_fxt_from_system = xno; then save_PKG_CONFIG_PATH="$PKG_CONFIG_PATH" PKG_CONFIG_PATH="$fxt_dir/lib/pkgconfig:$PKG_CONFIG_PATH" PKG_CHECK_MODULES([FXT], [fxt], [have_valid_fxt=yes], [ have_valid_fxt=yes AC_MSG_WARN([Old FxT without fxt.pc file, hoping link will succeed]) FXT_CFLAGS="-I$fxt_dir/include/ " FXT_LDFLAGS="-L$fxt_dir/lib/" AC_ARG_VAR(FXT_LDFLAGS) FXT_LIBS="-lfxt" ]) PKG_CONFIG_PATH="$save_PKG_CONFIG_PATH" else PKG_CHECK_MODULES([FXT], [fxt], [have_valid_fxt=yes], [have_valid_fxt=no]) fi if test x$have_valid_fxt = xyes ; then enable_fxt=yes save_LIBS="$LIBS" LIBS="$LIBS $FXT_LIBS" save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS $FXT_LDFLAGS" AC_CHECK_FUNCS([fxt_close]) AC_CHECK_FUNCS([fxt_blockev_leave]) AC_CHECK_FUNCS([enable_fut_flush]) AC_CHECK_FUNCS([fut_set_filename]) AC_CHECK_FUNCS([fut_setup_flush_callback]) LDFLAGS="$save_LDFLAGS" LIBS="$save_LIBS" save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS $FXT_CFLAGS" AC_CHECK_DECLS([enable_fut_flush], [], [], [[#include ]]) AC_CHECK_DECLS([fut_set_filename], [], [], [[#include ]]) AC_CHECK_DECLS([fut_setup_flush_callback], [], [], [[#include ]]) CFLAGS="$save_CFLAGS" if test x$enable_simgrid = xyes -a x$enable_shared = xno ; then # simgrid's SMPI needs fxt to be linked in statically for # variable privatization to work FXT_LIBS="$(pkg-config --variable=libdir fxt)/libfxt.a -Wl,--as-needed $(pkg-config --libs --static fxt) -Wl,--no-as-needed" fi ########################################## # Poti is a library to generate paje trace files ########################################## PKG_CHECK_MODULES([POTI], [poti], [have_valid_poti=yes], [have_valid_poti=no]) AC_ARG_ENABLE(poti, [AS_HELP_STRING([--enable-poti], [Enable the use of the POTI library to generate Paje traces])], enable_poti=$enableval, enable_poti=no) if test x$enable_poti = xyes -a x$have_valid_poti = xyes ; then AC_DEFINE(STARPU_HAVE_POTI, [1], [Define to 1 if you have libpoti and it is meant to be used]) save_LIBS="$LIBS" LIBS="$LIBS $POTI_LIBS" AC_CHECK_FUNCS([poti_init_custom poti_user_NewEvent]) LIBS="$save_LIBS" FXT_CFLAGS="$FXT_CFLAGS $POTI_CFLAGS" FXT_LIBS="$FXT_LIBS $POTI_LIBS" fi else if test x$enable_fxt = xyes ; then AC_MSG_ERROR([FxT is required but not available]) fi enable_fxt=no fi fi AC_MSG_CHECKING(whether FxT traces should be generated) AC_MSG_RESULT($enable_fxt) if test x$enable_fxt = xyes; then AC_DEFINE(STARPU_USE_FXT, [1], [enable FxT traces]) AC_DEFINE(CONFIG_FUT, [1], [enable FUT traces]) fi AC_SUBST(STARPU_USE_FXT, $enable_fxt) AM_CONDITIONAL(STARPU_USE_FXT, test x$enable_fxt = xyes) AC_MSG_CHECKING(whether additional locking systems FxT traces should be enabled) AC_ARG_ENABLE(fxt-lock, [AS_HELP_STRING([--enable-fxt-lock], [enable additional locking systems FxT traces])], enable_fxt_lock=$enableval, enable_fxt_lock=no) AC_MSG_RESULT($enable_fxt_lock) if test x$enable_fxt_lock = xyes; then AC_DEFINE(STARPU_FXT_LOCK_TRACES, [1], [enable additional locking systems FxT traces]) fi AC_ARG_ENABLE(papi, [AS_HELP_STRING([--disable-papi], [disable using papi])], enable_papi=$enableval, enable_papi=yes) if test x$enable_papi = xyes; then PKG_CHECK_MODULES([PAPI], [papi], [have_valid_papi=yes], [have_valid_papi=no]) if test x$have_valid_papi = xyes ; then AC_DEFINE([STARPU_PAPI], [1], [Define to 1 if you have the libpapi library]) STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS $PAPI_LIBS" fi fi AC_MSG_CHECKING(whether performance debugging should be enabled) AC_ARG_ENABLE(perf-debug, [AS_HELP_STRING([--enable-perf-debug], [enable performance debugging through gprof])], enable_perf_debug=$enableval, enable_perf_debug=no) AC_MSG_RESULT($enable_perf_debug) AC_SUBST(STARPU_PERF_DEBUG, $enable_perf_debug) if test x$enable_perf_debug = xyes; then AC_DEFINE(STARPU_PERF_DEBUG, [1], [enable performance debug]) CPPFLAGS="${CPPFLAGS} -pg " LDFLAGS="${LDFLAGS} -pg " fi AC_MSG_CHECKING(whether performance model debugging should be enabled) AC_ARG_ENABLE(model-debug, [AS_HELP_STRING([--enable-model-debug], [enable performance model debugging])], enable_model_debug=$enableval, enable_model_debug=no) AC_MSG_RESULT($enable_model_debug) if test x$enable_model_debug = xyes; then AC_DEFINE(STARPU_MODEL_DEBUG, [1], [enable performance model debug]) fi AC_MSG_CHECKING(whether memory stats should be displayed) AC_ARG_ENABLE(memory-stats, [AS_HELP_STRING([--enable-memory-stats], [enable memory stats])], enable_memory_stats=$enableval, enable_memory_stats=no) AC_MSG_RESULT($enable_memory_stats) if test x$enable_memory_stats = xyes; then AC_DEFINE(STARPU_MEMORY_STATS, [1], [enable memory stats]) fi AC_ARG_ENABLE(glpk, [AS_HELP_STRING([--disable-glpk], [disable using glpk for bound computation])], enable_glpk=$enableval, enable_glpk=yes) if test x$enable_glpk = xyes; then AC_CHECK_HEADERS([glpk.h], [AC_DEFINE([STARPU_HAVE_GLPK_H], [1], [Define to 1 if you have the header file.])]) STARPU_HAVE_LIBRARY(GLPK, [glpk]) fi AC_ARG_WITH(ayudame1-include-dir, [AS_HELP_STRING([--with-ayudame1-include-dir=], [specify where Ayudame version 1 headers are installed])], [ ayudame1_include_dir="$withval" if test -n "$ayudame1_include_dir"; then CPPFLAGS="-I$ayudame1_include_dir $CPPFLAGS" fi ], [ayudame1_include_dir=no]) AC_ARG_WITH(ayudame2-include-dir, [AS_HELP_STRING([--with-ayudame2-include-dir=], [specify where Ayudame version 2 headers are installed])], [ ayudame2_include_dir="$withval" if test -n "$ayudame2_include_dir"; then CPPFLAGS="-I$ayudame2_include_dir $CPPFLAGS" fi ], [ayudame2_include_dir=no]) # Ayudame 1 header is capitalized AC_CHECK_HEADERS([Ayudame.h]) AC_ARG_ENABLE(ayudame1, [AS_HELP_STRING([--disable-ayudame1], [Do not use Ayudame lib version 1])], enable_ayudame1=$enableval, enable_ayudame1=yes) # Ayudame 2 header is lowercase AC_CHECK_HEADERS([ayudame.h]) AC_ARG_ENABLE(ayudame2, [AS_HELP_STRING([--disable-ayudame2], [Do not use Ayudame lib version 2])], enable_ayudame2=$enableval, enable_ayudame2=yes) if test x$enable_ayudame1 = xyes -a x$ac_cv_header_Ayudame_h = xyes; then AC_DEFINE([STARPU_USE_AYUDAME1], [1], [Define to 1 if Ayudame 1 is available and should be used]) ayu_msg="yes, use version 1" else if test x$enable_ayudame2 = xyes -a x$ac_cv_header_ayudame_h = xyes; then AC_DEFINE([STARPU_USE_AYUDAME2], [1], [Define to 1 if Ayudame 2 is available and should be used]) ayu_msg="yes, use version 2" else ayu_msg="no" fi fi AM_CONDITIONAL([STARPU_USE_AYUDAME1], [test "x$enable_ayudame1" = "xyes"]) AM_CONDITIONAL([STARPU_USE_AYUDAME2], [test "x$enable_ayudame2" = "xyes"]) STARPU_FXT_EVENT_DEFINES="`grep -E '#define\s+_STARPU_(MPI_)?FUT_' ${srcdir}/src/common/fxt.h ${srcdir}/mpi/src/starpu_mpi_fxt.h | grep 0x | grep -v 0x1 | cut -d : -f 2`" AC_SUBST([STARPU_FXT_EVENT_DEFINES]) # Heteroprio works better if it can store information based on the program's name AC_MSG_CHECKING(whether the target supports program_invocation_short_name) AC_LINK_IFELSE([AC_LANG_SOURCE( [ #include #include int main() { printf("%s\n", program_invocation_short_name); return 0; } ])], [AC_DEFINE([STARPU_HAVE_PROGRAM_INVOCATION_SHORT_NAME], [1], [variable program_invocation_short_name is available]) AC_MSG_RESULT(yes)], AC_MSG_RESULT(no) ) ############################################################################### # # # Miscellaneous options for StarPU # # # ############################################################################### AC_MSG_CHECKING(whether data locality should be enforced) AC_ARG_ENABLE(data-locality-enforce, [AS_HELP_STRING([--enable-data-locality-enforce], [disable data locality enforcement])], enable_data_locality_enforce=$enableval, enable_data_locality_enforce=no) AC_MSG_RESULT($enable_data_locality_enforce) if test x$enable_data_locality_enforce = xyes ; then AC_DEFINE([STARPU_DATA_LOCALITY_ENFORCE], [1], [Define to 1 to enforce data locality]) fi AC_MSG_CHECKING(how many buffers can be manipulated per task) AC_ARG_ENABLE(maxbuffers, [AS_HELP_STRING([--enable-maxbuffers=], [maximum number of buffers per task])], nmaxbuffers=$enableval, nmaxbuffers=8) if test x$nmaxbuffers = x -o x$nmaxbuffers = xyes then AC_MSG_ERROR([The --enable-maxbuffers option needs to be given a number]) fi AC_MSG_RESULT($nmaxbuffers) AC_DEFINE_UNQUOTED(STARPU_NMAXBUFS, [$nmaxbuffers], [how many buffers can be manipulated per task]) AC_MSG_CHECKING(how many MPI nodes fxt files can be manipulated when generating traces) AC_ARG_ENABLE(fxt-max-files, [AS_HELP_STRING([--enable-fxt-max-files=], [maximum number of mpi nodes for traces])], nmaxfxtfiles=$enableval, nmaxfxtfiles=64) if test x$nmaxfxtfiles = x -o x$nmaxfxtfiles = xyes then AC_MSG_ERROR([The --enable-maxfxtfiles option needs to be given a number]) fi AC_MSG_RESULT($nmaxfxtfiles) AC_DEFINE_UNQUOTED(STARPU_FXT_MAX_FILES, [$nmaxfxtfiles], [how many MPI nodes fxt files can be manipulated when generating traces]) AC_MSG_CHECKING(maximum number of memory nodes to use per MPI rank) AC_ARG_ENABLE(maxnodes, [AS_HELP_STRING([--enable-maxnodes=], [maximum number of memory nodes per MPI rank])], maxnodes=$enableval, maxnodes=0) if test x$maxnodes = x0 ; then if test x$enable_simgrid = xyes ; then # We need the room for the virtual CUDA/OpenCL devices nodes=`expr 4 + $nmaxcudadev + $nmaxopencldev + 1 + $nmaxmpidev` else # We have one memory node shared by all CPU workers, one node per GPU # we add nodes to use 2 memory disks nodes=`expr $nmaxnumanodes + 2` if test x$enable_cuda = xyes ; then # we could have used nmaxcudadev + 1, but this would certainly give an # odd number. nodes=`expr $nodes + $nmaxcudadev` fi if test x$enable_hip = xyes ; then # we could have used nmaxhipdev + 1, but this would certainly give an # odd number. nodes=`expr $nodes + $nmaxhipdev` fi if test x$enable_opencl = xyes ; then # we could have used nmaxopencldev + 1, but this would certainly give an # odd number. nodes=`expr $nodes + $nmaxopencldev` fi if test x$enable_max_fpga = xyes ; then # we could have used nmaxmaxfpgadev + 1, but this would certainly give an # odd number. nodes=`expr $nodes + $nmaxmaxfpgadev` fi #nmaxmpidev = 0 if mpi master-slave is disabled nodes=`expr $nodes + $nmaxmpidev` #nmaxtcpipdev = 0 if tcpip master-slave is disabled nodes=`expr $nodes + $nmaxtcpipdev` fi # set maxnodes to the next power of 2 greater than nodes maxnodes=1 while test "$maxnodes" -lt "$nodes" do maxnodes=`expr $maxnodes \* 2` done fi if test x$maxnodes = x -o x$maxnodes = xyes then AC_MSG_ERROR([The --enable-maxnodes option needs to be given a number]) fi if test $maxnodes -gt 32 ; then AC_MSG_WARN([Note: the wt_mask feature only supports 32 memory nodes]) fi AC_MSG_CHECKING(maximum number of memory nodes) AC_MSG_RESULT($maxnodes) AC_DEFINE_UNQUOTED(STARPU_MAXNODES, [$maxnodes], [maximum number of memory nodes]) AC_MSG_CHECKING(whether allocation cache should be used) AC_ARG_ENABLE(allocation-cache, [AS_HELP_STRING([--disable-allocation-cache], [disable data allocation cache])], enable_allocation_cache=$enableval, enable_allocation_cache=yes) AC_MSG_RESULT($enable_allocation_cache) if test x$enable_allocation_cache = xyes; then AC_DEFINE(STARPU_USE_ALLOCATION_CACHE, [1], [enable data allocation cache]) fi AC_ARG_WITH(perf-model-dir, [AS_HELP_STRING([--with-perf-model-dir=], [specify where performance models should be stored])], [ if test x$withval = xno; then AC_MSG_ERROR(--without-perf-model-dir is not a valid option) fi perf_model_dir="$withval" have_explicit_perf_model_dir=yes AC_DEFINE_UNQUOTED(STARPU_PERF_MODEL_DIR, ["$perf_model_dir"], [performance models location]) ], [ # by default, we put the performance models in # $HOME/.starpu/sampling/ have_explicit_perf_model_dir=no perf_model_dir="\$STARPU_HOME/.starpu/sampling/" ] ) AC_MSG_CHECKING(using explicit performance model location) AC_MSG_RESULT($have_explicit_perf_model_dir) AC_MSG_CHECKING(performance models location) AC_MSG_RESULT($perf_model_dir) # On many multicore CPUs, clock cycles are not synchronized AC_CHECK_LIB([rt], [clock_gettime]) AC_CHECK_FUNCS([clock_gettime]) # Compute the maximum number of workers (we round it to 16 for alignment # purposes). if test x$enable_simgrid != xyes; then if test x$enable_cpu != xyes; then maxcpus=0 fi if test x$enable_cuda != xyes; then nmaxcudadev=0 fi if test x$enable_max_fpga != xyes; then nmaxmaxfpgadev=0 fi if test x$enable_opencl != xyes; then nmaxopencldev=0 fi #By default, if we cannot build mpi master-slave nmaxmpidev is set to zero. #But with the multiplication with maxcpus, we need to put it to one. if test x$build_mpi_master_slave != xyes; then nmaxmpidev=1 fi #By default, if we cannot build tcp/ip master-slave nmaxtcpipdev is set to zero. #But with the multiplication with maxcpus, we need to put it to one. if test x$build_tcpip_master_slave != xyes; then nmaxtcpipdev=1 fi fi if test $maxcpus = 0 then nmaxworkers=`expr 16 \* \( \( \( $nmaxmpidev \* 64 \) + $nmaxcudadev + $nmaxhipdev + $nmaxopencldev + $nmaxmaxfpgadev + 15 \) / 16 \) ` elif test $nmaxmpidev = 0 then nmaxworkers=`expr 16 \* \( \( $maxcpus + $nmaxcudadev + $nmaxhipdev + $nmaxopencldev + $nmaxmaxfpgadev + 15 \) / 16 \) ` else nmaxworkers=`expr 16 \* \( \( \( $nmaxmpidev \* $maxcpus \) + $nmaxcudadev + $nmaxhipdev + $nmaxopencldev + $nmaxmaxfpgadev + 15 \) / 16 \) ` fi AC_MSG_CHECKING(Maximum number of workers) AC_MSG_RESULT($nmaxworkers) AC_DEFINE_UNQUOTED(STARPU_NMAXWORKERS, [$nmaxworkers], [Maximum number of workers]) nmaxdevs=0 if test $nmaxdevs -lt $nmaxcudadev; then nmaxdevs=$nmaxcudadev fi if test $nmaxdevs -lt $nmaxhipdev; then nmaxdevs=$nmaxhipdev fi if test $nmaxdevs -lt $nmaxopencldev; then nmaxdevs=$nmaxopencldev fi if test $nmaxdevs -lt $nmaxmaxfpgadev; then nmaxdevs=$nmaxmaxfpgadev fi if test $nmaxdevs -lt $nmaxmpidev; then nmaxdevs=$nmaxmpidev fi if test $nmaxdevs -lt $nmaxtcpipdev; then nmaxdevs=$nmaxtcpipdev fi AC_DEFINE_UNQUOTED(STARPU_NMAXDEVS, [$nmaxdevs], [Maximum number of device per device arch]) # Computes the maximum number of combined worker nmaxcombinedworkers=$maxcpus AC_MSG_CHECKING(Maximum number of workers combinations) AC_MSG_RESULT($nmaxcombinedworkers) AC_DEFINE_UNQUOTED(STARPU_NMAX_COMBINEDWORKERS, [$nmaxcombinedworkers], [Maximum number of worker combinations]) # Computes the maximum number of implementations per arch AC_MSG_CHECKING(maximum number of implementations) AC_ARG_ENABLE(maximplementations, [AS_HELP_STRING([--enable-maximplementations=], [maximum number of implementations])], maximplementations=$enableval, maximplementations=4) AC_MSG_RESULT($maximplementations) AC_DEFINE_UNQUOTED(STARPU_MAXIMPLEMENTATIONS, [$maximplementations], [maximum number of implementations]) if test x$maximplementations = x -o x$maximplementations = xyes then AC_MSG_ERROR([The --enable-maximplementations option needs to be given a number]) fi # Enable LevelDB support if requested and the lib is found AC_ARG_ENABLE(leveldb, [AS_HELP_STRING([--enable-leveldb], [Enable linking with LevelDB if available])], enable_leveldb=$enableval, enable_leveldb=no) if test x$enable_leveldb = xyes; then AC_LANG_PUSH([C++]) AC_CHECK_HEADERS([leveldb/db.h], [AC_DEFINE([STARPU_HAVE_LEVELDB], [1], [Define to 1 if you have the header file.])]) STARPU_HAVE_LIBRARY(LEVELDB, [leveldb]) AC_LANG_POP([C++]) fi AM_CONDITIONAL(STARPU_HAVE_LEVELDB, test "x$enable_leveldb" = "xyes" -a "x$ac_cv_lib_leveldb_main" = "xyes") # Defines the calibration heuristic for the history-based calibration of StarPU AC_MSG_CHECKING(calibration heuristic of history-based StarPU calibrator) AC_ARG_ENABLE(calibration-heuristic, [AS_HELP_STRING([--enable-calibration-heuristic=], [Define the maximum authorized deviation of StarPU history-based calibrator.])], calibration_heuristic=$enableval, calibration_heuristic=50) AC_MSG_RESULT($calibration_heuristic) AC_DEFINE_UNQUOTED(STARPU_HISTORYMAXERROR, [$calibration_heuristic], [calibration heuristic value]) ############################################################################### # # # MP Common settings # # # ############################################################################### if test x$build_mpi_master_slave = xyes -o x$build_tcpip_master_slave = xyes; then build_master_slave=yes else build_master_slave=no fi AC_MSG_CHECKING(whether the master-slave mode should be enabled) AC_MSG_RESULT($build_master_slave) AM_CONDITIONAL([STARPU_USE_MP], test "x$build_master_slave" = "xyes") AC_ARG_ENABLE([export-dynamic], [AS_HELP_STRING([--disable-export-dynamic], [Prevent the linker from adding all symbols to the dynamic symbol table])], [], []) if test x$build_master_slave = xyes; then AC_DEFINE(STARPU_USE_MP, [1], [Message-passing SINKs support is enabled]) if test x$enable_export_dynamic != xno ; then STARPU_EXPORT_DYNAMIC="-rdynamic" fi fi AC_SUBST(STARPU_EXPORT_DYNAMIC) ############################################################################### # # # Flags for C Compiler # # # ############################################################################### IS_SUPPORTED_FLAG(-Wall) IS_SUPPORTED_CFLAG(-Werror=implicit) IS_SUPPORTED_CFLAG(-Werror=implicit-function-declaration) if test x$enable_perf_debug = xyes; then IS_SUPPORTED_FLAG(-no-pie) IS_SUPPORTED_FLAG(-no-PIE) IS_SUPPORTED_FLAG(-fno-pie) fi IS_SUPPORTED_FLAG(-Wextra) IS_SUPPORTED_FLAG(-Wunused) IS_SUPPORTED_CFLAG(-Wundef) IS_SUPPORTED_CXXFLAG(-Wundef) IS_SUPPORTED_FLAG(-Wshadow) IS_SUPPORTED_CFLAG(-Wpointer-arith) IS_SUPPORTED_CXXFLAG(-Wpointer-arith) if test "x$STARPU_DEVEL" != x; then AC_DEFINE(STARPU_DEVEL, [1], [enable developer warnings]) IS_SUPPORTED_CFLAG(-Werror=pointer-arith) IS_SUPPORTED_CXXFLAG(-Werror=pointer-arith) IS_SUPPORTED_FLAG(-fno-common) fi AM_CONDITIONAL([STARPU_DEVEL],[test "x$STARPU_DEVEL" != x]) AC_SUBST(GLOBAL_AM_CFLAGS) AC_SUBST(GLOBAL_AM_CXXFLAGS) AC_SUBST(GLOBAL_AM_FFLAGS) AC_SUBST(GLOBAL_AM_FCFLAGS) # Same value as Automake's, for use in other places. pkglibdir="\${libdir}/$PACKAGE" AC_SUBST([pkglibdir]) AC_ARG_WITH(check-flags, [AS_HELP_STRING([--with-check-flags], [Specify flags for C and Fortran compilers])], check_flags=$withval, check_flags="") if test "x$check_flags" != "x" ; then for xflag in $check_flags do IS_SUPPORTED_FLAG($xflag) done fi ######################################################################## # # # Parallel worker support # # # ######################################################################## default_enable_parallel_worker=yes if test x$starpu_darwin = xyes ; then default_enable_parallel_worker=no fi AC_ARG_ENABLE(parallel-worker, [AS_HELP_STRING([--enable-parallel-worker], [build the parallel worker support])], enable_parallel_worker=$enableval, enable_parallel_worker=$default_enable_parallel_worker) AC_MSG_CHECKING(for parallel worker support) if test x$enable_parallel_worker = xyes; then AC_DEFINE(STARPU_PARALLEL_WORKER, [1], [Define this to enable parallel worker support]) AC_OPENMP fi AM_CONDITIONAL([STARPU_PARALLEL_WORKER], [test "x$enable_parallel_worker" = "xyes"]) AC_MSG_RESULT($enable_parallel_worker) ############################################################################### # # # OpenMP LLVM runtime support # # # ############################################################################### AC_ARG_ENABLE(openmp-llvm, [AS_HELP_STRING([--enable-openmp-llvm], [build the OpenMP LLVM runtime support])], enable_openmp_llvm=$enableval, enable_openmp_llvm=no) openmp_llvm_msg="" if test x$starpu_windows = xyes ; then enable_openmp_llvm=no openmp_llvm_msg="disabled on windows" fi if test x$enable_simgrid = xyes ; then enable_openmp_llvm=no openmp_llvm_msg="incompatibility with Simgrid support" fi if test x$PROG_CLANG = x ; then enable_openmp_llvm=no openmp_llvm_msg="missing clang" fi if test x$enable_openmp_llvm = xyes; then AC_DEFINE(STARPU_OPENMP_LLVM, [1], [Define this to enable LLVM OpenMP runtime support]) # Force activating the generic OpenMP runtime support enable_openmp="yes" fi AC_MSG_CHECKING(for LLVM OpenMP runtime support) AM_CONDITIONAL([STARPU_OPENMP_LLVM], [test "x$enable_openmp_llvm" = "xyes"]) AC_MSG_RESULT($enable_openmp_llvm $openmp_llvm_msg) ############################################################################### # # # OpenMP runtime support # # # ############################################################################### AC_ARG_ENABLE(openmp, [AS_HELP_STRING([--enable-openmp], [build the OpenMP runtime support])], enable_openmp=$enableval, enable_openmp=yes) AC_CHECK_HEADER([ucontext.h],[have_valid_ucontext=yes],[have_valid_ucontext=no]) openmp_msg="" if test x$starpu_windows = xyes ; then enable_openmp=no openmp_msg="disabled on windows" fi if test x$enable_simgrid = xyes ; then enable_openmp=no openmp_msg="incompatibility with Simgrid support" fi if test x$have_valid_ucontext = xno ; then enable_openmp=no openmp_msg="ucontext.h unavailable" fi if test x$enable_openmp = xyes; then AC_DEFINE(STARPU_OPENMP, [1], [Define this to enable OpenMP runtime support]) fi AC_MSG_CHECKING(for OpenMP runtime support) AM_CONDITIONAL([STARPU_OPENMP], [test "x$enable_openmp" = "xyes"]) AC_MSG_RESULT($enable_openmp $openmp_msg) AM_CONDITIONAL([STARPU_HAVE_OPENMP],[test x$enable_simgrid = xno -a -n "$OPENMP_CFLAGS" -a x$starpu_windows != xyes]) ############################################################################### # # # SOCL interface # # # ############################################################################### AC_ARG_ENABLE([socl], [AS_HELP_STRING([--enable-socl], [build the OpenCL interface (experimental)])], [enable_socl="$enableval"], [enable_socl="maybe"]) AC_MSG_CHECKING(for SOCL) # in case SOCL was explicitly required, but is not available, this is an error if test "x$enable_socl" = "xyes" -a "$have_valid_opencl" = "no" ; then AC_MSG_ERROR([SOCL cannot be enabled without OpenCL]) fi # now we enable SOCL if and only if a proper setup is available if test "x$enable_socl" = "xyes" -o "x$enable_socl" = "xmaybe" ; then build_socl=$have_valid_opencl else build_socl=no fi AC_MSG_RESULT($build_socl) AM_CONDITIONAL([STARPU_BUILD_SOCL], [test "x$build_socl" = "xyes"]) AM_CONDITIONAL([STARPU_USE_SOCL], [test "x$build_socl" = "xyes"]) if test "$build_socl" = "yes" ; then AC_CHECK_FUNCS([clGetExtensionFunctionAddressForPlatform]) if test -n "$SOCL_OCL_LIB_OPENCL" -a -f "$SOCL_OCL_LIB_OPENCL" ; then run_socl_check=yes SOCL_OCL_LIB_OPENCL_DIR=$(dirname $SOCL_OCL_LIB_OPENCL) AC_SUBST(SOCL_OCL_LIB_OPENCL_DIR) else run_socl_check=no fi else run_socl_check=no fi ############################################################################### # # # Debugging # # # ############################################################################### AC_PATH_PROG([GDB], [gdb], [not-found]) if test "x$GDB" != "xnot-found"; then AC_DEFINE_UNQUOTED([STARPU_GDB_PATH], ["$GDB"], [Path to the GNU debugger.]) fi ############################################################################### # # # Examples # # # ############################################################################### AC_ARG_ENABLE(build-tests, [AS_HELP_STRING([--disable-build-tests], [disable building of tests])], enable_build_tests=$enableval, enable_build_tests=yes) # check stuff for tests (todo) AM_CONDITIONAL(STARPU_BUILD_TESTS, [test x$enable_build_tests != xno]) AC_ARG_ENABLE(build-examples, [AS_HELP_STRING([--disable-build-examples], [disable building of examples])], enable_build_examples=$enableval, enable_build_examples=yes) # check stuff for examples (todo) AM_CONDITIONAL(STARPU_BUILD_EXAMPLES, [test x$enable_build_examples != xno]) AC_ARG_ENABLE(opengl-render, [AS_HELP_STRING([--enable-opengl-render], [enable OpenGL rendering of some examples])], enable_opengl_render=$enableval, enable_opengl_render=no) if test x$enable_opengl_render = xyes; then STARPU_CHECK_LIB(OPENGL_RENDER, glut, glutInit,,AC_MSG_ERROR([cannot find glut])) STARPU_CHECK_LIB(OPENGL_RENDER, GL, glXCreateContext,,AC_MSG_ERROR([cannot find GL])) STARPU_CHECK_LIB(OPENGL_RENDER, GLU, gluLookAt,,AC_MSG_ERROR([cannot find GLU])) AC_DEFINE(STARPU_OPENGL_RENDER, [1], [enable OpenGL rendering of some examples]) fi AC_MSG_CHECKING(whether OpenGL rendering is enabled) AC_SUBST(STARPU_OPENGL_RENDER, $enable_opengl_render) AC_MSG_RESULT($enable_opengl_render) AM_CONDITIONAL([STARPU_HAVE_OPENGL], [test "x$enable_opengl_render" = xyes]) AC_PATH_XTRA if test "x$no_x" != "xyes"; then AC_DEFINE(STARPU_HAVE_X11, [1], [enable X11]) fi AM_CONDITIONAL([STARPU_HAVE_X11], [test "x$no_x" != "xyes"]) # In case there are BLAS kernels that are used by the example applications # we may specify which library to use. Note that this is not used for StarPU # itself. blas_lib=maybe AC_ARG_ENABLE(blas-lib, [ --enable-blas-lib[=blaslibname]: none [default]: no BLAS lib is used atlas: use ATLAS library goto: use GotoBLAS library mkl: use MKL library (you may need to set specific CFLAGS and LDFLAGS with --with-mkl-cflags and --with-mkl-ldflags)], [ if test "x$enableval" = "xatlas" ; then blas_lib=atlas elif test "x$enableval" = "xgoto" ; then blas_lib=goto elif test "x$enableval" = "xopenblas" ; then blas_lib=openblas elif test "x$enableval" = "xnone" ; then blas_lib=none elif test "x$enableval" = "xmkl" ; then blas_lib=mkl elif test "x$enableval" = "xarmpl" ; then blas_lib=armpl elif test x$enableval = xno; then blas_lib=none else echo echo "Error!" echo "Unknown BLAS library" exit -1 fi ]) if test x$blas_lib = xmaybe -o x$blas_lib = xgoto; then AC_ARG_WITH(goto-dir, [AS_HELP_STRING([--with-goto-dir=], [specify GotoBLAS lib location])], [ blas_lib=goto gotodir=$withval AC_SUBST(GOTODIR, $gotodir) CPPFLAGS="${CPPFLAGS} -I$gotodir/ " LDFLAGS="${LDFLAGS} -L$gotodir/ " ] ) if test x$blas_lib = xgoto; then STARPU_CHECK_LIB(BLAS, gfortran, main,,) STARPU_CHECK_LIB(BLAS, ifcore, main,,) # Perhaps that GotoBLAS2 is available instead (so that we have libgotoblas2.{so,a}) STARPU_CHECK_LIB(BLAS, goto2, sgemm_,, [havegoto2=no], [$STARPU_BLAS_LDFLAGS]) if test x$havegoto2 = xno; then STARPU_CHECK_LIB(BLAS, goto, sgemm_,,AC_MSG_ERROR([cannot find goto lib]), [$STARPU_BLAS_LDFLAGS]) fi AC_DEFINE(STARPU_GOTO, [1], [use STARPU_GOTO library]) fi fi if test x$blas_lib = xmaybe -o x$blas_lib = xatlas; then AC_ARG_WITH(atlas-dir, [AS_HELP_STRING([--with-atlas-dir=], [specify ATLAS lib location])], [ AC_MSG_CHECKING(STARPU_ATLAS location) blas_lib=atlas atlasdir=$withval AC_MSG_RESULT($atlasdir) AC_SUBST(ATLASDIR, $atlasdir) CPPFLAGS="${CPPFLAGS} -I$atlasdir/include/ " LDFLAGS="${LDFLAGS} -L$atlasdir/lib/ " ] ) if test x$blas_lib = xatlas; then # test whether STARPU_ATLAS is actually available AC_CHECK_HEADER([cblas.h],,AC_MSG_ERROR([cannot find atlas headers])) STARPU_CHECK_LIB(BLAS, atlas, ATL_sgemm,,AC_MSG_ERROR([cannot find atlas lib]),) STARPU_CHECK_LIB(BLAS, cblas, cblas_sgemm,,AC_MSG_ERROR([cannot find atlas lib]),[-latlas]) STARPU_CHECK_LIB(BLAS, f77blas, sgemm_,,AC_MSG_ERROR([cannot find f77blas lib]),) AC_DEFINE(STARPU_ATLAS, [1], [use STARPU_ATLAS library]) fi fi if test x$blas_lib = xmaybe -o x$blas_lib = xopenblas; then PKG_CHECK_MODULES([OPENBLAS], [openblas], [PKG_CHECK_MODULES([BLAS_OPENBLAS], [blas-openblas], [AC_DEFINE([STARPU_OPENBLAS], [1], [Define to 1 if you use the openblas library.]) AC_SUBST([STARPU_OPENBLAS], [1]) CFLAGS="${CFLAGS} ${OPENBLAS_CFLAGS} ${BLAS_OPENBLAS_CFLAGS} " LIBS="${LIBS} ${OPENBLAS_LIBS} ${BLAS_OPENBLAS_LIBS} " blas_lib=openblas ], [ if test x$blas_lib = xopenblas; then STARPU_CHECK_LIB(OPENBLAS, blas-openblas, cblas_sgemm,,AC_MSG_ERROR([cannot find blas-openblas lib]),[-lblas-openblas]) AC_DEFINE([STARPU_OPENBLAS], [1], [Define to 1 if you use the openblas library.]) AC_SUBST([STARPU_OPENBLAS], [1]) fi ]) ], [ if test x$blas_lib = xopenblas; then STARPU_CHECK_LIB(OPENBLAS, openblas, cblas_sgemm,,AC_MSG_ERROR([cannot find openblas lib]),[-lopenblas]) AC_DEFINE([STARPU_OPENBLAS], [1], [Define to 1 if you use the openblas library.]) AC_SUBST([STARPU_OPENBLAS], [1]) fi ] ) fi if test x$blas_lib = xmaybe -o x$blas_lib = xmkl; then # Should we use MKL ? if test -n "$MKLROOT" ; then CPPFLAGS="${CPPFLAGS} -I$MKLROOT/include" case $host_vendor in *1om) mkl_plat=mic ;; *) mkl_plat=intel64 ;; esac SAVED_LIBS=$LIBS STARPU_BLAS_LDFLAGS="-L$MKLROOT/lib/$mkl_plat -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lm -lpthread -ldl" LIBS="$LIBS $STARPU_BLAS_LDFLAGS" AC_LINK_IFELSE( [AC_LANG_PROGRAM([[ #include ]], [[ ]])], [ blas_lib=mkl ], [ STARPU_BLAS_LDFLAGS="" ], ) LIBS=$SAVED_LIBS fi AC_ARG_WITH(mkl-cflags, [AS_HELP_STRING([--with-mkl-cflags], [specify MKL compilation flags])], [ CPPFLAGS="${CPPFLAGS} $withval" blas_lib=mkl ]) AC_ARG_WITH(mkl-ldflags, [AS_HELP_STRING([--with-mkl-ldflags], [specify MKL linking flags])], [ STARPU_BLAS_LDFLAGS="$withval" blas_lib=mkl ]) if test x$blas_lib = xmkl; then AC_DEFINE(STARPU_MKL, [1], [use MKL library]) fi fi if test x$blas_lib = xmaybe -o x$blas_lib = xarmpl; then # Should we use ARMPL ? if test -n "$ARMPL_DIR" ; then CPPFLAGS="${CPPFLAGS} -I$ARMPL_INCLUDES" SAVED_LIBS=$LIBS STARPU_BLAS_LDFLAGS="-L$ARMPL_LIBRARIES -larmpl_lp64 -lgfortran -lm" LIBS="$LIBS $STARPU_BLAS_LDFLAGS" AC_LINK_IFELSE( [AC_LANG_PROGRAM([[ #include ]], [[ ]])], [ blas_lib=armpl ], [ STARPU_BLAS_LDFLAGS="" ], ) LIBS=$SAVED_LIBS fi AC_ARG_WITH(armpl-cflags, [AS_HELP_STRING([--with-armpl-cflags], [specify ARMPL compilation flags])], [ CPPFLAGS="${CPPFLAGS} $withval" blas_lib=armpl ]) AC_ARG_WITH(armpl-ldflags, [AS_HELP_STRING([--with-armpl-ldflags], [specify ARMPL linking flags])], [ STARPU_BLAS_LDFLAGS="$withval" blas_lib=armpl ]) if test x$blas_lib = xarmpl; then AC_DEFINE(STARPU_ARMPL, [1], [use ARMPL library]) fi fi if test x$blas_lib = xmaybe; then #perhaps it is possible to use some BLAS lib from the system use_system_blas=no STARPU_SEARCH_LIBS(BLAS,[sgemm_],[blas],use_system_blas=yes,,) if test x$use_system_blas = xyes; then AC_DEFINE(STARPU_SYSTEM_BLAS, [1], [use refblas library]) blas_lib=system elif test x"$BLAS_LIBS" != x; then AC_DEFINE(STARPU_SYSTEM_BLAS, [1], [use user defined library]) STARPU_BLAS_LDFLAGS="$BLAS_LIBS" blas_lib=system AC_ARG_VAR([BLAS_LIBS], [linker flags for blas]) else blas_lib=none fi fi if test x$blas_lib = xsystem; then AC_CHECK_HEADER([cblas.h], [have_cblas_h=yes], [have_cblas_h=no]) fi AM_CONDITIONAL(STARPU_HAVE_CBLAS_H, test x$have_cblas_h = xyes) if test x$have_cblas_h = xyes; then AC_DEFINE(STARPU_HAVE_CBLAS_H, [1], [The blas library has blas.h]) fi if test x$blas_lib != xnone; then AC_DEFINE(STARPU_HAVE_BLAS, [1], [The blas library is available]) SAVED_LIBS="$LIBS" LIBS="$LIBS -lblas" AC_CHECK_FUNCS([cblas_sgemv]) LIBS="$SAVED_LIBS" STARPU_SEARCH_LIBS([LIBLAPACK],[dgels_],[lapack],[enable_liblapack=yes],[enable_liblapack=no]) fi AM_CONDITIONAL(STARPU_HAVE_LIBLAPACK,test x$enable_liblapack = xyes) AM_CONDITIONAL(STARPU_HAVE_CBLAS_SGEMV, test x$HAVE_CBLAS_SGEMV = x1) AM_CONDITIONAL(STARPU_ATLAS_BLAS_LIB, test x$blas_lib = xatlas) AM_CONDITIONAL(STARPU_GOTO_BLAS_LIB, test x$blas_lib = xgoto) AM_CONDITIONAL(STARPU_MKL_BLAS_LIB, test x$blas_lib = xmkl) AM_CONDITIONAL(STARPU_SYSTEM_BLAS_LIB, test x$blas_lib = xsystem) AM_CONDITIONAL(STARPU_NO_BLAS_LIB, test x$blas_lib = xnone -a x$enable_simgrid = xno) AC_SUBST(STARPU_BLAS_LDFLAGS) AC_MSG_CHECKING(which BLAS lib should be used) AC_MSG_RESULT($blas_lib) AC_SUBST(BLAS_LIB,$blas_lib) ############################################################################### # # # Multiple linear regression # # # ############################################################################### AC_ARG_ENABLE(mlr, [AS_HELP_STRING([--enable-mlr], [Enable multiple linear regression models])], enable_mlr=$enableval, enable_mlr=no) AC_ARG_ENABLE(mlr-system-blas, [AS_HELP_STRING([--enable-mlr-system-blas], [Make the multiple linear regression models use the system BLAS instead of min-dgels])], enable_mlr_blas=$enableval, enable_mlr_blas=no) AC_MSG_CHECKING(whether multiple linear regression models are disabled) if test x$enable_mlr = xyes -a "$starpu_windows" != "yes" ; then AC_MSG_RESULT(no) install_min_dgels=no support_mlr=yes STARPU_SEARCH_LIBS(LAPACK,[dgels_],[lapack],use_system_lapack=yes,,) if test x$blas_lib = xnone ; then use_system_lapack=no fi if test x$enable_mlr_blas = xyes -a x$use_system_lapack = xyes; then AC_DEFINE(STARPU_MLR_MODEL, [1], [use reflapack library]) LDFLAGS="-llapack $LDFLAGS" else if test x$enable_mlr_blas = xyes -a x$blas_lib = xmkl; then AC_DEFINE(STARPU_MLR_MODEL, [1], [use mkl library]) else AC_MSG_CHECKING(whether min-dgels is linked) if test x"$DGELS_LIBS" != x; then AC_MSG_RESULT(yes) AC_DEFINE(STARPU_MLR_MODEL, [1], [use user defined library]) AC_ARG_VAR([DGELS_LIBS], [linker flags for lapack dgels]) else AC_MSG_RESULT(no) AC_MSG_CHECKING(min-dgels source) if test "${cross_compiling}" != "no" ; then # Cross-compiling is not supported by min-dgels AC_MSG_RESULT(no) install_min_dgels=no support_mlr=no else AC_MSG_RESULT(yes) DGELS_LIBS="-Wl,--start-group $STARPU_BUILD_DIR/min-dgels/build/minlibblas.a $STARPU_BUILD_DIR/min-dgels/build/minlibdgels.a $STARPU_BUILD_DIR/min-dgels/build/minlibf2c.a -Wl,--end-group" AC_DEFINE(STARPU_MLR_MODEL, [1], [use user defined library]) AC_DEFINE(STARPU_BUILT_IN_MIN_DGELS, [1], [use built-in min_dgels]) AC_ARG_VAR([DGELS_LIBS], [linker flags for lapack dgels]) install_min_dgels=yes fi fi fi fi else AC_MSG_RESULT(yes) install_min_dgels=no support_mlr=no fi AM_CONDITIONAL(STARPU_USE_MIN_DGELS, test x$install_min_dgels = xyes) ########################################## # FFT # ########################################## have_fftw=no have_fftwf=no have_fftwl=no fft_support=no AC_ARG_ENABLE(starpufft, [AS_HELP_STRING([--disable-starpufft], [Disable build of StarPU-FFT])], enable_starpufft=$enableval,enable_starpufft=yes) PKG_CHECK_MODULES([FFTW], [fftw3], [ AC_DEFINE([STARPU_HAVE_FFTW], [1], [Define to 1 if you have the libfftw3 library.]) AC_SUBST([STARPU_HAVE_FFTW], [1]) have_fftw=yes ], [:]) AM_CONDITIONAL(STARPU_HAVE_FFTW, [test x$have_fftw = xyes]) PKG_CHECK_MODULES([FFTWF], [fftw3f], [ AC_DEFINE([STARPU_HAVE_FFTWF], [1], [Define to 1 if you have the libfftw3f library.]) AC_SUBST([STARPU_HAVE_FFTWF], [1]) have_fftwf=yes ], [:]) AM_CONDITIONAL(STARPU_HAVE_FFTWF, [test x$have_fftwf = xyes]) PKG_CHECK_MODULES([FFTWL], [fftw3l], [ AC_DEFINE([STARPU_HAVE_FFTWL], [1], [Define to 1 if you have the libfftw3l library.]) AC_SUBST([HAVE_FFTWFL], [1]) have_fftwl=yes ], [:]) AM_CONDITIONAL(STARPU_HAVE_FFTWL, [test x$have_fftwl = xyes]) if test x$enable_starpufft = xyes -a \( \( x$enable_cpu = xyes -a x$have_fftw = xyes -a x$have_fftwf = xyes \) -o x$have_cufftdoublecomplex = xyes \); then fft_support=yes fi AM_CONDITIONAL(STARPU_BUILD_STARPUFFT, [test x$fft_support = xyes]) AC_ARG_ENABLE(starpufft-examples, [AS_HELP_STRING([--enable-starpufft-examples], [enable build of StarPU FFT examples])], enable_starpufft_examples=$enableval, enable_starpufft_examples=no) AM_CONDITIONAL(STARPU_BUILD_STARPUFFT_EXAMPLES, [test x$enable_starpufft_examples = xyes]) ########################################## # hwloc # ########################################## have_valid_hwloc=no SAVED_LIBS="${LIBS}" SAVED_CPPFLAGS="${CPPFLAGS}" SAVED_PKG_CONFIG_PATH="$PKG_CONFIG_PATH" AC_ARG_WITH([hwloc], [AS_HELP_STRING([--without-hwloc], [Disable hwloc (enabled by default)])], [ if test x$withval != xno; then if test "$withval" = "yes" ; then use_hwloc=yes else # use specified path if test ! -d "$withval" ; then AC_MSG_ERROR("Directory specified for hwloc <$withval> does not exist") fi if test -d "$withval/lib64/pkgconfig" ; then export PKG_CONFIG_PATH=$withval/lib64/pkgconfig:$PKG_CONFIG_PATH else if test -d "$withval/lib/pkgconfig" ; then export PKG_CONFIG_PATH=$withval/lib/pkgconfig:$PKG_CONFIG_PATH else AC_MSG_ERROR("Hwloc directory <$withval> does not have a subdirectory lib/pkgconfig or lib64/pkgconfig") fi fi use_hwloc=yes fi else use_hwloc=no fi ], [ use_hwloc=maybe ]) AS_IF([test "$use_hwloc" != "no"], [PKG_CHECK_MODULES([HWLOC],[hwloc], [have_valid_hwloc=yes], [have_valid_hwloc=no])] ) AM_CONDITIONAL(STARPU_HAVE_HWLOC, test "x$have_valid_hwloc" = "xyes") # in case hwloc was explicitly required, but is not available, this is an error AS_IF([test "$use_hwloc" = "yes" -a "$have_valid_hwloc" = "no"], [AC_MSG_ERROR([cannot find hwloc or pkg-config])] ) # in case hwloc is not available but was not explicitly disabled, this is an error AS_IF([test "$have_valid_hwloc" = "no" -a "$use_hwloc" != "no"], [AC_MSG_ERROR([libhwloc or pkg-config was not found on your system. If the target machine is hyperthreaded the performance may be impacted a lot. It is strongly recommended to install libhwloc and pkg-config. However, if you really want to use StarPU without enabling libhwloc, please restart configure by specifying the option '--without-hwloc'.])] ) LIBS="${HWLOC_LIBS} ${SAVED_LIBS}" CPPFLAGS="${HWLOC_CFLAGS} ${SAVED_CPPFLAGS}" AS_IF([test "$have_valid_hwloc" = "yes"], [AC_DEFINE([STARPU_HAVE_HWLOC], [1], [Define to 1 if you have the hwloc library.]) HWLOC_REQUIRES=hwloc AC_SUBST([STARPU_HAVE_HWLOC], [1]) AC_CHECK_DECLS([hwloc_cuda_get_device_osdev_by_index], [], [], [[#include ]]) AC_CHECK_DECLS([hwloc_hip_get_device_osdev_by_index], [], [], [[#include ]]) AC_CHECK_DECLS([hwloc_distances_obj_pair_values], [], [], [[#include ]]) ]) AC_CHECK_FUNCS([hwloc_topology_dup]) AC_CHECK_FUNCS([hwloc_topology_set_components]) AC_CHECK_FUNCS([hwloc_cpukinds_get_nr]) AC_CHECK_FUNCS([hwloc_get_area_memlocation]) AM_CONDITIONAL(STARPU_HWLOC_HAVE_TOPOLOGY_DUP, test $ac_cv_func_hwloc_topology_dup = yes) LIBS="${SAVED_LIBS}" CPPFLAGS="${SAVED_CPPFLAGS}" export PKG_CONFIG_PATH=$SAVED_PKG_CONFIG_PATH AC_MSG_CHECKING(whether hwloc should be used) AC_MSG_RESULT($have_valid_hwloc) AC_SUBST(HWLOC_REQUIRES) # is the header file f77.h available ? AC_CHECK_HEADER([f77.h], [have_f77_h=yes], [have_f77_h=no]) AC_SUBST(STARPU_HAVE_F77_H, $have_f77_h) AM_CONDITIONAL(STARPU_HAVE_F77_H, test x$have_f77_h = xyes) if test x$have_f77_h = xyes; then AC_DEFINE([STARPU_HAVE_F77_H], [1], [Define to 1 if you have the header file.]) fi AC_ARG_WITH(icc, [AS_HELP_STRING([--with-icc=], [Name or path of the icc compiler])], icc_path="$withval",icc_path="") AC_ARG_WITH(icc-args, [AS_HELP_STRING([--with-icc-args[=]], [Arguments for icc])], [icc_args=$withval]) AC_SUBST(ICC_ARGS,$icc_args) AC_ARG_ENABLE(icc, [AS_HELP_STRING([--enable-icc], [Enable the compilation of specific ICC examples])], enable_icc=$enableval, enable_icc=yes) ICC="" if test "$enable_icc" = "yes" ; then if test "$icc_path" != "" ; then ICC="$icc_path" else # Check if icc is available AC_PATH_PROG([ICC], [icc]) fi fi if test ! -x "$ICC"; then AC_MSG_RESULT(The ICC compiler '$ICC' does not have the execute permission) enable_icc=no ICC="" fi # If cuda and icc are both available, check they are compatible if test "$enable_cuda" = "yes" -a "$ICC" != ""; then AC_MSG_CHECKING(whether CUDA and ICC are compatible) OLD_CC="$CC" CC="$ICC" OLD_CFLAGS="$CFLAGS" CFLAGS="-I$PWD/include -I$srcdir/include" AC_COMPILE_IFELSE( [AC_LANG_PROGRAM( [[#include #include ]], [[]] )], AC_MSG_RESULT(yes), [ICC="" AC_MSG_RESULT(no)] ) CC="$OLD_CC" CFLAGS="$OLD_CFLAGS" fi # Disable ICC on windows if test "x$ICC" != "x" -a "$starpu_windows" = "yes" ; then ICC="" fi if test "x$ICC" != "x"; then AC_DEFINE(STARPU_HAVE_ICC, [1], [Define this if icc is available]) fi AM_CONDITIONAL([STARPU_HAVE_ICC], [test "x$ICC" != "x"]) # Do not generate manpages for the tools if we do not have help2man AC_CHECK_PROGS([HELP2MAN], [help2man]) # Disable on windows if test "$starpu_windows" = "yes" ; then HELP2MAN="" fi AM_CONDITIONAL([STARPU_HAVE_HELP2MAN], [test "x$HELP2MAN" != "x"]) AC_CHECK_MEMBER([struct cudaDeviceProp.pciDomainID], AC_DEFINE([STARPU_HAVE_DOMAINID],[1],[Define to 1 if CUDA device properties include DomainID]), , [[#include ]]) AC_CHECK_MEMBER([struct cudaDeviceProp.pciBusID], AC_DEFINE([STARPU_HAVE_BUSID],[1],[Define to 1 if CUDA device properties include BusID]), , [[#include ]]) dnl Set this condition when Automake 1.11 or later is being used. dnl Automake 1.11 introduced `silent-rules', hence the check. m4_ifdef([AM_SILENT_RULES], AM_CONDITIONAL([STARPU_HAVE_AM111], [true]), AM_CONDITIONAL([STARPU_HAVE_AM111], [false])) ########################################## # Resource Manager # ########################################## starpurm_support=no starpurm_dlb_support=no AC_ARG_ENABLE(starpurm, [AS_HELP_STRING([--enable-starpurm], [enable resource management support])], enable_starpurm=$enableval, enable_starpurm=no) if test "x$enable_starpurm" != xno then starpurm_support=yes AC_MSG_CHECKING(whether resource management debug messages should be displayed) AC_ARG_ENABLE(starpurm-verbose, [AS_HELP_STRING([--enable-starpurm-verbose], [display resource management verbose debug messages])], enable_starpurm_verbose=$enableval, enable_starpurm_verbose=no) AC_MSG_RESULT($enable_starpurm_verbose) if test x$enable_starpurm_verbose = xyes; then AC_DEFINE(STARPURM_VERBOSE, [1], [display resource management verbose debug messages]) fi # DLB DLB_CFLAGS="" DLB_LIBS="" AC_ARG_ENABLE(dlb, [AS_HELP_STRING([--enable-dlb], [enable DLB support])], enable_dlb=$enableval, enable_dlb=no) if test "x$enable_dlb" != xno then AC_ARG_WITH(dlb-include-dir, [AS_HELP_STRING([--with-dlb-include-dir=], [specify where DLB headers are installed])], [dlb_inc_dirs="$withval"], [dlb_inc_dirs=""]) dlb_inc_dirs="${dlb_inc_dirs} /usr/include/dlb" dlb_incdir_found=no for dlb_incdir in $dlb_inc_dirs do if test -n "$dlb_incdir" then SAVED_CPPFLAGS="${CPPFLAGS}" CPPFLAGS=-I${dlb_incdir} AC_CHECK_HEADERS([dlb.h]) if test "$ac_cv_header_dlb_h" = "yes" then CPPFLAGS="-I$dlb_incdir ${SAVED_CPPFLAGS}" DLB_CFLAGS="-I${dlb_incdir}" dlb_incdir_found=yes break else CPPFLAGS=${SAVED_CPPFLAGS} fi unset ac_cv_header_dlb_h fi done AC_ARG_WITH(dlb-lib-dir, [AS_HELP_STRING([--with-dlb-lib-dir=], [specify where DLB libraries are installed])], [dlb_lib_dirs="$withval"], [dlb_lib_dirs=""]) dlb_lib_dirs="${dlb_lib_dirs} /usr/lib/dlb" dlb_libdir_found=no for dlb_libdir in $dlb_lib_dirs do if test -n "$dlb_libdir" then SAVED_LDFLAGS="${LDFLAGS}" LDFLAGS=-L${dlb_libdir} AC_CHECK_LIB(dlb, [DLB_Init]) if test "$ac_cv_lib_dlb_DLB_Init" = "yes" then LDFLAGS="-L${dlb_libdir} ${SAVED_LDFLAGS} ${STARPU_DLB_LDFLAGS}" DLB_LIBS="-L${dlb_libdir} -ldlb" dlb_libdir_found=yes break else LDFLAGS=${SAVED_LDFLAGS} fi unset ac_cv_lib_dlb_DLB_Init fi done SAVED_CPPFLAGS="${CPPFLAGS}" SAVED_CFLAGS="${CFLAGS}" SAVED_LDFLAGS="${LDFLAGS}" CPPFLAGS="$HWLOC_CPPFLAGS -D_GNU_SOURCE $CPPFLAGS" CFLAGS="$HWLOC_CFLAGS $CFLAGS" LIBS="$HWLOC_LIBS $LIBS" # check whether libhwloc has a dedicated glibc-sched.h include for conversion with glibc cpusets AC_CHECK_HEADERS([hwloc/glibc-sched.h]) CPPFLAGS="$SAVED_CPPFLAGS" CFLAGS="$SAVED_CFLAGS" LIBS="$SAVED_LIBS" SAVED_CPPFLAGS="${CPPFLAGS}" SAVED_CFLAGS="${CFLAGS}" SAVED_LDFLAGS="${LDFLAGS}" CPPFLAGS="$STARPU_CPPFLAGS $CPPFLAGS" CFLAGS="$STARPU_CFLAGS $CFLAGS" LIBS="$STARPU_LIBS $LIBS" # check if StarPU implements starpu_worker_set_going_to_sleep_callback() if test x$enable_worker_cb = xyes ; then AC_DEFINE([STARPURM_STARPU_HAVE_WORKER_CALLBACKS], [1], [Define to 1 if StarPU has support for worker callbacks.]) fi #AC_CHECK_FUNC([starpu_worker_set_going_to_sleep_callback],AC_DEFINE([STARPURM_STARPU_HAVE_WORKER_CALLBACKS], [1], [Define to 1 if StarPU has support for worker callbacks.])) CPPFLAGS="$SAVED_CPPFLAGS" CFLAGS="$SAVED_CFLAGS" LIBS="$SAVED_LIBS" if test "x$dlb_incdir_found" != "xyes" -o "x$dlb_libdir_found" != "xyes" then enable_dlb=no fi fi AC_MSG_CHECKING(whether DLB support should be enabled) AC_MSG_RESULT($enable_dlb) if test "x$enable_dlb" != "xno" then AC_DEFINE([STARPURM_HAVE_DLB], [1], [Define to 1 if dlb support is enabled.]) starpurm_dlb_support=yes AC_MSG_CHECKING(whether DLB resource management debug messages should be displayed) AC_ARG_ENABLE(starpurm-dlb-verbose, [AS_HELP_STRING([--enable-starpurm-dlb-verbose], [display resource management verbose debug messages])], enable_starpurm_dlb_verbose=$enableval, enable_starpurm_dlb_verbose=no) AC_MSG_RESULT($enable_starpurm_dlb_verbose) if test x$enable_starpurm_dlb_verbose = xyes; then AC_DEFINE(STARPURM_DLB_VERBOSE, [1], [display DLB resource management verbose debug messages]) fi AX_DLB_CALLBACK_ARG() fi AC_SUBST(DLB_CFLAGS) AC_SUBST(DLB_LIBS) fi AM_CONDITIONAL(STARPURM_HAVE_DLB, test x$starpurm_dlb_support = "xyes") AM_CONDITIONAL(STARPU_BUILD_STARPURM, [test x$starpurm_support = xyes]) AC_ARG_ENABLE(starpurm-examples, [AS_HELP_STRING([--enable-starpurm-examples], [enable build of StarPU Resource Manager examples])], enable_starpurm_examples=$enableval, enable_starpurm_examples=no) AM_CONDITIONAL(STARPU_BUILD_STARPURM_EXAMPLES, [test x$enable_starpurm_examples = xyes]) ##################################### # StarPUPy # ##################################### starpupy_support=no if test "x$enable_starpupy" != xno then AC_CHECK_PROGS([PYTHON], python3) if test "$ac_cv_prog_PYTHON" = "" then if test "x$enable_starpupy" = xyes ; then AC_MSG_ERROR([python3 missing, cannot build StarPU python interface]) else AC_MSG_WARN([python3 missing, cannot build StarPU python interface]) enable_starpupy=no fi fi fi if test "x$enable_starpupy" != xno then AC_SUBST(PYTHON) AC_MSG_CHECKING(for python3 version) PYTHON_VERSION=$(echo "import sys ; print(str(sys.version_info.major)+\".\"+str(sys.version_info.minor))" | $PYTHON) AC_MSG_RESULT($PYTHON_VERSION) AC_SUBST(PYTHON_VERSION) PYTHON_INCLUDE_DIRS="`$PYTHON -c "from sysconfig import get_paths as gp; print(gp()@<:@'include'@:>@)"`" SAVED_CPPFLAGS="${CPPFLAGS}" CPPFLAGS="$CPPFLAGS -I$PYTHON_INCLUDE_DIRS" AC_CHECK_HEADERS([Python.h],[have_python_h=yes],[have_python_h=no]) if test "$have_python_h" = "no" ; then if test "x$enable_starpupy" = xyes ; then AC_MSG_ERROR([Python.h missing, cannot build StarPU python interface (consider installing python-dev)]) else AC_MSG_WARN([Python.h missing, cannot build StarPU python interface (consider installing python-dev)]) enable_starpupy=no fi fi fi if test "x$enable_starpupy" != xno then AC_CHECK_LIB([python$PYTHON_VERSION], [PyErr_Print], [have_python_lib=yes], [have_python_lib=no]) if test "$have_python_lib" = "no" ; then if test "x$enable_starpupy" = xyes ; then AC_MSG_ERROR([Python library missing, cannot build StarPU python interface (consider installing python-dev)]) else AC_MSG_WARN([Python library missing, cannot build StarPU python interface (consider installing python-dev)]) enable_starpupy=no fi fi fi if test "x$enable_starpupy" != xno then AC_MSG_CHECKING(for python3 setuptools) if $PYTHON -c "import setuptools" ; then AC_MSG_RESULT(yes) else AC_MSG_RESULT(no) if test "x$enable_starpupy" = xyes ; then AC_MSG_ERROR([setuptools missing, cannot install StarPU python interface (consider installing python-setuptools)]) else AC_MSG_WARN([setuptools missing, cannot install StarPU python interface (consider installing python-setuptools)]) enable_starpupy=no fi fi fi if test "x$enable_starpupy" != xno then CPPFLAGS=${SAVED_CPPFLAGS} AC_MSG_CHECKING(for python3 module joblib) AC_PYTHON_MODULE(joblib,[joblib_avail=yes],[joblib_avail=no]) AC_MSG_RESULT($joblib_avail) if test "$joblib_avail" = "yes" ; then AC_DEFINE(STARPU_PYTHON_HAVE_JOBLIB, [1], [Python joblib package available]) else AC_MSG_WARN([python3 module joblib missing, cannot build full StarPU python interface (consider running 'pip3 install joblib')]) fi AC_MSG_CHECKING(for python3 module cloudpickle) AC_PYTHON_MODULE(cloudpickle,[cloudpickle_avail=yes],[cloudpickle_avail=no]) AC_MSG_RESULT($cloudpickle_avail) if test "$cloudpickle_avail" = "yes" ; then AC_DEFINE(STARPU_PYTHON_HAVE_CLOUDPICKLE, [1], [Python cloudpickle package available]) else AC_MSG_WARN([python3 module cloudpickle missing, cannot build full StarPU python interface (consider running 'pip3 install cloudpickle')]) fi starpupy_support=yes AC_MSG_CHECKING(for python3 module numpy) AC_PYTHON_MODULE(numpy,[numpy_avail=yes],[numpy_avail=no]) AC_MSG_RESULT($numpy_avail) PYTHON_NUMPY_DIR="" if test "$numpy_avail" = "yes" ; then AC_DEFINE(STARPU_PYTHON_HAVE_NUMPY, [1], [Python3 numpy package available]) PYTHON_NUMPY_DIR="`$PYTHON -c "import numpy ; print(numpy.get_include())"`" fi AC_SUBST(PYTHON_NUMPY_DIR) PYTHON_SETUP_OPTIONS="" if test x$enable_debug = xyes ; then PYTHON_SETUP_OPTIONS="--debug" fi AC_SUBST(PYTHON_SETUP_OPTIONS) fi AM_CONDITIONAL(STARPU_BUILD_STARPUPY, [test x$starpupy_support = xyes]) AM_CONDITIONAL(STARPU_STARPUPY_NUMPY, [test x$numpy_avail = xyes]) AC_ARG_VAR([PYTHON], [Python3 interpreter]) ########################################## # Documentation # ########################################## def_enable_build_doc="yes" available_doc="no" if test -d "$srcdir/doc/doxygen/html" ; then def_enable_build_doc="no" available_doc="yes" fi if test "$starpu_darwin" = "yes" ; then def_enable_build_doc="no" fi AC_ARG_ENABLE(build-doc, [AS_HELP_STRING([--disable-build-doc], [disable building of documentation])], enable_build_doc=$enableval, enable_build_doc=$def_enable_build_doc) AC_ARG_ENABLE(build-doc-pdf, [AS_HELP_STRING([--enable-build-doc-pdf], [enable building of PDF documentation])], enable_build_doc_pdf=$enableval, enable_build_doc_pdf=no) available_doc_pdf="no" if test -f "$srcdir/doc/doxygen/starpu.pdf" ; then enable_build_doc_pdf="no" available_doc_pdf="yes" fi # Check whether doxygen needed tools are installed AC_PATH_PROG(doxygencommand, doxygen) if test "$doxygencommand" = "" ; then if test "$enable_build_doc_pdf" = "yes" ; then AC_MSG_ERROR([doxygen missing, cannot build documentation PDF]) fi enable_build_doc="no" enable_build_doc_pdf="no" fi AC_PATH_PROG(pdflatexcommand, pdflatex) if test "$pdflatexcommand" = "" ; then if test "$enable_build_doc_pdf" = "yes" ; then AC_MSG_ERROR([pdflatex missing, cannot build documentation PDF]) fi enable_build_doc_pdf="no" fi AC_PATH_PROG(epstopdfcommand, epstopdf) if test "$epstopdfcommand" = "" ; then if test "$enable_build_doc_pdf" = "yes" ; then AC_MSG_ERROR([epstopdf missing, cannot build documentation PDF]) fi enable_build_doc_pdf="no" fi AC_MSG_CHECKING(whether HTML documentation should be compiled) AC_MSG_RESULT($enable_build_doc) AC_MSG_CHECKING(whether HTML documentation is available) AC_MSG_RESULT($available_doc) AC_MSG_CHECKING(whether PDF documentation should be compiled) AC_MSG_RESULT($enable_build_doc_pdf) AC_MSG_CHECKING(whether PDF documentation is available) AC_MSG_RESULT($available_doc_pdf) AM_CONDITIONAL(STARPU_BUILD_DOC, [test x$enable_build_doc != xno]) AM_CONDITIONAL(STARPU_AVAILABLE_DOC, [test x$available_doc != xno]) AM_CONDITIONAL(STARPU_BUILD_DOC_PDF, [test x$enable_build_doc_pdf != xno]) AM_CONDITIONAL(STARPU_AVAILABLE_DOC_PDF, [test x$available_doc_pdf != xno]) if test x$enable_build_doc_pdf != xno ; then DOC_GENERATE_LATEX=YES else DOC_GENERATE_LATEX=NO fi AC_SUBST(DOC_GENERATE_LATEX) ############################################################################### # # # Julia # # # ############################################################################### AC_ARG_ENABLE(julia, [AS_HELP_STRING([--enable-julia], [enable the Julia extension])], enable_julia=$enableval, enable_julia=no) if test "$enable_julia" = "yes" ; then # Check whether the julia compiler is available AC_PATH_PROG(juliapath, julia) AC_MSG_CHECKING(whether julia is available) AC_MSG_RESULT($juliapath) if test ! -x "$juliapath" ; then AC_MSG_ERROR(Julia compiler '$juliapath' is not valid) enable_julia=no fi fi AM_CONDITIONAL([STARPU_USE_JULIA], [test "x$enable_julia" = "xyes"]) AC_SUBST(JULIA, $juliapath) ############################################################################### # # # Eclipse Plugin # # # ############################################################################### AC_ARG_ENABLE(eclipse-plugin, [AS_HELP_STRING([--enable-eclipse-plugin], [Build the Eclipse plugin])], enable_eclipse_plugin=$enableval, enable_eclipse_plugin=no) if test "$enable_eclipse_plugin" = "yes" ; then AC_PATH_PROG(eclipsepath, eclipse) AC_MSG_CHECKING(whether eclipse is available) AC_MSG_RESULT($eclipsepath) if test ! -x "$eclipsepath" ; then AC_MSG_ERROR(Eclipse executable '$eclipsepath' is not valid) enable_eclipse_plugin=no fi libs=$(for x in starpu-$STARPU_EFFECTIVE_VERSION $(echo $STARPU_EXPORTED_LIBS | sed 's/-l//g') $HWLOC_REQUIRES ; do echo $x ; done) option_libs=$($srcdir/eclipse-plugin/tools/cproject.sh option $libs) module_libs=$($srcdir/eclipse-plugin/tools/cproject.sh module $libs) fi AM_CONDITIONAL([STARPU_BUILD_ECLIPSE_PLUGIN], [test "x$enable_eclipse_plugin" = "xyes"]) AC_SUBST(ECLIPSE, $eclipsepath) AC_SUBST(STARPU_INCLUDE_PATH, $(eval echo ${includedir}/starpu/$STARPU_EFFECTIVE_VERSION)) AC_SUBST(STARPU_LIB_PATH, $(eval echo ${prefix}/lib)) AC_SUBST(STARPU_MODULE_LIBS, "$module_libs") AC_SUBST(STARPU_OPTION_LIBS, "$option_libs") ############################################################################### # # # Final settings # # # ############################################################################### if test x$enable_simgrid = xyes -a \( x$enable_cuda0 = xyes -o x$enable_cuda1 = xyes \) ; then AC_MSG_ERROR([Cuda0 not supported with simgrid]) fi if test x$enable_opencl = xyes -a \( x$enable_cuda0 = xyes -o x$enable_cuda1 = xyes \) ; then AC_MSG_ERROR([Cuda0 not supported with OpenCL]) fi if test x$enable_openmp = xyes -a \( x$enable_cuda0 = xyes -o x$enable_cuda1 = xyes \) ; then AC_MSG_ERROR([Cuda0 not supported with OpenMP]) fi CPPFLAGS="$CPPFLAGS -DSTARPU_SAMPLING_DIR=\"\\\"${datarootdir}/starpu/perfmodels/sampling\\\"\"" STARPU_BASIC_H_CPPFLAGS="$HWLOC_CFLAGS $STARPU_CUDA_CPPFLAGS $STARPU_HIP_CPPFLAGS $STARPU_OPENCL_CPPFLAGS $STARPU_MAX_FPGA_CPPFLAGS $SIMGRID_CFLAGS $PAPI_CFLAGS" # these are the flags needed to compile starpu.h STARPU_H_CPPFLAGS="$STARPU_BASIC_H_CPPFLAGS" AC_SUBST([STARPU_H_CPPFLAGS]) STARPU_NVCC_H_CPPFLAGS="$STARPU_BASIC_H_CPPFLAGS" AC_SUBST([STARPU_NVCC_H_CPPFLAGS]) # these are the flags needed for linking libstarpu (and thus also for static linking) LIBSTARPU_LDFLAGS="$STARPU_OPENCL_LDFLAGS $STARPU_CUDA_LDFLAGS $STARPU_HIP_LDFLAGS $HWLOC_LIBS $FXT_LDFLAGS $FXT_LIBS $PAPI_LIBS $STARPU_GLPK_LDFLAGS $STARPU_LEVELDB_LDFLAGS $SIMGRID_LDFLAGS $STARPU_BLAS_LDFLAGS $DGELS_LIBS $STARPU_MAX_FPGA_LDFLAGS $STARPU_DLOPEN_LDFLAGS" AC_SUBST([LIBSTARPU_LDFLAGS]) # these are the flags needed for linking against libstarpu (because starpu.h makes its includer use pthread_*, simgrid, etc.) if test "x$enable_shared" = xno; then # No .so, so application will unexpectedly have to know which -l to # use. Give them in .pc file. AC_DEFINE(STARPU_STATIC_ONLY, [1], [Only static compilation was made]) STARPU_EXPORTED_LIBS="$STARPU_EXPORTED_LIBS $LDFLAGS $LIBS $LIBSTARPU_LDFLAGS" fi AC_SUBST(STARPU_EXPORTED_LIBS) STARPUPY_EXTRA_LINK_ARGS="" if test "x$enable_starpupy" != xno then if test "x$OPENMP_CFLAGS" != "x" then STARPUPY_EXTRA_LINK_ARGS="$STARPUPY_EXTRA_LINK_ARGS '$OPENMP_CFLAGS', " fi for flag in $STARPU_EXPORTED_LIBS do STARPUPY_EXTRA_LINK_ARGS="$STARPUPY_EXTRA_LINK_ARGS '$flag', " done if test x$enable_coverage = xyes; then STARPUPY_EXTRA_LINK_ARGS="$STARPUPY_EXTRA_LINK_ARGS '-lgcov', " fi fi AC_SUBST(STARPUPY_EXTRA_LINK_ARGS) LIBSTARPU_LINK=libstarpu-$STARPU_EFFECTIVE_VERSION.la LIBSTARPU_LINK="$LIBSTARPU_LINK $STARPU_EXPORTED_LIBS" AC_SUBST([LIBSTARPU_LINK]) # File configuration AC_CONFIG_COMMANDS([executable-scripts], [ chmod +x tests/regression/regression.sh chmod +x tests/model-checking/starpu-mc.sh chmod +x tools/starpu_env chmod +x tools/starpu_codelet_profile chmod +x tools/starpu_codelet_histo_profile chmod +x tools/starpu_mpi_comm_matrix.py chmod +x tools/starpu_fxt_number_events_to_names.py chmod +x tools/starpu_workers_activity chmod +x tools/starpu_paje_draw_histogram chmod +x tools/starpu_paje_state_stats chmod +x tools/starpu_paje_summary chmod +x tools/starpu_config chmod +x tools/starpu_mlr_analysis chmod +x tools/starpu_paje_sort chmod +x tools/starpu_smpirun chmod +x tools/starpu_tcpipexec chmod +x doc/doxygen/doxygen_filter.sh chmod +x doc/doxygen_dev/doxygen_filter.sh chmod +x starpupy/execute.sh chmod +x julia/examples/execute.sh for x in \ tests/microbenchs/tasks_data_overhead.sh \ tests/microbenchs/sync_tasks_data_overhead.sh \ tests/microbenchs/async_tasks_data_overhead.sh \ tests/microbenchs/tasks_size_overhead.sh \ tests/microbenchs/tasks_size_overhead_sched.sh \ tests/microbenchs/tasks_size_overhead_scheds.sh \ tests/microbenchs/tasks_size_overhead.gp \ tests/microbenchs/microbench.sh \ tests/microbenchs/parallel_dependent_homogeneous_tasks_data.sh \ tests/microbenchs/parallel_independent_heterogeneous_tasks_data.sh \ tests/microbenchs/parallel_independent_heterogeneous_tasks.sh \ tests/microbenchs/parallel_independent_homogeneous_tasks_data.sh \ tests/microbenchs/parallel_independent_homogeneous_tasks.sh \ tests/microbenchs/parallel_redux_homogeneous_tasks_data.sh \ tests/microbenchs/parallel_redux_heterogeneous_tasks_data.sh \ tests/microbenchs/bandwidth_scheds.sh \ tests/energy/static.sh \ tests/energy/dynamic.sh \ tests/datawizard/locality.sh \ tests/overlap/overlap.sh \ tests/model-checking/prio_list.sh \ tests/model-checking/prio_list2.sh \ tests/model-checking/prio_list3.sh \ tests/model-checking/barrier.sh \ examples/heat/heat.sh \ examples/lu/lu.sh \ examples/cholesky/cholesky.sh \ examples/cholesky/cholesky_julia.sh \ examples/mult/sgemm.sh \ examples/scheduler/schedulers.sh \ examples/scheduler/schedulers_context.sh \ examples/scheduler/libdummy_sched.sh \ examples/profiling_tool/prof.sh \ tools/starpu_paje_draw_histogram.R \ tools/starpu_paje_state_stats.R \ tools/starpu_mlr_analysis.Rmd \ tools/starpu_paje_summary.Rmd \ tools/starpu_trace_state_stats.py \ julia/examples/check_deps/check_deps.sh \ julia/examples/mult/mult_starpu.sh \ julia/examples/mult/perf.sh \ julia/examples/variable/variable.sh \ julia/examples/task_insert_color/task_insert_color.sh \ julia/examples/vector_scal/vector_scal.sh \ julia/examples/mandelbrot/mandelbrot.sh \ julia/examples/callback/callback.sh \ julia/examples/dependency/task_dep.sh \ julia/examples/dependency/tag_dep.sh \ julia/examples/dependency/end_dep.sh \ julia/examples/axpy/axpy.sh \ julia/examples/gemm/gemm.sh \ julia/examples/cholesky/cholesky.sh \ starpupy/benchmark/tasks_size_overhead.sh \ starpupy/benchmark/tasks_size_overhead.gp \ starpupy/benchmark/test_handle_perf.sh \ starpupy/benchmark/test_handle_perf_pickle.sh \ starpupy/examples/starpu_py.sh \ starpupy/examples/starpu_py.concurrent.sh \ starpupy/examples/starpu_py_handle.sh \ starpupy/examples/starpu_py_handle.concurrent.sh \ starpupy/examples/starpu_py_np.sh \ starpupy/examples/starpu_py_np.concurrent.sh \ starpupy/examples/starpu_py_parallel.sh \ starpupy/examples/starpu_py_partition.sh \ starpupy/examples/starpu_py_partition.concurrent.sh \ starpupy/examples/starpu_py_perfmodel.sh \ starpupy/examples/starpu_py_perfmodel.concurrent.sh \ starpupy/examples/starpu_py_numpy.sh \ starpupy/examples/starpu_py_numpy.concurrent.sh \ ; do test -e $x || ( mkdir -p $(dirname $x) && ln -sf $ac_abs_top_srcdir/$x $(dirname $x) ) done for x in tools julia/examples starpufft/tests examples examples/stencil mpi/tests mpi/examples socl/examples bubble/tests starpupy/examples starpu_openmp_llvm/examples \ ; do test -e $x/loader.c || ln -sf $ac_abs_top_srcdir/tests/loader.c $x done sed -i -e '/ STARPU_SRC_DIR /d' -e '/ STARPU_BUILD_DIR /d' src/common/config.h ]) # Create links to ICD files in build/socl/vendors directory. SOCL will use this # directory as the OCL_ICD_VENDORS directory SOCL_VENDORS="vendors/install/socl.icd" for icd in /etc/OpenCL/vendors/*.icd ; do if test -f $icd ; then if test "$(basename $icd)" != "socl.icd" ; then new_icd=$(basename $icd) AC_CONFIG_LINKS([socl/vendors/$new_icd:$icd]) SOCL_VENDORS="$SOCL_VENDORS vendors/$new_icd" fi fi done AC_SUBST(SOCL_VENDORS) AC_CONFIG_FILES(tests/regression/regression.sh tests/regression/profiles tests/regression/profiles.build.only) AC_CONFIG_HEADER(src/common/config.h src/common/config-src-build.h include/starpu_config.h starpurm/include/starpurm_config.h) SANITIZE=$(echo $CFLAGS | grep sanitize) AM_CONDITIONAL(STARPU_SANITIZE, test -n "$SANITIZE") AC_OUTPUT([ Makefile src/Makefile tools/Makefile tools/starpu_env tools/starpu_codelet_profile tools/starpu_codelet_histo_profile tools/starpu_mpi_comm_matrix.py tools/starpu_fxt_number_events_to_names.py tools/starpu_workers_activity tools/starpu_paje_draw_histogram tools/starpu_paje_state_stats tools/starpu_paje_summary tools/starpu_config tools/starpu_mlr_analysis tools/starpu_paje_sort tools/starpu_smpirun tools/starpu_tcpipexec socl/Makefile socl/src/Makefile socl/examples/Makefile socl/vendors/socl.icd socl/vendors/install/socl.icd packages/libstarpu.pc packages/starpu-1.0.pc packages/starpu-1.1.pc packages/starpu-1.2.pc packages/starpu-1.3.pc packages/starpu-1.4.pc packages/starpu-1.3 packages/starpu-1.4 mpi/packages/libstarpumpi.pc mpi/packages/starpumpi-1.0.pc mpi/packages/starpumpi-1.1.pc mpi/packages/starpumpi-1.2.pc mpi/packages/starpumpi-1.3.pc mpi/packages/starpumpi-1.4.pc starpufft/Makefile starpufft/src/Makefile starpufft/tests/Makefile starpufft/packages/libstarpufft.pc starpufft/packages/starpufft-1.0.pc starpufft/packages/starpufft-1.1.pc starpufft/packages/starpufft-1.2.pc starpufft/packages/starpufft-1.3.pc starpufft/packages/starpufft-1.4.pc starpurm/Makefile starpurm/src/Makefile starpurm/tests/Makefile starpurm/examples/Makefile starpurm/packages/starpurm-1.3.pc starpurm/packages/starpurm-1.4.pc starpu_openmp_llvm/Makefile starpu_openmp_llvm/src/Makefile starpu_openmp_llvm/examples/Makefile starpupy/src/setup.cfg starpupy/src/setup.py starpupy/Makefile starpupy/src/Makefile starpupy/examples/Makefile starpupy/execute.sh starpupy/benchmark/Makefile examples/Makefile examples/stencil/Makefile tests/Makefile tests/model-checking/Makefile tests/model-checking/starpu-mc.sh mpi/Makefile mpi/src/Makefile mpi/tests/Makefile mpi/examples/Makefile mpi/tools/Makefile mpi/GNUmakefile sc_hypervisor/Makefile sc_hypervisor/src/Makefile sc_hypervisor/examples/Makefile doc/Makefile doc/doxygen/Makefile doc/doxygen/doxygen-config.cfg doc/doxygen/doxygen-config-include.cfg doc/doxygen/doxygen_filter.sh doc/doxygen_dev/Makefile doc/doxygen_dev/doxygen-config.cfg doc/doxygen_dev/doxygen_filter.sh doc/doxygen_dev/doxygen-config-include.cfg doc/doxygen_web_introduction/Makefile doc/doxygen_web_introduction/doxygen-config.cfg doc/doxygen_web_installation/Makefile doc/doxygen_web_installation/doxygen-config.cfg doc/doxygen_web_basics/Makefile doc/doxygen_web_basics/doxygen-config.cfg doc/doxygen_web_applications/Makefile doc/doxygen_web_applications/doxygen-config.cfg doc/doxygen_web_performances/Makefile doc/doxygen_web_performances/doxygen-config.cfg doc/doxygen_web_faq/Makefile doc/doxygen_web_faq/doxygen-config.cfg doc/doxygen_web_languages/Makefile doc/doxygen_web_languages/doxygen-config.cfg doc/doxygen_web_extensions/Makefile doc/doxygen_web_extensions/doxygen-config.cfg tools/msvc/starpu_var.bat min-dgels/Makefile bubble/Makefile bubble/tests/Makefile julia/Makefile julia/src/Makefile julia/src/dynamic_compiler/Makefile julia/examples/Makefile julia/examples/execute.sh eclipse-plugin/Makefile eclipse-plugin/src/Makefile eclipse-plugin/examples/Makefile eclipse-plugin/examples/hello/.cproject ]) AC_MSG_NOTICE([ CPUs enabled: $enable_cpu CUDA enabled: $enable_cuda $NO_NVML HIP enabled: $enable_hip OpenCL enabled: $enable_opencl Max FPGA enabled: $enable_max_fpga Compile-time limits (change these with --enable-maxcpus, --enable-maxcudadev, --enable-maxopencldev, --enable-maxmaxfpgadev, --enable-maxnodes, --enable-maxbuffers) (Note these numbers do not represent the number of detected devices, but the maximum number of devices StarPU can manage) Maximum number of CPUs: $maxcpus Maximum number of CUDA devices: $nmaxcudadev Maximum number of HIP devices: $nmaxhipdev Maximum number of OpenCL devices: $nmaxopencldev Maximum number of Maxeler FPGA devices: $nmaxmaxfpgadev Maximum number of MPI master-slave devices: $nmaxmpidev Maximum number of TCP/IP master-slave devices: $nmaxtcpipdev Maximum number of memory nodes: $maxnodes Maximum number of task buffers: $nmaxbuffers CUDA GPU-GPU transfers: $enable_cuda_memcpy_peer CUDA Map: $enable_cuda_map HIP GPU-GPU transfers: $enable_hip_memcpy_peer Allocation cache: $enable_allocation_cache Magma enabled: $have_magma BLAS library: $blas_lib hwloc: $have_valid_hwloc FxT trace enabled: $enable_fxt Documentation HTML: $enable_build_doc Documentation PDF: $enable_build_doc_pdf Examples: $enable_build_examples StarPU Extensions: StarPU MPI enabled: $build_mpi_lib StarPU MPI failure tolerance: $enable_mpi_ft StarPU MPI failure tolerance stats: $use_mpi_ft_stats StarPU MPI(nmad) enabled: $build_nmad_lib MPI test suite: $running_mpi_check Master-Slave MPI enabled: $build_mpi_master_slave Master-Slave TCP/IP enabled: $build_tcpip_master_slave FFT Support: $fft_support Resource Management enabled: $starpurm_support Python Interface enabled: $starpupy_support OpenMP runtime support enabled: $enable_openmp OpenMP LLVM runtime support enabled: $enable_openmp_llvm Parallel Worker support enabled: $enable_parallel_worker SOCL enabled: $build_socl SOCL test suite: $run_socl_check Scheduler Hypervisor: $build_sc_hypervisor simgrid enabled: $enable_simgrid ayudame enabled: $ayu_msg HDF5 enabled: $enable_hdf5 Native fortran support: $enable_build_fortran Native MPI fortran support: $use_mpi_fort Support for multiple linear regression models: $support_mlr Hierarchical dags support: $enable_bubble JULIA enabled: $enable_julia ]) if test "$build_socl" = "yes" -a "$run_socl_check" = "no" ; then AC_MSG_NOTICE([ WARNING: SOCL test suite will not be run as the environment variable SOCL_OCL_LIB_OPENCL is not defined. To run the tests, you need to install the OCL implementation of ICD (https://forge.imag.fr/projects/ocl-icd/ or Debian package ocl-icd-libopencl1) and set the variable SOCL_OCL_LIB_OPENCL to the location of the libOpenCL.so.]) fi if test x"$have_valid_hwloc" = xno -a "$enable_simgrid" = "no" ; then AC_MSG_NOTICE([ WARNING: hwloc was not enabled. If the target machine is hyperthreaded the performance may be impacted a lot. It is strongly recommended to install hwloc]) fi if test x"$starpu_windows" = xyes -a "x$STARPU_MS_LIB" = "x" ; then AC_MSG_NOTICE([ WARNING: lib was not found, you will not be able to build StarPU applications with Microsoft Visual Studio. Add to your PATH the directories for MSVC, e.g c:\Program Files (x86)\Microsoft Visual Studio 11.0\Common7\IDE; c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin]) fi starpu-1.4.9+dfsg/contrib/000077500000000000000000000000001507764646700154505ustar00rootroot00000000000000starpu-1.4.9+dfsg/contrib/ci.inria.fr/000077500000000000000000000000001507764646700175525ustar00rootroot00000000000000starpu-1.4.9+dfsg/contrib/ci.inria.fr/Jenkinsfile-basic000066400000000000000000000056601507764646700230240ustar00rootroot00000000000000#!groovy // StarPU --- Runtime system for heterogeneous multicore architectures. // // Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria // // StarPU is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation; either version 2.1 of the License, or (at // your option) any later version. // // StarPU is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU Lesser General Public License in COPYING.LGPL for more details. // def statusHasChanged = false pipeline { agent none // Trigger the build triggers { // Poll SCM explicitly every hour pollSCM('0 * * * *') } options { timeout(time: 1, unit: 'HOURS') } stages { stage('Tarball') { steps { node('autotools') { checkout scm sh 'contrib/ci.inria.fr/job-0-tarball.sh' script { env.tarballgz = sh (script: 'ls *.tar.gz', returnStdout: true).trim() } stash includes: "${env.tarballgz}", name: 'tarballgz' stash includes: "starpu.pdf", name: 'doc' stash includes: "starpu_dev.pdf", name: 'doc_dev' // Stash those scripts because they are not in make dist dir('contrib/ci.inria.fr') { stash includes: "job-1-check.sh", name: 'script-unix-check' } archiveArtifacts artifacts: "${env.tarballgz},starpu.pdf,starpu_dev.pdf", fingerprint: true, onlyIfSuccessful: true deleteDir() } } } stage('Check') { steps { script { labelToSelect = 'unix' listOfNodeNames = jenkins.model.Jenkins.instance.nodes.collect { node -> node.getLabelString().contains(labelToSelect) ? node.name : null } listOfNodeNames.removeAll(Collections.singleton(null)) def p = listOfNodeNames.collectEntries { [ (it): { node(it) { dir('check-unix') { unstash 'tarballgz' unstash 'script-unix-check' sh 'chmod 755 job-1-check.sh && ./job-1-check.sh' deleteDir() } } } ]} parallel p; } } } } post { // hooks are called in order: always, changed, aborted, failure, success, unstable changed { echo "Build status has changed." script { statusHasChanged = true } } success { echo "Build success." // email when changed to success script { if (statusHasChanged) { emailext(body: '${DEFAULT_CONTENT}', subject: '${DEFAULT_SUBJECT}', replyTo: '$DEFAULT_REPLYTO', to: '$DEFAULT_RECIPIENTS') } } } failure { echo "Build failure." // always email on failure emailext(body: '${DEFAULT_CONTENT}', subject: '${DEFAULT_SUBJECT}', replyTo: '$DEFAULT_REPLYTO', to: '$DEFAULT_RECIPIENTS') } } } starpu-1.4.9+dfsg/contrib/ci.inria.fr/Jenkinsfile-bsd000066400000000000000000000056101507764646700225060ustar00rootroot00000000000000#!groovy // StarPU --- Runtime system for heterogeneous multicore architectures. // // Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria // // StarPU is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation; either version 2.1 of the License, or (at // your option) any later version. // // StarPU is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU Lesser General Public License in COPYING.LGPL for more details. // def statusHasChanged = false pipeline { agent none // Trigger the build triggers { // Poll SCM explicitly every past-half hour pollSCM('30 * * * *') } stages { stage('Tarball') { steps { node('autotools') { checkout scm sh 'contrib/ci.inria.fr/job-0-tarball.sh' script { env.tarballgz = sh (script: 'ls *.tar.gz', returnStdout: true).trim() } stash includes: "${env.tarballgz}", name: 'tarballgz' stash includes: "starpu.pdf", name: 'doc' stash includes: "starpu_dev.pdf", name: 'doc_dev' // Stash those scripts because they are not in make dist dir('contrib/ci.inria.fr') { stash includes: "job-1-check.sh", name: 'script-unix-check' } archiveArtifacts artifacts: "${env.tarballgz},starpu.pdf,starpu_dev.pdf", fingerprint: true, onlyIfSuccessful: true deleteDir() } } } stage('Check') { steps { script { labelToSelect = 'bsd' listOfNodeNames = jenkins.model.Jenkins.instance.nodes.collect { node -> node.getLabelString().contains(labelToSelect) ? node.name : null } listOfNodeNames.removeAll(Collections.singleton(null)) def p = listOfNodeNames.collectEntries { [ (it): { node(it) { dir('check-unix') { unstash 'tarballgz' unstash 'script-unix-check' sh 'chmod 755 job-1-check.sh && ./job-1-check.sh' deleteDir() } } } ]} parallel p; } } } } post { // hooks are called in order: always, changed, aborted, failure, success, unstable changed { echo "Build status has changed." script { statusHasChanged = true } } success { echo "Build success." // email when changed to success script { if (statusHasChanged) { emailext(body: '${DEFAULT_CONTENT}', subject: '${DEFAULT_SUBJECT}', replyTo: '$DEFAULT_REPLYTO', to: '$DEFAULT_RECIPIENTS') } } } failure { echo "Build failure." // always email on failure emailext(body: '${DEFAULT_CONTENT}', subject: '${DEFAULT_SUBJECT}', replyTo: '$DEFAULT_REPLYTO', to: '$DEFAULT_RECIPIENTS') } } } starpu-1.4.9+dfsg/contrib/ci.inria.fr/Jenkinsfile-windows000066400000000000000000000064761507764646700234430ustar00rootroot00000000000000#!groovy // StarPU --- Runtime system for heterogeneous multicore architectures. // // Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria // // StarPU is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation; either version 2.1 of the License, or (at // your option) any later version. // // StarPU is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU Lesser General Public License in COPYING.LGPL for more details. // def statusHasChanged = false pipeline { agent none // Trigger the build triggers { // Poll scm once a day between 10pm and 11pm pollSCM('H 22 * * *') } stages { stage('Tarball') { steps { node('autotools2') { checkout scm sh 'contrib/ci.inria.fr/job-0-tarball.sh' script { env.tarballgz = sh (script: 'ls *.tar.gz', returnStdout: true).trim() } stash includes: "${env.tarballgz}", name: 'tarballgz' stash includes: "starpu.pdf", name: 'doc' stash includes: "starpu_dev.pdf", name: 'doc_dev' // Stash those scripts because they are not in make dist dir('contrib/ci.inria.fr') { stash includes: "job-1-check-windows.bat", name: 'script-windows-check' stash includes: "job-1-build-windows.sh", name: 'script-windows-build' } archiveArtifacts artifacts: "${env.tarballgz},starpu.pdf,starpu_dev.pdf", fingerprint: true, onlyIfSuccessful: true deleteDir() } } } stage('Check') { steps { script { labelToSelect = 'windows' listOfNodeNames = jenkins.model.Jenkins.instance.nodes.collect { node -> node.getLabelString().contains(labelToSelect) ? node.name : null } listOfNodeNames.removeAll(Collections.singleton(null)) if (listOfNodeNames.size() == 0) { sh 'false' } def p = listOfNodeNames.collectEntries { [ (it): { node(it) { dir('check-windows') { unstash 'tarballgz' unstash 'script-windows-check' unstash 'script-windows-build' bat './job-1-check-windows.bat' archiveArtifacts artifacts: "*.zip", fingerprint: true, onlyIfSuccessful: true if (env.KEEP_WORKING_DIRECTORY != 'true') deleteDir() } } } ]} parallel p; } } } } post { // hooks are called in order: always, changed, aborted, failure, success, unstable changed { echo "Build status has changed." script { statusHasChanged = true } } success { echo "Build success." // email when changed to success script { if (statusHasChanged) { emailext(body: '${DEFAULT_CONTENT}', subject: '${DEFAULT_SUBJECT}', replyTo: '$DEFAULT_REPLYTO', to: '$DEFAULT_RECIPIENTS') } } } failure { echo "Build failure." // always email on failure emailext(body: '${DEFAULT_CONTENT}', subject: '${DEFAULT_SUBJECT}', replyTo: '$DEFAULT_REPLYTO', to: '$DEFAULT_RECIPIENTS') } } } starpu-1.4.9+dfsg/contrib/ci.inria.fr/job-0-tarball.sh000077500000000000000000000023111507764646700224340ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # set -e export LC_ALL=C export PKG_CONFIG_PATH=/home/ci/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH export LD_LIBRARY_PATH=/home/ci/usr/local/lib:$LD_LIBRARY_PATH if test -f $HOME/starpu_specific_env.sh then . $HOME/starpu_specific_env.sh fi BUILD=./build_$$ ./autogen.sh if test -d $BUILD ; then chmod -R 777 $BUILD && rm -rf $BUILD ; fi mkdir $BUILD && cd $BUILD ../configure --enable-build-doc-pdf $STARPU_USER_CONFIGURE_OPTIONS make -j4 make dist cp *gz .. cp doc/doxygen/starpu.pdf .. cp doc/doxygen_dev/starpu_dev.pdf .. make clean starpu-1.4.9+dfsg/contrib/ci.inria.fr/job-1-build-windows.sh000077500000000000000000000045041507764646700236110ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # set -e export LC_ALL=C oldPATH=$PATH export PATH=/c/Builds:/usr/bin:/bin:"/c/Program Files (x86)/Microsoft Visual Studio 11.0/VC/bin":"/c/Program Files/Microsoft Visual Studio 11.0/Common7/IDE":$oldPATH tarball=$(ls -tr starpu*.tar.gz | tail -1) if test -z "$tarball" ; then echo Tarball not available exit 2 fi basename=$(basename $tarball .tar.gz) test -d $basename && chmod -R u+rwX $basename && rm -rf $basename tar xfz $tarball touch --date="last hour" $(find $basename) version=$(echo $basename | cut -d- -f2) winball=starpu-win32-build-${version} export STARPU_HOME=$PWD rm -rf ${basename}/build mkdir ${basename}/build cd ${basename}/build #export HWLOC=/c/StarPU/hwloc-win32-build-1.11.0 prefix=${PWD}/../../${winball} rm -rf $prefix #--with-hwloc=${HWLOC} options="--without-hwloc --enable-quick-check --enable-debug --enable-verbose --enable-native-winthreads" day=$(date +%u) if test $day -le 5 then ../configure --prefix=$prefix $options --disable-build-examples $STARPU_USER_CONFIGURE_OPTIONS else ../configure --prefix=$prefix $options $STARPU_USER_CONFIGURE_OPTIONS fi make CHECK=${PWD}/check_$$ touch ${CHECK} if test "$1" == "-exec" then (make -k check || true) > ${CHECK} 2>&1 cat ${CHECK} make showcheck fi fail=$(grep FAIL ${CHECK} | grep -v XFAIL || true) if test -z "$fail" then make install cd ../../ cp /c/MinGW/bin/pthread*dll ${winball}/bin cp /c/MinGW/bin/libgcc*dll ${winball}/bin # cp ${HWLOC}/bin/*dll ${winball}/bin zip -r ${winball}.zip ${winball} rm -rf starpu_install mv ${winball} starpu_install fi PATH=$oldPATH echo $fail exit $(grep FAIL ${CHECK} | grep -v XFAIL | wc -l) starpu-1.4.9+dfsg/contrib/ci.inria.fr/job-1-check-windows.bat000066400000000000000000000022001507764646700237070ustar00rootroot00000000000000REM StarPU --- Runtime system for heterogeneous multicore architectures. REM REM Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria REM REM StarPU is free software; you can redistribute it and/or modify REM it under the terms of the GNU Lesser General Public License as published by REM the Free Software Foundation; either version 2.1 of the License, or (at REM your option) any later version. REM REM StarPU is distributed in the hope that it will be useful, but REM WITHOUT ANY WARRANTY; without even the implied warranty of REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. REM REM See the GNU Lesser General Public License in COPYING.LGPL for more details. REM set oldPATH=%PATH% set PATH=C:\MinGW\msys\1.0\bin;c:\MinGW\bin;C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.28.29333\bin\Hostx64\x64;C:\Users\Administrator\AppData\Local\Programs\Python\Python37-32;%PATH% sh -c "./job-1-build-windows.sh" set PATH=%oldPATH% set HWLOC=c:\StarPU\hwloc-win32-build-1.11.0 cd starpu_install set STARPU_PATH=%cd% cd bin\msvc starpu_exec ../../share/doc/starpu/tutorial/hello_world_msvc.c starpu-1.4.9+dfsg/contrib/ci.inria.fr/job-1-check.sh000077500000000000000000000072041507764646700220770ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # set -e set -x export LC_ALL=C ulimit -c unlimited export PKG_CONFIG_PATH=/home/ci/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH export LD_LIBRARY_PATH=/home/ci/usr/local/lib:$LD_LIBRARY_PATH tarball=$(ls -tr starpu-*.tar.gz | tail -1) if test -z "$tarball" then echo Error. No tar.gz file ls pwd exit 1 fi COVERITY=0 if test "$1" == "-coverity" then COVERITY=1 if test -f $HOME/.starpu/coverity_token then COVERITY_TOKEN=$(cat $HOME/.starpu/coverity_token) else echo "Error. Coverity is enabled, but there is no file $HOME/.starpu/coverity_token" exit 1 fi shift BRANCH=$1 fi basename=$(basename $tarball .tar.gz) export STARPU_HOME=$PWD/$basename/home mkdir -p $basename cd $basename ( echo "oldPWD=\${PWD}" env|grep -v LS_COLORS | grep '^[A-Z]'|grep -v BASH_FUNC | grep '=' | sed 's/=/=\"/'| sed 's/$/\"/' | sed 's/^/export /' echo "cd \$oldPWD" ) > ${PWD}/env test -d $basename && chmod -R u+rwX $basename && rm -rf $basename tar xfz ../$tarball >/dev/null 2>&1 hour=$(date "+%H") today=$(date "+%Y-%m-%d") lasthour=$(echo $hour - 1 | bc ) if test "$hour" = "0" -o "$hour" = "00" then lasthour=0 fi (find $basename -exec touch -d ${today}T${lasthour}:0:0 {} \; || true ) >/dev/null 2>&1 cd $basename if test -f $HOME/starpu_specific_env.sh then . $HOME/starpu_specific_env.sh fi BUILD=./build_$$ mkdir $BUILD cd $BUILD STARPU_CONFIGURE_OPTIONS="" suname=$(uname) if test "$suname" = "Darwin" then # the VM macos is very slow export STARPU_MPI_NP=2 fi if test "$suname" = "OpenBSD" then STARPU_CONFIGURE_OPTIONS="--without-hwloc --disable-mlr --enable-maxcpus=2" fi if test "$suname" = "FreeBSD" then STARPU_CONFIGURE_OPTIONS="--disable-fortran --enable-maxcpus=2" fi export CC=gcc set +e mpiexec -oversubscribe pwd 2>/dev/null ret=$? set -e ARGS="" if test "$ret" = "0" then ARGS="--with-mpiexec-args=-oversubscribe" fi export STARPU_MICROBENCHS_DISABLED=1 export STARPU_TIMEOUT_ENV=3600 export MPIEXEC_TIMEOUT=3600 CONFIGURE_OPTIONS="--enable-debug --enable-verbose --disable-build-examples --enable-mpi-check=maybe --enable-mpi-minimal-tests --disable-build-doc $ARGS" CONFIGURE_CHECK="" day=$(date +%u) if test $day -le 5 then CONFIGURE_CHECK="--enable-quick-check" #else # we do a normal check, a long check takes too long on VM nodes fi ../configure $CONFIGURE_OPTIONS $CONFIGURE_CHECK $STARPU_CONFIGURE_OPTIONS $STARPU_USER_CONFIGURE_OPTIONS if test "$COVERITY" == "1" then cov-build --dir cov-int make -j4 grep "are ready for analysis" cov-int/build-log.txt tar caf starpu.tar.xz cov-int curl -k -f --form token=$COVERITY_TOKEN --form email=starpu-builds@inria.fr --form file=@starpu.tar.xz --form version=$BRANCH --form description= 'https://scan.coverity.com/builds?project=StarPU+MR' exit 0 fi make -j4 make dist set +e set -o pipefail make -k check 2>&1 | tee ../check_$$ RET=$? make showcheckfailed make clean grep "^FAIL:" ../check_$$ || true echo "Running on $(uname -a)" exit $RET starpu-1.4.9+dfsg/contrib/gitlab/000077500000000000000000000000001507764646700167125ustar00rootroot00000000000000starpu-1.4.9+dfsg/contrib/gitlab/build.sh000077500000000000000000000016771507764646700203630ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # set -e ./contrib/ci.inria.fr/job-0-tarball.sh tarball=$(ls -tr starpu-*.tar.gz | tail -1) if test -z "$tarball" then echo Error. No tar.gz file ls pwd exit 1 fi if test ! -f starpu.pdf then echo Error. No documentation file ls pwd exit 1 fi starpu-1.4.9+dfsg/contrib/gitlab/chameleon.sh000077500000000000000000000027361507764646700212140ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # set -x set -e export starpudir=$PWD export rootdir=$PWD/../starpu_chameleon export builddir=$PWD/../starpu_chameleon/build if test -d $rootdir ; then rm -rf $rootdir ; fi mkdir -p $builddir ./autogen.sh cd $builddir $starpudir/configure --prefix=$rootdir/starpu.inst --disable-static --disable-socl --disable-build-tests --disable-build-examples --disable-build-doc --disable-opencl make -j 32 make install source $rootdir/starpu.inst/bin/starpu_env # compiling morse cd $rootdir git clone --quiet --recursive --branch master https://gitlab.inria.fr/solverstack/chameleon.git chameleon cd chameleon git show HEAD mkdir build cd build CFLAGS=-g cmake ../ -DCHAMELEON_USE_MPI=ON make -j 20 set +e ctest -R test_mpi_s if test $? -ne 0 then ctest --rerun-failed --output-on-failure fi #ctest -R test_mpi_sgeadd -V starpu-1.4.9+dfsg/contrib/gitlab/coverity.sh000077500000000000000000000016421507764646700211200ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # BRANCH="unknown" if test -n "$CI_COMMIT_BRANCH" then BRANCH=$CI_COMMIT_BRANCH fi if test -n "$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME" then BRANCH=$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME fi ./contrib/ci.inria.fr/job-1-check.sh -coverity $BRANCH starpu-1.4.9+dfsg/contrib/gitlab/deploy.sh000077500000000000000000000013171507764646700205470ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # ./contrib/ci.inria.fr/job-1-check.sh starpu-1.4.9+dfsg/contrib/gitlab/simgrid.sh000077500000000000000000000015261507764646700207130ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # export STARPU_MICROBENCHS_DISABLED=1 STARPU_USER_CONFIGURE_OPTIONS="--enable-simgrid --disable-cuda --disable-mpi --disable-mpi-check" ./contrib/ci.inria.fr/job-1-check.sh starpu-1.4.9+dfsg/doc/000077500000000000000000000000001507764646700145555ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/Makefile.am000066400000000000000000000037331507764646700166170ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-notests.mk SUBDIRS = doxygen SUBDIRS += doxygen_dev SUBDIRS += doxygen_web_introduction SUBDIRS += doxygen_web_installation SUBDIRS += doxygen_web_basics SUBDIRS += doxygen_web_applications SUBDIRS += doxygen_web_performances SUBDIRS += doxygen_web_faq SUBDIRS += doxygen_web_languages SUBDIRS += doxygen_web_extensions DIST_SUBDIRS = doxygen DIST_SUBDIRS += doxygen_dev DIST_SUBDIRS += doxygen_web_introduction DIST_SUBDIRS += doxygen_web_installation DIST_SUBDIRS += doxygen_web_basics DIST_SUBDIRS += doxygen_web_applications DIST_SUBDIRS += doxygen_web_performances DIST_SUBDIRS += doxygen_web_faq DIST_SUBDIRS += doxygen_web_languages DIST_SUBDIRS += doxygen_web_extensions EXTRA_DIST = \ tutorial/hello_world.c \ tutorial/hello_world_msvc.c \ tutorial/Makefile \ tutorial/README \ tutorial/vector_scal.c \ tutorial/vector_scal_cpu.c \ tutorial/vector_scal_cuda.cu \ tutorial/vector_scal_opencl.c \ tutorial/vector_scal_opencl_kernel.cl \ title.tex \ sectionNumbering.py \ extractHeadline.sh \ fixLinks.sh \ doxygen.cfg txtdir = ${docdir}/tutorial txt_DATA = $(EXTRA_DIST) readmedir = ${docdir}/manual readme_DATA = README.org all-local: README.org DISTCLEANFILES = README.org README.org: $(top_srcdir)/doc/extractHeadline.sh $(top_srcdir)/doc/fixLinks.sh $(top_builddir)/doc starpu-1.4.9+dfsg/doc/Makefile.in000066400000000000000000001037531507764646700166330ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ subdir = doc ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(readmedir)" "$(DESTDIR)$(txtdir)" DATA = $(readme_DATA) $(txt_DATA) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/make/starpu-notests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_CFLAGS = $(GLOBAL_AM_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) SUBDIRS = doxygen doxygen_dev doxygen_web_introduction \ doxygen_web_installation doxygen_web_basics \ doxygen_web_applications doxygen_web_performances \ doxygen_web_faq doxygen_web_languages doxygen_web_extensions DIST_SUBDIRS = doxygen doxygen_dev doxygen_web_introduction \ doxygen_web_installation doxygen_web_basics \ doxygen_web_applications doxygen_web_performances \ doxygen_web_faq doxygen_web_languages doxygen_web_extensions EXTRA_DIST = \ tutorial/hello_world.c \ tutorial/hello_world_msvc.c \ tutorial/Makefile \ tutorial/README \ tutorial/vector_scal.c \ tutorial/vector_scal_cpu.c \ tutorial/vector_scal_cuda.cu \ tutorial/vector_scal_opencl.c \ tutorial/vector_scal_opencl_kernel.cl \ title.tex \ sectionNumbering.py \ extractHeadline.sh \ fixLinks.sh \ doxygen.cfg txtdir = ${docdir}/tutorial txt_DATA = $(EXTRA_DIST) readmedir = ${docdir}/manual readme_DATA = README.org DISTCLEANFILES = README.org all: all-recursive .SUFFIXES: .SUFFIXES: .cu .cubin .hip .o $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign doc/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-readmeDATA: $(readme_DATA) @$(NORMAL_INSTALL) @list='$(readme_DATA)'; test -n "$(readmedir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(readmedir)'"; \ $(MKDIR_P) "$(DESTDIR)$(readmedir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(readmedir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(readmedir)" || exit $$?; \ done uninstall-readmeDATA: @$(NORMAL_UNINSTALL) @list='$(readme_DATA)'; test -n "$(readmedir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(readmedir)'; $(am__uninstall_files_from_dir) install-txtDATA: $(txt_DATA) @$(NORMAL_INSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ done uninstall-txtDATA: @$(NORMAL_UNINSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile $(DATA) all-local installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(readmedir)" "$(DESTDIR)$(txtdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-readmeDATA install-txtDATA install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-readmeDATA uninstall-txtDATA .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \ check check-am clean clean-generic clean-libtool cscopelist-am \ ctags ctags-am distclean distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-readmeDATA install-strip install-txtDATA installcheck \ installcheck-am installdirs installdirs-am maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ uninstall-am uninstall-readmeDATA uninstall-txtDATA .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null all-local: README.org README.org: $(top_srcdir)/doc/extractHeadline.sh $(top_srcdir)/doc/fixLinks.sh $(top_builddir)/doc # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/doc/doxy.mk000066400000000000000000000173441507764646700161020ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOXYGEN = doxygen PDFLATEX = pdflatex MAKEINDEX = makeindex txtdir = $(docdir)/manual EXTRA_DIST = if STARPU_BUILD_DOC if STARPU_BUILD_DOC_PDF all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) EXTRA_DIST += $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) txt_DATA = $(DOX_DIR)/$(DOX_PDF) else all: $(DOX_HTML_DIR) EXTRA_DIST += $(DOX_HTML_DIR) endif # STARPU_BUILD_DOC_PDF DOX_HTML_SRCDIR=$(DOX_HTML_DIR) install-exec-hook: $(DOX_HTML_DIR) @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) uninstall-hook: @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) else if STARPU_AVAILABLE_DOC EXTRA_DIST += $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) DOX_HTML_SRCDIR=$(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) install-exec-hook: @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) uninstall-hook: @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) endif # STARPU_AVAILABLE_DOC if STARPU_AVAILABLE_DOC_PDF EXTRA_DIST += $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) endif # STARPU_AVAILABLE_DOC_PDF endif # STARPU_BUILD_DOC if STARPU_BUILD_DOC EXTRA_DIST += \ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html chapters/version.sty: $(chapters) $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters @for f in $(chapters) ; do \ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ done | sort -r | head -1 > timestamp_sty @if test -s timestamp_sty ; then \ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ fi @if test -s timestamp_sty_updated ; then \ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ else \ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ fi @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ if test -f $$f ; then $(RM) $$f ; fi ;\ done chapters/version.html: $(chapters) $(images) @for f in $(chapters) ; do \ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ done | sort -r | head -1 > timestamp_html @if test -s timestamp_html ; then \ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ fi @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html @if test -s timestamp_html_updated ; then \ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ else \ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ fi @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ if test -f $$f ; then $(RM) $$f ; fi ;\ done doxy: @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @$(DOXYGEN) $(DOX_CONFIG) $(DOX_HTML_DIR): $(DOX_TAG) @$(MKDIR_P) $(DOX_HTML_DIR) $(DOX_TAG): $(dox_inputs) @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @$(DOXYGEN) $(DOX_CONFIG) @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation <\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi # comment for the line below: what we really want to do is to remove the line, but dy doing so, it avoids opening the interactive menu when browsing files @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) $(MKDIR_P) $(DOX_LATEX_DIR) @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex @cd $(DOX_LATEX_DIR) ;\ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ $(MAKEINDEX) refman.idx ;\ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ for i in $(shell seq 1 5); do \ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ else \ break ; \ fi; \ done mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ -r \ $(DOX_HTML_DIR) \ $(DOX_LATEX_DIR) \ $(DOX_DIR)/$(DOX_PDF) endif EXTRA_DIST += refman.tex $(chapters) $(images) starpu-1.4.9+dfsg/doc/doxygen.cfg000066400000000000000000002372771507764646700167350ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2011-2011 Télécom Sud Paris # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Doxyfile 1.8.3.1 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (" "). # We include a file here that is generated by StarPU's configure # script. This file will contain some configure-set values, such as # version, source dir, etc. @INCLUDE = doxygen-config.cfg @INCLUDE = doxygen-config-include.cfg #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # http://www.gnu.org/software/libiconv for the list of possible encodings. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or sequence of words) that should # identify the project. Note that if you do not use Doxywizard you need # to put quotes around the project name if it contains spaces. #PROJECT_NAME = # The PROJECT_NUMBER tag can be used to enter a project or revision number. # This could be handy for archiving the generated documentation or # if some version control system is used. PROJECT_NUMBER = # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer # a quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify an logo or icon that is # included in the documentation. The maximum height of the logo should not # exceed 55 pixels and the maximum width should not exceed 200 pixels. # Doxygen will copy the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. # If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. OUTPUT_DIRECTORY = # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output # format and will distribute the generated files over these directories. # Enabling this option can be useful when feeding doxygen a huge amount of # source files, where putting all generated files in the same directory would # otherwise cause performance problems for the file system. CREATE_SUBDIRS = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # The default language is English, other supported languages are: # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English # messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will # include brief member descriptions after the members that are listed in # the file and class documentation (similar to JavaDoc). # Set to NO to disable this. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend # the brief description of a member or function before the detailed description. # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator # that is used to form the text in various listings. Each string # in this list, if found as the leading text of the brief description, will be # stripped from the text and the result after processing the whole list, is # used as the annotated text. Otherwise, the brief description is used as-is. # If left blank, the following values are used ("$name" is automatically # replaced with the name of the entity): "The $name class" "The $name widget" # "The $name file" "is" "provides" "specifies" "contains" # "represents" "a" "an" "the" ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # Doxygen will generate a detailed section even if there is only a brief # description. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full # path before files name in the file list and in the header files. If set # to NO the shortest path that makes the file name unique will be used. FULL_PATH_NAMES = NO # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag # can be used to strip a user-defined part of the path. Stripping is # only done if one of the specified strings matches the left-hand part of # the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the # path to strip. Note that you specify absolute paths here, but also # relative paths, which will be relative from the directory where doxygen is # started. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of # the path mentioned in the documentation of a class, which tells # the reader which header file to include in order to use a class. # If left blank only the name of the header file containing the class # definition is used. Otherwise one should specify the include paths that # are normally passed to the compiler using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter # (but less readable) file names. This can be useful if your file system # doesn't support long names like on DOS, Mac, or CD-ROM. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen # will interpret the first line (until the first dot) of a JavaDoc-style # comment as the brief description. If set to NO, the JavaDoc # comments will behave just like regular Qt-style comments # (thus requiring an explicit @brief command for a brief description.) JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then Doxygen will # interpret the first line (until the first dot) of a Qt-style # comment as the brief description. If set to NO, the comments # will behave just like regular Qt-style comments (thus requiring # an explicit \brief command for a brief description.) QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen # treat a multi-line C++ special comment block (i.e. a block of //! or /// # comments) as a brief description. This used to be the default behaviour. # The new default is to treat a multi-line C++ comment block as a detailed # description. Set this tag to YES if you prefer the old behaviour instead. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented # member inherits the documentation from any documented member that it # re-implements. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce # a new page for each member. If set to NO, the documentation of a member will # be part of the file/class/namespace that contains it. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. # Doxygen uses this value to replace tabs by spaces in code fragments. TAB_SIZE = 8 # This tag can be used to specify a number of aliases that acts # as commands in the documentation. An alias has the form "name=value". # For example adding "sideeffect=\par Side Effects:\n" will allow you to # put the command \sideeffect (or @sideeffect) in the documentation, which # will result in a user-defined paragraph with heading "Side Effects:". # You can put \n's in the value part of an alias to insert newlines. #ALIASES += # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding # "class=itcl::class" will allow you to use the command class in the # itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C # sources only. Doxygen will then generate output that is more tailored for C. # For instance, some of the names that are used will be different. The list # of all members will be omitted, etc. OPTIMIZE_OUTPUT_FOR_C = YES # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java # sources only. Doxygen will then generate output that is more tailored for # Java. For instance, namespaces will be presented as packages, qualified # scopes will look different, etc. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources only. Doxygen will then generate output that is more tailored for # Fortran. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for # VHDL. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, # and language is one of the parsers supported by doxygen: IDL, Java, # Javascript, CSharp, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, # C++. For instance to make doxygen treat .inc files as Fortran files (default # is PHP), and .f files as C (default is Fortran), use: inc=Fortran f=C. Note # that for custom extensions you also need to set FILE_PATTERNS otherwise the # files are not read by doxygen. EXTENSION_MAPPING = # If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all # comments according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you # can mix doxygen, HTML, and XML commands with Markdown formatting. # Disable only in case of backward compatibilities issues. MARKDOWN_SUPPORT = YES # When enabled doxygen tries to link words that correspond to documented classes, # or namespaces to their corresponding documentation. Such a link can be # prevented in individual cases by by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should # set this tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. # func(std::string) {}). This also makes the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. # Doxygen will parse them like normal C++ but will assume all classes use public # instead of private inheritance when no explicit protection keyword is present. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES (the # default) will make doxygen replace the get and set methods by a property in # the documentation. This will only work if the methods are indeed getting or # setting a simple type. If this is not the case, or you want to show the # methods anyway, you should set this option to NO. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES (the default) to allow class member groups of # the same type (for instance a group of public functions) to be put as a # subgroup of that type (e.g. under the Public Functions section). Set it to # NO to prevent subgrouping. Alternatively, this can be done per class using # the \nosubgrouping command. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and # unions are shown inside the group in which they are included (e.g. using # @ingroup) instead of on a separate page (for HTML and Man pages) or # section (for LaTeX and RTF). INLINE_GROUPED_CLASSES = YES # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and # unions with only public data fields will be shown inline in the documentation # of the scope in which they are defined (i.e. file, namespace, or group # documentation), provided this scope is documented. If set to NO (the default), # structs, classes, and unions are shown on a separate page (for HTML and Man # pages) or section (for LaTeX and RTF). INLINE_SIMPLE_STRUCTS = YES # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum # is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically # be useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. TYPEDEF_HIDES_STRUCT = NO # The SYMBOL_CACHE_SIZE determines the size of the internal cache use to # determine which symbols to keep in memory and which to flush to disk. # When the cache is full, less often used symbols will be written to disk. # For small to medium size projects (<1000 input files) the default value is # probably good enough. For larger projects a too small cache size can cause # doxygen to be busy swapping symbols to and from disk most of the time # causing a significant performance penalty. # If the system has enough physical memory increasing the cache will improve the # performance by keeping more symbols in memory. Note that the value works on # a logarithmic scale so increasing the size by one will roughly double the # memory usage. The cache size is given by this formula: # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols. #SYMBOL_CACHE_SIZE = 0 # Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be # set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given # their name and scope. Since this can be an expensive process and often the # same symbol appear multiple times in the code, doxygen keeps a cache of # pre-resolved symbols. If the cache is too small doxygen will become slower. # If the cache is too large, memory is wasted. The cache size is given by this # formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. # Private class members and static file members will be hidden unless # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES all private members of a class # will be included in the documentation. EXTRACT_PRIVATE = YES # If the EXTRACT_PACKAGE tag is set to YES all members with package or internal # scope will be included in the documentation. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) # defined locally in source files will be included in the documentation. # If set to NO only classes defined in header files are included. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local # methods, which are defined in the implementation section but not in # the interface are included in the documentation. # If set to NO (the default) only methods in the interface are included. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base # name of the file that contains the anonymous namespace. By default # anonymous namespaces are hidden. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all # undocumented members of documented classes, files or namespaces. # If set to NO (the default) these members will be included in the # various overviews, but no documentation section is generated. # This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. # If set to NO (the default) these classes will be included in the various # overviews. This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all # friend (class|struct|union) declarations. # If set to NO (the default) these declarations will be included in the # documentation. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any # documentation blocks found inside the body of a function. # If set to NO (the default) these blocks will be appended to the # function's detailed documentation block. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation # that is typed after a \internal command is included. If the tag is set # to NO (the default) then the documentation will be excluded. # Set it to YES to include the internal documentation. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate # file names in lower-case letters. If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen # will show members with their full class and namespace scopes in the # documentation. If set to YES the scope will be hidden. HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen # will put a list of the files that are included by a file in the documentation # of that file. SHOW_INCLUDE_FILES = YES # If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen # will list include files with double quotes in the documentation # rather than with sharp brackets. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] # is inserted in the documentation for inline members. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen # will sort the (detailed) documentation of file and class members # alphabetically by member name. If set to NO the members will appear in # declaration order. SORT_MEMBER_DOCS = NO # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the # brief documentation of file, namespace and class members alphabetically # by member name. If set to NO (the default) the members will appear in # declaration order. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen # will sort the (brief and detailed) documentation of class members so that # constructors and destructors are listed first. If set to NO (the default) # the constructors will appear in the respective orders defined by # SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. # This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO # and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the # hierarchy of group names into alphabetical order. If set to NO (the default) # the group names will appear in their defined order. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be # sorted by fully-qualified names, including namespaces. If set to # NO (the default), the class list will be sorted only by class name, # not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the # alphabetical list. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to # do proper type resolution of all parameters of a function it will reject a # match between the prototype and the implementation of a member function even # if there is only one candidate or it is obvious which candidate to choose # by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen # will still accept a match between prototype and implementation in such cases. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. This list is created by putting \test # commands in the documentation. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or # disable (NO) the bug list. This list is created by putting \bug # commands in the documentation. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting # \deprecated commands in the documentation. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional # documentation sections, marked by \if section-label ... \endif # and \cond section-label ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines # the initial value of a variable or macro consists of for it to appear in # the documentation. If the initializer consists of more lines than specified # here it will be hidden. Use a value of 0 to hide initializers completely. # The appearance of the initializer of individual variables and macros in the # documentation can be controlled using \showinitializer or \hideinitializer # command in the documentation regardless of this setting. MAX_INITIALIZER_LINES = 0 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated # at the bottom of the documentation of classes and structs. If set to YES the # list will mention the files that were used to generate the documentation. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. # This will remove the Files entry from the Quick Index and from the # Folder Tree View (if specified). The default is YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the # Namespaces page. # This will remove the Namespaces entry from the Quick Index # and from the Folder Tree View (if specified). The default is YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command , where is the value of # the FILE_VERSION_FILTER tag, and is the name of an input file # provided by doxygen. Whatever the program writes to standard output # is used as the file version. See the manual for examples. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. # You can optionally specify a file name after the option, if omitted # DoxygenLayout.xml will be used as the name of the layout file. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files # containing the references data. This must be a list of .bib files. The # .bib extension is automatically appended if omitted. Using this command # requires the bibtex tool to be installed. See also # http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style # of the bibliography can be controlled using LATEX_BIB_STYLE. To use this # feature you need bibtex and perl available in the search path. Do not use # file names with spaces, bibtex cannot handle them. CITE_BIB_FILES = #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated # by doxygen. Possible values are YES and NO. If left blank NO is used. QUIET = YES # The WARNINGS tag can be used to turn on/off the warning messages that are # generated by doxygen. Possible values are YES and NO. If left blank # NO is used. WARNINGS = NO # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings # for undocumented members. If EXTRACT_ALL is set to YES then this flag will # automatically be disabled. WARN_IF_UNDOCUMENTED = NO # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some # parameters in a documented function, or documenting parameters that # don't exist or using markup commands wrongly. WARN_IF_DOC_ERROR = YES # The WARN_NO_PARAMDOC option can be enabled to get warnings for # functions that are documented, but have no documentation for their parameters # or return value. If set to NO (the default) doxygen will only warn about # wrong or incomplete parameter documentation, but not about the absence of # documentation. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that # doxygen can produce. The string should contain the $file, $line, and $text # tags, which will be replaced by the file and line number from which the # warning originated and the warning text. Optionally the format may contain # $version, which will be replaced by the version of the file (if it could # be obtained via FILE_VERSION_FILTER) WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning # and error messages should be written. If left blank the output is written # to stderr. WARN_LOGFILE = #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. #defined in doxygen-config.cfg #INPUT = # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is # also the default input encoding. Doxygen uses libiconv (or the iconv built # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for # the list of possible encodings. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank the following patterns are tested: # *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh # *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py # *.f90 *.f *.for *.vhd *.vhdl FILE_PATTERNS = *.h *.doxy # The RECURSIVE tag can be used to turn specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. Note that the wildcards are matched # against the file with absolute path, so to exclude all test directories # for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or # directories that contain example code fragments that are included (see # the \include command). #defined in doxygen-config.cfg #EXAMPLE_PATH # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank all files are included. EXAMPLE_PATTERNS = # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude # commands irrespective of the value of the RECURSIVE tag. # Possible values are YES and NO. If left blank NO is used. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or # directories that contain image that are included in the documentation (see # the \image command). # From @INCLUDE, above #IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command , where # is the value of the INPUT_FILTER tag, and is the name of an # input file. Doxygen will then use the output that the filter program writes # to standard output. # If FILTER_PATTERNS is specified, this tag will be # ignored. #defined in doxygen-config.cfg.in #INPUT_FILTER # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. # Doxygen will compare the file name with each pattern and apply the # filter if there is a match. # The filters are a list of the form: # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further # info on how filters are used. If FILTER_PATTERNS is empty or if # non of the patterns match the file name, INPUT_FILTER is applied. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will be used to filter the input files when producing source # files to browse (i.e. when SOURCE_BROWSER is set to YES). FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) # and it is also possible to disable source filtering for a specific pattern # using *.ext= (so without naming a filter). This option only has effect when # FILTER_SOURCE_FILES is enabled. FILTER_SOURCE_PATTERNS = # If the USE_MD_FILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page (index.html). # This can be useful if you have a project on for instance GitHub and want reuse # the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will # be generated. Documented entities will be cross-referenced with these sources. # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code # fragments. Normal C, C++ and Fortran comments will always remain visible. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES # then for each documented function all documented # functions referencing it will be listed. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES # then for each documented function all documented entities # called/used by that function will be listed. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will # link to the source code. # Otherwise they will link to the documentation. REFERENCES_LINK_SOURCE = YES # If the USE_HTAGS tag is set to YES then the references to source code # will point to the HTML generated by the htags(1) tool instead of doxygen # built-in source browser. The htags tool is part of GNU's global source # tagging system (see http://www.gnu.org/software/global/global.html). You # will need version 4.8.6 or higher. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen # will generate a verbatim copy of the header file for each class for # which an include is specified. Set to NO to disable this. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index # of all compounds will be generated. Enable this if the project # contains a lot of classes, structs, unions or interfaces. ALPHABETICAL_INDEX = YES # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns # in which this list will be split (can be a number in the range [1..20]) COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all # classes will be put under the same header in the alphabetical index. # The IGNORE_PREFIX tag can be used to specify one or more prefixes that # should be ignored while generating the index headers. IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES (the default) Doxygen will # generate HTML output. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `html' will be used as the default path. #HTML_OUTPUT = # The HTML_FILE_EXTENSION tag can be used to specify the file extension for # each generated HTML page (for example: .htm,.php,.asp). If it is left blank # doxygen will generate files with .html extension. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a personal HTML header for # each generated HTML page. If it is left blank doxygen will generate a # standard header. Note that when using a custom header you are responsible # for the proper inclusion of any scripts and style sheets that doxygen # needs, which is dependent on the configuration options used. # It is advised to generate a default header using "doxygen -w html # header.html footer.html stylesheet.css YourConfigFile" and then modify # that header. Note that the header is subject to change so you typically # have to redo this when upgrading to a newer version of doxygen or when # changing the value of configuration settings such as GENERATE_TREEVIEW! HTML_HEADER = # The HTML_FOOTER tag can be used to specify a personal HTML footer for # each generated HTML page. If it is left blank doxygen will generate a # standard footer. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading # style sheet that is used by each HTML page. It can be used to # fine-tune the look of the HTML output. If left blank doxygen will # generate a default style sheet. Note that it is recommended to use # HTML_EXTRA_STYLESHEET instead of this one, as it is more robust and this # tag will in the future become obsolete. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify an additional # user-defined cascading style sheet that is included after the standard # style sheets created by doxygen. Using this option one can overrule # certain style aspects. This is preferred over using HTML_STYLESHEET # since it does not replace the standard style sheet and is therefore more # robust against future updates. Doxygen will copy the style sheet file to # the output directory. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that # the files will be copied as-is; there are no commands or markers available. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. # Doxygen will adjust the colors in the style sheet and background images # according to this color. Hue is specified as an angle on a colorwheel, # see http://en.wikipedia.org/wiki/Hue for more information. # For instance the value 0 represents red, 60 is yellow, 120 is green, # 180 is cyan, 240 is blue, 300 purple, and 360 is red again. # The allowed range is 0 to 359. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of # the colors in the HTML output. For a value of 0 the output will use # grayscales only. A value of 255 will produce the most vivid colors. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to # the luminance component of the colors in the HTML output. Values below # 100 gradually make the output lighter, whereas values above 100 make # the output darker. The value divided by 100 is the actual gamma applied, # so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, # and 100 does not change the gamma. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting # this to NO can help when comparing the output of multiple runs. HTML_TIMESTAMP = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. HTML_DYNAMIC_SECTIONS = YES # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of # entries shown in the various tree structured indices initially; the user # can expand and collapse entries dynamically later on. Doxygen will expand # the tree to such a level that at most the specified number of entries are # visible (unless a fully collapsed tree already exceeds this amount). # So setting the number of entries 1 will produce a full collapsed tree by # default. 0 is a special value representing an infinite number of entries # and will result in a full expanded tree by default. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files # will be generated that can be used as input for Apple's Xcode 3 # integrated development environment, introduced with OSX 10.5 (Leopard). # To create a documentation set, doxygen will generate a Makefile in the # HTML output directory. Running make will produce the docset in that # directory and running "make install" will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find # it at startup. # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. GENERATE_DOCSET = NO # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the # feed. A documentation feed provides an umbrella under which multiple # documentation sets from a single provider (such as a company or product suite) # can be grouped. DOCSET_FEEDNAME = "Doxygen generated docs" # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that # should uniquely identify the documentation set bundle. This should be a # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen # will append .docset to the name. DOCSET_BUNDLE_ID = org.doxygen.Project # When GENERATE_PUBLISHER_ID tag specifies a string that should uniquely # identify the documentation publisher. This should be a reverse domain-name # style string, e.g. com.mycompany.MyDocSet.documentation. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The GENERATE_PUBLISHER_NAME tag identifies the documentation publisher. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES, additional index files # will be generated that can be used as input for tools like the # Microsoft HTML help workshop to generate a compiled HTML help file (.chm) # of the generated HTML documentation. GENERATE_HTMLHELP = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can # be used to specify the file name of the resulting .chm file. You # can add a path in front of the file if the result should not be # written to the html output directory. CHM_FILE = # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can # be used to specify the location (absolute path including file name) of # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run # the HTML help compiler on the generated index.hhp. HHC_LOCATION = # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag # controls if a separate .chi index file is generated (YES) or that # it should be included in the master .chm file (NO). GENERATE_CHI = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING # is used to encode HtmlHelp index (hhk), content (hhc) and project file # content. CHM_INDEX_ENCODING = # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag # controls whether a binary table of contents is generated (YES) or a # normal table of contents (NO) in the .chm file. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members # to the contents of the HTML help documentation and to the tree view. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated # that can be used as input for Qt's qhelpgenerator to generate a # Qt Compressed Help (.qch) of the generated HTML documentation. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can # be used to specify the file name of the resulting .qch file. # The path specified is relative to the HTML output folder. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#namespace QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#virtual-folders QHP_VIRTUAL_FOLDER = doc # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to # add. For more information please see # http://doc.trolltech.com/qthelpproject.html#custom-filters QHP_CUST_FILTER_NAME = # The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see # # Qt Help Project / Custom Filters. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's # filter section matches. # # Qt Help Project / Filter Attributes. QHP_SECT_FILTER_ATTRS = # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can # be used to specify the location of Qt's qhelpgenerator. # If non-empty doxygen will try to run qhelpgenerator on the generated # .qhp file. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files # will be generated, which together with the HTML files, form an Eclipse help # plugin. To install this plugin and make it available under the help contents # menu in Eclipse, the contents of the directory containing the HTML and XML # files needs to be copied into the plugins directory of eclipse. The name of # the directory within the plugins directory should be the same as # the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before # the help appears. GENERATE_ECLIPSEHELP = NO # A unique identifier for the eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have # this name. ECLIPSE_DOC_ID = org.doxygen.Project # The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) # at top of each HTML page. The value NO (the default) enables the index and # the value YES disables it. Since the tabs have the same information as the # navigation tree you can set this option to NO if you already set # GENERATE_TREEVIEW to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. # If the tag value is set to YES, a side panel will be generated # containing a tree-like index structure (just like the one that # is generated for HTML Help). For this to work a browser that supports # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). # Windows users are probably better off using the HTML help feature. # Since the tree basically has the same information as the tab index you # could consider to set DISABLE_INDEX to NO when enabling this option. GENERATE_TREEVIEW = YES # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values # (range [0,1..20]) that doxygen will group on one line in the generated HTML # documentation. Note that a value of 0 will completely suppress the enum # values from appearing in the overview section. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree # is shown. TREEVIEW_WIDTH = 250 # When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open # links to external symbols imported via tag files in a separate window. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of Latex formulas included # as images in the HTML documentation. The default is 10. Note that # when you change the font size after a successful doxygen run you need # to manually remove any form_*.png images from the HTML output directory # to force them to be regenerated. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are # not supported properly for IE 6.0, but are supported on all modern browsers. # Note that when changing this option you need to delete any form_*.png files # in the HTML output before the changes have effect. FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax # (see http://www.mathjax.org) which uses client side Javascript for the # rendering instead of using prerendered bitmaps. Use this if you do not # have LaTeX installed or if you want to formulas look prettier in the HTML # output. When enabled you may also need to install MathJax separately and # configure the path to it using the MATHJAX_RELPATH option. USE_MATHJAX = NO # When MathJax is enabled you can set the default output format to be used for # the MathJax output. Supported types are HTML-CSS, NativeMML (i.e. MathML) and # SVG. The default value is HTML-CSS, which is slower, but has the best # compatibility. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the # HTML output directory using the MATHJAX_RELPATH option. The destination # directory should contain the MathJax.js script. For instance, if the mathjax # directory is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. The default value points to # the MathJax Content Delivery Network so you can quickly see the result without # installing MathJax. # However, it is strongly recommended to install a local # copy of MathJax from http://www.mathjax.org before deployment. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or MathJax extension # names that should be enabled during MathJax rendering. MATHJAX_EXTENSIONS = # When the SEARCHENGINE tag is enabled doxygen will generate a search box # for the HTML output. The underlying search engine uses javascript # and DHTML and should work on any modern browser. Note that when using # HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets # (GENERATE_DOCSET) there is already a search function so this one should # typically be disabled. For large projects the javascript based search engine # can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. SEARCHENGINE = YES # When the SERVER_BASED_SEARCH tag is enabled the search engine will be # implemented using a web server instead of a web client using Javascript. # There are two flavours of web server based search depending on the # EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for # searching and an index file used by the script. When EXTERNAL_SEARCH is # enabled the indexing and searching needs to be provided by external tools. # See the manual for details. SERVER_BASED_SEARCH = NO # When EXTERNAL_SEARCH is enabled doxygen will no longer generate the PHP # script for searching. Instead the search results are written to an XML file # which needs to be processed by an external indexer. Doxygen will invoke an # external search engine pointed to by the SEARCHENGINE_URL option to obtain # the search results. Doxygen ships with an example indexer (doxyindexer) and # search engine (doxysearch.cgi) which are based on the open source search engine # library Xapian. See the manual for configuration details. EXTERNAL_SEARCH = NO # The SEARCHENGINE_URL should point to a search engine hosted by a web server # which will returned the search results when EXTERNAL_SEARCH is enabled. # Doxygen ships with an example search engine (doxysearch) which is based on # the open source search engine library Xapian. See the manual for configuration # details. SEARCHENGINE_URL = # When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed # search data is written to a file for indexing by an external tool. With the # SEARCHDATA_FILE tag the name of this file can be specified. SEARCHDATA_FILE = searchdata.xml # When SERVER_BASED_SEARCH AND EXTERNAL_SEARCH are both enabled the # EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is # useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple # projects and redirect the results back to the right project. EXTERNAL_SEARCH_ID = # The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen # projects other than the one defined by this configuration file, but that are # all added to the same external search index. Each project needs to have a # unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id # of to a relative location where the documentation can be found. # The format is: EXTRA_SEARCH_MAPPINGS = id1=loc1 id2=loc2 ... EXTRA_SEARCH_MAPPINGS = #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will # generate Latex output. #defined in doxygen-config.cfg #GENERATE_LATEX = YES # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `latex' will be used as the default path. LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. If left blank `latex' will be used as the default command name. # Note that when enabling USE_PDFLATEX this option is only used for # generating bitmaps for formulas in the HTML output, but not in the # Makefile that is written to the output directory. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to # generate index for LaTeX. If left blank `makeindex' will be used as the # default command name. MAKEINDEX_CMD_NAME = makeindex # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact # LaTeX documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_LATEX = NO # The PAPER_TYPE tag can be used to set the paper type that is used # by the printer. Possible values are: a4, letter, legal and # executive. If left blank a4wide will be used. PAPER_TYPE = a4 # The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX # packages that should be included in the LaTeX output. EXTRA_PACKAGES = # The LATEX_HEADER tag can be used to specify a personal LaTeX header for # the generated latex document. The header should contain everything until # the first chapter. If it is left blank doxygen will generate a # standard header. Notice: only use this tag if you know what you are doing! #defined in doxygen-config.cfg #LATEX_HEADER # The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for # the generated latex document. The footer should contain everything after # the last chapter. If it is left blank doxygen will generate a # standard footer. Notice: only use this tag if you know what you are doing! LATEX_FOOTER = # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated # is prepared for conversion to pdf (using ps2pdf). The pdf file will # contain links (just like the HTML output) instead of page references # This makes the output suitable for online browsing using a pdf viewer. PDF_HYPERLINKS = YES # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of # plain latex in the generated Makefile. Set this option to YES to get a # higher quality PDF documentation. USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. # command to the generated LaTeX files. This will instruct LaTeX to keep # running if errors occur, instead of asking the user for help. # This option is also used when generating formulas in HTML. LATEX_BATCHMODE = NO # If LATEX_HIDE_INDICES is set to YES then doxygen will not # include the index chapters (such as File Index, Compound Index, etc.) # in the output. LATEX_HIDE_INDICES = NO # If LATEX_SOURCE_CODE is set to YES then doxygen will include # source code with syntax highlighting in the LaTeX output. # Note that which sources are shown also depends on other settings # such as SOURCE_BROWSER. LATEX_SOURCE_CODE = NO # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See # http://en.wikipedia.org/wiki/BibTeX for more info. LATEX_BIB_STYLE = plain #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output # The RTF output is optimized for Word 97 and may not look very pretty with # other RTF readers or editors. GENERATE_RTF = NO # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `rtf' will be used as the default path. RTF_OUTPUT = rtf # If the COMPACT_RTF tag is set to YES Doxygen generates more compact # RTF documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_RTF = NO # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated # will contain hyperlink fields. The RTF file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using WORD or other # programs which support those fields. # Note: wordpad (write) and others do not support links. RTF_HYPERLINKS = NO # Load style sheet definitions from file. Syntax is similar to doxygen's # config file, i.e. a series of assignments. You only have to provide # replacements, missing definitions are set to their default value. RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an rtf document. # Syntax is similar to doxygen's config file. RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- # If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = NO # The MAN_OUTPUT tag is used to specify where the man pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `man' will be used as the default path. MAN_OUTPUT = man # The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .3 # If the MAN_LINKS tag is set to YES and Doxygen generates man output, # then it will generate one additional man file for each entity # documented in the real man page(s). These additional files # only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. MAN_LINKS = NO #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- # If the GENERATE_XML tag is set to YES Doxygen will # generate an XML file that captures the structure of # the code including all documentation. GENERATE_XML = NO # The XML_OUTPUT tag is used to specify where the XML pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `xml' will be used as the default path. XML_OUTPUT = xml # The XML_SCHEMA tag can be used to specify an XML schema, # which can be used by a validating XML parser to check the # syntax of the XML files. #XML_SCHEMA = # The XML_DTD tag can be used to specify an XML DTD, # which can be used by a validating XML parser to check the # syntax of the XML files. #XML_DTD = # If the XML_PROGRAMLISTING tag is set to YES Doxygen will # dump the program listings (including syntax highlighting # and cross-referencing information) to the XML output. Note that # enabling this will significantly increase the size of the XML output. XML_PROGRAMLISTING = YES #--------------------------------------------------------------------------- # configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will # generate an AutoGen Definitions (see autogen.sf.net) file # that captures the structure of the code including all # documentation. Note that this feature is still experimental # and incomplete at the moment. GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # configuration options related to the Perl module output #--------------------------------------------------------------------------- # If the GENERATE_PERLMOD tag is set to YES Doxygen will # generate a Perl module file that captures the structure of # the code including all documentation. Note that this # feature is still experimental and incomplete at the # moment. GENERATE_PERLMOD = NO # If the PERLMOD_LATEX tag is set to YES Doxygen will generate # the necessary Makefile rules, Perl scripts and LaTeX code to be able # to generate PDF and DVI output from the Perl module output. PERLMOD_LATEX = NO # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be # nicely formatted so it can be parsed by a human reader. # This is useful # if you want to understand what is going on. # On the other hand, if this # tag is set to NO the size of the Perl module output will be much smaller # and Perl will parse it just the same. PERLMOD_PRETTY = YES # The names of the make variables in the generated doxyrules.make file # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. # This is useful so different doxyrules.make files included by the same # Makefile don't overwrite each other's variables. PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will # evaluate all C-preprocessor directives found in the sources and include # files. ENABLE_PREPROCESSING = YES # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro # names in the source code. If set to NO (the default) only conditional # compilation will be performed. Macro expansion can be done in a controlled # way by setting EXPAND_ONLY_PREDEF to YES. MACRO_EXPANSION = NO # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES # then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. EXPAND_ONLY_PREDEF = NO # If the SEARCH_INCLUDES tag is set to YES (the default) the includes files # pointed to by INCLUDE_PATH will be searched when a #include is found. SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by # the preprocessor. INCLUDE_PATH = # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the # directories. If left blank, the patterns specified with FILE_PATTERNS will # be used. INCLUDE_FILE_PATTERNS = # The PREDEFINED tag can be used to specify one or more macro names that # are defined before the preprocessor is started (similar to the -D option of # gcc). The argument of the tag is a list of macros of the form: name # or name=definition (no spaces). If the definition and the = are # omitted =1 is assumed. To prevent a macro definition from being # undefined via #undef or recursively expanded use the := operator # instead of the = operator. PREDEFINED = STARPU_USE_OPENCL=1 \ STARPU_USE_CUDA=1 \ STARPU_HAVE_NVML_H=1 \ STARPU_USE_HIP=1 \ STARPU_USE_MAX_FPGA=1 \ STARPU_USE_MPI=1 \ STARPU_USE_MPI_FT=1 \ STARPU_USE_MPI_FT_STATS=1 \ STARPU_USE_MPI_MPI=1 \ STARPU_USE_MPI_NMAD=1 \ STARPU_HAVE_HWLOC=1 \ STARPU_USE_SC_HYPERVISOR=1 \ STARPU_SIMGRID=1 \ STARPU_OPENMP=1 \ STARPU_PARALLEL_WORKER=1 \ STARPU_MKL=1 \ STARPU_WORKER_CALLBACKS=1 \ STARPU_HAVE_GLPK_H=1 \ STARPU_USE_MPI_MASTER_SLAVE=1 \ STARPU_BUBBLE=1 \ __GCC__ # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. # The macro definition that is found in the sources will be used. # Use the PREDEFINED tag if you want to use a different macro definition that # overrules the definition found in the source code. EXPAND_AS_DEFINED = # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all references to function-like macros # that are alone on a line, have an all uppercase name, and do not end with a # semicolon, because these will confuse the parser if not removed. SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration::additions related to external references #--------------------------------------------------------------------------- # The TAGFILES option can be used to specify one or more tagfiles. For each # tag file the location of the external documentation should be added. The # format of a tag file without this location is as follows: # # TAGFILES = file1 file2 ... # Adding location for the tag files is done as follows: # # TAGFILES = file1=loc1 "file2 = loc2" ... # where "loc1" and "loc2" can be relative or absolute paths # or URLs. Note that each tag file must have a unique name (where the name does # NOT include the path). If a tag file is not located in the directory in which # doxygen is run, you must also specify the path to the tagfile here. TAGFILES = # When a file name is specified after GENERATE_TAGFILE, doxygen will create # a tag file that is based on the input files it reads. GENERATE_TAGFILE = starpu.tag # If the ALLEXTERNALS tag is set to YES all external classes will be listed # in the class index. If set to NO only the inherited external classes # will be listed. ALLEXTERNALS = NO # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed # in the modules index. If set to NO, only the current project's groups will # be listed. EXTERNAL_GROUPS = YES # The PERL_PATH should be the absolute path and name of the perl script # interpreter (i.e. the result of `which perl'). PERL_PATH = /usr/bin/perl #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will # generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base # or super classes. Setting the tag to NO turns the diagrams off. Note that # this option also works with HAVE_DOT disabled, but it is recommended to # install and use dot, since it yields more powerful graphs. CLASS_DIAGRAMS = YES # You can define message sequence charts within doxygen comments using the \msc # command. Doxygen will then run the mscgen tool (see # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the # documentation. The MSCGEN_PATH tag allows you to specify the directory where # the mscgen tool resides. If left empty the tool is assumed to be found in the # default search path. MSCGEN_PATH = # If set to YES, the inheritance and collaboration graphs will hide # inheritance and usage relations if the target is undocumented # or is not a class. HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz, a graph visualization # toolkit from AT&T and Lucent Bell Labs. The other options in this section # have no effect if this option is set to NO (the default) HAVE_DOT = NO # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is # allowed to run in parallel. When set to 0 (the default) doxygen will # base this on the number of processors available in the system. You can set it # explicitly to a value larger than 0 to get control over the balance # between CPU load and processing speed. DOT_NUM_THREADS = 0 # By default doxygen will use the Helvetica font for all dot files that # doxygen generates. When you want a differently looking font you can specify # the font name using DOT_FONTNAME. You need to make sure dot is able to find # the font, which can be done by putting it in a standard location or by setting # the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the # directory containing the font. DOT_FONTNAME = Helvetica # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. # The default size is 10pt. DOT_FONTSIZE = 10 # By default doxygen will tell dot to use the Helvetica font. # If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to # set the path where dot can find it. DOT_FONTPATH = # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect inheritance relations. Setting this tag to YES will force the # CLASS_DIAGRAMS tag to NO. CLASS_GRAPH = YES # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect implementation dependencies (inheritance, containment, and # class references variables) of the class with other documented classes. COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen # will generate a graph for groups, showing the direct groups dependencies GROUP_GRAPHS = YES # If the UML_LOOK tag is set to YES doxygen will generate inheritance and # collaboration diagrams in a style similar to the OMG's Unified Modeling # Language. UML_LOOK = NO # If the UML_LOOK tag is enabled, the fields and methods are shown inside # the class node. If there are many fields or methods and many nodes the # graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS # threshold limits the number of items for each type to make the size more # manageable. Set this to 0 for no limit. Note that the threshold may be # exceeded by 50% before the limit is enforced. UML_LIMIT_NUM_FIELDS = 10 # If set to YES, the inheritance and collaboration graphs will show the # relations between templates and their instances. TEMPLATE_RELATIONS = NO # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT # tags are set to YES then doxygen will generate a graph for each documented # file showing the direct and indirect include dependencies of the file with # other documented files. INCLUDE_GRAPH = YES # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and # HAVE_DOT tags are set to YES then doxygen will generate a graph for each # documented header file showing the documented files that directly or # indirectly include this file. INCLUDED_BY_GRAPH = YES # If the CALL_GRAPH and HAVE_DOT options are set to YES then # doxygen will generate a call dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable call graphs # for selected functions only using the \callgraph command. CALL_GRAPH = NO # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then # doxygen will generate a caller dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable caller # graphs for selected functions only using the \callergraph command. CALLER_GRAPH = NO # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will generate a graphical hierarchy of all classes instead of a textual one. GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES # then doxygen will show the dependencies a directory has on other directories # in a graphical way. The dependency relations are determined by the #include # relations between the files in the directories. DIRECTORY_GRAPH = YES # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. Possible values are svg, png, jpg, or gif. # If left blank png will be used. If you choose svg you need to set # HTML_FILE_EXTENSION to xhtml in order to make the SVG files # visible in IE 9+ (other browsers do not have this requirement). DOT_IMAGE_FORMAT = png # If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to # enable generation of interactive SVG images that allow zooming and panning. # Note that this requires a modern browser other than Internet Explorer. # Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you # need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files # visible. Older versions of IE do not have SVG support. INTERACTIVE_SVG = NO # The tag DOT_PATH can be used to specify the path where the dot tool can be # found. If left blank, it is assumed the dot tool can be found in the path. DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the # \dotfile command). DOTFILE_DIRS = # The MSCFILE_DIRS tag can be used to specify one or more directories that # contain msc files that are included in the documentation (see the # \mscfile command). MSCFILE_DIRS = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of # nodes that will be shown in the graph. If the number of nodes in a graph # becomes larger than this value, doxygen will truncate the graph, which is # visualized by representing a node as a red box. Note that doxygen if the # number of direct children of the root node in a graph is already larger than # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. DOT_GRAPH_MAX_NODES = 50 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the # graphs generated by dot. A depth value of 3 means that only nodes reachable # from the root by following a path via at most 3 edges will be shown. Nodes # that lay further from the root node will be omitted. Note that setting this # option to 1 or 2 may greatly reduce the computation time needed for large # code bases. Also note that the size of a graph can be further restricted by # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. MAX_DOT_GRAPH_DEPTH = 0 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent # background. This is disabled by default, because dot on Windows does not # seem to support this out of the box. Warning: Depending on the platform used, # enabling this option may lead to badly anti-aliased labels on the edges of # a graph (i.e. they become hard to read). DOT_TRANSPARENT = NO # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) # support this, this feature is disabled by default. DOT_MULTI_TARGETS = YES # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will # generate a legend page explaining the meaning of the various boxes and # arrows in the dot generated graphs. GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will # remove the intermediate dot files that are used to generate # the various graphs. DOT_CLEANUP = YES starpu-1.4.9+dfsg/doc/doxygen/000077500000000000000000000000001507764646700162325ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/Makefile.am000066400000000000000000000241671507764646700203000ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOX_DIR = $(top_builddir)/doc/doxygen DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen DOX_HTML_DIR = html DOX_LATEX_DIR = latex DOX_PDF = starpu.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h include $(top_srcdir)/doc/doxy.mk chapters = \ chapters/foreword.doxy \ chapters/starpu_introduction/introduction_intro.doxy \ chapters/starpu_introduction/doc_organization.doxy \ chapters/starpu_introduction/glossary.doxy \ chapters/starpu_installation/installation_intro.doxy \ chapters/starpu_installation/environment_variables.doxy \ chapters/starpu_installation/building.doxy \ chapters/starpu_installation/configure_options.doxy \ chapters/starpu_installation/configuration_and_initialization.doxy \ chapters/starpu_basics/basics_intro.doxy \ chapters/starpu_basics/starpu_applications.doxy \ chapters/starpu_basics/basic_examples.doxy \ chapters/starpu_basics/scaling_vector_example.doxy \ chapters/starpu_basics/tasks.doxy \ chapters/starpu_basics/data_management.doxy \ chapters/starpu_basics/scheduling.doxy \ chapters/starpu_basics/examples_sources.doxy \ chapters/starpu_basics/code/basics_vector_scal_c.c \ chapters/starpu_basics/code/basics_vector_scal_cpu.c \ chapters/starpu_basics/code/basics_vector_scal_cuda.c \ chapters/starpu_basics/code/basics_vector_scal_opencl.c \ chapters/starpu_basics/code/basics_vector_scal_opencl_codelet.cl \ chapters/starpu_applications/applications_intro.doxy \ chapters/starpu_applications/vector_scaling.doxy \ chapters/starpu_applications/code/vector_scal_c.c \ chapters/starpu_applications/code/vector_scal_c_align.c \ chapters/starpu_applications/code/vector_scal_cpu.c \ chapters/starpu_applications/code/vector_scal_starpu.c \ chapters/starpu_applications/stencil.doxy \ chapters/starpu_applications/code/stencil5.c \ chapters/starpu_applications/code/stencil5_starpu.c \ chapters/starpu_applications/code/stencil5_starpu_mpi.c \ chapters/starpu_performances/performances_intro.doxy \ chapters/starpu_performances/benchmarking_starpu.doxy \ chapters/starpu_performances/online_performance_tools.doxy \ chapters/starpu_performances/offline_performance_tools.doxy \ chapters/starpu_faq/faq_intro.doxy \ chapters/starpu_faq/check_list_performance.doxy \ chapters/starpu_faq/faq.doxy \ chapters/starpu_languages/languages_intro.doxy \ chapters/starpu_languages/native_fortran_support.doxy \ chapters/starpu_languages/java.doxy \ chapters/starpu_languages/python.doxy \ chapters/starpu_languages/openmp_runtime_support.doxy \ chapters/starpu_languages/code/nf_initexit.f90 \ chapters/starpu_languages/code/java_starpu.java \ chapters/starpu_languages/code/java_spark.java \ chapters/starpu_extensions/extensions_intro.doxy \ chapters/starpu_extensions/advanced_tasks.doxy \ chapters/starpu_extensions/advanced_data_management.doxy \ chapters/starpu_extensions/helpers.doxy \ chapters/starpu_extensions/debugging_tools.doxy \ chapters/starpu_extensions/advanced_scheduling.doxy \ chapters/starpu_extensions/scheduling_contexts.doxy \ chapters/starpu_extensions/scheduling_context_hypervisor.doxy \ chapters/starpu_extensions/cuda_support.doxy \ chapters/starpu_extensions/opencl_support.doxy \ chapters/starpu_extensions/max_fpga_support.doxy \ chapters/starpu_extensions/out_of_core.doxy \ chapters/starpu_extensions/mpi_support.doxy \ chapters/starpu_extensions/tcpip_support.doxy \ chapters/starpu_extensions/transactions.doxy \ chapters/starpu_extensions/fault_tolerance.doxy \ chapters/starpu_extensions/fft_support.doxy \ chapters/starpu_extensions/socl_opencl_extensions.doxy \ chapters/starpu_extensions/bubble.doxy \ chapters/starpu_extensions/parallel_worker.doxy \ chapters/starpu_extensions/interoperability.doxy \ chapters/starpu_extensions/scheduling_policy_definition.doxy \ chapters/starpu_extensions/simgrid.doxy \ chapters/starpu_extensions/code/complex.c \ chapters/starpu_extensions/code/disk_compute.c \ chapters/starpu_extensions/code/disk_copy.c \ chapters/starpu_extensions/code/forkmode.c \ chapters/starpu_extensions/code/multiformat.c \ chapters/starpu_extensions/code/simgrid.c \ chapters/files.doxy \ chapters/fdl_1_3.doxy \ chapters/api/fortran_support.doxy \ chapters/api/bubble_support.doxy \ chapters/api/fft_support.doxy \ chapters/api/threads.doxy images = \ chapters/images/arbiter.png \ chapters/images/data_trace.png \ chapters/images/distrib_data.png \ chapters/images/distrib_data_histo.png \ chapters/images/paje_draw_histogram.png \ chapters/images/parallel_worker2.png \ chapters/images/runtime-par.png \ chapters/images/starpu_non_linear_memset_regression_based.png \ chapters/images/starpu_non_linear_memset_regression_based_2.png \ chapters/images/starpu_starpu_slu_lu_model_11.png \ chapters/images/starpu_chol_model_11_type.png \ chapters/images/tasks_size_overhead.png \ chapters/images/temanejo.png \ chapters/images/eclipse_installer.png \ chapters/images/eclipse_install_cdt.png \ chapters/images/eclipse_hello_build.png \ chapters/images/eclipse_hello_run.png \ chapters/images/eclipse_hello_fxt.png \ chapters/images/eclipse_hello_graph.png \ chapters/images/eclipse_hello_vite.png \ chapters/images/eclipse_hello_svg_graph.png \ chapters/images/eclipse_hello_plugin.png \ chapters/images/eclipse_hello_paje_trace.png \ chapters/images/eclipse_hello_hgraph.png \ chapters/images/eclipse_install_pde.png \ chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ chapters/images/starpu_log_arr.png \ chapters/images/starpu_log_list.png \ chapters/images/starpu_non_linear_memset_regression_based_energy.png \ chapters/images/starpu_power_non_linear_memset_regression_based.png \ chapters/images/starvz_visu.png \ chapters/images/starvz_visu_r.png \ chapters/images/trace_bw_heatmap.png \ chapters/images/trace_recv_use.png \ chapters/images/trace_send_use.png \ chapters/images/trace_volume_heatmap.png \ chapters/images/starpupy_handle_func_perf_pickle.png \ chapters/images/starpupy_handle_perf_pickle.png \ chapters/images/starpupy_handle_func_perf.png \ chapters/images/starpupy_handle_perf.png \ chapters/images/tasks_size_overhead_py_fut_pickle.png \ chapters/images/tasks_size_overhead_py_futur.png \ chapters/images/tasks_size_overhead_py_handle_pickle.png \ chapters/images/tasks_size_overhead_py_handle.png \ chapters/images/tasks_size_overhead_py_none.png \ chapters/images/tasks_size_overhead_py_noret_pickle.png if STARPU_BUILD_DOC starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ dox_inputs = $(DOX_CONFIG) \ $(chapters) \ starpu_config.h \ chapters/version.sty \ chapters/version.html \ $(top_srcdir)/include/starpu.h \ $(top_srcdir)/include/starpu_bitmap.h \ $(top_srcdir)/include/starpu_bound.h \ $(top_srcdir)/include/starpu_cublas.h \ $(top_srcdir)/include/starpu_cublas_v2.h \ $(top_srcdir)/include/starpu_cublasLt.h \ $(top_srcdir)/include/starpu_cusparse.h \ $(top_srcdir)/include/starpu_cuda.h \ $(top_srcdir)/include/starpu_cusolver.h \ $(top_srcdir)/include/starpu_data_filters.h \ $(top_srcdir)/include/starpu_data.h \ $(top_srcdir)/include/starpu_data_interfaces.h \ $(top_srcdir)/include/starpu_deprecated_api.h \ $(top_srcdir)/include/starpu_disk.h \ $(top_srcdir)/include/starpu_driver.h \ $(top_srcdir)/include/starpu_expert.h \ $(top_srcdir)/include/starpu_fxt.h \ $(top_srcdir)/include/starpu_hash.h \ $(top_srcdir)/include/starpu_helper.h \ $(top_srcdir)/include/starpu_hip.h \ $(top_srcdir)/include/starpu_max_fpga.h \ $(top_srcdir)/include/starpu_mod.f90 \ $(top_srcdir)/include/starpu_opencl.h \ $(top_srcdir)/include/starpu_openmp.h \ $(top_srcdir)/include/starpu_parallel_worker.h \ $(top_srcdir)/include/starpu_perf_monitoring.h \ $(top_srcdir)/include/starpu_perf_steering.h \ $(top_srcdir)/include/starpu_perfmodel.h \ $(top_srcdir)/include/starpu_profiling.h \ $(top_srcdir)/include/starpu_profiling_tool.h \ $(top_srcdir)/include/starpu_rand.h \ $(top_srcdir)/include/starpu_sched_component.h \ $(top_srcdir)/include/starpu_sched_ctx.h \ $(top_srcdir)/include/starpu_sched_ctx_hypervisor.h \ $(top_srcdir)/include/starpu_scheduler.h \ $(top_srcdir)/include/starpu_simgrid_wrap.h \ $(top_srcdir)/include/starpu_sink.h \ $(top_srcdir)/include/starpu_stdlib.h \ $(top_srcdir)/include/starpu_task_bundle.h \ $(top_srcdir)/include/starpu_task_dep.h \ $(top_srcdir)/include/starpu_task.h \ $(top_srcdir)/include/starpu_task_list.h \ $(top_srcdir)/include/starpu_task_util.h \ $(top_srcdir)/include/starpu_thread.h \ $(top_srcdir)/include/starpu_thread_util.h \ $(top_srcdir)/include/starpu_tree.h \ $(top_srcdir)/include/starpu_util.h \ $(top_srcdir)/include/starpu_worker.h \ $(top_srcdir)/include/fstarpu_mod.f90 \ $(top_srcdir)/include/schedulers/starpu_heteroprio.h \ $(top_srcdir)/starpufft/include/starpufft.h \ $(top_srcdir)/mpi/include/starpu_mpi.h \ $(top_srcdir)/mpi/include/starpu_mpi_ft.h \ $(top_srcdir)/mpi/include/starpu_mpi_lb.h \ $(top_srcdir)/mpi/include/fstarpu_mpi_mod.f90 \ $(top_srcdir)/sc_hypervisor/include/sc_hypervisor.h \ $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_config.h \ $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_lp.h \ $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_monitoring.h \ $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_policy.h \ $(top_srcdir)/starpurm/include/starpurm.h \ $(top_srcdir)/include/schedulers/starpu_scheduler_toolbox.h endif starpu-1.4.9+dfsg/doc/doxygen/Makefile.in000066400000000000000000001300471507764646700203040ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@am__append_5 = \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html subdir = doc/doxygen ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = doxygen-config.cfg doxygen-config-include.cfg \ doxygen_filter.sh CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(txtdir)" DATA = $(txt_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(srcdir)/doxygen-config-include.cfg.in \ $(srcdir)/doxygen-config.cfg.in $(srcdir)/doxygen_filter.sh.in \ $(top_srcdir)/doc/doxy.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ DOX_DIR = $(top_builddir)/doc/doxygen DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen DOX_HTML_DIR = html DOX_LATEX_DIR = latex DOX_PDF = starpu.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOXYGEN = doxygen PDFLATEX = pdflatex MAKEINDEX = makeindex txtdir = $(docdir)/manual EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ $(am__append_4) $(am__append_5) refman.tex $(chapters) \ $(images) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ -r \ @STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) chapters = \ chapters/foreword.doxy \ chapters/starpu_introduction/introduction_intro.doxy \ chapters/starpu_introduction/doc_organization.doxy \ chapters/starpu_introduction/glossary.doxy \ chapters/starpu_installation/installation_intro.doxy \ chapters/starpu_installation/environment_variables.doxy \ chapters/starpu_installation/building.doxy \ chapters/starpu_installation/configure_options.doxy \ chapters/starpu_installation/configuration_and_initialization.doxy \ chapters/starpu_basics/basics_intro.doxy \ chapters/starpu_basics/starpu_applications.doxy \ chapters/starpu_basics/basic_examples.doxy \ chapters/starpu_basics/scaling_vector_example.doxy \ chapters/starpu_basics/tasks.doxy \ chapters/starpu_basics/data_management.doxy \ chapters/starpu_basics/scheduling.doxy \ chapters/starpu_basics/examples_sources.doxy \ chapters/starpu_basics/code/basics_vector_scal_c.c \ chapters/starpu_basics/code/basics_vector_scal_cpu.c \ chapters/starpu_basics/code/basics_vector_scal_cuda.c \ chapters/starpu_basics/code/basics_vector_scal_opencl.c \ chapters/starpu_basics/code/basics_vector_scal_opencl_codelet.cl \ chapters/starpu_applications/applications_intro.doxy \ chapters/starpu_applications/vector_scaling.doxy \ chapters/starpu_applications/code/vector_scal_c.c \ chapters/starpu_applications/code/vector_scal_c_align.c \ chapters/starpu_applications/code/vector_scal_cpu.c \ chapters/starpu_applications/code/vector_scal_starpu.c \ chapters/starpu_applications/stencil.doxy \ chapters/starpu_applications/code/stencil5.c \ chapters/starpu_applications/code/stencil5_starpu.c \ chapters/starpu_applications/code/stencil5_starpu_mpi.c \ chapters/starpu_performances/performances_intro.doxy \ chapters/starpu_performances/benchmarking_starpu.doxy \ chapters/starpu_performances/online_performance_tools.doxy \ chapters/starpu_performances/offline_performance_tools.doxy \ chapters/starpu_faq/faq_intro.doxy \ chapters/starpu_faq/check_list_performance.doxy \ chapters/starpu_faq/faq.doxy \ chapters/starpu_languages/languages_intro.doxy \ chapters/starpu_languages/native_fortran_support.doxy \ chapters/starpu_languages/java.doxy \ chapters/starpu_languages/python.doxy \ chapters/starpu_languages/openmp_runtime_support.doxy \ chapters/starpu_languages/code/nf_initexit.f90 \ chapters/starpu_languages/code/java_starpu.java \ chapters/starpu_languages/code/java_spark.java \ chapters/starpu_extensions/extensions_intro.doxy \ chapters/starpu_extensions/advanced_tasks.doxy \ chapters/starpu_extensions/advanced_data_management.doxy \ chapters/starpu_extensions/helpers.doxy \ chapters/starpu_extensions/debugging_tools.doxy \ chapters/starpu_extensions/advanced_scheduling.doxy \ chapters/starpu_extensions/scheduling_contexts.doxy \ chapters/starpu_extensions/scheduling_context_hypervisor.doxy \ chapters/starpu_extensions/cuda_support.doxy \ chapters/starpu_extensions/opencl_support.doxy \ chapters/starpu_extensions/max_fpga_support.doxy \ chapters/starpu_extensions/out_of_core.doxy \ chapters/starpu_extensions/mpi_support.doxy \ chapters/starpu_extensions/tcpip_support.doxy \ chapters/starpu_extensions/transactions.doxy \ chapters/starpu_extensions/fault_tolerance.doxy \ chapters/starpu_extensions/fft_support.doxy \ chapters/starpu_extensions/socl_opencl_extensions.doxy \ chapters/starpu_extensions/bubble.doxy \ chapters/starpu_extensions/parallel_worker.doxy \ chapters/starpu_extensions/interoperability.doxy \ chapters/starpu_extensions/scheduling_policy_definition.doxy \ chapters/starpu_extensions/simgrid.doxy \ chapters/starpu_extensions/code/complex.c \ chapters/starpu_extensions/code/disk_compute.c \ chapters/starpu_extensions/code/disk_copy.c \ chapters/starpu_extensions/code/forkmode.c \ chapters/starpu_extensions/code/multiformat.c \ chapters/starpu_extensions/code/simgrid.c \ chapters/files.doxy \ chapters/fdl_1_3.doxy \ chapters/api/fortran_support.doxy \ chapters/api/bubble_support.doxy \ chapters/api/fft_support.doxy \ chapters/api/threads.doxy images = \ chapters/images/arbiter.png \ chapters/images/data_trace.png \ chapters/images/distrib_data.png \ chapters/images/distrib_data_histo.png \ chapters/images/paje_draw_histogram.png \ chapters/images/parallel_worker2.png \ chapters/images/runtime-par.png \ chapters/images/starpu_non_linear_memset_regression_based.png \ chapters/images/starpu_non_linear_memset_regression_based_2.png \ chapters/images/starpu_starpu_slu_lu_model_11.png \ chapters/images/starpu_chol_model_11_type.png \ chapters/images/tasks_size_overhead.png \ chapters/images/temanejo.png \ chapters/images/eclipse_installer.png \ chapters/images/eclipse_install_cdt.png \ chapters/images/eclipse_hello_build.png \ chapters/images/eclipse_hello_run.png \ chapters/images/eclipse_hello_fxt.png \ chapters/images/eclipse_hello_graph.png \ chapters/images/eclipse_hello_vite.png \ chapters/images/eclipse_hello_svg_graph.png \ chapters/images/eclipse_hello_plugin.png \ chapters/images/eclipse_hello_paje_trace.png \ chapters/images/eclipse_hello_hgraph.png \ chapters/images/eclipse_install_pde.png \ chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ chapters/images/starpu_log_arr.png \ chapters/images/starpu_log_list.png \ chapters/images/starpu_non_linear_memset_regression_based_energy.png \ chapters/images/starpu_power_non_linear_memset_regression_based.png \ chapters/images/starvz_visu.png \ chapters/images/starvz_visu_r.png \ chapters/images/trace_bw_heatmap.png \ chapters/images/trace_recv_use.png \ chapters/images/trace_send_use.png \ chapters/images/trace_volume_heatmap.png \ chapters/images/starpupy_handle_func_perf_pickle.png \ chapters/images/starpupy_handle_perf_pickle.png \ chapters/images/starpupy_handle_func_perf.png \ chapters/images/starpupy_handle_perf.png \ chapters/images/tasks_size_overhead_py_fut_pickle.png \ chapters/images/tasks_size_overhead_py_futur.png \ chapters/images/tasks_size_overhead_py_handle_pickle.png \ chapters/images/tasks_size_overhead_py_handle.png \ chapters/images/tasks_size_overhead_py_none.png \ chapters/images/tasks_size_overhead_py_noret_pickle.png @STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ $(chapters) \ @STARPU_BUILD_DOC_TRUE@ starpu_config.h \ @STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ chapters/version.html \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_bitmap.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_bound.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_cublas.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_cublas_v2.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_cublasLt.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_cusparse.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_cuda.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_cusolver.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_data_filters.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_data.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_data_interfaces.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_deprecated_api.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_disk.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_driver.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_expert.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_fxt.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_hash.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_helper.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_hip.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_max_fpga.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_mod.f90 \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_opencl.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_openmp.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_parallel_worker.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_perf_monitoring.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_perf_steering.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_perfmodel.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_profiling.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_profiling_tool.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_rand.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_sched_component.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_sched_ctx.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_sched_ctx_hypervisor.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_scheduler.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_simgrid_wrap.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_sink.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_stdlib.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_task_bundle.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_task_dep.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_task.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_task_list.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_task_util.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_thread.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_thread_util.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_tree.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_util.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/starpu_worker.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/fstarpu_mod.f90 \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/schedulers/starpu_heteroprio.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/starpufft/include/starpufft.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/mpi/include/starpu_mpi.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/mpi/include/starpu_mpi_ft.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/mpi/include/starpu_mpi_lb.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/mpi/include/fstarpu_mpi_mod.f90 \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/sc_hypervisor/include/sc_hypervisor.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_config.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_lp.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_monitoring.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/sc_hypervisor/include/sc_hypervisor_policy.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/starpurm/include/starpurm.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/include/schedulers/starpu_scheduler_toolbox.h all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign doc/doxygen/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/doc/doxy.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ doxygen-config-include.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config-include.cfg.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ doxygen_filter.sh: $(top_builddir)/config.status $(srcdir)/doxygen_filter.sh.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-txtDATA: $(txt_DATA) @$(NORMAL_INSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ done uninstall-txtDATA: @$(NORMAL_UNINSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) tags TAGS: ctags CTAGS: cscope cscopelist: distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(txtdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-txtDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) install-exec-hook install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-txtDATA @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) uninstall-hook .MAKE: install-am install-exec-am install-strip uninstall-am .PHONY: all all-am check check-am clean clean-generic clean-libtool \ cscopelist-am ctags-am distclean distclean-generic \ distclean-libtool distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-exec-hook \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip install-txtDATA installcheck \ installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ uninstall-am uninstall-hook uninstall-txtDATA .PRECIOUS: Makefile @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_BUILD_DOC_TRUE@uninstall-hook: @STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@doxy: @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation <\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but dy doing so, it avoids opening the interactive menu when browsing files @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi @STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex @STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ @STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ @STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ @STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ @STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ @STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ break ; \ @STARPU_BUILD_DOC_TRUE@ fi; \ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/doc/doxygen/chapters/000077500000000000000000000000001507764646700200435ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/api/000077500000000000000000000000001507764646700206145ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/api/bubble_support.doxy000066400000000000000000000015101507764646700245450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2024-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * The file is empty but necessary to define the group API_Bubble */ /*! \defgroup API_Bubble Hierarchical Dags \brief API for Hierarchical DAGS */ starpu-1.4.9+dfsg/doc/doxygen/chapters/api/fft_support.doxy000066400000000000000000000060241507764646700240760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \defgroup API_FFT_Support FFT Support \def STARPUFFT_FORWARD \ingroup API_FFT_Support todo \def STARPUFFT_INVERSE \ingroup API_FFT_Support todo \fn void * starpufft_malloc(size_t n) \ingroup API_FFT_Support Allocate memory for \p n bytes. This is preferred over \c malloc(), since it allocates pinned memory, which allows overlapped transfers. \fn void * starpufft_free(void *p) \ingroup API_FFT_Support Release memory previously allocated. \fn struct starpufft_plan * starpufft_plan_dft_1d(int n, int sign, unsigned flags) \ingroup API_FFT_Support Initialize a plan for 1D FFT of size \p n. \p sign can be STARPUFFT_FORWARD or STARPUFFT_INVERSE. \p flags must be 0. \fn struct starpufft_plan * starpufft_plan_dft_2d(int n, int m, int sign, unsigned flags) \ingroup API_FFT_Support Initialize a plan for 2D FFT of size (\p n, \p m). \p sign can be STARPUFFT_FORWARD or STARPUFFT_INVERSE. flags must be \p 0. \fn struct starpu_task * starpufft_start(starpufft_plan p, void *in, void *out) \ingroup API_FFT_Support Start an FFT previously planned as \p p, using \p in and \p out as input and output. This only submits the task and does not wait for it. The application should call starpufft_cleanup() to unregister the \fn struct starpu_task * starpufft_start_handle(starpufft_plan p, starpu_data_handle_t in, starpu_data_handle_t out) \ingroup API_FFT_Support Start an FFT previously planned as \p p, using data handles \p in and \p out as input and output (assumed to be vectors of elements of the expected types). This only submits the task and does not wait for it. \fn void starpufft_execute(starpufft_plan p, void *in, void *out) \ingroup API_FFT_Support Execute an FFT previously planned as \p p, using \p in and \p out as input and output. This submits and waits for the task. \fn void starpufft_execute_handle(starpufft_plan p, starpu_data_handle_t in, starpu_data_handle_t out) \ingroup API_FFT_Support Execute an FFT previously planned as \p p, using data handles \p in and \p out as input and output (assumed to be vectors of elements of the expected types). This submits and waits for the task. \fn void starpufft_cleanup(starpufft_plan p) \ingroup API_FFT_Support Release data for plan \p p, in the starpufft_start() case. \fn void starpufft_destroy_plan(starpufft_plan p) \ingroup API_FFT_Support Destroy plan \p p, i.e. release all CPU (fftw) and GPU (cufft) resources. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/api/fortran_support.doxy000066400000000000000000000013611507764646700247710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \defgroup API_Fortran Fortran Support \brief Fortran API */ starpu-1.4.9+dfsg/doc/doxygen/chapters/api/threads.doxy000066400000000000000000000322021507764646700231520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \defgroup API_Threads Threads \brief API for thread. The thread functions are either implemented on top of the pthread library or the SimGrid library when the simulated performance mode is enabled (\ref SimGridSupport). \def STARPU_PTHREAD_CREATE_ON \ingroup API_Threads Call starpu_pthread_create_on() and abort on error. \def STARPU_PTHREAD_CREATE \ingroup API_Threads Call starpu_pthread_create() and abort on error. \def STARPU_PTHREAD_MUTEX_INIT \ingroup API_Threads Call starpu_pthread_mutex_init() and abort on error. \def STARPU_PTHREAD_MUTEX_INIT0 \ingroup API_Threads Call starpu_pthread_mutex_init() only if the content of PTHREAD_MUTEX_INITIALIZER is not zero. This should be called instead of STARPU_PTHREAD_MUTEX_INIT when it is known that the content of the pthread_mutex_t was already zeroed. \def STARPU_PTHREAD_MUTEX_DESTROY \ingroup API_Threads Call starpu_pthread_mutex_destroy() and abort on error. \def STARPU_PTHREAD_MUTEX_LOCK \ingroup API_Threads Call starpu_pthread_mutex_lock() and abort on error. \def STARPU_PTHREAD_MUTEX_UNLOCK \ingroup API_Threads Call starpu_pthread_mutex_unlock() and abort on error. \def STARPU_PTHREAD_KEY_CREATE \ingroup API_Threads Call starpu_pthread_key_create() and abort on error. \def STARPU_PTHREAD_KEY_DELETE \ingroup API_Threads Call starpu_pthread_key_delete() and abort on error. \def STARPU_PTHREAD_SETSPECIFIC \ingroup API_Threads Call starpu_pthread_setspecific() and abort on error. \def STARPU_PTHREAD_GETSPECIFIC \ingroup API_Threads Call starpu_pthread_getspecific() and abort on error. \def STARPU_PTHREAD_RWLOCK_INIT \ingroup API_Threads Call starpu_pthread_rwlock_init() and abort on error. \def STARPU_PTHREAD_RWLOCK_INIT0 \ingroup API_Threads Call starpu_pthread_rwlock_init() only if the content of PTHREAD_RWLOCK_INITIALIZER is not zero. This should be called instead of STARPU_PTHREAD_RWLOCK_INIT when it is known that the content of the pthread_rwlock_t was already zeroed. \def STARPU_PTHREAD_RWLOCK_RDLOCK \ingroup API_Threads Call starpu_pthread_rwlock_rdlock() and abort on error. \def STARPU_PTHREAD_RWLOCK_WRLOCK \ingroup API_Threads Call starpu_pthread_rwlock_wrlock() and abort on error. \def STARPU_PTHREAD_RWLOCK_UNLOCK \ingroup API_Threads Call starpu_pthread_rwlock_unlock() and abort on error. \def STARPU_PTHREAD_RWLOCK_DESTROY \ingroup API_Threads Call starpu_pthread_rwlock_destroy() and abort on error. \def STARPU_PTHREAD_COND_INIT \ingroup API_Threads Call starpu_pthread_cond_init() and abort on error. \def STARPU_PTHREAD_COND_INIT0 \ingroup API_Threads Call starpu_pthread_cond_init() only if the content of PTHREAD_COND_INITIALIZER is not zero. This should be called instead of STARPU_PTHREAD_COND_INIT when it is known that the content of the pthread_cond_t was already zeroed. \def STARPU_PTHREAD_COND_DESTROY \ingroup API_Threads Call starpu_pthread_cond_destroy() and abort on error. \def STARPU_PTHREAD_COND_SIGNAL \ingroup API_Threads Call starpu_pthread_cond_signal() and abort on error. \def STARPU_PTHREAD_COND_BROADCAST \ingroup API_Threads Call starpu_pthread_cond_broadcast() and abort on error. \def STARPU_PTHREAD_COND_WAIT \ingroup API_Threads Call starpu_pthread_cond_wait() and abort on error. \def STARPU_PTHREAD_BARRIER_INIT \ingroup API_Threads Call starpu_pthread_barrier_init() and abort on error. \def STARPU_PTHREAD_BARRIER_DESTROY \ingroup API_Threads Call starpu_pthread_barrier_destroy() and abort on error. \def STARPU_PTHREAD_BARRIER_WAIT \ingroup API_Threads Call starpu_pthread_barrier_wait() and abort on error. \fn int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg, int where) \ingroup API_Threads \fn int starpu_pthread_create(starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg) \ingroup API_Threads Start a new thread in the calling process. The new thread starts execution by invoking \p start_routine; \p arg is passed as the sole argument of \p start_routine. \fn int starpu_pthread_join(starpu_pthread_t thread, void **retval) \ingroup API_Threads Wait for the thread specified by \p thread to terminate. If that thread has already terminated, then the function returns immediately. The thread specified by \p thread must be joinable. \fn int starpu_pthread_exit(void *retval) \ingroup API_Threads Terminate the calling thread and return a value via \p retval that (if the thread is joinable) is available to another thread in the same process that calls starpu_pthread_join(). \fn int starpu_pthread_attr_init(starpu_pthread_attr_t *attr) \ingroup API_Threads Initialize the thread attributes object pointed to by \p attr with default attribute values. Do not do anything when the simulated performance mode is enabled (\ref SimGridSupport). \fn int starpu_pthread_attr_destroy(starpu_pthread_attr_t *attr) \ingroup API_Threads Destroy a thread attributes object which is no longer required. Destroying a thread attributes object has no effect on threads that were created using that object. Do not do anything when the simulated performance mode is enabled (\ref SimGridSupport). \fn int starpu_pthread_attr_setdetachstate(starpu_pthread_attr_t *attr, int detachstate) \ingroup API_Threads Set the detach state attribute of the thread attributes object referred to by \p attr to the value specified in \p detachstate. The detach state attribute determines whether a thread created using the thread attributes object \p attr will be created in a joinable or a detached state. Do not do anything when the simulated performance mode is enabled (\ref SimGridSupport). \fn int starpu_pthread_mutex_init(starpu_pthread_mutex_t *mutex, const starpu_pthread_mutexattr_t *mutexattr) \ingroup API_Threads Initialize the mutex object pointed to by \p mutex according to the mutex attributes specified in \p mutexattr. If \p mutexattr is NULL, default attributes are used instead. \fn int starpu_pthread_mutex_destroy(starpu_pthread_mutex_t *mutex) \ingroup API_Threads Destroy a mutex object, and free the resources it might hold. The mutex must be unlocked on entrance. \fn int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex) \ingroup API_Threads Lock the given \p mutex. If \p mutex is currently unlocked, it becomes locked and owned by the calling thread, and the function returns immediately. If \p mutex is already locked by another thread, the function suspends the calling thread until \p mutex is unlocked. This function also produces trace when the configure option \ref enable-fxt-lock "--enable-fxt-lock" is enabled. \fn int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex) \ingroup API_Threads Unlock the given \p mutex. The mutex is assumed to be locked and owned by the calling thread on entrance to starpu_pthread_mutex_unlock(). This function also produces trace when the configure option \ref enable-fxt-lock "--enable-fxt-lock" is enabled. \fn int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex) \ingroup API_Threads Behave identically to starpu_pthread_mutex_lock(), except that it does not block the calling thread if the mutex is already locked by another thread (or by the calling thread in the case of a ``fast'' mutex). Instead, the function returns immediately with the error code \c EBUSY. This function also produces trace when the configure option \ref enable-fxt-lock "--enable-fxt-lock" is enabled. \typedef STARPU_PTHREAD_MUTEX_INITIALIZER \ingroup API_Threads Initialize the mutex given in parameter. \fn int starpu_pthread_mutexattr_gettype(const starpu_pthread_mutexattr_t *attr, int *type) \ingroup API_Threads todo \fn int starpu_pthread_mutexattr_settype(starpu_pthread_mutexattr_t *attr, int type) \ingroup API_Threads todo \fn int starpu_pthread_mutexattr_destroy(starpu_pthread_mutexattr_t *attr) \ingroup API_Threads todo \fn int starpu_pthread_mutexattr_init(starpu_pthread_mutexattr_t *attr) \ingroup API_Threads todo \fn int starpu_pthread_key_create(starpu_pthread_key_t *key, void (*destr_function) (void *)) \ingroup API_Threads Allocate a new TSD key. The key is stored in the location pointed to by \p key. \fn int starpu_pthread_key_delete(starpu_pthread_key_t key) \ingroup API_Threads Deallocate a TSD key. Do not check whether non-NULL values are associated with that key in the currently executing threads, nor call the destructor function associated with the key. \fn int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer) \ingroup API_Threads Change the value associated with \p key in the calling thread, storing the given \p pointer instead. \fn void *starpu_pthread_getspecific(starpu_pthread_key_t key) \ingroup API_Threads Return the value associated with \p key on success, and NULL on error. \typedef STARPU_PTHREAD_COND_INITIALIZER \ingroup API_Threads Initialize the condition variable given in parameter. \fn int starpu_pthread_cond_init(starpu_pthread_cond_t *cond, starpu_pthread_condattr_t *cond_attr) \ingroup API_Threads Initialize the condition variable \p cond, using the condition attributes specified in \p cond_attr, or default attributes if \p cond_attr is NULL. \fn int starpu_pthread_cond_signal(starpu_pthread_cond_t *cond) \ingroup API_Threads Restart one of the threads that are waiting on the condition variable \p cond. If no threads are waiting on \p cond, nothing happens. If several threads are waiting on \p cond, exactly one is restarted, but it is not specified which. \fn int starpu_pthread_cond_broadcast(starpu_pthread_cond_t *cond) \ingroup API_Threads Restart all the threads that are waiting on the condition variable \p cond. Nothing happens if no threads are waiting on \p cond. \fn int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex) \ingroup API_Threads Atomically unlock \p mutex (as per starpu_pthread_mutex_unlock()) and wait for the condition variable \p cond to be signaled. The thread execution is suspended and does not consume any CPU time until the condition variable is signaled. The mutex must be locked by the calling thread on entrance to starpu_pthread_cond_wait(). Before returning to the calling thread, the function re-acquires mutex (as per starpu_pthread_mutex_lock()). This function also produces trace when the configure option \ref enable-fxt-lock "--enable-fxt-lock" is enabled. \fn int starpu_pthread_cond_timedwait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex, const struct timespec *abstime) \ingroup API_Threads Atomicall unlocks \p mutex and wait on \p cond, as starpu_pthread_cond_wait() does, but also bound the duration of the wait with \p abstime. \fn int starpu_pthread_cond_destroy(starpu_pthread_cond_t *cond) \ingroup API_Threads Destroy a condition variable, freeing the resources it might hold. No threads must be waiting on the condition variable on entrance to the function. \fn int starpu_pthread_rwlock_init(starpu_pthread_rwlock_t *rwlock, const starpu_pthread_rwlockattr_t *attr) \ingroup API_Threads Similar to starpu_pthread_mutex_init(). \fn int starpu_pthread_rwlock_destroy(starpu_pthread_rwlock_t *rwlock) \ingroup API_Threads Similar to starpu_pthread_mutex_destroy(). \fn int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock) \ingroup API_Threads Similar to starpu_pthread_mutex_lock(). \fn int starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock) \ingroup API_Threads todo \fn int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock) \ingroup API_Threads Similar to starpu_pthread_mutex_lock(). \fn int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock) \ingroup API_Threads todo \fn int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock) \ingroup API_Threads Similar to starpu_pthread_mutex_unlock(). \fn int starpu_pthread_barrier_init(starpu_pthread_barrier_t *barrier, const starpu_pthread_barrierattr_t *attr, unsigned count) \ingroup API_Threads todo \fn int starpu_pthread_barrier_destroy(starpu_pthread_barrier_t *barrier) \ingroup API_Threads todo \fn int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) \ingroup API_Threads todo \fn int starpu_pthread_spin_init(starpu_pthread_spinlock_t *lock, int pshared) \ingroup API_Threads todo \fn int starpu_pthread_spin_destroy(starpu_pthread_spinlock_t *lock) \ingroup API_Threads todo \fn int starpu_pthread_spin_lock(starpu_pthread_spinlock_t *lock) \ingroup API_Threads todo \fn int starpu_pthread_spin_trylock(starpu_pthread_spinlock_t *lock) \ingroup API_Threads todo \fn int starpu_pthread_spin_unlock(starpu_pthread_spinlock_t *lock) \ingroup API_Threads todo */ starpu-1.4.9+dfsg/doc/doxygen/chapters/fdl_1_3.doxy000066400000000000000000000567321507764646700221740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page GNUFreeDocumentationLicense The GNU Free Documentation License
Version 1.3, 3 November 2008
\copyright 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. http://fsf.org/ Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
  1. PREAMBLE The purpose of this License is to make a manual, textbook, or other functional and useful document free in the sense of freedom: to assure everyone the effective freedom to copy and redistribute it, with or without modifying it, either commercially or noncommercially. Secondarily, this License preserves for the author and publisher a way to get credit for their work, while not being considered responsible for modifications made by others. This License is a kind of ``copyleft'', which means that derivative works of the document must themselves be free in the same sense. It complements the GNU General Public License, which is a copyleft license designed for free software. We have designed this License in order to use it for manuals for free software, because free software needs free documentation: a free program should come with manuals providing the same freedoms that the software does. But this License is not limited to software manuals; it can be used for any textual work, regardless of subject matter or whether it is published as a printed book. We recommend this License principally for works whose purpose is instruction or reference.
  2. APPLICABILITY AND DEFINITIONS This License applies to any manual or other work, in any medium, that contains a notice placed by the copyright holder saying it can be distributed under the terms of this License. Such a notice grants a world-wide, royalty-free license, unlimited in duration, to use that work under the conditions stated herein. The ``Document'', below, refers to any such manual or work. Any member of the public is a licensee, and is addressed as ``you''. You accept the license if you copy, modify or distribute the work in a way requiring permission under copyright law. A ``Modified Version'' of the Document means any work containing the Document or a portion of it, either copied verbatim, or with modifications and/or translated into another language. A ``Secondary Section'' is a named appendix or a front-matter section of the Document that deals exclusively with the relationship of the publishers or authors of the Document to the Document's overall subject (or to related matters) and contains nothing that could fall directly within that overall subject. (Thus, if the Document is in part a textbook of mathematics, a Secondary Section may not explain any mathematics.) The relationship could be a matter of historical connection with the subject or with related matters, or of legal, commercial, philosophical, ethical or political position regarding them. The ``Invariant Sections'' are certain Secondary Sections whose titles are designated, as being those of Invariant Sections, in the notice that says that the Document is released under this License. If a section does not fit the above definition of Secondary then it is not allowed to be designated as Invariant. The Document may contain zero Invariant Sections. If the Document does not identify any Invariant Sections then there are none. The ``Cover Texts'' are certain short passages of text that are listed, as Front-Cover Texts or Back-Cover Texts, in the notice that says that the Document is released under this License. A Front-Cover Text may be at most 5 words, and a Back-Cover Text may be at most 25 words. A ``Transparent'' copy of the Document means a machine-readable copy, represented in a format whose specification is available to the general public, that is suitable for revising the document straightforwardly with generic text editors or (for images composed of pixels) generic paint programs or (for drawings) some widely available drawing editor, and that is suitable for input to text formatters or for automatic translation to a variety of formats suitable for input to text formatters. A copy made in an otherwise Transparent file format whose markup, or absence of markup, has been arranged to thwart or discourage subsequent modification by readers is not Transparent. An image format is not Transparent if used for any substantial amount of text. A copy that is not ``Transparent'' is called ``Opaque''. Examples of suitable formats for Transparent copies include plain ASCII without markup, Texinfo input format, LaTeX input format, SGML or XML using a publicly available DTD, and standard-conforming simple HTML, PostScript or PDF designed for human modification. Examples of transparent image formats include PNG, XCF and JPG. Opaque formats include proprietary formats that can be read and edited only by proprietary word processors, SGML or XML for which the DTD and/or processing tools are not generally available, and the machine-generated HTML, PostScript or PDF produced by some word processors for output purposes only. The ``Title Page'' means, for a printed book, the title page itself, plus such following pages as are needed to hold, legibly, the material this License requires to appear in the title page. For works in formats which do not have any title page as such, ``Title Page'' means the text near the most prominent appearance of the work's title, preceding the beginning of the body of the text. The ``publisher'' means any person or entity that distributes copies of the Document to the public. A section ``Entitled XYZ'' means a named subunit of the Document whose title either is precisely XYZ or contains XYZ in parentheses following text that translates XYZ in another language. (Here XYZ stands for a specific section name mentioned below, such as ``Acknowledgements'', ``Dedications'', ``Endorsements'', or ``History''.) To ``Preserve the Title'' of such a section when you modify the Document means that it remains a section ``Entitled XYZ'' according to this definition. The Document may include Warranty Disclaimers next to the notice which states that this License applies to the Document. These Warranty Disclaimers are considered to be included by reference in this License, but only as regards disclaiming warranties: any other implication that these Warranty Disclaimers may have is void and has no effect on the meaning of this License.
  3. VERBATIM COPYING You may copy and distribute the Document in any medium, either commercially or noncommercially, provided that this License, the copyright notices, and the license notice saying this License applies to the Document are reproduced in all copies, and that you add no other conditions whatsoever to those of this License. You may not use technical measures to obstruct or control the reading or further copying of the copies you make or distribute. However, you may accept compensation in exchange for copies. If you distribute a large enough number of copies you must also follow the conditions in section 3. You may also lend copies, under the same conditions stated above, and you may publicly display copies.
  4. COPYING IN QUANTITY If you publish printed copies (or copies in media that commonly have printed covers) of the Document, numbering more than 100, and the Document's license notice requires Cover Texts, you must enclose the copies in covers that carry, clearly and legibly, all these Cover Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on the back cover. Both covers must also clearly and legibly identify you as the publisher of these copies. The front cover must present the full title with all words of the title equally prominent and visible. You may add other material on the covers in addition. Copying with changes limited to the covers, as long as they preserve the title of the Document and satisfy these conditions, can be treated as verbatim copying in other respects. If the required texts for either cover are too voluminous to fit legibly, you should put the first ones listed (as many as fit reasonably) on the actual cover, and continue the rest onto adjacent pages. If you publish or distribute Opaque copies of the Document numbering more than 100, you must either include a machine-readable Transparent copy along with each Opaque copy, or state in or with each Opaque copy a computer-network location from which the general network-using public has access to download using public-standard network protocols a complete Transparent copy of the Document, free of added material. If you use the latter option, you must take reasonably prudent steps, when you begin distribution of Opaque copies in quantity, to ensure that this Transparent copy will remain thus accessible at the stated location until at least one year after the last time you distribute an Opaque copy (directly or through your agents or retailers) of that edition to the public. It is requested, but not required, that you contact the authors of the Document well before redistributing any large number of copies, to give them a chance to provide you with an updated version of the Document.
  5. MODIFICATIONS You may copy and distribute a Modified Version of the Document under the conditions of sections 2 and 3 above, provided that you release the Modified Version under precisely this License, with the Modified Version filling the role of the Document, thus licensing distribution and modification of the Modified Version to whoever possesses a copy of it. In addition, you must do these things in the Modified Version:
    1. Use in the Title Page (and on the covers, if any) a title distinct from that of the Document, and from those of previous versions (which should, if there were any, be listed in the History section of the Document). You may use the same title as a previous version if the original publisher of that version gives permission.
    2. List on the Title Page, as authors, one or more persons or entities responsible for authorship of the modifications in the Modified Version, together with at least five of the principal authors of the Document (all of its principal authors, if it has fewer than five), unless they release you from this requirement.
    3. State on the Title page the name of the publisher of the Modified Version, as the publisher.
    4. Preserve all the copyright notices of the Document.
    5. Add an appropriate copyright notice for your modifications adjacent to the other copyright notices.
    6. Include, immediately after the copyright notices, a license notice giving the public permission to use the Modified Version under the terms of this License, in the form shown in the Addendum below.
    7. Preserve in that license notice the full lists of Invariant Sections and required Cover Texts given in the Document's license notice.
    8. Include an unaltered copy of this License.
    9. Preserve the section Entitled ``History'', Preserve its Title, and add to it an item stating at least the title, year, new authors, and publisher of the Modified Version as given on the Title Page. If there is no section Entitled ``History'' in the Document, create one stating the title, year, authors, and publisher of the Document as given on its Title Page, then add an item describing the Modified Version as stated in the previous sentence.
    10. Preserve the network location, if any, given in the Document for public access to a Transparent copy of the Document, and likewise the network locations given in the Document for previous versions it was based on. These may be placed in the ``History'' section. You may omit a network location for a work that was published at least four years before the Document itself, or if the original publisher of the version it refers to gives permission.
    11. For any section Entitled ``Acknowledgements'' or ``Dedications'', Preserve the Title of the section, and preserve in the section all the substance and tone of each of the contributor acknowledgements and/or dedications given therein.
    12. Preserve all the Invariant Sections of the Document, unaltered in their text and in their titles. Section numbers or the equivalent are not considered part of the section titles.
    13. Delete any section Entitled ``Endorsements''. Such a section may not be included in the Modified Version.
    14. Do not retitle any existing section to be Entitled ``Endorsements'' or to conflict in title with any Invariant Section.
    15. Preserve any Warranty Disclaimers.
    If the Modified Version includes new front-matter sections or appendices that qualify as Secondary Sections and contain no material copied from the Document, you may at your option designate some or all of these sections as invariant. To do this, add their titles to the list of Invariant Sections in the Modified Version's license notice. These titles must be distinct from any other section titles. You may add a section Entitled ``Endorsements'', provided it contains nothing but endorsements of your Modified Version by various parties---for example, statements of peer review or that the text has been approved by an organization as the authoritative definition of a standard. You may add a passage of up to five words as a Front-Cover Text, and a passage of up to 25 words as a Back-Cover Text, to the end of the list of Cover Texts in the Modified Version. Only one passage of Front-Cover Text and one of Back-Cover Text may be added by (or through arrangements made by) any one entity. If the Document already includes a cover text for the same cover, previously added by you or by arrangement made by the same entity you are acting on behalf of, you may not add another; but you may replace the old one, on explicit permission from the previous publisher that added the old one. The author(s) and publisher(s) of the Document do not by this License give permission to use their names for publicity for or to assert or imply endorsement of any Modified Version.
  6. COMBINING DOCUMENTS You may combine the Document with other documents released under this License, under the terms defined in section 4 above for modified versions, provided that you include in the combination all of the Invariant Sections of all of the original documents, unmodified, and list them all as Invariant Sections of your combined work in its license notice, and that you preserve all their Warranty Disclaimers. The combined work need only contain one copy of this License, and multiple identical Invariant Sections may be replaced with a single copy. If there are multiple Invariant Sections with the same name but different contents, make the title of each such section unique by adding at the end of it, in parentheses, the name of the original author or publisher of that section if known, or else a unique number. Make the same adjustment to the section titles in the list of Invariant Sections in the license notice of the combined work. In the combination, you must combine any sections Entitled ``History'' in the various original documents, forming one section Entitled ``History''; likewise combine any sections Entitled ``Acknowledgements'', and any sections Entitled ``Dedications''. You must delete all sections Entitled ``Endorsements.''
  7. COLLECTIONS OF DOCUMENTS You may make a collection consisting of the Document and other documents released under this License, and replace the individual copies of this License in the various documents with a single copy that is included in the collection, provided that you follow the rules of this License for verbatim copying of each of the documents in all other respects. You may extract a single document from such a collection, and distribute it individually under this License, provided you insert a copy of this License into the extracted document, and follow this License in all other respects regarding verbatim copying of that document.
  8. AGGREGATION WITH INDEPENDENT WORKS A compilation of the Document or its derivatives with other separate and independent documents or works, in or on a volume of a storage or distribution medium, is called an ``aggregate'' if the copyright resulting from the compilation is not used to limit the legal rights of the compilation's users beyond what the individual works permit. When the Document is included in an aggregate, this License does not apply to the other works in the aggregate which are not themselves derivative works of the Document. If the Cover Text requirement of section 3 is applicable to these copies of the Document, then if the Document is less than one half of the entire aggregate, the Document's Cover Texts may be placed on covers that bracket the Document within the aggregate, or the electronic equivalent of covers if the Document is in electronic form. Otherwise they must appear on printed covers that bracket the whole aggregate.
  9. TRANSLATION Translation is considered a kind of modification, so you may distribute translations of the Document under the terms of section 4. Replacing Invariant Sections with translations requires special permission from their copyright holders, but you may include translations of some or all Invariant Sections in addition to the original versions of these Invariant Sections. You may include a translation of this License, and all the license notices in the Document, and any Warranty Disclaimers, provided that you also include the original English version of this License and the original versions of those notices and disclaimers. In case of a disagreement between the translation and the original version of this License or a notice or disclaimer, the original version will prevail. If a section in the Document is Entitled ``Acknowledgements'', ``Dedications'', or ``History'', the requirement (section 4) to Preserve its Title (section 1) will typically require changing the actual title.
  10. TERMINATION You may not copy, modify, sublicense, or distribute the Document except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, or distribute it is void, and will automatically terminate your rights under this License. However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, receipt of a copy of some or all of the same material does not give you any rights to use it.
  11. FUTURE REVISIONS OF THIS LICENSE The Free Software Foundation may publish new, revised versions of the GNU Free Documentation License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. See http://www.gnu.org/copyleft/. Each version of the License is given a distinguishing version number. If the Document specifies that a particular numbered version of this License ``or any later version'' applies to it, you have the option of following the terms and conditions either of that specified version or of any later version that has been published (not as a draft) by the Free Software Foundation. If the Document does not specify a version number of this License, you may choose any version ever published (not as a draft) by the Free Software Foundation. If the Document specifies that a proxy can decide which future versions of this License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Document.
  12. RELICENSING ``Massive Multiauthor Collaboration Site'' (or ``MMC Site'') means any World Wide Web server that publishes copyrightable works and also provides prominent facilities for anybody to edit those works. A public wiki that anybody can edit is an example of such a server. A ``Massive Multiauthor Collaboration'' (or ``MMC'') contained in the site means any set of copyrightable works thus published on the MMC site. ``CC-BY-SA'' means the Creative Commons Attribution-Share Alike 3.0 license published by Creative Commons Corporation, a not-for-profit corporation with a principal place of business in San Francisco, California, as well as future copyleft versions of that license published by that same organization. ``Incorporate'' means to publish or republish a Document, in whole or in part, as part of another Document. An MMC is ``eligible for relicensing'' if it is licensed under this License, and if all works that were first published under this License somewhere other than this MMC, and subsequently incorporated in whole or in part into the MMC, (1) had no cover texts or invariant sections, and (2) were thus incorporated prior to November 1, 2008. The operator of an MMC Site may republish an MMC contained in the site under CC-BY-SA on the same site at any time before August 1, 2009, provided the MMC is eligible for relicensing.
\section ADDENDUM ADDENDUM: How to use this License for your documents To use this License in a document you have written, include a copy of the License in the document and put the following copyright and license notices just after the title page:
Copyright (C) year your name. Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license is included in the section entitled ``GNU Free Documentation License''.
If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts, replace the ``with...Texts.'' line with this:
with the Invariant Sections being list their titles, with the Front-Cover Texts being list, and with the Back-Cover Texts being list.
If you have Invariant Sections without Cover Texts, or some other combination of the three, merge those two alternatives to suit the situation. If your document contains nontrivial examples of program code, we recommend releasing these examples in parallel under your choice of free software license, such as the GNU General Public License, to permit their use in free software. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/files.doxy000066400000000000000000000042401507764646700220520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page Files Files \file starpu_config.h \file starpu.h \file starpu_bitmap.h \file starpu_bound.h \file starpu_cublas.h \file starpu_cublas_v2.h \file starpu_cuda.h \file starpu_cublasLt.h \file starpu_cusolver.h \file starpu_cusparse.h \file starpu_data_filters.h \file starpu_data.h \file starpu_data_interfaces.h \file starpu_deprecated_api.h \file starpu_disk.h \file starpu_driver.h \file starpu_expert.h \file starpu_fxt.h \file starpu_hash.h \file starpu_helper.h \file starpu_hip.h \file starpu_max_fpga.h \file starpu_mod.f90 \file starpu_opencl.h \file starpu_openmp.h \file starpu_parallel_worker.h \file starpu_perfmodel.h \file starpu_perf_monitoring.h \file starpu_perf_steering.h \file starpu_profiling.h \file starpu_profiling_tool.h \file starpu_rand.h \file starpu_sched_component.h \file starpu_sched_ctx.h \file starpu_sched_ctx_hypervisor.h \file starpu_scheduler.h \file starpu_simgrid_wrap.h \file starpu_sink.h \file starpu_stdlib.h \file starpu_task_bundle.h \file starpu_task_dep.h \file starpu_task.h \file starpu_task_list.h \file starpu_task_util.h \file starpu_thread.h \file starpu_thread_util.h \file starpu_tree.h \file starpu_util.h \file starpu_worker.h \file fstarpu_mod.f90 \file starpu_heteroprio.h \file starpu_scheduler_toolbox.h \file starpu_mpi_ft.h \file starpu_mpi.h \file starpu_mpi_lb.h \file sc_hypervisor_config.h \file sc_hypervisor.h \file sc_hypervisor_lp.h \file sc_hypervisor_monitoring.h \file sc_hypervisor_policy.h \file starpufft.h \file starpurm.h */ starpu-1.4.9+dfsg/doc/doxygen/chapters/foreword.doxy000066400000000000000000000014051507764646700225770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page Foreword Foreword \htmlinclude version.html \htmlinclude foreword.html */ starpu-1.4.9+dfsg/doc/doxygen/chapters/images/000077500000000000000000000000001507764646700213105ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/images/arbiter.png000066400000000000000000000014201507764646700234430ustar00rootroot00000000000000PNG  IHDR~}hPLTEٟIDATxOr0p,:ΕdG(%+ܠxuxAs;|F&' =I`$ ^,OS5ENNNNN>vgJ\ƳW~ȅ+O[枹p߿x XGNN~g|_ ޓ2[D"sVMwD s[Wu,rҬ}x5ڱmfm*3Fzc\3Wf243o3ߢN;O-WTcyf[;IM=HLsǗnyз^q]yCpGryGo==yW8}hs$[=Wxr25/V;'Eiܠ~2q1)uc]5֙e>sc B/^bi%N̠)&BwOx 4)Q`Őקf5Wy~>Xo<8h\|b.88B{qp c8gg gmugܯo|'kwPJ]yGrrrrrrrL}<4ޤIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/data_trace.png000066400000000000000000000171671507764646700241210ustar00rootroot00000000000000PNG  IHDR, PLTE@Ai @0`@ԥ**@333MMMfff22U݂d"".Wp͇PErz挽k ܠ ݠݐP@Uk/@@``@@`pͷ|@ KIDATx흋* GgHhnغ&9C.VDM;h}RqsMtB&eTg*VƗ( eq=x6Y8Ꝥh%/JDb#~Yl;%iYN&R3Ϣ "\ܕbE#öH7'>\&Ϣ./[E5o|.'Jߣ5.EmHg=)xM_/|tIN7 5wzOEgI*oD=TSyr\< %S4pi~7ǽ w窶GIKf $ԟW$-< %o|Ѳ%}9Qe;ƑLxz}e#~YܙS4WTke*=)qZ$%|峠*Շy!'6D>g%я?6,Is? ]߬ʮ/\T[E!ۜ^ -n:#ֲܩrfN>^L8vsŒx$Qbק-:SKN@4ӿ+KN!?.f/ +guzm kW/W)S5TGMŲoǷEZ6Vc'~zhiI$tsKv祿ӫ_wskvj7 vw>s$b.Dzo'&M]u9½dNI Puh1nfOa"@sKS"w˱TpxIG )5A`N:cqP)}w=G]s^ [#̜%~E_C1EэJ?SLi-K:;gz\12;n~ihֻ&!@Oy|x 2ߐs")y9x?]ޟ+7rΞEt< bidV;㟼|Jz6M>zQ?sgTeKьym6 eÀр7~/ < L{avSrp( 0@o I@#Rl]P-@Krꫧ܁U|[gq6!@` 82]4y<+6?wGހDNdpdƶfpD;_;'Ը[;⒨ :Hɰ xOsGTՏ@b{JTmb̷ lܞ̌pyzo8PǙLn=+V2J3ma0j.@W&'dKr|V5Yه7;9V U(]v)~~KŧIK:^^O9*qZ+4eK Co2|~?A=(lウ{N(?.ą]?S]C v_b}9أ~6aX^D[KMU} -|x̗gzF?а4f{\4|(/o>>* &_RmjC[N jQM!)m.8InbD )Wfˢ-Nuz]'{ ^l9i3p~dRlZGU_Esob~_rkkTWHӿ0/uBמac*i+R2崤t:m{PBrz/EWdBkG`fC+f+Q7\i߿.P@YSQ/ՙ6'FOq.(1xd3IEF4q\գ/ f"v[7&\q|>\M~1=u^1˹rbn],Dz@ݫ;ػ%t2J]>W1Wv+R__n 2Iw?_AE∫;yv^'wSMׯ|ccQݚ}Y&np`]@uZ~jHx95Muz`\fam!%`]Y]SlS|&E:[m d,ii!wpѼƢqaVӭ*fMEs:*+_tKC3}N#q,LHQs\\!d?7p_6ktð!F>[E* u3Z,$n Ui~(MEnQm]5inUψ^Y&#s閭 Ɖ%ōcU(sهʲWViW6Xw~6UF}L,6"Lo1\I:oVմ/I?=C+Nfm5F7 Qx/O:PF9>%8͵ [b'σ*r ʹc>wNOs)=ҳWi\:Z2DydH ,ޅ)Ij)<y7Az9 _-Wlqff!g-Їyu{,Xf'28 pB Ϝ+;6}x[l#Zq\N5c{V}q4}[7J>O#I :1t28]f43re>2oN$<{)>/RxKX/($ ]gfbDfϧGS||ťcHQq{\U[ Q@OJTDQe7_B'Ba5JCvScr1X2b2JsjWPF݋"Y({lD+&s윣)|*=ڕ?chW(zM۴P ITw/MiCSV;qT8FB1S%"sS}b .ˑ>aZ}0#pV{:!2Q!.qB'ݰr|٧Ԃ}xMV B~@<eCE.ۻQ8nKNO/EB%wl `(lxTk:23,` 1 6#6Rh&RFմ $RS-\("vQ.JqԵ8P9-/]Q%1}['90̐ c O]`D??_/`WL`$sI1ݏyIQ}!5&8wGv*S 0xlIﶤk˦-b/$I._RVhx,8PqZ jw Ž.ArՆ,-p6oS"睺>CB7# qqhgK|_ȭ e 9 Uy ?pi7 QsF8bqJߙ%7Rr2D'696]-`0}y7{_o 2>tNba}.}0 빬2v"7_sZry#f``Iĝn%ӖNMv'5;Qarab;fb`Ov;|=tw5[jEyPi a fEǛ Kxa69(St-BzVVRH#/R/;J kc9E7}" |,M#֡̃cn?Y!6dM0^xa=l :ҝËojġ{3"{樴C|G<S{e _+4XcqK4"?ub}%fQqgz{< إ}|0vWdp,HcV&&(t+U(c*BX$- ]q􍳜s%ZJrk,^ ,StI1[f-+$S KCNPG Sg#)|,!dDT-f!oXN\jq$ǷM 8fTudTc1k_>pI^UpU,txD-v\;Ǩ wV&(ǗE6f_eE&~%uB]" s"}b Xp89G@͕Ǭ?1UĆpL~~odv]眥*0$焂%|ٶ[/c3z:"p {IND>bs+eKlc5(L:;=Fi7F=ʑ䏥eS$#ZIL 8]2eꦎR=k?˂?粮hO1},ŔmV:ӆ哝d%}~1*10:2tLreNBus E&qdQ|J=NmI,4Ƨ/Ho m7(;s=&q Bx7q.is%IpN`c3=l58 IzCd>A*P0E/8[ؑ 3j2Ε9^xJ_e=Xc\Ԇ#h*^l m@mb#e[M #awC߲RoaǍ郶 |9?ə0"Džso߳JA5y>͵/^)61>Y>K SU))Bx(ZSAFF|,,3С̆Yu.A_ sC&?>9͚4~tp1& y/fÀazc#!3Nm:Ag,Zgy( Gow0Gz+lhjnG+ z@S>!@e0v\FH'Yշ4/.I6UZ4Hz+2O0r/ rz,(B.wT}0ڈ-V\D0 AѾ߽͌zbDXg x 0@S @`@D $@F Ku@dC̆a|Ac @` L) 0@2r~c QRF x) 0e,^L)2[) 0@!ߘt L) 0@S @` L) 0@SH#dBv~kX `Ma-`HH# gh'Twd@Z4p( 0E'$raq@L 2[OQNB@S @` L) 0@dC ) 0@S @` L) 0@S @` L) 0@S @` Ľ) 0@S @` L) 0@Su  !'s :kЯղ.vr,E[^_F@Muu%.AXB<Dz  @`{wct/Aw_$?,JuuXժ7W|"XXqU=ͫ[tn \ȦN*y;Scw|j}]p oyu:PCH# v{kj)|^LZˍԿ@i_]扈KRQQ>""znRznlX X&n2xh Xs98t`J\J9 s2탹^२$؜&u y S @H2R?)pxII~JJRJel[ Pw Y#LR Sx?D`~<IENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/distrib_data.png000066400000000000000000000141471507764646700244560ustar00rootroot00000000000000PNG  IHDR,2PLTE@Ai @0`@ԥ**@333MMMfff22U݂d"".Wp͇PErz挽k ܠ ݠݐP@Uk/@@``@@`pͷ|@ ___???E$8pA!C(@??yZ1Ye:^u #@Gi?|KX#4n#FFT{xh#[ \䡎Ϭe {ݩG(۷o'D*Ôt`iZ.Uj.2ܖ9<\8:ή Suɒ-ߺ!FGД?̀Mn P .r1X^"f~-fމfxq3gL犥oᡌL1š~.Nf_8B pxq}gZ]>7172lӭm;(eIEŶ_XU6ՀXw;\],ύ5k띏[l U'仙O%  HuͲ<(`:^|3MtcT5z#m۫] 9W軏X]MQ8^z}Q庀캦 ^|;ȣ%ȵ~wCvtYn~BݵH#n&Nj~WUӹqP,e{;cβGS{rz^v/bPOuJQA!A w?IE$?ͳ Џi8Ȕ|n$'@S3?k=K%.Kt,S3KkWg|v$q!)nBꟼ/{,%#l R%wޑ [&,@/l6`dJNJ\,@67)2 SH&%?^H&%?x~0%'$ ;3(Gx%(~Ϡ> 8}ϋI`::%õaaBC.@S.@[p9u SmRvg[7RUh$o_:/ L/%f|!`xr6wk2;/g_) ˷/Mm 6wR+id/t}蘒e|iI%(~ =Pc83(Gx%(~+M5x?j`J>(A<x?gP JQ A <3(Gx%(~ϠA<x?gP JQ A <D+.Ps{Z4JQ zG@XEO* b,$Tb`rk|)rQ$(0|BrpAg HP$( J%`A<x?gP JqZ#8aGx%(~ϠA<x?gP" ;3(Gx%(~ϠA<x?ͷ%8yYϗqŽGܾ< HB$ _m/uWPGf[g@x'(~XBEUąeˢuVp"A 0Q/ ڷ$:Գ Ad ٞB5cP8k@ǠX_@ <1 5}m@r;(Gx%(~X~|jR,4vRrQܩYqeQ,$H, ЧVd:R%A=G$^/V(@2@7A[$Jf)@2%د?Kswqj8 7gű "YqPB;pP gaNzBq 5REep?HEYg50AՀ0ݡ'm@ Rd`[PxZJJQxZJJQ l$((0Lձ,0A w_N.t580|tD>]󯮸B}j D^0݆a/PuΫ~l# }xaI@}  $C&3x:b‘'n}oPw~J?M2.`C&P= 0qUi([2@DmRۤ{ *loP=(G`JQ6_n%(~x㕫JQFUah7x?RQOHy6x?gP F_6CKPi"֏nƧ?>lFՀ  kM5wjc4^|e}3`Yg# c&-Een_v#NUQ{/sn7uID-px1 |m^ 0tQ(8hu8H\VCuoeIFjiBbeTӎ(@=( PQ:pŽǍH%(~{%@ <7^ (Gx%8QA<x?ioWA <x?n ŏ#:(K;dxqނo%(~q P(@ <Go%8!{ Kpqԟ73-o#:Rf( '}P UCNt`s%ɻT檎1omwl <P=<+@R:ePۀ 9s#8>}d+1 )'ζo>!@Nx`0!@Rf^q0〼C6NڬQ D~ak'"\/]KLp[I>i??~Y yoY?R~2NbknDmw3ΐyl̙޿YD^AaF ] !@0/>(@` @S m|34\%#p ki4GC:!GqxƜg -zvv z=GPlnH^zSl@}w)Iو`Ö1/wjCbhqpindV.(H̱YխNy#y2exKʿм]|S[;3ȤrLozuF-ٗyU2}Mhi/mϜ \d_-T3@~~u]Ud7CLgVuԍPW57FFLv3nW5 V6b}fw:k07Lnן;gYÑ|UBQ?vD4V3d=T*ۋD7 OQ6ܲV3wghvȯ9#F>5&NE=׋Jό{_rh~q#fm@)Ū (̸7/ۭ2B Wkj 3:fϠ*3l%S[̺$8xfy]G9~~ު*B!B!B9?AyYKIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/distrib_data_histo.png000066400000000000000000000230171507764646700256600ustar00rootroot00000000000000PNG  IHDRMgAMA asRGB cHRMz&u0`:pQ<qPLTE81ytRNSD3"fݻwU0 DPf3짟ڙݟuiu~\"憂wUN@qr[}?0Y=g?ix5bKGDH pHYsHHFk>"IDATx흋yq'\iD9.gQHnEr;kӬۺ6˺֥[mwI#}~B Oy'I_k6享Yce`ͼ_]9Hʔ껮(+8at0}Pf7{,kv89¼8a3UP"%CP.m E$NQv>[}w:,>pu _0QJ{8Lg.v4S2S?/%CS ?GIGA}' MA$rxs*{ӄ9Ƒ/sMHd!ueQ ONo dAΝCzҼ>kHA;~H"F"K=yei>t"2$<:`,]҄0 3)1ftՐO޳q})|rx\;K.ٲW4\+. 4";;Qq%̳A.vCRHj"RGڌJH]:) U&Rxq$N7~q{).j u%Rꆑ-T#٘qr:/GoRݥvT O[)#sz+¯.o-&4_6S!$v:.%X}ק6(ivkL0jPW"enB5R,bPw]v-A]YQCxKPVQcCxKP^ s!%h=CxKfM#aJ@5ہ&kSiFtفk΅?{[kz_DŽ?ܛRo:I<y@5 Цԇ;ہE~,6vrہ6kЦY!h&w!h& h&%.v͚Wm$\wo ڬIYs!%h!.!܉M!h=!-Eι@Sn)*w;CljS9IlB[XX$vr7$ T@On.IsX؁\Z,&&!34TҚ-Ar7$Lwo Kk;}sX؄6SaXEg'N&ث$vYΧSÚ.: a*l݅o07w Ego7w ۅgxDg4Z+wiMrd޻#~8㥵=ZZ\Ytn#gg>ÔWu>R$ǥ5 0%q)Ll~i*8*[9j$]/r3T|I8r\;w=ѳGKQ)Q,F~uÔ?sWZ SI5y}&ILx5Ii*\Zrg>I*IPM${#Tl@5 |/I›kz_'6)*wVÚKS"Y5h&y4%5[]4 C;Xzڴ|+w5IQ5V45 (V°&rT$vTrhS ہJ宖7IP1]=oXdk4+wnM2Úl4Xg~iM2Úp4 $Dw5mbMBuT^i*<52.I>Ú4 C~$6TtI@p9l.I`*l6:LˡM}4~ǤFp9)sXMgӊ5 㸩$њTxkgMugsXe~.IpԄHk/&M$ˇ&=$]k+bMB)$vÚsS vq̔h>%h0ˡMYSa;PЦ<WЦ\w [jKkTGrWKkT,@rWKkTl@rWЦ?C T*wvP SaXN]# Út: LaMbЦO5 9W5 h1uMkOWt 0f kçvL4 f}F2Gl: ;]/]m(WJkz.6}d׻TO}b5a=CRo$ ?YNytsB&b&)Mfٜ~OkWJķf/0WGbЦL6G.*|-/ITm dSo*w9OeN m dx>=P,8hMS_|ͿdK_4/y;o& 8=:muSrho>l_~q(N#1{Tt84mj*\m:Ro$[;mVSrh|x3\xfKSrhSZfm媩g)|zST*wUSaiMBzqŦxΧ]5CJS}`VoiM%ESWdn\,z׍aM2JCt #=fakM+K }U#[rhӛ_ߦ4v `z"Σ f1nv{\=y_1ҔŸ^+΄/-׏>= _KSz.L%o6ۋL(~Aa0wnJ4//aDc_Vr ??S[{{Klx>=107bD:9nC|NtlKSڔ./-np \ROOY9nX~"9|&/LʡM1殞q;M.gj9n okwy%I\"lܡ#Z[X瓟ř_wDxϛ.WxNxq tW,h/5 Dwow sgߚ˧yY;i󨑿3ˡMpݖtqX>ܨRSЦ_B4X-/W{TU"-}ˡMeٴrƖT%;_;ܴ{_싙DoEߋKӾXA4N4=+L'rϿk˷! $ccgwfx@x3Ct !^:/of73KGy-{.ጣ:*{:a е趦G{RN_6$J 9Ū=ltZ"yZw:ʢw:wS 9'U^19n8A'քH~I7@NtЂN=^6DQY兎I:ש5!2}!s;4}+? =%%n兎~ N5!:p/DI֝85sEYCG@{tL.ΝvS:xO .〝EɠetMNS܁1u{+i粡B9u\O9T#]&DFnv{Q#9s-w|wSggӚ8;&_;uk|N6]op=Dv0K#?IK]UF: *̓G np]$e(^HOLQi_"|"%0\FhK }8w bğ96}v#HJ5 YDWhD›=oʏ3G]m#?rABxH.*WJzN$9]LFqeQGaB:Iҕh#!qmA0]#BFP" oVT@erGrŅ4a!|[r mP|>#JjG_1fKjWN)l(E$Bz$[&HDWea9\q8`OE8Er oӖ Cz3lT>Jr3g^Fh\2F=#E"_7kމҘ4?ٲ.|4Kdn!<]T,ͼ?2TҨ*%CG³ n=8$Lu7"]ן!s5bQ!<]T,ፊ~*F9{86O/ծsG-6r=\G-Ee$9誈L$mF'' dGx d %镗e0ȏ,6"Sys7(hfd0W1<ROF$T$d"s=qj$@pJOJWBAā[l5HD(6E(Hfre[\%N;>ļZqy: *sy\N`n(-_r$~W\>&EqpbX%tEXtdate:create2013-11-08T15:41:34+01:00ؒ%tEXtdate:modify2013-11-08T15:41:34+01:00`. tEXtpdf:HiResBoundingBox504x504+0+0wtEXtpdf:VersionPDF-1.4 G:xIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/eclipse_hello_build.png000066400000000000000000006730121507764646700260150ustar00rootroot00000000000000PNG  IHDR$#sBIT|d IDATxw|Uם[CzBФCBt`w-k[uu׵#]`w(ҤkH嶙!t3s|Ιr33ɲ, ˲Y2fy+_SÝf?[5FkJ+ϳ\O^^U5,0Mk3?ڪ1gDv؉a;vV=K-I\0/0*+5fZU%ClFl6l60%@`bRҷj`%EDDDDDDDDDDDDvno0ME6dHW$w=؛,:* Ӵ*8PhtӴHڰ,JjҷZDDDDDDDDDDDDnr;vV9 5m$+:ڨ>sy݃*0q03\Ҋ0 va/I/rNl\veUz5"""""""""""""/va=zUU< zQ#-Ŷ[̥zo]HQa)>01וONンq8ՒSXDDDDDDDDDDDDft:^0Vw*\05U^ YeebmS)96s(>?Sol6q'SDI_%EDDDDDDDDDDDD~V **hWe > n(9\y=_L}ٓ}AaaXNj؛ěΥkZՓJ>8'|kί-[}z9%ؿ' +FGx|Ku%l˧SkY%EDDDDDDDDDDDD ,&11h7j&t>/ŶmLXkzUo+ܭi[yw\ BC>Y^/!skpN8EEEKdddyGddDuY8cǷ9V4(f5M9,2dX+*""""""|>&N s=ᴺO?ʴPN;~z;wU%666n7 #99#GHHH,lp4.ɦM+El6n:?蔠_TTºu8pk֬&5'IIdeeu6 ի4h 5z; wP6/$v,ⴳb\ %Ev=`%\."#c(--hZՔ)QTTTeZrr3]窷S~~6m"44]ڭ 11zEGGs7qyk|`/"""""[d˶m;K`$''bMgF= y 8?0sہnݺ /L"//p'#H&ޔشZU{5쳈V6mΝׯ/))ɁiDFFbJ1"g֮]ngAt4vFHccС'K,Ԟtڅ|.\D~~>111ח=WYgzp08昣]͛P\\LLL iik֬YŠAj]݈Օwn" gcQ9ed{+/[Gy SNNII) ?ցlno#zƏ?*e>4M{cGOλ@yyWvᆿcٳ7nʇzW窫sO2j^x?˘%K~"$3ϼ/4M '춍 $āe #ʕrXbI}j|.Z4-VAѿ;HLϬYy79c< _DDDDDpzJfeeyV.@ ))R""1e:6i~]uU 6JϞݙ>M.$%%sWbو&+k/o=֞[ 1><`8 B\,"""""1cFt #U8NVXE޽gر%koS\\yԩOs)СCP,^s=p{'6%N>y=<_~L8󟔔bE())?]駟ĉ׳xb>3.B^|9:8,>㦛n7f#X /p^}UŋIdɒ_o߾|pI'q}r'R}q8u7x}'z:ˮןсaj:qU_ |>~/G`֬l޼+`ZDDDDD'nr6¥^ARR2YY{I\p3fI\\i\oVnh~yW!ݻ1cƛ\~Ǘ޵o_3fIjj*qquӁ#||\2OQN6\.wwY9uB׮]Xv-K.K.E: KFJ=8uJQQ.Æ aElذ1c svv'q=O? dڴ1M ?[oEީ} :ap%rXx1k׮0|gtz&ܹ0V`ܸq5<bEy}?~ }w6}]l߾nߏaz݁&44(ON{9=0HHH0*Y.]zbVw&?a#Qyzug nʦM!11͛7_sg߯__ ypHkv:u*ё￧x kbŊ v<#UL:bJ IJJoߺo44ִo j䘘hF,54j8!ϔQZ#))Î]JHH8'??+cRZ&99N~^, e\.^7EEܞ11)l58ssOg+کrݻ۝cųw[׊zy˲,Lӏ硬0p7x09 ؿ럺߿_@߾mV ?8Gݸd?g)uοN6laXNn]+yر!Cb2Cxxxum6J{W=uCq:q.Lˇ_J;2w>{&ҥkb+#"""""rYkbebYZ3FaKθ\4Lahs+OLL"""W`;:BW<˵'&S\ڇE 7x}a #pmZ^LX6]&QWDDDDDLV܋ʇ)ۉTEmI` k-)qmQin:vLŜpq(=m嗲w)sI|&]ag\wYѿkj!""""""-"$$@ݩ-DDDDDD {dlrzޡ.LWLۄ}xrQΧ?hf>LaذvzgfQHH^uCiBN'aF80$ #s1}i\7"3fgDEELX&Jp=I^^֭'99C95i^Ew5%EDDDDDDDDDDDDDF{ """"""""""""""-1H P`Ä"""""""""""""" %EDDDDDDDDDDDDDJ&9L{Z(c*6+۶ox=9m\.SRx窼omŵmvp:5߹kSI'ߦnڲ&**V-T&99\N"QW{_ފѶU׍t`bB[lhcJ#OLC$%&+39BxXX[b7Զ /pfY~OLLBCC[4[-[ٱs'Iiva ta#x$&$r9U.>TdZEYY111U/\EDDDp-^z<;f-^27>?܊n`Y)**u¢",\x_=2oEc|\w \{vGs?nm2%^z> ^| /JПo;f۴) .WU{YʩO7W{ߡv,]}+3/%&\x1oLYl=΋-UfCmÏtٲ*1My7livch95[Te[oÿeZŝ߿* _|m=0˖z=mHu5SN/IP\~lj̻햛zK˯;zT`za- |\Ng{~fe ObB6<}L8L2622=1FVzٷ?q\Z˭mӆڼ6m=ҫq'ջ`Ztlb:tu}W_>Ԙci3~}k_}䇦pirު-޺YcfUfc+׶-sbCZcKONξ}l߱n]lEcGbYFC LpDBBBbﻸBsZcρ>p:KDDDDDDDDDњ 5 ;?;{{l߾ޭks ں wc6, N;*+ԧزu>O??Q#ҹjVEEIdddQoٺ'x< 1e7M/?K#njo`&モN:#f:bbb(*.湩+5ͷɠ&9)'?0~67srWr/"6&}pܱ[ÓʪI۴6Mw n?/8eY<^|k׭'%,,1~/vler'34X^]DkC3 c`< =enÇ5iZޓI|\\ԻWjC6kkvmmCsb0c:i/%9)),[n]?7ܼ\n,R- =`ybl軸sso_O>㋏'<Ā#1'1 |I>+Ӟo^ԕW_w8\y٥XSS_|5\U{-Uֿի%Nl-θO`A90MLzۙsL뎃1bia3O yӈ7xIII>tajmRAϞ=, Ndd$7NK`=gv7/vx˯8S,{ֶMmmj-kG9jV_]O9$y*> =(<״>m&wO gɕ_[Ye܇:f):UQc Ё^}{7^zͼtܩx}~+Fss}mcuZq[mZm`?-\j_a,_d |>~?kׯګS.n蜜6lw7!2"/7W_7seڽ&6ܦakv8ȱtҙ0v(ŗ@p_ϙ˙Z뺶lʮ{ ڴ>T1L)c&9 ;-[`Y-scg5.o[7oxw٨FCmc%ԷZq6ԶmsbZsK>e+2]:pt̺ jZ=cC ccc٣'?/[NN>8o,Yn#RPX`}kTWnӖ6kms9Logv*S~뮹y"2"Ck?7*NoTyյ>T1SRZZoSL af>3N#"<.;Qwxx݇+4{ GX>}㶾u8=kϳ3Z:'5a],3N mPv3h66ƆٟGgex}>F12={;wo.C jo^7xm6iEDDDDDDDDD*k_IJL4M|*l+ v?\geg-Mb6O>;v_>RXThY8:9$$XG^^F&%&RZVF^~~j8."#d]wpW03T]|0ǙYYHW_Mlsڦ-m! <ߚ^ezCu8z<8K/ϿSV{IIPT\msFW~?z߱sguLڰZ9` m3Ӵu x@#j̾ mJJ2{dV'}ǎ)MQMuӆ ߚܩ# !COpݤ7]<"-'~Ϩ4zd~vl09ݒz,}9UniI~.4?.-۶Xlyyy̜5¢ϾK>}ֵ={oߚq4g~?;̮ͪ]:3p@}@OvimRY9˯6|00M?yylٺo ̯Psmau>,0BySƝě3aK9S,[.?O?ڽ+ =LyۯN5j7f=m\[Mٮ8c/عkP>˯ɀG:T>'B!m0f|!a :װz:҆7 }? ެ,7il6҇gw[t`4'RPP~§eZ :ߧVl٘|]\ǀ#c4 M]ni<(#ӹ;_|a܉'i:Kȃ>we쨑Lff3 3Fll ^n[ogLt̙Wj*w:*n׿a,Neuuiq]wrrڄ󈏋#//g>NT6&KyF߹S'?K3RR8c).)inӆڼ-ݛc:JV8S51|P:uXo9N9$<4f|7jk܇9f+?6RfС0lڇ:7j`qme6v ٵ{*bcb/(o^{ZoC*ΉK.k95'JBCt4پs'`pя =2=oeD't.ZBB\ۿ{nw7HKY]Hqq 9_ٖkn o۲+nfeBB||Ue{ uVǞL#;ҒkَTy˲ TKDDDDDDDDDDD-زB.!!p oda-DDEѵG?(߇MBr R_ͽ#"Q(.+vj*lقkոIKKcǎi[2eو!11^rezٷo%%%OuaaarI;&rh߲Ԟ];cEVoQl6vF  ~ *_[laɒ%rdɨQի%JKKϰl:[ž}x9߿/[ qgMttth" kx+++#..1c[C|DM[Pvrl"""""""~_N:5fr/'+pQQEE-4nZ|9 ,#$99U,={sQG1jԨ:q:x 0m >${tBTTT˭]%KpЪ1U5gf͚W\Q'ilذ_ukz,^k2r:;wMФe=mw(&"""""""9J m ;eA2ҵ%xJpŴjrh v㧟~⢋.jd&@N֭o{&..2>M6q'֙mi.޽{tRok,cY/fܸqm^qK/i&[e&77aÆit2`,YBYYv$}[JIs)^cŮ l~A)}?#мc'=1[XYc.#Ѷt۲||!1،6knl+n3|0bb꿓=[˲hhDDDDDDDZFk-+Z- w"๩s[Ugl;zͱg~tcΞJ>Z~~PI\N''q&"""j+;s qT;GY-˘49"[}^Yȓn_Rhcsy,&/}+EŁ<|ߣU}@- 9Vv9,&N=_N;Qg>ޮI_~aÆaٴiGQׯgԨQx^֬^Mzz~(]~= b޽t֭]6۷/ʰC\*))bԱ9gXd.p6z_eeebSVVV뼊d}ۂ8pHMS{XeFqUCdץǻڬb_[-eL|gwusc())!<҆JSX˃=G='';$6yf͚L+**bҥ^s&O>mKkoX9u|>}Ex|l6V(-)ݡf=?5u/I{#cwHL1>ՎIi~~=,nҎ,wʕ&44+ۭ'ϴ ̤P7{ yCG\?4u:ԴWR=#`5[{Xu&am64-,hb]VJIiQ8<8Rx}GUm[II )}z2zJmjmΏ-ufrYcZ_kq)a.8#R׼DHG0U<^/:b}|+ C?WGlm}_>+nqVvۧSb_R#o"/fQ8 &/.7{Y;Gۼs7bYT>ܴ WX6mIbԍf_ǑI`o3gIdHwR=q.NBQDFF؅߻/x͓-q>Y$3u+&b(ǷgsnR;q 9Wzq ٓ ##1M^5g+G$QobfU]EcSTЉGSa#qDנ"ؑgZ#դ84S6MFn]Թl{Ypoذ}U+4hVj8,b`!,]lXb9eT$ŋ[*6UTTİaXlo> '+4\k_sFuqͰalX?JmtO7WWCwxTPPHӇ=~<֭[~ğMz7~W~c v0 ð/d q;&%aa$$we- l|?bG[ gr3$q%lZ煗 i߹ߔOpޓ's?7#즳pDDU̫X)$sVV'pJJJvLf~2neJ& pZ2v|XiL:Gnkn_Gj_JlhɄ;bxë,ߜŦ_mVX^d/%)‡asRkr&ņ/ذ{?lZvH${п3!x CHp~]?}p`0)M?t ܲI4Wݍ0 ]*3ɝ{Én2))ᛷ`m|М\h:oltI¿W'(tgO4: \LSOa)l%M ȥÏ$cV&7Ϝsz7^1l ^Tڍ'|nt"4FK|ԧrcdnLUy_WS=K2"=@n]Tyݞ=˂ uF=XA~GбcGV\&qXENʯv`РA̛7m۷3v&;77+W2lpbcc۵ò,,KMH]D w$*Ed|zg2oQ "^CRsxፏ4e %|t֓0r8atBoeCI~?T{&ifCAtPM|(=EESTTBQq1_NG`Yӎm\ʘ#+Wa[N/M{s⬑1IIv𖖒oGعslXJJj?㴩9Zyk!`Rߠ8gH]`qVkNz \1-<(raV>QN&Ϟ3u]R@񮥄lxоñtOLOŸGPhT}'KJP Vw{* g%,Zd-^1]p leѹsg{[ŗ_}]3v>{ӽ{ٶm;nX;vpz?6g<222Fll.:EEEѯ_@ RoBBBHIIʏ@ߒ^ e2tىpى1pډt`L.#pw[p""""" ~w|u̶d{Bo% ((`r"zgN;N== w**6,"M^^6[g~, , r Nyvwvv2hj^f w4.V^G#,&âfסB&["&vڅ` ?w#4$LRbسVmڐyB]D5dЫO_‚k>e=NLAMFto ~VEo^)00:5dⱏ~Ft5}}6p=~e6/-<騪Fwl_>9,t1'۵(bog>Ƀ 70s? !6z٠q](z(֣'hܝŌl4t{_S{kvGj.Mۃ EUqq#wll]Nj޶vU5{|${̡(:Ӈwc:~s9렣:;ݿ*3LF!DAn&3wK&1i,v1Ĩ^Ͱ 3`+[uD&؄v) ?9p(,a9ߺ1Yv.YCsQ"D4j΁[9q:c]h<.;+$vl x߷F֨9{9=İ]9ॿ? IH/>@w'2kc?o?iנǏ䋏6W˙o1.(j7/$k׬bϾ:elٶfFd,/_E6iڲ-}:gVAW]ω?-X>1Mplur.vb6z2;vANAh@sgE_cc߱]x~| kҺf6xʾuޘ2Ab a`dct^}ڴz/đl"$C;&t+gdq6&E9${iMڳzB,,߷wpn&ti\u^옛D$UZd%v' O(*.U11ew<d1q)-(Ʊ޿9qq"ڗQT*VLzJ(!D"^ʜ6T@'jE1Lid<M%BF=~pMI&@QQPN~Q?o}L#(,UUbf: &M!l8vKׯLsاmUVZx5_@̡DY,s_saX߽Xރ)vc6{Wg#0MoƦ(LJJJ{ѣG9u{Kj*yb$׬%%%6..bz} ؗpc:dv۶ѤIڶf2 >\. C+R6l@VH=k7ҵ[7"""j͸822.?Ng㍏Gu֮]G> "Ջ?nh* wJ\{xs8;TIkDm2G7fUPVҽ+(v`4耳ij#i3M=vo Km!_R0dfUPލ56A4 Jn@qߠgoҿgzNpm9+?E+#8Uã|e/#2j"C8\.Iӝx#c%N@ۃc~܀ֱw&HłlAjabOpG0U~C.d 6d#XCƑuƇٓY4jt*++,&KwK[f qqq4iR5ɛ\$2 eeej ۍdu<^[ޚL&n7Z+Y|ږś9z5Ժ_DP__L[ ̫e_ÁE Y!B_m۰fZ-_Qe rK.wV0jTHش吒/֓RKgmɿL^mcض"0\4 E5;Ghm2g;Ѫ.OxCBBhɆTWNVNr*eMHkV㊕wח?䧟7үG*< +m q\5-[&.#@Ή65ݧhq jn2՛4qNF}8bW%j0hӦ gv0rV+MTm9Cw`Tpzm9e8nbtޖ&1i߮-bDEDߘ& KTl|Qi]lھY/k%]1URn¢D~M<cwPJq\(8WKv_q.7ee6Jm=nJKmĄ_eD'?/GA89uEńXg5Xp؈IMl(p`2[p9x4>Iƭ3HɄfQVf]GUU0 |w!]Q};䳢<9 N)((-ܣOSmժKlPk]t"4MLiUi¢W>`Ӂݼp>s~O 6=Y'b¼ jy|ڍt{/F[xWٛuYYhP:]?/]gNֵ_=Y|]-[ҵCg&Mؼ5.iEn)MӈOnͿWdmfKv^BǞCEOs!%&c4xcjb R>w?~Ngc8Ӏyn D}3XCBau@V[@$*Zsvt>O~̍w0|PΛOO?U]2UYfewaQZiӅUqUo>&:߾{gF?FVM}3*L=miw8]ׯa=WI+ᚿo9>RJ زG:_10pQ5 ( y^jg> aPsKBC0bD~̭(M)=i'KuPYu_[ieeD)jݻvWTU_>,_^zY%Zc IDAT+ dLU*gWVN\e@GzzgЩS'bbbukG [hNͫ}쫯Aurssٺu+ݻWJ䚔'7l --JIIISex/7Oyj+ʓܸS߉r.M/%>InQ 1ff.T !BM-7wNge>rBCCi֬)ӻW_QPP~[AO\qgo"*Lr`Z Iqksz&u;bֈ1Wġȕs@yviժ%{Zm.'\%}!OF]PPpxJ1:%@QYvU߳g8*MJL$:*eWTؾ}{Ej*zQeR^|*CALy1Q>In~1>vUNQ6/F5L3oܲ?8a<6c.5T]zzT&c=Hg&s*߸O,0u,u#%,,=H4kJN^!Q D%С#(F q5Gq}IW7:[N2Mwa+u7L !{ʃ$ zyw1 {PlL]QUfN]t:Q0Lnt=VF^3 `]4Zw hn~Axxt#9)]pW*m۴AQZ]_qxfMp;~&}ZD_]?+Zwn.Όyl]{o ҩ#,ˍh4dxF%LVJL#4qf셬=~|2v8kgdI`^n\.Tov e܏-unY,.J !X Sy[Efu8 /`6ym}}y8fBOh{UιG ӏH`]vP *ch؜ٷMHq4>_yFn#'M#@V&j^ݍhvН<͆bqS@T]T`=߯< =J`&?^1T\c댻fdP߮r o[K1kXcU_E%*FOkHW&Nרs8:GU- !I*c5oEJms1tp %ts nNdѢe.AAǖ4Uy\Qh4a4*cb/s_'^b@& -,z:7727~;?-zE** KÀ8Pt2wwrV *snQ.lvUB!L2}ĉ';^J`ݚ#m vc"tO1B0̼"vnYp)&":0QFN\ g:&6:Í(9*Ё8r5ppu c6iҤ~|ӥ/ή./- fU|HʻǀwIc拏VÏi_mj5]/Xvx}MUUt]?gP F{CGlAB勵;Ag|M|~EԎ>9*.)Uܷ~6o<ć8WV7Ũ{pA?OpxQMRw1D꒿s'%!&BIn0ݧVǟns[B*@,7ri6k֬%NҝxTsݘkm{Fs+ jDC0_TUB!zVnw#'7O?ɓ&|;8n^7:Km|tt>ϣi r5lkЬY{v#44Pl2rtlxz\8iR}UqcЋA EŃL4`ZuZ:tƍ$7mTe'NTn0ѽ{[ƍı}6@kmsk*|V?fή]|_deeѪUj3  n7EEE4n|n1xCz l۷|&TUE4NYu(9q\nL&gxW/)6`lA^[9m ,-yp*ǭy &[TeVxkCQUsh$Ъ ‡_~kL*ޮy0Pb׳x]tzVu_ǁy0w&~wuI .["]q,Ztz 3B4H9k:BB!ɓ?swq6}NizXUU"#/oh'"1~VZ:qbn؊YY$%&6tժl n3{lOzlZz5V⮻"((2[iKV+l6_ĉ|zVѣG>|8mڴSzM/HHHcW`4j*h۶m&Yu]:t~kW\;MK%ץ]ʱ !R>B!B֯s6aU>2tA ˽_PU+((믿>۷Qq10a+V'|hV\~'uFl2NNNf,^ŋ_.Ek׮ 4M&-[dk7хIlB\d,y=.RM!B!s9YCp Ƌ~!C<JJJ())(ոqcmFVVEMh*Bjj*UƁNLL Ǐgɒ%lݺD߾}֭[󦤤pS\\GQBBBX,QF׏m۶Y/UcZӧ5T\;MK%ץ]ʱ !B!v.x2D 2a{9<쟈3f{ڶiaX||S$B!B!9={|1Vsմ@mmê>-괭iv2{Bs涖mypЕtk:\GoM9cye.Lfёtk+9& ۼçBF#*n_*'rJgxWvMvm=˅_MU?leI/ʕ+ <<:櫸뮻lj>W9(*I^W$ =SFM0d$-e|O$_xoѣGiݺ5lܸg}?P᧟~bĈuZG<XCB[|+V0v츊㗭^B!B!D_m'ҁUyN7m)sv)S/ѣ1 sLR;v? ,4Vq 4ﻏcvvVrj7a|soo~5Y@Fqv!,ӱ?=<`:(,cbۡ#5>nw6Qq;]@f]HF#n ܺ+%˝o8ΝKnxWy'uB?:vR¢bXDŽ _ܹs2e SL!11AVV Uf#''Fa4.ڬ733> Pqvm_.cǎѨQ#fs`X;vsQ5jbj~N-S]Yt)7p~)7pCEBX#0PRtb0dVZNtzIU7o`ׂQ9w>]FX:ɓU,^==p/QQC}ioM'NT1ԩSonn.f' 6sǯߟ &/!B!BԥN;4Ϋvs=?yf6d'66?Y=̛oVq9<y~1ڨ.v76(OW{0'op33FAᵍnbaw 5uU񆝩.Jз/~h׏'UܾE LAUyϓ'OfɼΞ;wLc,i Qe >j<h?r ?|1&yW\qqA{9{=8~8ƍc ģG2m4֬YC\\tԩ: iѢo&<כرcQUMӸ[y(((ȑ#L8ݻwLtt41m^rIzz:o&]bc̘13w $55磪*\}uvر'rIv;{x9r$0o<7o~T2 vVK2x`>S/Kbʸ * `R *F7kROoP1T]?$é),W;<.7_T ;?. Mx<8~\n>l_JWvsi*V[wܫDZl6>h)Sg^bhӺuܹkSL%;;NϞ=3튓effrv:b),,oбcnC_B!B!v"ؚU59]1o.Ɯ9楗^nSO=)S1/ƒҼ>>}C9nܸ׎!v[_wy~||Rbbb1x!t-\`gڴ;Xli4ɓ0h@^xyƍG̯:SNirS>l&''}_>xB!BS_^«VMQ5 i]۷onӷæ͛+pU u-~?ɪUyݞ^q~€Q]F:u> |8^Q!,{|pälfS5|gnJ{5g7%NZyj({A$pGe+99HLLKVُ~y IDAT$' t^Luk?ufbzΝϻ;vp~a{2e `Ŋ?~]7nO>$999Ds8@GԩS9r$,_nΊwwSOlٲsÖ-[Xtiv|IFܹs+ƘIHHСCzN9{*_UKk2.2Ԋntf hmЈd-]ˣ;sXv(^8ua2 ӣGOo\o `.svyw4X*}gN婹w6?:tYfq t]gDnnn2w>mƢcۣ>ʵc0G9&8p_~YżCV|G\x=~UE!B!rl6kư>EOOWtA6sX>=bQ]F H`2p+;R@j4`wk3Q"`R*$D(c1#{fnf$33WmnN[y5 PnAu@QoiVy%p1h?ݻ-|9+d\%y|VF_ىdNp2!C UV:7x#;wdĈDFFt:DpBBB.8믿N͙\2d?{e3пcO0O>ӱc2®ԕJ% 6$9IIIY޸qcȰl \T]Eo,*݆inժ@Zu-|DϯPw-(77^R-b:ҥOit`ǜMftN.} ֠0\GT3Z>**ld| >>tFY|}}+<ΰB!B!(f49t{f'!#wE|tbbbg>vWzFu+j6xy?<[cw~fr;ҦzTA5ך;C^`/wj4UCX<2`Z9p8ƍ6ߴAAqPr6v]c7iҘOΥS|N-*֦] C_B'9Gpjƍizy֯_ϕ+WW^v[Ν;En:4t$;;ZjpKaaaF6mdphd 67/X(dl&99*0]A呕e)_BBV۩`uE1Wyhl0`%m6|}}e۶mVC< @Z@m/ ,Q)y:~D&EO19~ 6lO3&JמDVYǑcYv]'MJL&8~0r! FLݢEsuknuٽ{S뷌u̜9 (}8{6?Ò%|w: =";;l 2kȐB!B!7B#v֏S3o_| ,غfS @֭-UVgͳJ+88ӧOWZ]yFu>7RPoُSew~y p~ѱ'4KcBfu;_g+ˈNe'wUp4Z-`+j8o,}u"GDD>zl}ZIwej ,[|ŊjVt Of]`i;t|5UK*\͛3~xڷoO&M0<#wP(Xd #G$88߭Zzzz|rF=ƍ b ƎKhh(5b̘1|G|.]رc t͛mg̞=jگB>xzz2dnʈ#?Pׯ;vDѐ¢E,-KS*wGݺ;xL4Ln~U)=&ޯ|;>l/ϡCܹS_㾠8gs &Na^|otyYinC`[IMM%''͛lIff&7tK|Ǭ[۷[M˄ViVZ1sL^hԨKHH@">>Yeώ;2dUg7NS13B!B!KzJrgB='txwxsGl`ſU&04il3NǥK4lfuϨnF IGh!/c2Wk=?Exo*w򂿿Ep9?Wԏ ?˖$5Lpzm]J-i[S^,Wi࿟}.?ǀ|oE)x;*,lw:u<8tW\Ill,͛7̞=ŋY. Zh4n]Σ2d+ۧO˝'߲nw[/WUw!Qy/7/Ǻ9s5뗬m߼jo%"[F0m4:teKy덩WFoR`4ޖmf}I%\Z>(( uw^dB!B!l3xm) `T*i޼e<==hT9+ϨnF^_U7sfKSlRoߒiU)Gݖd-$.K/ W6mZҡC/_{߭|oUNKk׎sIBBBشi5V7B yEj~@ꪨA…Xy7,ca4)q@;`.Z!B!BzLfoݢWg(~>"*VV? U^K;3JӾTW7Џ ¹t3ߛ!79U"]@wt#B!B.g.8~z!ZlLHQS*B!B!B!n*٧!(kB!B!B!B!*B!B!B!!`!B!B!B!EHX!B!B!B!nB!B!B!B[B!B!B!ȺI`:5]u5\!n "=q'E^GJVpW^>x.^#VpW4]d*~ر-G?/'^Bf/iϒ],-GޥH[8t=zRQTmAπ.ЩYjtY2󮺕ƍDL|p<ʹ 6_eoU%B!n5*HMK IMKSU%Bq/[CxypBM6oߞgϲi&yfOEAARH|Q}Q EުeڱK'Y׏(*^8?2wXVҝxmȋNFB:& `3,|<5ֻ1cJU~ݦ'C`kE(UǛaP(ZI ~إ4F99Fjp6iu*<d_&NdŌf3~ʤ^# ͖9x{xe3 [3`n?go%R.ݩ9W/.:(GYy{l{|s}qs}9:m8ϙQ/BwJݷ}{Ѓ5]$!*ګ(fTJ~k%;wl'zSߌ/73zh+T_|ҧOr !Zjٜwb2E&34xhhٔ#1LA6_Eg=]T[=+k*kAQ:R˫J||gިH/{[oV^/@6:0h< X-C!Ҳ0cf (L7n˚~"OTn&O uHΰL{s{ӡ/w.b=BAzL|pq$='kPסT(>^._Ma,xg,xfOo;S{w|n.S/]G?~Z[D?._MeOR/kNpmS2?/{ۯ/[s&}gw;ǿB!=k/+E7qyXr%~<BT*k&(J=}2(hauV cΝtw!̥T7mH 6!u9s1ft9._Iv/޶ ]z41-}ATuX$3|Lzs[6l҂'Nxfb5~bz4:4iQޠen2&ߏm祇'вA 2sX=/ksXK|if41soɄIi1=1B߿3{׾SWj݃[d6Y3x%gtNy-4ߘ?4Yix=Bu/NGtAp}goߑWͤU4EL]~;:,QrGww9S~{7cMЯ 1+yyάh8:?8s9wGN53u Co4preǷw?/{ۯ2/{s&}gw;ǿB!ĭ(t2.1y%+Kݙeظq{AAz}Ňq7>!խJ|:>6B(J^>iddi*` >>f_iii9?edd[B\ˋW_Y"3v~D?wˇ™<ߖڐxwV!ԘlЂ4vöxjHL*\(Z6h@ Vʿ%W/=Oo;d6a4J? MF|<7NgtѺKj};^x=hڌ,# $ȯehs9wJU2sбi{^jӃñ,kpse9WHߵ Biz_lq=*îS{ݦ'R[9w닻{+QU_gL*CEǿB!ĭ(ۛ@oojk4>Fx4_Puٓŋζ^TTX` ..2ϟOLw5(>!7*hVF1Շ L/%p"9>l h}1i$ )((`͚5uWCxF3gh4|r"##*sLL #F 55BYhիFBABޝ>̼|61!#Sgxqe敌yأU7Ol^<G=@6O~i IDAT%;VCZ_r2Q-:Ԡ71l3ѻmO|l rjLt篵=+c#ӆN݀: ?@.=$?J5cmT(m2̹mj:N/,`ΏWX@Voe-%ħ'Fߪq\̘`V'2;?u|\Y;B2t+ZW-P_e̽Jzs닻{+QU_gL*CEǿB!ĭF*T(P*(K.R&NȢE #%%aÆ駟2abbb0a'O_~,X0x`J%f#GZ?ψ#'FciիYl7 >vY6i$^j>3a[~`#m/ lܸ{Rn]~m&MġC5jQQQl߾JŌ35j{&w6|p.>x8q"?BMP.Qv2.ѦYh wc ;5~#XMW(|۱m|9~.| mpZhV%k`zF2YJIJe౻O#|O?x=Ujt tZ<]BqWm̘yu.H-ˇFzgӡmD гUwEjVU_g# r r[3 rVg_ۯ;ɴ+yYRV~auyqt~:s|ÙZ|KsHFr{|+c~ŝ˙ۿ_*B!xT( JEqJZBRvᅮ'O` l6Xw޼y矼 6 (~O2zhON^^}J`0X^>h<^|B!n46Ceh} CO̙0Sߌ7i[Ðg}uZwIf3ܹ~d:t(###6ٳ;vz ///x V^}ôHBh2Jaa!EEEi/3g(ע0)rL^*%m5(^zN&dae2Z]qW@E";H8 MfKk^ā G*|j4 :]:InBJY)/@|W1߀Π$j~Z>kԚZƷ괜NG3ǧ;>xgϞ.LPP>>ׇ4iԨ+PdM!pѽ.T;hg[`B5C<7~R.X`TyZb̙ 4^'66VZqMk׮L8SNѦM^jYΝVk5~B;[Mx4#۸wcVBryɼ[4 *{PPj~ uř#02ͤdUn7"+ԮRqcc )ȡ<\Zr%e4דyQti^6xh.eHn7U& %,]wU]~{st~Viύp}0Ld[6rvgVQS8~앯!Bh<`uC‹Bӹp&]|*I/| ::ɓ'w1i2صkSGFFҶm[ON߷ /z*{* xihZᇷ?BkTI8_-qpܰ_(1vHoOɄS|b<Ԭ;˝Wzuj, mcl=tw2RN ª.}f+~{_&Uje]zA Q (M3ϢT(QR{(Yڳtjm׹.=B*7`޾.O^|KyXEoKeTJ%!kut|Sʸ+_e\B!n%j5jUIWJUUV)@ U{FY[ !&&Ʋ|V?O>f-ZW_}rمF)~KT8u׉z1/Do8\fթx( .]رc aÆ$''͛T*xX|9Zno-=Bǫ[% QU?'o< U#@{}f7j4P^]Z{ûoIG)M R@ցZBݿuV1[(lsk_CUv^:)պOB! jFZWUc+ @à*O!B!B!HX![J_e=v}tW`>=*O!B!B!HX!DIMM%==vUYo^dNm¬p^Ѫs)B!B!B,$,RtRk*_c5̓øi,?!B!B!f"`!n3 ,Zp[yk|mwFPSy !B!B!7 !pdbҥ0xj˷YpX%B!B!B,5]!7/ի)((`t: 5]4!B!B!x׮]dddP(h4RZ,|=**Bz=`ƌ]veС5U,!B!Bvy^{5V\ /ӧO!1!čZ .d.ȉeSgHa3oABBqJ%o6cǎu:[rBJ<==ykB!B!_Xf/F;dze޽;'OGLVV7n|/((p:gFVիWst{r6stT'8cL>(tٲe=FalZZ/7-^ORR 4$&&Rn]]^W!B!B! מi+ Tz SJZ OZZAAAԪU2Xfy@bb"!!!e͟?< 4hzV!5^z5 :=` *e( ܯ,VR9[.GP0|p~iVlqmۖ]ұcGΜ9cΝ;iٲ%w}7VoZZ>8).8Aˉ'(**`ԨQDEEB||<>(FZ租~b߾}\p{1cPPPc6-˚f F#FOsEN>Ν;-B!B!BJB@TzU ˼ֿ*O!'O`bcc9v̛76m0o<Y`f'|ѣGs%bbb_4 L&6l`޽$%% /0i$yزqrBV#`p>7W/ ^f|=ǧv ^>V0=[n%bccٹs'O?43tPGFFegyz0n8L&۷owWLL L<FcyI!B!B!yyQ)T*T**2ORY^.Caa!k׮%33BAxxO>ɓ'y_tϳ>ksС={|rr2AAAX5ja>%Nh44oe!UƮBÇgL`('$u+(^B iػswR h4i&&%%Ym6INNr5 Z2?==p233ζ ^+7t#00֭[Wˏܣq BO7Lu GD|foxys_;\Z-Ѳ#]d/(h.s#-4s.癫g\%v=noT:w_/ wۥ's:)/wr|άlrf9]6GF{SzkHLj^tfY9=Zvp+|& Q3G0M(@"Bvm{gUvo_z. NM5 5+itnAϾP+UԯUfaeҏMM3D4MXsҜULETJ-B?*Ǚ?KvAմ֮#?ĥ'.<@➛ΧēAfmQ*Nm{b/p%/jZN(׻2<| GˎղZ;GG?ѯB!\od*z(׺~VP_kQgDÆ cذadgg3k,JJJ @С\zVFS{211ѭUwBHj*lp-wku߽5 GYUG$;}b3MHaa!۷aÆ~fΜ @DDL<N? ڵkU:_~%W\d2ѻwoڶmˏ?VOe˖DEE1i$򈍍eΜ9Nn=!,[cǎ֭[O),,.&p $W)HE<{|eIW^f[a:ş#=ev9ߪhOѪa3lv"t*%ҰO&zĥ'x櫷o'38~9g;~?>%%+MGvޡPWTms}1瘶sfkρ"-o,Y_Zvr_*zV{sXkn*K$fFioE׳Vfs_b{=4{ׯX)Ҳ|{ ryc٧,߽mŅ4 -}Χsrg#.wM[j^L`_[Km^ʿp>%ީ]v:X濿+X)yYNϑE;~-X˧t}+lb٤egr$4w7 \q_BQj*%௲x<`2.gZ'$$_[VZNg}BLLeVZɇ~@AA{-⫯r+]!.ؕ/h&=.?NS/)i[ Il_3=GͤI{l9sLfΜ7ۗÇӬYCBҥK;v,ؼy%~ѱcG4 ))),Z?NӦMcOL&ƌ޽{-XcJF3f }KR!n=udb9rivImR3GvZF|ANpz w,-;s,w5?֟Z]!Ez%REfsㇷzp:P)CJo0p5?zZ/&tn>]gCeS™..-X?{j*M*6w2ɠUߩ7/<8z^g BE;~dֈܬ fd㑝N-{Fvo;,/:#Hd"@ks[lj~&:~\.3GN p&]ǿe-cLxwX'|d2oYuz,|4W~UəWzN']U[~ %Hz﯌a&#_YɈօLJغ:wtتs/ߦO(F1:9;N`KK*!HtGܳqw?]{qv8CB!*Ch6_ JxL`Bpll,t IDAT "((-OL¸q={6wk֬믿fРA,ZB{1?^ẕǖ-[cܸqNW!K] u&BPoZR*{6vݶmS6nܘ_._Lhh(>}?85j?buFRRqqq;S&1-[ֽBJ?T(j33G).u݌Ut713V|FvA.Z}=#;OZ?4Ɓ 'D6b7>^$d0n[xq%q%^\0/}lųi֜]|5WL#nVRPeH{U&J<=<D&5i_Ϟd&/AO~G>йh2>>=[zlK^ ti78QwC&:=cO2{S2 .} m6SN) ri{U羝Aa;=~^ĤEz'ne:w^҉Q*dfگMK@y_ɔ>PCPPdгhNϙÙs&"S}JKC+w~>V=89ھuܳ~ݲ|k羝-]Jb<<{0 y櫷g0}uaCc*}Ι׀Nwjݴ|44ߐk-Z uh߯TJG~U/^.%ljmם#q+|zԬLyS큺ԹV-2w?U.GwpǟB!nj?&#Mzze\uwA||<ƍi5ٳVуxbcc ߪՁ/y_EVϳw) !IvU`_8 ;P((J %EVT4l݋ M@+0`;FCi3rJbcci޼9Of,^R,GPP]t>߼ zڄYNUgq%/u\_&9z5lcHd|Jws oAZ<"o;·?~l*lfyKF9<RY0= OO]Izcv>f3s!5"L:}KD6h¯wW(qiIg\w-=*&A8uFuBѸ3Nͯu-0Ucy}%/̼j)oFߖ 篳Gݦ+CQ(H2k74 χ#'ޚ,ضr ?n!"1S<@41pϙÙb6v&65qߢ}JI\LK"- ALwt}۷2ռnoO/t==6a^M&̥l'/o3_GjД'KOqo:mqu_;xw+,˗8x Ǐ#yuԛ6غ:ǏUvq <*s~^X5,ܾ3Cy/<8]AfnY9de҃?U"]ߡ_O!7SU*M-^^^6 !?Mw^\6'2sEWZHH@ ^QJ~i1\/~!kVo0b͇k/-"걣A!tNLu_1qM;w}YGɄ^Jm3 ɟ wאN}V^ûyoӀyߎ{I~i vM+SMVy y"q)Wg12x"*J*(8SRv;$d䞡JWMyN-%wgΠ&=]gنr@nٳCh߇DAAGGNdh>u>C;$DG v:L~)ߥ?IRl1N\;j5?v QTQ/ƚ~t3_B!B!\o_"[\lx\ZZoΆBxDJJ )))(¬YذaÇ7h}jԡe~:g DRY [[qE)xq K>,0R;\WDil3Hۢ qBk IjcB|6Q:izg E5+|e;[dżdqg:͡3'o-uQlhT*rp} ԇ/7G,4sTRޚ/ N}~#(_ާ{;+[oyMƈn-sٳW|u9x{~#o%ex&>Zfxuk?ΔFvhAmeTs>)]ݶ ێojWz/~=Nk>5 xqq9 tސ_3׿B!B5B?JE\\Gn1Ut dwuևR((#28 0=ƒB+-,#V@ۇzz&r[!`s 萈 KhڋR* ܘYHUF^h귶K&I`y? TYZ] eg[;Z֓7^3ik=w?kSsce6&e%.c|xdHC Op^ ːGϝyӹMGnWr[hBqAee%ǎCߴmL#G3ul=o//j]P?JViWt=Fu~gNV>{+#kXޣh-1) gN~'R\Q֠|$$Yu٫ڎdsbݞAZZYRWMyu%ZyVJE,GL&9%rtW]z¦C;|dûʇZF֠Qkл?GFʫ+@Q(R['xy*:-wsۉnّXv÷;7>t8Ex`ݓҘae mNu:ٙ. *:@_?]YHX{yջsdzsXW\'sϰncĄFq9͂bv8`۝9|$Owvg sٯڲO*Wm޿ny ?7-ZmWߡ0) \:-? 96<ڻyJPŽo?Pue2WW:=B?gO=-:֣{2#Qv۝.g۟B!BϑB!`07P^^?t҅lzm ~5yE<8RZUΨ,=q-|[ߝFEuݓҸkh( qzԸ$C# nzbʫ*Pլ I-xxf}͇iHhԮϕĠܫ\s3jAdˑ]kdQrxSw)z-7/Ϋ|}<>n2Ǿx0U:-ú_m_+~i-hO^cFOЀ ʊZ_XVDlXtkF õ:?{ׇ;ST<6n[8vFCǖm=Z~./!AO楱Z9sksǀĄFX{ eڼ7ؙN'Ο1㮧CB{/o˶g shtp0c/f~nz;b%|s#5_NIe)qXh v/ղŏMtH^|8~>9}zӨ٣5xv >~Tꪸ ό~Vьnג?5}?6}{~wuB!BY}}˝!. :[?4ks'۟Bq|t7r9GIjjciߥBǓw -}DGG||I|LXHH@ ||#[cM=w 3,}S'lO˨挾r03lWg2xdWRSopP%/ ]|TVho-1,yA΅s}g0?߼b^m=fa *9Oª*\N'p[;fz(?O'hb17+uf7[4y$| s{P@_ "(8 =2/W\*G|k@}h x/xz Z/ ]:!%!q~|IBT,R7}s+8~jVڎdojᮏ $D lfqؠwΦd:SΗG!B!Sz+oC]YZRcsc}N&_3:ZEz&`^D9:XC` T?G pjM;.WbvCB_%*[zYhǏIl_GQy)ã:#c<~tgؼZuseapj{. g\5 }!KpG6v@N0x5;+9'N}\mn'$+ܦv-7Kw]B!,iǁle B!`?5: g !šozkgA@9(^\NtWW/U 9(KxwBlqÍO(;z\9'`M>.i:%+#MZB!$,Ӭ*ć{qpeA|%ʉB\yP !B!BIX!݉er#EMx 6[gw*0VY]0uh(?f)߅B!B!B2yd&NH޽/wVb !܋ؠ0)u>$TG/fT78|9|N_BXjz6eYYYz(ν%28я%B!B!.j>:}8溚ҫW/{1n 60lذ˝ !&0՘lFIxvӨ!6DSo@5 &G~*Bm.w?fz9ؑq":41B!B!0??'*0вϓ8q>|=9OfH hܻ1f$FzIZRiƂ{CIx@y Qz8nbi !DS( ˖-[n~'Wp0_'я+B!B!̼.dRThj5KݰO< x㍴mۖ<]wԩScذa4k֌I&Y;vODGGai-a֛Tj~.Y]޽팎{McgyWw@ Osxpҳu?7¨p&}0] =:Z^WF۷F赡sY xΗNzBx푮qMy۷Cll,z[^SGx⋷ᝉOԨB!B!h4T*j9ث\KKAQz̙3LP3gez)ƍggҤIxyyQRR A;w.&M̽rW\Ɏ;d2a0Xb۷o'::>I&A@@ִ8ջzjmFTT/ӦMc׮].ׁB3ިhT,?5tc=1F_(_ﬠNp^a^tMe 5Ćhpq>[emy2tFٛt|]|xE&JLu7B5!شiC$ǛB~NnS6?B!B!._o/4j5FFsqxQ/v}ˀYt)T*ӹsg_| a֭VL2K￟h^L&?y}^7|3 " !ĥb)30(owhRU kUTyq^[Ba52jgu-ADk)U_\5EDŽj+u !Ɋ+8p xw `@>c**UB!B!ҧwҨv=萔Ĝ9s1c?={7ޠ_~6Wcr! FXX:BZhQgxgZM-v95jt:F#^^O3)ZgTcl3* IDAT'5C)Q>FjxxaUYyF|T-DŽhJlsK{ p\B8ɓ'ʢO>dddN###D?=%?AzJ"B!B!Y-d2{j4^~uwwƍcܸqs7sy?),,$(h_tgu<<8o@ >NU` #OWQqW`6N-x]ׅ15 B?8^'445kְfc'fhQѤ'n /0tà B>5ĒWf$:XCz;ߤ?xW[5vW\Vċ7X M2?A=RKII!%%mزe SLB!B!KQQ.{55 ViptMDDDLvv63gδ}'{>}:]wK,atܙVZ( cƌ᱆ B׮]̝;ҋ^od21i$mfwڵs\F!hl}}kܻ)1 0l2?JEŠ>^ F*OAkP9_M%FtF^agֹղޥt|=-KGuzmu!B!BGecF߄ֶ8NRvVN;L.=Ο=_ީV?9崍F#YYY qϻ+hoB=U|n½Ts0[GP0/|8\^B!B!B!Dӡ׷B swe'&9K{2[ jB!B!Ծ^yVWƉ#$mgô !I߻΄B!B!B!Bl˝!.ص5rg"++reiiiTF=G9g{mc[>>ҼQJQA$5kA zmѭUzE.8OQy)S\:Βm?u]t-)dU%Qs'.E!28!z[-oӑ/]wjWϞM?gwkdekIK9󌆶o !B!Oz !pˆ Xj7o(h;s/+Zlau-gq\&,_#{=mYuXc3Vc;6*U<=Yxp;/v8:|ޛ+f3蹥0zhǹ\앯1ۇs9~g YWe%Э/aYP翿?B!Bx] !ӧ$q sZF#{a)ED}&#o{h2RZmgRL< ok(e.c(>q oAtW7n*J1) A }"4w egr}Va4ǙU틟4QpՏ'8?w)B~YuL *J oh "CM۝g/PZ#<0q~ϙ~R\ڗ3ף{iƽߙR[F7E)۾!B!BG?S7]!phFJ־ױ-#bxgⓄ9oEuZkp{AOEucޞ l\_'<(rztaȻyiû.CQL||K$D Jku~SoW7[).oQסV}?!yf=wRx1 ~e{/+?K?*T$$-S,zGLA>%eTK[|M%1*O Mp8[?oM@ Í|5mK`SL}_?sS!an>^L6|,m\J_ULz&\u={pi,S"*8~ <=4j5[Q/|+ &#[ʸXg:?wo{w~q}9:};n;;}Զn>~~>ӧmvLSB[eSw}ۻ:sB!B!LhT8_䙇B!.kײ~zL&\s  jc~!FӶy+^_)ûvDTmFt;!S8~6'?~Ü)F䎌;?_t=~UfC,+wċccBrK -wLh@0%e v}yslLmDde;o:F /Ze/x-9uZ\'3톻(d/펍s>ZЙ:gsSY`FɥpBd,mb[~޽i=,Gg4Pj;b|;YZģ78th2q|sxJ}=[uֿ(Snbw EFy:j\ܟ?ƋGΗ!X귾Lܽǝ?(g 3[厮/p6EQfWxhDD0/Ƥ(U*Dwu#B!B56Ͼ%ϰo3X4VyMZ|2\k4sK 9hoIY>ҩ~>xi46m||[ۇF0J*xqL>Z/}v3qU71ukѬ4z&w`X~c M9LmoeUkUW0coy'c--DRޚ/G;W\Q(VPX^Jչe 7*]5ێKb*D](;Ǖiܚ;\WDilWZDrLz;S/ !_\Q @h`\xkszwq 3wcs}9{Oaߩ#<;>\_ =51j* hߎػ!BQ\\ի-{c Mɧ~y7=EM6oNrtk'OE^^Wfݻ.]n:^yh{e˯#*O׮]y'Xtzz.ڪrv s$js*78~5&PX_F1Q_dbK3_Ork\WsYqgΜ!** ??># ëzL !]c\;BԱ˶[AR@Ql_\QƷ;7,Onk/`<<.6z-]jh2Ty#GϝyӹMGnc 7#:aywR((#BORƒB]:F}#)&*]5kl~|Nux~apެ޽\wԟFc0+$… ':W] 2KVu63g}QU\k(R/;#/gWqC,p ;{!BѴٚD]K0 9s:l= 77BC-JlArybbbUYYI~~{eȐ! 6}Bll۶e˖|||}} ֹm;wx>}رcܹ3]tM6isq46mڐr^[yG8~UCue)7޾VGmM&#5 7|NmkTTxp߃ F@t|{լ!Ly/(y`h٬;v#%%Ӽys7~~/66-[X~߹s'QQQV'&&ҿRSSyg ݻ7_~yBǎygIMMO>DGGaC!Z-(:uÇӶm˜3L‚-VPsc # 9yU*spfޓgrdM8Vhݬǎq4;a8}%`Kx`ݓҘa)&?*:zg+][c宍ؿ\*G}CR! wh݁[czpR?~ז}Rj\}knaWF:vgYLńFqybv8~2s`28tן#YH,Cfes ˵>k_uMF߻^m:[9s~˟'og4~m_Μ6->xB!DeRL,?߁,?fP;v )):G!!!om\ L[:u*qqq 6f͚1i$^9s&}_%"" e:}4Æ #..ʣx-wuse֬Y駟:Xr̟?Ç[3f̠Vھ};aaaN׮]ԩ-[d]^۷/ ,oeʕw}yf*j?sߺ󞣪Hazc.ܳG,q?N޼RVg{O{iOO{/jxG? ֿ+ؾ};|GL4 i0, EQ0]эF#&LgaÆY_pUVm6x6mv[Bg7o___Z-};k)7>^5T׊&5.Azq9AfF]1Ѐ`pS>}(thu7/}!wHNKDŽ.z)(/Z}݂7x ~aMĆEQP^LVi/=Ot/Y1?Lh@eŌ}ebGqTGFs wQQ]E4nP_˧iլF뿶 al-̡eT7-_thaVP{<|k.\GaTsKkIO$ן#_Rبyn 3^ [Z ^s(B\x4s˟׷3oh۾9~]5{Y=OܟahΰwB!MI1LY1vmÉ/-g'1s|vנpw3qD^y˹뿍k??x s!330E̙3Vۺ[4io&^^^0h ΝˤIx',Gq V2M0Drss!//FT;3g cVa/-{Gؼy3,i?Gb׍7HPP&3h }]Z-#GG6kkGmcժU̜9~~իWdfΜisWf͚zṣ۹bmuֵ<_~BqY"͝OYGR>x( k-Zwhy:IҖ &Pz ^WϞtƅ[v 7`3g8[Qr;Y(i)*ӧOn4heժU(J=yYGFF*6mo)(ʾ}///ڲ~J``ߧ̚5}d29!𤝿TV[ܳCtFRXX)=WWU*_RQdS`4\Q/6QwFYUNu Zo3;_V'xثA/NR5(mѠykzWU:wN1 :~Myƾa FN J)\%|2_)zŸ}Y/;gҠIl{~ꃃ)/94VWJduǂmqGZM-v:5rss ✊͛7w_tItF XV cO ԨsXU*{pb 'з ez{)_LXdO{[\GB=w<Ćwng}Hh`165 {QkT5HRf IDAT7=q~ݹQq9OnB!hZxe!A,ړ4۲ezOJJbΜ9̘1￟={oЯiE-(رc9tÆ #,, NGaae)S0p@Oμy昭OVV4k֬m<b!ʡRzy(~_~+VW_ ٳg zV%??qi>[h@DDDC^M<& J಴v-W}ȕ'yd+>h }f z+K J_`/݂Bp?~ wr~|>"1z9*`Y?2PEusX?=<4g͚ey>d21`vЁ+VPUUŻkYBϞ=6mdddo;U.!B?,Գo !#rB!c,ړr]'^4۵kGjj*o&VdeeʀtV=Q-jF%(΢El7ydyN>ͭLm۶W^<Iqq1N^UgǎVx{{3ax ֭[ĉ-RSSС :?=拡XjZRyvA]oTPkH<ݛsnR J Y";Zh`ϋhf㰨"ٳe1.u,5TQ PGtDw}A0N\NKn-MwȐ!tڕTN'|by ^z%.\HVHMMwV+N:E͹ v|B!B!B!_Z8SŤsy{Cj.jO>>6mڐB߾}>##.]L׮];9sUBrr2'Ostԉc2f̘:zt:N{R/9r111oߞmZ׎( wrTn5k֠j4i֭o߾$%%Yjϟʕ+IHH ..nPն?k׮MS(Ouu5k׮e„ n{d7vݯ]&~qJŐ1OydsΜ7'`4hjmRtToNbMhTYDPg{X`_sf`,CDrK~7܁c'*p4j|zsH+lũWj׮Æ #33-[h{n:Lބy-[O?ٿ)صui[-;y0ɩ}6(UM"B!B!KY>}*;[vH:ImY-;y0p;0):MՒA||[5 kHl!''Rm,(( >>;vСCUPP@AA:GWx"-[5jcƌ;t)ӧO_ٵ6j/2p@RSS8rׯW_ |UWOs _9sb ,YV>Q^/~!5#($o_y{J2~E֨4^~DlAΡU33ՋWH.ҜLJ/^LFF>|2aBiZ)//'22T4Msoa#c kK@u9E!28!f'}v!}RҵU;z`*rJ؎[^t/Vn*JHnA䎨U*:v8@|d,s=p(IiGViPOΉ^#'Ѩ5mh?ɟ Fuy}RZU "Y ?5_rUgXζ?ӟn? !B`-N4UOL[\-cs݊+:t_H"##]ΣxPPVDGgek#{q)YvU_c+c|kJX\1f{A.{9LubC#Imɺ}d:z+<=RZ1ؑq[2gʿV/Yvtp/kQw7V/+{Gd)(yx3 ˊ.Qc/ߗuԸVn;mu|an? !By:h"ZnO?m̙3\uz!MW_q h' Ȫ0ti=[oy(wfiii!3[z57jnƲAZ/G0{R&rx\3)&tFYKo0PTQJtHxzS"" Vh0LWNH@~>n{Δ^͟;wj4|)(%, ت PTQIQ mP.(?_OZ\W [mWZ'i׍^_gc]ԯ3/zb}΍ Bg;U wtTվFzC@ i"zE)J+>PXPA\D@Q"(J(!$!6?"1IY+keN9̞w/ _`@]%奔<5uYeVsY5VcM))/LӼiӪ;^ǜYsCB!zlanŋn4FkBe8}I !Gt:%%kX){ˉB"Bpxx}{YJ[}}ZrF}0bxǾfy@~Z3So(*+!۟O|^]1x]ip; w6|kP,$LV^>wE6#QRs2P>QƗ[0XwoN|oM_BfA.=ZȕxE`#S)_\Ru9*e5+}G_>Sn0P.WΎo^Y ׯ[?NNm݉=nOjv:]d[kT*V~𯴊xt٘j_SyS7u66;_!-;bvܬ ~D~I!&P%%۾F8(Ƽ`Vnv>ߵo7K{8z[?5-آcX{iKQkk>u)5oSaײxj\~X.P_B!`6SQɝA!Dh8qǏGc=fO~YV徻"1}awwf?8'cz`rtҾAᬟ!qٯ߲bǧPnKW*HHKbųpPd.;Ngd({/w$o$Ԓ4֟ˈfYn +vm{Cxh՜l\Mnn on|oML'g?/GOMp:EAbkī{sq2]w7}JlF(,-Oû[Ai'}d(0qveoRXZlvlTL̩_S".20\\)רy~<62Yszxg v/'G'f|~bʠGCOlk4 Dp6%m[cLkZiI~a>L_.nacNjz qjM֝dW!B!)@!DS!K/+jٹs~^n4 1/Сq+gCbz삋3#q='l.HfD4 mTsyExazCF~6Zp*y hUno~Ş[_Mآ~4%OxqY.N$eq62Z`rTk5]̓1=ٟpº[^S.Y_X3+kU:݃^w8sg}g~o(6QINg}|<W73RMd̢Y|"IAz⊓#ݗ 'm۶SھMC*rvr4?oWgھ,ڳS^?XGtǖch8QB!B>yX!DVoڵkWppp &&Ǐ-ϛkhM>3\smB[kgR*_%`|;C[_Mآ*UwT8JB0}u)(-b`1GG(Ju|.36bvDՠhpvrbpWaކy~ͺ3nNLLկۢ ſKdϡYHCg`oc?NaVw-9kkj.ݸFJv:~b!B!vBp,q ({PT(5:ѱڅxu?)*qP i0%YMoI|5ɿ6 gz.&AR[T@?4-7W]OtXcTKL-iArjmw7]9@ص:ήRsg}w3UֶÑ]l~`Jz P/:98(J6_s n9~k '=/ˢslXڪWǜcWOߊDXS>SoمnRs_!B!d h!5@tt4BQZ-NQFuUH&k dr؂1}hb11#I >tlҚ;7U/XTVɫ,ӘVʿ6!5rM捃tu3{0ɉ[]ضOfml]xx18BJ @{-3s(P1KiaQ9Z ŕ~n>ym|~{?Skm֞RU| +9qA>:rJYZœS(Si5Y|v{HLXYq6EioioN{(רl;.u|s*}zbG/3}/B!BQ{ `!V>|8+VwÀ:,^:Y֓{iD7a@nLټ2=m0sr<MR5 ^[?'GG6lQ阗|w6~COË\Fubu()-3˿^M_ Yݸ5=Z7絵3zQ,E%tlҚ:cn~xm-F/%L@lۍO~YSiOƷÿRR^Fۨ=ܬozb/[O쭴훙`|~{?Skm֜3uٛT{m8;91cn <=QFub~%aQ`8>(1}fgCyOE4#Gud3`ZÞߜc " ͮh`Ws?TÛ@_P颅B!B?#~W:vSq!iӱҶ+ h}E鸺,,\\\6}mJKlG}P.'#/`,lCӒAo NW|B|lm6g~QY Yyxx*Yx׬oK =p#77W +5ݿMկ[s~~q!y%DlfKs/)vAӒW\}MkO<{D]VmucoS_2flf @5J_!U߰Q4.IK\I8];YB;O#2BhРŃUn.Dh*ߌjj[/7_|L՝aVcנMTuRoߧnjTߦ9Ëu2 _}=6 舀:MoMڻ?xxl?(7)Y>%n..toS͙B!w3fk׮Zj3j(^~Zٳ,=S{EyJ۲رcGmǏgĈ_dĈL4fb̜9˗i=[ry٩ZH؛ !B!ukcׁ? ϩ܄B!ݻw\aTkٲe^zѣGJtd'ŋQe\BAAm'N`۶m<}:/^T&{(-gKBcu:-Exx5&Q7䫻B!B!B!Nm_<x<ߟV%ONTT<-Zn ^]vC ԩS@Ŕ :`&OV8cymۖYfѲeKHXXOP٣}7i$V\ҥKdٲefնm[^}UѣAAAܹӬr^aÆ?#S)Gaa!tЁvѰaC FFFݻW__>|&MЯ_?ZlɆ zɚ5k2e zD}*+C aM<ûrF6,0us2]4+UQR?ۥI8yo\&alV$W53jK IDATB!B!B!,St|zt->{H?J>{O 3gXb/_ɓܸqٳg( ƍ#((t8s L<7np9]Ɖ'XrECѰi&<ȥK7o'OX_QJi4Xbcǎ駟&99Y?4 7o0m4GΝi;p…Jq=xyy;v,뤥h>}zxt:>'x'rUٶm͛7c1m4&L-[:u9MdVΝ;{nf޸B⅃kӟJo;w|MZEQ;7aҜ5LNKܖ鴷sD= +R]:jf͊; !1ZM|||W=*ȵyٺlM)ĝ;Zgv˯&.Hfl8I 7H_ʚE<lq6O4~{_wj"~=}u{G[%EtCO! ٻOŴ$૸8tB!w{bt Njbޟjlذ,T*QQQ\t8>AAA/Yf !!!oͦx *֦to5.5N*̙3(b888aÆ_Si]ɓ'pN<ܹsZ_{5j?;:wgΜW^ˋ3fٓM6ѩS'|||Xp!o꥾'44k׮sQ=hآ3U 8k5jNCJ&G'g:J_ )'Ww .7; !R*_C-K7*(|̂Jd߰(3ejLJ #?8 r|7}ool<[.%p=[ -Ngk<}([Tg~Ӯf9FZkSUY4ol#1#vORpp;7{-Nc7S3n7yܝ~wW3R.ZE4;B!\MŞ5NI&XKF^ '%%IpppEGaҤI;v ζ}n <:JU㲴{O@QՓ(W[ע_0Z*id~oPĝjB!2eFߦSNv3m=;70}DNQnmFCNQ>A>վ9+*+AiqZE4e&S|ήV$Q*̩cZ Yxm|ԟ1洟1XY޾՟k~̩1o|kj?coY˚E!(˷U/~UW|R.gG2Ь{Ew䞶8xKcvf͔cS( 9xW߷Mgfڻܿ3czX_q[ܿB!j[FVN/Wp3f cƌ!//{~4"##)**"33ʺ/^';;/xɱ(ocyܔrk*^EQHMM%,, -))}MYKYrDEEVK#F`ʔ)믬X'ߗSXXHnn.~~~@...զFNN%%%O7:t0\OqIhܲ+_D;Vw?B\*~a^NyT;B4r(꠷B9u4nnyy/'gΗ É/eD 1#YPQSTZ¨*i!w?TR~YīxU, r9*T4 b#Sr6Ϛ}lFܹd(:>} CMoM|E%ۂMκܯW\@^w3{_Y ׯ[?NNm݉=n~4;EWkɶQk5ag,lZNߩEk_[׿g̅W]9oc |>* )URQQw嫭7/ >u.@ufʝTCПڀ%E=qaj~}ߤiy N|>kB!u属ÙS4i;'cq׮]CQ6l/}/ТE uƜ9sXd V,V%##///YnC (cy(\t)&M" ?NG߾ڴiæM\gggƍٵk/M6[;CPuVZ͂ ?IqqUuQ͛7sI^zjק>|>}!n\cwpte}CJI V/0Fm =>NhںW_ ¯A$|CQqb" 2,f:DNMN}wEXc~CVM8(3?$Q>[VL}ҒX<T*,ˎy}`bFweHJcٯ_kؒ_k?}QI٬Ëytnچ6Q+?f?b ~8uTo~v o=!g[G4l*g/Ҭ-czŕrcy ԟ9t(ʭudtm:( }:`|ߑ\SKs[}s ɂM+u93vܴ:Xuƴ k7/]+>נ?axoOl6U`7Gbz I~3J57ScLX[5UDh8rYT|ij}ӥɔ4 iXeo!Br੎clVx</@`T5[<Mjj*|󉯾Bhh(رybbbhܸ10j(-_RAСrJSoÇ~@1yd8P&-Sxիeeeޚg}׏&M;88zjFͺu())]v,_^<|g<\RFͩSjT8tRRno~/r+ghq_,단*A^dx74r4C3MGR1luߎFSNȾh,!B!l"33D}Q3x{oiɰ)*+1}O:s*-*S 3?n-brS5jkN[m69p6S"e~~P捝NjWyfxy08'NT5lɱ+L[.}Zu䡮H-ms)ӓ))/ՍČkԤelU?_z~6o$pDxSPX,~o3u`3er/,'g28ra0՟ٲM).+ab*=lO>EӐ6qvrOmn|i[52ѱ>{޿B!Aуy zp5կ_?233IJJBШQ#\\nմiSCVVYYY4jH?xbOfͪ<'^ 1T /XD6l%6Si;w͛7rjՊ^zn:&L?CU};wk׮^e?o=HJJDFF… M #::Z… ٳg/tCy֮]ˀh޼c x.0ڌol`_Sƽy89Si#JCZ3!v9 ՝^NAuB8x ͚5^n7M7~?KFR_U:>ߢxfDT30c*[^1}7d׼( @{]gv4l),-OѾQ4Q?XYzꈧ;jzM\VcU|cSoͭ0|]MNUb뿺gzUOr)}hfu%Esb Jض;3GLѡ`ԟٲQk4#ZG4a|ߑk {e?ϩU0?>cXw-i^zdB!Y3kc5ӠA4hPe{HH!!!՜a9Cy@[v -[Z2T T;l%kٺ'C Ɩ-[vo_|{Ǝ-J:.ӧ?aՈo::9Ty'B!j9z(#FT* |Ϙ ix0{ᚪn@TÛm<|PTb?(/_5TMB"))/qӦ/蕳s .6X?y%=>MIY˜qvt~ai1f֟yAiUהگ6sTLӐ(e# jæ{y0sDb٫ާkju:=udkwCI˪v-3?^>j+=lB!*99޽{W'66Ç'2ϥ^ni[|U͛7ʶg}ʶ߯CjBwjiӦC1KXtf t#pgچO(ٳRJ=0B+Dƭ.}OK3P;Tl="NM_h'1wS?!@VA./6;`n3R9onԴ۪_>w7Z c[_}Gvq $uiRӗ7 ,[unq_\Ȍ(./2uyc}4mY]eFRu9( ).MuZ ŕ~ Mgv{HLO*&_Y|%R[,$ 7g.%UnB!*22/V3c .\Ν;jBa)yX!:DqvvPބm1kyYSSz^d2Mi[6yW\@<=ֺkk?fXJKؤ5tǢ2F-JOwOzx3L]&!((hޢ~n]5#vԗ*Y#൵qrtm֏zdRXR[OIP 2|JɾAP3~7U~km_: am6'nB*ƭ+jꔪZ~}=JfWcNqp+mV ,Mu{+mfl-x{Lo? yZG6I|R]s("nMghB!SEBTGtާBaw:cmW,.quseX5RVZR/Tai19E6ũMoM|E%dᅯwM&sm/.$Ta3V?r2X:-9\?SiL|<X>HUwҭ6u괤fUmٻK{Ru|UϽ[^o!B?7l>)+eGS.IV]I8];YB;O#BQ^nxyO!~ j-xxYX9Unrtp4:a1ŚOʼxpWtuպ/_{9:8bpmqw7 㡮9z,qB!B !BZ3=;uB;2簺A!B!_ !B!B!B!dX!B!B!B!"dX!B!B!B!"d `!VKNN&)) gggZj%:EK4w3eV( yٔmIy)E):M 5;qjljyF5Fc0VCZn&%E5~[9_TVBukW<2 r ֿ=B!B;wZݻINN6Nl2V^SO=ѣk%X:ud􊋋Qe\BAAm'N`۶m<JLL ۷y4o\|ILL8Vsػv_{ap? IDATw{/#Q΍jJІ8Om˱iI?8O++cnB&_ûku:)*h?#&w4+Fk>CJXۃY?C|P3?> 2MjA3 iIxv.BQt8]uJƜL:oVoץY[]ėϿE\͸c9k-cS}`bŮPմ k7/oqjtAIlEX#N-__6C/!B!Nm_<x<ߟV:x у vܩߗD޽iӦ ;wC?otЁvѰaC FFF]СC gs)ڶmˬYhٲ%$,,L'@hh({ѿ>rF4i+WdҥDFFl2j۶-3VիW3l0G}T峾GaaYٽ{w+ÇӤIG˖-ٰa~_Ϟ=Yf ??#SL^3ZOOquueȐ!^`sxWOȆeNO>gF]^@NF_2f5Jt: 'o |D6,ꏞٽc !J"## \\\orYH$oy[}rl7Z G@{r`pLO'0!tOo:WHLܜ]pqrud3T)˗|S&9)l;l67ck gHnQ.bXd琑M1fAz2u9q1]LRfgS.i 'mٿB!B=ӣkٻD1oW"ٻ|ztՃ[l0mڴJkN0.]FRR<&L0/ncҿ_NZZӧ=OQƍGPP$$$p"""'@7mtc9(h4ƗZbcǎ駟&99Y?4 7oL#..Ν;;v,… z2>5BEQx'8q"W^%>>m۶ݼy3=ӦMc„ lٲS'KԷtܙݻw7/T^H>OnV~۹hҪ.(֯!I aҜ5tZWN՗? L]ryވGBԘZf͚5 2G}'x???~GԾt#?JUiZSJJv:n\cyxӮaKrQO^>dJΠ˧27ySoq^{};;:fۚܺf1n$W\ZF՚s3$=?J?o~ AP5kפ n׃ ').+e߅">uTlμ۲ !B!V"ԧbyP/Fw{:Sj}9s pev͔)SHMM%))~i2 .pIΝ Tv [tgFM©]{YdݸBL~WC9NJK9Ӿ &7YX!DQZZZMaxx8{MC"kt ug˱ݤWz:JEnQ A+/_1Ύ K mMUbNŷ| tJ $sd_r .6A+)X2l}(77>C㏢(7nukiEܹ>wDr`戉@ŚóWOmwWٲ !B!\MeZʟфxxiHpwwVKbb"uFnniSRR5jDYYNJJzÑRS͛9 ՟rT*&Mbڴilڴ ___X^q 6WADD?^1py -ԧt:>?Vw!n:p"IDvg⎫ρjԔW)l\ݽqrvo Bܙ `!5燫~FC|||hxǾl=Ҿ~ǣ(*+atiFLo %Ujϭ)s~r5Zm'ҥyJH&.|bYJUOF^qEWғsyj_s+Y57>CC&YsEe%zJܮ?Ł jݦ_HM\*NQsX hӍ ~c}O/}Q`ǩl|>}!^gp-cp,]:FtA}~Q7oki5}M[c(Dr|7heط첈zȾ!w#~W +9~X)-)VZ[|-0FQF-9w1i9J~qaJ˔VW?);NW Z'W&UE%)z7}am|(%EJRuL]nj'bjFWTP_B!BWw?XNiIrj?' ZVJ)>S;YItta넄Pj( 4HSڴi+C ߩS'eժU_?~\V"## (W_n2K.)={T֭[+ʉ'1?JTTҲeKGٸq(JF(k2'|RTZX9Ξ=(,OWWWdȰaزe ׮]O_Ͻ7]ZB!B!B!BB4N#55UV1rH !""6&BBx b4׶Z}B!B!B!DBo@/_Nvv6$(( `DCR nB!B!Bˉ !Ncĉ͊xCe !i=f5c$84mhY1TOqv2BBQZcm?TS?چ[9vU",֨ ($£XBÇsWP[[KTTT !B!B_϶N M{F;Gg[1aPGضmsd2QSSΈ#eԩdggDTT˗/'==ݣ<ԩS)**d21p@,YBllN:̙3ٺu+TVVn:z۷3~xj50mڴze{&W~<#<ȑ#Yxq2r2)-[-p0eF 0͌3ٳgt&SSBqq1LEQ1cwq`0p5+6oi&x &OdJNsx['7@( 3Ksm~^?`]$Us&ݧ/όǗPkDV7Y1v{Zd=Fts6d=o΁9k]).ڮ( ~"z1?`Yzٸ5qP~5bw`Բ_+~gK̸Q#~>wh8{r.ĥGB!B!B!2/̿/yu} 8ppq6lwMAAL0m۶QZ~OÇw^yZ-O?4};ƍy[6866(efeeq?3glok)MjmNڌ T*tgHrsIbJ2oeiN<v.6ȳ&͉1:=qxٷ`]$ܶ{oEqPף8<ʊNo?EQ(=EyIV7D8ˮ8e*}(yu ,B!B!B$D`;q89栠 , vj5wyg @;N#2ܾ)))fJKK{777FݕYPP@TTv9M'"f\f~I\bg]{2M n@9FEa/PQKr+mkݶA~5JEHh f n*? mZhDvSM%rAhwz(BR@`pX׽/.2,B!B!Bݛ'e2Gzù=.[$v;W&"ömb0O:E@@ˁӤ$F# +M6TTTP[[KPPyyyn'jM9ñ1kw}T_`j*?b0T5>f1Vp"k ΃ae1Z6@ݠ`.#X嶍Mq(Z&CNHhfՂ?L|v(L<],n{p.ĥEB!B!B!nёѽXԘ /ߏr=Kiii 8GyqFOOO{̛7‹/]1`{1V+z^.]an'8x˾8ygvnXhCe1!Mnn`]%.K!'+ž-_8"ԅkM%7 M:FaX7@[Qˑ?m8g -mjmlG$Žv;~ZNCPTD$M̞-+ꕝԅͫs( Oh2w==!3BUSSC~~>йs饥dggH=jҲd@tOz/cl-c<鉽'SUk@hHhPVG'+!uO!*+PEFJ~wk 2ЖnB=7&:4=^To];E5 IDAT뗚?w.V2uj巶K%Kwsռ5ǹKwR6}CQe)R0ihB!h9{-K824y تT*>#fΜI||<п֬YxZҥK4i˗/={h"~L6x())aݺuddd,SVpBƍǒ%K0LL4}5Mǹ뮻xW=z4yr9qh3} U>6(m2y9U):c?7oW^3'r3Q.}(/lCul>ze2(jzne:u4'Wa(tvssԱqM5 s66Eۣu8~Gf!:>>sg޽BWS^s^O˓ X]F\b:7MX\|Kjǖq! t[Gͫrg]ٲe _~%AAAh4|z'OߧGS[[O@@G1MqֱStO?ch&RV]AbTG-q%úCQ]qn0j.XAڧPđ,.uUUU>}6m>N"((=s(++#%%DWef?NRRm|WbЈx~͛Sۨ oL5Uj Oߪ1*kjʃ?|s&F S?Ǔ66U %h.mY8.,m߯Z5TE6<{v `!u֍ >ի׏cbyپ};ۏ`ɏOfsnCnh޺Oy3;s|Łr'mU 9g6FQJ* n@^C6l,9}oFtmޘ<qUp( Ѻp%UDJqPk1aZ '͵yxkfXElXד+U= cAZ@Uk1aZiô/?]qh[BD ܥ{O?VtizCK;}B!aaa> z&''7h.SҵkWe{&_ZMXd4DD'>n4'`]$"K@dlF氓ȼ:,zqu.ɼs=Zϋ+x)T3,]},zqIl;HޚGߗ{ݵ++♏ߢZ!}xt <{?B$ۏn'M;z,>izZ7q3镒^wzt6W׷OWpuW?Z'GaX+xzTКO!B!.yyy v:fbܹ8"q)H|%!a0TbÍp3\ !h*8p9~x5˰;*ڤUpnJX^)zre@'D&uo_w~_˷m2ƒCc_l[WuJ X͗;ֳ7w'y] hW# ;Ϋpou]`0çOre\ݭc#|_YS0V|ORzcQs\3jꥻ/6,nsݗ%BCR7Vԝ_}M纾m??[[~$j-&V5^v8_EQ}];M]-q\.D{ZO!B!B,Y@!KvɄjd2a6/rwNff&^OVV=z!7ɡ(|j&L3?NDŽzbB#(*Go'JU{(cS/@Ojl[6eltKl:`՝ضh=L "5-vm GZݷՂag͞M La6UMF]Y }/Z< L]wFC.|s2߱{|knbbwnfw٢5<U*'ub٦oIz$yUNlXZ-;OlV;M]-q\.DYZ/}zZO!B!B,,¥l>>,̞=nE /`X۷/ݺuXyoݧĆFRYSM綩3zy۶gDLIf^3Fʴa7q νZ4G௟Sk1#9CY z Fj5LDO{onfծ $DPf'mPT<2vO/{wϏDMK %-S| EQh˔!7:& ^aгٍ{b]{!ƒuU8:2R:{->_f8ɤs1j۾+7^ĨxhcZ4~R~FJgN]q޺O]#07әL536&tw\]߾ww֮<"$xT/S!B!-GcOJ߁W]8Bxagfډ#YtLnU"(BYYt-mi6նX2j)3 _1PYk 1*E=ˡ(K o͵UmRvcx,Z# LV %$FšV7o chWo E2=ۇ_Z͍ +}o?xrk++bO_ 9y}B!_/?O?2e}ZNɢ[+|O!2X!T*111; àsXU.*m"[ocm!O쁃%óHn츳tM.#;+,XGM_+}o?xrk-=ͤA}S!B!`!B0SF>087{_0B!Bd].!Bp[b!B!5g֯_AiNhqV+%1^*̵rq1Vz/dX!B!B!^y~Ϣwj;%;5esx~Ŏ)h4r *++M!77Vuɓ'1L>TTWW7zf#''e {KSf3vrb(b2{ap4^aq=6Ceq2]Kvj}1v2knZ_ejm2j}vT~OυB!ܪ!??EQܹaONI DFx|̦]HDxphOaZ؞}1KLXN e؃@NWp9kk?‚Bml=>] TY!*2R:?!"؜p۶wfZ~l?I ƶH_F~E1}wPqTؾAYIb>4j?£qw*%}u-@Fq|A{;K+EBosϿ;.|?f[6o66m'`M95:\^|]1 ԁ`8d1P{>WFZc;9ۘDQYJ~t$U%Br~j&5.}c3Q|޳.}sنYLMB97|5~;g166EEۏ_$C/6+_/3W&>_FzQ ZVzůl>q7c>*?~37 U~O΅|BuFFFf^V9{7~X!7z4@QeY׋*FO3uoy{>ν6l"1r ˴a7P6&=]ڔa8gURUNV3YuMNǕEQ( :uvezBA.˳q+:xIK\~bڰd\=B!)h!LlvL&VXرc&99cǎqF>3?; ##۷` >>L]Ց޽{Y~=Z~믿>l8@ff&:9sp]wpq6l;CAA0a~iJKKp)zJi-v6>0tᱍ=_+pٹ4O`BW7,uv'ct|{/j%XI,pSmFglMA^,`oj{E)ʊN0n|4z*_.#Sd™\ z +PUq'sM0V ec"08^,7BRx?ܥIGd6o$E+xzٛJIAS\YFrtBk ,1$jil82{xjSn] @jVn.opr0a(M61W ^O]|ghhHīt_3؁<;)o<f+P~1VFO廽yw'ԘkQ̼v3_NI|0͊Tp0g}Gx1k O9gPoQYSM̐><:vg>}?w4~uoG@!Bq c 8oߞŋop=п^y Bnn.!!!58NQn6:_ODDrwUG~~>:Hk)))fJKK)(( **sڵ;TNNj ta \3~.{3`$.3s㦄y^A_@ A!oSš_$NW a0߳6ϽR X{|n*? mй6C#۬j*1V E+P|z'ul246B?,scHk+>W1K_&Edzd6zcu5Lx%U$:4w^tf3&܇HZ d1[-޽,gѽra048]L\17V̛ jgHzw'9w7N8iۿp;`@QeT`Fe fPƶӇ_gcN~{zPT3۟~#x%tKܛ~Tý/#;(P8<ߕWObT= _tnBDH(0[?gw~WZ玫( ;Y^ 6,Ex[EARaw9Z_AV3{K,߼;G~wSWF0mXq׻ϐXѿc&-Y^]?1ʑvW;Gr'+{|W/w:%cݾ-VU]x”}4dB!hѽybY&{$7 |vLS'OfTVV/3a IJJh46:ѣ\rt\***Um۶`0뉈;uЦM***%(g>[ng.̞H9ñ1kw}T>|U f3;7T: j15=ԃY[Pw +1\ecu(mlFab6ꖶn`pBB0 ج4g.6Ylǻm~/WB. B!4mM̺vttObţcŴH_JVquGqe%Uj¢Z,F|zfTϯ՗q$D=!bZ>߶9Kd5;6[-R5\3l=){|-Z6o^]/d KZ=3"><M0 5[x?[3]gn ZG+%z}uzX(D{Z5RFӮ-Jś?zymsnmG<M4U=FR>߳?!a h.w ?oHKOyr}9.} aY Jj̛'鞔kx++:E~y1ҽׯB!uzkY1X|}DG=~8ƍ#**P ??dڴiǓ@II ֭###{w RSSQ'RUJ岎K2i$/_Nmm-={dѢE@l܅ 2n8,YdbҤI۷YG}̙3'11Ϛ5k]W_}ѣG7(8?6I Ҫ* YhB a.cؘ3A^u?\MҮS?FOsv5?"';EQ:6Q>W&֨;_Fs[Gsb~}/BnWa1_-c>4j \;Q,bmlG9qBt|*C?|P#aՇϒ{-6N=(缲䇕'tnd|K !,uOj,bd p`[.JbTmG!gT*ވCR7V}f&XE>~~ {c" :}F!>Y;ِ9[BkBלbȐ_5j?PEw830YWgjRcdmCoYț΁@sw* i,ݛ]q ڞYkϮ_!B!}RrsslLС6m2RRRj;y뭷ӟDUU;vl<=Uz"++SN`AIJJ"44o;8}4mڴ!,Ú]ÇlO66'3bЈx~z?~c>A9qI Ak9"p0Ta&4<?uOb5նgܡj}!8m㮍MqL}p!4D'$we8.,mCFLc/b1PS]Nphs+]~wB\>d h!.vL&sɄl8RVhGni.'I>ob`p`Y1jZl.0iw8 CeruhIm˦@C\bԿ~4<9n&8nƌAL-U}ǃ}06>ZCƯ-Tnf-zhFS*ݓ:l7 L$s =Ӽ*'&4rjhڅv:_/-SG(ֳ勛[dH}we+p8!wTA|c=[edρ^n_k[w w{7ӿSzaQhjrKoڠk{`\ݭE.zϪΜ,- 5.u[>KAֈZ۽ii8mψ̼fc7Neo2i\ZqO0Ž ityZ_b]{!ƒuU8zKjq%oijw'1*)o<(ck4t}.S 9]ڧRxd ^_ =yp#<8in#><A{yU+-UOzĆG[o9gO~B!_СCM5ksνKAJ+ k|PCU)+>hZ7{_Ck[~Rb! ;375oNɢcz76؅b6^q\HE!@RL/җ ~q++bO_7rR/#Xܓ|&rZiL5T&'[vۇ_ZćGJ=gw)(!!<-@e$VagG Nݬ2\]B!tΟ'TuDz>KNɢ[+|fk|7ZjgůaouJ d!|up,B\hjZj0sQz䖞fҠї/@|D$7cv7tDǷ_X0ZobWSk=[]'Ám00-帺~B!_iN.b D=rB!n=!ZӀN6WnS.mc2A!B!.B!GzC.<0zA!B! yt\!B!B!B!~%dX!B!B!B!~%d h!nԐ(tܹ^Zmm-(t邟_Ǵ''C#<>&|z$ \ Н-{OP︰ )جl;ڏh:'5pyd/NƠgãgR'Z*RAE^]]]낀HbEt"=LfG``ȴd>ϓsO3ɜ9']s'oO~,:5neƞ#=_`1[Ŝt Ja:*3;sl+g/!B!B[ !k޽|wh4y-gϞMDD>Qx=}.̣w?K:-;8BRZ2A T+lBs#zje1o=\²ζߍȯxWGĘi<?6"_~۲ bb垍L<-:9jw'՟3u72 rX}=&`/gkߎ۲B!(,1;tw;|:z/Z})o8<2t~^_Gw&G^%B!Bq`!vjՊvڑĦM*O:W;áCѣG'I؀Z͞#,ܶq:5 2:o$əVg ԗ}GLV})ec*c=%z_Ly oO/KT۪݉ Ϝ 9+<\RW ;:?Ѭz8JQlKv~? */ʸêT7{;S1Mjzf }[V3V=y%h<EQ*#VZw*Y敌;}Ť(+Jr*}[w};zjqs,GۻڟB!B!ĭ@BaW`` ׮ xxxGIIIgN<̮CFto1xev'gtlԒ=+؏q[]nG_jiqli;ڱ{ ؕpJi hݍ'oiٹRX}gi$~бQbr hf}?L;stmC&mmpN<~g*7`yd _P=ʧܠG-?JynYXNIQjnhZݯi qsKc%W $oSe{!j !pYyy9/&&&;SV FRUyN?owo^/ʲ]߳|ݶ'{R(_tBD/|{sx{xzx{z'0:Hcd;ys<1ة:W%tdo~;|X|j4۠J( W-.^s(YV<\ak5R-U1w7SN;V}.\ z2|<-|9ǞԾ~_C"fggw8_T* Telo+}gڟ3B!k%=[G&$;4 Hr>ketjǨQP(-{7ƍ̙3Yx1ѤGЦM̚5kPtR b5YfxbIKKc̟?777>3.]ʞ={˰aøxm.^ȤIؿ?eڶmK||<ƍ###NG=epz0n8h4rJ7on,?SSOrwhѢJi$'#;,]?b5]G_ C[G3t*;ܬv~){~ƝgX4c1)&yE~M= u +4-~_,K h|)ӗ0/9y{yZp4۪7FBZv~ v]Tjb>e:47F @R_RD7K `1y^?tg݁- z!U*Rs3|Ss3V"ˑok[W[~G/ޗ:M,[l4pnU?[;Vζ+j !BGD݁ˋ$_ ?oH(шϒtt+o/ кPV](&Ғ{uϹDNy7wǹ](B^Er3/йo྇woN0w~pV]PRCqA&[&RbuNUGf !+11%K_kDDD0sL U|Xot҅ѣG֔%;c̯#BY9}qj۝۾߶@{֩YyL__ KyS3w's֨WվAsT@4ow|r(-:GXts؞IOau(]C{եV=!N_f++>ḃP+SR˖_&ok;ŖՄS-YTC9pW/,/#ߙLOѥ:^|l.G7jRqpԿ<7qsmLO="[獨#Δߕ?gڿ3%Uޛ^!BQd®^s 8b1;~67jԈE1gLB׮]ywի9NtՕj5#--RZCq) DPPeeeL6{dڵkg lKJJ VMMMϏ{6h^Ovv둜Zfĉvޝ|,W5~uGrg8o7|Dzyem p>L<<ޟ2* ?}&yY)4錗Fc9"u zLRFIQnxβW<<5xi`,7PRkVܺv(/JֵXoU/nTB^~ۻSі[<JU*iz+Bm.MQ\U.dl]e4ߕ¼ZI^b4L%&u]~GۿB![Z(UlGWUN9QmZ=yG(G(ѓ*>QO<)ZόVX_wYYdIs_|Q0k޼_L&N:?P)$C)**2ǟ4i2zh|w,X@iӦpBuJLLT%++RX||(yyWNٹsF#11QQT_::cǎbeʗ6[4%6x EWz %W>0d2)^Ƿ+3jlq|xdr/D8y$N%BZE(}[vFXAklG뭡~X]mQ*1}E_*u =a[wj !BGȀXwdtH. yb`[jѢ͛7w@ڌERRR%KYYy;*yg~嗘L&[)-Zh$++ xV\Y)ԩSy饗x"cƌqXMҽ{wyiӼysZn믿@YYoFBV;M6G<쳔Ulc2صkͲ8I||:]H:ȡ_+_, 2%A[F\'>~!eؿh@H$'T,]n(u氠("[.o;G˨R֗ɸ3OE1հ:bAa1io;*V3mJJ"(,h^u9O=GH;2{6}aN[Q~pfYw~%K@ !B!B!Jz˶~ u)]ZXx1:1cpqϝ;}GHHgY!{wӡC4 ,^??Ji2uTڵkGÆ QxB˽LnjìYx,fER_3~x"""$++-[Ю];-[Ƙ1cXr%mۖ :u=T*˗/gҤIDDDڵ+?ղ8'|{VJ'35ЩO¼t6~^<4wt|O:ym?o҅ciu1_( -:Cƥ+a5;BiI>#'a)cnYT"V}(k؎lhksxxj;u߼ə;(//#4!G=}TƯ_#/ѤM?r3IE/_~߀PJrS9>؏5KޝJ}nv9BT}Ҳ~O'ۼUdE+%!B!Bq^{uC+rgiԴűhվ廙z=Ν#::FRRR(//AxzzZho͠AHNN~_,##BbccQ+/Ctt4qqqn]999РA,.^F3ףuxeƯ d d?([#gfotBtEݣV˨-àe%[h=>NL.XOo8{u/.>ryfZEU/jOdB!B!B!n-^^^lҩnnn4jnFtzDDDX ۰a_~%/@hh(Vbbbl@nUWJ& 8tnֳ' #)_0W9/wO[ sث_`=l O/* V5]2,B!B!B?aÆ]c\ƍ/Zt{z_Wy+}4h ߀3).f>FZ4'B!j( ͚5ҥKD˖-qw DFTy !B!B_-[hΜ9c52ϡ:/ Gf~i5L$`!vݻF{o7X9|07ofڴix{{H5g3.Cܹx{zCNQB/#B!B!čp#&E?fAƤ7BaWVh׮IIIlڴRQTL&{=9B=j>~xϟ}݇'+WdlذEQ7n7fȑ.M!nWB.B!B!2[ qfQ֊ݬݝrZ_y+2k>c8ŋO8{,| !LB!B!BQ%8/eρ4o3f |6ㄆ3c vɋ/ vBG K@ !B!B!J&[+v01 GҭyTWռ<<[7j(N&MB `!B!B!BTIQ _Ikz6k(4mvҴiSK:b4ܹK ! !B!B!J??QZv,~~$-bBYH??\筷';;jMXjs'55<v'K@ !pHՒ(4kf ^ DFTse\腊=iF֧Ut,j^AfаE؝xףsl*:CqgOIof琚IF-1[Ŝt ivVbyn4_kl]M_.$QXZ ѨN=5Nɦݢl >^ лJw[[*j~&}q1w|ٴ=>~T=7}Zv!eJ= -5oOb#c xJb؈;pw}KWSofA.wHFA6`]JjMqw74_[ϧB!R),-wc @b-cBr9}ҫW/|Ja<䓼{<ݻ)S߻BnOBܲ믿Ί+Xfx|_fp6"qN]PFjn33řNeOґZ*u{0yVÎ&0򾘓c[HdޖK yeGv9~ܹ|q)Z})O`ޖUNr&jsw}o>0'M:ޖUoJmcΏKxϰ~?d;[rc:[[*j~Z}&L 1y޿8c{yW\hK yf;o=q3o|;OZΪ}?s&=|\/G;zj}wB τwsbN[o3gΤe˖<|x2Je?:W3ٌ;y%h<КP%w77<=+)$ǿR\ߕJ6K+l=B!W՜zۓHT?z5gcԈ. :t#Xr؏?Ț5k8qXXX| SLaDFFXv"B! ! ""Zs`^H'byP{Jt ehwSc~][k\ET8 '9+gKY])|0 G1S_s)Y[J/\v%LN\00}jhQI|{-|+>0*RNw@Rѣi;~=*o߾ұ+Vb CRXXX)ރ>ȃ>rv&B!\RTTįʴi8~xO+0 ؇u2{tCGyڱ/]uL./^H,o~;Eיg9 ax/1M{j),zM*ʖeJAXx;fڂP:L9?Ӵn^ P1(a>Q3Ϩ^{h:mbY4lq:h2|핿klcޞ^7NFuacwa}̓OF E1M 5N_`_gӋg|slwVJ,ض'Lh J=B k6WGMW|j-.dT, l>Lf%o2sl\Jby^*mt}]ؙxg:Wַ*:g :-S7mW©ׅo?V\ߕCQf_.w2.Wiנ9QtQ!Bá?wkQϼsnfB\&B!\as3d\5k|Yu  Л}~~c ͱ3 'g]BXk=zlyY5Po_lY͡7Wl: %4k3t %;ShVQ!u,Ұ^7u74N`Swt}Ϳ&i˼Di/oҜ6mgOQ ЖxIU:=3@v˪?9={Ծ#pwmmj^їnt|ͭr_Ǹ9ѹqj{[ku9_u:{^ۇ`P ?P@̫hxyxGfa.2.hmΣ&҇ !BXO-zJ"z2,Ο?OJJ ={ܹsdggSVVƹshРnnn+{ hݼ3x{x2PJV}COYJM_[~yƙrm}<1\p5/c9r<ضbf}ܶJqGv7cWSVӭ :[^C)*-+?HW]mzp<(d/*ׯ^5>vt}Ϳ&"'wNzi0(7\W{ƶfP^i|@nIA-0؈5^MjkW>{}QTh\ exZ;j~>DӴnt=kcUp6}W/)DQ7/ouWVQ_Jh2G~IŲWg[]-KM<;zB!Bq{`!f0 dӦMSRR¦Mxke+3\π6Yw` PA(7ŽR@Q|7JMB9oF:a5nzxzs)+Lהh2Q3O@xn{tk҆~: ގoYG恉󙩀e[tW(ΪN-yY X봄Y3-̅])%kk3x{z0'RNӣiJB"PTfDjnF=pbJZΦ#oMO*"B//)$׿v>A EA輒"W-}[8;bt*B!b}*BqhDa0T,4k֌iӦGpp0ӦM27IQXGz2=;㈍*fvjԒb2UW/؅$Dfa.%E#_|خ'˳\ kFvqמ4 bw!b'h\j?Wfn9_RZ2emŇkT\aYg^bwJBxҭ0N\< @NQ>qgOT eMF~:M,])]m$gVj28ulҰ~]Ϳ&l5dwP18աQKVیIQ+.d}5:8UZXd2QVnX9*9eVl߶~pwC|h;PѶ~9]NFQ' Ql<+YiH9]m~l툽گB!B[BaWbb"K,1~׈`L4v- @[DL!s =[`{3#(zc+Q3G5b@<Ls4# p\W9tت? < a~>%2(|:6lIf &=r~?˴!"0 \άas)irC</%۹x5_ ¢(BTp8c{ u|SFNq>ť%j~>Fxwh֞GT\P <ZF:]{]mU:#0 _Kai1#:051^X_F3]^-:2K?hrgSX}3>ʨnwWkt-)wU{j#| %R:5j.wߕCRye~8w77oZm~Lit\a !B!u~ۻSԣ.B*8WZdqbR:^ޚ,Vu7%Rr #jbB"Ctrxms5m%DWy9N~5^Jou)MGү r~b7PFVA.BV[_2RHz~6?7]BwLOFAAhy[+뭐2s)VMa4I"20 wߵog!B>_:O[?@kXB!n#G`!B!B! !pUd`!B!B!B!`!B!B!B!`!B!B!B!`!B!B!Ba͚5۷WSN3ܰ29s|駟.{\Rs) !IB8j9}4IIIRRRQt49|HL;_B!B!&V`9< ;waegƍ:tw)OATtfp.)5!ݻF;/Em" |ETZ)ٌFw$ޞ^zSGJqE_>>qAhӦ /͚5{4lؐǏ3i$ڶmKtt43f0ǟ5kQQQ 4:u0qDƫ=z믿H_yٓpmf_lC 1:ϙ3>}XQ\\ @||<:tm۶ԯ_!Ce qqq4jԈ~ѬY3֮]ks˖-yvy39}jV|2,9X3o_=G'fOG6BdX!]ڍc4dr{gc0wٕ݉pGM}V~1VB!B!L/oČ݈oš. yf:Dzz:wu [nr1>s8q"$$$pE=ŋ-һsr~GOjj*3f̰w׮]tzر߿b{E1rH0L;ᅮNyy93gδZEQxǙ0a.\ >>͛7W]t_m/ɜq6M&Vm& Iy`&]]BO>BᲟߖ q ؇=aJ8T)$.3c+!B!BW-c:"2?thŲ=?ԩS `ذaT*&O @ӦMi֬k׎#GtR/_NDDi `ќoMnfX { !pСC EVҥKe˖e5FD3CoJ-óV!B!BgB3D[ cxӾ28 ]i_oooZ-Cq) DPPeeeM?<<FCYYFwwwT*Y/̘1?lذ@@jj*~~~k ΦN:iX+d2R֣4>{xb2cRLUnvBndX!KeE4oޜZT,G=uwQTyX9ycyyZ)B!B!^ H/,6H/,WV3gΰ~zrss(GJJ yyyN3&&tcÇglݺE1alTTŋȾnݺQZZF*f_/##jףjxYqB!jԍoqDy0z_{;.B!B!\Xy$bbwxc +Zh4@||<+Wt)>}gqÃq;yf&L`k޼9[_zQFY$w}Vˇ~X)^\\}BAZ]B^VJ'ydX!]FN`@ӡ끊NΝ;g^… $$$дiӛVތ~=ǃ=ݴ2!B!BѩUj0'|<|VOz';=Zucbcc:u*ڵm۶|8˖-K.4iq͍>(***{Y>}:͛7Etũl{=gǏ_q8;s^"rᙶ[eoܿA"""WӺ]{kYJs>$$$8]`ܸqcXnݺѳgOs(6^)bq)"""""""r!*=&io"Mڡ{CMU!Vڅ*-##<5kV#; 0Aq}Uj\JJ u)w?:A8},\yUj~[tT,66)S /@nn.V/pgbɯkxZ""""""""W('""7uTf͚Uq9֭ۛ[oꫯVz~@`777BBBj; <<9j" կTDDDDDDDDv^Eҧ\6|WI\j.DDDDDDDDDDDDDDTL,"""""""""""""r""r^=zN-ʴH~~>lwwZTDDDDDDDDDDʦThݺu,Z:u srr9s&DEEsNBBB_~-e,""""""""""rRXDD*Kvطo?C%KШQ#1L3EDBAAAfHLL+EEE^DDDDDDDDDDDlZ,""Ubal۶ B:t(G+EDJKKggjrZLDDDDDDDDDDʤT7]wnnnxxxЮ];k93+ ""Rexzzb2j;A`9Պlvll6)..͍-[i&v;V;wҨQLYDDDDDDDDDDQ m޽$$$8>O4ƍ@߾}㏙2e 6pw^[銈\T 2eʔs3n8" f'"""""""""""gSXDDjDhhhm """"""""""r"""""""""""""" EDDDDDDDDDDDDD.*\&TKy:kEm!""""""""""rQDDWXXѣGhqGF5])F,b`r39WNRh`js 7=KDۆUWRP7C M ^Z)^pa?p5;ԧ[?[R,""Zn-N:xxx0aGlf͚5NӉVtމ{kwȕB`Pll,ڵc߾}Nm ɤ |}Y2#n}k8 F`'8L|Q{{:K/Re6>g[c-N;\ExbxdTҶdr7q8})Kbo)3>}a ])45'QX0`94lim`.I0zT.h`yl5xk3(&BHE@~ c';甼 PT*Wݺ` k?+AASڹeǭǦMС?.9uDq7PWD^툼16kF h{oWnyƒYmeb,6r M ׌NAfg.so{mW5fGiJmlڰY~nfa=> }np>b.勁Sa;}$.o[8?6n0e"m[,Mc5RlRvx~ ~W>ͫ?s`#?GNcݲ*g,?^ c`ܩ,""5ĉ>|W9FH`XwQU4A1ze(mwi϶\-ԩ8u=ot7/儣mGQѼg?h?}.n~yhs>ۑ5{Xp/:˭owjO^Ù4d0uG&wRo>oKWc5u'LAǿ΍`@L3MO1 w2>h tq1h^(U߻1>ƽR}|W_!4tRh h7nYfW9C /% 5/'mAl+6;Ƹ;&7_NJ] /5 `?Bd9IS{:v}Ω-7%7xo?ˆw&8s>pEN7 9k7Ј;{<&4Ni^W|=XDDDDDDDDDJ""RmV[ҡCj:y P6yY4Du)-$?=}=K VMn&̹Nm FQiߋ?q+O{ޥ7,=# + ?{?l\y l|J٫3sWwV~ꜳVӞ. "$(Ğ=&x)*axr`Bl-"""""""""": YVJKK)--l6c2>pbb"V6mTkk87 ohE[1%Ŏ-߁#.ҷ拁Svto3r1g?8kՓ瑛o@ԁ BN`eyoLy4 gltvmE@Tg"EA qr1.'_o3^ۏ9@6b/(c瓙Pվ1Ƙo?2Iu-\ͥ6~>< ?,۷O3&Zl/gEDDDDDDDDD$*HKBBI`ܸqk6m⪫ӳZss\tm7 sFٜwws}#]*wߗ#'2]?7w7M'. ec&Qt~1E\uR<+&|Qr$haܧY0L&-Uг0j~zQQj 'uݍBt[@籓a߇1w7fm4 Ӎé\.{V^Þ+i˺Un~ /8Uh^Q7^DDDDDDDDDDƭeB`˄ ""rI=o[g4DDDDDDDDDDD.*\ 9z(v-ZiOMMȑ#xzzҪU+=])F,b`r39WNRh`juޅrS%m*+)(!o&^ /_^M@ ԧ[?ۅWDDDDDDDDDR""RuK/_0w2k֬?$''믓^9OH睸8+o.&KN_kjSɄ;[S g+oVRkϋ+WzJhHOX+ ""RX{9Wn5kӧvC qlܸZsZ O_oki3L;>z~wqrSXmZ>Ai*fwr5ʿ 8Eq^Q9fYiÉt8f#)1h^RlĨ(/~iIb\T(((v_/)`;daX)N&e˒q[Y0r?1f EۓX@~z-Y,9kwOw8R/z#ۦ ? KY:cl=~K3o4Jэ8*}eo?ˡasHKGns!ܠ톀 HJQջrG#c஑]aalf=mѧ3C9 -š9"!abBMtAS_%O+EDZ ڵkYr%-ԩSxUD\ ѬW;"nb=hxC+ە[| lV[6#'k5#S^[DU0n8sN9ڬ6l۱Ym#?=O~7}{;KbT3r8qNI,|p[ʏ L}H~<K a5xmbTxq4ii)LnGG`ax3ؼyÌ1 6r)6z.9X w՞mǏ c`iTd"33|f]4._^xb텝!=4s=`:zypMǟ]+*}|X <1B;\T*+--eΜ9 2V| }C=貾y)Yyy>i[bXiܙ08ށu0p2sR /@AfC;]_cWrSk]a| Nm@^6=0qVHV+`61Lx{{7;wobTk87 ohE[1%Ŏ-߁#.ҷ拁Svto3r1g?8kՓ瑛o@ @`P2݆][٫mEPzw2 .}GnnیW1#c[[A7ãOfBݳVFc(2'%ֵpm7Rsٹ?[fЭLx_4G!uxyL""""""""""EDB{%!!yҤIDDD0n8<<<ߍ~i5ty|_r`˻vt6iS.X+ŏ}@xGa,~tqFץK9 G˵{z߇!sfwW[:#><,]^`pÉcp]wz6!пydMØ;Y op;Z#nnwB6FUlYp*Þˍi˺UnpU'X1RoB0#-G xs.cu{_z |~[{5}6 EDDDDDDDDDZ,""U @AAکSͭxGyE4ՎȫcXfD v7/<V&b# p͈dy2Gi?qUcz&œSP&ιج6l۱Ym#?=O~7}{;KbT3r8qNI,|p[Ώ Lm2?I}fS*8(jNn?`QH >w߼N==Z`6Xr_vV`|ج.>`_f%@h0(nOvV.ȥN+EDiܸ1K,o&--Ç\x!M#ϓŁJW:4ٖ=۲y2N+b:7NHWrѶ#a,Q!U\كBq}4/ :`Vɫws86; A0ڥƒ@?>>ph/!Yΰw; N+we0\xؙշoT<*{@U`\?x; >'BH=x;O˝ ""R-C~/$<qs7;+ ""R- 4yΜ9ԯ_NH'(&F^j1]ZDu؞L~z W,LXK-s | /:_ĭ8a`c+ҥ7,=k~0Vվ2zw<v3Fw H 1u}G[㓙{*8oyzVA˹{f**[@ """"""""""W ,""ZfJKlLa4;;ڵD:vXr<&7G1տ%l3QwKs߀n狁S>N]Ȋg8׺'c;KV6%;)3?:7wVt'N " *W8wG7%5,17wwso vCƶ)IF-Pl675VX)Fl&}ʅ.όXQ>-xy X~>`LCq3o+"""""""""W ݻI&qؾ};+V\'%?#&8tœQ6gƜ|_Hw{[]ƷFIw_,~fpg<ݟ~Ggi\`t]zϊ pԻ\;7ɫw;ڽ}2ixD~+-VzFMk,Jo?p\sצ#:6> 6}yQ?uEЧ1>aO`  ]ˏr+7'*Ĵe*{N7v""R [ׯu;{hֲr}H>NuQ-*6Xa1I@+7~F.6gsNŧń3F\NU7w 甓e67VWƝWÃ@^Ɩc.c)F|w>\E7&it=^աKH-Z,""@@ϰxxReTGUO_}ƒ 8NU Z>Фfv `1]o1ڡ"""Rc|3ieB`˄ o>[.Zݩω'ػw/>>>F IDAT]Kي\ٴXDD*+ .])))q'''osQ6m;."""""""""""VHFMDDlݺN:?ұcG j7`tҥ6"iTt X,>|6mN˖-9p@*""""""""""rSXDD\Aff&qqqa b׮]dddDvvv-g*""""""""""reRXDD\R\\̜9sѣcUb/fӦMxzzbZk3U+X,̞=z c?."""""""""""VHl6sח:d2qqj)[+ ""rNvsRXXNII fRL&mڴal6rrrسg`Sii)[n`Ɏ;v䮻[o??%%%oߞZWDDDDDDDDDDJSL?cƌ!++ /Pv"""""""""""G*HL&իWi\teB`˄ """""""""""""" ED䲴{ZQkϛ7YfriW.nz>DDDDDDDD"*Hؾ};Wf׮]X2} ٿ? ˷rW,n}J ,gU~Lĉ5jT… ׯs)Yd </^~e֬Y_~cǎ>~x~JƌÀx+gr7'i=z4'yUFm=өS':uD׮]+=B:u*?Sm!"""""""DD+AXX6mbŊ<#xyyn:-ZD:u`„ Nv;R38EA?_#Ci um3|TVja⭘s 4Gii)/&''L۵^Kz\L&رCOhKɓ';[n_v{ǓO>;vЮ];-[AAAѼysV^M;>HÆ ?~/ ))>})'Yi7vyϥw|(rJJȭEEEdddT磏>:JJFjj*iiilrvRRR*d*j(--^^5СCpwh"T)NJJ %%%嶝^u Eʪwٲe}$%%h" ѣGUB}Giݺu8yds~3VcڵkG\oժ5p?|vRSS|7&::JsVRRÇή_E?,Kt?X}ʬӧ׿꫉%,,3LNN&..Ν;s7Mnn٣&].Ǵi 妛n"::0zM͹(**rS˖-еkWy饗sgU_DDDDDDDR,##L\790[e^#7ym[Ɓ(:эhص>~XKj9I5ûzV<{ ޮa:>Oa铟MGt|I|}}ˆ {y7 /'|R~'~}QGnjqFڵkSO=EFFcƌq9GOOO^{5V\IBBGgu(|dddrJڷoOV\?m4^{5*f͚ĉ:WZy=ɓ7n`7o:bpڴiÈ#d̙.i;v젨Q;w.#F`ر1;_ٷoGj+8}gpBRSS a޼yo>b5kFZZG^zL4)?ό38u{!44zkCիgK //Dzz:7pCCZZ۽{7|Ssv;Ç硇"99jժrWwؑիWX"""""""","".)..fΜ9iUpEr h3ϔ]!ٔށuhڳ-IvP|wbi}3u~f^tek|ABB , **>|Fy׉tNʁIIIqrtb~yBBBxi׮Ç/BvСU#F7`ʔ)*233Yt)ׯѣvnrS5==}*Q3fn|W/&&'O[߳mQӿp}dd㳗}ϰAv/^Lst1c)))Z6]Gtt4Gp'vEL{=V^ͫ?OLLLq3g˟ktG[D.+W:ޟ6l=mڴjyf3ѠXo& ĉ'~c…{.SNL:7x7~zzY͛ktjq5\CHH >>bYbEDDDDD. Z,""TZZ֭[9|0'OfҤIL4 :ݻI&ߒǤI1c`~Nmh]lݱ4GNmι0@N _/"ŏbׁS9;FwTQ=FץKgw );yy饗8y$F"..Ω=;;ѣG3zh*ӟw @RRmۖxVZꫯfÆ L:2+2d=/"{;d4mڔ۷QQQիRyGٴi?&LpJO?46G}峤+]?www ‚ ʍq>jժ͚5#&&b3[?|74i҄}ժٽ{7nݚ/_"""ꪫhذ!;v_~5#\X}ҹsgZjEDDӼyw6l>>>.ŮW?<;wm۶Wg7{l֮]K`` ݻw? \Xn:l|ᇘfv;ŵeôe*{N xKiݮӵCЬelx=B+6U)L}w`)w.oN:ELL ,;v ???mHsrr n_~4iҤ҅tv;b`ٲeݛgy)STf3ԯ_J埑b!::JsOM/ζgnVك|~]qIiذaBRRRh޼9999?~ SyyyuiİvZ5jT9ٵk ,૯b۶me߿֭[H-*wc.؟Ç =VUEVV͛7ͭjjZqww[n1ѶfFgê,?("&kX:9Ke?5s,l}l\jj*\1f,\1S4ÅPԉ(juy]|<_IHH"RsgΜaÆ L4 j套^⦛n=7|s](""REsP"چ];Dj+-$&""r}iP>~a6Oޛk 6aÆqvvm۶5lC?{O>p?b0X,㏶waܸqBwuƵnˍ 1^oM61`rXVRSSve˖9z*..&-- ;JJJ v9\޴iS\\Oڤ#EDR4hЀw^*^zUsm%%%| 6/ul{c^/1smǙiȿa)_|^wmk?̋fV t [^^n>ߟ;3͛3k֬je@~hӦ ǏЮMy׸[С:t={L޽ر#}7(3ɓYpa-"""""""""[8,##L:9 IDATvhwȑ#lڴEGn(ڱo> ܹ6nđ nO}J&xo\}jmmzEBB 77///˴{mGUߩS߿BCCݻwlذZ[DDDDDDDDDB;EDJ%%%,Y`"""۵k-[AcC뉥̉-9@=I9͍08i@|L*eiӷUj痒B-}QFIBBYYYFL&Svmڴ)sٳvF;O>} EDDDDDDDDD~;XDD*eXӓҶf͚u29<&.ݔј}jG&>x7Zdç>U }\m мysN:Uޝ;wڵ ӓ͛7Wk] V?pk֬qfgܹX,v;mۧO&NXDDDDDDDDDDD;ED&xfΜi޽{wFAII +W$//zQPP@Ν~WW) gggFիѣGx}=9]Õ_<;.ݐn.u".}>MnF2a="իW;>x`^|EzeۍSOqm??ƍŋ5kO=TҨQ#^~ezE-Z 2~dz!ҥ +w#G """"""""""=ov /HDD];biߩݵIQj%\hذ!nnnk,*ooo&LPשH5hTjőAtt4ͣLgϲh"V^]Y־f=Z5XSY]=EEEƺNCDDDDDDDDDDI;EDR`23g񄇇XVVZE.]8qDeZtjmOA`CZLfJ&fK7={ Cǩ?W}N|,""TpuuA۵ yW;Z2nm1fM-1phn^wM_bۼ2swDFFJΝԩʊ+lGŔ)S.,] y饗*l7yd.\Xk󊈈HPXDDAff&;v]%66uY ӛM~Xk)}*[2n3sQ nb{iݯ]`{6󗔔w^ fv˗/'..ڡ#ED!F("""vY OO:̮|O7b=s6^78$l!tKȑJ=øqlymq1[[n+ > mV'ka""""""""""R9*d]?~8NϏcǎEqq1ǎtݻ'ݿ<>>>0v~T.ڷo_s;O>""""""""""R1EDR(<==b& qF6ngƍL:ʸ X+2c=Fٽ{7""""""""""r}""R!ʕ+)((`ر쌫+ڵ]v;wcԩue)7_(`{on8^k/2J .d׮]dzo> ­J6mӇ֭[V+=Ԍ ""R!D||<3gδ]޽;#F y|ڥǷip8Zq8)vJ8!!{oӫW/Mƃ>H\\G!$$sу3gpYt)r vj:\Y=ov SyH5}v׎'':CqWiՈFLILLۡ}/""""""[e,*vi6$:tV[iu$1a,""rIJJbڴiW8{"""7C2tv|9A,"""""""""""""rPXDDDDDDDDDDDDDwH 9|0999-~)opS`r=0!uJNge08y;oƴ:%;[n]v4mڴSu,""={6qqqdddͼy(..ŷlپ}jÌog7%3~oZI=esj}f{={Tul}{W79} BD2/L]slͯ<111ŅkꫯӦMjCII ~~~矚v1ٌhtgR~}ׯ_aJJJ e6EDR4hЀ|6f/퍓}ј}03[;ĿmS͊j)b C =$ =O]KH8:4u4c?%m1"'# Mf]Q~CWdOp2(,6Ӈ+iـ7qd4y 3F?#}/ lϹ܋ އ/Q"u.U?|˱T,V ,#y ?;mKLtiƫ^i :An/;s?ͧ? g\]?<'秢w4L6m۶1`ڶm7|?ۗ4<^ח3}tׯEEEdggT3f#<@VV 6ŋOxx8VرcX, @|߿K/XяǶbiIYŏJH,@ @.H8O)8wfE1_1ȍ%/j@̏sdjR?H^Q!oZb[b1cZ2[[כCiЗucmזmd('ϤgкIiDFy0cB[ʲl}cG?ZC#3u,ۅFN, zSqw"efK%3{'qI4_T%e|xIYV*3m4V\ɤIx'4iVߵkW7n̺ul-[ȑ#m2i$ZnMZZ4jԈ3ft4 [nѣNGС))),^RRRHIINJxx88q;w`7Yh'77$[˗𺈈ȍE;ED!F("""v4HJJbҥ4i҄aǘ]}ڍ_e;ڷUczqy9@A] Gc4%c׿Pݪ6Z^GpuPI~H>HכCi$Z{t"a:Jna>^4ֿ:U_ח_~N:m3(FAaa!Wf۶ҿDGGcRSSK87|?V;}q(b6oܹsaܸq_'x®>@ppp6l@@@"""""""""7EDJ%%%,Y`"""b,2Ç'ľo}_&_K\"]@.䤜ç~omy \e?s!?g/PGd{]x{h2a2iצi٢%|o }ɸx7RȬ[Xƽq7 ͥG`0s7}r.-hZ3Ǐʴl}./Cvsqh*v#sIMl_E|}KQF`;yر̚5\6nHpp0]O>]:O/Qйsgjٳ/~&002mzgEg1,""X,DEE}We?̃)w;Ҏ`Z8 dl*-xe{5##$yiħL+Sߋ|^ Xg}8qbȍADDB&xfΜi޽{wFd_%%%c4뮻СC]|`{mEF͹Avm͡ջ9wh7+m:۟'ɺ82^O&T{Վy89;q׌km}OYC8s>ú3qt`T~f}G/|^'")V(msCZnğoih~T6^v4w?olۑVb &*M=|HSC3;[o^|`O_Y9yjnkױ7e:5S]GDX"=Tǟ3K~mcTUs9y~Z+%FggϞj. &ФI5jDZZO=/ҫW/5kFVVO=vmݻ3j(13gG`޼y{エlْl"""=9r.\ {f\OwҾSWkǓhR«Gblj k,*GL/*䦀&e8A8_#/r6<74խLl4k;Ƣj9z(VmCONCn\Gh\'')e =hyoXuÕF!ͮޞ_^ /AH5~U|{S߫¸3mxMkZP\GJ||G7M65E?IIIL6M_0EDDG c![25Mpߟ\CeСu!EDD䪙OԸ0TtׅDDDDDDDDDPXDDD .|F?::kO_$~NCDDDDDDDDDDZXDD*UXXÇϏЗ IDATPSh4H^^is=x禎nJvhڴf˭Hxx8...l߾V""RٳgGFF̛7b[ŋƒ~ٳg0ѬGkZ١F}oˡ{w߾}ĐJAAGQY|93gdǎ6\;XDD*I`` &9soIaZhѣ1 ujөU>} qvqj1)1-՞7&&vZ^}UyڴiŤѰaC|}}b-[yA`ԥ/+ 4 ??Bbb"=xzzIe#IlWKn] hߜ# iҌCkvZ6ڴil۶жm[o߾뉌899w}@Ν;Y`ի:L<={2eʔj#EDadffұcG)))!!!{~EQPPPǙ\ؘ xo"7R^KSYLIwbhvޛH~HޛH)}ڴi\I&O2i$VZU:^׮4nܘu/[#GT#qaRSS駟㏫˗/'..DDDDDDDDDDni8h4EDDݑ.\`X,/^̖-[ȑ#IHH ++ h+WG>} J;EDR(<==b^^^b0(kt=Vk +z֭ԩV⫯bرXff& .$==ݶKepuujbZhTjrJ >|8v:99®]ZfO-8)7vC<9ǫHްV;v, .dÆ 3v`0`0*=YL <==ټyssӇ'j""""""""""riTd2̙3m׻wΈ#uTCXwjwxrC:Tkwzӹspss˫ZE~-Ν;G6mpraɴoߞDڵkw2 DZ v<9VZ""rILأ""R;:z$%%1m4EDDDDDDDDDC*C2tкNCDDDDDDDDDDjB5I` """"""""""""""7XDD*UXXÇϏP0L$''S~}ZhqSSXDD*5{l `׮]l޼{ 777ؾ}]t:tHPXDD*I`` PwΜ9SL-))a̙t֭M;EDR4hЀr߿OOOZly_RXDDAff&;v,7k.u`ʙ,""2DEEa+,N8A׮] ;ED%%%,Y`"""m?кuk|||rv"""""""""""r ""R)BTTw}1ӭ[ ""R!ʕ+)((`STTdkl[nLEDDDDDDDDDDkd">>3gڮwޝ#F>ߵk;w("""""""""""?SXDD*[oUeI&]lDDDDDDDDDDD*:ZDDDDDDDDDDDDD B` """"""""""""""7EDDDDDDDDDDDDDn*H IHH 66b6˴III!..]v[YJ]5k۷o4DuluFNgeĢ9 {iY)c֭9sܹsoj4O?ͷ~{:}4O<Æ wީvJ:wC aȐ!|geɼ{|.^ȱcxwHOOÌ}k׮[nue駟Xp!={vog7]W;øqpss+JN9r.`X-&3%FVs0V;?̅c ʮsu/j%9gb?łTa'Φq!/yNgeP`,*+* 3B%9$Jʹsj}f'{eS?;)0n4s:+F__z}Hf]C;;Zy~*[G+OLL #Gѣ]xT6~;6mq߷oǏ}ՎW*kZIOO'--N:ѴiSk/&Mbɒ%|7F>6hѢDDDDDDDD\:>vuuAvIm;;;r}xo߾뉌899Ƅ ؿ?N:b |||l|kryyONbb"l1c<Pz<@VV 6ŋZOxx8VرcX, *TRR_|Q1Gcj\IC$>FSTl$ 1Q{/Yw~N`C^dp>,|zn9xva[bZg![qhæwP\bKV>6^<$|1 g;?]Kk}k~wD.x dp"|&'1`S˶xv>l|n9-_o}/~D')Y9/*dX;Y+:>UYu=9y<1x43>n$Ze8 h>qT7&Xg/0bqsuޟg*3m4mƀh۶-| e >?84/c2[ꫯl_{ԩS3f ?8c F||}a5׵kW7n̺ulז-[ȑ#qr*9i$ZnMZZ4jԈ3fGGG+uV=Jzz:wQzkHIIańBJJ 0uTIMMĉܹ i6Yh'77r0tRw^o> ܹsX3a60xx zLo5MuNt}$M۔?`b1[8<y1 ty$¶[b`)صNޝl$vnɰŏqPt1* r5ľX!fRo`Ҽ%)7x8пK/>?"h~J9]e7baQWrbIKJ H%f3??̑Ix#yEjoŌjl)t9 }Y;vmٶ /N=Πu >&4aƊ{e"~O7qGhggY #㍜X5;`AJק*D |Tf3g϶JJJ쎖6v_f )))_b+:9+.]pq=Jll헉r~/9x AAA`ƿ;m͑%)))Znܘ]}ڍ_o/r$:zorhrkKƮp^Uzhsx5Ugxq($?$͡ny\ʰ l , &qSD,Rb5aeiZܳEs[% \b(h.&0uAu9=g}|.+(S#cdܼTo;'+[QŠϗ_WC Cp""7Z .#aоmQP&.1)]+CbʭsSDӿcfxÊ2  Ip5?WMOƯinOχ&ORzvvvgᨨ(hHȮZ /֪󽑪_CCC\t ѷo_i|coc̘1pssS{\@S`mmu;"""""""zr0LDDjIRl޼NNNa툌`׮]_^>(,Yؿ?S 7oB$a֬Y mO42V v[[[)޽rHT_M r ⦮ſ_`H|%=N@YN1 MeؗpK{exqPNAopS?bcա^5n:ocYY J58KvJ?sNVxʵakE 8JlsսE.(+)VU,,H8::B$)lܨoձQ=??}iCF>̙w"""wŋ/kjt,isWVVt`Zq݅ZDD,L۷cǎU(+--Euu5k]t'u >>>سg~D#r9ۧrŬnܸ=tttLX[[C$Тs:?ɲ={\ '''ܹsuuuJ WB[ǿG:+Kt>B#B}]C«RQWvʉ)M﯊)־2k1iW3U Hd߅ew!g} [sKWT% ($:$ 4f=Ar e*\vCX-;Аm"~MmAUOįk:;'$UXX|G]]wii[,{#UXv-֮]4 $$?;88@,###^^^/ _""""""""&""r9vލJ39 hX)GQQQذa0aẍ N)u9~xl޼.\p^ .]͛ aCbƍB۶mÈ#Ze\8uT?&8{RY[#[߮2uhH^ZkpF{%{%%(?r;.G8j?}"E+F&ULu͢п1褏1mcDЖTů< M濭ߩSZ u۶mT*# ܾ}III/:OFuu53b_D;v,֮]zܸq+"""ˠA0i$ѓ+H:-Z$\GDD0n88vjjj`ii^xBnx뭷O[n e6mK/;;;XYY֭[x7dGXX͛@tEEEx7пDFF@N>fk׮(--Epp0bbb5HƢ_?3t\V?kB/R)=zB٦/?25ǭxc0 {~ ߛ.(*+N@^jּFחG!fd}{}w+ʠ-ѸW7?CGe|ʕ+[nnnpuuŻ+>|8RSSaaa wu~|wڵ+n߾aÆaҥ =***K/a̘1Z#;;={Ժ=9D 9QDDWڟYpU?FL&ݻw/cVQ*))AAA܂T*ڵkH$¹ڸ~:bq>  Eff& ʪK+Qq.:Bh٢m`S_ b[KPr0UVyZlѢtKQ\^NiK7_{07lھ7 *~X%QPz0줯T.ǵ[g^/ Ivahȑ#Azz:tZݹspvvV*͛7ѽ{wV>ߛSVV[nںɭsrr...ppP\.Gvv6bq_pعsg0dee @ IDATpwoyDDDDԾjnsRP<>ϬLxVXDDH?DDtttԞ177yg gDv޽W_}'OTZghf C36wg[IW=N٥M/66ؤ%f}ks}C>?&07Qe=]]twjq.-nn2'N`-N%,-qnUX X0f,[ ?P.УG׮] 6L̞=_""""""8&E~/gd-N~7*z:t iiX“>ٳ;M"`Ϟ=-nzf˟}Y<- L=&cf۾*ttZw𤏏HSLyœ>>"""""""""Mp.""""""""""""""PUUU˗/ +_W^oh:tЧO@WWǏG^^^m郮] ݻUWWcN:{V(..H$%z @mm-1l0._| 4HQ]] r9[[[H$ #>>.] H4 . )) KKKG刋C~~>1|p4|'i6IIIpww+W7oٳ=z4D-kkk@(d!((H9Ȗ/_???w(DDDDDDDDD-05kҥ5NFW^yW^+ 55}􁃃PoԩM >c!-- &&&B;#߇L&Àb ,_\n~3ydcV8_tIi'O_~ gwAxx8駟"!!A +WDtttQVVT*~\.Gnn.,--allPV__f/(( GNN$= HJJȑ#5Qe DDԬ/аmΝq=ݻw!JH$\. Aii)եxIlD"\~0zh#..Ne=X^zڵk* >%%%JzZJJ 4c˖->}BZ999s=Q⹟&s+Wѣ fE"<<<4jZNk6Y6axyyO?oh!!H*bΝ<5JKK%kpqqL&ܺu Ç;~+H$ 2Xd ##6m.]GGG8::ofeeO=0qD*?}􁗗cƌ֭[UiӰaDDDDDDDDD+Hc(,,D^@X5w=hH[K*̙3pttt;v`055֭[  ##'NШ-[?olϞ=pttlD"SgZ'ObĈ¦M{5Y7-- ǎÚ5kT +++XYYH8T CJJ ^~e8pRT駟ƨQP\\޽{^^^={0|?^)3f ׯGII c̙Bzlܸv€pM)lxǎJDDDDDDDDD`4RSS۷#88XXܹsgt xgp-\vMMtt߿?^*a3f q!-jU2d$zիW'|={"..N2Y{vvv 3r"֭CTTBׯǯ|cӦM[aaa\.DZc`dd+ԩVH^VUU)iݸpNk:uRnXRYY CCC͛Fa% cR.ΝKj ! a⤤$hMٳgT޸ErOYYP6k,[Ob6l d2c*ΏD"ATTfϞbhךbbbk.rXt)Я_?7Thccc={~a$$$`„ u>@}}= ?7oKW:cK.y& `Сظqm0bmNjvA0i$%""""""""z&""ꐖk׮aѢEXp!.\p^izz:>C,\?Ǝ+|ҤcΜ9abb@̞=ƍCuu#a׮] lllPUUKj6..< tuukH$BRRRm^|EboH$Өm#Gɓx駛vH7=NҎO_kfe­VJ<娪IV㯢o߆;%2 ׯ_ lll%{ƍzd2ajjfۙΝ;puu}, EffF?pvvRT*ŵk H`aauׯ_X,nQ#G &&M_OOOddd]SS~]n=/3+^~=F2O `""zx055m02`ffaQ311gyb&5W_}U8X[Mnݻ8>=qV^dhH~Ϟ=_""""""""갘&""GO>QYֳgO 0{[b͖?xgQ4DDDDDDDDDcVqkQ0=Yhcr""?@72.!"f"''077Jâ"\t ի )a5#G5%K ((AAAJe?؉ 7w!2~;ʖ/_???7?C}A@@tuuqq5٦O>ڵ޽{aooZuu5ԩѻwo~>bD"XZZW^">>Æ ǠAIzz:ѿ\.G\\`kk+~DzҥKAXX$s$%%aiihqqqχ7.lQy}UiS׿Vo8S$"""""""05kǎ055ř3g3fpu|իܹCW_~;G>v؁+W $$]x"6l؀z.`leڢ'Y/z>Us011nݺݻ u_~ΝS:/… L+V`ժUGll,;HKKD" ୷ϟGxx8\]]%]TTc"((HOO+ܹsQPPWWWܾ}W^ƍ1~xaر8s عs'߯ї?%K?#<<999sssq@޽pyy9F"ؽ{7bcc5X?ƍÍ7pB:u ͏:/_Fpp0\\\-[ 55˖-S;>M_4_OOO{{⣏>ӧۤޟDDDDDDDD10LDDz @H={VX隔BIJJȑ#RsE||<*\,^XѣGcJ1!""ȀBƍၲ2x{{/Ă ' .X111z.4j(+z*\]]ӧzSNmr\ iii011аl~y̟?>d2 +V`BuӜɓ'#887oаҥK uTOWU_O2HRar9rssaii cccz4AALLLgD=rrr HIDDDDDDDDODDjL6\!v5x{{tuuѳgOdggK+-- x@&kHv~Gƈ#н{wL8UUUrR)v܉{N,vZ|7^y b lc+(t p)>ß1b>ciٳ1sLbϞ=0dzcƌ֭[ Dsss4$G`ũ'ѫW/\vMeÇDiprrB޽"\KIIAXX}lٲӧOWHY[[k6''YYYsQF)s?M~W\ѣGVŊD"xxxhԾ-ڶm /[֭[|2rssqEزe i\}sΡ Je'a`bcY+xeRJo OM{e8.`#?>S1I{c={6vލ)S`֬Y2e ٣TV&H0x`a…߅/#h*//'OĈ#0rHƪcǎ5{ill,`eeU Z>|X)||GO||<yyylѽoܸ d~wP񵅇yT_Oϗ)S nBnn.pB~bb",X~ W\A^^BNN999ԩS3f@@@rssq58qׯW7nڵkQ^^L/]M?c=zTi$"""""""6-H-L:HR դR)SNطo HP__ߞ> &`툈@UUbccq!<008/++S8kwؼypr}}=N8.]pM*\700СCqFm۶aĈZ I&i/&H%TݻwcXh>3aBPTVVO?g}___xyyc/22ǏG߾}ѭ[7ẕϟ@Ǐѣ믿Ftt4 Um+Vט5f#o5w\bz>Y7\ت@llBrѮ]`aaؠ K.ըϸ8<3 <"IIIMyahhkto=lll H`bb{OF'O駟nr{EBʦM3g"((]t5}k={VVVpvv1}t5k pvvƌ3QYֳgO 0k&"~Oޞ{DDDDPsg? +a{|}"""""""" `"""GI<~=?QGDDDDDDDDDDDDDDO. "f"''077JJB.ݽ"%"""""""""""&Y;vX,)Μ9d̘1cǎa޽022Νu1LDDz 2 +Vٳg򂏏._gDDDDDDDDDDDyLQA. #,"""""""""""jDDVvv6^k׮~~~5A "O&Rd5 `""RaaaAee%矇iߣ=/_???zDDDDDDDDDc;w8x KTWW K.ѣGDYëSˑ@Gŋذa~&˫QSS=>} -- ~-PQQium9_L&k}q05^H$d2XgϞE@@ &&:,[ iiiBMֹWPN&71hқ047V1\owѪcʔ)ꫯP[[ ///믿ުid2d2Ԡ('`ooggg HJJȑ#>X"""""""""PuK$AOOr\֘N:sθw#5'?pCXo<6 H(}Ozmg)үcRl7cL2EniT*Ν;s)mٲx[>Ç1vX>FӦMÆ Gk/77׿k׮i&((111/|||PZZ (//~G~dd$^~e~z/ƁZ\>flݺUcFRR6nKKK5Y/??ի#/zoz`P; % Kxj0d:yׁ\Y = c+S\ y;wUUUU*FNNƍzƶe7}nn.䄟.\#_Twǎ8zV7>޽{sϡW^z*6oތݻk^*̙3pnݺL;vLMMuV̜9W\둑?\q6럈&""d2 Jadd١555ؾ};V?nN F,]^xms< wpL3cZ=U>]?E@"SN= 2MI$Ϥ522o1cͭɳj`mmpk)11ӦM+&O۷D~all ߿?^*a3f q!bƾ}Cff&-322-5Cnn.rylqMDDDDDDDDDm+H-www!&&8xBT*͛v,\m"̻Z=U*&A=*v6ۭE.C*6 SSS@YYӱrJB,+4h<< (QCst26P+iwpյ*C5]1ٳGFȀŋ&9b\zƍob̘14ij+]_yĉصkVWAaРALcҤI///\KDDDDDDDDbTDnn.wHb?wFee%P[[ m;wX# /DYOު񁱱1Ξ=~Zшnq{Hcbڵ;v,rss믿W;h aӦMttt0j(5 ow}iӦ!11Qsa}(^ԩSd1=Y&""R)vލ y4羮ψ^0F%C@;xiQǾB:^6fEϧ>xxx0qb^m۶'++˚Qʽʮ/**dڵkǢEe_yѭ[7 PKiT*::CAll,-Rs xoz4{7P1ڄroP]f:W'GtRzQӭ[jݿ?'Ngm$~m<&†hXwscm?3Fӱ5DxrL #zv~,t {ꪮÑ=uˣS )**j ::z;vTاW^yۧ{gݻ7[ٓgy\ MhTj <9pu&XURsKKộrX,p|1U\ .Ml?Uq!-&88/1`RRa:/XTbpyf3-[_%%, F-[V'==&MФI T oxt-o!h_H'pv/o^{"H0<}ʞgyc ~=5Uކ_TJQ`_T矿A+W'6mmڴ|:uꄋKm]_hh(Tlڴ8[DDDDDDDT`Rvٽw)Cwm?AAn3Awގx6eicy%Nr28F|4w?$2hپ)G% +'Ѣ/;|\|? 0UdbժU|we̙3|8˗/{ŴiӬKF?Ӹ`joH-!$~z؏Cw~Qx+8:9"P:;vHw;<ZW>R+&lJBnvia}6p>׸rJ̙CNN3f୷ުǃ>Hjj0a8uvvvkydee1d,YkP_7b> SLGINe>{s5#9 vc뱮ٺ=3wqil\b¹lZ7ף]Ƚ5sC3wس=˜ 蟞we_u! ܹsҥ N"))[ZU;vh4|r:thh4ZI&ӧO'YFvڴiDDDӧٹs'K,L&%%%T+~}Y ӯٶRZgn-=V߀9g-01HO)y$9 ~y^Fw/W'?Tl#}jiqh>=?~zˡ{lݺh~Z/"""""""r `lɄkjhN4?Ҳƕ#ƽ2$/"?Y m3KƕY6,-am쌼rspiĨA l[ i4 bdqd[}5h4닣c..?~]vѵkWڷo_=*99Ç@xxuX(]oe޼ꟗǏgÆ ̘1qur.wH W.龲sW7!غ L*]VY8q>B¡WSyV}~}s0.xuh֪lC5mq>3}Q Xv͇$"##9x qqqN|||#f\-Zb)D<7|_#d&"""""""rc `RHHC !**|bcck.˧_6m|>-7ޥ.w8c3X5Ae_xwΣ>1L3==Ӽysvbsy椥EYGÎA6; 0g!\Ȁ//z}M̷ycՆ&S_36Hq{G7 6W_}E`` ]tO:QFqA233)**3._U ݛJ4XDD`0ЪU+233k.bZK鞧RJgf`Wo5Xgr@.\@qqqY^9 IDAT^^,^ŋo> ĠAxǬ}-5k Lt(-  iٲڂM+;;ۺuU{{5tm (t'Yg3t.eOB:X?+p`vpC?EUo{ۂﭮ}tԉ5kqY222Xt)iii߉2ﺣ#ٷCZZ_DDDDDDDF""R|,V4K| |1?Ԥ vѥt9zS`jja=%Fw 7778Pm޽ֱcG<<vy[\\Ro㉈4XDDɓ'9}4>>>tږ֭[6m...x"""""""""""*f1Lc6m֭_~kA_QcܪT*0d'66SNqY|||8y$\r'ORRRR}%''+W\!""""""""""r""RmVZ @qq1 //˗/I&aoo_X4iZ}.""""""""""r[PXDD*OJJ  #""(bsNoδi;++:#""""""""""r;QXDD*d2Xz5yyyOΝӧOǶX,iPXDD*9sͥOOO*}q}9nII )))y,ۉVH[_^y ODDDDDDDDDDVP^xN<ɏ?kc#""""""""""rS""rC;;f"TEh hTFFF< ?gϞ%//朰0 NUDDDDDDDDDD䶧T*::ٿ?111L6 bcczNhh """""""""""@`ӭ\9p>ٳRO{Hk0pppb)))ҥK띞\C3EDJ X,# "Nd*ӧ'>>FV+X%rOM̋\*n:j:|pcTk| O<Æ _l~mm c7yfΝ;W̙3k<%''3c ΂ jސ}Q(3Z5̟? bرe -͙"N%|} u^ӳqlSrGsZ\(4ͤbggG--RL-pssir ΝёVZY6aÆ1}t{ZVPlF#^d9uW\aرମd7oSo?sޯ?yWxϧYf5ĹsһwZ`20wY,Ο?le˖~SN\+44Tvy F?, )))6mPXXX׺r xzz]aʞhe˖88?6mbuUDDDDDDDvՋH-\jXBh||<}|sJl!C[\\1c+aRdpTM#~ lysx,sO^Ǚkk1UBQNɋyav='Sľ#&&iӦ$$$XM&?VX-Z ((Ӻuk8~x>Vb6ok̘1kײfVXA߾}mСCnϥKmzO?O3{lÇ' t}QlR=55_p]w1h  W_{cǎY3gw]nwח}_.^{58p L0>={H^yy71b|rc,/"""""""r3RXDDȦMXl>>>%h1LXgK/8TbPRR®]ԩg̘1z~IIuk[oo[tх d2qY.&MDFF|ACQPP@Νm ٳYz5&MgeҤIYƦ9q)))??n:F#|F-ZTI&Ѿ}{RSSIIIiӦ6l2/^Lnn.ǎot҅SN֭[˝ncƍl޼$ٳg-HY "55=z?ЦOEbOSOq֬YÖ-[W{lݺh hlɄT\\ ŋ3gf˗ː!CȺtgg);mϑ/=\i@8I1(-c1[{tFqGO_9s 96{,vp|C6v+&"~;! 2&&&aÆo,b ֮]kUf"::u֑qqq5Ρ"Uhٲ%>o۷SXXX/ɓ'c,X@ǎ>}zpwwg޼y$&&dk[u겳6okܹx{{3w\:uēO>YfqhҤ PZ_ʟũ 6nȎ;HIIbC?LѣG[v_?ή]ڵ+۷oVx?>&L@xxx59;WwmDFFrAݝ2{gϞKyի8-Zc WȭJ`RHH!!!X,.]Jll,=Υ_;;;ԩuN;\)\|̘9õ_ =Bƃ cJ=5jG'(/ѭ8Wq|E1yd??`6nHv⣗WNFFlْ:y_h-Off&G]xxx`gggݿչ>L=UnjC@@///>۬Y3ΟIP1 <6{o~'U//^Gŋ/.w/pF4uTbcc9r$Y?tzz:~~~cM6-7NE""""""""R=ZZDD`0ЪU+233ҙخ%q7:Nv-˜:;;IshQZҙ쳙x5U;'))Ç3sLfmiZZZsccc`ܸq:tz׺;|r1RYF53gKY : 7k]ǯr…r S7uUxxs6߷hu3 b_cצMŋINNf˖-Y׍f?9{l:::bXj\d`ҥlݺwysX8=HMisΕ+--ZSXDD*OBB?geeggggGؽ{u?Ç@vp_.`OloGvegq6iI랡ZcuU8^of =t+A {>b眔 ;{.ƙ̚5֙v)Ǝbᗿ%1ͳ{uٳg9uW\)#,, (OgժUίW2}t^xѣHJJ>(wiߺչN:f~O?L&}}vV^q5kƀxW hֽ{Zgwۻwoxwcǎ?ȑ#@~;w~m6  Xn]oժ̛7lAySKWnT L&V^/[o_ڷoO>}} K"  6M1wFc:m@] e l 0.Z5&ͽzrvxOPx1U2rSK߼s<}X .Khvw`UHJJbرOj$̙36mwfĉw:p@ynJv8x`Ǝ˘1c駟7ިv~ſ{aΝ̛7___=fiӦѼyޞ*u\=cƌaڵq#_J _igy/m2dȐ X_=z-ZF`` }+h޼9;wutޝCk.,ǐ!Cx饗$44͛[ܽٙ… 5vӦM;w.3v??۷?}Nk׮""""""""XF""7};֩ͱS hߡcqv)oyrss),,Ӻ/eee䄻{r(*,v׺t&g7\Tg /](@ ;w&MTLiaa!gΜe˖xxxشaXhٲ.]ۛ={yfNcoo /]Dzz:[ťF䐚ukFپ};AAAeޓ#GvZVZU,ˆ'$$fVM7a28}4.%^[YYYdee]>KZRR=cҤIL0ڶm68x`3ةctܭH{""rqww[^!!y\&xռps'pqC]gkMUaS ,>Β;mhfnMW>fWu}<޽V`|||msss///5Qn*WWWF;ã>O IDAT~+ŋc4+ ܱcǘ={v?n}}}k 6Fxx8;wȑ# 6̦Ν;⯈H,""rmĈY󝜜3f ks=)H-V.aÆ)*J| :tQϏh>sڶm?PfٳGDDDDDDDvM¶̖sss#::ȍkΜ9\/cHTI|-4~:4]GDDDDDDDDDDDDDH2220EXXuXqq1 eiҤ AAAu{E~mxx"""""""""""EDRxxxaڴiPXXȶmlѱc:͛Ƕmۘ7oaaauKDDDDDDDDDDvTj f3DDDSO=ek2x뭷֭[&$$Cqy,ۅJ]-^RnÇF6mغԴT˺T)11'Ori|||*{nufS4<\\\lf8pk.h9ׇVe,hSsq b,%Z?tsΥweڮ\Bjj*x{{{b!%%lJJJ,nӤI4iRałh׷L***"==w===H} `Ե7z{{{e7Z~n~_נim_~CTcya,xN@@<3X_{58p L01}]|}}۷/)S8~8ZMHHst&L+Wl4h*s;vϏǏW+ӫW/ stԉlrss ?3f O=TXr%xxx~SLaҥWDDDDDDDDD,""UJLLdӦM,[ 僇ۉe;kfx//|&<13l2}62?3rA [3x 0xt{j_nFoooKF#-4TN8AJJ G)nܸ_~͛7DZZ={cǎF/_N0F&Ol=ڴiDDDӧٹs'K,ɱe˖xbrss9vվʘL&ϑ#GHMMl6 ;| 3f ))%K… %61= EGG?[\rNl6S\\dٶ0j0 //''27vp|Cqvwe_q!1%'91}0yiHx;J{5H..u;v,1GJyAUV$''asٙ~l2rrrO8p`sݻ7Okݻ3foSNcȑiӆl @TTTsMLLCuڲ< /O&Mdٌ=~:ǿ{9w/^ޞ+Wr]wqF~ q}""""""""""-;};֩ͱS hߡcqvqV\ ɩLlŋ899^ "++`_d2qi|}}q3gQs #>>_W ODDDDDDn]E5>'xΜ9hՕ''FVxC;r%g|ęsGoll޼Zlة;JEGGs!222eѢEZۿ;V\ŋٻw/W\iČ7]q|[2[6{\}+Oak{v=1s8ֳAcġC!%%|kׯ[n5^TJM>'#0}tL&XƎСC2e 8b1[1fzR\\Kʱ(-q%%\9k|=ɿ̅JX,Ͻ}˅?OU9>Ra{Q3B*&JiSn~剉aԨQ$%%~z{1RRRj;t־@3,"u!/{w'vhcivw 90Խ'Y~KtؗF`6}οm{G{1^sFV+~Ud?v* kKOhKV%t͒l1 gͮ9bf߂OD̦;b*K~q>n$=4 ô'gVymNWHF\K!߰w;QKȩ%3ؑz!^ N>m[yh^^ww.#O0'uΝw^ 3 ޣ/˟~'jݟܼ9+%'?Cָm{nAw?j?ܟǙ;yA:y>Ais_ŭ;:.XgK̘Mfazwn忡+QxrWo_}F8d=Ic?K;ø? `\ XDFNRS|561JfSGg[9/uTRS'H\K^a|l=d.l1یUb.ֵumJ3Oo޳z-0w`elzH@Ez *E# V,(pgAE,?+"A XAzo !˶Iل|^(;~y+Li$ a/t+޷\ڪg)lўw8lصK}|*rS92{ _z%#N7RɜKݎSQ|#_%/T|wݍ<'Nd޼y; &0vXΝ;ŋ]?SXN^?cҴiSHLL$""ɓ'Rz)V\޽{INNKnӦ Gaٴlْ#GpW`ٓD8 9sfp0k,f̘Avv6;vp%͙3uֹ}\DDDDDDDD¢""R!Ӊfnd) ƍdȹ w>*c߲ʹޅ9 R4 :ILNr{n՝i5[mؘ1J`*Y|ٙtV4۠R{cӁ]l9\vx宿tO&b#K컩ծ^>Y0{*GYFg2@,ܸU 5K/E_|)/|'phߨ>^iɭv7odOM曟rJ-]O-ڱ{Ss|U5e+ˤI eҤIqx`e|g9|.\ȪUET.]ILL,^xS r ۷/LJ>}jeL:n&ezz @llm-YH\X hт-Z`&=CM;fωgϞy$؈h:]R45cw <#иENg鐽=(Yݪ)V+~;,EިQhРax`e̘1L2lSÇ0d N=Dq:t@U]?ѧ&99͚5+-w""""""""raRXDDf'--͵- 뮻>^z^hry[oRo2#B.a)}b% 2ƱMI n#,!uq ~ٷ&:ާFmfo2Crh/Q^B95iY%ޞ^9M6 qs= 6|Ϙ~C\Ga&'Q1aQJ=ZneȠP %8B#pt)p?ʊߝS*sw+_EGQӦM/d3T xS.⏉СCmXJ~"#ߡ)))%<=^DDDDDDDDʦ5EDLyyy޽ەHOOg˖-%F8q5%֭[ٶm]vx҇msݡ1#ycW?g|%~PpN=Oo/c K1M9ʦWv"?Y&Q2T?,i%avrC{Yq}d lV:rkY;6pDCw|;%?etQa hߍ?Z?7+/N\Ň+^h hMv]W۟Jʎ=+Υw wؘ1cxwXd Gvmb<8ERgeez5o>CnZ J_~}>Ljjj>>>ߟYfO++mVz""""""""ra)ng޼yG^^:to߾26mbxzzb&_[n-F}v=߸b1ex;u;&Z L!$LggdJXM,Ohbnsxʰ%ڬ .1cU# aRz6EÄ3꺣YX.74iZŵa70,߼Ѐ@.ssӍJzU7oG>x`24uoDD`I'Ryph:C\Ɠ׍Q?, >-Ux|޼qF8w#3/qJt{fHDN 7n+:>gÝ맢.+6j(z!wN&MJ[[.$%%\:t(O>$zr͖ңGW]veԨQnqo2b7nLff& {{سg-[!=Ni׹ĶwдeJVl ۻUzj‚|)y=W_Ui-iwSS OP\NZtӳ3IΠYX,F&y|m Г܂|F@^aӎѬ^,eL}2rIitVW$N !uʾf==|8Ç0DDDDDDDD),"""5fW^>z <=5q6Ub\ί~t͛7l2cϞ=U4sg֦y(,""2M/N:ok׮\u]8~Zzx^DaMԉ $e}v.ϯ}iW0 C;sGĉZӼys;_+HJJo{wcXxgy׈&==C2sL*uNww]z""""""""""R4XDDʕի4hp:uV;Fppp#OG_Q'*g)#xv'҈lKOo/ }COW ~]'Nȼy;v,&L`ر|EuLTT/vOb)'"))]vyw+}̙úu*]ODDDDDDDDDDjFH-ZD?c^4/M6p8j:js\2=?죥'}rߓ̯}O4BD$gg&_ݙV#UI&ʤIo'33ӵ|g9|.\ȪU\{ŦMزe m۶J%KYz""""""""""R4XDDʴ~:DXX#-- ʾ}p8PXXȄ :t(* ? wsTN{-Khh(FiӦAHHk1co믿&66ƍ#>>M6Faa!6qӇVZ""R&Fpp0 䐛KBBcǎ%((RRR %%9jX LӬr۷/u{ӦM/d3Ƶ/55wydزeqxyya&ibFc?%EDL-ZE6l`ڵ?޵m۶_6mڐŎ;pϚ5M"o] s~z{94Nrߘ1cxw駟6mkaAZZlٲE1z4h?˗/+J_O>4mڔ>sO `9+C ^jҹsgڴiSaUIwVmůoiVp/cŸ8n'{D5T/W:smwi%^}|%4IOOחJ-,ȯ8Hzz:4k Rwul۶p9 +]gm4iުĶwЦC KDD#6""r ""Ð r;v0qD%EDDDDDDDDDCJH ÇgTA?%%EDDDDDDDDDDDDD.J\ СCl66mZb{^^I-j):QXDDܒ¬Y 塇rmꫯӓ'x),""2M/N:qڴiC\\v"!!v@k6nHtt4111g N:""RlV^͠Aj;""REѿk;""Rs!طoiiiXVۇDDDDDDDDDDD>OY.vJHH;v<8MAz1|W=4sy뇇' O||})#?/9),"""""""""""\vf^Ց0&;39(,"""""""""""眵s־/"r"""""""""""rΙ({=0?O-fA^+S!"G 7~s1-&~6J5&yϷd*GӔJ\L7Od. 8}h,{%8 Ov;Vnx˶dfeGU6W֭ZƬwݳڷ;~[m'(0Lǖ'eQ/ؗ˚S}1۶ǜ(KƟNɛә_ !<o?"5~QNFm6n}W_5Zbk!R׶Bv94jZ{x}#6 &$мy~׮]GNe}.:Egyիڑs}4MLG>ab[7Uo3;ra: &&?߽ґ㪞N q#_n&`;APW=me7} [ΆQRybfњM jkd_};ii_4q&ia if's v.3#XJm9Vנak.uCk߾}{lӃEiny,u#W޳#zǏ?RXD" 60 :(f-,ӣ¶JN.n/:W^1H`AjZaaxyKegDedd\f{Ǐj/;;ODI`@ uT~bv^N؍7oȳ!I1UjfqD!x{{W{NXh7 B2 ã먘y61x^ny5j&߯Z˯MLJ([ursQ^͆YW΍QYܹ)V7#<,׬uh֬k/Azz:>>ږnѣԫW3rssIMM%,,JGDFiXx>#nf/^LuT\N9`i%fM2>v kt*&/} G G4.}X3Ou o@ftzU[}#EBtܥZZm%Ǵ'qrsm)kkr0gTRΙ=iU[c/%KNn.99y^8m6 -[JW e.t;-^+k6?L&E `o8xϠ@99x6ntH?N}Q14I>BXh(wݺU8N&=0AA0u'22ZnՒzS_~߲0 rrsybCtڹ>SRSya>7ALzb%W=we5Z1,/Y/?BvNC_ye}ODD8O}o], }Ū5;v汧aov3ϿaX0MA垿Դ4EHP09yhҸ[qr}|+uYtUgՒea0_nΧsp%+**:ƥ}.:o̘ÇycLf;vѿ?ÇCEлgw_Faa!q:ƕfYf$//wfbڴWJ$:w>y߾cv;)))н{wޝ=taØǜ9ѯoqyGOرc_ǴW^q?:|0~;vϏP>CZ4/VXA;w.Cp%WX6p8``V?Ǖk:&&͋{41o8޷}' M&n=M(gM `T\0 ~HNtO2{PQ6XfF;̢4\0͢4q v9_)$8FX/q^l$ͯ١ ̽QL/ߢTK .'f 9_g[7pn9eMMޓ?]s3[6mZl1[pjDa{)8wք>Ig&cxxxS tr1=?&?Ǜfa㏼+L;32m:-7)cXtTW ?J`@SMg֛}$&%q8N._'"NL$-=RǠ'?dܠ6,t 9oő4/lA{mxxx''w3 ۗw#I4)ݣ~7M+ǐ_POZ_4MyZxtT>OOO23\OwMH빶OkS"%3&;]F,l6^^^^G}_c8sa&uʬغm;7ď֢- *>7ngT9:vu3uVݻwN'ze/SXXȑ#/KYr< <n-۷o磏>fc&nGwu]tۥKॗ_;bܹs߿?}ţϟWn""""""q:Lgq-N$L+ƿm ,yQ䋳on6 w0,|`xOKxYb)F= Ph/ YՁ|ZJզ 'y+H^@H;hsg ^)t\[ {w!1 p_ Uy[>9#㔷W_@AA_PH3^ oOv7\w-@ ժ$R :[B ֥VI"&/-ܜ\XF ` X^76@D&dY8}8`6juyK2(#*²o#%5}gѼ BhHT‰II;piSˋnǞzsuؐ\m |% ~z(}6۽?i YYuՃ]Sօ_c8x0͚^jcݣ;AD1"_ȵZqMY¤k;_:x˻LB{YOǕƍٵ{$_OhHwd {.|;r6?w#8xL{?__FW&G=Ilٶg|5ׂ;N|Bpƒ=p:X׍#^G9z_b. pNy*:7ş \݋OiVj9  +{e]=/tpF S_y4󣰰1lPhРA.~֮]믿ѣG1M#F0y?IOOw_}1t_CXXDDDDD2~x}Q.h.س9=pEe(߫WNaz{ {&i^z=XQ lOP <|\鄠F}H]0aݟ+0ZuoPьn_-W!w$ KDP'68MhjI 8k, IDATزN4u/j jfii/1 wIRQ"##i߮9m~#YK| 6laebr=1}R_N{Hh,:.Nt%//J\VBB{ytЩMԍ '|; 뜮cX),"r+szĒiŶHN;M&(qߨj3+#^318-)~?_lFEFbrqaD~}';ba7Jloݲ9D:oooJ9oo/ ԍѿ?E_3?oѲysVڶi8ьBZeo;mp;H`` l4M5e*kNn*sxslny 6lNN~)~iy*^KII׷spD01Rd[2}ɑ/mϮrˋ:uٳo_Qeޗ;J~.ܭSq|=xxcq:Z nF8rHӤ$rj= c),,$==ݵS6aZq84nܘwfͷK.8ȱغ8ɚHL:6[R&}= }|֬(׷o疮c17\Jg͡`:*JՅ[˳Js1z6B;',$iNuJΘ/VJ\ l>18!umWh lC@ɧ$д}kZCve?}daxXPKSjZ%Ol&Ǐvt>8N^3}py>1cŋ?(`K'©m1ៀA'K,4}RmuV~rnG.`ϠeƮeOI|#7 EsMv m-GQXXȼY7OO4&N ܔo|c=S-,w#+ûٳQ^1Պfpb"_,X{xʼ/wS',4نPٛҿ[,{_{x7n7ߌ7Y?ኁڵ+[fʔxZL}f[kK9r4%((.Ygc&-7T͚5{<ēL8N֯@޽ʍkGxbϊ+:t(kܹs:t(i)'?VLiˈ Of.קKB,e޻KzL>7^VJ6e$j,[bdowi> 7}G3t_'J1gRb._`$]:FsX,WMr8=p:MVn!J~BE3$W NdQ7S0 x!{i*+VJƍxx%u؁o&FF<0׍5Ut͘g+c< Ϗy`*%Fvm{җk;QPqW_]p:xy}q_. ]@tt畨3{ĵkKh^`wc28_3 'ȔiӭYp ㄿjwÁx N4ɥ/ g{huxx朹3wDFDЩC{fF.:rU ׍do2wtҹĺ=yݺ?Я̶F]=O? M|(0X,|{|-|1okۖop+pQѣGy}t 0Jm75nB9z(]taѢƑJ3<;w.'/kDDDDDD"ʼ0rBLYi_>OnJ쩺 kճZa~t.GbDv{ZwGTT]ߔzMzېaPPXt1MQe-71'22ˣ~zUJNWTfq4>󗕝MVV6Q;}%]^bۦ7}5D6lVbt;t7w@ TtrܸFrM*szV6 __}[c$$fJ8tV c>ȑ#Uz]ÇvVv6ԭ[LS6?sMjjv+[ϥ."""""rϵế%xm'UlϿjKzdŁ-[پ9:{Q.?/&JiȱcLjB2?;9_aW@Rksu0?ˋT_k_//M}$:ҏEL/ 9j7|O΃'n݆a#+WU3„e7]}!YDDDDDIۘ6\ԹIb粿?r:a˚e1>_T+Ulr M.iX3ckwrY@r Mf@P^:q8 ;q-6v4^pBj&?IIImۆ?ȶ;܃gž_ײk}bĞU_l [^c9kFDxv Df,tImZ +#PZi/iǚgF}{.l|JNS/r8JF ~.W0/\Sunޭ۹ T? ٚ6Gv,yxbazIDcON-&I\{7v<-{_s40͢M 8)4mP+x ,GOڎPT"""""""R YbV)w2UM|IəZi{KV[{R9u+^bZsE ` #-um&s.08Նi$4Ymk"""""""""""rϏ‚|{tg4M;W:""M-"""""""""""5""ydgfPPXtU{łQu(EDoJHSk#uk_Y:r""""""""""""""%EDDDDDDDDDDDDٻAA R@EMHԱ暫|i%Ӭ+iQ74W}-YgjVR&jXC >0# 3C.&"r!ՕNfxzyuvo--xn'""""""""pȅD)xzzNDDDDDDDD6poMDDDDDDDDDDDDD* `""uy;~-aC[;~uH$Bp pIz[Cmo  BtԽ6ǟ7#4x׷PrxwVZ$?! uo$ D|\Sr5OKK׻7Wʁ`""sy)~[IC78 dXWOF\X^Dg?NNmۍ1|||dvָZg=p|}SS3Z_#746ƒGgꟐ~>ggd!igňÝ9>\t[>\&"r19cXj(YXB J:}s罌/w<&?;9<}$0ǍؾPZY+Wakϯuat#҂f,^g n?X ELW`CҜDž]eo/ R=X[WXە:]m6_G۰ODDDDDDDDDDtuvDD<ΟGss3?,i]7oڝ'~mP3*X`NA7`0`xd;_.rׯOYExw4x0vcvlN+>1@A|וj''Joy&lZ)ޱSv }q1c;\< -':چ=}""""""""""; DD.޽voMv܂ں:7Coh;ޭ.^ą c4@Ϟ=1e-/f<+L23{gzz0 J/`0 --`!HdS6*%Z:r GNj:ǽ΃lpL]8{ Řl͛77/<{4,uu}CǏGDD]u]bt;5]!Ow[Wם_Rݏ?7Qאy  vPɶ gO!.qh'eDDDDDDDD6 IDATDDDDX:; """"""""""""""r.>Eٳ&<<<0am+VT*SO=eW;֟7o&NtڽS_c=p pwwǾ}X.JJJpu 6 LJ:?DDDDDDDDDD\Ԛ5k  2nիWদ&xyyݎۇ9`Pǟl8wJ8wt~ɓ'^3.;z|CDDDDDDDDD]pȅo3|h4H$67 ^GHHBjѧO+**b466 MMMVߑ5{׽Wv|veB/f~@"zҞ`JB>}cW """"""""""[DDw7B.C*_oWٳg#&&4hD/++C||>F2͛7_?x饗D_POQQ RSS1c`ѢE̘1W9gZd ͛=zlwpٳg!C ,, hii)Ɗ+|aaaXx#""""""""" DD.ٳx7n6pFF***0ydtvuZ-9bjz抮GaȐ!pΝ;x{{sQ񳲲Z J@,\@*Ϣ6m޽{m#G̙3PTtXdIzN3P#c00}t̜9eeeرc V.Nnn.8`Sδo>9sYYY=>Crr2T*JKKqAZJt|p9TVVbȑNˏ.0^&?s ddd1bΟ?/7N>~ ZQQQek)~uu51}tT*TTTF~~>Ǐ3<HHH<`S`ڴigs {8+K֬Y_W_'[x1̙={+pTKK كٳgai&dff"!!兴4GDDDDDDDDDdM} 9lXl?{zzYtݗ_~W^Os6)t|4B^^^HOOGNN1͛1n82e ֯_bϐ>x}%"""""""""RÇ\.ǖ-[rJr(J߰ap 6 'Nڵ8y$X( ޽ H˗äI0rHDEEAPlΜ9ضm"##1a„v3~^^^Xr%222?Pt~HR̟?ƍ3_UU:qmo|2e j)Ok.5>XL&L^{ )))App0veW_l!)q!)GDD;\XAI&.=ġyPՐdVokMmm-4 ݂Y󈈈]\F+~:=zg/DӡGHOOGVV233;[ 22?3bbbep2HRoSFբv """""""""XLDDG*B*:~~~fѿ}1[8goI^^*++>he+O/֏gp; 0. L\lݺ8p@j޽ѻwN%q >ׯ4s9LDDDDDDDDDDDDD"LDDDDDDDDDDDDD"LDDDDDDDDDDDDD"LDDDDDDDDDDDDD"LDDDDDDDDDDDDD"LDDDDDDDDDDDDD"LDDDDDDDDDDDDD"LDDDDDDDDDDDDD"LDDDDDDDDDDDDD"LDDDDDDDDDDDDD"LDDDDDDDDDDDDD"LDDDDDDDDDDDDD"LDDDDDDDDDDDDD"LDDDDDDDDDDDDD"LDDDDDDDDDDDDD"LDDDDDDDDDDDDD"LDDDDDDDDDDDDD";;""_~%t:$ 0dx{{gŊJxꩧL'''ݱo>8˗/GHHN**>uyaĉHOOz|spE!ɠRp#>>ަ8MMMj=77ΝRȑ#hhhN/`uJB:2}av[""""""""""r. bĉSbѢE͵)nׯ5 @HH477C@.C"Xn%MӡbyKK j5z 򫯯VE>}l_=y""""""""""j&"Kשؼy?^zzƍJxmnСCP(HKKCRR.]dR.?55gFLL 1h ˋP(x3-[II r9f̘ӧOC.C.>3`d27č7L(J,JKKi&ݻdZGAqq1j5zӧO̙3QVV;v`޽j򋋋CEE>S 0O?mGTBV̙3PT8ydXNdggNB@@Z J@,\Pt~mrssqQѣG͎P~s!99* 8x VZDDDDDDDDDDw+ȅףW^EϞ=q;nyy9?]v'##xK#Ff 55IٷJIIcp 444{AIII&OlB\xj~Gaa!T* ~a6_C&Eiii={tRN@^^^xADDDDDDDDDt0 2>xڵ/ToWdppq?{zzk.~``]Z2k,|x!Jlv=::6!H0w\zr|8r9lق+WB.CT兕+W"##.`0i$9֬_T*1n8Ƶfذax'@Z @x !!SL1JY]'OD߾} BݻwVUUU)71giŔ'صk뇈bNFR^CR$";pab%lp(//Gtt4zZϟGDDt:ztdee!33i/_˗/#::nn)jkkhn-ί RNMDDDDDDDDDt)9Vg|||0`wH켼S;b痈,.?0l0L4 h !HlnGv9|3 @HHݝ{I>}:F:}3qac߫E qXjӧ}1 >>>6ADDDDDDDDDDgݥRSS1{l 11 B]]ӧ!""zpY$&&bȐ! Cff&ZZZ̶UWWT̛7ϸmƍJxmP(x3-jgƌXzMcs98pӟ$ bbbL5k·L&].b;227jcǎE1p@(J֚S*x1x`A&,LDtj8rV둛 HJJBPPvio|s=dT*XjUv._zF²eˌ322PQQɓ'Cٔ`1sLaǎػw/Zm88pMcS\\ 5[~eTVV].b82jn~Z J@,\ФNCNNЀSNd""""""""""j DDw #FeSNŖ-[7n?OL6 ҂={`???ddd //$FAzz:RRR;8-r?~f 55l-~:}אZʅ8ZYF~~>OJ hnn67m4L8C"l}r!wk`y~!H0w\ɓC.k#:::@DDDDDDDDDt0Aaǎزeۿ@«D"1..?۷cxg[hh(̶t:=z? 8p 徾8p oPG)W.`0`Ν(,,4opww)<.FBӡ hii)ƴiӰzj|ט:uqӑ͛1n8O?իUP$''cҥFs0w\`ڴix饗pe@CC>s_Whm*h}Vݵ& M14AAA=z4z-QPPശ<.'G… oO>B$%%!22ҤO>]Я_?DDD 66Lbݺuӟb!˱e\rJRtnׯ!J1|7IPWWgC?B3YH1c^x"44&ϸ*h}ذaMT*ڵkqIP(ؽ{]񉈈H'Vyy9^x; rƷ*\Zb֭[p_tR~HNNFjj8˗/͛-q{Ďwv|ԩS_L ǧ9ӜN_wv\]v!??溧N† l2GssSsj]?m%4~}~!""" DD)-[?XԾ'Oƙ3gۇJ&kƌhjj‹/Gy8_~%pGO~Gs9bon| r~_nn.򗿠PT#GXKGWGGYgoGz믿"77W6)s+4~XN/wb3g)S駟ԩSuV>}.Q[[:z~=Ư/DDDDԽpȅ CZݧׯ_Nꫯp5<ʚQ^^`Fk̖544ɸrJ/ 33FBllv,iNfoci *++V_ /}Gjz}17؈jzڵv?>fΜiL`F#>iaaaV?`91Y;92?b~|ϯ1_G],ߎ38_W7W~=ch4裏!|W?lٲi=>`}~]?_bOL/~$""""."( ">>cƌELYb",c# IDAT, /NYd ͛=zl߷obbbAXvY$&&bȐ! Cff&ZZZ_|1rHrY˿r3fӧ!!g1WVVx -- aaaWh|/Ծ#id7n7n6lO ..ƍCCC _wK:nܸrRI]1_صk x5淋YOCAP -- IIIt}oN̿X;9k~3Qekwb>~iqB@``]cnnn&?{zzBVG`}~~ߖgG{{m+[WLD O3g ;v޽{M'z~~>|M8w*++1rHwʾ}pdee+;z(V둛k,{琜 JRS 00ՠ3~}  .ܹs(((WLB;J%j5Μ9J'OѣG믿BR˖-3_n9bΟɓsHJN%K?++ >,JKKi&[ʿ_X;9k~3QbϿ۷ 3g,XЮW^b'`0~YYvލ7xGFTT?ޱ/Y[RGDJJB]G""""j`""T^^Ǐc֬Yi&dff"!!兴4bt:hZhZoAfŘ3gzٮ,##>>>ƈ#pyf۳gfϞ CFFuCCC!!! 3~S__jcPT?|L'O<[蟽6OO?V(xyy^w41RRR믿bӦMXv-׳gOxzz⩧Ν;E1Ogv6m|}}ၬ,ִg$$$vƑ[Bί뫳ΏcۄsO(;]e:8_xmVq xzz|+cGst|ļ57?b~Ϳ-Ok1~j|ozz:>ChZz#?wvDD|mdmZQQD19r{xWm ݻ6l0[g<˃QYYi|v;777Z-!H0w\rSsh۝?{3{qU<z*? ;;;Z/ƬYT*Ess3n޼n?KAAAƟe2Dg3?n_PP'\o]k#5̿= qb8:wX>8k-Ssuw+\}-d?~< |AxX~G{Yjj*{=֢| |Ad2wߏDDDD<\&"rAmT-.]8>aaaxBb߿?my~b%K`̙L&D"FAHH-~#a0sN7g&(ՙ,o%B~3A[????dgg#;;RRĤI_w4zjTVVt Zj555&9\9a}5vm<<<`0`0 H1h4/GͿ=9:?O,k+뫳Ώ;Bg'uzim|\k$ 3gTϑ | ͿGϒ466bݺux'7k_G&"rAHNNҥKQXXhW)S`(..z[ %T.F=wwwP_eee/&zth͛1n8Q0zh[ף#h]o[L Eyyu"455h}T*5~X /Ծ /H HPSS/¦kjjVƍT*eBǗMTTTo9asؼy3nܸVua6\ٳdB@\\֬Y())1eK5B#X?ͯ:?K ̝߭?t3׿2B/+\͛a0P[[v1+WA믿bm#XBB/)gRRRG1c{F#gHDDDD`""~z߿RǸqL>0a^{5 &&صkrgذaMT*1eQ$3O>]Я_?DDD 66*1֮]'Oo߾Bݻ'dΜ9ضm"##1a?Tb 6 O>$bccUV˄_h6l؀`$&&"<<Æ ĉEw4Zx7L2]]1qBP@c2צ uuu ?\-[`ʕ>v|9 ** xWDOL~^^^Xr%222?@999X|9>L4݇b17G:GGk'Tkc2~Ukw%;w?f͚Z 655駟FϞ=ѷo_`ԨQ7o}F6>󷣬 Ϳ35iii1aTvǘ#;#')q!)9 "".,@$mΞB\vt:BffIVEii)ުP# 22?3bbbUVVTjwh47ނϙYsu#::ngdnŎ_}}=j5d2[a::Xjߙ[se\|85x`;v,]vX:\:ÖǕ+WaF󈈈][{owX_gq}l<~un~9իm[ ~ӕwߏDDDDdX#1 yyyDBB< ̙3:jvv*fu>~!PLbs]WykMWȟC]ׯCDDDD%NJ.uG2 غu+"##qn {F޽;; z~]]waW]WykMWȟC]ׯCDDDD<0Q7d-Pr=p.>Eٳ&<<<0am+VT*SO=eW;֟7o&NtڽS_c=p pwwǾ}X!8z| ͿX\&"rQk֬7 4ȸW^ew;߷olw;rD_PPCfܹsP*Xqė_~ Sґ9z| ͿX\&"ra?̟?jyss34 r9$M bPzhZl X5~:oCSSպbwd~nn%_ӡbyKK j5z 6B9ǧ9>@'ODDDDDDDDDw%>.qFrHRSSS1{l 11 B]]eeeGJJ fRhjcǎE1p@(J:B4$%%ҥK6]0`cԨQ&SSSyf/ )**B@jj*1f,ZHTn%%%1cN> \\≯ς 0n8DGG#337n0T*ocL&ӧEgmfƌXz~ΑQgϞEbb" 0dffi񉈈uqȅ={o߭@EE&O N׮Vő#GP\\ Z ^\?# 2.\sPPPooo}=j1~VVVR hhVV}YbӦMػws9s* :K,1^7t&5bwd~ O3g ;v޽{jE |0`***PQQ~ڸRZ6ɓ'-t: ;; 8uDgm&77{|:s=dT*Xj-\^7ͬ6[1ϟ]OO?$DEE_]]|BR`0ƻ (//DZk.@BBxӦM3Z7++ |{=Y[ 55IٷJIIcp 444{AIII&OlB\xj~Bo2̞.}|:{ҥK~~~@^^^xADDDDDDDDD DD.lXl?{zz՗_~W^O>>سgvgoIhh(tRzЮ8& H HPSSu/pZ~B/߭}SQb4B^^^HOOGNN1͛1n8\RÇ\.ǖ-[rJr(J߰ap 6 'Nڵ8y$X( ޽@dNN/_&Mȑ#wbbbBf+Ʋ9s`۶mĄ =CwarUuUݝB:f, HTF2 # ODAge^Qqw@EAEpPG% lQ @ $dOH']nUwN ^``vhhhP8T{Ǝx`Կ/~{X"̙<@&MTq~`go;l:  555 7?b{wX"U  q1F{vUM#`*!`*!`*!`*!`*!`*!`*!`*!`*!`*!`*!`*!`*!`*!`*!`*!C]vu",8-X2bw#w_ozS|0{#Ǝ8*' "N=5b;Rn%="cC=2""2!=_Fw˹첈8l}>۫x۳/J9?r'Rc8":*=^.^>;>ʶ+v~QK_8찈+"R_jć?1qbĘ1Kv_~xE ծ.ӗꍍgKN_rEE}vMA_{8唈҈mlWmAE45E<| Wrn=8ᆈ?9{0?Flܘ}vķս=wۭgWt.5|;*}J)u}ѿji?G|[ngҏdv~}/(}^L 'DԤ>Jm?G3 :JxbĪU_J|0[#j۶}`ļy=]~y`{ߋ;7+ϟ_E3eqwGBr{n vbĒ%Bxk#~ǭf_Tbm_,pٲ/U )|OJ]_YWNDZysWش򳜟J Vg۶2B~Km_ܙwԨSSrUk6SGlu;R/2J`ԩ'|M| {fԋz.={(1jTC,V~kkwG|r׶o{[]wuS=@:ꨈ9D|c]Rﳆ1̝q˗W^ޔQG~̈N=oxq-}٧p_/[zp@;.b;}\_DzӟN=gN-n3S,RmW2ⷿk?szϤ9؈3o}w,HQGv~dgB!?L#ootlj)S?kRWSssjoۖک=S_yq=r]d=?'zN}IDAT j#]wEw^y#ޞpCuG˗g};sGS#|S/ӏ<4v-+ۻaރ?cxub}GGjG5+]?tӳOv5`PH^lI=zE|=_w\ e< aKnjIʾ͈cKgZ "֬IC̎6eɒ4}9]Í# q(wyRBξѩw=RիS~Y)`Z4oME ^eWo]{o{[m[/zFE~{򳜟J :*q1W\`AZ৞JwB {_oH9ޝvfwZ4uumWzZߑGF\xayuOrKu|*+^wÀ ߬g{|g u:oyKzi܋/̝s&Ї܋O<ӰBn"YgE˿/K?~|aC5n\?kLgqww/wUߦܳr<^wklz>jTy'>SNIROgWK|#˜2%@RތJ?=Lf-?@﨣ק;k| ;⤓R@];;WI0Yz7rK oRfz~sg?8ᄮ|*}|!r#vUz\'uk RݛnJ"?/YS3kC_o'> )O͚zmiI5{Tuu)@1Dcw͚:C]Ecc L,ItSzd1eJjnK=A;wޙU+)'?Uz* 秔ߜ9}vgUW/իӼwߝK{{ ;P͢we]8GlNSF[|mVztRč79;{WeY>۾7Yϊ˕W>PTn_ɌHwv͇S׾g9߳qu> #~ô~kk7w+?8}{{sEzkꭕi[SկxҲ#'=_p)SeקNMt~ vkW`Ajt, ø=Hz~S]n5f9SJk 47UG{l2s{?m|> |%o{[ěqisO?iyuܙ^4?emJN~u#J__`ꪈ_"b}ӏfꚓ5穧*E;;7_?<9=O/bSq)ݾoDzi59|֬Ԧ\ӵU7yrяFeپRwK7 ^  V \ܒB/~qkPΙ\4K/MCR8JsegGuMʳQN8!=(_SS 7Di{7}lkP90#<0 JJJJJJJJJJJJJJJJDmD{ X0r7 !Jml^kV.j#p]ޚKcӋ2\;``--2\;=K|yiŠ Wzj?>54{u5d`bDG[DaSMEqѼ(\(lJE1sɵ;8ŠF<ȣ1Ph S}ѱ-mB=_QXز:cvE~'GO\]CD.+YzS{{{^6&O_xa}DDLXaonQuu1vLo뛛OJ"Nb!9/D/|]LyrԌg|$tD6"W 5^hQ۶EEyMDDX*>uEsK>ETL65<ԅźu/Ķm⠃fg'uUq%c hٴ)̴~|mٴ1ڢq 0BT~)FX-Qx5*ƿ$Qc|MQ׎L8"Wǭ7(N;U_{i>f<0nƸG7ě9:.vQ?{am?q~Ӎ^h˯C/S\466nI ~+&7e__c͊%S R}X(D}k_x0Nȷ1!:^x0r[#: i z O9h0!""xxgX,˖Ï<'p|^&V\o|·.uVnllS1ۣ=:::p9>`plu5N1Ŏ(;"Dn\csm/>/Dk|aM׺-oup4"> W\.~n4{Vtۮ7V1c]^{mmmc+WŘѣ3{?^|>\w08jjGE1C] `\.ho[Q?!uD!-mˣcȏm\65MMW|1~>ɓb˖-WZuu1puزukon.-[|ߌ>F;|uQ̏Eac1jһ#_E Q3! K~ {1.-Z/:exYqmmH>裙6uZr}mm{sF>B~g|m/ٳ_& --Gٹ\.jjj&e=>`ՏcǕ߹ Rwr\mjFyb69rű&F4o^\혈\m.\.~Xbew=;{Ikmg\\p޿=88FksK{yxɒxIg{ƻcUT{X0ڶFC] `\.fLF*:VFGGL|CL8tnl`Ӧͱfژ0kPWa^h?)Z[[PhqcT\Vn `)*Ry){>hcGwGKKK uUbw*8`yaX|PWA}G Z6D˦C] `ڡ@MciC] `sT C@T 0@Tĥ*IENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/eclipse_hello_fxt.png000066400000000000000000007164201507764646700255200ustar00rootroot00000000000000PNG  IHDR$#sBIT|d IDATxw|Uם{sCzBФPbYݵWuuׂ]ܵ+AAI אm3?B.)7Mg|,˲,, (oaM-cV_zl6[*0*^lTz]yzr򄮉ei_|>Vy?sh}'EDDDDDDDDDDDDȷs. a]$q 80W|͆jV a5CٰL èIC=JIߪ=9vh3׊ׇzl†?9\=Pob?'먘n6LӪrc`Mv0kò*ުICBkhݎl0X0ٟkPwͫNǡpJ+2Ô*؍+#~WP|>AtLqeT[ېJvMzYshݎXO)cd.#֍obB K|;WΜxPza-_"""""""""""""_6G jOC! \sZ=8`eYX&VvX623*q(ekQfpNL=k'Z1uZ@aq)_|}yLt&LVO* ,""""""""""""p~{WNC9?Pҷ* r,Jp;n΋+'kpK%l˧xkY%EDDDDDDDDDDDD| .!))h3j&t>/LZW}Uo+ @4aݚm?s.W]w&aa\noZܹ?ԻI'X}]ޢ"rs󈏏#**Ƽr'**ƺ, {rC{ɜR z+ek暦ŜVp܉4Gi ^'?ᴨֵ2߯;Dn6%}q=U1nVztᤤ$sQ} ʂ~lp4,[#ԖYl6nZ?Ԡ_TTk׮!-;IdeemvϚ50?QQ5z;% wR6$/bq\"7v4 K,tKiiA[bZi,XsϽȹN8,iIS>NQQQi))ɜ{<W^6oLXXxԭ#)))`bbbI,a;ܷo۷䢋.fٳ?n҂6aup6 #ѿN=}QQ\|O'22uc8p;w4M_nga̝# <_eҥ())!-;;w"//EOll,}[U֙>s0 ;X<֭g˖˰aCk_mv[1mf;t#bʔ)$%%1g> IIwܘ/"""""Rlf>n%\Jrry /wgHM=<?S w܅iA׿]֮]àAIHHK.e̷ˉO]\݄ݙwm " P;6 2*ǟA߇U\uz_׿`J/0_f̘Q|\.novwƦM~5"##zDGGq@.ylY׻5?3 xzmOuu폦$4ԁej-z]_q:\ߞkk{\xeۭim 0}{q Gc>f͚oI'߿u_DDDDD:T4++-[q饗)IIө)L;uOl ర0wʌoq$99+ FLL YYxu#,,,pO3s/:vDatA"""""#Xl%JFjBBBXr5={$//1cO?믿EbbgHqq1>Ӧ=iFv@Uzp}/'d9Sx0 o(,,dĉ~픔re())ʓӕq8&N%KG_pKs1'`Y_|&M7a̘Xɋ/^{5Ȓ%KMbҟ^w|p'3vXURRB^^}rr2^^qSOqeql߾:ˬ~V}F:ArrK,r'R\TvZGii?[n>ԓyXk[DDX^llTv j[U^èQÙ5k6[l9T @\.7>6n./bf|?Ҷ*跾 _]v̙oqWP>9̜iiiպN0gYvee&R_.BBlG:t:˃noJΝXn˖-K/ŋ c9CF*?tLQQ.C blܸѣ} wrr'qn7<,g0M/ /oMϞisY ܏ap饗r駰d֭[a8/4%wEhh8+Wfp)0rHJJ6?nw1+W'++c>n7ٓENTAslv\m/oٸt֕YnMe&%\_UV׹RS;;N'64QꪫСiiiEQ^uxkmJm@X@/ատ$<O PχUΝ;xyzk\rex<^L j]-":yr^7Jr68沲R Îqh*#ðSVV]gVTGl% ڃz`Y636 E~aÆ | NJJjMӤ_fhh(_|?tvM7݄0 x<.It ';\ua$&&bF >}zq7RNcVe*BHHŹ^xxD=eVrk~ǭׅem϶mp\tܹQ]0ۻӯ_.mS[*zYk(7ضUSp^^>3f ѣ,J́{"""""HNKKc̷uW(,,7^W^A\ټy5h}=|MӬيrLdsvv6Y\y5k׎,L$%%+ධ_18ֿEJD"p/J| 9_wDDDDDg̞1 , /`РtR4h3PΝ̙ի*ժ|2 3fp3yl6&kYl9 _>^5k`6CCy睌33gCᇳ+hc̘Q3oG׾?BCC{ڴiD/|:?\c9Am.ӦMò|\B[<5-"""""Kn'!ccc1|4h82呀 ^gpV~=Ov%%ر8bW^Crr ^z9 Ng50SaGwfŰ:c+!“x(sPR2z* ,"""""fò,Fa30ztc=#lvXlW\'=w|ڷOaڴ1u‹DFFrmҡC(O>(>$M@y/QFpL6||>O[a;tHL̫uo#Ӯ],<tXo Կ7o^O<8j- jk{ͩ6mO?Dhh()7ĴUr6JK$'pqJ 4}gcvMJK]`߇eYrYN'ܖ5Att<G(EEx\uYW>0B.2.rmOE/՘$v9|:GcƦbռR/Py T}vٻ m[(,TeYMYYW_DDDDDW_}́ݓ&O  KLZ{())a2d+ҥC=8p;w3hV\N>ہ{,[f?EiD7|޿@#B8 '+ԕG+Sx9OE՟̵aY&ev%2^jkvOf\Y'"""""""""GzOU=Κd2\x>r+W&++ł &wgͦ(EE,\zy"c3ƛXz {O>֬eN*ϼ"}~UλR^zՠ?_139eIM%icLUn7Uì]ʩަ-v,[+3/e&\t oxIlv*mУl*1M}/[nmrch9%߰;\e?u7e^ݓMo_տ_~~+V[HDDDDDDDDDdkqiQ[ϒ W_\>lk̻[xΐF|")1y$%&`۫,ߒԓO?τ"%P/ߴy#=rD XnֵO恶iSt֕_{?rr ަ+degۮ]U^]ۯ%PCf6|d8*3_~-SʙP(pm}c1ǵ mۖ9>-҇ %g~vEΝX21F<#A;/@.s%;'ĄDBBKDDDDDDDDDtnp~, ;v$44h^vܔb~Fq׽ ˂qZe~4[m籧3rx:M*42=۶s9p M;CP1|NaLvc.=oiLn:thzKQq1O{iiO@VV6)u?G 3뮹s<{~ qdoO/++`}?䳀mh2┱'1yS.Uffx鹧˯[~~䆍r=bX駞ʬ>~Voy-ن9f+c`݆ <‹n\e.>C46 Ӵس7نYCo_۶m91s÷s4{KMI}j*WKN%7/I7/` ҇*z/:'?ޜo%xY> C"""Ïy皼=sW]#< FaQᆱ DDDDDDDDDDS +3a+/0χe~fÏ>L) fÏu%+32x7", IDAT!2,qw ת*&jt9[bB#҇qwrؓ4pF '];L>t(F<N=vÏ{n?n'**'ٿƍnn_YNDz,}ࡀ O敷iZ4fpu࣏qΙr|2ϾRz{ kZMɽSdgqrtUO]vLd Tr,\W@lvmXu\U%.2eq dznxCaQ_qQ1w`붭L\rхVn|t*Z2 o.ܵMW q^OO|\{3z5qXbb[m9c&ǝN8m>Q.g4~ݴoC||k֮`=7`ymjf.=Ν:2\׸Xƌ(9s9km޳.P.: 5P]L0S`'(O:8nۊe5+F.*yEn/޼ݽ{WmC͡}\׊㶾mj;'%_!,_?zADFFЩcGok6.Kne rrSTTae1/_ΈaͲ-EEU={Yv70-HeA>Aޛf0e꿪L߯/EEEUz5ƕ*kRss4->s4=É>hIMM񐟟s3e𳭵Ox\udgcFoYw۶ogԈDGEx((,de+o6wrLg;i<^ü?{#:*T^uц:fJJK,]LLPu P^x<^Llgs8"#"Ա#U{W76\}c9Ե=@m]pa[>|hlCîݻY9g!Y2ŀ~.=Hex Oƈt{av >8|vs7ŷn909)0l$'&lqT|wINJ4Mz*l- r7tF1 ?{vѧw/JKK),*o}Yل8hw&rhh(.ۿ=*OMNJ7[k8":*W\lTvfO OgfeQx0SW}>MJLwCm܆ĠxUWWcnjfcOlr+<lMIN&bo]{4ۯLߣv['i-)) R3]+lP>3MAPs=FU|ֵmSSSػ7ʴ۷Omdj8lȐ-)%9Ρw 4> EС^}Æ3>i9{oMN7ƞ+'{-"{N䈈4߹SG˳/L)QeW7k9p>]еKg~=\:וۛ-<ޛ5¢"/C^ҹ iݻڛo7ޞ 4{7/XfYfN߯/Ͻ'[vi}RY9qUaܼ<nwsU_OaLJ|FiYZ}ur2o|%,O.;><{^ީ4t0U~-݆;f+?:UW_:4 ׷z 5U}8Pٯ8˯ص{7P>+oEG2T>'B-aؐ!|} aY0d VY˚u6ح8<_Af>3}ʴg|ym~opݜ2$6oRty'9>Фlws Ӊ` yۭtwݓ9sq\HK;nk bYg;^≠ O^^M{iOeqe .c7~<]tHMb S\RR}ykݳ'st$V}upkgth߾r|_>RS9k8f~~-ن9f*?mXz0l js 5E08P ݯ8xv8 ݳug[se|o ClCvŀ~u?.a8A_P@G9s m3"=+P';."""""""""l%EVKR\\B$&$xeKx<޳$""Z .}YلaUپ,BCCxٛIrRR('ڧTy,>iLrs)..cԭԵR0umwޟż~gjry-ن9f*ujhjkˮ믽;Yk?}\]K}ĶkWY%|{+xu9֊M$Em|Ïٳ:}Nf}'=CPPPws 5%CݫHWAHKi!Eލ6e6yG4r9L=z4Im.%EDDDDDDDDDDDDDJ!{33:i"""""""""""""""G%EDDDDDDDDDDDDDJ!9B8:i{33ia1 9EYI1i6i]aIBb2Q1)B#uEDDDDp5J[WRX m_iI҆TzUeYm6 HCܺBBNBCKY^]۷Mn='P9qHLдDhlj/"aut:].Wf֭x+&&aÆѾ}{J ꊊ}lƒnu9˲x<߿pp:$p :U߾+*ȧа:Sfnw`)., ?/6vDDDDDDD% my_2@N.?AuV.]J~~~ GQQQ9=Dii)_|6^zp\y˲ؿ?~!cǎo߾u.|r-ZDTTTǕOLL s111)**bdeebTWVVF||˲,˧~1ȑ#k]_~!$$cô9x^ԩ[nK.墋."11EckΜ9̚5+FO`4ٸq#N .JxXd ֭cĈ.7/m]bBZ|aQ2vDDDDDDD% my_2@dL,Q]3sѾ)-ZV} ٰq#>/YNNΐKD-((dDFFx֬eYXEii){jo9?,4>fy }m߾۷Z2BRR:ub&].qqq$66].eeej\aбcGrrrj$}>6VBdd$vMxXhe|w팧yIͲPPV[|Η^L$9:qQպ67/of.e؝]iGQV`r~?Grn(,lx}XɄWԄ;ܭZ~ߖy,dG?.Z}ǒo _!Ӱ-6͵=[sʰq>eOvk[Dn\x0:t{doubkA,$L& Z-hcp1bԹA-ߔ21͘U!SCߗ[Kkjl7˩o.LJ3毛#aZ_?Nuyq74, V㺨-q-^ƍGTT}aQǷ޽㖼X->Rb{o[&=XehlnO'1n|=$FD&7 nguQ.aFa57 0`K.Xnl6vAJJ Cjt_[Ueҙv1vGHl*++#%""*Շ;n EjhOmq0qU@k!kSqmVLsTb7+oiu7yq ˉdON1^ |&r4ν9SOo1E/Nɣ1m+N~-)o^ړh_qƤk?,z7P]DNa_t"GfIoɛ?eG3bV.3ȱ&I؞-3ޛNӾW`I';B٧#[7e]_χƵJߌ; 9v:1º۲/HpnJl+ yK /zV &6͹O|lC¥kL1#풛^{%_nt:)((`_8-w`vL}=.b!FϪUq:3e.ZeذDбz8Zs<eLL >4Ujx6i7cY^r0yc/|Փq>Y4{HWOKė߹-{HK#/'4)w_#5QKNa-Rzp?;{ʪ"k6])*ء1oIެի56ѻg7gleenBRzz}]i[II ~9z1jJN*V떈퐚TONp^#w v Y>\ƍÆj* ի[%˲8p +324p˖/'?7lm(#Cdɒ U1d/_?ޛi5ʯ9 8f0l6f./vrΛkMBD덎!""""Җ ^zqǑH~~?ΛOkm~Tf`7 8 ;}BQRSNbJg4͒[q{ o=@9{wwA hdBQ}W>qOyohB؍gYS ȚWlcnHU$8p`=SSS۬'״cW(-sSrSVꦴ͝> {H8KWKg3| ZcۗeQra٪?Ggğe\.L°۱alP'&n.b7CAa11a"*>G G϶(Ϳ{o HKNTDzT^#O> g_~+*c-=8ɞ}9tѓ)Id|솝H:wDXh( gΗ)I\#D 'e x˙}kZJAY17|O9'K`ڼ786]lz:ARB˃N%ǧ_cŖ,,6lYs{}H/]%:$6~=1yanJw{ oGB)%a&g_>aS>>Rνumb}e>Wۅp'Q.3ɝ<#d9RRw\Ʉ[Z'P)sSIlGξetOK[TV>@Yx ΤI಼=lh | Ncӯlmސml[)ldsor+sZ|˧b1ͥ]xu@RNC4wA]' 򳉋 M Cqs7hM-""=Xv61Fv(kYU(-r'pBSC=s,afILOXܱܻwo.\o6qdzcN9۳jժVò,:tP~s' `޼yl߱1c4zݹyZ!Cפ3%˲HII,-[Nz$?^׈"&ItbCw, 1oBܤw [R)t*\H[WADDDDU,Z؟ӓ5{6os{US4՟Æab7p:l*/O+?$6osW#O{w̯x&G*s擟l6֬ 8 ϝE2섿7Yr!{ܤi IDATz4Mj=66J!e*zq:lò'g`ZS>ܱyqo"ٲa; ^cW>z7ߙMTR' OY TnDBUw>}J"/ eDd qڱCK~!P_}ީ,e#OK J ӏxc&ƟCld|!)}p KW˂_6j\eEҳr?/ ⛟0yTJr3/Х;Qq)ҵc**6}>L641 Wi!.ӵcJCa7=^;i蓍)x+FĦÙ`(n׉7W#x{ŧ`2Puҝ~-e|Gb1ymzZ\"J3aMhh׬-'HNoa&hP&goiG3hټ&36r}PRRJIi)%%JJKiZv nzn@N oe|Fݫ-:M/*+P\:c{X-҉W7nTN^7vҺAӦMW#0 ohFmiٲEsAvك; @3Y>x? %/Â4‚{Y7"_drsxͷ:ϔ2ibcuW_=|>GK.dffҪU*IeUUٷoIII ثkv3NjVn1+sOEU,J?c70S2Pf-|޹Hb %KKo[UPw̚jJ܌\[ŧo|T}l1Yp{۵y".f-홹x}zϴ.cׇ EUq98]"27yvmYBIߺ{T{^j@zg<5'>;M%.9y9RxᏳ뇿ֽٶG*QLmf0zq}gc3wоk<c,9a>G~3qD` }nfRTP+ oWbOѳ,JX Lx[s+j*64DrK*Ʃsc'gd6a8kvS}p Ҕ>Sbݾlr mOT}iؖ:Ҝ}XlaUZp$==5~j24޸^#.yr Q ,{:vUJWw#Ƽn:9~8m۶eݺu\uU_yվvhѢZ%wСC+9(+ڶmIunN_ƍ$ ~LlvkM[ڳڽݻ8۷F9 ?W5N۳b[ `3F%،lF 6[0[X W6T AjnLȰ{ !BKSALQJJ[@DŽ~@s+oF:NKU%,?Mg疏c7HHtg\=E}+PaeJ+6.}t"I\C-cEЪmdz%}&.FGgavbh g~n7HcIRgu)}jOiFoH^Okw{{6@1 | td2a6  atMɓ/ȄdW3է%6{` w|u띫YQm(n'6y{0v5-Buwj6cƌl_K %#zu›b-\5~*c.[h|'~DwJ[tV o Wiy,X9gbMGGb291wWIAcÔ(@|ߍ|+4{eڳ[Ѳ%D*+IY(;̤Ur'ZI!_vŪrŵٲMtjӄݙ8tG}|a yu-&/__aV;<2ܞOG{>wz2R) O]6y0k&16kPOYƿÁ(Mt/|,`}\񑛙jRZPHA8w6>8M}(j`=VDZRvs- .|2UH@tP _͝75E3[=] X|!t >&&I&@A9qV?P"FN!(4UU*o q: D܇+ӽGT?hMgbm:KHV+]|\6f8VDw7an_6gWk~CHT3#cصi!Zpϋy.^7|SVtt(s:)syo4to9:cdMuW`DWѾ WnSbڵ8Ы+͓'PVRªwn`=Vګ;p}EU| Lenxy<ҏQGnΒ7ߎZϙ iشk4:]NTt" n7o:Hc T@zzz]UUi޼yJ@W\'8*ˬ(d]u%mAfJT?ΓYDx}QV̗غ0>,Mi3~ }kd̚YHHچad n#₢NEybk/!uW.ՙ0GXܣ|wy>>٦Vӛ8{3s~ʍuh/warL4 0yُo;&IQvnr?1!AfM8KKر3- LAA{Ȼ| :tõ;>rXsUEd2U ǃf22ٗߐunN֡t0;.#A:Cr>%Dvmy=Yp]}G3ym[l].Ьl9lf˻h5}>:N:sYD3%#=Q3Gl"Ԧ( -{ER*dR1wWWϘg|ޛFl)rc2if9+_`5Qbt(ϡ#4MB]萖F76CpPjمdz &A4l˪u{gǚDgMZ6'S`_v]YĪIsx<f;ƺeLAFׇϧq[jހ=?5%$ ŊsQ?~ DKpG0ԮT~c-ML8 w"<2dEaԢ @]b}n'J@5wpC|rblj=pݷ߃9E$3s3H؍ܒUF*F>1+޺m3mbs|u(\U@mIYbJ~!,M9(JYYm۶b6k|Sۛp+{.fK۶m AZOTL @FZẃxN (-2OW^^IɾG!B_dV^%K\nѢET^߻ ̏m&惧5R3ƥg$햊n<;h Napʹ;gՄ qZoXW zyp{5zͩNmK2n'9IЭUu?!z@몋}y6 o`@`M6IB>N~q)Qvk}S|x'`ӺU}w$뎪bҾ#:w/=7 vi*ɄjAUeUU%44ŌI˺쏓e<1.^_o.-iH{-XBC˝{>wH ÅIhyXk) J\NEj=sbX ύ?ȡԳ2Rg_._c,^_17gڰI}G* ׍ISq8}9b_~ėuc-|]k4\[H2'ijF:Kp0KߐΰGS埪ET%nGt!DuҗPR$R{k{:'11(fT)'S#S*>On'Y;֑ HDvZYoif֢ڴ+NW SHKr<=Zo[8qDކ׌)k+AMj  ?-f_eyL([d%p<#bbBN|w@cPLf3-;vt,[I?WrnW@ ?9s3c*b-sdd:gݺt҅s_zӾ\雓Ö-[HKK%2%"оkLjjHϯg6NM%iEEE5wͤbWǵTTXU`՚5;cvI >B!Ӈ7{n\ Dll,,Z;vb2ѣG)=aѬVGۥӗՂz&rddeaלn'DlLWB6 ! IVJꠗύ'5)ބpmxx...fϞ=ULp,^ ?6f͚6i\u` {Es1u:|h=CvfpgZ%ݿNC.K逦.7m@d>Ĥ{躎^k,(,(RVV<P54Úի4"fѨ>Ylڲp,!D>q 7M2Q~3`G'&:jPRvRXTBGA~>Nǃ(8Yj+mcWx)+sPRZ :l-#y| )(*&n%'7G^G~ `MLB7M(?d(@!88\ Ŋ§ñì۰a38A]e5j.*~E?$BnZBQf|ʇlܗ+cݏj=uܛ.$:?Z穯ʎt -k}>b&Oɓ_degGCڅf'N _/]F]xrl=ՊdaQWT ;snzòonИ1k M㱅,-CQ   ؚǪӽY ?giɌfض/-j,ؼR7Z1]%k1zڴ?Zzu|v,vF"u(w}^R;tm^H°8 ,[C2m1W  W-X թ(BqIX,ADEl̺Ʀ &{Α0veOǃ^CKR{ J ׂ@/u{>gR_ލ7uru)\Y}w=NDFWLwfq_NVlue3@Lt4=n:w\﫝d2ruOܮ/{^Ç߭[ڃj等֭[KI+VPZZzV֥ulN-hՖ׶ OM%**y2@_ď:ˡyI) aJi؛B!hlaaa\X>dwfU7Lx^rAMz}EqOUíERiaoMٔu sŴemOHh7<}{{uRzI ˩Ϛ5tT͙U`(뼮NMرAU=|0\*.]:}z̨ݻw:ѡCtLt8&MnRp #:2Z-?sn:[Zྩ(?3r}9,6A۱tB- -e!XSӽ7QDi נWXU\^A!\qU|zc qmYgZiC]0rdiKRٸS׼އ,_;bXxܵ>g zֽ)υ IDAT.;c}((+s( Wv+>~3$%7C =N˸p%gKx|Z0MSj_|]$zInц7mA۽u 3'?x7u|'4UHgO_Nub/z#f~C`U,Zsxuk^Kfԉ}#nd~Ɉ!=gO]P %^ EUq98]DpV}\[{]/HIhKJ?Up~zTwzwLjt{8/^>UΚg7:>RJ ؼG^: {d(<~?:_w,j;8 p!Q,mwYނ܂CxN {Nc-TV]6r[?4Xl/U9YޣJҿC,6$n-yi%eJr MCxOSJdZ^u-4-%]GIl޽J.+ISqeUz_ 1ƒڵҥ գG[nE/1 \lBϞi֠''ׯ_Gjj6z{OGU$ZJmT홖VƝNx ?IqQnrJ`xB!ujݪw5EsA  eLgNAAΞOf]ラO}T7yqa^C*o^·;nWeewpp_yN"h۶ |Vu\7ZMVuJ)1QQJ1%xZb"AQO{MC;5z˫xk.qU $)@Td$,`۶mgʕ+իWyJy㹅x}>J.u RR_Āclh Cl=m:c3fxjt5KM5c6߾L4.l B⚶z- kH<ٹk1 4HOTlw?;22$֮ ,2+G_[s7OQ:yJi G{x40p<.;p6k1,"WwDGҹ[aҽn /?qQhIh56< K4O}\qJJr2~,!vnLk%]:u|!ܜ?]y2bq&k*) +4B5_D;QVr\~6Z2w_E;~f#5B c]Gl,AVnEDdP|<o=T]ղwm?qҴQHl<ˑ7Wuhm ۳1[<YlXÆaΘ-/`7٢ӵCC1w 0G"L8e-c]}sAHD6>sĦp54hl#z$-%LJa3ufMYMDԮm}l7Cс][0[˓˖d`L]>eMОQ^1rT5z\d5ԭCZj֮[O׮]͛陖Z/r{lzKy׿mLnReS!V c 2[e~rB!B]XXW_=&N?|>p6LHHy#{iÇFWj&>0M_~2gg'%r/(_F^Ge&Vm;V[\y /^;pzPt$jFR]^|&eVi=xʪv* nH1 ƎBڨ1wBggyof|s,@rU}cDUEKdidT5*,2UIhVudϘW-вU[ZVM ^ZRL~Kl[5gOUM#(*+B)5M&3&<Vx*\6&CTTu5?q#< jی^˭ow 27=3== KàV}xṽSfsݷ'9rPtjmVuUw8Of&džAf1@|밚MA~c˧,,H'pu So~ST^[?ؿ2Z1zXZU$y-dT1ϧ8hdu8ןmoN4t;ZgG?eZUGs)~}{c#=RjuGq]b %RJj`KV<:f虖ʊ) `ڵח UP `ٲeX,z_~CX,[~}]R/$^z36){ʍ*=B!FXX'Mbr޿[O@コ :8 uaL͟nEmkk Ab2[xرy&䵉WLxTt*3vql%RB(b\^E!" ]8ݾ ƾcp~5kV|>] kyhh(⏤Dt'}uS'n/w>@wkhhckX(kwmgr`㺝O^r+#:߽Êݛ fR,#%.'ֵ/%B:Ͽ (NJ6x9c u9L{WfV=w;Wҹl<(D1!FWN@=t⃙Nu[ aUH{#;_O| 6NId XAI{DG[i?h"͓ huZZNV5c~';O9)4x׺4OR_Pw}ЭsZ_D%6ӱ|%Rn?ہfX8su!դI"cp/W\!~J_N.5B!Bl6Ə?'0q„z_O7|EKw Wu^r%QS\T@˖ڙIHH0!8J)*)ѽe]kIL{:E3A1AŇp̴hem8;vdÆU7n2ϑ#GL4^={i{۰aC8m (tְ ^]˯~Ѽ5ܿعsg_i۶mϙL&4Mּ^/EEE4mZ}jnk.j7 z]8Cj&TUEun)=FQz Wz1g)]u^wڷIiњР7B:& us8#6$ >'Q[I:,!U6EUوѾ^݇ImJ"U=حNWC+iuUu훃%q s`gw'~"Zabk0 [@eDZ/m-,v:]?#7>m=C!B fc ̛7x}ߥK*IIm{XUU"".G ~nǎqtX/W%#)))UI#w$;ĄZ5ٿmvN'{k,^j+W{!((2Gi>3l6bOˑ#GO[6CG1bS\Ok=*ǥ̬\xRRR0 GFF }';k&u.}F֠*arH|M~% !Å|+B!_s]4fRe&c^Trt kO>a۶mŝdffǸqcu̝;t,\=zs汌7o_Kh+B2dHϛfڴiêU(..n2х߿={bZq9OwL%\]͎FLe>GJ_&B!BbovL%%%@մiSNnJvvyMh*BΝܹsq_O?Ė-[]Z_߿?=z8IIIqxg( vzy4i€غu+n^a`ׯ  ;k&u.:&i~3 C|bbkLB!B!Χ1ە*34OJ&.E!D_6޽{LjjrB522뮻y^i躎|jCQouW:rf/ٞ\B±RZ\bAS:1 ]q{VyBޯ$6!B!BO}c+U8JKoX d\$!BURTHn1Ɩ)Uو#$¸iK!~ ZW!Bq!5Jc+IX!B!B!B!.jc B!B!B!~HX!B!B!B!.B!B!B!B$B!B!B!"! `!B!B!B!H1!*ڵxB!B!DuZbhHw$6Ic! xvVSjH~֯ZJL|bc!B!B=Rt34.igB;%%B!B!B!b! `!B!B!B!HHX!B!B!B!.B!B!B!B$B!B!B!"! `!B!B!B!HHX!B!B!B!.B!B!B!BBQUd|KhRn ??_}2գG5vHB!B!p{l6( B(SXc| [5gl v}B ss}ӟ"..ܙN)֯Ƃ4P$B!B!5SY!<^=6QFL!.~:n|VF]kT3:Y_bJrss C*Ane<{{lذa:/>ʨQ#>݇־~Qn'Cqg[CrX[[<'UU`СܾOMsԓu{V;}7e5Js=L8}C=Ę1c:thB!BqgFU/]SeZړuxvZV^뤦2`@:/wibl(OXa$ۉ=9M?5ӟÙiӆB6mOM $ K/feie38@RR*%| zQ=*NK.p8p4ZBkSJ ֢]ӯV#VHoa0[Nٷ_6pUbyѣ*O=D_?{/r]55tè9=V;( pB.Xt)iiig|l6 ,H>/Z#GV{3l/!B!1Z6 `_~iK8N'&Ί+=z4ϗ^sN|8CBCBj!Cyk+pxjvv!. qكǦT{,bwRwj{ 7S`9Lzz+\.=J\\AAAվpKbb"&SBr333>Ǐ 66?n\{ !++D,9bz9|0XV喙T0 -Z 7V(@"aR5PuĪ2{ZOtbYU3Z/8.Æݕytow]J@9%KhO|p rB!B!6 Nj_cθ''6mukLgKxxü(.˅;̽ǚ&66"j:uBnfZj+Yan7UUuq7&G8t&Of׮vj~333?` 2Go",4Y g/F > UU)((_bĈ.sΝL4cǎt:ݻ7yėֳW\~9s?ǿ{g}OսfРծfh"ȑ#*NJ-fq(Y0k*&͟5'TLI}gfoS'y{LҰ=Ʒ}|ws!/\.dytIs]f ӦMtp8?>\r d񤧧LTT}))5+}vƏѣGq:ۗ?g9tSLaRXXȏ?H.]θ9!B!%mloiwu%Z`֬wy*y'8i3fZfkÆ z bΝ/V#zN|s_5W&&&~iӦ~z&L@^Xx11c &LիϹ\]7n\r /.QF6a?-[r1, Ǐ?~ ܳ`s+B!B4oBdR^*?SZ-k۶m8N?}ظi]z)^Pa/\M?^oq7~˒ŋfƌ?<6@ B\.6W eW|O RTXȡÇCLL ׯ'}.JKJc͚pBΟv IDATm۷+V2'cvE9wl޲i?=~{Ν;q;wd |{z]߿W0sL9a\s5L DGGW].7哯*&LkL6qoL,[^a֭|Z<\=f tbҩSV :{D;@tTiEVZRzs鮩wV_s?0 B#Yw-2X!3qDBCiݦ aΝ ~xn:|~Fz$n喊iZ 2mBBBDUU*իg<`_:lYkT^}5z4]v=MN&UUI&d9r|YYY؉֢Es\.۫I&~O_#3ډUFN{:tN+bۗ, ԒgΤGj*o~zyjq}JEyztL`ɋtw zW>v|ddd*'O2>}(((8g833*ClENNi Oay?{wU?~5:l*;i-/Q}OJ4M++l1rɥr-\e` gy$s=}=^s_^Чw!Ɩx> P?qu\ BQY*r6}8{dmv+ʵkQ˯͛ Dsrr̾%3~j{nZT*tFffW[(cuز'j( "t:cǍgeΝl߾{dx CdCFF18MS-?p8Uv[p3#WRJkO o n| |P'JLbo#_?Μ=Kk|w)FNwzPܳ^Eؾ}E $;;t7oٹ㠠 rrr]\aZ!B!`tupŁӷpH^0^f`Mid ]@K꼻yonˊ=A !*W7g7~e(ضb5g.%V<NGJJ2""ظaqy&ԩSNCAFFqWW~Xww<醄4Cƛdgpu.$Ν;oQPPoxa7W)[`֬٨y,]K/fM|-[ۅ+WBӽ%iB͙; h<~2o^-SY j|Ⱦw^qww7~ȐJ[B(pT(pƝ,|r pΧP/hd nƽ&V?wAw48^'/=.s~SJOg)OnL\)zOB׮]2e=VmJ-5kPx_3g6ޗV\W_}e̯K.{zz:gpOjyx>՜ُKsqV?Ca[»Ng%lݺ={xL:WWW7onlPU|"[Ymy.مsO2{Kh1t~5O8 VW8F2v:wBz1 <̬L͟._cRAC¶I̪+;vO=4l߭ZɄ iԸA5 ][&LH ֭[tЁ-[6Y 0~g K?ٙAf}chm݊CKnT$&$_{0T*Y2FņQմlт_,)3,ƛR-wB܏~rs;*8)8)?+;64yYddᎫks0ߝUI@vj sپY-Jŗ_.ԩӴo_usW Jf:itJtz|}sѫK70}g3t k֬a̘1Sn]ԩ;v참RgݺujZjŲeˌܹlƎB`Ռ9ԩCrr2{{B!BQ~z٦/jwx{kk[6LP<"??ӧ3axz=Cu6z=F_{/Mdߟk*={B%pF uf1M{)wy. ~~~'hnn#ףӨa#"/G2}u<=+^*fVT{B`Up޿'a-8y$.^ߟ-sc?Fݺu-!DٔJ%/ W=d;1u=Re>;tW^ƍ}q>Q5tz%~<0`0-l-0S-B!BqXim3/]BP࿋f̞=0;gNu ύ}owҤ !MIX!!B!BB>nά߹{Gez}L!ytB!B!B!Ɂ0dDB!B!B!B! ,B!B!B!,B!B!B!,B!B!B!,B!B!B!,B!B!B!8Tw,TP@rB|5G"+v*>5;BfmRoUs$B!B!߇3Ty.#@ CqJ -B!B!B!B!Onn.qqq 6mj״cS)ȿc# ݻiƤr)6jgLV^6 x\.huZN_?G渹u;LwVVÙ?pP:P˫&jUK,OY#-WKwYil9T‚BycU}Ш aFhaoU8>KB!B<8B:|0fbڵlܸr@}'9pQ70 6``ϋOK0~|?/p;;|M IVǻ.]7'೭_ruN^;߽ku;18 y?#)#ٸ>RbLu66/'ɝ~Kn kOVFkʬbh55l܍ l<ҁΰy u,ƪYs`oyIד1HoУjLGr;'7gni H˺.Nw9֝!]`t+VƿqFS7u˧.v}\ihtZ2s3Adt\.Q逓xyC]FaŀL 5Jm֔\֔ߒ T=3r3qurũ칻:?a~ۺ\vAY{mKU[[Ptpu/nZmzY_=/sY5*z|lB!BWS W|v<J?{L66.p3~xڵkxW4hz*g!ASF2E\'_omX#􀫛 9s14 -sǷTLg~c\Pǟw{OUR-J%.n@pӳ7CӐfxgӪ'į'wE:08Ύw 1Ms)ғ0`` 3S"[ds*_YEp 3R߽} U8;810z8sOr zx3sT\TVzzjk׏a5}iV]3siװ c.Z7IOx;;Eۿ"&9BALzrXM)@PPqsQqv[.}llXx?|?Ajv{ko.~koɇ?-z 2r3x8:89n'ϋRgɧcv8`4w\(Qˬ;DT~[5[*Tb4Rnf|!Rk\Ěg)sl>~7~8.ndt< as痹gIߚc !B!_U7oI(=}:w[ٍ{GpCe,B"o$Ҹa]"z\ qW@=L_w-:-{п>_XFөI{x=wGn,-zd`cÇf0 ( ~L:Ϸ}ɆÛ 5ܪ2o;~q$'@O#t@jq{ko.~koϼaЬno~_IӀƌNN~.Sv(MtrLRmo , Z?nKWm-of{Ki~ZbM9^Fg1- BrVq닭K痹g\|֤oMRB!⯪R?} g~аMW[>v9gY< j)s$O^)))deeUx{! 5=xcn ez|Wo߿DАO#pwq~`<pxsk<֮/coXl]K6!!=AT*RJ-MPפo0u{w&<67 ?k%|aښYͻk'G'4"1c7zKnLugӻeҲoFۆ9{;9:v Qzp*;Op(l#(^f{s\mi9ōa8uĺKʪl_ZʟFLJ,,@wж6<ǧ,Og/gXw 8̹[?W֔ߚ졢/B!EWΠ^ꊣiMV&xe p1&OL^^lڴ޽ aĈDFFReݺuZŋ1bѵkWV\Iڵ+B0/y9y7%edQ7SK-hn:Sӣp~BkpqrNy@e1OMjvKtj NV_iߒ-Fc#wY=0YkfsN=5-9-jycV7ZwQ/ ɿz=i(PMcuOwHN^.l\7˜~ƹ^5ݫIIBz^*O7ǚ͕VY0P] 7/s3Jka+o—CTͿ/)-*Cjm.u[?W֔ߚ졢/B!DyeEI@yrN1V6eҀ‘vnG\挟0%UwJKKy#={0dmu'z7ndŊf7>|8Çjɼ9sZ(w'E?w/l3앁m۶qQjժŻɓ9uFSN۷fϞͨQ8z4p8ÇӻwoϟO~~>O>$&M￷Bw([IiTb٭$cҷu/ccoizSA8{BQjLu˗PífJli{ W6aA%_.sl0[{çRRY喸:R͋0W#8w7z foGڴ#PBf]s I&.=ⷅ' df^337xW &֖-軴tcRzN:A5˝i,mot~ZS?maQEgÛRZm=+r~YkǧB!?>3My (ku1Z$#ΓCY0)"q Z뜎ӓFS _o*.ysi8dg7w (zWsnNOѼy뭷cΜ9\x WC^e5JumO> ϥo&LVZ.\` **0n8㉉aС;vK˗/s9O ӦMcƍ[B!t:h { 呟@X@\DFFF7&22%aKW۫&Ǯ; d0r|+<5r8!5+cYwljk!`A59q4:V IDATݸ@p9 %Oi $./OtyǷR-gM"p;'sZcn^לhR]&"iD%F A5qsVhl_1P/N7syt ZVI֖xxИOl`0p5-mot~ZS?maGg߸Tj\jօ ΰz8kً-5;_ׯ|jB!О ~&?_yM-Mnl7Oܼk4>;4> _ѲJɿ,ߢ̏(rsGVvʠP(G!"2Ҧ̎7tP=-5kNN5ۯYŇcVT鈎FT2zhtBzzѸ,qqqxxxsw듟OJJq !'Z3gҤI( vm՘Dn&]JhRϿԡ]~X8rklӇ,u 0s&4o@μ[ {x(ZSO1m,jyoƮ8`4 gv'nr|?ެ+?#x}fm?o\dTku|n9T(9p0A2udҞmqK^KIZv:Ot`<.bԮQtZkNFo7o@MO_jרUۗU~[տG?}K_M!7_MzpgԪ`M+sY*Dz/l< Ѻ~ ߖ[iM5'Gom]Jn~.#zoԆg<ɴճ&KhȪg-KĚ-5K鹙{v!B!jrpd-Zq ۜ_6q2)YYyzZޠ.9ʇxj@Kj`@PaP(}ىѷ8r-TJ P8Ynsf4ϣ2d"Oh1YޟB`@߭ZEvXd1s̱)=Ņ3' am:]p4<>l5>T5W82^P ؽ7.UbYf͘;w.< W^%$$FCTT͚5#-- ooRر#&M^(K 8@~P(en !vrI43ҹSh[a]\K*積q{y$u}KrIKY8deSǻFl[ 3Spqr)uxѿ|MYiC??%37Z5q*5ie/^¿C㒍!937gWøW&[oʯ7Q*X7ޭz3mʎ\|O{OsN'93OWRۖkU6K%>{sUvB!ʼʽͅ3'h޺wׯDx!DuiΑjͯd 7Z _2s.feq7Y00OO6 ]Y[Сy]v+c|K-]ιLR<;߰=~~t|]<׬9Y%-!3~ѰE [*޳gqq sJDC@7tPcï5yZp6V~`wzr45߿?WZ_;BBBڵ+SLpB^>44-Z0k, 3g_! :Yn.*T᭧ʃ: wjQo ֹop:j|S9].ܼIJWBF˽lj{լ_n_VO_?ǞxfjJAX;>ϊO=x.CXNgqL.~jlBEC^FPφT өu.W;鋇K;9^xjd&ǸhrFE_{JxU9j~=/Y!Buj!꺕 _gRҍJel>=eҗGX~1/\* љԊpS|rdHj*O4Ac꾥vͫtU)!!_~'Nmv3;tq6:I~NS8'O7$}!ڕSB5k3f ԭ[x:uĎ;,nT*yYnjVZlYᩅBQRn|uu!D9q4.NL{u^*OEP1jiP^Uu{x ˕hPckK::_czw6Da:UzLB!c|\J_`|:,(7ӓYY Zz\,P$֝|;+Ƹ|IvV4YlAnlB'))ԩS6<?lluSxY.xwT@K?_fG^9=z='<<%{We"|)s?///,X3gh۶7Y\^~Ux%~uk-Л,/j̖GCtPv4ip_>v"33[n7'N0ޟ۴iCDD7oDRY7X!B<8{D5j\![aT9#k\r: '?' o Wy~x [8Ju:5~Mnֻ.W1p018TڟB!B{Ry9N;Ocj18@V!}> I k zr 6u|:_+gmΣ@/SӨ| #@qoŊ \;v$ ?٪ӅTJ B!:B!B!z?, u9u~Œ6Vxq7Ae.g~?9?oƯt'W^iM<;/|+ï].D%gM+GLJLFwJ9,|ZcbyQc./\r)o1+[D&0B!Pjgr.\N|FG^%y !B!Bp@[\5T8ж3$]IsdAjł}-T8" Pլ]ڍ{slS?K_/džvK?7'TXCjw6' B!>СC$&&c-ZDAAAw-&'.ugN ׽zNŋ`B>Sjx{sqҫ!Bq?~<h4>CN:E׮]+%.rJEaٞ} u}+%?!B!B`zf@[e="Lc((H}JkiV6ЦhȀ/ˍ[`s^hUX?R;4[RFD_΂{W4 !|֬YC>}LzȞOrim&'_]i !B!BQ&GC4 mڔЦM풖4 !0Vʕ+ O>-PB[)B!B!iB!Pz5k,FAUB!B!Bx )))( T*T-|񄇇ӵkJT!<SN0{ݹ::vСC+,!B!B!6+V^i_;tݽi9l6*!9m4n޼IHHDFFT*yw3fٳG} !ggg̙Sa!B!B!*m޸q#@9gwoc8f*]'PÇg֬Y@k׮%<<777FanRR~~~h#00ggrKZpuu-B!B!B!BQ\qFV8hnNJ~V Wvyq-5O<-B`7Τ'TLL ?0-ZcǎmۖHm8@ӦM۷/l۶͸N:ƟO=z 44S]B!B!B!H\7/bK8yˋ qodӫfO>,Z|\\\5j:ub߾}8880{lFѣG;vڵktRFMTTnnnhZ q]V@1bƍԩSQ޸9|4Z} ''(8q㈏'&&Cr1RRRۼԮ]cǢٷoż.^HTTSL@R/W8v!B!B!B!j\Ě9s:=hr?Z.gOי6jם<7gձB58s JѣGӥKӍC9)J֭K||ż]@@ձ !B!*&'OMlZ ;!B!BJQm Çgh-lNywҺc5PPt#/W<}} t:۷o7;/o\\xcCJBV'''Ljj*ԨQk׮+^!Vƒ7͛7ѱ58AP:,[^ϑ+縙z ow/ͣMGŕ>\\Kmi۠Y%EvꃿҪ^55]66}3KkIμMOmjGubS9r,.* 넛ky(Sh =:6K4:-G]渻LZojakSU 36,[665;kյiw( zEH@}viWT jJ$7lNVñtFMH?*17"S(Podu4S"E7[)ߟ160>-mHtr-Cv/ujB )tlRi7b5Ҳ3L9A>>*Ntk_&zh-ǧ<B!|ehkwp`&:=(AqwS~pwswwGrS8&;4u:yyydddp1 Ǚ;w.!!!tڕ)SPPP>P8xI:_~%iii|zz @-زe jӴiS:uɓ&**O>ʽ'Bڵk9wٳ Wi]K d&OS@\ZdZҒ}(l­N[\!93:"0nx~>fu|omY/۾Co0;a. ջ ַ72拷|+_Og3˵|-v! )l?s5*?A `˲< rL[[w_8{_iUVsYs~>+k2s?⸚p(nYo3?ü3bkeWzN&.4"< }@Fn.dݡpd_$'˼sÕ[ڴCϔ;βmcUkʷ M_hՄt'v+]HjvUYr\#;C3e<2R9 ; ~r|~ !B!ʧ{@3Ł_~Ǡǩ]dP۷pΐcطKLsܹ̝;WWWӧOQ? k֬a̘1vԩ;v0ӯ_?ڶmJ"!!+WQKf̙<zFѣG3իѣ˵/B[K1zyqva']`-8(<ò=?2@[}g{vdXvN&*'2{ h=*`0y/7\Lqҡu4Z-s2c{ @oS`on89hmӹ=X?T0lvg:7x24:-b =zWESxty^Wo{8BNC]LuoU>/ߥWX'FK-ͦ_ǮY~f[7C6NoVȕs| LR %+9?~ETM(h&fL-0cb]ULyjt Pd IDATϽ~=6M2~uIӣ5'OM/ҡK^֟-_UTzR,O.k|Tjyyx b犣ZfmeR'BT+J|wkIxˊ.]0e{2IOOg۶mƟsssh4sϞ=<^!&ilݏBBDPR}Vui?e///{T*7/}gg2uׯ_OTT7&""yj*,BXիDEE/:tXNc`4x<ܬm3sIΠ~2ו5fS ;/_{ҥPq5!_RɤsYwh{&fԩ66n;};3>fh܄4\ "o[?G[7Mb3 5OuE#I?L=s^IaQ*[:鎽iP; }S|{#'̶nZb, M癴f l?U("IqĤܢgu^//^:ltUkiI4>N 5;J⳴@חY|mPowO\>Kϰaz΋$l~|4r oZ?{gѝgOK2\|֜WaPތ4Q]:C~}Op=)4_7k\fe_{ߩqK=8.h5gl=xAoN2o/%߲ݛXo3z=fdϧ^Kw@jV:9fgړmgcEXCBQU] >W(8(n\G%\TsrrHJJח5N+V%66:uXtRHIIOOF%jc* ufuT^pp*{Q˖-9~8.\N:l߾]sBQz=VJB/^F| O;Hc o09-@U18N˩?p W7~#WΚ4oʣm˩}='?¦ܹZaKջEgPLL#*1G@ZW%dlԘ[C+93 /7wL{GYڿz %3P(sƆ%?2kV-|+ɉ%;=u}(iZEMY{=Õ[78s(T(K"Y~aKת?Bϰk 8q =~Oy0%5D:e?ΎNĤ$p). &_i=`"+/g'gL^ߠjs-p-&QI psq%:95 lxknggp%(k_*7cJB_Kƀ ^{2}d~</d UO"Чqu5cMBxذ!mo€aT*9| o<J I T./?z5B!8:6* J%b JDZ1i$V\IPP 6 2qD.^ĉ2e c8qT*1 9$]/3bgãmz#^#%Q*\W+W(R Bh&cɒ%"yx̜6gf}c}255ѣGEEE̛7hǿSnݺI&QQQAYY-_~B.ǏȄ] kӦ o.B+\˖-iٲ%ٳgvZ l PG֌׶m}:ShRԭBxY;ƒB+)ۯKlk۝#Y'&L\k6Rk>զ$ ebpZ2˷uqb ޣMBFY+_/Ǘh;?5 ymlEsk9q.s^e#<8(ֵ|o*U_p @xҿW~L_{hAF5@RxWvDH3PKy((e@d^keÝզ:ž_ehklkڹI3ߠF0ӧu0 k̐}vW7mKh׳ (떮ymluCm}`*1|a9_o\=9;ǗQ2P ˊyq[^c{ TI;?J w?7u<)_è8=[v"EGMV*yyܲ#o,V}͋wO[D5%޹BohPT*kW}ѿ_}1w\;FXXSN0sL6nȔ)S9r$`%W^ۥi4mٛfFE~1czn &_:,ʕ+ٲe QQQ L4۷]?!ĕ !T*9s4Q~4N`C$Pc{.JEAi1a5j_AٹoaY1"> hvn]R@Jnq>!g.q{T*`!b4FjJ m|wf^CkN_s2h|60tLK}Wzsk{CO:v#U>W׷Q_j}d6r#4& iYύ΋ N3ApFc*(#s7oO#[w_ώkl¸G瑽,M$__cP]0`5n%˝Oڲ&s7W}4PvQ4GFO~>^(u.O!/~>Z4TU}i55ƶ́;t:,^aÆIRRطo7o (('2ve8dݬ[Z??y~nܸqDEY1bEmDqy|B!q!,ge޽$&^ZSVA,K*|h7>Z-fuu*_l[vi*Lf?Dإ/wmd{z*wGqUb3l\b!RώDu7< JTf1Irۜou2ҡQ͑TAZTV塞2ԭ*k\?l6U˦;MWENƆrj4j fOxDIEJX((L_7j}h4zC%KޝEgPm[f`sKХI]l{½T_}0gZmO GׇkAxVXLV]T dhEV_W8;ڮGOgϟu֙6miy󧊫ǺoOΏ?!BH}vNm g+f] j5j|I&̝;ٳgG޽ٸq333@;7 GÆ APPfiԨzէֵ62X!F{JJJ;ҷo^/7v|*0~_3 ((K6~ F$e?u&DFm7ݪ~ܒJKQլ޽& ud /}?/Z6[h\U|3kuUZ_l< M춹j*Ӷ3n(]a.:]xhL-n8zwM]lھff>muP L~( xZ>%o>govh ^UTFW/}?JyvI-jLq팫 Я]wf5mwڧSnK>fۏ "$_aջ7rzbâ-)s6<;]9><)Jbxf{w j肸*/m[}?T=}ofܧ/ݹcɶT?r$ }hԜ;z ?d=,F*n*J_ßvހ=v?l[m=[|;),+e|cx[etvXkwm{oyqV$_ϔfXO`%<>nw0<*3zc%Sh$ןrfGxG4`ʭFxSc]gW%׿Bq!%?QhNՠU[[cȑ9BMƈ#8}4@qqqS^^N@ui)kOII Yg9y$Bm};S,/w!BoMiҢ{wom#I\]SQQAhh(n(Z9QQ'0`Bu5g9\>zDצm)7 qlb'xjթ^o6Q<> v^iĉ3/37j]_Y9qwӇ_7X,2 [TBKo-b3[,d"bB#{J-. L쵩Y>Df~Q'+/*+Z C%9y$D4p83rw|k$ESW(('P]PvuuU_lc2.h6fۚ& rM^?>ѿ/K !cLMJ㪎W{XT[}o3Mn{I,mU=z4T*J׮]y饗۷;瞣gJJJڵ+&Ll6ӡCn~m*++[ a̛7^oFuּz>|-Z`0߻B]d h!JLttߋ׏F t qt?__HCYc=:⭿p,;^-'Lè8w@Zfz۝ot& ޾UT$F8 Jz *,@Yj+su[Y̟ YgWd{`gd1}溝NPiVoe.NÜCௗs~:A[g5YV`mJ'_xm()9;E۶w *)e&)2ߌ|[XouhZC 8lנϑd,y鄬ARA_IQ+;?߿XwO0*Σ] {*qϞ h"ck VѠ/xN;x߳ ^^O'!^ՠQW{5gkPjܟ:==;ҬY3:uĽˇ~h[:詧bt:n6j5| 3gΤylْ^z9L_V_|r3fYz5?"`!F{bٿ,wδMj^V}J4ziGYgK 掞7|hkh wU}aOq0.OQg4 Ю?oȳӌt]oO͵X:߿|gij9Jڵܠ \wm!r~ mW a4)ke`#uVi •KZBp9B(sYt%__!u'`!B!B\*$,?qY>hZ?QO(ˡؾ!L=f-fVм;3URyB|5*޹3'%5W/B#`Scn!nJLL  :~1Ĉ޽)BtAB!B!B\i. &5&*?;?;ǘ &Un[)gGM+iSw!6Dڴrsn-b*Ml(_ԵĥõtJ_ 1旙)*}BMŬ_A]>i05HiK?eC B!B!B\).&&s͡tdPiR k痝KZoQVC\)nmAcG5UZ(8_F|5*BujK폏 Pnx_!-[u]N fiȐN}(՗_B!B!Jqk֩WqE&jx|A]W:_j1! & Lӹ. !tQN8AϞ=IOO̙3TVVNFh4^38 ~m{J"B!B!JqLmVc6Þ혫J-EYNNɟb2\*37⣱*Fs <'{@Uqk@j/"2e0 eժUZ}QZZʪU0 wӘD[0X!B!B!w]6# &5&?Oot=?r~zびS@=xjYY5!bR3 <Ǜ-|^68m}fɎR,Kyp~Kn0q_d!B-[ҲeK-[qFƍwK%B!Bq9|0Xp!'N[nZ?0w^KSu&f |0 rv}`Fi:edf^Uh⑯"*HÙsk;'_2pGYĄh-5v~ni#<5jN4\X!B!B!=y}nz[zɓ IDATSPPʕ+mܚaMIk׮eB\.:䭩*k2TBX㳊Ln_Xn\F !8zA=.v1B!B!jJB>7hRd8ZFáRii)DDDj{_c2h4r)bcckl5kVyF233G=q;i!DudVQzN׀EŹpB!B!B!.wZ5VPը5V]%"'LyHLLӌ9ǓB\.B!B!B!}mZ50ը8ϑ&M0w\{=z!uƛoI޽k?33t: '::s@@L&FB+B!B!BqIK~n f5VўYA{vtOB#Gdȑ2m4Fӧ#??rlSFy![&\r!W]/vlN8AII{mڴAR98;vK#120M:Ůhې]?v?HQy * 8 & `2n:7nC/)Cnh/SoW<޸3tlԚ;{P/\, M c`dtQ>ЩqR..v)W9ޠU| \2kB!.6JSՈ_UUaɓ'X,4lؐPG}bARKjjm֭[ӪU+z-{9ʘ1cGu;?y;vGi !LXBqZv-+V`Æ n̑Lq>J2<>u>ʢYl:nMvYlk߰$e-ﭘN׽[(ӗ̂ Ƕ}[fT LF|o-CɃInѡXկ>{9|8rk>('SQp~X,^X8?.5u!B\|ќ:WRq?Nǎi֬:u{?= SO`t:vjO>3gҼysZlI^5뮤 F#EDy6qTYT*  qtֿ?ROZJ/gי2}Z_EkYoP!BQ5A &Lf&>k9s 'Nh4ҨQ#|}}_YŔ[/vQB+dzJ^7t#bx):skuu:ߢh31 .}n JHnّI7ݏ_/6W%6cCv2sX|K$E(Sfm2Kjlwob/ )ӗs[AsYv\mwUNfPXVLAOVyrh.zIO:6jeWY?dF{[:)7_}vtcg=Ϸ߱us`w~y[ M ;j}7xr, [LRF}n\_y zxr7x22uhj=C6~D^~ίyz};+qWLWR;r-:h*ZivvUrB!O 5 Mćmڴ !g.dtw@B!2Wf͚5fO~-ZlbhkhטiK?eHkӺcCtAtlԊ)_}ڴ-mF?5'eCv>{{Fݚcd!Qi4Xwwhd1|ox#KjNva~>>icBu),+c3C/u[`klnjZ5bmQ#|磴oXsO[)  xb%knd6NylCV;sMmWg2Twf~ȟ4I`4?#ݶUf>Vxpٻa]ߓiK?單0ҳN1vt<>ܟ\UNGhyb4#־ίyz};+qƓWs rO35vﻺϗ, Z< yen8fJ4oogW%!B!^|h^4=%a8~4qD֭[wɦ'B\jK/ċ/]wŚ5kؿwOߛyt]h+#MĴ'1@s`!3?gMjNF^6?U;7XNfN4 Tl۶,e2#gLfծ L)_'ΜfF:&h,_m/(-{ҩg)ʣGyd/`2h*t@Az.Թ}Ö: sͫ1MǗQ2P ˊyq[^c0ӧu0 M%D4pFfmܱ7A$F؂ϩN]5Lk3PKy((e@d^4jP[PT+CmWY JX,v뚆WRNа%_nXNIEl9ZL:xiy]w:IF^6a`(n?|4t]IUhl]ڿs JB^wί7IRzfbL\_u=U1jٟU*&e߮8!B!8{AϿZcF{Eh,fΔb6i{)~c[ 222[N:ETTS͚5F#ǣQ7B!RkW!P]0`5vT*Xj_PZֱx/fg5u c-6)A:޼^ky@ N#:zb6ڮBPTyv$\AiAn4IU;7пmm=?hW}|>;֓m7NIh4v2"hߨp skNy.v6pVXVl 8Ʈ)>?WWTEvOuT Kj n:Lݸ?y%{#B!PMh_E >LptgtW mD\՟g܄^`z[:~Mdj׮'Oe˖ 08V\pX6nh{m6^7jԈkVZ1uTlۓsOk׎SҪU+zItt4k׮UB!ĥFדǏ'---Z\l+{j b #(R5yU*kpjޣ٧x`qt<Ӷ7T瑽4m<fhqt<~LMx`]aŘ!K>~Pv%GS,߾Ζ/{6ӭY[U*&6g$M۲pOK^w9ƒOܪWelOOm+i(<@nq)nN;]~ڵcf:犫oAxr2{[y(>?oe &~ڵ;ض)9[K*enO\xoݟUJ8!B!8ªt$&IUxx|x^z#>^:ǻ,[[ͬY3f t54/, Fu/ѣy:u* <ض/8^X-[ /Iؾ}ӶB!.Ec4CsuqUW]9[|;),+e|cx>Яmú#T=}ofܧ/ =[vj/}?JyvI-Z/[R@Iy)jջ7$: ݛXc=aQйq)%3 [\?MYj+7a=<}1Ҋr4i-]UGS/?I ,f>^m So퓑EèXҮ^bâ7 &m=}krˎ}"(,+i\8kJâqh55ק犫E3cXfTmJίyz}+)]Ja|޸?yYJ8!B!TvX|aÆٽ1b 7oE^a='eA[nرc4i`Uΐ!Cڵ+&L **%KЧORRR8p ٳ.]PRRuj￟nݚI&1vXC׮]NW&B(kMZO!vvV:~l6SXXd"<<ƽѤr "TPX^BBDWbX*8_|^/'0]pNLIEEĄFq{3JUϿ479Ž3_#) lb'xjթ}_<"yJx3ȣ?%l"3?(|˥p}R^YALhͿüq~=뛧/,+f'4sż?튒B!.Mrٷ36ؽwP B!.]\Eڻ@I E6Ú??ksIw>ZLL'ŝ=oSwݷ__[tp%h"bjVן_ԩ\ gH@ !8\kZ>/\=(!B!p܎\5QFܯK N9Q=n&ח;x&Zs#_nȰlXt K999ͥPGqY'!B!KV۾8 Z1VkO71^t0݁ҩS'ZjĉO }^z иqcZjErroǏ7wMpk !B!B!B\H?07oB\MN g<É3q݀嘌Cl^؋䫝4p@z-;FÆ V/gNtjb岖]WUxZsg8?e 6A~i!MY;*JR%12d䒑M&m0l9΍B={":4w\?BwJB>7iRd8ZFá~ii)DDDj{_c2j9}4111-F#'O$>>??? [V#33xZE!!5*/l4ף/)h`[j7Hbbخ];rssYlEEEZok !}]֯_OVV֭#;(r$$)8SO,ө40nt`'zޣrz9{O"(wֲ9T ܞ<:bwX2򲙽;f-E<}^[<۶J^_erY>ض۷k*MX/}/aA#u:~|7,IY{+sǻu>qiWZ>gP+?޾ !Z5>UW k4j?n! &-B-x?~<?D~&NH||<A3.Hܮ];ӤIz͛i׮'Oe˖ 08V\i;&667^o۶(޺u+ZgϞDGGv:_S/$ 7VX IGNӡuBp^*U6mx7Bq9Yr%5bȑ.(#24j5dŌ={mfz`4_ZDtHxvX,h(՗c2 q2:*]>ʟVRgWZ>Oe  ZKb_Zb!2(N:?}j~$$xkuL[ ]ֶ:y^!(q IDATNhV~ί__ןU7οLFr  1:76,JAQ]ܱ7.ӯmw * ԩ|>6g pު{?qJyez`:yr*GB!.|5T*jUѨպ޷osرcaX8u3gdƍL2#Gڎ3f oZBǼy3f `|rRRRl6c4Yl[n%::Yf1ftFe'- F!W\ɖ-[^`ҤIl߾6BK8PBfTQ^^N~%ϖ{N_Z~%$O"/f5>).gl;J<8:eS_NPZBo40or*7²b zz̓GcugpܷEҷ<6nfL^{*QqvZJs`^~ίy)i_WxZ떰 tCk}E%]_Ьh68:WF>χ*g쯣}ndtR_Ox;xz}p 4mHʑL&Z5;Eo?JJ8ӌq&}Okkc!rECH_嫴 ZFO%(]tTTTxb Fdd$IIINС)))QRRBLL 7oƍGLsjC=Dtt4cǎg~c:2n8(#FcX.ȃBKL/B\ 1ڵ;wb4㮻@?-d615k̴25iE:bP^lfn[x{]6m_gW.lR-]lljg=ϚֵiK?單0ҳN1vthvр kf1wa<,foxvI.Sǫ;*y'aï׈`ο^cXPT|e5-1kPWwU͘tT/C:n1ZQ#|磴o|>?Gs2ht:aܧ/SRQn~ίy)i_W;xݚcd!Qi4Xww~u1m`Bu9:}/VyjÏ<8vE9|>]=31Fg{PCiFwUk28|_>&j sM+o(_]_%?(I4mȚ=빦BqI}۪Z?iҤ s㡇[nݻ- wqgQYYI^ 5̥Z&!!L\* @ee%&IBB! ֩2e O?4&^׊<:.u4I䕑ѩqz˯>\ߡ'>j}h،kf呞u\@ӘD%5WPZ̟0SSGykvl[>N  2802uևgN?#D|D|k0~4[ _Ǡ|hgwS}_=>^2O=n[uf ޭ J:^ןU7οMjNF^6?U;7XNݿdͲu<ˌ1U60VFl_? C;eˡ^+{`p4~i? ?_|4Zwn(_]_W?(7u;`4XwQBKMsK2z>#}_\IWbўSȑ#?keĈs[W,]͛73}t}Y6mj7}3P(܌ L⬳fiɩqB(%!W ??ݻwGVVС;w< ׶m}<3^*Gpv=ׂ"B[tn Jd60PKy((e@d^i)(-bحB^I zI}@ &#A)5k^[<;{`{ϧҔbJ:^ןU7οOa.;O.`00Lnՙn2coIq{z_X`y޻riky3Ox;xrr/)R|lm{(_]_W?(׵W~ɑde vk !RRi> :>y$ J߾}裏lS)ƒj_Vc2!((Tzy͞=￟>3f3}Zj۶-˖-cРA3cCC3vX(HX!sQքUT*ĭR>%eD*:6eŶ/aKi|v&p4'\؝/WS!㉛>9m7oǵWusyl.JEAi1gG+uOƨUj2i߰%$p*/}['4!ןG?3W+lFI\gãŏg{%"@@A@Tzb9=ן*E@x=L=i)Gd &!Ah{wٙ̚t{?SkmYaY g>0L,]jEG(Jֺ?kay 1I&V^-)ʷ[2V-1gXs5S~}·w Ѧ?Tz`.cwurfXl?Y+NQ~u*PT8;XL0???b ?e3z}˪*Hx8ilm_CߌJZ[y0.fO99:օbu wvnݓX:K(lw˯)񆟝f_jPs!贔Vwm嵩_c^ADr2wXpqr!BJ4ksuUjtz?%_H7G?~JGZNv{z,ʣkmkJ:@VNcg~ӐǚjSeczfG>Χp_e(B x:** *J)--){Myy96m?ٳlڴ$>S6nh&55 &)k:u-[ iwuرT^y _~%[liN:(/7GTT<3^Z>?$Gz u'eɖO{_R9+ߑۊBʀν̾~3yiLy/ z$k?7|_=%̾g|я?uq7˷?SQ]EpcVHώmX~&gM{j_k˷zw/f~EO+z_N,36}Lie9?39vs(Bp  L~߹ ZzRT^ʃkHG`lZ[Þߜc_ =ڐks?T#GB[G-oڷoo5Z\]v7ٙΝq B멒+zi8BBJQڇ"dRݣZb///'ߠ¦q4jr/o : r iS߸-,@8;:Y;>{_VUA~I!^J50wW3kۯ2[[BO(db c3տǷ|k/./6~6{~%̹WQQ]IOKĠRgMoM{?2cXvNoc/ߜNc{ϲh,#B? I>O޵]83ôiײπR)GeX!pc'Bqs]GO؅|ci'|B?7B?ģ `!B!B4~"[SpqrfdW!Bۈ<X!B!M6ujO!!B!|S!B!B!B!$dX!B!B!B!$d h!6h8sLkήe'2^6wc@(º4kr2ɸ]똣ځ{٥8AS[HGSt]Gv]=tͭ^ ߦn: wWFE i7KcVT_RQƑs[\@Dw<+8蹳SOiߩ 8yVUlN϶:!B!0J&BPee%qqq]rnݺm\v:C? /<:NͬJKgL~8n^:94 w2k_EyrtumQ>K~JOK??OZiڿknwE*+-Ɯjpru)_=CX@ڶj [qsv5͇w5x%Nc7Sr#n.<;bnÃFZ|S!BqkBې?gV믿N>}VfdV/>~&j9b[RꤗUUv]t`_cL( x{lQO{b 4^GEuUs ʊqsr٥޼e3s8"]dNi/) /#1L)ߵzTS6n~^?2YcZ֔( e>֯G_}@Uj ?%c̺~`DoяgO fGLi?0WR[cT|oƧoۚ~dD~bB!BؒL !'NNXXʘ=ċxev4ڴ,b)Z eL~gsLdLN\:ê7QVUA+|ȋxWRȒ͟p>'*: tuYmt$.5\E'^%U򭉯*M5nAPڌl^CKT10}ӗ/RXV?8:82k6& ѫ]D>5'GM'v6ҿs/b> [~2>O~:fزщ#;pzߩ j<\uccߙwm5džM~fOҦbQ?c߬׿gYļ$vۨU*.5˨TVjXCnV7/3-<)*/5Z?ϭ57Sn?jlf\)(c1L4ƬLS㳩5:_Y{B!B{ `!Bpc,i:︛aUn(fwKM`z;sOZ_fow(vmX:1{f!3 ~s϶gU='+=IN]ɬݓ^"Wpۡ;C;wXM<0U_ )oBƽ$Tjm:p2| 䌳wvѸ:Pԧoxp9(p^ѣg^S}2s8gg0㗉9Dd4#"YZq]cN~!tzX7olӹu;|=8x:ִӉ ~'_֖[ſ jaTi9\ʻQ⏬\~c=_H4Z-'3Mgj|ƿ9AFC[}[{B!DtY^x6l١fѢE?!CX-jn4 .&<<%Kؽ̓'OrJ-[fL@~~>Ǐgذ ͜9zihdZjEllM5VYx1'<anFo͈14[s~;lz=ج|gG'.]dytz~f?ՖoJyU%l_f3֪`{\c'0@:􉓣als36>O{ߒo *8:8I[!M۷~.//G${aUVfΝ˔)SnJ1116}ql\prss kދܹ &裏;w.<vYت>QQQӻֵOc[Qcj3Ɵ9FWq?P^U'#u;?mT?a=x,u'ȖoFuӵM{fgѵְ\czWƧoI7m؞o!Bԯ]O*׺iZ222 ?α2rrrǧNF!33`ߨ+Wk+Qr_$$Fs$&&2|pFiZz둜̡CزeₗW/ ˗/O0`x衇W^tԉNSNY9])SpBΞ=[NPY^N.K4dꜫ(+u `!6HHH`ر7k+CR8 IDAT'F{نw";XMȘ*{1G? 7*²ZzL³4Cq۽&paoaY g>0L|^ɤO~2}j}_ZYDQya0~/˼iU7f͙PO_CKM`oQ_[ⱳg?թtdl7z=orc9ܿi ۏ2:]U !.S;ywj]b``VM3qDj50sZoӧ\oZx3NQi8z|' EQGsky:|^w},) ly;F0+f5ӞZA3[ +Bؑ#GիNNNMY"3{?f};q}u'eɖO{_R5jK0UrOvo߫E%tc K>`{1UVл}Wע:kvEX@tOvo+}ܽ9>PPйEeۻLϤ~#-OP7sT*/{=چmc?o+RZQZfG~ag̪2fӶUL17Uk˷=⣟ҵM{Zi;^|[_Za]kTS>&.טރyv#νZVLYUE<Iu~ubgi#q?;:a;{{׻k1-|)*/5M35>Ϳj=bߩio8nB!,fLFw"AޞLƚ?0G65$55d<'-bܹNee%7ofܸqlْZ̛7ϰsܹ_wpdd$񤦦RZZJ`` dΜ90m4>c/_Naa!_50Zs7|cX{-ho ,UVL4%K(zq F믿NUU֭cӦM>}$Ԭj}饗9r$_|ju5Ftt4F;v~Wڬ][}HOOo0޾L=Msmx_NIj8sb7g_`#Kppt=n?PjvչAA^:Ws.2prvW_{fm!x"B#xR^VZQ*+-48,URQ\ʻTieWZQ\R ˊUʥˊNkTZVwYhMEbM]Q2Gw7_s.]TV345JFXػ/f)Sލ~3B!XyDeEr|q3tz,:1CqڵKiժUcqqq>}(_|k*Wh%..ΐ[V~QQQʕ+ɓ'+ݻwW/^ʽޫL^WeY{]SVkB8 (sR^Z(|O*6jɃϺ_ߟIxEV !ꎧ{-ߚ<\pu3zNoFm V?g}\ mklz[)L0hLi\._ߦ4fhkv_`߶Uk&5Ԭuvo!B.+ťWK.ynݚ***psyOaS2uTͤIr!=36E֭sY[^gM.]DAA;]n:VXŋM)$$2 +kXnnnTT\߽'77d &/S cǎ&66ٳgVSZZJaa!@3ܱETTDsOvvv|{}I=.wy 処R\\k^ EquUh58:լ/+nշ#yB!¤(xBa[2%zEB!͈)\).j&OaFF٥K"""Xfr{mdӹt>>>  ^VtIє֯__tLbN׏{F@aa!fؽ{wn @EE0ŚL&>>5NNNL>z;w2{lCZDDݻw^%K0q:%c1mƛo`zsO||wZոZʕ+裏ԩ;w&::ϟ?O^ر#QQQ̚5˗ך>|8QQQDDDh"V\iX{;2|"##ٳ'='Osޔ)Sf֬Y޵ERW_q)֭ᆉkS1ꫬ[0"""߿2bM^1c ~Gj]7gvItt4۷7WլYPNJ[:6_eǎFidǎL>sr2OqįF>%wRT MBq[HI5wcΤrαNިsu:.]BҮ]; i]t7dȑѶm[<<<旝Mqq1;vwf~~>!!!ӽ{wꕟO~~>ڵIKK#$$ĬIgclW}7n'Of֬Y嗞[[dȒqMPP C !""SN{nx ˃ ^`̘Ưb'66[i&4GiQ.N.{...tլsjĬXo֭[1bœ-[e˖Lζy-jK.eʕgɳl-׌=۷nآ?'%%QFΛ6mՓ`o[(>fD v%B!m,##K.D.]֟5R iDK/_g.]\5y۷anoAG^lhn]_B!Bq;3f !!!6otЁ^x  ozhMr3m>α7xα'xfc~C `!6?L߾})))~ 2}q#Nc`(quv՝ Sljݳ^ij'G6O翆g ^<9C]F%*I suZQRQ֨ۊV-Ϝ/Hc5+"J l{/!B!X?{N6EBB۔#pʿ)ѣHx\F_ ~owOMϿ܏_EY&+ټPb`<;j䧯3a ~gG'j(/ަ[HGR*EQ|2UB[^mW?:74:COJZn|j &9C&2`v&](/wa,*O_C@ϞV^-puϠL:]z7†?ӛRwE؇qqr6Y9k-cy%, sQc`(/?OWwC?/_$f[|ΎN<ĻڅoV.?0,RSZJaK ƗB!B!?B!nSƍc픔PZZJII &L[yZN;&uv︛A]z6eڲ&5rw>wXM<0U_՜Q!OG:E CO#9EWqqr2~q%-j5 cd:0`6.~V ^hFDFtu6g\"7PT< ODp;O77cFمj>}mc/cӿ29oW/3Mg On8svzxqt"` ~a+nkxh_u50 7o| !B!B5B!mLRKnn.u'~lexj <]kSFTX Vco 35:- Rx3"2g-sh [!es3N5Ϛ3ΎNt :yyW ډ̫9L: +-Xڂ95#gO0:jy__x$G*1`x UjR1ĥ+uwK;ɠ.pqCՠ.Xڂ95$**~qz9ܘ0J=A٬i理^!%e џcgvr| !B!B `!6TTTDee%AAןM̡C^vF]̰~l/Y9V{R(,+/P3ipC rnHƜ ?2|E%:TR+o-)WK *,+ợ{$%\jPE5-PO_dՍ?~t %.5G mU;j9osWNr| !B!B-Bې/...8qVKJJJ hLHG5pё.| v8ȝ iog/)$s˜v%ZAױ3i?wvY+/9W),+K8uvUj&Ok⾐S,.5.N.$\HiT|  ozʧl6l^VUAḧIbx6-tVZ ax=Ym5gk,N!B!BOV !!GGGz!6māϏ|C3*"=>~ڎ`3yiLy/ z'Aoq| ^n6^؜2gQ`FS1xI;y+^!Ҋ2j5;޿ Kg>cvؗ,݋` {Y\5#MSZYμ1}fg>a.K|ķݓB&rc~ZK6i¿VHc{ mE~i!wu7k|]-+B!B!Tn4uB!m!%(û:v2)Q}-&z pvv7YuL}YEtDTdjx*5]_j}6BiG4S"ЧM-'.WjP[5U/Qכӿְ6>:@8;:8B_VUA~I!^&n5K!BTUZt]؅3QQQg \{)Ү0%zT}B!BCS;^^^N^^8:^Zʕ+jU 5+v333 ٪rfp9kZ-YYY #99Ceť?222 u< 0s7 IDAT4z!"##իH:uSN;uDZZű2e .ٳd888R{!V\^GYq-! !<1r>~MJ<>AMV_!B!W|{j'C>ӳL=IaK,\PƏOxx8ɫtFIpp0Æ #88'N5[3rH3g:ѣGy:w̰ahݺa'@PP3|QZje~իWb BBBXjYyу_| ?YXf G6L^}ݵ:|0BTT={m۶=\端2O{ܹ37o6EGGvZ;{q^z%dT>֬Y`iSzҁ-l^SXN)V1 MuͶz}8f/QQVd8 9t}ldwfyճyQ.`l6dX!B!B!͚^I:&|<=.BZ2y|ѓryg@QO?9999sdڴi9s&55tYzE*CղuV>̹sx73gtEQj6.@ll,ӦMc޼yddd>6Ve۶m:tL~ibbb̪G\\}55m4:ӧk5~x<==L6!Cpe\Ve…yfϞŋIIIaΝsmƌ3x駙5k۷ogtY[}޽{7/iכ9SgI.prvCQv|sf+u1gN>MRR/2P^o1L(55dɢE lݺ>}Ͳexl.ͭ>AAA7o{1Msmx_Npi59.w@a~s_ppt=9W s5"}Oٍ^i06a2,B.^Ⱦ}HHHJ~IM)K!B!.f& dŬFپ}{bccYb[fm/]uS|~cǎKuu5W^|ce\bZM6mj|qm"͍jt:zT:?0_|z[=@ff&hqyڵ:gee燻X۶m khfIv[hNTFץq*q7v . @Yq>Nn]-zj,/WYU\ܼptr1s-B!nS?3_~%=z>Ȯku>*5d^Ͷ[yB!B!s.+ť]).e``:u* 77{I&5eeeNН={ol2ҡCzW@ck23oū( YYYn0LYKYzrʕZƎKEE?3̞=0iLii)\3[nMAAA322ETTDsOvva%{cu I=.w処Ң\ˏR;7<ޅmBqh4/L:ccѣGVfd^#2q^C VB!B!\fDa$ROaFF癞ΥKaTWW( ׏{F@aa!jt:a4%%[\2Ybaeg}^gtޝ[PQQ{Ȗ./SuӧO筷bΝ̞=ېA WWWd&NZ]wjK.DDDtR˭jl۶7|Tx/>BކǫΑC9sLjnHm_@;NCׁ:moVSEfE4L !Z-mڴ0<ܹsv+sJ(o8N#.5'>N.v+O!B!j񱿓QP9LFA!;s{OEjtի;v$**Yf|rT**SNHnҥKtؑIϞ=y衇#:uDΝnD+4_eǎFidǎL>sr2OqHw} woVT 5ΤrαN^sZv\|i׮..ןeMqq1;vw%%+K.曌94ڶmG몫IKK#$$3cy5Vƍc̚5ˢqssw?󄄄eVAAA+ 2N:ݻy7 [/4^x1c_XnʦME8:m\VCqeVk֧e˖/(00@PP|ޮ]֛LΝm-jK.eʕgɳl]\\l=۷nآ?'%%QFΛ6mՓ`o[(>fD vFHV !7QsY 5G233 e˖\xsu+9w V{~  ܊~wݻζg}Ɗ+xG8q )sܹiUWW,jlŋдĉl߾q#}G駟V_[OnxVtZ ߋhk*ݱy=z۬=?jJ1|dv}5@of)Vm_[jjYoI_CJb+>]]U}~g+kKa+Ҫrn <fՠ6Hm[׿5֮getlߎ^Hy*|~S!>]e;li  b0^?\朱nuuuҲeK\]>)(( $$IP(6ǒjKӑC˖-qwwt'N`С1cm89x 6lhWWء+sG?A_Kv9{BF>AHhǗnm5TkkKa̸cB|u1[]-r mOB^>E.KӻQ#Ki޾ʇ> u=Ù3k੥o0 Ս'F/~EjS.d!CFH^ S[= چDp(UK:RT9>ym/y囏(FLw0n.Gf~OچDt4:xTx8\?gv}8q3hۿA㩑%iyb._B]U?. ؓgF=(9  7h`cvm_KjɜaS3NC1w\4 լ_ɓIMMETիk&OL~~>D/_Npp㲲1c$882vA.]ܹ3#Gdݺurj5_}Fտƺu߿?GaĈY,b͚5rV^ͫ#.jtvY7w1_=ss2ڨGw1g4??>Gz={ _Oo|={kגaX"66ڃۈnŪwqwuhd0ճc_ʉ߬OHR]?.`4m0`lph0#~ 7~Fx@~ڹTlg2Y]h`iiOImk[_*k/a9"ڱvbV~3.ige♷Y;w5ZV9}=?o;L'g)okק~*٬}v*O|.2uJ)Fh  b0*Y~؛?Y]X[l0k֬FkN:^zGff&{/SNVoI&1h ._L^^uuu̞=qFɓ'LAAΝ#99pM6q!Ο?ς >}:զߝFl2&Mc=Fvvic[yձyfg+={`kҤIzʜ2n*OڅF0gL9K=駘-w".=9%l? 0sd48!:x7%%szb/{㛋Eyɹޠe@nfS2 ٌ1oD Yk_[v鋇W>N]9 bTRX^Bx? 87w]юv\ߎo+~G8z1gPyx2,Νht~(qsqeD쿒5 q+Ҫ)ZklCoГA|+*nNq=nܚ) pu"̧~0ubžliO ){@pUiQ%9~v$7w%]b)AOH_S2k,ٹsi{sf>5^͝IdR;)5}OJ:q, b{PQg;<:>1kTJ/"BMG,L3%s Wk&*#*4U9gGOEbXuU9 UWٷ#7d}min%UeMΫd2n`g~R4\\9yMKۊUF_CIe[ZY5 q+oՁ{RSMM7ANnCGXo|Q_sk  R."m Qxy7YTR[[^'##\n갻O>jS%999T* Vz™xyyY]o8"Z[rͽ9/Zj?[qd:z!f͚믿ΦMoo\UV;.v;í`@U݇g}42R"uʋqsWh_:2^~(A*_Ӣ ҉`AAAPPPP`C364N6zz3s6hL&3o*N}CSżS3 IDAT}uhQW:  (ܜסH||;s{^6Yuvn)[j0nH]]qrTֹvsqi!Wnn~>˔#yي#22FF̙3YlӦM3lo{hтRjjjP*@[nǭO~~>Muܸ 9{l݇X֠TG+ {]^^h5jqZUQ4b hAAh4t:V ǁ0 jΞ=KΝof-2ܻjM_Sh o? KPWU4.\v M=:%{iriچ; jex)<$w:%[կQsN/R;Oբ7^7q:> 5IfgT?/z_5MTu~~7BSVrsu[X?ӔOЫ]\~::z'ѫ}֯#SdEg$FӳmkHh @E#+lbv||僴{zڰDr 5SAF{]>B^y%Px ʌ&11{Z~={$C\\iZ|MƏ\n&::>} /Qըj>| %%%| @\\6mŋ5e+nFǸ1yd~moδiLigll,111;TWW;l޼zbORRvKi9{t+sl#PS_P$!Qٵ#mf_t /0{Prbc#A*55/Jhh(g`ԨQ,] P[[K=ԩͪMR鎵{SV]Ayl}iنq}1dc͔wg#};tuj0/~VKh&O^RMeMrm'&8w<rl7a~AW޺d2ύyy>BV;QwFQA- FZ3 }1˷9r!o]̌nV=ʛ>eTQ\Gu~ΈQaSC&.Km:26aH}B7 rj>R};|T.:$;6Gʜ2Tjyl}ܓ8\r-oGϿ热[s/$VAa+Es>?AAnLΣ='+Eۗx ZvƋLd\3fjfW^㏶-b &NիK.,]f_5SL!440 ٱciСC֭J9\<MOl|'iٺ >>d27}S%05?k6LqsZw ɒ';uy!  )'&gN%[](7ʫ+)$< ԩ^e0WhޢTik(PݤH=j{ 5Uj .MoQ[SP@]9~w|u1 ӚW=IhGNkވY [)Ϸ,ߖ|%xrMn  OZM~#x{4vYnT?hn[suVyy9/_E?++ Ris*..(S'ill,o#F ##VZVmm-DDD4Z3)8ƌ=ԩSϞj\p}ƫʠA\ر ^0`/wHgٲelڴ;TO)* quSi>(/'7ō*1XAd2:n&/%^t:xR.¿R(raK;Q3(MUnD6qtU*OT?:~wP?{Wx=;RoY-EwCϠ~~  (7&u^ԵW 4yJTұcGit8y,{SPXl'KFŖ-[2M_ȑ#MM4_hxN>omoD+1W7[5cmD   7UG} b/ͮ  d 0lO?ܹs-{]wqCl. ״|m[`u۞|Q7 $:AAAffWAAAᆋ ==l9{^ XAAAAA&pu47LAn,:       xCAh4ҡCӛÉD'_OF/d oé4ʪ+ 0)),3wVyzt!vpsri!K Ѷiߌ?ƒ:&@๓to/%YyVfa~Ab_q}iƴM%i"h|u19hӑ39!#6uy V:O*w!7VV;>δOx*<zLw*vFֱT?վM=z^}[ chDSPVʽ/+kT,UAAAn1XAYj֭;gt!R4ZrJ%8qnSؕĚxc'Ǖ9L=ʚXs` ~m|~ŕjuU)Nhr{e'$=/'5[!Յw&k$/łlvuU9V}Ȃo?1mKp|eJmUO䭍KtYV ́B545V;>T[C3/83?~ls4gլGpGr<:d)oHy.s8ߓ[xgRjtܛ8ZAAA#ASNǓ֭[Nw,ߵ }Y˷L'6 #eth S#'02S/&˴̡~~|Е)ǘIL8)`4gYo hX[K7;%p84ѮWp6u#YvF\d{صYw>N_ǒk1dWF1[#1iԂ*m ;#Le1`4PksNWWGiU9>dKQ)pW]7)RAo0F;x[[u4>kZwW7J5:~^GQ[s|h@o_4N_Gq/}y>*ӌRҥo;{>?)ucfAAAn]XA|} o+&ɉK{DhTI:620wV}Hר;b"Ú@}bvq>c Hg#^vC{S3z1gbاFNfǯ00SdtAʕwBIpHHmw6-%6-wKl;I<ϋf5*N_S#'>>i {3`/0|:UyM}5"le*я_9:._bβX"IŤ]|/\]\ر'όzP9AAAxЂ  q$qgۘ8?c٣ %uU'.LiW;ƲXsΝ4mϿoOQͥԜ jיm7Z`tA|+όzR_JJ|t$9m>;N$9+ݮ;L'g)E hh_}v/yȠ0>.\^&˞\1 8}thڹY==d_໤tAT[pgn_OvnC(9v´Stk~X;FwW7>> Fe♷Y;w5ZVmg4Y3& frR䲥uf|B]̥l>kFŜX[xzIJFVh0]ccOfE^>i>Ht   ɉ`AAn)Q]hnc%*"\`` ʊ),/E.:VikTSF5m]:Ub}6/8mC# Pһ}\0A|ғ1z|<4Ԝ lɮ)B=cyjdm>o?0RtiՁNYbm zr3o弩ZwW7:FrijSk*OֲsuUO1K),/Ot<ϟ63K_~ҡղ1v+xX_j@/A>~;n.ڟN:+(/B~6{ mh#K_ٞ{TxQ{]^z [RF-vP׳/PWAAA}S@ pC\]Sqp\RG_39*uU9ӎj:L},a~A|{' 4JKӚ~jtY&3NRM=;kl]cFM‚o?ab$?Qv-_P@V{73K_\]%;#~[OS֌oՁ{RSMMwQjx?򂂺ш)_CIeʶLƇ[n=Aú=?Pp(! xX_jq#ɑ_Y&7M4<~^>׽ TSW՗`aRI?n20 ^ %:uzuyX-R_[Vp>?<"叛}  Y4u,_,?W. fi0\=[cps.l9Or  =ê=h4xzpp #FhwgϞjlT1Ӈ{{キ)ճi',;Q[s%{4-%gj"A?(^FAXѠj%J2-C,Ld`]ZEsR* :kj5Tj1 T;mÓnm:v F#8u;Ս-{e]ZO6GΟŕ`֝Su^LA`®$ r0hmK7;^6,LF\D{VLbt<=Ʊt.(/A]UaqntkGwStř t9+${УMG-TY'vX|wd'ϝdhDwTs~::z'ѫ}F>(bW upK<'9[LV߹ZV]^.d7]1Xm]?wȶ{9z!]KW+ gƷGX,Z>nX'7=2̤ 49"**P]]m^򬫫###Fc1o{cVEof{j5QjP`04%i,+h2NG}aXB3\*Mu9 |qTRK 21XARSS/M꫄2{lI鷚0WFWŴl܅PS_/DY2Ys|:9wMShjΘAɫWB.>ΔLĴl>LK21 %?)[!ϥJSC60Ĕ>0~6G.2;=#8Q)ߝ 5$o4ν+ކv)ߖP&}BV`fy ˆڕ5Ӳ 7Y [  Ÿ+Ґ> n8fLΡ2:ܹsh4TWW~z.33ɓ'R$ իW#)&OL~~>D/_Nps1c$882vA.]l晔FL(o{cO>IJJ O>$=CeٲeqEyI2lBDue) ;?0_7~=/sYcDuŹ;1 %}ϣE& sPYV/}/*<ߛ-:1g˗bus]\ ~8LVSɰ{^Ut@eoO%b mMia&׼Eue):Qz>T֕Lzo.`eC^/LԖ}dǓZS Uʉ#O IDATmɣuҙj斨Ǎd0)4oEx+_e3/œ >=W0ݐFWKaY !:۝RSMiU9ͪ#?0q贄6K9hmog)4%-Ag]SӭIyXAAJ1?ϚIthsgi~? RS?H?zTL+RdM6qhL_ {oi&<@BBgw@ a E5Kls>$F79k  ݁<6$el;O28pO<AAAL0dF#.\`̜9\2330afiii}AV:-ARoVeffe6ViJoIhHƐs9y`;7}HHx4ͱ%^>ˮTNȏ,0=Qx4^\ *kd2^ATU4`=bܕ(*6ou:4eTUPzhPkKEi>2}Qh5xx6(aBӈ`AAA߱CUxj]AA߽# V? ~*v4˺zn݊-[Zm:>++ wwwTUUQTTt~lѢԠT/ߕ䘤 " j5U ?C/O0[]]ݩӚ~T~ FWAS]nQŒ@Ա$gKkps? TZ;eMF*T،k{yAښ.xxVSIW7++jh`m4ӟxXAAAAA[1[;O:yKh#ìuDtt4~a~צz2Ct(]L2(n>B#i_fKjE\.k9֎UҪrdȈ "oK3 r.oUBbcyX= ٜ @tX+:EktG⿺Txp[|^/),]߶\m?T&$+K?m_`qIוgjVqot$>)K)_t]er  Yi55v|<=mx,~r._L-Ҭ,JkS\\LTT Q-IgwDOB77wFHkW0;ENGyeT>)'XOfqdAG`AAڿ?}JWWW^zF .$44`>̎;_{|~B#I/O+J ygXc-ccxO9Ŋg';PT^JhҶ!Guy>Kr>/;ySEV _KYu%t3fQ\fުY5]Sgs'X'|-Sy Z♅r-[t4klcoQpr>J)0i)W|Dp||(*/wGVoYoAA|||􌌌yZ ;Zޘmkdr9>a#wq/0>>)uT\6"-7vW7]iWG ҉`AAԩ񤥥uqBC;_u:o6G%11Yt>6"/8K 1Ibc(1;2p<3VA-пκۙz{_HVç{ bܛz8~\hϫ K8s񺑰3k%ӯCWq /8O\_'dP^Lp]Y;^J"Ů̺At:Xˌ!̞GkJQ)LHay)>*ӈm)b΃sͦ9|t z>ݱڴ]/)l]߶~qMoWR\߶/g<_lij|RYj_loY^}AA!;;M{駙;g4-Co|%PY^ķK5֥XhՄ, PVӯvRjL7RyeѴ㩒| /| C:xM,R(iNдm}LxwūUڛzqP׳/u0wuI?Hm+T4&x*9HGUWP?;ϏyS-=thǞG--h4c_AGy~qLFbt<1K {o[寛(ۮZk|Fװ}%^}  Bs ==l%}?#g68OlL&?, ҹsf+??Bosgۈnњ?cT$OQ 087?>įgy/x`=6-v̾s*ՕTdimN39Ts^?VW{8u*5Ւc'])I\,ȡxqܣv\Z`tA|+Rs/23%{sբ)s~JM5%/} |t#SPVM:>9y|:]Zuw.txË_ϝ6ҥ~YA>צ l>r}cczz:ɿ)F7:[VmHf)7tVa?׿AAFsu4.7 -B9k}Zu ]L}N} FJy9oArl;X^79Ӷ%<$w <4GrG1`^]>7?Mw޷WϼW=u)*<$Zo~%2E!.`V\T4Ejy׫q|mj/<O7<$3pJC%3{ܧt_kKj!"ꀦ^?jсۻ#JIJKKe!9ݕK{ B!B\Kdo!:p8Xx15\ O.57",hm*I 9yF嚬~>uq*%(I ٖZ޶ g#uRMSP^BT6]Mb#QT..5[b(#.{jr-&Bˠ9[St5סV*un*ʥqb*]_ԤR?..MBר௯4~o_P6AXo\;~w./B!BBqL&3M&fEQXlF#F`X/wKLj~N_Rc aefg3v;S{N$$(FNyjD Obp|ݎf%ovIR4gUXLMF8Ynr&#A_+!tmږT.u]j~w]u( r0d1p*-6k׻~(X.nne~Wח'^?|%48I ؞3i^a:]}M||sj_B!B!FBqJOOg…g̘NcڴiXV~Yf9ٓѣG_Xk-߱pwu ,˼tͧ=6~3e<,H*c}ZuZhu]A8HU؛,]߶;,}(ڧo=o?# 9-O =nݵwv*u[$E]6|"//yo?TAfѳn-Tm7M6Rc94[swLEbO`׶w辍<ה4hPx~7B!Bq5RڗJ!B\CVml;rg:tW9a F̦"f--$6RKLJ#~%2*(ԗozKCQ*%"4ml=tq( %UyoE]uB!VM?wh_*:W}PԱ4dA!B!  IR+pCpCb"5Dzx[z*끧Z~\eu_]!B!D,B!B!BQ˦OΦM.kǏ{9Qvef,۾jYaDW2,B!B!B#YӱdX!B!B!WY0S]ҧe2]Ʋd[突 Neh$33VelӘLcpW@AA.ldddx!//t'##XEq.( &LI&J~~swɼSZZʠAX`'O~Ou8ƏϠAx1yL6E( SLg̙z\I&ѫW/6oLPPfbҤI.>`⋌7m rOqtࢼӤO(P pdMѪ@>݌CQYJI(?RATJQn4ƶoSQ |_Gnylé]y]41lX>0mtCEa헯Ѱy?6+]g7CƼoߢM!~ V|B-{z [3rjl^̺o` S~_΅9YZ!Fܹ3gd-[V-}ɒ%8p|6nȻ[+s" 'QP^j!(B!B!ĵaM`SZT{ vϩqL&/_Naa!*Fp m[oEhh(IIIйsgŋtڵ˯=qQ8_xYl4:G>}'OuV~ad޽i7;=ov@08$ ©#;0@"*&ko8T*Ǽ8w=:r&20֧kcNpt8rΤymCÝo88Sxj{IY sOk 8yxP\pP9ˍ8.+>ǹisBy4ПW/jB!Q۷s=zիWWK'l:o>S'8Ă+ RّO7.gB4I}B!B!v$džWפGf͘?><z__~dffIz( wGo'66BQQ_{#++(ۚ4ill㉈p7n juɽ{*_q  v}˦Rakscw"}[_GHh8Ț>wš/_8?F-{n\QͮsqJ"Rܿ>t)Om7!Lܦfd,P^DFKpH%]./ERa[Uեl]eE`!1ҧ%U*(aq(Oo_@tDB!Bq]6+/.ɔ/^va翎Pƍcܸq2{lFMNN)))  ~?~+VPTTDTTU:4h^3gJbb"ק 4 s evV^[,Ձ/,۪@Zudm_fLx~qeٽfv~o2:bd,s=e1es*m'!A"ScrPIQ0﵍xj{6I_5 VC6Ij!8$||Uv(4ӹX\%BYv-1ѣpNex6fmi?Y}B!B!6nP vMf&:6adϦ<8QOΜ9Cff&P0X,(BVݻ7?}xgX,@*p۶ms>|cN>|yGҠI'v+Oq~kBFZr6;u7@ҖkWN7Mz;FaP9@[ɱP3]8hд6ׄ=+v{,y~T*b¾_˫Җ?v( Nro?,B\V͆FpY]܅V'B°1+>!B!B\;>~mSr6y9r$ ,d21vX~gً/fԩt:6lHvv6zb͚5.c^ࡇ'CoVN^V:Mcii02'3AtW;_ӸeOvI|+dBQvܳi{_`ݲ7qDDSa(aلGy&1vN~(XТMXF?E7~29,bmtCtQ:~*ٌf!Aה>A#[feǛ)͸ظ_|6{  װ wM&}=CR]nB!.:Zȁе_儅k|λw^V^K/6(̝;ƍ3l05*んgIC{_!B!BK]U:oynfI&VSXXHaa!M4!,;Rsss)++E/h\l6sIRRRj..++ܹsԯ_h^7?<muP*:6JrNKe*#(8Nc4ꋱD75wxw*}LQ˞S*y|i;ڮ/'8$vsDE'?وm3ji2_VTgu)a/d1w]%%2^^..̟z ˹|gC+_c5w\}<߂y5K6u#o!6d}bb|WߺjQW3MWhpGfę=gWdÁ<+˜_gNܙNJyDp*_Zˬo|k-s2{?B׿|o_mX){NV.Zu<4g srw}zB!B\dBqj߾=;wѣ^eEQo֭uOC,ؼ tr&Nx};ٿ{9Sa1Wk0W`w8.'ejQl(#):c5ެ0Py"ҲNU)w9yj(p( Q1Վto_ZN~Y1Q/חkjQX^BdD,6Omu.8n> o}ue;ܩ0Y-EF(qyLWbϭ??Q[o&>~daUl?a)wPd&Kv:}[wCg {vC'ܿ2}/&Z^5Kqys" vbC09t&W|H 8( eFh->.qS~?tӗB!sq^z%ӧ3bpٵYo)-fqXVپjMa3y2,B\bbyݻt$''t:&):u U`l?HDTx^[1'ΠBE ]#rc<=مo`Y1*t&8yO6|\A6ɹr gqq1ɹMR8~|wthAe6o-)EobZPTmV⟿[ڹhʌz3w>@X lс[VRV'&Bbd[K߅8Ĝ4!3 ÁYFs0٦h#(00.j"i|)※ Fjʁ׳k JEVzdxx_{;#"LÝ=_k&9z4%2{3]0-/_O ~mS qAMńRP^BV]s0/>V+z}gOK%H&2LClioo_?x!ךGX-Dxp87_^OII Vr~o4Z{[ңGZ)r6ݛg}{キ&yܧo -:T'F#ƫBO江|Ο} I Zz:Bq*//g֭r6A<`6_l/C:Ȑ7O _d2ޛ'Rl:@ k@Gm@->Oʌznʥq-qb}1wV}Nz)fvwZS5)ԫpOOҩqڧ@o2MWEQ Gg䧯ϧwy)Z @EQFws2, qT/x{>vB!%!#>-/>d[rӨTm6gϞ%9Cns̩`0G||ˇV+YYY4hЀPfrrrt׼.F hРU  CqA^^)wf3vݧ2=r[%)ۥ\q~oZ !jO!.R`@_n޷ %4k/ ! #Y'i] )ݟ7 tlz1>ZmwwhԒ9g8w p2}jVnbK1ZL4r_کsnjᗝޥu)]ƶ.x?)>O6 0RY/]|AAՖ8 d6m4,X@JJ 9997w};2tP-[Z?a\a,X$=z4#((??;v8ܹ;3ggΜaԩ$%%QZZԩf„ b2ӧ3_Gff&&L ==FC||&ywnMZ؁M8k|ǽL>w Ĕ@_o{6 MD ͡wYG1n_5}D`(/E 1Z [ƭzr6T*5f!cq^}6 gW/nkԖ[yMdyYGYoR m!8$p#)nbqݱBq:uۗ'ORPPbɓ4i҄ ;u|32pN_}Ȝd}WVJ_T'3oc=?hZM$V FHp0C:UԬHX-yu\8ԹMR,tҾS^aKߧd}xIkX__믍.7ތ#o۝0 V[-<}^-:p{~Di"IPZkB׸ʫP{?DJԨwq~`d5;oZ:`EGDM|nӯ?s _CT~㢢)W~kr?*5WT(1yqK\XjB!ĵb]8 Ql:S`EQ2e ?3gD3x*yl6Gjbddd(={Jޕ+W{n3g'Oɓ.ŸZuᰓ1Qq(R9kTz3IS^X:( C)jmضm}*juo?{h6m81&2Cqa6 IDAT? 湍xh(56Baf忋̶UfȘ@Q[6޷܏R^ReoPakFNyZ͋͞Yyx˹^BqZİzjz=իW3eʔ: @DhԱ7^T~( 'Lw^aoRT(3rm7ER}N[Ӵ^C۶a34=uŬ>%Bjg/ ;<в#75ݟW@_n&?s`T^Ub KEAQ*ˣ~\]uTeBo2Uuc%1UK$>oK5OFӤb<˝ņr&W|{Bqu*QK^Y%r/hQ ܗ,, •p&5`{@컟Ny~j7|]}G3_ iR"«|)Ο;ގoJTYﭖ/):0~>uاrIcz`<~ىjfvz:H|t͚ȫ|~Zm+kØ>CX@=fױ@Tfz8b}USzÁfEo2? <{Ls׿H$$8Mϛʾ]j!/G^iԀU{c~_xzz+BV$džW~E]u~[ 2;qc+5k֌3w\ׯO~ؾ}{<))~W4lؐꯁP{wx{bșcѢEF.\HΝ$22y".&M4l6SPPxdddV0fd,P^DFK%../ERa[Y:5K_e7oKiT})_dBqJOOg…g̘Ncڴix%l>$mrZ7h#LqCnjSDDQX^˜>9-m4cPL%=oO>W{|+yo؛,e"u3bB;|wv*۾z-㽶oڻD %tkڎ/-6Ο'ގoLO.&ʱ $8wLb7sЛVIY{ r~$^\/F&~mqwρR7_4d2ӷ<5jy]}ܾS펧5mD^^.c~1`;֒@PT<{^^y AAtl~ۗOW!-6+/.ɔ/>Pwa翎ZׯOqq1h4*K:2n8ƍGii)gf8ӳWlׯ_Ǐb @ؙ̤[nk׎%K0w\~imJII`0a۠Az=%%%V9sPv;Wv-o*4Vb1yۗY2?ٽfv~o2^|5IdLV,SV'ҢlN"$}(<=zP.\(dff*OvZJSַiFiӦRXX(|'JbbR^^^Ǐ+jZ9y(r!E*GN(kһwoeɊRKqqR\\vC3<(feذaرc}:C۷*fٹ֭[]/_6LyW<)#}gi%yJyIGR:ZQΟ?UY΃۫l?T|(b{.T9O)yS*o4U+rucIaះ*EEQܙ#U yPY΃Xw=O^W莇;e{+WUEQ6݂5K_=C݇6~~Nܧ,{iʆo9v8JvAg_~􄒾o}B!oFdp(;wiӦ(˜1c(++oرL>I&UYJ_|ĉt$''ܹ3-bر,] :uħ~PT,^S[Ջ5kָ[^xz :oZ9yY:7VVê/fP^Ȁ;p~}l\6M=:. '篐 EQh6rϦ]hC}uOg嵎;YhcuB7a1Xcbs眳=mOEqXe36]Sz*Ϊ/fo6o(7㒲_bqDF'`(/^65iX<|_T*q!ׅЬU*ێ]{UNAl*B!B!D͘M"оTu^e۩cif3'O$%%{vLl6M4!44ԙֶm[pAƍlnn.eeehrȅJjWaa!4i҄*igΜAT[&|;eee;wbYCq8(/CqPln$*ﭽXmL=C4FHt|*ŻS;d Z$zV}O/mtS\"nRV|$BpNw>sRI-`!f4BQZn]%-33^_e[v|zH gL6clV3mx^RI!B!z,C}4Zju-)dTsA!B!ןm۶b v{VܹsX,VXAAAA<999XvBodBqJOOg…g̘NcڴiXV.\f#,, i߾wU ؛+B!B!nY,ߞFD-e(3ybD/yf>S&O>qD͛ȑ# eҥ|'\EQ0a͛7gĈM!j !רkLW^N\\AAA9‹Jڻ9 B!B`e*ݓrזlA6`+&L ,,e{:`&Mω'o[!j,-B\j5qqq$&&^_`u0~4NEB!B!iwz6ӳYa,ߞ@D-ӳ)7k\رcᣏ>r'!!>z-[K/1oaÆ裏h+**bҤI<z뭜uY\-2HK"""aAzU󑝝MII IIIqf*"""""""RGgw6kMСC aNJ߾}4i>,̜9.Ei EDDZ%K0oU~?u]s=EDDDDDDDDDD9\.'pvIٲeA4`#P\\l=iA EDDZ)0z垽^/>N޽ 5kڵ4`O@DDD~ׯg̙|N 믿δi0Msi銈H P,""J3mڴ::u*{!22qv""""""""""[P,""rKJJ:S=EDDDDDDDDDDDDDZ """"""""""""""`rmW_5ꞵkrnsظq#cǎ On֘~Y}HP,"""-e/4DDDDDDDD~ߨ{nv͡O?4t<`f0@)M9DDDxؾ};`^zZ?C׮]5;=$wfvgpܵ#5NU?\Ί?v )4R4xlbKLwν$g!6οe{8uj+""""""R4kZc:vHdd${% v#77nwXK/Tk@;vӹ?bҥ|s~~>.!m6:tPmL:7ҳgFnix!:. ݱ|iMKD `Vjɒ%<̞=9s愵y^/^7k̽sgwȎ) A^66|=՝|&2ca? 5Pky\ Z_JfdeDy>ws/5nL.zʼn'HnXf ^{-sL2%tmFjj*gy&۷窫0wX{/{fȐ!$''hѢPY8C35Ykmlu5_{G'WDOH+ν˨Qjquׅj`uQ.:Г_e}Dfk.xFz)كQ^Aߠ`.)f9c5Y9%Ɠ_qYƩd`Ty/Nu IDAT  6ܜ>0Ͻ.>kU/oz"""""""`EF6L9:I㕕.\ʕ+eĈvip l߾^xu_uUڵu֑ի1cFXi',]۷3eʔ~/^̠AB_|1K.eÆ~9󉍍4M.bN=TvInn.@S:`0W_W\-[f…5^A7u/ɜW6oXVgi!`[6|E?E!8yʹMɣn;f1-(iiеk֬!::nݺ5i%xT>kf~u:6_ŧS^`5/sB諺':L⩮7YT j7y&\3=noWo _!^gvC ̽P+xu,?5]fߓaZv_)ܒW㺆ص^_8(ؾ:c ˆi`_EK6~]o=>3yuYaDuEA1isB<A\E=S.Q8*$REE){f;ŵMI s0|e8?t {t~YO_ILL?F̀I|38(+f?yӍ)_ᥪab* 1 `~\ }ˢ:[fW;Og['Lq[!#5[DDDDDD-;Bo|eGw4\e8 vkvx<AƎ˕W^?@^^ݻC(//-m*gJfΜi|$$$p)}vbccC5@׮]|{ǎmۖй.]Ը4&7TcgD$h"r`_""""Y~~>7ofܸqMQ)dNW]HHK&9jD?я/.d΂ZqE1ػ1hqh^EDDp% rWzMFF?0L61c`׌}C޽yq~u˗/gusˆYMkUѱm)b~ EDD`arJn%!-iRmmjDq+P[X55G݆Uo'-]`P3ԭ OlrW8#a&`ч2\D5|U^{xܶ".Se(m ={v7.DOY)̝WAxC-$&~Τ&wg^ƶB޽.&7;zɓ߿?cܸq\tEsĉ̟?/ 2tPw:oۙ5k}iiiRVVVk[yyٳ'zbü^/}\rIܽ}=_3i㆏5{z--""j/6 &;;0hXc߾':LbtևV7o UҖBK8Xҭ߷wj{12' #wffy'dWނRFĥaIJBvy'>_OnkdX_y;׬e_Y`zfXKAHxpTM}!CuV6mDΝ' Ϟ=N;={:]]ZgWa mSn8:S,""J_3g|ARRR:uj1CDDD_]Et?- V59leoA M~%{vhP#-c47N'sB\;'I/qlfm=29w0?Ll6F׈Wip=0n0+V7 pSAN84+h*c=F<㮷wznޝ8nX㎷-*May޺n'ל]V{c\z+\y{U{ )))g?8ӧO?4f^E߾}km駟EdZ<~L}^G ;Ǖd Ԩ~\Vee-yul+q* uR|$ս.q"ڶp~ŐZ)K#]^ec C|o,""""""dZNu(g94d^ `98ڤ{cSmw'N Ĵn7Ԯͻ_DDDDDDDDDCci6""""""""""""""`VB־ӿ8i{"""x}krl6sj_t؝q׎hX/:1Ruir>iĖ'!#{IYBl<;SG㊈iT,""J-Y~ٳg3gΜ/_lڴ'|fwc.̼Y鏙9 t;%Y`aw:ƆWRg_36|o׵[W`]ؼy}խ\ ۚv4`V*=={wQFھxb3Gsl/+dWQx+.k8L*O3\ؽz\aguW^Ww{)7Dm˛ק%EDDZz۝N'#tp8p:AΒ ̽꟔c ^w;{م;1O\k^`>0 +依ظ`51\C 0O0=4As|9iܰqv+NW',ù 7UUygٝ@}3f&7ڵy^[s|e0o-%/0iUNlsӃp5ppZK$zJb'`f>̀I|38(+f CKG18o rEx K ۖ̇׾ ZBLfig>}~oz×нm` 7c`㏾Ժq\j/`-a|êNtvoujdw=|.?r lXӸEDDDDDDDDĢ `#f#//"##1͚lC9* Cw8 8 iI5GQ#ŅYPk?(F>~{7ÿQjqh^+0^mF_ hqaۦєVKaV:ڴ~d)""""""""" XDDy뭷?~<}X.}'N<53)soc&̀{=g~\Q6֯(]x܉1-ZιMa-?Kϒ=$To6T ־5wBqL܋%dn6UVNJ9}%~^;i8--""r***ҡCйTv>n%!-iRmmjBYNmy(-JҚɢnyڢV V6vE|gsΗþ  65>9 Xhh_=q mE^]`: oR;: =0[HLjDDDDDDDDDZ3URax^l6.D\.k֬am?yKZ¹ ocdNF8OIɮ"x5vGޣhK7I/.$*޴;xstov9qmX=kzХ{ɦ0p9WNx5kYWXgR=!&foTkyжJo4_B_ap Z5~UgSG4߷_G{q!DZfU\7]DDD=,%vC`Vj̜93tԩSq:7{%Kh۶-cǎmXSeq*&--(ɯSy|y v.E[YBy;>8. C||ӫH C)ɜAn||8%<-߬ Gƺ?^O>UzCר*& .ϛi 'pV1ZSq[$? r;pܰ{o[T`-Gћ# ڳ 5OKo+oo=-zz{C;'"""BEDDDD5U˃ <9"d^A[J2 jT?.wT4M $..Qe-6xn#c\dWf3p%!uKwQԶ+^MY@&HT ok xgCwNDDD~[ {@uEDG>oYZ?P "rh^B"""G2NRɪAީM76%vwb Ęzi@Lix뫩N֫s8ihېuG]l]nmA5f0T,"""",""""rXv\_[m6j zȡN wAzs ~EDDDPXDDDD[uU{зo_z:vx0("""oY?8%%WU,""""""""ZRpܵ#T O?4?9tNDDD1N:ioMb۷ իW-[m6$22c!S[(Crzglo-?ixY.!_&W^OMl)Nǟ}WǷK.tp@4梋._4Mv{kȡo"""*,Y~ٳg3gΜ_~%o&Xfwc.̼Y鏙9 t;%Y`aw:ƆWRg_36|o׵[W`=YrcT;h ^'v"c)E?MYYVksNZDDDiN _zoDDDDDei߿?6l`am~EqWңG >,+V`Ȑ!MEYDDtBOo6Gdָn<`?E9{MIYWoPOlD"]=ޣͯM_qNWݿ"yt⊏j8L&8* a.|rGjz_w@\ķ`vm$pGyJ[m{wCJ_(>Fl׮]cs]N(W̊޽}0 hm'+g}}һt:>I9l޿EDDDD5 EDDZ: t ޸JKK_~iRds'yŘ_vN)q3W1X?0;':L o'6.XMLr,} #i礻.hrN}f7z+61{8S8,{~ n{3`rgո?wf޻Yvgux_̼ĮOJwms^W?.=azf_[kdä¾"8M…Xf<I)PN9ԓ΄cMVJ ?پs &κғKoH7dß_ f.߬g9F^>T~+q E[-ހw U\koΖݬY ~?=+=mEDDU- C`9)9EG[enǾ}M[:96% y1Dƺ_c>ԁGxlwy},ч0)ٕOEԖ/.䬧`HԃA7A i:1 aOoƲo x8%^̸}e̿ >E._o'+qLa3B޴s~6+$ܟj?Lcŗ ^Kn€5uӍpkOV?`UՑ׭{@t6&L_MBA~X IDAT j\g 8}#MKd]Ҏ1WoYAmU.7, GÀ!~5*wɮyYCZxKWߞW-aU ֹїZ_;71Ц#܏ؓG+z?P^hMdl}`q=mQe,"""VX!&Nyi߾=f˖-MOQ+6a ޻Y8㊏f UNi4nrmk|nYYB!oz٬Z߭@;\hyp >exk݀uǵi[iM] | "6vG}k^_Ŭq(Ǟo%, ^OV2 ?Yo,Fv7cb9T~B`9(9BrEz-:vn%!-iRmiC{4C:e-ZU-/,6 y—ngUsoޢ74vE|g31[#a&X<ߪ}68|J8T+K_pݙֲ#'n=<ޛ _ZK)lk <·*pDdx\R*+k`DxCk8`pl[ h>CI>a|`9'ҏc'M98`0 LY}抈p[BW~Ab~lsY& ϚW@Y7Ftt< 7@ @n.RRٵk7:t6-14lC`V0 ~?~y^l6.uAA v榛njXc߾':LbtևV7o UҖBK8Ņ=Nm8Fa1s䑔*[PʈG/]Ӯogy=.qCD/XB M>MuX{چ3&W* ]ڱl| ?A'c}˵*7k̰BVo}o )))L:իW_t: 3FXUSeq*&--(ɯSy|y v.E[YBy;>8. C||ӫH C)ɜ04Nm9 |qpK?y$[Yju3~̽|rUkqxK"Ø7 SAN84+H*c=F<㮷Ƚ>dpJ{KO8(p]z+\{|ķN=`_QƽVϷRv-=)ɴZa-rlo6xGG'8NvGzui-n~HA@mkW@i Q>L ;Zz0 l6;iT6N+x5ԵDFF@׻\10_tOm`UcI~?YYxZ֑ѧFnCoߎݻwSVVO?eN0DEINNjY϶B`9TٲV-3`CDD䈐zݏvn+0QQ-2}QVVF۶m [bhO<:߸Kva:goa)}^Ғtwe{KKm8bm!)ԩpեUqCϰdإ5^̱` Z_޻®x{4^9~l}v;.6 &Nng"rlv8`" #PTF  +Q@؈oXvl6 ; 4ve{ii!'6mhOQ8.]ri 1M`0Hllb9" ~oE޿SzM #|p##"jTR||R"#hzA5i?Pyoh4jU?t~M`ڴǹ{p:8Зnph)h9le'kr@?C9ȡ!{ U∋;;vStolJB܉1^>TWS%&5-8nhh޲[;7Kꛚ8Uބe̸?pof]}N;HgPD`᪨M4͊`  +1"bbBxJK*}vipDtonrmx8 ^\U!k PNYY irEc-o&.L-4MUV}BmM~.WtEDFF_ϗ+~dcT!cݺ x<C!pu6Я_F =?7o{[ō ;G=}%""""""""IgYG߄tCO}ÏYzMˌFa#*ƉN{Znn- UUk-l#<ԌQk~ HiiQ4>~9{՗-~5ѣwfӹsGbccAQuk}D̾֙&~/|de0tVqO?%#/.Wd9ZVrn EDDDPXDDDDZxRF!8$%)/z +nH7P{-8sn׎@G vΰsnJC_V%m`0 [|ju1~_R722Va\}ް%U~6؈tWCUxv8\1aK[P5;vf=&&F!~krї`0Hv:ʼ]DD=X""""rP,""""oCAa >޵xx֬g4QU;b<=Gs9B`9&'",daв.W `0p| }""\8KDf"LYrTkb<\Ѹݱ4x8KL~zn7\:z9tFl0$Rr`IqCee~e#j,A_y1^_!`Uʝtr&CoJ ))KLL"""wl_(  NnwvR\O\\P Viaa.BTQQ*躖mOk QQ񔗗Q^^F\\QQ ۗO ৸8MmW.=]:lVuSihz}N:gy/gB֕6[WGi)齁ONNk AzzoЮ]RƩw9T)i{1RRRHNN/n 2Zs˖-dffw^knPAoF35캁zkaID8qڭf0@(WWDrK\vHbc~${ a^{V}qoqqv#tSoAZ,,=<"<"vyzKÖ{k0uη6]_:U,P} {ZڬYGˮbƌHIXk:{###T[jR'O&%%Bʕ+Po4MONdd$<^zEE%0oLLL NS!H5Å6i*_Ԫ~ l޼ {fƍe"MQk?bJI f 1$$Tk`>R%Pt0^8oDD~a`/2O?4N ;{={x<\"T;M"""G]vGff&ŘIBBYYYk׎6mtg*1;ݖ4p'd Mx}L#H v A^8<,v 00M0x'qp \r 23iU ,R^9\(i|>o~z*8l?&==$ [fStص= g7r1}.y>$$е{:U!"Ne[?cDFFrW3v%[뗎f#**J!H--"""" """X `ƌq駇λ\. LBdd$_}U9oc5 &с2:iMFll,ƍc̘̝.Ģp8 W""""r"""i[DGG3f̘x\.wraؽ{7i"Ӭ7`mvQ+"G 4kT<LBBw!YYkٻw/Oa(WDDDDu EDDZ`0Ȝ9sx<zl62220MB֭[#XDDDZ_0}|g$&&rY^GAAee^rS+"""" --"" ~V\ #<:?h .B>l裏R^^qGzzAU!ptt4'tY|gr""""""8"""Pdd$ӦMXnك&66wozEf""rE 0 ˹;x#~C`ޝGUݏݙf%b;ʮ( ui׺b*#[k§V\@[ RD "k ք%!ϝc`$e&|}ӕ];wJX!B!RB!Ye+(î3k֬Bq&K4 X49c=׿?)ݳZI !B!YFB!D'v)..M6iIu\.%%%l߾"l'?{hI3YmdҤIDUU233~HOOnG{ٽ{7>[nwy픕& d!hG-%O|f̘?%F׋~Tl6KX!B!:,-BtRǒÌxGn}oPw\vA X6zٺ@>ߛnaf-o =v7yٸƗֹ~o%:03gs?QkOFFO>Iwu&):… &'',)))5j]v%33kFC=Dll,ݻw'++=zW1L<ƍCUUnQ@G}AϞ=IHH`̙o24hk֬d2q]wE| >w}_qt֍ &еkWٶm[{ꩧ"G!Dc'&{f3Vbͬ]+VPXXȢE/x뭷HLL`̘48S__B!B$B@EE/V'HOfފF& B1رzAQX~>䪗ƅw\,go>'8R jF7װwPz6(y衇ˬX:?OxWHMMmrz_ͷ~}0SYu4Mcʕ 0~ xcTU_|ŋRZZo~pl&y\QQŋ袋(((W_}_|2k,|ؑįi@Q9< ӧO`Ĉ+W@ @ii)};爐7|3\[o?9xB4zY o &pH5uuu|B!BVB!D'z3gcƌi4+%k[,̘1=O{g\o]ހ}}h%T`ȽW2Aײქ\,cclVՍ/]`􉌘>ۈ`ҥ|>O9rdx)cƎرcy9x`ֲpB&OH3_wxZC1w\>S\.f7u]LJ~ȦMoq ]^To]wyb IDAT饗ӧm /c׿RVV.O9F!8x|Y_{l̘1d"6.( Iᰓ !B!hB!:@ ٳa̘1c=IeuNbl'/:/ֲg&v.(F1(\7}H bPHB񻼨pV=%NnjPYY @͍|?u]<<`z[nl޼*[lSYYI@4<Ow͇~|@aa!V[n9y۷nkgJ\\ 1 p;*4!8=c̜9kŋiUUU̝;;3Q\®ݻ$!>}~ & Jllv!B!MB!:`0Ȝ9sL:5s`!Tr`!8霂)CP ;*f'&=.BL֖VaMp'?$kGu?@( 49+;wa|AvFuۇݻ=&&&^/__ 4C977|wL2ʢf`X[[u#ugddPWWa !y駟 ^o@ W_h$Krrr葛K=ٓt֕ XZ.B!B!D':rӟ4h 4 Ny<EbЧG&Umrpج8U`1#蚎x^R ԺpY|P7ˡrSu0+9w VɃɹ5oTg/?.bL&o̞=Q /\]Z~_`deeC1b39p?͛73l0lfK퇻w^馛‰RvͬYN?QQNx n[naݺuL:iӦQQQAuu5Qܹ_Q];_φ =p{7SPGCC=. ٌnaوMhT=<|>n7Ç+M(++cp 1L8TUmPQK!B!DǑB ~P!Cp 7m۶p੧"--ӧ( cGeGi{Y鞕L^״f[0u{ 0YUnA?}ȟ4OuV-/3[X|z 6>|HdeeOs=0m4 L69呕Ν;yW3d F 4+VfN!|-__}rM71~xz>czĉILL<>x 233+#|Rx /_h䩧8GyjHjӦMFCC=wmGU$'HrRFM x>6+Kbݲc2ɏB3OQ^~eO套^$)){ww7.<OSNkiii6 vB!Bt,eӺz;:!⼰x 4:e}} =x4ݦ8jVbcKtD}ƉC|NR땛vM Z< ;;;KKoٵkQ'uXp!Ǐgƌ/~dd2a00(l6nCUdedyRt+8L~UUa/_n]sHO?mר!T춎lrLLL I]4{딶dmh=u}A6nLeeqqq |QZ3B ?zy1>£>JJJ ǏgŊff3ƍc֬YՑ̵jJ7B8;y-&f|{ekBUd2a4;8R!h9֭vlE Bqv\Ff !Js  'NHjb\ЦӛIl۶{k<Ш|ƍͶG׮]V+zj~K񧥥5y<뷤fNW[o!##ٹU5fV+Vk(1I xx=X-V2spYi <<^X|>?U3?_x~   ׭@ tYt>֭0_1__rw?m1ŚE7>jxT*qGp8bNJGS?Z$'uM=?nͶvŰ!CZ^b7{v~E>l?ɓó0n8Z>r~|_ьљt6B!HX!fnf ZvJ޽lT޿fϝ2e |IJzs uf{F1)ٱD\\b40 ":Mhhbѭ wnw~3z=ssrQ({^-|"q]mv @͕xds:]X@5k2~XrQ~Z0H4 ۍfk~\`0al= ?_)I  H{<^FCxV ͌5vkN$?-= M=?=ӭSkqGޑ#def̙?{p:<7s&Nd3p*+++`YMXbVbСƢ&^ox"_h6{D3<^o}E4r=:>`9:U!BqB!)̝;{lo,Dԩ?W]BZj{vc۱X- G|6(@t=4?Z@CUx<^7VZ[NԔ~ĸ+.'-5Ç⫅h|ua=帯sEx?3M="_u0z1:y={y6nɏOߏXNHXK`,[-_UQXr%;wbZq\vK.n|rٹ{zP.cdgeg^P|>_::xn#td#vtVE_U.=:ZSSSK qhFjJ hr-+ȑ#\8p`孍moP[Whb0K+2Dr㺷gg~j-hۥKٽg/v ѫ(xHH'99ݻ̢o[NWYNeƌ\Yo6C%!1o,!=- I]}]1m\hteTVUaZ\sx$&l19iskN6v!i(WiRZlkp%%;vp8p 'B!8IX!^Ý_ @tv*acY1k41( @h AHP37Q㈣Hv/yVf3GI4u}2q"VuXrSYyPo~Ku]gK(Ϡh_~v6,"z撖BVfVkhF#S~gnn 6dC(ٗ (ٱ 1qݤIl6t]2i"V۶dw|LV).Z~ߧї\̷K.ٻZ<Ŷ:_̌tF i|h߯\n)GTSZp$cma!)Lznz)\ʔI֟H?q\-sBst$ZьeFaP f8oYo1hVTg!55ÁhfaZ+䢋.bhTz}x^Vk?_c\1r/YB#[o ekH>w8uNb6Ma!}/b%S[WǼOӻ -~5W55l߱[~tCgNg}-B!8;u)B!Bt'sr9p`Fc^%{tPAQPF jƬf`0b2®;ZM xxؼu+2e>{ 'UwFuMMT呶W]SC}C=d4ҷwۚp8^t ,[?uZlhjbX"~Z6ӻW*9( G|\5G,d2i{fk!B!D;k! ۩K.4򶪮f̙_#FrBMTnu>̪j `hpPB7(`0(LF@ntD>BR~/p\~M[rwi2fzx#m`6}Fc3 ʧW>uv~99٤$'7y΢oLf3`q"tCbDQv;.Z} ظy ݻv%K6_9. U55Z>;&ƁixV[Dm}CK.mt^jJ2>/ lى5g%S_#^?*W_5]ILH@ SVĢhn:t(ovY>bc4  -祈i$$$np6`2H%kS;Ƿo2Xh4 V)11\:6m'%9aCB!B$,BtR=iiij*-Z/~^`ҥ neUU /4ch %>t]|AAWt0f+ф0( (E bZxܘTNՑCg$l41͘fRQLF4-zN{_3>Ew^,_ں:R91T[W2ߏ7 ׸&wu\؏k~uo.Gn.l>r^Sv;~Q_;N Vh6`Pzvz?< ۻڣXIIIA56WxhVQEl6?}Laa!yyy԰qFϤk -(LJ,-q|ꘞ^ukA mt\O%gn.=ss|oȿOo"B!BIBIM6iӦxz".oj.bO=!6N drtΠ  ua=ÿ5] hZ 411blL .5kC$'&$ q31qq$'bjlh![~HpI0$YFMMm\Qt]s4A[SSî{"oeRo?Y@@Q^󩊏#1!^7н[WEi)eQaCkilHMIaŪ}u]P< '>?M]7~j>N.]HHl6u&78223lh?Fl223#THKMe5祈)imC&r|>|>_G{?'mG8N' Gf2jZ#B!83dBI*111Jd1D{chpc4Cˑ*䯢¡|NP9z{]?$8ږb0`04 P"voYKͶѼ9p /7eu<*:$._Fe|ч81cMMU]wr*VaPF_<eذq#YYs{󉉉rw[VFOLF#.Gϴk~

i~:4M ?-~tM4`0( Fcxc}Ӗ91m/p4ڳ>N1>Ϻt  ex9ETU L&IυB+wlZ.jtlwV$ gkdBy^̙Ø1cLVM6dV\ /p*! qV3& CB 7 :E?_CQ$ zh9i)^ q'{PW@\l,5lشGju6 -?9vf?1#تzJ[ D{~[W?W{#QR-sc^"k>q(YQbcbNcDB!BN!<={69993&h|{ 0SjKvc0B(~lm(GLa#IP 躎hՀ5YD^/{Lr]sr::,fgᰟMgwB!BqB!D' 3gvSF]ロロ0o/p8-S3q:xVPbv>MEW(+b`CWhAch蠆ٌ'+#oO!B!BdB!ȑ#)//qZt\N':Jdfd]*PW_Oll,`0 ,6:f#&&K!B!BdB!NxfsG!iЀj`EQ (#`d4a4#>>͆t4 vҳ{^GߖB!B!8IX!B6P=9|Ʉ`P S]]L*fj`4a20ͨ1M>RIrdvGGߖB!B!8IX!" [>^AѬE8K㈣g{}^@5T&&!1IE h z0?'33oG!B!BtB!:)Mqq1lڴ MWVVvZ ٰa@"m3gdҥO~ħbft#F0bFզ铑A]}&ՄNKM*4+`h0PS[CDF#>SSWn=B!B!ĩBI=s,[ ,Xk ϝ;SYYŋǖ-[x76lii?shO_֧:AQF/~_ ܹsygX|y+66l5q Ah@G 4RSR;B!B!D'bBqzL64~??ߛnaf-oS꓃kwZy!n|o;\][¯|m3K?g5)Sx,{WIJJ.#++3gFu-/RRR?~$,B***_~رz.]0x}_ϛ0h`ǂb\ٿr]G`MpG/S^qE Fb+H'5^¹jp˷oEI12“/X'xo;wR^^%\r*UW]Łؾ};g˖-o:[oznJRRsw{'dɒ&;w.˖-kFUU{m{\nǃh2iN@ _ cwtpB!B!3B!D'z3gcƌi4+BK?~6M{ӞM-[j8=gxN뇇[^B {%#)|->X5 cclVՍ/]`􉌘>&cLOOghmFC1[,=fȑqF뉍e'ջ馛KU 6pB 5jT/HIIi%vMP^.].l6bccuʪJCB!B!B!:@ ٳa̘1'3qDMbejn납4,(P0=RP SpW;񻼭^YY@BP3175GnZV[{xSUUT///chMNNn:G>XVdVtA׃aVU@Ǭ1M7-B!B!D[ `! ̙3ԩS[( TUUN9?ip!obv.\O|b@Nmi=9!\[vٺdgg7 EiiiQU]O=ܚJ|Mó7nd2d:L8@vv6O>p>0-x>A5(G h>>>Gl,6gh2X!B!B!3BNHu͛bʔ)|.plܸ1;W 0κuN*sֿWHݾG-wa_J#9e YB usq/ &#k7EwqM.|-0{l6m]bE:v-ZU((ƍO#>?33#F / Y|9˗/oѣۣPM*p^^G1PP0L|^/.]b4͆ ? !B!B#3BNSTT3<>>dnͣ͆b\zr0|̟?CT^0u{ 4J*7~ YOgM{Q[ߖNq-,z> NΉϟK/t'#GgcO>b?[ofΜ[d~1rHu놮L47{l~Dž^㛜-czU۝(f3 JX٢ԃNEE9hFA^QB!B!5ʦu>wtB!yasr/h?E4$v,V[S__!>>>WiwўnW_֭[X[fo%8;DGzjx=$5YtRMFqqq>{!))ĨchÇ9|0yyymuiF/;3n-\VRRB޽ټy3v~K1c#X-q=*g%dgЯO?<^AqR^q%1~UggӺpQcK"y!8;l.^#3BYll,F*((%VbԨQQ-׶&8&48^bӟZLL&CKHJj:9ݚ?rϊ+شi'OnTg֭$dzqS~66cFRB!B!BI!7cƌfںٳۥ#lvЯ؇AQ@ut=c} 2a !B!BN}~#(B!:dj^ `ښjfM&fSرsP!B!BtvA!Bt*[>^AѬE8OeePUuOPAol6B!B!yAB!:)۩K.`4[ZZgϞg8ɏ8ѡtW_}8~ut({vEl\I)^'$P~(B!B!:=,BtR=˖- kCx뭷?~D2e}t7O?e?״sTgvt筮9])8Ď%~j~?.:jkr+pЅB!B!ddBIM64~??.BA޾:eIhnPЯѵ`z9t(vG=?1l+Wi i-jpA^O.u)B!B!l$,BtRǒlTgʕ~'6F/߃Ѣ; qqGIɶOW/CK~xË(/GE/%++KO}}=C婧oo&>>7|]cƌ34Yfذa?o躎!#=rjRZUuu.~o隝rc0da!B!B>7MB!yJ>V__Oaa!W^yeF~d^Gj<&fAjKHÅw^կ/ W&r;=ࢻ0q_?vZ6mā̝JBsNx 6o+wr뭷o>n&4MkܹsYlY]W@4>χgw+Np|hZB!B!"Z2X!^/sa̘1f\~۟cn%Gv+ 17aO (nh1z>|8v <'x)SPQQ%K: RRRuFٌfCuzv#;+MՊjEUUL&\!B!BmBщfϞMNNcƌ ߽{79]vQUUc׮]t ؁Q>8A  }ߦ$$$m6z|IƍG޽ڑ=zt\|( Vٌn 1 $h쐽B!B!D% `! ̙3ԩS~/hhht_rwvpkC(iiW]u+V?яNuDQш`l6c^EQ$+B!B!N I !̛7OS|>ZVUU_b}w_G|J|N/] oeuأVpKo&V3i$ D^^^zGgϞokt/B$+B!B!:$BNSTT3<>>dnᆎ T7io5:vmy'x毦l6v~ULċ%\\\̯k#G榛nbٲeXp;vЫW1tPadwQ-"}ȥԚǭ-[l̖,KqE\rTSs7SNEA\p]ߗwмU]PADj؟ce!""""8InXrF/"""""""""O[@T"""""""""RXDDDDDDDDDDDDDPXDDDDDDDDDDDDDPXDDDDDDDDDDDDD"""Tvv6"==___BCCqppdffi޼9f*""""""""""D+EDD3feeܹׯ_ϊ+ؼyeZ+1ǩp.Y-_}#& !cȭA+EDDqd&>>N:tܙBpVFj`_l1c5aͷ:VszQ\1I?JZ8\űZ$bPǿj|\yfSrOMϫsYk^[A)'}}(]_XKs" }rs0 dExdc<0)rrIʤޓ:l Ok_Ȭ_ǹmߍ${N}NMYK:j!ݯhV7ύ <1wo#lmf,za&5(#wqurZ;@WǑi,N_}&?\`$| *87᫿N ^TgJIII!,,̮|լ]Bdd${o%Exb'~Fc^`[]`g-ɑ"$U jŒoв4Ն^#`m%߂lk{Jޟ-3R}#ڏ3ȹxeѴmЌ/GNْ-r,G|/Hm;GqwA)n{O,>Cmm->&M֜LM%>~#,h)s><ޒj+ߒ_khJwl"}|q%h(ÄQsҨV=b_/1c_Xg;3_Ɲ|OEDDDDDDDD#%EDD\[ܧO08p/ZjѼyJeq,nMׇڶiH{t# 4A;V _RK?xxd`[?m\ǧaP/мn^$[Q&z Dx y[O>7XvO7wrM&L".q@?-ˇ!%~wo/7Yڵ` 6ld >yǮO]?VP'^8}aRXDDDDDDDDnsJTSh4hPo3ے')Nulmﭠ{Z%c(>M Ӝ"u}IN8AfEj.q/}xc9)zC]z bNIߓ8}Q j S/%0H(]*FѪ~~ZmuZ,>~^|?gmNKƤEDDDDDDDDn^)jeѢEdee1`d2B;v̖,l]XV oİo6Σc Ꭳ dj]@(1?o(܅ܰS|ذm8o7$6=Pp ޽7Wʝnl^D+o)8g8=mk`HwMnXDD2L0m4[y퉊d2ٌ s=hѢBn&m [R?.'kpW.-9:}#hһm\VMX6KK0`䞩GM IDATC*l~`ΜOkͮM-?O1gL_3!BeѸv0 jҨV=/qYg|jzеEٮrC>/ ]݂L_M&F&a<Ц?LWDDDDDDDD:1۽ڢuʎCDD䶰?a' ڕ'mڗkb!--||||p(Yrs+$E 8Y4,|<떼9)xųbOz%/crYH\Eh(F0m0/F[wRN6w*&+74 R Φ玀80͘L&wgyCbGNn:ӧd̘pvv'''ѱܟEDܜrٷ{[Eؕ?|DD ;XDDve4^'zvƯTz funnx_s?kJu 7W KPpSXDDDDJ+ !~\b]ܥǵ%"""""""""WPXDDDDJ5}3/hmEDDDDDDDDnwJ 3DDDDDDDDDnce """R~'?^[aT 9t+1sssٿ?ѬY"mn5_Y}'xJÆ 4mڔڵܒt GGG6o\cH"""Ԍ3زe 2w\l/^d֬Ylڴd~GΞ=[W:w6~ǩp.Y{ ..ӧOő#G)|L6m۶Uؘ"""""""""RhH55n80LV\ `P+/Ms Ŕ9ׄ5Ry?#K.W^^qe#//D񱫫_>999KDDDDDDDDDn/JTS_'''j֬ɥKX,߿cǒ [YQ< m]Zr$6O:p0f<8 7Q31 4K&iI&qFzE&MXn~~~ӣGquu`Ŋ7Ǐc4yט5kAAA;w}RFr݇'|:0zrA[@III!,, 4f3 ̞=3gGUɑ^GkqR ^B {ΎyqAx"^L= t/2.r$6&}"xg2e >>>L2VZ㏓fG&**l,Yƍm;w&!!{/}Xr%'"""""""""ՃVTcf?`"##m...tш#ZĉMY`;b.Y%)<prrh4ҬY3oNFX,/Tr&R.{SrLMóov;őt4ݶ^e7|pϟώ;xwm@jj*AAAݻy͍kңGw֍F` [DDDnGM|ˁ|rLhRBNMH5d2`ڴi@߾}Y`o@Jz\dٸ ?ڸVwrh7BCeĉtЁ 2e ;w&$$J~wqq_#FŋL>g}֮͑#Gh֬Y,"""7 3崹èrLVv4뤙Vu琚 H5[ou6<쳜;wggg<<LJJ 'OEDDr̤M紙-LZ ,""""՞"""9??Au<8IhڴiF%"""77;s+;\%EDDDSXDDD߿?0DDD$Wv""""r(~oArLᶓ{.""""՟""""""""""""""Մ""""""""""""""Մ9t&ÇNHHUDDDDDDDDDD*"""Ԍ3 " ۷vZƎ3999l޼ٮ}RR-ZPXDDDDDDDDDH55n8o6t Fmkk66mڵpEDDDDDDDDD(,""RM&Y&.]*/׿IщȭaM0SML67 r"3JEuٗGz#ce """7^rr2)))[}vڵk`ɑHU``}^Lߋr{W?>3>Ԯ&]7ty:(+""""RIrss&22nUpT~7"""*!:w'ϑ?ϧ` ^~5ɣ>-837dެRXDD3|YlF}@dh "#b+ʳ(>jH] x0f,"""""QXDDX,DGGƠAmO||<ڵщHU㈓CyE&0 Vֺ cd4~:{3Cb(,""R YV-ZDVV //L&]O˖-+)Rj -zofŮ 2i/"""""7cMDDDVc2`ڴie{}vZnMQDDDD$n h?w6dc޼{c)t("""""7"""Ր3oVFu[f.dY2:5tRRv_€"""""Uۙ-j[y:k21yQ%g17 ÌHe `pj'%[ J`ʤPH'H`\qHEl`bY_aH9 `jB+EDDl:Dzz:`ԩS899G%E+7ڵ+]vPDDDDDDDDDiH55c lBrr2̝;<[͛Ox"ǎw%))#.]Jv*;믿2|:tP?N7\spY՟_DDDDDDDDVH55n8ƍC=3?s%F|ssV+N"++H]~~>Wٳ\tmV+'O,vweĈ8;;b%92/`X[LsMX-Γv/ߔυcɘ/""""""""rRXDD }D͚5v[B;88xk/999+VR,LLLiӦlْ={rEqGݩ[.ӧO`ԫW'|R^=ի'|b{aZnM۶m[.>݊k={ꫯҦMZhA@@Q… ӟThX7x8w cv06DuaDx?);3K̺co"`";c^Hyl-|V?t{a<}Uz;  CE.󇈈TeS^)dRRR ObʕdddIFF(]DD,_(+XnFQF0rHNʬY_fӦMk[ݢE N:ŋ2e {-ԩ~!/^cǎ|<36|G|wt҅'ORFR/`-^z>ٳlZn].vg½.Çgdddj*i۶-'O`00a>'UO:Uز8{,V.DRRR7.VVk{?'>`[?m\ǧaPKм5I?uWo"_y-v3L͂JoT["""Քb!::777 dWFNNjղթS~fYa5jDVXx1?]"^zXV/_^ٺu_uXl5 ٳg]6Pr5pM,oܸBL"ے')Nulmﭠ{.)\XB|S"u}IN8AfEj.qY pQ&B'"""4ʢEb呓d (vqq_~ V ߿.!|+>|8gʕ<#@"##yW ==M6 6?}^S'O$%%ŮŅ{>6W_}߉aߢ>z+"". hUWHQW~"""Րd">>iӦ۷oOTT 2[/+СC8q":tAvu ,VZȳ>k;o߾osԩST}Y:vh} :͛v̘1̝;R~}҈dܸq7Cd G6{muh]:Nc&\[rtuMFФw2?&lX"ڗSa?l=S\u}3VDDnUR-L,`r>Yy:"9h^ˁ!.t/yC;DDDD3۽ڢuʎCDD䶰?a' ڕ'mڗkb… 8;;Q9tEΞ=w܁kzo>>?ĉxzz^SߛݛbbNZg.g= Ŝ-9)x_c61͘L&~M^|9\]<+4~0}L~Yl/GGGqppL9o+;~3T vjh4x&nu퍷wgB7CBBSO=ڵ]^n7lA^qtu¿Y2W򫈈ȍp_}J `s=W!T8m("""7[Vz*"""7Kq_}H `i% Wg[Šȍ䯈*tH5͡CHOOחPڜ8qSNQF pvvho 7nM6t mFRRR}ڴiCm.]Jڵi߾,''UVwAxx86mܹs |}} >???BBB8q-w^HÆ 9|mT D׮]qqq!!! zj[B^ٳ4lؐ3gp1># Fzz: b׮]i j*6o\m۶ӧsq[YBBԩSO>m.p[8##^zJdd$֭cѢE,YcZyg fyͽt#"""R%}EDDDV3EDD/l \fM.]*H]plK 9zhZvʸq uִjՊ4`uihh(|СC=zM---llex{{kGdd$111%$,,~6?/^,-&<<5k֬YC߾}<_|1cl_)|ȑ#G5Hy]y嫁EDDDD $''BXXnnn\t WWW IQi1^/̮]طoFv_3f<<C-[Frr2,XLsE߾}8p V[RF :vh&''/ggg}𸦹A IDATt~PJȭB[@Tcf?`"##+| SNԨQ"'͵9}4sOVZVfsrsscl۶3g /P1111{l/^;K.kyfNAt޽+-l6y4={d͚5YnҸӲeKҧO2+ԸqcF݊ovJHUH5dZYhYYY >ܶENNN\p///F#cc>3ݝ_|L} Ջ۷ӡCbϓ6m>>>WVluO>$< ]vN: saϞ=]^AHb͚u@-ʰok*;4hjWxZi_q\\+ތ ѱ|dWX4Μ9?N'NN```p%~wJGVe69|0_|+"""_h{),"AnNv۽"ʎ> ""UZDDv၇Geq˫(hA;͛7/SV(iӦL: R䕟o{YV+_ Ҽys+; pB5kV[n_~a792Q[[2Y{د%2D `7xĺf͚1xMQ' /|[\[`K {bV]b]HH5{ ("##+^H+- \Z2oI+W*+""""U""""rUW; $ker5Wb3}:]vk׮EֿNVZxÆ 4mڔڵk{wh׮]/lܸ6mЩS'ضmIIIiӦ ׷_t)kצ}@ClVZZli7֭[9w>>>4o///V+fիWӵkW<<rssi۶m>J\\?s|||С}~~>֭رcq}C||<)))ޟ-[R^=:Ć Y&{\~G[.Ŏw5őJӦM֭muծ_Mooo5k@ΝK;KV2o<ù뮻*; UR+U"""""U"""_'ڵ8Ə+'NO>!,,ϳqFz)+9_sQzYq̟?'[_F{\?No-fiֿ]ٳVZ;^^^$&&Ҹqc[ѣGgϞ"־{L:(, 3gd֬Y 8%KuV GHH'N%iذ!ʃ>H׮]qvvfϞ=4hЀ+W`W^ٳ4hЀ$?Μ9sx衇bĈl޼pV+K.e͚5\U{gǎ{O{n1cgΜ!88֦y$ff&Æ tڕ[j*>s֮]k{Ϟ=;v, 6dժU=p)fϞի  55'|;̾} a…xzzQW-[~`֭+W_3fJHHɜ8qeZ?|0_}O?4NNNe7eռ;ݐ;w̤IuT+'`˚ JGDDDD*SXDDzm nwﶭt]z5۷O̚5;vХK #11///|||ׯONNN%Ew}#F񥎑v5ppg1c5aͷ;8ҥKyWx;WW^rK/Ċ+{le[:u_~L2H 111DEEiѢCJZZ̙3)Sؒ#Fॗ^^c„ 2lnh-.ܻwoyN>mKVoٲp[#G+Κ5L6l؀Pvrr^ٳgq^u{9&MbgϞ̛7_ݖ|iҤ tܙO?xrwخȑ#v}e4aÆL<z|cСAyedd`6j̙3PF"yyyWM<~V+ѥK 7oTW&b[{UeIEDDDD %EDD+N]2ozh֬G%уD +V0n8?h^c֬Yq9ˇ~Xlf.\Ⱥu-y>`{k'Hl aaa|gv?y1m4p'Ndĉ8pCzz:aaaG V`1sq%˜9sPdy,[ǏcZ~'ǢE}^}~/={vu/.P}mvv6[.RHWjm7 ̅"TZ< t=;kcAвM HZ 8i$f3F"""QF3p@v۷/+o߾<öؾ};<,X_|ѮMj;wuV̙Sd[Gшjeٲed;? _0@>}ذaG&//-[0eƍoߞ^zqyiٲeFѮ]!!!`N<0k.~gfϞ]DC +ծ~+WϏZj:~Y޽aW~wȚ5k4h}QQQsӟz)5j5j3fw Vп]0rssٱcNNNs1bbbx׈/r'Ncǎ̝;/ҭ[7>3ƏcG‚ ԩNFEe…%EaX=zti?*SWRHUH5fX0LfjԨRl #˗/E_^Gyhfɒ%lܸV߹sgػw/xxx {%&y[%[SNWlr}4/MxwFrH>]OT|^d&]HlMD:20e|||2e Z'--HZjqiVҕ+W2g{x 5kFLLLXt)ZVZٓǓLPPM֭mΛ7#F؍1|/_Nrr2[{yj+ӯ_?|Aرc5jԠsζ?@0 撞n듗#yyyx{{+Iav]]]m... ;ݟ_9swwwwaݺuxR`0'rJΞ=KFF~i/'%%W?0~-&;;e˖?HJJ qqqlٲdV+{f8FI6mdt(kc;8::É/ݵ2vtv䗫[.888X|9TU%k^DDDDًTcM6iӦXVϟݻmZnn.&LٙV~+>|8ӧO'##UVL۶mmcƌa 8OOOrss1L2j%*bkY`;b.Y%)C l6w}رc% qqqm`ɓ8>_U\׫ZfvO63gS^^dŅx<<u%˓"""N:quF...ӱu|Z3&ѣѬXL4VVPP 8vmhZZZdevX,5fgÆ Ͳthh('OK`SS}`cl 3a͛gWFuq-@޽ILL$009sfϟϣ>j+֭[ .www yΪ:nGGGFͷ~˷~[xxxлwo6o_oyUz쉃YYY%VSȶmۈwdff68Φ٫_w/!!!X,VZUΝ;sȑ W }~R]{>"""""""roJ\VF#dBaa!iiiD`wlٲӧȠO>vM4 zj\iii\!!!nݺk}ٛݝjϝ-g{Il/S%E'n߳s۹%s.ϵ:thcf^{bΞ=k6III;;/n:Fc<Ӽ꫔۾+QQQ$&&PZZڵky/ 99ѣG(/++Ӷ%?#{ر_mW>>>17|ϸ[kǤI1c{i.%u_~tؑ#Gsٖ/**bƍ:'N?fϞ=TVVujwԉl۞U\\\>|8 .駟2jԨwvEDDDDDDҢHd2X|9>,f_G 6VgرFӯ_?Z1ꦛ0a[l᪫[n̜9ӷo_&N 7P|}]quusVoKqttd„ DdS%|RgvO\1Jrfj"[Ӭ$&&VKU%,,;RZZʫjW+W}m ``ڵ? WWW}Xh};vz)έ2fmƠAj]{֬Y^ѶzX:uꄯ/k}]K. 2x >#44`t”)SxGߥR?{Ddd$|8yLDD$%%UkcL0H6o<ڵ+aaaDFFqEDDDDDDÞ햨~Z;߄t{SwһZsSVV75- Ҿ}P^VjwB mK^222;v,ݞ̛6m"!!Z3g8z(Bf69|0/gr|f3YYYxzzږ nn'Nɓt޽M& ;}4tWW&CgW!>Wέ󙙙Idd$鄇7}<տ*ЯINdUՎ@yKCz,""ҖyzzYg`$q-ϯ=:/Zs|ٳ0`#GlEDDDDDDD `6jҥRPPy뭷(++?~5k0o<Ν;׊7]M88^ݻ 0hРZ(//Q?@ٹs'׿i]*ZrC l6H""""""""6hH5uT f9s搜LLL PQQkΝ;m嗣Sٌl6s>b{/ƿuVҥK˯Z֮]˘1cI,""F2 899aXlǪڵ}={؜E<@ 4?o+? 1;/u'n{R[ü4Oo|W??$dNgw &e˖qM7([d !!!xyymr6n7 3_=~mw ,hp9\ӽ{w:d9$$$A~̙3[ &ph/w5|ܸq|' ODDDDDDDD~;iòXv- .C 0zǏ>}\Y"8Bb`wsc0W9}ѯŕ -{_X0W=ϻޑX*_n755RW,>>nv*++4%K0pZ_999Kee%|ٳ>\ҥK4m||tMӇӳgO7Lڵ={lfҥxzz'C~}y7jZٸqc/""""""""?--""҆f***0Lպwhyy9}#G6+rsս#ْs$ *ASY"7gK&g`8,f _b{Կ+~QGv<]wuZWU{Һc1n8zQ^W& i7К5k޽;s} n'>>www8`+0`>,ƍlذV|-17j*ө ##.~qPPX,VYZDDDDDDDD.},""҆GBBFW+7L|DŽ2rV۽#>](=u c$h@d&%_fX,L&S/{xzzPTTDJJ s̙3xyyը;tP"""5 X{ }||l?;;;Hps=2|p"##km5-|Hۢ"""N:q 1g}moضʫsNbp \}uJ=q988pssQ>>֬Yc>`mΝ;IMMn5nkdHmiL&˗/777F#cذaTTTsNf͚e;or-sKrrmm˦dՔE0߽|GȄ HLL9z(Nё%Kлwo֬YӬ1W_eSRR]wŸqjʢW^ n93>>DFFdw4Nچ,Nt)zpcoZO^VCoʍ`O ƶH s%88;hn@5pThSZVb+_ Z.g<6wWcp.Cy8 !ᙷp~'!}'ĎiZuݟo]w5浀شiSu mqvmBKHH ;;ݻ[5ٳg3`Fٔ4XDDZt)~zz-.;{ySc^ G׾ :Q~9{D}޽{Y` jx_~ɀ쪻k.=jHp$ ^ r"""mԩS1 <̜9sHNN\DiL\}}&2: :^oGO u'O+WjǸz8y/?9V;^it=ϝ;xkY,?l&88|`rbCpppuã:u2zxᯐs:`:k_}, w C,8c9[n4TSwo _7>p_ʭ3C߿?~;;wi5s;21D @-W21 }RZZJ~jorWrAƍmɪ(rrrXhz"''[b0yd׿r!>N2rss9t[n߷Lj3+++x\^6,Κ;6ZcԟY;LsgN IOw/kϹ8w4Y^gM.^Z΅OSǓn{UW]EǎYjا~m݆FM^^#77{bh 87YtR~F/"""""""V,""҆f***0LXr_-Wr %HpFNGw@{&=iڙMEv.,4"ߟTcȘnDt9SPRk ?rMwqC|9iE'Jسi?}+srr]v5vu槟~⪫G^*޽$k[.Kۮ[ٳ?y衇ujKͲ3\-H\ j{76~ l]VطY/}knBj/ бS^~s;ϸ[(--%11ڗ$LJJ iiiIzzz+F\ rssX,5Xz5Hۢ"""mXxx8qqq$$$`4Y~}ʥyu%v;tM[rXNpF]u-O0p@n|||_d|jjڱ@;֘!~R-б`wmC~<,' O[g^>|˛VcX,`2w ~<}~/KM&M믿PJ[m]#z ɓTTTԘ?>;wdь=[oVPǎ8q|kb`0Oppsי3glKP@-Qύ7>"tXu.z8='`\$y(< ߶GDGGb 2i$[YAA ,رcDZZZg]vX,:g^ ǎ#$$ `6h4iKFHH]2N߿9I/[tgڹ} _nXT`{xzgIېՠqww'99Fَ;(+NN֩S')((v<44(>ټyŅÇpBO5jNXX휣Gv1w}h/ #ٚ /?+ydAjMz؟;7lnek+9+g6mLui'MĂ Xz5wq``0ؾʕ+k;֭kTa :""""""",""L&/_NII nnnFǰa*9J{7&>3ƮgJYªjǪw O] Cwߕ͞ iwL0Dje/>k׮=z#Fp 7T3p@&L@dd$f`0pBƏϢEh׮C vy?~<]v̙39[?֭[/5⯫eeeѫW/ICĎgb?{;)=/Ygк\5ZCsS06k:]1{j=ʩSpttdɒ%ݛ5k4K&L` 4nݺَ3sLLXXg_xwӼ+<}S_};wn--1In7MHOA+o7u'lP;.nv+.. ooo\~!ev[g,.APN?o1 ƎKFF...ʊ# ??m28paaa5ڮraQf4ΦgϞ8?NDFFNxxxo902> aعaW t Z._.Y_Ԝ| .\!ZtҳgO`<999yfܹa={HLLdٲei&HII5V6=ۉڱ( "riHO١%EDD2OOOLW.@ǰe ""|m۶("""Q_'''LuڒիQ_&g̘j'79l;:A𺓧KMuFusjި/ƍ^Ο?~#jn;Sg|iK4XDD"3[f\Y>HPwn:4XD-JO١=EDDDZk;CDDDDDDDDD~.ϵEDDDDDDDDDDDDD%EDDDDDDDDDDDDD--""FCII >>>DFFTG=z47x-[0bxF#"""""""""""""mҥKIMM[oQVVV^~~> .$11IdҤIL>۷7-iiN`l63gձX,X+C5.]ХK"##ٷolR{""""""""""p,""FU%~vrrbTO?HΝ_'''Ν;l퉈4XDD :t:0`[Yqq17ndʔ)޽tuuiHf6d2l\Çެ}={/fm[DDDDDDDDDD.L `6,<<80_r:t8qΝTVV6뮻",YBvvvs CDDDDDDDDDD%EDD~ :uĉTTT7|@II gϞoa8::6L<9BPXDD 2ҳgO Xgoݺ͛73eʔ&]XXHXXXSXDD 2L,_0׏aÆx2!"""""""""ixꩧ(.. ooo5p54Jrss hr[""""""""""p#""҆yzzposy !$$#Fx""""""""""R""",x 8?[k#"""""""""%EDDYo߾C `6B `6BK@QPRR89G#G())vNdd$b*""""""""""D `6jҥxyyɮ]HJJbʔ)~z eL `6jԩdlfΜ9$''c3x` Z!H3"""m3y NNNX,ju*++ IDAT9}4fb'""""""""""-@3EDDڰ,8CС VvZ֭[lfȑ1"Hf6d2Vm_ddddlňEDDDDDDDDD)HN\\ F֯_o+ Wzž}Z+TiJ :uĉ'S ˗WDD 2dffbX(,,$--9pdddpWZ""""""""""tXDD 2L,_0׏aÆPQQŋ1LP^^ÉjEDDDDDDDDD)ixꩧ(.. ooom۷g̙3TVV닣c+F,""""""""""A `6OOZHK""""""""""""""m""""""""""""""m""""""""""""""m"""rIVv.ZaHK.DχJQڵ7{nL&S::TVVBխn+;[Χ7Bғ\.lʕp ݻ[;F[z5w}77x#O>d+iӦiG8>m45_桇⦛nbܹ .oIG}TEo+55hۛ)u[QzR\WL^^՚ ;; ݫ;wGҮ];:ud;~7r72uT͛ר&''Ν;7{;w':^\͝;ZϵGk=_IIIɉ/{'|FHǎX3gdС*/**d2ѡC:z#1EhMArr.bK]]RVV~w9׷z999T\{]1c4)Vi}5[DD:?9S5CbDhzz:Æ X܌y-Dꆫfpuw#W72Ugُ4 /-[4or͒; 1;/u'n{R[{D~WbnnYGRR!!!ҹsg̴L&q<<<ѣ>>>+ӧI׮]ܹ3ݻwg׮] .̙3899q5K.]ԩS߿?;vɉ{kXx1NNN > c̘1t҅~gw~e˖qM7U;~9<_3fࡇ"11+Vxbjuի}aĈ>}V˧~j{2c n[ r}yyy <<޽{3zj='**=<|0}a :Ν;s̙_V~???::w\?<5={rwRZZZ΅; %66>}^zF?ƍO.EDDDDDD~˔iòXv- .C%hϜ9d"%%7x9spB[" TYχWZܟ :0u{G,c޸ޘ+/9v5? _4өgʔ3~m̙C~Xh=\Yv3gׯg9rgyqB[,*++駟fƌ?~zv~eem y֬Y<裀u7xZM&GwL<{=W%55RW|͘1˗3ydy&O̊+ٵk#77J^}jl%__ߕ+W/_|999[vO<=zGnn. ⫬d…̟?b2227+߿7:˷5c͚5<|߿cǎ1dȐF^FM^^ۻw/׿թ[,~>̊+ذaC876["""""""8ZZDD 3TTT`2pss%***8uO=fE~z(<}pz /Ob.v߱"t]_DS?)s&b#8B(o gΜ!))o_|ʋ/ 11UOҥKYr%FgggC] 88w}իټy3eee͒{ٲe s%**S6 OOOfϞMVVζ3eg۵kW|͜9___fΜItt4sO&M&d/7(((`͚5lٲ\, cǎG~m)uuu槟~⪫G-'pwҷo_\\\ƻ1LJJ iiiIzzzz#G{nRޱd{Zrx0%EDDڰpñX,,X3vX\\_4ލe'ky,^jywqhI` bݺOGvYlA>]tko{//2/2={d͚5t&}||jMN?~}RPP@߾} {rBW %UHOOĉ4[M元mg3>L]W՞&L $$O:۱cG>dggc0xGUx N:-©Sӟk 5\GNNN`?///m_:_m???1Z+""""""" 0 tԉ'N֙ڵsVT`˧|8'b@ MaR*idMpxu- gǣ 'K"Lva1{nMFVVojOcǎ8w0i$RSSꫯl8_8{l6s<|0m^[B%fmj/44'O֚ԟ*}%~AAAݜ0$$ªUزevuJ0|ٰa+VukܟΝ;sȑ ۮ];,K,X7k1sLBCCkm__~?zh};v̖֣Hd4̴aa!iiiՋm۶sݽ{7aaaDu?|R f Ƴ!\^sGGo uFr m˯Ą@Gc~垤RΖ$Dܓ^ 'Gv-;cʹ|MOfڝ8q"o?ٳgwSJKK1L|G3V͛krlcuؑ#Gs.**j^;vx˫Ef:ᄏ]ĉٳg`ݏw֭ꄄκuԶ``0ؾʕ+>SN0{lf پ}{.]-"""""""Hd2X|9>,f_G 6V'..ǏۖD6 1& ~ӛgg>rrٟsvW̓(;cqrmm˦ê)io^S%|Rgxuȗ&Rzy}Bkv^3qDwMDDD%qMƔ)SضmwuO?mQF?7ҽ{wRRRj7qD&L@ZZ> /]*g̞֭=??j3{1f3SL!00^Mmߞ9::2akmR~A= 'e~?nݺWw^$44o/^L`` K. 8nYcjtbr~>xg\;nWg)/.;ԯuYݣGQ2eee>|`;v Bpp%Km>}___ ρ֭[1LtܹEbk/t}222;v,5/{M~~>]tյAG@@m͛ ٳD-[V,L"##IOO'<b{A TC( =!>ɴEd4 ud}=;9YkѴibgkEDEE[XZ|(%ۤIbJ[e]_fxٽ{wf:*@m*T[lOOOz-Ƚ[}̙WÇ3yVpqq!""⢌TW&11-[sN<Ȁ ٹs'}"""""""EDDD.-[vwlٲRwsscС4k֬~+)H%,}G?`"EE)۫Zb[ӦMm۶1}t7o^DDDDDDDDDDbT&M`j2c ~7:uꄏ>lfڴik׮qcbb۷/]tX""""""""""R1XDDP𵋋 6ؾ;^^^4lذqM&}i_gEDDjX?ɓ' ,qݻi׮]qeeeeQqDDDDDDDDDD4XDDZL&f3X"}RRR8y$m۶R2sLvEݫ4T """5Xdd$c„ q"}vEƍR ЪU+5jTDDDDDDDDDDrT իGJJJ}4tE7۷%K}i hLJbرcYt)lܸjֹsgIOO鈈\QT)\qH (dCh h*998y游8N>+͚5AV/gh;f?s oN{LC|}}5jSq-ZāHNNfƍ|F{mۘ?>ϟ$&&:0Q4ޢRn-zʽ?n8F#O<v[)ʕ+i׮]YFhH 5i$ V3foѩS'ܻwo:vή]sueI?JP? )d5Y0癰YLB׮]瓐Şoو'00//BmfRRx{{fGPPPUGRRRzꩧ.yLˉfP/ \\\lc...8;;ۿwvv.Dg0iFxDƻWMH9{xe[_g0rǿ '))Σ>jo W^DDDp}[h? wNXXoф3n89Bxx8̛7~nLL [M6q}_h޽{K/quѢE BBB8rH47x#&LYfnݚVZ@ff&͚5cC , <<___oܸq̞=⊈\nTbccY~=s!00~;۷ogƍ\bg[~#}wm?൚N!{t[^EFӸW+ߓ>݃;+# K|`oݻ7 =zx:Tn:^x6o̱cHLLnE1w\6mJ\\qqq'NHNɓܹYfb0gfΜIff&&((X̯IHHjh"|||yG8vf"::{Zb98 b)ߢEرcG{HV+& ٌ'Vk¨` 99,܊\}sK%{>@iq{9ȯ7q&F0Y]&ٝWk~;wfDEE-[^|~~>?#ӧOߟ#GzjyB} B.]_~u\ȑ#KJwؑǏڵk /wٳgٲeK/?]k֬!$$T4XDD_~L06ndoW^ 2c?/.w_O N޿V+x?~RRRd2hРANJJfj?JbbbQ܊W**+V3l0K<ς|z8s ɅӣG̙@FF .W^εk׮=e޽ypBƏOll%q4f3K,!++ OOOrrrhݺ5ݺu f2K?G`` vpcL s }u[ȑ{8'_[mS2e ;wAl6_O~xܹ}?Nǎ}ڷoСC'O6swҰaCٳ'&Lp4mڴ W[yٴitܙɓ'3dv؁G_̂ kXn]~DDDDDDDDD.'hy\Q:kk_q=M1m[-tDaTgJTf'H(3ΙesHHx$#'soوK->@R9ݟ6ǙyZ&`5[XL]镡_Sp{<#Nyh ~^>s^SƘGzN0uxN^^4wW~M_׾+&O7w{SSdKx6+^HӰHZ6`oo6ѦQS<3O/n|n,ax><< <\y{e^۾ㇹ $_k2ng¬81k%N'%3ꃗT  j؄yo8ZKfù {4=\' ziR2Γmݙ e<ՇddHaLpKMXs a=IJOcOݝzt˛_i&O̖-[ӧM4᧟~"((}q-Wf„ 8q'''5j;VZxby饗ܹs<3ywmikX}&ײcJB[7Ag?~4jʉY+9 6^,mQja/4!qs7w8лMg'?N0aZ{for`XQbg.'~ZX\Ղf-4j)׵Xg-k h*넓0ų3/,Wp$~.8 :E$~ZNGnIOYFvkrC/,֢3V 8%a:6+weݟ_]C@->;k_()7L<%K0vX{1Ǝ˲e۶mKڵYj… ?cҸqc'88Su l޼cǎ 7,ݢE ;w.M6%..8{`ĉtԉxN<Ν;5kVo0gfΜIff&/Xh;v(})f`VɄl럅ZjѰaC֬Yc9wBs8q+C18̂hTxuir[[y}cԹ!#V?Whuyj ,z*$nI8ŮWM[QN#37O//o ]Bm#f9=SyBydx|u;Ln~wmf˴$gן/O-Xox޲nOѵEr7.5>~n~ -T^0c?[%s.le=?W)S)Shժcƌ)Ç70p@rssY|9[L&''n:~ kW_}G˖- wwk׮-??ӧȑ#Yz5HOU}9d7WL:<_SH~% (xJxx8VFoAff&k׮~iSP>sLA+?QѺuk*#)) wMhh(EFDD:Vy""""""""QXDD `0W)))cjbРA֭Ec|0_W1>f_ 8w08 ^">aԪSL/3z<Ǿ=az_יA]ZF`(v4f_Jg 1KrrF딌 n.ӳ3Vy4NMXs#Ȉ}mAlAK.t(`0HJ?G݀`(ZL-_ %_sAe/SH~e0C7nLVXl=#F+~xxAU,l?,ӧOɩ'$IJMJJ*TEoDDDDDDD"""5PNN111bEjj*QQQfٗ>x Ѵo!VuO~Aں!@>aū=n`Ĭv,{>׾/~l,k:+o-c09#EŹp1^`gR j 0 y<1V\|y FS>fWӻMg{[l??RXD^5{3k~ly=/~3ӾnFN6[úM8xPΣQ/vY}[NsODDDDDDDtl6d$''֭[ӭ[7{?f㮻N:̺j;/ﭶ iyύY3;pl~"I?X笚8ku)~Na |89;cjզU| ֮Re{/\l ' `ڈ{ L|?MؠȹQ>Wfqp>{ۣqK.jpc97ޡCkiVG23V}IHKkw#KgGS/0<>`8#-|d>؟,z }WJrSGZ iY8;9`j5^cY*u/f~ :'|:ШQBm9FN:?n_r_~~Uރ1=h֠JYgg"֯ B`C*u/f~{tCA?f*H,""""eӶK~;Ķa ʦuY'ƀ0`@`)>Rs{LmNNջL#EDDDrqvvt UM>wqrt""""ҝq>}Dƍ OIIȑ#xxxp G3wCo޼H֭~%ԩ...l۶ZC3EDDj$̙ ?u|޽O>|eY}]ߘ[TMSUY8p 6ONNʣ8-bڴi/6<*`6e˖ѦM"mׯ} 2xž={e󳉌r}Ϥb79n5Y0癰Ya̱cXr% ">>BcsIҊ5lؐ %"""""""""W--""RڵPԩɓ'f3'OgϞ8;;ӴiSbccҥd߿h^T}Fpvw%y˘ܴGVa3Ym,8@~mg?ydlB>}hҤ ?AAA۷[n<<<Xz5&Lĉ899/Jjj*c֬YxzzV>7:V<4XDDd֭zE222ZqAΞ=_3O-Z>ƻbfZVOrM}ڌ |-{iܫmIF[,Ycc1vX-[V0^۶Ԯ]UV/\TX޽IHHѣs!͛Wh"vQDDDDDDDDDf `jŊ//"mfsǮZ-ZbiVon7dϧB'˹D~3)4MI}Jm۶ENRiY0aNc/N -﹑#pfOdZ:t(O>$:tQFL2Ν;ӠAl6/r;| #G<ҴijYDDDDDDDDD] ckѺ"DK&g=t`\׾B{xVK>6T<<dqƅf#22AىHuQXDDKJJbΜ9OڏϬ\OOO\\\xTEDDj0ƲehӦ 'O,֢E ZjѣGYvcj=EDDj]vJXXX6???\,*Plݺ[oѩ%H b z聗SKD`ĉ>}@?NJJ ?~DDDDDDDDDD"qqt"""RL&~~~],Yv-cǎŠH Idd;w}v&Nh?fX0LL&F#wwKTEDDPG᫯?uTBCCyT """W;ұcBZh믿DDDDDDDDDDbprt""""""""""""""R=T!T!T!T!\8:J޿)ȟTq IDAT::JkԤS""""""""""""""5 """"""""""""""5 """"""""""""""5q1*\Bst "/@&É."""""""\rb)T!8ٳg utWܜl2ϓfXnXgJt*p)g`0Tm@ BjY<<= X"Rrs1ታK .{x`!7',"""""""""""Re Cuc`0FfKKDH EəKKD=EDDDDDDDDDDd~yyy1.4h`o;|׮" SCr<5V+u"x.] ulV6 Պjťr3w lV%%/<N㊥ehê4l ̀T'4iP: o%IDDDDDJOLl,W׷H{vv6NUW]_~;vӋaÆDEΊVpwsiZ%zT)V48ض;wx͆U[^y7BK̴S9aV+ii:[-u\~&f#B*=FT\.o.Vf_O/ O@JǻTcsǗitH*\L<ڍ*LVV+l`0lȤUHLHdtmI |VLItb}5>3SQbc{# d&"""""u.-ŋÃ[떛i$?M[իp$4hw- ϾO]ع>qqqeyyb⋠n򌜎[ & }Hn)Ȩ%s}Hv]u襄5w@2~WU Ųly$v+g+RǺFnF)v~n^qP6''&i7LLbz~i*=t>o*\l~FH_V{Y\CC13Wc1pR QчHӓW'0 J9&7kZl9ҩ#[^[8Qч)x>{ijX 5>lDF6!>>˖'٬YqcGW8N70ƌyp睃X| ÇXx!}{T &+HfV. |2<†%+Qy|h\˷> dsnŋ.c 1c^>Fc>y&f[f7N|Nɿ_.l+ӯ趐xyzR~x߽/M75T%<ݻhWr9P@b:/BXzdggs8Gð+oNvmJl`4M""Jl9CTB2MX7k\@kZ09egekKl5`s˝Y?e'zIp눅egd2q&>' mo/39ɧ|t1c^fy]1H3aDV\Iݩ_?kȣnB3aiSUK.lw$''*Б\}ոNNNGbbоmlf]6k֭')9~%$$WWWf3VJŹ}y̙3x 2i899ܹޭ;7aZNXm6njjaXlε/' }>6ӅsWm6joS.a5T8d"33i-F~^>呟S;e1tHʛ/E3V|6ΥccW)p,3{MΎ n ekJe?~]`%;r?;(ϖ-mɍ3 mLrRY1z=fZq|I'NWRXDD2&7 #`Vgw7yy89V ?mwߧ-= X,$k"U,?BJj*A8^s† _³F#僷wT]g歵11G3h|z VqM&[_z_e~yyygdsf?`k#tw-F۝=ߟQ-xpwgn碴sL&Vб'&{YrgbF^fmc+TUرf3u-;#;;dˬ)99dشif9^VZE=uV G-ODDUY7ԝENNN{^_ٳg[gIRf /fPU,D#ڈ`#̹Hl6+7j͵3o%++lrfד}\[6 m3Giڼu{#>'ԍuiUg1$*\x~fy yn%~9)9צÙxŔg~xAquGԡ=n#//7_y+Vk>2ק?8OfcO \Z7uʤT`?hn߁86NG`?mϏst3KWd˶ /xzxz>јX^:6[{(3~))G:z_?r1Ѩar]r Օ=*`0г{W:oz*r]e~.K|0sΜკ5s^ךxG&?]ۯfGҩl%?D^^/YkUlKR]o$srr=go=Pmۿ~>cHJJh4ҡC͝Cpp0_π0h7tVp_ziz!aaa={Μ91c8z4OOO/lR^I^^^lڴ=zү_?{wӦMxyU&""""RsY,̸b)erWo#88^[n<׷OVٽ{7Ǐ'l26l@ll;wד–ۈ$--o[k,/`],( 6 ^ljo&)l~ .^3=׋=c}?W8΅[$ɮ/UkX0a }xL&&_dJ.)ֹ7" HrF'R_?b79rPk +KP+X'~GMDə7ⰜRe/ `+T^|S'`KysHqtj٨B1v ggg.AZen>ymZ<\ת%s| |a: *vɫ7~M"xi899oyᅢOZ{L~n ɏN">!'>bzjy{cHIM=Oc4Yk ?ëK纫8o'S@\@ ֛yphaSXFr3=ucωO <`u#]:vãm6^7d̽#5s b٘%~Ckf_BzzYfg_ks6/BRzJ`M,a5& WWR^Wi\l6j.G'`! MZdPks!={E|:3>} ݻz#yArssҥK[VFMwE]r+^UzG! P&(( TA(MPUzGzWwԃ{.ǒ\Kv7\ݝgy33o\T*&j>>FspZuԶ9/⏵k0fgpuuE^^P(PZ5j,Y^^hР+DGKΟX wV#7,3?4vZ晀{ng+A Ϋg=PV 83Dݿms6GqRrz+ W6|> (1_r+p=\lW0i!;P{Dd̝ 69 S(-iIE&䯄Ds _gWHC-8ɬ[a)ٮ~sD^^>󑟯B^~>UpsU`tt~~`ѡj#Uu@Ja<+CǭCسa"Fnr" r'Y 8۷A` (䪔{-@7WENGjPP2 '<EVpI*z9^>/}oܟV~^Jܽw1Oo//DDA.M2x j5N:IS7oG`į[ڂ4 yj*x@ h1ظX˳UHNI嵣ŧcB& ÇPVzѮu+lز ,Z&Ϝ>>Ӻw>n޼Ç0a1 ?b0y?eJ]wWx8|||jU:p9,Zqqq`Y}Ŵi#%%E9ڷkuDxxÍ&  Ct햄x,`n%I Bffrrq4j...VPV-@ff&"""޻wXB̯ydܜ! Է*-B!4ptg8s:⢄'uGȬwNx Gq!2}Vso^6n.5c\?2LvRZ~k}Mo Ը z";tcS[ 0MKnPϦmvnsn2,X PՑ۳v qxGVao_TLAe\40dB"Ŀ"2a BšQ ܼz"/5jݺ!+) ggs6V?w߯0L&/RRS\y ][sr.CgRqJU ?Ŏ{𗥨]&>׫+NĥP'>F`t8| _Z)˲>gĠ+/ͭ4 t KV‡ùP+= OIMWށpKLLsŠedrøm0c 4[/Wi(bNυRes( Tڑ5SFqF(?bꖧgaNJKkD20 cy 7 *ò]Oaf,ၿvԭ[Ǧ4d229sfgPԸt26lWWWԯa3336XX0,7CaAHKuę46...Pcvg@#94 ǥ7 glur_ecoжG_UX+ t ƒQ׺u2TVr6sڛLK*Ύ2(Ь* #0}569@Wtm3>|g(ӅQPԡ*őҊy'XB7F{B` (䫝PI3 һ&޵+fljb"7?RG`?7l`B_#;'noRr2.,Tx `ʫ퐓Mv`uVS+1٨ 7g4ZlG9Z Ă0iUqŋعi~KĤ$de: [6k?/׮ֹ x{#+;8)-Ó BoZ4&.NKZ'W3 s%wBnpf8 Ҹr +:_b0b[삝&uAHHmێƍ}bŊAFFyfĀo"33w?a߾}E۷ u*`   w"B QBW == cݐd6m=bp,\ o >f=~ԫ9AYō7Վx$Z4ZޡwnpG쌌to2Ϡe(;pn AaH~r n^ܹj wGsrX^.ݹմ,w"C/{V|pp*m?>gh+b9w7«j}\.Cp'PsMṰmxܺy bzˑFۊ\.,V>2 ۶~~ɥ8RU!g.09jj`=E1d~Q!0AAA5뀂`\=qi&%DhN sntwBXhT`GB`mh۪Clxsgc`VՐ[VcG#XŭH4anRr2XEA b6}S4Ŋ#q`bO|"]{k#;3ͪ8e29Zpqv'8q R IDAT8vtt@yN\qc>.:|P`PTؾk>Pck'W^Ƞegg%:RIo) s]:+k,䖇g[zI1p$Rb {i:wdu,zM\.*Y!-[WO͚Ѡa#0X&2ݜA2 ojsZp:sYr&O!g2W+S+rAD[vH)R]|a=c ct}bYOOe"|!W_`pTZ2;W^jc>iM%/o~5k`V)g ..HNMŧF@4⇿ä~сs(-1)9Xw.6[7) =0lX,оMkޥ3F|95@u2 >Sf‘@"c}d8̙oo/dddbެr3 7-Fkh3vޡ++:͇KΙc$^ 87mݎM[CT/7nbP^n>;&_b0/YWuS6dᲨ$ ϟvodwbܗ@JJ ÇC.տcРض};Р~}XLQQonnnâ ! ɬ[&a1jhTRC&M{.^9rss`ǎ`֭ر#rs-AA²,r95jd񙂂83[j5joEPZ5OWKq8+J3Cسw֭ L D=|jUT?-?Cڵ$Q,;+k3Ya[׮ 1>rdggCP"44h.|ŀe $:ODhtory T*-n*(@BB"DMdsss_"[0HLJVE`@V[BcpU*ƭtvm'G*˲x!2x w鬿\WhӤE(ZF\||b~%Pefe!33 V}UۏcF׮-i{ÿC,J7aBv$6_r!T6∏-ҮZ F-:]d"t,#ܜ,EcDtt4 ԨT)L2ȐX}֮Vœ'OhwfVw]+pg3bǎO>~띿9>k  {ۼsSFfM^Ahu 233ѯosw=z5+0?֑CƍT*HԬY?@T*DhU'$$ J,߿J'1Gy yy9[E=d2J;`ĝ{hނھ}zf mT6?s3ho|B^I}Zsԏ{/FA#6/7aUkhZզ`#ס?@~㺵ӽMiiC\DtLೕCCrl3(Q'kW|VqL6dL>@IɅɅj皀+|G)5_Dci0AAE""2yB.B2W˱u Rs 4@~~>ʒۮc^sr!11wGpe܎u@n ,h--ߛ;X[ΖB7mn{il%_<ۊ\ZQkup# j *2 ix  ͺ0DC+ 8mX^޸uWEȽ"`,\^LϏ?|5*Iq ݡWH*r +EŠ k@}y%lCh0Q: `  yܹX/c>A֭Vv7MaV~)efIqst[aPm_T듑JҶBrLRl*'70\KK4\B XC:#GM]’$ǒ  J`[xzz1=($(e`YnPŲ2xzII6pfztEyn!pلAA!.D&M=x'N78:okpqv0%E\|<*P(dؼm;.]>+KI-h~lں w+b#buÇ=fyJ(XiWXvZrɺ|oy;>(OyaKߕ5h/@oicBܹwwC'OHF'd2UIcmNN.]/!66AAP*FaGGSy;r9|kXwíxV\]]Ƨ2'&ii~b>EhHÅKqeD=zOOOYK>/fnMpi0AACc'Nx*h4Zg|degvXdԮeLuVew $&&kѥӫ% }-F7|ӛ-2}sGAlmv4M>P{17<ڤڢw{Qmɯt_Վc,n(Uceرk7֬ۀO>}{YgIʚM1vkoQغ/=Z-S{ 0nED`VÇl |z[UhݲMX7,y MLvz ~^" k>[" ("_#44He+WΎװ_qQ/NZE * &L_[6 gͳ:pY {@{}N )9~~6:,*gd)gk K %; A\|0,c8$Wh֤MHOOnOצGZZ<<郀ӵsg:,Ϛ'gg֥UrIIvsߗ/C}=YSܸ | xq#̞7 \V$3;n]:ca4OH>Pbpy,YWbqLKV֣cvӇ[A  (998~$>H̙=*aނEz.Ѽi|'$b~'''xxcwSPR A-pq0 S&cEhӪ* r~~x? A6mpq WaΌp[V?t^^HJNc/!mGĝ;0i rXѭގ8-w}ޭ+Z4k /=V-Z'1) }.QBLMYʳ!mEk#"鸯g=ǜШa@䝻x`s{>>zpK1g4n]:u둜??_( h޷ۑXtY[QG1e ԉ_KK| Ǝ1r@ڵ~:{۶*LIq1Ԩ^ >ǨR>͛ ːi'Ubf$$&bqv% ~Djlj/L*g6K,2gbdS ?؉8zo zl3џ,msC,kW11fNow3+_b屔_R9a/2y.v d29XA]m_8i;p ,eM2e} 屧]\\xuWѝ!t[ʋM,B([^֎teM}ե/ϋսqoVBedMXmdֹ߸6\\Rg!,z49 +Uƒ G#GR G0b0ZǬP|IO(+Օ%}BWcvͥ!sk AQh4p2:s.Qճ;ϙ_cqM ߻ڹ>YQNm,_rf˗B>sX>`Z\U+3gc/8K<AQR%hZ[jZp-_ /OOZ,U+zG995 ?tzt),bƜy5:yyy7z˲>t =uؾ5>y1iW6kjլO£Xrt:gCu@T 㜆{Dm6WshZ`Ype/&pC=8)9 _4.V?[6.\dV>/\hKB20,~:2>3j_L?/o' 3|U*7l(S{mZĞ0ZϞK兝`c8;; 7YŔg"((vBtܪ[ ŔP%2avdM;,TLL(c;k3џ,mP7a߁عe#,DeVo("_cK$TʾHGiŅM_yvZ B!J.[g16Xh!y8q4OOFcO RW_|f6blî=ί >ׯH+{SĦǗvD$]ܽwgn.ͧ!A ` (+lkzX `uX\|vOcpu}W;Hgd߷|@i>hӲd2~[ذPT EԣFxyz^Q$Fj{>>G7oݶ9(  |wP$ ~]7,q.x{i1Ĥdx[׶[W|7XZ̜6E–Bͱ!ܶrG^= 1yp@/% ..kNNF X%m'1OpC t+ <Oð9rE3[EpŊ=7X%+Vb^b%޿oӳܻQ/{ )(yw/*>zaYwxk7nѣ?RZl9Yb0,c8E$SO R}rѵn*(g Lq/ k䱄:clUfC=8'''^ g1csbG?Օ}YMmyIC(w|e9'Vc/wX{@x/~k՟AB+ضA|rjbl!ZNxr r G#: Z6ֆy3޻>ݿ\bKxuhI>sOZJBn^22g:҇qsk VADA.Cf5C&cF׫l߯HXg@TqV)J[L j \.Ǥѹ։ IDAThѬܠQes>Sa|=/edY^|/6oۆڡV!Gw,X 4Ξ[ xqc.?,k^GKUp-;:;;P=`X2mIIpqqѿ|@jddd:{ًDN>m[[ؽ?q܎8w"ڵi._\.CE߷D|B"\v--ٙM1,c+vXL]F>y6DlFn16MpŊl޶s~ uƘ#ШaBi-9sdk.2w77|}yB\=[t/+[f!Yh2 鑯%Y[hnQ_Br=w^O.Gӳ[+_abڹX "]IŶ IIIP(psVceMou0 fBFFϟ߶a|4z {*~9/7noo< VTInXh(>4?.\fN=hX֯ؽo?ΜvMOb[A& 2_Gx >__u{ؾ=*T(\&3Ha,L. Wl%S:sGү`OLDKkVkJ''EVf:q1:rYYڹ0K9T$YsеS'x g_@V--agsms?}"k /ڎ%PfMvbpUjj*/򐕝U+WA??ϧ9N9SXKWѥӫQQt1L򳆨>r 6è(u`F`b,")%e}K#/?E^p^bra aok_CJ"NEAMuӵs't 99sF|wv͜ZJy-/0WN|uXˣ̆ #399cbc->o.zf/Mc JL=;!t[mPf!=2 k %$WHp0?; a8OWW!=?ηeL.#5}%1+#GѬ+岵"io]~x4?/h̬,n:kuԁZFl\|,v.hL&3HT|I/|IPfMdff!//% !F5A/4AA!кe L2{oAqQ&.\Q AzuP5ϵ7J<:r9Ft@ԣ8zogGJPRm WaFq.Y}8GOFc;cQ+s_jw~ TL%r<ݺtƟ6ҕг{7=1z5Z*8},@RamV܅X~_[@~ `-h׺ywƶ+վ+UBUz:3tͺЮ~+G!rF۶c=z9rrrۚy6F=Z֟5-&LIq0 YӾ_}٧8{g/23{˰xq#-e+pFVvM)(g!LX{$T|m=GejB6MBb"]AрeYT&4\ܶ:0_ٗG`صwRX(֩K!//OcqV޴MaI9y{!]bu'%ݖG4YH|}TKH.L888Yf8OWW>=?Ηb?|՟ABXܒ< EAAsrp;"MHǏb岵 `Yӣ9L,byiS, [}ν +U2g ˲x>DY77C!Ĥ'^ +;*Xpsr,yap%99s&ԪYS5K CH:̭ xAQFquuExIFt߻5o>oڂV͛aΌ1o1ѳo?!)9ƢR|_>xá  :G|2LO]:ֹ>zVb޵ Z BL+Ŷ;P(ѱ}; ,Y-zv$F ! N=eѫGwܺy8y G|4'MA7C舠@̟3K._>.3\i'aqPT^&=*9^2 KWE?xC{kVLbρԱvPvmW Ç4k>''%RRSoo L￝3gGAzz:~Y05W0^ǩg1I.X+% { NOצIHJNB&A&Mf*o5Xʯ}:WQfx V}o"(0z[Dk:3)2!41W{>]bu'%ݖG4YH|}dsW_֌ꆵku/4ηeL.>X;^(%y֮߈7BT"0 M_ykWDHpE>`Yӣ9L99w ; cy䉘6k #U*g ˲6s~rzF:ڷi/Ǝ1C1'&_{b-3>߬gsZĤ$xzx =# Ì?o8҇sk ^,d^a5nRrA kj:FעE&\$WԖCMjZrrrbՊ7|j -7C\|<,X223AFNi[ SĖ)ʿ?c/ʸ8d E>SĴ8Vs6aVk^J-7Ki*)1uRٗGŰq6:s/<_=ST"#ǚz/+XJeN&t GK}Y˚vږq5uja `8˒Ab)=/Br٫}6GKX0,jeei.pG! HRtȗ^I,8ǒ>tX?`߹5A7Q"LA%II8 i<~a#K[K_RڢfBU1rQa?xU*Mڵ~ygUz^8"zuJҒGJ.2؉EHp0G?M1uDmJTɖ{Zme.~WCZ5GIWYEY>,rbҎgܚ !^$F>?! 8x]KOM.r(=<=z(E(m1C6ڹ0>>f.OţQ3& /4k vdq;^e]Ҥ馴Rˣdq"܅#}":dKRuee-6Y2t]!eʫ颬c e yi3qnMD鐞L+ $AAAAADqu/e1 x/m     rADZ-AAAAAQι}&AAAAAAA   "))sPOժ_ ? xx؞ ߲%GG̙Ⓩ!@ր7@ǎ?b?X%/z?m/nJ|Io}Bjjea|Rtf _24e%ݾJ; & H^op"f}={ShvaX`.`F!?pr*bck6f|R/Ký0}M%.NHaj߁F s?￀/P2/sUq4ФbXʲO\b8ss akV⦤˷:oRR[i寸Zڇ=ʯ<鿤ۇѣK)++<3Dٰ & LFs,}02}P,[Pc, pWW{ٜ_|~tԩ\*1|} HLBC_(~Jnr/P4ǧ=eJZ@tt@a?RO(.݊/Mև=-'&~11 > ˿j, <}ϴ_(nK!V@ ?kjbW|4mT?tHl[Vk3c:?{ؗPx)u| aI(/@\ dB[1#Mi~`[S\Bٕ)߲`?AD ` (o<~ 4hjkܽ۷ С;ߡo:{ӭP0x04׀VZի=wiSڵ{ݺ0hذq[LT~۶usNr]‹яPb'%_mP3fޏv圵se.>}> o,XۏUmrKc:Wd,N͛ 60|_ !ҿ8rk?4^}:۵^yժN>agϜݾn{@__C1R=.ݗ_̽GjB+U>!Bj[q'ݜNڴy} yG\x1j_bO/Ӿ!Ewra_Α!vP꧔_}ZS_~YxMbb7QMj4?o ُS=. omo(T,AarADyc"nmtt*z/W^p>0kqwE_jʕe܋H}|*p>>;° a\Z-_,r2p ߭[\>KLx1J_o 9?7 T)h4s'%'O۷9a?Px>N>{_ Qh<6٦{V+Vp/.-s釯}} Z-e#GrT_ohmo KWn߇~տ3۴1~/~!ǨQO9_9 N?RSRo!ڿ7|1 Wׯur/¯]v,3H͟T /T?S}Bkzի~bSl;szP*~Ŵo|4?o ُSkhÜD`^a@6o~*e[ mMA ggKW.|VK7ƔZ6=w@Ņr _n˸-K.mJBп?|9%:9f.\^,У0k}Ҭ]XbbBH)?K<nVHWmۚ۷/gm Mp>}2{p!Cc*i7lXhm[q}O>Vڙ^[Ѱ!ɉ[ cGApw&8xyq} F?R*_{Hӯ=[Gy 8Ŋ.'3+/H R˿8ϒ`Pn'p"&b[!#_b"׾tL^x]L*~7_*/k?5>QnYH,J AQ?HK&iir_=g#Y=_QrsE?`X,L?)܋7, wرχ'] KUMk,<#,OLo3Sj6^bʇSp/ֵ=[>/cG@CH>s(.n1m9ZӹVsR;&Z)KDNU8HM\_{֏\^ÿ` RGB/$x-}~%GXkk!#ɓ={8N^1jbbGCoR'&~N_Q\1?s_A  ^^܄p2K^Nrn"hJRwg||MpIJ*;9Ri ۏ$ (#$2lW r/׬v>x[qeCs1aIDn+k1bBk`M(ߦC[7Hߤ—?)%&<_:>W)ӿ+Y՚ǥ֟n׷/w|F߾a֭u1j߄GCoR'&ҦĦok[!}왒R(^n?A/(t0AA7pzdM_&s_(ɒLƽ ˰~='F[MPuT =ǝAdO^KBBC4ǎY/܋ W:uVΏɲr9P[Z*S "8;l6lc{wꏐ|aa\W~߾]ҮC.;ٓ[`A3z;6nqZ:ڊ''nϕ+ߙ\Yv))TO jb7[_O} W?~ !kPNGǎqeEj)WWU oBX_Ⱦ9[_m m^[ AQOƍJM^iڔn]+~~K yaP( M_}UxOUU]]]ecgHN}ùqXÇ_Psv8r,Z[kmm|ck?.#hr~?[{/-Y, ^fh~?3~/sSؚ?ttq?K/=.2?zG=o{pCy9Ip99.-ϳnYL: 䖏_'yߢѶ_ְa[s>t@$>PںUzh+}VƦMc/0٭{Tpzu{GѸ)SWFS3[omٸי?Szo' {3fleh+k?OIh_#Fw `.#W=Y,`+ `ɋzh[,372\HEE>SM>^gϞꫯ/PddƍkFǏw#W_}EDD^kuVEGGkĈ>|Ξ=kk{ճgOMОH.]\?͚5KԱcTTT=N;s3FEEE:|vء+V8766V;vhP8ǷZZrU^^|7y|04k,͞=[ohBB~B{kw^رcӻkkױctiv hŢ={\˖-ȑ#u!}7ڲe|}}%9vORRRTXXnMVAwǵxb=MU~~n& `\йsTVV2՝NӟTEEE*,,ԩS~zF6lМ9s$IJIIi믿^YYYnAK.bݻu7knM?%I iGվ}L4l0SJJv*___ƪ@s_\\<~[ﯼ3%%%) @ժu4|9sF7|Μ9;Cjegg;K#.S>seff?6mڤR6mرcեKIONLLѣp69aZn%ҥ Ða2LS&I TTu4:l0?^onv}ڸqfի$Xv(~Gyxx0fs]̩S+ĉ?ft+;;[񊏏-kN$d[R7JzG%ύSNiZ~w.ժo]=z+&%ի&NTQϝ;-[صP׮]aÆFǹXn\6_M<١<<<Eiڴi^xAcǎuM7ސ$i۶m1n8s=:GGթSw1&I&I%%%꓿k׮mV wVUUgؒM] )ʲ}͟?_QQQRmϐ.--K/۰0̟g߿_{W_Hf6>>>z饗"___-]?OOկ_?EEEꫯ=ƍS.]4d%$$ȶ_*~ɤ+Wjź+u-ؒwRkuꫯVHHVXp_Gńh…ӰaÔӧ7+zK1bkn_kwu];z饗xo eddh֭ UPP:e|r0Tee&Md+;s֯_"_3fP.]-FHX Rrr]YNNdXle.\кu딚ɓ'۵߾}bbb^{Mcƌ@Б#G۪Ut6UXXǏŋO6677W馛Z:t ^$L)66֡ՒWΛ7O8pƏF낃cܹZ`zqvJNNVjjݪ`ű:ٳGfYSNcos)''G/ٳg Ðbi2335`mڴIPee6mڤZ[j%&&jZpSb΀OOOl6l6voxxK߯hĶyf-]ѺիW_tuUUU)""B>$XN҂ ~zu]VU~zW^y)q@gA4(>>~&I3gTzzfΜ"}Gz7WJJRRRZ?>>|^~e-]TYYYWϚ5KzU^^.I񑟟_N-pASLѮ]4zh6>R)::Zwq(R窨PFFnݪP)((H>`{+pA!!!ZMVTTmeڳg<6藛gٳmwMacDбOZh6nؠd2+l4+Iڹs&L֡.#Vk-h&H  n0.$''G;v+3 Ck֬ɓ'me>}O7|8O֧~7z9sh;wsOA_|/_W_}U'NhLmذ2F `\HVVrrr$b+p֭[TM<ٮ}ZZBBB{キ>^fYfY?p{dyVm}Q={VÆ 9R+V$UUU)""B>$XN҂ ~zuIg0.(!!AiiiQ|||֪B2 CeeeJIIQJJJGEE_͛jYƖ;ߗ_~YK.UVVVc5vL]vi lVu)((H; ŵQ㣌 -ZHW>}T^^{C `\PHHVkp[YRR ød\:ΖHJJuuUaaam;{l͞=2F+Eiƍ;wj„ m]Ӽ4pK& `:`^Ҝ U `p$M `\HNNk+3 Ck׮՘1c*I*--w-I[m%̙3Z~Կ͘1C]t;Μ9stQ 0@˗/ gfڽ{oWHΝӚ5ktI 4HӦM<pIܽ_Gj8rvve׿ߋWXgyFݻW#GTaa%IԎ;0[ݻkȐ!Vff|||uVu&??_k֬;C{)7s*((H~~~*,,o*#Ghܸq VLL߯+VhĈ<f h\ɴita6lؠ[nm۶izו'N(##îMtt$b-[?{n" BaÆl6fcС~~~իJJJgkDFF*$$bhڵ?TZZ@GB`Kh˖-JWnnuM79%UVkm[9;/,,TUUy 2DW^y&L@"ū$0)???6{r%''+55nUmk?^;X.\TBY,M4IO=,Y@G `\L\\gfM:U{ˮVbbF :-60dXr߭[7I}'OOOnӶmۜz|X T||ff~fZգG+NmZtiuWT}Q׮]e2le}  `\PBBҔxYfT%I>>>ku\)))JIIiPee PYY<==d6mV\'boƎsw)Sh׮]=zWQQ mݺU RPP|6y֭[ :v옂'ɱ/^<+<<\&ɩ\@G `\PHHVkpI5 ~*((hu-to߾ڷouUaaam&tHSbb-Z76zzvܩ &AtI(88XWnq4'Fp7A `p<* ڵk5fJJKKuwKnVkI׮]tIEEEoq̙GjZ|e83{OݻUWWΝӚ5ktI 4HӦM/յkWM>]{b0.$++Kve999JJJb]pA֭Sjj&Ol;_W}Wz5bٵy衇4n8m޼N"d{ 9Cjɒ%3<,Yɓ'oզMtUW/.0.dڴi:| le6lPLL>|ʾ }GZrvڥ;vm6'фDkٲeϟ?_qqqڽ{x eeeiĈ^xAO?yZJƍʕ+/WHB"##5l0f[lVBBcxxxؽjujF,֮]竬L m-__6: IHHl}ݧmٲEl6kӦMںu{)ZJK.mnՊh;C~[1b+88X/GU\\Ǐ+==UwA$L)66YcX,UVV… {~pk?I'h[… \XX(ŢI&驧Ғ%KlL&uIui[@bTWW={l6kԩv:;bʔ)ZdTZZ~)!3tMt}S>>>۴m6I d=Sz뭷'(22R/r|X T||ff~e24rH8p)m޼Mӧv*d+Gٳ6llȑZbE 5FwIDAT}ڻwwi}'$*//s=^.-pA!!!ZMVTT%o|Q8qBeee=WrssUPP$0.Z߷o_۷O|vO#G4Z+Eiƍ $kΝ0aB[bhrY(88XWnq4'Fp7A `p<* ڵk5fJJKKuwKnVTee&MdW>g=zT lw_^EEE߿f̘.]ص9}vޭ:7믿ֺuԽ{wr-\Kc0.$++Kve999JJJb]pA֭Sjj&OXyyy8qxu=ƍ͛7;Я_?-_\ztu4(99Y{o۷oWLLki̘1v3# 6m>[ن ahΜ9JIIi8ъr^Ͱm6m߾]u eddU\\e˖5ӬY7Z \ `\Hdd &l+3JHHh8+VА!C4j(gjCS^TRRb+THHH}e͜9SiӦ3# IHH%4kjje˖f%?ŋ'tzlVҵ^WaaaU~~n&j*22R|rss#G4;`/!!AIII2 CSllΝ GNm񊈈h.88Yc+99Yv/ZRy)11Q@g `\L\\gfM:UeԀi&8p@ڴijkk[aX,~5Guu5zh-\~s)''G/ٳgЬb0.S22z[UU=裒b:uJ ,ս{VŶyf-]ѺիW_tuwYV~ѣ^yf?<<\%IWtttwE4(>>~v_~e-]TYYYN+%%E)))-of͚R*//$OT[[ UVV0 S2L9s5sL飏>қo-pASLѮ]4zhw8NSQQ mݺU RPP|A[u)((H; }Y*,,LѺ;.Y&egg+**JϞ=[gnPV\ݺualt6ڳg<:;LX @D-ZH7nlvtܹS&LhڞdҕW^It@Zzu91`0 &H  n0 &H  n0 &H  n0 &H  n0 &H  n0 &H  n0 &H  n0 &H  n0 &H  n0 &H  n0 &H  n0 &H  n0 &H  n0 &H  n0 &H  n0 &H  n0 &H  nKrvwtjTYqh g8~zn0BY4 &H  :*9yЉ}0JNHA~{q `?^ϗw:WxwPpΟ?Æʊoظ1F;vkWtm0Ҵ?G׏511-eђ4mJh3}s萾OUUU+WO 6T!}= @;r}}1sJO[bhl\zLwvE@ @'Һ!YJ 2Ydu2>>VU'3$D ݸZ}*=}Z=hLEJJ.]̙2IRPPc'K+<ѶX,*)-Up`۵:w\ ukȵrjթR -\ҙ3gnW\ѠoSY1[oJҬK۶gO+Vx/~8&VO,$啯^idrle8K;!ðHJgT yztQ"X֊RUޠ=_g$d[r$) UX޻W}##t);wwݓ$I{r4g~8&V7NH]趛oR'#zn2-ev~#Z'c' ԩRM#=6<L_}PRY1>aNy˶:p6NY;w?gׂ??z[%% q-]Ņr* G{h}ǐɰzAoW[CkLu*Y]-)šIӸfdQҌڳ7G1Çkİa "ժ٬{$կܺ=K^zΐjQW'I/җdDqO̖WTM:fL%~?Y}_{?jpПZ'^`]7[x 6T]mmVv~2^]!=Gsof<-,wI7*o&}cs`5jj{K ])]PZ{Gݺ0 ϖM'gn~SmYM;sS%%QP[YXEǏGui @'Q_Jƙ/t󨮯2Y%d2<*~*{=g52Fz={5rp]qEWEkW/7OIÆ 9)Q=$Im7h8xP>1oTAgn]7NW``ֻӧ0'=WΗ_*it*)щŚ0G۟g[cw,yٽ:q_\Ӓn֫oi/I;&V&II3K"#"72Bصoɜj3m C2 C>[s2L KY1ޕ?x555$??_[]:W~Vgϕw:w F N-@&=]=8~ 62ݮ߆\a{KW\:;O/ow:wL2<$/y\SSOwyt 3@2jdW]qyt)W}{G0:^'L>\N+IbP5r]>f0? y6} xXK~7kN($ᶮzT]]>ݳi|&ݔ8CKߟSߟCIIuBo-;<.G@`!]u啒o ~<,aDZ|1y"WACd9+@%[(,4Tj]wޡ[}Oq3O^7%&էwo24qTQYiQ_L>>*=}ZͣUe2VO=%ݤ=CtTe/j3yxȼ3-eVzԿ_T ǡk}͡8v?[k׮:UR|H}hܒ]?em$Fh}|}uw\w?i3oV//If„1~6mJ 8Ja\8p $#T0A/Z2QTL$sc)/d"u'&VbBs={_y Nv*+r *OI^ͯfXt:GàsQܧur2L*H`0"ꝴmPc41K~q9|6:= \Z.Ŝd /ՊXdDCFhݟ\3pssJc2֒)J2;,FvMRez*wrBamMTn.Qs?Pllh@ݱ[w3gb0V̓C@NNzggrX~h: 9ժUQ0mvif]2ssĜAĜ" OCJJ*W_?S4hj~ܝ%шLVxd4)((7kN_UQF13gh(777Z- ̈.K'O%2byyy}fF=J|֯ڵ|J3cLrss=_Ixx#`0̙5ׯT% <5de… bkH~~C_5 序(u)ҨQ:uDWR.KWx~dnd2Yń 3?222_?mڀիW"x^z F222ʘdJ\t FL&=yyj.\#|"FoO y,6r9r97 ڽ2O!9% ȡC!88NWb5++ȿ`xxxp:۷o!??'ϒpFę3gt2j4 763޽BAfMرc'nDD47{2|6mė_~͊[[!ɋUTkk[r4mΎ;i{{{n)V(JN'ryի8} j7nL&kl-:88/qY  6fwcر# ^O̕1&4ii4huڞׯ3vXVZ]%@ff$++  BӖ"/IRR2/ӯ_<= J>}YjzoʓhʒF 99e˖ШQ^с T*}t9RIpp=bbNs=xV}@nشi7nƦ|{ܦ;+K, \p(l/[Ry>#Gښ& *VއJeuJ;Awce滄 1cec:u^ ~u2o]}9o`xx# +ٲeK2hڴUq or&33 oooFQ7CJJ .ӿ+u˛DjJ/3f|\I>$ccc-/_J߾/!prr"%%+P2m2?&L&?Z,V='Ԅ:Ѷ}g _Q.'Ņ99l:wNfPPǞ=Q$''NCGя< FÄ ڵ]>NJhY[jtRh޼r #66C3w\4 ɫ d͚_:tJ~o_ФI;|nNJ~``РW9r0\xk/bgDLV^͙3gXd ..]c+*ĉS,Yww7vɆ fΜINN+v< ŝʕLꂗ(V%G^wmqDG'''#F}S|h4Z =KodSROƕ)J2FvZuDZ~X8;;DZMVXm\\8[o;kuz8{49l_@fz)qL捯/-Z4jԈ!CRшǎ#h̘1_|;w~>sA`LJj\jsj߼23̟׮]CE1'`kkFVg~|ҥ'Acǎ|: 9EVKao2"߿?:]>]vСC9sketؑ2i$VNklի{x~o + ?trrrEԭ[.?ڵ۶mseoʼ^RSSIJJWLjj F//o|s!+Od>U6NJ",,5Oag\ժDM??aQi ??jCR{ NY x!̒%WExOX[[agg[ro4VZcnQ÷;v&'hDVT*U&С 666(zq0loΎ &иqcrr2URXf &_|ݻqQD^VrIw2 Lų믿Fffshذ3gB.x޽Kbёɓa2x饗ѣYYe^ڱTŜ93/7o>?gy*BsͬY_0u49sNNQni:TZP6{c\bP(amm˅dT*Cͺr_Ғ׃EXX#<=HMM_PVa/,WEFIN^[ز Ƙ&NWw>_āϐg ?<233t١T*777͍X]KfmX)u||_{ d2T*G1\NAA.JYd"?_7J&@VV 2 oDFFR}x#ɹ^g+K&ammOffc0PH2>XRSѢE;9w.48&Իn˺XJ:6++Z| ^^^( 1y*}Q*pu-BזJtt: nXY͸kQ(89ȸqsBaEAF|焆$tΟ/T< Aٳ[~|;OM8q~~5ps+,JOODsx4ήK"۷GQa:˗IJJ"):I7L˳t@-x{{JXX-zAfJ`L& Q@.WH[bio[*PߥdT>WV^&33{]c)NѨ'33A}wnXJ:6NLbqSsK:%Co/bI/{nnKc0Kx~ VVRYddʊӳ/He)trr~`RRnR{Z*ۓrm^od zUsf* znLC.WVA.?Ijj꿣 4- /VۓcK[RɇNβ\{2%|,T#owuP`k+&L+ ''GJ{tmwSbqA]fZԨ Z$\&//=r9.\: B7`4VuUr^zR١Vٺv}`bdB9p,]z!jͷr9 NG^VB.GGr79p uʯhjp}x'ٶmIFNʕ+kmۖ-[)M$ VN|8aaaZdroooi֬Px9v۷oɉ_~۷Y nʟI51bsaɒ%4l^= BP%k׮1g~ h֬'NaÆ(\.' ^x222ػw/7nd4lؐ{}Jce}v֭[G:u7`,Y}{nUԡ 鑹!C_~g=Juuuge֬Y̘1^}Uԩ… R()@dd$#""͛IKKc$''JUDO2j?z?_ӣGrc:u0rHΝKHH#GQFlٲE$P(X~=M4YfaoooVqFщ^ETlݺA>}֭[jOOO BnX|9;w楗^b޼yK^QT<( lmms4hЀ={pEx'qpp |{=[ZjŔ)Spvv:rQzuƍlj'Xp!,Y]JZuҭ Ɔ6m\.^Q"*ҍ7h۶-K,aȑ5$[5jԈYfDOBd2ၛ T-'Nн{wr9ӧO[,ޞ#G|3gXht&T)&ɜVQݻ73g$--B-I$c2X`m۶-vO>$Fe\x(~w󛺤*RΝQ*oA(/$۷s%:w\.r;XERTnݚ H <,I͛WݗpRkٲ%ϟ'>>^8AIڱc);33~3g;cNf&[˗cСT^sAbbbի +uښݻwSvr-[*$-ʽ\VKdd$ `ڴi\~e˖GFFbmmͰaOHOOgʔ)h4_k9r$g>ɒkɓZ T O2j||ZP+ミuҥKdddӶm[e˖dffr2#++UVѩS'Ջ3gΰgϞr?.GGGRSS˽\A~`5 E8wqqq bС}}h4<<<=޴iS֦M=988pr-S*J'/?V[#ILLDRo/ɓ={p~mp֭ ** ooox,<|7bv[\x+W(u8P$M2o?+WJJW_}E۶mz"U^lܪC8q~)SeڷoO=:{2L>|HΟ?OǎYv-aaaR&'(\7dȐ!^?w} ڶmK6mPTRYLZZ{a$%%ѽ{w~gZj%uhP$SDR1h Yh-bŊӴiS7n$%&&r=J\\ : Ib=RIRW^Ջ7nj*+AAAԭ[zS`4ILL$66sq9nܸ=z/K.@)R{իWgь=t6mDTT{aF\\\Y&իWgggpvvJBD^^P8Z&##,HLL$))WpppE 6.]вeKuA#dnF߿?Pӑ#G8sg֭l|||m۶FÆ ɂ T+:uSNwlKII!%%j5:\r9d2\\\pqqz£J'#lA,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,X@k^{ի$%%@VV rssQ(888 qqqjԨAZhذ!קFR ˗IMM%--7oJVVjV @vv6&\7pd;;;www<==]6Szud2,*d222صkQQQDEE^GTデNNNJB.ckkh$??ɄZ&++dHNN&-- (\'}k׎&ME*ׯs ϱcǸxya?kkk<==qvvGGGz!u@ףh%;;sq233IMMEәDxx8aaa4jԈƍSZ5iNLILL~cFjժEpp0ڵN$$$pbcc>}:Ǐü>w.#*66ݻwgvɕ+W5_>;w6bVd"==ݼk8u6l0HѦMO@@@QY?>/&!!@zMDD#ݿaeeEhh( 8ׯsAO;իСCyquu:$hFAtt4;v]J%ˉ ""h֬YCF=z4SL&ٳ̞=_~;;;ڴiûK͚5"|||}{.\`ΝL6iӦ/uҏL<rss:thN0kҤ 3f^cw^ê.\@>} eӦMk| 8&թSC2o<^xV\IݺuyILL:2I&!!m_0dNa=LFΝ={6^^^k׎ӧc4RIMM&$$2vXK:vޱgeܹ1Sn]}rrr*Rtt4]vΎ3fTO/gggƏƍ6m'O_~ARI~Wz-L&C}) ڵkO,5O2???&Mđ#G߿cGIVgƌ4lPI-[dƌveRiZw; W,Ϗdz~zf͚%u8?_UVL<'''CzUV>uҵkJh*;yܸq~[QR _~L<HEmܸ^z;2lذǢZT*.5{U$g~Hg%447x^6?^z֭[3x~P(xwgϞ$''KId21~x6lH-GR2si-[&u8NK/Ͱa,`ex%kkk`4ɓ̆ 8x :21 ={+Wrĉr/LJz)SŢ~ywQ*-7Dk˖-L2ѣG;|0oft=xX666ݻ~IX$O2?# 6vRR&/^$**<7Hy{g'**"K!--ٳgӳgO,:`2%iJSUD=xXtЁ)SHڪ4ɤӮ];)ø/AAAtբ%88EYt?k <͛7w|Μ9( <$n[XX9岏W^dffJZ$M2w`0дiS)øoliڴ)۶m~Gvv6K,K.xzz;~L&=rJڷo_WVD=xXlْ+WJgi޽ԪU ;;r/;>> 6K\\>رc=zB xСYYYZ n޼Ivv6#Fѱ,MPP˗/'>>R]J*Jz=,\o///K>}hݺ9}4.]bСDGGF!##üMVsAK׮]i޼9PzvwСC|㏜9sooo BݺuKnu|7ꫯҥKJ%Ν/[nիMf͘={6T--3gX[s̡C/o))),Y"qhΐӧOW~Gl,X6mPn]Nʅ wASEسgC aӇWgffrUN:U:k.6o̍7?޽;#F %%>XN͗XP@@ժUHhذ!FGZ{4ɤXd` ))K.3(l/Z_~| cǎhѢcj2Ϗrnlll!55B΅ 1cu֥^z|n[Kҽ{wscu}}}ͭ"w#'<<L+Bhh(-Z`ذah4lrטVwtB`֭N,{e֭уlj\\\,Hd<<<,6WѨ&M/0}tf͚-:k׮1ONCT駟dnHq`0G.],v~233dt2`H`` }žGS\RSS{Bꈯoh4n7Le;ԫWM6Cf+e0Ǣ)}2k&))tk֭[3}tL&111˳UVk^th.\'} \q&}eTtYÛoItt4ϟgԩsb^Y}{h4uHi222Φe˖%nӣG222_J-\t HppESILV8E[cs%sssΎիGxx8棏>aٲel޼wvIBB;vիdeeq̣'-9T\\vvvYlh4o6"11sU6mpqqȑ#Oz %**͛7h̋eggwbo:RDכWh߾.YinuHfvqa%[G$ӦM4 Ν+r9~)OpBv-RdbҤI|G?iŕi&^oNZ-ju> 9l08@nnԡTQQQXYYѷo_C)し% W^ZULL 4hQ曒.'y4hoRR)ɛoᔫ3gҥK ʽ?661c0g-ZDϞ=˭,\kkk>C^$O2L:"w5;2I&IJ aRS͛7sIF#=R#:K.1|pvnܸÇb W[Çcԩ휯qqq_s>K߾}ٿ?~-*ʢTΝܹs7|S.**ӧӾ}{^$o@݇UVիG|7tڕ:3g b;ڹs' ,'N:$(wYQ4FJbɒ%U~rmLπ믿g2X|9cĉ|'RdV). 65r-ZD8q"W\aȐ!b󉉉ażRTLJ2~!̝;lt"uHV5kׯ_^zRTƍG 8p Ǐgذa4jHc8::sNEAT$잞$''3`*Ȝ9s0 ٳvʙ3gx뭷1cڵ_~IZu /_Ç>|8fͪЙG铹?.]ۙ2e )))RTn'|Ljժ믿vZ.]ĨQXr%yyyRVM6vXٺu+ , *q0`GER{KPHMM/`۷ RRRXx1#Gرc̛7~iCJyt`:ә9s&vbРAԧAA7uGrHER1vX w}7|_œO>IN:wUb28s 7o'o`oo/uxeV w3h yMFhh(/bN6lڴ2e cƌyҥK駟Xr%5k֤I&4mڔz=cL&/_ѣ;v/I~LF&MhҤ &M`0pqˉ'8v[l1O_ggg?T*\/vvvd2s݀‰Z-999䐛KNNpMRRR4ʓO>[oE6m_>ry[.DC aȐ!\|#GcILLIr9ժUVZlْ_ H?. M6-ƺ^qetܼyIDATtz 777<==  "" ! H(%rIvE/tEj5W^%;;lz=999( P*899兗#٫/GTRleeggsM222ϧ(\ d2LT*WpR{l9-!TNNNNY6qxA0"`Q"`Q"`Q"`Q"`Q"`Q:NڵkY h4{d8PoAZd֬YS^E P>A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J$A,J 7G*"'p)h9m;˓4 A #\޴X,~#z+*+\!W责P5bJ\8pasD~UM3O.xCjzժsouj㫯k@Q4+#}|Za@0Ap(ۜ@ !7$I,P p"I l U :73X4M VkX#Jƀ a_zo$dmd.mdL ]+ .cm)Et62XuJ!_uJ!7rik[x[r3mx̹/2&It޷?3==*2bt۠i%4}x}ʀ_Gk(CouW"5UeWnߡj-]'!rnx4M$9V M4M?>/[`n{@Fzk{z.稷q"[p>{nDrACsK˟Cԗ[~[GYTrk^76[SW5kj>)\Ֆ]= d|aZYlVBAt}CcCcT"IKIdA\.+oljxdZhXvGZJ騪29a.+ 59I,QUZ^>yRvUuMOouQakj8l6%IJ*GHR[Wbp鴽}.t&'YmZNfN "gb.pð~$"QCccoo%NjEm~2dT (*0anTT+ RRT*egW"U|RaQCcSGGgfsZjXXLxT6][WrO2L 3#g}ȈefE$(ro 's8]\.$I>'J8{oLJoyfm- eP@]}F}}u rCy(\VwLf4?.-q ֬xWHܱ{OqeZ]ZV~@Oo.oݾ4mnno_H$TURյ~)TSWev/Ah/}c@RQT|]n *ҲCyGO޻OPlw9H,It:H|ɣOꚚ;wz+*b.^*,-+iR)u;NX[eR) i@Tj5Al>EQio4gϝI$b+"ʾr7|948 P/gϽ~ 5ݰ'PfqlH413cjNIMMNi)M==}u ӧ.^8{)ӧLNRYQvʤY)I/jJ@ ĤPo4訨#1ӧL0,:*r}ϙX Sr&EFD54LOϘ>52"bƴyǎ{<@ZJJjJ)+<^@"000)a=}'O^0wNŋn㩔J=nLVTTX6&qBL& AQ.(>.ttt"p{8qjRRI$`OlS].'@&(8lbT*J-kC ˟yr9IQ[O?\&RZmm?ή.ǣwȘ2Mn;H%1 <.h T"Aa\ P1 vGމB`C0AMM .Ela GDA:I%0Ta͖J"AtG6P1&>#- IOq^pǽ^L X"(v76%bqNDA;t̙9r 6 ;~}? /ZwdRS˟hikD~2({~˽x|p 6e\n@( 80 e]($  tuw8's===a\ ?ooZ”I+Z1 DȈvbnzGTʤ]'/gXNx3G( 2 斖on{!!ԔW\ɓqEӴX" hΜ+p=%d~,KӲl91#W~RYٕ+a!!]ݍMK1bm6+r!GdiCcdjmm 0l67u+?Y&^.+r88e\J|'6%pmpFIV#(}JE妯^z9JoA"XuuA:-zq @boM_o>!Ja(~gٌ0 0L*,[d=SS&x BQ0c "0EX, EARlBd~UPaX,y 0 r8O/_gm;vq8M X,aآ.^0ŋ|H$z|cZ5+&B5@fѼ"Щshl Lξ+UU6;)aZ`؉|~J9!.7ԔիhPg@gP\no_oGGH$T.K$awm;w{dD,Ԩ?gX2R9d @~-鶜8uf-wRȍ MMGO|9vŋ+Xh4VdgeήʪY-kyEE\l 㻾Aļ_Nʞx肔P.;(H{ l64pBlZ]]FF/lO&dOԨUʪT@ߞ-8\.'9)/j7۟|`k04]]]룢ikkۻwUjufffggg}}}ll5L&_|qʕO>D(yFСCz~ѢEWZmmmf0FPH$TWW 6O6mڴiuuum߾-..nɒ%B0??444̝;WTڵg}VTN> ȬYϟ?}ĉϷl^7**kɒ%<3n*KKKCCCIJuuuf"Bpʕ̆@%%%(*''g̙{HiA:-5\9@fsn@aq>* ;:;釨]8o@ʺlvw豘G<(($=زu6wt*P<=3rp&NX\|mBaVVVXXҥKWX]^^>ٸqS222zzz># /:;;)2 ^`0DfZ-Mϟ7qYY7,//ONN>pi.**zv\./t?PQQo޶gϾ曑&L?xRSS ܹsOtq\.\p>#+++##wߕH$^W_ݺukXXٳg7n_}ߗ74tPUPpϊ HN= h îA.ne]CP(Yثs:7b R˹ x8={k__|FaVpCyGʀ qM0޷_IJH8w{)$A;?13.o-7/H$UUU\.7888;;EѪsEGGNxHLLLxF1$$D[,Lc"##cccIHJJo Cvvj_oڴ)"""##`b1Y, ZSSSPPsx<퍏W*%'''<<<22̙3%%%|>?##)))L&V z{{Je___BB\.jz۶mV5333*****/**qV/M]]]999x<VtBOx?hqrbc^*91 N'Dž@bV+þLO M1=4F cr jc b6].cQ.|؋ޛ0˳mV 0vt@\NIx\6bE$Ixi^ ]M ݁( pzE$a,b +c'2dDd @BA36e U :Ps"36v@ cE5; ?+\eR\rYyjG@%??_z$1d`m |>ETwT>K_ҲT-S4}0PkkA @m@Ǜ~!_U [i‰Y`?NGD=ppIi۶[V@`͞932'h.CQTi]&6:^{$~afi:>?&yo~5RmmϞ{%~~~w:=*hp0_ePImg4e)aԎy'Y,Do~°Ϯ̙EDOo_fzP hE媭GD H$& ǽ*^HdJ#qܫ9^/r9EQ "b d0=}IIZt ;B 8wuw;].D `X^br8Z%!vwSf3f)F IDATAIR}P_dfف5Eӗ0J6fuvwSrdr:]@ѨU0J2uJwO&f?UrT",ɓ.ZV'[Ѓϴwv*˵čt &I%{k2x|ǃϟ3; ?il6 P岲Z{{-}Ϗ+K.?M|>f͘>-!>l .E"P013#<`XLh{s}{ZŢ̔I6c"Ѓ:-%صŹn[ԓwZ˥ZU ?Byh[^Y+A\ӼE]D8Ӳg=r.^8a0* PddZ'%&/\V~U+r .^?gəqM }cW[P/fed:sv̎+W<82"C.i/$'&7u y]zhIẎ4M+.^k/ݳoҒ33̯ol~q?[s oni5dyL\*l7% %\8+n `4oVbp8Vg˶zNIBC| F OBZ;:++?]AәHirS.W?&*ð9=|D!=ŦM\v(N,4S\%2aQE"A՗(JޡQdFMQN'$)\u.>.vtvܳmX1ǽIƦА` xځ 1 a,AQ/.6VV-?avS3s&W]Wt:753-r^,*02W)[LO}Q²ZnMSd 4^+6ϡ?Ѵ?kq9\Ο3O&>e.(͙5`4l Ѩ\doXXEQCP{##"f7-<,tdefl#"<`XQ+AZ]B|N:XB|\FZjvfk"aNvvD`GPtڔc׬@fڔ߆FF۰xDVF6Kϯ=rXHAî$Ȑo|hzvPԛ\*% AhR7Ppt(${DOb(t:lD -fGgm`7=y<^[{{Nw͠lX,V̇F)PfvzqtX,??DLDh9lNB>Oή.xpl6. @QdX4%bv8LvhL'(yޞ^|0z@ sU8 ;/+=uhBdѳ^B2A fbDt1c+8d@ 7ʨBѴOӴN3 "s\3C ةtVWWvvvvtt8.Hr97 (T* 'I`X߿qFCBBネ\Ψ2*mnn>rH~~~{{; """:::==]PH$i l2,KaaSBabbٳSRR` rUJQTOOϿxyd2X PkCD$D"Nx<&ܹs۶mKNN~Ҹ\8d<#*x<ذa^_tiHH0y \ZVՓ&MZyyy?|||/WB~JJV'|rcEDDQoj$Ƀ>8qĢ_=55gILLϫ|ػwovv>{%ʀ F3g΢E_9rd څ@ƀQK=Onn[o/ Ѫy`j0^}Ջ/ZJR`cFG^>ۼysjjjNN[1 KOOW*6@ #dtF~5kO>e1 K.mhh?3@ g*(*77/˒%KRRRX^t j+ ~mdT.]z|Чjժoذ9pp߳gξDi-((u4MLK.544,KqqHң)}566dJ222|D ӧOH$kkkS]]z;::vQRR2;ccc-˱c(I=ȸ0"YF*FEE `L&n)H(bQQQ)ukRRRK1z qa*8q -04M痗3!/4MY,agENux+++$$Z^^>"0|vww\!jnnܼy3"S\\p.\cF7 {Z,ֶeFՃA$)ʲ!Y WiMM ǻ5dffN6 Ax\ VVWW̜9oZT$Oovvv^~B#JysrlZhP^r&モsYbŘeпs$iXΝ;wSNVsrrFgiX,ϟ ٳn3CƗ;ǏY3++_vml6fK$RzKKK߿aÆ+VyƍVX!F 6T^^~رcǎ-ZGUWW|rBqQ%&&~'O~q-z{{o߾yf裏TH$̸jzժUZFiW^X,˗/jcAW,'''GFFxΝ뛛  y慆^_D"H$6ҥKںu?N0!0^J=ω'{+ H$Zxqaa_ڧ~ZT y^~}^^ޤI&O,ۨbf͚wС+W>˖- oEFM4M/IMM4ixcl6;33SPڵh42$w'gZw[O(%%%%%eHI,++۽{wCC0q077ooϟ9s&a<( #Uݽ{V nEQV5//f\Yl909 ^uT:) ôZѣGF#>jiim8/YdET ,^h͚5p;6T555/^LMM3LXL``P]QQQE卮=Ǐ[֛s3goݕD"DɃz999Wnmmn{fFf m6FR)TWWf].Wttl6-%%ER8^\\LӴl`5hhhhEEE\zuUUUrrrzzܹseWwް!Ռ˗ y^&pvI+++ryWWWgggRRRxx8a83^($e2ԩSRiqqq__ߤI7/***** AR9|),+ !h4Ru)Ǔ߿w^͖_^^-[m$ :p xUU֭[?K.ݳgO{{;w1.C4~/Hi$U[[k6#|>?22t666ܹ̙3bð 64͜oVVVBBÇqqqjVTT0-pt c+**!wATWWK!v 0L===z>11uҥ_\.w\.W&1C>x@uuu8ߏ8t Z[[wdɒٳgs8!U[WWWTT?DGG3Z ..ZZZX,`1cNjc%%%A]|ҥKw߭(*33Λ7O?mhhijj3gΐ&Gն:NY7fD*%IG" 5Ҷn_Ξ=;!!0FLFJJJmmmaahi$apzQ\r***Qlulb0چ4ֲ9 Dr8Çwtt,Xf8 YYYhtvrEDD={vʔ)n[\r% ð2477n;͈T,u:C-۷oƍ+Wqt:N湈.\PSSscVTT4;Zvٲe:n `4ۯA EZZdj槠if%Rk׮/rժUW4bbbVkwwf#Ir޽/_ k׮-**R(999ǎi:55uOӴxcMq3"bVkjj^1yz-[4559s&##NwL8EQ8|r I&݌<{hV(K,9sfwwwIIIGGB|zرc%%% L6̦0}5EEEYYYoyf n;((諯>}:AfiE ;j5ATgTTT~~~UUŋz^3$$d0JY,Vxx}nr555vBP(BCCCBB9RVVR5MQQH$"I Ґn6m4/W,D</666**j֬Y3gjWAAAZ1!!a&$$$&&~"0d6VkkkFJ<`00d$l3gN___AA$I??VBA|ye05IDAT$MWr&|h^ڴic31ZdMOOW=NSRt:Ay䑖T,XaؐFYEե(\/Y$&&&88_bM:uϞ=A ^,kٲe $I *J \2 N:(:k֬@Á\.gҎI$۽vZVUպ\.@t:\H$BQ(X2-NdBB#UiDDVmhhPTܘU5|(bccd`\!pQύ6H-[VPPp%K܉ZxL&#B(3f\|A]3eʔAVK~QbEDD\=H$}}}j2mAX,VPP^tho"""VXq%U{AUVo۶m>j9r^z(N:u͚5/ի.]:ăQիWϘ1OF-s=DZ/$I466н'''"h֭mΐa&iׯ_jpdJ_s8-[Vr>|8??w]jj쀒oGGGoٲ%//. 6L|ѣGyW_}zNb@ [٢R֖hV] <}mmQ)q.9a_PU͛7ܳ˗V;l~֭4W(mˆAsv^ׯX,{Slu:j{憣_9Tz'Na6=//$ LÇ< %$??,y</\ ĄFgE$@kllvlaϛL)^p8<99̘L&^$Ivd2ݸqy涗\999_|g4~Uղ()@ x/^p(z۷o6`0 ge\l:T,_,[H>b$)uVQz}QQhL$^ٳ6`_zp877rt:]yy5)l6[__ϗO>h***VkMM {TVVt/^5^dEe%K.EQ޿CAUUeY3g\rEW^UWWhttt``d2(~A$ey}}}zzZUU߿/tIlݻGS]zӧ[ޓ}xcܹ._ɓ]l})۷o 9T iGw^YYym0nJ!- (ưp:T @*PJCY&7TfsNN^of8j*fl ,ұh^^ F5dG޻wl6%7Ir ϳa&t]^WWv`}K^;J?`0pPG BԡRP)u:T @*PJCԡRP)u:T @*PJCԡRP)u:T @*PJCԡRP)u:T @*PJCԡRP)u:T @*PJCԡRP)u:T @*PJCԡRP)u:T @*PJCԡRP)u:T @*PJCԡRP)u $zFIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/eclipse_hello_paje_trace.png000066400000000000000000007154671507764646700270260ustar00rootroot00000000000000PNG  IHDR$#sBIT|d IDATxwx̖PB/#"@QXAESZ)v+V"Ho dM6#d6ɦ#awy̙s(=4Eu]]:VZx< C_!B!B!B!uR}ZZEQ(4EAQ4TU-IZoB!B!B!g0 ^^'{* dnqRXu'=}{(h*h^u.B kr&jDoo!eh!B!B!B!'a4 `(7l;\QWPυw&83\Ѣ0'Q]X.m:}p4f#Aa0ZoEC>KX!B!B!B`h45vٿ&K]: \_ѳKnxrXG>QMl;$%5kN>N ??3!tŗ}2IIX!B!B!B(LƪKHrR#Udo٤$B!B!B!8?oeP<=%}N/5?;;M'/b O×nw.V- ]eƋwT&[~]I !B!B!Bq[~F).V>[ui_vY#.OѭIlϻq]Wҷ%6!wi{XIw]o%+B!B!B!`04vfŊ?\fذK,kZ",,r222 #00ܶtdG=;>ny>3rCqx,_!G\*+*B!t2yL&o9éWkk~G}OK(5ϗ#tnE6"BCC ???Nx\$o/B!8) K'%%q!ڵkKLLL}V'EGpٳ՚Ǟ={ڵvm6DEEʑ#Gҥ+;w[alT6=D62WHՎz;B!BQN'lڴL``ٍRm]ˮV]7ޘرcΊxӬY/cZKMb18 +[7%(u+G׮]X`Nµ׎.^I_!Bq~SUs=z믿Eŋ`0]֜pVU[,Ytڍ/N=O9r믿4M'++r0rrҮ/D֬Ve\Y!B8q$:u$&&>>7$$lbЧO<uvރ`{nƶ@4V^`OެX0z.h4yh۶ -ʲaF, !!!tԑ֭[f||/VX r;ݻparss w^et vMnn.m۶!.{H#Gs. {"::w/K㧟~trCCAUVf"˗+٘U񨩙3gҡCf3>> +[ȴiOqn{"5i[f?RC̙3d|L6( uB!$Îj٤d>7LTTant9;Yc=i^׿I`vI= ofѢO扄ή];֭A|Jm8ԪQE0*& ˯[B!L,&^z72{,xAٰa *~ ``)rM8AF#\Љ;w1z.U[o-`w8?o*II\{gMʲï.fqb0hӦ5 tHFF&>>><EӊWm޼>wٌisˆpM3f,{S|'(REe-~Μ*<.ի+q:OYLj %//+,l] 0$$lvڞb&xl檳խ[7:wС4_}?a.r/B!e9))):toNDEEHDFM4??Բ&}}}iӦ ~ 7DTT42 EQ&%%O?]H۶aE72K}cNݻ F}B!Bx2`@?lI@U}HL܁d"!a۷'+ d͚?gke׸i$(|j۶xghu7g,x.7| U_~&Ọ>J^^. 7!//(Ljzm\yH&OM67K8z~M,])SG2p ?ӽ{w6mDHH$7{;ߒ%\Œ3|pʎGV>**Xj^N#G2o<&L}9z499Y[;*=QQlڴE]Ln)i׮|;.iԩח]v sfΊJna+j{!!WWmUv?;N\.᫯s!foB!BOl6;.τ MJJ; \n`Ѣ #mkMU/LW]"MӪUK-[nDxxci,Z1m۶%,,m13~X]h]f2)QT0<9c+ !B!MۂݻweVٸq#m۶`8pECi25_}0`@{>,~~fl|l$n^k׮,X6$77];c6y뭷Oh߾-\3=PԃQUUnf|FM6{nTȒ%K2e*YY 'NGBB"^z)#//޽b璐PXFrT -ZDQoOpŴj[f.( {/.֭[FVݻwVZfddپ}G &&+`Ȑf,Gǣi74Wv< ]uwI&ѬY3ڶmoU\}Sa-:Wm[yE'''(yVy^!B!(ppTu\l?~?_?Gύ7NpiW۪oE~晙.1䣪ѫFj #uz`sTzjGut (lQV!B! ׇŋem1cXh?+P[ؓ777燪c:Ngah4ҺukvG}رhժ%? 6޽{Yd Ӧ=KDGUi\y2}||Xgb1}rت^.* .dk>}&0fUر-[ңGtĎ;عs'`03p`..a| %JT8?~~~Zy^]Vlۖ{>*0 BQS`& `6% ,B!oxEAu B]ϭ 8UUq:4k/ /3mtڶmK/&88ԲBo31DGG;j0ѣ1u믿|Wlٲ[ncGih}Y^᭷#̙"/8~r}1}<&]..Y͚TZeU~e~_>4iŒdg y84P.jժR_ye6U*,*j{V-fVmU˚5k!&&ѣ՚^j9O٬U#B!8_UQÇQj{ie뫪Z-*k׮8}ȐՎbУGOIMMᣏ>p?裏>n'**=HN>E``@m(- i?d^v_eT>eMwagWPWX>,B!EE%$$,|18XeM&&BCCqYp:(BHH TbpT@nAAf#""???l hjUZf=mc0fuL8'oxzGb0NrTzOnǪl{c-znuVQ=rrp::SP㾋ݛ !B!DI]v="A:vlOhhhbXfm/z5k2s ?c&@^^E\\oѲe zGBV:uꀿm(t% 5Oߚmͯ0 dǨt'NW>, lt j sEld+#B!U)Tk\]u^& t R\U5ZY̊x\xFޠe=V8^2x/ϋWWV\nõ-!B!W_}C1͍JP %+BHHWD8ǖcLxC q `*U5*FWMwigƪVZF2qعFB!Bi|R?RgEyڑ'B!8W]AhhHcr^p'5-&܎7E䷃Xr+݀hg}\9!bB!BzGp~c"3B!ӧغuP Y1{ Y (9\:::`P ؜+π]lXuB!B!B!B4Mcݺ L&;s^{F^ j&p9md6v'MЪM MUaB!B!B!Bba*L8A5B!B!B!BKسg/э9B!B!B!Bƍ;s$B!B!B!6vB!B!B!B!'cB!B!B!BQB!B!B!B#$,B!B!B!I !B!B!B!9BB!B!B!BqB!B!B!B# O<Jܱ}cv}liL x)ǎh,'\.vq{aV^CBRSS ߿^l,%IjZfjt/˯_􍏯hN-Z({`}9Ι̥I&DEFK3iWm$a|-N{<ǮJפ*c\LF _{ò]93>^ӡG8~Qi'Of[B"-7CU oYn=ͦZg;((( $$ 6a6 j;O!B!Bqn+%`;qi6m6x l8.;cǺqt|~ϱZYa}g2nK.Zg1~B>^) L``kׯ>dYgGJJ -ZxӍߝwxKߖ{ZSǴ&yG5r{gGf 뤜mcm̖m ۷޶_ɱZ_u:o.A}t.Dʊ=V^5n 1Gpʙs*-ZG})WDϗ:"q}t{Zѿ]&֧={2/>3k (L>9}˖Ծ},>[,x=zv=(}lf<"L_vo!!B!B!(˫0//23 b*.EJj*aa=Rg62y瞞c[6bl2ո"9%ȈL&,Ȉp C\v%cb*4\=^f3_鄛 =#Pfr=շcZ>OkMV\q%nϛVl.JH&WYyhC9g*+ߛ}Xsrq^m>>@+,.RVk$5-LkB!B!BO~q\q%'%g9v8>>>3OӪelmڞ}O #)칯rQf{Z@><ת|kM ,5=Ggnӵk={+\ˬ?ݷ)?/?| ͓`Ѐl91'i֬)Ͽ2v!$$kn.o}vm61ֵ ))DGEUZ^?@QT-_]wL⚫do/BjZ:Æ^>49%c}Ǵ}iY}qa,c2Uƫw=ϯ9w~{iEE5F\v_}-~*:' ysTU~U {yvl6f=;^q'}{FtNN"<,c!!!}Vs6. J'," IDATO&6}Ĭ% 74f)N5pf+_>΁,ٖKי7yu@DA%&zsh`~[_Lt4Mcbؚ@ddfɔ/b lݖH|ʶb곸k+E^w_/-˯ozx'~Xrc2'J"B!B!u_%$!XǏBR3]83ό-٦5M2QLe"6)?ZTV^ED}oʯNE: xw[4i;ȫowޮu}=Yv-r݃Ϗ֜gq]w^1׺\>~8nd2qǭYrU׎MXhrO'&qud!!!51h"\zC/W|ǫ/D`@@o߮-_e e/OUP5q᠁Ķh΢/80?~phߖ`ԕ#ne5ݛl'Ou}Vs.sg,d$1p~^5u.hS6ja6 FNMJęE?}Y?hvTtMV}^qlMH wݫgiѼ9{gǮ݀B3=cUU]CCChӺ nFZZ:Vk\-[ c޺d_@)rR ^^3}N5 MٖXgq !B!B!DI^߻Z>̜R]tjWWjZ:AzTyud22{hη?,9s4?yyThp`X~`dduRr ~mcrtDRSxa|kwyFfpQKP` N윜*IJjW>kヒ{cǔ^U]9w WOϽ22 ,jWVCΙJoU=gMY\5Z4oN@@% U &Bq@(o鯓s =b-''9lcH 8\:YyoDnR#m@BJp3=Ut"qPw7~TtMV}^qqI%&rUW;'ltRXg7o|/6oيO|o̳s 32yfo~_~]k`Td*DEDVgq !B!B!DIĨH4McK=DfspZy3ECt¸W379~:v ??սSR149#6ݽ=J)"#/( b)CtCLP` AL n6F_}U?q$֬%߹9NJI!LBBc^>كݻ' KMٯeV3WEmtTlކ'NVye_Ceϙ\PiON9PbW3]Km.R14ZP]Si`(}̑T;=b(her2#>:%Uj9OgBPەnhLGi'3 -BMV⼺ק(7k+VFуoF|^5~U޴>{37p\Mmߗӽk*u^]IMO+uCB!B!Bԥze?Esuo-p;^nFE-i2O>}mr:-++ϿZLꞶ_p:tЁ-iۦ5ə8 2=p6Yv-6Ͼ2c[4kμ1m^_ꘔԬiSr$}*"3+ Gl 叩7!L|UUW^Nj>cӟ[r2/ko'??Ni)6T9SUԩ٦m}Vs2r]|:Zpt4 2ȁd{Ɯ[΁dKk݃jsG v#coPտׇqq, ztY׳wb=UgVYM{דX>Q^Xu:7jz %dgވ䧟5^={{B!B!BSVg6}yGx'_\:|p[|~}~}ZŦ*˖൷@5瑇iӺӟ|'`l6ڵm˓=wkף:F`=(xI=<#\KxXYYY1ڵmcR֭&p݄u[oެFfYL ::Ç^Dn^^ xL c 3|t,[> 6}λ13=JB!B!B5%/תWX摖NDxxg[7S߿a.bHNIׇpwSR8tRQXKv6K6McK=ԤLrssiެy=u+'M]k/b՚5m^˫69SY5SuPc Ot'wy;C\XjzUǬ!_9Ϭdxi9N6rD׆ås2Ah&~u\j_k߉7g5"^o6&}tR6`B!B!BM$Wt(XɩShެGgg3ǹpMԃyo|HLttc$y(\ !B!B!/68gЮMkˡGc/j8Wqvb I q4`QQB!B!<"=B!B!B!SB!B!B!BB!йYBY`Hc B!B!B# `!uBuuxiuBQR !B!B!ތ;!ss1 2I`N[okEQJkٲ$B!B!8(ysz!Ju4MC!j 11vmۢ$B!B!S;!t8; !D 9\!B!Bs$BԊ躎jP5rB!B!MB!jhh!ߓiB!B!! `!&={sX!B!BsutRMcb; !B!w]!B!(92R(˭虪@xDMZG!_F4~}(@$$n'hzADDD-!̬,h۶ oM֥u+W;'!!!nHfMԱcqhVW8? }zYڙ=g.ÆO|)lSp\4iB-1g+WѮ][b[qǎ'%%DlXBC˷5!B!B!D9~Lf3>>p&oQcaۺ]r, lDD7]BxPUGA76;vpaNgL޽iڴ{Z~pYV$-EQ !22Prp8HOO'//)Ϗpf; \m}Æ^DP` ֬aן Iwp \~%* HJNeXrrrػo?ヒ;&33埓aQu4MG=&IߘNѼYqrJ.~~ﯿED{L~(ikB]:_Pgp\l8HX8t0Ə'44}NtСm8u ^M4_]t99~$WQf؄B!Bs5B5_Jsբ( FT_9X-U6% ##oC٫l޼RϑA`` ]gҥ(Bݫ:t]'==ÇӹsJߺu+6l 00X,s5qƍIIIϯ)0 1"r;w悷y}+\8h{zNNnxsn'==0|JQ")93G^q9OM5W*F]9ҽMoL&}{ݓI.*]l&߿F7k֭2fBKNNNvv\h4M#j%0=sV223tD|}}kPY B!B!s.|OOKTEGRLzZw `Պ՚SBTu׶mX~= "::^uӧO3x`W'&ÇWô.8NXr%-Z ((rwf\u>mEq-_n\O`MطofǗA]6mbd[ޝV+;_?l>/+^e@~|ifϙK$44T.>iO=!` 71d ~W\Ƌ,Wv}4N*IrJ 7r+O? x(Sz@1۫h?:|Ǟx lv;=u P$!q;Ç]\* ?~_ ~_WӪeKF\~| m۴fhƨ#k;`jXf؄B!Bs}A^.>Aà(bU#@@pZuOCw>}eeen:nIf4k֌-[GѾ}{-t:9x Ç0[f3۷g˖-;v][Fu6mĥ^`h4r饗;pA:fI\\\&'d.]ؼy3TIމA}yˈكA!!1;&ƈ/s7꫙:w{&su.Uk슕+1 ų[ܺ7/pax<9m:tȲߗs>i?hƿxaaرÝ~x\ 6ar:)?< ˅nwd!C}7_$'ޮ!;l;otU=O\Uu׳gGO#KۿT )(zZN:Q\II96|}},'q!2 5ۂ!ĶE4۷?@>^-_XA4/~&YuU˺6Դ4.rZ0̨ g+MH;xE.sBˆQT^skՌj\B!BsMvv66nd߾deeHNп?aa q}pel~*ƽEL 뭜ʼ#[s_:1cveWO<$d?:֕)_ ߨW5 `0pA.;K~p8ڹ*k޽t֍dZlY.}EQ8vq={ԸNgEwV8ٱy]{߀hm_B <+J֗! YԞ#񢪪Ǹt]GQz""T[eꂣ 8oZ'ۮklųhCTZ.\׾g\L&ǍehoY/2d UГ<ΡG4p8,en,n>}7hۂp58R7+۷gxx~/J27_VJj*~rЬiSv;YY*7gmZg2fo]ݟzfzRRRiѢyS[ ucj &rQ}ې~dKPSoj#'qktN ~E>a0VoƚMc%5S0ǤnC0 (*i::0al+'/ L&d]ǜdNfY՗Uw/r%Mӑ`se%h&Mx}&#;rݵ=g.G[׮(eO`UI˻. IDAT}%{\(F5O75kڔSJ}f \MxrtF\~k֭c ppS~$//PIIɘL&BBT9|FPP'*+UuԱ;ٟc=wX8y V:g;oҹy3#Tk% kv3 &w*NK'OWۤ([Tj[A/>V/>>y-//_Bvm߷O6$)޳܍V8;S4\ߖEPqm'hYػsqٵ~> q:t2;׾ǜйMgwD>˦wvTbZcm$$4M;];~PϼWTUQXG򵣴ᚏɯwp!Bq~v';tЋŒUسg/|R~<M5U|vvEbbZGDt,hI`DwvyRl<1>NfV=WрPw8Ͽ8Q rà '2-w[kN׋SE +Z&?H%SRR޽{=cbb'S3i/oSo'\GSg㣨lNH%^EQ+XzT<= QKH/wGHHHHv7}^Jvgd3ݙg`>V,AK~W-]v#IRߟj0bl$knMב _Cd&#J *fu SZAy)"2* {l2qg[eǶ-;p$:tnmYE%?3?ql۶M61Ň 쎴KJ`^JPdFzzf`9cXS^#ۻs/p?醐Z]UG%3.:1y{]NFbd^C|dPRR7}ǹc&)k>]rscێTRZja|7{ ТXN?m$6O4<ݻuk2QxL桷>oޥ쩤<3&]{ʔWqϫտYГe{k=e#NJ,_+}fl'j$l$j b bn5V?n2ƏyB\fXLp_<>~AAAh/M^zŵ'$$p0} 6lܟ2[=P$S7 Q1(2FUEG*_T='ϝɨɟ%?F>3<臵=؇U2J^Y$\k6 P0qdwCވ[ټm֬\HJ[tK4҆?{tttp(w뺎a4$餬e4]a#\u:9P~}F玙x]dFLj___MۛC9q>qt2:tî y󣯹#'eߧAOll,^Yy{U]8eL}1z] 3|/'.3QTVU53Z.BDŽK.%6&Ҳ2:Kǟoa08Xl9w7^{奐?0>=n@ ruE-sft:Ա#{V??Y`defrWKaQ^/f M{O@20{;xvSo9M/cFLON2/݋n -lҔcGpǓĦ< mzr&UTVUQY頲޻vYAAn6_>z:vV",!5&{~ $pKc^Ndjn]xR/YBRR,aذ*~}"+Af/ sw hȻH]#]}w)=E1(s90^_hv/aݧג7eb2`%63;̇XPPpM77:T] T '.X6FQnTl&AnTlG1i>X>]QNTNAAifF oTIv5 ͸὘8]yRxز{J*CY!; Rs9m &9~رK""ѣwme&+={&X"WS>?~gTW-ϊMp9lڰAJ\$v{.l߼F+}jlذ MqVUUQd-f1^3$bqUS$6.Afۮ=1klYkHQ۸)ql޺KǜAÃIH!jCyشaB Lag 78?Y1wV]_n9{ U5q &9~|eڊcVoW =NIs~?q8(+-EQޒtɻ4jP]%[IUXAY.&He|{|87?Hחp8ݮ*ʈBNZ˚:w/z*nÇ!!!_ðZnvBc~mˍc208fO>muCJoIθ{>Kd 3.k<)33ˍO]a^{ k_Ke/ѧv߽ƙ0s~k|>\~=y}t;dt]gep`.b@7,$b@1X4C2h ܎s&3*gP&  '#DGUOR3%Rk w:uo^L׫>&$c7<qGt3+gc΀sCyj,5tK!k'""-S"'s6!xk ձk;+TJ?`ܘ#:_+'M8Io'PX;*x]%-y=ûqUU=|~ #?::ת VW]b0X6"ٱK.ᷧpg73K7 Zz0bX8,U*F3!..Ղ$l6蜙Ŀ_xG*) Թ8TX¶5YG_qܹ( Qؾ~ LN@4~?Bn-nu]']e*헞%;=7 <ă,uII7F&WWdl޺]yI1UklY|^xn%jP6o̢ 1|ê=ec˾nn"g|1 IoⶥQUyż|DLdcxN:q:fr:38.>DTd[R=wt=˖Ħ<'^ Ӆrtpح^~* .Mxei1uKPmWѷk/n?~sz<$ ̬سgW?Zܫ{w&BIn-^Bff& GHHH}|A &l[yyB\k<WpW&S$oN<3g(X˪ߤ'M)䝛5>={ILLDu,YA1I5P\q%㯭^@=%;I4FXi똏E+  DGGSZZJqq1)) ;U6'{OXg~Κ^W'f0_z6Mk:G kJ wn `0V_}DY }CQBkFWBr+x'B d6mEYIOOoQ u+5MrfDq[hi-f"_<|TOF$6X=-{yOC~=T :]s0(2>Wi)liٌ *qq$$D,=9_w"3.r=lcƭ&T]((W3͔kCyz.f- vIs^\Ulظ 5(l .aټ10$kcC$K˒zQT$سs+51{:Ύ)8t{w}n]ryoLhc| Pi=knPLٵHA2/]o@h5k|Zt~s|GIu^R| $Bǯ)8䲐RV~kd&& =r'?q8oÒ'Z8տ˖-}j4jO4Ñ7L_;G!92nV=rR:ǵowno^+{s TU=r* V!J?ߚ_ЋueyAA߾/^O?ϫ7ps\MB=$񿷪$1!L/y^yh:|ZxЇ3<@^ͯºtk7x|9ֱ'-I=l4iIp݄no||s`H<ɜ;>v.`͜)-ಛrоF ZB߿:2=z4ֈ'W^3~9Y_0MȒDVnW.ʊW?GDZD>rvM!.9&^q5)-c_c~2s[;1{MD4̶H233I3qߧHwj${ZE%@;Nlb bFdId!+vty5??0N FҫkGna|1Dج蚆ȪdD~˲LDD&_I˖Sz gi)Oo廵ӣ=v+g>1v~8ϗau;+71yKŠnTvlkcey(_өtH 󊉗x 'ޕ2^L`RuU(DD|Zڀ)2 |( h\RRի雗tp?TU$,%KxO٭ &6%T\AANr*l‡}̈HHHsaFTUO>mOT\of`4DD%%@:_ZWeG]sy\l>|ʓHB1{T{(//#{'u}%s5ЪkFN㮜4J!ԓ]hxsEE[ntU,;XU5 V+ii/ɲ(+h|l?ok&7h|7WF ]y{OA%CݿKt킢T_\rybOyzuruihAܭ(IiY%HPVZt~dEd"KbZI*]:cNVYGLT%#f &&>K+m㹧c*qy5Qe*>JbG^?%%<^dgIpTl|~Y!ʢp:T9,>E6ueD_B{8|0m& J8k|7|ט܎KJUG:"фƯ{Yb  #ڪ 8Lf 3-;گǖxܥ$Gk6>99~czd)Y/x{'\ǻQ9q"/u%~M>j&K&cp:_0[ߚ#GfgJP@5 IDAT?jOٗf2aǎ88ު%ս;~_V^^!m`|;zr8Nƞsv+\1g%p T'?x(dy<9W=6dg7qÃcс޼ID|u/K= 7Yvޓ!χ9:xFl <l͚5#66&¹|[7+g; I"9VgXAANH.O-[زeKUUW_s٤I!*{O'/^$L|2F_1%aE%@U &l}` Gعql߶w\W KX(+-0蕛LVڇ㓳%C `ӹMoذaÆVݻ.\H^^^գGw֯_נ-[iХK, n(ج&*n"#/ziR" *UC{X|9)<yHK\{MUePļ[3*̙;qG`2{UE) :*O߀M2n=1gCA¯X;1 px+p_Ob7'gxG҈Hl^5y_{ YSYE\ܒd K:Jq:+e3$_=Y:ޣJ #,6%2xvi<2c!j 2 ʃYt9yW 46Y2x?ϛπ@mI;™e^זc;}M\dpƜ3`;W3HMs,3Dd5lY$t =[#Xaw3;OLuaVV&YY>ŗ_zm7o_io ON_fb,J,FTOT  O 3wOڽRv;_|Aii)>\=5AiS/!&hu,4Aveϳxow-Q6#*:wq RZ"6&neTT^^_ù!׿Ƞ>$g^Ϝƀ$7X|~yf, r$eS᧟իXn]mpBWoRS| OÍd& M(vKQI [Րd!Qu ]{Ga@Voy{0# j=EU?l4hFS;raFcG[d_A6zB|r2ӝ2bⓈMH揷ΝT ]tI7ܥW^S$.rmtӅ-*?\4xblonr"lހ~mD1zX,+zFG`0T[9g8EAU:#@n"#ѫ) 蚟1gAersrd9[?0m\ܷ ?-Eh2{:}4a7@eƵ/`Uƒ]xr8.#u`s:cPf 'KVGiq=EtB2% yLNhe/آo Jz~t91oKc;_}sp{:y˧%[#Gu%wn5իW%{"y|RyMMF:$`Ve,FC:"+  ^EFFr}nWλR\\»Ϥb?5+B=Wj!Bߏ;ldE]oÐ}MnV.FӪF|>aSصPqT>dIsA#id́_gT?{u BdjP ՉSRRdk nKs\0v}ΈsmOpmOχn[cܥ<5lP$b-髌11(11 _a#9DI'5+##uEFѳ ^UYAAG:4NS(:=/IczU5YFd>^}g[u,~ƣ7ɽȧ=Ͱi˘{-n0B//o/VUy(aYx[Op (,fuOZV|$cVmV|/DoMz/86[UYϧmil>uO>Xֽp] ;J^7]Ф5s 6ѵKnPw$ӰX >YT  B#"##rU8pVUz~TT4d:G|L$ύ^;^͟Inb; aB0#/=? *GM| bSTDFvrˬYQIXcm>$IނrJܸ<) n,\wߴk4AK`[ \SZ#i `í7\WbFէ7? j_n:Qt]oRw9,躎/ լDn΀wĴ]E8 M.C=Wf'Lߛ"#Xy-9òp9 lYbj0Qr0o٘ vAh[`*=B^@73ydsNLƄ^lmR'ъVCUd>}̐oNCg @݂>lIl57ifdd{xӖSn-~^5oARQLx*m-J9,tNta׎h5pͿAAqVI/孷ަOOgW}P/t#Oڮ*>y'::.4Fv6Qa ]ר(/ ##a[X8*(2zdDtN4f K>$;2~ Da}WԔtڕ+VK4\2(oѸu+kX^֭$zj >Mo~ hYYi>@Wdd$7n ~aPPP@ǎ}NUUEf1|>IMmXA fc`9nJ~#,i $Apc^/>pB<5慫on}JnD[oPuM+p'ߓ ]KD 5{Sc ὠ%Iݠ}*(H chc ̚5 >_$[$"2_KǛ;BT   Аje+1S :x~ߣGV˲LthЎ6p萛+^G)aپ p`duʑpK K@-%9ǜOMֽ;+++ٳgrJzEQUUE=6 $%%f0~q:ęgN㣏>`0ӢJ gMZZêшhdDDDk]9p,c4ѵƫsB=5&UupwLZ;py$}Υp% <"j}z?_ ^\|m':AAA8%YVV~K#"_ Ȳ ^oZ<v n]UE'/l6s3}t֭[Gbbb&5Mc߾}3q&+Vh(**bΜ9Ӈ!΅^-%Iwތ1 ,Z6m]VVƎ;d2r:]NA@96AAAAs*ߛ6|^sSgTVVJ u]ڵk)((hՄ$Itޝݻכ1qqq\x㏬Y~3 <>}4lff&\s ax$Ifa2\]v 2kײw6uʠAHn^A@96AAAAs*'wdx=^Rvh= CbJ!5ݿV+oňW7Ä p\$ ٌ(hiͮ#2QQ'G{IHNN&) L: p2Bs2&   B`N{{d*1(P;躎z""G6Rڵh`AhJ Qq7xLUdTu[@k~ռ'uU8ޝe8XgwqÿiF6:7Xw;Vk;HTQ裨2U59UE+(DScw\9f$nY*r#$zi~N[hNAAAA̩r~MeyEp9hZˊ eYb. {sNjͥ4VTwFW U~aMs%l=lP[Z}Ml<5H"dr L~ieJK]T 7pF ى(wdūlf*J|-Mn+5ZP? JMJ7FZdTYbA8AAd9=2*J" )U^@bR/vZg >M.DMܩSnz]FE".ST5]j͉UgJk:'/-VWUxq:QPD!)RBzhJ?ch ͤƨRј"گEnjf+ OoE sTT    H <,2IieZ^u&썫hi߭sp(rO]u5 p"V9 uVJ  ?ntW%oݱgxdjWGrϏ%#V)wjY1ktYvQ W7籯I0cuL4Y *-gDd_,欮d&HTl R 84&+d!o_֠*=ӌ䪗8x&:[)°Nf,d@۵᫞ɤD)m    ! )U&jI &]V*KG1;$y/¥!Vn)ݔ;F3oIVnoKIPN$id$VAiq=4a7$G*<5FC 8Pn啵IHMoJQnE%G gLd8 uk՜d !Ad\^W_e :YRL      p(X^GK|"hږM&Ēh A4ͮ[י]-\5oYLVux`\Lea:~s.7_tPSʭgc7Pe#)J71SMnqzqF+f0(AAAAA8VzcA~4McN G_baHk{c_1i^ޜ:Gh.C~,ъP_V\ҥdhP[Wiu cUlm[;[zuUAEAAAAԹ;lt@@^eX;ܘTGW׫q8<~UFwh{`wbTqQO|SڠxO~lRXh[;l$!݉C pjtv+xi|D. <>4MF?U%q dFU[c*޿.e>$`'6tk&"sh]=̞V}ccyk2+$D(zA/76Y_%L=/nKR#*s{TH!fFZpx4,F-|>_],eU팔94ߎZAAAAS|ϺN$Q,m5[l':\.~Ea=eMUDd IV1RUqyՍL2J|m6D]4TMS9?m_FE&"SPoʸ5b H,CKt֑#*AAAf.h hQ|b `Aj.؏' zhƻN&Ǹoe! ]i.5(S:_*ή"_ig#=FesA8U~Ƕ5b rF㷗AAAA" {ߏ,|)tPV't]?cb(mAAAAAN", ikC=,LUj&5)UѯKY‚     oH {~ϴixg1*UĻ7*FI`AAAAA" G4~?O= foLs"AAAAA~DXAݫI$|Ix ,f_2U9ѼNAAAAANj@Al'^,{ N?PRRʫy ŀ~sRXM|c۹≋9篟 [.+HtCAq IDATAAA8i9nퟓǬ7fu0)`ZQN XwCAAAN1N~X? c'(2AS_H=oe1_gNk_jrSXX@dd$]v%999ӟɓ8p`RuƺFfko6;lX p:!%k)6`ԇU?6Ϟ={ԩXSrwB9spYghrc=ޯ_$͛G߾}[pIxXVܹs;v,@̕)TYf5$v__^\xV_'.7&Mh`_4Xn/˓d&>>~<^|6 I[VYh={$..N$AAAAhK/5]@u7&jl/UW9=US_~a(3>Knxۏn]G[na„k[SڪcZ|ڄ1aU jhױC '#;?{_?puG͖)2$%jmgT-:FEZ:5JKjj(Qdq{m"{xxs>$raذa 6 *;wpss2DGGV[dP\\\r `~/l>ǢJMM%,, t:]?--<<<@SL&8 /Mxa  Z*AbM?6 z{$+VIDI ˵ăQr1ocCi- h4b00a .Ϗ&MpիB!BxLp966s+wMCΞ=S'qqqi۶O`ү 4Fcm}yҸ}69)puu-\ }5kV0dr]sk_Onٷn# z?PwEQq~)VՕpϷ~Fˆ#8~8ܽ{S~,cӷo_TҥKywԩSZ( 2ŋ['-6k 5Vbb"}?3ʕ+ǔ)S6YfBUXn-Z/гWoBCn1 fo3h߮_4kێ1cF3hԗ=zifILLdÆ<@zc\r+++Yz5W>O"-ĵ?bWq}ػzz۷jVÇgܹhZ޽KvXjÇd21x`|||Defuݺuw̚׸F3j(&OLRR]t!-Ch4̚5Cr\СCL6^ŷ~?5b˖-\tSNۦ?ҥK>|8ׯ_^Q DvXp!)))уqƱvZx;vɓ( ۷rꓛ;w3۶m˃2Vڹsg'xXYV? |-4A`9\w~lv{vK7:XSRP,| boo… խ[*q9L0/Zja4 v899IX!B!|%Kd RRR@={dt֍P:l󫯾j>#G2e<- ʕ+;w@.]Xzy1c͛7 tN +­3\:ی >?Μ%`k*WDB+ h4r=y^:g-xzybkkNf͊t3?^ݻӠ~"/mOJtt4a8+wp!VBE?XGHh(I,]!B!B+Yr_cփ>3#y?:=hg'ln{ ۾ꨑ8;;ЧOoΛd͛9r@n߾dO>L>s<ۦ k֬>{u>؟ԄX~wc\F#NWh׮Zjc2xt]t@lln&wK*Wy[~FFF%@n~}Mj5Çr͛kSNyX[X|ŝ+ >eƾj/ XQ޽{G`` 7"&6ӨS )S+tIf4ٳoWw   с6|K>;&) /?5k@p1> ~͋ B!BQڌ{|ϘL&OXCH=W\4AlP<%!s\ h[PռkY7B""a߰='/a]1R~>>>2B}vbcc҃qqq@z@yfj*zŶm۰w܊+ݻw)W׮]3h'F^z%vΝ;3_L&ó3xxx@||y~!!!t,)₞SAyy~<tڇ-Z06jKoRDţZ*~E:gw)Lܗr%"'zkμ>:}1~ۇ +'#Wh$66'2|:vFFRjUHNN&..gg+XdDl\͟SEPSǁϟ;0 k4"puq"1#(mYɯG7L?( 811|F2<'N/͛мYDB!B!dU(g^CөP~O!yWV-ٲe+ fMشy34nteeeERǎ>ʗEh:t0C}5kVKP 7qP/AŮѫh] jF#QQQrE6nHΝ^:͛7cҥXXXK,g1---ٶm k׮ر#qkԨ/Ǐg…DFF2|W&L/Dӡ( GuhZNJRR˗/gǎ 63gdɸ6lNNN,_EQx"լYu2sL͛``7Sn2y>p@s+-?JЪThk5 Z-:k Ly]P&Q<k,Y;) gOyy empzQNDhߥ+|BygN$ h4ĉ;w.ڵ~͛{ALTT:t`ڔs>Ϗ?[D|N?a|yܿqヌ'~0)[`yo"* DϳafAiՒ]`T>AΟptt$**g[hղZ|tZgZ_ la ),)M ƙh߮m/`^JaP1 x{|NX*+| 㑥 KѠjQTB!BQfSVY߹̡܌OV3mT~ӭ[7~ y1uT,--]6;wC$%%h,c\{DjUi֬&}… 8-[ȳo~ t=:*++px᧓ͯ7o.l^W TZѣGӠA*UdbܻwHgݺu˸QBؿ,VNƍsڵ+q2bf|g_#Ff.ݻs=y1o޼,݋JbѢE̙GA6rΤ2bX~}zl߱؆&߾soM1WпoF2^{MFc8믬_g''EΞwzFCBBymyn}<ĺqttdpG{~=z+a@6YU/?Ztt4iiixzxڦCvߦYӦԪYüݤ(\~!^SA1gKIh._J5B!B1cr.\,-4 f)?y AQƌ}HOԩSɇSۯ?? EQ~ywacYHNذ1HrrƌOxxxpm6i?lϳٷn}v!":paKzqoѢEL2{QjlUTȑ#yOhZ֮][q}||ػw QF۸coo_setؑsrMTɓY6lؐ`BBBV";';>R6?H%oRxKCL&GeaYf(&T 6tSJ  ot֏51` DgTk5J}hZҟ֬S,cw+VrZRSSbEE*WѬp}̙C˖-9pG#55JϟOXZZb0 mmCrS_zO}€}rߵ7 

|܃G{k`}eCyynܸdR%ڬMrW[xzz`mmM@InmڴٳYpϜ#U ۛ;~ܹs *`_e2oݺ\y͚r]sHྟ*yf.ggB/׸6mTZ`͛ի3[Ҭ]vWX(s-[m۶RY+U}n.AUPju=J\9LݶoRjQ Nՙ1c 6dÆL p">cmz3G:'Nyi۶- r+4/OOR )}XÃ~ ԫS|=KYꍛ7Ar=Fl\,vvv23ҭdIpMZ֖ԴT}ʛ_jʻZ9_^Ojj*}hz=))d2eMHH`Ŋ矩V1qqd:;|N$$$l*y}~ٽ˼'?ysU/2ݻwIHH0CBr$t:=`y{VDsiuVeBvvv\|Ŝ2{Tf|X$:&nЦu$&&f$h{B!BQf~+k!-=d"!(-E]6s)ie?,22;vժU}IA^=Yt)˿^JժU8x-7M+[Y˖-W_ʊt:l~oۖx| ޝ@Z_ۿ6ӳDD`2pP[[[<ӘM[f)X8o'Oa̛b|s67ua@{bbY6jT*o`kѫg۳(&?T*mZb}+gO:uh4 N}pw51XDVVVqE>,^NNg !B!AGkeٮUVUBSBa7'N0QƐbȒu''ЫW/0DGGa^yZhAHH8:9EAаp|-jthbC7=ccmMdt4'N̖%jaaٳ6^/?_d3m,:tJ ՓUkzNx? &NړH~f8xzfM--ծYpQ85IIITVF ҫ4kڔ=q)*WDK|ݺtaמ=_F&hΝrB!B!K '?l[BԱCg3)| IDATrø4E( \?JX5 Μ9ɓӗ?8~8ڵCףTGᆪ^zxzzbccZ6Orr2>XZZ̕(ܾsш{ XXXȹ^YWyxyzV=Ez},Z~ nEZit}{;2Yd2b2agkkPF ,,, O!B!'N8ʲluj06ūѦu+ʗ B!B!BLϞ=yׯ9 xM||<ڢ %.:({;,,|B!B3zO%!;e<!ēB HW{GkSB( te=BX޽{,[ gggso山1*Vԩ< Νh4 ^ݻ( &鱭,DIR6AIX!B!")IsKtz*WUB}0~x,XsTB!B!B3Q)⩤h8s 5kd2舳9R0Lh4ʕ+|r8=`oo%jZB!B!B !x*YXXP~}QX[[ceeeeu:\xg@|||&M^!2iRPh44MB!B!B!JKNZWuhPИE?@Q)8,iii:/_HsFcggWB!78qwwq8;ǩ aZE ʹ5Μ=GRr2hܰ...?q nnx{gs9Qy[i\Ò6Oս; +MXx8iF#].Ϝ+`톍\z5߱BIHHuduKݰȨhBBCYj5{B!B!c7cVΦzlz/[Č[Kd\kLZsD* c9kׂv-0{r敗cƖrˤ&ָٺ3lٲ+WyРA 4(33i"40>xf`<ޮ]8~8...L6si/D0k,@Q DvXp!)))уqƱv| !x2.ZSٴn-΢㏳g1F#QQwu5x.l;0 ^6`@Q8,j)_,-~ՋADd$nd Bm_GEDF2dHKK#2* W:_%$4o0Y(9oE~BdF:oKZ88x/Y !B!BmsmtnTcq6jn~e^Z{zl XgH df7#L!6>TC*: h쩦C%r̼\<p֮n,r16߉%>=I̝|˗g\x:uy\0-[7H{ѣ~`>/Dƌy~gd2q >̒%Kd2ѿ6͕+W8w@@.]Xz5ju$J !(Aw>xb%#۶5}|ͿR=sfϢs󞽤 $''`Χ6mRmZd=Og}DЁ'cL2Zǟڍ|>[_f|4|YBBd[ }zu\?ÇNu} B]G];MDRa2y6l̑E&}ZlV>4=K la`_Җƙh߮m/ePۛ;wb5T\WP]y#%%/OO^^uI()Ony5%zvG9+ LTVT*~b7o۵UT*skڵ0ֵ(=żL=- Mm7TigGؙ >oww֬^MƍYK>=X固h _4 _wRK^1ÝkweM<3///F#?sABB!!!t|B! I9+g޶e6YOw[а0N9'dcv,ZUρ/7ؾ=߭Xd",<@_0g''j55W¥Kܼy$893Ł >̮=IHxQ#ѭٳ4jؐvFԬQPQ׮ 4, !W

SX˙Fϗo=2Njm0$$93~Z;VҍP.9~yl݌޿}yh絰ʒ-vQ]܎oPemMx6[%H"saYP%Ѽw]K`DjyR1*H PAR)NSfMøiHi'*9ܟJJ"1g1[蚰x$Uâ\(-LKbv&fӨps,ߌ{iZg5Vm`Ɔ5o%19es`b{'n@ݸi E*9q{>@G38@"{nVn; FivE@(3,kin0a2Q5W=(EJ}-)-;j39=6F }M(عFef&Z(˜E0Dz$QYPTJ@vZZ3#F8ڂ(KQ"Q &Q(j!MP4MXځhTv~yn6`>X4~2-:rsռYiՐT yF]x[{>!9')RJ9\n_eZok[J CvI%hiW% G>G$Lw&,AKCScC(s(B{z;P̦gN6=㫹 RP #oZI& &ibb0 ih!3td' ⍝4R.2>|Оqud^"Q շr)_o>^Y0||/ݫijrP 1GF9D\ o}ASv?1 ;:xtp?B/H{df#EZ"k/j:}+.C(*4 70:jH>7Gk*4Mgnv2iXNRffj2YCjv;0(ժݏfFhn'pW*p1yIՅZחRC>\*W .T K Mrx8ٻUTERM-}>Zڗ~ʥ(JΕ_Փe3n@j:BF&G $&Sa.TJFH6a|R&b躛xS'` ii[FCS7q" M(əQb N Lo-aSfnڎ$ڱcX/0Б^^?C;HMNNՒR)躛B.+2_n, 11h4K\"3\+hhadpr1teuWuRs<\=MjafhhʦfǙ?B2X%o)D"M J-aT92FREkfP|;"JTQ;hjA[є%r$^_ff9u D-uiItmxvJ@\N0;=bˎv{,H2|\z J9nr)/Xˌp\\1 NM8WVWZ&iNxVhlFHLj:4 ؛Pֲ*]z^ 4fB9S™U3A-}J)jm$&SΩyk 2kU/MR*|E.=#8E~%3VKPt//\ N}N-H]Pؾ֪r8E<=@?'{1be˕ DQ&F!B{-Aa'u=iR_PI `D:qBjiqUMvpxᨅ2*2*2*2~첖*)%݋ vI<(UUK#(t^B[ڷ4h9E%9ª'*%v8888Brq>{oeeK?ph~/C,->Ofڗ/'žGzݨU]]ۊiZ|?{ -I{gG9ܧހk߸ss'q{A1O{Ƴyqq>W_qAerE>_gæuMJ&?~m`Or|mkm7N?y:[G߁eZ|0H{޷(_=^sFn&_<|TEu+k]{׾jGsi  8um@JyVJ{w6$g3LM{}>]hmk2-F$icevC?Zg.m_U&'f7Dhmkk4c#x};zJ#B2α#c46Ey]3>?#/ŒO!)%@`Kg&O?o~.VFU沌 MjF5$ut:G\M?efgLO񡏾׍봴6P.WIΤp{\{jFT&'fǿM]M\ASK EQ&9Xܞ ΤHʥ t<.ӊ˭c&9|>7Ss\:XW%ledh|Hsk aEAJ36;e09vtֆzMrLϠ4[4ёiB!?h~RJ c *-~t2\i݌Ų,pufgT+=z}'LO'BsOx)+Y^7]ΤKxihYzRs\9,znj;?|C~n "{1 GPUF۟Ɖ*ժG7ٻ(w1-Rawr'E\Tp\OٷRJ6mY~uS-#;ll%c;y[!hmk]/C=#?N6+q/3xlO  7]o|(bv&E_;WX`nZ !8|pWV ʥ*b7uomc#e~<ʃ^ͺ |[Cdy a~׾t?35aYȗMP5"SIn#Wn[dzOO|gڋ(:ߤlS5L4U-\{S(6r* \N>o? n|N8-}_%|сQ,ˢ?دvO}) R{0?䷱7%<źx辧ILshh1px83sl|^8̧s%@!`.oKua{ݰy[^{%\O;7ǎOfgRś[︒_6Fj.r͗q|0 Y?w2Τ?|{#`{Zyg;7~P,qu~e[W3yJ>w R$7z6iQ*-7bxNzXgcl| Uؼi67AjJX](ɉCGQc|O,p 1MX~CoT./[kp9Go]M $s_?ņMr"mfulڲ~F4S ~pt`ķXqۮYd#ȟOv 0>ࣿ(BKk1 CtV~-'x-^G[~*fs|\s&><7IeٻGbuDA:3W]]-k3T,?qx5+)ʴƙIfW+_17.# }<>Gx;oa>xϯA&g-oJ*oK}vע 3|oCì߰|?دMW9=ƦVF;y]=2d7n^7!b#\͌Lshmk$JX?.~w?WxW\EQ(K W|3/ ZYcG(J|N;[XEwO 9ٵc^{7?vtg/6/[ŕW[t{y<ȟ|}%#^VS_}C?syٰi9ǎ 蟾6>o? -?}/nN4:gyt >b?~L65kd25׾bVdrbѱQzzzhii9F^TPUsWvx}EUU^kj>3@,57nan >7mx~ͷmm`#d3yi äT22eI C\uEؽs :[ex:$IvރAJ }^|>/ 19|0SS#B{N;3 _d/z<]Nm.%HH$.-WoH4XiH|~ϢA^JW >҃˥?}۟(?CU4Me݆~mG>Hg{ڛ<>eZt-oa^~Y2֮1º |25]^Gv+VuHk72xl|q7zy1&>0kp .?zڸwzijrwApu\{&|>}6WtⅧj*iwS X[_{%#S|`0ROMskEUP)sgﮣ~.ۺcI<bjbcV~p$HCxS cAT۸HlNEEmisRMo^cxj^۷w\WywUem 6&yCH^? !@ldV]6;;Ͻ?Fs+ʒ,c0>f{<9y|8G׳vCvh4~DF0~d2Ńo5Qrssy ztZҩ4O?yj;n^u8r|o[x?~f6niZ9z7o??ݷ((tR#O-54xOgVc7ݲ|wUk0/wuF:|'a0T.ۅUZ"Ѩb5~>EA͕KrVcf4Z vM[uTT`2G?o#!iK36c0YG;y7pl|S-upHwbdEso~.k3.|}!#MZSdKغEދ L?E*nl63y9/4!EVw.[AǓ?{"7vf\;r3/xggϣVͼ]=^m!)F_u2h4J,0A  qiT7OҲc'NOqX$Q4.bw9H Nqrx'{ ,XmeǧnNvONē|Abv|]/O05=;c똙v]euQ#7/a֬^c-oȱ0-$y9v(1͌OOy;K5Ju˞2UUs>4umKlɒ(a).[tN!7o饩VRt:p%l6.+{~kAȸ1#-xYt:-E  #ǺȕrVpqTy$mTnCiIqAK$I=gskeӗJuy_Y\-/(’mJ%p,2mj겍aQT+m0C}'uFe'Kj.;x\u:e-`ctyKKA 399Mg”\? 8$Sq҉$iQB#M)hE4`)++[Rxg#FCKKeu1|]9~cb1nwvOZ͛zok+vE1+ҲI2K(Iz\NWb0D4iR6z^'ro{" #Jp캍iFKK=% ՄBaz=Xj򟾾%c,ǹs瘚rt /@wwuyR)B|-̙3r(g=??㹡|bCmmmˆWMI(ŘO{<#xvww3>>~&"gΜzzt-ennZٳg>v$cccMv98sssWu )jZjk:x  M'O{/#{ׯtuu~d2IsӊejZ3D"LLLs1I&HD$lb1brgET$N:(-YF:&NyHD,#pyLplƨ 5$əBr|_0YKL& Ë(~[*cG"3dre*"ȑAeuϖDr&BBb1zB!Ο?/s)9sfQϴͲ%EmKKr L&㌍199)>11/dO~^/h`0(ly###I/%ێmh4*dH$Ο?O8&ϣѨ})[4gϞ%x&I"ȢFFKzer&;e_ڇDQ$#JIR D}h!333߿_AٶKR]rwjr"Il&tp %<ضm+:$#**j뫨n300DqQ6g|b‚7R\iY H&j*RǏgjj Z͆ tuuP__OCC& LMMֆ$IP[[ d4~N'TnΟ? 466R[[ɓ'hZ6n(?[OO]]]hZ|>ĉR)***PդR)t:@V\h5544D{{;$QYYIKK Nh4رVl66nȹsxF9y$>6mĉ'$If3;v p!A`||E}رcA&''ZLNNp8P՘L&9<͛7#"پ}WX$z{{ NuV(mmmT*8[lbp9Tcc#(8nM60$yf:::8{,YgӧCRvZ8p\.6oLWW###hZ֬Y^4S$a!tl޼Y6DhnnfppX,F}}=Ǐea En䶼馛$Vm۶QYY) ӧOgrS l۶MFgQϟ?O2dڵQ^^NAAOvdz۶mخkQV/HYP(DNNFt*E4y7ܹ A2"JQG*-b0Xz5yC^`_8pl߾B?N0ĉfN:E   Xff CpI֬YCss3aI&[N~xVZZZhnngǏN)**äiΝ;G"C#lGL&=zz6nHGG^I(۷o'HTUUA֯_O$a||&-(LOOsNl6$I:;;),,Z8 &HGmm-+Vܹs~Yf yyyT*YHt:z{{x< SSSe&X,icIJ:&'',*/zzz ضmގVe֭EljK< (**bll>7DjeǎDQϼ<F`ƍq)bZZZ8}ig$zxQ)++#??VK(Ԅl!&x< T 0TWWcZ.!N333#/ (,,DA,C("/҂˺ 0JAQQ>9(^N'׳:?. ͆f, Fq8JJJ$I\.tLgI&'%nZ9y$prݝN':3gPUU ;wNnfT*ߥ&+)$ P]]M__}}}TWW˫L 柬h4RVVjD NaaUZsΡhXb^AJKK#0;;l6V)--' s FGGI$r?-,,rQZZJ8FEN'"XlS|>6v;pǏ366F" H\{M&*|L&(..&HŰZt:3 a:͖-[>|xɽl$iQɎxtOh4V0??Z&//V^͛q:qN'rG^~=k׮EVN1  x^ e1_Yt:d"{^' ɛv`0M$aƍsiYjńB!\.@#G hjjZd B4 :u)V^MAA`\yII`0ֆ㡠r룧h4JII & KKK ٳg@vIRÙ3g yxll ^Ϻup~Ξ= EEEirrrx^:4m]ͬYB(b8}4(i& HZ-hI}}=cccXVl6.X,FII ـl+a>h]VK"`͚5$IZ-ńaAP\\,o~0 PWW0nJJJp݄a\.jz{{PVV&4͢JKKAm+MYYb4鼘Ahnnd2h0dBLOOSPP `f&ٱ333АK$+`͛7V! 2>>ի)**"0vdKMM `)..~?vLYYlذa`0HQQp8L^^}}}//Qz{{tS^^N,]H$" Պ$b6 BLLL "Ν`0rdff\JKKe<\O?&" P$IVZlfttq\. SQ^F&\_j,a28FVdᇒR*jQ kA**+oAXTBjF&f"^xy]gI&˺^Zi롯3gΰc~?הqaBctuu֭[R_~JϺp /v%s]wa2_Z~$]Vc Jqꖴ/RuRReɥ>SPUUűcp\lܸqɶ299ɡC˓7`7 7$5ޥdw_J/eץZBXr@]3_sRaٍ-wK?/..^v^>k^Au)ʛ[l&R}AbsNJKKT>K]KiNNe` NwUm7K <1jYn5֗z)$jZ>ŸעT*%ϮBnJ!Ss IR!555W=]r(ԊfBG7IR^Vb1|vJ刢H8b\w8$IB!8q{b1~jFH^5fͷtZvgu]A`9DQd||kyxE;\ f,%W#ȁ#p(RWWڵk9v[ouр$I>ɸKO*`̙3lذ᚞+Xɾ} ---/Y|Rx ٳoK1;;˞={dn^xܼ` B! .x^nzhooVKii)6mzLb>_WaVX ~sIdܹs:tÁdb׮]vm$IæM8/%NsqvءwOUUU׽)yrrrYfB^/o(//gӦM$IPTXE0h4Juu5rl8Gqq1@<p8d"A<}|e?`0d"N,3;;KMM xAJJJ5558xp82KUU###Nc $իWMSS6lXϢ~_n:9tttvZI$T*fff`0L,cbbP(NpPYYI `bbbΝ;G2N__HLCC L&1Ls=JQQ*++`ll!,Ass3SSS t:illHcc, %Ibdd!L&V3>>N(=Ef3L&/ssٵk|ژ|:yժUxt:9q^ZN:%ŠbݻV ǎcvv^xшduww399ɱc~g /|i!hdVqD"nMgg'D"ECP___4\ r!3::J*d2aZ9r~CF)..Fq1h4޽q?NQQђ+6Vի9z(]V{yyy;w9qݍ">?:Y 8 p8PɄnD"A*bݺu\D"!ׯ | vAj䐛^efQTRWW0k|D" 999r bsssB!9]СCr˼O5anJoo/=|7 H$XA455QSSN JKK@SY[}"pȧ|u*;w~A9J˱hzgWVV\J @UUuuuTVV fʕ++ |r`0@@p<6m^O?X`0H*`0mR***j8?+:S̊`=gP QVV&ksYt:-ڈFk֋'b#-qgϞEo 2,ȯd;y$@:YêٳSRR޽{1L/̚5kB]{w6^ =߿gT29R]]*J~&ZpM7cR999ܹS3].zi]zT*%V9Bcl6˱cꢾSN6dǎrܕK'X/ ^UU%Ğ())aÆ y ۗnJDAAsv9998pVKee<\.:Nk.ZZZo*cxxPgLv\,]vvZZ[[6ͶC?d߁lswt:ٵk۷@ Nϒ]eeCG}}4 Ҳ=1M6@vy!Qn/߉ ٌFC#"+V19ҚbժUgy`0(k =zDcc# &QVVhd2* ":;; N˶3gpI^/cvPgş8qQ1X,\.^|w`4#Gϧ#G`2\ ={8v  ֯_Okk+ ?Bjϳ,0vR@P(Ĺs\ߏhdڵ 022B,BfC$\d4a0(,,b_d2n_zYg~_6w^f36mbÆ ?~ 9'H&r}q/Fp8rVfMXvJH$v2BsE7ZVvAL&rхCFaر eOVJ"1$>O|ٕA,vt:V+dRDVKoo/Νc۶mh4v!3>>N,'(7nVhkkchZ̬8Nba;fg("et:O<D$ D6V^fvvPXX(M$ϞFEQ5G}$Nh4*.-f~}*aX0L,lW ?lHAٺu+~Ç󖷼岾]d[Vv[ət:DBGUze\PFgp8h4A"T .o߹sI/kNɆ͎-"Z?c<~%I^Z-[j}Y`Vi^I%NsIZ-+W|E0l߾}PTEQcLr-89qĎ;^i{rQLuʚT*ͱz MF077Ç1b1+# 'ͥ}E+P&_V֖L&̫!bPH$B!z ;ƊQCվ- \梖5o :nQw? "^c(]AAA5"^c(]AAA5"^c(]AAA5"^c'AiIJ8QL#^zThII&,L&021dj @T*4-b4m^/ Zhv 7!"hX,vCzIFDї]/>ߊjƝ/Ҿ#YhD""JR X&Lf<8DG5Ys?@xQ%I7 .4?s'??tT|77۹-i~g C Nr_b=Q&?ŊU@w۩^B;n^C$?z<>awX$ QQ3Z䄇Y*pXi!+ i^U9r|r;3-~RV_繧Nl[v[fIx߈^׏eth} =~8@o&PX]ᅴ2&g"J3:2Nw8zG~<@r{p{ds}|#{?nĴHSK5M/oeiW+Գ/tqs4`FXqbuY|[?0?Sa' ^˝ngEsN( Ntd|X4E"Dz"X͆Mp8˖-Ix7˵-:$Q"ŗEh4JWN(FSX,ނb_盼]﹃H$F&LѪ4[e4dQdͺz_u$zH$SXm Ϣ`d6AXHw2:2M<@c1.ıNT*77mcB>ŏqG_R63-o_1%yw)W%{0 8(jd*N wmezsY{Ot(*qE%FL@,gh^jJ6 X,bBQg^Z&O^rAlں\|]}C9Gx}ۗ~M+{5U?[/4\J'jhG h֯=;%"(^Oyzǝ//RD;w:f'H!+E%$INvc9X \%ejBבĸ\]W#x|35uet RU]̴w= w#KG-Z[WfL]@<3CBBVc[P$Id6P^QÝlݱQ&'=TYT*w拟=;߸Z`lڌ˝a~AVyU hX,AYyNfM5z.;.ܼe[H%0x/⹧o!iQ-6м&Au?/`O[48q46Ur;$$'e'RZO:Tk76'4Fbta Μ%OR\K, 'U5-aR4ursVc{arCuM5}?KgK!h14sMx$dx]Y#]ã"K 򡏿V-k{O$tkjӓs| Xfazn~z{~"7dL/ |߂7< ¼}wQX?|_63woy|QV/X}}2~ٿ^=J%_/TזdFM.gE>JQqrUJ$j﹃?G,`ælւ$I7a5>4,>BvffwFމ?F$7Z_)ӟ Nm}Z5h`WVTVFoq3n|3ߑ5O(,rRCnA%~ ʹGH$ͷމJy7bZ·(?%x{ DV8 /wm ;~I$R)o6*(d>7o~;7ݲծkM1(.i[Ϛ`BFؐYZȟ=ʖz<́}R UMOMw'`0\C&1Rݻslݱ ;?V-[p:^(q{" _^b|g?IO16Lf5ٴu5?\pjQ^{woooT ݆ \f %s-"d Nl?^1tZͲ 7/ $Sh^NfgD Ulݱy?( ة)"H;ɬj5c4jn@"j :|(P8~\)f)Rn̂CN!k73Uo{+p858mv&ZV Hvƪ+BR111I$뤠5 4g14j BAII1TU%ʪlعu  7߲ A'w399Û3}y? TVV,IJSPPPC冞KVIT7HFBVe" jJ>=%311ř.|!::ps8C8WeTAAAk˹ @Rm#O %HH")pjD#q'(--n#I">NGs ߊV`0xݮJ0h0+PMg|pgI%h4z701> 8z N{EKKMM rkV"7GpkiC h7DHR}1@vnu#a gcL( 'Q3;0M!HWQɉpkM7R܁ d~?24?yr>oa2z Hx,n`Ӷznu#ij12\ƮR.g$ +O=MW38'H H{_@Vw?w13=Z,rFڗ{m;WwIuM|ft:M2Ȑ$L&h4i9FN&$Iq%ӃhL& 3V+:IH$צ $IVuE2>>jf]tQ/F E2L2y6;R*[#]̛" ÃӜ%SRZ@yE!NB0=]$SLLjm|c)z5 6:Dmm-Ź-|!D={rhoogϞ=Օɥvy9va.&8{,TTT`doKsg>^x^h4133ñc={a"rRqF#L&i)$IСCt:\.ײmu ٻw/eϮ tH$r}ZD"q$I؝n| QB-=3C:%NAO4g0[7h2"I"b9(Fe8, ,+#L200@8frr z~l6---d!"@T*E<M6144ݻqݤioNCCâ{aΞ=Z`0z NHaa!"+WbD;t&[@?V|>w:T*J" q֮]+ ى YzAڐ$YENfgg)--pz#3<z{q:۷h4Jgg'455VQhZ<؈`@Rx<={nΞ=,tttp9=rH|ܹsn9$mmmb1YL&w&s ^/N<vikkc||vEGRXXVvk4L:;=TjI+ Wٌ }Qvqq|hIbA{+˸ޛظy#; N9H$yfZ-TUU188H0v&˓} rrrp:3I0֭C$C@EEŢ2ALLLV[M.KH&R)ɤܞTYn6`ddP($a6l2::Juu5Ǐd2D5MLLpM7QTT(L&˩Er)fggt:M<GRQUUE}}=d2zzzW<>yhte~!**rG;sSa4pxB0H7{3 l60 F:;;eFᦛnbbbݻwr*IFA\`t:L @0TDIj~$T ZVEPe1r]uyLMMQZZJCC ej2RZDvFF2qnJ^^*rٱc|lFc0hnnGRQRR^6ٳg<ZDQuֱ{n~_騭e՘f֬Y̢"g)UVq Ν;$IzdϞ=foNqq1F6lÇ&775k`j_#GՅN^<O>$@m۶QPP@UUh4Q/a9B,ra08r ^oUPR-$J*BO^KV1Ll[ET*%Sj5dRl.uw qw\X^v#V$l6ے[H" ȤóX,V-JH0dBբRbjRj4̆w8&b20L$InPx<.h={lقFj"\@ʞ2$IvX,zB+(,JdH$Urv?xP^QxCVXI֑N Fz*L/#CQc2hYU+ vQ̘ꌍ?qkX4(J ]xnȪxa‒$o g'?zia5̷[`x362CM])3a7݆9OQ(,eOpw4@V#I\Z̘ʤSa՚:j5fUk(,r_VPn]DQD%r9cLM͑NJi{&@~=HHhE+f7pgͯ?#F^^N<xCs_h$Q}0յ%j?CAiqVEx|x=~h/ >ɷpme.}:cdQ_EIݽT t[ʻRt:viXXʾl"_IE$(bZĝ!v Y`hpg:յv!x rhpG;E1s; ~]CLz\+$'ط$ZϚu YyD*]ا"_WTzm=TjkB"3zݢ2 oI"SSslL4gqL6]DcS%d=/"E&4cYH^/j5z)/_<$A_(%y^[Ǒ|AlkaZ?{~ 64r֡iǓ$'f3󆻶RSW d4>(Jxh4˹{(鴈o>Z@8ɪu<-]y8#ǺEqwܳyoׯҲ|f^N T9B+c#$EO%S>p'{|oR /%t>~~qtz->^37gzr/~^ zrrxawX8qIh Ovsɜ'@( T"?yo1$I240AZxc| ?\@F._,l[- Nxf <O'_ .N!ljۗ~ݙwc|#EO9?c3|ߣl?XcG;w41>?}Ξ#OpY;<_8s<({_hE%sTKɳgzE[pLܹ#:g:pRvݾ ńb$/߉ZfjrIfL2<8"024 3 N21>^hST˃oh$yX_SGj5O#L񋟽]Ư~"j_R 9׾SB<>MO L "/3=} j_<7s"C|)FIӈDUu1w.,$64%9}Ұ⚞`t٘.,Ut_>=iM?~FAfM8I> "719jMoI_6iA%0;@4}Fﻓ;ƿ~:ёiVV#J"յ|`w}$[w'pXcCw0s~_ܧk(f 'PUnf+)+pXy飨*N|0vK8{=/`&v?{> }Gq~8 b0ꖴā}))ͧQD[W$֯<4CS;o毘GYE!b6/6?$]Y)v?w{roc˶4cwX)(rf}g:@Ջ xüԄ_8z7=x+$rU ܟ}{OSS[S9DIYk5 {w2<8ya5D"1b8;PX~\`EXyR%|iF:0>6D0~c Ab5_dEs6P]]F]Vgm6&EnfR kCRpaRrFr., HHaxNB9{d7q-k"|+PUSLvƙYAȘ6ln_8HZqy $I0NYU.)$Q4bP3m93`b4qZ1tZTjbbq:mzZ .'Hēxf}Q!ҢHÊ f}cLz\ L0/~W('8yg;,JC]hNo݅C$63ss~R4:VÃ︍ٙ |[BaN_84'" Fp/xfrlt*MM][n9ё)zyhu~)ƦJ-~CfR Ôg6m N[;7N=.$޹D'O:%TNM&z-ן#нY@Mm _PQYl`yRiyIEy"'nYKiy>{OWdrÚ huZp>˗CD1{p(I҅f } =#aAY&ccN"$RXG?rr.{wo7'?xIffqlޒy #'Zb5bib9R=e3c3|o F`8ߍl7׿S>Od2֨_g0 i֮o$/)-l{t*7V%5%anc3߹cxkKx;^O~4gtZ "m8 +/޲k=Ǐv/_?7gK^^{YM+()˿ğN?S9wO2 8]vv޺_yzF4CqI-5|;/}ׯ?}&g D)fgqlپ 웝9}oev{t3?oQ]P vY :5?f'Ʉd"XvS`"_'\ɓG5 "I7AjQRI^ QM" T`33^ۀټmM$j.KѨˡ09Ұ-kh5*VV ^Kn^M+0 :))gxh'ef#nZ̓oـV]6ooob5uRC:FVӴ50 V/pѼZ4-,r|{dj5NM~ԯlh.JEM]EŹD"qjx`2֗:ZMnl\Q#ǂaie56VMEE5u%[sK N Nm+zضc*kKI$M֮o`Es9N+"V;ʪu֕ȱ\(X&p6V ̪utZĴHEe!S[W;A2@1(!I"+ޛ O&1[uZIxfLOe΍_/']c01 %A@wub"N$r(*H' ol6q|j޾[8|$NݶD$AM\Fo+g5lڼ6cDVHǧ;0 wuo:"$CƆI%E9fJJ C Jd{rX`DpL&:9az@Ic0Pm$0bn}5/p}90ťMW|V|zOfkN@ KLOOF9re.E8^oq5 ..bh%TE"5Stz 7N_%Ȥ(jW+T*j ǯFobUW2DV/9pNwwzJ,s͂=oJjќO09R3SAA O}n)l޺[[XqC/v17}y+1?eFYYK>s"l6Ο? /b`Z9tGyy|}?ǎjʾWÑ#Gػw/x^˯xm[[^«O4|1yOYYgxcǎqqh4n|>mmmC$PXX("R9x j/ٳg kz Bv$5 '>"vKIٳ˅|DCCCtwwSQQ; SVV;_oo/Ri۱lttt ڨb_|g`SFttGI]TFdۇq2 sG"4/>A^XlDQ?fv&@$1T*~+E^ (((`nnh4999/!N3<,wfx>Nnn.EEEip8x#L"`!,nJaa!F^O*F:;;h$366ٳgZt:0ϟgll Á^'Gp93Fst:mmm0<22B(d2122ݻ)o>Ѩ @6X,FY]]ݰ7LTTTrt,..==T*o( 477k׮[*I" 0 T*L&{d8pN0RdӦM߿V$x@ %N|ܹfeeT*+++LMMk."sssAٷolV f( ͂ \J.^x`eeQE &3>>h4 &''y'z#Iccc$ J%pX>77 b4Q*,..H$ htt}QWWoJ8㡯 |>/ VVVB|}:nt=B,eee8p,P v܉$Iβi&qhMYYنTl߾{effQF#LX,F4E$ػw/P@ d2dD"b1bB-6Hlx.׳o> 1|>\Ӽbhmmj5Gs9"c0b eF8fpp-*dkQLf; ~?CvhEZZZ8pbI$IxCwoii~<gbbz۪>xZ}ǓP)x;b.o{gz;sg޽ t:ټ$KqkyJ^׋ng||\ÍF#/Ü8q|3XVJ]' w[STo#C2jvOr<N.x=,y2~ƍ~n݊Fڗ\B$LJEMM ׮][JD{btk_$ _3 /"$:r8{,v:::2RJnfffX]]e߾}1==ͻヒcTVVxl6c06 :D~ꮃr͑zY^^FdbYeXYYI(nT*'RYY墯qj֢hb΢P(DP>f癝%Nvq~vwaTy***_[n/jD"!swUW}}=x\ d\&e EY"`vvVCχF+JVWWE4O:d21==BncZh4T*t:իWikkC$.]j7CۛON_.i׷5@MMpϥR)\.. G<pPQQ!lR2$Zj5`eѮl6T Bbaee%fxv曨T****lTUU2u8D"|>F:ʕ+S( _m+zذ0d5ħ!y?GyB:uJD% ;Ũܼ8Nnz__YYwޡL&C:g ;a/%^@^>L|{#3Z-n_B!4XH-Ű"Rn,=,j^WD>xVERdr RVE}ҾK(Q#?Ywdه2JFV}(Ǚ|>!ӄF$IB?N:bQ;yATXsީS!R* F#v=JŢȬ|ЍX,V|> #ur9.^("A`mիLLLjER155` L(|)/{zzbH9GZ^^fbb|>.K" %d2\t }[ݤi.^HYY#` RUU?2::f{*'IDww7###"u``TN].d>d2(7n044Udyy2gaa6n$aiiiCF.j\.(V$sssTTT(׮]chh BÁB NG?KKKv&''g~~^D=.ހɒ:ɄlY,,ˆFvf dH&,-- @I,,,ܖU{5Q*qI}IRB!8tx<"Lh4"MVK&,fBAT.,,l BB.@"EV#I.\```* dϟYu]]]~.]!O.E*k,..Jg#@`CYZrXEfnP`aa L&s(baa ٱrHR,..R,v$I$ "{0M#I6rڽ kd .]"p9$€޸qZMee%VL&oA0ZGQN8A6VƏLee%nbȍ7ꭷ"Y\\ƍV֩T..ΝU 3omT'OܕN:E.nW,ciiI>733;#ᠲ.[|>1LEzzzDqww6xHןv$ILOOr& ONv*6o b?)utt077֭[ >}–-[% (yz9px(pM6QSSë"JzٷoW^ennVK `eFg,.&Z-X>R^^~[y^Jg?YΜ9ܜ(x}v;UUU"ST qRմ^+W$r?.111 O<Ć'NsZ-^}knܸ!%I' qe?t'OY?<}}}c`S ---/}IyZ-MMMS[[7 mE?=kD$VWW"gr ad2Ikk!Yf u?9ryyY:t:0=zX,FGG{ezzzìr܌lfbbB(Ǟ={#322‘#G8th26ūʹsضmP Ç+"$ae@B">O$LNN299nt: ;AdT 5Ć=*TeGR^^!b*199O?́P \`g{nz)~?a1amۆhrF}Yؐ ]8&1>>NSSmmm"*Bh4JEEǎCV@$!ϳe˖ Ik+++ F#byyW^y1`mŻ+v;j)oruBH_:cccHObH2w_:UUUG&n[P"$We˖8fY rr2$s='z{{) TUU1<,W J9\NŗBo'3.ŽXbYh4 QZ]]ett(:(+BpXGX2###3: s\fl6>0O~}bn(l۶ *N;L y444ٳgijj͛yꢪ|>/@ SMccW$InynNY#n׮]vΝ;Ggg'Dv UUUBN\H$X, ٱcWᶾ$I[lRZk׮Aee%&w}M6!IBK'J/\`ׯ_g3044Dss3 +OZT'q8X,q OXdhhl6Ν;EV/UUU֊MaNjnZ\nʄj655t:8J`0P^^NYYbUQQQ!f\vMq8L&n7ٷo8HD> S֋NYV^/^Wӷoߎl>܅"mmmc0x>bX,-rt"Zvj;vt:[bfٻw/N|>ʊzTWWpTWWSQQh4lقb!*jz=n!fɲ8dЗ|D(zEr\.NShyeI),--1??bzh4ahhhv6Z,Dxjb '''YYYv4 555277G6G ;v.z<~8K>*f>βΝ;+|>ٳg'Sijjj͡CĪҥKr9>|_ BĨX,&4q>A[$)//y {%J&v! {P.A)QDO=*M!a J'_D%JNX  aؕJBD%J܉@)m(QcFɰ(QcFɰ(QcFɰ(QcFɰ(QcFɰ(QcFɰ(QcG~z$Iw8D%J|]$ X,R,W((J %W2%J(p|hX,r ߍB ~ FAѠTE%J( ˡRh4m_,8gQ,Jyj5JNI( (U҄ħGj t\.wW_^/LhD&3=($PU4`YNq/~39>ޠcS{ݣ,X][JP  ,VյeF` .HH:Z6աk7\7Mo sR= ,Z@$L&=uX&񷥥UQmvzl6OSK5X9<:r.mg(13' L&5eXmG>F# oѹk[ć#3\T*PC^ F#j{;( f#?}w~_7^ȳګpma6ʟ?7Mz.T2iϱV")^;)J5ea }筫|+or d/߯RQ$pYe[3._}?Ų6}koXO3>6ǯb5!I*s~soPV$`0gy3D"ũ7L%^S#1\d2~JP H`4h4}t*F~0[(J*nX4Wdݛ3ZJŶPCL/T(SYf6 \iFf1 lD bmvusO;U۞3 Fs$cϾ! _}h$AM]9~$j58] \ pFͶ-t܄$I M1<8nfǮ6\nBIT* + x^'Lejү|t:O?|-67Ig bHVnxX4yR~N}sOU|G V˿"opصb,\VvٌB ׮ ^OY|k#F6׳rk{<`'I7P,B]C% E0t|M5tlmBV!I׺Qڳ 'd)fsWزI&|fno|`QubH*BP|Jj3cP Z4{sW;,44UNP(r}r'G F"gɟ'Nc4c~˘:-n g{i5lz׍lv3ͭ5-Flkw. y僌wtlmG~%63S>BQO IZt*{_[OP$Lc0 b/| I;,hu7N5Z5ZFbd%f{6M ؼMw^h4\]f~;f99V ߦPxoy筫;<˟;F{&)%n\.\?㯓Neudy|f'_ծ!.eVkʘF5u0=eS-ޚ2j"OP^᤾ ʖmM|[gŒLOY FzBN! ::(bw~ر0[](6\lnIRט N =fgd29$i086rmlkd6N>/)Pm_~i^<^j+8EfkGɅsU,x**]=}],Jt_/[.qBO}מMOHE /0xE Xm:?}]`jO&e( -|7Jy X,=0"ff{[J7?;40byqzʓ`37rAG2…b3;_S:wmZӑvploy&/GۋaLOݯ :A~/ ߷_7/y8V<+xUjj <BaÆ.+Q'RWL~3'35@tG&) /21>,4r[WF9|H8*K,V#lHsk u L140gx:0[,x=l"W/r[]?/P^d NSUᇾ|*7U\n׮3<8R`߁- &H$Rf``9JE_?H:evL>UŒt]꧐/Yvk(I,-PuӱeZQ9dm3p(F.WŅlv3JeX]qv~:^Z\e%aH PCw7R>ͯMѹn,%<+?i]c0< H:ݽ]Ͼs_7hj)\w"\ ytz'"3ɢRn4`}tE.G*Jhu﹥^.ZMhs<|8/Ǘa]O'S,-ĢIR }gǡw?B߻vP`) rZLw?_6 Lx ꦑ6 YJ < N/TSڃ4dP;"26KQm\W ~VJey=7ӸY%J(i ݍ 'f6֔DQx/hWңP,QDDžGhH3R\GW  J" eBs}g}cJ(Q1R.;wxhܺ"\lpW+'BBΠFRT(pz,%]DG((z:Q-Ņ+|@TСTUE Fް۫V0t7OȐk{( ƽ[DFTBPtnB:wNRFm¤`[sa% rBQ*%z 5(W0ueKǘ(Q{@*eS-wH( fӓ~&|;ֶ#nEP<#J(Qqᡴb(!YPR;~+#E@3\lѳe{3-jQ*lj8lJ;qm,.y[h45TV|JeEjpxT,IkZ?5Ny$Jg9a!˭^~\\'`K<bذ+n5v\lGӐ9Mۮܘ(pn@~Klf4Z ?/pXFd9>=Ey]{ .~ٙE(pV{[FVٝ\|q&&&XXXP(044؍$Ir9T*hiNmﶲ72ڽ@m$GTY/b2V8 #m ywFPж]|gI&/Q]$HMM ~L&Cuu5$BP(mɭ$P(4 jSN1334}>,//355EYY\r/ 2]XX R(b8|D"'\;5 ,..2,YJ&DnkZ|@ NdRԁWbD"A(baaA'L\x<.ȫUBW^{C? *Qѐd'S|s( \Y&b[=ޠhma"[fϾYa4aÊN*ڰZXV<===$Iؽ{7,--Q,iiia͢]lիl6*++q:bVvټy3.\kΝi.\@UU+++ݻ\.G?*111A(b˖-X YP`ll@ @UU\zHPѣyyZ-Db2 … Et:;vRb1VVVp:d2Çtkfڰd2D"$ {9_.P(O?M__mmmTTTp:;;Ҩ8xl6K0`0l0nR>O>;`٘l6ٳgfdY:::c߾}zΜ9͛GVy' oH$<L&ovI&TVVrAz{{NbÇs 1E?N__@T*E*رc\|#Gh8w$NO4;vpiF#ccclٲ6Ν;Z&qafjR[[hdhhG$ȫ*dѣ?P(l&L /J8s T*SO=$ILr5n7ׯH&455B:&QWWG[[P#JyghjjbffMMM ۷oG$ Ccc#z;Gee%6m^xPTLLL0>>`466'bH(d2}voeLLL`8~8۷=FC}}=ՄB!VVVH$q1, XV agϞɓx<\tO655kFFFp pBaZT&w/[5VmyP(p}bɄX,RVVhZ7˅h$LE111A,CP@__;wpp8y3h4rsssh49WWW`iiis`0077*555dXXXr9\8x^T*XL?B ZWmm-8FGGillyD#w*wÁbahhPGj+6;eyul6uuu Nqxt lxeV+F=φUAee%Νc||a\{\1l ʩw*gd}J`8mP`aa*j,)tJF( U߰uV"`mUYY$Q]] FŲaEF1 TWW'H066Ȉ0CCCXVrt:Zϑ#GX, c6 ּ^磮N^]. w?ᛇ;[ٻ'K{9~>=7SPpgAv1L(Jffʨ < TToZftt\.Ν;QՌL&پ};\.fff8Nv;6M|fvjP^^墪 N':ACCNS<\ElDvttYXXzaͯ[h0\.癟VљJ%& %,UUUlٲe@lI \2vp8L2]].& Fd2H޽{ a^,JRbzBOuu5MMMx<v;333DQ12vT*333dY***Ľ+++***ضmL \={`)//r9 G #ŦM'tUss3͸\. sss.܆V+P\.0---x1i.N`0b`$Ibzz+WIkkH N_t:afwCUUN7ng?g Ü8qJ%}}}:^2baT*---{A6 ˡ/`0(xnp}\Hcii RIee%eD"htw;|)jjj6̬5|XDTP((-033C"d2oY{;GXd~~e%? LYP(N8>=3ܹʙjZ$B2q4 `V+ڮc4 ]ifgg"]xYCVfOfY~mN&Y.u䲗H$x7Fדdh*|BGJ. d2 qݹ\|>/ڔR!?2"u}2<<̥K庯:Vϙ3g+o</Lp8DyaGFFf"A="oT*qd2^}UL&;wp8FP(|E\pm۶qUbcamqIFNXԩSEG__|L&CEEņfowjr"fww7:m4>}:BT|Myy9$o~7nuV.\@>ߐ'Xo?{;BK.yfo~0$}Ϝ9R'a^T/hmmݐH$xPww7|F85 ]]]D"٠1th B|}#U(Ÿ*K0Auu5G^2 )/%IN-ZMUU---t:m*;ˤR)_,LFC)++ pHUUg)D*"PVVF>'NDeeCA0ͬxp\F\.0޽l6ŋ78zqX,& }MMȥŠnumyITWW .ky\~ik6yٺu+ϟg||p8,2 ^d5{KZF~2|>XSS jimmsJ'OZÁRn$(?oMMISoٲo>g$멭eppP)jl楗^b\p ?fĉB\aZٸDh4*dYpXF^/}}}brQ]ez=tuu #$,y$V*bFF>P@VݻUcCRxp޽'xawmhl&iUUϟZ-6~***tt:qV&''D"1l5<77ǖ-[x=-Щ  vcTUUP($dy*n%‚O6VP0|xfEG:JC>e+(' زegêX,brOX|>B@nO^ \ccc$ ^/LF\sqq.*mƑ#G(++~mYP׋ɤ{r]YY! ,Vn0 !tlnZ ---x6$ bh#$@cc#CCC,,,j" LzZb1fH^/buyk%Fd YS_& /@yy9]]]Q8 Džm< >O"~zzzi/Fl0Pbp0??Ooo/$aQ( rX%S]]J"zt  p}}0x\苤iۮ)//GVÇ1 BKYdN'x|އ,*k |>/|YN' J]]6ׯ0LTVV6m"Ns52 {>!Z~w2 D"݋n'PUUadIX,RQQ!eW2z^/BӧO322bx<h4 mMX3== 333fv܉`'>MNN "YET R[[KCCݻwrX^^ Յg׮]BKvy^^e}VjehhH$۷oZ8&ߖ1ʊZc$Z-6-'͊UN,X,&''7du:N$I[( m6x"lݺZחZɤf|EEPiX,4662??/… <쳸nN8!,_S.7Y$MnH&pPٕ|M^|EFy رC`gt>nz^ܧxv##o QuC~yYȮ5NW:B%E+ćݖ'.Gu{qIDOM%JO4%R(@\9u=$o-QDO"uP(ٳgIRHݻvHbHWWch%J|2$qf3/Aw+++Bg֭y"~\.AZD%>I.'}ԩ di"BV I͟0ryv;R!طoc]V%JtrBX~1r6lAZs(XKV(J2̚&Mi]YUNO.XVJ&D(npG9K|lVZ/ 6di4j$IZ37e6aMdH$EK(QBeQ)o *QDNR? U;IENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/paje_draw_histogram.png000066400000000000000000001076471507764646700260460ustar00rootroot00000000000000PNG  IHDRgAMA asRGB cHRMz&u0`:pQ<bKGD pHYs  ~IDATxw\%a! [E]Z]u[Y[m݊[=W- Wr}ӔH"磏>ޟ; C$IA iA4 H@$ IA iA4 H@$ IA iA4 H@Q P>8p 77ã?ٳ'55jժcǎؠ!o߾=|ׯkԨ_߇:::fggݻʕ+keϟsNڵ !gΜYJoVV  P^ݹs2bĈGܹ3}[nW[_MyՁ>}qa<==޾}{ԩ.]ddd:tŋCvɓ'ׯ?n8333Uݣs̥K!O~!Co>s fkkK;wr֭[0ҒRPP[n4lpذa}v^^^~rss[~ 9111{TTYPPаaÌ!vz!ǧSN<}oܸuwܹsɃV`YСCׯ_8.((?ܹsͭS_]z]h~rK5 G%ATٓbkk[F /<azxdǏ.΄WWכ7ob={ʊill-ϟ?O9pnС<ٳL*\$,,n˄~ֶ^ze˖4hЀ]vtRz,--7o|XX+!ٙ~ ŖtBHʕ}||166UԽ{w٘{xx;BY5*UDqssKLLy~֬Y4K011y>&&~J*]\\"""Jm2. 87T48qB&IR___ r=<z>}utt22F T2w\znZjVVVgϞy^Tnڴa>}Ѕ=V[laaaRTQo>B !tκu!k֬OdffBF9jԨb[;臫R|! M [hqƄXI߫П'Vǎ{]R1m4t֭H$}UMJ/_GDDhE5ͭiӦmA)nIwUTPD)m~JMMo\\\|}}ON?k!.Q$?::ՕtRaS>>>ꓱӧOo۶CժU V)b#Mw2y߿ 49JZΝ;cǎmڴU:uN:UmVRC?K#zJIuǎkР͛5jdkkۧO .^i?jՕ^zXYyzzzfffݻz=G* &{̙3SLH$+WSNY5I033+Bs?E^~n:&&&niE_-F>}zpp0˲EtR&M_VwEFFqzC^R ;;;BH'+m#ڵk߾}ɓ'WnٲcڷoyRTzm}PBܜa;#JU$ P}:gΜ䐐 'UիW'>11Q}ǏR;vСCG4hX.z X[n[[[_~ѣ۷o߾]tkB?Ts9Kڈh-޼yshhGiccөS'L&yfjjy^zռym۶tC*իYfW=zѣGWZյkW???Eƌs…k׮-XUV/_, qC 9uT׮]CBBmּy󘘘_.O?ݵkӧ,Xdɒ5k֭['!'|_Ї}W&MJݣ]իiZ&M:|۶mmvر^z:uj„ E :[޾}ڼyb BH:uUI&8qݺuU]J sO gg-[ 4+Vh޼9q4puuݹsU~׮]zz+ٳg[[[g ,8qĜ9shkk;{lk(/p [ /OIII\x:f]^~]CuoFWH1559siii}d611ٱcѣ޺u+00ś3gw5j\vMuwKٳgO՜{>~Xe˖7oIB1}tSSSU [B رcҥ x{{{B5W-K:'00y\^V-t#Jrʕ͚5H)ɓ'jY^OƦMOϴ^`RzE_mڴy~큁g6qD<Ϗ1bʔ)[nU%4.]R-.f477駟lҴiӒ_3ϟѼyhտΞ=֡J\\\``z^rylllZZ}* ???_z~˲,˚dff>z666]v~BBB)|Ⅻͯ^bYVqS333M4+BQJUEeee=~^w޽U:=?[֬YSUJŋ W ѣG\8111..ۻuɯ^rww3zyDK\Y}eVVV͚5=SlSK___<$ ٵkСC8zA\\ϐ!C;4&##A_u@@W6mڔ.Bqqq MLLhԨE0\4lݺUu@IR*,,؇y^ ANNNwBvڕjmiӦyΐTvx$}rORA|\T*tɓ'꯷.f-߇G^T[A[J+WWf-B/ SDz+WY8A^z5w\/Z'j… ߩSϟ/d;+V8~x#G$mϞ=E({ӧO-ZT;v8{l) ͝;m۶͚5/ }=|h}]S(ӦM{?جo]t3mڴR!C4o޼M6k֬|xߴiS.]4iү_x޽{ e ,>}tŊO|811ǧ ]vuIIIڵkԨ޽{_|?`z7WI[vիW-,,z꥾ .ܼy :00PΟ?8@ҿbBYDDD={e͛_zիFFF Rj)p)?~jnnjnn>dȐǏݻN:l9///$$$::Fmfdd:t(::ӳ_~vvv,\.?׮]K9sO>'N8w\FFȠu=z...{VܞoEEEU^} \r>} ѣ7|ׯ߫Wcǎ...C/(iˏ?>uBС6߿-ZܸqzرGG|xW² {T:ekÆ 'O,e!!!3f̰433o2336׭[fOOOKKoF;v޽[6?k۷oGкW?ܜ2$$οpӧbbb+۷o7o^Jlmm;¥ _jFl2__ߔo]Y5jӧVl2?zVZCUuR(ʹsҗnBB>tݻw1AO~!gϞ <811Χ?V^}[l!/?󋊊4hj޽o宮˖-4hӧOkԨqرM6{92++rK,yvt@ӦM_XZ5B0oBHJun9>>>;;[ue˖Ǐ[XXܻwرcǎKOOOHH]PffflllVw^% BR8Yft~pppVVVoV?99::*UD8-?~iӦ TXZZu kk븸8P(~>vK,AN:% ߿:%=8<~y׋'¯z8::_NNNoɓ';v3+WGyMjjwh{%0¯B;v??8qW^ 4 g#++&WH.''GpJ$O5.vRTUDBʒJ 07E|䘙L&>ػr~~zS3++K&:{nfff۶mQ_%u0WWW9S(ꝟ0z6<`UQEoŊ3fxٳU :A;ܢ<ϛ7wޥ,ŋvɗ/_6nX[(eWWW^xATH? &xm9,J/>ܷo_I߿233߽{GƲ,K;F{W[vww?wjgϞ?ݽgk׮999 wըQcܹ.__\\\T_=Ȩ&?~k_,J+WS>,ոqc777?sکϞ="K]I&׭[w^\f[:<ݻwp78;qą *W\=z8x`xxB8vÇ;wbee˖%&&/]R~}ssիWggg͜9޽{hrH&uyݺu)))9996l(YwwKfff~Ŋ-[/{1J~cǎt={\poroLLLDDO)"kӦMJJʞ={ ţGmFbu֭[gΜaYΝ;ǎѣVQҖw/<~8//oƍ3/X %%%55uG7wɓ uEO8QJϫ櫾¿G]̙3ϟnoo?8;;gdd!:tHHH4iRNNʕ+I9__R޲(HdժUoʕ+ӳ%myѣ,,,d2ٚ5k͛׶m[[n=m4BzKQjբ71 秺bԩ-֭q 4םi,Xо}{a5k6{l!E633S]Ϗ.vNNN+W\d+W '00PV.Ek׮]pO?dll_6mTPph^dիΝkeeW_kJoVBHjT_TǺnyСɣG8.88rtt$̙3~ڵ+quԡWKR??:zj˫aÆQQQQQQibx7.Z_۟UeggSb|LVo^^L&s`qG-YãRPp'Df-i%gE@r`՜yk׮mnnND;hELZ_+ ekR_0x`ƙ-X;x`W# <PҖK)vۢ xw(h$SqqqW\)v6MKu% s.bxb0@-A~BPԬY؟~pG u@@@#i9]E;+$ sqq))oeccC ~zyEeHIqN<(E\0"H,&y^wYh㢣iUG> Èr;| ] Hok0Gi>333׬Y#6&O,& L}Ҁ@ER(".=R5D1q ÈXQTXtTc'Jmmm>HBVLi9Es\1;;ׅV^3R8|棏gcqjj*)CJ>0"Wzzk,T*Uw( aee88###MennD" JeYLi9Yq\.H$kGS=^wP( D\v.˥R)VYPAWX?Ç^={ٵkW5ò,zOR$T㉞϶m;95U~fMN IA iA4 H@$ IWcmONˁP- i Y9rIQyA iA4 H@$ IA iA4 H@$ I"3*J <*ŭQ1 #"˲wv,5pBDR*Q!<3 cGDDÇ`bд鹹EORxB/ÇVGa`I˲6Ю,b-q%THĜ8N* ÈAqS'bEq_8 Je;k>!,v,jzU!Oh9|4.L,0T*4D"(7r[L&|X!UT*ՑMI ,iШ1 0,4]fD#.B nEq)!VX~eGDDÇ`bVe9h|>1i IA iA4 H@$ IA iA4 H@$ IA iA4 H@$ IA iA4 H@$ D*0tɎuֵE$ ͳLOO755ϟ>}z.]?OٳgO85jXnʕ+/_kB`?н{w՜ƍ7iݻZdtg3 XH X͚5gΜM_~gς Id6lOHH ̝;Ν;ׯ~,}.JJ'srrlӠÇK$`4fbbn:Bȏ?hiiIg盚j7>޽_޻wNrתUgg%K1 Bϟ?Oûw&L@zJ쬚sر/vz%@S/^vYΜ9ӿʕ+שSG47sAAA9Nԩ͉|}}k׮}i4<4iP(tz˗Ϝ9LqM߿ΝAf&$$4iD5BCC ="++ =+Wܽ{EΝz' 111&Mx񢉉4+++դX~}sGNKKZT2 #FU\Q yE\)9Ӵñ,0L^^p"STrDzu<_hN~~v/c(V!EBg=e5]Qt8z+󭐌NǓ܊w##CXzzL 6 P(4t[[[GTkbFF݋8㌌4uDKjJ%˲2Lr, <DwWLMMw0P( T7kT!RT*?gddTDϧ]޽{WtaYyY'RwŠq<133+G&.]J,VVVK, MOO7,<<|ĉ8##gϞ=x3**JXtttnW,zTq̙*OuHB>|o~=J}Yhh(I"<<˗={DhI&ׯm۶gϞ%3&$$D>ڵkצM-[7SN ,pqqAhѣG<8z?i&BHv֯_WV0LHHȹs={vȑM4P(jԨt}^v-""G4x{{OJN:u M N5RRR---իӆ ҫMKfܼe˖'On۶m^^/rƍw;V("H^'J;™fcc3iҤ!ѳgςB󍌌D}k=&zٳgOB]]۷owttJ $ 'BZYYi+ֲeKqC]H [zz͛\jgg׬Yc:;;k=`JԩSccc/^?feǎXH ثW駆 ֮]{ѢE7oz,$ ,%%D5awwDB`|}}߼ysu՜7nh=.0`SN2e޽{ !s}u߾}4@a}i޼E>}xV.!i0x...;4 i0l 6M6q@=mȐ! cii@H ĉU)J77~N: ]zU7˲W\9{BHC&k֭[߸aՅB0, )']LM;_y^᫜O8RP#ÿ}6!!h~0lK,Q|Ǐ[2H4k4GDQ eYR)bENTfFFFd2dF*\.H|(BhۉT*JS!#4'qBd2;HOr<1 x"ߑdff>٨QI&mÙ6yd5DmӦM-Zݻwdd$!dwE $ ,&&fsҥ A.O6-))I@asE5iajtuYZjuؑe}̜9˗wҥvcL^zjj*q2N:/^ ٥k=fkkۮ]˗GDD:twޭVc sNccg97nСCPPc!i0`999ݻw I/4 6Zvc!i0`,ﱱ3߿eÇk$ ѣG!!!=zP'O\]]MLL I333ݻرcghѢFڍU^ɩSciiP(rrrEZXH [vܸqczzYN:쬕@H ۅ δy… ڪO_x~UV-Oe"˻rJhhhXXSݺuIJL\avڵkW^upph۶_~YV-]B9G%8|Δ)S֬YӸqc[. ۲e<=='O~xzA4oܸ^lɓCBBN<XH ˗/===k׮#B`sssUs8)$ 7 `Ȑ!)))?sĉZ]}aaannn[n555z r `LMMnjCYdNLa[p[xA/h5<PEĤ=A8N*]+ÇI@x&]}SZ;֮]ۮ];7h?{ׯ_LJJx񢙙ٜ9s I{/ƍ'JU3wդI___777-B`\]]G١Crvڍ٩?FlbbXH [VܸqcPPv!i0l'NP|A h=wݺuvڰab@q\J>P&<ϫff„ Z͛ 6433:u*˲tŋk֬ibbҤIODشiS-zIYz}ZHIC\\\֭?'N߿ٲe.]|WQQQҥ N9h*&&fsҥ }˥\.6mZRRc>E>Ebjj*f\e9HbO? +To;w466>{,sƍ:hND?I޽{G=nܸ#GX[[ә)))eRSS kzKh<ϋԑH$"֢#At8M0dTMI/SQNI4ZQt}z.]?OٳgO85jXnʕ+/_kB`?@D5nܸI&jlL^՗"^?AH X͚5gΜM_~gς Id6lOHH ̝;Ν;ׯ~,T7Asss[n!sq=~Dof@`؆@ymٲ]4 BSRRMMMuIa[l٭[dzM@)I&߿ʕ+]饑څ=>>^^^&Myc!i0`rL":˲,j=痕5o޼xBHnn?Fi=fffnݺ7oޜ;w|rWWWF7nPx{{ܹSGpm۶e˖111#GܹmtIKLL_ϟ}ѣGܹ322R뱐4N:dcǎ1W^ 6|c!i0`޽#~:))QFJ2))Lp!$322?~lllǎMЪU+™fdd~z\2vXBHϞ=gooX8`تVdd-?~0v!i bccCCCϞ=믿"iINN>wܙ3g^xQ~AnA끐4.]:uֺAB!C燄=zJG4 &:ujܸq=ӧϰaݛ@ydSN>|ZjAAAZ0:tPeq<Pd?/A iA4 v.*E%e(zVβ+ͧ]"*D//n12GȊ%-Q_Ox—euu0{4( M@U!JRZJH8NPH$b8;qqz T*Jޚ6+eY4-Z*^D-aYVqV|nYPhzqqBB90E}Yek|"b`IF7,0T*4D"q q<ϋX4i@*2 ъtMfI.A4 xT4q2T yemЃ49]kڢ^ ?O H@$ ipdd&S˸<\6}F$ Pݎ{ϣetwrդ!i Ikz00F0x/S $&M'AW[q5e?Y~w I㒲y( ~A4 H@yMõWy?HWco&yTMe15##_{9U^LA iA4 H@$ IA iA4 -_r7rNWDPVUfRw?O H@ICF=zThffff5Pf4DGG;w{n///} ^vٳg5W@k*5+},ɱgr9=k;lY䟡m+.Ah4DDDL8"W^0J$ݻ ϝ;נA)S̝;oE+Tjҍ@Ґ4IM'PE]YW^/6lPtׯ;vѣG^^^.]ܹэ'4ܽ{ve wގ;8ڴiv@%9 UTٻw/J|wwZ^~~C5~۷o>I:dii>? @`ҐlkkMNNV_`ҥOV3z贴4!w2R JaB!J*֊R6%R<'or9q.]8eaM˩T*9cYVr*J7~X[~~v/cht i4RqQՠs,'Ie2Lо˲(:q3322:]T>Orss?񤃷4j҈[YGJ%/a :j@kIkJ,ʤ2B#ڨ/`"6s3T*R^"㥬?W6I̕_+BOU[$ ZriDRg999 l۶PGܵk?wP(4tFQ*_Maeep88###MG]nnD"155t,+4-'˲yyyVVV"}DwWLMMvTB((((m T*Jܥ,5FFFXXXhOD|UJ_ݻwEgey[x"JML4_Ǔve?V}zd7Ig5>>^5'/Rh "BHOOȲܹٳgi:xN:=-wO8p`ҤIgΜ,޽{ݺuׯ43իW;w0QӬY3[011zjϟ??t]v13wO;>'''3g LFO4{ñD"q"''̢ݻjxD& \+/__[u P!LWhW\p³gϢZ*5dȐ.++K3y͛7—E/鱻 JVfgggeexRd"0Ǿr'JE3???++KjQ(DwWlLt,+[-'!Dc?|4Ç53 q<OD\V'z>+Dse|6RA.oٲ耵uxxxxx8Ӟ={P-E׮]PF[ԭ]E%33s&&&QQQ"^Ȥjժs$I׭[נAT+@#2i2]Ӑ___v4p`5kdgg9SN;wS7o hn:m1559szgS*(}4̞={Ŋ_}U>}\\\:?(~AGidd4p@sG---5]K-R999T*-R+((`F[xHiZNr}Ǭ۷odrrrڵsH$Wφ2|LMM5-CjᓐpBcaaQ>,D齲x 5k$$e &i8nڵ/Zjiuҥ ,Zj—W( È鶶":Rꍌ ++++sgdd͕H$SXd"K'.\\.Ht|EwVx]! @aH.KR!qYPBD<8AB~"zл>8|gF!keYVe9HR}`=+UD4'T|w޽^zpĜi\D"yǏKRWWע\|?P*GUe˖O >Ow(|Nwiر'N('N7sEOܭ[FFF 6|1!VZ8;;/YdĈh L䅐{{addTRw)~В}݅ 5kF֭ݻ,Yrر(KKˡC͞=MP>L*Ut͋/޿?>>ݽ~m۶-cǼh@?Nԩ׷vڧO0a|*Ýڶm۶m[zlI˼z{֬Y4229rɓIHHhҤj1//B/_LX-.iW?+D>⣜5NtΈ#"""w}ݻwϟwuuݽ{wttرc更4f L?}ѣGkFTZ""^J9}sᲳ}މJ4D"q7iZN]-F.B;EM8,PS+:3;;[x9\e9H$MO|ӫ% :t(qAAAnZhQHHȍ7 9jii믿J$ƍGEEm߾oWUzzN*}ɒ%666fqiy^@ PljWLLL=A\}O9|NCq^3 >2A~ԨQAAAo߾߿jȑ#={vҥ7o|rwwwqqQ WWW+xzzE i8uTffmk֭曐B'zSBRnYPhzqx|q^+ ,i裎VOGqe <ӱps8SDhŲԿ c:|h+vʩs6 T>,M+ ,i4(B`FUTc[Z,*J4^4dT*bJ%˲t]'=tE>nܸ7775 J?Ǧ_~ر;w9ssIIIAm5´e=iXl~wގ;zyyBڴivȑ/ ǏOzիZ!,,lӦM3fjׯկcQF\\Z/_PSPPPȑ#'6lؾ}{yҐ9hРm۶UTI}~rrj699Y}'>}Z}ѣ҄V* 0 irF(y O\qeYaDQJ8e5-Bs{+DP )"?y,銢qG)NǓ܊w¬wicc#fXy^o_T*511x333;;9|d22/e1___kkkU:Oo9[lQ( |Eo¼}6BX>}B=xڵkk7Γ;w5ZJ&uS(FFF߽{7: >>77W5㸨(/ $ 7 `Ȑ!)))?sĉZ]}aaannn[n.r `LMMnjCYdNLa[p[Ia4~0lO i0l44~~~c!i0`,>|PPԭ[7%%ɓ'Z' XիT2lذ>}N8LF`6nx1K={!i0l,vڕRPPPPP`eecii|NND4vqYf:uY+4 .I4\pZ[46[[[$֧O3͛WNB`Ǝeٻw'>i='aDzRbnb嶴m۶Uر)S?@H ˑ+IVMZVY}yO<}믿z $ M  E.]j֬@H ɓ'JQ>}@x%@"H\\\õei0lVrJǎIa;}ȑ#[j@4,+|ayhJT[Kd

FFFÇ?XbH,, xl 6mt={̟?ԩS@@mvui=o5 ȑ#ׯ˻xC\\\/b7olٲv#K|nnn322T!¢vIIIZ0 y-BȣGT͛i=(~0Hƍ1cF``4h;wꃐ4VZ:tի+WwUBr#"i0T...[8$ m̙Eg9G46IIIׯ_oӦ!i0lcǎULLL:t\T(...׏qY|.6sŧzNN\tiFFƲev訕@H?'2݆?Uݙ?~љUTaYv 'F{.))ֶrʪA:88h1ŋE=zH*r:cƌ͛k=0`&M?nݺuO>=!!A뱐4gϞ,`ʕ+B[ni=fnnnbbR蝖ZUV&ٳ'99YfZ ! D"y͛7kcc󼥥g}FsM0s4i0l>{!i0lW(<`ؖ.]@8PѬ]622Rř?{ׯ_LJJx񢙙ٜ9s I{/ƍP]v5iM40WWב#GvA}\.]vc!i0`nnnSdccXH UgddB͛ū($ ŋ:tm___-߼yIh 4T@r654r1I%yaŋmmmI@owTsێ. iӦM{qvv^paڵW^ݪU5jh=yݻwϝ;K.K,!iӦ%%%i=\V;6,99`̙>>>wz,=% rхgff޿??? BSSS9duy!.==]4lݺʪK.jOTf{{:8::|̘1Dp#G|7^^^nݪU!XPה*Çoժ[YYiԛ4GRQ U+*JcccRqqFFFԈ:0+VG* W^~}FQFjߟh>ov۶m[lT3U޽+tF߾} mgɒ%0ƊĭEWNW%E]R]).ŕPt(\ Z#"#.Q9% -~,}p-O$" h|:>ŋުRjF=|PIot҆ ݻ1BZlyewnn;w7oN@Oe(((tqqz,' _hYp!!?… ###G]v-Z4RZuу211[njSNVEW^9s={Ν;Н'"s ͫZ*+WҋinjӡCy_Oj:q@^^Mf:0Ripppxx#G۶m+rQ!jlvyYZZO>K.Z\ٳgO85jXnʕ+/_kB`?н{w՜ƍ7iĀ_ =g6==]ӧnլYz̙ل?{,88X뱐40LaÆBܹsܹ~z{{{Bu477uB~GlILa;|˗/ !:Ct۷o!~0l׮]fypd|™lٲ[nzM6Op)I&߿ʕ+]߿XH ,Xcii5i$__ߛ7oj=&$},˲c!i0`~~~YYY͋'O>mԨc!i0`fff֭{͹s!˗/wuuz,=`تW>qDoooB˚5ktg ی3⋔N:uqԩ??_G4vڭ_ȑ#ׯo߾ٳI@EljXaq;("8Nt}D(RBIxe0#*-,uW!?ݗBcߝ/\)%A FoW( dD"rڛE(J4MJ%q2Lo B J5-'0 BDrJR4!4BuHeEc4%z2#4-C?>Xbcdd$|i&$BR' OdRYuU3J̿hr%S+ ,iШC3#"DE(ZhbpeOK=/&lS>⚯Dt0FYs<Lo`I<|B H@$ IA iA4 H@{4@y|R41~4h8oU^F{U I_a)yjeh~+I䂔j&h{ I4{*ϑ,u HmB5YŊG~A4 H@yM7{ǫ)`B ?O̙3ٳg7^d5@?ǞY=kZtLCnnn&M!}]ddd׮]z4H>裝iؽ{;$'|t֭ 4 @4\vCbooߨQׯ=ʭv͛7~~~Ioߪ/0k֬[nϩUV~~+Bsx_7p,[ʦR$<4͟S 0Ih}||>||Qʕ4 2… ϟ?'iiizB{[3g6jԨv<عs%O;wѣ={V^=[[[4@yrYrʕ+? A iA4 H@es _>G]˙ 4}**)óWr^"GVS֭رcPPԩS̬I&5nxРAQQQ,@ǭ]688q'NLMMJ˛7oޭ[W/bYk׮j޽{۵k׶mݻwfT FQCMHHHTl1`С=R w4hгgϛ7o~ߖBGѠAvرCÇ `-o>"ٳgO9:_Gv100pϟ?Q:uҥKu=|֬Y޻TA>ZS 1bjNNN>ܲeK:?22xܹZɓ55yd''m۶=zAZR*<Ϸnݺe˖6lXJ5V%Ce˖9::n۶?hԨQ֭o{޽ .422-=!d׮]tr޽[lٲe޽{K/pժU6mRbBرc0wQQܿlܸqOo߿ ~~~:u:qƍt7| ooosss .TTw5>ILLիW:ugBJ:|BΟ?/ɖ.]گ_?WW׬,O#GB&LPz-}Q7nah4ihӦ;j-[Uq󩩩R4,,QFۗ.s% ˲IHH }ݻw%Irr2Jz6l(ժUn۷oB222233Ξ=K8pذa<ϗT M]|Q4ԫWo+Vԭ[Q}dgϞ_R3[ zAG2nܸO>D5駟?^w&Hrrrѣ?3  -['ᣑo.AGRKw2l0722tWreJIDATUҠIÆ >(ZG?O\x#FϼvZ55~:!?ܹ3]e˖Dž ;rVZIz+`NN4hPR%B0:t?K)p?C޽666<(((hݺ5駟͖T ~ذa;w477srrr;uD';vSR1{51iڵʢm/ LUHݺuǍdtuT!nnnGQ5trrsgϞ͟?_5Gw˗^^^駟nܸAgthjŊ !z\\\޾}KW\YuDvttLLLY++7W?> A B*WL7[R1իWʕ=ڣG#FlݺU*yQ Ņ+1bĐ!CZl^tkpĒѐS*y9sj׮ML%.UѣK>x U{=!|MӦM!^GÇӧ۷O>QQQv矯\Ү]nVwG+tT!%tW!jװa ԨQCwÇtRss &>iii_˗5k^|YG4#''G[lnnn5ŝaaa6mھ}{:>r|֬Y;t۷_5jn$8;;ot%CSm۶]lYXXӧϝ;Th/*U$HJ*@JrɁk׮]jUffSvDQmsI^zf͢Ǎ VR3TһtT!ԪUɓ'C 3u]!~֭[Ν;yd ݻwťZСC bժU>_~ꮄ ܺuKGGtW!t]!-z*˲?ϔ)Swܹjժϟ߫Vy^GiѢEtrȑϟ?OOOݧ!iÇU=-4?11155G-5GV='22RLdd$lI(##tΎLKK +;##ߗ山O>urrTHddSI… /^TMrgggWR3T .UիWGsݽ ٺuޮ];ڻJFt1| ƍ u0 -tQ!tT!mڴ9}4["TR~h|'ִedd{|o>%,,,t}؈#w),ĉVz]+{Μ9Egfwdgg[ZZ=z/^+K*p}ԩSFFFիo޼!QFuET*Ν;嗪C;vXYYzڵk/Zf*F/TLUHnݺv޻bbbtW!7o411guQwGݙ3gTwOn4idРArǏ~Z?G#7oV{BGRKw2tN:}z衟-HFY`uf,-->Lg޽{6lYCCCjt =zҲys-]p_L%CtW!%Q;88ԪUK} fGԫWOz>BJ yŋqqqbTQR5k_#B H@%%%###//o2 C`u)ufffnnneZAAA\\h\r48p􌍍رu}$4kLš6m*9\xbXFBٳ#"":IA۩S'7B||MٷӨQE>>9887iD}I& 666eߎT*U>5kܹs ! m۶w;vڵksssG<}t Z[[lݺUHe˖-[ 4pttٳgzz={֭kmmݦMW^%322ƏeeeU~Ç9$ _xxxDD!K. 0j֭ןÇ6mZvy.\xZjW 2e ]r2dȚ5k*Uԯ_?ZH(>3)@ZhfΝw|^^!dذat~FF!>cce˖<glln:F&L`jj]h<7 4t>322z 5jTjxW*s̡󳳳tr):{zz5i X[[&mmm[3]&?}?ڵkWB*Uz>0۷o ˳8ydΝ:BwR{cEtt4!ZjEyVO9tPʕ `mmݢE[|P 33ϟ࠭ջuVLLŋ80zׯ_/\PnL?///==>|8sLa#G<{l۶mϝ;(ϐ4@1ڴiSn>DŽǏ2ݻwz9gΜT*o߾<@y0LHHT*UEZwء[ZZ_vvvAAA...˗/G#<(𨨨*U4nܸ&'D<۷iذjYvލwO7JdddԨQFh+Wڵ+P ЂϞ=yE>}Hi gL+˾5SSS-{{{-@yk@< iA4 H@$ IA iAL62IENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/parallel_worker2.png000066400000000000000000000065241507764646700252740ustar00rootroot00000000000000PNG  IHDR^l\FgAMA a cHRMz&u0`:pQ<PLTE\\((MM%$$EE55VV$$xx[XXyuujffۈ=::ž̦áyujfLIϾ=:[Xɯ-,^<DfMw "3+oU̡0ZtRNS"DwUf30"Upf3PwD"UDݻf̄3p٭`zkwߗ몮bKGDH pHYsqFtIME *:β IDATx p28-$YiFNGKmqitBAZIa7 ;;;S7|͛7o,ǽ , a]"r+h !C| ["7~ Iwwa>%" :alA&Oi LPS+'mHN6ǻpteldmw%t삧_LN$Uv"eͨ%I%t6ϭ ,xlr |˂6v @&ih!3IiT2t,Ȏs~>+Mhʁ@v-fgw~F{)rja?Ze *9݃hbzitG#~#%~ Z㡰gfy9 M&}D(gy> c\I,%^ \\Vdw}y\榏̒y[!;F!63=ƽܚ\!Lm \m??VH6;ݟ6!v*gq}1or)c nߡϭ#|_ n{#&|D _t>w|~p[ yM _s?)*}N&*o!wĦ<%KAO xLG°_x"x'{^6XޗÙSx(Sd <,;3Ž~-zNv^3܀I6̽Y`s%s8F6c{zd-*6cd [l=_g,#Tʎ,.>M&SBcw>*Ǧʫ b]`!DZiov &KX%bۨ2k0ָ49n؛IJšo wƚTpϔ{P(}Pm]VfMWo4%kDIۂ(vkqUdJլ((7Em6#Yl uǽ.1$Dlti{Zu}Ngq.õ1z1P\{. ϔV6Э989FhëKLtx='8'N B9xG+IxX t$>7@hn@tPsCs_J!8 U`~}m* 95|s}avCrK{%85m8wON.^\2x-kK@thW\]5C#w 嘝 cKxVηq(J8hw]$(=wJĭC$H#_EHk6 db@gWw= Ze_\'CڎMfOϷ=a;V}\ /}׺v]FVrQ>ם{^EO  nJ9vRwUGxu'jm^jٝ^SIrb $@hH!KFG񼡮ks#zskndB 6}3T}E[saU밢^&(6Ǫ5r/y|l%tEXtdate:create2015-12-11T17:42:58+01:00%tEXtdate:modify2015-12-11T17:42:58+01:00S!(tEXtpdf:HiResBoundingBox87.9613x81.2432+0+0(tEXtpdf:VersionPDF-1.5 \ 9IENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/runtime-par.png000066400000000000000000002177161507764646700242770ustar00rootroot00000000000000PNG  IHDR1t"tgAMA a cHRMz&u0`:pQ<bKGD pHYsqFIDATx}|\gy'@)( L5X]1JT1xa78&e Kw[$}R' H6NOCCnpMgjDAjǐZ$J/q߷u93s^6ϱ-Y:s^뾮Q`0 `0i+`0텼 ̏` j 1 `0R <2\ߛ$ď`Z#1 `$JA$+EI#:* 'j`0F\((k_ yr~Q7dG|a0Lb XeJR@}ZXI.JP4JWSG3(BCGp `09XL$@MGHNh0 &1 `Eyd5C%4^ 佻Ic0Lb0 (~^ 9F}^)"7%pԆ`0id`8!Ѝě?`J^%yQ(KF~VN#/:5=G&PVϿaC<0҈"3j.3d0V L6I9~Tqm퍶 QzM=;S2辔xx˸ y?ˏI #|8U/]gg7\21 |m-L7P\80̠)X_ʼn|2hLh8rOW,67z7R>0ܒEi$0a$V"+׼J'pKRle5RGc5RhCk2pB_n?@eI #Qo>6T>I~Qb9%ǀk*OQ&c0a+Gj_p6*BҤ9$F"J HLb0O`n,W爐2i1  fE}gpM&/I!/kKoXX\ g* 2#Tl֝9܉л,t k+I HK+OrNc0R'#4k<(L^^HҞ>p/:-7% Ir2qFx`n"ti9:I #aJ98 5Kf# #K`z31t~yX1E2 *ݒI*̞s2q`mFL^N^?J?ejQLsdMg"yg0a$ #̱^|rF YC^ϸ-M1y!>N/8v쬧`Ϩ.fy ͟s[+3ax@"2!'9 On*hi#U*s{؈k;2F^VH\_[K8BIph=͆YJ"!ZSS˘0DR+T8L`4ּy8v4*[wϪ|!k@4/ D$EXCNJoJq&u8َYF%nU.C'{wolb; ZIL0j5Ki\@/pe8$XXIG"/I2p׌m <'P3"؈KQ&]rH3 9]>|4Z"A}!2[]cGn85(a0MVPy6 \#h2XXq"'oF_FR楌Zl%g a.Svf 3mM>7*:?ϟF'7YekvƽC 4W$јDląt|F=ꆹ֧ zF.܂:W99%'u?wF/AN-&1 ^I *lj[{:#aJqưg~wxg=5(zaVI&<U w:0ȢrjdI BY crA4%6Qɐ *69Ht@y4p zTƸVpI'_>eF 5h/ +#.\/aU5q:O?$(} LpTTt~28$fUC `®˔Aͬu0lqo u`RafH6#s۬˷XqҬcƠ&zurT:Q,04kD~D0+_WaS0N3aؑ\Fbc \wa6΂C=G"{=<q&}qjVd-Lbҏ$z 2X?נUaI!րZG_z-u#/^mFXFѸ.l(}*e*I›sP2!dSpu#=sI^$aDIMtciyJa8 \1d{qJ~Iv8+ʔ˓a1aMyoeuaTYP/ RjGeXّĚ07zSA$r33TY8mTQ<}ƴ ]l^'>#S׫yɬ:`yɪ5]NnYl7E_qS$n^2Tʒ }OXS_w<m6cb 4۪yee>}fWϕ+[KiĤ-]mBWxawBHPBY iͪ\j?#N)| ذM6[^H!s)%͌oW+Ohg'0'a$vi^S kfn><ڗ$&H❲ {/<씗\N"2Z]Pi9z˽e:ogk%^NN{2O~J~HC&.]͙jKnwFb^=4V:$3Bo)dCm2r !ݑ?ڊ9ݾ^GoUV9RI4F%;ŬY2C`hsm$#9/XSް +J Z.&U'>4K;6{Pw I%ۼ(&kd+:FPIᷝ2{Ĭ*-hO͘3#%zA3G J#d>1>`WVlX)Bd j=@3N{Ţ\!J0b]l?(l\h3${*OyAqQ\;OȽkY#SCREI{͆e ӯ徽f)Xsw{Km=FH̨TX N{Y!9w"1yQLbr=@3 H.F1H{͟MKRo 6 / sH>[̟=uD1Inct8s=\ц{ZxOk7LA4k tT0a-S52"qV0 Vhx%'M*wmSמ5N .v6;f] @t}*ax6ĿQa=˓^MR~pwMAMH'H5H#i^~p|W#pTxx0l7~".W,c^co&y*DHؑ!Q4n(HEKEҊ^Kh_@{uc?vӿ||O]px}*G]%d+|\:eFD(?کx PJ{Ss#~Z*6?b'"UXdz0jJ?>~alVf"!s=w &$[_Y]Z/z@ʵ5,?m6#qE9@Wn")idxYNm{NK'P̓:G\D]64s#6"r 8X9ؼlG@x!xtȔVȔ1>7@8FQI&p:#+or}<-Af|<-IUnd,c,EyQ5|zlGS^ .{#c5৬iΘcĬ~n%#j[%X5zu[2z$l}ԿԻ in1Ml;7H#VN$eǎ9^ܕkn]`ȏ2ϥ9u)CK^s<<*m [T " F 8N*R!߄~HbZeN AN`vr 2DD#B3r_&d}90ǩ}n% Lݨ0[*'Jc$IjLb1>dE;Ɇܑ0 !z?`1T2&FpMxu]"TίM>K Q@LJZG73kAH5pnêHzlBZhZKQWOxKeCFmb!r$؜YCF7B Dl3"" dFauݹ *);lJQ"`ǬHh%z.zX< P)5\VMn mP}NI~ůw7%V''͑hKR*u2p_u3w h}^A4A:/8ibZlјD$0n'e /q]O=ynw82J2HAyNe[Z6TD_/vS=}_? = reBE*w<,Q^TaP_XY"x7X?{[]WCE'2HHh^6@aM 2ی(w9Dz 5[9"#U;F],b<2d3vjsK7dRf)O{>Yy׊"!wS 07n%09JFi+IJQma9=h3͝Κ5K%s䞫ڈ{J-9 u'ip,%S.\I2;Gi7)"oHGZ8IrNàTĊ2LF^,A(UW@[=xBZTs@mlԫH]~ݠi4·7 IΫ?S^uirJ רmftЈ4i4ӊ߃$2 %*#qkv}Q(fLy(uÎYJ'P8֡}i V0|l^,{F>I#WQo* 'IMIc,&"ȨLJtgܟYwDVHuN_XNN~&0LbdD?Wnݛ@/pkauVHY. 1Bxn̽VD/xuDQ/TKś*bC7֤GٍQ&SO߼d{D6l*V w-G vm4T2962X0D6bkew{<Qw}u3J'p:.&pJ6"&F3rt-WV Ddq@s*DγN9UeZ^h_' ?p yzi|B *uBjL#0ԔrȨ= ʋT5/L, B s<Ȉ"n*:RU7C5ih-%vuNb8dӨs3L\[qnaFFQ(~ہ7l<;,!% t7!kTyOjǒ>OEfrH>`>8XxK"Aل-D`Rֳ4TDpRR4%(R Qho~<1z"4>@u9bʔRI7L506/ǎ;[==fQ"IIѝ\qS_Z1%ĉ$͇n;J6׆^O"0Ąbd\#@ks(PNEndYPSc7W"טiАHP9L x G)Rmĩt$nZ7񤽿5KHlA*N͕̻y͚r|$t kUB`AP't7gH{iE  ۙUĬ"#Yx2Nk6,CMNEHkTJQ뤎92`ws j81ҀÔL44N%EW[^qbt&Y?(8ACiAi"Db|kA[cA.mWFe&imFb$T̟Ҋb X BM MJFz6v:2C:0 %fT>aCDh7!4d~ذ\r:!1hOI~ޅ!%FEHL鄒=͠V`+ ϛ=&!H/i[jGJ/gw_ZV/섏.88e3K!^|IL:A[%! F/ 6S^Fpgd}G(%H+eftD5#4r^_1Ww١ .59ɟHdQBXJ>f"C1A6G#.P&D %\Bf5bk?^r6BKI/pDSL='ݙ2u%YWfd>@p- x*2I^$]wXz6=JzĀI3$ڢM8 k.B*-EP6Yأ6Ɣ2 F]}}/ fOxXhص,ǁ{ınzfM>f~0& /Q/Xݠάcup?)㕫^ $ILJG6;/sゖz$nB>DSFb2UU;a9kL;bRQiTeAhZ ʇ)BC;}_+f<Ƿ#3b 8(?(HGD?!}FU$^-jԡLlf~ J'P#ƭ߁H+K8 q f)A'5S\7ܝ#8i8};ޭg{Le+I->JٚJ04VjD.)8tH@3II&QFTj aʀOZDBr'3xMm{8JT4:ݾ [h*v)Vh_CugeD9y{_1SPC?'.KɻVm4%wb7l'|i*[EyB.& ݊ e"H:;ZG|G`w8&1!Cе \?%x).&5{]פP@3iMEU1]Yn̛:I)'gw%4hGcN=ҕNt!pdnfG7(QhQx5-9\GksԵM FZ_1wјMnHOD5Q { ?:x e+l98zQ#-P+Xa6R hisiJ)kVڡ&; \]^<Կ'0~%.UM&Y6KkF؄AjJ)Ei!/x"uiynhSp$Ɲ!YMQe](,pp[eP)T y()(I!_q}9d?9ɼiti[kb5q1_& <I 䳨w<L=RdAڠ Tin:q9F`oD~-u2Zp:Y R*Y|m+&2:'7]jȤ=%8T6V/}^3]:>}C>P_=%vH['U 1Q 5uD`5Lb>IoFLyU-~zʫh`Qŭ3iFց2L׍"Ih*vilaJ?\n9.~MK'J:|Y/$5NA4Z&WmR\VOS F@`F^6awiy ܚ#ĽѼnirnQw7J _7*? ښB'FMuaOwG7t~2p팷tJ'p.WIqT 7j$mLdҷ+v]h,zgid'~oZ'n_Mޮ!+|El(N*yiH;f)wye+ 7 up+?e~X-B1fEO_t YXLbp#b ΥUaOJz'ije▭$ub9ERdI{VyBADDzགྷOܚ6iǃ J74K6'J/0L~&Qcg20{ 6[RBϤC54^K*\^Cs&1T((Pb-kކ2]Ľt-/k@dbo3)[4祜Q1m5 ʘ{/˵QN4Ց e^N YOnhXPU[yUF_,i.6jz6nZ:wFAht 6J2j,} "B}fˋdOx+o5Lb+('=/Ll{}8#irJL$e}բY'/jDT2:ݳ4QC$RyqYsy/{%f=HֆhBF14Gܟ4cH 4G%/cMPF:zwOkR]H9R>*TEF[()E@)-Yg) y6dR1K+y)Ę\ۤR $.9*}J,q_ EBE)j e0Epdda#&s^'gzB7 IT2MꤜQ5 H&0R7%{$*˟#W:Y\+7BIL*(e 5R\d6u/I uZ8Q0&={ lK&]Hrb@Y^māЊ؎#òB_ I%4C[TB8fyBݐI!/j֙0a2H|yF4%5^̝paX3{P9-h6]E9X+BU-6dv@"s nҸT ]~`-I?cO_H9Fk!eXiUIyΫ M9=2I&&y J,gJ g^g1 j;TsNμ\cN[6z犤 s:hk2Iug5/O2{j[ Vns5/c}Xok;\*<' Ҫ ޺cUvK &&Y q*ʋwM72R4qyDu#׶IPzDoej r]o$CղLq Pz 6LR?B.~+u"/ʛ81M.=JvٺRSOo:ޙՈa'NYGc&1cؠLՉ,+@ (+7?]ﲈ\;fT1\頵a:@!?:|mSaC.<4R6ǿ%{$WήI"r]>mF?{M{!$'{3yuVK?e&1Ň,!JAmC&W҈O zi445f^ӔMK`+/Zh^IlE<IɁ>x{_4QEԃ(R(mմsaز"/ng|Y]7NɭN'XиR,Ԡ˦c^n`O j7ۃqLspnfQp(1P #P9P9WhW-CN`<Ib5^| 4L)r金w=/ٵ AVy>} QhJ$i0" a*'Lq3WPA;+\6JV7#1~Z,G1%U~M6t9YB4C[Luʐ>dnuή)^ȋg|BQ ^L^Qnq&1al& $go2lS!ǚ;ZI-.l!:I, wwq)ܧBpj4sd}_F`Nir2֨10)'ͤ@[ zbH{ a;]6d&.f0i[}Q `.ƙgFv_M^#O:(3.<5 nwkfID+ hĺ0vh?͆q>9)@賆!ثΩ|38Ӭ= ћ!˃~Ĵ%Š{Z ]iHɑMn|8FfZ^S8*|M%Y8Őӝ3"Q'WGCcnL` NX )z.K3feT7F2ꑼҡ:$Pfb$9MH@ߴ%b&i&sT:BR;MaN P"s RI$JHNʽd;K>x۝-efZ=,?e~СD{T29{zi66OL /gڱHf `:$f(3/&7n5s={wj,2oCTM禜osڢ2qxUs( RcJFVG}tRs%AqIrc L0}ὯY~rn*d$gp좺9m?LD#a*bR#XF*lQ* ff(l@Z^~G2 Qi4JQyy0)K#!.8EeђdCN;F>d>|| eS6WΎ՞Ǻ1}D F4xG2=׉{MRK\vB&K_pp@ $~ LAgFE`Xn?V vX+ `\^ UNpk@2= ÐST^QYC[ U D֐=gLwLTa.0faO!yӿgQ쩻Inj;`E2ENR]u& <9肳ad bf%t1{QF:6yi9AI2ASzQy;B1.=N/~#d{$uījDh5}V#ENs%kg_{6:\I_QWT3(XHt-7W,FBĴ121U|#m<*AnnCr=q)8:Ȼϐ[_ V[(l҈yXOiFú9<n J]eB]1 /jMNŠGe'a9zw! /:Gu^&1L@`rlqg͟4*)@ymȩڠhKZyHV]=`yU*T\N{>8Ty$+SP;)4J*<}a$f5SE~qV6G̟ωT<4HDTLȄ ːS8=2ma6Lmڐ['0BDڢ1y%tUd.{Ȭ8;0:6KA/LjǸܣM )لI^ⴓb S8gB'b+AwRq &`M¹}"د)98G[gjr c\k!aQT&ύ'D||Ͱ7}Ο) F$ff4Sljǽ l7p:sd3b٥:S9zt\ 4x?eyX8`MJ:%YS=fOA-<" E`LO5-dM@to)dM:v3?<<?Og*9:;u^S9F$1rԀ{.CF9MF2# F|T%*]ZcQDeGyxM[[ٸ[; by]yB 6ձjl%K z^#*9nc1{g0}N>&KNuC~3,IaO^/ǎ+brIJN>?. kkCXw]=xB=21)ݏU^2,/۴: N}ڐ)0R(|p&u,JlCWgcoxy A#ܑw{^#u !rې˨wz݇]Uu T Zw5"yчAEmn5.CIs.l'6CRLb5(r oۉW:w hF\ ]wV*LB,y+_>uOr)ō8zpgP0cggZoZF2ĩ='Q 9L\jdfc2d$_*n>6t>r L1W^`aJDD7"N=Qr,Ceofqyn @}24;Aɋ$K+b mݔ?'#H"2t͉*3lؔ{F|@+ȞK=>6[6?$YR(n6N5psTQDe "HIX#럭s{%#IK1єk2p?"sp7]gIҬG\brk9>=@=by&=VcŘJ?P0m7b@sjvB0~ԻO9>!J?%/ƶODNy}Nk=F6Hׇf#}z57wDp7e52߻5t'Mj\Zq%`O?~P/Gr)AmX\}[I͉Enirt97f 1pݳ}O۲@ة]ISx˯ -}F?EOR&#tPL~*X]Srآ|A ڃgkgum;f7=S9 $1EoU30z6r(CZ0{{M@@slpd TR `͟B }1&N V5m _H Y™7lf(Q {C)#'IQR6{ R9C!_{6i9z'͐wdՠ|6+ǡ%@z[ /e@'q˥@ĘQ56,5mԻK5oJ A7-t"лL"ped͒Y%k05/X={vK'Ⱦ00%Hڼ`[!5A: 8FbJޓ 5ĴA*$.:mj CvJLIk+S2_Bk eILR`] ).CC1LG*SktZ:Ɉ*&KyRטě̟w6E{!릮mn3hGD8xOXd^nGzA$Eb*)v.5tC-PE=y; yӻt1kQZD*TJA'^Lohd\EĴXfڡFl=}sM|AӞJ#D̯)N3S1"]0vAtw)ȍk6N^)y/ Ow6)42R`Xk7,.;!%? RݔLF{ 1\nњN/&}fT?x3RY(qig)Uh >۟=/g:J9Ȱu9B܇IK:2uh i~°'d_kBŻP2JjP?_f3{?[) CFMaC.MQ)1y(o废h< kvӠ?rYR?KȒgI~A^cf L[X׶rƕ=^ L[_!M MgkI)'YT̴P\cPJdʐChtۆdJIwF5K޲;3xEa)n2DM3<g>(Ued,2A1 MKa2%&n{t74$? dO )!ptB/<|x^1GĿֿ+alϣ}G6(}ɜ|LQtx&҉w89Q,IL Kf sp0wVFom఻fDW-u,ǎuY>yiђLܝ n U:Ol"0S ڽiioYG:3aR.#z#:BL Zd^s{g4#kej;ױ'Y_t7M{I&=tVȫqhg. sjO~] K"G,tU9 ˬ~?9sYXo2Hw= HJ4qǬX'>L^'1tZ\zk/_T!ڡ!1i8یtQ`ތ_<}+dSSy)3Md-Z>ꯥu@s@-3iZug7| iB$,2@d wx `ܯD_a Z:C O>&4߲s#y~2<m>Lh)˶lPޖ{1JJB2p BoL+b3!`2pV޺܃ bT iD".n'Z5+;~ |)MA=8< $SXRQ ~bdsdݗz x5x;|ɎĨsy"pHQE,kti9d!޻|PF @/~V:n%ްLb2yi];>#/>6jGC-"ͭck 4o^)bAQ Pytq}y1 \s#Ek!'I> U.thY_~A#F=Mse]ꢪ0pMcVL0ofd6~*u%m2ʩdѫ䳺#;釰0jovo^>pʾ3=`5d`};Z~X=v7-//r=`*[E7vxsb#11`@hEO?v#qQLYVޜ=ח kWf@#5KgKs6$Bб(/u8nhT^ˇu*"u!| c$7)4BEf .#-?/ \3>=|O3Ip8E]5KbkPvӽeVrSݚA큽@#MGTr< uA佷Ą|a fJ'ًh.<,>njT N,GɯCIL+}nI.yBeQ&5J=Rbb2FjRIZI荹5O OY)6s^mñmqkoz'j=oλ7.xj+ZP3f UD^U'i@}m`sxuarrYsR\SzM1BpTKAwd^N-^JdN7$cHG *҉׌]E j[]X0b Ŷ;f HOZ~Nܸ5Rs.Zb*Olpsjx`knqH%JSQLxl dxStcڞ>hZy% ^2_/]j#* ( q F4 b^x7D@BOi/ڻ/9s>2; |GKJm2,$9to-hژZWsO,RR=_70lb]$z,wA-&YJ]jbA JL@yܨ}N[p)~֍ʓ9ɭHV'U}nzb/uH ѽ<&fe?}":X~+]x?T9y҅LhmC!^ 6ՕUU~ub4ˮe))ɦ(Ha#_Ռfg5H[ܺ Ӗ\-.v-GT֣4X5:72o]"!/Ĵ;~2\5J> W{K\sV 45ΚGu/ͷ,:d=0Run$ƐƂy]`mV<al^sp쎣:^Moȃ5ץ"@0(eVX)*'{~d)o#j+!?A!ayr(֔A6Hi6NAه|Lhư5vǯQZLc"!ARy $D!G"^DfIIa6d;8XFRktR2K]w Q2RHbh2(mH}Ԍ16eMUE漣I%щd#){5R&j@x;tc_[[9C<255\%sT֨=G5h:z'ĥ_taMJ#аknňf$5lֽ#uC2|6/y"^8l Y/fasb}x:he!ǻgcEd "+,Lbb&0Q .8I63³}Pz嚊PO<":u-M&9ϗ +c4D׬\K]2pD׹+^I6[eg!GUb&#SXSx7zXQ} Nۻ |h^ANTZzp݀[:ܣ͸B7a'Gtd}^;|[֋)2" xHk>XcG/vS>bpmtwB`U'$9hPB`,6+RqM} OX큋EcnYGwqtUiϑ!G3N{%8=X߻,,~N,&w9+ȡ"{Gʴ=.ĽҴ~pgd3\1Nc[5aF㴓Ƚ|i7,,, ҠipQja6?B"3xfUK5g$ $K.U"(A8[;/wx^oZ/u$L|lJ"Nn>QFc4H [M)#5]B;^NoX*gEq Lwk2RY>D+K-9O1b~ۼو1D"G69$If>.Csy9uj 'jo]ѼZ0xDL 4J#1)a#Z$<}WSQ2,kG(HE2{'anڼ|/QDc1ryːb2 v"~\;wژ][5{ot+3Jv"! ~H~xVM[*bJ~i}F^uOGmjmZcGI_9tȹ^0HJH[7bWhJ* ^/nX!&k'CDdB5zLy{= | L3>)ؾs&"#V꼥C}Q-쓽f< MsPsݓM?< l-*yo!N\] 2LGh$wjLb$>ۻ 4Kﲁtψ́^'D;䤑Zd%kW: Z d>Zw8Q*v^l`E@b`jVLᦕ ,#!+ =~8!kCj+8/J@prIFV:EchF~հ~0#*"d>*SvYN|89֯>O;ɹ y5&s&Bt"]w̚kt p l$ -ۖ|Z5$daPB*#}fOmؖtO3}4F79xOlu\r91ӿ atO۰DR.ÈqX"ŀ:c "&jFZ p:P) !u4wHcLE)KP n L6R]{lCɴNl%C/L֪ 1&ș0i}c[jWn"y?6G9{B>mRanUKZUdͥ:T@-5 dm^es(n ~BUb|gMG+o4PSY~ٮקE$EXsL9ḂnWmD/0_zjZ/ISnS|ȼ|΍%W@ێFF-!z[{M8#>wMa lu@IbcɌhO8# 1TIr&jR"yyO :~hD'<x΀T-5mXئ WkJ'ԘK|421.ekZ3IL(O˅zƭF&`!Sഇ-hɴr;v['=v(0=lEe:y5A$xh]μ4HǔsmZ{NcEOSI8tO d",1¼z"2Il޸nF%׵[߫ PĠZ>gE( Rh'_n?׹;TuhZg>P\ՕD>M7 42e0aÝؘkBg^]rq%S) b#u- qG/yFSK""GM|iDkt]9pk\楌L yxQt; ~ZL>1wϘsu`.  I~(]Y ^8e}EN*Md߁ *不3r-_T Uڡt$)830;Ձ^`VBmRt@EEeݒyֻ"6RTg'FIĨ|I$YI9 8S曪b* aFD>m3$_<N,bǵMkB[*{ǤZg;LgDhcVM <_ڶʇ2Vfs/'":VV='}~̺!#׳PRWێh;ӽL 4 :$4?l`5_V0@?d|US&0^@wQkF[%<9CmlIĐڴm$u'Kp6#^% hөJ%Iݝ~_]gE sݔ=HVr3#3&ф3 m bp($ʐJ.8k`nZx|ĒzSb ;HMrl5 !ް$HzJj\R6*o{"tjUDA _aA>j@62 )YaKIQx"!3toIw;x* HJ^^3>'37{ՁN' *rn+P@,Y1WZQZ*.ԭ F VASTuR00Ԡn9}?wd71t >,>␠tR:̄U/Ԉ<چR><@`0Sf#'̝wЬF\i~ʫ6ʰOl/D,DT^+HJ"߹KُRC 73E`o3T}>i] { SjkMA&wI'{)bpgLUJ儁Y~\Y)W^^!0/ɿd +˨m\L C© MURDFkx:Nʟlk-HIʘ΄SfO Οp#f1FVCؑzɟ}?-J!Yfږ*||,h)gj*m7l0j|5'~9rؼ`Sq&Kx&@ ^p -$c F>I7QdJՀ@Hv?!lD!Dl`݌"-39 >#HeA{A=Uqx&BY߮,fX&O$J: # rr@`u?!*g<0[E#!"IbKi9{͡|׭Fh%^ ֽHDnfEhי83Ip<Γ!/"kVo@&4(TKC&^1*M!jXԕSݶ{Tv!2RM! .]4η8_,Oi 1bakDQ^ eX 1;Y)cS ^3Jom9\@TRzq@^3&fJGNoFyʧNFm4ԟŖ&aUxMqBH2UZ2&2kbiUBth(iGZR 3b`:L?:Kwf#XR@gEP3Gj6aA643jWi#gH״ 0im" 4ǴqH5NzW 2`ʋC6 +uPH^#2+R.Ð}Z #%Vlx=lAS>mux/#9& 3 tgEG=KDBHj2naH@}"MVjŘtB!U˙ĄHWjVK+oRC]<5K@~~Ow=i { Wz` plߛEQ*N?t-Oy>#{~^E# TWDǿV #kE=zdYd(L7Zg r*b5!uC7CVW8_2"3ƎDnm<"~mEbh6%Ł^QʨI83 \_rq9!7{ Ơ1^e:>dN1!V^];Kbz#cjeIoZggM(tōf)^6IX.:gI>56ܨ (Ѐx"Ҵ SkKJ90͋'f稔y X)}ſgFeR@t:lXA=;v[j*#KZaS7/^3ݢ|'uFSnps0&IDATں }CeAuy?tgn􃦏WӚ@;~2D=7ψʞF-bFW5UV#cpGo{ryx_xqxO׺eR#: [ul&rK8[ ak/bmE*2A}}!o4Z F؄\_ÆHߤ"j@DNx#{[y( u}Mv9P~N %'Wn~t9rVNZq W49q5hHscܲ&YS2 > Ll x=@__inܼ%,`PJd֍~p *?-=L$k=v5x"#k}޲lZܰN#:Je[ԑJ4+@kʴ=. \U<;[i°uHgg 4$1/ǎ#0 ذyNZ`S6丸\&ռl6}}o.5mքu؈F u'!2n;ZhVOՊ,4SeÏ34+.'D:F$F" xf/tKbX62*_Aב4'O>S| pRn8G1w:/e3?8Q4p0<&KAJCuopjgsas'!Y{8&.R6K@-nr?27j⠜1$aȀڋG}=Ƕz&1œ/E; cbjw WS unL+Q,^7~W9n0{hx3xyC : v$D ը@<^SW\hEH3'JSjft=[ӆgcMZMUkrnX5q &Rp+w$zֵ L/Ǟ\5{g+QD gM- ֬8 ] i@zdF17Wu+N)ޮ9tN Fr A;,c$$$=)s-~xG{> }ʀ>G̙_lk H0۵ltʠ:SkĘѡhv+ bo,R@ɳvƧU'RT$D댹Ws׌0<,TJK[mݼ`NF.DR@K}4blJ(:Իgq5#3i$:%qe6F/.B1BBE˷2%لQŶ '%M^r9;O2,o UqcȥK` e&B90ɀQVy;3%O(!r`‰娳rUtH 1L*)[e 6e>\,ݍ!(bTR6#(fYW[*N{wQ`~wrpR]Tpû*կ 'gBx 4]&N띇Zzv\&{@~4Yg2 BsZ6gHX(;|a/Fm#iN#f2y9AQkas8WlEmHě ExdѬS/#B8vf<)|6?i]Z1zlXFK)u`_ [Fͤ ZTKS ᭹+Գ%rvBk!~A#$osDA"$ hb{ e-RFCM]o%ɩE0+4|I21Ɉu^)E'7?{C2Zc}߷tZܴ(0俏@ VfL_ PP vϦ9^)ls=f p[t;c챻 {m{kry?Zge.IwNU[ |, dW.h+31)$D# iM}{t=ƭr:&gdpuNspM\Oc~زߚRwe_ jsjӑ7q5jHjlNDh'Ն:Mwwn۟vNl mcVg9vZ mEbޝbe 嶲W/;ϻw #fħ1N^Av:;E5MzQJw'!\C>1a|D#37tįmtQB*aS/DY42g{`.Ui(g8b= i58%2``1)Qg7\]ݲN{;@꣎,$u}Л'tn i_>Kr@-G'{E;cpbˠ8aUC{[|{9E•;7'2ӜXQ:v8+:JGE-!ēW6e)mrK6`c PSɳ rnQz;~G\S!/b&{FLn6/uB_6=ՠf z|/DaK3Xz.e x aǎ?-17F,ðԺYkM* 픥3urR{i:PKy7$}&Z%mKN[]{@y}m,06~U:u~rr_3=p14XquW_|bޥP; Vk sRAo^,7n e t-Vv&rO%݋g۵ \;n0si@#ɼ=п|8#d: Z-$F)oW>X 6FaΉb;mdQiZy2bn@a#xyyZ_}x{8_*{g͋d%39iL>G 'L\ۜϞ'~Fܔ݀kĢ{G -ۚW:E=Tshr{8 ;CO\蠯M`$LAx Ng׭Qmjqi׸$)`%qnߙq`sU$1kt0=>/Sgr yYS1< +Ϣ=8_]Nq7?w!KsqB\]gOňa_sʺ' ɉW"pTθx]:/ۚNn^C֙zg?sh=<ȺzУnPr5l2D`zdI/y"<yyAB_{θ'a<ØJla41( 险5 !%3 13\Cc58-z%wJ(+EFcL+29hjE&( óPGR'0N׎ߣLDFZ_'L{fzJ#^!G{l&{h-~̯?iyE}>2ޢ"nS-L6 0i54sr12|~i[`D ^O5 9qS4 LȪ w!'N7rFs~l^V֧A߉xջɏV75E1_:d`W"~{׏0e4& NfG9qHl} u# n$7: dz۸YaPs)sÆmI)qMϤVKug@.N;oT$ Ms1~(>T=MGhA3ܑȶ3QyDQ{TꭩpYWd͚q$3?FMԳI:<ٖdI%16PRIHpLxu%=Af8h'0o!nQǠ閺|G׽Cq&$'%!5\GQ\l o離ZU#>t_E'\k_1n]? >F ;ZZQTZ{<נNlJ{OJIXa0ek?Nކ.Ij:~Rt?)IHTrAA'%Oɢ~>20"]Ȍt=O$ж=x/4UNè EE>C咏G↞Z3!gW{i'lqj Ճy ܰ:&IB/IK'S4-CzH5 +Ukgq1Kby 3A(/Sdt({ZK>FTkr Vߐ\._~&LP<aDa3U(~iϭ8&M=7I`IIMUJd? |m[mrp3ur1ҙDx7vRϣ@@eMEGew ?DZ Z I!1Б6J~ē] ݭ@jy~U-:{cvuo^~fs#ZeH<|Qvo9جn gFu򛦺8eufGjp\>yA!AP] %`)^Z}n{Q}jRF楌!1:%ĝ[%4^wL p{3I~Pka }a=_=ϠS׃5 HK殕bj)3uCj/}xh1Lb8ZF%`dKE5`d,ǀ KOq,Ѝd#)\+HaC;Q!n kr``7L8rpeV(K(h@Y{pgLw4%Vr|ʐϷI W7 .f x)g^lIv/~bi xs5< *odsaI}3}?s Xvݏ38 |lPH DBbԝD6$++E&bl,pQ|?'GaX^ܕ:n7>W"QtL\ "X_GcGɆ%_uaV]9Y7P3Qd` J/uC"0?>"!3DkT@wFM` R4"coY8QF}*bT: cOo6 O "9 $**=Mr#B9ݰl\S k>!6ׄ3!׽ |`kFl@+e85F7N2C&*ˁ-[1q=/Nǟ_S^3ƥ|~yA)s E+X_ݚ;`~("syaͷ} !Dݐ!Ff;-Њ^Spq^2D?Do |)!ɰ\^C-%Gڮ#XTd^9jjRsDe'g/'ܥR]_ $Q$)L1]{G}Rũ\ʳ1s%)\By۬> P0Oae`/Eز.TWpg*E vk뿽{ظX; |m-ąEY^Eƚ%?θMRBQ=?9|2gr𪴎iL1Ap&%!w-]"$:>M1L4&k:oL1Dem͒H4 ۽De\@pk 9)KicK"/BP:vƭzW@Ȑ&C%r[($ ۇ tx1vpiTBo2W Yt@n*0u 0g*%4er̻Qr@$ BmW]B>f0t(bU맀w.:Z$4Wš;5H jVCi 1hJ 1aD 9jP/E=Qy))Q9ȁ5dⅽ~ Cļ2a4!'Yuje=%#!`~Nsyg#8 ! %1*"(\ *!uS=@O*s73M<u yXi=GH2$7}B.9m/UAϚAE@/͹&o~Ò8.7 cB8YFrWV̞zK-XY&ߋ J I+i4 |? Y_z@/,3(ˤ>慐)y ~[mץ,i&+H+ѕ`ٝqaI;N-ZLV7`~-1yD$&Q>ӿ迋# tGfJ=}YGC"*mĭr!7Y6;Eb_ {%|?}H^Օ3g5f$fg7r$mec}y/ۃM& ž 2M!DW6 z|8kup6&ʖ{Ϝ~xV{(L'/6xٵ lKuI @ yһ`27֙ψ DyL!CXscJ'P(VaQ4T5D1^͉M>G!EҰˮNk;W)B C3.־ĕ f/ ΟbSOꇫsAO9d\l$gNAݙ>h!.xa NG6iޅ0G(,ObB0he\(ۺF!h lE)F>cGaAq&),j8i 4Ah @^ ǰHܷΜ~8CRZܶBA?wH`o&yAYVQ~PgG$#Y)X6Goۥ%4im"w/]4+G'Џl2s[֑6l8Ƈ&zGvk(F+[HfɜNᝧaș %$9d%.#,HZM9?)+ B3@rWbb>-ҧ.=$ˁ?2{Nnj!"upJe#eZEr`c/3wګsDJW4^\qbms~}{`y'c`qG{hp=5"~~ | 9\rPl+eV0Ty~m:G[q0߻3BR GWޏ~x7~4LK,񼸯}}nAطlahj$7̵%׀"u lɨfekmeJ3VoOo quز8zg)W9cwY@=~=zE^,p?`,R'pV1Y˅cVb4` CUQ(+"Fqaz7/I/rд|22a_k|;/k*qX>PO0qɈ*8L8a>{WqUK?\# E9"p7]o}o--\.BI' fJM,x\JA-L-|㴑Po#METb#l+(L+{W瀷ʈoX{djHuꞶ!IẻfP' ն_/.kbPIzI9߇(h0S$&gu}85d#=1)q%ƏzO,8@38 MKe(-*CQnaLn)[-NJR\kA?t-W9[醛H IQ[p5pݏR7d͑ϪAm$EBu [i͒\lMvxKn@DmN3Y3^ y)QRCbKFqײ(k*s_80,l;fKN8e7*MlCl3r9FܘFH +#d}J򲉗7g?yٜ+7܏x<9MNrm,#?6E#4y{w2Uz5ΈYLJY_\x=y)z;hO<n|N_psww>GF\ʰZO Y:Y!Q#|ز>dv{T) ,Ă$y@8{.5.Lb@2g`+XT:EƊaE)m͒⹚0* I%kȌPBpn>zS%#qoj*{.Qxx=+RPܿHYS݈Gi%UFtC, bXQ$cvإ놃>x#ْ4rﱓRj,Rc^HAD*{w&HK#j= H)1k̉CUQs=/M \%6R֔IC3QidI&jQjeq'Ϥ:1 F׉/ǎK:GrV@R|q)4HR-UbgL'ͧa;y{Ť辛dr:I|дK.')Is%eBKDRȴ1&1 ַ7/wnLkT(M6$nAXވ1 M#2P#lT*;)q(0N E֚%9lȿ6`4 :nkFt86AIC:1oiF&Ĉ? r3GY`S_j_r>6?c= mlE=jR~xVEG!r~lr'59)3p '5ΥXF5ZG^N3D]zt\$`N^`߯L'׾q,T^&cx7g`114aFg :*'uP!OGGmxRhsqOw|C(F$ 9y_\ , 2i#Quc58"_ϵWD:{H9i_ׯ'|>qX; rdťm11h񠠦`=v~Jb fS`bͤ{#Pb Eu8$.J,6<1Z*h} gtَ,y^3Ҷjtck2G#ļ( r3tq)B>|L2iH#O"M%CZHPc3Ӧu "Q"iHj}9̘+8(4{T`iXV,F#CD܏pOQdu0tÍFHWSB΁\eV2HC4IL!)QbŸE O!l%KCI.r42}Ti0ܦ>FӞUmj1I BDDb^Ɲ@Qbi%JAFV[)daIKX!K6OZ42&11}ӜiCHsHKZ1Sp'z&I'Ɯ>OCd(8ILLZBa4D8YiA% [AFV̼aGh MbVQ"mua׆h$3I BUFeJdNa/[|QZX ,id\+Jz>,p 7J`8'n20Nex-!԰eč2&$ S%}5V,$n,H} 7uj+V9ILjj2I/Rc<%va)O'gC.Z8Ս%a] $p ,xǻb;VidLbRPC0\&d&hR`I&0$EMwVfQÝ{V7PidLbRi%%j:]&ɽL` C%YFh L. {Ia4N#c&[!T2IM#B)$5jrQ=ruبA.wL`P7i6F#@!"& &(f,2i?Y76,H&1XFoLҊԆ P IQ`IeCnU{?nԹőG^~t_h$Fb 6S!T?2I*RSx%+1h7'3rоȒ}]s~3>Hp 7vnonoZ%1aؠz}Iiɞe[%7˯KR[A7c/q ڝ/dgTݤ@sF a B.SD2سhd(Z+Չ uc^ҍg ;ֿZ^kÒ:ؘI #BIslgᕈiGE%%,ry^@b`s6,)H&1l^BJܤ]ƞeFP%y96MPoU7;_ 7vn1ܮunu@RLbGj^쒘]ƞeFXJ㶸9 Er_l1QX^ay<&+-g+qvn1LFp[idLb!5`{v/:3 qF؆^K'GمKB[Yz?dAo7#p>[ G\4*o?n`cN#c0qǃr2queF1FrQTg^\A*nQa\sy?Lb0Q _Y`3)`q49o*s0hE<q9@idX{ dO P8~xBo 7Q ScvgkR3ava[]9cI3' ruAXR=n (+HG CwKrq(vVqho8v?e/VjDkO3ig5wSBY2)sa? Bb:j:::iÖAյ \u8Xn{++@q#:CLD`T*F1~0h']3]3?zuw=,ˀ'{z[oį9^'=\74-;7/uFZV^qRo7/w K?ؿ;5@/peX0q08b'(^q `4LCu Q^ iUu21hлM6sDK#QXy0 #0}Quh%u28,?&ȉW(t<6dFڿ(pBSR!EY**ӛ anC-|mߩ*\!C!]%{ [৴m+MJH;j;zmm`R,$~ G2v'0+\#5xP)L`!t1"rBr9ѪyL`>\W92BjuJ-k Ô~* Ql`]8hP9ibfq#Ff"Ւb1$/sU @?C#8>dd<\(,Ft:kټ.ĀW#h,|r(bb!yw12"quюPFlƤpԅ6>{iau<˹U:#0Oltj*lŀZ~EϾ&`^H|̉Y}L4R^hFڐ~.}?swFBqƬWn"d~<+rj5Ĵ³wŁ^uF Cz>))}!0p H&ψt7k+!0?ywFt6#p$}0"ήe1QdJ'ppTN FVH2pJ'7^?QF1 %crT။I1.?VF:e?c߾>ƭdV1i3Ltuu_ B~ãVyMݲs#1J4`a]U'.8;Ow_[rs}#6wdp~F2Q&1 cPs[^pi`݋{3=t/1 U2PX›; ܚl1RG:kvײ}jzA6)FȬY>4+d .H;z@bvI50x,ik4)o{]gYNmabga<0OtY FX(H3D9MJB`%DH-25k Fa|3@b8lu NTcDQEA)AX^ mm+=``0DV#2LZ H(t2`0 *. `0 F$`0 `0a0 `0 &1 `0 $`0 `0a0 `0 &1 `0 $`0 `0a0 `0 &1 `0 $`0 `0a0 `0 &1 `0 $`0 `0a0 `0 &1 `0 $`0 `$`0 `01Z Pg`0 `0mZd `0 #U``0 `0RW6 ~< `0 #Fݒ*?3`0 #Nt2`0 *0a0 `0Lb `0 I `0 `0a0 `0Lb `0 I `0 `0a0 `0Lb `0 I `0 `0a0 `0+0 + $_w# yydUeHQ{Mܡ&l|"=+e 9or \Nw5C˽ЂuZK^Χ?o[N^|gf UF'ntm4Z9sT*+934w`0N`_.L7jp$ޢFu6:]^#8˸ϝ$oAn~o{rnzmְ~ 2@qxne4w`0dLbIb a.$f/{  >z/ѩ"MОSas ynQuss!o9X+a.5=ךi( ~;Nf0Lb F\ߍQ9^fSI qcދpNVaÝW~J^ˠG253L\a8q7``0RHbg\8q)E9[\$L0>.^?(H5ʶ6Yϔ\<:'kΐju>I(fw`051 -?6du6Ǫ_&D[aBh>s͇,3 ccfPqD7t<#yt/yns=0#wjl2kqkuX[#.Q4N I 0ӜEew-m`x!u g8\Zw#WMZB+&;o sɵ{e~G<[%nΓ#<XH4`0̂0cv.?͎IIrΉ Z_.?ދvLJwGoeܟGugxzCZQ4o>pÅ cV1qG^,`c%yoIVyB9,ddȳ)_])\^ פ^Zu %Tn6:jYѮYJj4I`0̂#1 F'0i"?`0IGb69U< ^f& `0.g0 j%04`<`a0 jʝ.ê]Qb0 `0Z讀PLca0 &1 `$  }51 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `0a0 `0 &1 `0 I `0 `$Lu䵯hdT0 `0Jb&G i6%~4 `0`0 ζhopI JrRe0g[~wŤ%qs0 `P#{J50a0X29&7@yw<` X``07pĬw\b`0V;2`0 ``0 `0Lb `0 I `0 H`0 hd!=%>eq&1 `0 F([+dHRI `0 Nd ZkbI<٫ި$5C&1 `0 #TCDa뱾r-}޵_8{B >3a0 `0҅ %0]ZO݆f5r Lcze$:3`)J/d `0 FB`v`,fkE`(6`x⑛qqtj$`0 `0d?06Ջe?' 2P{DLI `0 0Uȏ.pTי!M!d!@ےe+R vN BnĐ!cؚMY 37 n0#E$TL#,y,ɯ n0v#h=iݾtn߾?[OU->99xN? >Dԋ8I(b!B!Z)QjTxvwe#[*B!B^DƳx} *z!xFC!BI &edJB!B.,Y1So!B!8 B!B0/Ot%B!Bksgr|+?"B!.~KztwH&`)b!B!zmhδLAxzUldO,¢O`D F˰, r!=|:?dz oCʹx8]:ԏVB!dzĉu2|1h/G \gD"ƋBqr,)LR!gL%xNt p/?E'Y"2{!,!BN ^DvG JO[ǦWdӀR)Ue) P0T7N,<1fZA{؂!  aS#=rB! ~u1߆eb*/V`2-3E ~L1 C˱<(L Fl zpliv??Mͻ lF!gb,<Bx`;ΕX: DVe\9EqDfeIqOb%*B!X ٗ г{{⭾FuH P:,+pm326/ٮ15@s'Kt1=g=@ !0 K~݊րKt$N8<CzrۂDaxY}]mۊI}%*j<5DO, _=Uxy2 t:&B1bY<ba(@_Fl vu@Q:Q%DT6'^+'b@o",泃8"*Pqũ D*&!`Ćwd#{H͖bsxKU3/t羆NfD3#u3밮<:a]y3jB!Ħbzl)؊GzgcjpZXm{rB!(bLq*ђXD3F3Lf4E E\L\s rl6d#{b}R2, =tWfс^N7ЗIpL230B!"4!,L1:ss8nC[y7 F`D F˱<$)O̞ "PMt]3,}!B9~̲L}=Cv+NoL4dkwt[ U}cȽ>dk0yB!A8뙁'pH9&q@9st9p+>֩gN" Ыd])9=!B2awHπ%T$[ov 4 ߌ"t"R3ۯƙ9MU)?3"ŋ;yZw@!psuB!f#Xő~8ch^Y[Y&G2RĤ<`,wyi-FP&GȇX$v畉eHt\YF~$!B2m0-0]A;)VFnZq43cpɫ$ VyӠbPA&lcrĈKҦPN+B!1 <­$/AG?4~ ۵E"rAK$|Mlf|͝ᾱx׎`d`1J6hP~D)h]"bB(b, B|]$Ϻ!\a^٘}n}<e)~CPJP8]W:XuGqI\B' )oeꆙӨn|_Xşᾱ\ϗaY(1M.7.$zBntGG#7p˟ә:ԋ\ȵ084*S ba!<EX}E.P}1{ov8]K$E8Z;_yow;}Wp%G*'-e?t4j:L,qmSH&$>AyXVDV2n1"42F`Xj#D6l![Sy:yo]#"yH57G.zR W@&h4MlTrOh9j/h."$U# lö^-//X:y)JmA]~9c W:~Ip4;mr/ ߋ{GwK$Ucr08(D|x,bEĉt0+Ʋ=qVA4H^cH"^t9-ى%!ah~=w( cD Ote\yܻnr r~Gy7ZX}%U!')ߢ*T}:1S\{b#6 "jFsYG!OF88׽RHXUrݏP6O%x>>#]j> F!(>e|y <i !j$!j|Rle-֞_ WeEVɅx_[MсS8s# <>jU 33`H,2, mVA)Уˍcܵ CZF@s?O|EP%Tԍ|^iJfm3Y%zԟO-*wO =jc ֬:z̙"[~Awc¯J02Ʒhײ CnG P0*fPeGqd-֞W+B%j|@>c@u!x 2c#MRi߄Ljf a[o Vl؊/ᥢ&4yeq,sb܀ ={7i_P5Cy`;{$Q#U2Fē翇 Le*{]Ra .A&Z38ߋBp:wW -{P|v܎V4J zQ>stot(x>ه}=[ӌfov`!GD(ލ'Z Eě;Б rͬ,C:{9b_ a'X.3#,F/%[SA1əe &a>J{Gc*i0[,KktagdX'K$x^z#`D]UD0,KȞk9+rϟyY#|8z<+1FDxzaK1C(ĤFl .|gik<ȓR>y >Ab61C35Z%*ų]~>u+ZG,0`/?ա:ЊY}C|"^<zNPIc"[ x*`,lFAХX:.Y_%d$CM/gqu=y7:q׫x55h>WqUSۈ" b/ Dv:rgp.GlGҮT؉%byWgND0([cf!H$baY<58yy<>U1"uXWE[œe("_vbgI:$5 O~L4XAba( 8:ƶը=g~"%xu鹤E(`6,:]Vl~1"bsxKMqwPCџvwzs2#R4üg X*r;a!``I5{- NFuh6@zRKtT 11xG>ćm;&_BqD8yL8P#5{=| ()yϟD~WƊ;6J®ry=8NtsD4^D2bh-Eԛ8ޅ]j꘤š m<;Сy(zԧ ZN6'b&(251~D^xB .aky6`CY}]e[u@h~7qS@؋2scʈp83UlbzK[u@v?טTú=@ϡ1e=qW=M78"^,yxA(;":xt@I:a]y=3S4 f(K@{㮣8Zk[EաΛK.rKԄ ǸE2PڳJA];d֕_mض*a80Fďzljzo) 7N<]jbdUꓥXd+"A<߲٬dA;W|cŪ% dLLQδ@Jh p*4"9%`q=kKrӂD'L؏6yQh^93 ʿǿ?/{h)%/"x<rI':Б;KP]E݋{78Z2š~ dFqI)WbcI40_܎!@D!:aT%v#%V3HJ ,rEL FM.F͓$v-bI?7vrqNL ükoYg%r m)hhy?wbgV!s). gaÏHZ&H ][x`%ce(݈8.-K bojeS~#r{;Y"gn`d`C/Im\/k8]֢جyF3" _-j5ghMѬ7K K c]Ĥ_џ/F7csLPXP{THIT:k))@J"FsGt?O9}˿T ȹ֟an1]fq;^/3x+FYr1/`ZtT7ίCWdyRg8m)ѠxyxeXz OS4̔4t/ew*D@d?W:Ӂ?/b1@C9ñzei@Z,d٠cfgfh{ dGUp((0 Ln?kbM㍖+OPN46Qpbf&GOMAD[Vތ)yHQ_[`ә=RG wӆ6l6,`)JCjfEv8Zv`FN+ZǸ9k"-A ~$N{Dw ȓnA]ˬAf(blVIb& bF<y/h}/Ye!qVBi| = pX  VpXz4J"JzxaBk3ȱ~O i{̀$dnBҾX} UgkZ@21Stދ5K04N W50X$b? L Q懿(`^)fNFy*Pށù rIV7 g1zv+~wAyR`{m[RoA򁠅XٷKQ"?bY>KGbh::. (gB"6c*T* el05\˩EmI<֡ΫUr2Me Kw2!(Tӑ04im'/V< )[7!Gmh/ڋPW1L'0m"B鸇-I?]E#m )Te te3 !B!N!Bg41E !B`Z2L%SEbarl ?;?%Ӷ'G e& !B!b:*sc?!B!YPB!B(b!B!(mARB!BTDLq !B!JB!BC!B!F!_N B!B!B(b!B!"B!B(b!B!1B!BC!B!1B!B!B!B!B!BE !B!PB!BE !B!"h ~'B!LE8f%XC {ؘbV!Bl_(b,'0HwS0lB/ L0> .klj6HL󊾞BQ9.9= bg/Q!V1wkbgӭ2Pd @tYeFIz|_~V1QxO>F c8Wƕ"zay uƢ SvJ,0 rVgvx@e132g(b2B$jX Rİ+\İ  FcKa1@$f TE )1B"b}Z0}"Ɓ&Ι)` 6 O1)`R1Ī>C!c U**"Nu|3ش"bR12Ċ>UC!cA,&^"^eS3@0@$ lv"bN@x6ZD:rm gزQX/`1JJJvrF]#D L _m[N  ;\+=l1Nq`y EL"'-2^Ĩ &AƬ #D 4%bqJ?}l1v8N`oEMEL Ft;Ρ%`1L+)gF㔎8 `XdG1 @̓R!dSNd&"F.dU o.籶AnMҬ168ȆX//%_i1K(;OM2*bDGoe+ep7HՍF{?ɐr'f+On>xbS(hLj=,oK☢r] Y"&0L^s3J4k,#:.}// ELfwį " xKzP O&Қ∙wN@̚J?mBG,M?'9$p(rb "F28N8)1s} Dנ{D܃@rq nBA!vDFt)g8 iBD7r1#Fǥػ+g6cKm)F{lx\h>I0m!' 1M3#F%|&xRdIDATYv8<.obrz7 %ӰwKB! O$&X1&RԘ7+\gAo}#/#=/\>(_ij30,`(*|5M$QKCdx)6P&iGHȴ1xV-s6VK2"rDgM]*츌8NiV{|@&gev7bZ;ϼ2G 12&gHω ItӧM&3>{͂P8~d؏H6w-<3ވ-DEx7K| _fҏ;Ħ>>n=hٟ_crqŋ_>k)9)_YbD:X7! f1E]NW&Dr:( ,p R[-t3]S E E S)UX1#mP'fB'a,'U61AJ1CM(b4g!5XoX@u}>tႪz]+vtvuaC}}f;o7:שQ=1צzL)P{]Oo/^ػ7|mu5jkSnMy]k{;vt|R=~:-K:-S^vKuIH{gV߬FwvzܬFwvKBCOooH3:-ܸy3:5~!u߿ySuCCc]WZS^gG)j;֥$D"Nui~]B!BŊnmu"mEyPw :eYu9ujB! j`Ѕ Zufײ^uK}ӓ뽸1q={1r{$o2rQ9fnsc[aN g(ڍ >L\L 33ſ3u\٤=1 F]E !Qv+LDk wV.qdިGh*pMǷ~BՆ7 u@>{T±~uRP} o3cCXmۦ\#ۖ@v%Z" [/#5;>-7v.Mݷ_Od@O9YL0}0^Mceg621 oԣ@ߪ )ؐo<_~C.-Enn\UŒJ;ת!{f7=h7-O6zZm(-5g}\d T bh/;MOhjϚG^oe@߅-Y&zӶt[N6tB;;33>Ӫ#gdA1띾>G:u6dLi7m* vX]b{;i7-]Y=T}f[fee3`}1c!^|2z;u$ ~.hv D})+BMMm0Q߭-I: b͙v[sm>l3q [Ϋ]E1VUv %Gh>J[/Ž&S}kU2hqG;t=z#rgGnΫ]G,/I/(NIשx]Z+**bh~20 `]K?7JsPrW',[ץZhM2nSEn;hC8z9bt7L !p4?];j>z߲@ҙ 8e?u y_,U_M)/.ȾD>a0#Ɠ44oY ˈ7P\om<#zа!x_ n}$z]|;i˔Sz#;lEn^WzhKhK @TƷ~~GGlWze޺E9nJqp4*;`C1Q8@%";ܡ i7g0V.bf:>\]ʖ0D^+{ LMO}=XwD>"d ;iyoҔf =GqnhnKoptЗl:'w|<&94w7oɮH^ֲǵ}PCQ,s/QpYnfT]5vنSei7gK28~p%4H/Ѱ!k# 0aO}{c Ԭ/*bYU4lz?2#fO9{Ϋ]Gq, @u##˔Gwei0:crec7vؠ>6=bl,#6}6mkD_Q44i,M&`r:-څ: ) B N*yz-/Ed$ͻ3n 0;[ߜx}Z쨗Xi;ȎI}vB}f+ "o{«f}K0}#4lz(:Ŭ41 G, r fqy˘, q|V$8OQ.O~Mr]m"ܟ@$;ZkDp s h7ڍvsfRkGlQ imDY䁾<,"Зg%S1^| -.Fӏ=ش3KcxdSK: c@vԙܲ)ͤ8YS;@G:-[kO]$/ߩ_Bӊ;:)LΌ/+~L*Fdsh:O?`ݬ[Oo/nܼ99ژ`bsrΡx)I8ꭇ5P4IkG6ԫl3hBߜXX\(߳}xP>zg{I}QIaC(.=ߋ,oY|-FæTƿ}~?OxΈ1]Ĩi ףNxCZ-[+kW- ڍvUp)~o*b4rz1`@_Nz]an`ӥnQniM;l׽}ӵ[ JƴMmʄvxl#նcELݢl?rѴ9˥w']9*ܲG.՞msSw,rOnzf)}GjA$,/Oo [}]k=b>ܨcGĴM-|l;Ie3gD&pe"X8X1iv*(7[LG[(k>]ƜR:o hjBr~9_RNbv{aޘsrSt[s_ vdfFhz}-iQ[CO͙v3*П81;:vE [ag_l'bE+3 o V/7|cݢZ]tvuaC}diJ-:21֋+R XPa`vӯnZD ͨr~96,7SWœ_^9mvR\T/NJf2nvٙj7E#\⮬-U|p;bIl!bp|z9;wzюP=^t^R"BӉ1w.uTy^.DkN$)@ NQnZE -[Wy>3,jG#DR;=}."Af$j7#4ݕՃ/.47u>ĬOEY"K^d1zve;#;NB('1G. bpNpv(Ɉ'kݜLOo/nܼ ʞmz086cC1-ʅgn]`7BnٙL\KoK\|,F,+([msfPsKWB+5髆.\`۴M,ဎ#DZqcr+E3IE#}ew32Qȑߎukwiu2D3R6 Q1qI/[£S{"Hu###xoR|@U9,;zPKfG@~]mή9B(bq8e,r;}}紶kմuamuu3Zm ή6P^guIKʹ^'O_XP*EZۣWTTή.ujr?L]m*ʺ$!"FyfU^gW焺#ӕT!"L؟u#FG^'CC1׉kSb[QQ/g]N~"1ʛ܎7::>'%!Bq!ĎlM&P# ӹVTTN64>I~:k_ceXhޯzCElZϘfueug~y ؿl8 |磿-,i4Xd9 ">-gL^R[XU5uJeH6Ȫ ZX474>~e$l.[&|3/mg> -3/K;-,n4>~8yL4_Z ȯ ;{^͖Χ#;haܲ\i< hb}&`u /M_Y4=Jpq 7~ hap x2"d>﫳fMAzL;JnPVC3R {*`3 i_@x2"Ih|j'Qp>j+yM^S-lc#|7;<0cAȻ0ML%p)@.!&\ B}/KA@7G#_cE뉙h⶯q2;nqh*֞{`(C~ce3X=uE7]~^_t{VfI\4^,̅1UϨ^%lE=T8a>|;ie_0q"|[ #գѬp靌ʢZk&|d*jO K&'LQIe6DhO}AEgiYIXW_N?T:^E{7Fj/o7Lv4ܣh}K]|]egA^pQ uG rruz;$3ivǢلnG=ַlvh\rAϘdv[M]?7F2&671?s0M[6Z?6ʲњIf5Vc> 8~t]3yhڕ=?[.d~UC'7jW=Fk׏MLzcBfG\v^8S~e߯9,ۗ0ѷ<4:p|y z+Ay,'ٻh8C黹!Q9*T &z7F2dëSߣ[N䪭]tn;qP=Vu7h6XٗV/Nτ2tWOxQ hOS6{.ʛ84փnM7pLmno.l> Q)J«Q)J«Q)J#:eW%PP*y2>7P*yh0je7yCJP)J ̻c"vP*yhg*WCcIX$:^%c OPP*yP:|QSx<`TJUΏ*W#43mI^B|&TNUCOdT)J!qfGRx<j«o:hMU^%`uAgZ*y_2ORx<ƃ%:W#ALe GBi NUB7«gSx<|td gLU ^%<|ѢRxü~JeWנRx<-*W?]P)J>c1Rx<#r *W9*᩠Rx<<}cT s^%0Rx<{sT [oJUp ~+^%*WJUp T #o «D*y8Q)Jo«{o/Vo+ i>G@; @reÿC!1] U~9| c9ޘ}mdqe…NSv<NRx<*WkDnB *9;NU;JU;JU ;JU كqq«qQ)J05*Wc9*Wc9*Wc=*Wc=*WcsJU\Rx2), g1օھs͙ys 9~G"s Q]yx`"z Dt{.~~i":`nYlTN@󃗉<˛4K>(RXկVʵ r G" 8s:Qm თG' yCF@G]"? EZz{UѵX>778Eڽ8-_8_, ~- ӧc =¿Xn"`, ߏ=z. e?ޕw:¶Ż2G a()лPd 9 >4 _PĿ _X0ԆAF@3OD_hl/8Ay 0OG rnC16[!{1+1`bwhWSu6ޜ8>[. yMMV^aJ}2]nLIi;>&[zۉK]M\%q ~/:ød<+0.oD@AGCykMϲ62p>^3z^ۤk. 1Z>ލxR&k.̒k4$L_Wݡ_vU?ECMkIENDB`starpu_gflops_non_linear_memset_regression_based_energy.png000066400000000000000000000154561507764646700353670ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/imagesPNG  IHDR,8PLTE@Ai @0`@ԥ**@333MMMfff22U݂d"".Wp͇PErz挽k ܠ ݠݐP@Uk/@@``@@`pͷ|@ ???___;;;s/ pHYs+IDATx (ew0hPTxz $ U!Kml\n*Q'MA'O]"$ .5Ry&`^N[aY*k_$Aʶ*pB >.`+,]=Xrw51{Y ! gg) ܕg)0-Ȼ6Uf hDžEwT. 7Ewoj_ILB;NNi&}[w'@[iigqTeeP5RlV (kByɻw]wր0!FzyNF vÞss-r7Mü hZ@B sdhU 9ȈヘFki/^u=M,E]]8SZHͿG;o;r?ʪCLs%m2a 6HaIT$MNJ^~\K3BsWr$:!Bl|$`~/ PY4M y=Lz,wNZay++t͡P;Æo6R6Ijf;0&5EU+-YQݧ ; m jiި?I޾vrUIOvjjW@{) S'ihRE횀P^TyJb>] hW -4w{8*r;;Cd]hgSP'[Iaخc'/ehZU&ܩd2:c2[  enO y~2u;Z`u1QU[,\׮v"$?r(D7}ӷ;׀t:}z 8Mgg(S+Z\^zjS[i/~rWӥhx94)4Ci='1ڸ^Y$}B  nMh  wamY})={Ct䝫WJ62 [Nrfq)ϟ=գ4)wtޢŽčJb2/17[|oi'e>QΔksc;T[iїi+7ba_'1AJ]" E# $C=ѕiTN_?p)dbnJH \8AH H $7-}^}B,>Q<[Je[\tgNs/DW[ZԪ&9{n )C7jq;&VӐu?n<9>eѯw>feOZn_-0ZE?oKO/5c2Gf]LtY|ԉ.lv0dCS +rڕ=juYU/[bYF˔ڀɱj qi!qcZݧ'-銜4HS.1D٥@t)͜Cj [6i>~v"j]n,Y.&h 0pX M3Y{Ϥ`pkNl^},g֎9ʞveun)2%T@[}#p >jp/\@@u!e.[veOt:ꉀNxәͼbP5`=C ?Oƽ`nH^U@풁2e *L aѱ ,,tjuk@j@ h~ъ*veN¬jzMhpy.lOuevlV@=#m4wQnG+rڕ=9-23Vl՚[q7E,lP'0Do~B6p'w68|C;!K<_Bc Xq4<_Bc goaέN=ӱ&K\7+wӱ&K0+27n1kӱsgOǚ,)tXLǚ,)tXLǒ;55n, c#MǚW;LR)0#֟w%>/3pn+=cekӱn;}XHHӱx &ӱF5^e:CyLz4X“A@H BR /p;!  $!)IA@H BRCB A@H BR $!)IA@H BRoc{[-Ȋÿ#߇wG\_WKH>[fw1&eu/]D7!2}%^ D0ѼvMw0uXu|-8p`[xoicrY$>Ezu20J^52Eyt;gN}́A׷^!sBN@pkČ9A}#b9҈!UgC/<xDF(C6k[&uIE}OVK_>F|*ƿA7["}} ֿ1JBoo!n >e臀d? ɗ2`@|-Uɞb M& l(a7)~=@|=@|z >0a > ` >]0߾lx|Ω.7_w D'AhA1$; D >2m >=T˭'H#gTНA{oߐ=hn<Aa#Me80 }7|? |9< MJ_\8N:|ҕ$މ8r?sR?oʈqK/e˟pY4E L\:u 59wDRTYBWOZk< Q4hf]"*kt\}㍞Ш9{8O~TmU\ 0w\=Wwo^Z9q]8ퟋ3zNVoѷM!*VF6m";pk+\4 hw tǝl&n(+Ag0 5Oq@4.!Q̭B\dI@ 6YnA?0=@v_Fu,۱>8|9[A|y PiY㊀K/X ^pb>_dc/)٘/V 0 TZq L `F `2P PTlP1gvBėP"BSR¢>4<Ϗ~&$\+Qns &X@Y "`<5Mpm#HP-BK'g[!ژU_p{E:]K + { HhaPv `< > ;!~" #,*;c[Y\sP0 qW!׀E[FoǮ%"A!z3vP DaͿ.BlH#V\)M(}3A~e6muq/D(&5\D7O#` % k ,߇"a!V8E|?; !XsĴmDXC8C8cp-7!|q" 0RQ>~ؿ0ruGԓ6P  pJd}lA)/&#OOz XEъ6+}7# v2ׁ3+qE>9 pJj}c]#|m@ ѿT\}4 uI ( DE5EAg\mN*~q99"4E 8_MIW6f~q99!FpP/c_ {i~ 50@@~mA@TK % x^.XJ+}̶Õ[Atxn ='&~L*e4(r.L>k@lPQ50n0L4<˱_ O IVxa/ 0 s0a 9_ x ) `(x @ _( D' `|oFA= `lowvqA `To/ F1 `Lp7F `<LF# `44""A0 |+Q0OBR $!)0 $!)IA@H BR $!)IA@H BR $!)< 8H BR $!)IA@H BR $!)IA@H BR $!)IA@H BR $%E)zGY! (bPE3p%œX"mj'B x$B#dCao-aN$j}kk6M#*l[v ݳ|X?p)dbnJH \8A07F{1d3qC}hŨ O o޳S9N0kbEY"PMVdv־Q{wPXbZv'zG7٩$wWU9܊\n"FtE~ ^Z G =;ՍG)kճ҃ycfg`4ūU3I^p;rގ,~ȳp=]Q9V{|ySv^YRo"3vdD96._YPh~? he覓3ZlEfgi+>\1 V- _[1q ౠ腨 otpBT gEwuBfA;!A:!=m|U%_7= ^Z E?OYҳ";oM¢·<;yx}A7kgYMZ؎ތf wuhg?/t'sTy!)ZCTBMr-E_Ǚy 91Dzu 9w]' r-|Ser{iERF>isy'a@QB 6dˤ_1+!39qQ^"<)~H_5E[A@RͼR:2W vV`%W-`.n]O}r7].dLS0c֘Հҍ^ CvɘyYx.k@YiqJ5JnX]LM`Q/Xlա5+qÆDևVԦ+3[W{ydwU6"BZ(իlIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/starpu_log_arr.png000066400000000000000000000371301507764646700250450ustar00rootroot00000000000000PNG  IHDR V &iCCPiccHgPY<@BPC*%Z(ҫ@PEl+4EE\"kE t,ʺqQAYp?{ossp e{bRɎ(tջ{i常r)teJOYLgWX\2XyKο,]~ )sT8بlOrTzV $G&D~SGfDnr&AltL:5204_gK!FgE_zs zt@WOm|:3z @(U t08|A $`(E`8@-hM<.L@ށA2@F 7 Bh( ʀrPT UAuP t݄84 }aXև0v}p4 ^O6< "@]p$BV)GVC!Bd h(&JerFTVT1 uՁECDh2Z@Ёht]nDѓw aa0Θ Lf3sӆL`X VkaӱJI%vG)p`\.Wk] p xq:o—;IA"X| q B+aH$͉^XvbqD%iRi/82! L ے&US{1O,BlXXؐ+ NP6Pr(3;Yq8WJ)Hq"HJ IKIJGJJIKa8y"Ֆ͒="{MvV.g)Ǘ+;-Hז,L_~NAQI!ER¬"CV1NLMZ)VL $L`V0{"eyeg :JJU*[5JLGU殖֢HVQ?ާ>حѩ1͒fX9֘&YF3U^FuX6m]}G1Չ93 |UҪU$]nnCM/OS~~>&   .y݆i񍪍&v\mu:ƑGLMv|253Θ՘lOv19|y-Եl^Zä́UUКij}ZhlfSoV6¶vʼn伲3صs-[{'BCSGhGfhgWΣ<lqu%V>svu.֪MZ26,b&*4r**4j:*@LMLyl,7*us\m|GD\bh$jR|Robrv`NJA0"`H*hL֧uӗ? ͌]֙ՙdKd'eo޴gTcOQ{rswol m ڳMu[NO [A^i۝;OrR V (m? Yrˆ[EEE[?Xި%%Ga%oDDiNe̲²7Yn\^{p(㐰­Rr_bULp]u[|͞iU-x4:zccǞ77Q'z̚KZ!'lsWnk]8q/v=s}ٚvZ{aԱC) _}ABEKr.]N<{%DƞWzuW8}nX8[[Mowf[@;]wv8d3tyoy02*|`a׏2-<>+|"ߵ~o /ۏ?yx??'󟓟O)M5MMqb݋ɗ)/f 櫳/ M^̛oy=}na>|ZZ.V|R?B,sMT cHRMz&u0`:pQ<RPLTEԔԕՕՔeu]ԔԕՔ0Eԕ֕ՔԕՔԔԕՕՔԕՕՔԔԔՔԔԔԔՔԕՔԔՔՔԔԔՔԕ֔ԔԔԕՔԔՕՔՕՔՔԔԔՔՔԔԔԔԔԔԕ֔ԕՕՔԔԔՔԔԕՔՕ֕ՔՕՔՕՔԕՔՕՕՔՔԕՕՕՕՔԕՔՔԔՕ֔ԔԔԕՕՖؔՕ֕Քԕ֔ՔԔԕՔՖהԕՔԔԔԕՔՔԕՔԔՕ֔ԔՔԕՕՔՕՔՔՔՕՕ՘ڔՕՕՔՕՕ-SwlGfՕՕՔՕ֔ԕm;U^\tRNS3UfD"w[uھD3iٙu"fzNǯPwU\پ[L?p0~ @PжQnbKGDH pHYsqFtIME  |0IDATx흍y A.$VuZK \{FEd9sS}تu[WqʪԜֵq+iݴM~a?!p@`03}8|̼ #-D9qAN$1S9xmk,ɮ#ARɧv9ERmGQ}pO{ IvzF?{RQNrZg8_@Dѫh;_Ύ ԻlӚ @SN6Eڶs]C];0]dlWݿ,ȿE쓟,QFK^ UG{0oܥh?o[5ӗ?vK%@JئR9K8ӈj~OZ_z 1_pOɝŹ)/Q^ckޱ_ Eգ]q6jO1{hbp[;e݀`+v/ -^"'$i_ՐH+{[/lľ< yG pK:*iiqP m]tZf:z&"ʽeEO~JX3Vȟ4ݣYD=rCcO#J~{[qher+P"yJly5q=jtKs{۶{= ZDNj wV[!w@[6Hm#,X 4&FC8BJ~L%rBPC~u4rN"hݝ_{5ŁQ{qrJUT+ I=yopk8{,y wqU%&g퉙#F zns;R8:AݻՠBؤ>=cz#4aKbV!rO!wS`X6jRr!/|q5NP/u-b2“nrj8vgw:~D-8?O%\"rjtwS P~ӳl#rour{X e_]]Ĭ?o#ur ]Q ئ]Gg*6h$m~#~w?M#׿$􏻡#2LH[}1ْ(1)|]\פ-rN}l1pUzݧy8u"'j4?T.rDFtA9qWK9 䜜XF%/dU͐l/z!}үEAdv38@D 8,ܩt`N]foz8+?XxS׃Y=tt0+?X][v"*w%8X{<z{?  ?U<e;vwtK7(I!Ԛ-'y&ή qp)OaH^T!(#o/#Sn<zȎn\o?zC黳3f;|({f8Iddl{&ή qp)OaH^T!(#o۽9Gj rO.U%[!׸4s'U%}nE3ѝoZe$22r^$#BG\xSX35q)(G+Rt`޿~XUr_$ ]守brga- O>";y#k%fYZC9 ='ꕻFƌ3#.eB6T.*Ôu ßȝ#I5HX$bxAHFшD^l$A'!HdG9OCDF[QRE/)&?Z=H亊фdX$7h( |CDNP^b qLn8EE*E~ѺB7Fd[+$V4ֻ-#"r'[BAHN9XTحJ7r玩\irrר%aE~$hD"/6JM~кGh$ӯW()qkx.9`Cɘ:G[4,(dHFRn9$QlfJ~b5KڒSD;G"zqȑ5k>n=7nxG"AHFшDnl$G>)آFBFh%&N$]6큭4FQ4*RN$ 9#|թu t ZF2J>"~wԓ~ZX(LOc;Ռ:d,6ݼ2|/0iHF6%++J6_~cyr_ɨ[B!('&p'>||h*~TR᫆iUbPML]0gHb^g#v?k\9>'>u|E0's̓/>9bĜ4\ͦ닓n8V5_A㘘ӟ15-@yfg^4w=>Mr>D^ƀAc0M|W;h ߹~;h]^ `<د>k[krMҗ$;;8Zi3њ;;A>%]ՅO$A1?z)SfW>w!)L;E@&?ނy ؕn|3.t Z3ze|jԐ;3a n*r:owj |;+r5e:cFA=YX양+8ňл⶝Tԋy nApLwz^Ur Ր4jےDKڗS88^rW+wޚDA]/ppXg3NW5obHT=٬ڗR88.2Fgtv{վAñ/Rc)we>( |svfg3m)wI%/cLUZsUcrWW0f+YNgLR"ު}P8wlhR+]q o&iڋ_xÓ{Oy `|͞,ũjk-̀L *$#_} "c:3ii%;05.4 n0h Ma_+:oTi~O_}*:oT_5^ lwP=~·-wz8_{3׋zr4͛7OWa|pKFA aҸ!wP-a-)wtU.2 AWTC?9?7Mr wP 0V;5{7=wzӼged\ w~sկ뗆~-&p> wsi$9>j^M"vתS|1ulnv@# })^:~"!wы[ a-:@„p^*~w|1=P(n߳8  GKG;ey|}a(r{G}Sɐ;3$ҐSِ;/ wx*xnG w/. "rbK /NNO/?ε^{y]ε菝zāA)wr01A9s, wP *{#)/pkY= )NɶmKNK.pǸ]LQ* 5I-/poP(ID޳eI-pbƻ+(Kqoܗ]8ˤ+}Pz(32JS*TJTi>&~"3Ml*Qlpu@pZ씁Nɭ v+v @XU4]mwi{is5XGيSV-L&`4]Քh0caX KrqVi]-w(:r;F$(J.;(bČL}k~2kz8Z.r22+ӺVxʬ;Tp_axLEb'CA_ZNNm117n?8ZJT$;`b['#^=T \x|%z''oWN^]56[0'͖/)CMh>2t>Tru1]9uD&Q8؂cd_NyG3M8՟6`jfwhȻE;X]V wi>֕s8V+dla3ZWL;?1Fxd| w`drwe r k , V`t9 SxY3 )da}cQ(w_դGAͫZr "QgTc3[TKFݺC:kƨ@ǚ5b: w1j!w1Z6feZyyC+~، _4ߛ[k ghr?xƯKr?l&Jo긁$aco}EC7wD=EQ=6a,xWb8&۶-Iv;pU]QTi@Ih~/wM_1S "ln~ tU%Z oU8Xy}¡ZE̒6NT TBC8D3w U"dCW:[Vl'ZrJS~$vZޡ,m;4*waBJ4b&̶Ux(\{>(iy?Cd2kaj *]).]Aރ2]^ "h]QcIǕ{O9{Qc)NjZ>,ٓW¥p2t7#n)/mY6HwM{̎HAxT&$س_^01ȝ5"R>rRWq<*)awqzǠ/Š!yDF5xW0U\ H{d/^|^' {hYRm_8^3Kŝ_Lf5S$Z,So`L:SXzAjcXscDyaLsz0w,v + kမW@pmpjS`ȽH>^Ђ9`Q\[+vWЦ  TA"~y̻3S0fE˻Uroa. {@ܛw *1u{s&XVVaJp8LSSOTqP}[ST{`@zr#d6¦8Rc rOJukDVGW0c3 or/zA=?7gf:Uƕ=GLe8{DkLaTdۚ=A C!55aoUqN !X$r^| cKꖹGOa|X\;}iEtg{>fn/2}0xga_{TA~E*|Fb W^#F3} G 3҂wg_Jov` *3gJ]ZWuw܆] B7 r13MYӯ곏Ӵ~dI3XKMȾ53,"SܣfØa2EGc&3GX3 )ȝ7XUS(`& LAp7f:^,~b~Ȗ)S ܻo<?xAxiXdձ@q@\xmBn;b/# `hkE%lw==p8uhE2hT0ѸׇӘupq3rQ}׎݀2QtM$45Eb賋A;XX%4GI_wa/o=/|8 {.2z<>3ֹ3CoP8GKҺӀ0K; ]0haw4°YDL Gӷ9Qa=;ÿS˞n#j5rD{ ;=9zs?4*Y{Ug{wK0=ޠg6ܭ*NwiżJh_UnHJ 2wcz6;w*0~'yv~C ?Ou/ovqy8m%GS^Tv1pH f幷vo_)O#ӴxUyjI~Fosz{n5h<'_sQq=3ʊK쫥p\&7iD~YNĜ|K~2񘬇DEq=3%r%l[(;f,GnwSjx3h̚Xغ-H2[fq&B zܝ=:R]O{Cܳ0MeM?еț|kƽTW[%>?6*|/#ժ@L)GiLI}fH.̘4~;bK}|4%:jyd`',OSy/V=`rvn/} KVב*˝y,;,WV2͚YRa?X,>9so" A+Equ։| ͧ6=hl{+w?\g{,Y]CQt)sFn7}G2s|V zRi7 c3ˈ$[l\a;e}ba>$p+a9ڼZڂ1TZSxV8b%F1wǢ ||ovϸ+TZIlKSF J}d%m)mUtMkyoOTZr7r-!c2/_܅,o8ևt4Cu{f|Drk0avo<{|ݟǯ|>— 9>/*/w!9䩋DgyD@ɉ`]s3 ??ccTT~!5}X233 hDߢ&dʐIذ]HtDdNq"Bg@QOhՕ ÂAZ}1cX5Fmv5 a%Q'=fSk=y"zg;HQ۶$ml))J{(]}%ḛ,gw@oS? ԍCmWN/pڻ+`-1FiL')1Tɥ'j$+ǘUm\g'G\,ł _"'fPer\&Xvoo@`%mm_m. Ro؊}z.`)؉}rݨo`&. ^} NrN1"D.\}ceA2bB؅tU,oӭAnwU,` ؉Z  w+/UNJ;8 j;}-@;8 wp@@rC `@r;8 wp@@QcZ%UK.[Vƭ^I7( WZakeenm˓{ۖ$< o"7;k]ǥŏUiΚqirٲNbK;ry"!8vwtK7GU ݉&5[O"#c+wܓO!#.elN@(dK#F>(ӎ!wPyhCJbs/Ó{K 4EI֕C;Ŷ;KC6mejuȂERrxrv_$ :Ǿ[Z84yGLb֐Ir3pȊPQzmO2hCF~&Qi΄|\ѦX;rrC+ntI58M# l](MN#N+h0ApJ18*J4֑hkNesNh e\)q1f"p=[ GӐKN:s[I}UP5\wtOʭn(qlW|4D˞Nr4hB>h g3д)8";IH1И ݄5pACis[N/#H{;Jjvn?/%tEXtdate:create2021-09-28T10:23:06+02:00Ub%tEXtdate:modify2021-09-28T10:23:06+02:006!tEXtps:HiResBoundingBox360x252+50+50.vtEXtps:LevelAdobe-2.0 EPSF-2.0 ZIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/starpu_log_list.png000066400000000000000000000341461507764646700252400ustar00rootroot00000000000000PNG  IHDR V &iCCPiccHgPY<@BPC*%Z(ҫ@PEl+4EE\"kE t,ʺqQAYp?{ossp e{bRɎ(tջ{i常r)teJOYLgWX\2XyKο,]~ )sT8بlOrTzV $G&D~SGfDnr&AltL:5204_gK!FgE_zs zt@WOm|:3z @(U t08|A $`(E`8@-hM<.L@ށA2@F 7 Bh( ʀrPT UAuP t݄84 }aXև0v}p4 ^O6< "@]p$BV)GVC!Bd h(&JerFTVT1 uՁECDh2Z@Ёht]nDѓw aa0Θ Lf3sӆL`X VkaӱJI%vG)p`\.Wk] p xq:o—;IA"X| q B+aH$͉^XvbqD%iRi/82! L ے&US{1O,BlXXؐ+ NP6Pr(3;Yq8WJ)Hq"HJ IKIJGJJIKa8y"Ֆ͒="{MvV.g)Ǘ+;-Hז,L_~NAQI!ER¬"CV1NLMZ)VL $L`V0{"eyeg :JJU*[5JLGU殖֢HVQ?ާ>حѩ1͒fX9֘&YF3U^FuX6m]}G1Չ93 |UҪU$]nnCM/OS~~>&   .y݆i񍪍&v\mu:ƑGLMv|253Θ՘lOv19|y-Եl^Zä́UUКij}ZhlfSoV6¶vʼn伲3صs-[{'BCSGhGfhgWΣ<lqu%V>svu.֪MZ26,b&*4r**4j:*@LMLyl,7*us\m|GD\bh$jR|Robrv`NJA0"`H*hL֧uӗ? ͌]֙ՙdKd'eo޴gTcOQ{rswol m ڳMu[NO [A^i۝;OrR V (m? Yrˆ[EEE[?Xި%%Ga%oDDiNe̲²7Yn\^{p(㐰­Rr_bULp]u[|͞iU-x4:zccǞ77Q'z̚KZ!'lsWnk]8q/v=s}ٚvZ{aԱC) _}ABEKr.]N<{%DƞWzuW8}nX8[[Mowf[@;]wv8d3tyoy02*|`a׏2-<>+|"ߵ~o /ۏ?yx??'󟓟O)M5MMqb݋ɗ)/f 櫳/ M^̛oy=}na>|ZZ.V|R?B,sMT cHRMz&u0`:pQ<PLTEԔԕՕՔԕՕՔ0E<VhvNp-yV{ՔԕՔԔԕՕՕeGeԔԔԔԔՕ֔ՔՔՊƔԔԕՔՔԔԕ֔ԔԔՔՔԔԔՔԔԔԔԔՕ֔Օ֔ԕՔԔԕՔԔՕՕՔՔՕՔՕՕՔՕՕՔԕՕՔԔԕ֕ՕՕՔՔՔԔԔԔՕ֔ՕՕ֔ՔԔԔԕՔԔՕՕ֔ԔՕՔԔԔԕՔԔԔԔՔԔԔՔԕՕՔՕՕՕՔԔԘڕaX~ՔԕՔ}tRNS3UfD"w[uھߟDPu3fݟup\"U?wĪiNۻ@ۿ~妇[P a )bKGDH pHYsqFtIME  KOl+IDATx흋u$ڶ]EwZkIeې ʮ6e]ֵ*&۴(#uXJTy֎mn:mq @AOZ`f$Ȳ—%6UxR-}}p2A;lѕ]Z?Rj_n 9ޚ!Sulw_6LZL=w7NrPz5G$TK]W7Z=U7#Uep9qNɟ?wc(匍6QzE,$'~ CrwjcңtU] UVPe j(}hOPF6z{BwC]}ʏ6צ=Mkݍވ%aEu$5#-҂FCa[bn/r_p2Lʒb;]ګl06T UP= ɬXmnp-קҡEk +>9b'w;zf"o]qѝ9( opuhè}ޜu g;_;3A)| 0:6\];3AqZa?HEꗎYT+֥˝yDoUM[MEu1:ZDv*##{V4Wgv[6s QO{P&Үcյ܃|IuŢ!}_ b )u)ZwJ[A .i J;uPȕGz3~?pw*wOJV7ZAw+ dԂq *0rp7nʝw*ևnXE(pY=1t\ã,N %"L(+ɻ Wrwuk|w+>/{xjb) 9'1,"(eR;qh4;Ooӟx'?GeB{{; Vr+Rps_Vq Fk]2۞55ȽNՋ9}RO |94=2Q4Um#sehhu r]|8A?* 'MtbRUwܵ!WC9=Jdoo␧bx~A;R$w:6,%R>o0aFtE츜$aH=o; }n?ǟ@]&hpTzm @񆮇҃mK?t}=*=XٶCQʶ(xpTzm)j8pe28~3 sK֞xd|F6߾q#~\>fu6oy.QY܍Էõ_̈́չ:UpkuMgR??~/ gO|Omk2Iq#\oH߸_Xx>y'vt:f<\j,yFZJuүwOu|juf]*:ަ~,&5SW-Co)G6u!k/nm9}C/l$vt:fnVǐSR~{S7VG]*:ަ~,)_Qʶep{SN|%w]zTzminkJ?,̳G-}C/H{qUgӌ_%=r߬TGS=_LX=ro ӊUpkuM5jNrG+ۖ>'¨h~7S"oK3Фr35oQBu#S#,+nGQ1?[H}4{Z$Q$#IިrZ$Cgp̞0ˬ)&}'z$RPQo3^*}'8fɝ~xӥIF,phLFьD^n$5M MxM>1GaX9&q-R0%).f,{PN$3SHA&hF"?7eM=ّg±rLHE XOgoӜ"Mz5dZ$])d)bꃔ.Q|,~++tNbY⇪h(,WLN.%"QwYz(|Sȝ܍-r zrEƎކƲ"{ z(<11KH֤VdHF2= [x c%ˊ8t}~\ IroT=-]R(ύd=lZ4CsCAa#VJs4+P?Rp\kXS(Gv^;ISG9dHF" YNQݕM.CsC~aDuyCsE"Fob8xd*)ĒuM[g2Yݰ'ZSFAN#C4L6R.vd2~xOB·`'2<,>ք!* ItՉ4;h vqȀLe 3Qޢݶ,8)XqWUYbmr bY*PG._aYmKTKrY^Q/ۖ`/+轺m˒B-rU.1ۖAi6IݼPg"(w`2@w~2c2( gTm۟yΤ]aeP RA{r$KLƲ_KKT9iۗ>pf@m,1[-Kq\r0 (B-Ϝg ( 3_~4?9eAy_%d e,M!{h,cڴF/w$' !w^ͺN zOk}ު߄AMęJwໃc,M/cwro'hWN8U'be㭅 wp3#ľ+Iq aaۏط?MYz!ܪFވRlܢ@Xrqy6/i[4&tXbq>p+W+b[ՇgCrd2^{wm h(:Df@cO.n!w,ˡGA,e w<&4߁;hSgH4{(9bvA/;h3s w޾/-Wvl\y4̏Κ&78ԛ}WkOJކNr,2|LÕɕ,@d .Ě[b#nr`ey"i1ӑVЉ;/mzDo>$LK[wܶ#r]AmpJn; #;{A=X5n##;*@L͓t ;A-" zPID k_׍eAXEPe)NskUU,8h,oڟiQ\4UIeA8F5vJtw5\k4e͝WƯ|KBY3Er;eA_Xrדиj0TԌ)60;/~$CڼJ3a|vxV}οE2$E= 4 ePL{ 9{H9o<)U S8mݫ(&{쯟We[r.=IɛKWUWP7&obVܵ}woÑ3^ow,T'w|Ùi- »9i*Dg&pfZȊp%uMVͭjL&̴m//ǷAjo9"Aü޾I0G$;zwXMM wP2~*C1qSG5Wo |qq/o AeR@Le}%bzr2/8$[<_c&P&+~ wr9h!wp8U;j7}]7ˊ♈Lw,e+NskUU^VdO^ u䲥Jn%l-w]uj%XVcO^ 7xN8(=┮.=]R6NTz /ChڹҌ,r﫮Sz=؊+ X 1n 4=rWW5бQ2xL/dP\̅9 W5,Cu4_L;C3$%'I=yadj"仳Cdd3K89if09 3)NP*([lJlG^,[o<;gnݩu,N`|m{f߽+,83 O?Mqm4b?Oj8ק.wd*{q&ΗE_B1`Hnk[x] m]{bܾ} EMqCh3@dݹ) =2"r1S '˯5#1bLHyQqxh(3q&KFLw2эd;ڦGF߽^nK'v 3J;nUkv&s=sZL[LՇSj~~]WT41S ^QgfBylK* + SV2&΂g^<2(N_QUk™ "Eu̡|w7 mщF"~" dDL}e=:41^2GfJL tzl˗/ w IcrKH+Ƚrs/`=@"MԶ};'3uY %K;Itd**u6c蚺`hݫ&U+k|yJ6ũCEץ[c&ȽJfou"WD_ZUd1Gd9Nֽe#q OEOHW™9m/E_Xud`اrGpf Udͧ WVr$)8(eKd1OY#ABDL:vx3'Nk`JVwm8]ɗ<%*^ Vl>t;%*UڥI=bX'J.IHަ)Q[@RR4٦)QO {x*.r/K[t2{CIU4 <6ݥqwL@&-j K6K7UAw cUO|T/٢Gin Lxw<~[my{POt2)N4ҹsw,ٲgDΔ;J,iۍj݋ SE^v&WgW-]fMgףD X\: tBkҮRur\qU]? gE)bY>hk]5##3:rTodbw3Ӛɔd׊ؾ/.{6,uO֝ ^7GF=)CU{T\TT#ZZ_葎,Ѧ0x6GL~'\-*q+L:/ȫeo-װ=IGB93%P94 g&7?}O4g 5;7Kn[8g͆3)r? Te'zro$vt/83dٶE ܙj66C*ᷪpf*yZr?ǦЀ Γy\ɗf_H/Ȯu/1f rWdј%r/Ƚ\XK$D`8 w,*}Y".iSr~HL;P$hԼA("A'ѐt=Vp D {C yY6 rE+k.EluP$^$P&Ì 3E tC .K̼؎BrvI VYO2 tЅP wq\,(b c53:Q|h waXir6Tp$(.F'Kf r D}S1FT8hN\e'/ĨVq:rYB.'D_QufNlӑ,u}'C)gׅi$/]s>}X/Cm?3D\gk\ +Lq3ڕ&!NwmBJ<)!ߙ#ܙTG)DΌϠreWH#zgҙ05RK~ .gn33qVLmsf.A@Ag*jE3Ӣ[Չ3~gf S *@8+`m+z߽ ^2 3#Ƚ\U-ڀ[rAVVT{i؈?Ƚ Îc W?DfB 2Sٟ>>} "3elLe |[t  o^֯䞋q=;@߁"V7eYMOl[%eᜟSB^%^ ۀg6{b ~k ,G/)=-{.FHbYb^u޸0jLӉ4zȰ|m{}:OB"߼S߻l6"3r&G4}Byk;u{mܥ=Ƕ/ȃ=B0 RT!a1ğk̉m?vSO;:>)gt M{Cw汛PzbD]^˜_KQ[KҲ^=i9 ,j,z}AI&d%RL 卧u=2;{[L}ui>˽㪪 4 b>wl{ݶmnp4T%I7uC?`oo%©axdt]h|2ws'W'(</OTGr7\nYg|< P/krMަC-G4ʜt=/3{D}wlSŦw|0|s{|lv}/h͒{|zеIݲ^iQ-4wO[_2Đ˳_ @~&%y fׯ/Zw rkpAn~ #CWg #4{M^<۬d~7.w"4NaO1N(iv:&=E{8] FK|FaRSlѡ`2w}Z#Vyw|%uYVζڸAiܥFB`7M՗/ Lv<7Syrň$F@>0&;ȁYrE3r-r-r-r-r-r-r-r-ȁNhA@E@E@E@E@E@E@E@E@E@EjWL_9hTG"ucs}P碢lzETTYDP;`rsu!wS83n#{ ]4K75Jަ'rY+BNߐKyf{v:ffu 9p-:׻:{>5}3au} n齺~JwުVf]O>5u#XS_l7nk[Q7߾q3~&nVgӌ_5QYL};\KNΞOͿnLXYXV^ԏ%j)YFb=&wN[su$rOz]CDpQu,P ]ևO@菔l h>ݞro\Rlm$(Ĕ>Q_)׆>e(VFrYBL(Zm﫤ԙ1 jCЧ8߸l%JH.+0Q>T+6xBJ|9{)ߧ8߸leJH.+0Q>T+1x"ud?7.D9b6 Lc;>OJ) ǭSD9b6 Lc*&w\[.uFdWȽvFՏh%K nMc+f#D1br7ʵtܣ6QŘĶFA ]A'~>(VFrYBL gj߭w xJo[qe& 1}b~}#%>T84Qe&4F#GgJ>WpzGzUY۶8몫z7YflDDA1] 6';:u8TIWMNSFrwlr!SE)P;|Ng(KPդ0z wv#{+יwԑTvē;æ`prﲞݬuG3(<31Tlx N'ók}r׽\F?ZFbL>WgNqi+pAIIYtݔ{?Z dԼ|<U:d)ɽu~P)wsqf}G˰]>|͟nxrWFN/er]xre$߷UV\uvŏtG9b6uehʦa 뚦*hݜsft3W9t/ rQŊyc3Ҿm7~68?; M$%tEXtdate:create2021-09-28T10:23:21+02:00׷%tEXtdate:modify2021-09-28T10:23:21+02:00s-!tEXtps:HiResBoundingBox360x252+50+50.vtEXtps:LevelAdobe-2.0 EPSF-2.0 ZIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/starpu_non_linear_memset_regression_based.png000066400000000000000000000205511507764646700325130ustar00rootroot00000000000000PNG  IHDRc)PLTE{sjfffǦwwwDDD333UUUݪ"""  UUdddUU99 /UrrU9&&U9rU99 9&&UB B rrr/LLL pHYs  ~tIME SU IDATx}8v|";6[ۛn̩l3L llF%!SW0B#F`0B< Sm8l;"*ѣZE:Ҩ3?=i w~H+7UWcƚC}n p?w#?-oóyXQ4oTWuu>pj!6Nu]<n$  鷧T @94LŸ2?nY*,6F_OA\z,\ ,N-D2usH"ؗxo8Pmo{8\X^aU;,pVwo.D7c_+ \y|SXrܐkmw6*nXXPޙs7(%l@ Sgznઝ u%B \n`q-rWipn+kf 44 f)~;/Ai[7Jto8o S^DGV{]j B?7 ,\Yl.5 /BKMo!z,tY2;yhfY{J_QۈrBAIkҠg%.3\p{vK*R EWw3ot^7;ZK =y?=ΫRbGgzJ0hHkBf>C^DO `"Fk?wL5֊\ V\+5-iu/ѳbYvdzV B8lEn=r-_>j49I %avxrVoLꩃVmzȵ*-V}h$Ng?O?8G3FՎ\+r(*qSG%E{ek2\kYє | `70^ȵmǗU]ŜwN%ݳk%^\Ef!k̃Њ25_PExajEAhĮ l#_Q< cL4ڌp!#}u80Cz H[~0Қ_F:(io^ 0nvV/Z io /Z i/>0I F6n 0dvV/#`Gh0Z:_F pNtwjw*]}~߮;5*t}~wZ᜽xNm+LZ`4ߣX09n﫡 K߮6 |l>?F= -90Ynѵp~yx%-ߣX6̡| n 3v^0ZH~-E`b0Қ_F(0z>ߎB'yOjC3=}v'F: 0z`l~ǰ80ҁ_FZ :#` ~JR"_F3}; F|0a.@o' hbk^L/IC96 ˉ `4) .#x"~MF|h\7&W,A̋_߸0ˋX[gqGSMVɴ0Hj0ZWL7 hB89F4N0 ल3Ѩǟ hq\D w&wʤg&ABw^[LB mNTN_F~2mr>@ّ|Ww">}ds`4r`tsV_0OqkT'.D6z{ FcB 6Q0ߟL4ƣ'8hY@v] o H/'`t8.n H)C]}!G'`t'q)ݤuf!6Nli)"ڏ/iX`1s?=FMי@Zcmu[eF6J%zlXN`lvRDSpO&v g"Y% ygD8?z]YalxӿBئ-̽ozz=  !_΅ @X#~5?\ޛk"cEg$홮mO<{1Du l[JRX[=#pD^h).'˜rc"Wpb-вe>4Rkxg/p~)}{J45x4XNb=p2.> _A$Ϗ5<ѓS7vK~v+a!ZjqR q;'noE㭫$:vڃ 1 ށ ˅H; B[2 7/`XpMϛ10xWL:vH/kVFo|!כR}`,/Y2wu<{v$ 8I]cׅ7z_IpAU_SWN#I~-Xnୣn3HbVs"od ;5l"SrF47+kDf`c)EI"cMl2`mZ/xu,{7<6ͩA.> &xqmʾ~ɍV~xBneCC!7*ǯzul>j.gh,~?_`rlOŷ:F[G@ h7ua,|gD{}[rH0sV.?qA4`{sB<n>~X8R8^ Cx(WU}z ˟~8,/ߕ_E^gA<Oo'I' `jp8{g? p.D$},÷]S|3y_Z÷8vL0mVG]z,Ĉg|`^~+0+Ѧ&(N qY?UoWw<7>0_sr'|{l'q `~,ĭ3`r|,l%_}'ӫdܒ=P=3B?oQq'En 5u{0"xT>*X37f~E]B0+;gP-;N)!xAU e~ Fod}ޒS +G0?0q rzJp!~{W ӓQz'2rX'=#y$ O>yv 7_\,߬\BwvL#1wJ&Q[Q&e tJc>V:c&=-[B Xa  CC |6|l WLfVK,0Z_|` 9h07Gxx^k|d{5vCg}Rvuy_anvkXN};V +F !ks>p%Bܭ z4MKpjVfաo^ l3 2]hkH0!>Jա3}Tx_q5E[Gpr6xWXcIg]x2:N[n2 :1 Qo{!F 3"\=6 #C)KoX`00[B0` `  0h0000`>0 |I- G^E4oo_Wm< Gg!poSnk; l{5v[qx }Nwۭa\F4-m #%ŸVuuaʋ d΋W۲Q7ﶷ)`^( Pj޶ӛԗ5}Q_A-to8o S^DGƸ^os^D}l@ӟX\춷M`ob)/2Q"?n{4` @rVzn0Z.DO)w[FXmoZ.0B\v-ڷ6 YzFZݪ5+ݷN9J=-]-'O{b7k!D׾U7dWi^vtk|vY ;+UhwPt [s/h!pҭ=%О;kB}k#Yj_ku٭ZCR֭QV֒֠ϮT-noh` A-twa/~pZ ؤZ: !!KԾH[wEN`/zn7:iABaBOd.ZooDYzFݪ5+2VGb5R}-  &-u!_Fdj_oq٭Zû"۶FٗBhB lˊ! A`m\ !('7di\v𮨿!SMI(rF g]|9bzkވ,@4.UkxWJ/Ʒƶ= -mv"|k0}s#vo#6ҼV]ra߾"BZBBHeh@ }]@Օ\ =RFP\}ej Jf vg_ޠ9Olـe8Wp"\1-5XRO큍Ժ+Ovn`w5{ ;[N,Z!F#0B!~)9FqWobZ<mp/9jF295 OS۝ww)Dio%`2V,ō2ܥS-ON亞,JA !0NnAP}/jaZ~T(vEHg#٪/nD<bnvT,WÈSJdү0,t傄Fs\x^BXNyT(v̅hjq#-\;7.DairW^eHW.7"Ns6 ` _Z`[SMKWA\6r-nD<,k8I]7c-1AYq4-bWۢ aODōSxї+m;GXM0IDATv{#0`4B!nl_IENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png000066400000000000000000000175751507764646700327500ustar00rootroot00000000000000PNG  IHDR,2PLTE@Ai @0`@ԥ**@333MMMfff22U݂d"".Wp͇PErz挽k ܠ ݠݐP@Uk/@@``@@`pͷ|@ ___???V>9mM% D݇BK}sVf+8# Dԅz/jRj_ѿJ%Zee+vׯOhSu2 ucmSPD˪;Ml~³7&~y6iw EX=4ރe~ڪm䵕lU6Y%Īݒ^ hO,~)ˤ?/QQ6_Ѵ]CDK}z2S 1iZ!6iw yHka!2^Q'x|7jKG_AFɩ$Q#@{IEє>Sg ǔiJEa!2BrD5YA)_ٞTeWͤ0JN'jL8/~楮&B9 comҰi|2h/B(Gf @,GR%ʻ2.@J[~rD,W79 pIæ5-Lޥ8!TuS؂;ϾbÔ3)UK*ק64s%`کؠ~?f9T>-li0+ ѷ&:6pp(Q~QTq'{e_1]'E!Վ##"TM YWX!fmwLd?d#)؍r$ ?ѨT)~mPߠ>{L~ vOů [GFEo W5=ا`6t'`OuӖM 8ۤQŒZGFq̣ Ǧ;4B&oB~#KUAE!B!B!B!B!4u`fC 9NU9LY 91z%D(@r) *Ζ^.[i"zY f C.LjK6j :!wf,yʿ+ooxU+$xFc3W2]3g~W4]Wzk-xb} +Jqf,ZCTy=ɩ'[vZnç"9Id}?i|_r }un?ֽȜ@U 6.\.k>jd@H~tˇ_%a룜@µPuA~u9^u]Ӽi ?\ZHhM>l=w-Kst7Eo|IN,ZG8~ ӿMB㭿T"1s,"[XReƳ x^TeƳA pY+s7Edw.ovҀg:`GK} }nPY"<q+.5}d-L~ƳhY58y/8[qY-OY?>"{[?@wYbԣjd(-P}Yogo:"&$DhG't7EQnJltdդwm? ɷ?Dτ䯌MsA܍gjNx 2^!a@jL%?FEUԷxF\f%)%V\D9nv8bטѐ|_ްװBt6Ek):2.2gPF}9?_M!,WGτO6L (m%즋j+ⶁ;5+qÐ3!g#m@A܍gJ*ۖ/gcm=%oȟ %"X|(,Z Fm&2^~&[ȏJ^ t7EwhG|NvwUVOQqMg.ڗ6PbTvwٴ/M˞\@rtwAfܐS-Go _ T)H^]jęvt/cSpڀ|Y.KQu+F6ٸ2^o&ݲ5,twUU5W`ժ2"tݶ,sw>>tཋlҘA:Dg?@ws0jۣ7~W9[{Oxea(?H Z~_}ֿ?[F4'4ī.`Q`+ô{ Yd#=r+:!BA !/5e[bwC 0B'o09P9[QTRS+5`4`#goKvBC0+C\{2vB\uyV6cDϢ kgꏳɟ? F&ϝxgCw0 5%Sn<2T5O n<21;\!Ϣݼ'x#<܌tG?@wYoz:/Ko{XUuKF2:OOwA'*$%ɏ`SRS-aeJKVbއ=|NfE/'DyĐ7 0xx%oˏU gK_;}2ȕf%0]&DJ݄7=(@y+#@ F t +Opye$(~ʏ/Y/;!K9Z8~Da'Yo3u Ϣyoq܍g^iM7^nxטyҠrMH,sP7MFu?K@@wY?~0jeƳh/ZccpgOT38!? L- N`Q~>h@79JEY<_';*R]0uѾ`nB?L7rN Birx xs:0+(ھ/CP ߲x /uq P۽sUݍ 0w縻QCίxe}]BC g#f] pˏ= :Ϣ(}"TB&MwA*f](DP#Us.0t L369:\`*^#𥟮ˑxCc˕ 0>> xh%`Ԧ{/!.oqRgtǃ 4iFUs߂I:^#@{wv/x O`\>u@F謚c"oA$ozw/^הn@^՗KK@0rd%_}K.(@[qfx hWSw_ Vn8ˇyljIRVe (tog+J IXKQ68|5ҽv 9 ,)ޕLiI(,nnFڈ9Ђ m@[E,?Ht#{0" m#L#j;N1I^l{Cm@hQO{IبFgc7*X*Y RBC w|bP%nxC%B}88PM{Vb|;8KAKɥ4O xveӾ4`c!M4B)ۍpq:MD2 ,A2NW@ڀ.ڗo=(@F rN8HPBC wc!]#B!B!BظH~*oޔ,8X-Us؂ˏ'N~{>R? oq+YKfQMbk[q*p~*q A߯:!YHMDYxTmL~W>S? 0dBҖ)RՔgyd_lͥӢ,2Dۓ|*xl8MW@]pRojgvYFv+8f,b8*UbpA6~g,LO'aW"L~W>S? f,Qql{:Y\)x~w<}bqŶԗe#etBt~7.)ʶO}=EYk/]N!B!B!B!B!xDdk5=ş6-}n TVNΞC֕^EϵnR/_e%WY?28OZ=Nؽhy8Je_!'|ս4'Gސ:uhy:ZKj}5+-}TSc͞tV!$R^E<ыJi#XEo6u*ۋ$/,U/6LPOO8#(}&FmӢ#vLͲ3nJ ڀ[nZrb{UY蕫vZJU ܥU )-e|.G$U?5*B!B!B?}B˟FyIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png000066400000000000000000000222261507764646700340650ustar00rootroot00000000000000PNG  IHDR,>PLTE@Ai @0`@ԥ**@333MMMfff22U݂d"".Wp͇PErz挽k ܠ ݠݐP@Uk/@@``@@`pͷ|@ ???___sVooop pHYs+ IDATx8Esp8޾E-nRA 1         t|G`;+QǞw'_=:MlSdO6.q6e:.7].5'Po'voA$(JCaqjO݊O0·QE7 >8]\y{N4[WtG5\^yt.n1 UŞ9~Qrf | noU.T̂5o^ >L7'j8S ]?/Oy1nF]aYY(N9UhZS\Ncptȱ5Oԉ lN`~doIa-7qh U@7̟d.G>Q.򎵺gf>li ;8òIݺS$&<۹? s2qG:GQ#Oqvoo9^ky6WI=/> )M#@`by/gHY5OJO,ȍ=l}?P9wh=z5h xbbw(+@s>=r^:kxu{]\$J4] y/_+oj hƟ{"\էQM%s#@u#@a:{ޘ; sK%@8Z,WI.it wxpvrvo lڀ08f hSw.3:'YD~-m@u,a0g?و0׶7crڀS`#'Q/Xn[mŦq>dsS Pb9* #='rvO`.`6p.u61]S>ȨNA/Pd܇ldqg0+c9hЊߍ=b mϹ}ybNdvgn'065rkW;g|-aBSӯʏ'[)zdkP>qĭp.O^x"9AAAAģIbyNeײ"Av\$kY O˥%vҮe-D6oUm~'ٮe-D>-lƤZU9$@RHĥT[ZA|DN'~2ToB-aZւ`x,pxlåK4*;Uxhovh ?&P栗4%o>,?$w0KRJNZbz4n6.kv4ś1Л)7Ōuڈ t[,źNլ|t1pxZӗ^3~* g!;b.Qe0sޛuӫ6lLP;Cznѝn} ͞$a~"K }:ֿT2t,z<>(!|~:'c^ cm@( mhDXU@ ڳNݟXX/X \y/KmR55\J(ٱhfx.uӠ܉塇H2\6ckyXkiXLOb˼mzђY6fl0k- F vTO鉯FM咉-sF:w LuXo|`~rrT!lVcXufNJ[˳7mK:%K.[~eaa5;֊﵍w+g#hK\kwDZK>S:<( Kem XT I`2ݐئ4]{]aib֞Н * (_0%nsLPu~mƖ/ 4,M,@'X 1# RQ]uŽ|.pG( KR=;I`CХdG6r r ϸ m@8*)[c3FХ2J XӀцstMM=O @:sfNJG6s =ev+Т\>1c|ZKXXb c)>UGX`.cgNJk+mI3 B:)ެs&(c2;V^F̄ұh2`1(k  c:!N al7YI٩+PI髡8Ş c 9d]">'Kt1ڐ3*`XD|Qm@9h2hC't 2ڀ.?hD|@  bÔ%F5KD6id]"rIb]"r9~6KD& b!D0b]"H/ ErAF%"1ڀ.d1ڀ.Tцd2ր.UtH%WtH% H F DvAF%"OmgC1TtH1hDQ m@C>b]"6K1ڀ.|ZCF%bm@}>! C6K1ڐ36Tt1ڀ.|?hDl  om@ b]"^tX!` `]"V)P"F%bC6K EW.maY5eKDMX[ގ qokBi Zh,vI!E%tjmMݭ) ]"jRJ=:582)@5eKDE鯎gHH\I9 0iKD5 N'i~ o Hv@ Fk0֔V!J,k=)5_ QL1ր. hDX 1ڐ2(Č5Kd]"${3o[!F%B9|?hC &*0b]"b}m@: ~ X@t`R~ . ^p 1ր.Z.E @hDb]"^m@~׻ 0m^pK 1ր.: ҏSQt鷩?h-$@[$|hC~ c ҏ_ qwǦ]Ϧ!~y)8wW!C dwi3W!<@~|M|:.$B|W^0ⳡ#E (HJ;6$?V*펏) 5|Y>G$YWCYnNxHhw !^͉ˈvێx1g/!ZHײ*q1EtE ^yABS4Su ^?N0k@Uɯb]dC6_j>5K_PtG'|m@~[AH 8'; ڪ>A}Qҡ "b1Uv:O$KXyX>?&#˨pMHWRTQ/`MUG@ s|3^ !Ϛ*׽hwKy|8+,DemE;! 1Z:mYr(jKTpJvBiT.k*X=9(_Xj`eM-; y.帮RGBx7z1nuh]2L nYs(u @*@UA6k7^ 0"s`=Aey< \ PUhadx[sZK;z[sltB`^'C&j?N$Gz-͡89} 4۲`$gtfvF̱ Z D_BN6݂PSRrFך8b%s˿Xwe2*C׀63O"| I" j{cvu~ ʿGХgq^Jz+oa&CҢ=wT94I /E=SH[dyď/SôR}Osxؒ v VwVON HߔP /L6 /5֍Ɇ9_uPǒ=}O :0 @m@Fs 쌂XNe7NI_iv~ 8Ay)z^xk2Vѧ\c1ڀ.=!%K 2+oa^]'Z60eܙ0/y$8|9ڀ&.?YW/H'xwA8s. !;=Kނ ! 3 RD@anLKHupKTV" d]zC6K?b]=hht{`m@+0ڀ.?ItPC6KL b]5V 4WM /tƼ'A6KeFi`]-dw/FХ"RPm?htW @htSnP"FХ{r!FХ["AZ?htKTt2Ex?D+ցo,KӼllrXŢvl8[,KER*ڝ ~ V-Wwy%]i^ll+Q~C+ց7 _YtQ=`UBTY5+qdR.ϲK՟,/vf 5 : W7) `sR'i&uBNFg:rHHwb2}Ƭ[˳nrG1L8A =-[zsL8%a&y(*Y_!c m@KV\@n t.`96d8Ttғ - Y,sKAХ;6ctTt ܭm@Q?hC6L?XyDp+1h-F>b 6ۀHt  ȟ!~A`!I8nZC%`^LOw3BF%\^v6%=(fݲ 6K .h{}mȞ5Y, c D-'?د_H0ڀ.Oԧ8B\ Rqv Rqnl-/tܫU RqjHu%s`]帆(6 90ڀ.r4,Λٸj/o) l(K=7l#wί2ր.ham# R.oɖڤ{{ R.S &(AFХ\a7/*tkJd2ڀ.}M*~6KDAFХ˙@ht)[4 UScfFrxt ?Xt.ϫy-t4b ?hTսOb]*?C_b~7^.!x'`r[y D{qm@'\0ڀ. kFХ\7݁߯#~# R6Ne۰XێC/06cJt%ZsK|XR# v۱c/1]%ҙ##JE ޔWvvRr^Y}/=OȯXG.*ѦWⅹmzLi.%MBO ڀY mu%kV6L peWa$yc;g)g pϴBHj/xA Uҥ~rv'iuB8x^z7|.%#5y cVKU%x^Z|? ҞPP\s4 e c ҧ|vh똫] E &.0ڀ.6KD5 . k@jF%t`!D c Q hDT0ڀ.6KD5 0%k@jF%t`]"m@jFgDT2ր.6KD5 Q hDT0ڀ.6KD5 Q hC>"k@jF%t`]"m@jF%t`]"m@jF%t`!*@%t`]"m@jF%t`]"m@jF%tFhhegǖ%]"n@h>+; :YtM~ Vvf)z%]"@h\TM,oвK-(A֡[G&Y~҃f!!"^f!Uȃxf]e}G!FF΂}/ 0´W:!/QFkƬ,<D5%8%h(8qYʚ2{}ס=GϤMb|f)7tl;ڷ#i-c"2}!kٕa`ii{lfXWnۀNGfr̿8 8 Ӵ>0P &iE|gkWGfuRRiR&M(+td6"0ͱݸliC3QtnߎM3Y~r5aA&MxP%4)IDATtd6"0ͱj[Q 6 l>6k?g]24L⼆dcm-4"qŮ_-J`VfZb6ȝW+@c."4S 4Ƣ599j'Ȍߖ#ᳫŏI_ P ]9fg?H20ٱ=M-֮jn7qL-0;O ,L3;! fodGnn-Sfev)[&۶iUXfgs2-mC4b:s2n-c Ņ &iE|UiTv[naFAAAAAALcG/kŸ6= tnjG׳-~ q)JM'ތL+bIjNQǙ$ &5KdW'%=n`7 2/{gOHcG񚘶It,vmXL(WDŤLwWFe#҈Gx:B5YDm7SRjfOﮨtrf5&-KJ ar(~/]l S1abŗdpӻ+a&lY ( 4%n 8pٓ^/XlG١ +\g|ܲ"6642B>[osM#Ťidx=[스9WޤNJ#    ;IENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png000066400000000000000000000154131507764646700337300ustar00rootroot00000000000000PNG  IHDR,5PLTE@Ai @0`@ԥ**@333MMMfff22U݂d"".Wp͇PErz挽k ܠ ݠݐP@Uk/@@``@@`pͷ|@ ???___s" M pHYs+|IDATx e/qԪ_bEAL.j^ Q6ՕADd:tv&J̰݅w;;L_#υzOs?|ly+c~}Ɇ=zy&߫OM <y!lԬ~=pVUiC/^+v B Q4RyP9kʦd|fYm20|sA:,9L_%3yNk) yDqIC_ȟŭm2!Rzi*kJ=Lnz hZ@J Urm(U=i3r yҾi9a0}ȚT5I*W͂<8K*amCPfQU<+Z۵Nvޛn'b_2'6_VhOtFvC l|d`ךyOݾJ4Vus : ˣ#V#VCVɺ=}~$ST<둀0,3\ջR7-v]=m]}W2w_] JT5Տ'JM hu} Ca( MҬ^F (&n{ZM6lyUeK+AMx }X',ME24{nLx+`aX:}2>Yjn_SJCk@{u3y h\twꁀ Th 8 %ѽ  b؅aХ=*w>{H};#_)ǭ}(i(ɧj?aICk5aJeKKD6Tx}i5oE/m'jvQkqXgҙ6:23G[@VOW}FAڪ{13JN K*LH|8@4rBOܦfN;h7}~_=ߛ%K.!9{AonR, ҪL33Q9%r0s:х-fO@9a<9bi;m=𴝸N?tٙ>M dvSiSLn{9%D-P-6w@:X9H@35h'`w L.jl3pݹpZP͐ZT'!iU3vy2=`l,PWa2}鱀mUnffS3^RNWڛ5dNj+̴fϦ\:jʬn֕E^'E&. F)biPF$؛50w@;ԪWNZ)G5`K۞֋YY2]#u% ;z쾡U+dg 7KBP !(AA@ BP½`!0AA@ BP !(AA@ BP0x@ BP !(AA@ BP !(AA@ BP Q$wXqo7p?#4xQȫj^{18f݈, =AYTyqjp `r'8£<_ܫAB&!B#ʢz3{38׈2K+;C xS `u#j!'֍>곙}]A ti|4 `/#d 0"`o#9 챡O{e 0=֍ϒϯ.gWA@x.|>Rs.Ӽ^L{Bq<*Qiփ|yQq "?VJ0g v `0Ђ!@@ ^h2⼛qa^O ^F٭}4x y'W-~`=}l^QM'ЂnQ<8?oq珎@_+P 3;5Bn( fK XP=~4eqτ0Ђ_R xyU,^AXiDYH@@j O`V)S/#IoDYT F8nIR*3`M}гQD1Q @@YM#n3!烀F93"7?#Ћ,,K|-ñx(pBYPE|@! C}x _6pD>h> ΢z㏃xU*Ey98.@Y|eW!Ƃ[#gRTzCqoO X9Pw|_q"Vܣ@(}= h|XqWܣX c@CX#`4{_%Ѱ*^Bh]"`4Uo5R50\FC[u;CT}@f\"Ξ0 2xkF'2#~'2-[str^^'o!$Uƅ(T8!:SX95@ )0sZVĖm$KDʌK%rߋ{3퍥NMxeٷp^ݷr/%J2W0B6ޝI$Žc>$|L\V@:7a\+ [_`(}k:d<ٗ 3bo_X[q7Z<@NW@s =2'>OO0Rv %'GyUqGX>׽"[0yXCFp \0ܓ%tƁU_O 8Zs[%ѲmB^Sʬ3pzt֛J$0ZA$u)kζePm8jxGul@lLV8 |0RoyGuNS9׍%9a'w{&7Hu&0+_z.dhwݟAxY;3)v莩iw0^()L?Oz}rp"[ލk䰡2ߛ `x=ZIKIӭ3?W7z[m_'֛*S _m) fwE3S׉^2fl/$ ?4ĝNPw@ oݿ-Yklwc 3zخgS]Y~ϛu?-c¹ ,'L]JxZ ~񁒣n:WlJ{;8]Uo>5{v6 ߨVh3vc| {=܁k7qO>(55@[Ս3r^Ҟ-~$Ou?x[f_/aF{5)?ڶ*3݁~{F亟+ \FB<=Ԁ2nQ?kמ \k…wN9 0< !(AA@e7 !(Aa<  !(AA@ BP !(AA@ BP !(A8OA` !(AA@ BP !(AA@ BP !(AA@ BP !(J@@sYFakD" ,pe&BB@P?%ѽ  b؅aХ=*w>'CDW@A@ E,Wj%lWdU*BTUYuAIB|/MhoiPKLd"Yb_Eu(:WN>d. ۰"U?3|6l1~=j*|Ct1kd%4YBq_e`0:kcX!2? :*>.BEJm>UGgm ~yWϣ(x-Q0TD'PY}.Pu[ yhtֶRJ=KFQ~aKQ[&K7|jgc6'zx=8Rjf2Yto~>5%jt{@˫CϜ=BrW_z(zDMd /P.Zh2Bȅ݀xE1|R!(𙰕p塀?T =#nkw4B<Ï5B{@k{P=. zQsaQQ3 Tc2q{>(  #z>55Sz5 *q{>(  V03|6l1~=j*|Ct*} ABsf3e}΢}A\TQ:.@BPi.+?ȵV:rݢәd 0"R=73XB9TYR4)L29l-kQ9[*I[ʇ9d.m$,HKI*<2r$R$YF# j' HC]Dڮ,JtJ5r?l4*F>݄e6`)yU *jW@*[*9 >Sz{-o3 PU_$;-Սl2ozV^@PVhZ|R,2a>5f3X -R5hEo7'\gK alb2fe~ꭩf9MLmTN}W%~PWL^? mIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png000066400000000000000000000167151507764646700300100ustar00rootroot00000000000000PNG  IHDRծ;PLTE{sjfffǦwwwDDD333UUU""" UU 99UU99UU99Ur U 9U&r9UU9U&{ rrU9rr&9999 &&rrr&ULr&rU9LLL^w pHYs  ~tIME k IIDATxiHv:'@xZٕmj&ݕc{{c6  -xSG, {K3/B"E|_/BaƋavήDp+%UURf[;НRU\tqh=;A++ױK*Ww{H=;aLH|<}6u{tYx.׋+ƫI]ergIKq-.|vܞ&'F7_;euOQ5䵙N$~ӉG rKen^°"7)KJ=U1YJAVL-rw` aVNGZ:3{S5VO&aWЛҪLo9uqN!*+^_[kf &m\.}ygg%$,R%#7]'+f`GɟsiHOFdSIETe2+ _ل) Q>| uTב-Ni(}yb@Si"۶F߬0fj=Y1[M{ۑZdVv(fiwD4dWGvpw :=.V?J|ٮ"Q'KeY6 MA ߴ iF7M#ٕN_<3[ssg}&U,QwnMWS|U͹:1k]Yc0 كo^$Ju |N34)9*)IlWƻdl5wI_[_$V񁴴QfOazӄݑރ,)"P!G*pq f|F>%|w^<򐬆Q:9*~8ϵ CӉCRڨ~Qo0 iWzYwT(ٝfvGޞьPbvKV aME[:ڥqI/j4ۣӮ;otk|G*d%,ޔ{MsTfwfzkU|\(R|_/B"|zEDh&Y\||АMJ&"[A""-uǬE)DZ,r,3罝H}"RWFs=ݨinf7g-']dK7WDSbȗG47X*l>,6H7n'3kȗ?Ȯ,qo*?y0]K.3wqߓgnd7޽<5/ΌԱu]ֽ$#ր00Iwfx4w2-x%yPn|${ɿi|c|iKR׷pp(+( ҹEexZx.0yjo C9 GJjY2wilZfs7]^ŷP( G Ki2wilZ5 S|ڹ˫fJ?BCM:Bwd 0w>-e$Ӎg߉l˫ ,'!=}'ӂ2FIxƹˋ*/t3[`bdb|WhsundDhŷa]qAW=Uj]t38ߓ{Բ|+d}3ΖO4|JRd oV!*SVqvu>J{* ûwǷOjY~[MU(Fg)8{˥D {v(',Wڗr2eՁg5\#}{efovO+䮬2VJb)?w>hJ- $tK+@9Ќ3y@#ƷWjYf2U28B74^|e)ev* C3ΖyK-Sd sG93=CURflWXM42 9Me?bg{M|SU # |/geN2Κ4Po=,ǜu:W gٻs8TY}*=gr\n֛70۪3f{@ƙdU}ifq˾/vΗqVg5r-FKFtΌv|f9 #ַM8k-l|tތez;;߷M8ۏ/gquҌ="S _-Z _<-}q~g|૕vw :+yXjM/wfYF/% <MW_/g|mnq[6O|:wu sv|@fXÓzʷ,;6oy_ܶ_% :m?_m>|uV7OX_u׿mbrn_s}Wm#Qm_uX?;"7ۛ;&ە_S8xoz]-_|56/޾V5_BxYb&hS۪>`n3ghvR| [<!E^qqۉnCqnߘ\;c<}q[ݻ3{]%]Ѹ<ڑ0qksî5pq9T f޴=D߫8/⦍9n^^eG=6]l ݶp|5^b#/CAu^֛>}.q{|b o>. W_o>q-˲}A??}xOւc91}ZnVX6|pnOK1}څm|R/d|;nݧ}ȘCħbZ c65ӗuh6ma} OusuhW_O;X9y P>'m}Ș{Q;АiGIl]aj|Uf!l:˲ymF|߳8{T1ے;ww%AcZCVy$h4q[03M|ߡl-/v}jrmm';,+m/m ͘|3#B],'xYRa\_z#^vybTq~|Ýk{=?öߏwD\ns+:IyOۗ m 1 ә?ׯ~xRtq?;mכea=b;$=?۫[b||>}d»~uHG;֯_,w*uh7'SǟD{iZcl:v` [ui|[^P|V5}`vZC7׈t}/j#uZf# |28θM0; m#k2`,w> 9.ʋk1Eܼ=^סm|/jSq[gS|[x*^2ӏ/=y[/sS?}[Y,=g&aV_?;jAueiYleq`ikVFm|7_8g?mgװpDooס_o'jgq[߳8%/^Zϧ j݋o.=|pF2^6=0{˻9m{m-:;\KֆS9u{9a}W_a|Wϐ |uv{qΌ^\8||1gwj{Fۋ=LN/qۥ cha-6uHsu|O|n|c|}ϨϿ?`|WӘm9%n38lu8?E"Uרٙt3%n|9ɸ |1 m| | |o^6:A~{_/W×|ށ[HWy\_Nr_)՗^_\km/^\/q/"6"%n_u__Ӌ_6_\Em;)_6_\EmE;h1 m  ||à,c|"w@b|q|q|wƗSm  mE;h1 m  mEK|1|O/N7fi-cmMu_߰%i u/FF-a,N4qۉsuƼ/ڑ0qttcR?~ViFUvҍ.u_Ad/zVߘ~t!7oOژ|>i&vzݯtK3uohW`j K+FcCoߍHZN jMs @}ӌ*ի.~]~Cs|ߖĶ} Mt u럴}#<ЭԫzݯtC3oh_ 1LOn9MMg ϐ$ D3KUvҍv hiŹu4Ma;6, pLk7m&W]YfzTfGRāg 169Hc/_!E"/_[.)aems P9oɱ[uo[H"t.Lgjݝ,/7[,hnŏtL[%TD{|- ̙\Æ)r+R${ x>(D4'J117n رaäwܹRm(9 ?u$$$hԩݻj׮-T믫AruuUPP'O٧VZR´`< Ë?%ƍvM&m]F-Z=zFrrr6mzJl]f͚e~K>NNN];vS_bbbd2iׯ려Yfٺp_uMznQe2nݺrYn]KQR$غePi&O &hŗ ]*''G;wɓu=CSfΜ-Zȑ#ꭷ-YD!!!zGtqJ@)C Q>}߫vի:v쨢"͝;W_fǖƐ 0@ C=*Uhjݺ-[fnW*Qz5`yzz^xܗ_bbL&:vhRK4mڴQXXZjVZ)88Xo*..Nmڴիw}Wcǎ-f'u}z躜n{eh"͘1C>>>Zbڶmk=11QӧO*,]vgÇ{rvv])5J}UM8m۔zJs̱u9{G{K~Pff 2t6Iɓk$^UՆ J/Wh<}ᇒXٛ^x͞=[ׯWBB"""硇ҫzuEԨQ#[`… %`MC)&&F&b9}$z֬Y2L7LϥKYܹSƍSDDW.qzg\jK /Icƌb]jР$:kqF5JaaaZԨQ#?^{l2iFS>}t |i :tj׮*U3wq5J!!!ruuUjj˖- 8P:u$͞=Y)uLkzJޗ<jԨ\\\ԫW/IRǎe2%KuPjԯ_?sReW3$9sF#FPPPԬY30r/]0@4i$5,fWzd5}/jʔ)jѢ<==֭[kW|j׮0M2E999߫]vTժUջwo۷~W+W<`}^ٳgo͛]O c=~IR=_$ierr:[ V\}s yyݫF! j3Wf`3v$[^Ҿ{20 C/ k ڹǏ+<<\iii;գG*))I}ڴi ktiF.\PumذAC QQQji4f988C P||ٳBQNIҜ9s4dY+ Հ4{lyxxC,SN… foΝ裏dm߾]AAAVTTpUZ\ryI!Kڵkt})''G;v?}z-+z?c1B&I۷W``^8}ZjkN999rww$zt95lPUViʕ/K*^955UVRHHŬ͛7.UTT^ziÆ RXX,w}W۷W=+!!Av풛[WΜ9m RttΝ;^,1===5`:tH7owmvW;-˩SԵkW޽[aڲe۷>( O?Tܹݵn:+Z|~G{oF}Zl={jZx֬Y.ǏrvvVV]v_-[͛7f͚ կ_?}7rvvVddj֬ǏS^^>5o\}… UfMu<crr^딑^99$0oGV:~NRj=+//?\#rfyC[ݾsNCnnӧ%K>a2˗_###9~!ȸll7/_;07kX>`CvZs믿nH2FUj$СCmzY=$o߾FNNyŋ IFݺu->|pvv65k֘AǛ9sf}ӦM!ɨ^1|pcΜ9of\j͚5FjjE[aa1i$C1h mG5ަMܹsƜ9sf~3 I-d=zZ'Nxo͊+tg1$&Mv5:11puu5+WW^_Z?\{@FFh8::sn۶ͨZi:uԘe]y -448qD~QQQ$زe=++h۶!ɘ>}Eu5c 2ĐdDGG[\DŽǟ<'N,KԫWK/=z0$GxSSS-[1l$#((8p===h׮!;vպ%~x IFu-kCѬY3)EEE?Eɓ'&MGؖn[yQQQarr#G&99V/6eK]cZkךmۚ1NZl坹q23kedf&\eddnM, f.\ IVg$)7S$5h@z~)--M~|kʔ)1JfrÍQtn.]Jm[BBB=>Czf͚رcJLL4Ϙ1Cyyyzꩧ|JR*U^q?ܹsӧO?O?M*00P&LPFFFƔN:Y̌%IzUV--[̾|}}u,Oݻ եKVފ?XS*UtR*zsrrԯ_?v'''M63W>@3fPJJƌ[lkٲ^{5]pAsν^)SVZenզM /H6lPcw ./Mfq5jԨ Y/;wԊ+ԪU+x͚5駟J>rߗ$M8QwyG2LO.ܶm[/1d2iɪ]vܩM6]o$i޼ySJRn޼,X3gH;H*~Jr;()iR$w!L޽O).CPeeQ}ԬREFU˖(4_^.{g@ի7Q΁7wp &)Yy!}}ӧ#vWIWc4j$]eZDDDH^y9::K.UKJ={(%%E͛7K}R[nZhQ߯_?EGGkɒ%駟m6o?4uT-X@7oV5*4ٳgl2ٳG*,,$ٳJKKSj,e˖wٚ={v-[/PɓZ|ۧLI\"zƍ}OݺuӒ%K*\wxxPի%I{گ}[VWd2]qnݺjkР$)%%B+X Q֭R?9HǏ;)A-TO{99^q\\U~L )}ŋVgeeIX...3,U}*şʒ Wx4p@^Z_z)WWWjJݻwPj׮m95 {֭[ǽo9sx :tH>r7o<=3Y19|`y 1wTfMo^]v<4Ljϯб*r%IճZ<ʺ%FFF^Ɍg[5=v-ˣc]w^u_JW_}UjggϞU^^3Yppvڥ'Ovw*KɹΝd=PaZtAʜ0++Kk֬$o߾\c^+^*믿.---Jv*( ,Tֻu3o߮cǎn~]]õkZ]j899u֒ھgڵKj޼yAAAfe]5j5&""BڵkWWGp1B?S'O*.}BέW6TܳOu]KUKSg׏QpTֳZUp7Ug،F%xf,w}WwVTTTͮ%/ܤIjժƼ.-Dׯu_ҥKYYYzgY_|a50 lPmРArvv֜9sas{aaƎWhC7Trrrm7#Xl R4h I;w;pI >B]#GQӧO-hJOOW˖-+)9ӧ[~w˕VA/R?=??_?ŵ Æ S5oO?- lAAVZeܗ̤~c^ͻuTUPPѣG+77׼m׮]*x%!ֹsgG>/{v͛7g]viʕ瞓$Xzy5JahذaV$޴if̘a6qD;vLaaa k***R>}Ϟ=>g=!C0##Cׯ7_NNN:| Xj t%&]צMUB=DU6]mнPF38D e2l]>:c `ɓ˓$DRqH0o~GmٲEwyڷo$ǫzB ,|=Tn&3qD 2D}Q驭[[=EHF*駟VHH.ѣ|RgHH~m3F:uRTTj֬[*--MO>Νk]JΞ=3fhĉjڴ4h GGG%%%i۶m***=ܣ^{͢_tt>m۶ruuUÆ K/)::ZM6Hhڵj޼ڶm-[\SJ5m4UttZhF);;[[lѩSTvm}sРAzw|r5lPZRZZ֯_^zi֭}ZOҞ>}ڢ֮]^{MZd4p@رCwq-NW0x`͜9S7͛uVS?|}}K /hȑƍxb:uJ?裏^ZG e˖ӪUԼysǛg+XԻwo꧟~ҢEtuI6m$ooo~SLѨQTPPJӧO7/쬍7jĈruuշ~{? ''ʽ W0rHmٲE}ӧpB ԫ;wg+???m۶MOLǏVZDM0ϛ-K5ejҤ~-X@wwܡz='|9sEZ~/_5j護Қ5knc=e˖QK.Չ'CV-ۢo֯_>}(55UK,ڵkUXX#FhٲeWRE .ԬYԺukm߾]-҉'j̘1zꩧtY?-f {IK$F.\أOwӊC?\KΞ]&wj VZQdY}z&׷U$%PǔBQH ZѣGU~}K'^7;vHRB~{),,TXXݫdغ$3|OÇG}drpzg}V.Ry-HTNQefnQAA$Gyyw||'pʭ?pkb `Xƪ֥9|,f+]]t!\g h۹s^z%IғO>iPA۷oT2| 2uć:wnrr{B\\UVR@#Gjȑ߀$i^˯XA|78q"""TNeffj׮]JII>C["##ƍUBB4j(EFFںD\͝;WVcকwFAt/Jg9C>z^n f'ҤImΊ5qsCKw}ڵk{nVZ1b&L:uغD7aXB۷oWzz<==աC :Tuy[꧟~R[o^z. n'x7IK=vPPpI;)IITjt k$^U 0l]h%3fddۻrrrtQկ__׷KgT7yv0 Qzs/'$ͭ|};ǧ|}/S?C HW,VbccB[R?aExqE///YIwٹܦ9R#G4HRQQ.\ة ب*(8+Zf'Aޑrrc8;^rvfb@ n;EEf/3s /UޭUZѣRc*e,$6PPpAqdrtO;խ|}˫\l].B؍%'arq){)sd? PU׷c|};#L'VbccB[*()i-hee*ɐs-vP@ v{#L;*@#Gjȑ̔prsGQ$Inn:u^Od2ٸZpHI)%0nΞ]ITVIҞ==%INN5uC>>dRMOI>qsl0 ]+=}N^ [5j,Ep 7aäwܹRm[a)+Ws/#cd"/DrqTNW@K `#.P,0P /7_4L ueZhz5jIjڴz)}˳u5kL&IkV~cǎR}JKLL,sܘL& 8=k,[bnձc+>7DL&uѢ]ɦxںnnQtׇyooÇ_R~)SXjkլWjxMժ$Vg7Mժu`@ 2D3f̐$hBQQQ2 CΝsT@@dRzlX Qv$I.\5|-\P ,Ptɴ֭[N:i7egJJJa.@u|22+=}2260SnڵGǧ[`7*66V.w^999ٺR-Z3fG+VP۶m-'&&jrqqQk"lg}Vݻv4j(Wԣ7o7^QQ237^[TT%wDnq񉒷w+98Tg+΁Wowl9R#GTff|||l]ШQ#[`… %gmfn:L& 8d2iݺuIII1b4h wwwUVMM6հaô~}M&Kh#33S/ׯ/'''3uyxxAǏ\ƍ5j(jժrssSF4~xZeԦMO}с*\5jС]T{ϼ5j(UժUӃ>-[X7p@uI4{lg!&&1)*y_oQFrqqQ^$I;vdRbb,Y֭[CժUS~tĉR*ݸ$̙31bf͚)66VaX}˫5VTTjժ%gggwڶmձ0) @Æ JSO=ի];wUa7o:wUU7VLL.^X~g=C3e~էOCZb}/w?ݫN:rqqQ͚5շo_oe;>>^}UZ@w}$IJJ2s),RZڏ:z5Km]:ĉTx6EF),l}Y>>9,1 ܄Μj?p;T=TXX$}gjӦ6lXզM]pA۷י3ga 2DEEE:tӦMӘ1c:( @{Գg G:u$Isѐ!CyCCC5`͞=[zG.%kԩZp̳ܹS},Y۷+((يRRRU\Ο?/I7d^zIvRXX>hǎ?oVqqqVeE#Fd2} T\\UOVVTPPv)''GT<Ν; ӧj*\R_~qIK1jժU uyWUۥԫW/mذAQQQ >}ѣ5|%$$h׮]rss+*2֙3gԶm[yS-[TժU}vM4I֭[W6w\ 8PT:uc}GZh֭[guÇ{Ujԭ[7%''kƍz5}t 4\_d\5o\[_kաC>ӦM /"EDDC:svޭ^zI_hҀ\|7m299ǧBB)_(yx%a eddmFvv ܹ!٬]֐d 0kך^uC1jԨR'%%hdԫWq%}5rrr/^lH2֭kÆlYܞo 4<̙3u6m2 IFՍÇs1~7~Rk֬1RSS- I&AYl;z6mΝ+5̙3˼OhH2>>W|?*|vmٳX+W4Rpz IF@@o>sӧPCO?Y޽!>>m...zTJ-]j뜜OIӦM3WQ...j>c h̘1߿Ŷ-[^Ӆ z׫1eժU?ڴic^xI҆ *tu}rttԴi,cxxFU^M t]wiӦƸ裏ڳgLq9+W[TD={J oCW@@y>O?UQQϕ{LvM:UAAAھ}6o\]-Gŋ5s+SVVLRݻkĈ:~;sԩSe^}Uw}}T=z\*&?N^hڼ֩S@ |{٧mSԴWUYyx4e?l%'ED\O/T)<+^W^yEҥK@UEҦԠAٳG))) $sG-uEUStt,Y~I۶mo?CSNՂ yfըQB={V˖-Ӟ={BIR~~Ξ=4UV͢O``ZlyqgϞٳgjoٲ x-N<˗k߾}4\"zƍ}OݺuӒ%K*\wxxPի%I{گ}[VWd2]qckK6h@Rwu{u\oV]JjʕںuN>ΦM$xw|WW裚6m6nܨHݺux}9y_}mݺU? n:I3 ~{u$YrrK׷'׷\\.˻Fmeȑ9r233.ш+svv2g;~x}[h-Z(&&Fѿ/3,jҤIj;wTYVX<=ztGv:p9bz]ZOYŋ%IoVOvvRSS)IIIVkOJJ㔨]߯ǛVnݬ%]8}i߾}jٲ>R++9ײ<%WFཛྷ ѴiR/7Cߕ'((ǯ]>wy\E)--MbvgrrXrs.U]Q g6T&O(ͤ$rRҴcǎr)Sd2iϞ=Zc.\Xj[FFV^]ʚ!ġC$899jsI۰a x-:t Ie`5kHڷo_1/w~|iy_ږV{}%]vcyz*ݺDDDU۷oױcJmv*s~,rJeff>rժU+hJKKJޏyږobK^ZKΧ/:v(IOJp1 CYY~~CPeeVӦyFZVFG p "7-Lׯu_ҥKYYYzg_ !^aN:ץAYsц 텅;vΟ?_7|Sɥ߿_cǎT,쥂X 4h@4w\eeeO<ÇWk5rH9::jZ|Ŷ >\jٲ%-Qr~ӧOW~~/\i4H.../?[\0l0ըQCo>RhժU}Li:ռ[7;OOO_=zrssvڥ>*YF4<ӕuM;vR+V0Eƍջwo}ԡCyzzj֭C=d^g{Knnn:z7|~[cƌQN5kj֭JKKӓO>sggjƌ8q6m QIIIڶmt=^>@j۶\]]հaCKVӦM}v*22R999Zv7omj˖-ҼysM6M_-ZQF֖-[t)ծ][_~e4hy-_\ 6TV׫W^ںuk,[~};5j~uAӹsԿ }%ZtzaÆ7TfTjUjǎJOOŋլY3IRpp´}vs=jڴhIԩS~z-YD!!!j׮ӵf 6L~aQF 2D}nuYnnnڸq 5pJYfW>#mV֮]VZ)99Y7nԳ>r5~x۷O_|7n-Z~+,,LO=Ǫ[> 8P={Tddԩ;vhYΝkoh"[N{RRRa\ˁj޼yz'ԧOqɓ'ceee_~1~[ƍSΝղeKy:svڥ\Putt֯_O:u5u]pMQr' &@v+#dI *(8+Z)0p||*Un 3*l9s7n͛7k֭ٳ~g^ȑ#奍7j:uqGn=Z ,P˖-UVy抏$ϯ|>L{Vaa~'-ZHGQNkӦM7e5Jꫯ4}t,\ڸqF!WWW}ڻw{9rrr r#GԖ-[Էo_>}Z .ԏ?@ڹsyVӶmO(//O˖-ɓ'5ydˑ^k/VVUVV\\BCC+XԺuk7n!$EEEi֬YҥE W^:r̙ӧ[,[wk˖-6l d;vL?5n8IG}wyGO?7K.ھ}իWiǹ;'xBZd2335sLAsҥKյkW=zT .ԦMꪗ^zI3f(xO=6nܨ|P{Ղ #F(!!A5/44T?´j*mݺU[5tP}L&SzHwֳ>+ɤ~A}N:={믿V&M,Z~~a;vL ,О={t]wwޱS .+2p#()iRmEEܮѯFkf?%$㔟Vj=Ԯ]÷;ϯ;? a-33S>>>(ֺTNN=b7ŽRD ߰V {*99Y. ̟?_Gٺ\lI&l]NyΟߡ5l8SyyX*,, X1 ඔ9X99I> IJJzCεL~~Ѫ]9988ٸR^nn?QP0^^0 ]_Adu;DխBd9_`kv///EII-@F.\إ mT~iIl #WITjt k$%P\s( @I(//΁V[ RQQ$)%eyE[TXx^&U`3 o6RūTw(5ujzV^^7|Bnɟ()iR՛ox9ߜ$%'u=,/?ժ\l\1AnAA-Ixq}Rϕ{cI3jrsSRaaΟOPf̌SffRk9>:u/W΁Wo"?v $\\J/bd2^zJLLu)]ppdF 8Pgڵkձcr҄ xbP'NTLL~()i(0}A߅ %S^^( `[0[%I99JM!ru $y{[:\\Y ֯-Z( @'Nu9Mm„ {Lj޼*ӺuԩS' 0@fͲu9먰0Krs)33Ng%OZAA#ZMd29ZJMZ-'X'֘1cg2pݻWNNN.㖲d_~˩<'`ru:{[QnnDRWaZF׶KK,ё#G4x`}.QF.s խ[ I<'`/ PtOؤl+)'rr);'T8΁Wo+t Ӌ/Sv֭['ɤZ>p@L&[΢=))I#FP jժiӦ6lodRpppMKKӈ#(5kL3f(Eurww}Q:tH1112L^b0 ͛7O}U !///s="ꫯ~ruuUHH&Noƌj޼*55\رL& PRRL&#I2Lرe]`u6LVvWnnn ՠA ԩ$i5ĘDzXbvUU 6u9믊VժUᡨ(mٲPnnQnnJkϞ>ڼvh2~ Ho(8xH(8x'a*\K׏ @p`@&!!A?nݪ[ɓ#r5e͟?_ǎSjԽ{wM3iF 6,XjӦ.\̙3ڰa " :biӦi̘1rppPxs=ٳg#77WO<ԤIٳڲeF[ ƍUjU=?O/eǏ?999SNk.wݻwWppfϞ-=#:+7nM;*44T7o֧~{j搠$eeeGڰa<<m۶M͚5k,//EIIm=s~2Nd"o֪]{|}=rpp.5;:]j=+/y:v J3yd-]B}rrrԹsg)00P=5sL}wa?[o+WVv73_45J|Ŷcǎ)??c-]T}լY3@.YD?&Ol>U+0JӦMkZlRRRʵ]CW^j֬;fIK驧2 ߻+OϊC*UԫW/$U^]SLR!ߒwyPkVG0 =$OOO}ɓ-fo%I6l0'''Cs̱IR͚5akH=x...ŋFFFZ$o[sMZ:|ڷo$i׮N:ujBԤjZEO~~Ws՛(gg(fPi^~ ퟗ?PR[^x͞=[ׯWBB"""g͝;WqqqJOOTիw)v/F֭.]͛7OժU8+3s226+5u)+7ըQ%P\u@6yfedd($$b^cǎI*{֭$I:vhϺuRϦLI&][eptt.:]UvR͚2e/\`u{LoִhB-ZPLL233_3fnZj:~4iRZYx$i޼yjڴŶ#GX=g233KW/11QIIIjܸqew5~VD,G*))=OLL$ժU7R^^&?T0 G[̡$W`y{U@DãL&G[Svj۶mQNTjUf͚rqqQժUթS'h۶m.L%#www=<<$Y.z&L BMPہJmKKKӎ;5L"ɤ={Tj%"##%I .,-##CWxΝd=_jWԩSNiϟ?qڷo_UJ=zT}5:v(GGGZ\_I05]IɵLӧj*L&JKAkW7mTU۷7סCUvA?&MQ6'պQ5ijzVw+z&3 #ؑB}'jѢZn7xCׯWFF<==$eddhz7ԺukO?pĉWEכIUn]Zt=++K<啿 !^agl ̙ 6 5v KK-,X9sX3bIرcfn?rx }.IzKeK瞫eզM)66^PPcѣr =рtYNR|| gȑrpp￯۷s);;[{n(!7ux>$i׮ڴ[wwӱcEuNPծ]""/ը\\.._?\K #؉ q1b9'|RӧOׯ|vݻ'СC4|p5iLh)IxYYY$//Vɩכĉ%I}QΝ޽[=P.\KzOM6ݻo^:CBBo+77W:uRΝկ_?5h@ .ԓO>)]ɸqǫe˖z'ԪU+=z;v"##P=#ٳ5k-ZnݺmV/ժU+u]?BBBt/4fmV{VHH͛TqMmjڵWz衾}M6SŬ} u=hРA:t-[vcs=>^$??rաCmڴI;w֑#GY .ȑ#qvvի5ayyyiڳgy-\P&jԸqc[N֭S׮]?ZjWw1ХK-[LZҎ;~znZ۶mSpppKk׮մiԴiSmܸQ˖-ӧտ=/\Pzґ#G4gM>\KL+om۶iѢErqqѸq5kV9ڷoBWG{>)'jlS$IC>>ȸr999:zׯoPBB""~Î{;),,TXXݫdغ$ fb+#cӋ?$Iao||:snMUl]ލ Gdd|||taܹS͛7ؾ`INq{9|,f&+]]t!߁)=}NJ2ӳW-(SV qvv֨Q[oiȑZz<<<$I;>>df̜ "BEmVVEo]Z{NhWSo[eQ aq"0,ɜ,3If`j$&9|<0sy>1|> .ԦMTYYcv{AAngwV'Ijiy?%.SHTnرc5l0amܸQuuuׯk#RX,VTTto۶ZZL -S{UhЯ4+eg+##pB&y-Ң={(///YF?믿Çnȑ#{%s='_YFmkcz.h4SJaÆi?  NG)&)~UoEVsmm;߻3ގ m/O?]Æ K=z:o;wΝ WwJ$2Ly̖[&I;@ Eh֭֭Scc.N hwQM W*ZPh"5v4'N-kTk$RgT\_^5f=zꩧtj2 Cg;{wUTTPԗ&c233NS{{$)--$)!Y˲ $ŕ1X@ 'e.oQ>.RIV3fMhƍWT>' * *teggA>m۲,KGNm,k-}i$[nwJΒiNQVG˒]vhv+;W@zgL{رcu]w$y͛_׾`Ҿ'4MY%ر---ڱcd233 ۶.˲Ԥ#.RW,V\/Z7$IO;Rֱ(6ڵK---Gz <CL4S᯵u$)3sJS:CVIIIrՠD"t$p~mE[Bn$eesU^cdy_av;~#Sm}**FO!ٶ,kb V^W,{p= @ [x.ZJg:::z1Y  >N55?Sжjn~Slo?&BC|E9Ey22r{AO_UZZcysܹseq~W$RPh utadY *OTz@@HQ~$fΜ)l аX*bjj$_lMRNUql`_ԬYpDD"ueںvv}?IPz()oҥKu{ȑ#xH@>Mqw0 l믿^;<]wu1brrrRTGG-}#5ed4+5rT*;>|JK;e taY+]u C:Ua8 @J$JG%zC+UK.P 0U3LUVIAHQgult G7ʲV_oKJ.V P«T*3s? EQRԝwީI&wj7(Z.:FIf̘+Bgvx][[y=z甛 iXlVK˿Bd('gLRded9@?AH׿O?-ϧ:KƍI'\|>E"544wѫ*E"y<p g͖ WQI҆ SHH23O_ܿded|=ðmv:W__EiѢEڶm$0.//7 ͚5KCͨ}Z8i,K~8>"ֺuc5v:|b ^;\DxJ5a¶N  @)((~?Лo+VhӦMڵk,˒i2dFɓ'SNq:2)AR56PE%Ixv24}UP0[^o$`j +[&QE"kظX IrYNS 0Unw9܊o``c@;XjkSQ5x a)Z7%kH~o:z4/ `0`0x*ԴA5}OJ(ϳZZR(\Br/I>MӕEv}F~ f-$>Oo'C}FCbUD%IR"*)M>ߧ4tWeSd1D"ժ_9*z!P55mo|IxӹX&]cWr:()n޽ӟ5khϞ=:su7Jx mٲEwN (b:ާkv9ƶjjڠPh_eP<ax[.˫gy_Ir UZz @ {o|CMMMm[a8y?y=C׿`R@Vyϟ,+mT(B +ʔiN7**o\.}64<{\z<*/ UV??Zfl4sϕi_PCSUQQG}X.;fnXmzV ~8e}vHG/Vid? .\.-^X]1--Mz{9]ill>93F^WO=կs`ڻڳ)E5jiyS[_.w?o'sTd_8< @Zr&LpT=nw6lؠ?- dY P(#;'ێSpL6pxҢq\ccc/qV^^۝)'Sm}**FWkooe"Z!ZHdl]2ͳUPp ÐSht`2I? @*..oq1m_WyyydZn/^5kh͚5z9U ,У>۷kРA 5|wyMGGZZZ]/֓O>][[jj)?AX.Y 5IB udegBtm$RE*.#;@ E]xoG}T_W^;vJ駟>kѨMիWP\rmۦ|P>V^#Ft^7?mmjmݢPh,k,kZ[%IޜUVK/U}}Am[C O?;*r බ6 2DeZgqFG֦MvZ;sWkٲeڼyQg 2MSeul;Mڲv!z<0a[70 &LwyG-ŋm6% hkd1W^eY9rA?I˴i&=3-3F>`OFHtZ2YrB+[2 23ORF ef˶UP0[^o$wCcGHq>Ow~(lƍ.8iӦgʕ*++;X,i;p8| IwbuOEEDLZB -S8Rx\L5l 4oHZ|𰊋o l߾]TRRkjj9]z:FOGZ`͛MgթfgtY[޿2ëHDiN?P P)o\.O ~`GM2&IRVVV糳%IH$ylկ~;v(;;[zy}={r-듯ᰆ qzEGG-}564'frr:P>* -Z;S y{!Q/~_|@M+$n$Lut4}I224hȟ)d~OPwCz()7:ٶ38C#FPNNӱɁ---]onn$|n{`0`0o =zͯcw--%(ĉ0ގ(){s=iӦ9 >\s.8^ZZm9w\͝;WpXiv}Xb )Z7$IyeJKˑez$I~xN()jΝ:sR'IG$UWWwyO?2@OFw(Z,73sLsIy>Tkݺ*.#> Lc|,&Miڲe6lؠ1ct:OH.bm[{ 'KVIRV֩=WeeLSpZ@*+R?РA9縸n]{;5w\=Ζ$-\P6mReeƎm  ǻ6۶,kY$C99wJd݃nwJKo]س)Ͱmv:cu饗j֭UVV0 G3=s?~5kd۶Ə=$zYuW-) *5t4+} uHx|@ E'?љg7oҜQt= ɲi$[OLRP Peq6m۶!z?Q#FԩSU\\,O[oՁ}W8i,K~8A,V/Zҷ I[@R9E@*buOEE)@b()r0 ͏axhtG_kfIRf) s8)6Rԃ>tvm+ݪPh,k_/*IrsSyeSX0QRԕW^tұlk۶Z[7+ڷŸe-S,S˛@`Ls `0`0Ȗ@jkSM<8h 57ײ^R| uH (sܹseq~϶jjڔ,BѱW!o f4+es:.) "M&0CDӦM;k В%Kz0a*I E"$I9mG%e4'x)'(--: @Xt PKKK2 R6E"k_R<t.ۑ.g,psuVIRqqqЛ".S8JD #S^(yerjhxFIþ7˲VɲdmJO4'|LR99crvDp () ''vիyf]q @*˲^Q(L\ȫed4hV Pd.0QR̙35s#-Zx~`P`Px(@(zyo"jI 2J zJee"0nwJKo]س(\"8? sܹseq^ֶG\о-}7I S PkLQfۢSۻ578@{!X,kY$@R%%U P)b()?q6l8zw|rM>7|Cjm,IT[zIi*4dWdSdg+##QRK/t̶jj$:P[=2t|TP0SY)Ӝtq(PQ%jjZ\ϲV#$T P)Ҳ ömLӔeYNH)YPh,k,\'*T P)Ly 0 l@ǣD(ڷo8JDrdgk+"SrN) $oQ8C*vLdS P3r'z>$Sm}**FOavtDdYȲ"ved4hȟ4+s # :S~ !Y YjIqed U PC@RYY0\<0(pmm{dY+ -e-SSFI<f V PQ2 `]VuuubwWb*8-}+R<ޢHJqyh$ed(/BGy()G?rm2  m$Z7G5&w<:{%/Ѣ[wqtEOs:Ч%1ëظDKmw(==OYYca{*(-Lw68@()w233b UTT8Sl+$jk%C._yyk_*7\efJnTkGU\{}@ EرCӦM#Sm}**栭|hkۥpx| %ᏜM~'>xiUPPlc`0`0xH^' ˕{9-}B&!@_aضm;曵h"m۶"q2MSe;G!Z *UXMEV<12͉'4'iKujul aXHQ~V\3f 't$ఎf l;ͪ,kbjo%IH^o|35h*.O!Y0PRg>% -]TrJKKURR"uX0dR-|?#pJ_Uh>Sk_jweK__QRҥKq{z{`/턚Poߐd+==W~Y*)/Gm_z$I~#S{BHQ[nu:pD eY({_MM$IOmI<2 4]%%ew_E>@_DHQNGթ>]sֽ]hR8Z\۷-ێu:N:nN 00PV[[jj)?FFY˲ +Y')<|Rz_Ol;y B&%D >=Zb}IRqqLJCu8!XNBrY 57&ɖ]@Rdu H^o { _Pw߃8 @ {'5k,555ɶ^{M?D-ҥ^`E=ҢѣG뮻gt4~a*IRs7c$)3 vLs޲N?,pk@Xv&ND"}sUV۶mGoSZZ^y}Sr:r8qFK.D999z׿Uk֬ѣpXiʲ,N ZZU]vkĝtXN.;濁 .R=Sz'1t饗 _x^NsݫD"N;M&MT m[͛W(BSZ_'(=T( f-$5htg8R/''I5i$X$r铟nP"hWkk@ O54,V{r2LՈ?ѠA(+dHZ֍Uq|48 @,KÇ?ÇW_D[N/֚5kf}pZ[[`=ھ}  /PWqqazWuts:S~NxO?}LDQM6MWVaa.m۶M>}Y^Z#F8}v͙37mƇ]DUnwrsSnyr^nwJKo}() .{}󕖖mۺ[oꫯ\&L駟qiܸq*++S,;5wqV^ &WNN$i…4k,-]kt7?N;~аXp::TK6lDEROWYُ5h>Ma=B'ܹSgq4|p}K_RYY$F?mۦNtb۶ZZi;%ȗ?Q??Q>ߧv>.Timr R@ E,_\tg&_)SD,Gw8GFCbY+vy$ieQISAWI2͉%0>{z<*/F@ S0 [:V<t/''GfI  }lV4ZpxUO1NO:ތ~ " !4;NuÇ$ܹv͝;WsU8No p񨚚ɲV%Kmmud%gPAlye$()a_ѣGK<駟kb۶b W% MMer'?KnNG"ժ@sU8O(pԤIdl٢ 6h̘1?/؁t` K$bDV_ݯV?AWrV.#F覛n:[nE#GDvko{久 jӦMرcN=T7[XN[ޮXѝڵq_׫zVkںjo߭C/'?&NYgmѩIsUU.Timr 0T Em۶Mw>={h۶m=h{NOnkk$uYcz.zrJ5J'OVMM4x`=ݖoܹ;wLN55?CϾݾ+^vJ24dr2>vPPfed|?Pݻw>K/ GSO=Ai̙?JJJz<7Q8J:vL) , }a۶tri̙\/HwѴiy^NطXв,~nkߴgjmZZV<$z?[33F.?濁C=zضo~=+AAqnHVc jhXH $):::rJ{ Bd$iR~~~cn4c }><8 Rmjn~M/B˔H(==O.aht`2I?^Mw!+)d۶m]./r `&ݑ,56]2 *+M)'g HZT\_CGHQ/ cC & H2SY=O~De:p@ EUVV:!%AAqH$J5zG(7<ߡswLu UZzž 8İmv:c+p8,4eY~qlVsjl\ -S"ѢA=W)7_pF4s ظD0< &/MWN濁-`"Ў;l2mݺU3gTii KR56jm}G Tny2IJKt:*rX:::tӫÇ;OXNy<H)^[=O9r{/<FF'Gq:N  ǵyf~(E"ZnƎ]'"yܶm57,B˔H4+=}rs%K#L@E4vX-^(}ܹs5w/@E; #<[ee*7<䌑a9(\}}RPCbUJ$Z$m0M%IlG?ҹ;@Jhk@в_K$@RÆݨ@R^5Hv~LsU8PR֭[yv+??_UXBeZZޖ$efR P4+8R#`0`0xYt\}@ Em߾vء^Hƾ߻Vkk{_K99gh//\()3wk:ӺkiժU>}z/ضw:V'%ojn:@ 9sgE] .t'_D"9s8  NGS ouZ᯽Ii>C/_tq0öm 7ܠ{Ga(hĈ{OPHm~_8 2MSegpl;7?ra@RTs:.g6 @ӟKov㧜rnf]~%p^OgY޾G!d4'*--c(D]]v!I6l NԷ h+u}+ut40h JKr:.`t8XNy3fHbb VyϟqPp_oC -W|,X{{͚5K*fggk̘1V?8ܖXUz뭫HD>rC5zz@ E駟,JYY @Ht(^pxᕲUj$IY&g #C{>z$I~xGHQvҤI8]---аXmjjZ/IZ~4ef/?A9AOqHZ{>9*yf۷q͛UXXxq8!Sm}**栭|}+E"k -Ѷme۱'H~I&驧҆ 4f̘.,[LfΜ٫`0`0x<~Vyϟ!Pm-56% *+-ͧ1$wV/:-}B&? ۶mC8vUUU4iuWFFfΜx@/fΜz]V~ӑp8,4eY~q`DnXަ)zQmmu2 =O\;v[o``7wD",(''G.KpXa7o: Gvޯ(ۡh=IVzG*/"|[iiY@}6R؜9sTQQ'ze۶"^.?ФI >Gb+^D*^mmuvþ/[@ uYzꩧd۶٣D"|9 0Q55UJNI3L~%ɥFFm_z$I~#S{a PRԖ-[4rk04x.>ӺKz+khXpJDTjIƍARBGy S"j/RqcDHQgqGW]u!Ǵx^Ll;z/*:ؓ3X`D"o|ztkРAο׿w}WFr(%T[{Sxqmmy54Sϫ} #S(+\YڽcP>t@ ۶mC8v7o׾5[Nzuyɶmy睚?uW{QVVӑp8,4eY~qE"ZnƎ]iD]*54C TSS$)'B]A.?A.Wa70 N[6mܹs?IǏWKKK?~c&jڴiZz u%h۶mzj1bD%ԒH\ϲ^Q<a22͉JO;Ѓ()K_^.|>=úo}rM0A~ƍqƩLXqZz&L^999 nЬYt^H}GCbUJ$bDJ^{"!T 0EÇ(Ӝ"oҼG}P>˰/'W[["G*r බ6 2DeZgqFG֦MvZ;]wq1 2MSegE }G[nYz+HD9kz1"濁~w՞={O<9GW^eY9rA?I˴i&=3] E5 l\--oK22(3sRFyRI4o<jll$]yz$IӟpB=3fImƍ.8iӦ>XVpcmjiyKbobSe*-Ls"j󤊋@ EjڴiZf|3z:6mJ/۷KJJJ<xMͿڽ{-[&Ijiio'xB?},XyugtP,VTTt<ƒ'jjZu޾GR| Edr{) nUUUiկ~%+iLQQN=T 뮻JzxMMM.gggK"Ho/~O>|Ij۶m][n_|5lذVyϟ!Pxd/^xI.W~Y**Odsv6 @ E=c>|~*=?'t^y^LN*۶#C T+KHUvL|}rI\s.8^ZZ-7w\͝;WpXiv=_D*B/EëdVJJ@Z@CHQcƌڵk{n <1[nur7zhIRuuu?{-˶D' BD ȑ e^Km_z$I~x0PRW_K_}Qw: 4k,C)lҤI2MS[lц 4f̘Nx I_@:mjiyS/qB-\Y vNSNΘ.DU_HsU8 կgѣ>#FhĉW^yE\r-[p8+BgN{hn[^{N͝;W?%I .ԦMTYYcvAAnb۶ѭ~Vko%paîWnr;2l۶ضgoٳ94u7oa׬Y#۶5~[uE%_GQM:UUUU*,,ɓUSS* sLiNTZZVzF񸪫m6% hܸqr{?ꪫ;̙3;kmmՂ #hǎ4h.B͟?_%%%ݞ_Tkݺ;vA[ﶷU(4Ykiy[}ZLQzDt[1 l H33GɲV$ MM$@`rs)*{ӱn70;zS0T0T<w: cаXplCȫM>=JOS^E*)sw60= H|HD"WiEǭCskĚ^L8o``c@Чv\HB,k\Y|x/&0д}Phirw%I٣5xLiNVFF٠#xbu]Zj90 uttb2]{{BeW{I--oHNVnt@Y);_O_UZZ:k  NGթ>]#c;:²jlܷoSI͝+8ǰmv:cwi魷߯3gr9)a)˲8"ֺuc5v:|-W ]i )7y΄ o`c@ E_Reef͚t@"S8\ pxl]CY Qf 2 @UPP|cH1 WI+Y'IzֶKR\ 4]#G\Ӕu*?( @?FzN%D7x<\[[mA'0l۶صhԩ2MS{Fte,˒w:B ʲ^V",ȔS,\ii^ f-$5htg8j+)*++K/&NO>Yeee*..:haZd)CrdgVSS!+CTkgT\_O@ E:yfٶ-[h˖-]5 ]`P`Px(rթ>]#@O`()*//Oz7t/@zaؘ!RT"PEE?8RᯬG@')ꬳҖ-[8Q3@;5i$wW_tGppe%-qcFHQ*}[ҟgM>]r\]+z9!0%m]mbضm;s\2 C~ rm2 Cx7ypXiʲ,~8R/J@cؘqRԏ~Cph`P`B$>C|W} G O@X(=3t@_ R3@{OwU$QWz%K8^HᯕH`)ʶm͞=[=PgF׆a83\Gf4u>Oioozw{G]3gqnթ>]#@ ?(;;[ߕ?OQFiԨQ |F_5qD:^mmux P/f<[oiĉ˓6x\iii.Lcǎ~3]r%eHu%h$-{D 8̀)*H$%IjllT~~~Qsz>X|t5턢jn~]{MZR{.IR]zG4(/**?0 0#b&_w>}z͛Ώ:RC[n54ד_--o(o?dwƶ;ںYJJN -Y$矯nI7xWź{n:{NrwmjiyKk?25x?$ET_HI=h} @ E͘1C={9͘1CGW='>;}K0T0T<w: @u|"5 ˲ܥCDN9NGa;_T~G ۶#b1+--MޮzꩧبOcǮ`b({54,eP4ZHdZZ^dHre)' EfrrNaux|G0g[)B#G?t3G[K$Q(D55 d۱kQ,S%%֜Oo{ @zwt';P[[jj)?Fm'ԴQK -Q(BD*==W#7w݅*-Mn7+- @5jt cme_T(D/c\L*+U败o$RE*.sٳg{~mVаXp::,E"J6lT",Pf)*.O4'8Tuu7Peenf]|>|nB j_H4|Qݓ|za۶t.-mkm0Î5 C+ea)˲-b:ާkt]Fw*ZFHred [.pi׮?`2I?^MG%濁5lذ# 6ڵkUPPnA^{ӱVyϟ,I}Z92)rDkןU\_##m۶9wuggUuu4M]~N?VRK҇ S;@){azǕs=W[n)i޿k'򎚛_$mv22-_UIuTs UZz@`ضm;gN8A^z%M6M[lш#>pXiʲ,J 3vzLҡ)Մ z-w1 l~ƹni2dHfZn/^5kh͚5z%IGjzG}v 4H^xϟNc7oެ~|ɒwy Ķm{޽ʲ^PNX 5)R4ZE*(-Lw4;z@ E͜9Sax0tyWF#ϟ~F6mV^B]r%ڶm|A=ZzuR_cc@{&XNy<oH -޽jg'+Su*/3x:X"V}"ϑW[PRԏ~#m۶M?rrrtkᒤ;vW$_.ǣ+WɓnݺV&L駟qiܸq*++S,;5wqV^ &WNN$i…4k,-]۳8N55?#Ylkۥ{O{>*o3LyyU^gL/\gyf͚2Mp8믿^TUU#F{'?ѯ~ntM76׿$dOz=CZl֭[cJڷڟeY RMKf54]{>pJҾm{Yyy+;^.Timr^Q@ Er-/uy߯_K.}/т 裏+Ȳ,9RgqA/2mڴI:kذad 55mTcBK P"Ѭst (8WYY'0uHx|WD nF]ve tW.Kvء'|R?l֍7(I,K֭ӥ^dm̘1z嗏#C@թ>]#m]56._{I r2eUVvs!Hs 0PPRs=馛N;m[n[s>Kݫysu"Arrr$θ+͒$m  ǻikSM|<^zsUZZ$iǎz}vuIYYYѣGp Ȑ֭[vhW|roPn m~LmT thal:_H Ðd'IywqA1T m S+trݫnOPl`ED$):LT{l\.eddhɄ@ИWWrs?כ'-VNxu&+!!]6[d0o묳 v2M 6U#y~qw]) !D;%%MVb M>R [p(giM[nUn݂]N˓Vbbb t*2(%&US;Zv{t]7оTyy:,};ug(222e ~bH*/ϑaD*1q&+)in v CacΝ;u90 uE5{1 C}]m4[w?r8\ung+ÈPB r.WS8Vv{l^@۫7MS{ N1 $ekǎԹj@+ᰟ++˖dWb(\+)iqwBS>/%eSn^vI)!av"r4@ <9 >$P_]VR9r @yl2ܹSCC E]޽{ y`@O_X}w|j?kL}K 4`q?|=󒬡$0 9=1cF0 +yyyr:rJL'$@hUPQHn*y\z7駟 /Ԏ;t:]*h&D>W(-F'GktNTb c+޽O{0"W-ēO>)ͦwyGSNX~M7颋.SO=W^yE]tQM+/_^:fvJKUND%$r@ؼyƌS-w7'͛PTyݫ*Y|좤oJJdF7*ʥr^B@ Lo߾/k˒@#~/%IG/%%Mu&*&f hyz^BUu0a{l$ז%ZJJ$F%%MPttj`>m++-P\utΉJJ.Q0M vfٚ<$ax<-\QxPFF^n*ۭ`#>GW }, #B #&*1q"#].8tha?oS1~5K{E={^ZIvpB5Rlq[JmUG\/ekϞחjS\<Fj'4z;v~Fݻ_2 #ڱp( a~/;[𽇡P[jc68MٿAu|C#Tfp0MS_k髯h͚T][_=Wԡ4x;vFަ[MP!]P>_"k)7mov͙sl"5uϗ:vSScg07ƶmiu.'zF)9ٚ0@6ږ5fik+k_:zu׷N6n޽{k^o~qY?cz:_۲v˺ ջu}5萵+%:%)KZu~N|Xk>1s8 ֲ}kWwo):Zr8ם;={}c- TϾ~<}'e_l$U?suM٦˳0Ms'I'KQQ|t߸gλR~5_Zj; nKfꯁ,۹j+ úvIzA)-͚=Z6Mbf?2}˓Vo*-ݏ(%eڟ|eϬ[%7OPbh94QcWʒӥLiĈ`W_WKC~*mlvzK:TAiZ!C^+⟯kmơX i`+Yv:gxݻnݬpg H*+&" 5el 3[TX(YMS?vHӳ3klE4o:r[ϽjPoƅnVokVj+~zpWke/KKjw}k:P 糳뿇֟Fk>lV. 9oOߙiNS׺m*\MjE7о BBYYvM;ϨzE[s8+'sz^ND%$+20O>cת;mo>tvzWQQVe'6mwϞx j{Q $%I__{rS3`Q5tJ^[E*{MǎVL]:wCp Zb>کX+IU~.IWUIZwƆnr+$,Ie{ve9vRۙ;<2Eh r@Гt74QNNT|pl_$ǯme>͵sի/[*g_G>r;X=$$4.wd8""ƼC{ 6kZf}Nyg5 QayN;WWHhV=Nᄦ~ͩ5e=Es)[4A bB5Ε^OIV91=9h5eՅ= ۪PޣϿhw*)١mo?- \2 [[ 55~mI3fu@ӴzCuZKbb3,ȶk<+糞٥NuܥCi*%l=\.;=[#O V?NM"8 tKJ|8ꊷĚUYcuH9%=.{f5&K[EG[U##}ZЮddd(##Cަb@9i*.&{JJu_@i۸r@Ή#Z9 _II|uӘf ꟒޽+\Wu~zSm4ee_A驒ahh#{Dk@WK99Yfg['X.5b飏? IzX?7r} 6I۬d}Oz}X9w~>-M:J{k;Kz\.Tiƍ1[u .϶-Ir&j~.׵JY]Y:Rrr1CݻGhu)Z* T}6fjhL%Ð-PoA(ںZ]Vmo:|r1@/Kzp[)HmuZrRyFWmƍkϪ s~^}HW= or /7#k E?>_ea9L?^: kVQ՗_J?`}fv>(p8Uf̚2VuKLmug^kvӔ+?^ t\7%Yw,4;˺&f;9,?7I/?\.g|uKݬ]֛R֞d DDsm=fm˗[ƍN9nSԭ[ ]tt _ FDH?(-Y"͛g'=i` TZZJ۷7^V<7%~kPk׮t*)ٮ2rI69=G_+"Yc,ef+==S <PMSCriPaSj{ w}23u E.R5?duIVECXU⤡CSNNyxB?Eʱ%KP9]+wYrb++wE+4|ܥ*ދ(Q9%r:J/3Ԛ.3̚qƔ)1r>}oAqձxq{ih8Fm|FuNwk|/@MYw%{C?i=z#,IQXLˑ|@)tS=mh6i~ZW/8#GJ'T{ ~h5laHFY_~;vHOĉVS^^e+W=E"xʕ~Gw= ;vH<#͙c$9˥u۷K?^'P>~fP)\ڍھ7?͓ FpYEx oKWW0+U^:)7wO{,#Ucn<Zu m&mZugOd5*_Q"V_ YO٥9*W#J%Hx#$uPװ [u ֐TAoWs ?;_7TU u8nsV6j%a(ߖ5ԹBhm {*UrG9[D @xJta[#Dv{JKU6[8IRـ ;tk10^]Ҡ/^T\w:r ]ړc/KQzڃ|ѕ$kӦU0\YYZ\0z[~eeYAj۪:O(C6.KZ G tr͒K2eъ޽I*è0??K{,QW5G?å޽dyP_n `=r4l\ 6YiM?X57~VUߵ;&f(0>^7/Ϛ8DR+Q3ZeeRb5Z#_󅅕J VmK`݀V/%0]3z$]vzFT})x@&xq+x9%ҤIͦ:#B:՜pBS t?0oBzښ[J.paۜ'b  ] dMiiv~D))pKi**Fyyv={x ǐ HpaJK+*ډGqVVc64-(-J3Hޢjc7Oɩ=ݳMϯCW/gW:NRTu2HկgPҐTIJÆYʠ^~XQQVqqA|e~k%lۭg3vuy}V{V &(+ҽ딙8E#&Y7cҤ:O^OIV91=qšpowC @f7о. 0[ rW+/ݟצpPtt/y_НJJZd,.QYWRqiTjSI2"T\fW'Rž(SJO:ISk"QT'TrEm2 V}_^.y}w;#eEvtmҌ<4iK|w~DnJlsHt늎OU{d W|@7ے5cl\ VudmjҡCէM;bi/I.\j'UY;ry#Tl`*xU5UVP/J({" b>E;LDb l19f&?EG.".ɮwZ/[O?8Z>_e̟I:ܦʥ2SY:b>^"划ճgMUsiܿF[Vͨ^p+.nָ\*-kFibc_ V""^vw.]H}}'XSMZB~9I:l8 \%ȧۥ%Kj9~ӦU7+磌ե%-\fAG@Yz֬Yk.hUҳ glhٳWr?۽Jn*>z*,ܤM*(ؤ=h(RS(*!Zm{ףF2M+o傖=K[WfWT)嚵^uQZ>lr|k[.Ѯ]>ȋazcYAZrzQ)٦rEcF\tx>{ȣX=a" >O6p&--Ms <9Nn%&&6$(,)=] Hk*;SiiVڧO:cp: Ԟ_tej(C+/o[#X6[/sbn$۷KEE+6 u]8Z;6hwQU#˃zD͟C:Y?Ǐ~Ɵ v7Tڼγ4eeSnFLoiJEEV(V;rki.kƒ%} Z:(*>Z6Fr>$i m0]wA%(ðr.ZFT}}7o%I ɚ<䀢uRYԱ5UgyUU~HѦ? " "m^E^T:Pd ,ruIP|xirGֹ}B׸x7YlQAz(Tjo:Z:fsQ@FŽ?0ek(ʚW{Yd=,>z0ޫWTs}A]X]9:XSǎV ?l.u@vdKYicIIVMd)|ʀ{}/&N^#"Z8k߲ieTcuWrKnM 2M)9OrlY2;ULOre,-YV.@lr<2=2.nE%Ig쇃DV- v)5ʫan3uMe?M$mϞ]:sN'(>@f=VP/U %S QJ)0&VX&[sۺRm۬]i)bc"Iܴ>P >5i^12Z6zu+W^n}A"eGWQ{W/IJpݨ:\:Snҏw<@Hpvk nw7 <PcjȐ!5jF4#wܡ5khرz{W]w.bXb\ٳcƪW^;*yV䈈o8 UAXVzw^e߃%XH5';ðv,׾MQpՋQ?Ts2B&.Y]7wsiJQQٚ;}|RI-SJMӈjUJK[Nn/i0(> ޝ~IVaXߤn*/iڴs:dj_.I cKXazojS?#2R:+Y:yVPz.)䶶rGtF5k&p0HFlu(/LFYKe(-z--QZ"ؚ72d&_&#![CFLl12bȈ-.FFlXcdDD4ث\cpCeuh]c^ϑW58ޘ6߻둓? XٳPn}l~sY?#YOEEWߵj@$p=kRk8+ÜGN>j6?SsU!zPqTtUŠzb+M:5lv!`ٿ;dOvd3ۼ>>.M^*MV+ujX|m|U{*9⵶ec7 Ewz'(Źj싍m^VO>r-һ^lU{f_:_3yre#y8hgI#϶ݬ_J[j(_-0ViiiCK.rë:t6mڤ 6(p֕W^Yy'MT-4XUZZ̙;#k˓VbP@_V̬P-=4~Za%+pȩw3mJNzkW0V` 6I)22YN)>~lc_tejԨ>>6u߀{9>:}:Zm9[?+fklsi>$qC;Lqc,NJѭg{a-UC#S%=l@0ۼUϳ/s6"^;/>.1aiʍcZ&Q?Wbr537W&ss uWzrec}dGηǤ7ߔvPx=2Ҫc׽w5qB2Mv2gޮ#.&PVVV TF!@Z.IQ(M6YM~Ն/kp"L;U/+ ^uU?+>-L%%;t6IĉJ29յIͥoN=>}$stV% ev{8ׁ()i\X2wW},:2 i`k XԽ{ݺYa Jv[IeˬDon7[nɱơ:>c׸Pp_7" c5x vI+%:Aպð?>pdԳg``j&a%%WJgz㏥UO>9Gܪ>Oc^E\˵@ |vK %,j_  ㏒=zԺ޿|ǎ:ONNV\)I***_^zIqqq:Skgźۚu^g*ϵa)+ۯOv۽ZyyTa3UVKJN~Yӥy6RMݷ3ϔNaXj0Ȑ%ׁ(-P\7#]xNԷ5i^{MZr\j"4?r8]}eV `\sH.bcAV59$2TmtVoz+o9Ϸ;{4pmzoyXBkH.4:P h |GZoտ_[nmr+VД)Sj,OMMkݧ{I@k ?۫G٢ kX{3MS^-{VkIRTTw9Yڷ/[EE+6 u]8Z;0!_K&X De5vxHӴzܽ[ڵzOU[d\C))V)=5ex˪&rhk,ڦ;TSJV`׆mhWΝwޱ~:4T՜!B.11ֱsr>KGXP`z8Q2k /6Ph h@ؙ_ 6V*/'P\JJ?'#Ma J>K iu'hh0]Z&e8?GڷGYT/&SRX}L_^^}'(w^_rrĪ=7vWccAÑ#~t 6J㱆gͲ^ @KrUuG4b%Y֦ >222!ڕ=BVf׎yRgsƀsiE/?|bl1JL-29㕘8FI h-xhw_g[([Qh0%%ϻ?zHn {Tjqo|/TLV/hT Z[Duϓ qB@ի$駟~uyjjjtZW_}uE@ '[{IIΝ[ntzq lҞ=>I[o.ɝV(wb&JHoAm̑3r⣇()V* ) (,2:+u-{o #VO{[?|uV+*J:T2E8뵁k gz[ 'XEGK{[S}kZ^Տ?Vw,V5MsÍ0ZСC%IYYY/RPU6|mS^l6CkH砈ٶ##%={j+Y٦}JJwnodX_܃}*Jj+-%dJ=6EʙYy2Vچ뉉rrH9<!ivG_?U9U}_ߺ(i6+WJ7[Cת\^+wFFV.?L6CK:PN֫?tw' pKH9ƚo׸Nh:*AP  4` -Y; оPzK-xn:ѣGW,ӟO?}II&Okri„ ڱc֮]dYF}i M!Ԍπ[M~땟A_I0JL(ð8NgY8GqV¨[jђ 4@=>h{@x o.@Zrrr˵xb=Zl:vyiѢEѣG i*RYYVꪫ2DYl'&)%&G=|e*,"g64=2H Qb8uJHceE6Νï5>.`A뇪/. v@ NӼy/&&F~n/ ᥭGYeI>H[IQQіj|.,dW\ܱJH)aeG7!JE ikSޛ kv!+Ȑ֭[/̇۠QJK[~I2;P #+ac]vh @8&hhhOvzPUZCY.I(Ǩk?B Ao<7MVRV%yk9xx1m (D4izUPZ௴GIRtt_9㔒2_iSyy$C%%۵guv$IwEhW222!Tx򕗷"𗗷F^o #R#|qJLp'**kpw-v<P Gvl5oG9'W?wd v%##Cz.hlRR}]ϣM?Ibb{L0M vt:vr:g)33]JH!Ix*uW+?|.sv hM<7z s%rWp|;oiiv~D)))@o}@`^}Sg?+W ItuSE~Zå޽ʱ@x#h|>>ܳջ_Ag %I11?BgT!U#>_e S+$ PBu|pEF&h:|2~UgLnW"Pl1OWr/*~.ž۬*=miI)6vեaaHvBVnVAAV64%7XZ@JFF222z] -VaA,d G]qq*!!].Ŋc]6@0L4]<9Nn%&&Viiv~D))p-RACgKI^FS||).x1ms!x oh5eeڱ6uFWbhu~w]@"huYp~**Z)ÈR|%&S=ܳ߱. ?P^ZIJnLR)>~zp~eEZEТ2eGee?Ю!Ch6-cdYn.Qtt$)1qt8zci,ٳDݻ_A b v p&*ʥrCZR ]QC+-#wM*f_c 4l@dԣG-:fs_bb"mPjhBA>B;R8f4uvyB{R;m?iOB{R;mmF{*߻ ùhoC(|#o]eI-r\ U T+7ozM>]_n&n=#.@+2L4]>O61mZZ̙;#UU0׸q+Ǐҋ/Re˖kկ~+=駟]tR]tE*//WD@z-?צM4p` :tH]v… u7!旿vޭ?b… A @kc`@H1f?N:lCa0+..֭ު(::Z)))⋵k׮6@zZ~ ex^xA|ϟߒ Uq})99jQ-޽"""4dK-yH[֭SuUW)>>^ :uСָ$( /(==]}mKІڢ͘;wW^Q~~6lؠx@W^yek\uYzj,M2Ek֬҄ }v[NZfS  n3$:mܸQ|\Vmƞ={r$Iv]>.-ڌ{jzw5vX-\P=Ca-ڍ{O=zd-[L˖-̙3[h34l0uuiرZlYk\V~\./mڌ\W\qE'sSO=ը!oر2dFQF)--Mswh͚5;v}K^]wu⋵bŊ6@[k6cڵu-P[;w땟w}WթS'͚5. @+h6o)cǶh#mnL>]ӧOxgh„ _B3mf|>e˖SNhжmԿV>-_ѹҗ EO\snfM2E۶m7߬믿^_[=:GJYYt"ۭ, >CjӦMڰakOѥ5ی۷k̘18q^xph3.2\R[nmn3 k͚5ݻ$;O?/Rjzwo=5ڌ.]o߾O+ۿkiƌwAZUk1uTUk?h3 #G\;wVc vիvշoJ9#Iz7ں4!mFnnN?t'$-{ưa߷JBGmƷ~21B:tPt]wiС6@4)mƠA|o85{jʕ/u Mi3nݪCVnСzھ}{ >sI҈#j]_iӦ6 @jJQVV>[EEE -{'| @ ?~/_^mKꭷҋ/bBAKa^}Z848tkܹ$?a:ZbԜ3^z%|>h={ƍm%IJMMm2?JzQz;vT,ʕ+%IEEEK/)..Nzj+W f\uUZr}Q$IlQ)mƔ)S4k, 8P%%%z_W m3:wɓ'WfŊr858:5ws9G'p R=cO믷~)muk̙馛~]3gI 85{4~xu޽ Rf\~N:I[n-ܢ3f(%%4-@R~~~Ų/\{/+55.\Sڌ|KmKf :T^zItgqeBm@fm@nm@fm@(׿$]}*,,XwjӦM4iӃU"B A PA A ifzK-xn:ѣGW,ӟO?}II&Okri„ ڱc֮]dYF}i6h36@ h3v@ h36@ h3`}999ZvmUT[˗kzgl2uQӢEԣGV@pfm@f@fm@fhM@8!!!!!!!!!a0 $ֺ͝w)0p6 Gy0 X"إTxt1Nh}ȑ#+oذAahх^(%.0t?… [5\. ˆֽ26#h֭ 8p@?ƌSlڵas]r%zեK͞=[sU~MKVoI-c%a0 EGG:rpYxlfA(--߮!CNkpKF]lڵ԰aZ eeez7.]Ƿ<.Ksտo}W. ͦ/\yyyrpq\8pbcc]J^zIӅ^ب 2DUjjϞ=zJMM?9s4M=.W(0r7*&&F<8Ш}&O,0}o.04yj.\(0tReffSOURR:vs=W?$P_;N/RXBah޼yּyԵkWhĈzꩧmHS^zgeΝۨ׿ :(&&Fԍ7ިzk޳g.RC$Y2L~(á:KTRR%Kh̙ӧbbb'矯q38Ca߯7xeeez5j(uIJKKgQ _VtM7ݤ+&&FNS't|=c=&0_Qۯ_^:tP%IҶmۚիoiӦC1Sg&--M+WVܟFw駟d9G]x)I2eJϤ3MS<Ə]*::Z={'57Nzҿo4@0rtW(??__[|k׮ոq㔓ӧSNz5uTnM2EO>Fcꫯҹ瞫{]#Gڵkugse^^>cOݻwuP?:tl6 Pǎez衊m͛/^O?]+VPzz:,鮻ѣwފm9͚5KԵk׊{u94\7xN=T13f(""Bk>~J[n՘1c4m4eggkլYjkOցt 7h…Zz&Lݻwkڴir\z5uTW;矯;OׯװatiPvLRc|Psbbbti)>>^?N:j̝;W}$M>3>>^tkΜ9ڰa>[צMjm? ФIt!}'Ag I2vig366֌3WŋMI ;i$S?8?`J2'MTm LI$X^VVf|ɦ$sI'dTLIĉoǛ6mZ}֭[gǛ6̬X̙3g֨{۶m$sРAu޳#fnnne%%%_nJ2o:kn8}~̟~|-ʹ4fUx<gϞfddw7n)\vmߛTs|Ies5%˗/oؿoMIgazފڏ?xSywW,7Mջwon7oni)<^3/K.1O;4S9nܸe 0%ͫX7֭3m6oYbyII_”dΚ5>u}ǔdwqm*|>[o5%/>Ֆ_uUWUܣi4TSb~˷lb:366LKK_iiyI'jӻ۔dN<ή%\bJ2oj۩jBO4%K,O}bp _m]yyG4M0%zkB =]+TaaV=uWTo~I_zHqqq͛Ν;O?Uyyyl6=5jj|>=O>dOo$]veSO=UN2áOzjpPttt^xquI'|r~{G|>v.2W o_k2dHɑ$ >\:u.::ZcǎmytkzgeYoh͚5k?T,~{뭳w#mڴIt1Թ͌3c%I_+%%%o߾z'*]tE:?O|>7ѣ+;TLL^}UܹQǫϟgIs=~U,7 C .԰aK/ig߾}zGճgO=> ZdCպ^ѫ$ 8PvԣGjmATTO5䱟w߭8=֭[}xuM׿jkٌ~;IG}TWRW{.""B&LuJ>F nAqqqz衇=4hSg?O>4 0:ݮTV6lXٳgK>ea/Oc]uUԩxj>}u5\$uСQ/_\5j$)??_YYY4iR?xXץKg?իt|}?WuqXoƍ'׫zb SNQLLLݺuSyZ^ߺݬ,߿_'xvZcСC~j;92\.]Gt7o~;vTuPrrj;l|s]5ֺ֥OZZ$]tEr8ZdLӔd4w^}5zϽޫ޽{+?=z'OH׫WzۥK9Z׹nM:U&LП'=Zt|Ɋܑu\1cnZѳZYYz)HMbb}Q9]޽9]qM رCf͒iz+_~۷od è1%''KRnI2ʕ+5n8EDDHV^-ǣ'6#ڵI߷m۶z PFFX>h/edu]۷4w\;u(I \4_ߤc6gUUq d]Kܹf͚g}Vt'7i5k뮓?M}zE+B}QM4I˖-u*))={l|zkʕzG#wFP3fPNN|AM26)Rko~;wn9̜7o^a{-ϛ7O͓$-X@ .lryߺuV1q] yaÆiСn[[h%qջmmޖlWN:$}z7jŊzꩧSOi֬Yz饗j=&aso~ŋkJIIu(IRAAAu;wlcǎzv W\q}Y=0`{=߿Z]^}UIҟg͝;ںbٳ ī*ݮ_g1$=կ_?zd]z饺Ke{=Խޫ/X{l .@6mҕW^+=]z饚5kVuK.TkOu4~M6iƍ:묳*"##ugTl7lذF?%%E?vءX鮾:wKȱƏxYjn lDwy:$Ya_zoNC$JPaSBB9$mݺƺ>U;g}m۶XK0a=X-[Lw}|>.Ҁ6/Xk/|-СCJLL?ug.ro? 4bN9~钤/}nV꫚2e:6mʰes{oj]饗jҥZt9^ҥKCD~z6K.YgO0AsXS`s&=zW_ Ĕ)SdoYjQFtjʕu7[?xϘ1ctHl")(@0c:uo[jɒ%n3i$I=ܣ~ᄊ(o~judff èw竬Lҵ  IZdI`W_}n! ༇ /Pmw-_}/"9w}2M3!%iƍzWTVVVmvZIRϞ== /;C}ы/5kgyF-RiiiijZzu?e۵~5z@ؚ5kTVV'6\etkÆ F:?I>Ofg}Vc*zOuk={[coV/rk_|}ٵtk.=>B?.]Z^m?uIl7BC뮻NoTy 0aVX~%Ӯ]t%T,裏$YN8A-G;V'OVΝzjܹSWFFF_y睧/R_aÆo߾2MS}6mڤF@hzjذaի W_o̙3[dxOkРA>|z2}7ꫯ4dȐ3߮ԹsgI]wݥꢋ.W_#GG*,,'|9Rg}v㙦+W*))I'xbjh+:tk111ٳg+??_o^^x]}mW ׬YO>Y˗/׊+4x`=ZhQ9N1B SYY^uڵK-uזr뭷Ҙ1cgwQJJ>C͘1O:$IgN:t1c;Pzzڰa Qy^IҪUO:ٳbkƍ;ԣGYFnݪÇ+##Csi5j }Gr\߿UoYo&MW^nڵڵk_՟g\RfҞ={l2-_\^WW^y^F'&&F|I=Z[lK/ 6(99Yv66MO=^{5M6M?^~eZJIOO׿o ͛7c v9Bywy^v[n>{2'ICz6oެ7|3(zJs! @oF^zN|P=zW\Zݍ7ި;OӦMi쒂/׀tm.IwJdggkɒ%ĉ(>>>e^;wTjj/^3g7|2pt} i.!CCCCCCCCCCCCCCCCCCCCCCCCCCCCC?{wWUqs@fPbΡ"bfukXjzo VĬkuy▥efe<BJ9* Ez׉^Y{ ~xBJ)(""""""""""""""""""R )PDDDDDDDDDDDDDDDDDDRH)TXCvv6OlٲL&k#""""""""""""""rS T\(PDKOZjCDDDDDDDDDDDDDDD8qUZ; Ô("e_ܬIJJZjE@/,@)rE H2c L&S׆  "R*m޼[[ܯ6lhhDDDDDDDDDDDDDDDDD<%HԪU+ʔѷ0i `RH "Rbݻ7UVd2a2n_JJ uёʕ+3h N:U>UTL22gΜH`2 ðv"roի .~o3tԉpf3;vHvڹۯ\(ZjEJJ ӧOg,X={9֤$ILLͭxKM "Rb>C^J-hѢ5k$-- ׿xiӦ VI&1fٰau@zz:EUȽ@ϿEoJ TXDi֬Y&MÎ;/8[oEZZZc_ ##"#"""w[[[HPז-[HLL'_O|;w3g=zÑY ~~W l5LԬYaj޼ytޝ+bgg'5geڴi[;Df̘d󲳳jժ۷ۜ;V!!!L& p r1cC)E׮])W\>߀0LlذlذT""""""""""b=qqEД("r(PDޞ#G|W^4i111oE+E_Tq_dڴi 6nݺaooo4rH~iwa۷si}YfΜip+-[d߾}[;^ aaplqϓYߪY1@{Eڶe#ׯgĉ,X *0`&L@ժUK4#F0bp_C(ܹsKr_j֬' ndggɓ={6qqqDFFҾ}{k%wOOO<==FN< @ڵ,""""""""""ep ,[o9=eda XhiQF""&-,"%fqWAK,:99r!҈o)?)7Z.sL&6lؐ=66ÇSn]P5bС߿?϶-{q/^1888иqV7o[OOO|I:DHH&)OI`*3fd2d2微]rΝk元k_ƽv9̤$FMZcԨQ7Ņupĉ"V1rH|}})_}8p@c63g0dVJ2e`0螮Yf=sNu]8q"͚5WWWZnͷ~{Q憳3L8?/K/_Nquu|ݛ=v7:+VG}l\p $((Y&Xv-`j`ƍymXOxW^WDbx*ޮioԝÔ y r""UTJKĉqExwNVV|WiӆziڴiÕ+Wlڴ͐!Cl?eF :tۛZlI=5jժ0sLuSoŅ'x"*Z|s7Ν;Y`;vrOII!00X|E˗rů*vחΝ;Jtt4~!K,!<СC/qtt䡇ٙ 6oxb֬Yٽ/Nѣ111̟?uֱqF4iR;?{{{Zhlf׮]eѢElٲJ*nE߾}_]vTT'NtRܹ3M6O>̝;J*ѭ[1TTDDDDDDDDD;z/T[ΒX:t/<.-piāwW jcV=|gF uxSRR{)))w(?̚e`yˬ_߿0֯_;1r|5j(O}7zy9|aooo֭m0;7|So޼ٰ16l1sLcϞ=Fvvv4k[8sL,c` 80OѣGscoӦqҥ|c~7^={`?KFח]vsNZn{Ύ'xɓ'vݥvϝ;Ǯ]xhܸq~Dvعs'QQQ<ر4iBVn8G)yq1Epx<=ضͲeo`cm›oB&=Ӛ0t\..б#|%A0I+ORD>@[G[>*)QQWrрXj?3=zё-ZЭ[7 wǪZje˖ ----'ZjSz"Z.../7ٳ|7:tz㏼ \rm._/(&QR% *0F ,I=E5i$ƎKFFFUk}ijԨQ>%Ea8`vNųۭbŊyYй,uҢy뭷x뭷 ݿ}uťmj֬ɮ]8uT39Varr~_ȹN8P""""""""""R&^۟DXy5 A(mL[ G-l #FXZ{lq,""r[(PDVկoIU8}:  9KזT,۵lmm駟;v, .dݺuDDD|+h۶mƿۭRJ;ggg^z%.]Z}ccc0`'OGJ*8999mۖxwTQl߾=3f(R,$''\69}%Uxx8cƌݝ)SбcGs*W\hZvs>O %uoMqttdǎ?~<_AV\_eq]nS;;;Zn ٳ޽]vJӦMt 233so,!^UR~}[9*?ʕc׮]EJ5HHE嗡iSb}ҥzg}z .QC!m}=? AJ)E("r7~UիW~c…W^^ )))>}]I~˗/0jP%m3sL6mڔ۞Ř1c|r2dO׷J^OŋKMMeذa$$$мyə$_ݻ_b8p |Y&=##_~9Ϲ- CbŊ|G|dʕy>9_7so\]]ׯKڵO?D:k֬ܪwGgX㺴jՊ lˆ# c׮]XH $$$O˗/3rH `С.Iyf}s|.\૯_~BƍsΎÇl_R,)ɒ+*@Ϟ0o%Sz-{bڶFG|W:׳:~z77Mͺ%EDF^^y2ƍcӇ:Jdd$nnn3O X*=sѣGƆ{G}ĨQԩTTH.^3<ìYrKȅ 7n5nݺΦe˖y O?ϏmHzxW QFر:uЮ];RSSY~=M6m۶lݺv|6mʔ)S?Ipp0͚5~uVΝ;GժU<Yx1գE\x7ҫW/"##KdyZj3rH~a:t耷7\t~+)W .G :{ƍS|yΜ9Ctt4 ̟?ƍPfM|}}ٱc-[QFLpp0P{4ظq# ,LJۓu:t(}Yc̙4mڔ:Lxx8ݻw'999Ze֬Yt֍4mڔʕ+HLL 'Nॗ^[n7'^/q!C0uT|||aϞ=yI߿ٳӴiS|||0 Ç+?|>o&+ ,nݺPbEN8Att4AAA ݺuc4i???i׮,R""""""""""ݕ+ye9  * J F ˢG 5w/v 22,ws? |O> Xsj""rTPDDmРA|74hЀ-[I=ضmʕ˷ѣ1be˖%,,s9z)"""x'o[/s̡y愇rJ6mJDDDn5,"W_}E޽bڵ̛7#GЩS' 6oތ[&Nȑ#䧟~bqttdɒ%۷^;;=!70bnO?M||=ŽJOOOnСCb?~?/ zW_}رcqsscʕ;v7x㺟7k\+uV>6lȯʜ9sv?W^)xSNe̙4k֌7xb*VϺupvv.po,Z[[[.\ɓ'ٳ'۶mYfc2lܸ>}p,Xb,Z(eʔaܹ̘1֭[c͛ɓ'y5jTMƳ>˅ >}z*""""""""""E\Xޯ'9Vނm|yx9||Xˉ7?绫a,'O16 z*cf 3DDt0aX;;%44P8pѣGUVK'6іZQQw{ח}qi}gۗaÆ[;˗ӽ{w^|EBCCNyo0%-֯K?//RS_ijKvcP nm-ZXn$.NCl]ӕ;$)) ww>{ʈ#1bD/@r;|0ytow^t?,** o۵Uƍ_܄Ν;k.É!33*U0|px UfEyo˖-cǎ$$$J2dvxr]MSF k$"""""""""R*\hI<^iu +@j*8K&g:t< __DDg2 ðv""wZ]jj*GVZ8::ޠXU)Qw缈-X˗[egC͚P ԭ .g:L%m.lalI v,I"ET"roR@DDDDDDDDDDDDDJxXҒr%?nn{Nl*̟}<E0:=O O{*}ĉ?]._//xaxUj}gVѣ`geٰITDD>@͖+`K^kЭk6~Dcz%|oLSw$uef3gy)J{ޑ#V[-yxݺYr4|N^ .X:wO?,[a6CHHGDD("r׈ c6DDDDDDDDDDDDD*%6n߁P o:iJ~ L&a,UZ,+""rR]bT?q@"""""""""""""qqgkC-a$+,  5UG?0xܶ,U`Kl֚HX;:,Y,mf6tu㻓L&5kִv7oݻwbŊIFxg6m3f`2Z*}%:::>9;vqCBB0L 0=c kR*cǎ7ލ;dcǎysՐ,fۿȽ!.RX#.\ŋׇ^Lx}س5؏05yzTaXȀw߅8u f̀}'""*% ZAN&5jX%aLJpvٳ;w.s!88tڰa:uwebHiVfMbcc1 ڡmfph%<k=P6t݂Tv.a+`Jx5-{[ ggkOEDDD(PDD:a3ok "r'1{}vkRjmذĀ 0`& 6ieԭ[ggg*T@F:t(ϳdf͚ŋ >ٌ7] ͣu8;;ɓO>ɡC d2y9x30L7L;nsN^{5ڵk/r|;v,w$FMZcԨQ7Ņupĉ"V1rH|}})_}8p@c63g0dVJ2e"00*U`oo7{.A͚5siӦ닓 :.X>>,^^^8;;ϬYnj~a?CQ|yiР!!!$''{m۶ѳg35k,̵O>}Ņvڱlٲܷo Zj888PR%~i~iTfΝ;W_7yJDDDDDDDDD,޳[<(̝ ZY2šr} v$K̓v[ض BBM%}C?DDB得v'NϏ/н{w嫯M6ԫWHc%$$ЦM\B@@ϟgӦM >+Wb {z)3gXr%>>>y.6mbVvv6zbӦM닇Gm1i$޽;̞=(vڅSWΟ?O۶m9x +W&88K./ssqbXp!:7776 {{{cݺuso4͛7666`xyyÆ 3fΜiٳ.tsugΜӖe?;zhnmڴ1.]oog0=Zhƍ/Ȳeˌvh+VmOOO7Wk}m ?xl6lmmYQon/_puu5Ν;o[uSq|llݺ5իF۶m >}z}r_͌5x`0Ǩ(GI'&&ؽ{wVXa>>>5jmF:u Xvm}ufƠAEsyɹWǍ>2cǎF\\\n{ZZZ9}ׯpqɰ5.\۞e5͛`SW^4nܘǏsرt}Wʔ)äInXڵkǬY >>/{Fa6y7HLL,֘:uRJylllxwR -*tO>r8W^eڵݛ,tB:uoq=#ispp`ɔ)S _qujj*}mcʔ)UO?GG<_5qqq5~k޼9o6W\_oĉRJ/2mڴٙѣGiӦbc]r[[[L<1rbfbxiԨQ1~a|I>ݻ <΄ T(dذasVX&M5KK#O䣏>Ņٳggooϧ~7_~%7oڴiv>*W̎;زeK}]]]s=O<|77<ɓz*'Ns[n >'Nt>0x뭷ܹs}ʔ)Cox\TXx7//x)ؼzE?pU͸AgЧ,] /BXFY/{1{eyY{"""V%EDnQr2[{݊&n?o&t%_BUqҦueQfMd'|2ʕk׮̛7X۷/,Xkײ}vٳg3g[lbŊ… ,Zݻw@VV\p/RB͛KB_^㧟~9ɔ)S ]vyvZ9zr|'DFF㏓Ɇ xn8!&֬,))P"t#gc'UcY6|dfBժ佱cᡇ?.+ƃW )PD;VOJJTxE5`VZ?L=pttEt֍Anu#UV-lٲ%'GNrNj ܧzE>\\\ׯ_n2ٳgoxw9to_}UyrJ\|9_`Q}^J ((6 $դI;v,:VqӧQF|VM9YYŊTJ,HA糠sYEv݃ʼn,I~yF˗o8%̉s7Ο? !ԩSn4'g>׫P /.\ %% *|("""""""""w'L[R :t qRl֭7nI2ŒP͖jfmHi@[T%V0x0\[Ro姟~bر,\uAXX|+Vm۶EfnJ*1vXy饗,cy# 0,裏RJh۶-۶mMVQ(oߞ3f)gggINN.t>"Θ1cpwwgʔ)toodʕ+Z}nօO|?H 899qE(W ##"#L,U˲j7F<k`>ˎ͚3Xڷ}:_3f `YZ+Wٳ&ԭ[YfqSN1lذbwF-ӧOgyRSS6l 4o˒șm߻w/z>p@YfMn{FF/rs[JŊ裏/-ϛʕ+|s*߿1o޺۹ү_?233y饗HKK۵k~mN:ذn:<۞.;vJRRcƌ!+++oٲeIvEk˗ݻwU O:w}WgΓ͛oɩS/r GΓX|r~g8p ?fxW7o^4̙ɓ's^uL&>ׯϳ}ff&˖-vKDDDDDDDDD w իC0fe_&ssa>ܲp|%oe)W](Ew뿏ƍӧ:tՕHٳg%T{ɉG mLJ>QFѩS'T\xgyYfV ._3n85jDݺu%66۷M˖-y̧~m۶ёzꫯLFرcuԡ]v~z6mJ۶mٺu8=4mڔ)S`5kFIIIa֭;wUýxbիG-x"7nW^DFFj?fȑ7f͚cZlIF%88`ViqF,X۷'!!u1tP>"߃7bŊ <&MCDXXYYY 0Di۶-ӦMchтӧOƋ/Hhhh;v,;} 4YfԪUt޽{gXիWgԩ 0=zЮ];UFtt4RJ̚5}ǼyذaZ"..M6a|IS?#ӧuԡAp)z*kx|Gkh'\DDDDDDDD> aafeY],ÓOBY:$%l|:t˲M YD+4lؐ-ZX;RmРA|74hЀ-[I=ضmʕ˷ѣ1be˖%,,s9z)"""x'o[/s̡y愇rJ6mJDDxxxiP+zMVVk׮e޼y9rN:_yf7qDFIff&?ӧOϭeooOXXÇё%Ko>zjJ#غu+O?4̝;5k`6y뭷عsgnU`'==Eq)&LPriѢ\&MN:uJX[~^-7-66@f̘A.]3w\ző#G9s&ӧOϳoqӓ[2tPX`Ǐ?P{f}|ԪUkF.]رc5j(Ԯ] HJJoW^)X666̜9 ѣG;w.7oёW_}=쳄co>̙CJJ Ç'**_:uضm\HZnŋ2dHL|m={$&&_|իYt)ΝG4l0> 7n̙3ݻw'׿puueܹL>X.˲`ʗn-y|}~_-Ù?Y=<[.ʕЬDDD"&0 k!"r%%%NbbbdkrQjժ,v'DG?DE;}%++ ___ӧvH"ٳgӷo_ npLÆ ٳg)kݎڵp"@ǎ1.hxp!uk- ֵTŲa֞QQȽIK=xxx䩞ƛo޽{ҥDn(ܹW_}gyaI ۱c`Y~]DDDDDDDDDK`zKpH_˖0bx6]EoX iiP%-W4DDD&(PD.gyۗlx~ƍ?ժU#))]v'}Ckooo4hG%**lFIv܂h&Nիׯ#öm&5 h:.e?uq@8ђ׸1L֞"%%N][k8 !:w̮]'&<y7VC,[;v+:t`Ȑ!J?~Szu~mz)k$""""""""r1 سτ!9<<,* ص/bZÒת[{*"""RLaBDNKJJݝD .55GRV-okLV,*;s^DDDDDDDD$G\0Сs4YcI[Μh *Gcv5e٩Q#K_.С\9;[DM("rPȽ/.β*TppgCWX*$cioMPia!NWm+CP ӦA| _GrvXgC>zW&!;53)PDJt^y>wGikkKJX"dggߑ㊈H+S eB""""""""WCDdfB̚,:@FԪj3GqݶX.oƛq`N?1g & _gXDDD&詗(V^Mdd$:u ,w=)))L8ٳgsq*T@nݘ0aUTɷɓ⩧"$$vLP&IDDDDDDDDDDDDBqq0u* zܺ سoѣʕoK GB֤)e-appwT c> Ъ5?{EL0 kTz!1ٓcǎ7߰dé]vgϞgŊ%bqq0~< Xr^;wBj*@ 4v=G2G(x)C#ΞThR .@K_EGwG+"""JԦM|}}iѢ-Zf͚]w{pڴiêUpuu`ҤI3AaÆ|MuF6mnTDDDDDDDDDDDDYjj=v찬ƛdihz ZԹDӸ希zSHWHN׀֙H(PDJ믿^ M=z4~-7n$** vͬY'!!T4 <}dJ;vvek)W < ~K%aWϲazp={Aޖe9q㮿氈m@-[͚5OŋСCW@3lذ;XIVl sgY&1*.v`R8y\]kW˲<޹W0naaߢc6CHHMDDD("Vk.m}_>63f`ҥ/ԭ[cY8)ֻ ,̒7w.9c) P:哅5KT8j` =P1^0DDDDnEj?@ժU iӓ;fÆ 888k'2~[ XDDDDDDDDDDDDJ\\L C]I736n$͛Aj%}|Ҳ:ͼ?2%z1Ư%R|Ɓh9_)("Vs wqq˷|7xѣG~Djny\5q1Epx<=Xޒ7>?5ksYZ\Ĵi#Co]3R? 8zڷ *Z ZWDDDJ!%HBH!C!EDDDDDDDDDDDz2\? /B0x0d߭.^pf3wDDDD1Ji׮>|;wҴiȽbj, 道}&)e(S|VtZI߾f{%/(:u ?ne Scޘó)[ "Rb㉈~m[|||>GGG֯_ĉX`*T`L0UEDDDDDDDDDDD$شRVK_Qsf&t6m޾c2 aK{z)i?fÅdyC#rj/""""wEDDDDDDDDDDDDD,%l3/* vT:W8t:4;_*TZy3,k^2`VVJ23-I~kZݺҠbt:Wz+|t:5Om[,t`Isrs2'_IDDDD<%ܤ8: ߆ e2XW4Qڷ, ~~`gWf ٳwODDDDnJJhh(deeY;3~m1n9eh}W]VHeOKߘƍƦ1}zp[ 0ȝ;Y;)߇3 7`?3٫|gڱ皕,~9:udTDDo*YYse~6,5нj>b 47jլK("""""""""""""RDW€'/s:-8䐻My|2 tx ^ZWDDDDn%\ŋd %LxxǻC#VZDDDDJӧa|y!ҙlښ36< yw_󚖝[f""""rQ="33ʕ+G2EDDDDDDDDDDDAC O&eȤ[bzvW+<Lˀ'~ %$RJm߾KqFvIRRRnM6%00G}-ZX1һKhh(deeY;2À aي8aЍ|gўe(+:|NNa`!={EDDDDda Dh6m_|111\{-[777rJndI& 6A:pww'11777k#"""""""""""wHVl]1?̓cW(%zY~<=:C͚E0.NClDϿEoJ)%̙ÛoɡC([,zcǎlْckkmff&;_ rԩĉӧgrw/@"""""""""""|?Hfo9遙] h vvUDD[@RƆ&M0vXz쉣cMMMe|oZ$""""""""""r+&μ,= IcsuMeok)""r[@Rb…/@"""""""""""4., gqZUې#MлiA5Tc"z-rSܗ =!NƂҘۑ m+Wy+g(EDDn+=v""""""""""""""Er l‰;@L< Bv/V##(RJ%''sy<<J)9zhQQQ\tg}6v&N&""""""""""" {v/8 }SYx;'SαL+ި;jǗvf"""""EJMd,X@Ν0a&prVRDDDDDDDDDDD |-;ݑl1"(Gl7M] mãPSY e MCnsYvE߾}ܫ㹼x[:N6;.7!Aℋ}:m[rBV68;;Z;b{EJm2|Ͽolllr)[,ݺub"""""""""""rOHMHfŲy a볓dceڷʿm MS}57 Hq 0ȝ;Y;͑e6am\2<Qudr""")=9ḲS5nePDDDDo"ԡC dРAT %44,k"""""""""""rӲ`. YlސKȦ {NӾ[Y̽ZAfEDDDD 0_Zhٲ%?C)pww'117˥@d%of|tZI@FbhGCPTfEDDo*RO>$}8::Z;)A.֭T ; M%fo!.qC7ڡ "Trr2;vݝ/kT/ DDDDDDDDDDnrԟ~aa{a0$PfK M`۵_dEDD[ "3k֬m۶ԯ_5kRJl >ڵka&m GZz^ .16ZO-tL], ]ށU(RJ:u.]p >|mM&NDDDDDDDDDDSС`6Mf&%/'/>ll U;OOHڻB+˩ ;Z^A_DDDDD @Rj߿.]ڵkjDDDDDDDDDDD+q1Epxf/!" ۶իhCo'gڜCв%<AsukDDDDDPH)fx/_??xqU*?T (WΠ}Kzgw{RzvЩ+gYH)@R*;;???%,j,\T 77+nC?lEfk/;?&[UMHDDDDDiJ)ZnÇ=%.Ə ))n,YӾ"3;rIJ2kRY=;7^$_DDDDDn%R>ڵ㫯矷v8"""""""""""x 9uʋK-IkX}jgӷϴMH3 _g_1?(Y֯ztΝn]0~4 j>5S.=H);w/5jTtΝ;(""""""""""8?Lrna&tpq̠q~╣|*SyrHw&0;3q`}g("""""@R+$$!5<<<Ȱv""""""""""r7ش$tg&,f~ < ~me%h!Ҝ% ٳE8NLDDDDD$EDEEzj"##ԩSh&Nٳ9~8*T[nL0*UOff&,_իW3w.VE?* )[jք VR?|k - |}ᙾf)#={A?jUAf틈e戈Q^Xpa}IMMSNc6 رcDFFExx8kγϙ3g0^-xkRR$&&V}EDDDDDDDDDΈSa?TX,lUx06BЫ q+cmCWoMFDDD6o*O=!!!4hسgǏ.ԦM|}}iѢ-Zf͚]w{pڴiêUpuu`ҤI3AaÆ3.\Hٲeiݺ5-Z^z/_ere.^پ};\|ƌÛoiڲe Kx'aMlڴ)| UDDDDDDDDDDի!"RR`,4ptR1t:eokՂNR;,z-۴juf 3)yJ)%Ø1c>}:ӧOgժUZ ɔo{j_gРATTN}]vϯ뎳uVj֬Yȝu$ /+9fy >mW-իϼ2q C0 9J)ey뭷x뭷ػw/aaap9qwwbŊ4i҄4h` uqVZ`N{llln[Nӧ'55 ?_^Xiii~tወH!naWʼ*\y4%O2.bX&'I+'("""""rPH)ְaC6lh0nڕ?ٹ~._֤I>SN8 6dDnd$IT7m۶`˖-_===EVVhܮ)I$I$Iͮ]aܹaY JJ>~DEɬ}҂+xXLJHN>}Қ٨ {I$IJd%M.]=;߯:ǍǸq((( 55ܮ+I$I$I(/&M1cٶ,7w.^Cpi2oZFgMQGUMiܞv7+0--OT$I$UcX,wMkצ"7oN~~>˖-+}:uGeŊ 0$I$I 4c}n/nt$u+D"й3ۿA>QS ;x V`PwF X6\wI \w=\ucֳgR$I]vTn^~e&Nxŋb?|k9СC 8ׯ7n$''f͚h"ڷo_n5feeE4eڵB$I$IRw߅U9yg[av$|{mr^=~}4( -#O#o &x1So$mx~DIjRfPRy';NxGn$J%I$I$Im׮0wlkÀ#0|XKW8 7wӻU2X.t^:]1iiCgw I$IIk-B N߾}ѣQdffs=g#YYYdeeF]$I$I$ש4+*EBiX n ~S{G ;v@J \xaX /uU7 YY)I$I:K$81dddjժJ7nƍ;Y$I$I<0F::!7}:̚AӦaZо9oO+‰W^ _rHZiV0~C}F޹T$ItV J j…R-[$''$I$I$;fݰ65={=Q#d0ַk$͘wσCBjp0x̄-yf<h6rO$IT. J 4k޽$I$I$0wVXjM܄X 7}A:o 0. "4- Ə?aI$IRfPJPZb'<&j*.JJ$I$It6oY(X,>jf!W^ {diIK:'$I$ŋ@)A1<| _81ؼy3rK%WWueeeE4w)$I$IW92 ) .:/mo;k=o2]zEX1McKw!C;W$IT}DbX,EH:}[ls۷o~p \}|.?-{7t,% m 7I$IT[ J ^㦛nbD"b17o΋/HϞ=Ta I$I$I)/C/;v:u` >aޡiDw5U+r.ۂ_qURhϞaܯ$I17Rۻw/'O&;; 6PRRB֭:t(cƌ!555%VI~$I$IT J\gd1wZO=SH?3>jՂ޽ak7mF<ǘ76sܞK$IU¥j/@$I$I'/&M1c -bzuMs¡CaCc l Chtjz;c 7uKCP$I:K[x I$I$IT 0j;8Ò`X\sD^O #}{},WΩ?(I$Is¦Mˣ߿+"ƻI$I$IUΝ@3dΜfGW\2nˆ;CG=TN$I$I:W9XJ`'O?!7n<ގf dI$I$I 99;ClLIԭ{<>Ԩ֍H$TPJPo{btڕS^x%I$I$I e~3طtkcICib^B4xD^m6m]$I$IqaPJP ,s<.E$I$IXI ,YR:7PRs|EFRL=x0a.I$I$IT~O)a4QT𴕴%twHKw$I$%H,ŻI?ӟh߾=UV$%%}H$8TYuJ~~> 4w9$I$IR{yb|4Mq1P%EsM|fd_NĻlI$)[ J *))H$©'DFPU՗EVVhkH$I$I:Ch̜ 3_)䵥5(.%^^[J$IՃ@zs?ƻ4n8ƍw $I$IS²e1aj8i7Kf#^Mפ6+J.翷:=C6UCZ $I$I:PR/ $I$I? M+fդNA!dy.״&i`84 o/aIƗ.H$I8K՛%I$I$I7.alەBazof0rzkHa!ЦMK$I$Z3(I$I$IR50{6̜c"ٔBLZ-%dYDA5;/ \D"'ne?`iO$I`)A I֭7f͂S ycM-bבNf }&#{@f&dd@ { H$IU¥.%9sD8pϧ*r ȕ$I$It)*N=LΒ$G9?y;|#2]v߮o΋wْ$I$hժQ%I$I$TI ,_Qoz1C5h\ ~L2o࢑ Ʉ ]$I$I:C~ϒ$I$IX y_v gF&%_l.ߏ$*oN_p.[$I$Rzꩧ OxܢEXv-v[%U&I$I$"mQoFeG-#QzF3$zY!s4tH˖$I$I b.BKJJo?q_WxljFTYb((( 55|4hr$I$Iڽ(70oW ɫȌN'|-!3Z$Iozt+))!{%I$Iq̟Q]YX,B,ƄlvK!{ĻlI$Is{gcʲ#$I$IÇ? M+浜EɴL2;m!}!7$I$I~>V~s2t(/!7qi,I$I$UX2f͈ 5y/)3L{KG -]$I$I"X,"$$"Xd7o+BFFF%T8JvH$I$IR>73]Ia%d.%chj oHK$I|-UovGbwy'}e70koY<̤9qC=V-I$ozfϞ$I$IsR^Lc@Zڱ9x., dJJ"Sy t-Ff_^}I$It3(I$I$IɄ {'ii(.%K> eGY 'Ӽ ..f]ڏ~͚I$I$Iҹ$I$I$IkЌwe[͘9WgP3 6d .v>!_Ю]+$I$IՌ@I$I$I9TFfgìYrQ>0#+;6ઁ9ld]H%>$I$I J$I$Iiyy0aut0ŋaTɏ9T\OSLMrc([Pf/I$It%I$I$I0mLӧШ HGbiW.2 F%I$IfP$I$ItΆ(._ۆ? w #F@<7x=r*,ڒqUpBϞqyI$I1(%;3}rw'xsWRe;O>$8p.]}ƻ4I$I$U<4 ƌ9z:~@aa |M_]ma"_"E$I$\$ŻIg'`'=n<䓕PQ6mJVV?hժ,_<ޥI$I$^^^ٶCWAaC0 t9>;z4L <]sK/5=x"}@I$Id@WTTDrrr(W/M49yȐ!\qdeeqL$I$IaҐ՛3^{-t+ 0p \qXn. Ё0j,\/ ֹ:7$I$I:]sX,#77fέx ))/ǩ"I$I$Ui`ժ_ڵ ;HNpa^)!=X*- z<C$I$B~\jԩVuֱ}vn(KŋYx1[nBD<#ЦM)]$I$),CoTXB3#ફ4~zzNԊpjO߇ /..]`<[J$I$UQ`n<{JJ )))_$I$I9*;-aɒ_?F~}0737>k#|RlEgfφ3²n]۱#h:@,O< 7ok_MCK;/~8փ$I$IU@w^֭[޽{b<\թ(c/޹sr߸q7n5%I$I<4 ƌ9vX,lY]tF:Aw6 ӭv kO-X.%I$IR[jws9nTUxקORSSYnoW^yQy8T'I$Ix`5*ފa0wƌ+*-aȐ073ZNN|m׿2o^9ܾ=iqjE(I$IT;C߾})((O>~z/{Kqq1FaÆ.jժ=Cƍө[.>(+V`t֭\`I$It+'P~ߥB$rj, L'ewށڵC"kBTnnVj+BI$I+;Y0IUҗe/q16e:J.ԴisU¶᫮o>v֊P$I$`@)Apu~L!߲eBпXl 7ڵ { EI$IR\7Rz0`M4w) /@$I$<2t3geM8|!ӟ*!CW:kxR(۞W;V*+r:u]S$Iw.@ҙIJJK/e 8дix&I$It+*E99P\ [Cf&|0xp c+)Cu+\Y{ݰSw:=$I$IRE0(%￟9soj*~p%0p#f͚ŹRI$IWRoQ7ƍa կBcO7ϫ;a/WIDATQaݲ%?>4?#%I$IT,%Ν˜9s={6˗/G|3h {8WZY$I$H,͜ 3fPNef+!)Ԯ&1唣B*W?m '=6l(J$IK՛@S|Wx'8tHh4ҪI$IT}j./̙y3$'CϞe^ %j={B)S²};ԯ;CӦ0y2 څzCX$I$[,#:… 3gs_mƹ#++,$I$U#yy0aB=0gNY7ΝCX V]L !.[o#OU;22W$I$I*u_QQXtxdIOOwU$I$ͅn`8p,t)!!7h4o^Aj+0w߶mPn(rȰt@I$I|-UovTF(**B/| $I$I>7omyuB!xc1X,`%}4?!CI$I$ J t_m֭H$ΕI$I$GAA;w.o'Fz~Xk< !,u+ԩUkwfNK*j%I$ITE/~ fϞͼywC$Aۗ2h vj P$I$vCo鯤Z^~;xF.YIEfgCNN{ٲg`ǎPlυ_~Pv%&I$IsA$]3X|9gfΜ9̟?ݻwDHMMo߾ 4o~.J)(( 55|4hr$I$I)ز% 5kvx~ºcGo"ss[7X22*+>81aCeU$I$I:[ J@O*$I$I+;J 4WKIIwI _@H$Itz`$3k{߼y!&榦\_nPV*~oƌYwZoߐ\$I$ TT-H$Is|u[ Qh,J+8fyVá5ai08==AƒH$IT[,I$I$mV;V۴_ N oG7:͙5jïBW%I$IO3(V^;޽{9^Sn$I$I.;;4X*ص v/(x S|1<5j^뛑ք$I$Iqf̘رcYnqbD"h%VVY$I;t(Ǵn 7W^Ml~عǴi6U^M$I$UK՛d"r +Wdʕ|eݺu̘1ݻwswжmx+I$IX }N)Sd܃A63ahh.ӳg h. ]aB8|8ߡK$I$IgR馛x:u*C;੧:oϞ=39sdڴi犫,F]_@H$Ia{A͚a0r$\z)D" ݺ]FF+֭3Bo֬0~}4 KNUxI$IʏRZp!]veС߰aCz)ڵkǃ>ȓO>YVMƍcܸqGI$ITb!7u*̝ EEaZȑ!7hԫJCЯ z׿=z$I$ITMԇ~|UnݺЯ_?Q$I$I P6 ?iuHZ֕^{,dIH0^t\sM L/I$I$URj֬G}x+l?x ^$I$IX /aSa(.o NiiCRrUesh 6mN^$I$Iu~#{A,cҤIf(};ԭ _P%y`۶PxvvXo{CBΐJ%I$I*}{\r%1t~Ӻukf͚šC=zt˕$I$I娤. a)S ''lr#OHIwojY5k 7%I$I$@)AvmRRR@Z?>9^u^u뮻7j%I$IڹM i`.h d&M }[w’%e^ÇmPwhSشi+$I$IR$][o{n.B5krRSSϧA.G$IO).ŋ.] t:zA͚TP^^Hii>&ugÞ=!8hP  ;B$RIK$ItnTT-H$ITmV/;vƍaذ6 ZSqЭ[H"fdm5,aԨ҉6I$IT|-Uo$I$I'Â!7u*,_u_z׽;$'ǻҏ)*:::]|1\{m ǻRI$I$gP$I$ID6ߔ)0s& ͛!?״iHv6a[߾BݺУ<8 [ǷVI$I$2(I$I$UC`޼.kք~{߈е+$%Żҏٺ5ƍ{p^$I$I$I$IT֭ NٳCaH0!4k0U~޽0gN͘ofwiipaOЮ]8g8-I$I$)b.B*KVVYYYDQ֮]K~~> 4wY$Iw@ϕ}]Y3L1".?o$I$IRص Oi~,7 ]ڶwٳfφ3Bw ) w}-zZ]$I$I80(%w}pwƻI$Il zX;w۷; z"І4xqhSx!àAШQ+$I$ITT˖-iڴiː$I$J_?4a#Tn]5e9s`~h23CBqPh.΅J$I$I J 곟,ӟ8tk׎w9$I$U<4 ƌW<8<2h Lѣ˲t={ƥtؾ,7cl))з/<`uFJ$I$I DbX,EH:}`:ĻRPP@jj*4h H$INSn.tKBzeի!5DfͣȨbsC/;V ۻt aCCNJ.L$I$ |-UovT:u1cW_}5_|1ڵUV$;@$a̙qb,Y_W,\u__<.K$ItDb1ؼ9}-!8jG0|xh7hH.Çu X$I$Is@)Amݺ!CvZb֭cݺu<6Truk,Z}k׮x#I$I: yy0aB+x0&,_/˖ EA>!_w,ii0~ Oۺue#}g͂ݻ~}4} Ec]$I$IR|o͐!C{i߾=ՋwY{ڵo1$IrgOFzj(* Cs۰!,eRZв!8vl VH+W s5 Ǝ 6m*$I$Itj J W^[.e|ĉyOC1x`-ZDZZ]w6l#׿Xh۷%I$Ig#/&M1cBpQX{.,7&ypP}WxsD޽ܹ3ݻw{k׎h"zөW>(ַ;3gN%T/I$I:]yy0aU+,YBèV_'N_Z,)w/̙S{MDf W_B$I$ITԪU;СCiժIBpmUJ]ɫҥK֭[*I$I*?³φ߿F^x!qGu^ࡇᥗBo"(. <6-B%I$I$N$!1w\͛wbHkӡCv7|3+Vॗ^2(I$IUDv6=a 0`@۲%\w|;peމv'R2w4?azX:lѯ}{_N^$I$I$UAD΁˗/ 8sJXj$I$۞=ab¾}GFzv;zԮyƝ57+BsNا=t֬W:K$I$IR`PJPۑgӦMnKoܸȶ;wꫯpz-y֭ȑ#yB |.(((%I$)QJÇC,^2v\F֨&YiosݐW_ Pn [q,T$I$Iʟ@Iqu9uw#V^g?#}Y}Yٰa1#0a„rZ$I߱b!OW3ƍ!3 .(Nnn i^~O ֭+{-!ئ |3p]УGq!X)J$I$IR0() 8رF8=# hӦMy&I$I - m  ]BrrpVìY0s&oPRcq%%>{X$I$I7|^|Ey{`\$a̙qիQDz~ׯVIII!%%嬮!I$I.;.M`ɒmаn"~{>j~Rii0~'[!) uo΃tغFu$I$ITT,c_zi;x0  }=B%I$I$1(%w<\uU<#oӟ;vl=.]{;w\.7nƍr$I$U5Â0eJXV M.8:uBxSoWn!+ Ox]ؼ9lPg? 7|%#I$I$I-;ּPIU^Ϟ=Yf 6lI&q|-Uo׿/}K 5 [n~H$rdϟO=Uj I$IVXK.>t6mvo0%whh.߳g{N%xmLBِ͚AaW@q$I$I:U7R+,,F$''pa~ /{n:uwJ,F]/@$I4۵+,CQԭ zA>зo}Bnt)ddSǺá)S²bD"#ҭ3+8I$I$*RfPR I$IxٷX },7~ p!קOXt5>R)eKXϘ߈!7l4iRI$I$T[N˒ :t?x"I$IÐS[ LƍDa@xKOOGZ.' s |y6rdXi7 ]R$I$I$.;J nݺ\wu/w) _@H$I:Sky3,YoN~^ ^~{CÆq.xN;^}~V<$I$IRTPJP;v>w$ITmlӦ}GFúyy0isN{۷_fM޾=  uBvɓagr.\$I$IT J jo_|qIYYYdee-};'I$Iq`MS[oN_ -Z@ǎWB./&LQ>ܲڵa{N0`uGcBFF-I$I$Ihw?w]ZڶmKZ]VB$IO7s¡CЦ #F@f#}ss[7X3tQ&G֭ \zi S`/I$Iozb|qD"WVi$I0쁙3Co4ؼRRBG? K.H$egCN  /~yӻ~Z$I$I$2(%6m(I$IJ\hhx7mZX- .n):uNz垡`z3uؿ?^iiCg~$I$I$) J jÆ .A$IB^Leg@0d!׶]3t.7:~\II?Ю]8gϳ$I$I$\`P$I$%<4 ƌt"Xl!O׭}7s5kơX {ߖ-!׵+|sao߾аay!8v,ddġpI$I$IRUdPREVVh4ޥH$I: yy0a֕f s[:5BD>c$I$I$I:X,wNSO=uJժU&MХK7o^U%RSSϧA.G$Ii? . 5Bns琳Pҥ3YdACI$I$U[7RJJJ"H!C_;V`e/@$IRb))^|}6n YzKcJ.lR*xAX>n=$I$It|-UoCaz)իǰah۶-7ofݻ[o.\Ț5kh޼9K.UVq~ $ITma'>>=6lxoZ8p l?|~{!5$I$IRuoz3(%uѣGn~K<о}{?_7_qj $ITsaY"`o0h<Fv={СPخ]̙ay_iIDI$I$RugPJPeڵ$%%󘒒:uDFF)** .N:sV8I$IWR˗χ"hLhBnaoFF9o_H"Κ˗y;6hԩ$$I$Ii.@ҙ={6Æ ;n ))=z0}tjժE.]3gN%U)I$IG+뛝 ;wݠAӟE- aY 'HL><ڴ!8vl$%I$I$I:1R:p۷o?q;vСCG>7hЀ5YYYdeeF]$IT-<u[2lM޽!%LJiFaٲB7I_2:u$$I$I$IRիK.e̛̙7Aѽ{w^{5Yn]e[Y$IX ??t6 f*7,7gZ~Х 1I0f%I$Ioz3(%{on6nf|4j<}߅ `vp<Fv={Vb$I$I$şrwo>>r$~L=dvua`xӟǟݒX߼yp hsOXn}zH$I$IfPJPÇĉIѮ];6n?6l@&Mͥ7RPP@jj*4p$I,5kBii0lX ͚ssCpR(۞g]ફB!CbJJ9$I$I$t[ J lʕ|K_bժUD>QW\g.XUH$I!_7mZmt߰apT4ec}/h:ta!C¼F*I$I$IgR`)]qX9s0ou+Tvv($//p᷿ .b$I$I$IٰnFʊw) _@H$2[W5 ƍCSúuJ(Сoj֬ﲪ,F]|Tx;I$%BƮ!S׽{ VX,7ol+]?&1V1K$I$IP[ J }yf"͛7vڟ:6nݺ.J $I' yi2oBV!7|xפIPTKχ={BBЯjhԨ$I$I(|-UotZǗTP%/@$I;XÒ߰ap%4Mw>x_N<uB!ׯuT@$I$I¥F tf I$Ig. ai 77l`rs! -+’$I$I$\`P$It΋wΞ Cf0t(|aV7ߴ,7oYm~Giv$I$I$I՛@)+ l޼:ww\$IJ&1fu۰!4|0rb[!7wnXo]rI}{a^7$I$I$IQ$]S/}~t#)))<ӌ5*%RSSϧA.G$Ig 7uK!##L}_NN֩S;l cŰlYYwÈd5fƒ$I$I[(%ɓ'׿5jp뭷ҵkW˿/^{5n66nHjjjK$I*#_j* MkW7;x0$ K YµkC^𵯅_4$I$I$I4;J ,\iӦyԾ;z?qq0 Id|JJ`dx0Uw:((ڴ /뮃qFymXl%ppH~V))tSI$I$I:u7RhԨ]vԾwy.G}4%I$U- !{Z4wr*|w |7n,7q"lF~@ǎرe]O( JΟ۷w \~}pI$I$I*sNJ=׿Enn.wz.O$I 0jTYnկ4wӟF;w֟TX^o0CfMn=6yV0isz#i;…eW/ Kpy I$I$Ix7R({Ε} /'8m />$I//&M ;WX_ ݻ8MO6 &OI7߄-[^ /k ˝w;e(I$I$It1(L8_|9tfѢEquױa?E۽{7 6<5:O$Ig//&LQ}/ W;޽ۡNh|O$<ƓE7rT8?w?}}!eX}O/I$I$I8 J*7{stޝݻӮ]; Ox?̢Eݻ7ӧO^z<|[;dΜ9P$ICW`ǎ0wᄚnsrsCpS֬F@!ТEH$vDa$ %I$I$I!%xഎ/** ++H'W_eҥt Ξ={$IҙΆa}}{׶-|p5p!W7]*.ܴ_ ۺw?$I$I$I9Y`tЁ]~j7̊+x饗;u[ouq/袊/Z$um>9NZiI;?MҒ.Z={K_;޾eџS$I$I$IR5dPR,_tk)ݾ 0|p{1}ʺux7+3pơ:I&1al~SqФ & ͛csf\W#K ?}Ï~;l"8nQ_6l8ǟ$I$I$It JZjq=q1}t֭ >ʊ+0`ݺus$I//&LQ!7e \ IIУۭ$'۶+{0hPyߴ4x?O$I$I$Ir/3q#իבmk|~1c .cǎ׏7Cfx˵Ƭ,Fz]IqcXߙ3a>h"?a!'%m޻ {'o>81Z$I$I$IRŊbXtnx';NxGn@] H"w~ VJ$I$IOTP$I:W4]()io-իcbw^澔=چu:]W?I$I$I$I$IC'BX517~3wF9T py.LzbI:. 'I$I$I$%$IR`ر+`~ m[6n-ظΑSdӏ]x]$I$I$Is@I$)b1((aǎѺx.VlnĂ]0ڃa3Ў ˩E﹋ rHn^ʎJ$I$I$I %I޽ £N/لE @NN;\Q괛g>[zX=Z<k0l!$$!r bZ[hK(]Ei-mU-rۋViIJ4qۋ$d#{83Y&$q\s1II2urNnvY<.4MCX5*Fʒ%KJM|2g6~묓l'ƛdJ-3^zEy֩V.]/|!nn%;:mnT[1$G|}&yZSV( ]@S={v:wYfSN]I2ujrU''MwO>Yqmܚ}۴IuK6٤꫺ 6HʲhQɸqKo[<6~_B~mtGNT߯#7@0ujr!~p/O>ٷu뚡mIܳ`$Z-3+ybo„by;'4F= U%u@Jüy+7K{V^]]3uP߲ oԜ]O9bD~bAUTOh>>X/I^zxIťu?`5jULU*4ext).L~z~IvŠYg{r+"@ַ)Sj=뭗zjY6٤^n#磏Vx81a۶ɰaŰߠA!@c V MtlM1w IϞv%of+9 /mq1wwL]Yb4SJN>9(͝ɯb2`@?&G>d {,}.<89bo, 4?@ԩ'TV&F3g&V|~O8_?1*fKbvKb%5 V>|xٳg5 _C-9 `u_K$[ {o17sf{m[CSP(ۜ9\P\7Iw9]73ѣ;bn-aɓM\{mr ䷺>0㎥EK~RlX[ BsԪfJN5ĉɀ N;5w5иVz7{SԼv12ڷOxZ%eeKv5lEܫ{mC%LzrA^zW]qvŐ/>}b\S&矟rH26vﳷtP mYqzsk{ɒZ;mk$ɝv%%G1׾}Cz}|KNXN[vpVfa]}u鼾bhd->}3{ΜYɿ]mU5ڊBj9iSfk) ڲ _Kxk/7g9kפ(6ݴl UmY52׭V_ɝw3Zl¤{d/99 y4'Kk%S 7XP(.٠`]e}ɲߡBضmBP˿6%K3}Ǐ>Z\wѢx :y]$ݺ\m S䢋j6޸*]U-w[^P(ƌIƎ-޿ףG1l7ZGʒ+6tՎhQ1٠ロBͶʺ:+?`]m:gkh u:m ԯZxuwLˋʶmCnǟnz\W?>y S=3_պ-\C>&';viowL-<.,ԽVT$#F|`mg@`_@45e:B!Upuu)++> \+//?LEIǎ> }zii.i߾xk>o[O&=W\%jfFzn'}QG%_n1{NՎ I. af@Ϛ2WYY1tױcv V>./OݷlUk8nUOX vښkg1[ۧO=Yĉ6$o&@U$i*ի_pesΩ .o$ɨQI|'7pCƎz* H|9S{=ܳFh>V9sɓ &V[%[nY\wdƌd ]pBҳgq]vi `%4qe֬YիWvqZۏ8L4)sOuSN)ԥBɄ ɢEɤIŶ .H&i>udRR{kfXoK~Tgd}NKvکQ/  gM촌KUD8݋/o/̟_sZU/F[IYYS^IX(ybo_J^}3gΝawN8fo֎ 5 `|$I^>eʔ:ϴi2v$ܹsK/nKsgY`Aϳg^ _Uo>ee ;$묓tV sOqڎ?.:tH{r{1ͫye7޸5uO\uUru/2/N~{iA7Hy',[Vli2mZͥ{ WZ=RQwު_'ԗ `>$zX#}I9Uq>GydϷ~{n#'OE]? +8ݧ~U]gOo+>zA[B1{˾=D%Kji3 nIҵkҦNM?`\V0nɒwkVz7d钼qϞ[U('c\{mr }d]VVVEE2bDE(9C IPW}{wSٳ7tiE/N^{l~/̝[gm-:hio!TVV!cbpy*+?|xᇵӵCU n~|Ww7gn݊A-,WVf+ޒbkN[~J `:t萤,o]>$Iǎ*)//oRO^~vW&}'G] m]1֪ꝿ1mժ5痿…޲f|boԥ*['[|չ;u*vسg1z,'[l$y^ޣG&FQFeg?e7{vY~oQ1/)j׷o2dHrmW񮬬qjee]7޽x[%^y`rM1B fvHL8Uk>k>|xfϞΝ;7[4ijWwӳg1waKgnKR[ILPl< 'פoSj%4BPh"жm,X 6.\nݺe֬YyӿwaL4)O>d /[Up֬YԩSB+߮l /$~XܧuOᾪ&i߾y_L ݺ^obq7@McY?= /$ot-nK߰v0  _zxɴi̙[jU }K~nt@`2jԨ5*K,iRXV2eJͶkΝ[n)I˛>5%)ϧg|曓Oڪض.~5_}kMX+4&NL Hz*i`oXjTT$WaiҨQ2jԨ,YKY+TT$#F4wkBPh"ٳӹs̚5+:ujr`n$%HJ @`2jԨ7lR` BsfϞΝ;g֬YԩSs߰v3 @(AP @ *FJ߾}3p.VKYP(4wMmܹsf͚N:5w9J| k73@ j4OgϞ̕"vJsIl\ 9ss] Z2n:v옲e7p< z:gUקٳ筷JN}.uؔZBMUCc%ƌs_FK3V}7FKy߭R1MZnUڍƌIKy߭P) y\X;h 7e >k^Fh URj/@}32naV P%HJ @Z'|2{lz?lsA";w{G}Zn!;s_o>;Sn. (=\ڴiݻ7w)@ 6z蔕պ3KZE /V[m3]tQs@C sFYYYN- 7ܐ~}ٳg>̝;Y.dܸq?~|ӧ7w9@ w嗧O>5jT:t믿>O]̘1#vXm殻QGmkvte%GM֭۷o3Vd72nܸwyݻwx㍼]]q={vO?=-JEEE3UTwqG >;^z|{ˬYrUW5wy@#*+ .*++ӪUqbڞ={cɅ^U-Շ~X|nXP*z] Bu]93}-o7wI@ 5zYhQڴo4iRv.(13fo#F~s0_WGn1bDF?+%h+3j*o}xf(5]tɢE ZpaS^^kѣGgVɝwޙE_js@/NNju9TT-Xz*_|q?t=eee)++[ay?qzm6niowicƒ%KOw q @j1cř={vn<9R&Tc_]R MYc6K6mү_vm y@i1O>}riCرcgƌqI@#jr-0`@z43;wܑ9s'ȑ#sꩧ6%-%X;,w}w'dk2~TTTdwɓ㏧k׮?~|j:ZJ[SI_:guV~|oFS^***$[W\N:a/htM1fzsgРA1bDKCjqo[z.7o^u]뮻r衇6u)ƌ߿~d9묳2hРu]qY@#i꿃N>=~~ v@h1)R1oq†@j 4(3p3 ,Xn /0ǏϠACI.,guVof̘1MP=Ԛz̘0aB=?TcFm'x"sߟO?=]tСCҀFc|K_ʠAR&߿>8{~ Bi12B!wuWt$i۶m<ȼӧO]а~{/^|+ })@h1Go~k>;?ff@m۶Y`2E… ӭ[̚5+'N̎;Xc;I&'̀j7 Ys̘;wn(]1f?;s?FzR5O.5ϸq2k֬իև$9#$sOS@:f̜93tPznA 9yF; .N; 6 lA.䒼` ru5i@7 >VenEV5|x3v|_m:aUƌW^y%;Cva,Y$'Onze `ܳ>$i^>iҤ& hVeXpa?̝;7=Pڵk-BC}xҳg hy;f <8?p}F/zkzFh FPȝwYteUƌ<0?O2}lFIz(eee~b9n-9#@EY1c7O?]c'&Ize- 7Lt޽USLn6mZƎ$;wn^zvmi߾}8FhN2fvi;vly7$d5ܪ{Wm6wߝn)`Ywh2dȐ3&ځ5Ӫ|8#ׯ_,Xk&׿? ժ'|r~CL>=g}v9H p2fT[2xlfW ТʘqI'sɦn;J~CɦnEF'I[o:o>I2gΜƿ\sG5ܪSYYN8ƾ Xʘ;dȑy뭷Ҿ}7sO>/hV2fkU7z\s5y$;cx\-VeXCOW򕬷zW__0ЬVO}<9rd(2f|NVrW.o>:\pA 4+@Z!CP(4w@/_e%jĈ1bDs`?J6l|JĦn.([Ιg3@}32naØԇ1@[l$K|VU{=&2faØԗqcP >@}va$ĉ^ޯ_& h@}30feØԇ1cP4v-;wkgyn-I/+Z"cP >@}70faØԇ nu駟$>|x>m]vY&M=3 hĘԇ1cP_ >@}30fQV( ]-}ݗ .[mU^4@}30feØԇ1cИ4w|ӦM˄ jmڴi5m6?p.tM뮻fذa ҽ{Fh >@}32naØԇ1hLfԪ OJ @(AP @ $%HJ @ (++KYYY_̜9}.┕eĈMZkaÆ,cƌiRjXxqfJV[ew'LYYYFX%0mڴ{챩H֭SVVѣG7yFh^}Nvo6w)"PBf͚.ˀ&sUWW^Y`~7x#kuۄ dNt-GuT;ݻIνi9眓$hJ%,m۶̘ͯ1a sE_lX`A~򓟤_~9WO<$e]&LuY'o2-\0_ҳg<ѣGg~nVOEEE; /4@ѪUtI={v~_4w9a***fkRvmrǮ /R㧽{YdIzVs1) ~ܥ4(!{nڵk#G?\>C IYYY&O\kɓSVV!Ch1bD2z$g}vz왶mfmV}$3fȫZ/~ 6 m۶6lSkgϞѣGdرOϞ=W\:t5j|gU/^+2 JNҮ]?_~y/^\k={,I2jԨli׮]r\z) I'_r6pt!zhLRg B!ӟ{W?wm]Fs]wݕ]w5۷φn:z̩RVVn!I^{xMVuB!?g7N۶mg}ͨQjհne-̟?pf$PB***r)dΜ90aBvmL6-t[o5Of͚+7pCA^Wox}Qvu2dHv}$~kZ}\q9#Ү]xСCn5wqիWdСC1'|2;C?'&Ms,++˞{3f[hZn]( {zڷo_袋 I w^{g!I7ިu7x{h? I I W^yeu… o!Io߾޻Wok I {G=ojy :t(jժSOU?$C=Vݯj!Ia[sY_ 3gά6I'THR8Y͛WU{]xkm>}z,TVVh ={,jժeŅ7߼:Zm I &LX$=zO^cۼy =X;_UHR8 K,|!IK/n/-ܲкus= _BG} 'P8 x`!Iavnz I Æ nV?xUV:Ə_>‘GYHR:th>z?~«Z^YYY\HRWZO{믯~iUٳO BG$M7ݴ_,[oBϞ=k[`Aa.$^z饅$!CNZ 'PHR8sj[oO>/|p購y 兎;^-ZT?YOP(9-JoSO=5|I.F=s)T:3HK+Ӿ}Æ Fm_YhQj*#Gg>|x*++sWTݻwL:q$ɷ8 ;wV^^/ >l߾nYdIzk̘1YhQ/]vo&ӧO{:]}y>Н8qbO/| xki׮] 3f:uJ̜9^h.mVw\q~>{Qe\볭lF:thnȾJ?>guV:w_2dH6dަnŖs9y衇{o_?[<߿:irJ\}s=s]we9cuuQw}swرcsUW媫w\|'9C2mڴ\qkjSZҗToU6h:g f62j-U @hrg䢋.E]M7ݴ]w$\k[oը5~֔)S^5r)馛rWg뭷ӧڲyI9jl7o^{u>V}yiݺu?W,VeyKݻwn9~Z׮]s'OLP|_e]o~>B!72iҤz9SܯjO<1Cwխ[$s$$0Go=_rƌIR=c%@Kg `vYgcḁ̌H+**$Jm>`Y宻ʥ^ʜx:oUEo:gk3fHNjٯ,'tRϟ'?n3HYYY/堃J<+8wygk7Y~_aU5/\O<1Gѣ6ۤ]v[2z\y啙?~:}F;lϿ'I?6mڴWwҽ{ly|^{֭[{͢EVXjܹsƎfC 7/^x7$Z_|$ 4'@֥KW,Xk}s$/̝;_eVgQz*oSVVN>,\0F:Sz뭓$^{m` /s9RΘ1#rK_WyᇗOyyy. z-y$O?t,\FG} &$I6|[nɅ^*zkڴY"CM߾}?? ` BƍqƭT_Һu<+G.R?~|.\=cUÇUVo~'|}…932o޼~+|WƏ~TVVfСygjmg\6,73yuQyk;~j׼,93gΜ~ut;jjкBofF]cIW=I-%JYg#Gfun?ꨣr饗vmO<|;/~d|ygӫW5kVz,Z(?;c͹瞛sC^ve/{'lM>(cǎa|}{9c}-FJ楗^ʙg_WFeС馛R^^c=^2eJΝ;gw&l3gg̙/4hr?I1huYgչϹ瞛m6mڴ]wݕ??6Kn2}<3_jfرcv}3&ov7&kwy''pBu?$Y .~ 4(C Fmq孷J>}2jԨU>}y, HӫW k4iR:tR!_י|x⌅ǏϾ~8cƌI߾}s .XfΝ;gvJzτWu'x"G}t.\?ywrԹkC׿/kyͦnz(r I?<]t׹wu\x0`@^~zy'ӯ_\wu+5 ܒ%K$>hn:o^}O? /0ݻwswW^Ɏ;QFcYk]/wTWg***ҧO>W]{͞{'x"wqG w&LoZ;vl{/wuW~,Y$zj?qڵk._mݖ'|2]v矟K/KVr7~7x#~{}Ѵm6wsu׭y ?۷ox\{k3gΜ+/3dȐ曹;裏Gկ~cǦ}Ѽ[7mۮv}MP(lcƌ^{;.Gwz+[ne6|)++k"[?<@~ 2ir ,H=ҭ[L4a w'ꫯs=}s]J1 -_%KdkMσ>}ske/I8=\.5ԩSs7cJ -/O<1Ot=rJs=}oR(ӟKjV'tRz] kK.$Ir6s%Ӧ L:5^{mڵk=##GLFw7筷J=rECmU6m/7w.\~]@ Bsԏ% @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ x IENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/starpupy_handle_func_perf_pickle.png000066400000000000000000003433261507764646700306110ustar00rootroot00000000000000PNG  IHDR=]=~9tEXtSoftwareMatplotlib version3.4.2, https://matplotlib.org/+X pHYsaa?iIDATxy\T冻⒚[Y.]M44ԬRK˵2ZQz+mvi嚅h 0l2AL0z><9|9Rk2 (..'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\'F zpi\GYpyyy:ze2ʺ2 C銈pm"ʙ8f/r׈ÇFe]pE 0ʺX, ÇT\TZZj֬T." SCz.SZ\'F 1'dʺ U)ww.(W=eٔ.p+,,L&KBO 9|>?~\zt]wJۆa())IY*0ǏK˸"| ͚5K͚5SrrbccճgO_v۹RDDJZ$?~\ժUc[@>}TBO.I /999$n:K2L/&..NQQQQ6muBۊnW͚5KK-)Iھ蟤RPpF !33SM6U\\\,Y1chھ}6mnݺ9Y+pi ?~ڵkWVVyZZZ)EzgtH11ٶm[ |>syzz~I͚5:pb%)1qBBzۻ|~ tzf)>>^&Lp,sssS׮]yfIauM7`ӦMԩSXN6cb-/QժU˺IRʕ]oٮR%nd2͍ s`l6`x[Rz*UhȑqlS$ o͚55zheff:v ڵn=H石t:xE>+p%zWȍ7ި<رӸq"VPPUq2g_Ν~M_|E$M:U?={jРA:}$)!!Awy飝;wjzgmoРA}R߾}^SӦMO?֭[%I}矗ePNN^xܹS_|<'**J]vϟ?_=+G=Z?ۧoF;vtf…ז-[ꫯʕǡ^uڵK .gyƩ,+zwk.UVMF͛xbϺ뮻Խ{wo-[hذa5jvء.]_,k1ydM8Q۷o=3={֯_hҤI?CM4I/٣zpjwٚ1c^{5֭z8Ǐ?={;w]xw*00^pBBBuV=czGu]w]vھ}nVw}ʒ֨QC|vޭI&;zjZz.\ hnKr?޽맟YK,ц 4j(νrrRtoo/ĉ%:sf$رus:x9PW 0\$cҥVpwwwZf1x`wޗu7|ӈ1֭kH2,yۜ9sؽ{q̙:a!"##_N˚6mjL<0 Ø1cQn]fhIĉ322 I 0q5rjg5$)))#'' 1-ZX6p@c:u2qNBB!駟an 2z 0mfH2 0իW; ~ɒ%FJl0 #>>0LFBBBgFPPVN:7xӲVZƍ+O>ĨR IƎ; wwwȑ#N|Ƅ  {~N{wu,裏 IƪU˦MfԫW78 /m۶v#""^zɩVZ#Fpjw֬YNlٲpww7=ja;v0֬YS也^}XdH26o\d;#G4x>d#22u,뮻s{ݖ~6l;m~z]a[ET(1>Z4eƁcmZWիele~o=m3XZƞ=ÌSV\){ϛkszKZj#)ԪUtF#G*--Mf;feI{l'ٹzoI J:f_,]wݥYfSճgOKESդIc9ܷoZj}֭Cׇ~Oŋ_˖-/tz%)ShΝJIIq I{!5hG#GҥKuwkҥ [nQddu޽cgΒ4tw}iӦi޽JKKSnnh˩_~Ev]uujjJ*={öm۶Nc E}vP9dffװaC/77bMKKѣGվ}{6ڷo;w:-;jݺ6l jy^/]UT9>IN_\\͛C̙3lj֬S 6yxxӐnKr?ܹS?>C20Zk͖ĩ -opYX6*5uRSW+-m$R.^}7ʩJN~OիP``29W'P 8pF魷RnnbP׮]յkWMo o TtswtO[oUVR6nܨӧ! BO9rFc.农 =pVk.zsY!R 99Y^^L *0'PlIJLNgQ!YJ'w\v;PʺԠAjc\)6蟹sءKl v&I_|jXfL&RSS˴d2iǎeV߯>}8wYO<9Vy8s]//Ϧ:xq6l֎u (:ݮo<ƍ5T``spM'P\9=zgtH11C/gϞSL_|QfdҥK֮];%%%\5kTRRBBBݮ镲X6j=~Q]&*:zr6%PazPaH-ZM=oYqUyyy),,ː{u!~+rM6M^^^e]FkSrInE&Y,tL-?1llɪYs/*'_)_}aَ;d24~x6>^I.X@SNΝ;e2d2`~'OT߾}:u/t:ڵkպuky{{+<<\ǏWnnc}TTf͚Of4ezI۷L& <<(IJLLT^TR%aÆ ݿ5ou]IgMȚѣG+88XUTѸq4dȐ"{W˗/wZtR*++^ Z|bcc>bSSSe2fInװa-___իWOg.cuAjժ߯m۶e˖ P=tԶmۦ[nE!!!2ԩoԶdһ[%%nKr?iڴiv6mO?ԱmÆ kv\'N|~{B?B6T/TrݠʕoSDH= I { 2+CJOOO?$)? qa:w|޾ SO= *))IIII0`cԩSտٳ ӧOK9={UVڹs~m{zK\m$IWRR9rV֭[_~E>++K}[Nرc_y}ᇚ?6nܨb8 ??qZO>+r_֞={t-'//O5j'|ݻwkҤI?}%О={އ~I?Z>=vޭ˗kϞ=zgرccխ[Wtيղe믿}ݧ[:pByyyiƍ3gRSSuM7y7رc߿c~Zk׮_XBk֬9/+˗oG}{OvO]V&N-[8뮻tq-_\jѢnfr)oӧvZ\RyÁg?׎;t]wn;Ǐײe{'Oĉ}vyxx{3<ٳgk:p&M>==]C ц ?N:ٳӝ-ߒ\%MEiΜ9ڵk|I{ZvvgoM4)CJN@=[ciSvSN})&Wjzڶ=תIT^}$z\2>(' b1$ugΜ1vm9s沏!ZhaL>0 ӧK/^^^Fzza7fcɓ'M6=MIĉ322 I 03իg93 naDFF׿mڴ1yd,][z!HIIq,駟 IFBBaѸqccʔ)%$N::^S0\VZ^dK.5L05k`H2~' v {O0 #%%Őd^ȚFi|Ȑ!NЩS'w<ѣ裏:?cFΝlW^СC ]WPXk.CgϞ"ۼیzʩ͛;m /zӲÇ}醗;֟:uu:sM<3˺ufDEE9k0zӦM3 0֯_oNm]ܹs/+VơC ^[:4?tG}ѣ3뮻~=[aG}dH2VZX6m4^zaaۍ@cم-)̹lشiS;Æ 3hF۹J322G5vشzիelط#9?ƙ3/VZZzP9%} \+ (gws&feI{l'ٹzoI V-Y[KttҩS'YFO=֯_iӦ?ֆ tiEDDN:%k,gWPP?.Iڳgڶm+ئ}ПZj].ѣ裏jŊڵWl/.???]sX,:vZnXXfϞ=/Rw}>3k׮޲e˒i޼y:tΜ9#ͦf͚xzH}fϞYfqOsvmw/ddd(<STTT_/OOOmٲ8%%E/WKrr<<<Uj:|ݻzG"Ytuթhiٸqz-Staȉ/tݖ~lРuСRWOnn~pLOߢl((j'[ݧ F{/?4tk:WJԤI}z7%I;vTSlرC5jP``y= 3b͚5K=F}i3fWM7ݤ W^ ֤I~WZjvڪYL^zI׌3y'ԣGխ[W)))Zzbbb.xEy4m4ծ][o ڱcGiРAv^ {O|}}u 7_Vtt?':WN-ZH~k۶m?nƍn;i$ƪaÆZꫯ.uS>SmڴI*U̙3uر z#G;CgQʕu-^X4l0=ӪRUg}i(ҵkWmV}ѫuѣZl{évU7֠A4k,jĈԩSڼ{4vX;Zh%p!u￯-[*--MO?Cڸu{100PcǎՓO>rR@1:u$ݮΝ;KʟA +v~{ҥV>Dǫ^kmݺUM6##ݻWM4+r^Jݮ#G*&&FݻwWݺu[o\ 3n8 8PV۶mnݺǧoL& 8P;wԠKLzO͛\'85>| 0@mڴѩSzϕT:uԮ];կ_&L&McǎrwwŋK|'E֭:w0EDDhƍ[ոqc= vӧOWԫW/uU7xysɤZ;vСCUn]}JLLt̋yUJԱcGuU]w,YRf맀SJJZhOGVj.\%_x=s6m߁e˖]tr֣:~|mk*uVݺqjjڷ?Ff'K ed\p9=-egg+!!A .oݥouP)&&F /P\QaN:1bƌS2|jذ^.T]4@Ef:Wvvt&uQfsyѵϛk_=*=Oo/11Q+VPNdZo*!!AsOYvE8qB/VrrZh͚5Zfex./*ejMѣs1\ey%e#ْ$DUQf:'(@4w4u{Ҕ)W$\"777-X@cǎajԨ˚'TVM!!!]p ͛7WJJ^ybv~f%)1qBBzz(#'GilPnnL&Rh`wPPP{yz J'P)..NvcpwuԬYS7n,2:Ff6>V lX~JmQ^^TJʷ UN IyrsWJ]qWٝPƘ==W,`Ų^{ U^^ӺcUԸJ^@G 3=}v0͒}͖,-I&?'>QX0DI\=BO@2 CVoovi}FO-ԪUyzv8W4.b!\ Y[Ѓ$80Fv{ΜCvEd2{[8@ T@Vkc(xy_\ @qCfg֮]w);re㱯o]5k ädIRVsbb>_Ϯ-T,ne]ѹ-ѹe],X ng2_\z5kd2)55K~JQQQ5k%{-_0򔙹KGj(Cs͝]]>> l/Ʊ@;\S?TpԷo_UTIwygY$"bbc$I118ED / hSLQfʬkNIII2W8;iӦ Ppp7oiӦ9֗vwd2d2Kk?\IŇz/'ԫmۦ~׳+))I=z#5I Sd=Z$>RNվ}õ#~Q۶5*=}VC ~-.3)O/N@I1-p=zp².ʼn䜐$yz}}}[ ///]̛7OO<^uuIVU?~R?f${?Vk9R0aBZ%{5G@TPv{ҶbYi2 ˳vEgQ zqp!Ν;+00(VNIWW_}`vIҎ;d24~x6>^I=,XSjΝނ ,pwIW~~~SKc]V[5~xGC͚5Ӕ)S&\ QZp$%&&W^TհaC}ׅ_|Q@@wﮤaѣG+88XUTѸq4dȐb{i~߿ ڵkaÆ8p^z%I.\{͚5qƩnݺu]{N999 z cG}T]v=コX;wVbb|IGt) 8PիW7n>iO?T7T]*33l۶MUV+RzͦQF)<<\>>>t=xL&v>55u-qF5iD>>>᫯Rz;SYYYZpTR%=qIwarqٽB ?W.]秦MjN;f͚S߾}5s U (rrRu2xmN6kΛ石L*ݪ6T6LQQ/(*իNm҃P aݺu>}㕔K$iJNNVӦMoueSp9סC맟~R˖-vZ8>k׮ոqw_7モ$!`NW_}UӧOoA)11Q+W֑#GԳgOZhݫzH>>>PBmۦjժi޽/u9rl6֭['޽[En^{Mtjر%I>C͟?_111={ uҥ6´vZ%&&*22cǎ՞={K*W,I Ԃ _~E=38?p>3}žV:uT/|jڴ~a=Cيոqe˖_֭[+))Iԫ}*==]ׯawܡW_}iس/VZ:|>|Ok W^ڿ<==%_/^tq۷_?P~Ծ}{ 0 ^S:uj:p<<N Y,8p@:u:o____ã6~ 8PSnݪݻ뭷R͚5od2~:zƍI&  >:~qƒ뮻srr4g]QFw74aWo:zeɺ;um۶ٳN) @Zĉ4vX-^)lZhQêUVoc=VlRre+00Щիkر=[}ǎ377Wwq#-xOζtR }ZSEФIc㒤={m۶aO%}ПZj].ѣ裏jŊڵT$;bرcN" ͛믿jݺuڴi w}W|MA%KWFFrssMdddW_}(//OsO{^,ݮul6YVM67߬ƍ[uwRJ6l٢J~iK-ܢz{?[on۶qʕU^=ٳDZ!44TQQQNCCCȕp?ǏWo>G_uքPF0 eeSJJTjjrs _C֖NXa$)x Y (K%lׄ Եk+iӦiԩ\_}Ɨhs{z&'aT)HԋYR;wּysNyzz~ܹ֬YB{yDПL&S߹y*/INm΃>nݺiٲeZbM3f㱰s+lKѨQ#5jH#F#<:hڵEyf 4HSNUnd6xb͘1i;Bҥ~myyy)""B RffBׂMn0ӧOٳ5k,5nXz'd$+WԦMb zgeEGGKzUREmv{q-Z(!!A˗/w}k׮OKt]Ta^SC f;Uj=,Sfs{ժ5A*ݪ2IO߮'h Y (K%8yvBCCj޿ڵvܩLըQC|SOM0AcƌqbϠ 0@ НwީݻNE%~]H?}+&&c^®KU^=m۶iٹWVzeenUj_~M6[$zUO*ݪrw/BO JR ʛJ*I&oJ:v+''؞QQQJHHЎ;TF 1bf͚{LFҾ}4yd3鍊n҂ ԫW/kҤIrww?VR4 jڵkf͚2e^z%߿^ODǫ^kmݺUM6#H{U&M+_tnkȑQUn][%:Œ7Nնm[[n)r]~]wݥu_~ѪUTJIC=ze˖Z6nܨ޽{'ԨQԬY3mڴI=%~%KSN>|6lѣGo׻[~?<믿ѫrĉjѢuΝ;+,,)< ҺuԳgOխ[W'NԌ3 7,,L~ 4/$W_U˖-ժU+>V<<.ʏ<Ĩzʺ@=ݫחu)ה;|)ð+#gY,elŲA6[~(Hfs7lQ&c]Vsbb>_F^^@Tm"dddC?V\YjҘ1c4dlR[֬YC^qVk[?~]IbbVXN:j7TBB瞲. k[nEZ|.\xYJKQN}-e6nOV  j'O9s?(š5kԥK2D ,$曚>}լY3jӦMJLHĩE).Ç֯*0ԨQ#رcY Zfuu#oFE@rѣ4i>CEDDu֊N>_U񊌌ɓ5`.GT4v{,ͲXСWd"T۶^qۢ\k޼ x5hРmΜ9/BfÇ5vث\%.ENiY,fdlaã[=H>>Q2l%'WX0DIڔm\'ʵSNJ*WlokzTzŲNHkl2;/F&ɱ_zv*66^-ʪ|pY|ތ(.6VOWgp딝$׷;fͱ2;')Xp .THHn6I3<jР>#EFFq#..Nqqqe] @F23wbYWouَJ2߿T)vQm{y+2r¯L1-\Fzo릛n͛յkW_W_}%}e]bbP(#'GilPniL lp^e].Th|ތpVڵ%I_|맇~X۷WΝ˶8k~F[ 8b٬L*(j-ݯ '\F@@N:ZjiŊ3f$GgΜ)-Y,=9ӷ0l|&lrs*r@G q-T͵~Sk.EEEmq.f;.e'gFNIy Qժݭo$ɭpB 'TJIR||XCeSV^IO措^}-TdQE8KXI:zt"";m PV־j=$Ik2;l W|UͨrСCUV?r䈪W~+(l$%&NUHHBOð+#gY,9srNHrW``sUz;*(BJ'ʵVZO>zժUBX,5{l==zUt)..NvKebzqJR^U?5\Fi2Fe6wPPP[yx^3کSK/i޼yQll"""㣔޽[vR-sϩgϞe]ra*튏Ull[8>Rii[̟R^^aNJʕ WQ-Sv'('\™3gl2mذA:sBBBԼysuM5*KgA٢ŏ UFFӷСWeEUK%^.ϛQzՃSilzX;/r[G򊐔'(,l||$IAAmT-W7"`NOZdlŲ^ϔa؜֟ݫݬ-9o׉KTTw(+k_KZ UP rwO򲕜<^('p3//W{= ddxL-[ndr,KO߮y Wddyy=7(@EA \-IS[:sRR*%eb lxw\bBO ꫯSO)//OƍӃ>X%R`&fK>^gkZwݠ5* drWVsbb>_$S^.ל9s͛7+22RfRttn.InnƌիWl6+66V}U*Uʺ4P↪-sJ{ޯNSSW:ޫE/ak%zq\,.(~[cƌQϞ=*.I ֬YʶBlݺU 6T=zhŊj zqJzDǎ-#uk#mr^y}.'A owѳ>+www-[_~)[NzRDDL&󶉋STT|||ԦMmݺձѣ^yuȑR0$I'O~{5g@N뿯ƍVL@%I} zpՋm2Լy{{{+33ԏMwq%Kh̘13gڴiYf[nڷoUV)rsuJ3L>>QVmj)/BHO߮ҷ!kpzeDGGkǎtZ7(&&ԏףGѣ3gC=CJ̙e˖i޼y?~"""zv9rD[.=*xV g@S0DmHHo0ܣEii?(3s$vrr,-I]U.c̘19renݪ>HӦMӻ{Uklׄ ԵkWm޼YԺuk:rf/_{6MS^(rrN/Ҷ(=} InovQcIdRvA%'a$)8 P=2|Ajĉ=ܣ͞=[w}Uɓ u&44T{$yxxhƌҥ3ϨJ*E9a3<--M5kּ2'5ԩouݒU&6 l)BIO߮TU[0O''\ʠA4h eee)##ϝٻwoݻDz{{[qqqnP~\}XZJ}$iժU5jUkzDŲ^H2Wd6w9u)C`ZD q}6lBCCe2222tرC+WVZ4f 2D-[T֭5k,effjСu\T;K!m$sld析$֑|jxBf}?\;såp:4aOv3ڰa6mzUfuC т $IoOd5kLڴ)p9R0bQPPP @y`Ν*;" U˖?^t;?(ڵ}+K-.5ϛQ.~:sU;^Νu5ԇ'Z}HZe>t,x m6?^&MRFZv ߼25IAgv$TooeIhϞ{b$I^^\+-[$H۴nLK==2nia2L )p6IQjJM]$?\*UIYc(0?dVRRjy~)h貫mQ)99YժU+tNҜnPذ6qY,dlPZF0r2lQmPgm.J?j~}~йidJ!!Rg.Rztq(ioR 7"'ʵB_wͅZd%I23Hy[99ǔSV!IwMU 7* 1C+#G;xc~-5o.r~٨TL LT Xh  ooo6M/˨+o@cvk:yߤǧU P:{s&i)'m##ZiH|ތ.]IIIVSNZj\H///GەNkeln)v*U(rr;97m3]RTnfeIIÆ/6m[n)qA|ތm2 ÐO2cg+=}RSbY+e'j+/pLrsTV^%&ȉ/IkXg<)mwmtLPZI#k'm+E۷燞#FH-ZMp.BO{͛7dd7,/[ݮu޽ +pv{,MX)5uҶ0rw7lQQQI-yەBB*0O&/Oڳǹ¤_97>zӧ$iǎ֭뼼~Qu-''UiiV딑/ȕg掺Wd6wT@@Le].$%IsJÇKEo!mwy*IMC;yr~)2^/ژ.c…0`|||ʺ+*..Nqqqڿ?c@9fŲ1\mFNIIfs'w_LS\՚G*"bd*ۥX)>ae CJLtŹsg~i۵iZ:O *DE@ Sj=WNkG-CN.)8ҩSII;l^_?w'ͨa(;RS:]W_fsGEF>+||jqkW_I.%$)I/yxH5jH;J )RLz(?i$J /в.p:s&5k֯/|\m/@9E *$Sf/g WN99%+0V׼Y\ 9ll6JS'iĈ!jU˟aä۴)aNObu(]yy9-M䥠֎99#\Μ~CΝ6<>΂9=-~p]|ތpv] ,ЪUtq9˨2d&ѹ.o"˳*-mŲQyyrsUPP;ը j#ww߫xB!krJ.ׂ tmQF2Le] lIJLNݞV딖 *w@7**9+772<J.#$$D-RϞ=˺+*..Nqqqڿ? p.ԓ3=}cլ\ejdr/3TE+=T-=9Q'\FDD֬Yuu)WTTfll',;u[>?GYY{ۻ(00VժWpp'dSeQ\yrfT o SOiz71Vkl$Irǎ}#GRfJO*me(;WqT  9O'.cÆ Zz/_ i^FJakc[?g:ԑN5d%+9= O$)(P1dgrZPzeo߾e]lIJL:}zRRUZYE11 $WKuW 9ze̟?Kgoc%0 Y,$I=3gnϐ{*WEj*/ QV^%&ȉ/Ikx pm#k'\Ή'o>IRzTj2*{nzl'M .#cqժw~;OO߮ҷО^^ኌ,/ƒV@Œ$͝+ .򧡨RWCƍ 9zedffӢE'Irwwoϯ+nTJoSz6m՚Zb墏)_(\(,fTwm9b"3f֮]j߾$iÆ =zz)e\5ko֧~Z@J2T$:?gvAef*Iڱ_k=W_UީV lT9[J%ZiJN~Oի(QM(jǥp~7j$+4qT?t5<t5.c֭jԨMJv)}^ff6mx@wqy,Y1chΜ9jӦf͚nݺi߾}V$Yf=o+V(""kQy9 PV|'>g _ٔ{JQQϕZ}኎Rjʗzooٹk'i#/(?.b"hԨ~7}ڻw$i4h|}}/=zGE9sz! :T4g-[L%I; fI/VUV<--P1lIJLN՚UJIYd'g<=:F]PޛV^O_u (p=9>z{>bb4PT,p)~~~z衇ql65a2777uU7o"ǜ6mNzEPY:uk+?1SޣJl4\mPJ}jժO>Eng2d?kbw}Wm8ŕP>b(es vfv*4tdZJ2?ҝwJ|˖7l(I ()BOkyJ ;رc 9rF4+z,ejMѣs1iNmbYX+3IVT^ޙCVJkn}OBOEivZnٴxb <ԎXZ4//OVҨQJ8`%)1qBBz;\a̙X;.4e6wTO*8||dru)rP ))G=wtH~̂uMH&u,խ?wg>+}ARѽDP=2ݻZ59h5tЋ=322tرC+WVZ4f 2ٻ6& %DApWܕZeqъVVܵR[..*U!lIX33Y'9?d6Y&Iޯ:̜y9̓I=h̘1;v,XRM6]Χ1Rs"]&{\T]}@E #ԫ'nHPHZÐnMzUOܼ%8W)&F7N:T)#|'m(O=HC6L)h% 7+#No޼y5wqkӟT111 1(Suu*.T290܊n&+%Jee'efޣxr|0l١=@XZNάߗ^xA8Pڳǜ{3.N?Lrz4v[?u͕qm͙3G4p@]y啊m0ٳH3Sr8>Uq2\y<-T}|*1q,zۻ\k'\*/>_}{#d(] IO)S:ݐ]yJI$5]nåjxJKd(&OSzt*>~,Kt#.t#s IO&/x<&x 4*+ vH~STQ]{OU~Rr򩊍Ԫ$gLL23(&s74_N3ѵ?j)˵V?p\mק5:;02ۺҠA Rj 鉰q%[wkذaZdIKz.0T^UNgUZ^Q-?tL4`.~ZJM\>]oGZ@wHOQ#FPIIICiun޼oX.$guIR|P%%eaT|$l{?paٲCu*nj>\)1Qn闿BΎHz"?mڴ)+>3 JKLee$IQQ=4Ns{4/kr'ڀ*(-f̨pHjMנAsufں{~J OCagUbb 4eʔE־rss+PcڽƍkGFիEJM8B1&m)"z}c̙f͚P]uCO?{Rm;EwƐt !mtUj[`'Ǝ;4hРz333cǎD*+wXr8VkI(**IUUo_@RX(t{҅Jw)-M,eKgZiEaO>5p@ׯW^BFr8ñB$IqqCdW~nr ܰa^T|y7&+0W^nExCO֮5oѣC"a'UW][oU6Mz$O?mݦ!˵ps> )B㕔tִz[ZYٸ :~ Rz1NҜ9,aֵ^W_}UQQf… ӵq:r8JJJ u8@=N9Ռt:?[x%%(}$牊hߕڳg22fKo3J?.]qE#]pIO͛7kÕUnnrssxyf>iTVԔu8d$S'8'nQutHfI/,]zLM uTHz"nm۶MG}t͈Ϯ!a*/,cIRlQ5 N}㏕b qGA4&=Ro%tTY)=t:כt݌2͚5K?$sQGYfo߾뮻B!*)UpPU>qT^Nt"5#a-k9ss/.~+ /xB#!鉰qwkOtyլ8qΝKhqj)""V68ed8<Ik i$ ҋ/JYY溆Fyt %=4u*;6o,YO<ѯtְa?02spr}%ɣk'fP @Pڬ,im].;'9Gz)'ƾ}ԧOzKKK?FeeYZ@̜skMZc˷Hbcn9TKDG]GIӧKK xr Cac̘1zw4k,IIt>S:餓BaSJʤ[r8Vx8$Y0\=z }bcbJKV͕N?]ciРPG@Ca矯^^Vҧ~t~ޖñBNjy<%XJJ'()$EG':T}o/7,(wiL) IOkݺuz5|p=zV^Ç:v\yD'|r@7HZkʾSUSK֭;KO$SZTEF&-͖IWfpiϛ%kO>K,OKZXz%3WRF4k4}tQwo[yX 0BRmkw@AABhJHHh{).á G }\kz%e Pe9ke˭eUN->hXA4&=\bfڧ^}U.H YVɑ@xz3Fz"lx^… UTT͛7먣ҽޫu~ӕ~/ﴴ4'=*+ r})kKUU$ED7mBBv1[H7̑m"祧6o>l?eJRHz"lO?zH7xc?^ ,hqsٲe?TPP_]\r_\͟?_9r}Q;ű-pTeeJK$}ye$(#N6F!nhZ5] 9̘!Md߰A>rI JI_VvvggvFkzА@Dؽ{\oUUUUwoGyD7xM&IZpy=3뮻$I֭kKt]wOnmeeec.P~yRBBDaӔ)S{ny^kڴi^xz(55o}jjjtW\%KhĈz7$I?4|}ݚ={vcz{:pmUTlSEvIҮ]OqqC1S}R=C,hiNi3)I,]tt\IO'z뭷?A 4zh[:C^=Ǐ jjUnnrssx@2[%KOp[@K*+UY8fT-gIVI/$oS\,t$=@-+&L|㤤(22REEE~닊cϜ9S3gΔnBy8j,(}O>()BUXtT-J6 N߲y\Z4jtȑ\'I}sxX@Hz QNN.]K.DztRr-  \!/ə|G+"1漜Oh^Nб\.pHfb쳥9sNLŋۡCQh4oɓkϙ#ۡ͝!DURR[<޶m֭[={j={L1chرZ`JKK5mڴEy[Е\TV ãC>$}rw0܊Wr8p$s3 kGp^-}9G3_cJq3fH&5<<,aO>DgqFSLs='Iz4|*;;[5n\`eWp())C L۵nݙhmrr>k[2?(hR^%:52'l.klޓ;Oz]s?~ތ'yf>@تvc<z_[&-& m{sZJJHƍMrxԳgۏvc&V;-t$=:)>@1 \/u:t}9%y{z$q yL={PyfIJJ}oUϞfՈIWfp@Kc[z jWK6ϥ?Ѽ=ZZ?@PagϞzu:Aj.ZhUQC/s Zգǹ%'P @ $h4o^m̑myKՒ!EDH#G֖=$tm(s^ذA{5Gb#^7 hҏ?stgfb;$رRbbέ5z=&O\t/DxgQ\\\ :y:BuupӹR}d{5S'c;q*i,GͤTdŵƜw3/?ɹw6+LrFr{93kRНq#=6~_V>}4p@EGG=vE־Se9+pñB%%%ySvJK^R UVRaJK^%IIIB>a)]$ކ m\G02}AiFZJH0Gnx7u#|TRRӤCbiͥ>OtG ѿj.@{,RFƌfUU4Gq\_0܊LTRIWO6NQQ-%&&]sC#dɆdVzuiBo}{;ϯ}x:Կ.@wDa~h…8p` @2/i**~X2iLLOQBEDO}5]m~ZRntzvBTJRttWKG@Hz"lLS7AFy'Fii~8PyaxTZ$:=8{8S9<gzszn0YYY/*>>KKӚњM9p@zu3r\f4ltqI?(=p ijκU}W߭ohIO7kUnnvޭE uxx*r}y8\*y<Y,Ѳ(5u OVttz[Z^gfk]@[J6նњ ).6\L^~۔s.c.G\ؼmdmpRHz"l[ztkڴi0aL-^X\sMCle*XUUUr8VX.s R6%%Trlcע}Ĥ+3s;B[JҶԡCߛwvK'${oh2ҵg1 u@ kׯ^{5;V۶mURRەn`RBrծPiג EG*9yQ㕐0B|@XI@}}K?/t b􍰌6wyۻ YfI?~ڦx=wߕ=l%kft\A8ꨣm6 0@C?O;VoC" Py:q.WE6IR\կm'(.hY,G @jmIZWڱCڸn3c5kj}Ik&7㋣R>=>'ƴiӴ~zvi뮻tSUUyP”[jJ:+TUORի$e՚pV*+-[7J6Ieef839ztҠA'hh͖bM΅[5bĈP(7@px`C!v(?RR&$= ëꔪ]]d~ B ~32nђ7K/,X-{yI+{n˷?Dҋ/#^ כ0a?u ȼj9#C>le**~aT^9߼IFIfoOf%i},OdfNtŵ}KJlfsȶmLKKIOMSek@N*UVd$iݺSdղXb8J7bVLL**UXҮWl@IRRҸ>%]y9ҷl$mfk-HӧKnz&-ΪLM;q̺RFgR{#鉰q饗b[oXꫯֱ@CSJ$Yxr}e үaTu.Ա~V˵VOo_3*-Mb'5\\W\\{4mzSKH:T{RmљHz"lvJNNVNN$iڵ*..9眣%K/.]SN9%555r!O$i\o íHeWl@F e' flƌKrTR|ӷoaMM5N:qݥJڷץ?IKuk(!9kV(Liiic)""BzumfW^7߬ZbEk:r摪]ڵQ:˷-I**zIqqGGճEFY,8mrgLL23(&MV6׮LMԶaHG`I̢" m]QQc&LO`z@oJ4 IFgDx駵rʚ$EDDh֬Y:u[nф B%KuuU*.Xş\#SaT;}oկ->՚A=h@ $y䜍ICOJIEDH^ouo0M%/}5*++:?iyqg'0}K-Kdkm[m{  Iڸq97nc^\mpP(..ĉU]]jvmC-rSYwo$I[*H?J*::UɧkȐ)Xbv$UTlWa͖e'}mͶ t *m} ˽{uYʦJRl赸^sڧ,ՐDظkuwN8IҚ5kt뮓$}6lX(ìaٴl2ǫT.2+ԡ )9|zse3xd}ɲ8to9?eK˾: mII7}y ].ъ#f[zmttϑ #J\p"3))̖`KHz"lRSSCHo]o%Is;PY#22RJ!0BꀊXOURNl:MLJ6OH0g0./ }qHnw1o\{(sիͥ1^j|P$櫊5dIҰaήg7`:"@G{tܶr: xx݆ 0t:%IIIIDze4|婠@.6? 5rH=;vl(..i-[h9sf:Nv96'q*//G=ʝr8,_kf*9T%',EDXeXTVA6LVV֋7Ĥ7:R4ѤАtmjZ)'G˓F7>hELTN,b#,TY)9wKO?xq㤓OJKͥRq,-=2RԟƄ ҴiRϽ{3dOK_oΓ)&+Cz{gmRVͭԅK Yhct_m-9^miYk@^Cus Y~כ0ag߾}ڴ,7tPj?9rO.K,ٳpB7N ,й瞫M6O>lU7pUWFF~z._V @{(*˷fm]؁obc3O||Vj}([ךrmo#yfײ?ټ[WђFe.Ng=wI(%$KRW_I[4WH&&nw"5;hҧMΙcNku ]8O޽%IVgϿأAϧ)՞o@&g=uD 1ښm&X}-8ttB$=6JKK5k, z2{裏֔ 矯?yx㍚6m$i…zw3讻$[.cjȑZ|.TVVo4+ЕtjUݞU]γ57ޫOU\?QYن"UWTIAUU5ztefQLL{3gG Lh鼉]W뽆aL4#={Mv3QW7:,?,㚔$lRlqQ926+H˳2T;&-M:o;?ecz|G| Șq4 "?^h>iDDޯ4%m;bߛ7o>Z:Hͷij̡9o6Z,3#|ۖϗv6e㡇7cfdY~0x<$˥8ARx̥)efRp WVO7,Ǜ8tlgæ%7aܶ٣|5?PlFIkjo_Rb%VQ6s}$ikJtx0@}?h{ ZtYR@1?_ں|`7?d4ѵaٱllYmӧ6ֺ3\$5sݹ|i.uȺ@#嗿z24nn|}-O4S>eˮT@ "l̘1C~{1rU+V[ogX,~mn˯)ST\\?Hlr8:S/k ;wy@:E`,8BY=bUpu D϶mk< 4NI%Iqq(9tUVbӹ[ UQqJz!Cn =L<Kg\gBZZ3G üvoW_>w4|xI˦K>1?o[cy-&F 饗-ٔFRԠTlKЀ?iѯ6hƂ,g9b7o)v[É`oc|$k̀fgvrj3IL:4D<~{nzᇦ@Ipv:lc 2JKDS;`%ǃ~0AҘD/YY?Rwi}#-dVCz0 5)/7HI$'EEӿ#r}MOm_huM=L -Fz"lֿ/~5.g?YGڿ<ORڸqc@M7$0df͚hSn͞=T[w]DWpr%`JWV6;v$^EE[TQa~̼ۗGɧj͐a|Ȑz≅e9iT(GueSg90Aq_Ώ>2s ^e^lh^{5 9O3ֳZ{Icƌ1ˋ~}&N4aב|׿W]mh{ұ5ϷŵCLywj ~m7m2QQҧ6_µ-@GR6ѕlQ9mЙ$'"I3$5MM߰As?,}Kz93ЧtEJS;+hn/7 ی5k4eo$ݑoƷ>LI/j_8\7ntv#_Zeu_ӥ\33'l-H/xk5_xp1\5_}u㎦'O6M 0-/~soWV ھА㏯}p_}یkNlךgg_yTgk7{y6Lz擖 Miυk Du~{ӿ+Gh[:%Fz"l+//OY4vX6f9sϞ=۷VZN:o~}OT*77WG7ov߼i˷YGhh4zRUYE7,o{кMj;LZſ}/WWWwǎjU_3Ƭh>mojVaibu9r4~o_sPRZJRdZ.#ì2efCʕubI#GȊf }a>m}=VT4=nƪ7~C i^S2CTr#k7?R[k᫘^֑?k))XE]nzx`Jxued6mj,3qm&|6vixX _NWkɒ:6U7)Kt@knOW-zGi[=u[C#$m{~047d?5kFEu!Y18uGZ3d%9n/u?P7n$Қm[M}-ףn3mO:N鉰qI'iΜ9z+I*//׼y!%%E*Ս?HiE̜9S3gάn~t:&AXjeNsHuYѥg϶hh]~zս`mNsdR/klf>&Ƽo_+*̟˱戟#+/IoX# kk~pK֬Y Xjgu: #"#%@S,Ҵi3TU.LΤ՟$.n?UZ'JKD gx| OIz^Z>>={護u}룣%j+L'stk*+oo7TKI)4;rf3czc~yuwDۿmu_/4.m:a~2 &u#UUS3YjGFJߗ}KǾijV1<`4U 9xu(ߍosM_lonnظcFy"Qb9\s+IX"'Q뿮%#u\c^=2AUU8ꪦq91?YjAQ!|s|Ukw|2eYJ?6{w%-^F{5}%7}|\YC$ O>ZŠFuEf4u~h2ƀ/ivgB]#4P5=f;ҵj ~/\gQ:J+Ҡ ue7W l4G3t㋔oP͙#emc\͓zOjRm{6$L?~~S+P?OZ}S :Ғ;=|Fhts8-ݗ7o{->6v:˹[_lZ׺©/ "i5jfeekQoI h GGyDgqzhɒ%2e-Zcj?7֛3kFD̞1St@iq!?RڳH[՚ ;"m| mdff҇)FŪLf-k5oɚs[,}SNZ]pA<_v:Od}23JN>]ɧn?MWR t@KGtzw_{%|ٗvXQ}N Jl{s^o彻Omۈ֨{D]KKfkuKKNiUFk2o* j+ uhri9t󏂖[WS5Z7o6 69?qqҏ3OO7k2e;\{UXXl׸qB)lޞo4NҚ-/8U>N8B~ sUWcZE(CC uAIΦyJc&*N8fϽåy$Ij6)Vvi_m}mVU(g{tFloae kƌ\: ZBkZ<~~Fpm0jsuq%(M4C3H:<[:ľD׾}f2bQkUk><\_^n޾L<߯󕤈xh{fD?Ƚd4XJ(^@7UMKka:0.s>t]v{DQTf@'z PYF9+umf-׈xy_S,^=uyY*s薬)e|jie4זՒߨkVuXb)r 颋̑TO^tÍc͋GnLwtoaWqK^ ݟ8Ѭm(4JrtEJWk4~Ws}w<7W'Jg2$nmY̺KUU樰ԌuSs6U1IR~9JĢs-)1k{Ro&Hw4=%%s{#7_K/5GSIMzJ6}OF8u㯿6̥WT\,e%Y:^rK#$WJR}bc*#̍W]MlȒJIIRIH~t1fnMq|L<qƌ1ZDOC̾hH-kߚmZ~fi˖sH@C /HK7覛̥!~'j|&׵ˑS_z.DO۶t_IIݯgӑ <׶5۶49I2 $ft+]d? uAy4/DFQ@ƚə@qMq9ʤnIF4TT4ܑ5ll&Q&]ox}^9E2əFf"0!MVXh&y*+sE2V{ArXg:itI70ue.2 3DDm&^w~߾ / L1kW1)IǦkHs$ooiqDD<,w{aS@ox>c:~:J[QFwJ[%ymeAcd?0XHJ_ߢXZl?x뛕fDou{wkysKd`_Ì}[0Уl\1ӧK3gfh8HB(5IpҖ]{,q+gDat9G`S۷ZooO<>HKg}%=[t5 չwX<38繶e_W"X @:n͛F:\|temKN6tK0{@57ٍmK]^}M٘k~[3ٳg9Zs&"~F4ǔ)ҪUMJHFhY&:LzF-Qg)z\}GóN}*sν!,qqCdinN:}w2lWz-51-c2mƌZs `le_ٶ#Z3R0QpBz3^ϟ7|Sn[gu̙PTC$;B/**إ>K&HzLnnrssxB 5IN+OQd%%~ ("dt0P>&ЙUTXr8{IRLL_%'O>Avx%$/%"}VVhϞEȘ! d^~_3XʝQvx prX,:՚Ac ˕W'ɹR%EfQWybbz:\ @wW]ӹZr/ ED+)$wJJ:Q ! @7v,U[RNW)kР?nlEDD:\hIO00T^Rf|$)6vȸYv8 HzUVhϞEȘ!5ɶQI~INPE գ98JN oǜIO:9@2^)*.^.s<,l'(-m8OVttIOUnn<OCZ2SXY3Z#èRdd5`]'f;Apz0P>).áP H\QNNl5@N窮.˕'EEPu!IRTTO1Qvx8\KdNכ0N뭔U{d~aÖtt) *[rSYwv#I7DnwjIVdY%Ywb]؁qz>Й SJʤgEN9˵u{D]5ml|VZ{o_HzFow $I$Iw.RIښ"#.mt|o}a35{8[YYe5 в ڰa^T||$)&11̜So=D2 JK7\-39[IFUW[%M-]kkР:tI$= *++SVV =á@͕qZt$}xF$)6vz8CJzdXTVQRf=*IVo# џgx≡@+Uk**tñR-֗{rU~ri#8%Fq@{ ɖ-[qF]|oCFxr:Xy8ѹJnIR\ܱZ|s'$P niٲe?TPP_]\r_\͟?_9r}Q;6cy睚?VZh-kJJ֫|IIՒpRS~NRLL*+ vH6hÆzQYhYZ@!nT#Gue{~ɒ%={.\qi:si&Gz۾Zf9s1$= tNNk:$qDۈX}~ZdZvA矯?yx㍚6m$i…zw3讻$[?3+zWURR*%%%ktJUVt:[xV@؜TQC}C>ԡCKUUOq@ҳVsj IOn[yyykEDDhĉZzu@xH{9}M&<}̛7XUU?>@[$YdQzcUUuUj} IOQjjTmܸ1hǽ5{NSprSYwz+uGop"PLL?\z@g(:-U HzA6uԀYVYV*77W'a)ñJ7N۽ﹺ~:'j !%%E***[_TT̙9sNv{ЏtٳH3ꍸ)c?\/eղXbssrEZ:HzGQNN.]K.DztRr-A?>#=ѕϟI~IϪb9$9J*&&MviJMNɧ)22YUUZ7''k"nD[nym6[N={Ԁ4{lM2Ecƌرc`jڴiAj*+ Ϲi&*%BZ*+^!Ӕ~OS\Y,f=fNNIOtK_~8㌚dzgϖ$M2E=J۷Ow w}W [;v<ݻ[e/jc}Zɧ)6vP$gcc1 uj-oyf9%%%:, `abX}%h:؅!н* r]IOC0*+p8ɹLvdQBIRiFw kE t?\oFw@y[@ٳH3d6\6VIW5 Ncb6Fn EG7 $6# t2u.P~QٯoT]O%%_{[v㕜~$)>~X@P(7v;zLѽ%e`HUy*)Y䬪/INQbh][33.(Y,=9$=t*{,R~:ZcHT^uڡʝ58KJ-$}896&O(OB^oFHOVEŏ}GIՒ"?T6h3%&RbbC.:ۖv "#;82YЩddPJ$IRaڽ̼G))JbbIx?$=N&77Wx<%$tY钤 ڽ[5lBdfΜ3gL,TV.$ouJ2GzIOLCszn~oW9G Ad" S;gCbb Щԝ ڂ'F@XcNO $EDDGC@CUV)111atYHzZl.beddb7ި&77WTllƍ/EǰX,:t 'h9ꢼ-R9RӧOe]V%KhZpƍ sզMԧOIRvvm+##C+VP߾}UPP'j1bD ;a: ,^u]r%5ƍN8A=$5k׿ S6|ee*++k;N_CIII->yns]mniĉ5"""4qD^:}rIJJJGiذadk$ @/ǣT*,, hEEE?~FO}!% /BD_}K_N@<}#.}τ֡/t }ar\(& D_3Ψy<{lIҔ)Ss+Ծ}t}Pzw!EDD_~ >٦?Z}K m풒mKBY}BAKj:~_Ή yBG] C_}:'B匿Fx#nOo~-al[b̙}K m[ϣ,GhZt~Вj鵠/6L}/\gy-:"Y^BmC_hzS_=X fI9Nv9N5 T /&`/uժ9sj: /&`/c'HOa'F@X# % 8P#FPvv8PTYY233uw: $5feggדO>ꐀعsN?twq1b^}P̥^=z/u(@zujȐ!zꩧB2|b oVںu﯇~8񨲲R*--/Rz uh@*((PQQUXXm޼Y  p|\.y_ u8@q?Xv]999Zj[st ٲe6nܨ??ԡ!xIRee CwlIRZZRRRt駟.0_hذa۷uuX@H9nٲe⋕!Ţ7x^\ 8P7nbN 'ŋS@ꈾpwh舾P\\#G_~vh?|xԿ6F @8ikسg[o߾ڽ{wG+>'S{?@WE@Еjȑm%Khٚ3g֮]#Gs޽{ke;rٳg$iŊӛo__u t1cSZ#>~zm۶M/:܀舾 Iu]'x"FG ܴG`j @_oرc3g55UHǏȑ#u' 'pit5a9R&LЬY4|` M{+Wjɒ%z7l}7iĉ+WB8^ }#**J_ug(;;[wqzp stu?@w:(_>aԩSC2cǎպuBrǏ u@: $&MI&: #=TJJ"##UTT䷾Hiii! xD_LD_FLD_L$=TLLrrrtҚu^WK.I'ȀE_LD_La D_LDy[AWRR[<޶m֭[={j={L1chرZ`JKK5mڴF ?`/&`/ o&`/0 >cCReʔ)5m}QcFLL1vX> ]@}0}h}0}h0 #x)U.Hzk$=5IOa'F@X# HzЈ{NA?νޫn)ؿ]v8 $=:u,|Ao,K•W^͛75|o^3gbhԩ^z_]&%%E]w̙P DllСC%s!DS>}zzJ'|233_kUTT襗^ҀSAA tREEE颋.k3m4-^X @NbĉJKKSIaH{Ѳe#hΜ9袋ԣG}5cƌz;C_}N:$]|:p,O(g}Vz`.KSLъ+giȐ! r͝;W^zM>]uVkuIJKK5{l}Zt"""t饗JnK/}3l0kFӚ5kK ]fMh]vOԄ $#T=\l6-_\+WTbb;FG}=Xw [111Zbvr~ӟ*++K?v~iIM6/d&^z饚$eCFm222|~Zb󕟟+Wj }Dl6Zd""P~~~pE:/zaÆVcذa~ɮT >qddz꥽{mwI'܏Ҙ1cjЅ^gyF[oRW\qEqoԐ!Cdەرï]C#333ջwou[lUW]:JIII5e}}յ^[ڵkjԩ8{lp 8q|Ary'O7߬qںul6[MgϞht:}{Իwo]xz /TJJJm8 [N֭_|s=W~g\\ʚ=G'$=9Su\DD [WUUUݑY-K|eau 7W^Qyy}Y]y啊o)Sn:oӪUn:ի^ׄz6/O?.I~h׮]zgug*33ΝN^x>#wqMyk֬YzjFJfIٜģoټyK`:tM>]=&G&$$hHz@'ꭷի[~O|>_]]}jn7>ZIII5y7gh,"""jF|5jTp@:Çkw~ۧzH?rssݎ_]7n̙3u!addN[C +ې!C6lؠ?\\sZ[=ԫW/=ںu>#͞=7p|AK/}[n'|R3g5ku^|5jn&,t5(%%E?O|rm۶M|nVڵGDDhĉV+22R6lFUVVija͚5K%%%kڔ)//OsNpD:?fee\9R_|v;>|P#GԊ+o֛C믗ִiӚO?CiѺku뭷O>-""Btol{UW)**JW]uUeJfBt1g??|͛7^ۢ"mܸQK.UFFkIײe4`]ve_ %%%5dpS܇$57Nk֬ѫO? 0@&Lhr_n,Ƒ۷kРAꫯd˗묳Ν;1uGk͚5=ztia7nnv]uUA=։'[o9F \1JڵKsW\)UUU*,,=ܣO<'<%s'xBA=ue= @_~Y*..C=prJk͚5Zpa Xvvڠ#%%EodXzkHzk$=5IOa'F@X# Hzk$=5IOa'F@X# Hzk$=PtU;wԵ^{***J{⊀zڳgl6,K#tear)##CCd1 u@WTPP"eggP999ڼy~׮]߿t;wT~B$==]钤4'=m6$C()))hq6ө\w"@#-[+//Ozu%UXX#GGرc+//OE#7}%mHzڌЕQhDiiF_dfϞ9shڵ9r=\ݻׯuu'舰`X7nN8=c$׫k֬Y뮻$I:u7kmy+7p8 h5)ftiZv+//O'NY'jՒ$04uTy&<%nYZR ;cNO/ǣTڸq$iʕZdF7xCCÇopw}fϞ]7ҳ)^Wn g7ъ u@Bjj*77Wxʕ+*Uokʕ7n Ãg}*UP~}, [X zꩴ} 6hѢ)S'x'Of>[nM>yd*UDjjj}gi7}I=55^{%JPLI&Qn]ݩP/iۯ]`jՕGȫJrpwwI&DDD{TX… ӳgOΜ9sb?ҢE hԨc˖-4lؐ"EйsgNݐꫯPBԬY>MMMߦ|򸺺GXXMΙ3VZQP! <<<7|ݹpBcoݺ5/ ŋۛ/ DѢEyYti1))) <*UF5裏SO|jdɒ 6+W퓝mvׯO{}+TIHHH۞MDDDDDDDy;Hd<( g*2f g"KcBCCU5w(/‡~ԩS??>uc#cׯgϞСCӇ oСCec…v\x={fz|@zضmcƌaXܹs2pƍۙ?>NJʕi߾=ӦMK>m4z gVnݺÇoF˖-3c ٴiyY|yv>cv͌3XjZ>.^W_}ݻ)]4/"7ndر}ҩS'6myM6;z-=z4QQQ899OkGn:[o[oŻKLL ǘ1c1c]G1qD>vA@@ݻwOUo/2111Ջ{,O>-Z4qϘ1RJyf^z%yK͉cǎ<\x0˗租~bϞ=[]z5`̘1ӧ3}fx:uD޽ٱcsa&""""""" BB$'ƍ0v,<*{C>%CğH]{3 ɓbcc iۥK={.]֘`ٻ̙m`j[XXJH*Ud|իga'N4Wn$%%ex=ztx0.]jaF:usex+WJ2۴xGӞjxCm۶t7kѣGcce0.\`a^:Mfxzz?gx˗ 0Hbʰ_~02ުU+N֨Q#_4~(YdiӦv8vXc۵kg50  LGM7]}ꫴYfrʴ5jH{?Cƍg4k-[QF /ɓ'gӦMa'N0L~{lO>dZf3cƍ3l0wihTTHNNNk۷oōnf8x`gMϺu 2DDF-##ӷ}:mڴrߡC*Uw)dU׿V5]+V0~xK\\\|/⒮;wBHɒfّf6k,]y\.ooot3Ɛ7{;C㡇bn 5nܘڵk3c x fΜIJnո)YM޿P=ʥKHJJ/]k1jeΝioqر>0 RSS9t>>>V"""""""r:u "i~˖e?8:4} #㑯hxnN=S'w~1w fiP"_Ԭi& ԩkkx^m&;6m̶R+& Hv*TgŊ,_^x &f3vŒ麕կ_?Zjɓ'Y|9nnnt)cݳ^ˋG@RRR€6mz~i-Z("""Xlo!!!lٲbŊY·k׮<(Q3x`ҒnnnX,>qtt$222]2 H"kVyol:匿K4ic~oGF!Coi4hP0#W|5ٳgꫯ2qD5kFѢE0a6meWȉ-СCӭ{UŊg&"""""""iyr2ZD%1 (y6egB9 B:l2 5 &OXO zMEp;qf³_^^^خ+ǡCFn֭Æ f͚ܹ;[5XdI-[͛SB̙ҥK۷oI̸ %%]v柄޽{9s *T57oא!CS}+h߾=۷'88bŊjժ[ T&NfAf~pIZh>>>>7%ݲMٲe9x r{AٲeٰaZJk߰a-gߟ^{?={0pX7o /v#;|ٳg>m_DDDDDDD>saL35bϏ.]R[k"$$ɷcBCC )Yv?h۶-ӧO[n+Vz+,ӧB&M(\03g͍J :I&3x`>}:-g=L2}z>wҥqss#,,SP!<==i۶-#F`<L4ϧWbE\\\OxصkƍдiS^u~i2wѢEz>=zҶwbر >OOO:uDbb"[nܹs1Mpp0<~~~L6t%\3Sxqzſo:vH8TVopTw}ǖ-[RJ6;_M/2dٳg˗/O?ͱ1H_0gl Ќt$R%>3"G8Kq*Ԇŋo$V+J"8z ƞ={5#1G*>5VZѵkWtBPP<@bŊ_Cˊ+XpaJ*̝;___>scϞ=+Wɉ?SRlYzO?0`Zjժi< :}t~'jժ{|p>YWX1ΝK۶maʔ)̚5ڵkg#F0rH֭KXX ,Zj:>:t(zGI&9s&ݬ6;G___֬Yþ}hѢ筷ޢlٲ93X' 4Z `op:0!9f&l a<,Z.s"YSy{#wŋl6{=z_$< QDDDDDD_9s !5Š6_+gJ,B%Ф 8OWXK }{AF!""ԩ0vl=o;={+2rHFerՋ/ȬY ʵҲe3["""""""#[ >X/^E`),^Wyu+&ϠOŔyt7ʌ+ƂW'TV$ %44ۗ;m陑hHmEDDDDDĞA"gPaa0gE9LCHK6R1Ltn nn?fN:T*sJAHu|GDDDDDD^~ž{y<&ea/ܨ# `?3YX,v[LJzJA"""""""""""x!ltNR]\be~%xfH(^ܾHIMݻaJUcYܴ -OoϏ<ؑ"""""""""""aU* R8WWiNl uJJ~oD?{~xAjm+w"R))R]_?#V=Ose;ጣ%Ve%m+Y7nAvl1:?;BD%=E Sa̷CH= GDDDDDDDDDrzy[2J}<F[<ƙ=B~Pr""yBCCU5ʵs  mLmkmCکm+VY,ϟ X,Ο?o8>b!::n1}uc8MϞ=)\0ժUcνf7n+V7x+Wf#䟅M+W @Ϟ=X,ioQyګ<|0G[n/^wwwj׮͒%K2 oM:unjc̘1s9뇗nnnTViӦiӆ… S^=6nܘ̙3<+W… Sn]f͚֭[/+PT)صk;wH"x{{Or/\BB H"XV&Nx $$???*VH"ExHII/eʔtҼ;skylǾeMfK{~' Fbb"k׮eΝ)R$/^|w}ڵk9z(jiӦa\Ã]?k p™o{C|OJ駟سgoo&?c駉I>l۶;v0hР 3f {aҥT?*T^<-9~e4hŋٵk>,O>$7oNnj3pqqaÆ L2Ӷm[ׯ֭[ ĉ<#if͚5,[r9pK.%,,Yf_ӥK/֬YѣٴiS1}ɓ,]Hi׮]NMMMG={5kְ|r6ӧ:o ~~wTVD$_2D$O5#66m.]2c\t)-!0"#w 33ն1&L0 0w}pqq1.\`_`۷0 Ø6mvlppQ^ѣG=7cҥaƛoiԨQHMMM'44(RbaTT[^=#888y͛V^mƹsڶmfơC 0u!!!:~ڴi`߿?]iϽ^S0dbŊF=2s޼yF"E޼XPBi١C ضm[ϟޓ5 8wW4aÆ{N{>ptchժ/=ܹϧ=饗֭[gn݌Aej_}UZݻ ɴ.]#GLc3n8cǎO0~w… ?m?sn7 6 .lť+WN\aԨQ?~an:ø|rxcԩwe GGGѣiۯ~7oN8yds?FΝӞO8ѨZj2zf͚eʕ+ƏoԨQ#> 0RRRE .Lkտ%s{˗… [~l30d7gφl%zB D' 6o+2oV`ȑ[?~zΞ=KٲeVmp,(www<<<8y$1114k uh{衇篿bŊ}1|py-[Fݻw .g駟fҤI888?f?O޽cǎѼyt\>X&O߿/ҡCtMJJ~r&""""""yۥKz5,^lގ… giS aTǟ(FrhMpmQ؟"5dv:um իdЦVܐպukoߎ35k֤uDDDp9Zjήb2w# HvʕێjRngȐ!xb-[8q"/R}f4c]...Ӈ~{~G}'$߲gW_eĉ4k֌E2a„teXo[n2o<\\\r }tΝ;s,Yi׮Æ >Hjh„ |GLRJҬY޿ƙx/^LrmsuuM<;7ɿ\L|V-H ;RdbP("ԭ }BPl@ h ZD SS>Qpg\̈́g~9Ӎ%8[n{ǹs9rdǺr_~0Ȇ (Z(˗ ͖vL\\J׏-ufxDGGߴ_ xx5j_~eIϬxzz͖-[hٲ%)))DEErd~СwfժUU'׿Wglذ͛ /]?0;8p ӦMŅ{ 77,b 8-Z;]3+6lG0VZY/Bʕ3L*?8;;iӦYΝc߾}w#b9~8NNNT\9??LgΟ?LH  ?`L81> :t(V^Oƒ^z믿/sܹ[mٲ%eʔ_~TR%]y=qssiӦ{İfFjժuVٷocƌa˖-}!Cj*x駳_e޽E^Z5/_oFLL Cĉ>-ZW_}_̘1'|Œ3rl""""""7> 3gCв%L*vcƋջJjjker8sχg) `J&XDD8%=EPjժ^n+U*}nkժ)))iI%JPV-ʔ):{SNi///f͚+W%Kyfիs=%FEVڵ+]t!(((:'NdTPᦵrvvf֬Yݻ___fP0l0|||ԩիW>X23`5kF"EPBYgXxپ};pof7|Crr2 4W^5:t(zGI&9s&V͛7f͚Lں0j(|}}iٲ%̞=;=z4кukʔ)CPP-+[,6l %%;Rn]^y+؜0a-Z[noߞ~Asbaɒ%lْAQzu{19.믿RxqZlIZ*s񞞞ݛ"Ed5_5Νߟ'|ÇSt#;|Ǎǘ1c?~|ڿ/J*92VÀz'Ꮽy%la;][aw/LdaXlދH:n\'xxxve:D*Unܺ(h\NJޑ<ƍw80 U /#܅vQvm>c{rDDDDDD%$ʕf%K௿{*8Dt֤#P2tкV=f)l6bÑ#GXlZ"11O?CO;\u)fϞ4hÑ;t9"""yEA>4^[3"ZJc-50n&:kԀ[,G$""wGIOhT;6}[CH= IӧOW_0 ԩÊ+jtҔ*U/ŋ;CܹsY/ QDDDDD~w lp-8{'T;,!3΁vhQ{."RHmTVDDDDD$w< KIe 6ʔH$Τo(x\30мթٜg)R))""""""""rMj*lvm6-*fDUtIg\:woC(VΑUJzc-"R0w,_n&9,={yOt>9&M@냃C ()9::{ŋ8;;9?\\fX_G/xROx@NJ;l%=E!''' .̩SpvvA.) ŋv㿫*.)ups1Q(f6j_k+DDPS$%%ɨQh߾=7n>Ǐرcs*D{fc{I#G`<= @";iJr(AjP0,9cq@`s2V_/B_\uJz܁ӧOwvoooݛ}l߾ʗ/O?Df2sԨQ1"y\\*TȝS/8u/̓s!2\\ rJKiZQg*]ڢ>`t]ZO0gq6j5HH.ZbEuuu5YѰ{7xc%bv_gqyq#RS*U GGGN8ĉ)S %44GDDDDDDDDNj}NǡOdzt?%lh{6^#@*[֦Y"" Jz4hʕ+  55+W/UÆ cذaъێSEfZYWƍf޶mp$h@$͇^hHyltS*[+""rD||W2!9, ))ǨێSEf"*Lr.X۷A3N8LRδc?QisҲQ'Z!8 ="y 8}/դWsooqMKtOE\Fsghё%i0&qK,cvh*)Oz)"""""""""r['8:׻c=\~ۅ_EKԏ ڨk}Nɧ.^ 3oqV%U֟ Yk"$ʭEDDr"y)l6:͸a޽fsRXR tHuEKON%qZ!$$g'""H""""""""OT)t"2,˸8XlΣG9tqOKuX7U ʕ#*Ҡom>i:tEtY +N=2eۼX~ 9BRg麚Niye@xڴO}}}B?{FDDQSDDDDDDDDDĎ/u`~^_'SW++!N?xt^>B0|0~LR(DDDKIO):ZQEl(EDD$/RS .0sLfϞ͛IJJ0 , ˗cǎ<4jޡmJJu`я ,"I?Ç=!(ޥ 6궫()"""&%=%O4i.<ݺu7ߤlٲqYvźuر#M4O>Zj[DDDDDDDDp,Yf] <… ]ẎҶY jZr1g\}ItZv-JzJe֮]Kڵ3޸qc~iL´iXn""""""""yaΝha*f']8И=)7K^us11}*so|ba Dfqqqxzz-.հdͻŸ(bN"kcmC(\8˾l'J C'`G#zJzJǪUQ>>/GHDDDDDDDc`bX4/D! Zv-zڧ4kWDf)TVGy-[/r%6lÇ1 ٳgӻwo{#BCC %%%ޡd)5nE`DvђCB=JGa>bw"""rLO7ʔ)Cxx8~ 88۷3c mfs~y#"""""""yх |9,Zhda2'8S1)F@˔x=te;\Aכ`LO7bcc)Qaaaݛ… ӥK9:]JD$]qi?=Jԫ;.()F ظq#%J ,,ٳgp9 *dDDDDDDDDɰq?ekg#ɴvX˄tn>Gh/pvw"""R))+B~(R*Uu]u78|9 3Kr]%WƱSwG1>"""hMOW"##9z(:tH",^bŊC9""""""" g6"  %B{zG:,a8ݻCYDD7KAH?B"""""""Ӓ`͚k΃-rB{ut8@wg %]o@m%O{xqss徛6mtD&"""""""b_6L C՚~'Nҥf3< >Bg+]6%P8#CBl """"9EIOCŊ۷/ݺuaÆxyy̞={X~=3gos""""""""cǚgOzl~u6'llzkRw޸<÷,A=h4 i~-۷oO?' ..GGG\]]x"gȐ!׋ ?EN^D?\%Ι5}~""z*o+K1bWӓ гgOJ,iDDDDDDD:\wॗOysY> xqhxkθs?d()Vp0{54U1ch.H-bQKҚ5 np""""y"Xv-&L 22Ƽy JOhh(&LԫWO>ƍ%<ʕ+DZcDDDDDDD$q:w ϝ18۷tng^絲)<[F=1(]ձ50xhp4""""y"HHH^zǏgڭB >{;ּHJNredzez|\aS#" =P悟m;f޹V+ߜi)|_NNN=z… ?裄XN>MJJ ڽ9~8NNNL86mȑ#)Yd}5ش۟cr4_`H(W8Fk%0pɣ<,lƟm_ ԨQy ڨ|ZMML*)""",[pʗ/Zj9rNQe{t3rD"""""""/7PkN<&0Ox/bӉ?OU/j3QQ a,l6{VDDDo]vlݺ$^{5vٳgٰaC/>>=?tє(Q+2bHÆ iܸ1'O&!!Ax,"""""""r> xkS<Œ%u+X,M*2~IsH' -u9u*k>.3uc̶`M[0 {!]|l߾x6l֌ܥ g8ӧ駟2a?1M2Xv\_v߾}qW}$*Q{ ۱cp`G{w.olV/ \ 3[3CV3|Zjtؑz ooo{Tn@DDDDDD ,c۽{f.czff5Nsg(SN"""7KAo8q#Fd0`V'v0r.gmv_^}z\| tj o:@;-WQ/""""ɮ>}a0DDDDDDD$lffk&kAժи1Lb%6s)z18g)֫| < \]Ӫ9EDDD|ŋ///֭sÇSd95=ۧr""""""R DEAfZ6gtzyA}ՙ8~+WE}E$l6nTV %=%y(T%KbmX,hHx>Ww5Vb-Z%m~i&8:ooNo 89so;\u"""""""E`BXDQ]||P\N|ql3i$;E;Tn@DDDDDD;0+.\hޢ!t2n'> "VDDD,fzJsNׯ]m/>EDDDDDD$W\d\-2o \d#-9u1]C`{xEau,| m<(Z}#""""o^!HL}%}y߼˗\nhtcܮ&gj՛A!X{1,()""""""""Y>5;7۷SlQK*7EܰcXDu^rށ2fU3=EDDD$PSMi9,]DWI!hйa|{7,;ZVڵyшܚ0 Adȑ#AժUټy3^^^i\\\(]4v0whaii9je* (x)켒mStUL)PtY }?ٜKZipe]N9iYB{xhݺekEDDf)4SDDDDDDD$?̤ephiv95.BgUPƜcKIO<$dQ,u&"\A"6:ӊEڸl_s6g`AmE,)RS!2/E Srr~E'"""]o@3=%ߨZ*[ldɒϟ??Sd"""""""X- 9q O:S:1H Gk<~Jz\OIO7>LJJM퉉;vd߁期L:$;㼟'̣R7NŹs{hJ GG.9cø>fvdw """""y-X qxx8iSRRXr%+WCd"""""""r6lŋ rp$ъ5|`,K<PȨ\Ly_ #JzJba9;;Sre&NhDDDDDDD;}`O[H%8$0uWh%ۤPKK<KYk UJxdba DJ*lٲRJ;{B K};w_hv p![b<PE:uf!HRhwDhh(_*""""""wZi, sHV:;&c: //{+""""R hi1>, ?rÇߣ FDDDDD$OXSe\Jq 갔UҸviu޲dȽR()yZ*Uغu+%KJ*gX8x=,鏐ߒYEaNl?aődf=]Jokj>懥u+(\ኈdIכ Py[Ӯ/i[Pۊ}BS,y%QޜN t^M&*s"""""r#%=EDDDDDD@2 }šYĺu1\ӵpldAIOW/,XѣGIJJJmҤIvJDDDDDD˩~3ϱ(Kq*~㣇ҥq*= ;T JzJrJwNժUٻw/u<ĮS,;O.Rrv0)`m_X!%=%5j*cǎhѢ/.]~ѩS'{'""""""-S_ad~7m76}{lb$M ⵇ~ũ7B%=%߈a֬Y899q%)oM=xͼ%&`M-ADVxN,˗FxX <*[+""""r?RS wwu}ڞH`T:>ۤ8De cflLVӎ5"BTwG:F <<\ 58DDDDDQSM~z||| dȑܹsҴiS{'""""""9fc{kIWzp7< ő+T0]*b<ׯT_"""""bJzJ1i$;v,̙3jժ1i$;G={AvHu]Zbbի_a6X؛ ^w\ WR ZDDDDD a_EDDpf̘qIϸ8<==#"w2*[{ȑYk翦Gu)jkHRhomڴYf*TdK֭w""""""yBFekaΜTU-@q|,(@$6nHn(V-Z`ѬXK.Qk׮[n-[o'44ʕ+SP!4i͛r""""""rM@+CW=cPJ7oJzh)INNfӦM]5kHFX~m@zxիM̙È#2e M4aStiHNNe˖Ql;H>UګavsN>}Ճw߅矇˗ox%/frVQRfxx~kZ')"""""899CE%(Z(g޽WΝܹs'M3<àA2e /o7 ::ƑDӞX""""""!r{²e|lRlb|M3+d=Xl  Ҏyᝋ#FIO7 """Xf hт֭[3zh|}}s\IIIDFF2jԨ6ڷoƍs\W?cJ""""""9f6#3&sy9szsN8yUMjfP+ef(֍><< H|ˋ#G /PH\;ӧIII;/okViپ}; /_~f͚eQ1bD8*Tpg%|&o%a@ZIaL2m Ѡ V OckDDDDD$SSsvZfϞMpp0קuִnݚ~… ;ěX"Jhh(bd""""""eg}΋8&Np'K߶~y?[ CDDDDD (%=% "((X֭[O?D׮]qpp9vRJȉ'ҵ8q2ey22l0 F\\z.2[iX~%s{Æ a`h|ՄL [`unf\kW(_BCah3Ċ3gΜa͚5DDDݻ)^8-Z󸸸РAV\hMMMeʕ9z.3DKÀf%55 ^d=!Ӛz5m 5z#cArdyknSDDDDD#JzJQn]bbb(^8-[gUVQ߿?CDTX#F0p@6lHƍO? &pqiҤIV㞜SDDDDD __ص+}63ߞu@9&Mʼ#E@O:sB^eY*q+"""rtY %=E(u$dj*SϬ{Y;>}䓰ys}  Sd]o@m%_믿X`G%)))ݶIYrXDDDDD`XJdr2[g&:3Zwoh*VGl Ü:/m.NnVv""""""JzJrJwNժUٻw/u1ZSDDDDDVrd?f 3{6tpX2ZsbcXʜ>i.f kvfUDDDDDTVƍӹsgƎKѢEپ};K_~tԉ!(숎ݻEX~ ˗0Q#X2'[s­fEEAy̪$]o@3=%߈a֬Y899q%)oM=HV`4;˗Sߡq[gFS@YϔXót`fS5SDDDDDr\&lDwwu *c.zDPDDDDD))ƤI`ر3gUƤ֓RSa۶kIεk(RZw1 uʖM'Kf^]41ZVjе+ fy EDDDDDroTZ5;SLc4""""""wjkƖ-%pv6oa&940۲㶖}Fכ LO,l0u* y"296l0Ka@60ik\91˧sFÀѣoҷC_޺/)""""""""""""YbUgBX\Οoo39~֭СiMޱY+Ws'Ŋ ha)F""""rf)4S OOOVj0DDDDD>s ,Ybv1wq G?ҷ_6g͚p'KxXٝ"""""R()FHHcǎoj';`,,1AйY)n]t7G*ƞ= e 114jBHȝ+"""""))#<¬Y(]4+W9(;E&"""""y]J l|m6gTX,Ҙ#GB`Y !>||nb盉իͲ=ǛWjN$22bwH"""""> f3,̜lYe{yݺ۪{ ̛g&:7nGGhBCG(S&co;*""""""W))ŋ w("""""38ܼ #9qc3y;a*0!ig;P!3:}:tŋZSDDDDDn))F w.44PRRRl0u* u.ys%K`R8y<)SPre{$66@${EDDDD +ym6ou֩c .\[ssh8pV'dI ^];pu˓]o@3=%߿?/^p8pٳvLDDDDDrӅ rDcgAPB`Ҥmsq>0k8EDDDDDߙ'Ow"""""an}lJGٜ-ZʿŋaXQ Oxnz:wϥ`DDDDDD$))kիy' رcL:{ቈ ._u$gX^K/A@4nҘ>>fvlD֭fڇ2gpvfX2>jشZ͙m*)"""""GiMO7*Vȷ~K֭ **|Yfd{Tc]DDDDÀ\ƜPY 0gu,yl6:՜p"""""""9CIO7_{gPDDDDDTspq#$'Cjеl ._5k%:GGh F2, ̙cobp""""""))Frr2| +VA>i$;E&""""7?˖I)(Zڶ?6UbW^MrFDKڜ;;@vPX}.)""""""%=%صk\طo_m\%""""wl0u* zǤ$ذlhyLr6kf5zD [߆Nv[͵Z:)""""""`1 ðw"r3-,-""""2FFgXkLHҥg@t`>5w,] +P9S'h&抈HVtY #2i70f.y89f^=sY\s<\i&9 2'lN={̰bPB<<ԨQщ= Ǝݯ%=SR`\yO7b`fܫI΍||G$g斋Ad.7+<==YjQQQX,, ۶mcժU$''3gիdž *'[VZO?;$'˜٧*usF110`m{ (4={`$Q O+)oA\\~)JMM_hѢm M͒cYܷvm SR`fs&gXlbY9S'h\\D"""""rzJzJņ ^z}ѼysN>Ν;iѢϟOYW-B _DDDDQQDH7Ο%Kॗ̏mԬ&U_wfp3ɹl;Ŋ37;w6˖˓z*o+Frr2{}޽PP ڵk֭e˖b0?aT\B ѤI6o|GGFF턧6V~vzxu338뛃A]koSC˖6Kվ:sSO))""""""ɮ'|oҨQ#l1`֬YCڵ_BB駟W^7m3g#F`ʔ)4i҄ɓ'Nҥ#99c-[F.={_ѸEDDD$smŒfZlaL1n,kBp:rZڕ+3کt`>TV򍔔{=>SN87/{wu`⠸%*f'˅sLKSSf瘝+N~be ,m1 5R0-T,4-@q7 *0q]k|{p={O|}}վ}*]G~Ǝ[o0`^|EIfP͞=[<@?Ϛ1cn +:;;[[3/OڰA1teLUumy7gM))&Y4Ⱦ6g>7ho o@'<|A=ΖRܡCW~~5o޼}T*jԩS5lذ OIZh.\X1K JcƘQ>G%2OZ(͚W*Β7T{5ҙ3Rv&|aipV''>>5vآ}TTT^BuAw}xj߫XXuJvmVڶ̈́7Jf͡CM6m`G=u""j fǪ͒u3s__1113T7#77ԦMVzObdjJ:tPLLL+**J/VNNMvAӥsIσiҘTr$zWAZg=tVs/,ܡCMB" Rll|M5nXt-\P [ :똘IҔ)SoVVVz!edd(22R~&+>>^.:yRڻc&O|oZ|ǥw5[jԤI6kfB͒f =ho k.]s5S޽%I;wTƍg)""#Y͘1q)TNBʔ&)7Wj΄iS'ϭ>p@̶nS5>$TpP%TzckN_~mDs @Szղ_%Sn%SIyC-[4OJiHK)?g#Tq]!| o@ ci߾}Zt4E(@ںUZFzEE,_.}u%.ZԳ5:Z) kF| oP#;vPBB>sK͚5sx}ժUn\)7W t~t,]{)0-mRVV6җ_N9}y \e˖= cMO-+)ɄUق))FIIݦLt.Z-H!!RnmURVho Q]Ɏ?d9W6KdZҠAכ{_BsH>[͛K7$ҰanTz+,,SO=>H>|bccդIw Q#3.Y"]:k,K )̈́V3=Q=裊Stt4i{NGk VL5gZٿr.M`:ȎSk[8-4SRYiffkY[+!jKho:/,,Ls̙3%I_~Z9sFr'o> eJ{H6ط#G?cGjܬx>}̍?,aYbiW/)?ߜ"M$XaWK -'Fi -׸qc߿_۷w\_BX޽!gfԠ% "K99RÆ&\LS'sM{JIO7d;47ٳG07*r!)sހΟ?ƍ;袋t97;e-gt^hJIח9 ndW\[lR =jdK[܆ &TVx^WQτSO^iT)K .}@v)#s>O?$͝kkZ>4ԣQAĚ@Euӧvxץ-jf%,(^f_׮_b!C(TzuL||UPP/믦rzi:oOg_>\zEtvr7ހJO/o+de8m!޽feICq͚Ϗypݦ_nYnEz-PhUKKH3g5:EހJO;qBڸr~߭4læclfzCyJ4ibnU9OϗFK/T= ,.zGrr͛!gbTX(uhB39?b)lWaLU{WfPZ͛ҬY%o`G xgMu&ܶM:wCnÆI;WL^"o7{RFԨt҂Rtԧ[zM .% ꔊMŋMٽsacJr=O2>}ɓMyR&N.zzuHzp4f ;!ƍӒtUcW/SHY#(^)IvAl&}$?/u fa V"R .j{۷O'O\YɓC̚٦h+M8l)l?a3Gzٲ_U1՜]^x9)岳ͨ=:_BxUVw>fh2.s萔`JGSS>nL]8@Mfx_MuL/13.gj *fjx pG Lȹnw߻t RN}mӦҞ=#HK=z""j`ҦM&LH0ksZRX4|:T/kw )d~p-BO9c2E[ȹcTP |qSDپ aJwn(:%?Lxic[Ku'=*7-ܺkO~yҥ7SW.)앜IG?'LٯԠbw.@-"*PP %'CM) }ir^ziCΒ,*RaJYYf%__lq 9, E=V! ҉RӦCOϯnn6IJI?E>AkҨQ&2Djٲ|V݃PZvvtI{8kVk 9׭23 AL9leո9sg-ͥn0!RH ހJOW|| =_֯NKӦ;6mAo-;yƅlTzuy@:zԴ{]f*NKfMu5¤=3g5kCs""H P_y3'PGK -md9n!!R.ȑ#fSReIk>Z5bc8P_y3mӥ%K3%gH[C;)411p}{)iFp&$tҘ1&jǒݥi$i )lvLػZ6yD?_Üx]EBu?Ӷ=j{o'ѣGUPPPmppSk>^*((p}+=9S?$5YqYJJi#/i*i`)(:IeiٲMY;Mɡr")Ng&I]J_-mfJFK*(0qmM9sIkUqZ,&%zN4lPSBBƎ+I*,,TBBn{֬Y5k{ܣ2-iZ4iS2->\w S˖eMȐ4JڱCjڴ;V 3l~ٺursÇMP٪tҜ9R֦I֖-6ob*?=N>}djJ:tPLLL+**J/VNN+ْU')1l"%k?y)lX͔eiAy=6))?H?(^-)Iڷ|,Z &KYmmd>VAa U3e뒤_|QO=222^.vVsݷoN<)P~LٷYsɓPsmkP#L>-|b'Sص$BʒbdfJKoiz6o.t>>RR.RHt%/r|i8s\դeE-YϛQzu~eϛ/,4s$upkŖ%鑵`^G9?Yiin嗥-[ʾdFDHݻK͚9kJ H?,~兘 j 7VSkٞyFwcm?CڴI ?oJFwvRSM9$kgK.15?)ugfE,0dՖ՚@S-=Zx 6w?-:e^4)W]U3=]JKRRd3j,ϭ^oOI 6՚#F5:{4_8VmYcZۖgL$eeIVIKPh7SrMK%064Ve2Fch3S?$~Fo7cX([JKO7?3^cYC[7骫ӥ0ud+g#clf_DD5TNY tv̂ťٟҢEUGP#ho QJ+^陒bnQْ~Doүgg4mϏbnfnR.RƕgYwMk%~#ZW;vsvuaԴͤK/Xz\7x=>o71eBܱzeK Xd*V3eYDZa^Vss[f.&ؼا,xjkkfRReh7uG~򊇙;~oٳs||`)eY.\k׿IRrY)㏥kMyꔔe 2m0kiܢzUTz<-=Yɓe4ڵ3[h .m_k'o/m+]t=gL#e.{0""\~ф))ҧ}=HĶ:fΔƌ)ŪRLd^=Qx!_Vj4~Ժw|ϗt:׺=r 3m[˖##&Gƍs礟~m{i&RtΝM&7gT?_S՗5'k@KVY>_4%K, ^yBL~NFe~L2YYZ"7WڻtjOɤ&e0<:v|}3.}(zϤ.^zI5Kl⿵/H q /Kn!!R}gݨkLY[iH]6{0nB o@ Q#Gߗ}WڼYjPRԹ&0mVd\'LWVdJ'mf~лvE[6h`̒fnf-NTϛ =:_Bf'OJ~(󎔐`/f믗+dвyMM7[j -ڴ1-`۷7mڷ?6tm2Ç}}O.yc<"͟/g{Da~ 3o?lV =(=3'sZ ,tRPP'u`Ub?/rscF-,t<7;P!u,z W6jX6&Fzq-MЍ/ KPËƱdIrץ M\Vۏ7AfɭKӃϯ+ \kofx*=EM4fLͅ%ĔU !w ;*w֭I??)8X.Sy)U\8wnYJE(ݬksꜿW}ulG̜iј1櫬,=?~k43:LKΞۮ 2{47[2U1shy4(':Uu;߫P(vdac:]W?+I\n!֘Zj+y:=JR4pQ33gJ%Yۨ'dt?PEL,) 4뱱]wӶ}) nY~Ѵ?_7Μ,i#?o~lZ3>^FSQeggK۷Kb}7SyQ=nhSyzi2;C>g/Ś@=l¯HպQbޠ C6JZg]ddʯ:s|(%}EMT"5.Mk.*B+fe` l^mW7Ֆ[fl_l 4U_l-ϋo-[㒤 :t k"}釜f}%kWv+WҜ8r?׊Ω΢e-NM fx*=)3k)ɺ Zr<6Rd:P $M$?|WsQ2R Fm r&,|}q^GwZ0ۛ3SSϗ23֤tw 4m[YlPW& }IkWꫥELRknѴ4>^ڷo-,]UW~[0N4+KZJ?^jϴ֮5KFUZTW~ ϛU=:_BV8ݣy>bbb$[#FБ#GTϞ=Km֬Ynݺ[nj{;?رc 8P Ћ/(I*,,ThhfϞxkΛ7O+VN>siΜ9z衇<'//OyyyE_ggg+44T'O egg+ ϛQQ DEGGUttJ]cѢE:t8~Z3f(7P^ =9z ?88X{q}͛O<:(;;eϙipSVFQFE_~ Q NR@@'PBPP?33Sm۶qСCjѢ|||^0`vQkWSc+:ζC\=e5y}BSW=s*3 -B\ky\y ՝Pa. usSjթSR1(aÆׯ4vXIRaatw8|}}վ}{]U9V8?sq O]Wσ\c. usisTwBm[Pb.M̅ڿN} Tx#W:}_uZZժU+uA1112eﯨ(-^X9996mGm7k֬Z?*T }M]~z5y}BS2\=r|][]^0jBեsa.T]]^8e.0|? 5wszÓ!ʪB6lСCK2e^uIҋ/zJ?H[vvt:k0y0x;*=ᕆ ᄏV٢F)66V5rPb.s0 a.s0 vTzh\BOG#= :ue]H :*77W;vܹs=-N8+22R={ԫ!nq! 2D^z.26ƍ_oCj'|ݻ+,,LK.up|V݃ թS'޽[͛7wP{~駟vpZWPP<5mT999ٳ[{h@JOOWff"##~i߾}j֬Ժ 6ԩSz7{8@8.R_^ׯn'WIMM՞={4rHwp???5mT'*b(22RԶm[رc&C Q-= Vm߾]j׮7o#G?w=ƍ5zhGW.uL||:uƍkھ}{㣫Z ʕ+kh@ͪ0w\-ZF Fm̅'Nwj߾>聚Ss&11Q Q56I.tnoj׮]ڵӯZCj'&>@}E rrrԻwo;}SLLbcc޽{kĈ:rH1uJn$iJLLG}{L}7*\=֬Ynݺ[nj -[Ν;~[ހ Iǎɓ+=Q[s4517555+"I?a_TTu֬YE_XCBB-=Νk]|p=W̅xھ}{kǎV… kr@ ߬ p9WͅgZl}7kjK7pCM uՙ;֕+WxW@}RݹPQ 󕘘}_]kԩSӧOkݺupxWh":tHO?3f衇rՐY{ɓڸqwRsjjԩ6ln6W p @}Tݻw_ӧO#Fpא`Tf.xBOnuQ(88app222*uL]yݻ.rMǫhРy :T3g1\e*{*3Aw.T.]sNwSN!ndwp+RP'|cƌј1c= =x*=UPP?33Sm۶uӨ\ `08 `0[5lPSBBBѾB%$$hРAnP \ `0\ `˝>}Z/:--Mjժ:t蠘M2EWTT/^M6͍js0 \ s0 \* .~zR۔)Sy:t6leo7`E \ `0\ P1ju] Ś<'F zh<'F zh<'exղeKg;]zGM6:|K@ uԩSaիQo־}\z =sz{뮻J?k,hԩmkɓ'+66֥܁ƍ'=:w\}nԤIiƥXt uahh}]9shٳgoCs5(==]JHHP tu93m4\Rǎs͛7!:"::Zm۶բE<&..N/^N:}=uT;V=ղeK_u}UVj߾/_^t΁w}WW\q7n={ꫯ$YVuUO?ý;;?YAAA W_$c|||K/i̘1j֬}ѢtRuY7$}駺+ղeKO?TtaÆv~VV6lcܹs-Z_ӷ~+t{N:9TT6CZlVZׁwѣK۷BCCjժ}VRԧOR7jHm۶U۶mx@RVVV1 ч~Xz@{L/n:oڸq}Y_m۶鮻̙3KӜ9swiРA=z~wowJ%iꪫԵkW8uꔦL͛7oQXXFSN9qi׮]%IZJɒoUBB|}}5n8JO~[yyyE^bڵkaÆ9ĉվ}{رCztE9=vǎEՔ_K2#FP-i&mٲE͛75\Sfc?N_/=4m4wiXB]vU``kQQQڴiSOB uȸqy.jJ?wo]ݻwWnnOaaa7o6l͛7;wwnPxx^z%hٲeL޽{}vI&{BJg I&G +ܢ R[oUӦMS.]Z7T>}teIn?^]vUdd^{5ڵK?$i5k]_/Zҙ_~EѣtM7wNmݺuQ5O>t?${*,,ҥKիW/k_aÆ2mZI&i:x<-[hҤINOԼys5o\-ZG}{O ^<'1O]Μ9˗oVӦM<~ʔ)JNNs=[*99Yڽ6k֬Թ=Zǎӫm۶i۶mpӧ/Ç|r 6L;v,sqqqtjݺuK]r={|͢ Tɴׯ_Qhۧ[ol˼߮_]oFU͚5S׮]յkW 0@K.UNN^}U;V*LOG u??X_֭[+##!wY曢ϟWbbË5J͚5K/O?Nl٢{GFRDD5jGVklݫk wK׫Za]nݺ{ǗZfEtmTiӦ(|mNw%߿=35AmkV|}}Imvޭ>}T: =ի&Na!C'|R??P{ѬYtqOSNռyי0[JIIѶm4qD5iҤZcW^yE׺uӧjոqʼ3gtwkÆ Ekfر!-~ѣէOy($iĉ _M6)--M6l=ܣÇ;KZRRR?ϯƓٳg=zt1JLL_2_φջwom߾]sέ{>zջwom޼Y}Q5$kڴi^oٲe:~nMsڴiSwUbbz{WO=c'L h„ e)@ɓխ[7ȑ#pRfffjϞ=JHHPHH,K&IM6ƍաC?^;tY9[Z { IO3p@رC Rt̚5kԡC  *غ w3(T7VݺuըQ#5jHAAAJMMi>@vRƍvZyxxH&N#G_~ڴiվ=z;'I~'IIIX/H*555T\9%&&j߾}_~XО={b-**JjѢ~-囔$ooo%&&-~ XMm߾] ]JVXaў~ZAAA5kV%TDD$AV8pܖΝ;ڵkڰa܊P!N:%IP9!Ch͚:u"##)Irqq:$*55yRRR @6uUI%K$]r믿*++Kطrʊ:΄ 4nܸB(l]TTT +'IoͯӧO߽" lCt5ۓ%Iwt@q lRJ3gXݞ^rV`M=Ò}Yݞ^nB9^XX”y[ Pzz p9::ewda"ܻ\]]Ԥ\rJLLzYluٳG!!!VWRR(//[ڵkŇ|}}ؔ ?PCڵkUdIIĉulٲPu5>}Z*](də711QgϞ$3v B/h$I{OO?__c=j޼ejwx񢜜Tre9::ڴp3gŋ`(T.\Pxxx.\`U7nԄ GK.U2eԧO?^*T(󒑑db2g*==]NNN. (0a-))IJLL?)))TPPb]~]QQQR\]]m]%?pob@0)33@L&SUtW^_Pܕ@"2tP :Ҹqyo3F; J;f嗥sZۘIMѹs8P.. W@ԠA+'WdRPPŋվ}{+WNNNNUڵճgO}JKKu9sL&I*TPݵo߾\}rsܱcd2O>EW|ȩ{̙..w^mVJG&I6mwM6EGSZZK{@ ҥVZ *SNjժ4w\+c@cppz޽{gU%4|j6~Exp\rE;vԯ W^ݻ<<0tHOYN3ť7˕%r2#ɤM6YGGGk^ULծ][ѣG-dǍ թSGӧOϳŋ+44T?Ǐkر2L3̅ $Ie˖3gΔd2d2n\r5jBBBTlYjժ2dΝ;kLJJ믿*UIÇe]%KT%IO׹܉[jذa[J.-777լYSGVBBB^/_\7|||ԥK_bcc5`UPA%J_|a6l2egю;,ӧZn-I5k{aرiM^V󒖖_5k֔:u$IjժL&tRdɒ*Swn(gvƒ/j ԩ0%s{$7| s٭頠 3sMmu_$ڵk0aׯ/yxx(44TfͺfΜ9j֬uj„ JIIiUVYfPҥչsg矹յXz~ic.]du0?'\]]^xAׯ}OT"Iڼy=bf(>RRN):C]CqrtwK-$߿+(hjԘjj P7hfhN> (>>^<ڷoLEGGkԩjܸjԨԸqc]zU͛7ŋe_YYY0`&M-Z_zGԡCGŊ%IgVqU޽{k֬Y*Yvjv,Z}-Zugۿ[-]T{Q```_-[*::Z-[T Tt|˕+W$,Wo*""Bu?۷O~gڵ,4x`L&5o\ڵkm.\PFf͚)%%E;w駟˗UF =Ӻp֬YիWk޼yz%e/5k(88bzVm7RNelRu֕>|&N͛} w^EDD-+X/^T&Mt1cǎ|Fcǎݻ?۷6_۝Җz'te˖2 C;vP>}g}W>5e6mڤwyG+VЯj~hoհaCuAВ%Kam޼Y?p?zh}rvvVF}Z|o.???޽,X ggg5mT~~~:}~WzԥK-ZH~~~j׮y f/Lօ ;K লezTT+Lre.\XӳK{DCx_n>|ظ~]5 )c1qFCѻwo{mH26nhnmH2 khmʕ+[=$[nFJJyے%K IFJ,8qpvv6 6Ӎ}Ǜ1cF}۶m!([1h cơC.=f̘\;GʕV}$^x 64$Vz̿[p! 4/s{BBѬY3C1rHuK2Lbn2z-CQ^|]~ɐdԩSkJVV/hg$4N>>pႾ;KkV@@~m%&&hLIjݺX*_/_g/RJq~zuYjӦUVz ꩧE (QB˖-گ:%%EݻwדO>inwrrҤI|...ꫯj>}thѣŶ իW5w:0aʗ/#FqKlRcwWj޼yrttԤI,c 4lذY/ʕ+ըQ#M8=)SH|/$3F<[aaa2L#-\P۷oWr 4K|r-`Ipabcg*1q=U܋-o2L%@U-b>F}%,,LaaapUOB7mZ+5hPxS>}vZOС\]]ըQ#kNǪPvO(JMM5*VhOJ},YR=z0m4c :~}]M:5zWu}Z 4+pܹ (g|ybcg%JQOVy$Gǻ? :TCURRR%Io[͚;Mr-Xn7rttԏ?ѣGkٲeڰaõuV}GZz4io'V4zh^/ӧ$/O?MR;w4nY5k3gwww]vM׮]smyn׮]9r5i$jJPT```{5ywעf!>s֞y/,vŘz4k쮄 -sX~ۚ†/--FQ..JNSqq;Giig^S+[~~/ )wUoڴ%mY.gf'kׯkرJJJرckETj@@=ӧO,PTYV\4zrm?ydw+*T_'OZ^7֓|dI҇~޽{[l~bccɹVk.P=ѣGSwlWٿ1W}N7~+tI#G,pY/^Trr@j,}֖i s7a!O8E!r%\%JVr[>oLN篿ʵ->>^8^^^0aL&22Tl7լ9Wի`G@qkR*U?C˖-3'''W_URRR>s̱[j 0U+ggg͞=[[l1gffjȑrJ0`>;w.׶GgڵŶ@)!!!WիKΝdsٳg5hРw*GGGM6M+VؖA)!!A 64+o%Mf:|zBo߾rqqѼy믿5bk[rO>є)Sr-5kXsfR;z1o*<<<ԣGedd^Sjjy[DDB=^NuܹY$)22R?}yGOhe.""BWx?$Icǎ5ʕ+6l .Im6M>ݢm̘1:u֭{ァ,uEϵҥK:us1BԿ\&&&j}}}'N(33,=:xyJM٪׏…:vl-&lYˏ̘1cԿuE-Zv-///=!){^z)88X=p988>(:'hjݺZl)???޽[z5w\Rr%M>]cƌQڵUzu9::*::Z#޳ױcG}WjР4i"WWWըQCo:vڵkkϞ=V6mmܸQS&Mcǎ<ԫWO&M?OuQW͚5uuرCϟW 4o޼|ٷo_}gZbjԨF)>>^7oVN{BYJ*4l0=jѢk.]|Y=z(PݷRT)-[L:tN:*]bcco>%$$hɒ%S$)((Hu՞={#vrttTǎձcGIG}͛7kҥ Vf͔ 6h Jppzٳg^zjѢ]]v}vZܹsծ];}7z)00P:pN>^{MڵX]vի)SN:zM6… o5`M-RNtI͞=[ӦMXBϖ=Վ;4p@effjҥ:u>S5JR:uF-///YFQQQzo~})Wvء/R>~w-\PPժU駟7x'OٳU~}m޼Y+VPrjÆ rww^e˖̙3zgsNկ_jk>|ڼytX-]T7nTff˗[_D -ZH3gThh٣ŋ̙3zg4|p{S.]M6b@@+Ӯ]A/[w6= zU|lQPп/)󄍫a-))IJLLֺQJJ"##UJK'}i^AwHff֭#GܹsuI}g޽ oVZkȐ! u9feee(!abcg%JQRKUg>W޽! +OO~io&p_ SXX233m] 'NbT;:|ڴiC(b{UHHEoJ^~e[ڳg쥖wjbcyJK{-UokśuvPcp+CСCͿQd$ˏB,X@cƌQHH*V$EDD(&&Fm]"pkڴUV-yyy)22R{UVV MںD͝;Wk֬tbྕzNqqQ\%'ʕ{I@&)_㸸JE[, Ҹqm7Ί5f4v]-^+""BvҁkzU {rJٳG P-4`vޭ׫^zUre[d]Tst:LN*U>T2O%lda"n˙011Q^^^yHUREE[ԍ3Z } 0Iu"ef^wsTٲɩKD1ߟ71 {ZjjΝrqÊJM=#7jXq^[T (ii1'_ߎ`Zy??_u^(QZu_Oyyd2ٸj@qF.J?).n.]Z%A>>Orw\l]"N("디.IaٹJ1rre;Eg_h34IRZ]\իi(++M.JK;'J|P..J$IzԦ@0)33֥\bc+.n/Uʖ}QNNt>ǯRCWC*))I޶.BLL+//GzzΟAu99_oUɒm]Gɓq>f4v]+@ #K/oPlt]X!]{||~\yP8@11pԱc^~Y;WU+ۺ~=R3;S^SU_O߲KT[.- @j UmSd2)((eij߾ʕ+''''vٳ{ٺD͜9S& *{ڷo_>9EEE9رce2ԧO+=sL[bjժ-ߧQTTL&Zjeў-fSn]7܎늋WxxU9ʔyRPFUқ PPW.]j*UPA:uRVsW^Q||E[bջwo[>J(+44T˗/IMM6exAAA2L.=0 %%ѣcGyY5gIը1EލIͱ09r䈜l]F./+WI&ۣ4m4بܚ5kf1[zz ^ RvlQ, 6LݺuS4( Eq v^qqs3]׮KU[ bf͚.EIG='e@5~]V899/ݻwYf. Ō|}}m]Pq 7UK~ _Tg*]L&G[@X ,?'ZƴO>2LڴiE{ttի]eʔQڵ5p@=zb_kKxx s[nհaTn].]ZnnnYF\ɽ^|7n,wwwK.믿 \5jPJ(/¼ӧ5l0Ueʔ3<;vXקOnZ4k,رcsӚ%--Mj֬)uIԪU+L&EEEiҥ Uɒ%ULu]gΜuK.^+00PnnnSd-bbb'e˖*_Ν;~:K~[﯁Z}J={lٲrwwWHHΝ{[g~=c*]\]]UV-;V׮]+x;wԳ>k:י=zT]tJ,MjʕV~VKĉkWE}矵k.ײe2Լysh׮]us\pA5RFF5kKS=Ӻ|jԨ~Z.\К5kzj͛7O/쥘cccf[̺X^۪FYYYԩl٢-[nݺoĉռyso^?ݫxŋjҤ;@uQ/_ֈ#tر>԰l2[Q֭+///;vLK,?Ym۶zQFiҤIjժU۷kʔ):r6ol JRdd4iXUZUmڴٳgիW/ :@痕_~Y?<<<԰aC.]Z{Ѹqj*mڴ)mܹӧ233մiSUXQӷ~ŋkӦMVg;q}Q)SFm۶չsuV=36m훯/]TݺuSjjիP>}Z?VXUVE}&M_]YYY Q-tE8p@o^ywZpջwosf`MFFΟIӕS%J+Oz.3>hH2o׍Çׯ_Ke;0ƍ IF޽nݻ!ظq!6lXǏ[I2*Wlnݺ)))mK,1$*Us pvv66l`nOOO7koƌ:m۶$lٲƠAٳg2g|na#66֢-337n!۷ŶHs76._k3fy:d8::cǎY)Ɉ̳1c[rJ#!!-%%xW IƸq,ν2\]] '''c4G7shܿ=Qti8|1vl[jՌ3gײeKCnرܞl4iĐdL6͢O}oH2:vhqkx{{(9pq\c^pvv6s=Õ+W6$ƟinpQZ5C~z>ڵ3$3˗/7?s-[3ƢO>1$Z2bbbk[obשS 777Xl=33>|!hذպ$z8+V{i,Y0֭[gmժUQbE#55ܾyfd2Ưj'==_,rWA<+++˸|yqp/cfwcF#"rb.?pob `C׮Iki޼ƍco5kbdY:M6UTIK_u:uR:ut)EEEۧO4Ӽ$(QB'N ~״iS͝;W>>>pႾ;KkV@@~m%&&hLIjݺ,[˗_TR:Nrr֯_Ν;+33SmڴQj \oA=Shsqq_|%JhٲeV^{z'NNN4iy־rqqW_}%WWWӧ+&&FÇW=,5lP^zKގ &|yn1b7nl]$i˖-:V~Ǻz͛'GGGM4:6h@Æ +qoIz衇Tv\cHaw?nѧDj߾- ))g]+hխJ=/_- +aaa SfffRHH gaڴWaٻWjР˯^wyGjӦM@UAҦTzu… }v+W@^tI˗/`~ҥKW2e,aÆ7w֬Y5kV jΜ9N={V+VП$sh9e` rn*I֭[}||Զm[-]u7hjnڵΝ;[׼ysIݻ |a2ṉ%nW.I);޽{Pы/?@.h 9RSSzj޽[.\PZZ$?$;vL=mg۶mmJ_;~[$۷O/^O<+,Innn /cǎr aU);kҤIںu6mjm۶V9o&?˗_~ݻwSFF6m$IzWo9>OjjΝrq ؖ)&f._^+7-j֜.o2L6A}eС:t4nG͚;utb7n/L\+-{gǨY߮>}hڵ駟ԡCQFj׮g1խTPjOpNŊTR{%KG0M\\f̘_Ǐ׻ᆱS{~A^>W\Oj֬Լys=rp(DaHRqk/^e0ji?{lm:o&|n6CҥK~ʔ)c5|ii1'_ߎ++6v)##^^^MTT-Jqp݋fViӲV&l*֬-GQGֲe˴ak֭裏zj5i$_NhirwwkfDGGO>|U|yI4i;ww7,͚5̙3U]k7Y+:g[ɒ%5]4rHy{{kҤIjժaڵMEBLl z{ /( 4H RժU!ɤwyG&L8)^WV!SkdTQk=EZ{OFFΜJu~9;+ Qɒ6 @%)N>g~;v4vX}>|x,Gz Tx$lַ+W*--Mo^{\O;w.׶GjȑQ``⬆hW.I;wgϞՠA Tߝ:t5m4Xb[JJ 5l˒9iӦ)==~a[V{߾}y_5kĈ׶0 8Pʕ'|)SZ7##Ck֬x̔vQcγUyxxGkT󶈈}WE^Cj 6رcta$)88Xm۶URRFL+WCvQFʕ+ܹY Ϟ=9skMϷfeewٳgbz^u`ݪUO?M}G)777ZxqZpΜ9cn{뭷d2jƍgddhʕmzاK:{6L{6Rzz||$WPxWSm\-AP`cƌQեKhBڽ{Zz`=CrssSdd>H '|Çujٲ{n_ܹs3GʥK4}t3FkVho#<{Ϣ_ǎW_AjҤ\]]UF رj׮={ZjjڴRRRqFիWOM4ю;R^=M4I?ձcGկ__5k׵c?^*Tмy=f߾}giŊQ5jxm޼Y:uݻ ey*U>ӰaOE׮]teѣ@uJRl2uA|:utҊվ}%KN: խ[W{#<ڵkQ;vTǎ%ٲ}6oެK*88X͚5SBB6lؠ믿3x;ʕ+kԩzcM[nUffS(Jҷ~&MƍըQ#;wN[nՐ!CF?SsQZT~}UREiii:z>ugϞRJѨQcaÆztEEDD(55"TݱcGm޼Y?Zn%KW}Q.8b#++];S.dL]{||\٧K|!l`)fX~4c ժUK۷oݻաCܹSJʵ믿CS[nՒ%Kty ?_dkZp6l]vi͚5W*IXaaa:u:wL_^/ɓ'պuk}wڶm,M0AÆ SFF~GM6< nݪU?9Zn СCcuM.\ТE믿* @o/?|||o饗^RZZ/_gjV#ڗ,YF)<<\k֬?]vZjz,I QFrV˖-5sLiƢϢEԩS'8!Cd2iݺu_tyuA?|A>o6oެ{NN… uA=C,?_ТE4mڴ- ^#sg>>wÇզM@ۻwBBB,߯7|S/ۢ,={H^~򒕕K~Vl,ǯ Reʔi'[*UyPLd$ˏB,X@cƌQHH*V$EDD(&&Fm]"pkڴUV-yyy)22R{UVV MںD܁}i„ Zn$G6@qc\٣Y:edӳUrɩKnbbdi8˶'3F;tOx]vP5x`۪XKyoV\={(!!AjѢ @Xp)-YD*U{ァ_|%(&RS).nbcgڵ#rvT@+勒%uyLa.$y{{+11Q^^^yHUREEZӍ3Z }_df^ŋ;S/|}o.F&K97{3@1A0 %%TlL?23Tիr^P޶.{@P`));[qqu1TR _/?`/ )6v6MevUߩTV2l]" O-(++YJRUl(io\~¼oJJ\]RQ%7 [D}&,,Laaa̴u)@ (6vSe˾ >n*d  :TCURRm]9@..w" #S/oPlL]DYY)*]j՚'_Nrttu 8wn幽r1Re+?7KsvVnn5T\]+غ< (_ߎk׎ȑU\ג= p'/+WUD)+]K`gl] K<=ӳ9^v?-k2d2PDEEd2UVT8q℞{9A&I6m*ɦMd2ԧO[ ]zA;vرW>7QQ!fAVVv+44T<o4vX7N3f 0zR\\ǩdɇTSr=R|@@s999ٺ ܧ~5o\[lu9Gё#GmRE,-ΟA3urrUr/߿<<1bf͚.3gHVjJ ;òtJT|/$gT^6[/6mds>}d2iӦM}z5/^Xrwwy?~\cǎd̙3 |vW5UZ5VZ1cy 6uRҥիW/]t)qZj%ɤ(͝;W!!!rwwWrԻwo={baÆd2iʔ)y^5jA'O=!!A_}|IU\Y...Qvnݺ<=}z쩲e]!!!;wnKJ*ruuUppƌ;ɤ-[Jf͚%$ɤVZIfΜ)ɤcZue=zzTVM ð:ޥKꡇRɒ%奇zHFRLL$)((Hƍ$\﫛322W_)$$D#|ԢE +<<\<:tp[S /`kׯ$SݻwWhh|IܹSsQddlbu 7|͛gծ]4{lmذA;wT $ITXXNW_}587o_6m9Sޮ]SAAAQ7nSNiڵZv{䈌T&MUM6:{zꥡCZ=NZZ|ImݺUKO?T} Yӻwoj͚5 Vf$άiiij۶>VZ)99Y7oѣu}9rDm۶ՙ3g'|R_O?U&MԩS'uU"""ԴiSUV+WKO< І 4d[N .Co4qD5o\۷Wxxϟ{*""Bnnnw|(.nf)9G~~Q C->ڻ^6l+mNRzzzZlu릙3gEtR=s?~Eɓ5jzjnZRgLzQDtt<==u1-[VqF=czwK駟$%%%I&ڶm6md>M>>Ǐe˖:w\5̙3i&YF͚5sNlR$ٳG5zhs=3ghjժ˳A'O6$aÆzꩧt5ٳܾxb=zTkY$vzn_J&NhIR@@>SIٳhZ3bsOKlrǵ0 %&_ Ν:|EedԤIjQ>> pop23ڵ?x ._hQ|Tjrw)GG;vHy9::M6ngC^9W*&&FAAA۷K\*UJm۶ŋ;VZjZ~wm6){7ku떫Gm۶ҥKm6u]Ru1b͛O?Ts*%%EÆ LL_^;vPLLRSS%Iǎ(I۶m$kN޹޽~GSNԩS*WwߴƻrV^${믿J  GG"'ڕ-[{gQRtqZNZk(3Y~\\*(0p{=3@C׮{CdiVhㅄ앧gB/飵k꧟~RF]vׯ_`TPj$fBH+VڧRJ>p'rf.,_|rVs7.޽{kĉZ`z-){IgI\̙3zg>W\19طFRJ)!!ᦵ yXVd2rJHHٳgs=Χܹ (IN\D3uz98׷x+.Leq`rww>^8^^^0aL&'UTQJhٲedV[s̱[j 0vV+ggg͞=[[l1gffjȑrJFsҏ?5k֘?Ј#gyF*UէSNWgϞruu5oիj֬i+WuAm߾ܞzj.88Xm۶URRFL+WjVi#G*>>~I,֨Q#kժUڻK_xQ WX/?:wի5j-:tbŜ=Z$pႹ=66Vo$^sAjj"#*55&׶+ґ#/(>go%+*:zY6… UV- kE顇Rjs饗^Rƍչsg988>(:'(55U[c=ݻzZh^~eI=zW_}UO=ZjegV``k}ԯ_?1nt)=zT%JhԨQP˖-նm[uMժUw}CZ=ַ~+???}Qwﮖ-[g9r6m{Zjڵ:t:u~VCCo222ԬY3kNO=WL5nܸPSD -ZHTreuE;wC=:u۶m+WWW}zꩧԿ 0#F詧ҁszTzu9rD:uҐ!C ^qJKfe…:tEQNK*_XmIWPxը1Ֆ@N *Y~i֬Y۷j׮ky%JN:ׯ̙k޼yrww /`3_~1cjժ۷kСvܩRJ_СC驭[jɒ%:^|E/Z_{5-\P 6Ԯ]fիWOY|||RWQz74}t%&&jҥJJJRϞ=~Ӡc=&Ijܸj׮c;5k֭۷_?]vaÆVTZUz饗`qƌz7qvvڵkoS+V5b-ZH&)_رc駟B ڰa<~iݺu&S"""oS+WԆ d2[o)44Լo``-[Pm۶MӧO״i7No˗kҤIZ֬YkתF … _Vv?h:];q V[//QPп/)+apk˖-+ k[ڳgKرC'Nл[䒒Dyyy_JJ"##UJ%Oڕ+woBBӳ];$33SuՑ#Gt9ۺ"ժU+m޼Y *pjʔ)1cS-<[|:%%+#._ޠ[J(%\Uuwhɒ%ݺNccfΜ˗vsі-[#W^5r^J8L З*X! f$//͖I˻ZWux}ݭ @ NmݺU&F #u뭷vkp8_T\\ZoY=-[6eQ)UT<*%%(%e2TQY$Ir60>PT^^6ozDպgF_G?P~_FQG?_k׮/i1o;pp <*+[r8N[[VO{Z69v_Kz$)\gTPp&w@ NM8Q^z%|ɺ;d:6}ϏQ}[f$5qk׮= Bm"]^bbb t@@$IJHHqpأʚ-í-ϷHuu_(11C99*'ROabQ/~ +:$Y#L8Q3glݧ\k֬_uVIСC޲u]EE-[&IW_}^))):۽[vDJiii8H)/áX@_L)+1ned|_6n.> @ NM>]/?R'O{l>O+--MvZ vVIR0l]n:]p_x *,,Ԗ-[ڽ_W[/ 6Km6m޼Y4h`oiI~_*((u$ ~~km(ee˕=Kp5bļ^L (qgnI7tS#3f̐iݺدKNNֈ#T]]8r: S(Tu.P$cƯO\GTjj$k*I,S{v:jP4i NfTUUoYʗeJM)2fy w"I55z@ DTYYP(>ÇD]גknoY_XXg:cv{c (W^#x-JIѣK.I W]땖6)Ɖ@ -]T Psss& &HVZǏ+..Vqq"HL(QYϕn]oQUWh/0}ƍ{Z.Ta^E]h4,N۞w}t: KM8VNN~>3M8 &hڵO5y./c Wd&O.QZ$zw54|Ô?W+)iHQv6FԼy$I=̙#@n?.p z7"I{vZx=Z@|m[d('z#JO?K9n1s'ĩAizcW_}UwqG+W4MM:uo~y晭5c XBn[JKKb eggk9rdSUT 57W_',M}rr΋qJ6FT^^bc/Zb^w_WQQfԻᆱ[-K/ ͙3GwqLT}ZE"77W5I S/Ohr:7U\\H$7 ^fUVC/a cQr*/\yy,$\S1+ N0 0QT}}f̘t=5jT#ӌ? mUbbQvy2dlIR0J%%5yr&89Sq*99YoOcǪHl{k~$lp6]UTC/\v{Uv~l6Q$]qjǎ:SqFM6iӦMkF(tfp+iPuvktP^\2؝VaݝQ7߬ 6SNэ7ި#G*555ֱ0nG;wKhSARS'tk í#^Xa/33SYYY/8q'(==]~_.+q ԼPh$nWraqJ8 lĩh4I&Qb+: @\u.R$R׶pLp#ĩO?];wʕ+c%. 54lюyHH@ɇ&ɔ׻Pyy,$\S1+y` 701 .wqzu:i%JLLW~\ s$)\%w WZڤ'@ N}ꪫtu驧ҬYTPP W\qE'@p5bļ .a@I4ڬh?+HNg nT^JJxo``c@Jqq4:iTYYBMۦcR810'z- ={ q^8/\ iЛ*X!Ijh IZf"61bl}O@b ` N~y睺c=VSLѡ!C(--M`PUUUڰa>XB`PC7x~_+==='ij>*T8GMw1o``׫ j…ڲe$0kyj1B?uW+77/k4M y<+ ())ONg˻FNg$嚪Y 6 @/kڵ|JOOWNN&L?^ƍu~7@ ^46nUyzWCͽL+5TR2Y'(-mRǿ-1;LvXc57TQ˟PMͻْC2d #!P0XH$QڈFUSUYF sv o50 12/jk}\>"%'SnͽTNXs6F> y-())K99(7rMa8@@$R%*/\UUo05[#Fܡd%:" @%UQ"\4fߔ}:"@OGee ?Wu}}yOIBr:GhЛ{abL(~ =*-ٲJHHWNE˻B.t8P%I6"CfTÞUfJHp8!f8sN=Zr*++u'$iݺuڴiN9%''8)a-V]窫BT[ #5dȘ(qӏc4M;vй瞫{L]vY *..V$uGL3*|UV@RJUUKUUϷXӦmIN ,ifC辏?X'p\.~{9͙3G?$)(;;[3f?'_rbCi*\)o|gJJʖÑ-z*/9E$k*# ǿ8^6MK.դI'!!A&M_|i]-UT<-}rr.u ֺ0J^B\ >#M6_HQE3Q]JHHWv=/3{:}gz?Q[p*+_jjޓa$*#T23g+11-fi:Γam-YDSNQ+(==]~_.+q@hjQeK-IҐ!'+'"ee!1N@701 ǦM 6h…Ztl٢h4Cj֬Y;wc47j{FUU4~F_nώuD0 O@"UUK>3ڹE rUNΏ}XGp@UU|ڹs"ZNRQ9RSa;8u}nW_̙3c˪*XԼ#IڰaR| ?`(qj:餓(6//Uss^"Z+.Ӱa?A: P5hРX蒆Mx+du\*/\yy,$\Sc8uWO8{FCxQσyG{+5u$)\UPp&81_(q[oʕ+uI'NYIII˓fk_0iӦE$RTV9r^nwv>L  4X}ׯI' uiF UJOOuV0Zσ*/J_C"Zee#x8 lĩ[nE>OsjȑJIIu,047=-AnwhР 8jzc i*\-V4ڠ3tKql6=}w@:b䚚U^<UWB ~94 N:$]61A4MVYكx^RDhԨ֐!0b$[?s^n6E"Xpا[[+W3 ЈӴiu+#T@?@z'uYg3<3f@6޽^0&)@gQUW%AUV.dSvy3f>Qa:"il6 PW†a0Jrb> ޅjlܢÕrs/SRRf.701 yXGq&mVUkxΝfs*'":8CSW^ye#8аE^By<(ޡI=X+11=' b3%2s*+*!!UViib@0L4cZ Pzz~\.Wc뿖G5MnɹP )zǿ81sL{LC̙3|]0ob:KH*+_PYك)1qrscxz @ N{2 C0^Jb y< 57Wk7)eeP X(qb͒6{Fσ +))GnrѱQDaaa-,QYك)Ր!?̳ec@ P?C9DӧOt˗kƍ+()~/jk?^CoR^4(ĘafiΜ9z;k?H$GC Pzz~\.W4MyP>߳FC!!ׯY_+/*9[Si=y<9> r 1 @~իZעY6lпo͚5/. >*!54|AhĈ;wX' 4X56Ma4}ѿ/M4Ə@ t~FH ӌj<s?e>_nJO?^a:"C6F#<"ɚꫯ=]s5kەc=V/c46n< *%Hurs/SRҐX(q+l=cOoPȣϟ+ݺ>mVUի*+{PUUf)?Zhz@ N{TZ:_YYpAڵں =:Vv{|w"I55A XZz?x 4H#FHD( y$Iif֮=C~:A~:qUhw4l͒UTtnSFƬX>#+5k9+Zjn&/u<@ =*-̳,֭P0B))Gj'slXIęx@aSrrN>dm޼Ywq@FÒuW(Tt䑯*#tv[nwuTĻz}x< BwWao3Prrr .@jȑ1LG;wʗT[jk?k]vD0FR?Ir81b^%(q췿>wi2 O %%%ZtV\+Wjǎ:Р[/֭[N;Mwq qFuYm֍;Va . }*/J6*ͶPT%IGM`(qN%$$3Ԙ1cLwq,Yҭ466j̙Z|n9mٲE<^y-_MZnsC i$B-P~\9FTU/y땖L4(qk=Domw_0(Uߘ8q>n]Wnˁ@@Æ h yTV@sp%I~rm~**^Pbb窠r:/ 6o,55UT__:IRZZߗ8pأ8C~ھ^5h!=ͽR^nwv>N }  c >\}v?+"$/QSWG,QfY2 [u81b^ء f&L IZjU[֏?2bj^W O$Iξ^v[6۠}`Ĺr=zcIRAAN8]uU͍qŽwqJOOצMzjM8^tg}XHn3(o54|#Ir8 5mږ0Mӌu^W_ZT6 CiiiZp;8s: B{emݦKӧOכoIҽޫ[nE'x{./ӌrmn/PZ$NT(T&wY$Ir*#cVlC#6 @O5}tEQ?_"-['Ћ/}:{=ӫ;ʕ+eNں7<ˍ1cVX!ۭ?^Zb|r9dzbV^#ھOаa7+3)~U*)ɓK6)Ɖ'SqV$?s=Ͷkzuy{iN7UTThŊ{}]EEEmNSn-ZH/2224gq:thPho?y< TN΅:gr`@՘1cw7>'lpKھ-͖gr:uvǿ85|}7|p}'}(>XH$Q{TZ:_gvmԼ+P#G[#FSEEEJNN֗_~~~e˖ '#}qr:ظEiiS5l-:W6S$-SO=U6lЯvG3MSv+vi1HHjTR2MWϔ$w23Rb`S۷oQG* >\^x$IzeeffjժU:thl3|zFSN?o:(Դi[,@o``cV@:ty]z/2 C5$yzꩧ(zT4ڬ7<;)ɔ5UNgGqޅ˻FNg$p0ı#PTQaow.߮ֈz/8@@ n̘1Cah3fLb[@Gv|U6ݢw\iL @ N\q2 Cm.UUKPcc~k-[~+sSn>;)]ai:@ t~\X>XO>H>GMwp@8 l`@)..Vqq1S"0kUYy]SG)5u$S^˻FNg$Ӽc@ N9R\p?t߯~+=ڴiS%|4: ظEN( K].0= pSRRRczIssP}T~JHHSv˛dF׵*,]vz @ Fa>|xz5zUE?6hȐS4nܓ:W ]í#nX@đ6{1=c^4M'?X yTV@spkyqB4hަtab@,PȰaZjغuv]={~eL =*-JHHG|t\9rt_ Glzf .?@^aIҷZ~2Ea 2K=s0( @ NˋuS\\bE"XG6򟪭]U@`\㕚:Q1+)arbim IMpjڴ-} @zǻW\KI4h}\UUo0lrY1jl&wY$Ir64a@ Nl6LӔaLy>fT~zWEsDr)7r\}U*)ɓK6)qSo-FQm۶M˖-͛5g !@Vy*/B[tiП+7r%'u<@ĩyuY7|y}'} Ijj˟P \ɹP+={2 [׷*,]vSH$QFiZhQ+ EaUU&qL3Sw23g+!aP# o``c@ ɓ'kҥ%4 ~"q|ܼSGi?(7b8Qr^WuuuR?) 54lݞ/^#b0@PRhTu1:čPȣϟ+ݺ9T^jjޓ͖jӐ!3e 1L (qj̙n͛UUU%ͦoo*..V$uT8Qi|eeVRRRy|QhcSV0Mӌug:ݞiӦ鷿N>>J?rb@? RId\{ܼS֠A*%H55.T^5r:$I.؆XhҤIz;筷ҤIdɒ>LS8ӶmO'd|gv_\Sn/kԨ߫zIRA**MEE)#cV7F#tJJ&9Wkʔ/4yJ\O0i`r-d#GJ[4Mg?6h?./8:yysǿ |IW_}fq_R_~yO*..V$ƍyCM7)~=P6 Ah۶maÆv8Q U^GUW<^+rXǿq񁃄 t4ȑА!NW3pXfV\J~={$) ) Яt4a_ 6lؠJ+Vh]wW-ZzWui*&&l5sI{S0/϶mt 'h:?Qi /n /pأ =FCxA~>@~+%'ӑGKMF8Nz衇tWK>)))8qb|Xy?-57W)-ɹ)~!8k忎7Tz]k ~*Iڹeԣy22f8% @|:_SS ({V6" 6_~\iӶ&$) @֭[ƍv 9ʗT^ߒaJM,P))**׻Pyy,$\Sc@ Nwqz饗zjM8}-[/Bsl>hXUU|vѣWNJJl7\%w WZڤfu'4u9^S$iw_DtM fT55˴aOGysT_^EEt챥:6?w 4X׿?ѨU__Tl6O~N Pzz~\.Wi]#)| mYKsRSmB-P~\9>H \6 @[|;$9N͘1C㎋q7@@[ ߪ||E_,eg_KrMa6 A4MUVV**++K ԯa|g-R \6[UN%2lI`8 l`lڴIFjlwɒ%:s*~9U^Ho0 edqVVJHHuDE8uQGihh~3=ÊD"}@UUi*mTz 3X+))3~ĩh4zW]m'_k1JB$zUMͿ-REjnVJWNΏtuDp 4X}7nԥ^G)"4u]w;PSSZw}JNNu~%(==]~_.+qajkW[YK{RRuD=Fcͺ?Atui裏zHg}vSOa˟ϷH땔싔{\ceF#%6 A>Yg`0(I:SO('''/ ޅϷHrl)>W99jȐe%:">o``KuX@u_|5kh֬Y1L57TY-uayc![˖-3hĉZf~_NM7ݤP(@4Re[w>(W_}uWMՑGS99Q`b ` N4Mr-;dM/֭[u멧ґG C ?3ͨjj-o**WssRR&(7RHNXGOp(qfiСz5cƌAp z't:U__!11MS-Ry wK{RRuDǿ-1 /P<4=:uum8]аIO{J_)))[99)'\0XG#@YYc_p\>߳*/J %$*+\\!CN͖ǿ_Jeffj̘1mQb9ʗT^ߒa$(#tvbefXGq_(үkeeeiر{uO>I&iձ `hHKn݅\}ՕFC3o>ݣ#\(@jhh̙3rJ3Ыf3g+Գ>'&(0fT55**WssRS'wa8?Q+V5\vP|vaz뭷>FIi]-V8CNߠKrX#@ N=3>|)1?GC&/ϷH_)))[99)'\0XG1 @ڼyHaJK++kvk9U^Ho0q;ef@ NM8Q~***>7og}}hX/[;_V4ڨ5f>OII8 @k{/ŋf{MMj555'?IRj`p$iea%%)'rFNXh0MӌuK.ŋӧ7ѣ5vX-[L@@W\q}XGwrb1G v(Դi[.tǿSO=?r:z7%I_^~eKpjڴ- C(дi4~xM2ESLQQQBP׹;|rM6MoRSS%I{n]}zZ65|~<oGee ?|MM;UQ|USLr:)9PBez*/9E$kj@ϡCzkú%Iŭ?Iciٲe*))ɓ%I/tz'xb pأʚTUV$oߔiF5d:Ѕ:*y UPp&0[8x|5j:꨽_~uu]'4;](@FIҦMG諯Ps_>L&)6?{xyӃ~` c@=f͚5IukYPŁ@3v|M>bխU]ϔ~RRTFƩȘmn.~@HKgK^|W@ DTYY٦ටÇz[Jt?Zl$^_}yOo:w}ϟ@ aQ{ZeemJoMUTc555uajnn]cǎUQQ d0 ݾW_}UwqGp8,I:c[7љgzn[o>HGRXBzᇻ yTV@sp#TE3zP0B .eg_˕~ cU:oN7n/ݶmI7[]8cr)ڸqLԦMiӦv5s犊 Xbhtww߭E饗^RFF̙;CCݯ,pأʚ!ڹ?4Med[JHn.a{} ƌٵ44HYYRm5HEE־SR?E]{NrnF9R_XX؇b+hƍ 'Zeewvrs/WNΏd:"cQ̷goFX%'[v?mZ%M,H&c) 233/R wx14UVv|I ߪ[fX6[&+5u23RFƬXGGZ@;Wr#+<˵䞘_aREi|#/Kݝm6T4դI(MyG都]55/oVeM{ϗfώ4ne34{}]K~_mmVxWɯPJf-~{??(qcզMbL(QYϕPhwe? mdSZ(!!]h #I[.T^5r:$I.Ծy@w|G&v_Q!){)Xc5j_yrsR߄ *%9}pРĩKw|A]{~"tf)Õyjh(IJIj<%&u*y UPp\UR+ԫ-H2$JHp嚪4xL9C}]V^mU?fW0(!]k 1lҤ^ MO4Qo8thJVIn-˗[_aìLGkUM~yJfRʬQ%J~un-1cURI4{VkM5=1B!^{Mھ]NUIQ>L8A/s,PR$)DG%їk⍚`[+[4 yԞũhv봳]o(޳\$y5WӰuDM;+[ڵrR]~nYq0-ϯ5^{e*y!C׬^k gB_gKB޷2§d.{Q Pzz:ǿ ͛a=~{/??tvPӦm@b*P'ӴFx:_ʬjȭ~;iYe"uuKkXmۤ~,X#|mݺk]eq.˭9o_: Rb}]N=;ZɗSɓ5q{<~嗻N[!ed;܉+jK^h3a)mn}/NW -QUZMЋ:WS~z;b^E rҠ$0z=_KJMu3ѨJYpϥW^+ݾ }Ҥ/kE,yXEp(x]4REڱ~˕cd{Jyy,$\S qKZ^gƶuK~~+m,9ZiTV%NF*ѭ:y7MfzMII(z'LBU{ū&2gߕ&M &{$;* h&N8JGd(-M:DٌڹS:)-ֵHO c~~WQNRҤr}Oʕ+_.{J6ц4YPYc$)Y eᙍȖ,L;2M뇶;*d=FW/3Z,}{}%@?ظ]eeyPMM> r?Eef%-Q*|Ozq Wc`0e34{oR׻YJ|b^oEN8I {o%yR_0íw?o[ |]Ea.6Ji_{[[rv/cTLQRKVGoΎZe!}nߥ/rswu \2Ur}5ݧQis~ y8Ucf{P56j+-RT6TEs@՘jVk-IIV.WUc6Z߾&ԶYZ ~7%JwpO|O9gVA5j^WZ#rz))鑈mnH5`@ cG*+_RBBT~JIi{3\ɚ< 'mF/u5ԆPH^դOrj`0 %v+!\51DvkmޏfD)-ύO|nU"I=2bs׭/TYW:z0y utV7ښx1b}~P70Q|%K믿V0T{Oi0o ] */R;vܯ/@pB6}Q6D!2HGџZ]yQN%Il'g1D t'HU>T}]54{& d rѶofiuN=k5 ^|jjKw~ MS"mu_}KTc?9FyyVnu_s;2d?[ŽtkՖ.^Jۚu1+o5ޑ_q7*4l^IfwS =@ NkF=Xk06ˆa(8o00V_A;v}TH~j2+=u{$*+U+kfδw #,Y#|mtc-kH3ZFrjl֯F5juE"҈ر=^=}L 1QDI & V a +`[\hfT") Dl@D̄}edcelsv6`^Usv.Y`LMm[ܳ rT2EZBOv4thuwٕA cJrr }ݿ;Ouר7ZhOפ.NL  zǿ nG[/ꫯҷ~gyFOՎ[,IͽX7:7^3z6cziT$jn܊D''C!l#}Uȫ9ߛaH))VfRUe _f=k|KI̴N B^meh]nܟ}&}]*zlZp⻚k+hpa|` &ͱmDaìz}I])|k"_4Bᝏ·tW?{po_N#7S+pOռQĩh4ZdIRuuZ׏=ZjvG wbV ܟRiqw=cٚ=Bnwvף9~^zK <~:hV7HZ;vt|anr`|KkjϮga̔O.@5:2a_^~YڶM+k݅ZS%&ZG;:ޕ%9yx| Mhc_ Q#~eg1/PR}/wdϞw ajk7ߔ>DjjgCW;rswARRfa|;vXضR߿|jtVa;˲eRC_yF\YYiK鯽 }iTVϱ}n#uKeվ_|[x6cvi"k+Ue߅a=r^2thǣ7^o\(X 2df.>ߓy?X D8UPP-S~g*7nTb"OuQG'ON8jSuו76ZmkkODl֧% eZu;yyT#Vζue~{k}u:mnۭ'E"햗[ptj9\[RFc}:?t ܲ+zFE]/*{SGAVofwgw8[q\z}),?ZMkSЂirwȁ|{3G7ҺuֺE/~4q5RPKe˖oZ$VBk$eh봰;v+뵌R*Y%**44t>?^{ή{by?z:{4jO{>QcWaU&K[(u%*gUP"HO{j2&O*\Ks"sКKp:/!AZ*llL|*C=[k} Ǝ&LUD?r;_ZI'dMSKӴ]ѫzר]sZߴ4Z= zygkumm-uue|gwA||~?$wii%Zdg[X v-+WZ$ϗZt^!m])|ogVS%>\ЛSvr5{v}Z] $5Gvc}j^JJ|L Pwiv68/o-ǣYFGu&L~Zzt뭷OҥKc_ JOOuתUVᯤ?r[L*tD}vn8:ܼi{v 2hOq"ِ![NSS{fD9lj|]ŸݗrRVf4h)kJLIǚpu='5ΖcSgLQϖv*+]c̟n}#]:|+rYJK;?r`pǝ kð GeF/*4o XґGZ]זs]WO˭̦i}nFjJLkݗu>)GG9jhϨ7hWu1>Mѐ!Vgg_ȮDꤷ߶n?u;ȑk|4j}O;:_ZeӴ^VNu|s{>W[;ueLK~V(֟򩂇uOP]b[4YTIKa=wvj6{Rn7m-˳Vy7=@–-wSvIRuZZV Tr䕚4|10:^lηy۽-=}޾^3`!:[Z~|D*3L֊Az3K翨9A%:(C zo{}m^ϻw!]kvN/މpO|-[6.siYOHD- h7.=گԂt;v.sBa0,[Ϯ1cwk]z"IM>2Gcժ!\ނ9ַ1=WY h|6@ N=ӺKK/iْK.D/?cU~7@}7}RW{$?ZN?_8VvKqiO{Ul9({F^k8Nw^LN_-ܹ.+]dNMSaaP7PG>QӺJ dwddH?l=4zOw4)X2/~yD׷n~v:h$|g@O}(ۓd(;BͶϢ`=E>#W^3WHʛ3 gty\֦<,CH%Q}DF k԰Fݵ4 I_,^ҝ˻d`M5Ws@nyo6[GJJj ZO[*z<=^p8~e[(*~,m C-,^;ZC𘘸- ڵh>y]]"RhutOΧfʒ i<^ؐ%_m Eudf}U}$ZIVnR~ӧ%iþ釕{|SI$WJ[ m E@D("8q:e^s^tNU y^p (wr[4Zz- }=<&ߦ9|:J5'-sZHbip>A9 vysV?dR{v*e=VVA];*pŪ8N;UP^wݿi}lVSumWo:k2T <ɢ$ח_=}ڪlnkG6emSQݵ[JuJ u{\=]JDGk7 ~> :_E:=4W7<߯80W#,|`~P ת~՜-꼬On0}p\ܯ?QVSVˁb֞vv4qH/χl[/_߲^=]ODzoK"0[V3moJoty'({**l{]\SR |/g.*#JU%n-_}(y}Rr}wzބCW?~a[de;aS%ߏ?߿_wlS[7tR…i7}@qP󕟟/׫uW˵,믨,,n)is:uߺ޵@CrGm2u|Gw\4bb$8GW^9ML׻;ÇKg X-]4w`׋{8h9)f ߽Š9ٚH6 ^sնm{fd>Цvmn׆mmf׏UVqQzrwPJuR]*Խ~Vv ?hE`sE۬qm .G rDm='\)*h8dl+m7uluKWƒOÛKZ6ZR;-rO2+|M%&k VKn| {}ƒ~x'VC_MU~;Gnآ%16w`cUj R=4wPQz]7@h[ݼaa(@՝z fz~_=]BwI%%PIIJJ2kUTRyUF[af*9Ƣp7sqWa^~`o=^ӧ?1w ?H>@o?LϷ<50Hw_6SCQ_g[$y/cGU_qoQT}JC_ci-;Oǹ v;$S|| ȑ:wnV8Y_H>{JIHCO6;![}d. ֆ ڰ!Ar<={\w3/О;p<~M}>ɵ9]tJzwoPsۡ뛶:ڴ&ʃ=n.KڱcƪR?WE+!ALEllj1}ʃ/w4hJݺ9#Lk^^'2)C;wJӓ{OIN=6O_c6I 'R %h6>vKFnQiA`P`7g+X5 iiSVԖ}]0v?va.ێu<I%I:}_5@翇Wc0~=7/ҾݩSmƹouϋGiY*)!V:]sCzkhvokNKL-VUӓS7Zh-Mz=Ν@Uߚ r\\!`…*x[{Ԯ]/iwdUJL7ϳAO?تV3?%%Җ*-ߣ|ф;ww^(SW۪ceұА-֚:tZEEZ7tګ᝝CEmY]紡oNg_,jk-a_95t_OڬNCr$ ?A[zӦ_@@ӤI /7 N)_}5AOh ak[k o=&8i <!HMW_/wKQC+~j}~_ثyFW\tGطk>9oȫ⒃ɷ2ݼjCչҧÀs/JRvR|F*+A]):jxUTlUbW(ރ=T uEEyyW3j~h ?x0$X}[m^$*`PП 6aʢ>w#T(sJZzٮ]l  P;8Po@:YP`?PgO+qj'W'ֺ:}^n/eБ̩9TbtUV5O:)V<[6b:ee|~WB`^?aHw4vdҺ0!!Czuѝy=}zWo0eJ-w-ܝNrpCsBz;\0iMYhyzD1MiCƦ5%PhlB:ӹLXֹeGwkE伡R=T7N竼Kmݪ@oyYIh(5}5理4葿9۬ws6[#iD 5)8e3}DE:Rz mk#W {]oլuٛq|C8^öţےEs ?@0 tl5p@uY~KHj;.=V{OgQQު׭^=DE/ klZ\\oORA!99QýG_OL<3Ab^=T|1dHݡs(w:I~C;̲ TĚZy^B8v|N?ҐNnPKc.گnAҨQ{ ݛ`}/ xhfXA>u[ݫduϒxpŦ^<\ֹk5{;`mP曥{ՠT/i*^|qTonVYyߔ n٠f[BBïBw).}{&%I?{p ZC{E~jnc?[ߚ1¥͈dd۴n:­hv,.ܸ%C>4p?}0'o/:'/X,Җ-sE=EZ *i*.^;_۽CGӧ(=}⎑ьnCvs=g܇9fz||\:6@kognݺf-0o˷u]/a$Hn7Zk-N$Î&aHl3Z L$ʕ;=nFC.dvzQv2ԩJO"sXB ymi tlG#@d=0&io .k< Pws%8@V8tT.Ǝ=[60O{̔Ouc;4u=2͉2U^vzI;wN&+=}F0Zi,^A{qDr@$!D]7Ё-0o piI}=3a$:m>V-=!s.f|;hiiRioZuWbUZ$u>C))d <C>sT  C dl -ٷo@ee_xvJ(&UzP @qP󕟟/׫u#Le6~ :6:$p䨨ت={^ծ]sr-aD+.89ي=Jnw f)#J9$sRRƆpZ׿-*h{Il6jJOOoGEVv+SFOW߾:QɁeW`tA!h]5uT i3gW^mX@ۨR# æշP+@뮻i&=s駟nݺIl٢竸X\rv>S͟?_FҪUԥKUTlݯh׮*.^v 7N}>J<6lL--*ڏa"gzGXn˥7z׵l2u]r̙3CTyxp\JLLTQQNg5o W\\aWJJORS'4)@Goc#D?_WֺudX](IIDATw4hv+//OZ~};W^x@xaؕz:u;7б10.\O?$Y,?$fiZhQ;U а5B+e8ro:Ku@X#D2v;w"t**> ڽ{v횫UB㕝}RS"=X}G裏tI'ջŋkСi[lQNګL]=PD$@[uyiܸqKuyk׮!W_}U=Lԭ*I***ҪU4yP:}KJԳԭmJM=KVk\K"a"4?nMn[aԚgl6z!׿$mذAsթ!Cr"9P@ثܡ۟RV53]l]`x/&S+5LB@ mܸQfҧ~;vH2335rH]~޽{+ O 8ūj` J ʾ [ZFKRS'(=JI9SVklocc` u#??_z.UZ]`xҵX┖vrsVJvB:$۷@.2IRIڳ5EE%)aؔ8R]@/&1qp;w?O}ڶm$K.:餓t嗫s!D"R|s<]{<$t|:uY( z"ګ+B%%%:l4k,M<9D/@]n.}KjMTllEE%}ed\)#WtSJ @oc#D+WN9眣K.D2 C6m?_]VUK,ѐ!CB]rX i*-]{Ҟ=ox$ JMceWkժ._(##*C-{D]oRJN uZ@ B7N<{イZ曦?_2DUTQ@-| Rz/:A'buڈa"o֭:o>uMrss%I7oܹsi&jmar)11QEEEr:.&1MW--RUIIz}{LЎ tlvZ]tEꫯ$IaH'I{c Y7@HpA`hߪ]JVJxqN u D tl  Dc=Vk֬ѢEk풤,5Jcƌ mY74݊=JS:ANY,|ttDs=Wu)o@‰izr- [V #ZII {bbzL tl|MP9P)Ҿ}y<{;]))+*4 TZZ2@~ [TLӣcú}aXC]&APSL?eddP#kI`he6%'=Sj99.@2L4C]UUUs?x@&LPtttˊ.K***pJSUU}޽oi߾wWttgNPj%'P \:6z"T>}ewy2 Cr8u5 C?c ?ϗ u)#i*+оK$ytRS'(!a RaPKp |>_U%|>> [Q,OSj%#;e@F@"@rM^l.}Lɧj u:@=LTiׁ}]$JH] (0B]* ay͛7O[lnWu+///ԥ|J߿@-UTlէJI9Kv{FIa"4E]^zII2 Cv]/&N"Rbbt:C]UVO)+홵;w;/Tv{7 JJ#oc@ B̚5K/t%Sqqz-}gKyf%&&TŽ۽C7ߣ2TRe`h 9ÕsRSV\1 "Ĝ9sdXSO L~/\=^{5]~!UH6o_TYUVkRRƩKiJI/S"ڵk5|Zjwq̙k׆2޽j׮URFee_K +6%'vf#D˥=z;zjϒھ)ee]#=304}**X;w;!ɬ[T\ *-mS !LӔjwb$|, qwh{6Q6[Wi׮krn領fK͖M*(+pJa=h!X۶=*/_N|uFQ\\BgԻnqjZ5X"(\:6z"Hs|᪲roJYYnϬ3)Ҿ}wڹyIZMJ%%s߿H#Flw?6[rrVw~!"۽C7ߣ3eʾ׾}ok޷TTLӣcvr8'RFƕr8r%INcg*/ozTP0G{ XJJ:E={UpZx fKi "vw|TiZI? 5Q11{vW roJYYnlp9{ *(#[gH%%+TUKw6_-S99wfkxp#fswh{6VPQ'*,\*)\{RS')**NG*+`22Ñ+Ir:vv{!@  ?ќ9szjiu'4Lӧ/UfPriξQɧnϪ^qjR.Ӕ0(D@d" +ģ>oYW\q-ZX~*((ht֭[-[o]wy=:4},7GIҶmOUB0W.d؛]-S99wf;0m*++v+==]EEEZz;Z 5khʕ˥Dt6y=$TX vhty=G#Flj* tl,YDEEEѣG$wyZf|@kյ^߯:K3gNP?4;}SʺFv{=y|LBR+7n%'2 KP۷2sl7"NC!wo~c˥]bU:**~ nJk$#gu37B V/I*++w~iUBBBTnn~p*+k*,\b$.2 v|^Wȕ$9BX5 ju&Iںuk[MnڿQ W^N{RS+)d%%Vttj`ڹyu2M BU: j  IZzu߿jTYC۷?kdgֻLU^߿8_Yג>JN>Eyy*)il,p Ռ9R_h+>;+??_^7d58r;y=JKVUWQG_iIJN>Y99w()i&fTNݲ:&4M3E Cjٸ;?I'pϟ8Iң>oYG֢Eک↹\.%&&HN3Pūj`*+pJJ>MII'+9d%%,[@F@~m{a˗/i6lX`uY3f-[L5J7oֲeԩS'-]Tݻwoo4i*(}S[U^<ݒ$5AGNUv r8dF+@{n-[v]k… 5c 7oRRR4uT{nx<.˵T.2\KUUebS^mJ=CW }#ɔ9LN0]^o8UTlRA,ed\)#WtSJؐ :=P󕟟/R۽3sxIđf9Ga*(.])!aPC@߀"[emݞy |/SE&I͖!sā+>~TGqjZ5X"7б 8nm|& 5z[eUXst9Lv{WѬ:lL-!DZ@<"\+=\KUUGSNped\a/j37ն@0o߿XnN~-IS$Y,JLG?tJJ֨t}Gr?IJӀo@J~~z0vv+)YUHl,WZdI>lJUP22Ñ+Ir: 4M3E@{s\JLLTQQNg:Ree$šc__qq;oqjZ5XRB > lp*4vo++N$9̫2 k rC]#g$jMP\\%&iz;FQQ-گ͖eeaAUVO)+LӧMB~kALIR||egȑa^ݞ魾]" @:({6oGiik="ի_iZy%PJʙMqqOVkl %??_z.9#Iڷov~-,I2(JK;Gqq_6[fciho.K*** u9@1MSnNSY:۷@kUUG^I>I՚Ί?Nii_}eB{oc@UUU^>?z,eHH#[,XYcO{ -Wy?v2[ )=Rllo9UT\eM*(+pJa84 #>GT^Nk*+H2%IVkbc{+&RRNWLLoRLL/EE5_فSLLoy 2 Kge*'nlmv^o֫|}dz/EtWFƥr8dBRݞ!7h9:|Dϭ5}~nrQQ)!zSS'ָ8i.ڛRbbt:C]€iTYΐ6Jb=U#[ڃp*+whu&cv|}=AUH #JGVj$ lY2 - IB9a>{6oGiijjޢrv{7VbI̼J11\Y, W3!X!>[TXP~vNWLL/sC&jiNo|~M>G}>TYx/Iڰ2 [WRttRjWZל[ kR$_+I-UUNGdvWζL-{ #Zyy  #DK{-yU竐۽CTTʾ۽[UU{TU;psw|$v\O-bbz+6*,\eM*(+pJar LӔW.D^o0{ho|5lymL-};ifr"9P:ھ)ee]#[ee_i*/_/I;Fv{Yr8rUU+^}WkժZ+x}"-8UUm݁ۡHNuO #Z}<t,j߾rIJJVI6l]v{,JK 5Aa ep ‘idU2M|MJTR$*+H~&Y,1db yEuc6EGwIi2_irjWU^oed\)#Wtkgiho.K*** u9G>Ue&,ijMPTSVk>><,;֚*#WkժM'R>_E2̓W~iW,٧oKK$VSA=xS#p沒Ei6Nni~ӭxѤm8pHMZU>OzؤYilN*d*6<"yw^@^c| 3d8dth8|% RRbq0d2(Y,FtiQ5eْ/TV$vjϞԹT/%el3S@ hU))c*(n~ۢ yE`}<<p`c,&E8^5~Zk KK{ëW.@0M׳ݷ^H;G Wa|v|{mlyf::8~萸KN>9pxyMٿjӞlL--2=g:d:dun6|@WC畕Ԛ4}È :8XsHKhL0:\vUQzUQ.'ϣ={iUYS^o|>LRWeg)**I ǫSQTTbsZ'lWqe .5FY,1Nodmy<{܇EC|^#&/8ޥ˴1$ݞ. ̈́SFKl4Iiކiz68Xs^EŦC CVk .ee_U+*6jL~ENaptW|Zce՞Vb<[eޢzki}ð.&ðZr*+7I{G7O$EE%)**Ua;?7Jizdenf;4lg&fKo0cG~cbz=Sӑ/1ifr"9P4My%]7'6 .s$3p9~=n:;tjP L͙3GC QRR4h K. @XvRٳg0:E4aJww.ݮ\͘1#eCcƌ}aڱcG̙+..N[UVVP$-YDK.Չ'={aS^x=:S|r 0 3:s4p@9͛7OSL9oQ.@OdZjK.DK,w߭={jƍڹsgx \ZnUUU)333DUWN[oUƍw}w***SO=!4M3Ed;_/UW{u1cȑ##'x2335wP L͛7O7t~_֭[C]05{l]~媪RT߹и~[?ϴf70ܹO;#3\po߮?80m9s54š=X,s1ڸqc*iRSSUUU2)ۭzdC]8̞=[r ?믫J\pAK<Ngi|!@{!m8UVй瞫l!0^yy.[CYYY+m۶v@zZbzgxr/kkZPj3{1uԉj=ktEQQQ߿^y< =ڌ˗W^6m㕐.HmqHP(>}5x`ѣ5@;j6.|^{MZrfΜ뮻- @a`as9z7LoOHEEN>d-]T5j6mڤ˗SNZtw^  Dn3$7߬?\{l^mFAA233%IVUO<񄮾=m=ڌ;wwz4bM>]< C=ڍ_VҰaT^^Yfi޼y7o&M&mGae4p@מ={t7kĈ7o^[6ޟٳGu-qhfԵ^/s=פ!oĈ߿C*77Ws}iҥ1bϟxIң>oYW\q-Zhof,[L~N@j6#--M+VPqq{=p JMMɓh3q1bD[ vƸq4nܸ &hԨQ = '45oYׯwܡ[oU=p8TYY7RnUTTիW㎫5ZfV\Y#K[6muI'_lgT/ŋkݺuZ?mW_}kҥ˓$=z_+66V6͏ @i?~mFzzz>,ܞ={ԩS'8qb63N=Tj?Dh3!Cg ,/K.і-[#8XB]ȳdGuތJy'Iz7ۻ4amuYg)77Ws!t>cڰaC |fr4h|P۷oWrrkiFsڌ:  #YKgܹS/\ui3֭[ZnzڴiS KIҠA_=}͚5V՜6vsUYY>CĴ}BkO۪?'x.\Xkٳgܹsջw6@8hi_"#Ksڌ٣4I҇~(0t1ǴqB%3^y|>?o]v^kիWKrrrڢLa OzWO߼ys`ݻxbIRYY;+әgӦMŋOkƍڸq$ns 8585ydWz7 /G m34f̘Z,ZHvtG8tǫ3<>L۾`!՜6k_WM4Igz뭺pkNQ_։'.]]JsڌZvt)hݺu;5qDeee}B ՕHbcc'I*..Lk}sW_իpi3>|>]y啵|i3 3gj˖-S~oj„ m_0jNckNѻwo=3ںu$[oǷqB9mFRR>Cp :?_?p 'h̙m[ Ҝ6obO?G}T;wօ^{キ Raa̘12M3ec=2Dӧk.@u O>Z`A!B]`ZuM7馛n u)ڙ%h36@0h3v@0h36@0h3PwֲeL9mݵ9-\P3f /y)%%ESNսޫ6@hfm`f`fm`fhK@<D D D D D D D D D 0 $߿exӧkm82L:UahѢE.ǣ>}oݻwא!CW\)04s̶*ݻwK/UffV ٳ[}?ӧOomwD7xbbbO? EEEzGC]nz)[Iֽ{jƍ>|x`ڲe$–++==]SLe]={˾; vm;ShO"ar8zUXXrp1cv 5Eee??~a_b$iذai˖-SttVen;_={N<67Z&33S]vo u9MF BX,]}r\zC]0۷bccC]J+]vK/m /&5TPP ׫Y,|i.b. ~튉̙3w&3fM6ՙi&1cԚ>}tٳgkժU:3ںu$Tzrssp8t1W^E0 M:U;vԩSչsghРAzj-cEGGk׮z / 0te5<|ǺԿ%''+&&F}߮7ZsAA*egg+**J=$r}nsΑ$UTTh֬Y4iwﮘ%%%餓NK/Tg&La?~PVV$%$$vOhСJMMUllrss5a„:o8i~u߾}~~)&&F:S[oC=32 C/+^zI ~j;hرJNNP>}}*''Gxm{=uYԩvw7M~WxZ4M։'Ν;pk׮:Ӕ_#G[n׿p"A233uתX|˖-ȑ#{n7N;wN=TO֜9s4tP1B|?|no߾}>|{=3FFڵkuei鵎sĉںu{zOK&-ܢYf)&&FzN=T\.=:URRRzwСCokĈ:3kt9衇R=4i$effJ,*\R4iKjʔ)YZvs窨H/pc袋t5i$uM|Iz9;ueչ]|ŲZ)I֭\ƍӐ!Cl2}A\rgϞҥKTUoo eXdRRRd|2SNmr 3fYgEi:sTVV|PÆ Ν;˞wy\cǎՎ;tM7i|{M7[nQNNN;4ݻWvO%KhԨQھ}ƎLթZ|袋tjŊ8pƏRs=:S'y睧?^z饗t)Z.S=$Iƍ$z뭺⋵rJ 0@{z5k~ѣGP~i7@ș{Ljif\\k׮23f0%w}wuGmJ27nXg7n4%G56%'|20vv)ׯy)%%%<)<餓jmo…;:˗/7MbZ*0}$sҤIu^~)<ꨣ].W|Mi䘒̬,~LoMnƚ֫4O9SRsC1cƘ;v쨵ΕW^iJ2oZTS '`J2g͚Ukƞn7 6ԚWUUe~Gu1MӜ9s)ɼ뮻n tY]wJKKNOO<׿_~zd?b|7vZM}4ĉ3gѸq$IӒԣG=쳁i_~yM>Oկ4lذtݮo믿-[4i{ӟ$Izճgt04}t 8Pvvڥ~Z]vճ>[Yff'w?^$o߾?~ʔ]-l?I!y<=CK/Z̜9S?QoO7tSfll~H>F&˥JCyyyEEEiԨQ׷o_I_|}@tm)..NO>dmO?δݻKrssջwZVrrrTUUUoXiL"I W_-ǣg}60Jgϖnץ^l۶Mu7+ԩSuufi3hРnt|>]wu5uT͝;WRTTT X-!8۷7i|ǚ6mRSSoxs=uW˗/oҾv%IJNNn .T||*I*..ի5z&E]Tg^zzN?t|>-YYۯk.}ի9: ȑ#zjժFh"UUU3PLLLի֮][W3zjٳG'p:w\g tu׳>9shΜ9*++Sqqqu֭n;HzF?ٳgkΜ9ܡĉnݺ@jn[=bbb t駟n׭ު.]O>kZۼy&O,45w@ڦM$saԹuIۋ]"I2lŋ5rHEEEI,Y"ǣN:XKsy۶mkU{ P~~ßm= n믿iڷo_}Za o7xz\]vezw\tJԾB%*yn=[oYۨoΚj̼֒ɓ'^?viwҥ曕\cƌQFFF `b {v>C=Zs9%%%jj7n\ Xӵ^^{MO?FyiϞ=K2eN;4?/^zJO=~GivJKK5qD޽[O:T?8z{~֤}V_P0sԩuI~ oaPm 8P htBzgϞ9rdmvSN?zK-Z{N=&OW^y:`^gD BWf̘3f(++l6$μ-[iڼys;kV/~i[zUo!$O.ZUPPm_j@bNSϞ=ꫯj̙A[SNtUW骫izuGW\>M%\5ku]Wr=]uU|.IW_}Ug~+)(@(`J͚5eF-IzGTVVk2|>~_ժcժU&0]kV~~C6U޽%Ifͪotm A췰P/r_pF׽ec4͠u]sUTTL_,׿jʕn[կT^^s=)?i/߻wotEW\q6mڤ)ShΝuꫯvz*..ֹ[oO۶m?}UZ_H~ٳk5TQQx[|&@c`w7k̙r\Ο2ez!}:ꨣ4tPmݺU+Vo~=V _G:餓TTT?PUUU;5dȐz׻KuL&M T_~yӧ}i:s|'nw_B֗_~N7t4nZZ&O^xAv]^ziP޼y&OD 2Dڿ>#ֈ#w!]oW߾}yiܸq뮻MWzz٣/Bv_&QFiѢEںuk7Jҏ?m۶+ L裏$E?^{~kĈ3fҴdmٲEzR~~~_Ӆ^ZG2MS?֬Y&@qmڴI{= 8PݺuSii4iR ܐo]}yuQ:㔗'ۭ^|良>lo[-X@iii|P_믿^C QvvJKK駟j2d=Z3MS/VRRN8^ %''olp~LLiʔ)*..;#׫_~Y_}*K.i jѢEׯ}Y{ AIR=UwŊ vW۶mӽ[墋.oÇ/лᆱ,}ᇚ8qa?S$I{RSSu}i5w\\R?&z%I|̙Sﭠ |^Svv.]^{M֭q|]|M>uc~}233իW&>wqz-=Z+Vknٲeܹs_ӟ'-^X'OVAA͛ &m'&&F̙aÆo++WSN{CZX,zohرڸq^}U}'r8[D999_ŋn'|-[K.hq}0M u8-ZH'|.2͞=;l٢C$n뮻ڵk[ovء{N_|1?Q ,}ꪫtꩧ'PvvPnv]x;vLԟPRW_}z{'ԥJWPgǎ5kbbbtI'i̙uYm^Җ-[3fhҤI.)߇ {1=c. hifa`"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"@"PT P|ھ}dFYLTqqd!m߾]]v u-[(;;;ehgtH oNgy\.vc!CtD:      С_~:thKZ0M u\.UTT$LTUU|>_VڒjUtttU47#CT Hvk.Bv]iii|FL[ljUrrbbbdZeFKAͷH۶m$BX0٣hjB111JHH֭[gXP񨴴T)))8DUVV*Bx$Iv=ĕ-Iz!h&0 wc֐L v!.zJ瞆}4}zF5H'tҿ%u ;vA.w@[hOׯR̔ ߪCGupZzGlrssC]F^{5?^銎VZZ>h]r%zgvC]fϞ-0jݢ)Shu֩^nӦM nw2 CSNm@uݳgu)!תUt+))I[5uTE5yEE:;vA<ƺM3u,!RȒ%Eun/RTJ.J%Yv)KJ"liؗM32c;1\7!!I$e=O?ٳ#E9T֭k(U͚5QF0tPN6nܘnH4/_-ZТE 6mJ\ScƌHSƖ-[hҤ _|իWnEx߲eB4j(Q$I$I$ISK:-"wܑ#8p qqq7z[l2 @޼y#0gnz-ڶme]F|8.{@ٲeygtlɝ;7{> !!YfQ~HQp.\812pꩧF8@GgI$I$IѣeK ߿˔eˎd"I:9$)[7]f˖- BL2%˗Ӯ];*VH8䓩\2mڴalܿwƍk׎xYgyĈԩS Ppa:~7tB(J!["Edi Ҟ(JTGF)Ryrꩧrw;"tIII<+WܹsӡC*X +V`ʕYz.cڴioߞ*UpI'?~*UDNؼys\3uR@ *5\/ͰzjJ*E\ݻwv+W}/_|q'sW2}tlْ .~;{K.}][ݻy駩TyYf4jԈP(IJe5juԡ`|tMiuk@X~=ڵDϟ:}AN}߷h]e˦]i ѣs111PN~yw_>(P*UУGv}gԯ_N:$j~ ?~ 6lt xiܸ1 "_|-[믿/r0uti%I$I$Op뭐11ppmu[3τ?lNI:PF_hí\իqFN;4.rY|9nݺ~Ykԭ[[r~z+ZnMJJ wqGCA/^3grrUWey.]!Cкukbbbq *ТE ~m ,ȵ^]zéRJZ73j(̙C%2c6liذ!իW礓Nsٲe fTR.;w2o<{9>f̘ksoЮ];B>̘1uuQV-Kٹs' o+`ӦM~\q[ &0~x}]n <ի0a˗OuZjRRRh֬_} 6J**T(6/>_~93g>`ܹ,XgxkԫW_%JФI6m.B ()Sx;v,_|E{>c^uj֬UW]… 9r$&MbԩTZ5Kԩ=yVZdz`2f+}rr27t1yΣXb\O?ݻwsEQZ5NbŸư{$I$I$χap(Y^~9\??̛CwCN*IǨ@C$&&v;v~`ǎG(:4 xׁպu4iu;wnR۝;wΰn_S)S&:/_~yի5k@gez̿6lX%J~嗴7oׯAǎ3 of򔔔G ZjYz->:t_SRRRҾp yg 83=ݺ͛7SLIcꫯ\׬Y3:,VXeҖ8ݻws뭷M +W.^zv;CRP!֭[omFʕ#111[c\p:cDEESOQdIƌ}_~eN<,g۶m|\}$''sSBlͮ_ť[7o^zM\=zte\ܹnK/4myܹӧOϲ"o޼+˗/@h޼yu5k'd֭ :{ zAɒ%nݺi(P|*[X[nw%::>}{WNuܜ(2|ƍGZx饗ҽNj+ƛo 믿ctܙN;-my\\}% ѯ_LWwygPgyRJ1|{龦N\Z5 ؽ{7/"Rk fK$I$I Sqc[~ y/;>5ko6ov5kwVƃj*Ǝ?LRR)))ɓgf\O6 o1 K.aԨQ]zL>sL;5kVy B~9K2,X" :^Vǚ;w.;wN:inᆴcѾKYfDEe{s9,Ì32TR*U`ϟO:uҭɝ;7^{-{fڴi8ڵkY`vguVP;3w\.R̙͛Z*ks$I$I$E^CnP*|1K#SKJgCnǡ2w.T~ˢ-[GqUW/_>jժe]Nųw4}Ʃk_JJ¡|=ss 5<cS~#RJ}.ŋO7vfUl(I$I$I:$'ðaн;|=4l'Ee/!6mX %`(px .dJHʓ'>եvv99C.]HJJK.Cex/^ʕ+93@\x@7ݻwCqgX|ST)~~L_Ww9r$ݻwEرիWDI=˗/4qR*UŋөS)\[GE~_s099o㼤^?͚5cǎ *Du05NE~}Au:ZjEpЮԮ pT'Nn{^{-[zI!+sOZE\\\["2;wu\s54hЀf͚Ell,M6MW$|v˗&,]3gEnKӫW/:t\@Æ )Vfbƍr- :4lذҹsg*WLŊf̞==\|t5i҄W^yիS^=駟?L&M\2s̡B wyܹɓ'SZ5ի˓AjӧwM4sΡRJرӧvZJ*Żロ1[jŋ/رc9өU7ndԩ4k֌Yfy˕+Nj/HKiРŋgƌlڴ͛g+x≌=6mЭ[7:,N:$V^ͼyؼy3#G䬳lٲTR9spRreiҤ M4m={dԩ5S~}6o̤IhӦ j)_?8}FbŊ-Z+W2o<7nLÆ p.cRjUWNÇYf 2B6VNPpaON6mHNNfԨQXGy5SN2a-[c=HE2}t^~enmG:>/o߾sHD$I$I$E@bvPc qgatZydcSKiK,PBtڵ~/?0;w.5jHl<r-l3gjY$I$Is,] =.+U  ۺ5-ޮvl #F χ:u`$#I:|,EBB`ѢeLΝQK&)) @…yW#Q:wy/^38X.]ܹsIII}wy{1tP&L@޼yk"I$I$I۶A yy7͛.l6۳{/z+%K@ӧC"Br[Cٲmj>%IGtHHK||C$I$I$Iұm xa#8\[S'Ln8yr`QW?,AtdX(ҷo_KrrrdЯt;C.G,$I$I$I#lvxyx9Ý1HCnP ͚es IRNWvIǕ{~ٳgG:JmܹᏡCˆ߲6m"H BMm5~1b_~9E%w.\ʕ+s뭷[o{HGdBtsTRtM̛7/>-[lv҅P(D˖-_ 5#%G8_F}z4ZlPF[zv9*4[$I$IVA~*}/@٨ ;6*`W(>>܁)$)dzPիgqѡu\s5|g*Uf͚ѨQ#RRR:t(wy'7nLO$ bʗ/O-hѢM6%W\|ԩS1cD$jʔ)9xR ʖ-K(t I$I$Iگ_ [wа!x#s# 'ASRW&M O0f̀+4>>ě9$騳h"r1ǸqW^˖-c͛7B 3_~.n{歷ޢm۶\veɓ'ruTj߾=7x#үl:Z~K$I$I"!v O<o7a\rI6KN>U^S %I ;J:*U|򑎑Ápqߋ ܁gᤓN:Ѳ,wݛ5kV#(Tpa*UD\\\(9Z~K$I$Iw㏡woX {nN9?I:Y(IʖMcڲeKBSLI|k׎+R@N>d*WL6mXxqm32ݸq#ڵ#>>yrYg1p}f1buԡ@.\뮻~.] u'֭H"Y~iSR._P(ѨQϟ#+ӧe˖\p ]tpJ^vO?MJț7/͚5QFB!-[ƨQS O榛n?p}]2i׮%J uYۗ jfHHHW^4lؐ%K'O/W_ٳ3Sʾ[TRSxqڴi{[o"EP@jԨСCAυ^I'D|83ҥ ۷ox~-M6M:Sl}~ŋs5PP! ,yǸq2Ehٲ%K&o޼+VoqǞ9s&7x#%K$o޼sEѿӎ%I$I$&NnOxK/AŊЮ]/Ko Zg0ndҼBt|p `I: YcNrJWƍ9ӸINNfߟurgi͛7Sn]nꫯhݺ5)))qӧ:t ** PxqfΜɹUW]Qti B֭+T@-x)X ^{mںJ*gϞ >*UP~} \3j(̙C%2c6liذ!իWr-[~a,X@*U袋عs'O>aƌ=oڵ# qό3\Znjb޽ԯ_;wR@ \LuWi&N?t ֭[DŽ ?~<.7pyL0˧gjժPJIIYf|W4lؐ*UPPtۼkK\~̜9>s`ϟegS^=~WJ(A&Mشi<?d=z4>(~:UT!66_#G''p>GO>4jԈ *7oh"NV$tRիի9SYjvsO_JJ r >111ԬYN:9sеkW>3L6tPZlIrr2wKf޼y1)Sjɒ%Ԯ]O>K.?iӦqW2`Zj5o]vQZ5ԩʕ+裏;v,} 4HO>}xIIIF4hЀ׳pB~aN/N-6l۶mEi.\8K$I$I$pNcNg{e00`<Z^ FA*)$)G $8Abb?ncǎয়~ vq ~5\L/i׭[TP!/2>]vY~{gϞcƌI6lnks+F iwڕ>b+VѣG-ONN:tA͚53v[3v :::(P@\fsҥA`ĉ}gAܹҥKvJ[>u  'pB_gϞ=~nYʮH}$I$It|%bb o _?|u xcǭ[x ̓য়hv Y-.u܋/8úSN9gyX^}t5kYgŊ+X?8p w[oM~ W\KwwCPB[7xnʕ+c=Fbbb (VXeQQQ^իӾ}l@2}T\9^z)]wK,~89>ԬY}y3,Y&N2UV1vX~gҊɳi`sM7ޘaB q%0jԨl^zEu9W_}u̚5+<Phg6mŊHHH:ܹsٹs'uI;Wu 7н{l;Rڵ3k,֭[ݻ_98_5.h{+v|}^c4n8Cq0@Q~)~PO>^ "뮻>}0m4;t/LO}>c\//2f7{eʔ)u]_$I$I6A P7ڵM*m$I,m܀Ce\^ЍU-[?磏>⪫"_|ԪU.o=]w)UTO8 \*8tҙs)dU`A7oVLf O?oOWX1?|7nLTTA β꥗^SNٳ'[ι?(SLdV|zSX֧VfEM)13Yձ5 .kҤIe˖:Ρ|8q~05۾2._jUu{>cO*ˆ رc'|rŇ$I$It4[ڶqõ| u~=꫰{7u<0^$Ie$J…u ֭!uCpl:h>C:uѣ4i3gdڴiٓS^, Ek[bԩ O7,_-[i2 J,IW~mZ_eb-Zj׮}XsH$I$Iᔒo <'GC&OYE@\X/믇Wk;B6gHd$OWQyg+Wss9s9.]D.]KT,^+Wrgf+끸 u}7nw桇ϰ?dTR/^ͳ|7rHwgǎ^ O^|yٗ/_~HTR,^N:Mq}:Z~$??L͚5y=?PP]cu_SJ*e(Q"\rԡoϧD=~RXd /b  .Lٸq#7oO>$I$IIwS_?FAṂx Dl|IRZ/dXqF͛qbccѣP~fL9k%&&gk}uKohr޽{3gӦM@u_}k֬VƃѠA}v0ܶm&MҘ>Y磏>ʰnƍ>ӸqcYZ෯¡&5j _|̙3+VdX:=i&&NxH:IJJʰ>Xj"..SqƃΖz}޽?8v石y SO'ٽ^iԨof%I$I$ICr2}6X_|gR7q"tcW5Ot,PGʕSNgi˷m]wݕi;3ig}F.]dmժyaȐ!|Wi˓ر#[lxwqݺu?̰ntO W%J`͚5TXCm۶Vm۶w0`cǎMnΝmۖ͛7SfNK* 0={-駟xGYVZ7o^}]⋴{H mڴhѢՋ7|3{e„ /^rmbbbh޼9{g׮]i,X+r3TP(&Mį<=}( ʗ/%\BRR;v$999mݸqҊ"o޼<#lٲ:.Vwx[&| |SRRxYj5j޺u+>`º>>ӪUcǎϟz#FdXk. G% ѽ{w&On{2nܸtw}I$I$IҡPnx߶mᢋqKewäIн;y-I:8$Ixܹ3[kA0k,bcciڴi%w(_N?t~a4iBʕ3g*TcΝL}hҤ s*UbǎL>kRT)},٪U+^|EƎ駟NZظq#SNYf̚5L[\9^|Eڷoϥ^J (^83f`ӦM4o<[Od\uUiӆnݺqYgqI'zj͛͛9r$gue˖J*̙3s=ʕ+M&MhҤ k+'ٳ'SNeԨQ/_yf&MD6mxW| EҺukOժU ɟ??ӦM#99-[iv^uի[oɓU'ӦMo߾YSNϼ;qs9+Wݻwxb~'T­޺߱N9G˖-ꪫ8(]4c+VCfo1bSLv$$$W_/rP>7|3\s *T3Π`Zym66lH^xG Y&vׯgڵ+]Qu&M:u*]t\p p3k/$I$I$eУGnBp!`̓g7~Zeծ}KAv$eΠA83o5kW]u~-'fi> s'pӦMcȑ]n3gru?Æ f͚̘1 &PZ5fΜI|(TP۷/ꫯ&99/#Fso_n=zо}{ˇ~ȀҺpɓiӦѮ];'|¢E{8q"s>/~s=L>ou1|p y'?~ZW(Tgof݌3UV3d:f9r$jb̙L0U2c *TpHPN{ybccӊܖ/_NÆ 6mڐ̨QXb?<<k@⋔+W/iӦq3gʔ)sȎsꩧ2sLnf6ǫQHJJbРA|8_5fY[oeڴi\y,ZaÆcڵkܹsTRUPo*U0af͚E:u;v,wqGB ˚6m… BL8O?krUWGqg硇bԩfŊ 6~>_|1ݶwa 0 [.K$I$IRV̞ 5jg}Q}}5_ n|ix_'Ѹ|cL( !$HKJJ".. ZsN.]JrҊŎy¿̝ իWR -?x⑎$w>n&ڶm믿8:H;v@y㏑%>/I$I$)gپ:w[ ~L'`pkʕ!_&gFtl$阶dɒtDڵGy~.?0;wne[n9ґt̙3O.I$I$INJSjUxpm̙+KIQ^ziZpHXn9}@||0>> Iұ,7$ E?BwȾ?Ν;SF J.MRR , !!… ꫯF:t;(^8gq,]s璒B9"Qa޼yу'мy'$I$I7O޽GУ4h& =p|-[w'm mY힃!~!QpdJJ? 7Ju`I=SK$I$I:}18d7Ugxnxa.9"W,PAܹɝ;7[n%&&&q$I!cɝ;wH$I$I:~ڵ7- nyK_;"UlՅ$P( '@bb";vtIt$''HB#I$I$0ٳzj`rbTy_?X?IRd@Iڏ… cVXAll,'p HA]ظq#)))-Z4ґ$I$I$&}[ÂYNׄ~"y&  7,$l~'tҬ_-[yHG$I`/^}n4Fov$I$I$I$IG_e}\5q=rrS?x׫y0=}q$)2,$I$I$I$IG`|yOb}'7(L\ < \Te7?#q%I( %I$I$I$IRf3>bfKcx40QQN'Io$I$I$I$)b'Uj4yN+gc2gy1O'ӹ+$EBA!IGZRRqqq$&&8$I$I$I%D>8^ͯT* <ѻ( .t:)Gt|s `I$I$I$ItdO<Vp $nZG:$I9$I$I$I$JNf#x񕼴fq7^zu$( %I$I$I$I{7Wn[:WmїP?I$I$I$I$ڱ5xm-]7n^)]xIt̰P$I$I$I$[ǻ<\fr'PhH$c$I$I$I$I:86kwƉ };ywi߹'?$I:fY(I$I$I$Iڵ|{<;mG[6ҁ2xm!|"N$I$I$I$)K%3HZ/Xh=lyȝ;$I:DE:$eל9sۨPP?$I$I$I$ӂb܅/Pj.#늟͇wӚ´lc$Iboaƌԯ_HǑ$I$I$IY)scXqN)e1v.[U[':: %I:9{lٲ #I$I$I$|-; /gJ.$P($IR*;Jq%I$I$I$rA&ذ8-v9|;m/_.*ɅOU4sҳgOjJ*E("vSO=EŊɗ/%JogժUG $I$I$It  {S/?vtz?/JR Jt򻴤CgagΝ\x̘1x6mʲe4h| 3fSO=L%I$I$ITr2Kj-WW*t$) ,tHխ[*UPV-jժEٲeٵk?ӭ[7f̘Aݺuωॗ^cǎ~L2$I$I$I{zwīobWT~Z7[/ES$$ Htʗ/vb__jvMѢEILLd޼ys9WZ 2gjԨaer-Э[lJJJ"..Dbcc$I$I$I#Ū~ =vfҍhwf|>Ht-ߢ"@o!11g(k;v쑎&I$I$I$m sc3x tk+I9Z`իWt} L$I$I$I1!1:H7N]qrtO!66&$I!`ZbJt}˗-[nSN` 6 +qvŮ]>OJJ:$%I$I$I.ȽwP`"/uJGN@H$I"j֭o `˖-i~Gχ)S ˖-t=zеkCZ$I$I$I: %$>"y$n$t8It8DE:$eWF Ǿ{1>V\yK$I$I$IQl9BrYyrU}c ?+h$I0;J τ;͚Oj|L:瓔.66jժѰaC jժjժ̛7/˫Tr2I$I$I$IG{fse}\Vyt4I! H5ɼ[,\^'p$&&uִPUҶm[nvr:uc׮]Kݻ)Z(|wTV-UpB̙C5Y$HLL$66+I$I$I$j;&~[.WSr5jt4IGK7J9İa83h׮;r  gϞ=$&&rJؽ{7 .䭷o~m۶y >>>q}o߾$''G:$I$I$I{1?d=C^ s ]9'%&xSJ(V IQj׮ͦM_Җ 0;3 /G}O?_~z^zE0'))8/$I$I$It$'|:kfsџ0jG0cjŊT^=ݲ1c 4hKqƌ?>@I$I$I$PKH~M\֑@Fw]wsE|raeޝs^cQtO$ jӦMxiA_M*U(]tU2a„$$I$I$Im ѯ+a,g, ΠQ$&<  *w-pSH?IEE:Sxq.]ܹsٴi 6L](:$I$I$IBt çyVmNZ$|| L%$I`@)V| F⢋.g! qW_DJ)I$I$I$c&N3anx .Ѭ kTɓOs)W$I:x H}ӧOA^APZ5̙CTT5k(Y$7tN$u#11HǑ$I$I$IR$N1kfMm lT];11%G-(Pcȑ _5jϦp \veL*I$I$I$RR ^taP)E(I?vt\۷/}%99_~ſ$I$I$I? 7 O䞂q[9<jXŐ/Jr_Yڵqgt\t|Pq$I$I$INBm@||xЩB> /üwQ3;t՛[oN,Y~=v6rL$I$I$I$0 е+4i6Ontz7TL_믇ߧp$IGR6qD}Y[ٳB{=$I$I$I%a].љ6/>]x6ǰ'7q}/_}s$IR$X(PÇn %%… SLbbb"K$I$I$I9&N3!)Ko_ƪͅi{*:u>C.G4$IR:J9TM⭷ޢe˖D$I$I$I%-Zq;O}{G1G9}.(,%IRoѰaCnHG$I$I$Ir o^HH}sw`ᎊ\j*O <ǟǕ$I'J9T)^$I$I$IKIѣW/1se_ش!w`Yĕ$I waΝ˗/q$I$I$IJH~M;w_=t6/LT .er,@z.$IY tIٷ}v5jD\\o˗to߾ۗd~t,I$I$I$yF ;WM7OX.9q}6<\}5DEQ"slHJJ"..q@)KJJ^z,^eRdIRCP(ė_~G/$I$I$I:$LXHFn&Wt뺻4U {$oN,PV/_~!,Y’%K26 t$I$I$I0q"̜ @¤tZ|?1q&-Fժ0|84k/U|z?VJT\DI@I$I$I$Iґ0z4K$B;WP*n)7M ['IQ )kn-ZtPc\($I$I$IE))0|8saI?6$ ɗk/g_\ :ՉtjI:ny[:Y(@$I$I$Ic^ *ו+neeG,$o-ߜX$I$I$Ig kl^<ӛ3{q,7BfP+,er,@ڑ/It<P$I$I$IGsB>!s5f`:aD ȓ̛PGCv~iK$)@)۰aCe֬Y_.Gy%KpS@'$I$I$IR%'л7˦`p\ 7q:iVE$IRDY(`1wq[n%B%KL[j*os-D0$I$I$I"*1 `G7$jpu!Zu!as($I̷~7L\x5kA梋."..#FD($I$I$I%wFp}̊oJۇcO-Kr6^Wo.@Ig%**'RzLzG8$I$I$I]G||<LšyQFOɕ(̽iʗtjI$ J9ө[>R/^3gT$I$I$I:+]hYy \~!2\QYKwG鴒$I:,r۷SHniӦ#F$I$I$IGĉbN[)T/ޝrq!N>9I%ItY(P%Km ~\rG($I$I$I{n+(M7`DJ0&~bSJ$ptI.c|歷bʕ\qG0$I$I$I{=`f4s 6N}src$Iґ`@)ԩv}ضm}#GW^)Rx i%I$I$Ite3/ѧ)ȟk qjռ)t9Ejvm$I: "BҁokXz5P(ݺ (Z(GvJxJJJ"..Dbcc#G$I$I$)sASs 4!T{x\*]D[`ETo~FK0K7;J9XݺuYx1 `ĉ,[J*EƍiӦ qqq)I$I$IJIO>!xcgS޾,<7_Bg(W/.Q$I ;J:ۗ}/_@H$I$I޽У3,ñT / +DwIH~M?%Ie@f?I$I$IΝ0h<<.|>zK/P(!%IG#K7+V !!]vs D$I$I$Iʒ$x x%֮g\R$I$I$I%A>з/۶`ڄuQtG:$IvJ9kƽKs9zD:$I$I$IɊп?{CT? Gs=PPCJ$)Pʡ'O>S.HǑ$I$I$I?gx9: 6@yױ\|3tE:$IrHt`.$I$I$Ifsµ™g3ix:~pr1g$I:0J9)B#C$I$I$]SL\5k\Wgu^[3 Q#R%It,PʡZhɓٸqcH$I$I$kcJ ڵ7{/3Y  }]PK$)Pʡ}Q4h\)S ґ$I$I$I_A*д)Ŷvw،wFѭ hё+IcE(jHʱ֬YE]ĢEȝ;7ŋ'**c]o(bɒ%HxJJJ"..Dbcc#G$I$I$$'̙w/ 㾣shC?O;1(?< {'PH$-,rEqnݺ,uKII9r$I$I$ILXyC 1ꬢ"曡[7(W.Y%I<K7r;vZZh زe )))$I$I$IAڱ^yn ^ \xwdRJ$I$I$[‹/BR~;<[FO@Ѣ,I$PʡBdB$''T9!I$I$IHLW^7;GI)u |O> K­B׮Pl!_?h3$qR5hРHG$I$I$Iʙ6o_w >JPcM`hFPХˑ .I$gCh"$I$I$IrMwov {(QWp7P^K$I@I$I$I$IǶ^~쁶m!>~'1P |)_ E:$IJ$I$I$I:6m/{wCA,[[;@rpВ$IRY(^x!P~RJqfyP(ė_~y}o߾$''G:$I$I$I:\֭_}!%'\W(k³ᤓW; OH$I/A/**P(ĢEX"QP(d$%%Gbb"#I$I$Ikso>En.<:@-It|C,]%K\$I$I$鸷z5<|_\p¿Bص ^ ݻÖ-pЩ*В$IP!ʔ)K$I$I$wW/x=nw$';sgKthI$*C anƌ 2$$I$I$I:B3Sa xQX ~ओ=TVs$ItPʡZl[o @V@"I$I$I?sz* =.N:S^=h a,cT)¹%I@B(t I$I$Ib}7/O> ˗SO'wAFw/L_|jE:$ItxtINlllcH$I$I$eУ N 'oZ> IRO|ڻw//櫯qG"$I$I$IR%$@~ЦMx^Kga`8D-0&&mނbŠhryS$IǙPACHʚ(BA=OѢE7nիW? s$HLLC$I$I$I4oԨs熋w!Cᇡ];(X͛W/{#$I,K7FA @~ԯ_֭[gm/KV-8l(P{a@֮3K$I;t\$I$I$I 5kH8hCjRN)/7OC .I$I$I$I瞃΃O> WB" ?Tc$[ D:$IttP$I$I$Iҡ}; /+VWo@L37[<9ZBWΖO^xAڽ۬:fL qoteI R҇Jq|d駥~nE[U+Pvt-[HK=Uڵkս{wUZUM6[oewIx,Ӿm[sC[RC_SGZJ=M*W*Ě4i"á*Oxƍp8l:>}.󕘘G}T2d=Μ_zU))I:魷ݥb1l(=&37R߾n&Tm;ph̙ UW];vhǥߖ^{M:rDvi+Ʈ]Ҙ1GIQQiwH)x-)}맰n6mݺU۷o2lo4zԤHJ[J~Zl'- _6oe4i$r-jԨG)>s^xlRjР>ݻ[lQVs?\@%[J< 5m*M"=sجY:%YwͰi6`ʍ!JĉڶmN8!˲ ]Gnc;wnIKKS޽j*EFFoTRR>͟?_VR<8vj֬oj: 4kTRDDefJgV?rD9RzYn] e>~ӣ>%Ksv{=]tQ6mԩS'uIJOO/v &hժUҥ-ZիK&O'xBw,Y %4ig?=Hn#͜)=mt&@GmݺUݺuSjjvdرCwqo߮Deeei:˓O>Y322[oIbccφ$ׇ~K*!!A:td.Rɑ1իv3[oJ>'=ixR6n]0aN8>@˖-S%IӧO?7[nڴi&Ols[bRRRԼyso߾J͛wv^˖-y|9o_xnddH/nm~ժI~+%&J^boꫥkK @G-^X]tB_p;w:Օκu$I111.w_~y}… u̙f͚-ZYfn:yR< ;|tE?b7ߺU4HIJN6WzL< <֭[]JIRZZy5kT^4|Wv$5jԨ;w<;TNN ~ALRb1==]&Ҙ1R&ғOJ}H7J_}%]~y''Kkn^o,13|y睧|a9z8y$),,ժU$8q켺u*..Nǎ_7|S'O֐!C'*""Ըqc= \`.^}Ui6%8~\zY /^~YڲE6L tdwʧiӦ~ڵeY /H>%KI&vڵ˴O?TBm(ҧJKkj4魷3gwfM ;|5\ &hΝRUNЦM԰aC}JIIC=dw^$Ӆ.?u$FTx??K&I_-5j$HyI>2g6}A7 |Ԑ!C(**JժUӌ34h }gӧ}Y+-3={ ]屚p˒~Ij {Rppw3H6I&M lx%j޼&No^޽sN-[LǎS˖-աC*,Y۶m%I.woӦj岲3_֭.L+i) Ի'驧L>PэuzJZjk.Tvm۶Mk׮Uv-5k$6T@IӦ}o^{MSr8J ,bbE>}V5R*_~9bwzH4j(:uɓ'kٳKƪuԩ cر)% -e%&{*u/)I:TjVi ijr9,˲.@( @[VϞ=իW/SuԱ hgo/,K;w>;_vZZzxEFF{ڹsUn]ZJ͚5syPJJ]HL:t Lt^oZ(.^|^J<8 7|?%KhڵoK."l nݺСC/0?C[Q'Nԧ~9sӽޫǫQFn {E3Fop׿J>*EFi'OJ'KjMjn@%@@ǥ꧟~Ғ%K?jݺuɑjJW^yz-+.PnqqR|tWf|P{wˤ=4NoFCI<#ծ* P8 .ԴiӔ&álK*|[TkW˓JMN4cfCq&M\V)CDZZV\%Khɒ%Zz%IMKgY…G]&h!M. .EG;モ~ZZV0@kZ=J eddȲ,EEE;P^ԫW/EEE]׈Ull,ec^7H^*%&ȑRLLzJZDMZ\>VZȐd:+QFiԨQg[ Or4e4b)=j卍5C~t㍒Qn,=%HI__@>K.СCիW/uAR@%'_~&RFKHisY{c>5>P.)0@,˲eԲet19[nիJoߞ@`SRRnw9o(u 5o.m. &T~vs4iRjsIk @EqogY֭[QK,#""ԭ[7]yz.իpV\/:$͙#%t-RRR>nzgΘߤIRf&h.@qMw}Wr8ʲ4 YK{<*JJJ*ta`|nA|y#p'-Ʈ]dɒΝ;VRBgH&9"Jz~Mz; gA;nUXO2յkWK={W\asxOgF{N]۴6M9R)ts;vHyYYfHiv< ࣢p8dYBBBԭ[7KzR.]jw^)66Vζ;%'KSx##s/Y"= z4ite=wz1#K|T=BBB.'5JFRjj"""..ҸqҀ&y4zi׹|Եk"#1cgN̮;OG^]:#K|Ԓ%K.wwIϥnBWOVhFHrF^zIj2JHpbG !!.N7׷n5]'v}?,W.6 ^]zeɲ矗}to@6nܨ[ĉ,u7敕eo~]JJ*ծRz}oHgu Ẅvkȑڶm[X%AP%&J\ mmRڰA>\6t\y.Yb._|QjF} ]^fM}G s=pdi8s)I{JO?-5n,=IbtҘ1y8 %nmF  #3SQGU^$:uJժU${DXegg] ~]30/1@gDFJc}%4yIBCV= @Gխ[WnKu饗x>o5j(5JxswK5kV|< tS9w,^5g;/v%4,˲e׫W/!F_ ЉRӦґ#E%%%U矗&NnY6Mϼqo  ;}ZZ`רtRD4c4|mܹwy4xx.9-,(4oLo<A?n %&ȑRLL.9Y2E1BڳG8e\Իk2@%Ç+==u4iFFh ͝kmFzYi vҸqRV+RvҬYRnK)^zI?233\p(++˃|B{338.}ttEOH.jU~##1c{ESK!!%T*˲,Pvg߮թSGQQQ^z*""B))) Gb7!AjVZԄfϖ1nL\"9n/).NZB7Ajر{O{-{rr/߁Rt}۷H?[o5 l$)*JJJXI>?TϞ=u}] I Jj:IRvM7Iܒn22(SH?(K]J;J'OJSJÇ\d: _ԩcw_q]^OK+Wo=bovw>4~jIL4#> n6}JKKS/!䕕%-Z$}4wit͚I]${ɦ߈RddLi2R3%xi3$=Ԡ)*|7<Q~fΜZ?5kի~TT~@Պֲ_~1f̐.L:Tvm[JnHڻQQRR߫Eohw9WJN6] -} ԰ *]tmn U/7]Oz;V"TZP9#} -^,J"Իd߈-KZ\zeiqcid^]JL4#[J@Q_~mfwW㥬,ͼ^hǥ1c5l+1'G?_4ɔx&8xTmerXe]KLLT׮]oﷻ(<&w F\o^mʖ5]pVʈ߀#/@)SL7ߘ7ߘ H7`sW]eB^(:ZڹQQRR@Eqo  ˗kܹںuN82C? JNƍ O7OZLʒڷyĬ۾`_ݥ*u"+JI˗KÇ`$ulg (˲4|p}gC#_ypU&ls~] zxtk*K%+K:Uz 驧ѣ[#@G;6m:v쨉'?+ھ}>s}z5rH'Kii?yݻKRjRnK/IW_-Unoe`YҷJi4d>[ ࣦMjժoQڵ'HZh-Zo߾ׯnv]qbBeqIǏ_n.O6onUȆ OlcϞҚ5_^Ҙ1G:uWvڒtvlJ:W_Ս7h[$66Vζv9uJZXZLv}/PjR >\>\6tlkeK͛KsHH3r>ر*s6'IaaacǎN:ghB ,x}jԨQ5jRSSaw9goٲEAA|4駟rC[$\Ϟ&!ׯԢUDN'3HJ<"=Tݕ-OLL6mtv(kFeiڼyN8W^yE j߾%vmo_/޽s#GEҘ1-Y"u$sԥҫ,˲eg鮻Ҝ9s4`Iҝwީ3fp]/00P˗/e]fW^9pJJ.@ydfJ+Vv۸Q u3.X7bihk23onvW;po J233KsѱcԲeK=ZݻwR G%'K~kIZY*i8?4&Mn] `'ol_r%&~;ڷt dii3qz[҄ )y3qժvW qoAv|bbbԼys͜9RV:zT:*))={̲5M[o56RS>CjJziP(Ҙ100D d֬5j2vhB:HMl_ZoBn#zG8 7://oدC'Mo:t+ &իgwuʈ >Rڵkm۶_unoi>2$7]_Ҕ)&w4`R>R@*3|T@@eW_7|S-ZpceԒ vs;/7犰:}Z />^jP~ipQ#?7^xAIII裏Tzu]s5jҤ$iZhN8!C($$D+WԢEԽ{w%$$aÆ6?.QRدcG{r~QQJl:QMfGI))R߾Ҝ9KAuxm6]veKW_}x5kL[FқoiSށ_@'W0엜lծ]arIL4O_B#K_~iTwRfvW o|ԠA-[( urrrԲeK/PFF6m0mݺ{S`دI~. Ι#\)tt҃J7$]%`po4|ԏ?k$.ӢE$Ij۶,Y*ʾ}Қ5~eΰ߰acGqc~.'K%-]jt*o/MdM>`18X*;;R,~οխkB~Çv#ÇKw&yw饗꧟~O?=zβe˴tRuݻwnݺ*5JF:pg~:v5"aIIo!~K/sKm"(@G=ZT߾}5tP 8P7dB~gG}$˲4zhIRJJt뭷Y:ݒ)S#mcY޽;9~Ꙑv#g˒Vo髯5F4MpH)&j(ò,"믿'|RrB˲zG$I۷o̙3uUWcǎv5SRRnw9&&Px;f3ס԰!a?/)͚eWK^(=4t^IopocM:U+WTrr$)22R]vհaԬY3+N|#e7վgO~ΰa?s[oMǿk5`GX`ML?6ACGZ2< &Nl͚5K7|semڴрW_[oդI4k,*+Op4,K:zTڵLwܵKڳv $ծ-͛gBu-1G,]GHi>VeIIw%zyM]<Z|ῼnfuU˖-`etrrRH^i&)) {ORVVKIMH͛KW^i:;&ED?`'.]\x##c]OTș3'6Iۛ Am"5iҤ4iիW{"x{옴aR|$馛J  1=gi߾uH JKIE71ѤJω~>k~)6Vz3oK=zCQׯZzk׮U=PU`^Y4ffJ!_o eaC&2RZ47w⋋3ABt:`NHK~>gZ3g'="}3<+000r˲kͺmg6je:9~ΰᅴdܽRis٦k`kѢL7uj;9ɯRɑ,04M!_zI_5,˲ego^GU&M4h E;w̙3ڵk+11Q5`/(<sP 9C~^jUR&XJ)i4ץ[MǤ[o5I(ըQ#-^Xwu~7+r8L?IK5}t68w;vy;vH˗K[HǎIǏzL=6^[ x^OسGzMwTCK+w,Ye˖i߾} {?& ~! \5˒ 9/דkזN22gݺY` ~eQV֭ӂ:-~2*-/fߙ3jդ~SFQr"##kw)>/@nl&IZBzwIHN<,?P;~<ܐߩS놇KM):ef(x~R4ukyy 9sLo 1yQi0QP~C>j…馛.(hX3nK^>{Nsr~_QÄp' ^:镤@ߕW լY_ %&ȑ7N470ݥ/ 0 ,@|TӦMu*/o~}̴a邰V-:xnpjU=@.o6=]JIRSͥs{{ݺ!y3]qE!zs|;Kwܑ?Wo@-2R3 ~'o˵kUWI99%Ho%e[yTJ  zWeկ_r|-hZiFs}fiV̼/ڵ]߲LwLp>xЌWxx3'v7Š+uz3W +kYf{E{;o\RDt䈔Uz Jvy CTrI{<*rFQ馛jҤITJ|W+;ۜ)*,]t]a o~*S?7%^m͟/5n\^jjvU ED|9vu?ۊ,) ;|'@!}O/6ږ-!}b>v'poլY3hݒ$áz)44C۶mt^/@b LС oRW@XRWZ;7}8f(+Qݨhmݾ{='%3S(eY-6E-;s\K+n^1@%.} ͛g={Jw%zs ;s8 P>I猕aYoO1@Q"#1c T"5h`UpVi2sSKի=,,' a #UZ[e o|elU-3o<*ZZfބ RӦRHԽtu9'\imb=mZRhٰ]qCҖU*f .xY=)eDON*8))tJ4J 7\RFRVWOu=*ZGzH}xijJK5j7V*]rԫityfp]ӧ={ݻ]̥sᇂAʽ>qy_Ԩ9 #3a*R6R;tj9dsynv?}GBs]-xWҬ]%[(;M̡5*JzA⋋ގA^]3DzI‚?lm<y75`sJ+ a<祫ع35K m:R%<\z )vQK/_͚ut)eiT丑]3ijt&[sp.pbJJ_ߴY0}lYY&yԳF -323.8|)/q{=_&MLw!>-zpYٽ.FDH_~EoHǒ9sF'NԌ3k.wyk5~x5lХ5rpR֓Awj*5kM@!|#$S$_zW3C-]T С$iƌ_Z~ d&S))IjH>k @G]BVX5o\۷/|Z~͛w6tWۻW3[VY {RR@o@ˬ[NSr{&t?.͞mBKHRҘ1uI!!vW @.k.IRF ]s ϡCtRIӧyf͚5KժUu]W6'NԸq*tI RfԻ4ut-RDoF˜k,IR=\YXnZ:uTYYҷJCH/ytkRr`G @.zH4j(:uɓ'kٳ:t`W5j6mڤիWV[r4v;_vZZzxEFF{ڹsUn]ZJ͚5(Ljj"""pIL:t3o3&) ,uԾp[3o]u!wޡC- Տ?'O?՜9sty{ըQ# ¡Cf8Mp[)S^@+@^qqfh_IZT{s=0PjyFZվ0@~/@+*JڵII+J߀c`_'3$5n,]zp4|mwl[yE"*66Vvsf 뮓~Gbbhv4j(mڴIWxHr4vo4]f͒~$RʄJ-9Y7NjF ϥͥ?)c.@@{8PjPz]{*U _?2t v  T^zחnY>\:p@Z\,M"QpxcǤモ,F:rVI$ V>!9Y7N0tۼY?߄/7ڶ|PWOJI:t6G>!=\/RhtRlԯԸqMpH)&Ƴ5@~%66VζS,%K={}̲y֭]Anp{^aYewiPJJ./$&J:H ;񥤘~ҪUe-NYk8 7:<*+KZ.M׿5Νp_.>mI&7u4|mu\EFJcǺ؉ mL1ir3kWt8ͥF6-h#G bVi ̥eI*ٴI5K=[Z^ .LG}_His aYewiPJJ.%'KSH#F Yn ͚%m,ը!/ (+uW7(Vr4n4` ZfMnmۤ5o^yEjEP*I|bBvIuH7,JW^)=CC+UvvlB dP7\oi9"}tt mU.nm}1)o%*1QAJHbb^/;[JJ6o.8>\DE7m&Tb'OJQ0ea Z2S߾1i^/)I:U>\6tl#@qqqR|.Xao5h`}=zH<kP (z&8rd]YJKG䟿o_M@F\II͓fΔtkHbfM [_d4fpXe]xZjj"""p~|_ߢERFtmI*EEoܹL+_6ƍv"^KYYR׮+_Fop!%Z OKNLF(;rD3G5˄nrm>߀ Xegg] 3ҸqҀ&x ͜)-^lB=zH&r P2:KxZbԡR|㏒eI={J_vW 5MFKi6i&3M~ F."99?o _o3_OGɲLw?gÇP{wip)(HjPڹS:̋6wlkq ŋsCvIeI>(]uԥD9RvT.ǏKK~̿R[LG)<*@gH+VY3woezeod4f\aYewiPJJy>Zr4e4bDỬ,i͚ʕRz ]uU(F@Zr4n4` ZoK'5^_6/F@m.JzibA)$D 'McG),%*i]O,Ia!syÇ[S3g̶&H J^(=#Rժ=(ѹJFF^q>파QCSLuJII&w{ʹk2JllbccLNtxB}S js ɿ8)>\OJNͼΝD,˲TEDD(%%EvWr22_7P]/$D 9bB:"nר!9{RRB 8 7:8+=]ZNZF=0ήz hBuEjՒ+$ SYYƍ&z\^4FE_Æf9s ۧ]՗^d4f*h 799Җ-~*9#H[K;J:6mL?';qot*˒TE 8\o/Unkʁ [F:|,kĄ~t1އ!%Z |Ց#a?go>^=sN:Hoo8 7:^*5UJL۱,YtQjHr8l-3ү?$˒U3n97׼9a?<,#Cڰ!ߤl)$DjN*驧LدU+)0xegKZ΄K/5]F4aK./ eI?엘(>mmʄ 1IU]5_E(˒vKH7˛53!o4ڷm-@%C(L[F:x,kЄ7A]zT~P%'KSH#FH% mթcB~#FˎKOp5Jllbccmw)JNƍ (;yRJLO,< EEIaYewiPJJ.|VY;jnh]R w)'GZU1!?gدE >7K|K*s>%mw'ҥґ#RRugY&׼tUa֭ >߀O^kivزEڱÄn5Cnj_a?ɑ22s@kMG'TK!!R*R͚KaaRj/2릤zHQC̔ԑՓNN2O7ӧvPqcɯgO3tcް0SԩRt٦sgO=  /V0륟63FEI]dˢ&~3{ܫRń,=A%/ZۥkQCSrQpyI_l{yÃ}%* SѡF@/,M"C,3t/i.5kr;Rժ&Hm.srr-UrW*s կoo:I&oYO3۶IӦn/:KS wA7W!k%3|m&fXۆ M!9237 8#%M^^a*z.eݎ98 7:lG0e-q 9K;weuK];o^&8rda7L|>t "#c\ m,kC=*Xav:*.NZR:~\ڼ̋65ydi8i7t=SXҦM~E73?,t8Є:w6CEEFJcƔu(_ [NIi`ȑNͽ%%%y,B,K:tHھ]ڱ\:eL &Ͻ~fU=[3eᅲ>̄LǿKLo(syRP9㎰Yzݞ~ 9,jPJJ.JL4McH1,t{}iLl|nU>PjPڻWQt zvm+r`jp_ԩuk6fLǫ3gPNI_|aLIvӥ`3[7{wkWV-*\T֮fϖ .̣yc41رW75C6h mkԐ""kNN鯗e 3IRV$sߍIW\!}zu;E  {""٩Y3 Z2Oj*8ըab飏iӤ[͐+s'҆H&}s҅?,Kڿpߎ&lvNMJE{np<; l|ܷϬ{%& ؽ 6nx*֨Qι1ǚ5 _5_lX ;zQ op }aaҭ'QY4n$  6Xpv5p`ISx薇TZ4w&0b:wԣGlaCwޑbc~|\kuT!zw0??/oȯAK "Y\L,**C`&prw|\ϊŠ#%t>]Z ڲYRSmpRN`>嗛 +Dav 5~Nի@;\>wYhfc$iTipT\ȼ?z@7K|rҜ ۜ{SJJBÄK>ԼB6 uYaK3fH'Kכ!3{L<؄$J%V99ڹ>t@Us}y:\<٦l׮cbrCc+sOO,0ЩW^)ut-sN".΄1=M*2RvoLWHW Zٷٳ|-K:q [yϜ23ӧ mz%]TJO}[7K| w 1]ۑ!j({p9D|7og!OLosCa~[r}[ҿm5I#GJ^T}zۖ8_>3%7)4(|fL?oW'UrCV Sժ&x_O :;&=jK͐˧Opƍf_ktUK:\5kJZx/<{ =Û7߼ :Ӵiٶuv)mpp߾ oje : >!(Qr4n4`gBeYYҼy&7o Zx4it56pΚ{K[H.ĉ&hcofdi7l0ᚓ'Ͱ M5h`a_nnmqϽ_+w,ZJ>%23sC΃*UF6m2ʦzuRjs;3ӄƜC4~ k![7ޮl9wr~ML]mx$\xaVa7m2πatq3ӼN4|;خ4d \Hi̘?&d3`Q3!9,*%g / <2u,(kATTU Ϝ1UM'>T1u4m*]|qrە&8&I|^=|hJфQT1ΩAAeM+OV2-\~ի7<(yΜ1C)|ӎw9q3LKK˿M*H|WҶeQyv 5omBE2eS!-}~j FXaBPիcyy|N*~.yk-lײ 7+pRPXz9~_rInyE+XYkނu7K|'Y 64hQFސzaq-]~yMhe 6a=z\9dծmdEFVaK شmkӍOyo&ո,5ibժII޽;vLZPԖeֿt+kP0^@^y+j(Ւ={дyRńU˝W4V֮5 ZV{>?x/^8Cqo%v!X QQ&նha'W7CnUHH׋rn n}+WqiDڽAARÆJe<ẊTWI_fo_ jݏދ+2ulF_ |쪟.TUl[P&t5stmf*U/7xvYPCko]2KL4!# DWd4fL5e}5s3*V;^/W32R;:Js?z/ or _y{[_UlleZ ~!S o%*-@],IRjj͕P~ @~ĉƍ\ w EDD]sX۷O5jԐ(rN:i.oW)e.55U7ݻ^'yC] >s-qݾpp oܕ1oܕ1c?]yyC|p~\ExCۆkx箼v_njnWceY:q4h2_ PFJ\/00r>+n_l}IPjpxqc\ExK|cFy/}rܨoܕ1oܕ1c?]yyC|p~\ExCO }/3ʻ]I :/cԨQ^ϊۻ9A|9=U3|?R/7Q7{K3?[j1ßs 1Õoןsoߓ5]õ[j|p@^  pTEDD(%%+~qPV7 e1@YpPV7 % mBBB4f] 1@YqP3 e1@YqP3 :š5k4tP]pr8z. /t+22Rѣ/_nwYԇ~;f͚Vbbb4c 6lؠ 5jRxiӦp,YbwiTff&Lf͚)$$Dњ8qeBz*{PrrB~ڴijժ)::ZGӧ. ]b ZJݺuÇ.{ԢE ƪztUW_~Q۶m.9vn&kN3gPtMv=裪]e˗/W``ۭ[lȐ!Zbƌ .@;vЁ. zoC=LEFFTo_{ѣշo_m޼YO?RRR4eF˲, ''G1mttnM0x#G;K/T]vջkce|En3g] /5g=c;kϞ=vKM6MÆ Sff=-X@7|֯_VZ]s1;vyenv۷O˖-;;oرz7u+n  ƹxt%hǎ6UԮ][vKeddo&MȴiԻwo嫯RffnvKo^DDrrrl(!!A&M-ܢFppݙ3g /e˖ U t}i޽]8fdggkպ \x'YYYJMM81•xknݺT*O~hذԦM͚5˕x/E9rW5j讻ұcաC5oyq=ח_~'Nh͚5z7׿ a`@n&͝; IKKӕW^UV)22RݻwWRR~խ[WVRf ݖ!c$z'믿K/u:f߿_@z\`'P˖-ߪK.;v{=|'}Թsg9sFSN՜9s4gxny\nj]vzgua=ҥ̙㎇Mf멧s=GA:fԩSGW։'ꡇRڵu뭷pO3y]{ҥ; qo߾۷7pwﮗ^z cl'x7w3ի͛>ÇUn]͝;W pVqUW|;ڵSǎ{]>Ӑ!C{#|.PXB)))j޼y/4p@IҼy<]/Tcu+::Z~!?O{Fv}v{*##C111Ujժ_~YSZ{~4Ey]tQ'8mTfq-]T~sزeڶmom*;;[IIInr֭$9 *1###CrN>ŋjժ/Wp+W*::ڥ>e=ftM?cuM h̙jٲ \]ò,}WPѯ_?ÇUNIŋp8t%bvYf)''Gv Ushܸ~|%&&JQ&/Arv$5jԨ;w<;СCZt$ڼyf͚jժssTcȑ#tRՎ;cIRHH'ـJ<nj+RzZj4͝;W~}] Ve=fԩSGzʷΒ%KR`>ʩ<5.LmڴQzz{=_0[1bO=zn~Tr9f8}֭6lx3x=jРz-[ӀԠA 6.wIIRXXX˫U&I:qy7n˵ٳgkي%5Pɕ+''GÇϷ. +1m۶z7{nUVM[ּyt 7`*1+qe˖zgIR5|[y5kŋCiРA ӠAꫯ`*Oۧ˗7toJy>W'O矯;SǏwlEz%˲. ,^{5kvG;Vc{wUq׀6((bP@@BDEqőlHD(1wq"6n{\/w>xǶvkGV;U 3E~_4t@ ڵk㎆.(lM֭[e%q9sG7t)QCf͚%y(,]4If}j5] >@}X3naÚԇ5@>vm]T}~۷j>@}X3feÚԇ5kP|>'IzZ+{|vY3fa˺ԇ5kP >'͛7<#5$)W|Y3fa˺ԇ5kP >m$7n\.]ZwYge߿ve* faÚԗukP >@}EQ4t||9S~7o^nV,o+VȀ2wܴm6 /suֹ|:@}X3feÚԇ5kIjٷhѢ̝;F{-ZTiӦ5kV&N/<]w]rˌ=:rJڵk 4 kP >@}Y7faÚ|%QCԟ @(AP @ $%HJ @PVVh"oVל~)++˄ >0z蔕e ]J5k֬I׮]/}Nou]~RVV)S|R%VhѢzi۶m7nL6cτ >?oRE9묳 7SO=U`o{.{j;wn9@Q~6mdĈ5jT:w3z '$I~ӟ~>M%,M69眓ŋ7t98qbO-(W+W'=zd}ϟ$mݪΝ6(={ʬjժr-СC~\ve6mZߛm۶5jT.Rs!(\x ] @W(2~TTTdʔ)y74f)++?_OYYY P}„ )++˴ifN-[_z^z$ҥKsǧCiڴivy\uU53{씕eYpaF*K/ .Fmm6k׮.򔕕eԨQuzw}wGb-RQQnݺey뭷6X+1cƤ]viҤI&O];tUVONnR^^<0IbŊL:5Æ oh"{goRVV3g ˖-K-feɒ%̫V矟^ze˖dMҡC~5_qmyof~{H3hРtMX]|)++7:]?lҥKdy?ҮrK -"M6M׮]kt!۷O̙3tС۲u)//oc9Jk֬\w=oy***ҳgL<9k֬q}RVV$9;";v̤IRE䡇-2͚5˰a /ZCQAUw1&LȲej\uK޽馛f-̈#֜Jeee>}zd>k]Q}fJӦMf9jЧOlv˲bŊp$PBڶm#<2K,/O~sM>}hѢ :4-[̌3^{3}+{{|_^|ofzn- H~裏fԨQ0aB<K/nu.(IrGY;L:5k^{wg}g-Z^zo޻.y֭ˁI&SN6lXڶm݀1cC6lXz?#FI~ڳߌ3of6l|>8ƍ˓O>޽{gذanr=i}fԨQ5~94n8I$O=TzO?=˗/СC뮻fܹs;3;;w>Z VxYxq5jl)"\pA5Gs 'N̾ٳgg]vɁe˖3nW_}گ~>|xdzW_Wtgk׮9ҤI}5!˗/ϗ|{SO=޽{gȐ!Ypa> ><֭uG;.۷oNȄ r_~?!Cm۶^{eYn]>92OϞ=>dҥ9餓2pc*W>i֬Y 4ژQFSNICV|6k,IrC<9蠃ҥK,X,ŋs}}4ϼ$Eƍ(W^ydMM7ݴxת8qb^ml$s=Wc{HR߿Z"I WZU X>s"I1lذu?Ebw\;{[nx뭷X8#$I'ޚX|y9+;w\K5_;(֭[Wg-:tP4jԨe͚5Ŷn[lFūZc>}Is~>E}믿^o}WmԨQEb֬Y8~"I~k׮ߊ$ŤIڋݿWǎƍ>_Eq뭷I#G믿8Ë?g}$E>}va"I1z誶wuyF͚5++V_׊$ëYO*;sOW[8$7jc*\rI}{U׿;T~(ڷo_$)fgjNjbM6):tPmʕ+AIjI&I .6/'pB1&lRtb=(SN6fC˗fmVW5ԳgZ]#FH}Umeee9#f͚\r%UWδiR^^C=^/ /|ѣ3vlyk/~qGVֹosON=Ԍ;6vXF3f$I3&M4 9Vݺu˦no9/N6l27xccgΜ$9蠃j[7o^k%Ib:]?k֬4k,zJ,Y$=P_WYYvm| 5{^:_WRQQQ뭷N.]裏z{o@믿=#[mU1e]xZsݧryxCڴivGiܖ[nu%u7n\-[O?Omf͚+W׾}Zt!Ij;찔gԩ)"ɻ;ͽ9蠃jl!guV:v옱cs%\ӧgYlY,YRni&kү_g?˅^iӦeU߷m۶9SOUjժ\z饩5V7<]tQss\v#V{z뭫1^h?WϞ=~Z85t9}൵x?ueРAygrM7nٳs饗K/sUWS< ! VZ娣ĉ3ql6^'I{?^`{mpG/E]v!~{tRkm}$iQFU[|y^y:U^{m7nnjgJ:tܹsL2W֭3f̘3&EQo7uYvi㋢ȷ,X cǎرckr1cdڴi$T$} ΁XkFegϞu6l{./Bw^r ]UV6m2W߾}3eʔ4G[ny>oyF#G&y7 }-W_}un>ծ_xqTX Y`vf6oEڶm$SO=U;D{Gy$O?t+"IPW~evu]I&eݺu3fL[x3fԺ a|kOzǕ#Ȋ+r'svqQ탔+_Jw$_s'k_z 2$-?|ZnjiӦeڴiڵk***rWfڴi bŊUL6-x`߯_$}-mMڵknݺ;ZLƍsM7eiW^i޼y̙ǥ2ܼf͚:ݻwo%IF?~aQ$PZlYreNZ5OyYlYUwޙɓ'eVYn]:ju<9sSVVfժU9FUZW;CdԩՂQ=XN8?H=x\y>̚5kc;찔g)^G'?k&VofܹImvs\y9Sgƌid ><ݻwr)Yre(r{S{G7nw}wvm@ߟUVe=ӽj3nܸ4j(TZ*GuT/_:a]g?˺u2|<#5x㍪] 7s\oψ#ꫯָgW_k^YdI:Zw|#߫rB3mڴjkMXWn޼yIos0@;c3eʔ;1"&M}ݗw1zK/cɯ~O/_ҩSyswfO]wݵqzhƏe˖eذaUa3<37xcv^z7̜9srf޼y=я~C9$7sy]v_'x"G}t>mժU/<9Cu^x!ÇOͳ뮻f뭷[o+K,w}s?Nncǧ[niҤI :4'xb=#mڴ믿Gy$Z>:횷f_~={v^zZwoLoysWu]I_җr)'?Iv} 0 Zʽޛ_|1]ty#Gk~_d]vIϞ=өSEoY`A5kV9眓>W_}un3m].]{,ǏO87|szGy$zkfy9>pA$tPZlY{;zjve<䓙1cFx# ڵk$sOO^+Ru}.]SOMvrkSO=/| 9r!*뮻Ҷmtҥ͏tM߿ϟkjsfHi9sdyWrue֬YYvmƎnNTTT[oӳnUW]x [I'I&}luצQFKsgȐ!yrW{IӦMsqwG..,ݻw̙33uL:5K,INrgfk&sOڷo>;sIyyy瞼ַM~> eEQ ]fϞfԨQ6mZǿرcv<)++ :thfΜYfe ]Χnʕi߾}ڴi 4t9w.(>hvi.Ng駟kfܸqowܑve/Is'GM7/l…Ks!%EϤ'|2cƌ^{??ڵˑGe}Ə#GfȐ!)"vZCԠ89餓qISO=+& ]f…:uj***{fʔ)i֬YC+/}8qb %5&M'l27yL<(.G@ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ $%HJ @(AP @ STPIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/starpupy_handle_perf_pickle.png000066400000000000000000003453011507764646700275710ustar00rootroot00000000000000PNG  IHDR=]=~9tEXtSoftwareMatplotlib version3.4.2, https://matplotlib.org/+X pHYsaa?iIDATxwtT$! ( R(D+R.6Ԩ\P K/ ((E!$㘁$L|?k͚Sgb!pPNIO'FC# Hzph$=84IO'FC# Hzph$=84IO'FC# Hzph$=84IO'FC# Hzph$=84IO'FC# Hzph$=84IO'FC# Hzph$=84IO'FC# Hzph$=84IO'FC# Hzph$=84IO'FC# Hzph$=84IO'FC# Hzph$=84IO'FC# Hzph$=84IO'FC# Hzph$=84IO'FC# Hzph$=84IO'FC# Hzph$=84IO'FC# e˖vڪYfϞmpnH0 {܈Un]mذA~~~͛TsssuQbsaJIIQXXÍnT?իJ*I:w5k֨O>EѣRJq(C/U\ał'PNmۦx}嗊&&&F; V%I˼$UTIG)}||$|}}eRrrTb GQϞ=/Y'hԨQzբE M>];v޽{UBk>~^K[___kTjѸ(@Ν5i$#ӦMc=Anݺz?PfSyx %''py$=m۶]veNNNj׮~GIRȑ#JMMʕ+ձcǜ2e7(U8urrrb<$$Dǎ$+WNSNU۶mըQ#= *p1c̙3_Up`NOu]ݻw/Ҷnnnrsssssy5PBppuqǏWŊi옘(''233u^_+Vbw(@@ %ɬ\~ vMcGGG+::Zw0/gggURENNt0 ĉP;G$=jPllUjU5JTӦMռysM>]iii4hPǖtӳ؏(=<<<$I'NP hu 'P_~Em۶>5j$i;wz'Ojܸq:v5jUV)$$䚎[y\]]Xǔ,$apgΜͺs^ݯ|xVP((S Q0 P͜)EF|9Zj>}P6m4rHƍX,:}$iܹ/|/V G|4aB5 n JfCHݻ sY)ܺu UzujԨQF|4quh)uB(emm7P,k_!5iR<ǻNʗ/o$I,HWNN,h8&ad~cʼ'mQ~i5j &H Є TjU),,L#F(pŢٳgGT͚5tR.]5k]m۶ռylZ^o߾ݻͲ,k.mZZ50`|}}KիW$5nXEmڴw_I?l}GiӦQŊշo_8q"x/vA999_~Y>}t+777}]f}nn}YbŊ,ϴiT~}yyyJ*:tRSSZ.]Tu֕> =Z*UZh7ڌ=w\UZUѣ.X,-YDwq<<<ԬY3۷O[nUӦMΝ;Eٳ!wwwթSG5^zI+W5jUV]2'|֭[]>&J^^^JII۴iÇkȑ PHHf͚4 4H>>>QV\i'''GVڵkkƌ6>ÊoP)::ZYYYmr?X_*UhĈJKKpk^z2oҤImZxʺSl?כo3g+կ_}&N^ziΝҥDIҁt+**J;vА!C /:^~&azjGoaÆ_/$[N/2(++K/vءJIZjj׮̙c|Μ9z_4bKڻwVZ;fyK[lk^zIk/סz-ڵK7|g}ft꫚={vڥ *hذaxbܹS<:u-[h6lbccնm[M4H5vXm߾]ʕS߾}jƌ~7κ… 5n8M)͛`>|x =۴}vuA=%Iʕ+O?ݻ5n8,Yb3 hÆ 7oΝsZ_-?PNt}iΝO?hذa6]y0 iNi{[S? H/h{̾(&Rw1"""Zj3g\ٳgݻwgϞ.X`y_Í7|fYÆ aSN5jժedffiIرcSSS Iʕ+ 0{θ[lx IFRRR2[ݻy֭'|&(q8`H2~Wkq70[0 cÆ 60gϺ'|bΝ3 0mfX,Frrr[nm~6˚5kfxay[΢8sL77 J3=]ڳh۞^8Q] =?ƍRFTԳԵt睒o>@:TjҤO@ " ("#~>0oWj۶"kIa,pN*Uh޽Zn֮]C_׷~+|Ǽxb)pʢׯZn'NhڵPN ˫Hc_5HKKSǎձcG-\P˗ÇձcGeff0`̙={?d hڸq֬Yqi„ ںu%:bbbkΜ94hkޫb΋q=zN-[GlrqƸ׭dC 7OժUy/+Kν{%39e7(N3YWz~4xԶJ+=|򊏏>ONNցlPnԭ[7EGGN:*~UvmXf֭[/mݦ*UO>ʕ+t-$)''fůANN~w߳g+J*97ԣ>[nEճgB/Wڵkvi7|s$/77WSNz|iܸrrrt qnqI~W*$$Daaa?կ_6´i&nں|ӦM:g[oݻwk- c4tP?&(\E<6iDwV5ưve*ml.v);[6O"k'mPixjQ7&3|ɞu];wu&7ΦnܹQ- CWu!Chڴiz4x`jܹtj}׾}ֿETByxxhժU\移KFu7kڴi:}uUUoqz嗯[sGG.[LNhŊUڵt5j(++KouM6߿~jR~4`M:U7ɓ'~z5h@]vՈ#ԪU+{z"'NԈ#N:)##C򋒒4jԨgu7QF3gbccmZ$ @={3<:rWGAj֬kժ^>#mݺUիW/Eny|t뭷jذazGݻwkڵzw9%7W3glegR\tLH*Mp]P9;bbbTnK澻3F[=ܣ]***J7|uf͚VZAZnZwRիWg}/B 4{ァ^xAvac|OwVJ.(ʕ+zK3gTXX^I#<jjݺn&kdVΝ;W~֭W^yEo_#' Jӧ%K-g}UǬlL|^Nh4~ 5%@{[*&*) mta ͛Wsemkԑ,{G]e@)A 1U~>fBS2ԿtYh~8aԾg@YTzTVz%0 ~+x[o~b•eFJvٲvJs4hP2q@AHzw~nモo_JE3[.X`1  0y%77m[i4K馛 ?"BjҤʠQ,Ft䈴b\NJK*Wv5w%yy>Fh4~a5jnDrnS||8SJ[n)t;{'eM|l,$%&JnH&TɓK?/5h Y,%>+Ltt\lɯזw%U+Vwrvv.40S9L;Rצ>oO68Q=Ð~ܜ7KRFfC׮RBbG'PƝfFi„ ԣGY,מ6<<(I:t萺u릀yyy^zZbEWVDDթS'_Sl1B s=ڴPrr<<̺>?(ӥeˤ'͊I UK*MlVzp# qy r0;PJJ~WIf"288ؚ[֦MKݻ~iիWO񊏏W޽'N^ziΝҥDIґ#GԥK5kL;v{ァ>@&M*r[n$͙3GW#::ZoW_wۧ7G}Nѣ_}U-\PsѦM\h^___s=m/\PQQQ,p^Ծ}BߓrO{n7N%KiGyDqqq6ï;wjРA/jZr{)88f^xAGVlljժ>}XΝSdd/_]׿Cc޼yruuզMӺ뮻Ըqc/Zj?^zYyg믿֚5kqKr?rJZJ-|]ַ~W_}UcǎՖ-[<:qV\m۶I&777W{jڵ?/߯?\_|bcc(''&x -_\~x;V۷oWrԷo_=쳚1c{߿_ƍn~O?5kK.JII-qʔ)?}ڵKO=of ?o 4(bc [o{98-zV6+ǤJ5؁T:s!8s%Ξ=k޽8{5gÐ֤I_7 0ɓ'FJJ}as1?hذ%cJ2Ǝk}jH2V\iaQvm#77׺MLLmaƛoi3nÆ /,6l`H2~WCq0 ~Ƅ 9s Imb > aFժU{キ8KHKK3 üݭف IƯjW_}e3N~a$%% 6Sttq}Y8Znm<;w6x Çmڴ)pnݺ w]^̳g϶.۵k!Ɉ+p̮]O?M7_6:t`쯿2${5RRR WWWcɒ% ^l񆧧l]ֱcGZj0 vƔ)S 05Ν;g37l̜9]fl>|غ>短㺸'N9O} (vwPzedFD]A[owp} ( AK{mۓ'Wvn`{/IREN@[ƍOה)SdJLLTXXj֬Y.pa|}}u IR\\Zl)jJUj+>޵1bx YFڵ}WhnfP빝9sFǏWͭ띝ҥ\\\tR=v{ӦMz?PٳgFy{L l]/;vPjjl9{㏫7..NUTQ*U֭+ũYfp?,=5k#GRJ;w~ak~.|BBB$I7>I:~Ǝ7ĉQzz>\D*-qJOOWmƍ,7c9zTZRZBZVQ%h& b|vm4)ڶmڴч~;vEuQ6mqF%%%uW$ФŜdͲ+#/)tX裏cǎZ|֬Y)Shԩ>|xcwnz\]]u?փ>?X{VrIEy /^ѣGkԩjٲ|||۴anݺM_~\]]￿;wCiŊZvnEGG7ްnskt˻^u͘1CӧOW契#G*338F֭^}Kb |(쳐PyϽq*qjذϟ:h׮]Z|eコxم 8P 1c榖-[^vMMM$-_\.1f(7!'GڲLrXa$zsRR`dHqqR҂RD?2'pSL@ŕ| ,mk.JϢʛ7ߴ&8۴iW^yEIIIz U999E??"""0 krdӦMQʕ%I˗W||ud8pf?->>^KRq=3ff͚U`ҳ0~~~ ֭[uwJrrr}VMO۷׮]7\y{O.| *.~ 6mڤnMC.Ұ(ʕ+jΜ9ruuՃ>(B)_;3sUV-ߤ7,mٲZu}] رc*WUvƍ_Z{n>}d`:rڵkgS1zlڴIt"I믿t '2u[cݺuÇ_XU$URb$u$-uh>/HDD  <=:|`&<1], @ 4… ;HNKYYY&U(66V+W%Q:tOÇkذaڻwƏQFY.͝;Wݺuƍ'ggK~zjJnnn֤jԨ*Uh„ H[nUկ>۴iSێ7NW222lٲ+zk֬>L7oV@@MǏ_6YfO>zgkŚ={5x`=3 R  /ش^ڵk-[***JjժGjѣUSm׮ׯ~iСCպu"ٷo_=Zf*˩Y>#5mTzg.$o]رcդIuQmڴQŊu´i&C_F)kb_wܡnݺ]v/zX,ZbN 4Hj҃>CYżqk;TvtM7O>)~~~]j|JJJR&MCiĈPQ(Ǘ_~Y/Lb˯8(?7;Ud&8_{M f͒1;Բ0'b\p(''Gә3gk͹stU^ɭYlj6-77Wի^~e{S P͚55tP5}ݪWz-{r]z\!slm׮f5gV#EY,??|onJhEGG[x&)ivqh(sҚ5kԺukeddwсԷo_{VN<ŋرc4hUJJJƍqkx.-ʲti΃%ww3Ν%@ A3gJ'.6(4w\=Za[nѺuiPGPB?O14nXIIIzW mo(ʚ҆ ҹsfb{j6m${G emRvJP mdfJ?psEmkkז,{G (  @V4kJi]Hپ.$=eZNetҘ1faC94# (sNV6WKRPԩRsc y՜[H!5i" j٬lHW'aK3gJCn^i:3ѹbtcVqΞ-u|>D(22╙_zWP=~ĉRRŊR\$KRݺRܜZIp9;%љڶ-ѣ3Ν+ngXW_{<%aƍX,:}C)~aEEEVO~UH/c&L0/^}y5=TT⋒4cҮ]KmےIOC!ܦmX IX`]6J{־}'LF-Hvm_Gf͚ ܸLb]~?,,\]]UF KΖTxr$ʂ[_y~gAxuܹ#`yU&= C1B4o׭5+-[fYz (yѓO>GyDͳw(6η:)Irq O{eCDbŊ~?P#G[o֭[+##C;w~ݏ)~>٩S'͙3GZb1c\_%q=PgHKHVIkHG%(Fmڴ(TV)l2+''G+Ţ޺ͣ>K;w&N;vXΝkԩSѣ<==UfM-]~7o.777矷VJW 6jH&L]b}~Z%IRn ///իWO+Vw`Պ:u ~ژ#F_AAAz4pB4.]^ziQի>}hɒ̪y믿7n$=sU<==uM7_TVVu켊ٳgzrwwsssSŊ'xBڵ佺RmڴѡCSOYcG*UׯEg~PPPڵk|uV/_^j3335l0]كb(6׎ӧOۼy6mڤ ]zM":zXlj׮-OOOJOO׼yTZ5hĈϙd{}w-*4< mVjذ~Gs5kT"OOOCӦM+Rj@RPZIZV4ɬ|1s]wI{K˗K-ZH11҇J Hcǚیk>_@z;@)Aew}nݺ),,111VբE % ;PJJ~WIf"288&ߪM6ۻwo=ӪW޽{[O8QzΝ;եKO#GK.j֬vء{O|&MTطn*I3gϯFtt222w~ӫ*ooOOOo>H}>ѣG[׿Zp̙M6)99mx+V~IwѣիW/k5>>^v$Gsݻ5c ͚5Ko/&w1eff|\^z%kt9EFFjK=3>}GQ\\6nܨ={0K7ߨ}#͜9S}Yw-_u^xAGVlljժ>}XDi&=z'k}X~mkSSz0|=s 8!mf&D 3~̛gI mQfaÆzGԳgK'5j}hBӧOWǎw^UPAY xa`5k(,,B99JOSmNZ+;6Kf[#ggnFiƍjڴ6nܨzJ'NTjjΜ9u֗!ooo+W.v?#IzK?:uw}WUT;#Ţ:uѣz4n899]yAÇu}~no$ 6L/uo1cƨ?w?~zjժVZjٲt_NNN222.9߱y?Y58zh-^X>uyffϟ_`[U0~z^ZÇ/4 |||lbTMrxZz,Y͛+>>^ٳ%\/Ԁ4{BV͚5ubXǼRǏW%ISʕ_W^޳^>#?~\ު[ڶm 6oAra=Z]vd^zڿԩ~[;wj͛lٲz=7#G͛77l23ʕ :u;1}qv PJDչsguܹӦMc=AI}-_\~%kaUl%-=}m@ǎ}pEFn+\[ƍOה)SdJLLTXXj֬y14hK:q$)..N-[=VZ)55UVzǻ#FO<5k֨]vl⿘5%Is;s挎?͛[;;;+22RQ;m޼YٳjժB|z-JMMUvv|}}m 7l2y{{+++K۷Zd9LeddL7lPw}ׯ;C`c˖-Zl>BKf}]:u{G:t[li}ڵk+..κ!$$DժU ^#54ԜĉSkMi޼9IO(#㥙3!Cڵf!_eݻKNNRfbs(Fln&=Th4~|$=|dffj۶m3fuڵkwuKFF222ϓhO:Vm/"]EpĐc<q䵬6ʕғOJg^_nҊһ^qCCAeIO NRNNBBBlhϞviǎJKKSʕ駟Tr]hʔ)8qUYˋ;*$U(|7 6mW^QRR~uuuUNN3""B ð&h6m$U\Y3 $uq\\\.{xk`~UT?\cƌѬYͫBBBuVy睒 ۷N\Qn]Ifg){  /`]VмR yڵk+;;[jr63qGZ 7X7mڤ{Wd&g=OLصjJZҸq/RF$/P6mԫW/-Yħz޽{WNhs]4nXRU?8))ISDDDǼ]WXڵ/Z8liF3ѹbf5gݺRŊRZRFo[K'HzhݺuEv̘1dd&ުTRaM@@4h wޑ$yի V8XU\Y>>>rss1ӧk6lݫkԨQ KsUnq_^ZM<5jP*U4aML7oV@@MǏ[[lաCUPA[lɓ'/y/*ToFm۶U>}xb <ӦMShh7n,'''}駪X[oUW'N̍z^zIAAA /˶ֽR]Wuwjڴi֭\֮liZOooeK NپLzֶVPtOarvvm?~\+V,cW}nV}r֭[+''Gmڴd[Xn]UXQk.pO:uR۶mU|y-ZHǫTVXY 6?$ƌ֭[{Q׮]e3o$M:Uk׮U*UzsqqѢEg5h@&MdMNNN:VZzjS>}4`lRر ܧ]v駟VZ+((Hcvjڴʗ/M6{zꩧ4l05jH7o֋/xձ_O>Q֭5dիWO#FнޫٳgK/oVU;VM4QǎզMUX&yN]tQZ4vXM:5y~+VoFoE^{55mT͚5b kb?Tvv"##5rK<|IEFFرc+WWעEߵ|jJM jժUzꩧ S|l9,7..;ۼOMzMxP,p`E_~MREj޼~mIf{̪Ujذaz=d̙3Yw98p@իWdBJvmmWGnn"""ԫW/c=={￷w(7w})mvQi*eڵfuTٲUZҖ_:~|4s4dkP(RSS~֎ZFiӦj޼O4 4ȎQ_4gs/Irs_СCZfZn ;:pkPFj߾rJ͛7}gfsb1O?-ub&DitͤZJEȈѣ36<IOYڶmk}75w\['OԸqt15jHVRHHH|[h^/GԡCm>'q899iܹ=z -ܢu]9 CZNP|l].4_]{fIJho #=]$H..wHK:IuپLze~N4VJ%$, 65ug E$JhEGG[ P6v풲 e[KRR6қoJmJ(\FF!v>*2QGG'n0`'OK}JWKӦI#GJy9QefWB %$,SRɳ}؟@D޽R҉f[l3/'JUjJHXeJI*"_[U GʕVz).""LZ! f OR%知5ʒx=:SaaCVp"2;;UII딐L+/gg_vRJImqwy!3<@);2a=H HLχC&*8%IϳgVs>Q)ڪP_+99\ Ou'"a;bbi\YG?_ʢڶ-RU_ɛ8Y,.o *OfTz!>}Z_~{:tH*_7n;nw@o)*Jڽ[ZXIIUzo$ йsr|W*WǞ(DvQ7N .TXX7o0yxx(11Qmۦp?^oHRϞReajֆJOm||nUd%7,Zƍ5p@m۶Muwgꫯ_i%%P<>@z E ϥ ʊD%&Qb %&RVBjPB@Db۾4QLLrrro>~yp7otRGI|"o&?]]'nlX%&TB %'$)W^^ Y-&Eq믈$-Ծ'Q鉲JOjWt$EGG+::G73m?|3 OJJZk<&ggSZ3I 3B>>MJ0b(IO8y)88X]v$=ujѢE s2Ĭ씤9swޑ<=ӥH)9\'a(-mV(1qΜ,)G;ϯ ϲ*<|\]8è]{=u]ծ];Zlʕ+/!^W(n")efJÆICCI|egQR:%$TbJef)(;ݽD1f$=0<==gUZU=5|ڵKmڴɓOh#P6Lpi{ onddљ Rf5.%&gnM2lyzF(0v99p'oFY@{[8 ooo%$$jժZfF%Irwwٳgpu<=&M)Sc=Ty"Off6I%%WbJ%&PFrrP@ݪQ-vG5%'F裏qڷot"IڵkUf+dҋ/J{KGKV(C!jJL\:s{F<bbb۷v7\驧ޒڴ6n,x[ۖm))JOߥ %&ɓO99g$׷*TxPAAqE)E{[$=R?B7,i ㏥)*J7ם<)}ԳT,4eչsmqm||nUd%7,-JÇjժEȑ#TR1F\4uŋ^&5;vOl?QrOJHXJKM}LpPʑD֬Y3EEEGUf̙3Zdf̘_1bD G \^bt=ΝҊRvAVVW+!aW);;A.. @i8WDyzFH\])'Jݻwkj߾0+))Iw֮]ԤIkҥC.qYy옴aTQ=Ts.ә3$ۻWP=m&Ź1<=#ӤpgϞ?СC:{ոqcuQrCpcطOA͕֬ԱwD(i:}[kܹrrP@ G]^ddљ "77*}:IOtwoij㏥wD(t%%}D3љ񷜝AkV``Uw"" ok֬Y+b]޴iS=ڎ,:}Zl{lԡ#;wZy7='wSAAN99;LU q5nnnnJKKCD(KN</u(9"}Ԣ}c+22uL [:Qr%$,SB2&YwZt<=kHzaT^] Yj*EDD)*NvIOΞݧXI9j! *_XpBGƎtWaaa1cdxxv2OO)&ƬtqBC W*<c>EUz K,NCׯtB  77ߔ^vY|Ԯxxi„L:wV(1q_f?K(*@iAS7?M>@rsw*T0+<ǎz0ԶVRRbK;:);$'yy5RJ&7:{v#IWҧ3p 7n6lؠ'N(77f}bb"_zH'NPr/wX(֭feKKRppSGjޑ:ffЮ]ܹ?l7+''Wլ9ú.%e'C=k bwH*WOFرcT.]ep\i 3wRfrssNUrV%&PBr"(t/%?aÆHBCCO+*88X$=J z78[ {/>8 kY+o[uZIIk%I.''ݮ:u˫lIRzz+"b<=#$Ic*<|%eN::{uSn&ŢmbbbTZ5E矯X۶mSNNTrQZ%%I"U. ,լ)}yԳg OI*_Zff,>SjΊ)!kIRnn?gǧ||ǧ5a]vqB-TիO7 (;xwkܸq[bKKKSÆ #gϞO4j(jѢO;j޽P$QFξd5k(,,L94k֬+סCRVپ駥ڵȈ&9$I;RR*55V2.tR (*a7HJ۷on0 Y,\E_~Zhf͚wޑ$J*>|"C=tE1%''OgΜ.7o 5oEf O>|}Cå+iv)2RڶMjQOȑi*UV@ݲXm-׊'_UKȩS~={iM6O>Q ~G_~O2E <@2dԽ8.N_Z@0 ʒ/6+;w4+2-_*ǷEGg*,lMeVi:NX r7a4mTW$=o]E~̘15jyrrTRz(lr4k9gK;۴a +33^MTppw9;(!:qbW0Zj׷ #SR-k'pÇדO>gyFח .jYǏY~qUXBnnnrssSLLbbbiRi ٳR߾-;2ǓLs"}s7[Xв`$=0z-IzG, ÐbIBWWWEFFj$j6lu;N~mX;y^~laǒ҈RJ4*Ers'ͺ}|"UCZ=8pu/55U?66VZFiӦj޼O4 4:uo_V*WL{Loх-j󒞆=JIE[RSce/5ugiY 'pu_~Em۶>ϛOs;wz'Ojܸq:v5jUV)$$q1dXAAi|f@*.lQ'I@5M黕W,- #f2 22$ѡC>Vu$IrN0 AYt:w,-]mw^BQgΜ/?UeaH7K˗K[ϝW~ZV),L -.۷KҶmRR:e=ұc \B7||\sJIٮm"97,ZTT; *X%qizi:K9p@6|<~4aBo$).^2%|ٳuG:~|e( M D@"R-77772 %E;3ɹn[nz4+)U/&M2anWRƙ3mqI%_󓝝'?uF99y) nW]=[Ժ*<|\]KAF"-ջwo,ŋ5`;EV>B%$=Q5nXEEw}ʕ;ԩ#8S '9Crr6{̬63y-ʗdd+3졛$i)-m P@@;իɭlz 2IOzQQQXuQ%puuUjt})/::ZEz[J!ծ-udVri#9Pĉ6RSY|%'JKU޽{Z"Ζ~|%Rf9evғO՜+;Rǒ䟔R+kۧ,p\$=0h*>޼$4Լ]-Ð˹q,HmJoa&9#"L'ddUbksF.. T\\\|GMRxXyz֑$yzֳs~$=1s4qbǏ&L1Lr%:%U+g̊ΦMr%M ޸8{-77Kɛ`Vsdo U;'R~))u$O" 2111QNNC̐!R8iR*Zi3e=ƍ[v˫8ΠK$xUFF!rs>wokڤuIKyvRժ+0\\J6hapd̙3w8YZIZJرΝ3̫)7W&];umpp^jl׶m&&ԙ3?\%9VvVPPgy{7Tc\. WQP W;u>ON{gfcի_SJKڌxefoLz_ȑwtJI٪tVT``'vrq 㹹z #tp999;w֯_'N(77f7|cݵ))IZ&O6KNE/8?xQǎ}`ر9AA=u-^Q5'Hza<䓚;wv[nE!\7׹y4|mBCW&'K_|a.:|$5ib׮ԢjsqtVbJ%$PB²BwA5U'\IbEcΜ9܀j3OQ[^8Ҽy没CMɓBBsy_Xmod:th_8UJHXO5g\\*(0_3NBɓڻw$v*_eTm^e$=+}?o֭R. OάQ+Bii;eVsު*UQPPgy{7jNHzai?rss%I0`~myzz9#&&F111ɱw( VmIK H沋< sby3Yt}KǎI +No~vy^x͕[*=}$)9ItA99;(8VTsPY #we@6d[NZj%I4bo^#3gP&^-u$Z%uXvѣ̹9S$ggys3٥Td-\x>گ&\TD֭>>*27,?g}6mXuEի %)\TPAvhzwܹS:p\^TYٳT4cF-k/l{urr))i֩SUVqTPppw rrrWzz+"b<=;î7 IO8-[j?%IgϞĉղeK;G0SXB?Kueo%I͎n40nn/\hש#5iRxޗEYYJHXSRbjã*V(゙̋ $p3fPǎUre5lPckv̙W6k&;W0[^,!u(=vaB6.^*z#;wmIʑO Up#Kħpu Uxx;@,״(ӵpBٳG~Α]L, @]X\BzE嗥.]e+JIIfsRibZڴ7å/&MUmaIիNU&L0 ԩRSce* n߫nrs wPf}3*=P<==c; P\̫ZUJN>LLΝk&D/[3oߢsCHݻ 77Kg|SҩS_+#㰜UU;\9hŃ'JKsrqqҥK ݶ{aJk>\6dtYzmm[~7rT%%֩S_)!aVYQ Wb0@@ZTT; *(**,r|ZW:22ۥf϶M:JfSNnL|^N^/#8ԩN!/Tiݸ9IOj>(&&$.wa͛jn3]\_f~2ffÆҀRґ#YkU_*;ԩuWJNIE~~w覛^Qppwyxd@Gcݻlgffj0`"mX{g2F^f@9sfRsѢ#GmUZ56\.蠺py_y9Uman$-Dٳ{NSg2%'ƠAԩS'UPfyJJ t$=( 0 $-X`V{Y!5m*g&8oUX7OYnU{s:}23ť_W@@;9;{;L|0 # o*".lg&,""eW[xkc_OJM59RjVn*΢(꼜yZ ee%)1qNJ* 鯠{R,(7n,"Ţ[ʝlsrrtuɎp㹰}ɓ}ppJfѣfr3sw+Y,f+DNV2oRQyUN-r/>ε,8wm30\UQpp<=#9ޝGULI&$a n{Uq)VV[-Pmnb]U?jUq_*Dæ$@uf2s\2IL2Y&!gɝ9}IOy3gΔ$]V~|bbb4|p]paC_$?Nes:\$yt)7cpqTYٜ|e{;;\֙D$]fjkkWBKRRNѨQ(#l&@DpBIu%(666nnll4v?LnN( n<ќ,(0ǎ 5iKZH{,Uv\OmTeGڿUْ~ PTCIODٳg;Be7FW.fװ;wJ|`.;W5̄qI_oO %&6@KNg\. RWWr8FȕS@Rccbb+#\edeƄ B'"jmw< ӋimiVںc9ӟg0Z&x˛P=KUX/(y|'"Z}}?hD}r8G?_ I[HVI֙j_o?\?^:\̤aI))-##.X^ov_=,fcc*;{n.p f)7V]G$=1RSS[ip8φ12l6$t0 ufciipe\}s9`@GtN{cuoSE \bÔ|$C{>k:]pݞ*Yj.Ubb; Hz"bwkZ0`MNիUTT_~͸zT\\'GԩS>FEEh˖-zObtckܲEb8ƴ4isͳjOH0_GNety:jSVuz**Z{]\$,Tbbo]LLrrҺ@$=A-V'NW_?_|Q ,c=iӦiɒ%:ӵi& 8P6[VJJ֭[ 5hР=@kl ܱCzD1҉'6'6G|;V ̩eMq&3/T^B..IRBBO+9y"egPRp]Y1bQhO BDx駕.^zIuuu={v;3ug\Ӝ9s$I=驧.IZvmP4h&N>H^xa+Jnn9ftŭRN"ҥ֍5q.nv 9TZVc6eԬUMZEE*9yjkZq7 Hz"b{ZtiN'=rzjqeVU3fg}T%%%r8JLLTee>C]w:r:UUU]?8Ĵluo|tcLvաakuT_ongI#F 3ln9z4tdyip2wt9VwY+AtF(>UU}'یy0ZqtIOD;vhĈmhǎ=zvРAڸqcPe? Ða?&Lp{W -@V?u)=PzԩSV-XJC Ad{mv3g6:| 鮻:Zu=d&3tM~УȐfSIII%%%c5W~~<OHnfmv H_.54lJH_|!!%7ob&2njN?8IIyBne,Ҟ=K=X T\LOabbbvQwseNгa; .KW\q^z%EEz׫+R=bbb\b/3gM6MSN#<;ְat 7oֹJɪTR_F ǹsO-Z69 .M7Iӧdffۿߜ,.L)^ݜkiNneU]FWOɫ8I^o'TZX x â19>Hz"l޼Y֭S\\&L.SSS[J:(=Ú>}4l0⋚={.]Sjɒ%73ζ UY--(0P}Y)77-igf64H7H~k7cSvVR4d9m3Hw)w^ t,e=찇T[AޯģuTQQI~9ZZrPO#=^O]Gߌ'"ҶmtaZ|v￯ӧY>{l-[L?Q嗛_*os5gڲpҸq .qZO Co&©'?88BX]F<6bҤHz? 鉈QWW/$ȑ#5| ozl9̺s mWkspO8>'g>*|vu6oۭR#Dĸ馛'hɒ%:3~z9R-Z/"!(.BPhϟ~]MdҬ՗γՙgWfq~ q(4#6x:N2jjycc}3ls2zMMvsInwHF[]yuZz& @G?џ~σIgK=uQʒJJ̋hUkIf&<>=t7-W?&ƚB_V沦ǖvjn]2,_׹ߌ}W^ы/c9Fŷ|ozҡr#MG0kg<ӥKCnY_z-B7w5iѕfg?s|?ob3IݾJY/|<8d=++?-$IIGih3O[r5Ϝ)]zyj[vT(:ʫޗK7AcnzMƎ W˜zv @:*RabeddUT bRr >JK;CQe^K$y.-m$)&vxtE/·N=!+eyf-[>˛/.WKfLǮlwb^]^g?=39aNϷ祗^j}~.Eִi 毿ohvl}TC -=16lؠ#G*11Q֭ȑ#n:tIBcz.ZN$kJ5(I TkC ^6z3%Z]ͳڢZPޒ hy- ٥+zM:sH<7荇 t-JkἹu!Tw^cŋttmt%g&1@ܧIBD}pp,ӓ]zٖx-{LϽ1w }a)^LRT +EeP$i|r6wfY0:CP 䔹lF%$GiZKͭ"ZHb1Y,?ߧhM:I]oZQ(M$Mҁt:[2|[ J7ڃAεr%0TQ?xpvː[1QJpPj)j]E]-~\C)}avtQF_qt53;@ݏ6]\$+X!eI#F CL^6%=Ŝ 6s8D-f K~9ّ8E3㦦-g馛jkԵzi揩뛧ϛ՜Ni}yN;աDsQ))=`Ko' tӣsUnste֙izy_˫{Gr-JDX=;nh.6K!96FXƌ/TW2Z%.Y#8eY ZU&mPXyk6^hJ>Gי2$3̿Goמ(9^ξ}NV-QWeOW\GO1[kh rOvj/QR堞HD1"%MiJmݲ 9(|BZη_Ԟ{߽\Qv`^sCt+z+q7 [N-+/7Ruj5ɮMSR_ l&Q 0V󂝚j&Lkۤ|nj+:ܦy̮D1r #|q4ߕ[-WBOS0ѿDugj֬YZlΝZ~>M<9!zjyӵlbk%8h_-=or\NY,ҮMZ%G3FjXCHGjDY,7ͷ.oocg{5M8ubb̮Z#Π~مa~;;{MolSڥ!zMzU*U^LQ?+:Z%Ikivh$sueqr:{GOr)Z:IӵBOQjcs4cڨ(kN,V19hrKʷj}G>z^B& QMU*|CYuN ӫ:Og >YdhI3{zog>oYdhSNeXY3zr~Zt0=6h,MV쑱XCb4xt|:m_w46ZoW[#~:-.o0v~Lr[Tl fM+I[**ʫ$ĉ ^[} ik[_m6jJĤid޴I hV |Mw<|$)Col5HŊGgy/^y<@k2*`I~x{ Ck?o9Ԥg`iذB4֝z{-u*+pi:2{2JRD)c?c{٦i쿛Tu$ZR5ZF#KZ&*V>LNVeJ3{-ܯfǕ*ix̞ |(uPˢZ*[V.7 *-{![Y_]zϸyuǎi/ fmolaqE}=U7Oz_ˑ$v`'뱛 (IU:LȩXydR뜛Gև[3{ܶ{ׅ*1Ό.qkM|GH;7[m[r:>2ˮsړkN}~tLo2UoTEgDU*p4vHns7/l]^n>6MҎjƍȑfb0l FW*+3":ԜdӠA_bco_:dSR%Eر"lmjӼe>=UҮ]󯾒yG:|^cvvY}ʶUU50X̿IBB岯n?qxf3/|uoՖyhVϡ^;kXѿDDot}iݺuѤIL&Lwh=mos M &&lXX\x`^}ˬ-e9hEyv&E^CCFl1d*6ڣFF5nmTͥXKΊ:jeSuN>wcQa#Z]5պU늖aQ --ޘج^y Kȣ(5-{wDVC6a|o*V5b̺ojJuGtg(.NrzghյS^ٴQ4HŪUjhͫTԮ5Z]9*񾛾FSVEM*UQu}ŐE5:lsVQR^YTq:\rN6Ԩ(FYԨQȦFFe31XQ6hl6CQ6CQ6)*PUGɱ #ɐ (Imr KbUX0)wkQǸX3F!{^*~~$i&:=?ZU+Q.}uoZּnޯ!;iԗo}#{Ou$EG[m(&F,U^mV[:[ ',׌*llc6_mjd1Z!ZwD&ʪ)ʜ{ џsXXn[ f'o+_*^эjEpԣY~Itt|?eM[;=5V (SjU.zBWɡxjj긙*RJZ"aʰ,zZY? Z\?@sg -k9\{LRq7<7cȪ=vyQ|lK'mUë8j:U VջlsG^Wurm_Y*RJeUǽϳ=TA<j"Cڣ;V^nE)CڡarGk]h[{n,`cj ժS\G۾ѩ'ӔT@hJy4oKQ[Q^e潮Vii>66<橱\q< Rc׿^Л޶v}!I_[*pN[#bTnJ& *2[4M.Wgwr@eY'#^i.Ye_*uRU!<;ʫ'v:أ_c GE&Eo0Ɯ:l/-|Gzxfys;=컹iJO776ͩ?ͤfoҢluUVxfbMh"tm%>ެǁy{0]/}:j .AǷfƳepzͿE{cb1Ǧ{ҽ㐬.ii7%WԘ Y3V.cdXhR{R*nQq[$PO).μeJ϶޵g鎼7yٺfI4sھO͋S$IryͪoXh>։:Ai߁.:/9[IKmum2hޓ'NS)Bt*JK}۝QA~ Ӥ3>g@ٗV˪`/izSr*roJdW2o*d,:궁]SPN#^ZhZTlXwN5oŽ>dM(~p|{B>N~n^~?[fq6(4eQ黕ФJ|w\pYQ-}}@hۯԝ#\+hHRW_)k&bgKϤa Ѥ4ݰi?ڪ7[ S$:SK#Ie%?{g#<If"gV5V~Tx~[(ݫt5?2itnMׇۭ]?xHS?jGVֲONl[ܟk$Y|u\Ye'ѶnYKtOj}~ys+[NT$3);ݢ r5,*KޥfŁOпߔP̺׫:GV:8]8UWkߩ7ꘗn%~Qk eDT# ex2q7F;.~HV>o|!JRiPͷ*߹JұwB]˩iYtts 'LNKhӲMRܶ,LۨTΗ$hy( xRS+9EE%. $IғҵH3pVvv{ǃܿlcwh}K+Y,@#66VGy?K.կ~+7яp_aK21A]M_:ETo~O$TKU0AFK==?=L4]M.*#ӿ1毿h]+]xM6 OKٖ-6Z~v6$vn\4KCɅ4bvɏNЦڎg=;ۦ[3|PymLP752+7cXj?.LшĚ&G=ZYytRT$iݬKXn}jbPeM|gYty֤mZBMY( ߊ|-g[z!w챭M{TyCH.IʝA]W>tp#r1*Xobq.{eizˤgCTy^YPwi%]{m{ED͝+=x1mZ>o6SNg|tiv*|k?9{*8?1bn6% \' YWjФ*-H..s >U"}ZEʺt4|Im4 u_~Y0_32PV}TYx;pkKw\M-$Ԝ,^Sҳ#O\6mZyL*,XМDOGIsLZ?OKE76V=~cU'?ҁ|SSn=ct,brӃۡIIsIImۚ׬i}`ʔz.9d";D1l VgM]ӆ|A(tj*,,O?N;#Lne5׻:Uhd٣ =U~F-:|(Gh.rqL;^ѧ۴te7}-)E]>q$ҙBkWlwokgWPГN׀]~w^~4#q޷;kQ=~ C ЃqxnUU}wT^>=*[:\#X[;5R49^y۝zՕE;fOvt`b  B_Hz"qo(:!"??_x<ڼys/4Wnd-o*o҇j4e=Т7u%'|-֖hO 4wIn^ |-z-^=X:<ͨ}E|?Ws4ӕoפ'D M<yv7-/GB)7C}-U}ݺ|1G?|$)uI'ֆanVy;x_ORrRSg(-Zr̿kiھ. ~dcLL.|_{$?th?gD@\^{VXx@|^yd\r#k_ ߏG[&kѓۯ[ {DLgߏ#sZYe$`K@KwTV\ݲX|RSJL,y؂m0p1'gFXgII!Hz"b cor p֙j ~3w}~_jZk.͙3Gov" .BpdQLLRS*%T홽"DDjhhPlll).Bp^uRT}I\PV9ceX-hX;^n >'s-[_111Gqx0Fֳ͛ZV w(~iTA䓁ڰly#ZCGuTo3־}/uy<5C.R|XYfLLrr*%2/!鉈_Rgݻz|rmڴI<^p@ԩ-:_[6v p;UݞEW壏>ү~+[N5554i~_N wh=>HtSBB @sLCaf$=>,Ҟ=K=o5*+kWB¤Qa5oF@@|y'@MUT+FK\K'"a?{O{m~a x<@eeoMo5l/v.$`rsÑ+I )}IOD'ZtOAb;$ejkRyySke.9JO?S#GޯSj;JLԛ'"_W-_\guVC BZs\e*%e;~1A7&&K99 i t'"FrrF0h0^KrVU}.#cXiig(9Dlݞ#x'"ƢExb=SEzYTV%{l$И1*-t w@A/7 8PÇWtttk֬ Sd=+??_x<pVUէ*+3欩B0YYYsvj$`1 w@0.b{ 5hР6c,\0LFUUUYYpN}vIx=@ii+- ~W11&@ߌ׿G'pBC<"PccWHtGNY,v|G#Gޫ3pm3 '"ƬYTWW;LCѭ֗)2@_e^|KrVV~*ɣÕqPJI@7DXdICNgY칲۳ڬwm_'-A34fLRSOW\@ȐDĘ={vC.W +#Yzݪ7USZpUZJJ:VVkL#*$=rZ-KJJ S4%Y$HTWW Iڽ;_ UUZEE+-4 rRSOݞsлHz"bg?z=PIӭ+.~7~8%Y,^@` w@n6zGeOh3<p:m"9E~UWQu_K״yMj'"FYYF)Lt 'gh׿10 lYŭX)r'iذ5*,G)1qR н-"ȑ#m6Iرc]4%%%emܸQgyfCȘ#Pzْ[(&&[X vf[8D6sѺu$I~oY?O;Uև~>[ٲX,zWlÇ+66VӦMʕ+;u[oU{oHnwv֯?K~SzPkNC.ɓWk}VoٰaRNBdDEĸ}3fƍzj5JGydʪĉuWo_Ԃ ciڴiZdN?tmڴI$婱;oVZ1ch̘1O;y\".VF9۳:UZJJe^ 0,Gk,@F r:3ci=Zb/3gM6MSLGIСC5|~ywgfSMMnn =СCUYY H.ٲ@Huu_-y>~8uUWnnW~~xB~Vy;*/_2Y~Vk|u*$=16lؠI6ojbdddfeffq7o͛cBz:0$iӦаS$Y4MߠJJ&1:Dxz8111v!p:|clԬצMs}oQ}&IR|~Wii)9Dl60#33S]v?jJ2ڼ馛^xA?яLqzڴzjyYgiܸvX]dD0`>3͛7_OTEEEx-ܼy3qy0 UTwUQQr&&ɓ?ɐahڸqcƍx$I~$Ӽy|!6HqNaxU[**Uy{x_OVOw)&f$,6)c$E+5\2e$iժUo~+RhxLa~=К=eؕ|E(1q֘6WWQa=8q9+$=1~iРAURR"I4hnfg?$vi:3fz꾒$UVWTP0[..56VH))i*5%%+-.|@ǘHUUUtH=Na֮=Y &!a>ze?ߌ8ӦM$IcǎUFFF#bUT pTegiĈE]"IODZ͟?_<^$f+#<"@r߸uu$#WiĈ{pa$Iuu*(gpJbbh Ack㏗$}Ǻu-G sEuvWHrT[^7J))ӕsRRNY#WBrp#鉈?O>ٷ쬳R\\.C&陟|yNi~˨^{q'A!::Z W̛7O@\}׊f Q6[װaw(9D%&--6zIODy鷿x EEeNgY칲۳ncmj̡0!JN>Q|bv),,TL8GcժUZbz-M0A/_#D~_kѢE1c^{SO=UUITJ)1(#}8EG=?k:]pI,aF3zhz뭚;w$w=j9|y<m޼YJJJ wX@8EZq|!h͖ctWRTlmӳ1bQ/p訪Rrr2qH#>nk֭:toYllnݪ!C1"H^W} ?No9Ng\"IR]] f)7Y9,YsR!}^ccbcc[-SD@]՚Ieg]ޜԌRNBB! UW]%[РG-[|y8yuU;tm.=lA#>om͚5+ `̜aW&~pmJpVU} uZzIO#H.WQ11Y=:ej| ǻ Hz@ٳTYjlKH{,UvfW_Hp~NW+>~<]"IO8GTYnQQrq̖-\E*,\s$=_ڦI $IѩJL䷬m vSM:oUMZ[$=>&??_x<nk +s$nߓag*+{|Vk22ΦZ@e1 wڪRrr*++pI-[gw+=,IRtt˴X5x*t!ChԨz,v7?'nk-UUW謁$Y%y3dȭJO?[[eTQ񑊋Tf5JM.Ir8ƇtHz@'9EڳgRUUj߾$I۶LMGhKqbcl*,G99w*#It*+ۂ3X-ʍRNB) IOZ0]JҎj[-+(囏+=,%&-[MoJ5$g˸, g '~Vpn]lTTjk7fjkk߾9h1bqoέ[sr$=D`[k6hlj3**]咼qJJ:^CtѾ}/(3N$9L Ζuxt'Lk̓U ׭r:wa %IZd(!!O*!afsh۶EmOIIf<ߺ`[pt0 #AhJɪTRRRs%![ʺ^))'K֮WM͗r: H U_'Y f)7ـI֖/) f$=>|dZ{/}puuor pt굪Y'{Ryۊ=LӹGR2?A G*>~'ua$=?٦ږ6Ggp~3@ >\6lPBBB"¡֓Z@|VQuT,kР5!EGHߌ'B$=@i m~[gIٖ-ON>AN.yɓuIz⋺+ciڴiZd^z%mڴI$婱Mo{n ~y@q41 \"o}аM*b#%|eN9;3ttauLpIO@f̃ӦMӔ)S?Qz5tP͟?_~{ӟTǏUW]w=СCE(7}P%̱0Jjsq GLLbcGVme@Æ~ ٔдZ_v~UXxO]ҶW۽O-׀+:z@CIOto rzjqeVU3fg}TzJLLTMM}]]|{xqŞ=Kiw*#I0r:l~tK|ZÔzGx>Re9$#m]evz"IOhРA4H7n wysӔ)SnwhM-=7} TP0Kȕ/e򮮮@7BXm^'ypklōT\h"9R11A1Vz\i}]@9R֭ z{.ݮ|0p؍V0T'{6[媮^%I曟i˖jl,gM5SllSk͸nֲg}6cuI2uI Y$=?222dTRRjyII233Czyi޼y>4v"bg𪱱B.^|SSR2 We\}WkĈ_+&fPcgvU[b%-111hM:UK,Qmm̙Ҹ{'$fIOu|Ԝ4k]iھ. ~j]0tutLѪ %J-$9b_KG3u5בDOӧO=_`$iZl.۷O/U\\<4hPH{[@G:l2i֔Nm]\1@H[%.u1T9W*QiSbV˖룢RB޵,'O>Ya 7@w$-5hO=Y~g[^vw`1 CO\\Ū-P}7jl,UuIRaUTFY,Q2 :F}ajS*I7FLbl }&v^?'Iw=)=-+y(^k:ٓ-lOk*IRm&[Ó7rq<:54l$m<@hљ+Ū딓s$f\ee[5 RlԢP2PWWY}VGǩHJt9f `ΞgWRpNR [원p+tTMr:waΝߢrJ$%I;vݪ\.-A6[<:56oeVNZ~bhUXxrrTFy^hC.|[HEcӳ۽㡦e⡉Ñ"+מ,e|͛I ['*]le{AbUVkVXv.noVWkEɒj~T &ْ롘@A@za%I|sv|X]QQINmuwWZ:,f-u%)<d`wD n1߹e^o2F|)IڵڷUF$3)ٔzͤlmIoK*.nXsy5^^K^S6[>SQ<&ed#%Fv{=fWZa*2X]$Hz}L_.\{$Id|4zEw&!jvٙjWZ=_aTgj^r/Zn^I7ܪ;%գRZ}$^V7KQTTep &)k^U$A=گg_3>sy@m[#Iڸmt/ecϲxl$nJQLLl6ǁbqyfX;#cSZ**z\IISzJF+`Ʒݺuo>'g$u͡3b 4geeNJ ^mB@'Q;Zz,$,(5bbI$esrWE^o&-@QꚦtԪLQQZγrdysѢ,vNyjSTTR"k }\ኍtTccY}lT%&askN55k7hذȕ˵[N9аMN I63QfXbd9XodPtt ícGŮXejKK;KއMYFdXzOJJu'Z Y,Iɡ]^o A^hL| *3IrN;)2yaˤUUqshl,%Sr ]~=*+?VUէ_.@'(%$OJ-全̼VÆ@3M+ڮ$퓙C,[%L*-WqYw$fKPLLbcsd%~Wb4tnӹM0IIIX1`ODcG"uV fKlڲ嘴߶=$5Axģs&{Bt򳲮VY]Ij'5$khK۴n tL20"5]MTuⲹ+`ƹlS >$d&SHz}Lӳ& u=Y~_yij/ys=ԅOw,(oo&0%Hz}̼y4o<_YtV@OJR}zR z2)OwJ LHz@dOoѓw$C<#Bh쌞LJӝDH\>j u=Y~Wj`kj(Oue3Σ/%C)ԉɤ?$*! \WzX`jRl0їDcg"IOB\ɤjwcJ3{G_JF='+??_'ܡ J P'z2X`>k0A2g1 wڪRrr*++p¢zVɓW+1qRq۶-j kP龴erן}nws@~3Zz@}顧?+%$= LH3'F@D# 17nLP"IO7okZ*ܡD"IO'F@D# Hzh$=Dp|;p:rI Z=JRLL&aFVUUUYYpkm[,Ԉz/ p~3ZzS*#㜀cbh Z# Ot5@wHzh$=D4"IO'F@D# Hzh$=Dp?0$IUUUaɚ37wE$=>Z4t0G8TWW+999a!a1H}՞={(jݔ)Sjժ.ݕ;Ov]UU;w*)))X#UwGOO]8z zЙ ͨ _uo.~YRS=N]8K]5ԅ/7QzC.jeggjeCh QVUC fu\W>nvIIIkwGOO]8z zЙ ͨ _uo.~YVC=N]8K]5ԅ/7QzC.:@7o^ߙ}ݶq+G ZzЙ?kA]} ח^ BC]輾ZF.Pԅۇ5}=${[}VUUUYY'~ u0QuM h ϲZpv{CŠ P&D]Lwh Hzh$=D4 ÇבG=aUWWz }G? ;wOָqtG꥗^ wH@؜wyJMMՅ^P^ѣO; l|?@`1 w]Ç׆ P/~[jСz<NjkkuGЫTRR<kڼy}UWW/;W466jܸqzɓ'O?3%?CȖ-[qFy&!Ir:2 CQVV$IPYYYxOVbbbzʕ+5~x |bcc5m4\SǰX,w)Sȁu[oսCuB'NԐ!CӟT==sz.4Yz<ͨכu$ݭ{}ݻwF@:z.*BV'NT~~/,X j͚58qN?tݻ׷MӸlO{$}Zz7֯_+tF«1ch̘1uJ@u!%%E֭ӶmϫW ި TVV+R?x ު @鉺 pH3I2^~V˦Nj̛7ƽۥczO?ݍ( E]ۍ!C999Fzzd,^'z\o\z㥗^N@ȅ.444'x3T@H{\pAO ԍO>Ę9soM7d쳠ʨUuu$FƏxP鉺pjΝھ}|A]wu/艺PRR.TVV?xP鉺`*r)+B*R=QCQ0ucԩڰavޭ駟: |?@@@X߿_G j|РA*..p 8q9]y啚2eJ(B'p(艺PXXOhM:UK,Qmm̙ƨG]LD]L?`.&B3$fϞG1 fSN5/` D GLD]:f1 ]JB1=D4"IO'F@D# Hzh$=D4l28wu~k׮'W]u,V_yY,0EK.D7o1^/|˚?яl?o$QFlukbh֭~]j*##Cw5kִbG9眣x׿O<#F(66VoNPJJ_|󍯬SN9E7pCۧXo֭ӕ$M6ǏWvv^~vcHCͦ7zG=ᆱ={?? RSSG?Ν8?Ou-/б>[X,[%J%駟I'QFZg=z:,UWWnѢE:_ꫯ$mݺU˗kڵZ-X@b YVwyzkV?Ng}V)7/\C ѪUzj~튎U|)wڥc9F'x$駟D}GO38#`Kղ2}:?5ꩧ4g۶TSSg}VFRzzzuSNG}aIHz@ry)//.?p]}:UWW=z?n 7ܠ .@zG'|RقtӦMZr$3r)5kƎ\=㪫 mreiΜ99rV˥gyFGu!`Mu6u{{-[~ZwӧkڵZvV\O?]gyfg\\:[eee?\$*ko]v駟)1.ZH_}wոq3ZJf'O%͛7.[VSk>Ui˞7بիW+77׷쬳R||}Q&$O>э7ި:KǏn[ii6mڤ;Szrss& 'L>Z;~ɘ1ct7뭷f&[nՅ^羮tL4I[l}Ǧ)99oyv|4&hӘX,Z֤M6lؠ:*r >h„ ?O>Y߯oFc/7j޼y*//o4lꪫtwhѭg_}ץRSS\[nջᆱ k}'0ty,^7p}ߘVjmg:J?U\\$/WFF=\}Gڶm}xڵk[V͘1ت-l6믿f SPP竦Fg}o:^ZvZr >WUgsss?I8qV\[oǎy}ĉg1$\.͙3|IkҤI+t7j]j^իuGot*..֝wީc9'<%s\!=u< @oQEEp'|,ZJ=X Z^^⊐###Cv,KH@"-=D4"IO'F@D# Hzh$=D4"IO'F@D# B_ѣG'w8$aFE7n{=%''kO)Bdʕ?~yz뭷!'~>[ٲX,zWlÇ+66VӦMʕ+}٣L4uTmذAwVrr뮻 Xޫŋ\K, w:$leXTQQ!IZlRRRBvpB}8tIyi޼yC_ҤI9^Yj/_vپ}F/ByyyIO۷c6`p IJKKkwH"bJ0\.Ą; Їw\qp`n3=Rrfޖ,m|Rsa&Hs\R4M+Ł= *p8z>qs9H7ھ]l1&L7WyH| u/i+VaÆI,ҰaTP!eΜYWuIի-*ITl6֭XIjѢtxzꩧ={v˗Oڵٳg{'N(]tڵkcǪpŠrxܤIbާy_{OrR|b~fƌr)k֬*Xk׮lvŊ*]2gά'O֭[zw+k֬^6olw3fPBʒ%Zl .=lZpjժ%wwwUZUGΝ;SO)[ljҤO6M>>>rssSR4iҤG>oTT>c(P@3gVŊf͚{λ`թSGnnn2e<<|۷ʕ+kjٲeǍ5J*TО={4dرC?*$$DK.M۠۷oO>Ѿ}l28q.O"EԠAڭ T.]vVڵK?Ck֬Qڵ9sfͪ۷/lO.Əj̙ڸq{=s\~]#GԴitAɓG}նm4|߿_mڴQƍJo߮ݻo߾ڻwիO?4A eȐAڵ{ァqѣG5tИΝC>SppiȐ!9s#wܸq=zFQFj޼yk7TppZjW_}_V|3gΔvء~7P6mTfMaÆر_.Ʉ ТEt! :T~.\hwM6رcڴifΜ3fhƌ1w&x15nX[`/۷~w$i^{wY)CR%E {wOmTgV XR+WX+Wܳƍ֡C7nYfYw'fe͙cnݻ[d֬I .l}Wv*T`YeYG|I+<<۷-///k֬Y1^{5m۶1ԩcvuh<Ǐ$Y{[eY/չsgXeܹӒd]zղ,ڴik <==c_`3gN͛eYݻ-f?~,YbyxxXשSzgUZzyѢEc=800Вdݻ7fmO}AYeMmo۶[{?mڴuͳ$Y6lY7bdɒ1/n}vO5j|X͛7udʔ<Pdd|IsߺuK=$)885j ϕ7o^IΛ7ok ӱcԽ{w5IDD<==3؝gѾ}} UVMeʔ̙3hΜ9*\=]>Iv??M>]'Oԍ7+ڝL2J>}cooo8p ۄ|ۧkܹ1,RTT?.H{Ο7&I#ծ--Zd:;:@*GHa|}}{–x*eȄ8w6{w^=Kʝ;ϛ@ҥeYvb X`A~zG_~l٢3:GP=>@ڵ{舣k?y=Z5jP_j=o9㺕ϡwvsF+TP~ ޽%o;&LV٤~ R'BO Ȓ;.'nMwɝ;BBBbvYfj֬|}}UT)8p@ⵕ,YRW[sWfM,XP ,?6m%SLtOAdd~wúp>s,XPsACe˖դIVZŻ ԠA5h@~~~ʑ#6nx$ѣGz|TTI:{jժp{·~~PyU_}"~<<䁟GO?}uMq믿Rڵ3gN^ZQQQ*Yd'۷5a5kL[nɓ{ܓO>۫SN=z*UsiÆ *_^x_+???/^\+VT``k7k\r̩VZwUÆ U@#.%JЬYvZ-ZTgΝ;Uh#!mB>~iW=zP֬Yu!_^'NL \7H˗K+թ#-YbPpt.yy/РATNzԢE /^$eΜ9c۷oCg>ƄȐ!Əϟ_/$[nܹ:u:uXb1]1c-ZҥK?רQ% /ۭ[xˑ#.]G'OּyTL=O 4f9Re˖ܹs5bĈN:i*YZh;w eOkԩ7n*Tui :ѣM@+WNuь3(s0`ri͚5ZbJ(s|X{VVԶm[U^].\L\ }W_5|$__飏V<8f\@=+Wa͛:~-*77G{ J3B>3MѢE~gG0{lbMN-1%Rpǥާpaĉd+)Uf !($$)8~)Sޓ&MRժUci֭/շo_g]'NhĉO]?\{NgZ<n΀x-*ը!IӧKݻKE}WwjR8BO "##IL߱up羟_?ԧ~/PB8p 첒T߾}5oy㽽S \ICpH:A+,L2EPAWO:tH?^_iܸ0=@HÇۇ%TRѢR>RΫ@@1'֭&L0st!aB΢E]H_bϏܱwڔͲQ#E`Ai`ܝ o 0*]Vdѻ{͙c͙sg]I 6c ȑl6-[,I7of˗Zlj'dٴw^ХKh"qݺu[o%s{|7/̺)3-'g"R___:tH;wL*W67κ0m۶muȑÆ SŊVOr5kTHH<==y` Qٲe?$(ij߯|@w2pwwHV2eR|]ҧOoe)222+rMʔ)Hxo.!H^WHt 5{6t|Ιs"%4RHҸIW9b²{f>٧GС$mg̘Çk߾}ll1cFp^-[T,YTD Xl٢jժ)s|Eرc펩XĦE$lR6- O:O8!IլY3̙SYfU2ezjGk׮e˦ƍ+$$$߿rȡ{L:wl74llrww?`Su{{~P*U9sf͙3P/_f͛%I޽-*www,YRƍsX#.Ԋ+jȐ!tڷoܹs]%JP``>ի,YB ڶm[̶ .^?,Y\r7ouU߾}[oK5$jҤe˦ycǎ:N:)[lѣDwN>] RlԧOEFF/P|'O}gv]|Y=zPܹk߾}|ޓ'O꥗^RlW^yEgΜӦMSѢEYfӭ[բE u뎾.\Zj]UVՑ#GsN=Sʖ-4iscΝz%OOOթSGAAAvl6mZ$M1**J#F9O x>o,J?.Sү9=5Rq6BOɢVZz#^^^1AXusl۶m5p@)SF!!! Q۶mc>\߯M} jڴV}믿7|O?4G5GzWnO?hȑʖ-[_~]FٳO?ɓzwb9RsU``nݪxЋ/onܹsբE eg>@3OTT (EСC:t>C-\0AwMv?={hڵc C~PppkyyyGw޽{O^ o޼*UhժUիW/uQ;v;̙3)S&mݺU'O˗U~}UTIvҚ5kt+1Ǽڲe/_uir;vL?֬Yyo /G[lȑ#5x`m߾=6mٳ{nU\Y=\a^zI/^Ԗ-[~z_\GՒ%KtRݻWmڴQdd]xYZJݺui R Ԯ];{7n~g=zTCիܹ~o*Q6mWڝ7Kr&8b͚5K'OoCڲe~?o"#+矗J,[76۝MJ EHW\$YW\gۍ7CY7n܈YfYw'fe͙cnݻ[d֬I*Wl}嗖eYV->ʔ)uU$YG,˲-OOϘc *sNIc_v͒deYև~h,YҊʖ-iYe.\ꫯ[Byx_ߦM,I֥Kbٳǒd?~ܲ,*W5lذhI=jW{޼yc͛7=,ˊ *dKqwYٲew-=;~%ڳg]]˖-;Ze]tɒdmڴ)Κ|}}֭[<ܹkSouɘߎ;bΛ1cFٳvoXM4ygϞ֭ƌtoW_}oZn 5lP-ZP͚5s?S<{J*H… *<<\nݺ3]}iӦMy;ګW>W\*Y}߃"E({1ͫ+]tv뢯}ڵkzs ;v `1K.9r(88XUV$.\Xs{={jժkƌҥՑ?yJʕ+>I:s͛7ٳu8{%RmB>G=oxx*Ud.1?oߥ ̜oKmJK\'J*eȄ8w6{w^=K+ynݺ>}ۧ3TR[6oެK.N: ?Y,3f{l .]:Ye\Gt(\wGjԨVZuiĈ=z^ݵ>L2_ַ~W_}U~ڶm 5k;!;hѪQgϮ/ni֬2gάN2e۷/ǹ&MkZ~{9jԨQ1~C/Ըq4vX+WNYf[op=v횚5k#GS=|7GF|k׮nHh>ySR%UPAfRÆ uAZr{]:w .hܸq*\2gά5ju&u{5IҪUm˜9|xʕ&ܴ MR^R|ή'Jdɒ˻} <۷Oܚ=W_}p֭[W.]ylL%KȲpd֭ʞ= ( Iʝ;BBBb Γ1c>t[HHr)Iڻw=,XP^u 4HSN3􌏧ͫ;wvڒHݷk}zuAmܸ8g=\=غuj֬>}Ĭi2dPΝL2W_{Ν[;wVΝUV-vg|nݪ^zI:td#GtWre-YDEq*/^\3fc/]#G<Wӧ!C)R$SNԩS1ݞ˗HƎW 4M,[nդIԴiSIҩSt>/?] <.]Z3gɓ' @\.\M&MNj4tj%e .8r̩kܹ[$v oSH?~\{u֭=g>}t)Ot%֯__g?sJ>=Ͽa>}Z.]r\O< ,aÆ?ԪU4zh}z-]VǏWPP6m$G#Fh?oҥKvʗ/ڷoE P~&z+88X[l+Qvڥkȑ#2dvߣGmܸQk֬QnwСZ|=DZ~~Ww:s}ŋkiΝ:v֮]]*22RٲeSjƍեKdK TF hB֭Ӊ'믿꣏>Ү]ʕS;vSNSNhm׮M:?LJUD ͞=[ھ}ڷoߐ9w={v;z5sL;vLAAA0afΜhIڷOC*P@:4=%m**'ԇHaUty𒚗2թSG1g\Tti˗/y [nƍ^zʝ;͛{ǵzjرC*T믿ݻq R:u/^P-єѣGk*X=sE˘1͛Ç|9r=7n'|R&MJkqkN:Fʖ-5j$77xlz״o>߸~&ӧOWDDTzwjժڶmի… vs UD լYSJoh)S& 4H˗Wڵ>}z͟??5x`U\Y5Rݺu/_>hϟ_[nUdd6lr魷R9b/RjRfԠA= l6V^ڵkk׮z'ꫯaϻ|r̙SkV TX1-X A{zzuʖ-[Ӈ7ҥK\:v+O<t\ <~'2dFUThDy-"BZXSGXQZF2Di ) rf=pDhh<==uyxxmy?E7ܺ ;w?H9W^yE|IReDӧ r{9)SFǏwv)*)?{:BBwo3'gs礩S6gZR~RRƌN+)H|7sziPHIRpR2?rZnԩ[niĉ:~ڵkҒԹs4|>}Z]vuv9xH.]͛yGxN)|BBåwAA҄ fNMj^ty@ZC A?b}??iذd- )]t1cyYe|yB]A2n;q~, #\6?n7oJ :Or攪Wrfϖw7Y \R%Irwwwr5vuIRƌ\ 1:%M,M*;'5j$}6}z3R>RήRBOeȐAYdѹs1cFK%eY~Ξ=9r uYOs2)KkWWzIgWpA6M:~ogHf9rP|]IsJ'JH>>RǎR쎏`f1)"SW\}̕)cƌtx ;vL4I>] 5חl6gW*!7NOKNnnn.@& 7MK&[Wٜ]QzR(ii|13|oH 9:@bae 3._JzIgM?H#Fx@jC pY!!wB^]zqjդ_v풺tܜZ. 0- 9r,W/in)Ox%y{;N@!aä)S׍w~撟_p2H댊~)~=I@tzR:7ҥON 7Cَ% ,*e+Sƹu' p4_KAAR &3GQCQ-ʕW7=):w4H)gNfd/ 1'Zl9s_vv)Kؿ{{K~~f HleY.H6oެWj̙Zx*OOO]rEIT!+8 }t=_gŊάp=|ߌNO խ[Wٳgwv˨XQ|Y:zT9ҬkRj<z"駟ԬY3ϟ_6M˖-g)RDnnn^vؑiKI.UjiVXX*T ,XOAAAP5jgSbE-[\/H5,Kz )G|giҤ4i1cƨgϞڵ$iZjO>@wD֭[uVD;7 Vi ܆ svUW@'@xxvޭ ĬK.4hm۶%s1B1 &)ѹsқoJmJ͚9!8"##7o^yӧ| M6Zz (o`:h ]r%vԩp5oe?ٕ\IL3gV̙ eZ m;s'":=>}z9sn3g/_>'U>WJo!5l(ujp SLR6l.**J6lP5UtiUZ5IH >PpA l6gWpU o4ڵk:zhǏk޽ʕ+ *szꩧTZ5;Vaaaڵk+___3I prV,KcHE8*+Ye96oެzݳsΚ1c$iĉ/uiUXQǏWՓʕ+HCPT :敶nҧwvU@H =_BHC ={e]}3R+22٥f^A)"B;F i>s+SFXљ\@ _ 5Rtx駥mے -f]HÆe4˒6nZ?哴<@*p)oӧuuΝ[rrvIdåͥYYI`ti}l :t̑||έ=]zUsc˲,l6(P@ 6T^TjUg )!!w:;_nݒ41g:v>>RW/ bx[hcƌQ"E hٲeڻw9m۶Ojذ7n?%?2.])ްaf*UL$t)/խ{o- H<6˲,g^ULxu)S&u-KZL, Qvm9<ҶmWi'BK"H{wcK˗m6HR͚RRhRRf2eH .+44T7nTɒ%=9G" .5onBϿN3ggf͒^~Yrw7гeK$?BOW^yEkV߾}u =S:q,պukg;CYR߾oIٳK:ϲeW'sv@BOU$モeY|ƏO?m(Js7iYR.҄ qޒp$BO+W(W\5k֨uʒ%^xN.tҪZK@aYұcsV۽a=A Q`Am۶MaaaZf6l(Itܜ\]աCsNgT 3Og*5k`NOzK۷WlTpaխ[W\r-p!'OJ&IӦI.HMHsH%KJҙ9=;t0||1tpR2BO>}z:yyKg+Ɯ]BBwoXZysr)[6[7Wz *WNֲx(6˲,g^ԕ+Wrb̰"8 M(<(.-+uhOGL6oFZ@'R?\ouy P򄄘d:%[ڵ _l- 9 >RBԧO:w\̶߿_&MR͚5նm[eϞ݉&.]ZUVuv)p1Æ*Uͺ7(O\"۷o&Nŋ+44TӧW̙uuIRJԣGuEnnnN60ToŽ駥mے)7#- ˈƍRŊҒzKھ]]J2˔*Vtb@rcZI|ߌH%rԼ4uTvnsH 6dpvGwԣ⋦sgW8IHIRpR2iH=Y32tyl&#A4l4e y U!7{wR|f~4g۶ߟ4cv}{P!/^26H-Z@B o+C :ydm۶Zf+ߡCHoiۀT%$LJ/[ժIO?-?Nϲe=gR5Xn֮] ح/Qo'U$p3gb҈ήH!!Rfɓ)Sg +&MmS'\FXX]g/*sNpO?7nήH$!!w:;ͲOi.)sfm[%W#m2jժYff9svIv^R[@._~jӦBBBewKMJ.U: H晡m pv5@I'KI HGJ_m&5*S2aafY"О={Txqg,BCC+W dF?/͟@ ޽ %$ӧ9_zIO[ ؉ip/6o2daYfP773 "yRipźn'3^LtT0$ǜp'NT6m?\rʘ1;2 M,]+^-= 3]'JKKSJ&e1pӺu͛7/l6'R?yGzuIgWHڻW:xߺ,4""*UiӤn4 /_>_|ҥK#33:@rgsŋ_~]3x+vﴉ@{iioۖ|}3:=2նm4x 0#:쳒4lXq+_>y8zetY ,Ї~RDӻԼ?mRW_uty"Y-ҧZ^|Q񑂃9sc zeDFF/ڵkU|ye̘n1cT,0yƹ V4Y4bԵ+׽H+'{szeԫW/m6M7nLjc zISƽm1ARhGjPr4XHwo:<]7#- R(~ n0s2paI}& ,li&WB a㏒ԣqvuHd|ߌm 44TOJ.U:<xC*QBZRKٷ/'PDin@1}H%-_.5jD U.#]t:}cٳzum'U4 1x{~6$D1B g{OfMZ w;Ҵif?H5jMRvfo1:ӧO<Ԛ5k;4rqo3ȑҤI4tԿ >Db.fLd!Ծi1\USz"ūXl6l6al5a'TԮwoyssji3OgӦf4ѢEMoKN-Mv={̲eK)`A飏Lʞ3j)eY+;v(w12eʤ>f!H"IիҢE_9vnݤʕo5gR1NNM3f̙R66}|.,2g`ptҼyR+D)[gN^JʒEQ[P!c%??;BObŊiΝz_|Y+W_j4JGJŊI/u,*//%@Μ1%O"8aIL;q ;4lXRV ..ĉg[:"[̴ڵRfѩSڵt=&$tY4!%mb\TJ^jV?_VMٜ]!HVXsڵy 6H"N I%vȃ{sʹ+H nC;h.Vɓ?JBIʕ@d,rv@|l6}f̘QEѣ/:$*OOO]rEoTfذ{6^ }ܳGʗM]H>> #1Wr%aU+7:uS}3BOEjΝJ#&Ke`]9gΝQ)_o:/fͤnݤƍ {pI'O6 {RR׮&mRoFZyǏ;$ءsf%U|GΙ3ʖ>\j^ʓ'jF :̑¤^>Tj HVHƏ^zMǏw'SUHN/%\xgSjtuVHH"7oJI_-mf:9|SS*T@FR榯*l6'@u :.4u E[ݝ]R#Gi Es-^,5o.e"D {H[H;bkYo_5'Im39҆ R\f޽%].]Z:uJuUҥU|y-Z%3Lm*رfŋfR֬H"'OJj۴1CΞm&5R(:=RXB'OTxxݶ1c8*2dȠcǪbŊ:}TM*k֬. E۱$K*_^ڸP-[ΫPdf\ڤ;] =26lؠ͛Xb:|ʖ-'NȲ,U\[_哗.^H eIkJ#GJ7KO>)MjrŋMY+;}L;eRJ&|5)[6gWx o 1h ;:pܴd:uJuQ6m|?5kfiٲe"EMիW׎?1|@wVdd ,PVҼy&kjD~]ZD:tHCʜ"ձ,i&WO?חovz$D N:I2C޸qCٲe#G>TB;ܾ` 0@~~~ R ԨQ#={6f+lٲ￘}.^N:iʔ)\#@juo::۵3ݜ&j%Og_//%P.^ʴ ׯ/8`_Yd9JCbx[YcǎL2?4i&MĹ}̘1ٳv*I$ݻ7uZh>@5kּᄋn݊yW:.^4a~۶ҥRŊq;H02_--\hl g[6!'"pO?~iӦ8p8K駟N ݻ5hРuҥS m۶ò,uEWǎ#4| %;u4MbrݥE]\RHIҹs&9oN2=as>HOMʓie'\Ƙ1ct5Iu5-X@%JИ1cΟ?H͛n}޼yuc֭Z`ʗ/3_ٳU\94H y:$}4wɡ fL+8~)I4* @ R2 c_|QsaC)@jF QXYfɓX=쳊J3gV̙"u4rrT >{eK2 Krjz77W0K?_;v~[ BOC^zQܒ>}z9sn3g/_$}nW%Zeέ[ҥ3^3w"v]Px|$V2fY  1p۶mӘ1cUN:[y'seʔIUTц ԢE IRTT6lؠ}&s_LH,oKn΃g1w/#2|-$1L }Aܾm\iһ@C ~zEDDh駟e?^nRժU/<]G<>~ݫ\rPB0`:wzJժUرc]&K+___3I Q\&M&#:%5kfgqvepiwO:|T^-J'OJիKZIGH|#u.իg+SyfY"umڴI?-[&OOO?αyfՋX:w3fH&N/ROVŊ5~xU^=1^}EW\G<'@B;'M M(]*k'9ɰaNT4yr fpSL͛eݺuKjRݺuUn]/_^6%&*~ qihit3lmϞoK 92*{qo6]?dW$nmMΛ׾CiJ ܅0-\믿ܹskӧe쒒szf>iHiB)gNi Wʕٕe93fcno.㼽Mgg l2OڼyURNg}VYdqvdYҖ-&\F*RDzkW)} r\wwgHst=cFV-SZ:+VtK\7#- Kr~g-ZHSttMg%!2RZ܄;vH*:_yEqxSJ֮{7#-?SR.\-[hڼy<9sVZ. ݺeFKn]F$!Uhj4oi~to!'={: '\Fr9svٳԩ;4*M,+>-lij՜]\B!kϝ.Zr6뼽 J#FY<(],Oo u._W:uTlYgHgRӧqI7N̜%K:2{2!Cg1uJI^^/;Wݼ PŸJFI3gJ2I.?+Kz 9?D*^ a~dIsmH2|ߌNOG+Vɓ'nm̘1N ۵K9RZDʓdK.ʐj?oǛnsZ6Z*:I.cÆ j޼+Çlٲ:q,Rʕ]@cYnHiF':Innήmϝ3˗29  wyG,YSNN:jӦH1"" Ls]FRhhtԫ'֭Ҷm澗sֻc$$BO`uI!CݸqCٲe#G:ҥKjժ.0!!RP]kk/􉯾jr ;_ҧw+ 1dpѢeˤ5gG WSTj ed͚5fOooo;vLeʔ$/ o$:eʝYJ7nHmژNf sspD 3[s4?Z+J3g7?nRx~H['\O?_~E>>>jڴhҥz駝]@[j_Z24+EFn!CŝZ&\TC-[5a=fs̼Æ%E!3f]&I>|] D3fH>/J˗wKK&)CSAS2] H+Z~L[$.H@2Ye9ʕ+pv9 YooiiĈdPO,oFZ@'8Y4_o7w*+- |:<'oN5}|E\`0mnv|b"EᅲjזNzm)GOΪ.-"Œ +aCis "HaH9rpq s)=4rɨeQ]pIKJZIsufӥUzڶFR)EZ@ P u6,a9RHk~ptvtƗg9 P$)UxgS.0e}3BO Cijb~ԴY%m"cB$7we]ʖv5c)g:$+oFZ eY/^M6ٳ۾tR'Uqf#Ϧ_ʹKI>+]+}4xԲnڅNھݬ+UJ8PzIs!xR!BOzKW++v݆ fٯ!vS'),~3/:rD6M_߶kk>-Tm2rʥ9si/ :$tNτĸySZD:Ռ+ԺRQ0m@H TbŜ]yyKbwzK:h}|̺:=oܐ:v,tmq/ٳc٥W{KݺI3:L7#-.Xb:~ *RJi…VV\9r8<DBMr{2ygK\93ZҘ1R˖w΍4ld]j{@#ڵۧ:u>Pf4qDݾ}[cƌqvy 8w~AtΝ+T4` } ܹfYiș3Ҍw<.VL*QBZT.ߎߠA>|XwO<;2?gy4o ;rڶ5AgZDEI6Ib-ҧ^yEKYS[zz%ܾ}[7ɓUD IR…Upa'WR[rHS')CC2}iĹssQ36_ _srvu =2f̨; BB-.\{΅ ͺ˗A %vܹH,L -]j~_~Y>]z;m/K,P@Ye9 !~meΜYKIԕ+WrHtÆIÇǽs?\3`39O"JuΟ7tN"9b^ΝwĺPip>}~GUREYf>f'UFR~\ac\,Kڰ@nҀRJK-[\fZ:ܹO X懽xY׺ >k׎r#$p*Gf?L@䨃Gwm3|Q޽RJҷJmژ;%I,U*sE#J.^f2fpTg3c K شiKNr4u4vtԸR7%V Zu\H2-'J=R$BO8ݹsh'g0];wr>.Ѹñ=KCť?tIjzel06Mnnnz'Ԯ];,Y GqA3ܹ@+z҄^*ÿ*'M լiy{m HfbqLptN+$/~exzzjƍ ffӞ={qFEDDhPnRC:tHjT*[ _;b/x"6V,ii+&-Y"U =OСbYHjЀ4NO|]v8qFEE7T5|z/8ڇ/EFF:T&Oz 3*iNRv.I _Jۙj=*u&͟/N/4fY"ȝ;nݪ'|n#GTfM?^PZteBCC+Wtp)3l؃Md_l.k|V=Y! oFZ@'\FDD>|OyᘮH777~?͔ӧK2HMkTi`eK߃tfƞ<]Lx[…n;}Z4LJ '\Fǎս{w}ᇪZ$iΝN:Il٢2e8LR:ZyWK^^һJoaϹsMYA1|m*`YΝҬYҼyŋ Rdx6'\W_}y/Й3g$Iyo_԰aC5nؙe*$pfůʔ1f[bU v4g ;Cʟ_CQ*[3wY>l*R=BO룏>G}PIgB 94RK:\Ǭ_ɒI3-ciH^.5AMs~})}zgW\ '\ιsHJ*%/oDhX7:$+͞mw,杰4.s/ EFJ7sRu^=3kRqK*.#,,LӬY%tvI&LP,Y\!ieI֙!l׮5a[ :tsH+=Gf…>.IJYL bI0`l٢+W˺|/_-[h.T9e7i̴KgϚl ixaIKO=e&r2Ez%ߤÇ??IJU[f]w$.NO%KhŪ[n̺M]k@*N2stN$}t႙s$vmu"uK{|^mֽI62g~ŞX<@,pׯ_W޼yY'O]~ <)u"͛'e(uj| gW,iviLi%jU3>Z/@ QF i֬Yrss$ݸqCÇW5\Ktҥf݇JyJ.Tsk8a5KO@ә٩@D 1vX5nX P $I퓛֮]H]w+iͲ,E uu͝;W$}rwwwre/44Tr<<<] |Y[itinl6k&e"M, ,liM@r0уU>f]B$ݻ7Aϕ7o^UPA?^~eܺuKn݊yWHM3-2JR>Rdv4S')cƤkQBu9dDFJ6fBW4)eYҥ2g͛zו5k֘uK.M ݻ5hРuҥS m۶̙3ʒ%gϮ+W觟~o#Fvk;V uу T~Hdfn͸T:ui|3~qVRҘ1R˖fjFz"ܹ=:DO&DΟ?{͛7>sի,˒eYׯʕ+ Ҁb`.Ų_5؂R RzR<ܹdZ./*'M լi9ƞ?ᅲԑ/*T0$*e z" tv Zj V2gl H\f6aKEJ,uz';L68,sLuH^WJ~+M,+*$}ԭisL+[\bwj>1ݱ,nj1C<梙<ٻs7((ZBOY׮]ѣGc?~\{U\TP! 0@;wSO=jժiر S׮].___*44TI\d:V~]zϤF?6wn%H@4|xaGF(|Sʖ\2H}'}::W z"ڵkիxΝ;kƌj۶Ν;CӪX֬Yy:d 7Νs@{w`AgW{-5onqǬs9_35k9&2RZBǥ2eu 3~?5:B'H=fխ[WeŻO߾}|8[@q :g66n,-[f2-L(t;&:̙s('Tt +7a BOGqㆴhrq6)o^o_gOHgWduԩW zs1+!R+22٥l:g͒.]4/6|3::KWK:~q3_f[R&Qv\seR 4vKrz$;+nJX[t&.͛g}TԧԽi v,,Mv쐮_7W$5kf5>1mG-:ܹЖd8 'Hw79iRU+)wn.0P8NƎ<= =]RHv,i2nFZ5G^=Q#e-Ɏ zN:򰣀:j2LL /M]ᅴ98_&mdn9sJ-Z<0|-KWҥUjUgT`H$f/HI۷ P{v6O>1U2g6M~]J'Nfg1ۣk\ʥlnr 3fw.޽Z*\@ +___أȋ||̺>Mln1̞]ʒ SfہRnR6&$tae̲dI3fqvW2t'~Pېݜ'.,MΝ3K//uibiT) 5Nt*T\1YڵҎu믥K,u"DEI+VH#FphBJ>q:ܹ'  (-)sPsh>4ɷ2g54`FNxwҧ9w>7~'Ja""!vmiaCfs|AnCvT U#=pIaFiXi.)_>O{wx#vf􈤱G97(x7oakR:~\ziʔ;'wn僢 M#9y||6% "gϝ:5Q 9>_||k9~23ͣKJ1WDcHgϚ aRbŤ'iփ{Թi~%mjnW=l||j2Ւ9so_ixi),\ '(z̒9_&:nܐ-f͚̐֯̈|Dc%%Y|wWOMM7wI zIJ $Om]=_<*;ݡ+ek(jEEt@ qwuFd(Ruqtq_ƍ "޲tK8Mi.i|<$')9n U!xGKihiz{W^1S2>4.lKf:fs1Yv8Ν%K矗My뭞,'`fϞٳg@n!zJ@)w6onOm\_KGnXKm3sM2~?08@J=S*f!߷J|"m$};̰UURhb.WUIuEY04iɴ.J} yz-yCms.⼼cGva3g.-8@J=-+12gQ!}sٹv{Eƍ&Lpի⪲arʛww_{\-ԉZkrs݃M!ҠAf?^U5u5&puVB @-m9b}pVTM׀sfaEGKcJ]& "%%%I˖IK\udѷ|ߖbtfrivϡfvW2c${4u34HJL,Ϗo֭1+BO}+8LTd!xtm/%\_ۏ?kF20v4\åL9i{9`ԩ#*/Ͱ[8vNN6UgѣΝ|6kcbsjkw4hn$y=I~>6֔h/v 6{7{\gf!Gl6Saɉ6l6n Ϟ= umM~n׾Ϧ`:TgBI6PZj1Νu2aam5ZO-8/k[b_w>GJ 77lΙe l7OuxsLz='5Si2tV]!inڽ1NP!|sMSL/|%JOOoJ*B$+'ˮKykdY >BOo5}~)~ gk萟o*6ffҌ,:|v4t%Ź4W]%IOIK.7^7l4'Q6@۝SXa6l~\e͓O~{twA.aRi@)mg.اvDI=ۮA1UXZ.MF ٳgk*((P||\s~g3Tھefi6ins$-_n ̔'JÆ[[mݖ˖XҜf[OUٚZ2Q+Ёzt NX QnZeb=Ps6={FDHajmsOYa!C{ۺ\CqHH(ޜ-Rxo+9 o?lUM=Z3U9=ZZu))qnۣ4pYN=yy@3bh[_"}t@z[4/gs~ 9qwI~j&QQ& *Mm@ aZۢYIܷϽBuٻ׹mD4`3Ԝ1j 6xV*sdGv,hfi(:z =X)|s:wߕ֭3U ㎆@F @h*,o =kTUI-GS23MGf(٪*mV6CB[,&MN6C_̔qm6)?_˓7,,4LUYgI"M"ކ*9us_7nf%0< X:̙ymڎNB %硗}nچf 1P5#iCZpl/S{GHr Jy+kno{F'3M$L濮kw˫JJ/>}]$M^Harf?'-o|Pi3tP*+DXXh&)ݳǹv\6Տ3gJk&%܄u.ʊwr,:dI{Է .kWWiBm/2믛EL{Luu䈴tYol&W^n0Z'aδSmvpRkPs [ RV%'ҒAOSCYz\穁 Is)@*h$)I'd,3/Zw@r쿭xN<-\rFx.n(\smzSY>K,jJS+sT]M7͓$]DOܫ%]*[?ÇsΑf̐){R )7=l/!f\KK~2ߛ/"RX5̔?h*6ǍnQ: i)<Ѭ=UW#v|W^zH3LVtv^?LҥM7kG3d*G $ǡnJj4֭:1?_~+IΤЮ""L4G4kVSvMdڳ}cϥ7:DFJ;?l&b)1u^/r5-M?;=Urv!_s'z|b\M%q=RE;R$eJY-n޽[o=NSjavi׈I n/z|Bʋo־}:o9-5_럝'%ldV5$,nFbZi:2sybAC3!V)*4ZX-;A𢭸OO8/7䘢ݻeGI#Z+6,vs)88W̖.m\Quu:\*gxJ=#O>': ?YҵPm#[ʲ23uGY{i`rTϢ"Tf^qgBͮ]>~A`f(fy_VL%SI҈^]ɐ@ ~&/_M!;7gX=cYqwS~u]+MEĖ-,cq7IZ,t7' [1U趮W5D*3 ?M_"Pˋq,Z}RϪ$Vteu @Rӽz,~>٪#99&HȐ^o$'jn3:((*,T#kM~%%fm7A vtuٹӪݻ*-eϜ'5=dc#*|WT)KJ:z*M*7C"եAsSU{ԩޗM'͟/&tTv9"t]6mrz266kL\cTRֆ%TBtQV^Ō4:nݻ4 OjuՂS?S+U5_2)3)0d#%Iھ䠺~@מ!hVV2Cv ܽmv/G$IG^Hӗ߯Kϐ$%H? 3M>֓̓Ԡ%*Ch':d/b}w9`N$$K=]k1 $.''m.m?NK;rӻ:]3+=n7'k[lo* 3s?ufڰA~KLxj.@:唺K931 PQKKWksv1ouxbM%뼇I,=5K[8=t GUW7+rΖg.'ztpR ~ϹQ;zr͹M ջ߅E z|4|Sw]OlD 4b_ߗy>0;mCBLt1&wSR˟^=T;6ݘ1C|@DW*DENJ.30_~)IQ,71O^^ٗ;!/ϙvx0Z:>^ OJqiLC{K(vW:u2`LY.ߝԾޱԾOqiSN'e(}E`%7ZԿ/]+띗:KftR\Z*}rwz]Zao$={ /j^ԣ{$Ei̯ m3ke/c$-ie ̕_,_&ɚj~瘁J|2=]^KP4`fD޼)v *k+|`s@F~.=ۄ] raNKs`o9-dXZmbWPPx9rDqqqnNٸO9?JYG}}讑HaRXX]6TU 0Ma!U +.yw7%KN7>׿]Z{>^θ jhuLJnL%hQ2!BJI7K{rH}:#6Utԫv<=}L'V6lnvv1fdp]…z7t#kRO{RVuML:飐Q({dUU/lxx݃ű*]iG߇̏Z[fEHFFԶ执@vPiRf9[k{ zjB%HCM zF 1,R ݧz +V-PXQŠua)ŽRȑUQlaŔRHuLUȪf^EEg%orFI:/5zuYP'Ǩ2GkN}Ԟp?\B~tiWru?IjU\UݻK@]kIKHٲ}]X,{Q Kd/*֡婋([$InެdȦqFc9It\.Y;K%KgSҺyseUJ }ѧb$np.IRtғ4a e|N=+m'4SC{~Õ7 }˩d=.Q-) Su0I~']qJJ ;U/z+.= *$ĮΕ)6\e -QlHPWlaŖTlAŔWlšQbUH{, RDEKtQN\w *uUu۶[A˥ңzAcӆDD8~%:ڄ5a\QXQk(==nl ˃rK:]={Fys{^45>, [׮G:cMJ[^\e^ǘν{K)6|7ϵtU^~43vS^nJ~ܬ-PzͰ-ryr#îT̩\swLY㕤VǿBn;W}g%u}4?oaZJLod̓Mo궨:nQ o HUUo^0eokPXaҧ9!۹sݥS's>*㸯U2Vݹf8mKԻ*{h^Y1\ty$s:1ќCڵ)_NI K`269o\c+W_>\WkB*W_3f f9ĭWWr믥O>>TUTH Z5I'御u gcJ*W^4F'I*Dd7C-[eDn%b4\?8m=orp,'d3mr=]ג##ҳͨ瞆O8* uvyOUOMڧڭڣdQr5vT?#+G4x{ ߽7wqlI uƸ#?vwPi4`>?4\sݩD!(;q숚,b3=gi1LUn͛k^3_b =]X՜cDrNYؾ{Қ5\,VuR5b~PU^aQeTg}~}U}2A㫾"۠a ?{t&nR_?/w{{)_T(M 9!xM7ӕ@UQZ=ݫzf/ 6ͿmuaKߜj͖hN7yt |3:*=#?3g^ qVxdQνr)BtGlF<=;5/q浒FKSIm m`n/Lwy ߜcD2?phАkkUzup(K9:UJ$i@SCdWimot?ӛ)]>Ƶ.l]rnIsvsw7/LfcǑۥg~JӂᏤf}ւ9՚-@B `bEyf 5IIuzg]xyM:gr"ޡuŦޓՏ;+]:os%U(7Xq+YcQ|=T#C>o+qMg:Kΐ[Cs6A"I{zPncK@;e'&JI&<{m@19Z"xmpbiϒ߬Rq{$)!^5"ҵ@%Хq1re=1F])rd6̍أ~~Hա p|:&n`!e:?<~A S$EOf)Ӝo'Bǖ*] _ۑS[mnSs ,$ӿy[}ABX7XU$ $iR7)M-$NM U '}Ւ p>~)Qnjќ^c\稖Ye⟳r΀]7Np {khDao l7ճV(o?4X@Fc] d5^lmu$kHn|lG:B ~8Y4IV4c:Qc\prJ3 SEyRiM9imwլ )s<{s^eyl/e9ڌy+jÎ0$E}œB愁ry㫣WM nGYYҕӤ_ zZ#gvU}<&kbVJEM \8egKi@uuDIeicf$:ynWPɯãZ-vEWٮCp4, z(inJ7szN9=$+KvKLKOf84'lsmJ8Z_ H,S{MkZ$ =5BOA- ڳK/C=HRVZV0a P\Z+t^^@JOo&-F ?QSNզMHX2ktyo=!ZJ>222zK\p6/փ>\5J=Əc̝;W>֭[ʭ P(@{DXFҵ^.K.՜9sGyyڲez!IJMMfsߏ>HׯСC5tPBOcnw#X,u*=?x7N׿$IUUUӧny睍s޼yz"UTTo=S}TVVVGGQ\\\ C+((P||<Ѯ@)//WFF&O\s]HH&O/«}ڵko߮zH_}>5K>}Z<: BOR={tgϞΛ7OGYvhOӧ{]dd"##}vJOnݺ)44Ts~߾}JJJSPBO3F+WJ+W '_x=X70-:"mݺڸqշo_͙3G\sƎGQqqf̘͞=[gVAA}x7|'{Μ9kF=.28p@srss>@={WPnrTz9rDqqqn Hqsz9=JO @@k|3:*=07g"삂?' %I}sKAaa 'PUUUڻwbcceXn7n֯_}7M6]AA]vuq[K;Zs'PqAS/8~_D_h}K_H@ (}/ i}nPzRH3}P!!!ݻBCC[\sߔxu?X[K;Zs'PqAS/8~_D_h}[_@ (}/ i/} Ow@={v߿)vۖ>&P^_5O_h(})oZZ> ׂz/4]mю(}C_h@y=/p @*((P||9^//@Ԃ ~E_ `@_ `Q Q zj'F ]߿FTM8Dܹs/5vXӓO>&~k.vi:c5rHkn7^xtK.M;CaÆiȐ!zꩧo:nRצM~w]wi֭ӧz!7hs*++SΝU\\;N|vm*''GSjjrss5fOwӀ6gP?^u7h6M{>Sk̘1ZnC{?wS UΝ%Ieeew~般VRSS%IIII֭>F~ri)66_Ç+99Y111:GY_=t |nժU:uz%Ţe˖f߿t믿ncX,z7n^zVj9кڢ/̝;W+ 5jz?֭[+h=m222TYY>}@k˾{*99ڳgO[4hU|OFkkԨQZxۗ.]9shԨQtgj58eݻWfeddhŊwߵs}a:tVO h^HHHз~lڷo_<7)ڢ/HÇuW'slZo}0Z/kvhCouǏϞ=ߕ^zf=ܹs>l Z /wiݻ_~] .lf-nFk֒f>竾PZZj?/Bk5)_~/|/5 k/_z6i/+-{Is?@{G'*//WFF&O\s]HH&O/«}PTTTO>DÇI{_ipk׮]ھ}z!]{|d'Z/۷{ȑ#Zj }nk4i*_5 @{M?~6mڤ={HMfڸqI'*7y睧;c_uMڷoSRRZ=`3`*""Bcƌʕ+kʕ+u 'e@ۢ/}0 A_SO=OzWuњJKK/o߾usYg)''G999Zrtm3c K:|o 'ɓ'+))Iۤ+55GyDuSϞ={W6M޽{geXꫯOTTT;8}$ݮ롇r{7bh֭ۻ~z׿VnSO=UnX,-YDw׿y>S0`$I|N:$%$$k׮:sm۶}M4I7|8\c8QӘ1c7H;m**î]F JLL矯۷{|\W_}USNs}ZZ7|7|S}ѣl$%%%)55Uwyvڥl3|pKoVm`C "44Tw{ϻ'h޽Zj~a-X@{t颯J7pf͚Uq?o׆ t 'hԩ:t,ZT}Yr)͛7kرo?31cm]_յkWƏիW7&@.BxD԰atjذa*))џ' 2DSDD֬YvoY_|RRRd駟d*Hl٢Z ^~嚐ғI&+1=*)) u+4c 8fr =zF)IuEiJMM3<^7o$]tE˗瞫ғ;wj:c4d]z5jmw^SMh"7ސ$-]TUUUzꩧ4bgΝ;gv]zxW^5khǎڱc֮]+bbbXXBK.UH{;vx+BO0+++>|[ճgO1ߡڵvN8rXXƎ[ӎ^ziʔ)zg$Iot饗ێ}א!C8iΝnytׯwv?/\T\\\ͰEEE骫icff6mڤӧ9sh̙u:ﭭ{2e{9=쳚2euqۉ'jƍڸqky::uRIII 'SN9Egy͛W綐v***lW{xV:ǰޚ9s^}U=zT>.2uܹmܸQ>֭[7k׮u{s_OM:U֓O>J_}$o̙ݻwgդIԯ_zۘ~ASL'|c=9/?Sr-zj*P%31cjGO?+/GW]{z7XUk7nz)'tudvx/ܮ޽rss݂O|/lٔ9EGGkɒ% $iڵ[u9hኌնCi˖-u+%%cX8b;VO>d: :Tv>#]tEu.uغu.OJ!--M?zQ>:x4hj1'cPoY,T:lڴIGz? = 1BӦM?OO;48p@-Ҷm۴xb/[oQgV^^[hӧk޼y2dp 2DJӦMSNն.]k׮z'uV}'3gmgΜx@v]^xaӭުݻw{|M<ܪBCC͛7+44jړ[nEEEE:uj6%%%gQ~ z@{ ?ZxFZsm|hԨQZfVXQg뮻N1cF{駕4]uU[գGf-$$D222tqnӃ>q/\aaa/S2CtWkСo~>[ .}?jʕիVk"I;w֪UԷo_]tEJIIu]Rǐ 5>$>i^iVշo_| ^{2@}v 0@6lPjjjۮ^Z~vڥ={Mh4h֯_47Av]n6]~>} &[owQVTzRVVݻw+==]^zi@w߭ &|)7x l6>uE;@@ qH$}w>;@@ qz:Ц9?hw^bkO?9Zlg[bk7֠gN֎bk}H$}w>;@ӓ7--`\xw):d]`0>z@-Hi믿>x3v ;ݻwSSS{:.Q>qn'8@C&z:N"tW7pgvYfYZZ͟?\&MXzjA}f0;wLMM bbb O:uРA...QQQRTl3`oٲ|rB?U޽{ .=zYfgffZ[[XW^yZzEMMM6mrtt466]dK;;⡡:::Sljԕ_:\]]###) 00ښboTϟݻw3L''xB_2`I&]pA-z߹|($O,CW]~͛+V&BO>]9;;S![n%;wO>0nܸW_}U? ӄIG8qbfBxd2ɓLOի_d2Y|MWW`\pA>UUU9̝;ٳ555ҟ~iԩ#F͍ !zzzy~ʼ%KP!>믿`B 9s||}} !tSN7@]umBH{NofPPv;rqqQ:rH33;wBBĉr\g33M6͟?B>򻝢"L_0L4'fa266?p##;wM0aҥǎRG<44T";vr岲2jAWoߦ0aR;w,(_O:thޯT}4;`j)z=yԘW]]]LQZ[[-5557nԩS'L744(H&+W1cFE;[ZU焇?~<;;Ǐ?{!CTq??Cϋ/NJJ0`@pp0⯠Pq̘գ<+U-ܐs4;`j1 xCW3"'-ٲeKcc#}{W>f̘1F"xgyF͛ԇO?444}G;F`0_xߑ֕/^TlR{ڻw5jɒ%K,J_￿+V>zzzONN~MLLH׎󲲲ѣGSyױc>.އsC삩YKO7|#kU\\iӦv=P(Y<=J}JԼ1sy ]WWG--//f#TTT(|>]N%3gRy^{5&6ݽ}Kaa!ݽ{7UGGgʔ)}}}:G<44%""?Oѕ_!ʕ+P BH[[`0͛gmm=q}ɇv?_|䫷發@?>7:G @;U;w 8q"5o݁L|nΝ;7}߻woFFFfǎ˛?>'y˖-K.8qbppX,>|N=z1c?ƍ?۷_̌m۶3g111=ܔ)SBBB ~֭Hŝ=v-&M?~ nܸdffX_~>ℐ)SXYYݻwĈf5>BLMM,X`mmNW޹sӹ\ RRRBABڝ%W7빡Tq wA ̙s9{{}}w/_9rZ|'k׮{ᄏyf77 6(V o߾_nݺU~T9swC-9r͛E"ѥK[E8}~wGGǂӧOٳ~ m{̰M6;w_pႵc8!DGGW^d .T ܕ_!d˖-6my_Mݵkʕ+?S||;@@ qH$}w>@r3gtX,;[ĽD"5cƌz̝;w233UT@+=N=>;yy nTk gE+a;Y*a跦q X[wb}Gb嚄lX*۴^C:ʅzFOr*r;+ͣ{[jij: u,jPO鱽wV-tS+_𵫛n!`0,,,BCCGwOZXX<쳚 O-Sֿ‚_>sK,2 X,VUUUK Y,-Un 4#B K5< {8/8p;f͊JLL 9s&G9{u8u ۴ii6Ep$ 0`^]]ҿbnnvK=%%eڵ---ٳcǎuss;uԑ#G-NdEϟ'(>05]L&+-->|.!ƍt4ׯ3LOtF=66O?[NٿgtSvqqIOOOIIQ,?rHNNĉiiiӺK҄:wwwdڴi.\DWzzzka{PCڰa*++zt̘1ʷ{PuuudddYYٰa @ D,+&&1777**cܯ_qFDp lBimmݰaݻwg̘abbpTmbbrJ-n;=|Lݵ,--a*pGFA}nmm^^^[LfEEEWb:KK;vxk׮q8SԄ ݛ\TT$lvppظq._8nܸ_Y;j$#F(--mnn644<|KK˔)SEMMM%%%Ç 5Ywwcǎi"<<\uCCe˖" jq6mڣG>>aB /@dySS=@-jq_fMbb={Cy2Ǐ/!}Q7E ލv%l'!(Vl,5۫K867%M͍-% tV, 74E}Wq Q_]ɬHDEGvgMT+#Tk<]ukމQ5@ꑸ!ܕ TWz0]Ff/_q5GG]vQ w744$&& BjDZ[[₃Mw5rvzgN zϧ>3 2j$8bUUUubR%fVƍS,Qa57|:x111K,'NTH$ }BbFٷdEϟ'VnmmM)//oW.JKK۵`C?XtzjBHtt۷!.vqqIOOOIIQ,?rHNNĉcooonnVRR"/J uuuZ Cj !>|7hiiR~7C͛ <<<222!!ɓ C D"b ~7n\z ijj*,,rppX`AmAi7oZjܹK.ݻwW_}~zX|ƍv]8D***8kpp|XynnnTTS߿\TT$l˗/\088N&&&+W`e@Y5W_u? 5酄tVرctSƆf41ܹs&*^`/5WWWv8ⳛݻQ:l'K:CGmzpKU4 ivR?͒u[t $RÁ[^RMk$^1he[ƪzHew-ݷʪ[=?z?͎ZU7+H:k;z:,YzQe;Y^[ozcǖ)/jii F^`/5sk׮)kmmuvvjlZ y<^YYYcc 466*P*d2lg`` $dddǯ[Ny͛7秤BDP6/z&oذ!99yժU… !2СC}'|mqjn֭8991⤤۷8P^ޞPPP@6lݽ{֯_Cwwɓ'_r%>>#*[[[:t_?sBȾ} !L&366ѱL\\P(l7Sbccw!/P/oTbbbMM'MDY`Addɓ'_|E 4(Yf p'''1jԨߺu뭷5rrrMr;[dBzܖ;::RY;!DGG'00"49baak.BH[[[[[5 rŋk3.d2sU^%z/D"ٝD"133srrRnnP#q߱cի_uuuSC޾}w={EEV.ggggggb]XEդٝޞ={^t2vX›7Sb IDATofgg+֚Nj$ފ+ bW_}iӦfsssm!X,\.555熆>_]]֕yr=믿:99 _TTXjmmqG4P#qp8+W[lUdɒ7nBBCCw-1O*d20|>O}f0AAA^]]wUk׶[jaa*=` 6jgΜ:ujxxy֯_k.TjkkgϞ3ft_200`XUUU.mll,,,dXTIgĉ~-x׃_4XScV1cƜ9s3f7|`0VZu֭^Slllݻ1<oӦMׯwQw;|PU"0 ofϞ={}嗕vͨ7ӧ*ƌs;v+}||bqrrB =W^^^ޮ\&>J~2K/wB!CDH[]\\SRRˏ93qD3ۛ)N.JݩiiigbPswɓ-x֭4hP\\^r]P]]YVV6l0!D"ŊnFEE=v7n(H\\\8NSSSaaaUUÖ-[JKKW\iggUOO &h<::dʕ]dò2uײx*z8z{ uuuS,Qπ'rǎ<ڵkmmmgԩj55a„{&'' ###6G p&[wwRk<bYwwcǎi"<<h@U~ȑ'@w]v9܆ʽJ4ԘkTU*KrڢƕҸq%hV зz$V7 ^[KcYآZu-juӎyY>~ݺu26oޜOuÇ˩RL8l(sj9u̙"hذamݺU p8'''Q\\}k:;;NsCCCAAAbbP(ܰa >{yy;w?G}Ϟ=ŋ/NHH vS=:u*66v׮]B:@yz:thРA_~СC !K.=|0w:5G۶m5jɓNvء4rrrBBBMW__8"&  KJJh&RϏ !Ǐϗd O95K?geeuX$L&sܹʋ¼bUPK$I$+V*ZYY!kS#q߿šul6[q ub1/1L___sCC}̙۷oj677+Zj$ofš]bX$q\ssskjj |>YW^}}BH@@9s-=~xttbCĽJ2LGGf}> "((H/-[VSSSPPp_vNܛBakk#]|=bUUUubɇy!ӧO0`@ff3K}}}ǍX?))I[]@^~EUVVKn޼{!m9x111K,i,igH'J$#gggsssI&eff)&u! NtB͛WYYᑐsʕD//yUWwdEϟ'VnmmM)//oW.JKKn߾ի !t oڴ pss[`-[͛7w_vqqIOOOIIQ,?rHNNĉcooonnVRR"/J uuu}}'Nȧikk=zv6 bj r劮ەm۶m L ###N<`0H$bX+Vߎ|qիWp8ª* BLLL-[jժI&Bx qcPoԔfut>^lZ HRSSy<^EEE[[quu +͍pD"SŋٷoߖJ666t611YreW<|Lݵ,---,,#~)///***-- jqp8RTyELVQQp4 酄tVرctW]ShPc?sE K;VJIIY|yUUղeˆ B]jU "~J;gX=kqL4ʅl'KBջ 5kGY+ j,ijnlSEOXa !^RMw$^ݨ8,ޭ%hRtEVVv|)YFhwƻtyMj~Rea} s O}Ǯ&jqjiiٰaСC Bwݺu͛7o@f꽀i͚5r-[P"62,==㕕566ZYYq@{^SWa2l6300dB222׭[7oOIIoモJoVmeʔ>oiZOx999&hryyyd2!JI&رL:*q/--LNN^z~kk7m3hРiӦEFF^xQ,I4d2Ν(,,lʕcK%HѣūWV@cy8_`|ʕKgΜ9s !?~QPQQf٩XW,SUWSvΝÇGtlŒZ:zyyyyyQ_ܹCeOsNVVVVVV/IbH$r4P|~uuZjiiٶmSQ(!!Aښ~GRo:HEMMMw)//b@Z!Je2*|>S EPPPhhZ߿ѣG˗/W]gU,(SB~;qĉ'ݻG & X,VUUUK Y,-!3zɓ'ׯ_gRK5< K+WPz^^^[[U>jԨS!666ʙ4NjYdLJGD"Ԑϕ+W]zyB#Q===e2Yiiuuuy^{MqѣG{;P/Z0nܸ3fx{{3'Wy{{:u*==<88X^~ȑ'ҟ1<--]UTzСiӦBƍ7n8ŵ222,,,^yl<Ԅ/c0,q8'[WUWWGFF 6`Hbbbb B͍zׯoܸQ"p8ª*-[tt… 8MLLV\tEw/EEEuVA .P<>|ÄO>}ʔ)ph"KK;vxk׮q8SԄ ݛ\TT$lvpp^O;rӧ$*d2ӧO>}< wܡw'wi-4hPppp||ݻwB]fϞm``'YXX̟?ڋF=zpX|'O??xɍNnTw_ԇNYj;;%M͍-6XR/QǮ6DY}GhuOO@G\+4GՠC\z:Q5@m`#YcѹCןR.|'5;uwxvc}4|ggii' dtWVVheer é0L6HMn:wwvo޼9???%%E___,zdaaDx*M% ϗ'צB'O=Ajn֭8991⤤۷8P^Y>cCCCAAAbbP(TI2lÆ *1U{}}}NN666踺z{{eBaO:k׮ybֈ|@H;HDyWy-m@otIIFƄ.dfe_9CBT%J%< ug777غ䄄?'''//̬uLf@@P(,))Q7qg]95eGY3fMxfB埕V.f{:@IUNeF233LܹsyyyŪK$=D"&i``pGڎ=ZWWWϧQ_&CMT%~bhQEEpubqVVU~"` Tѣ?C+++j͊%mmm{=jkk';8V`<}…O,"Ҭ[SSC}nhhnnn;D!!!ӦMcbbo߮vǏGGG+D#w97G,+Pv=@?tT*d::^/E|>`0,,,BCC2663f 5((NMMz*B~g>s̡CzaSvgn``b:\XXXblmmv3hť'&OZZZ7NZRRRxiR+ ߰ѣg3q't8u NjYdt9>>>b899Yy !S疗+dÇսsG}D,Blll4<譅))^|Ё询G0aS~9[S̓Gə8q"c/XJ6m!deee4i!!ѣCho43/^z{4Ŷ/Cz:@&Ȅ'O:880 @ X,֊+chhlܸq...aT~{+WnkkͭzLMMmz&89B&Lp&=*5jz22.J9W. mz(.*,,y7{*6 zMRڵkEEEꫯj)0H$<ḺˇFEEy8EEE"Ȉf)pu/rΝ#G:88<6h+WveKÇ_-Z4z7|~ sPʻ4|{vjzIuHl+/R]n;4χ<{;5~5XU; @T=IZg>tzӡW:j9Cdʯ+BHmM vVϟ311Y|yO&DҫF!y<^YYYcc 466*P*d2lg`` $dddǯ[ݽ]7oOIIקJ222~jKK˗^z) `t<9Ս;?7ЂVF,_/N{t twccqݼyS$uuu[nɉ`'%%eggo߾z)Y>A{CCCAAAbbP(ܰaZ=۷㮮7nػwoCC5X hll׊q&GEElT]Rȣm寿e;vqHBȔ)S222z-zc͛?ZYYu_L' Mxԩ]vEDD =<<봶FDD GGG]~㯼kFJ+V駟fϞM 8ۃ?\!䝷.\}Y{:4-y`>hϯ$''Ν;=EĽ^ۿ1cF… /"Ҭ[SSC}nhhnnn4[hjj_ WYYl2ŚEEE %4;255t#Sw]M߽{7##㥗^KĽJ2LG,|>S EPPPhh(!<زe"##O8 /(xgץmi~mڭc,322222zPpSƦ*H ΐ!C!&gXZ6$Kְ#I_2,E//LB7o^eeGBBBNNΕ+W*++͛W]]}G,'''+/:gϟ3^`4#1ofO?H7mTSSo䄅yxx-X˖-{͛/Puy{{(9r$''gĉg777OKK+))J҄:www$ ʕ+WX|Q&9a-l@3}d>1\t,bP1iҤk׮+]dffꚛ5W]]YVV6l0!D"ŊFEE=v7n(H\\\8NSSSaaaUUÖ-[4w}[VVot8h+WjqÇҲ1P!///***-- jq?FFFFGm,--wؑ]pNL=KJ߄ ݛ\TT$lvpp|CC-[8p͛/_1bM /矏=X-5wSYY)JYdp0^HHHHHHgݏ;F) pu `0,,,BCC5ҥKeee˗/lu^֭;jhB0aZOGU&A&qK/`bX .ꫯ:\XRRRPPbUUUubR%ftkkcvwܸq%III hjnl>z"۔St*ԒguoN-8N} QL >)׾>ĘZk6WP/8? uYkOCtߑ o'b`]{z_XCT;I|Gymw~W~lWO>+䘙ёA 4'uԨQժ2KD9wj㞐=|#W\2&&_LJJ1bDii̙3]]]g͚kyvEQONKK+,,dNNNo'߅:;;s\Bș3g8N0۷oOMM=q!>|X$rOOψz5P.q~ddki)GW;;+V0ŋX$Bϝ;w^˳+Rƍ<(}}}'NV;Y__/BBB~]vu7|3##cĉfͪݶm˗{Bላ.;va>ˆ+CW?xyyytӔ)S<==ccc.]+ivsrrZ,x…/r߾}7o'NGTn޼9555;;{Ĉ.66VT~V/^zC<ּf ̂EVBVzؑ#ǎ|{໾ew:ݦ{vvn͛=Z%%%X=,,,000%%E\ZrޞAFYUU޳f)o/ǟ ג#.ˈ+B̙37o;v,!dСgϞh4,"ryoigrynZlٔ)STVWՆ_~'h4;99;N8Lߊ%IFq9;9?lNxQZ̙3fںukn߾7}c:DGƍ7zD"QLL~d̘1 6m2L\timm-!̙3+W}{ONFjE"=====~`0ϟu...nsx?>}vpp?~m2dԟp <痗*ɲ|iLO&MZl1cFss_ݝC@`Ī2mR([Nsww/++ks阴m۶edd8}EUj5`2EEEZzӨid\ظi&PCG>[o=TKBMRbcc[7]zkPq \]]Y,-!AI/2FE-^>DjSSӒO?4((HTN]'NǏ=`1666εZmLLT*0a!&!!AWk`9gN 0vڀR4H /$''ǭW?CdǎB`dggb>{n;;;BHrrrTTTs322nݪV]\\˅BΝ;47nرcǠAy<͛7ϟ|󌎎677SI%z#GRJJJTTT|||{9ٳl6ɓ{:iÇ?qℷ'] ٳ'...--֭[%444""5ѣXHmjj1c 5}ّ#G]&=<>`ҥ˫%DFFv=N*~G... #77رc.]Ϭt= orrrlb߿yf.;uTBȕ+W6nܸ}v]fDNyCBBy7ndffaÆEGGϟ?X… _~}6oެ N8QRܼysjjjvv# <1={|Bx㍣G=tB0pTBHhhhJJJSSSzz^[[+ª=---)))<<SRRۗΚ5ĥܿtN3gNjjjMMMWOO]q155Tzٳg\yZ7-[lʔ)*uJZ6p'''V{ɓ -]ӧO8qncϴbggg:RΟ?O4p*`ee:JR.G4G#Ѕ;ٱpWTb800ɺ%%رc ݯaiiIsBH2ԩS1cx :*?#BHpp0wQOjE1OOOOOO_3 ?쳆dXb׮]oFHHFz*&莎j 6oLx|>VL=<(O|…ӧODDDGMJJ 0|ooo &j:$JONG.^_ B6w\.ۓ'RG}Xa;v숉IHH #;;[,5k>nݺn:欬rPdے%K}{/44:11{…sr00' =ju\\\ZZZqqFqqq3fLDDnZyrrrTT!7VTTƊD"XljjuTvO%]q߶m|@vpp`eeeW\ yo Bo)˔8 ڌz.:8u2isr Sӄʪ>;G2!ĊG+:Ċ'-SK2rʨoB |1koK kؐy|z+͟|fso鏸7<#+t_<<➐͛7WUUI$RTeuܹK` 3)ڵkכo Z[[o۶vݺu5kV/$N>VXX(ɜu+o?߅:;;k59s Z}'Np8+W|#r<&&֭[MMM>>>˗/o%178N7hk<ƍ=ARƍ<(}}}'NV;Y__/BBB~]vuJrYYYAZqs <8((?x뭷JKK~zcwRYRR[HQ} [.hnۿNNN/\_۷oͺĉ(͛?& IDAT7fggj)Xw566_v-//OLXX؆ :]S^qg0UUU[RiAAqz4N%%%X=,,,000%%}\.='//#-]tʕIII[Ϝ9caa1{lzs/_dMgEZJ7i4+WR|O}gϞrkݴlٲH'OjÏhbblٲ5k螖hݻ<&Bk!dͩ|Annɓ'G+BD"E"󝝝u=<} ѣG mlluj111Rt„ 2}DzS,߼y3((40|KBBٳY,;ok~g}Z=0dǎB`dggb>{n:䨨NgddlݺUV477gee ;w_"""tARe˖9s昛?^PDGGt|摑x'zQ}}}aa{ RTЩ:qs)ڵkכo Z[[o۶vݺux@ سgO\\\ZZڭ[4KhhhDD3jѣGױ"(;;9""bƌ]rnw2gҥV0⊻D"ihhh]+J+++>_\\>prtz9Jɩ+\.Yt1M萡Se yqqqUU}VTZPP0wܞNzzo$Fy%V)i@+w?ޅ;U̯e{Äzpg_AYWwD9zr?!IbW.;O﨟p4_?߶-BmkAvڏ[O}L]Vt&xuGZWXV$Ye.ut]D:ȡwu7 Sy~mb+L- X蠉 B, ?H}kfNBDMGIr_9`aocZ4I{%%+˭f2?6s6oCoS5im/r SͬZ>NZU%Vu[FiGlٹí]neK~.px&tt'f6W׉>̯7T{oQkſn}m~s{|3 Ңu_1.݇:/zŝ,_UVhV\IQ{>A0> 77ɓ#G|WB!EQ"h"hɺ@wQ3D֭kzɓ'OG֮]gϞnĨ=44Ԩ b\.# K.^CoN5Duuu:=P755=ztܹNNN Z3bL *Gojj w9pf̘hѢN1p8rȑ#Gz-Znkk;{~~ѣGᇬ,:bBCC/^8bĈrao߾#G$%%ћL&sʔ)-Z`x @5k! cĉ-ZpsӧOd2''p333Ù3g8N0ž۷oOMM=qùz0tӧ>\._tBx'W\ٺ5&&֭[MMM>>>˗/7| ЁΗ6l؋/ /l.J7nxAX;qDZ}ر. 8f̘z֭[۷oZLIIQ(k׮QߤV7nxܹoVii! tg_utxH$Zr5kΝlٲYfq8x999Vz't .|ۼyN2%,,L)J;& ǎʕ+ #(((11Ν;k׮ ۰aC\\ի;M;t+ %%%{ T(Ǐꩧ׬Yڝ%%%W턐Oٴi>z~1--mԨQs!\rE̙3gϦ7eL/@:*vԻwnٲeUUU?~VwTsiÇ߶m_zj>OH$3fpuu}7~wÏZ\\leeպ7|xyyy"W\aX'Of2&Lhhh}6TYYhZΎ";>'N4iҤI{=Çb_|_xzzw:JmY"٥Riff? &MpիW!rbaa?I7u}Ң#cƌ31Cf̙sDϜ9bV(JJIi|bb"EQiRR=36ct zf_xuTYXX_^^fL&̍RUUE&}+jyyy\\---u㭭 !#Л}g͚5k,H upvvww}v]]ݠAZ4޽{ŊelZzZ`0:lO.%H$"ƶv[XXlTT[[K@S=Veڴi7n܈}[4]zBOIwss#(ՕbupګW:A7򫯾[T*.]T*\P(̤(J7&##zzzvzRcbykO>q~ѣIIItmccjcbbR :8D]]ݮ]T*Ւ%KobLߍN4I.+O>.11n7o 233/@;!d;v숉IHH #;;[,5k}LLL֯_uu477gee B]EN~:=o^єdgg755=3cS 'L`ii"_իAAAϟ/sssϟ?obb3rFc}V ={ťݺuKѸFDDx<]ѣGױ"(;;9""bƌ-&߼y͛k>?zӧ7dBCC[g?hР7n477nݺe2ҥK]\\ 9#.}:g;!f/X`wsppXzuu+<{lb2nW_xtg_w0J_^qG.\A!C|*pP$,,,,,AQT_0EGG*LT9B0\JJJTTT|||{ppΝ(P(~`Glllljjx0hJB-t_h<mvTO>gu.Ȫ! }<>ӣG*/~Ĉ}<>#GٳX@pP9zEQUUU6 XAoilllnnxVVV} < ƺwN>(LP( w~;@?@pP( w~;@?@pP(ܡ̝;wĈ=5Z||L7dԩ|󍍍nߵk=zrJ:w;Cӄo>|8!!a555sѭ+WtttO77^{6lݺi&L?kE=oGxx0(zKsssmmS_'](ܜ pP( w~;@?@pP( w~;@?`uN+KKʠ(AfEGG߸qcС}D^\q(L:522>Aq3gD <䓳fb0t\.uVSS˝t;v8p駟677ccc&Zqs <8((?x뭷JKKP7I-iu"`w=##ԩST\̭ ! c:0*)JMMrO3;YBn}3a߿?=M}m`*ŋoE֭[w)z1JFcoo!D,wϫ dB(BZ)L])k k~3=&99}KJJ^}U\N߅ޔi&''Bc'i ^Xɭ 9zRV eeo5晹یw&3|DMph4ʝ;w9jwqܹ3gϟiU_II/3f,_{k-E|TAZW[Ѣc33e-9&OyOw`[pxj'0̧~ƍYYYF]M[[[kk.ϟꉓ&g-o) EUVu댪 b<kcx51Uӓ|>RSScaafҿn^[[KN~0 @?3&-PhJFU\?[x'[*hEtSk]NRתdjj"mTԔ|`0x,b`0ʚA}pVĀ+ܧMv…S'''K嗹ϟ711=;0/K}C_ Eu\V)*B(BJVժ Y!0 e\,l8 57`10BT\wݹs+<<|-Zo߾-|||.]JϢ~k{###{46e?dު,ʩ[ WT IDAT)j `⚎s}kt3.^X?lHޚBʬ*KgVޓVHz\Qu ALp[LQO{_iQo{?]`8ϘoVR+nVgT7HŲze_3DA++dO~^BG` pO;t)<8y~~]υwϯ75jRTR^Uoӽ* \C#GO”J:nK*4Z% !Dya,z8Xh#p0a Fqʚ2eٳguqT:w+WL4w| 9=EdKŷ*s+̚2ZMQT+L2P 6i3=m0+{+>>ѭ9-|GR411Qj'|~aBBBד6kk|TZ]iy^}uY"kȒݪ(kyB(VSSoBLcfyX?5zHK1}ψ=))ǧu[JJJ%-&U?XWߨ*ʪ.+kQ4Ie2J7ӆ"Dժ&5UmK9d+|oYf}qN0Q(JVd|&V511z$ehPZ]UY]"Ȫ*/jSURV\lΫNoJc2x8Bȑ# !YYYǏR6*i)JVVe5)wT 0L'3+7+;?{)΃G۹y8mxh2cǎ۷o۶myyyyyytn˖-Z2W g15.I*A%JK]WTj&-djen}Un} !&>i?΂buyhq%}7_{5Ht]3lذCxp@8L-|{T67˹yLTP_YhVi4UZBT[PEGex&J@ʵ=?????7|/uPzSQKʒ+e97+r몚JuRְBA8L6)ol?[FwݻwիW%ҥK`Cv3eV3}f<FUPWQ]|;)ZP+5j^ZmBptB,s.z$BxdQ߹s_h4ׯ7o^pp0_~{キd__6 sjVUߪ,9NfM\VS+e5et`pl[0kA)΃ p0/Q'J?op[o%-[t@L& r4h)JՔ5ݮ*ZQUZXۨVj)n/ULU.! an# <ݝͭ|xFW\133[zu{n=t`Xl/+;/+y)NQ,fTRYUR-oU|xNNFi]+Pأ@`1nn KiTS%wjqYMWh4UUB l&ӄn-q:y{76epk [K./ aaD^UUfZ{z(ojryLḼ[|||/_#%dsu?QȒ狳nU55U ^ʓoBa0bA 7y @_p?q믿یkݭãe[\\.V7nXXX8ydKKķzk...l}<#g{EZM\KIVRy~N$Yۃ("Wj%.bۘyBkǏs3CI.bn͞QQQ=Z/[]jժ'xaÆ8z@La6(4ZI~LV_)=1JKiejQIW[y d08Lk3{}+>!X/^`_Lm2EٳãL?~ ٳgӛ|/hff֝yv=kWxN$(JiνzEjv+ШT!˻S͝kF0q`+8Wpg2<ޟ7͛7oر~㹹_|ņ tAFs޽3V8MììkF"jFE/%{ufF(B(BHJqfE?4p]ͭ}]ػٻ:x\<Ԍ9<9rdՎJFcoo#b>VdNtlR+r3Kɨ*SS&-mPʳlnBa qxgJ0[J/BYRP|駁3fh$ ![ޔi;wzPحVټ熏{n8-EU6׋e iUE4*ڿyPezUWCa1Sƍ۲e ͖J .JH$@ l lT) K,ՈM *JCQ>>>>>3g888<3?`BX,yfPP{wZd0켬f=Ԩ3kRޮ*Rh4-v(Jnҿ&Be-9g8^ᎃQKG@X"DKD_,FX$ii$ lQ~(_ޗYWS.-8L soBq:H bccݛcddKUa}{z02%!#Q!RQ/Ǘf'uejXeJK$b3̭X;r u5PIM;1f}رC Q-jW_f]o9zÇBwdd$mZ@A%Yt\sW%آiռƚ*QX!UGxLYE!BwBd1d[ЛaMzm!>L@sonddlٲVUVK˿FF-,8::ZBB֭[ϝ;\Mww؁"Fl#vޭ@/ini+E +-ʭ(j"BV"-^OAQHd;cqޞ,;#sOsksx+\5k֨/._+!3;[5/jWS/5 2j\ȯ:zW9#1eOunjjnŦx8@cߧ}R<"wݒL\Y+$gE* hdn@+Gg ]~ƍ'O)ʷfw.j Ȕ}-l @HI:~*4W/)?kpe y y!!F66ٌwdeaK%9ZO̙31iѶMs%UhРK. ##.a0q4 uVϩJ(ύ)L{R^()NDJI01 hAV.v˚=_~yܹ OW'0WSKWS:(:ɅE ժ=,C)0Ĵq6zs8 A6z-woo[._͛ `2o扎]t/}* ^\_\"lP(D yzmEzmBBBtg$͞iއiia<hK=66O?E=`@}~! Yu巋3o J 8B"IElH^?9cdJ` ޽{ /^lii A `g3߱4=PTUWQK*yZQYW)S(7NWWu9?(CȐBu2a{LԪ+ZݔlvLL < @3-!a) rQj^/'ʤ28BBeuna)L'+ҁclƦCV^{cc#1bDrd$Jƚ5ugE9:BA8 X$eŕe`#c)~_)?z fgddd2*^ @hdWX-,N)LK*J=!rKo#00D"ʛe>`< yZH[.^>۽{7 1,F]R2jV7V7VDrRWW*qeD__pD"abc߹C-s1@i1]& :t޽7o6l[_w^ @!3k3&UEO Wԉ rX.WJ2iZ5/uu48XX3L>c Y[1۷-[+z/<:Y|Iv^")oZH,C#}Om#_],!!a֭Νk.w܏;z]`z5ʦN*=YOkx [K2'h ^$R$̋h8dw~@-yu\=350\=b%g֖?((N*L*e8R5bAX/= B0cfg req}Y42L2ZJeaa!cHinin_r4^YfBT(+*^/KŹ UgsBafH2(T ʹ>ytfCց{LLƍ% B,((h˖-~~~P=eq$8.K+ c_.mJJr%ޠ4$ƴD 0)clh4ҟm:]|~ʊB^pҥKvZbE $ 3|Yv,>ÈD\SWVK*_S!+2B5<\^!l6dV)9B3P-&}̬,-؆LxQ=Ϟ=?P(իWg, !TSSk׮իW8pVURčp@t\V%3!,/Hg.KUU(4 I/dǂKZMȑ#m۶^ZhnnessUV @+@+U\ 뒫Jrn AGb<2R~ʎCL"S19.˶s܊:dT*e2\.777W5ԽSNeffVVVچ+?s@=|ڶ=k@ 5eեezih^($)ǜ3˂!`@< k]\\*++RNTZZnݺs8'O7nҥ!\jժaÆ1̸8ǣ\n;j;b.NiRQ& \12ٜfhA711ПeP :N0 :?h $$DN֭8p Bh[l sqq]x B!!!k֬9{,ַg-P IDAT.lS>U8dIZ]*RH_KraC!G3 ($)͈0MulE%;1ΥE>3g,]_~Ԟq|ɒ%JrĉPCKLL"vD2eʓ'O\\\.]dllFӧO~n߾`ўRChh;BHTKeqY* ˄ 2\+q\PT +UW ^`2 Q4!d8=g_ f@O)pOJJR__xSSS,YxFFƏ?(-+eMnWbavmŝL"0S Ȥ/;ɷێmDS>zΪF?._뛟266V@,b"nZ .l߾]=pK0)~l;?*QTVK _^-HMH&G8D3B$(gANC8NLt;[nuZ=:_yy>}vZiog-9($!_1H)g̚^be_(GWl"euR"#Q Y#lDv`1`@իW9aXddԩSd2B !بXhZwy[=:k:*lo²7a! »%Y/e/kJB8丢N"J'AQ1 ncdefgagcĤT6t"-śSׯ4?&)JMMzZy{֪'7Ihi:dP6.rQ=O}:|pb"Ӏ>mL"$"4yeAnCuIcX.'VIjRXY|CaQd͘0ucۍ: ޚ1k֬gϞ}':UG),,rrr:p@UÇ ݻwoNNQll,N1c=k}!Ā``@Q~ %_VX#Jq ELXD!ל3ީL-@k`:ܒIѪGmݺ.\8i$X,>zhRRP(rݰ=k}v##e˖}@W RJEB42ft-Ԟibb4wV.nzܹ2&pG<<++U'j%~2r~jfmYL*VJes1 aD%T5d[k%LI zwMihhH&tU z3ΟmjvyPLYS~67vq&OT/Un&8‘RPJ ˜K;?c[7j z-yyyO>utt400I٘Tzc*E_S_U/˔ ylpKŷKod#H&T1dqkfXҙ]":={ޚ^YYyQDZס66'ʥo'y:]y#%BbL,U܆ʛ! !F219X+bAXD!){Dg?wsk`:Mˁ{rr͛>|hnnN$nڴiӦM g*ځ5)# -m>Hr+ Sk2BmJ)U)UŻob sgw9؈ cBϝ;W" pnܸ!fH$͛5kJ#BVP[P_U# ,A!6FT4nll\dɾ}T=w&H!OO۷ϙ3g߾}31Lw eJŋT~YIcT?l/na15c[g14.]B]V= UIBQEJ"gg۫Rpu7J2RJj3ky R (9Ȯ2BC aT j4ƛu4W&4EEED>n8D##!12>R+k%/UU5* +~I DU a4pDG_?=|/)pvvvM}ԨQ8+z]W' c$݈EwbBT.H)K-ϯID&DJKW_\0DFds:}o܆I|ɩP=%/////f7|VV |7>,oz/k k$(qɑR$8dC"CD2P㜼p=\,g'==ƍcǎ%RBF^޽{wX=z86xz\|^YyEQf-BX+g.W*ꤊo"(f00arqLM 4Ͽt󟘘zUnժUC ЫPHNЫ2ܺz"^ D ?qDP["Y!d}f{ kk}ڴi JHHpvvvuu-((H$ӦM#2ddd{ܲ+E )UU†gu ! WJD$~ LPM }-lGڹy80YV̮ixBmɓ't:}Ȑ!L?gggtRUB0k5$ދH+)?r)ɪ52"W+RET\Xs0rR sV!v޾l[ HѪ{ɸv?ݣ-[~jiEAV]jދ^cD 3FD.+ו nd a&4CO3kKsYW47-(;D̨)ZQP/˨)%Y)^'>*{CX,HU}w ۩ފM7fssg R[)eڂA}LP]GR\gn" (d D1c=\LX=luCPdee 4D"뗐ۅu#,t'V jy rbB8"B. YTW_F%h!jehlǴ`^朩&TlnTVV* 6bB<O=p/..NOOW&:Ȓd0G3b^+*fUW*qs*R+D /ˈ ?cƌl{ĉ'Ntt @wr5ԇvvGMJJ ޑ>5미8ߝW ׯG}駭9hg޻w>꺀KuA[r]WWXΝ{a֭[ p]tV666666Q\t˫3?*!;;;OOϮ._ 4/hpsʕ >\=;zw*; p@@=;zw p@P@0 ai84K4҄#:,W :, hvZK@M6v;:, ;zw p:piuNNN.]j}!C8p`;s___j}=ONNa .l.ùs0 G[p?tȑ#l6Nwvv^`AFFF}I:|uROBK߼yX,ng;H׮?z\~СÇ0 77e˖2t7;܁΄ZjժU+VvڤI֯_ y)STVVj[H>m[š-6lXdd/ҿ{vu@ ƎYfnذ!<Yy<޴iӶl2lذ7sN+?H=g^^ٳgwޭm!m)I&lܸqd2HOOOXlܹs@)S,]T(ƌsÇwЇڥïɥ=tXTG;~xll_~uVUgς>GuZMLwQlll=rըtcׯUaB޽{VVVy}}?Q(4ЄEB^:S(=?'ONHHZR: p̜93!!!??H)--7o?rȐիW߹s?}6j5?z$Dra_>}zӦMKNN>rHFFݻw:]:4{{EEENNNׯ__fMjjPT5cƌGNq;v偁oͬUBeptthD0SS?S?EFFV˽{vرfUᕕ۶m//DB4LXΝ;Y]]}ڵcjް1::zG!RϟS]]M: Ԥ]:4{륻aSO IDAT~9s&BHPDDD8qbҥ Ҷnk'Oܸq8@цCϬ^u` Xy^`CZ|9111onH"V\Z q\?3#bX,"_paƍ|ߚ=|pѢEe!ݻw oo_oԶӁfܪsMQ(:F{ܹ8ᅬ=ٹ5B"-[^8L^bjOPKp&X1]C!V^￷!L0,..H9vXYYj5AoնkV_!&і;v Μ9ӶҺ9sdgg'&&~O>]n˷~$35gqwi:GELO⒑Ν;ǎc:QADX555ߴiӬY bee>hO9s}iYYY!OOOMZff&B( @=qoԶӁfDe(**\\~yÇ B[/\.0V{{{biK=(ܨ(cc7444ܽ{wAAG}tyL\m.m?mO}O>m+;0ʕ/^_z@՚~#*:173qfٲen[K{י>t~mB0*:n8U}B """TSW?jT*gϞ]XXgeeՆvwwG5y`Oz+BٳgŋN V\駟~o>ظ~zcc㈈"BL6رcuuuD8~\3RdR<꼼EuOFFFW^-++S ?B?`*,ssbͻ5zי>2TTTK$o!`0&LO?T*gΜ9gΜ㊊$*---11}ݺu/_>x`suu;tj>ׯ'%%nhԨQ,k555DJaa١;}'7o޴ia9jԨ͛7!H˒ͥi>[, #33S"/_0`}E4rH'UZviuihz!*}ڵɓ'nk7oVD"ٲe L UϬjYaR:Gp̙3gsr<===>>R65**jذa#G>}:źxbjj-[: V+ڵk„ gusssww***|||RRR~7kk묬'Ok)66vd2ԩSt&5 H;vO~L&s۶m}Q@@id2/։:{3 rymܸСCIJJzROD9AAAtX,3Ro|Yf۷А1p@ק_r2<<\WKOC[.g>clllBB¬YN-ڹsgIIɆ N:`ccSUUSQQs7|xkإ@g\rowyg'NTeJJJZf͍7<<<~v 1eʔcǎ_I$ҥK>_F 2ϛ ܉q̙vÇ߽{w{߿tt*E-Zf׮]_dɜ9s lGtz/p̓:uR_ڵkߜD"͘1cϞ=S~PZgb돥/ NZd !!! h4ڕ+W6lp֭&,,lݺuztkW? mAotO:5((o={l>}vءʬmQݖم wG >|Fz3ųX.muwn^HD]mF>|Q?RIǦ͔l҄#:,W.ao|Fw[ײtXژD"Qmm-5kWkڂa؊+ָvnfBܥ-긯36p݋w^tt#]rssUM qa[jfu@?~|̙aBǎ=ztV Z. 融͝;wsssmmm=<.w:+1M5οv믝qnw)|u0;zf@wx8&t.74]:,WaiE]b6yK;zw p:pM4˫C8p`?%''ð C}#ZG!!!Zm;;w\S`o޼Y,wuڮ7BH.:th666 mٲeEEEVO[āb6A ׯ_D[[7|CYZZ$>/w7ѤmVnw@;V/quu7lؐp…XS*&L4hPddAZZڏ??߹sGwg p:3}ŋaüg޽sN;KdjxwMض>jVzwٔ)S.]ZDcƌxb||ûbm㱱_~֭[UϞ= =zԅukMts3ry'P(pkN-B%&&6Y% uA-5zRR'L<9!!wQt'uΛ7͍d<ɓD1co2''"111qҤINNNf*,,T/*+++<<UQ&M r劽=J1cFqqǏ7|ܱc[k( v޽qF&ITnj^ʕ+1 kB>QuěmT#Э[BBB\϶m śJ"Q7n;qIRPXXXXXX~~~PP!--mʔ)ׯ_W\xki͝!Hi&///pŠWS*&{IOO755%vC;!tѣGo֭De.SMn23dff6 ٳg]vmܹ)))QQQQQQǎۿǽB7nܘ8q"Ù;wR@MJp:#>}zӦMKNN>rHFFݻw5jG݌3=:y L:uĈD kiizʥW?͝rСŋ͟?_$}oQ&9܁|G}zMYYBhŊd2ٳg\.!iӦs~wLHH@5QTX88ӧ֭sqqoQ;~;ݻ_|Xoo_oNO&3tYe222p߹sΝ;lj23>^}8x3'===srr-d>}fgg7WZϠyibbRSS[{zzz9s{mk؆?YerU ^(MPPK{C4ͧݻ?3''}6w RVYw\R"_~>lذRԦN!F0@Gnۂt8yٲenUoMH$!2\iz_G4|o>EPOy[GIkDEE}G[fMCCCM<4::;acddxndꥵx:/]tڴi.\uօ >o߾M2#g9ضmP(ܲeNC|DFFzbE4^M%IB """~gUbkT*g.,,jG\]]B)))Ģ{C\]]p… *={|駇^pasT[}gddtղ2@ܴfXmixŋ%2@cAc0&L駟TR9s9s Tوo''~Au5''gӦMď:wСz"IIIm.011? V*k׮EM<!`0233Boo#Ƣ"D!bnݺ˗/bD9r$BJjXu͞=;>>~꧃D"ٲe L mOs5bm߾X[XXxA[LiE n5lذ#GN>b]x155u˖-$hƻv0aBXXݻO:zj#H;vO~yTٳϟ?0k,"( {nhhYJKKghh(Ɉ 4\W Vkw}WQQ㓒oY[[gee>>ׯOIIMOOr劥exxUsZ;wlذԩS666UUU111;w$nm۶>( `ڴi2_~W͢2zMtspt//߸qc?hʔ)ǎ駟B'O{ˡC~?|O>:޽keew޴h'''6u~-++kϞ=Rtǎ_|ڵk,Yy׫6l6HK.3_ݼy3ǻ#G,--?J#zKJJӧOkU+_PPГ'O|͛K,VemNKҺÇ;::WT*W\Ѷ( VXWs6HT[[pR]5o>kg (w4= 33SQ2tx:tbg~ֵtuBvPG` 荢ݻ=b@k׮O!;ӯ_/:LA7A pgΜ9q?);vl]Z)yP_{CjkA7A p͝;wsssmmm=<<<+yDDDu[=tx7tƸ00ƽ1DOwYep=w p@@=;zw{\SW7. JK5Ĵ@m,TԢ^,ֱ( ֎g AA΀*PSlS@cJ$r)c vB@?;kֳx qx qx qx` J;ɜ5k@{)))_F윟?P$š5kwpTTT$''pMvZqf]3tHjf8!d!6>p'<eB0ϭY=7OtC5(044Xn6ڰ'^Xpg0g/]bKv:qsڶnec>> رc2eQUU%vA:Fm۶%KL<ٳ޽{\O Ɵd?=$555---!!Ahا'!!R*z{{Ӝ~v~?///66J'ט.E"P(4 !|>BTt,Ɍ \v&MzgfT*Egc6q?y$\jU]111qqqj:hhN{U<~~~ݫ3'p~٥2MMMp!cjuqq1Փtn6maԩS !rLJ8Z|>fvjC",Zח݄I&6R]z'N [s]l]hNLܵZN3\bD"H$6pqq\f >j_5l8sO3Ѧ IDATLY,nnnwJfTQ 0٥2ؤ̢"`0R\.g[l?C|||bb֭[\nWWWuuuss3Nj9::;88sr00 v=&//O,755r܅ FEEX,Cyyyrr2S[[[srrjjjr 8@  3]}С*J5o޼krALIIqttʙ"999??c(4iSVy 5jIF@ L裏Z(DSaI6FpX>P#1b?_&°PGhֳGugh՟Q ΐANܬ[mOu:]AAX,nhhPT|>_(K4Rd29NPPP(d2Œ۷+?޽277ްgyr0]PٳG*r\QWWwرO2Eˋ訪ʒd;v`ޞ($`-c9qOOOdFKJJRSS􍁁}zzz*++Rɷ\.?tݻw?ޘMbH$ZzQpHTQQT*=dFDDdk׮OܕJ͛JPCc&'Od2V+&&&88XV8*ohCioo?z2f&c]ıjIFNill4_~… -],¢?Cc3qWr_^^NmwttH$EZ=6XbMn`,Vt66to/%H$ `0\\\"##׬Y39::r\[[ƒX,6^JU]]f=<<3*"""""°(XL87oƋX,޵kŋieggGPU0 _Xff@VUVVF9Ԍ3!F: k]`R~/|G^HH_AAAnna{vvH$_19??ڵkFVP(6@[ܹs֭T]+V_"dee}7ԪQ"666)))331 T*l-[uV???.U]]񢣣( NCt^}N ̝;w͇>#G֮];њM⦦^.p¨(u˓\ښSSS#'LpAXXX7o^n]TT SRR,8A***`FSO_? /By}766Μ9sҥgΜN w4q7c+W\.BΞ=;eʔ%KBg͚U[[;p_f,I{̙3힞D_(d28pNbb+%BΕ Fx~'7 P`Q#bUpg0(˞01F"8B@;Ɓ[޷6N<7ɡ J9eG:ڶnef$3gάvpp(++wޓO>Iv횛ې:@,744T*WWW>/ 'NHuָd29NPPP(d2Œ۷- {\{{{BH[[[VVL&u gPΌrٲewލH$;v <ӄNGuuuѯ>2 Ŷm۾ \h;믿'|3++?6k7o{eee+W$dddㄐ褤gyJ߸s8~_3gʕ+S...999/ -!E"P(4+**J@2̈B%Hbcc# !_5Caf͚u1FH4 W>>F_ß8q>3l>uux"X*sGyd VaH$f0...Ғ~ym6jڨ ǙcD0~8pW^tqX,6^JU]]f=<<3Cqԩ cڵFcr"QYerH܏?v}͞={Ş}V rUUUJhX,޻w ivvvd U5 o}'6m4u~Gsss3*xFE*XH_u6uf_'qΝY~Ѯ2BE͘1dخܨ5kDGG[ Յ8;;GEE۳E"?1^^^[)i#G(e˖Q\K/dS0'q׆/YTT T*l6{˖-qppOLLܺuꪮnnnx덍mmmF=c<1M*F'z{{\… X,ա<99ũ999555r|„ G Q ܩq=pÆ +V01rJJc\\EcAEEErrr~~@Nܯ\g]O?MIIYz9s8MV:=z/##eǎj03K.+&>>*Bg0!sq瞞2Z;L>=&&&%%嫯vcߝ;mӊ3#<ﲝ7xK;W.O\"X|8/ugPMmX3ghg3W | >1đjj&:'Nw]|||ee3NWPP T*+ [Gdr8 Pd2 !۷o0ݕ˗/=zQzxx^DN83fCFlllO~5f bϞ=R0cǎ߿ʔ)UUUYYY2lǎO0裏|b0ϝ;W&ݻ'x 3q߱cGNN[oֶn:BN}#G}]>`4ɓ'LU V&mFCs:Fe۷o2 Qό}ƌGYvG}GB⋌ BLMMd455q8Åz&UTO1isΙ3g^ꫯ~  [::YEs"H !+Vdve2F={?k׮ٳgSPr_^^NmwttH$EYg֭7F{bqJJa σ7na^NqqqIKK#R)zWZN_"H$j`DFFYƲ7m^UUUTTouttr-0dF駟nݺUVq^zCii(IY,nnnwJf%FghѢE˗O4ɓ>aW__'3ʼ۩Fj:%%+--uvvjlC~~KLj]v]xPvvvd U5 }B0B̎gFr}(OH`Rvo߾}NNN~ڲ@SQ o[ ; wmgFxe;U_(N8@q7==]&U)))IMMMKKKHH7IHHJjx*{&''O>dX"X,BQp>_QQT*:dR DNO=<< "3s]rE._4VtI&jժbbbjél5?~n֭ȌޯJkvPMMMp!ghhi:VVS=͚ѣ7n|Ǧk !Ǐ_v}fϞxbOOO}mO?ԪYHVr7*//ooo;::$IKKˢE|}}Oz޽}0N:e!C@J_u6t_$qj:bD"Hmie_~ݻw7ol[gg7 [&{L6uȌ/8b~Tj6AU狊>AI7,hC9`Nk _V^UUT*fbx޽6l'ّ.Th4 ƦdWP\7?s̙3gv+/.t},+cv@ 8w\NNN{BiƌƠ vNW__fkk`_~pǝ]]]-? B{WWWzzzEEL&7o{RRRRPPoD+xyy9;;oj9P(-[F7o޼y *,,tqqys>0>}/otRNNNjj#Gؤ̢"`0R\.g[l?C|||bb֭[\nWWWuuuss3NjS0; iӦG}޾gϞe?Ӈ5\si4-(|d/L}:̲F-0jjiFWSSsСwb$ bqCCJruuBP_8&pB!$fddl߾= hݻwWVV۫իW]3qF-3z+77w͛6m:u*!IIIoְi BgTr}|| F]]ݱcJKK?e}O___},LfxAt?#FY,fFm۶;vرf08lqZ"==]&z,))IMMMKKKHH7IHHJ4儐w c̸8{UTTO2SL D|0hX,BQp>_QQT*:dFDDB]FF*qp8 0 S߸{ 88xF>_\\L:ucɓ'LU V&k43r&b~ᇻwzxx̙32'|򉝝K,_i{jjjp8.14qZ81͈\.g07nZ̙;ms .tvuD0J=<<4O?O?0̀@ /^btSVr>Oyy9X!HZZZ-ZdV\ӳze˖ږ~W{ݿ?w)))>>41c!itz777[[w}Yƍwww 0d7Bߞ:uԩS7oޤY,VXXXDDO?= Z($$9**Jߞ-Wrvv`U9rDP,[psɓ']B`wV{9*YgϞM-~ꩧ9NK&%%effx<!Jr9޲e qnry<^tt4gƍy\\\HHHoooyyyssoiX2 NCRb}DGGG@@f -hbqSSSoo/]paTTeyyyrr2S[[[srrjjjr 8@  3\~󟵵fZf 4ȔGGǸ)<***`*qW7o޳>d2Mi]z֭7o655ݺuz]ָ={*XYYyѣGBϟ|'|rt.miӦh;83.jqO\ڹrQ !oQ@=iN1QtƱx( y:YAk;Qo;vsǡ~J#'gwПs Xeޡo}YٍgīCh֊?Y1Cot 7/:8ҹYm[mDS{PPPPPPbb;wN>M%/_|g}d2/_| Ռ*:@,744T*WWW>/ 'NHuָd29NPPP(fddl߾= hݻwWVVo[XX7ߴL>矏 === Rۛt/^_|ŗ_~jlcʕTewS?2̺1YX,DW6.***JS2̈Lv57|3a666R ; ]H$}m*!~š`xɓ'LU V&dj4tJ>oX.K`(L%JR$QzyyJﲱYpahhhhhhHH(,/p yzzzzz8VVS=iNw-Z=mڴӧO:uᄆ>8TkZÖ P߬/]ycZ|>fra;::$IKKˢE|}}iE7߄TUU}˦M {:u-ƘFs"L%T>{lu@2R Vt66n/EH$f0...k֬?coo/!֭[ӟϟOŐtԩ~pLgg7 1k `00>|844tƌ#X,6^JU]]f=<<3HvBo~sUWW&m!gSL&֭8ݽ1bx޽6l'ّ.Th4 fԩ`ل۷o6Μ9s̙-nWDw1G ꜜ!>>>4ϡѨ]׻:88̜9ɰ?˵ ~Cc6q +((5lD+xyy9;;_vMߨj333 E@@Q]]}9Z>~8|Ǭp>0"ccc233x<Jrfoٲ8񉉉[nr]]]</::&JJJ.]:mڴ򆆆W_}Z00 NCcFMMM\.w…QQQԵd: &p8@fXСC.]joo9sP(|' 2%%1..n(g :MwAӯ9c*q4i ܹswfƑ; wmgFx޳ ]v\`Q'. zTKk-`"0-P}{UL'H3JK ڍuVsF?f}J G'Ьݬ!~<Acd >o;Yz_~Tz~:̲F-m]F{GFm[m]9TUF]rʕ+Ǐ'2ptbAR|P8qDuq!L& B&I),,ؾ}ݻ+++sss[[[7mo ?;`1כ>,֮]+FBgTr}|| F]]ݱcJKK?e}O___///j*++K&O'Nl2~ `L%?gΜ+V={6==7ߴ^lC.ɌJ=%$$ $$$TVVJRooo:s9;;=Ç;99wyXG}D)))h"E"P(4*+**J@2̈BRАqɓ'[<:wN $\z*Yɓ'LU V&k4ftxBBB,uwGGG''&fMMMp!cjuqq1ӲE"T*ݷo_]ׯ_paKgW'k`N믿Κ5* Z|>fvjC",Zׂu:]vv6ϟ3gN߽b8%%Űn7݂`N.t:]y}ju: D"P %22r͚5?644l޼߽\.װֲ`BVR`YbUTl6[_6Ǩ̘1cܹ|{5 8a*q3}ZNMM8NllUcww*RdK,ݻwÆ t]Bj4aվ\.F H.Sٳi|aHG@pܹ*++#jƌƠ vNW__f֥TӍ›mHumllx<^LL̅ íUgggD"c ˺kLB`عGoܻMlkkK-#bcc233x<Jrfoٲ8񉉉[nr]]]</::Z߳.88x5uJ⦦^.p¨(u˓\ښSSS#'LpAXXwj 6X~)))qqq uqŋcbb/|'= B!jLF,swwR*NNNFb޽{7lؠOM/BUF`0lllZk 3gΜ9Ӱ[yqی``2755XV'Ν;~z]ݣ|||hEUxlll 2ltnnn?0!ðp4_ WPPk؞-Wrvv7,j333 E@@!:;;swV`̟?:݋"JeZZڬY̽d>MJJ,**x C*r6e888'&&nݺϏvuuUWW777xhB-[6obΟ?ڵH1`пtҤIs=d]&//O,755r܅ FEEX,Cyyyrr2S[[[srrjjjr 8@  3]sڵGuww{xx<73ZEEErrr~~@H=#V A4w3ָ8pW^immFl@79~ڵk7{ŋ{zzN8Ѩϧ~j0A0=X}B!v$s傑߹O\"X%7wYX:(@O@ͧ^2GE,~Y lP[P^f1kyhv3{6;fxVq: ltDDH>mߟs+߸iDtXРR\]]|P(_uq!L& B&I),,ؾ};Uݻ+++sss !7oߺukΜ9T6bn{zz䌒ĝb~Tj6AU̒%Kz补KxFDDu!c;5.JΟ?oІݽJTR {nذAF}e`-**"}XUex<^cc>;3gH$r@ P999}wB 3lӨ666tz777j˙3gbaI&͛7ς  DDD8pܼ{_ ,0WPPk؞-Wrvv7,j333 E@@r餤˗bqXXɴfR+V_/x^x>HLLLLL:bcc233x<Jrfoٲ8񉉉[nr]]]</::}?𩧞joo?{ /0<' };wV\I7nGɕ4M^^X,njjr .b]7<99ũ999555r|„ G ֮xbVVV}}#<0h)))꾳0***`vޗNc0Cd1PH4l ܭN蜑Yoc:sO=P(\~Сw׮]Zrn8ƆVZh"F>k; IDAT"H(Ur JrcLfDD!JR*k׮ݸqH$껷pҤI< p֬Y~~~}J?@̸8Ú{-]3ɓL&sժU}wjSu05 bbb!G5{'x°_EEE]]Yŋ_xᅹs* }]ϟaÆ͛7p yzzN6mcjuqq1Փ,+<<<<<|ɒ%Fnݺk4ԩS ! Xfoܻwjݹs'!dӦM;w'N wzjZ.|۩펎DҲh"k՞&L4ɰzH;u [b=41t^__kݻwoݺu/C=DݻvÇ\re_H$ZxHDMVtRLH$f0...k֬Vʛy<^JJ/矏Xblvsss{U*Uuu5֗7tPUUJ7oڵk\gyRQQ?PS_xWTT~[n>|xݺunJr޼y7o_ꫯZ $0hnj{TTO眙y.ΞyO Dd8qgѣӦMh4IIIrm׮]A/_`T??/BFi նݳgONNNLLD"ޑP&r1BFw&9׉wy ?z~q>Rբ@C~" z֡Hh9iXcJ?2RꞈEX^[|_%g?}}=zz{1^u?S;T۫g{LuRՈ6 {-t?kVIbuݝ[bM651.0F ;@`$f;@`$f;,߿?..Ν;---;hРZ]ҥKP(<}'|ڵkѣG߿ye2ٜ9srI^xᅺ۷o|rUUUPP5g_LvŽdʕ[l }Qsh +\^Z:qKJJBCC C 9zӧKD2u= hO5{G ->$eY)%a݇,ˆq{maaqܹ_~Y( M6o~~ŋu[tly0A.]\\rrrVWW'&&4\D=nEٻw/蘕uQeeO}w>o<ݖkD c4PLSիeeevvv-\LJRAR1 e۷oӽ=//֭[Dk׮ZN>=p@kkk@PRRUZZJDO}u[6#ESJ@a}ԨQ111vzku>})^^^D1tP1,޾}s|>YJKKO>ݵkWWWW-ZHD bΜ9.\P("(000!!eYxH՜} 3MU_;v8w\߾}dK.KKKӎh4۷o/))4hS}J駟ZN: ;L$ 2D&]tF]VV:Ҳ?9fS/^nݺ۷>|800a7n溸Dlٲ5k,]444S*&&&j3rٳgyj:33ƍUUUO>$ljFFƝ;w dccS+Ç?~C ;rƍSSS9"H|E0%nnn_|޽{._V===G9}tX֫WM6ڵ֭[7nܰ>}1cjMp]\\z5zp-<#G֍$44.&&F*ZXXYf֭ϟ 3ggD& `w"3f̘1ca/60 ,,L[>gϞ={./hJ$E>Z}/F1w3r۷z :$n(T3,k: 6XYY-Yԁ\x?ܷo_}0UɭSUUnz7ol3ܺus#FhB"wMD&Lppp0u8n޼yhЪ Ů]h񎎎ڭaÆ͟?_\q7<+WƌӖ/[m ի@V^^ћo٣GSV||+WlllSw3 q0*xaԁ@;Dz,^fmCє899E`0U q0Hw3 q0Hw3 q0Hw3 q0Hw3 q;w7n[nEEEѱc&M>w\F?/::{DTTT4|+V:uj'O$Gye٧~ѣsMHHGD[ly;w<|T'5!rg!qƅ_.&@ެϞ,[ c- zouy]v-XZϟb{{_=&&b=eʔ#G~7o&׿DϜ9sϞ=r\,|>?..ӓ>s~s^renΞ=^ׯ_c#U9}G}[o5-SeڛR)d,lBn`XZya΁ٶm[NNc||k^}U<#FcǎkիW/Z-*++:`.&"a^{5e>U\\ok4{lc+gZx<ޒ%K=2@滑O)IzugHo"F /rٲe<Ȅ Ə/ V{Neaa}|u˲\tl}_{ɿ ԍT7"Hb`S]2İŋO>=""ĉ}Ϟ=O:%ˉ׷D?TPHDK,0aB.ooorxMmll DV= aeeem;4Wqqqzzz@@… .\h7x~JܸqC뫫׭[g {Lۘy̙" ^vMEf(==س4h͕ؿo{NDBw޾_uEEכdgg>ĉ駄Ec ''_~}ݺu...D~^zo߾ӧOW*{޽:D4f̘cΚ5+;;oP*ƞK,z^nn˄ LLJJ @P3fE"##86iҤq1 ud۷_|*((hܜj-[|SNڴiӮ].JrC:˗/2m>#""|gy4ͫN2E"9s&--_8q"[{]x+F8pœO>=x˖-[`T*%Hkks'O6Y^ qo,e/]*]t8qZNNN ?r<55@2UPPT*;vС G}tҤI jggg](777$$DXUUUZZ;LR͟ {cqa.\pWnٲ%33sѢE2uwr]ZڰanK>}t3{7Z&ٓ4ͺu:cΟ1;c7Jh~t4HK,Qhh(Ǜ:ujLLLbbȑ#RwnqO?fT-MDXaĽ 뮮...DT\\lmm-JJJt{VVVVVV- +ZX UJw}z:hjW"zgCLBFDx8!qŖDd)4ID])SBy<g":qԱ@wt 1{8y|SBfٜrPViX@?$@iJ%vHS!q豢X"*;C ;-1bL q0Hw3 q0H8E J1u,wmĘy97Jr7];w "/"si ^Gչ;QVeY[DD8w*HVxH _(`hkhz@ $brSz q"" _87Q@Xbfq@4,KDw L @GcxdBֺwzc!ΞJ: C}~㉈!TthuHܡSWƏ9֋ IDATt!q0Hw3.b *S!qK("jⷤ CMxDT,6u,wh:kxN *B$`IeM@"0uf짟~h4?E&m߾UUUAAAwww7a-.|řIbeX:\qoSNݻ7==]ۢRV\y!C^~}YYY& ŹXDf߱cA6mx̙~Ŋ/Қ5kr޽{Ma+1L'8y:X/|||t###Ǐ= =yduu{ШJ0u w޽;55uҥxzBQi蘐7wutKJJL2`UV R"rpph8a3sNg6pI/+,WW(^:Py8j@@X&Z;ܿ 7#:'eN`vf5@Ľt["##]]]|Iѣ谰0"͍:t m!.9)ΝuYb=aQ/+ Hz#Glܸ155ȑ#D֛ ﯝ~\vZEDJ{L~BЌdɒ֋ƬD5k֌56l4u\FаlLm"ZmT[YKjVc!9% "i{^#^TTqƍ7/X`nnnoZ$ɢELLPt׈ZH.'"O CDSzi c0Vƈ+)))Wvڲe:wShړQ |5PʉJ ";X 4uh+9fwJƫ'Ne%:КR{Ȑ!}]NNΞ={xʍ7/$$?k($W(d%=~NEDBrKRSGg}\}:/#M?~G\C͔v YYBD*,9n9s<Sϓ'Ovss[tiuuˢE^z%KK˽{Ȑ!ݺukb|m㋲.ZqkfɽT6dUmikfߡ(mۦMfggHwرfDė&D|ZV@l=;la^^DD\uth6 c0byC 5+lyFykWvBV$8DSG++V0p|I3HZyaY ;Ky̫*ߙc!:_+i {|iŪGxv :&C~jᐸv. {r=gwamը?Z56[neqAu P?j޶mP(6mZPPCrr;׬Y3u ڻ'4>q^=Z伹+w;YX6!/{T"}壟4U0БJܟ{9݇+V :x𠷷}ڵ'O^vmxxxk ޴+'hw`0b)cx2ԏ>d׮]Y;988/*j-/r|n0#۸UA"s܉H,7y{FE;cD9su)((nŋ[.00W2e՝"W{!":;vE0?{o>}=n̼mX;6r;ũ'Qjv%ǽûшB>=i?ƫdž!O6fT8Ϥغ|㼚ehR,B=-뫫aDzI`*_߻ `֌H_xiӦL6ʔ)SL 6mZqqkUs7ƻPwQݞ ,mXVRmǷލ4+SHhqԎ2cocW 0Dֹyv%"+H,x6VKgyۍsҊ5+p,tw*>7>;lXZ;ܹ>KKKHMMurr/wJ!8Iv[gM*a٘y eR*TV¾,Z-wZWWܹsDyCфyK#-|ZCDNqˋ2Jjud&yGe'zˍ9DߐX <} ;v#B>K 9FV9=~[MˈĽÃ[R,w9--6l]Z%F0+ 1C:uqֶD4PLDEҪuV$-9NNuI< 񅙾N: tw\n*rFKѨY/>ȫgըEj_L&OZAD%&%{zz_ƍڇ]tճ<LxsOO"*U]n%zX;O9~!WѰ#7(4lՅ2mR;]ne ;z6_/SH͞~nk\B!;p݊ŧv,1!⛬ _)qJ~ypŴ9J̬R"*Vʸn(Z{```FF6;ڵsꚴ#77W&տ7@ 'ՂѮ؃j^̿jƣs(*;YC߻7UJ v" I#jy?T[u>V$!">ø[ڎ;g'#Wb7K[+󰙏Qtvʌ_]Bľ4jt8WV*_:.? '~ݾ^M*#'4'X"{./ZWJ̫y{~q:j(Z/HDƍ(,,,''gڵ۶mKLL n0}'=ۧ- z({itk͹VVRb|q|-U)B!{įRJ PsFsihn8'xwٻ:K5լp&'^.ϼ~Ϝ'jsk7=,lhn!DҨuD|3ED@ZWe"?/-S{jj+ɓˆ"9sfN֬Ycgg`"ZbEk &DaSj~ ѝb"j:Ŗ+WZ$DN 2BѢ^#DBmn\!ݝG2['Wn4+%|C a5J::' ޅk։Dp*+YVɪ6`weeUGYQgLDJJV y]zw#hƭƲe˖SN-Z"##'u?O4#@q "i}>c {rGW":q <؊3ÿoĹ;%.`J9vbàH//$"a\-lB._ dDf5gfy<^dv4"аH|#zWBJDƟ|56=]vX>6N*PVuI94:bD޽{ ryRRR@@@qXћ}kyß򷭹X"bM\4ۀCכX,. M/fdӯqe(_=<޷#c+ .>QT PeD޿͛7ڼyBݻwMEN~eE/kxt#ʥeKa3?|TwwBz(!|??:B s;[;s>5}=OD޽u"<~f4sⲩԼPAš7\,7)JaW "Ш7X ӰܴV{yyVVVW#-[899-]mܸGh7B~%d^\˅tnc3mnm[ZUm4^vfƊmcѹt"rX ܒKDR"LVs~ǟM6d 0ؕ5DZO6~/`P{;֎UFZ^;>U1zoֈ ɓ/^" 5kןV $|!Vmӓ0Da/rky8޴.,#ߤ㼃q]%6&VdqɕWg2-Kijyi qȘ0aU׮];uԳgcǎ˲۶m1cFXXXHHWnN@haanF8UJ R.FnN8GR ,Q,<U~]RȼU6?S ZόXV;% uYS}v^"Y{mqVw1}j&5s5K R C8D0hUQb퀡9%%%}-*hEbb N:s/xV:t1/=saRB^W4oG7 [5iwrW;WèXvKbs+0FE-ӄrk/E||[˛O]%Q;y +M}wwp;<5JJ}p novFDܲ"{VѢEDoM0 B0KT+ $퀘/1a%jR$S) նn&,ɝЯ# {> 1쪲m>l妏v.?vy'C?;1 3~VL4ر-\ :Il]IיL%"MD.^²whooû|rǼ}wx somOR]UƹZ{zYVaKr*+kDİ,9IGJ;06b+"z.hXݯ\#"_ք`v.U)^6kp DD ʍ^ٔu`@;ukoڕ SJscVQ?{l4v\hܡf1ӗW(eU*v U'uVbY%}p@Vل)r>7۸sUR}OߧiG^2֙'ƿxs]윛.żDOz meU7=:'k'Hމ9UeDt4r3C.1V*HDU*ܜ3B!/$Oޣʔ'*FsfLz'=x"IՃ&q-_ޑ8$|+wguϊ!KѹPw.-H(7k^)4nzycf@|Tpk=L$1rf^Y" dH(2򦍙 LȆ:_#+j%K(7eCN]پY^AV]Eƕ%r֤ q+&^S gT_8~.໋1 -_-8ɔbqmJuU)QaN To1eW ]qxߺ jM%Y0BHaB, utXԾ%rgǺYL/LV@g'cw%Mҍ_8^QNc^+uynN]QcғLV^]>ٝO9=Re`5e#=IJ,%W f5S8Y%;)(ڵk`J"wۓ3?NQ=av5߿qz3eL"mև9grum Jڗ u6r}ޟﴴvބuuuG;vٳg˜UVX,/Bnnq4ѣGy*')BByL,VH$ZlI a> o8P}:iWGߋ @.A*1])۳ 8ӤFΜT{k:;ܺ-b63旈YQGREþK;:gɩΞ=B܈^#L:=@&[Ee<t'ig@yVr(h} +c0Ys^}Ls3k"ɓh% վ\ډCd#]Y$[>@?A=m9B!Pǽ5k,_ܾX^^q[򓮮JJJZmaS,-MJؐa[<*nZq~ 鵋,f'J?N{r4sI( _O >k{ƠiY@kuLph][M#+ @f!.> ixO Kg|Սb~s ZB~o S9h/8Q[GH(rzK+pQɻPojtFI #?qV@PaԻ?Iw2OUT`B]"@45*ִZõe UꑧFqCY0{r/ U~ ubؙMO"iJ=մx^.VeS%9_2B9{Y~; I"yHNصk¦uhrU9{5Zsgn`)I&H DI&^Цe50/ Y.SoPgwX[}ێʴ+Կ~UmZ, ,WN #%l+#eVp Z@v}YxrD>a;:ys{g`Tu^8~]*\$q{ϓ> mU-%*V9ƅHd]YҫNl.!7PKKr;Gp֡TW7z?'lYVje{22@^CojkZJU餥ڤ#'=Ҏ/Evmcʊ`e:Yu2 lyeԔ`7ow~@g18 ֒[NQ99sAC?lEPǽ3k׮4h /@,)Jy"fm֢Kh~BB`{'TF!}Ua2W˿ӪK.͟& i]!6^>grc,Y '+dV@D]~?7Fi"nq3 i,$&CrEo&"7ג$}<@fmy@m9)!.oxkvr/o4s_=e/{'yap̮δ}Ͳb&W"?;ΤZd"kE)R,v,c%Wm;*}fa5;>q%zTW tY#M(< čfkָNnV__=Щ>ݲQҦC cO&-y4f#C&S#=6Q(̃  IDAT]aVXh"H @CC!<ťJٳnjc7, qk+zg&*2GkJ|r(a#Wԧ Z{\*&oAV%,} a"t#1_H`f36 kb^h FL fD]>޸E ]ω0Z*tI(ؒڨרcx[pO `go0k턭8e4VQY7LQc,BmaBmM;!"Y%<a`ṭ=@'/ rʨX8Ry5yN=XI>W<$(RuyR*Lvs` ڲl1ѷ:R,?b\-Q(NCWܯիWY&&&?;DjZ,WW7)8WSSپQR6SL$qtzb|[Vo#; Z,hRkғ ]GY, :x< //UcqHۡ lZiŞ`AwjCj?^:Ёt5.sJC=;®lv,K-G k<iᏄ|u&CWF{ <~7No˨)״D1~af'klNymH|ls_3eE`['/հ;VWkqUlbzfgsSPM^H9?.׼4bR6M1^Z |w(/J({ܞf:͛7u]v>}zmmѣGÒs΍?^Jn@̊zN>Fez8g2jHEn?9I2I?bzMPgҿp|ӎԷ$=rU%U"9UV+"Թ/ i{7paUŴp~Κ^6d&|}Jہ+ Zօ>:Hܽ*p0 gUgo XңM†*lMn^]6{jFL>[EEC2ʄY0sL!PjN/}27|W3u鯧XKܐԛ3ccb :5fP: u;Jii)fRowߝέy#Jv^1jBm0 kN}%СI}>f0ˇâСj<-wS#כ  .UleDM#IyuE egNjOjUp0澾 #gVY['B\hL5)fFn#oC@g1My:SjB˶xK[{݋b^@[d-v]=% ziOޅҎ{O} B(aNL灈 }mI5;Xud; hLU{>paF(U_WU2" lv?=zjCgnw8ڨm8ʺ/;PNE ݌{b1Gr4N=kc}dšCW7 tl Rly~˞E^; +}SIdfʃ?·di{g{o-#KuYCь3rb'aV`Ĝ[FxkDT-ћ!ߜoy؆o{O6Щ~mgjHQ#(]: ulO &Wߵ32j/[y4F0dךlڌ*bޞ}PbyAUiDW۾8I; ߆Lz.Ca3;*YlQgi!2)2-]^:IK;^|wx`qp' ֢50ZXcœ%z6IXK|Y^yTOI_P@k6H 76]@[ kA'i+>bҪNOolc-$@!2KեUv_6E&PǝBY#/Tk+s}o`Ѷ8)uSP/I gHwzox7:=3י ftwkh+ERf2rȎ,zvwyZ ?$9 2km-sfOq[U`oSz` ӎK|\,?K .Kӷdo8YSP_ dfuTˆ<9tz[#39'IOfUV;IiV9ҽI-BiG򿴓'dNKYƦ}E"WKv/| {wҽL `{Wڞb_O~Ggzz )aN_MBucS"1qCpa9?S5;oFmT17l%@n]eC5'-Mw*I`R"3WhxsqmQuϼ4ӥ9:$^Š̭E8˔\BW;bcfMYG@o1EoHbߣVZʰ3'@t\D U>dgp;}H룇~_wk=qI] W8 @^o1ǗmiT~tݽGV<8Uͅc =:Y3;ET]ڲbY̥zfۑsEjzqP(m,S{R$WfRjkr0L˒=_WG]y.+~m!1N.9xdiNGlM ް-gFIm,4C2Ymĸ9XGu~ne:jaN#cf_ Ww_AٛqŞo_;_m _^穇=YSOY򉍿 R%/qla .\: =)3h]}Bbg3|#洚[ev@B=;tnPWDjQ1`b: ~Oxfg1@SNZx5k:aNdrr  3ͯ+Fxڠ/l!etJ(;#eEUGԚLA!1u&tXJfjyaYFgÑF`ha?yԪ"#opRAvC*34 % ^SJ+4f2=qP(1g!f]Hi ;J/ԛĺ'r_[Ai. ~h,nM4###C}4F|6dbQ 5ERcLeyjʨ3q&b}xN\!ɚ+Fa U93ˈ&ź?KG]'$: ',XQPsHtt3D7isVI='lbeXweW fYL_gksU_Y'MҠݗߘO#zS($_R9^ʍֱ q qD]1@f]6Lk6E[D"v/<1nw%nf^xgtUE_ 0̼ى-'{7,9; K tBՇB1B,!/cbEAÏEO07|]ƹj rb"ΖfԖ3`xea^[z˒]_/"c\*QۡJ2 bϞ؂:ƒܔh{H{ "N5 MoE]òH_3:ljGXx>,T$"N ܩU;7e%>rg +&<$gIz|' 0#Ȧn40G*5fpHjT!3<3Uİ\buiz͕DU7@[b7ؙsf: _ ׆,¾LW9;kB.Vd[KHXa3{Nd kٟ,ut0wL$&^n$60'. L%^Y5RM۳8t(9X>p $gy?{2, %HLV;xbu 03#>x;rq#+3v)6}Dİ Db{Y»v?wN*qP({t{G؞ڬ^ua#IQssW_"P{ RaKׄ%sEŜS_hq m8"wۓ<6p@ ߖbc8J@] κȁR,%EOHlN*hlI$iZX_&ŭG,up#fX%oݳ08Y'rApmp쪆~rۑm9/ܹ;P(Jo#K sJe!zS[RzG f`#*1 'c>CPjxA8R@#Umq d@&@f]9: ; Rg֔ L$j\lC- y4?zY`ᣄ?5d20 UozL=s~p4<='emZ(೔ alȌM4V!# gY+7J.,$KT {Ӻv+w /cSf" Uſ~̀WOm")+2ܼo/ @:ɓe`G ՇeܺJ g281`Fz.mT@ u)Jߠ3[k:Jw[{zY `w,Q IDATs?]:m;7󛧷y~uOACʍDA2DNӷ^{QZY`{N iyVq*jMzVl, e;rRjkPeZggo+/#넦< N҈9tJ U ua|ϕ>Nݟh7?>$HfM X к*w~C׾̋S(Ꮬm)d"N{. q򬉿✑4f ONwj-rcr|k̂٩KE.1_9R,u o j͍J8CnHJos$PWXC(.JokwИ]Mϧ}kV DvMoܰs#QPOTP@!AQ;rR{DJ/ǭ8ZIJ~T$>٘_2pf/cQC ÚUrH23 # O|ף`6Na{fuqɩoG5WmYf'8^Tlwq=yl^{ a3wA*X5O87%%onӤPzCp`ap`&ͯ-=A I$OHq5FWL7kRtfwPdS(PXTzMك~C }yID;XB$QS@kiSL 縸#Y82mS~3BN<HQ#@dNA`_B,U;rR*5= ƨk) p &֕ګT8˔.>ɕ IZ۞s%a3#E"y3 E/Xm9Gz$2'L9@`ԛYuϧV)PfՑqkC(7)dMCJM1<_?A"`Bm͇{L [|`r3 ĽKH\'H8Ξ+LޓB=gFvq| 'K_9oSr;YFwKj:T쯢3gԔr]='ҷf+$\GzpIqޡŴk34rUԔ)q<v8n:7#Rxwh_[Ay,eks=y˿@&O rzwl\JU3CgnNrrOB@̊v,|Բ6TM;˿"W=~x k5~?a{nʃ?N-yw ~ϦD:j~@ڱA) k]]L}T Q^/2loQ썄:ÿqM mPǝBaNg(I M>Xk^ƙ\R 0v*.t(eaMv0RdՖyf;iR9>oZ)}m{rrT%$--HJ: =8 ks(KLuP] JgnR9`n]_8L5s,ѓ7uWEMH]>'{d;cXvDkj% ص6qP(Ȯ[(J;yKT[EM7>Փ[ZKmTPbF̺)N-E;w /h8Tx\]PݮT$^ 0~qҗnmiBPnXTB&$2[sR~|p1;Z?I?iYB!#(N^G*@S( Bn&b)/DqݷLߦJz9R »gwYB$GVb[7e&tR4OYF]2;ۉln~D^ܑ-N"4 [^+mU{d5pgϰ7? @|ynϽA7TUBP(u$oyfi'~c7WoJ5L13Q̶>-CsZ|M7Xxߛ8l3H[1Y?qwi$jVDzئI2:rʩ+܍)x;D;ɸCw BP(Rʩ_!gJszr ݣL >jCg>KIJVdo/3Z(Uik**c@Š 젩o]}ډ!C5m2|>pG_ ?- BP($=K]J\"u䉼 #xU|{-'?~x]˹&@I/@`:33Bbfė_y.>_<;ZأXrg+ xA ~+U9+ BP5:ɾ(/{/ҩ&Fkg;8-P[m8]oݱ5;@G(\0-@6Y$՚9?"@%=9x)lź(#𼹵,v8YI~N+ /;BP(嚲b]reƦNj9)UϪ0rWOmmvbuɭ񍶖M ]oYr5HΥ B<ԧߟQ)xxDit\i;3=e7+ ˜p#TV&e;7#՗8G3߲S( B鿸) q M rUǀ䡄W;93ꚝҩ/9[ l|^>`v/=#MF@ɰ 5VB`X#BFPQ 2~mF*w+4 f  BP/2n f;GFH@䄻oj#`Z1.הHdNr%YIZx*5K~ Dp=}_?;ɣcZmk$qOnԙ2t sϠ1w|׫2R|4ƝBP(Jfi<տiMYL -qmQo b%qspe;T[Haڹ hrU9p|zp]"W[CS( B׈YhZ[ CBJ߶.^R@HH "d#cm?4d˰@T(M;BP(ĺƐkVQפ`jF*WIeUo60qþ=vy иsNИW+t>(7&4TBP(uɒ3;ɔe}]O],ʨkݜU80yU$}_x[c*Uy0[ʩjpSahpnڨNP( zI|w˽37 fg}zȾbT^PٴGC׾'"y(fX8Ж6:5f=zj[vȽ @g6v4RMzM fmȌ|҇V-tŝBP(ʍϠ]UERukuM<I}+i,/-9f9y. đu\AC[HIq6S0 folLOmۙ{l_\?S di>S( Bxgmcj-ʲ;J""dReA5F!j\!5zc;m(Ϡ&@guX[G+S( BPjLYv~x=ݳ &ZiW(W +rbx0gBbB\_1+ir1ޤW%YeQAvPIe* gYP( BAX 8cӪzl䃅\|7e%z"Br*?C˻o Ut}Orr +bYkA[*WU) M^CU[ߙw BP(7Ŕ[er R]]FM 6P!{IbW>dؘ`2of\@ kcL'/`1-Huw~9iÇoѧV*qP( rC2f&S1f  ewy'Jr>Jy_c qp1g@ra, MEu 9n6OڇXy4z0~.ߔxZ=;qP( rVULhT'˰J ITgWyr` n\@xD?=Vn8]`6VW)ͫzUE] 8Qde=U`Fl-j+,mɯ2qÇG2zzS( Butv%$rFx>L[`U13EW;5b|#$:RG$bYo*4.8gkᯑjkּOr`bP*;6oba6g-=3l$@]캊NneNP( FC*XgSiҁSNźYx- 9y$҉;oҸ2 ZZR:lw2e ){U0 0(rF@A"F4 V(^z^biy oIxzG}o >SVhk*wZyKƺ>YrzuNpSz3N!C?7J5eDT#kNrTc1흯Z ͟#"w#qtMB``!B.DlEsc魐З5[$9;1ٚZwwC?7aYIJVEDM}zU$sզ&E'? tMUUQ$p(QRޛ*%_{7(|0TFk G}:.KhdԃjdMjlq!c}kw^l):1?:i}kJVu;W1 ڡT* <==yjy{{"ikLfX(ϗf?7ZU?u̱Q]U5U,f_+of1^oT;*++J#s\DJIIQ?\?q7\W~ŰK,7tNw*hX}nJ)SXk%XW1㭋 MPW%&/V$:];d2D5C튋93fOqyY;:!ܲVU_wqҤI=ry_^p&::Ӗ~p_gJ m̿0I{ۉhw6+{"r0ܒ*SX!/m[ڍ-V*èXwKgj1ADب5yܸqƍS}g7pifPǏk?KG1&jH$uuuD4hP H$ׯ쟃rrr>ן;wkVTTdff600]kO11?u_J'/r۷8q7&&б@8o<"ڼyÁ>kǎǏ7lpBP(ERi[[ۀ|]WR) BdT*U(H3)|~HҖΎbr*@/@ `{4p@kkkC}0x@`X/>Ь e<hOа @/2L&۳gիW<<<-Z$ yyy %%%*jذag~ڏvZH<\bbbRR҃O0 wiRVVw[nD" wrrj?Lgv[[#FwVUU߿~2dɓL~Wen:=B9z{.33ɓǏRD111 _"ܸq#11ёZH<\]]kkkSRR|ȑ# iSXXz@''+NR IDAT˗/?3f̠AckkYVVf͚QF:uϏ:(bKOO 9q,,, 7lT+Zj\!<<|Ŋ\B⁆CBB5JҥKZZZXhʕ+|͇rͪyrMd< 6:tHӦM3f̸tT*ׯ )**b;(qJLLDSLᚮ͆ z8Jum???bȑcYZH|ڴi BQQNNN*͛4xBp+VpHP 8SEFarjWYYT*rD+**::@ꫯrRi]]]ZZZAA;COJ-[[[$h)))%%%C yWO0 JdDTRR~7_ݽ|Cd2"D\;+W>|gΜIOJ-$hsŤ@OO۷߿?** i",,,H*&&&L;tPRR;wBЂTLjժS R*DTYYanJw% 4"##7nK/)ʳgr)o633S"|GeF!;KKK"jllTB47vX"zD"QrrԩS!;K-$hĄx<̙3\rƍ_~f%HMM8qD"333:iRRRv0LDDĬY {'|~mmzg]]q+t͛GVO__wJ$.R u))ۛ ݽ{w޽(;;2Q L&۵kWvvvssGDD7h'_ T*СCCCC}||ڏvZH<*...~~~"͠rrr{{ѣG 4x6%%%˗/_paXXs>'GFFP'w^;@/@ pw֮]hBrƍ tURk<-}333Q﩮fYv흖zS^^^Ç:tNo%&&BAVUUO8áK.={6 `mڵ˖----sEM;tO}#0qgIJlSSbЕ-[ONݳg0_,lllf̘qI"ϟ?|}}9ݻ{D"WWŋ>mH݉|ժU^^^"hȐ!ʿg|k ;<%Kф 9~iӶmw [[[33+W?^^^}]~~Syyݻǎ{PTT_;;;3 >k֬M6i]-~˗={Ν; .--=wݻwNJDRYY`m۶544KүJT.^͛zGUUUTTTeeennniiO?DD|Ikk@Oc˖-ծ]͉$::n߾=euuu111˗/722""##h\e=pU"ZdP(z̙r%:Z@@o[[["D흃&"J5Ϝ9CD .RGuÈ#hҥiii?㸸8=fbbd,,,Q*ut?|AAAB; $$䯫@(@JBxiNrrr: RSSϜ9s .|'N<~xA:'\\\ 7l`eee`ƏR222"##nڴaƸ>޽{;b8..NܿÃeY._u@7p}OիnM eY={pttk֬d\gIIݻ_t w77xn H OOO{{={!6nhee5j(wwwI&EFF!@z5|ww۷o8p޼y999o~3gNFF̙3bqQQQSSS``ݻ3cڇ@k׮_l~X__oii쌁У;@/: ر#==iͺѣG-zӠp^ڵk׮]5jNr֭C SzqP(z w^;@/rtH2"IENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/starvz_visu_r.png000066400000000000000000001610431507764646700247430ustar00rootroot00000000000000PNG  IHDRkBk pHYsttfx IDATxy\T0În"(_P$P̥r/$_Pej B"B.(j,ǭibs>W^3{s{aYe;x5$;@h$;@h$;@h$;@h$;@th9ɕ_n+OIӈк q*ZNZ?؛؛*_.GRbo^cqJ 5%w4Ux p 0&ޚJ%Y@s?kS%*1͂2;@MHH`n]k"äCIY`ざofv.k666ݓ駟FammݶmaÆ%'W_Gk={444ի֭[YUgF@ݩSB@晌s[h6]yyyppŋ  }~^p?{aܹ3a„3g=:>>^y:88i>Xr"=4whn8''I&=z=xxx >\$,,a10ׯ_l@ pssĉÆ kӦ+OUظW^+WH$D4|D5͝6mZ׮]MLL<==бŸq㼽&Ls|=z044ttt9sӧOeu +U׮];uĝNM9Mp̬o߾QQQ\䜬,P`eeu_T;;;o߾6ljllQmA}FC+?0[cc]vEFFVD)))ÇёzyyUUUܹsuܙI'Nx{{jܮ]{V@U@J0֭[SN 8{g_>**>bO>>}:WsN__O>s F;cƌ mvQQQD޽{Ν;}9qѣ͛0ԩS.\ܱȽ{ q1c;(Uyw?ӎ;~G...w 9AAA,OQz{O0LSǏ;w.[t㹪_~ŋo֌3>|8rݻwSmgr͒z6礦.\pΜ9k׮;w#6ooܼy߿R~F΍Fz>0[CC{ VcX,α'L6_~DTPP2k,nJjnnn_|U9\r&h*T:o<[[_‚|}} nݺm۶)qG>}zrr-,,9>>>fffG ":t05+]qQPHDQQQ&M:|?0bĈ0@p:p4hPxxk5KԔFMDݺu3f͛77nDO<ٳgϝ;wZA+ύFͺ>0_F$vmn3eޞ[RRBDadsrrhaaaD͇IHHزeĉ~?0++k֭ Muƍ7opID4t]v9R;rqq""?9O?=|ѢEzo6.JJJN>=c f"b?dY`-Eḹ,w0 sEٔ*//;9\~;lذ;6+]eD˝牉{JJ̙3DLLL,X3Ef(7jر1Z熌"GFTiRۍ-|X|>U4]2Ve˖;wN8qǎw9HDD=:{W\yҥcn߾===]Wܡ޽KDծ9{JߑΝ;ݻwښfʸˮ?~<++n*))i۶m]ܾ}eUVZZFfkkkhh({i7sϟ?ɩwoƘ1c|||UG<((h޽Ǐ1c˗>||rjW⻠ݻ.#_=UdqhE6rWRm•|Z|nTYfS`Fo-.IyebccCDՖyonnm۶>&5dNXXɓ'/\0e{h$TܜWoE<*nUUjCbbb"""z5lذ>}|'_9i 3fL*ٕAUW$&&㏉۷oի---?^^^m۶=z3:dll|mW⻠\__ku+kE[ Ef]J 20|u* $w+v^^|a~~>fgg_|?Ovdomn޼9i$"f.pCnnn=+q qz׈(++K~ђ+Wq|)R0<.ѣGZ.](--w޲ڛ7or}ײںVWҥ 0zzzF?|W<]t9s̙3R >]vuuuꫢQӎtڕ;322kݺu{eM\h!Ѭq{b 8w˲:?/2eʔݻw/_$"ccc,޸ipGgh/l\\\ U˗W[U ܹ#[~6y4==cTʭ :2::ZGGq+7s^-wZCCñc~嗲RĉLMX\|y5KffV)ră*++#""RM9zѯzqD"O8qC~v)[⳴tժU m^뙬D͵ԐYf#vݦOCqO  v!eHuV@ADgϾxɓ'hzznAwh*@~~qٳ6QO?4rI&n޼@:tCoر>>>111oƐ!C---O8q֭(+++%h/fիҥK322$++ĉhȐ![nرe6+]@D&&&'Ovpp8sLjjjpp07_y#Gtww377K7?xݻw7lPUU=k֬:ڵ~vUx'O>}Y~~/{n+++n fP8rȯ/$=z\~}-[ k-}˫YARRԩS/]|~ŋ,55ȑ#,{vZiiiϞ=Onkk,mTz֭G~#7tss㏗/_^YYy1&֞={v#FpqqٺuD"ٳgOLL۷w޽p 6DDDĄ*eJC?<++ٙaCk׮555tqq鯕_{ΝK*;X7xŋqLLl <(7m~ؗ/_^Zk;0rss8iӦ P8~/޸q#???)))55G ٴ9q߶m۝;wfϞ=vXYw}qM6EDD /ߦ*"""555++G lwޟ|I|||pp*zѣGDԱcG"rrryyyĽZi:::?vvv>rO<9mڴp]]m>TAk縧]tI(gDRTTT׶|>Dt}xIccciΝ]]]ϝ;WVV֨4]]]+++/^X#GdYvҤI ,8z(,XK.'Oݻ_jj {RRkW3uW5aYbH$wuvvyI]]]+++xKabbBDDTQQADmڴ600pׯ_OJJ:t(k,118>>~͚5˖-ۿرcZ9c;;;2NNNNNNl+Μ9õTpwD"i۶|%;;; ~MYii;j6:t lKKlmk}ŋ ڶm+IfffNNEtttG;wH`+Wpӟ\$լp0Z?{ry,oߞݽ{ 7n̪]_Lk8qȑ#tR~ϧ;udnn T*ݻwoaa'WŋW8P Q6KK˧O;:bqeP ڻworrrϷ Q}}GFFۗgffu}\9sfƍFFFgΜן0ajTƍۺuH7shA9qYn]|||ZZڵk$aAӧ֭[n߾e```gg%*###s岲={;*,\~DkNpouխZ mN܉HWW700000ǏW+kkϝ;a!Qi1 a^%JҧAJH@ȀhcQWVHwOO?22rv6qw5 EyW+sxH!7h$;wyQi",Tw,C@җw;0{622 Uw WVVX,n&|>[n*M`־cמf$%%eŊ u5whtt'ƌwh.\-Rw,Ue4w @ q94Lu2* qJI,e F5^k٦r?d] IDAT~4zm~IFȆ z5TI3S}T3z^4Aͪ%U+ݔҗ]3EE C'pS AG)n7 TRR'KkD̓(‰'0rHe'M`GQIII ,ҥɓ{痚:a„GkhZCk>~ԴfS=ۊD3gp-ߣ@ &G}7u5+,,ϗ/422R|GJgbbBD;v۷D"Ydɭ[<<<ŋo߾=??СڵswwYhffm5mڴe˖߿֬YcƌIHHwFDJ$绻+ʕ+'O(=~!::Zo߾Jߑ)1ܵv/<<^xAPPP۶mmmmk-mcaavqf]*,+?Q~caT`>uQFٳw]vyfHHP( ;vln>G褤,udɒ(aLMM322ˍ8FM@ ˫,33ʊLDVQqq3jdu]쒓?={xxxl۶;wwޡ۷oOKK;wʕ+hĉ5 9&YfΛ7oȑj!ĝ:tp"33jUiii1113gΔ%ffzX,fFKD||ju\$լp9;;+ؕ\eٜ۷̻dy9|Z<ĉG/?pK1:u277OHH_]*ݻSa733/^; ""T*HW VHkQpppTT޽{w0LVVV~~UHHϟ??222,,޾<333//{j;{߾}aaa\{Uw-6'666֭OKKvD"6lX@@@ hPW}ٺuk\\۷ 4RE>xzzzjA*/\pj @hsNDu5<~"]Y[[+ҲC Fm۶= +-]f`d,[ZZc߆:tl7iyZ|| f1a6Z;==uǡBH@DDDHZ2Kuy8w|b;_qw|ki_~O^Ffc;5ZGZO'-PVͭ?Xyz%"Š~e*zVՍǽ5GqG/ݨY>;aԺZZ 򑮬K/+ )$~:/5Ϝv6W<> =-m Y(f+b"by|U;hdiA3Mf wsVqA=D")w^B-1۷O88S3ځ9w>_|E/מ7oVoW^94_sqs*h ; "3h6F""ayڜjؠyyy:tPwDD<]T*Uw $bK 8T;hiחHo۶Ν;g;v۸qM"""doSUUգGwPd"#'"ooowwG&+=zpY;x|hcc K3fpOӦM[lg͚UYY9f̘\}U`%"w$lʕqqqqzz'O\\\ÇO:\2|bbbtt|I߾}if [XX1ٳF+TzQ///=2999>>>~~~nnnǏW b$b"by⮁R)˲Ø=f߿*{ɶmۮ^ڽ{uҜFqPؿ={i͛!!!B044֭ k.IDK,b4;;;##XV7꼪f] XYYZ[VVieeȕT[y)N>{nay<9#Fٳ|ɡCF;w/vyվ}^m۶o>w\޽͛TlԊQllYbcc͛7rHUZ/F"""_UFkסCׯUJKKV.W+nfv7bad⋳gϾsε7ssssss@H0LDDD4p@ZS" J8N4(33S!2ZE"Q\\\ͪ .(,oߞp٠%Kԕ(J*]]]O8q\t_~өS'sseݥR޽{eRiBBI7222*))QwH$ߐP #h\6/888**j޽ݻwg&+++??*$$D~ϟjoo_^^׽{ɓ'ѣG={m۶j8O>2 2ѣ|ͩ۫ԁ[Uj[e9qYn]|||ZZڵk$a:O>[n}vVV]@@wɓ'DSm۷=""bȠڵQcgU} |nNwXWOOǏ+ҕupppi!:tsBBBڴiJKKu }:ǭWw;ԣS;--+}Y]ml}rZtkKDպ446TO#Ֆ9vFFFFuu/VqvlH)]T/jyf<@qwl_?{;~ cZT{vFFF ڄwlDIdca)_.xNxTo4[=W.h WqbŊ;h8HSeZ2]$-hq1 ;BIWF݁wp"dHWX$rJ#՜.IKN$"̅I W?I_M5dut$jHYi]!P@DUd`X}bT}/Tf qor3T+,_`ҩ]>̳/sٿ(]5s;gU3ֳ-54 wzsE5HgN55ţ!VZWY}ģ;BA:C ,~\giܶ%AyFf'p@4gebjǢJgߛ<)yW\w{ )+?-s@1 Sh܆կ'H@1R)RFĽ[+:iT;h2X_U;h2zk$X'?B?sLSRרTW+%fEoeld,55ȑ#zzzDt֭oѣGR100_~2Цڲeͩ|D-eAi;w;;;_,:t(44˗-]\\2dHiiillUׯGDD4hԨQO>O/_1ӧvG}3gΔ K0L׮]RiFFFddyΟ?ϵ}ݻ]a$44T___ ֡Ln۶Ν;g;v۸qM"""doSUUգGwo߾6mڬ^m۶D$ ̙7_IR '';vXXX9r޽{&L0aegg]6$$7޸~k~B/^xƍ p{{Uܜ.]$ 3r"vwwOII)**k[>?zh"J׹LLLz ͽfԩ1ebt CBBt_綶-g={vѢEzzzzY/$&&.ZU]]]+++6 ,ҥɓ{痚:a„G5T;Pn9H{i*''#F.$Μ9õTpw|>Ĉ=ŏ?޻wݻwG՘~䉛KJJ~3f̨335k,[lVVVcǎU[@3Uኻ`wOܵo "(??]W\)((s)K#S"=z[oUȑ#%}uvvnĎeG=vXv{ҥ޽{HңGzyyV_fffNNEttU6a* ( #δVvP*,)t10AAAܹs _\QQG5ft_}߾}ݺuyfHHP( .,,f],Y0ivvvFFFyyJGVL-i<@ ˫,33T[y܈hȑIIIcƌ޽vO8]7㣣lru;w~MRxPjjjEE(66v֬Y9r*05С׋̪U̜9S׏[)baoj=<<>|([[[[[[w+>. /8pl=PX85=<@իmګ; hx/ IQ@I,O5zfݺuiii׮]H$Æ  O>[n}vVV]@@R?K:S|If[ouҥqK^*Ç}%Sc j;PF,V iwNDu5<~"]Y[[flXjҳgߺmmlllË/޾O>H-@+SB!0R L%Oܡ?;avIi)WbV{acf-0{f)_b{8/ܱO{h"c6`")@Q4$]'βǖ&I˼]s?ijp*ucEHm +-@M>#*J4##qZ,_0[F1R RuܜZ#.q)NЯ/J^?䕽lTU|;F+Bj8)"ddhTW5bDSe4#Q(qbP+;˷5+4JfWM鼖:Z(RGD-ZZ*ƽk_iܫCVph>ߚ}ה;BF"WUUBr_G_RblCDEYbqv&j"/;RV ). LLki,{CS[7<(@˘H4ik%u-S1AӈEDDqpDdmي k ^1J@G< y=zcٖ}'[V;2h$V$""ҫ%qgmtF^*J33x=z3VN0ZXvuKg4M݉{4@+ŠE׌Ʋ-afUSTZtw+ sO9ttG}'G<chړ@@/sZƜ-)asUbgX%"bYJ[l˴""iZ [PYr(JHHW14d[^NIE"Q!1oWZ >HEE%lT\LDdbAX^oA-s}we uݚI<%li cٖ1$OZP@[#/U1-ϬҿkC޺>_M[S8Vm[_W+q_?ƶDTkㆲsͨs,[๲k׍GX)*X'賺\aX)1<[Y!}uMZ~9_KQOr,Y2eʔ-[DF@D/q/,s.?cY˻OIxf~Zbd&,LʬV"bK/qPWh$.qA^êULMM׭[=7n܊+bccǎ P/A ƈ+$G 466&sΝ?[n7ntss88_Se@1R۸q#}/&'NѮ]=zٳGEQf;ULi 7{{sO/^hjjoQ:w|=*7\J.j\%QJVF%@_?dPwW$*Zͩ dIIIǎUUU }?5Ytiba{sk׮Z(,,\3~ëg,Ua[ -ƭ*C cǎ999.\2dWU^^~۫&HP˗/꼱}̙}L@a@ƾrh17ǽU$ dK.%QF˲~iyye۱cGLLzv"b&&&曣 Z7dL-*rҀo' .ݲe˖-[7$^zeeeт T%(ݻw[-++*I˲ MhF,~9K݁4 &Lڵbkk븸&HP>ey-geYuGH"fy:.5l>PΝ:TҥKu),,466kÈ{)9sRT*DRTdEB.VreYF˺|Ͷnbݶ~В."="&~qfLfIk_: \o۷OEǥtܹSRRݵkѣ mRSS```\__UUUC3$BHf&HD?"zF[/=AAyʘ}}/wCCH;;)S4 %ܸqcNN3Aǎv͛ D5]\\lll|VO>!ߪaذaFFFk׮ݵkWppǏEi.\6a„s^zU&QQQyyy_bŶmVZD!~F@2<3kqjli84 .qUr)|Q|LtQ̸qΝ;f5췢͛7n8QKvرsUV ===pUV888(ϟ޽oKǏى' ZWW'6##_566ْIFFFPPĉo޼y=6}B!98u(fɎ1$(p]f͚s,iiiAAA99=99Zj3J&I[l244lEjA w^###,V/_#m۶K9..ٳ񎎎+WdtBNCk+BFt]_Ni'N(((ؾ}_RQQ_m&Of/_oLM/_>mڴ#FΙ3gǎ={Tˡ!jzwn**q(+B(RHt 'urEu4'q/**.bmmmmm-c[Gj-=x? k/(믿0@ ^:++CF=qĊ i={?s̵k9r$44iرqqqx!toK45v,~{@:u $Rhf嬟T^ڻwȿӦM6ɨv…f=---ߑYYY5~ɓN֭ۀ eo]XX8fcc@773g!j&4RKdĝQVD6*ʎ6T8?sš\BP ޽{]j SS`Eٷo_]]… eWxB;R+WΘ1cprrӧOffEoݺ2hРÇظ&W^AAAAAFFԩ!PD };U" 6YSw(5Ԧb}1R_PLLLJJJ$mhh611n@gZ W~3>EBB²e8MѪ_k׮t[2LbbbBCCcbb/^Ľu^ߊ;s N377w֭7n(--Ol"~U3==e&5w\Q.3[⃪|> FMK!2 C|<^zKZ^^|ǤB>>>f"_+W\r[7xxx_Բwvv E!>Iq9.CI&Kݱv'Ƴ@ P`?Df"i;?cMMMFؾ}w}wɓ'+?>|xjj̙lՍ7@ّzO</'I{t:O>_{ CCÑ#G0BsRLF;yUk׸3ʊ8⨇STeffΚ5/]})K.%bڴiuoܹsGMLL8p#ʼn. :TYY9x`ׯߤ7蘚N44iL Bh=B)vn*i kr-iZǽ=S8g.m۶pe˖͘1#22ȑ#ʎ""":`ooODNN611Yhhkk/]tݺuK,quup8%%%ӦMS!74y^^^.#BM&F{~/,lH:~@_u|&-?aaa-WM>}ҥjȲ33-[>}:--Ν;rذa&LR޽yyy999:::&LbŊYf%$$t]aTUU}a+= !{L;?h8!tK!L^[!7Vw8 !`.IiiNoF$ #$$$$$DZ9sFLMM%~iСCWn{ ڰaF+w/--~O7^- (DҙmB"nw[fI uΐ|`WAܽ)??_!Ex~E^gJiߨaj\V$av[Bփ4N-a0X{_+Up;w[xr 34r0q/q;ּ-͞e*)B=B(ɹEv; MOhF!C<,p׬Yӹsŋ[Hغue˴UBPW_ݱ{B(`;vVL>)zzz}5޽{tdcʼnB!fG~Czp-mY@:RwFD裏œ:ujVVVyB!zE8 ;5\^[A99C$|mԃLPHy/G'~+wKKp`$))iܹ0eʔ[$FB$2Ӈ|Π ɲFEwBK[XSE֎02Ր9Y/Kz]MěA$ ϯI`0At{!hI;Fdm ⓙhlsɊi(z}[za˜LɓW^sNPheek׮cǪ.JBHw*D5/9Nm=i+ѬE]b1=<@y~07rn@tS޷o߫WwA,^8++ vB)JI>-9'ID@v<@pW\ڵ+۷[l٢ױ%F!R$cHu).Y7$=z:]qwptt|r.]l*B!PG !=98/@ݚW'$$H[tovԨQцJ=z2Cweƨq@d A-iGKu"_ n'QtADWSܺIVWIO҇n_ H°3eV)|R(>ciݾ4AS}{§O$',>ĭ ̺',~NC zޮ%m_ZJ @~or#dMnHVa]b  wSѬ{?[9HN-?7A-D'ۂ @o98 _dќ\wRodUYQAcΞ/qYY5 cǭOi)|T`iч9KPPpF7Y]I0DOkc+i<%˞) =Z}P투_ƕ'O/![B9#A/W/JOCՉ{ye=Y,%.v!=^&y9{N4ZO뗉{N 5I:JNs) =_]"yMm\2c=9|V,> AZ KK 3$o&$gOE! @"|(y$r\ i}iV&ӚfI I=HI˸ w¬{B":KMsnʊ* $\QX!'_ÛieG#~-q[/7q_ *9ҾrO]ѾHu˝>GU%I: 9|iCկjf4$O&u/~*jA9 \i _ .eTZ{Up %n[)FP qVcMS>M jM lX3_#Ҝ2#<) N922d>' 7*ibD@j}gf563J܏=fq B`3K̬ػ]e fqAGqܫwٻwo Rڡ!< dU/Ii`p(yJE蕓vmݴ+*YU@GV)u$qOh||5#@ӻiZu#Pϰ*``gcQȬ&_?&P|ﭖ4܆XO^>F(ԹP0x^ # 㛔avs IDATG@1& H}70jg-*'ʉKӿgXO>K*'| z Zw}kOʛFFFaR LWA$I-ި}ѡFt&!lO=>  T)2yHq3*LC~WKZ|$KoғYZ~=7S3F'LdkC i $__^'vM~u}mުF{|ŋĽٳgOTTԄ uQ^ ul֒C.[BiB(` M}K;G'[cR q߷o@mҥKKKKY&$$?OSc!Xǵ5@矫.ƶm۶efk׮Q#FcB=$0^݁|@9PF!M=h uGУG:uG!Qv@l8H@;r.\AU𿾊A$rz\~]փu4bە+W>bQIffwqÇ+; ^:ݴB6j6 &֥K_'H2$ }J\.ָ # BCQoFIIɓ===,Xзo_&{7oN<9##LuIΝKKK{qCC{PP 6ob,,,X,ٳo~ߑ#G;Hq[밊+e_OZm'$ezC+zkB(PʡIh2QUJ,cuz} ^m8 \'^SjmYuHVD>!i2V^ >O5WU*ۮ<ێC/// ݳgmڴi_|ݻׯ_sNJ7nɱtvv&رc׮]ۼy˙ jkkcbb׬Y#JKKIttt֭P|/… aaa&L ;wիWEk/_+XΝ2“0S/5Z_?/%R %]u&m+RϠjVӿq̧$!k\>J↺/kHP5KKM`=lT:y斫6mڴo߾7nҥK;vعsUDu\URRRrrrӧ;99)HFFjllSN<{ѣ;wp8eees!Iʊj'((hĉ7o޼w>HY@HKV6jw?IK=IЄݗxˢvHkI>47H $4j_-zo _,m*q*J~?eC%Zo!v !dAt5#Izfʾ ditv |rss{'g>}wIKKKLL `ȑR{X,x{w ֆ6˗/۶m[VVL[Dt:njӻwoGgg縸g;::FDD\RsE!jH--->}* ["IRyϳXf̘.DuUfX,--7!!!77W hOc^|oa2F"Irԩ˗/?y$q8cǎeffZ[[/[a嶶ӦMsrr LII4iRmm'o[B!dSnk^^ށZtСZN,,,$v1-ǻx"US=l BCCkkk>},[\FhG8qbEEmBBB={C&$$t}Ԩ7mt}gR3g\v#GBCCƎwB!ZMDjű .,))Y`A.]":::""N/^Xeq*lwww9'%%|ݻnnn }a\.7$$dĈt:ڵkGڼy7n8z V)O?tSNum=9r~3eʔ]:t֭[?ӧOk׮ njcllv̙`!B!P@aÆ+WYf͚5AP.A6lTY $Ihv{ݻwe LMMLN̙۷o_epppUUӧo߾-0@)[BBB t=zeff.Z(((lʔ)ƍ;qW_}Et~QϞ=O<-QjAdddp8-|B! u]XbU***~h9ƹiiiH!;;Ą <:J rBn̙|ĉvJOOغu+ɬ~Sxxxvv={,X?ܹs?~|>hRRR}|| &&&444&&fŢ5!Bd%+V>|ݝfI={LOO644l*---**jܹ]6gU|>AnS=웚 )O*rʕ+Wxyy&ZnݺuR4_Q٪ !Bcՙ䧟~;v!CVZu円nݺilÇxǏoƍ,gS=zII,,,޽;N˩E={lE!BS7|["##Gidd]n$kWWsƾ1#ѣG(1666FFFqqqú CUVVR}w?rO8߲ &lK$I8B!L>}411Ҹ\7nܸkkkÇ>|AL",,,""СC AlEߎҥK׭[dWWWKKK]RRboo?m4Nhh/#JJJKF_~o@.XO&!BTKKːhllLMMW^ztuu}}}불Z>fff[l9}tZZڝ;wa&L%k6{ǏXXXL0OԩSW\{/UpdJdɒK.Ip/b3 BfUF[[ۛz*rʃ.^xEJ`xH4x3gӔiXX:ͦXF*((pss ܹZb5}'j !BɶuZ?%)--}G3D`,ch:հ뤭5l߬zXh!BRHC]˗/_p… Ϟ= G udյol,Tu#BuoO܅Bajj*''' ذazzz*!BMVNrrDOOoT޻w6!B!$3qϨ~;LJbE\ mIOd[!Dv ٻBDf#vO nS=ޝSo޼ӳ?5kVݝ,Yr6!B<Ϧ_vl;^^^^^^֭r 5R{VVVVV֭[Y,רQF5`=rvfA v'Y9 d%X8&ԮIo4aBVec4{3^gJ#a˛ɻlI/Е ̾+٨uӌ:#mw4:Sr_eeVTϨm?"N`*\_P۠Cgk|?iO^NvV)Nh abۅ0,ia3#&? -R)9|.% Ϛ5OG獻Z,ukJ?:I<4L;] IXGLJ@Gp5{4w2St͑swEPP0h>=w!WWΝ;O0a„ p}*7|ӵkW??QF͚5K#7Z;`zT 0B6RZ}ByضKWQDKI[; [/ {CkZRӖ4;B'_r&f:BVr}kg@cWjk'Y]-EN3P+^w%LIoC,,_x_eDOk 橒ݻ}#""ѿ;;97?ȪJF,ɗGBGE,~Br@~\٭Q$bIZ;> D35#|m `0@u}{4K^MN릆Zݺu+!!!66677*ğZ!22ROO/<<\݁ BWNN{Ϝ9cmm-Zsݼy3''ݻT!j$)n*aFPQaӷK SS֗_10HNNްaC\\ 9W[B!P;`eeVW'uFpTMĝ߽{WM&9d__Ç&HBwu*N:q0JܫL=))AF 0gNE!{#88{aaaUU~׮]===VV@:oDY:tQ+C@!do^pNZZSSƍff>z_Jܩ֖>|pSSӶ !BH=~駵kȑ#~ 6mcu:(Y}}}{fѠ_bͻcl,Y ,¼֚rYIyiiw'OrƣAr{l%֗Vh;rR(!Tw{ފ:Jo?]ߠ8[R;)g5ivtzƩkZ5M ]44c_;wB6! diT9~d&9#ɬUW ;T?m|UNk俐r,C̩3gĬ!BK h4wn$P,B!ްdW_-+ϞFEE矨 ?;:qaB!~8xS_ ]rhg&!B͕(x i?|ߡ#FVwh!B ίM/;Ξɸ؉'$y A!uPwhMMMS+*ޥ\'JeuF5]<;KH!лpVodŒÇc_7;RL;s7o||>ҥ;M\&Gɫʷ|lpOZB!;@.`ggG)+GX. ܹG ӧOWFTHRRRwě8gee;1Vݱ 3f槟=N烃勶u\ $Ձׯ_Q"**jN;B!XxْO:ݲ_._,..$;:uz $?׵Ǐww:tE $ BS}={yi7n4رc[{n)#NԩSA\|KuX,FaBH]tܵkWLcMZBMz$I>zv*++y<@!jOM9ϧ*FH<{ufϞ=eʔ%K444*$%%)$$$<<رc\.Բaaa!!!aaaϟfڶmӧE/ccc뛙٬rBBK.]LRSSSN%tСz:t@ B!$7yw]]~efflD+Wܻw/vvvǎ MW>Q=ݻwϞ=]vv}qŒu-^ ϟoddx;wǏYlY\\̙3 ((h+A'***//owٶmDEEkkk`B!)ٳgȑ'O?7nҥK;vعsUDu\URRRrrrݽ{Ξ=;iҤO>BE/ @ÓԌ j^ q׮]_~ӳjΝ< d;wL4iҤIPPPyE}?cPPĉo޼y=6}6>FBN===O>ٷo_߾}{ղ̎;^륥%&&g0rjCCC۲X,ʟ_:::T 4mҥ999GwGGG6]TT!*4i4662 KKK'O@^ښ痔Pk,YbddqFpvv򊏏9sfDDʕ+ !P;0uƔȟz>zOF:, R uuu.\XGsϳXf̘-Ho:ϗsw$IbcccccH`0 {Eǎ;xԩSgWUٳgwI/_<==}ڴi#F 3gΎ;jkk+++{VBƐCLMLE/MML xJ [ .*;;;11?WWl, +**gmmmmm-c[wE+++xe旈 IDAT]vrʅ u/ZfyAAA.] t:؃P($NW5yS($I;fݻw޽K-ajj' ?[Y}޽{ϟ^S8qp8\.}…-ﰪIIIĵ &&&VVVTIgZGpuuvhqjjjvvxޫWf_x.V ;$00NNN666Oo?_C ޲QJJJccĄ,^xԨQmu@!?N _뢩]{w.^xڵSN6IS,qrg}V\\,*<~;><|p۶={K:&---**jܹ]6U|>A4K.`jj**** 8p@HyP5+++6lذaÆDuƌ3f̘fk$9_Q !BH[W QڵkO8p#G10qv&ɓ'{zz:t(111555&&ۻxɥ TQÇx?q8;;T=H,,,޽;NիWQQxꖿ(!BH~ٹ_}~2>zzzs玺P(qCCCg̘A =mڴ7n,Xl TQ>>>Ν/?zhbb1(..ÇBPx!٩Kw տ%h4P5Iw@B/SƤ۷}VR~޽7<]eRSSt͛[ڴiӾ}4m:̰C%$$fMLL-Z$;K.]nݒ%K\]]---9NvvvIIiӨjÇt҆ ڵkפǏϞ=0izrT ;B!u9FOOZ>u5BMeZB5sgN̨Ç}R9fjb&zy޽;vݻKQH---BaaI,** #$$$$$DZ9sFLMMdn6jƚ4iRZ_ϛ=S^տټ/5h%Bb_jXӤp۷4j@ꚗwF:tV銐&X7o3Υ/K;Lꛕ@Bvuc'@vMKk 0jTwW{pB:2=z(/^866v…%%% ,@/VYH%9s|F,͞e6+UZ!R/?|8 g->w3񣃽DJb ms8vw~ѣǫ;H===7lذr5k֬Yؘ r bÆ *!B9*c_3N[nh;~=1h]!A>ӾXniEUQf;vPw/)0$X"99yȑ|BD!jK7o^zimxTh=z ?_!U7UI[H̩N=[fI477WAT!BjSSSC@鬫Kry\uFT̤=YĬ.[n#BD#h3t@AΘsʕ+-GdQ/Yw7l#F:S/eoB!PɆPRA!e˘C~[nIRK.th I@B+''S˲۸ubTR*azDM.%LMY_~7l';7n֭[B!iY_/Tq0!Bɓ' sUw R`T3g=|0==]I!BiO&$$\p!77} \vmFŋWP!BJJJ.^ 3fXzB*1usZF^^^߾}mlldoIccc.\(zyСӧOV[[[__oeeB!\LLLBBBzzz.]:k֬e˖;.B vڶjͺk$l6[p85/!BH㏖I:!C; Y2ԩS_9s4JQ]]=}6!BH,Xd2ϟ_={N!U{9?PdxwE-^YFir.%fp6ka:ۗg[e*ouH-]ZBIX҉i^kYSbeÓ\J m|wmiV ]%#к7Pwދ/$~Eꫜ/1k`ٳϻx/n:8gwm ěMPU]YQGUsLZRN%n^d1,k5:uJڳgϲl|!Bvv}햭X~L?9sӏHe;B!'A8|;.%;"q)ٳg0@!!Bi@Pǥ@~ܹnCC@MM INNN8 B!jdxYOQe/]WWwHLL***rtt޽D!BHe&Vy{{[[[?~v$u5bffFj}|Б!BH.n.BwKKj˞={^ܾ}[!U]ݗǏwuuUw,=z?cA!PektT g(;ӧObb@ ^ljvU.@^~wE;Bu,Nνzڨ; H͛v%%EǃB!>R q'IrԩΝBM6]v-11E!͛7n8QKvرsUV ===pUV888ȳ/##/Y Onǡrl6CvΝ;?gzyytf̘!JܝcccgΜrJ]A!h}sQiӦ<8--m 53r9r{rrdRͰX,M ? ou#m`5e@bl2lذ̯v%>mmmM2iҤ'OB!$ ;P q?<8y򤛛x]ll,ŊUvxwyլg6eƌG5~R;Iv߿OZp?[\\\ZZvZ[[ .͛7 Ϟ=[[[{yڵG1117n\aBV#v+c!55ʪ*KK޽{*/wUTTdaa!ޑ]Zƶ<ŋT=111''gӦM-Weff^vMBOOs'ו(%% `ccc__˗/߽{s̡SLѣ2;;p̘1ƑnnngΜ Vq B $֖*#$%xl6]IIIC{niiK+NѣGrm^^ޡCK  n5\ffE|}}ccc<==8йsg77S;kX>!!컀RDAq]ʦU\[Rq-"VQuVkrQ"*ԊDX /77@LB~jsfyN29ǎL sssTTTt*2FŋIII3fu֛7o<<|:"""##cرczzzss35q(22rՑ7nl?C֭9xŋUDx<^tt/!go)!]]kiT3tecccB&ԡĄnP#3 @peA3gʕ+k''``ԩSܹrٲeVVV666ZZZeee~~~=vСC_~1w~]QW;fh4K,+//ťKQtLĝ7ovvv ?.]tС^ kLվ*99bii)fWx!CP@x<ޤIw_h\.WQh4/$K !'ONKKkhhξ~uuuAAA?\BT;w.%%eܸqcff+;=uTUU`t==w zqaaT]]O;_oTRRڊ+}788ԩS4-??b_^~li&kkk##Rsss~ˆB''^8lٲ]v={&{hVk XBȴim߾">}}dfffeeq8##ɓ'{yy)((HԕՏ?UPPdhh5}t =x=}*=>~Ɯ9s֮];di_o޼|YXҎBĝ_%Y{{8qӣAޤNNNPȑwn\Rum5Uتl~ǯ4JM5lCiL}) 4%tGt9o%.T IDATZ777555OOφBHuu ~ʕ+y<ޚ5kR.zjrʞ{x<^|||PPЧ~hѢM6EFF666/ooo?? .RmRSSg777W^}|ŋm&+s\nnnnPPƍw0--mڵ7nrssVZebbhѢ +((8qDVV֡C???EE>?Q&r;w,,,Xv9+((8vXAA`v;x`^Y\UUU~m~~%F+,,po߫[3̌z]WWɓ;v͛73 EE޽{͓'OqBԃr=P]]ٳ#GR999</""̬!,,>>ӦMswwώ9RWWWUU5tо8=hd»;5wBV\zdZ|UMx<!fD337nTTTB8γg y.  [tiDD`"""fϞrJOO/^D]]=>>zbŊ;w9sf---s̉w**eI9 1dTYY0 Nd355555o[[[RRR#zzzfddϚ5KAAݻ,/j_eee *++,Xs 6զh4OOO--S޸q#;;{̙":+**={VhhM\\goH<H)=744ŝ={611tX,[[[1ۧVVVR겳lllƌ#A}*j*X2vX7~ÇWWWB֮][^^~zS^tɓlxi4ZaaannnSS;uYٳxrr.vMhzzz;ydLL9sRSSoo ,x'!D^^^;wB wE߾}{ч;v BIOOonnvvv&DFF^:22rƍ3fSMfS$Ny<ݻwϞ=ſJ-//?}?"]]ktuuU <o߾0a˩ӧ755EDDܻwoҤIf͚5kBĄO iߌ& @pe]rz䔗3At{9UÇgϞ={lqq1UBӧN{xxhiiN]4tМ $NWoTe4MNN˗\.Whj s,'Cydk޽cƌٷo_qq1N6m!?LJJ?߲vBȔ)SڢW%''B,--Z˗B<hȐ!t:zPH FrҎ%*q8q&Mtׯ_߸qBgggkk˗/_tIܹs)))ƍ333MMϟ \S !FFFܝ4¢gNG뗔H;NQM ʬ_B> U`=788ԩS4-??bR&&EE-[mڴȨ)//LJBׯ__8;;+((ddd<ӓzQw9rDځtѣnnnҎ..N|}}E133 ,D)a2q><ޗ~_V+BښD=VCJܫUWӘ4%tFcqaHHf͈JY,###|x„ sNLw 3gٳgΜ93sL:^TTԣ޽h***K.]tiYYم z$&>";cd'UhhlD0̑#GR+!t +"iH`2H` ; `J-rHWXDПBCПn?9d2CKcZwXo/}DI4KU D"?T<]PiP]߅λY6#hdfV>. '^v$~uپb~7B77˫0k[zPTS^bq՝#f3 xCH)Oq?UW;2!};@w]v*3UwZ٭***Bx<^SS]@!&l$TeenI&)pFGG/^cMMx@\Hеk׮]+@y{{Ʌ _v8w璒/_.(:%(`@\MM蔜Nr{H:WUU&(L9tPLL fffSN}8/ussSSSlhh TWWp89 dg{~s*ǻ|rff/ lmm=<< D5HMM ܅d:::zxxP˭$$$?~|Bٳ'==ҥKׯ_>}bnncll=\.ѻvڸq;!$--mڵƍ[dIxxӧOt]:, +((8qW_}uС0???EEE)2~rWUU}FFF4… o9f333u]]]NNNdd'Ov!={d2]\\!w߳g~+77Z~#=z:<<ٳg#GKg ,{ÇY,իW{T$D~رcO{lڴiTN֞3gNzz۷oz}`# 6E^^… ?ŋ.nݺuĈ>>>GvwwOOO_pa]]˗/{#3seիL&f-d~~~W5x6-JJJ!\.?7~|׋-ZhQDDD:QWWvy]]]sX(..644gjjjjj*b߶$D,,//իWB|CC3<o111ܰaC;+**={VhhM\\g;"Ľbڊ>55z]WW]VVfcc3f1{000־u떗*OBB!YX%cǎ@}&))_>|xuuuPP!dڵׯ`VXX$8QT3qr<ONN܉@kh"(''jժ}mذaҤI'99SFLyyy?ξtBHaa]ww۷o=Zӛ !Wܸq3z4jI%3qWPP---1//fM<5'N >w\bbyhh(x`xi111fSBBBVEDݕ+WNNNyyy=4@w!qCԨ UeffZF]26J6M------ЏΓS$1eʔUɄ [4/xEEEC 'z֭Liii***FB}uq]aMܝ/_|%sΥ7Nc455cccurNJn޼\VVFm>~833sL&ΧikkyFQtf^{w۩2__SN%&&h|~zQTTܲeKPPЦMJKK}||f>>>۷o qxAAA~E6mo.\JIHHpssKMMm={gw*lnn>~ڵk-[ZZZڣ:LLL3fEjkkO#FZZZ'N1BUUuƌO>%ֺyzz644B]\\8Zt$ۉ{UUDDŲtpp`.\J4njM4!22r߾}];nkkc lk׆/W\nnnnPPƍwիWSRR/cccWX!ˍ7֬YcbbtȵkBVZebbh"BHXXXAA':D)**0^رcO|WWWÇ^`]fffm۶cǎ͟?~"ܾ}{~~~yyyǏ\n޽{ !֭;vsӛ !Wܸq3x2DVwBСCsrrjjjՅ233¨5銺^|k6Myi_EE`TUU Rwjjj 2L&)XB P7͞=Hё&(((((HFf IDATgggw굓N2|s)SڢW%''B,--ؘx!C[ >d2ڗtd8qwvv|Kϝ;2n83f333MMXeݹ\Sdڴi555wޥ6Y,VFFA /Q ?q\#= =*C >uTbb9FgXׯEE-[mڴȨ)//ܼrvvNJJ:rHaarRR… zr_~ٰaX||y,~]__111YYYh^^^ ueee?FEE+))zyyM>]L&3((ɓoll5jԲeˌtfn۶mOPUU]|yZoٲ烧 @?ҏ2a0ޝ5+===___qZ:>׭['NR{DtoF_rzUKKH63.XOܡtJ׹etD^= 7$2; wp_=kWv @BcRXQHK]hhlD0̑#GR<x^Zm}`; ThC;B:ܗtwh*0U@ qHdw@w%w2L{{>>n˽1G֯{Ӱ$K-{ GU__B~vuh}jvU_o 5KT*s˨}Gx3GGMC9Pv X( n1s]ڦw-mƯuؙHGџҧj])#QQUԛ ;,:.?0SDѾl~$TK?P 36|XqؙٔB!tF29R+  66"~;o }-Է4W"A"B&4 ]:ÏoG gQtAw>ws4j* ~/:p;BoTg#A@Pgښ-i5UP`ԴiSÉu5[[+Z_ ?@_D^ :!OƂQ JqGIIIrrŋ;k+2; @ $2; @ $2; @ $2; @ $2; @ $2; @ $2; @ $2; @ iO``\\\CCC->4SRV>|pfVN}L:ΤS7m T6ÆlasrzB9Ir}kY?pw=@!qg_7CMM͙3g&M􉉯T!Hy/ܵkcPWW/TTTsb:Y"H?q._vZiGA!K.-ȓvR___QBPUr_C C着jkkv2;B?0f553g V:t(&&F]jkk<==!...N JDv횯|y7oZrsss6nᅨKXXXAA':D)**Ig$\SSӟ~IKKҥKϞ= %cw}!źzjzzzߝ K;ǻ|rff/ lmm=<< D5HHH8~ݳgOzzK哓/XL6ֶq-[2o޼իW=uTVVVCCèQV\i`` ug_)ںu֭[:`B*..sKKK.9::&$$X"88ߟ/=@jSe#""X,;p႟_mmm:;vQVV֞={"##۷LKKkii!<Of_vmᎎ?/KJJ ;7nضm!`ꊿ֭[G3zhww ս|%ǎ{ɚ5kΝ/~#G$ՕYUUzssswܡhw}葥%?x9?*WWׯ:&&tg`ɒ%nnn3g\`۷oUTT$E]]=>>ZbΝ;Ϝ9zꖖ9s;@I{fffJJ`NquuMKK!455m`0Ν;'X^__9z9sBܹ#X2{ljsַonll|t߿„ ,KUUuرmmmEEE%//h6l }yJJJ\\\M:իWL{˗ߚO}jii)'߷ںEhԙ޿{ͧO6669rd-=z4eʔÇ%00088F655s :#ĽPMM}ԩSuttz@Æ #KܹC'N(''goo_WWCUs8CkkkBX,Wn/js)Z`Ahh˗/#""._,//a˚۷o?}T.΄[nXbƍ3fSxHaq[[pGeeeD*jCw &\~=99yܸqfBЅajA}SSSCCCKƎ׵)MLLuttovݕ+WNNNyyy/#+\. N)=4Mpݻ<L3fRJJ 53;N꛷=u?w]]ktuuMLL B>I݊ZZZChiidee?^CCR__/%@&X"toWUUԔrGZ ,7tМuuṵUVPlvl6F{gG-uB%eee(fǏWQQa0UUUUՄMMMξR1cƌӧOo޼Yځ}邏O2QQQ}PUrr2!nllLy򥣣`WTT4d:.(|5yݺu !˖-KMMmmme2湹<?ÇL&'՝}޶mۜlmUTTV\d2ˍ_: C;;;_~˚^^^sΥ7Jv455ccc̨6\.UUUӦMq}P%;Nwrrld2'Lp֭tGGiӦ:tݻ>:̑ړ,}}}O:hnnNY,6[l ڴiQSSS^^^ii9?#ܻw7p^zаpB걩/_|ⅽP$&MuVrrsRRґ#G .\(ug_M>/] !k ?sOl:*CK}j]5K3zk3LGB -_QhhOwwZ⮯̬,cdd4yd///~3++1** ??_IIkB322222׺VVVӦMJy2'OnN]PP@177166bAݹsGWW?,|IyyٳሢڵK Cf꫌1cƌ1"###))K;'%$$ 6lȐ!]C qÇO8ڵk ‚`Ay֭[+++ӯ_>vXMMM;j(~ׯmѣGھ~ƍUUUǏ'(ƒϿ|23//pF2a˗////6kkk}||֯_OmZx 9sp8WSS zΦM>7oPK, 61ҠkJKK;/ Q'((hGIHHPQQ={69|pkk۷o766J70\ǏCZXXx<򮡅br劒… M99-[|'1̠G}ٳgӦMեJ̙[YlիSRRfggvvvԦ!$??t>tӧ9Ukk떖B)^~Y ߾}k``@D- <KOO\lܹfSJJJ!Bҍ\]|UluuuKKKBWSSSrÿbJeXBC @ɜ:u*֭[O>Oɻ.MGG͛׮]{򥡡ԩS͛G0̠P,,//]Jyٳg ? ޽{#GoHfB`!IUcӦMo޼!̚5룏>"Zx &j>Ljj+W---srr"""^zn: 3)ڷnRUU%%$$1Ftŋ\biiiiiYTTD:QH:czu*++srr7o,zha8!|޽22\.788ڵk3gK 3ܪU۷aÆI&q8dj1 @SVVvر sss"rDa tzBjBf̘r9sP+Bv60@L kkk*k'}G<˛d2 d@II!XH}F]%zha8 V\\,XXZZJ122"fsnݺ)XB=4# zVll7-Z(S:QH܁L6ݻ&ptt4ht~\^^ڵk,dBùz|@50@Lf{vE&ieeE0̠ܼy388|qffө[)0ҠqX##ŋwؠEߵkW߅ Ґ!Crss>}FmذjјLիWSRR***rrrN8ٳ+W;khaX,ֳgӧ+V02x7oݾ}̙3ԍie555W\ŋK.ikkxRPPlD/À|ɜQF-[7hw?~OrܡCzxxkE- <5T=zTYY9l0 bA=|022hcƌQTTbA׼|/XbUaUV͟?t2qHdw@ qpvA}h@hj 6W )6h J.///:eeeeff6t^=|֖Ы, DWW...BUiiiɎRmǎι}sĚ SSӿo \qQƏ/#q.x o qQ:$xsSh4ѣG#""TTTtttܒ!K.9r]ttPok׮UQQ>|'|#ICN 7oRQQ1442eʯŷo!q={vՄ'''?w#G?˺ z׭[{[YY?~ϗN\D 2vh*_~hCD?D$*HDD\W3Չ#`0 MDDFσL&s}}-"D$ ===bvt:;;;޽ L&h<ϗ2\*A$999㋋mmm{{{_J?;@u:jΚLE l6Fx~~~ffw@>455 `Xs2h\.ⵓIDAT;7Q0==VVVCs֖~~Q;@MMM:{{Sᰈ - FcCCCuuƆfCU3Ln[ӎǓw@x<~ݞL&+**...z{{vGѮ.|{{r~iii~2+\@ Phdd$?3RZ((IiL;}Hƛx}F޷:8CoC=Fc+-,MJiw j\wCy'wl$^G;ѓ{x"N=Jd!W[ j@0G" = AaLO2">d=L{π^EmD@TS!j UY|!j `O =6&`Cķ-KڋZ @9Ǥbbv[JFZ-{au 5pnPn6u@Lxܓ*1s=. wHVԂ) 7 hN%S4pұ00O J %Fc,q?RFvzw-r YOzX\my0ޯ:hD $H?@ZRYW>S4,lK;rV)0'ʂQ4 @F ۀ[8Huj޹=T:Xq-0z$5Rp 9(`Ej |4j*PO%:ڷy5PMWiodߙ;Ebj)j:?//171ڌ|,f(P{Z`h=j:?ǕdT'f*Qt~+18)/j!?]B_W@5Jg@!%o!F-dTb\퓫095eP̦fL,UXR4:>۽w/PŶhńRom~r|IMg6͡UbOyVPOW:?*U3t~Cqip)|lc{W8{?NaXˏ <=JBB,TU=gYiY}ȉ~ M=giY(y:sGk5"=K(q_0 Nl#O>2wΓDz\pGbL} g34V/AI<c>ޟ >d{|xp" >}o$> a&kX3Co:Dm@]/Wˎbwgta"-5u?+Es^ ,n׿3 ЛijM: Ѿe:dZCY:>/"@TXldY\ޛ(tZlJD;2 ViYjT+F)iTW*-KM6엳z@ NPۀTQefP v)#?x6iOefZ& Y:zDU:: T'Ⱦ_5Ld SUG|b5nj _*Jm% wOen͹k ?UiYjX˞sWʬYN ʾiY FvMsRQ{7nQ }Z Θu(*!N'@>-VM4L29J@>-?S~~p.bYX>>7buBК:Lo.s]5 򋷨oܚΊEV?@(P!uZ;cV"$$K b3B+dH/ժ]]NK#yY FHEWˍ~z y1<K]qQ8B]&;N BM}EBPE?6#z[R2>Ho >ƬXtL EZ8|@TEL* `9'Τuz0a#OI pU'Yzna8N|`Ac~dSՐōܦޜР̸1+c\Y?>n@5j^?/̸m@ p ~CeSyIʌY ,ӂ_O;PGk_sNRNi@9zD>4 )~q<*>v |PΊ?II$WLCo#1? Xf+bk]9.6nΊd=Goo֡oy +bED5cjX by75+VRdrfC{@s^5Ѩ-z'ּ~kP5#_ `#㨸Df3fHE_3:9H'ŚtBs2^ϒZ9wur6"%:z[0*)>E-w8:#OVxzC8 w?޺ן eh_9Hͷf8ɻ^*<0|zaXÀr+|ui:\)<׹T/C`׾먒s8Nտz&DѯO-dI+ur;9I+u4Y^Rˋv$\}g)^߫JO_?[@{pr|bw/:nQ+u izuWwV7~.9Mi+ui!?~`_g̹}g/aDJ$U3 PW>maFBXM_K8g޸ʈ~^!FB4Daou'ޠ:藺'% WR{9zG?Rp&X2O[FB'40tYm@3>i|KSS~>TÏ!zj<]~vĥ] *\$G%}rl;E 8֢p !T v]˔jGOGbit\ DR!59;CqOMS!x[TrJf>w^‚K˙ȝ?2V:WX}GJ ɽݞEO@KG;~ZH U72?֟P ,~|C{l"U t.q>G><IuI[ȇUp_f{.~R J!ȉ~j(0Gjݝ>&\Y`ueR:g霹`DRxw8J+FޤCGh/-=_H_OmnҒs %x$`Gt]+Uj`:#zJ~.<{BxGBHD}h 59ަnPEۡM재'x 0(';kng"A9w}0z[iɓC3G|*w1r QѢ*8d}t`cM}>ZMͱP V< S6U7~!wxD+_G<-ҶNF#ztoB۶g#zYѻUh֡9;wSDIaGnu7!ޭZF vDowTыtWi{eF!H׈~6;)~[iCm vD/~Zj3m `aT^Z'{j7B+\au%@BHGX y XSU+0&@[ބ`L`Л#O NFM!υѯ Eswᣎ@X`Ln'|@H`LΜѯ )8 {[ 7EIUN ~5z豹EM!!97.{OL XGw1!0$'E+t`L`H*["hƶ "'m4rߵuoX!Д7z2!F8@hʍ~MhA !:/*m*%-oKe8]n*% ޷m ^c,6k{GS۱Ys"}d>g*n{fj_cUǍ.~k ŵ_=bL \$u嶞Oj~Y5,\GTj@uh*6 `nVX8c׍Yq!}7q/ Lcq6`OXfq;xU0l}um0J&)+ !ϸ(1qk] 4 ';"6VlW_h&j]ZT~8m{l|9۸ u@Ϙa .XPBUa%XKʿeVCucF&/Fɲ]@ < -:Z`0Jl)d }?75a(NDSԆ_ole|>ŀwY6 s,?+eCLM7$r@c`ڕ#I Al@.ӏn{K Jĩ[pZ r~Ff-> Ao[ 96 22_fYOw_v c ם}@珽lfy@$([=B j//WI LYn Z%߶u+KCI򾁀z9ާB`~Z iVױL)Qsh]BɷsH4@@dHn5 > BV D C~hcQq̊q#z`;(Z::?MAIj;xH( kQW.޶U[&Y[ө 1 -۶ʮb]XxRe%CUp" ȤJOCLC?5C!'P%>Y9v:gyA/PX _.F:; ,RQձ!*Ysy/iR_"Gm!P ~MJ/1 t !xJ1P? @j# sߗf @pn j9'p[`>S".EAfH{G7u%4rF] . {mueEvn(3wO%}.}nh|Ì>#[k1P< }"9;P:^: 2]cQ/ ~CW  n{i [pPr+߃JaMfo%2/ao(pKP@=WeJMD;ր5:mo8pJ Ck _L[Nkj5MS(o+ qϘ:vcdfn6U۲L8qF%\ssL֧QVLcVܝ[\6F8ss5HEw/2BvNBꋙWv` Q~wQ38Vej =53ZcUd[eV2#-_ Df5XǷv2BDA{,xƽxM;X 3J@iՊ`į3ԛeT>}@csvռs޺@@Lݩ8qUV{ bʔ*.y$9ng\? ^`uVtjh(n1/ZoIHe3[5t !2*vc 8ecN.fUR.[!x!I製=ANŃ.%pcyrIVUp 9:bz0S$αvj@#okCI"g|ps{\`>d#v 0 Xf$(XI*v{D]YC{u~㰫=c`SfWǩPȕ-Ȩ:]`Xx/@꼿a8 @6ALg9ua[c<wfCPҡȳr&KOyf ]u؟mC,/<` y lQ0O/>q9wH :3'$IE 8IDAT"`.$p_-X5/Wɚfs$p:<ч| 7oz 䲫]< U)4 w im'cw>_p=p@&z1MV/a2ePݭ+P?\}-P (>1Sse[gZui X'G~Y[%?肽/0kcj3m:rm`1ofPs圻3niobǹF.f {dTHxVʊڋX7Z~ t"싩@v]"&3xR"d(W+xgȣ_11$71xπqb h:̥B/Z 4\E_PuTvu읗u>ZV Yn/Yl/bl>]ҲbŊBJ9 +wyHx̖239Nkۀe- 0X YSsY/# dKoԁb8.Pf+CeX43fO@w$;EJp h4X\uA7,_Xy{u^۩5:kHoeU1  .& еX0PeG,"O\m!i {H{5`bW#ة9|:F،Jlv$OrI@Mjp~? DxZZ:{J3Gt3'%۲X8j6QZb)S@B_YOa$n4-|:Ȃ*gPlm̲+*%OLLPbaG &Ub2wC/wĉtvˍo jARZهBu l\DT!^Ryuޯ`Ko\n5:9'i;@_URC?Xsc0]7 ]DpgjWNxzVɳ^nҾчCNdc_Y5Y.WMJ.ƺU]/g}r ;Wg z[i7Znu?o>ȋ:-o-sOa 0;ZW?OҊ\:Us"e]xuߌ/ۚ沜^^o?7b 4ѫgׄNԲ7{ۯ־Ue;zV=8>Ǣ)K˽ȁۓxep:`U|繯νiZpy\:.)#4=ށ y[f'5`YLz' \S7+ykɢw *MVQ(􅁄-+Y?5(`5 \N:;`\kt%Hm1s;L]Vn^G}pM2fjHH-̃:^Ǧcġ\\`q-[-?}" 7ZVm >142"?_qqb>ZG ڛ\]6UllXz'Ҷ// *K56nc~>oNG>_ Zb@.wVYy}k?֋^M.o"::^\Up"GǸ[d;*T}|⼼c V߻B*\]hPbhsYZ::\-ʧ"Vތ:X~On$kN.D!%vYfZ|uӟjsm yA>ٚ CX6ڋ:pHc1 A YN6ΥrEnY_!e1IVӟ߱WPGۛm (5`ued#%W 'u(]j S:z!W:ę\v"ѶA R=n":>e5pUyѨ qP( BPS&T4ُdU3K#%uT6{%kUQCtL5H꙼j~ƌ:ޞPX{HuOڣډm/;knX$0 º@+*nrTN٘j'jbY{Rgj#}5kcO/Hk;c[f.U 5Mɹgnz6*Y/7X{GMsuvkț3uQHBA=}mhvx&|$S,UYϮ"Ls4Un`kM㽦9"#6לuXcVP;Tm˪Aďb R]vl|U3'j͂8;kcRWǜѩSxYjX{T.ۨanUP,ѣ蜨c>NjuܙznWs7w<Tw`:[[UsOYb BP( BP( BP(*S,o.N'P]&蠶Q!Xϧ DnvZOy̔!V, 1sEP37Đ?˛e!׸O T߰=ͪjs:~l2~?GOCٍݛ #6}~(W< }fdRwV!OCTIt{De+$s@=;#7@jF,u4:iOQ$,x}cFNO:(9pPF4}V Q0]BQpM"J)Hq"HJ IKIJGJJIKa8y"Ֆ͒="{MvV.g)Ǘ+;-Hז,L_~NAQI!ER¬"CV1NLMZ)VL $L`V0{"eyeg :JJU*[5JLGU殖֢HVQ?ާ>حѩ1͒fX9֘&YF3U^FuX6m]}G1Չ93 |UҪU$]nnCM/OS~~>&   .y݆i񍪍&v\mu:ƑGLMv|253Θ՘lOv19|y-Եl^Zä́UUКij}ZhlfSoV6¶vʼn伲3صs-[{'BCSGhGfhgWΣ<lqu%V>svu.֪MZ26,b&*4r**4j:*@LMLyl,7*us\m|GD\bh$jR|Robrv`NJA0"`H*hL֧uӗ? ͌]֙ՙdKd'eo޴gTcOQ{rswol m ڳMu[NO [A^i۝;OrR V (m? Yrˆ[EEE[?Xި%%Ga%oDDiNe̲²7Yn\^{p(㐰­Rr_bULp]u[|͞iU-x4:zccǞ77Q'z̚KZ!'lsWnk]8q/v=s}ٚvZ{aԱC) _}ABEKr.]N<{%DƞWzuW8}nX8[[Mowf[@;]wv8d3tyoy02*|`a׏2-<>+|"ߵ~o /ۏ?yx??'󟓟O)M5MMqb݋ɗ)/f 櫳/ M^̛oy=}na>|ZZ.V|R?B,sMT cHRMz&u0`:pQ<bKGD pHYsqFIDATxyX[eǿ' aP {ӽ+jFKՁq0OgQ3 n@]): um"v-K -)q-! (y<-o}Mr~9$0 0 0}'0 0 0Lǰpgaa~ waapgaa~ waapgaa~ waapgaa~ waapgaa~ waapgaa~ waah=w@D !0 0 ôGQ*v3l``ݽ 0 0 !^ '(@<!Daaa !3;FTc>B{.pBuzC 0 0 {3Ⱥvx"2@ۉwϛaaab@ wH6"9DT3X3 0 0}qoY7HL]hkdP1I l>LϢ@ivD |=ݻg{v"8pR)`C%HpPy (R~H9iQ <{gwv!W'rU&Iљ(@'0 0 0m1\! q] 0 0 'PRh&S Z߮[aa2Fb<@y z!E^dy0 0 0L2`;Z"m&\Iȅ$3p)0 0 ӧHUeDvT C=Yaa FR਻{%zLpvoaWM<{gwvqt FF`sz!r>_$|=ݻgW@3 0 0̀{?≨aa -{aa`p!Ry0 0 0= 'p(T.cv%FQNaac8!T"QU @%;y6;+;*chݘDɶaas%@lAU $K 5"He#+! @:67ba*3pIQ,*B"J$ң!pHP$Q^$("HHBGQn= y$C ]_^l{#pgaVX^G"A'>@"aYwaafɩ!D $orR1 0 0 M $+TrIH^] 0 0 5*3Bh$+L.Z&$@2R=ߝ0aaz BZ!D-ϸ{.JU@*!Y$?ש 0 0"z5B>GŹs 0 0L/waapgaa~ waapgaa~ wf@BYIzJrT3 0 6\U Em,7@jxdT[ K]CW)騬;ƶ5 YS hÞN[1'n&o+ՎQDM *¶vXjzN*w<3L=aCfw"ʀ~7-ɑ3BJ'aa5*'b='C?%y𢜨̝u6o=5> *l{9ݸskgHsQ BlyD3GNOC<,lNTHfe.ẁD *HO;aR=$Ev2rI$w0 0̹2]*tEbz*W=c8{DxK[nO)d s^{KctSհ4vԚk! /_ω k1.{+ϮBl Ikɩ%Rer=ɐJD +0 0 'U~XsL-V r3&8pe߯#drWmƣZH'jUHM!ޗe~h=V#NOpG%#-Ɩf5|r-?_qeS-c wH=:h";7Z"} ٧-c sjjԙu*6"'Ar teJA@lAY2cw6,)'*yp77aBYI:xWrW[<4ԫqϔ|QPvfL Ly.@:ڸhϩ^ D+\#a_֘s(Qٴ˛在lJfY獆:UPb\o tUFq ",'ߦ1(㜬YOoD$x H5۪Hw;2fP697@"P.0 `0 q$;cx-WIOYIoՓwʧyݣ<&m$ )m:oOzoB /g= U0/'*$sׂՐ w@+d@ @ɐ"tJ[ `B;s;Sok{8itPHm SQbdHɐޏ^''*[P 0HhOp<aUte%@1J}=+7 yŇ6O]Uwߕ۪?9c,_YN6m4v?!D  IKT=ܯĘ 'ѩ VaicnQ5O~`Qw ; >N@%B"2ADnj>Ag} y,: j*ʼn"MNv];va?[P/Vg*>`\s<yKfHw3؂fј{qP]Gv_4clkIU#='R6ۋbrBLeрA?ʮ'-kfLlE`@SÑp~HQ*.v*Bgz`_\u]pNŻva[Pq)"}wWܲ y1OR[<|5SVt5@ښ7[v7E*=T3\\nbi_Su@W<0_PH.ͦdADSM۵AGBD]s½%ÑV6!D JN{E+>Dy_H̐&ʾumxx ϸxQ9âa+PVQN/joLNTHB3G+Z_{a?^i ӭQDҢrz&t p6U/~E'?i2B-/,ͦ;t/H^Qze[yz܋JޅW^TpUe w'F-W(MDz;\E/O<.9 h !/ b "iV3Im`#*c?(5{!Du7m?űr 0 ôC*e%eN^=uP6P!^^zR87ɶuYp-u } .Yv!{6n6{DH@ۡ=kܔRRMfM_g=3le_QЗ~/Z)"ιJ=r~"\εbѡo_" 8O\Hw "28C=v.c^:@*2 0L"@!e%^bE$Rv Eư8ёpgzPR"@⅐>ypIJ&C}2ўoQƸ.#&!D"S)7\ٝ(v@|B2da V!ixJ_OT I##})[*yoKE8VS&xyom߲aDDDdmǎmG?~AW> ]r= H$B$8ms<fں=/UOvg% Qa %+ݯM{-{Dxe%eܠ9}چtQw/nh{ݔ]($ϳ|f9]hiu*uOzw'5} ߾h+[Uu:^b_& mGuՂQGW41 09LYIvOlwA뼿ݚ [X|x @a]t /p :!~+f{'[ůU_|RaXi6.S4\rfSai6pR3J4/ib} pĽoRɯTpVENq}1j l0ijO#7NCݕ0 øwqc/,V3ss޺1!k" NZ3̸@qgT-{;BX߻:Q+`P۽TOZՎ~A,XayZCYBآ!WIMivh*8xMG,cUix'k ;Xw3 0 t:@OYI[칭򺭏x^E{꽚d {Nuឆ-?zIFh߫Cc5 U{|Y(sXrB4\3lj>80 0 EJʠ :UATl-/^Z}b4çbB֘lv@ *4Qѣ,۷h.P]9H7@Ѱ8&oTM0l8L/aaz*6\x'.;5I3vdF{ӡTF+DMFǜ: O^ʉ Ɍ-(Ή I-((L"nN7SVR$p ?IrǢS >\G鋨N5(r啈Q"!IƖ@n:wvEi&TOn 0ȉe1buz.YS֬ryW|֦!sTqnI`Lu|*H;H3lfb|RUk&D;FUȱHNTHOplAٷTt½oz&HuJU}v"@K"yl2 !b 20I1$@Z'G@a0 øx OK?_z𛟦.lI|(\8=/I4|M]^*] ,yǛ,;v@lAYFG}JΕʣ+6!B{$@4%C- q=)BH6"(ޮ R*$>OQzq5!PR5#e4V󃸴n8}qq@ۿU^jm5/_GW쇛Dh1Gz5B>GE歐EMĢ4"2l2 0LoPHYI}=GC/GܻL+=CՇ ц ҄yԸ5!SA+qSU>'wk)9Q!_9,tbD+~ + *wOq YGi' D3gDZggca Yc5ĻSqj 8{_6ȝ9Jr jbߢR8;JkN֪\ Ok0 rc "|l/xpAG DTIGΣ! of :D}ӆ!DDD&yy*h~'0 0}.MO qkffŜ?1ޏ4u*HPa!FZvSjh毭ޢԖTo j在l2[5Z}wIwu~]!mEp*+8kvԈ1j_9GW. I$ko51ADܒrerIF!|___?'&dM2PVRēD7WTu~jF}"?,;>WqS׭xYm Ƅ omQ3Wk[ !UHi$"'N +hذpe].N7;u]˘"|0 q14z1zw_T{lCu/4yqDh!6n{hnڐa^??{b[Ó.$~'2s͘ ,N@ PM#DldQzpg,ܙ Wax) w\9eUA6xN,TϠ[c B1/Bdvvd}jZ.Dhv<:,a0Ą-{XIGH&kFal|>#٪w4zL=9ҠZ},ÃmUՍ^6S<[k|c{MJu%VXJqic5A ;@;0 05EeOg(|"/4|{4Q?QS?VCpӮfԍutTU ¤YDGo;1UO4(A(B OlbL|iXMv0.aaD{~tMY@YIU!VX~lWoܱR9ob ԙ6m+4aV=2ƲS/&m^6R+&`N.(SjvB6t4>{: 0 ,3,TuAwEArlJ Xv¶pb j"z~n`fSu7տXa^ `s?^7%'* ݇P1#B <~m3#̀D^tEIMg0 3(x:i%3{5~ToZܲ3O5'#<'  o ׎FM/UP"R `h&[PאmDJOWwa"JF?½C$5fB06RMu=D6:ve.#lp" O\ 0}J[5[OJkfS!Қ[z"Y7Jq7nu7Ֆf]mv)" jc& Ԣy삆cdLu}sBJ)un[{^{>F}I_71 >loΈ6!"Eًvy=m]ag@8oܕy]ga΁\JlWTۊċW}2sCý/e/ 񁪖4}]{!3}6 `: vN>w]YDy\Au塦}ju>>Qm l(ͦ憳 00L}D{#NȾ"]\BL;<0 tHNTH+ܵ&`RӣJ.  @ClӑwwTUt Ehɣ Zxxn|khB,Ή 1~S<mgNT[aOj#/ZAu‡b*HvmogP{9V%{ѭtM&"< 'va,0 te%[ 멸0 yGѧBt35bfe^Q:?KJ{ӈƆF[Mq*BD(7<|;;λLܼCv5>>fFt0L}{g;wu5bR rt)B~NI,O+@碥z*R4UD01.WD*',a=1!kPc;'3sz*N>zw"^U#S&`EfO`2*\fu؈% &'5١Wэ_?zU{i%WbYUV<`x?o&yDb9`W"%|'\,rlpu<0 9Q!%hZ|֍'_Ƚ_?3 @}y=Cfi6%ioLVUj[ŶjKؼ&xauSih^@;5>lX=l>7l*(\S/a;{NB7 ?LK$9 }ڕhGv5'} o\4$cvex[񌻏AWzW6tf,0 8r')\kw ~lAYaNTHً6Jug멬P!oհٮBlw|7ra2$󟃷}a4:6^]tq B SK?Bo?|UML/@DF;2).,ߵ\*,)/ pxg2 0JҋmaU| a7>|Ƅe'=bT{*{;hBhGDfgw1Q;aqʼn3],-K?>'*RU " B"J=N.Β;aiJ2BJ<;[l`GYhr໶5[N VQP )+)RXrt_azaٗn\1]Vm(!\1R5ٶea_ VC Lw0؊,@ۉDNa4[½;;wv =hz.10 0P23|*?T}Px‚;~7,Wn M)~[&cf Ri [E?y 4;=飲Yoxf7f߹@F4|Gc 2ֵ1!!p o&23kb^; ;G `/'k2 0slAY8m'!W~Ր2ߛ&MܸA\*.KRM hGhPޤi~f5w}o(&9c\'LT<iS;X K"`6 3Erj[v^~!k*U~1zy\c5s'1?{qZL 0]4 _=MWGM yZ=iٛ6NSf4aUO~>A;&^3uq* (ս?'Dhm PHR̜ʼn ˫cvL`㜪8 !Er `Ħq| EN`>½ΝEz;_V/< 5A$g7Ni<D'icݎ[P-8Q}/OH]!aqb1 롣>F}NLd{Ν)Dn!AJd B}PN͟p6Flz:B4!ԙ@X;)r$Ga9Q!ijh yޏ~s7+2Fyn6ƜP<|wݏPݘ TDu֪v7 l!Z/Q*ͦl7HuB1'*$[w2H{{tLi6}2"r]׼KepWN fٻ$;ۛDs;720 ӭfSaZՐ3mj(%zcmCm&uT_=.ZY h۹O֏$єJ`De9Q!%rlXF gctsfN#Jg諎x};D8.:sO8QRM]yiؚ3`BɖdDb:(giI%疒Όu>|Z.&ΕX\@n~&{PVd&9 1?w K*ʍ'_GSkZ;xB{ޏ鮜1ac|N@\̦7U:U@B?j[vśw ksDU)0^~i6ʼnT]Q F;x[FlE c,9rbh24H IQ"k" %ig%g)5z;3-Z[gGf@&@wm8@nɔa>Dy{6RVRx_Ozǽ;xxȠʮRAƠ[iHC~3|miY\D^:wVT*=^( >(l&B37Sb3,1r)d 8W ڿFIRT=(b~wfSU&)1?BxZQ.)df00wϫx3}=KX;ehݴaq*ߩMC'-?8?`ֲKsBMۉ7-so+KG;̧ T]6~W XokUIԟAG2%87)tjh<0Lo[wC/}ʶx'LBMZsQu Qa)Z=td4NNg^f99>~ ʌ•yتD / rq7^TAAf't.-'BXo/+hIiBK fu*˻2א<(1?U]n.מǞtaWlRI1O)1^kF-SctV_8~!@t|x/=1|\㷫rlA@6ԄuV01TҼ;7ve- Xc&H`S!EHwBV(nY}] UT&{zgƺ40O8gk'kWqae%E ic]O[ =捵nViC)#%UCõ͵J1']|(vﯼדSfMMVR1j9Q!fb;r ^"ۋt3Ra͆=qk{_6W{4zZ7gQO^šM |8P. va}vBe y%pVD$,ǴOq1Dpr # 0 H^w*#T~z+~sZ[${(U O(x*|.zW Х6C+TA55Gp֞YNTșʣ+ -<򹵦va?xaqS,a&;N4h%c ,+zM֩ o?dUٮ Y -Tl݃XW#veqCkj{=.CoRʹoyaOSX˾3x?oOȑmVY3LVa?Ĥ)އPѪUm&&dMܲ,~T;H:ァ}Sd|u7'aW.[wŜ9 ٙxJQYAƼ"?r[_ + D`elAYF8m8T0:7;0b8m{1Y!,rMw;=}R*D:D EbZ# Bv֔vмڝsdql2J6V<}`Ϩ~:xnK4+Pc1j^LGŲ5F$PVR OP'juDeNI?j~^=*0"2@f87C&tWL}q|OJd]Ql|ĕGs 0 ~}-ߣ颏-x[w7.ӞCux&_.|bB&٤?F@U V,{;ηT;^r*{,{EfX,2 mNe%8#]!#; )EZ݊rцho5O`d}G髬b=%S֟TJ;.|y>tVHPE5=~H̃@תr#{4!Yl(qZu>CձobZ*CdanSD r$FH'}W❜C{wV2_=x/~M/}1ɩDKtSUm1i Xa'pV H\Ju}!ȴ;1UDEtGt~>4v0J2k߭\"V;FrO~{E^ny39qP8v~Aa؂^"ޟ=N?"]k}??P/xj qCҠU$]X^j׻RXcd{YK-M"z%DR-gb8z:Ϛ=4!ZavCnSyמ|+ZB>\~YlP^jôzj;7c>A U"&Hs%j^"/OsVQF!BH?_SMc7dH#RVIzam_TOxo:/GT{r˞λ;4m?NNr@LsmOOخmM/wm P}̝Μ'J(\a}`xrrB48<&Lv.wKpfC:9)wϑaKiq6Xnyd][c*Msj߬귯 L(w}nQZڪ 0~YUlhUa`>@g'Xx, Wx-(3juAtNmZ W[+b&y *݄!Mϴ\ߏQ[~ Ee UA p({ Ow!0pJ90 8"(+ݦpg{]p>%]?5qLcØE+z?G67Z.QA?h Z d%őzYDYsR*}\> A Gamr)+)FNwz'4DUNVBX\g~7,mY-]Kg4}{z*k}T0`]\4ʚOP~|UK/,Ck}D`X9H08c>,a bubоx7;n3s%'NM*F|CW~RjD+jUyvIo_#?|߆o7:n}cƽpga6Hkֲ䊙kO}ÛF]DaSIUjWj~MIvo²(hV -߿5wD.􊿎S|NW\Ru8}`iȘ>@s1azF݉awfPADz"bo 0.#uE߼'>bW}=F#‹[5v]║4CMՌ'6k>[T,~C,{RU&*^#A &.ECJV1(?5)zM:B H'G֘gayOzlq'-&^>j&uBi =D6l]ݶ%5#Jr$e-k>sg[弚Yn}:Ha! tD-51v" V zM(QW6)r]x#_(f2 e g5XZF!~Ob#^4^h ۡI͟\wNfUJGDd`>qU'#ZGSl-E0L' YlH*VSi7z^c`xL=1&E_POW5zxOކ`uĭ߱W"| D[O5 Q=RwM si6M wSI)bd"*ntGɗ4%ԕhr옘#6w wff"ʄtwUfWF{^ w촾V;k&h98֘K5Jl5$`+PDB*;wT<՞7p;'ɸLi6;3=q70'ImINCat ɒdKI$Iw 9۝~O幧vdqvK|>-PMWDb bgWܐu, jGKDY|t[k6]$ fB}"13v8P=) Jwo[QN,VFemVY~˅WR% pFrj{"Y4&C:9t&! BHѠs\]WQqReۭ Dݙd!'2 4_ cAu-K0ݲrk]VyOՍ8`_Y_}Beju 6@1aEq"cVUN]4.4GW\K84lDlAY*Wj3NN5A*Iˈj̀nk.']7.B ?FtltԶp* <$CM7.01_Z1_7_tWyW5䃺?`=0r D֘cz].ìQK^ʏ^}fg0LNoӣ]ݒJDRX$]BsUhHs؉vc'"Dܻb10P*ypgg܈<8c!V ,H_OCj?x欛uq= ;ZAufKE&}!l'cK !"D#C{g`[Sh_r%!}MՅc<թIxMأ ^a?9Lَ_>pMO`H}7rYu4N{E w9rޣ.YFߓqB-WvQ>XS)BlN 0 buzT_ ?{_QsوѬPK'x}OgJ]ewqmB,T7.fߒ rU}sG]:h1Gc$Bɹ[P5}8wi(#$7YNNm H#E׹IL*2 |5RVsJS؂%ק_{}Hqa^*&/jc^ӥi,._4aXSIQYPҷ(AK89 =q'BbGB_(_0 øDqO m}rD_~ms=&|tjftm;v΃Z¦v E`;ESvqK]cdv`QvD@0AJ50R]z1JRȍٕu KO{. ô@YI!#>yӃw7j?6{?o[N-1k>:slt ż~taG+kWj-(Ci6-U"O1`AvT@y]3@F Hɸ&_QЗŸҍI$p,vO K}= tl{Dx vbB֠Vwrƻ_[PVxeYEOӚ=.G=j'zGdzlTN:_5~8դV|sfnڅ՞\.Pc-?դ:vÇS[DNTGy9uҭmBUWU!TZO66jm*F9i @0ClزTXp%'*De{2(]Dz܅%B6}Jߐ=nx&Cjv&ؗ bN&Dq8sjgo4?6ptW~W]x  øH_90=K{a8[ d"2gADc# OrSaN_qfCYI?BGOmnUQ3=gj߲Tĩ/+-X[T#qO ݹ]pNb1!k$=v_ؾK?7LzuD:.I4z]b#PβYIve01fDd@i6 A*΄nJ?1HI,M! B )@p "gkÌn<4vFx4 @w+Ȭ+IjWZi]eY,nzk՗i K[& -Gj7n ֲKb71ɘg2DhPa_ ۅx"!@Hp iģ?^ӨmC9cŴquAh"u}̼T=Z'f !V{!B`}^ GGHd~DO{ ҃mѧ` ]$ٟ?}]y h`؞Զ sX|u8uӕNyh |GD﹟{Ҩ|c9og۫qC8BDt*@5N"©3oU\X}*%!f @DJp{D&= 9j9ŞT"[Bc9k7 Wn* phED~6Xv%Л/ 9_鏜'T_\`rŒodW6}Sy2,wW6aYґ?s/Fz T Vul^ulz"$ 錭46ŝ)aqD%w0,m7Id&:Q.(!LDdDKr* xHSr&W"uYGȀ-.dm.34'ɦy}n+WGw0L7oYw;o]\4xՍnu$!DcWSd9T;>Km`!p=LPZA!rB~3JJ(7crDC*#H Hةpd\GqֱYw)$ ddadH}G(1!=8?xj';y2Th/UjkʆsY>tx9RˎҗʴSLJ9H&&G̕daFKnx7^ _V4ϫ~ګBQƖ}/-;ϛl_1J烹G&HOhog%iډzG_Jh.}i ø@oq\JBt0 øJ20ZLo-y͏D0aE7X7Gb]Ӟ+T7Du;Oe 3@q ޛ!eǩN 0L `!RVR_e؂24 i{Ƕi*|NkS rk-#unR֬"y<|M*Ҩ D:C6WJ6fS_njka]ڪ 7">J eE0 Ǹf.tjv~|?^{BgVyp˃-uwjԞUG?jt%z *}zE IߧDEӂ @wZ/T-bG w;{xŒ '0 A+n`n5''&k\9t;a;n8V`/X=}!C ,"*g׿/ޝޠMoO2-?w= lcy.e0 fܼwe|-]ycjxyc䲺lmᧉ>M{QxgF[m\Рޕҕr_=0 Jr*}swW!q1m(+)INﰜeWݎ%ӯoYӏ4lH}gw94f/k/#.=xѤ)<": /'JJԤ[}S]XU43zS!% NDJ7N5L66G7/w'$ž.e@n% "qgc-Fgqz . ͇QVŻtjΉ 1 VS_˻*궏e-#^MKc)| >|ײq| 0rdq. ׭TAj)֮*|\J/kWug#'*[P֞ǝ黴;shjH<[ Pz# C"Gˇ-a,ћw4Ŗ{"HBHIAnOpA!׆<[ewS',|?'y 9tvtx⡪r(hϩ,twn4p&o*b\]`XDyR)1?!'*dPʼnlj+xf3HkW )1@a95C@2 vLϐQ$\^~Y9gJLDir(Z;*i.4voSV@X]|e%vϭC h}Y*Fh-<\UyGN/uݐkEoC 6Yř |: ʑR#Z% Umv@%o|hc bH*dPb~$ZJd^g[S3(v') Ya'H9%'ȓ/,gӈfJJtK`x c?KYI#rpڊ>Qc1.oֈ/jh <-oŬPSM<+0v`b|va!z``#{8t{j92bQpCIGskB+C^4v{UyF@@kWɟA%*oocVY%h E[59\D2LuNAΩG-GQB{!ft"C"VgRVR!e%t?n$W ѿe%Et]3/,=];D ;{L{s`kT.jU''ABzz⚠3t8Xj\+. IvHnbvb*[PftbY?Pw4NpR%Ep@<3!d{Bɒß^Džcn[{D4'9FYSyEYI?8Y0ϔTعvH '2"&dMtng؂TϻN]邀b.p剦}Bj VF Х+ #,}Th?i{H͔oJe4'NWz9"}P}g]fpSGG볻3]&ט4@;.kWe9;Tlj7 ̹ÝS |%o /ۣYF$:=߳DĄp"SD^qQl$-ܦ۪?8?X;Roլ%&p` *7탗"캧b,^[10Q`˯-| oPb@!%8lʑwža؂2#!D95̀tlcct_|c!K*B]."M]ey\Ѷ0t6}y_H)b:s[_ ԙw7"E$ϯh:ݓc&'*$!,#&dMxa}E®)GVyU-9e^Pyzx yyM#q!pK{cD7oH )ʨWꝱ0׽3؞;1jǃfN}HA8ɻcph} Jv1&H=AS^žP{({\7dք1NC.r?3* F Ox2/\pIKfdU15-GaB[:&Y=V#%:T U2j/:ҝ",$rv\?ADx[rRpdˣ!$8ەR HS e% !U?64 T3m iw`v&L{JfIRMx(kd_ǴFuLIjyzx{"~7ZpAA#Bp">'GNZ0Ǚ O~: ޼op2&U)J>y.e)]a=2@??߯ժ;wNV8"A|b N` hDd;e}|9_\1L]R[Ċ EsЊtAPͭ_ҩR,4$ ٶ@Bb*wv?ySɯ@j@(x o:ٹ ap#@ARv9GYb*u6ok[]GI!ě>>^8cvɩ%U&:;m NY#)1?by45s!??*2Wbǜm zp?kNg)&}^|%n!Ht@X*Fކ7D11 ]R} h%ǃ(@hX=۩:H \R9f!ޣvWS ەY^'5A Zr,/{`w:&?'z*6t<\2z3JbukHJEQ5zzQ~E3JRV?.cזN|Cl|Ԫ \g?@U_?yWǛGK4  D)b9Ҟ ҇W,>nzkAXU]}[[I]F9(%@J( =$֯Q)ϓ(/fn=o#-213(>ӿBl 37}EH)YJ(`)mGIzEr"' 0c;F]*I5 ' ,.PYut 2 Ho!_>ڭCWQFŹrbȕם>X]:E:jN0}>0ٓ p9?G˿F k)lnⷞj)|?e9{Wzxtl ;8!ghYr0 Ӕ0u`#DzkM\X6(v*%'nPbɼKhxGnwKmI!`1VIDATθX !U'%gh}) )zRĈܾx+;F;.|1% ?mnM@e 8qu!inO"U,ߑl@4@b1eϜ#9u w6EuiTN ӗ_5UxK#8Q3Ew2ҩN7C-=s<*ˑ`*٦Q=1?"4ioL/4N2(rĐK# 84Kj锘=Zbl=6JnkV"uNW?i[UcheZ"r3oΔ3)뵳<1)oFowwIHNl۱+g)'h?x!֮IG\P}2@}Rwd!KFt3w((1?헆qqXBwrDxȃd\8Blkr U N*"wl;=6CDKҶ_iKL{8譪2zHo*Pߣ 0ElAag}4mJDDax?g]-N^T]+7yt/y5F\"Ě#Umҁ57:5 !lrپ=ELO8"T]+a]Y.A(;'R](UAZB3g`˯+wi>~rx3{_=T}1.98nf8ozN}mZ?K;%{QZXE:zjw"jS;,,wm+jgj\ɱG&\Z!Rb{z=~ 퉵J.y!zHQh; D~?FՊ?EJd{k<e| d|lA>ǫ*ՃYxYlQKXPf6D[[|! Uˢ0*UYϲM %G@U+5(1?3^i;Ͼ`IraDI;~ RZVDJkpS?8'Qk|QtVW8Qv*?Ӥ @+_[x~V'UBǕi3mӝS^yY2ȉ]݆ri;)on50 ӋDmxі.+Y,^O'S> rS#bPoNVd-^Nն P5@PT #§B.IE3ݲ hm)Z/X7QRv>9[kִ5?XpMɓ7Y ުGykbaʅBM㖓L*kW.+U]!|+%?^Hb,^Crsa>bΎ*ABnB,G*ymJu۫ /֮J+DvJ tlUV1B[$m(f$-a L6|̔лf+ aYqve{u>; `HJNE`j}e!l#o+ϡ:tDdU8SdW'\JB'(JRrYxDd@D1UNtG~j{f`qޔ' ʞk??}*Jz7BmڙhjHm!*k"fXkoUi=z\XI|c9͐JARdڼ vɾA+zHY6ZKiIH|{N_xm̟GK R E? !YIΈ3A%Qb1fw5sd.!p8MP"  aٯxHP)]ޱL@K`,--*EH^6Ƥ7CA#q>O=E񫵳`m^9_B sR8zr(er0]GQ*}[v"*߹5ZG_!}҄)vC! B!5vUUpzZ1@-C!cKI2 #QMFư8uǗև>O<|~AQF變1rmQ1V+gTF"L]uVJZ󿱰6o KECNTɑȹ ౻']4hyUqm[ 1kWEu0OYѽ I|)J#nW(kM .gI/| pݖ$m̂7\;@;ՀqDr:|T _-噗=+W~ɯ)ɮULn5Ыʘ9_y9)x OdH(qfg.E !rW/D'gq5CL]m_ʷAEJ t' Pgu*y9Q!g'!'*/!yoN刦+|%jߓzMh8S÷ܡݓ\%k˯Crf_DI@ !mʼY+C'_}T4,?(Pr~m,7Uچޞ:ߗߖͮz*!H5•ߗjiCKRz^F4 g.qBYڕhyYl||7#7[S.@;/sb9AKŜDQEyegJ=^GrI#;X*nVzT !m!EnO VI"U[o9 Xޙ nrTt]! wF#`Wf`]e'{[P6^희c-8miB| +JsnHu$Ax T4Sb<nW3L v`12mqg#^. QёpZ6 ֮2=;%,rJ̿#Lڷ&@kJJJTˊMA/L V)+i-z*6L~YVSa9֓:@a#OTZ[Tj9̤mT*ٰ1m%:f `9 hl[7(%f9x[-Yq2}X $L.6,u9kcf@-&(oIȔg `bXj|'PGE;i[tk)1 Z.0(E9rx姕 R_2]@tU<6uuä0AItYݸpU2%+?uਔKcg(ͦccszΦI{yyAc=O_4fn~5PQRq:͆-$zEhi`ιDRp[g㣪7=I$!   [,.[-h *łV?Phi+(V*P[CY\P" @X& !d㜛ܙ;3|>=s~sϫ*5~;9mVjQʵEB6۰LގyEY"ӨXic3j^x&)G'ALPځ1K=/} :pfA-S傱H:U;E$glW7Ⱦ4.#tC2ҳ9q2 T&1QD2ݜ̈c$p4;@g\}AT d>b$+&)nB!1{褶xfTls>?#jؠA|5F|˯{q 4*z.bF?@DcN [##fd3L?SΪ?]ON5*ح7AC=҃CwSϢa.R,P懔%*dg0(LȞrpgZ9d`OOA/ 2Ar1|4x4&:p#6# $% 3wb%X{K@4Ly #D\0&$7h*1R?qMqŌ_.B\ hx쒦 &WY(m kyrz!MʍDVMEmDg:i/;qMq<*3^AaqPR.C㟋h*S5_|i `ùA EN~̣n~H}&aDY?I|+AY62[~eG&)B,GVkol\ ON3feQLAmcN?!%]CgoYlڛĜ="1KPܐI};hz@J&ct,BT3s:)𞓺z/{>Ws]3JODdu#@5+ blW&߇u}1ԯ-oY bȥꢍr@N3OE=?7hrY Yn" oDg1@VDY&-mL@.I=bv1xL=A4/G157HmEuwG^SOHn"*w:a'%%_!ۯf߬=F |vwlM,6<>Ǎ_e#R $A'@ .#[oUP_Y_Si 0 -Z LG/b6R &c̸5 88RmOI28hi y[.vǥ K)kMՁ~O]1 ?ٝ3CFyOsutQK+#58%-ٳPG[BiJBWķ |DЌgu /_랇қ7"歬n*UOmS1BC!(җ= Mf%MY:h}hre3_:W >7a̼.gS~#_jψjJ*]= nd\O9%#K!bܵ5 ip/w4:jmN~\h1#@{E-e<r ČlB5o6 <:j7壓k3rBlҷpXE:hU?qS9Ձ@4w_fĜ0j1_gټPMt=>xᥙnqpb]=٣رWJ2铈\֍ac ! T_|bFz,P  E.o m@ͱU36cNDEʹB+AAH:B4ϪUx_RO\S<]>:)gv뫍Tѣs2^DTEDK* B _koEwQAkJ_a_~3\NAE|lg먼nuL_ b'|9'PƢxO\/y,@f$9;)dU@kNʙx N0_sm:; FJy~k߉BdB{Lv]Te))d'cFUL޶ եv|q[̕w\R5?}b]t-MS&L)p:}D5Q oº>rKČ՛Ō5xRR!"F@hڡ,-"8p/i0kx컁t!Drh@[ÿedC)y?շox%F}S]E_giWw &s&>JM1_y󷫶Dq>''&Ϲ.‹f+QݞM{2UOʦc (H1#{.R LN_x&Dռb/eشc2knT&잉kƐ2I3h3;Zq?N ̕RR&fAxx-m.4Ft E629ݣSh ?{ s8݁5f+Fd:;(}p{s?8~/5dHQ:psum1zu Ï=[A|cO[. =oPzv\1#{eI~c8vZDƁ+Q9&rщGV% _裆\)`4YRQ%#y{Jts Z|%w_3nJV`Dާ)^|tҨEb딉474|,>?Ez:zT$fd傱s94\VO:8p?8A#b.T4nb2X©?TxLs4 .%+ctӀfZtJPՌ(qgZVq˃K|k_ .!~- K-m5ҵe=Ў+P3ht:kZqw1,IjKɹ'9Bwl$(KL^*۸d-`ϟ9RlPN ,O_Κz^u£~'yа@bcz4YIlڛ M(eprqi?t1#'- j X#%wE!%G D9hi8w0^JVD7 BD PEDfmQhD޵4g-ѷ+de3mN#.8te _XqMϫz~9@K.] ѵй]A{IU Y߇*'38i/s-lgm~3O +|F-yׄ|=D1Č%l@ݳy(?qi׶,z88 QL)pR !rzy8LA=k /}sK6 y^ӑ@o=A"eӳuB%Ƒd9pq)eM 27%Q4&a"g{X63]N*ǘ?v뢷<{|~?O\5`j^/LdL:s pc^~音}q1E-)~I9bFx`(3rH'*8AH\dZaofwX69y{ڀ!z]RVxg:K_Tk`0_f{(RD)i]AAyEt5Ut\0֣#y(pö'5\w;pࠍ`BҽsyqX6sOԅ꤈ˋwAɤ}gi 7 egI%XJbPJyM4! e&P b]zr*D&ΠAˠY :pKN1G9ĵ}@!D RڈN={F5gY? GUJ~s7gCյoNZ$/ktޭ 'Y٧f'sF:he H9QPAmp "'Gݪw' ;2/G ,0TLmHY!4;yIK(sk_C1-^"K՟\ ̕ 33u(d1$|'A8h6\E)r!8hSЉ.q_ϋGzy T#DlS+!k v&$X/}J9'U%S9o^2I+Cg X++$fd{TA$*;{}8 7ιqмp)۴$k-iQDpmwAwstז $?GI\ ꗁہ%bFJTLp**6=sj0#qp!88渵t҉|sZ>J(wgYM$-/髮/ 2@hc "ҾioR LЈssuў(R6O.L \ VhmY({(}:j䂱G$xP؝2֭p1~f8hCpZ'|7ӫ*ʊK'$ޛd^48'OL振M{(ƒSbVmaH{M&N#+;'_>:χxEIN2l$=EQUݨd.=dƑvʜ /δX4L/J&sE8pX!8h1,tb~{_oG~y{~ћxXURÜio2}JPPKU֕6Է;ppiOuOb8*/v[G{PDΰgVc\b6ZL.2i㯇%tEYbNK gfA=X63/'ϷuJtyΈ"&RbNDT5v٤u}2̷-?'F>c7{Do5,|_2傱SŌ,k{\0_O.Gqo}Jb!J*S%m сoLH7=-e38c)"..WOoڮÝt+cʢcJE\dUXϣN֘5L:seZƬal.EtR-%i*j +P1Miǵ9/8h8wK(􊨈Aog%]yxգ[ݭ(kgmjѤ6'+zU$j{ b&TQ^M9WQ\0 N%"Z>%9#%yG[yqA!88Z~9yz953yrgO^ѯ: fm C7[&28u}T/O㍉+TC)߮?פ)W^pW( ߦ%1#3?ʻ MqwmɁ;pY|tԉkXbf&|#⬩:2& 9뇳4>,M=LdΈGBڻ9罞 Ӟ%y[.~nDusdc p$݉:pz8݁fNۺdD&&$D$)}'vI=|q \C,".6:*Coʃ= c3}dڽgdGK傱SČ(R^qD.c$wSތ 5[[ݾ7A!8ŸJǓIJ~]'.uJD|;oᶤ`p^KT5ò~aoTH>0g^x Vͅ\CpoH$Fjߔg#;w_\yVC ?) p_䀓|EYIv&aSq2ʺrk}IW]xo("lX,Frq=!8Ek4|{so*>])'_:}M\ 1Ź˂&$11ob)>)8FmKb2̓VӾ6;3>gW30F5c]ooҔvS;p !2PW#+GJF\IsG'y&). /dBҽLŕox?Guڴ79뇓eҙ[>l^-y3/s{2nP[w{vww{ט7`?oWv3nh* Z8pb 0EAf !VJ)'ikN1!w#B,9ȑ.I|v_/v=5u[ؿp%ªxZRӧo>M(垷/aaD|<7v_ b֭ypk}ʲs(ȓŌ ]Wrlqwl)<!Ҟ""OBLARyZD Z#ߺIJD忞|tRZ,9%8ђ~z~Fz{G${PAϒ9i77}qIGW~-K}k@Tb:)%);I==$l~8_ p]zDǚ&jJ1T7/.}+Vt{#N{{g> Oo/wOJg:Uo,!qo |8pІQy7 ;6(tC@,.'Jj#.|Ӓ:p}SoGOz8xك.=,a=$6D\H՝.)WFYk>R?=Y)aҙ[PJbM{X9ddZEkjO\Pœ 0_fN0(*Ԟ"Mt˟ _b$M@P*ࡔ|OVk8hy)+k7Ў1k\)(yRY `nz6@M8pRfnxZ ݩ)V쯪H9uTȽ|wޮs-ӇmP0~8u| 3s7nDVs1#} 74y(fe$i `>@]_t>\063fO{dYT.5vV^8;5fW!bMgƶuP>^^P7G'Qz$yoX߹vTUuM~CDq[viޟ/zk_+;$TW9}X94X! qYw.L%=5ߓWv/Y7ہi ?HxAεkӁ{ix`HkmXkP)KRK`i`k},|݁@tLyAY5{HiAԐ!}j>ӈ /1p~8 pYGh\o2 >b/ᜧFra:W0ᴯd>l>1#{*~y' ?$D h^Gr }"EiW$ sZ6Hs dH1<%߳ (=xH\AKp X  WUKa% THp !Rw#@ I }w]"BV+%/0*]>:iX63ciL}emu]gI9%r`,mp8cY1Oo{ |1LEN0hݻ;Ү sی$<{)s xdvig޼Ov/4tx|Č%(z`%3gˇұ'JѨ|*rt$M4w&I䚾KV''$tQy,wT!0mb?R='#?ͽ~י)8h~+CݗKS=Z/bM[5yĽ#Her0K)sP+KA.f>cN<>Y'#,M k 6Mbgi}JXxd ])dAןU duAE3P=0;(0I;@Gx  <=vS^\VAB3FjTЁ%ÐӡG!w6I;ȟT>h]J٤8N %DK zӬF)o )|<@K. 0O' aHEWb>"TInTo|a^J<*ss$YIR=i R.+Ls/kz8>zjzށGm(PL [{GA@X$Oh݁ ]}?oG\ЈKj# 7 egiO=a708ӿgP}L1{Hlׯ|ktg$~ܢC*LJ8B;?`z)4jXUvp>&K{mZ.ےzMI].W4Wm}/!C4:uukXv^?z'];qgi$'S⍡KT5ßg{x5*y=}]_VbU_bFwu}k~lY^Η|O10%%"#Qzz4Kkѵu>:A#0͝ѵ ՟@wmc'7sGxкg\BqW@EaLm1>Q'Ǡ\^:>h pm+|~=_Ų壓r{ѻز56 vz7c~>{sn[RC֖ATŲio}KIRaҠ-37Sa_kt>\~+cy}* 2 9.K#Z/{]_\:]A!% .^ԍ1ZP`I`QҧЮ?ɧ)3%Ca-`1VڿH4|ֶ =lk@'kIXQ~>cV8eHaN. }z{TG5tkoDGC8h#zcB+^t[?ۀsQ0&c0\9pP6b܀oVaTΒ[w1T4"&1gpcq&pY|^y3,7mTrEٳ\`eK%K/bdOm=!}. _ Z•j$tpH*׆N@BKnAZP(3pWa uNJu+5>m^8%-m} g&I $|<`VI礢47yd|Qn(Js,^^D UܯIxEOjYDQ!5m~u{;ւc頹Q%2<˒=g I}oTx#-AЯeT}K08X9ʌYȊK _~ƪ̾;k$]TδnU}_Kw[傱~MinA܇R-B@Tb}qWR= AS{mײ)`.b8_euh{%Ha x2s9w~HiQȫYЁ }z!%P SnޯV+AoDMEюx0W gwֆvlªJR=ncwFIRυwP3a&Ӷ(ܮ3|Bw$C܏qw(@迿9%oP3߈OԉONջڴ7IGN¸au/3ihRoq(/OR瀬pzFϽ\W_G24k (Ǵ_^0  ew|@3rR=,Js Bnjgha;SJIkQ{0,ܦu> Q h{' %ll>M%i"ˤl\BySۺُP"P|Ӏ <$F3=X{xPe" O@P2!ګ|%&!nLu:Jn\e*^5{] J~>#r ,zYT`Z|s-S8Asڔ#zϕ*.S.s l+;k_]'-7Mbpr1s.cd v}s.#.zSXޅ駾oo^4ܦQ$ksuC}ѠŇEBTuQd)P"kd[U5 gi} <[T]OMogj/;.R@K MAuN]PC(y)G@P/H'>@džط7B3}dalWK{foe/Cs$d2OA#9LIU =w̪^n &?n9&]ڬoyYkF"k꠬׋K"@!JS_@YEi>&򮣉SR='vI+i~W.R<|"}iB!FBR해m?!D7]_Y05۶KAg Jbה}3dCNIγ8]]GGǛ:(7Hx c8 i{/p?p-޻+_o:j/@a>}y_7*9ޡ*adYS/1#1Kzb ȤգyڗcSnQ?ꋟ/gYc_cW^=6kCQJrѾoT+MI(Jd@ț| 6_Ԕr4^4d' a_"MH0=0^Z|zjc$ޙHt 8h8Kw|U޾`0n3V‘iz)_';UqwO}uO+?_^O,ϋ:wHQĩ ㊎r  ØtKnऽD|O)*<].4]<8}Ro{oc"c&Ni܎ͭ}l:b~Qwd߿0ͽI&x.D $5*AqBJà#%E00fҒ;hW~[Ih2[4clkAD%_i{~UqEC*kboG;pLLHRwQow+NXxOӣ+O!q/ec4.aSq2yblYWЇu7`"]>F@awmzFi)G[viEi,C+$|Jnuh|SqNuG;p:E">sxs)}CH{A,0qoB)}Kٴ7gaduu}pW,4+OůN}AusEdFZ"9|,pC0 6Ӓ|HxVEDSWkզNj#0$ڦp2(EDE/d$$>uW+{ʆ|ށΦwܭOSB$,jcN-?##C88J?וů5@IMzY?\ӑ} XWЇWsg a4hK}4>k9VFJaI^|+7Ѿx? RaETqn^BYf֝'3m^~"%٥ǧ[_{fIM}rx]qBY"%30WHI@nnºlg :D8pO;G]A !%Tϋ?~da}Ӈ]=U݊3̴A`Ha&RQE֘5\Z>ٝG#Z?f3=;t{2Нe7Gj[R$]%Ii[#Wæ/{ȞKna%Jɀ{UG]K.ugh9ix0T#\枋*jfƅ[M~_IlMpPŌ~0 BDwipG5֮4?`Dާ)2N;"JibdYcoB);M".F`9 IU}$/PD !haMtm޿W Iq_*94$T!Ӂ`~j'(=N27$|/}3zrijINV +a,MQG_\Vf c˄*?YUgrW/j= jM 0T s0j|#i'YH%(G0@8un;@u` !Vp$B|,|X6s.''ϷS,j6z}.0̓5艱Ulڛ;}bի 0}e-Ӈm 1֋,rƯu}xS'Q$g=Sߺ=&kKrwk傱Zc{t.C.:,MՄ)c5D$aιBUtUޖˢ 8O;}=?6ȝ&O2?<Ҕcy4(Js+p (GltC ^.EF!/1Y[_$*Y.LsMqw'|OB\# .G‰;p:׎(Ү/ᅪqW{ I Fz4}J9ƌ-LW2kkykg_vg\a)EF۪;REu]]"\i5}5Ժp񑾈S㪢sգSw\ČQKL*DQPOK1PWJE.PyM q  y  HLA/$<X*?sDy!R= 5sPȍC8'cƪ3~lֿ)檴^sẑ0^* 7 `Έ-M(%*ڗf։= N< >FMGCWR^w}?jvT4H%ұЎwti;Zk;%HQ{R8Bh[Eӭv1Gk!Ai8ߛ:t%S㇟޵rߝY9ҳ$04k v&ioPʪXulURCN)Q|y~i$Ip-=}OoVi?hV8$O;s ܙG8 r": ADL%Dgn|5LJ-RBLܴ797TK_SŦdV|y*ʺ82u]AθU68W4~PEDHvGjx'fEA"5)Ls ?Һ{}G3u;8n!̐74}IU Vfᨷ} X߇Igna,apr1kyyc?rO'kzꢢ?S=n]ŭk mq`݁fY\w6e{ACx:6?=dLǫoA4°3NZ=9#޷sc#N}*+U]˄bbܫ>P+[Q < _<8pQ 4 B-Y-΁68};{?u4E\m_ƷȺ>dmĪٴ7M{1vOv^ 8*D*YW+d%|Bִ8iؚEj xn2ru%aD܏As‰;8nԹ>k}m"v&wZ4T-]WЇ{IBiz6Mb]A>NIUz;bY -zEq: E̠:GFk]Y_\̪kpwe.XVYe}J7`;l&I:Y+ wn;y{ thmCt8AX6-'JOVȴhG^*,T*p vݎPB8'AS`ܛbs9@p~t/_oN{r;ĜqZHǘU0nv%U1|Qij[௱/Vy|F#Omc%ڒǵ~B([9)@'q>0!fdO :uGSun :CӐ3b Wh|3pٴ764ޜ:Oc&5-lE/湨(VZnK@<$bF`\0!4ƑxDt 8v:$O6[9uEme\]˞mv.Jv7{ ,gBL#n2\<ȓ mH)ߖ Fs݁8vt$8wob)&qV.oO5JCo?꧊'{^_"|Oh&F TAh\`*j z l:1#{.#5Zc'cČ Z1#{AR=9iYM\pСw EY=;^W~tyz &mڛ ZԌIŬ8 ^u'|&9G1EJL5}l#,T)BRJ?!, GJ?OBRIrRK~\0_w/TSu Mո;M:!7޿G[i]lnUH4hWmȴU8 ~81U>mW6nvݽ͞7k}E֏3^I1U2dy" fhRNBBdJ)BdRʡB\:?jl`"'0oHjYF#-}ڿ lڛb&Rt3#Բwwş56JQ )Azg+@kc˯X-8#2)02@E Y6 H} ۨ?6QqVnN;%-ӫč<k+d)(o*<<)%X A3^dfgY("lD=~tI)GI)gK)WJ)Ct!4KjflK 51o|L gG[ MmZ 9O4+i#+t0F0ii~Ƈõ"_id+#O}]ʱ<{S%&jȐ?Hӻ7sú^YP>K$v_* @~Jh5%"| (r\qR.5GC=׸.5!5_ !~(+SPz\ pŤ7K[P//Pd})oD㫁SS0 !nDZiZ^/A~5q/K)kcQpནLdRt:XVVY)J6gpm_^} [ڢ Z*;ǘG$/&u %9y.%sAXBMhah8ǀ!v} ||a^~H 腒,n:&! xП^,d,7^*ɨw7gȇ/B@ >s5*6bF ygѮgtquFC5+ kS=!i# @[UBF5j=:Fy <ҏ߬h;7 8"!R*oog5+q` IIqo~x>9Wޏ~4 zq/]Qo7ϟ:A!Ŕ| vSoBtvE<ylA= %g[V/j}K9]ח"I)G1)4t(Qap܁?Bl@P碒p=B(c|O7y= VSȞ$y4 \c vC_=zC!f ^A I p̄<D8Ӛ=!nRjD`L SSoaW5ek{ʄ }:@4[k Av&pP6M&1,t(;K(ԓvyڗQIݶDљz#&훫83:"hGJ*ňX[hOJWDM_z[awNa0=iO׿}'8 Å(IP(: x.xG  &2oTQB\|m`,p&Q,!5[p:]u,)MK~ry!17+({cVs2ZmHvbvL5, siF !*m%dgCM1$Q,Ls0רpmr?٣xXY볒-13^C=_C'fhw:ahgRG4l}DUC`~#/x(fpDiDGQʰ5NĽi7n&ǟ\{OO\S/cDܳ6"k & ¤3~,0Zʞf-Tꓜ_XKB!fg/bt05t$ w{< S'X$]t6ALˍ(mߧlv"Gm?N߀ƋU(W;L;ɚ~aGEQRȻn8>joP*[r_y-AY)У)E(:*@_[(pDQoZiv/6BIiMyȿ?2R> '⬥hykcDl=Xl_F30o< k(Lsgف~N)h7E`3!8e1f~&L 0ch2mY8W/Y&{P9J/XoFku̦qL3_9y&jx>K;*Z;w/m\=Uݒk#^Qҩu}X?'nٷe⾺7zv%p)M(}^|w#wXWU\VbJ f ol^4dA][ ~b%e1ܢݕ`!4(N /sRJy4Яq=3EqM\ͱ>+#g#\,Tl$PMXA d>X @Ʌ&iA1KJ]OBdMLoϲy  YA0JQa{.cN< Da{<ѫ$y1v go:NSй w:ʭecN7Zsl,@]vo/Jr1r2])@!۝ TŐc}Yi'ZJv3L"+UR3[e~݉7-gݸa9< "$2VFϮ,*Rqb'T\ul}|ީ:ш:j& Qܜt1:(kS"MVZސ4b0"Op*r^ _/**4*I7tn }%NAb;%hN @L28/HgP1 S;bcɥ(0^Ӧ{ђ؛]&R3X=15 $V6jE6#ronsxux} b5qK0~24/>rϡX lAƹ~:2Va_|zIrbYzKPߣ4PT>`n!D!j2t^LR}}>B輣'5Shx4m{AkG܏{d}wм1V,M*ŔTP⍭'PnaG\'%p'GLگs'Og @>RR) >->+bsugEGT6F%+^+Xsi6^<'ztF$/d,A9A 5AEWf9~q?`w~A.%^Ҥ_)EWyYOڹ}e0~ך} 4sIoAnc f~FU`M@s 2&X0SHpΰ SA3 E u-#,6V<^@Cp* !w+$^/l4 FfܮM5d`*@4;][?ɚޗB$Pk¬e>i0QrQ+?PHԠլ4[_kw]XҠcrJRf:'PNc}P3[tv~'׺Ō]B/7 \ |cW#WȰ,ฉ `.הѢqoJMǟY?ibl;Ku(SKWQo}[(^Ll!gsu]_}jMϮS=q揾7w+ %oy  L0u?tlGo[I1S4}@;7,jo%0j8O>RlDc!]&(2iH]zFH>EͿ 'lM8Pm;$Wx5.~6H K\M3P$ލJ.EY!ٸ_ϠdcΥN9,fFDsx,m lݷu@kmq.^JH.=b >3CR;,έ̅ C1TN?Q9LZ6T~[_KAgyטdt75`4e3$uvx#kQnɰ ! H[5bk<ߖ}ᆡdmau?9W!kFOr}q9/&uӥ_oʽ+͘(|Jgxa;5 $T^zIOgwmJahu9Yy *1kdކY@,O_M}`!V!ArFTD3kn~ŔX fdCwG<9j(\%n*CtíO)DCpˢJCD/qQIo#8 rb؆<[ľy_ǓQDt!X #"ΦHp6 늴f Lb^z5)3Pu`>yި껖LQһԽ6UJH%ꆒ/J`Q7 QuC%1Y(FL׉q#.>lR-S҆F5Vl~ɡsQq>80H7 %۹IxB<#Se4J&rIǴ)XA*K@~@i.= `:Z];?%K2}3 P-gllI#yYS۴$D+yHB%]m"a5" qT {Jl(:MJ~< LE鮿AFbD CH}Pq#16"9(2!dvY]OhԻI֢"ӱ|ʁgO&JiE4-i],gPmWx&pW51,\d|5[9)ew)yˌoWvɒ$퇤\rD[ܬ'yPS0 Af.ܷ>@A4Q$ւ!C]Fn@cvㅸe3AqSaOiz P ?iɥdHH w̯1no9u!o]E:>IUl 'or _nE0,-v{ؚi$$yv"uh > r*dPEAq'^Z>WG w2PN$9CeZIDATN0819]|"dcGr03/݁B9P:;[.]z6)57GLOa{C)~`{+.{}DQ[i_JU׽L{%Sזp j","s;1"ֿ ƀg8p3"~~NLāNQWEmKpUPxdF,[3Ɂ ZW #ɺ hۋQtL+/^]WqwZ7;`y@L0i 4qQ6eA[Da{GjS=M:8rQU[G}::6MbadYC"j_:@U~trqA\{ 6i%1ڍӛDxYރ2%ꮊ4**(9Ex&%CP0 ,f=o9T${@-"Yi8s6LnK"b$ ^Қ#w-["VIQ XR-[8VZ*䱛pFDz"ߴ)e1?νӂ0legITm 88F W+1ᅪqVrݷ;zϹ,j6=2b5?q8ǽ*G{ $H yI {6V21rzؚnOE $݊rs:ψ&.oz``,qضUϦ"QeʛEbj: )Np8hex +qA=8ЎnS:Jjk!1,.9)BdfD:D};-Қ1NH_FnEK}^n.T,*I7DzcgiCj9WĶۀW ?J r~_]"geRM`@@=Ŷew3&/ (y͗'F $)Ŷq*gRa#-&) 8hFan"V GЇ(\$oc(pFShl6u]8hψ݇L+3~]ub]GɝguԪF "YTVŘ(Ex{#~}q9y%3]4oʙTJuPbbZI]::.R"khY 2m6 -6wغ, bU;݊PZ%CA^gTbP?Q@ހ};/ 4V"u|koG;ϴHlk7b2`>Cj,!yey>SqBBJ9OJ9 %&1kT̺^`Έ%. 3f#fX]QôXDzZ>^r9s?oߺT"#];@\y'1+ y,@N^dR7G% b`13QF .h| UU^EbcmBE}TQRMܮFpaKa!bmYsEbkFsۋV;< D2ֹłcAffwuAx2/.+0+QX[3A!Y/\g/.spnʏ{|˹3ߠzn:Ȳ::*{$ϗ{˼nq:X|kPq$9Pcl\4}>/vi'>ɖf"'#m$$$aB#멣֍p<#kxLbTS^?qy9ƶ?% #_Z@65rXlKkoCGbRt! Ե*J܋7фK .,,[mes6Az[rWLbm!G EqYkE6S=y{!*Q]r|O#W0b[,Õ{XK髠Fcd-XѺe^xe!ں{ɮK}^_ 1Շv~߈:!.MY7i͘,&~5@j\N$W9Ŀ⣲ ym.œeU,޹?R=.",eOxmϟ4+Nl~*;Kօ('X~ohpC :J{A M@L5UK@xS~6N"W"9GZ w3n r\QUB P[z\Ԛ}ȫC]B5~xw5)>2 Ͷ 돏Z]':R<}CX8kx2sc~L}u,3ٲ$2ή_(ւhCA%S:UC# ~Ny A[Jר"5=RTm.Qs?(hu s9Px.{]um5cEbF=o: H5@*h[o]clz&k3xBk; FVRoLoeƏ|׾;?F#_)sБ~Ъ%M-~+IpHI%W9ĮRs?/&uaJ}we)I;GE)~dDvPH(t @a"*ʆ2F ئY[k4dH^! HTF$p(8EbRcu4`S=:g~3v> ;8GdMb]:,*ucDl$ʼ~=RR(>*2f,FeExu⁁ Gͯ]ם-_ x6UՇo6KӀPҔp}D$4QoDɁgt8Kir`tF& KGF8T@W m[f}2Od}Ƌ;JPb۸bۺ ` 9h{⪷gpr g?n@#9/;hB7ŞlI IL:N-f}iDn4gZ_8y'ӿkz0 Hoѿ7LC?%?bk!71۸yYEw{r843]6ht~'&0BbH"o[9)D?Q>&׮cNp{E7IG7js<Ȣj6EUS^)ưpT{cyKOezQHىt}<գ31~5ۇu+[~pf}C\/wMfY@o$L'/{ɥG(zH,[pT*!G&JQyj#$݇P5VbELkNR蜥֟}!x}k /A1J¶n>tY-G'6_'6F܂(bm56u;1\vo1 CG^لgյdw@l%Y 1 Z,\ XSgm~jhjww1 j lITKX 8޹Xlaܶ3'pph܏q/#386e#%Pc/ѿ}~y~+b 7Wqü۳~qk*f[ki&%>ҼxD _Tj?IOz`Z 9D>Nj{l ,[wBU[$nDIrϰ~_$EEÄ@ mz`/c9Mq뤫&JVxclOψwDJ\Q!{>z*:n,0i.WFOKr#!5Pq oCQ-!SsY+cd*5"T{{wKO aJ\m|zק~gIy.s,ʭ#lfo~;]ܭ/ _9yW 5c rIhO CtNT TgH-hSUmȴ L B;32!e(/[6!IќLЦ$Ώp[ݠ$pht͙VkR-i52Yjq+[%6Հ+g(x{$DjPA(O2~E5}&F{{@'>' Epa) &=gUԾ9ίZ\F(Rr֒3'E@anQ8ĽSq(@AA*^ׁbg IlK EeU#<,A/Ӑ,R`晇q; S6qa\}7l}_?pK{q}KWU6QǮNysSݳ'먩$ƲAA<>㿌u;Ozl'mb̹rٶgY(6ց܍"VEX 6Єq0^_ A BN?n$+1*4^.m_h!+YKEd<gMv$.x4j6mϕW3`/&ui_7/>AF@|XUˮ:1=vQm7\OXΊ˨Jj@P$"$~xѷDWG\\4h⳴9:ME4; ij3jm"ĽE"CQH=qWּٮMLOSΰq|:*)Y_ϯw/ @U]7v۾lI͛'Sk? ;l86_+%qv +"|T咮Qn+BEv"_#yAd$[bV)_ELUՑu'o>;8@\ELo?~ş+QnS݆^K2헜<X6sX63*z_/'vtEn9kmbAV}uߥW#]{RKvܵQGG$21D_Hd-PЦ@Hd\ $DFiFVuyDz@ ҹJdY,JY$oUAD܏"^{.Iq _]7tS7S'ݺ.]qwTR{?%&z`uKx8r_$ѩ"SL/k#Utgs{7r˿d͸o#sVWe3DŲ+9yRY4Gz!0F0HHyJtB08ԽC5g!|\D)Imp8O<;/u_#d<˴CM8A=XkБ\9y~Ⱥ/H9 HIKI抗/o#PYx54hfAl`49QBwcr`2JR6oKT # ܇O"aZz]An_IuHON490 NcvAMD~2M\Le0\$\#-gP2- w{=G DP'7efT,snS;ͣ/ Rѹ]i )Yyӧ0gȳ].AI`3v!GcAn:=Xa(/f⁏Ukޥ3_u=!y_W#m#EkTGѳ+xSUX{]eO\BF~2$г+GuL-wͿumf[ߕ[U}]JRb"k#W@h Jq,h!|;H:ٍ@WlZTD=7A/`o )'XlC^@;#)E%{ގY A^R.Fj:Pun鯏lV"ϙ&z#k#k ڕKHBAphő\Ͽ~g\eG1gYsJwZOg vYcL[:Fsʾ-*by`<3;]y`C{b.60Bd!;Yp Si K6!,<*;FC)%q^V5,*jd^D b[qjDFZ:jjDDXF <+26~կV_W&TQ.Mǣ"g!4RmZ|lDYww(``>Lqs~@OJ:Ҿ|6M=sɁ"z(E"c=] OetB$(Cwh œ qo8V}]l`TObۈ鼨^*!u{U2gPy$u{> mw,R^Μwq"ज़>A\]64o(1^_F$9M|u=#MW*H~-zb̍wN=UɭymG" l `%cx񕝼{w8SLeƾ7 86I]\3z2eh T!޽|͠T"W]|'CYBg[u)DDY)'騍ע?+t!8V<^=/z߽zg[?g_Țܯ4e%}ћ}ܯiӗG01X\ԫ^V+ {FTE%Á~>G7r`UR kUv$s[g\G7mZNF7e+؄䛀6k7A5&J " F=kݗG07(_T@T5pbuh`!UfT=H=Y oF:ZkDDFVL5wBAɅITT*}LFʽA s ==V;(O}' a'l"`\DE#H\GQeffG"pWp4 ED$lB;yR qz?S]]{Ͻ7ǮK ~nP)/p$1i3 .`kϒt3d5v&e2KƦo GE `ާ;cE=M[i\ 3kvt5zР mQ';Oea!x$g睄]P;;E)GI&iN& J8H2? D03A?1L9Vh C1SA8V(-0Jz`0WЭ<ѷ0kZ>t-FM'2͎T}1a@t8 z%oT{ t t8UH'#1@7@yZYQG2ft@2 [7gO`)N?bأ拷xNΓ]f["K"mV=wtI"k}6P+ٕN9ostme1m#8 O("l![ p(n f;qX/ Y]NC>071*\\%;*TNe8P]U_Bg-L{WVG&YKK/ \ѥ^>:T䎰S\s\`kZ˪rM.wreS7?ostKv̚#ʟ?8܀;Lpy#Go=bup9̫wѹ7V' -PGyk=TS{Z.n_A1ĕ`G}_s&Tq@Nh٬s^ W(d~`0̊)# &G~tcj;`0P(l7?eգ`~zkg捱hUa늭\aFCv2c y>xjxڀrN `` uBЩ]2 2?P&iXfSwp3]Df:[փ/<nA0KdYˁ2,i $Y91_7*n!?Q8]ϭbo:xSZv8L6%à% Xx=AGZNl8t2C \1 Nfpڂ*O]h'LYweۗ4@GC_E-5]{Ն`1Htcke-LUx%j~3 = R8}B&G1=g'>ui)voյr=-r`[ŧ3m64X`H663j=UYL v":LXN^;:9nqa'ՠ7Ćr$iOyvJa#8 @5?K?M?-RsDU Z6k_`G@{8q7>%1usk~| 'V=.hyQUM#0 6ϵU0tͨG(1>"pf!9սlkNj 􈪐7X黓'k'd-4Y X"+ N :PI$PWC12iS1'Q B@ 'B &3J|D`kG3e(HNb0tT?cʞPSTNY8劀SᨴayZeGqD Xӳ+m\婋F"~'ào` \0(;eٸ 4^d= _2YY&̏B7ڮWWJoRWMe7AYE w-5D=ql/[! ʙLjUn<~xtG (RU_>Gci 7^$QXNB5]⒝^:uMK Ȼ44t@P>3 +y I٩PAh|eBy=(Cs`Su,3+?ؘ{;[~rƢy~owfegFNoXeI>X%pn4=8 2eε`z E4,p͚3 zS}(n!V[_\c{< s_ۈ| VOL }U$@1x41 ~`TAqg@6e2BY ܃zh}4 A`S<sth'=8W7Pӵ5ѧ`BAqm9Bnm5}u!mjr n<piypv/,Oݏ ūkDGGMyX~C[䝼>hk䂨wA\s:=9Ǩ3o}uGw h DH_jw/rpY k_?sapf{5߹D ڤ7^oE̸}w>oPKd)& D@[O[ͮZ_F O@wJ!0[(gbv^h܎O {1p h >FOV;nB p.U5/{E$kW`T<|9[BVanN 2Ya@z,EgYhĮF j]޻WռQ 22A Iɨ8nk ).{4w7J,1%˂ ]Qrӕiٚe(&B BOH j]16) 2q^v?Ϩ34JR`tȬDuf& B1=Hf`jX?=~@6óRs Fĭ7ڬPN(!4iop+ K_ۗDQ:CBmPN!-p]PY`fjs5$0J\B]<%]S5٣?~4*FzCHY@`]y:l̸U7nF:;]`ZruiOUEomoemCHϗpJy]ިv Mkғrrw$Bk- [ C5a('wT- [T0KTP ODoRߩoM#}zEoKy?}=2)Њj-aPNpu Gz;wrGуȉ`׋_$Az5cj(kƶSAO|ʕkW2 W[_#7.QZ8BFc.3mHѮ:j@Vn] W[ߺP,|VHD 繿GJZ0k2_ߋj7ޫAdA ZZ؇pHr,[j(a0%$ Xz=*R 9@Rm0mCd 2*㈹A%At%R,>𗑣ɢHbLzY ' 2O> : e^_BT{|cmu~Ϣ ]iGk#|]+C:--pf9~RWSw~ޱ_ԕwEIqU}h!=뎖%Paz-UdFyʽE=pb?_7~!V8 >ZH U0\K6)䄶ov +S맽}6Şw3 /[S\Z&SLth%&A^vjib䖝mB,.e &tޠ}+m2jRw6DJgSX~]=7T%Hd7Y$[O$%H4[a@ _()H@u5s}*䏈&UKdq{=PKZ^;n 5ٸ )ǯG?O~ bڀxz:V_M[T2GfxwNvEdA3nk}U%mC}aWDz囲wKP?7,aJJ%q2Y<c%[; tȽӣ ZGPg֯7ݬL]1mUrٖu6 )nw-aPwߍ:5|qOC .=O0i\#Ugd  O"rgd<W2 ?2rN.n2 _sٮ;ңp uUĉNI@+͸ DU$ ,k(=;XNk2/ƮrT|!1Vu7b]tFc־IaY^W ,ݦO~sw4йw=+JQ8Y85b4GWMS(RLn,MQSjaהJ,4EGMIwSVۍE̐+YWmb`!~M붐R 5 v޻\[z,s}WbW76׎R6*x pdd[B'ckd{^,JV~[/sYK1ofQrܪ8|T7g&J}pV.*v"u՗;.;jAvp\0ݚh@m"2,KXfz@quL寅XyFv4]㚔FvSuҶ֮U5/6ӑ]&6G_p1(rjˈg)UafF{м10' t.O]T,dN0 i:Z_;/;p)tlطyij,2C5E]zRsM}AX% .+ Hi/>/ުHčaƣ˱^w ֺ]Wf'Ͷ Fϫ LysZ Uz3 p #l \XYOGENAc}'omUwfQlσ_D^fQl\^pz R.~i}/Yx2_L l`|~qk"qk߲rkVgQ*eI] J;8yČ 2:߆M<^r¤(pP_Nְڪ̢듽oZQ e`zl}{'<[x$WTg=VG-BXh5&O蟳lϸW:v2M?|m↠_y.snP A\iٍⷡg NnSHĮ6ӧ/_ L4z^y 5QהfHZTF/X7 ahy{*~I8n1xW뤾?QSddcn:ySۛ1k#rCm+^]P&D5iu:7rMՑA湸[Sؤ@5ZQ1  \eVC\ {}!>M~%Mel]I7%ݿo%˭K'H_<玱9oғg6~]QuEs!0ܛE'{ x3Oxꙏ all@Ȩ#^ 4qѧ"T7 t#ž?߸@73C'M4jG #0zUvߓ==2ivJŝj;zw:]$7dI\ (Y{>=[?'dڹwiڝs7@1Wg%ǁ=|7,KG; 2 ^[ 3ȟ}{$c2cYs lnWgmeZ#'<cˑ8 CR /I``ouE-;襝U)>Q)Gxt䧶N;{}O^t3w=Kgb.}=ԿLz7|}^+@u{|, }"muG{*eߐ8`gXk$z㝼 ֿH؇E ~i`[ɋKmg$fxy3G1l]7_ ϸDF6A\7$e $on0j4!udŵƲ=n[o Fq/( 2F#ɿO$oƼV} 0˿.>:D#:2)AH* 2qPFQn;:"rfR&F B5lT޷lk\qmA? yf` 6[M"l|=FJG&t˳K|At|Y @ EGsNGz Xn-ɷ7kI gq?COo?WѷDl(FF{n]vXc?bs9\vݣDA.)G.|IeD1qH-VQeaDȂb ?[{ b @)``mGlqSX4r{daZ_)E.SkyϢf~b;nkSY6ieO3x&ftw-R.9ֻ8}n\aW_H[^^="ݚ;IXSr%Yh#?nD߰yKd2;r玎x#dLDh,YKVocWkyF+V\BDDh6֮LH1x1=8M ݘV>S *CHk8nG%HE8z.sUM.;dx3YfyQ̸]V1p#h:;_I%DfHvJY2ú4]=[*y.3IM,|԰sU=d%B3va kE;Bfl! X.}/Sվݳg\!G̏ pv1|^DpO׮tVtLGuC}Y.@+Gn2BK {ylٹ8Z1o~7է,誤 DU!30v30^ET+!r=clZ 4͵w9n^#%gcJ_}8Vx1Y~S1'eMhłn[B労̸֋)>kTd& /(k/&\7{l#9+e/UKogC۽oTFOl4|?-à Gƚ_+Zo|,~U0=l֞k ԡĐ=#N\i}*j8d |[{Z- ;rn,E~ Ƴ4zy3+k_~ժWcVN41`DZNu'M*}NW>31 2ϓ%}6wݗXxR=XEO#翰6r>oGTtqy\ȗw0 vC͓;yw79A8:yY GhgRH¾5Ӡ#qa 컯Uj5ZϢ6`GoR򿠤➜筍Mʤ_Ȍe_Eހ=j+rGF}+ p/TL\J/oWܻ7K,|̫RVo,L%l t+QVML}OϺ6]֐fƤkfE&Z:F/<άY>ݽʙz<=}~ӁjОʢweHWg}~ vUOI?23#(K}M\Gn {ՑVH-#;3 W+ 3g¿3 T%l}NŽ9v]`7m=2q>?*|:NgNunIjp,> @ `߰GDw7hݍQ8u]Q~B^}`H908/ X:ok HӏfOAq݊ *7.{d"V8蟒 `Ư-\`ָpfIkU& @^$G~qf"',J ·̘yH ޠ (thV9gOu70_[=2)O3L 5W^j0Bfؠ:0Nt G&uVefS2x ڣޞKMW>fI\uGJ~jyR2O]4\5eu᳸V v_W4+VZS28wz ־[o|׫8%-!./K;V3Vr"?q:"0Y9lh{rb@XŒOc6,ћU< ZfwԖ6|NO8`̢g&o@EO~:Z.@ű+]9zzxF:eL׏`pL;<0ׂևNziח{AYmq% t4vwz$rG tZkk.-Dhhx:}Lcy)KK 60Y5Ջ" 4/ݺYu`:ҽ|Tb#?v r./"M hm48do%{ [%LCXtIW2&/%ڹ&kV?g4ù3tpٱ@G[VC w=QzoJɳؙUSl+2&U^ jWo=e6xkEi2Kް,굻dPX]=\Vw7\ۃIu5fbQEO&&mg4ùj^I_$#?~J SEɏC[d  '*7Ǭ|k& WLde,zib ]DĞj}ٽn?]~zQwHz~zDxPf<.5`Ft00+(&4Jn5pS={lW]h)巟#f\2ZRTkڽ;V޾ |\&LX-~5&@nظ]+"_A~>H6)H)6ِUjZ17TGﮪFZس"Lt6.3",n  H"a+ jpX`BD9<\[fZL˙r9-Y4 qߖ)>h=`UƯ- `vĩ7h6sQ'#j31q"ۈKx5eLrвYnbwO(7.:] `º':Xsz[/_kke\_ +0K`kW [>XD[{V u.~cUm 3vI trqca RfH]f)ƗٻcMJf2C\JICc|'8>AVNrHy7ڞ*Vɀr[{K-1}`Щ5wgZS@S&TPbSR,w, *I]q_% 9ar~r@h%~+\澬vp: !FX8DsqAkOa{fd|~B콢eD>3v~I!¹ È<Q%%%NSr7S^&n$^L'=^آ-sj &[R'E[uH~ﳱ6:A ROƻP +Dht %d Dwm-!NO !AIuudPX/LcHVY: N3tB.i;x]]=[3;){7NawWe(WV2=lZi2y Xfl C^2߾Pf=uy^7ЕV[;o>bCdok߁ͽ\91J]s۟lW^Őlxu_e&ʕ9p =^pؖwl?j3Z>XlZkXN坈O$3:l {];iDإ˟"WC劈]]VX|5!«̸@l. ӑ^W Q`q[?dzJ"8)Z]`>>t\+-o h//@:R%D'Ŵoꫜ/͵qX}O>=Y_0f˰;';bz^k1:JfLifbA[%o[2kDN|G&e&Sx7wGM?qC^^9k|8w{К i#dL:d%rXzK6ޅ~.3CDDV$f^sa{kBo.a(QU>L\ \VP)I /XՎ6t*t(J/;VGpl 04؆jrU9h׌LozS2x. Yq~@p z@9*L]ԏzo~y!7ӫCA=2pu[^M?Yx Y7:j )T}9/y꿹PONQ;S\i3~ƥ%r(.H8u a3) G?ӿX`z~M7h_;]jJ q @%ѺpocD R)k|ay+}>|ҥɬq01Нn){|M ~="1 DGl:w X@,J q&1}(.{D`V='%#@XUeX35 ?@*=2C y.i cF,SNV"/@E4Qr ת&nfS9὏$cX vksB}}~]~WXSG"hki4V̺tFT|')֨>ilV4M۩I6ӏ֞@tj ?%:=u%(L< o>&8T nRF8"|Ō8D+5wB650mL^L]#gH|:%sU}ԉa؇؇x.U]ZvkQ-6ؤjZՕc"ɌEu}ַMfp+tmZi?~JW _]^7m?/~/{U`x L7;YYo`G&3ªfoo$t(a@F.,e=f70UcwAJTgj.%Yca` a1]1t>%s|uYm{ZE5&}Oqpc}_o{y(#TrkLnʏԻw/]}iiaeƥS ]{}&_\Yinefo ¤ l̢T1c|ҽHd|(:cZ3$^Y^Wm!}.f[нpU=~f`VHI8|3) ;DwҲYqӎy=U^5D5eH|e}R{drok_e̪NWc_4, ;е/\e}uޖeEӳG&.+wXUf}a/o_=yZh{Lu԰s?7vbö 2 Rk.wj?Kα?XX}4WfK/i_="φ.HF0ȍGe`we](lW]זiĝPO1[}HW]H}ަ'Ow}8 d4J_j8`2ƸO\@q~-6L*5 &gUA y|I5嶿ZBg ȭNYUMYFw_bL.k>Cq ]ywat!|Z35' >ϼOgPO\܇x @rɱ =L ڃD;ؼ?D ĻM<9a]S` (37]Kq^ɱ{bmp-}U]k1?FŒоWm1n@k|~겅Ek1ˋ b#o"UXj̟jqH\n XfO/(1+ۇҴ.TKosPOK~oz8:I7S-M [/V+:DGWnAr LJ+@>Dx61mW҇CEoӪx1~fLwr,Œy&:0c^]4đawgm\}Cs[ۺ*ž{Ec\o&XM!]no`ԥ>oRrpoV2OVʃfM ħpVה*G[5E)qjGv6'hmԬNnއ_oK۷B;}8hG1r_z4MɛY7Ϙމwfa[ZrAK}8`Ӈ5 0HF3%d!Tw%Uˮ@ha6^! w G0@t_S(=5_@H`xu?ts{+ wSspMtvgHQQmqu?t|,=pw*KΌh8*6˺ͮ(p3lžwမn0+WMvT$l(휌>cދcf5:|[~1cF@VXꆾ$"B Sn}+(CY=ӅwkMa9y8(oZ|%NB yC3=tJޭmRBۆ'#,}*=75oؐ^H w[vMtv!&#,}baVGN7du{Q6`y;YNIHԧ]6X"L,`:a@JD8N)DI VpROGGZ];Ӽۿ [Ixv3+-Qq,gVt@huht7u}q`~.x|wFx͆0ܕnMoƼsA6h{6RqK/U8ˬTn^P^vE_^.5d4}clK)'ô4vV.WkMD& &%Ad]>U`NRj\Zނ{ hq9[WIN.A{x4ũ2s1MMS@=]6.g }kttMSyQM(ʭͩuP= }7/sw`279蛙sKv3NWK~\]++S9 FR]Ut}@9?|'t}v 0yѻf9B׿M{wWLK|LvwZ9jcYpe,^xTjtf۩fz/oTݠ ]~5}wcnw _y|琪T9ӷ9 C]t@:3!P^oA1p0soCt-85.4]N?e5b"Je9g@[:@ KկE@u'X uN|s'`>+8{upi^@ 4QWs?ӛ8C]6̜y,/nj}^ `if> U<4E@1nfvӹ3*̢7~vH7FAa0ڇ ߜPf}7iPt:X{(][@'B͏ =Iν̙^nvb (DgB1"]7[>;`x P]ě'moR ݝ3}s?o Y9Tid&!P -t}g;g͐!oZs܅t$nA]5PGP61Eu3d 3 s{LRP0\<}KYsLE>k]OGڳ8ѿ2ȁmBgRu@^y>7xQY8ϵŌ@ GFn=k}0@ 8UF @ h] @ h] @ h] @ h] @ h] @ h] @ h] @ h] @ h] @ h] @ h] @ h] @ h] @ h] Z0D48b"!;=@В@ l{')QirG M{'@ AKC@pf2Cy*T3㔈c s4fw x6!8(9iy_39d- @ hvY<xӄ| @,U=r`f)q:C^/e`톴@1b?p84t=:ݸSubԯ5w=x#@qg5tf&]0[=SH s=:\|*kӾ\z"g3M}|^(D4[M]f@1W3ץ83Y0_oHӍ;Xo~]eUA =rx#~var4n Ywa~? 7]2 *2CQ/fh $s[TT8"P,@@"N'Uiv5A 0WV[6\>Zm첯q5<h k%sv5ǢUˋ#ly>2 @=h@@_Z:khe0%^F8@@"P,@@ cVm P#PX w8#$"*3[^"\Gy>2פ p.pnzp/:@z\@4 X bE(X b^3Lh,BL-G9( K!_4 Gy5' p6@|+j|[2|iy^,6 u )7ixBյjzx!t+ZJ@kk^k˿_K(N~~+@i}Pm?%elՔ 2L|p8 ~u@ig x1WG_Pnw*>>'BuU 6 ږ' Wڭq Vj~0tg­~ @1ş0FXp?k^ 6~A8|/PAgh`>o$?ٳ S~@ @Bh_Sgq*DvWf $n[ ߦ[)@ ozZ@t"D7ڝxz;LLB(Mm~CŪ<ӑx=@hVo0R7՗vy:@TG6h3o'CƼϰ0^|= o*Y)T@YKm?oc9 ܚ~(O rj0@Ag?|"$H>`rS  ;_U0@s߮.~F>.&Mw~(W xXgGnf?'L|W"執'!tML@ݞ9(Cg?Eai$h$7S6Cws#tY\pvxls8a fYLwx.pR=~j7/:ۃkSЭ*Pz5[i埓gܜګ|k!@f^ͫ+7/' umW}{Om3W'CѮ ]ɓVOv*pj]:W%-tG_=PʱISDM6KSO/y-nͻ Eyi!p?W ?oN0O*BťApW2mXۍA^$il1埊/,Z^~S&XhB6`]@zy p&Rh-1@@}KSp~5&Ԭ_+_SSּ mk(OSr I > 5Wx/k[\Ѐ$-NpUopM+S u_%S KYܕh@Ww[m/!ت1tUyY)ǷǬb$Vs^΃e#W_tWd $΢U`w?ӞNM Sv4pe8<$`*F =ο-{JYjc5#`0' VGpy="9]W9\"xR~ͣ!b[}}_4$^(.!8.] ȱ*yd1ͣR;2%9˳}i,½`צȿztCسm\g?;v\^l#>.~-rߩ,m1\oo?US ,,^Xr⥟p߃?牚~*sO=a"T@(6ُOZ7Wq=DI?jnv@ ɤ? b}=@z{ R?lOq-@K?!x݈WwM/lL?EW_/S v馟"a%^@s{?S͗v!?} H?37$5]2OE´a Е_ao! Aǣ{5՗iB^h Z)(|wu;@,| .el>p:@ K_}:m)dGrpQ@)j|_} x_)(Emg }C@!o/ `* -#Lѥ )`n?R)߶=2By3ߜ?E۶{YR@ oz6s[V@ kv_Up@ c_{V;?E߃ُEYd>I|ti ,6sY)i,dUoZj@ C^,ڟb?Es?>vw@nfqu=@V6:s/en6>W]Ӛ@VWȼ1GW:|#|\iO&+a uKwi7[Pm@k-pGNǕ8﷿nǕ:U[V@@uKg۴-(h}$ QWh`f?{>^.S p_5\A_",K>' s=% @@+^jYwf?݀dٴ-g?Tً_f ʪ{yT {Xߧ7xUwy iTUN &^(4 K+wP)'wW _aBQ w?oXQ{o:@ԋo[:o2f!4I]][ $%H[/nG@/˖_?3HXm}?׭P"toه@ Yt/>d1H['@W^,ȿC O?j]Q9A7UG"o,x/)HL/HKW~9 grP9A 9k?*E8hs?g@ G/ayTDIXf;ًRl7_nRF ϿoEDg:}b;kU?Xw@dzwGՕ?X- y_oo'= E7@DE? _d K7DҗoW mG=C'?)<#,@h5o*@ ?h@aY_ Ң_n@@ܷ͗i#p@0{?o]d HKE@aX~_-bnz^m@6?@owG]>oO xv~m)H@@?990 wu?,@#ُ€i_%"_߯UQyF~.~0*@ 7A@N5G]ob5A@N^{/ p=kmf?P@z t7SZi@NKEk @? @v/>צ #Gt_`}ew k+K>RSl1VkSvSvl_S-" !ט\P noPY[1)H&S?mj_=p1JHA00] .Y)H[?mQ߳7 @`cZI,SVv_/nS+$%CQlIRw62?)G nl7&ħ LoM_T 01sPqhLAaWY%_䙓/( j lbtv_VҳI_{]  χ|1]LZGaUTUGS?!_J?5^e5ApUr_ $/_Y4/ &u@i?t埡h7\by z5WY~ć={~*KpCJu0pP]^m%^JV%Qw忯-_4'4b6[꿦nIuLPu { ̨ү80T8VkE4_j*K$Qn:ۥZ-jdUCp"$Qe~L7_a6g@}2o[H&@g>C /HQZ'/}IU>R#Qz>~HͲ#Q./ُӯDItN>&_+%ME(%7%QͥS8rD&_D) o|t)4@pXukk(__%f;bw'Qc۞_ZMb @>ʿӯ@"{{o~U!V~&NTZB )WEEܷΘ2AgB"c/o] C ׄ~B~~ʰҶ;C"[5{Vh)΅Tm~/4DvOSI}@doLoou4 i nʿ>GwWo쵿u Gu]{l) $_? ̜P 2]{域6J9ٹu ص}o˨NrW~^*ˮK¶׶i {9oȅ?~> @O5<쵿?]Z*(rAB.s;}#qe^'j/+,PS#2CB&C\Q"gWw$r ʿu/[ ٶ/_@7 l__XQ6+A SQW֫{UM#Ja= R1 ?_A@Vת{R~E!!»_8m~o0Q~a e}sSQe0H_^zv7b˛GD/Z=uR53DDM~ YrˋnB.&A_M|o'.y8BF"ior:VͣA6)s>CUC<!D#?՟ b<!c K{٫ U~Ϸ3|q/: 4^Z-6|#IϿ>) o0r&0#Eqtp ^շZx5@M/P<bYN\2!7,AJm;M},NF  ^m( }O:@lsi.T,ߏ>^L іocÑ!_ ʹAIDATS}y:2dD ,ߺ-(X l6?2׷dzu]{7`7Qח@D_.   :h ظ!2F"Ƣ>K; "߃s??xB"' G "ScwߣP1lg?#1I:i܏Kl !ڢC$ 3n"T춿VٸD"?G" u Y=g?'C@ҷ.g?(@@/\ #R! 7@xgh/.܌ ^K@x쵿6 ~\* _4#D@xdZMu_C΢ҏjjx__s:"򯋿s:4GTu4 {5ս^08 0q oX^}U7 oz_ZBڟ/?xE1-;pp1A$9[$Z~w~G?NCꗿOc?@83\W}>pvW+_[R!u ܨsPAN|۳!YҸsw?m =Q!(izG!0Ou~ߝُЀc _] @?H@eկ ~u_C@<0v,FHv 埧g@ehUoG&S/~ϲ#{V~Gt-U~  n0~+_l26-.}`es*C @\t*/Qwƴo>n qqU?BwZ22_@XjhUH0,RDTP!O Μ{C@xHCǰ'@i??dľkjU.z @:iw=ckC@o"adZg5ьpImZ7}kTBbG3GbpG[5Db>\w_6^Fb^;CĂqE/}§qk˸_@ 4 FGAu~ ;/@7M_G~Xo W-߫_w2?D=xO,@= X2s_?b%Z" Vo._4C_NwfGA2T_-?F;~ "!X_;-o?Gh>et`K|;fGA:8zVm+?GfjU5A!`aW>^K!`Y^8^w Q/|_uTݿ/x%ȿVu_b_ `9򯪻i矿wJ,rcXWw5BߜL^"J)9X C~ߣ6SͿUy@iBetG]חg /Sr`6o5_bN"9Ȅ'` olMOx"hKWB~cKv<"l|??>hg)QFoW7WIvE;1?[_sn+ݿuz|;90ksԳ]lΰrY] ?ABC%{@f?[{sͥ?AKyPYJkow _),ٛ]]|ad$vpY,{\]?l(@JHdoBKF@0#zd>R9!FHu%q(C2̅:*/bŞ&3h*H?<3 ,l?)l2́~/&?' 4oMwxzx/3/BO-_5ʿ#Q $ý}o2mMwda /!(Wf?~~(֢U#wnBվߝ#"ejVϘ@h#"jg?f?#?m(ϢUsb;?$D(KHaTUOXnf?zqCEUE0˿e(}jZ~8|2ث`nW?pU"zWw۟/J?f>PO8q{ W9.#P ژsCaX#/>o~Cq@OG(?y~C8.}/g?\.3 @&[(}~yի Gm|6~ v_Ӑ(v @W~ivC͗~?WoӦ_?Kh`lk>>/JF&Kk_W=,c#SݹֿfJ?i_ܷuwSLu};=?@Chv͛ ?X"3ӹu}#WGk`rW>w]?`LͶ~C{3~+9?`rs{u~vX]~0w3a#C`*ٶG"`W]iÏNI~,)0y0>U;_3:{;`n~4_⿦>OwH,aFY#⿝ُFYpaqkNwG,߃ohNvGWLYg7rr @a!l?aGuAp\@qWt`hvoc'x kjkjҢmY`8wfGrðDb"_յ_#_Dh;zߵG7{wV>^#}ُ߽O)pkw܉>?5Ыuk*vA>mgxDn`DzӶo@j@^U/F:n?;йй6ܯkُ˸εw?t /ߓ!{W@B@Ͽឆt; >Н՟V}X@p3]ou5,@WKT/? ЍWwM#8@']Wi"_ @4sSzt@tGsL{!U'q*#YVbE#Wʿ<}5 #_SWHkKQu=2/G "_}فIF޲ZZDh`r1?Q;_5}:a?uF;/EYa{5{\]/.;^OX.BI"3.}y;f K?+X//e-fQ>޼$ܰo%2xAS?by wðD^ѷ?%䏃2^!/ߋL Šk hmj z8HR>*/V'Uw#EAAyp،*$ُg%t E W5x<_Җ1r}@ '-#`#hb xjnLF:T%N 'w!O-1H/}e]Rw/!HGAc_<2ǵO 4ev.c >cHGb%&}s,_f>^ԃ'ԓ'#\o|w{f*2q;=D:>-T*?88 ;#JC)480?沗, $"Kn;T6/ڝsds %|_pehg?tdetKck;q=x8: UV/ ~uUG?Yn08__n8&'WwO!_Z08o_?f>-cuaChj 1#yBUVvIo?_\H8v9 |}~W}?Ӷ֓oO_X8.9 @zK7vg*lUߺNg?.ş*Ya\pݹoouZYaBfGsXwU/f?'?8` գlou=aS˥v-p.2uo~W.%`X`cؔCF,ԒKG] x+hpVjQtq8s?[Z]Z+_<3-.+ o.}`h-?E 6[[1]RoOlx3E`mxߣf@!s#̃m,7<-?ȷ9 =K 8$Equh^חi~0{\q_}읔2^| {tyKG +Xhlv\P]eqb+|g?\iڙ2V+վ>񇼕~:$? o`2vlдN1  |5ۭ9H?yǰK em!6G0>h[1 `@h5 A`m+?8)?{w4//7'vz)i_? _TN ~?))_/.w˿RUG)˿V~:-?`UխvxQ_ gBPսu؆%#\$$v!u[Ο@`H 9)ؗ b~٢#%0yrp?? !wjΟ@i TSܐG`|?;bM1)+R>Kb{  E(X bE(X bE(X bE(X bE(X bE(X bU|2T/ UH @9C3Rꅜ)(grFP3T9#% UHr*gZ{*2T/ UH ZorFP3T9#u5j)CBP匔3T9#e^r*g 9C3R_x @(\Xi !rŸ;&1#M?RhVO7|GI,q dUTP,)fbHX3ONP d-9XtaNt%⟩'c#!/b0AX#d! bbz @ 5JQ%hfAP4# 1/F%P(`RDVt;y~nL1Uju6ݿz~nLs.zpzi勉8KEZ`4m1AU.A dAşVgs6<`dIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/tasks_size_overhead_py_handle.png000066400000000000000000000362251507764646700301050ustar00rootroot00000000000000PNG  IHDR]$K2PLTE@Ai @0`@ԥ**@333MMMfff22U݂d"".Wp͇PErz挽k ܠ ݠݐP@Uk/@@``@@`pͷ|@ sVBr` pHYs+ IDATx *Faܜ d[(#* ݩdx5^Ay@j  @qPj f-+ST 2 UH= umLQ_X䌔!grFJ2 UHoh} 䌔!grFhB3R)(grFP3T9#% UHjr*g)C BP匔3T9#eAE(V.0 u+@@ &VVyS#*t;Dn:3.vqSU-Q; 2|ex*dz G '+O@VyTpE(X bE(Vx+΅ VY  gVv]ߠK#V lQv٧ pOYI?mxz[g>2<[)3@>@I$h 8!q(P8"P,@@"+Z^cY Xzp HXm|t=k <?QVl0Q P4pa7)>@EGr?[u9 Gp(bE(X b2#DB蕅|pX`4.'}~N @dz)`fYH˻;]@}OL\B(T]T!UKz _S+o*@4 L(6%@I^BHf|fU" avu2৮~T0ByUi\0ߦl/@ kp Q~m ]}c;6@1WYS.~֟ 𘺞*p*PZ@koK(B%hy>y umq(4췿/IHnmM@c߶_@f0 Hx\~PP 8.-& xW1 l l1A"Gv=$G8qD\PW] q_h 2`&u<k;7Reُw<[J6X-nd Po8ght9lfC A@V/A  4?E8~ @Tc;@1M埑7`@h#̋o/knZ{U119uClğvpX"Gxn3 @x޿D*_AnTU\*Z @\_o}axC:ˣwZNUX/^u(j {~~:{ unSƥ}KUC1Σ ]oz9[ 2{r [Vn;x rv{|z/dDk VfH[/Vyֆ`2lVO:  B-i*pbe0RW<̻qk/~.@.v("'CI[;  cLS /c\?mŊ?S_އp}꫏Q)m֩>S_{"7Hj~;pF\y]yG,܆/c-"@z<x>8t S;bӖN`,Xm''ʿ~S ]w@rqH$DfZ{$u+d Dz>~;ݧ`Hb_sߐ.]8kNw5Wu T) 6VWD@'-?N}#R﴿% zhʿaQ$Q)zj['>lM& BH\Wz]npqw*DIS<ˡuO*@#֨\h?;Ҋg}6.Dl{?,py6uw| A+ @!ϱM0`~x\7 ?$M4 7b `ϼo-`ǢuiS?oGtM<@wUOS uc;\]-, 6W_OB%wNHŬiQp: '1o;k)αoC!$chcTLS_ʿ%|cTJlULgh6.S*Ht3.Նη1um [%qNQF4bAJmH쵿)Z;Q5lJa춿ʿ+TUCU֦@ oa M?u?~ /[`7N'xj @ V/ߙ7$"F]u4^/5o ?&17B'8  N}WÑ|d n\OyX3,eo6_Cĵ_|*nx!ߠ_^_R66⿠# @Œc>jn Ye ߩ_>㮕702?Hy_6$6 xJW3/xH_?/oS9 G/ 0?qϔ!@|_X [o[CG}޶oLmK!ww@<׼  C"9z{MmKy"8`G@ 4kC"׼A C;t;~h/[)x11AxPO610Pe+ m/7 py+k _@.WW /_@Zo/} pn~3cjY?=,ukӪK \+k`iXU^g?_ po51VO{l pj/~ik;}h goH p+7m pRmT]KvW݅&Ψ7_csf']#wu[]_ʿ8@f~wv<go,nRED.w+ƇGEE_) pvu(b##Xv|>. ]wMo =[oWG=v춿ʿ{Oe vU(C[v__$ذ-gVz6n#5C;;/3 blgS oֳ,ʿ^#RA6Uw| Xw̿/; `Zk}.~!B?owg\G/g 0ZmK/譵^˿E  YmKʿ|?wzUUU"!tg3OwgpSTU@HR﴿co/] JW}#RF\ʿE]z8KQV/cwct P8y @ٻ(;h!л~61?Oz4GwkhDAwjި!Bckm_{cV]  "P8VeǿD/SvCO{"" Q&[Ob(Vu@9@g:QʿqC""Q??/CL c os1m/(}ofGD$FLw7O*[~U?O.ŘӴUvepxR_l` |kj['"m յ##QWu'qSOXP'3Y"NkTyD"wo @ʿ[]g Yʿy[l9!zZg~_^@VLjCij@dkkc?ʿW_/C t%Y:jOn qxiCB@g*?l FDno7ydGDV߫ס!A rwRa1N',_#UՉǞ{$.ʿS" A"vWXE"-q[|kO9V[|p2ȷlۏ?4 mOl+ϩVo=v:03 bUc) **@~K?DB̧jk}3_@5{G!LjUz}_R0 ` NOQ*2_sy#mCB8Dܶ ܰlۏ''_8FtOtXnُS_ qv_M0acÀ7E,x 20H2R*=[ `rolpjv7kO'A*&Az-|(pރ_lt׫{㣕q : ua W/7H^;;8)uH\]z7i)Im<9[E"i~\rhooGQ'jw~|UcDoʿ&O!G\ Vu_ d_]WGH?xS$$6~_[%1vh XowG H3@$/q[m]u풀: psRgs[BXC~>r[B Nb~8ADž\DhM;~|C8 _'D"~DwgWF"W9d,F8F{fM,=.d_?DAaqY| 6~B@8zNa  2o(PϨ~hե` 8h~TUGox?xG"7g?tDlf?z}# e!Y>t8 :=!-^ b:?!n ' W~eyz4G"SdV- ת'XVʿ!ێt*?a/.nHA" Q剝M>sj=IDATtӔ 8=!![Iu{QIof?_BB2s ;= 뾨w?e)ЬRErg#QTMksh#A|wm @urC@'AIG"^ ~L5U"}xa]E9LBT 9y26E1n:ְ. EU5*wY_8OQU^EIt9.8~#%ރY7hn? }_ur#!~_r_w~&rS7˿ks,Rv˿>@HAm.޿[! 6 z{a},.&}$"K3ͧMW7_u# X_vmֵP,n_No\`I~g>,ϗ$# RN|?k>Eпُ+O#a~ ߠ{{?Gg?b>=9 ˰h>9y horA`Zw+?~g<-Y!3g_7rCMǾ7سm7E!?`Ǿ->k##0cُOlClz1aݯ}rEޛ8C\_M F#dJ;, /`w74?0Ec83WvFE&gժKgy`j_=@iٜm?,?0)!SڜUH\EC.#a6oYmE#S1o_a]Uai߶;OJrV* G&Xw"Bw`O!'` icD CC>.D00Li,E>>M,zd80dFiSg= Ag]ioa7 okLF>BL'|N[~DQ1u{ ?*9.v;$+.gʿ @Nl=|L)3kmtrʾ^(gjEHs_u7yogl[`s*;./>k.m8:3ʿt VUt 2FeO8?RٗPŚ#/T~vqsy0o|+E ۔؆Wߝ/ƨF+Kd6߭'n|s$S`}"znh?OSoyaBr?Vb?l`_ܖG~ubpM~UUMWE)(;@_]`]_HOH7~ zVwZ#@/]Wc??Ѓ6jH?Uʿ!(oWzPHnZu)8ͽ0H?+crs߻+@+_^3~:!.P4yߪ?&/GxG^gƩf#/6vwB^@kFk"/iFk=k~@hmkߨx^mU@Wm~F_ ُ xѭ$Swkz@Tv;ܨR"#O0՛Z@t-nuw>?+ &~g\_vNG!]L\@V;ܦg!yC_f>WSoI? ྺ+|pOmF.? n\'7J ^w6? !_vʿS/pC[-oc\!׹?H;A)"W|&p67j1,pkK #g~@@[p+}@@(m@@7j1o| cُ#oӳ8~@,J>Oko~Py*kChw%Q'Ts-]f?fw\T>'Ao5&Ui~7`Dfw i~)ζ ZuǾU&W_?@R}k n#?l_oHsdN2`{qP@J =#?`ݵc?#lC{GizO>h S9)+ٙRxnGy)*Fǿ#(IIh#L)'oKwF;/䨘'~6p :4)%_+K?De?"^]@Jwz3? k`~^'@޲o/? {kPk;H<SYoPp.!\i_j-?`hW>ʑmSP\5_t% RɈ?j @Zwe>ؾ>=qnǍ7xevfv;Цgϛ/ۗxQ>uxa勉<KEZ`p\{< jk:;>@YF8͝?uxv(?V]{IIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png000066400000000000000000002740001507764646700314270ustar00rootroot00000000000000PNG  IHDRL &iCCPiccHgPY<@BPC*%Z(ҫ@PEl+4EE\"kE t,ʺqQAYp?{ossp e{bRɎ(tջ{i常r)teJOYLgWX\2XyKο,]~ )sT8بlOrTzV $G&D~SGfDnr&AltL:5204_gK!FgE_zs zt@WOm|:3z @(U t08|A $`(E`8@-hM<.L@ށA2@F 7 Bh( ʀrPT UAuP t݄84 }aXև0v}p4 ^O6< "@]p$BV)GVC!Bd h(&JerFTVT1 uՁECDh2Z@Ёht]nDѓw aa0Θ Lf3sӆL`X VkaӱJI%vG)p`\.Wk] p xq:o—;IA"X| q B+aH$͉^XvbqD%iRi/82! L ے&US{1O,BlXXؐ+ NP6Pr(3;Yq8WJ)Hq"HJ IKIJGJJIKa8y"Ֆ͒="{MvV.g)Ǘ+;-Hז,L_~NAQI!ER¬"CV1NLMZ)VL $L`V0{"eyeg :JJU*[5JLGU殖֢HVQ?ާ>حѩ1͒fX9֘&YF3U^FuX6m]}G1Չ93 |UҪU$]nnCM/OS~~>&   .y݆i񍪍&v\mu:ƑGLMv|253Θ՘lOv19|y-Եl^Zä́UUКij}ZhlfSoV6¶vʼn伲3صs-[{'BCSGhGfhgWΣ<lqu%V>svu.֪MZ26,b&*4r**4j:*@LMLyl,7*us\m|GD\bh$jR|Robrv`NJA0"`H*hL֧uӗ? ͌]֙ՙdKd'eo޴gTcOQ{rswol m ڳMu[NO [A^i۝;OrR V (m? Yrˆ[EEE[?Xި%%Ga%oDDiNe̲²7Yn\^{p(㐰­Rr_bULp]u[|͞iU-x4:zccǞ77Q'z̚KZ!'lsWnk]8q/v=s}ٚvZ{aԱC) _}ABEKr.]N<{%DƞWzuW8}nX8[[Mowf[@;]wv8d3tyoy02*|`a׏2-<>+|"ߵ~o /ۏ?yx??'󟓟O)M5MMqb݋ɗ)/f 櫳/ M^̛oy=}na>|ZZ.V|R?B,sMT cHRMz&u0`:pQ<bKGD pHYsqFIDATxwX\eǿg* !uRLb[ɠQe5Kl0\]]l.X`l+{7h$C̼?a2Jr>Cyp{9`aafpaa 0 0 3`0 0 0C 0 0 3`0 0 0C 0 0 3`0 0 0C 0 0 3`0 0 0C 0 0 3`0 0 0C 0 0 3`0 0 0C(@23J!kbaa_ ;6F&v0 0 0J/'0(U^ tDdP@waaaABv & Z2$"(aafVOȑu\Dd?!B"naaưl26!DkFDTX3 0 0q?9nۃ{/k1E&h8! 3R 9M m-A=AB㏅ ;)pIJ' !V{OBuQS\$ZN<2p\0_Vdf!DO&/aaa1\!=3 0 0 3(VRn,oW(k 0 0 a#e1P-z )^ADɲ7 @i5 0 0 3 ȹׄ:Y+%D{!N0eaa5éL%*a}0 0 0=aw?X]{O#QN(V GXo{8Ia#@/ts!_l9o> =Iqgaaa !QEW1 0 0C #:kzaafp!u0 0 0 's(GcnscN"2 0 0Lp}CD9D$&x"@"ymtW!wUƤn1h7{Zm3 0 0 K F3 R,wAIX[ 5"He+ N @67ba*3|V,*B BɐD8B(BTQ6$Q/0 !J6Hr}l۵eV#-fr=taa /bЀLߞ0)ɐ{\o=S`0 0 ӧp?MB؈<I(n 닻$aa/c  W^waatSOSH=5a'Tc7>0 0 ,OoR ,#4^e_қ 2 0 021B3͇d)A_ U ) 0 0 !N}"kB_ _QGaa =!Ĺ>Gܙn!saa= 0 0 3`0 0 0C 0 0 3`0 0 0C̰2 TٝR0 0 3аpJ#(]nT Oi7wӄ.jNg)" @Ewĸ<.Ma e(հ,Yn$"I$V@n.޻;/ْ2gxۓz?G "-@!.P 0 0@U_K[s wTI{d{C @.$ARB!D$?t#$ay$H0ILD0'(\e~!ɟ"->Ļhr?a9 kinwў )S{ɩtNn,n{,>y} R!DJo m] E;0 x2iy^-[M; [󫷧c|R˭%^SbV'˯s*G܇u/#,YAk3qhganQxwVâVS ?xQkʑ&6c` *sp^sUɾҗBSȜpWCU2@"ߓX3 0H{ <5 il؁ˣN] _տ}c,%ưU{OrYɣ~*"8rŗ,B*!I+sC&j@8;z*Lmez0 0r!%f"SzJt%XlPe}YU]m][Ǒ,lȏ$( "3ɢR=Tr'"d!Dl4q{}K$0 01JY`){fzoy %'+JUv6"ȡ}w@GMߨE{IU4q7`>BFl h#N&W)Bn?+k~mBBS&IL_jBh-H6-{z\\wnɩCYt'Cl7 nS,=<}U3 0p2ޮ{kiq-mtOne-w^:")A$ݔq+>fW˷AoIl?sŌ{듖u1 }CKzZǝn$rMlPdqM% HQdH{n+lۊ{4қ9]+^nJTɩ}AGn 0 ÜNr[K[ kV) !cgLQ5Jnw]{gs/4o HU(]Oj+pٽI)?,/$Oza/U"nz~O yM)6* RSi7IF7K>DM]0 0CPM ?qV;>fhO ݲ)4QUC5Q5yA{賱hCk$@F@Zn-P:DdP#T:2 0 Tiiy>R˭~_ >r]O_X>Zr]bLNDF4EЉzc@*BAYn?6\k3WHB~W+/qsĝSaaTiP"ܞXjޟ~ƒ+Y ߥ`5):9=K!οDjcB8oGZ Ĉ'Mum#>p;W_rMU1UcoKYZn-H-AGPfXYeN9r,^qɃ'0 0̩ @񔻲%΍yղPu*78m!W BT Ku_@M ԧL;绦\4?^C\fhU6V"*@gBe%$r'&@3Crwےa 0 0E+wǞ wRqkg_Fׅ\3zjц74Ͽhr!r榀kkfv@f'*rkK?ޡĦ?J8GyuO&V^2,"DQAB!2N-W;JaaTI-&[[eFki WbqxWO6_㧶UNRyشَMa)YoF4Q澭G,k- {!Hc֧Ml9]CQC= R\}(!d!_;|2 0 Ü"x`>?˺m.8U"4#N 3٠eFl_u5Z_UF:-eF+~;?%WFHe#(%"B'htrJMz2'Tl|=5,u1n0K1{xaf(hr{{pc7ky2 m|_vGqӃQaNrfب:g's-!D{zY\[UL s#VH5#m5줟YUL9>Ù ܕT_O,F!Yg:SO?~=e RtO$ n1n1@ !Rx T[Ÿn d$0 0PDoh}1r zQ;'P?͇Z;.L hEݻ?[rſ%??+D+%BU1 юǜ+ i_ Pb8wH,ِ#h|w:w&0YH{v*q7~޶WoJ,0 RbEEb?K9҉(Cèa]b-ܚ8,aeV(uo. w0sT4&/ݣ6 RTªֱ6GK 4ڊ gmh4[mu80BR;Q|Ov4ay-NƮZؕ}'Y `z:8 I[\#DdPЏu )mӂԷPڅs7 |>BrFDLp& $]w 0Pژ?;("-p-m5({Ca3-UōEF6W^zU4O X x[@vgե83 8о{BW.jyru޹S2/ֶaQ:> )2};/[$N wy8 wwѪx{#0O svԧ85Żvaf+=K/=@D QŎ:@/iUQn*|!ZhTZ_jr2T+ԣ--E~SO6pFO8] r ( P[U>JNZ<{ }"!r(onq8c[k߅; wHOA4DE;0 3T3n]bLa[%{.»J9#tH݄*R6@EBH! mѺ)8z-Gn(~2b  @at芥T ?1#nd*$%iU1YJ4㒉h~t^!² Yrz#ā(ea`; DuoU_]^aa|2SDZIǞoE{XTՐEe*Hُp>>N?:&!p.҉o(|nlOG"kʭ&t;^1˫0,:ʍ BHQH.!U`@JTDf+gTW~e?S|]OBV]&xi+=D>~da.p\>kfz: qnBY}U;I? K[j\?WUܙ"YB.6g]yj%;:"B v|)83%@JBq^%.O%+F)K28E;þ0 0@ZniyTd£=e Z1y2c>J.JYmDhbRZNr@?F11W4=u[-آ|߬-UMM7k N.*1Qc-}HK}'zp) !& !H)XI< !B86t˷?phOB oIB4B+aR˭"-|c/Յ7)~ȟnsϋ%c# C8q0Uynh hM:cz 5ck oRdG8Z39< | >CaqķŽK)c[h7B1ݭ0 0eDZϠֺĘU~j#ShPSP_ oGhFMFjFu* Po n_+ШloM l9: V !}Swp y"=`>q^@D%8ѷovs^ .½sљ/$ZܒN+iI&d,jt];0 3TxK: gڿQƯZ&?x{f9֏ԍK0q9zU@B#JN:;:yhP_Bؕ†rF+Eg[ ^qϪNhTwpHQrgI9XRۇ)OFgI[z0R"+ϕ81ܡ'ddt䣛xKՅa!H˳(uO}-ḿO6X. =nk@ QESî~&v ǯmU!wTKYZnXS=R@J԰M!xiu1#5{3If1l<ÑnI8)Y2WXғh_.15o0 .R˭Ft{5WZ~%bqet𒖿l65^~/EϊfpaaP,{S|w*ƵwΡ,E)QC]vLe_pf# g2嫂BB5O?־ ;L\"ޜ;7Y{N݅B4Q#mS!5 ?q"pgaG-fu"Dړg=n} hh^n(覧ܮc.SfR[zƶ4tc3j+krF۽zǛ+^1&OqgaTiPRu$UǛPJlj 0 ㍔h+?W(޺ aƽ>HUųLG4SN5#p*4I@N]fD m?:3fkӶ&״t-ZQao|󀈪=V+XN!MWE|WQLDlT)"R!D8#:;DdBwxw*Sў.Kr{0>an.1ƜZn]u}='o"RՌ= D0&q.@N>RLvKA}#S?ɰ"ɐi B k^6KUAΚhFgFf}pR е;eAj&e !rqy\:t.́$+Qv"JI5Gћ9)=^EȗAIJ* (ahrh]rsÅV\?F#DGN}g ]!&J1bhTF&(;G'OAJ&O @~cnOƺKOx_D.m ē}E+ xWl#lHo:;&C]7SӘ d](!d!w"eaT ^y_YiE&/huۆx_zdSbƋuxTt~u1D Sr;}sxUAZ5;ϺiGicA/K__bOq 5 {uۿ гhefa$)1K2 \__K[ kiki˟q¢Wt-jƿ~Wm³KUܦ\pQ[zI<_T:ɍ wV-zHe~~ :vU Fc޾p}#GqI v㣊?UdgCgSX+z;gW49aa (> *,iy֬J GL*ޑ GY,Xtm'~jݺ}tȝԈ6ڶW>YȿqKXa@R0>ψ`ߚꑻ<-@ F\ð6zhtFm|>, b r!Dlw‰oe75lM^^IU1zDVS#*<g<"% 0LXSZn5Ĭ`sA9?f\лT cYxu]tLSGk4 &l]sWrz\)@y@Z UQ &"(  ?Xn!WQ/ "DRv{%5R(z\sٳR^JE`@k"{2C`Bn߂-D 0brkӓǧ쿕b!%fu# y@Ilkf/Z:Z7y4 jRpƆwzm ]k%ƘѺ[j5wfpB "kBp3-8߭"Q~=T ԓ9@0B =-t@QUL+EBƛbWz)ba|f{v*<;0qWG]64!Zctnvc >|^;ۣtUU@8XreӯR%wc ׏Z).vVU1Cn0O|{Һ,^$j?^f @DFq?E%h `[pO]>kɵcaADz[qmT WZnW>7.ŋQ3?l~U 0bLwHաjMSh7^ĭ]`Ŧ?-fb>sۜ-C쾞ؕ x[ң2 !,&]dOQ>)Ys"Oeaĸ+IZ?a{$>kCrG/>it]qST>ֆO;|OfMv [6!ʾ'F5J8{; @@ɰpQ$V(ki)d"r! HfHQv$'wrNf"s:];2 0`iוܰ+~i?ng7N~q~$ 㷑豎CO"onS6XmЂjMtlnvN{8k oܵW=wኔgmUń)oҐĨ;+?3D`>8)$]-,fH`ܓH*,橄Bw+zHv֜.23:afPq#Vo83%fۢgtSh#ǁ 9tjUȏD@{:r_{ǏC4bO#ZChRd-:H^S=R4TSEK k0S{?S#鉯4v`{02 "-mp/хجQUkuBVgn'(ަu/Npwk&m U3r[^['=cV[Mc|rÃQMgm{-,8DףUgτtr,a6\5߿};]+Ƭn yc ilseYoUly0!BCNhjo>ҡ,9ں ڷW1Ոf!0thѥĬ.>=0:ωC  0 tP}ⲣIE+mOO;oTiIJ[?>Զf0>XݱZ?6~ػ#MWJQc,UdIl}-҆:j1r/k6TW!*/aaXcVAyãڊ:R}(3DϺ0⺎`MD*~*N?5 &w&EwbWWxrշؕ0vH8g{gF5HJggS# 0 (@ӵ5~s|Q=-Q.%ےST,P3B6TNph]bLaj5Ry*m#n^Jzں3 j;Ti8¹mnm;2*9p,w5Nn8` K.T'X6 af`w񾖶 5ksg{_]:TB u1puӣsi}jDK&ι/l|"^P~xo+~p ]ӈ(Ruxpn$`BO>@IafrY_7Ξ'U8^]4;Ōؿ_C8zcwl[cnoxW١m"`} =ܸjtjwc_*n7و'䍪bʹ޼/Gfnr洇 D89uHPIR=t6bjnceAﭑD 0'e%Ӳ~"~g`=/czښ|1Xݵzu*©[?mm1}!6=P{ys.?FS `rRg;Esg"#}L\Ht_ea.Jx#$!nCv-Ǹ¶aXcK-K=~I_<6{^,`^fRwDiFM'vj&N8bk6(Es @#җ\2 ߰p( )¾Uxv@FH|ݱ]gabk.·jN`qT=0cʞ,C3y4vo54`yƦDcœ^p9ȡ R$;( ΢=0 0e,f1-bW :CqrA@ׇ%fk/F<|Hs{l_Rlq]b*@sgMLñТ"a"B5f̓igP~V>n ⃷QF"ʑǑ-'cemW60 0C .=@֊=w"'mn12 4qwl/>Y?PyQx4umU??#l5*j)gzS;zerՁ@a^U1\Dk+GәA[eQ!,HncBnݱkLD)/]ЛEuVs`>iaN]6*TJE"-oZjs%5-[̴|{n}IwIW_רtvg{]Mǡ_Y"6U**գ2vH*+p4Kmu_($@kj!k!?X,B"2C' !J(RI]Mԓ鐄z`nBw=a9HqM\a?0= fN[/p@nn15vf_ -^q4 ICڣ_v̱=XsF aW/ O˭++/_z7\Shx!0wR0p>'*x"$s>+n0 ÜPQ@Hȩ*& ݺwƜuICu;Ď 7N*w~㕈U&40A+۟yZmʚ.E6!^])*ʭǟ\=b8"TcQh{;q=Co\\EE4JsE/2 0ODZ]s;w|xEĿPw*WDz;t!_HE{w|zYmHh˻mi\3CܽVec\틁B JD U~v1;eݥèRn!fHݣ.[ Մ1F=EdGdRIa4&jHY]Yb}Ҥו~Es?lY+p߾Zo&Pqv*nfUPX̐ mzS0=0鲈G \|~نex݂M> W yΝ.FwQ`A.WdQ.˯)IׄQ!!KYs=RrR&2v]n_/Q%WaJY]Xb}2%4r#p_6#՜f,^7-hM7|@UEiHERbz9\ph@iJ1rq4wԵQ|qmc?6!ZIOpR)s* yB;Hp:@ ԳXVs%)Aq(_7C+~\??h!±Z0 Ti`0&HY]Xs~l͸ن34_bq2q7kTSo8qzcD*ԡ"Z8zƭ-nREΨU60JM@q!=LCbk:# ]E͉h7!& ID&xǠQtCw6d,0 sz@EPiyKVKO|ifѺh0?֭E?G}YKn VGT q6}| t$ŮUTRT:}xCDFq?$#ܒ9]iWEaXa@)JdM'ή~ΥO"erC+} S+jRo?xV-6XGpeo;:36`i>Ė"#ךHE>. _ 'Ll}1_n N;axj+>"kik֨9VSiŅ3^y=}]fMnVs`2M-ٶB5WkG;0Fy{waQsUk^^ DTIM.JC0pڹ7..#(/[uaa*F%)1S\_r I@J="ϝo47}V{V%ZkvkȈMo#?B %ǂW˶ '9 7;Q "R m}Ku;-F5i߇pND} QwjθT6z|{&$ΣVG`yJUy궭E6k&(=a0_ iBK:b70 3,6XULk&e;jz=BsxϾ^>SGg^E6:]c_PweQgc%&= \9c55G~ʯWx"4 !T̀J!c^D4lmX wO!*p{!'CĻqKY?=/\ҲY˂6}{UHx&䅎͜<=O3̰*MrהFG.9 "\r#:h۸y"`T4Prk""۱~9>eɔQJOJI ~F(He~_'0`#%favĒSQrDj՘Zn57pF76ߨ1s\LaIo֫RĤjBÝitMB-_aUO ;50!qalH5+(& @"HB|ǟr:f !r1D%˦V; PFY "?鄵J(;i 0iN4,D͵「D5I T-޼HgƇN 呶Vw\7)`2Uӈ?teoΐrK0:6F#g?!ho<麬9#9TXt[_c5aa уΝB3%@J\U!h r/Xv,lD &ǛǿVw(K$3e0f,K)M-濂\ח^_t% fͭuk4 p6gkH{:,BRt1#g>Z3ϾOsVP:rv\0ew$1T7ķq5a1䅻Ax3@@zݏx( @OIO23ewg<0P2"-_N<5]X]嘶%tGƱop;Gس?Zޖ7BDaKI|(?kxڪ$v5̵qD'2w4#@<e~s}ų]@D5DTAD9.:v G+OnaNҩ(1{6pl?.9Be!"B}70wZߢ?K#-83+|+࢛nPna@IB D)!]9} ϜNs !kɂW"6)+r HhS\]ZC'4.#LI RAhbhgf0wո*(㮿f/֡v^snȴZkǾ!6>۪ȹSxӂ֗^6qz['ʂf^WzDx80\A u [ɑB~0}=!ĹʜQ$ j cR>v {Ԫ`ཡUOB̀8ܽA̧a-ւTY_>=QX[mɯwpIQΖo=!p֨ 3۝m:l843 ̩~,`%ѻF\# V0PFd(tJ9p+s)WQ[t7"ޓVh%}.?τ;eC zLwƛ 0Cl)1 [WGt=/ lv`Ux3pN i?oQ,}v֪|F=vÙ7Tr2Q#w{gf}hyT%$Q7ў VJH7.O&HXaA eDZϠ̚Џ_iY6*bHՍ-y!j;MD 2l|F@i~y~V)GU>VwJyN XmDd]EhQV] ឌ/2f(I>:Q9pa!2->@rǡ*]tȖ#_}آ]8j?%mLZ?}zZw ttYծ;1Jŋgo,^2XS+ Ljg+Ud, hj5-9cH++[q/wSeȉfo}W_{Um462}Qx;l&;Yzy/lM Uj{;7OUڿb~S%B }$ k1>ɩ,{`Zr E]3'މ/ҵOOFGEkz2oG UQNB> 0rksxrk΄6)?\}%*ho ]^iqAM*hf@-CgCıc}X7@Ę:A_`|Bca7Dx3l=1ԃԪ r|dܘE~Ҫ0 3hqO,>j_KE ɇPO"3_]KDp8)% =UOTt2d0YeaFr%[&O[Yoݽ+jkKHtҶ:5}g~{s KZ;_ڜ?}ίo8 N-n "8E? !!Ǜ9` 0 0݂27X%|Ulڪ#3-O?Ts1I>HzZsԀ:ښ~7C}(ulhi_~yM# ? H{A|*h)B`r#7iba0 0Cw)~2sE˭r)r-Ǿϯ{j D񭠖 EMg2Fw ع.1Vq0naaG*Ĩ;ZW(3VC~ǯ -{9)Q8/>$)T!mdwihuP oyuHi2 s"q%ND&BTSn<Ґe!{詿84`T -*Fq4a!EU1]8J=ymڳwkwZ&L. pz=d9QV/ qak4g-fz_;;]i 0OcǝSYʏ.(@gw)rO 5^w66:4` @[ז52 eDZ^WӔIi޴GHm7l1x h̖V68kfcۋm.S (m词3P2"2B2mjvsByI%䛅tHMH ׍k}w9VvBja?0 CE9Tk--Y<UY7}~fQtT^;vly]bLܶ{lW>2v{,܇I!YĖ Bl/ΩuN6mE)0AS5P2 sd׫tհ>wO޶c#|!gK#oUiM7S1-o=i㼶ƼcnVl BdWdUf@DYl0)@ai/1pxC</7<6z0 3M޾"; 3\0@zgBt7 *(G⸣xmD(7'ς0L@EF_WaMH%-y&$?:CQz CD]˘mքFjFFr>W\<ߣ o bW J00#Ãf!Dwݡd"@L6$1%n~w7R! Lg֗7Q Qd^AE)>'JlmeA)4f5Q;+GPutæ,4O7wՇ!5-jy.hf8Vyf*KkaNC"JTqR_){OQ:$qIcB7C' !k1@@T/>6);0iy*LPK;ZC,4r+|,^sG׆pU'm\yºׇuk͗}9ɛIυǴzd\C2 3a>(cP!Dʅ)튪3pU0zO0 Zn5Ĭ6X;DFC MV`PG{Q3.B3ƞѡ8 sfxiG}ADx_ua15MO2L֍}eY؀i DFZçgU]>ؕB*[n=TTU ѫrAeFHF ekIfrlUZ;X3 0(3Wsj5@IB-UQcQ+.`5ŌЍ#kǾ;W 55)1nϞvƖ%?xܟr;E\cuf9])/e]bUd)'"ZI-"?)2rlߺN HvWïU f 0 sPQf2$5񴪘$7" 5jh FOMtkU Vbk!-.о1;ql9dD-^޿Rn:跉nlZLrD~R)_] E~RJQtyYhbPVaĬ]5Uc\)r֟n.=;{?koSWl "{oU9#tlg_3rdZ(o(3(BDË=>0R(̮ Sh2J!`(8e<(D]D:Q*,ۂ5euH0O*] #:u`> ?&qZRDdEkucRH@^#{9IU1IVhkw~{\7;k[gm{Ղ}m&:gM=gkfC0"K9v1!Nf^SvJ{8!B Y+љZS(ϥNh0}N9e;I˜&yM"ti)l7<_ F=OJB)-gP`3e7t"o7 2ʾpsctSPAe?uXqwѕNYDT*HqIƻ~(]Y+ H̐O*DdBt3$DDa~2EZ -ھt6,0Oh|nlH方PxM{%]zT= 6iguO嵪ؿR {B`WuqMBXCHCe9A]1IN:WAGzaa |ӮUr F# \Y)$'!E+̢2KV*a}Jp3Cn{uH@w̑ 2Oχ+ k@${˖ym H9HY)NR1drpBYB Z2@uwa~<&9>e?@ Y:y̓x ,svL Tg_Ю!mv!l)luA橇j. vH;s<ݶ݈K|ca7D8$,Е zB)`u)ɟ͛$ YgrO2 x D?@J=T6xSgB਼nu*unVv+\ӃJ(|`9U^@OzǾe'3 kP7@oC oAUq 6(׋ffitV*mH"oQvF믒-%]K\ K5.{+ w sR(nv&_$6s= *Oe_wYͅj E\^S"& GND%>0< r- %'͐#?.`PuA꬞?ʮѠvG4hNVnk$zrݥ⩶NΪ$Jj4 ݈+%E~<`Qc" RfȞz$TrCyS%zp:?p_^e{(6UƸXev{R':KFJ|^NDenv}.ti̤< D{o SkE55p`(L>ڦ(Ƴ0uMT;)]UX0ϠMIc ^!Ww[LcDm==f5Ŏk* iqT@J(UJ9C,{x˧oTaS2*N1斶3ʭ=msﯲ59}QC?/ny5# o4s`7:@dgׯp:iBz[@߹; JHuX! ūG\*l2IYކ 5= AO\J>滬'G*}Jk$z B,0TPtqG'K\62rTp&o|]KJrKLp7!";J_ ?f,H ԄrQrOD`&6rT_=1o*J}%A !*(Ca.6N=,>O{6J断(k~u<6;xρm~*0vᖿt̜^wpIsk'pB ؏Ǹgҙ|L8DOBG(c/Qg_Ixui8RttH5k yO,#5<*@gxoHk}yV*)l+x ʱp_jNty ~z')Musd~|"{CVk U 񐢯hc>vxH4r&NZ,d|O^SkTop* #RbV;{Z6 S˭9cVL]UMmluޫ{Iuv2C6s1C7N:c*0V,X/`m? J.E[>_nj+w '~ r$i}E@㉜b/E^RKI=!]a?m|\nRF@OJґnw *y;)^XށrdMN~~st Kwjd orw-G= %2m81ѐEKW] I)Nycdx>Rɨ߃;fa³CxU8g-m|x.l)R"LO \tg{GXA=jNM/ Hי]YL.vRtZ41Q@?2pe 3@g@e14@|ym]a ],FfiҙԈNɊ˵)l &uʋ= T"QUžF!ŧ,'Q\/{t67@T+!|Z~,C@+[ϖ>NϕgC]x+w%X3 7Ud&)fտoxo[G?k]睟6oByɛO:vo=~`0dlR\`ǙW40̉ EGFk?6t+0O ɍhV32Ob$y8 :dKu9 2@OZ>P`K GKRGڔMQl Йi{Oi$aw[WyD;O}\7x!WqOa^qåԧTvr 2nQ^oXz(D Sy,tBP[S*t" [nHK,wF'=ܶSmW ~j /I0:[<]1?k*6_^¦i⫀qsX?w~h B#$Z_O-]ȩxOLJ}imDu6 &N߄N1Q%Ԛ ew) ]ǜot*|Qދf.KNN=9])Ha%0 3Y|O[U{iźĘ6N4۲lo gh?c˜SG2í!6kDKs&uk~۫" SK=.kz X*kM.(5FRkH蓫H6Gt<K!]`O):kwVIX{++JO+IfP/VH]WGD_mDwHe. {9X2 0JSYO l֊E˭M!m[{g"{Lօڭvp TmpaNhW;^NъV8IjGR69B :U{[-Ba7~ 08ހ`H{: k|>6 rt{!]K|jއԅZory] tv5AJOo_=rS!z\yO˯w;`0 3TSN*Tvc?uTG%W]"#՟/ju[?@e(,!|vEqpTa^7 %VBQ͌`$%KLY-ףTY]ߴ[u Uc 3ַLD ) laBE"-/&!;Ə]CG 8v8/꜃ϝ[Du ]*CiA;AFQ6Pg^SjD~z)Ju &/AV0;(8Mr/z!Z@̒kzE~EɠP']D娹h2ID-1̐tMC5Gab DZקUT1ZdDm6V@`[q١]֙6FMhZ4f'BPj#=H> u1ᔒ\@ D2AHW|nF-k+BH@hܤlY&8NۊRp8Ua'e]dyq5!r%EIt+}ȥ6z?YK[s[0̉UTY@E{SCѵiߏ~Ir[(\}U!{&vԟy Z  H :oZ6 åQ8W!5̉;FH36L/( >ګDr emyޗӤC&' tc 0UbCuPKCu%"圂tt&@wߗcZj 81n1sP\0鈜t@Jt3.'jk}?/k:J S*qա"u]t:ҳQI]W,~7 +?%L wMtiH4 `3:-&B@/"|s%'2CqW@=Q (S8+ьmI鐚(nK r^@_7%" ASa3$2T^O;?wlѩ^?.uşp}:'pMW R8fdElTA; 4m%6p,4#ZpfAywV眴/et,ֳx_i$'fKD]Ϭī zͽKS sp"qg?"?P4f9(!d!D<[D$/ghU:O@z$ ^K[3dQQAyn?M9댅WTR>8CSm6hHkp7+pW&}'TBuMFgb#rt[\%}ȗ00 x=QU9.v_s H6bIa{ Ü:C>޽nrSxt> u=ٺ\S/, $ wDZ2TPQ&|g4|qOW>$(*6Ȑά3>oN{S)ЬChtyGΧ_eOSC~BWr**6SFY%e<$~!=O-7 ބ^tUa^WwX\=RB*Fyx+v-m{UI-RbVJO$S˭c_ٛ}kcN!TnzvQ@S.f NR sRC|)rYtMd9˂$2&:DVOO mh?W)Su\^~,{(&^~v&dl:𘄭3mh([p f=pg0EӲ[sÞݕ^ט:jF\mM#vռYstqDjVoĞ{}1zF}דJ\)f#e_pҵxsdo_ Rv=:PX+fH%%"0")nPU9qgQ ${aV*xl7D.:;']3uy7\Ԟ9/o~7U =(FՓjk;>ov\cV^|ՅMN K.UC$t2 xaWxurX R(l):]FH%KpBK0JNVU^7Bkh_1d{}OT!`:}ry>жL]T@Ɖ s1FZY{x@ݝw̞<=RJi ^ |j_\5&P%-kwi(kP{8J_wq7AgSJx=z?QlyaKF]y{GYį"",G#BJY\9?".\\-2v@Top*#*LJXD#TwY^n?v4AP;)X?_8AU`r{+Uz}Jr 6*U$}GILfɟ&>/sZ+'v'JnR",fpV# Y*|Ī&LP .OޝcIw"ГToq*wNrC%ú Է /L6jəh?oKvL~) vy`e ߈',M@ƙ6R׹!OS-ٲx?W[h!z@A_f0 @DYJpWD֮j2>ۊ_SIٺHǸ@KWV%"‰*cz'\9a幸3Y[˞)ؾUUU 7/ ߬U !_8ߟd 9!Fg@Kw-(Ev -JY$ \Y`h<й.Qw-B.sĻ4H@<),G,tW0`> @)T@:٤C:ND9ׄOy&HB#W%1RE9.z{dgXa"-MxU1Ԫl]$/6:4.=5q-f|AQl 7:Ȣ_v٨w@8ϊvM=4%|gHԜ8CW׃ F_x܆x-L7B:MRQ  Ŀ6K?xJB,YZ uJdz&D kd~ wy' 0V'10]"?Lst'R.ϙjnJTCS&a.;DemC waN(SlxUŔ"Й'.;?\9**df{dֈHMSu\7clw7vJl[{nk.G!򓲽oDgq%Fx ¹Dy"o^sRYGa wRC֤v"2Q<eaNrkrJj@z:ZXgR˭rH0-c>Z[w{ԈˣPG6hFG)hRȮ1ajC=#t(-_ӏȢ<&HlH^vyR3ܚɟP(He)2rJ sz da^OTJDBf 0LS "%fuaIE'Ĭ6Lۓ{ٻvDn6OG[^L*1b3 Ҩg7SF'yH1k~yǫД@G'MrIe/:0Z#T  C0 3ܩ*ٯsV&G 8.NZ`mM hѢ&=?UAcGf @ոOZI6J%BrO^pgn@mh =`Ff7­ܣ<IùK, ImL=fqwiaS2 EZ~ cni8&VO.ּW} TRѪQf;Db.&9 GS]KR\IT꘷C⾮A+|^"{H UD/ALӺXo<Ϩ<0'^U`/p-HDJ]`#ǏQr[E!/t6Hr0`Y9]Xb \{Jj48m!! 4<`dX@hq48´m63P; n'@#C5Fi!0.IOEeOZ(, 9 @$a`\ Do@QVsIe G([uo0.C`0@uNM t^ )r;$P*q?!9jT !R K|ծ]kP!&{ 3PTSz*tkW8v7+v4hqZѪ~X ѽ$ړ!Ul$+ ]L"?)uU%RB)rXo7B V䱥.~fH}g ]͐DO 3UeDTN;G*3o " S"-FE.^Ѝ1 3d]ܴ& S;>_0%hu6ۭ5G"}KQu۫"?)2 i]e\)eqid$oGx9W'0L7.s<!GB>a mI){V͛d6!Đ]B*ʬLHxLi0C fI{JQ9WES6BЍ xA=W,|P (Dzq7?B{> K}= zz6AS&|_7OrU DE&PRQW8 *ʬٴ]͉HYu;i|xO-K9^(`CO}18c t5|aucc&t h]녮 r J/6tB<=g DĽң@ŊP^%@2R=/f JiVn_eۀMzxKU#R' D2$KP HEJQNHMTf8m=+D3!so }-MgJXlw  &[@8ڥVEԎCR5P#@x܆Le.ɝɀTG"?2)9ᐣ+2Rkx) o/~8 0=;2>!"?:d7$rB*3$t_Nel+^y֯:+yjsnxnS!9s'п@4E҈6 .Gp `7xjz$̔Q !^(2)?鼗.o? $9^;wN#UFByCyJ2e&B(sB*3TgYf=;_;e[Ŕ+:|9z\iw}{?(Ĵlh2 )3XwlҔۍ. 0wN=M  rm"]~GޒWU/$ZYltXp/fHeǞչrkĘF, _rw-Z!EWqGaqoW(B Ǖǝa ?{Ml|=,p<FjjwABt Hz<$!o.`R+G╄y˥$ eUx2u7C ).wJ-џxdYDdB6ߢҲZ9W(al8a${\=-@նO1nطiZuv+8Q^>{C\?RsRTHBxr$q^*ػ0p2&f! %eovk.> ]`! Ϙ T9?gPQfgDwg'w-O^{([=ʢ=j&Km7+^@< 4I<PAM9iYXr#0 QYɼ/N& !" BHH'ꋻԏB0 šCJty7ؗS<m$"|[IDD˗ @SBNB eW#x4&~v:r#{jE#o% @ _T߸b>emth-%He9!q" yFvJ !80iGGr%koa%J !reVR8(!d!p[ )A Q,uEgծ(/M}zÏUKIgKR˭ն듊xOiyra]b>Ÿ9LZUMt[knYHBQCG6k$B=O=9o89`Ō>m)Y'J)xtVQ/% 0/]+a+*܅)y_޾OuQݔUH]l{ NGSQ,Ft&wZD~YВ eS֭0H! ߳f(+Pd L:$WN,eqSh.nQV 7P)922D)E)oӅnp c̩oU!VtD| _aߵ0y5׏X^_S1nPOŻV1mt MNhׯ-рZBtU턘Ҫ,S۪ǚFwsxɔQ6@,|$VB:~|/u\\?ɒ l):3 0nU7ފH'~@=0ه6;v^yx q@Q2L}EO[,|䎭SY3;#-^>mT:!0MB:k[7`-_op92?Ae[@g#"^#)07dcnVJ \lH1m٧6W4f g$TnpQ鲯&HdOU}|Znח7- %־nĘg*L/cla|_Eu;>pP[cgͽҍM+'7~2B YQ-@:@G+s~Nl4xu 0(AOL506׸E h# 2R7'x{6`O7go^" >yH-0D]>bf_o9}HYmK(U[c8V,{\K&1:zC-\_gN Ws#iQ'URFFdbQ@߻O""?T2 ;BPi䯵p7ؔl(t⯄dd%\A !*e/ wPr*>8nPX3XD6ߋc6[;3p¹? ۮŗnWvVUMh{ObVE-P9u S""8Ium09(u 0j ] IpIY ̠<!=z 0F'lNzMz_nWi.+!Y8\S*mE.8g85QR>ƇϫU}Ywzg:=u%vkr|j5$M25X/th H ]4+Pw;K4dm$qJT"#<As RCeN4e;v8ȕN`򽂈*r/H7 FEΆ$+(٥M`L$o|J]U]V L8k!*#"dĆ^7VŖX۪o>&H~f"8rt]iH `!q/+="'&=vae@.&JtEW*pV"Qc3JB3%@)BMdBD.dH{X* ]\[\_DGe^aS_U%Rߪ@ ^wy 'T"[Ycۏ~U?Iim쨎+C؍-8U|7u}flX]~]MK'zZ&m8Kee=Y OpܔQV$T_Hv0 O p-fz7"Qdt>\r,lIE`#H-D.Q0!Kx!e*S˭&) k+emK7֪0aV=VODq3]QFq9 [e g3"?)2 znfaf8 )J#GֳTD lc#=+tp+0D\AvC ^/8k_M+%-ڳϹ2*taiCZE 8 >5WiUSw"mΖưNI6(>;x :Hg㣪>3Y,`PD(j*k+--(ԶRPl &\@źAY\@"IBHf̜ޙI~^/^{Ϝs9<xLXiѩ88pp\p{QCKmVړN_mVwp8XgVo|۾z"βw5v fls h>*i/4I6G*r*0EX9O>2.DGknv+P򷦌Nxd8pplq̥2: iaK77/Ry7$/.7|g44%Ylh:6՛%W--f܁6hg-r3𘘱#㊴i.{ӽ;݁BfB:`&"M^`KB ĚIr ).yWTz{kWY/~6OCҗȜw/fgemftV)I~vV"j~1sFԦ~skSHB;3] P5TIJyA[dNuR`ȟ9;y3w/ɄrVI}Cqg$]zHhNMZ51o얒U/޼lN+$;w>8r Qs" AI8pAC8ppRbb|yf~=Sf;c߷ufYŏ?۷k3ܟL94gKbҰH/Oˬ{kW=Z1c'ݰrQv]FX Xe/juH8$"D*glE'B1 st|+Nr̬דdL7^ټtqn<|Yf=`s0}ªК}Zmkx&J) f rQR1D!by8pC!' 4 _Nd)9ƾHJ9"}u6&S;$G{}MNOں>gov'RyጉAyLK;Ҩ%2$0>ғё\ qČ;G-3V^%Bcmϓ;s[8pACO.xQ1׍v"# +b D1OJi-gG mH}hCOn$>_S~WUix{9qҕUY#Y_Mj.9? TdO6CS8Fcq<_>2nr>^lۘ(@2Ϭqefa9ف_8Qe9a%RY%*Q֘;6uuLl5I*$iyv:C3/9yAȽe\#ףjEBBogmLagZ7N53V!MH96"xKpK>q8Sh&ShANS88<8QeGYa|#B[=Dآ?^"@87m>3 #, fx 6VdtV)Gn|d\r .MoV#L}Esi~Fd4m.:pCXa6L1欶&;[m;n!'.O]q*QAS./d @%%f;Ҙ ғ5__mv&ώWZD JMࣈ1 OG6%Loyw'˅QX&F,[P[-_=YkE0KJr DC2 )w+ cbÈ;yuY.T\('/X:}5Ϗs%+~Q{//1o0:4\*L_s+n~$:!Aܿ%x:@uǪ'8LbȀE!&J)#9#qJHkL .Q&g~1cg;89r򂨒-;+sJ^?(Dا1rb)aI%[Lt8pprqN=BΩbX*vnR(L&e'?s4Ԍ&s#gNܒ4}gYkYb`D';Ɗ *IWbVС?87Y , 4TBt ^ r5P "$&ŋ8h=8ΩVD1ۅFv^.s\%2k D u{p[VϏI.ұFu'_a.…s͊xaA ﹧I겱"mr)6su;1c!Rr لk'GnQ7AI X8p( 8WB-wבߵC8A8 Rcq.ZtC.b=ywH{DyȝJ(s1UǏ kr<7 Q]%j+.U%f//RLI?-m2 t\`&HDa A,JBmU/ѯ;p(P1|ȼK!;|ٞ| _iwwa5!' ПcF)eҜ);g9!2Kl_N]vH{DŽ'9Xy/^%/}} .׎cl߾+tr%9^Rzײ|ZMC)4݇6;P|C#f,F^}XfFYg{ ˲=~YO%$e{.jkqFD3 1yRФ}5Ռ)]rH{;ȟL̎VEWknXߪL̸& O4MCYFzPfeϡN~2 d#Bz0pg?wnfE.|Yvi*0&\.TGY,.ḿ+̓1vPݙٞdw]N8ΩGz, E9Kn@;:1)=3 ,$HObү?ⷿ{ДS>y~=.IToPŮ>r}zLcΨwsOȹ^m2܏4l336鳁 " )ߌ]X  .F}s߳S ʲ=<R SjݟY-hk>^pS߽!ޓy^(g.1[t^\}_^~zx1P<ކHcnsκIO7E G (X5a9׿p{Vpv/6AaڕZ"%H&[wʱ5Xv-ʳ=h(Rv8U~e{~ܻv{{/w>||u抿|:Iw//xz/LtXvw#ccEkKX{3!۾Awꊿ_Z´\mlW.0(KYb՞ 9%1&Dyg;@&P. ]u8 eٞ&fV@0,K!8h/޸K_}sچ~Ck HlL|QsMRĦ2i榿~ˣ/x2}rIpzA>w]c ȗ@3VBs\TĥBIe{<>M('pCymmW'ʲ= +|:Ļ"|F!A.,l qwA{w/<{>"a̲uU/<ıf磿R.ŧGTΪ&="#7uD: \)!GyŌ{A~`I(%Zex'G,ۓ# Qlr¶Xٞ|6i x< 3e. ]ʳ=% Yr,![Qc;p㬗ݯzض[n]wzjWJJvծ+eK?z֥+jR\$ClFJ_" ׏dŶKbx=TXؓT*IM]]:k>e)/?@(㑏3ёĀlOa+؅k g{^1,z n^zap^Y'q|0EKNn .$[v 'Zyl>MA]y'r$ uNt8݁N#~ɟt[V1;>W߾^^ש$_&YƤUcUƻ[۹nMVx,0եt+hI@&fA18-Y, ˲=(Kxè*XTye+IUkRY",3 Se#ICr,ۓ< N&ҽ;SkP;JD Ps1'/HcFdQ' pG'@L?u lGpn]k]=}葝x[w/{z2~U Ϩ =ndҰ͌ws]LFC{$fdV44TJ%Lp  ] e+(8\A E@J90!ٞCzHE6dPeٞ}P lvO6 L ķ&9G>yV·jq`G2{}lKl_FE}~[c,(<̑&}1:U(|9P&a R5GޥJ;[6IX)ٞ7H-IH;Egݾߢwf "Rc!+ ǝHe;cdIx % gFdf aJ˳=H!U@CY' `8oۑJ7dxזe{ 8_!`AUL~'8#cqw@A9r[p~/**|Wu]WԌ)]R^/OΊ__ZeON}6㧴} Z>UGY\ !ȵ\J4]ZխC׽V&{V ^QݻZVɣFyY ` G9Gwk@wH9t>=M(K5Z19\NɁ54fՐ^\e2 2kx0I ?(=8hId8,4vg{FKXm.pHJkzp5#C8Pॷk~r<3ye\ K+?Uj^R[:nd/bV~՟jҟ|e/b3V= dBjh˲=k/em%*T CJך {'@WZ`/ݒhpA _+/Zju)ؓZ  5I(Zzz`,|M9^suDZ, 0oSnӏ6Myg0/ Y]Զw| ;pP^ qR'%+yQx8 =u:ڶ@YӍlve♟b@ʰ=}bkRȸ"1c#um].a3j};Y F,~Ϛc6$R:?#s XĨ):&y4(>2G! ?);xwh?JMT%ͺDq&݁GΉVxҼW_.'/(nr<ٹ_m;F%$E{_r`ۥ4ċ]5)7}iN653׈se 7eWe\(^p'чnV Ѭ:la8M:G&b#&vѱrV.]=/x±;pB\"RSpo87bK,\?MC?p;{;+ظ'{4YN+7B20UQ(f,Ԥ܃|fC8<;`&--^:մRvHx Se|Q[wH v-   TF?HD́ EeٞDC8p`>IWc"f~=9E:  &|Bwۼz{d}ʯ tj@>2n*)yC.3 :xcvg{F2FǢm$Eogx =e*`uB~ >- _Bnz@;y^ER\yۜL;6hݜ6ϭB;pgIN-m',w'^si`-%pՕD{1kK9Jcx= ziMR+|M ǧxғgRma);ЎH4xePo; #Si#ߞz`*$^$\RXhr%S謚q]pA]P=wV>H-."kپ6 hݰ哠#&p°+2L8$v<5Po%5]+8,gZeҮ%, :^q04V'?pP@HQUP UrIrkBw& OK5N9(I4K% xiؑa :婺\.ɴuޠaDqbnqҥ/ %LS̰ݯ `E[_wfwrY;D͐0>j.)^3|9jvpቻb L!%X.|;!HD߁g!H)>Bv)|]v jz:paqzsW{v6I@Mñ;pКH{rަ}[]u{d5+e}{S鑒?@'&Ϊ4vVD1]J*ƿp#׮|z" O\mN++f,G-E3I2,e/|n `A̵V׬Zō$}V\E_Dptl,%I1{n6RkPwi/&z潣xS04KeDeJoWIwr.pҼ= @ `[*},\?Y*ex *}I-bg-) *RKTmz}Ib&$6ݿ3rX]'NzX,Fٞg houqrAGr` 'FϽ۪c' q7Ehz89E+'QuOԬK\0!݀p-n]@k _g,ۼ_w茫8pp@OYT;BQQ'q#c-URB!4kݍFҾ;p!t+!Wiȼ{ʷJl8Ы:#sپ+Aʀ+<& ԌQڒ,欻Ԅ}jIˮ5 3Vn3,#- ,xG!) QsIE-ቻH{$g!0A1Y G֘CA:ppJ8t0(SDWX-UZH`gemҧn癭g@M#ZB.NV֩[+|~>1kmQ-cy7[QΡA6SxOg{.Ӈ6Kxi=tx"(=Ae7PlN!B{w*[teoY'EBl %J 7 ׏`PFdfp}[|뜩vy|U9Ssԗe{ xM"Y6а Nv.p*b%*# &aD\pznRMߟ[9eٞMDg;z`qvH1]/Ez:]z^sNC;\l`J_Zŕk‹-J`PVl۩)v5;xAIv;낵OQsz~HX_9.fmisFb]%rSwpU ﹧)̊m>rTXI2gԻ}L+"Knwpz,b6`pCC8ppR<ۓ=ioމ/R݁ h\} {K|C A2@C,"U1`7pK8_@w :%޹ܞH0*L]]֝80/!^R>}qp=.Q PӘ%W &$j5&wRF&;Ҙj,+nz2)mx42Sۉc <Ձvu>y(1^ }:ORWFJڭP"6qʁeٞ 4Kcw2KuߏBE݁v? "F8B#S$EHvv_[Ք`x >՛#ےJ_"+zAyˁxX9(f :>W\Z_{R[~ xDp7P%ނ'88a 5J! ݞz?#'/hkK 'y桤&[m ׏dtV)M'_szUҎA)Ѳ1ceE1ĔCWjwC@@dhfz=4ĻBXO8R##qxwvoA Usg'ىg K}Ry(׮_ZU.]'Jזf'c֖fw.ʟY:%$1{|e3%\%`(*L6;88Й7TX8h'{Ӟ[.YWwEkgU ׏`Ja\An]2? -b*ӷyuYseᖕ )v=`WkΑ0x)Kn߁NfHxE!ƊۺX6ɼݷvC/ԛ>{PJݎgڒqWgyӌTٸg~=={XmJ?pm{ S\#kK8ԉ|U'v?9 hyluؔY=xʳ=Jh)T9c X8p+55SJrYi@%9EJFJ)5TެCVE*6Vdb28"i/4WwȊ_bҪ,Keٞ)R&Z%sHxvY⽭,3g W&qw8_#ItU+O4MZޓ >?6㰜Q笻Xe/ ӯucκ4t3m@_kgv)C{_=_3gI:u/xt+K!WHt8;RbY ؞Yu<"bL:ђ~Th{zx׶uG;:q^yG8_{wiA{~ѵZ^m;Gvb߾QXҬדy~+~vrRpK&̞x5禧VobSN~sFT  )<Y@$@Z ?p0 HJ ~9pÄ) ^𙄗zx]  8݁ボ7O_ {f̶s<5t5}κYWhKA9J_zwX>2r^lAhzH;,څh[yN@\RɆѿY|,Ku?Nd8݁5t+>?mܗ"3o:Jww#YmҪHOJʟ{lXm7^ߏ~_sg,ۓ#`*zJz'rSh#8pI ;pp4tv]{aT64H752!dɕ9iҤaxwhxU_#PD}PFgcQAoY,0PgJ.<^݇T68pЮw" WN^0նLN=p<㫍HIRKd+XqKI$4pqo޳IRp&Cxt=7~d38VP+ݮ8pp"!%V$;*,ȸL~asԉԮ]Ϊ&>0/$ci8gT0gLrn?*χo>HqOސ0)[?WZ Z]AN t8pa!%=뙔?}unκ -&GnEˢҗȜw/fmIVA77S)vu%qͲ8ol8@&00X^Y`]WB8rB%x"Se8u!A+?YQ o%g. ͔'dF%>Z~Gn`a!u}qж ^8WHVt%JS\ ɾ{{yזf$9ecE$#70ifߩ%:lA/e v48pA4!nR^;w"Km@N^`+0;vq4Mxw=Я_OOQOda |2od/S;)NQn6  @Nřn݁88ppg*w wo7/Oy_\'#ωRqgU鉾$JVVY[eϙS㯑.2Hv̱Z`Yfw>T 8p qw |.9@$'#Q.Ht8DRr7FRC2:^ζ&;(4pjzz CCP0.낭 @@Ԝ`HaQiw'(qw ܽzАcO#wFӵY*m$MC_ ĐIF #7|65xC]=,ۓKckYw!DE~!u` ;pJM*eϴ8/U;NcӷgX[ŜuȎvwvB6:p&kˏSyrD4 ugI)u1vw# ɹEV 8gŌ*eΨw-Ϗ.1۹eŶ%RN>^'F$2ں8h}!r1H,Mu案qkӟ)8XI)l`>0ŮBO{Imu@Tβ)IwrG]Uil`/ٖVgnթyƼJHwTmH׿+3Ӵup,NZ䥿U!IgܟtemI)L^Nn!\R-G#2 2K}kh0w7XvyA2-`B!IJ=` DQ|mzAC?b:ǂ%ўm!DA^#1Yy#k]yR_kfK)ڢ?F ֠EaX$Oau۲+{tGpזK@.HE1Dz&a8B̳gm(ArQ߷H2m9]#~էkt@.~s]k1fcEo?Xw_PY|XQQsQ.;`XZl[Lǣ `Q6yD 6+| "^GHA%ŵEXLuXnշ4Էv?E], ?x AYJc^9)8Ľ!^@> q_09%UwIW bκYmsFҦ̦+2psOv֖fأ5Á$' ]"0>VD#c&VNMP[PE3 A7 :: zj{eQSŽ{tsQ'vv/yh& jIU4hUJoGn9vx]ƫۉ$߉ ˆc(l,ل9-cAY#]u ^Lt> mc_9U11y6-oոz=]QIT/ Ԯt_CIS,R^,Du}}1T&DB;QҙJŻ_*}.~;բ~cegj +G^zKuMZD d[33JPl.x!qo߈U>gTY/m?яfM/6J_" ׏dmi/myķDk!`EfʪUhJZ9tz,L K` #P~A}ΕRnp-6W"+;E@HRr`AYɲPgG^فؖ>[n;4nVG.5bT 2Ѭ;ٕ {:ZP RÚjܚuV΄!$vPH?K XX-w}6ΐ"hgĊ/v?yAoF]sy=t卅f.33B[cy pxPboR <6g$$؛=o?0=8dі2Px0 bC--99Z[U >[?[NFͻ"cߕ1! >+PS$Je\CbqTu{C7b!+2^B1YqK,\?Ee\{]~we2JF@i4mD,&Xpf "^NX;z@`<ǻTDbAa67>m1lDH|; -UI5)uP9fa3cLTuPV(" YP]ӽc^NbalOzL׺Fގ6&6:ht!$̟y6/,b%,Ħv;"慓,l7N}0s<;9g(KFkZh|)\aSܾo}LAN(`?jl :<K[=ҽ (hsM<٦8@I*e~npP mb!EߌCqTu{C`oKϮ{|.fl K^%..u`P*}IlȠ_z~\Umۛj,~mge$u xаlbV<+dt5@m܆i}ȐCZLH,Ps-,#TԶ|UX5imJ؅"d6p=$[=VK)eC xGY7l4®[P_XȔeIC"B<ˢ4LŹF$ Gl+"1H$4jb_》x_KB,px%NA;?*qZ*jw7r KJyfqB.W$ȱ֗H5v `Ӣ 먟qCYQWOJ?>NVY-B#:VR35Ǵh| E):p f NR~1O1d:Vד <6ֶ2qp{۠@ yנa k=Uz&|$_v}"*4G0~vFg2"m_oV_\W6j+YnDS3 >CQEЁvMU3l%7mo*B)\( PNi(Sa78$ ")ߙ-ؐLyAR ZmpBɚE -D-υ-ZJ^C1RC0@_[w%Jf_ݏ}w3bP4݁ghh5Ҧ?L±W_聒TXYʳQ,j1m&E j&"$ouzPȗv2;gk}->F81 zĹwRu{C{Vw}v3<&:`[k u-ɢՖR/R=1\A{"mUA>tIRss0mbm>W,y2 !tsm&LF_<.Cw? omu5Ecu=`Y%נ4g( ia7Cco8ա@hu=RSbنq4kpPKK2?{Qȭz,zEt}w!x6DY  =aKw;$ƽBz?ΌЗ%Q=~[3u,j~X]gWǻDA?OfRLjqs85 ^Aq8͙]crc6giOsbjǡ嗳Q [kb '"AsPrG?};z߳p&; 37/fa N&89fEmݗbk*`B^נѓLF8 c?naݢ=sq{FB8 ݰ K3(yzUuN Of,"eIvm)1EkQQ*e_O2r]`V CPR2,o)B=?}l1;CN8 l오@DIH$"gIP``VO#@F ZB4HF-ȡy ;Xہ'vnt;NzNx]{5;Ľ#!mp⾶4%Yl`gUT=~gO_mȰ>@UP2*)84D-htPP#f)-#hW PKbggcQw>`N>(d5v𠈉]BKg2$d䟡tkh (K"(1g.(cA6v 2` i.y3Ն(|i1)ʑy؇@Yw,(koF]&Pp8<4O$#`,@{C zJvGG`Cr:ü.{DiVԑּmEGF[qoe!:*~{iN Gr6T\/,&g Dߕi75X=h[4fkP~ 鎧c<踫/~47f>߉^N'hɏWDI-eGsַ)RJaSv R*GJ56u]Ž~}lImˌ~v޺IC5__w&yhqvH/CEv5ABk-n*yՇQ5gFhE(S:DVqNtpA/JH#I ,mk,bp,G OmY|[b>LDH}aŶňeA7?Eh#:F%ś|@?c%stg _wJG^ jr4 (kB,=QOf{Z<-vr5k!"j%'I!P40}[P(#ͪmMDs:YnxGE0(6C܏P pƾ"kt=cP/ ,`PeQ|owpWړ#DUPtW{dMBpD ⮐lbRMcmDV7"z/?(1UAI9.ʺཀྵIqi.$nS1Xsj <4]M6)Un`"Zk#4C8pAGrTO3N⮭C;BiVlƊl`x=$5.Kjbhhzr=I~֖fCTt' UϹu Mp32A2wJ-%o)BE@C;䡞\9`,*53Jz-irMveb +"ԕ7M`-[R`T9p)>1GscOQҗYanj 1i51ʞL^DRR\xg=}Un͟P]HMsFZqXEGrneDK@z2W>ȸN\寯D'04BSxXMɼq64=MeWF K~:^9D> i+XXlŎ/[OB]= 8ppB`9fƣ !&!Q\G_EBJY(6X/R0z".jS\Ɗ& Ϊ4vV񣧞?T^7vwNE>¬-οYu: ؟'vN88@vUIL Ӟ>.B)ȳ`$Y,זHXTX$J 2,fb=-# 6my&bebx`DްHlXldWE8h8Eb˄VN?jB5X˼g4w A,ƿp#GpHF?};_{"Iaq JV"ZTcTR⸶S|HY~qHfK+}ʷxemCVp&v/π_jo9Z"#۠@\"l~5=I7]ؒ}3mj=LiII/7Z-(4z- Imv]n>(q:ppcu4"%7{,[N/[$F[1e\Kb7 Evs4hU@ɟHw!gƼJ^x ?Qe<9o]ψ^G{}Qjd1Ȫ#:RXk1P?mmqO!)4dR7e gԗ@OT8eyrſ"4nn 377N5}SN-Ld;dbn 5=駛e~ -1pZ˧ЁXZXY$9$FF28h`^7C&T&/l}/⣑XPdYmJr~}+y4KuT<mQ!"J7^n>b7|v+..qTX~h1\C]_v|?N]_Da>tliuuVکo#uԽ-(* Ԥ2 $! Y5(bM8Ok&n"Ll%۶++ ن miAI'?`-wd 1_?l vD4v%X$x-E'6G"%'duEG[2צ-˵@lL>  }Td\$CAnb ŽcH) qHJe$.;] h?MaGs}9- E !,a8QQ!"gz$'ve;wZM]]ǵb@G9Py:.H-T`Q߶uډQIqdǹx9Lj%_J6~[Yp)R1RƤ"j-w")e"tɠ54?fiim67bu'&4IXe+aY,>LHU$bYπkS&^"RI[o ЂdBFvDaY]hDFkɉX6*49\h 7}MS8J h صgݵ ۑ[Id7{"B\mN4]= |٠`6=7ٍ>bJ\iIJwKyJnh\[=1|, %Gs}sr,mZ4SizHT.SppTp4i[֖f1iX?>khI $Ls+xoH[BTM=i.%dW JU>_}J+{.p}E͚uK)g\,5O;eE(p6$M-Sľb'yID 2]in .&eeEgob=/ [eo6XvQ"9Q6`kK?P/[^_$̒p]anp9BtHQ״j.Jv8 *A?ZΠ޴ڛw}}%Tt(EWx =k;S5ױquqꪠ䩚z~Pj})t4zb)Z$xj~*wd3\s@}9R|CT5[$Ys0}yen [ZLjw$%x.:\L%AnS{STR& qiǹ}_BCdZ$l@!M [M]&?k)Z[i+;u@x/]&)ǹ/ HuPL&rbnIeZnb/ӍD"|d*{_,NHӎeNF݃4?p49OسbYDiCVg[ 'X< }?Vw49dD8=kzBYba}yoXo97Xl/% 1h47>!6=|2h&ՋF5Z]@w+$X= ~mXox5+wBB/(pIH]v# 1Hgq !clޟ^8ZI@5y "=%(t}a4M IHlg:d>6MCfE~B)21)4F|z"Cqp6i!GW"I֖d/~UlȠ2>oc{QAɨxX&)J="ۙ3].|]_?.b D EBcУ!t_V5W$5/D-Wǀ5݇Gr6Dw; @TId Etv + IKK*A=λ71¢, pw<Һ3Ӛ7H%MY4@!TVQJ*s@4JP;GQ! dH66 E1]Ϸ zE)jsQΣH[\{DE[㽵8aC4QB4.Y}Jw,Mι?XICMlA"$2*̬I'OluCYQ;;g3)%WkH!XukʖA2}!7f\ h}laLpJb'@ B}ĝCMq!S,R @18OQʐ3 >Ir="'#D+y1VҮیTmHryvMWYs%2':Խ2EbK@[(#={tevȲq beW z˨Ϝ24k2(uK(e WUhA8=~%몪7vfR3==ʨ/*~g&yO0>;$rXUДiTRW>Ȧ,3,MK0@-zd!Pue?FJw{D3Jx9\0@*7B&ZbKe>birHq DYmDzL,Quh Vm oQNvzbomoJcgMD'6!#H˄a-V!kq _rg% #܋Ė8,gy Qxy쿉zΌ:o* )p:(1eGֻ7D+wvr:#՝X\`49ֽ3!Uo2vo>2\xE &ԸB5^Uq7c/{"TS^5I{3dDᑒ^p\?s p)'^pOԏލ9ޡ9[<aHDǀl 1‰"4;WnͯVJ bM@f gD\}$$hRq{`rao>RNu 0V1)Lwd*fRhL=Y"O8#X[3]+sS|S;j[YXTcQ&jJӉsѥۛbAŻc.2/\uϯ 0cuB8i->q$$'R8 49(aAό%j cqd ~1HI"/FPPLKmn G vaθ[-VD.f*O,:K W|E8)gzVOd+N4r?օ[P12U(E?=ܖt9oؒs .xmSBECfgkl. AQ;@ARg׀D-[BeRHlM"A&"eI{? s;_zqsC쨅@TM=ߵSHB$MvuWPA?\bw{XĨ.WHl4O\@B 19Ōzױ S%gCm3G/SꙀ"PcPK"9y #C[v Bz<: aYmVq:gx^9zXj1>9pp"`ز(VNMyZ$hkT!ڊ6owZ;*]i_.}2DTOpIy =+j7 pkI[nNa.p*׸{6L3z3 {6=1lvzk.Vx -}@3#.k3*VR;)g~ 'ҺOkPV9yr/Ep:3WB DChcT,'/p,#mMc!8hkoFőQqh+{4; ] POݽNr읁G~/G46߹u52,;s+HsTS<InS%(n2=-Sk%BHuGe׽K+o\HQ8lk: >c_f+=?WEٻޣ۾.#.g.Cm_G_$y+r+gN@p;SvX,\9i@?3E<(xȟ)r0_"Pz()oYH܍!9y'4 h&9|Ed 9pĄDg2\)(#a%cmLg*܅$ZǢH@,ͺ9%=V|"&,$EVyd3p9pp"CkSb(zKc"X+Ҷ+A@-됤"؉$L)|i:x=HCI^Ӳ#?ȱRb"J7!D5BN^`оE;zNBB围0BxD14塾}S1`E!߻ !cbw a=9q[QX܏miq/MЁ~852u!^":_#}K顫H ?"bo Z_.ʴJ\2=S09\Ypeľ2~_g̟ԹXpΞO+3+7բ&^#N1j"rezjҎTn&r.-Qk>擓 װ21[MV{!H9H$tk8AI2!{37cQ0$yv{) 7"ңYTs 06cɲ}-K+i/M &Z\ic$qjF- pYuvCjJQ>1ftA(7eΥ\-ӅNv$KԎc8) !f"9yA 'C]>R.%tofI*M>v2*J=͜dHoE2Au~B _{il!e5{=F}?TC&߶kCt1jWpTmkC܏ǂ_=W7ٕIwʄrƔQp56rvoUSm _}`'\Oe_HD;m\Jĭ|#\^)=IM$kge8Ȃ_fSȿLU;^,E}n0FMϣD,>:KmX<``&S:.)&-6>:v AOˍV.3‚l )ҰzehkLRa>< 83qW[.6Z/Ə#4WYHzҺx$}cMyn E#*Y]A$ OKKEC$la|yd@+Rg*cJ\ C le53 xVk =&Vu.LW\.4!ﱩL)!krSse!؆dȁk,?2F pI N8gۆ-IjD'-%O?`gimKB\ogxMR/UL˄yKh6 4 x;r Tr]Ebggξ|sᶝq8Ľ輭=)%JTrUm؇-煄10I>R}.=MeMiy.! sRPyYlbpw49&=>(8Ր4Mf2. fnB4X`&MӘgūg3i6ΊoѴU9ߪX` Q0ہyǀf.(&JAy"8icp->S#Ľ6r#6\gQf`orEwbu]r#X|`PB[hX\%cE 2aqolي$Ny9656y5@=YRK#e=jE6cQ;5µeW\o#ٔYrh)u@b29V Ij7`3Hd*&GF#k@fzgGpzF4|o85jQw'Hd:pa#>a)ʳSSzzOg% ZٝqKyH*ny/`?:e>K&d$Wg$G[LJ+qdp /zd`M7ec򁆲OI8wHV@},ЕO jSQe劉M` 3XwfӬU5ȊLȟ9EϜ '/XGh?s5 5}.1 ,3 ^(3sAa?3@5P '(EhD4"8x v63S{ _nPf)+v.krJVc4R{^Q,Ũk9^or''/(mydHN^3!eZ@} (߷VIEen|3sl9$-"f ;v$zePZd׉+cXpՎR%k!vH%GӋ$PϜT22)H9d)Bo+r=G0J*jn4; "* u.BD܅d+w6O4G9Ӌ}-9,BrȟiRcstwp}ٖI&g7]|Qp߳HY r(?R&`r?7ݼeqPր!'=S`$>U唰B=W5ޚzn?nb֜qq85r,X܏Ͽ◯ХP{*ҹ}v'қ75nix`z=K=O `?@w­V\_O-ÿ!1ٛڭ٘PY8+%Γ$:NG{UL6Y=2fltX;;HFY'hs w"ҍ{uAHw%VE|'4Ư*#DeJ7o!Dml01Dmm!>ځ6~>\c nͷYd&Th~zh:Hd=Zi. tF,Έ Pm,_|=äBQ^"Vp~AI'QmRPa޺rcdDb H'=F+l,'ʁ>*md;ZS.Ff.m:/8Q[p$Gtrh!AkG8 qUR)BB"ږINXFUQ h{%>fߘ ("iD٩3; Vrs,LQO,'Q <,2u%R>ִ谐@kM;Pl:W)b+ă @6-~ K]%A4lq/R}!V0 Yj- Ge$ĉ;qt:DٗMǦ;ĕj<o7=J? u*?ߜOmZ=2*E!OFrTILwRe}J+ :!nWI:H5!)1-kAA7 BVK;L8m/z!S_gJH YBiQPHzn_iq9!NT_[w`(+0b~^Yj"~c^89y-km]VSέ(MAe)o:_j  (gz}|ɴWa[8)5\OPΩaF#A,"BBI98cd4g;Ֆ[Czc 3,0_[.LBmzESNy6}2eW7'FV*_\ۑpijܬ$vV w&b6&Md2t\8y!c7f,>JMLxAdZ(1ڽ14?arFgXTشP s7AvD~q8\^_wOg*W7fCI2Y⽀K@@&v3>5Ngp q wӱO!j>3ʏEdMVYd}Hv<! $2QwNk |+'_ޚ ˘^Y;.Z?VeZ8ކLV[X.u93qqEL.,I_eW$26cdGn 'nۨ22xik"iAmeYpD2j׾9De(A36ښq.jwCh.(XdxU"NW! :#E HcRH8(K VOq8C`W-M6[kb3!i%%:p{Gn*}.Ƙ A4Ae| ޙzGRC}o.,fVe\}iK. e{_4n-9wؖp v$+r"'L;uBHd䬧@S]"\(nM$+BԛIy@ MBqv. TF1d%K ]0FKd}߯7MNOw#^Hq`mOǺ^$WgS4.QStCF?^ĜMىNB4E)/W©S_2sC(L&VEyM B 1IBP)jn! DH9y-Hvj&'yy\{ aI3cʯ>]瞟z]6\;+W''DƊ5 b^2CH>w fQ"J3lLL}C="Q֧K[]7ֹnё#%|?Ps=e#9#xe̢SXrtos$N(%.C!Oi+jʲ-…'wlLDz-}c8Ww!H٤|wyP"e7eh2'QJK#M2qV ^ O<8 w$x?ƕմR`> QM+qMb@@ @ 뛀;5i`~DmrGZ͌t}}DUe fz; l`-ޱ_ָOEle+1*nMNUGKrk/`ʴ2@=!'*U&+d>o_Fz?|=rerGSd @*UjZixխ\!fցYU)zfEUs f? `n!Y1ِf4DPbs ݕtall26ȿE(D @`D(d@5>#LF6*:4 (in!D٨vIFd|2N_ 2=E A~{!S7;#F0AA@wǡ1l0/Bj8 6ZGXK=Nhөۿ|IeM8G7f8&0w3l1An&y &jS1 ?sloH #~jj6dQе}; ;£a g8Gc٢FD|}*b; 9p:@θ y0'u7@R_T R*uG3 *$9 ׊|)1piRnNk^.ےqzI6ڙwF5lS&6t'^ʿUSj3ڌk,G\KE\ʤU8mmTJb.Ӓ&ՐqܫNLy%kDv}8q.uUUnK@TK9\j.4_2HIozń[lIW7v7u%Wc[9\1o\1i<(߼Y_W7"sJp{KmRj3`a斆_޴%kf/N[[pS :gxc Yz ox?j|Ǭ(j-}6p{=&O\aW8lRg㚆W5 0(ж;YnڙX{iՁ&jj4-mO'koyj]b5GM@RN(2UfMo4 ,0.tWf~˕| R9 5jhtT&}4;ENWi ~A^0ռmNV5+j=:jڒnpz#o-lRu:I\ D >do:\71+2ݝ&{rACH9.α˨G)T$TB@ҬKn#*fl (7i#DʙZ׾:'U1;{sx!jO#F-lD4mTP_ȋ웯F|mF>62fU%[}7w뀻'DMK-LU2v٤(KmYۤ[=!q]Tr#e_s~g}N2MTM^_W/ 0)\&ofa5 0(mWpoPqUh>RqEĚdyZm~!D$D a3jx佐i }5 k|-އ|eRsH;ouk6ely 4a8"Xbxf,9,rEoB%5s7o~dy"iJ }5d'Y΢! V{/΢r ov4̮wqS^)~r_ierB^ls(fE'kx䒰fm ~!\kfYtzQ?v2ۀ)vo92K$k߾)I5Y<'DyRcw-vZ=dz'DS?M`>=(,h}jk;@ OAdW Y_hK W9:+3im}$˃I} /A@8oY!52A}Ϧq=hyoFeq|}NT me0Rnif@6ouD1Adצ;'w8KʄY2S[:"$ւQ&s$;)g0}Gtt^ID.ZMF5tza,{BTMoUkۤk:Y<0';J7ݠі[(eO 2mpfU +eO=Q@O~_s[ZF/5_]:b8j37U_ٸ"\̌dȍ9Q%sN,HIx?A6Gd'Y h)|{9Q%ߣ#2ˢ*HIx˞5 (4 eKQ|Q%@L4ȌIe>aO`75"C.6Ƥ e < ,:^#S3S7 45#Ժ{ 0nt QӔt?DChuۊ(AH|#'` QY+h 0ZbQٔd&k JyK`$10*M\Y!om>lNt}MG),VRs`Z=R1XEA' ܛZIS߼9k̓7]})ˆ=ҷw.{<=Yr~wJmQWtg֭ȚXT}gJ 5qW4|Vrϸn\#k{5}W4gñuˠ͛*mkwU2QprA n7l¶sΝ~7 1Qua9%>xᴽ򠌪kdAwE={u$f?q{WƮkjbv{pœn?խnvx7V{=)8GW5I'F\o UMK%Ke E5l^U8C`ݿi@8mZ^2v1 =m sYU7mjЍ?g]n2ޱTāv7t?b O^XL㑟e[9ǾCK׾wcV;SYUz_ُmJ܎`j~ٔe L/4]nτȨѮ+o2^Ow?HMH AN,x}/O59 ;+흖qr쟉Pό}sJ ^Yj!¯ z8N_G7㸬qᵰ5 m5;e̜m0O>kuu0pb>(6Ń &RjtWse!oyDx;cvu2j )?#`FƷ @6n+}ʍ}y]kV\vf 'xB}d91v}m| ϟcߞQjCA jP/ʦɗÌc7r9(cOzʌ GMètDOǞj"aVENFC6x#4(/iTd0]<&L:KE0㪼,5Ŧh|A%Zc,P۾s'3a fH8`m?׶m|/aƻ;Rn73V"/a6Cmw͖`XXqEI~0y곀?)1YislfYt2t"fmNT2DOfZt`dx|NTɗpeJy0_Tiwu>d[E?2H[qfY >'`ӟJ<ӾeEB$4g>VG oxoUyklOSz͔7ITe>Yqwuu{<Ѓw{r{Mc ;KQ≮NBfȸ≮RѢx_`y`fL/uzh<5X3$k"ٸhe&(NVd֪2 ˞^;f ؿg˘,RNp53 I\T3}#q V3* 5o} YyHe=ej`.J0핲a~L7!6o3cĘJYt%_ *owIDxTb{̺;J }[ݘ;/4gZKDTA2g/fH'`3ˢXKi f.nhXp_ ݽiT<6` ,5\ RSEwUDF;~!%6CXk&E?fa8g(}$\̊vȜ X{A*ShA6nտ|m DanܝOO"Ϸ><8699MHNԤЗOw7g~ĕǔ.lZw^7BxLW nۢP[i2+u:jmOWʿ|Rcom9-o:0̌+b # %۫\ -=̌$һPwX팪_ruԃeI ʇ4'd ;9YH+3>o$&Aw̺;^ xIbCVǺ;6伽ʌg|hyh6U2M" 1^xzkA+O [3ew`#p+ ^M5E_ޠC ? |hnjsd!b/e/DXEo_)npO LC죚}xŖIрg{w7obx} z~aTC!|] Wmd+\ я>~="GUƞՓk+Y 'pޤc~{ݦR!мq;~0;u: )6ńm׷* 6r0Y< 9zw6e =DxǍ\B=Ϸ= p }j5{CwVɞ>fvUM1P4yڿq mND o;M|k93#Hmw$骆%!R[v{=ոasﲘ1@qCc?y^W˾ dc{BЧN#Arq^r.e#ks)FvjtUXp {WM:?S l(Rhul&6PMCmyj뿒܍_s7p34oOG1 7qr mvm` v\[<1k4,WzqocO#s>pa0y|zlXNjPÌ>gȤx Xπ P5`E{M_Q_5f f0Cjؼ䪺ƃˌnb:hu]- f ?dB0 8G}w3p"0xrW4?Pnʎ+e!1L^gnFwBvR8ϋM/6yߨ|:}gYn;b6Huueh)$f=+ wE鐍xfp\g_TA~3w6=o:8{bطbx>8i샚z^CnDK/`pʝp;bjn K,|`JLn([G2ˢh_l^ƩF3(d-*S0u2=$iUd]񩆻N&Af]zSe*QG^diu 2ҕg!6ƧJ fUz |uDO]4BOܻc&SΪLꆲ4'הDxTĵ`&5t9V([ꆲ40/ӓ ,{w-fQu4/-:h=$5 "nj{Of<@6=gc_ն?WMLFxt\J9Q%k@t fdZ1S~˘n,7d7")KQܔYyvS0a!3~Qu<<Vj0̃WEkCjO)9pȎeJ= ௪׳x̔{J_|jgZ9V3|B/sCNOV<0PI@7 p{TBDy8E) 7xsY-׽m7Q ]K\uϙejgKqz :eu)Iu&):88w¡ۿheoAv!B틀{'!`l7YWl2D.q3/"rJ9"N9`FW A2 p~!%! bw?+.L{\Χ;C`,JRyjHFoQ)ʢ45lՎX1ޡ7rc3hiQ-h&5l5t Wv+5qզHɪ>i\2:=K'ք `,0&@ eeT2!)z*o aNe=EY) ʕ=n(Sg@ +1>Uה&xgCٕ`'de @H/օĞ7`L~p꽕EY?ao<m(ِ_';@7.^A[1^##iU(L5pA)|h!>uCYR?5wْ[E1I[ eQ>?l\!%+1[t?6%y>öIٲc+KVPFK4:Z?WP/QR ̝r}ѷmV `p]>ߪNBTkdK.Eu _1!, QϦ%.0-B:]$'7=}~2m](!&'DO-A)ސ$Z;OK~^/*4yyJjny-@Z$k& Dfog:EYdܯK ]pCٷ`^e6)WGA==! Ѯ M;.BJ}Lv*3CL/ʢEΌ #9+2 2r4l+ktߕǝ3| d5qrІ{- _,Lf\oۊkv!"d.ogk ԰G۾s{ nw,A'Gu_FAR7 I;rG͛`.Q'['#Y&ta+Ows!DyQ %O%)2{%o ךwK_ؖ~0apI59 sl:6iQBAG/{IuߛT;/3E r $FZN1JzY`'emh%`jyYIU]?u YVC[*1oI5t'xцxz,q&5W`xSA#oIM2.1^$t+tKť߿ҪN޶9{y hoя{ C\Ӑ _eުfxțot]=OO;SLpNu۶5])ȗ%*O銎|7ґq'Yfjvh6z+n w"dykI ЭNx0n>֞tkPYr=z-7ob/m}>|}먯M=k?Kz O,ĥ'Fx;_d{ݞ6oGھoCמ߁f9i[^[t)th}_{UjX-_|^{1K<$Ǥ讈0TF!WV:UW=1 ez=`c{&ij4a`H1G`S[En nX,sѺQS>6 Ź1i| 6/i7]#L{eP}(_~tL;ufĘ4=)x23p+ʢ:,2PK 8Ju@JXٞ=m(/<_my2^nڗWo^BŸiL6o 2]+u6͓!7|[{Б%I~ekBA;ʐPhG͇Ƕv{f#iRd7̦4Aކa d͸6ꈷUӴ 3)GHM@AzmC=K4ğxk| |̦:jU0$Iʙ텚=9JLeԷ4O@>|kjxtkE~7~u( |%e=? 㓞n!^,y}*B>у{` Z|rx`NDW': ꘖaȯo2 n8F&3zb"K+~=0dɧxc^;H{kX77]Oa)cxb5{=pI\$byA;jQ xkH;\& zJwt3l4VգW‚ 6j 6K&.o_;kpц&_UJq^=zb?)_q@X"|DoH Wzړ7Ucr.ȨzUfj|m5{[1#-у;byC|૎r?yC1OWd\'0iNVX`|޺G| aujKG#mOۓΔ,, 7#0N+M=#p.znڅ3lFj 6vۓ{ѧ)8ǓNzF h@݁]α|*2i5־ϾЈafrrލ%F|o aAeN^hRXu/駮S7vhsj[e /wRǷD@n0;ߛyLz[*-ac$sOmÕ@ * NߛᮞQF^ @pe +pHrx@ ࿑ߝ.@ tE g;@ m.@ t.@ t.@ t.@ t.@ tLg;]Ld6;"JfԆ(@pu YBDomG}Qsէ]Y;]]Y^dgD,Sx _vPij@kW(3P9Bם녊*t> \=|'t}zMٺ'уw]3<ߦ=>tmAPq֕pSR_(NRmǣPx҅;n>uۿ5&}'t}U J߭}u^stKߑ[sBAE3oqs)@Di>K ל4vuh!@^e@D 5̜|&t趐>$(2xȆ i>M%HyqKO> _ox `R41}ͩDYJN:  ZHBߝ ͅy>}w oC&ggTp8ʿ6TB43(ߎtB_N(Qyr pFD軓P3obeƲ| B(y)E#t}(hzvҹ y#^N׷0;f׼=z>1eyqzCG7gd<N"N%J,Bߝ{\}ud}O3oTy^;_1;Bi1!޼B׿=Mk }U}.B#t,d}>1Y= t٨{[~ /BƐ55gNՊM Mf N"Yokz*ϥoG9KzV ən(e?]4QNGZv8~@?;A` Q<;$匕A<,t}zAE^䅾;Gh“;L[x@? 6n-m}0@ .8UF @ ] @ ] @ ] @ ] @ ] @ ] @ ] @ ] @ ] @ ] @ ] @ ] @ ] @ ] 0DHD v::"ZJDyDw#] a A&ܳ@D$W@ N@ ~7$07AWC@pj3sxS"JP̅ 32xWz x|ɶ#p9nY^d:zZӞ`E.R+aLP~+#%*iZLq08M13)y 3]I&]Zt4n_yjN4k "}) @.A<xʃmDt0yau}T:txg&Jx W 'QS 46uy6"GSypzSF@p85m^N\QP̼ۙVm5۠t|x{g0N7vDm? !d̅lM A2@l"J}[^|C&/ܣJ=z;tlf}*:aJf^)=Z$ A٠\HD7NDk%2 ~x QrhDȆ">A4[]1JZhd;N]:!d>͹W@p@ t2<A| -ۻv#lp!h9~QMW>lev9.S|BFsFU;rrSYQy 2|yx*tG~OVp IooA .pyhkQ9kШ ٤H@ _koYKy)ՂhVXUh@8/o@vx CI>5@.U[rѷ+>S_H#hlkuߍ8`@ C\_$D9\p B9ߢ_C Y`^d_T!D`/; $7:SBxXe= w` mH"gt@vWU)K yN߸Ϳ_ (쯔<ߺ=L$'}FSyu4TU|\~|=[l+Ko}F*@ /l N>@ 'o I7,0rta4+Lg?iPUiX,{_CV$!-[Xem~Ѵh*iҲUyv[=c H/kݻ#I? yuobnXkOwp-iyFpnD0#v#պ׶ )ނ m6/yV5+͛QZq޼$O2 hiۼ[]~mۿP.aM^XV(SmC=6/htǾEe4_\b I ^o70["UU޳*c8V'[ @mk]_-o(Unhҫ/?8ݢrxU-f*g@ޝ"w;mjr9ғnjP&wzhkBXZny vy$ow{.&J|[`[ >VA]_Ӗarl{dgR*Tfq[/ʼ&y[;WBh 1cok ط>ok UꗿOwzWo ?Әi[#\69GopU5~i@Nbɿh? `g>z,{`m7qq+XQo| q>"جAo- d|8o+{mb.hpgՔ< @D\Iw[}<,4ٚ򯾈98q=;#Ֆcu[#2^\c?!V\rgzb[-O2ooRWuېU})CCؿ=f\ ZUֿ 5, 啒~4A[ο3!/H ro毑h#ly>a}Ufr vg>-f>eϓiƛXxgn2Bqx>gv1 #x%˻^gD7g$$3| 잪Is̥8ǫm(P8nf%_M+Ftk@. 8mG 8lu+@v}pW4`HM &p'1/ω@}㏢k%_cr<%D~ˆtȩM"9xPWy˚|'1o|pJ,WBWDS.M|@7(˿NKNOmC;->Ie0o+םk%˿ХeEyDhPWUS#Լ_OnS|_+@`I<_4rV`vo4_):^Gw1߃ 8&sNqckĶ$?$ ݜddM+>G j@@ֻ_}S/{<''E>M|@6:5`'!Aq 柳z˿BOU&h.hpv ^<z8˿34tb_ǡp@c>=WŌԆ }ӻ* N{o#@ 7][4&ʕB r2;U~>gS -.dߢO赌v"\ߑXgzsC5og hRnAYe;_gtȁO)Ė@sFj?#[@\4$~i] 5*@B`c7!m~Uj&~g@ iuK뿰:^\~($m-($~:9@ ]mk{v@?HVWVߐP{@6U9 lC? $wmWv@rf|J y;CK(@ 9W/I$~%ʿj%N\@RD`@Bo?@p_Gp^/?H&@ rx@ od%sƃ  ˿S P/#싟h{* T X.JW^]^(r?Ъ ?P^(r?w 3@@Y,_G ơ?3" =S LwiMFpl*޿3b#zTu[(mYcTFld*].٦@ [{kt5v V̘v8^͗P L!Ld( `z>RJ0:YWRG Lk{ߜ:@I^*ǥe0}Bh c^cRIP8 r>\GY@ܺo}Oz\I }?OD9fOğ īm&SF2S_}?a"5?IQ!8 o_w0!FѴg?߲ͿC_z_UODZW)? jߦ/@\,o'm&?o k~% wCSpKD9=;0mg_g ,e ^'@ y_?@wS 4㮶g&Dæo}=l/̠rAOR]Dr~G]Fq/ʿ@ <)ζ@!W_O<=ICߑO o}@n-_OJk*O pImM/Bw-}YI#hMQ pj5LenkoLC@7y~1 7l=3\@b0@_B]mG+_Q \/GA`KG!_ 7*t@ o\@ g[ƁB,hCAL(^@ޝrGT@@e^ͱo zW]7#QW5ɿ߈,X#S/>VDԋ>?~Ӝ7z j}OS%z b8:h po W[*AN}mtpG@5I%3?GW4[_ yb~&h5 VouG8}UUJ9?E@xM@h7~@vd8sCm$ =8#_Q!I߁spq pu@7% poZ@_>pB nߴ)!OMx,vl { 5*\DhD*K<ل#=w{O6@`ǕlAz2CLU=و 6ʿ&i_.> Er%3=#_rw~Iu0@u[5 @\kzr3JnM[OMo3_#7o)!X?dN[߉CeAIk/]>Fj :Um 6jrS&@-)2GPln׈CM,U;O ?Ow>=} 0u=MCx8 cF_*2ƙ0Ό@uy/~6:0NZsl%oX\m'L/埗8@hʿWMeTT?[nQYzF!,B*"Iu=,JfoqF\PT0|)_l_>@e׺k$20P8<»So8" =_-PP] -oR @]߯o3 x'L"[_2! TUUuwmth01_#9ʿs0,5_m)y( @=@Wr  Sg /#Dv?> W8?1 r39էh?{;a/ ʿ "2DNfg~.>sL~<$w&},٨~cw r1{oY{\oL߲*)" $E[t_?ʿh[&K[}+GKE%PNjD_NN#Y;=AՈ>'"\U}'>Q"Ǿ~??9A V9f?wrL>_@*hHjޙS)@"QSD_/A[Do^ӄDoqWW#5!U{[W(EC"9cQkGA"5mSIAnԃDZcw˿?d GA")Ͼ+SiD"%hAUeM|(WU&c=lʿ<#iU T#y[Lh#ğrz`]~=SϳHnҵ׳`K`߲#@Bi{SG%P ,U@03 R?HTQ"6c_K 36W7XG^m;q刿*pE5[7)Fe0Pj7*4*TyD \lm6_K'CB7pDzq:,3k_@l[Kepk Hev_XU|Ge"mh?/-p^GIIF{ɿY56䟣37>/s U++#3:@Dkz[UW냏`mf+ U G (UÉUH@sx)"4pT3U5-!G!Ü>@*nenK@r~=ę. b bRMb=a6 "ҝ.?DhW#E$1AD$c>?@"cW#D vx g6akMF?b gY|)@<~~nO>#ģng<(68$ă,l3= ُck(px~٠ȸ8P83*8..R#CV@<`l@ϫ/?En=9ټ-. q۳[#p ; Ǿ~čP2Y/P@.Յ"@ܤZ~/?!q9ϷF )?!qׯ# Dp9W/IDAT-C }*ڭG"űoDH^>zhz@3p^ۙ\Ģ,'"Q7]AHEUq YO? *7"'o뛾_?Dwg񇠤d Zfg?( pV9<l~lC @B>6f?( ~_;f?|6C d>k?DNl ugC8z3BBL{_U>!pRn߯󭬳 É&)ݙO?mý@H~- ~ nGjz濪Z' "E&327@\Զc(/Lf?ǾP! X)5춿> Mط>'8k-A⤝ 8&B D┷5E[@0=L!q|/fo b@Ⱍُ6q qط^-cvɥs FBPg c#o~ۉC\@x{<_1W48R!: f7}30(>mC2@x%RAbWo"gg?(EbӯsPĖu~?Q#6()N]|nOpj)v)@ Mo%⿃/=@,f?f1AA Kُ fxWPfHt_y# -/D7m)Y[P! [)Z59mR!a υ6"ѶoA7 s}Euh)0g}CSC}b>b? UU_|/rAyϧE+@f=[ߘ^4YoY_?dQ[C Uo w'H)(07S'x2'?!b3} gd֋ f?;0#.~f"1mK!I`6,)}jQ`&?#Fav7KkҏilK26O  s-v0\BmfݬK0YJB@ohw(wYl9lvW@&ol߮/bͿ ęI{؏x_ ^, {?ҋQ6mo (h! 55`η_{FzWUI>-wj~n(GXɪ<^-̬^{!<]|77+TMN}UߡKO)zRe`Gzzُb=Ο 01Qc˝d1`R3)p2LɅُ˯O6" 'ߨ~@0}'D`*׻{v$?$$1iv+q$C@0 fͧI`?P $Spm ߥgf ikb=o$#P-$ 2nG..=rg_!:LmY(^n0B7 @*GKn@P-g4En+ @788AA{2 _(rC*MH{u)pe  䧏\zoTo_߁X0J63^}YOz7)-W_,C2 {`I Իseiy,:_p ͣϿ > '~jv]}Y?e0:}~T;0?gQm$yn~`c~W׷ط_Uշj)O`^WuW_ ֟ɿZ3.2BƬ;߰и@ Aƫott!`3}5 h\O"c5g?(0Nw8p.~3oO Bgl5E_*0:kʿw~@جfcϿk{t[Y8ݦ%lPd |1y_K|uK[sVWP!o`4ǾXK? 1=+ߕg ж}7䟫R#Zg}#??`@`o?οy ,;$ic߫wE}YwyKGvࣼ H.kl(McW'vOqևN..YFG8_CO?S3ߑƉ?xz?8[Wcpxpx{lpx}/=߃B-;"o3]WlGJw-#GWh_t_!S~ef{ Ϸ/?@7<Ǯ?@ طS_{ _$I`P޿:?@owDx^MG`l??@(~4H#iNΎ};B!W]=g!Sow7EA><˺8"}ߟwo3(ibG7 c(^˿^G7!%5ǾeUUnonCʩ^⿲ jfnD}jMG?(VU}c6nFho{6p;P/]Uf#/7 ćicLѤ1!-cL1yFcd>D^3kۛLgQKoѵf瓉?aw?xx#M_ǙDUvS? bn^|D4i[<e˿#@wv2Gzk?P5ُﻠt wT@nO}u知y_9T!7촿rV[>!]owӴ] Pm/@U?WK$f?\h=A\V[>"W,x'pa]>#6_? -VwhЍMfH 8:4*C/RI"[ yRQUdS/@oKY6oj&T~P *1|$D.4߷h)ElC@ja}l'$'Tq-^+@$'Aķܯ~8[>sVeUUȁTiW_~)Le{(\s[]J&}2kVjL]>ivǾuXb{(|dUU5$2cVcLyU@ /#e}[C&|_S4)޳|(pQV~ُe9}T>k?3zRy&-Ϛ Sr r_3k @? y%Ge*G$|ߪ-/$dHd&|-I7GI=ow; .? ?I7߱|$(|-}>? O`ն;߻ Kro? ck@ W[*t_XXu=%|}Wl~Dp}LH,(ُv?kIb#4 c7 @ E;dpF$y;4pܒ73>{pqs1@.}3GQXQ/ُ@ 7AFwQ8Du.f?>u'郈?nZ5k'4(lQw} &ؖn [;}`f'O_z~|}}K2-j?fppKMJgRJM~w P8IK~~W~?]K@/wiZ𯬪s`SU GIWU( @npLS>m_ @.`ό;)Mk?/LRB_R' FQ(Q^wuJ@$Ő Ͽ'E~Q'߯7IjM@| EK6G& '? "d-@@"d-@@"d-@@"d-@@"d-@@"d-@@"d-@@=C3R)gzFP3T=#% UHjzg)C BPTntK9~3RP٦2g 5=C3Rm 2 UH @=C3R)gzFP3T=# @|U%Lѳ,Fxon FێP7\}!bQ2NEݿ߬rj!?haZBK %?~JPϿd5=?jbEӉ*J˲P=J)Hq"HJ IKIJGJJIKa8y"Ֆ͒="{MvV.g)Ǘ+;-Hז,L_~NAQI!ER¬"CV1NLMZ)VL $L`V0{"eyeg :JJU*[5JLGU殖֢HVQ?ާ>حѩ1͒fX9֘&YF3U^FuX6m]}G1Չ93 |UҪU$]nnCM/OS~~>&   .y݆i񍪍&v\mu:ƑGLMv|253Θ՘lOv19|y-Եl^Zä́UUКij}ZhlfSoV6¶vʼn伲3صs-[{'BCSGhGfhgWΣ<lqu%V>svu.֪MZ26,b&*4r**4j:*@LMLyl,7*us\m|GD\bh$jR|Robrv`NJA0"`H*hL֧uӗ? ͌]֙ՙdKd'eo޴gTcOQ{rswol m ڳMu[NO [A^i۝;OrR V (m? Yrˆ[EEE[?Xި%%Ga%oDDiNe̲²7Yn\^{p(㐰­Rr_bULp]u[|͞iU-x4:zccǞ77Q'z̚KZ!'lsWnk]8q/v=s}ٚvZ{aԱC) _}ABEKr.]N<{%DƞWzuW8}nX8[[Mowf[@;]wv8d3tyoy02*|`a׏2-<>+|"ߵ~o /ۏ?yx??'󟓟O)M5MMqb݋ɗ)/f 櫳/ M^̛oy=}na>|ZZ.V|R?B,sMT cHRMz&u0`:pQ<bKGD pHYsqFIDATx{\Tu_0aQijޡL3e=]rkۅvkW-ln߬n fH[eISn/r|~ϑa&0Aƙ p~ 0 0 m4aaaڇ;0 0 X3 0 0L?;0 0 X3 0 0L?;0 0 X3 0 0L?;0 0 X3 0 0L?;0 0 X3 0 0L?;0 0 X3 0 0L?@D @}baaw"$"W[MDfo0 0 ô@{X@]@D)Rd!@>0o0 0 xއ90Pl0NvbӲ(=QQ 1 0 0L/2u#Ddow1oaaĀPl26!D2]" ,aa>@BF: tFv>_މ=agB 4y{G:{v¿݋?awBر'ܡ$NI=0yB%>"*By{?(J~HOƴheBdx{_:{v¿۽N[VV!Dgf%qaaĀB @gNC:myaaO0;9&S AoW-k 0 0 t#ܥO+=πY/"MPIk 0 0 *F%r1Ns5y#h 0 0Lf U)^% )c 0 0 pե!oY=rڽ\Ѫ7H۽ nh@0`hP8[C7^BCR/בރ{Uh@0< 0 0 3`aޏ "*jo0 0 3`޿0V}[aa ~"CAaag0јC:oD'0 0 >qQ& (N "Jw+h1)Di"MD)Ͷaa3%@ьҠ*UC%PWβ1r}q 4!"S;fW 0 0[e.EEQBDYPDzJBb"ʀ"cV!DV2 mw i}ܥbCga`>@  ů^"fu=WOw&z]`ݾ7aa[a~ ->Qn#x(@<]Wj+ -II3 0 t+,^T!DA KYaalSR%P{sjkˤTScaag7Pw, ~eʒf')] 0 0 5*s#Q,+L>ZV/Xn((~wNda%Hq泜 !Ued\֩ 0 0"z B}3Bse 0 0L/waapgaa~ waapgaa~ wf@ByiFKHc.gamX{Q(E6Bʑ&ƸingNl[2fErLL1 0 6Cyi WN it3ndv.s7ϷH^V w'ў+aacHu ڙ E 9o½wȂ"(ۢ!D"N N?y3$0FO A9>V-JܯX!Db N1D͓- ";w 0 s@yi)΂"y @O=qإbR\|&XBPv{cK\eA1]WDΡ~o0nIPcWNVsWλaiZlrル.d\*&e,Nv/,Ft/&N=$ JyBĸYk/UEaa9NK{+{c#*v{F+JD;?.gmppπbIwWtNCaiO}LeǾ: Y{DzhH} !º҅8J'6(3WewuY.ϒx<F$ ! (/-gЁ0ʯ{p["yYOaa'<@6Uc'\in|ԒW~|$Ē~'ɩ# "z B}O&Pİ nsd;\K>x^UFObyiUDQ2<J,M;'7[aa o. )tVL(7C'@54ߑϏ>Q#=д~g1q6~zr¯6X{{ 孶7netx`Ǟ&_Ȳ~RqQe+m{>yc}9^CN?Y܄e!(BcڛtԷSs6"7A< ؜}ury kD@ga=ԉC>\6̱kksfƝŕp2B%.ۈ(Cn kRoҞxg0 ' _jr:K[(YWNo6yN\TL*)[TDyipf'8/@1&^d[cM@x~SwD[>̾~ZiXp˿f}Ds+;eŚй4M'Kae;ĠũXxQ?$*ZUzWfPjW¢aojglkI}9@F]R1iG\| XwI>!rnov45TjN]YS[W7Y7_N+TXX۫/Jk,Ltpo-6`־ ^ J+((Id*ᕎoߙUʯ\!DUTkׅ;Z(aO,?:$Ο~a;{/" 'VWu|xOk4Cfjn)Qͻ\XHk~5/:=*MGݮwt1] 99 ɥ NH "PUEf(,%QA~&"}aP,9zTX7M0E802ݣ\BTu3\+0 0'(/-]$/ iCG|}K/p=Րς4~J 5D]XLT' w%/Ə?Kl5ur ;',B`NN"ʇbyQ;Z]P*,8=D}OwEcԜ]E"(O띮(#n\g(=~ l.0 !L#z,}:Cݶ/ƠuW=9!b|<#␅1'6k>wd}gZ5a{_cL+OW ;y_GcXۗ>]`AOb@Ddd7/ ^t [j:W-m;%ui6,eb7[owAH]aBbC&?m i\A;373KŤ8*Kˉ4 td2\u~Q?Ф#C~q> .潡b_ǗFL ;&~:mlڱcSML3|_4a>' (c$B,q'V!DGyOa3s8MǹxiBTCyc0 8fAL V~X*&I3pvW;jŽ5ڧ'ð` 8$DwV?H/XEh>'LVJg|!2]bh6vާ,Gt;0 Ü RQ^ZkuH8f?';#4DVn-څ㣩ʐ^Pt)a`FݸQbW4~ۏ]/A*t}@K\np&$eKiZJn&Qg%j_;0 ӈe% ߓx9j>1Gs2iV4>zl.~^X9"DkV  5HtR>s:D=Ԁ9C ˍRwxZlޠy9_f闿cn!h 5!BKݏ@3p, x6ϮM|$a8l};!y !^:%{Cm6`B\UtV`gaz "I)I9"yْOzyw/?c57?w~< 7Qa&Pڬ6J [ibMucۏ81jIT:͈z nhpp}w:7t(*%idVcK-[IEN9#C4eaa)f~[ٰ_.zAĒ)yi%soh[=YFaO |JW%;(MwN[w^ƏEIM I<Ҏu3Dheϰpgaf`Tq4⿣ Dٮ|/xsg-CuCۯ}ΨqTG8=j/>IK$[+7D?RMĈif3Ra{e={T78 wRαb_?u0kxa/ðp:f(ePuy%(ձBbscKo39(!=goRTP12 0g1)/-Γx[v'ߏ:Yxx"3%|{Mco.[y_Yk압At T6''pejFr/4z ыb݅#!6=˰p@ (Zg'4K*z!*B$:3éUg"""ySU;=E.OlK :1>_ae SI$S:_F4!cٟsoس!3V>hN2mW"jIuC4]ckmוq!BjB@pBPADP&pMBBdqF9.R:fBjRtRBteN@G$y$v&z^fS 0 ,⽆|x7h}Lcw65~ o6#^6 c:5:ڗ]7ۿ}%7?5ȱCIxc_JEF kʌFMTEfȝCHfoo}SF&" D8#NEȉH@u1@yX46Ώ3e+(rq2 X|a@yi)P(w_T8ۋםI%omo&d4Lw0*]`o_36o7vLa LoSTÜ%9\_la_TX#B$/ h#q޻zQxٯI%IFMS}F4~wvȍ&?6\@ Տf oasTdG.U =e,0DCDfE v`U՟,3Q˜89ۣɡ 0 ӗw?Ǩ_қK^1jtDžon(7paH h:|M9@8P %rPao(?`{&-QkyYWҡbN r!DeϜ#剞*˴7'Z"5[IJ2vv6!L@tgikbVaaCl.Zf5\oYz7:1Z Ź&@_ƣ!># nغ^KmEEГ~ܺ!3_6d3=cc/,6_TI!"DRvk55 J(zS96"ʂRq&J^jE`Dk:3!0_g0 ӟIV~<7xq+?꯾ߡ04".RN8'U?z>1>6R W>wylDۈu70IW7zADs.-LB07ƺT1Z%:iB̜.2o'rĽQ % F`^s[7y  i]fg 0 +=;*jx,yyRa)9{t}(Q'r]rK2Yى>al\Qݸؽ"};wޚj!I!C;|c0,&P *, n)z !b6Yn];4~D<'0 8"yІxovYuMkئ:M]k/ԴQ }Tf'G_ii_7hZq~}y/lt8pB7z]*7aEEsɩ]ɩL)[I|iɂł= 0KSmҌ8wuvܢ ja[p#^/tϑ;W}>?i*tp^;i%; W>w.~0{tز"Wr|Q½}689az0 svTXnZ ]wr[Foܢ .J>),7 v'6th]KU0V+aK㬺>{L<}0pAΩ 0 (/-R s'84ߟll^\ZРТ7{Y4~㻎6} z2[PaAS}ɟ"5OmU7d5|#Ipx[_BGG2}3 0 _c "2 s漵{kK<@eHYTaDUPQcj'?;tSꆯ#/0t5&jAY1ǷWy:=eU qGar<)[If:zk6gNgn-[ZЅv+闪#ъKŮUMNU04M z'>~zɹj|c5N `hC>' waa cus4̞xmMs_}2ӆ;Jk5!c/mO-?6qK6}|,' Fb\UpUM1Pjwa+0LEM<Z*WN /s?F$oz7Xv H6~Kֈ57)^[_[ yFqrŘdDd"$"J'nG k>3 ÜP^ZGJ' Yro>޷t4kN7-9PtݔK 4h`Vm~.jVE>WQ7ls!6ӷ>9h]Y'J'T@i"0>-ڛG2 09)<}M_ek6o J;a}F4 4T|WKЌ `@Uʦ6UFoQW>W7\6ՠL Da{ J4A)SSԛLPĵ Izj,0 øfSqT[^^Ϳ8/5ݸDlޏ~_|"TѠ 28=e/ƏiP`ǯwg7xvҚB folV>*"X"R}hxj JK話 0 s Ex~@_=2F;gΠkHqc¾+Ciٰ Ig6pٴy!)3io~Aa`B0|&^$}N3vAGQ: NE{j,0 ø->y867>ۜ8u朗 F\QlCR94@ŷ~9L2o>%PYBS{O!ypXaqv!(/m7~?اfnظ{b?mؠ+fhBP@\ |d79Yաbsط " @k00!Xπ"ӡDc]X'zDp1 ̭<42l[a:@J2-=\;}}sU)U58}v?qUnL5^6#Qoӿmk [ !F`Ȧ](jy7;}MmxŢ-hzJlg f C"Qr(AQ@D Pcڛƶqg:Tl!a`۸iOsv"~}6Ov}c'VԔ:0^GsվxݚFhPݴwmtxp0W7C럝h9A?o3E~O>>}G?kѼ#Ov4o 0*@[%3 d))FKtgE4]&4q=8a9K4H^1slECF%0 _[;mqOMfɐ=%tޯiMߍ l׼H !?i  Ja/ËGT JYt1ρ^Qtsg? dkǹK ;̹cx=5a9 +’W?uM'7D͸ ׽ytמ;&@Du}$j (| @])ޓEpgTZ"J;@Z d $DP yJ(sea E\@b5=m k1t)*K\@(eJu$!Dl)`B[]axQf*f`jiJ2Y}.{\3OMU#gIfC/+NhFޤ8Og vo\/l {g]tbt:[{EEv|1PtJ މA7!zAW{ ${.q*+n P*K{{]P;a}`)"(ݔO#~8 n10 )Db1&IL1}~EIŇ>_⎲myp'|M7;9x}&Ya E}ۄe.{%sBFiˎEPxkp[4rqK "r8"W1R- j B\Ȏb%#dW`- {1Zg9H VӡUD4%gIaa%(ںGI*,7? Ȩk_]~sVݺ ~sβĠ^eՒu>O\&2  &C_!p{Ȱ I(bgQE} OT*'PdS @kO.s*R-nc9dN@р^ wvVYIS̚ ϯ|f50 3)OyiCSG7m]*;mcDPZĻ &vtoXBʆ@}ec 0@D$/R^Z"g̭jl"o"KG_{47waO/k^A {jm{+⯝'jXl7/ZyjIRa.΍.TKLJdQ %2YhmI VJd(TZTF/5R[zD4oۀB$V>:BT/<Tq^D$();L 0hOSV5{ձs5~ }ԃ# 6tf3?DDwQsL P%Gn{ffl9->m4y[{ gub ;P(K's8cK1w ^Y)ӟ/& 0c?44ǯ}ߋʛW5쾡HcE]1Fs^i 0=⬺?vwO4P wJ.5A6(ful;EQ% B|.z؛FoO0r-Kuª+r.v~/dȗo?՗^":UBبI w92Ro_^c̅ 0 sўx?a~u>OL>Kݹ)}+? L7ETӤ7 DK4̺'+}}'kf\m*iA8n2Ni)趌P%y2nwJaocmj)v/]VjEXsDK$SH(*YZ8ř6RItt z4 F*\3 WI*,7^QIG=#;ǡGw"8 kkUѬˌ4#}'b\_? ~Fq+Bo }="jb(/{j7qeetHZjqG!Ap!F@\)뾻F21*rf6EXsH\ lPȋ(QQf$ݶrK\V(unneV=3d tm>}Κg/TXn\ bx_mR?zr4?C75"gm8'h w@ܦ!!tLB}0},,t<l ~;rSMdR-mݫ@~'J$X+VܻmvcHao/Ez D?Zށb5%ڑ5PٝySmFNXojU Br ׆u%^L:!Na:3nnDRayQGǂ3G7(jqƒĝƷ/ɡ3_?Xe&ȌY>Ϟj? 3`x{½},=S$=U^!T)a}3UPKp:%ԛ6,Pr@rU}j)RG=EqX$tL|0Q^K>7L;pߨ^=%8},MPD{[I7s{Xfm峲ʅ= (/${=[A;s۶?*֞_?y?mOaݐ2}VT_9'{[|(HhIFm.|=cXcV;O^b42u"X\(U!M&OrL(O]iYP.&P"%]IaoP^Q$/hu9 `u¼Ea5TI gwުUK%SIE%JOuc|{aL4D)HSqDTk`I`̳s?^ک4A&m_4%`N/xf0bn5g+]TK ,vZm m#R'ɐ 9NCL%@/J"!,T˵EK}v*ڊƎX_fр9kg H0p"}I3éDD:f;0/>::Qiv[n~n Ћ]>.ȑm7V׎0 LC^:!6tuv! _Qnhnc8 T3G@>v?mcPlgusxi;[e0ـV4]> @яU_5c}\i>Ӽk6 |(dA<Q!n_߿yM3zd ogX}W4E)%";5aR-F@S̈́d9C*G r94K|EhۿSJIbwMxRUaa M<] –kV%>dݰ}~Hc߿ cz]`O(_h|&u>=q BXȰ 7iJ@zHyN;.F=E H|suJl1 e= vžsƶ?xW;;MHgQX3 0L!rE6W?5艹\ ~f^|5® @}hg؎>Mz8%kBroId\x&"5ۘ)_ 9jɒ;j7CNqӰp] \NKv^4(;Zu80LQ5{lA1L;.=-L{=pSeljHjfu3]0ZsY3=Q18]R%FOv5SZIE 85uhyٶ J$DdOu U;L\OFՙ.Ej9HY=<T7T]0Cй#Ν'(.J {h\+V TXϖ}d(e+(r:?MO^u} ˟zoGsj~o7w Xqp$/t1UDE\z ٛ!tod\M,&ڙpⴞJyV] DVqX.w`>ȁ"MpIJpJhw4D>;vMoG AO`xY1# xo;j_y걓?9הgH \7sƽ͘A !Ԥ%n45Z%T'@uUN(o*"8맶b?1%*nI4mg "JXTD/etIQ: vƛp 0`-P.g:QwRmpiG hgX t+f`n_:f\D+OU7"yY:n? (-j)b1/+T˻.jIG91L} 26n7ʑsrVۤ=21u[8wwbW`"y%)/`ګ޻, qQэO[3>v6gd3/FEoxyCczK .<0ٻp\o'"Phh)hŲkzY"K(Ւ+Y"MbLtfখqRp"½CD PmM񦷺Q qKNP" BhP' jCkh:0 ӮxA ԴdxUFWxVD#{_ҍ1R}cmh4a(P㫩+7ns*&.ՆD!(ղ-[d8Eҭp_rS?9b`ޏ6( !r|p*;$ܥ<#cۣ,xv Qlݲ}ar,Y'FB^qJJNaʠbl#d .9]V@R/W_/ utN7xٛ0d!x=R^žxRYF1sZwUmɍͯDj6Fdj5z7@B1BK+Vt(%c]>3b=ژcTKPe5^A`Ĵ%僄1 $$@mi1A)75yVPm<GkRaz[~g0|gׯ{ɯ?muA;*tO7׊M_4Zm *٣&@@9Nz .퉆BaQFS 뜻R`P%]dǻo)JEl_ P:mK}cLaޏ.Kɥ -Mb2{sb+$UѯjE*6Ph# %a֛kXm "VWzq?X魌[֖]7fb }vpx42ˊ Uo1?|~@݉|dk/ڻ(QpOmiAҌ2b5ܗBd_R-Evi_@3m鷰p?{P:~_ւ %bFKn;J X/" M~f-6 @! H tY "yYv3e0g ڦiڍÇ>bA5"p7Hoz"~FǯDK:ҔHd(m%JP5VElDo3 @RSO@KQ}`,zY)CW|!D:##}d>:ʢ8_ӠvDqvaX[;)/mxO*,70^QR|Sx'%a~QM7%D~7eц%R-1 B\E}P FEv|?+]()Bgyj0!{]`0LO0=h~qhLT3ÂFyV]tf`"w4;kVjQUcA4 Bfl&TQ/16l5K D>At'[M:d)H9  0 Ebwc,|~ƈ|+_/}s&戽j|V#̨KyoT]CC3,h$5C QjJmRع ZLRwQ%n۴BҳXgZH>کCD_X+x0 x xL7g~^byM> C 4Ln|# W~hN ۴muMϜ͸aڵ8EԡZMh‹< >("'a.bJ =!WސqĝaaΌOn_WR9g¶G?=w.cxǞ_f/"muݔ@}럡# xMHwum426(Ւg~J[!-J[x. ~hXeuLJ[b;0 ø墵[c?=!Uӣ1D ;p7 sMa\?u sÍC57B_'#EP*&)V@lc3t8E{b(~%+%1L۰pg*H)榬%03# nqT8x70)/L>vn7WY?o& H޹/OKZҐ` horX`Gc8Gͮ[?:=oe(N\a lHG,i8U"#Z圶-x&}jya>ˏIw/ ?z%xOVx~TtuБ!ѰΉeΥ騱Ih ~]DZ2d?ۨ#A =z,#hǖZY %2Vr-uN@RZa-㏔juֆ,w ,9DP.DT Ht^hcWVu~"Jt#kZ:*EB>(?\0 P^*' X+5[rC;Ȼoho㪣Ze?l]C:tjNU[PA1NRڒqZpȎϐQv#Wa C6MR R(E|SAidD? "Jg!EPJ| τ"cU!,!L!D_l)a E}ῌ=/n+9?G;~)Wj},nqSPDx@ qP9vxN";>M ";Q3L+] 2+b?3D{e@fȚRTe3m:a@Ι dTASZKDh* \ %ya,A$/Q^Z-{/#G/~>q?lڐH0ƫ5V8 ԷEPqާҥw=Íȷh3Jd/qmigpLi_LD“V:`$/&\|D(fg^ש^G#HK'2YFRayNbC[9ה_!}tCtra$!{ !zh7z=}tIZrT-\%uP7SEQV]R-:4%05azSEe9Y)Dt":zLLe?EtE8oEܝLHچy}% 1ɷP"L}0L/}lĸ7Yw #b3_9P\_T`r{{T=?]'jˆC_ Qa[}HT#2lJ4R$E(˦IPĶ'^ BpRP"?BkbHOGP*$:/*-4)P|݂DR-K|#?C"nhr[1HW[k.0FO w+L"2zѶ,9Pٕ$]+B%SɮrRK%NsST!9QaZ<&] D ʹ7\ޔp39\jU|>3 |h3(}kino ολJp@Ck:QQ!zCOf}p;Z+&:2lCȎ/TV(LK(Ւ$r* (Ւ 6FY3L2]IJXuCWn_CEhy[@D NDFU#edyHEkUnWQ &tcqgqMfl%//{x$mگuLᅦvv_y_\㴚Pۡ'.r]Jd@FYw{J)ιZrQE#x-oS@7@2jǐB5@oG j"x3}iWJq @ڎr&- N٠_50 @un^pN_njCO}$\/0* GR-mDȎ?TƉ =o[O[e(j\Dr3i*?!G>DuF%#+Y3 P^Z.NMޑ s]▦]qiָ ~mO,<`N!l>m- (@,g *Dv|.O,׵xlqd~EO[eEy]i$1d "Pu2a3ޚ߿\T{c˓ 3W/#SN *ꀰ9aՎhg  "ڢc:{`&rPq2%h>TI`! QHmě$TvI4WBdϻHn3LݦSmlg; kUa次l%{҆nQp=&E}Ӹׅ}^pEH8[_dL(Qq˺)TU$e{gV!E~u{sFZ2dLKdɍGCI 0LG jO|h{ǒWNgþ[,eh4-n.[{ZbrRW}Ӹt(Sr+Nɪ sj^ݙKaozADnR!0/''q{pi@`;bRw78ԗTR-A F(OtDv|bkx[k)PљKaw4#H^֡҄wGmuiq_~ >嵹7|c7ۦg~~S)D {Pn#";F{3 lsXl Üx%&ADFY1]&Bkg8 LDvQ&%Ȫ<"GƘDѹ ڙ{0Lk#E!>Byi-%ZprPA=/<7U<-ל͉ #_ܤ!a}$Bzs|Ū+ct.p|6J3a7 ƥCyc sѣɩRD1Ć٫T)s(SHSD(Ht}ȑIj7V+ȑ JSXjr -Qw3= i$A81rgavVRLn⺓CgӧGUCu12h5zhEs㚀r0kQ4yÑK~7ZMiy,roy-[+Z9-tL.ַRZm@>xZq)xZۊA96(YZzD"?BdɈxML(7Sv9D[ ׹,ͩ ?/ @/;0)߿5d5"yY iob9tW/kǞJFtz}>y,ߣ:D)b5BSUDbUxK{ e ^T_Dv|SN8e+ }%OE܉h7!8-K"NOտw3Grᱽ B@YKŤ~a:L^Al%# K|?Mzi__-iX:өZZӊzv Oف_eUpԆV"I)Ւ ";>ֿ!BVPEݕajzĝH .)U{U LGPk ċЕF) tҌ#YҌA',]_zߜcV?yE >,hӞɦmu_{iO/ز j7k2G\eP?:lwYO]?;p?;h; NxI|iyQom17:q3 P7EY*{0rSbCc\{RaypdTĈv-,@o:5N4o_v{$qƙ?z*m*";>U";R-V81J4 D;!Ȏ#-jk-vaStp^NTwWd;4Kl fpwS'.C @K0^ .ܥbQ%I1{ Y0?Ģ /YőCϩ5n6*Pgֺ.+ ?ۜoɇPBMtL0BL@Ka^ Au{/xGV]tjW7#ZUc8UBM&JV~x'&(n9D{m[Ϳ" >MQwNfIJ2. ^qA?W[ݴ3c.>ձ[p~5]dvҒX稀MMuou?.P@eywdSA iIr3ArF 3";uo - UZmaFǓSĥ kESrSY,W!-Qs՟v^4v skI؞MB'8Qazxwㇿ'/hM6tHo׺bXyM!B}Bmpf >@AiܧFMPEv| Zv;}XkNG> |7 Re^}0N T'qZQ%Dt=Dqn@Q,P[u?ut-?|1V؞9R= EdSH^֞5Xdݬ&B+;ޗiv7=ik0xaa!P,(h;4r(˗{Z[ HO[eLN^Մ~$ɝϽ Dֈ؞TO DU<ӂ_}LK[(IF "J^hx"gSwsgӞÌC_Oac}hu}Q!8mYm RhTk+B D~.}L 070҅]e"k V~lǓ0q'.\<>5pahjW6 k^nQeg֑t(Yj,Sb@BPNDB[=S_!:GtStOq (ɒ9hI ΅RfLp@޼x*BfV(~z~0yHwoc?}zS٦_5">l-Q'5: p@4kB=56C֖o`gD?xj*P)rn+#F(I|9b:*)0o{{s\ZPţp0 MV*o|}GUaUH??%?iS+#{s!xO=% ~+cew(=e  oP0=0PmKhXx[J9Hiɒaqva?G-w^TXzAĩZ=?楛Yﱂ g|gFQ{+*C4!PqaTiӛ?oIxѷC| R p;#+lh Lhykib0BuN6 3v )10 3ދB}:^wZ /v(e auWWjTԝKa_}G?(KLc_pԊ٦BkƮ ?LǂkxKP`%͓ѦSɢ";(r ʡX4[(/ji#rO1 D CpzbNjda'odgo=(Ꞹβu!G ڙYPlQ.HOCCu5h(uZtl?r}=%i\b(ՒjITKў%2>D#\'TR-l";S5a^01B'3NN0 "'ody+OAݩ:;{w˥ͽ/jvP+D wxZ +jWFK/ DZBK3K 0^2FTTwx&0奙vZN 'jFo^_uE%\nwhmG"2ێ q{+C0* biT>hcT5ui.Wa+ɩDd ޝ=3Lbg3AKӈe.OhGkׇXa/\\{2о'h Wٟ=ZCw#KDOӁU!>MvMxu;EM\^8U~jɤTY"QFua$=*܅Ddhɵqڍ_A4js۲R[oB2Nn:ݙKJ͹e; do=c즕W-W=swo.}iѡmLxGF"ee2vKsYP_\-a~AoD3HT(Do\~s})bvRu}زA>u, w 2yV߅rC̷?s~x7~4jc?9/ ry{hZl:itzXWf|Xa?cANnހmCH*}1mȑSGbݢ7ak_[4AJ4X ח]G2\aBouNMrtZ %r//sS=V":XLBqWڔ"y&E"yY{>X0".dw>D_}2~Fo= )}#2F#pxpԆӞ (u8 @%= !@8'$astN%)JHXe ˈGH^f,(WYK 370aތoF_Djk]Ey~ :{|%[P߆S܏zq3x)1}hN>Y._%? 6i1qTL 0}ިb/W!DTDDDr lB~SX$/`jNyiq%/$ (/m td o*aŵc5:園M?9ϺbۑF✿bCUMޫˬzxR-)PޠDv|4aoZ`Wl0Epb+e0Lǭ2:V "@MtOSL:ZmLl/f᷅5n[}x>*}OOUQY,|4;4j}oȰ =6,-Hm5NTK=taz oG{ޑΩX3^j '2ۅ%/o:6GԈǞ/XyM!F$*篺fGU|?3{߿/JTK&j[K,!Wi=ZUua+pԳiiOQH^KyiE2QNHe)%kg~%O87k?d⮗\e7G}*MҖp Ο:r кww 5.E9+Ωg D/W}RϿ (SL\U=1r.cUh2Вc~A.QW<1D 虻| S'O?3Ls ?73bRV&{޸n?Rdohk1y|DvkC"ܨAc%RL H{Ȏ(a:sك4A~_EgQQFmEMPnΤ;gH^fbG}|QoP~#@46mP?hPXiP TlQ V`ױvdȩezofl=LԆ2AȇBmaiB@ 2(U#0g)=.e%u(.1fg#Tȣpdvŀa*߾YP_ȟNgn'Vǔޜ꠆ 0 i_bO_cyCtZ,jd(oTDvһ#RiPEge m,0 9bGY3 "r qE]?qħtmPΧ);'i,6\G@ldV ?}3VXg'D"!h| 1 kg0 o4P( ȲfMZ!VÔI7qӴ}^~xyᏌy^ўz7(n\{ZG!914`͏bEjy3ՒPš(u~fr"ܥ]-+(ug0LwTXn<Ç_TH^YPyf0@_[mG"0*B+QۤGT`5) Mx7y-~u~x߽ێ} S-{*#Q]^IDAT !60g=ˏA8taG~r lzWlZ𿗔U ]6˿[xʎSGaX8.Bz$d؆b>LVhVYZn;_g0 %zTNjRꙖUQL6V&P^QP~az[~gVnzg #'o`|Ga}|Թ]v }㭯_;r?[@ _*7KZjI($ǧ 0gDOGBV%: ?zj1U@%D*ʉ(Gc۪2\ 0L4,֫oz+#޲cs*-o=oO,5[7]"څ% 9\lhfnn046k@I< ";Jt ҥ`7BZ#a3G=ѓd !tnVs9(+,!D(o2YI+Y}GTѡڒ*-FG]U:˿EA)qCƙ?/W ♫y Xޙ968}*.]>` 8QI#/:I3ׅ Tl?zLR~rLp_$PaG~.w'2 0=FsvԺpA\%,Rn*6?[h3PX šѷ MZWICb5A6,Rm7ϕ[~89߯ ;2/ܑ6O`X~8k,3~UsXrEv|,Jxu;2Y5laqz*ӕlWS&hNԉp8GMt/vNWɑ9V:B 5lU2LaqT檯 nm7>Р/u̦jxoeV`㾑fx|;r?H8NhN:7cAq=qns[b Bd/Tzj V皬0 _qJEzk+d _!irNNftƢaw_Lp|뗿 vCOu;>w*CDŽFvx{+C@|T4b}#2cڐr H_WY%a @U h[b&1@$0 ĠũXB$OJT8Uq~*H@wߪ8>ݝ=UZ':LRa<ϫ<_On~Q'^(8xbwmsxh>m8%Ν["e6jLp(rwP0LiTp(jj0UeN+(jb('6hCQ,d*>p9׹'W9mʊ5}%oFOTmWz{M".pwtx˴g$T=!6&ޫV@|q.T6ױE6T\T"띢4CdQUpFP0fxO(5TTCiWlۃϡ0SMHD'2J-.q],/bp.O:<[oJp"U9!\<~zxi3t<55}b/e\P_pv$^X'`P,yK )QKP-ǝ$wl+}|{77-P4ϦoFG`UDPn.]}FV V""*\tMXZDejhÐzyՆMU[X –ѿAV_͢>V/M*,ǵK`ۃM ߺ/ 5gۑ,^[_oYEs^^JV2O 70|% jlÉ@Eз'ўhc:$io\0O&Q5)6{2,ڸP/JAJN{#npZm|9j~o^K Z8 L8+RDvX ~lG :gjQ/F>x%KY(zSIQ0}%^E[c sÇ]ptfj-sס'?mο6ܿC6+ X~GzoLX@rQ+UiHPE @O}nܗϡ~].R> X%4mL ,a>:o߁NO[ev\"b%o22g#m@1г ܺxO*,PzADqwVyx>E`2[ױhꨨ7JVӲi0ItTFsohb-m;$0Ey:0&fN;|ENr3OW$"pLr[o~-j-̧a:!2ߺjPJ P LߤF򆼣ۄjtNx* =2HT/tX_vX3l%MX=Gэ;a;8zB?yPOwlO>Oj~S$҆]õ[m+Z,rN0bOKz&Sm|C7@湅fyWҵ3v&eho5;*h".hhvQP<VGpS n?4DW`R6@{b]˕p>[8 yU x.x,Cyi&͈UJ*\,n,,;5+-Oe˓ 3n*?nہ[p=<''+нkU_yi1 C}> XvswۗpTa>D EvY?\r3Cs?;KSx*s}M<Z*l;SMnv=F\dq\";> Dܯ-1,489jKps~mށ59Y(~tKmtt|0sjOw$gn|slYc= dݳϷYCݮh!vrkjAԱr.ܑ_#Էg/};YXۀf ۈyH5Z_~[ǿ4蹣QvMOrVDa>b6+66(URKhd8e 1E=-{X2a~"^%Yw?xkWd%VR D.}<.tE\ѨA!( }g3+ V>ێD )Ǵ#X~*x:{5i5o,j[e#4 `3 |gdSM;cp0W4`eU=]7@YpXw%zԱ6 'ګ>DnCt*EUFȋ2x*0ƾM*u#5}{jc\g7j,me %Fw\)J/6(C6[_=݅F-U*ip\ )ug[)RPF6<E1On 3dgI% PZtZ~bZ `[w" K$ a̜ɝɽ3}^`fΜ{iǦ(z^+9ڗ&ll !% ҅w UԿ`֖"ynKˌkPl,׿CLTհq\$+LpAKCEceܧk*Qɓ0}ʇ;tHchϑ&HuηwB>JUZ$+ u8&7KS7qw4`pyŠ,b 4_8 tK?uK n?H`TtqO!|sn[E*6 bymV,C393or_| ;+-+NZ8i;3-09"svVwp`pR.BBEYYF'f6S.0L)*>$wSd)ycd!CQrDl&ufNug;gfCem?Q—KOvOr-Z;sNr^U )2OR}GN%IrDh-@\ ڹ9a%ݥJhS_/ȩRYHUy}uYrmP7&nJ8ЊKxfZO^Z_ R%djJ Tv} MJm>FAz.(dA[}W ǡp_ƿο4%01n,,EN"8A brQr/HBLRFhp=X'hE$$궧c(aFѵ7ߗ tۮ k]"S@Y<`Sk73J(Ode^v0 O/SIUlTn+z=k ⎿?춥ecN\|&:g=J$IYᙬ ʲH5.J/P m~"`v&S }"9ѧػ4sP,֔ N"QPɿKP9|%Q@q.C9,ГtN[OLo飉>~,jRߞX҆U^ O τvϽqNrjq&V`rZgk}oEd{ hRIR`y9 Yr\ۉѾ6K# p\nbЊ'21r o#(n M [Δ7.EY"HN?>~01ّ\R h/na$V-6 x0 tP-ѓ7c(!IBBN surriV BMR$ۙ&-Nk;wrCۈglc'@$ykŞ3y7[=VnnwB7bb:e^5 _==O>gQv7GQ"xP11Z{3: k̃S~w,=8 \~:㤎4OX'H|gg+0#ǶpwG* EgП@d!s9bL}Eip ;8JE֣qwwm+ɟy<~ d濟rro[iiI OӒrw:צ")pvH{DŽof,+ur#3Ǧ= Չ%500ʮ)&P[ݕtgousOoyM'gr7-_$4+{Dx̑,Ln%54pp3xفQuC9W}wuuUQbʰXC:LѤ2& -͡)_33Qhɫ & 8C4_d3Lc&-߽PN?yu7׭쎋-tef73:e7$,; .~30YYU)Ov6WGϳP/p7 Y(BKVFd0Mڶm֮_pC5aҦ{?%^Jb6$vKLH%N5[ݣ=&jc?d.,rd-q{ш" z؋.E&g唹?'pr ֗Udv\@}-E:\Y2٫F?n9S+Z|nɲ_fGu룞5%" 4[EjwexFKX"q:ړTAw|Yrɷ-S. ,kB'H<-OdW3/v[j3J_ښIyCR+ i֎F|ݷ{݇V84?8ڣrG Byu`V,շ'vbJߧ;x3Óݷ{Dt"kwXKm{{k14.kbppwG#9?:'gUd}Qo84/=3D mH%P֖vw/10MEG(%eom2~⃛{gЉDK5iШhu PX&T9ֶǝR]4lg o%, vkDcОɲ'INssei%aA߰AD'A4_d7f}{IwX<6}̦PgŹbWb٦A>F(QP\JN<9-%XDjRzwIF#9go>xw59?o-]8)S>zEebp+OQҁ{ow!JWg1E˴"v:8`˸"E.kV O6mBuω;pFBsJ+D^yCdz9>gIe15{(Y{k^F(ag?gcYyt{P~)ږ 稅vfxrh)lr-\Юy:7xU9޷;43Y_[]Q֯S 쟃#]MyxO+WYbȀo JZ/cb؁%{D8"/' ׮OKT6\񍁤$%5O,48h8nV#b50{(vפYi[ޘbWI Ȅ54lVdՈDBrT>fGtr1ؙ@Oc@#0#Kz Kx߶u0GA%cӋHljqZՐF!PqQ\}ҷ40{W^W{ξ=q+ ]Q \BqM(!. 7i-88p:! $;p㉺?rX*9Oj]n{f$SWb[E*+ox>[70z@ |;$f?HV]Y<%WƼ#ߥn{וUW`צ4sYhzm)o lvH Or8pd!TAwD^N6*"=FNk)Hy_u]]/ $exMFWr*R#z-K'ܧo⬾;igۍsfmW^[w* BTbe_$bKBBwݿ=J8nӁZ@‹6Hyp#Ñ8p -Y`X.!R{ۗqukr'jݩ>YJI%IKl3o?9yGrr}\x\ {狫m+5H(O]_ +rK3//,c6 bQy|V^YOONa ϓnhO)9N:~|@e׼bv,fʳ\=xb2!&iw^ ^lU xO)up 0N&,K{ JmPt鷺w/#t/[`V `eIKlxdWbMPZk{5(B9dx|F;3 :R8pp0ቻ,)e.ƒr**\d4BLEYR\vrbýJ,S׸=m oN8tO`e6l %.S+E1?c[aC5/To+j⓿4H үֱlj;pOܥ^{P=ef`&Pd|6X" \v8qwzΨz1㚦5\~Dbr܎Ƥ8>r$V׼$ҒHMX@vTu \1h/«%50ĭ~?RocƚA ؼ[RTK>l"}!_:M8pQi#\),^&XN6m8v:4ʷW-|ɩ>)ƪ85S\&5II7F?w=Jlۜg$W7=<-ۂGO{鞋w}5آH!>\ yN8vw`hK׆UUKs t:[w=7!s/O&^xlz O(Od`jq&}[E*3V\^gXi0 >?)u"-^ϜRϡ>^jI8N(:$Lt$18pp@1nPߊwlGTFHͣ"[B#X !>t^)spX|8$_ 0$I? I1M eLC(mHgoo}!o7$UJ}5}ﻞP^*%"~W;!.צZ4@>+vfx<QwcOzl>!FJ–D3þIAW8gt~p=gufʘqJ'eżt}K;_^Ȍ?ny n`x2MC6p鋓ذ7`|Y+Uig<0U 'cԷ|>sȺ8RiDHz7#"uL382dTȇpPeʒ\\QLR[Ol.X2$ O)).K GbCx<8pp R!gEa tڿHd]t%49emX_,@Am L`_]23wU2o2ezvVw;xzH Bdq #a\Pnہ888;Lu)eb U-C4_d>t7nzS/tA/{)]={(U5-,~z6'2oHm?`|4 jM<$Oӷ[T45Wbv&/}I#qG:A(LjІ7U.wCU(oHd5X~~f2~6K~ ^+_(gy!yyra?ZY'S^ݽX[^r%Xyޫ5ٷ8Ow] σڙnp6iHDm=4G:݁p}zrx^4 0m<'cm^5mA-K<[N_q{U ,8 z)o᥿.(}'!-vO]}8݁#"/'KN.kҙQ͌H!yHKl` ep:m,mot7t[gNH(,dS7n'ƀ{s"88 6qw^SS<~SXw?8c髼>.`xݖmWUggbl3Yrv}{,|dNy}"n$+o2^k3o95pk%Y~  U(ip^buwpz !DV6s4鎄/Yv}qdcH)ǠD{;,EzTNuطύJSHn֟ڥnG˕%ބˏKk ~+KXOZRlHϦiZlcmY:뇲xg.,xt_S=psP51](<lk*0j"l&U1"PQpE1 C.RhȕR! A. kUHvcoUrwvpWNu{w䔹Kx?=nWڲt&173B>+Ogo%$ˮy%hhT9֠j ӨH8c2]gѧX% !F$" z°2M "6x}'78ݑ8pp0,IC爼'-홴|wq[vPu.7{W7>7&ʓ6}'vҮ=أ-3vUG̎!<::w0sADž6(u$LX"ҳ, ,Ҥxj͘fs+5|.0؞7D }ga9(1!TM86‰;h =;h?QT۷c#tg}6^|gşvEQp S+1rM6ݓxf&X}n(]n#C|(y#.Ȏ!2lȴb!j}D[W}) ýo:¢PY 9ؖԇJx+0!e1ϼ|oXYv#,5/&3oo4S^jt1tD88ӷ3=4iן1?&m@A]\rwOזHW ]x>[[r3a!YLL Z\FBr)Ip)3@!ɡr9p2qo#@O?X9e_m~xlzQĽ>]t ǶTf_7zn/c_O,rs&uEM@dQ)Ah i$oCNoi<#nY{(I~{mq0BUXc^?pRZa&`& t?Zo=vca$M7UjQvOs:!BGFژ#[Ll3YSl93%nsʑZE[FڭXKxBj0,lTms9!?-I1w%wݘ /zu/2z0VJMXdg|p?'.c7 [qi1EODݫf٬> L8ee8Z ;0P/|wZ+)3oU?7zF3'Tdˆq.@Eg0 ۫R8%v[+Kd3rᖵr7[IcX)>kQ$XxXe@i=hnZ7ڍA=@wۤw3Hp]Ӑx19ZEPg\X}5f1x4L:c5I0Jh|%-6 *pc[x l$BTHqE„QFDXPdăEc@(2,+iZTD݌['tva)NoH.H;\P_۷/o dŖAqpnMO%e?Hƞ*}iOټxwr[=#0y~} Xug-% Ly}"ֽ€kqگکP >fT xEoZ]7j!֖bX k'  ;Pd5b" HB=! =9ю[Yͅș_(kZ6d c"[?VTnatcD? ~phMRbN4p(49rH H Qr4܆ I6/~-L@S$9Ze *zZ3t7ju}L@M4#Ψq*>GM&N"@Kx*(_"xqX3z`67X*>fDTMcT+;xEsDxqK([QŃZNRʘr,2O. =ix_gb!5&F~!fWQe,K> uYYI“"ZVY"/Vl8~ ,:v{A0&Ȼ9aJ3X מݪnlITu{C܏ x5~O9em!V>}`i$eS8a'W}gH/݃NNL2dwowD# 9I%`ٻy*tJ^7f4gGҁ^o\["8(ED$( -2%Kщ4< Ey(@9]Z#!n>Y4:JWFvȊ m99و֞^:w`11$ r~=8A '""F~ w'R"Ψ;P85.9Ee>v#DG"@Եf+,86SGG9cL@ݫ3P.4{4,mm Vrb{3ɒ8Ľ#!^qyCaM+^8^z_=PoӆHU2c%,6dM,gg9s۷2=L-!VD) *,nhG%4q/͚3Px;k) na6֘,Qd=CAEOhNͤA8N3!>_ˤ3HRĤ^;؍",.Mq`C+5v d9d:-+[Nq.AE'_mY&?'X5FFs<ɒR9LD+cA{Ff|+DzXoKMC m(9C;(~l4_toYL?6+z3.j"<ԐlH``j]9^~OZlc[E*놲l zwqv]>X`"7ZkC&(x>&lDRη< HR$Q° PSv.9{Q-%Bmc:NN:["]DX;wCYK%3M SfcBCkͷjN,oQ]Dݰ21Ԯ>@jr=EfD*diʍ[ђ%<Lɒ.+z5a+J1\cToLu!f*u[$k~ :q@]âp1C;"~lRWn5|״] "XIp5ŋz)e!DܲMz$0yǘ_3^ Z=q+dznw:O=vh?)P>h#H&T}CnT[4ŴjN;\n;(H&t\ZVIG%J SQd/1oQ !lE7Y.0ZCδZM6N'6(!W򅵋cJx1pcm5!02fIh0AsU߷6@'`8Ͳ32Lk)7PvdSBQꁱ B%^c`H QR2bhQrư%Oz,ģqpί [N_7ҚNS_r܂}z N ]  > wfx1ob]8EZ8!j6J]74 YLz3(pCHa&j8fm~" 0CxߢHsމQ];L9)$g~:4 Ya*صehC;(!r&r\`}s3/v[~nhdڮ_Bd%\'$KX!f lC?z@GKͧ|x2,Ͽ;D3B;9(P$B,2=h`1Ԥe* NRnޞ8JAGXƎQ%>G~zy3l 8. -NBlF9|σpmi.D)@7T403cdϨ  W; ]$Vfʢs㙉p܊* fY!DAGS}TBT@ c&J6@I@D.~y(V${k881 _xE}lvƜS\u@٫F% LSreR^Ÿݨ< Ԯ Y #40OwQ{f8)0%͋Pd}Pȣ>xEjB0LAJ 1&؇9TxPN)Rm 0i8"HG#pa04|"f]:%}m\\*f,_db3TϨ$؏ WߪvsPHkeWruGm#h1p2m#qO\Ԁ9B$wIrUHIebc7]K٫FwXvtZ\LF6spRi^U!L+ncoUK{u~axپpwت"]oQŀFN6 KڋZAC+l%cq:F%VgtRmڬ\j7.#!33B9Dd]l5I3YM"9- "ƒ"r5L ^$Xk0#uDktpKeF8Ľ㣲dttV8iQ(돒R]Ma)^L!ji:@U5`r`p0~,6|mr./f*{b,qmm0|/`'![ʵ*oQ ѬI-wX kk;RG%D^NVjvVʶTk76 b[ʧ}է=$4&>B[tįH/E}DEP:ik$2*,!MCM)r4#Z>E,P!am&>!W-CM*k=B.?K;q6| 2B^!Ĝc,!r*JvDes^(@Յ8Qܭ?_['T$@EWbeɀ a/oHdmA/%>@l7@܂Kd–-2~ٯ17)CȳN0,Q?%<`՞rK"Uo7ErLCd$l  nyT@ks#K.srNp2m#9+nY߸~8'ˌ͎1)]xw/5?x33Fn\zSѐs_uې5F Q*`Fp{b2>aKrH@vP%#X s1%MB͋ac aӁhK[*6!G6*w\zRhuUOtKWcKWt\w} #s C7IR9r0OͅoSJ Y@߹ $◾,>|pwG*D w))(0n(*lI;͈Ƕ:H&n?*BzxU9hv0oBsMzr(aM7$,J#S'=z u?Z"KutZCuG&uKkK㖦][5YG#PL ']Dԥf- |&(f !&8m8p࠵bC GF8'oN?NESKȠ'DS#p 6 bޘwR2n5Rf!+γ !:$z$+K0cj&[ U,o]F@%ճ6"5BS3A:,:T$ * wA@_+'%~\J<5ݟ܊~ L7 :co`0SGԳ/hsꑊb`rLv8RGq7D5B!G{?$ߺCX}F{!Fc`jS+_7x+Z:F_ FiKS`#wѻ˝cS]UwcTZ:subOp QfYD#y:p]bx'wӦص/6dK!Dq8(@ !b|jR$(hCQ'9㓑tu/ d7 |rBOJXi0ozq-b&ƥ[~z^LLE@]Uջ ._IBGm5I CI]FF!JS" xZt@l|yڝcE|! 1yZM;/ 7])i0xxbk7@l|y{8pplcx/HcBR \ hNNuxG@.Q"T`>N@y\/9;::BIpp8w˝,+aVS{j.ZKe_R{Zj>hi/g-۴jP6vJWQ_-`by`bvYJ%P<.R,cy&2}M@" a8tDz^:v/6+?#[&88&1_lCH?o#ݏvņe6ņG.G= M;w)er^Ur{JʽJʳuT=X.+c$Jϵ:V:0q%~Լ("JGI,AK ASptN[sGUrj'UTދC*n_qWSĝعݯv%iw\/|WzO|٦A-K'rRWJhx;N{{lMc_Yύg+%+ ؗ-ꎽ55'lxe]L`8J3BJ9U,_sbAɶ 9a4 {xtTT=D>DZDB\( RG,[5zmnjަ,Yh惚{c_:d=vh9`8@%W wZ1-Ό,/En ^`\O#loOدnAfۢ 9& !ǪT0H#leD=lrL 4Q"S$V¤g5YRKi^Eks6<'l[ ^Cv]X4((pm=xL:BE:6-%.6ͽ]>}{@_Nүce-y,62^¯QZhG))j H@t W $$B|>bYaH9QaFʍD^VnLHU20qnX!QDpY:Bj[(L!tܡW'>io>Vb3k`Dӿخ&b(bզI4sΔ j4< x}>]!fYZ.Nj!&mUȉgݑA;`mY:_Q²k^a50yzWvoR~*- .=W(3\A]{KOM{FԹ+rQviņ#bc4=l&L+ m!ILR2 +25o}Bw6-ȶIPva%ߞx4X%j9QE "vNT]b5_l ݢmH{mF&zDh0D63EEHNXLBV4x"Y;ckTGGkW^7250+-D@}_6 |u Am^MkO֣H~+Iڊ4o\)oKdwC>j%LFI["։@rgW <$!`%\VH)O0ib]p=vKh fHn|0C ΎU)TKZB<#i?5{w5϶Hm!LZl+S >Apm吨pΫzCc_wFی'O"'뗖zjfX j%V9#cc/r/6n9(lK1i;QRlCM#Ek5H:NF\M3kCuJ7 GBW>8_l oۂrR'-GDy 10';h[Ey&U 3QZ_t5Gq7NPpB*!fw~HHxV7 mCu̯lࣾ]Ɉ (yL=/7w]InKE:q&XC [s?<}-2_.JP3dd+$!LT Q]7!Y|A-߶#_5Bh#Ag(M;݁d7 z#i!Xi&z`>lUB EV)f; >դ;{}#ꜱ!-&$dМth$ڡER>,&I^ZF OC `hDFe 5Rɼ#iTOh 1A:QvQ%l@$3%f&Szۺ/UykL$X'D=Pi>69VƐ @,c cd B ,_G#AfTڲ^!mRx)nU}\{Ӓy!;kV"n!/6dOC^Z6(,/6|hZx΂p:j"bl>!a_ 6eVp7@D?Nt2kWzghNzI(LmtCae,G[E~[KH&D{ߋ[їqNkǜVizP'BDO*O!O=tr;4BB@~_qlȹP~/ʵ Xs=: 7 V`٦7_]{dn~2CI/s"RC$1"Rb_@^F$}Rs4i&҄lx\=`ɷ&A}@D3fi2']G&̭x$6Zaayt9$D.db%xY܅ LJz.(M 78is4n_[dlt\hr6cQ ‚ DD$!rr 6 QV@"LTETV-d:G#Aq ^t?9{m6ߝ_ڶӒ ]TPxJAfPr2y^ H\A̳쿱څLt+يC4HdE/RRkCY@ CD6A+MA]ytenF'cBV Bm3 JM_ m;'Qd)rʁPGpMnHǤNIꪤg#]A !IY6S@EDG̛|ܦ.Ķr[Qa|ODiVdO&YtFn-s1Oݧq"ºH@٫F K:Ežo61.% LH*4ưtco-6iD^ng >myS,)4M/H )n ӝ[E0 eH; Θ/6N2J(dewy|OJ`C NDذ3Ec{Gjcz;hf::8 |J{ ҎK ;h'cYlmÄF76^ǫc!U>].HAEȽ@mf.TGe Dkoܿ~)dvQ/!5Y!Ld.D SuȖkȗ;PJP_<u.Ԯ!*ѪD*EsiH6x\jӭ]W%GVbW, ņ iyD~܍"EnhUJ>ԑZyKmműZ  `ڱDr": A ﵛ*N/ 톟AxTECD/L,*C;r5}k/oIy5Bd͏{ăWo9'~ʿ -em<-#x ^ (7 imKnryfuzTM4 ;6Fb[-%(ևBGfh/s -Tp"mıqLr72wW+[ރ!%*kwWDN!n0N ̭QJ.ڵvS#ZxbSS*K֝ 6Uu"-MR~.} xI.V+ $tk`m[[YQ: \IBrYVꗣޮy"ED@ӹ/vi9m:A1GB"=%7b <9v+TLjƦP?# C*H+!QuLEݝ#W"'hjLpU" YU FWs~R\5v:NwBzS(@l_wIXq˸_n-X,?VDZQЧ|6|uk$ Z/g+H 67Vk2۫αl1xXm#) N瑁9B%KD ߲QVؘ6HY-X[F(HH[r7uU$w H}=|UI ۉB86hfVM"=Eyrѻo'*z׸/>_^_t1|?ەU>FO?N犲*x_Z)}Ef {wxmjV7MH%62}w՚~oi\ڀT}v+.@;PAT - Q :(\]B ʕQWtDqwppi)ҫ-ŶTޣ3J-HM sY?T^Ǹ`DkIܝn,-H8;~  s7>Qzğ+C2[Q. Bd @j wD۠2wVH7y9٨Fk1MN{P[rx1r1$Q;.B!4zϼ;FeGOky9+L-# EjsV" GHYl#mqcʕSZND^YNp <A(r}롣%c@ XܵHT%#G!O}?cU z E]L(Z֖y4r"+֖3y?Zd]Tն0EJC%67k:5?RZ=)u~үwz{0KN(? .-y9n>9ev{!`'s>R^bhlf@ xW@@4̚/gIDAT6dOCB&mЮ~ 88պPu#{q4D܋^.G?w=~p?I[*\0k+2}Uޙ*{_ .H>uM^V5pRۿX{v{Hf|#\iC;_C]me.|h#/g&*=+R"/gJrp2wDv%TN~. ǃ|I)ssFLf"/g+EI<%ʱBEsqQRZSZl- #+;K/6*߇;6H'o}S )a3p }][̯i{kaM:Nq;YT@{Ӓ}%$~_I@zK/}na >/.vVVٞL(.Y=7+X=D^F+r<v!%X[֋e { VVpdU>7y~\4靝9n䬝TH Ic ?d$5 l;~pׂ{N94m)χ_I7ꓚ*~sEֶnYtQS"9$*.Gby9NWJe r Qˌs"B\`)&{TgH&rEO0E䔹KE~_*AB쓎z&gA_?͈pF9ppA,!ZXp2du9~h R.@;QWQrk`-,y9KQy4QɻiS7fw !($],"/ǃd qQ(Ǩiq  >/"`j&٨M[ q0AN;1:ȵ4j;Dbz>Dۈ#=^Y2A 높~(?ms!uG9:*}ios|ϰq|+R,Ek'udx5!~]9;4U']>_G\۩s^⺧%ƾcYѐ|~ƿͮ*FzXѯQd7HyB('?H@1n:ZD絊kίQ^# ?E9+ ̝(rhGX4'v$#T}~~, +"\#!6!!\7ڥwǫMQ)=IkQ1)ȍH_+R~%!gj -lOnAr&#Nn&s4E"y᫄ dJ(m]! kA$_FNn^ M El#@n\:j6qZqhtQecm%NAü#~z Ф&i]lFJ1l}gqog|o 'ő )ԗ߈x0y?}5qYk̬M~T|ڐHmuƼ5}iDU~ߜ3s/{wc֖q&iYq-'?+rr!i9b? 6ޛD3t_!:sR^`$i7^.R~ׂB2P-r@ETJEnΈD|M "nCN[$r"UH('?՛KC@p2pd9ƪ, "n\&Y҆1. s.Gh%]KHD! )+I.B\IU@䔹v[/0]>D -p̣kSףI6_djKD^ΜdSHqHNAH$\~$0\S_Q+ (}nsZi`G&S!b>6^"e8^86H7Rh ߟ 6?exnS8WҐHP:|a԰fݍZݲ:w8mמE՜ ?4¾g/뀂yU߹A-o u*lv>V^Խ,i lkdld?+ͷ ${i)Dݳ"ُ>F^-'!9`JVGGq(+5,ys]"Q!d]T e[E* L -QO|!=ú\{'Ot1lH⽲ Hfq}ͧ7~}_KҗkbL/#94cwF=vjM}Wγj{޹hto_!Qk#p1F$Lu.䔹c7cƆvQGO "cyh!cZ+NxiQ'*ad&lBZ*j!3o?An 3yhW!Y[C.99*<:kjĹGð(/3ZIVA*Mwն>fCBdeMt߳PN[$~9ync1&%#$Kα8g|^J*,g8gŲrE 5ƵT`@y}sp~2w~5}v7*@j%r3 ~5i9%B9~ /Hf\%Ƃxږ;t$w r NJye'Y9Gۓ!5O\u9j}HG]CEXa[J> )(ӯs_Խ;S,`x$qa۟EE_ 2lWtw@8%("S4Ӓ;Cܤ`>QV ~ )'Ӭ5R&!##D.$>R{\"="8cAMHD +̫!B3Iͫm< tAHYFj R=!y8xfٕQwNvs5H:έ"}!353R˵@(96<Dbl25Z F[Dأ彘uOX֠rOnyuЋ"4W|6_ M66y8e& \`D}Y63ovmAImX`&1j\4KDC  )Cs&nLDz@ohV! $2.HC;('qz:}N]x- \i)é." q {OFwimE&˺}O[ܗR\~׃/ {mơ;zﻦ繞'G)@||iƒrODg*a, OӻJXu좾3 q^jCRãr C{ɑe7Xo;sܜ0 gc#;l%w_r9n()}zd&?g߼Ae֐gCT,lL=F,4?S$'/HԪ"~ MHGmQzAfsY$ąE92)fXIybhxXEE3"h@5[j:A唹1orubC[ao5$qH)`oXCnlx,kZd:6F`)hS ]9JAMf|'2mJU:K7B-fU&R +|KL7%y1EzAcz'Sw hxKlGC;(q/ohMok8 S\+⡦ټ9lu#}\\Z_'ʹ{3ՑQ{@wqvjzTN \Թ-d0x*j鎋K_D %܍.nr] )qHO6J<K@ $moZ| E63d`jy֌~(o^(`$/)=9!匌cE} )',y92z#BcơۮmW6Ħ6Ӱw0'+BZ$̾ȿH9$!fI6`h PiQ&F4H$U|PمM3!i1O3(>Guެw+49Y(`(~)!Ŀr޷=H2a.;b+yrW \k$ l֑:(Syk5nZ[m8]>zdPY>SΉmaLzNYz1KvDu"Ll|uݶ^f|87L`Js%h0[Zv0 JjC*2EcNo`U jL!zR=6iҲo p;pL=ޥhwe<-D2Eߡ5"/Iց+tG&TTT|nMCP3xHZвoImcRefq oEoq\L3LZ[OV7F\c~Ѳ^KE^T}'`=K";k܈Ys EF4f&H~B9DBq^u"0-K]YXhl4pqy9S唹Td"Yt^GX-t}8rE^%5$yK)̰>vF86`DgsCeוSij W'|7&?eFmdt+d>ineJIGc[('q #[49)sLzsHyYuESJx/} u^ݶ#D'2vn_Iu?;m.\I|̮vV̘'[V7P°vVh JlN ™ F r&'c_ uiRlY֤" yK0o! am7\'h F4֊fDNd~X-SzcB%*HYD* ^ߎ]{S:󷡝.8!*Rto7L?/[vzO]vN"nC2zR k+q,,Ól R6VT%!hIj7^HyB4~@)%UMI_3ύmՓ*PMF`QEp#qo#&qׯ2pUm$%z!|"q)є~eN{H94Jag'd!$R |~1Q5%m~n>rͤݒ@X cD'B:IΒ}/\m50OV. I"%'ۺ"4gpʈ;_BU'kR6n?ē߱u]U"ɒzkPb/'E*F1"7+>G*~Of|A#d  /8=HE!#1U"kjR] 5H)BUߓԫBj!ܪ?64-B'r[1{ dc%se2%WU}U%ekiMn[Qy)snzx+e@Cqn+oOXg"F#q8X#Ϲ*7{F?=鼕8='בtG@6zK>{܃-IpM uvKI(A&^aKʏ}JU>?LDc()EcGLZ;`` ŗ|`қ{=mc ',/]mKp%/&G4!)7>`/RF|0 *2A{AbǗq",iO*^*kun<7dC;KOXx|]߿DfRnk8] KF>T㯸 cB,wJz^YpOof|sJ9VVwA{Vl޽M;3ɷڋ/~4ݮ5ŗ(~"cPǗTM\#BW27!D !@SWow@j>XUGߚ& k}[K Yr.7qHAZanj^ Z,~O0muG+^+q?lP? _m(2. ^C Z0,Ē~YȿmmwP,ws:76] 2OH1^JNڹm>%Oжy)tjɘ"W otZf9U9sgpR>3)l{EzBe4Yn8H|w!8}6%\҇Iwϭn >BBʛWQ.b&Q@"f>Fsg26yp5Ӗ`_}',rE̓ CHKvG/@EN5* 5xWb铍]"$rMaƒR\]Jd|>^9ޕT?bbFd`ʪ>{voЏ?\.|ݷwD!-*i5gohgr]\CKs]?>/} G+/qaOZ**f 7܋l##m/zjc[1Kv? Dh2:_MN>nV./G}#qze+$o)'T]x"eq]V7RQu}~ @}֫L&l7|xHG& !5`[E*/ޛʐb?_wxL竌[OKgUf_>/7ԝ[Ɠk湎үעs}go(fXe.'h7BA]]52vOyʧ|w:OuG/ʯ 括ŗ\#īf½xlzm7o8&W\훷;7cdzE bYoޞ^soSՁ@ )->7[gxL+&n\Jl8%;s1Hy@{U8 c_sYfӧ[6-upa? yq "x&Wߗ5a.a{-r0;3M.~hՃ[W]K$_ʞ0U9mTR5s sۨ~)ND8_uÝBvD=w/؃LF"-iƿHu1/pw;)HEMԟ]qH]P+kQkpoҦh5#*i[CO"[ޏ|m[IQ@{RX_ӹ⾥C-m,.率m5!ƶy0NocXꏒnj4_xm/v}lvHkX,`jijnp,:6?u\MiK"%])xޞ^~M/:齕+o%o.Yw_{?ǩIΧ7~q括_T\鮋ǝnD.yhy܏;~K|Ke{<<3=2L` E( * XPK?R֥hZ j7DT,Rv@& 's~ff2!!1|} 3w{{97b̪ZY@Dzj&E_M;2Fqٮ )7e:baG -7 nLscOxdW4}'@;1n&/i`\JɱY܈1+JF%ċ1u/ Rx(\3%i]S0%F51) H|(w {(n;.7EDfyY:gƝOS ji}jM0,ֿnK{ӫv5.0Q"'(un'wztCyu-"`-ATЫ=Ҁhizqqr` @Q~cy_(ny2-vL"j';16ʱU'ƾ46')>6P܉1NyF2B'aZx3ڙ9=o[t6nw<pRL:RW7eekbZ{SVr_nODqBV?w~8_L#68ogX WH/XCzBC1AbeY?'X"84LlEᇐ =&oRutY)הiMU ^w|{yi- ijqS G4 ~:;Ӹy97qVދ;*S3̸n 36\qwB;:8lz4,~ss6P1 Nm8r-p5DcYs(qVS_4yQ 4YY /k4KU/Ӟs%t1< 7]'xrۊPg,w^/4`< OWT}W/e7~̲*kUS+94\}3nsaPB& szez>'f@fY>DŽ}/JnR&,FsUTL4G>o]][c"6>$+r'x ӢU[:Ni5P;5+e D[łvY_MRT`qM͖B'z>Zpr[, Vig.PMqL<.kxo@d8rf.=} Fޤ&` Oc􅋗5@-9$ɱiR&Ǟ T*#/i6Ouسr'ƞgrw~zf!wb;\Y:&ېZ=4Cg?.I9&dG4/jq؛;12?3ы :` zu6e?(mOUK?rxi!p9fC=}móU1\IGYTfRuS?Ծ閪Z[eqy*Ҫ_Kd{Qzb]z-q=q̕z:<+i-i~s @Xfe ύo @",6r럛l/z{[\^9,VGm0C8j$(ioߦlpxf+rq8Wz)UgBV'HE`_pW/WVfV݊t|8֪ B[m>Yu pz-|n'Bnۼ7?k ppTCwsIЩ_ mȿ%>8K[̲hyms:Т\Z>P:O /琍 l?/.5`i옲4;,a+9E@ gHͰc1r'ƞRCYD(cƍRӸpR//7\BO2c0!nVr]8KN}G_ XY/[=}AM;6Q֯0J)ΡD('dR4kӴR@77m jC/?˃]i<8މzaчPGU[b>oUIn#ud?Y KN}sѲ)ad㻯\~'`X}op~ޖ\t-iX/s9Ӿn䇟GYqJ0H+&M}#lO\>u lڈc-l'GiѬ܉9) z Z ^`)ݛuM*F"C^9kfsDrwT\Kz/|`~ͬsڃpWqΨѴHWwTLV>æ7~s.֔Ȳ!`z{x~$Ϲ68)yM.#+RC2C&߼fߎqQ8ر`&yΜޯCP_yԼh}Iسnl_~n`}0"߄Ow >擝_ϰe9lMSS޼|{?XUj1MJwX>mUC!\s4R^7h1FaC}"&Cr<>JY@>8|Q &,!{a.zoj3bKhQ2>A4+~dĖ$@lޚ>ΰ43#hmJ˻3F_qǡw"jj$Wss},= mo%ho$?u货^[O֔Kh~F@A4\|kiB1 Rdu&DČF4)7^`ȫU|X&ۋ^?7eeio!|ƌ eƑ"@<@?.0Bt1NJ-7/YBa=1%w%)eq >MSt^ZAGAzztk/A*^RC|Z긌ؒ"u?fZeٿ@fFlI0<<χ^#T DW֌?a٫d9Hw2WOPR^taǸOGIhZ* >wnߝ[2W;DǛ>֖E=cR&LYYjMu'tsP|j_ kfl[G/{W?Ծ" ǻ(o ﴔJhDX=s|Q܁n2ʩV'xc6+֬invoR ]1>޹$H?v2O=_^Jܥoyj(̲8o NkZ'v+*Z6PRZ=|pe7~zZqeU[b=)f.s^#v%jxз#]xml<7C S͹_xDCyq_] PӼA(%Y)WǦn7•o2c%[yyafY,1[2ƒRX?3bK N23-/&}0Nk!ss홺ͬH(G}-{k 7>^W[bn ?PLϨSV&_yL?>F t((َZ-F5֩gەꙕd鮿(c: 7>6 -4ux@;r07øPD#쳿 My9zEs'Ԍ{3h~.EH=@:6m IG2[*g.9a5#d .67Šz,P{s 9dρHyw@PۿVς#< ך~^ߙIyԼgaqp]/]TZۡY+-o IVK6ˎ ڝ7̙ZCD+% ZsMU MZP?xKu a!3Yͦ6@Dbnpk0kyF+/&֎*bQyn$ ovp ``"T1{@{"Hs{0F-L~2ۙ4+ {ɝvVN3 aK;P.ڦ B18&(tQuVn# 0a q߇^bj{|2=UDӨ>Yc¡t2*%*~t賹])ً#ͪ,\h&Gu4ܵ0ӱp[5Npn1 }SLtpu|gPe^z1+k5 -!A7<@$WzUUb_mPfc%CNODһmfYUP^0Zw}AaN,W\b:aTӻNH@7~6|"h튟^+nx3OrWʚj}!y\~`9!缝9<ίB\ S)d <`l9cgH5ZLFf^O]6ãӪ]g3ݐm&n\杧=DT|)ns'zQ'E޷cK pq*32[=$UUr' 1e] jʝ~c%*ˣ YvK8j1קFÍj{6vTZ}<Az>W"+0qo*^I X)Ez9ޗ<Դ@;@T_s[>PzxwU QRFٛjSsjv>whp^@a}9=_j cCiihq.\Bk1ؒ|:Ԥ,nZd0(n3rv"r !HTcHWyQ.fFoc3bK]Q&ER eƌ'yB$ >g Y  `7Nv8h \EʙaHq~2mà-~j q諷(@ᇭO<ιe+Q Tiz(K߄HWBB" lnGeln}1TԤl@G07cƹ)\s@ _T 0O]+Ρ!Ϗie3FxA5 xTsOEQqkkN (Y2u5[ᏂLm|WkqEpR`B8Vwa 0%a_9WOg̱BMcr텇`/- Q=ӡmHhT(J5oU4N5֦AM0L xY 2+5ZfL}!>𡸫o2aҸG-t r2ÚSV&۳U1M@#<Wҭi6㮿!OO\۠]8]1$I"Bf${.0J)o% :.|Д˙1}BI)C7~ F+5έ>FKO@7ﹶ`LcR(\[C_>iۃ%U cBHSV[D8m2)xVfs0U7`q}5-&"Rظ7x'B#Y f|2@A[6;(b<7 m8IRv!$ K eĭ#ίgjo>Ƥy8Fޯ!䂑?Y}{N " &x{D%sDaF5U?3/i{RS {i ߸ShG߃*4jH/q/J"̸k, !_ψ-Y?;7׼g6o]̨Һyp @3&`;Q"{M> Y^TC g߅]zKV5C!>`{ZrǡGň0 ޔqJ8>}:jo%MuIU_IE;\_A< ￝<äʥ\ Kc: ,QmDlZ4%ެBwЦ|Ѳ5X>"Li8ƆP6ݗJ^xU CZbɣzZO7ʇ2k=*-[Jd(%!_)oJڄ D0V MЮQJQOwqƏ8wK^`of3(臁M8 P` V[7kVp_|r%Π}G5,:š>ppflwXƗYT.k@[(CUM"!x'/O(WY)6C[ȼeXڄTX+TvuW܀yWY)B3]%vTq?4dq,[kT܉(է@I<X̳V#0jsSbs3Մ K#B&O""vOJߝĹ'1q&+]A fWpO.I,~YSlKy7(`FeS8yL*yI;Ә"E)6/&"@ U5qQ`hl_e $TfiTryH/8 ч*L$L 9]xKkAqk:1iM'fgH屩:n DGc?qcօuWħqLŧq锧V!YP/`- Nнd Ms1^f7|?3aaKD޻/槬,熓Qp2j,|-x3ݫ+9^ /{w{zŖDCM/ @ .1{w|Z;6uK uRt9CTT?s2XɌ ~ w֞"~Z!,<;;] |GDԑ<A~)U[_~xIP3kY1\>vnyr2x`LIlʒ #x5ɴ&ɋ| -&/2KNrIcG}~݇-'4*J57*GɯBcD}ۙjCXqr׼c_ !027 !םX[VŒ';Ʒ :i}7n[uy5-F@`?f[}YEk0/T~Ԇ4>k96ПWM| bMJ6{d<%oyW|dĞ3V8?4^<yyy3 6q寮s ;niNC|\OO >/ ~,~\&-j{f븙MVD?uk) #,>j#>xE߇V?q bƾ̓\6{I[ڤ=wco*'Mo$Oiz,PIo21·^i%\w)+K7t04, _qID=˭ozd@Cڲ_=!IX vYkUH{R(o;n%G:.nP9CO?y5O5[kZ F>ĚgwzUJ@&7-$o[ jr)Ρ_7g4/Ϊ}#KOUm#r!W~;o /t[i C3]Tc# +Ѕ/\\ecBKml?exW6นڜ?u>|.|yk#og2ʴ8;d^*RG!:^>PK&3YOA(Q=;1̾-^s+>`㧸 /e᫁v=pHYݧ\|ewFp?kZ.Š#_q=zʚ%́U*OE;e}:V>m bn}쥩LE]Q" ~\?FW`PMy^VvWS>%mZ aЖ- O&K75obq%0CmZsMmWp]2'ŧzۚs(иC&(Y;bš~9bqx[ҩ_tJI6eTd ܧ7Ŕ~~3t:QܻmnQ9S4V : {Xx'!=W͏˜;ǺEO"m%VG*'ﳀ'QdWӕ.K{(ΡM#`Y!W)ʾҀy?KRBV9n.?Kb*M @Htm6?VYCs郰`$臡i0Y&= âm(䝲1ԟr ?w]E+NgHֈCgX#v⌫sioYL]ęo}1cPSrc\-tkjgeu۰ v#Z*w:1!Ƹu~os͸1b.phմ9R7! 镵x+n6xݺ]"*:m1&$)xMt v7.Ї뎂⎣V+h[HPC%rX4 oѰ+c"Z~ -_8ή= ^"ky됾ݵD tf%v֌<uA$``՗`jnߍ}{bCda}%pihbِ8 Ml )Kny  X&e̜jLߴڮͩmQC Vk/◴iTAAy|hRFFx,y/  kS/Mi  /_w|]@m[AAȯNqAA   Bۈ.  =QAA   DqAA(  dԙ) Y%3Nڠg&Â6Ҋ %@{ksZdW"ߜ"ZDޝi.&BiD]@ț: KJr K6(= 37 牬;]M &2Dmq6]I^ҋ;G䋬ͼv @Ѡv7(K)jdSܥ :EYƉGPxE֝޷LUr/}>hbw"WWܳm}^92cH2_d9}{km!m[ޜzN` ˺="m Dd,cTd[M; "ʇFM!w"BCED; /D"*j-SۉUM1A%APXH"ΆBȻ}dA '/Y, J -h:]ޢ +z,pj|35&y^Dz'u4/?*#D@dAy<b ')n:*ӹ$%@ ډr?S"v̅\jcY!Zm!՟,h7ÌȺ(/GyF`ݲۖEGO 4/^{.oQۉa׼G!OViä7+:"cN4 <w'Ms *DI/ZDޝ{sȺQxIPZ`EP~EHtҞMieEYEcP(9"c 䈼;N!oxL1|;}o/u-Cc΄VzGO&"&3iCDmZYwurYҷ= 縋;: h6hI;w`C ("f҈; My4]'~3UOTl@˫or-e^-%Á[daY@/]6fu:$J >ҋ;G)hyg79;-wAeqoA0K;DqAA*#  =QAA   DqAA(  ]AAz  B@wAAA.  =QAA   DqAA(  ]AAz `(ww9QD""|" $DqA$݅"P] BuwA_ l1sAwF! QBD"Z )Dڙg;|} 6h9ɷrNRu0s4:"C6BafK.rkN#0?<ˡ)v>O{c|hl0=Lx{o#t/r{Ueb~%vv?K.uI B$*2(4u|J-=}UDYD,2器~ﯞ|2-3a48y㙙gLW/0uY{N_1 @xAWr%\=B6y<ݘG,ݔ.]}`^܆G.I2)h}ɹM@[DyxUK. B;aeN̳c}Yxz~);NH! Ij#V{>|ڑgG]~e RT B#SA:3;Pd[/9|']]ްhGZmeʥh:"Bg"h픓 Q# BNd'(tv3gw|[y:@`tD$ՆI<s):E m 2 OQc$ ý EZG- T)^խ:6QY }i>01s*4{€A:XA:Bh |"-I8J^:-S!26h"@ !>@ !>@ !>@ !>oD"!F Oi>00(8Sv~dff^~иf o}@QˍR1cn}p;T,W߯e&ĀĆ??+>=*{TP݌̬77obllz?.N==KIM}Oze4-ԈtXblcekcNâ(txZҕ_j۪ye7i *:˗CGNkբu& F{&6!MlxvWvw+Wޑ%7jruJw3M {"4UCo{kcKS=UpPH WBvL\dʵ"p洩66bH,bD$3}'̝6͉M YӦXD$D,{'6ps3SWJL&Ŝ?~RbqrjjT\LTdqiYYBf4$=޹sU^,[ϟtd2LMM?|`gkfKkVߵܥ^l:ޅ=Lx)k|fy{8Tpx쩵KLsA c0"6ȈiblKDY%44,**s)%6!l0 6fXLX"E{zV &6|icc3[ȰM(_|>wUDtRjrʞNdeeݾݻLccEӴX,qyr@kMKOJ>y+| .?XͽbF2Ŷf\j}XV6HJϾ>^ώ6|6[{?|>_"qpFEǜ:{a)R`0(Ɓ69k9оSL\;wH缋D^֧o/64}~YYY^ay<ї_gg//) bb+@fVfRbj“߆ Uo>Ƨ&&Y [kS;;'fFӵW@F1?'ԭڴjnog+HpO=п_z<}Hduރ^:wJ(SmRlؾu4=|p.1`!6l&55ʧҥ9ROx]#g,FVm"߿ӫZC_Oݫ-~~Ĥ-wؠyM@QAVV&O_n:^~LQ`{===i1Hxcoꦦ&O>ykUajbR*B7QF˶|Q = ;V׍ j˔D`oƋHKĥ b l_~RtFtPd0CþwJ䡬لP,9zHZZ4sێ=,3LG[bCn]{zܹ [H\6~-wtرBff #..&*6Tރ*nkܤ?/_hΫe]3Lw{70srut8 !Hfe=;4ae b C ㏍.9,|rbd?鳧rjL&VZ}e2Yfa_»(6);lߺ]BK arbhGW>5l0if2YeކRi -Ii)ٙ;VP͛Wn\wTb PXiݪK%6' K=044~:j*Qk+8:M7aujնr oZeAxtz7?zU) <.Of D"F?~mhiɐPl ƟҀ ɤP~ 6ʽ# cD bC1(Pb8-=U]G\uJ*6VV4MOw񱒒¿EDGGGER FVmEGGI$4@K$;zjxy=xMۻ+2 _eǵE-vvl&b8Œpgޚbƻ"1?V(ĢK`I%{ 1ɊV|) \MPlRl;&W÷lB X ʮە,ZB3LbCP &EA>H$JOOm֤ybR7BBBUjh`P65>@ |g'$V]ROO/""<44DR|&Wr.WꕫV5}j\t \E"8E)z[bBёcW28Οbr_/瘙7 2Dd0}X\Yh*mfbP]bm^#?3˸ ](!s( 3Sn]z\|m7Kn2[DƶVZffYٙ> _XYY3KS .5X[[_rg_8/qRZv2Dym-a3O.h!Y,C##===[T߯޼+j֬ԪYK^@.{s.SڀJ(SmRl"w|j16\^˧:GGgQQg\\Pڏ`2@ɼ,Y[GWp שC4>o޾}@ pqqiڤ)IJNgB!!ʖcQ喑v*32N>MҥBx^[#=5%5-$$$++խ_Ri4@WWg> -iZlᎣ7|233uuuY,X,Hh)_S;:qX[kbCM D=~߾})Pb/Zթ}2?JX, Mۀ Kh j-Ā۰#P$`1٠@K͛6Q\٩CY+d>EQl6ege]q%&:j6V,8XmTg_SýYc!TR95%B'ss ss s3skR;KTI„ڕ[9uױb[7fJZ⤉0iڴQ11w+}Fi4}yg$<${:0bdr\##C33SSSsOb%6RZhH" 3RS32B#G0)!)Ź\YxM#;pD$D´Ԍ>?] ̢i:22z̄I Ćc1(d2Lb u8/_8|}.yVb,-{8;y888X찰/wIQݫKǎժVutp07stpx +8UtYZXf lm^|Qґrc֊ ) wQpυtY?wg0)5SӒ3UT=}ꌻk%.v}٫zlҳl*E[zbo{Kci?ESusvv1PՠN wZZSf !!n޾n [h~N *nDlB 0:*vVb1YقO!n޾vF =%v6 k)L&݇k &tuCC_VfV+RSS==--bX,*gXFDNWw/ESۧ7EQ4Mgde~Z!I7qAmyW%NɗkU(fWZ:|?z6nݼx>(¦< H-1]אˤXӫ[lG؄AΝA>V[D,࣭^n]G 2eĂ>ܻRv9IJ .OGK+X2)Yi`FVhzƕ $X,E"tQ +!)ȸsFzzzXA+ {]j".ީWP/)=;9C IDbi #>j9[r\# }Wzos6KlB Hl˱42۶lMQH((5k QQ_Tl߮I  0I)D"D"P(ق쬬̬lP( bD"a2YY٥]R6]KKMmaySJTYVfV&?3244~@ z굔dc#.[*hr,tG}{3wɫ3RS! )߿}p7Ma0{_N|rJ%6!$6DMW\%lpV,]kŚ|~ Q漁5lAQ`4MT{67`0,d2Bub ɰTh~aSb0ã!]ny;3ss3.WWWh{as.\ֶEӲyM g$ .d[60h؛7ժUݲy=BO~ޕԥ#OKb0(! 'M KogB{l!-cDX,(0 I[144d,SjXܻscq?Hۄt0AtvvvFFFzzF/hdp8<=@ ~?u]vHLL2mLMM5av?m"e+IKRBmͥX,HR3077P(Yl&!u)J#& J4p8BH,K;5Y,bXL&`cjjg"] bVW6%NHSֶ$ŸK) D !>@ !>@ !>/ 1p+U&"N.`/?A^n.m#|Oeߍם2bCG"5;6W  18v]Xk37Ǥ0yf}v7o? ~W]}*u[-\9ՙ֒y`0_yι[)5zigsow~]2L6d1Y,iILlFP GodcGxG~mtCռnBp ՝t1^x i1=itGɔ̤#+Ft}e$؀I\@lx?z#W`xrgHZgo|J)[[<rp=Kd$9hBQ;V}i=\LYLMݖ=>`+4SJ%*^,̃K'6qa&uᵽܽ =EL˟'lݨ_! W@J.k\<^ԼoL!!`j!ʫ/D*NH}=Z5pZaǕGqKnq5{cR##U~rް5yUncK\ZmR.y* nm뻹yk7'wl٠͐+koqWj~_C^⬯fKYLeQiPyC=h+#Ph;z6+\*ȓDuq`yfmg_(F *}Ӫoyڛ&+Vv5(+߂[گRZ[I{m!nooap/v@]oZ>iK8vxD乼ܗ_ypqYNJ_9(;ݗn)OhBT֕G% )J:!ǭk:kˋwo]i~`|sJoLM:Z 3dѝg}x~}Ha+si\[3阵}sԵzkM2.}xYO7ib&iG>˄eno<߈n[x@oZyiHwHFk0కW|fAEm)POoZr[ U' Ho.Ϫm%}ݗJW(Ύ8qO?}XvAe[{ t*מ<5&:ʙ]F~o ;/prPA TEPYindu:xzF23_5z[hG{R@]r4ԧb*Mt5suW?VF|| 7,< [h?zB*^pYQ~LS@e{  U`$E]N̏|aL:B)PL}ZUp:TV/aV|*hb( )J4:!.[5*vD{WdepSV,/ :\)uV-six:ja;/K_({'Qz 6I[0YږET-u 箨nPyCUߒ?{ EYG{(wFc\ ;X%a)%P4!IJdCo|L[pymv4i ޴J `4<{ϼ\SY}?5;OG+ 5[=b*Oiu ( XzL1Vi uB+\% }rQ#ޜ/X U:~A#sSf^0bR)].qVܻ.-NK[ ㉿&>pu[ڹF财sVܜkqgb*ojLJvm\( SWWl=d᠑osCI Zlɚ(_ U5y]k}| #ZԟXML=|t1-I ;faST;~$D_>q>e2b*͜a)YY)6OXnBVJtc2 GI]STUBN׿'HL&߭>uo^rʼno:˧̾bɛvqXydDq9qwZ*?VK+:BGk`c 3ooVZLCMŇxͦGvm\/@uID{.oS'7}^8JNbKDbZ[p jrK}#?>1 ^޵WΣ$UX)gVOҧO9y0-T3|4,z zdn)N.?|X嵅&uiOQVi uB7.ݣFyoܡϾsK_=ީOUgîj̗ ٽz9yx6:!mF:\Zi&~yۭtv_jŰǚBkiMu\e-Lԩ#^*:57fw_AYWXPw[&x~A-ODw@ hkOAo%Ҋt;K@ ~*izdk"& I [LC@ zE ?O+$@>"@>"@>"@>, @ @ P!+:ZHߍ!¯U`\GI(@ E"z k@ RI|,ƍ@ A|,D Ǣi ^Zgn'ϗFףU5v\ysժZͷߴmib:䈗QͫRev|n;A0E 6ro@ ^,4ݧO42?5Ż74z^?徫^=p׊Wyj1tϐEw!U‡ `N].n͙4f6Zujm4j}d+fkѢQ.W;y4~yz[n6jeԛĪנ7^ώݩ_urhqڞœ[4mެ]eN5@ J]h5vx *!2<*>y|UzrM\ֶkfq/w0q߉hhw3ߩP?t0zNmrO:!|qX-]>R1W3\[Lm[1i7iXn*g_dz߱wo,HfK{O*wH BiB/g#M_`eq7h_ݿ۶|j yJ 87gռ\<\닳¤ct2mƥ)<4c>Klϓ `c,ڍ^)z5b_} ut ~ux 6fWԉkjgP"Nζ+Sgu113g"i@ &T{3'OLQMY&G(3T~A)MP -V'֍DZP''8ST=#pH[$4ǚֱܼ)dgDl N׿'HL@Wm|IyJKt<yjLYܿ.gR]z.ȓe=Ke ҟ BQTs*r\9AE@ J}u1z֮Oc|/0w5λ4}yQ}իZ3Nfyjꑧ ٨hʧ@ !>s 2G  eC# C_#0!!HLDV ..+_:ױ{`YBIвxq)ieD;ӧ122 *ii ѣZ"u$,B"J+elwug~A;-F8Xbcq8:uE\1!!w /EB c$i=85ZܬRW\ ;ȗ'$`b/K!s^7S>F"nPlr3;7H=]~I"̳٬^n]G 2eX=yy<ԩ5 ;;x/_G%bE ̞p0 %hiЊN}_hK bk=)߈_n=WAMUVZ3S@x|唆|*UcܚDLI{ $O/EVT9(''l_ͻ^^'9cS?w7m\ O/$G7Gj^*8#_,uzn~m\ K٥9xW)*TR ճ1y҄l߮I~TG޹,mi$&u,Yд)օZo<{Xl݊5r B:غ%'a~Ƞ!Pn`@V䏯O穻Gu!741B5ɳP~q՛jz*VP:x\yen5TԕV ?D^}`sR*䤻T2Wp uS'^woAZZڀA޼ V=; ]8h=;6>>|>4NJ ecbbӹ\ [H"->|"shlDebV(0KK+?ydА@G){4oͥzot:BirAyWA>"_ѻ a}ü?UytH,SJYĀs3}kJ gOJrTRi:u`1-ga``s)f^LURbj L}}=~X?@b(a3l*-׾_Z9n ƌAÆ >27o=f 3B`Y²z2$=$"zոfwo1Z6RY].d}5{hI(*Møp&%h;wlW/+w:O6%VDTP`#~{:\]h)++\*Ud Al,1^RÆIaaa}fI<f[ `\!tVVٺ|&Sᨾ"nP\E h{3ӰO9&ڨ #ȉkodi&:XRE cJ?I5}`enݤϟ[hol4N= t:XRE cJ?>5/+=zȃ5]<6~I6+E[W)KGFr R`I!nPTE h]Gк/_G/UjH߾MnZu7@(,bý;7J?^l6֕&\,Mu#(5226ҥSJBF5a9sx@(c ("F(dd "BP<4yь ,4WLhp}P|LFg/IGUr6p'0pӷڏ ճdc~ɲj4< ֣Sb2Rgq@cPPbh PtЖ)T|J34;~Ҍ{jT4<r|G*+MuV>x5_,#S* A 3vExLL. vHb^_F9"Ўfr? D^ZSjkKrUis5)FQIHɓ)i)0LP_mo1ff2+ M YxYW œA BBPoS貳iyRkty,Wu)P\/!3߲IBaX,`oqVb~c6Ť zG D(DWoݼ}U&{{|ȐluoڕkZido*N8;P L.-wxyX3]_*4N^5.4 C$"ៜ @PBo-cn̏s\< 4M߼u766@D{L*޲HGɖ[\w0L5 p 5e sbx?dؖ-i* FJVq5e by'%oSZݩCzr4hƍ"(ɵHܫ7VDF%H{闯A/^߲~J.^ohhXްאvhCq|"݈L4&?"P$T&L3#ͶT1mYmfҟ\:0s7/>W$A 2]4'`׭ݩcGC#Ԕب☵E L$K=5kBoܖǵ-t̶M5UvbժN4^7n 2@ϞET8XBQۏ%/韧@~䟕suk;Rd\:̝ TBS^ݫSt Z=)߈l&ҭ6:dyoi"$תgt)bB?}P+ իұ8vLQFTZә7D}}}Ms=zAbh78XB1. ~_hܗ_ypqYK3߬5no`-(/nyג 'h}gZ)ɻmǫ]+T<)^MĖZ2˳jXv@EzreHwm9|mTg' ; Ǿ}4EaҼ y[e,`ţw/\B֫g ?4sƺXzYg,Z7Xk0fܰ,]L<Sufܸ6`s+B5Yk `'g[BϠA߽ÎdNZkɏeˋ{/9v2oVrmM Pl78XB)~| Sˆeh(#_X/a!J Zg7RJuY3z WIh[\$%2xء7>/.uy r1u*MΚVwb*q"8,n<,pN) s@PCs*.w qPˠŴ> +T\e=&ƹ,hU ]yF:yPLŇjqr;;üLW ;EZS,ɵ`ٺٷηAY6ͨA4axك@HNF{cfm97~o<h Ig#)JZH&@ns{Q5W/]E BXz)}>ctp7nӧ QF̱C:1*6a)YY)6OXn?Uӽ+V7^W䚁OjP%1W75Sͳg:|F h<hSYKo ŗٳG` D#>!B֔[cp!SN6pt{S11h?< <F :7ZyJVV8XBQ1V<Š\}[gy!~@9!&| ;-/H?|&e?zwұAv5@35l<6Ku}%V.+g̛ׯq`f` ?7oܽ{㑏%+E"Jf14nF Wf\߽Cǎ Q,Y&MH&hAv-b*bx " PSo:Ӯ- 08rMcYΕ+Ʈ]h*JGcƌRoNf]Ow܅fzq~,]y@(&"A.o^wזx<\Νq,'<͚sg,\ww岖@\3UYx1i!wrkwYjp1ƩNXQ#\oz~gs"M HqϕyTA۶8yY&ǖ-^-Zrtuu+,Ev"!=LW|luOOen|u<>Μs OC]E ҏE ` 6ȑHNeJ$x/BWufMzz_5$2+Uѣ$%X F;nՎ#.,(fJjhРE[.  ݸ(G;! Sc ۗg­[=&)f"x^ p> z`%0"4l6C4C(E1˕9H(b4E}^TA= G&6[-%S ; ލ)Sr%Avv.73èQ71,''3  @X` $Gnp}YG\-A2"S:J;?kKr,w 憽{f Nx"F-Z@G? aAt4*| UL.ZDr 2C{o)С5dddT"{LaYxi2H ~#BA69 bS:;I#=HrRq.`:(`U4х+ 8\ޘUG. ؟ْt)PjNfr coMOP})}鴿˧5N{S]+;% 3{?sCХ/47wACŸ>3sk@t@co JJJH$k 3Z)z $$\hP ( dZxR'+IKYC]yY)UDzH "C س4 @;D#F&`8xU4 tb /s HG#FF`(\ P >VaP9 (2@ \顟Sʫ%X(,M,CMN5ШX'd"LA:@ Q}oYH({6I]+h;tdԮv&<}i{$RN. vl4dbV >0 $K4|#iI "C4uut/- =yU{Aq:I^~zV%rH{~ x EMOzz P2jD W"^߃> OOksb0 īW Bx8e644J 4M\`*`]l{/Lb`` +r2"D(S `@$|#@`B>=@h S5bssi΅ۜ]T0x@?@TX:.GF)cӆBh餠ن~<BKVX(mٱP(& By3P.C ݵ+},Klz~ 6i_={>ϧҘ9t>Kh,sYD)_$_ -=h}-GMPic``;88 l/ `VN~[ە)@J&߀0 &*z<2A@@t*,g%M*nr * @ 0L\z6wa~i:8UrH ?? kc&|\%II8q:K . سnn툈(XZFrX J lڄѴ)CttB"#gmېEZ"C4郓Gl3 Xit ,6B<RjOL' @JL>@U ^#G`ہT)JBe:.^:/8uI (iMa22DF,}8#!4$X̩ν;7 / :h<Vtv' 1xbs̻)T:ܥ̀еx,Qbx'"$BcMƊ?b#Ե9R @D188`:tסf;-K]!-9Vr |\s hVS* 0+'_3.;0ߙ_%'~,|>&MŽ ~krvkSܻ H!++q(ʗך:Ǐͧ(xxaCT gg)Az:=˗+Cѿ??Fkš_?\06}0ZU4lHMEj*۷(އsgt]`fF'ᗠ́΀e |X @ H3rPmRÑ~0NQ{8b 8EHݧP_$PD!wSzXv~M[b Ne_%'POQ|. ss >} _h..f^ R"S <=qڷׂaa2mm1b]IErP,'* GbV|(vUhP >~.])$18/5ButA/ dp>[8{,^ĩSdKP/AMGT;H@oj':9Co* t4`6" T.0 $@ C< (ԝq`1`0WI@_X~LIyCm5Ku1Ϗ8˶*9y$xurj~2+)펹cq*XgϣR%ETt%]rͱu+>ܹu`DQ88(ТN(/I% Á`Z:Xif·f@HvյFpikPhp@7@?`h/ `UNM`jj (f -8vbY`<~yw%OTTpS`maE2Oĩ;95odJN i'޾EÆ&ek0e x +& ⻸,ٱ#yָ}-[".NVgOzU) E`Q|PPܑ#wO\ 5@FL̵+fZy@ϞAbHIAx FF ~ &甪,D+:Ryx,@aek5ݼ?~=6Xn5hJHVk*u XXSSѳ'D"h8v,sjQEsը1ҥ ]8XJip|Gf4j4W,yPxQĠo_irA 'r+;;{С4vvy,%4}&.n֝9zHn q 89aa>\OTB:E'wfp<-< zzx혘MwtǷr/_dNN8sF1' lڤc>uhVӺu kU.:sW|Y͚8tH16z&TógqpJ-}|X֦+];Y>۔;F~E  c*0w4)7iRܹŘA(ֽ~yNݺX@Q⯿_ ر.N0J:P @кv6n-IM-@UAAQLo&(JLk:86 ݻK)))ԙj|7oZת"$8PwXPD>ĝ;o/>bD/kd2͚BMCʲtj*֯/\xF64ƍ3úv3;֢_j\,Jx"Ct4v,\@VҨX<]i4O3h=5Sh~"ippVAC|,@("V)ңF#Zٻ7ԩI%˯:MytϜ,VV޴ XR1KXܽۯ{K$m?~*a2ksM06&NRŋ%gj.5z~Ɍc33̙>Cc䯴KdiSd%ٚ'\Sq鑝-|Çʓq\A#Y#GJ7n\.A+c?|ȉҭ F di[[  M=8|xzfLȸq9ᭇW, x_Lѐ~,@( ǎ)ft("ZhѰY0wъѣ]y"ݻh)3pq\, S;r+*_ט1 7\Ξ\ܙ+\CA@ >}ggg۷ &9[ א@",ƍɛ7ozLމgw^d\y粴j֔&/=YyK{??[[T"KGF"8X5߿#$Dvr0=rG{AׄNV,-~@u2" SlW:{<'e|Gi<(2Փw,]طmf5lFWϜQ˗yJeǏj7v"T(!%𻋢KBuLeB%o暗Ůvx!8pJL|_jXiz|YۨBiuwT׶ a ]D<F#^)@@aAZZRRIc)Y&U Ω9T%%XP(RMI&8~193{Bmx'l23nmt킷-&YQ'_xHw^f]ry/fffhnj80 .JC+o: +TmWXE_@H_~X"G@@1B%0Oݿ;776yJvOjgwGRATZф4r(q+ĉ1S"9ii']J1`<`TլUffll C3J),&#lD"?96)HCAKKMLTyV6ĥ&7NTZӴ%9PWͮ]32.hB9A OC$ZX/K$G]Q嵍::49 o5+/d&؈:T9?'>~bbImPZOOw@(>-p4(ý UˁFj8@aXB- Ip{<+@y`<gXB N?L`h!˔UǮi`6⻀b:8!5i5vgڒk֭'[Y/?9_ցGr Z k)> *(Ou< 7X...yj%Z [ZZFjV]JRf\9Ң ?>֋44WR>tNhSX%=&e:2U.4M 3/oޫ̤࿵"Ӗ.;0ߙ_hBn7A l\;(L/=ow/񚗅rkD $_ceƽWDy}2nnR'wOϷUmDea+TPxA9?'F<2spնN2yT4$] pUMZNdp:^/>&4$ 5m;nGÒlCם<˞фk`m-K: pDZTV=/nnȨxP>&y5Q}b fk]ËmH$lxyMVǨnxavN-DIJLpM _#sTƵSѭ;^A,A{ JZ$(J ~W7qڒVe0Vۀ!u#mNxho[X{EyYX]ˁoXuӴ"y<Ѱlhٵ>[UO~/mIv}jyNffN㛯8٫bEɃc/JVMN[+9[1̑fSv|([4ofP__!{WUAGG$يIQ`m- `[D/~b溭G5!>SWO suunn^ rb"# s8sv!C1W666 GGN?}v4]w&]߶r{11Z2X[_ 422x+'JQ Qn@$)@ t!Ht4bo>L+f6x:u9XQ8X5+b[[&`,!Ǜ|-`np`VUrAwwԮ-K''cfiܼߴicɥ]1G,OXh~2Oe4Px ۵IfRJcnXỏ3'OFsç)mle zaߦO~?T>e|q\-RA=2˗˷9iRTzV.7A#GʎCCnⴲ= K~2O t4a$׮qQgbtMܑ~34sf;3"K \zT1u(vPZV.:yظQ䉼d_]MM%ahx~+vٰO+ mޜkBuP6mR %\˥IsΝ͚76.fpc׭Zqu{BE&6K9@ )m?Ε>}íei&"Yʊ5a¹ {ܺZVL+WW̚8\[JE]mtu5rc,V++ǿ:v|}4IQnka5Jq8{6&9޳g-^\ IC1GGm5kv8yǜ4 XP,ƌAǎŋ!4nާO7mdL0`nC+k={ p""͛A˲=/sa\ oTt־k{!!ӦZY-00xFF0ǫkizذk#=3gн"kVVi ÊN}+88dVVޫ,ӫce9~͠ރݳC(&ԬŋI%4rb7 2?n hԯo9c۷o߳2ӗaHgH liMڵ37app&j*qqS'6&arY|> x\q 8B*#%nm)/iY3=RO9{Q,Ү^|~Ϟo( E$Vt8^5جys}db1 K*^}xXmdT"& bbdWѠ‰ٽObny-d}Np%pDj &,0 * '1E-::88 \z#Ybny*׮}vLJRAZrS)luX0>1/+jDž q894h 5OӘ7wb6@.ݺu @\bܽ{kLפ0)d;mY+FEx ^0DP SS/q?cB {hBwE=;1܈EF#x6y- C+}I6kb q,5*NNx-Z 4T)<=1r$M=-Am| ٗ#q\|Dl tM#>_!ĩ| ^*TXX%jB*{-Z *Js&T 8e-L-:vfQþxCHh~ߕy^ipppPτXP2llp.cS=_ v[fE= V~8tH jsvFUU҈[NNx=z(lqcC=%wrb Rs]JQ(/Qa( Uޡ}گc?N 5ZK<.ذݺwo oFF>޾uW}wUp L[==>yA~&"clg~puԯ.Ew^M萦Qckۊӓ|b62e aK]8>8¦:*UlTHXZu,Ze ģkt^Vz6]`BIGo;?zTc!`)Bc]k'\k ks\1H H0*wO!к(]N~$qH>)_ ~fROrWy[š ]FU 6lȵ$ U`{7* B|Y OAYᨙC܈[,\jAjaÆ<G?A" ʰUMƍ5*Kgb2էx>pgb~n&[O(9@ru'Uԩh^yU-[iZ[^œ`}oVTG(NH k܃zxQc F0u*ZPyβCa9E9aSBCzy}ܼ+p&$"h`ϧQv=F0m&Mha{I n KX؃U$hc`p:(}@ύңbX6KvO>oץ$A_&'eCkN"BţgP?#rAHLɓx!%E 65kiS  kgJި${*GCx@9Y1\2]]Ԯѳ'5P>pș QQƍѸ1}ѣr#3Sme<E˖(4|v].M PA@o_w|T VU$c@g`6`F@2ת͏Uo}6A'~\;uɡ:Oju/(_'7iTϊDN z4u23] rw\U-`AP}rʂ 9Ί)zaX/ OiiR>Ji)&\fqu`꾐 q?hX: Yl?C$§O K|,NJLI_LӮ(_Ց]4jh4} TjJMZgT153/(.32a>  0DD =#h ao9 wXp-S]._ʷ1Mhs$$޼:?'U4@ PNЁ.tA(EU@ %- -Sx)HHM><*0x Xk :@tv+յ X |jG; QFq>m~4Qt*zf:1]b 9xc3:˒_"'4"Hُgbk.B  "\0PMbO_݈25>eӊK2!b(54r0n'`SEI(!bR"XWP@E7{FHGBZ4VWB}^:'CP-[QU& L0j*!B(H \Rh,`3@13EҊ~2*S@xK? @g76\@KYN lhX UOcG\ju ,Ӳރ<˿([in80-yrHH g:r7eEee0)a   D}|̿;ξmªy!°ߕ`R'`|3,lVrח (3vNA)/,ʎ!$MPCݺz Gw(d)&ۜw2DF]ХBq4XP4QH*zL ";V`1]M:2z?3Szql)Ә޺`OBS˒`[KrElr$>:dǴI$ZD2)\&LG{px\ X&BOϝ/gt@x͑ 4/v#8.52MZБG>brĂ_%'B@XO W=/.tt* BGm/HZ$m c[ՐSX @!EVp( JBhl/iU+ͣi} C'PCB4h.J ('ʢYz\ h҅䕀Β}dʑOPvS fP 품BD- 2} f<hbP $)dc%-%P\P&Quh*Rs@. иnw%i@\Dzh/12c9@}`6к xL~AZla~uĂLͯGy y\Yt$3@YD:YOT͑{0 0\h?Tk@"B7d<:J=KrMY);T)MK-0IcuX|]ڜ f&:Z)LqS)~4Bvhٍ&gҬ)~}>^(Qd!.1܃\8wk"w,d!"8}GǪ\k&p-fNp#RZX7лl>Vz ?YzXv~M΂JgW#9^@|Q/&JQ[@{h2hU_^5;t[-Ƀ25:˶˞*9y$L9E|, {W֧V?}{Dʶ 5dDF)&P[독N6YJ70obi.xǬݍEt+~ (e7Z֋&Xqa<_h*TW"Ci4>'p }>U)%q>|4,HRj60 h !/ 4jU?uo@#j XS41RN` @iQ(5O#35[y$;>UNfI0i3l^<NP !%)!%;O2/IiE˂MGWRQ\QO  h9-RI>͛A9C9 oBAԟ"4E aڈ-(J:͕PI_ 0t#.UAjz5ZZ)^:?NtP%$vßѰ߽Ó'Fh(Ґ .zz3TAz0PsoHa+&0v(tRqB,ƛ7x 3.ۯvvpq'|}BdE9Â*x!J$At4Scb!e|61N)`cf;/xr"'-쫨ϞK!<K;: ==88R%xyfFˑ*)dT1?bGcr""DA(@]04)ae*`#Qc4G@El C"fd#y`DD 302*T jB**7 P(2G"\B X]ݻGC FK""5룚cKB$µk8t\;vEE^F. %˂8x7nȷ|λ_atNd)]]OtӴa9P*b(#3g!ܾ >?ש YФ zF6,b~Ǥr\T>Çq gkh-ЫZVG.m}H|I @@wQ*zûȣ8q#D65EVMw c|cJ RǪA|~ `J|Tkut~nT}S{kXиlaf&vڵUTXFOÆetXЩaIUm۰n)|OhccaQ'龊ᾊ2q#6nDb ѣabJPQ" >_Qv=ݍmiäI6,0 :( I*ĝr/dTY={򺧚PNŀ%1K+,3XW1vJꁻ醶s2Wt!ɳeg1iRgݖzZ)Lbcţkt5l1Zxߏ3|>̟q*`Hᣊ"oǜ9y;rS%K`Ȑ~>(|=xZ\{/llz5z*4Kr%h׎x ">MFƎ| a>&u(B| wMnjn=S9ԩ5 1hsJLԮ gg̴<,'RR3޿ǽ{!? =q\iÇ5 ѧn̛__4l*U³eХ+0Jx}d)GٹIG?lT1V{jrQ>4@pvRmG % կÇao_̷ rF٭kx@NvF ?@Gz Q>*WFŊ04t$'4}C([GػP/etb$"*G ןlAccA|yBWiiHLDp0^Ɲ;x bqjwǎ0$bcE|?m=vv3{pޯV+wS>x>"统<(cj&LЂh.` JFIDAT={*LXՂ"gNRwrHŖ-8P >x]aSSiF3d¥ b>"6U:LOǙ3ؼOsGQ14ľ}ر ]zdk'w;ˌjqQ>MMRzimRRp86mB@p7ւNE Y=- Vx{t"{vz8;%>5KlT{ E`аI"9ռjrbFFlܨ%_w+2*3X(/^u\c[ffXCy(Jcz'x.׃.^ƤIۏ<V$ EƶlllzV9&T|&c( atLENrX;JScɹq&[E-[0lX4Rr۶I*a1ĦFS1ܿc暪HxOImooogg/uv6 V)\"&N}bju+FRr8X&=ԜQE;N'"?VriZ1tL-[rɼ|p|,X8a x*K?|s3$N]]ʕ+g859cС\2oBZ䵤9*+XEDyv9r8Lb 4+m>jJ۷ؾ=KK'ikfJLZd ?~DHHr8ر+ ~迷og ]\ҭ%TZ76ZW5V֩S-r9[_];EWEA.^_ϞQ# <ӧOw ;0N:uj$wGСwm[d oB˖ 'Oq5ݻ; ]\+fX m)ϟڵnի sѣG1QGv׮:s"lG,SIOt}k7[ի|8W8FFUYmr9E=Dr…νxN\jIАY?4׺aa=zT\91f™ǨXeC߃_#^ =;_.#tݺ,"9h>rDׯ_ `Ѻ5u<|D6F(՟7\GOH$5L޽iAWї/+ ͼK\l˗FS:׫w)tx8榐oMDQ!Ȉ~H7ojmk֯n݂PZ=Ow7h{{|cci;mn`eE(qMMǃUk(=kl\?-,ml蘘iH+$/O*q󦛟 ?H$5pyϞ:v|>H!ٙNM-Gs8 td3W]pl~ffeNK {{!">VYXs*^E7Z6h@ݹh|,_|є)՚4BJl,]{{:9E;wN~F"L]ТI<{Ci2rd֭cccU|ˉqs33a׮\@%I$ :ռm[}_߼W$EZhHS$? `׼gO|;w:4##CQ˓'֭`@V\kaA$?׹TW[~utٳgAaÌǍR%?kO(* Ջ0n>yB3 ׭Shڽyj{51޵yWh]]EE jKe'5))VVzkMºx\͚}9w޲ES FLDD/ԩbhJEEE)*:}(E]'Nhڵlc`իАeKƍؑni̙g>|X&"k(-[V}\e~QzERP (}*jZTX%hZBwdggjՊcFFTeݺܬÇuXǏiXfQȌ~~NjYըV\5e̷oj֤?/ԓ'k#hW\mhj2O>T! g_d~uUJ+k#44O;g^~mU6BB!ĉ-Z(,nDQWÆ- dG.]Y,!:ȑeZ|ֱgt8˗/=Ѭ,-a…\gdi[HWo4ݦO&v-9j8tQP,c…8ofղabدS D͛_ʕ#v#M6sLE$҄lV]k233uagzxQk~}˖ZKٸQ1_reL"M&''77![Cqag7S,J5 .ejE1x4rР'J29]Х>}: $;(l٢u< B!>N{A S{ta!*ْSڙx&ݦGp%MH3cP0dr:8v69|xI[XDmf@|aڵ0Q)V2cRӉ 3n-/*UС:9E\QJ2XNqvvw&Ι33ir…ڵ;@*UϘwx١VP]^E@\k(.O;)SM8@VО=Gʉeh+A:ys9R1o*U*֭?4hNY[ Q طOqn<[ہc6o.vt{9޽o޽[v+$X1}2֏Qwq!iOfC_5tʴGB*~&*ag]rvjVGe;F")ڈܸ!KxX)))kO/((iյe~}Y:=Ot۷xR67G۷iECaǎSS_+9qZA߽$^<:n|*(F Ud z͛МWǀÅ hB|KCT`9OܹXQ06=F5?ǎ)6on3χҵkkEÔŋؑ.;:Uҥ\AJ PcI20+ `dW&:A9QEI0Ɛ%ii2*Xͅal8ۀMZpq 7_XZi=`a1N@2y-q`Q+BK/fqr#n% Mgp"\P j愾}a`zRNByw"Tr"e.hsWJ0l4L믱)~)@*XÇ]D.;Dsx9sdˆ:С.s-Zpyd|4 @!Jkܹ c9sҥC##Y6$ɔ˓gҚ#G.O(~8u êg5›r'U|dǡȈ 4aUGX" |+&Ӵ;~}gdduKYrlmcjּr5Wnח^FEEnT~x[79ܸQ]r ) zI?~ d0Ei_Wii?ԕJyq,uߏtvFZPܶWeM ,mb"߄sԢ}^-lëW&r]3U,,ɕ uS8 }5Z C.-5T0yWO8X9@W+ںa₨ Mu@%/g)NQQ\Dy]u?y8(ݿtܸB s׹>}˦NTPYYg,\':eKo09YIR϶{='ϱO)O׭$oCBTi7'/]*̞t}ΩwY]$TxN Nv)O2lw߰J%o%OAyx$ _Q|<,o:k#̖e 74NKSVj1#qTק R zk ͻzZ+XSǗJeULs]~}ZJ-U'(fel.Q`3>D(mQr85ѥun1*D㭪>W 6FYCOHx<&#EA(TWt%_NNZ/=ѷ.~$ y( . zXcJ]Bz||\U>rCPjz>ZNi[[B_Gs:t0Hﲺ;u%?_R]+a&fՊի^ښU+L7|Gg.L8clk _Tn'jZPK,FZ{G p WIn:f6F08D +`/ /V=O?P_hT1H5#"Ts4p/YqEF˝T`xMt,l+ OXm{lߺq4|92!Mw/[o7V]v]İa0ԸW[\zkxI-[(Y޼=գnիŴ3.-wjSyʘ1ߏӊl6% Ul2_'h#![ge@hu Q\.*_+$:~"O˻ܴ)җ:EǙ%,T}nCCeN؛6B< %0QOw ~E {wJ RC|Cϰ\0%z7)_O1&9bA%0{EL#!x$O\\Ti5Om4Y:t)+01Nٳ%QG2 PzϞ<銸8;G6bQC&XDׯ(]Z{WEn{PA{ MLL,Thu-a~s(塬ߥ Nd$BUHAAz4mlbʃbTrɡGǎs}i֯w%`o/Oz49R8]JdR%؃J!:+H>^>hj*1@?{IP5\ZIjj23u#azzڵ333FE$|@u-( F¸2uD"o |P:5U* iE'>|jRn$b&RewUZXK3d1a H?2]C,\9fNv8۶utt49u 㭭94~TYEQ{W2D0ڪUi=/NN9vTRf b1|}eI6iCurd4!C,ׯGJ<]v/ʕ$[x}>*Uׯ,w|yhJ\y˗7=':tCfUntZÇJ+ gWl}Vx8*UR9mJʕàA%F 21ȫYV޶ ر*9 a:C(YSf&oƌ"fq7l} mT //Ujh jrgFϟ--zQ Z~yy^(*K}h );ii>\6cEٹ#mۊ(Tj])J`edKuG)XZʊQkL'+]>qDfy/Rr5F$ 6@ ?||T.Ч}[ zxXjU6ż ny'q_4J,Q1C*wyVϟ?Z5˶mU{Z"L~]׆ 8%|jm7hVR{˒E_nE(lp C;ys x1\wl-7[|wrd?pg\y>lѳS;wvMS9DڵZubazzd@֬%lS}GC?+pz'6.XA_߅ 1kV!$1* D`׬I2$GAcMEQ4HCٸs+WؠQ3-D6 8gϝб#]o7hn\&C /\ڤɒ3LuWѭ*ʲ?u$<{Gc:MӇY_Fbǎt24?s._1gD#u] uvӧ Ks9sW blwsK M^$Hpi@zM=~ ;wB}WZ,FӦsG^ŪU*#@(ݶm͞=Y:aܪX[ ޝ:taX.Jl Wa̧>KyVO;wYYYm?qBТEͭqߟq{zvi9Vw{LsiiisV8r2mOZy7eJ.]TC$ ]ё#s[б 4~}3yB~Ưױ|hG"k>qp.!!!Gϝzt{b,CHH1lҥuV76`d k4>-[=UQKtppAkġ4Ȃ!eԩYsGӦMUHܹ* 7o%nIj*5RǦM WRÇ޹ɖfY[fpMHH̦$ yx4nX X3lvx'6 \3+W/\I$۷o?wڽ{, g%rX|FJhްဎ1Օ'cvUݓzU) fa\u]M(޼ysٳ<YFŔJ͙1ohӨQNj׮PWd6^GJ+E%5R2꺔@ ' Y.X|!`H,L^5f2=7۱c͚5stp񢪤Y3\HjѱXJ ֯~PMg+,ӄ5?Vn.dY@ٲU #* &@=>PIx*qp-*j4xb I 0]G#ʈ@Fu*`V4n}]ݩ['Gn *^HÇssܸTX>~DFQb<+~DGⓁ1mhG`JJµk9=/Ѥ U% b-Cq8X25S ] y>_#rh..s(*ѡ4.6miQ!!K,886ywѣsYׯ>\_YDhT8.WWbcѳ'5í[WO౯ִZ٥LkٳKϞaԪg ).^,ׯCݼ&<{wETY&5t_# CT&OF9,\^t =Sy{ѻ+LʬTr y(X_`HԪC*]w]䄻ws(@psС9011)W {w0h4ȡ`U۷`Zn8IwjMYiL#SaK<8y(XϟkW4o~Hд)U0\ʕ1}:bceѐ@Dy{ӹtѦ :uʡ`5m7UXձb/_%D#سwʈ(CC=aDr:4֭Cxxh pvFǎU II8!!9XW/|;wTYE Wv:uи1lQ}+X#XF(a;gѻ7ʕêU9$]\4pʹѣV -[bFةT7wjե1t gglݪrqcܻe"SޅzlvBtÇs!}Ob"@v\{zؾ=܁қkRC=H$:/ S,]SUbN4m WW>gOO\C q^e7+ ˖Lxz,~ O ,B7xzP{OEEaFԯORȑtI#5>!.]`bjCfGpq۷n4S3!aj4m#s+OLGGt\>"#Ѹ1=ÙEat_~I]`nl *}oc(T5 @?1@X7:͛1m224@P --I +}5\ ybY gBkk\Kp갧DspLTU^a A|w OZX0$." ųg!iw'BE#k7n,صc+ 4mq#rV~Tg 2tFO:X6ǂMG<{P< Pll`fʕꪭž}Ѫf®]`;UVy ԵMz5~tȨQSJ Mg x} 0Evf3`0^0e ΜɡH$x Ϟ!uc (~w*BOo_L 5IůW.~mka0`|} }@ ~=Ƹqꆌ[Gx=p?Pή~*^U@?l6…{!hy6sotNDaŀ5\G+V`d^ݻW335e0м9LQzRAQpvVi ^ijgą 8v ll`eWWTɁ%l7V~̀@2Ц Maݥsp8rDEÌBٻGN>=OktQN‹X O'=1u**WV˗/G@Zl^wޚWxaUmcDy<郩S%tuݻq+Vҥ+2~P5ĸ3ښc LXSckνo޼%sDhݪ~9>lܽ?n{C17F˖%Jhտ&sbWti_+p<ΟǍ ]3P(d 7Gi:0lߎkq,.^DMD`sgMU+t_ _:Dj*ΜŋqCv;GiSmΝ +ɓ߾?]NCn:vpwĘ/fmVYweYж-Π+58(4jP׸Pʠ}1$BԿڷGӑ }}dIL5B˖rslͻι>kBZ3II Cx8|dd\.l`~ƷHq66?w\>}4U 8Da.FFpp ufTXLL0p |F,Q.< 3ۢ|ymg]b S]Ō*E0Dx823FF(S*~OP;!ea|V_S(@Fx02Rk 0QRjJ&3$^&V? ͻOk0`0=U)E픖Fht~֩CJqԨ.o%P<= 4*=)Â+؆ϢK CL"Mx2f(S|`2 >8CBUçOiʋ1RL ؆m)|Cu@,.@m1g6 t]E(UݹXMpqfmtNeeH)k;0;9t[7l}MBEB1HjbH抳R@Hi6_s?s ==C.{lxAtDVm񕍴_M=@MMGn6B)Sv4ܝYlU ,W߀JǢn~<@EBqEj=+J4)$0 ΈG<*@[?Hd [ADO1=u fLDlY"Lr{,==HKT*wO0K=mXճCvTnK;; āȞ^_ !ױ.+!qQkZ2{) 1J/m3Slk b#TW4Ja$Bh( tZei?GQ<+`$ /\-JlA34H#&2y#Lŧ7d~؋Pd)(!4;Sneeu ::F  w!<<#w*,jaxj[P V (_QK< i0f0@ t+ƌVP-ZkQDP_M\\<|M-4z!lBz: cլ&M`i…*c/Pцl[$\ڀ-_|n NBæxeªAP=+z\EOPQapwǝ +ÇEvmON }Da|b'id9UzA _D :Gؽ (LCU_oTO6qUUTW@ HExbt+ܓxBjIE zqbg a8` }gGIc {'l?fe"/Gy3]Ea э[ ;cQX{¾}h.GP4h]:=eaMwѹ Uр/&r 9HX 7|B+_~xM]zU2M Vj`T,b?b@D,E7/e s1{662&L;iƉ8H  sFSpuōض \1k*xEY$`?0V0(v"mEI˲Knw"Xzߗ1ld~ xY0{H u Z4ef}> hKC^sI?7l=T¤}^޹[ݺd|qQP:vׯ}|}omLzD/vNo! px 9νX^emNOݭnzlJDkWYYobYK;;nX|*hI꺩]U^U ă7mXb&$4k*6~W^(I4sD5+lP.DYY-DdQ=[OK4kVUɓ/>zݪUL b1}^wF4iRR]Zw"DM oӻ|!Vߤ}ZPb)ҳ[Rbf.dNz5}#Vœk9S OHznVWxmsƣo\V5|ʗ*_xdЉ:>.xA6]w됻 #*܋U6+WO#+|xGz@娣}?ժ ߣ.y.|<|˻K=>^x :6^s_oc|*k:~uKMO_=:uad?V7#nlciVZ}(% E._~~ilA O| t}!XyTMvv~ۻͪU)Ujsf=v6mbbb/e˳|>zRmJ4]68z 6xl Np&x `Ҧ˜CWQ)vL ptjw-8鳘Ưަ|iբ&q/ 7100|nl]2ʈgذb~+`8#=C3{>eM 8cW|fMVVusmHc&ދ$-~^Ί,?測=oleY:*pmssTbTxtBpG1~ώn=6hFmK.,zzz6^իmڸVO1fd#5oZ:~?( Z5 ԌӰI7w'9hmmi+R}ֱc/_Kq,ly+[GO^a#4.G(v.CLX؁}.j&tɷ|[:G|#tUKeJ$ʗ Eyr_u᠘FD}WFE ='+lQ<NU2J2?qsmAw?ȣYf=F7b~=jJy<Q~7n}FcF%~Ny'(3QP_(Ĕ)14]n[#"I~x>PsĴJ[|ؒk&fhh-N~=P ;i/#檖u[ɧ@f@x<]ܷBdʤWE;Ja -,eȑ%&&ʋ5jRVHBе$!(ZIx B +8Qd2!G;*OH 8\.cW~ͧt*=kԈmM齝)Ό=T>T9 6ZW5wYiF|ct83U7adTt^+%]' H & m;3nfxwmRmnjzOKy1W&L (ȝk`*Uz\xp4hÆ}fNLY5rdyҥׯWgC& āad(%vuj֐k4_LUs AdžnnFT`r0V{;j۸Z>!eufiҽI}&}Clz];W*L7bn*5{LN=OXyNZSu\٭umjkխR真^+æ%CMjU_yL/5Uz~#7F-GU-/%(Z:gM0hI_8h`6~TjBܹh ))8wW~AO «^1qJt=2|`,3x11#hrWk:v2?s%ظܺXзn>a׶COu@ѹXo ;kQex=՗;iV}|lZvnP}Ҙh}*}aԷd Z%(HamFuÉ?c"q樓G"٠ŏ92=ȍ*cvʶKYHF6`C pːX? _l[2Ch=(HV8Mr)3~}lV:LDݽ8ؽ;Gt]TZ\'1 U_MSEzopm܎' ձl̵c^}\tbi%Hyn:) ~Q2|$-[$Qbh]ҳg psEˠPа,F : id9UzA O?Wn0xs,BQȎ &rVxIl7(X@j]!s«ZquXaCSy 5 / BhW 0G12R2xBjIE zqbg a8` }gGdɑ3k<ɷcx$"Da:Wa"_ Υv[Ec#Eur[\!.#b%-L9jjAO#_+wݝf?` (0ГBHu``U7ϗR>۞auzIb!Z TH"×D$!: єM1LkMLJUec`Iko6c7Y0(zPrI2V2zOTϰ.iO0 jȷ KcpSKbD1[[S!}M$Y6efs'ٸm/`J(9-Mg2 տDEwsTwSb=d^0 qmR]!@C PRH),t) u^YZTnU5q*Ds1ndQ"_1 H!Z g DR|$Ȋ`څcuzHsD1[!_M!HC egBֱ0n$@ +00j5UIN5gb`T!_aw+8v!OsD(0iB:W%$ yM"|ܐQtәjLf5b0`DTb}XhCp*[Xōh w"H@?cI;{Sɦ;es%)R)3~;dP Zb :G0|I 94z,R$Pь==V"5T6ka{uXg4/vW QB&!A{:ֿP(8DW:=s 'Lf2HX.l+? 'c}K76Tb&v=;-D$瑵عֱ`v1fkӐ f3BK= LCdJV3IqLS J'+T8"K ϴ6U'/VtgΙIO9^QD)SW=N钠/c &%bq =5,f6NBLW#}1Lş+$a7Ǣ>*JdQPE*$Y08MN@~ZI;MWM8Y=54s}_wf+>)ȗ/_+p`VP|G ^aE>H 0F |h,JOmJq =!3(^Zp&x `Ҧ˜C@ +f`VKod'MG2 Q<I,ob&{o ש`U5y@l2Z4-^^a#4.G(vzҽ`d Zw$9J 'j0q3As賷ѩkFP*ž}:u=BT<]*," Ljapw*vn?2?J 6yE @ \tUPBqɅ'>DθGMփff<|ݬY/+++y~~kdVB% AкAhB! ytc9T&sJ/}t69I1sD6`t>Pe/8< 26V7CX|TTb"QY#.#Mu-8Hc "ph~ڳdBzhv3..@DD;@L 4NV7% =[dX+h!`1c&OQ/GF<ݤI uH ڧ`Uzf J8A>0aec(-G}沬|-}6=}<2k;mmN[i9C`a\joo2A+J4ެXPO\CU=p&LPV9re  w;t(;f ֨b_+Uf@wBq =6p=aGC PC8l; }pqK:F,ik⃷ʹ *1*<:!y}o?gG7Þ)Vڌ% 'ﶾpV]H)1b 'EI Xc@aZ͚ɱOÇ{/q 5tUeʬwcf-U//]qc̛{' 5 [ӛQY",Nn ѐ@xBA¿7Bz~kjsajd {YMNwdgofD-f\9Muw=)E'$T/ kj]y2 .@ `&|{B=>u4yӤ`ڸqkZ1v,vV89 08iyGxݟ,+&R`Ҽ#jб(ί4ݾ*M]- `GP{x! 7Vl刹0 mĤH5^K>V;M_=Dbۖ@T6!ԢPe$ A+!ϣF [.] 'O\"2 F\-;71f ^R,tFX~R0;\*aR 06凴!0  v۱)3vRw68,^"2,uBljM?f{wg+-GU5wYiF|c L3gI/M+$O dOQ*@X$Ud?$\TQ!.mZ<{2SP!K`\>=дiR%ܿP0\6p+~w"-8`(fl@XK4Ӫ~JHfsŨѳF۔8hȕyM9 Gu O*Se&Sl^8`x1_ʳ#6yq8v6( )ѡ+~Sf"ςC @Fz wpr foÇm(\WQ:CP>T(>S)ƎţG ~{tݻaj{W oc"Ѽ-T_F}@ i_p/@4)srn2|q)3ON6[k "pe8:4zW5!5gHWi1B88 w=yW%=k_˗O TV]jewߦ"c<*to{-Ez V\P"@&d|_m,|#OߊOs>^ kj\{%JD[#Ւ+%I&Et* p-jErTM[fHJXkMCjC~b!`Zdd|j&@?xz,^͚00Dr\Q0@HoB q(W{Dj'zϰ7m&,5]GpXM|؜ɳbE ŕLk·18 uisv ڶC¦xea@Uuh+ZCMD'puG `)@{EUhWB4{@Al?/XPdEJ&AS~0$c)q#w^f BaRz?ZO]kWcO䳬=uG>Gg28IU$ޒήc1jX`%'!8:W"!t u{R3Ǭ, JhHLYV:x]+C߾8|s%aW43 La)V$iEU(,mW^oJ{,,M7!XB!ɥr؅Ϣ~UjoVojSxРJ.9yO3kkf&ӑkal F 4\6OTn @ٛ-jU/=RJʭ?a[}|S3RƼd[X[`ik*v'= 6) DvGp/A7+1Wt, 02Q2GKI,Z/U&<$&H7ggPϻ'UzRO #" {B5Y"7Q,nT+\0x0ʕìYx@@>_d *VD׮xk\GW/`f]e!HTctOgl$; ܹpv's4 06iC`H,< 7jVggdn߲Wc#.lָyf={MyU"%L̘UgDqi;sڬފ+99lḽa-J8}ubp$&1hc 5YŤw˴Jpy F07qޝ8tիcD[mq`WDݖmӷ|Ǐ~ Wi7iWݲ42W,PLcCk\fϟfM,];x^ lL$ ̖׿;Μĉ(sR*REnFؼR)vRټV_EQ8v AHLD` .]8~c#U+f jcQ\F}@ i+N:\l pTVkO) ,0L݈Jv=Y-Wvk]ܐmlUf3Io_?s[1k7n{Zn,') F B(PV~}J t36nDם $7JIS2NF V;<~AQ &-Z`2\׆ h r/ ߌKw0Jz+M5y2e ,&\\н;֯Ǐs={36Ν?>(¾F@rY#aDMB.] $@ [r:6h!S`>d/=XjZ~\B0Vuf柅(Xb Gs8{H&k֠" > wwR2|nZ q/;]w#44Qӧc1{6ƍX~g[/bY3+||pjSB8z_eKUH//,Xۛ.dN/ *G'sohUfIVj1MT}~aUxN< NbNHi[U"ꥅ͛1q*k`5k0|x.+.>;Rl ڄ27ST Z\&L-^M8u Na0$' ׬|}xQXHy&fݝ:2W )o0[8.&]i˲%7nlnU\=7@Ә;l6fGf!m*9Y2Ac)ս<Z)ߧPjM >V-K~aǕswKg|6G)EYI7?ksC ~7)"\ps0NmU/уX,>{;Gi[juna 3Q-jm[}+/C9Wsg=^ʎݏиeo,ܺn˙ݻcbW̴l!( xrCCn݇ 7t([g ԩveˢW ː#S0!8v`'y\QPN}&D PPw6Wyr/Zc[7AͿUcqO# 6goSj!ȳe=E#G:9ENѣ11 8Wcc|ɘ5 Ċ<.۷Q eJ=K9[v$RW\֌?3333P 4oN:={R@J{NN1l޿[uvY`' NF,`b'#p9&~( ܰ 24`Z*' H7?TwLО=`RiZ\=*'Ϳ 1"N"`PH[i!%E•+Jiii]7?r䅯_}23Kh}+**ۻyժJy \RbBK3"AV<[tȾPYjSORzUۅ0Hm6 U9^B0s[;l zN.Rc\ط, ?߆HLf4MB*ge6 dDDDMw E\l_|)/Xk֨j,[;JH…[Wŧ`׋OG(<աQQ#[VIXW՘=I ]  }`LB|ŸTzp7*0xΝ˖Ŝ9$жڈ? 5p8>ޫCxyСWORC7"=]QcqQ<,hU#ޗ Kbbo<8e*ur26o.T``#ߞ- O -B*Ѫƪ4֮Ue.t~P}YeC=| 64K>ݻQ.]Z͛7߮_ u"Q/ aCy:#gjj|"dɠf#kTiXO#p';i{ڊŢO(:vJHQQvM~N#лl6y$h Y"(X}7#]ѣGU!CgfH f''/0V  sofFtN_2_q&$Vb00t*0N|@(@JØ NHMAgT}e?}$}N$$\S.u }ύnѴl )UdkӇ9f:i訲:<`:cثʻ,C4$'5inAtrPHE ś7MCu*ũ|)Xw.蚜gϲ\tN]:OKBك噒%Uۅ_g @@L"yG BǢB!!63*@ A'  ߛQxp*X|V|_!vYԫ.:WJ?aZt3yI4 06ahA$  $l~OQ*@X$UD@yб-׼d٨IvnkM!İ@hQXpyֱt4,}e0KfY~C >|8TkܳI.گ4233|~Y@o@fN}71|>@Ŋj+2ڂakB"ȓH(D"ؼ~=tg 5jtׂ(ꅱ[in#MqpKLjBKlf  k@QsllqRټ54&DEE+PPx`* (QDaaaUTj_Jb9`?0>P4'P;ᗒ#z!š"U&Req^O!,jp Q5idD]UL"cbblE"Ե(h"ښ\^B a_p# 90PTE ŕcATpթVM TE E7=|爋/꛴O1 L֧@ \l( ,&C*)EOmO*U3]Ȝ ^UHX~bcQ&';"oq(m ~ZhcˈB1XEuiMcUzjSc[{Y6lrAl : p8mjZ?m)bUbc'ˎ&.VΖG:3aWf! CpQS f+b;fpC+2=nAji8gee΅ ]h&J ~Gf]]G=(ѱ|:me[K{lr#B. OBZ&z0+EL7XPCmWޕM56 n69 Moc[K} !QMdw|IBP(=2YFUu=OZԖ=RcOAhB' p-0HL[GPgX($[bKJ |1" .ާ!TssgUz='ɜh2Ob204t~\2,7xXuFY(u颪whhhč\]:\md4 l$Hc6%H#gD)y?Ohxt5EDž( ;)fF Y$H&ێ<\kB*ѪnwJgdߴ#!S}|h(C/S-\28mʶj{S'L8ba# KˁÇyyFk\0w 9e[Ǫi@S\A@K8KU^@Jn߸_~SҿnZ!{wO7b(KS yehUٹse)Z KKH^7o*|}Up[Iа,F : rڪb,Z$Ke{ `Ū,X9лl6 tM:@ꏰdSU>mDiX:XUj*zBgHx "o_%Ͷt~ǿPp_}7#ݢLn\:lɒ!?065|x;y lެ|5t܂_/q b*{ݽY&cǎ51dff}|נuQPTw#= ~yXVa\K"!c,8(WjoWhQ7lz~~\+˫)=ͳԱgb"kwV PLmmfTˮӳ[QyU ݻ4Λg;t3E1j$yQJ -WۢǏ9h7inAtrӠhY E)οF8siu#p "pCOȵEH~ .<*Ņ{aw员(϶igWslՓ'o{`N}yp3՛nai"ưa%wv#119.cP%L0<+*wo:}Cow)EFY[{-Z_bJ$\լO05)3 vy@JewCPXX`U%woA,, 33QҋU `CZYb]tQ+U#G0Yk„uAA]]GGi!a80bz:]S)X4q MQWNN05iC`H,O<^P~Tѣ AA Ϟ}qhvV tqSKV˗o>GII:g( 4L6om,^D3k ލ_~āPDlѲeWϝ>=ȑ |pʔ1b? L˵{w.6669A߾rf}ex^~~a2}!@#z!nl6ΜAV{W^rjҥ2 V^QVKDKFRz[5UsB>OP| q3׾0+p\.м9^\ձjzVֲR,gb­[@ѧ5ŋL|~G/ XRy~y4PTWvuhTPIDAT,+>< ¯X8@Eugix{+`X?Kx}лlm7Trz=H),@Ŋh7oHhթ=cR<"IhjK0fL»w◵{"SRocuP,5 G(J4e3ٖ `!Ҧb~#6fp3r /B)+X2X,,YM1l!>88$ƺQVW"KV+ѻ{9Yذ-Z`XqjdD7d8*Ks06FŊAa۷JZ9pp-6HHg¯co|B->{!FE>y8w!ǐ!@cw<~R{vEHј9?´) w'ٹ#蛁ӟٛst2* Hx9RJκ[jDxx SNs2EUja^-HLJ\4$ZdX$05UԎd 3Ҿ(5,~@L8qMnjGǢj ;tWiEb=*ǀkn('#?`&[$)His! )yuęH~C"(0B_ƀɑJK֨İ`MBE|j|Ҿ,)\(=l6y8-hOA J=w2j ǂ$X^WF{j8K؄B~#c=^upX@xANRY 7MCu] UX8K饪T擶B%= 챘ֺT"Ţ@ڻ9b`ez;˷r&>S=P6bae:CyѠRC{{XG6-,;; (\a/0́gT* OC4-bPbRt}ɫfL M?euzHsIYtFUf[PA챎S;G u [H%8:q9hQBTL!8R)^čx>@, )C ݀5nV@z0.2v #'zзRN)0yJ$ƭ[x> <)),P!e+;\ki搻|T*?U3Ox[OK.ǎ6$t$l^*r_ns *+G 8ټ3 4~8F@ mF~?s'BaFe~~J!;adn0`5L|(; waXeK _®]8ryi$'?0(}}nJ0hחUɶ`.QuǏgAR 80:XY̗]yX&I>CF X%h 3 ""^a< 4,@ +ػ'C/E[X@EЭÇXσִ͸ܛh`޸%mh(~Gl.UڑM,Xk4 up 2oS]@ gMcj9sРy t '0(L4q0KBAir@ 4̙!zWRSVE `EXV{0a˥:pFͨ6L&&btޝ6|gՎcN3X̻?IKF ۖnW6D]+X2G12BIT6M@赮,^K|Q+Wp*<=|9lmɃI]c.\jٱT.ϯ 8H'\@ 'Oл7>}]^":wFFi_0hX/ؙ:Z<فk0@:aZ ӝu:Cwj@ǎhիvɢSQiSƜgZW)͢폣HSEn@ؿ^^VX ڵ%_/hRt_"E[X~ #>^^[ßk`HԎ UsսݫSv<~dRcT,5k] HKMEϞ:Kǽ7ϝ sl_r6 c9WeH T;_] ǣM,Z~=/RfI6P0֒D =+Wb Vx,_b fBg}^L$?Ms_) &ШvB.HBooLC >`,W01@5caIOsL-(/հb0|8j˗cȑX@`QħOX{8uv2r[B ظaaW4f ;_N[ǚ Alތ)STY.6`\?~floh,GgOPjꙘ`J>}9SdEB??ع]}%0[ e:r8WӺۦdԤʪiTغahM5q"nUemmw|W]h^UO xy04/y` @ ʅ 7N-Q/JeP(\}Γ'^UDVtÀJΔvaaL`gg'oV6>E׮ c$-=9ɜp2ʕSdffܲ堿qrvQSHMAoӦq##;j5s8 E<ƍ9:tÇ-[`ks -ʕ9F \T)))K7nN(o\yժtLEPjӦnn۶!#4?umދgddȠU3k}ZOOCݺ y˗խ?4I޻׶V-5kU'&nn`~@  QBPʔRSSxxL ~=kxh [bI;vysk*UZyhLUs0h+DOO!=.\,WMiiQ7of{yPSW=bnZem]ygϟ q USدҴ0+ ٪UqKZ@4Mku8n$'*`$Y255A;vTK%?mܸyIh\bEYbU % p8͋.U‰]ǎQn bSL@  ɚ59 Ai> J׭8n 2&*Y'N7Mͥ'/,g/3fr&f˖JS Ew`mTY5^T@W5Je2s覛6A($2gCt,@( /^ $DO.~-hNNvg˕CT#G4~]幪|yt,K>yqvCHתZgjjΜJ3m2bbNVBE@F͟T[89sTyu 5k.[Z T -kT^qqqu9ܧ*`f }WTK -,6z<۱nIqCS{nb͚Gu@qNN=g{PER%琷u0?|;.(ڣkfs.)y4xDP=9w.KթӞsMR) r YЙ3qR;u pPnefӼۼy$Ek77x3g~;TΡ*>cTBCMc%$&kun/[\}i="@Mh([c)Ed)JJ1xZVwu@:?V +UZm;hP͛W_R[D~*y+i(k'MX]dZJ;ѫ\V!Jϩ|4,Qyφ#:a@ wLdoz9[@KR ma>F^ZwbdVg1y>bn$@(4 ە=¹@hlI 81p &0ίQU{Rg]Qz|]H$ggn@;iH6vl#\<[nM(J^t\#Du]\dӅymf\ 7ע0J`U4ͷA~FFn^.kDGGJw1q3.tw= @D{DNVMG1'@ UVcdLrX *o%]LzKsin lfƬ;1ͺ6~dn8>iE_3QBl! ]R"&_̝[AdEA`|[ . 0. 33UicƘuzG<~@b^4XffJ|%TI*-#Tp ) ,QHtNlnt(9$tD2U ђ\^/M||~zv[sEQEq+2;–f؟~D`3#YZ"d@ fF"6H .g7 B mQ, dJ&? mPtbXZ`\5p`:cTQg ZKM.}Z7;,^Z6@*j)b5wWo۹}A r}Q] vSSeK(TFђrc4(`3jjj*ϨQ7W|쯝;݃}2HwWܣ ҌKz9Vtx7 ХUswb+Ū{9A"FYGV7eTaARQl".ix>?w-}@ @q D#Nћ\ЫC%ۚj +S<'pP2L!mZx%xd1B8Ps;Oʕg5!#X:kc k>lDa \}X2v{6MYCVhՍX$5v1q+.5yYN00|鉾 qr~7NZ }F wPK#GZP:u~rgg| +";Cfi5%og gs8gaaMk}%ʑm2_',Tʊ7W*|@ yߧs4ܭϽ>>ʑXfn5sK|ȳ;Sư'@ KŊ(дUlIvm5;I߾ڦ|ˀe_qqR.yXZ/ {rPlݸ7sY>T,WW6bVJ+nժ} :޽&<['@bB֘,+=Yƺpo۶/';9V, :?H|U"͛ YS'65k$sGrgOܹJ 32cjʕM@  *|hO:!Z1@ϟwv}J8yR[ip0 klcǺx(Lί]$8Ǫlzf&.\%[j;w.j|ZSe "UFr1$~D" So^Ӧ-86ongn4Tw/X>}%:i~[Qn_}}}#֬ѡj+S1#s :@ ww7w_(u!-bݺy'˳عSf/eeܽ{d.!!p@?Jl#Fg~ő#4޽5JoX)E1hsЍ?޼9zUpE@  QjneU:y4oj`'Tms}}Z4I*SJ}RE߾[)Jӧm>ͤ:P5d2rw/[VOlKJ*ҧUgMzkwoU3w" 4H5PÇXFXDmYtIZXH/$FV(x{+UjxΝ!e?,k>x_۶]\!7nya>WLoUv}y{k&>3$= ptJ8qB1c,&v^%0AՒ7nnA h}۶4AeRJ]WS,ƿBsh(WW~P%1l,P;k8E]4̼̿T꫼>}TtA:co޽U|{sm۲3{\EϞ۴۳"$<<͍id,g},BɺѣU#Q%&K5isqz=g-͐"MNReM`*p0ܿXҥMmT,XBG}w?1~aeqm;[پJѻj`OCC0PN#zV>m0l^ g իZX7`XݕʣHK jcBHxزEt^LnU˶M}F~,uxTc(jz8~<<}޽>cƽ/5m^ IlnPɓŹǻ(,FN E $5m׺5Vd<nxƋUI4ݰwΝY.C[Z֭+ao7OhUf (9ۢEM\Z5UR"zC#cCHaAm,[ UR{7jNE,_|y]*E<"<]49O%OVM //UmUH-Oء RrDΟy}dWiV-Z{ +WT!M U,bjf{gg܉rRJLLnfnS8x9S52VHz/vEpq/`A`+WpWM(]_"30 4t+/#i.5^P@a|tɕK GI\'' `U`Boг:%d$X3HYX0Ertε1 FB/͋ذ6A2c+?!)渿߫pOVi\!iAҼH+bElSӈ^m9XۈiTիUOǩic…'뗭MT|fׁnUlʉsv]ȱ1ٿV=pO|&͂X? Z7߈ d=\Ν6 _硧-οJb,yWID0 `2.{9u/7YfMܽk1{6>}JL! ü2~= (RGch}qc<|1oOɡv;n؅Wg[6WokLD LAr!9tcw>q议O)WFo}Sl}A772=*Pxj{h`A,ߓ[ I;f> !yàgOxݻѬY،RKJ_}8 s٧AJq^Z9 ׯi\\6D͚X4x<j!C^nXa_֬۷6X~ĩSxwÇeʠzueK21p9xԽOoj} κT=\.Q4A˖(_>wd]U{,0{$I&o껩*LmxCgT;oD Xj QLc!={t>m2 %J/_<LLPx@롦-^_3fƸ~1^Ug-p>Lb?:9 .x|Gj-^8˥wIR oG],L.^ zD~YInɦ'NvB[|v/ہO/2s2ǿm9dZP-1+* sFi٬Ѐ fMë^RȧK}DV7 ¦灹#m,,l#[߃n(,|ukV4jpݚ|~N/qwM9XcԔ;se'H(GI &793cyH4k6`]A(He(b Lba`;x.?"S6cHB Q =JHGBH^(GcYG#E ]|Q,觟5ǡA!$6!ie^~5x$k!@lqiY)\92FF< 8 | 25 嗀E )*/X` $[HYTYƾ&aǃ( ˅09ko ,-eoI``])!oE #_$?hXV,'m%C9HR7q*.]½{x 5Rͥx^h%`c!"?AjcBg #FOr M)ؔ\ӷڰfgnf̍s ^ pT+&0e,Y==n ooi?+$Dp AG*Sha; _+˧}-a6]BG򱆎LCG|> ].jU%li 1 $'!:{{L OҺ l8 TPM|c@Q-RpbċW9A<‚AmR>H*=^|x7}R!!+Wb T];BJE[E#ҚB CP.fHaa -Cp|Ǥij+^5hA\ ''^2Ia`4&ɧ!S zv_tB d&egc`l,_{]5JXޡO9Օt黋֖-^tAݻѸbe e#,[!3# քH,;Cn+dRiYT;:+Ų|ʕKWdnVA6kVם;7R޳G5:|TsHKxvts0Ȑ]T|$ߙ`ï_ ј5K;;[7&N-{ZZر-[Ɯ={RT(6lPX/^d/2xIuRM|-m 7.p.BG4n5k4_+e/]ѹߚ+6 ӤnJU _.˛n`Wv)onlڰ]mx/nkTR]IϓR6LvTٱ^qf,3u=]͆yhUoIiɒCt#%oIh/:[ -֫TJ6Gc%Go.};#r@ U֪aaaÆu:t(H짭{'Nz"ukbY&C``R&j"_6X\R+6#@(KGά>;\qv{zRponQ5^uԺ"7)?]w鋒_|rhK~W5Ӈw&2\+kzO`ǟ<1`BړҒ%xgԧ%6l*S;gf=ʐzuΙ0)(RRbjuL|"8p`K|f}&&ض{߾2f/ܱHDpA!ߝ̡⦡..IiɒCtH~,٧~GS5+W,qtѱKtu3)@ΜjТ|18Kgg6WvR2ܾcCe5/AA?{HtE-'TR͈&'~^;]߿˞}$Bu/}!8\ ?2}v)3f̌rFg>))-YR+KǛe8WÅp4U1!>(Q|رG'͊ *E"ܽtNB |א\&!E.̔^\\u΍%ȩe]kkOyv W ; Wn[=s̿Tp72]Ls ce*㗭՞  =',NPl[ 8jռa[Ekf>))-YR+KǛ݆4C(I=GSBOxaC'Hoz{Ιu}i[߭2]$b/seù/G%q8hXGd)<{jre&ija4DSaBH e7!Pmlȧ*T/$yW.x6ʥs>#6]3"S=,ɋ/ǰavS9s>;Dh2.ϰ>U&lW a'3WH0 tFHBB Le3(0atu| >ܝqn$$7G0쁰v'ME!Hbb+H¡>-xf?'AP I" -n(DBrG!UB ;VW$ŴV};߀;12̐/`V}jqO >~)HBroT0: 5~z c'B5ȸи }enH`$Ϥ`6)%RgGS r- I&Dq-CJ96! +xCג0{|u˖5}G2|p`e$թ|"+c1&`a7 ꖰy.tho5.+W8&h"B8% +Q!RH+F1M^bV5Zdh+]DTW k>Z}6ɖZW4b(~dfK7IՋUP]~ 2;;9 1E jc4 C )ĬQ>O;O+i{M+#(|>CR)peomXHpbYONNsIy @|:R(Xym@EaXSY/ry!0?\^YV#FOضiu$?quŶmСjԨU/_J̵$&!ӫW=g 盆kk6ɀOZ 1$4,)TN`. '0[˾&g&0#FO3;+X6o6mԩ]ׯέ,^l4pD}ϛ7ɃVzCR Ti@gʰmGH~ղ%Q ~Ο;77 mݻSV.?'Nzub+EVw:zAR)p*ުU{b 3x@Ԝ&&Zhc-%]ۯ[5=tugX]1>'f;sd~O/^{(R@ԯ9sT.QQ5sWNN%.M}رfŗ-kx5k+8B!:vĮ]CS{^!"tCj=&$(:?=^$@[Gw' "Y3ĨKFvj@RG\8w,u`\lL0:HA0f ޾Ū޺5g%JXlL&sΘ'+C>\o*n`,^'AҮ/0;4 { F㎒N}7wkZ&wSƵae;NqS|vQJv}'=OJ:\Z3ٵSezǭ|JiRN݆/2̢E[#j3cc!Ez*UѲ/MyfyrtttnJ~S4kC Q;|KMyͼDA(~eVs5yji̋&4$"ʲeC#\\p8umMR=^kXYPV+vr  ҢR吂MMዚM\yM-QOotgl8K޲:.ݾrl}VƟtj+.<ŌWOW]\;Rp (5ճHfԧ%6S-Ҕ{s쎪q򭛧9-TQ;KRG81Eɣ#a.nwǬji9 !bv II…̙GKHX&!)IS`MCvYE 5m,cXdI*Wװῳ}¶mEb0|Z kx"{7 Iᠦ?ֆmsf-`|alZJ^^_q_RHBяt7CP aT~+BzgIhDwټW63 UGi_,I*KVy.T7T5$—\=l$3@Ղa=]y0/I63P;*:zn] sN~GQWloUZX kgr wH)z}jXUZoM+@6|tTn\PM Z\ t^]0k32 fzMҔg{y/US|ށwz*CXh7qLJTyQFo)h"B8T0P Ldذ! Ĉ)69913uP8lIJgc2ɷ{}=32z¬}iey^;=T_/8*I"IoI5I4W~N?Y2~Y[64x8AѲm&\@^ uZ1Qa3.#RMO?:ݭ.k&g3B"o۴gDBE{:Is)obPR:%vf-sSHC|~.O:}CR 5m|vqOKIxkPB  O?jth|Y-`N1Xx HXs" .he;?zؙʁg乛0ûoct^ Fq%[Qð2 [<'cxn !eYpA/BcBHa2Ea|/ v 0ʖ0&xH :[ R 0Z( IBm,B)4jآϣ>"Nqx\疟v ҋA}D2BDFY `Q(X-7jҔNB~M8lisylBd)"@<'kEt bϟh1?V ~7s )"oQ#"NܽW`\<>-0ç ؠzu4iQL¨; jx }q{z$g&1X`XRd8x6iHSMc6Zr2=ógر xygOcNT!H AQkE @! bv١Kn` y8N_G&si6r(_K"%EN Rj`‰cBHDpgxW )\ʜ6X/;fHuf W!;,DivRXeu\6_]):!kbHi|ڴ+4vGq ?bcU=}ƍ1e IH9$@0g„ʞNic}VZY݊<|I&TGI>+W ,YF_?lũ۶Ht숎|9oG` BC`ܻ]W|ZxrСWIE!@JѣGWʐ>CH,qGE $$(AB|RhQB .E߾ػWR"n̙}z~ta͛NDut0z4>D:WѾ=bѐ 52˯gA%o3gb6jƸ}5j(޿߯CK}Ev.x0X`OϦ{=yDNrF. 0hHG lG7HF0!t ӧVp~x=Ou9r+,lALL_fMxq{6m>}î]K{F!-.ǐ8B*fn)6RTqe*K9O')%$JJB>#W ɓV5jqwLQ8ɮuٷOb&xx(]޹qTǐT)>Vii/#"^1?5ˮ7&K9O')%$ ǏeSS ##ڱ}3eφo8pru+ʖW}xG lO I?XX äuqo<]U<3]Is]x-xn-|S.㾈wKd2ܦ?*w񗄰Φ'xDZo3 "1yݶ5kE/Ieu|r+K9O')%$_ZD5֨ Wݻ)}0A\`ӏOu"Q#tdzm׽B7 nW1'h>tsW kG37/[toōLK YES[HPUuuݦv}<|vXXL޽R~%yRq_/Ra yl nj5 ? q9vlޭ[Y %3LMDF 6{oT6V%bzkJUC8 T-o'22T$>Gޥ>0\UJ yq33EP}j[U>spyzQVrNSȃeKHhrݺhXx*(Ç@Ǩ +VWO/*pAwg!B6[9%?d\ p]2,Z*cS?z #޹h{efeC|9:pr+K9O')%$ٿ_%eٙ#GN OHXu8ӼQ3䁐*ߴ*sOIV 9HLՉ=Q)2C##PƟvPyocD跳{RVw{ܯj9ɭ,<˖%Ν勷o߮w@?N; )M+VIm$%QUB.ߴn= Ν?W_R_K5K*7h4mz<=ż=*ɭ,<˖|[|XZEO.2HuHJ:q~$uttm1OHa_2$`djM;hr5]8O՞ӥVä&)WbF7 ~jOro~*Ra yl W^(z0ԍvo7(fV? ӏ󶘁MxK€*XUNܨBHaoXB2խq"|lmx=7SM8 >W(ƴE `Yg !N!¬Z\1YBP SEA 9jcBH ]PVgfU2 ) Hտ">jryVˬn08!FLvg)o/=KYդhU>k4q7<)=z4!E\5'W5DJ2dRptA0ͧ);! pWY<үU&@1G|{SKK RQB JNzs6TīWڧSynh/;?@Fq8SfL\msS1~F =/'^|ryNgUKˍ_(յCu~]jr"dI/X~_u~{ D"+>Q6)䨍E!K&={VZ[ori_J+5<^&;QME}0pj GUA 9jcBHҵjyalƪUS#>=m=>Ĥݹ?{ymڤѥ !"iS(X qE'z8:^U]8cAdd%-k;1NTй3!"Ő!ɓ"_\kobsV.'/ :Љ7л7ͩ6!8X//ZYY-?peD[}^ի_X[5‰\O5@4>և޽!J,Y7!S`ĉpu#;pm.+f=叅E,[VcwH+||T1 BdɒΝ^zN͛2ePB5|86nDHׯ^oҥ~_(e.=GWwwmMڸ֭`$tHEE@eO\899}5;KNN633r'KW;wBgׯѪbbk+WpFŋkMI q8}0lh]K"!+*kWܺ0ش Te?VժUXe,z捻{^!ptҥ{Ok|>ԩBBގٸlٮE>`XZ/]-C ZKвjENR;Wyxx8pjժY˗/p=\:G@ -U>{Ze1djj:90pr`۷o][R> B"c/lR6mԥF+trBnH/?),KX1LŠeu+@~>\5V"Ò{} uT!X\.4!d6AӦMCCC39&&N_2;F͚}ule2JJ㺕M]"ʣE QH_W_P<q77[U7'(koehhe?ws~V:9L!ĉ8vL^Bb", 49y[IexGw ;u()Q6m"DlJ*e`` 39222b,{o&Fo rlГ {%!,&ާqn4vûxa1u^O~ͷ?tы>wOo-/ '[0:BmCt&eq:&NDzX0?&{fMʨ˞ohˋʕMe߄Th-R͛3.nnnI]t`yҏ~~ї'|+8fU(wn֬YᲢE!yaYRI%X\K;it(2qdG=*3BD'WR)Y|, **YfT1vEY%UP$ro8Rd0_)K#I8 LuL.wǘ)&53E6:P6meֻBrQB2<\[2_C)!3 #8Gl"D\kcq\gggϞ)RH"T7e+z_-Q7>ʧr#$8;+!ucY~~-(3qhFBBL'wk<ߌN!LZTs?}p(aG 60^ݡ{;57:14C yThCE@!)~"6oƆ {7upq `ƀ06$$C*B)DRRp!ʖŠAX/aGZ8>TMw0/^`8))SDeIvtB ѿ?BB|~Nsk׆ jV^ec" [` xxP "B@&h8} Æi?A \/zǣL4~v0r$t QXRЉHI۽relقϟt)7"ܺ2K~6ܹxIbh? U.Ο?;L!JJQcbDz?F۶ truQ>oeeuVTJK!8zTڮ\AŊʀtTMjOݯvvt^UK/)V AAQܺ޽ I-wXǎӳejfBt.TvW̊={61#8 ˉ~~JhĈog>Pmդ vV>zSǎg8xVLM |U]w iXrvvv[ndTʄMoEb.s߬۶ԩaǏ d-M{  P0 ֭bU$¨QT娍uq_Xr+WM,a@BH.1cUP|ٳ~vEy217Qu7[ɑ#qj䤍u KKK-qx<^JmFw!ϙ?_5V˖AkRb;^];M9sAX'Np8Xr<bŊ۶mc+BNtakd^YO$y S c4300+uƜgVNrB $lެZ1C/˲cLLr /rFD"Ev8>|z $;m czzz#:-vQa/;`_қƟ82"SzXI5<'9!G >^ܨԑ/=xK:͝XZF=_DT)UwŮ]Td**T}Hw{rje-yzeyyf`xn-|S.㾈9F QH_W_PXil@wsCU~sfknX8ptȒ^'^mRx:Z+J'~~={c>L@HX'OP4, _\;vd%$-x#L\-_>^٠'%2{n;KBXHgӓMOطhw˜cnq{6H$O>3W^<5fBe@5V|*(Gw1Iɩ'˗Mkk]6 z-qz\R.8儐BK4o`rU! 6)g)nEБnR2L^J3!!Nb{xx?~<{,DڳgO';aldě) [ hz7G +WZ^Èw.F\9MɆԸw{V5 `݇?3N9!zhmv/W7j3l}꠩߾bz"OIximL<uYIRhe!LÞ 9l`=%1[-["2gW'tG ÞBnA潍9Hmyr~[yn;=HJxy('R11p{r,{CQ:6:ȣl&MA xy :ښguؤow|^IEmM_ꋕ]^kvf Uf_Q- 7=+Gr&#<0y{dy{TvRʗ77HUs R%$ds@$譏5Ol&|ߐŠaYMhHYR>Sa sEHν />t^fi[eۦU>#^z:<_B`ng>ŗyuP )8 P+i xVع3 !Y=E ,B 8_ӯ`k jŏl&(PmBN''kO ,B~Nq/IuxQ:6>^ѳ+,|c𩀩OĢWciَ(y Y1s6&Y"yYDcF"SqR%#i-[nn!T /~jX[%mɊ׭)xP\")trį,j`O=K+Q)>UO`b6m<)-wwNC''ke RN(u,j`B_SV޾Żw.m9x0fwD\X62BZyr$ofSB&Tw* \r9!!-]kڴbDFKUA l yxxj`BZ޼RE'53gZ~f^秺_kWBr1B ,L`cXHkkدXXĉ\ s~iV~.^T,cGB=D )]S& #T'#9Y}ӧLVZVʲj2~hp,B@m,Rq ;:VXm\.})H׿?Q,G~`bbrj˖b{Ç'cѱsw˶mիe==R jc[Q֬@HA`dTOgPgggwjժ;3wf'P6mvLTٳ>4Æ"2e`nnqsss* Rp@9rUJ <=|ӧgailY>}ί]B.al,:uB\bjcBHA`vUϟ憰0Zҥ>=ں+2hK*1ukqTn2=.?J,HRCX?EMR nݰ`~EXRhb2<,ciYgl\s;m:z&1i\\8 N:|)F@ QV !47cN'P:!!;nrf6`M: !0W˗er'PFŊ5 f!$.]| 3ȧ40)ԁ-jք+a ɂ4 3쟮RzA2BAx a;Nn;'!*!P,B),Cr B-$`e$)@ اhP-eYTF$9k,m^Bȯ 3[5bH 1]zEͤRDz03[b<&!٣"={ғ!E,Q1|amÆDTd X0\n 1d"#, .}:BT} //=իadDLȯm'M}YH>#XB:u™3iׇ?ZBUaoѣqfFP;N@ѢT̄zB!Rl,5Ý;++,X=;  eW:Nnڱ-5ڑQΉ3;9VV[_%T*|۴V fI+R ۷kM EO}Fկ]Jmaet%sz(X>>+ِM#,'e{sfd,V&<}r1ԧu'n[?'zHL᧩K~f7x0p#CvjZf#ZEm7޸sCqzj&k$&E*סFlz;naOթ?:s@zPF^n}aoݽzp΍9y(] >9wN3`Ot)$5)>j;֨TNn:UݤC{S^]JiRN݆/ȤIQ>$~,`cps{`׆7nĻxM5vk1ezvo.MzkoOkFj[QsSMfL[xU1.d8Y̓4N-5~nM6[/35YiJ5jh&g!xC@* λs 5WԖM3ۻT3>vYʏQF)ڻ]$'EXZϢ_9vj(pof3Jc[s ՗U\:.ch >ߢ>< +ǐ0 IDAT`vdZJMUzAɯ *t ,jX-/;!wgiy5Ӈw<[ն<]w鋒_|rhK~WsVͮXq~3ҽ=6Da oZ#]2y͝췤cNUmj9zw|lzVl5GPqLAKkDigv+5#a.nw, kSp˴ݶ~=a2+}Uy-)u}vG8xSܜaD=`\rufsL- EUEy5]EjGM>oj^αyfM;kѫnP\?G =Kʗ KtFMK RK&:MUɯ *ݯ ,}o\֩Wck&ǰxblp6^4تE_-PwƥL4ujh,f, ݩmoc/xk2۵8.3. [VUokvjJ_-i\Tm_(%/g 7:(3.nl)7)u& <>`kS}i&;X™}Jgx,Y=]GIf,Qdfd{{נ_lʨs\dWTUEy5]tԔע*l=(p:k z0ϻIIDVLW񇆫[T&QhҥdNSgxk,]? v}iXE?>7gݐB17l3k6 +p` z(SS*OG߬ӧ># W..e{kqɮX5AKO:;i#lc@d^JSkji:ҔE TY5e@S to%7RÂk_-ZGQˁdKRK&:MU!MU>R>Kx޿2o#2kUFs5\vXsT&zohmG4]ug!]hɯYؕ)?*R6Mm~kz;D&>2>c5!KD^_?g&K$'"~f{Wӟ,y5M3>Zx,E#f#cە2jX(ӊFfI&K'9I'o2tt)Pvʘ bc?v76M8fݩ[Hn&ETŚZ"hpq  '9iD毱2<>&'~^;nF&]F9tћh0{vM/]VKm_/^4Uq4UA"! ,yw564x8AѲm&ఢuz1βl7'f{ i^Pa3.3Zy1dGlkMД:6u;mÕRzO\Ifnѽ֏mKg̗.ǥ6LVu8C KW}P M6jҔٸvM䗭|zZ@gaP[<QZ>.UqW1|ԫ6KB~ǯ兿^s} ߯S{ Tſvo`AB+M)d!H}:.˩i뤚T T$K2SA)Nr*&ِm,e eqB^B7!OWo-IENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/trace_bw_heatmap.png000066400000000000000000000134371507764646700253130ustar00rootroot00000000000000PNG  IHDR,PLTE@Ai @0`@ԥ**@333MMMfff22U݂d"".Wp͇PErz挽k ܠ ݠݐP@Uk/@@``@@`pͷ|@ }}}{{{zzzxxxwwwuuusssrrrpppooommmkkkjjjhhhgggeeecccbbb```___]]][[[ZZZXXXWWWUUUSSSRRRPPPOOOMMMKKKJJJHHHGGGEEECCCBBB@@@???===;;;:::888777555333222000///---+++***(((&&&%%%###"""  eC pHYs+IDATxYqsQ"E( "AE$ EQQDAP(el?{sdzǿ붻 """""""""""""""/;#|*~Tem̬?>>`t5UVOeftS)sJVS飩- oUiJ)\8 ..?ph1+?@ߓ* e_3`f?U M+0̺ M[NϳMoӁd0\+,f40i. c@& L I @&_R`Is30i`\!@ L40i. c@& L R`Is30i`\ c@&m@-/7+o kk.w˯ĸa4R{}ait9{.zrhq|f _}gJ7,$fОNBhr9 Su]vנ9g`'!W0-[Y֧MW$Yo:s pڙ7Um`Z `m5UeX喝5(~0UOR ^ Pk p|J)ZKz̹ڄ0ʘ솭h=^$%fBؽ`7l9 9W.Kpګ6}8O#8 9[i7<{➄^YS):?J<USR!=;-%_Z3%OA֙ 0h(<;-%@_lS`R%'!S`A\93%E ROBZ]k:SRGqhgA)W9Ii3i3 Q)0f w`@ #R`@ LOR`@ R`@ >)0fD;4Hj7v7xgp_\ˮ>b"/ U tZL5vS<Pjp}-:soݔbT/ pD8ݧpq)n˿aq\[Wٴ)\"2Xpfo8!OGVlGhOB$+`x9>i_x Og`w$$j|1-`9~hSb.&ڞp\v_ `3Vt*.g] O畛]߰e5aSZ/@oCbxbSe.OcOw#t㵿/{RNê#6E3lKݮ}:s LO{OOO;4wH1 +K1 `}o c|LI4eS ˿afJULI6fJ: .FU}[u̔t@\A_mq'LI{%;]'k`Ƚ@8S3%Em 2SR^ H v1gJzs|1zw7W8.2LIo/PW< o c@pW< o c@pW< o c@pW< H1 +0i< ZJ?cwvG@6zA 6z3R3 0l as~0l ag RgJOT/`Z)qUҙb~u &M?v[aMܨ%))`\_#%!+FJfJ ]GJfJ 9EyFLIp<ø)%0l˙&͔4zbe`\ZߺTk)E6o?;nu̔tܢl 𫥞T2Sq0l;j)e F`ֹJ 6z3 0l.< 0l as~0l ag ˥8ag =`2)`@6J(May)f]/o3w|0\_"`3ift=:2ሯne :ǝ2G::tgw}ߌxn5nMWc@v'N@g X߯/8 9cϔt?UqĹXj7@IBng+}_:o0_v1IoEN) [8kķ2SpXb̔ݹPJ^eLI6f:d$m2A3%pg.@0S0Sw)aVW̔mXݞe$`sLIܝ V뷟Ej) -f=`\'y =`@6Jp@6z?G 6z3R3 0l.ϒ< 0l ag 38ag =`\!y =`t)`@6z?M  s!%Yᷠ|RR u uy:H>u'HYkrµvB/0j)Ms!Q-|T}ٷ>+>nywek|GEiS:1\'8>l[X,wÊz ?:K+,ߋxT7Pշ(ӈ/.{"Nj[['bוP(ugaWǀוe!P7z6vD4ga78 U.-{Tm`yEck<^^qҲb1_--w,/v #r~PO#{f"eaqU3RlLWA-҅ˋH- M쫻~r9?X~魆7O aC#"""""""""""""""""?ʯ]ʣPIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/trace_recv_use.png000066400000000000000000001373431507764646700250220ustar00rootroot00000000000000PNG  IHDRX%sBIT|d pHYsaa?i9tEXtSoftwarematplotlib version 3.0.2, http://matplotlib.org/8; IDATxy\Te?(09*&$ZITjW-K5 7l1S4on7V%KTL-MPC$D9Gfp@_czsϙaμ\:2!ժ1A'""""""LDd2Hu0rZ933z?}յ}C&uVtd57o<<XZZbǎ듈HM]&55Uprr{˯U**mۖ+h~+WԚl߾M41|PTXt)Ѡ~ ԩS1m4+0111XDD$[~WbȑȈ0A<==]a:urC!=-_x wxx8,XoC5xDD[!`d4&pS޽{xwѱcG( X[[;w,WW&aXf ڴi\lٲEݻwgggؼyFT055EN_7%%ݻw)nȑ#rJ)vC=,ѡbL&æM0c h Ann.n߾q5jܹѧVBǎaff+++ 8cUz*^{54i Æ ͛7ꫯƍ/2N<4ڵX_HHd2~L&w}'`hժLLLXhX`ڶm \MbԨQ HNN?333m_|cUff&d2.] rT*hbb-[wAaazʰb uDnݐ ]v?_a{ŀЪU+Ǐ/w92gbȐ!P(h֬F* olll`nn@gKD_sICTB.õkפ:Uy߿֭СC5Ξ_>K,Ř9s&P(ꫯƍ駟:޽+i֬իWjˣ[ʰ`I.///,_cʴi.},++ҥKRĈ#4>߁-#LMM퍽{~~~R=1dxwѲeKr\x7oĉsss(J?F_ٯ^k.t fffpwwǮ]<8qҥK Uǩ>L&aÆ:!//QQQhٲ%߿}^CBBFQ?)) ǼyиqcZ C `t 8s nݺ%g D׮]zj( lٲo޽+Ν;>} hԨVZM6=v{(,,ķ~#GH-ZݬYDdff"**J:`8y$f͚ |R#11SL%KyNBf#<<gϞ{gsѣ066,ZsQ0gܿ|^|E;v nK/oׯ_G-PRR {ŠA탑tA.]РA̝;...8r,XL$$$xp0`~gL>BLL 333)SNw̙3ѬY3?1cꊞ={>v}hӦ -[&MݻwѫW/\v f͂Ξ=sطod2?flذcƌy`bb~MJ`Gয়~K/${y;vL=ͥKヱcBP 33}zӧOK믿7xcƌӧ ҏB_ܹsѹsg/ z쾪BXr%5k߿oߖU}Qܺu Z_r%rJwEHHv ccc|BTTƎ+~__|KKK_HNNѨQ#???DGG#??C@}|Wdҥx1g8tرc+V`۶m&Lɓ'#88xpo̞=qqq7n^{5\zcǎEqq1ڴiS.h`hРJ|LL 7o;w`Ï?Oٯv)DGGcP(khXhd2f̘`\|Y{ Q!NJHH>6lQQ\WII(..cƌ:ux033999۶m+\]]2OOOZam۶:uET!oO3i$ؿ BBB4;2eFyhh9"?PիW>}zbԩ7nĆ B\rEzo͛7~_xQB>|XӧOR} ___iy\deeioٲe8{B͛7 b֭?.UVIeTcǏ` qe@k<'4h ?Q ~Bq!@̞=[g?ŢYfbСӧO&&&0·b%;wJϩK.h3qDajj*ʄB޽[˗/רpB@T3ѿB bǎ:U%Kq!Сy | Q_wZPP 9--b޽ؽ{cURG;V|>##C'N(?z f͚%"//OrohSK*SCzmWӧxW~ `?k׮IeiiihѢ(,,w!LTq{Ǐx=zt0221֭[ruq6aÆx7pEiHT.]{n̜9/JJJ+ׯ… igM Xvwwׯ\y^^4}׮]d6l65o:t6\~Q`oÆ ˗q%gwi>s{A%BBB4u͛7/cǎ -M6k߿Y]v;vԈ_{nItÆ ömۤ楥X~=  qaoo/9::ֿk,{yy޽{0>{Ju_C +++̘1Wƹsթ>;;2L:SW^yEc{EpS&&&7n +J_UaDD5AǏGҥ N:'T*}# ߥK2â" ?#;кw꺷nݒ Bcǎ5kڵk;;;H@T}X8O}ATbҤIpqqtmLMMX M7ooo( EGGcٲeHIIAPPlllЧOVvWR%]/i[G} &k׮غu+RRRpqj}jOT_І 쌯Jxpm6999:BƍXJgCBBpyLYttYܤuVg]bkk LY *oII nݺ%_oOO>Xn;GbΜ9޽{c޽ʂ9u릱^^^XpuIlllǭ]5[[[lN6mRT8?t還? C@@@9sNBbb"¤/iZؔ{_]Gε%۷ǖ-[ @zz:1o̚5 /2^qQcѪU+粳Cu=Ɂh=혼a!>>^Lx)$`bbauw5>KKKW_ť<Ց#GbȐ!p޽ 777҈m޼yػw/|}}1e޽{?իWUVx7qF+xѥKڵkؿ? W_}:w9n݊={bԩBYY\={wE׮]/bX`rss \'OQF9dDFFbȐ!(***w6m۶ fΜ !wa޽Omٳ'OBx{{_~j߹sg!** %%%qazvªU֭[Cm۶!??}Pxyy=b駟Я_?888޽{h/PmcchDDuAHH<==퍦M"++ | s˗/GXX7777+V@ $nooSx0$<22qqq«k׮!66-ZиN"?>bbbЫW/\p̓Dڛ*Y@׮]+6ŋ]]VMaĤIĪU066m۶7nԨ7sL-\.[SN-7SRƢywb~ѭ[7!EŴi,EEEbرiӦB&i57|u>:zݼyS/]v7fffE1BVz}'N!!!\XXX!Crw!E&M\.bb߾}zmBL:U .Xs='r޼ySL2E8;; cccamm-^x1{lq^qqXlС055m۶b?9::~W^j>;wĜ9s011 Bo^L:UcR OOOʭsС޽{=ܹso߾BXYYA+Wq]{J۬bR4jHW?^Y܅I&iӦ"""B|?^ 2D333P(D.]Dbbb=^!^|E+hzm;=rxWFիLeeeѱ]jjp?P [[[abb"Ę1cDffzhРg|iiXdhӦ066bذaիĂ DVv:tИ]DEE-[ SSSϋ;v0mgz?i^} dB<憌Td4i>"[no, D@@Ξ=+ '"]|m۶ELL f͚U=?㘠QMB/?_ꊵk_"ԩSؼy3|}}ѤI\pK.J™3gtND5נQdXv-PVV5U?W^ƍ#55֭C~~> pB&DgЉf`NDDDDDDT0A'""""""O,eeeΆd2YmCDD!n߾ ;;;̀_NDD>:AΆ}mADDD^VZvuDDTUt|t v@&Mj9"""T*K1;Gߟ]=I&<QNDDYEw^FDDDDDDT0A'""""""LЉ&DDDDDDDut"""""": :QTm <:7u>s*"ψ:A4i&MwﮰMSSShÇGva."""""""C3HުU+,^HMME޽1`={Vg|׸pn݊K.a 2=$$Dcy…GJJ ڵkԩS;::b̙ Eqq1k4^"""""""C35襥oPXXaƍ09/**BQQR/!lӧOrؾ};<<<*l3c 4n666r vYa8( aoo_@DDDDDDTc !-- )))0ap9\z}pQ$''o߾Z뛙a۶mӧ0zhxzzNDDTϕ`Μ9pvvZny桬C#""U9nݺJo߾=~^vBm{mK,ի_]vHMMŨQP(ovxDDD֮A'""gӑ#G0`͛7#55##""]L hs'nʞt}u%;\_UxlWU]=z`ѦM:u '|~QQ$*PAx#33Ю];̝;AAA:?2e nv9 IDATNJ+`iYs #ռ3fm۶EÆ QZZ bȐ!Z!66Q!9^ky~vdVZaHMMEjj*zٳ: :iiiHNNFrr20|pCKDDD5諯† i&o/l2|ZGGG@z\zAΠh,/\HIIAv@rr2RRRеkWڵk .aQ 6mfΜ7|arr9BDDAXii)lقB4?r @nݠP(믿*T"""wE_A6l۬3`ĝ>}>>>wͱ}vxxxhRY\T"''GgD … vɓ裏0zV Ґ & ,, ΝCxx8ͥL&+!r8( aoo_#BDDDOnŊ8p &NwwwDEEa?~mFDDT vooo?~˗/Qy-7oYf:FddR1OtVYUkABJpvرcҥ ѣ(((ur"""""" Ϛ5 AAA۷e8pZ뻻#00o֬Y7n9;= b~: }lqFL2>3DDDDDDDD'BT*( I&^xiZ33p$5GNDDDDDDD1A'"""""" r z||<㑙 h׮Ν m.\iii011A~~!Bv|97hufC}]'}j2xoa3ZŋT ٳgu>  &"D""""""Ze3!!! .D||}>>>wͱ}vxxxTkEEE(**U*Ulw777!%%&L@XXΝ;pKB=Q2tqq,_GTTTHiYR1I'"""""zנ !PTTR RY-ղ."""""""C2H>k,oƖ-[p$''lsʕ+(--EZZյCቈ$蹹>|8_B///$''o߾:̝;_~ܩS'W!L!j;RP(PPP&Mv8DDDzb?DDOk-\}_ŝtcNDDDDDDTh\TfffP]-u]ۤKeU{}kKu-65/=44Re2nݺz񈏏Gff&]v;w.O?!''vvv6lfϞ JHDDDDDDT=''G[XXUUVXx1\]]_~% 'O]v?eeeXf \]]q[(,,IJe꓈>H*5\}ذaz͞pB#%%Ek@iuָp㙠SI#AOHHTJwXZZoѻ]AA+STT"iYRU:>""""""`YO> sssrcЫKb WX/.. BzWGDDDDDDD55jd:O iii֭[O?ņ zwh@ 4cǎhDFFJ*I: :;j,#-- gΜAXXX;211&DZ|r̟?QQQZdgg>>>ۇ\.\.tlDDDDDDDMgk-˝~BATj5?^x$$$A'""""""28 .Æ C.]*5Y{{{ܾ}~vv6e˖͛s͛7lDDDDDDDu^#GԴRmrss1|p\~ ^^^HNNF߾}߳g.^/UV !*2Q':2^{McYׯ#55bbb `UT*( uv""ǯq=}f~Й;88\yII Zl !=zSv.]Š+STT"iYR=qDDDDDDDsK"""P*=55ovO<ؼyTssslbРA;vuAPH\'ODDDDDDTdB}? wEII hWщ,"";vC,߸qCllk׮HLLD'v޾oV6kDTT* E9~5?DDOk-\}_:'ؾ};8RR,?^x$$$T\.\. EgVmL4 6mΝ;aaaB6󃃃-[7oJ5o޼b#"""""" &k֬^~eٳ7nw(OHHȑ#ٳg.^/UVODDDDDDToi͛ Err2c4mڴ6^FDD_!"z]}!ddds֭t邌 :t&""""""zV~Bvv6L={<@""""""gA=&&%%%ݻ7>~7nn$m6?fffŒ%KVa#-- 7n܀^z%,YvvTٵ~['ؓmȓF qW~DDDD4z߲O?ر#^u|U4iRRRw^ _ .`֭tXX"ŧLL}ѣGYr""Ď;#GI`Yt'$۷o~J5^vB.~:=֭[ {$DD :Qu¾}0v EGG@z\z@^g׬Yf͚t,DDD IHHRD~*I`YW>tК㩒\_>!ԗ}ST{7_+"NeeeHHH@XX:BDD8">j,ɐ-[VX/..۶maff___,Ynnn0e$%%+VҲRqQݲo>\rGP =-- .Pŋ'|8IDDDuW@@@'%""zڔS6m4(JzS'jkN8={jmdk׮k.\xw""""""D#A|26mwsUx[] :9r BJ[nP(_&EEEgyT""""""/4nLwc{{{4lذR !=zSggJ%rrrBhAOOO'4h zyy=Q'OFzz:>,cÆ ;w@B?"DGG#22RZVTL҉^H;v(ر#d2.eLJw$:tZ͛(͛7GnnnuܼyS=yT""""""t^~jDl߾Jpvرcҥ ѣ(((oFDDDDDDTh$莎Z_U&M¦MsNXXXHא+ im@[Xf Y  DDDDDD)w]p+V@FFd2ڶmJ'???9Rg7bʔ)}Y ^پ.q?MDDDDTLп[ 26m A+Zc8""""""}鈎Ƽy4cbb0cƌJ%DDDDDDDT1 zNNFQ|ذaj4,=#[e?V+xιi*jjuU^F׾o#ѳ'?+?|0^|JuΝ;Jpc-\hԨ,--+'Q} z1c 8qݺuoHJJҨ[bҤIܹ3JJJ0{lܹshܸvǠAuUvۈ ĉVªU>2 vR'NgϞ:+\?Q}3A/++N <:HZVTպ~"""""""@dd$zOOj]w\\ gرc8pnܸQG}DN<8|Tq;w<Ѻ)-T*&DDDDDDT/L-Z9s ͚5L&{$:tZ͛'Zr9ryCDDDDDDdh:˗/ȑ#܉ؾ};8gggJ%Je!""""""t& 4@ݫI&aӦMعs',,,P(033ʕ+Õ+WPZZ4+ͫ%6""""""@$qSNʕ+x-Z_}UΝN:!&&wANЩS'VK\DDDDDDDuL!=QVV~5߶mA JB4iҤ!"" _!"z8^ky~s9=""?lllxb8"""""""z< _lݺ==XԶ\DsnZ͑TΠUgl'...ɡC;;;d2رm>sI&dϯX" ᅬܽ{ʝCns]b֬YU9O?ťKЬY3899$~ӻ U*wypJ#""""""t&衡虦3A1dբEEEҲJhtCYhͥǕ+Wx]qqqP(޾#%""""""9 5o;88 ++J#--Mz={QPP =^Z؈ Ec{~~>v BW[nJX[[ںJPղ."""""""C*w zXXXwr\xQZ|2`mm mrrr#;}4,,,Pm =Q]H'###<1 11QkիW#66VZٳ' !!#G8jL!j;RP(PPP&Mv8DDDzb?DDOk-\}_>;1A'""""""MG#=#C(oaz;n}UMįM]Gm1V6ʼGks[k㳃i zII KTK'BHH ɰcǎ !""nnnhԨ0eTKӫ~vv6l2>}HNNƘ1c%""""""F]"-- U$((AAAz֭[e,\Æ CII 82.:3݉'"22W^ /ƍk:˗/$EaѢEs -T*p]шhkoo_AU3 zu\{p =se>ir+W\WWeMuï7_O"""":Ϡ5 111>}:޽CbX|9|Jur!2 ;vxl"DDD7FqڵJKDDDDDDT_Txz YYYqrrrpU3ҝCn;`زe >;w 88=/ 6 666hԨ:v'NvXDDDJ 6`ذa[[[M>@Nw[ׯK/$coo}_{]DDDT?޽;{n(J\t Qy}صkWSbÆ 5ԉ'P\\_u+**JxQݲd#!!]tVLзlقa IDATСCRYDDk߿FɁ 4ʛ5kP(}IDDDooo 4J:uڵkuDD9=00WFhh(ك/;wѦMj `ѢEXh|9ud:FddR1'Yfرc2e r9FQ~\\bcck!R"iZ33p$Oo(:tx7?Ghڴ)<WWj <<мysܿY7nWr9ryGDDDի :uٳg5ADDH>=L=lժURG}T-X[[Z^1+%ׯ_Ǚ3gtj闈jG-Q[jYn~>s4J$|5TT gggMĥ2 011t8DDDER~ڵ ~:lmm"[]}/WEtʕ+999 UL={DϞ=5QR*_5y|чOίo[SE(w=a{M(N4v>~*+>Ж>Ȼ&muDDDT6T*qQXZZ͛7W؊ȑ#ի,--!zѣagg}}}ԬY}+W.QyQ'|F233ѤI 6 ~iiѢ uɓ' 'Piٳg///xyyQF ܹsѤI$''Ne&y㑘H'''4kLq)akk +++ODDDDDDT L1p@>|UV\.GFFݱuVԬY U0uTdffaÆ^峲%d#*Hܗ_~ LK.ɓ'x).^L0| [JJplȐ!HHH@\\ׯooo| HDDDDDDT^8Q* `̘1^[Z7yvѺukbǎ4hҺ$dL҉\(0AͅnUYժU+srwbbUFDDDDDD6Nqԩ&L ݻ@tܹXxRRT0mnBXX㑒'N޽{&""""""* LW\ϟvvv-?+VgϢYf AAAh֬f͚|*UpQt!?33bMDDDDDDT8 ΝCll,\\'''tҥ؍A.%NYi&;Z^H9vU}׮פ7"""Ҵ| 7`oo/Z"""""""hMqoР#99YaU,FÇcx%֭N:]5j*,^hԨ"""о}?}믿{Ɗ+Pjj* /DEƩţ_qFHQ믿3:tÆ CRRFu m۶ 'NČ3˫`JALL R)|||6QYW"qgw}\]]w^Y7n(vC1bF ޽{|'OUV5kW^ *Y{%<3g}055E@@^xz+xxzz*Ǖs H$!9֭[C"xQyocǎ8s СC|ر#?G!'''_HKKSzNZZgxNVV2c&""""""R|#ǏG5Ν;SN%J&^rD"3FFFVPQ&,, DجT7Qi˗?{ ? pBԮ]...?~<~w<|؍ԨQ:::FannPHRa ,`ddd۝;w+&K ѭ[7,XN£Gh"`ѢESՈZhXhӦ `oo/lꊌ >}Z()dddM6J011Q؈ʃIثUjժ+WFbbb Zl WWWHII1cwttDnիWF={rw"""""":\={ơC#33k׆;; Ejj* kkٲe {+Wm""""""._^jUdffVZpssCxx8aggWƎc|jհyKR!PVe=>""""_xbABʗ=ZqUh.Gs>>'1umk-LAuSZ5|b9QkZIs))_ײY6C(S>)/EN'""&cJӪU`kk*UE8z{uE*UPV-!{p-A߶m&N3f !!۷RRR <+^۷͛߿B&""pBl۶Mxիv4P[#F`ȑpttDDDY9hݺ5ѦML>'OīW6Xz5Ş={)Sh8:"""{гӧ+NjTǓ'OeiJdee!++Kx->}+WT*۷q! <vvvJGωtOh׮,--qm@DD{ׇ!""S[>`DDD 44M6ő#G kkkΝ;Ç38H"##///<{LXjժptDDDA$/h5- HMCDDT$eϟO>P^=\xnnnxb)CDTlV?yA5Ga __QZWq'""))) ͚5˷_,#33Si,GoO-UշY:}Ǝji8{JSTT>!R[[[H|ٳNNNHu}ժUE*UТE =z1blmm;;;̞=j`ʔ)7nm\ӧOc޼yꫯ0eMGDD6m6L8VB۶mzjxyy˨[nW\Ann.V^ {{{\xĒ%K66l^~SQvm,[ txDDDZAm zxx8F#GxX{"22aaaw ݺu^׫WW^Edd$t""" ?=z\i:$""")و~OOO?~dddZj ׯjԨ!$ׯ_Grr##""jI=z+777GZZZy&VX1cX&++ 2La#""SSZD r9D"ƌ###a{ѭ[7|gye HJ@DDT%$$m۶nRTi$5jԀNt#44RTv}?{ FFFݹsGBDDTD"<< h """"]OO-Z@llXifff<݃7oףR+abbQɵoaaa xNNЮ]; FFDD=Զ{PP|||вeKa4<%%{߿777ԭ[K,Çc ,Z:tڷo8z(d2<興 Ejj* kkkۇ7nƍS1\999Xr%ǏVTD$lW&A" ##݉_!!!3g¾RweMcGKM@lҤI>|8ϟwuXZZB,UV?>իlVV2EjKзmۆ'bժUh۶-V^ ///\|uUzNvv6>3bڵ q=$9oժ6mڄ ;w.ڴiK.zʇg^etLj#0rH8::"""VVV,9s 00... ڵ+Ξ=駟]tohڸq;w$""F-#و{zzJG2e ._ *ݻmhh\~]qX XjI=z+/#U@hhhc"999\wVV߻. QE)V|\H1cH>TXX$YYY4d"""[V|ɈCRRN:C&aС=QF7ZsssB* ۇ=jDDD˗%:ݻ4hЯ_?ɓVQDDDZEE߾}ӧ`ffVvxQvի3g#F(r][n-H/Mq ?u!11HII1c <'%%R)))F_xya,Zzzz~H{9 ǏT8;;#::f͚ȕf͚:77ߦM?s 7nW\`dDDDCm :;cǎ-r 6`Æ }ڽCPƎJcQEs=۟W^i """"U܉|jԨ=oo&p#""Q:Ogφݻ\DEEիشivڥGЉPz¶m HYf!11;w#"" A'""c#..Ni-{k׮x왦C!""jLЉP...u떦 ""jLЉPɓk.B&)lDDDTr խ[7@޽!r"999 Hk0A'""B:tH!i=Nr9p+y~;vtDDDZOϟ4 Q=Da9:tPS$DDDKtKKKܹs U2 VVVsLLL4cZ)))DtH77|{ЉJNJ*N:CLLLGՇ}^oH$e>}իWHHH̙31o< EEDD]:A'"""P6b@TDDDڅA'""VfM\zUaik1XٳgC,k: >kbOY+HMMł ФI EEDD]k1XMQaՇ}^o)}ݴiSD|~kݺ5֭[ t"""*TRRJ*f͚R"""">LЉP֚Hq8"""*T@@/_oʕ+1qD DDDD}3!!!D p\.#$$ׇ.]PӧODD<{LݗR&9rz%D" .cǎGڵNmWX_{nZLVVKԨQݻ7޽P&%%z!jԨdgg%aaa裏`ll 333|'VVU_šER իԯ)J{8PLYY}vm66m/6*&PF*l.\-ZXr%Μ9 xxxBC*"&&111Jĥ9hҤ V\*W&8s VX%K <<ԯ,)[n h'NĎ;uV;v /^@Ϟ=A=cǎa֭ؾ};&MTVaܸq8y$bcckxzz"33S(LJJBѾ}{$$$૯B@@o߮k֤7+W^p,ygѣG&QE{ˡʕ++刈3Я_?ƍannGFbb"bbbpIj fիpppP뵔5^^^RzLUe|6lX,3]pA$z5}}G,+}@FF֮]~ ]tl޼VVVؿ?v}˸s,--~-0o<ʨׯѡCu"""舳gbɒ%O{TX100(]#&&ǏWؿgԫWOmUTA/_KKKbu7WiiiʊbtǏ8qDxD"!Tտ'N@ǎsܵkWܿ깘r033C tX||<^zB_;;; %𦯳 )c222ժU)MKKM:ڪ:r }3gSNX,FZZ`jjpB_255^}r]vpvv2x5=zZjeY [[[XXXŋ?XegÑyo 22*mb^μ=%]m IDATƍޝ()n**WY[Q 0@3Zl kkk޽[@o8<;VhYKe˖8w7ol_|/>>J""S9CCC½ *xAz>|o-,,Uquo1;;O>U(ݾ~)^zU!/_CN:~UeAʕ+zqIeZAL͡.k?Kj֬)$ׯ_9DDD*Bbb"jժ%L̛" وC6mӧ2NBFFPSUȑ# ڷo,--MWy1ܹ#LnѢtuuxB__xB}A,E ?~%Kb5,22榰ȧ* w}KKK,_B=b (p,[ /^zٳg %"Ha<)6*|ѻwoH$uJG Y۷)LMMѥK>}Z8+L6 ...044%|}}q}z͛6m@!?Fn`ii X +++?^}W~B˖-QjUiӦ駟_D,Y/lll`mm ??? .txDDDZ :YjjEDDDa߲eˠ {ݻ믿0lذRmqư-[TR_nݐdٳ0azׯ_prDPY&6oތK.aƌʕ+2YYYY&f̘&M۷och߾}c7oDvаaC>|fΜ*UXÇ1h :t'N@ݺu{_;w3gĹsk׮w dgg>_|J*O>믿p5lذǘ1cժUÌ3p ?Æ ðaðwѻl2<}R x .] Xt*2 aH$D;Dػw/5k}}}t سgabbAڒXhի}}}4i{_ѤIԩSGؗ7M{׮]pppLlܸ666055ŗ_~UV~R ѿz_~E X Ԯ]͛7W_}?{ (uٳgD8|0žnٲ%b1=7oO>077>#߿_ oF``0v߽-22vvvӃCXH}ׯX1|p,_;vDzcذa`ٲe}srr0d̙3w|ƌ޽;-Zf͚^zѣ{?زe ƎMaÆXf rssqD"All,֭[cŊW3g⢴SSS|hٲ%ѹsg;G-V?o߾ptt&Lƍرc ̝;0225O<|}􁑑\\\pY۷oW^055!5j\\\иqch:""""$$+Wqx{{#""?3vލXXB(_cĥK?qqqq+ X|9n݊>|Ctt4O?~>8{,W"&&:tP?ӧRԩS'4iD!)-S",, hܸ1^xݻcHHH@׮]ѫW/!YB:u*̆Pfǎ0a&M/b6l:PnΜ9ѽ{w 2O< $$Xהjժ#f͚1bDcؽ{74h] ZQ6_zedd@$#** ;v:r98W{O/]m۶EBBz?ǹs`oo___rq㐕#G… Xp!>86""""cHMΝmF`ܼyS߿?:iӦ!338x \]]ñcǰzdh"W^ #ymOxwww:t @JJ ѳgO͚5SvBZZUOokذ!Ο?_BCC!^sbǎ믿0~xTV :::066E.Y~~~;v, (('OĒ%K.?>VXӧO[n;;;ԨQs ؽ{wk׮-ptx ,ܹspBĠ_~8tPӧvҥ/_1x``РA?W^]GFFU#н{w=u"##G>L6 x,,,O?TlGI-7n, Л#==pe|022M6͛)w@HڲQ{}X[[^z-[oo1c(!r0ݼ8ޝES UV\ޅДILL>dӶm[$&&*{kmhhccco?~|ڼtYfK(ϱf͚? [O> DӦM1}t}Yh~DEE)}z Dnn.VZU߶tR;wn޼ball T3g`޼y n(yogHGI-tuuX6H$$Ly޽kV(jԨOollsطof͚9sF7ef͚Jc ɓ (akk ͂aOCCCSL޽{dC__Gvvvcze"oԩ_ܛ7o"99zPre\zVVV\2uttT? K,~6ϫW퍤$)r\%'XYYa̘1¤If͚IeKbb"233Y_ŋHqcccL<Ev pqaСJڵ+F|pv­[СC"::ppp=ze0Loݺ5,XT*PJ8;;+ԛ7SLСC#&&;wTڵk#,, i3g? !^~ܹsصkrrr2ժU %%O0|W0a"""J.Qʤofff í[PjUd޽;tuu~tۮZ*/_~_ШQ#oڱcʞ#Zjr055E&M|r :T֭ѲeK888`ѢEE`ҥ>|8ڴi5j`ڴid eBCC1zh!++Ka*}O>˖-ŋ[[[_^atQܼy=*ob˖- ϚV{xo߾888`h׮P&%%Ej*dgg{ٳw kڴBC 5k,lܸ1ǚ5k,XYY_~>}BCff&ƎwB__ 6͛1`"29997n޽ t K.-QTlذgƮ] gTOHA 3g?(M"ɕ%NTNZ ʒge;طo_ADTTT iii033SI}/^@ͱj*̝;M6-L&D"AFF@DTLWD=Tr~I/A'2j(tϟ?/6tuuNDT,9qơG>"w*+Wƌ3JQFjDDe͛7DD"8::b„ ,[ܹs8sLgee),lm:DDDڂ :j޽ݻ76mmB.hԨv "sL0C*UtNXX̙S Uk5Mۯ2}~`NDDD>}:`|MV=>>hѢ/''Gʕ+I ^d>vbNDDDJLLįoËxΝ;…  bڴiJ)!4Q9 UfMHRԯ__aT*-qpvvVghhիODDT0A'""BcԨQuڴiHcǎa…4i#"" LЉP3g΄1[,--}aDHDDT1A'""BD""00ϟ?f:t"""*&DDD """""""&DDDDDDDet""""""2 :׫Wk׮i:"""KWW/^H$t(DDDZ :k׮tDDDZY#""BeggDll,Zl CCCH{0A'""B]x͛7|s;j0A'""B:tH!i=Nsssq}}""*7r9?KKKTTqn޼:@__rcTDJa};wN:cx{{СCD~:ի#Gjժo5"Q 17h8""d~EJJ  @`` t"""=oʝ t""*w}a޽Fׯ۷ok(*"""Rnl#""2)33?zbXi&DDDT:`ӦMkH\,^H{hw"""Rŋ gϞEvv6NK.ɓ'5V: ϟ dff_~HHH#"" A'"""9s4bNDDDES]DpttİaPZ5MFDD8ŝ [[[,_O>œ'O|r"..NiUP6wk=46}q`ر7n.^?Qn޼I& 9 ((7o`dDDDڃ#DPi~sռys$&&Aabb"6m t"""R0aܸq[p%2O>$ $ |||ٳ 0|ݻ<  lRTiRAD&M0l0|/Zذa4hsW^1`{.bbbFvYZaWea#DDGxx8֭[???MBDDJ-A򂗗cr1cظq#?cHLLDLL N* IDAT \]]ӧO?:u hӦzb1bz.ӋDDDDDDQj /p uRRR)Uubĉ?>ׯc000֭zjoֳgr@]4?4tPlذSNcӧhժ'<lقa޽{cʕ2`ĉ _z`ʔ)HZ\^qHXZjؼys)DGDRG9MT4&LPٳg v5VcֈdԹ4sHV "" LЉP͚5SX$N.#-- >ĪU4`N*Q]}CD'(Tj֬ 7774lPCQi&DDDTٳgk:"""]l#TQ*Un+H4n&&&011+SJW@DDT~p{?:W. '"XvQǏcŊŪN:X`7nD>}F(^"" :O>]rعs' oXuKy'O2A'" z)3v*a3J1{llܸ]vT*soL*-,L&+QDDDet"""z ̟?+V@ӦMqo߾Du^px%c899)-9s攨=ຬĨJ=`aﱒ{ :Yemc*Ea…/tpppT*ųgϰ}v :qqqJ` e2TQY 4}t7nƍ*Vzzz"q-[ę3gl2^:_YX X\&T!c$#>%<___DRoG.+gNDDT1A'""mذAu~W򂕕?[ÈQy[DDD t8Un+uH,Q 557FLL <<<4F1A'"""ZvCkâ*FQPACi;=m>L_;QVjҧim][wiiZyak"^(x+g{{n㼔6&p@^.|Ki67 t| z)*J@^J(ʹ{ tLl8 A@%97:h$\%&&ꭷRzz4iÇUƩ LSNܹsΝ;_T6m*SY'N*q,AK/^жm4m4=35kʹi4}t JIIQxxC9U6±կߩyz~zI3gjʔ)%I´pBu]N:q,Aꪫ'hǎoVk֬u]'IڵkvGZvmQvvc OYYYjժtI=%I钤0´gϞBSo{<9/^Xo.\ 6^ӳ>^{ͫzlfa=jѢZh'|RkwTOn~~@Ϛ5K?F EDD讻7Ox@GѣΝ;?VPPSeP.i̙9sfm\.Xq#kޏ /ǮADZ;3>tD-$txTw#=7PHAY\R'TE$r@V0ʍ@*Թ˕~#AWSN|yQ+&uP0ADzR|H+Pi3noH$t>_E( :>HTw\{!@9 A0A2RojHt|t(#$8LLL|z5SY2͵GBCCUvmo^fDEDD(00P={`L]y啺[7?{L6MӧOׂ t%W>}}v9U9,uaH|9?ӊuWyݻ."I3gjʔ)^{MG… *$uIAAAjܸ ۷;]s,Aշo_|Zz6mѣGkȑ]v)==]mnz衵kꮻrsF Qbzj3F:uRnnLxmݺUuq<}Ν3g&NzH|Ə/ۭC*==]]XXS>srry]~ɇ~xjܸRSSս{wyM'|RԡC}3gir3%%%iԩW4?Ab| eeeIBBB ]± z&Mt饗z-kݺ,Y"I $I&6R|'Oĉ=Y֥2bf8q*چ}n~Ů :|_^ۗfN1XkPcUc+ avء(IRttõrJuAtq^ZO?ttr[8x;V7o֚5kl± ޫ]'-ܢoFsܹs%:}„ z'բE hBO>j׮;é@7n}}j֬YP]86Aԩ-[ɓ'GUttfΜ{<:zFLuYqtלT3(KfqiٲeJNNVtt%KRտ"׻\.%&&*11j̘1Zp*((- t:),.mM*)s̑$k5|/8CIOvr@effNOt>kiN A:X :>TI57 t| z9!Y :> :> :>kM se@e5@9eg"AhRܤ :> :\.xL\2Wڴ$7&H$(TyP=))I=͜9SOo=޽{kj֬՞_&p|͝;W]v &hٲeZh֬YÇ:yCP>O>k޼yz=˳+75\#Iz7UVo߾N\"皖F@x>f3 ϗ'N(>>޳,""B111ZvmQvv EaX5kA^”^4urHrir޻Kۧ{GojժUL.u'OVVVg߾}eU.ʱ zjj222ԱcG_Ww+,,LǏWffv +tn[˫F's4&{֖-[i&Ollw@@V\&--M}vT ǮA RLLײ:u(44Գ|Ĉ4iBCCOm۶-pC98W'U%A*P85kg3c [nѣGջwo-X@~~~N@ zrrZji֬Y5k3Du{uu=nǿXT3r]| ATZ>=)&5KH$8kIlH+Pe^K* AW"$PuH+ 8($t*F*(""B.K%H9P&&FE?Ԯ];/M7t9 &:bBW:-!!A Nw| :LT3R:rrry`5&%%%iԩNQ탵aVQ}PV_T LTk% .=gmɚ8qqvv"##4-t;&|VUL5Ç맟~<޵k6mڤ]pV ~z]}՞6L ,p*@)8 7P:={9]>ADuHKUc zRR:uꤠ 5nX ۽hܸqjذԩ믿^CP~KW^1cƨSNՔ)S[N: &>ТEI&JMMS:ׄi01P96Aϟ?_7Vjjwﮬ,+z7t5H|MEFFjժU۷eP.|,IRHH$)55U'NP||MDDbbbvZ&(W$*OLL'NUW]IRzzj֬ x SzzzQNNqvvv @ رcyfYضf&U躤$M:˫R*+*& Pqg}f͚yjB5ydeeey~WPVKLƍӲe˔h;vT@@V\[nENӦM+tn[nkwJi`aMMnj <ו+00P1b&MPS۶m=wupl>gIRϞ=ϟ?_Ç$͘1C[tQ[ ,߁~.w?S܋SV-͚5KfͪpOŽ8~GuTʊwq$({v& 8W$tWb2bǝ$:OƪU Hq^ KK :>+U(O$ K!AW k3b|o@T@ʘ|`#Agqe@  :>Uixɑp"AUԚ/ AT0q C A`U!q.o#OgϞhժUK;v_|tI9N/^ &hٺ+/+!!A[n\tyV9i3T >O>]#FНwީ֭[k̙Ԝ9s. 2 Ǖ|ky||֮][6999<.}״@ tIy- Szzz$%%iԩQ9ab (O.I.뱙Xo۷"JlްaCH322 nnwE@f͚رV\|ʕڵCUP>|6A'jȐ!U\\Ν{jԨQN@ ުGUZZbbbb EEE9]egOq7zh޽[999JMMUݝ. ٳg+::ZjRǎ_8] :z/^ &hʔ)ڸqu름ݻp tPO#F;T֭5sLEFFjΜ9Nc uq*>>ky||֮]PU8ϧowLp%\߭cÚtIy- Szzz999<ʒ9R亲__EՐ?|`I/נ8:F^ߦ"*wUտ~EFF:]d߾}j֬e_UM6ڵkYO7?>11QSN2(g^۷OAAAr\NSᲳ}^zNSXw{r)""EÆ W -(Kɓ5qD㼼OE7:tֿUz^F*Mgm['7M8QC Qll4w\ݻWF:}3U6u,e<纯nWŵ;IeO糟l[Vc6%gޛ*~ zu`eeeUO*38խgϞiӦ)--M1111cw^5T>Ga,,1/D@SϞ=8uŢ+NuN:i„ zu]w)**ʑ:S|1PO :>&&&&Lbb\.Oxxg)11Q TϞ={#33SC Qpp5d}(>?׀!˥{k}Y-[ԣGiӦzGUX\_>XҥW7N 6T:ut_~jw^ 0@uQÆ 5~x?~܏ϗ$%%SN Rƍ5h m߾ݫMYիձcGժUK^x^zr?>_Sٳg}myy׿ԲeKhB?t9n5h@ӟ.ؾ}ԳgO]z饺:]JM6JKKlٲųnڴi>}^x(<<\}ѡCCmڴIC qP|v^(t}Yovv飈h֬Yzg5}r?>_R\_Kҵ^5WX~„ Zl-Z5k߿N<)I:y?Ck֬ѢEdM4\׬^ZcƌѺurJ*>>^MY]tuש[nڸqz!?^K,cvRI[F5_~e8+77W'Nԧ~ 6駟otYƏ_2Pk̙ںuVZ{キTrJGv.//,;vK/dff[n5In:O$?PW2lٲeeտg϶`;v옧MRREDDX^^^yO:̆ f,r-giIDATlѢEe5j؇~hff+V5j=m~msݖUGQyddd$[z]_>֪U+뮻K.}H>63ѣs=En{K4hm…V>n2Pmt(C$Џ?EGGnΝ;%JivգG]VW_)88X;wҥ=mP߯J=z۷~W޽bHNNVƍu%hȑKMMՉ'^xuLL"""Vrr~a-YD7xg=%篸h/^ &hʔ)ڸqu름ݻW rk];|e5HsQRR6mꕘ/^g4z9+nA'OV׮]+nTJN:j۶~GLN222L͚5,/,jl+44<ɧ߅5y/)?ǏWjjjK2=\q~:tH+VP߾}(>$ (%3իW/n\E1Arrrm65iD ʕ+=?իW{>]SVVOZYYY|W߸8}^_O+""B͛7<}I&;* HKKw}}cnuرbAfcjҥO2..kmbccP^s|:qg|^R8'O%=sաC&$I:uoY+VPf͔RѥǕd,}Zx{=o^۷F'T}W:I&Yrrܹ֭[g ۽{=SlK.-[nM4l>Z쫯ڶmkw|ʡClƍqFdӧO7ڞ={̬l0m˖-tRW=~N:[_:t&Mdk׮]vg}fqqqִiS5j5kVZe6l^zYv,77rss-&&zm6lUVYflرN#n dKKK9rӦ,rΝVvm{m֭+X@@cvRqO?ԩS-%%ve˗/VZY\ ʇ{t"99Y.Kӥ9&ergኌTZZbbb.0ړ|EM.]{̡? xW$E;]@UŋM۷o, Ý(>ĉ p qrt HPxx'88X.3OqO*?#uAի222[[Vztȑ#23M6M^xծ];(C;PݺuYfyqƪWzoV`M:U~笀 hҤI0`g3gΔ=Zl_~xjݺjժVZi^u߿_z4hP 8Pw?gU&M1cĉg=zJaaa ҈#t1)))ӧ6l`C6lo޼$n<5p@nݺԩVZuZW_}U^xn̬D~^z Rn?EԵkWժUKmڴQrrgW_-IjР`CA]?z/X@G}֭[nݺk>:ɓ'5bEGG+00P-[?զ$FF @EGG뭷:q~zj={neffjjԨբE ͟?}vZ}하y .@n[?~giӦS:wy&--M w=zꥱcz?xn>BW1^$M>]m۶U:uѣGÞm٣ASڴi+V7xönj[n#FXXXeggYFFI[ZZedd٦M쥗^͛7ێ;lʔ)VV-۳gO<#VN۷mذ[+_,$$nFKII۷۫j?C7 6k֭[;ﴠ ;pڒ%KLm߾73=z=9믿Znmmڴk_|?~N5\c)))j[;~9~oovioծ]/^iS?!!bbblڵ~zڵڌ3|aÆ89rgژ1c}bv+W_> s駟g1m;v(9%Yhh͛7϶onm~~~uV33{뭷Av16?5o gixQff3f̰O?vi|ln϶>}͛>V^mfi]uUv5Çʂ :@9)}ժU6III&~gϲkffZjڵk=bۋ)**ʮZez%$$٩iz좋._~NM*OКԨQ֯_oyyyjIII֩S'33[pyGFF… cY\\+ֲeKBNNG}df&RQQQis7ۭZ٨Quܹ.77ׂ>,d˖-+r|^z͚5<x&f%{m'OlўI1x>'Nf͚O?mfޓӝ>A߱cI/ҳh󎙝=d?͋/5JbvM7yo߾$ٺu]xzwTfRI8}r L\.$˗iӦ^nwryۤIBo-nbӳgO%''f͚ѣ4h6m/Trr͛5lcǎ^3|dl}t?9svǏu_}}Y]| ԟbS㒼E${6c$̬藺שm%wѽޫ{Nqqq 3<ګ^xJ*!!A{˵j*[cƌѳ>[h5j8ӯ۵rJZJG3<իW+//O~~~JMM-խ[Եwyڷo_~Ez"=1~={ӨQc)$$Dk֬ш#c%%%Ӹq<ׯ,Y[m۶ Kvw^O2]R֭+UV/\L5kɓ' ,ٳ^yk$C-Ҏ;uS*k{e^+%|֭S%IJMM8,?,>\}ڵS7۱cZn]._|]jўeťgjݺrss~z]q۷55|puMFy/;;[vj믿^_ƌVZi˖-СN< u֭T6;tyܶm[j޼yZpaF\xa~zSN~w lQFiԨQ?.ԤnRF911QǏWz_^8qgyFԣ>f͚i޽Zt~5k{ѰaJo{K .bUvvu'+V tkҥ0`\.~sJKڎ;Vfmݦɓ'+88X֭W\-[ۿ^|EhB[֌3NzrK]w $-Z5rH ҃>Mj>|_|^u}GoEGGx-[Ե^#Gjܹׄ zAa7oZw\j;M6ѿ~ ѫW/-XsW~+ _`N<Ν;vz7(j:t{9uAЧ~mz>H*̻5ֿ+^mN;VkLr.c.H5k /R/v&LPBB.effO?-}YjrlذafVi 2Ă-00uwelScǎ-88ׯow}=^}U?--gn.{ aۭK.hl׮]cY֭-00BBBls"erիW"##m^7[luիguԱ.]x42͛7 n6o\sJ_|cnۢ.СCVvm=ztw.cӧ[&M JEm~[lôip1MD|w9y$ϟ1cƠ~&赘:vXa:tH%Ks5y6ydt'O'"*K]RXXLDՁCܟ3OCP>>>ػwo2 'OƺuвeKrn[nըDDD 氵Eǎez)))߿?lmmann:`MNN/ sss8::"22ܷ1c>b/YJ?=L,)) 2 7oXYY!88ٸ>~m4j5رcѧk֬Aaaa `Ȑ!ߞo 4666P(9r$ݻW޶mKKKXYYW_Ņ CM6 L&Î;C&?T & fffpssQTTbѢE\.Gƍ1vRҴiS!11/,,,/R+66ڵYfi'˗/ 777XYYɥr9ZjZxǯ]>} j 3gDAAF1c 7n@߾}aeegggL>jZnff& kkk( >YYYzSry}|R#G 6\\\0x`OA=ѣGXh<<<`aa+W}ƌ77R?>z~yT*1zhܾ}[_!/^ WWWcǎ8t'+޸q#O&M@.ƍw&N֭[ J=z?WxٲeXt)6m Faa!fΜ GGG( k{n)((Gc<>j ܹK UJd2uQADD4i4h0zhѣG`XZZb͚5x7`bb!CñqF,Z:t@AA~'v=tk׮B֭[1|pÇl2+8s Zn] | ܹرcC0tPÇabb"|йsgaܹpwwǩSh"!..?ރ/???Bŋ>}:fΜ ;;;|7n7onݺ|n݊'",, ˗/nܸ+WHug}OOOX}͛P(ǎ࣏>B^^͛Z #g/o߾:u*,--q5,]gΜ#G47nOǏc…P(;w.F^l1|gRiiiׯ^y|嗨_>~w$&&ÇW^_ݺuCaa!]&]o>~x`صkwy9&O aΜ9HJJѨQ#ٳ~ BFFƏBlٲT\ڵkaddR)H{{{cϐӧ#88]t)K#""ǏǾ}4_x={6 ϟA!22=/^ LAAAyf'ZKP'h]5꺺***bܸqC"++Kh޼T%Xf̞CP<((H888b!Ç'q24iosqQ@kԛ:u LQ>p@akk+:uJ}F aaa!{2cĴi47m$!n&&&",,L6l3ƍ믅B8qBpss[J&LVVV"==]c{˗/˗BlٲE;wԨwY@YF*suuᅤ0aBϘ'O/1߼ySm۶EEER3gb˖-B!xģGziiiT=z$ űcqE鱐@l߾]M߾}+{j{뭷Wf %{O+l)y}7HMMչڼ;Bϒ;77bz3UFYJ'+Ow ۷/e˖itU@L8Q5kB!5|޽TVYݭ[g{wΞ={^{M*/9kN>(+VD5Sr˓\]]}T*QPP ٳG{fDb_5Ξ=3Ҏ;/ &&&055ņ pRu{qÇǍ7P;wƷ~3g")) 6nܸk׮EEEҷo_ܹsׯ_L>KPPzV+U# ݿ?d2FOh׮le)y^J 6 &&&8z(CQQFя9wW?hڴ)> 8tڶm#G͛_Vq Kj~Q@&ڿ?ׯ`z۷}۷oi-[Dzzzѹsg7޽{΀o%1X_L1B쪫,o1ball SSStJd25ʼ5Gר7bWaff~_}K7?-337y溢N:x~ھ};~w1(J(QUٳڵk:w/bĉR{ӳwZjÆ ӨK/s]/ͥ}V=ܺuK}hҤIz~~~W^g} 8ĽkժU'Uڵk Cbƌ bcc^loo?VZ'''l۶ K.9^}U,[ -Z@vv6 ""Z*I2fffe?Bvv6:7kL74lPD+I8k-eݻ7ڶm ;;;>|-ZNiJ^lJϑz%6lX\./F"_ƣGЩS',Z{.W̺b R:|֭+-B˖-Q^=i^z077/?#Z}Ç?ĤIPPPf͚aʔ)xw]jTgnݰgZ GZF6m0{lmKQUNP ##CgHXZZ"!!k׮1u놥K>;aLۥRZROqFwm1}tb…hԨ1g zE>﫲=Q]9777l۶MP6TIYIbciicΖΦڵkL4h~<<Bzz:K/i죷7nQװaCY[[W8;cǎEAA?(1u#G$9JݟaÆ8sL_BV5O%ӯ ^y#%%WԩSagg_ү}F> V1M6T/''G6311Axx8ÑÇc֬YxWqi%dz;wIL3^ 'eeei=mTBBQ~w9#`ffAuw-bZ\\m۶ݽԇ;1cŋXb)\]tIcdƓ?]t'6mڄ_k-[ //Ortw} .h$ cȐ!1uTu֥FѣljgϞիW1{l@.] ˱m6+HOO9i2ԱҥK8u˽DZ짟~:;7nMPPvڅ'bȐ!… _~TFG3g4k4n֥K 4իWqFH[xW1f4i999z*Ο?/>}УG̝;g}VvTmtR:V/~cǎEJJ uKKKܹs'N@۶m;}*6w\ܾ}={rssrJK$''}UQu :vƍ#==+V+Zh}Xr%BBB`jj xxxիadd@iwgggL6 !ሉA kۘ?>.(( .DTTwׯcpssTӳQiϚtR]m/YD4mTrѪU+~z:&Mk֬TxzzM6iԛ9sرhРYfbڴi?ШwE1l0T*=zk׮ըOKB. {{{1c 5ZǏ72LYwء={Fyst=/RtEXZZ .F-RRRʌd{ΝJX[[7xCdggg/lll\.bȐ!zBL6MnѢ .]T{)S777ajj*lmmŋ/(fϞ-zb]v\XYY OOO1a/H\]]E~Jӽ{waꫯfffQ 6T\2keJ@DEEi}ELlR|"$$DYO<)|||DzDƍK͸",--K}}A%",-- 6 77 ~~~y5"2 q'""""""8ĝ`NDDDDDDT 0A'"""""",=Bff&!j:""rBpttSxl'"Ksgffٹ "" 899tDDTוu\tkkk|T*NDDu>:A/fccÃ8YNDDu]Yu^FDDDDDDT 0A'""""""LЉj&DDDDDDDt""""""Z :Q-\vo/>n zll,acco-Mtt4|}}Q^=ԯ_a$NNNXd RRR=z`|6>СC;"D""""""e!шErr2ڴiQ35رc SVPҺJU? rؽ{7Zn]}@PHsn,A@jj*; $$W\Ahh(2"##'-U=Q2w3334oбcG={+W… Q%}r*!}ЅPP*P*5Q`}֬Y 3߿[")) :ܺu 999u h޼y6=;;FzӧO#11{fܹС:CHII1DDDDTNǏGpp0!ɰgDž7oaaa???\|%""}dBQATJB*'!""9qㆴ~M...:u*/^-ZEXx1ի#F`DDDt"77;wDHH;UV!!!A.33:t(Ə_f111?~ODDDeKII A||<{=ߘ8q"/tDDD;BDǽz;.\qyߙG.]#Gk;\-jmxj$VDRP(x{" flO[P]1.ZRR,⋈{frp""""""" Ϛ5 pvvuV$%%!11QkL˗/ǽ{Jf%""""""z$AƨQp( x{{#11{Zqnܸ'''jhD>Qk שQ]Xi|N&t  ~t"""""""* :Q-`kccc4@6m0w\lӿݻhРzꅥK!W+npvU* ,YG0`._?oߎׯcΝ_1dCKDDDDDDdp9X$''M6ZL6M3gQXXSSj ?;v@AA|||jM6ח9= 6I܏?+++rbhݺum}XZZaÆu[f}Z JK=<>>Z:u BJॗ^Bɓ'u&jjZZWT厏&lVVV ݻѺukuT*K+Jdee#&& BZ,~""""""dd2 9R<<<\ܹ!!!8vVZ^~~>h[QfLU*t""""""t&۷X/,,Djj*~'#3334oбcG={+W… Q٥q=C.C.;6""""""3AOϛ7O:]Bj(R}||3gΠsӧO#//љ2rHt˗/׻ͬYgggܿ[nERRoժ[oaݺu~AAAKNO:ssrƨQp( x{{#11{fӦM2e ߿?>KDDDDDDT'L .;w9s攫 6;0[[[j}tDuy^j]Ph ,jQЙ2"Jcip$DDDDDc )))ظq#pܹ uN:Jןٮpqq90j(dffV(""""""Lg~m+ܹ3}]L2:uB׮]QN;I&!99BQQ郂2c~:v܉_C )WDDDDDDDu!o& qUfׯ_ǛoqzwT*q9tMgiӦIb̙8p ajjwDDDDDDD~ɓ'5;իW_TyyyԮlڴ LΉ蹣s KI&Ppt^^^Ϭ 6ĭ[w^uj5T*BDDDDDDTL?C!%%B'{w| w8yd\t [lBCCaee%-O1c.\GyZLL  8;;W8N"""""""C n H---5YXXكǏM*{͛kmm8;;ɓ)ZZU*b$YJBq |N&4_iK89}ŊUaaaؽ{74sP*P*zmF$\\^ LgReL4 7o޽{amm,B6gΜ3gеkW4hΝ wwwgω2נ[}+Ill,iٶm6صkz ±cx;ZϠ4/ϟ={T]m۶8rH%^{NDDDDu]3ǏիWѩS'Kܹ3^ǏD|DDDDDDD Π=ztdffѣd֭$""""""7(GEE=z'bӦMҭ*"&&vµk`aa___,]:ۤa…8r舑#Gb033p,TJno)i;֖oA]GE:WMDDDDDuUVah߾=UVUcǎaҤIHNNơCPTT>}@gk׮ѣGXn._O>k׮ŬY* Qm)S 'OƣG*Ibbz\\J%Ν;s|@@f͚눍+Qmq]RLLL`ll߯Pyyy[[r+o""""""@#AoР޽w&M~+WBk׮һݯիW#44TgZ Jƅ/`eeWrw8yd\t 'NBCC kD@@vLL ϟ_jFz þ}pq899I ,@DD6>2pi]RYjFV-!ݻw#)) nnn+J(R~w_D\\N:/U;!AVWI&!!!7o5[gL˗/ǽ{vDDDDDDDӳ?%00h֬Y:ia̘1Zr ,--u{! lذA!""""""J q1cJ^?#%&&jATܹs֭vQ]߼y7ֻ+WXkTn[ZqF_RU,dz7vvvq:B <<]vW>KLL  8;;WK. FFFtR +ɓq%8qB* EBB_mGFF"<<\ZWTL҉NH۷o,(Jo2 BuLrw}prr,XJcrr!""""""24נ߼y:B ,, wFRR4W*zOLGDDDDDD;\%DDDDD5@w???O8W^y\ĠSNRqg믿0j(( ( 5 K EǹsK/c̟?Ө[cǎaҤIԩ0{lW\v#F۷x1j(?)Nv:'֬Y5kh} d2$.RsΡ[nZ\zHNNF.]ׯ_^䉈j3 GӼ<:NBsx饗P(pI ZZU*UFMDDDDDDT}t^^]G׮]女^VVJerR,mbbb ,n"""""" :9sIII{n3q:4!rDxxRQ3A_x1>xxxN#)֕ ?KXXۇǏI*_`"""4#;;6ݻ;;;ۗ&LW\/cƌt'BaHJJJpv̙3ܹ3˃oc"""""""Mt&FFFx嗫I&aػw/k ,,,iժ[oaݺuf-((3sG$qӦMg}V%"//~~~pppm۶nӦMh۶->}7njg#""Я_?u055x|׮]zw"Pp6ޭʱ Rt&aaa8z(Ѱa O GDDDDDDDϦ3A믱sNϐ'_L߽JCDDDDDN:A{trq2 {yfZ04j߿?n߾]%6:y!** SL:w֭[q #((ŕ9}ժU_aggM$zw@aÆ ظq#zHHH3>W_}Um:2 ΝCaa!#9:: 'OdNDDTa޼yشi1c>A}DDD:(Cơ!++ fffhРFtSPҺJ|.]k⫯B6mcBPw߭j\\xbXYYI˭[tBy( iqvvN:_~hڴ) >} %%C#""4t[[[z7vqqAzzz Ejj8::>_QݻӹHIKFFFb#""ӵkW|/^ĉ'зo_mj5T*BDD➛o BggUFً/SSS:tÆ ܹs?>Cے啊ᅬS>^kkjL֭[7@\\ƌSitDDDDDDd`eNסCRrBr7o͛W6DDDDDDDu!nnnҐ'}hݺuEDDDDDDo 30i$?B̙3زe bbb_2:a灋[e۫a|qZ[}z3cǎETT{=kY{&"""""HpBT""""""Ss׬Y'N@&m۶0`d2$ ~3eBWWǠAp2KDDDDDDTU:[#yyyhժƏC1=z45jǏ# nnnHJJ*DDDDDDT픚/Yqww{999#Qy(u> 999z*?~,\r999).] }}}qKII=qqqFϟz`Gۉ(u=22GXonnn7F!dKm;;;tu޽{'(=?999L҉J(5A/..B6-ccw~κ rS_'J!J+4""""""")u{=0c ܿ_,KKK/>25S#>>x쯺s>}#F.[R0hҤ lllkזϣM6 ~~~hӦ /^~Zp M4#Pvm:u ej*(u%.\(\~ ={,s#ݻw \_&!""W7u@DDDDDDUB~m4iD|ݫW/KAi)M6%Ǝ͛7#99Y ai=&&SL1}t'Nď?غu`ccZj'Nxc @OOuz]JJDDDDDN!Aҥ .\#Gɓ'㑔ɓ'QF/sCv̙3#..]t{+@AAO?U%.|y׮]cǎ8t{ܾ} `ĉСCGppc -[QU1kϟ?ѣGh"tuօ>}7z+SPPXɕԩSDQQ9̐|狯srrDDDDDDDIaԩS011+>#㽒WI$ׂ @"OH,--7d"""""""PHП>^)dgg[jj?>*)Lq]6>}rssqIDGGcŊ=z4pʕwnDGGΈ( 4055}HRH>qwenll cccԭ[5kDBBBØ1cЮ];t6l@JJ K=&%%?FJJ &MTx""""""F!A/..q1DGG/РAokGth*ŋc֭6mѽ{2@DDDDDDTYIA^-044D^^,,,н{wtUWXNN CCCuCDDT&Sz83La}ʕpuuEӦM+<@"""""""zI!A2e: """"""ho]$41~N_n~N"*K -djJn:ؠVZpvvƉ'J'Œ%KPPPˆ4ϓ'OFRYk.̜99ХK#%%EiׯׯիWzj|wX`B&""/_]vGzAx##""<*KCBB0qDxyyDxx}͛ƍc={6٣(^^^V?DDDܐ!C0j( =; >>M4QstDDDE%SMLL0Z 333bJ>\]]ѱcGlذHRHr[z5+V_cԩjH$Aс30x`<**  )LMMKKK+yfԨ""jO[[[3gTC4DDDMe6n܈M6!!!HII){ĪU_!##Y'""w}vt2 wWsdDDDE%#G!00hѢ"""`eeÇqmܾ} 6'*B&""ñxb̜9AAApuAhh( 4DnNN CCCuCDDT&sttҥK/^Dƍqt>v)~:X?jT1zS|4/nUޮW7nWT> >>^ᖳQMQi&[666Uqĉ7˜1c`dd###3O>^ i.$&&&R̄/n`nn(믿R/lqKMM-U# (**RCDDDDK% %WI&;vDvv6Ξ=+? HRmDDDT.] 88X./**Bpp0:wȈ4VqØ1cЮ];t6l@JJ wpp@>}0i$_Ǭߟ++Vk׮G.]'N@NN=興4t>>LΉ*%!!!8q"PXZZ"<DQQ̐c͛0<{ :tK|uNNNDDTM4 O"H!i,Mq 2,Dooo̙۫8>|ZZZ;v,J{t{pp0ҲBDDT19'""R-@KKKa<33fff ٳK=M6,--qtQ?999L҉QV-uADDT2gggDEEɕGEEhҤdi쯒J044ۈtEEE/ѠAΝ;E~PstDDDEeSqFlڴ EJJ ?{,w"::Fs"""* lٲ+VX䄍712"""ͣ D֭qqDDDJi}]]]ٳ}1ah111J Z۶m6l؀ѣGCKKK,oٲ%_^_ի===nQU{KL:SN}NNN8zhGTv^9UgiiiJo/+..FaaaN:)QNrmRiNDDDKq m ڴiSm-_ؼyXfmm]mUeLЉ4ؒ%K0f{7m6߿\ڷozÇ#&& 4ԩSgi:>*`׮]D"ŋ?z*׶ܹpСCmV1ɑۈ+i"ԯ_7nPwDDDEcGХR),YTPUuP}V]oUQuP>.]$Zcٲehժ"""L"BDDDZ5 H֡Clڴ ͚5SSdUj-"ڬPZ{Ne>۽gƎ$F_>jժ4t""" feeq8""" 5k(a̙jHsU=88| `jj?XaݻC"m#G1cFFF3f >3v8p ݻ~KߔI$\RSٻ/kgggԪU 7w}WaRݻwSN ..._6 zLL M3g ** /^M4 ~zFB||<"##x3F]Qyrq_|Yܷb ,, Ν9z\Ne׹s>|X*\}b?sLݻ?3N}Z \~B}%KZRX077-[&=\022;A*o1c`kk+ P5a޽F֭ϟuL&_)sYp]JXzuDDD5jIJ;w RT~BYƍ' 4cê벳r;v쀉 7oٳgˍ`>}FFFh߾X֡CԩS n݂L& F;wx9ғ777T*EnĘ+s^UPP&LD"˫5+Q^֭3{#99Y5yِH$S\Q^=nAAArSO>-Z@&e{F~~>bccU;SSS4m&MBff/66rU&Er׬2U<0qD}wӧQR(,,>Ν%K &&111Xx1ϟ___uGDDQ* ;wF-ѣGr qEqZuFFLMMgjj c۶mhڴ)EEuK-&L|/r p;VijO>.]ɓ''M$ECvpmFnK\U;99cǎ֭[E^+c^?]nd^3e);GiǪZaa!Fb[Nn߫p-[Dݺu1l0q|Cw-Ю];XYYsL IDAT 쫪ަM0zhg^WkVyeZEO駟⯿.FvS?3۷hذmڸub)\u]9r^^^oWYy]#sssGU#F@RRFϕ)b-++ j[ XYY ++K׵2ĉq[uJ;_^פߵ5k_NT'IIIu֭["""MPmtA0}tٳG}W^Eaa8]cǎٳg:'RaU~~>`aa!N. ;&&F*k055E~X*^F;v>|0d2wU*Ioݺ#GSrʕ+HOO>|Rx=zb֖r5VZnefo;_^פcǎr(Ӯ];hkkWHS-GDDDTiӦ᧟~qHLLĎ;зo_ڵk5kڴi#>}IGyM<6{l 05Bff&+`ܸqH$9s&.] ;;;aҥèQ*uJc7njߏdUfO>G#R066FF5 _|<==`ܺu K.ŋ+t&0l0\pGQQgll >}gΜ+p9bhԨ 3f V\Ǐc٘4i[G+_СCaaad,X&&&DHHlmmquGDDQ8NDDJqߣf͗[ xyya̙8~#$""LЉ4sY&Νvک12""")DDD))) 婩000PCDDDD :dH$ǫ7nưaRa,lق:u; "@8q"vڅTܻw?3';<"""D"q􄥥%?ӦMБŋ#((999}.OOOֆzM6}7 +͛7Uëf̘gggHRnZa@@ҟڵku [[[ԪU ZBddyrss1sLXYYAWW...8w[۱cZj===XXX`x葸ի:t(!H[} . tuuѸqc]W>J:(IC"~=z>}@&A*ӧO/_VZ!C`ر<==1l0,_\i&T& CCCoZZZ077QUݻ}aN˖-amm;vt$''puuŌ3пxW:^WW!&LgϞ-GGG >\pB_k׮ŵk#..N全(l߾/_z쉴Rc;y$Ǝ'ի_p9xyyu={ƍcٲe077Wz˗CXXb \k׮WQEDD(=_hh($By50h ۷7oĖ-[px{{G:::oxXz5R#""(LЩLD)ǎD"CЦMGCCC|'xؖ Xb7n ]]]j VZaÆbY4zzz6luVX[[nݺPTT$n:١VZ033ða8p vYo+R)ѠAm ,b˖-:H$;v {ݮ];HR8q4h̠>Gӽ{wܽ{(ݫakk cr% 6n܈COOvvvطo_ߓ5k`ڴihܸr?{#L:'Nuشi5j}}}|(**Š+`nnSSSɝ; 5T*L&OUnzzzprrB˖-p4tRԩSHMMň#~ @TT… yfիſ/ĔǕ:3Y?3"##q1 2۱a q"##ѵkWs~8{,ףGj {)sEpp0вeK<}}ő#G޽{cP{AÆ (*w^̘1f•+W0e?r 1.]B߾}1zh<~Xomm2M6n܈MK.bY~~>jժ%WOWW'OryQQ({!"" _ѯ_2ܹsg?o#x"N<};vLMMѴiSL4 r={O>aaaֿسgu&W"22;wĦMЯ_?ܻw111X|9.\3g~W^ׯǭ[oɩLF*W_SN'ay!//!!!8z(:vhܸ1N<+$%P^XX(}v}:7&xV'N Ù3gj*<==ť.]kٳӧ&&&eWic̟?_w A׮]akk?řر#K888 ;wğ ;;Rsqq;ϟŋ8p7oѬY3hiiAAAr scᰲBRR-Z=z 66V  >1`lܸQnqq16mwƍ@5`oo˗رcСRRR`nn={B[[5‡~X#2-[mff===fffks m6$&&0 Yemm }}} VVVhܸ1ƌ;vM^P(/-?!{Yyyy;w.QNJ& ,%:uꄄWuڵa`` 7ӧO/Sogbرr| ЬY3`?~<:۷o hРR)֬YQFyݵkŋ#66HJJ*=ݻv?~ .\֭[j*lݺU~E0`<7o}ѣJ{իqoHLL~kkk5jȕ\ÇFƍ1i$ݻ< """tLIeX*D"^] >>^ܮ]MLLƶߥ}\p;w/^VZɓ'b)WK``\DBBlll@LA*=Օ`Μ9ؽ{7p e/ }"lܸW_>~7ݻ~:x9O"55gϞEaa\SN3gZl޽{cݺuشiS(3g̟?#Gƌ___zp-ѣG:uf͚CE577Gf0h _r7n~ ]]]L:]v-*mۊ+KT :UJJHIIA&M6KKRkӦ ]V.1ԬY={Ċ+p%$''ѣ+WaÆN655=˗/cСE ջ&'N' '''#99Y"y888(ܛ})888S!)) rýVZhР^xݻw+ ^vmXXX ++ ztgϞɍ*G_mJ;ϛxRSSaaa?>.]eիyRSb͚58vN>˗/9%$$ //5#>}戈=TI`Eqq1:w:u 7nz ///q߿wA׮]Qn]DDDb'N{ًDFF"88mCXlC,\ߤIٳ D"E@kkk?~\\u\s#жm[|G?gG}q۷SdddOGGG6m +?DZZZn4s:t oƜ9s`oo/>a۶m`ҤIG޽3g?L&_ -- 0`ШQ#4oqqq O>E@@ $''c011 kԨ8 ""<|}}}\v sEN`mmU-[۷o]] [O֭1~xt `ժUފxbGGDDS_sԩ#>4}69޽{ԩ={ ϟ?v܉͛?{СCWEFF5kDݺuѪU+Yƍaݴi&Lv+Vx/ V^ &&&&7orrrbʔ)E~~ґ?| V\)>bl S&11>|c///KKJJblٲJy9.\;w@__} ܻw:t(vݯ\a֬YSz˗u߿/Uj*tM|,ڵkh"L:d2eiiiضmHAOOOÇJjHs1A'""`]vŶmXr%\]]S܉V\ݻ(((ܹsqU<~F:sttĥKW^Ð!C[[[uGDDQ8NDD_; """Heee~@BB$ 0~x;4""")DDD,&&666Xf cY666QwxDDD#DzJ#kڴi1bá(**ԩS1m4\rEiiD̚5KL@KK ~~~HLLTcdDDD :k۶-кuk5DDDD8ŝH\tIf̘۷oC3goŲe"FbNDDaZn DAIJs*5j<<DHiŋcG|*:ŋXp!lllƍ#00Pnj L&.wW+d""jٳg9r$s""J@m˗/w}0$$$`ŊXr%֮]+YbBBBszBnn&""V&N_~Ea84hΝ;qy/GCCCcȐ![ ?LЉ`prr5EFDDyԖw}n޼Mŋ8y$BCC|FkFFcR)uSN1AJ#yY>GYStO"|.]C#"""Q[>oF '''3C IDATHzLQԊQ7c ZH}Ԗ?{L.ZZZclll`nn(qAAbbbRrٳغu+7n`Ȑ!r[E D"̙3+""Bm0AAAhԨ7o8`„ ~`/]vvvҥKQF+l""jN:+s9lذ-[TyDDDkbѢE:u*233!0e,^X3w\ߘ:u*о}{>| Zټy|)F_}'""Ԗ 44T|2P]`Ք:.讗>&6m뇞={2A'""zt"""R?7>Ν;? .ܹsTP%""MH@[aa!9sk[1c>Zj1/5"ו6۔I՘i3f(-oqrm+66pvvˊpq!??ZZZrO|KKrPcֈrwwݻ}._xqk׮Fx#THp066.sErekFzʉ4t""" ֦MEA@FF/[Ni&q#"7^רQGѬY oرcQUH-YD!"qDDDDDDDGЉ4P5=WF"ŋ*i{ԩSXv-APaDDDDӅ*z7U} "$ R(~:;? Kbi"h%Δ_0ڪ1FT۔VG iiZTlaFԾ H0R"f9ɹqrsu9}{-[Lzĉ].~=ĉzդI]zU۶mUfMW={TzzFuj׮]?e˖)$$աpOwA'O֤I z;(Z$܃^uyyynݺ?ϟkŋqdܻHН CGqӧOo :y:p. `$AW֊sftl AHj#ft'Q.tl AHt'Qk~}9k@F `$ :6zUre+WN͚5SbbKQQQڵk #p,A?sy)SFvޭ)SbŊVɓ'kԩ=:PvՎ'M`͝;*]1FӧOرcsIϟ/}W8p`Qv{ñ@qKEzUZ5O>'''+--M۷<<<xW @qY~!͚5K}X3( &^͠aÆe 4ѣG%Icɓ9fճ3F#%%"\?#ڷoCUV-IR:uիW[˯\u֩uֹnC>>>>bnZou릟~Igٳ%]?}zorʩgϞ Be˖Zdƌ7xCuիW/kK.OΜ9VZiժUvU %Թsgu9nnnQLLL[{6Wroq :P;+VϞk:_t {4Nv;u`ww:nc̠`$ :6@ p8/p^Ӆ(0 `$ :6@ `f w۩ 3 :6@ `$ :6@ ptܵ<do :6 e:.f)a7I1 `$Nr ` $ :6m֜Rr3̠`$ :6@ p8'"Bab| :6@ &AneddhСRʗ/gyFǎsa9~}y(l'$$hjҤCõd-\P7nԅ Թsg]vEP8\_pAz'|J*Y3gLv)44T_|vء5kָ09Y\w\UVMPzz̙?\ڵ$} ֚5kԡCW 0{z__.3===]'IJLLTffڷoo RHH]#e372hȑzG"IJKKSٲeUR%JKKu;Ȱ;wl2D۷oƍ k[bcc5agw>֙8^S܇KjڵWgЕ+Wt'O3fӭGJJJ,A7hȐ!Zx~GթSayXXʔ)իW[eڹsZn6=<<8p)W_}N}}}%____FRʕW^yE7@I}֬Y(s_~iӦt֭.]\͓{G @rYn)f̘3fAD/lr5aS弗; :6 :Jsqگ/ϵY9;k}0 `$Nwsq**ב`\$ӕYgWd{3 :6@ `$ :6@ pt%=PAHtl AHn喽-z̠`̠ߦ;].N3)V()AHNqM}ݟ50F ftl AHtl۬&nuftl ̙3UNyzz*,,L6lpuH8Ei;vԦMuIGuuh8S0`4hӧ+88Xfruh8m+W(11Q۷w(o߾s]'##CΝsxP6kNҵkPﯴ\׉Մ  5.n( A,ۘ1cn=RRR"DmgЫT"ww'O1CENee*,,LWv(_zZn(A#GwjѢ5{l=zT ruh8ݻz7XBjruh8mOq?IVFF]9sԩ#OOOiÆ  [}JEi;vԦMuIGuuh :(2SNU5`5h@ӧOWppf͚p9tP$\Doޡ}wQT؇/w1sι8n_WYqw)]vMJKKu eddX%LYZcQ~AUǨow{ :^~+> V>LIűc0+)))Qøk'NPp{X'&&F&L(0(T} [nnnΝ;`E:_ImTrVR%Ѷ⨤K*m3 ru(NQJ-?ydYlcƌȑ#YYYʕ+[-[TBBBmyqRm~t[ޭֿRrSd_zNK*UOK*m+h[qTR%:)[´zj=VիեK\𐇇CYŊۧ-/n㡠q?w]Vx)h9x/ӭ/sD'^F޽{E ٳuQ 4获9xZ^U[;wv0\Ɠ}1SD=?ΝӋ7S)JnJj$VvI%m̙35yd*$$DӦMSDDDx31L{{LLLpwwwEEEtu"AImTrVR%Ѷ⨤K*m+NZlÇkܸq8pjժ8p&tggR` $ :6PbXlRުVv}9ԉ^ps[U޽u٢lJ1119c yyy)**Jvr؆%Ik6777^ť֯_~ZAAArssӷ~Y}cEFFKիWo¾c~mѣոqc/_^AAAӧN8ᰍ_wsQ=*_Th]r%풤~vCJ**_y;v̥sssӻkձcdݺu >}G.?կ__<:s>*U\JJ԰aC5iD]$֭e^ZW^UuEz/RSS찼gϞڶm~ڶmz]MUFޱclɚ:u>%$$( @O<Ο?oձkڵzjI?o)}vE5mT|A˝GΝO< %$$hƌz4uT~֭[5n8mݺU/3o8_kٵkSOŋڸq.\oFFrIu!+V8,>|,Y jƍp:wk׮]Ҷ۔O?Tnnn~Pn}v+d=jӦ?Yo mWjȑuVM4I]h}g%@ҥ5}t޽[k֬ш#r;(L uI#ɬ[*4Æ sݻwIf˖-V͛$wB7?Ǐ7M6uYVV 0Uvek>#c}ەaÆdeecgI2K,;fΜi|}}˗:&((:^槟~2̑#GZjiӦΊ+LRǭ ~ vt%uΞ=kʔ)c.\h?~ܔ*U׷˘[.]m:ٽό>>ỹ>谯~شiڵ<::|W.%ڵk~;WqѣNTbfo.Is(/UJ5jH ͛VZYe?|}}_4 R:u /СC}V]EFFZ1۹]7r劾 KrssʋkesVm޼YtA'NÇ1 ==]nnnXCITre5kLo)ś7oVHH:(##CETZ5իWO/Nu:}:2f~Ξ=~Yeŵn>JKKu7.__]={U>l0-\Pk׮Ր!C4}tOֶJ*lٲ.k[N_Ԕ)Sm*##C˖-J*9wsڭ]7?^W,ygI^WԩS$8QA?Xhc*))ImڴQNtQI7~q{ݎ'FOOV>}4{E( C ۵qF_~;$$Dmy[233 /(++K3gtX6b&MXɞ׏ݻw Q-TV--_ƌ֭[I(:%n}СZt֮]5j[y*S8 I пMq˫q:pu5gN ~j*vEӘ\d'К5kn)IJJ$~ܹsRSS:V 'tiX1L2;w:5giӦֵC>> wFv-ZL26SNڵktҚ2e{1W_VƓt:?VX5j(!!CE1p+iӦMZh[5kL͚5sJ&]h|}}M\\IMM1Ƙ_L0$$$d|rPsUk;;v4M417o67o676;wvU1ƌ5řC-[Ν;ooosac15/6;v0=z0ܹs6خl׮]35k4Gv(/N}vyd$3uTd]}tYozava/^l|||{粶effgyԨQl۶ᵗa1&>>ZСCfѢE&((<3>^jBBB?nnj֬YcjԨa v?ތ5ǛdvZnWg 25j0k֬1[n5m۶5M6Ƨ+UP۲rʙYfX߮}V1Crʙ#Fݻw9s2eʘچ£dpq#;{7M:<3'8 )1 \s5sQaLٲeoӧsiӫW/mM^̙3g\Тӽ{whʔ)cs=gve-2Ǐ7DDD;v8lÎʶrJ#۷ϡ8ڵks}58onڴic<<ceΞ=c}YA8OLhh)[]v_dx?,^ء^tt(P0L'͘\.U Piɒ%ڵU֪U+9\вaÆҥ BOp&rS"].\.h(IڶmTfM9R{V-ٳgѣ4h ]1L'tPQ޷o_͛7O4sLM>>ѣ~7k_MfΜս{wUTI+WV.]taky,{ァ@U\YVfffw/ooo_/_vX'xBUT"##uVkyڵ%I>ܬT.] *e˖ZfM/z-+""B^^^ Vtt.^o^zIުYfϞ~IT-SܳO5^r4h *cǎ_>]zUѪX*WѣGo߾Ϊ>}Z=zP5T\95nX ,phkS@@bbb8p@TÆ z|qvOqґ#G4bk Kґ#GORJ*_5j+V΢X͛'Ir劆 @yzzvڊ:޽{պukyzzQFtnݺzܹSJs^Af111?;e0zhիWOʕ}ݧq9 7:}z!=39^%@;wQl$$c1k׮5?l6nhnj֭k"##M֭[Mʕ;cmy?`wwwj*c1YYYG1O?IHH07F2+W6O65jiԨIMM5~3K.5ڵkcvjTb^}Uc1Fٳg1Ƙٳg@7ߘCoycxyK/donvmziׯo2221A={e˖rʙٳgEe˚O>ݻ׌;x{{MZu?޽޽sc9ydΝkRSSɓ'1l۶|Gfffر9r$x5jd^|Eg~f۶mcon*T`Mfo6mdBCCM~|&66֔*U:.\0UV5ݻw7;w4˖-3w_̙3ƘL2]v&!!$&& ={Z}7Yxٳg4h1]tɳǎ3IJJ2ޖ-[:Ę777k|^v̈́(d֭[gBCC$dɒ<i f1ӦF7ްư1|Mk_m-˗hԨݭ灁1͞={rmN]jՒuʕ|{WrJ{[>bbbԳgO-_\Ə gUVVլY;c`͛usTоLiӦijܸʗ/Ç8>g%q P|rZJ2ek܎ɍn޼k͚5֭ڵk^gyrc;ziӦiܹ޽47~-[^Є ԡCj…2eU'.]j׮/_W_}ዘhذ<<}Xe7 6h̙z'%I))):uC2e86lP~d… KzT^=1B=zܹsϪyڵk֭{'Mt}|t钼$l~'iF񤤤|oauE/_8p@ 4}7lPGՉ'$Iڼym!1AiРA3f><U:\8q$u]ݻw{uQou-[(""B%&&jȐ!'|R˗׬YknVr;~6mRZ4vXȑ#9kKRR矫gϞj۶~.8+#Fh:x𠒒ji&M|X_b]mvJNNֶmt)eddH"""ty-[LQQQ'_|V Z1(66V?kǎ;wN*IիT.]hÆ JNNֺu4l0;v쎏۰a駟O?5~xڵˡNݺukϞ=׿^zYmڵktkŋk۶m_Գg|g/]!C(..NGѦM`%G͛5x`m۶MҥKLsӳgO*UJݻbŊWCUll;۷OÆ ә3gͮ[V^xٳGTZZm]v__~E6lpHoUڵ~z?~JF+W*99Y[nՏ?m۶|[֠AfM j޽ڿ+ @+VY^>C-YD{uKrwwwOcƌQݺu ܎۵o>:uJ[= zdkjڴڶm{c8!A(A&NU ԡC-[LuwQF)11Q8qL:H~+^zI /Ç_N;vc=UZUhhdM61?`oӼyԸqcEFFj޼yVʕUfM=sjР^z%]tfԻw﮿=ztt駟̙3 U޽jժ9ԙ2eV^`JUTI[O?:yyӧOO>WuN:i„ {ݺu:pڴiP7N *hٲeڽ{BCC5vXM4ѣգGG===\gܸqj޼:t蠨({[ܔ*UJK,QFFz! 0@omoúL\vMV ԱcGկ_?ǭn4e+""B={+p*y 4i$hB-[Çb *U^gyyw4i$5mT6lw}*U8߿\␸V^~eկ__-ZPժUi&uE#FА!CԬY3kܸq:.]Z ,PF[NDnNyR ԭ[7M86mڤ(;voܵ#GhժUTFF>%''gϞ힖7Nݺu#99NqpJ*ye˖zGcY.[`ׯtM< 0 `$ ohm^IENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/trace_volume_heatmap.png000066400000000000000000000131251507764646700262040ustar00rootroot00000000000000PNG  IHDR,PLTE@Ai @0`@ԥ**@333MMMfff22U݂d"".Wp͇PErz挽k ܠ ݠݐP@Uk/@@``@@`pͷ|@ }}}{{{zzzxxxwwwuuusssrrrpppooommmkkkjjjhhhgggeeecccbbb```___]]][[[ZZZXXXWWWUUUSSSRRRPPPOOOMMMKKKJJJHHHGGGEEECCCBBB@@@???===;;;:::888777555333222000///---+++***(((&&&%%%###"""  eC pHYs+IDATx LYs5yDS1*"/ hA/QKͪX4Q B N^=ۙ^39yI93UEDDDDDDDDDDDDDDD4fj`lCݷ뷈f3oGy--魺q"0\"z8]v"i zo8ؿ7qyhZ{eI7Sp% L&Y vE1 ;݄ Yо6?߬/Z/0\ߖJT TR`@@\ߒ c@@@)0fjj. c@@3P5P5P57.@ T T_R`@@\)@ T1J..ڵ{̺vh0\_Cw9x ̒O@SEYkTvYh0NEYwTTW[w bѮ`ֹ]9!gX/ڵ:oǏ]#Nn:޹G@nB*=8؎pu2\>`֥9{sOu./Ki^j8\wmoڪtg{'e*As@O>v`֥/f KR`@@\&@ T TE)0fjj./H1 U U Us3P5P5H1 U Us+%9#R=C+%9RR=#O+%-R= LM$`<,2 `<,OB5`.KY) ˕)6}tVJ&6ԝuBx CѴM¯0P:?J1 U Us3P5P5K1 U Us~V  J꥝ o04T7}K~y@ߒ`^iT7}K~y@ߒ`^зWV* %?o04~Z* %?o04~Jzv}=:M:MRV-}]ct<ӜRc>\Sjl4- Pur[ӭz8SD*-Ӡr`?yL7!M[RT]Fax2iO,&MOI+%O8&OJؿ&rx>+%il4j][o7!<.W/[+%lZ04~B* %?o0[+MJ~&?o0[+MJ~&?o0[+M%M~y@ߒ`^зW-&J~&?o0[+M!=ms 7 \/u ]~0h\JavC py0 Axkڶxqo[-[# OӜGC7.\ˇ7g|ܛ^.W'`pؙO@pVs؈o0hLJ.M;?{,Nyh9a>\|4JIHTJTj.+%9R2Ae)wb^-l6LR^L?*%t,2 ŔJI4,TSL9<h[㻿R'R7o?pRd<Xx]`Xj#RR)0`^зW?"M~y@ߒ`^зW-&K%`^зW-&I%`^зW-&J%`^зW-&?J~&?o0[+JJ%`^зW-&?J~&?o0[+M/M~y@ߒ`^зW-&ߓJ~&?J`ۜg3ӴTJSirLJ w4+.8Zl @&)/Lvt52 Ť wjkWmB5yd 𕻶\)iJI{>^3iX H ߄O&OY=niWJsIMh>a27L=𷥒@@b3`1/@ @X I1 )R`@n |3 U`1S M!@ X wK1 )]R`.Ku9 ti.,̋ z&T234sDG MS/s3K~(~ڴTײ:DԝX))Z>k^ ;Lmk|Cפ,MSKuE} W2Z8.KuE}狼L>e.gKu]Wn,8܍NjM}8 ܯv>M׀}m]t>Vӳ?'+R׀pwmoڪK瀦Ws Ktj,/K1 )R`@n |3`1I Mo c@pS,[R`x'iX))VeH.+% ']dh=_z!LR- txZ7e@N֮V,TgRwNM0Zжwci*SNz&'|܄D y - txN7!<?4O}m~ a0X  c@pS,[3R`@n |3 U`1T\ 0l as0l ag ag =`I)`@6ag =`\?!ug$g}$͕pY)əRs}7=p<ϐL̫:Eロ`\o7EyjrIö82 naR39i)u[jN̻Psj%f 5R0O m8M[^i.O8+?ps.@ߍ p \? u 7!܃[u&ܜ %k_hZ:z_-u =`@6૤ag =`\/u =`|0l ag ag =`^)`@6Hp@6z[ 6z3 0l.WHp$GZ`=0i)u.ڿiL̻K:i,ΰs~5$Y`eOLOVj7!dDӳHDDDDDDDDW{|9;67m/v~nnϟSaM|^XumSaetwD3 j"hōU"=~P@/x5 c ׈?\vc{U$7gᏀw vB3]C^8q xs{}]\ C7`w>IDxc{q6vos# lRxRWEN7WźC[۫NTwl'绥U5no|y9ߨ;v} s*casM;R˫8JiGmm[7ua2l\tfx cO~_5 ~a\7gi% .IENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_applications/000077500000000000000000000000001507764646700241275ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_applications/applications_intro.doxy000066400000000000000000000022001507764646700307270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \intropage{IntroApplications, --------- StarPU Applications ---------} \webforeword This part presents how to write a StarPU application from an existing application. Some of the applications presented in the following chapters and some others are available in the git repository https://gitlab.inria.fr/starpu/starpu-applications A full StarPU tutorial which can be run with Docker is available at https://starpu.gitlabpages.inria.fr/tutorials/docker/ */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_applications/code/000077500000000000000000000000001507764646700250415ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_applications/code/stencil5.c000066400000000000000000000031341507764646700267340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "stencil5.h" //! [To be included. You should update doxygen if you see this text.] #define _(row,col,ld) ((row)+(col)*(ld)) void stencil5_cpu(double *xy, double *xm1y, double *xp1y, double *xym1, double *xyp1) { *xy = (*xy + *xm1y + *xp1y + *xym1 + *xyp1) / 5; } int main(int argc, char **argv) { int niter, n; int x, y, loop; read_params(argc, argv, &n, &niter); double *A = calloc(n*n, sizeof(*A)); fill(A, n, n); for(loop=0 ; loop #include #include "stencil5.h" //! [To be included. You should update doxygen if you see this text.] //! [starpu_codelet. You should update doxygen if you see this text.] #define _(row,col,ld) ((row)+(col)*(ld)) void stencil5_cpu(void *descr[], void *_args) { (void)_args; double *xy = (double *)STARPU_VARIABLE_GET_PTR(descr[0]); double *xm1y = (double *)STARPU_VARIABLE_GET_PTR(descr[1]); double *xp1y = (double *)STARPU_VARIABLE_GET_PTR(descr[2]); double *xym1 = (double *)STARPU_VARIABLE_GET_PTR(descr[3]); double *xyp1 = (double *)STARPU_VARIABLE_GET_PTR(descr[4]); *xy = (*xy + *xm1y + *xp1y + *xym1 + *xyp1) / 5; } struct starpu_codelet stencil5_cl = { .cpu_funcs = {stencil5_cpu}, .nbuffers = 5, .modes = {STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R}, .model = &starpu_perfmodel_nop, }; //! [starpu_codelet. You should update doxygen if you see this text.] int main(int argc, char **argv) { starpu_data_handle_t *data_handles; int ret; int niter, n; int x, y, loop; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); read_params(argc, argv, &verbose, &n, &niter); double *A = calloc(n*n, sizeof(*A)); fill(A, n, n); //! [starpu_register. You should update doxygen if you see this text.] data_handles = malloc(n*n*sizeof(*data_handles)); for(x = 0; x < n; x++) { for (y = 0; y < n; y++) { starpu_variable_data_register(&data_handles[_(x,y,n)], STARPU_MAIN_RAM, (uintptr_t)&(A[_(x,y,n)]), sizeof(double)); } } //! [starpu_register. You should update doxygen if you see this text.] for(loop=0 ; loop #include #include "stencil5.h" //! [To be included. You should update doxygen if you see this text.] #define _(row,col,ld) ((row)+(col)*(ld)) void stencil5_cpu(void *descr[], void *_args); // Same as in sequential StarPU struct starpu_codelet stencil5_cl; // Same as in sequential StarPU /* Returns the MPI node number where data indexes index is */ int my_distrib(int x, int y, int nb_nodes) { return ((int)(x / sqrt(nb_nodes) + (y / sqrt(nb_nodes)) * sqrt(nb_nodes))) % nb_nodes; } int main(int argc, char **argv) { starpu_data_handle_t *data_handles; int niter, n; int my_rank, size, x, y, loop; //! [mpi_init. You should update doxygen if you see this text.] int ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); //! [mpi_init. You should update doxygen if you see this text.] read_params(argc, argv, &n, &niter); double *A = calloc(n*n, sizeof(*A)); fill(A, n, n); data_handles = malloc(n*n*sizeof(*data_handles)); for(x = 0; x < n; x++) { for (y = 0; y < n; y++) { //! [mpi_register. You should update doxygen if you see this text.] starpu_variable_data_register(&data_handles[_(x,y,n)], STARPU_MAIN_RAM, (uintptr_t)&(A[_(x,y,n)]), sizeof(double)); int mpi_rank = my_distrib(x, y, size); starpu_mpi_data_register(data_handles[_(x,y,n)], (y*n)+x, mpi_rank); //! [mpi_register. You should update doxygen if you see this text.] } } for(loop=0 ; loop //! [starpu scal code To be included. You should update doxygen if you see this text.] //! [Prototype To be included. You should update doxygen if you see this text.] void vector_scal_cpu(void *buffers[], void *cl_arg) { //! [Prototype To be included. You should update doxygen if you see this text.] //! [Extract To be included. You should update doxygen if you see this text.] struct starpu_vector_interface *vector = buffers[0]; float *val = (float *)STARPU_VECTOR_GET_PTR(vector); unsigned n = STARPU_VECTOR_GET_NX(vector); //! [Extract To be included. You should update doxygen if you see this text.] //! [Unpack To be included. You should update doxygen if you see this text.] float factor; starpu_codelet_unpack_args(cl_arg, &factor); //! [Unpack To be included. You should update doxygen if you see this text.] //! [Compute To be included. You should update doxygen if you see this text.] unsigned i; for (i = 0; i < n; i++) val[i] *= factor; //! [Compute To be included. You should update doxygen if you see this text.] } //! [starpu scal code To be included. You should update doxygen if you see this text.] starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_applications/code/vector_scal_starpu.c000066400000000000000000000064721507764646700311200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ //! [To be included. You should update doxygen if you see this text.] #include extern void vector_scal_cpu(void *buffers[], void *_args); extern void vector_scal_cuda(void *buffers[], void *_args); extern void vector_scal_opencl(void *buffers[], void *_args); //! [Codelet To be included. You should update doxygen if you see this text.] static struct starpu_codelet cl = { .cpu_funcs = {vector_scal_cpu}, .cuda_funcs = {vector_scal_cuda}, .opencl_funcs = {vector_scal_opencl}, .nbuffers = 1, .modes = {STARPU_RW} }; //! [Codelet To be included. You should update doxygen if you see this text.] #ifdef STARPU_USE_OPENCL struct starpu_opencl_program programs; #endif #define NX 2048 int main(void) { float *vector; unsigned i; //! [init To be included. You should update doxygen if you see this text.] int ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); //! [init To be included. You should update doxygen if you see this text.] #ifdef STARPU_USE_OPENCL starpu_opencl_load_opencl_from_file("vector_scal_opencl_kernel.cl", &programs, NULL); #endif //! [alloc To be included. You should update doxygen if you see this text.] vector = malloc(sizeof(vector[0]) * NX); for (i = 0; i < NX; i++) vector[i] = 1.0f; fprintf(stderr, "BEFORE : First element was %f\n", vector[0]); //! [alloc To be included. You should update doxygen if you see this text.] //! [register To be included. You should update doxygen if you see this text.] starpu_data_handle_t vector_handle; starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); //! [register To be included. You should update doxygen if you see this text.] //! [task_insert To be included. You should update doxygen if you see this text.] float factor = 3.14; ret = starpu_task_insert(&cl, STARPU_RW, vector_handle, STARPU_VALUE, &factor, sizeof(factor), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); //! [task_insert To be included. You should update doxygen if you see this text.] //! [wait To be included. You should update doxygen if you see this text.] starpu_task_wait_for_all(); starpu_data_unregister(vector_handle); //! [wait To be included. You should update doxygen if you see this text.] fprintf(stderr, "AFTER First element is %f\n", vector[0]); free(vector); #ifdef STARPU_USE_OPENCL starpu_opencl_unload_opencl(&programs); #endif //! [shutdown To be included. You should update doxygen if you see this text.] starpu_shutdown(); //! [shutdown To be included. You should update doxygen if you see this text.] return 0; } //! [To be included. You should update doxygen if you see this text.] starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_applications/stencil.doxy000066400000000000000000000063631507764646700265050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page StencilApplication A Stencil Application \section StencilOriginal The Original Application \snippet stencil5.c To be included. You should update doxygen if you see this text. \section StencilStarPU The StarPU Application The computation function must be defined through a codelet. \snippet stencil5_starpu.c starpu_codelet. You should update doxygen if you see this text. Data must be registered to StarPU. \snippet stencil5_starpu.c starpu_register. You should update doxygen if you see this text. Instead of directly calling the function, a StarPU task must be created. \snippet stencil5_starpu.c starpu_task. You should update doxygen if you see this text. And finally data must be released from StarPU. \snippet stencil5_starpu.c starpu_unregister. You should update doxygen if you see this text. The whole StarPU application looks as follows. \snippet stencil5_starpu.c To be included. You should update doxygen if you see this text. \section StencilStarPUMPI The StarPU MPI Application The initialisation for StarPU-MPI is as follows. \snippet stencil5_starpu_mpi.c mpi_init. You should update doxygen if you see this text. An additional call to starpu_mpi_data_register() is necessary. \snippet stencil5_starpu_mpi.c mpi_register. You should update doxygen if you see this text. And to insert a task, the function starpu_mpi_task_insert() must be used. \snippet stencil5_starpu_mpi.c mpi_insert. You should update doxygen if you see this text. The whole StarPU-MPI application looks as follows. \snippet stencil5_starpu_mpi.c To be included. You should update doxygen if you see this text. \section StencilRunning Running the application \verbatim $ docker run -it registry.gitlab.inria.fr/starpu/starpu-docker/starpu:latest \endverbatim If your machine has GPU devices, you can use the following command to enable the GPU devices within the docker image. \verbatim $ docker run -it --gpus all registry.gitlab.inria.fr/starpu/starpu-docker/starpu:latest \endverbatim From your docker image, you can then call the following commands. \verbatim $ git clone https://gitlab.inria.fr/starpu/starpu-applications.git $ cd starpu-applications/stencil5 $ make \endverbatim To run the non-StarPU application \verbatim $ ./stencil5 -v \endverbatim To run the sequential StarPU application \verbatim $ ./stencil5_starpu -v \endverbatim To run the StarPU MPI application. Setting the variable \ref STARPU_COMM_STATS to 1 will display the amount of communication between the different MPI processes. \verbatim $ STARPU_COMM_STATS=1 mpirun -np 4 ./stencil5_starpu_mpi -v 4 3 \endverbatim */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_applications/vector_scaling.doxy000066400000000000000000000132651507764646700300450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page VectorApplication A Vector Scaling Application \section BaseVersion Base version The non-StarPU version shows a basic example that we will be using to illustrate how to use StarPU. It simply allocates a vector, and calls a scaling function over it. \snippet vector_scal_c.c To be included. You should update doxygen if you see this text. \section StarPUCversion StarPU C version \subsection ComputationKernels Computation Kernels We are going to transform here the computation function \c vector_scal_cpu. \snippet vector_scal_c.c Original scal code To be included. You should update doxygen if you see this text. The StarPU corresponding function takes as parameters a list of DSM interfaces and a non-DSM parameter. \snippet vector_scal_cpu.c Prototype To be included. You should update doxygen if you see this text. The first DSM parameter is the vector and is available through \c buffer[0]. StarPU provides functions to get the vector data, and extract the pointer and size of the vector. \snippet vector_scal_cpu.c Extract To be included. You should update doxygen if you see this text. The non-DSM parameters are stored in the second argument of the function, and need to be unpacked. \snippet vector_scal_cpu.c Unpack To be included. You should update doxygen if you see this text. It is then possible to perform the vector scaling as in the original function. \snippet vector_scal_cpu.c Compute To be included. You should update doxygen if you see this text.
Original code StarPU code
\snippet vector_scal_c_align.c Original scal code To be included. You should update doxygen if you see this text. \snippet vector_scal_cpu.c starpu scal code To be included. You should update doxygen if you see this text.
The GPU and OpenCL implementations can be seen in \ref FullSourceCodeVectorScal. \subsection MainCode Main Code Let's look now at the main code.

Original code StarPU code
\snippet vector_scal_c_align.c Original main code To be included. You should update doxygen if you see this text. \snippet vector_scal_starpu.c To be included. You should update doxygen if you see this text.
\section BuildingandRunning Building and Running We will use the StarPU docker image. \verbatim $ docker run -it registry.gitlab.inria.fr/starpu/starpu-docker/starpu:latest \endverbatim If your machine has GPU devices, you can use the following command to enable the GPU devices within the docker image. \verbatim $ docker run -it --gpus all registry.gitlab.inria.fr/starpu/starpu-docker/starpu:latest \endverbatim From your docker image, you can then call the following commands. \verbatim $ cd tutorial/files $ make vector_scal_task_insert $ ./vector_scal_task_insert \endverbatim You can set the environment variable \ref STARPU_WORKER_STATS to \c 1 when running your application to see the number of tasks executed by each device. \verbatim $ STARPU_WORKER_STATS=1 ./vector_scal_task_insert \endverbatim If your machine has GPU devices, you can force the execution on the GPU devices by setting the number of CPU workers to 0. \verbatim # to force the implementation on a GPU device, by default, it will enable CUDA $ STARPU_WORKER_STATS=1 STARPU_NCPU=0 ./vector_scal_task_insert # to force the implementation on a OpenCL device $ STARPU_WORKER_STATS=1 STARPU_NCPU=0 STARPU_NCUDA=0 ./vector_scal_task_insert \endverbatim */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_basics/000077500000000000000000000000001507764646700227055ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_basics/basic_examples.doxy000066400000000000000000000410131507764646700265700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page BasicExamples Basic Examples \section HelloWorldUsingStarPUAPI Hello World This section shows how to implement a simple program that submits a task to StarPU. The full source code for this example is available in the file examples/basic_examples/hello_world.c \subsection RequiredHeaders Required Headers The header \c starpu.h should be included in any code using StarPU. \code{.c} #include \endcode \subsection DefiningACodelet Defining A Codelet A codelet is a structure that represents a computational kernel. Such a codelet may contain an implementation of the same kernel on different architectures (e.g. CUDA, x86, ...). For compatibility, make sure that the whole structure is properly initialized to zero, either by using the function starpu_codelet_init(), or by letting the compiler implicitly do it as examplified below. The field starpu_codelet::nbuffers specifies the number of data buffers that are manipulated by the codelet. Here, the codelet does not access or modify any data that is controlled by our data management library. We create a codelet which may only be executed on CPUs. When a CPU core will execute a codelet, it will call the function cpu_func, which \em must have the following prototype: \code{.c} void cpu_func(void *buffers[], void *cl_arg); \endcode In this example, we can ignore the first argument of this function which gives a description of the input and output buffers (e.g. the size and the location of the matrices) since there is none. We also ignore the second argument, which is a pointer to optional arguments for the codelet. \code{.c} void cpu_func(void *buffers[], void *cl_arg) { printf("Hello world\n"); } struct starpu_codelet cl = { .cpu_funcs = { cpu_func }, .nbuffers = 0 }; \endcode \subsection SubmittingATask Submitting A Task Before submitting any tasks to StarPU, starpu_init() must be called, or starpu_initialize() must be called by giving application arguments. The NULL argument specifies that we use the default configuration. Tasks can then be submitted until the termination of StarPU -- done by a call to starpu_shutdown(). In the example below, a task structure is allocated by a call to starpu_task_create(). This function allocates and fills the task structure with its default settings, it does not submit the task to StarPU. The field starpu_task::cl is a pointer to the codelet which the task will execute: in other words, the codelet structure describes which computational kernel should be offloaded on the different architectures, and the task structure is a wrapper containing a codelet and the piece of data on which the codelet should operate. If the field starpu_task::synchronous is non-zero, task submission will be synchronous: the function starpu_task_submit() will not return until the task has been executed. Note that the function starpu_shutdown() does not guarantee that asynchronous tasks have been executed before it returns, starpu_task_wait_for_all() can be used to this effect, or data can be unregistered (starpu_data_unregister()), which will implicitly wait for all the tasks scheduled to work on it, unless explicitly disabled thanks to starpu_data_set_default_sequential_consistency_flag() or starpu_data_set_sequential_consistency_flag(). \code{.c} int main(int argc, char **argv) { /* initialize StarPU */ starpu_init(NULL); struct starpu_task *task = starpu_task_create(); task->cl = &cl; /* Pointer to the codelet defined above */ /* starpu_task_submit will be a blocking call. If unset, starpu_task_wait() needs to be called after submitting the task. */ task->synchronous = 1; /* submit the task to StarPU */ starpu_task_submit(task); /* terminate StarPU */ starpu_shutdown(); return 0; } \endcode \subsection ExecutionOfHelloWorld Execution Of Hello World \verbatim $ make hello_world cc $(pkg-config --cflags starpu-1.4) hello_world.c -o hello_world $(pkg-config --libs starpu-1.4) $ ./hello_world Hello world \endverbatim \subsection PassingArgumentsToTheCodelet Passing Arguments To The Codelet The optional field starpu_task::cl_arg field is a pointer to a buffer (of size starpu_task::cl_arg_size) with some parameters for the kernel described by the codelet. For instance, if a codelet implements a computational kernel that multiplies its input vector by a constant, the constant could be specified by the means of this buffer, instead of registering it as a StarPU data. It must however be noted that StarPU avoids making copy whenever possible and rather passes the pointer as such, so the buffer which is pointed to must be kept allocated until the task terminates, and if several tasks are submitted with various parameters, each of them must be given a pointer to their own buffer. \code{.c} struct params { int i; float f; }; void cpu_func(void *buffers[], void *cl_arg) { struct params *params = cl_arg; printf("Hello world (params = {%i, %f} )\n", params->i, params->f); } \endcode As said before, the field starpu_codelet::nbuffers specifies the number of data buffers which are manipulated by the codelet. It does not count the argument --- the parameter cl_arg of the function cpu_func --- since it is not managed by our data management library, but just contains trivial parameters. // TODO rewrite so that it is a little clearer ? Be aware that this may be a pointer to a \em copy of the actual buffer, and not the pointer given by the programmer: if the codelet modifies this buffer, there is no guarantee that the initial buffer will be modified as well: this for instance implies that the buffer cannot be used as a synchronization medium. If synchronization is needed, data has to be registered to StarPU, see \ref VectorScalingUsingStarPUAPI. \code{.c} int main(int argc, char **argv) { /* initialize StarPU */ starpu_init(NULL); struct starpu_task *task = starpu_task_create(); task->cl = &cl; /* Pointer to the codelet defined above */ struct params params = { 1, 2.0f }; task->cl_arg = ¶ms; task->cl_arg_size = sizeof(params); /* starpu_task_submit will be a blocking call */ task->synchronous = 1; /* submit the task to StarPU */ starpu_task_submit(task); /* terminate StarPU */ starpu_shutdown(); return 0; } \endcode \verbatim $ make hello_world cc $(pkg-config --cflags starpu-1.4) hello_world.c -o hello_world $(pkg-config --libs starpu-1.4) $ ./hello_world Hello world (params = {1, 2.000000} ) \endverbatim \subsection DefiningACallback Defining A Callback Once a task has been executed, an optional callback function starpu_task::callback_func is called when defined. While the computational kernel could be offloaded on various architectures, the callback function is always executed on a CPU. The pointer starpu_task::callback_arg is passed as an argument to the callback function. The prototype of a callback function must be: \code{.c} void callback_function(void *); \endcode \code{.c} void callback_func(void *callback_arg) { printf("Callback function (arg %x)\n", callback_arg); } int main(int argc, char **argv) { /* initialize StarPU */ starpu_init(NULL); struct starpu_task *task = starpu_task_create(); task->cl = &cl; /* Pointer to the codelet defined above */ task->callback_func = callback_func; task->callback_arg = 0x42; /* starpu_task_submit will be a blocking call */ task->synchronous = 1; /* submit the task to StarPU */ starpu_task_submit(task); /* terminate StarPU */ starpu_shutdown(); return 0; } \endcode \verbatim $ make hello_world cc $(pkg-config --cflags starpu-1.4) hello_world.c -o hello_world $(pkg-config --libs starpu-1.4) $ ./hello_world Hello world Callback function (arg 42) \endverbatim \subsection WhereToExecuteACodelet Where To Execute A Codelet \code{.c} struct starpu_codelet cl = { .where = STARPU_CPU, .cpu_funcs = { cpu_func }, .nbuffers = 0 }; \endcode We create a codelet which may only be executed on the CPUs. The optional field starpu_codelet::where is a bitmask which defines where the codelet may be executed. Here, the value ::STARPU_CPU means that only CPUs can execute this codelet. When the optional field starpu_codelet::where is unset, its value is automatically set based on the availability of the different fields XXX_funcs. \section VectorScalingUsingStarPUAPI Vector Scaling The previous example has shown how to submit tasks. In this section, we show how StarPU tasks can manipulate data. The full source code for this example is given in \ref FullSourceCodeVectorScal. \subsection SourceCodeOfVectorScaling Source Code of Vector Scaling Programmers can describe the data layout of their application so that StarPU is responsible for enforcing data coherency and availability across the machine. Instead of handling complex (and non-portable) mechanisms to perform data movements, programmers only declare which piece of data is accessed and/or modified by a task, and StarPU makes sure that when a computational kernel starts somewhere (e.g. on a GPU), its data are available locally. Before submitting those tasks, programmers first need to declare the different pieces of data to StarPU using the functions starpu_*_data_register. To ease the development of applications for StarPU, it is possible to describe multiple types of data layout. A type of data layout is called an interface. There are different predefined interfaces available in StarPU, here we will consider the vector interface. The following lines show how to declare an array of NX elements of type float using the vector interface: \code{.c} float vector[NX]; starpu_data_handle_t vector_handle; starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); \endcode The first argument, called the data handle, is an opaque pointer which designates the array within StarPU. This is also the structure which is used to describe which data is used by a task. The second argument is the node number where the data originally resides. Here it is ::STARPU_MAIN_RAM since the array vector is in the main memory. Then comes the pointer vector where the data can be found in main memory, the number of elements in the vector and the size of each element. The following shows how to construct a StarPU task that will manipulate the vector and a constant factor. \code{.c} float factor = 3.14; struct starpu_task *task = starpu_task_create(); task->cl = &cl; /* Pointer to the codelet defined below */ task->handles[0] = vector_handle; /* First parameter of the codelet */ task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); task->synchronous = 1; starpu_task_submit(task); \endcode Since the factor is a mere constant float value parameter, it does not need a preliminary registration, and can just be passed through the pointer starpu_task::cl_arg like in the previous example. The vector parameter is described by its handle. starpu_task::handles should be set with the handles of the data, the access modes for the data are defined in the field starpu_codelet::modes (::STARPU_R for read-only, ::STARPU_W for write-only and ::STARPU_RW for read and write access). The definition of the codelet can be written as follows: \code{.c} void scal_cpu_func(void *buffers[], void *cl_arg) { unsigned i; float *factor = cl_arg; /* length of the vector */ unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); /* CPU copy of the vector pointer */ float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]); for (i = 0; i < n; i++) val[i] *= *factor; } struct starpu_codelet cl = { .cpu_funcs = { scal_cpu_func }, .nbuffers = 1, .modes = { STARPU_RW } }; \endcode The first argument is an array that gives a description of all the buffers passed in the array starpu_task::handles. The size of this array is given by the field starpu_codelet::nbuffers. For the sake of genericity, this array contains pointers to the different interfaces describing each buffer. In the case of the vector interface, the location of the vector (resp. its length) is accessible in the starpu_vector_interface::ptr (resp. starpu_vector_interface::nx) of this interface. Since the vector is accessed in a read-write fashion, any modification will automatically affect future accesses to this vector made by other tasks. The second argument of the function scal_cpu_func contains a pointer to the parameters of the codelet (given in starpu_task::cl_arg), so that we read the constant factor from this pointer. \subsection ExecutionOfVectorScaling Execution of Vector Scaling \verbatim $ make vector_scal cc $(pkg-config --cflags starpu-1.4) vector_scal.c -o vector_scal $(pkg-config --libs starpu-1.4) $ ./vector_scal 0.000000 3.000000 6.000000 9.000000 12.000000 \endverbatim \section VectorScalingOnAnHybridCPUGPUMachine Vector Scaling on an Hybrid CPU/GPU Machine Contrary to the previous examples, the task submitted in this example may not only be executed by the CPUs, but also by a CUDA device. \subsection DefinitionOfTheCUDAKernel Definition of the CUDA Kernel The CUDA implementation can be written as follows. It needs to be compiled with a CUDA compiler such as nvcc, the NVIDIA CUDA compiler driver. It must be noted that the vector pointer returned by ::STARPU_VECTOR_GET_PTR is here a pointer in GPU memory, so that it can be passed as such to the kernel call vector_mult_cuda. \snippet basics_vector_scal_cuda.c To be included. You should update doxygen if you see this text. \subsection DefinitionOfTheOpenCLKernel Definition of the OpenCL Kernel The OpenCL implementation can be written as follows. StarPU provides tools to compile a OpenCL kernel stored in a file. \code{.c} __kernel void vector_mult_opencl(int nx, __global float* val, float factor) { const int i = get_global_id(0); if (i < nx) { val[i] *= factor; } } \endcode Contrary to CUDA and CPU, ::STARPU_VECTOR_GET_DEV_HANDLE has to be used, which returns a cl_mem (which is not a device pointer, but an OpenCL handle), which can be passed as such to the OpenCL kernel. The difference is important when using partitioning, see \ref PartitioningData. \snippet basics_vector_scal_opencl.c To be included. You should update doxygen if you see this text. \subsection DefinitionOfTheMainCode Definition of the Main Code The CPU implementation is the same as in the previous section. Here is the source of the main application. You can notice that the fields starpu_codelet::cuda_funcs and starpu_codelet::opencl_funcs are set to define the pointers to the CUDA and OpenCL implementations of the task. \snippet basics_vector_scal_c.c To be included. You should update doxygen if you see this text. \subsection ExecutionOfHybridVectorScaling Execution of Hybrid Vector Scaling The Makefile given at the beginning of the section must be extended to give the rules to compile the CUDA source code. Note that the source file of the OpenCL kernel does not need to be compiled now, it will be compiled at runtime when calling the function starpu_opencl_load_opencl_from_file(). \verbatim CFLAGS += $(shell pkg-config --cflags starpu-1.4) LDLIBS += $(shell pkg-config --libs starpu-1.4) CC = gcc vector_scal: vector_scal.o vector_scal_cpu.o vector_scal_cuda.o vector_scal_opencl.o %.o: %.cu nvcc $(CFLAGS) $< -c $@ clean: rm -f vector_scal *.o \endverbatim \verbatim $ make \endverbatim and to execute it, with the default configuration: \verbatim $ ./vector_scal 0.000000 3.000000 6.000000 9.000000 12.000000 \endverbatim or for example, by disabling CPU devices: \verbatim $ STARPU_NCPU=0 ./vector_scal 0.000000 3.000000 6.000000 9.000000 12.000000 \endverbatim or by disabling CUDA devices (which may permit to enable the use of OpenCL, see \ref EnablingOpenCL) : \verbatim $ STARPU_NCUDA=0 ./vector_scal 0.000000 3.000000 6.000000 9.000000 12.000000 \endverbatim */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_basics/basics_intro.doxy000066400000000000000000000034211507764646700262710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \intropage{IntroBasics, --------- StarPU Basics ---------} \webforeword This part presents the basic knowledge of StarPU. It should be read to understand how StarPU works and how to execute a basic StarPU application.
  • Chapter \ref StarPUApplications shows how to create and run your own StarPU applications.
  • Chapter \ref BasicExamples shows how to implement simple programs that submit tasks to StarPU.
  • Chapter \ref FullSourceCodeVectorScal gives the full source code for a vector scaling application.
The next chapters cover the most important and core concepts in StarPU:
  • Chapter \ref TasksInStarPU explains the basic information on tasks management.
  • Chapter \ref DataManagement shows how to manage the data layout of your application data by using the different data interfaces provided by StarPU.
  • Chapter \ref Scheduling explains the scheduling policies provided by StarPU.
Some examples applications are provided from the StarPU sources for you to try. Chapter \ref ExamplesInStarPUSources lists these applications. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_basics/code/000077500000000000000000000000001507764646700236175ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_c.c000066400000000000000000000107671507764646700301300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ //! [To be included. You should update doxygen if you see this text.] /* * This example demonstrates how to use StarPU to scale an array by a factor. * It shows how to manipulate data with StarPU's data management library. * 1- how to declare a piece of data to StarPU (starpu_vector_data_register) * 2- how to describe which data are accessed by a task (task->handles[0]) * 3- how a kernel can manipulate the data (buffers[0].vector.ptr) */ #include #define NX 2048 extern void scal_cpu_func(void *buffers[], void *_args); extern void scal_sse_func(void *buffers[], void *_args); extern void scal_cuda_func(void *buffers[], void *_args); extern void scal_opencl_func(void *buffers[], void *_args); static struct starpu_codelet cl = { .where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL, /* CPU implementation of the codelet */ .cpu_funcs = { scal_cpu_func, scal_sse_func }, .cpu_funcs_name = { "scal_cpu_func", "scal_sse_func" }, #ifdef STARPU_USE_CUDA /* CUDA implementation of the codelet */ .cuda_funcs = { scal_cuda_func }, #endif #ifdef STARPU_USE_OPENCL /* OpenCL implementation of the codelet */ .opencl_funcs = { scal_opencl_func }, #endif .nbuffers = 1, .modes = { STARPU_RW } }; #ifdef STARPU_USE_OPENCL struct starpu_opencl_program programs; #endif int main(int argc, char **argv) { /* We consider a vector of float that is initialized just as any of C * data */ float vector[NX]; unsigned i; for (i = 0; i < NX; i++) vector[i] = 1.0f; fprintf(stderr, "BEFORE: First element was %f\n", vector[0]); /* Initialize StarPU with default configuration */ starpu_init(NULL); #ifdef STARPU_USE_OPENCL starpu_opencl_load_opencl_from_file("examples/basic_examples/vector_scal_opencl_kernel.cl", &programs, NULL); #endif /* Tell StaPU to associate the "vector" vector with the "vector_handle" * identifier. When a task needs to access a piece of data, it should * refer to the handle that is associated to it. * In the case of the "vector" data interface: * - the first argument of the registration method is a pointer to the * handle that should describe the data * - the second argument is the memory node where the data (ie. "vector") * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as * opposed to an address on a GPU for instance. * - the third argument is the address of the vector in RAM * - the fourth argument is the number of elements in the vector * - the fifth argument is the size of each element. */ starpu_data_handle_t vector_handle; starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); float factor = 3.14; /* create a synchronous task: any call to starpu_task_submit will block * until it is terminated */ struct starpu_task *task = starpu_task_create(); task->synchronous = 1; task->cl = &cl; /* the codelet manipulates one buffer in RW mode */ task->handles[0] = vector_handle; /* an argument is passed to the codelet, beware that this is a * READ-ONLY buffer and that the codelet may be given a pointer to a * COPY of the argument */ task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); /* execute the task on any eligible computational resource */ starpu_task_submit(task); /* StarPU does not need to manipulate the array anymore so we can stop * monitoring it */ starpu_data_unregister(vector_handle); #ifdef STARPU_USE_OPENCL starpu_opencl_unload_opencl(&programs); #endif /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); fprintf(stderr, "AFTER First element is %f\n", vector[0]); return 0; } //! [To be included. You should update doxygen if you see this text.] starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cpu.c000066400000000000000000000054301507764646700304640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ //! [To be included. You should update doxygen if you see this text.] #include #include /* This kernel takes a buffer and scales it by a constant factor */ void scal_cpu_func(void *buffers[], void *cl_arg) { unsigned i; float *factor = cl_arg; /* * The "buffers" array matches the task->handles array: for instance * task->handles[0] is a handle that corresponds to a data with * vector "interface", so that the first entry of the array in the * codelet is a pointer to a structure describing such a vector (ie. * struct starpu_vector_interface *). Here, we therefore manipulate * the buffers[0] element as a vector: nx gives the number of elements * in the array, ptr gives the location of the array (that was possibly * migrated/replicated), and elemsize gives the size of each elements. */ struct starpu_vector_interface *vector = buffers[0]; /* length of the vector */ unsigned n = STARPU_VECTOR_GET_NX(vector); /* get a pointer to the local copy of the vector: note that we have to * cast it in (float *) since a vector could contain any type of * elements so that the .ptr field is actually a uintptr_t */ float *val = (float *)STARPU_VECTOR_GET_PTR(vector); /* scale the vector */ for (i = 0; i < n; i++) val[i] *= *factor; } void scal_sse_func(void *buffers[], void *cl_arg) { float *vector = (float *) STARPU_VECTOR_GET_PTR(buffers[0]); unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); unsigned int n_iterations = n/4; __m128 *VECTOR = (__m128*) vector; __m128 FACTOR STARPU_ATTRIBUTE_ALIGNED(16); float factor = *(float *) cl_arg; FACTOR = _mm_set1_ps(factor); unsigned int i; for (i = 0; i < n_iterations; i++) VECTOR[i] = _mm_mul_ps(FACTOR, VECTOR[i]); unsigned int remainder = n%4; if (remainder != 0) { unsigned int start = 4 * n_iterations; for (i = start; i < start+remainder; ++i) { vector[i] = factor * vector[i]; } } } //! [To be included. You should update doxygen if you see this text.] starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_cuda.c000066400000000000000000000033341507764646700306120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ //! [To be included. You should update doxygen if you see this text.] #include static __global__ void vector_mult_cuda(unsigned n, float *val, float factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) val[i] *= factor; } extern "C" void scal_cuda_func(void *buffers[], void *_args) { float *factor = (float *)_args; /* length of the vector */ unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the vector pointer */ float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]); unsigned threads_per_block = 64; unsigned nblocks = (n + threads_per_block-1) / threads_per_block; vector_mult_cuda<<>>(n, val, *factor); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } //! [To be included. You should update doxygen if you see this text.] starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl.c000066400000000000000000000052721507764646700311610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ //! [To be included. You should update doxygen if you see this text.] #include extern struct starpu_opencl_program programs; void scal_opencl_func(void *buffers[], void *_args) { float *factor = _args; int id, devid, err; /* OpenCL specific code */ cl_kernel kernel; /* OpenCL specific code */ cl_command_queue queue; /* OpenCL specific code */ cl_event event; /* OpenCL specific code */ /* length of the vector */ unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); /* OpenCL copy of the vector pointer */ cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); { /* OpenCL specific code */ id = starpu_worker_get_id(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &programs, "vector_mult_opencl", /* Name of the codelet */ devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(n), &n); err |= clSetKernelArg(kernel, 1, sizeof(val), &val); err |= clSetKernelArg(kernel, 2, sizeof(*factor), factor); if (err) STARPU_OPENCL_REPORT_ERROR(err); } { /* OpenCL specific code */ size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local=global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } { /* OpenCL specific code */ clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); } } //! [To be included. You should update doxygen if you see this text.] starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl_codelet.cl000066400000000000000000000017661507764646700330400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ //! [To be included. You should update doxygen if you see this text.] __kernel void vector_mult_opencl(int nx, __global float* val, float factor) { const int i = get_global_id(0); if (i < nx) { val[i] *= factor; } } //! [To be included. You should update doxygen if you see this text.] starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_basics/data_management.doxy000066400000000000000000001067671507764646700267400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page DataManagement Data Management TODO: intro which mentions consistency among other things \section DataInterface Data Interface StarPU provides several data interfaces for programmers to describe the data layout of their application. There are predefined interfaces already available in StarPU. Users can define new data interfaces as explained in \ref DefiningANewDataInterface. All functions provided by StarPU are documented in \ref API_Data_Interfaces. You will find a short list below. \subsection VariableDataInterface Variable Data Interface A variable is a given-size byte element, typically a scalar or a pointer to an application-specific structure. Here is an example of how to register a variable data to StarPU by using starpu_variable_data_register(). A full code example for the variable data interface is available in the file examples/basic_examples/variable.c. \code{.c} float var = 42.0; starpu_data_handle_t var_handle; starpu_variable_data_register(&var_handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); \endcode Here is an example of how to register an application-specific data to StarPU, the idea is to register the variable that contains the pointer to the application-specific data. This will not provide support for GPUs and MPI, but can be an easy start before defining your own data interface to describe the application-specific structure (see \ref DefiningANewDataInterface). \code{.c} struct mystructure *A = ...; starpu_data_handle_t var_handle; starpu_variable_data_register(&var_handle, STARPU_MAIN_RAM, (uintptr_t)&A, sizeof(A)); \endcode \subsection VectorDataInterface Vector Data Interface A vector is a fixed number of elements of a given size. Here is an example of how to register a vector data to StarPU by using starpu_vector_data_register(). A full code example for the vector data interface is available in the file examples/filters/fvector.c. \code{.c} float vector[NX]; starpu_data_handle_t vector_handle; starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); \endcode Vectors can be partitioned into pieces by using starpu_vector_filter_block(). They can also be partitioned with some overlapping by using starpu_vector_filter_block_shadow(). An example is in the file examples/filters/shadow.c. By default, StarPU uses the same size for each piece. If different sizes are desired, starpu_vector_filter_list() or starpu_vector_filter_list_long() can be used instead. To just divide in two pieces, starpu_vector_filter_divide_in_2() can be used. In addition, contiguous variables can be picked from a vector by using starpu_vector_filter_pick_variable() with starpu_data_filter::get_child_ops set to starpu_vector_filter_pick_variable_child_ops(). An example is in the file examples/filters/fvector_pick_variable.c. \subsection MatrixDataInterface Matrix Data Interface To register 2-D matrices with a potential padding, one can use the matrix data interface. Here is an example of how to register a matrix data to StarPU by using starpu_matrix_data_register(). A full code example for the matrix data interface is available in the file examples/filters/fmatrix.c. \code{.c} float *matrix; starpu_data_handle_t matrix_handle; matrix = (float*)malloc(width * height * sizeof(float)); starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix, width, width, height, sizeof(float)); \endcode 2D matrices can be partitioned into 2D matrices along the x dimension by using starpu_matrix_filter_block(), and along the y dimension by using starpu_matrix_filter_vertical_block(). They can also be partitioned with some overlapping by using starpu_matrix_filter_block_shadow() and starpu_matrix_filter_vertical_block_shadow(). An example is in the file examples/filters/shadow2d.c. In addition, contiguous vectors can be picked from a matrix along the Y dimension by using starpu_matrix_filter_pick_vector_y() with starpu_data_filter::get_child_ops set to starpu_matrix_filter_pick_vector_child_ops(). An example is in the file examples/filters/fmatrix_pick_vector.c. Variable can be also picked from a matrix by using starpu_matrix_filter_pick_variable() with starpu_data_filter::get_child_ops needs set to starpu_matrix_filter_pick_variable_child_ops(). An example is in the file examples/filters/fmatrix_pick_variable.c. \subsection BlockDataInterface Block Data Interface To register 3-D matrices with potential paddings on Y and Z dimensions, one can use the block data interface. Here is an example of how to register a block data to StarPU by using starpu_block_data_register(). A full code example for the block data interface is available in the file examples/filters/fblock.c. \code{.c} float *block; starpu_data_handle_t block_handle; block = (float*)malloc(nx*ny*nz*sizeof(float)); starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float)); \endcode 3D matrices can be partitioned along the x dimension by using starpu_block_filter_block(), or along the y dimension by using starpu_block_filter_vertical_block(), or along the z dimension by using starpu_block_filter_depth_block(). They can also be partitioned with some overlapping by using starpu_block_filter_block_shadow(), starpu_block_filter_vertical_block_shadow(), or starpu_block_filter_depth_block_shadow(). An example is in the file examples/filters/shadow3d.c. In addition, contiguous matrices can be picked from a block along the Z dimension or the Y dimension by using starpu_block_filter_pick_matrix_z() or starpu_block_filter_pick_matrix_y() with starpu_data_filter::get_child_ops set to starpu_block_filter_pick_matrix_child_ops(). An example is in the file examples/filters/fblock_pick_matrix.c. Variable can be also picked from a block by using starpu_block_filter_pick_variable() with starpu_data_filter::get_child_ops set to starpu_block_filter_pick_variable_child_ops(). An example is in the file examples/filters/fblock_pick_variable.c. \subsection TensorDataInterface Tensor Data Interface To register 4-D matrices with potential paddings on Y, Z, and T dimensions, one can use the tensor data interface. Here is an example of how to register a tensor data to StarPU by using starpu_tensor_data_register(). A full code example for the tensor data interface is available in the file examples/filters/ftensor.c. \code{.c} float *block; starpu_data_handle_t block_handle; block = (float*)malloc(nx*ny*nz*nt*sizeof(float)); starpu_tensor_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, nx, nx*ny, nx*ny*nz, nx, ny, nz, nt, sizeof(float)); \endcode 4D matrices can be partitioned along the x dimension by using starpu_tensor_filter_block(), or along the y dimension by using starpu_tensor_filter_vertical_block(), or along the z dimension by using starpu_tensor_filter_depth_block(), or along the t dimension by using starpu_tensor_filter_time_block(). They can also be partitioned with some overlapping by using starpu_tensor_filter_block_shadow(), starpu_tensor_filter_vertical_block_shadow(), starpu_tensor_filter_depth_block_shadow(), or starpu_tensor_filter_time_block_shadow(). An example is in the file examples/filters/shadow4d.c. In addition, contiguous blocks can be picked from a block along the T dimension, Z dimension or the Y dimension by using starpu_tensor_filter_pick_block_t(), starpu_tensor_filter_pick_block_z(), or starpu_tensor_filter_pick_block_y(), and starpu_data_filter::get_child_ops set to starpu_tensor_filter_pick_block_child_ops(). An example is in the file examples/filters/ftensor_pick_block.c. Variable can be also picked from a tensor by using starpu_tensor_filter_pick_variable() with starpu_data_filter::get_child_ops set to starpu_tensor_filter_pick_variable_child_ops(). An example is in the file examples/filters/ftensor_pick_variable.c. \subsection NdimDataInterface Ndim Data Interface To register N-dim matrices, one can use the Ndim data interface. Here is an example of how to register a 5-dim data to StarPU by using starpu_ndim_data_register(). A full code example for the ndim data interface is available in the file examples/filters/fndim.c. \code{.c} float *arr5d; starpu_data_handle_t arr5d_handle; starpu_malloc((void **)&arr5d, NX*NY*NZ*NT*NG*sizeof(float)); unsigned nn[5] = {NX, NY, NZ, NT, NG}; unsigned ldn[5] = {1, NX, NX*NY, NX*NY*NZ, NX*NY*NZ*NT}; starpu_ndim_data_register(&arr5d_handle, STARPU_MAIN_RAM, (uintptr_t)arr5d, ldn, nn, 5, sizeof(float)); \endcode N-dim matrices can be partitioned along the given dimension by using starpu_ndim_filter_block(). They can also be partitioned with some overlapping by using starpu_ndim_filter_block_shadow(). An example is in the file examples/filters/shadownd.c. Taking into account existing data interfaces, there are several specialized functions which can partition a 0-dim array, 1-dim array, 2-dim array, 3-dim array or 4-dim array into
  • variables by using starpu_ndim_filter_to_variable() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_to_variable_child_ops() (see file examples/filters/fndim_to_variable.c)
  • ,
  • vectors by using starpu_ndim_filter_to_vector() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_to_vector_child_ops() (see file examples/filters/fndim_to_vector.c)
  • ,
  • matrices by using starpu_ndim_filter_to_matrix() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_to_matrix_child_ops() (see file examples/filters/fndim_to_matrix.c)
  • ,
  • blocks by using starpu_ndim_filter_to_block() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_to_block_child_ops() (see file examples/filters/fndim_to_block.c)
  • ,
  • or tensors by using starpu_ndim_filter_to_tensor() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_to_tensor_child_ops() (see file examples/filters/fndim_to_tensor.c)
  • .
In addition, contiguous (n-1)dim arrays can be picked from a ndim array along the given dimension by using starpu_ndim_filter_pick_ndim(). An example is in the file examples/filters/fndim_pick_ndim.c. In specific cases which consider existing data interfaces, contiguous variables, vectors, matrices, blocks, or tensors can be along the given dimension picked from a
  • 1-dim array by using starpu_ndim_filter_1d_pick_variable() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_pick_variable_child_ops() (see file examples/filters/fndim_1d_pick_variable.c),
  • 2-dim array by using starpu_ndim_filter_2d_pick_vector() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_pick_vector_child_ops() (see file examples/filters/fndim_2d_pick_vector.c),
  • 3-dim array by using starpu_ndim_filter_3d_pick_matrix() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_pick_matrix_child_ops() (see file examples/filters/fndim_3d_pick_matrix.c),
  • 4-dim array by using starpu_ndim_filter_4d_pick_block() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_pick_block_child_ops() (see file examples/filters/fndim_4d_pick_block.c),
  • or 5-dim array by using starpu_ndim_filter_5d_pick_tensor() and starpu_data_filter::get_child_ops set to starpu_ndim_filter_pick_tensor_child_ops() (see file examples/filters/fndim_5d_pick_tensor.c).
Variable can be also picked from a ndim array by using starpu_ndim_filter_pick_variable() with starpu_data_filter::get_child_ops set to starpu_ndim_filter_pick_variable_child_ops(). An example is in the file examples/filters/fndim_pick_variable.c. \subsection BCSRDataInterface BCSR Data Interface BCSR (Blocked Compressed Sparse Row Representation) sparse matrix data can be registered to StarPU using the bcsr data interface. Here is an example on how to do so by using starpu_bcsr_data_register(). \code{.c} /* * We use the following matrix: * * +----------------+ * | 0 1 0 0 | * | 2 3 0 0 | * | 4 5 8 9 | * | 6 7 10 11 | * +----------------+ * * nzval = [0, 1, 2, 3] ++ [4, 5, 6, 7] ++ [8, 9, 10, 11] * colind = [0, 0, 1] * rowptr = [0, 1, 3] * r = c = 2 */ /* Size of the blocks */ int R = 2; int C = 2; int NROWS = 2; int NNZ_BLOCKS = 3; /* out of 4 */ int NZVAL_SIZE = (R*C*NNZ_BLOCKS); int nzval[NZVAL_SIZE] = { 0, 1, 2, 3, /* First block */ 4, 5, 6, 7, /* Second block */ 8, 9, 10, 11 /* Third block */ }; uint32_t colind[NNZ_BLOCKS] = { 0, /* block-column index for first block in nzval */ 0, /* block-column index for second block in nzval */ 1 /* block-column index for third block in nzval */ }; uint32_t rowptr[NROWS+1] = { 0, / * block-index in nzval of the first block of the first row. */ 1, / * block-index in nzval of the first block of the second row. */ NNZ_BLOCKS /* number of blocks, to allow an easier element's access for the kernels */ }; starpu_data_handle_t bcsr_handle; starpu_bcsr_data_register(&bcsr_handle, STARPU_MAIN_RAM, NNZ_BLOCKS, NROWS, (uintptr_t) nzval, colind, rowptr, 0, /* firstentry */ R, C, sizeof(nzval[0])); \endcode An example on how to deal with such matrices is in the file examples/spmv/dw_block_spmv.c. BCSR data handles can be partitioned into its dense matrix blocks by using starpu_bcsr_filter_canonical_block(), or split into other BCSR data handles by using starpu_bcsr_filter_vertical_block() (but only split along the leading dimension is supported, i.e. along adjacent nnz blocks). starpu_data_filter::get_child_ops needs to be set to starpu_bcsr_filter_canonical_block_child_ops() and starpu_data_filter::get_nchildren set to starpu_bcsr_filter_canonical_block_get_nchildren(). An example is available in tests/datawizard/bcsr.c. \subsection CSRDataInterface CSR Data Interface TODO To register a Compressed Sparse Row Representation (CSR) sparse matrix, one can use the CSR data interface. A full code example for the CSR data interface is available in the file mpi/tests/datatypes.c to show how to register a COO matrix data to StarPU by using starpu_csr_data_register(). CSR data handles can be partitioned into vertical CSR matrices by using starpu_csr_filter_vertical_block(). An example is available in the file examples/spmv/spmv.c. \subsection COODataInterface COO Data Interface To register 2-D matrices given in the coordinate format (COO), one can use the COO data interface. A full code example for the COO data interface is available in the file tests/datawizard/interfaces/coo/coo_interface.c to show how to register a COO matrix data to StarPU by using starpu_coo_data_register(). \section PartitioningData Partitioning Data An existing piece of data can be partitioned in sub parts to be used by different tasks, for instance: \code{.c} #define NX 1048576 #define PARTS 16 int vector[NX]; starpu_data_handle_t handle; /* Declare data to StarPU */ starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); /* Partition the vector in PARTS sub-vectors */ struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = PARTS }; starpu_data_partition(handle, &f); \endcode The handle of a sub-data block of a composite data block can be retrieved by calling starpu_data_get_child(). Or the task submission first retrieves the number of sub-data blocks in a composite data block by calling starpu_data_get_nb_children() and then uses the function starpu_data_get_sub_data() or starpu_data_vget_sub_data() to retrieve the sub-handles to be passed as tasks parameters. \code{.c} /* Submit a task on each sub-vector */ for (i=0; ihandles[0] = sub_handle; task->cl = &cl; task->synchronous = 1; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); starpu_task_submit(task); } \endcode Partitioning can be applied several times by using starpu_data_map_filters() or starpu_data_vmap_filters() or starpu_data_map_filters_parray() or starpu_data_map_filters_array(), see examples/basic_examples/mult.c and examples/filters/. Wherever the whole piece of data is already available, the partitioning will be done in-place, i.e. without allocating new buffers but just using pointers inside the existing copy. This is particularly important to be aware of when using OpenCL, where the kernel parameters are not pointers, but \c cl_mem handles. The kernel thus needs to be also passed the offset within the OpenCL buffer: \code{.c} void opencl_func(void *buffers[], void *cl_arg) { cl_mem vector = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); unsigned offset = STARPU_BLOCK_GET_OFFSET(buffers[0]); ... clSetKernelArg(kernel, 0, sizeof(vector), &vector); clSetKernelArg(kernel, 1, sizeof(offset), &offset); ... } \endcode And the kernel has to shift from the pointer passed by the OpenCL driver: \code{.c} __kernel void opencl_kernel(__global int *vector, unsigned offset) { block = (__global void *)block + offset; ... } \endcode When the sub-data is not of the same type as the original data, the field starpu_data_filter::get_child_ops needs to be set appropriately for StarPU to know which type should be used. starpu_data_unpartition() should be called in the end to collect back the sub-pieces of data into the original piece of data. StarPU provides various interfaces and filters for matrices, vectors, etc., but applications can also write their own data interfaces and filters, see examples/interface and examples/filters/custom_mf for an example, and see \ref DefiningANewDataInterface and \ref DefiningANewDataFilter for documentation. \section AsynchronousPartitioning Asynchronous Partitioning The partitioning functions described in the previous section are synchronous: starpu_data_partition() and starpu_data_unpartition() both wait for all the tasks currently working on the data. This can be a bottleneck for the application. An asynchronous API also exists, it works only on handles with sequential consistency. The principle is to first plan the partitioning, which returns data handles of the partition, which are not functional yet. When submitting tasks, one can mix using the handles of the partition or the whole data. One can even partition recursively and mix using handles at different levels of the recursion. Of course, StarPU will have to introduce coherency synchronization. examples/filters/fmultiple_submit_implicit.c is a complete example using this technique. One can also look at examples/filters/fmultiple_submit_readonly.c which contains the explicit coherency synchronization which are automatically introduced by StarPU for examples/filters/fmultiple_submit_implicit.c. In short, we first register a matrix and plan the partitioning: \code{.c} starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0])); struct starpu_data_filter f_vert = { .filter_func = starpu_matrix_filter_block, .nchildren = PARTS }; starpu_data_partition_plan(handle, &f_vert, vert_handle); \endcode starpu_data_partition_plan() returns the handles for the partition in vert_handle. One can then submit tasks working on the main \c handle, and tasks working on the sub handles vert_handle. Between using the main handle and the handles vert_handle, StarPU will automatically call starpu_data_partition_submit() and starpu_data_unpartition_submit(). Or call starpu_data_partition_submit_sequential_consistency() and starpu_data_unpartition_submit_sequential_consistency() to specify the coherency to be used for the main handle, or call starpu_data_unpartition_submit_sequential_consistency_cb() to specify a callback function for the unpartitiong task. One can also call starpu_data_partition_readonly_submit() and starpu_data_unpartition_readonly_submit() which do not guarantee coherency if the application attempts to write to the main handle or any of its sub-handles while a task is still running. However, in read-only case we can also call starpu_data_partition_readonly_submit_sequential_consistency() to specify the coherency to be used for the main handle, or call starpu_data_partition_readwrite_upgrade_submit() to upgrade the partitioning of a data handle from read-only to read-write mode for a specific sub-handle. If users want to specify that the data won't be touched in write mode anymore and use multiple partition of the data at the same time, they can call starpu_data_partition_readonly_downgrade_submit(). After the task has completed using the data partition, starpu_data_partition_clean() or starpu_data_partition_clean_node() is used to clean up a data partition on the local node or on a specific node. All this code is asynchronous, just submitting which tasks, partitioning and unpartitioning will be done at runtime. Planning several partitioning of the same data is also possible, StarPU will unpartition and repartition as needed when mixing accesses of different partitions. If data access is done in read-only mode, StarPU will allow the different partitioning to coexist. As soon as a data is accessed in read-write mode, StarPU will automatically unpartition everything and activate only the partitioning leading to the data being written to. For instance, for a stencil application, one can split a subdomain into its interior and halos, and then just submit a task updating the whole subdomain, then submit MPI sends/receives to update the halos, then submit again a task updating the whole subdomain, etc. and StarPU will automatically partition/unpartition each time. \section DataCommute Commute Data Access By default, the implicit dependencies computed from data access use the sequential semantic. Notably, write accesses are always serialized in the order of submission. In some applicative cases, the write contributions can actually be performed in any order without affecting the eventual result. In this case, it is useful to drop the strictly sequential semantic, to improve parallelism by allowing StarPU to reorder the write accesses. This can be done by using the data access flag ::STARPU_COMMUTE. Accesses without this flag will however properly be serialized against accesses with this flag. For instance: \code{.c} starpu_task_insert(&cl1, STARPU_R, h, STARPU_RW, handle, 0); starpu_task_insert(&cl2, STARPU_R, handle1, STARPU_RW|STARPU_COMMUTE, handle, 0); starpu_task_insert(&cl2, STARPU_R, handle2, STARPU_RW|STARPU_COMMUTE, handle, 0); starpu_task_insert(&cl3, STARPU_R, g, STARPU_RW, handle, 0); \endcode The two tasks running cl2 will be able to commute: depending on whether the value of handle1 or handle2 becomes available first, the corresponding task running cl2 will start first. The task running cl1 will however always be run before them, and the task running cl3 will always be run after them. tests/datawizard/commute2.c is a complete example using the data access flag. If a lot of tasks use the commute access on the same set of data and a lot of them are ready at the same time, it may become interesting to use an arbiter, see \ref ConcurrentDataAccess. \section DataReduction Data Reduction In various cases, some piece of data is used to accumulate intermediate results. For instances, the dot product of a vector, maximum/minimum finding, the histogram of a picture, etc. When these results are produced along the whole machine, it would not be efficient to accumulate them in only one place, incurring data transmission each and access concurrency. StarPU provides a mode ::STARPU_REDUX, which permits to optimize this case: it will allocate a buffer on each worker (lazily), and accumulate intermediate results there. When the data is eventually accessed in the normal mode ::STARPU_R, StarPU will collect the intermediate results in just one buffer. The function starpu_data_set_reduction_methods() must be called to specify how to initialize these buffers, and how to assemble partial results. The function starpu_data_set_reduction_methods_with_args() can also be used to pass arguments to the reduction and init tasks. For instance, examples/cg/cg.c uses that to optimize its dot product: it first defines the codelets for initialization and reduction: \code{.c} struct starpu_codelet bzero_variable_cl = { .cpu_funcs = { bzero_variable_cpu }, .cpu_funcs_name = { "bzero_variable_cpu" }, .cuda_funcs = { bzero_variable_cuda }, .nbuffers = 1, } static void accumulate_variable_cpu(void *descr[], void *cl_arg) { double *v_dst = (double *)STARPU_VARIABLE_GET_PTR(descr[0]); double *v_src = (double *)STARPU_VARIABLE_GET_PTR(descr[1]); *v_dst = *v_dst + *v_src; } static void accumulate_variable_cuda(void *descr[], void *cl_arg) { double *v_dst = (double *)STARPU_VARIABLE_GET_PTR(descr[0]); double *v_src = (double *)STARPU_VARIABLE_GET_PTR(descr[1]); cublasaxpy(1, (double)1.0, v_src, 1, v_dst, 1); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } struct starpu_codelet accumulate_variable_cl = { .cpu_funcs = { accumulate_variable_cpu }, .cpu_funcs_name = { "accumulate_variable_cpu" }, .cuda_funcs = { accumulate_variable_cuda }, .nbuffers = 2, .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, } \endcode and attaches them as reduction methods for its handle dtq: \code{.c} starpu_variable_data_register(&dtq_handle, -1, NULL, sizeof(type)); starpu_data_set_reduction_methods(dtq_handle, &accumulate_variable_cl, &bzero_variable_cl); \endcode and dtq_handle can now be used with the mode ::STARPU_REDUX for the dot products with partitioned vectors: \code{.c} for (b = 0; b < nblocks; b++) starpu_task_insert(&dot_kernel_cl, STARPU_REDUX, dtq_handle, STARPU_R, starpu_data_get_sub_data(v1, 1, b), STARPU_R, starpu_data_get_sub_data(v2, 1, b), 0); \endcode During registration, we have here provided NULL, i.e. there is no initial value to be taken into account during reduction. StarPU will thus only take into account the contributions from the tasks dot_kernel_cl. Also, it will not allocate any memory for dtq_handle before the tasks dot_kernel_cl are ready to run. If another dot product has to be performed, one could unregister dtq_handle, and re-register it. But one can also call starpu_data_deinitialize_submit() or even starpu_data_invalidate_submit() with the parameter dtq_handle, which will clear all data from the handle, thus resetting it back to the initial status register(NULL). The example examples/cg/cg.c also uses reduction for the blocked gemv kernel, leading to yet more relaxed dependencies and more parallelism. ::STARPU_REDUX can also be passed to starpu_mpi_task_insert() in the MPI case. This will however not produce any MPI communication, but just pass ::STARPU_REDUX to the underlying starpu_task_insert(). starpu_mpi_redux_data() posts tasks which will reduce the partial results among MPI nodes into the MPI node which owns the data. The function can be called by users to benefit from fine-tuning such as priority setting. If users do not call this function, StarPU wraps up reduction patterns automatically. The following example shows a hypothetical application which collects partial results into data res, then uses it for other computation, before looping again with a new reduction where the wrap-up of the reduction pattern is explicit: \code{.c} for (i = 0; i < 100; i++) { starpu_mpi_task_insert(MPI_COMM_WORLD, &init_res, STARPU_W, res, 0); starpu_mpi_task_insert(MPI_COMM_WORLD, &work, STARPU_RW, A, STARPU_R, B, STARPU_REDUX, res, 0); starpu_mpi_redux_data(MPI_COMM_WORLD, res); starpu_mpi_task_insert(MPI_COMM_WORLD, &work2, STARPU_RW, B, STARPU_R, res, 0); } \endcode starpu_mpi_redux_data() is called automatically in various cases, including when a task reading the reduced handle is inserted through starpu_mpi_task_insert(). The previous example could avoid calling starpu_mpi_redux_data(). Default priority (0) is used. The reduction tree arity is decided based on the size of the data to reduce: a flat tree is used with a small data (default to less than 1024 bytes), a binary tree otherwise. If the environment variable \ref STARPU_MPI_REDUX_ARITY_THRESHOLD is set, the threshold between the size of a small data and a bigger data is modified. If the value is set to be negative, flat trees will always be used. If the value is set to 0, binary trees are used. Otherwise, the size of the data is compared to the size in the environment variable. Remaining distributed-memory reduction patterns are wrapped-up at the end of an application when calling starpu_mpi_wait_for_all(). More details about MPI reduction are show in Section \ref MPIMpiRedux, and some examples for MPI data reduction are available in mpi/examples/mpi_redux/. \section ConcurrentDataAccess Concurrent Data Accesses When several tasks are ready and will work on several data, StarPU is faced with the classical Dining Philosopher's problem, and has to determine the order in which it will run the tasks. Data accesses usually use sequential ordering, so data accesses are usually already serialized, and thus by default, StarPU uses the Dijkstra solution which scales very well in terms of overhead: tasks will just acquire data one by one by data handle pointer value order. When sequential ordering is disabled or the flag ::STARPU_COMMUTE is used, there may be a lot of concurrent accesses to the same data, and the Dijkstra solution gets only poor parallelism, typically in some pathological cases which do happen in various applications, for instance \code{.c} for (i = 0; i < N; i++) for (j = 0; j < N; j++) task[i][j] = starpu_task_build(&cl, STARPU_RW|STARPU_COMMUTE, A[i], STARPU_RW|STARPU_COMMUTE, B[j], 0); \endcode It creates a series of tasks that are completely parallel in terms of tasks dependencies thanks to commutation, but StarPU still has to prevent two tasks from operating on the same data. The Dijkstra solution here leads to a worst-case: the \c task[0][j] tasks will wait for each other since they all access the same \c A[0]. And \c task[1][0] will wait for \c task[0][0] because they both access the same \c B[0], \c task[1][1] will wait for \c task[0][1] because of \c B[1], etc. In the end, no parallism is achieved: \image html arbiter.png \image latex arbiter.png "" width=0.7\textwidth In this case, one can use a data access arbiter ::starpu_arbiter_t, which implements the classical centralized solution for the Dining Philosophers problem. One can call starpu_arbiter_create() to create a data access arbiter, and starpu_data_assign_arbiter() to make access to handle managed by arbiter. Once the application no longer needs the arbiter, one can call starpu_arbiter_destroy() to destroy the arbiter after all data assigned to the arbiter have been unregistered. This is more expensive in terms of overhead since it is centralized, but it opportunistically gets a lot of parallelism. The centralization can also be avoided by using several arbiters, thus separating sets of data for which arbitration will be done. If a task accesses data from different arbiters, it will acquire them arbiter by arbiter, in arbiter pointer value order. See the tests/datawizard/test_arbiter.cpp example. Arbiters however do not support the flag ::STARPU_REDUX yet. \section TemporaryBuffers Temporary Buffers There are two kinds of temporary buffers: temporary data which just pass results from a task to another, and scratch data which are needed only internally by tasks. \subsection TemporaryData Temporary Data Data can be produced by a task, and consumed by another task, without being used by other parts of the application. In such case, registration can be done without prior allocation, by using the special memory node number -1, and passing a NULL pointer. StarPU will actually allocate memory only when the task creating the content gets scheduled, and destroy it on unregistration. As the application will not use the data, it can be tedious for the application to have to unregister it. The unregistration can be done lazily by using the function starpu_data_unregister_submit(), which will record that no other tasks accessing the handle will be submitted, so that it can be freed as soon as the last task accessing it is completed. The following code examplifies both points: it registers the temporary data, submits three tasks accessing it, and records the data for automatic unregistration. \code{.c} starpu_vector_data_register(&handle, -1, NULL, n, sizeof(float)); starpu_task_insert(&produce_data, STARPU_W, handle, 0); starpu_task_insert(&compute_data, STARPU_RW, handle, 0); starpu_task_insert(&summarize_data, STARPU_R, handle, STARPU_W, result_handle, 0); starpu_data_unregister_submit(handle); \endcode The application may also want for the temporary data to be initialized on the fly before being used by the task. This can be done by using starpu_data_set_reduction_methods() to set an initialization codelet (no redux codelet is needed). \subsection ScratchData Scratch Data Some kernels sometimes need temporary data to complete the computations, like a workspace. The application could allocate it at the start of the codelet function, and free it at the end, but this would be costly. It could also allocate one buffer per worker (similarly to \ref HowToInitializeAComputationLibraryOnceForEachWorker), but this would make them systematic and permanent. A more optimized way is to use the data access mode ::STARPU_SCRATCH, as examplified below, which provides per-worker buffers without content consistency. The buffer is registered only once, using memory node -1, i.e. the application didn't allocate memory for it, and StarPU will allocate it on demand at task execution. \code{.c} starpu_variable_data_register(&workspace, -1, NULL, sizeof(float)); for (i = 0; i < N; i++) starpu_task_insert(&compute, STARPU_R, input[i], STARPU_SCRATCH, workspace, STARPU_W, output[i], 0); \endcode StarPU will make sure that the buffer is allocated before executing the task, and make this allocation per-worker: for CPU workers, notably, each worker has its own buffer. This means that each task submitted above will actually have its own workspace, which will actually be the same for all tasks running one after the other on the same worker. Also, if for instance memory becomes scarce, StarPU will notice that it can free such buffers easily, since the content does not matter. The example examples/pi uses scratches for some temporary buffer. It may be useful to additionally use the ::STARPU_NOFOOTPRINT flag, when this buffer may have various size depending e.g. on specific CUDA versions or devices, to make it simpler to use performance models for simulated execution. See for instance examples/cholesky/cholesky_kernels.c */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_basics/examples_sources.doxy000066400000000000000000000044501507764646700271760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page ExamplesInStarPUSources Examples in StarPU Sources We have already seen some examples in Chapter \ref BasicExamples. A tutorial is also installed in the directory share/doc/starpu/tutorial/. Many examples are also available in the StarPU sources in the directory examples/. Simple examples include:
incrementer/
Trivial incrementation test.
basic_examples/
Simple documented Hello world and vector/scalar product (as shown in \ref BasicExamples), matrix product examples (as shown in \ref PerformanceModelExample), an example using the blocked matrix data interface, an example using the variable data interface, and an example using different formats on CPUs and GPUs.
matvecmult/
OpenCL example from NVidia, adapted to StarPU.
axpy/
AXPY CUBLAS operation adapted to StarPU.
native_fortran/
Example of using StarPU's native Fortran support.
fortran90/
Example of Fortran 90 bindings, using C marshalling wrappers.
fortran/
Example of Fortran 77 bindings, using C marshalling wrappers.
More advanced examples include:
filters/
Examples using filters, as shown in \ref PartitioningData.
lu/
LU matrix factorization, see for instance xlu_implicit.c
cholesky/
Cholesky matrix factorization, see for instance cholesky_implicit.c.
*/ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_basics/scaling_vector_example.doxy000066400000000000000000000030031507764646700303230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page FullSourceCodeVectorScal Full source code for the ’Scaling a Vector’ example \section MainApplication Main Application \snippet basics_vector_scal_c.c To be included. You should update doxygen if you see this text. \section CPUKernel CPU Kernel \snippet basics_vector_scal_cpu.c To be included. You should update doxygen if you see this text. \section CUDAKernel CUDA Kernel \snippet basics_vector_scal_cuda.c To be included. You should update doxygen if you see this text. \section OpenCLKernel OpenCL Kernel \subsection InvokingtheKernel Invoking the Kernel \snippet basics_vector_scal_opencl.c To be included. You should update doxygen if you see this text. \subsection SourceoftheKernel Source of the Kernel \snippet basics_vector_scal_opencl_codelet.cl To be included. You should update doxygen if you see this text. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_basics/scheduling.doxy000066400000000000000000000237621507764646700257510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page Scheduling Scheduling \section TaskSchedulingPolicy Task Scheduling Policies The basics of the scheduling policy are the following:
  • The scheduler gets to schedule tasks (push operation) when they become ready to be executed, i.e. they are not waiting for some tags, data dependencies or task dependencies.
  • Workers pull tasks (pop operation) one by one from the scheduler.
This means scheduling policies usually contain at least one queue of tasks to store them between the time when they become available, and the time when a worker gets to grab them. By default, StarPU uses the work-stealing scheduler \b lws. This is because it provides correct load balance and locality even if the application codelets do not have performance models. Other non-modelling scheduling policies can be selected among the list below, thanks to the environment variable \ref STARPU_SCHED. For instance, export STARPU_SCHED=dmda . Use help to get the list of available schedulers. The function starpu_sched_get_predefined_policies() returns a NULL-terminated array of all predefined scheduling policies that are available in StarPU. Functions starpu_sched_get_sched_policy_in_ctx() and starpu_sched_get_sched_policy() return the scheduling policy of a task within a specific context or a default context, respectively. \subsection NonPerformanceModelingPolicies Non Performance Modelling Policies - The eager scheduler uses a central task queue, from which all workers draw tasks to work on concurrently. This however does not permit to prefetch data since the scheduling decision is taken late. If a task has a non-0 priority, it is put at the front of the queue. - The random scheduler uses a queue per worker, and distributes tasks randomly according to assumed worker overall performance. - The ws (work stealing) scheduler uses a queue per worker, and schedules a task on the worker which released it by default. When a worker becomes idle, it steals a task from the most loaded worker. - The lws (locality work stealing) scheduler uses a queue per worker, and schedules a task on the worker which released it by default. When a worker becomes idle, it steals a task from neighbor workers. It also takes priorities into account. - The prio scheduler also uses a central task queue, but sorts tasks by priority specified by the application. - The heteroprio scheduler uses different priorities for the different processing units. This scheduler must be configured to work correctly and to expect high-performance as described in the corresponding section. \subsection DMTaskSchedulingPolicy Performance Model-Based Task Scheduling Policies If (and only if) your codelets have performance models (\ref PerformanceModelExample), you should change the scheduler thanks to the environment variable \ref STARPU_SCHED, to select one of the policies below, in order to take advantage of StarPU's performance modelling. For instance, export STARPU_SCHED=dmda . Use help to get the list of available schedulers. Note: Depending on the performance model type chosen, some preliminary calibration runs may be needed for the model to converge. If the calibration has not been done, or is insufficient yet, or if no performance model is specified for a codelet, every task built from this codelet will be scheduled using an eager fallback policy. Troubleshooting: Configuring and recompiling StarPU using the \c configure option \ref enable-verbose "--enable-verbose" displays some statistics at the end of execution about the percentage of tasks which have been scheduled by a DM* family policy using performance model hints. A low or zero percentage may be the sign that performance models are not converging or that codelets do not have performance models enabled. - The dm (deque model) scheduler takes task execution performance models into account to perform a HEFT-similar scheduling strategy: it schedules tasks where their termination time will be minimal. The difference with HEFT is that dm schedules tasks as soon as they become available, and thus in the order they become available, without taking priorities into account. - The dmda (deque model data aware) scheduler is similar to \b dm, but it also takes data transfer time into account. - The dmdap (deque model data aware prio) scheduler is similar to \b dmda, except that it sorts tasks by priority order, which allows becoming even closer to HEFT by respecting priorities after having made the scheduling decision (but it still schedules tasks in the order they become available). - The dmdar (deque model data aware ready) scheduler is similar to \b dmda, but it also privileges tasks whose data buffers are already available on the target device. - The dmdas combines \b dmdap and \b dmdar: it sorts tasks by priority order, but for a given priority it will privilege tasks whose data buffers are already available on the target device. - The dmdasd (deque model data aware sorted decision) scheduler is similar to dmdas, except that when scheduling a task, it takes into account its priority when computing the minimum completion time, since this task may get executed before others, and thus the latter should be ignored. - The heft (heterogeneous earliest finish time) scheduler is a deprecated alias for dmda. - The pheft (parallel HEFT) scheduler is similar to \b dmda, it also supports parallel tasks (still experimental). It should not be used when several contexts using it are being executed simultaneously. - The peager (parallel eager) scheduler is similar to eager, it also supports parallel tasks (still experimental). It should not be used when several contexts using it are being executed simultaneously. \subsection ExistingModularizedSchedulers Modularized Schedulers StarPU provides a powerful way to implement schedulers, as documented in \ref DefiningANewModularSchedulingPolicy. It is currently shipped with the following pre-defined Modularized Schedulers : - modular-eager , modular-eager-prefetching are eager-based Schedulers (without and with prefetching), they are naive schedulers, which try to map a task on the first available resource they find. The prefetching variant queues several tasks in advance to be able to do data prefetching. This may however degrade load balancing a bit. - modular-prio, modular-prio-prefetching, modular-eager-prio are prio-based Schedulers (without / with prefetching):, similar to Eager-Based Schedulers. They can handle tasks which have a defined priority and schedule them accordingly. The modular-eager-prio variant integrates the eager and priority queue in a single component. This allows it to do a better job at pushing tasks. - modular-random, modular-random-prio, modular-random-prefetching, modular-random-prio-prefetching are random-based Schedulers (without/with prefetching) : Select randomly a resource to be mapped on for each task. - modular-ws) implements Work Stealing: Maps tasks to workers in round-robin, but allows workers to steal work from other workers. - modular-heft, modular-heft2, and modular-heft-prio are HEFT Schedulers : \n Maps tasks to workers using a heuristic very close to Heterogeneous Earliest Finish Time. It needs that every task submitted to StarPU have a defined performance model (\ref PerformanceModelCalibration) to work efficiently, but can handle tasks without a performance model. modular-heft just takes tasks by order. modular-heft2 takes at most 5 tasks of the same priority and checks which one fits best. modular-heft-prio is similar to modular-heft, but only decides the memory node, not the exact worker, just pushing tasks to one central queue per memory node. By default, they sort tasks by priorities and privilege, running first a task which has most of its data already available on the target. These can however be changed with \ref STARPU_SCHED_SORTED_ABOVE, \ref STARPU_SCHED_SORTED_BELOW, and \ref STARPU_SCHED_READY . - modular-heteroprio is a Heteroprio Scheduler: \n Maps tasks to worker similarly to HEFT, but first attribute accelerated tasks to GPUs, then not-so-accelerated tasks to CPUs. \section TaskDistributionVsDataTransfer Task Distribution Vs Data Transfer Distributing tasks to balance the load induces data transfer penalty. StarPU thus needs to find a balance between both. The target function that the scheduler \b dmda of StarPU tries to minimize is alpha * T_execution + beta * T_data_transfer, where T_execution is the estimated execution time of the codelet (usually accurate), and T_data_transfer is the estimated data transfer time. The latter is estimated based on bus calibration before execution start, i.e. with an idle machine, thus without contention. You can force bus re-calibration by running the tool starpu_calibrate_bus. The beta parameter defaults to 1, but it can be worth trying to tweak it by using export STARPU_SCHED_BETA=2 (\ref STARPU_SCHED_BETA) for instance, since during real application execution, contention makes transfer times bigger. This is of course imprecise, but in practice, a rough estimation already gives the good results that a precise estimation would give. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_basics/starpu_applications.doxy000066400000000000000000000372751507764646700277140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page StarPUApplications StarPU Applications, setting up Your Own Code \section SettingFlagsForCompilingLinkingAndRunningApplications Setting Flags for Compiling, Linking and Running Applications StarPU provides a pkg-config executable to facilitate the retrieval of necessary compiler and linker flags. This is useful when compiling and linking an application with StarPU, as certain flags or libraries (such as \c CUDA or \c libspe2) may be required. If StarPU is not installed in a standard location, the path of StarPU's library must be specified in the environment variable PKG_CONFIG_PATH to allow pkg-config to find it. For example, if StarPU is installed in $STARPU_PATH, you can set the variable \c PKG_CONFIG_PATH like this: \verbatim $ export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$STARPU_PATH/lib/pkgconfig \endverbatim The flags required to compile or link against StarPU are then accessible with the following commands: \verbatim $ pkg-config --cflags starpu-1.4 # options for the compiler $ pkg-config --libs starpu-1.4 # options for the linker \endverbatim Please note that it is still possible to use the API provided in StarPU version 1.0 by calling pkg-config with the starpu-1.0 package. Similar packages are provided for starpumpi-1.0 and starpufft-1.0. For the API provided in StarPU version 0.9, you can use pkg-config with the libstarpu package. Similar packages are provided for libstarpumpi and libstarpufft. Make sure that pkg-config --libs starpu-1.4 produces valid output before going further. To achieve this, make sure that your \c PKG_CONFIG_PATH is correctly set to the location where \c starpu-1.4.pc was installed during the make install process. Furthermore, if you intend to link your application statically, remember to include the --static option during the linking process. Additionally, for runtime execution, it is necessary to set the \c LD_LIBRARY_PATH environment variable. This ensures that dynamic libraries are located and loaded correctly during runtime. \verbatim $ export LD_LIBRARY_PATH=$STARPU_PATH/lib:$LD_LIBRARY_PATH \endverbatim And finally you should set the \c PATH variable to get access to various StarPU tools: \verbatim $ export PATH=$PATH:$STARPU_PATH/bin \endverbatim Run the following command to ensure that StarPU is executing properly and successfully detecting your hardware. If any issues arise, examine the output of \c lstopo from the \c hwloc project and report any problems either to the hwloc project or to us. \verbatim $ starpu_machine_display \endverbatim A tool is provided to help set all the environment variables needed by StarPU. Once StarPU is installed in a specific directory, calling the script bin/starpu_env will set in your current environment the variables STARPU_PATH, LD_LIBRARY_PATH, PKG_CONFIG_PATH, PATH and MANPATH. \verbatim $ source $STARPU_PATH/bin/starpu_env \endverbatim \section IntegratingStarPUInABuildSystem Integrating StarPU in a Build System \subsection StarPUInMake Integrating StarPU in a Make Build System When using a Makefile, the following lines can be added to set the options for the compiler and the linker: \verbatim CFLAGS += $$(pkg-config --cflags starpu-1.4) LDLIBS += $$(pkg-config --libs starpu-1.4) \endverbatim If you have a \c test-starpu.c file containing for instance: \code{.c} #include #include int main(void) { int ret; ret = starpu_init(NULL); if (ret != 0) { return 1; } printf("%d CPU cores\n", starpu_worker_get_count_by_type(STARPU_CPU_WORKER)); printf("%d CUDA GPUs\n", starpu_worker_get_count_by_type(STARPU_CUDA_WORKER)); printf("%d OpenCL GPUs\n", starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER)); starpu_shutdown(); return 0; } \endcode You can build it with make test-starpu and run it with ./test-starpu \subsection StarPUInCMake Integrating StarPU in a CMake Build System This section shows a minimal example integrating StarPU in an existing application's CMake build system. Let's assume we want to build an executable from the following source code using CMake: \code{.c} #include #include int main(void) { int ret; ret = starpu_init(NULL); if (ret != 0) { return 1; } printf("%d CPU cores\n", starpu_worker_get_count_by_type(STARPU_CPU_WORKER)); printf("%d CUDA GPUs\n", starpu_worker_get_count_by_type(STARPU_CUDA_WORKER)); printf("%d OpenCL GPUs\n", starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER)); starpu_shutdown(); return 0; } \endcode The \c CMakeLists.txt file below uses the Pkg-Config support from CMake to autodetect the StarPU installation and library dependences (such as libhwloc) provided that the PKG_CONFIG_PATH variable is set, and is sufficient to build a statically-linked executable. The CMake code uses the IMPORTED_TARGET option of pkg_check_modules to define a CMake target that can be used to compile and link StarPU codes: \code{File CMakeLists.txt} cmake_minimum_required (VERSION 3.2) project (hello_starpu) find_package(PkgConfig) pkg_check_modules(STARPU REQUIRED IMPORTED_TARGET starpu-1.4) add_executable(hello_starpu hello_starpu.c PkgConfig::STARPU) \endcode One can also use the following alternative. \code{File CMakeLists.txt} cmake_minimum_required (VERSION 3.2) project (hello_starpu) find_package(PkgConfig) pkg_check_modules(STARPU REQUIRED starpu-1.4) if (STARPU_FOUND) include_directories (${STARPU_INCLUDE_DIRS}) link_directories (${STARPU_STATIC_LIBRARY_DIRS}) link_libraries (${STARPU_STATIC_LIBRARIES}) else (STARPU_FOUND) message(FATAL_ERROR "StarPU not found") endif() add_executable(hello_starpu hello_starpu.c) \endcode The following \c CMakeLists.txt implements a more complex strategy, still relying on Pkg-Config, but also taking into account additional flags. While more complete, this approach makes CMake's build types (Debug, Release, ...) unavailable because of the direct affectation to variable CMAKE_C_FLAGS. If both the full flags support and the build types support are needed, the \c CMakeLists.txt below may be altered to work with CMAKE_C_FLAGS_RELEASE, CMAKE_C_FLAGS_DEBUG, and others as needed. This example has been successfully tested with CMake 3.2, though it may work with earlier CMake 3.x versions. \code{File CMakeLists.txt} cmake_minimum_required (VERSION 3.2) project (hello_starpu) find_package(PkgConfig) pkg_check_modules(STARPU REQUIRED starpu-1.4) # This section must appear before 'add_executable' if (STARPU_FOUND) # CFLAGS other than -I foreach(CFLAG ${STARPU_CFLAGS_OTHER}) set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CFLAG}") endforeach() # Static LDFLAGS other than -L foreach(LDFLAG ${STARPU_STATIC_LDFLAGS_OTHER}) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${LDFLAG}") endforeach() # -L directories link_directories(${STARPU_STATIC_LIBRARY_DIRS}) else (STARPU_FOUND) message(FATAL_ERROR "StarPU not found") endif() add_executable(hello_starpu hello_starpu.c) # This section must appear after 'add_executable' if (STARPU_FOUND) # -I directories target_include_directories(hello_starpu PRIVATE ${STARPU_INCLUDE_DIRS}) # Static -l libs target_link_libraries(hello_starpu PRIVATE ${STARPU_STATIC_LIBRARIES}) endif() \endcode \section RunningABasicStarPUApplication Running a Basic StarPU Application Basic examples using StarPU are built in the directory examples/basic_examples/ (and installed in $STARPU_PATH/lib/starpu/examples/). You can for example run the example vector_scal. \verbatim $ ./examples/basic_examples/vector_scal BEFORE: First element was 1.000000 AFTER: First element is 3.140000 \endverbatim When StarPU is used for the first time, the directory $STARPU_HOME/.starpu/ is created, performance models will be stored in this directory (\ref STARPU_HOME). Please note that buses are benchmarked when StarPU is launched for the first time. This may take a few minutes, or less if libhwloc is installed. This step is done only once per user and per machine. \section RunningABasicStarPUApplicationOnMicrosoft Running a Basic StarPU Application on Microsoft Visual C Batch files are provided to run StarPU applications under Microsoft Visual C. They are installed in $STARPU_PATH/bin/msvc. To execute a StarPU application, you first need to set the environment variable \ref STARPU_PATH. \verbatim c:\....> cd c:\cygwin\home\ci\starpu\ c:\....> set STARPU_PATH=c:\cygwin\home\ci\starpu\ c:\....> cd bin\msvc c:\....> starpu_open.bat starpu_simple.c \endverbatim The batch script will run Microsoft Visual C with a basic project file to run the given application. The batch script starpu_clean.bat can be used to delete all compilation generated files. The batch script starpu_exec.bat can be used to compile and execute a StarPU application from the command prompt. \verbatim c:\....> cd c:\cygwin\home\ci\starpu\ c:\....> set STARPU_PATH=c:\cygwin\home\ci\starpu\ c:\....> cd bin\msvc c:\....> starpu_exec.bat ..\..\..\..\examples\basic_examples\hello_world.c \endverbatim \verbatim MSVC StarPU Execution ... /out:hello_world.exe ... Hello world (params = {1, 2.00000}) Callback function got argument 0000042 c:\....> \endverbatim \section KernelThreadsStartedByStarPU Kernel Threads Started by StarPU StarPU automatically binds one thread per CPU core. It does not use SMT/hyperthreading because kernels are usually already optimized for using a full core, and using hyperthreading would make kernel calibration rather random. Since driving GPUs is a CPU-consuming task, StarPU dedicates one core per GPU. While StarPU tasks are executing, the application is not supposed to do computations in the threads it starts itself, tasks should be used instead. If the application needs to reserve some cores for its own computations, it can do so with the field starpu_conf::reserve_ncpus, get the core IDs with starpu_get_next_bindid(), and bind to them with starpu_bind_thread_on(). Another option is for the application to pause StarPU by calling starpu_pause(), then to perform its own computations, and then to resume StarPU by calling starpu_resume() so that StarPU can execute tasks. If a computation library used by the application actually creates its own thread, it may be useful to call starpu_bind_thread_on_worker() before e.g. initializing the library, so that the library records which binding it is supposed to use. And then call starpu_bind_thread_on_main() again, or starpu_bind_thread_on_cpu() if a core was reserved with starpu_get_next_bindid(). In case that computation library wants to bind threads itself, and uses physical numbering instead of logical numbering (as defined by hwloc), starpu_cpu_os_index() can be used to convert from StarPU cpuid to OS cpu index. \section EnablingOpenCL Enabling OpenCL When both CUDA and OpenCL drivers are enabled, StarPU will launch an OpenCL worker for NVIDIA GPUs only if CUDA is not already running on them. This design choice was necessary as OpenCL and CUDA can not run at the same time on the same NVIDIA GPU, as there is currently no interoperability between them. To enable OpenCL, you need either to disable CUDA when configuring StarPU: \verbatim $ ./configure --disable-cuda \endverbatim or when running applications: \verbatim $ STARPU_NCUDA=0 ./application \endverbatim OpenCL will automatically be started on any device not yet used by CUDA. So on a machine running 4 GPUS, it is therefore possible to enable CUDA on 2 devices, and OpenCL on the other 2 devices by calling: \verbatim $ STARPU_NCUDA=2 ./application \endverbatim \section Storing_Performance_Model_Files Storing Performance Model Files StarPU stores performance model files for bus benchmarking and codelet profiles in different directories. By default, all files are stored in $STARPU_HOME/.starpu/sampling. If the environment variable \ref STARPU_HOME is not defined, its default value is $HOME on Unix environments, and $USERPROFILE on Windows environments. Environment variables \ref STARPU_PERF_MODEL_DIR and \ref STARPU_PERF_MODEL_PATH can also be used to specify other directories in which to store performance files (\ref SimulatedBenchmarks). The configure option \ref with-perf-model-dir "--with-perf-model-dir" can also be used to define a performance model directory. When looking for performance files either for bus benchmarking or for codelet performances, StarPU
  • first looks in the directory specified by the environment variable \ref STARPU_PERF_MODEL_DIR
  • then looks in the directory specified by the configure option \ref with-perf-model-dir "--with-perf-model-dir"
    or in $STARPU_HOME/.starpu/sampling if the option is not set
  • then looks in the directories specified by the environment variable \ref STARPU_PERF_MODEL_PATH
  • and finally looks in $prefix/share/starpu/perfmodels/sampling
If the files are not present and must be created, they will be created in the first defined directory from the list above. \verbatim rm -rf $PWD/xxx && STARPU_PERF_MODEL_DIR=$PWD/xxx ./application \endverbatim will use performance model files from the directory $STARPU_HOME/.starpu/sampling if they are available, otherwise will create these files in $STARPU_PERF_MODEL_DIR. To know the list of directories StarPU will search for performances files, one can use the tool starpu_perfmodel_display \verbatim $ starpu_perfmodel_display -d directory: directory: \endverbatim \verbatim $ STARPU_PERF_MODEL_DIR=/tmp/xxx starpu_perfmodel_display -d directory: directory: directory: \endverbatim When using the variable \ref STARPU_PERF_MODEL_DIR, the directory will be created if it does not exist when dumping new performance model files. When using the variable \ref STARPU_PERF_MODEL_PATH, only existing directories will be taken into account. \verbatim $ mkdir /tmp/yyy && STARPU_PERF_MODEL_DIR=/tmp/xxx STARPU_PERF_MODEL_PATH=/tmp/zzz:/tmp/yyy starpu_perfmodel_display -d [starpu][adrets][_perf_model_add_dir] Warning: directory as set by variable STARPU_PERF_MODEL_PATH does not exist directory: directory: directory: directory: \endverbatim Once your application has created the performance files in a given directory, it is thus possible to move these files in another location and keep using them. \verbatim ./application # files are created in $HOME/.starpu/sampling mv $HOME/.starpu/sampling /usr/local/starpu/sampling STARPU_PERF_MODEL_DIR=/usr/local/starpu/sampling ./application \endverbatim */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_basics/tasks.doxy000066400000000000000000000364651507764646700247550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page TasksInStarPU Tasks In StarPU \section TaskGranularity Task Granularity Similar to other runtimes, StarPU introduces some overhead in managing tasks. This overhead, while not always negligible, is mitigated by its intelligent scheduling and data management capabilities. The typical order of magnitude for this overhead is a few microseconds, which is notably smaller than the inherent CUDA overhead. To ensure that this overhead remains insignificant, the work assigned to a task should be substantial enough. The length of tasks should ideally be relatively larger to effectively counterbalance this overhead. It iss advised to consider the offline performance feedback, which provides insights into task lengths. Monitoring task lengths becomes crucial if you're encountering suboptimal performance. To gauge the scalability potential based task size, you can run the tests/microbenchs/tasks_size_overhead.sh script. It provides a visual representation of the speedup achievable with independent tasks of very small sizes. This benchmark is installed in $STARPU_PATH/lib/starpu/examples/. It gives a glimpse into how long a task should be (in µs) for StarPU overhead to be low enough to keep efficiency. The script generates a plot illustrating the speedup trends for tasks of different sizes, correlated with the number of CPUs in use. For example, in the figure below, for 128 µs tasks (the red line), StarPU overhead is low enough to guarantee a good speedup if the number of CPUs is not more than 36. But with the same number of CPUs, 64 µs tasks (the black line) cannot have a correct speedup. The number of CPUs must be decreased to about 17 in order to keep efficiency. \image html tasks_size_overhead.png \image latex tasks_size_overhead.png "" width=\textwidth To determine the task size your application is using, it is possible to use starpu_fxt_data_trace as explained in \ref DataTrace. The selection of a scheduler in StarPU also plays a significant role. Different schedulers have varying impacts on the overall execution. For example, the \c dmda scheduler may require additional time to make decisions, while the \c eager scheduler tends to be more immediate in its decisions. To assess the impact of scheduler choice on your target machine, you can once again utilize the \c tasks_size_overhead.sh script. This script provides valuable insights into how different schedulers affect performance in conjunction with task sizes. \section TaskSubmission Task Submission To enable StarPU to perform online optimizations effectively, it is recommended to submit tasks asynchronously whenever possible. The goal is to maximize the level of asynchronous submission, allowing StarPU to have more flexibility in optimizing the scheduling process. Ideally, all tasks should be submitted asynchronously, and the use of functions like starpu_task_wait_for_all() or starpu_data_unregister() should be limited to waiting for task completion. StarPU will then be able to rework the whole schedule, overlap computation with communication, manage accelerator local memory usage, etc. A simple example is in the file examples/basic_examples/variable.c \section TaskPriorities Task Priorities StarPU's default behavior considers tasks in the order they are submitted by the application. However, in scenarios where the application programmer possesses knowledge about certain tasks that should take priority due to their impact on performance (such as tasks whose output is crucial for subsequent tasks), the starpu_task::priority field can be utilized to convey this information to StarPU's scheduling process. An example is provided in the application examples/heat/dw_factolu_tag.c. \section SettingManyDataHandlesForATask Setting Many Data Handles For a Task The maximum number of data that a task can manage is fixed by the macro \ref STARPU_NMAXBUFS. This macro has a default value which can be customized through the \c configure option \ref enable-maxbuffers "--enable-maxbuffers". However, if you have specific cases where you need tasks to manage more data than the maximum allowed, you can use the field starpu_task::dyn_handles when defining a task, along with the field starpu_codelet::dyn_modes when defining the corresponding codelet. This dynamic handle mechanism enables tasks to handle additional data beyond the usual limit imposed by \ref STARPU_NMAXBUFS. \code{.c} enum starpu_data_access_mode modes[STARPU_NMAXBUFS+1] = { STARPU_R, STARPU_R, ... }; struct starpu_codelet dummy_big_cl = { .cuda_funcs = { dummy_big_kernel }, .opencl_funcs = { dummy_big_kernel }, .cpu_funcs = { dummy_big_kernel }, .cpu_funcs_name = { "dummy_big_kernel" }, .nbuffers = STARPU_NMAXBUFS+1, .dyn_modes = modes }; task = starpu_task_create(); task->cl = &dummy_big_cl; task->dyn_handles = malloc(task->cl->nbuffers * sizeof(starpu_data_handle_t)); for(i=0 ; icl->nbuffers ; i++) { task->dyn_handles[i] = handle; } starpu_task_submit(task); \endcode \code{.c} starpu_data_handle_t *handles = malloc(dummy_big_cl.nbuffers * sizeof(starpu_data_handle_t)); for(i=0 ; iexamples/basic_examples/dynamic_handles.c. \section SettingVariableDataHandlesForATask Setting a Variable Number Of Data Handles For a Task Normally, the number of data handles given to a task is set with starpu_codelet::nbuffers. This field can however be set to \ref STARPU_VARIABLE_NBUFFERS, in which case starpu_task::nbuffers must be set, and starpu_task::modes (or starpu_task::dyn_modes, see \ref SettingManyDataHandlesForATask) should be used to specify the modes for the handles. Examples in examples/basic_examples/dynamic_handles.c show how to implement it. \section InsertTaskUtility Insert Task Utility StarPU provides the wrapper function starpu_task_insert() to ease the creation and submission of tasks. Here is the implementation of a codelet: \code{.c} void func_cpu(void *descr[], void *_args) { int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); float *x1 = (float *)STARPU_VARIABLE_GET_PTR(descr[1]); int ifactor; float ffactor; starpu_codelet_unpack_args(_args, &ifactor, &ffactor); *x0 = *x0 * ifactor; *x1 = *x1 * ffactor; } struct starpu_codelet mycodelet = { .cpu_funcs = { func_cpu }, .cpu_funcs_name = { "func_cpu" }, .nbuffers = 2, .modes = { STARPU_RW, STARPU_RW } }; \endcode And the call to starpu_task_insert(): \code{.c} starpu_task_insert(&mycodelet, STARPU_VALUE, &ifactor, sizeof(ifactor), STARPU_VALUE, &ffactor, sizeof(ffactor), STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], 0); \endcode The call to starpu_task_insert() is equivalent to the following code: \code{.c} struct starpu_task *task = starpu_task_create(); task->cl = &mycodelet; task->handles[0] = data_handles[0]; task->handles[1] = data_handles[1]; char *arg_buffer; size_t arg_buffer_size; starpu_codelet_pack_args(&arg_buffer, &arg_buffer_size, STARPU_VALUE, &ifactor, sizeof(ifactor), STARPU_VALUE, &ffactor, sizeof(ffactor), 0); task->cl_arg = arg_buffer; task->cl_arg_size = arg_buffer_size; int ret = starpu_task_submit(task); \endcode In the example file tests/main/insert_task_value.c, we use these two ways to create and submit tasks. Instead of calling starpu_codelet_pack_args(), one can also call starpu_codelet_pack_arg_init(), then starpu_codelet_pack_arg() for each data, then starpu_codelet_pack_arg_fini() as follow: \code{.c} struct starpu_task *task = starpu_task_create(); task->cl = &mycodelet; task->handles[0] = data_handles[0]; task->handles[1] = data_handles[1]; struct starpu_codelet_pack_arg_data state; starpu_codelet_pack_arg_init(&state); starpu_codelet_pack_arg(&state, &ifactor, sizeof(ifactor)); starpu_codelet_pack_arg(&state, &ffactor, sizeof(ffactor)); starpu_codelet_pack_arg_fini(&state, &task->cl_arg, &task->cl_arg_size); int ret = starpu_task_submit(task); \endcode A full code example is in file tests/main/pack.c. Here a similar call using ::STARPU_DATA_ARRAY. \code{.c} starpu_task_insert(&mycodelet, STARPU_DATA_ARRAY, data_handles, 2, STARPU_VALUE, &ifactor, sizeof(ifactor), STARPU_VALUE, &ffactor, sizeof(ffactor), 0); \endcode If some part of the task insertion depends on the value of some computation, the macro ::STARPU_DATA_ACQUIRE_CB can be very convenient. For instance, assuming that the index variable i was registered as handle A_handle[i]: \code{.c} /* Compute which portion we will work on, e.g. pivot */ starpu_task_insert(&which_index, STARPU_W, i_handle, 0); /* And submit the corresponding task */ STARPU_DATA_ACQUIRE_CB(i_handle, STARPU_R, starpu_task_insert(&work, STARPU_RW, A_handle[i], 0)); \endcode The macro ::STARPU_DATA_ACQUIRE_CB submits an asynchronous request for acquiring data i for the main application, and will execute the code given as the third parameter when it is acquired. In other words, as soon as the value of i computed by the codelet which_index can be read, the portion of code passed as the third parameter of ::STARPU_DATA_ACQUIRE_CB will be executed, and is allowed to read from i to use it e.g. as an index. Note that this macro is only available when compiling StarPU with the compiler gcc. In the example file tests/datawizard/acquire_cb_insert.c, this macro is used. StarPU also provides a utility function starpu_codelet_unpack_args() to retrieve the ::STARPU_VALUE arguments passed to the task. There is several ways of calling starpu_codelet_unpack_args(). The full code examples are available in the file tests/main/insert_task_value.c. \code{.c} void func_cpu(void *descr[], void *_args) { int ifactor; float ffactor; starpu_codelet_unpack_args(_args, &ifactor, &ffactor); } \endcode \code{.c} void func_cpu(void *descr[], void *_args) { int ifactor; float ffactor; starpu_codelet_unpack_args(_args, &ifactor, 0); starpu_codelet_unpack_args(_args, &ifactor, &ffactor); } \endcode \code{.c} void func_cpu(void *descr[], void *_args) { int ifactor; float ffactor; char buffer[100]; starpu_codelet_unpack_args_and_copyleft(_args, buffer, 100, &ifactor, 0); starpu_codelet_unpack_args(buffer, &ffactor); } \endcode Instead of calling starpu_codelet_unpack_args(), one can also call starpu_codelet_unpack_arg_init(), then starpu_codelet_pack_arg() or starpu_codelet_dup_arg() or starpu_codelet_pick_arg() for each data, then starpu_codelet_unpack_arg_fini() as follow: \code{.c} void func_cpu(void *descr[], void *_args) { int ifactor; float ffactor; size_t size = sizeof(int) + 2*sizeof(size_t) + sizeof(int) + sizeof(float); struct starpu_codelet_pack_arg_data state; starpu_codelet_unpack_arg_init(&state, _args, size); starpu_codelet_unpack_arg(&state, (void**)&ifactor, sizeof(ifactor)); starpu_codelet_unpack_arg(&state, (void**)&ffactor, sizeof(ffactor)); starpu_codelet_unpack_arg_fini(&state); } \endcode \code{.c} void func_cpu(void *descr[], void *_args) { int *ifactor; float *ffactor; size_t size; size_t psize = sizeof(int) + 2*sizeof(size_t) + sizeof(int) + sizeof(float); struct starpu_codelet_pack_arg_data state; starpu_codelet_unpack_arg_init(&state, _args, psize); starpu_codelet_dup_arg(&state, (void**)&ifactor, &size); assert(size == sizeof(*ifactor)); starpu_codelet_dup_arg(&state, (void**)&ffactor, &size); assert(size == sizeof(*ffactor)); starpu_codelet_unpack_arg_fini(&state); } \endcode \code{.c} void func_cpu(void *descr[], void *_args) { int *ifactor; float *ffactor; size_t size; size_t psize = sizeof(int) + 2*sizeof(size_t) + sizeof(int) + sizeof(float); struct starpu_codelet_pack_arg_data state; starpu_codelet_unpack_arg_init(&state, _args, psize); starpu_codelet_pick_arg(&state, (void**)&ifactor, &size); assert(size == sizeof(*ifactor)); starpu_codelet_pick_arg(&state, (void**)&ffactor, &size); assert(size == sizeof(*ffactor)); starpu_codelet_unpack_arg_fini(&state); } \endcode During unpacking one can also call starpu_codelet_unpack_discard_arg() to skip saving the argument in pointer. A full code example is in file tests/main/pack.c. \section OtherTaskUtility Other Task Utility Functions Here a list of other functions to help with task management.
  • The function starpu_task_dup() creates a duplicate of an existing task. The new task is identical to the original task in terms of its parameters, dependencies, and execution characteristics.
  • The function starpu_task_set() is used to set the parameters of a task before it is executed, while starpu_task_build() is used to create a task with the specified parameters.
StarPU provides several functions to help insert data into a task. The function starpu_task_insert_data_make_room() is used to allocate memory space for a data structure that is required for inserting data into a task. This function is called before inserting any data handles into a task, and ensures that enough memory is available for the data to be stored. Once memory is allocated, the data handle can be inserted into the task using the following functions
  • starpu_task_insert_data_process_arg() processes a scalar argument of a task and inserts it into the task's data structure. This function also performs any necessary data allocation and transfer operations.
  • starpu_task_insert_data_process_array_arg() processes an array argument of a task and inserts it into the task's data structure. This function handles the allocation and transfer of the array data, as well as setting up the appropriate metadata to describe the array.
  • starpu_task_insert_data_process_mode_array_arg() processes a mode array argument of a task and inserts it into the task's data structure. This function handles the allocation and transfer of the mode array data, as well as setting up the appropriate metadata to describe the mode array. Additionally, this function also computes the necessary sizes and strides for the data associated with the mode array argument.
*/ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/000077500000000000000000000000001507764646700236405ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/advanced_data_management.doxy000066400000000000000000001411641507764646700315060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page AdvancedDataManagement Advanced Data Management \section VariableSizeDataInterface Data Interface with Variable Size Besides the data interfaces already available in StarPU, mentioned in \ref DataInterface, tasks are actually allowed to change the size of data interfaces. The simplest case is just changing the amount of data actually used within the allocated buffer. This is for instance implemented for the matrix interface: one can set the new NX/NY values with STARPU_MATRIX_SET_NX(), STARPU_MATRIX_SET_NY(), and STARPU_MATRIX_SET_LD() at the end of the task implementation. Data transfers achieved by StarPU will then use these values instead of the whole allocated size. The values of course need to be set within the original allocation. To reserve room for increasing the NX/NY values, one can use starpu_matrix_data_register_allocsize() instead of starpu_matrix_data_register(), to specify the allocation size to be used instead of the default NX*NY*ELEMSIZE. It is also available for a vector by using starpu_vector_data_register_allocsize() to specify the allocation size to be used instead of the default NX*ELEMSIZE. To support this, the data interface has to implement the functions starpu_data_interface_ops::alloc_footprint, starpu_data_interface_ops::alloc_compare, and starpu_data_interface_ops::reuse_data_on_node for proper StarPU allocation management. It might be useful to implement starpu_data_interface_ops::cache_data_on_node, otherwise StarPU will just call \c memcpy(). A more involved case is changing the amount of allocated data. The task implementation can just reallocate the buffer during its execution, and set the proper new values in the interface structure, e.g. nx, ny, ld, etc. so that the StarPU core knows the new data layout. The structure starpu_data_interface_ops however then needs to have the field starpu_data_interface_ops::dontcache set to 1, to prevent StarPU from trying to perform any cached allocation, since the allocated size will vary. An example is available in tests/datawizard/variable_size.c. The example uses its own data interface to contain some simulation information for data growth, but the principle can be applied for any data interface. The principle is to use starpu_malloc_on_node_flags() to make the new allocation, and use starpu_free_on_node_flags() to release any previous allocation. The flags have to be precisely like in the example: \code{.c} unsigned workerid = starpu_worker_get_id_check(); unsigned dst_node = starpu_worker_get_memory_node(workerid); interface->ptr = starpu_malloc_on_node_flags(dst_node, size + increase, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW); starpu_free_on_node_flags(dst_node, old, size, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW); interface->size += increase; \endcode so that the allocated area has the expected properties and the allocation is properly accounted for. Depending on the interface (vector, CSR, etc.) you may have to fix several fields of the data interface: e.g. both nx and allocsize for vectors, and store the pointer both in ptr and dev_handle. Some interfaces make a distinction between the actual number of elements stored in the data and the actually allocated buffer. For instance, the vector interface uses the nx field for the former, and the allocsize for the latter. This allows for lazy reallocation to avoid reallocating the buffer every time to exactly match the actual number of elements. Computations and data transfers will use the field nx, while allocation functions will use the field allocsize. One just has to make sure that allocsize is always bigger or equal to nx. Important note: one can not change the size of a partitioned data. \section DataManagementAllocation Data Management Allocation When the application allocates data, whenever possible it should use the function starpu_malloc(), which will ask CUDA or OpenCL to make the allocation itself and pin the corresponding allocated memory (a basic example is in examples/basic_examples/block.c), or to use the function starpu_memory_pin() to pin memory allocated by other ways, such as local arrays (a basic example is in examples/basic_examples/vector_scal.c). This is needed to permit asynchronous data transfer, i.e. permit data transfer to overlap with computations. Otherwise, the trace will show that the state DriverCopyAsync takes a lot of time, this is because CUDA or OpenCL then reverts to synchronous transfers. Before shutting down StarPU, the application should deallocate any memory that has previously been allocated with starpu_malloc(), by calling either starpu_free() or starpu_free_noflag() which is more recommended. If the application has pinned memory using starpu_memory_pin(), it should unpin the memory using starpu_memory_unpin() before freeing the memory. If an application requires a specific alignment constraint for memory allocations made with starpu_malloc(), it can use the starpu_malloc_set_align() function to set the alignment requirement. The application can provide its own allocation function by calling starpu_malloc_set_hooks(). StarPU will then use them for all data handle allocations in the main memory. An example is in examples/basic_examples/hooks.c. StarPU provides several functions to monitor the memory usage and availability on the system. The application can use the starpu_memory_get_used() function to monitor its own memory usage on a node, and the starpu_memory_get_total_all_nodes() function to monitor the amount of total memory on all memory nodes, and the starpu_memory_get_available_all_nodes() function to monitor the amount of available memory on all memory nodes. Additionally, the starpu_memory_get_used_all_nodes() function can be used to monitor the amount of used memory on all memory nodes. By default, StarPU leaves replicates of data wherever they were used, in case they will be re-used by other tasks, thus saving the data transfer time. When some task modifies some data, all the other replicates are invalidated, and only the processing unit which ran this task will have a valid replicate of the data. If the application knows that this data will not be re-used by further tasks, it should advise StarPU to immediately replicate it to a desired list of memory nodes (given through a bitmask). This can be understood like the write-through mode of CPU caches. \code{.c} starpu_data_set_wt_mask(img_handle, 1<<0); \endcode will for instance request to always automatically transfer a replicate into the main memory (node 0), as bit 0 of the write-through bitmask is being set. An example is available in examples/pi/pi.c. \code{.c} starpu_data_set_wt_mask(img_handle, ~0U); \endcode will request to always automatically broadcast the updated data to all memory nodes. An example is available in tests/datawizard/wt_broadcast.c. Setting the write-through mask to ~0U can also be useful to make sure all memory nodes always have a copy of the data, so that it is never evicted when memory gets scarce. Implicit data dependency computation can become expensive if a lot of tasks access the same piece of data. If no dependency is required on some piece of data (e.g. because it is only accessed in read-only mode, or because write accesses are actually commutative), use the function starpu_data_set_sequential_consistency_flag() to disable implicit dependencies on this data. In the same vein, accumulation of results in the same data can become a bottleneck. The use of the mode ::STARPU_REDUX permits to optimize such accumulation (see \ref DataReduction). To a lesser extent, the use of the flag ::STARPU_COMMUTE keeps the bottleneck (see \ref DataCommute), but at least permits the accumulation to happen in any order. Applications often need a data just for temporary results. In such a case, registration can be made without an initial value, for instance this produces a vector data: \code{.c} starpu_vector_data_register(&handle, -1, 0, n, sizeof(float)); \endcode StarPU will then allocate the actual buffer only when it is actually needed, e.g. directly on the GPU without allocating in main memory. In the same vein, once the temporary results are not useful anymore, the data should be thrown away. If the handle is not to be reused, it can be unregistered: \code{.c} starpu_data_unregister_submit(handle); \endcode actual unregistration will be done after all tasks working on the handle terminate. One can also unregister the data handle by calling: \code{.c} starpu_data_unregister_no_coherency(handle); \endcode Different from starpu_data_unregister(), a valid copy of the data is not put back into the home node in the buffer that was initially registered. If the handle is to be reused, instead of unregistering it, it can simply be deinitialized: \code{.c} starpu_data_deinitialize(handle); \endcode So that the value will be ignored and not written back to main memory. Or instead it can even be invalidated (the buffers containing the current value will then be freed, and reallocated only when another task writes some value to the handle): \code{.c} starpu_data_invalidate(handle); \endcode if the data transfer is asynchronous, one can use the submit versions: \code{.c} starpu_data_deinitialize_submit(handle); \endcode or \code{.c} starpu_data_invalidate_submit(handle); \endcode A basic example is available in the files tests/datawizard/data_deinitialize.c and tests/datawizard/data_invalidation.c. \section DataAccess Data Access To access registered data outside tasks we can call the function starpu_data_acquire(). The access mode can be read-only mode ::STARPU_R, write-only mode ::STARPU_W, and read-write mode ::STARPU_RW. We will get an up-to-date copy of handle in memory located where the data was originally registered. The application can also call starpu_data_acquire_try() instead of starpu_data_acquire() to acquire the data, but if previously-submitted tasks have not completed when we ask to acquire the data, the program will crash. starpu_data_release() must be called once the application no longer needs to access the piece of data. Or call starpu_data_release_to() to partly release the piece of data acquired. We can also access registered data from a given memory node by calling the function starpu_data_acquire_on_node(), or calling starpu_data_acquire_on_node_try() if all previously-submitted tasks have completed. Correspondingly, starpu_data_release_on_node() must be called once the application no longer needs to access the piece of data and the node parameter must be exactly the same as the corresponding starpu_data_acquire_on_node() call. Or call starpu_data_release_to_on_node() to partly release the piece of data acquired. The application may access the requested data asynchronous during the execution of callback by calling starpu_data_acquire_cb(), and by calling starpu_data_acquire_cb_sequential_consistency() with the possibility of enabling or disabling data dependencies. The callback function must call starpu_data_release() once the application no longer needs to access the piece of data. Or call starpu_data_release_to() to partly release the piece of data acquired. The application can also access registered data from a given memory node instead of main memory by calling the function starpu_data_acquire_on_node_cb(), and by calling starpu_data_acquire_on_node_cb_sequential_consistency() with the possibility of enabling or disabling data dependencies. starpu_data_release_on_node() must be called once the application no longer needs to access the piece of data. Or call starpu_data_release_to_on_node() to partly release the piece of data acquired. \section DataPrefetch Data Prefetch The scheduling policies heft, dmda and pheft perform data prefetch (see \ref STARPU_PREFETCH): as soon as a scheduling decision is taken for a task, requests are issued to transfer its required data to the target processing unit, if needed, so that when the processing unit actually starts the task, its data will hopefully be already available, and it will not have to wait for the transfer to finish. The application may want to perform some manual prefetching, for several reasons such as excluding initial data transfers from performance measurements, or setting up an initial statically-computed data distribution on the machine before submitting tasks, which will thus guide StarPU toward an initial task distribution (since StarPU will try to avoid further transfers). This can be achieved by giving the function starpu_data_prefetch_on_node() the handle and the desired target memory node. An example is available in the file tests/microbenchs/prefetch_data_on_node.c. The variant starpu_data_idle_prefetch_on_node() can be used to issue the transfer only when the bus is idle. One can also call starpu_data_request_allocation() for the allocation of a piece of data on the specified memory node. We can know whether the allocation is done on the specified memory node by using starpu_data_test_if_allocated_on_node(). We can also know whether the map is done on the specified memory node by using starpu_data_test_if_mapped_on_node(). If we want higher priority to request data to be replicated to a given node as soon as possible, so that it is available there for tasks, we can call starpu_data_fetch_on_node(). We can call starpu_data_prefetch_on_node_prio() to have a priority than starpu_data_prefetch_on_node(). And call starpu_data_idle_prefetch_on_node_prio() to have a bit higher priority than starpu_data_idle_prefetch_on_node(). Conversely, one can advise StarPU that some data will not be useful in the close future by calling starpu_data_wont_use(). StarPU will then write its value back to its home node, and evict it from GPUs when room is needed. An example is available in the file tests/datawizard/partition_wontuse.c. One can also advise StarPU to evict data from the memory node directly by calling starpu_data_evict_from_node(), but it may fail if e.g. some tasks are still working on the memory node. To avoid failure one can call starpu_data_can_evict() to check whether data can be evicted from the memory node. Anyway it is more recommended to use starpu_data_wont_use(). One can query the status of handle on the specified memory node by calling starpu_data_query_status2() or starpu_data_query_status(). One can call starpu_memchunk_tidy() to tidy the available memory on the specified memory node periodically. \section ManualPartitioning Manual Partitioning Except the partitioning functions described in \ref PartitioningData and \ref AsynchronousPartitioning, one can also handle partitioning by hand, by registering several views on the same piece of data. The idea is then to manage the coherency of the various views through the common buffer in the main memory. examples/filters/fmultiple_manual.c is a complete example using this technique. In short, we first register the same matrix several times: \code{.c} starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0])); for (i = 0; i < PARTS; i++) starpu_matrix_data_register(&vert_handle[i], STARPU_MAIN_RAM, (uintptr_t)&matrix[0][i*(NX/PARTS)], NX, NX/PARTS, NY, sizeof(matrix[0][0])); \endcode Since StarPU is not aware that the two handles are actually pointing to the same data, we have a danger of inadvertently submitting tasks to both views, which will bring a mess since StarPU will not guarantee any coherency between the two views. To make sure we don't do this, we invalidate the view that we will not use: \code{.c} for (i = 0; i < PARTS; i++) starpu_data_invalidate(vert_handle[i]); \endcode Then we can safely work on handle. When we want to switch to the vertical slice view, all we need to do is bring coherency between them by running an empty task on the home node of the data: \code{.c} struct starpu_codelet cl_switch = { .where = STARPU_NOWHERE, .nbuffers = 3, .specific_nodes = 1, .nodes = { STARPU_MAIN_RAM, STARPU_MAIN_RAM, STARPU_MAIN_RAM }, }; ret = starpu_task_insert(&cl_switch, STARPU_RW, handle, STARPU_W, vert_handle[0], STARPU_W, vert_handle[1], 0); \endcode The execution of the task switch will get back the matrix data into the main memory, and thus the vertical slices will get the updated value there. Again, we prefer to make sure that we don't accidentally access the matrix through the whole-matrix handle: \code{.c} starpu_data_invalidate_submit(handle); \endcode Note: when enabling a set of handles in this way, the set must not have any overlapping, i.e. the handles of the set must not have any part of data in common, otherwise StarPU will not properly handle concurrent accesses between them. And now we can start using vertical slices, etc. \section DataHandlesHelpers Data handles helpers Functions starpu_data_set_user_data() and starpu_data_get_user_data() are used to associate user-defined data with a specific data handle. One can set or retrieve the field \c user_data of the data handle by calling these two functions respectively. Similarly, functions starpu_data_set_sched_data() and starpu_data_get_sched_data() are used to associate scheduling-related data with a specific data handle. One can set or retrieve the field \c sched_data of the data handle by calling these two functions respectively. One can set a name for a data handle by calling starpu_data_set_name(). One can call starpu_data_register_same() to register a new piece of data into a data handle with the same interface as the specified data handle. If necessary, one can register a void interface by using starpu_void_data_register(). There is no data really associated to this interface, but it may be used as a synchronization mechanism. One can call starpu_data_cpy() or starpu_data_cpy_priority() to copy data from one memory location to another memory location, but the latter one allows the application to specify a priority value for the copy operation. The higher the priority value, the sonner the copy operation will be scheduled and executed. One can also call starpu_data_dup_ro() function for duplicating, but this function only creates a new read-only data block that is an exact copy of the original data block. The new data block can be used independently of the original data block for read-only access. starpu_data_pack_node() and starpu_data_pack() are functions that are used to pack a data item into a binary buffer on a node or on local memory node. starpu_data_peek_node() and starpu_data_peek() are functions that allow you to read in handle's node or local node replicate the data located at the given pointer. starpu_data_unpack_node() and starpu_data_unpack() are functions that are used to unpack a data item from a binary buffer on a node or on local memory node. StarPU provides several functions for querying the size and memory allocation of variable size data items, such as: starpu_data_get_size() is a function that returns the size of a data associated with handle in bytes. This is the size of the actual data stored in memory. starpu_data_get_alloc_size() is a function that returns the amount of memory that has been allocated for a data associated with handle in anticipation. This may be larger than the actual size of the data item, due to alignment requirements or other implementation details. starpu_data_get_max_size() is a function that returns the maximum size of a handle data that can be allocated by StarPU. One can call starpu_data_get_home_node() to retrieve the identifier of the node on which the data handle is originally stored. One can call starpu_data_print() to print basic information about the data handle and the node to the specified file. \section DataPointers Handles data buffer pointers A simple understanding of StarPU handles is that it's a collection of buffers on each memory node of the machine, which contain the same data. The picture is however made more complex with the OpenCL support and with partitioning. When partitioning a handle, the data buffers of the subhandles will indeed be inside the data buffers of the main handle (to save transferring data back and forth between the main handle and the subhandles). But in OpenCL, a cl_mem is not a pointer, but an opaque value on which pointer arithmetic can not be used. That is why data interfaces contain three fields: dev_handle, offset, and ptr.
  • The field dev_handle is what the allocation function returned, and one can not do arithmetic on it.
  • The field offset is the offset inside the allocated area, most often it will be 0 because data start at the beginning of the allocated area, but when the handle is partitioned, the subhandles will have varying offset values, for each subpiece.
  • The field ptr, in the non-OpenCL case, i.e. when pointer arithmetic can be used on dev_handle, is just the sum of dev_handle and offset, provided for convenience.
This means that:
  • computation kernels can use ptr in non-OpenCL implementations.
  • computation kernels have to use dev_handle and offset in the OpenCL implementation.
  • allocation methods of data interfaces have to store the value returned by starpu_malloc_on_node() in dev_handle and ptr, and set offset to 0.
  • partitioning filters have to copy over dev_handle without modifying it, set in the child different values of offset, and set ptr accordingly as the sum of dev_handle and offset.
We can call starpu_data_handle_to_pointer() to get ptr associated with the data handle, or call starpu_data_get_local_ptr() to get the local pointer associated with the data handle. Examples in the directory examples/interface/complex_dev_handle/ show how to generate and implement an interface supporting OpenCL. To better notice the difference between simple ptr and dev_handle + offset, one can compare examples/interface/complex_interface.c vs examples/interface/complex_dev_handle/complex_dev_handle_interface.c and examples/interface/complex_filters.c vs examples/interface/complex_dev_handle/complex_dev_handle_filters.c. \section DefiningANewDataFilter Defining A New Data Filter StarPU provides a series of predefined filters in \ref API_Data_Partition, but additional filters can be defined by the application. The principle is that the filter function just fills the memory location of the i-th subpart of a data. Examples are provided in src/datawizard/interfaces/*_filters.c, check \ref starpu_data_filter::filter_func for further details. The helper function starpu_filter_nparts_compute_chunk_size_and_offset() can be used to compute the division of pieces of data. \section DefiningANewDataInterface Defining A New Data Interface This section proposes an example how to define your own interface, when the StarPU-provided interface do not fit your needs. Here we take a simple example of an array of complex numbers represented by two arrays of double values. The full source code is in examples/interface/complex_interface.c and examples/interface/complex_interface.h Let's thus define a new data interface to manage arrays of complex numbers: \code{.c} /* interface for complex numbers */ struct starpu_complex_interface { double *real; double *imaginary; int nx; }; \endcode That structure stores enough to describe one buffer of such kind of data. It is used for the buffer stored in the main memory, another instance is used for the buffer stored in a GPU, etc. A data handle is thus a collection of such structures, to describe each buffer on each memory node. Note: one should not make pointers that point into such structures, because StarPU needs to be able to copy over the content of it to various places, for instance to efficiently migrate a data buffer from one data handle to another data handle, so the actual address of the structure may vary. \subsection DefiningANewDataInterface_registration Data registration Registering such a data to StarPU is easily done using the function starpu_data_register(). The last parameter of the function, interface_complex_ops, will be described below. \code{.c} void starpu_complex_data_register(starpu_data_handle_t *handleptr, unsigned home_node, double *real, double *imaginary, int nx) { struct starpu_complex_interface complex = { .real = real, .imaginary = imaginary, .nx = nx }; starpu_data_register(handleptr, home_node, &complex, &interface_complex_ops); } \endcode The struct starpu_complex_interface complex is here used just to store the parameters provided by users to starpu_complex_data_register. starpu_data_register() will first allocate the handle, and then pass the structure starpu_complex_interface to the method starpu_data_interface_ops::register_data_handle, which records them within the data handle (it is called once per node by starpu_data_register()): \code{.c} static void complex_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_complex_interface *local_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node); local_interface->nx = complex_interface->nx; if (node == home_node) { local_interface->real = complex_interface->real; local_interface->imaginary = complex_interface->imaginary; } else { local_interface->real = NULL; local_interface->imaginary = NULL; } } } \endcode If the application provided a home node, the corresponding pointers will be recorded for that node. Others have no buffer allocated yet. Possibly the interface needs some dynamic allocation (e.g. to store an array of dimensions that can have variable size). The corresponding deallocation will then be done in starpu_data_interface_ops::unregister_data_handle. Different operations need to be defined for a data interface through the type starpu_data_interface_ops. We only define here the basic operations needed to run simple applications. The source code for the different functions can be found in the file examples/interface/complex_interface.c, the details of the hooks to be provided are documented in \ref starpu_data_interface_ops . \code{.c} static struct starpu_data_interface_ops interface_complex_ops = { .register_data_handle = complex_register_data_handle, .allocate_data_on_node = complex_allocate_data_on_node, .copy_methods = &complex_copy_methods, .get_size = complex_get_size, .footprint = complex_footprint, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct starpu_complex_interface), }; \endcode The field starpu_data_interface_ops::interfaceid should be defined to ::STARPU_UNKNOWN_INTERFACE_ID when defining the interface, its value will be updated the first time a data is registered through the new data interface. Convenience functions can be defined to access the different fields of the complex interface from a StarPU data handle after a call to starpu_data_acquire(): \code{.c} double *starpu_complex_get_real(starpu_data_handle_t handle) { struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return complex_interface->real; } double *starpu_complex_get_imaginary(starpu_data_handle_t handle); int starpu_complex_get_nx(starpu_data_handle_t handle); \endcode Similar functions need to be defined to access the different fields of the complex interface from a void * pointer to be used within codelet implementations. \snippet complex.c To be included. You should update doxygen if you see this text. Complex data interfaces can then be registered to StarPU. \code{.c} double real = 45.0; double imaginary = 12.0; starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1); starpu_task_insert(&cl_display, STARPU_R, handle1, 0); \endcode and used by codelets. \code{.c} void display_complex_codelet(void *descr[], void *_args) { int nx = STARPU_COMPLEX_GET_NX(descr[0]); double *real = STARPU_COMPLEX_GET_REAL(descr[0]); double *imaginary = STARPU_COMPLEX_GET_IMAGINARY(descr[0]); int i; for(i=0 ; iexamples/interface/. \subsection DefiningANewDataInterface_footprint Data footprint We need to pass a custom footprint function to the method starpu_data_interface_ops::footprint which computes data size footprint. StarPU provides several functions to compute different type of value: starpu_hash_crc32c_be_n() is used to compute the CRC of a byte buffer, starpu_hash_crc32c_be_ptr() is used to compute the CRC of a pointer value, starpu_hash_crc32c_be() is used to compute the CRC of a 32bit number, starpu_hash_crc32c_string() is used to compute the CRC of a string. \subsection DefiningANewDataInterface_allocation Data allocation To be able to run tasks on GPUs etc. StarPU needs to know how to allocate a buffer for the interface. In our example, two allocations are needed in the allocation method \c complex_allocate_data_on_node(): one for the real part and one for the imaginary part. \code{.c} static starpu_ssize_t complex_allocate_data_on_node(void *data_interface, unsigned node) { struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; double *addr_real = NULL; double *addr_imaginary = NULL; starpu_ssize_t requested_memory = complex_interface->nx * sizeof(complex_interface->real[0]); addr_real = (double*) starpu_malloc_on_node(node, requested_memory); if (!addr_real) goto fail_real; addr_imaginary = (double*) starpu_malloc_on_node(node, requested_memory); if (!addr_imaginary) goto fail_imaginary; /* update the data properly in consequence */ complex_interface->real = addr_real; complex_interface->imaginary = addr_imaginary; return 2*requested_memory; fail_imaginary: starpu_free_on_node(node, (uintptr_t) addr_real, requested_memory); fail_real: return -ENOMEM; } \endcode Here we try to allocate the two parts. If either of them fails, we return \c -ENOMEM. If they succeed, we can record the obtained pointers and returned the amount of allocated memory (for memory usage accounting). Conversely, \c complex_free_data_on_node() frees the two parts: \code{.c} static void complex_free_data_on_node(void *data_interface, unsigned node) { struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; starpu_ssize_t requested_memory = complex_interface->nx * sizeof(complex_interface->real[0]); starpu_free_on_node(node, (uintptr_t) complex_interface->real, requested_memory); starpu_free_on_node(node, (uintptr_t) complex_interface->imaginary, requested_memory); } \endcode We can call starpu_opencl_allocate_memory() to allocate memory on an OpenCL device. We have not made anything particular for GPUs or whatsoever: it is starpu_free_on_node() which knows how to actually make the allocation, and returns the resulting pointer, be it in main memory, in GPU memory, etc. \subsection DefiningANewDataInterface_copy Data copy Now that StarPU knows how to allocate/free a buffer, it needs to be able to copy over data into/from it. Defining a method \c copy_any_to_any() allows StarPU to perform direct transfers between main memory and GPU memory. \code{.c} static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct starpu_complex_interface *src_complex = src_interface; struct starpu_complex_interface *dst_complex = dst_interface; int ret = 0; if (starpu_interface_copy((uintptr_t) src_complex->real, 0, src_node, (uintptr_t) dst_complex->real, 0, dst_node, src_complex->nx*sizeof(src_complex->real[0]), async_data)) ret = -EAGAIN; if (starpu_interface_copy((uintptr_t) src_complex->imaginary, 0, src_node, (uintptr_t) dst_complex->imaginary, 0, dst_node, src_complex->nx*sizeof(src_complex->imaginary[0]), async_data)) ret = -EAGAIN; return ret; } \endcode We here again have no idea what is main memory or GPU memory, or even if the copy is synchronous or asynchronous: we just call starpu_interface_copy() according to the interface, passing it the pointers, and checking whether it returned \c -EAGAIN, which means the copy is asynchronous, and StarPU will appropriately wait for it thanks to the pointer \c async_data. This copy method is also available for 2D matrices starpu_interface_copy2d(), 3D matrices starpu_interface_copy3d(), 4D matrices starpu_interface_copy4d() and N-dim matrices starpu_interface_copynd(). starpu_interface_copy() will also manage copies between other devices such as CUDA devices, OpenCL devices, etc. But if necessary, we may manage these copies by ourselves as well. StarPU provides three functions starpu_cuda_copy_async_sync(), starpu_cuda_copy2d_async_sync() and starpu_cuda_copy3d_async_sync() that enable copying of 1D, 2D or 3D data between main memory and CUDA device memories. They first try to copy the data asynchronous, if fail or \c stream is \c NULL then copy the data synchronously. StarPU also provides several functions that are used to transfer data between RAM and OpenCL devices. starpu_opencl_copy_ram_to_opencl() copies data from RAM to an OpenCL device. starpu_opencl_copy_opencl_to_ram() copies data from an OpenCL device to RAM. starpu_opencl_copy_opencl_to_opencl() copies data between two OpenCL devices. starpu_opencl_copy_async_sync() copies data between two devices. If \c event is \c NULL, the copy is synchronous, and checking whether \c ret is set to \c -EAGAIN, which means the copy is asynchronous. This copy method is referenced in a structure \ref starpu_data_copy_methods \code{.c} static const struct starpu_data_copy_methods complex_copy_methods = { .any_to_any = copy_any_to_any }; \endcode which was referenced in the structure \ref starpu_data_interface_ops above. Other fields of \ref starpu_data_copy_methods allow providing optimized variants, notably for the case of 2D or 3D matrix tiles with non-trivial ld. We can call starpu_interface_data_copy() to record in offline execution traces the copy. When an asynchronous implementation of the data transfer is implemented, we can call starpu_interface_start_driver_copy_async() and starpu_interface_end_driver_copy_async() to initiate and complete asynchronous data transfers between main memory and GPU memory. \subsection DefiningANewDataInterface_pack Data pack/peek/unpack The copy methods allow for RAM/GPU transfers, but is not enough for e.g. transferring over MPI. That requires defining the pack/peek/unpack methods. The principle is that the method starpu_data_interface_ops::pack_data concatenates the buffer data into a newly-allocated contiguous bytes array, conversely starpu_data_interface_ops::peek_data extracts from a bytes array into the buffer data, and starpu_data_interface_ops::unpack_data does the same as starpu_data_interface_ops::peek_data but also frees the bytes array. \code{.c} static int complex_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node); *count = complex_get_size(handle); if (ptr != NULL) { char *data; data = (void*) starpu_malloc_on_node_flags(node, *count, 0); *ptr = data; memcpy(data, complex_interface->real, complex_interface->nx*sizeof(double)); memcpy(data+complex_interface->nx*sizeof(double), complex_interface->imaginary, complex_interface->nx*sizeof(double)); } return 0; } \endcode \c complex_pack_data() first computes the size to be allocated, then allocates it, and copies over into it the content of the two real and imaginary arrays. \code{.c} static int complex_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { char *data = ptr; STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node); STARPU_ASSERT(count == 2 * complex_interface->nx * sizeof(double)); memcpy(complex_interface->real, data, complex_interface->nx*sizeof(double)); memcpy(complex_interface->imaginary, data+complex_interface->nx*sizeof(double), complex_interface->nx*sizeof(double)); return 0; } \endcode \c complex_peek_data() simply uses \c memcpy() to copy over from the bytes array into the data buffer. \code{.c} static int complex_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { complex_peek_data(handle, node, ptr, count); starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0); return 0; } \endcode And \c complex_unpack_data() just calls \c complex_peek_data() and releases the bytes array. \subsection DefiningANewDataInterface_pointers Pointers inside the data interface In the example described above, the two pointers stored in the data interface are data buffers, which may point into main memory, GPU memory, etc. One may also want to store pointers to meta-data for the interface, for instance the list of dimensions sizes for the n-dimension matrix interface, but such pointers are to be handled completely differently. More examples are provided in src/datawizard/interfaces/*_interface.c More precisely, there are two types of pointers:
  • Data pointers, which point to the actual data in RAM/GPU/etc. memory. They may be NULL when the data is not allocated (yet). StarPU will automatically call starpu_data_interface_ops::allocate_data_on_node to allocate the data pointers whenever needed, and call starpu_data_interface_ops::free_data_on_node when memory gets scarce. For instance, for the n-dimension matrix interface the pointers to the actual data (\c ptr, \c dev_handle, \c offset) are data pointers.
  • Meta-data pointers, which always point to RAM memory. They are usually always allocated so that they can always be used. For instance, for the n-dimension matrix interface the array of dimension sizes and the array of ld are meta-data pointers. These are typically allocated at data registration time in starpu_data_interface_ops::register_data_handle, and released at data unregistration time in starpu_data_interface_ops::unregister_data_handle
This means that:
  • The starpu_data_interface_ops::register_data_handle method has to allocate the meta-data pointers. If users provided a buffer for the initial value of the handle, starpu_data_interface_ops::register_data_handle sets the data pointers of the home_node interface to that buffer.
  • The interface can additionally provide a \c ptr_register helper to set the data pointer of a given node. One can call starpu_data_ptr_register() to realise.
  • The starpu_data_interface_ops::unregister_data_handle method has to deallocate the meta-data pointers.
  • The starpu_data_interface_ops::allocate_data_on_node method has to allocate the data pointers on the given node.
  • The starpu_data_interface_ops::free_data_on_node method has to deallocate the data pointers on the given node.
  • The optional starpu_data_interface_ops::cache_data_on_node transfers the data pointers from a source interface to a cached interface. If undefined, a mere memcpy is used instead. This can notably take the opportunity to clear pointers in the source interface. This also needs to copy the properties that starpu_data_interface_ops::compare (or starpu_data_interface_ops::alloc_compare if defined) needs for comparing interfaces for caching compatibility.
  • The starpu_data_interface_ops::reuse_data_on_node transfers the data pointers from a cached interface to the destination interface. If undefined, a mere memcpy is used instead.
  • The starpu_data_interface_ops::map_data has to map the data pointers on the given node. One should define function starpu_interface_map() to set this field.
  • The starpu_data_interface_ops::unmap_data has to unmap the data pointers on the given node. One should define function starpu_interface_unmap() to set this field.
  • The starpu_data_interface_ops::update_map has to update the data pointers on the given node. One should define function starpu_interface_update_map() to set this field.
  • The filtering functions have to allocate the meta-data pointers for the child interface, and when the parent interface has data pointers, it has to set the child data pointers to point into the parent data buffers.
Put another way:
  • starpu_data_register() initializes the handle structure and calls starpu_data_interface_ops::register_data_handle.
  • Then StarPU may call starpu_data_interface_ops::allocate_data_on_node and starpu_data_interface_ops::free_data_on_node as it sees fit when it needs the data allocated on some node or not.
  • Eventually, starpu_data_unregister() releases the handle buffers for all nodes (except the home node given to starpu_data_register() ), which either means calling starpu_data_interface_ops::free_data_on_node (if allocation cache is disabled), or putting them into the allocation cache. It then calls starpu_data_interface_ops::unregister_data_handle, and releases the handle structure.
Note: for compressed matrices such as CSR, BCSR, COO, the \c colind and \c rowptr arrays are not meta-data pointers, but data pointers like \c nzval, because they need to be available in GPU memory for the GPU kernels. Note: when the interface does not contain meta-data pointers, starpu_data_interface_ops::reuse_data_on_node does not need to be implemented, StarPU will just use a memcpy. Otherwise, either starpu_data_interface_ops::reuse_data_on_node must be used to transfer only the data pointers and not the meta-data pointers, or the allocation cache should be disabled by setting starpu_data_interface_ops::dontcache to 1. Note: It should be noted that because of the allocation cache, starpu_data_interface_ops::free_data_on_node may be called on an interface which is not attached to a handle anymore. This means that the meta-data pointers will have been deallocated by starpu_data_interface_ops::unregister_data_handle, and cannot be used by starpu_data_interface_ops::free_data_on_node to e.g. compute the size to be deallocated. For instance, the n-dimension matrix interface uses an additional scalar allocsize field to store the allocation size, thus still available even when the interface is in the allocation cache. Note: if starpu_data_interface_ops::unregister_data_handle is implemented and checks that pointers are NULL, starpu_data_interface_ops::cache_data_on_node needs to be implemented to clear the pointers when caching the allocation. \subsection DefiningANewDataInterface_helpers Helpers We can get the unique identifier of the interface associated with the data handle by calling starpu_data_get_interface_id(), and get the next available identifier for a newly created data interface by calling starpu_data_interface_get_next_id(). \section TheMultiformatInterface The Multiformat Interface It may be interesting to represent the same piece of data using two different data structures: one only used on CPUs, and one only used on GPUs. This can be done by using the multiformat interface. StarPU will be able to convert data from one data structure to the other when needed. Note that the scheduler dmda is the only one optimized for this interface. Users must provide StarPU with conversion codelets: \snippet multiformat.c To be included. You should update doxygen if you see this text. Kernels can be written almost as for any other interface. Note that ::STARPU_MULTIFORMAT_GET_CPU_PTR shall only be used for CPU kernels. CUDA kernels must use ::STARPU_MULTIFORMAT_GET_CUDA_PTR, and OpenCL kernels must use ::STARPU_MULTIFORMAT_GET_OPENCL_PTR. ::STARPU_MULTIFORMAT_GET_NX may be used in any kind of kernel. \code{.c} static void multiformat_scal_cpu_func(void *buffers[], void *args) { struct point *aos; unsigned int n; aos = STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); ... } extern "C" void multiformat_scal_cuda_func(void *buffers[], void *_args) { unsigned int n; struct struct_of_arrays *soa; soa = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]); n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); ... } \endcode A full example may be found in examples/basic_examples/multiformat.c. \section SpecifyingATargetNode Specifying A Target Node For Task Data When executing a task on GPU, for instance, StarPU would normally copy all the needed data for the tasks to the embedded memory of the GPU. It may however happen that the task kernel would rather have some of the data kept in the main memory instead of copied in the GPU, a pivoting vector for instance. This can be achieved by setting the flag starpu_codelet::specific_nodes to 1, and then fill the array starpu_codelet::nodes (or starpu_codelet::dyn_nodes when starpu_codelet::nbuffers is greater than \ref STARPU_NMAXBUFS) with the node numbers where data should be copied to, or ::STARPU_SPECIFIC_NODE_LOCAL to let StarPU copy it to the memory node where the task will be executed. The function starpu_task_get_current_data_node() can be used to retrieve the memory node associated with the current task being executed. ::STARPU_SPECIFIC_NODE_CPU can also be used to request data to be put in CPU-accessible memory (and let StarPU choose the NUMA node). ::STARPU_SPECIFIC_NODE_FAST and ::STARPU_SPECIFIC_NODE_SLOW can also be used For instance, with the following codelet: \code{.c} struct starpu_codelet cl = { .cuda_funcs = { kernel }, .nbuffers = 2, .modes = {STARPU_RW, STARPU_RW}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL}, }; \endcode the first data of the task will be kept in the CPU memory, while the second data will be copied to the CUDA GPU as usual. A working example is available in tests/datawizard/specific_node.c With the following codelet: \code{.c} struct starpu_codelet cl = { .cuda_funcs = { kernel }, .nbuffers = 2, .modes = {STARPU_RW, STARPU_RW}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_SLOW}, }; \endcode The first data will be copied into fast (but probably size-limited) local memory, while the second data will be left in slow (but large) memory. This makes sense when the kernel does not make so many accesses to the second data, and thus data being remote e.g. over a PCI bus is not a performance problem, and avoids filling the fast local memory with data which does not need the performance. In cases where the kernel is fine with some data being either local or in the main memory, ::STARPU_SPECIFIC_NODE_LOCAL_OR_CPU can be used. StarPU will then be free to leave the data in the main memory and let the kernel access it from accelerators, or to move it to the accelerator before starting the kernel, for instance: \code{.c} struct starpu_codelet cl = { .cuda_funcs = { kernel }, .nbuffers = 2, .modes = {STARPU_RW, STARPU_R}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU}, }; \endcode An example for specifying target node is available in tests/datawizard/specific_node.c. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/advanced_scheduling.doxy000066400000000000000000000421001507764646700305140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page AdvancedScheduling Advanced Scheduling \section Energy-basedScheduling Energy-based Scheduling Note: by default, StarPU does not let CPU workers sleep, to let them react to task release as quickly as possible. For idle time to really let CPU cores save energy, one needs to use the \c configure option \ref enable-blocking-drivers "--enable-blocking-drivers". If the application can provide some energy consumption performance model (through the field starpu_codelet::energy_model), StarPU will take it into account when distributing tasks. The target function that the scheduler \b dmda minimizes becomes alpha * T_execution + beta * T_data_transfer + gamma * Consumption , where Consumption is the estimated task consumption in Joules. To tune this parameter, use export STARPU_SCHED_GAMMA=3000 (\ref STARPU_SCHED_GAMMA) for instance, to express that each Joule (i.e. kW during 1000us) is worth 3000us execution time penalty. Setting alpha and beta to zero permits to only take into account energy consumption. This is however not sufficient to correctly optimize energy: the scheduler would simply tend to run all computations on the most energy-conservative processing unit. To account for the consumption of the whole machine (including idle processing units), the idle power of the machine should be given by setting export STARPU_IDLE_POWER=200 (\ref STARPU_IDLE_POWER) for 200W, for instance. This value can often be obtained from the machine power supplier, e.g. by running \verbatim ipmitool -I lanplus -H mymachine-ipmi -U myuser -P mypasswd sdr type Current \endverbatim The energy actually consumed by the total execution can be displayed by setting export STARPU_PROFILING=1 STARPU_WORKER_STATS=1 (\ref STARPU_PROFILING and \ref STARPU_WORKER_STATS). For OpenCL devices, on-line task consumption measurement is currently supported through the OpenCL extension CL_PROFILING_POWER_CONSUMED, implemented in the MoviSim simulator. For CUDA devices, on-line task consumption measurement is supported on V100 cards and beyond. This however only works for quite long tasks, since the measurement granularity is about 10ms. Applications can however provide explicit measurements by feeding the energy performance model by hand. Fine-grain measurement is often not feasible with the feedback provided by the hardware, so users can for instance run a given task a thousand times, measure the global consumption for that series of tasks, divide it by a thousand, repeat for varying kinds of tasks and task sizes, and eventually feed StarPU with these manual measurements. For CUDA devices starting with V100, the starpu_energy_start() and starpu_energy_stop() helpers, described in \ref MeasuringEnergyandPower below, make it easy. For older models, one can use nvidia-smi -q -d POWER to get the current consumption in Watt. Multiplying this value by the average duration of a single task gives the consumption of the task in Joules, which can be given to starpu_perfmodel_update_history(). (examplified in \ref PerformanceModelExample with the performance model energy_model). Another way to provide the energy performance is to define a perfmodel with starpu_perfmodel::type ::STARPU_PER_ARCH or ::STARPU_PER_WORKER , and set the field starpu_perfmodel::arch_cost_function or starpu_perfmodel::worker_cost_function to a function which shall return the estimated consumption of the task in Joules. Such a function can for instance use starpu_task_expected_length() on the task (in µs), multiplied by the typical power consumption of the device, e.g. in W, and divided by 1000000. to get Joules. An example is in the file tests/perfmodels/regression_based_energy.c. There are other functions in StarPU that are used to measure the energy consumed by the system during execution. The starpu_energy_use() function declares that there are the energy consumptions of the task, while the starpu_energy_used() function returns the total energy consumed since the start of measurement. \subsection MeasuringEnergyandPower Measuring energy and power with StarPU We have extended the performance model of StarPU to measure energy and power values of CPUs. These values are measured using the existing Performance API (PAPI) analysis library. PAPI provides the tool designer and application engineer with a consistent interface and methodology for use of the performance counter hardware found in most major microprocessors. PAPI enables software engineers to see, in near real time, the relation between software performance and processor events. - To measure energy consumption of CPUs, we use the RAPL events, which are available on CPU architecture: RAPL_ENERGY_PKG that represents the whole CPU socket power consumption, and RAPL_ENERGY_DRAM that represents the RAM power consumption. PAPI provides a generic, portable interface for the hardware performance counters available on all modern CPUs and some other components of interest that are scattered across the chip and system. In order to use the right rapl events for energy measurement, user should check the rapl events available on the machine, using this command: \verbatim $ papi_native_avail \endverbatim Depending on the system configuration, users may have to run this as root to get the performance counter values. Since the measurement is for all the CPUs and the memory, the approach taken here is to run a series of tasks on all of them and to take the overall measurement. - The example below illustrates the energy and power measurements, using the functions starpu_energy_start() and starpu_energy_stop(). In this example, we launch several tasks of the same type in parallel. To perform the energy requirement measurement of a program, we call starpu_energy_start(), which initializes energy measurement counters and starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task, unsigned nimpl, unsigned ntasks, int workerid, enum starpu_worker_archtype archi) to stop counting and update the performance model. This ends up yielding the average energy requirement of a single task. The example below illustrates this for a given task type. \code{.c} unsigned N = starpu_cpu_worker_get_count() * 40; starpu_energy_start(-1, STARPU_CPU_WORKER); for (i = 0; i < N; i++) starpu_task_insert(&cl, STARPU_EXECUTE_WHERE, STARPU_CPU, STARPU_R, arg1, STARPU_RW, arg2, 0); starpu_task_t *specimen = starpu_task_build(&cl, STARPU_R, arg1, STARPU_RW, arg2, 0); starpu_energy_stop(&codelet.energy_model, specimen, 0, N, -1, STARPU_CPU_WORKER); . . . \endcode The example starts 40 times more tasks of the same type than there are CPU execution units. Once the tasks are distributed over all CPUs, the latter are all executing the same type of tasks (with the same data size and parameters); each CPU will in the end execute 40 tasks. A specimen task is then constructed and passed to starpu_energy_stop(), which will fold into the performance model the energy requirement measurement for that type and size of task. For the energy and power measurements, depending on the system configuration, users may have to run applications as root to use PAPI library. The function starpu_energy_stop() uses PAPI_stop() to stop counting and store the values into the array. We calculate both energy in Joules and power consumption in Watt. We call the function starpu_perfmodel_update_history() in the performance model to provide explicit measurements. - In the CUDA case, nvml provides per-GPU energy measurement. We can thus calibrate the performance models per GPU: \code{.c} unsigned N = 40; for (i = 0; i < starpu_cuda_worker_get_count(); i++) { int workerid = starpu_worker_get_by_type(STARPU_CUDA_WORKER, i); starpu_energy_start(workerid, STARPU_CUDA_WORKER); for (i = 0; i < N; i++) starpu_task_insert(&cl, STARPU_EXECUTE_ON_WORKER, workerid, STARPU_R, arg1, STARPU_RW, arg2, 0); starpu_task_t *specimen = starpu_task_build(&cl, STARPU_R, arg1, STARPU_RW, arg2, 0); starpu_energy_stop(&codelet.energy_model, specimen, 0, N, workerid, STARPU_CUDA_WORKER); } \endcode - A complete example is available in tests/perfmodels/regression_based_memset.c \section StaticScheduling Static Scheduling In some cases, one may want to force some scheduling, for instance force a given set of tasks to GPU0, another set to GPU1, etc. while letting some other tasks be scheduled on any other device. This can indeed be useful to guide StarPU into some work distribution, while still letting some degree of dynamism. For instance, to force execution of a task on CUDA0: \code{.c} task->execute_on_a_specific_worker = 1; task->workerid = starpu_worker_get_by_type(STARPU_CUDA_WORKER, 0); \endcode An example is in the file tests/errorcheck/invalid_tasks.c. or equivalently \code{.c} starpu_task_insert(&cl, ..., STARPU_EXECUTE_ON_WORKER, starpu_worker_get_by_type(STARPU_CUDA_WORKER, 0), ...); \endcode One can also specify a set of worker(s) which are allowed to take the task, as an array of bit, for instance to allow workers 2 and 42: \code{.c} task->workerids = calloc(2,sizeof(uint32_t)); task->workerids[2/32] |= (1 << (2%32)); task->workerids[42/32] |= (1 << (42%32)); task->workerids_len = 2; \endcode One can also specify the order in which tasks must be executed by setting the field starpu_task::workerorder. An example is available in the file tests/main/execute_schedule.c. If this field is set to a non-zero value, it provides the per-worker consecutive order in which tasks will be executed, starting from 1. For a given of such task, the worker will thus not execute it before all the tasks with smaller order value have been executed, notably in case those tasks are not available yet due to some dependencies. This eventually gives total control of task scheduling, and StarPU will only serve as a "self-timed" task runtime. Of course, the provided order has to be runnable, i.e. a task should not depend on another task bound to the same worker with a bigger order. Note however that using scheduling contexts while statically scheduling tasks on workers could be tricky. Be careful to schedule the tasks exactly on the workers of the corresponding contexts, otherwise the workers' corresponding scheduling structures may not be allocated or the execution of the application may deadlock. Moreover, the hypervisor should not be used when statically scheduling tasks. \section configuringHeteroprio Configuring Heteroprio Within Heteroprio, one priority per processing unit type is assigned to each task, such that a task has several priorities. Each worker pops the task that has the highest priority for the hardware type it uses, which could be CPU or CUDA for example. Therefore, the priorities has to be used to manage the critical path, but also to promote the consumption of tasks by the more appropriate workers. The tasks are stored inside buckets, where each bucket corresponds to a priority set. Then each worker uses an indirect access array to know the order in which it should access the buckets. Moreover, all the tasks inside a bucket must be compatible with all the processing units that may access it (at least). These priorities are now automatically assigned by Heteroprio in auto calibration mode using heuristics. If you want to set these priorities manually, you can change \ref STARPU_HETEROPRIO_USE_AUTO_CALIBRATION and follow the example below. In this example code, we have 5 types of tasks. CPU workers can compute all of them, but CUDA workers can only execute tasks of types 0 and 1, and are expected to go 20 and 30 time faster than the CPU, respectively. \code{.c} #include // Before calling starpu_init struct starpu_conf conf; starpu_conf_init(&conf); // Inform StarPU to use Heteroprio conf.sched_policy_name = "heteroprio"; // Inform StarPU about the function that will init the priorities in Heteroprio // where init_heteroprio is a function to implement conf.sched_policy_callback = &init_heteroprio; // Do other things with conf if needed, then init StarPU starpu_init(&conf); \endcode \code{.c} void init_heteroprio(unsigned sched_ctx) { // CPU uses 5 buckets and visits them in the natural order starpu_heteroprio_set_nb_prios(sched_ctx, STARPU_CPU_WORKER, 5); // It uses direct mapping idx => idx for(unsigned idx = 0; idx < 5; ++idx){ starpu_heteroprio_set_mapping(sched_ctx, STARPU_CPU_WORKER, idx, idx); // If there is no CUDA worker we must tell that CPU is faster starpu_heteroprio_set_faster_arch(sched_ctx, STARPU_CPU_WORKER, idx); } if(starpu_cuda_worker_get_count()){ // CUDA is enabled and uses 2 buckets starpu_heteroprio_set_nb_prios(sched_ctx, STARPU_CUDA_WORKER, 2); // CUDA will first look at bucket 1 starpu_heteroprio_set_mapping(sched_ctx, STARPU_CUDA_WORKER, 0, 1); // CUDA will then look at bucket 2 starpu_heteroprio_set_mapping(sched_ctx, STARPU_CUDA_WORKER, 1, 2); // For bucket 1 CUDA is the fastest starpu_heteroprio_set_faster_arch(sched_ctx, STARPU_CUDA_WORKER, 1); // And CPU is 30 times slower starpu_heteroprio_set_arch_slow_factor(sched_ctx, STARPU_CPU_WORKER, 1, 30.0f); // For bucket 0 CUDA is the fastest starpu_heteroprio_set_faster_arch(sched_ctx, STARPU_CUDA_WORKER, 0); // And CPU is 20 times slower starpu_heteroprio_set_arch_slow_factor(sched_ctx, STARPU_CPU_WORKER, 0, 20.0f); } } \endcode Then, when a task is inserted, the priority of the task will be used to select in which bucket is has to be stored. So, in the given example, the priority of a task will be between 0 and 4 included. However, tasks of priorities 0-1 must provide CPU and CUDA kernels, and tasks of priorities 2-4 must provide CPU kernels (at least). The full source code of this example is available in the file examples/scheduler/heteroprio_test.c \subsection LAHeteroprio Using locality aware Heteroprio Heteroprio supports a mode where locality is evaluated to guide the distribution of the tasks (see https://peerj.com/articles/cs-190.pdf). Currently, this mode is available using the dedicated function or an environment variable \ref STARPU_HETEROPRIO_USE_LA, and can be configured using environment variables. \code{.c} void starpu_heteroprio_set_use_locality(unsigned sched_ctx_id, unsigned use_locality); \endcode In this mode, multiple strategies are available to determine which memory node's workers are the most qualified for executing a specific task. This strategy can be set with \ref STARPU_LAHETEROPRIO_PUSH and available strategies are: - WORKER: the worker which pushed the task is preferred for the execution. - LcS: the node with the shortest data transfer time (estimated by StarPU) is the most qualified - LS_SDH: the node with the smallest data amount to be transferred will be preferred. - LS_SDH2: similar to LS_SDH, but data in write access is counted in a quadratic manner to give them more importance. - LS_SDHB: similar to LS_SDH, but data in write access is balanced with a coefficient (its value is set to 1000) and for the same amount of data, the one with fewer pieces of data to be transferred will be preferred. - LC_SMWB: similar to LS_SDH, but the amount of data in write access gets multiplied by a coefficient which gets closer to 2 as the amount of data in read access gets larger than the data in write access. - AUTO: strategy by default, this one selects the best strategy and changes it in runtime to improve performance Other environment variables to configure LaHeteteroprio are documented in \ref ConfiguringLaHeteroprio \subsection AutoHeteroprio Using Heteroprio in auto-calibration mode In this mode, Heteroprio saves data about each program execution, in order to improve future ones. By default, these files are stored in the folder used by perfmodel, but this can be changed using the \ref STARPU_HETEROPRIO_DATA_DIR environment variable. You can also specify the data filename directly using \ref STARPU_HETEROPRIO_DATA_FILE. Additionally, to assign priorities to tasks, Heteroprio needs a way to detect that some tasks are similar. By default, Heteroprio looks for tasks with the same perfmodel, or with the same codelet's name if no perfmodel was assigned. This behavior can be changed to only consider the codelet's name by setting \ref STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY to 1 Other environment variables to configure AutoHeteteroprio are documented in \ref ConfiguringAutoHeteroprio */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/advanced_tasks.doxy000066400000000000000000000532331507764646700275250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page AdvancedTasksInStarPU Advanced Tasks In StarPU \section TaskDependencies Task Dependencies \subsection SequentialConsistency Sequential Consistency By default, task dependencies are inferred from data dependency (sequential coherency) by StarPU. The application can however disable sequential coherency for some data, and dependencies can be specifically expressed. Setting (or unsetting) sequential consistency can be done at the data level by calling starpu_data_set_sequential_consistency_flag() for a specific data (an example is in the file examples/dependency/task_end_dep.c) or starpu_data_set_default_sequential_consistency_flag() for all data (an example is in the file tests/main/subgraph_repeat.c). The sequential consistency mode can also be gotten by calling starpu_data_get_sequential_consistency_flag() for a specific data or get the default sequential consistency flag by calling starpu_data_get_default_sequential_consistency_flag(). Setting (or unsetting) sequential consistency can also be done at task level by setting the field starpu_task::sequential_consistency to \c 0 (an example is in the file tests/main/deploop.c). Sequential consistency can also be set (or unset) for each handle of a specific task, this is done by using the field starpu_task::handles_sequential_consistency. When set, its value should be an array with the number of elements being the number of handles for the task, each element of the array being the sequential consistency for the \c i-th handle of the task. The field can easily be set when calling starpu_task_insert() with the flag ::STARPU_HANDLES_SEQUENTIAL_CONSISTENCY \code{.c} char *seq_consistency = malloc(cl.nbuffers * sizeof(char)); seq_consistency[0] = 1; seq_consistency[1] = 1; seq_consistency[2] = 0; ret = starpu_task_insert(&cl, STARPU_RW, handleA, STARPU_RW, handleB, STARPU_RW, handleC, STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, seq_consistency, 0); free(seq_consistency); \endcode A full code example is available in the file examples/dependency/sequential_consistency.c. The internal algorithm used by StarPU to set up implicit dependency is as follows: \code{.c} if (sequential_consistency(task) == 1) for(i=0 ; itests/main/tag_task_data_deps.c shows how to set dependencies between tasks with different functions. The termination of a task can be delayed through the function starpu_task_end_dep_add() which specifies the number of calls to the function starpu_task_end_dep_release() needed to trigger the task termination. One can also use starpu_task_declare_end_deps() or starpu_task_declare_end_deps_array() to delay the termination of a task until the termination of other tasks. A simple example is available in the file tests/main/task_end_dep.c. starpu_tag_notify_from_apps() can be used to explicitly unlock a specific tag, but if it is called several times on the same tag, notification will be done only on first call. However, one can call starpu_tag_restart() to clear the already notified status of a tag which is not associated with a task, and then calling starpu_tag_notify_from_apps() again will notify the successors. Alternatively, starpu_tag_notify_restart_from_apps() can be used to atomically call both starpu_tag_notify_from_apps() and starpu_tag_restart() on a specific tag. To get the task associated to a specific tag, one can call starpu_tag_get_task(). Once the corresponding task has been executed and when there is no other tag that depend on this tag anymore, one can call starpu_tag_remove() to release the resources associated to the specific tag. One can use starpu_tag_clear() to clear all the tags (but it requires that no starpu_tag_wait_array() call is currently pending). \section WaitingForTasks Waiting For Tasks StarPU provides several advanced functions to wait for termination of tasks. One can wait for some explicit tasks, or for some tag attached to some tasks, or for some data results. starpu_task_wait_array() is a function that waits for an array of tasks to complete their execution. starpu_task_wait_for_all_in_ctx() is a function that waits for all tasks in a specific context to complete their execution. starpu_task_wait_for_n_submitted_in_ctx() is a function that waits for a specified number of tasks to be submitted to a specific context. starpu_task_wait_for_no_ready() is a function that waits for all tasks to become unready, which means that they are either completed or blocked on a data dependency. In order to successfully call these functions to wait for termination of tasks, starpu_task::detach should be set to 0 before task submission. The function starpu_task_nready() returns the number of tasks that are ready to execute, which means that all their data dependencies are satisfied and they are waiting to be scheduled, while the function starpu_task_nsubmitted() returns the number of tasks that have been submitted and not completed yet. The function starpu_task_finished() can be used to determine whether a specific task has completed its execution. starpu_tag_wait() and starpu_tag_wait_array() are two blocking functions that can be used to wait for tasks with specific tags to complete their execution. The former one waits for a specified task to complete while the latter one waits for a group of tasks to complete. When using e.g. starup_task_insert(), it may be more convenient to wait for the result of a task rather than waiting for a given task explicitly. That can be done thanks to starpu_data_acquire() or starpu_data_acquire_cb() that wait for the result to be available in the home node of the data. That will thus wait for all the tasks that lead to that result. One can also use starpu_data_acquire_on_node() and give it ::STARPU_ACQUIRE_NO_NODE to tell to just wait for tasks to complete, but not wait for the data to be available in the home node. One can also use starpu_data_acquire_try() or starpu_data_acquire_on_node_try() to just test for the termination. If a task is created by using starpu_task_create() or starpu_task_insert(), the field starpu_task::destroy is set to 1 by default, which means that the task structure will be automatically freed after termination. On the other hand, if the task is initialized by using starpu_task_init(), the field starpu_task::destroy is set to 0 by default, which means that the task structure will not be freed until starpu_task_destroy() is called explicitly. Otherwise, we can manually set starpu_task::destroy to 1 before submission or call starpu_task_set_destroy() after submission to activate the automatic freeing of the task structure. \section UsingMultipleImplementationsOfACodelet Using Multiple Implementations Of A Codelet One may want to write multiple implementations of a codelet for a single type of device and let StarPU choose which one to run. As an example, we will show how to use SSE to scale a vector. The codelet can be written as follows: \code{.c} #include void scal_sse_func(void *buffers[], void *cl_arg) { float *vector = (float *) STARPU_VECTOR_GET_PTR(buffers[0]); unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); unsigned int n_iterations = n/4; if (n % 4 != 0) n_iterations++; __m128 *VECTOR = (__m128*) vector; __m128 factor __attribute__((aligned(16))); factor = _mm_set1_ps(*(float *) cl_arg); unsigned int i; for (i = 0; i < n_iterations; i++) VECTOR[i] = _mm_mul_ps(factor, VECTOR[i]); } \endcode \code{.c} struct starpu_codelet cl = { .cpu_funcs = { scal_cpu_func, scal_sse_func }, .cpu_funcs_name = { "scal_cpu_func", "scal_sse_func" }, .nbuffers = 1, .modes = { STARPU_RW } }; \endcode The full code of this example is available in the file examples/basic_examples/vector_scal.c. Schedulers which are multi-implementation aware (only dmda and pheft for now) will use the performance models of all the provided implementations, and pick the one which seems to be the fastest. \section EnablingImplementationAccordingToCapabilities Enabling Implementation According To Capabilities Some implementations may not run on some devices. For instance, some CUDA devices do not support double floating point precision, and thus the kernel execution would just fail; or the device may not have enough shared memory for the implementation being used. The field starpu_codelet::can_execute permits to express this. For instance: \code{.c} static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl) { const struct cudaDeviceProp *props; if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) return 1; /* Cuda device */ props = starpu_cuda_get_device_properties(workerid); if (props->major >= 2 || props->minor >= 3) /* At least compute capability 1.3, supports doubles */ return 1; /* Old card, does not support doubles */ return 0; } struct starpu_codelet cl = { .can_execute = can_execute, .cpu_funcs = { cpu_func }, .cpu_funcs_name = { "cpu_func" }, .cuda_funcs = { gpu_func } .nbuffers = 1, .modes = { STARPU_RW } }; \endcode A full example is available in the file examples/reductions/dot_product.c. This can be essential e.g. when running on a machine which mixes various models of CUDA devices, to take benefit from the new models without crashing on old models. Note: the function starpu_codelet::can_execute is called by the scheduler each time it tries to match a task with a worker, and should thus be very fast. The function starpu_cuda_get_device_properties() provides quick access to CUDA properties of CUDA devices to achieve such efficiency. Another example is to compile CUDA code for various compute capabilities, resulting with two CUDA functions, e.g. scal_gpu_13 for compute capability 1.3, and scal_gpu_20 for compute capability 2.0. Both functions can be provided to StarPU by using starpu_codelet::cuda_funcs, and starpu_codelet::can_execute can then be used to rule out the scal_gpu_20 variant on a CUDA device which will not be able to execute it: \code{.c} static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl) { const struct cudaDeviceProp *props; if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) return 1; /* Cuda device */ if (nimpl == 0) /* Trying to execute the 1.3 capability variant, we assume it is ok in all cases. */ return 1; /* Trying to execute the 2.0 capability variant, check that the card can do it. */ props = starpu_cuda_get_device_properties(workerid); if (props->major >= 2 || props->minor >= 0) /* At least compute capability 2.0, can run it */ return 1; /* Old card, does not support 2.0, will not be able to execute the 2.0 variant. */ return 0; } struct starpu_codelet cl = { .can_execute = can_execute, .cpu_funcs = { cpu_func }, .cpu_funcs_name = { "cpu_func" }, .cuda_funcs = { scal_gpu_13, scal_gpu_20 }, .nbuffers = 1, .modes = { STARPU_RW } }; \endcode Another example is having specialized implementations for some given common sizes, for instance here we have a specialized implementation for 1024x1024 matrices: \code{.c} static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl) { const struct cudaDeviceProp *props; if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) return 1; /* Cuda device */ switch (nimpl) { case 0: /* Trying to execute the generic capability variant. */ return 1; case 1: { /* Trying to execute the size == 1024 specific variant. */ struct starpu_matrix_interface *interface = starpu_data_get_interface_on_node(task->handles[0]); return STARPU_MATRIX_GET_NX(interface) == 1024 && STARPU_MATRIX_GET_NY(interface == 1024); } } } struct starpu_codelet cl = { .can_execute = can_execute, .cpu_funcs = { cpu_func }, .cpu_funcs_name = { "cpu_func" }, .cuda_funcs = { potrf_gpu_generic, potrf_gpu_1024 }, .nbuffers = 1, .modes = { STARPU_RW } }; \endcode Note that the most generic variant should be provided first, as some schedulers are not able to try the different variants. \section GettingTaskChildren Getting Task Children It may be interesting to get the list of tasks which depend on a given task, notably when using implicit dependencies, since this list is computed by StarPU. starpu_task_get_task_succs() or starpu_task_get_task_scheduled_succs() provides it. For instance: \code{.c} struct starpu_task *tasks[4]; ret = starpu_task_get_task_succs(task, sizeof(tasks)/sizeof(*tasks), tasks); \endcode And the full example of getting task children is available in the file tests/main/get_children_tasks.c \section ParallelTasks Parallel Tasks StarPU can leverage existing parallel computation libraries by the means of parallel tasks. A parallel task is a task which is run by a set of CPUs (called a parallel or combined worker) at the same time, by using an existing parallel CPU implementation of the computation to be achieved. This can also be useful to improve the load balance between slow CPUs and fast GPUs: since CPUs work collectively on a single task, the completion time of tasks on CPUs become comparable to the completion time on GPUs, thus relieving from granularity discrepancy concerns. hwloc support needs to be enabled to get good performance, otherwise StarPU will not know how to better group cores. Two modes of execution exist to accommodate with existing usages. \subsection Fork-modeParallelTasks Fork-mode Parallel Tasks In the Fork mode, StarPU will call the codelet function on one of the CPUs of the combined worker. The codelet function can use starpu_combined_worker_get_size() to get the number of threads it is allowed to start to achieve the computation. The CPU binding mask for the whole set of CPUs is already enforced, so that threads created by the function will inherit the mask, and thus execute where StarPU expected, the OS being in charge of choosing how to schedule threads on the corresponding CPUs. The application can also choose to bind threads by hand, using e.g. sched_getaffinity to know the CPU binding mask that StarPU chose. For instance, using OpenMP (full source is available in examples/openmp/vector_scal.c): \snippet forkmode.c To be included. You should update doxygen if you see this text. Other examples include for instance calling a BLAS parallel CPU implementation (see examples/mult/xgemm.c). \subsection SPMD-modeParallelTasks SPMD-mode Parallel Tasks In the SPMD mode, StarPU will call the codelet function on each CPU of the combined worker. The codelet function can use starpu_combined_worker_get_size() to get the total number of CPUs involved in the combined worker, and thus the number of calls that are made in parallel to the function, and starpu_combined_worker_get_rank() to get the rank of the current CPU within the combined worker. For instance: \code{.c} static void func(void *buffers[], void *args) { unsigned i; float *factor = _args; struct starpu_vector_interface *vector = buffers[0]; unsigned n = STARPU_VECTOR_GET_NX(vector); float *val = (float *)STARPU_VECTOR_GET_PTR(vector); /* Compute slice to compute */ unsigned m = starpu_combined_worker_get_size(); unsigned j = starpu_combined_worker_get_rank(); unsigned slice = (n+m-1)/m; for (i = j * slice; i < (j+1) * slice && i < n; i++) val[i] *= *factor; } static struct starpu_codelet cl = { .modes = { STARPU_RW }, .type = STARPU_SPMD, .max_parallelism = INT_MAX, .cpu_funcs = { func }, .cpu_funcs_name = { "func" }, .nbuffers = 1, } \endcode A full example is available in examples/spmd/vector_scal_spmd.c. Of course, this trivial example will not really benefit from parallel task execution, and was only meant to be simple to understand. The benefit comes when the computation to be done is so that threads have to e.g. exchange intermediate results, or write to the data in a complex but safe way in the same buffer. \subsection ParallelTasksPerformance Parallel Tasks Performance To benefit from parallel tasks, a parallel-task-aware StarPU scheduler has to be used. When exposed to codelets with a flag ::STARPU_FORKJOIN or ::STARPU_SPMD, the schedulers pheft (parallel-heft) and peager (parallel eager) will indeed also try to execute tasks with several CPUs. It will automatically try the various available combined worker sizes (making several measurements for each worker size) and thus be able to avoid choosing a large combined worker if the codelet does not actually scale so much. Examples using parallel-task-aware StarPU scheduler are available in tests/parallel_tasks/parallel_kernels.c and tests/parallel_tasks/parallel_kernels_spmd.c. This is however for now only proof of concept, and has not really been optimized yet. \subsection CombinedWorkers Combined Workers By default, StarPU creates combined workers according to the architecture structure as detected by hwloc. It means that for each object of the hwloc topology (NUMA node, socket, cache, ...) a combined worker will be created. If some nodes of the hierarchy have a big arity (e.g. many cores in a socket without a hierarchy of shared caches), StarPU will create combined workers of intermediate sizes. The variable \ref STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER permits to tune the maximum arity between levels of combined workers. The combined workers actually produced can be seen in the output of the tool starpu_machine_display (the environment variable \ref STARPU_SCHED has to be set to a combined worker-aware scheduler such as pheft or peager). \subsection ConcurrentParallelTasks Concurrent Parallel Tasks Unfortunately, many environments and libraries do not support concurrent calls. For instance, most OpenMP implementations (including the main ones) do not support concurrent pragma omp parallel statements without nesting them in another pragma omp parallel statement, but StarPU does not yet support creating its CPU workers by using such pragma. Other parallel libraries are also not safe when being invoked concurrently from different threads, due to the use of global variables in their sequential sections, for instance. The solution is then to use only one combined worker at a time. This can be done by setting the field starpu_conf::single_combined_worker to 1, or setting the environment variable \ref STARPU_SINGLE_COMBINED_WORKER to 1. StarPU will then run only one parallel task at a time (but other CPU and GPU tasks are not affected and can be run concurrently). The parallel task scheduler will however still try varying combined worker sizes to look for the most efficient ones. A full example is available in examples/spmd/vector_scal_spmd.c. \section SynchronizationTasks Synchronization Tasks For the application convenience, it may be useful to define tasks which do not actually make any computation, but wear for instance dependencies between other tasks or tags, or to be submitted in callbacks, etc. The obvious way is of course to make kernel functions empty, but such task will thus have to wait for a worker to become ready, transfer data, etc. A much lighter way to define a synchronization task is to set its field starpu_task::cl to NULL. The task will thus be a mere synchronization point, without any data access or execution content: as soon as its dependencies become available, it will terminate, call the callbacks, and release dependencies. An intermediate solution is to define a codelet with its field starpu_codelet::where set to \ref STARPU_NOWHERE, for instance: \code{.c} struct starpu_codelet cl = { .where = STARPU_NOWHERE, .nbuffers = 1, .modes = { STARPU_R }, } task = starpu_task_create(); task->cl = &cl; task->handles[0] = handle; starpu_task_submit(task); \endcode will create a task which simply waits for the value of handle to be available for read. This task can then be depended on, etc. A full example is available in examples/filters/fmultiple_manual.c. StarPU provides starpu_task_create_sync() to create a new synchronization task, the same as the previous example but without submitting the task. The function starpu_create_sync_task() is also used to create a new synchronization task and submit it, which is a task that waits for specific tags and calls the specified callback function when the task is finished. The function starpu_create_callback_task() can create and submit a synchronization task, which is a task that completes immediately and calls the specified callback function right after. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/bubble.doxy000066400000000000000000000135421507764646700260050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page HierarchicalDAGS Hierarchical DAGS The STF model has the intrinsic limitation of supporting static task graphs only, which leads to potential submission overhead and to a static task graph which is not necessarily adapted for execution on heterogeneous systems. To address these problems, we have extended the STF model to enable tasks subgraphs at runtime. We refer to these tasks as hierarchical tasks. This approach allows for a more dynamic task graph. This allows to dynamically adapt the granularity to meet the optimal size of the targeted computing resource. Hierarchical tasks are tasks that can transform themselves into a new task-graph dynamically at runtime. Programmers submit a coarse version of the DAG, called the bubbles graph, which represents the general shape of the application tasks graph. The execution of this bubble graph will generate and submit the computing tasks of the application. It is up to application programmers to decide how to build the bubble graph (i.e. how to structure the computation tasks graph to create some groups of tasks). Dependencies between bubbles are automatically deduced from dependencies between their computing tasks. //Figure of bubble task graph and computing task graph that maps with it \section BubblesExamples An Example In order to understand the hierarchical tasks model, an example of "bubblification" is showed here. We start from a simple example, multiplying the elements of a vector. \subsection BubblesInitialVersion Initial Version A computation is done several times on a vector split in smaller vectors. For each step and each sub-vector, a task is generated to perform the computation. \code{.c} void func_cpu(void *descr[], void *_args) { (void) _args; int x; int nx = STARPU_VECTOR_GET_NX(descr[0]); TYPE *v = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); for(x=0 ; xbubble/tests/vector/vector.c. To define a hierarchical task, one needs to define the fields starpu_codelet::bubble_func and starpu_codelet::bubble_gen_dag_func. The field starpu_codelet::bubble_func is a pointer function which will be executed by StarPU to decide at runtime if the task must be transformed into a bubble. If the function returns a non-zero value, the function starpu_codelet::bubble_gen_dag_func will be executed to create the new graph of tasks. The pointer functions can also be defined when calling starpu_task_insert() by using the arguments ::STARPU_BUBBLE_FUNC and ::STARPU_BUBBLE_GEN_DAG_FUNC. Both these functions can be passed parameters through the arguments ::STARPU_BUBBLE_FUNC_ARG and ::STARPU_BUBBLE_GEN_DAG_FUNC_ARG When executed, the function starpu_codelet::bubble_func will be given as parameter the task being checked, and the value specified with ::STARPU_BUBBLE_FUNC_ARG. When executed, the function starpu_codelet::bubble_gen_dag_func will be given as parameter the task being turned into a hierarchical task and the value specified with ::STARPU_BUBBLE_GEN_DAG_FUNC_ARG. An example involving these functions is in bubble/tests/basic/brec.c. And more examples are available in bubble/tests/basic/*.c. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/code/000077500000000000000000000000001507764646700245525ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/code/complex.c000066400000000000000000000021551507764646700263700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ //! [To be included. You should update doxygen if you see this text.] #define STARPU_COMPLEX_GET_REAL(interface) (((struct starpu_complex_interface *)(interface))->real) #define STARPU_COMPLEX_GET_IMAGINARY(interface) (((struct starpu_complex_interface *)(interface))->imaginary) #define STARPU_COMPLEX_GET_NX(interface) (((struct starpu_complex_interface *)(interface))->nx) //! [To be included. You should update doxygen if you see this text.] starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/code/disk_compute.c000066400000000000000000000112121507764646700274010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ //! [To be included. You should update doxygen if you see this text.] /* Try to write into disk memory * Use mechanism to push data from main ram to disk ram */ #include #include #include #include #include #include #define NX (1024) int main(int argc, char **argv) { /* Initialize StarPU with default configuration */ int ret = starpu_init(NULL); if (ret == -ENODEV) goto enodev; /* Initialize path and name */ char pid_str[16]; int pid = getpid(); snprintf(pid_str, sizeof(pid_str), "%d", pid); const char *name_file_start = "STARPU_DISK_COMPUTE_DATA_"; const char *name_file_end = "STARPU_DISK_COMPUTE_DATA_RESULT_"; char * path_file_start = malloc(strlen(base) + 1 + strlen(name_file_start) + 1); strcpy(path_file_start, base); strcat(path_file_start, "/"); strcat(path_file_start, name_file_start); char * path_file_end = malloc(strlen(base) + 1 + strlen(name_file_end) + 1); strcpy(path_file_end, base); strcat(path_file_end, "/"); strcat(path_file_end, name_file_end); /* register a disk */ int new_dd = starpu_disk_register(&starpu_disk_unistd_ops, (void *) base, 1024*1024*1); /* can't write on /tmp/ */ if (new_dd == -ENOENT) goto enoent; unsigned dd = (unsigned) new_dd; printf("TEST DISK MEMORY \n"); /* Imagine, you want to compute data */ int *A; int *C; starpu_malloc_flags((void **)&A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_malloc_flags((void **)&C, NX*sizeof(int), STARPU_MALLOC_COUNT); unsigned int j; /* you register them in a vector */ for(j = 0; j < NX; ++j) { A[j] = j; C[j] = 0; } /* you create a file to store the vector ON the disk */ FILE * f = fopen(path_file_start, "wb+"); if (f == NULL) goto enoent2; /* store it in the file */ fwrite(A, sizeof(int), NX, f); /* close the file */ fclose(f); /* create a file to store result */ f = fopen(path_file_end, "wb+"); if (f == NULL) goto enoent2; /* replace all data by 0 */ fwrite(C, sizeof(int), NX, f); /* close the file */ fclose(f); /* And now, you want to use your data in StarPU */ /* Open the file ON the disk */ void * data = starpu_disk_open(dd, (void *) name_file_start, NX*sizeof(int)); void * data_result = starpu_disk_open(dd, (void *) name_file_end, NX*sizeof(int)); starpu_data_handle_t vector_handleA, vector_handleC; /* register vector in starpu */ starpu_vector_data_register(&vector_handleA, dd, (uintptr_t) data, NX, sizeof(int)); /* and do what you want with it, here we copy it into an other vector */ starpu_vector_data_register(&vector_handleC, dd, (uintptr_t) data_result, NX, sizeof(int)); starpu_data_cpy(vector_handleC, vector_handleA, 0, NULL, NULL); /* free them */ starpu_data_unregister(vector_handleA); starpu_data_unregister(vector_handleC); /* close them in StarPU */ starpu_disk_close(dd, data, NX*sizeof(int)); starpu_disk_close(dd, data_result, NX*sizeof(int)); /* check results */ f = fopen(path_file_end, "rb+"); if (f == NULL) goto enoent; /* take data */ fread(C, sizeof(int), NX, f); /* close the file */ fclose(f); int try = 1; for (j = 0; j < NX; ++j) if (A[j] != C[j]) { printf("Fail A %d != C %d \n", A[j], C[j]); try = 0; } starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); unlink(path_file_start); unlink(path_file_end); free(path_file_start); free(path_file_end); /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); if(try) printf("TEST SUCCESS\n"); else printf("TEST FAIL\n"); return (try ? EXIT_SUCCESS : EXIT_FAILURE); enodev: return 77; enoent2: starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); enoent: unlink(path_file_start); unlink(path_file_end); free(path_file_start); free(path_file_end); starpu_shutdown(); return 77; } //! [To be included. You should update doxygen if you see this text.] starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/code/disk_copy.c000066400000000000000000000076501507764646700267120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ //! [To be included. You should update doxygen if you see this text.] /* Try to write into disk memory * Use mechanism to push data from main ram to disk ram */ #include #include #include #include /* size of one vector */ #define NX (30*1000000/sizeof(double)) #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) int main(int argc, char **argv) { double *A, *F; /* limit main ram to force to push in disk */ setenv("STARPU_LIMIT_CPU_MEM", "160", 1); /* Initialize StarPU with default configuration */ int ret = starpu_init(NULL); if (ret == -ENODEV) goto enodev; /* register a disk */ int new_dd = starpu_disk_register(&starpu_disk_unistd_ops, (void *) "/tmp/", 1024*1024*200); /* can't write on /tmp/ */ if (new_dd == -ENOENT) goto enoent; /* allocate two memory spaces */ starpu_malloc_flags((void **)&A, NX*sizeof(double), STARPU_MALLOC_COUNT); starpu_malloc_flags((void **)&F, NX*sizeof(double), STARPU_MALLOC_COUNT); FPRINTF(stderr, "TEST DISK MEMORY \n"); unsigned int j; /* initialization with bad values */ for(j = 0; j < NX; ++j) { A[j] = j; F[j] = -j; } starpu_data_handle_t vector_handleA, vector_handleB, vector_handleC, vector_handleD, vector_handleE, vector_handleF; /* register vector in starpu */ starpu_vector_data_register(&vector_handleA, STARPU_MAIN_RAM, (uintptr_t)A, NX, sizeof(double)); starpu_vector_data_register(&vector_handleB, -1, (uintptr_t) NULL, NX, sizeof(double)); starpu_vector_data_register(&vector_handleC, -1, (uintptr_t) NULL, NX, sizeof(double)); starpu_vector_data_register(&vector_handleD, -1, (uintptr_t) NULL, NX, sizeof(double)); starpu_vector_data_register(&vector_handleE, -1, (uintptr_t) NULL, NX, sizeof(double)); starpu_vector_data_register(&vector_handleF, STARPU_MAIN_RAM, (uintptr_t)F, NX, sizeof(double)); /* copy vector A->B, B->C... */ starpu_data_cpy(vector_handleB, vector_handleA, 0, NULL, NULL); starpu_data_cpy(vector_handleC, vector_handleB, 0, NULL, NULL); starpu_data_cpy(vector_handleD, vector_handleC, 0, NULL, NULL); starpu_data_cpy(vector_handleE, vector_handleD, 0, NULL, NULL); starpu_data_cpy(vector_handleF, vector_handleE, 0, NULL, NULL); /* StarPU does not need to manipulate the array anymore so we can stop * monitoring it */ /* free them */ starpu_data_unregister(vector_handleA); starpu_data_unregister(vector_handleB); starpu_data_unregister(vector_handleC); starpu_data_unregister(vector_handleD); starpu_data_unregister(vector_handleE); starpu_data_unregister(vector_handleF); /* check if computation is correct */ int try = 1; for (j = 0; j < NX; ++j) if (A[j] != F[j]) { printf("Fail A %f != F %f \n", A[j], F[j]); try = 0; } /* free last vectors */ starpu_free_flags(A, NX*sizeof(double), STARPU_MALLOC_COUNT); starpu_free_flags(F, NX*sizeof(double), STARPU_MALLOC_COUNT); /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); if(try) FPRINTF(stderr, "TEST SUCCESS\n"); else FPRINTF(stderr, "TEST FAIL\n"); return (try ? EXIT_SUCCESS : EXIT_FAILURE); enodev: return 77; enoent: return 77; } //! [To be included. You should update doxygen if you see this text.] starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/code/forkmode.c000066400000000000000000000026641507764646700265340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ //! [To be included. You should update doxygen if you see this text.] void scal_cpu_func(void *buffers[], void *_args) { unsigned i; float *factor = _args; struct starpu_vector_interface *vector = buffers[0]; unsigned n = STARPU_VECTOR_GET_NX(vector); float *val = (float *)STARPU_VECTOR_GET_PTR(vector); #pragma omp parallel for num_threads(starpu_combined_worker_get_size()) for (i = 0; i < n; i++) val[i] *= *factor; } static struct starpu_codelet cl = { .modes = { STARPU_RW }, .where = STARPU_CPU, .type = STARPU_FORKJOIN, .max_parallelism = INT_MAX, .cpu_funcs = {scal_cpu_func}, .cpu_funcs_name = {"scal_cpu_func"}, .nbuffers = 1, }; //! [To be included. You should update doxygen if you see this text.] starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/code/multiformat.c000066400000000000000000000042071507764646700272640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ //! [To be included. You should update doxygen if you see this text.] #define NX 1024 struct point array_of_structs[NX]; starpu_data_handle_t handle; /* * The conversion of a piece of data is itself a task, though it is created, * submitted and destroyed by StarPU internals and not by the user. Therefore, * we have to define two codelets. * Note that for now the conversion from the CPU format to the GPU format has to * be executed on the GPU, and the conversion from the GPU to the CPU has to be * executed on the CPU. */ #ifdef STARPU_USE_OPENCL void cpu_to_opencl_opencl_func(void *buffers[], void *args); struct starpu_codelet cpu_to_opencl_cl = { .where = STARPU_OPENCL, .opencl_funcs = { cpu_to_opencl_opencl_func }, .nbuffers = 1, .modes = { STARPU_RW } }; void opencl_to_cpu_func(void *buffers[], void *args); struct starpu_codelet opencl_to_cpu_cl = { .where = STARPU_CPU, .cpu_funcs = { opencl_to_cpu_func }, .cpu_funcs_name = { "opencl_to_cpu_func" }, .nbuffers = 1, .modes = { STARPU_RW } }; #endif struct starpu_multiformat_data_interface_ops format_ops = { #ifdef STARPU_USE_OPENCL .opencl_elemsize = 2 * sizeof(float), .cpu_to_opencl_cl = &cpu_to_opencl_cl, .opencl_to_cpu_cl = &opencl_to_cpu_cl, #endif .cpu_elemsize = 2 * sizeof(float), ... }; starpu_multiformat_data_register(handle, STARPU_MAIN_RAM, &array_of_structs, NX, &format_ops); //! [To be included. You should update doxygen if you see this text.] starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/code/simgrid.c000066400000000000000000000022321507764646700263530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ //! [To be included. You should update doxygen if you see this text.] static struct starpu_codelet cl_potrf = { .cpu_funcs = {chol_cpu_codelet_update_potrf}, .cpu_funcs_name = {"chol_cpu_codelet_update_potrf"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_potrf}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .model = &chol_model_potrf }; //! [To be included. You should update doxygen if you see this text.] starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/cuda_support.doxy000066400000000000000000000026621507764646700272630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page CUDASupport CUDA Support StarPU sets the current CUDA device by calling starpu_cuda_set_device() which takes an integer argument representing the device number, and sets the current device to the specified device number. By setting the current device, applications can select which CUDA device to use for their computations, enabling efficient management of multiple CUDA devices in a system. We can call starpu_cuda_get_nvmldev() to get identifier of the NVML device associated with a given CUDA device. Three macros STARPU_CUDA_REPORT_ERROR(), STARPU_CUBLAS_REPORT_ERROR(), and STARPU_CUSPARSE_REPORT_ERROR() are useful for debugging and troubleshooting, as they provide detailed information about the error that occur during CUDA or CUBLAS execution. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/debugging_tools.doxy000066400000000000000000000122371507764646700277250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page DebuggingTools Debugging Tools StarPU provides several tools to help debugging applications. Execution traces can be generated and displayed graphically, see \ref GeneratingTracesWithFxT. \section DebuggingInGeneral TroubleShooting In General Generally-speaking, if you have troubles, pass \ref enable-debug "--enable-debug" to configure to enable some checks which impact performance, but will catch common issues, possibly earlier than the actual problem you are observing, which may just be a consequence of a bug that happened earlier. Also, make sure not to have the \ref enable-fast "--enable-fast" \c configure option, which drops very useful catchup assertions. If your program is valgrind-safe, you can use it, see \ref UsingOtherDebugger. Depending on your toolchain, it might happen that you get undefined reference to `__stack_chk_guard' errors. In that case, use the --disable-fstack-protector-all option to avoid the issue. Then, if your program crashes with an assertion error, a segfault, etc. you can send us the result of \verbatim thread apply all bt \endverbatim run in gdb at the point of the crash. In case your program just hangs, but it may also be useful in case of a crash too, it helps to source gdbinit as described in the next section to be able to run and send us the output of the following commands: \verbatim starpu-workers starpu-tasks starpu-print-requests starpu-print-prequests starpu-print-frrequests starpu-print-irrequests \endverbatim To give us an idea of what is happening within StarPU. If the outputs are not too long, you can even run \verbatim starpu-all-tasks starpu-print-all-tasks starpu-print-datas-summary starpu-print-datas \endverbatim \section UsingGdb Using The Gdb Debugger Some gdb helpers are provided to show the whole StarPU state: \verbatim (gdb) source tools/gdbinit (gdb) help starpu \endverbatim For instance,
  • one can print all tasks with starpu-print-all-tasks,
  • print all data with starpu-print-datas,
  • print all pending data transfers with starpu-print-prequests, starpu-print-requests, starpu-print-frequests, starpu-print-irequests,
  • print pending MPI requests with starpu-mpi-print-detached-requests
Some functions can only work if \ref enable-debug "--enable-debug" was passed to configure (because they impact performance) \section UsingOtherDebugger Using Other Debugging Tools Valgrind can be used on StarPU: valgrind.h just needs to be found at configure time, to tell valgrind about some known false positives and disable host memory pinning. Other known false positives can be suppressed by giving the suppression files in tools/valgrind/*.suppr to valgrind's --suppressions option. The environment variable \ref STARPU_DISABLE_KERNELS can also be set to 1 to make StarPU does everything (schedule tasks, transfer memory, etc.) except actually calling the application-provided kernel functions, i.e. the computation will not happen. This permits to quickly check that the task scheme is working properly. \section WatchdogSupport Watchdog Support starpu_task_watchdog_set_hook() is used to set a callback function "watchdog hook" that will be called when there is no task completed during an expected time. The purpose of the watchdog hook is to allow the application to get the state for debugging. \section UsingTheTemanejoTaskDebugger Using The Temanejo Task Debugger StarPU can connect to Temanejo >= 1.0rc2 (see http://www.hlrs.de/temanejo), to permit nice visual task debugging. To do so, build Temanejo's libayudame.so, install Ayudame.h to e.g. /usr/local/include, apply the tools/patch-ayudame to it to fix C build, re-configure, make sure that it found it, rebuild StarPU. Run the Temanejo GUI, give it the path to your application, any options you want to pass it, the path to libayudame.so. It permits to visualize the task graph, add breakpoints, continue execution task-by-task, and run gdb on a given task, etc. \image html temanejo.png \image latex temanejo.png "" width=\textwidth Make sure to specify at least the same number of CPUs in the dialog box as your machine has, otherwise an error will happen during execution. Future versions of Temanejo should be able to tell StarPU the number of CPUs to use. Tag numbers have to be below 4000000000000000000ULL to be usable for Temanejo (to distinguish them from tasks). */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/extensions_intro.doxy000066400000000000000000000072631507764646700301670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \intropage{IntroExtensions, --------- StarPU Extensions ---------} \webforeword This part explains the advanced concepts of StarPU. It is intended for users whose applications need more than basic task submission. You can learn more knowledge about some important and core concepts in StarPU:
  • After reading Chapter \ref TasksInStarPU, you can get more information about how to manage tasks in StarPU in Chapter \ref AdvancedTasksInStarPU.
  • After reading Chapter \ref DataManagement, you can know more about how to manage the data layout of your applications in Chapter \ref AdvancedDataManagement.
  • After reading Chapter \ref Scheduling, you can get some advanced scheduling policies in StarPU in Chapters \ref AdvancedScheduling, \ref SchedulingContexts and \ref SchedulingContextHypervisor.
  • Chapter \ref HowToDefineANewSchedulingPolicy explains how to define a StarPU task scheduling policy either in a basic monolithic way, or in a modular way.
Other chapters cover some further usages of StarPU.
  • Chapters \ref CUDASupport and \ref OpenCLSupport show how to use GPU devices with CUDA or OpenCL. Chapter \ref MaxFPGASupport explains how StarPU support Field Programmable Gate Array (FPGA) applications exploiting DFE configurations.
  • If you need to store more data than what the main memory (RAM) can store, Chapter \ref OutOfCore presents how to add a new memory node on a disk and how to use it.
  • Chapter \ref MPISupport shows how to integrate MPI processes in StarPU.
  • Chapter \ref TCPIPSupport shows a TCP/IP master slave mechanism which can execute application across many remote cores without thinking about data distribution.
  • Chapter \ref Transactions shows how to cancel a sequence of already submitted tasks based on a just-in-time decision.
  • Chapter \ref FaultTolerance explains how StarPU provide supports for failure of tasks or even failure of complete nodes.
  • Chapter \ref FFTSupport explains how StarPU provides a similar library to both fftw and cufft, but by adding a support from both CPUs and GPUs.
  • Chapter \ref SOCLOpenclExtensions explains how OpenCL applications can transparently be run using StarPU, by givings unified access to every available OpenCL device.
  • We propose a hierarchical tasks model in Chapter \ref HierarchicalDAGS to enable tasks subgraphs at runtime for a more dynamic task graph.
  • You can find how to partition a machine into parallel workers in Chapter \ref ParallelWorker.
  • Chapter \ref InteroperabilitySupport shows how StarPU can coexist with other parallel software elements without resulting in computing core oversubscription or undersubscription.
  • Chapter \ref SimGridSupport shows you how to simulate execution on an arbitrary platform.
  • Tools to help debugging applications are presented in Chapter \ref DebuggingTools.
And finally, chapter \ref Helpers gives a list of StarPU utility functions. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/fault_tolerance.doxy000066400000000000000000000044141507764646700277170ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page FaultTolerance Fault Tolerance \section FaultTolerance_Introduction Introduction Due to e.g. hardware error, some tasks may fail, or even complete nodes may fail. For now, StarPU provides some support for failure of tasks. \section TaskRetry Retrying tasks In case a task implementation notices that it fail to compute properly, it can call starpu_task_failed() to notify StarPU of the failure. tests/fault-tolerance/retry.c is an example of coping with such failure: the principle is that when submitting the task, one sets its prologue callback to starpu_task_ft_prologue(). That prologue will turn the task into a meta task, which will manage the repeated submission of try-tasks to perform the computation until one of the computations succeeds. One can create a try-task for the meta task by using starpu_task_ft_create_retry(). By default, try-tasks will be just retried until one of them succeeds (i.e. the task implementation does not call starpu_task_failed()). One can change the behavior by passing a check_failsafe function as prologue parameter, which will be called at the end of the try-task attempt. It can look at starpu_task_get_current()->failed to determine whether the try-task succeeded, in which case it can call starpu_task_ft_success() on the meta-task to notify success, or if it failed, in which case it can call starpu_task_failsafe_create_retry() to create another try-task, and submit it with starpu_task_submit_nodeps(). This can however only work if the task input is not modified, and is thus not supported for tasks with data access mode ::STARPU_RW. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/fft_support.doxy000066400000000000000000000057321507764646700271270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page FFTSupport FFT Support StarPU provides libstarpufft, a library whose design is very similar to both fftw and cufft, the difference being that it takes benefit from both CPUs and GPUs. It should however be noted that GPUs do not have the same precision as CPUs, so the results may be different by a negligible amount. Different precisions are available, namely \c float, \c double and long double precisions, with the following \c fftw naming conventions:
  • double precision structures and functions are named e.g. starpufft_execute()
  • float precision structures and functions are named e.g. starpufftf_execute()
  • long double precision structures and functions are named e.g. starpufftl_execute()
The documentation below is given with names for double precision, replace starpufft_ with starpufftf_ or starpufftl_ as appropriate. Only complex numbers are supported at the moment. The application has to call starpu_init() before calling starpufft functions. Either main memory pointers or data handles can be provided.
  • To provide main memory pointers, use starpufft_start() or starpufft_execute(). Only one FFT can be performed at a time, because StarPU will have to register the data on the fly. In the starpufft_start() case, starpufft_cleanup() needs to be called to unregister the data.
  • To provide data handles (which is preferable), use starpufft_start_handle() (preferred) or starpufft_execute_handle(). Several FFTs tasks can be submitted for a given plan, which permits e.g. to start a series of FFT with just one plan. starpufft_start_handle() is preferable since it does not wait for the task completion, and thus permits to enqueue a series of tasks.
All functions are defined in \ref API_FFT_Support. Some examples illustrating the usage of FFT API are available in the directory starpufft/tests. \section FFTCompilation Compilation The flags required to compile or link against the FFT library are accessible with the following commands: \verbatim $ pkg-config --cflags starpufft-1.4 # options for the compiler $ pkg-config --libs starpufft-1.4 # options for the linker \endverbatim Also pass the option --static if the application is to be linked statically. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/helpers.doxy000066400000000000000000000032301507764646700262050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page Helpers Helpers StarPU provides several utilities functions to help programmers:
  • starpu_conf_noworker() sets configuration fields so that no worker is enabled, i.e. it sets starpu_conf::ncpus to 0, starpu_conf::ncuda to 0, etc.
  • starpu_is_initialized() returns a value indicating whether StarPU is already initialized, starpu_wait_initialized() only returns when the initialization is finished.
  • starpu_topology_print() prints the current topology of the system, and is therefore useful for debugging purposes or for understanding the underlying architecture of the system.
  • starpu_get_version() returns the version of StarPU used when running the application.
  • starpu_sleep() and starpu_usleep() allow the application to pause the execution of the current thread for a specified amount of time. starpu_sleep() pauses the thread for a specified number of seconds and starpu_usleep() for a specified number of microseconds.
*/ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/interoperability.doxy000066400000000000000000000145431507764646700301410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page InteroperabilitySupport Interoperability Support In situations where multiple parallel software elements have to coexist within the same application, uncoordinated accesses to computing units may lead such parallel software elements to collide and interfere. The purpose of the Interoperability routines of StarPU, implemented along the definition of the Resource Management APIs of Project H2020 INTERTWinE, is to enable StarPU to coexist with other parallel software elements without resulting in computing core oversubscription or undersubscription. These routines allow the programmer to dynamically control the computing resources allocated to StarPU, to add or remove processor cores and/or accelerator devices from the pool of resources used by StarPU's workers to execute tasks. They also allow multiple libraries and applicative codes using StarPU simultaneously to select distinct sets of resources independently. Internally, the Interoperability Support is built on top of Scheduling Contexts (see \ref SchedulingContexts). \section ResourceManagement StarPU Resource Management The \c starpurm module is a library built on top of the \c starpu library. It exposes a series of routines prefixed with \c starpurm_ defining the resource management API. All functions are defined in \ref API_Interop_Support. \subsection Build Linking a program with the starpurm module The \c starpurm module must be linked explicitly with the applicative executable using it. Example Makefiles in the starpurm/dev/ subdirectories show how to do so. If the \c pkg-config command is available and the \c PKG_CONFIG_PATH environment variable is properly positioned, the proper settings may be obtained with the following \c Makefile snippet: \code{Makefile} CFLAGS += $(shell pkg-config --cflags starpurm-1.4) LDFLAGS+= $(shell pkg-config --libs-only-L starpurm-1.4) LDLIBS += $(shell pkg-config --libs-only-l starpurm-1.4) \endcode \subsection InitExit Initialization and Shutdown The \c starpurm module is initialized with a call to starpurm_initialize() and must be finalized with a call to starpurm_shutdown(). The basic example is available in starpurm/tests/01_init_exit.c. The \c starpurm module supports CPU cores as well as devices. An integer ID is assigned to each supported device type. The ID assigned to a given device type can be queried with the starpurm_get_device_type_id() routine, which currently expects one of the following strings as argument and returns the corresponding ID:
  • "cpu"
  • "opencl"
  • "cuda"
The \c cpu pseudo device type is defined for convenience and designates CPU cores. The number of units of each type available for computation can be obtained with a call to starpurm_get_nb_devices_by_type(). Each CPU core unit available for computation is designated by its rank among the StarPU CPU worker threads and by its own CPUSET bit. Each non-CPU device unit can be designated both by its rank number in the type, and by the CPUSET bit corresponding to its StarPU device worker thread. The CPUSET of a computing unit or its associated worker can be obtained from its type ID and rank with starpurm_get_device_worker_cpuset(), which returns the corresponding HWLOC CPUSET. An example is available in starpurm/tests/02_list_units.c. \subsection DefCTX Default Context The \c starpurm module assumes a default, global context, manipulated through a series of routines allowing to assign and withdraw computing units from the main StarPU context. Assigning CPU cores can be done with starpurm_assign_cpu_to_starpu() and starpurm_assign_cpu_mask_to_starpu(), and assigning device units can be done with starpurm_assign_device_to_starpu() and starpurm_assign_device_mask_to_starpu(). Conversely, withdrawing CPU cores can be done with starpurm_withdraw_cpu_from_starpu() and starpurm_withdraw_cpu_mask_from_starpu(), and withdrawing device units can be done with starpurm_withdraw_device_from_starpu() and starpurm_withdraw_device_mask_from_starpu(). These routine should typically be used to control resource usage for the main applicative code. An example is available in starpurm/examples/block_test/block_test.c. \subsection TmpCTXS Temporary Contexts Besides the default, global context, \c starpurm can create temporary contexts and launch the computation of kernels confined to these temporary contexts. The routine starpurm_spawn_kernel_on_cpus() can be used to do so: it allocates a temporary context and spawns a kernel within this context. The temporary context is subsequently freed upon completion of the kernel. The temporary context is set as the default context for the kernel throughout its lifespan. This routine should typically be used to control resource usage for a parallel kernel, handled by an external library built on StarPU. Internally, it relies on the use of starpu_sched_ctx_set_context() to set the temporary context as the default context for the parallel kernel, and then restore the main context upon completion. Note: the maximum number of temporary contexts allocated concurrently at any time should not exceed ::STARPU_NMAX_SCHED_CTXS-2, otherwise, the call to starpurm_spawn_kernel_on_cpus() may block until a temporary context becomes available. The routine starpurm_spawn_kernel_on_cpus() returns upon the completion of the parallel kernel. An example is available in starpurm/examples/spawn.c. An asynchronous variant is available with the routine starpurm_spawn_kernel_on_cpus_callback(). This variant returns immediately, however it accepts a callback function, which is subsequently called to notify the calling code about the completion of the parallel kernel. An example is available in starpurm/examples/async_spawn.c. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/max_fpga_support.doxy000066400000000000000000000253141507764646700301300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page MaxFPGASupport Maxeler FPGA Support \section Introduction Introduction Maxeler provides hardware and software solutions for accelerating computing applications on dataflow engines (DFEs). DFEs are in-house designed accelerators that encapsulate reconfigurable high-end FPGAs at their core and are equipped with large amounts of DDR memory. We extend the StarPU task programming library that initially targets heterogeneous architectures to support Field Programmable Gate Array (FPGA). To create StarPU/FPGA applications exploiting DFE configurations, MaxCompiler allows an application to be split into three parts: - Kernel, which implements the computational components of the application in hardware. - Manager configuration, which connects Kernels to the CPU, engine RAM, other Kernels and other DFEs via MaxRing. - CPU application, which interacts with the DFEs to read and write data to the Kernels and engine RAM. The Simple Live CPU interface (SLiC) is Maxeler’s application programming interface for seamless CPU-DFE integration. SLiC allows CPU applications to configure and load a number of DFEs as well as to subsequently schedule and run actions on those DFEs using simple function calls. In StarPU/FPGA applications, we use Dynamic SLiC Interface to exchange data streams between the CPU (Main Memory) and DFE (Local Memory). \section PortingApplicationsToMaxFPGA Porting Applications to Maxeler FPGA The way to port an application to FPGA is to set the field starpu_codelet::max_fpga_funcs, to provide StarPU with the function for FPGA implementation, so for instance: \verbatim struct starpu_codelet cl = { .max_fpga_funcs = {myfunc}, .nbuffers = 1, } \endverbatim A basic example is available in the file tests/maxfpga/max_fpga_basic_static.c. \subsection MaxFPGAExample StarPU/Maxeler FPGA Application To give you an idea of the interface that we used to exchange data between host (CPU) and FPGA (DFE), here is an example, based on one of the examples of Maxeler (https://trac.version.fz-juelich.de/reconfigurable/wiki/Public). StreamFMAKernel.maxj represents the Java kernel code; it implements a very simple kernel (c=a+b), and Test.c starts it from the fpga_add function; it first sets streaming up from the CPU pointers, triggers execution and waits for the result. The API to interact with DFEs is called SLiC which then also involves the MaxelerOS runtime. - StreamFMAKernel.maxj: the DFE part is described in the MaxJ programming language, which is a Java-based metaprogramming approach. \code{.java} package tests; import com.maxeler.maxcompiler.v2.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v2.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEType; import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEVar; class StreamFMAKernel extends Kernel { private static final DFEType type = dfeInt(32); protected StreamFMAKernel(KernelParameters parameters) { super(parameters); DFEVar a = io.input("a", type); DFEVar b = io.input("b", type); DFEVar c; c = a+b; io.output("output", c, type); } } \endcode - StreamFMAManager.maxj: is also described in the MaxJ programming language and orchestrates data movement between the host and the DFE. \code{.java} package tests; import com.maxeler.maxcompiler.v2.build.EngineParameters; import com.maxeler.maxcompiler.v2.managers.custom.blocks.KernelBlock; import com.maxeler.platform.max5.manager.Max5LimaManager; class StreamFMAManager extends Max5LimaManager { private static final String kernel_name = "StreamFMAKernel"; public StreamFMAManager(EngineParameters arg0) { super(arg0); KernelBlock kernel = addKernel(new StreamFMAKernel(makeKernelParameters(kernel_name))); kernel.getInput("a") <== addStreamFromCPU("a"); kernel.getInput("b") <== addStreamFromCPU("b"); addStreamToCPU("output") <== kernel.getOutput("output"); } public static void main(String[] args) { StreamFMAManager manager = new StreamFMAManager(new EngineParameters(args)); manager.build(); } } \endcode Once StreamFMAKernel.maxj and StreamFMAManager.maxj are written, there are other steps to do: - Building the JAVA program: (for Kernel and Manager (.maxj)) \verbatim $ maxjc -1.7 -cp $MAXCLASSPATH streamfma/ \endverbatim - Running the Java program to generate a DFE implementation (a .max file) that can be called from a StarPU/FPGA application and slic headers (.h) for simulation: \verbatim $ java -XX:+UseSerialGC -Xmx2048m -cp $MAXCLASSPATH:. streamfma.StreamFMAManager DFEModel=MAIA maxFileName=StreamFMA target=DFE_SIM \endverbatim - Build the slic object file (simulation): \verbatim $ sliccompile StreamFMA.max \endverbatim - Test.c : to interface StarPU task-based runtime system with Maxeler's DFE devices, we use the advanced dynamic interface of SLiC in non_blocking mode. Test code must include MaxSLiCInterface.h and MaxFile.h. The .max file contains the bitstream. The StarPU/FPGA application can be written in C, C++, etc. Some examples are available in the directory tests/maxfpga. \code{.c} #include "StreamFMA.h" #include "MaxSLiCInterface.h" void fpga_add(void *buffers[], void *cl_arg) { (void)cl_arg; int *a = (int*) STARPU_VECTOR_GET_PTR(buffers[0]); int *b = (int*) STARPU_VECTOR_GET_PTR(buffers[1]); int *c = (int*) STARPU_VECTOR_GET_PTR(buffers[2]); int size = STARPU_VECTOR_GET_NX(buffers[0]); /* actions to run on an engine */ max_actions_t *act = max_actions_init(maxfile, NULL); /* set the number of ticks for a kernel */ max_set_ticks (act, "StreamFMAKernel", size); /* send input streams */ max_queue_input(act, "a", a, size *sizeof(a[0])); max_queue_input(act, "b", b, size*sizeof(b[0])); /* store output stream */ max_queue_output(act,"output", c, size*sizeof(c[0])); /* run actions on the engine */ printf("**** Run actions in non blocking mode **** \n"); /* run actions in non_blocking mode */ max_run_t *run0= max_run_nonblock(engine, act); printf("*** wait for the actions on DFE to complete *** \n"); max_wait(run0); } static struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, .max_fpga_funcs = {fpga_add}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W} }; int main(int argc, char **argv) { ... /* Implementation of a maxfile */ max_file_t *maxfile = StreamFMA_init(); /* Implementation of an engine */ max_engine_t *engine = max_load(maxfile, "*"); starpu_init(NULL); ... Task submission etc. ... starpu_shutdown(); /* deallocate the set of actions */ max_actions_free(act); /* unload and deallocate an engine obtained by way of max_load */ max_unload(engine); return 0; } \endcode To write the StarPU/FPGA application: first, the programmer must describe the codelet using StarPU’s C API. This codelet provides both a CPU implementation and an FPGA one. It also specifies that the task has two inputs and one output through the starpu_codelet::nbuffers and starpu_codelet::modes attributes. fpga_add function is the name of the FPGA implementation and is mainly divided in four steps: - Init actions to be run on DFE. - Add data to an input stream for an action. - Add data storage space for an output stream. - Run actions on DFE in non_blocking mode; a non-blocking call returns immediately, allowing the calling code to do more CPU work in parallel while the actions are run. - Wait for the actions to complete. In the main function, there are four important steps: - Implement a maxfile. - Load a DFE. - Free actions. - Unload and deallocate the DFE. The rest of the application (data registration, task submission, etc.) is as usual with StarPU. The design load can also be delegated to StarPU by specifying an array of load specifications in starpu_conf::max_fpga_load, and use starpu_max_fpga_get_local_engine() to access the loaded max engines. Complete examples are available in tests/fpga/*.c \subsection MaxFPGADataTransfers Data Transfers in StarPU/Maxeler FPGA Applications The communication between the host and the DFE is done through the Dynamic advance interface to exchange data between the main memory and the local memory of the DFE. For the moment, we use \ref STARPU_MAIN_RAM to send and store data to/from DFE's local memory. However, we aim to use a multiplexer to choose which memory node we will use to read/write data. So, users can tell that the computational kernel will take data from the main memory or DFE's local memory, for example. In StarPU applications, when \ref starpu_codelet::specific_nodes is set to 1, this specifies the memory nodes where each data should be sent to for task execution. \subsection MaxFPGAConfiguration Maxeler FPGA Configuration To configure StarPU with Maxeler FPGA accelerators, make sure that the slic-config is available from your PATH environment variable. \subsection MaxFPGALaunchingprograms Launching Programs: Simulation Maxeler provides a simple tutorial to use MaxCompiler (https://trac.version.fz-juelich.de/reconfigurable/wiki/Public). Running the Java program to generate maxfile and slic headers (hardware) on Maxeler's DFE device, takes a VERY long time, approx. 2 hours even for this very small example. That's why we use the simulation. - To start the simulation on Maxeler's DFE device: \verbatim $ maxcompilersim -c LIMA -n StreamFMA restart \endverbatim - To run the binary (simulation) \verbatim $ export LD_LIBRARY_PATH=$MAXELEROSDIR/lib:$LD_LIBRARY_PATH $ export SLIC_CONF="use_simulation=StreamFMA" \endverbatim - To force tasks to be scheduled on the FPGA, one can disable the use of CPU cores by setting the \ref STARPU_NCPU environment variable to 0. \verbatim $ STARPU_NCPU=0 ./StreamFMA \endverbatim - To stop the simulation \verbatim $ maxcompilersim -c LIMA -n StreamFMA stop \endverbatim */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/mpi_support.doxy000066400000000000000000001560761507764646700271450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page MPISupport MPI Support The integration of MPI transfers within task parallelism is done in a very natural way by the means of asynchronous interactions between the application and StarPU. This is implemented in a separate libstarpumpi library which basically provides "StarPU" equivalents of MPI_* functions, where void * buffers are replaced with ::starpu_data_handle_t, and all GPU-RAM-NIC transfers are handled efficiently by StarPU-MPI. Users have to use the usual mpirun command of the MPI implementation to start StarPU on the different MPI nodes. An MPI Insert Task function provides an even more seamless transition to a distributed application, by automatically issuing all required data transfers according to the task graph and an application-provided distribution. Some source codes are available in the directory mpi/. \section MPIBuild Building with MPI support If a mpicc compiler is already in your PATH, StarPU will automatically enable MPI support in the build. If mpicc is not in PATH, you can specify its location by passing --with-mpicc=/where/there/is/mpicc to ./configure It can be useful to enable MPI tests during make check by passing --enable-mpi-check to ./configure. And similarly to mpicc, if mpiexec in not in PATH, you can specify its location by passing --with-mpiexec=/where/there/is/mpiexec to ./configure, but this is not needed if it is next to mpicc, configure will look there in addition to PATH. Similarly, Fortran examples use mpif90, which can be specified manually with --with-mpifort if it can't be found automatically. If users want to run several MPI processes by machine (e.g. one per NUMA node), \ref STARPU_WORKERS_GETBIND needs to be left to its default value 1 to make StarPU take into account the binding set by the MPI launcher (otherwise each StarPU instance would try to bind on all cores of the machine...) However, depending on the architecture of your machine, one may end up with StarPU-MPI nodes not having any CPU workers. If a node only gets 1 CPU, it will be bound to the MPI thread, and none will be left to start a CPU worker. One can check that with the following commands. \verbatim $ mpirun -np 2 starpu_machine_display --worker CPU --count --notopology 1 CPU worker 1 CPU worker $ mpirun -np 4 starpu_machine_display --worker CPU --count --notopology 4 CPU workers 4 CPU workers 4 CPU workers 4 CPU workers $ mpirun --bind-to socket -np 2 starpu_machine_display --worker CPU --count --notopology 4 CPU workers 4 CPU workers $ STARPU_WORKERS_GETBIND=0 mpirun -np 4 starpu_machine_display --worker CPU --count --notopology 4 CPU workers 4 CPU workers 4 CPU workers 4 CPU workers $ STARPU_WORKERS_GETBIND=0 mpirun -np 2 starpu_machine_display --worker CPU --count --notopology 4 CPU workers 4 CPU workers \endverbatim or with \c hwloc \verbatim mpirun --bind-to socket -np 2 hwloc-ls --restrict binding --no-io mpirun -np 2 hwloc-ls --restrict binding --no-io \endverbatim \section ExampleDocumentation Example Used In This Documentation The example below will be used as the base for this documentation. It initializes a token on node 0, and the token is passed from node to node, incremented by one on each step. The code is not using StarPU yet. \code{.c} for (loop = 0; loop < nloops; loop++) { int tag = loop*size + rank; if (loop == 0 && rank == 0) { token = 0; fprintf(stdout, "Start with token value %d\n", token); } else { MPI_Recv(&token, 1, MPI_INT, (rank+size-1)%size, tag, MPI_COMM_WORLD); } token++; if (loop == last_loop && rank == last_rank) { fprintf(stdout, "Finished: token value %d\n", token); } else { MPI_Send(&token, 1, MPI_INT, (rank+1)%size, tag+1, MPI_COMM_WORLD); } } \endcode \section NotUsingMPISupport About Not Using The MPI Support Although StarPU provides MPI support, the application programmer may want to keep his MPI communications as they are for a start, and only delegate task execution to StarPU. This is possible by just using starpu_data_acquire(), for instance: \code{.c} for (loop = 0; loop < nloops; loop++) { int tag = loop*size + rank; /* Acquire the data to be able to write to it */ starpu_data_acquire(token_handle, STARPU_W); if (loop == 0 && rank == 0) { token = 0; fprintf(stdout, "Start with token value %d\n", token); } else { MPI_Recv(&token, 1, MPI_INT, (rank+size-1)%size, tag, MPI_COMM_WORLD); } starpu_data_release(token_handle); /* Task delegation to StarPU to increment the token. The execution might * be performed on a CPU, a GPU, etc. */ increment_token(); /* Acquire the update data to be able to read from it */ starpu_data_acquire(token_handle, STARPU_R); if (loop == last_loop && rank == last_rank) { fprintf(stdout, "Finished: token value %d\n", token); } else { MPI_Send(&token, 1, MPI_INT, (rank+1)%size, tag+1, MPI_COMM_WORLD); } starpu_data_release(token_handle); } \endcode In that case, libstarpumpi is not needed. One can also use MPI_Isend() and MPI_Irecv(), by calling starpu_data_release() after MPI_Wait() or MPI_Test() have notified completion. It is however better to use libstarpumpi, to save the application from having to synchronize with starpu_data_acquire(), and instead just submit all tasks and communications asynchronously, and wait for the overall completion. \section SimpleExample Simple Example The flags required to compile or link against the MPI layer are accessible with the following commands: \verbatim $ pkg-config --cflags starpumpi-1.4 # options for the compiler $ pkg-config --libs starpumpi-1.4 # options for the linker \endverbatim \code{.c} void increment_token(void) { struct starpu_task *task = starpu_task_create(); task->cl = &increment_cl; task->handles[0] = token_handle; starpu_task_submit(task); } int main(int argc, char **argv) { int rank, size; starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(unsigned)); unsigned nloops = NITER; unsigned loop; unsigned last_loop = nloops - 1; unsigned last_rank = size - 1; for (loop = 0; loop < nloops; loop++) { int tag = loop*size + rank; if (loop == 0 && rank == 0) { starpu_data_acquire(token_handle, STARPU_W); token = 0; fprintf(stdout, "Start with token value %d\n", token); starpu_data_release(token_handle); } else { starpu_mpi_irecv_detached(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, NULL, NULL); } increment_token(); if (loop == last_loop && rank == last_rank) { starpu_data_acquire(token_handle, STARPU_R); fprintf(stdout, "Finished: token value %d\n", token); starpu_data_release(token_handle); } else { starpu_mpi_isend_detached(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD, NULL, NULL); } } starpu_task_wait_for_all(); starpu_mpi_shutdown(); if (rank == last_rank) { fprintf(stderr, "[%d] token = %d == %d * %d ?\n", rank, token, nloops, size); STARPU_ASSERT(token == nloops*size); } \endcode We have here replaced MPI_Recv() and MPI_Send() with starpu_mpi_irecv_detached() and starpu_mpi_isend_detached(), which just submit the communication to be performed. The implicit sequential consistency dependencies provide synchronization between MPI reception and emission and the corresponding tasks. The only remaining synchronization with starpu_data_acquire() is at the beginning and the end. The full source code is available in the file mpi/tests/ring.c. \section MPIInitialization How to Initialize StarPU-MPI As seen in the previous example, one has to call starpu_mpi_init_conf() to initialize StarPU-MPI. The third parameter of the function indicates if MPI should be initialized by StarPU, or if the application did it itself. If the application initializes MPI itself, it must call MPI_Init_thread() with MPI_THREAD_SERIALIZED or MPI_THREAD_MULTIPLE, since StarPU-MPI uses a separate thread to perform the communications. MPI_THREAD_MULTIPLE is necessary if the application also performs some MPI communications, or if STARPU_MPI_THREAD_MULTIPLE_SEND is set to non-zero. \section PointToPointCommunication Point To Point Communication The standard point to point communications of MPI have been implemented. The semantic is similar to the MPI one, but adapted to the DSM provided by StarPU. An MPI request will only be submitted when the data is available in the main memory of the node submitting the request. There are two types of asynchronous communications: the classic asynchronous communications and the detached communications. The classic asynchronous communications (starpu_mpi_isend() and starpu_mpi_irecv()) need to be followed by a call to starpu_mpi_wait() or to starpu_mpi_test() to wait for or to test the completion of the communication. As shown in the example mpi/tests/async_ring.c. Waiting for or testing the completion of detached communications is not possible, this is done internally by StarPU-MPI, on completion, the resources are automatically released. This mechanism is similar to the pthread detach state attribute, which determines whether a thread will be created in a joinable or a detached state. For send communications, data is acquired with the mode ::STARPU_R. When using the \c configure option \ref enable-mpi-pedantic-isend "--enable-mpi-pedantic-isend", the mode ::STARPU_RW is used to make sure there is no more than 1 concurrent \c MPI_Isend() call accessing a data and StarPU does not read from it from tasks during the communication. Internally, all communication are divided in 2 communications, a first message is used to exchange an envelope describing the data (i.e. its tag and its size), the data itself is sent in a second message. All MPI communications submitted by StarPU uses a unique tag, which has a default value. This value can be accessed with the function starpu_mpi_get_communication_tag() and changed with the function starpu_mpi_set_communication_tag(). The matching of tags with corresponding requests is done within StarPU-MPI. For any userland communication, the call of the corresponding function (e.g. starpu_mpi_isend()) will result in the creation of a StarPU-MPI request, the function starpu_data_acquire_cb() is then called to asynchronously request StarPU to fetch the data in main memory; when the data is ready and the corresponding buffer has already been received by MPI, it will be copied in the memory of the data, otherwise the request is stored in the early requests list. Sending requests are stored in the ready requests list. While requests need to be processed, the StarPU-MPI progression thread does the following:
  1. it polls the ready requests list. For all the ready requests, the appropriate function is called to post the corresponding MPI call. For example, an initial call to starpu_mpi_isend() will result in a call to MPI_Isend(). If the request is marked as detached, the request will then be added to the detached requests list.
  2. it posts an MPI_Irecv() to retrieve a data envelope.
  3. it polls the detached requests list. For all the detached requests, it tests its completion of the MPI request by calling MPI_Test(). On completion, the data handle is released, and if a callback was defined, it is called.
  4. finally, it checks if a data envelope has been received. If so, if the data envelope matches a request in the early requests list (i.e. the request has already been posted by the application), the corresponding MPI call is posted (similarly to the first step above). If the data envelope does not match any application request, a temporary handle is created to receive the data, a StarPU-MPI request is created and added into the ready requests list, and thus will be processed in the first step of the next loop.
To prevent putting too much pressure on the MPI library, only a limited number of requests are emitted concurrently. This behavior can be tuned with the environment variable \ref STARPU_MPI_NDETACHED_SEND. In the same fashion, the progression thread will poll for termination of existing requests after submitting a defined number of requests. This behavior can be tuned with the environment variable \ref STARPU_MPI_NREADY_PROCESS. The function starpu_mpi_issend() allows to perform a synchronous-mode, non-blocking send of a data. It can also be specified when using starpu_mpi_task_insert() with the parameter ::STARPU_SSEND. \ref MPIPtpCommunication gives the list of all the point to point communications defined in StarPU-MPI. \section ExchangingUserDefinedDataInterface Exchanging User Defined Data Interface New data interfaces defined as explained in \ref DefiningANewDataInterface can also be used within StarPU-MPI and exchanged between nodes. Two functions needs to be defined through the type starpu_data_interface_ops. The function starpu_data_interface_ops::pack_data takes a handle and returns a contiguous memory buffer allocated with \code{.c} starpu_malloc_flags(ptr, size, 0) \endcode along with its size, where data to be conveyed to another node should be copied. \code{.c} static int complex_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node); *count = complex_get_size(handle); *ptr = starpu_malloc_on_node_flags(node, *count, 0); memcpy(*ptr, complex_interface->real, complex_interface->nx*sizeof(double)); memcpy(*ptr+complex_interface->nx*sizeof(double), complex_interface->imaginary, complex_interface->nx*sizeof(double)); return 0; } \endcode The inverse operation is implemented in the function starpu_data_interface_ops::unpack_data which takes a contiguous memory buffer and recreates the data handle. \code{.c} static int complex_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node); memcpy(complex_interface->real, ptr, complex_interface->nx*sizeof(double)); memcpy(complex_interface->imaginary, ptr+complex_interface->nx*sizeof(double), complex_interface->nx*sizeof(double)); starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0); return 0; } \endcode And the starpu_data_interface_ops::peek_data operation does the same, but without freeing the buffer. Of course, one can implement starpu_data_interface_ops::unpack_data as merely calling starpu_data_interface_ops::peek_data and do the free: \code{.c} static int complex_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); STARPU_ASSERT(count == complex_get_size(handle)); struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node); memcpy(complex_interface->real, ptr, complex_interface->nx*sizeof(double)); memcpy(complex_interface->imaginary, ptr+complex_interface->nx*sizeof(double), complex_interface->nx*sizeof(double)); return 0; } \endcode \code{.c} static struct starpu_data_interface_ops interface_complex_ops = { ... .pack_data = complex_pack_data, .peek_data = complex_peek_data .unpack_data = complex_unpack_data }; \endcode Instead of defining pack and unpack operations, users may want to attach an MPI type to their user-defined data interface. The function starpu_mpi_interface_datatype_register() allows doing so. This function takes 3 parameters: the interface ID for which the MPI datatype is going to be defined, a function's pointer that will create the MPI datatype, and a function's pointer that will free the MPI datatype. If for some data an MPI datatype can not be built (e.g. complex data structure), the creation function can return -1, StarPU-MPI will then fallback to using pack/unpack. The functions to create and free the MPI datatype are defined and registered as follows. \code{.c} void starpu_complex_interface_datatype_allocate(starpu_data_handle_t handle, MPI_Datatype *mpi_datatype) { int ret; int blocklengths[2]; MPI_Aint displacements[2]; MPI_Datatype types[2] = {MPI_DOUBLE, MPI_DOUBLE}; struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); MPI_Get_address(complex_interface, displacements); MPI_Get_address(&complex_interface->imaginary, displacements+1); displacements[1] -= displacements[0]; displacements[0] = 0; blocklengths[0] = complex_interface->nx; blocklengths[1] = complex_interface->nx; ret = MPI_Type_create_struct(2, blocklengths, displacements, types, mpi_datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed"); ret = MPI_Type_commit(mpi_datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); } void starpu_complex_interface_datatype_free(MPI_Datatype *mpi_datatype) { MPI_Type_free(mpi_datatype); } static struct starpu_data_interface_ops interface_complex_ops = { ... }; interface_complex_ops.interfaceid = starpu_data_interface_get_next_id(); starpu_mpi_interface_datatype_register(interface_complex_ops.interfaceid, starpu_complex_interface_datatype_allocate, starpu_complex_interface_datatype_free); starpu_data_interface handle; starpu_complex_data_register(&handle, STARPU_MAIN_RAM, real, imaginary, 2); ... \endcode An example is provided in the file mpi/examples/user_datatype/my_interface.c. It is also possible to use starpu_mpi_datatype_register() to register the functions through a handle rather than the interface ID, but note that in that case it is important to make sure no communication is going to occur before the function starpu_mpi_datatype_register() is called. This would otherwise produce an undefined result as the data may be received before the function is called, and so the MPI datatype would not be known by the StarPU-MPI communication engine, and the data would be processed with the pack and unpack operations. One would thus need to synchronize all nodes: \code{.c} starpu_data_interface handle; starpu_complex_data_register(&handle, STARPU_MAIN_RAM, real, imaginary, 2); starpu_mpi_datatype_register(handle, starpu_complex_interface_datatype_allocate, starpu_complex_interface_datatype_free); starpu_mpi_barrier(MPI_COMM_WORLD); \endcode \section MPIInsertTaskUtility MPI Insert Task Utility To save the programmer from having to specify all communications, StarPU provides an "MPI Insert Task Utility". The principle is that the application decides a distribution of the data over the MPI nodes by allocating it and notifying StarPU of this decision, i.e. tell StarPU which MPI node "owns" which data. It also decides, for each handle, an MPI tag which will be used to exchange the content of the handle. All MPI nodes then process the whole task graph, and StarPU automatically determines which node actually execute which task, and trigger the required MPI transfers. The list of functions is described in \ref MPIInsertTask. Here is an stencil example showing how to use starpu_mpi_task_insert(). One first needs to define a distribution function which specifies the locality of the data. Note that the data needs to be registered to MPI by calling starpu_mpi_data_register(). This function allows setting the distribution information and the MPI tag which should be used when communicating the data. It also allows to automatically clear the MPI communication cache when unregistering the data. A basic example is in the file mpi/tests/insert_task.c. \code{.c} /* Returns the MPI node number where data is */ int my_distrib(int x, int y, int nb_nodes) { /* Block distrib */ return ((int)(x / sqrt(nb_nodes) + (y / sqrt(nb_nodes)) * sqrt(nb_nodes))) % nb_nodes; // /* Other examples useful for other kinds of computations */ // /* / distrib */ // return (x+y) % nb_nodes; // /* Block cyclic distrib */ // unsigned side = sqrt(nb_nodes); // return x % side + (y % side) * size; } \endcode Now the data can be registered within StarPU. Data which are not owned but will be needed for computations can be registered through the lazy allocation mechanism, i.e. with a home_node set to -1. StarPU will automatically allocate the memory when it is used for the first time. One can note an optimization here (the else if test): we only register data which will be needed by the tasks that we will execute. \code{.c} unsigned matrix[X][Y]; starpu_data_handle_t data_handles[X][Y]; for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { int mpi_rank = my_distrib(x, y, size); if (mpi_rank == my_rank) /* Owning data */ starpu_variable_data_register(&data_handles[x][y], STARPU_MAIN_RAM, (uintptr_t)&(matrix[x][y]), sizeof(unsigned)); else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size) || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size)) /* I don't own this index, but will need it for my computations */ starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(unsigned)); else /* I know it's useless to allocate anything for this */ data_handles[x][y] = NULL; if (data_handles[x][y]) { starpu_mpi_data_register(data_handles[x][y], x*X+y, mpi_rank); } } } \endcode Now starpu_mpi_task_insert() can be called for the different steps of the application. \code{.c} for(loop=0 ; loopmpi/examples/stencil/stencil5.c. I.e. all MPI nodes process the whole task graph, but as mentioned above, for each task, only the MPI node which owns the data being written to (here, data_handles[x][y]) will actually run the task. The other MPI nodes will automatically send the required data. To tune the placement of tasks among MPI nodes, one can use ::STARPU_EXECUTE_ON_NODE or ::STARPU_EXECUTE_ON_DATA to specify an explicit node (an example can be found in mpi/tests/insert_task_node_choice.c), or the node of a given data (e.g. one of the parameters), or use starpu_mpi_node_selection_register_policy() and ::STARPU_NODE_SELECTION_POLICY to provide a dynamic policy (an example can be found in mpi/tests/policy_register.c). The default policy is to execute the task on the node which owns a data that require write access; if the task requires several data handles with write access, the node executing the task is selected in order to minimize the amount of data to transfer between nodes. A function starpu_mpi_task_build() is also provided with the aim to only construct the task structure. All MPI nodes need to call the function, which posts the required send/recv on the various nodes as needed. Only the node which is to execute the task will then return a valid task structure, others will return NULL. This node must submit the task. All nodes then need to call the function starpu_mpi_task_post_build() -- with the same list of arguments as starpu_mpi_task_build() -- to post all the necessary data communications meant to happen after the task execution. \code{.c} struct starpu_task *task; task = starpu_mpi_task_build(MPI_COMM_WORLD, &cl, STARPU_RW, data_handles[0], STARPU_R, data_handles[1], 0); if (task) starpu_task_submit(task); starpu_mpi_task_post_build(MPI_COMM_WORLD, &cl, STARPU_RW, data_handles[0], STARPU_R, data_handles[1], 0); \endcode A full source code using these functions is available in the file mpi/tests/insert_task_compute.c. It is also possible to create and submit the task outside of StarPU-MPI functions and call the functions starpu_mpi_task_exchange_data_before_execution() and starpu_mpi_task_exchange_data_after_execution() to exchange data as required by the data ownership's nodes. \code{.c} struct starpu_mpi_task_exchange_params params; struct starpu_data_descr descrs[2]; struct starpu_task *task; task = starpu_task_create(); task->cl = &mycodelet; task->handles[0] = data_handles[0]; task->handles[1] = data_handles[1]; starpu_mpi_task_exchange_data_before_execution(MPI_COMM_WORLD, task, descrs, ¶ms); if (params.do_execute) starpu_task_submit(task); starpu_mpi_task_exchange_data_after_execution(MPI_COMM_WORLD, descrs, 2, params); \endcode A full source code using these functions is available in the file mpi/tests/mpi_task_submit.c. If many data handles must be registered with unique tag ids, or if multiple applications are concurrently submitting tasks to StarPU, it is then difficult to keep the uniqueness of the tags for each piece of data. StarPU provides a tag management system to allocate/free a unique range of tags when registering the data to prevent conflict from one application to another. The previous code then becomes: \code{.c} unsigned matrix[X][Y]; starpu_data_handle_t data_handles[X][Y]; int64_t mintag = starpu_mpi_tags_allocate(X*Y); for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { ... if (data_handles[x][y]) { starpu_mpi_data_register(data_handles[x][y], mintag + y*Y+x, mpi_rank); } } } \endcode Then, when all these pieces of data have been unregistered, you may free the range of tags by calling: \code{.c} starpu_mpi_tags_free(mintag); \endcode where mintag was the value returned by starpu_mpi_tags_allocate(). Note that both these functions should be called by all nodes involved in the computations in the exact same order and with the same parameters to keep the tags synchronized between all nodes. Also note that StarPU will not check if a tag given to starpu_mpi_data_register() has been previously registered, this functionality only aims to prevent different parts of an application to use the same data tags. \section MPITaskUtility Other MPI Utility Functions Similarly to the function starpu_data_cpy(), the function starpu_mpi_data_cpy() can be used to transfer a data between 2 nodes. It behaves as starpu_data_cpy() if both data are owned by the same node, otherwise a transfer is initiated between the nodes. A priority and a callback function can be defined. \code{.c} ... starpu_mpi_data_register(src_handle, 12, 0); // Data is owned by node0 starpu_mpi_data_register(dst_handle, 42, 1); // Data is owned by node1 ... // Send data from node0 to node1 starpu_mpi_data_cpy(dst_handle, src_handle, MPI_COMM_WORLD, 0, callback, NULL); \endcode \section MPIInsertPruning Pruning MPI Task Insertion Making all MPI nodes process the whole graph can be a concern with a growing number of nodes. To avoid this, the application can prune the task for loops according to the data distribution, to only submit tasks on nodes which have to care about them (either to execute them, or to send the required data). A way to do some of this quite easily can be to just add an if like this: \code{.c} for(loop=0 ; loopexamples/stencil/implicit-stencil-tasks.c. If the my_distrib function can be inlined by the compiler, the latter can improve the test. If the size can be made a compile-time constant, the compiler can considerably improve the test further. If the distribution function is not too complex and the compiler is very good, the latter can even optimize the for loops, thus dramatically reducing the cost of task submission. To estimate quickly how long task submission takes, and notably how much pruning saves, a quick and easy way is to measure the submission time of just one of the MPI nodes. This can be achieved by running the application on just one MPI node with the following environment variables: \code{.sh} export STARPU_DISABLE_KERNELS=1 export STARPU_MPI_FAKE_RANK=2 export STARPU_MPI_FAKE_SIZE=1024 \endcode Here we have disabled the kernel function call to skip the actual computation time and only keep submission time, and we have asked StarPU to fake running on MPI node 2 out of 1024 nodes. \section MPITemporaryData Temporary Data To be able to use starpu_mpi_task_insert(), one has to call starpu_mpi_data_register(), so that StarPU-MPI can know what it needs to do for each data. Parameters of starpu_mpi_data_register() are normally the same on all nodes for a given data, so that all nodes agree on which node owns the data, and which tag is used to transfer its value. It can however be useful to register e.g. some temporary data on just one node, without having to register a dumb handle on all nodes, while only one node will actually need to know about it. In this case, nodes which will not need the data can just pass \c NULL to starpu_mpi_task_insert(): \code{.c} starpu_data_handle_t data0 = NULL; if (rank == 0) { starpu_variable_data_register(&data0, STARPU_MAIN_RAM, (uintptr_t) &val0, sizeof(val0)); starpu_mpi_data_register(data0, 0, rank); } starpu_mpi_task_insert(MPI_COMM_WORLD, &cl, STARPU_W, data0, 0); /* Executes on node 0 */ \endcode Here, nodes whose rank is not \c 0 will simply not take care of the data, and consider it to be on another node. This can be mixed various way, for instance here node \c 1 determines that it does not have to care about \c data0, but knows that it should send the value of its \c data1 to node \c 0, which owns data and thus will need the value of \c data1 to execute the task: \code{.c} starpu_data_handle_t data0 = NULL, data1, data; if (rank == 0) { starpu_variable_data_register(&data0, STARPU_MAIN_RAM, (uintptr_t) &val0, sizeof(val0)); starpu_mpi_data_register(data0, -1, rank); starpu_variable_data_register(&data1, -1, 0, sizeof(val1)); starpu_variable_data_register(&data, STARPU_MAIN_RAM, (uintptr_t) &val, sizeof(val)); } else if (rank == 1) { starpu_variable_data_register(&data1, STARPU_MAIN_RAM, (uintptr_t) &val1, sizeof(val1)); starpu_variable_data_register(&data, -1, 0, sizeof(val)); } starpu_mpi_data_register(data, 42, 0); starpu_mpi_data_register(data1, 43, 1); starpu_mpi_task_insert(MPI_COMM_WORLD, &cl, STARPU_W, data, STARPU_R, data0, STARPU_R, data1, 0); /* Executes on node 0 */ \endcode The full source code is available in the file mpi/tests/temporary.c. \section MPIPerNodeData Per-node Data Further than temporary data on just one node, one may want per-node data, to e.g. replicate some computation because that is less expensive than communicating the value over MPI: \code{.c} starpu_data_handle pernode, data0, data1; starpu_variable_data_register(&pernode, -1, 0, sizeof(val)); starpu_mpi_data_register(pernode, -1, STARPU_MPI_PER_NODE); /* Normal data: one on node0, one on node1 */ if (rank == 0) { starpu_variable_data_register(&data0, STARPU_MAIN_RAM, (uintptr_t) &val0, sizeof(val0)); starpu_variable_data_register(&data1, -1, 0, sizeof(val1)); } else if (rank == 1) { starpu_variable_data_register(&data0, -1, 0, sizeof(val1)); starpu_variable_data_register(&data1, STARPU_MAIN_RAM, (uintptr_t) &val1, sizeof(val1)); } starpu_mpi_data_register(data0, 42, 0); starpu_mpi_data_register(data1, 43, 1); starpu_mpi_task_insert(MPI_COMM_WORLD, &cl, STARPU_W, pernode, 0); /* Will be replicated on all nodes */ starpu_mpi_task_insert(MPI_COMM_WORLD, &cl2, STARPU_RW, data0, STARPU_R, pernode); /* Will execute on node 0, using its own pernode*/ starpu_mpi_task_insert(MPI_COMM_WORLD, &cl2, STARPU_RW, data1, STARPU_R, pernode); /* Will execute on node 1, using its own pernode*/ \endcode One can turn a normal data into per-node data, by first broadcasting it to all nodes: \code{.c} starpu_data_handle data; starpu_variable_data_register(&data, -1, 0, sizeof(val)); starpu_mpi_data_register(data, 42, 0); /* Compute some value */ starpu_mpi_task_insert(MPI_COMM_WORLD, &cl, STARPU_W, data, 0); /* Node 0 computes it */ /* Get it on all nodes */ starpu_mpi_get_data_on_all_nodes_detached(MPI_COMM_WORLD, data); /* And turn it per-node */ starpu_mpi_data_set_rank(data, STARPU_MPI_PER_NODE); \endcode The data can then be used just like per-node above. The full source code is available in the file mpi/tests/temporary.c. \section MPIMpiRedux Inter-node reduction One might want to leverage a reduction pattern across several nodes. Using ::STARPU_REDUX (see \ref DataReduction), one can obtain such patterns where each core on contributing nodes spawns their own copy to work with. In the case that the required reductions are too numerous and expensive, the access mode ::STARPU_MPI_REDUX tells StarPU to spawn only one contribution per contributing node. The setup and use of ::STARPU_MPI_REDUX is similar to ::STARPU_REDUX : the initialization and reduction codelets should be declared through starpu_data_set_reduction_methods() in the same fashion as ::STARPU_REDUX. Example mpi/examples/mpi_redux/mpi_redux.c shows how to use the ::STARPU_MPI_REDUX mode and compare it with the standard ::STARPU_REDUX. The function starpu_mpi_redux_data() is automatically called either when a task reading the reduced handle is inserted through the MPI layer of StarPU through starpu_mpi_insert_task() or when users wait for all communications and tasks to be executed through starpu_mpi_wait_for_all(). The function can be called by users to fine-tune arguments such as the priority of the reduction tasks. Tasks contributing to the inter-node reduction should be registered as accessing the contribution through ::STARPU_RW|::STARPU_COMMUTE mode, as for the ::STARPU_REDUX mode, as in the following example. \code{.c} static struct starpu_codelet contrib_cl = { .cpu_funcs = {cpu_contrib}, /* cpu implementation(s) of the routine */ .nbuffers = 1, /* number of data handles referenced by this routine */ .modes = {STARPU_RW | STARPU_COMMUTE} /* access modes for the contribution */ .name = "contribution" }; \endcode When inserting these tasks, the access mode handed out to the StarPU-MPI layer should be \c STARPU_MPI_REDUX. If a task uses a \c data owned by node 0 and is executed on the node 1, it can be inserted as in the following example. \code{.c} starpu_mpi_task_insert(MPI_COMM_WORLD, &contrib_cl, STARPU_MPI_REDUX, data, STARPU_EXECUTE_ON_NODE, 1); /* Node 1 computes it */ \endcode Note that if the specified node is set to \c -1, the option is ignored. More examples are available at \c mpi/examples/mpi_redux/mpi_redux.c and \c mpi/examples/mpi_redux/mpi_redux_tree.c. \section MPIPriorities Priorities All send functions have a _prio variant which takes an additional priority parameter, which allows making StarPU-MPI change the order of MPI requests before submitting them to MPI. The default priority is \c 0. When using the starpu_mpi_task_insert() helper, ::STARPU_PRIORITY defines both the task priority and the MPI requests priority. An example is available in the file mpi/examples/benchs/recv_wait_finalize_bench.c. To test how much MPI priorities have a good effect on performance, you can set the environment variable \ref STARPU_MPI_PRIORITIES to \c 0 to disable the use of priorities in StarPU-MPI. \section MPICache MPI Cache Support StarPU-MPI automatically optimizes duplicate data transmissions: if an MPI node \c B needs a piece of data \c D from MPI node \c A for several tasks, only one transmission of \c D will take place from \c A to \c B, and the value of \c D will be kept on \c B as long as no task modifies \c D. If a task modifies \c D, \c B will wait for all tasks which need the previous value of \c D, before invalidating the value of \c D. As a consequence, it releases the memory occupied by \c D. Whenever a task running on \c B needs the new value of \c D, allocation will take place again to receive it. Since tasks can be submitted dynamically, StarPU-MPI can not know whether the current value of data \c D will again be used by a newly-submitted task before being modified by another newly-submitted task, so until a task is submitted to modify the current value, it can not decide by itself whether to flush the cache or not. The application can however explicitly tell StarPU-MPI to flush the cache by calling starpu_mpi_cache_flush() or starpu_mpi_cache_flush_all_data(), for instance in case the data will not be used at all anymore (see for instance the cholesky example in mpi/examples/matrix_decomposition), or at least not in the close future. If a newly-submitted task actually needs the value again, another transmission of \c D will be initiated from \c A to \c B. A mere starpu_mpi_cache_flush_all_data() can for instance be added at the end of the whole algorithm, to express that no data will be reused after this (or at least that it is not interesting to keep them in cache). It may however be interesting to add fine-graph starpu_mpi_cache_flush() calls during the algorithm; the effect for the data deallocation will be the same, but it will additionally release some pressure from the StarPU-MPI cache hash table during task submission. One can determine whether a piece of data is cached with starpu_mpi_cached_receive() and starpu_mpi_cached_send(). An example is available in the file mpi/examples/cache/cache.c. Functions starpu_mpi_cached_receive_set() and starpu_mpi_cached_send_set() are automatically called by starpu_mpi_task_insert() but can also be called directly by the application. Functions starpu_mpi_cached_send_clear() and starpu_mpi_cached_receive_clear() must be called to clear data from the cache. They are also automatically called when using starpu_mpi_task_insert(). The whole caching behavior can be disabled thanks to the \ref STARPU_MPI_CACHE environment variable. The variable \ref STARPU_MPI_CACHE_STATS can be set to 1 to enable the runtime to display messages when data are added or removed from the cache holding the received data. \section MPIMigration MPI Data Migration The application can dynamically change its mind about the data distribution, to balance the load over MPI nodes, for instance. This can be done very simply by requesting an explicit move and then change the registered rank. For instance, we here switch to a new distribution function my_distrib2: we first register any data which wasn't registered already and will be needed, then migrate the data, and register the new location. \code{.c} for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { int mpi_rank = my_distrib2(x, y, size); if (!data_handles[x][y] && (mpi_rank == my_rank || my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size) || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size))) /* Register newly-needed data */ starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(unsigned)); if (data_handles[x][y]) { /* Migrate the data */ starpu_mpi_data_migrate(MPI_COMM_WORLD, data_handles[x][y], mpi_rank); } } } \endcode The full example is available in the file mpi/examples/stencil/stencil5.c. From then on, further tasks submissions will use the new data distribution, which will thus change both MPI communications and task assignments. Very importantly, since all nodes have to agree on which node owns which data to determine MPI communications and task assignments the same way, all nodes have to perform the same data migration, and at the same point among task submissions. It thus does not require a strict synchronization, just a clear separation of task submissions before and after the data redistribution. Before data unregistration, it has to be migrated back to its original home node (the value, at least), since that is where the user-provided buffer resides. Otherwise, the unregistration will complain that it does not have the latest value on the original home node. \code{.c} for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { if (data_handles[x][y]) { int mpi_rank = my_distrib(x, y, size); /* Get back data to original place where the user-provided buffer is. */ starpu_mpi_data_migrate(MPI_COMM_WORLD, data_handles[x][y], mpi_rank); /* And unregister it */ starpu_data_unregister(data_handles[x][y]); } } } \endcode \section MPICollective MPI Collective Operations The functions are described in \ref MPICollectiveOperations. \code{.c} if (rank == root) { /* Allocate the vector */ vector = malloc(nblocks * sizeof(float *)); for(x=0 ; xmpi/tests/mpi_scatter_gather.c. With NewMadeleine (see \ref Nmad), broadcasts can automatically be detected and be optimized by using routing trees. This behavior can be controlled with the environment variable \ref STARPU_MPI_COOP_SENDS. See the corresponding [paper](https://hal.inria.fr/hal-02872765) for more information. Other collective operations would be easy to define, just ask starpu-devel for them! \section MPIDriver Make StarPU-MPI Progression Thread Execute Tasks The default behavior of StarPU-MPI is to spawn an MPI thread to take care only of MPI communications in an active fashion (i.e. the StarPU-MPI thread sleeps only when there are no active request submitted by the application), with the goal of being as reactive as possible to communications. Knowing that, users usually leave one free core for the MPI thread when starting a distributed execution with StarPU-MPI. However, this could result in a loss of performance for applications that does not require an extreme reactivity to MPI communications. The starpu_mpi_init_conf() routine allows users to give the starpu_conf configuration structure of StarPU (usually given to the starpu_init() routine) to StarPU-MPI, so that StarPU-MPI reserves for its own use one of the CPU drivers of the current computing node, or one of the CPU cores, and then calls starpu_init() internally. This allows the MPI communication thread to call a StarPU CPU driver to run tasks when there is no active requests to take care of, and thus recover the computational power of the "lost" core. Since there is a trade-off between executing tasks and polling MPI requests, which is how much the application wants to lose in reactivity to MPI communications to get back the computing power of the core dedicated to the StarPU-MPI thread, there are two environment variables to pilot the behavior of the MPI thread so that users can tune this trade-off depending on the behavior of the application. The \ref STARPU_MPI_DRIVER_CALL_FREQUENCY environment variable sets how many times the MPI progression thread goes through the MPI_Test() loop on each active communication request (and thus try to make communications progress by going into the MPI layer) before executing tasks. The default value for this environment variable is 0, which means that the support for interleaving task execution and communication polling is deactivated, thus returning the MPI progression thread to its original behavior. The \ref STARPU_MPI_DRIVER_TASK_FREQUENCY environment variable sets how many tasks are executed by the MPI communication thread before checking all active requests again. While this environment variable allows a better use of the core dedicated to StarPU-MPI for computations, it also decreases the reactivity of the MPI communication thread as much. \section MPIDebug Debugging MPI Communication trace will be enabled when the environment variable \ref STARPU_MPI_COMM is set to \c 1, and StarPU has been configured with the option \ref enable-verbose "--enable-verbose". Statistics will be enabled for the communication cache when the environment variable \ref STARPU_MPI_CACHE_STATS is set to \c 1. It prints messages on the standard output when data are added or removed from the received communication cache. When the environment variable \ref STARPU_MPI_STATS is set to \c 1, StarPU will display at the end of the execution for each node the volume and the bandwidth of data sent to all the other nodes. Communication statistics can also be enabled and disabled from the application by calling the functions starpu_mpi_comm_stats_enable() and starpu_mpi_comm_stats_disable(). If communication statistics have been enabled, calling the function starpu_mpi_comm_stats_retrieve() will give the amount of communications between the calling node and all the other nodes. Communication statistics will also be automatically displayed at the end of the execution, as examplified below. \verbatim [starpu_comm_stats][3] TOTAL: 476.000000 B 0.000454 MB 0.000098 B/s 0.000000 MB/s [starpu_comm_stats][3:0] 248.000000 B 0.000237 MB 0.000051 B/s 0.000000 MB/s [starpu_comm_stats][3:2] 50.000000 B 0.000217 MB 0.000047 B/s 0.000000 MB/s [starpu_comm_stats][2] TOTAL: 288.000000 B 0.000275 MB 0.000059 B/s 0.000000 MB/s [starpu_comm_stats][2:1] 70.000000 B 0.000103 MB 0.000022 B/s 0.000000 MB/s [starpu_comm_stats][2:3] 288.000000 B 0.000172 MB 0.000037 B/s 0.000000 MB/s [starpu_comm_stats][1] TOTAL: 188.000000 B 0.000179 MB 0.000038 B/s 0.000000 MB/s [starpu_comm_stats][1:0] 80.000000 B 0.000114 MB 0.000025 B/s 0.000000 MB/s [starpu_comm_stats][1:2] 188.000000 B 0.000065 MB 0.000014 B/s 0.000000 MB/s [starpu_comm_stats][0] TOTAL: 376.000000 B 0.000359 MB 0.000077 B/s 0.000000 MB/s [starpu_comm_stats][0:1] 376.000000 B 0.000141 MB 0.000030 B/s 0.000000 MB/s [starpu_comm_stats][0:3] 10.000000 B 0.000217 MB 0.000047 B/s 0.000000 MB/s \endverbatim These statistics can be plotted as heatmaps using the StarPU tool starpu_mpi_comm_matrix.py, this will produce 2 PDF files, one plot for the bandwidth, and one plot for the data volume. \image latex trace_bw_heatmap.png "Bandwidth Heatmap" width=0.5\textwidth \image html trace_bw_heatmap.png "Bandwidth Heatmap" \image latex trace_volume_heatmap.png "Data Volume Heatmap" width=0.5\textwidth \image html trace_volume_heatmap.png "Data Bandwidth Heatmap" \section MPIExamples More MPI examples MPI examples are available in the StarPU source code in mpi/examples:
  • comm shows how to use communicators with StarPU-MPI
  • complex is a simple example using a user-define data interface over MPI (complex numbers),
  • stencil5 is a simple stencil example using starpu_mpi_task_insert(),
  • matrix_decomposition is a cholesky decomposition example using starpu_mpi_task_insert(). The non-distributed version can check for
  • mpi_lu is an LU decomposition example, provided in three versions: plu_example uses explicit MPI data transfers, plu_implicit_example uses implicit MPI data transfers, plu_outofcore_example uses implicit MPI data transfers and supports data matrices which do not fit in memory (out-of-core).
\section Nmad Using the NewMadeleine communication library NewMadeleine (see https://pm2.gitlabpages.inria.fr/newmadeleine/, part of the PM2 project) is an optimizing communication library for high-performance networks. NewMadeleine provides its own interface, but also an MPI interface (called MadMPI). Thus, there are two possibilities to use NewMadeleine with StarPU:
  • using the NewMadeleine's native interface. StarPU supports this interface from its release 1.3.0, by enabling the \c configure option \ref enable-nmad "--enable-nmad". In this case, StarPU relies directly on NewMadeleine to make communications progress and NewMadeleine has to be built with the profile pukabi+madmpi.conf.
  • using the NewMadeleine's MPI interface (MadMPI). StarPU will use the standard MPI API and NewMadeleine will handle the calls to the MPI API. In this case, StarPU makes communications progress and thus communication progress has to be disabled in NewMadeleine by compiling it with the profile pukabi+madmpi-mini.conf.
To build NewMadeleine, download the latest version from the website (or, better, use the Git version to use the most recent version), then: \code{.sh} cd pm2/scripts ./pm2-build-packages ./ --prefix= \endcode With Guix, the NewMadeleine's native interface can be used by setting the parameter \c \-\-with-input=openmpi=nmad and MadMPI can be used with \c \-\-with-input=openmpi=nmad-mini. Whatever implementation (NewMadeleine or MadMPI) is used by StarPU, the public MPI interface of StarPU (described in \ref API_MPI_Support) is the same. \section MPIMasterSlave MPI Master Slave Support StarPU provides another way to execute applications across many nodes. The Master Slave support permits to use remote cores without thinking about data distribution. This support can be activated with the \c configure option \ref enable-mpi-master-slave "--enable-mpi-master-slave". However, you should not activate both MPI support and MPI Master-Slave support. The existing kernels for CPU devices can be used as such. They only have to be exposed through the name of the function in the \ref starpu_codelet::cpu_funcs_name field. Functions have to be globally-visible (i.e. not static) for StarPU to be able to look them up, and -rdynamic must be passed to gcc (or -export-dynamic to ld) so that symbols of the main program are visible. By default, one core is dedicated on the master node to manage the entire set of slaves. If the implementation of MPI you are using has a good multiple threads support, you can set the \ref STARPU_MPI_MS_MULTIPLE_THREAD environment variable to 1 to dedicate one core per slave. Choosing the number of cores on each slave device is done by setting the environment variable \ref STARPU_NMPIMSTHREADS "STARPU_NMPIMSTHREADS=\" with \ being the requested number of cores. By default, all the slave's cores are used. Setting the number of slaves nodes is done by changing the -np parameter when executing the application with mpirun or mpiexec. The master node is by default the node with the MPI rank equal to 0. To select another node, use the environment variable \ref STARPU_MPI_MASTER_NODE "STARPU_MPI_MASTER_NODE=\" with \ being the requested MPI rank node. A simple example tests/main/insert_task.c can be used to test the MPI master slave support. \section MPICheckpoint MPI Checkpoint Support StarPU provides an experimental checkpoint mechanism. It is for now only a proof of concept to see what the checkpointing cost is, since the restart part has not been integrated yet. To enable checkpointing, you should use the \c configure option \ref enable-mpi-ft "--enable-mpi-ft". The application in the directory \c mpi/examples/matrix_decomposition shows how to enable checkpoints. The API documentation is available in \ref API_MPI_FT_Support Statistics can also be enabled with the \c configure option \ref enable-mpi-ft-stats "--enable-mpi-ft-stats". */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/opencl_support.doxy000066400000000000000000000054441507764646700276300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page OpenCLSupport OpenCL Support StarPU provides several functions for managing OpenCL programs and kernels. starpu_opencl_load_program_source() and starpu_opencl_load_program_source_malloc() load the OpenCL program source from a file, but the latter one also allocates buffer for the program source. starpu_opencl_compile_opencl_from_file() and starpu_opencl_compile_opencl_from_string() are used to compile an OpenCL kernel from a source file or a string respectively. starpu_opencl_load_binary_opencl() is used to compile the binary OpenCL kernel. An example is available in examples/binary/binary.c. starpu_opencl_load_opencl_from_file() and starpu_opencl_load_opencl_from_string() are used to compile an OpenCL source code from a file or a string respectively. starpu_opencl_unload_opencl() is used to unload an OpenCL compiled program or kernel from memory. starpu_opencl_load_opencl() is used to create an OpenCL kernel for specified device. starpu_opencl_release_kernel() is used to release the specified OpenCL kernel. An example illustrating the usage of OpenCL support is available in examples/basic_examples/vector_scal_opencl.c. For managing OpenCL contexts, devices, and command queues, there are several functions: starpu_opencl_get_context(), starpu_opencl_get_device() and starpu_opencl_get_queue() are used to retrieve the OpenCL context, device and command queue associated with a given device number respectively. starpu_opencl_get_current_context() and starpu_opencl_get_current_queue() are used to retrieve the OpenCL context or command queue of the current worker that is being used by the calling thread. We can call starpu_opencl_set_kernel_args() to set the arguments for an OpenCL kernel. Examples are available in examples/filters/custom_mf/. Two functions are useful for debugging and error reporting in OpenCL applications. starpu_opencl_error_string() takes an OpenCL error code as an argument and returns a string containing a description of the error. starpu_opencl_display_error() takes an OpenCL error code as an argument and prints the corresponding error message to the standard error stream. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/out_of_core.doxy000066400000000000000000000230651507764646700270560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page OutOfCore Out Of Core \section OutOfCore_Introduction Introduction When using StarPU, one may need to store more data than what the main memory (RAM) can store. This part describes the method to add a new memory node on a disk and to use it. Similarly to what happens with GPUs (it's actually exactly the same code), when available main memory becomes scarce, StarPU will evict unused data to the disk, thus leaving room for new allocations. Whenever some evicted data is needed again for a task, StarPU will automatically fetch it back from the disk. The principle is that one first registers a disk memory node with a set of functions to manipulate data by calling starpu_disk_register(), and then registers a disk location, seen by StarPU as a void*, which can be for instance a Unix path for the \c stdio, \c unistd or \c unistd_o_direct backends, or a leveldb database for the \c leveldb backend, an HDF5 file path for the \c HDF5 backend, etc. The \c disk backend opens this place with the plug() method. StarPU can then start using it to allocate room and store data there with the disk write method, without user intervention. Users can also use starpu_disk_open() to explicitly open an object within the disk, e.g. a file name in the \c stdio or \c unistd cases, or a database key in the \c leveldb case, and then use starpu_*_register functions to turn it into a StarPU data handle. StarPU will then use this file as an external source of data, and automatically read and write data as appropriate. In the end use starpu_disk_close() to close an existing object. In any case, users also need to set \ref STARPU_LIMIT_CPU_MEM to the amount of data that StarPU will be allowed to afford. By default, StarPU will use the machine memory size, but part of it is taken by the kernel, the system, daemons, and the application's own allocated data, whose size can not be predicted. That is why users need to specify what StarPU can afford. Some Out-of-core tests are worth giving a read, see tests/disk/*.c \section UseANewDiskMemory Use a new disk memory To use a disk memory node, you have to register it with this function: \code{.c} int new_dd = starpu_disk_register(&starpu_disk_unistd_ops, (void *) "/tmp/", 1024*1024*200); \endcode Here, we use the \c unistd library to realize the read/write operations, i.e. \c fread/\c fwrite. This structure must have a path where to store files, as well as the maximum size the software can afford to store on the disk. Don't forget to check if the result is correct! This can also be achieved by just setting environment variables \ref STARPU_DISK_SWAP, \ref STARPU_DISK_SWAP_BACKEND and \ref STARPU_DISK_SWAP_SIZE : \verbatim export STARPU_DISK_SWAP=/tmp export STARPU_DISK_SWAP_BACKEND=unistd export STARPU_DISK_SWAP_SIZE=200 \endverbatim The backend can be set to \c stdio (some caching is done by \c libc and the kernel), \c unistd (only caching in the kernel), \c unistd_o_direct (no caching), \c leveldb, or \c hdf5. It is important to understand that when the backend is not set to \c unistd_o_direct, some caching will occur at the kernel level (the page cache), which will also consume memory... \ref STARPU_LIMIT_CPU_MEM might need to be set to less than half of the machine memory just to leave room for the kernel's page cache, otherwise the kernel will struggle to get memory. Using \c unistd_o_direct avoids this caching, thus allowing to set \ref STARPU_LIMIT_CPU_MEM to the machine memory size (minus some memory for normal kernel operations, system daemons, and application data). When the register call is made, StarPU will benchmark the disk. This can take some time. Warning: the size thus has to be at least \ref STARPU_DISK_SIZE_MIN bytes ! StarPU will then automatically try to evict unused data to this new disk. One can also use the standard StarPU memory node API to prefetch data etc., see the \ref API_Standard_Memory_Library and the \ref API_Data_Interfaces. The disk is unregistered during the execution of starpu_shutdown(). \section OOCDataRegistration Data Registration StarPU will only be able to achieve Out-Of-Core eviction if it controls memory allocation. For instance, if the application does the following: \code{.c} p = malloc(1024*1024*sizeof(float)); fill_with_data(p); starpu_matrix_data_register(&h, STARPU_MAIN_RAM, (uintptr_t) p, 1024, 1024, 1024, sizeof(float)); \endcode StarPU will not be able to release the corresponding memory since it's the application which allocated it, and StarPU can not know how, and thus how to release it. One thus have to use the following instead: \code{.c} starpu_matrix_data_register(&h, -1, NULL, 1024, 1024, 1024, sizeof(float)); starpu_task_insert(cl_fill_with_data, STARPU_W, h, 0); \endcode Which makes StarPU automatically do the allocation when the task running cl_fill_with_data gets executed. And then if it needs to, it will be able to release it after having pushed the data to the disk. Since no initial buffer is provided to starpu_matrix_data_register(), the handle does not have any initial value right after this call, and thus the very first task using the handle needs to use the ::STARPU_W mode like above, ::STARPU_R or ::STARPU_RW would not make sense. By default, StarPU will try to push any data handle to the disk. To specify whether a given handle should be pushed to the disk, starpu_data_set_ooc_flag() should be used. To get to know whether a given handle should be pushed to the disk, starpu_data_get_ooc_flag() should be used. \section OOCWontUse Using Wont Use By default, StarPU uses a Least-Recently-Used (LRU) algorithm to determine which data should be evicted to the disk. This algorithm can be hinted by telling which data will not be used in the coming future thanks to starpu_data_wont_use(), for instance: \code{.c} starpu_task_insert(&cl_work, STARPU_RW, h, 0); starpu_data_wont_use(h); \endcode StarPU will mark the data as "inactive" and tend to evict to the disk that data rather than others. \section ExampleDiskCopy Examples: disk_copy \snippet disk_copy.c To be included. You should update doxygen if you see this text. The full code is provided in the file tests/disk/disk_copy.c \section ExampleDiskCompute Examples: disk_compute \snippet disk_compute.c To be included. You should update doxygen if you see this text. The full code is provided in the file tests/disk/disk_compute.c \section Performances Scheduling heuristics for Out-of-core are still relatively experimental. The tricky part is that you usually have to find a compromise between privileging locality (which avoids back and forth with the disk) and privileging the critical path, i.e. taking into account priorities to avoid lack of parallelism at the end of the task graph. It is notably better to avoid defining different priorities to tasks with low priority, since that will make the scheduler want to schedule them by levels of priority, at the expense of locality. The scheduling algorithms worth trying are thus dmdar and lws, which privilege data locality over priorities. There will be work on this area in the coming future. \section FeedBackFigures Feedback Figures Beyond pure performance feedback, some figures are interesting to have a look at. Using export STARPU_BUS_STATS=1 (\ref STARPU_BUS_STATS and \ref STARPU_BUS_STATS_FILE to define a filename in which to display statistics, by default the standard error stream is used) gives an overview of the data transfers which were needed. The values can also be obtained at runtime by using starpu_bus_get_profiling_info(). An example can be read in src/profiling/profiling_helpers.c. \verbatim #--------------------- Data transfer speed for /tmp/sthibault-disk-DJzhAj (node 1): 0 -> 1: 99 MB/s 1 -> 0: 99 MB/s 0 -> 1: 23858 µs 1 -> 0: 23858 µs #--------------------- TEST DISK MEMORY #--------------------- Data transfer stats: Disk 0 -> NUMA 0 0.0000 GB 0.0000 MB/s (transfers : 0 - avg -nan MB) NUMA 0 -> Disk 0 0.0625 GB 63.6816 MB/s (transfers : 2 - avg 32.0000 MB) Total transfers: 0.0625 GB #--------------------- \endverbatim Using export STARPU_ENABLE_STATS=1 gives information for each memory node on data miss/hit and allocation miss/hit. \verbatim #--------------------- MSI cache stats : memory node NUMA 0 hit : 32 (66.67 %) miss : 16 (33.33 %) memory node Disk 0 hit : 0 (0.00 %) miss : 0 (0.00 %) #--------------------- #--------------------- Allocation cache stats: memory node NUMA 0 total alloc : 16 cached alloc: 0 (0.00 %) memory node Disk 0 total alloc : 8 cached alloc: 0 (0.00 %) #--------------------- \endverbatim \section DiskFunctions Disk functions There are various ways to operate a disk memory node, described by the structure starpu_disk_ops. For instance, the variable #starpu_disk_unistd_ops uses read/write functions. All structures are in \ref API_Out_Of_Core. Examples are provided in src/core/disk_ops/disk_*.c */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/parallel_worker.doxy000066400000000000000000000267561507764646700277520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page ParallelWorker Creating Parallel Workers On A Machine \section ParallelWorkerGeneralIdeas General Ideas Parallel workers are a concept introduced in this
paper where they are called clusters. The granularity problem is tackled by using resource aggregation: instead of dynamically splitting tasks, resources are aggregated to process coarse grain tasks in a parallel fashion. This is built on top of scheduling contexts to be able to handle any type of parallel tasks. This comes from a basic idea, making use of two levels of parallelism in a DAG. We keep the DAG parallelism, but consider on top of it that a task can contain internal parallelism. A good example is if each task in the DAG is OpenMP enabled. The particularity of such tasks is that we will combine the power of two runtime systems: StarPU will manage the DAG parallelism and another runtime (e.g. OpenMP) will manage the internal parallelism. The challenge is in creating an interface between the two runtime systems so that StarPU can regroup cores inside a machine (creating what we call a parallel worker) on top of which the parallel tasks (e.g. OpenMP tasks) will be run in a contained fashion. The aim of the parallel worker API is to facilitate this process automatically. For this purpose, we depend on the \c hwloc tool to detect the machine configuration and then partition it into usable parallel workers. An example of code running on parallel workers is available in examples/sched_ctx/parallel_workers.c. Let's first look at how to create a parallel worker. To enable parallel workers in StarPU, one needs to set the configure option \ref enable-parallel-worker "--enable-parallel-worker". \section CreatingParallel Workers Creating Parallel Workers Partitioning a machine into parallel workers with the parallel worker API is fairly straightforward. The simplest way is to state under which machine topology level we wish to regroup all resources. This level is a \c hwloc object, of the type hwloc_obj_type_t. More information can be found in the hwloc documentation. Once a parallel worker is created, the full machine is represented with an opaque structure starpu_parallel_worker_config. This can be printed to show the current machine state. \code{.c} struct starpu_parallel_worker_config *parallel_workers; parallel_workers = starpu_parallel_worker_init(HWLOC_OBJ_SOCKET, 0); starpu_parallel_worker_print(parallel_workers); /* submit some tasks with OpenMP computations */ starpu_parallel_worker_shutdown(parallel_workers); /* we are back to the default StarPU state */ \endcode The following graphic is an example of what a particular machine can look like once parallel workers are created. The main difference is that we have less worker queues and tasks which will be executed on several resources at once. The execution of these tasks will be left to the internal runtime system, represented with a dashed box around the resources. \image latex runtime-par.png "StarPU using parallel tasks" width=0.5\textwidth \image html runtime-par.png "StarPU using parallel tasks" Creating parallel workers as shown in the example above will create workers able to execute OpenMP code by default. The parallel worker creation function starpu_parallel_worker_init() takes optional parameters after the \c hwloc object (always terminated by the value \c 0) which allow parametrizing the parallel workers creation. These parameters can help to create parallel workers of a type different from OpenMP, or create a more precise partition of the machine. This is explained in Section \ref CreatingCustomParallelWorkers. Before starpu_shutdown(), we call starpu_parallel_worker_shutdown() to delete the parallel worker configuration. \section ExampleOfConstrainingOpenMP Example Of Constraining OpenMP Parallel workers require being able to constrain the runtime managing the internal task parallelism (internal runtime) to the resources set by StarPU. The purpose of this is to express how StarPU must communicate with the internal runtime to achieve the required cooperation. In the case of OpenMP, StarPU will provide an awake thread from the parallel worker to execute this liaison. It will then provide on demand the process ids of the other resources supposed to be in the region. Finally, thanks to an OpenMP region, we can create the required number of threads and bind each of them on the correct region. These will then be reused each time we encounter a \#pragma omp parallel in the following computations of our program. The following graphic is an example of what an OpenMP-type parallel worker looks like and how it is represented in StarPU. We can see that one StarPU (black) thread is awake, and we need to create on the other resources the OpenMP threads (in pink). \image latex parallel_worker2.png "StarPU with an OpenMP parallel worker" width=0.3\textwidth \image html parallel_worker2.png "StarPU with an OpenMP parallel worker" Finally, the following code shows how to force OpenMP to cooperate with StarPU and create the aforementioned OpenMP threads constrained in the parallel worker's resources set: \code{.c} void starpu_parallel_worker_openmp_prologue(void * sched_ctx_id) { int sched_ctx = *(int*)sched_ctx_id; int *cpuids = NULL; int ncpuids = 0; int workerid = starpu_worker_get_id(); //we can target only CPU workers if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) { //grab all the ids inside the parallel worker starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids); //set the number of threads omp_set_num_threads(ncpuids); #pragma omp parallel { //bind each threads to its respective resource starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]); } free(cpuids); } return; } \endcode This function is the default function used when calling starpu_parallel_worker_init() without extra parameter. Parallel workers are based on several tools and models already available within StarPU contexts, and merely extend contexts. More on contexts can be read in Section \ref SchedulingContexts. A similar example is available in the file examples/sched_ctx/parallel_code.c. \section CreatingCustomParallelWorkers Creating Custom Parallel Workers Parallel workers can be created either with the predefined types provided within StarPU, or with user-defined functions to bind another runtime inside StarPU. The predefined parallel worker types provided by StarPU are ::STARPU_PARALLEL_WORKER_OPENMP, ::STARPU_PARALLEL_WORKER_INTEL_OPENMP_MKL and ::STARPU_PARALLEL_WORKER_GNU_OPENMP_MKL. If StarPU is compiled with the \c MKL library, ::STARPU_PARALLEL_WORKER_GNU_OPENMP_MKL uses MKL functions to set the number of threads, which is more reliable when using an OpenMP implementation different from the Intel one. Otherwise, it will behave as ::STARPU_PARALLEL_WORKER_INTEL_OPENMP_MKL. The parallel worker type is set when calling the function starpu_parallel_worker_init() with the parameter ::STARPU_PARALLEL_WORKER_TYPE as in the example below, which is creating a \c MKL parallel worker. \code{.c} struct starpu_parallel_worker_config *parallel_workers; parallel_workers = starpu_parallel_worker_init(HWLOC_OBJ_SOCKET, STARPU_PARALLEL_WORKER_TYPE, STARPU_PARALLEL_WORKER_GNU_OPENMP_MKL, 0); \endcode Using the default type ::STARPU_PARALLEL_WORKER_OPENMP is similar to calling starpu_parallel_worker_init() without any extra parameter. An example is available in examples/parallel_workers/parallel_workers.c. Users can also define their own function. \code{.c} void foo_func(void* foo_arg); int foo_arg = 0; struct starpu_parallel_worker_config *parallel_workers; parallel_workers = starpu_parallel_worker_init(HWLOC_OBJ_SOCKET, STARPU_PARALLEL_WORKER_CREATE_FUNC, &foo_func, STARPU_PARALLEL_WORKER_CREATE_FUNC_ARG, &foo_arg, 0); \endcode An example is available in examples/parallel_workers/parallel_workers_func.c. Parameters that can be given to starpu_parallel_worker_init() are ::STARPU_PARALLEL_WORKER_MIN_NB, ::STARPU_PARALLEL_WORKER_MAX_NB, ::STARPU_PARALLEL_WORKER_NB, ::STARPU_PARALLEL_WORKER_POLICY_NAME, ::STARPU_PARALLEL_WORKER_POLICY_STRUCT, ::STARPU_PARALLEL_WORKER_KEEP_HOMOGENEOUS, ::STARPU_PARALLEL_WORKER_PREFERE_MIN, ::STARPU_PARALLEL_WORKER_CREATE_FUNC, ::STARPU_PARALLEL_WORKER_CREATE_FUNC_ARG, ::STARPU_PARALLEL_WORKER_TYPE, ::STARPU_PARALLEL_WORKER_AWAKE_WORKERS, ::STARPU_PARALLEL_WORKER_PARTITION_ONE, ::STARPU_PARALLEL_WORKER_NEW and ::STARPU_PARALLEL_WORKER_NCORES. \section ParallelWorkersWithSchedulingContextsAPI Parallel Workers With Scheduling As previously mentioned, the parallel worker API is implemented on top of \ref SchedulingContexts. Its main addition is to ease the creation of a machine CPU partition with no overlapping by using \c hwloc, whereas scheduling contexts can use any number of any type of resources. It is therefore possible, but not recommended, to create parallel workers using the scheduling contexts API. This can be useful mostly in the most complex machine configurations, where users have to dimension precisely parallel workers by hand using their own algorithm. \code{.c} /* the list of resources the context will manage */ int workerids[3] = {1, 3, 10}; /* indicate the list of workers assigned to it, the number of workers, the name of the context and the scheduling policy to be used within the context */ int id_ctx = starpu_sched_ctx_create(workerids, 3, "my_ctx", 0); /* let StarPU know that the following tasks will be submitted to this context */ starpu_sched_ctx_set_task_context(id); task->prologue_callback_pop_func=&runtime_interface_function_here; /* submit the task to StarPU */ starpu_task_submit(task); \endcode As this example illustrates, creating a context without scheduling policy will create a parallel worker. The interface function between StarPU and the other runtime must be specified through the field starpu_task::prologue_callback_pop_func. Such a function can be similar to the OpenMP thread team creation one (see above). An example is available in examples/sched_ctx/parallel_tasks_reuse_handle.c. Note that the OpenMP mode is the default mode both for parallel workers and contexts. The result of a parallel worker creation is a woken-up master worker and sleeping "slaves" which allow the master to run tasks on their resources. To create a parallel worker with woken-up workers, the flag ::STARPU_SCHED_CTX_AWAKE_WORKERS must be set when using the scheduling context API function starpu_sched_ctx_create(), or the flag ::STARPU_PARALLEL_WORKER_AWAKE_WORKERS must be set when using the parallel worker API function starpu_parallel worker_init(). */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/scheduling_context_hypervisor.doxy000066400000000000000000000242041507764646700327320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page SchedulingContextHypervisor Scheduling Context Hypervisor \section WhatIsTheHypervisor What Is The Hypervisor StarPU proposes a platform to construct Scheduling Contexts, to delete and modify them dynamically. A parallel kernel, can thus be isolated into a scheduling context and interferences between several parallel kernels are avoided. If users know exactly how many workers each scheduling context needs, they can assign them to the contexts at their creation time or modify them during the execution of the program. The Scheduling Context Hypervisor Plugin is available for users who do not dispose of a regular parallelism, who cannot know in advance the exact size of the context and need to resize the contexts according to the behavior of the parallel kernels. The Hypervisor receives information from StarPU concerning the execution of the tasks, the efficiency of the resources, etc. and it decides accordingly when and how the contexts can be resized. Basic strategies of resizing scheduling contexts already exist, but a platform for implementing additional custom ones is available. Several examples of hypervisor are provided in sc_hypervisor/examples/*.c \section StartTheHypervisor Start the Hypervisor The Hypervisor must be initialized once at the beginning of the application. At this point, a resizing policy should be indicated. This strategy depends on the information the application is able to provide to the hypervisor, as well as on the accuracy needed for the resizing procedure. For example, the application may be able to provide an estimation of the workload of the contexts. In this situation, the hypervisor may decide what resources the contexts need. However, if no information is provided, the hypervisor evaluates the behavior of the resources and of the application and makes a guess about the future. The hypervisor resizes only the registered contexts. The basic example is available in the file sc_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.c. \section InterrogateTheRuntime Interrogate The Runtime The runtime provides the hypervisor with information concerning the behavior of the resources and the application. This is done by using the performance_counters which represent callbacks indicating when the resources are idle or not efficient, when the application submits tasks or when it becomes too slow. \section TriggerTheHypervisor Trigger the Hypervisor The resizing is triggered either when the application requires it (sc_hypervisor_resize_ctxs()) or when the initial distribution of resources alters the performance of the application (the application is too slow or the resource are idle for too long time). An example is available in the file sc_hypervisor/examples/hierarchical_ctxs/resize_hierarchical_ctxs.c. If the environment variable \ref SC_HYPERVISOR_TRIGGER_RESIZE is set to speed, the monitored speed of the contexts is compared to a theoretical value computed with a linear program, and the resizing is triggered whenever the two values do not correspond. Otherwise, if the environment variable is set to idle the hypervisor triggers the resizing algorithm whenever the workers are idle for a period longer than the threshold indicated by the programmer. When this happens, different resizing strategy are applied that target minimizing the total execution of the application, the instant speed or the idle time of the resources. \section ResizingStrategies Resizing Strategies The plugin proposes several strategies for resizing the scheduling context. The Application driven strategy uses users's input concerning the moment when they want to resize the contexts. Thus, users tag the task that should trigger the resizing process. One can set directly the field starpu_task::hypervisor_tag or use the macro ::STARPU_HYPERVISOR_TAG in the function starpu_task_insert(). \code{.c} task.hypervisor_tag = 2; \endcode or \code{.c} starpu_task_insert(&codelet, ..., STARPU_HYPERVISOR_TAG, 2, 0); \endcode Then users have to indicate that when a task with the specified tag is executed, the contexts should resize. \code{.c} sc_hypervisor_resize(sched_ctx, 2); \endcode Users can use the same tag to change the resizing configuration of the contexts if they consider it necessary. \code{.c} sc_hypervisor_ctl(sched_ctx, SC_HYPERVISOR_MIN_WORKERS, 6, SC_HYPERVISOR_MAX_WORKERS, 12, SC_HYPERVISOR_TIME_TO_APPLY, 2, NULL); \endcode The Idleness based strategy moves workers unused in a certain context to another one needing them. (see \ref API_SC_Hypervisor_usage) \code{.c} int workerids[3] = {1, 3, 10}; int workerids2[9] = {0, 2, 4, 5, 6, 7, 8, 9, 11}; sc_hypervisor_ctl(sched_ctx_id, SC_HYPERVISOR_MAX_IDLE, workerids, 3, 10000.0, SC_HYPERVISOR_MAX_IDLE, workerids2, 9, 50000.0, NULL); \endcode The Gflops/s rate based strategy resizes the scheduling contexts such that they all finish at the same time. The speed of each of them is computed and once one of them is significantly slower, the resizing process is triggered. In order to do these computations, users have to input the total number of instructions needed to be executed by the parallel kernels and the number of instruction to be executed by each task. The number of flops to be executed by a context are passed as parameter when they are registered to the hypervisor, \code{.c} sc_hypervisor_register_ctx(sched_ctx_id, flops) \endcode and the one to be executed by each task are passed when the task is submitted. The corresponding field is starpu_task::flops and the corresponding macro in the function starpu_task_insert() is ::STARPU_FLOPS (Caution: but take care of passing a double, not an integer, otherwise parameter passing will be bogus). When the task is executed, the resizing process is triggered. \code{.c} task.flops = 100; \endcode or \code{.c} starpu_task_insert(&codelet, ..., STARPU_FLOPS, (double) 100, 0); \endcode The Feft strategy uses a linear program to predict the best distribution of resources such that the application finishes in a minimum amount of time. As for the Gflops/s rate strategy, the programmers have to indicate the total number of flops to be executed when registering the context. This number of flops may be updated dynamically during the execution of the application whenever this information is not very accurate from the beginning. The function sc_hypervisor_update_diff_total_flops() is called in order to add or to remove a difference to the flops left to be executed. Tasks are provided also the number of flops corresponding to each one of them. During the execution of the application, the hypervisor monitors the consumed flops and recomputes the time left and the number of resources to use. The speed of each type of resource is (re)evaluated and inserter in the linear program in order to better adapt to the needs of the application. The Teft strategy uses a linear program too, that considers all the types of tasks and the number of each of them, and it tries to allocate resources such that the application finishes in a minimum amount of time. A previous calibration of StarPU would be useful in order to have good predictions of the execution time of each type of task. The types of tasks may be determined directly by the hypervisor when they are submitted. However, there are applications that do not expose all the graph of tasks from the beginning. In this case, in order to let the hypervisor know about all the tasks, the function sc_hypervisor_set_type_of_task() will just inform the hypervisor about future tasks without submitting them right away. The Ispeed strategy divides the execution of the application in several frames. For each frame, the hypervisor computes the speed of the contexts and tries making them run at the same speed. The strategy requires less contribution from users, as the hypervisor requires only the size of the frame in terms of flops. \code{.c} int workerids[3] = {1, 3, 10}; int workerids2[9] = {0, 2, 4, 5, 6, 7, 8, 9, 11}; sc_hypervisor_ctl(sched_ctx_id, SC_HYPERVISOR_ISPEED_W_SAMPLE, workerids, 3, 2000000000.0, SC_HYPERVISOR_ISPEED_W_SAMPLE, workerids2, 9, 200000000000.0, SC_HYPERVISOR_ISPEED_CTX_SAMPLE, 60000000000.0, NULL); \endcode The Throughput strategy focuses on maximizing the throughput of the resources and resizes the contexts such that the machine is running at its maximum efficiency (maximum instant speed of the workers). \section DefiningANewHypervisorPolicy Defining A New Hypervisor Policy While Scheduling Context Hypervisor Plugin comes with a variety of resizing policies (see \ref ResizingStrategies), it may sometimes be desirable to implement custom policies to address specific problems. The API described below allows users to write their own resizing policy. Here is an example of how to define a new policy \code{.c} struct sc_hypervisor_policy dummy_policy = { .handle_poped_task = dummy_handle_poped_task, .handle_pushed_task = dummy_handle_pushed_task, .handle_idle_cycle = dummy_handle_idle_cycle, .handle_idle_end = dummy_handle_idle_end, .handle_post_exec_hook = dummy_handle_post_exec_hook, .custom = 1, .name = "dummy" }; \endcode Examples are provided in sc_hypervisor/src/hypervisor_policies/*_policy.c */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/scheduling_contexts.doxy000066400000000000000000000253431507764646700306300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page SchedulingContexts Scheduling Contexts TODO: improve! \section ContextGeneralIdeas General Ideas Scheduling contexts represent abstracts sets of workers that allow the programmers to control the distribution of computational resources (i.e. CPUs and GPUs) to concurrent kernels. The main goal is to minimize interferences between the execution of multiple parallel kernels, by partitioning the underlying pool of workers using contexts. Scheduling contexts additionally allow a user to make use of a different scheduling policy depending on the target resource set. \section CreatingAContext Creating A Context By default, the application submits tasks to an initial context, which disposes of all the computation resources available to StarPU (all the workers). If the application programmer plans to launch several kernels simultaneously, by default these kernels will be executed within this initial context, using a single scheduler policy (see \ref TaskSchedulingPolicy). Meanwhile, if the application programmer is aware of the demands of these kernels and of the specificity of the machine used to execute them, the workers can be divided between several contexts. These scheduling contexts will isolate the execution of each kernel, and they will permit the use of a scheduling policy proper to each one of them. Scheduling Contexts may be created in two ways: either the programmers indicates the set of workers corresponding to each context (providing he knows the identifiers of the workers running within StarPU), or the programmer does not provide any worker list and leaves the Hypervisor to assign workers to each context according to their needs (\ref SchedulingContextHypervisor). Both cases require a call to the function starpu_sched_ctx_create(), which requires as input the worker list (the exact list or a NULL pointer), the amount of workers (or -1 to designate all workers on the platform) and a list of optional parameters such as the scheduling policy, terminated by a 0. The scheduling policy can be a character list corresponding to the name of a StarPU predefined policy or the pointer to a custom policy. The function returns an identifier of the context created, which you will use to indicate the context you want to submit the tasks to. A basic example is available in the file examples/sched_ctx/sched_ctx.c. \code{.c} /* the list of resources the context will manage */ int workerids[3] = {1, 3, 10}; /* indicate the list of workers assigned to it, the number of workers, the name of the context and the scheduling policy to be used within the context */ int id_ctx = starpu_sched_ctx_create(workerids, 3, "my_ctx", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); /* let StarPU know that the following tasks will be submitted to this context */ starpu_sched_ctx_set_context(id); /* submit the task to StarPU */ starpu_task_submit(task); \endcode Note: Parallel greedy and parallel heft scheduling policies do not support the existence of several disjoint contexts on the machine. Combined workers are constructed depending on the entire topology of the machine, not only the one belonging to a context. \subsection CreatingAContextWithTheDefaultBehavior Creating A Context With The Default Behavior If no scheduling policy is specified when creating the context, it will be used as another type of resource: a parallel worker. A parallel worker is a context without scheduler (eventually delegated to another runtime). For more information, see \ref ParallelWorker. It is therefore mandatory to stipulate a scheduler to use the contexts in this traditional way. To create a context with the default scheduler, that is either controlled through the environment variable STARPU_SCHED or the StarPU default scheduler, one can explicitly use the option STARPU_SCHED_CTX_POLICY_NAME, "" as in the following example: \code{.c} /* the list of resources the context will manage */ int workerids[3] = {1, 3, 10}; /* indicate the list of workers assigned to it, the number of workers, and use the default scheduling policy. */ int id_ctx = starpu_sched_ctx_create(workerids, 3, "my_ctx", STARPU_SCHED_CTX_POLICY_NAME, "", 0); /* .... */ \endcode A full example is available in the file examples/sched_ctx/two_cpu_contexts.c. \section CreatingAGPUContext Creating A Context To Partition a GPU The contexts can also be used to group a set of SMs of an NVIDIA GPU in order to isolate the parallel kernels and allow them to coexecution on a specified partition of the GPU. Each context will be mapped to a stream and users can indicate the number of SMs. The context can be added to a larger context already grouping CPU cores. This larger context can use a scheduling policy that assigns tasks to both CPUs and contexts (partitions of the GPU) based on performance models adjusted to the number of SMs. The GPU implementation of the task has to be modified accordingly and receive as a parameter the number of SMs. \code{.c} /* get the available streams (suppose we have nstreams = 2 by specifying them with STARPU_NWORKER_PER_CUDA=2 */ int nstreams = starpu_worker_get_stream_workerids(gpu_devid, stream_workerids, STARPU_CUDA_WORKER); int sched_ctx[nstreams]; sched_ctx[0] = starpu_sched_ctx_create(&stream_workerids[0], 1, "subctx", STARPU_SCHED_CTX_CUDA_NSMS, 6, 0); sched_ctx[1] = starpu_sched_ctx_create(&stream_workerids[1], 1, "subctx", STARPU_SCHED_CTX_CUDA_NSMS, 7, 0); int ncpus = 4; int workers[ncpus+nstreams]; workers[ncpus+0] = stream_workerids[0]; workers[ncpus+1] = stream_workerids[1]; big_sched_ctx = starpu_sched_ctx_create(workers, ncpus+nstreams, "ctx1", STARPU_SCHED_CTX_SUB_CTXS, sched_ctxs, nstreams, STARPU_SCHED_CTX_POLICY_NAME, "dmdas", 0); starpu_task_submit_to_ctx(task, big_sched_ctx); \endcode A full example is available in the file examples/sched_ctx/gpu_partition.c. \section ModifyingAContext Modifying A Context A scheduling context can be modified dynamically. The application may change its requirements during the execution, and the programmer can add additional workers to a context or remove those no longer needed. In the following example, we have two scheduling contexts sched_ctx1 and sched_ctx2. After executing a part of the tasks, some of the workers of sched_ctx1 will be moved to context sched_ctx2. \code{.c} /* the list of resources that context 1 will give away */ int workerids[3] = {1, 3, 10}; /* add the workers to context 1 */ starpu_sched_ctx_add_workers(workerids, 3, sched_ctx2); /* remove the workers from context 2 */ starpu_sched_ctx_remove_workers(workerids, 3, sched_ctx1); \endcode An example is available in the file examples/sched_ctx/sched_ctx_remove.c. \section SubmittingTasksToAContext Submitting Tasks To A Context The application may submit tasks to several contexts, either simultaneously or sequentially. If several threads of submission are used, the function starpu_sched_ctx_set_context() may be called just before starpu_task_submit(). Thus, StarPU considers that the current thread will submit tasks to the corresponding context. An example is available in the file examples/sched_ctx/gpu_partition.c. When the application may not assign a thread of submission to each context, the id of the context must be indicated by using the function starpu_task_submit_to_ctx() or the field \ref STARPU_SCHED_CTX for starpu_task_insert(). An example is available in the file examples/sched_ctx/sched_ctx.c. \section DeletingAContext Deleting A Context When a context is no longer needed, it must be deleted. The application can indicate which context should keep the resources of a deleted one. All the tasks of the context should be executed before doing this. Thus, the programmer may use either a barrier and then delete the context directly, or just indicate that other tasks will not be submitted later on to the context (such that when the last task is executed its workers will be moved to the inheritor) and delete the context at the end of the execution (when a barrier will be used eventually). \code{.c} /* when the context 2 is deleted context 1 inherits its resources */ starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1); /* submit tasks to context 2 */ for (i = 0; i < ntasks; i++) starpu_task_submit_to_ctx(task[i],sched_ctx2); /* indicate that context 2 finished submitting and that */ /* as soon as the last task of context 2 finished executing */ /* its workers can be moved to the inheritor context */ starpu_sched_ctx_finished_submit(sched_ctx1); /* wait for the tasks of both contexts to finish */ starpu_task_wait_for_all(); /* delete context 2 */ starpu_sched_ctx_delete(sched_ctx2); /* delete context 1 */ starpu_sched_ctx_delete(sched_ctx1); \endcode A full example is available in the file examples/sched_ctx/sched_ctx.c. \section EmptyingAContext Emptying A Context A context may have no resources at the beginning or at a certain moment of the execution. Tasks can still be submitted to these contexts, they will be executed as soon as the contexts will have resources. A list of tasks pending to be executed is kept and will be submitted when workers are added to the contexts. \code{.c} /* create a empty context */ unsigned sched_ctx_id = starpu_sched_ctx_create(NULL, 0, "ctx", 0); /* submit a task to this context */ starpu_sched_ctx_set_context(&sched_ctx_id); ret = starpu_task_insert(&codelet, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* add CPU workers to the context */ int procs[STARPU_NMAXWORKERS]; int nprocs = starpu_cpu_worker_get_count(); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, nprocs); starpu_sched_ctx_add_workers(procs, nprocs, sched_ctx_id); /* and wait for the task termination */ starpu_task_wait_for_all(); \endcode The full example is available in the file examples/sched_ctx/sched_ctx_empty.c. However, if resources are never allocated to the context, the application will not terminate. If these tasks have low priority, the application can inform StarPU to not submit them by calling the function starpu_sched_ctx_stop_task_submission(). */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/scheduling_policy_definition.doxy000066400000000000000000001063751507764646700324750ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page HowToDefineANewSchedulingPolicy How To Define A New Scheduling Policy \section NewSchedulingPolicy_Introduction Introduction StarPU provides two ways of defining a scheduling policy, a basic monolithic way, and a modular way. The basic monolithic way is directly connected with the core of StarPU, which means that the policy then has to handle all performance details, such as data prefetching, task performance model calibration, worker locking, etc. examples/scheduler/dummy_sched.c is a trivial example which does not handle this, and thus e.g. does not achieve any data prefetching or smart scheduling. The modular way allows implementing just one component, and reuse existing components to cope with all these details. examples/scheduler/dummy_modular_sched.c is a trivial example very similar to dummy_sched.c, but implemented as a component, which allows assembling it with other components, and notably get data prefetching support for free, and task performance model calibration is properly performed, which allows to easily extend it into taking task duration into account, etc. \section SchedulingHelpers Helper functions for defining a scheduling policy (Basic or modular) Make sure to have a look at the \ref API_Scheduling_Policy section, which provides a complete list of the functions available for writing advanced schedulers. This includes getting an estimation for a task computation completion with starpu_task_expected_length(), for a speedup factor relative to CPU speed with starpu_worker_get_relative_speedup(), for the expected data transfer time in micro-seconds with starpu_task_expected_data_transfer_time(), starpu_task_expected_data_transfer_time_for(), or starpu_data_expected_transfer_time(), for the expected conversion time in micro-seconds with starpu_task_expected_conversion_time(), for the required energy with starpu_task_expected_energy() or starpu_task_worker_expected_energy(), etc. Per-worker variants are also available with starpu_task_worker_expected_length(), etc. The average over workers is also available with starpu_task_expected_length_average() and starpu_task_expected_energy_average(). Other useful functions include starpu_transfer_bandwidth(), starpu_transfer_latency(), starpu_transfer_predict(), ... The successors of a task can be obtained with starpu_task_get_task_succs(). One can also directly test the presence of a data handle with starpu_data_is_on_node(). Prefetches can be triggered by calling either starpu_prefetch_task_input_for(), starpu_idle_prefetch_task_input_for(), starpu_prefetch_task_input_for_prio(), or starpu_idle_prefetch_task_input_for_prio(). And prefetching data on a specified node can use either starpu_prefetch_task_input_on_node(), starpu_prefetch_task_input_on_node_prio(), starpu_idle_prefetch_task_input_on_node(), or starpu_idle_prefetch_task_input_on_node_prio(). The _prio versions allow specifying a priority for the transfer (instead of taking the task priority by default). These prefetches are only processed when there are no fetch data requests (i.e. a task is waiting for it) to process. The _idle versions queue the transfers on the idle prefetch queue, which is only processed when there are no non-idle prefetches to process. starpu_get_prefetch_flag() is a convenient helper for checking the value of the \ref STARPU_PREFETCH environment variable. When a scheduler does such prefetching, it should set the prefetches field of the starpu_sched_policy to 1, to prevent the core from triggering its own prefetching. For applications that need to prefetch data or to perform other pre-execution setup before a task is executed, it is useful to call the function starpu_task_notify_ready_soon_register() which registers a callback function when a task is about to become ready for execution. starpu_worker_set_going_to_sleep_callback() and starpu_worker_set_waking_up_callback() allow to register an external resource manager callback function that will be notified about workers going to sleep or waking up, when StarPU is compiled with support for blocking drivers and worker callbacks. Schedulers should call starpu_task_set_implementation() or starpu_task_get_implementation() to specify or to retrieve the codelet implementation to be executed when executing a specific task. One can determine if a worker type is capable of executing a specific task by calling the function starpu_worker_type_can_execute_task(). The function starpu_sched_find_all_worker_combinations() must be used to identify all viable worker combinations that can execute a parallel task. starpu_combined_worker_get_count() and starpu_worker_is_combined_worker() can be used to determine the number of different combined workers and whether a particular worker is a combined worker respectively. starpu_combined_worker_get_id() allows to get the identifier of the current combined worker. starpu_combined_worker_assign_workerid() allow users to or register a new combined worker and get its identifier, it then needs to be given to a worker collection with the starpu_worker_collection::add. starpu_combined_worker_get_desceiption() returns the description of a combined worker. Additionally, the function starpu_worker_is_blocked_in_parallel() is utilized to determine if a worker is currently blocked in a parallel task, whereas starpu_worker_is_slave_somewhere() can be called to determine if a worker is presently functioning as a slave for another worker. StarPU also provides two functions for initializing and preparing the execution of parallel tasks: starpu_parallel_task_barrier_init() and starpu_parallel_task_barrier_init_n(). Usual functions can be used on tasks, for instance one can use the following to get the data size for a task. \code{.c} size = 0; write = 0; if (task->cl) for (i = 0; i < STARPU_TASK_GET_NBUFFERS(task); i++) { starpu_data_handle_t data = STARPU_TASK_GET_HANDLE(task, i) size_t datasize = starpu_data_get_size(data); size += datasize; if (STARPU_TASK_GET_MODE(task, i) & STARPU_W) write += datasize; } \endcode Task queues can be implemented with the starpu_task_list functions. The function starpu_task_list_init() is used to initialize an empty list structure. Once the list is initialized, new tasks can be added to it using the starpu_task_list_push_front() and starpu_task_list_push_back() to add a task to the front or back of the list respectively. starpu_task_list_front() and starpu_task_list_back() can be used to get the first or last task in the list without removing it. starpu_task_list_begin() and starpu_task_list_end() can be used to get the task iterators from the beginning of the list and check whether it is the end of the list respectively. starpu_task_list_next() can be used to get the next task in the list, which is not erase-safe. starpu_task_list_empty() can be used to check whether the list is empty. To remove tasks from the queue, the function starpu_task_list_erase() is used to remove a specific task from the list. starpu_task_list_pop_front() and starpu_task_list_pop_back() can be used to remove the first or last task from the list. Finally, the function starpu_task_list_ismember() is used to check whether a given task is contained in the list. The function starpu_task_list_move() is used to move list from one head to another. Access to the \c hwloc topology is available with starpu_worker_get_hwloc_obj(). \section DefiningANewBasicSchedulingPolicy Defining A New Basic Scheduling Policy A full example showing how to define a new scheduling policy is available in the StarPU sources in examples/scheduler/dummy_sched.c. The scheduler has to provide methods: \code{.c} static struct starpu_sched_policy dummy_sched_policy = { .init_sched = init_dummy_sched, .deinit_sched = deinit_dummy_sched, .add_workers = dummy_sched_add_workers, .remove_workers = dummy_sched_remove_workers, .push_task = push_task_dummy, .pop_task = pop_task_dummy, .policy_name = "dummy", .policy_description = "dummy scheduling strategy" }; \endcode The idea is that when a task becomes ready for execution, the starpu_sched_policy::push_task method is called to give the ready task to the scheduler. Then call starpu_push_task_end() to notify that the specified task has been pushed. When a worker is idle, the starpu_sched_policy::pop_task method is called to get a task from the scheduler. It is up to the scheduler to implement what is between. A simple eager scheduler is for instance to make starpu_sched_policy::push_task push the task to a global list, and make starpu_sched_policy::pop_task pop from this list. A scheduler can also use starpu_push_local_task() to directly push tasks to a per-worker queue, and then StarPU does not even need to implement starpu_sched_policy::pop_task. If there are no ready tasks within the scheduler, it can just return \c NULL, and the worker will sleep. starpu_sched_policy::add_workers and starpu_sched_policy::remove_workers are used to add or remove workers to or from a scheduling policy, so that the number of workers in a policy can be dynamically adjusted. After adding or removing workers from a scheduling policy, the worker task lists should be updated to ensure that the workers are assigned tasks appropriately. By calling starpu_sched_ctx_worker_shares_tasks_lists(), you can specify whether a worker may pop tasks from the task list of other workers or if there is a central list with tasks for all the workers. The \ref starpu_sched_policy section provides the exact rules that govern the methods of the policy. One can enumerate the workers with this iterator: \code{.c} struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); ... } \endcode To provide synchronization between workers, a per-worker lock exists to protect the data structures of a given worker. It is acquired around scheduler methods, so that the scheduler does not need any additional mutex to protect its per-worker data. In case the scheduler wants to access another scheduler's data, it should use starpu_worker_lock() and starpu_worker_unlock(), or use starpu_worker_trylock() which will not block if the lock is not immediately available, or use starpu_worker_lock_self() and starpu_worker_unlock_self() to acquire and to release a lock on the worker associated with the current thread. Calling \code{.c}starpu_worker_lock(B)\endcode from a worker \c A will however thus make worker \c A wait for worker \c B to complete its scheduling method. That may be a problem if that method takes a long time, because it is e.g. computing a heuristic or waiting for another mutex, or even cause deadlocks if worker \c B is calling \code{.c}starpu_worker_lock(A)\endcode at the same time. In such a case, worker \c B must call starpu_worker_relax_on() and starpu_worker_relax_off() around the section which potentially blocks (and does not actually need protection). While a worker is in relaxed mode, e.g. between a pair of starpu_worker_relax_on() and starpu_worker_relax_off() calls, its state can be altered by other threads: for instance, worker \c A can push tasks for worker \c B. In consequence, worker \c B must re-assess its state after \code{.c}starpu_worker_relax_off(B)\endcode, such as taking possible new tasks pushed to its queue into account. Calling starpu_worker_get_relax_state() to query the relaxation state of a worker. When the starpu_sched_policy::push_task method has pushed a task for another worker, one has to call starpu_wake_worker_relax(), starpu_wake_worker_relax_light(), starpu_wake_worker_no_relax() or starpu_wake_worker_locked() so that the worker wakes up and picks it. If the task was pushed on a shared queue, one may want to only wake one idle worker. An example doing this is available in src/sched_policies/eager_central_policy.c. When the scheduling policy makes a scheduling decision for a task, it shouhld call starpu_sched_task_break(). Schedulers can set the minimum or maximum task priority level supported by the scheduling policy by calling starpu_sched_set_min_priority() or starpu_sched_set_max_priority(), and then applications can call starpu_sched_get_min_priority() or starpu_sched_get_max_priority() to retrieve the minimum or maximum priority value. The file src/sched_policies/heteroprio.c shows how to uses these functions. When scheduling a task, it is important to check whether the specified worker can execute the codelet before assigning the task to that worker. This is done using the starpu_worker_can_execute_task() function, or starpu_combined_worker_can_execute_task() which is compatible with combined workers, or starpu_worker_can_execute_task_impl() which also returns the list of implementation numbers that can be used by the worker to execute the task, or starpu_worker_can_execute_task_first_impl() which also returns the first implementation number that can be used. A pointer to one data structure specific to the scheduler can be set with starpu_sched_ctx_set_policy_data() and fetched with starpu_sched_ctx_get_policy_data(). Per-worker data structures can then be stored in it by allocating a \ref STARPU_NMAXWORKERS -sized array of structures indexed by workers. A variety of examples of advanced schedulers can be read in src/sched_policies, for instance random_policy.c, eager_central_policy.c, work_stealing_policy.c Code protected by if (_starpu_get_nsched_ctxs() > 1) can be ignored, this is for scheduling contexts, which is an experimental feature. \section DefiningANewModularSchedulingPolicy Defining A New Modular Scheduling Policy StarPU's Modularized Schedulers are made of individual Scheduling Components Modularizedly assembled as a Scheduling Tree. Each Scheduling Component has a unique purpose, such as prioritizing tasks or mapping tasks over resources. A typical Scheduling Tree is shown below.
                                 |
             starpu_push_task    |
                                 |
                                 v
                           Fifo_Component
                                |  ^
                        Push    |  |    Can_Push
                                v  |
                          Eager_Component
                                |  ^
                                |  |
                                v  |
              --------><-------------------><---------
              |  ^                                |  ^
      Push    |  |    Can_Push            Push    |  |    Can_Push
              v  |                                v  |
         Fifo_Component                       Fifo_Component
              |  ^                                |  ^
      Pull    |  |    Can_Pull            Pull    |  |    Can_Pull
              v  |                                v  |
        Worker_Component                     Worker_Component
                  |                             |
starpu_pop_task   |                             |
                  v                             v
When a task is pushed by StarPU in a Modularized Scheduler, the task moves from a Scheduling Component to another, following the hierarchy of the Scheduling Tree, and is stored in one of the Scheduling Components of the strategy. When a worker wants to pop a task from the Modularized Scheduler, the corresponding Worker Component of the Scheduling Tree tries to pull a task from its parents, following the hierarchy, and gives it to the worker if it succeeded to get one. \subsection Interface Each Scheduling Component must follow the following pre-defined Interface to be able to interact with other Scheduling Components.
  • push_task (child_component, Task) \n The calling Scheduling Component transfers a task to its Child Component. When the Push function returns, the task no longer belongs to the calling Component. The Modularized Schedulers' model relies on this function to perform prefetching. See starpu_sched_component::push_task for more details
  • pull_task (parent_component, caller_component) -> Task \n The calling Scheduling Component requests a task from its Parent Component. When the Pull function ends, the returned task belongs to the calling Component. See starpu_sched_component::pull_task for more details
  • can_push (caller_component, parent_component) \n The calling Scheduling Component notifies its Parent Component that it is ready to accept new tasks. See starpu_sched_component::can_push for more details
  • can_pull (caller_component, child_component) \n The calling Scheduling Component notifies its Child Component that it is ready to give new tasks. See starpu_sched_component::can_pull for more details
The components also provide the following useful methods:
  • starpu_sched_component::estimated_load provides an estimated load of the component
  • starpu_sched_component::estimated_end provides an estimated date of availability of workers behind the component, after processing tasks in the component and below. This is computed only if the estimated field of the tasks have been set before passing it to the component.
\subsection BuildAModularizedScheduler Building a Modularized Scheduler \subsubsection PreImplementedComponents Pre-implemented Components StarPU is currently shipped with the following four Scheduling Components :
  • Storage Components : Fifo, Prio \n Components which store tasks. They can also prioritize them if they have a defined priority. It is possible to define a threshold for those Components following two criteria : the number of tasks stored in the Component, or the sum of the expected length of all tasks stored in the Component. When a push operation tries to queue a task beyond the threshold, the push fails. When some task leaves the queue (and thus possibly more tasks can fit), this component calls can_push from ancestors.
  • Resource-Mapping Components : Mct, Heft, Eager, Random, Work-Stealing \n "Core" of the Scheduling Strategy, those Components are the ones who make scheduling choices between their children components.
  • Worker Components : Worker \n Each Worker Component modelizes a concrete worker, and copes with the technical tricks of interacting with the StarPU core. Modular schedulers thus usually have them at the bottom of their component tree.
  • Special-Purpose Components : Perfmodel_Select, Best_Implementation \n Components dedicated to original purposes. The Perfmodel_Select Component decides which Resource-Mapping Component should be used to schedule a task: a component that assumes tasks with a calibrated performance model; a component for non-yet-calibrated tasks, that will distribute them to get measurements done as quickly as possible; and a component that takes the tasks without performance models.\n The Best_Implementation Component chooses which implementation of a task should be used on the chosen resource.
\subsubsection ProgressionAndValidationRules Progression And Validation Rules Some rules must be followed to ensure the correctness of a Modularized Scheduler :
  • At least one Storage Component without threshold is needed in a Modularized Scheduler, to store incoming tasks from StarPU. It can for instance be a global component at the top of the tree, or one component per worker at the bottom of the tree, or intermediate assemblies. The important point is that the starpu_sched_component::push_task call at the top can not fail, so there has to be a storage component without threshold between the top of the tree and the first storage component with threshold, or the workers themselves.
  • At least one Resource-Mapping Component is needed in a Modularized Scheduler. Resource-Mapping Components are the only ones which can make scheduling choices, and so the only ones which can have several children.
\subsubsection ModularizedSchedulerLocking Locking in modularized schedulers Most often, components do not need to take locks. This allows e.g. the push operation to be called in parallel when tasks get released in parallel from different workers which have completed different ancestor tasks. When a component has internal information which needs to be kept coherent, the component can define its own lock to take it as it sees fit, e.g. to protect a task queue. This may however limit scalability of the scheduler. Conversely, since push and pull operations will be called concurrently from different workers, the component might prefer to use a central mutex to serialize all scheduling decisions to avoid pathological cases (all push calls decide to put their task on the same target) \subsubsection ImplementAModularizedScheduler Implementing a Modularized Scheduler The following code shows how to implement a Tree-Eager-Prefetching Scheduler. \code{.c} static void initialize_eager_prefetching_center_policy(unsigned sched_ctx_id) { /* The eager component will decide for each task which worker will run it, * and we want fifos both above and below the component */ starpu_sched_component_initialize_simple_scheduler( starpu_sched_component_eager_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFOS_BELOW, sched_ctx_id); } /* Initializing the starpu_sched_policy struct associated to the Modularized * Scheduler : only the init_sched and deinit_sched needs to be defined to * implement a Modularized Scheduler */ struct starpu_sched_policy _starpu_sched_tree_eager_prefetching_policy = { .init_sched = initialize_eager_prefetching_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "tree-eager-prefetching", .policy_description = "eager with prefetching tree policy" }; \endcode starpu_sched_component_initialize_simple_scheduler() is a helper function which makes it very trivial to assemble a modular scheduler around a scheduling decision component as seen above (here, a dumb eager decision component). Most often, a modular scheduler can be implemented that way. A modular scheduler can also be constructed hierarchically with starpu_sched_component_composed_recipe_create(). To retrieve the current scheduling tree of a task, starpu_sched_tree_get() can be called. That modular scheduler can also be built by hand in the following way: \code{.c} #define _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT 2 #define _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT 1000000000.0 static void initialize_eager_prefetching_center_policy(unsigned sched_ctx_id) { unsigned ntasks_threshold = _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT; double exp_len_threshold = _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT; [...] starpu_sched_ctx_create_worker_collection (sched_ctx_id, STARPU_WORKER_LIST); /* Create the Scheduling Tree */ struct starpu_sched_tree * t = starpu_sched_tree_create(sched_ctx_id); /* The Root Component is a Flow-control Fifo Component */ t->root = starpu_sched_component_fifo_create(NULL); /* The Resource-mapping Component of the strategy is an Eager Component */ struct starpu_sched_component *eager_component = starpu_sched_component_eager_create(NULL); /* Create links between Components : the Eager Component is the child * of the Root Component */ starpu_sched_component_connect(t->root, eager_component); /* A task threshold is set for the Flow-control Components which will * be connected to Worker Components. By doing so, this Modularized * Scheduler will be able to perform some prefetching on the resources */ struct starpu_sched_component_fifo_data fifo_data = { .ntasks_threshold = ntasks_threshold, .exp_len_threshold = exp_len_threshold, }; unsigned i; for(i = 0; i < starpu_worker_get_count() + starpu_combined_worker_get_count(); i++) { /* Each Worker Component has a Flow-control Fifo Component as * father */ struct starpu_sched_component * worker_component = starpu_sched_component_worker_new(i); struct starpu_sched_component * fifo_component = starpu_sched_component_fifo_create(&fifo_data); starpu_sched_component_connect(fifo_component, worker_component); /* Each Flow-control Fifo Component associated to a Worker * Component is linked to the Eager Component as one of its * children */ starpu_sched_component_connect(eager_component, fifo_component); } starpu_sched_tree_update_workers(t); starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)t); } /* Properly destroy the Scheduling Tree and all its Components */ static void deinitialize_eager_prefetching_center_policy(unsigned sched_ctx_id) { struct starpu_sched_tree * tree = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id); starpu_sched_tree_destroy(tree); starpu_sched_ctx_delete_worker_collection(sched_ctx_id); } /* Initializing the starpu_sched_policy struct associated to the Modularized * Scheduler : only the init_sched and deinit_sched needs to be defined to * implement a Modularized Scheduler */ struct starpu_sched_policy _starpu_sched_tree_eager_prefetching_policy = { .init_sched = initialize_eager_prefetching_center_policy, .deinit_sched = deinitialize_eager_prefetching_center_policy, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "tree-eager-prefetching", .policy_description = "eager with prefetching tree policy" }; \endcode Instead of calling starpu_sched_tree_update_workers(), one can call starpu_sched_tree_update_workers_in_ctx() to update the set of workers that are available to execute tasks in a given scheduling tree within a specific StarPU context. Other modular scheduler examples can be seen in src/sched_policies/modular_*.c For instance, \c modular-heft-prio needs performance models, decides memory nodes, uses prioritized fifos above and below, and decides the best implementation. If unsure on the result of the modular scheduler construction, you can run a simple application with FxT enabled (see \ref GeneratingTracesWithFxT), and open the generated file \c trace.html in a web-browser. \subsection ModularizedSchedulersAndParallelTasks Management of parallel task At the moment, parallel tasks can be managed in modularized schedulers through combined workers: instead of connecting a scheduling component to a worker component, one can connect it to a combined worker component (i.e. a worker component created with a combined worker id). That component will handle creating task aliases for parallel execution and push them to the different workers components. \subsection WriteASchedulingComponent Writing a Scheduling Component \subsubsection GenericSchedulingComponent Generic Scheduling Component Each Scheduling Component is instantiated from a Generic Scheduling Component, which implements a generic version of the Interface. The generic implementation of Pull, Can_Pull and Can_Push functions are recursive calls to their parents (respectively to their children). However, as a Generic Scheduling Component do not know how many children it will have when it will be instantiated, it does not implement the Push function. \subsubsection InstantiationRedefineInterface Instantiation : Redefining the Interface A Scheduling Component must implement all the functions of the Interface. It is so necessary to implement a Push function to instantiate a Scheduling Component. The implemented Push function is the "fingerprint" of a Scheduling Component. Depending on how functionalities or properties programmers want to give to the Scheduling Component they are implementing, it is possible to reimplement all the functions of the Interface. For example, a Flow-control Component reimplements the Pull and the Can_Push functions of the Interface, allowing to catch the generic recursive calls of these functions. The Pull function of a Flow-control Component can, for example, pop a task from the local storage queue of the Component, and give it to the calling Component which asks for it. \subsubsection DetailedProgressionAndValidationRules Detailed Progression and Validation Rules
  • A Reservoir is a Scheduling Component which redefines a Push and a Pull function, in order to store tasks into it. A Reservoir delimit Scheduling Areas in the Scheduling Tree.
  • A Pump is the engine source of the Scheduler : it pushes/pulls tasks to/from a Scheduling Component to another. Native Pumps of a Scheduling Tree are located at the root of the Tree (incoming Push calls from StarPU), and at the leafs of the Tree (Pop calls coming from StarPU Workers). Pre-implemented Scheduling Components currently shipped with Pumps are Flow-Control Components and the Resource-Mapping Component Heft, within their defined Can_Push functions.
  • A correct Scheduling Tree requires a Pump per Scheduling Area and per Execution Flow.
The Tree-Eager-Prefetching Scheduler shown in Section \ref ImplementAModularizedScheduler follows the previous assumptions :
                                  starpu_push_task
                                       Pump
                                         |
 Area 1                                  |
                                         |
                                         v
            -----------------------Fifo_Component-----------------------------
                                       Pump
                                        |  ^
                                Push    |  |    Can_Push
                                        v  |
 Area 2                           Eager_Component
                                        |  ^
                                        |  |
                                        v  |
                      --------><-------------------><---------
                      |  ^                                |  ^
              Push    |  |    Can_Push            Push    |  |    Can_Push
                      v  |                                v  |
            -----Fifo_Component-----------------------Fifo_Component----------
                      |  ^                                |  ^
              Pull    |  |    Can_Pull            Pull    |  |    Can_Pull
 Area 3               v  |                                v  |
                     Pump                               Pump
                Worker_Component                     Worker_Component
\section UsingaNewSchedulingPolicy Using a New Scheduling Policy There are two ways to use a new scheduling policy.
  • If the code is directly available from your application, you can set the field starpu_conf::sched_policy with a pointer to your new defined scheduling policy. \code{.c} starpu_conf_init(&conf); conf.sched_policy = &dummy_sched_policy, ret = starpu_init(&conf); \endcode
  • You can also load the new policy dynamically using the environment variable \ref STARPU_SCHED_LIB. An example is given in examples/scheduler/libdummy_sched.c and examples/scheduler/libdummy_sched.sh. The variable \ref STARPU_SCHED_LIB needs to give the location of a \c .so file which needs to define a function struct starpu_sched_policy *starpu_get_sched_lib_policy(const char *name) \code{.c} struct starpu_sched_policy *get_sched_policy(const char *name) { if (!strcmp(name, "dummy")) return &dummy_sched_policy; return NULL; } \endcode To use it, you need to define both variables \ref STARPU_SCHED_LIB and \ref STARPU_SCHED \code{.sh} STARPU_SCHED_LIB=libdummy_sched.so STARPU_SCHED=dummy yourapplication \endcode If the library defines a function struct starpu_sched_policy **starpu_get_sched_lib_policies(), the policies defined by the library can be displayed using the \c help functionality. \code{.sh} STARPU_SCHED_LIB=libdummy_sched.so STARPU_SCHED=help yourapplication \endcode
\section GraphScheduling Graph-based Scheduling For performance reasons, most of the schedulers shipped with StarPU use simple list-scheduling heuristics, assuming that the application has already set priorities. This is why they do their scheduling between when tasks become available for execution and when a worker becomes idle, without looking at the task graph. Other heuristics can however look at the task graph. Recording the task graph is expensive, so it is not available by default, the scheduling heuristic has to set \c _starpu_graph_record to \c 1 from the initialization function, to make it available. Then the _starpu_graph* functions can be used. src/sched_policies/graph_test_policy.c is an example of simple greedy policy which automatically computes priorities by bottom-up rank. The idea is that while the application submits tasks, they are only pushed to a bag of tasks. When the application is finished with submitting tasks, it calls starpu_do_schedule() (or starpu_task_wait_for_all(), which calls starpu_do_schedule()), and the starpu_sched_policy::do_schedule method of the scheduler is called. This method calls \c _starpu_graph_compute_depths() to compute the bottom-up ranks, and then uses these ranks to set priorities over tasks. It then has two priority queues, one for CPUs, and one for GPUs, and uses a dumb heuristic based on the duration of the task over CPUs and GPUs to decide between the two queues. CPU workers can then pop from the CPU priority queue, and GPU workers from the GPU priority queue. \section DebuggingScheduling Debugging Scheduling All the \ref OnlinePerformanceTools and \ref OfflinePerformanceTools can be used to get information about how well the execution proceeded, and thus the overall quality of the execution. Precise debugging can also be performed by using the \ref STARPU_TASK_BREAK_ON_PUSH, \ref STARPU_TASK_BREAK_ON_SCHED, \ref STARPU_TASK_BREAK_ON_POP, and \ref STARPU_TASK_BREAK_ON_EXEC environment variables. By setting the job_id of a task in these environment variables, StarPU will raise SIGTRAP when the task is being scheduled, pushed, or popped by the scheduler. This means that when one notices that a task is being scheduled in a seemingly odd way, one can just re-execute the application in a debugger, with some of those variables set, and the execution will stop exactly at the scheduling points of this task, thus allowing to inspect the scheduler state, etc. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/simgrid.doxy000066400000000000000000000321321507764646700262040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2020-2022 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * NOTE: XXX: also update simgrid versions in 101_building.doxy !! */ /*! \page SimGridSupport SimGrid Support StarPU can use SimGrid in order to simulate execution on an arbitrary platform. The principle is to first run the application natively on the platform that one wants to laterlater simulate, and let StarPU record performance models. One then recompiles StarPU and the application in simgrid mode, where everything is executed the same, except the execution of the codelet function, and the data transfers, which are replaced by virtual sleeps based on the performance models. This thus allows to use the performance model for tasks and data transfers, while executing natively all the rest (the task scheduler and the application, notably). This was tested with SimGrid from 3.11 to 3.16, and 3.18 to 3.36. SimGrid version 3.25 needs to be configured with -Denable_msg=ON . Other versions may have compatibility issues. 3.17 notably does not build at all. MPI simulation does not work with version 3.22. If you have installed SimGrid by hand, make sure to set \c PKG_CONFIG_PATH to the path where \c simgrid.pc was installed: \verbatim $ export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/where/simgrid/installed/lib/ppkgconfig/simgrid.pc \endverbatim \section Preparing Preparing Your Application For Simulation There are a few technical details which need to be handled for an application to be simulated through SimGrid. If the application uses gettimeofday() to make its performance measurements, the real time will be used, which will be bogus. To get the simulated time, it has to use starpu_timing_now() which returns the virtual timestamp in us. A basic example is available in tests/main/empty_task.c. For some technical reason, the application's .c file which contains \c main() has to be recompiled with \c starpu_simgrid_wrap.h, which in the SimGrid case will # define main() into starpu_main(), and it is \c libstarpu which will provide the real \c main() and will call the application's \c main(). Including \c starpu.h will already include \c starpu_simgrid_wrap.h, so usually you would not need to include \c starpu_simgrid_wrap.h explicitly, but if for some reason including the whole \c starpu.h header is not possible, you can include \c starpu_simgrid_wrap.h explicitly. To be able to test with crazy data sizes, one may want to only allocate application data if the macro \c STARPU_SIMGRID is not defined. Passing a NULL pointer to \c starpu_data_register functions is fine, data will never be read/written to by StarPU in SimGrid mode anyway. To be able to run the application with e.g. CUDA simulation on a system which does not have CUDA installed, one can fill the starpu_codelet::cuda_funcs with \c (void*)1, to express that there is a CUDA implementation, even if one does not actually provide it. StarPU will not actually run it in SimGrid mode anyway by default (unless the ::STARPU_CODELET_SIMGRID_EXECUTE or ::STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT flags are set in the codelet) \snippet simgrid.c To be included. You should update doxygen if you see this text. The full example is available in examples/cholesky/cholesky_kernels.c. \section Calibration Calibration The idea is to first compile StarPU normally, and run the application, to automatically benchmark the bus and the codelets. \verbatim $ ./configure && make $ STARPU_SCHED=dmda ./examples/matvecmult/matvecmult [starpu][_starpu_load_history_based_model] Warning: model matvecmult is not calibrated, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this. $ ... $ STARPU_SCHED=dmda ./examples/matvecmult/matvecmult TEST PASSED \endverbatim Note that we force to use the scheduler dmda to generate performance models for the application. The application may need to be run several times before the model is calibrated. \section Simulation Simulation Then, recompile StarPU, passing \ref enable-simgrid "--enable-simgrid" to configure. Make sure to keep all the other configure options the same, and notably options such as --enable-maxcudadev. \verbatim $ ./configure --enable-simgrid \endverbatim To specify the location of SimGrid, you can either set the environment variables \c SIMGRID_CFLAGS and \c SIMGRID_LIBS, or use the \c configure options \ref with-simgrid-dir "--with-simgrid-dir", \ref with-simgrid-include-dir "--with-simgrid-include-dir" and \ref with-simgrid-lib-dir "--with-simgrid-lib-dir", for example \verbatim $ ./configure --with-simgrid-dir=/opt/local/simgrid \endverbatim You can then re-run the application. \verbatim $ make $ STARPU_SCHED=dmda ./examples/matvecmult/matvecmult TEST FAILED !!! \endverbatim It is normal that the test fails: since the computation is not actually done (that is the whole point of SimGrid), the result is wrong, of course. If the performance model is not calibrated enough, the following error message will be displayed \verbatim $ STARPU_SCHED=dmda ./examples/matvecmult/matvecmult [starpu][_starpu_load_history_based_model] Warning: model matvecmult is not calibrated, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this. [starpu][_starpu_simgrid_execute_job][assert failure] Codelet matvecmult does not have a perfmodel, or is not calibrated enough \endverbatim The number of devices can be chosen as usual with \ref STARPU_NCPU, \ref STARPU_NCUDA, and \ref STARPU_NOPENCL, and the amount of GPU memory with \ref STARPU_LIMIT_CUDA_MEM, \ref STARPU_LIMIT_CUDA_devid_MEM, \ref STARPU_LIMIT_OPENCL_MEM, and \ref STARPU_LIMIT_OPENCL_devid_MEM. \section SimulationOnAnotherMachine Simulation On Another Machine The SimGrid support even permits to perform simulations on another machine, your desktop, typically. To achieve this, one still needs to perform the Calibration step on the actual machine to be simulated, then copy them to your desktop machine (the $STARPU_HOME/.starpu directory). One can then perform the Simulation step on the desktop machine, by setting the environment variable \ref STARPU_HOSTNAME to the name of the actual machine, to make StarPU use the performance models of the simulated machine even on the desktop machine. To use multiple performance models in different ranks, in case of smpi executions in a heterogeneous platform, it is possible to use the option -hostfile-platform in starpu_smpirun, that will define \ref STARPU_MPI_HOSTNAMES with the hostnames of your hostfile. If the desktop machine does not have CUDA or OpenCL, StarPU is still able to use SimGrid to simulate execution with CUDA/OpenCL devices, but the application source code will probably disable the CUDA and OpenCL codelets in that case. Since during SimGrid execution, the functions of the codelet are actually not called by default, one can use dummy functions such as the following to still permit CUDA or OpenCL execution. \section SimulationExamples Simulation Examples StarPU ships a few performance models for a couple of systems: \c attila, \c mirage, \c idgraf, and \c sirocco. See Section \ref SimulatedBenchmarks for the details. \section FakeSimulations Simulations On Fake Machines It is possible to build fake machines which do not exist, by modifying the platform file in $STARPU_HOME/.starpu/sampling/bus/machine.platform.xml by hand: one can add more CPUs, add GPUs (but the performance model file has to be extended as well), change the available GPU memory size, PCI memory bandwidth, etc. \section TweakingSimulation Tweaking Simulation The simulation can be tweaked, to be able to tune it between a very accurate simulation and a very simple simulation (which is thus close to scheduling theory results), see the \ref STARPU_SIMGRID_TRANSFER_COST, \ref STARPU_SIMGRID_CUDA_MALLOC_COST, \ref STARPU_SIMGRID_CUDA_QUEUE_COST, \ref STARPU_SIMGRID_TASK_SUBMIT_COST, \ref STARPU_SIMGRID_TASK_PUSH_COST, \ref STARPU_SIMGRID_FETCHING_INPUT_COST and \ref STARPU_SIMGRID_SCHED_COST environment variables. \section SimulationMPIApplications MPI Applications StarPU-MPI applications can also be run in SimGrid mode. smpi currently requires that StarPU be build statically only, so --disable-shared needs to be passed to ./configure. The application needs to be compiled with \c smpicc, and run using the starpu_smpirun script, for instance: \verbatim $ STARPU_SCHED=dmda starpu_smpirun -platform cluster.xml -hostfile hostfile ./mpi/tests/pingpong \endverbatim Where \c cluster.xml is a SimGrid-MPI platform description, and \c hostfile the list of MPI nodes to be used. Examples of such files are available in \c tools/perfmodels. In homogeneous MPI clusters: for each MPI node, it will just replicate the architecture referred by \ref STARPU_HOSTNAME. To use multiple performance models in different ranks, in case of a heterogeneous platform, it is possible to use the option -hostfile-platform in starpu_smpirun, that will define \ref STARPU_MPI_HOSTNAMES with the hostnames of your hostfile. To use FxT traces, libfxt itself also needs to be built statically, and with dynamic linking flags, i.e. with \verbatim CFLAGS=-fPIC ./configure --enable-static \endverbatim \section SimulationDebuggingApplications Debugging Applications By default, SimGrid uses its own implementation of threads, which prevents \c gdb from being able to inspect stacks of all threads. To be able to fully debug an application running with SimGrid, pass the --cfg=contexts/factory:thread option to the application, to make SimGrid use system threads, which \c gdb will be able to manipulate as usual. It is also worth noting SimGrid 3.21's new parameter --cfg=simix/breakpoint which allows putting a breakpoint at a precise (deterministic!) timing of the execution. If for instance in an execution trace we see that something odd is happening at time 19000ms, we can use --cfg=simix/breakpoint:19.000 and \c SIGTRAP will be raised at that point, which will thus interrupt execution within \c gdb, allowing to inspect e.g. scheduler state, etc. \section SimulationMemoryUsage Memory Usage Since kernels are not actually run and data transfers are not actually performed, the data memory does not actually need to be allocated. This allows for instance to simulate the execution of applications processing very big data on a small laptop. The application can for instance pass 1 (or whatever bogus pointer) to StarPU data registration functions, instead of allocating data. This will however require the application to take care of not trying to access the data, and will not work in MPI mode, which performs transfers. Another way is to pass the \ref STARPU_MALLOC_SIMULATION_FOLDED flag to the starpu_malloc_flags() function. An example is available in examples/mult/xgemm.c This will make it allocate a memory area which one can read/write, but optimized so that this does not actually consume memory. Of course, the values read from such area will be bogus, but this allows the application to keep e.g. data load, store, initialization as it is, and also work in MPI mode. A more aggressive alternative is to pass also the \ref STARPU_MALLOC_SIMULATION_UNIQUE flag (alongside with \ref STARPU_MALLOC_SIMULATION_FOLDED) to the starpu_malloc_flags() function. An example is available in examples/cholesky/cholesky_tag.c . This will make StarPU reuse the pointers for allocations of the same size without calling the folded allocation again, thus decreasing some pressure on memory management. Note however that notably Linux kernels refuse obvious memory overcommitting by default, so a single allocation can typically not be bigger than the amount of physical memory, see https://www.kernel.org/doc/Documentation/vm/overcommit-accounting This prevents for instance from allocating a single huge matrix. Allocating a huge matrix in several tiles is not a problem, however. sysctl vm.overcommit_memory=1 can also be used to allow such overcommit. Note however that this folding is done by remapping the same file several times, and Linux kernels will also refuse to create too many memory areas. sysctl vm.max_map_count can be used to check and change the default (65535). By default, StarPU uses a 1MiB file, so it hopefully fits in the CPU cache. However, this limits the amount of such folded memory to a bit below 64GiB. The \ref STARPU_MALLOC_SIMULATION_FOLD environment variable can be used to increase the size of the file. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy000066400000000000000000000066211507764646700313310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page SOCLOpenclExtensions SOCL OpenCL Extensions SOCL is an OpenCL implementation based on StarPU. It gives unified access to every available OpenCL device: applications can now share entities such as Events, Contexts or Command Queues between several OpenCL implementations. In addition, command queues that are created without specifying a device provide automatic scheduling of the submitted commands on OpenCL devices contained in the context to which the command queue is attached. Setting the CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE flag on a command queue also allows StarPU to reorder kernels queued on the queue, otherwise they would be serialized, and several command queues would be necessary to see kernels dispatched to the various OpenCL devices. Note: this is still an area under development and subject to change. When compiling StarPU, SOCL will be enabled if a valid OpenCL implementation is found on your system. To be able to run the SOCL test suite, the environment variable \ref SOCL_OCL_LIB_OPENCL needs to be defined to the location of the file libOpenCL.so of the OCL ICD implementation. You should for example add the following line in your file .bashrc \verbatim export SOCL_OCL_LIB_OPENCL=/usr/lib/x86_64-linux-gnu/libOpenCL.so \endverbatim You can then run the test suite in the directory socl/examples. \verbatim $ make check ... PASS: basic/basic PASS: testmap/testmap PASS: clinfo/clinfo PASS: matmul/matmul PASS: mansched/mansched ================== All 5 tests passed ================== \endverbatim The environment variable \ref OCL_ICD_VENDORS has to point to the directory where the socl.icd ICD file is installed. When compiling StarPU, the files are in the directory socl/vendors. With an installed version of StarPU, the files are installed in the directory $prefix/share/starpu/opencl/vendors. To run the tests by hand, you have to call, for example, \verbatim $ LD_PRELOAD=$SOCL_OCL_LIB_OPENCL OCL_ICD_VENDORS=socl/vendors/ socl/examples/clinfo/clinfo Number of platforms: 2 Platform Profile: FULL_PROFILE Platform Version: OpenCL 1.1 CUDA 4.2.1 Platform Name: NVIDIA CUDA Platform Vendor: NVIDIA Corporation Platform Extensions: cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll Platform Profile: FULL_PROFILE Platform Version: OpenCL 1.0 SOCL Edition (0.1.0) Platform Name: SOCL Platform Platform Vendor: Inria Platform Extensions: cl_khr_icd .... $ \endverbatim To enable the use of CPU cores via OpenCL, one can set the \ref STARPU_OPENCL_ON_CPUS environment variable to 1 and \ref STARPU_NCPUS to 0 (to avoid using CPUs both via the OpenCL driver and the normal CPU driver). */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/tcpip_support.doxy000066400000000000000000000045601507764646700274650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page TCPIPSupport TCP/IP Support \section TCPIPMasterSlave TCP/IP Master Slave Support StarPU provides a transparent way to execute applications across many nodes. The Master Slave support permits to use remote cores without thinking about data distribution. This support can be activated with the \c configure option \ref enable-tcpip-master-slave "--enable-tcpip-master-slave". The existing kernels for CPU devices can be used as such. They only have to be exposed through the name of the function in the \ref starpu_codelet::cpu_funcs_name field. Functions have to be globally-visible (i.e. not static) for StarPU to be able to look them up, and -rdynamic must be passed to gcc (or -export-dynamic to ld) so that symbols of the main program are visible. By default, one core is dedicated on the master node to manage the entire set of slaves. Choosing the number of cores on each slave device is done by setting the environment variable \ref STARPU_NTCPIPMSTHREADS "STARPU_NTCPIPMSTHREADS=\" with \ being the requested number of cores. By default, all the slave's cores are used. The master should be given the number of slaves that are expected to be run with the \ref STARPU_TCPIP_MS_SLAVES environment variable. The slaves should then be started, and their number also should be given with the \ref STARPU_TCPIP_MS_SLAVES environment variable. They should additionally be given the IP address of the master with the \ref STARPU_TCPIP_MS_MASTER environment variable. For simple local checks, one can use the starpu_tcpipexec tool, which just starts the application several times. Setting the number of slaves nodes is done by changing the -np parameter. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_extensions/transactions.doxy000066400000000000000000000073571507764646700272710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page Transactions Transactions \section TransactionsIdeas General Ideas StarPU's transactions enable the cancellation of a sequence of already submitted tasks based on a just-in-time decision. The purpose of this mechanism is typically for iterative applications to submit tasks for the next iteration ahead of time while leaving some iteration loop criterion (e.g. convergence) to be evaluated just before the first task of the next iteration is about to be scheduled. Such a sequence of collectively cancelable tasks is called a transaction \e epoch. \section TransactionsUsage Usage Some examples illustrating the usage of StarPU's transactions are available in the directory examples/transactions. \subsection TransactionsEpochCancel Epoch Cancellation If the start criterion of an epoch evaluates to \c False, all the tasks for that next epoch are canceled. Thus, StarPU's transactions let applications avoid the use of synchronization barriers commonly found between the task submission sequences of subsequent iterations, and avoid breaking the flow of dependencies in the process. Moreover, while the kernel functions of canceled transaction tasks are not executed, their dependencies are still honored in the proper order. \subsection TransactionsCodelets Transactions Enabled Codelets Codelets for tasks being part of a transaction should set their \c nbuffers field to \ref STARPU_VARIABLE_NBUFFERS. \subsection TransactionsCreation Transaction Creation A struct starpu_transaction opaque object is created using the \ref starpu_transaction_open() function, specifying a transaction start criterion callback and some user argument to be passed to that callback upon the first call. The start criterion callback should return \c True (e.g. \c !0 ) if the next transaction epoch should proceed, or \c False (e.g. \c 0 ) if the tasks belonging to that next epoch should be canceled. \c starpu_transaction_open() submits an internal task to mark the beginning of the transaction. If submitting that internal task fails with ENODEV, \c starpu_transaction_open() will return \c NULL. \subsection TransactionsTasks Transaction Tasks Tasks governed by the same transaction object should be passed that transaction object either through the \c .transaction field of \ref starpu_task structures, using the \ref STARPU_TRANSACTION argument of \ref starpu_task_insert(). \subsection TransactionsEpochNext Epoch Transition The transition from one transaction epoch to the next is expressed using the \ref starpu_transaction_next_epoch function to which the \c starpu_transaction object and a user argument are passed. Upon a call to that function, the start criterion callback is evaluated on users argument to decide whether the next epoch should proceed or be canceled. \subsection TransactionsClosing Transaction Closing The last epoch should be ended through a call to \ref starpu_transaction_close(). \section TransactionsLimitations Known limitations Support for transactions is experimental.
StarPU's transactions are currently not compatible with StarPU-MPI distributed sessions. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_faq/000077500000000000000000000000001507764646700222105ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_faq/check_list_performance.doxy000066400000000000000000000647551507764646700276270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page CheckListWhenPerformanceAreNotThere Check List When Performance Are Not There TODO: improve! To achieve good performance, we give below a list of features which should be checked. For a start, you can use \ref OfflinePerformanceTools to get a Gantt chart which will show roughly where time is spent, and focus correspondingly. \section CheckTaskSize Check Task Size Make sure that your tasks are not too small, as the StarPU runtime overhead may not be negligible. As explained in \ref TaskSizeOverhead, you can run the script \c tasks_size_overhead.sh to get an idea of the scalability of tasks depending on their duration (in µs), on your own system. Typically, 10µs-ish tasks are definitely too small, the CUDA overhead itself is much bigger than this. 1ms-ish tasks may be a good start, but will not necessarily scale to many dozens of cores, so it's better to try to get 10ms-ish tasks. It may be useful to dedicate a whole core to the main thread, so it can spend its time on submitting tasks, by setting the \ref STARPU_MAIN_THREAD_BIND environment variable to 1. Tasks durations can easily be observed when performance models are defined (see \ref PerformanceModelExample) by using the tools starpu_perfmodel_plot or starpu_perfmodel_display (see \ref PerformanceOfCodelets) When using parallel tasks, the problem is even worse since StarPU has to synchronize the tasks execution. \section ConfigurationImprovePerformance Configuration Which May Improve Performance If you do not plan to use support for GPUs or out-of-core, i.e. not use StarPU's ability to manage data coherency between several memory nodes, the \c configure option \ref enable-maxnodes "--enable-maxnodes=1" allows to considerably reduce StarPU's memory management overhead. The \c configure option \ref enable-fast "--enable-fast" disables all assertions. This makes StarPU more performant for tiny tasks by disabling all sanity checks. Only use this for measurements and production, not for development, since this will drop all basic checks. \section DataRelatedFeaturesToImprovePerformance Data Related Features Which May Improve Performance As can be seen in \ref StatesInGantt, if the application has a lot of different kinds of sizes of data, StarPU will end up freeing/reallocating data on GPU to accomodate for the different sizes. It can be very effective to round the allocated size up a bit by e.g. 10% (e.g. 11MB for all data sizes between 10MB and 11MB) so that StarPU will be able to reuse buffers of the same size for data with similar but not exactly same size. This can be registered by using starpu_matrix_data_register_allocsize(), starpu_vector_data_register_allocsize() so that StarPU records both the rounded-up data size, and the actual size used for computation. link to \ref DataManagement link to \ref DataPrefetch \section TaskRelatedFeaturesToImprovePerformance Task Related Features Which May Improve Performance link to \ref TaskGranularity link to \ref TaskSubmission link to \ref TaskPriorities \section SchedulingRelatedFeaturesToImprovePerformance Scheduling Related Features Which May Improve Performance link to \ref TaskSchedulingPolicy link to \ref TaskDistributionVsDataTransfer link to \ref Energy-basedScheduling link to \ref StaticScheduling \section CUDA-specificOptimizations CUDA-specific Optimizations For proper overlapping of asynchronous GPU data transfers, data has to be pinned by CUDA. Data allocated with starpu_malloc() is always properly pinned. If the application registers to StarPU some data which has not been allocated with starpu_malloc(), starpu_memory_pin() should be called to pin the data memory. Otherwise, the "Asynchronous copy submission" parts of the execution traces (see \ref StatesInGantt) will show the synchronous inefficiency. Note that CUDA pinning/unpinning takes a long time, so for e.g. temporary data, it is much more efficient to use a StarPU temporary data (see \ref TemporaryData), that StarPU can reuse and thus avoid the pin/unpin cost. Due to CUDA limitations, StarPU will have a hard time overlapping its own communications and the codelet computations if the application does not use a dedicated CUDA stream for its computations instead of the default stream, which synchronizes all operations of the GPU. The function starpu_cuda_get_local_stream() returns a stream which can be used by all CUDA codelet operations to avoid this issue. For instance: \code{.c} func <<>> (foo, bar); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); cudaStreamSynchronize(starpu_cuda_get_local_stream()); \endcode as well as the use of \c cudaMemcpyAsync(), etc. for each CUDA operation one needs to use a version that takes a stream parameter. If the kernel uses its own non-default stream, one can synchronize this stream with the StarPU-provided stream this way: \code{.c} cudaEvent_t event; call_kernel_with_its_own_stream() cudaEventCreateWithFlags(&event, cudaEventDisableTiming); cudaEventRecord(event, get_kernel_stream()); cudaStreamWaitEvent(starpu_cuda_get_local_stream(), event, 0); cudaEventDestroy(event); \endcode This code makes the StarPU-provided stream wait for a new event, which will be triggered by the completion of the kernel. Unfortunately, some CUDA libraries do not have stream variants of kernels. This will seriously lower the potential for overlapping. If some CUDA calls are made without specifying this local stream, synchronization needs to be explicit with cudaDeviceSynchronize() around these calls, to make sure that they get properly synchronized with the calls using the local stream. Notably, \c cudaMemcpy() and \c cudaMemset() are actually asynchronous and need such explicit synchronization! Use \c cudaMemcpyAsync() and \c cudaMemsetAsync() instead. Calling starpu_cublas_init() will ensure StarPU to properly call the CUBLAS library functions, and starpu_cublas_shutdown() will synchronously deinitialize the CUBLAS library on every CUDA device. Some libraries like Magma may however change the current stream of CUBLAS v1, one then has to call starpu_cublas_set_stream() at the beginning of the codelet to make sure that CUBLAS is really using the proper stream. When using CUBLAS v2, starpu_cublas_get_local_handle() can be called to queue CUBLAS kernels with the proper configuration. Similary, calling starpu_cublasLt_init() makes StarPU create CUBLASLT handles on each CUDA device, starpu_cublasLt_get_local_handle() can then be used to queue CUBLASLT kernels with the proper configuration. starpu_cublasLt_shutdown() will synchronously deinitialize the CUBLASLT library on every CUDA device. Since CUBLASLT handles are not bundled with CUDA streams, users should call starpu_cuda_get_local_stream() to get a CUDA stream before calling a CUBLASLT API. Similarly, calling starpu_cusparse_init() makes StarPU create CUSPARSE handles on each CUDA device, starpu_cusparse_get_local_handle() can then be used to queue CUSPARSE kernels with the proper configuration. starpu_cusparse_shutdown() will synchronously deinitialize the CUSPARSE library on every CUDA device. Similarly, calling starpu_cusolver_init() makes StarPU create CUSOLVER handles on each CUDA device, starpu_cusolverDn_get_local_handle(), starpu_cusolverSp_get_local_handle(), starpu_cusolverRf_get_local_handle(), can then be used to queue CUSOLVER kernels with the proper configuration. starpu_cusolver_shutdown() can be used to clear these handles. It is useful to use a ::STARPU_SCRATCH buffer whose size was set to the amount returned by \c cusolver*Spotrf_bufferSize . An example can be seen in \c examples/cholesky If the kernel can be made to only use this local stream or other self-allocated streams, i.e. the whole kernel submission can be made asynchronous, then one should enable asynchronous execution of the kernel. This means setting the flag ::STARPU_CUDA_ASYNC in the corresponding field starpu_codelet::cuda_flags, and dropping the cudaStreamSynchronize() call at the end of the cuda_func function, so that it returns immediately after having queued the kernel to the local stream. That way, StarPU will be able to submit and complete data transfers while kernels are executing, instead of only at each kernel submission. The kernel just has to make sure that StarPU can use the local stream to synchronize with the kernel startup and completion. Using the flag ::STARPU_CUDA_ASYNC also permits to enable concurrent kernel execution, on cards which support it (Kepler and later, notably). This is enabled by setting the environment variable \ref STARPU_NWORKER_PER_CUDA to the number of kernels to be executed concurrently. This is useful when kernels are small and do not feed the whole GPU with threads to run. Concerning memory allocation, you should really not use \c cudaMalloc()/ \c cudaFree() within the kernel, since \c cudaFree() introduces way too many synchronizations within CUDA itself. You should instead add a parameter to the codelet with the ::STARPU_SCRATCH mode access. You can then pass to the task a handle registered with the desired size but with the \c NULL pointer, the handle can even be shared between tasks, StarPU will allocate per-task data on the fly before task execution, and reuse the allocated data between tasks. See examples/pi/pi_redux.c for an example of use. \section OpenCL-specificOptimizations OpenCL-specific Optimizations If the kernel can be made to only use the StarPU-provided command queue or other self-allocated queues, i.e. the whole kernel submission can be made asynchronous, then one should enable asynchronous execution of the kernel. This means setting the flag ::STARPU_OPENCL_ASYNC in the corresponding field starpu_codelet::opencl_flags and dropping the clFinish() and starpu_opencl_collect_stats() calls at the end of the kernel, so that it returns immediately after having queued the kernel to the provided queue. That way, StarPU will be able to submit and complete data transfers while kernels are executing, instead of only at each kernel submission. The kernel just has to make sure that StarPU can use the command queue it has provided to synchronize with the kernel startup and completion. \section DetectionStuckConditions Detecting Stuck Conditions It may happen that StarPU does not make progress for a long period of time. It may be due to contention inside StarPU, but it may also be an external problem, such as a stuck MPI or CUDA driver. export STARPU_WATCHDOG_TIMEOUT=10000 (\ref STARPU_WATCHDOG_TIMEOUT) allows making StarPU print an error message whenever StarPU does not terminate any task for 10ms, but lets the application continue normally. In addition to that, export STARPU_WATCHDOG_CRASH=1 (\ref STARPU_WATCHDOG_CRASH) raises SIGABRT in this condition, thus allowing to catch the situation in \c gdb. It can also be useful to type handle SIGABRT nopass in gdb to be able to let the process continue, after inspecting the state of the process. \section HowToLimitMemoryPerNode How to Limit Memory Used By StarPU And Cache Buffer Allocations By default, StarPU makes sure to use at most 90% of the memory of GPU devices, moving data in and out of the device as appropriate, as well as using prefetch and write-back optimizations. The environment variables \ref STARPU_LIMIT_CUDA_MEM, \ref STARPU_LIMIT_CUDA_devid_MEM, \ref STARPU_LIMIT_OPENCL_MEM, and \ref STARPU_LIMIT_OPENCL_devid_MEM can be used to control how much (in MiB) of the GPU device memory should be used at most by StarPU (the default value is to use 90% of the available memory). By default, the usage of the main memory is not limited, as the default mechanisms do not provide means to evict main memory when it gets too tight. This also means that by default, StarPU will not cache buffer allocations in main memory, since it does not know how much of the system memory it can afford. The environment variable \ref STARPU_LIMIT_CPU_MEM can be used to specify how much (in MiB) of the main memory should be used at most by StarPU for buffer allocations. This way, StarPU will be able to cache buffer allocations (which can be a real benefit if a lot of buffers are involved, or if allocation fragmentation can become a problem), and when using \ref OutOfCore, StarPU will know when it should evict data out to the disk. It should be noted that by default only buffer allocations automatically done by StarPU are accounted here, i.e. allocations performed through starpu_malloc_on_node() which are used by the data interfaces (matrix, vector, etc.). This does not include allocations performed by the application through e.g. malloc(). It does not include allocations performed through starpu_malloc() either, only allocations performed explicitly with the flag \ref STARPU_MALLOC_COUNT, i.e. by calling \code{.c} starpu_malloc_flags(STARPU_MALLOC_COUNT) \endcode are taken into account. And starpu_free_flags() can be called to free the memory that was previously allocated with starpu_malloc_flags(). If the application wants to make StarPU aware of its own allocations, so that StarPU knows precisely how much data is allocated, and thus when to evict allocation caches or data out to the disk, starpu_memory_allocate() can be used to specify an amount of memory to be accounted for. starpu_memory_deallocate() can be used to account freed memory back. Those can for instance be used by data interfaces with dynamic data buffers: instead of using starpu_malloc_on_node(), they would dynamically allocate data with \c malloc()/\c realloc(), and notify StarPU of the delta by calling starpu_memory_allocate() and starpu_memory_deallocate(). By default, the memory management system uses a set of default flags for each node when allocating memory. starpu_malloc_on_node_set_default_flags() can be used to modify these default flags on a specific node. starpu_memory_get_total() and starpu_memory_get_available() can be used to get an estimation of how much memory is available. starpu_memory_wait_available() can also be used to block until an amount of memory becomes available, but it may be preferable to call \code{.c} starpu_memory_allocate(STARPU_MEMORY_WAIT) \endcode to reserve this amount immediately. \section HowToReduceTheMemoryFootprintOfInternalDataStructures How To Reduce The Memory Footprint Of Internal Data Structures It is possible to reduce the memory footprint of the task and data internal structures of StarPU by describing the shape of your machine and/or your application when calling \c configure. To reduce the memory footprint of the data internal structures of StarPU, one can set the \c configure parameters \ref enable-maxcpus "--enable-maxcpus", \ref enable-maxnumanodes "--enable-maxnumanodes", \ref enable-maxcudadev "--enable-maxcudadev", \ref enable-maxopencldev "--enable-maxopencldev" and \ref enable-maxnodes "--enable-maxnodes" to give StarPU the architecture of the machine it will run on, thus tuning the size of the structures to the machine. To reduce the memory footprint of the task internal structures of StarPU, one can set the \c configure parameter \ref enable-maxbuffers "--enable-maxbuffers" to give StarPU the maximum number of buffers that a task can use during an execution. For example, in the Cholesky factorization (dense linear algebra application), the GEMM task uses up to 3 buffers, so it is possible to set the maximum number of task buffers to 3 to run a Cholesky factorization on StarPU. The size of the various structures of StarPU can be printed by tests/microbenchs/display_structures_size. It is also often useless to submit \b all the tasks at the same time. Task submission can be blocked when a reasonable given number of tasks have been submitted, by setting the environment variables \ref STARPU_LIMIT_MIN_SUBMITTED_TASKS and \ref STARPU_LIMIT_MAX_SUBMITTED_TASKS. \code{.sh} export STARPU_LIMIT_MAX_SUBMITTED_TASKS=10000 export STARPU_LIMIT_MIN_SUBMITTED_TASKS=9000 \endcode will make StarPU block submission when 10000 tasks are submitted, and unblock submission when only 9000 tasks are still submitted, i.e. 1000 tasks have completed among the 10000 which were submitted when submission was blocked. Of course this may reduce parallelism if the threshold is set too low. The precise balance depends on the application task graph. These values can also be specified with the functions starpu_set_limit_min_submitted_tasks() and starpu_set_limit_max_submitted_tasks(). An idea of how much memory is used for tasks and data handles can be obtained by setting the environment variable \ref STARPU_MAX_MEMORY_USE to 1. \section HowtoReuseMemory How To Reuse Memory When your application needs to allocate more data than the available amount of memory usable by StarPU (given by starpu_memory_get_available()), the allocation cache system can reuse data buffers used by previously executed tasks. For this system to work with MPI tasks, you need to submit tasks progressively instead of as soon as possible, because in the case of MPI receives, the allocation cache check for reusing data buffers will be done at submission time, not at execution time. There are two options to control the task submission flow. The first one is by controlling the number of submitted tasks during the whole execution. This can be done whether by setting the environment variables \ref STARPU_LIMIT_MAX_SUBMITTED_TASKS and \ref STARPU_LIMIT_MIN_SUBMITTED_TASKS to tell StarPU when to stop submitting tasks and when to wake up and submit tasks again, or by explicitly calling starpu_task_wait_for_n_submitted() in your application code for finest grain control (for example, between two iterations of a submission loop). The second option is to control the memory size of the allocation cache. This can be done in the application by using jointly starpu_memory_get_available() and starpu_memory_wait_available() to submit tasks only when there is enough memory space to allocate the data needed by the task, i.e. when enough data are available for reuse in the allocation cache. \section PerformanceModelCalibration Performance Model Calibration Most schedulers are based on an estimation of codelet duration on each kind of processing unit. For this to be possible, the application programmer needs to configure a performance model for the codelets of the application (see \ref PerformanceModelExample for instance). History-based performance models use on-line calibration. When using a scheduler which requires such performance model, StarPU will automatically calibrate codelets which have never been calibrated yet, and save the result in $STARPU_HOME/.starpu/sampling/codelets. The models are indexed by machine name. They can then be displayed various ways, see \ref PerformanceOfCodelets . By default, StarPU stores separate performance models according to the hostname of the system. To avoid having to calibrate performance models for each node of a homogeneous cluster for instance, the model can be shared by using export STARPU_HOSTNAME=some_global_name (\ref STARPU_HOSTNAME), where some_global_name is the name of the cluster for instance, which thus overrides the hostname of the system. By default, StarPU stores separate performance models for each GPU. To avoid having to calibrate performance models for each GPU of a homogeneous set of GPU devices for instance, the model can be shared by using the environment variables \ref STARPU_PERF_MODEL_HOMOGENEOUS_CUDA, \ref STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL and \ref STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS depending on your GPU device type. \code{.shell} export STARPU_PERF_MODEL_HOMOGENEOUS_CUDA=1 export STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL=1 export STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS=1 \endcode To force continuing calibration, use export STARPU_CALIBRATE=1 (\ref STARPU_CALIBRATE). This may be necessary if your application has not-so-stable performance. It may also be useful to use STARPU_SCHED=eager to get tasks distributed over the various workers. StarPU will force calibration (and thus ignore the current result) until 10 (_STARPU_CALIBRATION_MINIMUM) measurements have been made on each architecture, to avoid bad scheduling decisions just because the first measurements were not so good. Note that StarPU will not record the very first measurement for a given codelet and a given size, because it would most often be hit by computation library loading or initialization. StarPU will also throw measurements away if it notices that after computing an average execution time, it notices that most subsequent tasks have an execution time largely outside the computed average ("Too big deviation for model..." warning messages). By looking at the details of the message and their reported measurements, it can highlight that your computation library really has non-stable measurements, which is probably an indication of an issue in the computation library, or the execution environment (e.g. rogue daemons). Details on the current performance model status can be obtained with the tool starpu_perfmodel_display: the option -l lists the available performance models, and the option -s allows choosing the performance model to be displayed. The result looks like: \verbatim $ starpu_perfmodel_display -s starpu_slu_lu_model_getrf performance model for cpu_impl_0 # hash size flops mean dev n 914f3bef 1048576 0.000000e+00 2.503577e+04 1.982465e+02 8 3e921964 65536 0.000000e+00 5.527003e+02 1.848114e+01 7 e5a07e31 4096 0.000000e+00 1.717457e+01 5.190038e+00 14 ... \endverbatim It shows that for the LU 11 kernel with a 1MiB matrix, the average execution time on CPUs was about 25ms, with a 0.2ms standard deviation, over 8 samples. It is a good idea to check this before doing actual performance measurements. A graph (both in postscript and png format) can be drawn by using the tool starpu_perfmodel_plot: \verbatim $ starpu_perfmodel_plot -s starpu_slu_lu_model_getrf 4096 16384 65536 262144 1048576 4194304 $ gnuplot starpu_starpu_slu_lu_model_getrf.gp $ gv starpu_starpu_slu_lu_model_getrf.eps \endverbatim \image html starpu_starpu_slu_lu_model_11.png \image latex starpu_starpu_slu_lu_model_11.png "" width=\textwidth If a kernel source code was modified (e.g. performance improvement), the calibration information is stale and should be dropped, to re-calibrate from start. This can be done by using export STARPU_CALIBRATE=2 (\ref STARPU_CALIBRATE). Note: history-based performance models get calibrated only if a performance-model-based scheduler is chosen. The history-based performance models can also be explicitly filled by the application without execution, if e.g. the application already has a series of measurements. This can be done by using starpu_perfmodel_update_history(), for instance: \code{.c} static struct starpu_perfmodel perf_model = { .type = STARPU_HISTORY_BASED, .symbol = "my_perfmodel", }; struct starpu_codelet cl = { .cuda_funcs = { cuda_func1, cuda_func2 }, .nbuffers = 1, .modes = {STARPU_W}, .model = &perf_model }; void feed(void) { struct my_measure *measure; struct starpu_task task; starpu_task_init(&task); task.cl = &cl; for (measure = &measures[0]; measure < measures[last]; measure++) { starpu_data_handle_t handle; starpu_vector_data_register(&handle, -1, 0, measure->size, sizeof(float)); task.handles[0] = handle; starpu_perfmodel_update_history(&perf_model, &task, STARPU_CUDA_DEFAULT + measure->cudadev, 0, measure->implementation, measure->time); starpu_task_clean(&task); starpu_data_unregister(handle); } } \endcode Measurement has to be provided in milliseconds for the completion time models, and in Joules for the energy consumption models. \section Profiling Profiling A quick view of how many tasks each worker has executed can be obtained by setting export STARPU_WORKER_STATS=1 (\ref STARPU_WORKER_STATS). This is a convenient way to check that execution did happen on accelerators, without penalizing performance with the profiling overhead. The environment variable \ref STARPU_WORKER_STATS_FILE can be defined to specify a filename in which to display statistics, by default statistics are printed on the standard error stream. A quick view of how much data transfers have been issued can be obtained by setting export STARPU_BUS_STATS=1 (\ref STARPU_BUS_STATS). The environment variable \ref STARPU_BUS_STATS_FILE can be defined to specify a filename in which to display statistics, by default statistics are printed on the standard error stream. More detailed profiling information can be enabled by using export STARPU_PROFILING=1 (\ref STARPU_PROFILING) or by calling starpu_profiling_status_set() from the source code. Statistics on the execution can then be obtained by using export STARPU_BUS_STATS=1 and export STARPU_WORKER_STATS=1 . More details on performance feedback are provided in the next chapter. \section OverheadProfiling Overhead Profiling \ref OfflinePerformanceTools can already provide an idea of to what extent and which part of StarPU brings an overhead on the execution time. To get a more precise analysis of which parts of StarPU bring the most overhead, gprof can be used. First, recompile and reinstall StarPU with gprof support: \code ../configure --enable-perf-debug --disable-shared --disable-build-tests --disable-build-examples \endcode Make sure not to leave a dynamic version of StarPU in the target path: remove any remaining libstarpu-*.so Then relink your application with the static StarPU library, make sure that running ldd on your application does not mention any \c libstarpu (i.e. it's really statically-linked). \code gcc test.c -o test $(pkg-config --cflags starpu-1.4) $(pkg-config --libs starpu-1.4) \endcode Now you can run your application, this will create a file gmon.out in the current directory, it can be processed by running gprof on your application: \code gprof ./test \endcode This will dump an analysis of the time spent in StarPU functions. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_faq/faq.doxy000066400000000000000000000553651507764646700237020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page FrequentlyAskedQuestions Frequently Asked Questions \section HowToInitializeAComputationLibraryOnceForEachWorker How To Initialize A Computation Library Once For Each Worker? Some libraries need to be initialized once for each concurrent instance that may run on the machine. For instance, a C++ computation class which is not thread-safe by itself, but for which several instantiated objects of that class can be used concurrently. This can be used in StarPU by initializing one such object per worker. For instance, the libstarpufft example does the following to be able to use FFTW on CPUs. Some global array stores the instantiated objects: \code{.c} fftw_plan plan_cpu[STARPU_NMAXWORKERS]; \endcode At initialization time of libstarpu, the objects are initialized: \code{.c} int workerid; for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) { switch (starpu_worker_get_type(workerid)) { case STARPU_CPU_WORKER: plan_cpu[workerid] = fftw_plan(...); break; } } \endcode And in the codelet body, they are used: \code{.c} static void fft(void *descr[], void *_args) { int workerid = starpu_worker_get_id(); fftw_plan plan = plan_cpu[workerid]; ... fftw_execute(plan, ...); } \endcode We call starpu_worker_get_id() to retrieve the worker ID associated with the currently executing task, or call starpu_worker_get_id_check() with the error checking. This however is not sufficient for FFT on CUDA: initialization has to be done from the workers themselves. This can be done thanks to starpu_execute_on_each_worker() or starpu_execute_on_each_worker_ex() with a specified task name, or starpu_execute_on_specific_workers() with specified workers. For instance, libstarpufft does the following. \code{.c} static void fft_plan_gpu(void *args) { plan plan = args; int n2 = plan->n2[0]; int workerid = starpu_worker_get_id(); cufftPlan1d(&plan->plans[workerid].plan_cuda, n, _CUFFT_C2C, 1); cufftSetStream(plan->plans[workerid].plan_cuda, starpu_cuda_get_local_stream()); } void starpufft_plan(void) { starpu_execute_on_each_worker(fft_plan_gpu, plan, STARPU_CUDA); } \endcode \section HardwareTopology Hardware Topology \subsection InteroperabilityHWLOC Interoperability hwloc If hwloc is used, we can call starpu_get_hwloc_topology() to get the hwloc topology used by StarPU, and call starpu_get_pu_os_index() to get the OS index of a PU. We can call starpu_worker_get_hwloc_cpuset() to retrieve the hwloc CPU set associated with a worker. \subsection TopologyMemory Memory There are various functions that we can use to retrieve information of memory node, such as to get the name of a memory node we call starpu_memory_node_get_name() and to get the kind of a memory node we call starpu_node_get_kind(). To retrieve the device ID associated with a memory node we call starpu_memory_node_get_devid(). We can call starpu_worker_get_local_memory_node() to retrieve the local memory node associated with the current worker. We can also specify a worker and call starpu_worker_get_memory_node() to retrieve the associated memory node. To get the type of memory node associated with a kind of worker we call starpu_worker_get_memory_node_kind(). If we want to know the total number of memory nodes in the system we can call starpu_memory_nodes_get_count(), and we can also retrieve the total number of memory nodes in the system that match a specific memory node kind by calling starpu_memory_nodes_get_count_by_kind(). We can call starpu_memory_node_get_ids_by_type() to get the identifiers of memory nodes in the system that match a specific memory node type. To obtain a bitmap representing logical indexes of NUMA nodes we can call starpu_get_memory_location_bitmap(). \subsection TopologyWorkers Workers StarPU provides a range of functions for querying and managing the worker configurations on a given system. One such function is starpu_worker_get_count(), which returns the total number of workers in the system. In addition to this, there are also specific functions to obtain the number of workers associated with various processing units controlled by StarPU: to retrieve the number of CPUs we can call starpu_cpu_worker_get_count(), to retrieve the number of CUDA devices we can call starpu_cuda_worker_get_count(), to retrieve the number of HIP devices we can call starpu_hip_worker_get_count(), to retrieve the number of OpenCL devices we can call starpu_opencl_worker_get_count(), to retrieve the number of MPI Master Slave workers we can call starpu_mpi_ms_worker_get_count(), and to retrieve the number of TCPIP Master Slave workers we can call starpu_tcpip_ms_worker_get_count(). There are various functions that we can use to retrieve information of the worker. We call starpu_worker_get_name() to get the name of the worker, we call starpu_worker_get_devid() to get the device ID of the worker or call starpu_worker_get_devids() to retrieve the list of device IDs that are associated with a worker, and call starpu_worker_get_devnum() to get number of the device controlled by the worker which begin from 0. We call starpu_worker_get_subworkerid() to get the ID of sub-worker for the device. We call starpu_worker_get_sched_ctx_list() to retrieve a list of scheduling contexts that a worker is associated with. We call starpu_worker_get_stream_workerids() to retrieve the list of worker IDs that share the same stream as a given worker. To retrieve the total number of NUMA nodes in the system we call starpu_memory_nodes_get_numa_count(). To get the device identifier associated with a specific NUMA node and to get the NUMA node identifier associated with a specific device we can call starpu_memory_nodes_numa_id_to_devid() and starpu_memory_nodes_numa_devid_to_id() respectively. We can also print out information about the workers currently registered with StarPU. starpu_worker_display_all() prints out information of all workers, starpu_worker_display_names() prints out information of all the workers of the given type, starpu_worker_display_count() prints out the number of workers of the given type. StarPU provides various functions associated to the type of processing unit, such as starpu_worker_get_type(), which returns the type of processing unit associated to the worker, e.g. CPU or CUDA. We can call starpu_worker_get_type_as_string() to retrieve a string representation of the type of a worker or call starpu_worker_get_type_from_string() to retrieve a worker type enumeration value from a string representation of a worker type or call starpu_worker_get_type_as_env_var() to retrieve a string representation of the type of a worker that can be used as an environment variable. Another function, starpu_worker_get_count_by_type(), returns the number of workers of a specific type. starpu_worker_get_ids_by_type() returns a list of worker IDs for a specific type, and starpu_worker_get_by_type() returns the ID of the specific worker that has the specific type, starpu_worker_get_by_devid() returns the ID of the worker that has the specific type and device ID. To get the type of worker associated with a kind of memory node we call starpu_memory_node_get_worker_archtype(). To check if type of processing unit matches one of StarPU's defined worker architectures we can call starpu_worker_archtype_is_valid(), while in order to convert an architecture mask to a worker architecture we can call starpu_arch_mask_to_worker_archtype(). To retrieve the binding ID of the worker associated with the currently executing task we can call starpu_worker_get_bindid(), it is useful for applications that require information about the binding of a particular task to a specific processor. We can call starpu_bindid_get_workerids() to retrieve the list of worker IDs that are bound to a given binding ID. We can call starpu_workers_get_tree() to get information about the tree facilities provided by StarPU. \subsection TopologyBus Bus StarPU provides several functions to declare or retrieve information about the buses in a machine. The function starpu_bus_get_count() can be used to get the total number of buses available. To obtain the identifier of the bus between a source and destination point, the function starpu_bus_get_id() can be called. The source and destination points of a bus can be obtained by calling the functions starpu_bus_get_src() and starpu_bus_get_dst() respectively. Furthermore, users can use the function starpu_bus_set_direct() to declare that there is a direct link between a GPU and memory to the driver. The direct link can significantly reduce data transfer latency and improve overall performance. Moreover, users can use the function starpu_bus_get_direct() to retrieve information about whether a direct link has been established between a GPU and memory using the starpu_bus_set_direct() function. starpu_bus_set_ngpus() and starpu_bus_get_ngpus() functions can be used to declare and retrieve the number of GPUs of this bus that users need. \section UsingTheDriverAPI Using The Driver API \ref API_Running_Drivers \code{.c} int ret; struct starpu_driver = { .type = STARPU_CUDA_WORKER, .id.cuda_id = 0 }; ret = starpu_driver_init(&d); if (ret != 0) error(); while (some_condition) { ret = starpu_driver_run_once(&d); if (ret != 0) error(); } ret = starpu_driver_deinit(&d); if (ret != 0) error(); \endcode same as: \code{.c} int ret; struct starpu_driver = { .type = STARPU_CUDA_WORKER, .id.cuda_id = 0 }; ret = starpu_driver_run(&d); if (ret != 0) error(); \endcode The function starpu_driver_run() initializes the given driver, run it until starpu_drivers_request_termination() is called. To add a new kind of device to the structure starpu_driver, one needs to:
  1. Add a member to the union starpu_driver::id
  2. Modify the internal function _starpu_launch_drivers() to make sure the driver is not always launched.
  3. Modify the function starpu_driver_run() so that it can handle another kind of architecture. The function starpu_driver_run() is equal to call starpu_driver_init(), then to call starpu_driver_run_once() in a loop, and finally to call starpu_driver_deinit().
  4. Write the new function _starpu_run_foobar() in the corresponding driver.
\section On-GPURendering On-GPU Rendering Graphical-oriented applications need to draw the result of their computations, typically on the very GPU where these happened. Technologies such as OpenGL/CUDA interoperability permit to let CUDA directly work on the OpenGL buffers, making them thus immediately ready for drawing, by mapping OpenGL buffer, textures or renderbuffer objects into CUDA. CUDA however imposes some technical constraints: peer memcpy has to be disabled, and the thread that runs OpenGL has to be the one that runs CUDA computations for that GPU. To achieve this with StarPU, pass the option \ref disable-cuda-memcpy-peer "--disable-cuda-memcpy-peer" to configure (TODO: make it dynamic), OpenGL/GLUT has to be initialized first, and the interoperability mode has to be enabled by using the field starpu_conf::cuda_opengl_interoperability, and the driver loop has to be run by the application, by using the field starpu_conf::not_launched_drivers to prevent StarPU from running it in a separate thread, and by using starpu_driver_run() to run the loop. The examples gl_interop and gl_interop_idle show how it articulates in a simple case, where rendering is done in task callbacks. The former uses glutMainLoopEvent to make GLUT progress from the StarPU driver loop, while the latter uses glutIdleFunc to make StarPU progress from the GLUT main loop. Then, to use an OpenGL buffer as a CUDA data, StarPU simply needs to be given the CUDA pointer at registration, for instance: \code{.c} /* Get the CUDA worker id */ for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) if (starpu_worker_get_type(workerid) == STARPU_CUDA_WORKER) break; /* Build a CUDA pointer pointing at the OpenGL buffer */ cudaGraphicsResourceGetMappedPointer((void**)&output, &num_bytes, resource); /* And register it to StarPU */ starpu_vector_data_register(&handle, starpu_worker_get_memory_node(workerid), output, num_bytes / sizeof(float4), sizeof(float4)); /* The handle can now be used as usual */ starpu_task_insert(&cl, STARPU_RW, handle, 0); /* ... */ /* This gets back data into the OpenGL buffer */ starpu_data_unregister(handle); \endcode and display it e.g. in the callback function. \section UsingStarPUWithMKL Using StarPU With MKL 11 (Intel Composer XE 2013) Some users had issues with MKL 11 and StarPU (versions 1.1rc1 and 1.0.5) on Linux with MKL, using 1 thread for MKL and doing all the parallelism using StarPU (no multithreaded tasks), setting the environment variable MKL_NUM_THREADS to 1, and using the threaded MKL library, with iomp5. Using this configuration, StarPU only uses 1 core, no matter the value of \ref STARPU_NCPU. The problem is actually a thread pinning issue with MKL. The solution is to set the environment variable KMP_AFFINITY to disabled (http://software.intel.com/sites/products/documentation/studio/composer/en-us/2011Update/compiler_c/optaps/common/optaps_openmp_thread_affinity.htm). \section ThreadBindingOnNetBSD Thread Binding on NetBSD When using StarPU on a NetBSD machine, if the topology discovery library hwloc is used, thread binding will fail. To prevent the problem, you should at least use the version 1.7 of hwloc, and also issue the following call: \verbatim $ sysctl -w security.models.extensions.user_set_cpu_affinity=1 \endverbatim Or add the following line in the file /etc/sysctl.conf \verbatim security.models.extensions.user_set_cpu_affinity=1 \endverbatim \section StarPUEatsCPUs StarPU permanently eats 100% of all CPUs Yes, this is on purpose. By default, StarPU uses active polling on task queues to minimize wake-up latency for better overall performance. We can call starpu_is_paused() to check whether the task processing by workers has been paused or not. If eating CPU time is a problem (e.g. application running on a desktop), pass option \ref enable-blocking-drivers "--enable-blocking-drivers" to configure. This will add some overhead when putting CPU workers to sleep or waking them, but avoid eating 100% CPU permanently. \section PauseResume Interleaving StarPU and non-StarPU code If your application only partially uses StarPU, and you do not want to call starpu_init() / starpu_shutdown() at the beginning/end of each section, StarPU workers will poll for work between the sections. To avoid this behavior, you can "pause" StarPU with the starpu_pause() function. This will prevent the StarPU workers from accepting new work (tasks that are already in progress will not be frozen), and stop them from polling for more work. Note that this does not prevent you from submitting new tasks, but they won't execute until starpu_resume() is called. Also note that StarPU must not be paused when you call starpu_shutdown(), and that this function pair works in a push/pull manner, i.e. you need to match the number of calls to these functions to clear their effect. One way to use these functions could be: \code{.c} starpu_init(NULL); starpu_worker_wait_for_initialisation(); // Wait for the worker to complete its initialization process starpu_pause(); // To submit all the tasks without a single one executing submit_some_tasks(); starpu_resume(); // The tasks start executing starpu_task_wait_for_all(); starpu_pause(); // Stop the workers from polling // Non-StarPU code starpu_resume(); // ... starpu_shutdown(); \endcode \section GPUEatingCores When running with CUDA or OpenCL devices, I am seeing less CPU cores Yes, this is on purpose. Since GPU devices are way faster than CPUs, StarPU needs to react quickly when a task is finished, to feed the GPU with another task (StarPU actually submits a couple of tasks in advance to pipeline this, but filling the pipeline still has to be happening often enough), and thus it has to dedicate threads for this, and this is a very CPU-consuming duty. StarPU thus dedicates one CPU core for driving each GPU by default. Such dedication is also useful when a codelet is hybrid, i.e. while kernels are running on the GPU, the codelet can run some computation, which thus be run by the CPU core instead of driving the GPU. One can choose to dedicate only one thread for all the CUDA devices by setting the \ref STARPU_CUDA_THREAD_PER_DEV environment variable to \c 1. The application however should use ::STARPU_CUDA_ASYNC on its CUDA codelets (asynchronous execution), otherwise the execution of a synchronous CUDA codelet will monopolize the thread, and other CUDA devices will thus starve while it is executing. \section CUDADrivers StarPU does not see my CUDA device First, make sure that CUDA is properly running outside StarPU: build and run the following program with \c -lcudart : \code{.c} #include #include #include int main(void) { int n, i, version; cudaError_t err; err = cudaGetDeviceCount(&n); if (err) { fprintf(stderr,"cuda error %d\n", err); exit(1); } cudaDriverGetVersion(&version); printf("driver version %d\n", version); cudaRuntimeGetVersion(&version); printf("runtime version %d\n", version); printf("\n"); for (i = 0; i < n; i++) { struct cudaDeviceProp props; printf("CUDA%d\n", i); err = cudaGetDeviceProperties(&props, i); if (err) { fprintf(stderr,"cudaGetDeviceProperties cuda error %d\n", err); continue; } printf("%s\n", props.name); printf("%0.3f GB\n", (float) props.totalGlobalMem / (1<<30)); printf("%u MP\n", props.multiProcessorCount); printf("\n"); err = cudaSetDevice(i); if (err) { fprintf(stderr,"cudaSetDevice(%d) cuda error %d\n", err, i); continue; } err = cudaFree(0); if (err) { fprintf(stderr,"cudaFree(0) on %d cuda error %d\n", err, i); continue; } } return 0; } \endcode If that program does not find your device, the problem is not at the StarPU level, but with the CUDA drivers, check the documentation of your CUDA setup. This program is available in the source directory of StarPU in tools/gpus/check_cuda.c, along with another CUDA program tools/gpus/cuda_list.cu. \section HIPDrivers StarPU does not see my HIP device First, make sure that HIP is properly running outside StarPU: build and run the following program with \c hipcc : \code{.c} #include #include #include int main(void) { int i, cnt; hipError_t hipres; hipres = hipGetDeviceCount(&cnt); if (hipres) { fprintf(stderr,"hip error: <%s>\n", hipGetErrorString(hipres)); exit(1); } printf("number of hip devices: %d\n", cnt); for (i = 0; i < cnt; i++) { struct hipDeviceProp_t props; printf("HIP%d\n", i); hipres = hipGetDeviceProperties(&props, i); if (hipres) { fprintf(stderr,"hip error: <%s>\n", hipGetErrorString(hipres)); continue; } printf("%s\n", props.name); printf("%0.3f GB\n", (float) props.totalGlobalMem / (1<<30)); printf("%u MP\n", props.multiProcessorCount); printf("\n"); } return 0; } \endcode If that program does not find your device, the problem is not at the StarPU level, but with the HIP drivers, check the documentation of your HIP setup. This program is available in the source directory of StarPU in tools/gpus/check_hip.c. \section OpenCLDrivers StarPU does not see my OpenCL device First, make sure that OpenCL is properly running outside StarPU: build and run the following program with \c -lOpenCL : \code{.c} #include #include #include int main(void) { cl_device_id did[16]; cl_int err; cl_platform_id pid, pids[16]; cl_uint nbplat, nb; char buf[128]; size_t size; int i, j; err = clGetPlatformIDs(sizeof(pids)/sizeof(pids[0]), pids, &nbplat); assert(err == CL_SUCCESS); printf("%u platforms\n", nbplat); for (j = 0; j < nbplat; j++) { pid = pids[j]; printf(" platform %d\n", j); err = clGetPlatformInfo(pid, CL_PLATFORM_VERSION, sizeof(buf)-1, buf, &size); assert(err == CL_SUCCESS); buf[size] = 0; printf(" platform version %s\n", buf); err = clGetDeviceIDs(pid, CL_DEVICE_TYPE_ALL, sizeof(did)/sizeof(did[0]), did, &nb); if (err == CL_DEVICE_NOT_FOUND) nb = 0; else assert(err == CL_SUCCESS); printf("%d devices\n", nb); for (i = 0; i < nb; i++) { err = clGetDeviceInfo(did[i], CL_DEVICE_VERSION, sizeof(buf)-1, buf, &size); buf[size] = 0; printf(" device %d version %s\n", i, buf); } } return 0; } \endcode If that program does not find your device, the problem is not at the StarPU level, but with the OpenCL drivers, check the documentation of your OpenCL implementation. This program is available in the source directory of StarPU in tools/gpus/check_opencl.c. \section CUDACopyError There seems to be errors when copying to and from CUDA devices You should first try to disable asynchronous copies between CUDA and CPU workers. You can either do that with the configuration parameter \ref disable-asynchronous-cuda-copy "--disable-asynchronous-cuda-copy" or with the environment variable \ref STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY. If your application keeps failing, you will find in the source directory of StarPU, a directory named tools/gpus with various programs. cuda_copy.cu is testing the direct or undirect copy between CUDA devices. You can also try to just disable the direct gpu-gpu transfers (known to fail under some hardware/cuda combinations) by setting the \ref STARPU_ENABLE_CUDA_GPU_GPU_DIRECT environment variable to 0. \section IncorrectPerformanceModelFile I keep getting a "Incorrect performance model file" error The performance model file, used by StarPU to record the performance of codelets, seem to have been corrupted. Perhaps a previous run of StarPU stopped abruptly, and thus could not save it properly. You can have a look at the file if you can fix it, but the simplest way is to just remove the file and run again, StarPU will just have to re-perform calibration for the corresponding codelet. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_faq/faq_intro.doxy000066400000000000000000000024271507764646700251040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \intropage{IntroFAQ, --------- StarPU FAQs ---------} \webforeword This part explains how to better tune your application to achieve good performance, and also how to fix some difficulties you may encounter while implementing your applications.
  • We give a list of features in Chapter \ref CheckListWhenPerformanceAreNotThere which should be checked to improve performances of your applications.
  • There are some frequently asked questions in Chapter \ref FrequentlyAskedQuestions that may help you to solve your problems.
If you have problems that cannot be solved, please contact us. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_installation/000077500000000000000000000000001507764646700241425ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_installation/building.doxy000066400000000000000000000212531507764646700266470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page BuildingAndInstallingStarPU Building and Installing StarPU Depending on the level of customization required for the library installation, we offer several solutions.
  1. Basic Installation or Evaluation: If you are looking to simply try out the library, assess its performance on simple cases, run examples, or use the latest stable version, we recommend the following options:
    • For Linux Debian or Ubuntu distributions, consider using the latest StarPU Debian package (see \ref InstallingABinaryPackage).
    • For macOS, you can opt for Brew and follow the steps in \ref InstallingASourcePackage.
    • Using an already installed module on a cluster, as explained in \ref UsingModule
  2. Customization for Specific Needs: If you intend to use StarPU but require modifications, such as switching to another version (git branch), changing the default MPI, utilizing a preferred compiler, or altering source code, consider these options:
    • Guix or Spack can be useful, as these package managers allow dynamic changes during source-based builds. Refer to \ref InstallingASourcePackage for details.
    • Alternatively, you can directly build from the source using the native build system of the library (Makefile, GNU autotools). Instructions can be found in \ref InstallingFromSource.
  3. Experiment Reproducibility: If your focus is on experiment reproducibility, we recommend using Guix. Refer to \ref InstallingASourcePackage for guidance.
Whichever solution you choose, you can utilize the tool bin/starpu_config to view all the configuration parameters used during StarPU installation. Please refer to the provided documentation for specific installation steps and details for each solution. \section InstallingABinaryPackage Installing a Binary Package One of the StarPU developers being a Debian Developer, the packages are well integrated and very up-to-date. To see which packages are available, simply type: \verbatim $ apt-cache search starpu \endverbatim To install what you need, type for example: \verbatim $ sudo apt-get install libstarpu-dev \endverbatim \section InstallingASourcePackage Installing a Source Package StarPU is available from different package managers.
  • Guix https://gitlab.inria.fr/guix-hpc/guix-hpc
  • Spack https://github.com/spack/spack/
  • Brew https://gitlab.inria.fr/solverstack/brew-repo
Documentation on how to install StarPU with these package managers is directly available from the links specified above. We give below a brief overview of the spack installation. \subsection SpackInstallation Installing the Spack Package Here is a quick guide to install StarPU with spack. \verbatim $ git clone git@github.com:spack/spack.git $ source ./spack/share/spack/setup-env.sh # if you use bash or zsh $ spack install starpu \endverbatim By default, the latest release will be installed, one can choose to install a specific release or even the master version. \verbatim $ spack install starpu@master $ spack install starpu@1.3.5 \endverbatim We strongly advise reading the detailed reference manual at https://spack.readthedocs.io/en/latest/getting_started.html \subsection UsingModule Using a Module On some clusters, StarPU is provided as a module, for example on the Jean Zay cluster. The information is available at http://www.idris.fr/jean-zay/cpu/jean-zay-cpu-starpu.html \section InstallingFromSource Building from Source StarPU can be built and installed by the standard means of the GNU autotools. The following chapter is intended to briefly remind how these tools can be used to install StarPU. \subsection OptionalDependencies Optional Dependencies The hwloc (http://www.open-mpi.org/software/hwloc) topology discovery library is not mandatory to use StarPU, but strongly recommended. It allows for topology aware scheduling, which improves performance. hwloc is available in major free operating system distributions, and for most operating systems. Make sure to not only install a hwloc or libhwloc package, but also hwloc-devel or libhwloc-dev to have \c hwloc headers etc. If libhwloc is installed in a standard location, no option is required, it will be detected automatically, otherwise \ref with-hwloc "--with-hwloc=" should be used to specify its location. If libhwloc is not available on your system, the option \ref without-hwloc "--without-hwloc" should be explicitly given when calling the script configure. \subsection GettingSources Getting Sources StarPU's sources can be obtained from the download page of the StarPU website (https://starpu.gitlabpages.inria.fr/files/). All releases and the development tree of StarPU are freely available on StarPU SCM server under the LGPL license. Some releases are available under the BSD license. The latest release can be downloaded from the StarPU download page (https://starpu.gitlabpages.inria.fr/files/). The latest nightly snapshot can be downloaded from the StarPU website (https://starpu.gitlabpages.inria.fr/files/testing/). And finally, the current development version is also accessible via git. It should only be used if you need the very latest changes (i.e. less than a day old!). \verbatim $ git clone git@gitlab.inria.fr:starpu/starpu.git \endverbatim \subsection ConfiguringStarPU Configuring StarPU Running autogen.sh is not necessary when using the tarball releases of StarPU. However, when using the source code from the git repository, you first need to generate the script configure and the different Makefiles. This requires the availability of autoconf and automake >= 2.60. \verbatim $ ./autogen.sh \endverbatim You then need to configure StarPU. Details about options that are useful to give to configure are given in \ref CompilationConfiguration. \verbatim $ ./configure \endverbatim If configure does not detect some software or produces errors, please make sure to post the contents of the file config.log when reporting the issue. By default, the files produced during the compilation are placed in the source directory. As the compilation generates a lot of files, it is advised to put them all in a separate directory. It is then easier to clean up, and this allows to compile several configurations out of the same source tree. To do so, simply enter the directory where you want the compilation to produce its files, and invoke the script configure located in the StarPU source directory. \verbatim $ mkdir build $ cd build $ ../configure \endverbatim By default, StarPU will be installed in /usr/local/bin, /usr/local/lib, etc. You can specify an installation prefix other than /usr/local using the option --prefix, for instance: \verbatim $ ../configure --prefix=$HOME/starpu \endverbatim \subsection BuildingStarPU Building StarPU \verbatim $ make \endverbatim Once everything is built, you may want to test the result. An extensive set of regression tests is provided with StarPU. Running the tests is done by calling make check (by setting the variable \c STARPU_MICROBENCHS_DISABLED to disable benchmarks) These tests are run every night and the result from the main profile is publicly available (https://starpu.gitlabpages.inria.fr/files/testing/master/). \verbatim $ STARPU_MICROBENCHS_DISABLED=1 make check \endverbatim \subsection InstallingStarPU Installing StarPU In order to install StarPU at the location which was specified during configuration: \verbatim $ make install \endverbatim If you have let StarPU install in /usr/local/, you additionally need to run \verbatim $ sudo ldconfig \endverbatim so the libraries can be found by the system. Libtool interface versioning information are included in libraries names (libstarpu-1.4.so, libstarpumpi-1.4.so and libstarpufft-1.4.so). */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_installation/configuration_and_initialization.doxy000066400000000000000000000035251507764646700336540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page ConfigurationAndInitialization Configuration and Initialization This section explains the relationship between configure options, compilation options and environment variables used by StarPU.
  1. Configure options are used during the installation process to enable or disable specific features and libraries. These options are set using flags like \ref enable-maxcpus "--enable-maxcpus", which can be used to set the maximum number of CPUs that can be used by StarPU.
  2. Compilation options are used to set specific parameters during the compilation process, such as the optimization level, architecture type, and debugging options.
  3. Environment variables are used to set runtime parameters and control the behavior of the StarPU library. For example, the \ref STARPU_NCPUS environment variable can be used to specify the number of CPUs to use at runtime, overriding the value set during compilation or installation.
Options can also be set with the different fields of the ::starpu_conf parameter given to starpu_init(), such as starpu_conf::ncpus, which is used to specify the number of CPUs that StarPU should use for computations. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_installation/configure_options.doxy000066400000000000000000000614771507764646700306220ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page CompilationConfiguration Compilation Configuration The behavior of the StarPU library and tools may be tuned thanks to the following configure options. \section CommonConfiguration Common Configuration
--enable-debug
\anchor enable-debug \addindex __configure__--enable-debug Enable debugging messages.
--enable-spinlock-check
\anchor enable-spinlock-check \addindex __configure__--enable-spinlock-check Enable checking that spinlocks are taken and released properly.
--enable-fast
\anchor enable-fast \addindex __configure__--enable-fast Disable assertion checks, which saves computation time.
--enable-verbose
\anchor enable-verbose \addindex __configure__--enable-verbose Increase the verbosity of the debugging messages. This can be disabled at runtime by setting the environment variable \ref STARPU_SILENT to any value. --enable-verbose=extra increase even more the verbosity. \verbatim $ STARPU_SILENT=1 ./vector_scal \endverbatim
--enable-coverage
\anchor enable-coverage \addindex __configure__--enable-coverage Enable flags for the coverage tool gcov.
--enable-quick-check
\anchor enable-quick-check \addindex __configure__--enable-quick-check Specify tests and examples should be run on a smaller data set, i.e allowing a faster execution time
--enable-long-check
\anchor enable-long-check \addindex __configure__--enable-long-check Enable some exhaustive checks which take a really long time.
--enable-new-check
\anchor enable-new-check \addindex __configure__--enable-new-check Enable new testcases which are known to fail.
--with-hwloc
\anchor with-hwloc \addindex __configure__--with-hwloc Specify hwloc should be used by StarPU. hwloc should be found by the means of the tool pkg-config.
--with-hwloc=prefix
\anchor with-hwloc-prefix \addindex __configure__--with-hwloc-prefix Specify hwloc should be used by StarPU. hwloc should be found in the directory specified by prefix
--without-hwloc
\anchor without-hwloc \addindex __configure__--without-hwloc Specify hwloc should not be used by StarPU.
--disable-build-doc
\anchor disable-build-doc \addindex __configure__--disable-build-doc Disable the creation of the documentation. This should be done on a machine which does not have the tools doxygen and latex (plus the packages latex-xcolor and texlive-latex-extra).
--enable-build-doc-pdf
\anchor enable-build-doc-pdf \addindex __configure__--enable-build-doc-pdf By default, only the HTML documentation is generated. Use this option to also enable the generation of the PDF documentation. This should be done on a machine which does have the tools doxygen and latex (plus the packages latex-xcolor and texlive-latex-extra).
--enable-icc
\anchor enable-icc \addindex __configure__--enable-icc Enable the compilation of specific ICC examples. StarPU itself will not be compiled with ICC unless specified with CC=icc
--disable-icc
\anchor disable-icc \addindex __configure__--disable-icc Disable the usage of the ICC compiler. Otherwise, when a ICC compiler is found, some specific ICC examples are compiled as explained above.
--with-check-flags
\anchor with-check-flags \addindex __configure__--with-check-flags Specify flags which will be given to C, CXX and Fortran compilers when valid
Additionally, the script configure recognize many variables, which can be listed by typing ./configure --help. For example, ./configure NVCCFLAGS="-arch sm_20" adds a flag for the compilation of CUDA kernels, and NVCC_CC=gcc-5 allows to change the C++ compiler used by nvcc. \section ConfiguringWorkers Configuring Workers
--enable-data-locality-enforce
\anchor enable-data-locality-enforce \addindex __configure__--enable-data-locality-enforce Enable data locality enforcement when picking up a worker to execute a task. This mechanism is by default disabled.
--enable-blocking-drivers
\anchor enable-blocking-drivers \addindex __configure__--enable-blocking-drivers By default, StarPU keeps CPU workers awake permanently, for better reactivity. This option makes StarPU put CPU workers to real sleep when there are not enough tasks to compute.
--enable-worker-callbacks
\anchor enable-worker-callbacks \addindex __configure__--enable-worker-callbacks If blocking drivers are enabled, enable callbacks to notify an external resource manager about workers going to sleep and waking up.
--enable-maxcpus=count
\anchor enable-maxcpus \addindex __configure__--enable-maxcpus Use at most count CPU cores. This information is then available as the macro ::STARPU_MAXCPUS. The default value is auto. it allows StarPU to automatically detect the number of CPUs on the build machine. This should not be used if the running host has a larger number of CPUs than the build machine.
--enable-maxnumanodes=count
\anchor enable-maxnumanodes \addindex __configure__--enable-maxnumanodes Use at most count NUMA nodes. This information is then available as the macro ::STARPU_MAXNUMANODES. The default value is auto. it allows StarPU to automatically detect the number of NUMA nodes on the build machine. This should not be used if the running host has a larger number of NUMA nodes than the build machine.
--disable-cpu
\anchor disable-cpu \addindex __configure__--disable-cpu Disable the use of CPUs of the machine. Only GPUs etc. will be used.
--enable-maxcudadev=count
\anchor enable-maxcudadev \addindex __configure__--enable-maxcudadev Use at most count CUDA devices. This information is then available as the macro ::STARPU_MAXCUDADEVS.
--disable-cuda
\anchor disable-cuda \addindex __configure__--disable-cuda Disable the use of CUDA, even if a valid CUDA installation was detected.
--with-cuda-dir=prefix
\anchor with-cuda-dir \addindex __configure__--with-cuda-dir Search for CUDA under prefix, which should notably contain the file include/cuda.h.
--with-cuda-include-dir=dir
\anchor with-cuda-include-dir \addindex __configure__--with-cuda-include-dir Search for CUDA headers under dir, which should notably contain the file cuda.h. This defaults to /include appended to the value given to \ref with-cuda-dir "--with-cuda-dir".
--with-cuda-lib-dir=dir
\anchor with-cuda-lib-dir \addindex __configure__--with-cuda-lib-dir Search for CUDA libraries under dir, which should notably contain the CUDA shared libraries---e.g., libcuda.so. This defaults to /lib appended to the value given to \ref with-cuda-dir "--with-cuda-dir".
--disable-cuda-memcpy-peer
\anchor disable-cuda-memcpy-peer \addindex __configure__--disable-cuda-memcpy-peer Explicitly disable peer transfers when using CUDA 4.0.
--enable-maxopencldev=count
\anchor enable-maxopencldev \addindex __configure__--enable-maxopencldev Use at most count OpenCL devices. This information is then available as the macro ::STARPU_MAXOPENCLDEVS.
--disable-opencl
\anchor disable-opencl \addindex __configure__--disable-opencl Disable the use of OpenCL, even if the SDK is detected.
--with-opencl-dir=prefix
\anchor with-opencl-dir \addindex __configure__--with-opencl-dir Search for an OpenCL implementation under prefix, which should notably contain include/CL/cl.h (or include/OpenCL/cl.h on Mac OS).
--with-opencl-include-dir=dir
\anchor with-opencl-include-dir \addindex __configure__--with-opencl-include-dir Search for OpenCL headers under dir, which should notably contain CL/cl.h (or OpenCL/cl.h on Mac OS). This defaults to /include appended to the value given to \ref with-opencl-dir "--with-opencl-dir".
--with-opencl-lib-dir=dir
\anchor with-opencl-lib-dir \addindex __configure__--with-opencl-lib-dir Search for an OpenCL library under dir, which should notably contain the OpenCL shared libraries---e.g. libOpenCL.so. This defaults to /lib appended to the value given to \ref with-opencl-dir "--with-opencl-dir".
--enable-opencl-simulator
\anchor enable-opencl-simulator \addindex __configure__--enable-opencl-simulator Enable considering the provided OpenCL implementation as a simulator, i.e. use the kernel duration returned by OpenCL profiling information as wallclock time instead of the actual measured real time. This requires the SimGrid support.
--enable-maximplementations=count
\anchor enable-maximplementations \addindex __configure__--enable-maximplementations Allow for at most count codelet implementations for the same target device. This information is then available as the macro ::STARPU_MAXIMPLEMENTATIONS macro.
--enable-max-sched-ctxs=count
\anchor enable-max-sched-ctxs \addindex __configure__--enable-max-sched-ctxs Allow for at most count scheduling contexts This information is then available as the macro ::STARPU_NMAX_SCHED_CTXS.
--disable-asynchronous-copy
\anchor disable-asynchronous-copy \addindex __configure__--disable-asynchronous-copy Disable asynchronous copies between CPU and GPU devices. The AMD implementation of OpenCL is known to fail when copying data asynchronously. When using this implementation, it is therefore necessary to disable asynchronous data transfers.
--disable-asynchronous-cuda-copy
\anchor disable-asynchronous-cuda-copy \addindex __configure__--disable-asynchronous-cuda-copy Disable asynchronous copies between CPU and CUDA devices.
--disable-asynchronous-opencl-copy
\anchor disable-asynchronous-opencl-copy \addindex __configure__--disable-asynchronous-opencl-copy Disable asynchronous copies between CPU and OpenCL devices. The AMD implementation of OpenCL is known to fail when copying data asynchronously. When using this implementation, it is therefore necessary to disable asynchronous data transfers.
--disable-asynchronous-hip-copy
\anchor disable-asynchronous-hip-copy \addindex __configure__--disable-asynchronous-hip-copy Disable asynchronous copies between CPU and HIP devices.
--disable-asynchronous-mpi-master-slave-copy
\anchor disable-asynchronous-mpi-master-slave-copy \addindex __configure__--disable-asynchronous-mpi-master-slave-copy Disable asynchronous copies between CPU and MPI Slave devices.
--disable-asynchronous-tcpip-master-slave-copy
\anchor disable-asynchronous-tcpip-master-slave-copy \addindex __configure__--disable-asynchronous-mpi-master-slave-copy Disable asynchronous copies between CPU and MPI Slave devices.
--disable-asynchronous-fpga-copy
\anchor disable-asynchronous-fpga-copy \addindex __configure__--disable-asynchronous-fpga-copy Disable asynchronous copies between CPU and Maxeler FPGA devices.
--enable-maxnodes=count
\anchor enable-maxnodes \addindex __configure__--enable-maxnodes Use at most count memory nodes. This information is then available as the macro ::STARPU_MAXNODES. Reducing it allows to considerably reduce memory used by StarPU data structures.
--with-max-fpga=dir
\anchor with-max-fpga \addindex __configure__--with-max-fpga Enable the Maxeler FPGA driver support, and optionally specify the location of the Maxeler FPGA library.
--disable-asynchronous-max-fpga-copy
\anchor disable-asynchronous-max-fpga-copy \addindex __configure__--disable-asynchronous-max-fpga-copy Disable asynchronous copies between CPU and Maxeler FPGA devices.
\section ExtensionConfiguration Extension Configuration
--enable-starpupy
\anchor enable-starpupy \addindex __configure__--enable-starpupy Enable the StarPU Python Interface (\ref PythonInterface)
--enable-python-multi-interpreter
\anchor enable-python-multi-interpreter \addindex __configure__--enable-python-multi-interpreter Enable the use of multiple interpreters in the StarPU Python Interface (\ref MultipleInterpreters)
--disable-mpi
\anchor disable-mpi \addindex __configure__--disable-mpi Disable the build of libstarpumpi. By default, it is enabled when MPI is found.
--enable-mpi
\anchor enable-mpi \addindex __configure__--enable-mpi Enable the build of libstarpumpi. This is necessary when using Simgrid+MPI.
--with-mpicc=path
\anchor with-mpicc \addindex __configure__--with-mpicc Use the compiler mpicc at path, for StarPU-MPI. (\ref MPISupport).
--enable-mpi-pedantic-isend
\anchor enable-mpi-pedantic-isend \addindex __configure__--enable-mpi-pedantic-isend Before performing any MPI communication, StarPU-MPI waits for the data to be available in the main memory of the node submitting the request. For send communications, data is acquired with the mode ::STARPU_R. When enabling the pedantic mode, data are instead acquired with the ::STARPU_RW which thus ensures that there is not more than 1 concurrent \c MPI_Isend calls accessing the data and StarPU does not read from it from tasks during the communication.
--enable-mpi-master-slave
\anchor enable-mpi-master-slave \addindex __configure__--enable-mpi-master-slave Enable the MPI Master-Slave support. By default, it is disabled.
--enable-mpi-verbose
\anchor enable-mpi-verbose \addindex __configure__--enable-mpi-verbose Increase the verbosity of the MPI debugging messages. This can be disabled at runtime by setting the environment variable \ref STARPU_SILENT to any value. --enable-mpi-verbose=extra increase even more the verbosity. \verbatim $ STARPU_SILENT=1 mpirun -np 2 ./insert_task \endverbatim
--enable-mpi-ft
\anchor enable-mpi-ft \addindex __configure__--enable-mpi-ft Enable the MPI checkpoint mechanism. See \ref API_MPI_FT_Support
--enable-mpi-ft-stats
\anchor enable-mpi-ft-stats \addindex __configure__--enable-mpi-ft-stats Enable the statistics for the MPI checkpoint mechanism. See \ref API_MPI_FT_Support
--enable-tcpip-master-slave
\anchor enable-tcpip-master-slave \addindex __configure__--enable-mpi-master-slave Enable the TCP/IP Master-Slave support (\ref TCPIPSupport). By default, it is disabled.
--enable-nmad
\anchor enable-nmad \addindex __configure__--enable-nmad Enable the NewMadeleine implementation for StarPU-MPI. See \ref Nmad for more details.
--disable-fortran
\anchor disable-fortran \addindex __configure__--disable-fortran Disable the fortran extension. By default, it is enabled when a fortran compiler is found.
--disable-socl
\anchor disable-socl \addindex __configure__--disable-socl Disable the SOCL extension (\ref SOCLOpenclExtensions). By default, it is enabled when an OpenCL implementation is found.
--enable-openmp
\anchor enable-openmp \addindex __configure__--enable-openmp Enable OpenMP Support (\ref OpenMPRuntimeSupport)
--enable-openmp-llvm
\anchor enable-openmp-llvm \addindex __configure__--enable-openmp-llvm Enable LLVM OpenMP Support (\ref OMPLLVM)
--enable-bubble
\anchor enable-bubble \addindex __configure__--enable-bubble Enable Hierarchical dags support (\ref HierarchicalDAGS)
--enable-parallel-worker
\anchor enable-parallel-worker \addindex __configure__--enable-parallel-worker Enable parallel worker support (\ref ParallelWorker)
--enable-eclipse-plugin
\anchor enable-eclipse-plugin \addindex __configure__--enable-eclipse-plugin Enable the StarPU Eclipse Plugin. See \ref EclipsePlugin to know how to install Eclipse.
\section AdvancedConfiguration Advanced Configuration
--enable-perf-debug
\anchor enable-perf-debug \addindex __configure__--enable-perf-debug Enable performance debugging through gprof.
--enable-model-debug
\anchor enable-model-debug \addindex __configure__--enable-model-debug Enable performance model debugging.
--enable-fxt-lock
\anchor enable-fxt-lock \addindex __configure__--enable-fxt-lock Enable additional trace events which describes locks behaviour. This is however extremely heavy and should only be enabled when debugging insides of StarPU.
--enable-maxbuffers
\anchor enable-maxbuffers \addindex __configure__--enable-maxbuffers Define the maximum number of buffers that tasks will be able to take as parameters, then available as the macro ::STARPU_NMAXBUFS.
--enable-fxt-max-files=count
\anchor enable-fxt-max-files \addindex __configure__--enable-fxt-max-files Use at most count mpi nodes fxt files for generating traces. This information is then available as the macro ::STARPU_FXT_MAX_FILES. This information is used by FxT tools when considering multi node traces. Default value is 64.
--enable-allocation-cache
\anchor enable-allocation-cache \addindex __configure__--enable-allocation-cache Enable the use of a data allocation cache to avoid the cost of it with CUDA. Still experimental.
--enable-opengl-render
\anchor enable-opengl-render \addindex __configure__--enable-opengl-render Enable the use of OpenGL for the rendering of some examples. // TODO: rather default to enabled when detected
--enable-blas-lib=prefix
\anchor enable-blas-lib \addindex __configure__--enable-blas-lib Specify the blas library to be used by some of the examples. Libraries available : - \c none [default] : no BLAS library is used - \c atlas: use ATLAS library - \c goto: use GotoBLAS library - \c openblas: use OpenBLAS library - \c mkl: use MKL library (you may need to set specific \c CFLAGS and \c LDFLAGS with --with-mkl-cflags and --with-mkl-ldflags)
--enable-leveldb
\anchor enable-leveldb \addindex __configure__--enable-leveldb Enable linking with LevelDB if available
--enable-hdf5
\anchor enable-hdf5 \addindex __configure__--enable-hdf5 Enable building HDF5 support.
--with-hdf5-include-dir=path
\anchor with-hdf5-include-dir \addindex __configure__--with-hdf5-include-dir Specify the directory where is stored the header file \c hdf5.h.
--with-hdf5-lib-dir=path
\anchor with-hdf5-lib-dir \addindex __configure__--with-hdf5-lib-dir Specify the directory where is stored the library \c hdf5.
--disable-starpufft
\anchor disable-starpufft \addindex __configure__--disable-starpufft Disable the build of libstarpufft, even if fftw or cuFFT is available.
--enable-starpufft-examples
\anchor enable-starpufft-examples \addindex __configure__--enable-starpufft-examples Enable the compilation and the execution of the libstarpufft examples. By default, they are neither compiled nor checked.
--with-fxt=prefix
\anchor with-fxt \addindex __configure__--with-fxt Search for FxT under prefix. FxT (http://savannah.nongnu.org/projects/fkt) is used to generate traces of scheduling events, which can then be rendered them using ViTE (\ref Off-linePerformanceFeedback). prefix should notably contain include/fxt/fxt.h.
--with-perf-model-dir=dir
\anchor with-perf-model-dir \addindex __configure__--with-perf-model-dir Store performance models under dir, instead of the current user's home.
--with-goto-dir=prefix
\anchor with-goto-dir \addindex __configure__--with-goto-dir Search for GotoBLAS under prefix, which should notably contain libgoto.so or libgoto2.so.
--with-atlas-dir=prefix
\anchor with-atlas-dir \addindex __configure__--with-atlas-dir Search for ATLAS under prefix, which should notably contain include/cblas.h.
--with-mkl-cflags=cflags
\anchor with-mkl-cflags \addindex __configure__--with-mkl-cflags Use cflags to compile code that uses the MKL library.
--with-mkl-ldflags=ldflags
\anchor with-mkl-ldflags \addindex __configure__--with-mkl-ldflags Use ldflags when linking code that uses the MKL library. Note that the MKL website (http://software.intel.com/en-us/articles/intel-mkl-link-line-advisor/) provides a script to determine the linking flags.
--disable-glpk
\anchor disable-glpk \addindex __configure__--disable-glpk Disable the use of \c libglpk for computing area bounds.
--disable-build-tests
\anchor disable-build-tests \addindex __configure__--disable-build-tests Disable the build of tests.
--disable-build-examples
\anchor disable-build-examples \addindex __configure__--disable-build-examples Disable the build of examples.
--enable-sc-hypervisor
\anchor enable-sc-hypervisor \addindex __configure__--enable-sc-hypervisor Enable the Scheduling Context Hypervisor plugin (\ref SchedulingContextHypervisor). By default, it is disabled.
--enable-memory-stats
\anchor enable-memory-stats \addindex __configure__--enable-memory-stats Enable memory statistics (\ref MemoryFeedback).
--enable-simgrid
\anchor enable-simgrid \addindex __configure__--enable-simgrid Enable simulation of execution in SimGrid, to allow easy experimentation with various numbers of cores and GPUs, or amount of memory, etc. Experimental. The path to SimGrid can be specified through the SIMGRID_CFLAGS and SIMGRID_LIBS environment variables, for instance: \verbatim export SIMGRID_CFLAGS="-I/usr/local/simgrid/include" export SIMGRID_LIBS="-L/usr/local/simgrid/lib -lsimgrid" \endverbatim
--with-simgrid-dir
\anchor with-simgrid-dir \addindex __configure__--with-simgrid-dir Similar to the option \ref enable-simgrid "--enable-simgrid" but also allows to specify the location to the SimGrid library.
--with-simgrid-include-dir
\anchor with-simgrid-include-dir \addindex __configure__--with-simgrid-include-dir Similar to the option \ref enable-simgrid "--enable-simgrid" but also allows to specify the location to the SimGrid include directory.
--with-simgrid-lib-dir
\anchor with-simgrid-lib-dir \addindex __configure__--with-simgrid-lib-dir Similar to the option \ref enable-simgrid "--enable-simgrid" but also allows to specify the location to the SimGrid lib directory.
--with-smpirun=path
\anchor with-smpirun \addindex __configure__--with-smpirun Use the smpirun at path
--enable-simgrid-mc
\anchor enable-simgrid-mc \addindex __configure__--enable-simgrid-mc Enable the Model Checker in simulation of execution in SimGrid, to allow exploring various execution paths.
--enable-calibration-heuristic
\anchor enable-calibration-heuristic \addindex __configure__--enable-calibration-heuristic Allow to set the maximum authorized percentage of deviation for the history-based calibrator of StarPU. A correct value of this parameter must be in [0..100]. The default value of this parameter is 10. Experimental.
--enable-mlr
\anchor enable-mlr \addindex __configure__--enable-mlr Allow to enable multiple linear regression models (see \ref PerformanceModelExample)
--enable-mlr-system-blas
\anchor enable-mlr-system-blas \addindex __configure__--enable-mlr-system-blas Allow to make multiple linear regression models use the system-provided BLAS for dgels (see \ref PerformanceModelExample)
*/ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_installation/environment_variables.doxy000066400000000000000000002224241507764646700314510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2020,2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page ExecutionConfigurationThroughEnvironmentVariables Execution Configuration Through Environment Variables The StarPU library and tools's behavior can be tuned using the following environment variables. To access these variables, you can use the provided functions.
  • starpu_getenv() retrieves the value of an environment variable.
  • starpu_get_env_string_var_default() retrieves the value of an environment variable as a string. If the variable is not set, you can provide a default value.
  • starpu_get_env_size_default() retrieves the value of an environment variable as a size in bytes, or a default value if the environment variable is not set.
These functions allow to fine-tune the behavior of StarPU according to your preferences and requirements by leveraging environment variables. \section EnvConfiguringWorkers Configuring Workers \subsection Basic General Configuration
STARPU_WORKERS_NOBIND
\anchor STARPU_WORKERS_NOBIND \addindex __env__STARPU_WORKERS_NOBIND Setting it to non-zero will prevent StarPU from binding its threads to CPUs. This is for instance useful when running the test suite in parallel.
STARPU_WORKERS_GETBIND
\anchor STARPU_WORKERS_GETBIND \addindex __env__STARPU_WORKERS_GETBIND By default StarPU uses the OS-provided CPU binding to determine how many and which CPU cores it should use. This is notably useful when running several StarPU-MPI processes on the same host, to let the MPI launcher set the CPUs to be used. Default value is 1. If that binding is erroneous (e.g. because the job scheduler binds to just one core of the allocated cores), you can set \ref STARPU_WORKERS_GETBIND to 0 to make StarPU use all cores of the machine.
STARPU_WORKERS_CPUID
\anchor STARPU_WORKERS_CPUID \addindex __env__STARPU_WORKERS_CPUID Passing an array of integers in \ref STARPU_WORKERS_CPUID specifies on which logical CPU the different workers should be bound. For instance, if STARPU_WORKERS_CPUID="0 1 4 5", the first worker will be bound to logical CPU #0, the second CPU worker will be bound to logical CPU #1 and so on. Note that the logical ordering of the CPUs is either determined by the OS, or provided by the library hwloc in case it is available. Ranges can be provided: for instance, STARPU_WORKERS_CPUID="1-3 5" will bind the first three workers on logical CPUs #1, #2, and #3, and the fourth worker on logical CPU #5. Unbound ranges can also be provided: STARPU_WORKERS_CPUID="1-" will bind the workers starting from logical CPU #1 up to last CPU. Note that the first workers correspond to the CUDA workers, then come the OpenCL workers, and finally the CPU workers. For example, if we have STARPU_NCUDA=1, STARPU_NOPENCL=1, STARPU_NCPU=2 and STARPU_WORKERS_CPUID="0 2 1 3", the CUDA device will be controlled by logical CPU #0, the OpenCL device will be controlled by logical CPU #2, and the logical CPUs #1 and #3 will be used by the CPU workers. If the number of workers is larger than the array given in \ref STARPU_WORKERS_CPUID, the workers are bound to the logical CPUs in a round-robin fashion: if STARPU_WORKERS_CPUID="0 1", the first and the third (resp. second and fourth) workers will be put on CPU #0 (resp. CPU #1). This variable is ignored if the field starpu_conf::use_explicit_workers_bindid passed to starpu_init() is set. Setting \ref STARPU_WORKERS_CPUID or \ref STARPU_WORKERS_COREID overrides the binding provided by the job scheduler, as described for \ref STARPU_WORKERS_GETBIND.
STARPU_WORKERS_COREID
\anchor STARPU_WORKERS_COREID \addindex __env__STARPU_WORKERS_COREID Same as \ref STARPU_WORKERS_CPUID, but bind the workers to cores instead of PUs (hyperthreads).
STARPU_NTHREADS_PER_CORE
\anchor STARPU_NTHREADS_PER_CORE \addindex __env__STARPU_NTHREADS_PER_CORE Specify how many threads StarPU should run on each core. The default is 1 because kernels are usually already optimized for using a full core. Setting this to e.g. 2 instead allows exploiting hyperthreading.
STARPU_MAIN_THREAD_BIND
\anchor STARPU_MAIN_THREAD_BIND \addindex __env__STARPU_MAIN_THREAD_BIND Tell StarPU to bind the thread that calls starpu_initialize() to a reserved CPU, subtracted from the CPU workers.
STARPU_MAIN_THREAD_CPUID
\anchor STARPU_MAIN_THREAD_CPUID \addindex __env__STARPU_MAIN_THREAD_CPUID Tell StarPU to bind the thread that calls starpu_initialize() to the given CPU ID (using logical numbering).
STARPU_MAIN_THREAD_COREID
\anchor STARPU_MAIN_THREAD_COREID \addindex __env__STARPU_MAIN_THREAD_COREID Same as \ref STARPU_MAIN_THREAD_CPUID, but bind the thread that calls starpu_initialize() to the given core (using logical numbering), instead of the PU (hyperthread).
STARPU_WORKER_TREE
\anchor STARPU_WORKER_TREE \addindex __env__STARPU_WORKER_TREE Define to 1 to enable the tree iterator in schedulers.
STARPU_SINGLE_COMBINED_WORKER
\anchor STARPU_SINGLE_COMBINED_WORKER \addindex __env__STARPU_SINGLE_COMBINED_WORKER Tell StarPU to create several workers which won't be able to work concurrently. It will by default create combined workers, which size goes from 1 to the total number of CPU workers in the system. \ref STARPU_MIN_WORKERSIZE and \ref STARPU_MAX_WORKERSIZE can be used to change this default.
STARPU_MIN_WORKERSIZE
\anchor STARPU_MIN_WORKERSIZE \addindex __env__STARPU_MIN_WORKERSIZE Specify the minimum size of the combined workers. Default value is 2.
STARPU_MAX_WORKERSIZE
\anchor STARPU_MAX_WORKERSIZE \addindex __env__STARPU_MAX_WORKERSIZE Specify the minimum size of the combined workers. Default value is the number of CPU workers in the system.
STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER
\anchor STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER \addindex __env__STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER Specify how many elements are allowed between combined workers created from \c hwloc information. For instance, in the case of sockets with 6 cores without shared L2 caches, if \ref STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER is set to 6, no combined worker will be synthesized beyond one for the socket and one per core. If it is set to 3, 3 intermediate combined workers will be synthesized, to divide the socket cores into 3 chunks of 2 cores. If it set to 2, 2 intermediate combined workers will be synthesized, to divide the socket cores into 2 chunks of 3 cores, and then 3 additional combined workers will be synthesized, to divide the former synthesized workers into a bunch of 2 cores, and the remaining core (for which no combined worker is synthesized since there is already a normal worker for it). Default value is 2, thus makes StarPU tend to build binary trees of combined workers.
STARPU_DISABLE_ASYNCHRONOUS_COPY
\anchor STARPU_DISABLE_ASYNCHRONOUS_COPY \addindex __env__STARPU_DISABLE_ASYNCHRONOUS_COPY Disable asynchronous copies between CPU and GPU devices. The AMD implementation of OpenCL is known to fail when copying data asynchronously. When using this implementation, it is therefore necessary to disable asynchronous data transfers. One can call starpu_asynchronous_copy_disabled() to check whether asynchronous data transfers between CPU and accelerators are disabled. See also \ref STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY and \ref STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY.
STARPU_EXPECTED_TRANSFER_TIME_WRITEBACK
\anchor STARPU_EXPECTED_TRANSFER_TIME_WRITEBACK \addindex __env__STARPU_EXPECTED_TRANSFER_TIME_WRITEBACK Set to 1 to make task transfer time estimations artificially include the time that will be needed to write back data to the main memory.
STARPU_DISABLE_PINNING
\anchor STARPU_DISABLE_PINNING \addindex __env__STARPU_DISABLE_PINNING Disable (1) or Enable (0) pinning host memory allocated through starpu_malloc(), starpu_memory_pin() and friends. Default value is Enable. This permits to test the performance effect of memory pinning.
STARPU_BACKOFF_MIN
\anchor STARPU_BACKOFF_MIN \addindex __env__STARPU_BACKOFF_MIN Set minimum exponential backoff of number of cycles to pause when spinning. Default value is 1.
STARPU_BACKOFF_MAX
\anchor STARPU_BACKOFF_MAX \addindex __env__STARPU_BACKOFF_MAX Set maximum exponential backoff of number of cycles to pause when spinning. Default value is 32.
STARPU_SINK
\anchor STARPU_SINK \addindex __env__STARPU_SINK Defined internally by StarPU when running in master slave mode.
STARPU_ENABLE_MAP
\anchor STARPU_ENABLE_MAP \addindex __env__STARPU_ENABLE_MAP Disable (0) or Enable (1) support for memory mapping between memory nodes. The default is Disabled. One can call starpu_map_enabled() to check whether memory mapping support between memory nodes is enabled.
STARPU_DATA_LOCALITY_ENFORCE
\anchor STARPU_DATA_LOCALITY_ENFORCE \addindex __env__STARPU_DATA_LOCALITY_ENFORCE Enable (1) or Disable(0) data locality enforcement when picking up a worker to execute a task. Default value is Disable.
\subsection cpuWorkers CPU Workers
STARPU_NCPU
\anchor STARPU_NCPU \addindex __env__STARPU_NCPU Specify the number of CPU workers (thus not including workers dedicated to control accelerators). Note that by default, StarPU will not allocate more CPU workers than there are physical CPUs, and that some CPUs are used to control the accelerators.
STARPU_RESERVE_NCPU
\anchor STARPU_RESERVE_NCPU \addindex __env__STARPU_RESERVE_NCPU Specify the number of CPU cores that should not be used by StarPU, so the application can use starpu_get_next_bindid() and starpu_bind_thread_on() to bind its own threads. This option is ignored if \ref STARPU_NCPU or starpu_conf::ncpus is set.
STARPU_NCPUS
\anchor STARPU_NCPUS \addindex __env__STARPU_NCPUS Deprecated. You should use \ref STARPU_NCPU.
\subsection cudaWorkers CUDA Workers
STARPU_NCUDA
\anchor STARPU_NCUDA \addindex __env__STARPU_NCUDA Specify the number of CUDA devices that StarPU can use. If \ref STARPU_NCUDA is lower than the number of physical devices, it is possible to select which GPU devices should be used by the means of the environment variable \ref STARPU_WORKERS_CUDAID. By default, StarPU will create as many CUDA workers as there are GPU devices.
STARPU_NWORKER_PER_CUDA
\anchor STARPU_NWORKER_PER_CUDA \addindex __env__STARPU_NWORKER_PER_CUDA Specify the number of workers per CUDA device, and thus the number of kernels which will be concurrently running on the devices, i.e. the number of CUDA streams. Default value is 1. For parallelism to be really achieved, one also needs to make CUDA codelets asynchronous (it is recommended for single-worker performance too anyway, see ::STARPU_CUDA_ASYNC in \ref CUDA-specificOptimizations), or to set \ref STARPU_CUDA_THREAD_PER_WORKER to 1.
STARPU_CUDA_THREAD_PER_WORKER
\anchor STARPU_CUDA_THREAD_PER_WORKER \addindex __env__STARPU_CUDA_THREAD_PER_WORKER Specify whether the cuda driver should use one thread per stream (1) or to use a single thread to drive all the streams of the device or all devices (0), and \ref STARPU_CUDA_THREAD_PER_DEV determines whether is it one thread per device or one thread for all devices. Default value is 0. Setting it to 1 is contradictory with setting \ref STARPU_CUDA_THREAD_PER_DEV.
STARPU_CUDA_THREAD_PER_DEV
\anchor STARPU_CUDA_THREAD_PER_DEV \addindex __env__STARPU_CUDA_THREAD_PER_DEV Specify whether the cuda driver should use one thread per device (1) or to use a single thread to drive all the devices (0). Default value is 1. It does not make sense to set this variable if \ref STARPU_CUDA_THREAD_PER_WORKER is set to to 1 (since \ref STARPU_CUDA_THREAD_PER_DEV is then meaningless).
STARPU_CUDA_PIPELINE
\anchor STARPU_CUDA_PIPELINE \addindex __env__STARPU_CUDA_PIPELINE Specify how many asynchronous tasks are submitted in advance on CUDA devices. This for instance permits to overlap task management with the execution of previous tasks, but it also allows concurrent execution on Fermi cards, which otherwise bring spurious synchronizations. Default value is 2. Setting the value to 0 forces a synchronous execution of all tasks.
STARPU_WORKERS_CUDAID
\anchor STARPU_WORKERS_CUDAID \addindex __env__STARPU_WORKERS_CUDAID Select which CUDA devices should be used to run CUDA workers (similarly to the \ref STARPU_WORKERS_CPUID environment variable). On a machine equipped with 4 GPUs, setting STARPU_WORKERS_CUDAID="1 3" and STARPU_NCUDA=2 specifies that 2 CUDA workers should be created, and that they should use CUDA devices #1 and #3 (the logical ordering of the devices is the one reported by CUDA). This variable is ignored if the field starpu_conf::use_explicit_workers_cuda_gpuid passed to starpu_init() is set.
STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY
\anchor STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY \addindex __env__STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY Disable asynchronous copies between CPU and CUDA devices. One can call starpu_asynchronous_cuda_copy_disabled() to check whether asynchronous data transfers between CPU and CUDA accelerators are disabled. See also \ref STARPU_DISABLE_ASYNCHRONOUS_COPY and \ref STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY.
STARPU_ENABLE_CUDA_GPU_GPU_DIRECT
\anchor STARPU_ENABLE_CUDA_GPU_GPU_DIRECT \addindex __env__STARPU_ENABLE_CUDA_GPU_GPU_DIRECT Enable (1) or Disable (0) direct CUDA transfers from GPU to GPU, without copying through RAM. Default value is Enable. This permits to test the performance effect of GPU-Direct.
STARPU_CUDA_ONLY_FAST_ALLOC_OTHER_MEMNODES
\anchor STARPU_CUDA_ONLY_FAST_ALLOC_OTHER_MEMNODES \addindex __env__STARPU_CUDA_ONLY_FAST_ALLOC_OTHER_MEMNODES Specify if CUDA workers should do only fast allocations when running the datawizard progress of other memory nodes. This will pass the internal value _STARPU_DATAWIZARD_ONLY_FAST_ALLOC to allocation methods. Default value is 0, allowing CUDA workers to do slow allocations. This can also be specified with starpu_conf::cuda_only_fast_alloc_other_memnodes.
\subsection openclWorkers OpenCL Workers
STARPU_NOPENCL
\anchor STARPU_NOPENCL \addindex __env__STARPU_NOPENCL Specify the number of OpenCL devices that StarPU can use. If \ref STARPU_NOPENCL is lower than the number of physical devices, it is possible to select which GPU devices should be used by the means of the environment variable \ref STARPU_WORKERS_OPENCLID. By default, StarPU will create as many OpenCL workers as there are GPU devices. Note that by default StarPU will launch CUDA workers on GPU devices. You need to disable CUDA to allow the creation of OpenCL workers.
STARPU_WORKERS_OPENCLID
\anchor STARPU_WORKERS_OPENCLID \addindex __env__STARPU_WORKERS_OPENCLID Select which GPU devices should be used to run OpenCL workers (similarly to the \ref STARPU_WORKERS_CPUID environment variable) On a machine equipped with 4 GPUs, setting STARPU_WORKERS_OPENCLID="1 3" and STARPU_NOPENCL=2 specifies that 2 OpenCL workers should be created, and that they should use GPU devices #1 and #3. This variable is ignored if the field starpu_conf::use_explicit_workers_opencl_gpuid passed to starpu_init() is set.
STARPU_OPENCL_PIPELINE
\anchor STARPU_OPENCL_PIPELINE \addindex __env__STARPU_OPENCL_PIPELINE Specify how many asynchronous tasks are submitted in advance on OpenCL devices. This for instance permits to overlap task management with the execution of previous tasks, but it also allows concurrent execution on Fermi cards, which otherwise bring spurious synchronizations. Default value is 2. Setting the value to 0 forces a synchronous execution of all tasks.
STARPU_OPENCL_ON_CPUS
\anchor STARPU_OPENCL_ON_CPUS \addindex __env__STARPU_OPENCL_ON_CPUS Specify that OpenCL workers can also be run on CPU devices. By default, the OpenCL driver only enables GPU devices.
STARPU_OPENCL_ONLY_ON_CPUS
\anchor STARPU_OPENCL_ONLY_ON_CPUS \addindex __env__STARPU_OPENCL_ONLY_ON_CPUS Specify that OpenCL workers can ONLY be run on CPU devices. By default, the OpenCL driver enables GPU devices.
STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY
\anchor STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY \addindex __env__STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY Disable asynchronous copies between CPU and OpenCL devices. The AMD implementation of OpenCL is known to fail when copying data asynchronously. When using this implementation, it is therefore necessary to disable asynchronous data transfers. One can call starpu_asynchronous_opencl_copy_disabled() to check whether asynchronous data transfers between CPU and OpenCL accelerators are disabled. See also \ref STARPU_DISABLE_ASYNCHRONOUS_COPY and \ref STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY.
\subsection maxfpgaWorkers Maxeler FPGA Workers
STARPU_NMAX_FPGA
\anchor STARPU_NMAX_FPGA \addindex __env__STARPU_NMAX_FPGA Specify the number of Maxeler FPGA devices that StarPU can use. If \ref STARPU_NMAX_FPGA is lower than the number of physical devices, it is possible to select which Maxeler FPGA devices should be used by the means of the environment variable \ref STARPU_WORKERS_MAX_FPGAID. By default, StarPU will create as many Maxeler FPGA workers as there are GPU devices.
STARPU_WORKERS_MAX_FPGAID
\anchor STARPU_WORKERS_MAX_FPGAID \addindex __env__STARPU_WORKERS_MAX_FPGAID Select which Maxeler FPGA devices should be used to run Maxeler FPGA workers (similarly to the \ref STARPU_WORKERS_CPUID environment variable). On a machine equipped with 4 Maxeler FPGAs, setting STARPU_WORKERS_MAX_FPGAID="1 3" and STARPU_NMAX_FPGA=2 specifies that 2 Maxeler FPGA workers should be created, and that they should use Maxeler FPGA devices #1 and #3 (the logical ordering of the devices is the one reported by the Maxeler stack).
STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY
\anchor STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY \addindex __env__STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY Disable asynchronous copies between CPU and Maxeler FPGA devices. One can call starpu_asynchronous_max_fpga_copy_disabled() to check whether asynchronous data transfers between CPU and Maxeler FPGA devices are disabled.
\subsection mpimsWorkers MPI Master Slave Workers
STARPU_NMPI_MS
\anchor STARPU_NMPI_MS \addindex __env__STARPU_NMPI_MS Specify the number of MPI master slave devices that StarPU can use.
STARPU_NMPIMSTHREADS
\anchor STARPU_NMPIMSTHREADS \addindex __env__STARPU_NMPIMSTHREADS Specift the number of threads to use on the MPI Slave devices.
STARPU_MPI_MS_MULTIPLE_THREAD
\anchor STARPU_MPI_MS_MULTIPLE_THREAD \addindex __env__STARPU_MPI_MS_MULTIPLE_THREAD Specify whether the master should use one thread per slave, or one thread for driver all slaves. Default value is 0.
STARPU_MPI_MASTER_NODE
\anchor STARPU_MPI_MASTER_NODE \addindex __env__STARPU_MPI_MASTER_NODE Specify the rank of the MPI process which will be the master. Default value is 0.
STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY
\anchor STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY \addindex __env__STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY Disable asynchronous copies between CPU and MPI Slave devices. One can call starpu_asynchronous_mpi_ms_copy_disabled() to check whether asynchronous data transfers between CPU and MPI Slave devices are disabled.
\subsection tcpipmsWorkers TCP/IP Master Slave Workers
STARPU_NTCPIP_MS
\anchor STARPU_NTCPIP_MS \addindex __env__STARPU_NTCPIP_MS Specify the number of TCP/IP master slave devices that StarPU can use.
STARPU_TCPIP_MS_SLAVES
\anchor STARPU_TCPIP_MS_SLAVES \addindex __env__STARPU_TCPIP_MS_SLAVES Specify the number of TCP/IP master slave processes that are expected to be run. This should be provided both to the master and to the slaves.
STARPU_TCPIP_MS_MASTER
\anchor STARPU_TCPIP_MS_MASTER \addindex __env__STARPU_TCPIP_MS_MASTER Specify (for slaves) the IP address of the master so they can connect to it. They will then automatically connect to each other.
STARPU_TCPIP_MS_PORT
\anchor STARPU_TCPIP_MS_PORT \addindex __env__STARPU_TCPIP_MS_PORT Specify the port of the master, for connexions between slaves and the master. Default value is 1234.
STARPU_NTCPIPMSTHREADS
\anchor STARPU_NTCPIPMSTHREADS \addindex __env__STARPU_NTCPIPMSTHREADS Specify the number of threads to use on the TCP/IP Slave devices.
STARPU_TCPIP_MS_MULTIPLE_THREAD
\anchor STARPU_TCPIP_MS_MULTIPLE_THREAD \addindex __env__STARPU_TCPIP_MS_MULTIPLE_THREAD Specify whether the master should use one thread per slave, or one thread for driver all slaves. Default value is 0.
STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY
\anchor STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY \addindex __env__STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY Disable asynchronous copies between CPU and TCP/IP Slave devices. One can call starpu_asynchronous_tcpip_ms_copy_disabled() to check whether asynchronous data transfers between CPU and TCP/IP Slave devices are disabled.
\subsection hipWorkers HIP Workers
STARPU_NHIP
\anchor STARPU_NHIP \addindex __env__STARPU_NHIP Specify the number of HIP devices that StarPU can use. If \ref STARPU_NHIP is lower than the number of physical devices, it is possible to select which HIP devices should be used by the means of the environment variable \ref STARPU_WORKERS_HIPID. By default, StarPU will create as many HIP workers as there are HIP devices.
STARPU_WORKERS_HPIID
\anchor STARPU_WORKERS_HIPID \addindex __env__STARPU_WORKERS_HIPID Select which HIP devices should be used to run HIP workers (similarly to the \ref STARPU_WORKERS_HIPID environment variable). On a machine equipped with 4 HIP devices, setting STARPU_WORKERS_HIPID="1 3" and STARPU_NHIP=2 specifies that 2 HIP workers should be created, and that they should use HIP devices #1 and #3. This variable is ignored if the field starpu_conf::use_explicit_workers_hip_gpuid passed to starpu_init() is set.
STARPU_DISABLE_ASYNCHRONOUS_HIP_COPY
\anchor STARPU_DISABLE_ASYNCHRONOUS_HIP_COPY \addindex __env__STARPU_DISABLE_ASYNCHRONOUS_HIP_COPY Disable asynchronous copies between CPU and HIP devices. One can call starpu_asynchronous_hip_copy_disabled() to check whether asynchronous data transfers between CPU and HIP accelerators are disabled.
\subsection mpiConf MPI Configuration
STARPU_MPI_THREAD_CPUID
\anchor STARPU_MPI_THREAD_CPUID \addindex __env__STARPU_MPI_THREAD_CPUID Tell StarPU to bind its MPI thread to the given CPU id, subtracted from the CPU workers (unless \ref STARPU_NCPU is defined). Default value is -1, it will let StarPU allocate a CPU.
STARPU_MPI_THREAD_COREID
\anchor STARPU_MPI_THREAD_COREID \addindex __env__STARPU_MPI_THREAD_COREID Same as \ref STARPU_MPI_THREAD_CPUID, but bind the MPI thread to the given core ID, instead of the PU (hyperthread).
STARPU_MPI_THREAD_MULTIPLE_SEND
\anchor STARPU_MPI_THREAD_MULTIPLE_SEND \addindex __env__STARPU_MPI_THREAD_MULTIPLE_SEND Setting it to non-zero makes StarPU emit MPI send requests from all threads, not just the MPI thread. This can improve performance, but depends on the MPI implementation to be really thread-multiple-safe.
STARPU_MPI_NOBIND
\anchor STARPU_MPI_NOBIND \addindex __env__STARPU_MPI_NOBIND Setting it to non-zero will prevent StarPU from binding the MPI to a separate core. This is for instance useful when running the testsuite on a single system.
STARPU_MPI_GPUDIRECT
\anchor STARPU_MPI_GPUDIRECT \addindex __env__STARPU_MPI_GPUDIRECT Enable (1) or disable (0) MPI GPUDirect support. Default value (-1) is to enable if available. If \ref STARPU_MPI_GPUDIRECT is explicitly set to 1, StarPU-MPI will warn if MPI does not provide the GPUDirect support.
STARPU_MPI_PSM2
\anchor STARPU_MPI_PSM2 \addindex __env__STARPU_MPI_PSM2 This variable allows to supercede PSM2 detection when asking for MPI GPUDirect support. This is helpful when using old intel compilers, for which PSM2 detection is always true. The default (1) is to enable it. If PSM2 is detected whereas it should not be, this variable can be set to 0.
STARPU_MPI_REDUX_ARITY_THRESHOLD
\anchor STARPU_MPI_REDUX_ARITY_THRESHOLD \addindex __env__STARPU_MPI_REDUX_ARITY_THRESHOLD The arity of the automatically-detected reduction trees follows the following rule: when the data to be reduced is of small size a flat tree is unrolled i.e. all the contributing nodes send their contribution to the root of the reduction. When the data to be reduced is of big size, a binary tree is used instead. The default threshold between flat and binary tree is 1024 bytes. By setting the environment variable with a negative value, all the automatically detected reduction trees will use flat trees. If this value is set to 0, then binary trees will always be selected. Otherwise, the setup value replaces the default 1024.
\section ConfiguringTheSchedulingEngine Configuring The Scheduling Engine
STARPU_SCHED
\anchor STARPU_SCHED \addindex __env__STARPU_SCHED Select the scheduling policy from those proposed by StarPU: work random, stealing, greedy, with performance models, etc. Use STARPU_SCHED=help to get the list of available schedulers.
STARPU_SCHED_LIB
\anchor STARPU_SCHED_LIB \addindex __env__STARPU_SCHED_LIB Specify the location of a dynamic library to choose a user-defined scheduling policy. See \ref UsingaNewSchedulingPolicy for more information.
STARPU_MIN_PRIO
\anchor STARPU_MIN_PRIO_env \addindex __env__STARPU_MIN_PRIO Set the minimum priority used by priorities-aware schedulers. The flag can also be set through the field starpu_conf::global_sched_ctx_min_priority.
STARPU_MAX_PRIO
\anchor STARPU_MAX_PRIO_env \addindex __env__STARPU_MAX_PRIO Set the maximum priority used by priorities-aware schedulers. The flag can also be set through the field starpu_conf::global_sched_ctx_max_priority.
STARPU_CALIBRATE
\anchor STARPU_CALIBRATE \addindex __env__STARPU_CALIBRATE Set to 1 to calibrate the performance models during the execution. Set to 2 to drop the previous values and restart the calibration from scratch. Set to 0 to disable calibration, this is the default behaviour. Note: this currently only applies to dm and dmda scheduling policies.
STARPU_CALIBRATE_MINIMUM
\anchor STARPU_CALIBRATE_MINIMUM \addindex __env__STARPU_CALIBRATE_MINIMUM Define the minimum number of calibration measurements that will be made before considering that the performance model is calibrated. Default value is 10.
STARPU_BUS_CALIBRATE
\anchor STARPU_BUS_CALIBRATE \addindex __env__STARPU_BUS_CALIBRATE Set to 1 to recalibrate the bus during initialization.
STARPU_PREFETCH
\anchor STARPU_PREFETCH \addindex __env__STARPU_PREFETCH Enable (1) or disable (0) data prefetching. Default value is Enable. If prefetching is enabled, when a task is scheduled to be executed e.g. on a GPU, StarPU will request an asynchronous transfer in advance, so that data is already present on the GPU when the task starts. As a result, computation and data transfers are overlapped.
STARPU_SCHED_ALPHA
\anchor STARPU_SCHED_ALPHA \addindex __env__STARPU_SCHED_ALPHA To estimate the cost of a task StarPU takes into account the estimated computation time (obtained thanks to performance models). The alpha factor is the coefficient to be applied to it before adding it to the communication part.
STARPU_SCHED_BETA
\anchor STARPU_SCHED_BETA \addindex __env__STARPU_SCHED_BETA To estimate the cost of a task StarPU takes into account the estimated data transfer time (obtained thanks to performance models). The beta factor is the coefficient to be applied to it before adding it to the computation part.
STARPU_SCHED_GAMMA
\anchor STARPU_SCHED_GAMMA \addindex __env__STARPU_SCHED_GAMMA Define the execution time penalty of a joule (\ref Energy-basedScheduling).
STARPU_SCHED_READY
\anchor STARPU_SCHED_READY \addindex __env__STARPU_SCHED_READY For a modular scheduler with sorted queues below the decision component, workers pick up a task which has most of its data already available. Setting this to 0 disables this.
STARPU_SCHED_SORTED_ABOVE
\anchor STARPU_SCHED_SORTED_ABOVE \addindex __env__STARPU_SCHED_SORTED_ABOVE For a modular scheduler with queues above the decision component, it is usually sorted by priority. Setting this to 0 disables this.
STARPU_SCHED_SORTED_BELOW
\anchor STARPU_SCHED_SORTED_BELOW \addindex __env__STARPU_SCHED_SORTED_BELOW For a modular scheduler with queues below the decision component, they are usually sorted by priority. Setting this to 0 disables this.
STARPU_IDLE_POWER
\anchor STARPU_IDLE_POWER \addindex __env__STARPU_IDLE_POWER Define the idle power of the machine (\ref Energy-basedScheduling).
STARPU_PROFILING
\anchor STARPU_PROFILING \addindex __env__STARPU_PROFILING Enable on-line performance monitoring (\ref EnablingOn-linePerformanceMonitoring).
STARPU_CODELET_PROFILING
\anchor STARPU_CODELET_PROFILING \addindex __env__STARPU_CODELET_PROFILING Enable on-line performance monitoring of codelets (\ref Per-codeletFeedback). (enabled by default)
STARPU_ENERGY_PROFILING
\anchor STARPU_ENERGY_PROFILING \addindex __env__STARPU_ENERGY_PROFILING Enable on-line energy monitoring of tasks (\ref Per-codeletFeedback). (disabled by default)
STARPU_PROF_PAPI_EVENTS
\anchor STARPU_PROF_PAPI_EVENTS \addindex __env__STARPU_PROF_PAPI_EVENTS Specify which PAPI events should be recorded in the trace (\ref PapiCounters).
\section ConfiguringHeteroprio Configuring The Heteroprio Scheduler \subsection ConfiguringLaHeteroprio Configuring LAHeteroprio
STARPU_HETEROPRIO_USE_LA
\anchor STARPU_HETEROPRIO_USE_LA \addindex __env__STARPU_HETEROPRIO_USE_LA Enable the locality aware mode of Heteroprio which guides the distribution of tasks to workers in order to reduce the data transfers between memory nodes.
STARPU_LAHETEROPRIO_PUSH
\anchor STARPU_LAHETEROPRIO_PUSH \addindex __env__STARPU_LAHETEROPRIO_PUSH Choose between the different push strategies for locality aware Heteroprio: \c WORKER, \c LcS, \c LS_SDH, \c LS_SDH2, \c LS_SDHB, \c LC_SMWB, \c AUTO (by default: AUTO). These are detailed in \ref LAHeteroprio
STARPU_LAHETEROPRIO_S_[ARCH]
\anchor STARPU_LAHETEROPRIO_S_[ARCH] \addindex __env__STARPU_LAHETEROPRIO_S_arch Specify the number of memory nodes contained in an affinity group. An affinity group will be composed of the closest memory nodes to a worker of a given architecture, and this worker will look for tasks available inside these memory nodes, before considering stealing tasks outside this group. ARCH can be \c CPU, \c CUDA, \c OPENCL, \c SCC, \c MPI_MS, etc.
STARPU_LAHETEROPRIO_PRIO_STEP_[ARCH]
\anchor STARPU_LAHETEROPRIO_PRIO_STEP_[ARCH] \addindex __env__STARPU_LAHETEROPRIO_PRIO_STEP_arch Specify the number of buckets in the local memory node in which a worker will look for available tasks, before this worker starts looking for tasks in other memory nodes' buckets. ARCH indicates that this number is specific to a given arch which can be: \c CPU, \c CUDA, \c OPENCL, \c SCC, \c MPI_MS, etc.
\subsection ConfiguringAutoHeteroprio Configuring AutoHeteroprio
STARPU_HETEROPRIO_USE_AUTO_CALIBRATION
\anchor STARPU_HETEROPRIO_USE_AUTO_CALIBRATION \addindex __env__STARPU_HETEROPRIO_USE_AUTO_CALIBRATION Enable the auto calibration mode of Heteroprio which assign priorities to tasks automatically
STARPU_HETEROPRIO_DATA_DIR
\anchor STARPU_HETEROPRIO_DATA_DIR \addindex __env__STARPU_HETEROPRIO_DATA_DIR Specify the path of the directory where Heteroprio stores data about program executions. By default, these are stored in the same directory used by perfmodel.
STARPU_HETEROPRIO_DATA_FILE
\anchor STARPU_HETEROPRIO_DATA_FILE \addindex __env__STARPU_HETEROPRIO_DATA_FILE Specify the filename where Heteroprio will save data about the current program's execution.
STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY
\anchor STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY \addindex __env__STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY Choose how Heteroprio groups similar tasks. It can be 0 to group the tasks with the same perfmodel or the same codelet's name if no perfmodel was assigned. Or, it could be 1 to group the tasks only by codelet's name.
STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE
\anchor STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE \addindex __env__STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE Enable the printing of priorities' data every time they get updated.
STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING
\anchor STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING \addindex __env__STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING Enable the printing of priorities' order for each architecture every time there's a reordering.
STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY
\anchor STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY \addindex __env__STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY Specify the heuristic which will be used to assign priorities automatically. It should be an integer between 0 and 27.
STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL
\anchor STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL \addindex __env__STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL Specify the period (in number of tasks pushed), between priorities reordering operations.
STARPU_AUTOHETEROPRIO_FREEZE_GATHERING
\anchor STARPU_AUTOHETEROPRIO_FREEZE_GATHERING \addindex __env__STARPU_AUTOHETEROPRIO_FREEZE_GATHERING Disable data gathering from task executions.
\section Extensions Extensions
SOCL_OCL_LIB_OPENCL
\anchor SOCL_OCL_LIB_OPENCL \addindex __env__SOCL_OCL_LIB_OPENCL Set the location of the file libOpenCL.so of the OCL ICD implementation. The SOCL test suite is only run when \ref SOCL_OCL_LIB_OPENCL is defined.
OCL_ICD_VENDORS
\anchor OCL_ICD_VENDORS \addindex __env__OCL_ICD_VENDORS Set the directory where ICD files are installed. This is useful when using SOCL with OpenCL ICD (https://forge.imag.fr/projects/ocl-icd/). Default directory is /etc/OpenCL/vendors. StarPU installs ICD files in the directory $prefix/share/starpu/opencl/vendors.
STARPU_COMM_STATS
\anchor STARPU_COMM_STATS \addindex __env__STARPU_COMM_STATS Deprecated. You should use \ref STARPU_MPI_STATS.
STARPU_MPI_STATS
\anchor STARPU_MPI_STATS \addindex __env__STARPU_MPI_STATS Enable (!= 0) or Disable (0) communication statistics for starpumpi (\ref MPIDebug). Default value is Disable.
STARPU_MPI_CACHE
\anchor STARPU_MPI_CACHE \addindex __env__STARPU_MPI_CACHE Disable (0) or Enable (!= 0) communication cache for starpumpi (\ref MPISupport). Default value is Enable.
STARPU_MPI_COMM
\anchor STARPU_MPI_COMM \addindex __env__STARPU_MPI_COMM Enable (1) communication trace for starpumpi (\ref MPISupport). Also needs for StarPU to have been configured with the option \ref enable-verbose "--enable-verbose".
STARPU_MPI_CACHE_STATS
\anchor STARPU_MPI_CACHE_STATS \addindex __env__STARPU_MPI_CACHE_STATS Enable (1) statistics for the communication cache (\ref MPISupport). Messages are printed on the standard output when data are added or removed from the received communication cache.
STARPU_MPI_PRIORITIES
\anchor STARPU_MPI_PRIORITIES \addindex __env__STARPU_MPI_PRIORITIES Disable (0) the use of priorities to order MPI communications (\ref MPISupport).
STARPU_MPI_NDETACHED_SEND
\anchor STARPU_MPI_NDETACHED_SEND \addindex __env__STARPU_MPI_NDETACHED_SEND Set the number of send requests that StarPU-MPI will emit concurrently. Default value is 10. Setting it to 0 removes the limit of concurrent send requests.
STARPU_MPI_NREADY_PROCESS
\anchor STARPU_MPI_NREADY_PROCESS \addindex __env__STARPU_MPI_NREADY_PROCESS Set the number of requests that StarPU-MPI will submit to MPI before polling for termination of existing requests. Default value is 10. Setting it to 0 removes the limit: all requests to submit to MPI will be submitted before polling for termination of existing ones.
STARPU_MPI_FAKE_SIZE
\anchor STARPU_MPI_FAKE_SIZE \addindex __env__STARPU_MPI_FAKE_SIZE Setting to a number makes StarPU believe that there are as many MPI nodes, even if it was run on only one MPI node. This allows e.g. to simulate the execution of one of the nodes of a big cluster without actually running the rest. Of course, it does not provide computation results and timing.
STARPU_MPI_FAKE_RANK
\anchor STARPU_MPI_FAKE_RANK \addindex __env__STARPU_MPI_FAKE_RANK Setting to a number makes StarPU believe that it runs the given MPI node, even if it was run on only one MPI node. This allows e.g. to simulate the execution of one of the nodes of a big cluster without actually running the rest. Of course, it does not provide computation results and timing.
STARPU_MPI_COOP_SENDS
\anchor STARPU_MPI_COOP_SENDS \addindex __env__STARPU_MPI_COOP_SENDS Disable (0) dynamic collective operations: grouping same requests to different nodes until the data becomes available and then use a broadcast tree to execute requests.
By now, it is only supported with the NewMadeleine library (see \ref Nmad).
STARPU_MPI_RECV_WAIT_FINALIZE
\anchor STARPU_MPI_RECV_WAIT_FINALIZE \addindex __env__STARPU_MPI_RECV_WAIT_FINALIZE Disable (1) releasing the write acquire of receiving handles when data is received but the communication library still needs the data. Set to 0 by default to unlock as soon as possible tasks which only require a read access on the handle; write access will become possible for tasks when the communication library will not need the data anymore.
By now, it is only supported with the NewMadeleine library (see \ref Nmad).
STARPU_MPI_TRACE_SYNC_CLOCKS
\anchor STARPU_MPI_TRACE_SYNC_CLOCKS \addindex __env__STARPU_MPI_TRACE_SYNC_CLOCKS When \c mpi_sync_clocks is available, this library will be used to have more precise clock synchronization in traces coming from different nodes. However, the clock synchronization process can take some time (several seconds) and can be disabled by setting this variable to \c 0. In that case, a less precise but faster synchronization will be used. See \ref TraceMpi for more details.
STARPU_MPI_DRIVER_CALL_FREQUENCY
\anchor STARPU_MPI_DRIVER_CALL_FREQUENCY \addindex __env__STARPU_MPI_DRIVER_CALL_FREQUENCY When set to a positive value, activates the interleaving of the execution of tasks with the progression of MPI communications (\ref MPISupport). The starpu_mpi_init_conf() function must have been called by the application for that environment variable to be used. When set to 0, the MPI progression thread does not use at all the driver given by users, and only focuses on making MPI communications progress.
STARPU_MPI_DRIVER_TASK_FREQUENCY
\anchor STARPU_MPI_DRIVER_TASK_FREQUENCY \addindex __env__STARPU_MPI_DRIVER_TASK_FREQUENCY When set to a positive value, the interleaving of the execution of tasks with the progression of MPI communications mechanism to execute several tasks before checking communication requests again (\ref MPISupport). The starpu_mpi_init_conf() function must have been called by the application for that environment variable to be used, and the \ref STARPU_MPI_DRIVER_CALL_FREQUENCY environment variable set to a positive value.
STARPU_MPI_MEM_THROTTLE
\anchor STARPU_MPI_MEM_THROTTLE \addindex __env__STARPU_MPI_MEM_THROTTLE When set to a positive value, this makes the starpu_mpi_*recv* functions block when the memory allocation required for network reception overflows the available main memory (as typically set by \ref STARPU_LIMIT_CPU_MEM)
STARPU_MPI_EARLYDATA_ALLOCATE
\anchor STARPU_MPI_EARLYDATA_ALLOCATE \addindex __env__STARPU_MPI_EARLYDATA_ALLOCATE When set to 1, the MPI Driver will immediately allocate the data for early requests instead of issuing a data request and blocking. Default value is 0, issuing a data request. Because it is an early request and we do not know its real priority, the data request will assume \ref STARPU_DEFAULT_PRIO. In cases where there are many data requests with priorities greater than \ref STARPU_DEFAULT_PRIO the MPI drive could be blocked for long periods.
STARPU_SIMGRID
\anchor STARPU_SIMGRID \addindex __env__STARPU_SIMGRID When set to 1 (default value is 0), this makes StarPU check that it was really build with simulation support. This is convenient in scripts to avoid using a native version, that would try to update performance models...
STARPU_SIMGRID_TRANSFER_COST
\anchor STARPU_SIMGRID_TRANSFER_COST \addindex __env__STARPU_SIMGRID_TRANSFER_COST When set to 1 (which is the default value), data transfers (over PCI bus, typically) are taken into account in SimGrid mode.
STARPU_SIMGRID_CUDA_MALLOC_COST
\anchor STARPU_SIMGRID_CUDA_MALLOC_COST \addindex __env__STARPU_SIMGRID_CUDA_MALLOC_COST When set to 1 (which is the default value), CUDA malloc costs are taken into account in SimGrid mode.
STARPU_SIMGRID_CUDA_QUEUE_COST
\anchor STARPU_SIMGRID_CUDA_QUEUE_COST \addindex __env__STARPU_SIMGRID_CUDA_QUEUE_COST When set to 1 (which is the default value), CUDA task and transfer queueing costs are taken into account in SimGrid mode.
STARPU_PCI_FLAT
\anchor STARPU_PCI_FLAT \addindex __env__STARPU_PCI_FLAT When unset or set to 0, the platform file created for SimGrid will contain PCI bandwidths and routes.
STARPU_SIMGRID_CUDA_QUEUE_COST
\anchor STARPU_SIMGRID_CUDA_QUEUE_COST \addindex __env__STARPU_SIMGRID_CUDA_QUEUE_COST When unset or set to 1, simulate within SimGrid the GPU transfer queueing.
STARPU_MALLOC_SIMULATION_FOLD
\anchor STARPU_MALLOC_SIMULATION_FOLD \addindex __env__STARPU_MALLOC_SIMULATION_FOLD Define the size of the file used for folding virtual allocation, in MiB. Default value is 1, thus allowing 64GiB virtual memory when Linux's sysctl vm.max_map_count value is the default 65535.
STARPU_SIMGRID_TASK_SUBMIT_COST
\anchor STARPU_SIMGRID_TASK_SUBMIT_COST \addindex __env__STARPU_SIMGRID_TASK_SUBMIT_COST When set to 1 (which is the default value), task submission costs are taken into account in SimGrid mode. This provides more accurate SimGrid predictions, especially for the beginning of the execution.
STARPU_SIMGRID_TASK_PUSH_COST
\anchor STARPU_SIMGRID_TASK_PUSH_COST \addindex __env__STARPU_SIMGRID_TASK_PUSH_COST When set to 1 (which is the default value), task push costs are taken into account in SimGrid mode. This provides more accurate SimGrid predictions, especially with large dependency arities.
STARPU_SIMGRID_FETCHING_INPUT_COST
\anchor STARPU_SIMGRID_FETCHING_INPUT_COST \addindex __env__STARPU_SIMGRID_FETCHING_INPUT_COST When set to 1 (which is the default value), fetching input costs are taken into account in SimGrid mode. This provides more accurate SimGrid predictions, especially regarding data transfers.
STARPU_SIMGRID_SCHED_COST
\anchor STARPU_SIMGRID_SCHED_COST \addindex __env__STARPU_SIMGRID_SCHED_COST When set to 1 (0 is the default value), scheduling costs are taken into account in SimGrid mode. This provides more accurate SimGrid predictions, and allows studying scheduling overhead of the runtime system. However, it also makes simulation non-deterministic.
STARPUPY_MULTI_INTERPRETER
\anchor STARPUPY_MULTI_INTERPRETER \addindex __env__STARPUPY_MULTI_INTERPRETER Enable (1) or disable (0) multi interpreters in the StarPU Python interface (\ref MultipleInterpreters). Default value is Disable.
STARPUPY_OWN_GIL
\anchor STARPUPY_OWN_GIL \addindex __env__STARPUPY_OWN_GIL Enable (1) or disable (0) using per-interpreter GIL (\ref PythonParallelism). Default value is Disable for now, until python is fully ready for this.
\section MiscellaneousAndDebug Miscellaneous And Debug
STARPU_HOME
\anchor STARPU_HOME \addindex __env__STARPU_HOME Specify the main directory in which StarPU stores its configuration files. Default value is $HOME on Unix environments, and $USERPROFILE on Windows environments.
STARPU_PATH
\anchor STARPU_PATH \addindex __env__STARPU_PATH Only used on Windows environments. Specify the main directory in which StarPU is installed (\ref RunningABasicStarPUApplicationOnMicrosoft)
STARPU_PERF_MODEL_DIR
\anchor STARPU_PERF_MODEL_DIR \addindex __env__STARPU_PERF_MODEL_DIR Specify the main directory in which StarPU stores its performance model files. Default value is $STARPU_HOME/.starpu/sampling. See \ref Storing_Performance_Model_Files for more details.
STARPU_PERF_MODEL_PATH
\anchor STARPU_PERF_MODEL_PATH \addindex __env__STARPU_PERF_MODEL_PATH Specify a list of directories separated with ':' in which StarPU stores its performance model files. See \ref Storing_Performance_Model_Files for more details.
STARPU_PERF_MODEL_HOMOGENEOUS_CPU
\anchor STARPU_PERF_MODEL_HOMOGENEOUS_CPU \addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_CPU When set to 0, StarPU will assume that CPU devices do not have the same performance, and thus use different performance models for them, thus making kernel calibration much longer, since measurements have to be made for each CPU core.
STARPU_PERF_MODEL_HOMOGENEOUS_CUDA
\anchor STARPU_PERF_MODEL_HOMOGENEOUS_CUDA \addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_CUDA When set to 1, StarPU will assume that all CUDA devices have the same performance, and thus share performance models for them, thus allowing kernel calibration to be much faster, since measurements only have to be once for all CUDA GPUs.
STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL
\anchor STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL \addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL When set to 1, StarPU will assume that all OpenCL devices have the same performance, and thus share performance models for them, thus allowing kernel calibration to be much faster, since measurements only have to be once for all OpenCL GPUs.
STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS
\anchor STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS \addindex __env__STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS When set to 1, StarPU will assume that all MPI Slave devices have the same performance, and thus share performance models for them, thus allowing kernel calibration to be much faster, since measurements only have to be once for all MPI Slaves.
STARPU_HOSTNAME
\anchor STARPU_HOSTNAME \addindex __env__STARPU_HOSTNAME When set, force the hostname to be used when managing performance model files. Models are indexed by machine name. When running for example on a homogenenous cluster, it is possible to share the models between machines by setting export STARPU_HOSTNAME=some_global_name.
STARPU_MPI_HOSTNAMES
\anchor STARPU_MPI_HOSTNAMES \addindex __env__STARPU_MPI_HOSTNAMES Similar to \ref STARPU_HOSTNAME but to define multiple nodes on a heterogeneous cluster. The variable is a list of hostnames that will be assigned to each StarPU-MPI rank considering their position and the value of starpu_mpi_world_rank() on each rank. When running, for example, on a heterogeneous cluster, it is possible to set individual models for each machine by setting export STARPU_MPI_HOSTNAMES="name0 name1 name2". Where rank 0 will receive \c name0, rank1 will receive \c name1, and so on. This variable has precedence over \ref STARPU_HOSTNAME.
STARPU_OPENCL_PROGRAM_DIR
\anchor STARPU_OPENCL_PROGRAM_DIR \addindex __env__STARPU_OPENCL_PROGRAM_DIR Specify the directory where the OpenCL codelet source files are located. The function starpu_opencl_load_program_source() looks for the codelet in the current directory, in the directory specified by the environment variable \ref STARPU_OPENCL_PROGRAM_DIR, in the directory share/starpu/opencl of the installation directory of StarPU, and finally in the source directory of StarPU.
STARPU_SILENT
\anchor STARPU_SILENT \addindex __env__STARPU_SILENT Disable verbose mode at runtime when StarPU has been configured with the option \ref enable-verbose "--enable-verbose". Also disable the display of StarPU information and warning messages.
STARPU_MPI_DEBUG_LEVEL_MIN
\anchor STARPU_MPI_DEBUG_LEVEL_MIN \addindex __env__STARPU_MPI_DEBUG_LEVEL_MIN Set the minimum level of debug when StarPU has been configured with the option \ref enable-mpi-verbose "--enable-mpi-verbose".
STARPU_MPI_DEBUG_LEVEL_MAX
\anchor STARPU_MPI_DEBUG_LEVEL_MAX \addindex __env__STARPU_MPI_DEBUG_LEVEL_MAX Set the maximum level of debug when StarPU has been configured with the option \ref enable-mpi-verbose "--enable-mpi-verbose".
STARPU_LOGFILENAME
\anchor STARPU_LOGFILENAME \addindex __env__STARPU_LOGFILENAME Specify in which file the debugging output should be saved to.
STARPU_FXT_PREFIX
\anchor STARPU_FXT_PREFIX \addindex __env__STARPU_FXT_PREFIX Specify in which directory to save the generated trace if FxT is enabled.
STARPU_FXT_SUFFIX
\anchor STARPU_FXT_SUFFIX \addindex __env__STARPU_FXT_SUFFIX Specify in which file to save the generated trace if FxT is enabled.
STARPU_FXT_TRACE
\anchor STARPU_FXT_TRACE \addindex __env__STARPU_FXT_TRACE Enable (1) or disable (0) the FxT trace generation in \c /tmp/prof_file_XXX_YYY (the directory and file name can be changed with \ref STARPU_FXT_PREFIX and \ref STARPU_FXT_SUFFIX). Default value is Disable.
STARPU_FXT_EVENTS
\anchor STARPU_FXT_EVENTS \addindex __env__STARPU_FXT_EVENTS Specify which events will be recorded in traces. By default, all events (but VERBOSE_EXTRA ones) are recorded. One can set this variable to a comma- or pipe-separated list of the following categories, to record only events belonging to the selected categories: - USER - TASK - TASK_VERBOSE - TASK_VERBOSE_EXTRA - DATA - DATA_VERBOSE - WORKER - WORKER_VERBOSE - DSM - DSM_VERBOSE - SCHED - SCHED_VERBOSE - LOCK - LOCK_VERBOSE - EVENT - EVENT_VERBOSE - MPI - MPI_VERBOSE - MPI_VERBOSE_EXTRA - HYP - HYP_VERBOSE
The choice of which categories have to be recorded is a tradeoff between required information for offline analyzis and acceptable overhead introduced by tracing. For instance, to inspect with ViTE which tasks workers execute, one has to at least select the TASK category. Events in VERBOSE_EXTRA are very costly to record and can have an important impact on application performances. This is why there are disabled by default, and one has to explicitly select their categories using this variable to record them.
STARPU_LIMIT_CUDA_devid_MEM
\anchor STARPU_LIMIT_CUDA_devid_MEM \addindex __env__STARPU_LIMIT_CUDA_devid_MEM Specify the maximum number of megabytes that should be available to the application on the CUDA device with the identifier devid. This variable is intended to be used for experimental purposes as it emulates devices that have a limited amount of memory. When defined, the variable overwrites the value of the variable \ref STARPU_LIMIT_CUDA_MEM.
STARPU_LIMIT_CUDA_MEM
\anchor STARPU_LIMIT_CUDA_MEM \addindex __env__STARPU_LIMIT_CUDA_MEM Specify the maximum number of megabytes that should be available to the application on each CUDA devices. This variable is intended to be used for experimental purposes as it emulates devices that have a limited amount of memory.
STARPU_LIMIT_OPENCL_devid_MEM
\anchor STARPU_LIMIT_OPENCL_devid_MEM \addindex __env__STARPU_LIMIT_OPENCL_devid_MEM Specify the maximum number of megabytes that should be available to the application on the OpenCL device with the identifier devid. This variable is intended to be used for experimental purposes as it emulates devices that have a limited amount of memory. When defined, the variable overwrites the value of the variable \ref STARPU_LIMIT_OPENCL_MEM.
STARPU_LIMIT_OPENCL_MEM
\anchor STARPU_LIMIT_OPENCL_MEM \addindex __env__STARPU_LIMIT_OPENCL_MEM Specify the maximum number of megabytes that should be available to the application on each OpenCL devices. This variable is intended to be used for experimental purposes as it emulates devices that have a limited amount of memory.
STARPU_LIMIT_HIP_devid_MEM
\anchor STARPU_LIMIT_HIP_devid_MEM \addindex __env__STARPU_LIMIT_HIP_devid_MEM Specify the maximum number of megabytes that should be available to the application on the HIP device with the identifier devid. This variable is intended to be used for experimental purposes as it emulates devices that have a limited amount of memory. When defined, the variable overwrites the value of the variable \ref STARPU_LIMIT_HIP_MEM.
STARPU_LIMIT_HIP_MEM
\anchor STARPU_LIMIT_HIP_MEM \addindex __env__STARPU_LIMIT_HIP_MEM Specify the maximum number of megabytes that should be available to the application on each HIP devices. This variable is intended to be used for experimental purposes as it emulates devices that have a limited amount of memory.
STARPU_LIMIT_CPU_MEM
\anchor STARPU_LIMIT_CPU_MEM \addindex __env__STARPU_LIMIT_CPU_MEM Specify the maximum number of megabytes that should be available to the application in the main CPU memory. Setting it enables allocation cache in main memory. Setting it to zero lets StarPU overflow memory. Note: for now not all StarPU allocations get throttled by this parameter. Notably MPI reception are not throttled unless \ref STARPU_MPI_MEM_THROTTLE is set to 1.
STARPU_LIMIT_CPU_NUMA_devid_MEM
\anchor STARPU_LIMIT_CPU_NUMA_devid_MEM \addindex __env__STARPU_LIMIT_CPU_NUMA_devid_MEM Specify the maximum number of megabytes that should be available to the application on the NUMA node with the OS identifier devid. Setting it overrides the value of \ref STARPU_LIMIT_CPU_MEM.
STARPU_LIMIT_CPU_NUMA_MEM
\anchor STARPU_LIMIT_CPU_NUMA_MEM \addindex __env__STARPU_LIMIT_CPU_NUMA_MEM Specify the maximum number of megabytes that should be available to the application on each NUMA node. This is the same as specifying that same amount with \ref STARPU_LIMIT_CPU_NUMA_devid_MEM for each NUMA node number. The total memory available to StarPU will thus be this amount multiplied by the number of NUMA nodes used by StarPU. Any \ref STARPU_LIMIT_CPU_NUMA_devid_MEM additionally specified will take over \ref STARPU_LIMIT_CPU_NUMA_MEM.
STARPU_LIMIT_BANDWIDTH
\anchor STARPU_LIMIT_BANDWIDTH \addindex __env__STARPU_LIMIT_BANDWIDTH Specify the maximum available PCI bandwidth of the system in MB/s. This can only be effective with simgrid simulation. This allows to easily override the bandwidths stored in the platform file generated from measurements on the native system. This can thus be used accelerate or slow down the system bandwidth.
STARPU_SUBALLOCATOR
\anchor STARPU_SUBALLOCATOR \addindex __env__STARPU_SUBALLOCATOR Enable (1) or disable (0) the StarPU suballocator. Default value is to enable it to amortize the cost of GPU and pinned RAM allocations for small allocations: StarPU allocate large chunks of memory at a time, and suballocates the small buffers within them.
STARPU_MINIMUM_AVAILABLE_MEM
\anchor STARPU_MINIMUM_AVAILABLE_MEM \addindex __env__STARPU_MINIMUM_AVAILABLE_MEM Specify the minimum percentage of memory that should be available in GPUs, i.e. not used at all by StarPU (or in main memory, when using out of core), below which a eviction pass is performed. Default value is 0%.
STARPU_TARGET_AVAILABLE_MEM
\anchor STARPU_TARGET_AVAILABLE_MEM \addindex __env__STARPU_TARGET_AVAILABLE_MEM Specify the target percentage of memory that should be available in GPUs, i.e. not used at all by StarPU (or in main memory, when using out of core), when performing a periodic eviction pass. Default value is 0%.
STARPU_MINIMUM_CLEAN_BUFFERS
\anchor STARPU_MINIMUM_CLEAN_BUFFERS \addindex __env__STARPU_MINIMUM_CLEAN_BUFFERS Specify the minimum percentage of number of buffers that should be clean in GPUs (or in main memory, when using out of core), i.e. used by StarPU, but for which a copy is available in memory (or on disk, when using out of core), below which asynchronous writebacks will be issued. Default value is 5%.
STARPU_TARGET_CLEAN_BUFFERS
\anchor STARPU_TARGET_CLEAN_BUFFERS \addindex __env__STARPU_TARGET_CLEAN_BUFFERS Specify the target percentage of number of buffers that should be reached in GPUs (or in main memory, when using out of core), i.e. used by StarPU, but for which a copy is available in memory (or on disk, when using out of core), when performing an asynchronous writeback pass. Default value is 10%.
STARPU_DISK_SWAP
\anchor STARPU_DISK_SWAP \addindex __env__STARPU_DISK_SWAP Specify a path where StarPU can push data when the main memory is getting full.
STARPU_DISK_SWAP_BACKEND
\anchor STARPU_DISK_SWAP_BACKEND \addindex __env__STARPU_DISK_SWAP_BACKEND Specify the backend to be used by StarPU to push data when the main memory is getting full. Default value is \c unistd (i.e. using read/write functions), other values are \c stdio (i.e. using fread/fwrite), \c unistd_o_direct (i.e. using read/write with O_DIRECT), \c leveldb (i.e. using a leveldb database), and \c hdf5 (i.e. using HDF5 library).
STARPU_DISK_SWAP_SIZE
\anchor STARPU_DISK_SWAP_SIZE \addindex __env__STARPU_DISK_SWAP_SIZE Specify the maximum size in MiB to be used by StarPU to push data when the main memory is getting full. Default value is unlimited.
STARPU_LIMIT_MAX_SUBMITTED_TASKS
\anchor STARPU_LIMIT_MAX_SUBMITTED_TASKS \addindex __env__STARPU_LIMIT_MAX_SUBMITTED_TASKS Allow users to control the task submission flow by specifying to StarPU a maximum number of submitted tasks allowed at a given time, i.e. when this limit is reached task submission becomes blocking until enough tasks have completed, specified by \ref STARPU_LIMIT_MIN_SUBMITTED_TASKS. Setting it enables allocation cache buffer reuse in main memory. See \ref HowToReduceTheMemoryFootprintOfInternalDataStructures.
STARPU_LIMIT_MIN_SUBMITTED_TASKS
\anchor STARPU_LIMIT_MIN_SUBMITTED_TASKS \addindex __env__STARPU_LIMIT_MIN_SUBMITTED_TASKS Allow users to control the task submission flow by specifying to StarPU a submitted task threshold to wait before unblocking task submission. This variable has to be used in conjunction with \ref STARPU_LIMIT_MAX_SUBMITTED_TASKS which puts the task submission thread to sleep. Setting it enables allocation cache buffer reuse in main memory. See \ref HowToReduceTheMemoryFootprintOfInternalDataStructures.
STARPU_TRACE_BUFFER_SIZE
\anchor STARPU_TRACE_BUFFER_SIZE \addindex __env__STARPU_TRACE_BUFFER_SIZE Set the buffer size for recording trace events in MiB. Setting it to a big size allows to avoid pauses in the trace while it is recorded on the disk. This however also consumes memory, of course. Default value is 64.
STARPU_GENERATE_TRACE
\anchor STARPU_GENERATE_TRACE \addindex __env__STARPU_GENERATE_TRACE When set to 1, indicate that StarPU should automatically generate a Paje trace when starpu_shutdown() is called.
STARPU_GENERATE_TRACE_OPTIONS
\anchor STARPU_GENERATE_TRACE_OPTIONS \addindex __env__STARPU_GENERATE_TRACE_OPTIONS When the variable \ref STARPU_GENERATE_TRACE is set to 1 to generate a Paje trace, this variable can be set to specify options (see starpu_fxt_tool --help).
STARPU_ENABLE_STATS
\anchor STARPU_ENABLE_STATS \addindex __env__STARPU_ENABLE_STATS Enable gathering various data statistics (\ref DataStatistics).
STARPU_MEMORY_STATS
\anchor STARPU_MEMORY_STATS \addindex __env__STARPU_MEMORY_STATS When set to 0, disable the display of memory statistics on data which have not been unregistered at the end of the execution (\ref MemoryFeedback).
STARPU_MAX_MEMORY_USE
\anchor STARPU_MAX_MEMORY_USE \addindex __env__STARPU_MAX_MEMORY_USE When set to 1, display at the end of the execution the maximum memory used by StarPU for internal data structures during execution.
STARPU_BUS_STATS
\anchor STARPU_BUS_STATS \addindex __env__STARPU_BUS_STATS Enable the display of data transfers statistics when calling starpu_shutdown() (\ref Profiling). By default, statistics are printed on the standard error stream, use the environment variable \ref STARPU_BUS_STATS_FILE to define another filename.
STARPU_BUS_STATS_FILE
\anchor STARPU_BUS_STATS_FILE \addindex __env__STARPU_BUS_STATS_FILE Define the name of the file where to display data transfers statistics, see \ref STARPU_BUS_STATS.
STARPU_WORKER_STATS
\anchor STARPU_WORKER_STATS \addindex __env__STARPU_WORKER_STATS Enable the display of workers statistics when calling starpu_shutdown() (\ref Profiling). When combined with the environment variable \ref STARPU_PROFILING, it displays the energy consumption (\ref Energy-basedScheduling). By default, statistics are printed on the standard error stream, use the environment variable \ref STARPU_WORKER_STATS_FILE to define another filename.
STARPU_WORKER_STATS_FILE
\anchor STARPU_WORKER_STATS_FILE \addindex __env__STARPU_WORKER_STATS_FILE Define the name of the file where to display workers statistics, see \ref STARPU_WORKER_STATS.
STARPU_STATS
\anchor STARPU_STATS \addindex __env__STARPU_STATS When set to 0, data statistics will not be displayed at the end of the execution of an application (\ref DataStatistics).
STARPU_WATCHDOG_TIMEOUT
\anchor STARPU_WATCHDOG_TIMEOUT \addindex __env__STARPU_WATCHDOG_TIMEOUT When set to a value other than 0, allows to make StarPU print an error message whenever StarPU does not terminate any task for the given time (in µs), but lets the application continue normally. Should be used in combination with \ref STARPU_WATCHDOG_CRASH (see \ref DetectionStuckConditions).
STARPU_WATCHDOG_CRASH
\anchor STARPU_WATCHDOG_CRASH \addindex __env__STARPU_WATCHDOG_CRASH When set to a value other than 0, trigger a crash when the watch dog is reached, thus allowing to catch the situation in gdb, etc (see \ref DetectionStuckConditions)
STARPU_WATCHDOG_DELAY
\anchor STARPU_WATCHDOG_DELAY \addindex __env__STARPU_WATCHDOG_DELAY Delay the activation of the watchdog by the given time (in µs). This can be convenient for letting the application initialize data etc. before starting to look for idle time.
STARPU_TASK_PROGRESS
\anchor STARPU_TASK_PROGRESS \addindex __env__STARPU_TASK_PROGRESS Print the progression of tasks. This is convenient to determine whether a program is making progress in task execution, or is just stuck.
STARPU_TASK_BREAK_ON_PUSH
\anchor STARPU_TASK_BREAK_ON_PUSH \addindex __env__STARPU_TASK_BREAK_ON_PUSH When this variable contains a job id, StarPU will raise \c SIGTRAP when the task with that job id is being pushed to the scheduler, which will be nicely caught by debuggers (see \ref DebuggingScheduling)
STARPU_TASK_BREAK_ON_SCHED
\anchor STARPU_TASK_BREAK_ON_SCHED \addindex __env__STARPU_TASK_BREAK_ON_SCHED When this variable contains a job id, StarPU will raise \c SIGTRAP when the task with that job id is being scheduled by the scheduler (at a scheduler-specific point), which will be nicely caught by debuggers. This only works for schedulers which have such a scheduling point defined (see \ref DebuggingScheduling)
STARPU_TASK_BREAK_ON_POP
\anchor STARPU_TASK_BREAK_ON_POP \addindex __env__STARPU_TASK_BREAK_ON_POP When this variable contains a job id, StarPU will raise \c SIGTRAP when the task with that job id is being popped from the scheduler, which will be nicely caught by debuggers (see \ref DebuggingScheduling)
STARPU_TASK_BREAK_ON_EXEC
\anchor STARPU_TASK_BREAK_ON_EXEC \addindex __env__STARPU_TASK_BREAK_ON_EXEC When this variable contains a job id, StarPU will raise \c SIGTRAP when the task with that job id is being executed, which will be nicely caught by debuggers (see \ref DebuggingScheduling)
STARPU_DISABLE_KERNELS
\anchor STARPU_DISABLE_KERNELS \addindex __env__STARPU_DISABLE_KERNELS When set to a value other than 1, it disables actually calling the kernel functions, thus allowing to quickly check that the task scheme is working properly, without performing the actual application-provided computation.
STARPU_HISTORY_MAX_ERROR
\anchor STARPU_HISTORY_MAX_ERROR \addindex __env__STARPU_HISTORY_MAX_ERROR History-based performance models will drop measurements which are really far froom the measured average. This specifies the allowed variation. Default value is 50 (%), i.e. the measurement is allowed to be x1.5 faster or /1.5 slower than the average.
STARPU_RAND_SEED
\anchor STARPU_RAND_SEED \addindex __env__STARPU_RAND_SEED The random scheduler and some examples use random numbers for their own working. Depending on the examples, the seed is by default juste always 0 or the current time() (unless SimGrid mode is enabled, in which case it is always 0). \ref STARPU_RAND_SEED allows to set the seed to a specific value.
STARPU_GLOBAL_ARBITER
\anchor STARPU_GLOBAL_ARBITER \addindex __env__STARPU_GLOBAL_ARBITER When set to a positive value, StarPU will create a arbiter, which implements an advanced but centralized management of concurrent data accesses (see \ref ConcurrentDataAccess).
STARPU_USE_NUMA
\anchor STARPU_USE_NUMA \addindex __env__STARPU_USE_NUMA When defined to 1, NUMA nodes are taking into account by StarPU, i.e. StarPU will expose one StarPU memory node per NUMA node, and will thus schedule tasks according to data locality, migrated data when appropriate, etc. ::STARPU_MAIN_RAM is then associated to the NUMA node associated to the first CPU worker if it exists, the NUMA node associated to the first GPU discovered otherwise. If StarPU doesn't find any NUMA node after these steps, ::STARPU_MAIN_RAM is the first NUMA node discovered by StarPU. Applications should thus rather pass a \c NULL pointer and a -1 memory node to starpu_data_*_register functions, so that StarPU can manage memory as it wishes. If the application wants to control memory allocation on NUMA nodes for some data, it can use starpu_malloc_on_node and pass the memory node to the starpu_data_*_register functions to tell StarPU where the allocation was made. starpu_memory_nodes_get_count_by_kind() and starpu_memory_node_get_ids_by_type() can be used to get the memory nodes numbers of the CPU memory nodes. starpu_memory_nodes_numa_id_to_devid() and starpu_memory_nodes_numa_devid_to_id() are also available to convert between OS NUMA id and StarPU memory node number. If this variable is unset, or set to 0, CPU memory is considered as only one memory node (::STARPU_MAIN_RAM) and it will be up to the OS to manage migration etc. and the StarPU scheduler will not know about it.
STARPU_IDLE_FILE
\anchor STARPU_IDLE_FILE \addindex __env__STARPU_IDLE_FILE When defined, a file named after its contents will be created at the end of the execution. This file will contain the sum of the idle times of all the workers.
STARPU_HWLOC_INPUT
\anchor STARPU_HWLOC_INPUT \addindex __env__STARPU_HWLOC_INPUT When defined to the path of an XML file, \c hwloc will use this file as input instead of detecting the current platform topology, which can save significant initialization time. To produce this XML file, use lstopo file.xml
STARPU_CATCH_SIGNALS
\anchor STARPU_CATCH_SIGNALS \addindex __env__STARPU_CATCH_SIGNALS By default, StarPU catch signals \c SIGINT, \c SIGSEGV and \c SIGTRAP to perform final actions such as dumping FxT trace files even though the application has crashed. Setting this variable to a value other than 1 will disable this behaviour. This should be done on JVM systems which may use these signals for their own needs. The flag can also be set through the field starpu_conf::catch_signals.
STARPU_DISPLAY_BINDINGS
\anchor STARPU_DISPLAY_BINDINGS \addindex __env__STARPU_DISPLAY_BINDINGS Display the binding of all processes and threads running on the machine. Setting it to 1 displays the binding masks. Setting it to 2 displays the topology. If MPI is enabled, display the binding of each node.
Users can manually display the binding by calling starpu_display_bindings().
\section ConfiguringTheHypervisor Configuring The Hypervisor
SC_HYPERVISOR_POLICY
\anchor SC_HYPERVISOR_POLICY \addindex __env__SC_HYPERVISOR_POLICY Choose between the different resizing policies proposed by StarPU for the hypervisor: \c idle, \c app_driven, \c feft_lp, \c teft_lp, \c ispeed_lp, \c throughput_lp etc. Use SC_HYPERVISOR_POLICY=help to get the list of available policies for the hypervisor
SC_HYPERVISOR_TRIGGER_RESIZE
\anchor SC_HYPERVISOR_TRIGGER_RESIZE \addindex __env__SC_HYPERVISOR_TRIGGER_RESIZE Choose how should the hypervisor be triggered: speed if the resizing algorithm should be called whenever the speed of the context does not correspond to an optimal precomputed value, idle it the resizing algorithm should be called whenever the workers are idle for a period longer than the value indicated when configuring the hypervisor.
SC_HYPERVISOR_START_RESIZE
\anchor SC_HYPERVISOR_START_RESIZE \addindex __env__SC_HYPERVISOR_START_RESIZE Indicate the moment when the resizing should be available. The value correspond to the percentage of the total time of execution of the application. Default value is the resizing frame.
SC_HYPERVISOR_MAX_SPEED_GAP
\anchor SC_HYPERVISOR_MAX_SPEED_GAP \addindex __env__SC_HYPERVISOR_MAX_SPEED_GAP Indicate the ratio of speed difference between contexts that should trigger the hypervisor. This situation may occur only when a theoretical speed could not be computed and the hypervisor has no value to compare the speed to. Otherwise the resizing of a context is not influenced by the the speed of the other contexts, but only by the the value that a context should have.
SC_HYPERVISOR_STOP_PRINT
\anchor SC_HYPERVISOR_STOP_PRINT \addindex __env__SC_HYPERVISOR_STOP_PRINT By default the values of the speed of the workers is printed during the execution of the application. If the value 1 is given to this environment variable this printing is not done.
SC_HYPERVISOR_LAZY_RESIZE
\anchor SC_HYPERVISOR_LAZY_RESIZE \addindex __env__SC_HYPERVISOR_LAZY_RESIZE By default the hypervisor resizes the contexts in a lazy way, that is workers are firstly added to a new context before removing them from the previous one. Once this workers are clearly taken into account into the new context (a task was popped there) we remove them from the previous one. However if the application would like that the change in the distribution of workers should change right away this variable should be set to 0
SC_HYPERVISOR_SAMPLE_CRITERIA
\anchor SC_HYPERVISOR_SAMPLE_CRITERIA \addindex __env__SC_HYPERVISOR_SAMPLE_CRITERIA By default the hypervisor uses a sample of flops when computing the speed of the contexts and of the workers. If this variable is set to time the hypervisor uses a sample of time (10% of an approximation of the total execution time of the application)
*/ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_installation/installation_intro.doxy000066400000000000000000000025371507764646700307720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \intropage{IntroInstallation, --------- StarPU Installation ---------} \webforeword This parts shows a basic usage of StarPU and how to execute the provided examples or your own applications.
  • Chapter \ref BuildingAndInstallingStarPU shows how to build and install StarPU.
  • Chapter \ref CompilationConfiguration shows how to tune StarPU building process through configuration options.
  • Chapter \ref ExecutionConfigurationThroughEnvironmentVariables lists environment variables that can be used to tune StarPU when executing an application.
Finally, Chapter \ref ConfigurationAndInitialization shows a brief overview of how to configure and tune StarPU. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_introduction/000077500000000000000000000000001507764646700241625ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_introduction/doc_organization.doxy000066400000000000000000000054251507764646700304260ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page DocOrganization Documentation Organization The documentation chapters include
  • \ref IntroInstallation
    • \ref BuildingAndInstallingStarPU
    • \ref ExecutionConfigurationThroughEnvironmentVariables
    • \ref CompilationConfiguration
  • \ref IntroBasics
    • \ref StarPUApplications
    • \ref BasicExamples
    • \ref FullSourceCodeVectorScal
    • \ref TasksInStarPU
    • \ref DataManagement
    • \ref Scheduling
    • \ref ExamplesInStarPUSources
  • \ref IntroApplications
    • \ref StencilApplication
  • \ref IntroPerformances
    • \ref BenchmarkingStarPU
    • \ref OnlinePerformanceTools
    • \ref OfflinePerformanceTools
  • \ref IntroFAQ
    • \ref CheckListWhenPerformanceAreNotThere
    • \ref FrequentlyAskedQuestions
  • \ref IntroLanguage
    • \ref NativeFortranSupport
    • \ref StarPUJavaInterface
    • \ref PythonInterface
    • \ref OpenMPRuntimeSupport
  • \ref IntroExtensions
    • \ref ConfigurationAndInitialization
    • \ref AdvancedTasksInStarPU
    • \ref AdvancedDataManagement
    • \ref AdvancedScheduling
    • \ref SchedulingContexts
    • \ref SchedulingContextHypervisor
    • \ref HowToDefineANewSchedulingPolicy
    • \ref CUDASupport
    • \ref OpenCLSupport
    • \ref MaxFPGASupport
    • \ref OutOfCore
    • \ref MPISupport
    • \ref TCPIPSupport
    • \ref Transactions
    • \ref FaultTolerance
    • \ref FFTSupport
    • \ref SOCLOpenclExtensions
    • \ref HierarchicalDAGS
    • \ref ParallelWorker
    • \ref InteroperabilitySupport
    • \ref SimGridSupport
    • \ref DebuggingTools
    • \ref Helpers
  • Appendices
    • \ref GNUFreeDocumentationLicense
    • \ref ModuleDocumentation
    • \ref FileDocumentation
    • \ref deprecated
Make sure to have had a look at those too! */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_introduction/glossary.doxy000066400000000000000000000055761507764646700267470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page Glossary Glossary A \b codelet stores pointers to different implementations of the same theoretical function. A memory node can be either the main RAM, GPU-embedded memory or disk memory. A \b bus represents a connection between memory nodes. A data handle keeps track of multiple copies of the same data (\b registered by the application) across various memory nodes. The data management library ensures coherency among these copies. The \b home memory node of a data handle is the memory node where the data was originally registered (typically the main memory node). A \b task represents a scheduled execution of a codelet on specific data handles. A \b tag is a rendez-vous point. Tasks generally have their own tag and can depend on other tags. The value of a tag is chosen by the application. A \b worker execute tasks. Typically, there is one worker per CPU computation core and one per accelerator (with a dedicated whole CPU core). A \b driver oversees a given type of worker. Currently, there are CPU, CUDA, and OpenCL drivers. A performance model is a (dynamic or static) model of the performance of a given codelet. Codelets can have performance model for execution time as well as energy consumption. A data \b interface describes the layout of the data: for a vector, it includes a pointer for the start, the number of elements and the size of elements ; for a matrix, it involves a pointer for the start, the number of elements per row, the offset between rows, and the size of each element ; etc. Codelet functions receive interfaces for the local memory node copies of data handles assigned to the scheduled task, to access their data. Data \b partitioning means dividing the data of a specific data handle (referred to as the \b father) into several \b children data handles, each representing distinct segments of the original data. A \b filter is the function responsible for deriving child data handles from a father data handle, thus defining how the partitioning should be done (e.g. horizontal, vertical, etc.) \b Acquiring a data handle can be done from the main application, allowing secure access to the data of a data handle from its home node without needing to unregister it. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_introduction/introduction_intro.doxy000066400000000000000000000161541507764646700310320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \mainpage Introduction \foreword \webforeword // we need to keep 2 blank lines above \section Motivation Motivation // This is a comment and it will be removed before the file is processed by doxygen // complex machines with heterogeneous cores/devices The use of specialized hardware, such as accelerators or coprocessors offers an interesting approach to overcoming the physical limits encountered by processor architects. As a result, many machines are now equipped with one or several accelerators (e.g. a GPU), in addition to the usual processor(s). While significant efforts have been devoted to offloading computation onto such accelerators, very little attention has been paid to portability concerns on the one hand, and to the possibility of having heterogeneous accelerators and processors interact on the other hand. StarPU is a runtime system that provides support for heterogeneous multicore architectures. It not only offers a unified view of the computational resources (i.e. CPUs and accelerators simultaneously) but also takes care of efficiently mapping and executing tasks onto an heterogeneous machine while transparently handling low-level issues such as data transfers in a portable manner. // this leads to a complicated distributed memory design // which is not (easily) manageable by hand // added value/benefits of StarPU // - portability // - scheduling, perf. portability \section StarPUInANutshell StarPU in a Nutshell StarPU is a software tool designed to enable programmers to harness the computational capabilities of both CPUs and GPUs, all while sparing them the need to meticulously adapt their programs for specific target machines and processing units. At the heart of StarPU lies its runtime support library, which takes charge of scheduling tasks supplied by applications on heterogeneous CPU/GPU systems. Furthermore, StarPU provides programming language support through an OpenCL front-end (\ref SOCLOpenclExtensions). StarPU's runtime mechanism and programming language extensions are built around a task-based programming model. In this modell, applications submit computational tasks, with CPU and/or GPU implementations. StarPU effectively schedules these tasks and manages the associated data transfers across available CPUs and GPUs. The data that a task operates on are automatically exchanged between accelerators and the main memory, thereby sparing programmers the intricacies of scheduling and the technical details tied to these transfers. StarPU excels in its adaptness at efficiently scheduling tasks using established algorithms from the literature (\ref TaskSchedulingPolicy). Furthermore addition, it provides the flexibility for scheduling experts, such as compiler or computational library developers, to implement custom scheduling policies in a manner that is easily portable (\ref HowToDefineANewSchedulingPolicy). The remainder of this section describes the main concepts used in StarPU. A video, lasting 26 minutes, accessible on the StarPU website (https://starpu.gitlabpages.inria.fr/) presents these concepts. Additionally, a serie of tutorials can be found at https://starpu.gitlabpages.inria.fr/tutorials/ One of the tutorials is available within a docker image https://starpu.gitlabpages.inria.fr/tutorials/docker/ // explain the notion of codelet and task (i.e. g(A, B) \subsection CodeletAndTasks Codelet and Tasks One of StarPU's key data structures is the \b codelet. A codelet defines a computational kernel that can potentially be implemented across various architectures, including CPUs, CUDA devices, or OpenCL devices. // TODO insert illustration f: f_spu, f_cpu, ... Another pivotal data structure is the \b task. Executing a StarPU task involves applying a codelet to a data set, utilizing one of the architectures on which the codelet is implemented. Therefore, a task describes the codelet that it uses, the data accessed, and how they are accessed during the computation (read and/or write). StarPU tasks are asynchronous, meaning that submitting a task to StarPU is a non-blocking operation. The task structure can also specify a \b callback function, which is called once StarPU succesfully completes the task. Additionally, it contains optional fields that the application may use to provide hints to the scheduler, such as priority levels. By default, task dependencies are inferred from data dependency (sequential coherency) within StarPU. However, the application has the ability to disable sequential coherency for specific data, and dependencies can also be specifically defined. A task can be uniquely identified by a 64-bit number, chosen by the application, referred to as a \b tag. Task dependencies can be enforced through callback functions, by submitting other tasks, or by specifying dependencies between tags (which can correspond to tasks that have yet to be submitted). // TODO insert illustration f(Ar, Brw, Cr) + .. // DSM \subsection StarPUDataManagementLibrary StarPU Data Management Library As StarPU dynamically schedules tasks at runtime, the need for data transfers is automatically managed in a``just-in-time'' manner between different processing units, This automated approach alleviates the burden on application programmers to explicitly handle data transfers. Furthemore, to minimize needless transfers, StarPU retains data at the location of its last use, even if modifications were made there. Additionally, StarPU allows multiple instances of the same data to coexist across various processing units simultaneously, as long as the data remains unaltered. \section ApplicationTaskification Application Taskification We will explain here shortly the concept of "taskifying" an application. Before transitioning to StarPU, you must transform your application as follows:
  • Refactor functions into "pure" functions that exclusively utilize data from their parameters.
  • Create a central main function responsible for calling these pure functions.
Once this restructuring is complete, integrating StarPU or any similar task-based library becomes straightforward. You merely replace function calls with task submissions, leveraging the library's capabilities. Chapter \ref StencilApplication shows how to easily convert an existing application to use StarPU. \section ResearchPapers Research Papers Research papers about StarPU can be found at https://starpu.gitlabpages.inria.fr/publications/. A good overview is available in the research report at http://hal.archives-ouvertes.fr/inria-00467677. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_languages/000077500000000000000000000000001507764646700234075ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_languages/code/000077500000000000000000000000001507764646700243215ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_languages/code/java_spark.java000066400000000000000000000041331507764646700273060ustar00rootroot00000000000000// StarPU --- Runtime system for heterogeneous multicore architectures. // // Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria // // StarPU is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation; either version 2.1 of the License, or (at // your option) any later version. // // StarPU is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU Lesser General Public License in COPYING.LGPL for more details. // //! [To be included. You should update doxygen if you see this text.] package fr.labri.hpccloud.starpu.examples; import fr.labri.hpccloud.starpu.StarPU; import fr.labri.hpccloud.starpu.data.DataPairSet; import fr.labri.hpccloud.starpu.data.DataSet; import fr.labri.hpccloud.starpu.data.Tuple2; import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import java.util.Arrays; import java.util.regex.Pattern; public class WordCount { static InputStream openFile(String filename) throws Exception { return WordCount.class.getResourceAsStream(filename); } private static final Pattern SPACE = Pattern.compile(" "); public static void main(String[] args ) throws Exception { InputStream input = new FileInputStream(args[0]); StarPU.init(); compute(input); input.close(); StarPU.shutdown(); } private static void compute(InputStream input) throws Exception { DataSet lines = DataSet.readFile (input, s->s).splitByBlocks(10); DataSet words = lines.flatMap(s -> Arrays.asList(SPACE.split(s)).iterator()).splitByBlocks(10); DataPairSet ones = (DataPairSet)words.mapToPair(w-> new Tuple2<>(w,1)); DataPairSet counts = ones.reduceByKey((c1,c2)-> c1 + c2); for(Tuple2 p : counts.collect()) { System.out.println("("+p._1()+","+p._2()+")"); } } } //! [To be included. You should update doxygen if you see this text.] starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_languages/code/java_starpu.java000066400000000000000000000050271507764646700275070ustar00rootroot00000000000000// StarPU --- Runtime system for heterogeneous multicore architectures. // // Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria // // StarPU is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation; either version 2.1 of the License, or (at // your option) any later version. // // StarPU is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU Lesser General Public License in COPYING.LGPL for more details. // //! [To be included. You should update doxygen if you see this text.] package fr.labri.hpccloud.starpu.examples; import fr.labri.hpccloud.starpu.Codelet; import fr.labri.hpccloud.starpu.StarPU; import fr.labri.hpccloud.starpu.data.DataHandle; import fr.labri.hpccloud.starpu.data.IntegerVariableHandle; import fr.labri.hpccloud.starpu.data.VectorHandle; import java.util.Random; import static fr.labri.hpccloud.starpu.data.DataHandle.AccessMode.*; public class VectorScal { public static final int NX = 10; public static final Float factor = 3.14f; static final Codelet scal = new Codelet() { @Override public void run(DataHandle[] buffers) { VectorHandle array = (VectorHandle)buffers[0]; int n = array.getSize(); System.out.println(String.format("scaling array %s with %d elements", array, n)); for (int i = 0; i < n; i++) { array.setValueAt(i, factor * array.getValueAt(i)); } } @Override public DataHandle.AccessMode[] getAccessModes() { return new DataHandle.AccessMode[] { STARPU_RW }; } }; public static void main(String[] args) throws Exception { int nx = (args.length == 0) ? NX : Integer.valueOf(args[0]); compute(nx); } public static void compute(int nx) throws Exception { StarPU.init(); System.out.println(String.format("VECTOR[#nx=%d]", nx)); VectorHandle arrayHandle = VectorHandle.register(nx); System.out.println(String.format("scaling array %s", arrayHandle)); for(int i=0 ; i
  • You can learn to natively access most of StarPU functionalities from Fortran 2008+ codes with some explanations and examples in Chapter \ref NativeFortranSupport.
  • You can find out how to execute Java applications with some important StarPU APIs in Chapter \ref StarPUJavaInterface.
  • Python interface supports most of the main StarPU functionalities, and new functions especially adapted to Python have been added as well. There are detailed explanations and examples in Chapter \ref PythonInterface.
  • You can learn how to execute OpenMP tasks with some specific functions in Chapter \ref OpenMPRuntimeSupport. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_languages/native_fortran_support.doxy000066400000000000000000000255671507764646700311500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page NativeFortranSupport The StarPU Native Fortran Support StarPU provides the necessary routines and support to natively access most of its functionalities from Fortran 2008+ codes. All symbols (functions, constants) are defined in fstarpu_mod.f90. Every symbol of the Native Fortran support API is prefixed by fstarpu_. Note: Mixing uses of fstarpu_ and starpu_ symbols in the same Fortran code has unspecified behavior. See \ref NFAPIMIX for a discussion about valid and unspecified combinations. \section NFImplementation Implementation Details and Specificities \subsection NFPrerequisites Prerequisites The Native Fortran support relies on Fortran 2008 specific constructs, as well as on the support for interoperability of assumed-shape arrays introduced as part of Fortran's Technical Specification ISO/IEC TS 29113:2012, for which no equivalent are available in previous versions of the standard. It has currently been tested successfully with GNU GFortran 4.9, GFortran 5.x, GFortran 6.x and the Intel Fortran Compiler >= 2016. It is known not to work with GNU GFortran < 4.9, Intel Fortran Compiler < 2016. See Section \ref NFOldFortran for information on how to write StarPU Fortran code with older compilers. \subsection NFConfiguration Configuration The Native Fortran API is enabled and its companion fstarpu_mod.f90 Fortran module source file is installed by default when a Fortran compiler is found, unless the detected Fortran compiler is known not to support the requirements for the Native Fortran API. The support can be disabled through the \c configure option \ref disable-fortran "--disable-fortran". Conditional compiled source codes may check for the availability of the Native Fortran Support by testing whether the preprocessor macro STARPU_HAVE_FC is defined or not. \subsection NFExamples Examples Several examples using the Native Fortran API are provided in StarPU's examples/native_fortran/ examples directory, to showcase the Fortran flavor of various basic and more advanced StarPU features. \subsection NFAppCompile Compiling a Native Fortran Application The Fortran module fstarpu_mod.f90 installed in StarPU's include/ directory provides all the necessary API definitions. It must be compiled with the same compiler (same vendor, same version) as the application itself, and the resulting fstarpu_mod.o object file must be linked with the application executable. Each example provided in StarPU's examples/native_fortran/ examples directory comes with its own dedicated Makefile for out-of-tree build. Such example Makefiles may be used as starting points for building application codes with StarPU. \section NFIdioms Fortran Translation for Common StarPU API Idioms All these examples assume that the standard Fortran module iso_c_binding is in use. - Specifying a NULL pointer \code{.f90} type(c_ptr) :: my_ptr ! variable to store the pointer ! [...] my_ptr = C_NULL_PTR ! assign standard constant for NULL ptr \endcode - Obtaining a pointer to some object: \code{.f90} real(8), dimension(:), allocatable, target :: va type(c_ptr) :: p_va ! variable to store a pointer to array va ! [...] p_va = c_loc(va) \endcode - Obtaining a pointer to some subroutine: \code{.f90} ! pointed routine definition recursive subroutine myfunc () bind(C) ! [...] type(c_funptr) :: p_fun ! variable to store the routine pointer ! [...] p_fun = c_funloc(my_func) \endcode - Obtaining the size of some object: \code{.f90} real(8) :: a integer(c_size_t) :: sz_a ! variable to store the size of a ! [...] sz_a = c_sizeof(a) \endcode - Obtaining the length of an array dimension: \code{.f90} real(8), dimension(:,:), allocatable, target :: vb integer(c_int) :: ln_vb_1 ! variable to store the length of vb's dimension 1 integer(c_int) :: ln_vb_2 ! variable to store the length of vb's dimension 2 ! [...] ln_vb_1 = 1+ubound(vb,1)-lbound(vb,1) ! get length of dimension 1 of vb ln_vb_2 = 1+ubound(vb,2)-lbound(vb,2) ! get length of dimension 2 of vb \endcode - Specifying a string constant: \code{.f90} type(c_ptr) :: my_cl ! a StarPU codelet ! [...] ! set the name of a codelet to string 'my_codele't: call fstarpu_codelet_set_name(my_cl, C_CHAR_"my_codelet"//C_NULL_CHAR) ! note: using the C_CHAR_ prefix and the //C_NULL_CHAR concatenation at the end ensures ! that the string constant is properly '\0' terminated, and compatible with StarPU's ! internal C routines ! ! note: plain Fortran string constants are not '\0' terminated, and as such, must not be ! passed to StarPU routines. \endcode - Combining multiple flag constants with a bitwise 'or': \code{.f90} type(c_ptr) :: my_cl ! a pointer for the codelet structure ! [...] ! add a managed buffer to a codelet, specifying both the Read/Write access mode and the Locality hint call fstarpu_codelet_add_buffer(my_cl, FSTARPU_RW.ior.FSTARPU_LOCALITY) \endcode A basic example is available in examples/native_fortran/nf_vector_scal.f90. \section NFInitExit Uses, Initialization and Shutdown The snippet below show an example of minimal StarPU code using the Native Fortran support. The program should use the standard module iso_c_binding as well as StarPU's fstarpu_mod. The StarPU runtime engine is initialized with a call to function fstarpu_init, which returns an integer status of 0 if successful or non-0 otherwise. Eventually, a call to fstarpu_shutdown ends the runtime engine and frees all internal StarPU data structures. \snippet nf_initexit.f90 To be included. You should update doxygen if you see this text. \section NFInsertTask Fortran Flavor of StarPU's Variadic Insert_task Fortran does not have a construction similar to C variadic functions, on which starpu_task_insert() relies at the time of this writing. However, Fortran's variable length arrays of c_ptr elements enable to emulate much of the convenience of C's variadic functions. This is the approach retained for implementing fstarpu_task_insert. The general syntax for using fstarpu_task_insert is as follows: \code{.f90} call fstarpu_task_insert((/ & [, , ]* & [, , ]* & , C_NULL_PTR /)) \endcode There is thus a unique array argument (/ ... /) passed to fstarpu_task_insert which itself contains the task settings. Each element of the array must be of type type(c_ptr). The last element of the array must be C_NULL_PTR. Example extracted from nf_vector.f90: \code{.f90} call fstarpu_task_insert((/ cl_vec, & ! codelet FSTARPU_R, dh_va, & ! a first data handle FSTARPU_RW.ior.FSTARPU_LOCALITY, dh_vb, & ! a second data handle C_NULL_PTR /)) ! no more args \endcode The full example is available in examples/native_fortran/nf_vector.f90. \section NFStructs Functions and Subroutines Expecting Data Structures Arguments Several StarPU structures that are expected to be passed to the C API, are replaced by function/subroutine wrapper sets to allocate, set fields and free such structure. This strategy has been preferred over defining native Fortran equivalent of such structures using Fortran's derived types, to avoid potential layout mismatch between C and Fortran StarPU data structures. Examples of such data structures wrappers include fstarpu_conf_allocate and alike, fstarpu_codelet_allocate and alike, fstarpu_data_filter_allocate and alike. Here is an example of allocating, filling and deallocating a codelet structure: \code{.f90} ! a pointer for the codelet structure type(c_ptr) :: cl_vec ! [...] ! allocate an empty codelet structure cl_vec = fstarpu_codelet_allocate() ! add a CPU implementation function to the codelet call fstarpu_codelet_add_cpu_func(cl_vec, C_FUNLOC(cl_cpu_func_vec)) ! add a CUDA implementation function to the codelet call fstarpu_codelet_add_cuda_func(cl_vec, C_FUNLOC(cl_cuda_func_vec)) ! set the codelet name call fstarpu_codelet_set_name(cl_vec, C_CHAR_"my_vec_codelet"//C_NULL_CHAR) ! add a Read-only mode data buffer to the codelet call fstarpu_codelet_add_buffer(cl_vec, FSTARPU_R) ! add a Read-Write mode data buffer to the codelet call fstarpu_codelet_add_buffer(cl_vec, FSTARPU_RW.ior.FSTARPU_LOCALITY) ! [...] ! free codelet structure call fstarpu_codelet_free(cl_vec) \endcode The full example is available in examples/native_fortran/nf_vector.f90. \section NFNotes Additional Notes about the Native Fortran Support \subsection NFOldFortran Using StarPU with Older Fortran Compilers When using older compilers, Fortran applications may still interoperate with StarPU using C marshalling functions as examplified in StarPU's examples/fortran/ and examples/fortran90/ example directories, though the process will be less convenient. Basically, the main FORTRAN code calls some C wrapper functions to submit tasks to StarPU. Then, when StarPU starts a task, another C wrapper function calls the FORTRAN routine for the task. Note that this marshalled FORTRAN support remains available even when specifying \c configure option \ref disable-fortran "--disable-fortran" (which only disables StarPU's native Fortran layer). \subsection NFAPIMIX Valid API Mixes and Language Mixes Mixing uses of fstarpu_ and starpu_ symbols in the same Fortran code has unspecified behavior. Using fstarpu_ symbols in C code has unspecified behavior. For multi-language applications using both C and Fortran source files: - C source files must use starpu_ symbols exclusively - Fortran sources must uniformly use either fstarpu_ symbols exclusively, or starpu_ symbols exclusively. Every other combination has unspecified behavior. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_languages/openmp_runtime_support.doxy000066400000000000000000000473241507764646700311630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page OpenMPRuntimeSupport The StarPU OpenMP Runtime Support (SORS) StarPU provides the necessary routines and support to implement an OpenMP (http://www.openmp.org/) runtime compliant with the revision 3.1 of the language specification, and compliant with the task-related data dependency functionalities introduced in the revision 4.0 of the language. This StarPU OpenMP Runtime Support (SORS) has been designed to be targeted by OpenMP compilers such as the Klang-OMP compiler. Most supported OpenMP directives can both be implemented inline or as outlined functions. All functions are defined in \ref API_OpenMP_Runtime_Support. Several examples supporting OpenMP API are provided in StarPU's tests/openmp/ directory. \section OMPImplementation Implementation Details and Specificities \subsection OMPMainThread Main Thread When using SORS, the main thread gets involved in executing OpenMP tasks just like every other threads, in order to be compliant with the specification execution model. This contrasts with StarPU's usual execution model, where the main thread submit tasks but does not take part in executing them. \subsection OMPTaskSemantics Extended Task Semantics The semantics of tasks generated by SORS are extended with respect to regular StarPU tasks in that SORS' tasks may block and be preempted by SORS call, whereas regular StarPU tasks cannot. SORS tasks may coexist with regular StarPU tasks. However, only the tasks created using SORS API functions inherit from extended semantics. \section OMPConfiguration Configuration SORS can be compiled into libstarpu through the \c configure option \ref enable-openmp "--enable-openmp". Conditional compiled source codes may check for the availability of the OpenMP Runtime Support by testing whether the C preprocessor macro STARPU_OPENMP is defined or not. \section OMPInitExit Initialization and Shutdown SORS needs to be executed/terminated by the starpu_omp_init() / starpu_omp_shutdown() instead of starpu_init() / starpu_shutdown(). This requirement is necessary to make sure that the main thread gets the proper execution environment to run OpenMP tasks. These calls will usually be performed by a compiler runtime. Thus, they can be executed from a constructor/destructor such as this: \code{.c} __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } \endcode Basic examples are available in the files tests/openmp/init_exit_01.c and tests/openmp/init_exit_02.c. \sa starpu_omp_init() \sa starpu_omp_shutdown() \section OMPSharing Parallel Regions and Worksharing SORS provides functions to create OpenMP parallel regions, as well as mapping work on participating workers. The current implementation does not provide nested active parallel regions: Parallel regions may be created recursively, however only the first level parallel region may have more than one worker. From an internal point-of-view, SORS' parallel regions are implemented as a set of implicit, extended semantics StarPU tasks, following the execution model of the OpenMP specification. Thus, SORS' parallel region tasks may block and be preempted, by SORS calls, enabling constructs such as barriers. \subsection OMPParallel Parallel Regions Parallel regions can be created with the function starpu_omp_parallel_region() which accepts a set of attributes as parameter. The execution of the calling task is suspended until the parallel region completes. The field starpu_omp_parallel_region_attr::cl is a regular StarPU codelet. However, only CPU codelets are supported for parallel regions. Here is an example of use: \code{.c} void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; pthread_t tid = pthread_self(); int worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); } void f(void) { struct starpu_omp_parallel_region_attr attr; memset(&attr, 0, sizeof(attr)); attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); return 0; } \endcode A basic example is available in the file tests/openmp/parallel_01.c. \sa struct starpu_omp_parallel_region_attr \sa starpu_omp_parallel_region() \subsection OMPFor Parallel For OpenMP for loops are provided by the starpu_omp_for() group of functions. Variants are available for inline or outlined implementations. SORS supports static, dynamic, and guided loop scheduling clauses. The auto scheduling clause is implemented as static. The runtime scheduling clause honors the scheduling mode selected through the environment variable \c OMP_SCHEDULE or the starpu_omp_set_schedule() function. For loops with the ordered clause are also supported. An implicit barrier can be enforced or skipped at the end of the worksharing construct, according to the value of the nowait parameter. The canonical family of starpu_omp_for() functions provide each instance with the first iteration number and the number of iterations (possibly zero) to perform. The alternate family of starpu_omp_for_alt() functions provide each instance with the (possibly empty) range of iterations to perform, including the first and excluding the last. An example is available in the file tests/openmp/parallel_for_01.c. The family of starpu_omp_ordered() functions enable to implement OpenMP's ordered construct, a region with a parallel for loop that is guaranteed to be executed in the sequential order of the loop iterations. An example is available in the file tests/openmp/parallel_for_ordered_01.c. \code{.c} void for_g(unsigned long long i, unsigned long long nb_i, void *arg) { (void) arg; for (; nb_i > 0; i++, nb_i--) { array[i] = 1; } } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; starpu_omp_for(for_g, NULL, NB_ITERS, CHUNK, starpu_omp_sched_static, 0, 0); } \endcode \sa starpu_omp_for() \sa starpu_omp_for_inline_first() \sa starpu_omp_for_inline_next() \sa starpu_omp_for_alt() \sa starpu_omp_for_inline_first_alt() \sa starpu_omp_for_inline_next_alt() \sa starpu_omp_ordered() \sa starpu_omp_ordered_inline_begin() \sa starpu_omp_ordered_inline_end() \subsection OMPSections Sections OpenMP sections worksharing constructs are supported using the set of starpu_omp_sections() variants. The general principle is either to provide an array of per-section functions or a single function that will redirect the execution to the suitable per-section functions. An implicit barrier can be enforced or skipped at the end of the worksharing construct, according to the value of the nowait parameter. \code{.c} void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; section_funcs[0] = f; section_funcs[1] = g; section_funcs[2] = h; section_funcs[3] = i; section_args[0] = arg_f; section_args[1] = arg_g; section_args[2] = arg_h; section_args[3] = arg_i; starpu_omp_sections(4, section_f, section_args, 0); } \endcode An example is available in the file tests/openmp/parallel_sections_01.c. \sa starpu_omp_sections() \sa starpu_omp_sections_combined() \subsection OMPSingle Single OpenMP single workharing constructs are supported using the set of starpu_omp_single() variants. An implicit barrier can be enforced or skipped at the end of the worksharing construct, according to the value of the nowait parameter. An example is available in the file tests/openmp/parallel_single_nowait_01.c. \code{.c} void single_f(void *arg) { (void) arg; pthread_t tid = pthread_self(); int worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- single\n", (void *)tid, worker_id); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; starpu_omp_single(single_f, NULL, 0); } \endcode SORS also provides dedicated support for single sections with copyprivate clauses through the starpu_omp_single_copyprivate() function variants. The OpenMP master directive is supported as well, using the starpu_omp_master() function variants. An example is available in the file tests/openmp/parallel_single_copyprivate_01.c. \sa starpu_omp_master() \sa starpu_omp_master_inline() \sa starpu_omp_single() \sa starpu_omp_single_inline() \sa starpu_omp_single_copyprivate() \sa starpu_omp_single_copyprivate_inline_begin() \sa starpu_omp_single_copyprivate_inline_end() \section OMPTask Tasks SORS implements the necessary support of OpenMP 3.1 and OpenMP 4.0's so-called explicit tasks, together with OpenMP 4.0's data dependency management. \subsection OMPTaskExplicit Explicit Tasks Explicit OpenMP tasks are created with SORS using the starpu_omp_task_region() function. The implementation supports if, final, untied and mergeable clauses as defined in the OpenMP specification. Unless specified otherwise by the appropriate clause(s), the created task may be executed by any participating worker of the current parallel region. The current SORS implementation requires explicit tasks to be created within the context of an active parallel region. In particular, an explicit task cannot be created by the main thread outside a parallel region. Explicit OpenMP tasks created using starpu_omp_task_region() are implemented as StarPU tasks with extended semantics, and may as such be blocked and preempted by SORS routines. The current SORS implementation supports recursive explicit tasks creation, to ensure compliance with the OpenMP specification. However, it should be noted that StarPU is not designed nor optimized for efficiently scheduling of recursive task applications. The code below shows how to create 4 explicit tasks within a parallel region. \code{.c} void task_region_g(void *buffers[], void *args) { (void) buffers; (void) args; pthread tid = pthread_self(); int worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d: explicit task \"g\"\n", (void *)tid, worker_id); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; struct starpu_omp_task_region_attr attr; memset(&attr, 0, sizeof(attr)); attr.cl.cpu_funcs[0] = task_region_g; attr.cl.where = STARPU_CPU; attr.if_clause = 1; attr.final_clause = 0; attr.untied_clause = 1; attr.mergeable_clause = 0; starpu_omp_task_region(&attr); starpu_omp_task_region(&attr); starpu_omp_task_region(&attr); starpu_omp_task_region(&attr); } \endcode An example is available in the file tests/openmp/parallel_01.c. \sa struct starpu_omp_task_region_attr \sa starpu_omp_task_region() \subsection OMPDataDependencies Data Dependencies SORS implements inter-tasks data dependencies as specified in OpenMP 4.0. Data dependencies are expressed using regular StarPU data handles (\ref starpu_data_handle_t) plugged into the task's attr.cl codelet. The family of starpu_vector_data_register() -like functions, the starpu_omp_handle_register() and starpu_omp_handle_unregister() functions, and the starpu_omp_data_lookup() function may be used to register a memory area and to retrieve the current data handle associated with a pointer respectively. The testcase ./tests/openmp/task_02.c gives a detailed example of using OpenMP 4.0 tasks dependencies with SORS implementation. Note: the OpenMP 4.0 specification only supports data dependencies between sibling tasks, that are tasks created by the same implicit or explicit parent task. The current SORS implementation also only supports data dependencies between sibling tasks. Consequently, the behavior is unspecified if dependencies are expressed between tasks that have not been created by the same parent task. \subsection OMPTaskSyncs TaskWait and TaskGroup SORS implements both the taskwait and taskgroup OpenMP task synchronization constructs specified in OpenMP 4.0, with the starpu_omp_taskwait() and starpu_omp_taskgroup() functions, respectively. An example of starpu_omp_taskwait() use, creating two explicit tasks and waiting for their completion: \code{.c} void task_region_g(void *buffers[], void *args) { (void) buffers; (void) args; printf("Hello, World!\n"); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; struct starpu_omp_task_region_attr attr; memset(&attr, 0, sizeof(attr)); attr.cl.cpu_funcs[0] = task_region_g; attr.cl.where = STARPU_CPU; attr.if_clause = 1; attr.final_clause = 0; attr.untied_clause = 1; attr.mergeable_clause = 0; starpu_omp_task_region(&attr); starpu_omp_task_region(&attr); starpu_omp_taskwait(); \endcode An example is available in the file tests/openmp/taskwait_01.c. An example of starpu_omp_taskgroup() use, creating a task group of two explicit tasks: \code{.c} void task_region_g(void *buffers[], void *args) { (void) buffers; (void) args; printf("Hello, World!\n"); } void taskgroup_f(void *arg) { (void)arg; struct starpu_omp_task_region_attr attr; memset(&attr, 0, sizeof(attr)); attr.cl.cpu_funcs[0] = task_region_g; attr.cl.where = STARPU_CPU; attr.if_clause = 1; attr.final_clause = 0; attr.untied_clause = 1; attr.mergeable_clause = 0; starpu_omp_task_region(&attr); starpu_omp_task_region(&attr); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; starpu_omp_taskgroup(taskgroup_f, (void *)NULL); } \endcode An example is available in the file tests/openmp/taskgroup_01.c. \sa starpu_omp_task_region() \sa starpu_omp_taskwait() \sa starpu_omp_taskgroup() \sa starpu_omp_taskgroup_inline_begin() \sa starpu_omp_taskgroup_inline_end() \section OMPSynchronization Synchronization Support SORS implements objects and method to build common OpenMP synchronization constructs. \subsection OMPSimpleLock Simple Locks SORS Simple Locks are opaque starpu_omp_lock_t objects enabling multiple tasks to synchronize with each others, following the Simple Lock constructs defined by the OpenMP specification. In accordance with such specification, simple locks may not be acquired multiple times by the same task, without being released in-between; otherwise, deadlocks may result. Codes requiring the possibility to lock multiple times recursively should use Nestable Locks (\ref NestableLock). Codes NOT requiring the possibility to lock multiple times recursively should use Simple Locks as they incur less processing overhead than Nestable Locks. An example is available in the file tests/openmp/parallel_simple_lock_01.c. \sa starpu_omp_lock_t \sa starpu_omp_init_lock() \sa starpu_omp_destroy_lock() \sa starpu_omp_set_lock() \sa starpu_omp_unset_lock() \sa starpu_omp_test_lock() \subsection OMPNestableLock Nestable Locks SORS Nestable Locks are opaque starpu_omp_nest_lock_t objects enabling multiple tasks to synchronize with each others, following the Nestable Lock constructs defined by the OpenMP specification. In accordance with such specification, nestable locks may be acquired multiple times recursively by the same task without deadlocking. Nested locking and unlocking operations must be well parenthesized at any time, otherwise deadlock and/or undefined behavior may occur. Codes requiring the possibility to lock multiple times recursively should use Nestable Locks. Codes NOT requiring the possibility to lock multiple times recursively should use Simple Locks (\ref SimpleLock) instead, as they incur less processing overhead than Nestable Locks. An example is available in the file tests/openmp/parallel_nested_lock_01.c. \sa starpu_omp_nest_lock_t \sa starpu_omp_init_nest_lock() \sa starpu_omp_destroy_nest_lock() \sa starpu_omp_set_nest_lock() \sa starpu_omp_unset_nest_lock() \sa starpu_omp_test_nest_lock() \subsection OMPCritical Critical Sections SORS implements support for OpenMP critical sections through the family of \ref starpu_omp_critical functions. Critical sections may optionally be named. There is a single, common anonymous critical section. Mutual exclusion only occur within the scope of single critical section, either a named one or the anonymous one. Corresponding examples are available in the files tests/openmp/parallel_critical_01.c and tests/openmp/parallel_critical_inline_01.c. \sa starpu_omp_critical() \sa starpu_omp_critical_inline_begin() \sa starpu_omp_critical_inline_end() \subsection OMPBarrier Barriers SORS provides the starpu_omp_barrier() function to implement barriers over parallel region teams. In accordance with the OpenMP specification, the starpu_omp_barrier() function waits for every implicit task of the parallel region to reach the barrier and every explicit task launched by the parallel region to complete, before returning. An example is available in the file tests/openmp/parallel_barrier_01.c. \sa starpu_omp_barrier() \section OMPLLVM Example: An OpenMP LLVM Support SORS has been used to implement an OpenMP LLVM Support. This allows to seamlessly run OpenMP applications on top of StarPU. To enable this support, one just needs to call \c configure with the option \ref enable-openmp-llvm "--enable-openmp-llvm". After installation, the directory lib/starpu/examples/starpu_openmp_llvm contains a OpenMP application, its source code and the executable compiled with the StarPU OpenMP LLVM support, as well as a README file explaining how to use the support for your own application. One just needs to compile an OpenMP application with clang and to execute it the StarPU OpenMP LLVM support library file instead of the default libomp.so. \section OMPStandard OpenMP Standard Functions in StarPU StarPU provides severals functions which are very similar to their OpenMP counterparts but are adapted to the StarPU runtime system. These functions are: \li starpu_omp_set_num_threads() \li starpu_omp_get_num_threads() \li starpu_omp_get_thread_num() \li starpu_omp_get_max_threads() \li starpu_omp_get_num_procs() which is used to get the number of available StarPU CPU workers. \li starpu_omp_in_parallel() \li starpu_omp_set_dynamic() \li starpu_omp_get_dynamic() \li starpu_omp_set_nested() \li starpu_omp_get_nested() \li starpu_omp_get_cancellation() \li starpu_omp_set_schedule() \li starpu_omp_get_schedule() \li starpu_omp_get_thread_limit() \li starpu_omp_set_max_active_levels() \li starpu_omp_get_max_active_levels() \li starpu_omp_get_level() \li starpu_omp_get_ancestor_thread_num() \li starpu_omp_get_team_size() \li starpu_omp_get_active_level() \li starpu_omp_in_final() \li starpu_omp_get_proc_bind() \li starpu_omp_get_num_places() \li starpu_omp_get_place_num_procs() \li starpu_omp_get_place_proc_ids() \li starpu_omp_get_place_num() \li starpu_omp_get_partition_num_places() \li starpu_omp_get_partition_place_nums() \li starpu_omp_set_default_device() \li starpu_omp_get_default_device() \li starpu_omp_get_num_devices() \li starpu_omp_get_num_teams() \li starpu_omp_get_team_num() \li starpu_omp_is_initial_device() \li starpu_omp_get_initial_device() \li starpu_omp_get_max_task_priority() \li starpu_omp_get_wtime() \li starpu_omp_get_wtick() */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_languages/python.doxy000066400000000000000000001357441507764646700256530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page PythonInterface Python Interface This chapter presents the StarPU Python Interface. It provides for those used to the Python language a more concise and easy-to-use StarPU interface. This interface supports most of the main StarPU functionalities. While not all features of the C API are replicated in the Python Interface, additional functions tailored for Python's ease of use have been incorporated. Several examples using the Python API are provided in the directory starpupy/examples/. \section Installation Installation of the Python Interface Calling \c configure will enable by default the StarPU Python Interface. You can also specify the option \ref enable-starpupy "--enable-starpupy" which will fail if some requirements are missing. For now, the only requirement is the availability of the \c python3 interpreter. The python modules \c joblib and \c cloudpickle are mandatory to run parallel codes. The python module \c numpy is recommended, but not mandatory. \verbatim $ pip3 install joblib $ pip3 install cloudpickle $ pip3 install numpy $ ../configure --enable-starpupy --enable-blocking-drivers --prefix=$HOME/usr/starpu $ make $ make install \endverbatim You can then go to the directory in which StarPU is installed, and test the provided Python examples. \verbatim $ cd $HOME/usr/starpu $ . ./bin/starpu_env Setting StarPU environment for ... $ cd lib/starpu/python $ python3 starpu_py.py Example 1: Hello, world! ... $ \endverbatim \section PythonParallelism Python Parallelism Python interpreters share the Global Interpreter Lock (GIL), which requires that at any time, one and only one thread has the right to execute a task. With Python versions up to 3.11, if the application is pure Python script, even with multi-interpreters, the program cannot be executed in parallel. The sharedGIL makes the multiple interpreters execution of Python actually serial rather than parallel, and the execution of Python program is single-threaded essentially. For the pure Python script with python versions up to 3.11, the only way to achieve parallelism is to use the master-slave mechanism (Section \ref StarpupyMasterSlave). Parallelism may be implemented with multi-interpreters in the future Python version. Details can be found in Section \ref MultipleInterpreters. Otherwise parallelism can be achieved when external C applications are called or external APIs e.g. BLAS API is used for Numpy objects. Starting from python version 3.12, multiple interpreters can use a separate GIL, to allow parallelism of pure python code. This can be enabled by setting \ref STARPUPY_OWN_GIL to 1. Some corner cases are however not supported yet in python 3.12, notably the usage of futures. \section ImplementingStarPUInPython Using StarPU in Python The StarPU module should be imported in any Python code wanting to use the StarPU Python interface. \code{.py} import starpu \endcode Before using any StarPU functionality, it is necessary to call \c starpu.init(). The function \c starpu.shutdown() should be called after all StarPU functions have been called. \code{.py} import starpu starpu.init() # ... starpu.shutdown() \endcode \subsection SubmittingTasks Submitting Tasks One of the fundamental aspects of StarPU is the task submission. The Python Interface greatly simplifies this process, allowing for direct calls to the submission function without any extra complexities. The Python function used for task submission follows the format: task_submit(options)(func, *args, **kwargs). In this structure:
    • \c func represents any Python function.
    • \c args and \c kwargs denote the function's arguments.
    You can also provide the function as a string. By submitting tasks through this function, you enable StarPU to perform optimizations for your program's execution. It's recommended to submit all tasks to ensure StarPU's efficient scheduling of the underlying tasks. It's important to note that submitted tasks do not execute immediately, and you can retrieve the return value only after the task execution. The first set of parentheses allows to specify various options. Keep in mind that each option has a default value, and even if you're not providing any options, the parentheses should be retained. The options are as follows:
    • \c name (string, default: \c None) : Set the name of the task. This can be useful for debugging purposes.
    • \c synchronous (unsigned, default: 0) : If this flag is set, \c task_submit() only returns when the task has been executed (or if no worker is able to process the task). Otherwise, \c task_submit() returns immediately.
    • \c priority (int, default: 0) : Set the level of priority for the task. This is an integer value whose value must be greater than the return value of the function \c starpu.sched_get_min_priority() (for the least important tasks), and lower or equal to the return value of the function \c starpu.sched_get_max_priority() (for the most important tasks). Default priority is defined as 0 in order to allow static task initialization. Scheduling strategies that take priorities into account can use this parameter to take better scheduling decisions, but the scheduling policy may also ignore it.
    • \c color (unsigned, default: \c None) : Set the color of the task to be used in \c dag.dot.
    • \c flops (double, default: \c None) : Set the number of floating points operations that the task will have to achieve. This is useful for easily getting GFlops/s curves from the function \c starpu.perfmodel_plot, and for the hypervisor load balancing.
    • \c perfmodel (string, default: \c None) : Set the name of the performance model. This name will be used as the filename where the performance model information will be saved. After the task is executed, one can call the function \c starpu.perfmodel_plot() by giving the symbol of perfmodel to view its performance curve.
    \subsection ReturningFutureObject Returning Future Object In order to realize asynchronous frameworks, the task_submit() function returns a Future object. This is an extended use of StarPU provided by the Python interface. A Future represents an eventual result of an asynchronous operation. It is an awaitable object, Coroutines can await on Future objects until they either have a result or an exception set, or until they are canceled. Some basic examples are available in the script starpupy/examples/starpu_py.py. This feature needs the \c asyncio module to be imported. \code{.py} import starpu import asyncio starpu.init() def add(a, b): return a+b async def main(): fut = starpu.task_submit()(add, 1, 2) res = await fut print("The result of function is", res) asyncio.run(main()) starpu.shutdown() \endcode Execution: \verbatim The result of function is 3 \endverbatim When using at least the version 3.8 of python, one can also use the parameter -m asyncio which allows to directly use await instead of asyncio.run(). \verbatim $ python3 -m asyncio >>> import asyncio \endverbatim \code{.py} import starpu starpu.init() def add(a, b): print("The result is ready!") return a+b fut = starpu.task_submit()(add, 1, 2) \endcode \verbatim The result is ready! \endverbatim \code{.py} res = await fut res \endcode \verbatim 3 \endverbatim You can also use the decorator \c starpu.delayed to wrap a function. The function can then directly be submitted to StarPU and will automatically create a Future object. \code{.py} @starpu.delayed def add_deco(a, b): print("The result is ready!") return a+b fut = add_deco(1, 2) \endcode \verbatim The result is ready! \endverbatim \code{.py} res = await fut res \endcode \verbatim 3 \endverbatim To specify options when using the decorator, just do as follows: \code{.py} @starpu.delayed(name="add", color=2, perfmodel="add_deco") def add_deco(a, b): print("The result is ready!") return a+b fut = add_deco(1, 2) \endcode \verbatim The result is ready! \endverbatim \code{.py} res = await fut res \endcode \verbatim 3 \endverbatim A Future object can also be used for the next step calculation even before being ready. The calculation will be postponed until the Future has a result. In this example, after submitting the first task, a Future object fut1 is created, and it is used as an argument of a second task. The second task is submitted even without having the return value of the first task. \code{.py} import asyncio import starpu import time starpu.init() def add(a, b): time.sleep(10) print("The first result is ready!") return a+b def sub(x, a): print("The second result is ready!") return x-a fut1 = starpu.task_submit()(add, 1, 2) fut2 = starpu.task_submit()(sub, fut1, 1) \endcode \verbatim The first result is ready! The second result is ready! \endverbatim \code{.py} res = await fut2 res \endcode \verbatim 2 \endverbatim \subsection SubmitPythonBuffer Submit Python Objects Supporting The Buffer Protocol The Python buffer protocol is a framework in which Python objects can expose raw byte arrays to other Python objects. This can be extremely useful to efficiently store and manipulate large arrays of data. The StarPU Python Interface allows users to use such objects as task parameters. \code{.py} import asyncio import starpu import time import numpy as np starpu.init() def add(a,b): c = np.zeros(np.size(a)) for i in range(np.size(a)): c[i] = a[i] + b[i] return c a = np.array([1, 2, 3]) b = np.array([4, 5, 6]) fut = starpu.task_submit()(add, a, b) res = await fut res \endcode \verbatim array([5., 7., 9.]) \endverbatim StarPU uses a specific data interface to handle Python objects supporting buffer protocol, such python objects are then managed by the StarPU data management library which allows minimizing data transfers between accelerators, and avoids copying the object each time. We show the performances below of the \c numpy addition (numpy.add running the script test_perf.sh) with different array sizes (10, 20, ..., 100, 200, ..., 1000, 2000, ..., 10000, 20000, ..., 100000, 200000, ..., 1000000, 2000000, ..., 10000000, ..., 50000000). We compare two cases:
    1. Using StarPU,
    2. Without using StarPU tasks, but directly calling the numpy.add function.
    The first plot compares the task submission time when using StarPU and the program execution time without using StarPU. We can see that there is an obvious optimization using StarPU when the test array size is large. The task has not finished its execution yet as shown in second figure, the time can be used to perform other operations. \image html starpupy_perf.png width=85% \image latex starpupy_perf.png "" width=\textwidth We can also define our own function to do the \c numpy operation, e.g. the element addition: \code{.py} def add(a, b): for i in range(np.size(a)): a[i] = a[i] + b[i] \endcode We will compare operation performances with the same two cases, but based on our custom function add(a, b). We can see that the custom function is not as efficient as the \c numpy function overall. The optimization for large arrays is the same when using StarPU. \image html starpupy_func_perf.png width=85% \image latex starpupy_func_perf.png "" width=\textwidth \subsubsection AnnotationAccess Access Mode Annotation StarPU defines different access modes for a data, it can be readable (access mode is \c R), writable (access mode is \c W), or both readable and writable (access mode is \c RW). The default access mode is \c R. For the Python interface, these modes can be defined as shown below.
    1. Using the decorator starpu.access(arg="R/W/RW") to wrap the function. \code{.py} a = np.array([1, 2, 3, 4, 5, 6]) e = np.array([0, 0, 0, 0, 0, 0, 0]) @starpu.access(a="R", b="W") def assign(a,b): for i in range(min(np.size(a), np.size(b))): b[i]=a[i] fut = starpu.task_submit()(assign, a, e) starpu.acquire(e) \endcode \verbatim array([1, 2, 3, 4, 5, 6, 0]) \endverbatim \code{.py} starpu.release(e) \endcode
    2. Using the decorator starpu.delayed(options, arg="R/W/RW"). \code{.py} @starpu.delayed(a="R", b="W") def assign(a,b): for i in range(min(np.size(a), np.size(b))): b[i]=a[i] fut = assign(a, e) starpu.acquire(e) \endcode \verbatim array([1, 2, 3, 4, 5, 6, 0]) \endverbatim \code{.py} starpu.release(e) \endcode
    3. Using the method starpu.set_access(func, arg="R/W/RW") that will create a new function. \code{.py} def assign(a,b): for i in range(min(np.size(a), np.size(b))): b[i]=a[i] assign_access=starpu.set_access(assign, a="R", b="W") fut = starpu.task_submit()(assign_access, a, e) starpu.acquire(e) \endcode \verbatim array([1, 2, 3, 4, 5, 6, 0]) \endverbatim \code{.py} starpu.release(e) \endcode
    \subsubsection MethodsAcquireRelease Methods Once the access mode of one argument is set to at least \c W, it may be modified during the task execution. We should pay attention that before the task is finished, we cannot get the up-to-date value of this argument by simply using \c print function. For example: \code{.py} import asyncio import starpu import time import numpy as np starpu.init() a = np.array([1, 2, 3, 4, 5, 6]) e = np.array([0, 0, 0, 0, 0, 0, 0]) @starpu.access(a="R", b="W") def assign(a,b): time.sleep(10) for i in range(min(np.size(a), np.size(b))): b[i]=a[i] fut = starpu.task_submit()(assign, a, e) print(e) # before the task is finished \endcode \verbatim [0 0 0 0 0 0 0] \endverbatim We \c print argument \c e right after submitting the task, but since the task is not finished yet, we can only get its unchanged value. If we want to get its up-to-date value, we need extra functions. In order to access data registered to StarPU outside tasks, we provide an acquire and release mechanism.
    • The starpu.acquire(data, mode) method should be called to access registered data outside tasks (Refer to the C API starpu_data_acquire()). StarPU will ensure that the application will get an up-to-date copy of handle in main memory located where the data was originally registered, and that all concurrent accesses (e.g. from tasks) will be consistent with the access mode specified with the given mode (\c R the default mode, \c W or \c RW).
    • The starpu.release(data) method must be called once the application no longer needs to access the piece of data (Refer to the C API starpu_data_release()).
    • The starpu.unregister(data) method must be called to unregister the Python object from StarPU. (Refer to the C API starpu_data_unregister()). This method waits for all calculations to be finished before unregistering data.
    With \c acquire, even we ask to access the argument right after submitting the task, the up-to-date value will be printed once the task is finished. \code{.py} starpu.acquire(e) # before the task is finished \endcode \verbatim array([1, 2, 3, 4, 5, 6, 0]) \endverbatim In order to complete the addition operation example, execution steps are: \code{.py} import asyncio import starpu import time import numpy as np starpu.init() @starpu.access(a="RW", b="R") def add(a,b): time.sleep(10) for i in range(np.size(a)): a[i] = a[i] + b[i] a = np.array([1, 2, 3]) b = np.array([4, 5, 6]) starpu.acquire(a, mode="R") \endcode \verbatim array([1, 2, 3]) \endverbatim \code{.py} starpu.release(a) fut = starpu.task_submit()(add, a, b) starpu.acquire(b, mode="R") \endcode \verbatim array([4, 5, 6]) \endverbatim \code{.py} starpu.acquire(a, mode="R") # before the task is finished \endcode \verbatim array([5, 7, 9]) \endverbatim \code{.py} starpu.release(a) starpu.release(b) starpu.unregister(a) starpu.unregister(b) \endcode The result of \c b is printed directly right after calling \c acquire, but the up-to-date value of \c a is printed after the task is finished. Here we need to pay attention that if we want to modify an argument during the task execution and get its up-to-date value for the future operation, we should set the access mode of this argument to at least \c W, otherwise this argument object is not synchronous, and the next task which needs this object will not wait its up-to-date value to execute. If we call \c acquire but not \c release before the task submission, the task will not start to execute until the object is released. An example is shown below: \code{.py} import asyncio import starpu import numpy as np import time starpu.init() @starpu.access(a="RW") def add(a,b): print("This is the addition function") time.sleep(10) for i in range(np.size(a)): a[i] = a[i] + b[i] a = np.array([1, 2, 3]) b = np.array([4, 5, 6]) starpu.acquire(a, mode="R") \endcode \verbatim array([1, 2, 3]) \endverbatim \code{.py} fut = starpu.task_submit()(add, a, b) starpu.release(a) \endcode \verbatim This is the addition function # The task will not start until "a" is released \endverbatim \code{.py} starpu.acquire(a, mode="R") # Before the task is finished \endcode \verbatim array([5, 7, 9]) # After the task is finished \endverbatim \code{.py} starpu.release(a) starpu.unregister(a) starpu.unregister(b) \endcode \section StarPUPYInterface StarPU Data Interface for Python Objects StarPU uses data handles to manage a piece of data. A data handle keeps track of replicates of the same data (registered by the application) over various memory nodes. The data management library manages to keep them coherent. That also allows minimizing the data transfers, and avoids copying the object each time. Data handles are managed through specific data interfaces. Some examples applying this specific interface are available in script starpupy/examples/starpu_py_handle.py. \subsection PythonObject Interface for Ordinary Python Objects A specific data interface has been defined to manage Python objects, such as constant (integer, float...), string, list, etc. This interface is defined with the class Handle. When submitting a task, instead of specifying a function and its arguments, we specify a function and the handles of its arguments. In addition to returning a Future object, it is also possible to return a StarPU handle object when submitting a function. To do so, you need to set the starpu.task_submit option \c ret_handle to \c True, its default value is \c False. \code{.py} import starpu from starpu import Handle starpu.init() def add(x, y): return x + y x = Handle(2) y = Handle(3) res = starpu.task_submit(ret_handle=True)(add, x, y) \endcode We then need to call the method get() to get the latest version of this Python Object. \code{.py} res.get() \endcode \verbatim 5 \endverbatim When not setting the parameter \c ret_handle, the return object is a Future. \code{.py} res_fut = starpu.task_submit()(add, x, y) await res_fut \endcode If the Python object is immutable (such as int, float, str, tuple...), registering the same object several times is authorised. That means you can do this: \code{.py} x = Handle(2) x1 = Handle(2) \endcode x and x1 are two different Handle objects. \subsection PythonBuffer Interface for Python Objects Supporting Buffer Protocol This StarPU data interface can also be used to manage Python objects supporting buffer protocol, i.e \c numpy array, bytes, bytearray, array.array and memoryview object. \code{.py} import numpy as np import starpu from starpu import Handle starpu.init() def add(a,b): for i in range(np.size(a)): a[i] = a[i] + b[i] return a a = np.array([1, 2, 3]) b = np.array([2, 4, 6]) a_h = Handle(a) b_h = Handle(b) res = starpu.task_submit(ret_handle=True)(add, a_h, b_h) res.get() \endcode \verbatim array([3, 6, 9]) \endverbatim Different from immutable Python object, all Python objects supporting buffer protocol are mutable, and registering the same object one more time is not authorized. If you do this: \code{.py} a = np.array([1, 2, 3]) a_h = Handle(a) a1_h = Handle(a) \endcode You will get an error message: \verbatim starpupy.error: Should not register the same mutable python object once more. \endverbatim You may refer to Section \ref SubmitPythonBuffer, and realize that StarPU Python interface uses data handles to manage Python objects supporting buffer protocol by default. These objects are usually relatively large, such as a big NumPy matrix. We want to avoid multiple copies and transfers of this data over various memory nodes, so we set the default \c starpu.task_submit() option \c arg_handle to \c True for users to allow their applications to get the most optimization. To deactivate the use of this data interface, you need to set the option \c arg_handle to \c False. Since we use data handles by default, registration is implemented in the step of task submission. Therefore, you should be careful not to register again the same object after the task submission, like this: \code{.py} a = np.array([1, 2, 3]) b = np.array([2, 4, 6]) res = starpu.task_submit(ret_handle=True)(add, a, b) a_h = Handle(a) \endcode You will get the error message: \verbatim starpupy.error: Should not register the same mutable python object once more. \endverbatim As performances, we showed in Section \ref SubmitPythonBuffer, we add one case to compare with the others two cases. We still test the \c numpy addition (numpy.add running the script test_handle_perf.sh) with different array sizes (10, 20, ..., 100, 200, ..., 1000, 2000, ..., 10000, 20000, ..., 100000, 200000, ..., 1000000, 2000000, ..., 10000000, ..., 50000000). Three cases are:
    1. Using StarPU and returning future object,
    2. Using StarPU and returning handle object,
    3. Without using StarPU tasks, but directly calling the numpy.add function.
    The first plot compares the task submission time when using StarPU either returning a Future or a handle object and the program execution time without using StarPU. We can see that there is an obvious optimization using StarPU, either returning a Future or a handle object when the test array size is large. The task has not finished its execution yet as shown in second figure, the time can be used to perform other operations. When array size is not very large, returning a handle has a better execution performance than returning a Future. \image html starpupy_handle_perf.png width=85% \image latex starpupy_handle_perf.png "" width=\textwidth We can also define our own function to do the \c numpy operation, e.g. the element addition: \code{.py} def add(a, b): for i in range(np.size(a)): a[i] = a[i] + b[i] \endcode We will compare operation performances with the same three cases but based on our custom function add(a, b). We can see that the custom function is not as efficient as the \c numpy function overall. The optimisation for large arrays is the same when using StarPU. \image html starpupy_handle_func_perf.png width=85% \image latex starpupy_handle_func_perf.png "" width=\textwidth \subsubsection Methods Methods As in Section \ref MethodsAcquireRelease, the \c Handle class defines methods to provide an acquire and release mechanism.
    • The method Handle::acquire(mode) should be called before accessing the object outside tasks (Refer to the C API starpu_data_acquire()). The access mode can be \c "R", \c "W", \c "RW", the default value is "R". We will get an up-to-date copy of Python object by calling this method.
    • The method Handle::release() must be called once the application no longer needs to access the registered data (Refer to the C API starpu_data_release()).
    • The method Handle::unregister() to unregister the Python object handle from StarPU (Refer to the C API starpu_data_unregister()). This method will wait for all calculations to be finished before unregistering data.
    The previous example can be coded as follows: \code{.py} import numpy as np import starpu from starpu import Handle starpu.init() @starpu.access(a="RW", b="R") def add(a,b): for i in range(np.size(a)): a[i] = a[i] + b[i] a = np.array([1, 2, 3]) b = np.array([2, 4, 6]) a_h = Handle(a) b_h = Handle(b) a_h.acquire(mode = "R") \endcode \code{.py} array([1, 2, 3]) \endcode \code{.py} a_h.release() starpu.task_submit(ret_handle=True)(add, a_h, b_h) a_h.acquire(mode = "R") # we get the up-to-date value \endcode \verbatim array([3, 6, 9]) \endverbatim \code{.py} a_h.release() a_h.unregister() \endcode \subsection EmptyNumpy Interface for Empty Numpy Array We can register an empty \c numpy array by calling HandleNumpy(size, type). The default value for type is float64. You will find below an example which defines the function \c assign taking two arrays as parameters, the second one being an empty array which will be assigned the values of the first array. \code{.py} import numpy as np import starpu from starpu import Handle from starpu import HandleNumpy starpu.init() @starpu.access(b="W") def assign(a,b): for i in range(min(np.size(a,0), np.size(b,0))): for j in range(min(np.size(a,1), np.size(b,1))): b[i][j] = a[i][j] return b a = np.array([[1, 2, 3], [4, 5, 6]]) a_h = Handle(a) e_h = HandleNumpy((5,10), a.dtype) res = starpu.task_submit(ret_handle=True)(assign, a_h, e_h) e_h.acquire() \endcode \verbatim array([[1, 2, 3, 0, 0, 0, 0, 0, 0, 0], [4, 5, 6, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]) \endverbatim \code{.py} e_h.release() \endcode \subsection HandlePartition Array Partitioning A n-dim \c numpy array can be split into several sub-arrays by calling the method Handle::partition(nchildren, dim, chunks_list) (Refer to the C API starpu_data_partition_plan()).
    • nchildren is the number of sub-handles,
    • dim is the dimension that we want to partition along, it can be 0 for vertical dimension, 1 for horizontal dimension, 2 for depth dimension, 3 for time dimension, ...etc.
    • chunks_list is a list containing the size of each segment. The total length of segments in this list must be equal to the length of the selected dimension.
    The method will return a sub-handle list, each of the sub-handles can be used when submitting a task with task_submit(). This allows to process an array in parallel, once the execution of each sub-handle is finished, the result will be directly reflected in the original n-dim array. When the sub-handles are no longer needed, the method Handle::unpartition(handle_list, nchildren) should be called to clear the partition and unregister all the sub-handles (Refer to the C API starpu_data_partition_clean()).
    • handle_list is the sub-handle list which was previously returned by the method Handle::partition(),
    • nchildren is the number of sub-handles.
    Here is an example to use these methods. \code{.py} import numpy as np import starpu from starpu import Handle starpu.init() @starpu.access(a="RW", b="R") def add(a,b): np.add(a,b,out=a) n, m = 20, 10 arr = np.arange(n*m).reshape(n, m) arr_h = Handle(arr) arr_h.acquire(mode='RW') \endcode \verbatim [[ 0 1 2 3 4 5 6 7 8 9] [ 10 11 12 13 14 15 16 17 18 19] [ 20 21 22 23 24 25 26 27 28 29] [ 30 31 32 33 34 35 36 37 38 39] [ 40 41 42 43 44 45 46 47 48 49] [ 50 51 52 53 54 55 56 57 58 59] [ 60 61 62 63 64 65 66 67 68 69] [ 70 71 72 73 74 75 76 77 78 79] [ 80 81 82 83 84 85 86 87 88 89] [ 90 91 92 93 94 95 96 97 98 99] [100 101 102 103 104 105 106 107 108 109] [110 111 112 113 114 115 116 117 118 119] [120 121 122 123 124 125 126 127 128 129] [130 131 132 133 134 135 136 137 138 139] [140 141 142 143 144 145 146 147 148 149] [150 151 152 153 154 155 156 157 158 159] [160 161 162 163 164 165 166 167 168 169] [170 171 172 173 174 175 176 177 178 179] [180 181 182 183 184 185 186 187 188 189] [190 191 192 193 194 195 196 197 198 199]] \endverbatim \code{.py} arr_h.release() split_num = 3 arr_h_list = arr_h.partition(split_num, 1, [3,2,5]) # split into 3 sub-handles, and partition along the horizontal dimension for i in range(split_num): res=starpu.task_submit(ret_handle=True)(add, arr_h_list[i], arr_h_list[i]) arr_h.acquire(mode='RW') \endcode \verbatim [[ 0 2 4 12 16 40 48 56 64 72] [ 80 88 96 104 112 120 128 136 144 152] [160 168 176 184 192 200 208 216 224 232] [240 248 256 264 272 280 288 296 304 312] [320 328 336 172 176 180 184 188 192 196] [200 204 208 212 216 220 224 228 232 236] [120 122 124 126 128 130 132 134 136 138] [140 142 144 146 148 150 152 154 156 158] [160 162 164 166 168 170 172 174 176 178] [180 182 184 186 188 190 192 194 196 198] [200 202 204 206 208 105 106 107 108 109] [110 111 112 113 114 115 116 117 118 119] [120 121 122 123 124 125 126 127 128 129] [130 131 132 133 134 135 136 137 138 139] [140 141 142 143 144 145 146 147 148 149] [150 151 152 153 154 155 156 157 158 159] [160 161 162 163 164 165 166 167 168 169] [170 171 172 173 174 175 176 177 178 179] [180 181 182 183 184 185 186 187 188 189] [190 191 192 193 194 195 196 197 198 199]] \endverbatim \code{.py} arr_h.release() arr_h.unpartition(arr_h_list, split_num) arr_h.unregister() \endcode The method Handle::get_partition_size(handle_list) can be used to get the array size of each sub-array. \code{.py} arr_h_list = arr_h.partition(split_num, 1, [3,2,5]) arr_h.get_partition_size(arr_h_list) \endcode \verbatim [60, 40, 100] \endverbatim The full script is available in starpupy/examples/starpu_py_partition.py. \section Benchmark Benchmark This benchmark gives a glimpse into how long a task should be (in µs) for the StarPU Python interface overhead to be low enough to keep efficiency. Running starpupy/benchmark/tasks_size_overhead.sh generates a plot of the speedup of tasks of various sizes, depending on the number of CPUs being used. In the first figure, the return value is a handle object. In the second figure, the return value is a future object. In the third figure, the return value is \c None. For example, in the figure of returning handle object, for a 571 µs task (the green line), StarPU overhead is low enough to guarantee a good speedup if the number of CPUs is not more than 12. But with the same number of CPUs, a 314 µs task (the blue line) cannot have a correct speedup. We need to decrease the number of CPUs to about 8 if we want to keep efficiency. \image html tasks_size_overhead_py_handle.png "(1) Returning handle object" width=50% \image latex tasks_size_overhead_py_handle.png "" width=\textwidth \image html tasks_size_overhead_py_futur.png "(2) Returning future object" width=50% \image latex tasks_size_overhead_py_futur.png "" width=\textwidth \image html tasks_size_overhead_py_none.png "(3) Returning None" width=50% \image latex tasks_size_overhead_py_none.png "" width=\textwidth \section ImitatingJoblibLibrary Running Python Functions as Pipeline Jobs (Imitating Joblib Library) The StarPU Python interface also provides parallel computing for loops using multiprocessing, similarly to the Joblib Library that can simply turn out Python code into parallel computing code and thus increase the computing speed. \subsection JobLibraryExamples Examples
    • The most basic usage is to parallelize a simple iteration. \code{.py} from math import log10 [log10(10 ** i) for i in range(10)] \endcode \verbatim [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0] \endverbatim In order to spread it over several CPUs, you need to import the \c starpu.joblib module, and use its \c Parallel class: \code{.py} import starpu.joblib from math import log10 starpu.init() starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(log10)(10**i)for i in range(10)) \endcode \verbatim [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0] \endverbatim It is also possible to first create an object of the \c Parallel class, and then call \c starpu.joblib.delayed to execute the generator expression. \code{.py} import starpu.joblib from math import log10 starpu.init() parallel=starpu.joblib.Parallel(n_jobs=2) parallel(starpu.joblib.delayed(log10)(10**i)for i in range(10)) \endcode \verbatim [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0] \endverbatim
    • Instead of a generator expression, a list of functions can also be submitted as a task through the \c Parallel class. \code{.py} import starpu.joblib starpu.init() #generate a list to store functions g_func=[] #function no input no output print hello world def hello(): print ("Example 1: Hello, world!") g_func.append(starpu.joblib.delayed(hello)()) #function has 2 int inputs and 1 int output def multi(a, b): res_multi = a*b print("Example 2: The result of ",a,"*",b,"is",res_multi) return res_multi g_func.append(starpu.joblib.delayed(multi)(2, 3)) #function has 4 float inputs and 1 float output def add(a, b, c, d): res_add = a+b+c+d print("Example 3: The result of ",a,"+",b,"+",c,"+",d,"is",res_add) return res_add g_func.append(starpu.joblib.delayed(add)(1.2, 2.5, 3.6, 4.9)) #function has 2 int inputs 1 float input and 1 float output 1 int output def sub(a, b, c): res_sub1 = a-b-c res_sub2 = a-b print ("Example 4: The result of ",a,"-",b,"-",c,"is",res_sub1,"and the result of",a,"-",b,"is",res_sub2) return res_sub1, res_sub2 g_func.append(starpu.joblib.delayed(sub)(6, 2, 5.9)) #input is iterable function list starpu.joblib.Parallel(n_jobs=2)(g_func) \endcode Execution: \verbatim Example 3: The result of 1.2 + 2.5 + 3.6 + 4.9 is 12.200000000000001 Example 1: Hello, world! Example 4: The result of 6 - 2 - 5.9 is -1.9000000000000004 and the result of 6 - 2 is 4 Example 2: The result of 2 * 3 is 6 [None, 6, 12.200000000000001, (-1.9000000000000004, 4)] \endverbatim
    • The function can also take array parameters. \code{.py} import starpu.joblib import numpy as np starpu.init() def multi_array(a, b): for i in range(len(a)): a[i] = a[i]*b[i] A = np.arange(10) B = np.arange(10, 20, 1) starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(multi_array)((i for i in A), (j for j in B))) A \endcode Here the array \c A has not been modified. \verbatim array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) \endverbatim If we pass \c A directly as an argument, its value is updated \code{.py} starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(multi_array)(A, B)) A \endcode \verbatim array([ 0, 11, 24, 39, 56, 75, 96, 119, 144, 171]) \endverbatim In the next call, the value of \c A is also updated. \code{.py} starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(multi_array)(b=(j for j in B), a=A)) A \endcode \verbatim array([ 0, 121, 288, 507, 784, 1125, 1536, 2023, 2592, 3249]) \endverbatim The above three writing methods are equivalent and their execution time are very close. However, when using directly a \c numpy arrays, its value will be updated, this does not happen when generators are provided. When using a \c numpy array, it will be handled by StarPU with a data interface.
    • Here an example mixing scalar objects and \c numpy arrays or generator expressions. \code{.py} import starpu.joblib import numpy as np starpu.init() def scal(a, t): for i in range(len(t)): t[i] = t[i]*a A = np.arange(10) starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(scal)(2, (i for i in A))) starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(scal)(2,A)) \endcode Again, the value of \c A is modified by the 2nd call. \code{.py} A \endcode \verbatim array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18]) \endverbatim
    The full script is available in starpupy/examples/starpu_py_parallel.py. \subsection ParallelParameters Parallel Parameters The \c starpu.joblib.Parallel class accepts the following parameters:
    • \c mode (string, default: \c "normal") A string with the value "normal" or "future". With the "normal" mode, you can call \c starpu.joblib.Parallel directly without using the \c asyncio module, and you will get the result when the task is executed. With the "future" mode, when calling \c starpu.joblib.Parallel, you will get a Future object as a return value. By setting the parameter end_msg, the given message will be displayed when the result is ready, then you can call \c await to get the result. The \c asyncio module should be imported in this case. \code{.py} import starpu import asyncio from math import log10 starpu.init() fut = starpu.joblib.Parallel(mode="future", n_jobs=3, end_msg="The result is ready!")(starpu.joblib.delayed(log10)(10**i)for i in range(10)) The result is ready! <_GatheringFuture finished result=[[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]> await fut \endcode \verbatim [[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]] \endverbatim
    • \c end_msg (string, default: \c None) A message that will be displayed when the task is executed and the result is ready. When the parameter is unset, no message will be displayed when the result is ready. In any case, you need to perform awaiting to get the result.
    • \c n_jobs (int, default: \c None) The maximum number of concurrently running jobs. If -1 all CPUs are used. If 1 is given, no parallel computing code is used at all, which is useful for debugging. For \c n_jobs below -1, (\c n_cpus + 1 + \c n_jobs) are used. Thus, for \c n_jobs = -2, all CPUs but one are used. \c None is a marker for ‘unset’ that will be interpreted as \c n_jobs=1 (sequential execution). \c n_cpus is the number of CPUs detected by StarPU on the running device.
    • \c perfmodel (string, default : \c None) Set the name of the performance model. This name will be used as the filename where the performance model information will be saved. After the task is executed, one can call the function \c starpu.perfmodel_plot() by giving the symbol of perfmodel to view its performance curve.
    \subsection JobLibPerformances Performances
    • We compare the performances of the two methods for passing arguments to \c the starpu.joblib.delayed function. The first method defines a function that contains only scalars calculations, and then we pass a generator expression as an argument. The second method defines a function that contains arrays calculations, and then we pass either \c numpy arrays or generators as arguments. The second method takes less time. \code{.py} import starpu.joblib import numpy as np import time starpu.init() N=1000000 def multi(a,b): res_multi = a*b return res_multi print("--First method") A = np.arange(N) B = np.arange(N, 2*N, 1) start_exec1 = time.time() start_cpu1 = time.process_time() starpu.joblib.Parallel(n_jobs=-1)(starpu.joblib.delayed(multi)(i,j) for i,j in zip(A,B)) end_exec1 = time.time() end_cpu1 = time.process_time() print("the program execution time is", end_exec1-start_exec1) print("the cpu execution time is", end_cpu1-start_cpu1) def multi_array(a, b): for i in range(len(a)): a[i] = a[i]*b[i] return a print("--Second method with Numpy arrays") A = np.arange(N) B = np.arange(N, 2*N, 1) start_exec2 = time.time() start_cpu2 = time.process_time() starpu.joblib.Parallel(n_jobs=-1)(starpu.joblib.delayed(multi_array)(A, B)) end_exec2 = time.time() end_cpu2 = time.process_time() print("the program execution time is", end_exec2-start_exec2) print("the cpu execution time is", end_cpu2-start_cpu2) print("--Second method with generators") A = np.arange(N) B = np.arange(N, 2*N, 1) start_exec3 = time.time() start_cpu3 = time.process_time() starpu.joblib.Parallel(n_jobs=-1)(starpu.joblib.delayed(multi_array)((i for i in A), (j for j in B))) end_exec3 = time.time() end_cpu3 = time.process_time() print("the program execution time is", end_exec3-start_exec3) print("the cpu execution time is", end_cpu3-start_cpu3) \endcode Execution: \verbatim --First method the program execution time is 3.000865936279297 the cpu execution time is 5.17138062 --Second method with Numpy arrays the program execution time is 0.7571873664855957 the cpu execution time is 0.9166007309999991 --Second method with generators the program execution time is 0.7259719371795654 the cpu execution time is 1.1182918959999988 \endverbatim
    • Performance can also be shown with the performance model. Here an example with the function \c log10. \code{.py} from math import log10 for x in [10, 100, 1000, 10000, 100000, 1000000]: for X in range(x, x*10, x): starpu.joblib.Parallel(n_jobs=-1, perfmodel="log_list")(starpu.joblib.delayed(log10)(i+1)for i in range(X)) starpu.perfmodel_plot(perfmodel="log_list") \endcode \image html starpu_log_list.png \image latex starpu_log_list.png "" width=\textwidth If we use a \c numpy array as parameter, the calculation can withstand larger size, as shown below. \code{.py} from math import log10 def log10_arr(t): for i in range(len(t)): t[i] = log10(t[i]) return t for x in [10, 100, 1000, 10000, 100000, 1000000, 10000000]: for X in range(x, x*10, x): A = np.arange(1,X+1,1) starpu.joblib.Parallel(n_jobs=-1, perfmodel="log_arr")(starpu.joblib.delayed(log10_arr)(A)) starpu.perfmodel_plot(perfmodel="log_arr") \endcode \image html starpu_log_arr.png \image latex starpu_log_arr.png "" width=\textwidth
    \section MultipleInterpreters Multiple Interpreters It is possible to use multiple interpreters when running python applications. To do so, you need to set the variable \ref STARPUPY_MULTI_INTERPRETER when running a StarPU Python application. Python interpreters share the Global Interpreter Lock (GIL), which requires that at any time, one and only one thread has the right to execute a task. In other words, GIL makes the multiple interpreters execution of Python actually serial rather than parallel, and the execution of Python program is single-threaded essentially. Therefore, if the application is pure Python script, even with multi-interpreters, the program cannot be executed in parallel, unless an external C application is called. Fortunately now there is a quite positive development. Python developers are preparing to implement stop sharing the GIL between interpreters (https://peps.nogil.dev/pep-0684/) or even make GIL optional so that Python code can be run without GIL (https://peps.nogil.dev/pep-0701/), that will facilitate true parallelism with the next Python version. In order to transfer data between interpreters, the module \c cloudpickle is used to serialize Python objects in contiguous byte array. This mechanism increases the overhead of the StarPU Python interface, as shown in the following plots, to be compared to the plots given in \ref Benchmark. In the first figure, the return value is a handle object. In the second figure, the return value is a future object. In the third figure, the return value is \c None. \image html tasks_size_overhead_py_handle_pickle.png "(1) Returning handle object" width=50% \image latex tasks_size_overhead_py_handle_pickle.png "" width=\textwidth \image html tasks_size_overhead_py_fut_pickle.png "(2) Returning future object" width=50% \image latex tasks_size_overhead_py_fut_pickle.png "" width=\textwidth \image html tasks_size_overhead_py_noret_pickle.png "(3) Returning None" width=50% \image latex tasks_size_overhead_py_noret_pickle.png "" width=\textwidth In order to reflect this influence more intuitively, we make a performance comparison. By default, StarPU uses virtually shared memory manager for Python objects supporting buffer protocol that allows to minimize data transfers. But in the case of multi-interpreter, if we do not use virtually shared memory manager, data transfer can be realized only with the help of cloudpickle. We will show the operation performances below (Running test_handle_perf_pickle.sh). The operation that we test is \c numpy addition (numpy.add), and the array size is 10, 20, ..., 100, 200, ..., 1000, 2000, ..., 10000, 2000, ..., 100000,200000, ..., 1000000, 2000000, ..., 10000000, ..., 50000000. We compared three cases: first, using virtually shared memory manager, second, without using virtually shared memory manager, third, without using StarPU task submitting, but directly calling numpy.add function. In the first figure, we compare the submission time when using StarPU and the execution time without using StarPU. We can see that there is still an obvious optimization using StarPU virtually shared memory manager when the test array size is large. However, if only using cloudpickle, StarPU Python interface cannot provide an effective optimization. And in the second figure, we can see that the same operation will take more time to finish the program execution when only using cloudpickle. \image html starpupy_handle_perf_pickle.png width=85% \image latex starpupy_handle_perf_pickle.png "" width=\textwidth We can also define our own function to do the \c numpy operation, e.g. the element addition: \code{.py} def add(a, b): for i in range(np.size(a)): a[i] = a[i] + b[i] \endcode We will compare operation performances of the same three cases, but based on the custom function add(a, b). We can see that the custom function takes more time than \c numpy function overall. Although the same operation still takes more time to submit the task when only using cloudpickle than with virtually shared memory manager, there is still a better optimization. The operation takes less time than only calling a custom function even when the array is not very large. \image html starpupy_handle_func_perf_pickle.png width=85% \image latex starpupy_handle_func_perf_pickle.png "" width=\textwidth \section StarpupyMasterSlave Master Slave Support StarPU Python interface provides MPI master slave support as well. Please refer to \ref MPIMasterSlave for the specific usage. When you write your Python script, make sure to import all required functions before the \c starpu module. Functions imported after the \c starpu module can only be submitted using their name as a string when calling \c task_submit(), this will decrease the submission efficiency. (TODO) \section StarPUPYSimgrid StarPUPY and Simgrid In simgrid mode, the Python interpreter will not be aware of simgrid and will thus not notify it when some thread is blocked waiting for something to happen in another thread. This notably means that the `asyncio` mode and waiting for a `future` will not work, and one thus has to use StarPUPY-provided functions to wait for completion, such as `starpupy.task_wait_for_all()` or `data.acquire`. Also, we have not yet implemented not calling the actual call of the task function, so the execution time will be longer than in real execution, since not only it executes computations, but also sequentially, and adds the simulation overhead. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_performances/000077500000000000000000000000001507764646700241255ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_performances/benchmarking_starpu.doxy000066400000000000000000000066401507764646700310660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page BenchmarkingStarPU Benchmarking StarPU Some interesting benchmarks are installed among examples in $STARPU_PATH/lib/starpu/examples/. Make sure to try various schedulers, for instance STARPU_SCHED=dmda. \section TaskSizeOverhead Task Size Overhead This benchmark gives a glimpse into how long a task should be (in µs) for StarPU overhead to be low enough to keep efficiency. Running tasks_size_overhead.sh generates a plot of the speedup of tasks of various sizes, depending on the number of CPUs being used. \image html tasks_size_overhead.png \image latex tasks_size_overhead.png "" width=\textwidth \section DataTransferLatency Data Transfer Latency local_pingpong performs a ping-pong between the first two CUDA nodes, and prints the measured latency. \section MatrixMatrixMultiplication Matrix-Matrix Multiplication sgemm and dgemm perform a blocked matrix-matrix multiplication using BLAS and cuBLAS. They output the obtained GFlops. \section CholeskyFactorization Cholesky Factorization cholesky_* perform a Cholesky factorization (single precision). They use different dependency primitives. \section LUFactorization LU Factorization lu_* perform an LU factorization. They use different dependency primitives. \section SimulatedBenchmarks Simulated Benchmarks It can also be convenient to try simulated benchmarks, if you want to give a try at CPU-GPU scheduling without actually having a GPU at hand. This can be done by using the SimGrid version of StarPU: first install the SimGrid simulator from https://simgrid.org/ (we tested with SimGrid from 3.11 to 3.16, and 3.18 to 3.30. SimGrid versions 3.25 and above need to be configured with \c -Denable_msg=ON. Other versions may have compatibility issues, 3.17 notably does not build at all. MPI simulation does not work with version 3.22). Then configure StarPU with \ref enable-simgrid "--enable-simgrid" and rebuild and install it, and then you can simulate the performance for a few virtualized systems shipped along StarPU: attila, mirage, idgraf, and sirocco. For instance: \verbatim $ export STARPU_PERF_MODEL_DIR=$STARPU_PATH/share/starpu/perfmodels/sampling $ export STARPU_HOSTNAME=attila $ $STARPU_PATH/lib/starpu/examples/cholesky_implicit -size $((960*20)) -nblocks 20 \endverbatim Will show the performance of the cholesky factorization with the attila system. It will be interesting to try with different matrix sizes and schedulers. Performance models are available for cholesky_*, lu_*, *gemm, with block sizes 320, 640, or 960 (plus 1440 for sirocco), and for stencil with block size 128x128x128, 192x192x192, and 256x256x256. Read Chapter \ref SimGridSupport for more information on the SimGrid support. */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_performances/offline_performance_tools.doxy000066400000000000000000001562621507764646700322710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2020,2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page OfflinePerformanceTools Offline Performance Tools To get an idea of what is happening, a lot of performance feedback is available, detailed in this chapter. The various information should be checked for.
    • What does the Gantt diagram look like? (see \ref CreatingAGanttDiagram)
      • If it's mostly green (tasks running in the initial context) or context specific color prevailing, then the machine is properly utilized, and perhaps the codelets are just slow. Check their performance, see \ref PerformanceOfCodelets.
      • If it's mostly purple (FetchingInput), tasks keep waiting for data transfers, do you perhaps have far more communication than computation? Did you properly use CUDA streams to make sure communication can be overlapped? Did you use data-locality aware schedulers to avoid transfers as much as possible?
      • If it's mostly red (Blocked), tasks keep waiting for dependencies, do you have enough parallelism? It might be a good idea to check what the DAG looks like (see \ref CreatingADAGWithGraphviz).
      • If only some workers are completely red (Blocked), for some reason the scheduler didn't assign tasks to them. Perhaps the performance model is bogus, check it (see \ref PerformanceOfCodelets). Do all your codelets have a performance model? When some of them don't, the schedulers switches to a greedy algorithm which thus performs badly.
    You can also use the Temanejo task debugger (see \ref UsingTheTemanejoTaskDebugger) to visualize the task graph more easily. \section GeneratingTracesWithFxT Generating Traces With FxT StarPU can use the FxT library (see https://savannah.nongnu.org/projects/fkt/) to generate traces with a limited runtime overhead. You can get a tarball from http://download.savannah.gnu.org/releases/fkt/?C=M Compiling and installing the FxT library in the $FXTDIR path is done following the standard procedure: \verbatim $ ./configure --prefix=$FXTDIR $ make $ make install \endverbatim In order to have StarPU to generate traces, StarPU needs to be configured again after installing FxT, and configuration show: \verbatim FxT trace enabled: yes \endverbatim If configure does not find FxT automatically, it can be specified by hand with the option \ref with-fxt "--with-fxt" : \verbatim $ ./configure --with-fxt=$FXTDIR \endverbatim Or you can simply point the PKG_CONFIG_PATH environment variable to $FXTDIR/lib/pkgconfig When \ref STARPU_FXT_TRACE is set to 1, a trace is generated when StarPU is terminated by calling starpu_shutdown(). The trace is a binary file whose name has the form prof_file_XXX_YYY where XXX is the username, and YYY is the MPI id of the process that used StarPU (or 0 when running a sequential program). One can change the name of the file by setting the environment variable \ref STARPU_FXT_SUFFIX, its contents will be used instead of prof_file_XXX. This file is saved in the /tmp/ directory by default, or by the directory specified by the environment variable \ref STARPU_FXT_PREFIX. The additional \c configure option \ref enable-fxt-lock "--enable-fxt-lock" can be used to generate trace events which describes the lock's behavior during the execution. It is however very heavy and should not be used unless debugging StarPU's internal locking. When the FxT trace file prof_file_something has been generated, it is possible to generate different trace formats by calling: \verbatim $ starpu_fxt_tool -i /tmp/prof_file_something \endverbatim Or alternatively, setting the environment variable \ref STARPU_GENERATE_TRACE to 1 before application execution will make StarPU automatically generate all traces at application shutdown. Note that if the environment variable \ref STARPU_FXT_PREFIX is set, files will be generated in the given directory. One can also set the environment variable \ref STARPU_GENERATE_TRACE_OPTIONS to specify options, see starpu_fxt_tool --help, for example: \verbatim $ export STARPU_GENERATE_TRACE=1 $ export STARPU_GENERATE_TRACE_OPTIONS="-no-acquire" \endverbatim When running an MPI application, \ref STARPU_GENERATE_TRACE will not work as expected (each node will try to generate trace files, thus mixing outputs...), you have to collect the trace files from the MPI nodes, and specify them all on the command starpu_fxt_tool, for instance: \verbatim $ starpu_fxt_tool -i /tmp/prof_file_something* \endverbatim By default, the generated trace contains all information. To reduce the trace size, various -no-foo options can be passed to starpu_fxt_tool, see starpu_fxt_tool --help . \subsection CreatingAGanttDiagram Creating a Gantt Diagram One of the generated files is a trace in the Paje format. The file, located in the current directory, is named paje.trace. It can be viewed with ViTE (https://solverstack.gitlabpages.inria.fr/vite/) a trace visualizing open-source tool. To open the file paje.trace with ViTE, use the following command: \verbatim $ vite paje.trace \endverbatim Once the file is opened in ViTE interface, we will see the figure as shown below: \image html vite_open.png width=60% \image latex vite_open.png "" width=\textwidth We can then click the "No arrows" button in task bar of ViTE interface, to better observe the Gantt diagram that illustrates the start and end dates of the different tasks or activities of a program. \image html vite_no_arrow.png width=60% \image latex vite_no_arrow.png "" width=\textwidth In the Gantt diagram, the bar types such as devices (CPU or GPU) are displayed on the left side. Each task is represented by a horizontal rectangle that spans the duration of the task. The rectangles are arranged along a timeline axis, which is shown at the top of the Gantt diagram and represents the overall duration of the program in milliseconds. The position of the bar along the timeline shows when the task begins and ends. We can see some long red bars at the beginning and end of the entire timeline, which represent that the unit is idle. There are no tasks at these moments, and workers are waiting or in a sleeping state. \subsubsection ZoomingInGantt Zooming in Gantt Diagram Then as shown in the following figure, press and hold the left mouse button to select the area you want to zoom in on. Release the button to view the selected area, and we can repeat the zoom action multiple times. \image html vite_zoom.png width=60% \image latex vite_zoom.png "" width=\textwidth This zoom result is: \image html vite_zoom_result.png width=60% \image latex vite_zoom_result.png "" width=\textwidth Right-clicking anywhere on the Gantt diagram restores the previous zoom view. One can press and hold the left mouse button inside the top blue bar to select horizontally, which will horizontally zoom in on all Gantt diagrams within the selected time range. \image html vite_zoom_horiz.png width=60% \image latex vite_zoom_horiz.png "" width=\textwidth This zoom result is: \image html vite_zoom_horiz_result.png width=60% \image latex vite_zoom_horiz_result.png "" width=\textwidth \subsubsection ColorsInGantt Colors in Gantt Diagram After zooming in, we can observe numerous blocks of varying colors, each block representing a task. Blocks of diverse colors signify different types of tasks. When we double-click on any block, a pop-up window will show related status about that task, such as its type and which worker (CPU/GPU) it belongs to, etc. \image html vite_task_state.png width=60% \image latex vite_task_state.png "" width=\textwidth The state information displayed in the pop-up window can be: \li Value: refers to a type of task, which can be assigned as a task name (instead of the default \c unknown) by filling the optional starpu_codelet::name, or assigning it a performance model. The name can also be set with the field starpu_task::name or by using \ref STARPU_NAME when calling starpu_task_insert() \li Container: refers to a specific worker where the computation was performed, could be CPU or CUDA \li Type: indicates the type of this block, most often "Worker State" \li Date: represents a range of dates during which the computation was performed \li Duration: represents the duration of the computation \li Footprint: provides the data footprint of the task (used as indexing base for performance models) \li GFlop: represents the number of Gflop performed during the computation, as set in starpu_task::flops. \li Iteration: refers to the iteration number of the computation, as set by starpu_iteration_push() at the beginning of submission loops and starpu_iteration_pop() at the end of submission loops \li JobId: represents a unique identifier for the specific task, as returned by starpu_task_get_job_id() \li NumaNodes: refers to the NUMA node where the data is stored, the environment variable \ref STARPU_FXT_EVENTS needs to contain \c TASK_VERBOSE_EXTRA, otherwise it will be -1 \li Params: represents parameters or input/output types and sizes, possibly indicating the dimensions of the matrices \li Size: represents the size of the data being operated on in bytes \li Subiteration: represents a sub-iteration number if the computation was part of a larger iteration or loop, as set by starpu_iteration_push() \li SubmitOrder: represents the order in which the task was submitted by the application \li Tag: represents a unique identifier for the task, which can be set either through starpu_task::tag_id or by using \ref STARPU_TAG or \ref STARPU_TAG_ONLY when calling starpu_task_insert() \li X: represents an X-coordinate index of the first data written by the task, which was set by starpu_data_set_coordinates() or starpu_data_set_coordinates_array() function. We can also get the coordinates of the data with starpu_data_get_coordinates_array() function \li Y: represents an Y-coordinate index of the first data written by the task, which was set by starpu_data_set_coordinates() or starpu_data_set_coordinates_array() function. We can also get the coordinates of the data with starpu_data_get_coordinates_array() function \li Color: represents the color RGB value associated with the task. Tasks are by default shown in green. To use a different color for every type of task, we can specify the option -c to starpu_fxt_tool or in \ref STARPU_GENERATE_TRACE_OPTIONS. Tasks can also be given a specific color by setting the field starpu_codelet::color or the starpu_task::color. When we call starpu_task_insert(), we can use ::STARPU_TASK_COLOR to set the color. Colors are expressed with the following format \c 0xRRGGBB (e.g. \c 0xFF0000 for red). See basic_examples/task_insert_color for examples on how to assign colors In the shown figure, the set of color as following: \li Dark green represents GEMM \li Light green represents SYRK \li Blue represents TRSM \li Red indicates that the unit is idle, there are no tasks at the moment, it is currently waiting or in a sleeping state \li Magenta represents FetchingInput To modify the colors in Vite interface, select "Preferences" then "Settings" in the options bar, and then choose the "States" tab in the newly opened window to select different colors for different operations, as shown in the figure below. One has to click the reload button at the top left to reload the trace with the new colors. \image html vite_preferences.png width=60% \image latex vite_preferences.png "" width=\textwidth \subsubsection CurvesInGantt Curves in Gantt Diagram We can see that there is a curve below task blocks, which represents the corresponding GFlop/s. Double-clicking near the curve will display the current GFlop/s information in a pop-up window (as shown in the figure). If we only click on the curve, a vertical red line shows up, and we can read on it the GFlop/s values of all the curves at the same time. \image html vite_gflops.png width=60% \image latex vite_gflops.png "" width=\textwidth For GPUs, there are three additional curves above the task blocks that can be double-clicked to open a pop-up window to view information. Let's zoom in on the three curves during the entire execution process as illustrated in the figure: \image html vite_gpu_memory_select.png width=60% \image latex vite_gpu_memory_select.png "" width=\textwidth As shown in the figure below, the top curve represents the amount of GPU-managed memory in MBytes, while the bottom two curves represent the data transfer between tasks on the CPU and GPU, and between tasks on different GPUs. They respectively indicate the incoming and outgoing data transfer bandwidth. By looking at the memory curve, we can observe that the memory usage kept increasing at first, but due to the reutilization of the allocations by StarPU, the curve gradually became stable later on. \image html vite_gpu_memory.png width=60% \image latex vite_gpu_memory.png "" width=\textwidth \subsubsection StatesInGantt States in Gantt Diagram Above these three curves, we can see some blocks which represent driver copy (see the top of the figure below), i.e. a memory copy. The light green blocks represent the actual copies, the dark green blocks represent asynchronous copy submissions, and the burgundy blocks represent allocating and freeing. Double-clicking on a block allows us to view relevant information in the pop-up window. Here, a couple of issues may show up: \li If the "Allocating/Freeing" parts take a long time, it means that StarPU does not manage to re-use data buffers allocated in the GPU. If you have e.g. a lot of tiles with different sizes, it may be useful to approximate the allocation size, by using e.g. starpu_matrix_data_register_allocsize() with the proper nx / ld / ny, but an allocation size that is rounded up, so that buffers with that same rounded size can be shared. \li If the "Asynchronous copy submission" parts take a long time, it means that the CPU buffers are not pinned: you need to make sure to use starpu_malloc(), or starpu_memory_pin() (see \ref CUDA-specificOptimizations) so that the CPU buffers are pinned so that the GPU driver can efficiently process transfers asynchronously (in the "Actual copy" part) rather than synchronously (in the "Asynchronous copy submission" part). \image html vite_gpu_curves.png width=60% \image latex vite_gpu_curves.png "" width=\textwidth Below the GPU task blocks and GFlops curve (see the bottom of the figure above), we can see some other blocks that represent the CPU waiting for the GPU to complete the task. During time, CPU can do variable actions which are represented by blocks of different colors, such as: \li Dark green represents progressing, it keeps polling for task or data transfer completion \li Brown-yellow represents scheduling \li Burgundy represents submitting task \li Lake blue represents executing, it is executing the application codelet function. Here it is very short because the codelet just submits a kernel asynchronously. \li Dark blue represents callback \li Chestnut represents overhead. This state is not supposed to be long, as it represents everything that we did not classify as an operation that is supposed to be long like the operations mentioned above. If you find situations where some overhead is long, this is a bug worth reporting so we can fix it. and we can always double-click on the block to view relevant information in the pop-up window. \subsubsection TransfersInGantt Transfers in Gantt Diagram We can horizontally zoom in on a section of the Gantt diagram, and deselect the "No arrows" option. This will allow us to see a complete process of data transfer, as shown in the following figure: \image html vite_trans_arrow.png width=60% \image latex vite_trans_arrow.png "" width=\textwidth In the above figure, we can see a long segment of magenta color in CUDA2_0 task blocks. At the same time, we can see that there are numerous transfers between other workers during this time period. This indicates that CUDA2_0 is waiting for the completion of the data transfers needed by the task it wants to execute. \subsubsection SchedulerInGantt Scheduler in Gantt Diagram At the top of the entire Gantt diagram, there are three curves that represent the information of the scheduler. Let's zoom in on the three curves during the entire execution process as illustrated in the figure below: \image html vite_top_curve_select.png width=60% \image latex vite_top_curve_select.png "" width=\textwidth As shown in the figure below, from top to bottom, they respectively indicate the number of submitted uncompleted tasks, the number of ready tasks, and the total GFlop/s for this moment. By double-clicking on the curves, we can view relevant information in the pop-up window. \image html vite_top_curve.png width=60% \image latex vite_top_curve.png "" width=\textwidth \subsubsection MainThreadInGantt Main Thread in Gantt Diagram At the very bottom of the entire Gantt diagram, we will see a red bar, which represents the main thread waiting for tasks. In front of the red bar (see the figure below), there are some dark red bars, which represent the main thread submitting tasks. \image html vite_bottom_curve.png width=60% \image latex vite_bottom_curve.png "" width=\textwidth Below these red bars, we can see some white vertical lines with small circles on top, which represent events. The default events can be either task push or task pop or task wait for all. The application can inject its own events at any desired moment with the function starpu_fxt_trace_user_event() or starpu_fxt_trace_user_event_string(). Similarly, double-clicking on the white bars allows you to see relevant information in the pop-up window. \subsubsection StatisticsInGantt Statistics in Gantt Diagram To get statistics on the time spent in runtime overhead, we can use the statistics plugin of ViTE. In the Preferences menu, select Plugins. In "States Type", select "Worker State". Then click on "Reload" to update the histogram. The red "Idle" percentages are due to lack of parallelism, the "FetchingInput" percentages are due to waiting for data transfers. The brown "Overhead" and "Scheduling" percentages are due to the overhead of the runtime and of the scheduler. \image html vite_worker_state.png width=60% \image latex vite_worker_state.png "" width=\textwidth \subsection CreatingADAGWithGraphviz Creating a DAG With Graphviz Another generated trace file is a task graph described using the DOT language. The file, created in the current directory, is named dag.dot file in the current directory. It is possible to get a graphical output of the graph by using the graphviz library: \verbatim $ dot -Tpdf dag.dot -o output.pdf \endverbatim \subsection TraceTaskDetails Getting Task Details Another generated trace file gives details on the executed tasks. The file, created in the current directory, is named tasks.rec. This file is in the \c recutils format, i.e. Field: value lines, and empty lines are used to separate each task. This can be used as a convenient input for various ad-hoc analysis tools. By default, it only contains information about the actual execution. Performance models can be obtained by running starpu_tasks_rec_complete on it: \verbatim $ starpu_tasks_rec_complete tasks.rec tasks2.rec \endverbatim which will add EstimatedTime lines which contain the performance model-estimated time (in µs) for each worker starting from 0. Since it needs the performance models, it needs to be run the same way as the application execution, or at least with STARPU_HOSTNAME set to the hostname of the machine used for execution, to get the performance models of that machine. Another possibility is to obtain the performance models as an auxiliary perfmodel.rec file, by using the starpu_perfmodel_recdump utility: \verbatim $ starpu_perfmodel_recdump tasks.rec -o perfmodel.rec \endverbatim One can also simply call starpu_task_get_name() to get the name of a task. \subsection TraceSchedTaskDetails Getting Scheduling Task Details The file, sched_tasks.rec, created in the current directory, in the \c recutils format, gives information about the tasks scheduling, and lists the push and pop actions of the scheduler. For each action, it gives the timestamp, the job priority and the job id. Each action is separated from the next one by empty lines. The job id associated with the task can be retrieved by calling starpu_task_get_job_id(). \subsection MonitoringActivity Monitoring Activity Another generated trace file is an activity trace. The file, created in the current directory, is named activity.data. A profile of the application showing the activity of StarPU during the execution of the program can be generated: \verbatim $ starpu_workers_activity activity.data \endverbatim This will create a file named activity.eps in the current directory. This picture is composed of two parts. The first part shows the activity of the different workers. The green sections indicate which proportion of the time was spent executed kernels on the processing unit. The red sections indicate the proportion of time spent in StarPU: an important overhead may indicate that the granularity may be too low, and that bigger tasks may be appropriate to use the processing unit more efficiently. The black sections indicate that the processing unit was blocked because there was no task to process: this may indicate a lack of parallelism, which may be alleviated by creating more tasks when it is possible. The second part of the picture activity.eps is a graph showing the evolution of the number of tasks available in the system during the execution. Ready tasks are shown in black, and tasks that are submitted but not schedulable yet are shown in grey. \subsection Animation Getting Modular Schedular Animation When using modular schedulers (i.e. schedulers which use a modular architecture, and whose name start with "modular-"), the call to starpu_fxt_tool will also produce a trace.html file which can be viewed in a javascript-enabled web browser. It shows the flow of tasks between the components of the modular scheduler. \subsection TimeBetweenSendRecvDataUse Analyzing Time Between MPI Data Transfer and Use by Tasks starpu_fxt_tool produces a file called comms.rec which describes all MPI communications. The script starpu_send_recv_data_use.py uses this file and tasks.rec in order to produce two graphs: the first one shows durations between the reception of data and their usage by a task and the second one plots the same graph but with elapsed time between send and usage of a data by the sender. \image html trace_recv_use.png \image latex trace_recv_use.png "" width=\textwidth \image html trace_send_use.png \image latex trace_send_use.png "" width=\textwidth \subsection NumberEvents Number of events in trace files When launched with the option -number-events, starpu_fxt_tool will produce a file named number_events.data. This file contains the number of events for each event type. Events are represented with their key. To convert event keys to event names, you can use the starpu_fxt_number_events_to_names.py script: \verbatim $ starpu_fxt_number_events_to_names.py number_events.data \endverbatim The number of recorded events (and thus the performance overhead introduced by tracing) can be reduced by setting which categories of events to record with the environment variable \ref STARPU_FXT_EVENTS. \subsection LimitingScopeTrace Limiting The Scope Of The Trace For computing statistics, it is useful to limit the trace to a given portion of the time of the whole execution. This can be achieved by calling \code{.c} starpu_fxt_autostart_profiling(0) \endcode before calling starpu_init(), to prevent tracing from starting immediately. Then \code{.c} starpu_fxt_start_profiling(); \endcode and \code{.c} starpu_fxt_stop_profiling(); \endcode can be used around the portion of code to be traced. This will show up as marks in the trace, and states of workers will only show up for that portion. \section PerformanceOfCodelets Performance Of Codelets After calibrating performance models of codelets (see \ref PerformanceModelExample and \ref PerformanceModelCalibration), they can be examined by using the tool starpu_perfmodel_display: \verbatim $ starpu_perfmodel_display -l file: file: file: file: file: \endverbatim Here, the codelets of the example lu are available. We can examine the performance of the kernel 22 (in micro-seconds), which is history-based: \verbatim $ starpu_perfmodel_display -s starpu_slu_lu_model_gemm performance model for cpu # hash size mean dev n 57618ab0 19660800 2.851069e+05 1.829369e+04 109 performance model for cuda_0 # hash size mean dev n 57618ab0 19660800 1.164144e+04 1.556094e+01 315 performance model for cuda_1 # hash size mean dev n 57618ab0 19660800 1.164271e+04 1.330628e+01 360 performance model for cuda_2 # hash size mean dev n 57618ab0 19660800 1.166730e+04 3.390395e+02 456 \endverbatim We can see that for the given size, over a sample of a few hundreds of execution, the GPUs are about 20 times faster than the CPUs (numbers are in us). The standard deviation is extremely low for the GPUs, and less than 10% for CPUs. This tool can also be used for regression-based performance models. It will then display the regression formula, and in the case of non-linear regression, the same performance log as for history-based performance models: \verbatim $ starpu_perfmodel_display -s non_linear_memset_regression_based performance model for cpu_impl_0 Regression : #sample = 1400 Linear: y = alpha size ^ beta alpha = 1.335973e-03 beta = 8.024020e-01 Non-Linear: y = a size ^b + c a = 5.429195e-04 b = 8.654899e-01 c = 9.009313e-01 # hash size mean stddev n a3d3725e 4096 4.763200e+00 7.650928e-01 100 870a30aa 8192 1.827970e+00 2.037181e-01 100 48e988e9 16384 2.652800e+00 1.876459e-01 100 961e65d2 32768 4.255530e+00 3.518025e-01 100 ... \endverbatim The same can also be achieved by using StarPU's library API, see \ref API_Performance_Model and notably the function starpu_perfmodel_load_symbol(). The source code of the tool starpu_perfmodel_display can be a useful example. An XML output can also be printed by using the -x option: \verbatim $ tools/starpu_perfmodel_display -x -s non_linear_memset_regression_based \endverbatim The tool starpu_perfmodel_plot can be used to draw performance models. It writes a .gp file in the current directory, to be run with the tool gnuplot, which generates the corresponding curve both in postscript and png format. \verbatim $ tools/starpu_perfmodel_plot -s non_linear_memset_regression_based $ gnuplot starpu_non_linear_memset_regression_based.gp $ gv starpu_non_linear_memset_regression_based.eps $ geeqie starpu_non_linear_memset_regression_based.png \endverbatim \image html starpu_non_linear_memset_regression_based.png \image latex starpu_non_linear_memset_regression_based.png "" width=\textwidth When the field starpu_task::flops is set (or \ref STARPU_FLOPS is passed to starpu_task_insert()), starpu_perfmodel_plot can directly draw a GFlops/s curve, by simply adding the -f option: \verbatim $ starpu_perfmodel_plot -f -s chol_model_potrf \endverbatim This will however disable displaying the regression model, for which we can not compute GFlops/s. \image html starpu_chol_model_11_type.png \image latex starpu_chol_model_11_type.png "" width=\textwidth When the FxT trace file prof_file_something has been generated, it is possible to get a profiling of each codelet by calling: \verbatim $ starpu_fxt_tool -i /tmp/prof_file_something $ starpu_codelet_profile distrib.data codelet_name \endverbatim This will create profiling data files, and a distrib.data.gp file in the current directory, which draws the distribution of codelet time over the application execution, according to data input size. \image html distrib_data.png \image latex distrib_data.png "" width=\textwidth This is also available in the tool starpu_perfmodel_plot, by passing it the fxt trace: \verbatim $ starpu_perfmodel_plot -s non_linear_memset_regression_based -i /tmp/prof_file_foo_0 \endverbatim It will produce a .gp file which contains both the performance model curves, and the profiling measurements. \image html starpu_non_linear_memset_regression_based_2.png \image latex starpu_non_linear_memset_regression_based_2.png "" width=\textwidth If you have the statistical tool R installed, you can additionally use \verbatim $ starpu_codelet_histo_profile distrib.data \endverbatim Which will create one .pdf file per codelet and per input size, showing a histogram of the codelet execution time distribution. \image html distrib_data_histo.png \image latex distrib_data_histo.png "" width=\textwidth \section EnergyOfCodelets Energy Of Codelets A performance model of the energy of codelets can also be recorded thanks to the starpu_codelet::energy_model field of the starpu_codelet structure. StarPU usually cannot record this automatically, since the energy measurement probes are usually not fine-grain enough. It is however possible to measure it by writing a program that submits batches of tasks, let StarPU measure the energy requirement of the batch, and compute an average, see \ref MeasuringEnergyandPower . The energy performance model can then be displayed in Joules with starpu_perfmodel_display just like the time performance model. The starpu_perfmodel_plot needs an extra -e option to display the proper unit in the graph: \verbatim $ tools/starpu_perfmodel_plot -e -s non_linear_memset_regression_based_energy $ gnuplot starpu_non_linear_memset_regression_based_energy.gp $ gv starpu_non_linear_memset_regression_based_energy.eps \endverbatim \image html starpu_non_linear_memset_regression_based_energy.png \image latex starpu_non_linear_memset_regression_based_energy.png "" width=\textwidth The -f option can also be used to display the performance in terms of GFlops/s/W, i.e. the efficiency: \verbatim $ tools/starpu_perfmodel_plot -f -e -s non_linear_memset_regression_based_energy $ gnuplot starpu_gflops_non_linear_memset_regression_based_energy.gp $ gv starpu_gflops_non_linear_memset_regression_based_energy.eps \endverbatim \image html starpu_gflops_non_linear_memset_regression_based_energy.png \image latex starpu_gflops_non_linear_memset_regression_based_energy.png "" width=\textwidth We clearly see here that it is much more energy-efficient to stay in the L3 cache. One can combine the two time and energy performance models to draw Watts: \verbatim $ tools/starpu_perfmodel_plot -se non_linear_memset_regression_based non_linear_memset_regression_based_energy $ gnuplot starpu_power_non_linear_memset_regression_based.gp $ gv starpu_power_non_linear_memset_regression_based.eps \endverbatim \image html starpu_power_non_linear_memset_regression_based.png \image latex starpu_power_non_linear_memset_regression_based.png "" width=\textwidth \section DataTrace Data trace and tasks length It is possible to get statistics about tasks length and data size by using : \verbatim $ starpu_fxt_data_trace filename [codelet1 codelet2 ... codeletn] \endverbatim Where filename is the FxT trace file and codeletX the names of the codelets you want to profile (if no names are specified, starpu_fxt_data_trace will profile them all). This will create a file, data_trace.gp which can be executed to get a .eps image of these results. On the image, each point represents a task, and each color corresponds to a codelet. \image html data_trace.png \image latex data_trace.png "" width=\textwidth \section TraceStatistics Trace Statistics More than just codelet performance, it is interesting to get statistics over all kinds of StarPU states (allocations, data transfers, etc.). This is particularly useful to check what may have gone wrong in the accuracy of the SimGrid simulation. This requires the R statistical tool, with the plyr, ggplot2 and data.table packages. If your system distribution does not have packages for these, one can fetch them from CRAN: \verbatim $ R > install.packages("plyr") > install.packages("ggplot2") > install.packages("data.table") > install.packages("knitr") \endverbatim The pj_dump tool from pajeng is also needed (see https://github.com/schnorr/pajeng) One can then get textual or .csv statistics over the trace states: \verbatim $ starpu_paje_state_stats -v native.trace simgrid.trace "Value" "Events_native.csv" "Duration_native.csv" "Events_simgrid.csv" "Duration_simgrid.csv" "Callback" 220 0.075978 220 0 "chol_model_potrf" 10 565.176 10 572.8695 "chol_model_trsm" 45 9184.828 45 9170.719 "chol_model_gemm" 165 64712.07 165 64299.203 $ starpu_paje_state_stats native.trace simgrid.trace \endverbatim An other way to get statistics of StarPU states (without installing R and pj_dump) is to use the starpu_trace_state_stats.py script, which parses the generated trace.rec file instead of the paje.trace file. The output is similar to the previous script, but it doesn't need any dependencies. The different prefixes used in trace.rec are: \verbatim E: Event type N: Event name C: Event category W: Worker ID T: Thread ID S: Start time \endverbatim Here's an example on how to use it: \verbatim $ starpu_trace_state_stats.py trace.rec | column -t -s "," "Name" "Count" "Type" "Duration" "Callback" 220 Runtime 0.075978 "chol_model_potrf" 10 Task 565.176 "chol_model_trsm" 45 Task 9184.828 "chol_model_gemm" 165 Task 64712.07 \endverbatim starpu_trace_state_stats.py can also be used to compute the different efficiencies. Refer to the usage description to show some examples. And one can plot histograms of execution times, of several states, for instance: \verbatim $ starpu_paje_draw_histogram -n chol_model_potrf,chol_model_trsm,chol_model_gemm native.trace simgrid.trace \endverbatim and see the resulting pdf file: \image html paje_draw_histogram.png \image latex paje_draw_histogram.png "" width=\textwidth A quick statistical report can be generated by using: \verbatim $ starpu_paje_summary native.trace simgrid.trace \endverbatim it includes gantt charts, execution summaries, as well as state duration charts and time distribution histograms. Other external Paje analysis tools can be used on these traces, one just needs to sort the traces by timestamp order (which not guaranteed to make recording more efficient): \verbatim $ starpu_paje_sort paje.trace \endverbatim \section PapiCounters PAPI counters Performance counter values could be obtained from the PAPI framework if ./configure detected the libpapi. In Debian, the libpapi-dev package provides the required files. Additionally, the papi-tools package contains a set of useful tools, for example papi_avail to see which counters are available. To be able to use Papi counters, one may need to reduce the level of the kernel parameter kernel.perf_event_paranoid to 2 or below. See https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html for the security impact of this parameter. Then one has to set the \ref STARPU_PROFILING environment variable to 1 and specify which events to record with the \ref STARPU_PROF_PAPI_EVENTS environment variable. For instance: \verbatim export STARPU_PROFILING=1 STARPU_PROF_PAPI_EVENTS="PAPI_TOT_INS PAPI_TOT_CYC" \endverbatim The comma can also be used to separate events to monitor. In the current simple implementation, only CPU tasks have their events measured and require CPUs that support the PAPI events. It is important to note that not all events are available on all systems, and general PAPI recommendations should be followed. The counter values can be accessed using the profiling interface: \code{.c} task->profiling_info->papi_values \endcode Also, it can be accessed and/or saved with tracing when using \ref STARPU_FXT_TRACE. With the use of starpu_fxt_tool the file papi.rec is generated containing the following triple: \verbatim Task Id Event Id Value \endverbatim External tools like rec2csv can be used to convert this rec file to a csv file, where each line represents a value for an event for a task. \section TheoreticalLowerBoundOnExecutionTime Theoretical Lower Bound On Execution Time StarPU can record a trace of what tasks are needed to complete the application, and then, by using a linear system, provide a theoretical lower bound of the execution time (i.e. with an ideal scheduling). The computed bound is not really correct when not taking into account dependencies, but for an application which have enough parallelism, it is very near to the bound computed with dependencies enabled (which takes a huge lot more time to compute), and thus provides a good-enough estimation of the ideal execution time. Then there is an example to show how to use this. For kernels with history-based performance models (and provided that they are completely calibrated), StarPU can very easily provide a theoretical lower bound for the execution time of a whole set of tasks. See for instance examples/lu/lu_example.c: before submitting tasks, call the function starpu_bound_start(), and after complete execution, call starpu_bound_stop(). starpu_bound_print_lp() or starpu_bound_print_mps() can then be used to output a Linear Programming problem corresponding to the schedule of your tasks. Or starpu_bound_print_dot() can be used to print a task dependency graph in the DOT format. Run it through lp_solve or any other linear programming solver, and that will give you a lower bound for the total execution time of your tasks. If StarPU was compiled with the library glpk installed, starpu_bound_compute() can be used to solve it immediately and get the optimized minimum, in ms. Its parameter integer allows deciding whether integer resolution should be computed and returned. Besides to solve it immediately and get the optimized minimum starpu_bound_print() can also print the statistics of actual execution and theoretical upper bound. The deps parameter tells StarPU whether to take tasks, implicit data, and tag dependencies into account. Tags released in a callback or similar are not taken into account, only tags associated with a task are. It must be understood that the linear programming problem size is quadratic with the number of tasks and thus the time to solve it will be very long, it could be minutes for just a few dozen tasks. You should probably use lp_solve -timeout 1 test.pl -wmps test.mps to convert the problem to MPS format and then use a better solver, glpsol might be better than lp_solve for instance (the --pcost option may be useful), but sometimes doesn't manage to converge. cbc might look slower, but it is parallel. For lp_solve, be sure to try at least all the -B options. For instance, we often just use lp_solve -cc -B1 -Bb -Bg -Bp -Bf -Br -BG -Bd -Bs -BB -Bo -Bc -Bi , and the -gr option can also be quite useful. The resulting schedule can be observed by using the tool starpu_lp2paje, which converts it into the Paje format. Data transfer time can only be taken into account when deps is set. Only data transfers inferred from implicit data dependencies between tasks are taken into account. Other data transfers are assumed to be completely overlapped. Setting deps to 0 will only take into account the actual computations on processing units. However, it still properly takes into account the varying performances of kernels and processing units, which is quite more accurate than just comparing StarPU performances with the fastest of the kernels being used. The prio parameter tells StarPU whether to simulate taking into account the priorities as the StarPU scheduler would, i.e. schedule prioritized tasks before less prioritized tasks, to check to which extend this results to a less optimal solution. This increases even more computation time. \section starvz Trace visualization with StarVZ Creating views with StarVZ (see: https://github.com/schnorr/starvz) is made up of two steps. The initial stage consists of a pre-processing of the traces generated by the application, while the second one consists of the analysis itself and is carried out with R packages' aid. StarVZ is available at CRAN (https://cran.r-project.org/package=starvz) and depends on \c pj_dump (from \c pajeng) and \c rec2csv (from \c recutils). To download and install StarVZ, it is necessary to have \c R, \c pajeng, and \c recutils: \verbatim # For pj_dump and rec2csv apt install -y pajeng recutils # For R apt install -y r-base libxml2-dev libssl-dev libcurl4-openssl-dev libgit2-dev libboost-dev \endverbatim To install the StarVZ, the following command can be used: \verbatim echo "install.packages('starvz', repos = 'https://cloud.r-project.org')" | R --vanilla \endverbatim To generate traces from an application, it is necessary to set \ref STARPU_GENERATE_TRACE and build StarPU with FxT. Then, StarVZ can be used on a folder with StarPU FxT traces to produce a default view: \verbatim export PATH=$(Rscript -e 'cat(system.file("tools/", package = "starvz"), sep="\n")'):$PATH starvz /foo/path-to-fxt-files \endverbatim An example of default view: \image html starvz_visu.png \image latex starvz_visu.png "" width=\textwidth One can also use existing trace files (\c paje.trace, \c tasks.rec, \c data.rec, \c papi.rec and \c dag.dot) skipping the StarVZ internal call to starpu_fxt_tool with: \verbatim starvz --use-paje-trace /foo/path-to-trace-files \endverbatim Alternatively, each StarVZ step can be executed separately. Step 1 can be used on a folder with: \verbatim starvz -1 /foo/path-to-fxt-files \endverbatim Then the second step can be executed directly in R. StarVZ enables a set of different plots that can be configured on a .yaml file. A default file is provided (default.yaml); also, the options can be changed directly in R. \verbatim library(starvz) library(dplyr) dtrace <- starvz_read("./", selective = FALSE) # show idleness ratio dtrace$config$st$idleness = TRUE # show ABE bound dtrace$config$st$abe$active = TRUE # find the last task with dplyr dtrace$config$st$tasks$list = dtrace$Application %>% filter(End == max(End)) %>% .$JobId # show last task dependencies dtrace$config$st$tasks$active = TRUE dtrace$config$st$tasks$levels = 50 plot <- starvz_plot(dtrace) \endverbatim An example of visualization follows: \image html starvz_visu_r.png \image latex starvz_visu_r.png "" width=\textwidth \section EclipsePlugin StarPU Eclipse Plugin The StarPU Eclipse Plugin provides the ability to generate the different traces directly from the Eclipse IDE. \subsection EclipseInstallation Eclipse Installation Download the Eclipse installer from https://www.eclipse.org/downloads/packages/installer. When you run the installer, click on Eclipse IDE for Java Developers to start the installation process. \image html eclipse_installer.png \image latex eclipse_installer.png "" width=10cm To be able to develop C/C++ applications, you need to install the CDT plugin. To do so, go to the Help dropdown menu at the top of the Eclipse window, choose Install New Software .... In the new window, enter the URL http://download.eclipse.org/tools/cdt/releases/9.10 into the box Work with and press the return key. \image html eclipse_install_cdt.png \image latex eclipse_install_cdt.png "" width=10cm You need then to select CDT Main Features, then click the button Next twice, accept the terms of the license, and click the button Finish. Eclipse will ask you to restart. To be able to compile the plugin, you need to install the plugin development environment (PDE). To do so, go to the menu Help, choose Eclipse Marketplace.... In the new window, enter PDE into the box Find and press the return key. \image html eclipse_install_pde.png \image latex eclipse_install_pde.png "" width=10cm You can then click on the button Install of the Eclipse PDE latest. You may need to confirm the installation, then accept the terms of the license, and finally restart the Eclipse IDE. The installation is now done. \subsection PluginInstallation StarPU Eclipse Plugin Compilation and Installation StarPU can now be compiled and installed with its Eclipse plugin. To do so, you first need to configure StarPU with the option \ref enable-eclipse-plugin "--enable-eclipse-plugin". The Eclipse IDE executable \c eclipse must be in your \c PATH. \verbatim export PATH=$HOME/usr/local/eclipse/java-2021-03/eclipse:$PATH mkdir build cd build ../configure --prefix=$HOME/usr/local/starpu --enable-eclipse-plugin make make install \endverbatim The StarPU Eclipse plugin is installed in the directory \c dropins. \verbatim $ ls $HOME/usr/local/eclipse/java-2021-03/eclipse/dropins StarPU_1.0.0.202105272056.jar \endverbatim In the next section, we will show you how to use the plugin. \subsection PluginInstruction StarPU Eclipse Plugin Instruction Once StarPU has been configured and installed with its Eclipse plugin, you first need to set up your environment for StarPU. \verbatim cd $HOME/usr/local/starpu source ./bin/starpu_env \endverbatim To generate traces from the application, it is necessary to set \ref STARPU_FXT_TRACE to 1. \verbatim export STARPU_FXT_TRACE=1 \endverbatim The eclipse workspace together with an example is available in \c lib/starpu/eclipse-plugin. \verbatim cd ./lib/starpu/eclipse-plugin eclipse -data workspace \endverbatim You can then open the file \c hello/hello.c, and build the application by pressing \c Ctrl-B. \image html eclipse_hello_build.png \image latex eclipse_hello_build.png "" width=\textwidth The application can now be executed. \image html eclipse_hello_run.png \image latex eclipse_hello_run.png "" width=\textwidth After executing the C/C++ StarPU application, one can use the StarPU plugin to generate and visualise the task graph of the application. The StarPU plugin eclipse is either available through the icons in the upper toolbar, or from the dropdown menu \c StarPU. \image html eclipse_hello_plugin.png \image latex eclipse_hello_plugin.png "" width=\textwidth To start, one first need to run the StarPU FxT tool, either through the \c FxT icon of the toolbar, or from the menu \c StarPU / StarPU FxT Tool. This will call the tool \c starpu_fxt_tool to generate traces for your application execution. A message dialog box is displayed to confirm the generation of the different traces. \image html eclipse_hello_fxt.png \image latex eclipse_hello_fxt.png "" width=\textwidth One of the generated files is a Paje trace which can be viewed with ViTE, a trace explorer. To open and visualise the file \c paje.trace with ViTE, one can select the second command of the StarPU menu, which is named Generate Paje Trace, or click on the second icon named Trace in the toolbar. \image html eclipse_hello_paje_trace.png \image latex eclipse_hello_paje_trace.png "" width=\textwidth \image html eclipse_hello_vite.png \image latex eclipse_hello_vite.png "" width=\textwidth Another generated trace file is a task graph described using the DOT language. It is possible to get a graphical output of the graph by calling the graphviz library. To do this, one can click on the third command of StarPU menu. A task graph of the application in the \c png format is then generated. \image html eclipse_hello_graph.png \image latex eclipse_hello_graph.png "" width=\textwidth In StarPU eclipse plugin, one can display the graph task directly from eclipse, or through a web browser. To do this, there is another command named Generate SVG graph in the StarPU menu or HGraph in the toolbar of eclipse. From the HTML file, you can see the graph task, and by clicking on a task name, it will open the C file in which the task submission was called (if you have an editor which understands the syntax \c href="file.c#123"). \image html eclipse_hello_svg_graph.png \image latex eclipse_hello_svg_graph.png "" width=\textwidth \image html eclipse_hello_hgraph.png \image latex eclipse_hello_hgraph.png "" width=\textwidth \section MemoryFeedback Memory Feedback It is possible to enable memory statistics. To do so, you need to pass the option \ref enable-memory-stats "--enable-memory-stats" when running configure. It is then possible to call the function starpu_data_display_memory_stats() to display statistics about the current data handles registered within StarPU. Moreover, statistics will be displayed at the end of the execution on data handles which have not been cleared out. This can be disabled by setting the environment variable \ref STARPU_MEMORY_STATS to 0. For example, by adding a call to the function starpu_data_display_memory_stats() in the fblock example before unpartitioning the data, one will get something similar to: \verbatim $ STARPU_MEMORY_STATS=1 ./examples/filters/fblock ... #--------------------- Memory stats : #------- Data on Node #2 #----- Data : 0x5562074e8670 Size : 144 #-- Data access stats /!\ Work Underway Node #0 Direct access : 0 Loaded (Owner) : 0 Loaded (Shared) : 0 Invalidated (was Owner) : 1 Node #2 Direct access : 0 Loaded (Owner) : 1 Loaded (Shared) : 0 Invalidated (was Owner) : 0 #------- Data on Node #3 #----- Data : 0x5562074e9338 Size : 96 #-- Data access stats /!\ Work Underway Node #0 Direct access : 0 Loaded (Owner) : 0 Loaded (Shared) : 0 Invalidated (was Owner) : 1 Node #3 Direct access : 0 Loaded (Owner) : 1 Loaded (Shared) : 0 Invalidated (was Owner) : 0 #--------------------- ... \endverbatim \section DataStatistics Data Statistics Different data statistics can be displayed at the end of the execution of the application. To enable them, you need to define the environment variable \ref STARPU_ENABLE_STATS. When calling starpu_shutdown() various statistics will be displayed, execution, MSI cache statistics, allocation cache statistics, and data transfer statistics. The display can be disabled by setting the environment variable \ref STARPU_STATS to 0. If the environment variable \ref STARPU_BUS_STATS is defined, you can call starpu_profiling_bus_helper_display_summary() to display statistics about the bus. If the environment variable \ref STARPU_WORKER_STATS is defined, you can call starpu_profiling_worker_helper_display_summary() to display statistics about the workers. You can also call starpu_display_stats() which call both starpu_profiling_bus_helper_display_summary() and starpu_profiling_worker_helper_display_summary() at the same time. \verbatim $ ./examples/cholesky/cholesky_tag Computation took (in ms) 518.16 Synthetic GFlops : 44.21 #--------------------- MSI cache stats : TOTAL MSI stats hit 1622 (66.23 %) miss 827 (33.77 %) ... \endverbatim \verbatim $ STARPU_STATS=0 ./examples/cholesky/cholesky_tag Computation took (in ms) 518.16 Synthetic GFlop/s : 44.21 \endverbatim // TODO: data transfer stats are similar to the ones displayed when // setting STARPU_BUS_STATS \section TraceMpi Tracing MPI applications When an MPI execution is traced, especially if the execution is on several nodes, clock synchronization issues can appear. One may notice them mainly on communications (they are received before they are sent, for instance). Each processor can call the function starpu_profiling_set_id() to set the ID used for the profiling trace filename. This function can be useful when executing an MPI program on several nodes, as it enables each processor to set a unique ID that helps to differentiate its trace file from the files generated by other processors. By doing this, it becomes easier to analyze and compare the profiling results of each processor separately, which is particularly helpful for large-scale parallel applications. By default, StarPU does two MPI barriers with all MPI processes: one at the beginning of the application execution and one at the end. Then, \c starpu_fxt_tool considers all processes leave the barriers at the exact same time, which makes two points for time synchronization between MPI processes. However, a simple MPI barrier can be not precise enough, because the assumption _all processes leave the barriers at the exact same time_ is in reality false. To have a more precise barrier, one may use the [mpi_sync_clocks library](https://gitlab.inria.fr/pm2/pm2/-/tree/master/mpi_sync_clocks) (automatically provided when StarPU is built with NewMadeleine, but it can also be used with other MPI libraries). It provides a *synchronized* barrier, which aims at actually releasing all processes at the exact same time. Unfortunately, the gained precision costs some time (several seconds per barrier), that is why one can disable this precise synchronization with the environment variable \ref STARPU_MPI_TRACE_SYNC_CLOCKS set to \c 0, and use the faster MPI barrier instead. \section VerboseTraces Verbose Traces Traces can also be inspected by hand by using the tool fxt_print, for instance: \verbatim $ fxt_print -o -f /tmp/prof_file_something \endverbatim Timings are in nanoseconds (while timings as seen in ViTE are in milliseconds). */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_performances/online_performance_tools.doxy000066400000000000000000001415571507764646700321340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page OnlinePerformanceTools Online Performance Tools \section On-linePerformanceFeedback On-line Performance Feedback Some examples which apply online performance monitoring are in the directory tests/perfmodels/ \subsection EnablingOn-linePerformanceMonitoring Enabling On-line Performance Monitoring In order to enable online performance monitoring, the application can call starpu_profiling_status_set() with the parameter ::STARPU_PROFILING_ENABLE. It is possible to detect whether monitoring is already enabled or not by calling starpu_profiling_status_get(). Enabling monitoring also reinitialize all previously collected feedback. The environment variable \ref STARPU_PROFILING can also be set to 1 to achieve the same effect. The function starpu_profiling_init() can also be called during the execution to reinitialize performance counters and to start the profiling if the environment variable \ref STARPU_PROFILING is set to 1. Likewise, performance monitoring is stopped by calling starpu_profiling_status_set() with the parameter ::STARPU_PROFILING_DISABLE. Note that this does not reset the performance counters so that the application may consult them later on. More details about the performance monitoring API are available in \ref API_Profiling. \subsection Per-taskFeedback Per-task Feedback If profiling is enabled, a pointer to a structure starpu_profiling_task_info is put in the field starpu_task::profiling_info when a task terminates. This structure is automatically destroyed when the task structure is destroyed, either automatically or by calling starpu_task_destroy(). The structure starpu_profiling_task_info indicates the date when the task was submitted (starpu_profiling_task_info::submit_time), started (starpu_profiling_task_info::start_time), and terminated (starpu_profiling_task_info::end_time), relative to the initialization of StarPU with starpu_init(). User can call starpu_timing_timespec_delay_us() to calculate the time elapsed between start time and end time in microseconds. It also specifies the identifier of the worker that has executed the task (starpu_profiling_task_info::workerid). These dates are stored as timespec structures which users may convert into micro-seconds using the helper function starpu_timing_timespec_to_us(). User can call starpu_worker_get_current_task_exp_end() to get the date when the current task is expected to be finished. When \ref ::STARPU_ENERGY_PROFILING is enabled, starpu_profiling_task_info::energy_consumed, provides the amount of Joules used by the task. It is worth noting that the application may directly access this structure from the callback executed at the end of the task. The structure starpu_task associated to the callback currently being executed is indeed accessible with the function starpu_task_get_current(). \subsection Per-codeletFeedback Per-codelet Feedback The field starpu_codelet::per_worker_stats is an array of counters. Unless the \ref STARPU_CODELET_PROFILING environment variable was set to 0, the i-th entry of the array is incremented every time a task implementing the codelet is executed on the i-th worker. This array is not reinitialized when profiling is enabled or disabled. The function starpu_codelet_display_stats() can be used to display the execution statistics of a specific codelet. \subsection Per-workerFeedback Per-worker Feedback The second argument returned by the function starpu_profiling_worker_get_info() is a structure starpu_profiling_worker_info that gives statistics about the specified worker. This structure specifies: - In starpu_profiling_worker_info::start_time, when StarPU started collecting profiling information for that worker. - In starpu_profiling_worker_info::total_time, the duration of the profiling measurement interval. - In starpu_profiling_worker_info::executed_tasks, the number of tasks that were executed while profiling was enabled. It also specifies how much time was spent in various states (executing a task, executing a callback, waiting for a data transfer to complete, etc.). Since these can happen at the same time (waiting for a data transfer while executing the previous tasks, and scheduling the next task), we provide two views. Firstly, the "all" view: - In starpu_profiling_worker_info::all_executing_time, the time spent executing kernels, thus real useful work. - In starpu_profiling_worker_info::all_callback_time, the time spent executing application callbacks. - In starpu_profiling_worker_info::all_waiting_time, the time spent waiting for data transfers. - In starpu_profiling_worker_info::all_sleeping_time, the time spent during which there was no task to be executed, i.e. lack of parallelism. - In starpu_profiling_worker_info::all_scheduling_time, the time spent scheduling tasks. But these times overlap, notably with GPUs the schedulers runs while tasks are getting executed. Another view is the "split" view, which eliminates the overlapping, by considering for instance that it does not matter what is happening while tasks are getting executed, that should be accounted for "executing" time, and e.g. only the scheduling periods that happen while no task is getting executed should be accounted in "scheduling" time. More precisely: - In starpu_profiling_worker_info::executing_time, the time spent executing kernels, normally equal to starpu_profiling_worker_info::all_executing_time. - In starpu_profiling_worker_info::callback_time, the time spent executing application callbacks while not executing a task. - In starpu_profiling_worker_info::waiting_time, the time spent waiting for data transfers while not executing a task or a callback. - In starpu_profiling_worker_info::sleeping_time, the time spent during which there was no task to be executed and not executing a task or a callback or waiting for a data transfer, i.e. real lack of parallelism. - In starpu_profiling_worker_info::scheduling_time, the time spent scheduling tasks while not executing a task or a callback or waiting for a data transfer to finish, and there are tasks to be scheduled. This thus provides a split of the starpu_profiling_worker_info::total_time into various states. The difference between starpu_profiling_worker_info::total_time and the sum of this split is the remaining uncategorized overhead of the runtime. Calling starpu_profiling_worker_get_info() resets the profiling information associated to a worker. To easily display all this information, the environment variable \ref STARPU_WORKER_STATS can be set to 1 (in addition to setting \ref STARPU_PROFILING to 1). A summary will then be displayed at program termination. To display the summary in a file instead of the standard error stream, use the environment variable \ref STARPU_WORKER_STATS_FILE. \verbatim Worker stats: CUDA 0.0 (Tesla M2075 4.7 GiB 03:00.0) 133 task(s) time split: total 3212.86 ms = executing: 1588.56 ms + callback: 2.95 ms + waiting: 5.34 ms + sleeping: 1613.67 ms + scheduling: 0.01 ms + overhead 2.33 ms all time: executing: 1588.56 ms callback: 2.95 ms waiting: 22.83 ms sleeping: 1725.93 ms scheduling: 1726.88 ms 286.388333 GFlop/s CPU 0 10 task(s) time split: total 3212.89 ms = executing: 2117.19 ms + callback: 0.23 ms + waiting: 0.01 ms + sleeping: 1095.06 ms + scheduling: 0.02 ms + overhead 0.37 ms all time: executing: 2117.19 ms callback: 0.23 ms waiting: 0.01 ms sleeping: 1095.06 ms scheduling: 283.86 ms 22.029695 GFlop/s CPU 1 10 task(s) time split: total 3212.92 ms = executing: 2116.18 ms + callback: 0.17 ms + waiting: 0.01 ms + sleeping: 1096.10 ms + scheduling: 0.02 ms + overhead 0.44 ms all time: executing: 2116.18 ms callback: 0.17 ms waiting: 0.01 ms sleeping: 1096.10 ms scheduling: 284.40 ms 22.029487 GFlop/s CPU 2 10 task(s) time split: total 3212.94 ms = executing: 2116.08 ms + callback: 0.18 ms + waiting: 0.01 ms + sleeping: 1096.21 ms + scheduling: 0.02 ms + overhead 0.44 ms all time: executing: 2116.08 ms callback: 0.18 ms waiting: 0.01 ms sleeping: 1096.21 ms scheduling: 283.75 ms 22.029343 GFlop/s Global time split: total 12851.60 ms = executing: 7938.01 ms (61.77%) + callback: 3.53 ms (0.03%) + waiting: 5.36 ms (0.04%) + sleeping: 4901.05 ms (38.14%) + scheduling: 0.06 ms (0.00%) + overhead 3.59 ms (0.03%) \endverbatim The number of GFlops/s is available because the starpu_task::flops field of the tasks were filled (or \ref STARPU_FLOPS used in starpu_task_insert()). When an FxT trace is generated (see \ref GeneratingTracesWithFxT), it is also possible to use the tool starpu_workers_activity (see \ref MonitoringActivity) to generate a graphic showing the evolution of these values during the time, for the different workers. \subsection Bus-relatedFeedback Bus-related Feedback // how to enable/disable performance monitoring // what kind of information do we get ? The bus speed measured by StarPU can be displayed by using the tool starpu_machine_display, for instance: \verbatim StarPU has found: 3 CUDA devices CUDA 0 (Tesla C2050 02:00.0) CUDA 1 (Tesla C2050 03:00.0) CUDA 2 (Tesla C2050 84:00.0) from to RAM to CUDA 0 to CUDA 1 to CUDA 2 RAM 0.000000 5176.530428 5176.492994 5191.710722 CUDA 0 4523.732446 0.000000 2414.074751 2417.379201 CUDA 1 4523.718152 2414.078822 0.000000 2417.375119 CUDA 2 4534.229519 2417.069025 2417.060863 0.000000 \endverbatim Statistics about the data transfers which were performed and temporal average of bandwidth usage can be obtained by setting the environment variable \ref STARPU_BUS_STATS to 1; a summary will then be displayed at program termination. To display the summary in a file instead of the standard error stream, use the environment variable \ref STARPU_BUS_STATS_FILE. \verbatim Data transfer stats: RAM 0 -> CUDA 0 319.92 MB 213.10 MB/s (transfers : 91 - avg 3.52 MB) CUDA 0 -> RAM 0 214.45 MB 142.85 MB/s (transfers : 61 - avg 3.52 MB) RAM 0 -> CUDA 1 302.34 MB 201.39 MB/s (transfers : 86 - avg 3.52 MB) CUDA 1 -> RAM 0 133.59 MB 88.99 MB/s (transfers : 38 - avg 3.52 MB) CUDA 0 -> CUDA 1 144.14 MB 96.01 MB/s (transfers : 41 - avg 3.52 MB) CUDA 1 -> CUDA 0 130.08 MB 86.64 MB/s (transfers : 37 - avg 3.52 MB) RAM 0 -> CUDA 2 312.89 MB 208.42 MB/s (transfers : 89 - avg 3.52 MB) CUDA 2 -> RAM 0 133.59 MB 88.99 MB/s (transfers : 38 - avg 3.52 MB) CUDA 0 -> CUDA 2 151.17 MB 100.69 MB/s (transfers : 43 - avg 3.52 MB) CUDA 2 -> CUDA 0 105.47 MB 70.25 MB/s (transfers : 30 - avg 3.52 MB) CUDA 1 -> CUDA 2 175.78 MB 117.09 MB/s (transfers : 50 - avg 3.52 MB) CUDA 2 -> CUDA 1 203.91 MB 135.82 MB/s (transfers : 58 - avg 3.52 MB) Total transfers: 2.27 GB \endverbatim \subsection MPI-relatedFeedback MPI-related Feedback Statistics about the data transfers which were performed over MPI can be obtained by setting the environment variable \ref STARPU_MPI_STATS to 1; a summary will then be displayed at program termination: \verbatim [starpu_comm_stats][1] TOTAL: 456.000000 B 0.000435 MB 0.000188 B/s 0.000000 MB/s [starpu_comm_stats][1:0] 456.000000 B 0.000435 MB 0.000188 B/s 0.000000 MB/s [starpu_comm_stats][0] TOTAL: 456.000000 B 0.000435 MB 0.000188 B/s 0.000000 MB/s [starpu_comm_stats][0:1] 456.000000 B 0.000435 MB 0.000188 B/s 0.000000 MB/s \endverbatim These statistics can be plotted as heatmaps using StarPU tool starpu_mpi_comm_matrix.py (see \ref MPIDebug). \section TaskAndWorkerProfiling Task And Worker Profiling A full example showing how to use the profiling API is available in the StarPU sources in the directory examples/profiling/. \code{.c} struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->synchronous = 1; /* We will destroy the task structure by hand so that we can * query the profiling info before the task is destroyed. */ task->destroy = 0; /* Submit and wait for completion (since synchronous was set to 1) */ starpu_task_submit(task); /* The task is finished, get profiling information */ struct starpu_profiling_task_info *info = task->profiling_info; /* How much time did it take before the task started ? */ double delay += starpu_timing_timespec_delay_us(&info->submit_time, &info->start_time); /* How long was the task execution ? */ double length += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); /* We no longer need the task structure */ starpu_task_destroy(task); \endcode \code{.c} /* Display the occupancy of all workers during the test */ int worker; for (worker = 0; worker < starpu_worker_get_count(); worker++) { struct starpu_profiling_worker_info worker_info; int ret = starpu_profiling_worker_get_info(worker, &worker_info); STARPU_ASSERT(!ret); double total_time = starpu_timing_timespec_to_us(&worker_info.total_time); double executing_time = starpu_timing_timespec_to_us(&worker_info.executing_time); double sleeping_time = starpu_timing_timespec_to_us(&worker_info.sleeping_time); double overhead_time = total_time - executing_time - sleeping_time; float executing_ratio = 100.0*executing_time/total_time; float sleeping_ratio = 100.0*sleeping_time/total_time; float overhead_ratio = 100.0 - executing_ratio - sleeping_ratio; char workername[128]; starpu_worker_get_name(worker, workername, 128); fprintf(stderr, "Worker %s:\n", workername); fprintf(stderr, "\ttotal time: %.2lf ms\n", total_time*1e-3); fprintf(stderr, "\texec time: %.2lf ms (%.2f %%)\n", executing_time*1e-3, executing_ratio); fprintf(stderr, "\tblocked time: %.2lf ms (%.2f %%)\n", sleeping_time*1e-3, sleeping_ratio); fprintf(stderr, "\toverhead time: %.2lf ms (%.2f %%)\n", overhead_time*1e-3, overhead_ratio); } \endcode \section PerformanceModelExample Performance Model Example To achieve good scheduling, StarPU scheduling policies need to be able to estimate in advance the duration of a task. This is done by giving to codelets a performance model, by defining a structure starpu_perfmodel and providing its address in the field starpu_codelet::model. The fields starpu_perfmodel::symbol and starpu_perfmodel::type are mandatory, to give a name to the model, and the type of the model, since there are several kinds of performance models. Then starpu_task_get_model_name() can be called to retrieve the name of the performance model associated with a task. For compatibility, make sure to initialize the whole structure to zero, either by using explicit memset(), or by letting the compiler implicitly do it as examplified below.
    • Measured at runtime (model type ::STARPU_HISTORY_BASED). This assumes that for a given set of data input/output sizes, the performance will always be about the same. This is very true for regular kernels on GPUs for instance (<0.1% error), and just a bit less true on CPUs (~=1% error). This also assumes that there are few different sets of data input/output sizes. StarPU will then keep record of the average time of previous executions on the various processing units, and use it as an estimation. History is done per task size, by using a hash of the input and output sizes as an index. It will also save it in $STARPU_HOME/.starpu/sampling/codelets for further executions, and can be observed by using the tool starpu_perfmodel_display, or drawn by using the tool starpu_perfmodel_plot (\ref PerformanceModelCalibration). The models are indexed by machine name. To share the models between machines (e.g. for a homogeneous cluster), use export STARPU_HOSTNAME=some_global_name. Measurements are only done when using a task scheduler which makes use of it, such as dmda. Measurements can also be provided explicitly by the application, by using the function starpu_perfmodel_update_history(). An example is in the file tests/perfmodels/feed.c. The following is a small code example. If e.g. the code is recompiled with other compilation options, or several variants of the code are used, the symbol string should be changed to reflect that, in order to recalibrate a new model from zero. The symbol string can even be constructed dynamically at execution time, as long as this is done before submitting any task using it. \code{.c} static struct starpu_perfmodel mult_perf_model = { .type = STARPU_HISTORY_BASED, .symbol = "mult_perf_model" }; struct starpu_codelet cl = { .cpu_funcs = { cpu_mult }, .cpu_funcs_name = { "cpu_mult" }, .nbuffers = 3, .modes = { STARPU_R, STARPU_R, STARPU_W }, /* for the scheduling policy to be able to use performance models */ .model = &mult_perf_model }; \endcode
    • Measured at runtime and refined by regression (model types ::STARPU_REGRESSION_BASED and ::STARPU_NL_REGRESSION_BASED). This still assumes performance regularity, but works with various data input sizes, by applying regression over observed execution times. ::STARPU_REGRESSION_BASED uses an a*n^b regression form, ::STARPU_NL_REGRESSION_BASED uses an a*n^b+c (more precise than ::STARPU_REGRESSION_BASED, but costs a lot more to compute). For instance, tests/perfmodels/regression_based.c uses a regression-based performance model for the function \c memset(). Of course, the application has to issue tasks with varying size so that the regression can be computed. StarPU will not trust the regression unless there is at least 10% difference between the minimum and maximum observed input size. It can be useful to set the environment variable \ref STARPU_CALIBRATE to 1 and run the application on varying input sizes with \ref STARPU_SCHED set to dmda scheduler, to feed the performance model for a variety of inputs. The application can also provide the measurements explicitly by using the function starpu_perfmodel_update_history(). The tools starpu_perfmodel_display and starpu_perfmodel_plot can be used to observe how much the performance model is calibrated (\ref PerformanceModelCalibration); when their output looks good, \ref STARPU_CALIBRATE can be reset to 0 to let StarPU use the resulting performance model without recording new measures, and \ref STARPU_SCHED can be set to dmda to benefit from the performance models. If the data input sizes vary a lot, it is really important to set \ref STARPU_CALIBRATE to 0, otherwise StarPU will continue adding the measures, and result with a very big performance model, which will take time a lot of time to load and save. For non-linear regression, since computing it is quite expensive, it is only done at termination of the application. This means that the first execution of the application will use only history-based performance model to perform scheduling, without using regression.
    • Another type of model is ::STARPU_MULTIPLE_REGRESSION_BASED, which is based on multiple linear regression. In this model, users define both the relevant parameters and the equation for computing the task duration. \f[ T_{kernel} = a + b(M^{\alpha_1} * N^{\beta_1} * K^{\gamma_1}) + c(M^{\alpha_2} * N^{\beta_2} * K^{\gamma_2}) + ... \f] \f$M, N, K\f$ are the parameters of the task, added at the task creation. These need to be extracted by the cl_perf_func function, which should be defined by users. \f$\alpha, \beta, \gamma\f$ are the exponents defined by users in model->combinations table. Finally, coefficients \f$a, b, c\f$ are computed automatically by the StarPU at the end of the execution, using least squares method of the dgels_ LAPACK function. examples/mlr/mlr.c example provides more details on the usage of ::STARPU_MULTIPLE_REGRESSION_BASED models. The \ref enable-mlr "--enable-mlr" configure option needs to be set to calibrate the model. Coefficients computation is done at the end of the execution, and the results are stored in standard codelet perfmodel files. Additional files containing the duration of tasks together with the value of each parameter are stored in .starpu/sampling/codelets/tmp/ directory. These files are reused when \ref STARPU_CALIBRATE environment variable is set to 1, to recompute coefficients based on the current, but also on the previous executions. By default, StarPU uses a lightweight dgels implementation, but the \ref enable-mlr-system-blas "--enable-mlr-system-blas" configure option can be used to make StarPU use a system-provided dgels BLAS. Additionally, when multiple linear regression models are not enabled through \ref enable-mlr "--enable-mlr" or when the model->combinations are not defined, StarPU will still write output files into .starpu/sampling/codelets/tmp/ to allow performing an analysis. This analysis typically aims at finding the most appropriate equation for the codelet and tools/starpu_mlr_analysis script provides an example of how to perform such study.
    • Provided as an estimation from the application itself (model type ::STARPU_COMMON and field starpu_perfmodel::cost_function), see for instance examples/common/blas_model.h and examples/common/blas_model.c.
    • Provided explicitly by the application (model type ::STARPU_PER_ARCH): either field starpu_perfmodel::arch_cost_function, or the fields .per_arch[arch][nimpl].cost_function have to be filled with pointers to functions which return the expected duration of the task in micro-seconds, one per architecture, see for instance tests/datawizard/locality.c
    • Provided explicitly by the application (model type ::STARPU_PER_WORKER) similarly with the starpu_perfmodel::worker_cost_function field.
    For ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED, and ::STARPU_NL_REGRESSION_BASED, the dimensions of task data (both input and output) are used as an index by default. ::STARPU_HISTORY_BASED uses a CRC hash of the dimensions as an index to distinguish histories, and ::STARPU_REGRESSION_BASED and ::STARPU_NL_REGRESSION_BASED use the total size as an index for the regression. (Data marked with ::STARPU_NOFOOTPRINT are not taken into account). The starpu_perfmodel::size_base and starpu_perfmodel::footprint fields however permit the application to override that, when for instance some of the data do not matter for task cost (e.g. mere reference table), or when using sparse structures (in which case it is the number of non-zeros which matter), or when there is some hidden parameter such as the number of iterations, or when the application actually has a very good idea of the complexity of the algorithm, and just not the speed of the processor, etc. The example in the directory examples/pi uses this to include the number of iterations in the base size. starpu_perfmodel::size_base should be used when the variance of the actual performance is known (i.e. bigger return value is longer execution time), and thus particularly useful for ::STARPU_REGRESSION_BASED or ::STARPU_NL_REGRESSION_BASED. starpu_perfmodel::footprint can be used when the variance of the actual performance is unknown (irregular performance behavior, etc.), and thus only useful for ::STARPU_HISTORY_BASED. starpu_task_data_footprint() can be used as a base and combined with other parameters through starpu_hash_crc32c_be() for instance. StarPU will automatically determine when the performance model is calibrated, or rather, it will assume the performance model is calibrated until the application submits a task for which the performance can not be predicted. For ::STARPU_HISTORY_BASED, StarPU will require 10 (STARPU_CALIBRATE_MINIMUM) measurements for a given size before estimating that an average can be taken as estimation for further executions with the same size. For ::STARPU_REGRESSION_BASED and ::STARPU_NL_REGRESSION_BASED, StarPU will require 10 (STARPU_CALIBRATE_MINIMUM) measurements, and that the minimum measured data size is smaller than 90% of the maximum measured data size (i.e. the measurement interval is large enough for a regression to have a meaning). Calibration can also be forced by setting the \ref STARPU_CALIBRATE environment variable to 1, or even reset by setting it to 2. How to use schedulers which can benefit from such performance model is explained in \ref TaskSchedulingPolicy. The same can be done for task energy consumption estimation, by setting the field starpu_codelet::energy_model the same way as the field starpu_codelet::model. Note: for now, the application has to give to the energy consumption performance model a name which is different from the execution time performance model. The application can request time estimations from the StarPU performance models by filling a task structure as usual without actually submitting it. The data handles can be created by calling any of the functions starpu_*_data_register with a NULL pointer and -1 node and the desired data sizes, and need to be unregistered as usual. The functions starpu_task_expected_length() and starpu_task_expected_energy() can then be called to get an estimation of the task cost on a given arch. starpu_task_footprint() can also be used to get the footprint used for indexing history-based performance models. starpu_task_destroy() needs to be called to destroy the dummy task afterwards. See tests/perfmodels/regression_based.c for an example. The application can also request an on-the-fly XML report of the performance model, by calling starpu_perfmodel_dump_xml() to print the report to a FILE*. \section PerformanceMonitoringCounters Performance Monitoring Counters This section presents the StarPU performance monitoring framework. It summarizes the objectives of the framework. It then introduces the entities involved in the framework. It presents the API of the framework, as well as some implementation details. It exposes the typical sequence of operations to plug an external tool to monitor a performance counter of StarPU. \subsection PerfMonCountObjectives Objectives The objectives of this framework are to let external tools interface with StarPU to collect various performance metrics at runtime, in a generic, safe, extensible way. For that, it enables such tools to discover the available performance metrics in a particular StarPU build, as well as the type of each performance counter value. It lets these tools build sets of performance counters to monitor, and then register listener callbacks to collect the measurement samples of these sets of performance counters at runtime. \subsection PerfMonCountEntities Entities The performance monitoring framework is built on a series of concepts and items, organized consistently. The corresponding C language objects should be considered opaque by external tools, and should only be manipulated through proper function calls and accessors. \subsubsection PerfMonCountCounter Performance Counter The performance counter entity is the fundamental object of the framework, representing one piece of performance metrics, such as for instance the total number of tasks submitted so far, that is exported by StarPU and can be collected through the framework at runtime. A performance counter has a type and belongs to a scope. A performance counter is designated by a unique name and unique ID integer. We can start or stop collecting performance counter values by using starpu_perf_counter_collection_start() and starpu_perf_counter_collection_stop(). \subsubsection PerfMonCountCounterType Performance Counter Type A performance counter has a type. A type is designated by a unique name and unique ID number. Currently, supported types include: Type Name|Type Definition ---------|-------------------------------------- "int32" |32-bit signed integers "int64" |64-bit signed integers "float" |32-bit single-precision floating point "double" |64-bit double-precision floating point \subsubsection PerfMonCountCounterScope Performance Counter Scope A performance counter belongs to a scope. The scope of a counter defines the context considered for computing the corresponding performance counter. A scope is designated with a unique name and unique ID number. Currently, defined scopes include: Scope Name |Scope Definition -------------|---------------------------------------------- "global" |Counter is global to the StarPU instance "per_worker" |Counter is within the scope of a thread worker "per_codelet"|Counter is within the scope of a task codelet \subsubsection PerfMonCountCounterSet Performance Counter Set A performance counter set is a subset of the performance counters belonging to the same scope. Each counter of the scope can be in the enabled or disabled state in a performance counter set. A performance counter set enables a performance monitoring tool to indicate the set of counters to be collected for a particular listener callback. \subsubsection PerfMonCountCounterSample Performance Counter Sample A performance counter sample corresponds to one sample of collected measurement values of a performance counter set. Only the values corresponding to enabled counters in the sample's counter set should be observed by the listener callback. Whether the sample contains valid values for counters disabled in the set is unspecified. \subsubsection PerfMonCountCounterListener Performance Counter Listener A performance counter listener is a callback function registered by some external tool to monitor a set of performance counters in a particular scope. It is called each time a new performance counter sample is ready to be observed. The sample object should not be accessed outside the callback. \subsubsection PerfMonCountCounterAPI Application Programming Interface The API of the performance monitoring framework is defined in the \ref starpu_perf_monitoring.h public header file of StarPU. This header file is automatically included with \ref starpu.h. An example of use of the routines is given in \ref PerfMonCountCounterSequence. \subsection PerfMonCountCounterImplementation Implementation Details \subsubsection PerfMonCountCounterImplRegistration Performance Counter Registration Each module of StarPU can export performance counters. In order to do so, modules that need to export some counters define a registration function that is called at StarPU initialization time. This function is responsible for calling the "_starpu_perf_counter_register()" function once for each counter it exports, to let the framework know about the list of counters managed by the module. It also registers performance sample updater callbacks for the module, one for each scope for which it exports counters. \subsubsection PerfMonCountCounterImplUpdaters Performance Sample Updaters The updater callback for a module and scope combination is internally called every time a sample for a set of performance counter must be updated. Thus, the updated callback is responsible for filling the sample's selected counters with the counter values found at the time of the call. Global updaters are currently called at task submission time, as well as any blocking tasks management function of the StarPU API, such as starpu_task_wait_for_all(), which waits for the completion of all tasks submitted up to this point. Per-worker updaters are currently called at the level of StarPU's drivers, that is, the modules in charge of task execution of hardware-specific worker threads. The actual calls occur in-between the execution of tasks. Per-codelet updaters are currently called both at task submission time, and at the level of StarPU's drivers together with the per-worker updaters. A performance sample object is locked during the sample collection. The locking prevents the following issues:
    • The listener of sample being changed during sample collection;
    • The set of counters enabled for a sample being changed;
    • Conflicting concurrent updates;
    • Updates while the sample is being read by the listener.
    The location of the updaters' calls is chosen to minimize the sequentialization effect of the locking, in order to limit the level of interference of the monitoring process. For Global updaters, the calls are performed only on the application thread(s) in charge of submitting tasks. Since, in most cases, only a single application thread submits tasks, the sequentialization effect is moderate. Per-worker updates are local to their worker, thus here again the sample lock is un-contented, unless the external monitoring tool frequently changes the set of enabled counters in the sample. \subsubsection PerfMonCountCounterImplOperations Counter operations In practice, the sample updaters only take snapshots of the actual performance counters. The performance counters themselves are updated with ad-hoc procedures depending on each counter. Such procedures typically involve atomic operations. While operations such as atomic increments or decrements on integer values are readily available, this is not the case for more complex operations such as min/max for computing peak value counters (for instance in the global and per-codelet counters for peak number of submitted tasks and peak number of ready tasks waiting for execution), and this is also not the case for computations on floating point data (used for instance in computing cumulated execution time of tasks, either per worker or per codelet). The performance monitoring framework therefore supplies such missing routines, for the internal use of StarPU. \subsubsection PerfMonCountCounterImplRuntime Runtime checks The performance monitoring framework features a comprehensive set of runtime checks to verify that both StarPU and some external tool do not access a performance counter with the wrong typed routines, to quickly detect situations of mismatch that can result from the evolution of multiple pieces of software at distinct paces. Moreover, no StarPU data structure is accessed directly, either by the external code making use of the performance monitoring framework. The use of the C enum constants is optional; referring to values through constant strings is available when more robustness is desired. These runtime checks enable the framework to be extensible. Moreover, while the framework's counters currently are permanently compiled in, they could be made optional at compile time, for instance to suppress any overhead once the analysis and optimization process has been completed by the programmer. Thanks to the runtime discovery of available counters, the applicative code, or an intermediate layer such as skeleton layer acting on its behalf, would then be able to adapt to performance analysis builds versus optimized builds. \subsection PerfMonCountCounterExported Exported Counters \subsubsection PerfMonCountCounterExportedGlobal Global Scope Counter Name |Counter Definition ---------------------------------|-------------------------------------------------------------------------------------- \c starpu.task.g_total_submitted |Total number of tasks submitted \c starpu.task.g_peak_submitted |Maximum number of tasks submitted, waiting for dependencies resolution at any time \c starpu.task.g_peak_ready |Maximum number of tasks ready for execution, waiting for an execution slot at any time \subsubsection PerfMonCountCounterExportedPerWorker Per-worker Scope Counter Name |Counter Definition --------------------------------------|------------------------------------------------------------ \c starpu.task.w_total_executed |Total number of tasks executed on a given worker \c starpu.task.w_cumul_execution_time |Cumulated execution time of tasks executed on a given worker \subsubsection PerfMonCountCounterExportedPerCodelet Per-Codelet Scope Counter Name |Counter Definition --------------------------------------|----------------------------------------------------------------------------------------------------- \c starpu.task.c_total_submitted |Total number of submitted tasks for a given codelet \c starpu.task.c_peak_submitted |Maximum number of submitted tasks for a given codelet waiting for dependencies resolution at any time \c starpu.task.c_peak_ready |Maximum number of ready tasks for a given codelet waiting for an execution slot at any time \c starpu.task.c_total_executed |Total number of executed tasks for a given codelet \c starpu.task.c_cumul_execution_time |Cumulated execution time of tasks for a given codelet \subsection PerfMonCountCounterSequence Sequence of operations This section presents a typical sequence of operations to interface an external tool with some StarPU performance counters. In this example, the counters monitored are the per-worker total number of executed tasks (\c starpu.task.w_total_executed) and the tasks' cumulated execution time (\c starpu.task.w_cumul_execution_time). Step 0: Initialize StarPU StarPU must first be initialized, by a call to starpu_init(), for performance counters to become available, since each module of StarPU registers the performance counters it exports during that initialization phase. \code{.c} int ret = starpu_init(NULL); \endcode Step 1: Allocate a counter set A counter set has to be allocated on the per-worker scope. The per-worker scope id can be obtained by name, or with the pre-defined enum value ::starpu_perf_counter_scope_per_worker. \code{.c} enum starpu_perf_counter_scope w_scope = starpu_perf_counter_scope_per_worker; struct starpu_perf_counter_set *w_set = starpu_perf_counter_set_alloc(w_scope); \endcode Step 2: Get the counter IDs Each performance counter has a unique ID used to refer to it in subsequent calls to the performance monitoring framework. \code{.c} int id_w_total_executed = starpu_perf_counter_name_to_id(w_scope, "starpu.task.w_total_executed"); int id_w_cumul_execution_time = starpu_perf_counter_name_to_id(w_scope, "starpu.task.w_cumul_execution_time"); \endcode Step 3: Enable the counters in the counter set This step indicates which counters will be collected into performance monitoring samples for the listeners referring to this counter set. \code{.c} starpu_perf_counter_set_enable_id(w_set, id_w_total_executed); starpu_perf_counter_set_enable_id(w_set, id_w_cumul_execution_time); \endcode Step 4: Write a listener callback This callback will be triggered when a sample becomes available. Upon execution, it reads the values for the two counters from the sample and displays these values, for the sake of the example. \code{.c} void w_listener_cb(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context) { int32_t w_total_executed = starpu_perf_counter_sample_get_int32_value(sample, id_w_total_executed); double w_cumul_execution_time = starpu_perf_counter_sample_get_double_value(sample, id_w_cumul_execution_time); printf("worker[%d]: w_total_executed = %d, w_cumul_execution_time = %lf\n", starpu_worker_get_id(), w_total_executed, w_cumul_execution_time); } \endcode Step 5: Initialize the listener This step allocates the listener structure and prepares it to listen to the selected set of per-worker counters. However, it is not actually active until Step 6, once it is attached to one or more worker. \code{.c} struct starpu_perf_counter_listener * w_listener = starpu_perf_counter_listener_init(w_set, w_listener_cb, NULL); \endcode Step 6: Set the listener on all workers This step actually makes the listener active, in this case on every StarPU worker thread. \code{.c} starpu_perf_counter_set_all_per_worker_listeners(w_listener); \endcode After this step, any task assigned to a worker will be counted in that worker selected performance counters, and reported to the listener. \section PerfKnobs Performance Steering Knobs This section presents the StarPU performance steering framework. It summarizes the objectives of the framework. It introduces the entities involved in the framework, and then details the API, implementation and sequence of operations. \subsection PerfKnobsObjectives Objectives The objectives of this framework are to let external tools interface with StarPU, observe, and act at runtime on actionable performance steering knobs exported by StarPU, in a generic, safe, extensible way. It defines an API to let such external tools discover the available performance steering knobs in a particular StarPU revision of build, as well as the type of each knob. \subsection PerfKnobsEntities Entities \subsubsection PerfKnobsEntitiesKnob Performance Steering Knob The performance steering knob entity designates one runtime-actionable knob exported by StarPU. It may represent some setting, or some constant used within StarPU for a given purpose. The value of the knob is typed, it can be obtained or modified with the appropriate getter/setter routine. The knob belongs to a scope. A performance steering knob is designated with a unique name and unique ID number. \subsubsection PerfKnobsEntitiesKnobType Knob Type A performance steering knob has a type. A type is designated by a unique name and unique ID number. Currently, supported types include: Type Name |Type Definition ----------|-------------------------------------- "int32" |32-bit signed integers "int64" |64-bit signed integers "float" |32-bit single precision floating point "double" |64-bit double precision floating point On/Off knobs are defined as "int32" type, with value 0 for Off and value !0 for On, unless otherwise specified. \subsubsection PerfKnobsEntitiesKnobScope Knob Scope A performance steering knob belongs to a scope. The scope of a knob defines the context considered for computing the corresponding knob. A scope is designated with a unique name and unique ID number. Currently, defined scopes include: Scope Name |Scope Definition ----------------|-------------------------------------------------------- "global" |Knob is global to the StarPU instance "per_worker" |Knob is within the scope of a thread worker "per_scheduler" |Knob is within the scope of a scheduling policy instance \subsubsection PerfKnobsEntitiesKnobGroup Knob Group The notion of Performance Steering Knob Group is currently internal to StarPU. It defines a series of knobs that are handled by the same couple of setter/getter functions internally. A knob group belongs to a knob scope. \subsection PerfKnobsAPI Application Programming Interface The API is defined in the \ref starpu_perf_steering.h public header file of StarPU. This header file is automatically included with \ref starpu.h. \subsection PerfKnobsImpl Implementation Details While the APIs of the monitoring and the steering frameworks share a similar design philosophy, the internals are significantly different. Since the effect of the steering knobs varies widely, there is no global locking scheme in place shared for all knobs. Instead, each knob gets its own procedures to get the value of a setting, or change it. To prevent code duplication, some related knobs may share getter/setter routines as knob groups. The steering framework does not involve callback routines. Knob get operations proceed immediately, except for the possible delay in getting access to the knob value. Knob set operations also proceed immediately, not counting the exclusive access time, though their action result may be observed with some latency, depending on the knob and on the current workload. For instance, acting on a per-worker \c starpu.worker.w_enable_worker_knob to disable a worker thread may be observed only after the corresponding worker's assigned task queue becomes empty, since its actual effect is to prevent additional tasks to be queued to the worker, and not to migrate already queued tasks to another worker. Such design choices aim at providing a compromise between offering some steering capabilities and keeping the cost of supporting such steering capabilities to an acceptable level. The framework is designed to be easily extensible. At StarPU initialization time, the framework calls initialization functions if StarPU modules to initialize the set of knobs they export. Knob get/set accessors can be shared among multiple knobs in a knob group. Thus, exporting a new knob is basically a matter of declaring it at initialization time, by specifying its name and value type, and either add its handling to an existing getter/setter pair of accessors in a knob group, or create a new group. As the performance monitoring framework, the performance steering framework is currently permanently enabled, but could be made optional at compile-time to separate testing builds from production builds. \subsection PerfKnobsExported Exported Steering Knobs \subsubsection PerfKnobsExportedGlobal Global Scope Knob Name |Knob Definition --------------------------------------------|---------------------------------------------------- \c starpu.global.g_calibrate_knob |Enable/disable the calibration of performance models \c starpu.global.g_enable_catch_signal_knob |Enable/disable the catching of UNIX signals \subsubsection PerfKnobsExportedPerWorker Per-worker Scope Knob Name |Knob Definition --------------------------------------|------------------------------------------------------------ \c starpu.worker.w_bind_to_pu_knob |Change the processing unit to which a worker thread is bound \c starpu.worker.w_enable_worker_knob |Disable/re-enable a worker thread to be selected for task execution \subsubsection PerfKnobsExportedPerScheduler Per-Scheduler Scope Knob Name |Knob Definition ---------------------------------------|---------------- \c starpu.task.s_max_priority_cap_knob |Set a capping maximum priority value for subsequently submitted tasks \c starpu.task.s_min_priority_cap_knob |Set a capping minimum priority value for subsequently submitted tasks \c starpu.dmda.s_alpha_knob |Scaling factor for the Alpha constant for Deque Model schedulers to alter the weight of the estimated task execution time \c starpu.dmda.s_beta_knob |Scaling factor for the Beta constant for Deque Model schedulers to alter the weight of the estimated data transfer time for the task's input(s) \c starpu.dmda.s_gamma_knob |Scaling factor for the Gamma constant for Deque Model schedulers to alter the weight of the estimated power consumption of the task \c starpu.dmda.s_idle_power_knob |Scaling factor for the baseline Idle power consumption estimation of the corresponding processing unit \subsection PerfKnobsSequence Sequence of operations This section presents an example of a sequence of operations representing a typical use of the performance steering knobs exported by StarPU. In this example, a worker thread is temporarily barred from executing tasks. For that, the corresponding \c starpu.worker.w_enable_worker_knob of the worker, initially set to 1 (= enabled) is changed to 0 (= disabled). Step 0: Initialize StarPU StarPU must first be initialized, by a call to starpu_init(). Performance steering knobs only become available after this step, since each module of StarPU registers the knobs it exports during that initialization phase. \code{.c} int ret = starpu_init(NULL); \endcode Step 1: Get the knob ID Each performance steering knob has a unique ID used to refer to it in subsequent calls to the performance steering framework. The knob belongs to the "per_worker" scope. \code{.c} int w_scope = starpu_perf_knob_scope_name_to_id("per_worker"); int w_enable_id = starpu_perf_knob_name_to_id(w_scope, "starpu.worker.w_enable_worker_knob"); \endcode Step 2: Get the knob current value This knob is an On/Off knob. Its value type is therefore a 32-bit integer, with value 0 for Off and value !0 for On. The getter functions for per-worker knobs expect the knob ID as first argument, and the worker ID as second argument. Here the getter call obtains the value of worker 5. \code{.c} int32_t val = starpu_perf_knob_get_per_worker_int32_value(w_enable_id, 5); \endcode Step 3: Set the knob current value The setter functions for per-worker knobs expect the knob ID as first argument, the worker ID as second argument, and the new value as third argument. Here, the value for worker 5 is set to 0 to temporarily bar the worker thread from accepting new tasks for execution. \code{.c} starpu_perf_knob_set_per_worker_int32_value(w_enable_id, 5, 0); \endcode Subsequently, setting the value of the knob back to 1 enables the corresponding to accept new tasks for execution again. \code{.c} starpu_perf_knob_set_per_worker_int32_value(w_enable_id, 5, 1); \endcode */ starpu-1.4.9+dfsg/doc/doxygen/chapters/starpu_performances/performances_intro.doxy000066400000000000000000000024541507764646700307360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \intropage{IntroPerformances, --------- StarPU Performances ---------} \webforeword This part shows how to measure application performances.
    • Chapter \ref BenchmarkingStarPU introduces some interesting benchmarks which can be found in StarPU sources.
    • Chapter \ref OnlinePerformanceTools gives information on online performance monitoring tools to help you analyze your program
    • Chapter \ref OfflinePerformanceTools gives information on offline performance tools such as a FxT library to trace execution data and tasks and a StarPU Eclipse Plugin to visualize data traces directly from the Eclipse IDE.
    */ starpu-1.4.9+dfsg/doc/doxygen/chapters/version.html000066400000000000000000000001401507764646700224110ustar00rootroot00000000000000This manual documents the version 1.4.9 of StarPU. Its contents was last updated on 2025-10-24. starpu-1.4.9+dfsg/doc/doxygen/chapters/version.sty000066400000000000000000000001131507764646700222640ustar00rootroot00000000000000\newcommand{\STARPUUPDATED}{2025-10-24} \newcommand{\STARPUVERSION}{1.4.9} starpu-1.4.9+dfsg/doc/doxygen/doxygen-config-include.cfg.in000066400000000000000000000076441507764646700236740ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Simon Archipoff # Copyright (C) 2011-2011 Télécom Sud Paris # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # INPUT += @top_builddir@/doc/doxygen/starpu_config.h \ @top_srcdir@/include/starpu.h \ @top_srcdir@/include/starpu_task.h \ @top_srcdir@/include/starpu_data.h \ @top_srcdir@/include/starpu_data_interfaces.h \ @top_srcdir@/include/starpu_data_filters.h \ @top_srcdir@/include/starpu_task_dep.h \ @top_srcdir@/include/starpu_task_list.h \ @top_srcdir@/include/starpu_task_util.h \ @top_srcdir@/include/starpu_cuda.h \ @top_srcdir@/include/starpu_cublasLt.h \ @top_srcdir@/include/starpu_cusparse.h \ @top_srcdir@/include/starpu_cublas.h \ @top_srcdir@/include/starpu_cublas_v2.h \ @top_srcdir@/include/starpu_cusolver.h \ @top_srcdir@/include/starpu_opencl.h \ @top_srcdir@/include/starpu_hip.h \ @top_srcdir@/include/starpu_max_fpga.h \ @top_srcdir@/include/starpu_worker.h \ @top_srcdir@/include/starpu_perfmodel.h \ @top_srcdir@/include/starpu_openmp.h \ @top_srcdir@/include/starpu_sched_component.h \ @top_srcdir@/include/starpu_sched_ctx.h \ @top_srcdir@/include/starpu_sched_ctx_hypervisor.h \ @top_srcdir@/include/starpu_scheduler.h \ @top_srcdir@/include/schedulers/starpu_heteroprio.h \ @top_srcdir@/include/schedulers/starpu_scheduler_toolbox.h \ @top_srcdir@/mpi/include/starpu_mpi.h \ @top_srcdir@/mpi/include/starpu_mpi_ft.h \ @top_srcdir@/mpi/include/starpu_mpi_lb.h \ @top_srcdir@/mpi/include/fstarpu_mpi_mod.f90 \ @top_srcdir@/doc/doxygen/chapters/api/bubble_support.doxy \ @top_srcdir@/include/starpu_bitmap.h \ @top_srcdir@/include/starpu_bound.h \ @top_srcdir@/include/starpu_deprecated_api.h \ @top_srcdir@/include/starpu_disk.h \ @top_srcdir@/include/starpu_driver.h \ @top_srcdir@/include/starpu_expert.h \ @top_srcdir@/include/starpu_fxt.h \ @top_srcdir@/include/starpu_hash.h \ @top_srcdir@/include/starpu_util.h \ @top_srcdir@/include/starpu_helper.h \ @top_srcdir@/include/starpu_parallel_worker.h \ @top_srcdir@/include/starpu_perf_monitoring.h \ @top_srcdir@/include/starpu_perf_steering.h \ @top_srcdir@/include/starpu_profiling.h \ @top_srcdir@/include/starpu_profiling_tool.h \ @top_srcdir@/include/starpu_rand.h \ @top_srcdir@/include/starpu_simgrid_wrap.h \ @top_srcdir@/include/starpu_sink.h \ @top_srcdir@/include/starpu_stdlib.h \ @top_srcdir@/include/starpu_task_bundle.h \ @top_srcdir@/doc/doxygen/chapters/api/threads.doxy \ @top_srcdir@/include/starpu_thread.h \ @top_srcdir@/include/starpu_thread_util.h \ @top_srcdir@/include/starpu_tree.h \ @top_srcdir@/doc/doxygen/chapters/api/fortran_support.doxy \ @top_srcdir@/include/fstarpu_mod.f90 \ @top_srcdir@/include/starpu_mod.f90 \ @top_srcdir@/starpufft/include/starpufft.h \ @top_srcdir@/sc_hypervisor/include/sc_hypervisor_config.h \ @top_srcdir@/sc_hypervisor/include/sc_hypervisor_policy.h \ @top_srcdir@/sc_hypervisor/include/sc_hypervisor_lp.h \ @top_srcdir@/sc_hypervisor/include/sc_hypervisor.h \ @top_srcdir@/sc_hypervisor/include/sc_hypervisor_monitoring.h \ @top_srcdir@/starpurm/include/starpurm.h \ @top_srcdir@/doc/doxygen/chapters/api/fft_support.doxy starpu-1.4.9+dfsg/doc/doxygen/doxygen-config.cfg.in000066400000000000000000000136351507764646700222500ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Simon Archipoff # Copyright (C) 2011-2011 Télécom Sud Paris # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_introduction/introduction_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_introduction/doc_organization.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_introduction/glossary.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_installation/installation_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_installation/building.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_installation/configure_options.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_installation/environment_variables.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_installation/configuration_and_initialization.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/basics_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/starpu_applications.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/basic_examples.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/scaling_vector_example.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/tasks.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/data_management.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/scheduling.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/examples_sources.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_applications/applications_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_applications/vector_scaling.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_applications/stencil.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_performances/performances_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_performances/benchmarking_starpu.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_performances/online_performance_tools.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_performances/offline_performance_tools.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_faq/faq_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_faq/check_list_performance.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_faq/faq.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_languages/languages_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_languages/native_fortran_support.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_languages/java.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_languages/python.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_languages/openmp_runtime_support.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/extensions_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/advanced_tasks.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/advanced_data_management.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/advanced_scheduling.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/scheduling_contexts.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/scheduling_context_hypervisor.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/scheduling_policy_definition.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/cuda_support.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/opencl_support.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/max_fpga_support.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/out_of_core.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/mpi_support.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/tcpip_support.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/transactions.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/fault_tolerance.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/fft_support.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/bubble.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/parallel_worker.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/interoperability.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/simgrid.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/debugging_tools.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/helpers.doxy \ @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ @top_srcdir@/doc/doxygen/chapters/files.doxy EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ @top_srcdir@/doc/doxygen/chapters \ @top_srcdir@/doc/doxygen/chapters/starpu_applications/code \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/code \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/code \ @top_srcdir@/doc/doxygen/chapters/starpu_languages/code INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh #LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images GENERATE_LATEX = @DOC_GENERATE_LATEX@ PROJECT_NAME = "StarPU Handbook" ALIASES += "intropage{2} = \page \1 \2" ALIASES += "foreword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html" ALIASES += "webforeword = " starpu-1.4.9+dfsg/doc/doxygen/doxygen_filter.sh.in000077500000000000000000000022071507764646700222210ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if [ "$(basename $1)" == "starpufft.h" ] ; then gcc -E $1 -I @top_srcdir@/include/ -I @top_builddir@/include/ |grep -i starpufft else # the macro STARPU_DEPRECATED needs to be removed as it is not properly processed by doxygen # lines starting with // in the doxygen input files are considered as comments to be removed sed -e 's/STARPU_DEPRECATED//' $1 | sed -e 's/^\/\/.*//' | sed -e 's/STARPU_TASK_LIST_INLINE//' | sed -e 's/STARPU_WARN_UNUSED_RESULT//' fi starpu-1.4.9+dfsg/doc/doxygen/refman.tex000066400000000000000000000266701507764646700202370ustar00rootroot00000000000000% StarPU --- Runtime system for heterogeneous multicore architectures. % % Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria % Copyright (C) 2013-2013 Simon Archipoff % % StarPU is free software; you can redistribute it and/or modify % it under the terms of the GNU Lesser General Public License as published by % the Free Software Foundation; either version 2.1 of the License, or (at % your option) any later version. % % StarPU is distributed in the hope that it will be useful, but % WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. % % See the GNU Lesser General Public License in COPYING.LGPL for more details. % \newcommand\starputitle{StarPU Handbook} \setcounter{tocdepth}{1} \input{./title.tex} \chapter{Introduction} \label{index} \hypertarget{index}{} \input{index} \chapter{Documentation Organization} \label{DocumentationOrganization} \hypertarget{DocumentationOrganization}{} \input{DocOrganization} \chapter{Glossary} \label{Glossary} \hypertarget{Glossary}{} \input{Glossary} \part{StarPU Installation} \chapter{Organization} \label{IntroInstallation} \hypertarget{IntroInstallation}{} \input{IntroInstallation} \chapter{Building and Installing StarPU} \label{BuildingAndInstallingStarPU} \hypertarget{BuildingAndInstallingStarPU}{} \input{BuildingAndInstallingStarPU} \chapter{Compilation Configuration} \label{CompilationConfiguration} \hypertarget{CompilationConfiguration}{} \input{CompilationConfiguration} \chapter{Execution Configuration Through Environment Variables} \label{ExecutionConfigurationThroughEnvironmentVariables} \hypertarget{ExecutionConfigurationThroughEnvironmentVariables}{} \input{ExecutionConfigurationThroughEnvironmentVariables} \chapter{Configuration and initialization} \label{ConfigurationAndInitialization} \hypertarget{ConfigurationAndInitialization}{} \input{ConfigurationAndInitialization} \part{StarPU Basics} \chapter{Organization} \label{IntroBasics} \hypertarget{IntroBasics}{} \input{IntroBasics} \chapter{StarPU Applications} \label{StarPUApplications} \hypertarget{StarPUApplications}{} \input{StarPUApplications} \chapter{Basic Examples} \label{BasicExamples} \hypertarget{BasicExamples}{} \input{BasicExamples} \chapter{Full Source Code for the ’Scaling a Vector’ Example} \label{FullSourceCodeVectorScal} \hypertarget{FullSourceCodeVectorScal}{} \input{FullSourceCodeVectorScal} \chapter{Tasks In StarPU} \label{TasksInStarPU} \hypertarget{TasksInStarPU}{} \input{TasksInStarPU} \chapter{Data Management} \label{DataManagement} \hypertarget{DataManagement}{} \input{DataManagement} \chapter{Scheduling} \label{Scheduling} \hypertarget{Scheduling}{} \input{Scheduling} \chapter{Examples in StarPU Sources} \label{ExamplesInStarPUSources} \hypertarget{ExamplesInStarPUSources}{} \input{ExamplesInStarPUSources} \part{StarPU Applications} \label{StarPUApplications} \chapter{Organization} \label{IntroApplications} \hypertarget{IntroApplications}{} \input{IntroApplications} \chapter{A Vector Scaling Application} \label{VectorScalingApplication} \hypertarget{VectorApplication}{} \input{VectorApplication} \chapter{A Stencil Application} \label{StencilApplication} \hypertarget{StencilApplication}{} \input{StencilApplication} \part{StarPU Performances} \chapter{Organization} \label{IntroPerformances} \hypertarget{IntroPerformances}{} \input{IntroPerformances} \chapter{Benchmarking StarPU} \label{BenchmarkingStarPU} \hypertarget{BenchmarkingStarPU}{} \input{BenchmarkingStarPU} \chapter{Online Performance Tools} \label{OnlinePerformanceTools} \hypertarget{OnlinePerformanceTools}{} \input{OnlinePerformanceTools} \chapter{Offline Performance Tools} \label{OfflinePerformanceTools} \hypertarget{OfflinePerformanceTools}{} \input{OfflinePerformanceTools} \part{StarPU FAQ} \chapter{Organization} \label{IntroFAQ} \hypertarget{IntroFAQ}{} \input{IntroFAQ} \chapter{Check List When Performance Are Not There} \label{CheckListWhenPerformanceAreNotThere} \hypertarget{CheckListWhenPerformanceAreNotThere}{} \input{CheckListWhenPerformanceAreNotThere} \chapter{Frequently Asked Questions} \label{FrequentlyAskedQuestions} \hypertarget{FrequentlyAskedQuestions}{} \input{FrequentlyAskedQuestions} \part{StarPU Language Bindings} \chapter{Organization} \label{IntroLanguage} \hypertarget{IntroLanguage}{} \input{IntroLanguage} \chapter{Native Fortran Support} \label{NativeFortranSupport} \hypertarget{NativeFortranSupport}{} \input{NativeFortranSupport} \chapter{StarPU Java Interface} \label{StarPUJavaInterface} \hypertarget{StarPUJavaInterface}{} \input{StarPUJavaInterface} \chapter{Python Interface} \label{PythonInterface} \hypertarget{PythonInterface}{} \input{PythonInterface} \chapter{The StarPU OpenMP Runtime Support (SORS)} \label{OpenMPRuntimeSupport} \hypertarget{OpenMPRuntimeSupport}{} \input{OpenMPRuntimeSupport} \part{StarPU Extensions} \chapter{Organization} \label{IntroExtensions} \hypertarget{IntroExtensions}{} \input{IntroExtensions} \chapter{Advanced Tasks In StarPU} \label{AdvancedTasksInStarPU} \hypertarget{AdvancedTasksInStarPU}{} \input{AdvancedTasksInStarPU} \chapter{Advanced Data Management} \label{AdvancedDataManagement} \hypertarget{AdvancedDataManagement}{} \input{AdvancedDataManagement} \chapter{Advanced Scheduling} \label{AdvancedScheduling} \hypertarget{AdvancedScheduling}{} \input{AdvancedScheduling} \chapter{Scheduling Contexts} \label{SchedulingContexts} \hypertarget{SchedulingContexts}{} \input{SchedulingContexts} \chapter{Scheduling Context Hypervisor} \label{SchedulingContextHypervisor} \hypertarget{SchedulingContextHypervisor}{} \input{SchedulingContextHypervisor} \chapter{How To Define a New Scheduling Policy} \label{HowToDefineANewSchedulingPolicy} \hypertarget{HowToDefineANewSchedulingPolicy}{} \input{HowToDefineANewSchedulingPolicy} \chapter{CUDA Support} \label{CUDASupport} \hypertarget{CUDASupport}{} \input{CUDASupport} \chapter{OpenCL Support} \label{OpenCLSupport} \hypertarget{OpenCLSupport}{} \input{OpenCLSupport} \chapter{Maxeler FPGA Support} \label{MaxFPGASupport} \hypertarget{MaxFPGASupport}{} \input{MaxFPGASupport} \chapter{Out Of Core} \label{OutOfCore} \hypertarget{OutOfCore}{} \input{OutOfCore} \chapter{MPI Support} \label{MPISupport} \hypertarget{MPISupport}{} \input{MPISupport} \chapter{TCP/IP Support} \label{TCPIPSupport} \hypertarget{TCPIPSupport}{} \input{TCPIPSupport} \chapter{Transactions} \label{Transactions} \hypertarget{Transactions}{} \input{Transactions} \chapter{Fault Tolerance} \label{FaultTolerance} \hypertarget{FaultTolerance}{} \input{FaultTolerance} \chapter{FFT Support} \label{FFTSupport} \hypertarget{FFTSupport}{} \input{FFTSupport} \chapter{SOCL OpenCL Extensions} \label{SOCLOpenclExtensions} \hypertarget{SOCLOpenclExtensions}{} \input{SOCLOpenclExtensions} \chapter{Hierarchical DAGS} \label{HierarchicalDAGS} \hypertarget{HierarchicalDAGS}{} \input{HierarchicalDAGS} \chapter{Parallel Workers} \label{ParallelWorker} \hypertarget{ParallelWorker}{} \input{ParallelWorker} \chapter{Interoperability Support} \label{InteropSupport} \hypertarget{InteropSupport}{} \input{InteroperabilitySupport} \chapter{SimGrid Support} \label{SimGridSupport} \hypertarget{SimGridSupport}{} \input{SimGridSupport} \chapter{Helpers} \label{Helpers} \hypertarget{Helpers}{} \input{Helpers} \chapter{Debugging Tools} \label{DebuggingTools} \hypertarget{DebuggingTools}{} \input{DebuggingTools} \part{Appendix} \chapter{The GNU Free Documentation License} \label{GNUFreeDocumentationLicense} \hypertarget{GNUFreeDocumentationLicense}{} \input{GNUFreeDocumentationLicense} \chapter{Module Index} \input{modules} \chapter{Module Documentation a.k.a StarPU's API} \label{ModuleDocumentation} \hypertarget{ModuleDocumentation}{} \input{group__API__Bitmap} \input{group__API__Bubble} \input{group__API__Codelet__And__Tasks} \input{group__API__CUDA__Extensions} \input{group__API__Data__Interfaces} \input{group__API__Data__Management} \input{group__API__Data__Partition} \input{group__API__Expert__Mode} \input{group__API__Explicit__Dependencies} \input{group__API__FFT__Support} \input{group__API__Fortran} \input{group__API__FxT__Support} \input{group__API__HeteroPrio} \input{group__API__HIP__Extensions} \input{group__API__Initialization__and__Termination} \input{group__API__Insert__Task} \input{group__API__Interop__Support} \input{group__API__Max__FPGA__Extensions} \input{group__API__Miscellaneous__Helpers} \input{group__API__Modularized__Scheduler} \input{group__API__MPI__FT__Support} \input{group__API__MPI__Support} \input{group__API__OpenCL__Extensions} \input{group__API__OpenMP__Runtime__Support} \input{group__API__Out__Of__Core} \input{group__API__Parallel__Tasks} \input{group__API__Parallel__Worker} \input{group__API__Perf__Monitoring} \input{group__API__Performance__Model} \input{group__API__Perf__Steering} \input{group__API__Profiling} \input{group__API__Profiling__Tool} \input{group__API__Random__Functions} \input{group__API__Running__Drivers} \input{group__API__Scheduler__Toolbox} \input{group__API__Scheduling__Contexts} \input{group__API__Scheduling__Policy} \input{group__API__SC__Hypervisor__LP} \input{group__API__SC__Hypervisor} \input{group__API__SC__Hypervisor__usage} \input{group__API__Sink} \input{group__API__Standard__Memory__Library} \input{group__API__Task__Bundles} \input{group__API__Task__Lists} \input{group__API__Theoretical__Lower__Bound__on__Execution__Time} \input{group__API__Threads} \input{group__API__Toolbox} \input{group__API__Transactions} \input{group__API__Tree} \input{group__API__Versioning} \input{group__API__Workers} \chapter{File Index} \input{files} \chapter{File Documentation} \label{FileDocumentation} \hypertarget{FileDocumentation}{} \input{starpu_8h} \input{starpu__bitmap_8h} \input{starpu__bound_8h} \input{starpu__config_8h} \input{starpu__cublas_8h} \input{starpu__cublas__v2_8h} \input{starpu__cublasLt_8h} \input{starpu__cusparse_8h} \input{starpu__cuda_8h} \input{starpu__data_8h} \input{starpu__data__filters_8h} \input{starpu__data__interfaces_8h} \input{starpu__deprecated__api_8h} \input{starpu__disk_8h} \input{starpu__driver_8h} \input{starpu__expert_8h} \input{starpu__fxt_8h} \input{starpu__hash_8h} \input{starpu__helper_8h} \input{starpu__heteroprio_8h} \input{starpu__hip_8h} \input{starpu__scheduler__toolbox_8h} \input{starpu__max__fpga_8h} \input{starpu__mod_8f90} \input{starpu__mpi_8h} \input{starpu__mpi__ft_8h} \input{starpu__mpi__lb_8h} \input{starpu__opencl_8h} \input{starpu__openmp_8h} \input{starpu__parallel__worker_8h} \input{starpu__perf__monitoring_8h} \input{starpu__perf__steering_8h} \input{starpu__perfmodel_8h} \input{starpu__profiling_8h} \input{starpu__profiling__tool_8h} \input{starpu__rand_8h} \input{starpu__sched__component_8h} \input{starpu__sched__ctx_8h} \input{starpu__sched__ctx__hypervisor_8h} \input{starpu__scheduler_8h} \input{starpu__simgrid__wrap_8h} \input{starpu__sink_8h} \input{starpu__stdlib_8h} \input{starpu__task_8h} \input{starpu__task__bundle_8h} \input{starpu__task__dep_8h} \input{starpu__task__list_8h} \input{starpu__task__util_8h} \input{starpu__thread_8h} \input{starpu__thread__util_8h} \input{starpu__tree_8h} \input{starpu__util_8h} \input{starpu__worker_8h} \input{starpufft_8h} \input{sc__hypervisor_8h} \input{sc__hypervisor__config_8h} \input{sc__hypervisor__lp_8h} \input{sc__hypervisor__monitoring_8h} \input{sc__hypervisor__policy_8h} \input{starpurm_8h} \chapter{Deprecated List} \label{deprecated} \hypertarget{deprecated}{} \input{deprecated} %\part{Index} \addcontentsline{toc}{chapter}{Index} \printindex \end{document} starpu-1.4.9+dfsg/doc/doxygen_dev/000077500000000000000000000000001507764646700170705ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_dev/Makefile.am000066400000000000000000000115431507764646700211300ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOX_DIR = $(top_builddir)/doc/doxygen_dev DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_dev DOX_HTML_DIR = html_dev DOX_LATEX_DIR = latex DOX_PDF = starpu_dev.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = config.h include $(top_srcdir)/doc/doxy.mk chapters = \ chapters/000_introduction.doxy \ chapters/010_core.doxy images = if STARPU_BUILD_DOC config.h: $(top_srcdir)/src/common/config.h.in @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ @$(SED) -i '1s/^/\/\*\* \@file \*\/\n/' $@ dox_inputs = $(DOX_CONFIG) \ $(chapters) \ config.h \ chapters/version.sty \ chapters/version.html \ $(top_srcdir)/src/datawizard/data_request.h \ $(top_srcdir)/src/datawizard/coherency.h \ $(top_srcdir)/src/datawizard/sort_data_handles.h \ $(top_srcdir)/src/datawizard/memalloc.h \ $(top_srcdir)/src/datawizard/copy_driver.h \ $(top_srcdir)/src/datawizard/filters.h \ $(top_srcdir)/src/datawizard/datastats.h \ $(top_srcdir)/src/datawizard/write_back.h \ $(top_srcdir)/src/datawizard/interfaces/data_interface.h \ $(top_srcdir)/src/datawizard/memory_manager.h \ $(top_srcdir)/src/datawizard/node_ops.h \ $(top_srcdir)/src/datawizard/memstats.h \ $(top_srcdir)/src/datawizard/datawizard.h \ $(top_srcdir)/src/datawizard/memory_nodes.h \ $(top_srcdir)/src/datawizard/footprint.h \ $(top_srcdir)/src/datawizard/malloc.h \ $(top_srcdir)/src/drivers/cpu/driver_cpu.h \ $(top_srcdir)/src/drivers/cuda/driver_cuda.h \ $(top_srcdir)/src/drivers/opencl/driver_opencl_utils.h \ $(top_srcdir)/src/drivers/opencl/driver_opencl.h \ $(top_srcdir)/src/drivers/disk/driver_disk.h \ $(top_srcdir)/src/drivers/mpi/driver_mpi_common.h \ $(top_srcdir)/src/drivers/mpi/driver_mpi_sink.h \ $(top_srcdir)/src/drivers/mpi/driver_mpi_source.h \ $(top_srcdir)/src/drivers/mp_common/sink_common.h \ $(top_srcdir)/src/drivers/mp_common/mp_common.h \ $(top_srcdir)/src/drivers/mp_common/source_common.h \ $(top_srcdir)/src/drivers/driver_common/driver_common.h \ $(top_srcdir)/src/parallel_worker/starpu_parallel_worker_create.h \ $(top_srcdir)/src/profiling/profiling.h \ $(top_srcdir)/src/profiling/bound.h \ $(top_srcdir)/src/util/starpu_data_cpy.h \ $(top_srcdir)/src/util/openmp_runtime_support.h \ $(top_srcdir)/src/util/starpu_task_insert_utils.h \ $(top_srcdir)/src/common/graph.h \ $(top_srcdir)/src/common/fxt.h \ $(top_srcdir)/src/common/starpu_spinlock.h \ $(top_srcdir)/src/common/rbtree_i.h \ $(top_srcdir)/src/common/rbtree.h \ $(top_srcdir)/src/common/timing.h \ $(top_srcdir)/src/common/rwlock.h \ $(top_srcdir)/src/common/barrier.h \ $(top_srcdir)/src/common/prio_list.h \ $(top_srcdir)/src/common/barrier_counter.h \ $(top_srcdir)/src/common/uthash.h \ $(top_srcdir)/src/common/knobs.h \ $(top_srcdir)/src/common/utils.h \ $(top_srcdir)/src/common/thread.h \ $(top_srcdir)/src/common/list.h \ $(top_srcdir)/src/debug/starpu_debug_helpers.h \ $(top_srcdir)/src/debug/traces/starpu_fxt.h \ $(top_srcdir)/src/sched_policies/fifo_queues.h \ $(top_srcdir)/src/sched_policies/helper_mct.h \ $(top_srcdir)/src/sched_policies/sched_component.h \ $(top_srcdir)/src/sched_policies/prio_deque.h \ $(top_srcdir)/src/core/jobs.h \ $(top_srcdir)/src/core/disk_ops/unistd/disk_unistd_global.h \ $(top_srcdir)/src/core/dependencies/tags.h \ $(top_srcdir)/src/core/dependencies/data_concurrency.h \ $(top_srcdir)/src/core/dependencies/implicit_data_deps.h \ $(top_srcdir)/src/core/dependencies/cg.h \ $(top_srcdir)/src/core/devices.h \ $(top_srcdir)/src/core/idle_hook.h \ $(top_srcdir)/src/core/sched_ctx_list.h \ $(top_srcdir)/src/core/perfmodel/multiple_regression.h \ $(top_srcdir)/src/core/perfmodel/perfmodel.h \ $(top_srcdir)/src/core/perfmodel/regression.h \ $(top_srcdir)/src/core/debug.h \ $(top_srcdir)/src/core/sched_ctx.h \ $(top_srcdir)/src/core/simgrid.h \ $(top_srcdir)/src/core/task_bundle.h \ $(top_srcdir)/src/core/topology.h \ $(top_srcdir)/src/core/combined_workers.h \ $(top_srcdir)/src/core/detect_combined_workers.h \ $(top_srcdir)/src/core/task.h \ $(top_srcdir)/src/core/disk.h \ $(top_srcdir)/src/core/sched_policy.h \ $(top_srcdir)/src/core/errorcheck.h \ $(top_srcdir)/src/core/progress_hook.h \ $(top_srcdir)/src/core/drivers.h \ $(top_srcdir)/src/core/workers.h endif starpu-1.4.9+dfsg/doc/doxygen_dev/Makefile.in000066400000000000000000001163541507764646700211470ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@am__append_5 = \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html subdir = doc/doxygen_dev ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = doxygen-config.cfg doxygen_filter.sh \ doxygen-config-include.cfg CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(txtdir)" DATA = $(txt_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(srcdir)/doxygen-config-include.cfg.in \ $(srcdir)/doxygen-config.cfg.in $(srcdir)/doxygen_filter.sh.in \ $(top_srcdir)/doc/doxy.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ DOX_DIR = $(top_builddir)/doc/doxygen_dev DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_dev DOX_HTML_DIR = html_dev DOX_LATEX_DIR = latex DOX_PDF = starpu_dev.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = config.h # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOXYGEN = doxygen PDFLATEX = pdflatex MAKEINDEX = makeindex txtdir = $(docdir)/manual EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ $(am__append_4) $(am__append_5) refman.tex $(chapters) \ $(images) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ -r \ @STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) chapters = \ chapters/000_introduction.doxy \ chapters/010_core.doxy images = @STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ $(chapters) \ @STARPU_BUILD_DOC_TRUE@ config.h \ @STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ chapters/version.html \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/data_request.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/coherency.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/sort_data_handles.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/memalloc.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/copy_driver.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/filters.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/datastats.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/write_back.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/interfaces/data_interface.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/memory_manager.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/node_ops.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/memstats.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/datawizard.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/memory_nodes.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/footprint.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/datawizard/malloc.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/cpu/driver_cpu.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/cuda/driver_cuda.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/opencl/driver_opencl_utils.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/opencl/driver_opencl.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/disk/driver_disk.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/mpi/driver_mpi_common.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/mpi/driver_mpi_sink.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/mpi/driver_mpi_source.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/mp_common/sink_common.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/mp_common/mp_common.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/mp_common/source_common.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/drivers/driver_common/driver_common.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/parallel_worker/starpu_parallel_worker_create.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/profiling/profiling.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/profiling/bound.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/util/starpu_data_cpy.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/util/openmp_runtime_support.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/util/starpu_task_insert_utils.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/graph.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/fxt.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/starpu_spinlock.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/rbtree_i.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/rbtree.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/timing.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/rwlock.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/barrier.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/prio_list.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/barrier_counter.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/uthash.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/knobs.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/utils.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/thread.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/common/list.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/debug/starpu_debug_helpers.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/debug/traces/starpu_fxt.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/sched_policies/fifo_queues.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/sched_policies/helper_mct.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/sched_policies/sched_component.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/sched_policies/prio_deque.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/jobs.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/disk_ops/unistd/disk_unistd_global.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/dependencies/tags.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/dependencies/data_concurrency.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/dependencies/implicit_data_deps.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/dependencies/cg.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/devices.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/idle_hook.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/sched_ctx_list.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/perfmodel/multiple_regression.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/perfmodel/perfmodel.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/perfmodel/regression.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/debug.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/sched_ctx.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/simgrid.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/task_bundle.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/topology.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/combined_workers.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/detect_combined_workers.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/task.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/disk.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/sched_policy.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/errorcheck.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/progress_hook.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/drivers.h \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/src/core/workers.h all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_dev/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign doc/doxygen_dev/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/doc/doxy.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ doxygen_filter.sh: $(top_builddir)/config.status $(srcdir)/doxygen_filter.sh.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ doxygen-config-include.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config-include.cfg.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-txtDATA: $(txt_DATA) @$(NORMAL_INSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ done uninstall-txtDATA: @$(NORMAL_UNINSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) tags TAGS: ctags CTAGS: cscope cscopelist: distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(txtdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-txtDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) install-exec-hook install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-txtDATA @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) uninstall-hook .MAKE: install-am install-exec-am install-strip uninstall-am .PHONY: all all-am check check-am clean clean-generic clean-libtool \ cscopelist-am ctags-am distclean distclean-generic \ distclean-libtool distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-exec-hook \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip install-txtDATA installcheck \ installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ uninstall-am uninstall-hook uninstall-txtDATA .PRECIOUS: Makefile @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_BUILD_DOC_TRUE@uninstall-hook: @STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@doxy: @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation <\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but dy doing so, it avoids opening the interactive menu when browsing files @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi @STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex @STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ @STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ @STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ @STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ @STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ @STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ break ; \ @STARPU_BUILD_DOC_TRUE@ fi; \ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@config.h: $(top_srcdir)/src/common/config.h.in @STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ @STARPU_BUILD_DOC_TRUE@ @$(SED) -i '1s/^/\/\*\* \@file \*\/\n/' $@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/doc/doxygen_dev/chapters/000077500000000000000000000000001507764646700207015ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_dev/chapters/000_introduction.doxy000066400000000000000000000015621507764646700247120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \mainpage Introduction \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html \section Motivation Motivation */ starpu-1.4.9+dfsg/doc/doxygen_dev/chapters/010_core.doxy000066400000000000000000000453761507764646700231350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /*! \page StarPUCore StarPU Core \section CoreEntities StarPU Core Entities TODO \subsection CoreEntitiesOverview Overview Execution entities: - worker: A worker (see \ref CoreEntitiesWorkers, \ref CoreEntitiesWorkersAndContexts) entity is a CPU thread created by StarPU to manage one computing unit. The computing unit can be a local CPU core, an accelerator or GPU device, or --- on the master side when running in master-slave distributed mode --- a remote slave computing node. It is responsible for querying scheduling policies for tasks to execute. - sched_context: A scheduling context (see \ref CoreEntitiesContexts, \ref CoreEntitiesWorkersAndContexts) is a logical set of workers governed by an instance of a scheduling policy. It defines the computing units to which the scheduling policy instance may assign work entities. - driver: A driver is the set of hardware-dependent routines used by a worker to initialize its associated computing unit, execute work entities on it, and finalize the computing unit usage at the end of the session. Work entities: - task: A task is a high level work request submitted to StarPU by the application, or internally by StarPU itself. - job: A job is a low level view of a work request. It is not exposed to the application. A job structure may be shared among several task structures in the case of a parallel task. Data entities: - data handle: A data handle is a high-level, application opaque object designating a piece of data currently registered to the StarPU data management layer. Internally, it is a \ref _starpu_data_state structure. - data replicate: A data replicate is a low-level object designating one copy of a piece of data registered to StarPU as a data handle, residing in one memory node managed by StarPU. It is not exposed to the application. \subsection CoreEntitiesWorkers Workers A worker is a CPU thread created by StarPU. Its role is to manage one computing unit. This computing unit can be a local CPU core, in which case, the worker thread manages the actual CPU core to which it is assigned; or it can be a computing device such as a GPU or an accelerator (or even a remote computing node when StarPU is running in distributed master-slave mode.) When a worker manages a computing device, the CPU core to which the worker's thread is by default exclusively assigned to the device management work and does not participate to computation. \subsubsection CoreEntitiesWorkersStates States Scheduling operations related state While a worker is conducting a scheduling operations, e.g. the worker is in the process of selecting a new task to execute, flag state_sched_op_pending is set to \c !0, otherwise it is set to \c 0. While state_sched_op_pending is !0, the following exhaustive list of operations on that workers are restricted in the stated way: - adding the worker to a context is not allowed; - removing the worker from a context is not allowed; - adding the worker to a parallel task team is not allowed; - removing the worker from a parallel task team is not allowed; - querying state information about the worker is only allowed while state_relax_refcnt > 0; - in particular, querying whether the worker is blocked on a parallel team entry is only allowed while state_relax_refcnt > 0. Entering and leaving the state_sched_op_pending state is done through calls to \ref _starpu_worker_enter_sched_op() and \ref _starpu_worker_leave_sched_op() respectively (see these functions in use in functions \ref _starpu_get_worker_task() and \ref _starpu_get_multi_worker_task()). These calls ensure that any pending conflicting operation deferred while the worker was in the state_sched_op_pending state is performed in an orderly manner.
    Scheduling contexts related states Flag \c state_changing_ctx_notice is set to \c !0 when a thread is about to add the worker to a scheduling context or remove it from a scheduling context, and is currently waiting for a safe window to do so, until the targeted worker is not in a scheduling operation or parallel task operation anymore. This flag set to \c !0 will also prevent the targeted worker to attempt a fresh scheduling operation or parallel task operation to avoid starving conditions. However, a scheduling operation that was already in progress before the notice is allowed to complete. Flag \c state_changing_ctx_waiting is set to \c !0 when a scheduling context worker addition or removal involving the targeted worker is about to occur and the worker is currently performing a scheduling operation to tell the targeted worker that the initiator thread is waiting for the scheduling operation to complete and should be woken up upon completion.
    Relaxed synchronization related states Any StarPU worker may participate to scheduling operations, and in this process, may be forced to observe state information from other workers. A StarPU worker thread may therefore be observed by any thread, even other StarPU workers. Since workers may observe each other in any order, it is not possible to rely exclusively on the \c sched_mutex of each worker to protect the observation of worker state flags by other workers, because worker A observing worker B would involve locking workers in (A B) sequence, while worker B observing worker A would involve locking workers in (B A) sequence, leading to lock inversion deadlocks. In consequence, no thread must hold more than one worker's sched_mutex at any time. Instead, workers implement a relaxed locking scheme based on the \c state_relax_refcnt counter, itself protected by the worker's sched_mutex. When state_relax_refcnt > 0, the targeted worker state flags may be observed, otherwise the thread attempting the observation must repeatedly wait on the targeted worker's \c sched_cond condition until state_relax_refcnt > 0. The relaxed mode, while on, can actually be seen as a transactional consistency model, where concurrent accesses are authorized and potential conflicts are resolved after the fact. When the relaxed mode is off, the consistency model becomes a mutual exclusion model, where the sched_mutex of the worker must be held in order to access or change the worker state.
    Parallel tasks related states When a worker is scheduled to participate to the execution of a parallel task, it must wait for the whole team of workers participating to the execution of this task to be ready. While the worker waits for its teammates, it is not available to run other tasks or perform other operations. Such a waiting operation can therefore not start while conflicting operations such as scheduling operations and scheduling context resizing involving the worker are on-going. Conversely these operations and other may query whether the worker is blocked on a parallel task entry with \ref starpu_worker_is_blocked_in_parallel(). The \ref starpu_worker_is_blocked_in_parallel() function is allowed to proceed while and only while state_relax_refcnt > 0. Due to the relaxed worker locking scheme, the \c state_blocked_in_parallel flag of the targeted worker may change after it has been observed by an observer thread. In consequence, flag \c state_blocked_in_parallel_observed of the targeted worker is set to \c 1 by the observer immediately after the observation to "taint" the targeted worker. The targeted worker will clear the \c state_blocked_in_parallel_observed flag tainting and defer the processing of parallel task related requests until a full scheduling operation shot completes without the \c state_blocked_in_parallel_observed flag being tainted again. The purpose of this tainting flag is to prevent parallel task operations to be started immediately after the observation of a transient scheduling state. Worker's management of parallel tasks is governed by the following set of state flags and counters: - \c state_blocked_in_parallel: set to \c !0 while the worker is currently blocked on a parallel task; - \c state_blocked_in_parallel_observed: set to \c !0 to taint the worker when a thread has observed the state_blocked_in_parallel flag of this worker while its \c state_relax_refcnt state counter was \c >0. Any pending request to add or remove the worker from a parallel task team will be deferred until a whole scheduling operation shot completes without being tainted again. - \c state_block_in_parallel_req: set to \c !0 when a thread is waiting on a request for the worker to be added to a parallel task team. Must be protected by the worker's \c sched_mutex. - \c state_block_in_parallel_ack: set to \c !0 by the worker when acknowledging a request for being added to a parallel task team. Must be protected by the worker's \c sched_mutex. - \c state_unblock_in_parallel_req: set to \c !0 when a thread is waiting on a request for the worker to be removed from a parallel task team. Must be protected by the worker's \c sched_mutex. - \c state_unblock_in_parallel_ack: set to \c !0 by the worker when acknowledging a request for being removed from a parallel task team. Must be protected by the worker's \c sched_mutex. - \c block_in_parallel_ref_count: counts the number of consecutive pending requests to enter parallel task teams. Only the first of a train of requests for entering parallel task teams triggers the transition of the \c state_block_in_parallel_req flag from \c 0 to \c 1. Only the last of a train of requests to leave a parallel task team triggers the transition of flag \c state_unblock_in_parallel_req from \c 0 to \c 1. Must be protected by the worker's \c sched_mutex. \subsubsection CoreEntitiesWorkersOperations Operations Entry point All the operations of a worker are handled in an iterative fashion, either by the application code on a thread launched by the application, or automatically by StarPU on a device-dependent CPU thread launched by StarPU. Whether a worker's operation cycle is managed automatically or not is controlled per session by the field \c not_launched_drivers of the \c starpu_conf struct, and is decided in \c _starpu_launch_drivers function. When managed automatically, cycles of operations for a worker are handled by the corresponding driver specific _starpu__worker() function, where \c DRV is a driver name such as cpu (\c _starpu_cpu_worker) or cuda (\c _starpu_cuda_worker), for instance. Otherwise, the application must supply a thread which will repeatedly call \ref starpu_driver_run_once() for the corresponding worker. In both cases, control is then transferred to \c _starpu_cpu_driver_run_once (or the corresponding driver specific func). The cycle of operations typically includes, at least, the following operations: - task scheduling - parallel task team build-up - task input processing - data transfer processing - task execution When the worker cycles are handled by StarPU automatically, the iterative operation processing ends when the \c running field of \c _starpu_config becomes false. This field should not be read directly, instead it should be read through the \ref _starpu_machine_is_running() function.
    Task scheduling If the worker does not yet have a queued task, it calls _starpu_get_worker_task() to try and obtain a task. This may involve scheduling operations such as stealing a queued but not yet executed task from another worker. The operation may not necessarily succeed if no tasks are ready and/or suitable to run on the worker's computing unit.
    Parallel task team build-up If the worker has a task ready to run and the corresponding job has a size \c >1, then the task is a parallel job and the worker must synchronize with the other workers participating to the parallel execution of the job to assign a unique rank for each worker. The synchronization is done through the job's \c sync_mutex mutex.
    Task input processing Before the task can be executed, its input data must be made available on a memory node reachable by the worker's computing unit. To do so, the worker calls \ref _starpu_fetch_task_input()
    Data transfer processing The worker makes pending data transfers (involving memory node(s) that it is driving) progress, with a call to \ref __starpu_datawizard_progress(),
    Task execution Once the worker has a pending task assigned and the input data for that task are available in the memory node reachable by the worker's computing unit, the worker calls \c _starpu_cpu_driver_execute_task (or the corresponding driver specific function) to proceed to the execution of the task. \subsection CoreEntitiesContexts Scheduling Contexts A scheduling context is a logical set of workers governed by an instance of a scheduling policy. Tasks submitted to a given scheduling context are confined to the computing units governed by the workers belonging to this scheduling context at the time they get scheduled. A scheduling context is identified by an unsigned integer identifier between \c 0 and STARPU_NMAX_SCHED_CTXS - 1. The \c STARPU_NMAX_SCHED_CTXS identifier value is reserved to indicated an unallocated, invalid or deleted scheduling context. Accesses to the scheduling context structure are governed by a multiple-readers/single-writer lock (\c rwlock field). Changes to the structure contents, additions or removals of workers, statistics updates, all must be done with proper exclusive write access. \subsection CoreEntitiesWorkersAndContexts Workers and Scheduling Contexts A worker can be assigned to one or more scheduling contexts. It exclusively receives tasks submitted to the scheduling context(s) it is currently assigned at the time such tasks are scheduled. A worker may add itself to or remove itself from a scheduling context.
    Locking and synchronization rules between workers and scheduling contexts A thread currently holding a worker sched_mutex must not attempt to acquire a scheduling context rwlock, neither for writing nor for reading. Such an attempt constitutes a lock inversion and may result in a deadlock. A worker currently in a scheduling operation must enter the relaxed state before attempting to acquire a scheduling context rwlock, either for reading or for writing. When the set of workers assigned to a scheduling context is about to be modified, all the workers in the union between the workers belonging to the scheduling context before the change and the workers expected to belong to the scheduling context after the change must be notified using the \c notify_workers_about_changing_ctx_pending function prior to the update. After the update, all the workers in that same union must be notified for the update completion with a call to \c notify_workers_about_changing_ctx_done. The function \c notify_workers_about_changing_ctx_pending places every worker passed in argument in a state compatible with changing the scheduling context assignment of that worker, possibly blocking until that worker leaves incompatible states such as a pending scheduling operation. If the caller of \c notify_workers_about_changing_ctx_pending() is itself a worker included in the set of workers passed in argument, it does not notify itself, with the assumption that the worker is already calling \c notify_workers_about_changing_ctx_pending() from a state compatible with a scheduling context assignment update. Once a worker has been notified about a scheduling context change pending, it cannot proceed with incompatible operations such as a scheduling operation until it receives a notification that the context update operation is complete. \subsection CoreEntitiesDrivers Drivers Each driver defines a set of routines depending on some specific hardware. These routines include hardware discovery/initialization, task execution, device memory management and data transfers. While most hardware dependent routines are in source files located in the \c /src/drivers subdirectory of the StarPU tree, some can be found elsewhere in the tree such as \c src/datawizard/malloc.c for memory allocation routines or the subdirectories of \c src/datawizard/interfaces/ for data transfer routines. The driver ABI defined in the \ref _starpu_driver_ops structure includes the following operations: - \c .init: initialize a driver instance for the calling worker managing a hardware computing unit compatible with this driver. - \c .run_once: perform a single driver progress cycle for the calling worker (see \ref CoreEntitiesWorkersOperations). - \c .deinit: deinitialize the driver instance for the calling worker - \c .run: executes the following sequence automatically: call \c .init, repeatedly call \c .run_once until the function \ref _starpu_machine_is_running() returns false, call \c .deinit. The source code common to all drivers is shared in src/drivers/driver_common/driver_common.[ch]. This file includes services such as grabbing a new task to execute on a worker, managing statistics accounting on job startup and completion and updating the worker status \subsubsection CoreEntitiesDriversMP Master/Slave Drivers A subset of the drivers corresponds to drivers managing computing units in master/slave mode, that is, drivers involving a local master instance managing one or more remote slave instances on the targeted device(s). This includes devices such as discrete manycore accelerators (e.g. Intel's Knight Corners board, for instance), or pseudo devices such as a cluster of cpu nodes driver through StarPU's MPI master/slave mode. A driver instance on the master side is named the \b source, while a driver instances on the slave side is named the \b sink. A significant part of the work realized on the source and sink sides of master/slave drivers is identical among all master/slave drivers, due to the similarities in the software pattern. Therefore, many routines are shared among all these drivers in the \c src/drivers/mp_common subdirectory. In particular, a set of default commands to be used between sources and sinks is defined, assuming the availability of some communication channel between them (see enum \ref _starpu_mp_command) TODO \subsection CoreEntitiesTasksJobs Tasks and Jobs TODO \subsection CoreEntitiesData Data TODO */ starpu-1.4.9+dfsg/doc/doxygen_dev/chapters/version.html000066400000000000000000000001401507764646700232470ustar00rootroot00000000000000This manual documents the version 1.4.9 of StarPU. Its contents was last updated on 2025-10-24. starpu-1.4.9+dfsg/doc/doxygen_dev/chapters/version.sty000066400000000000000000000001131507764646700231220ustar00rootroot00000000000000\newcommand{\STARPUUPDATED}{2025-10-24} \newcommand{\STARPUVERSION}{1.4.9} starpu-1.4.9+dfsg/doc/doxygen_dev/doxygen-config-include.cfg.in000066400000000000000000000013161507764646700245200ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # # do not delete that file even if it's empty starpu-1.4.9+dfsg/doc/doxygen_dev/doxygen-config.cfg.in000066400000000000000000000147441507764646700231100ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2011-2011 Télécom Sud Paris # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # INPUT = @top_srcdir@/doc/doxygen_dev/chapters/000_introduction.doxy \ @top_srcdir@/doc/doxygen_dev/chapters/010_core.doxy \ @top_builddir@/doc/doxygen_dev/config.h \ @top_srcdir@/include/starpu_driver.h \ @top_srcdir@/include/starpu_worker.h \ @top_builddir@/doc/doxygen/starpu_config.h \ @top_srcdir@/src/datawizard/data_request.h \ @top_srcdir@/src/datawizard/coherency.h \ @top_srcdir@/src/datawizard/sort_data_handles.h \ @top_srcdir@/src/datawizard/memalloc.h \ @top_srcdir@/src/datawizard/copy_driver.h \ @top_srcdir@/src/datawizard/filters.h \ @top_srcdir@/src/datawizard/datastats.h \ @top_srcdir@/src/datawizard/write_back.h \ @top_srcdir@/src/datawizard/interfaces/data_interface.h \ @top_srcdir@/src/datawizard/memory_manager.h \ @top_srcdir@/src/datawizard/node_ops.h \ @top_srcdir@/src/datawizard/memstats.h \ @top_srcdir@/src/datawizard/datawizard.h \ @top_srcdir@/src/datawizard/memory_nodes.h \ @top_srcdir@/src/datawizard/footprint.h \ @top_srcdir@/src/datawizard/malloc.h \ @top_srcdir@/src/drivers/cpu/driver_cpu.h \ @top_srcdir@/src/drivers/cuda/driver_cuda.h \ @top_srcdir@/src/drivers/opencl/driver_opencl_utils.h \ @top_srcdir@/src/drivers/opencl/driver_opencl.h \ @top_srcdir@/src/drivers/disk/driver_disk.h \ @top_srcdir@/src/drivers/mpi/driver_mpi_common.h \ @top_srcdir@/src/drivers/mpi/driver_mpi_sink.h \ @top_srcdir@/src/drivers/mpi/driver_mpi_source.h \ @top_srcdir@/src/drivers/mp_common/sink_common.h \ @top_srcdir@/src/drivers/mp_common/mp_common.h \ @top_srcdir@/src/drivers/mp_common/source_common.h \ @top_srcdir@/src/drivers/driver_common/driver_common.h \ @top_srcdir@/src/parallel_worker/starpu_parallel_worker_create.h \ @top_srcdir@/src/profiling/profiling.h \ @top_srcdir@/src/profiling/bound.h \ @top_srcdir@/src/util/starpu_data_cpy.h \ @top_srcdir@/src/util/openmp_runtime_support.h \ @top_srcdir@/src/util/starpu_task_insert_utils.h \ @top_srcdir@/src/common/graph.h \ @top_srcdir@/src/common/fxt.h \ @top_srcdir@/src/common/starpu_spinlock.h \ @top_srcdir@/src/common/rbtree_i.h \ @top_srcdir@/src/common/rbtree.h \ @top_srcdir@/src/common/timing.h \ @top_srcdir@/src/common/rwlock.h \ @top_srcdir@/src/common/barrier.h \ @top_srcdir@/src/common/prio_list.h \ @top_srcdir@/src/common/barrier_counter.h \ @top_srcdir@/src/common/uthash.h \ @top_srcdir@/src/common/knobs.h \ @top_srcdir@/src/common/utils.h \ @top_srcdir@/src/common/thread.h \ @top_srcdir@/src/common/list.h \ @top_srcdir@/src/debug/starpu_debug_helpers.h \ @top_srcdir@/src/debug/traces/starpu_fxt.h \ @top_srcdir@/src/sched_policies/fifo_queues.h \ @top_srcdir@/src/sched_policies/helper_mct.h \ @top_srcdir@/src/sched_policies/sched_component.h \ @top_srcdir@/src/sched_policies/prio_deque.h \ @top_srcdir@/src/core/jobs.h \ @top_srcdir@/src/core/disk_ops/unistd/disk_unistd_global.h \ @top_srcdir@/src/core/dependencies/tags.h \ @top_srcdir@/src/core/dependencies/data_concurrency.h \ @top_srcdir@/src/core/dependencies/implicit_data_deps.h \ @top_srcdir@/src/core/dependencies/cg.h \ @top_srcdir@/src/core/idle_hook.h \ @top_srcdir@/src/core/sched_ctx_list.h \ @top_srcdir@/src/core/perfmodel/multiple_regression.h \ @top_srcdir@/src/core/perfmodel/perfmodel.h \ @top_srcdir@/src/core/perfmodel/regression.h \ @top_srcdir@/src/core/debug.h \ @top_srcdir@/src/core/sched_ctx.h \ @top_srcdir@/src/core/simgrid.h \ @top_srcdir@/src/core/task_bundle.h \ @top_srcdir@/src/core/topology.h \ @top_srcdir@/src/core/combined_workers.h \ @top_srcdir@/src/core/detect_combined_workers.h \ @top_srcdir@/src/core/task.h \ @top_srcdir@/src/core/disk.h \ @top_srcdir@/src/core/sched_policy.h \ @top_srcdir@/src/core/errorcheck.h \ @top_srcdir@/src/core/progress_hook.h \ @top_srcdir@/src/core/drivers.h \ @top_srcdir@/src/core/workers.h \ @top_srcdir@/mpi/src/starpu_mpi_init.h \ @top_srcdir@/mpi/src/starpu_mpi_datatype.h \ @top_srcdir@/mpi/src/starpu_mpi_task_insert.h \ @top_srcdir@/mpi/src/starpu_mpi_select_node.h \ @top_srcdir@/mpi/src/starpu_mpi_fxt.h \ @top_srcdir@/mpi/src/starpu_mpi_cache.h \ @top_srcdir@/mpi/src/mpi/starpu_mpi_mpi_backend.h \ @top_srcdir@/mpi/src/mpi/starpu_mpi_driver.h \ @top_srcdir@/mpi/src/mpi/starpu_mpi_early_data.h \ @top_srcdir@/mpi/src/mpi/starpu_mpi_comm.h \ @top_srcdir@/mpi/src/mpi/starpu_mpi_tag.h \ @top_srcdir@/mpi/src/mpi/starpu_mpi_mpi.h \ @top_srcdir@/mpi/src/mpi/starpu_mpi_early_request.h \ @top_srcdir@/mpi/src/mpi/starpu_mpi_sync_data.h \ @top_srcdir@/mpi/src/load_balancer/policy/load_data_interface.h \ @top_srcdir@/mpi/src/load_balancer/policy/load_balancer_policy.h \ @top_srcdir@/mpi/src/load_balancer/policy/data_movements_interface.h \ @top_srcdir@/mpi/src/nmad/starpu_mpi_nmad_unknown_datatype.h \ @top_srcdir@/mpi/src/nmad/starpu_mpi_nmad_backend.h \ @top_srcdir@/mpi/src/nmad/starpu_mpi_nmad.h \ @top_srcdir@/mpi/src/starpu_mpi_stats.h \ @top_srcdir@/mpi/src/starpu_mpi_private.h \ @top_srcdir@/mpi/src/starpu_mpi_cache_stats.h \ @top_srcdir@/starpurm/src/starpurm_private.h EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ @top_srcdir@/doc/doxygen_dev \ @top_srcdir@/doc/doxygen/chapters INPUT_FILTER = @top_builddir@/doc/doxygen_dev/doxygen_filter.sh #LATEX_HEADER = @top_srcdir@/doc/doxygen_dev/refman.tex #IMAGE_PATH = @top_srcdir@/doc/doxygen_dev/chapters/images GENERATE_LATEX = @DOC_GENERATE_LATEX@ PROJECT_NAME = "StarPU Internal Handbook" HTML_OUTPUT = html_dev starpu-1.4.9+dfsg/doc/doxygen_dev/doxygen_filter.sh.in000077500000000000000000000024301507764646700230550ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if [ "$(basename $1)" == "starpufft.h" ] ; then gcc -E $1 -I @top_srcdir@/include/ -I @top_builddir@/include/ |grep -i starpufft else # the macro STARPU_DEPRECATED needs to be removed as it is not properly processed by doxygen # lines starting with // in the doxygen input files are considered as comments to be removed # replace LIST_TYPE foo by struct foo so that doxygen processes it correctly # we assume lines only containing ); are ending of the definition of such a struct sed -e 's/STARPU_DEPRECATED//' $1 | sed 's/^\/\/.*//' | sed 's/LIST_TYPE(\(.*\),/struct \1 {/' | sed 's/^);/};/' fi starpu-1.4.9+dfsg/doc/doxygen_dev/refman.tex000066400000000000000000000075601507764646700210720ustar00rootroot00000000000000% StarPU --- Runtime system for heterogeneous multicore architectures. % % Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria % % StarPU is free software; you can redistribute it and/or modify % it under the terms of the GNU Lesser General Public License as published by % the Free Software Foundation; either version 2.1 of the License, or (at % your option) any later version. % % StarPU is distributed in the hope that it will be useful, but % WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. % % See the GNU Lesser General Public License in COPYING.LGPL for more details. % \newcommand\starputitle{StarPU Internal Handbook} \setcounter{tocdepth}{1} \input{./title.tex} \chapter{Introduction} \label{index} \hypertarget{index}{} \input{index} \chapter{Star\+PU Core} \label{StarPUCore} \hypertarget{StarPUCore}{} \input{StarPUCore} \chapter{Module Index} \input{modules} \chapter{Module Documentation} \label{ModuleDocumentation} \hypertarget{ModuleDocumentation}{} \input{group__workers} \chapter{File Index} \input{files} \chapter{StarPU File Documentation} \input{barrier_8h} \input{barrier__counter_8h} \input{bound_8h} \input{cg_8h} \input{coherency_8h} \input{combined__workers_8h} \input{config_8h} \input{copy__driver_8h} \input{data__concurrency_8h} \input{data__interface_8h} \input{data__request_8h} \input{datastats_8h} \input{datawizard_8h} \input{debug_8h} \input{detect__combined__workers_8h} \input{disk_8h} \input{disk__unistd__global_8h} \input{driver__common_8h} \input{driver__cpu_8h} \input{driver__cuda_8h} \input{driver__disk_8h} \input{driver__mpi__common_8h} \input{driver__mpi__sink_8h} \input{driver__mpi__source_8h} \input{driver__opencl_8h} \input{driver__opencl__utils_8h} \input{drivers_8h} \input{errorcheck_8h} \input{fifo__queues_8h} \input{filters_8h} \input{footprint_8h} \input{fxt_8h} \input{graph_8h} \input{helper__mct_8h} \input{idle__hook_8h} \input{implicit__data__deps_8h} \input{jobs_8h} \input{knobs_8h} \input{malloc_8h} \input{memalloc_8h} \input{memory__manager_8h} \input{memory__nodes_8h} \input{memstats_8h} \input{mp__common_8h} \input{multiple__regression_8h} \input{node__ops_8h} \input{openmp__runtime__support_8h} \input{perfmodel_8h} \input{prio__deque_8h} \input{prio__list_8h} \input{profiling_8h} \input{progress__hook_8h} \input{rbtree_8h} \input{rbtree__i_8h} \input{regression_8h} \input{rwlock_8h} \input{sched__component_8h} \input{sched__ctx_8h} \input{sched__ctx__list_8h} \input{sched__policy_8h} \input{simgrid_8h} \input{sink__common_8h} \input{sort__data__handles_8h} \input{source__common_8h} \input{starpu__parallel__worker__create_8h} \input{starpu__data__cpy_8h} \input{starpu__debug__helpers_8h} \input{starpu__fxt_8h} \input{starpu__spinlock_8h} \input{starpu__task__insert__utils_8h} \input{tags_8h} \input{task_8h} \input{task__bundle_8h} \input{thread_8h} \input{timing_8h} \input{topology_8h} \input{utils_8h} \input{uthash_8h} \input{write__back_8h} \chapter{StarPU MPI File Documentation} \input{starpu__mpi__cache_8h} \input{starpu__mpi__driver_8h} \input{starpu__mpi__init_8h} \input{starpu__mpi__nmad__backend_8h} \input{starpu__mpi__stats_8h} \input{starpu__mpi__cache__stats_8h} \input{starpu__mpi__early__data_8h} \input{starpu__mpi__mpi_8h} \input{starpu__mpi__nmad__unknown__datatype_8h} \input{starpu__mpi__sync__data_8h} \input{starpu__mpi__comm_8h} \input{starpu__mpi__early__request_8h} \input{starpu__mpi__mpi__backend_8h} \input{starpu__mpi__private_8h} \input{starpu__mpi__tag_8h} \input{starpu__mpi__datatype_8h} \input{starpu__mpi__fxt_8h} \input{starpu__mpi__nmad_8h} \input{starpu__mpi__select__node_8h} \input{starpu__mpi__task__insert_8h} \input{load__balancer__policy_8h} \input{load__data__interface_8h} \input{data__movements__interface_8h} \chapter{StarPU Resource Manager File Documentation} \input{starpurm__private_8h} \end{document} starpu-1.4.9+dfsg/doc/doxygen_web_applications/000077500000000000000000000000001507764646700216355ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_applications/Makefile.am000066400000000000000000000040011507764646700236640ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOX_DIR = $(top_builddir)/doc/doxygen_web_applications DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_applications DOX_HTML_DIR = html_web_applications DOX_LATEX_DIR = latex DOX_PDF = starpu_web_applications.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h include $(top_srcdir)/doc/doxy.mk chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_applications/applications_intro.doxy \ ../doxygen/chapters/starpu_applications/vector_scaling.doxy \ ../doxygen/chapters/starpu_applications/code/vector_scal_c.c \ ../doxygen/chapters/starpu_applications/code/vector_scal_c_align.c \ ../doxygen/chapters/starpu_applications/code/vector_scal_cpu.c \ ../doxygen/chapters/starpu_applications/code/vector_scal_starpu.c \ ../doxygen/chapters/starpu_applications/stencil.doxy \ ../doxygen/chapters/starpu_applications/code/stencil5.c \ ../doxygen/chapters/starpu_applications/code/stencil5_starpu.c \ ../doxygen/chapters/starpu_applications/code/stencil5_starpu_mpi.c \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = if STARPU_BUILD_DOC starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ dox_inputs = $(DOX_CONFIG) \ $(chapters) \ starpu_config.h \ chapters/version.sty \ chapters/version.html endif starpu-1.4.9+dfsg/doc/doxygen_web_applications/Makefile.in000066400000000000000000001042521507764646700237060ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@am__append_5 = \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html subdir = doc/doxygen_web_applications ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = doxygen-config.cfg CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(txtdir)" DATA = $(txt_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ DOX_DIR = $(top_builddir)/doc/doxygen_web_applications DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_applications DOX_HTML_DIR = html_web_applications DOX_LATEX_DIR = latex DOX_PDF = starpu_web_applications.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOXYGEN = doxygen PDFLATEX = pdflatex MAKEINDEX = makeindex txtdir = $(docdir)/manual EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ $(am__append_4) $(am__append_5) refman.tex $(chapters) \ $(images) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ -r \ @STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_applications/applications_intro.doxy \ ../doxygen/chapters/starpu_applications/vector_scaling.doxy \ ../doxygen/chapters/starpu_applications/code/vector_scal_c.c \ ../doxygen/chapters/starpu_applications/code/vector_scal_c_align.c \ ../doxygen/chapters/starpu_applications/code/vector_scal_cpu.c \ ../doxygen/chapters/starpu_applications/code/vector_scal_starpu.c \ ../doxygen/chapters/starpu_applications/stencil.doxy \ ../doxygen/chapters/starpu_applications/code/stencil5.c \ ../doxygen/chapters/starpu_applications/code/stencil5_starpu.c \ ../doxygen/chapters/starpu_applications/code/stencil5_starpu_mpi.c \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = @STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ $(chapters) \ @STARPU_BUILD_DOC_TRUE@ starpu_config.h \ @STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ chapters/version.html all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_applications/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign doc/doxygen_web_applications/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/doc/doxy.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-txtDATA: $(txt_DATA) @$(NORMAL_INSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ done uninstall-txtDATA: @$(NORMAL_UNINSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) tags TAGS: ctags CTAGS: cscope cscopelist: distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(txtdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-txtDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) install-exec-hook install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-txtDATA @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) uninstall-hook .MAKE: install-am install-exec-am install-strip uninstall-am .PHONY: all all-am check check-am clean clean-generic clean-libtool \ cscopelist-am ctags-am distclean distclean-generic \ distclean-libtool distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-exec-hook \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip install-txtDATA installcheck \ installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ uninstall-am uninstall-hook uninstall-txtDATA .PRECIOUS: Makefile @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_BUILD_DOC_TRUE@uninstall-hook: @STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@doxy: @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation <\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but dy doing so, it avoids opening the interactive menu when browsing files @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi @STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex @STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ @STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ @STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ @STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ @STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ @STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ break ; \ @STARPU_BUILD_DOC_TRUE@ fi; \ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/doc/doxygen_web_applications/chapters/000077500000000000000000000000001507764646700234465ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_applications/chapters/version.html000066400000000000000000000001401507764646700260140ustar00rootroot00000000000000This manual documents the version 1.4.9 of StarPU. Its contents was last updated on 2025-10-24. starpu-1.4.9+dfsg/doc/doxygen_web_applications/chapters/version.sty000066400000000000000000000001131507764646700256670ustar00rootroot00000000000000\newcommand{\STARPUUPDATED}{2025-10-24} \newcommand{\STARPUVERSION}{1.4.9} starpu-1.4.9+dfsg/doc/doxygen_web_applications/doxygen-config.cfg.in000066400000000000000000000043131507764646700256440ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Simon Archipoff # Copyright (C) 2011-2011 Télécom Sud Paris # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_applications/applications_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_applications/vector_scaling.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_applications/stencil.doxy \ @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ @top_srcdir@/doc/doxygen/chapters/files.doxy \ @top_srcdir@/doc/doxygen/chapters/api EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ @top_srcdir@/doc/doxygen/chapters \ @top_srcdir@/doc/doxygen/chapters/starpu_applications/code INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh #LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images GENERATE_LATEX = @DOC_GENERATE_LATEX@ HTML_OUTPUT = html_web_applications @INCLUDE_PATH = ../../doc/doxygen/ PROJECT_NAME = "StarPU Handbook - StarPU Applications" ALIASES += "intropage{2} = \mainpage" ALIASES += "webforeword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" ALIASES += "foreword = " starpu-1.4.9+dfsg/doc/doxygen_web_applications/refman.tex000066400000000000000000000026071507764646700236340ustar00rootroot00000000000000% StarPU --- Runtime system for heterogeneous multicore architectures. % % Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria % Copyright (C) 2013-2013 Simon Archipoff % % StarPU is free software; you can redistribute it and/or modify % it under the terms of the GNU Lesser General Public License as published by % the Free Software Foundation; either version 2.1 of the License, or (at % your option) any later version. % % StarPU is distributed in the hope that it will be useful, but % WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. % % See the GNU Lesser General Public License in COPYING.LGPL for more details. % \newcommand\starputitle{StarPU Handbook - StarPU Applications} \setcounter{tocdepth}{2} \input{./title.tex} \chapter{Organization} \label{index} \hypertarget{index}{} \input{index} \chapter{A Vector Scaling Application} \label{VectorScalingApplication} \hypertarget{VectorApplication}{} \input{VectorApplication} \chapter{A Stencil Application} \label{StencilApplication} \hypertarget{StencilApplication}{} \input{StencilApplication} \part{Appendix} \chapter{The GNU Free Documentation License} \label{GNUFreeDocumentationLicense} \hypertarget{GNUFreeDocumentationLicense}{} \input{GNUFreeDocumentationLicense} %\part{Index} %#\addcontentsline{toc}{chapter}{Index} %\printindex \end{document} starpu-1.4.9+dfsg/doc/doxygen_web_basics/000077500000000000000000000000001507764646700204135ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_basics/Makefile.am000066400000000000000000000113721507764646700224530ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOX_DIR = $(top_builddir)/doc/doxygen_web_basics DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_basics DOX_HTML_DIR = html_web_basics DOX_LATEX_DIR = latex DOX_PDF = starpu_web_basics.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h include $(top_srcdir)/doc/doxy.mk chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_basics/basics_intro.doxy \ ../doxygen/chapters/starpu_basics/starpu_applications.doxy \ ../doxygen/chapters/starpu_basics/basic_examples.doxy \ ../doxygen/chapters/starpu_basics/scaling_vector_example.doxy \ ../doxygen/chapters/starpu_basics/tasks.doxy \ ../doxygen/chapters/starpu_basics/data_management.doxy \ ../doxygen/chapters/starpu_basics/scheduling.doxy \ ../doxygen/chapters/starpu_basics/examples_sources.doxy \ ../doxygen/chapters/starpu_basics/code/basics_vector_scal_c.c \ ../doxygen/chapters/starpu_basics/code/basics_vector_scal_cpu.c \ ../doxygen/chapters/starpu_basics/code/basics_vector_scal_cuda.c \ ../doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl.c \ ../doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl_codelet.cl \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = \ ../doxygen/chapters/images/arbiter.png \ ../doxygen/chapters/images/data_trace.png \ ../doxygen/chapters/images/distrib_data.png \ ../doxygen/chapters/images/distrib_data_histo.png \ ../doxygen/chapters/images/paje_draw_histogram.png \ ../doxygen/chapters/images/parallel_worker2.png \ ../doxygen/chapters/images/runtime-par.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ ../doxygen/chapters/images/starpu_chol_model_11_type.png \ ../doxygen/chapters/images/tasks_size_overhead.png \ ../doxygen/chapters/images/temanejo.png \ ../doxygen/chapters/images/eclipse_installer.png \ ../doxygen/chapters/images/eclipse_install_cdt.png \ ../doxygen/chapters/images/eclipse_hello_build.png \ ../doxygen/chapters/images/eclipse_hello_run.png \ ../doxygen/chapters/images/eclipse_hello_fxt.png \ ../doxygen/chapters/images/eclipse_hello_graph.png \ ../doxygen/chapters/images/eclipse_hello_vite.png \ ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ ../doxygen/chapters/images/eclipse_hello_plugin.png \ ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ ../doxygen/chapters/images/eclipse_hello_hgraph.png \ ../doxygen/chapters/images/eclipse_install_pde.png \ ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_log_arr.png \ ../doxygen/chapters/images/starpu_log_list.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starvz_visu.png \ ../doxygen/chapters/images/starvz_visu_r.png \ ../doxygen/chapters/images/trace_bw_heatmap.png \ ../doxygen/chapters/images/trace_recv_use.png \ ../doxygen/chapters/images/trace_send_use.png \ ../doxygen/chapters/images/trace_volume_heatmap.png \ ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_func_perf.png \ ../doxygen/chapters/images/starpupy_handle_perf.png \ ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png if STARPU_BUILD_DOC starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ dox_inputs = $(DOX_CONFIG) \ $(chapters) \ starpu_config.h \ chapters/version.sty \ chapters/version.html endif starpu-1.4.9+dfsg/doc/doxygen_web_basics/Makefile.in000066400000000000000000001116201507764646700224610ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@am__append_5 = \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html subdir = doc/doxygen_web_basics ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = doxygen-config.cfg CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(txtdir)" DATA = $(txt_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ DOX_DIR = $(top_builddir)/doc/doxygen_web_basics DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_basics DOX_HTML_DIR = html_web_basics DOX_LATEX_DIR = latex DOX_PDF = starpu_web_basics.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOXYGEN = doxygen PDFLATEX = pdflatex MAKEINDEX = makeindex txtdir = $(docdir)/manual EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ $(am__append_4) $(am__append_5) refman.tex $(chapters) \ $(images) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ -r \ @STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_basics/basics_intro.doxy \ ../doxygen/chapters/starpu_basics/starpu_applications.doxy \ ../doxygen/chapters/starpu_basics/basic_examples.doxy \ ../doxygen/chapters/starpu_basics/scaling_vector_example.doxy \ ../doxygen/chapters/starpu_basics/tasks.doxy \ ../doxygen/chapters/starpu_basics/data_management.doxy \ ../doxygen/chapters/starpu_basics/scheduling.doxy \ ../doxygen/chapters/starpu_basics/examples_sources.doxy \ ../doxygen/chapters/starpu_basics/code/basics_vector_scal_c.c \ ../doxygen/chapters/starpu_basics/code/basics_vector_scal_cpu.c \ ../doxygen/chapters/starpu_basics/code/basics_vector_scal_cuda.c \ ../doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl.c \ ../doxygen/chapters/starpu_basics/code/basics_vector_scal_opencl_codelet.cl \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = \ ../doxygen/chapters/images/arbiter.png \ ../doxygen/chapters/images/data_trace.png \ ../doxygen/chapters/images/distrib_data.png \ ../doxygen/chapters/images/distrib_data_histo.png \ ../doxygen/chapters/images/paje_draw_histogram.png \ ../doxygen/chapters/images/parallel_worker2.png \ ../doxygen/chapters/images/runtime-par.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ ../doxygen/chapters/images/starpu_chol_model_11_type.png \ ../doxygen/chapters/images/tasks_size_overhead.png \ ../doxygen/chapters/images/temanejo.png \ ../doxygen/chapters/images/eclipse_installer.png \ ../doxygen/chapters/images/eclipse_install_cdt.png \ ../doxygen/chapters/images/eclipse_hello_build.png \ ../doxygen/chapters/images/eclipse_hello_run.png \ ../doxygen/chapters/images/eclipse_hello_fxt.png \ ../doxygen/chapters/images/eclipse_hello_graph.png \ ../doxygen/chapters/images/eclipse_hello_vite.png \ ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ ../doxygen/chapters/images/eclipse_hello_plugin.png \ ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ ../doxygen/chapters/images/eclipse_hello_hgraph.png \ ../doxygen/chapters/images/eclipse_install_pde.png \ ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_log_arr.png \ ../doxygen/chapters/images/starpu_log_list.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starvz_visu.png \ ../doxygen/chapters/images/starvz_visu_r.png \ ../doxygen/chapters/images/trace_bw_heatmap.png \ ../doxygen/chapters/images/trace_recv_use.png \ ../doxygen/chapters/images/trace_send_use.png \ ../doxygen/chapters/images/trace_volume_heatmap.png \ ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_func_perf.png \ ../doxygen/chapters/images/starpupy_handle_perf.png \ ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png @STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ $(chapters) \ @STARPU_BUILD_DOC_TRUE@ starpu_config.h \ @STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ chapters/version.html all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_basics/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign doc/doxygen_web_basics/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/doc/doxy.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-txtDATA: $(txt_DATA) @$(NORMAL_INSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ done uninstall-txtDATA: @$(NORMAL_UNINSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) tags TAGS: ctags CTAGS: cscope cscopelist: distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(txtdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-txtDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) install-exec-hook install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-txtDATA @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) uninstall-hook .MAKE: install-am install-exec-am install-strip uninstall-am .PHONY: all all-am check check-am clean clean-generic clean-libtool \ cscopelist-am ctags-am distclean distclean-generic \ distclean-libtool distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-exec-hook \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip install-txtDATA installcheck \ installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ uninstall-am uninstall-hook uninstall-txtDATA .PRECIOUS: Makefile @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_BUILD_DOC_TRUE@uninstall-hook: @STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@doxy: @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation <\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but dy doing so, it avoids opening the interactive menu when browsing files @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi @STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex @STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ @STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ @STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ @STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ @STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ @STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ break ; \ @STARPU_BUILD_DOC_TRUE@ fi; \ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/doc/doxygen_web_basics/chapters/000077500000000000000000000000001507764646700222245ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_basics/chapters/version.html000066400000000000000000000001401507764646700245720ustar00rootroot00000000000000This manual documents the version 1.4.9 of StarPU. Its contents was last updated on 2025-10-24. starpu-1.4.9+dfsg/doc/doxygen_web_basics/chapters/version.sty000066400000000000000000000001131507764646700244450ustar00rootroot00000000000000\newcommand{\STARPUUPDATED}{2025-10-24} \newcommand{\STARPUVERSION}{1.4.9} starpu-1.4.9+dfsg/doc/doxygen_web_basics/doxygen-config.cfg.in000066400000000000000000000051541507764646700244260ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Simon Archipoff # Copyright (C) 2011-2011 Télécom Sud Paris # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_basics/basics_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/starpu_applications.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/basic_examples.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/scaling_vector_example.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/tasks.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/data_management.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/scheduling.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/examples_sources.doxy \ @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ @top_srcdir@/doc/doxygen/chapters/files.doxy \ @top_srcdir@/doc/doxygen/chapters/api EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ @top_srcdir@/doc/doxygen/chapters \ @top_srcdir@/doc/doxygen/chapters/starpu_basics/code INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh #LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images GENERATE_LATEX = @DOC_GENERATE_LATEX@ HTML_OUTPUT = html_web_basics @INCLUDE_PATH = ../../doc/doxygen/ PROJECT_NAME = "StarPU Handbook - StarPU Basics" ALIASES += "intropage{2} = \mainpage" ALIASES += "webforeword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" ALIASES += "foreword = " starpu-1.4.9+dfsg/doc/doxygen_web_basics/refman.tex000066400000000000000000000036761507764646700224210ustar00rootroot00000000000000% StarPU --- Runtime system for heterogeneous multicore architectures. % % Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria % Copyright (C) 2013-2013 Simon Archipoff % % StarPU is free software; you can redistribute it and/or modify % it under the terms of the GNU Lesser General Public License as published by % the Free Software Foundation; either version 2.1 of the License, or (at % your option) any later version. % % StarPU is distributed in the hope that it will be useful, but % WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. % % See the GNU Lesser General Public License in COPYING.LGPL for more details. % \newcommand\starputitle{StarPU Handbook - StarPU Basics} \setcounter{tocdepth}{2} \input{./title.tex} \chapter{Organization} \label{index} \hypertarget{index}{} \input{index} \chapter{StarPU Applications} \label{StarPUApplications} \hypertarget{StarPUApplications}{} \input{StarPUApplications} \chapter{Basic Examples} \label{BasicExamples} \hypertarget{BasicExamples}{} \input{BasicExamples} \chapter{Full Source Code for the ’Scaling a Vector’ Example} \label{FullSourceCodeVectorScal} \hypertarget{FullSourceCodeVectorScal}{} \input{FullSourceCodeVectorScal} \chapter{Tasks In StarPU} \label{TasksInStarPU} \hypertarget{TasksInStarPU}{} \input{TasksInStarPU} \chapter{Data Management} \label{DataManagement} \hypertarget{DataManagement}{} \input{DataManagement} \chapter{Scheduling} \label{Scheduling} \hypertarget{Scheduling}{} \input{Scheduling} \chapter{Examples in StarPU Sources} \label{ExamplesInStarPUSources} \hypertarget{ExamplesInStarPUSources}{} \input{ExamplesInStarPUSources} \part{Appendix} \chapter{The GNU Free Documentation License} \label{GNUFreeDocumentationLicense} \hypertarget{GNUFreeDocumentationLicense}{} \input{GNUFreeDocumentationLicense} %\part{Index} %#\addcontentsline{toc}{chapter}{Index} %\printindex \end{document} starpu-1.4.9+dfsg/doc/doxygen_web_extensions/000077500000000000000000000000001507764646700213465ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_extensions/Makefile.am000066400000000000000000000133471507764646700234120ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOX_DIR = $(top_builddir)/doc/doxygen_web_extensions DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_extensions DOX_HTML_DIR = html_web_extensions DOX_LATEX_DIR = latex DOX_PDF = starpu_web_extensions.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h include $(top_srcdir)/doc/doxy.mk chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_extensions/extensions_intro.doxy \ ../doxygen/chapters/starpu_extensions/advanced_tasks.doxy \ ../doxygen/chapters/starpu_extensions/advanced_data_management.doxy \ ../doxygen/chapters/starpu_extensions/helpers.doxy \ ../doxygen/chapters/starpu_extensions/debugging_tools.doxy \ ../doxygen/chapters/starpu_extensions/advanced_scheduling.doxy \ ../doxygen/chapters/starpu_extensions/scheduling_contexts.doxy \ ../doxygen/chapters/starpu_extensions/scheduling_context_hypervisor.doxy \ ../doxygen/chapters/starpu_extensions/cuda_support.doxy \ ../doxygen/chapters/starpu_extensions/opencl_support.doxy \ ../doxygen/chapters/starpu_extensions/max_fpga_support.doxy \ ../doxygen/chapters/starpu_extensions/out_of_core.doxy \ ../doxygen/chapters/starpu_extensions/mpi_support.doxy \ ../doxygen/chapters/starpu_extensions/tcpip_support.doxy \ ../doxygen/chapters/starpu_extensions/transactions.doxy \ ../doxygen/chapters/starpu_extensions/fault_tolerance.doxy \ ../doxygen/chapters/starpu_extensions/fft_support.doxy \ ../doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy \ ../doxygen/chapters/starpu_extensions/bubble.doxy \ ../doxygen/chapters/starpu_extensions/parallel_worker.doxy \ ../doxygen/chapters/starpu_extensions/interoperability.doxy \ ../doxygen/chapters/starpu_extensions/scheduling_policy_definition.doxy \ ../doxygen/chapters/starpu_extensions/simgrid.doxy \ ../doxygen/chapters/starpu_extensions/code/complex.c \ ../doxygen/chapters/starpu_extensions/code/disk_compute.c \ ../doxygen/chapters/starpu_extensions/code/disk_copy.c \ ../doxygen/chapters/starpu_extensions/code/forkmode.c \ ../doxygen/chapters/starpu_extensions/code/multiformat.c \ ../doxygen/chapters/starpu_extensions/code/simgrid.c \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = \ ../doxygen/chapters/images/arbiter.png \ ../doxygen/chapters/images/data_trace.png \ ../doxygen/chapters/images/distrib_data.png \ ../doxygen/chapters/images/distrib_data_histo.png \ ../doxygen/chapters/images/paje_draw_histogram.png \ ../doxygen/chapters/images/parallel_worker2.png \ ../doxygen/chapters/images/runtime-par.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ ../doxygen/chapters/images/starpu_chol_model_11_type.png \ ../doxygen/chapters/images/tasks_size_overhead.png \ ../doxygen/chapters/images/temanejo.png \ ../doxygen/chapters/images/eclipse_installer.png \ ../doxygen/chapters/images/eclipse_install_cdt.png \ ../doxygen/chapters/images/eclipse_hello_build.png \ ../doxygen/chapters/images/eclipse_hello_run.png \ ../doxygen/chapters/images/eclipse_hello_fxt.png \ ../doxygen/chapters/images/eclipse_hello_graph.png \ ../doxygen/chapters/images/eclipse_hello_vite.png \ ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ ../doxygen/chapters/images/eclipse_hello_plugin.png \ ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ ../doxygen/chapters/images/eclipse_hello_hgraph.png \ ../doxygen/chapters/images/eclipse_install_pde.png \ ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_log_arr.png \ ../doxygen/chapters/images/starpu_log_list.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starvz_visu.png \ ../doxygen/chapters/images/starvz_visu_r.png \ ../doxygen/chapters/images/trace_bw_heatmap.png \ ../doxygen/chapters/images/trace_recv_use.png \ ../doxygen/chapters/images/trace_send_use.png \ ../doxygen/chapters/images/trace_volume_heatmap.png \ ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_func_perf.png \ ../doxygen/chapters/images/starpupy_handle_perf.png \ ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png if STARPU_BUILD_DOC starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ dox_inputs = $(DOX_CONFIG) \ $(chapters) \ starpu_config.h \ chapters/version.sty \ chapters/version.html endif starpu-1.4.9+dfsg/doc/doxygen_web_extensions/Makefile.in000066400000000000000000001136111507764646700234160ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@am__append_5 = \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html subdir = doc/doxygen_web_extensions ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = doxygen-config.cfg CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(txtdir)" DATA = $(txt_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ DOX_DIR = $(top_builddir)/doc/doxygen_web_extensions DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_extensions DOX_HTML_DIR = html_web_extensions DOX_LATEX_DIR = latex DOX_PDF = starpu_web_extensions.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOXYGEN = doxygen PDFLATEX = pdflatex MAKEINDEX = makeindex txtdir = $(docdir)/manual EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ $(am__append_4) $(am__append_5) refman.tex $(chapters) \ $(images) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ -r \ @STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_extensions/extensions_intro.doxy \ ../doxygen/chapters/starpu_extensions/advanced_tasks.doxy \ ../doxygen/chapters/starpu_extensions/advanced_data_management.doxy \ ../doxygen/chapters/starpu_extensions/helpers.doxy \ ../doxygen/chapters/starpu_extensions/debugging_tools.doxy \ ../doxygen/chapters/starpu_extensions/advanced_scheduling.doxy \ ../doxygen/chapters/starpu_extensions/scheduling_contexts.doxy \ ../doxygen/chapters/starpu_extensions/scheduling_context_hypervisor.doxy \ ../doxygen/chapters/starpu_extensions/cuda_support.doxy \ ../doxygen/chapters/starpu_extensions/opencl_support.doxy \ ../doxygen/chapters/starpu_extensions/max_fpga_support.doxy \ ../doxygen/chapters/starpu_extensions/out_of_core.doxy \ ../doxygen/chapters/starpu_extensions/mpi_support.doxy \ ../doxygen/chapters/starpu_extensions/tcpip_support.doxy \ ../doxygen/chapters/starpu_extensions/transactions.doxy \ ../doxygen/chapters/starpu_extensions/fault_tolerance.doxy \ ../doxygen/chapters/starpu_extensions/fft_support.doxy \ ../doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy \ ../doxygen/chapters/starpu_extensions/bubble.doxy \ ../doxygen/chapters/starpu_extensions/parallel_worker.doxy \ ../doxygen/chapters/starpu_extensions/interoperability.doxy \ ../doxygen/chapters/starpu_extensions/scheduling_policy_definition.doxy \ ../doxygen/chapters/starpu_extensions/simgrid.doxy \ ../doxygen/chapters/starpu_extensions/code/complex.c \ ../doxygen/chapters/starpu_extensions/code/disk_compute.c \ ../doxygen/chapters/starpu_extensions/code/disk_copy.c \ ../doxygen/chapters/starpu_extensions/code/forkmode.c \ ../doxygen/chapters/starpu_extensions/code/multiformat.c \ ../doxygen/chapters/starpu_extensions/code/simgrid.c \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = \ ../doxygen/chapters/images/arbiter.png \ ../doxygen/chapters/images/data_trace.png \ ../doxygen/chapters/images/distrib_data.png \ ../doxygen/chapters/images/distrib_data_histo.png \ ../doxygen/chapters/images/paje_draw_histogram.png \ ../doxygen/chapters/images/parallel_worker2.png \ ../doxygen/chapters/images/runtime-par.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ ../doxygen/chapters/images/starpu_chol_model_11_type.png \ ../doxygen/chapters/images/tasks_size_overhead.png \ ../doxygen/chapters/images/temanejo.png \ ../doxygen/chapters/images/eclipse_installer.png \ ../doxygen/chapters/images/eclipse_install_cdt.png \ ../doxygen/chapters/images/eclipse_hello_build.png \ ../doxygen/chapters/images/eclipse_hello_run.png \ ../doxygen/chapters/images/eclipse_hello_fxt.png \ ../doxygen/chapters/images/eclipse_hello_graph.png \ ../doxygen/chapters/images/eclipse_hello_vite.png \ ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ ../doxygen/chapters/images/eclipse_hello_plugin.png \ ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ ../doxygen/chapters/images/eclipse_hello_hgraph.png \ ../doxygen/chapters/images/eclipse_install_pde.png \ ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_log_arr.png \ ../doxygen/chapters/images/starpu_log_list.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starvz_visu.png \ ../doxygen/chapters/images/starvz_visu_r.png \ ../doxygen/chapters/images/trace_bw_heatmap.png \ ../doxygen/chapters/images/trace_recv_use.png \ ../doxygen/chapters/images/trace_send_use.png \ ../doxygen/chapters/images/trace_volume_heatmap.png \ ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_func_perf.png \ ../doxygen/chapters/images/starpupy_handle_perf.png \ ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png @STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ $(chapters) \ @STARPU_BUILD_DOC_TRUE@ starpu_config.h \ @STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ chapters/version.html all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_extensions/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign doc/doxygen_web_extensions/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/doc/doxy.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-txtDATA: $(txt_DATA) @$(NORMAL_INSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ done uninstall-txtDATA: @$(NORMAL_UNINSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) tags TAGS: ctags CTAGS: cscope cscopelist: distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(txtdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-txtDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) install-exec-hook install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-txtDATA @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) uninstall-hook .MAKE: install-am install-exec-am install-strip uninstall-am .PHONY: all all-am check check-am clean clean-generic clean-libtool \ cscopelist-am ctags-am distclean distclean-generic \ distclean-libtool distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-exec-hook \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip install-txtDATA installcheck \ installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ uninstall-am uninstall-hook uninstall-txtDATA .PRECIOUS: Makefile @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_BUILD_DOC_TRUE@uninstall-hook: @STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@doxy: @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation <\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but dy doing so, it avoids opening the interactive menu when browsing files @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi @STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex @STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ @STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ @STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ @STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ @STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ @STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ break ; \ @STARPU_BUILD_DOC_TRUE@ fi; \ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/doc/doxygen_web_extensions/chapters/000077500000000000000000000000001507764646700231575ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_extensions/chapters/version.html000066400000000000000000000001401507764646700255250ustar00rootroot00000000000000This manual documents the version 1.4.9 of StarPU. Its contents was last updated on 2025-10-24. starpu-1.4.9+dfsg/doc/doxygen_web_extensions/chapters/version.sty000066400000000000000000000001131507764646700254000ustar00rootroot00000000000000\newcommand{\STARPUUPDATED}{2025-10-24} \newcommand{\STARPUVERSION}{1.4.9} starpu-1.4.9+dfsg/doc/doxygen_web_extensions/doxygen-config.cfg.in000066400000000000000000000101631507764646700253550ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Simon Archipoff # Copyright (C) 2011-2011 Télécom Sud Paris # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_extensions/extensions_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/advanced_tasks.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/advanced_data_management.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/advanced_scheduling.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/scheduling_contexts.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/scheduling_context_hypervisor.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/scheduling_policy_definition.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/cuda_support.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/opencl_support.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/max_fpga_support.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/out_of_core.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/mpi_support.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/tcpip_support.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/transactions.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/fault_tolerance.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/fft_support.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/socl_opencl_extensions.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/bubble.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/parallel_worker.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/interoperability.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/simgrid.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/debugging_tools.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/helpers.doxy \ @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ @top_srcdir@/doc/doxygen/chapters/files.doxy \ @top_srcdir@/doc/doxygen/chapters/api EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ @top_srcdir@/doc/doxygen/chapters \ @top_srcdir@/doc/doxygen/chapters/starpu_extensions/code INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh #LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images GENERATE_LATEX = @DOC_GENERATE_LATEX@ HTML_OUTPUT = html_web_extensions @INCLUDE_PATH = ../../doc/doxygen/ PROJECT_NAME = "StarPU Handbook - StarPU Extensions" ALIASES += "intropage{2} = \mainpage" ALIASES += "webforeword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" ALIASES += "foreword = " starpu-1.4.9+dfsg/doc/doxygen_web_extensions/refman.tex000066400000000000000000000070341507764646700233440ustar00rootroot00000000000000% StarPU --- Runtime system for heterogeneous multicore architectures. % % Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria % Copyright (C) 2013-2013 Simon Archipoff % % StarPU is free software; you can redistribute it and/or modify % it under the terms of the GNU Lesser General Public License as published by % the Free Software Foundation; either version 2.1 of the License, or (at % your option) any later version. % % StarPU is distributed in the hope that it will be useful, but % WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. % % See the GNU Lesser General Public License in COPYING.LGPL for more details. % \newcommand\starputitle{StarPU Handbook - StarPU Extensions} \setcounter{tocdepth}{2} \input{./title.tex} \chapter{Organization} \label{index} \hypertarget{index}{} \input{index} \chapter{Advanced Tasks In StarPU} \label{AdvancedTasksInStarPU} \hypertarget{AdvancedTasksInStarPU}{} \input{AdvancedTasksInStarPU} \chapter{Advanced Data Management} \label{AdvancedDataManagement} \hypertarget{AdvancedDataManagement}{} \input{AdvancedDataManagement} \chapter{Advanced Scheduling} \label{AdvancedScheduling} \hypertarget{AdvancedScheduling}{} \input{AdvancedScheduling} \chapter{Scheduling Contexts} \label{SchedulingContexts} \hypertarget{SchedulingContexts}{} \input{SchedulingContexts} \chapter{Scheduling Context Hypervisor} \label{SchedulingContextHypervisor} \hypertarget{SchedulingContextHypervisor}{} \input{SchedulingContextHypervisor} \chapter{How To Define a New Scheduling Policy} \label{HowToDefineANewSchedulingPolicy} \hypertarget{HowToDefineANewSchedulingPolicy}{} \input{HowToDefineANewSchedulingPolicy} \chapter{CUDA Support} \label{CUDASupport} \hypertarget{CUDASupport}{} \input{CUDASupport} \chapter{OpenCL Support} \label{OpenCLSupport} \hypertarget{OpenCLSupport}{} \input{OpenCLSupport} \chapter{Maxeler FPGA Support} \label{MaxFPGASupport} \hypertarget{MaxFPGASupport}{} \input{MaxFPGASupport} \chapter{Out Of Core} \label{OutOfCore} \hypertarget{OutOfCore}{} \input{OutOfCore} \chapter{MPI Support} \label{MPISupport} \hypertarget{MPISupport}{} \input{MPISupport} \chapter{TCP/IP Support} \label{TCPIPSupport} \hypertarget{TCPIPSupport}{} \input{TCPIPSupport} \chapter{Transactions} \label{Transactions} \hypertarget{Transactions}{} \input{Transactions} \chapter{Fault Tolerance} \label{FaultTolerance} \hypertarget{FaultTolerance}{} \input{FaultTolerance} \chapter{FFT Support} \label{FFTSupport} \hypertarget{FFTSupport}{} \input{FFTSupport} \chapter{SOCL OpenCL Extensions} \label{SOCLOpenclExtensions} \hypertarget{SOCLOpenclExtensions}{} \input{SOCLOpenclExtensions} \chapter{Hierarchical DAGS} \label{HierarchicalDAGS} \hypertarget{HierarchicalDAGS}{} \input{HierarchicalDAGS} \chapter{Parallel Workers} \label{ParallelWorker} \hypertarget{ParallelWorker}{} \input{ParallelWorker} \chapter{Interoperability Support} \label{InteropSupport} \hypertarget{InteropSupport}{} \input{InteroperabilitySupport} \chapter{SimGrid Support} \label{SimGridSupport} \hypertarget{SimGridSupport}{} \input{SimGridSupport} \chapter{Debugging Tools} \label{DebuggingTools} \hypertarget{DebuggingTools}{} \input{DebuggingTools} \chapter{Helpers} \label{Helpers} \hypertarget{Helpers}{} \input{Helpers} \part{Appendix} \chapter{The GNU Free Documentation License} \label{GNUFreeDocumentationLicense} \hypertarget{GNUFreeDocumentationLicense}{} \input{GNUFreeDocumentationLicense} %\part{Index} %#\addcontentsline{toc}{chapter}{Index} %\printindex \end{document} starpu-1.4.9+dfsg/doc/doxygen_web_faq/000077500000000000000000000000001507764646700177165ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_faq/Makefile.am000066400000000000000000000101351507764646700217520ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOX_DIR = $(top_builddir)/doc/doxygen_web_faq DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_faq DOX_HTML_DIR = html_web_faq DOX_LATEX_DIR = latex DOX_PDF = starpu_web_faq.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h include $(top_srcdir)/doc/doxy.mk chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_faq/faq_intro.doxy \ ../doxygen/chapters/starpu_faq/check_list_performance.doxy \ ../doxygen/chapters/starpu_faq/faq.doxy \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = \ ../doxygen/chapters/images/arbiter.png \ ../doxygen/chapters/images/data_trace.png \ ../doxygen/chapters/images/distrib_data.png \ ../doxygen/chapters/images/distrib_data_histo.png \ ../doxygen/chapters/images/paje_draw_histogram.png \ ../doxygen/chapters/images/parallel_worker2.png \ ../doxygen/chapters/images/runtime-par.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ ../doxygen/chapters/images/starpu_chol_model_11_type.png \ ../doxygen/chapters/images/tasks_size_overhead.png \ ../doxygen/chapters/images/temanejo.png \ ../doxygen/chapters/images/eclipse_installer.png \ ../doxygen/chapters/images/eclipse_install_cdt.png \ ../doxygen/chapters/images/eclipse_hello_build.png \ ../doxygen/chapters/images/eclipse_hello_run.png \ ../doxygen/chapters/images/eclipse_hello_fxt.png \ ../doxygen/chapters/images/eclipse_hello_graph.png \ ../doxygen/chapters/images/eclipse_hello_vite.png \ ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ ../doxygen/chapters/images/eclipse_hello_plugin.png \ ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ ../doxygen/chapters/images/eclipse_hello_hgraph.png \ ../doxygen/chapters/images/eclipse_install_pde.png \ ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_log_arr.png \ ../doxygen/chapters/images/starpu_log_list.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starvz_visu.png \ ../doxygen/chapters/images/starvz_visu_r.png \ ../doxygen/chapters/images/trace_bw_heatmap.png \ ../doxygen/chapters/images/trace_recv_use.png \ ../doxygen/chapters/images/trace_send_use.png \ ../doxygen/chapters/images/trace_volume_heatmap.png \ ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_func_perf.png \ ../doxygen/chapters/images/starpupy_handle_perf.png \ ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png if STARPU_BUILD_DOC starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ dox_inputs = $(DOX_CONFIG) \ $(chapters) \ starpu_config.h \ chapters/version.sty \ chapters/version.html endif starpu-1.4.9+dfsg/doc/doxygen_web_faq/Makefile.in000066400000000000000000001103521507764646700217650ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@am__append_5 = \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html subdir = doc/doxygen_web_faq ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = doxygen-config.cfg CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(txtdir)" DATA = $(txt_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ DOX_DIR = $(top_builddir)/doc/doxygen_web_faq DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_faq DOX_HTML_DIR = html_web_faq DOX_LATEX_DIR = latex DOX_PDF = starpu_web_faq.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOXYGEN = doxygen PDFLATEX = pdflatex MAKEINDEX = makeindex txtdir = $(docdir)/manual EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ $(am__append_4) $(am__append_5) refman.tex $(chapters) \ $(images) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ -r \ @STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_faq/faq_intro.doxy \ ../doxygen/chapters/starpu_faq/check_list_performance.doxy \ ../doxygen/chapters/starpu_faq/faq.doxy \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = \ ../doxygen/chapters/images/arbiter.png \ ../doxygen/chapters/images/data_trace.png \ ../doxygen/chapters/images/distrib_data.png \ ../doxygen/chapters/images/distrib_data_histo.png \ ../doxygen/chapters/images/paje_draw_histogram.png \ ../doxygen/chapters/images/parallel_worker2.png \ ../doxygen/chapters/images/runtime-par.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ ../doxygen/chapters/images/starpu_chol_model_11_type.png \ ../doxygen/chapters/images/tasks_size_overhead.png \ ../doxygen/chapters/images/temanejo.png \ ../doxygen/chapters/images/eclipse_installer.png \ ../doxygen/chapters/images/eclipse_install_cdt.png \ ../doxygen/chapters/images/eclipse_hello_build.png \ ../doxygen/chapters/images/eclipse_hello_run.png \ ../doxygen/chapters/images/eclipse_hello_fxt.png \ ../doxygen/chapters/images/eclipse_hello_graph.png \ ../doxygen/chapters/images/eclipse_hello_vite.png \ ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ ../doxygen/chapters/images/eclipse_hello_plugin.png \ ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ ../doxygen/chapters/images/eclipse_hello_hgraph.png \ ../doxygen/chapters/images/eclipse_install_pde.png \ ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_log_arr.png \ ../doxygen/chapters/images/starpu_log_list.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starvz_visu.png \ ../doxygen/chapters/images/starvz_visu_r.png \ ../doxygen/chapters/images/trace_bw_heatmap.png \ ../doxygen/chapters/images/trace_recv_use.png \ ../doxygen/chapters/images/trace_send_use.png \ ../doxygen/chapters/images/trace_volume_heatmap.png \ ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_func_perf.png \ ../doxygen/chapters/images/starpupy_handle_perf.png \ ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png @STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ $(chapters) \ @STARPU_BUILD_DOC_TRUE@ starpu_config.h \ @STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ chapters/version.html all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_faq/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign doc/doxygen_web_faq/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/doc/doxy.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-txtDATA: $(txt_DATA) @$(NORMAL_INSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ done uninstall-txtDATA: @$(NORMAL_UNINSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) tags TAGS: ctags CTAGS: cscope cscopelist: distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(txtdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-txtDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) install-exec-hook install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-txtDATA @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) uninstall-hook .MAKE: install-am install-exec-am install-strip uninstall-am .PHONY: all all-am check check-am clean clean-generic clean-libtool \ cscopelist-am ctags-am distclean distclean-generic \ distclean-libtool distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-exec-hook \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip install-txtDATA installcheck \ installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ uninstall-am uninstall-hook uninstall-txtDATA .PRECIOUS: Makefile @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_BUILD_DOC_TRUE@uninstall-hook: @STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@doxy: @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation <\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but dy doing so, it avoids opening the interactive menu when browsing files @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi @STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex @STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ @STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ @STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ @STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ @STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ @STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ break ; \ @STARPU_BUILD_DOC_TRUE@ fi; \ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/doc/doxygen_web_faq/chapters/000077500000000000000000000000001507764646700215275ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_faq/chapters/version.html000066400000000000000000000001401507764646700240750ustar00rootroot00000000000000This manual documents the version 1.4.9 of StarPU. Its contents was last updated on 2025-10-24. starpu-1.4.9+dfsg/doc/doxygen_web_faq/chapters/version.sty000066400000000000000000000001131507764646700237500ustar00rootroot00000000000000\newcommand{\STARPUUPDATED}{2025-10-24} \newcommand{\STARPUVERSION}{1.4.9} starpu-1.4.9+dfsg/doc/doxygen_web_faq/doxygen-config.cfg.in000066400000000000000000000041221507764646700237230ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Simon Archipoff # Copyright (C) 2011-2011 Télécom Sud Paris # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_faq/faq_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_faq/check_list_performance.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_faq/faq.doxy \ @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ @top_srcdir@/doc/doxygen/chapters/files.doxy \ @top_srcdir@/doc/doxygen/chapters/api EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ @top_srcdir@/doc/doxygen/chapters INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh #LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images GENERATE_LATEX = @DOC_GENERATE_LATEX@ @INCLUDE_PATH = ../../doc/doxygen/ HTML_OUTPUT = html_web_faq PROJECT_NAME = "StarPU Handbook - StarPU FAQs" ALIASES += "intropage{2} = \mainpage" ALIASES += "webforeword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" ALIASES += "foreword = " starpu-1.4.9+dfsg/doc/doxygen_web_faq/refman.tex000066400000000000000000000027221507764646700217130ustar00rootroot00000000000000% StarPU --- Runtime system for heterogeneous multicore architectures. % % Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria % Copyright (C) 2013-2013 Simon Archipoff % % StarPU is free software; you can redistribute it and/or modify % it under the terms of the GNU Lesser General Public License as published by % the Free Software Foundation; either version 2.1 of the License, or (at % your option) any later version. % % StarPU is distributed in the hope that it will be useful, but % WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. % % See the GNU Lesser General Public License in COPYING.LGPL for more details. % \newcommand\starputitle{StarPU Handbook - StarPU FAQs} \setcounter{tocdepth}{2} \input{./title.tex} \chapter{Organization} \label{index} \hypertarget{index}{} \input{index} \chapter{Check List When Performance Are Not There} \label{CheckListWhenPerformanceAreNotThere} \hypertarget{CheckListWhenPerformanceAreNotThere}{} \input{CheckListWhenPerformanceAreNotThere} \chapter{Frequently Asked Questions} \label{FrequentlyAskedQuestions} \hypertarget{FrequentlyAskedQuestions}{} \input{FrequentlyAskedQuestions} \part{Appendix} \chapter{The GNU Free Documentation License} \label{GNUFreeDocumentationLicense} \hypertarget{GNUFreeDocumentationLicense}{} \input{GNUFreeDocumentationLicense} %\part{Index} %#\addcontentsline{toc}{chapter}{Index} %\printindex \end{document} starpu-1.4.9+dfsg/doc/doxygen_web_installation/000077500000000000000000000000001507764646700216505ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_installation/Makefile.am000066400000000000000000000104761507764646700237140ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOX_DIR = $(top_builddir)/doc/doxygen_web_installation DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_installation DOX_HTML_DIR = html_web_installation DOX_LATEX_DIR = latex DOX_PDF = starpu_web_installation.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h include $(top_srcdir)/doc/doxy.mk chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_installation/installation_intro.doxy \ ../doxygen/chapters/starpu_installation/environment_variables.doxy \ ../doxygen/chapters/starpu_installation/building.doxy \ ../doxygen/chapters/starpu_installation/configure_options.doxy \ ../doxygen/chapters/starpu_installation/configuration_and_initialization.doxy \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = \ ../doxygen/chapters/images/arbiter.png \ ../doxygen/chapters/images/data_trace.png \ ../doxygen/chapters/images/distrib_data.png \ ../doxygen/chapters/images/distrib_data_histo.png \ ../doxygen/chapters/images/paje_draw_histogram.png \ ../doxygen/chapters/images/parallel_worker2.png \ ../doxygen/chapters/images/runtime-par.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ ../doxygen/chapters/images/starpu_chol_model_11_type.png \ ../doxygen/chapters/images/tasks_size_overhead.png \ ../doxygen/chapters/images/temanejo.png \ ../doxygen/chapters/images/eclipse_installer.png \ ../doxygen/chapters/images/eclipse_install_cdt.png \ ../doxygen/chapters/images/eclipse_hello_build.png \ ../doxygen/chapters/images/eclipse_hello_run.png \ ../doxygen/chapters/images/eclipse_hello_fxt.png \ ../doxygen/chapters/images/eclipse_hello_graph.png \ ../doxygen/chapters/images/eclipse_hello_vite.png \ ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ ../doxygen/chapters/images/eclipse_hello_plugin.png \ ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ ../doxygen/chapters/images/eclipse_hello_hgraph.png \ ../doxygen/chapters/images/eclipse_install_pde.png \ ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_log_arr.png \ ../doxygen/chapters/images/starpu_log_list.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starvz_visu.png \ ../doxygen/chapters/images/starvz_visu_r.png \ ../doxygen/chapters/images/trace_bw_heatmap.png \ ../doxygen/chapters/images/trace_recv_use.png \ ../doxygen/chapters/images/trace_send_use.png \ ../doxygen/chapters/images/trace_volume_heatmap.png \ ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_func_perf.png \ ../doxygen/chapters/images/starpupy_handle_perf.png \ ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png if STARPU_BUILD_DOC starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ dox_inputs = $(DOX_CONFIG) \ $(chapters) \ starpu_config.h \ chapters/version.sty \ chapters/version.html endif starpu-1.4.9+dfsg/doc/doxygen_web_installation/Makefile.in000066400000000000000000001107461507764646700237260ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@am__append_5 = \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html subdir = doc/doxygen_web_installation ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = doxygen-config.cfg CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(txtdir)" DATA = $(txt_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ DOX_DIR = $(top_builddir)/doc/doxygen_web_installation DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_installation DOX_HTML_DIR = html_web_installation DOX_LATEX_DIR = latex DOX_PDF = starpu_web_installation.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOXYGEN = doxygen PDFLATEX = pdflatex MAKEINDEX = makeindex txtdir = $(docdir)/manual EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ $(am__append_4) $(am__append_5) refman.tex $(chapters) \ $(images) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ -r \ @STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_installation/installation_intro.doxy \ ../doxygen/chapters/starpu_installation/environment_variables.doxy \ ../doxygen/chapters/starpu_installation/building.doxy \ ../doxygen/chapters/starpu_installation/configure_options.doxy \ ../doxygen/chapters/starpu_installation/configuration_and_initialization.doxy \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = \ ../doxygen/chapters/images/arbiter.png \ ../doxygen/chapters/images/data_trace.png \ ../doxygen/chapters/images/distrib_data.png \ ../doxygen/chapters/images/distrib_data_histo.png \ ../doxygen/chapters/images/paje_draw_histogram.png \ ../doxygen/chapters/images/parallel_worker2.png \ ../doxygen/chapters/images/runtime-par.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ ../doxygen/chapters/images/starpu_chol_model_11_type.png \ ../doxygen/chapters/images/tasks_size_overhead.png \ ../doxygen/chapters/images/temanejo.png \ ../doxygen/chapters/images/eclipse_installer.png \ ../doxygen/chapters/images/eclipse_install_cdt.png \ ../doxygen/chapters/images/eclipse_hello_build.png \ ../doxygen/chapters/images/eclipse_hello_run.png \ ../doxygen/chapters/images/eclipse_hello_fxt.png \ ../doxygen/chapters/images/eclipse_hello_graph.png \ ../doxygen/chapters/images/eclipse_hello_vite.png \ ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ ../doxygen/chapters/images/eclipse_hello_plugin.png \ ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ ../doxygen/chapters/images/eclipse_hello_hgraph.png \ ../doxygen/chapters/images/eclipse_install_pde.png \ ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_log_arr.png \ ../doxygen/chapters/images/starpu_log_list.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starvz_visu.png \ ../doxygen/chapters/images/starvz_visu_r.png \ ../doxygen/chapters/images/trace_bw_heatmap.png \ ../doxygen/chapters/images/trace_recv_use.png \ ../doxygen/chapters/images/trace_send_use.png \ ../doxygen/chapters/images/trace_volume_heatmap.png \ ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_func_perf.png \ ../doxygen/chapters/images/starpupy_handle_perf.png \ ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png @STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ $(chapters) \ @STARPU_BUILD_DOC_TRUE@ starpu_config.h \ @STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ chapters/version.html all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_installation/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign doc/doxygen_web_installation/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/doc/doxy.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-txtDATA: $(txt_DATA) @$(NORMAL_INSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ done uninstall-txtDATA: @$(NORMAL_UNINSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) tags TAGS: ctags CTAGS: cscope cscopelist: distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(txtdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-txtDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) install-exec-hook install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-txtDATA @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) uninstall-hook .MAKE: install-am install-exec-am install-strip uninstall-am .PHONY: all all-am check check-am clean clean-generic clean-libtool \ cscopelist-am ctags-am distclean distclean-generic \ distclean-libtool distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-exec-hook \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip install-txtDATA installcheck \ installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ uninstall-am uninstall-hook uninstall-txtDATA .PRECIOUS: Makefile @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_BUILD_DOC_TRUE@uninstall-hook: @STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@doxy: @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation <\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but dy doing so, it avoids opening the interactive menu when browsing files @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi @STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex @STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ @STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ @STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ @STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ @STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ @STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ break ; \ @STARPU_BUILD_DOC_TRUE@ fi; \ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/doc/doxygen_web_installation/chapters/000077500000000000000000000000001507764646700234615ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_installation/chapters/version.html000066400000000000000000000001401507764646700260270ustar00rootroot00000000000000This manual documents the version 1.4.9 of StarPU. Its contents was last updated on 2025-10-24. starpu-1.4.9+dfsg/doc/doxygen_web_installation/chapters/version.sty000066400000000000000000000001131507764646700257020ustar00rootroot00000000000000\newcommand{\STARPUUPDATED}{2025-10-24} \newcommand{\STARPUVERSION}{1.4.9} starpu-1.4.9+dfsg/doc/doxygen_web_installation/doxygen-config.cfg.in000066400000000000000000000045441507764646700256650ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Simon Archipoff # Copyright (C) 2011-2011 Télécom Sud Paris # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_installation/installation_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_installation/building.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_installation/configure_options.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_installation/environment_variables.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_installation/configuration_and_initialization.doxy \ @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ @top_srcdir@/doc/doxygen/chapters/files.doxy \ @top_srcdir@/doc/doxygen/chapters/api EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ @top_srcdir@/doc/doxygen/chapters INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh #LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images GENERATE_LATEX = @DOC_GENERATE_LATEX@ @INCLUDE_PATH = ../../doc/doxygen/ HTML_OUTPUT = html_web_installation PROJECT_NAME = "StarPU Handbook - StarPU Installation" ALIASES += "intropage{2} = \mainpage" ALIASES += "webforeword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" ALIASES += "foreword = " starpu-1.4.9+dfsg/doc/doxygen_web_installation/refman.tex000066400000000000000000000035261507764646700236500ustar00rootroot00000000000000% StarPU --- Runtime system for heterogeneous multicore architectures. % % Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria % Copyright (C) 2013-2013 Simon Archipoff % % StarPU is free software; you can redistribute it and/or modify % it under the terms of the GNU Lesser General Public License as published by % the Free Software Foundation; either version 2.1 of the License, or (at % your option) any later version. % % StarPU is distributed in the hope that it will be useful, but % WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. % % See the GNU Lesser General Public License in COPYING.LGPL for more details. % \newcommand\starputitle{StarPU Handbook - StarPU Installation} \setcounter{tocdepth}{2} \input{./title.tex} \chapter{Organization} \label{index} \hypertarget{index}{} \input{index} \chapter{Building and Installing StarPU} \label{BuildingAndInstallingStarPU} \hypertarget{BuildingAndInstallingStarPU}{} \input{BuildingAndInstallingStarPU} \chapter{Compilation Configuration} \label{CompilationConfiguration} \hypertarget{CompilationConfiguration}{} \input{CompilationConfiguration} \chapter{Execution Configuration Through Environment Variables} \label{ExecutionConfigurationThroughEnvironmentVariables} \hypertarget{ExecutionConfigurationThroughEnvironmentVariables}{} \input{ExecutionConfigurationThroughEnvironmentVariables} \chapter{Configuration and initialization} \label{ConfigurationAndInitialization} \hypertarget{ConfigurationAndInitialization}{} \input{ConfigurationAndInitialization} \part{Appendix} \chapter{The GNU Free Documentation License} \label{GNUFreeDocumentationLicense} \hypertarget{GNUFreeDocumentationLicense}{} \input{GNUFreeDocumentationLicense} %\part{Index} %#\addcontentsline{toc}{chapter}{Index} %\printindex \end{document} starpu-1.4.9+dfsg/doc/doxygen_web_introduction/000077500000000000000000000000001507764646700216705ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_introduction/Makefile.am000066400000000000000000000101421507764646700237220ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOX_DIR = $(top_builddir)/doc/doxygen_web_introduction DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_introduction DOX_HTML_DIR = html_web_introduction DOX_LATEX_DIR = latex DOX_PDF = starpu_web_introduction.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h include $(top_srcdir)/doc/doxy.mk chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_introduction/introduction_intro.doxy \ ../doxygen/chapters/starpu_introduction/glossary.doxy \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = \ ../doxygen/chapters/images/arbiter.png \ ../doxygen/chapters/images/data_trace.png \ ../doxygen/chapters/images/distrib_data.png \ ../doxygen/chapters/images/distrib_data_histo.png \ ../doxygen/chapters/images/paje_draw_histogram.png \ ../doxygen/chapters/images/parallel_worker2.png \ ../doxygen/chapters/images/runtime-par.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ ../doxygen/chapters/images/starpu_chol_model_11_type.png \ ../doxygen/chapters/images/tasks_size_overhead.png \ ../doxygen/chapters/images/temanejo.png \ ../doxygen/chapters/images/eclipse_installer.png \ ../doxygen/chapters/images/eclipse_install_cdt.png \ ../doxygen/chapters/images/eclipse_hello_build.png \ ../doxygen/chapters/images/eclipse_hello_run.png \ ../doxygen/chapters/images/eclipse_hello_fxt.png \ ../doxygen/chapters/images/eclipse_hello_graph.png \ ../doxygen/chapters/images/eclipse_hello_vite.png \ ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ ../doxygen/chapters/images/eclipse_hello_plugin.png \ ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ ../doxygen/chapters/images/eclipse_hello_hgraph.png \ ../doxygen/chapters/images/eclipse_install_pde.png \ ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_log_arr.png \ ../doxygen/chapters/images/starpu_log_list.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starvz_visu.png \ ../doxygen/chapters/images/starvz_visu_r.png \ ../doxygen/chapters/images/trace_bw_heatmap.png \ ../doxygen/chapters/images/trace_recv_use.png \ ../doxygen/chapters/images/trace_send_use.png \ ../doxygen/chapters/images/trace_volume_heatmap.png \ ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_func_perf.png \ ../doxygen/chapters/images/starpupy_handle_perf.png \ ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png if STARPU_BUILD_DOC starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ dox_inputs = $(DOX_CONFIG) \ $(chapters) \ starpu_config.h \ chapters/version.sty \ chapters/version.html endif starpu-1.4.9+dfsg/doc/doxygen_web_introduction/Makefile.in000066400000000000000000001104121507764646700237340ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@am__append_5 = \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html subdir = doc/doxygen_web_introduction ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = doxygen-config.cfg CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(txtdir)" DATA = $(txt_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ DOX_DIR = $(top_builddir)/doc/doxygen_web_introduction DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_introduction DOX_HTML_DIR = html_web_introduction DOX_LATEX_DIR = latex DOX_PDF = starpu_web_introduction.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOXYGEN = doxygen PDFLATEX = pdflatex MAKEINDEX = makeindex txtdir = $(docdir)/manual EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ $(am__append_4) $(am__append_5) refman.tex $(chapters) \ $(images) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ -r \ @STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_introduction/introduction_intro.doxy \ ../doxygen/chapters/starpu_introduction/glossary.doxy \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = \ ../doxygen/chapters/images/arbiter.png \ ../doxygen/chapters/images/data_trace.png \ ../doxygen/chapters/images/distrib_data.png \ ../doxygen/chapters/images/distrib_data_histo.png \ ../doxygen/chapters/images/paje_draw_histogram.png \ ../doxygen/chapters/images/parallel_worker2.png \ ../doxygen/chapters/images/runtime-par.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ ../doxygen/chapters/images/starpu_chol_model_11_type.png \ ../doxygen/chapters/images/tasks_size_overhead.png \ ../doxygen/chapters/images/temanejo.png \ ../doxygen/chapters/images/eclipse_installer.png \ ../doxygen/chapters/images/eclipse_install_cdt.png \ ../doxygen/chapters/images/eclipse_hello_build.png \ ../doxygen/chapters/images/eclipse_hello_run.png \ ../doxygen/chapters/images/eclipse_hello_fxt.png \ ../doxygen/chapters/images/eclipse_hello_graph.png \ ../doxygen/chapters/images/eclipse_hello_vite.png \ ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ ../doxygen/chapters/images/eclipse_hello_plugin.png \ ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ ../doxygen/chapters/images/eclipse_hello_hgraph.png \ ../doxygen/chapters/images/eclipse_install_pde.png \ ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_log_arr.png \ ../doxygen/chapters/images/starpu_log_list.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starvz_visu.png \ ../doxygen/chapters/images/starvz_visu_r.png \ ../doxygen/chapters/images/trace_bw_heatmap.png \ ../doxygen/chapters/images/trace_recv_use.png \ ../doxygen/chapters/images/trace_send_use.png \ ../doxygen/chapters/images/trace_volume_heatmap.png \ ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_func_perf.png \ ../doxygen/chapters/images/starpupy_handle_perf.png \ ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png @STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ $(chapters) \ @STARPU_BUILD_DOC_TRUE@ starpu_config.h \ @STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ chapters/version.html all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_introduction/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign doc/doxygen_web_introduction/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/doc/doxy.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-txtDATA: $(txt_DATA) @$(NORMAL_INSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ done uninstall-txtDATA: @$(NORMAL_UNINSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) tags TAGS: ctags CTAGS: cscope cscopelist: distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(txtdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-txtDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) install-exec-hook install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-txtDATA @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) uninstall-hook .MAKE: install-am install-exec-am install-strip uninstall-am .PHONY: all all-am check check-am clean clean-generic clean-libtool \ cscopelist-am ctags-am distclean distclean-generic \ distclean-libtool distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-exec-hook \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip install-txtDATA installcheck \ installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ uninstall-am uninstall-hook uninstall-txtDATA .PRECIOUS: Makefile @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_BUILD_DOC_TRUE@uninstall-hook: @STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@doxy: @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation <\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but dy doing so, it avoids opening the interactive menu when browsing files @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi @STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex @STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ @STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ @STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ @STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ @STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ @STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ break ; \ @STARPU_BUILD_DOC_TRUE@ fi; \ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/doc/doxygen_web_introduction/chapters/000077500000000000000000000000001507764646700235015ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_introduction/chapters/version.html000066400000000000000000000001401507764646700260470ustar00rootroot00000000000000This manual documents the version 1.4.9 of StarPU. Its contents was last updated on 2025-10-24. starpu-1.4.9+dfsg/doc/doxygen_web_introduction/chapters/version.sty000066400000000000000000000001131507764646700257220ustar00rootroot00000000000000\newcommand{\STARPUUPDATED}{2025-10-24} \newcommand{\STARPUVERSION}{1.4.9} starpu-1.4.9+dfsg/doc/doxygen_web_introduction/doxygen-config.cfg.in000066400000000000000000000040411507764646700256750ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Simon Archipoff # Copyright (C) 2011-2011 Télécom Sud Paris # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_introduction/introduction_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_introduction/glossary.doxy \ @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ @top_srcdir@/doc/doxygen/chapters/files.doxy \ @top_srcdir@/doc/doxygen/chapters/api EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ @top_srcdir@/doc/doxygen/chapters INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh #LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images GENERATE_LATEX = @DOC_GENERATE_LATEX@ @INCLUDE_PATH = ../../doc/doxygen/ HTML_OUTPUT = html_web_introduction PROJECT_NAME = "StarPU Handbook - StarPU Introduction" ALIASES += "intropage{2} = \mainpage" ALIASES += "foreword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html" ALIASES += "webforeword = \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" starpu-1.4.9+dfsg/doc/doxygen_web_introduction/refman.tex000066400000000000000000000023271507764646700236660ustar00rootroot00000000000000% StarPU --- Runtime system for heterogeneous multicore architectures. % % Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria % Copyright (C) 2013-2013 Simon Archipoff % % StarPU is free software; you can redistribute it and/or modify % it under the terms of the GNU Lesser General Public License as published by % the Free Software Foundation; either version 2.1 of the License, or (at % your option) any later version. % % StarPU is distributed in the hope that it will be useful, but % WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. % % See the GNU Lesser General Public License in COPYING.LGPL for more details. % \newcommand\starputitle{StarPU Handbook - StarPU Introduction} \setcounter{tocdepth}{2} \input{./title.tex} \chapter{Organization} \label{index} \hypertarget{index}{} \input{index} \chapter{Glossary} \label{Glossary} \hypertarget{Glossary}{} \input{Glossary} \part{Appendix} \chapter{The GNU Free Documentation License} \label{GNUFreeDocumentationLicense} \hypertarget{GNUFreeDocumentationLicense}{} \input{GNUFreeDocumentationLicense} %\part{Index} %#\addcontentsline{toc}{chapter}{Index} %\printindex \end{document} starpu-1.4.9+dfsg/doc/doxygen_web_languages/000077500000000000000000000000001507764646700211155ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_languages/Makefile.am000066400000000000000000000107021507764646700231510ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOX_DIR = $(top_builddir)/doc/doxygen_web_languages DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_languages DOX_HTML_DIR = html_web_languages DOX_LATEX_DIR = latex DOX_PDF = starpu_web_languages.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h include $(top_srcdir)/doc/doxy.mk chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_languages/languages_intro.doxy \ ../doxygen/chapters/starpu_languages/native_fortran_support.doxy \ ../doxygen/chapters/starpu_languages/java.doxy \ ../doxygen/chapters/starpu_languages/python.doxy \ ../doxygen/chapters/starpu_languages/openmp_runtime_support.doxy \ ../doxygen/chapters/starpu_languages/code/nf_initexit.f90 \ ../doxygen/chapters/starpu_languages/code/java_starpu.java \ ../doxygen/chapters/starpu_languages/code/java_spark.java \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = \ ../doxygen/chapters/images/arbiter.png \ ../doxygen/chapters/images/data_trace.png \ ../doxygen/chapters/images/distrib_data.png \ ../doxygen/chapters/images/distrib_data_histo.png \ ../doxygen/chapters/images/paje_draw_histogram.png \ ../doxygen/chapters/images/parallel_worker2.png \ ../doxygen/chapters/images/runtime-par.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ ../doxygen/chapters/images/starpu_chol_model_11_type.png \ ../doxygen/chapters/images/tasks_size_overhead.png \ ../doxygen/chapters/images/temanejo.png \ ../doxygen/chapters/images/eclipse_installer.png \ ../doxygen/chapters/images/eclipse_install_cdt.png \ ../doxygen/chapters/images/eclipse_hello_build.png \ ../doxygen/chapters/images/eclipse_hello_run.png \ ../doxygen/chapters/images/eclipse_hello_fxt.png \ ../doxygen/chapters/images/eclipse_hello_graph.png \ ../doxygen/chapters/images/eclipse_hello_vite.png \ ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ ../doxygen/chapters/images/eclipse_hello_plugin.png \ ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ ../doxygen/chapters/images/eclipse_hello_hgraph.png \ ../doxygen/chapters/images/eclipse_install_pde.png \ ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_log_arr.png \ ../doxygen/chapters/images/starpu_log_list.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starvz_visu.png \ ../doxygen/chapters/images/starvz_visu_r.png \ ../doxygen/chapters/images/trace_bw_heatmap.png \ ../doxygen/chapters/images/trace_recv_use.png \ ../doxygen/chapters/images/trace_send_use.png \ ../doxygen/chapters/images/trace_volume_heatmap.png \ ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_func_perf.png \ ../doxygen/chapters/images/starpupy_handle_perf.png \ ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png if STARPU_BUILD_DOC starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ dox_inputs = $(DOX_CONFIG) \ $(chapters) \ starpu_config.h \ chapters/version.sty \ chapters/version.html endif starpu-1.4.9+dfsg/doc/doxygen_web_languages/Makefile.in000066400000000000000000001111411507764646700231610ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@am__append_5 = \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html subdir = doc/doxygen_web_languages ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = doxygen-config.cfg CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(txtdir)" DATA = $(txt_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ DOX_DIR = $(top_builddir)/doc/doxygen_web_languages DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_languages DOX_HTML_DIR = html_web_languages DOX_LATEX_DIR = latex DOX_PDF = starpu_web_languages.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOXYGEN = doxygen PDFLATEX = pdflatex MAKEINDEX = makeindex txtdir = $(docdir)/manual EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ $(am__append_4) $(am__append_5) refman.tex $(chapters) \ $(images) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ -r \ @STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_languages/languages_intro.doxy \ ../doxygen/chapters/starpu_languages/native_fortran_support.doxy \ ../doxygen/chapters/starpu_languages/java.doxy \ ../doxygen/chapters/starpu_languages/python.doxy \ ../doxygen/chapters/starpu_languages/openmp_runtime_support.doxy \ ../doxygen/chapters/starpu_languages/code/nf_initexit.f90 \ ../doxygen/chapters/starpu_languages/code/java_starpu.java \ ../doxygen/chapters/starpu_languages/code/java_spark.java \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = \ ../doxygen/chapters/images/arbiter.png \ ../doxygen/chapters/images/data_trace.png \ ../doxygen/chapters/images/distrib_data.png \ ../doxygen/chapters/images/distrib_data_histo.png \ ../doxygen/chapters/images/paje_draw_histogram.png \ ../doxygen/chapters/images/parallel_worker2.png \ ../doxygen/chapters/images/runtime-par.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ ../doxygen/chapters/images/starpu_chol_model_11_type.png \ ../doxygen/chapters/images/tasks_size_overhead.png \ ../doxygen/chapters/images/temanejo.png \ ../doxygen/chapters/images/eclipse_installer.png \ ../doxygen/chapters/images/eclipse_install_cdt.png \ ../doxygen/chapters/images/eclipse_hello_build.png \ ../doxygen/chapters/images/eclipse_hello_run.png \ ../doxygen/chapters/images/eclipse_hello_fxt.png \ ../doxygen/chapters/images/eclipse_hello_graph.png \ ../doxygen/chapters/images/eclipse_hello_vite.png \ ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ ../doxygen/chapters/images/eclipse_hello_plugin.png \ ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ ../doxygen/chapters/images/eclipse_hello_hgraph.png \ ../doxygen/chapters/images/eclipse_install_pde.png \ ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_log_arr.png \ ../doxygen/chapters/images/starpu_log_list.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starvz_visu.png \ ../doxygen/chapters/images/starvz_visu_r.png \ ../doxygen/chapters/images/trace_bw_heatmap.png \ ../doxygen/chapters/images/trace_recv_use.png \ ../doxygen/chapters/images/trace_send_use.png \ ../doxygen/chapters/images/trace_volume_heatmap.png \ ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_func_perf.png \ ../doxygen/chapters/images/starpupy_handle_perf.png \ ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png @STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ $(chapters) \ @STARPU_BUILD_DOC_TRUE@ starpu_config.h \ @STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ chapters/version.html all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_languages/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign doc/doxygen_web_languages/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/doc/doxy.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-txtDATA: $(txt_DATA) @$(NORMAL_INSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ done uninstall-txtDATA: @$(NORMAL_UNINSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) tags TAGS: ctags CTAGS: cscope cscopelist: distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(txtdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-txtDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) install-exec-hook install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-txtDATA @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) uninstall-hook .MAKE: install-am install-exec-am install-strip uninstall-am .PHONY: all all-am check check-am clean clean-generic clean-libtool \ cscopelist-am ctags-am distclean distclean-generic \ distclean-libtool distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-exec-hook \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip install-txtDATA installcheck \ installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ uninstall-am uninstall-hook uninstall-txtDATA .PRECIOUS: Makefile @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_BUILD_DOC_TRUE@uninstall-hook: @STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@doxy: @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation <\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but dy doing so, it avoids opening the interactive menu when browsing files @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi @STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex @STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ @STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ @STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ @STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ @STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ @STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ break ; \ @STARPU_BUILD_DOC_TRUE@ fi; \ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/doc/doxygen_web_languages/chapters/000077500000000000000000000000001507764646700227265ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_languages/chapters/version.html000066400000000000000000000001401507764646700252740ustar00rootroot00000000000000This manual documents the version 1.4.9 of StarPU. Its contents was last updated on 2025-10-24. starpu-1.4.9+dfsg/doc/doxygen_web_languages/chapters/version.sty000066400000000000000000000001131507764646700251470ustar00rootroot00000000000000\newcommand{\STARPUUPDATED}{2025-10-24} \newcommand{\STARPUVERSION}{1.4.9} starpu-1.4.9+dfsg/doc/doxygen_web_languages/doxygen-config.cfg.in000066400000000000000000000046231507764646700251300ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Simon Archipoff # Copyright (C) 2011-2011 Télécom Sud Paris # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_languages/languages_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_languages/native_fortran_support.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_languages/java.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_languages/python.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_languages/openmp_runtime_support.doxy \ @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ @top_srcdir@/doc/doxygen/chapters/files.doxy \ @top_srcdir@/doc/doxygen/chapters/api EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ @top_srcdir@/doc/doxygen/chapters \ @top_srcdir@/doc/doxygen/chapters/starpu_languages/code INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh #LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images GENERATE_LATEX = @DOC_GENERATE_LATEX@ @INCLUDE_PATH = ../../doc/doxygen/ HTML_OUTPUT = html_web_languages PROJECT_NAME = "StarPU Handbook - StarPU Language Bindings" ALIASES += "intropage{2} = \mainpage" ALIASES += "webforeword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" ALIASES += "foreword = " starpu-1.4.9+dfsg/doc/doxygen_web_languages/refman.tex000066400000000000000000000032121507764646700231050ustar00rootroot00000000000000% StarPU --- Runtime system for heterogeneous multicore architectures. % % Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria % Copyright (C) 2013-2013 Simon Archipoff % % StarPU is free software; you can redistribute it and/or modify % it under the terms of the GNU Lesser General Public License as published by % the Free Software Foundation; either version 2.1 of the License, or (at % your option) any later version. % % StarPU is distributed in the hope that it will be useful, but % WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. % % See the GNU Lesser General Public License in COPYING.LGPL for more details. % \newcommand\starputitle{StarPU Handbook - StarPU Language Bindings} \setcounter{tocdepth}{2} \input{./title.tex} \chapter{Organization} \label{index} \hypertarget{index}{} \input{index} \chapter{Native Fortran Support} \label{NativeFortranSupport} \hypertarget{NativeFortranSupport}{} \input{NativeFortranSupport} \chapter{StarPU Java Interface} \label{StarPUJavaInterface} \hypertarget{StarPUJavaInterface}{} \input{StarPUJavaInterface} \chapter{Python Interface} \label{PythonInterface} \hypertarget{PythonInterface}{} \input{PythonInterface} \chapter{The StarPU OpenMP Runtime Support (SORS)} \label{OpenMPRuntimeSupport} \hypertarget{OpenMPRuntimeSupport}{} \input{OpenMPRuntimeSupport} \part{Appendix} \chapter{The GNU Free Documentation License} \label{GNUFreeDocumentationLicense} \hypertarget{GNUFreeDocumentationLicense}{} \input{GNUFreeDocumentationLicense} %\part{Index} %#\addcontentsline{toc}{chapter}{Index} %\printindex \end{document} starpu-1.4.9+dfsg/doc/doxygen_web_performances/000077500000000000000000000000001507764646700216335ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_performances/Makefile.am000066400000000000000000000104151507764646700236700ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOX_DIR = $(top_builddir)/doc/doxygen_web_performances DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_performances DOX_HTML_DIR = html_web_performances DOX_LATEX_DIR = latex DOX_PDF = starpu_web_performances.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h include $(top_srcdir)/doc/doxy.mk chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_performances/performances_intro.doxy \ ../doxygen/chapters/starpu_performances/benchmarking_starpu.doxy \ ../doxygen/chapters/starpu_performances/online_performance_tools.doxy \ ../doxygen/../doxygen/chapters/starpu_performances/offline_performance_tools.doxy \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = \ ../doxygen/chapters/images/arbiter.png \ ../doxygen/chapters/images/data_trace.png \ ../doxygen/chapters/images/distrib_data.png \ ../doxygen/chapters/images/distrib_data_histo.png \ ../doxygen/chapters/images/paje_draw_histogram.png \ ../doxygen/chapters/images/parallel_worker2.png \ ../doxygen/chapters/images/runtime-par.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ ../doxygen/chapters/images/starpu_chol_model_11_type.png \ ../doxygen/chapters/images/tasks_size_overhead.png \ ../doxygen/chapters/images/temanejo.png \ ../doxygen/chapters/images/eclipse_installer.png \ ../doxygen/chapters/images/eclipse_install_cdt.png \ ../doxygen/chapters/images/eclipse_hello_build.png \ ../doxygen/chapters/images/eclipse_hello_run.png \ ../doxygen/chapters/images/eclipse_hello_fxt.png \ ../doxygen/chapters/images/eclipse_hello_graph.png \ ../doxygen/chapters/images/eclipse_hello_vite.png \ ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ ../doxygen/chapters/images/eclipse_hello_plugin.png \ ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ ../doxygen/chapters/images/eclipse_hello_hgraph.png \ ../doxygen/chapters/images/eclipse_install_pde.png \ ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_log_arr.png \ ../doxygen/chapters/images/starpu_log_list.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starvz_visu.png \ ../doxygen/chapters/images/starvz_visu_r.png \ ../doxygen/chapters/images/trace_bw_heatmap.png \ ../doxygen/chapters/images/trace_recv_use.png \ ../doxygen/chapters/images/trace_send_use.png \ ../doxygen/chapters/images/trace_volume_heatmap.png \ ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_func_perf.png \ ../doxygen/chapters/images/starpupy_handle_perf.png \ ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png if STARPU_BUILD_DOC starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ dox_inputs = $(DOX_CONFIG) \ $(chapters) \ starpu_config.h \ chapters/version.sty \ chapters/version.html endif starpu-1.4.9+dfsg/doc/doxygen_web_performances/Makefile.in000066400000000000000000001106651507764646700237110ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@am__append_1 = $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@am__append_2 = $(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_3 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@am__append_4 = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@am__append_5 = \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html subdir = doc/doxygen_web_performances ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = doxygen-config.cfg CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(txtdir)" DATA = $(txt_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(srcdir)/doxygen-config.cfg.in $(top_srcdir)/doc/doxy.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ DOX_DIR = $(top_builddir)/doc/doxygen_web_performances DOX_CONFIG = $(top_srcdir)/doc/doxygen.cfg DOX_MAIN_DIR = doxygen_web_performances DOX_HTML_DIR = html_web_performances DOX_LATEX_DIR = latex DOX_PDF = starpu_web_performances.pdf DOX_TAG = starpu.tag DOX_STARPU_CONFIG = starpu_config.h # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DOXYGEN = doxygen PDFLATEX = pdflatex MAKEINDEX = makeindex txtdir = $(docdir)/manual EXTRA_DIST = $(am__append_1) $(am__append_2) $(am__append_3) \ $(am__append_4) $(am__append_5) refman.tex $(chapters) \ $(images) @STARPU_AVAILABLE_DOC_PDF_TRUE@@STARPU_BUILD_DOC_FALSE@txt_DATA = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@txt_DATA = $(DOX_DIR)/$(DOX_PDF) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@DOX_HTML_SRCDIR = $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@DOX_HTML_SRCDIR = $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@CLEANFILES = $(DOX_TAG) $(DOX_STARPU_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ -r \ @STARPU_BUILD_DOC_TRUE@ $(DOX_HTML_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_LATEX_DIR) \ @STARPU_BUILD_DOC_TRUE@ $(DOX_DIR)/$(DOX_PDF) chapters = \ ../doxygen/chapters/foreword.doxy \ ../doxygen/chapters/starpu_performances/performances_intro.doxy \ ../doxygen/chapters/starpu_performances/benchmarking_starpu.doxy \ ../doxygen/chapters/starpu_performances/online_performance_tools.doxy \ ../doxygen/../doxygen/chapters/starpu_performances/offline_performance_tools.doxy \ ../doxygen/chapters/files.doxy \ ../doxygen/chapters/fdl_1_3.doxy images = \ ../doxygen/chapters/images/arbiter.png \ ../doxygen/chapters/images/data_trace.png \ ../doxygen/chapters/images/distrib_data.png \ ../doxygen/chapters/images/distrib_data_histo.png \ ../doxygen/chapters/images/paje_draw_histogram.png \ ../doxygen/chapters/images/parallel_worker2.png \ ../doxygen/chapters/images/runtime-par.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_2.png \ ../doxygen/chapters/images/starpu_starpu_slu_lu_model_11.png \ ../doxygen/chapters/images/starpu_chol_model_11_type.png \ ../doxygen/chapters/images/tasks_size_overhead.png \ ../doxygen/chapters/images/temanejo.png \ ../doxygen/chapters/images/eclipse_installer.png \ ../doxygen/chapters/images/eclipse_install_cdt.png \ ../doxygen/chapters/images/eclipse_hello_build.png \ ../doxygen/chapters/images/eclipse_hello_run.png \ ../doxygen/chapters/images/eclipse_hello_fxt.png \ ../doxygen/chapters/images/eclipse_hello_graph.png \ ../doxygen/chapters/images/eclipse_hello_vite.png \ ../doxygen/chapters/images/eclipse_hello_svg_graph.png \ ../doxygen/chapters/images/eclipse_hello_plugin.png \ ../doxygen/chapters/images/eclipse_hello_paje_trace.png \ ../doxygen/chapters/images/eclipse_hello_hgraph.png \ ../doxygen/chapters/images/eclipse_install_pde.png \ ../doxygen/chapters/images/starpu_gflops_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_log_arr.png \ ../doxygen/chapters/images/starpu_log_list.png \ ../doxygen/chapters/images/starpu_non_linear_memset_regression_based_energy.png \ ../doxygen/chapters/images/starpu_power_non_linear_memset_regression_based.png \ ../doxygen/chapters/images/starvz_visu.png \ ../doxygen/chapters/images/starvz_visu_r.png \ ../doxygen/chapters/images/trace_bw_heatmap.png \ ../doxygen/chapters/images/trace_recv_use.png \ ../doxygen/chapters/images/trace_send_use.png \ ../doxygen/chapters/images/trace_volume_heatmap.png \ ../doxygen/chapters/images/starpupy_handle_func_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_perf_pickle.png \ ../doxygen/chapters/images/starpupy_handle_func_perf.png \ ../doxygen/chapters/images/starpupy_handle_perf.png \ ../doxygen/chapters/images/tasks_size_overhead_py_fut_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_futur.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle_pickle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_handle.png \ ../doxygen/chapters/images/tasks_size_overhead_py_none.png \ ../doxygen/chapters/images/tasks_size_overhead_py_noret_pickle.png @STARPU_BUILD_DOC_TRUE@dox_inputs = $(DOX_CONFIG) \ @STARPU_BUILD_DOC_TRUE@ $(chapters) \ @STARPU_BUILD_DOC_TRUE@ starpu_config.h \ @STARPU_BUILD_DOC_TRUE@ chapters/version.sty \ @STARPU_BUILD_DOC_TRUE@ chapters/version.html all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/doc/doxy.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/doxygen_web_performances/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign doc/doxygen_web_performances/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/doc/doxy.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): doxygen-config.cfg: $(top_builddir)/config.status $(srcdir)/doxygen-config.cfg.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-txtDATA: $(txt_DATA) @$(NORMAL_INSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ done uninstall-txtDATA: @$(NORMAL_UNINSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) tags TAGS: ctags CTAGS: cscope cscopelist: distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(txtdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_FALSE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-txtDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) install-exec-hook install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-txtDATA @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) uninstall-hook .MAKE: install-am install-exec-am install-strip uninstall-am .PHONY: all all-am check check-am clean clean-generic clean-libtool \ cscopelist-am ctags-am distclean distclean-generic \ distclean-libtool distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-exec-hook \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip install-txtDATA installcheck \ installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ uninstall-am uninstall-hook uninstall-txtDATA .PRECIOUS: Makefile @STARPU_BUILD_DOC_PDF_TRUE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_PDF_FALSE@@STARPU_BUILD_DOC_TRUE@all: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@install-exec-hook: $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_BUILD_DOC_TRUE@uninstall-hook: @STARPU_BUILD_DOC_TRUE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@install-exec-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @$(MKDIR_P) $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @(cd $(DOX_HTML_SRCDIR) && $(PROG_FIND) . -type f -exec $(INSTALL_DATA) {} $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) \;) @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@uninstall-hook: @STARPU_AVAILABLE_DOC_TRUE@@STARPU_BUILD_DOC_FALSE@ @rm -rf $(DESTDIR)$(docdir)/manual/$(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@chapters/version.sty: $(chapters) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_sty @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%F" > timestamp_sty_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_sty` +"%B %Y" > timestamp_sty_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_sty_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{'`cat timestamp_sty_updated`'}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo ':newcommand{:STARPUUPDATED}{unknown date}' > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo ':newcommand{:STARPUVERSION}{$(VERSION)}' >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/:/\\/g' $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_sty timestamp_sty_updated timestamp_sty_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@chapters/version.html: $(chapters) $(images) @STARPU_BUILD_DOC_TRUE@ @for f in $(chapters) ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; then $(PROG_STAT) --format=%Y $(top_srcdir)/doc/$(DOX_MAIN_DIR)/$$f ; fi \ @STARPU_BUILD_DOC_TRUE@ done | sort -r | head -1 > timestamp_html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html ; then \ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%F" > timestamp_html_updated ;\ @STARPU_BUILD_DOC_TRUE@ LC_ALL=C $(PROG_DATE) --date=@`cat timestamp_html` +"%B %Y" > timestamp_html_updated_month ;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @echo "This manual documents the version $(VERSION) of StarPU." > $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html @STARPU_BUILD_DOC_TRUE@ @if test -s timestamp_html_updated ; then \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on "`cat timestamp_html_updated`"." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ echo "Its contents was last updated on unknown_date." >> $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.html;\ @STARPU_BUILD_DOC_TRUE@ fi @STARPU_BUILD_DOC_TRUE@ @for f in timestamp_html timestamp_html_updated timestamp_html_updated_month ; do \ @STARPU_BUILD_DOC_TRUE@ if test -f $$f ; then $(RM) $$f ; fi ;\ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@doxy: @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@$(DOX_HTML_DIR): $(DOX_TAG) @STARPU_BUILD_DOC_TRUE@ @$(MKDIR_P) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_TAG): $(dox_inputs) @STARPU_BUILD_DOC_TRUE@ @rm -fr $(DOX_HTML_DIR) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @$(DOXYGEN) $(DOX_CONFIG) @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/ModuleDocumentation <\/li>/Modules<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/DocOrganization.html ; then $(SED) -i 's/FileDocumentation <\/li>/Files<\/a>/' $(DOX_HTML_DIR)/DocOrganization.html ; fi @STARPU_BUILD_DOC_TRUE@ # comment for the line below: what we really want to do is to remove the line, but dy doing so, it avoids opening the interactive menu when browsing files @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_HTML_DIR)/navtreedata.js ; then $(SED) -i 's/\[ "Files", "Files.html", null \]/\[ "", "Files.html", null \]/' $(DOX_HTML_DIR)/navtreedata.js ; fi @STARPU_BUILD_DOC_TRUE@ @$(SED) -i 's/.*"Files.html".*//' $(DOX_HTML_DIR)/pages.html @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/main.tex ; then mv $(DOX_LATEX_DIR)/main.tex $(DOX_LATEX_DIR)/index.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then $(SED) -i '/\\begin{titlepage}/,$$d' $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ @if test -f $(DOX_LATEX_DIR)/refman.tex ; then cat $(top_srcdir)/doc/$(DOX_MAIN_DIR)/refman.tex >> $(DOX_LATEX_DIR)/refman.tex ; fi @STARPU_BUILD_DOC_TRUE@ $(top_srcdir)/doc/sectionNumbering.py $(top_builddir)/doc/$(DOX_MAIN_DIR) $(DOX_HTML_DIR) @STARPU_BUILD_DOC_TRUE@$(DOX_DIR)/$(DOX_PDF): $(DOX_TAG) refman.tex $(images) @STARPU_BUILD_DOC_TRUE@ $(MKDIR_P) $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/chapters/version.sty $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @cp $(top_srcdir)/doc/title.tex $(DOX_LATEX_DIR) @STARPU_BUILD_DOC_TRUE@ @if test -f $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex ; then cp $(top_srcdir)/doc/$(DOX_MAIN_DIR)/modules.tex $(DOX_LATEX_DIR) ; fi @STARPU_BUILD_DOC_TRUE@ @echo $(PDFLATEX) $(DOX_LATEX_DIR)/refman.tex @STARPU_BUILD_DOC_TRUE@ @cd $(DOX_LATEX_DIR) ;\ @STARPU_BUILD_DOC_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out ;\ @STARPU_BUILD_DOC_TRUE@ for f in group__API__* ; do sed -i '1 i \\\clearpage' $$f ; done ;\ @STARPU_BUILD_DOC_TRUE@ if test -f ExecutionConfigurationThroughEnvironmentVariables.tex ; then $(SED) -i -e 's/__env__/\\_Environment Variables!/' -e 's/\\-\\_\\-\\-\\_\\-env\\-\\_\\-\\-\\_\\-//' ExecutionConfigurationThroughEnvironmentVariables.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f CompilationConfiguration.tex ; then $(SED) -i -e 's/__configure__/\\_Configure Options!/' -e 's/\\-\\_\\-\\-\\_\\-configure\\-\\_\\-\\-\\_\\-//' CompilationConfiguration.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item Module\\.Documentation/\\item \\hyperlink{ModuleDocumentation}{Module Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ if test -f DocOrganization.tex ; then $(SED) -i s'/\\item File\\.Documentation/\\item \\hyperlink{FileDocumentation}{File Documentation}/' DocOrganization.tex ; fi ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ ! < refman.log grep -v group__ | grep -v _amgrp | grep -v deprecated__ | grep "multiply defined" || exit 1 ;\ @STARPU_BUILD_DOC_TRUE@ $(MAKEINDEX) refman.idx ;\ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex ;\ @STARPU_BUILD_DOC_TRUE@ for i in $(shell seq 1 5); do \ @STARPU_BUILD_DOC_TRUE@ if $(EGREP) 'Rerun (LaTeX|to get cross-references right)' refman.log > /dev/null 2>&1; then \ @STARPU_BUILD_DOC_TRUE@ max_print_line=1000000 $(PDFLATEX) -interaction batchmode refman.tex; \ @STARPU_BUILD_DOC_TRUE@ else \ @STARPU_BUILD_DOC_TRUE@ break ; \ @STARPU_BUILD_DOC_TRUE@ fi; \ @STARPU_BUILD_DOC_TRUE@ done @STARPU_BUILD_DOC_TRUE@ mv $(DOX_LATEX_DIR)/refman.pdf $(DOX_DIR)/$(DOX_PDF) @STARPU_BUILD_DOC_TRUE@starpu_config.h: $(top_srcdir)/include/starpu_config.h.in @STARPU_BUILD_DOC_TRUE@ @$(SED) 's/#undef \(.*\)/#define \1 1/' $< > $@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/doc/doxygen_web_performances/chapters/000077500000000000000000000000001507764646700234445ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/doxygen_web_performances/chapters/version.html000066400000000000000000000001401507764646700260120ustar00rootroot00000000000000This manual documents the version 1.4.9 of StarPU. Its contents was last updated on 2025-10-24. starpu-1.4.9+dfsg/doc/doxygen_web_performances/chapters/version.sty000066400000000000000000000001131507764646700256650ustar00rootroot00000000000000\newcommand{\STARPUUPDATED}{2025-10-24} \newcommand{\STARPUVERSION}{1.4.9} starpu-1.4.9+dfsg/doc/doxygen_web_performances/doxygen-config.cfg.in000066400000000000000000000044061507764646700256450ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Simon Archipoff # Copyright (C) 2011-2011 Télécom Sud Paris # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # INPUT = @top_srcdir@/doc/doxygen/chapters/starpu_performances/performances_intro.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_performances/benchmarking_starpu.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_performances/online_performance_tools.doxy \ @top_srcdir@/doc/doxygen/chapters/starpu_performances/offline_performance_tools.doxy \ @top_srcdir@/doc/doxygen/chapters/fdl_1_3.doxy \ @top_srcdir@/doc/doxygen/chapters/files.doxy \ @top_srcdir@/doc/doxygen/chapters/api EXAMPLE_PATH = @top_srcdir@/doc/doxygen \ @top_srcdir@/doc/doxygen/chapters INPUT_FILTER = @top_builddir@/doc/doxygen/doxygen_filter.sh #LATEX_HEADER = @top_srcdir@/doc/doxygen/refman.tex IMAGE_PATH = @top_srcdir@/doc/doxygen/chapters/images GENERATE_LATEX = @DOC_GENERATE_LATEX@ @INCLUDE_PATH = ../../doc/doxygen/ HTML_OUTPUT = html_web_performances PROJECT_NAME = "StarPU Handbook - StarPU Performances" ALIASES += "intropage{2} = \mainpage" ALIASES += "webforeword = \htmlonly

    Foreword

    \endhtmlonly \htmlinclude version.html \htmlinclude foreword.html \htmlonly This is a sub-part of the StarPU documentation, go here to read the whole documentation. \endhtmlonly" ALIASES += "foreword = " starpu-1.4.9+dfsg/doc/doxygen_web_performances/refman.tex000066400000000000000000000030261507764646700236260ustar00rootroot00000000000000% StarPU --- Runtime system for heterogeneous multicore architectures. % % Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria % Copyright (C) 2013-2013 Simon Archipoff % % StarPU is free software; you can redistribute it and/or modify % it under the terms of the GNU Lesser General Public License as published by % the Free Software Foundation; either version 2.1 of the License, or (at % your option) any later version. % % StarPU is distributed in the hope that it will be useful, but % WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. % % See the GNU Lesser General Public License in COPYING.LGPL for more details. % \newcommand\starputitle{StarPU Handbook - StarPU Performances} \setcounter{tocdepth}{2} \input{./title.tex} \chapter{Organization} \label{index} \hypertarget{index}{} \input{index} \chapter{Benchmarking StarPU} \label{BenchmarkingStarPU} \hypertarget{BenchmarkingStarPU}{} \input{BenchmarkingStarPU} \chapter{Online Performance Tools} \label{OnlinePerformanceTools} \hypertarget{OnlinePerformanceTools}{} \input{OnlinePerformanceTools} \chapter{Offline Performance Tools} \label{OfflinePerformanceTools} \hypertarget{OfflinePerformanceTools}{} \input{OfflinePerformanceTools} \part{Appendix} \chapter{The GNU Free Documentation License} \label{GNUFreeDocumentationLicense} \hypertarget{GNUFreeDocumentationLicense}{} \input{GNUFreeDocumentationLicense} %\part{Index} %#\addcontentsline{toc}{chapter}{Index} %\printindex \end{document} starpu-1.4.9+dfsg/doc/extractHeadline.sh000077500000000000000000000027741507764646700202320ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # root=$(dirname $0) ( echo "** Full documentation" echo " - [[./starpu.pdf][PDF]] - [[./html/][HTML]]" echo "** Parts of the documentation" for doc in doxygen_web_introduction doxygen_web_installation doxygen_web_basics doxygen_web_applications doxygen_web_performances doxygen_web_faq doxygen_web_languages doxygen_web_extensions do x=$(echo $doc | sed 's/.*_web_//') if test -f $root/doxygen/chapters/starpu_$x/${x}_intro.doxy then headline=$(grep -A2 webforeword $root/doxygen/chapters/starpu_$x/${x}_intro.doxy | tail -1) echo "- $x" if test -n "$headline" then echo " - $headline" fi echo " - [[./starpu_web_$x.pdf][PDF]] - [[./html_web_$x/][HTML]]" fi done echo "** Developers documentation" echo " - [[./starpu_dev.pdf][PDF]] - [[./html_dev/][HTML]]" ) > ./README.org starpu-1.4.9+dfsg/doc/fixLinks.sh000077500000000000000000000032651507764646700167110ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # root=$(dirname $0) root_src=$root root_build=$1 files=$(find $root_build -name "*html") if test "$files" == "" then # there is no html files to process exit fi for d in $root_src/doxygen/chapters/starpu_* do for f in $(find $d -name "*.doxy") do #echo $f part=$(basename $(dirname $f)) link=$(grep -F "\page" $f | awk '{print $3}') if test -z "$link" then continue fi x1=$(echo $part | sed 's/starpu/doxygen_web/') x2=$(echo $part | sed 's/starpu/html_web/') title=$(grep -F "\page" $f | sed 's;..! .page '$link';;') #echo $part #echo $link #echo $f #echo $title # we replace the link with the correct link in the installation directory, it will not work in the build directory # there we would have to use ../../$x1/$x2/${link}.html for ff in $(grep -lrs "Chapter $link" $(find $root_build -name "*html")) do script=$(mktemp) echo "sed -i 's;Chapter "$link";Chapter "$title";' $ff" > $script . $script done done done starpu-1.4.9+dfsg/doc/sectionNumbering.py000077500000000000000000000051021507764646700204430ustar00rootroot00000000000000#!/usr/bin/python3 # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import os import operator import sys files = {} with open(sys.argv[1]+"/doxygen-config.cfg", "r", encoding="utf-8") as fin: for line in fin.readlines(): if ".doxy" in line and not "foreword.doxy" in line: for x in line.split(" "): xx = x.strip() if ".doxy" in xx: with open(xx, "r", encoding="utf-8") as fin: for line in fin.readlines(): if "\page" in line: line = line.replace("/*! \page ", "").strip() files[xx] = line[0:line.index(" ")]+".html" htmlfiles = ["index.html"] htmlfiles.extend(files.values()) htmldir=sys.argv[2]+"/" chapter=0 for x in htmlfiles: chapter+=1 section=0 with open(htmldir+x, "r", encoding="utf-8") as fin: with open(htmldir+x+".count.html", "w", encoding="utf-8") as fout: for line in fin.readlines(): if not "Foreword" in line: if "
    " in line: line = line.replace("
    ", "
    "+str(chapter)+". ") if "

    " in line: section += 1 line = line.replace("

    ", "

    " + str(chapter) + "." + str(section)) subsection = 0 if "

    " in line: subsection += 1 line = line.replace("

    ", "

    " + str(chapter) + "." + str(section) + "." + str(subsection)) subsubsection = 0 if "

    " in line: subsubsection += 1 line = line.replace("

    ", "

    " + str(chapter) + "." + str(section) + "." + str(subsection) + "." + str(subsubsection)) fout.write(line) os.rename(htmldir+x+".count.html", htmldir+x) starpu-1.4.9+dfsg/doc/title.tex000066400000000000000000000033111507764646700164160ustar00rootroot00000000000000% StarPU --- Runtime system for heterogeneous multicore architectures. % % Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria % % StarPU is free software; you can redistribute it and/or modify % it under the terms of the GNU Lesser General Public License as published by % the Free Software Foundation; either version 2.1 of the License, or (at % your option) any later version. % % StarPU is distributed in the hope that it will be useful, but % WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. % % See the GNU Lesser General Public License in COPYING.LGPL for more details. % \input{./version.sty} \setlength{\parskip}{0pt} \begin{titlepage} \vspace*{4cm} {\Huge \textbf{\starputitle}}\\ \rule{\textwidth}{1.5mm} \begin{flushright} {\Large for StarPU \STARPUVERSION} \end{flushright} \rule{\textwidth}{1mm} ~\\ \vspace*{15cm} \begin{flushright} Generated by Doxygen. \end{flushright} \end{titlepage} \begin{figure}[p] This manual documents the usage of StarPU version \STARPUVERSION. Its contents was last updated on \STARPUUPDATED.\\ Copyright © 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria \medskip \begin{quote} Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license is included in the section entitled “GNU Free Documentation License”. \end{quote} \end{figure} \pagenumbering{roman} \tableofcontents \pagenumbering{arabic} \hypersetup{pageanchor=true,citecolor=blue} starpu-1.4.9+dfsg/doc/tutorial/000077500000000000000000000000001507764646700164205ustar00rootroot00000000000000starpu-1.4.9+dfsg/doc/tutorial/Makefile000066400000000000000000000027051507764646700200640ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # CFLAGS += $$(pkg-config --cflags starpu-1.4) LDLIBS += $$(pkg-config --libs starpu-1.4) HAS_CUDA = $(shell starpu_config | grep 'STARPU_USE_CUDA 1') NVCC ?= nvcc -std=c++11 HAS_OPENCL = $(shell starpu_config | grep 'STARPU_USE_OPENCL 1') %.o: %.cu $(NVCC) $(CFLAGS) $< -c TARGETS = hello_world vector_scal all: $(TARGETS) VECTOR_SCAL_PREREQUISITES = vector_scal.o vector_scal_cpu.o ifneq ($(strip $(HAS_CUDA)),) VECTOR_SCAL_PREREQUISITES += vector_scal_cuda.o VECTOR_SCAL_COMPILER = $(NVCC) else VECTOR_SCAL_COMPILER = $(CC) endif ifneq ($(strip $(HAS_OPENCL)),) VECTOR_SCAL_PREREQUISITES += vector_scal_opencl.o LDLIBS += -lOpenCL endif vector_scal: $(VECTOR_SCAL_PREREQUISITES) $(VECTOR_SCAL_COMPILER) $^ $(LDLIBS) -o $@ $(LDFLAGS) clean: rm -f $(TARGETS) *.o starpu-1.4.9+dfsg/doc/tutorial/README000066400000000000000000000023661507764646700173070ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # Instructions on how to compile and run StarPU examples ------------------------------------------------------ % export STARPU_DIR= % export PATH=$PATH:$STARPU_DIR/bin % export PKG_CONFIG_PATH=$STARPU_DIR/lib/pkgconfig:$PKG_CONFIG_PATH % export LD_LIBRARY_PATH=$STARPU_DIR/lib:$LD_LIBRARY_PATH % starpu_machine_display % make hello_world % ./hello_world % make vector_scal % ./vector_scal % STARPU_NCPU=0 ./vector_scal % STARPU_NCPU=0 STARPU_NCUDA=0 ./vector_scal % STARPU_NCPU=0 STARPU_NOPENCL=0 ./vector_scal % STARPU_NOPENCL=0 STARPU_NCUDA=0 ./vector_scal starpu-1.4.9+dfsg/doc/tutorial/hello_world.c000066400000000000000000000033521507764646700211010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include struct params { int i; float f; }; void cpu_func(void *buffers[], void *cl_arg) { struct params *params = cl_arg; printf("Hello world (params = {%i, %f})\n", params->i, params->f); } struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .nbuffers = 0 }; void callback_func(void *callback_arg) { printf("Callback function (arg %p)\n", callback_arg); } int main(int argc, char **argv) { int ret; /* initialize StarPU */ ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_task *task = starpu_task_create(); task->cl = &cl; /* Pointer to the codelet defined above */ struct params params = { 1, 2.0f }; task->cl_arg = ¶ms; task->cl_arg_size = sizeof(params); task->callback_func = callback_func; task->callback_arg = (void*) (uintptr_t) 0x42; /* starpu_task_submit will be a blocking call */ task->synchronous = 1; /* submit the task to StarPU */ ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* terminate StarPU */ starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/doc/tutorial/hello_world_msvc.c000066400000000000000000000034401507764646700221270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include struct params { int i; float f; }; void cpu_func(void *buffers[], void *cl_arg) { struct params *params = cl_arg; printf("Hello world (params = {%i, %f})\n", params->i, params->f); } void callback_func(void *callback_arg) { printf("Callback function (arg %p)\n", callback_arg); } int main(int argc, char **argv) { int ret; struct starpu_codelet cl; struct starpu_task *task; struct params params; starpu_codelet_init(&cl); cl.cpu_funcs[0] = cpu_func; cl.nbuffers = 0; /* initialize StarPU */ ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); task = starpu_task_create(); task->cl = &cl; /* Pointer to the codelet defined above */ params.i = 1; params.f = 2.0f; task->cl_arg = ¶ms; task->cl_arg_size = sizeof(params); task->callback_func = callback_func; task->callback_arg = (void*) (uintptr_t) 0x42; /* starpu_task_submit will be a blocking call */ task->synchronous = 1; /* submit the task to StarPU */ ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* terminate StarPU */ starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/doc/tutorial/vector_scal.c000066400000000000000000000076031507764646700210760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example demonstrates how to use StarPU to scale an array by a factor. * It shows how to manipulate data with StarPU's data management library. * 1- how to declare a piece of data to StarPU (starpu_vector_data_register) * 2- how to submit a task to StarPU * 3- how a kernel can manipulate the data (buffers[0].vector.ptr) */ #include #define NX 2048 extern void vector_scal_cpu(void *buffers[], void *_args); extern void vector_scal_cuda(void *buffers[], void *_args); extern void vector_scal_opencl(void *buffers[], void *_args); static struct starpu_codelet cl = { /* CPU implementation of the codelet */ .cpu_funcs = {vector_scal_cpu}, #ifdef STARPU_USE_CUDA /* CUDA implementation of the codelet */ .cuda_funcs = {vector_scal_cuda}, #endif #ifdef STARPU_USE_OPENCL /* OpenCL implementation of the codelet */ .opencl_funcs = {vector_scal_opencl}, #endif .nbuffers = 1, .modes = {STARPU_RW} }; #ifdef STARPU_USE_OPENCL struct starpu_opencl_program programs; #endif int main(int argc, char **argv) { /* We consider a vector of float that is initialized just as any of C * data */ float vector[NX]; unsigned i; for (i = 0; i < NX; i++) vector[i] = 1.0f; fprintf(stderr, "BEFORE : First element was %f\n", vector[0]); /* Initialize StarPU with default configuration */ int ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL starpu_opencl_load_opencl_from_file("vector_scal_opencl_kernel.cl", &programs, NULL); #endif /* Tell StaPU to associate the "vector" vector with the "vector_handle" * identifier. When a task needs to access a piece of data, it should * refer to the handle that is associated to it. * In the case of the "vector" data interface: * - the first argument of the registration method is a pointer to the * handle that should describe the data * - the second argument is the memory node where the data (ie. "vector") * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as * opposed to an address on a GPU for instance. * - the third argument is the address of the vector in RAM * - the fourth argument is the number of elements in the vector * - the fifth argument is the size of each element. */ starpu_data_handle_t vector_handle; starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); float factor = 3.14; ret = starpu_task_insert(&cl, /* an argument is passed to the codelet, beware that this is a * READ-ONLY buffer and that the codelet may be given a pointer to a * COPY of the argument */ STARPU_VALUE, &factor, sizeof(factor), /* the codelet manipulates one buffer in RW mode */ STARPU_RW, vector_handle, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* Wait for tasks completion */ starpu_task_wait_for_all(); /* StarPU does not need to manipulate the array anymore so we can stop * monitoring it */ starpu_data_unregister(vector_handle); #ifdef STARPU_USE_OPENCL starpu_opencl_unload_opencl(&programs); #endif /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); fprintf(stderr, "AFTER First element is %f\n", vector[0]); return 0; } starpu-1.4.9+dfsg/doc/tutorial/vector_scal_cpu.c000066400000000000000000000036121507764646700217410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include /* This kernel takes a buffer and scales it by a constant factor */ void vector_scal_cpu(void *buffers[], void *cl_arg) { unsigned i; float factor; /* * The "buffers" array matches the task->handles array: for instance * task->handles[0] is a handle that corresponds to a data with * vector "interface", so that the first entry of the array in the * codelet is a pointer to a structure describing such a vector (ie. * struct starpu_vector_interface *). Here, we therefore manipulate * the buffers[0] element as a vector: nx gives the number of elements * in the array, ptr gives the location of the array (that was possibly * migrated/replicated), and elemsize gives the size of each elements. */ struct starpu_vector_interface *vector = buffers[0]; /* length of the vector */ unsigned n = STARPU_VECTOR_GET_NX(vector); /* get a pointer to the local copy of the vector : note that we have to * cast it in (float *) since a vector could contain any type of * elements so that the .ptr field is actually a uintptr_t */ float *val = (float *)STARPU_VECTOR_GET_PTR(vector); /* scale the vector */ starpu_codelet_unpack_args(cl_arg, &factor); for (i = 0; i < n; i++) val[i] *= factor; } starpu-1.4.9+dfsg/doc/tutorial/vector_scal_cuda.cu000066400000000000000000000031701507764646700222520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include static __global__ void vector_mult_cuda(float *val, unsigned int n, float factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) val[i] *= factor; } extern "C" void vector_scal_cuda(void *buffers[], void *cl_arg) { float factor; starpu_codelet_unpack_args(cl_arg, &factor); /* length of the vector */ unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the vector pointer */ float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]); unsigned threads_per_block = 64; unsigned nblocks = (n + threads_per_block-1) / threads_per_block; vector_mult_cuda<<>>(val, n, factor); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } starpu-1.4.9+dfsg/doc/tutorial/vector_scal_opencl.c000066400000000000000000000042531507764646700224340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include extern struct starpu_opencl_program programs; void vector_scal_opencl(void *buffers[], void *cl_arg) { float factor; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; starpu_codelet_unpack_args(cl_arg, &factor); /* length of the vector */ unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); /* OpenCL copy of the vector pointer */ cl_mem val = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); id = starpu_worker_get_id(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &programs, "vector_mult_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(n), &n); err |= clSetKernelArg(kernel, 1, sizeof(val), &val); err |= clSetKernelArg(kernel, 2, sizeof(factor), &factor); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local=global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/doc/tutorial/vector_scal_opencl_kernel.cl000066400000000000000000000015631507764646700241510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void vector_mult_opencl(unsigned int nx, __global float* val, float factor) { const int i = get_global_id(0); if (i < nx) { val[i] *= factor; } } starpu-1.4.9+dfsg/eclipse-plugin/000077500000000000000000000000001507764646700167305ustar00rootroot00000000000000starpu-1.4.9+dfsg/eclipse-plugin/.classpath000066400000000000000000000007621507764646700207200ustar00rootroot00000000000000 starpu-1.4.9+dfsg/eclipse-plugin/.project000066400000000000000000000011771507764646700204050ustar00rootroot00000000000000 plugin org.eclipse.jdt.core.javabuilder org.eclipse.pde.ManifestBuilder org.eclipse.pde.SchemaBuilder org.eclipse.pde.PluginNature org.eclipse.jdt.core.javanature starpu-1.4.9+dfsg/eclipse-plugin/.settings/000077500000000000000000000000001507764646700206465ustar00rootroot00000000000000starpu-1.4.9+dfsg/eclipse-plugin/.settings/org.eclipse.jdt.core.prefs000066400000000000000000000007311507764646700256310ustar00rootroot00000000000000eclipse.preferences.version=1 org.eclipse.jdt.core.compiler.codegen.targetPlatform=11 org.eclipse.jdt.core.compiler.compliance=11 org.eclipse.jdt.core.compiler.problem.assertIdentifier=error org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled org.eclipse.jdt.core.compiler.problem.enumIdentifier=error org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=warning org.eclipse.jdt.core.compiler.release=enabled org.eclipse.jdt.core.compiler.source=11 starpu-1.4.9+dfsg/eclipse-plugin/META-INF/000077500000000000000000000000001507764646700200705ustar00rootroot00000000000000starpu-1.4.9+dfsg/eclipse-plugin/META-INF/MANIFEST.MF000066400000000000000000000005701507764646700215240ustar00rootroot00000000000000Manifest-Version: 1.0 Bundle-ManifestVersion: 2 Bundle-Name: StarPU Bundle-SymbolicName: StarPU;singleton:=true Bundle-Version: 1.0.0.qualifier Require-Bundle: org.eclipse.ui, org.eclipse.e4.ui.model.workbench, org.eclipse.equinox.registry, org.eclipse.e4.core.di.annotations Automatic-Module-Name: StarPU Bundle-RequiredExecutionEnvironment: JavaSE-11 Bundle-ClassPath: . starpu-1.4.9+dfsg/eclipse-plugin/Makefile.am000066400000000000000000000020161507764646700207630ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-notests.mk EXTRA_DIST = \ tools/cproject.sh \ tools/install_workspace.sh \ build.properties \ build.xml \ .classpath \ plugin.xml \ .project \ META-INF/MANIFEST.MF \ icons/fxt.png \ icons/svg.png \ icons/taskGraph.png \ icons/vite.png \ .settings/org.eclipse.jdt.core.prefs SUBDIRS = src SUBDIRS += examples starpu-1.4.9+dfsg/eclipse-plugin/Makefile.in000066400000000000000000000743251507764646700210100ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ subdir = eclipse-plugin ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/make/starpu-notests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_CFLAGS = $(GLOBAL_AM_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # EXTRA_DIST = \ tools/cproject.sh \ tools/install_workspace.sh \ build.properties \ build.xml \ .classpath \ plugin.xml \ .project \ META-INF/MANIFEST.MF \ icons/fxt.png \ icons/svg.png \ icons/taskGraph.png \ icons/vite.png \ .settings/org.eclipse.jdt.core.prefs SUBDIRS = src examples all: all-recursive .SUFFIXES: .SUFFIXES: .cu .cubin .hip .o $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign eclipse-plugin/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign eclipse-plugin/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile installdirs: installdirs-recursive installdirs-am: install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ check-am clean clean-generic clean-libtool cscopelist-am ctags \ ctags-am distclean distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ installdirs-am maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ps ps-am tags tags-am uninstall uninstall-am .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/eclipse-plugin/build.properties000066400000000000000000000002761507764646700221520ustar00rootroot00000000000000source.. = src/ output.. = build/bin/ bin.includes = plugin.xml,\ META-INF/,\ .,\ icons/,\ .classpath,\ src.includes = .classpath starpu-1.4.9+dfsg/eclipse-plugin/build.xml000066400000000000000000000463031507764646700205570ustar00rootroot00000000000000 starpu-1.4.9+dfsg/eclipse-plugin/examples/000077500000000000000000000000001507764646700205465ustar00rootroot00000000000000starpu-1.4.9+dfsg/eclipse-plugin/examples/Makefile.am000066400000000000000000000026071507764646700226070ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-notests.mk EXTRA_DIST = \ ./hello/hello.c \ ./hello/.settings/language.settings.xml \ ./hello/.project ECLIPSE_DIR = $(shell dirname @ECLIPSE@) ECLIPSE_WORKSPACE = $(abs_top_builddir)/eclipse-plugin/workspace txtdir = $(libdir)/starpu/eclipse-plugin/examples/hello txt_DATA = hello/hello.c \ hello/.cproject \ hello/.project script=$(abs_top_srcdir)/eclipse-plugin/tools/install_workspace.sh install-data-hook: $(INSTALL_DATA) $(abs_top_srcdir)/eclipse-plugin/examples/hello/.settings/language.settings.xml $(txtdir).settings $(ECLIPSE_DIR)/eclipse -noSplash -data $(DESTDIR)$(txtdir)/../../workspace -application org.eclipse.cdt.managedbuilder.core.headlessbuild -import $(txtdir) starpu-1.4.9+dfsg/eclipse-plugin/examples/Makefile.in000066400000000000000000000651521507764646700226240ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ subdir = eclipse-plugin/examples ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(txtdir)" DATA = $(txt_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/make/starpu-notests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_CFLAGS = $(GLOBAL_AM_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # EXTRA_DIST = \ ./hello/hello.c \ ./hello/.settings/language.settings.xml \ ./hello/.project ECLIPSE_DIR = $(shell dirname @ECLIPSE@) ECLIPSE_WORKSPACE = $(abs_top_builddir)/eclipse-plugin/workspace txtdir = $(libdir)/starpu/eclipse-plugin/examples/hello txt_DATA = hello/hello.c \ hello/.cproject \ hello/.project script = $(abs_top_srcdir)/eclipse-plugin/tools/install_workspace.sh all: all-am .SUFFIXES: .SUFFIXES: .cu .cubin .hip .o $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign eclipse-plugin/examples/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign eclipse-plugin/examples/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-txtDATA: $(txt_DATA) @$(NORMAL_INSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(txtdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(txtdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(txtdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(txtdir)" || exit $$?; \ done uninstall-txtDATA: @$(NORMAL_UNINSTALL) @list='$(txt_DATA)'; test -n "$(txtdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(txtdir)'; $(am__uninstall_files_from_dir) tags TAGS: ctags CTAGS: cscope cscopelist: distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(txtdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-txtDATA @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) install-data-hook install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-txtDATA .MAKE: install-am install-data-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ cscopelist-am ctags-am distclean distclean-generic \ distclean-libtool distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am \ install-data-hook install-dvi install-dvi-am install-exec \ install-exec-am install-html install-html-am install-info \ install-info-am install-man install-pdf install-pdf-am \ install-ps install-ps-am install-strip install-txtDATA \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ uninstall-am uninstall-txtDATA .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null install-data-hook: $(INSTALL_DATA) $(abs_top_srcdir)/eclipse-plugin/examples/hello/.settings/language.settings.xml $(txtdir).settings $(ECLIPSE_DIR)/eclipse -noSplash -data $(DESTDIR)$(txtdir)/../../workspace -application org.eclipse.cdt.managedbuilder.core.headlessbuild -import $(txtdir) # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/eclipse-plugin/examples/hello/000077500000000000000000000000001507764646700216515ustar00rootroot00000000000000starpu-1.4.9+dfsg/eclipse-plugin/examples/hello/.cproject.in000066400000000000000000000354331507764646700241000ustar00rootroot00000000000000 starpu-1.4.9+dfsg/eclipse-plugin/examples/hello/.project000066400000000000000000000013651507764646700233250ustar00rootroot00000000000000 hello org.eclipse.cdt.managedbuilder.core.genmakebuilder clean,full,incremental, org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder full,incremental, org.eclipse.cdt.core.cnature org.eclipse.cdt.managedbuilder.core.managedBuildNature org.eclipse.cdt.managedbuilder.core.ScannerConfigNature starpu-1.4.9+dfsg/eclipse-plugin/examples/hello/.settings/000077500000000000000000000000001507764646700235675ustar00rootroot00000000000000starpu-1.4.9+dfsg/eclipse-plugin/examples/hello/.settings/language.settings.xml000066400000000000000000000050471507764646700277410ustar00rootroot00000000000000 starpu-1.4.9+dfsg/eclipse-plugin/examples/hello/hello.c000066400000000000000000000063021507764646700231210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include void display_cpu_func(void *buffers[], void *cl_arg) { (void)cl_arg; int nx, i; struct starpu_vector_interface *vector; int *val; vector = (struct starpu_vector_interface *) buffers[0]; nx = STARPU_VECTOR_GET_NX(vector); val = (int *)STARPU_VECTOR_GET_PTR(vector); for (i = 0; i < nx; i++) fprintf(stdout, "V[%d] = %d\n", i, val[i]); } void scal_cpu_func(void *buffers[], void *cl_arg) { int factor, nx, i; struct starpu_vector_interface *vector; int *val; vector = (struct starpu_vector_interface *) buffers[0]; nx = STARPU_VECTOR_GET_NX(vector); val = (int *)STARPU_VECTOR_GET_PTR(vector); starpu_codelet_unpack_args(cl_arg, &factor); for (i = 0; i < nx; i++) val[i] *= factor; } void hello_cpu_func(void *buffers[], void *cl_arg) { (void)buffers; int answer; starpu_codelet_unpack_args(cl_arg, &answer); fprintf(stdout, "Hello world, the answer is %d\n", answer); } struct starpu_codelet hello_codelet = { .cpu_funcs = {hello_cpu_func}, .cpu_funcs_name = {"hello_cpu_func"}, .nbuffers = 0, .name = "hello" }; struct starpu_codelet scal_codelet = { .cpu_funcs = {scal_cpu_func}, .cpu_funcs_name = {"scal_cpu_func"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "scal" }; struct starpu_codelet display_codelet = { .cpu_funcs = {display_cpu_func}, .cpu_funcs_name = {"display_cpu_func"}, .nbuffers = 1, .modes = {STARPU_R}, .name = "display" }; #define NX 5 int main(void) { int answer = 42; int ret; int vector[NX]; unsigned i; starpu_data_handle_t vector_handle; setenv("STARPU_FXT_TRACE", "1", 1); ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); for (i = 0; i < NX; i++) vector[i] = i+1; starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); ret = starpu_task_insert(&hello_codelet, STARPU_VALUE, &answer, sizeof(answer), 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_insert(&scal_codelet, STARPU_RW, vector_handle, STARPU_VALUE, &answer, sizeof(answer), 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_insert(&display_codelet, STARPU_R, vector_handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_data_unregister(vector_handle); starpu_shutdown(); return 0; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/eclipse-plugin/icons/000077500000000000000000000000001507764646700200435ustar00rootroot00000000000000starpu-1.4.9+dfsg/eclipse-plugin/icons/fxt.png000066400000000000000000000025241507764646700213550ustar00rootroot00000000000000PNG  IHDR#GgAMA a cHRMz&u0`:pQ<PLTEީΕ̎ꌌЭȵȵȵQMJ]bf~|qrt\WSRVZuss Xj{ݕȽK;,(5E񿵬#atohclt|~QA2-;K$8CNejqx!ptwQ@2-;K#RbqқE6)&3AQ@2-;K  `rƾwtp% zO>0+8I940T]eporvi\;AING@>DLȵȴ֠ǎ䐐ɱZJbKGDHtIME q_IDAT(c` 021c,pl\7/D 6 *&.! R"%-r@RAS*^5j@BC-m bfV6`5vp5N.n ?аȨظĤdt5 )iY9yE%ej+*kj[Z۠nFV?aS:m:Ìfϙ;o>ÂcYt+V^v 7m޲CͶ;vͰg:|HjNΧN3g9wH\OɥWkq}?y Qo޾c 6Y&NNE%tEXtdate:create2021-02-25T11:17:16-05:00S%tEXtdate:modify2021-02-25T11:17:16-05:00+otEXtSoftwaregnome-screenshot>IENDB`starpu-1.4.9+dfsg/eclipse-plugin/icons/svg.png000066400000000000000000000033271507764646700213550ustar00rootroot00000000000000PNG  IHDR,J3gAMA a cHRMz&u0`:pQ<bKGDtIME4UIDATX՗{T?A'wg߻츙($W0qvWG:%^;&{:÷5^?Wヌl/aR6N8TlrD>vP\T. eU|%4]GUf3U 2/8O?JG{u!_~h^ЅהAl5>"Š67J!=dAro2ONHKk#d8? GSIENDB`starpu-1.4.9+dfsg/eclipse-plugin/icons/taskGraph.png000066400000000000000000000026511507764646700225010ustar00rootroot00000000000000PNG  IHDR#pdD7gAMA a cHRMz&u0`:pQ<bKGDtIME #H=:IDATH͗kleovݲ˶K7Z.dKK#(h(?@!!5!`ॄCSHH*-i**J)Pӽ wLiÛL&͙7RJχ!YR!111a2nMZv;$ )%8 p8C/R666ʝ;w+W@8D.RdZWWG~~>.k UUGuP @q!2EQHNNk`A)2ds1x%5 b盤aZOSuUUǟMɜb˫* S #::2{a׏GM&vr 5+h<~ۘm2)p{~gy/z k475aDRsɚ-M\2a"s%J5hjj ^¼G磪*&EMmQ saהDr3ʓy#{(.[x~>\Wg_FFtWs>`mF "@gYfBOuA Ĥ& K k^[Cb>i _W Ռ0Bw\ W3VTr!v,g cq}\qfq2ӄ(rp|' HOwsr?NaXmf̘ j~.emLwgfrBl+L"7e& mlߺD>aW Uz(@ov D& !м؉F,}(N_BT\\\: < ~Y⣭tttX/noLHIgjLOGsrHKM%=8q wsRv~/W8'<5YXM6`;vHMFmFHE/.Y IO岺ZJ)}~-%!KsCBCwfЎT)%[n`0Ubfr#4Mae(--l6vPVVFZZ1r ooo>BEב0 {_ϝl6,үɾcXJ%tEXtdate:create2021-02-25T11:18:27-05:005 %tEXtdate:modify2021-02-25T11:18:27-05:00D|tEXtSoftwaregnome-screenshot>IENDB`starpu-1.4.9+dfsg/eclipse-plugin/icons/vite.png000066400000000000000000000026301507764646700215210ustar00rootroot00000000000000PNG  IHDR#gAMA a cHRMz&u0`:pQ<PLTEȥѹ㍍쨨uuv타י|  ΣȜ砛ƿ񱡐 ǺLT[`^\͐s'ރsc4>I͚Ľ1*(;1)~쥛lpsldZ=0%bt hns_[W .$tE7*Whz³}p vNB7P[g{pȣ258TI@OXa⽻Զټι̷彻𬬬ɧꍍی餤bKGDHtIME 3=pIDAT(c` 021bl%\<|؀X8n;$$,> E%|jU:>5@BK[]=}C#cS3s j,ml@.@͝? 0(8$4,<"2*:&6.>!1)9Դt̬ܼ¢Ҳ*45 5u M-m ]=}j'Ld4y ig̜5{\y,\I͒˖Xjoظi۶عk7]{G;~gΞ;%WU q |jnŧ}!%?QO=6w!j?| VP?HuŒuһ%tEXtdate:create2021-02-25T11:17:51-05:00%tEXtdate:modify2021-02-25T11:17:51-05:00tEXtSoftwaregnome-screenshot>IENDB`starpu-1.4.9+dfsg/eclipse-plugin/plugin.xml000066400000000000000000000126531507764646700207570ustar00rootroot00000000000000 starpu-1.4.9+dfsg/eclipse-plugin/src/000077500000000000000000000000001507764646700175175ustar00rootroot00000000000000starpu-1.4.9+dfsg/eclipse-plugin/src/Makefile.am000066400000000000000000000045151507764646700215600ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-notests.mk EXTRA_DIST = \ deploy/build.xml \ deploy/javaCompilerArgs \ starpu/handlers/SvgHandler.java \ starpu/handlers/TraceVizHandler.java \ starpu/handlers/TaskGraphHandler.java \ starpu/handlers/TraceGenHandler.java \ starpu/handlers/TraceUtils.java ECLIPSE_DIR = $(shell dirname @ECLIPSE@) ECLIPSE_WORKSPACE = $(abs_top_builddir)/eclipse-plugin/workspace all: package cp $(abs_top_srcdir)/eclipse-plugin/build.xml $(abs_top_srcdir)/eclipse-plugin/plugin_build.xml rm -f build/plugin/plugins/StarPU_*jar $(ECLIPSE_DIR)/eclipse -noSplash -data $(ECLIPSE_WORKSPACE) -application org.eclipse.ant.core.antRunner -buildfile $(abs_top_srcdir)/eclipse-plugin/src/deploy/build.xml rm -f $(ECLIPSE_DIR)/dropins/StarPU_*jar cp build/plugin/plugins/*.jar $(ECLIPSE_DIR)/dropins/ mv $(abs_top_srcdir)/eclipse-plugin/plugin_build.xml $(abs_top_srcdir)/eclipse-plugin/build.xml package: $(ECLIPSE_DIR)/eclipse -noSplash -data $(ECLIPSE_WORKSPACE) -application org.eclipse.cdt.managedbuilder.core.headlessbuild -import $(abs_top_srcdir)/eclipse-plugin rm -f $(ECLIPSE_DIR)/dropins/StarPU_*jar $(ECLIPSE_DIR)/eclipse -noSplash -data $(ECLIPSE_WORKSPACE) -application org.eclipse.ant.core.antRunner -buildfile $(abs_top_srcdir)/eclipse-plugin/build.xml txtdir = $(libdir)/starpu/eclipse-plugin/workspace script = $(abs_top_srcdir)/eclipse-plugin/tools/install_workspace.sh install-data-hook: (cd $(abs_top_builddir)/eclipse-plugin/workspace && $(PROG_FIND) . -type f -exec $(script) {} $(DESTDIR)$(txtdir) $(INSTALL_DATA) \;) clean-local: rm -rf build distclean-local: clean-local rm -f $(ECLIPSE_DIR)/dropins/StarPU_*jar rm -rf $(ECLIPSE_WORKSPACE) starpu-1.4.9+dfsg/eclipse-plugin/src/Makefile.in000066400000000000000000000627471507764646700216040ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ subdir = eclipse-plugin/src ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/make/starpu-notests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_CFLAGS = $(GLOBAL_AM_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # EXTRA_DIST = \ deploy/build.xml \ deploy/javaCompilerArgs \ starpu/handlers/SvgHandler.java \ starpu/handlers/TraceVizHandler.java \ starpu/handlers/TaskGraphHandler.java \ starpu/handlers/TraceGenHandler.java \ starpu/handlers/TraceUtils.java ECLIPSE_DIR = $(shell dirname @ECLIPSE@) ECLIPSE_WORKSPACE = $(abs_top_builddir)/eclipse-plugin/workspace txtdir = $(libdir)/starpu/eclipse-plugin/workspace script = $(abs_top_srcdir)/eclipse-plugin/tools/install_workspace.sh all: all-am .SUFFIXES: .SUFFIXES: .cu .cubin .hip .o $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign eclipse-plugin/src/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign eclipse-plugin/src/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs tags TAGS: ctags CTAGS: cscope cscopelist: distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile installdirs: install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool clean-local mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic distclean-local dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) install-data-hook install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: .MAKE: install-am install-data-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ clean-local cscopelist-am ctags-am distclean distclean-generic \ distclean-libtool distclean-local distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-data-hook install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \ uninstall-am .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null all: package cp $(abs_top_srcdir)/eclipse-plugin/build.xml $(abs_top_srcdir)/eclipse-plugin/plugin_build.xml rm -f build/plugin/plugins/StarPU_*jar $(ECLIPSE_DIR)/eclipse -noSplash -data $(ECLIPSE_WORKSPACE) -application org.eclipse.ant.core.antRunner -buildfile $(abs_top_srcdir)/eclipse-plugin/src/deploy/build.xml rm -f $(ECLIPSE_DIR)/dropins/StarPU_*jar cp build/plugin/plugins/*.jar $(ECLIPSE_DIR)/dropins/ mv $(abs_top_srcdir)/eclipse-plugin/plugin_build.xml $(abs_top_srcdir)/eclipse-plugin/build.xml package: $(ECLIPSE_DIR)/eclipse -noSplash -data $(ECLIPSE_WORKSPACE) -application org.eclipse.cdt.managedbuilder.core.headlessbuild -import $(abs_top_srcdir)/eclipse-plugin rm -f $(ECLIPSE_DIR)/dropins/StarPU_*jar $(ECLIPSE_DIR)/eclipse -noSplash -data $(ECLIPSE_WORKSPACE) -application org.eclipse.ant.core.antRunner -buildfile $(abs_top_srcdir)/eclipse-plugin/build.xml install-data-hook: (cd $(abs_top_builddir)/eclipse-plugin/workspace && $(PROG_FIND) . -type f -exec $(script) {} $(DESTDIR)$(txtdir) $(INSTALL_DATA) \;) clean-local: rm -rf build distclean-local: clean-local rm -f $(ECLIPSE_DIR)/dropins/StarPU_*jar rm -rf $(ECLIPSE_WORKSPACE) # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/eclipse-plugin/src/deploy/000077500000000000000000000000001507764646700210135ustar00rootroot00000000000000starpu-1.4.9+dfsg/eclipse-plugin/src/deploy/build.xml000066400000000000000000000004711507764646700226360ustar00rootroot00000000000000 starpu-1.4.9+dfsg/eclipse-plugin/src/deploy/javaCompilerArgs000066400000000000000000000337501507764646700241770ustar00rootroot00000000000000#ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.ui_3.119.0.v20210111-1350.jar[~org/eclipse/ui/internal/*:?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.runtime_3.20.100.v20210111-0815.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.osgi_3.16.200.v20210226-1447.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.osgi.compatibility.state_1.2.300.v20210212-1137.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.equinox.common_3.14.100.v20210212-1143.jar[+org/eclipse/core/runtime/*:?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.jobs_3.10.1100.v20210111-0815.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.equinox.registry_3.10.100.v20210212-1143.jar[~org/eclipse/core/internal/adapter/*:~org/eclipse/core/internal/registry/*:~org/eclipse/core/internal/registry/osgi/*:~org/eclipse/core/internal/registry/spi/*:+org/eclipse/core/runtime/*:+org/eclipse/core/runtime/dynamichelpers/*:+org/eclipse/core/runtime/spi/*:?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.equinox.preferences_3.8.200.v20210212-1143.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.contenttype_3.7.900.v20210111-0918.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.equinox.app_1.5.100.v20210212-1143.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.osgi.services_3.10.0.v20210212-1137.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.osgi.util_3.6.0.v20210212-1137.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/javax.servlet_3.1.0.v201410161800.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.swt_3.116.0.v20210302-1107.jar[+org/eclipse/swt/*:+org/eclipse/swt/accessibility/*:+org/eclipse/swt/awt/*:+org/eclipse/swt/browser/*:+org/eclipse/swt/custom/*:+org/eclipse/swt/dnd/*:+org/eclipse/swt/events/*:+org/eclipse/swt/graphics/*:+org/eclipse/swt/layout/*:+org/eclipse/swt/opengl/*:+org/eclipse/swt/printing/*:+org/eclipse/swt/program/*:+org/eclipse/swt/widgets/*:~org/eclipse/swt/internal/*:~org/eclipse/swt/internal/image/*:~org/eclipse/swt/internal/accessibility/gtk/*:~org/eclipse/swt/internal/cairo/*:~org/eclipse/swt/internal/gtk/*:~org/eclipse/swt/internal/opengl/glx/*:?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.swt.browser.chromium.gtk.linux.x86_64_3.116.0.v20210302-1107.jar[+org/eclipse/swt/*:+org/eclipse/swt/accessibility/*:+org/eclipse/swt/awt/*:+org/eclipse/swt/browser/*:+org/eclipse/swt/custom/*:+org/eclipse/swt/dnd/*:+org/eclipse/swt/events/*:+org/eclipse/swt/graphics/*:+org/eclipse/swt/layout/*:+org/eclipse/swt/opengl/*:+org/eclipse/swt/printing/*:+org/eclipse/swt/program/*:+org/eclipse/swt/widgets/*:~org/eclipse/swt/internal/*:~org/eclipse/swt/internal/image/*:~org/eclipse/swt/internal/accessibility/gtk/*:~org/eclipse/swt/internal/cairo/*:~org/eclipse/swt/internal/gtk/*:~org/eclipse/swt/internal/opengl/glx/*:?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.swt.gtk.linux.x86_64_3.116.0.v20210302-1107.jar[+org/eclipse/swt/*:+org/eclipse/swt/accessibility/*:+org/eclipse/swt/awt/*:+org/eclipse/swt/browser/*:+org/eclipse/swt/custom/*:+org/eclipse/swt/dnd/*:+org/eclipse/swt/events/*:+org/eclipse/swt/graphics/*:+org/eclipse/swt/layout/*:+org/eclipse/swt/opengl/*:+org/eclipse/swt/printing/*:+org/eclipse/swt/program/*:+org/eclipse/swt/widgets/*:~org/eclipse/swt/internal/*:~org/eclipse/swt/internal/image/*:~org/eclipse/swt/internal/accessibility/gtk/*:~org/eclipse/swt/internal/cairo/*:~org/eclipse/swt/internal/gtk/*:~org/eclipse/swt/internal/opengl/glx/*:?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.jface_3.22.100.v20210126-0831.jar[+org/eclipse/jface/*:+org/eclipse/jface/action/*:+org/eclipse/jface/action/images/*:+org/eclipse/jface/bindings/*:+org/eclipse/jface/bindings/keys/*:+org/eclipse/jface/bindings/keys/formatting/*:+org/eclipse/jface/commands/*:+org/eclipse/jface/contexts/*:+org/eclipse/jface/dialogs/*:+org/eclipse/jface/dialogs/images/*:+org/eclipse/jface/fieldassist/*:+org/eclipse/jface/fieldassist/images/*:+org/eclipse/jface/images/*:~org/eclipse/jface/internal/*:~org/eclipse/jface/internal/provisional/action/*:+org/eclipse/jface/layout/*:+org/eclipse/jface/menus/*:+org/eclipse/jface/operation/*:+org/eclipse/jface/preference/*:+org/eclipse/jface/preference/images/*:+org/eclipse/jface/resource/*:+org/eclipse/jface/util/*:+org/eclipse/jface/viewers/*:+org/eclipse/jface/viewers/deferred/*:+org/eclipse/jface/widgets/*:+org/eclipse/jface/window/*:+org/eclipse/jface/wizard/*:+org/eclipse/jface/wizard/images/*:?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.commands_3.9.800.v20201021-1339.jar[+org/eclipse/core/commands/*:+org/eclipse/core/commands/common/*:+org/eclipse/core/commands/contexts/*:~org/eclipse/core/commands/internal/util/*:+org/eclipse/core/commands/operations/*:+org/eclipse/core/commands/util/*:~org/eclipse/core/internal/commands/operations/*:~org/eclipse/core/internal/commands/util/*:?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.equinox.bidi_1.3.100.v20210212-1143.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.ui.workbench_3.122.100.v20210215-1525.jar[~org/eclipse/e4/ui/workbench/addons/perspectiveswitcher/*:+org/eclipse/ui/*:+org/eclipse/ui/about/*:+org/eclipse/ui/actions/*:+org/eclipse/ui/activities/*:+org/eclipse/ui/application/*:+org/eclipse/ui/branding/*:+org/eclipse/ui/browser/*:+org/eclipse/ui/commands/*:+org/eclipse/ui/contexts/*:+org/eclipse/ui/databinding/*:+org/eclipse/ui/databinding/typed/*:+org/eclipse/ui/dialogs/*:+org/eclipse/ui/dnd/*:+org/eclipse/ui/fieldassist/*:+org/eclipse/ui/handlers/*:+org/eclipse/ui/help/*:~org/eclipse/ui/internal/*:~org/eclipse/ui/internal/about/*:~org/eclipse/ui/internal/actions/*:~org/eclipse/ui/internal/activities/*:~org/eclipse/ui/internal/activities/ws/*:~org/eclipse/ui/internal/application/*:~org/eclipse/ui/internal/browser/*:~org/eclipse/ui/internal/commands/*:~org/eclipse/ui/internal/contexts/*:~org/eclipse/ui/internal/decorators/*:~org/eclipse/ui/internal/dialogs/*:~org/eclipse/ui/internal/dialogs/cpd/*:~org/eclipse/ui/internal/e4/compatibility/*:~org/eclipse/ui/internal/e4/migration/*:~org/eclipse/ui/internal/editorsupport/*:~org/eclipse/ui/internal/expressions/*:~org/eclipse/ui/internal/handlers/*:~org/eclipse/ui/internal/help/*:~org/eclipse/ui/internal/intro/*:~org/eclipse/ui/internal/keys/*:~org/eclipse/ui/internal/keys/model/*:~org/eclipse/ui/internal/layout/*:~org/eclipse/ui/internal/menus/*:~org/eclipse/ui/internal/misc/*:~org/eclipse/ui/internal/model/*:~org/eclipse/ui/internal/operations/*:~org/eclipse/ui/internal/part/*:~org/eclipse/ui/internal/preferences/*:~org/eclipse/ui/internal/progress/*:~org/eclipse/ui/internal/provisional/application/*:~org/eclipse/ui/internal/quickaccess/*:~org/eclipse/ui/internal/quickaccess/providers/*:~org/eclipse/ui/internal/registry/*:~org/eclipse/ui/internal/services/*:~org/eclipse/ui/internal/splash/*:~org/eclipse/ui/internal/statushandlers/*:~org/eclipse/ui/internal/testing/*:~org/eclipse/ui/internal/themes/*:~org/eclipse/ui/internal/tweaklets/*:~org/eclipse/ui/internal/util/*:~org/eclipse/ui/internal/wizards/*:~org/eclipse/ui/internal/wizards/preferences/*:+org/eclipse/ui/intro/*:+org/eclipse/ui/keys/*:+org/eclipse/ui/menus/*:+org/eclipse/ui/model/*:+org/eclipse/ui/operations/*:+org/eclipse/ui/part/*:+org/eclipse/ui/plugin/*:+org/eclipse/ui/preferences/*:+org/eclipse/ui/progress/*:+org/eclipse/ui/quickaccess/*:+org/eclipse/ui/services/*:+org/eclipse/ui/splash/*:+org/eclipse/ui/statushandlers/*:+org/eclipse/ui/swt/*:+org/eclipse/ui/themes/*:+org/eclipse/ui/views/*:+org/eclipse/ui/wizards/*:?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/com.ibm.icu_67.1.0.v20200706-1749.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/javax.annotation_1.3.5.v20200909-1856.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/javax.inject_1.0.0.v20091030.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.core.commands_0.13.0.v20201119-1132.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.expressions_3.7.100.v20210203-1000.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.core.contexts_1.8.400.v20191217-1710.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.core.di_1.7.700.v20210128-2123.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.core.di.annotations_1.6.600.v20191216-2352.jar[+org/eclipse/e4/core/di/annotations/*:?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.core.services_2.2.600.v20210110-1654.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.workbench_1.12.100.v20210122-1731.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.apache.commons.jxpath_1.3.0.v200911051830.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.model.workbench_2.1.1000.v20210111-0958.jar[~org/eclipse/e4/ui/model/*:+org/eclipse/e4/ui/model/application/*:+org/eclipse/e4/ui/model/application/commands/*:~org/eclipse/e4/ui/model/application/commands/impl/*:~org/eclipse/e4/ui/model/application/commands/util/*:+org/eclipse/e4/ui/model/application/descriptor/basic/*:~org/eclipse/e4/ui/model/application/descriptor/basic/impl/*:~org/eclipse/e4/ui/model/application/descriptor/basic/util/*:~org/eclipse/e4/ui/model/application/impl/*:+org/eclipse/e4/ui/model/application/ui/*:+org/eclipse/e4/ui/model/application/ui/advanced/*:~org/eclipse/e4/ui/model/application/ui/advanced/impl/*:~org/eclipse/e4/ui/model/application/ui/advanced/util/*:+org/eclipse/e4/ui/model/application/ui/basic/*:~org/eclipse/e4/ui/model/application/ui/basic/impl/*:~org/eclipse/e4/ui/model/application/ui/basic/util/*:~org/eclipse/e4/ui/model/application/ui/impl/*:+org/eclipse/e4/ui/model/application/ui/menu/*:~org/eclipse/e4/ui/model/application/ui/menu/impl/*:~org/eclipse/e4/ui/model/application/ui/menu/util/*:~org/eclipse/e4/ui/model/application/ui/util/*:~org/eclipse/e4/ui/model/application/util/*:+org/eclipse/e4/ui/model/fragment/*:~org/eclipse/e4/ui/model/fragment/impl/*:~org/eclipse/e4/ui/model/fragment/util/*:~org/eclipse/e4/ui/model/internal/*:?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.emf.ecore_2.23.0.v20200630-0516.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.emf.common_2.22.0.v20210114-1734.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.resources_3.14.0.v20210215-0934.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.ant.core_3.5.800.v20200608-1251.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.variables_3.4.800.v20200120-1101.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.filesystem_1.7.700.v20200110-1734.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.filesystem.linux.x86_64_1.2.300.v20180828-0158.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.emf.xpath_0.2.800.v20200609-0849.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.services_1.5.0.v20210115-1333.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.di_1.3.0.v20210222-1018.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.core.di.extensions.supplier_0.15.800.v20210110-1654.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.core.di.extensions_0.16.0.v20200507-0938.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.emf.ecore.change_2.14.0.v20190528-0725.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.emf.ecore.xmi_2.16.0.v20190528-0725.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.jdt.annotation_2.2.600.v20200408-1511.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.workbench.renderers.swt_0.15.0.v20201125-0918.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.workbench.swt_0.16.0.v20201230-1610.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.dialogs_1.2.100.v20201109-2317.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.databinding_1.10.100.v20200926-1123.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.databinding.observable_1.10.0.v20200730-0848.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.core.databinding.property_1.8.100.v20200619-0651.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.jface.databinding_1.12.200.v20210111-0911.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.css.core_0.13.0.v20201015-0653.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.w3c.css.sac_1.3.1.v200903091627.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.apache.batik.css_1.13.0.v20200622-2037.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.apache.batik.i18n_1.13.0.v20200622-2037.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.apache.xmlgraphics_2.4.0.v20200622-2037.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.apache.commons.io_2.6.0.v20190123-2029.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.apache.commons.logging_1.2.0.v20180409-1502.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/javax.xml_1.3.4.v201005080400.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.w3c.dom.events_3.0.0.draft20060413_v201105210656.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.w3c.dom.svg_1.1.0.v201011041433.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.w3c.dom.smil_1.0.1.v200903091627.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.apache.batik.util_1.13.0.v20200622-2037.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.apache.batik.constants_1.13.0.v20200622-2037.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.css.swt_0.14.100.v20201217-1340.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.bindings_0.13.0.v20201119-1132.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.workbench3_0.15.500.v20201021-1339.jar[+org/eclipse/ui/testing/*:+org/eclipse/ui/testing/dumps/*:?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.css.swt.theme_0.13.0.v20201026-1147.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.swt.gtk_1.1.100.v20210108-1832.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.widgets_1.2.800.v20201021-1339.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.urischeme_1.1.300.v20210113-1544.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/com.sun.jna_4.5.1.v20190425-1842.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/com.sun.jna.platform_4.5.1.v20190425-1842.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.jface.notifications_0.3.0.v20210218-1820.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.help_3.8.800.v20200525-0755.jar[?**/*] #ADAPTER#ACCESS#.p2/pool/plugins/org.eclipse.e4.ui.workbench.addons.swt_1.4.100.v20201221-2332.jar[?**/*] starpu-1.4.9+dfsg/eclipse-plugin/src/starpu/000077500000000000000000000000001507764646700210355ustar00rootroot00000000000000starpu-1.4.9+dfsg/eclipse-plugin/src/starpu/handlers/000077500000000000000000000000001507764646700226355ustar00rootroot00000000000000starpu-1.4.9+dfsg/eclipse-plugin/src/starpu/handlers/SvgHandler.java000066400000000000000000000055101507764646700255360ustar00rootroot00000000000000// StarPU --- Runtime system for heterogeneous multicore architectures. // // Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria // // StarPU is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation; either version 2.1 of the License, or (at // your option) any later version. // // StarPU is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU Lesser General Public License in COPYING.LGPL for more details. // package starpu.handlers; import java.awt.EventQueue; import java.io.File; import java.io.PrintWriter; import java.util.regex.Pattern; import org.eclipse.core.commands.AbstractHandler; import org.eclipse.core.commands.ExecutionEvent; import org.eclipse.core.commands.ExecutionException; import org.eclipse.core.runtime.IPath; import org.eclipse.ui.IEditorInput; import org.eclipse.ui.IPathEditorInput; import org.eclipse.ui.handlers.HandlerUtil; public class SvgHandler extends AbstractHandler { @Override public Object execute(ExecutionEvent event) throws ExecutionException { EventQueue.invokeLater(() -> { try { String workDir = System.getProperty("user.dir") + "/" + TraceUtils.getRandomDirectoryName(); String inputfilename = workDir + "/dag.dot"; File f = new File(inputfilename); if (!f.isFile()) throw new Exception("File <" + inputfilename + "> does not exist. Have you run StarPU FxT tool?"); String[] cmd1 = { "dot", "-Tcmapx", inputfilename, "-o", workDir + "/output.map"}; TraceUtils.runCommand(cmd1); String[] cmd2 = { "dot", "-Tsvg", inputfilename, "-o", workDir + "/output.svg" }; TraceUtils.runCommand(cmd2); IEditorInput input = HandlerUtil.getActiveEditor(event).getEditorInput(); if (!(input instanceof IPathEditorInput)) { System.out.println("There is no path"); } else { String map = TraceUtils.readFileToString(workDir + "/output.map"); Pattern p = Pattern.compile("href=\"([^#\"/]+/)*"); IPath ipath = ((IPathEditorInput) input).getPath().makeAbsolute().removeLastSegments(1); String path = ipath.toString(); String replaceBy = "href=\"" + path + "/"; map = p.matcher(map).replaceAll(replaceBy); PrintWriter pw = new PrintWriter(workDir + "/output.html"); pw.println(new String("\n" + "\n")); pw.println(map); pw.println(new String("")); pw.close(); } String[] cmd8 = { "firefox", workDir + "/output.html" }; TraceUtils.runCommand(cmd8); } catch (Exception e) { TraceUtils.displayMessage("Error: " + e.toString()); e.printStackTrace(); } }); return null; } } starpu-1.4.9+dfsg/eclipse-plugin/src/starpu/handlers/TaskGraphHandler.java000066400000000000000000000044441507764646700266700ustar00rootroot00000000000000// StarPU --- Runtime system for heterogeneous multicore architectures. // // Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria // // StarPU is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation; either version 2.1 of the License, or (at // your option) any later version. // // StarPU is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU Lesser General Public License in COPYING.LGPL for more details. // package starpu.handlers; import java.awt.EventQueue; import java.awt.Image; import java.io.File; import javax.imageio.ImageIO; import javax.swing.ImageIcon; import javax.swing.JFrame; import javax.swing.JLabel; import org.eclipse.core.commands.AbstractHandler; import org.eclipse.core.commands.ExecutionEvent; import org.eclipse.core.commands.ExecutionException; public class TaskGraphHandler extends AbstractHandler { @Override public Object execute(ExecutionEvent event) throws ExecutionException { EventQueue.invokeLater(() -> { try { String workDir = System.getProperty("user.dir") + "/" + TraceUtils.getRandomDirectoryName(); String inputfilename = workDir + "/dag.dot"; File f = new File(inputfilename); if (!f.isFile()) throw new Exception("File <" + inputfilename + "> does not exist. Have you run StarPU FxT tool?"); String[] cmd2 = { "dot", "-Tpng", inputfilename, "-o", workDir + "/" + "output.png" }; starpu.handlers.TraceUtils.runCommand(cmd2); String[] cmd3 = { "starpu_tasks_rec_complete", workDir + "/" + "tasks.rec" }; starpu.handlers.TraceUtils.runCommand(cmd3); JFrame frame = new JFrame(); File imageFile = new File(workDir + "/" + "output.png"); Image i = ImageIO.read(imageFile); ImageIcon image = new ImageIcon(i); JLabel imageLabel = new JLabel(image); frame.add(imageLabel); frame.pack(); imageLabel.setVisible(true); frame.setVisible(true); frame.setTitle("StarPU application: Task Graph.png"); } catch (Exception e) { TraceUtils.displayMessage("Error: " + e.toString()); e.printStackTrace(); } }); return null; } } starpu-1.4.9+dfsg/eclipse-plugin/src/starpu/handlers/TraceGenHandler.java000066400000000000000000000047401507764646700264730ustar00rootroot00000000000000// StarPU --- Runtime system for heterogeneous multicore architectures. // // Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria // // StarPU is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation; either version 2.1 of the License, or (at // your option) any later version. // // StarPU is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU Lesser General Public License in COPYING.LGPL for more details. // package starpu.handlers; import java.awt.EventQueue; import java.io.File; import org.eclipse.core.commands.AbstractHandler; import org.eclipse.core.commands.ExecutionEvent; import org.eclipse.core.commands.ExecutionException; import org.eclipse.jface.dialogs.MessageDialog; import org.eclipse.ui.IWorkbenchWindow; import org.eclipse.ui.handlers.HandlerUtil; public class TraceGenHandler extends AbstractHandler { @Override public Object execute(ExecutionEvent event) throws ExecutionException { IWorkbenchWindow window = HandlerUtil.getActiveWorkbenchWindowChecked(event); MessageDialog.openInformation(window.getShell(), "StarPU FxT Tool", "Running Starpu FxT Tool: generation of different trace formats"); EventQueue.invokeLater(() -> { try { String value = System.getenv("STARPU_FXT_PREFIX"); if (value != null) { System.out.println("STARPU_FXT_PREFIX=" + value); } else { System.out.println("STARPU_FXT_PREFIX does not have a value"); value = "/tmp"; } String value1 = System.getenv("STARPU_FXT_SUFFIX"); if (value1 != null) { System.out.println("STARPU_FXT_SUFFIX=" + value1); } else { System.out.println("STARPU_FXT_SUFFIX does not have a value"); String value2 = System.getenv("USER"); value1 = "prof_file_" + value2 + "_0"; } String inputfilename = value + "/" + value1; File f = new File(inputfilename); if (!f.isFile()) throw new Exception("File <" + inputfilename + "> does not exist. Have you run your application?"); String[] command = {"starpu_fxt_tool", "-i", inputfilename, "-d", TraceUtils.getRandomDirectoryName(), "-c", "-no-acquire"}; TraceUtils.runCommand(command); } catch (Exception e) { TraceUtils.displayMessage("Error: " + e.toString()); e.printStackTrace(); } }); return null; } } starpu-1.4.9+dfsg/eclipse-plugin/src/starpu/handlers/TraceUtils.java000066400000000000000000000047361507764646700255710ustar00rootroot00000000000000// StarPU --- Runtime system for heterogeneous multicore architectures. // // Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria // // StarPU is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation; either version 2.1 of the License, or (at // your option) any later version. // // StarPU is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU Lesser General Public License in COPYING.LGPL for more details. // package starpu.handlers; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; import java.util.Arrays; import java.util.Random; import javax.swing.BoxLayout; import javax.swing.JButton; import javax.swing.JFrame; import javax.swing.JLabel; import javax.swing.JPanel; public class TraceUtils { private static int x = 1000 + new Random().nextInt(9999); public static void runCommand(String[] command) throws Exception { System.out.println("Running command " + Arrays.toString(command)); Process p = Runtime.getRuntime().exec(command); String line; BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream())); while ((line = in.readLine()) != null) { System.out.println(line); } in.close(); } public static String getRandomDirectoryName() { return "traces_" + x; } public static void displayMessage(String message) { final JFrame f = new JFrame("StarPU Message"); JLabel l = new JLabel(message); JButton b19 = new JButton("OK"); b19.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent evt) { f.setVisible(false); } }); JPanel p = new JPanel(); p.setLayout(new BoxLayout(p, BoxLayout.Y_AXIS)); p.add(l); p.add(b19); f.add(p); f.pack(); f.setVisible(true); } public static String readFileToString(String filename) throws IOException { BufferedReader reader = new BufferedReader(new FileReader(filename)); StringBuilder stringBuilder = new StringBuilder(); char[] buffer = new char[10]; while (reader.read(buffer) != -1) { stringBuilder.append(new String(buffer)); buffer = new char[10]; } reader.close(); return stringBuilder.toString(); } } starpu-1.4.9+dfsg/eclipse-plugin/src/starpu/handlers/TraceVizHandler.java000066400000000000000000000031601507764646700265250ustar00rootroot00000000000000// StarPU --- Runtime system for heterogeneous multicore architectures. // // Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria // // StarPU is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation; either version 2.1 of the License, or (at // your option) any later version. // // StarPU is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU Lesser General Public License in COPYING.LGPL for more details. // package starpu.handlers; import java.awt.EventQueue; import java.io.File; import org.eclipse.core.commands.AbstractHandler; import org.eclipse.core.commands.ExecutionEvent; import org.eclipse.core.commands.ExecutionException; public class TraceVizHandler extends AbstractHandler { @Override public Object execute(ExecutionEvent event) throws ExecutionException { EventQueue.invokeLater(() -> { try { String workDir = System.getProperty("user.dir") + "/" + TraceUtils.getRandomDirectoryName(); String inputfilename = workDir + "/paje.trace"; File f = new File(inputfilename); if (!f.isFile()) throw new Exception("File <" + inputfilename + "> does not exist. Have you run StarPU FxT tool?"); String[] cmd1 = { "vite", inputfilename }; starpu.handlers.TraceUtils.runCommand(cmd1); } catch (Exception e) { TraceUtils.displayMessage("Error: " + e.toString()); e.printStackTrace(); } }); return null; } } starpu-1.4.9+dfsg/eclipse-plugin/tools/000077500000000000000000000000001507764646700200705ustar00rootroot00000000000000starpu-1.4.9+dfsg/eclipse-plugin/tools/cproject.sh000077500000000000000000000020151507764646700222360ustar00rootroot00000000000000#!/bin/bash # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # type=$1 shift if test "$type" == "module" then input='@STARPU_LIB@="true"' elif test "$type" == "option" then input='' else echo Unknown type $type exit 1 fi for x in $* do echo $input | sed -e 's/@STARPU_LIB@/'$x'/' done | tr '\012' ' ' starpu-1.4.9+dfsg/eclipse-plugin/tools/install_workspace.sh000077500000000000000000000015731507764646700241610ustar00rootroot00000000000000#!/bin/bash # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # src=$1 dst=$2 shift shift if test ! -d $dst/$(dirname $src) then echo mkdir -p $dst/$(dirname $src) mkdir -p $dst/$(dirname $src) fi echo $* $src $dst/$(dirname $src) $* $src $dst/$(dirname $src) starpu-1.4.9+dfsg/examples/000077500000000000000000000000001507764646700156265ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/Makefile.am000066400000000000000000001133611507764646700176670ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2016-2016 Uppsala University # Copyright (C) 2011-2011 Télécom Sud Paris # Copyright (C) 2017-2017 Erwan Leria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk include $(top_srcdir)/make/starpu-loader.mk SUFFIXES = .hip AM_CFLAGS += $(MAGMA_CFLAGS) $(APP_CFLAGS) AM_CXXFLAGS += $(MAGMA_CFLAGS) $(APP_CXXFLAGS) AM_FFLAGS += $(MAGMA_CFLAGS) $(APP_FFLAGS) AM_FCFLAGS += $(MAGMA_CFLAGS) $(APP_FCFLAGS) AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) LIBS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_HIP_LDFLAGS) LIBS += $(MAGMA_LIBS) SUBDIRS = stencil BUILT_SOURCES = if STARPU_USE_OPENCL nobase_STARPU_OPENCL_DATA_DATA = endif EXTRA_DIST = \ README.txt \ axpy/axpy.h \ axpy/axpy_opencl_kernel.cl \ basic_examples/vector_scal_opencl_kernel.cl \ basic_examples/multiformat_types.h \ basic_examples/multiformat_opencl_kernel.cl \ basic_examples/multiformat_conversion_codelets_opencl_kernel.cl \ common/blas_model.c \ spmd/vector_scal_spmd.c \ spmv/spmv_cuda.cu \ spmv/spmv_opencl.cl \ spmv/matrix_market/examples/fidapm05.mtx \ mult/xgemm.c \ mult/xgemm_layout.c \ mult/xgemm.h \ mult/sgemm.sh \ lu/xlu.c \ lu/xlu_pivot.c \ lu/xlu_implicit.c \ lu/xlu_implicit_pivot.c \ lu/xlu_kernels.c \ lu/lu_example.c \ incrementer/incrementer_kernels_opencl_kernel.cl \ basic_examples/variable_kernels_opencl_kernel.cl \ matvecmult/matvecmult_kernel.cl \ basic_examples/block_opencl_kernel.cl \ filters/fblock_opencl_kernel.cl \ filters/custom_mf/conversion_opencl.cl \ filters/custom_mf/custom_opencl.cl \ filters/custom_mf/custom_types.h \ interface/complex_kernels.cl \ interface/complex_dev_handle/complex_dev_handle_kernels.cl \ reductions/dot_product.h \ reductions/dot_product_opencl_kernels.cl \ scheduler/libdummy_sched.sh \ scheduler/schedulers.sh \ scheduler/schedulers_context.sh \ fortran/Makefile \ sched_ctx/axpy_partition_gpu.h \ sched_ctx/axpy_partition_gpu.cu \ heat/heat.sh \ cholesky/libmy_dmda.h \ cholesky/cholesky.sh \ cholesky/cholesky_julia.sh \ cholesky/cholesky_compiled.c \ lu/lu.sh \ subgraphs/main.h \ native_fortran/Makefile_nf_dynbuf.mk \ native_fortran/Makefile_nf_example.mk \ native_fortran/Makefile_nf_matrix.mk \ native_fortran/Makefile_nf_partition.mk \ native_fortran/Makefile_nf_sched_ctx.mk \ native_fortran/Makefile_nf_varbuf.mk \ native_fortran/Makefile_nf_vector.mk \ cpp/Makefile_add_vectors_cpp11.mk \ cpp/Makefile_add_vectors.mk \ fortran90/Makefile.mk \ profiling_tool/prof.sh CLEANFILES = *.gcno *.gcda *.linkinfo *.mod starpu_idle_microsec.log *.mps */*.mps */*/*.mps *.dot */*.dot */*/*.dot *.pl */*.pl */*/*.pl *.png *.output tasks.rec perfs.rec */perfs.rec */*/perfs.rec perfs2.rec fortran90/starpu_mod.f90 native_fortran/fstarpu_mod.f90 *.csv *.md *.Rmd *.pdf *.html clean-local: -rm -rf mult/sgemm.traces lu/lu.traces pkglib_LTLIBRARIES = if STARPU_HAVE_ICC .icc.o: $(V_icc) $(ICC) $(ICC_ARGS) -x c $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) $< -c -o $@ endif examplebindir = $(libdir)/starpu/examples/ examplebin_PROGRAMS = noinst_HEADERS = \ axpy/axpy.h \ cg/cg.h \ cg/cg_kernels.c \ heat/lu_kernels_model.h \ heat/dw_sparse_cg.h \ heat/heat.h \ heat/dw_factolu.h \ lu/xlu.h \ lu/xlu_kernels.h \ lu/lu-float.h \ lu/lu-double.h \ lu/complex_float.h \ lu/complex_double.h \ lu/blas_complex.h \ cholesky/cholesky.h \ sched_ctx_utils/sched_ctx_utils.h \ common/blas_model.h \ common/blas.h \ mult/simple.h \ mult/double.h \ fortran/starpu_fortran.h \ ppm_downscaler/ppm_downscaler.h \ ppm_downscaler/yuv_downscaler.h \ spmv/matrix_market/mmio.h \ spmv/matrix_market/mm_to_bcsr.h \ spmv/spmv.h \ spmv/dw_block_spmv.h \ basic_examples/multiformat_types.h \ filters/custom_mf/custom_interface.h \ filters/custom_mf/custom_types.h \ interface/complex_interface.h \ interface/complex_codelet.h \ interface/complex_dev_handle/complex_dev_handle_interface.h \ interface/complex_dev_handle/complex_dev_handle_codelet.h \ pi/pi.h \ pi/SobolQRNG/sobol.h \ pi/SobolQRNG/sobol_gold.h \ pi/SobolQRNG/sobol_gpu.h \ pi/SobolQRNG/sobol_primitives.h \ reductions/dot_product.h \ basic_examples/vector_scal_cpu_template.h \ sched_ctx/axpy_partition_gpu.h ##################################### # What to install and what to check # ##################################### examplebin_PROGRAMS += $(STARPU_EXAMPLES) TESTS = $(SHELL_TESTS) $(STARPU_EXAMPLES) SHELL_TESTS = SHELL_TESTS += scheduler/schedulers.sh SHELL_TESTS += scheduler/schedulers_context.sh if !STARPU_NO_BLAS_LIB if STARPU_USE_FXT SHELL_TESTS += mult/sgemm.sh endif endif check_PROGRAMS = $(STARPU_EXAMPLES) # STARPU_EXAMPLES list all applications which have to be compiled and checked # Applications which should only be compiled are added directly in examplebin_PROGRAMS # see for instance mandelbrot/mandelbrot STARPU_EXAMPLES = STARPU_EXAMPLES += \ sched_ctx/prio \ scheduler/dummy_sched \ scheduler/dummy_modular_sched \ worker_collections/worker_list_example \ api/bcsr_data_interface \ api/block_data_interface \ api/coo_data_interface \ api/csr_data_interface \ api/matrix_data_interface \ api/multiformat_data_interface \ api/tensor_data_interface \ api/variable_data_interface \ api/vector_data_interface \ api/void_data_interface if !STARPU_SIMGRID STARPU_EXAMPLES += \ basic_examples/hello_world \ basic_examples/hooks \ basic_examples/topology \ basic_examples/vector_scal \ basic_examples/mult \ basic_examples/block \ basic_examples/variable \ basic_examples/multiformat \ basic_examples/dynamic_handles \ basic_examples/task_insert_color \ basic_examples/ndim \ mlr/mlr \ cpp/incrementer_cpp \ cpp/add_vectors \ cpp/add_vectors_interface \ filters/alloc \ filters/fread \ filters/fvector \ filters/fvector_pick_variable \ filters/ftensor \ filters/ftensor_pick_block \ filters/ftensor_pick_variable \ filters/fblock \ filters/fblock_pick_matrix \ filters/fblock_pick_variable \ filters/fmatrix \ filters/fmatrix_pick_vector \ filters/fmatrix_pick_variable \ filters/fndim \ filters/fndim_pick_ndim \ filters/fndim_5d_pick_tensor \ filters/fndim_4d_pick_block \ filters/fndim_3d_pick_matrix \ filters/fndim_2d_pick_vector \ filters/fndim_1d_pick_variable \ filters/fndim_pick_variable \ filters/fndim_to_tensor \ filters/fndim_to_block \ filters/fndim_to_matrix \ filters/fndim_to_vector \ filters/fndim_to_variable \ filters/fmultiple_manual \ filters/fmultiple_submit \ filters/fmultiple_submit_readonly \ filters/fmultiple_submit_readonly_downgrade \ filters/fmultiple_submit_implicit \ filters/frecursive \ filters/shadow \ filters/shadow2d \ filters/shadow3d \ filters/shadow4d \ filters/shadownd \ tag_example/tag_example \ tag_example/tag_example2 \ tag_example/tag_example3 \ tag_example/tag_example4 \ tag_example/tag_restartable \ transactions/trs_inc \ spmd/vector_scal_spmd \ spmv/spmv \ callback/callback \ callback/prologue \ incrementer/incrementer \ binary/binary \ interface/complex \ interface/complex_dev_handle/complex_dev_handle \ matvecmult/matvecmult \ profiling/profiling \ perf_monitoring/perf_counters_01 \ perf_monitoring/perf_counters_02 \ perf_steering/perf_knobs_01 \ perf_steering/perf_knobs_02 \ perf_steering/perf_knobs_03 \ scheduler/heteroprio_test \ sched_ctx/sched_ctx \ sched_ctx/sched_ctx_empty \ sched_ctx/sched_ctx_remove \ sched_ctx/sched_ctx_delete \ sched_ctx/two_cpu_contexts \ sched_ctx/dummy_sched_with_ctx \ worker_collections/worker_tree_example \ reductions/dot_product \ reductions/minmax_reduction \ dependency/task_end_dep \ dependency/task_end_dep_add \ dependency/sequential_consistency \ subgraphs/manual \ subgraphs/partition \ subgraphs/plan endif if !STARPU_SIMGRID SHELL_TESTS += \ profiling_tool/prof.sh pkglib_LTLIBRARIES += \ profiling_tool/libprofiling_tool.la profiling_tool_libprofiling_tool_la_LDFLAGS = $(ldflags) -no-undefined -module -avoid-version endif if !STARPU_SIMGRID STARPU_EXAMPLES += \ scheduler/dummy_sched SHELL_TESTS += \ scheduler/libdummy_sched.sh pkglib_LTLIBRARIES += \ scheduler/libdummy_sched.la scheduler_libdummy_sched_la_LDFLAGS = $(ldflags) -no-undefined -module -avoid-version if STARPU_HAVE_CXX11 STARPU_EXAMPLES += \ cpp/add_vectors_cpp11 endif if STARPU_HAVE_F77 if STARPU_HAVE_F77_H STARPU_EXAMPLES += \ fortran/hello endif STARPU_EXAMPLES += \ basic_examples/vector_scal_fortran endif if STARPU_HAVE_FC if !STARPU_SANITIZE STARPU_EXAMPLES += \ fortran90/f90_example \ native_fortran/nf_vector \ native_fortran/nf_matrix \ native_fortran/nf_example \ native_fortran/nf_dynbuf \ native_fortran/nf_varbuf \ native_fortran/nf_sched_ctx \ native_fortran/nf_partition endif endif endif if STARPU_USE_CUDA STARPU_EXAMPLES += \ mult/sgemm \ mult/dgemm examplebin_PROGRAMS += \ mult/sgemm_layout \ mult/dgemm_layout endif if STARPU_USE_HIPBLAS STARPU_EXAMPLES += \ mult/sgemm \ mult/dgemm endif if !STARPU_NO_BLAS_LIB STARPU_EXAMPLES += \ mult/sgemm \ mult/dgemm \ lu/lu_example_float \ lu/lu_example_double \ lu/lu_implicit_example_float \ lu/lu_implicit_example_double \ cholesky/cholesky_tag \ cholesky/cholesky_tile_tag \ cholesky/cholesky_implicit \ cholesky/cholesky_compil examplebin_PROGRAMS += \ mult/sgemm_layout \ mult/dgemm_layout if !STARPU_SIMGRID STARPU_EXAMPLES += \ axpy/axpy \ cholesky/cholesky_grain_tag \ heat/heat \ cg/cg \ pipeline/pipeline \ transactions/trs_sgemm SHELL_TESTS += \ heat/heat.sh \ lu/lu.sh endif if STARPU_SIMGRID if !STARPU_QUICK_CHECK SHELL_TESTS += \ cholesky/cholesky.sh endif endif endif if !STARPU_SIMGRID if STARPU_MKL_BLAS_LIB STARPU_EXAMPLES += \ lu/lu_example_complex_float \ lu/lu_example_complex_double \ lu/lu_implicit_example_complex_float \ lu/lu_implicit_example_complex_double endif if STARPU_HAVE_CBLAS_H if STARPU_HAVE_CBLAS_SGEMV STARPU_EXAMPLES += \ spmv/dw_block_spmv endif endif if !STARPU_SIMGRID if STARPU_HAVE_F77 if STARPU_HAVE_F77_H STARPU_EXAMPLES += \ fortran/hello endif STARPU_EXAMPLES += \ basic_examples/vector_scal_fortran endif endif if STARPU_HAVE_OPENMP STARPU_EXAMPLES += \ openmp/vector_scal_omp \ sched_ctx/sched_ctx_without_sched_policy\ sched_ctx/nested_sched_ctxs \ sched_ctx/sched_ctx_without_sched_policy_awake\ sched_ctx/parallel_tasks_reuse_handle \ sched_ctx/parallel_code if STARPU_HAVE_HWLOC if STARPU_HWLOC_HAVE_TOPOLOGY_DUP STARPU_EXAMPLES += \ parallel_workers/parallel_workers \ parallel_workers/parallel_workers_func \ parallel_workers/parallel_workers_oldapi parallel_workers_parallel_workers_CFLAGS = \ $(AM_CFLAGS) -fopenmp parallel_workers_parallel_workers_func_CFLAGS = \ $(AM_CFLAGS) -fopenmp parallel_workers_parallel_workers_oldapi_CFLAGS = \ $(AM_CFLAGS) -fopenmp endif endif endif endif !STARPU_SIMGRID if STARPU_USE_CUDA STARPU_EXAMPLES += \ sched_ctx/gpu_partition sched_ctx_gpu_partition_SOURCES = \ sched_ctx/gpu_partition.c \ sched_ctx/axpy_partition_gpu.cu endif ################## # Basic examples # ################## basic_examples_vector_scal_SOURCES = \ basic_examples/vector_scal.c \ basic_examples/vector_scal_cpu.c basic_examples_mult_SOURCES = \ basic_examples/mult.c if STARPU_HAVE_ICC if STARPU_CROSS_COMPILING basic_examples_vector_scal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) $(basic_examples_vector_scal_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ else basic_examples_vector_scal_SOURCES += \ basic_examples/vector_scal_cpu_icc.icc basic_examples_vector_scal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(ICC) $(basic_examples_vector_scal_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ endif else basic_examples_vector_scal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) $(basic_examples_vector_scal_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ endif if STARPU_USE_CUDA basic_examples_vector_scal_SOURCES += \ basic_examples/vector_scal_cuda.cu basic_examples_mult_SOURCES += \ basic_examples/mult_cuda.cu endif if STARPU_USE_HIP basic_examples_vector_scal_SOURCES += \ basic_examples/vector_scal_hip.hip basic_examples_mult_SOURCES += \ basic_examples/mult_hip.hip endif if STARPU_USE_OPENCL basic_examples_vector_scal_SOURCES += \ basic_examples/vector_scal_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ basic_examples/vector_scal_opencl_kernel.cl endif if STARPU_HAVE_F77 basic_examples_vector_scal_fortran_SOURCES = \ basic_examples/vector_scal_fortran.F \ basic_examples/vector_scal_c.c \ basic_examples/vector_scal_cpu.c if STARPU_USE_CUDA basic_examples_vector_scal_fortran_SOURCES += \ basic_examples/vector_scal_cuda.cu basic_examples_vector_scal_fortran_LDADD = \ $(STARPU_CUDA_FORTRAN_LDFLAGS) endif if STARPU_HAVE_F77_H fortran_hello_SOURCES = \ fortran/hello_c.c \ fortran/hello.F \ fortran/starpu_fortran.h endif endif if STARPU_HAVE_FC fortran90_f90_example_SOURCES = \ fortran90/mod_types.f90 \ fortran90/starpu_mod.f90 \ fortran90/mod_interface.f90 \ fortran90/mod_compute.f90 \ fortran90/marshalling.c \ fortran90/f90_example.f90 native_fortran_nf_vector_SOURCES = \ native_fortran/nf_codelets.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_vector.f90 native_fortran_nf_matrix_SOURCES = \ native_fortran/nf_codelets.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_matrix.f90 native_fortran_nf_example_SOURCES = \ native_fortran/nf_types.f90 \ native_fortran/nf_compute.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_example.f90 native_fortran_nf_dynbuf_SOURCES = \ native_fortran/nf_dynbuf_cl.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_dynbuf.f90 native_fortran_nf_varbuf_SOURCES = \ native_fortran/nf_varbuf_cl.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_varbuf.f90 native_fortran_nf_sched_ctx_SOURCES = \ native_fortran/nf_sched_ctx_cl.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_sched_ctx.f90 native_fortran_nf_partition_SOURCES = \ native_fortran/nf_partition_cl.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_partition.f90 endif ####################### # Multiformat example # ####################### basic_examples_multiformat_SOURCES = \ basic_examples/multiformat.c \ basic_examples/multiformat_conversion_codelets.c if STARPU_USE_CUDA basic_examples_multiformat_SOURCES += \ basic_examples/multiformat_cuda.cu \ basic_examples/multiformat_conversion_codelets_cuda.cu endif if STARPU_USE_OPENCL basic_examples_multiformat_SOURCES += \ basic_examples/multiformat_opencl.c \ basic_examples/multiformat_conversion_codelets_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ basic_examples/multiformat_opencl_kernel.cl \ basic_examples/multiformat_conversion_codelets_opencl_kernel.cl endif ################# # block example # ################# basic_examples_block_SOURCES = \ basic_examples/block.c \ basic_examples/block_cpu.c if STARPU_USE_CUDA basic_examples_block_SOURCES += \ basic_examples/block_cuda.cu endif if STARPU_USE_HIP basic_examples_block_SOURCES += \ basic_examples/block_hip.hip endif if STARPU_USE_OPENCL basic_examples_block_SOURCES += \ basic_examples/block_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ basic_examples/block_opencl_kernel.cl endif #################### # Variable example # #################### basic_examples_variable_SOURCES = \ basic_examples/variable.c \ basic_examples/variable_kernels_cpu.c if STARPU_USE_CUDA basic_examples_variable_SOURCES += \ basic_examples/variable_kernels.cu endif if STARPU_USE_OPENCL basic_examples_variable_SOURCES += \ basic_examples/variable_kernels_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ basic_examples/variable_kernels_opencl_kernel.cl endif ########### # Filters # ########### filters_fvector_SOURCES = \ filters/fvector.c \ filters/fvector_cpu.c if STARPU_USE_CUDA filters_fvector_SOURCES += \ filters/fvector_cuda.cu endif if STARPU_USE_HIP filters_fvector_SOURCES += \ filters/fvector_hip.hip endif filters_fmatrix_SOURCES = \ filters/fmatrix.c \ filters/fmatrix_print.c \ filters/fmatrix_cpu.c if STARPU_USE_CUDA filters_fmatrix_SOURCES += \ filters/fmatrix_cuda.cu endif if STARPU_USE_HIP filters_fmatrix_SOURCES += \ filters/fmatrix_hip.hip endif filters_fblock_SOURCES = \ filters/fblock.c \ filters/fblock_print.c \ filters/fblock_cpu.c if STARPU_USE_CUDA filters_fblock_SOURCES += \ filters/fblock_cuda.cu endif if STARPU_USE_HIP filters_fblock_SOURCES += \ filters/fblock_hip.hip endif if STARPU_USE_OPENCL filters_fblock_SOURCES += \ filters/fblock_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ filters/fblock_opencl_kernel.cl endif filters_ftensor_SOURCES = \ filters/ftensor.c \ filters/ftensor_print.c \ filters/ftensor_cpu.c if STARPU_USE_CUDA filters_ftensor_SOURCES += \ filters/ftensor_cuda.cu endif if STARPU_USE_HIP filters_ftensor_SOURCES += \ filters/ftensor_hip.hip endif filters_fndim_SOURCES = \ filters/fndim.c \ filters/ftensor_print.c \ filters/f4d_cpu.c if STARPU_USE_CUDA filters_fndim_SOURCES += \ filters/f4d_cuda.cu endif if STARPU_USE_HIP filters_fndim_SOURCES += \ filters/f4d_hip.hip endif filters_fmatrix_pick_vector_SOURCES = \ filters/fmatrix_pick_vector.c \ filters/fmatrix_print.c \ filters/fvector_cpu.c if STARPU_USE_CUDA filters_fmatrix_pick_vector_SOURCES += \ filters/fvector_cuda.cu endif if STARPU_USE_HIP filters_fmatrix_pick_vector_SOURCES += \ filters/fvector_hip.hip endif filters_fmatrix_pick_variable_SOURCES = \ filters/fmatrix_pick_variable.c \ filters/fmatrix_print.c if STARPU_USE_CUDA filters_fmatrix_pick_variable_SOURCES += \ filters/fvariable_cuda.cu endif filters_fblock_pick_matrix_SOURCES = \ filters/fblock_pick_matrix.c \ filters/fblock_print.c \ filters/fmatrix_print.c \ filters/fmatrix_cpu.c if STARPU_USE_CUDA filters_fblock_pick_matrix_SOURCES += \ filters/fmatrix_cuda.cu endif if STARPU_USE_HIP filters_fblock_pick_matrix_SOURCES += \ filters/fmatrix_hip.hip endif filters_fblock_pick_variable_SOURCES = \ filters/fblock_pick_variable.c \ filters/fblock_print.c if STARPU_USE_CUDA filters_fblock_pick_variable_SOURCES += \ filters/fvariable_cuda.cu endif filters_ftensor_pick_block_SOURCES = \ filters/ftensor_pick_block.c \ filters/ftensor_print.c \ filters/fblock_print.c \ filters/fblock_cpu.c if STARPU_USE_CUDA filters_ftensor_pick_block_SOURCES += \ filters/fblock_cuda.cu endif if STARPU_USE_HIP filters_ftensor_pick_block_SOURCES += \ filters/fblock_hip.hip endif filters_ftensor_pick_variable_SOURCES = \ filters/ftensor_pick_variable.c \ filters/ftensor_print.c if STARPU_USE_CUDA filters_ftensor_pick_variable_SOURCES += \ filters/fvariable_cuda.cu endif filters_fndim_pick_ndim_SOURCES = \ filters/fndim_pick_ndim.c \ filters/ftensor_print.c \ filters/fblock_print.c \ filters/f3d_cpu.c if STARPU_USE_CUDA filters_fndim_pick_ndim_SOURCES += \ filters/f3d_cuda.cu endif if STARPU_USE_HIP filters_fndim_pick_ndim_SOURCES += \ filters/f3d_hip.hip endif filters_fndim_5d_pick_tensor_SOURCES = \ filters/fndim_5d_pick_tensor.c \ filters/f5d_print.c \ filters/ftensor_print.c \ filters/ftensor_cpu.c if STARPU_USE_CUDA filters_fndim_5d_pick_tensor_SOURCES += \ filters/ftensor_cuda.cu endif if STARPU_USE_HIP filters_fndim_5d_pick_tensor_SOURCES += \ filters/ftensor_hip.hip endif filters_fndim_4d_pick_block_SOURCES = \ filters/fndim_4d_pick_block.c \ filters/ftensor_print.c \ filters/fblock_print.c \ filters/fblock_cpu.c if STARPU_USE_CUDA filters_fndim_4d_pick_block_SOURCES += \ filters/fblock_cuda.cu endif if STARPU_USE_HIP filters_fndim_4d_pick_block_SOURCES += \ filters/fblock_hip.hip endif filters_fndim_3d_pick_matrix_SOURCES = \ filters/fndim_3d_pick_matrix.c \ filters/fblock_print.c \ filters/fmatrix_print.c \ filters/fmatrix_cpu.c if STARPU_USE_CUDA filters_fndim_3d_pick_matrix_SOURCES += \ filters/fmatrix_cuda.cu endif if STARPU_USE_HIP filters_fndim_3d_pick_matrix_SOURCES += \ filters/fmatrix_hip.hip endif filters_fndim_2d_pick_vector_SOURCES = \ filters/fndim_2d_pick_vector.c \ filters/fmatrix_print.c \ filters/fvector_cpu.c if STARPU_USE_CUDA filters_fndim_2d_pick_vector_SOURCES += \ filters/fvector_cuda.cu endif if STARPU_USE_HIP filters_fndim_2d_pick_vector_SOURCES += \ filters/fvector_hip.hip endif filters_fndim_pick_variable_SOURCES = \ filters/fndim_pick_variable.c \ filters/f5d_print.c filters_fndim_to_tensor_SOURCES = \ filters/fndim_to_tensor.c \ filters/ftensor_print.c \ filters/ftensor_cpu.c if STARPU_USE_CUDA filters_fndim_to_tensor_SOURCES += \ filters/ftensor_cuda.cu endif if STARPU_USE_HIP filters_fndim_to_tensor_SOURCES += \ filters/ftensor_hip.hip endif filters_fndim_to_block_SOURCES = \ filters/fndim_to_block.c \ filters/fblock_print.c \ filters/fblock_cpu.c if STARPU_USE_CUDA filters_fndim_to_block_SOURCES += \ filters/fblock_cuda.cu endif if STARPU_USE_HIP filters_fndim_to_block_SOURCES += \ filters/fblock_hip.hip endif filters_fndim_to_matrix_SOURCES = \ filters/fndim_to_matrix.c \ filters/fmatrix_print.c \ filters/fmatrix_cpu.c if STARPU_USE_CUDA filters_fndim_to_matrix_SOURCES += \ filters/fmatrix_cuda.cu endif if STARPU_USE_HIP filters_fndim_to_matrix_SOURCES += \ filters/fmatrix_hip.hip endif filters_fndim_to_vector_SOURCES = \ filters/fndim_to_vector.c \ filters/fvector_cpu.c if STARPU_USE_CUDA filters_fndim_to_vector_SOURCES += \ filters/fvector_cuda.cu endif if STARPU_USE_HIP filters_fndim_to_vector_SOURCES += \ filters/fvector_hip.hip endif filters_fmultiple_manual_SOURCES = \ filters/fmultiple_manual.c if STARPU_USE_CUDA filters_fmultiple_manual_SOURCES += \ filters/fmultiple_cuda.cu endif if STARPU_USE_HIP filters_fmultiple_manual_SOURCES += \ filters/fmultiple_hip.hip endif filters_fmultiple_submit_SOURCES = \ filters/fmultiple_submit.c if STARPU_USE_CUDA filters_fmultiple_submit_SOURCES += \ filters/fmultiple_cuda.cu endif if STARPU_USE_HIP filters_fmultiple_submit_SOURCES += \ filters/fmultiple_hip.hip endif filters_fmultiple_submit_readonly_SOURCES = \ filters/fmultiple_submit_readonly.c filters_fmultiple_submit_readonly_downgrade_SOURCES = \ filters/fmultiple_submit_readonly_downgrade.c if STARPU_USE_CUDA filters_fmultiple_submit_readonly_SOURCES += \ filters/fmultiple_cuda.cu filters_fmultiple_submit_readonly_downgrade_SOURCES += \ filters/fmultiple_cuda.cu endif if STARPU_USE_HIP filters_fmultiple_submit_readonly_SOURCES += \ filters/fmultiple_hip.hip filters_fmultiple_submit_readonly_downgrade_SOURCES += \ filters/fmultiple_hip.hip endif filters_fmultiple_submit_implicit_SOURCES = \ filters/fmultiple_submit_implicit.c if STARPU_USE_CUDA filters_fmultiple_submit_implicit_SOURCES += \ filters/fmultiple_cuda.cu endif if STARPU_USE_HIP filters_fmultiple_submit_implicit_SOURCES += \ filters/fmultiple_hip.hip endif ############################# # Custom multiformat filter # ############################# #TODO: see why the application is failing #lt-custom_mf_filter: .../src/datawizard/malloc.c:784: starpu_free_on_node: Assertion `chunk != _starpu_chunk_list_end(chunks[dst_node])' failed. examplebin_PROGRAMS += \ filters/custom_mf/custom_mf_filter filters_custom_mf_custom_mf_filter_SOURCES=\ filters/custom_mf/custom_mf_filter.c \ filters/custom_mf/custom_interface.c \ filters/custom_mf/custom_conversion_codelets.c if STARPU_USE_CUDA filters_custom_mf_custom_mf_filter_SOURCES += \ filters/custom_mf/conversion.cu \ filters/custom_mf/cuda.cu endif if STARPU_USE_OPENCL filters_custom_mf_custom_mf_filter_SOURCES += \ filters/custom_mf/conversion_opencl.c \ filters/custom_mf/custom_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ filters/custom_mf/conversion_opencl.cl \ filters/custom_mf/custom_opencl.cl endif ################ # AXPY example # ################ if !STARPU_NO_BLAS_LIB axpy_axpy_SOURCES = \ axpy/axpy.c \ common/blas.c if STARPU_USE_OPENCL axpy_axpy_SOURCES += \ axpy/axpy_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ axpy/axpy_opencl_kernel.cl endif axpy_axpy_LDADD = \ $(STARPU_BLAS_LDFLAGS) endif ################ # Mult example # ################ mult_sgemm_SOURCES = \ mult/sgemm.c mult_sgemm_LDADD = \ $(STARPU_BLAS_LDFLAGS) mult_sgemm_layout_SOURCES = \ mult/sgemm_layout.c mult_sgemm_layout_LDADD = \ $(STARPU_BLAS_LDFLAGS) mult_dgemm_SOURCES = \ mult/dgemm.c mult_dgemm_LDADD = \ $(STARPU_BLAS_LDFLAGS) mult_dgemm_layout_SOURCES = \ mult/dgemm_layout.c mult_dgemm_layout_LDADD = \ $(STARPU_BLAS_LDFLAGS) if !STARPU_NO_BLAS_LIB mult_sgemm_SOURCES += \ common/blas.c mult_dgemm_SOURCES += \ common/blas.c mult_sgemm_layout_SOURCES += \ common/blas.c mult_dgemm_layout_SOURCES += \ common/blas.c endif ##################### # Trs_sgemm example # ##################### if !STARPU_NO_BLAS_LIB transactions_trs_sgemm_SOURCES = \ transactions/trs_sgemm.c \ common/blas.c transactions_trs_sgemm_LDADD = \ $(STARPU_BLAS_LDFLAGS) endif #################### # Cholesky example # #################### if !STARPU_NO_BLAS_LIB pkglib_LTLIBRARIES += \ cholesky/libmy_dmda.la cholesky_libmy_dmda_la_LDFLAGS = $(ldflags) -no-undefined -module -avoid-version SHELL_TESTS += \ cholesky/cholesky_julia.sh cholesky_cholesky_tag_SOURCES = \ cholesky/cholesky_tag.c \ cholesky/cholesky_models.c \ cholesky/cholesky_kernels.c \ common/blas.c cholesky_cholesky_tag_LDADD = \ $(STARPU_BLAS_LDFLAGS) cholesky_cholesky_tile_tag_SOURCES = \ cholesky/cholesky_tile_tag.c \ cholesky/cholesky_models.c \ cholesky/cholesky_kernels.c \ common/blas.c cholesky_cholesky_tile_tag_LDADD = \ $(STARPU_BLAS_LDFLAGS) cholesky_cholesky_grain_tag_SOURCES = \ cholesky/cholesky_grain_tag.c \ cholesky/cholesky_models.c \ cholesky/cholesky_kernels.c \ common/blas.c cholesky_cholesky_grain_tag_LDADD = \ $(STARPU_BLAS_LDFLAGS) cholesky_cholesky_implicit_SOURCES = \ cholesky/cholesky_implicit.c \ cholesky/cholesky_models.c \ cholesky/cholesky_kernels.c \ sched_ctx_utils/sched_ctx_utils.c \ common/blas.c cholesky_cholesky_implicit_LDADD = \ $(STARPU_BLAS_LDFLAGS) cholesky_cholesky_compil_SOURCES = \ cholesky/cholesky_compil.c \ cholesky/cholesky_models.c \ cholesky/cholesky_kernels.c \ sched_ctx_utils/sched_ctx_utils.c \ common/blas.c cholesky_cholesky_compil_LDADD = \ $(STARPU_BLAS_LDFLAGS) endif ############## # LU example # ############## if !STARPU_NO_BLAS_LIB lu_lu_example_float_SOURCES = \ lu/lu_example_float.c \ lu/slu.c \ lu/slu_pivot.c \ lu/slu_kernels.c \ common/blas.c lu_lu_example_float_LDADD = \ $(STARPU_BLAS_LDFLAGS) lu_lu_example_double_SOURCES = \ lu/lu_example_double.c \ lu/dlu.c \ lu/dlu_pivot.c \ lu/dlu_kernels.c \ common/blas.c lu_lu_example_double_LDADD = \ $(STARPU_BLAS_LDFLAGS) lu_lu_implicit_example_float_SOURCES = \ lu/lu_example_float.c \ lu/slu_implicit.c \ lu/slu_implicit_pivot.c \ lu/slu_kernels.c \ common/blas.c lu_lu_implicit_example_float_LDADD = \ $(STARPU_BLAS_LDFLAGS) lu_lu_implicit_example_double_SOURCES = \ lu/lu_example_double.c \ lu/dlu_implicit.c \ lu/dlu_implicit_pivot.c \ lu/dlu_kernels.c \ common/blas.c lu_lu_implicit_example_double_LDADD = \ $(STARPU_BLAS_LDFLAGS) if STARPU_MKL_BLAS_LIB lu_lu_example_complex_float_SOURCES = \ lu/lu_example_complex_float.c \ lu/clu.c \ lu/clu_pivot.c \ lu/clu_kernels.c \ lu/blas_complex.c \ common/blas.c lu_lu_example_complex_float_LDADD = \ $(STARPU_BLAS_LDFLAGS) lu_lu_implicit_example_complex_float_SOURCES = \ lu/lu_example_complex_float.c \ lu/clu_implicit.c \ lu/clu_implicit_pivot.c \ lu/clu_kernels.c \ lu/blas_complex.c \ common/blas.c lu_lu_implicit_example_complex_float_LDADD = \ $(STARPU_BLAS_LDFLAGS) lu_lu_example_complex_double_SOURCES = \ lu/lu_example_complex_double.c \ lu/zlu.c \ lu/zlu_pivot.c \ lu/zlu_kernels.c \ lu/blas_complex.c \ common/blas.c lu_lu_example_complex_double_LDADD = \ $(STARPU_BLAS_LDFLAGS) lu_lu_implicit_example_complex_double_SOURCES = \ lu/lu_example_complex_double.c \ lu/zlu_implicit.c \ lu/zlu_implicit_pivot.c \ lu/zlu_kernels.c \ lu/blas_complex.c \ common/blas.c lu_lu_implicit_example_complex_double_LDADD = \ $(STARPU_BLAS_LDFLAGS) endif endif ################ # Heat example # ################ if !STARPU_NO_BLAS_LIB heat_heat_SOURCES = \ heat/heat.c \ heat/dw_factolu.c \ heat/dw_factolu_tag.c \ heat/dw_factolu_grain.c \ heat/dw_sparse_cg.c \ heat/heat_display.c \ heat/lu_kernels_model.c \ heat/dw_sparse_cg_kernels.c \ heat/dw_factolu_kernels.c \ common/blas.c heat_heat_LDADD = \ $(STARPU_OPENGL_RENDER_LDFLAGS) \ $(STARPU_BLAS_LDFLAGS) endif ############## # CG example # ############## if !STARPU_NO_BLAS_LIB cg_cg_SOURCES = \ cg/cg.c \ common/blas.c cg_cg_LDADD = \ $(STARPU_BLAS_LDFLAGS) endif ################ # SPMD example # ################ spmd_vector_scal_spmd_SOURCES = \ spmd/vector_scal_spmd.c ################ # SpMV example # ################ spmv_spmv_SOURCES = \ spmv/spmv.c \ spmv/spmv_kernels.c if STARPU_USE_CUDA spmv_spmv_SOURCES += \ spmv/spmv_cuda.cu endif spmv_dw_block_spmv_SOURCES = \ spmv/dw_block_spmv.c \ spmv/dw_block_spmv_kernels.c \ spmv/matrix_market/mm_to_bcsr.c \ spmv/matrix_market/mmio.c spmv_dw_block_spmv_LDADD = \ $(STARPU_BLAS_LDFLAGS) ########################### # C++ Incrementer example # ########################### cpp_incrementer_cpp_SOURCES = \ cpp/incrementer_cpp.cpp if STARPU_USE_CUDA cpp_incrementer_cpp_SOURCES += \ incrementer/incrementer_kernels.cu endif if STARPU_USE_OPENCL cpp_incrementer_cpp_SOURCES += \ incrementer/incrementer_kernels_opencl.c endif ########################### # C++ Add vectors example # ########################### cpp_add_vectors_SOURCES = \ cpp/add_vectors.cpp cpp_add_vectors_interface_SOURCES = \ cpp/add_vectors_interface.cpp if STARPU_HAVE_CXX11 cpp_add_vectors_cpp11_SOURCES = \ cpp/add_vectors_cpp11.cpp endif ####################### # Incrementer example # ####################### incrementer_incrementer_SOURCES = \ incrementer/incrementer.c if STARPU_USE_CUDA incrementer_incrementer_SOURCES += \ incrementer/incrementer_kernels.cu endif if STARPU_USE_OPENCL incrementer_incrementer_SOURCES += \ incrementer/incrementer_kernels_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ incrementer/incrementer_kernels_opencl_kernel.cl endif ################## # Binary example # ################## binary_binary_SOURCES = \ binary/binary.c if STARPU_USE_OPENCL binary_binary_SOURCES += \ incrementer/incrementer_kernels_opencl.c endif ##################### # interface example # ##################### interface_complex_SOURCES = \ interface/complex.c \ interface/complex_interface.c \ interface/complex_filters.c if STARPU_USE_CUDA interface_complex_SOURCES += \ interface/complex_kernels.cu endif if STARPU_USE_OPENCL interface_complex_SOURCES +=\ interface/complex_kernels_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ interface/complex_kernels.cl endif interface_complex_dev_handle_complex_dev_handle_SOURCES = \ interface/complex_dev_handle/complex_dev_handle.c \ interface/complex_dev_handle/complex_dev_handle_interface.c \ interface/complex_dev_handle/complex_dev_handle_filters.c if STARPU_USE_CUDA interface_complex_dev_handle_complex_dev_handle_SOURCES += \ interface/complex_dev_handle/complex_dev_handle_kernels.cu endif if STARPU_USE_OPENCL interface_complex_dev_handle_complex_dev_handle_SOURCES +=\ interface/complex_dev_handle/complex_dev_handle_kernels_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ interface/complex_dev_handle/complex_dev_handle_kernels.cl endif ###################### # matVecMult example # ###################### if STARPU_USE_OPENCL nobase_STARPU_OPENCL_DATA_DATA += \ matvecmult/matvecmult_kernel.cl endif ####################### # dot_product example # ####################### reductions_dot_product_SOURCES = \ reductions/dot_product.c if STARPU_USE_CUDA reductions_dot_product_SOURCES += \ reductions/dot_product_kernels.cu endif if STARPU_USE_OPENCL nobase_STARPU_OPENCL_DATA_DATA += \ reductions/dot_product_opencl_kernels.cl endif ################## # Mandelbrot Set # ################## examplebin_PROGRAMS += \ mandelbrot/mandelbrot mandelbrot_mandelbrot_CPPFLAGS = $(AM_CPPFLAGS) if STARPU_HAVE_X11 mandelbrot_mandelbrot_CPPFLAGS += $(X_CFLAGS) mandelbrot_mandelbrot_LDADD = $(X_PRE_LIBS) $(X_LIBS) -lX11 $(X_EXTRA_LIBS) endif #################### # Image downscaler # #################### examplebin_PROGRAMS += \ ppm_downscaler/ppm_downscaler \ ppm_downscaler/yuv_downscaler ###### # Pi # ###### if !STARPU_HAVE_WINDOWS examplebin_PROGRAMS += \ pi/pi \ pi/pi_redux pi_pi_SOURCES = \ pi/pi.c \ pi/SobolQRNG/sobol_gold.c \ pi/SobolQRNG/sobol_primitives.c if STARPU_USE_CUDA pi_pi_SOURCES += \ pi/pi_kernel.cu \ pi/SobolQRNG/sobol_gpu.cu endif pi_pi_redux_SOURCES = \ pi/pi_redux.c if STARPU_USE_CUDA pi_pi_redux_SOURCES += \ pi/pi_redux_kernel.cu pi_pi_redux_LDADD = \ $(STARPU_CURAND_LDFLAGS) endif endif ########################### # OpenGL interoperability # ########################### if STARPU_HAVE_OPENGL examplebin_PROGRAMS += \ gl_interop/gl_interop \ gl_interop/gl_interop_idle gl_interop_gl_interop_LDADD = \ $(STARPU_OPENGL_RENDER_LDFLAGS) gl_interop_gl_interop_idle_LDADD = \ $(STARPU_OPENGL_RENDER_LDFLAGS) endif #################### # pipeline example # #################### if !STARPU_NO_BLAS_LIB pipeline_pipeline_SOURCES = \ pipeline/pipeline.c \ common/blas.c pipeline_pipeline_LDADD = \ $(STARPU_BLAS_LDFLAGS) endif ################## # openmp example # ################## if STARPU_HAVE_OPENMP openmp_vector_scal_omp_CFLAGS = \ $(AM_CFLAGS) -fopenmp sched_ctx_parallel_code_CFLAGS = \ $(AM_CFLAGS) -fopenmp sched_ctx_sched_ctx_without_sched_policy_CFLAGS = \ $(AM_CFLAGS) -fopenmp sched_ctx_nested_sched_ctxs_CFLAGS = \ $(AM_CFLAGS) -fopenmp sched_ctx_parallel_tasks_reuse_handle_CFLAGS = \ $(AM_CFLAGS) -fopenmp endif ###################### # subgraphs examples # ###################### subgraphs_manual_SOURCES = \ subgraphs/manual.c \ subgraphs/codelets.c subgraphs_partition_SOURCES = \ subgraphs/partition.c \ subgraphs/codelets.c subgraphs_plan_SOURCES = \ subgraphs/plan.c \ subgraphs/codelets.c # - link over source file to build our own object fortran90/starpu_mod.f90: @$(MKDIR_P) $(dir $@) $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ native_fortran/fstarpu_mod.f90: @$(MKDIR_P) $(dir $@) $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ if STARPU_HAVE_FC # Fortran90 example # - express the creation of .mod along .o starpu_mod.mod: fortran90/starpu_mod.o mod_types.mod: fortran90/mod_types.o mod_compute.mod: fortran90/mod_compute.o mod_interface.mod: fortran90/mod_interface.o # - list explicit dependences to control proper module files dependencies fortran90/mod_compute.o: mod_types.mod mod_interface.mod starpu_mod.mod fortran90/f90_example.o: mod_types.mod mod_interface.mod mod_compute.mod starpu_mod.mod # Native Fortran example # - express the creation of .mod along .o fstarpu_mod.mod: native_fortran/fstarpu_mod.o nf_codelets.mod: native_fortran/nf_codelets.o nf_compute.mod: native_fortran/nf_compute.o nf_dynbuf_cl.mod: native_fortran/nf_dynbuf_cl.o nf_partition_cl.mod: native_fortran/nf_partition_cl.o nf_sched_ctx_cl.mod: native_fortran/nf_sched_ctx_cl.o nf_types.mod: native_fortran/nf_types.o nf_varbuf_cl.mod: native_fortran/nf_varbuf_cl.o # - list explicit dependences to control proper module files dependencies native_fortran/nf_codelets.o: fstarpu_mod.mod native_fortran/nf_compute.o: nf_types.mod fstarpu_mod.mod native_fortran/nf_dynbuf_cl.o: fstarpu_mod.mod native_fortran/nf_dynbuf.o: nf_dynbuf_cl.mod fstarpu_mod.mod native_fortran/nf_example.o: nf_types.mod nf_compute.mod fstarpu_mod.mod native_fortran/nf_matrix.o: nf_codelets.mod fstarpu_mod.mod native_fortran/nf_partition_cl.o: fstarpu_mod.mod native_fortran/nf_partition.o: nf_partition_cl.mod fstarpu_mod.mod native_fortran/nf_sched_ctx_cl.o: fstarpu_mod.mod native_fortran/nf_sched_ctx.o: nf_sched_ctx_cl.mod fstarpu_mod.mod native_fortran/nf_varbuf_cl.o: fstarpu_mod.mod native_fortran/nf_varbuf.o: nf_varbuf_cl.mod fstarpu_mod.mod native_fortran/nf_vector.o: nf_codelets.mod fstarpu_mod.mod endif starpu-1.4.9+dfsg/examples/Makefile.in000066400000000000000000015041401507764646700177000ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ noinst_PROGRAMS = $(am__EXEEXT_20) # Make tests run through mpiexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec # switch off local socket usage #MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 @STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader examplebin_PROGRAMS = $(am__EXEEXT_15) $(am__EXEEXT_16) \ $(am__EXEEXT_17) filters/custom_mf/custom_mf_filter$(EXEEXT) \ mandelbrot/mandelbrot$(EXEEXT) \ ppm_downscaler/ppm_downscaler$(EXEEXT) \ ppm_downscaler/yuv_downscaler$(EXEEXT) $(am__EXEEXT_18) \ $(am__EXEEXT_19) TESTS = $(SHELL_TESTS) $(am__EXEEXT_15) @STARPU_NO_BLAS_LIB_FALSE@@STARPU_USE_FXT_TRUE@am__append_8 = mult/sgemm.sh check_PROGRAMS = $(am__EXEEXT_15) @STARPU_SIMGRID_FALSE@am__append_9 = basic_examples/hello_world \ @STARPU_SIMGRID_FALSE@ basic_examples/hooks \ @STARPU_SIMGRID_FALSE@ basic_examples/topology \ @STARPU_SIMGRID_FALSE@ basic_examples/vector_scal \ @STARPU_SIMGRID_FALSE@ basic_examples/mult basic_examples/block \ @STARPU_SIMGRID_FALSE@ basic_examples/variable \ @STARPU_SIMGRID_FALSE@ basic_examples/multiformat \ @STARPU_SIMGRID_FALSE@ basic_examples/dynamic_handles \ @STARPU_SIMGRID_FALSE@ basic_examples/task_insert_color \ @STARPU_SIMGRID_FALSE@ basic_examples/ndim mlr/mlr \ @STARPU_SIMGRID_FALSE@ cpp/incrementer_cpp cpp/add_vectors \ @STARPU_SIMGRID_FALSE@ cpp/add_vectors_interface filters/alloc \ @STARPU_SIMGRID_FALSE@ filters/fread filters/fvector \ @STARPU_SIMGRID_FALSE@ filters/fvector_pick_variable \ @STARPU_SIMGRID_FALSE@ filters/ftensor \ @STARPU_SIMGRID_FALSE@ filters/ftensor_pick_block \ @STARPU_SIMGRID_FALSE@ filters/ftensor_pick_variable \ @STARPU_SIMGRID_FALSE@ filters/fblock \ @STARPU_SIMGRID_FALSE@ filters/fblock_pick_matrix \ @STARPU_SIMGRID_FALSE@ filters/fblock_pick_variable \ @STARPU_SIMGRID_FALSE@ filters/fmatrix \ @STARPU_SIMGRID_FALSE@ filters/fmatrix_pick_vector \ @STARPU_SIMGRID_FALSE@ filters/fmatrix_pick_variable \ @STARPU_SIMGRID_FALSE@ filters/fndim filters/fndim_pick_ndim \ @STARPU_SIMGRID_FALSE@ filters/fndim_5d_pick_tensor \ @STARPU_SIMGRID_FALSE@ filters/fndim_4d_pick_block \ @STARPU_SIMGRID_FALSE@ filters/fndim_3d_pick_matrix \ @STARPU_SIMGRID_FALSE@ filters/fndim_2d_pick_vector \ @STARPU_SIMGRID_FALSE@ filters/fndim_1d_pick_variable \ @STARPU_SIMGRID_FALSE@ filters/fndim_pick_variable \ @STARPU_SIMGRID_FALSE@ filters/fndim_to_tensor \ @STARPU_SIMGRID_FALSE@ filters/fndim_to_block \ @STARPU_SIMGRID_FALSE@ filters/fndim_to_matrix \ @STARPU_SIMGRID_FALSE@ filters/fndim_to_vector \ @STARPU_SIMGRID_FALSE@ filters/fndim_to_variable \ @STARPU_SIMGRID_FALSE@ filters/fmultiple_manual \ @STARPU_SIMGRID_FALSE@ filters/fmultiple_submit \ @STARPU_SIMGRID_FALSE@ filters/fmultiple_submit_readonly \ @STARPU_SIMGRID_FALSE@ filters/fmultiple_submit_readonly_downgrade \ @STARPU_SIMGRID_FALSE@ filters/fmultiple_submit_implicit \ @STARPU_SIMGRID_FALSE@ filters/frecursive filters/shadow \ @STARPU_SIMGRID_FALSE@ filters/shadow2d filters/shadow3d \ @STARPU_SIMGRID_FALSE@ filters/shadow4d filters/shadownd \ @STARPU_SIMGRID_FALSE@ tag_example/tag_example \ @STARPU_SIMGRID_FALSE@ tag_example/tag_example2 \ @STARPU_SIMGRID_FALSE@ tag_example/tag_example3 \ @STARPU_SIMGRID_FALSE@ tag_example/tag_example4 \ @STARPU_SIMGRID_FALSE@ tag_example/tag_restartable \ @STARPU_SIMGRID_FALSE@ transactions/trs_inc \ @STARPU_SIMGRID_FALSE@ spmd/vector_scal_spmd spmv/spmv \ @STARPU_SIMGRID_FALSE@ callback/callback callback/prologue \ @STARPU_SIMGRID_FALSE@ incrementer/incrementer binary/binary \ @STARPU_SIMGRID_FALSE@ interface/complex \ @STARPU_SIMGRID_FALSE@ interface/complex_dev_handle/complex_dev_handle \ @STARPU_SIMGRID_FALSE@ matvecmult/matvecmult \ @STARPU_SIMGRID_FALSE@ profiling/profiling \ @STARPU_SIMGRID_FALSE@ perf_monitoring/perf_counters_01 \ @STARPU_SIMGRID_FALSE@ perf_monitoring/perf_counters_02 \ @STARPU_SIMGRID_FALSE@ perf_steering/perf_knobs_01 \ @STARPU_SIMGRID_FALSE@ perf_steering/perf_knobs_02 \ @STARPU_SIMGRID_FALSE@ perf_steering/perf_knobs_03 \ @STARPU_SIMGRID_FALSE@ scheduler/heteroprio_test \ @STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx \ @STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_empty \ @STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_remove \ @STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_delete \ @STARPU_SIMGRID_FALSE@ sched_ctx/two_cpu_contexts \ @STARPU_SIMGRID_FALSE@ sched_ctx/dummy_sched_with_ctx \ @STARPU_SIMGRID_FALSE@ worker_collections/worker_tree_example \ @STARPU_SIMGRID_FALSE@ reductions/dot_product \ @STARPU_SIMGRID_FALSE@ reductions/minmax_reduction \ @STARPU_SIMGRID_FALSE@ dependency/task_end_dep \ @STARPU_SIMGRID_FALSE@ dependency/task_end_dep_add \ @STARPU_SIMGRID_FALSE@ dependency/sequential_consistency \ @STARPU_SIMGRID_FALSE@ subgraphs/manual subgraphs/partition \ @STARPU_SIMGRID_FALSE@ subgraphs/plan scheduler/dummy_sched @STARPU_SIMGRID_FALSE@am__append_10 = profiling_tool/prof.sh \ @STARPU_SIMGRID_FALSE@ scheduler/libdummy_sched.sh @STARPU_SIMGRID_FALSE@am__append_11 = \ @STARPU_SIMGRID_FALSE@ profiling_tool/libprofiling_tool.la \ @STARPU_SIMGRID_FALSE@ scheduler/libdummy_sched.la @STARPU_HAVE_CXX11_TRUE@@STARPU_SIMGRID_FALSE@am__append_12 = \ @STARPU_HAVE_CXX11_TRUE@@STARPU_SIMGRID_FALSE@ cpp/add_vectors_cpp11 @STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@am__append_13 = \ @STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@ fortran/hello @STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@am__append_14 = \ @STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@ basic_examples/vector_scal_fortran @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__append_15 = \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ fortran90/f90_example \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_vector \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_matrix \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_example \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_dynbuf \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_varbuf \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_sched_ctx \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_partition @STARPU_USE_CUDA_TRUE@am__append_16 = \ @STARPU_USE_CUDA_TRUE@ mult/sgemm \ @STARPU_USE_CUDA_TRUE@ mult/dgemm @STARPU_USE_CUDA_TRUE@am__append_17 = \ @STARPU_USE_CUDA_TRUE@ mult/sgemm_layout \ @STARPU_USE_CUDA_TRUE@ mult/dgemm_layout @STARPU_USE_HIPBLAS_TRUE@am__append_18 = \ @STARPU_USE_HIPBLAS_TRUE@ mult/sgemm \ @STARPU_USE_HIPBLAS_TRUE@ mult/dgemm @STARPU_NO_BLAS_LIB_FALSE@am__append_19 = \ @STARPU_NO_BLAS_LIB_FALSE@ mult/sgemm \ @STARPU_NO_BLAS_LIB_FALSE@ mult/dgemm \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_float \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_double \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_implicit_example_float \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_implicit_example_double \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_tag \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_tile_tag \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_implicit \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_compil @STARPU_NO_BLAS_LIB_FALSE@am__append_20 = \ @STARPU_NO_BLAS_LIB_FALSE@ mult/sgemm_layout \ @STARPU_NO_BLAS_LIB_FALSE@ mult/dgemm_layout @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__append_21 = \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ axpy/axpy \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ cholesky/cholesky_grain_tag \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ heat/heat \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ cg/cg \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ pipeline/pipeline \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ transactions/trs_sgemm @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__append_22 = \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ heat/heat.sh \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ lu/lu.sh @STARPU_NO_BLAS_LIB_FALSE@@STARPU_QUICK_CHECK_FALSE@@STARPU_SIMGRID_TRUE@am__append_23 = \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_QUICK_CHECK_FALSE@@STARPU_SIMGRID_TRUE@ cholesky/cholesky.sh @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@am__append_24 = \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@ lu/lu_example_complex_float \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@ lu/lu_example_complex_double \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@ lu/lu_implicit_example_complex_float \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@ lu/lu_implicit_example_complex_double @STARPU_HAVE_CBLAS_H_TRUE@@STARPU_HAVE_CBLAS_SGEMV_TRUE@@STARPU_SIMGRID_FALSE@am__append_25 = \ @STARPU_HAVE_CBLAS_H_TRUE@@STARPU_HAVE_CBLAS_SGEMV_TRUE@@STARPU_SIMGRID_FALSE@ spmv/dw_block_spmv @STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@am__append_26 = \ @STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@ fortran/hello @STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@am__append_27 = \ @STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@ basic_examples/vector_scal_fortran @STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@am__append_28 = \ @STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ openmp/vector_scal_omp \ @STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_without_sched_policy\ @STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/nested_sched_ctxs \ @STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_without_sched_policy_awake\ @STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/parallel_tasks_reuse_handle \ @STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/parallel_code @STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@am__append_29 = \ @STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ parallel_workers/parallel_workers \ @STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ parallel_workers/parallel_workers_func \ @STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ parallel_workers/parallel_workers_oldapi @STARPU_USE_CUDA_TRUE@am__append_30 = \ @STARPU_USE_CUDA_TRUE@ sched_ctx/gpu_partition @STARPU_CROSS_COMPILING_FALSE@@STARPU_HAVE_ICC_TRUE@am__append_31 = \ @STARPU_CROSS_COMPILING_FALSE@@STARPU_HAVE_ICC_TRUE@ basic_examples/vector_scal_cpu_icc.icc @STARPU_USE_CUDA_TRUE@am__append_32 = \ @STARPU_USE_CUDA_TRUE@ basic_examples/vector_scal_cuda.cu @STARPU_USE_CUDA_TRUE@am__append_33 = \ @STARPU_USE_CUDA_TRUE@ basic_examples/mult_cuda.cu @STARPU_USE_HIP_TRUE@am__append_34 = \ @STARPU_USE_HIP_TRUE@ basic_examples/vector_scal_hip.hip @STARPU_USE_HIP_TRUE@am__append_35 = \ @STARPU_USE_HIP_TRUE@ basic_examples/mult_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_36 = \ @STARPU_USE_OPENCL_TRUE@ basic_examples/vector_scal_opencl.c @STARPU_HAVE_F77_TRUE@@STARPU_USE_CUDA_TRUE@am__append_37 = \ @STARPU_HAVE_F77_TRUE@@STARPU_USE_CUDA_TRUE@ basic_examples/vector_scal_cuda.cu @STARPU_USE_CUDA_TRUE@am__append_38 = \ @STARPU_USE_CUDA_TRUE@ basic_examples/multiformat_cuda.cu \ @STARPU_USE_CUDA_TRUE@ basic_examples/multiformat_conversion_codelets_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_39 = \ @STARPU_USE_OPENCL_TRUE@ basic_examples/multiformat_opencl.c \ @STARPU_USE_OPENCL_TRUE@ basic_examples/multiformat_conversion_codelets_opencl.c @STARPU_USE_CUDA_TRUE@am__append_40 = \ @STARPU_USE_CUDA_TRUE@ basic_examples/block_cuda.cu @STARPU_USE_HIP_TRUE@am__append_41 = \ @STARPU_USE_HIP_TRUE@ basic_examples/block_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_42 = \ @STARPU_USE_OPENCL_TRUE@ basic_examples/block_opencl.c @STARPU_USE_CUDA_TRUE@am__append_43 = \ @STARPU_USE_CUDA_TRUE@ basic_examples/variable_kernels.cu @STARPU_USE_OPENCL_TRUE@am__append_44 = \ @STARPU_USE_OPENCL_TRUE@ basic_examples/variable_kernels_opencl.c @STARPU_USE_CUDA_TRUE@am__append_45 = \ @STARPU_USE_CUDA_TRUE@ filters/fvector_cuda.cu @STARPU_USE_HIP_TRUE@am__append_46 = \ @STARPU_USE_HIP_TRUE@ filters/fvector_hip.hip @STARPU_USE_CUDA_TRUE@am__append_47 = \ @STARPU_USE_CUDA_TRUE@ filters/fmatrix_cuda.cu @STARPU_USE_HIP_TRUE@am__append_48 = \ @STARPU_USE_HIP_TRUE@ filters/fmatrix_hip.hip @STARPU_USE_CUDA_TRUE@am__append_49 = \ @STARPU_USE_CUDA_TRUE@ filters/fblock_cuda.cu @STARPU_USE_HIP_TRUE@am__append_50 = \ @STARPU_USE_HIP_TRUE@ filters/fblock_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_51 = \ @STARPU_USE_OPENCL_TRUE@ filters/fblock_opencl.c @STARPU_USE_CUDA_TRUE@am__append_52 = \ @STARPU_USE_CUDA_TRUE@ filters/ftensor_cuda.cu @STARPU_USE_HIP_TRUE@am__append_53 = \ @STARPU_USE_HIP_TRUE@ filters/ftensor_hip.hip @STARPU_USE_CUDA_TRUE@am__append_54 = \ @STARPU_USE_CUDA_TRUE@ filters/f4d_cuda.cu @STARPU_USE_HIP_TRUE@am__append_55 = \ @STARPU_USE_HIP_TRUE@ filters/f4d_hip.hip @STARPU_USE_CUDA_TRUE@am__append_56 = \ @STARPU_USE_CUDA_TRUE@ filters/fvector_cuda.cu @STARPU_USE_HIP_TRUE@am__append_57 = \ @STARPU_USE_HIP_TRUE@ filters/fvector_hip.hip @STARPU_USE_CUDA_TRUE@am__append_58 = \ @STARPU_USE_CUDA_TRUE@ filters/fvariable_cuda.cu @STARPU_USE_CUDA_TRUE@am__append_59 = \ @STARPU_USE_CUDA_TRUE@ filters/fmatrix_cuda.cu @STARPU_USE_HIP_TRUE@am__append_60 = \ @STARPU_USE_HIP_TRUE@ filters/fmatrix_hip.hip @STARPU_USE_CUDA_TRUE@am__append_61 = \ @STARPU_USE_CUDA_TRUE@ filters/fvariable_cuda.cu @STARPU_USE_CUDA_TRUE@am__append_62 = \ @STARPU_USE_CUDA_TRUE@ filters/fblock_cuda.cu @STARPU_USE_HIP_TRUE@am__append_63 = \ @STARPU_USE_HIP_TRUE@ filters/fblock_hip.hip @STARPU_USE_CUDA_TRUE@am__append_64 = \ @STARPU_USE_CUDA_TRUE@ filters/fvariable_cuda.cu @STARPU_USE_CUDA_TRUE@am__append_65 = \ @STARPU_USE_CUDA_TRUE@ filters/f3d_cuda.cu @STARPU_USE_HIP_TRUE@am__append_66 = \ @STARPU_USE_HIP_TRUE@ filters/f3d_hip.hip @STARPU_USE_CUDA_TRUE@am__append_67 = \ @STARPU_USE_CUDA_TRUE@ filters/ftensor_cuda.cu @STARPU_USE_HIP_TRUE@am__append_68 = \ @STARPU_USE_HIP_TRUE@ filters/ftensor_hip.hip @STARPU_USE_CUDA_TRUE@am__append_69 = \ @STARPU_USE_CUDA_TRUE@ filters/fblock_cuda.cu @STARPU_USE_HIP_TRUE@am__append_70 = \ @STARPU_USE_HIP_TRUE@ filters/fblock_hip.hip @STARPU_USE_CUDA_TRUE@am__append_71 = \ @STARPU_USE_CUDA_TRUE@ filters/fmatrix_cuda.cu @STARPU_USE_HIP_TRUE@am__append_72 = \ @STARPU_USE_HIP_TRUE@ filters/fmatrix_hip.hip @STARPU_USE_CUDA_TRUE@am__append_73 = \ @STARPU_USE_CUDA_TRUE@ filters/fvector_cuda.cu @STARPU_USE_HIP_TRUE@am__append_74 = \ @STARPU_USE_HIP_TRUE@ filters/fvector_hip.hip @STARPU_USE_CUDA_TRUE@am__append_75 = \ @STARPU_USE_CUDA_TRUE@ filters/ftensor_cuda.cu @STARPU_USE_HIP_TRUE@am__append_76 = \ @STARPU_USE_HIP_TRUE@ filters/ftensor_hip.hip @STARPU_USE_CUDA_TRUE@am__append_77 = \ @STARPU_USE_CUDA_TRUE@ filters/fblock_cuda.cu @STARPU_USE_HIP_TRUE@am__append_78 = \ @STARPU_USE_HIP_TRUE@ filters/fblock_hip.hip @STARPU_USE_CUDA_TRUE@am__append_79 = \ @STARPU_USE_CUDA_TRUE@ filters/fmatrix_cuda.cu @STARPU_USE_HIP_TRUE@am__append_80 = \ @STARPU_USE_HIP_TRUE@ filters/fmatrix_hip.hip @STARPU_USE_CUDA_TRUE@am__append_81 = \ @STARPU_USE_CUDA_TRUE@ filters/fvector_cuda.cu @STARPU_USE_HIP_TRUE@am__append_82 = \ @STARPU_USE_HIP_TRUE@ filters/fvector_hip.hip @STARPU_USE_CUDA_TRUE@am__append_83 = \ @STARPU_USE_CUDA_TRUE@ filters/fmultiple_cuda.cu @STARPU_USE_HIP_TRUE@am__append_84 = \ @STARPU_USE_HIP_TRUE@ filters/fmultiple_hip.hip @STARPU_USE_CUDA_TRUE@am__append_85 = \ @STARPU_USE_CUDA_TRUE@ filters/fmultiple_cuda.cu @STARPU_USE_HIP_TRUE@am__append_86 = \ @STARPU_USE_HIP_TRUE@ filters/fmultiple_hip.hip @STARPU_USE_CUDA_TRUE@am__append_87 = \ @STARPU_USE_CUDA_TRUE@ filters/fmultiple_cuda.cu @STARPU_USE_CUDA_TRUE@am__append_88 = \ @STARPU_USE_CUDA_TRUE@ filters/fmultiple_cuda.cu @STARPU_USE_HIP_TRUE@am__append_89 = \ @STARPU_USE_HIP_TRUE@ filters/fmultiple_hip.hip @STARPU_USE_HIP_TRUE@am__append_90 = \ @STARPU_USE_HIP_TRUE@ filters/fmultiple_hip.hip @STARPU_USE_CUDA_TRUE@am__append_91 = \ @STARPU_USE_CUDA_TRUE@ filters/fmultiple_cuda.cu @STARPU_USE_HIP_TRUE@am__append_92 = \ @STARPU_USE_HIP_TRUE@ filters/fmultiple_hip.hip @STARPU_USE_CUDA_TRUE@am__append_93 = \ @STARPU_USE_CUDA_TRUE@ filters/custom_mf/conversion.cu \ @STARPU_USE_CUDA_TRUE@ filters/custom_mf/cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_94 = \ @STARPU_USE_OPENCL_TRUE@ filters/custom_mf/conversion_opencl.c \ @STARPU_USE_OPENCL_TRUE@ filters/custom_mf/custom_opencl.c @STARPU_NO_BLAS_LIB_FALSE@@STARPU_USE_OPENCL_TRUE@am__append_95 = \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_USE_OPENCL_TRUE@ axpy/axpy_opencl.c @STARPU_NO_BLAS_LIB_FALSE@@STARPU_USE_OPENCL_TRUE@am__append_96 = \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_USE_OPENCL_TRUE@ axpy/axpy_opencl_kernel.cl @STARPU_NO_BLAS_LIB_FALSE@am__append_97 = \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am__append_98 = \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am__append_99 = \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am__append_100 = \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c #################### # Cholesky example # #################### @STARPU_NO_BLAS_LIB_FALSE@am__append_101 = \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/libmy_dmda.la @STARPU_NO_BLAS_LIB_FALSE@am__append_102 = \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_julia.sh @STARPU_USE_CUDA_TRUE@am__append_103 = \ @STARPU_USE_CUDA_TRUE@ spmv/spmv_cuda.cu @STARPU_USE_CUDA_TRUE@am__append_104 = \ @STARPU_USE_CUDA_TRUE@ incrementer/incrementer_kernels.cu @STARPU_USE_OPENCL_TRUE@am__append_105 = \ @STARPU_USE_OPENCL_TRUE@ incrementer/incrementer_kernels_opencl.c @STARPU_USE_CUDA_TRUE@am__append_106 = \ @STARPU_USE_CUDA_TRUE@ incrementer/incrementer_kernels.cu @STARPU_USE_OPENCL_TRUE@am__append_107 = \ @STARPU_USE_OPENCL_TRUE@ incrementer/incrementer_kernels_opencl.c @STARPU_USE_OPENCL_TRUE@am__append_108 = \ @STARPU_USE_OPENCL_TRUE@ incrementer/incrementer_kernels_opencl.c @STARPU_USE_CUDA_TRUE@am__append_109 = \ @STARPU_USE_CUDA_TRUE@ interface/complex_kernels.cu @STARPU_USE_OPENCL_TRUE@am__append_110 = \ @STARPU_USE_OPENCL_TRUE@ interface/complex_kernels_opencl.c @STARPU_USE_CUDA_TRUE@am__append_111 = \ @STARPU_USE_CUDA_TRUE@ interface/complex_dev_handle/complex_dev_handle_kernels.cu @STARPU_USE_OPENCL_TRUE@am__append_112 = \ @STARPU_USE_OPENCL_TRUE@ interface/complex_dev_handle/complex_dev_handle_kernels_opencl.c @STARPU_USE_CUDA_TRUE@am__append_113 = \ @STARPU_USE_CUDA_TRUE@ reductions/dot_product_kernels.cu @STARPU_HAVE_X11_TRUE@am__append_114 = $(X_CFLAGS) ###### # Pi # ###### @STARPU_HAVE_WINDOWS_FALSE@am__append_115 = \ @STARPU_HAVE_WINDOWS_FALSE@ pi/pi \ @STARPU_HAVE_WINDOWS_FALSE@ pi/pi_redux @STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@am__append_116 = \ @STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@ pi/pi_kernel.cu \ @STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@ pi/SobolQRNG/sobol_gpu.cu @STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@am__append_117 = \ @STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@ pi/pi_redux_kernel.cu ########################### # OpenGL interoperability # ########################### @STARPU_HAVE_OPENGL_TRUE@am__append_118 = \ @STARPU_HAVE_OPENGL_TRUE@ gl_interop/gl_interop \ @STARPU_HAVE_OPENGL_TRUE@ gl_interop/gl_interop_idle subdir = examples ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = @STARPU_SIMGRID_FALSE@am__EXEEXT_1 = \ @STARPU_SIMGRID_FALSE@ basic_examples/hello_world$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ basic_examples/hooks$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ basic_examples/topology$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ basic_examples/vector_scal$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ basic_examples/mult$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ basic_examples/block$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ basic_examples/variable$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ basic_examples/multiformat$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ basic_examples/dynamic_handles$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ basic_examples/task_insert_color$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ basic_examples/ndim$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ mlr/mlr$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ cpp/incrementer_cpp$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ cpp/add_vectors$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ cpp/add_vectors_interface$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/alloc$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fread$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fvector$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fvector_pick_variable$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/ftensor$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/ftensor_pick_block$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/ftensor_pick_variable$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fblock$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fblock_pick_matrix$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fblock_pick_variable$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fmatrix$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fmatrix_pick_vector$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fmatrix_pick_variable$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fndim$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fndim_pick_ndim$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fndim_5d_pick_tensor$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fndim_4d_pick_block$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fndim_3d_pick_matrix$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fndim_2d_pick_vector$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fndim_1d_pick_variable$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fndim_pick_variable$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fndim_to_tensor$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fndim_to_block$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fndim_to_matrix$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fndim_to_vector$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fndim_to_variable$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fmultiple_manual$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fmultiple_submit$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fmultiple_submit_readonly$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fmultiple_submit_readonly_downgrade$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/fmultiple_submit_implicit$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/frecursive$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/shadow$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/shadow2d$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/shadow3d$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/shadow4d$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/shadownd$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ tag_example/tag_example$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ tag_example/tag_example2$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ tag_example/tag_example3$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ tag_example/tag_example4$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ tag_example/tag_restartable$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ transactions/trs_inc$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ spmd/vector_scal_spmd$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ spmv/spmv$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ callback/callback$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ callback/prologue$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ incrementer/incrementer$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ binary/binary$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ interface/complex$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ interface/complex_dev_handle/complex_dev_handle$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ matvecmult/matvecmult$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ profiling/profiling$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perf_monitoring/perf_counters_01$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perf_monitoring/perf_counters_02$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perf_steering/perf_knobs_01$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perf_steering/perf_knobs_02$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perf_steering/perf_knobs_03$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ scheduler/heteroprio_test$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_empty$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_remove$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_delete$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ sched_ctx/two_cpu_contexts$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ sched_ctx/dummy_sched_with_ctx$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ worker_collections/worker_tree_example$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ reductions/dot_product$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ reductions/minmax_reduction$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ dependency/task_end_dep$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ dependency/task_end_dep_add$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ dependency/sequential_consistency$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ subgraphs/manual$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ subgraphs/partition$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ subgraphs/plan$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ scheduler/dummy_sched$(EXEEXT) @STARPU_HAVE_CXX11_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_2 = cpp/add_vectors_cpp11$(EXEEXT) @STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_3 = fortran/hello$(EXEEXT) @STARPU_HAVE_F77_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_4 = basic_examples/vector_scal_fortran$(EXEEXT) @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_5 = fortran90/f90_example$(EXEEXT) \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_vector$(EXEEXT) \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_matrix$(EXEEXT) \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_example$(EXEEXT) \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_dynbuf$(EXEEXT) \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_varbuf$(EXEEXT) \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_sched_ctx$(EXEEXT) \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_partition$(EXEEXT) @STARPU_USE_CUDA_TRUE@am__EXEEXT_6 = mult/sgemm$(EXEEXT) \ @STARPU_USE_CUDA_TRUE@ mult/dgemm$(EXEEXT) @STARPU_USE_HIPBLAS_TRUE@am__EXEEXT_7 = mult/sgemm$(EXEEXT) \ @STARPU_USE_HIPBLAS_TRUE@ mult/dgemm$(EXEEXT) @STARPU_NO_BLAS_LIB_FALSE@am__EXEEXT_8 = mult/sgemm$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mult/dgemm$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_float$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_double$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_implicit_example_float$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_implicit_example_double$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_tag$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_tile_tag$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_implicit$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_compil$(EXEEXT) @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_9 = axpy/axpy$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ cholesky/cholesky_grain_tag$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ heat/heat$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ cg/cg$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ pipeline/pipeline$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ transactions/trs_sgemm$(EXEEXT) @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_10 = lu/lu_example_complex_float$(EXEEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@ lu/lu_example_complex_double$(EXEEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@ lu/lu_implicit_example_complex_float$(EXEEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_SIMGRID_FALSE@ lu/lu_implicit_example_complex_double$(EXEEXT) @STARPU_HAVE_CBLAS_H_TRUE@@STARPU_HAVE_CBLAS_SGEMV_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_11 = spmv/dw_block_spmv$(EXEEXT) @STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_12 = openmp/vector_scal_omp$(EXEEXT) \ @STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_without_sched_policy$(EXEEXT) \ @STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/nested_sched_ctxs$(EXEEXT) \ @STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_without_sched_policy_awake$(EXEEXT) \ @STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/parallel_tasks_reuse_handle$(EXEEXT) \ @STARPU_HAVE_OPENMP_TRUE@@STARPU_SIMGRID_FALSE@ sched_ctx/parallel_code$(EXEEXT) @STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_13 = parallel_workers/parallel_workers$(EXEEXT) \ @STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ parallel_workers/parallel_workers_func$(EXEEXT) \ @STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ parallel_workers/parallel_workers_oldapi$(EXEEXT) @STARPU_USE_CUDA_TRUE@am__EXEEXT_14 = \ @STARPU_USE_CUDA_TRUE@ sched_ctx/gpu_partition$(EXEEXT) am__EXEEXT_15 = sched_ctx/prio$(EXEEXT) scheduler/dummy_sched$(EXEEXT) \ scheduler/dummy_modular_sched$(EXEEXT) \ worker_collections/worker_list_example$(EXEEXT) \ api/bcsr_data_interface$(EXEEXT) \ api/block_data_interface$(EXEEXT) \ api/coo_data_interface$(EXEEXT) \ api/csr_data_interface$(EXEEXT) \ api/matrix_data_interface$(EXEEXT) \ api/multiformat_data_interface$(EXEEXT) \ api/tensor_data_interface$(EXEEXT) \ api/variable_data_interface$(EXEEXT) \ api/vector_data_interface$(EXEEXT) \ api/void_data_interface$(EXEEXT) $(am__EXEEXT_1) \ $(am__EXEEXT_2) $(am__EXEEXT_3) $(am__EXEEXT_4) \ $(am__EXEEXT_5) $(am__EXEEXT_6) $(am__EXEEXT_7) \ $(am__EXEEXT_8) $(am__EXEEXT_9) $(am__EXEEXT_10) \ $(am__EXEEXT_11) $(am__EXEEXT_3) $(am__EXEEXT_4) \ $(am__EXEEXT_12) $(am__EXEEXT_13) $(am__EXEEXT_14) @STARPU_USE_CUDA_TRUE@am__EXEEXT_16 = mult/sgemm_layout$(EXEEXT) \ @STARPU_USE_CUDA_TRUE@ mult/dgemm_layout$(EXEEXT) @STARPU_NO_BLAS_LIB_FALSE@am__EXEEXT_17 = mult/sgemm_layout$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mult/dgemm_layout$(EXEEXT) @STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_18 = pi/pi$(EXEEXT) \ @STARPU_HAVE_WINDOWS_FALSE@ pi/pi_redux$(EXEEXT) @STARPU_HAVE_OPENGL_TRUE@am__EXEEXT_19 = \ @STARPU_HAVE_OPENGL_TRUE@ gl_interop/gl_interop$(EXEEXT) \ @STARPU_HAVE_OPENGL_TRUE@ gl_interop/gl_interop_idle$(EXEEXT) am__installdirs = "$(DESTDIR)$(examplebindir)" \ "$(DESTDIR)$(pkglibdir)" "$(DESTDIR)$(STARPU_OPENCL_DATAdir)" @STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_20 = loader$(EXEEXT) PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } LTLIBRARIES = $(pkglib_LTLIBRARIES) cholesky_libmy_dmda_la_LIBADD = cholesky_libmy_dmda_la_SOURCES = cholesky/libmy_dmda.c am__dirstamp = $(am__leading_dot)dirstamp cholesky_libmy_dmda_la_OBJECTS = cholesky/libmy_dmda.lo AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = cholesky_libmy_dmda_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(AM_CFLAGS) $(CFLAGS) $(cholesky_libmy_dmda_la_LDFLAGS) \ $(LDFLAGS) -o $@ @STARPU_NO_BLAS_LIB_FALSE@am_cholesky_libmy_dmda_la_rpath = -rpath \ @STARPU_NO_BLAS_LIB_FALSE@ $(pkglibdir) profiling_tool_libprofiling_tool_la_LIBADD = profiling_tool_libprofiling_tool_la_SOURCES = \ profiling_tool/libprofiling_tool.c profiling_tool_libprofiling_tool_la_OBJECTS = \ profiling_tool/libprofiling_tool.lo profiling_tool_libprofiling_tool_la_LINK = $(LIBTOOL) $(AM_V_lt) \ --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(profiling_tool_libprofiling_tool_la_LDFLAGS) $(LDFLAGS) -o \ $@ @STARPU_SIMGRID_FALSE@am_profiling_tool_libprofiling_tool_la_rpath = \ @STARPU_SIMGRID_FALSE@ -rpath $(pkglibdir) scheduler_libdummy_sched_la_LIBADD = scheduler_libdummy_sched_la_SOURCES = scheduler/libdummy_sched.c scheduler_libdummy_sched_la_OBJECTS = scheduler/libdummy_sched.lo scheduler_libdummy_sched_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(AM_CFLAGS) $(CFLAGS) $(scheduler_libdummy_sched_la_LDFLAGS) \ $(LDFLAGS) -o $@ @STARPU_SIMGRID_FALSE@am_scheduler_libdummy_sched_la_rpath = -rpath \ @STARPU_SIMGRID_FALSE@ $(pkglibdir) api_bcsr_data_interface_SOURCES = api/bcsr_data_interface.c api_bcsr_data_interface_OBJECTS = api/bcsr_data_interface.$(OBJEXT) api_bcsr_data_interface_LDADD = $(LDADD) api_block_data_interface_SOURCES = api/block_data_interface.c api_block_data_interface_OBJECTS = api/block_data_interface.$(OBJEXT) api_block_data_interface_LDADD = $(LDADD) api_coo_data_interface_SOURCES = api/coo_data_interface.c api_coo_data_interface_OBJECTS = api/coo_data_interface.$(OBJEXT) api_coo_data_interface_LDADD = $(LDADD) api_csr_data_interface_SOURCES = api/csr_data_interface.c api_csr_data_interface_OBJECTS = api/csr_data_interface.$(OBJEXT) api_csr_data_interface_LDADD = $(LDADD) api_matrix_data_interface_SOURCES = api/matrix_data_interface.c api_matrix_data_interface_OBJECTS = \ api/matrix_data_interface.$(OBJEXT) api_matrix_data_interface_LDADD = $(LDADD) api_multiformat_data_interface_SOURCES = \ api/multiformat_data_interface.c api_multiformat_data_interface_OBJECTS = \ api/multiformat_data_interface.$(OBJEXT) api_multiformat_data_interface_LDADD = $(LDADD) api_tensor_data_interface_SOURCES = api/tensor_data_interface.c api_tensor_data_interface_OBJECTS = \ api/tensor_data_interface.$(OBJEXT) api_tensor_data_interface_LDADD = $(LDADD) api_variable_data_interface_SOURCES = api/variable_data_interface.c api_variable_data_interface_OBJECTS = \ api/variable_data_interface.$(OBJEXT) api_variable_data_interface_LDADD = $(LDADD) api_vector_data_interface_SOURCES = api/vector_data_interface.c api_vector_data_interface_OBJECTS = \ api/vector_data_interface.$(OBJEXT) api_vector_data_interface_LDADD = $(LDADD) api_void_data_interface_SOURCES = api/void_data_interface.c api_void_data_interface_OBJECTS = api/void_data_interface.$(OBJEXT) api_void_data_interface_LDADD = $(LDADD) am__axpy_axpy_SOURCES_DIST = axpy/axpy.c common/blas.c \ axpy/axpy_opencl.c @STARPU_NO_BLAS_LIB_FALSE@@STARPU_USE_OPENCL_TRUE@am__objects_1 = axpy/axpy_opencl.$(OBJEXT) @STARPU_NO_BLAS_LIB_FALSE@am_axpy_axpy_OBJECTS = axpy/axpy.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__objects_1) axpy_axpy_OBJECTS = $(am_axpy_axpy_OBJECTS) am__DEPENDENCIES_1 = @STARPU_NO_BLAS_LIB_FALSE@axpy_axpy_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__basic_examples_block_SOURCES_DIST = basic_examples/block.c \ basic_examples/block_cpu.c basic_examples/block_cuda.cu \ basic_examples/block_hip.hip basic_examples/block_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_2 = \ @STARPU_USE_CUDA_TRUE@ basic_examples/block_cuda.$(OBJEXT) @STARPU_USE_HIP_TRUE@am__objects_3 = \ @STARPU_USE_HIP_TRUE@ basic_examples/block_hip.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_4 = \ @STARPU_USE_OPENCL_TRUE@ basic_examples/block_opencl.$(OBJEXT) am_basic_examples_block_OBJECTS = basic_examples/block.$(OBJEXT) \ basic_examples/block_cpu.$(OBJEXT) $(am__objects_2) \ $(am__objects_3) $(am__objects_4) basic_examples_block_OBJECTS = $(am_basic_examples_block_OBJECTS) basic_examples_block_LDADD = $(LDADD) basic_examples_dynamic_handles_SOURCES = \ basic_examples/dynamic_handles.c basic_examples_dynamic_handles_OBJECTS = \ basic_examples/dynamic_handles.$(OBJEXT) basic_examples_dynamic_handles_LDADD = $(LDADD) basic_examples_hello_world_SOURCES = basic_examples/hello_world.c basic_examples_hello_world_OBJECTS = \ basic_examples/hello_world.$(OBJEXT) basic_examples_hello_world_LDADD = $(LDADD) basic_examples_hooks_SOURCES = basic_examples/hooks.c basic_examples_hooks_OBJECTS = basic_examples/hooks.$(OBJEXT) basic_examples_hooks_LDADD = $(LDADD) am__basic_examples_mult_SOURCES_DIST = basic_examples/mult.c \ basic_examples/mult_cuda.cu basic_examples/mult_hip.hip @STARPU_USE_CUDA_TRUE@am__objects_5 = \ @STARPU_USE_CUDA_TRUE@ basic_examples/mult_cuda.$(OBJEXT) @STARPU_USE_HIP_TRUE@am__objects_6 = \ @STARPU_USE_HIP_TRUE@ basic_examples/mult_hip.$(OBJEXT) am_basic_examples_mult_OBJECTS = basic_examples/mult.$(OBJEXT) \ $(am__objects_5) $(am__objects_6) basic_examples_mult_OBJECTS = $(am_basic_examples_mult_OBJECTS) basic_examples_mult_LDADD = $(LDADD) am__basic_examples_multiformat_SOURCES_DIST = \ basic_examples/multiformat.c \ basic_examples/multiformat_conversion_codelets.c \ basic_examples/multiformat_cuda.cu \ basic_examples/multiformat_conversion_codelets_cuda.cu \ basic_examples/multiformat_opencl.c \ basic_examples/multiformat_conversion_codelets_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_7 = basic_examples/multiformat_cuda.$(OBJEXT) \ @STARPU_USE_CUDA_TRUE@ basic_examples/multiformat_conversion_codelets_cuda.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_8 = basic_examples/multiformat_opencl.$(OBJEXT) \ @STARPU_USE_OPENCL_TRUE@ basic_examples/multiformat_conversion_codelets_opencl.$(OBJEXT) am_basic_examples_multiformat_OBJECTS = \ basic_examples/multiformat.$(OBJEXT) \ basic_examples/multiformat_conversion_codelets.$(OBJEXT) \ $(am__objects_7) $(am__objects_8) basic_examples_multiformat_OBJECTS = \ $(am_basic_examples_multiformat_OBJECTS) basic_examples_multiformat_LDADD = $(LDADD) basic_examples_ndim_SOURCES = basic_examples/ndim.c basic_examples_ndim_OBJECTS = basic_examples/ndim.$(OBJEXT) basic_examples_ndim_LDADD = $(LDADD) basic_examples_task_insert_color_SOURCES = \ basic_examples/task_insert_color.c basic_examples_task_insert_color_OBJECTS = \ basic_examples/task_insert_color.$(OBJEXT) basic_examples_task_insert_color_LDADD = $(LDADD) basic_examples_topology_SOURCES = basic_examples/topology.c basic_examples_topology_OBJECTS = basic_examples/topology.$(OBJEXT) basic_examples_topology_LDADD = $(LDADD) am__basic_examples_variable_SOURCES_DIST = basic_examples/variable.c \ basic_examples/variable_kernels_cpu.c \ basic_examples/variable_kernels.cu \ basic_examples/variable_kernels_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_9 = basic_examples/variable_kernels.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_10 = basic_examples/variable_kernels_opencl.$(OBJEXT) am_basic_examples_variable_OBJECTS = \ basic_examples/variable.$(OBJEXT) \ basic_examples/variable_kernels_cpu.$(OBJEXT) $(am__objects_9) \ $(am__objects_10) basic_examples_variable_OBJECTS = \ $(am_basic_examples_variable_OBJECTS) basic_examples_variable_LDADD = $(LDADD) am__basic_examples_vector_scal_SOURCES_DIST = \ basic_examples/vector_scal.c basic_examples/vector_scal_cpu.c \ basic_examples/vector_scal_cpu_icc.icc \ basic_examples/vector_scal_cuda.cu \ basic_examples/vector_scal_hip.hip \ basic_examples/vector_scal_opencl.c @STARPU_CROSS_COMPILING_FALSE@@STARPU_HAVE_ICC_TRUE@am__objects_11 = basic_examples/vector_scal_cpu_icc.$(OBJEXT) @STARPU_USE_CUDA_TRUE@am__objects_12 = basic_examples/vector_scal_cuda.$(OBJEXT) @STARPU_USE_HIP_TRUE@am__objects_13 = \ @STARPU_USE_HIP_TRUE@ basic_examples/vector_scal_hip.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_14 = basic_examples/vector_scal_opencl.$(OBJEXT) am_basic_examples_vector_scal_OBJECTS = \ basic_examples/vector_scal.$(OBJEXT) \ basic_examples/vector_scal_cpu.$(OBJEXT) $(am__objects_11) \ $(am__objects_12) $(am__objects_13) $(am__objects_14) basic_examples_vector_scal_OBJECTS = \ $(am_basic_examples_vector_scal_OBJECTS) basic_examples_vector_scal_LDADD = $(LDADD) am__basic_examples_vector_scal_fortran_SOURCES_DIST = \ basic_examples/vector_scal_fortran.F \ basic_examples/vector_scal_c.c \ basic_examples/vector_scal_cpu.c \ basic_examples/vector_scal_cuda.cu @STARPU_HAVE_F77_TRUE@@STARPU_USE_CUDA_TRUE@am__objects_15 = basic_examples/vector_scal_cuda.$(OBJEXT) @STARPU_HAVE_F77_TRUE@am_basic_examples_vector_scal_fortran_OBJECTS = basic_examples/vector_scal_fortran.$(OBJEXT) \ @STARPU_HAVE_F77_TRUE@ basic_examples/vector_scal_c.$(OBJEXT) \ @STARPU_HAVE_F77_TRUE@ basic_examples/vector_scal_cpu.$(OBJEXT) \ @STARPU_HAVE_F77_TRUE@ $(am__objects_15) basic_examples_vector_scal_fortran_OBJECTS = \ $(am_basic_examples_vector_scal_fortran_OBJECTS) @STARPU_HAVE_F77_TRUE@@STARPU_USE_CUDA_TRUE@basic_examples_vector_scal_fortran_DEPENDENCIES = $(am__DEPENDENCIES_1) am__binary_binary_SOURCES_DIST = binary/binary.c \ incrementer/incrementer_kernels_opencl.c @STARPU_USE_OPENCL_TRUE@am__objects_16 = incrementer/incrementer_kernels_opencl.$(OBJEXT) am_binary_binary_OBJECTS = binary/binary.$(OBJEXT) $(am__objects_16) binary_binary_OBJECTS = $(am_binary_binary_OBJECTS) binary_binary_LDADD = $(LDADD) callback_callback_SOURCES = callback/callback.c callback_callback_OBJECTS = callback/callback.$(OBJEXT) callback_callback_LDADD = $(LDADD) callback_prologue_SOURCES = callback/prologue.c callback_prologue_OBJECTS = callback/prologue.$(OBJEXT) callback_prologue_LDADD = $(LDADD) am__cg_cg_SOURCES_DIST = cg/cg.c common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_cg_cg_OBJECTS = cg/cg.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) cg_cg_OBJECTS = $(am_cg_cg_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@cg_cg_DEPENDENCIES = $(am__DEPENDENCIES_1) am__cholesky_cholesky_compil_SOURCES_DIST = \ cholesky/cholesky_compil.c cholesky/cholesky_models.c \ cholesky/cholesky_kernels.c sched_ctx_utils/sched_ctx_utils.c \ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_cholesky_cholesky_compil_OBJECTS = \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_compil.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ sched_ctx_utils/sched_ctx_utils.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) cholesky_cholesky_compil_OBJECTS = \ $(am_cholesky_cholesky_compil_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_compil_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__cholesky_cholesky_grain_tag_SOURCES_DIST = \ cholesky/cholesky_grain_tag.c cholesky/cholesky_models.c \ cholesky/cholesky_kernels.c common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_cholesky_cholesky_grain_tag_OBJECTS = cholesky/cholesky_grain_tag.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) cholesky_cholesky_grain_tag_OBJECTS = \ $(am_cholesky_cholesky_grain_tag_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_grain_tag_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__cholesky_cholesky_implicit_SOURCES_DIST = \ cholesky/cholesky_implicit.c cholesky/cholesky_models.c \ cholesky/cholesky_kernels.c sched_ctx_utils/sched_ctx_utils.c \ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_cholesky_cholesky_implicit_OBJECTS = cholesky/cholesky_implicit.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ sched_ctx_utils/sched_ctx_utils.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) cholesky_cholesky_implicit_OBJECTS = \ $(am_cholesky_cholesky_implicit_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_implicit_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__cholesky_cholesky_tag_SOURCES_DIST = cholesky/cholesky_tag.c \ cholesky/cholesky_models.c cholesky/cholesky_kernels.c \ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_cholesky_cholesky_tag_OBJECTS = \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_tag.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) cholesky_cholesky_tag_OBJECTS = $(am_cholesky_cholesky_tag_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_tag_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__cholesky_cholesky_tile_tag_SOURCES_DIST = \ cholesky/cholesky_tile_tag.c cholesky/cholesky_models.c \ cholesky/cholesky_kernels.c common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_cholesky_cholesky_tile_tag_OBJECTS = cholesky/cholesky_tile_tag.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) cholesky_cholesky_tile_tag_OBJECTS = \ $(am_cholesky_cholesky_tile_tag_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_tile_tag_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am_cpp_add_vectors_OBJECTS = cpp/add_vectors.$(OBJEXT) cpp_add_vectors_OBJECTS = $(am_cpp_add_vectors_OBJECTS) cpp_add_vectors_LDADD = $(LDADD) am__cpp_add_vectors_cpp11_SOURCES_DIST = cpp/add_vectors_cpp11.cpp @STARPU_HAVE_CXX11_TRUE@am_cpp_add_vectors_cpp11_OBJECTS = \ @STARPU_HAVE_CXX11_TRUE@ cpp/add_vectors_cpp11.$(OBJEXT) cpp_add_vectors_cpp11_OBJECTS = $(am_cpp_add_vectors_cpp11_OBJECTS) cpp_add_vectors_cpp11_LDADD = $(LDADD) am_cpp_add_vectors_interface_OBJECTS = \ cpp/add_vectors_interface.$(OBJEXT) cpp_add_vectors_interface_OBJECTS = \ $(am_cpp_add_vectors_interface_OBJECTS) cpp_add_vectors_interface_LDADD = $(LDADD) am__cpp_incrementer_cpp_SOURCES_DIST = cpp/incrementer_cpp.cpp \ incrementer/incrementer_kernels.cu \ incrementer/incrementer_kernels_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_17 = incrementer/incrementer_kernels.$(OBJEXT) am_cpp_incrementer_cpp_OBJECTS = cpp/incrementer_cpp.$(OBJEXT) \ $(am__objects_17) $(am__objects_16) cpp_incrementer_cpp_OBJECTS = $(am_cpp_incrementer_cpp_OBJECTS) cpp_incrementer_cpp_LDADD = $(LDADD) dependency_sequential_consistency_SOURCES = \ dependency/sequential_consistency.c dependency_sequential_consistency_OBJECTS = \ dependency/sequential_consistency.$(OBJEXT) dependency_sequential_consistency_LDADD = $(LDADD) dependency_task_end_dep_SOURCES = dependency/task_end_dep.c dependency_task_end_dep_OBJECTS = dependency/task_end_dep.$(OBJEXT) dependency_task_end_dep_LDADD = $(LDADD) dependency_task_end_dep_add_SOURCES = dependency/task_end_dep_add.c dependency_task_end_dep_add_OBJECTS = \ dependency/task_end_dep_add.$(OBJEXT) dependency_task_end_dep_add_LDADD = $(LDADD) filters_alloc_SOURCES = filters/alloc.c filters_alloc_OBJECTS = filters/alloc.$(OBJEXT) filters_alloc_LDADD = $(LDADD) am__filters_custom_mf_custom_mf_filter_SOURCES_DIST = \ filters/custom_mf/custom_mf_filter.c \ filters/custom_mf/custom_interface.c \ filters/custom_mf/custom_conversion_codelets.c \ filters/custom_mf/conversion.cu filters/custom_mf/cuda.cu \ filters/custom_mf/conversion_opencl.c \ filters/custom_mf/custom_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_18 = \ @STARPU_USE_CUDA_TRUE@ filters/custom_mf/conversion.$(OBJEXT) \ @STARPU_USE_CUDA_TRUE@ filters/custom_mf/cuda.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_19 = filters/custom_mf/conversion_opencl.$(OBJEXT) \ @STARPU_USE_OPENCL_TRUE@ filters/custom_mf/custom_opencl.$(OBJEXT) am_filters_custom_mf_custom_mf_filter_OBJECTS = \ filters/custom_mf/custom_mf_filter.$(OBJEXT) \ filters/custom_mf/custom_interface.$(OBJEXT) \ filters/custom_mf/custom_conversion_codelets.$(OBJEXT) \ $(am__objects_18) $(am__objects_19) filters_custom_mf_custom_mf_filter_OBJECTS = \ $(am_filters_custom_mf_custom_mf_filter_OBJECTS) filters_custom_mf_custom_mf_filter_LDADD = $(LDADD) am__filters_fblock_SOURCES_DIST = filters/fblock.c \ filters/fblock_print.c filters/fblock_cpu.c \ filters/fblock_cuda.cu filters/fblock_hip.hip \ filters/fblock_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_20 = filters/fblock_cuda.$(OBJEXT) @STARPU_USE_HIP_TRUE@am__objects_21 = filters/fblock_hip.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_22 = \ @STARPU_USE_OPENCL_TRUE@ filters/fblock_opencl.$(OBJEXT) am_filters_fblock_OBJECTS = filters/fblock.$(OBJEXT) \ filters/fblock_print.$(OBJEXT) filters/fblock_cpu.$(OBJEXT) \ $(am__objects_20) $(am__objects_21) $(am__objects_22) filters_fblock_OBJECTS = $(am_filters_fblock_OBJECTS) filters_fblock_LDADD = $(LDADD) am__filters_fblock_pick_matrix_SOURCES_DIST = \ filters/fblock_pick_matrix.c filters/fblock_print.c \ filters/fmatrix_print.c filters/fmatrix_cpu.c \ filters/fmatrix_cuda.cu filters/fmatrix_hip.hip @STARPU_USE_CUDA_TRUE@am__objects_23 = filters/fmatrix_cuda.$(OBJEXT) @STARPU_USE_HIP_TRUE@am__objects_24 = filters/fmatrix_hip.$(OBJEXT) am_filters_fblock_pick_matrix_OBJECTS = \ filters/fblock_pick_matrix.$(OBJEXT) \ filters/fblock_print.$(OBJEXT) filters/fmatrix_print.$(OBJEXT) \ filters/fmatrix_cpu.$(OBJEXT) $(am__objects_23) \ $(am__objects_24) filters_fblock_pick_matrix_OBJECTS = \ $(am_filters_fblock_pick_matrix_OBJECTS) filters_fblock_pick_matrix_LDADD = $(LDADD) am__filters_fblock_pick_variable_SOURCES_DIST = \ filters/fblock_pick_variable.c filters/fblock_print.c \ filters/fvariable_cuda.cu @STARPU_USE_CUDA_TRUE@am__objects_25 = \ @STARPU_USE_CUDA_TRUE@ filters/fvariable_cuda.$(OBJEXT) am_filters_fblock_pick_variable_OBJECTS = \ filters/fblock_pick_variable.$(OBJEXT) \ filters/fblock_print.$(OBJEXT) $(am__objects_25) filters_fblock_pick_variable_OBJECTS = \ $(am_filters_fblock_pick_variable_OBJECTS) filters_fblock_pick_variable_LDADD = $(LDADD) am__filters_fmatrix_SOURCES_DIST = filters/fmatrix.c \ filters/fmatrix_print.c filters/fmatrix_cpu.c \ filters/fmatrix_cuda.cu filters/fmatrix_hip.hip am_filters_fmatrix_OBJECTS = filters/fmatrix.$(OBJEXT) \ filters/fmatrix_print.$(OBJEXT) filters/fmatrix_cpu.$(OBJEXT) \ $(am__objects_23) $(am__objects_24) filters_fmatrix_OBJECTS = $(am_filters_fmatrix_OBJECTS) filters_fmatrix_LDADD = $(LDADD) am__filters_fmatrix_pick_variable_SOURCES_DIST = \ filters/fmatrix_pick_variable.c filters/fmatrix_print.c \ filters/fvariable_cuda.cu am_filters_fmatrix_pick_variable_OBJECTS = \ filters/fmatrix_pick_variable.$(OBJEXT) \ filters/fmatrix_print.$(OBJEXT) $(am__objects_25) filters_fmatrix_pick_variable_OBJECTS = \ $(am_filters_fmatrix_pick_variable_OBJECTS) filters_fmatrix_pick_variable_LDADD = $(LDADD) am__filters_fmatrix_pick_vector_SOURCES_DIST = \ filters/fmatrix_pick_vector.c filters/fmatrix_print.c \ filters/fvector_cpu.c filters/fvector_cuda.cu \ filters/fvector_hip.hip @STARPU_USE_CUDA_TRUE@am__objects_26 = filters/fvector_cuda.$(OBJEXT) @STARPU_USE_HIP_TRUE@am__objects_27 = filters/fvector_hip.$(OBJEXT) am_filters_fmatrix_pick_vector_OBJECTS = \ filters/fmatrix_pick_vector.$(OBJEXT) \ filters/fmatrix_print.$(OBJEXT) filters/fvector_cpu.$(OBJEXT) \ $(am__objects_26) $(am__objects_27) filters_fmatrix_pick_vector_OBJECTS = \ $(am_filters_fmatrix_pick_vector_OBJECTS) filters_fmatrix_pick_vector_LDADD = $(LDADD) am__filters_fmultiple_manual_SOURCES_DIST = \ filters/fmultiple_manual.c filters/fmultiple_cuda.cu \ filters/fmultiple_hip.hip @STARPU_USE_CUDA_TRUE@am__objects_28 = \ @STARPU_USE_CUDA_TRUE@ filters/fmultiple_cuda.$(OBJEXT) @STARPU_USE_HIP_TRUE@am__objects_29 = filters/fmultiple_hip.$(OBJEXT) am_filters_fmultiple_manual_OBJECTS = \ filters/fmultiple_manual.$(OBJEXT) $(am__objects_28) \ $(am__objects_29) filters_fmultiple_manual_OBJECTS = \ $(am_filters_fmultiple_manual_OBJECTS) filters_fmultiple_manual_LDADD = $(LDADD) am__filters_fmultiple_submit_SOURCES_DIST = \ filters/fmultiple_submit.c filters/fmultiple_cuda.cu \ filters/fmultiple_hip.hip am_filters_fmultiple_submit_OBJECTS = \ filters/fmultiple_submit.$(OBJEXT) $(am__objects_28) \ $(am__objects_29) filters_fmultiple_submit_OBJECTS = \ $(am_filters_fmultiple_submit_OBJECTS) filters_fmultiple_submit_LDADD = $(LDADD) am__filters_fmultiple_submit_implicit_SOURCES_DIST = \ filters/fmultiple_submit_implicit.c filters/fmultiple_cuda.cu \ filters/fmultiple_hip.hip am_filters_fmultiple_submit_implicit_OBJECTS = \ filters/fmultiple_submit_implicit.$(OBJEXT) $(am__objects_28) \ $(am__objects_29) filters_fmultiple_submit_implicit_OBJECTS = \ $(am_filters_fmultiple_submit_implicit_OBJECTS) filters_fmultiple_submit_implicit_LDADD = $(LDADD) am__filters_fmultiple_submit_readonly_SOURCES_DIST = \ filters/fmultiple_submit_readonly.c filters/fmultiple_cuda.cu \ filters/fmultiple_hip.hip am_filters_fmultiple_submit_readonly_OBJECTS = \ filters/fmultiple_submit_readonly.$(OBJEXT) $(am__objects_28) \ $(am__objects_29) filters_fmultiple_submit_readonly_OBJECTS = \ $(am_filters_fmultiple_submit_readonly_OBJECTS) filters_fmultiple_submit_readonly_LDADD = $(LDADD) am__filters_fmultiple_submit_readonly_downgrade_SOURCES_DIST = \ filters/fmultiple_submit_readonly_downgrade.c \ filters/fmultiple_cuda.cu filters/fmultiple_hip.hip am_filters_fmultiple_submit_readonly_downgrade_OBJECTS = \ filters/fmultiple_submit_readonly_downgrade.$(OBJEXT) \ $(am__objects_28) $(am__objects_29) filters_fmultiple_submit_readonly_downgrade_OBJECTS = \ $(am_filters_fmultiple_submit_readonly_downgrade_OBJECTS) filters_fmultiple_submit_readonly_downgrade_LDADD = $(LDADD) am__filters_fndim_SOURCES_DIST = filters/fndim.c \ filters/ftensor_print.c filters/f4d_cpu.c filters/f4d_cuda.cu \ filters/f4d_hip.hip @STARPU_USE_CUDA_TRUE@am__objects_30 = filters/f4d_cuda.$(OBJEXT) @STARPU_USE_HIP_TRUE@am__objects_31 = filters/f4d_hip.$(OBJEXT) am_filters_fndim_OBJECTS = filters/fndim.$(OBJEXT) \ filters/ftensor_print.$(OBJEXT) filters/f4d_cpu.$(OBJEXT) \ $(am__objects_30) $(am__objects_31) filters_fndim_OBJECTS = $(am_filters_fndim_OBJECTS) filters_fndim_LDADD = $(LDADD) filters_fndim_1d_pick_variable_SOURCES = \ filters/fndim_1d_pick_variable.c filters_fndim_1d_pick_variable_OBJECTS = \ filters/fndim_1d_pick_variable.$(OBJEXT) filters_fndim_1d_pick_variable_LDADD = $(LDADD) am__filters_fndim_2d_pick_vector_SOURCES_DIST = \ filters/fndim_2d_pick_vector.c filters/fmatrix_print.c \ filters/fvector_cpu.c filters/fvector_cuda.cu \ filters/fvector_hip.hip am_filters_fndim_2d_pick_vector_OBJECTS = \ filters/fndim_2d_pick_vector.$(OBJEXT) \ filters/fmatrix_print.$(OBJEXT) filters/fvector_cpu.$(OBJEXT) \ $(am__objects_26) $(am__objects_27) filters_fndim_2d_pick_vector_OBJECTS = \ $(am_filters_fndim_2d_pick_vector_OBJECTS) filters_fndim_2d_pick_vector_LDADD = $(LDADD) am__filters_fndim_3d_pick_matrix_SOURCES_DIST = \ filters/fndim_3d_pick_matrix.c filters/fblock_print.c \ filters/fmatrix_print.c filters/fmatrix_cpu.c \ filters/fmatrix_cuda.cu filters/fmatrix_hip.hip am_filters_fndim_3d_pick_matrix_OBJECTS = \ filters/fndim_3d_pick_matrix.$(OBJEXT) \ filters/fblock_print.$(OBJEXT) filters/fmatrix_print.$(OBJEXT) \ filters/fmatrix_cpu.$(OBJEXT) $(am__objects_23) \ $(am__objects_24) filters_fndim_3d_pick_matrix_OBJECTS = \ $(am_filters_fndim_3d_pick_matrix_OBJECTS) filters_fndim_3d_pick_matrix_LDADD = $(LDADD) am__filters_fndim_4d_pick_block_SOURCES_DIST = \ filters/fndim_4d_pick_block.c filters/ftensor_print.c \ filters/fblock_print.c filters/fblock_cpu.c \ filters/fblock_cuda.cu filters/fblock_hip.hip am_filters_fndim_4d_pick_block_OBJECTS = \ filters/fndim_4d_pick_block.$(OBJEXT) \ filters/ftensor_print.$(OBJEXT) filters/fblock_print.$(OBJEXT) \ filters/fblock_cpu.$(OBJEXT) $(am__objects_20) \ $(am__objects_21) filters_fndim_4d_pick_block_OBJECTS = \ $(am_filters_fndim_4d_pick_block_OBJECTS) filters_fndim_4d_pick_block_LDADD = $(LDADD) am__filters_fndim_5d_pick_tensor_SOURCES_DIST = \ filters/fndim_5d_pick_tensor.c filters/f5d_print.c \ filters/ftensor_print.c filters/ftensor_cpu.c \ filters/ftensor_cuda.cu filters/ftensor_hip.hip @STARPU_USE_CUDA_TRUE@am__objects_32 = filters/ftensor_cuda.$(OBJEXT) @STARPU_USE_HIP_TRUE@am__objects_33 = filters/ftensor_hip.$(OBJEXT) am_filters_fndim_5d_pick_tensor_OBJECTS = \ filters/fndim_5d_pick_tensor.$(OBJEXT) \ filters/f5d_print.$(OBJEXT) filters/ftensor_print.$(OBJEXT) \ filters/ftensor_cpu.$(OBJEXT) $(am__objects_32) \ $(am__objects_33) filters_fndim_5d_pick_tensor_OBJECTS = \ $(am_filters_fndim_5d_pick_tensor_OBJECTS) filters_fndim_5d_pick_tensor_LDADD = $(LDADD) am__filters_fndim_pick_ndim_SOURCES_DIST = filters/fndim_pick_ndim.c \ filters/ftensor_print.c filters/fblock_print.c \ filters/f3d_cpu.c filters/f3d_cuda.cu filters/f3d_hip.hip @STARPU_USE_CUDA_TRUE@am__objects_34 = filters/f3d_cuda.$(OBJEXT) @STARPU_USE_HIP_TRUE@am__objects_35 = filters/f3d_hip.$(OBJEXT) am_filters_fndim_pick_ndim_OBJECTS = \ filters/fndim_pick_ndim.$(OBJEXT) \ filters/ftensor_print.$(OBJEXT) filters/fblock_print.$(OBJEXT) \ filters/f3d_cpu.$(OBJEXT) $(am__objects_34) $(am__objects_35) filters_fndim_pick_ndim_OBJECTS = \ $(am_filters_fndim_pick_ndim_OBJECTS) filters_fndim_pick_ndim_LDADD = $(LDADD) am_filters_fndim_pick_variable_OBJECTS = \ filters/fndim_pick_variable.$(OBJEXT) \ filters/f5d_print.$(OBJEXT) filters_fndim_pick_variable_OBJECTS = \ $(am_filters_fndim_pick_variable_OBJECTS) filters_fndim_pick_variable_LDADD = $(LDADD) am__filters_fndim_to_block_SOURCES_DIST = filters/fndim_to_block.c \ filters/fblock_print.c filters/fblock_cpu.c \ filters/fblock_cuda.cu filters/fblock_hip.hip am_filters_fndim_to_block_OBJECTS = filters/fndim_to_block.$(OBJEXT) \ filters/fblock_print.$(OBJEXT) filters/fblock_cpu.$(OBJEXT) \ $(am__objects_20) $(am__objects_21) filters_fndim_to_block_OBJECTS = $(am_filters_fndim_to_block_OBJECTS) filters_fndim_to_block_LDADD = $(LDADD) am__filters_fndim_to_matrix_SOURCES_DIST = filters/fndim_to_matrix.c \ filters/fmatrix_print.c filters/fmatrix_cpu.c \ filters/fmatrix_cuda.cu filters/fmatrix_hip.hip am_filters_fndim_to_matrix_OBJECTS = \ filters/fndim_to_matrix.$(OBJEXT) \ filters/fmatrix_print.$(OBJEXT) filters/fmatrix_cpu.$(OBJEXT) \ $(am__objects_23) $(am__objects_24) filters_fndim_to_matrix_OBJECTS = \ $(am_filters_fndim_to_matrix_OBJECTS) filters_fndim_to_matrix_LDADD = $(LDADD) am__filters_fndim_to_tensor_SOURCES_DIST = filters/fndim_to_tensor.c \ filters/ftensor_print.c filters/ftensor_cpu.c \ filters/ftensor_cuda.cu filters/ftensor_hip.hip am_filters_fndim_to_tensor_OBJECTS = \ filters/fndim_to_tensor.$(OBJEXT) \ filters/ftensor_print.$(OBJEXT) filters/ftensor_cpu.$(OBJEXT) \ $(am__objects_32) $(am__objects_33) filters_fndim_to_tensor_OBJECTS = \ $(am_filters_fndim_to_tensor_OBJECTS) filters_fndim_to_tensor_LDADD = $(LDADD) filters_fndim_to_variable_SOURCES = filters/fndim_to_variable.c filters_fndim_to_variable_OBJECTS = \ filters/fndim_to_variable.$(OBJEXT) filters_fndim_to_variable_LDADD = $(LDADD) am__filters_fndim_to_vector_SOURCES_DIST = filters/fndim_to_vector.c \ filters/fvector_cpu.c filters/fvector_cuda.cu \ filters/fvector_hip.hip am_filters_fndim_to_vector_OBJECTS = \ filters/fndim_to_vector.$(OBJEXT) \ filters/fvector_cpu.$(OBJEXT) $(am__objects_26) \ $(am__objects_27) filters_fndim_to_vector_OBJECTS = \ $(am_filters_fndim_to_vector_OBJECTS) filters_fndim_to_vector_LDADD = $(LDADD) filters_fread_SOURCES = filters/fread.c filters_fread_OBJECTS = filters/fread.$(OBJEXT) filters_fread_LDADD = $(LDADD) filters_frecursive_SOURCES = filters/frecursive.c filters_frecursive_OBJECTS = filters/frecursive.$(OBJEXT) filters_frecursive_LDADD = $(LDADD) am__filters_ftensor_SOURCES_DIST = filters/ftensor.c \ filters/ftensor_print.c filters/ftensor_cpu.c \ filters/ftensor_cuda.cu filters/ftensor_hip.hip am_filters_ftensor_OBJECTS = filters/ftensor.$(OBJEXT) \ filters/ftensor_print.$(OBJEXT) filters/ftensor_cpu.$(OBJEXT) \ $(am__objects_32) $(am__objects_33) filters_ftensor_OBJECTS = $(am_filters_ftensor_OBJECTS) filters_ftensor_LDADD = $(LDADD) am__filters_ftensor_pick_block_SOURCES_DIST = \ filters/ftensor_pick_block.c filters/ftensor_print.c \ filters/fblock_print.c filters/fblock_cpu.c \ filters/fblock_cuda.cu filters/fblock_hip.hip am_filters_ftensor_pick_block_OBJECTS = \ filters/ftensor_pick_block.$(OBJEXT) \ filters/ftensor_print.$(OBJEXT) filters/fblock_print.$(OBJEXT) \ filters/fblock_cpu.$(OBJEXT) $(am__objects_20) \ $(am__objects_21) filters_ftensor_pick_block_OBJECTS = \ $(am_filters_ftensor_pick_block_OBJECTS) filters_ftensor_pick_block_LDADD = $(LDADD) am__filters_ftensor_pick_variable_SOURCES_DIST = \ filters/ftensor_pick_variable.c filters/ftensor_print.c \ filters/fvariable_cuda.cu am_filters_ftensor_pick_variable_OBJECTS = \ filters/ftensor_pick_variable.$(OBJEXT) \ filters/ftensor_print.$(OBJEXT) $(am__objects_25) filters_ftensor_pick_variable_OBJECTS = \ $(am_filters_ftensor_pick_variable_OBJECTS) filters_ftensor_pick_variable_LDADD = $(LDADD) am__filters_fvector_SOURCES_DIST = filters/fvector.c \ filters/fvector_cpu.c filters/fvector_cuda.cu \ filters/fvector_hip.hip am_filters_fvector_OBJECTS = filters/fvector.$(OBJEXT) \ filters/fvector_cpu.$(OBJEXT) $(am__objects_26) \ $(am__objects_27) filters_fvector_OBJECTS = $(am_filters_fvector_OBJECTS) filters_fvector_LDADD = $(LDADD) filters_fvector_pick_variable_SOURCES = \ filters/fvector_pick_variable.c filters_fvector_pick_variable_OBJECTS = \ filters/fvector_pick_variable.$(OBJEXT) filters_fvector_pick_variable_LDADD = $(LDADD) filters_shadow_SOURCES = filters/shadow.c filters_shadow_OBJECTS = filters/shadow.$(OBJEXT) filters_shadow_LDADD = $(LDADD) filters_shadow2d_SOURCES = filters/shadow2d.c filters_shadow2d_OBJECTS = filters/shadow2d.$(OBJEXT) filters_shadow2d_LDADD = $(LDADD) filters_shadow3d_SOURCES = filters/shadow3d.c filters_shadow3d_OBJECTS = filters/shadow3d.$(OBJEXT) filters_shadow3d_LDADD = $(LDADD) filters_shadow4d_SOURCES = filters/shadow4d.c filters_shadow4d_OBJECTS = filters/shadow4d.$(OBJEXT) filters_shadow4d_LDADD = $(LDADD) filters_shadownd_SOURCES = filters/shadownd.c filters_shadownd_OBJECTS = filters/shadownd.$(OBJEXT) filters_shadownd_LDADD = $(LDADD) am__fortran_hello_SOURCES_DIST = fortran/hello_c.c fortran/hello.F \ fortran/starpu_fortran.h @STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@am_fortran_hello_OBJECTS = fortran/hello_c.$(OBJEXT) \ @STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@ fortran/hello.$(OBJEXT) fortran_hello_OBJECTS = $(am_fortran_hello_OBJECTS) fortran_hello_LDADD = $(LDADD) am__fortran90_f90_example_SOURCES_DIST = fortran90/mod_types.f90 \ fortran90/starpu_mod.f90 fortran90/mod_interface.f90 \ fortran90/mod_compute.f90 fortran90/marshalling.c \ fortran90/f90_example.f90 @STARPU_HAVE_FC_TRUE@am_fortran90_f90_example_OBJECTS = \ @STARPU_HAVE_FC_TRUE@ fortran90/mod_types.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ fortran90/starpu_mod.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ fortran90/mod_interface.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ fortran90/mod_compute.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ fortran90/marshalling.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ fortran90/f90_example.$(OBJEXT) fortran90_f90_example_OBJECTS = $(am_fortran90_f90_example_OBJECTS) fortran90_f90_example_LDADD = $(LDADD) gl_interop_gl_interop_SOURCES = gl_interop/gl_interop.c gl_interop_gl_interop_OBJECTS = gl_interop/gl_interop.$(OBJEXT) @STARPU_HAVE_OPENGL_TRUE@gl_interop_gl_interop_DEPENDENCIES = \ @STARPU_HAVE_OPENGL_TRUE@ $(am__DEPENDENCIES_1) gl_interop_gl_interop_idle_SOURCES = gl_interop/gl_interop_idle.c gl_interop_gl_interop_idle_OBJECTS = \ gl_interop/gl_interop_idle.$(OBJEXT) @STARPU_HAVE_OPENGL_TRUE@gl_interop_gl_interop_idle_DEPENDENCIES = \ @STARPU_HAVE_OPENGL_TRUE@ $(am__DEPENDENCIES_1) am__heat_heat_SOURCES_DIST = heat/heat.c heat/dw_factolu.c \ heat/dw_factolu_tag.c heat/dw_factolu_grain.c \ heat/dw_sparse_cg.c heat/heat_display.c \ heat/lu_kernels_model.c heat/dw_sparse_cg_kernels.c \ heat/dw_factolu_kernels.c common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_heat_heat_OBJECTS = heat/heat.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ heat/dw_factolu.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ heat/dw_factolu_tag.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ heat/dw_factolu_grain.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ heat/dw_sparse_cg.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ heat/heat_display.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ heat/lu_kernels_model.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ heat/dw_sparse_cg_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ heat/dw_factolu_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) heat_heat_OBJECTS = $(am_heat_heat_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@heat_heat_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__incrementer_incrementer_SOURCES_DIST = incrementer/incrementer.c \ incrementer/incrementer_kernels.cu \ incrementer/incrementer_kernels_opencl.c am_incrementer_incrementer_OBJECTS = \ incrementer/incrementer.$(OBJEXT) $(am__objects_17) \ $(am__objects_16) incrementer_incrementer_OBJECTS = \ $(am_incrementer_incrementer_OBJECTS) incrementer_incrementer_LDADD = $(LDADD) am__interface_complex_SOURCES_DIST = interface/complex.c \ interface/complex_interface.c interface/complex_filters.c \ interface/complex_kernels.cu \ interface/complex_kernels_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_36 = \ @STARPU_USE_CUDA_TRUE@ interface/complex_kernels.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_37 = interface/complex_kernels_opencl.$(OBJEXT) am_interface_complex_OBJECTS = interface/complex.$(OBJEXT) \ interface/complex_interface.$(OBJEXT) \ interface/complex_filters.$(OBJEXT) $(am__objects_36) \ $(am__objects_37) interface_complex_OBJECTS = $(am_interface_complex_OBJECTS) interface_complex_LDADD = $(LDADD) am__interface_complex_dev_handle_complex_dev_handle_SOURCES_DIST = \ interface/complex_dev_handle/complex_dev_handle.c \ interface/complex_dev_handle/complex_dev_handle_interface.c \ interface/complex_dev_handle/complex_dev_handle_filters.c \ interface/complex_dev_handle/complex_dev_handle_kernels.cu \ interface/complex_dev_handle/complex_dev_handle_kernels_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_38 = interface/complex_dev_handle/complex_dev_handle_kernels.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_39 = interface/complex_dev_handle/complex_dev_handle_kernels_opencl.$(OBJEXT) am_interface_complex_dev_handle_complex_dev_handle_OBJECTS = \ interface/complex_dev_handle/complex_dev_handle.$(OBJEXT) \ interface/complex_dev_handle/complex_dev_handle_interface.$(OBJEXT) \ interface/complex_dev_handle/complex_dev_handle_filters.$(OBJEXT) \ $(am__objects_38) $(am__objects_39) interface_complex_dev_handle_complex_dev_handle_OBJECTS = \ $(am_interface_complex_dev_handle_complex_dev_handle_OBJECTS) interface_complex_dev_handle_complex_dev_handle_LDADD = $(LDADD) loader_SOURCES = loader.c loader_OBJECTS = loader-loader.$(OBJEXT) loader_LDADD = $(LDADD) am__lu_lu_example_complex_double_SOURCES_DIST = \ lu/lu_example_complex_double.c lu/zlu.c lu/zlu_pivot.c \ lu/zlu_kernels.c lu/blas_complex.c common/blas.c @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_example_complex_double_OBJECTS = lu/lu_example_complex_double.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_pivot.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_kernels.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) lu_lu_example_complex_double_OBJECTS = \ $(am_lu_lu_example_complex_double_OBJECTS) @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_complex_double_DEPENDENCIES = $(am__DEPENDENCIES_1) am__lu_lu_example_complex_float_SOURCES_DIST = \ lu/lu_example_complex_float.c lu/clu.c lu/clu_pivot.c \ lu/clu_kernels.c lu/blas_complex.c common/blas.c @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_example_complex_float_OBJECTS = lu/lu_example_complex_float.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_pivot.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_kernels.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) lu_lu_example_complex_float_OBJECTS = \ $(am_lu_lu_example_complex_float_OBJECTS) @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_complex_float_DEPENDENCIES = $(am__DEPENDENCIES_1) am__lu_lu_example_double_SOURCES_DIST = lu/lu_example_double.c \ lu/dlu.c lu/dlu_pivot.c lu/dlu_kernels.c common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_example_double_OBJECTS = \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_double.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/dlu.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_pivot.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) lu_lu_example_double_OBJECTS = $(am_lu_lu_example_double_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_double_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__lu_lu_example_float_SOURCES_DIST = lu/lu_example_float.c lu/slu.c \ lu/slu_pivot.c lu/slu_kernels.c common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_example_float_OBJECTS = \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_float.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/slu.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/slu_pivot.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/slu_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) lu_lu_example_float_OBJECTS = $(am_lu_lu_example_float_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_float_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__lu_lu_implicit_example_complex_double_SOURCES_DIST = \ lu/lu_example_complex_double.c lu/zlu_implicit.c \ lu/zlu_implicit_pivot.c lu/zlu_kernels.c lu/blas_complex.c \ common/blas.c @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_implicit_example_complex_double_OBJECTS = lu/lu_example_complex_double.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_implicit.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_implicit_pivot.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_kernels.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) lu_lu_implicit_example_complex_double_OBJECTS = \ $(am_lu_lu_implicit_example_complex_double_OBJECTS) @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_complex_double_DEPENDENCIES = $(am__DEPENDENCIES_1) am__lu_lu_implicit_example_complex_float_SOURCES_DIST = \ lu/lu_example_complex_float.c lu/clu_implicit.c \ lu/clu_implicit_pivot.c lu/clu_kernels.c lu/blas_complex.c \ common/blas.c @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_implicit_example_complex_float_OBJECTS = lu/lu_example_complex_float.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_implicit.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_implicit_pivot.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_kernels.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.$(OBJEXT) \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) lu_lu_implicit_example_complex_float_OBJECTS = \ $(am_lu_lu_implicit_example_complex_float_OBJECTS) @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_complex_float_DEPENDENCIES = $(am__DEPENDENCIES_1) am__lu_lu_implicit_example_double_SOURCES_DIST = \ lu/lu_example_double.c lu/dlu_implicit.c \ lu/dlu_implicit_pivot.c lu/dlu_kernels.c common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_implicit_example_double_OBJECTS = \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_double.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_implicit.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_implicit_pivot.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) lu_lu_implicit_example_double_OBJECTS = \ $(am_lu_lu_implicit_example_double_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_double_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__lu_lu_implicit_example_float_SOURCES_DIST = lu/lu_example_float.c \ lu/slu_implicit.c lu/slu_implicit_pivot.c lu/slu_kernels.c \ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_lu_lu_implicit_example_float_OBJECTS = \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_float.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/slu_implicit.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/slu_implicit_pivot.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ lu/slu_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) lu_lu_implicit_example_float_OBJECTS = \ $(am_lu_lu_implicit_example_float_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_float_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) mandelbrot_mandelbrot_SOURCES = mandelbrot/mandelbrot.c mandelbrot_mandelbrot_OBJECTS = \ mandelbrot/mandelbrot-mandelbrot.$(OBJEXT) @STARPU_HAVE_X11_TRUE@mandelbrot_mandelbrot_DEPENDENCIES = \ @STARPU_HAVE_X11_TRUE@ $(am__DEPENDENCIES_1) \ @STARPU_HAVE_X11_TRUE@ $(am__DEPENDENCIES_1) \ @STARPU_HAVE_X11_TRUE@ $(am__DEPENDENCIES_1) matvecmult_matvecmult_SOURCES = matvecmult/matvecmult.c matvecmult_matvecmult_OBJECTS = matvecmult/matvecmult.$(OBJEXT) matvecmult_matvecmult_LDADD = $(LDADD) mlr_mlr_SOURCES = mlr/mlr.c mlr_mlr_OBJECTS = mlr/mlr.$(OBJEXT) mlr_mlr_LDADD = $(LDADD) am__mult_dgemm_SOURCES_DIST = mult/dgemm.c common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am__objects_40 = common/blas.$(OBJEXT) am_mult_dgemm_OBJECTS = mult/dgemm.$(OBJEXT) $(am__objects_40) mult_dgemm_OBJECTS = $(am_mult_dgemm_OBJECTS) mult_dgemm_DEPENDENCIES = $(am__DEPENDENCIES_1) am__mult_dgemm_layout_SOURCES_DIST = mult/dgemm_layout.c common/blas.c am_mult_dgemm_layout_OBJECTS = mult/dgemm_layout.$(OBJEXT) \ $(am__objects_40) mult_dgemm_layout_OBJECTS = $(am_mult_dgemm_layout_OBJECTS) mult_dgemm_layout_DEPENDENCIES = $(am__DEPENDENCIES_1) am__mult_sgemm_SOURCES_DIST = mult/sgemm.c common/blas.c am_mult_sgemm_OBJECTS = mult/sgemm.$(OBJEXT) $(am__objects_40) mult_sgemm_OBJECTS = $(am_mult_sgemm_OBJECTS) mult_sgemm_DEPENDENCIES = $(am__DEPENDENCIES_1) am__mult_sgemm_layout_SOURCES_DIST = mult/sgemm_layout.c common/blas.c am_mult_sgemm_layout_OBJECTS = mult/sgemm_layout.$(OBJEXT) \ $(am__objects_40) mult_sgemm_layout_OBJECTS = $(am_mult_sgemm_layout_OBJECTS) mult_sgemm_layout_DEPENDENCIES = $(am__DEPENDENCIES_1) am__native_fortran_nf_dynbuf_SOURCES_DIST = \ native_fortran/nf_dynbuf_cl.f90 native_fortran/fstarpu_mod.f90 \ native_fortran/nf_dynbuf.f90 @STARPU_HAVE_FC_TRUE@am_native_fortran_nf_dynbuf_OBJECTS = \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_dynbuf_cl.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_dynbuf.$(OBJEXT) native_fortran_nf_dynbuf_OBJECTS = \ $(am_native_fortran_nf_dynbuf_OBJECTS) native_fortran_nf_dynbuf_LDADD = $(LDADD) am__native_fortran_nf_example_SOURCES_DIST = \ native_fortran/nf_types.f90 native_fortran/nf_compute.f90 \ native_fortran/fstarpu_mod.f90 native_fortran/nf_example.f90 @STARPU_HAVE_FC_TRUE@am_native_fortran_nf_example_OBJECTS = \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_types.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_compute.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_example.$(OBJEXT) native_fortran_nf_example_OBJECTS = \ $(am_native_fortran_nf_example_OBJECTS) native_fortran_nf_example_LDADD = $(LDADD) am__native_fortran_nf_matrix_SOURCES_DIST = \ native_fortran/nf_codelets.f90 native_fortran/fstarpu_mod.f90 \ native_fortran/nf_matrix.f90 @STARPU_HAVE_FC_TRUE@am_native_fortran_nf_matrix_OBJECTS = \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_codelets.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_matrix.$(OBJEXT) native_fortran_nf_matrix_OBJECTS = \ $(am_native_fortran_nf_matrix_OBJECTS) native_fortran_nf_matrix_LDADD = $(LDADD) am__native_fortran_nf_partition_SOURCES_DIST = \ native_fortran/nf_partition_cl.f90 \ native_fortran/fstarpu_mod.f90 native_fortran/nf_partition.f90 @STARPU_HAVE_FC_TRUE@am_native_fortran_nf_partition_OBJECTS = \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_partition_cl.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_partition.$(OBJEXT) native_fortran_nf_partition_OBJECTS = \ $(am_native_fortran_nf_partition_OBJECTS) native_fortran_nf_partition_LDADD = $(LDADD) am__native_fortran_nf_sched_ctx_SOURCES_DIST = \ native_fortran/nf_sched_ctx_cl.f90 \ native_fortran/fstarpu_mod.f90 native_fortran/nf_sched_ctx.f90 @STARPU_HAVE_FC_TRUE@am_native_fortran_nf_sched_ctx_OBJECTS = \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_sched_ctx_cl.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_sched_ctx.$(OBJEXT) native_fortran_nf_sched_ctx_OBJECTS = \ $(am_native_fortran_nf_sched_ctx_OBJECTS) native_fortran_nf_sched_ctx_LDADD = $(LDADD) am__native_fortran_nf_varbuf_SOURCES_DIST = \ native_fortran/nf_varbuf_cl.f90 native_fortran/fstarpu_mod.f90 \ native_fortran/nf_varbuf.f90 @STARPU_HAVE_FC_TRUE@am_native_fortran_nf_varbuf_OBJECTS = \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_varbuf_cl.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_varbuf.$(OBJEXT) native_fortran_nf_varbuf_OBJECTS = \ $(am_native_fortran_nf_varbuf_OBJECTS) native_fortran_nf_varbuf_LDADD = $(LDADD) am__native_fortran_nf_vector_SOURCES_DIST = \ native_fortran/nf_codelets.f90 native_fortran/fstarpu_mod.f90 \ native_fortran/nf_vector.f90 @STARPU_HAVE_FC_TRUE@am_native_fortran_nf_vector_OBJECTS = \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_codelets.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_vector.$(OBJEXT) native_fortran_nf_vector_OBJECTS = \ $(am_native_fortran_nf_vector_OBJECTS) native_fortran_nf_vector_LDADD = $(LDADD) openmp_vector_scal_omp_SOURCES = openmp/vector_scal_omp.c openmp_vector_scal_omp_OBJECTS = \ openmp/vector_scal_omp-vector_scal_omp.$(OBJEXT) openmp_vector_scal_omp_LDADD = $(LDADD) openmp_vector_scal_omp_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(openmp_vector_scal_omp_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ parallel_workers_parallel_workers_SOURCES = \ parallel_workers/parallel_workers.c parallel_workers_parallel_workers_OBJECTS = \ parallel_workers/parallel_workers-parallel_workers.$(OBJEXT) parallel_workers_parallel_workers_LDADD = $(LDADD) parallel_workers_parallel_workers_LINK = $(LIBTOOL) $(AM_V_lt) \ --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CCLD) $(parallel_workers_parallel_workers_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ parallel_workers_parallel_workers_func_SOURCES = \ parallel_workers/parallel_workers_func.c parallel_workers_parallel_workers_func_OBJECTS = parallel_workers/parallel_workers_func-parallel_workers_func.$(OBJEXT) parallel_workers_parallel_workers_func_LDADD = $(LDADD) parallel_workers_parallel_workers_func_LINK = $(LIBTOOL) $(AM_V_lt) \ --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CCLD) $(parallel_workers_parallel_workers_func_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ parallel_workers_parallel_workers_oldapi_SOURCES = \ parallel_workers/parallel_workers_oldapi.c parallel_workers_parallel_workers_oldapi_OBJECTS = parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.$(OBJEXT) parallel_workers_parallel_workers_oldapi_LDADD = $(LDADD) parallel_workers_parallel_workers_oldapi_LINK = $(LIBTOOL) $(AM_V_lt) \ --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CCLD) $(parallel_workers_parallel_workers_oldapi_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ perf_monitoring_perf_counters_01_SOURCES = \ perf_monitoring/perf_counters_01.c perf_monitoring_perf_counters_01_OBJECTS = \ perf_monitoring/perf_counters_01.$(OBJEXT) perf_monitoring_perf_counters_01_LDADD = $(LDADD) perf_monitoring_perf_counters_02_SOURCES = \ perf_monitoring/perf_counters_02.c perf_monitoring_perf_counters_02_OBJECTS = \ perf_monitoring/perf_counters_02.$(OBJEXT) perf_monitoring_perf_counters_02_LDADD = $(LDADD) perf_steering_perf_knobs_01_SOURCES = perf_steering/perf_knobs_01.c perf_steering_perf_knobs_01_OBJECTS = \ perf_steering/perf_knobs_01.$(OBJEXT) perf_steering_perf_knobs_01_LDADD = $(LDADD) perf_steering_perf_knobs_02_SOURCES = perf_steering/perf_knobs_02.c perf_steering_perf_knobs_02_OBJECTS = \ perf_steering/perf_knobs_02.$(OBJEXT) perf_steering_perf_knobs_02_LDADD = $(LDADD) perf_steering_perf_knobs_03_SOURCES = perf_steering/perf_knobs_03.c perf_steering_perf_knobs_03_OBJECTS = \ perf_steering/perf_knobs_03.$(OBJEXT) perf_steering_perf_knobs_03_LDADD = $(LDADD) am__pi_pi_SOURCES_DIST = pi/pi.c pi/SobolQRNG/sobol_gold.c \ pi/SobolQRNG/sobol_primitives.c pi/pi_kernel.cu \ pi/SobolQRNG/sobol_gpu.cu @STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@am__objects_41 = pi/pi_kernel.$(OBJEXT) \ @STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@ pi/SobolQRNG/sobol_gpu.$(OBJEXT) @STARPU_HAVE_WINDOWS_FALSE@am_pi_pi_OBJECTS = pi/pi.$(OBJEXT) \ @STARPU_HAVE_WINDOWS_FALSE@ pi/SobolQRNG/sobol_gold.$(OBJEXT) \ @STARPU_HAVE_WINDOWS_FALSE@ pi/SobolQRNG/sobol_primitives.$(OBJEXT) \ @STARPU_HAVE_WINDOWS_FALSE@ $(am__objects_41) pi_pi_OBJECTS = $(am_pi_pi_OBJECTS) pi_pi_LDADD = $(LDADD) am__pi_pi_redux_SOURCES_DIST = pi/pi_redux.c pi/pi_redux_kernel.cu @STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@am__objects_42 = pi/pi_redux_kernel.$(OBJEXT) @STARPU_HAVE_WINDOWS_FALSE@am_pi_pi_redux_OBJECTS = \ @STARPU_HAVE_WINDOWS_FALSE@ pi/pi_redux.$(OBJEXT) \ @STARPU_HAVE_WINDOWS_FALSE@ $(am__objects_42) pi_pi_redux_OBJECTS = $(am_pi_pi_redux_OBJECTS) @STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@pi_pi_redux_DEPENDENCIES = $(am__DEPENDENCIES_1) am__pipeline_pipeline_SOURCES_DIST = pipeline/pipeline.c common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_pipeline_pipeline_OBJECTS = \ @STARPU_NO_BLAS_LIB_FALSE@ pipeline/pipeline.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) pipeline_pipeline_OBJECTS = $(am_pipeline_pipeline_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@pipeline_pipeline_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) ppm_downscaler_ppm_downscaler_SOURCES = \ ppm_downscaler/ppm_downscaler.c ppm_downscaler_ppm_downscaler_OBJECTS = \ ppm_downscaler/ppm_downscaler.$(OBJEXT) ppm_downscaler_ppm_downscaler_LDADD = $(LDADD) ppm_downscaler_yuv_downscaler_SOURCES = \ ppm_downscaler/yuv_downscaler.c ppm_downscaler_yuv_downscaler_OBJECTS = \ ppm_downscaler/yuv_downscaler.$(OBJEXT) ppm_downscaler_yuv_downscaler_LDADD = $(LDADD) profiling_profiling_SOURCES = profiling/profiling.c profiling_profiling_OBJECTS = profiling/profiling.$(OBJEXT) profiling_profiling_LDADD = $(LDADD) am__reductions_dot_product_SOURCES_DIST = reductions/dot_product.c \ reductions/dot_product_kernels.cu @STARPU_USE_CUDA_TRUE@am__objects_43 = reductions/dot_product_kernels.$(OBJEXT) am_reductions_dot_product_OBJECTS = reductions/dot_product.$(OBJEXT) \ $(am__objects_43) reductions_dot_product_OBJECTS = $(am_reductions_dot_product_OBJECTS) reductions_dot_product_LDADD = $(LDADD) reductions_minmax_reduction_SOURCES = reductions/minmax_reduction.c reductions_minmax_reduction_OBJECTS = \ reductions/minmax_reduction.$(OBJEXT) reductions_minmax_reduction_LDADD = $(LDADD) sched_ctx_dummy_sched_with_ctx_SOURCES = \ sched_ctx/dummy_sched_with_ctx.c sched_ctx_dummy_sched_with_ctx_OBJECTS = \ sched_ctx/dummy_sched_with_ctx.$(OBJEXT) sched_ctx_dummy_sched_with_ctx_LDADD = $(LDADD) am__sched_ctx_gpu_partition_SOURCES_DIST = sched_ctx/gpu_partition.c \ sched_ctx/axpy_partition_gpu.cu @STARPU_USE_CUDA_TRUE@am_sched_ctx_gpu_partition_OBJECTS = \ @STARPU_USE_CUDA_TRUE@ sched_ctx/gpu_partition.$(OBJEXT) \ @STARPU_USE_CUDA_TRUE@ sched_ctx/axpy_partition_gpu.$(OBJEXT) sched_ctx_gpu_partition_OBJECTS = \ $(am_sched_ctx_gpu_partition_OBJECTS) sched_ctx_gpu_partition_LDADD = $(LDADD) sched_ctx_nested_sched_ctxs_SOURCES = sched_ctx/nested_sched_ctxs.c sched_ctx_nested_sched_ctxs_OBJECTS = \ sched_ctx/nested_sched_ctxs-nested_sched_ctxs.$(OBJEXT) sched_ctx_nested_sched_ctxs_LDADD = $(LDADD) sched_ctx_nested_sched_ctxs_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(sched_ctx_nested_sched_ctxs_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ sched_ctx_parallel_code_SOURCES = sched_ctx/parallel_code.c sched_ctx_parallel_code_OBJECTS = \ sched_ctx/parallel_code-parallel_code.$(OBJEXT) sched_ctx_parallel_code_LDADD = $(LDADD) sched_ctx_parallel_code_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(sched_ctx_parallel_code_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ sched_ctx_parallel_tasks_reuse_handle_SOURCES = \ sched_ctx/parallel_tasks_reuse_handle.c sched_ctx_parallel_tasks_reuse_handle_OBJECTS = sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.$(OBJEXT) sched_ctx_parallel_tasks_reuse_handle_LDADD = $(LDADD) sched_ctx_parallel_tasks_reuse_handle_LINK = $(LIBTOOL) $(AM_V_lt) \ --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CCLD) $(sched_ctx_parallel_tasks_reuse_handle_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ sched_ctx_prio_SOURCES = sched_ctx/prio.c sched_ctx_prio_OBJECTS = sched_ctx/prio.$(OBJEXT) sched_ctx_prio_LDADD = $(LDADD) sched_ctx_sched_ctx_SOURCES = sched_ctx/sched_ctx.c sched_ctx_sched_ctx_OBJECTS = sched_ctx/sched_ctx.$(OBJEXT) sched_ctx_sched_ctx_LDADD = $(LDADD) sched_ctx_sched_ctx_delete_SOURCES = sched_ctx/sched_ctx_delete.c sched_ctx_sched_ctx_delete_OBJECTS = \ sched_ctx/sched_ctx_delete.$(OBJEXT) sched_ctx_sched_ctx_delete_LDADD = $(LDADD) sched_ctx_sched_ctx_empty_SOURCES = sched_ctx/sched_ctx_empty.c sched_ctx_sched_ctx_empty_OBJECTS = \ sched_ctx/sched_ctx_empty.$(OBJEXT) sched_ctx_sched_ctx_empty_LDADD = $(LDADD) sched_ctx_sched_ctx_remove_SOURCES = sched_ctx/sched_ctx_remove.c sched_ctx_sched_ctx_remove_OBJECTS = \ sched_ctx/sched_ctx_remove.$(OBJEXT) sched_ctx_sched_ctx_remove_LDADD = $(LDADD) sched_ctx_sched_ctx_without_sched_policy_SOURCES = \ sched_ctx/sched_ctx_without_sched_policy.c sched_ctx_sched_ctx_without_sched_policy_OBJECTS = sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.$(OBJEXT) sched_ctx_sched_ctx_without_sched_policy_LDADD = $(LDADD) sched_ctx_sched_ctx_without_sched_policy_LINK = $(LIBTOOL) $(AM_V_lt) \ --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CCLD) $(sched_ctx_sched_ctx_without_sched_policy_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ sched_ctx_sched_ctx_without_sched_policy_awake_SOURCES = \ sched_ctx/sched_ctx_without_sched_policy_awake.c sched_ctx_sched_ctx_without_sched_policy_awake_OBJECTS = \ sched_ctx/sched_ctx_without_sched_policy_awake.$(OBJEXT) sched_ctx_sched_ctx_without_sched_policy_awake_LDADD = $(LDADD) sched_ctx_two_cpu_contexts_SOURCES = sched_ctx/two_cpu_contexts.c sched_ctx_two_cpu_contexts_OBJECTS = \ sched_ctx/two_cpu_contexts.$(OBJEXT) sched_ctx_two_cpu_contexts_LDADD = $(LDADD) scheduler_dummy_modular_sched_SOURCES = \ scheduler/dummy_modular_sched.c scheduler_dummy_modular_sched_OBJECTS = \ scheduler/dummy_modular_sched.$(OBJEXT) scheduler_dummy_modular_sched_LDADD = $(LDADD) scheduler_dummy_sched_SOURCES = scheduler/dummy_sched.c scheduler_dummy_sched_OBJECTS = scheduler/dummy_sched.$(OBJEXT) scheduler_dummy_sched_LDADD = $(LDADD) scheduler_heteroprio_test_SOURCES = scheduler/heteroprio_test.c scheduler_heteroprio_test_OBJECTS = \ scheduler/heteroprio_test.$(OBJEXT) scheduler_heteroprio_test_LDADD = $(LDADD) am_spmd_vector_scal_spmd_OBJECTS = spmd/vector_scal_spmd.$(OBJEXT) spmd_vector_scal_spmd_OBJECTS = $(am_spmd_vector_scal_spmd_OBJECTS) spmd_vector_scal_spmd_LDADD = $(LDADD) am_spmv_dw_block_spmv_OBJECTS = spmv/dw_block_spmv.$(OBJEXT) \ spmv/dw_block_spmv_kernels.$(OBJEXT) \ spmv/matrix_market/mm_to_bcsr.$(OBJEXT) \ spmv/matrix_market/mmio.$(OBJEXT) spmv_dw_block_spmv_OBJECTS = $(am_spmv_dw_block_spmv_OBJECTS) spmv_dw_block_spmv_DEPENDENCIES = $(am__DEPENDENCIES_1) am__spmv_spmv_SOURCES_DIST = spmv/spmv.c spmv/spmv_kernels.c \ spmv/spmv_cuda.cu @STARPU_USE_CUDA_TRUE@am__objects_44 = spmv/spmv_cuda.$(OBJEXT) am_spmv_spmv_OBJECTS = spmv/spmv.$(OBJEXT) spmv/spmv_kernels.$(OBJEXT) \ $(am__objects_44) spmv_spmv_OBJECTS = $(am_spmv_spmv_OBJECTS) spmv_spmv_LDADD = $(LDADD) am_subgraphs_manual_OBJECTS = subgraphs/manual.$(OBJEXT) \ subgraphs/codelets.$(OBJEXT) subgraphs_manual_OBJECTS = $(am_subgraphs_manual_OBJECTS) subgraphs_manual_LDADD = $(LDADD) am_subgraphs_partition_OBJECTS = subgraphs/partition.$(OBJEXT) \ subgraphs/codelets.$(OBJEXT) subgraphs_partition_OBJECTS = $(am_subgraphs_partition_OBJECTS) subgraphs_partition_LDADD = $(LDADD) am_subgraphs_plan_OBJECTS = subgraphs/plan.$(OBJEXT) \ subgraphs/codelets.$(OBJEXT) subgraphs_plan_OBJECTS = $(am_subgraphs_plan_OBJECTS) subgraphs_plan_LDADD = $(LDADD) tag_example_tag_example_SOURCES = tag_example/tag_example.c tag_example_tag_example_OBJECTS = tag_example/tag_example.$(OBJEXT) tag_example_tag_example_LDADD = $(LDADD) tag_example_tag_example2_SOURCES = tag_example/tag_example2.c tag_example_tag_example2_OBJECTS = tag_example/tag_example2.$(OBJEXT) tag_example_tag_example2_LDADD = $(LDADD) tag_example_tag_example3_SOURCES = tag_example/tag_example3.c tag_example_tag_example3_OBJECTS = tag_example/tag_example3.$(OBJEXT) tag_example_tag_example3_LDADD = $(LDADD) tag_example_tag_example4_SOURCES = tag_example/tag_example4.c tag_example_tag_example4_OBJECTS = tag_example/tag_example4.$(OBJEXT) tag_example_tag_example4_LDADD = $(LDADD) tag_example_tag_restartable_SOURCES = tag_example/tag_restartable.c tag_example_tag_restartable_OBJECTS = \ tag_example/tag_restartable.$(OBJEXT) tag_example_tag_restartable_LDADD = $(LDADD) transactions_trs_inc_SOURCES = transactions/trs_inc.c transactions_trs_inc_OBJECTS = transactions/trs_inc.$(OBJEXT) transactions_trs_inc_LDADD = $(LDADD) am__transactions_trs_sgemm_SOURCES_DIST = transactions/trs_sgemm.c \ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_transactions_trs_sgemm_OBJECTS = \ @STARPU_NO_BLAS_LIB_FALSE@ transactions/trs_sgemm.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.$(OBJEXT) transactions_trs_sgemm_OBJECTS = $(am_transactions_trs_sgemm_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@transactions_trs_sgemm_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) worker_collections_worker_list_example_SOURCES = \ worker_collections/worker_list_example.c worker_collections_worker_list_example_OBJECTS = \ worker_collections/worker_list_example.$(OBJEXT) worker_collections_worker_list_example_LDADD = $(LDADD) worker_collections_worker_tree_example_SOURCES = \ worker_collections/worker_tree_example.c worker_collections_worker_tree_example_OBJECTS = \ worker_collections/worker_tree_example.$(OBJEXT) worker_collections_worker_tree_example_LDADD = $(LDADD) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po \ api/$(DEPDIR)/bcsr_data_interface.Po \ api/$(DEPDIR)/block_data_interface.Po \ api/$(DEPDIR)/coo_data_interface.Po \ api/$(DEPDIR)/csr_data_interface.Po \ api/$(DEPDIR)/matrix_data_interface.Po \ api/$(DEPDIR)/multiformat_data_interface.Po \ api/$(DEPDIR)/tensor_data_interface.Po \ api/$(DEPDIR)/variable_data_interface.Po \ api/$(DEPDIR)/vector_data_interface.Po \ api/$(DEPDIR)/void_data_interface.Po axpy/$(DEPDIR)/axpy.Po \ axpy/$(DEPDIR)/axpy_opencl.Po \ basic_examples/$(DEPDIR)/block.Po \ basic_examples/$(DEPDIR)/block_cpu.Po \ basic_examples/$(DEPDIR)/block_opencl.Po \ basic_examples/$(DEPDIR)/dynamic_handles.Po \ basic_examples/$(DEPDIR)/hello_world.Po \ basic_examples/$(DEPDIR)/hooks.Po \ basic_examples/$(DEPDIR)/mult.Po \ basic_examples/$(DEPDIR)/multiformat.Po \ basic_examples/$(DEPDIR)/multiformat_conversion_codelets.Po \ basic_examples/$(DEPDIR)/multiformat_conversion_codelets_opencl.Po \ basic_examples/$(DEPDIR)/multiformat_opencl.Po \ basic_examples/$(DEPDIR)/ndim.Po \ basic_examples/$(DEPDIR)/task_insert_color.Po \ basic_examples/$(DEPDIR)/topology.Po \ basic_examples/$(DEPDIR)/variable.Po \ basic_examples/$(DEPDIR)/variable_kernels_cpu.Po \ basic_examples/$(DEPDIR)/variable_kernels_opencl.Po \ basic_examples/$(DEPDIR)/vector_scal.Po \ basic_examples/$(DEPDIR)/vector_scal_c.Po \ basic_examples/$(DEPDIR)/vector_scal_cpu.Po \ basic_examples/$(DEPDIR)/vector_scal_opencl.Po \ binary/$(DEPDIR)/binary.Po callback/$(DEPDIR)/callback.Po \ callback/$(DEPDIR)/prologue.Po cg/$(DEPDIR)/cg.Po \ cholesky/$(DEPDIR)/cholesky_compil.Po \ cholesky/$(DEPDIR)/cholesky_grain_tag.Po \ cholesky/$(DEPDIR)/cholesky_implicit.Po \ cholesky/$(DEPDIR)/cholesky_kernels.Po \ cholesky/$(DEPDIR)/cholesky_models.Po \ cholesky/$(DEPDIR)/cholesky_tag.Po \ cholesky/$(DEPDIR)/cholesky_tile_tag.Po \ cholesky/$(DEPDIR)/libmy_dmda.Plo common/$(DEPDIR)/blas.Po \ cpp/$(DEPDIR)/add_vectors.Po \ cpp/$(DEPDIR)/add_vectors_cpp11.Po \ cpp/$(DEPDIR)/add_vectors_interface.Po \ cpp/$(DEPDIR)/incrementer_cpp.Po \ dependency/$(DEPDIR)/sequential_consistency.Po \ dependency/$(DEPDIR)/task_end_dep.Po \ dependency/$(DEPDIR)/task_end_dep_add.Po \ filters/$(DEPDIR)/alloc.Po filters/$(DEPDIR)/f3d_cpu.Po \ filters/$(DEPDIR)/f4d_cpu.Po filters/$(DEPDIR)/f5d_print.Po \ filters/$(DEPDIR)/fblock.Po filters/$(DEPDIR)/fblock_cpu.Po \ filters/$(DEPDIR)/fblock_opencl.Po \ filters/$(DEPDIR)/fblock_pick_matrix.Po \ filters/$(DEPDIR)/fblock_pick_variable.Po \ filters/$(DEPDIR)/fblock_print.Po filters/$(DEPDIR)/fmatrix.Po \ filters/$(DEPDIR)/fmatrix_cpu.Po \ filters/$(DEPDIR)/fmatrix_pick_variable.Po \ filters/$(DEPDIR)/fmatrix_pick_vector.Po \ filters/$(DEPDIR)/fmatrix_print.Po \ filters/$(DEPDIR)/fmultiple_manual.Po \ filters/$(DEPDIR)/fmultiple_submit.Po \ filters/$(DEPDIR)/fmultiple_submit_implicit.Po \ filters/$(DEPDIR)/fmultiple_submit_readonly.Po \ filters/$(DEPDIR)/fmultiple_submit_readonly_downgrade.Po \ filters/$(DEPDIR)/fndim.Po \ filters/$(DEPDIR)/fndim_1d_pick_variable.Po \ filters/$(DEPDIR)/fndim_2d_pick_vector.Po \ filters/$(DEPDIR)/fndim_3d_pick_matrix.Po \ filters/$(DEPDIR)/fndim_4d_pick_block.Po \ filters/$(DEPDIR)/fndim_5d_pick_tensor.Po \ filters/$(DEPDIR)/fndim_pick_ndim.Po \ filters/$(DEPDIR)/fndim_pick_variable.Po \ filters/$(DEPDIR)/fndim_to_block.Po \ filters/$(DEPDIR)/fndim_to_matrix.Po \ filters/$(DEPDIR)/fndim_to_tensor.Po \ filters/$(DEPDIR)/fndim_to_variable.Po \ filters/$(DEPDIR)/fndim_to_vector.Po \ filters/$(DEPDIR)/fread.Po filters/$(DEPDIR)/frecursive.Po \ filters/$(DEPDIR)/ftensor.Po filters/$(DEPDIR)/ftensor_cpu.Po \ filters/$(DEPDIR)/ftensor_pick_block.Po \ filters/$(DEPDIR)/ftensor_pick_variable.Po \ filters/$(DEPDIR)/ftensor_print.Po \ filters/$(DEPDIR)/fvector.Po filters/$(DEPDIR)/fvector_cpu.Po \ filters/$(DEPDIR)/fvector_pick_variable.Po \ filters/$(DEPDIR)/shadow.Po filters/$(DEPDIR)/shadow2d.Po \ filters/$(DEPDIR)/shadow3d.Po filters/$(DEPDIR)/shadow4d.Po \ filters/$(DEPDIR)/shadownd.Po \ filters/custom_mf/$(DEPDIR)/conversion_opencl.Po \ filters/custom_mf/$(DEPDIR)/custom_conversion_codelets.Po \ filters/custom_mf/$(DEPDIR)/custom_interface.Po \ filters/custom_mf/$(DEPDIR)/custom_mf_filter.Po \ filters/custom_mf/$(DEPDIR)/custom_opencl.Po \ fortran/$(DEPDIR)/hello_c.Po \ fortran90/$(DEPDIR)/marshalling.Po \ gl_interop/$(DEPDIR)/gl_interop.Po \ gl_interop/$(DEPDIR)/gl_interop_idle.Po \ heat/$(DEPDIR)/dw_factolu.Po \ heat/$(DEPDIR)/dw_factolu_grain.Po \ heat/$(DEPDIR)/dw_factolu_kernels.Po \ heat/$(DEPDIR)/dw_factolu_tag.Po \ heat/$(DEPDIR)/dw_sparse_cg.Po \ heat/$(DEPDIR)/dw_sparse_cg_kernels.Po heat/$(DEPDIR)/heat.Po \ heat/$(DEPDIR)/heat_display.Po \ heat/$(DEPDIR)/lu_kernels_model.Po \ incrementer/$(DEPDIR)/incrementer.Po \ incrementer/$(DEPDIR)/incrementer_kernels_opencl.Po \ interface/$(DEPDIR)/complex.Po \ interface/$(DEPDIR)/complex_filters.Po \ interface/$(DEPDIR)/complex_interface.Po \ interface/$(DEPDIR)/complex_kernels_opencl.Po \ interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle.Po \ interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_filters.Po \ interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_interface.Po \ interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_kernels_opencl.Po \ lu/$(DEPDIR)/blas_complex.Po lu/$(DEPDIR)/clu.Po \ lu/$(DEPDIR)/clu_implicit.Po \ lu/$(DEPDIR)/clu_implicit_pivot.Po lu/$(DEPDIR)/clu_kernels.Po \ lu/$(DEPDIR)/clu_pivot.Po lu/$(DEPDIR)/dlu.Po \ lu/$(DEPDIR)/dlu_implicit.Po \ lu/$(DEPDIR)/dlu_implicit_pivot.Po lu/$(DEPDIR)/dlu_kernels.Po \ lu/$(DEPDIR)/dlu_pivot.Po \ lu/$(DEPDIR)/lu_example_complex_double.Po \ lu/$(DEPDIR)/lu_example_complex_float.Po \ lu/$(DEPDIR)/lu_example_double.Po \ lu/$(DEPDIR)/lu_example_float.Po lu/$(DEPDIR)/slu.Po \ lu/$(DEPDIR)/slu_implicit.Po \ lu/$(DEPDIR)/slu_implicit_pivot.Po lu/$(DEPDIR)/slu_kernels.Po \ lu/$(DEPDIR)/slu_pivot.Po lu/$(DEPDIR)/zlu.Po \ lu/$(DEPDIR)/zlu_implicit.Po \ lu/$(DEPDIR)/zlu_implicit_pivot.Po lu/$(DEPDIR)/zlu_kernels.Po \ lu/$(DEPDIR)/zlu_pivot.Po \ mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Po \ matvecmult/$(DEPDIR)/matvecmult.Po mlr/$(DEPDIR)/mlr.Po \ mult/$(DEPDIR)/dgemm.Po mult/$(DEPDIR)/dgemm_layout.Po \ mult/$(DEPDIR)/sgemm.Po mult/$(DEPDIR)/sgemm_layout.Po \ openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Po \ parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Po \ parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Po \ parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Po \ perf_monitoring/$(DEPDIR)/perf_counters_01.Po \ perf_monitoring/$(DEPDIR)/perf_counters_02.Po \ perf_steering/$(DEPDIR)/perf_knobs_01.Po \ perf_steering/$(DEPDIR)/perf_knobs_02.Po \ perf_steering/$(DEPDIR)/perf_knobs_03.Po pi/$(DEPDIR)/pi.Po \ pi/$(DEPDIR)/pi_redux.Po pi/SobolQRNG/$(DEPDIR)/sobol_gold.Po \ pi/SobolQRNG/$(DEPDIR)/sobol_primitives.Po \ pipeline/$(DEPDIR)/pipeline.Po \ ppm_downscaler/$(DEPDIR)/ppm_downscaler.Po \ ppm_downscaler/$(DEPDIR)/yuv_downscaler.Po \ profiling/$(DEPDIR)/profiling.Po \ profiling_tool/$(DEPDIR)/libprofiling_tool.Plo \ reductions/$(DEPDIR)/dot_product.Po \ reductions/$(DEPDIR)/minmax_reduction.Po \ sched_ctx/$(DEPDIR)/dummy_sched_with_ctx.Po \ sched_ctx/$(DEPDIR)/gpu_partition.Po \ sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Po \ sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Po \ sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Po \ sched_ctx/$(DEPDIR)/prio.Po sched_ctx/$(DEPDIR)/sched_ctx.Po \ sched_ctx/$(DEPDIR)/sched_ctx_delete.Po \ sched_ctx/$(DEPDIR)/sched_ctx_empty.Po \ sched_ctx/$(DEPDIR)/sched_ctx_remove.Po \ sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Po \ sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy_awake.Po \ sched_ctx/$(DEPDIR)/two_cpu_contexts.Po \ sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po \ scheduler/$(DEPDIR)/dummy_modular_sched.Po \ scheduler/$(DEPDIR)/dummy_sched.Po \ scheduler/$(DEPDIR)/heteroprio_test.Po \ scheduler/$(DEPDIR)/libdummy_sched.Plo \ spmd/$(DEPDIR)/vector_scal_spmd.Po \ spmv/$(DEPDIR)/dw_block_spmv.Po \ spmv/$(DEPDIR)/dw_block_spmv_kernels.Po spmv/$(DEPDIR)/spmv.Po \ spmv/$(DEPDIR)/spmv_kernels.Po \ spmv/matrix_market/$(DEPDIR)/mm_to_bcsr.Po \ spmv/matrix_market/$(DEPDIR)/mmio.Po \ subgraphs/$(DEPDIR)/codelets.Po subgraphs/$(DEPDIR)/manual.Po \ subgraphs/$(DEPDIR)/partition.Po subgraphs/$(DEPDIR)/plan.Po \ tag_example/$(DEPDIR)/tag_example.Po \ tag_example/$(DEPDIR)/tag_example2.Po \ tag_example/$(DEPDIR)/tag_example3.Po \ tag_example/$(DEPDIR)/tag_example4.Po \ tag_example/$(DEPDIR)/tag_restartable.Po \ transactions/$(DEPDIR)/trs_inc.Po \ transactions/$(DEPDIR)/trs_sgemm.Po \ worker_collections/$(DEPDIR)/worker_list_example.Po \ worker_collections/$(DEPDIR)/worker_tree_example.Po am__mv = mv -f PPF77COMPILE = $(F77) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FFLAGS) $(FFLAGS) LTPPF77COMPILE = $(LIBTOOL) $(AM_V_lt) --tag=F77 $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(F77) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_FFLAGS) $(FFLAGS) AM_V_PPF77 = $(am__v_PPF77_@AM_V@) am__v_PPF77_ = $(am__v_PPF77_@AM_DEFAULT_V@) am__v_PPF77_0 = @echo " PPF77 " $@; am__v_PPF77_1 = F77LD = $(F77) F77LINK = $(LIBTOOL) $(AM_V_lt) --tag=F77 $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(F77LD) $(AM_FFLAGS) $(FFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_F77LD = $(am__v_F77LD_@AM_V@) am__v_F77LD_ = $(am__v_F77LD_@AM_DEFAULT_V@) am__v_F77LD_0 = @echo " F77LD " $@; am__v_F77LD_1 = COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CXXFLAGS) $(CXXFLAGS) AM_V_CXX = $(am__v_CXX_@AM_V@) am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) am__v_CXX_0 = @echo " CXX " $@; am__v_CXX_1 = CXXLD = $(CXX) CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) am__v_CXXLD_0 = @echo " CXXLD " $@; am__v_CXXLD_1 = FCCOMPILE = $(FC) $(AM_FCFLAGS) $(FCFLAGS) LTFCCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(FC) $(AM_FCFLAGS) $(FCFLAGS) AM_V_FC = $(am__v_FC_@AM_V@) am__v_FC_ = $(am__v_FC_@AM_DEFAULT_V@) am__v_FC_0 = @echo " FC " $@; am__v_FC_1 = FCLD = $(FC) FCLINK = $(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_FCLD = $(am__v_FCLD_@AM_V@) am__v_FCLD_ = $(am__v_FCLD_@AM_DEFAULT_V@) am__v_FCLD_0 = @echo " FCLD " $@; am__v_FCLD_1 = SOURCES = cholesky/libmy_dmda.c profiling_tool/libprofiling_tool.c \ scheduler/libdummy_sched.c api/bcsr_data_interface.c \ api/block_data_interface.c api/coo_data_interface.c \ api/csr_data_interface.c api/matrix_data_interface.c \ api/multiformat_data_interface.c api/tensor_data_interface.c \ api/variable_data_interface.c api/vector_data_interface.c \ api/void_data_interface.c $(axpy_axpy_SOURCES) \ $(basic_examples_block_SOURCES) \ basic_examples/dynamic_handles.c basic_examples/hello_world.c \ basic_examples/hooks.c $(basic_examples_mult_SOURCES) \ $(basic_examples_multiformat_SOURCES) basic_examples/ndim.c \ basic_examples/task_insert_color.c basic_examples/topology.c \ $(basic_examples_variable_SOURCES) \ $(basic_examples_vector_scal_SOURCES) \ $(basic_examples_vector_scal_fortran_SOURCES) \ $(binary_binary_SOURCES) callback/callback.c \ callback/prologue.c $(cg_cg_SOURCES) \ $(cholesky_cholesky_compil_SOURCES) \ $(cholesky_cholesky_grain_tag_SOURCES) \ $(cholesky_cholesky_implicit_SOURCES) \ $(cholesky_cholesky_tag_SOURCES) \ $(cholesky_cholesky_tile_tag_SOURCES) \ $(cpp_add_vectors_SOURCES) $(cpp_add_vectors_cpp11_SOURCES) \ $(cpp_add_vectors_interface_SOURCES) \ $(cpp_incrementer_cpp_SOURCES) \ dependency/sequential_consistency.c dependency/task_end_dep.c \ dependency/task_end_dep_add.c filters/alloc.c \ $(filters_custom_mf_custom_mf_filter_SOURCES) \ $(filters_fblock_SOURCES) \ $(filters_fblock_pick_matrix_SOURCES) \ $(filters_fblock_pick_variable_SOURCES) \ $(filters_fmatrix_SOURCES) \ $(filters_fmatrix_pick_variable_SOURCES) \ $(filters_fmatrix_pick_vector_SOURCES) \ $(filters_fmultiple_manual_SOURCES) \ $(filters_fmultiple_submit_SOURCES) \ $(filters_fmultiple_submit_implicit_SOURCES) \ $(filters_fmultiple_submit_readonly_SOURCES) \ $(filters_fmultiple_submit_readonly_downgrade_SOURCES) \ $(filters_fndim_SOURCES) filters/fndim_1d_pick_variable.c \ $(filters_fndim_2d_pick_vector_SOURCES) \ $(filters_fndim_3d_pick_matrix_SOURCES) \ $(filters_fndim_4d_pick_block_SOURCES) \ $(filters_fndim_5d_pick_tensor_SOURCES) \ $(filters_fndim_pick_ndim_SOURCES) \ $(filters_fndim_pick_variable_SOURCES) \ $(filters_fndim_to_block_SOURCES) \ $(filters_fndim_to_matrix_SOURCES) \ $(filters_fndim_to_tensor_SOURCES) filters/fndim_to_variable.c \ $(filters_fndim_to_vector_SOURCES) filters/fread.c \ filters/frecursive.c $(filters_ftensor_SOURCES) \ $(filters_ftensor_pick_block_SOURCES) \ $(filters_ftensor_pick_variable_SOURCES) \ $(filters_fvector_SOURCES) filters/fvector_pick_variable.c \ filters/shadow.c filters/shadow2d.c filters/shadow3d.c \ filters/shadow4d.c filters/shadownd.c $(fortran_hello_SOURCES) \ $(fortran90_f90_example_SOURCES) gl_interop/gl_interop.c \ gl_interop/gl_interop_idle.c $(heat_heat_SOURCES) \ $(incrementer_incrementer_SOURCES) \ $(interface_complex_SOURCES) \ $(interface_complex_dev_handle_complex_dev_handle_SOURCES) \ loader.c $(lu_lu_example_complex_double_SOURCES) \ $(lu_lu_example_complex_float_SOURCES) \ $(lu_lu_example_double_SOURCES) $(lu_lu_example_float_SOURCES) \ $(lu_lu_implicit_example_complex_double_SOURCES) \ $(lu_lu_implicit_example_complex_float_SOURCES) \ $(lu_lu_implicit_example_double_SOURCES) \ $(lu_lu_implicit_example_float_SOURCES) \ mandelbrot/mandelbrot.c matvecmult/matvecmult.c mlr/mlr.c \ $(mult_dgemm_SOURCES) $(mult_dgemm_layout_SOURCES) \ $(mult_sgemm_SOURCES) $(mult_sgemm_layout_SOURCES) \ $(native_fortran_nf_dynbuf_SOURCES) \ $(native_fortran_nf_example_SOURCES) \ $(native_fortran_nf_matrix_SOURCES) \ $(native_fortran_nf_partition_SOURCES) \ $(native_fortran_nf_sched_ctx_SOURCES) \ $(native_fortran_nf_varbuf_SOURCES) \ $(native_fortran_nf_vector_SOURCES) openmp/vector_scal_omp.c \ parallel_workers/parallel_workers.c \ parallel_workers/parallel_workers_func.c \ parallel_workers/parallel_workers_oldapi.c \ perf_monitoring/perf_counters_01.c \ perf_monitoring/perf_counters_02.c \ perf_steering/perf_knobs_01.c perf_steering/perf_knobs_02.c \ perf_steering/perf_knobs_03.c $(pi_pi_SOURCES) \ $(pi_pi_redux_SOURCES) $(pipeline_pipeline_SOURCES) \ ppm_downscaler/ppm_downscaler.c \ ppm_downscaler/yuv_downscaler.c profiling/profiling.c \ $(reductions_dot_product_SOURCES) \ reductions/minmax_reduction.c sched_ctx/dummy_sched_with_ctx.c \ $(sched_ctx_gpu_partition_SOURCES) \ sched_ctx/nested_sched_ctxs.c sched_ctx/parallel_code.c \ sched_ctx/parallel_tasks_reuse_handle.c sched_ctx/prio.c \ sched_ctx/sched_ctx.c sched_ctx/sched_ctx_delete.c \ sched_ctx/sched_ctx_empty.c sched_ctx/sched_ctx_remove.c \ sched_ctx/sched_ctx_without_sched_policy.c \ sched_ctx/sched_ctx_without_sched_policy_awake.c \ sched_ctx/two_cpu_contexts.c scheduler/dummy_modular_sched.c \ scheduler/dummy_sched.c scheduler/heteroprio_test.c \ $(spmd_vector_scal_spmd_SOURCES) $(spmv_dw_block_spmv_SOURCES) \ $(spmv_spmv_SOURCES) $(subgraphs_manual_SOURCES) \ $(subgraphs_partition_SOURCES) $(subgraphs_plan_SOURCES) \ tag_example/tag_example.c tag_example/tag_example2.c \ tag_example/tag_example3.c tag_example/tag_example4.c \ tag_example/tag_restartable.c transactions/trs_inc.c \ $(transactions_trs_sgemm_SOURCES) \ worker_collections/worker_list_example.c \ worker_collections/worker_tree_example.c DIST_SOURCES = cholesky/libmy_dmda.c \ profiling_tool/libprofiling_tool.c scheduler/libdummy_sched.c \ api/bcsr_data_interface.c api/block_data_interface.c \ api/coo_data_interface.c api/csr_data_interface.c \ api/matrix_data_interface.c api/multiformat_data_interface.c \ api/tensor_data_interface.c api/variable_data_interface.c \ api/vector_data_interface.c api/void_data_interface.c \ $(am__axpy_axpy_SOURCES_DIST) \ $(am__basic_examples_block_SOURCES_DIST) \ basic_examples/dynamic_handles.c basic_examples/hello_world.c \ basic_examples/hooks.c $(am__basic_examples_mult_SOURCES_DIST) \ $(am__basic_examples_multiformat_SOURCES_DIST) \ basic_examples/ndim.c basic_examples/task_insert_color.c \ basic_examples/topology.c \ $(am__basic_examples_variable_SOURCES_DIST) \ $(am__basic_examples_vector_scal_SOURCES_DIST) \ $(am__basic_examples_vector_scal_fortran_SOURCES_DIST) \ $(am__binary_binary_SOURCES_DIST) callback/callback.c \ callback/prologue.c $(am__cg_cg_SOURCES_DIST) \ $(am__cholesky_cholesky_compil_SOURCES_DIST) \ $(am__cholesky_cholesky_grain_tag_SOURCES_DIST) \ $(am__cholesky_cholesky_implicit_SOURCES_DIST) \ $(am__cholesky_cholesky_tag_SOURCES_DIST) \ $(am__cholesky_cholesky_tile_tag_SOURCES_DIST) \ $(cpp_add_vectors_SOURCES) \ $(am__cpp_add_vectors_cpp11_SOURCES_DIST) \ $(cpp_add_vectors_interface_SOURCES) \ $(am__cpp_incrementer_cpp_SOURCES_DIST) \ dependency/sequential_consistency.c dependency/task_end_dep.c \ dependency/task_end_dep_add.c filters/alloc.c \ $(am__filters_custom_mf_custom_mf_filter_SOURCES_DIST) \ $(am__filters_fblock_SOURCES_DIST) \ $(am__filters_fblock_pick_matrix_SOURCES_DIST) \ $(am__filters_fblock_pick_variable_SOURCES_DIST) \ $(am__filters_fmatrix_SOURCES_DIST) \ $(am__filters_fmatrix_pick_variable_SOURCES_DIST) \ $(am__filters_fmatrix_pick_vector_SOURCES_DIST) \ $(am__filters_fmultiple_manual_SOURCES_DIST) \ $(am__filters_fmultiple_submit_SOURCES_DIST) \ $(am__filters_fmultiple_submit_implicit_SOURCES_DIST) \ $(am__filters_fmultiple_submit_readonly_SOURCES_DIST) \ $(am__filters_fmultiple_submit_readonly_downgrade_SOURCES_DIST) \ $(am__filters_fndim_SOURCES_DIST) \ filters/fndim_1d_pick_variable.c \ $(am__filters_fndim_2d_pick_vector_SOURCES_DIST) \ $(am__filters_fndim_3d_pick_matrix_SOURCES_DIST) \ $(am__filters_fndim_4d_pick_block_SOURCES_DIST) \ $(am__filters_fndim_5d_pick_tensor_SOURCES_DIST) \ $(am__filters_fndim_pick_ndim_SOURCES_DIST) \ $(filters_fndim_pick_variable_SOURCES) \ $(am__filters_fndim_to_block_SOURCES_DIST) \ $(am__filters_fndim_to_matrix_SOURCES_DIST) \ $(am__filters_fndim_to_tensor_SOURCES_DIST) \ filters/fndim_to_variable.c \ $(am__filters_fndim_to_vector_SOURCES_DIST) filters/fread.c \ filters/frecursive.c $(am__filters_ftensor_SOURCES_DIST) \ $(am__filters_ftensor_pick_block_SOURCES_DIST) \ $(am__filters_ftensor_pick_variable_SOURCES_DIST) \ $(am__filters_fvector_SOURCES_DIST) \ filters/fvector_pick_variable.c filters/shadow.c \ filters/shadow2d.c filters/shadow3d.c filters/shadow4d.c \ filters/shadownd.c $(am__fortran_hello_SOURCES_DIST) \ $(am__fortran90_f90_example_SOURCES_DIST) \ gl_interop/gl_interop.c gl_interop/gl_interop_idle.c \ $(am__heat_heat_SOURCES_DIST) \ $(am__incrementer_incrementer_SOURCES_DIST) \ $(am__interface_complex_SOURCES_DIST) \ $(am__interface_complex_dev_handle_complex_dev_handle_SOURCES_DIST) \ loader.c $(am__lu_lu_example_complex_double_SOURCES_DIST) \ $(am__lu_lu_example_complex_float_SOURCES_DIST) \ $(am__lu_lu_example_double_SOURCES_DIST) \ $(am__lu_lu_example_float_SOURCES_DIST) \ $(am__lu_lu_implicit_example_complex_double_SOURCES_DIST) \ $(am__lu_lu_implicit_example_complex_float_SOURCES_DIST) \ $(am__lu_lu_implicit_example_double_SOURCES_DIST) \ $(am__lu_lu_implicit_example_float_SOURCES_DIST) \ mandelbrot/mandelbrot.c matvecmult/matvecmult.c mlr/mlr.c \ $(am__mult_dgemm_SOURCES_DIST) \ $(am__mult_dgemm_layout_SOURCES_DIST) \ $(am__mult_sgemm_SOURCES_DIST) \ $(am__mult_sgemm_layout_SOURCES_DIST) \ $(am__native_fortran_nf_dynbuf_SOURCES_DIST) \ $(am__native_fortran_nf_example_SOURCES_DIST) \ $(am__native_fortran_nf_matrix_SOURCES_DIST) \ $(am__native_fortran_nf_partition_SOURCES_DIST) \ $(am__native_fortran_nf_sched_ctx_SOURCES_DIST) \ $(am__native_fortran_nf_varbuf_SOURCES_DIST) \ $(am__native_fortran_nf_vector_SOURCES_DIST) \ openmp/vector_scal_omp.c parallel_workers/parallel_workers.c \ parallel_workers/parallel_workers_func.c \ parallel_workers/parallel_workers_oldapi.c \ perf_monitoring/perf_counters_01.c \ perf_monitoring/perf_counters_02.c \ perf_steering/perf_knobs_01.c perf_steering/perf_knobs_02.c \ perf_steering/perf_knobs_03.c $(am__pi_pi_SOURCES_DIST) \ $(am__pi_pi_redux_SOURCES_DIST) \ $(am__pipeline_pipeline_SOURCES_DIST) \ ppm_downscaler/ppm_downscaler.c \ ppm_downscaler/yuv_downscaler.c profiling/profiling.c \ $(am__reductions_dot_product_SOURCES_DIST) \ reductions/minmax_reduction.c sched_ctx/dummy_sched_with_ctx.c \ $(am__sched_ctx_gpu_partition_SOURCES_DIST) \ sched_ctx/nested_sched_ctxs.c sched_ctx/parallel_code.c \ sched_ctx/parallel_tasks_reuse_handle.c sched_ctx/prio.c \ sched_ctx/sched_ctx.c sched_ctx/sched_ctx_delete.c \ sched_ctx/sched_ctx_empty.c sched_ctx/sched_ctx_remove.c \ sched_ctx/sched_ctx_without_sched_policy.c \ sched_ctx/sched_ctx_without_sched_policy_awake.c \ sched_ctx/two_cpu_contexts.c scheduler/dummy_modular_sched.c \ scheduler/dummy_sched.c scheduler/heteroprio_test.c \ $(spmd_vector_scal_spmd_SOURCES) $(spmv_dw_block_spmv_SOURCES) \ $(am__spmv_spmv_SOURCES_DIST) $(subgraphs_manual_SOURCES) \ $(subgraphs_partition_SOURCES) $(subgraphs_plan_SOURCES) \ tag_example/tag_example.c tag_example/tag_example2.c \ tag_example/tag_example3.c tag_example/tag_example4.c \ tag_example/tag_restartable.c transactions/trs_inc.c \ $(am__transactions_trs_sgemm_SOURCES_DIST) \ worker_collections/worker_list_example.c \ worker_collections/worker_tree_example.c RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac DATA = $(nobase_STARPU_OPENCL_DATA_DATA) HEADERS = $(noinst_HEADERS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ check recheck distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-loader.mk \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ $(STARPU_EXPORTED_LIBS) $(STARPU_OPENCL_LDFLAGS) \ $(STARPU_CUDA_LDFLAGS) $(STARPU_HIP_LDFLAGS) $(MAGMA_LIBS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = $(am__append_4) $(am__append_6) LAUNCHER = $(am__append_3) $(am__append_5) AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(MAGMA_CFLAGS) $(APP_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) $(MAGMA_CFLAGS) $(APP_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) $(MAGMA_CFLAGS) $(APP_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) $(MAGMA_CFLAGS) $(APP_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile # When GNU parallel is available and -j is passed to make, run tests through # parallel, using a "starpu" semaphore. # Also make test shell scripts run its tests through parallel, using a # "substarpu" semaphore. This brings some overload, but only one level. @HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') @STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 @STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ @STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) @STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" @STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) AM_TESTS_FD_REDIRECT = 9>&2 # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2016-2016 Uppsala University # Copyright (C) 2011-2011 Télécom Sud Paris # Copyright (C) 2017-2017 Erwan Leria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # SUFFIXES = .hip AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ SUBDIRS = stencil BUILT_SOURCES = ###################### # matVecMult example # ###################### @STARPU_USE_OPENCL_TRUE@nobase_STARPU_OPENCL_DATA_DATA = basic_examples/vector_scal_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ basic_examples/multiformat_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ basic_examples/multiformat_conversion_codelets_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ basic_examples/block_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ basic_examples/variable_kernels_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ filters/fblock_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ filters/custom_mf/conversion_opencl.cl \ @STARPU_USE_OPENCL_TRUE@ filters/custom_mf/custom_opencl.cl \ @STARPU_USE_OPENCL_TRUE@ $(am__append_96) \ @STARPU_USE_OPENCL_TRUE@ incrementer/incrementer_kernels_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ interface/complex_kernels.cl \ @STARPU_USE_OPENCL_TRUE@ interface/complex_dev_handle/complex_dev_handle_kernels.cl \ @STARPU_USE_OPENCL_TRUE@ matvecmult/matvecmult_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ reductions/dot_product_opencl_kernels.cl EXTRA_DIST = \ README.txt \ axpy/axpy.h \ axpy/axpy_opencl_kernel.cl \ basic_examples/vector_scal_opencl_kernel.cl \ basic_examples/multiformat_types.h \ basic_examples/multiformat_opencl_kernel.cl \ basic_examples/multiformat_conversion_codelets_opencl_kernel.cl \ common/blas_model.c \ spmd/vector_scal_spmd.c \ spmv/spmv_cuda.cu \ spmv/spmv_opencl.cl \ spmv/matrix_market/examples/fidapm05.mtx \ mult/xgemm.c \ mult/xgemm_layout.c \ mult/xgemm.h \ mult/sgemm.sh \ lu/xlu.c \ lu/xlu_pivot.c \ lu/xlu_implicit.c \ lu/xlu_implicit_pivot.c \ lu/xlu_kernels.c \ lu/lu_example.c \ incrementer/incrementer_kernels_opencl_kernel.cl \ basic_examples/variable_kernels_opencl_kernel.cl \ matvecmult/matvecmult_kernel.cl \ basic_examples/block_opencl_kernel.cl \ filters/fblock_opencl_kernel.cl \ filters/custom_mf/conversion_opencl.cl \ filters/custom_mf/custom_opencl.cl \ filters/custom_mf/custom_types.h \ interface/complex_kernels.cl \ interface/complex_dev_handle/complex_dev_handle_kernels.cl \ reductions/dot_product.h \ reductions/dot_product_opencl_kernels.cl \ scheduler/libdummy_sched.sh \ scheduler/schedulers.sh \ scheduler/schedulers_context.sh \ fortran/Makefile \ sched_ctx/axpy_partition_gpu.h \ sched_ctx/axpy_partition_gpu.cu \ heat/heat.sh \ cholesky/libmy_dmda.h \ cholesky/cholesky.sh \ cholesky/cholesky_julia.sh \ cholesky/cholesky_compiled.c \ lu/lu.sh \ subgraphs/main.h \ native_fortran/Makefile_nf_dynbuf.mk \ native_fortran/Makefile_nf_example.mk \ native_fortran/Makefile_nf_matrix.mk \ native_fortran/Makefile_nf_partition.mk \ native_fortran/Makefile_nf_sched_ctx.mk \ native_fortran/Makefile_nf_varbuf.mk \ native_fortran/Makefile_nf_vector.mk \ cpp/Makefile_add_vectors_cpp11.mk \ cpp/Makefile_add_vectors.mk \ fortran90/Makefile.mk \ profiling_tool/prof.sh CLEANFILES = *.gcno *.gcda *.linkinfo *.mod starpu_idle_microsec.log *.mps */*.mps */*/*.mps *.dot */*.dot */*/*.dot *.pl */*.pl */*/*.pl *.png *.output tasks.rec perfs.rec */perfs.rec */*/perfs.rec perfs2.rec fortran90/starpu_mod.f90 native_fortran/fstarpu_mod.f90 *.csv *.md *.Rmd *.pdf *.html pkglib_LTLIBRARIES = $(am__append_11) $(am__append_101) examplebindir = $(libdir)/starpu/examples/ noinst_HEADERS = \ axpy/axpy.h \ cg/cg.h \ cg/cg_kernels.c \ heat/lu_kernels_model.h \ heat/dw_sparse_cg.h \ heat/heat.h \ heat/dw_factolu.h \ lu/xlu.h \ lu/xlu_kernels.h \ lu/lu-float.h \ lu/lu-double.h \ lu/complex_float.h \ lu/complex_double.h \ lu/blas_complex.h \ cholesky/cholesky.h \ sched_ctx_utils/sched_ctx_utils.h \ common/blas_model.h \ common/blas.h \ mult/simple.h \ mult/double.h \ fortran/starpu_fortran.h \ ppm_downscaler/ppm_downscaler.h \ ppm_downscaler/yuv_downscaler.h \ spmv/matrix_market/mmio.h \ spmv/matrix_market/mm_to_bcsr.h \ spmv/spmv.h \ spmv/dw_block_spmv.h \ basic_examples/multiformat_types.h \ filters/custom_mf/custom_interface.h \ filters/custom_mf/custom_types.h \ interface/complex_interface.h \ interface/complex_codelet.h \ interface/complex_dev_handle/complex_dev_handle_interface.h \ interface/complex_dev_handle/complex_dev_handle_codelet.h \ pi/pi.h \ pi/SobolQRNG/sobol.h \ pi/SobolQRNG/sobol_gold.h \ pi/SobolQRNG/sobol_gpu.h \ pi/SobolQRNG/sobol_primitives.h \ reductions/dot_product.h \ basic_examples/vector_scal_cpu_template.h \ sched_ctx/axpy_partition_gpu.h SHELL_TESTS = scheduler/schedulers.sh scheduler/schedulers_context.sh \ $(am__append_8) $(am__append_10) $(am__append_22) \ $(am__append_23) $(am__append_102) # STARPU_EXAMPLES list all applications which have to be compiled and checked # Applications which should only be compiled are added directly in examplebin_PROGRAMS # see for instance mandelbrot/mandelbrot STARPU_EXAMPLES = sched_ctx/prio scheduler/dummy_sched \ scheduler/dummy_modular_sched \ worker_collections/worker_list_example api/bcsr_data_interface \ api/block_data_interface api/coo_data_interface \ api/csr_data_interface api/matrix_data_interface \ api/multiformat_data_interface api/tensor_data_interface \ api/variable_data_interface api/vector_data_interface \ api/void_data_interface $(am__append_9) $(am__append_12) \ $(am__append_13) $(am__append_14) $(am__append_15) \ $(am__append_16) $(am__append_18) $(am__append_19) \ $(am__append_21) $(am__append_24) $(am__append_25) \ $(am__append_26) $(am__append_27) $(am__append_28) \ $(am__append_29) $(am__append_30) @STARPU_SIMGRID_FALSE@profiling_tool_libprofiling_tool_la_LDFLAGS = $(ldflags) -no-undefined -module -avoid-version @STARPU_SIMGRID_FALSE@scheduler_libdummy_sched_la_LDFLAGS = $(ldflags) -no-undefined -module -avoid-version @STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@parallel_workers_parallel_workers_CFLAGS = \ @STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ $(AM_CFLAGS) -fopenmp @STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@parallel_workers_parallel_workers_func_CFLAGS = \ @STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ $(AM_CFLAGS) -fopenmp @STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@parallel_workers_parallel_workers_oldapi_CFLAGS = \ @STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@@STARPU_SIMGRID_FALSE@ $(AM_CFLAGS) -fopenmp @STARPU_USE_CUDA_TRUE@sched_ctx_gpu_partition_SOURCES = \ @STARPU_USE_CUDA_TRUE@ sched_ctx/gpu_partition.c \ @STARPU_USE_CUDA_TRUE@ sched_ctx/axpy_partition_gpu.cu ################## # Basic examples # ################## basic_examples_vector_scal_SOURCES = basic_examples/vector_scal.c \ basic_examples/vector_scal_cpu.c $(am__append_31) \ $(am__append_32) $(am__append_34) $(am__append_36) basic_examples_mult_SOURCES = basic_examples/mult.c $(am__append_33) \ $(am__append_35) @STARPU_CROSS_COMPILING_FALSE@@STARPU_HAVE_ICC_TRUE@basic_examples_vector_scal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(ICC) $(basic_examples_vector_scal_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ @STARPU_CROSS_COMPILING_TRUE@@STARPU_HAVE_ICC_TRUE@basic_examples_vector_scal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) $(basic_examples_vector_scal_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ @STARPU_HAVE_ICC_FALSE@basic_examples_vector_scal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) $(basic_examples_vector_scal_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ @STARPU_HAVE_F77_TRUE@basic_examples_vector_scal_fortran_SOURCES = \ @STARPU_HAVE_F77_TRUE@ basic_examples/vector_scal_fortran.F \ @STARPU_HAVE_F77_TRUE@ basic_examples/vector_scal_c.c \ @STARPU_HAVE_F77_TRUE@ basic_examples/vector_scal_cpu.c \ @STARPU_HAVE_F77_TRUE@ $(am__append_37) @STARPU_HAVE_F77_TRUE@@STARPU_USE_CUDA_TRUE@basic_examples_vector_scal_fortran_LDADD = \ @STARPU_HAVE_F77_TRUE@@STARPU_USE_CUDA_TRUE@ $(STARPU_CUDA_FORTRAN_LDFLAGS) @STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@fortran_hello_SOURCES = \ @STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@ fortran/hello_c.c \ @STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@ fortran/hello.F \ @STARPU_HAVE_F77_H_TRUE@@STARPU_HAVE_F77_TRUE@ fortran/starpu_fortran.h @STARPU_HAVE_FC_TRUE@fortran90_f90_example_SOURCES = \ @STARPU_HAVE_FC_TRUE@ fortran90/mod_types.f90 \ @STARPU_HAVE_FC_TRUE@ fortran90/starpu_mod.f90 \ @STARPU_HAVE_FC_TRUE@ fortran90/mod_interface.f90 \ @STARPU_HAVE_FC_TRUE@ fortran90/mod_compute.f90 \ @STARPU_HAVE_FC_TRUE@ fortran90/marshalling.c \ @STARPU_HAVE_FC_TRUE@ fortran90/f90_example.f90 @STARPU_HAVE_FC_TRUE@native_fortran_nf_vector_SOURCES = \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_codelets.f90 \ @STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.f90 \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_vector.f90 @STARPU_HAVE_FC_TRUE@native_fortran_nf_matrix_SOURCES = \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_codelets.f90 \ @STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.f90 \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_matrix.f90 @STARPU_HAVE_FC_TRUE@native_fortran_nf_example_SOURCES = \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_types.f90 \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_compute.f90 \ @STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.f90 \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_example.f90 @STARPU_HAVE_FC_TRUE@native_fortran_nf_dynbuf_SOURCES = \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_dynbuf_cl.f90 \ @STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.f90 \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_dynbuf.f90 @STARPU_HAVE_FC_TRUE@native_fortran_nf_varbuf_SOURCES = \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_varbuf_cl.f90 \ @STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.f90 \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_varbuf.f90 @STARPU_HAVE_FC_TRUE@native_fortran_nf_sched_ctx_SOURCES = \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_sched_ctx_cl.f90 \ @STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.f90 \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_sched_ctx.f90 @STARPU_HAVE_FC_TRUE@native_fortran_nf_partition_SOURCES = \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_partition_cl.f90 \ @STARPU_HAVE_FC_TRUE@ native_fortran/fstarpu_mod.f90 \ @STARPU_HAVE_FC_TRUE@ native_fortran/nf_partition.f90 ####################### # Multiformat example # ####################### basic_examples_multiformat_SOURCES = basic_examples/multiformat.c \ basic_examples/multiformat_conversion_codelets.c \ $(am__append_38) $(am__append_39) ################# # block example # ################# basic_examples_block_SOURCES = basic_examples/block.c \ basic_examples/block_cpu.c $(am__append_40) $(am__append_41) \ $(am__append_42) #################### # Variable example # #################### basic_examples_variable_SOURCES = basic_examples/variable.c \ basic_examples/variable_kernels_cpu.c $(am__append_43) \ $(am__append_44) ########### # Filters # ########### filters_fvector_SOURCES = filters/fvector.c filters/fvector_cpu.c \ $(am__append_45) $(am__append_46) filters_fmatrix_SOURCES = filters/fmatrix.c filters/fmatrix_print.c \ filters/fmatrix_cpu.c $(am__append_47) $(am__append_48) filters_fblock_SOURCES = filters/fblock.c filters/fblock_print.c \ filters/fblock_cpu.c $(am__append_49) $(am__append_50) \ $(am__append_51) filters_ftensor_SOURCES = filters/ftensor.c filters/ftensor_print.c \ filters/ftensor_cpu.c $(am__append_52) $(am__append_53) filters_fndim_SOURCES = filters/fndim.c filters/ftensor_print.c \ filters/f4d_cpu.c $(am__append_54) $(am__append_55) filters_fmatrix_pick_vector_SOURCES = filters/fmatrix_pick_vector.c \ filters/fmatrix_print.c filters/fvector_cpu.c $(am__append_56) \ $(am__append_57) filters_fmatrix_pick_variable_SOURCES = \ filters/fmatrix_pick_variable.c filters/fmatrix_print.c \ $(am__append_58) filters_fblock_pick_matrix_SOURCES = filters/fblock_pick_matrix.c \ filters/fblock_print.c filters/fmatrix_print.c \ filters/fmatrix_cpu.c $(am__append_59) $(am__append_60) filters_fblock_pick_variable_SOURCES = filters/fblock_pick_variable.c \ filters/fblock_print.c $(am__append_61) filters_ftensor_pick_block_SOURCES = filters/ftensor_pick_block.c \ filters/ftensor_print.c filters/fblock_print.c \ filters/fblock_cpu.c $(am__append_62) $(am__append_63) filters_ftensor_pick_variable_SOURCES = \ filters/ftensor_pick_variable.c filters/ftensor_print.c \ $(am__append_64) filters_fndim_pick_ndim_SOURCES = filters/fndim_pick_ndim.c \ filters/ftensor_print.c filters/fblock_print.c \ filters/f3d_cpu.c $(am__append_65) $(am__append_66) filters_fndim_5d_pick_tensor_SOURCES = filters/fndim_5d_pick_tensor.c \ filters/f5d_print.c filters/ftensor_print.c \ filters/ftensor_cpu.c $(am__append_67) $(am__append_68) filters_fndim_4d_pick_block_SOURCES = filters/fndim_4d_pick_block.c \ filters/ftensor_print.c filters/fblock_print.c \ filters/fblock_cpu.c $(am__append_69) $(am__append_70) filters_fndim_3d_pick_matrix_SOURCES = filters/fndim_3d_pick_matrix.c \ filters/fblock_print.c filters/fmatrix_print.c \ filters/fmatrix_cpu.c $(am__append_71) $(am__append_72) filters_fndim_2d_pick_vector_SOURCES = filters/fndim_2d_pick_vector.c \ filters/fmatrix_print.c filters/fvector_cpu.c $(am__append_73) \ $(am__append_74) filters_fndim_pick_variable_SOURCES = \ filters/fndim_pick_variable.c \ filters/f5d_print.c filters_fndim_to_tensor_SOURCES = filters/fndim_to_tensor.c \ filters/ftensor_print.c filters/ftensor_cpu.c $(am__append_75) \ $(am__append_76) filters_fndim_to_block_SOURCES = filters/fndim_to_block.c \ filters/fblock_print.c filters/fblock_cpu.c $(am__append_77) \ $(am__append_78) filters_fndim_to_matrix_SOURCES = filters/fndim_to_matrix.c \ filters/fmatrix_print.c filters/fmatrix_cpu.c $(am__append_79) \ $(am__append_80) filters_fndim_to_vector_SOURCES = filters/fndim_to_vector.c \ filters/fvector_cpu.c $(am__append_81) $(am__append_82) filters_fmultiple_manual_SOURCES = filters/fmultiple_manual.c \ $(am__append_83) $(am__append_84) filters_fmultiple_submit_SOURCES = filters/fmultiple_submit.c \ $(am__append_85) $(am__append_86) filters_fmultiple_submit_readonly_SOURCES = \ filters/fmultiple_submit_readonly.c $(am__append_87) \ $(am__append_89) filters_fmultiple_submit_readonly_downgrade_SOURCES = \ filters/fmultiple_submit_readonly_downgrade.c $(am__append_88) \ $(am__append_90) filters_fmultiple_submit_implicit_SOURCES = \ filters/fmultiple_submit_implicit.c $(am__append_91) \ $(am__append_92) filters_custom_mf_custom_mf_filter_SOURCES = \ filters/custom_mf/custom_mf_filter.c \ filters/custom_mf/custom_interface.c \ filters/custom_mf/custom_conversion_codelets.c \ $(am__append_93) $(am__append_94) ################ # AXPY example # ################ @STARPU_NO_BLAS_LIB_FALSE@axpy_axpy_SOURCES = axpy/axpy.c \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c $(am__append_95) @STARPU_NO_BLAS_LIB_FALSE@axpy_axpy_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) ################ # Mult example # ################ mult_sgemm_SOURCES = mult/sgemm.c $(am__append_97) mult_sgemm_LDADD = \ $(STARPU_BLAS_LDFLAGS) mult_sgemm_layout_SOURCES = mult/sgemm_layout.c $(am__append_99) mult_sgemm_layout_LDADD = \ $(STARPU_BLAS_LDFLAGS) mult_dgemm_SOURCES = mult/dgemm.c $(am__append_98) mult_dgemm_LDADD = \ $(STARPU_BLAS_LDFLAGS) mult_dgemm_layout_SOURCES = mult/dgemm_layout.c $(am__append_100) mult_dgemm_layout_LDADD = \ $(STARPU_BLAS_LDFLAGS) ##################### # Trs_sgemm example # ##################### @STARPU_NO_BLAS_LIB_FALSE@transactions_trs_sgemm_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ transactions/trs_sgemm.c \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@transactions_trs_sgemm_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) @STARPU_NO_BLAS_LIB_FALSE@cholesky_libmy_dmda_la_LDFLAGS = $(ldflags) -no-undefined -module -avoid-version @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_tag_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_tag.c \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.c \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_tag_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_tile_tag_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_tile_tag.c \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.c \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_tile_tag_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_grain_tag_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_grain_tag.c \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.c \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_grain_tag_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_implicit_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_implicit.c \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.c \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ sched_ctx_utils/sched_ctx_utils.c \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_implicit_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_compil_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_compil.c \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.c \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ sched_ctx_utils/sched_ctx_utils.c \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_compil_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) ############## # LU example # ############## @STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_float_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_float.c \ @STARPU_NO_BLAS_LIB_FALSE@ lu/slu.c \ @STARPU_NO_BLAS_LIB_FALSE@ lu/slu_pivot.c \ @STARPU_NO_BLAS_LIB_FALSE@ lu/slu_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_float_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) @STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_double_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_double.c \ @STARPU_NO_BLAS_LIB_FALSE@ lu/dlu.c \ @STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_pivot.c \ @STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_double_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) @STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_float_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_float.c \ @STARPU_NO_BLAS_LIB_FALSE@ lu/slu_implicit.c \ @STARPU_NO_BLAS_LIB_FALSE@ lu/slu_implicit_pivot.c \ @STARPU_NO_BLAS_LIB_FALSE@ lu/slu_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_float_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) @STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_double_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_double.c \ @STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_implicit.c \ @STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_implicit_pivot.c \ @STARPU_NO_BLAS_LIB_FALSE@ lu/dlu_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_double_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_complex_float_SOURCES = \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_complex_float.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_pivot.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_kernels.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_complex_float_LDADD = \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_complex_float_SOURCES = \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_complex_float.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_implicit.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_implicit_pivot.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/clu_kernels.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_complex_float_LDADD = \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_complex_double_SOURCES = \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_complex_double.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_pivot.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_kernels.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_example_complex_double_LDADD = \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_complex_double_SOURCES = \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/lu_example_complex_double.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_implicit.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_implicit_pivot.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/zlu_kernels.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ lu/blas_complex.c \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@lu_lu_implicit_example_complex_double_LDADD = \ @STARPU_MKL_BLAS_LIB_TRUE@@STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) ################ # Heat example # ################ @STARPU_NO_BLAS_LIB_FALSE@heat_heat_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ heat/heat.c \ @STARPU_NO_BLAS_LIB_FALSE@ heat/dw_factolu.c \ @STARPU_NO_BLAS_LIB_FALSE@ heat/dw_factolu_tag.c \ @STARPU_NO_BLAS_LIB_FALSE@ heat/dw_factolu_grain.c \ @STARPU_NO_BLAS_LIB_FALSE@ heat/dw_sparse_cg.c \ @STARPU_NO_BLAS_LIB_FALSE@ heat/heat_display.c \ @STARPU_NO_BLAS_LIB_FALSE@ heat/lu_kernels_model.c \ @STARPU_NO_BLAS_LIB_FALSE@ heat/dw_sparse_cg_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ heat/dw_factolu_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@heat_heat_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_OPENGL_RENDER_LDFLAGS) \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) ############## # CG example # ############## @STARPU_NO_BLAS_LIB_FALSE@cg_cg_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ cg/cg.c \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@cg_cg_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) ################ # SPMD example # ################ spmd_vector_scal_spmd_SOURCES = \ spmd/vector_scal_spmd.c ################ # SpMV example # ################ spmv_spmv_SOURCES = spmv/spmv.c spmv/spmv_kernels.c $(am__append_103) spmv_dw_block_spmv_SOURCES = \ spmv/dw_block_spmv.c \ spmv/dw_block_spmv_kernels.c \ spmv/matrix_market/mm_to_bcsr.c \ spmv/matrix_market/mmio.c spmv_dw_block_spmv_LDADD = \ $(STARPU_BLAS_LDFLAGS) ########################### # C++ Incrementer example # ########################### cpp_incrementer_cpp_SOURCES = cpp/incrementer_cpp.cpp \ $(am__append_104) $(am__append_105) ########################### # C++ Add vectors example # ########################### cpp_add_vectors_SOURCES = \ cpp/add_vectors.cpp cpp_add_vectors_interface_SOURCES = \ cpp/add_vectors_interface.cpp @STARPU_HAVE_CXX11_TRUE@cpp_add_vectors_cpp11_SOURCES = \ @STARPU_HAVE_CXX11_TRUE@ cpp/add_vectors_cpp11.cpp ####################### # Incrementer example # ####################### incrementer_incrementer_SOURCES = incrementer/incrementer.c \ $(am__append_106) $(am__append_107) ################## # Binary example # ################## binary_binary_SOURCES = binary/binary.c $(am__append_108) ##################### # interface example # ##################### interface_complex_SOURCES = interface/complex.c \ interface/complex_interface.c interface/complex_filters.c \ $(am__append_109) $(am__append_110) interface_complex_dev_handle_complex_dev_handle_SOURCES = \ interface/complex_dev_handle/complex_dev_handle.c \ interface/complex_dev_handle/complex_dev_handle_interface.c \ interface/complex_dev_handle/complex_dev_handle_filters.c \ $(am__append_111) $(am__append_112) ####################### # dot_product example # ####################### reductions_dot_product_SOURCES = reductions/dot_product.c \ $(am__append_113) mandelbrot_mandelbrot_CPPFLAGS = $(AM_CPPFLAGS) $(am__append_114) @STARPU_HAVE_X11_TRUE@mandelbrot_mandelbrot_LDADD = $(X_PRE_LIBS) $(X_LIBS) -lX11 $(X_EXTRA_LIBS) @STARPU_HAVE_WINDOWS_FALSE@pi_pi_SOURCES = pi/pi.c \ @STARPU_HAVE_WINDOWS_FALSE@ pi/SobolQRNG/sobol_gold.c \ @STARPU_HAVE_WINDOWS_FALSE@ pi/SobolQRNG/sobol_primitives.c \ @STARPU_HAVE_WINDOWS_FALSE@ $(am__append_116) @STARPU_HAVE_WINDOWS_FALSE@pi_pi_redux_SOURCES = pi/pi_redux.c \ @STARPU_HAVE_WINDOWS_FALSE@ $(am__append_117) @STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@pi_pi_redux_LDADD = \ @STARPU_HAVE_WINDOWS_FALSE@@STARPU_USE_CUDA_TRUE@ $(STARPU_CURAND_LDFLAGS) @STARPU_HAVE_OPENGL_TRUE@gl_interop_gl_interop_LDADD = \ @STARPU_HAVE_OPENGL_TRUE@ $(STARPU_OPENGL_RENDER_LDFLAGS) @STARPU_HAVE_OPENGL_TRUE@gl_interop_gl_interop_idle_LDADD = \ @STARPU_HAVE_OPENGL_TRUE@ $(STARPU_OPENGL_RENDER_LDFLAGS) #################### # pipeline example # #################### @STARPU_NO_BLAS_LIB_FALSE@pipeline_pipeline_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ pipeline/pipeline.c \ @STARPU_NO_BLAS_LIB_FALSE@ common/blas.c @STARPU_NO_BLAS_LIB_FALSE@pipeline_pipeline_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) ################## # openmp example # ################## @STARPU_HAVE_OPENMP_TRUE@openmp_vector_scal_omp_CFLAGS = \ @STARPU_HAVE_OPENMP_TRUE@ $(AM_CFLAGS) -fopenmp @STARPU_HAVE_OPENMP_TRUE@sched_ctx_parallel_code_CFLAGS = \ @STARPU_HAVE_OPENMP_TRUE@ $(AM_CFLAGS) -fopenmp @STARPU_HAVE_OPENMP_TRUE@sched_ctx_sched_ctx_without_sched_policy_CFLAGS = \ @STARPU_HAVE_OPENMP_TRUE@ $(AM_CFLAGS) -fopenmp @STARPU_HAVE_OPENMP_TRUE@sched_ctx_nested_sched_ctxs_CFLAGS = \ @STARPU_HAVE_OPENMP_TRUE@ $(AM_CFLAGS) -fopenmp @STARPU_HAVE_OPENMP_TRUE@sched_ctx_parallel_tasks_reuse_handle_CFLAGS = \ @STARPU_HAVE_OPENMP_TRUE@ $(AM_CFLAGS) -fopenmp ###################### # subgraphs examples # ###################### subgraphs_manual_SOURCES = \ subgraphs/manual.c \ subgraphs/codelets.c subgraphs_partition_SOURCES = \ subgraphs/partition.c \ subgraphs/codelets.c subgraphs_plan_SOURCES = \ subgraphs/plan.c \ subgraphs/codelets.c all: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) all-recursive .SUFFIXES: .SUFFIXES: .hip .F .c .cpp .cu .cubin .f .f90 .icc .lo .log .o .obj .test .test$(EXEEXT) .trs $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign examples/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign examples/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-checkPROGRAMS: @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list install-examplebinPROGRAMS: $(examplebin_PROGRAMS) @$(NORMAL_INSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ fi; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ while read p p1; do if test -f $$p \ || test -f $$p1 \ ; then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' \ -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ } \ ; done uninstall-examplebinPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' \ `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(examplebindir)" && rm -f $$files clean-examplebinPROGRAMS: @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list clean-noinstPROGRAMS: @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES) @$(NORMAL_INSTALL) @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ list2=; for p in $$list; do \ if test -f $$p; then \ list2="$$list2 $$p"; \ else :; fi; \ done; \ test -z "$$list2" || { \ echo " $(MKDIR_P) '$(DESTDIR)$(pkglibdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkglibdir)" || exit 1; \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(pkglibdir)'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(pkglibdir)"; \ } uninstall-pkglibLTLIBRARIES: @$(NORMAL_UNINSTALL) @list='$(pkglib_LTLIBRARIES)'; test -n "$(pkglibdir)" || list=; \ for p in $$list; do \ $(am__strip_dir) \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$f'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$f"; \ done clean-pkglibLTLIBRARIES: -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES) @list='$(pkglib_LTLIBRARIES)'; \ locs=`for p in $$list; do echo $$p; done | \ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ sort -u`; \ test -z "$$locs" || { \ echo rm -f $${locs}; \ rm -f $${locs}; \ } cholesky/$(am__dirstamp): @$(MKDIR_P) cholesky @: > cholesky/$(am__dirstamp) cholesky/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) cholesky/$(DEPDIR) @: > cholesky/$(DEPDIR)/$(am__dirstamp) cholesky/libmy_dmda.lo: cholesky/$(am__dirstamp) \ cholesky/$(DEPDIR)/$(am__dirstamp) cholesky/libmy_dmda.la: $(cholesky_libmy_dmda_la_OBJECTS) $(cholesky_libmy_dmda_la_DEPENDENCIES) $(EXTRA_cholesky_libmy_dmda_la_DEPENDENCIES) cholesky/$(am__dirstamp) $(AM_V_CCLD)$(cholesky_libmy_dmda_la_LINK) $(am_cholesky_libmy_dmda_la_rpath) $(cholesky_libmy_dmda_la_OBJECTS) $(cholesky_libmy_dmda_la_LIBADD) $(LIBS) profiling_tool/$(am__dirstamp): @$(MKDIR_P) profiling_tool @: > profiling_tool/$(am__dirstamp) profiling_tool/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) profiling_tool/$(DEPDIR) @: > profiling_tool/$(DEPDIR)/$(am__dirstamp) profiling_tool/libprofiling_tool.lo: profiling_tool/$(am__dirstamp) \ profiling_tool/$(DEPDIR)/$(am__dirstamp) profiling_tool/libprofiling_tool.la: $(profiling_tool_libprofiling_tool_la_OBJECTS) $(profiling_tool_libprofiling_tool_la_DEPENDENCIES) $(EXTRA_profiling_tool_libprofiling_tool_la_DEPENDENCIES) profiling_tool/$(am__dirstamp) $(AM_V_CCLD)$(profiling_tool_libprofiling_tool_la_LINK) $(am_profiling_tool_libprofiling_tool_la_rpath) $(profiling_tool_libprofiling_tool_la_OBJECTS) $(profiling_tool_libprofiling_tool_la_LIBADD) $(LIBS) scheduler/$(am__dirstamp): @$(MKDIR_P) scheduler @: > scheduler/$(am__dirstamp) scheduler/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) scheduler/$(DEPDIR) @: > scheduler/$(DEPDIR)/$(am__dirstamp) scheduler/libdummy_sched.lo: scheduler/$(am__dirstamp) \ scheduler/$(DEPDIR)/$(am__dirstamp) scheduler/libdummy_sched.la: $(scheduler_libdummy_sched_la_OBJECTS) $(scheduler_libdummy_sched_la_DEPENDENCIES) $(EXTRA_scheduler_libdummy_sched_la_DEPENDENCIES) scheduler/$(am__dirstamp) $(AM_V_CCLD)$(scheduler_libdummy_sched_la_LINK) $(am_scheduler_libdummy_sched_la_rpath) $(scheduler_libdummy_sched_la_OBJECTS) $(scheduler_libdummy_sched_la_LIBADD) $(LIBS) api/$(am__dirstamp): @$(MKDIR_P) api @: > api/$(am__dirstamp) api/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) api/$(DEPDIR) @: > api/$(DEPDIR)/$(am__dirstamp) api/bcsr_data_interface.$(OBJEXT): api/$(am__dirstamp) \ api/$(DEPDIR)/$(am__dirstamp) api/bcsr_data_interface$(EXEEXT): $(api_bcsr_data_interface_OBJECTS) $(api_bcsr_data_interface_DEPENDENCIES) $(EXTRA_api_bcsr_data_interface_DEPENDENCIES) api/$(am__dirstamp) @rm -f api/bcsr_data_interface$(EXEEXT) $(AM_V_CCLD)$(LINK) $(api_bcsr_data_interface_OBJECTS) $(api_bcsr_data_interface_LDADD) $(LIBS) api/block_data_interface.$(OBJEXT): api/$(am__dirstamp) \ api/$(DEPDIR)/$(am__dirstamp) api/block_data_interface$(EXEEXT): $(api_block_data_interface_OBJECTS) $(api_block_data_interface_DEPENDENCIES) $(EXTRA_api_block_data_interface_DEPENDENCIES) api/$(am__dirstamp) @rm -f api/block_data_interface$(EXEEXT) $(AM_V_CCLD)$(LINK) $(api_block_data_interface_OBJECTS) $(api_block_data_interface_LDADD) $(LIBS) api/coo_data_interface.$(OBJEXT): api/$(am__dirstamp) \ api/$(DEPDIR)/$(am__dirstamp) api/coo_data_interface$(EXEEXT): $(api_coo_data_interface_OBJECTS) $(api_coo_data_interface_DEPENDENCIES) $(EXTRA_api_coo_data_interface_DEPENDENCIES) api/$(am__dirstamp) @rm -f api/coo_data_interface$(EXEEXT) $(AM_V_CCLD)$(LINK) $(api_coo_data_interface_OBJECTS) $(api_coo_data_interface_LDADD) $(LIBS) api/csr_data_interface.$(OBJEXT): api/$(am__dirstamp) \ api/$(DEPDIR)/$(am__dirstamp) api/csr_data_interface$(EXEEXT): $(api_csr_data_interface_OBJECTS) $(api_csr_data_interface_DEPENDENCIES) $(EXTRA_api_csr_data_interface_DEPENDENCIES) api/$(am__dirstamp) @rm -f api/csr_data_interface$(EXEEXT) $(AM_V_CCLD)$(LINK) $(api_csr_data_interface_OBJECTS) $(api_csr_data_interface_LDADD) $(LIBS) api/matrix_data_interface.$(OBJEXT): api/$(am__dirstamp) \ api/$(DEPDIR)/$(am__dirstamp) api/matrix_data_interface$(EXEEXT): $(api_matrix_data_interface_OBJECTS) $(api_matrix_data_interface_DEPENDENCIES) $(EXTRA_api_matrix_data_interface_DEPENDENCIES) api/$(am__dirstamp) @rm -f api/matrix_data_interface$(EXEEXT) $(AM_V_CCLD)$(LINK) $(api_matrix_data_interface_OBJECTS) $(api_matrix_data_interface_LDADD) $(LIBS) api/multiformat_data_interface.$(OBJEXT): api/$(am__dirstamp) \ api/$(DEPDIR)/$(am__dirstamp) api/multiformat_data_interface$(EXEEXT): $(api_multiformat_data_interface_OBJECTS) $(api_multiformat_data_interface_DEPENDENCIES) $(EXTRA_api_multiformat_data_interface_DEPENDENCIES) api/$(am__dirstamp) @rm -f api/multiformat_data_interface$(EXEEXT) $(AM_V_CCLD)$(LINK) $(api_multiformat_data_interface_OBJECTS) $(api_multiformat_data_interface_LDADD) $(LIBS) api/tensor_data_interface.$(OBJEXT): api/$(am__dirstamp) \ api/$(DEPDIR)/$(am__dirstamp) api/tensor_data_interface$(EXEEXT): $(api_tensor_data_interface_OBJECTS) $(api_tensor_data_interface_DEPENDENCIES) $(EXTRA_api_tensor_data_interface_DEPENDENCIES) api/$(am__dirstamp) @rm -f api/tensor_data_interface$(EXEEXT) $(AM_V_CCLD)$(LINK) $(api_tensor_data_interface_OBJECTS) $(api_tensor_data_interface_LDADD) $(LIBS) api/variable_data_interface.$(OBJEXT): api/$(am__dirstamp) \ api/$(DEPDIR)/$(am__dirstamp) api/variable_data_interface$(EXEEXT): $(api_variable_data_interface_OBJECTS) $(api_variable_data_interface_DEPENDENCIES) $(EXTRA_api_variable_data_interface_DEPENDENCIES) api/$(am__dirstamp) @rm -f api/variable_data_interface$(EXEEXT) $(AM_V_CCLD)$(LINK) $(api_variable_data_interface_OBJECTS) $(api_variable_data_interface_LDADD) $(LIBS) api/vector_data_interface.$(OBJEXT): api/$(am__dirstamp) \ api/$(DEPDIR)/$(am__dirstamp) api/vector_data_interface$(EXEEXT): $(api_vector_data_interface_OBJECTS) $(api_vector_data_interface_DEPENDENCIES) $(EXTRA_api_vector_data_interface_DEPENDENCIES) api/$(am__dirstamp) @rm -f api/vector_data_interface$(EXEEXT) $(AM_V_CCLD)$(LINK) $(api_vector_data_interface_OBJECTS) $(api_vector_data_interface_LDADD) $(LIBS) api/void_data_interface.$(OBJEXT): api/$(am__dirstamp) \ api/$(DEPDIR)/$(am__dirstamp) api/void_data_interface$(EXEEXT): $(api_void_data_interface_OBJECTS) $(api_void_data_interface_DEPENDENCIES) $(EXTRA_api_void_data_interface_DEPENDENCIES) api/$(am__dirstamp) @rm -f api/void_data_interface$(EXEEXT) $(AM_V_CCLD)$(LINK) $(api_void_data_interface_OBJECTS) $(api_void_data_interface_LDADD) $(LIBS) axpy/$(am__dirstamp): @$(MKDIR_P) axpy @: > axpy/$(am__dirstamp) axpy/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) axpy/$(DEPDIR) @: > axpy/$(DEPDIR)/$(am__dirstamp) axpy/axpy.$(OBJEXT): axpy/$(am__dirstamp) \ axpy/$(DEPDIR)/$(am__dirstamp) common/$(am__dirstamp): @$(MKDIR_P) common @: > common/$(am__dirstamp) common/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) common/$(DEPDIR) @: > common/$(DEPDIR)/$(am__dirstamp) common/blas.$(OBJEXT): common/$(am__dirstamp) \ common/$(DEPDIR)/$(am__dirstamp) axpy/axpy_opencl.$(OBJEXT): axpy/$(am__dirstamp) \ axpy/$(DEPDIR)/$(am__dirstamp) axpy/axpy$(EXEEXT): $(axpy_axpy_OBJECTS) $(axpy_axpy_DEPENDENCIES) $(EXTRA_axpy_axpy_DEPENDENCIES) axpy/$(am__dirstamp) @rm -f axpy/axpy$(EXEEXT) $(AM_V_CCLD)$(LINK) $(axpy_axpy_OBJECTS) $(axpy_axpy_LDADD) $(LIBS) basic_examples/$(am__dirstamp): @$(MKDIR_P) basic_examples @: > basic_examples/$(am__dirstamp) basic_examples/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) basic_examples/$(DEPDIR) @: > basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/block.$(OBJEXT): basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/block_cpu.$(OBJEXT): basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/block_cuda.$(OBJEXT): basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/block_hip.$(OBJEXT): basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/block_opencl.$(OBJEXT): basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/block$(EXEEXT): $(basic_examples_block_OBJECTS) $(basic_examples_block_DEPENDENCIES) $(EXTRA_basic_examples_block_DEPENDENCIES) basic_examples/$(am__dirstamp) @rm -f basic_examples/block$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_examples_block_OBJECTS) $(basic_examples_block_LDADD) $(LIBS) basic_examples/dynamic_handles.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/dynamic_handles$(EXEEXT): $(basic_examples_dynamic_handles_OBJECTS) $(basic_examples_dynamic_handles_DEPENDENCIES) $(EXTRA_basic_examples_dynamic_handles_DEPENDENCIES) basic_examples/$(am__dirstamp) @rm -f basic_examples/dynamic_handles$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_examples_dynamic_handles_OBJECTS) $(basic_examples_dynamic_handles_LDADD) $(LIBS) basic_examples/hello_world.$(OBJEXT): basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/hello_world$(EXEEXT): $(basic_examples_hello_world_OBJECTS) $(basic_examples_hello_world_DEPENDENCIES) $(EXTRA_basic_examples_hello_world_DEPENDENCIES) basic_examples/$(am__dirstamp) @rm -f basic_examples/hello_world$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_examples_hello_world_OBJECTS) $(basic_examples_hello_world_LDADD) $(LIBS) basic_examples/hooks.$(OBJEXT): basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/hooks$(EXEEXT): $(basic_examples_hooks_OBJECTS) $(basic_examples_hooks_DEPENDENCIES) $(EXTRA_basic_examples_hooks_DEPENDENCIES) basic_examples/$(am__dirstamp) @rm -f basic_examples/hooks$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_examples_hooks_OBJECTS) $(basic_examples_hooks_LDADD) $(LIBS) basic_examples/mult.$(OBJEXT): basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/mult_cuda.$(OBJEXT): basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/mult_hip.$(OBJEXT): basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/mult$(EXEEXT): $(basic_examples_mult_OBJECTS) $(basic_examples_mult_DEPENDENCIES) $(EXTRA_basic_examples_mult_DEPENDENCIES) basic_examples/$(am__dirstamp) @rm -f basic_examples/mult$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_examples_mult_OBJECTS) $(basic_examples_mult_LDADD) $(LIBS) basic_examples/multiformat.$(OBJEXT): basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/multiformat_conversion_codelets.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/multiformat_cuda.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/multiformat_conversion_codelets_cuda.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/multiformat_opencl.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/multiformat_conversion_codelets_opencl.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/multiformat$(EXEEXT): $(basic_examples_multiformat_OBJECTS) $(basic_examples_multiformat_DEPENDENCIES) $(EXTRA_basic_examples_multiformat_DEPENDENCIES) basic_examples/$(am__dirstamp) @rm -f basic_examples/multiformat$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_examples_multiformat_OBJECTS) $(basic_examples_multiformat_LDADD) $(LIBS) basic_examples/ndim.$(OBJEXT): basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/ndim$(EXEEXT): $(basic_examples_ndim_OBJECTS) $(basic_examples_ndim_DEPENDENCIES) $(EXTRA_basic_examples_ndim_DEPENDENCIES) basic_examples/$(am__dirstamp) @rm -f basic_examples/ndim$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_examples_ndim_OBJECTS) $(basic_examples_ndim_LDADD) $(LIBS) basic_examples/task_insert_color.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/task_insert_color$(EXEEXT): $(basic_examples_task_insert_color_OBJECTS) $(basic_examples_task_insert_color_DEPENDENCIES) $(EXTRA_basic_examples_task_insert_color_DEPENDENCIES) basic_examples/$(am__dirstamp) @rm -f basic_examples/task_insert_color$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_examples_task_insert_color_OBJECTS) $(basic_examples_task_insert_color_LDADD) $(LIBS) basic_examples/topology.$(OBJEXT): basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/topology$(EXEEXT): $(basic_examples_topology_OBJECTS) $(basic_examples_topology_DEPENDENCIES) $(EXTRA_basic_examples_topology_DEPENDENCIES) basic_examples/$(am__dirstamp) @rm -f basic_examples/topology$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_examples_topology_OBJECTS) $(basic_examples_topology_LDADD) $(LIBS) basic_examples/variable.$(OBJEXT): basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/variable_kernels_cpu.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/variable_kernels.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/variable_kernels_opencl.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/variable$(EXEEXT): $(basic_examples_variable_OBJECTS) $(basic_examples_variable_DEPENDENCIES) $(EXTRA_basic_examples_variable_DEPENDENCIES) basic_examples/$(am__dirstamp) @rm -f basic_examples/variable$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_examples_variable_OBJECTS) $(basic_examples_variable_LDADD) $(LIBS) basic_examples/vector_scal.$(OBJEXT): basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/vector_scal_cpu.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/vector_scal_cpu_icc.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/vector_scal_cuda.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/vector_scal_hip.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/vector_scal_opencl.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/vector_scal$(EXEEXT): $(basic_examples_vector_scal_OBJECTS) $(basic_examples_vector_scal_DEPENDENCIES) $(EXTRA_basic_examples_vector_scal_DEPENDENCIES) basic_examples/$(am__dirstamp) @rm -f basic_examples/vector_scal$(EXEEXT) $(AM_V_GEN)$(basic_examples_vector_scal_LINK) $(basic_examples_vector_scal_OBJECTS) $(basic_examples_vector_scal_LDADD) $(LIBS) basic_examples/vector_scal_fortran.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/vector_scal_c.$(OBJEXT): \ basic_examples/$(am__dirstamp) \ basic_examples/$(DEPDIR)/$(am__dirstamp) basic_examples/vector_scal_fortran$(EXEEXT): $(basic_examples_vector_scal_fortran_OBJECTS) $(basic_examples_vector_scal_fortran_DEPENDENCIES) $(EXTRA_basic_examples_vector_scal_fortran_DEPENDENCIES) basic_examples/$(am__dirstamp) @rm -f basic_examples/vector_scal_fortran$(EXEEXT) $(AM_V_F77LD)$(F77LINK) $(basic_examples_vector_scal_fortran_OBJECTS) $(basic_examples_vector_scal_fortran_LDADD) $(LIBS) binary/$(am__dirstamp): @$(MKDIR_P) binary @: > binary/$(am__dirstamp) binary/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) binary/$(DEPDIR) @: > binary/$(DEPDIR)/$(am__dirstamp) binary/binary.$(OBJEXT): binary/$(am__dirstamp) \ binary/$(DEPDIR)/$(am__dirstamp) incrementer/$(am__dirstamp): @$(MKDIR_P) incrementer @: > incrementer/$(am__dirstamp) incrementer/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) incrementer/$(DEPDIR) @: > incrementer/$(DEPDIR)/$(am__dirstamp) incrementer/incrementer_kernels_opencl.$(OBJEXT): \ incrementer/$(am__dirstamp) \ incrementer/$(DEPDIR)/$(am__dirstamp) binary/binary$(EXEEXT): $(binary_binary_OBJECTS) $(binary_binary_DEPENDENCIES) $(EXTRA_binary_binary_DEPENDENCIES) binary/$(am__dirstamp) @rm -f binary/binary$(EXEEXT) $(AM_V_CCLD)$(LINK) $(binary_binary_OBJECTS) $(binary_binary_LDADD) $(LIBS) callback/$(am__dirstamp): @$(MKDIR_P) callback @: > callback/$(am__dirstamp) callback/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) callback/$(DEPDIR) @: > callback/$(DEPDIR)/$(am__dirstamp) callback/callback.$(OBJEXT): callback/$(am__dirstamp) \ callback/$(DEPDIR)/$(am__dirstamp) callback/callback$(EXEEXT): $(callback_callback_OBJECTS) $(callback_callback_DEPENDENCIES) $(EXTRA_callback_callback_DEPENDENCIES) callback/$(am__dirstamp) @rm -f callback/callback$(EXEEXT) $(AM_V_CCLD)$(LINK) $(callback_callback_OBJECTS) $(callback_callback_LDADD) $(LIBS) callback/prologue.$(OBJEXT): callback/$(am__dirstamp) \ callback/$(DEPDIR)/$(am__dirstamp) callback/prologue$(EXEEXT): $(callback_prologue_OBJECTS) $(callback_prologue_DEPENDENCIES) $(EXTRA_callback_prologue_DEPENDENCIES) callback/$(am__dirstamp) @rm -f callback/prologue$(EXEEXT) $(AM_V_CCLD)$(LINK) $(callback_prologue_OBJECTS) $(callback_prologue_LDADD) $(LIBS) cg/$(am__dirstamp): @$(MKDIR_P) cg @: > cg/$(am__dirstamp) cg/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) cg/$(DEPDIR) @: > cg/$(DEPDIR)/$(am__dirstamp) cg/cg.$(OBJEXT): cg/$(am__dirstamp) cg/$(DEPDIR)/$(am__dirstamp) cg/cg$(EXEEXT): $(cg_cg_OBJECTS) $(cg_cg_DEPENDENCIES) $(EXTRA_cg_cg_DEPENDENCIES) cg/$(am__dirstamp) @rm -f cg/cg$(EXEEXT) $(AM_V_CCLD)$(LINK) $(cg_cg_OBJECTS) $(cg_cg_LDADD) $(LIBS) cholesky/cholesky_compil.$(OBJEXT): cholesky/$(am__dirstamp) \ cholesky/$(DEPDIR)/$(am__dirstamp) cholesky/cholesky_models.$(OBJEXT): cholesky/$(am__dirstamp) \ cholesky/$(DEPDIR)/$(am__dirstamp) cholesky/cholesky_kernels.$(OBJEXT): cholesky/$(am__dirstamp) \ cholesky/$(DEPDIR)/$(am__dirstamp) sched_ctx_utils/$(am__dirstamp): @$(MKDIR_P) sched_ctx_utils @: > sched_ctx_utils/$(am__dirstamp) sched_ctx_utils/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) sched_ctx_utils/$(DEPDIR) @: > sched_ctx_utils/$(DEPDIR)/$(am__dirstamp) sched_ctx_utils/sched_ctx_utils.$(OBJEXT): \ sched_ctx_utils/$(am__dirstamp) \ sched_ctx_utils/$(DEPDIR)/$(am__dirstamp) cholesky/cholesky_compil$(EXEEXT): $(cholesky_cholesky_compil_OBJECTS) $(cholesky_cholesky_compil_DEPENDENCIES) $(EXTRA_cholesky_cholesky_compil_DEPENDENCIES) cholesky/$(am__dirstamp) @rm -f cholesky/cholesky_compil$(EXEEXT) $(AM_V_CCLD)$(LINK) $(cholesky_cholesky_compil_OBJECTS) $(cholesky_cholesky_compil_LDADD) $(LIBS) cholesky/cholesky_grain_tag.$(OBJEXT): cholesky/$(am__dirstamp) \ cholesky/$(DEPDIR)/$(am__dirstamp) cholesky/cholesky_grain_tag$(EXEEXT): $(cholesky_cholesky_grain_tag_OBJECTS) $(cholesky_cholesky_grain_tag_DEPENDENCIES) $(EXTRA_cholesky_cholesky_grain_tag_DEPENDENCIES) cholesky/$(am__dirstamp) @rm -f cholesky/cholesky_grain_tag$(EXEEXT) $(AM_V_CCLD)$(LINK) $(cholesky_cholesky_grain_tag_OBJECTS) $(cholesky_cholesky_grain_tag_LDADD) $(LIBS) cholesky/cholesky_implicit.$(OBJEXT): cholesky/$(am__dirstamp) \ cholesky/$(DEPDIR)/$(am__dirstamp) cholesky/cholesky_implicit$(EXEEXT): $(cholesky_cholesky_implicit_OBJECTS) $(cholesky_cholesky_implicit_DEPENDENCIES) $(EXTRA_cholesky_cholesky_implicit_DEPENDENCIES) cholesky/$(am__dirstamp) @rm -f cholesky/cholesky_implicit$(EXEEXT) $(AM_V_CCLD)$(LINK) $(cholesky_cholesky_implicit_OBJECTS) $(cholesky_cholesky_implicit_LDADD) $(LIBS) cholesky/cholesky_tag.$(OBJEXT): cholesky/$(am__dirstamp) \ cholesky/$(DEPDIR)/$(am__dirstamp) cholesky/cholesky_tag$(EXEEXT): $(cholesky_cholesky_tag_OBJECTS) $(cholesky_cholesky_tag_DEPENDENCIES) $(EXTRA_cholesky_cholesky_tag_DEPENDENCIES) cholesky/$(am__dirstamp) @rm -f cholesky/cholesky_tag$(EXEEXT) $(AM_V_CCLD)$(LINK) $(cholesky_cholesky_tag_OBJECTS) $(cholesky_cholesky_tag_LDADD) $(LIBS) cholesky/cholesky_tile_tag.$(OBJEXT): cholesky/$(am__dirstamp) \ cholesky/$(DEPDIR)/$(am__dirstamp) cholesky/cholesky_tile_tag$(EXEEXT): $(cholesky_cholesky_tile_tag_OBJECTS) $(cholesky_cholesky_tile_tag_DEPENDENCIES) $(EXTRA_cholesky_cholesky_tile_tag_DEPENDENCIES) cholesky/$(am__dirstamp) @rm -f cholesky/cholesky_tile_tag$(EXEEXT) $(AM_V_CCLD)$(LINK) $(cholesky_cholesky_tile_tag_OBJECTS) $(cholesky_cholesky_tile_tag_LDADD) $(LIBS) cpp/$(am__dirstamp): @$(MKDIR_P) cpp @: > cpp/$(am__dirstamp) cpp/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) cpp/$(DEPDIR) @: > cpp/$(DEPDIR)/$(am__dirstamp) cpp/add_vectors.$(OBJEXT): cpp/$(am__dirstamp) \ cpp/$(DEPDIR)/$(am__dirstamp) cpp/add_vectors$(EXEEXT): $(cpp_add_vectors_OBJECTS) $(cpp_add_vectors_DEPENDENCIES) $(EXTRA_cpp_add_vectors_DEPENDENCIES) cpp/$(am__dirstamp) @rm -f cpp/add_vectors$(EXEEXT) $(AM_V_CXXLD)$(CXXLINK) $(cpp_add_vectors_OBJECTS) $(cpp_add_vectors_LDADD) $(LIBS) cpp/add_vectors_cpp11.$(OBJEXT): cpp/$(am__dirstamp) \ cpp/$(DEPDIR)/$(am__dirstamp) cpp/add_vectors_cpp11$(EXEEXT): $(cpp_add_vectors_cpp11_OBJECTS) $(cpp_add_vectors_cpp11_DEPENDENCIES) $(EXTRA_cpp_add_vectors_cpp11_DEPENDENCIES) cpp/$(am__dirstamp) @rm -f cpp/add_vectors_cpp11$(EXEEXT) $(AM_V_CXXLD)$(CXXLINK) $(cpp_add_vectors_cpp11_OBJECTS) $(cpp_add_vectors_cpp11_LDADD) $(LIBS) cpp/add_vectors_interface.$(OBJEXT): cpp/$(am__dirstamp) \ cpp/$(DEPDIR)/$(am__dirstamp) cpp/add_vectors_interface$(EXEEXT): $(cpp_add_vectors_interface_OBJECTS) $(cpp_add_vectors_interface_DEPENDENCIES) $(EXTRA_cpp_add_vectors_interface_DEPENDENCIES) cpp/$(am__dirstamp) @rm -f cpp/add_vectors_interface$(EXEEXT) $(AM_V_CXXLD)$(CXXLINK) $(cpp_add_vectors_interface_OBJECTS) $(cpp_add_vectors_interface_LDADD) $(LIBS) cpp/incrementer_cpp.$(OBJEXT): cpp/$(am__dirstamp) \ cpp/$(DEPDIR)/$(am__dirstamp) incrementer/incrementer_kernels.$(OBJEXT): \ incrementer/$(am__dirstamp) \ incrementer/$(DEPDIR)/$(am__dirstamp) cpp/incrementer_cpp$(EXEEXT): $(cpp_incrementer_cpp_OBJECTS) $(cpp_incrementer_cpp_DEPENDENCIES) $(EXTRA_cpp_incrementer_cpp_DEPENDENCIES) cpp/$(am__dirstamp) @rm -f cpp/incrementer_cpp$(EXEEXT) $(AM_V_CXXLD)$(CXXLINK) $(cpp_incrementer_cpp_OBJECTS) $(cpp_incrementer_cpp_LDADD) $(LIBS) dependency/$(am__dirstamp): @$(MKDIR_P) dependency @: > dependency/$(am__dirstamp) dependency/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) dependency/$(DEPDIR) @: > dependency/$(DEPDIR)/$(am__dirstamp) dependency/sequential_consistency.$(OBJEXT): \ dependency/$(am__dirstamp) \ dependency/$(DEPDIR)/$(am__dirstamp) dependency/sequential_consistency$(EXEEXT): $(dependency_sequential_consistency_OBJECTS) $(dependency_sequential_consistency_DEPENDENCIES) $(EXTRA_dependency_sequential_consistency_DEPENDENCIES) dependency/$(am__dirstamp) @rm -f dependency/sequential_consistency$(EXEEXT) $(AM_V_CCLD)$(LINK) $(dependency_sequential_consistency_OBJECTS) $(dependency_sequential_consistency_LDADD) $(LIBS) dependency/task_end_dep.$(OBJEXT): dependency/$(am__dirstamp) \ dependency/$(DEPDIR)/$(am__dirstamp) dependency/task_end_dep$(EXEEXT): $(dependency_task_end_dep_OBJECTS) $(dependency_task_end_dep_DEPENDENCIES) $(EXTRA_dependency_task_end_dep_DEPENDENCIES) dependency/$(am__dirstamp) @rm -f dependency/task_end_dep$(EXEEXT) $(AM_V_CCLD)$(LINK) $(dependency_task_end_dep_OBJECTS) $(dependency_task_end_dep_LDADD) $(LIBS) dependency/task_end_dep_add.$(OBJEXT): dependency/$(am__dirstamp) \ dependency/$(DEPDIR)/$(am__dirstamp) dependency/task_end_dep_add$(EXEEXT): $(dependency_task_end_dep_add_OBJECTS) $(dependency_task_end_dep_add_DEPENDENCIES) $(EXTRA_dependency_task_end_dep_add_DEPENDENCIES) dependency/$(am__dirstamp) @rm -f dependency/task_end_dep_add$(EXEEXT) $(AM_V_CCLD)$(LINK) $(dependency_task_end_dep_add_OBJECTS) $(dependency_task_end_dep_add_LDADD) $(LIBS) filters/$(am__dirstamp): @$(MKDIR_P) filters @: > filters/$(am__dirstamp) filters/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) filters/$(DEPDIR) @: > filters/$(DEPDIR)/$(am__dirstamp) filters/alloc.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/alloc$(EXEEXT): $(filters_alloc_OBJECTS) $(filters_alloc_DEPENDENCIES) $(EXTRA_filters_alloc_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/alloc$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_alloc_OBJECTS) $(filters_alloc_LDADD) $(LIBS) filters/custom_mf/$(am__dirstamp): @$(MKDIR_P) filters/custom_mf @: > filters/custom_mf/$(am__dirstamp) filters/custom_mf/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) filters/custom_mf/$(DEPDIR) @: > filters/custom_mf/$(DEPDIR)/$(am__dirstamp) filters/custom_mf/custom_mf_filter.$(OBJEXT): \ filters/custom_mf/$(am__dirstamp) \ filters/custom_mf/$(DEPDIR)/$(am__dirstamp) filters/custom_mf/custom_interface.$(OBJEXT): \ filters/custom_mf/$(am__dirstamp) \ filters/custom_mf/$(DEPDIR)/$(am__dirstamp) filters/custom_mf/custom_conversion_codelets.$(OBJEXT): \ filters/custom_mf/$(am__dirstamp) \ filters/custom_mf/$(DEPDIR)/$(am__dirstamp) filters/custom_mf/conversion.$(OBJEXT): \ filters/custom_mf/$(am__dirstamp) \ filters/custom_mf/$(DEPDIR)/$(am__dirstamp) filters/custom_mf/cuda.$(OBJEXT): filters/custom_mf/$(am__dirstamp) \ filters/custom_mf/$(DEPDIR)/$(am__dirstamp) filters/custom_mf/conversion_opencl.$(OBJEXT): \ filters/custom_mf/$(am__dirstamp) \ filters/custom_mf/$(DEPDIR)/$(am__dirstamp) filters/custom_mf/custom_opencl.$(OBJEXT): \ filters/custom_mf/$(am__dirstamp) \ filters/custom_mf/$(DEPDIR)/$(am__dirstamp) filters/custom_mf/custom_mf_filter$(EXEEXT): $(filters_custom_mf_custom_mf_filter_OBJECTS) $(filters_custom_mf_custom_mf_filter_DEPENDENCIES) $(EXTRA_filters_custom_mf_custom_mf_filter_DEPENDENCIES) filters/custom_mf/$(am__dirstamp) @rm -f filters/custom_mf/custom_mf_filter$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_custom_mf_custom_mf_filter_OBJECTS) $(filters_custom_mf_custom_mf_filter_LDADD) $(LIBS) filters/fblock.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fblock_print.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fblock_cpu.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fblock_cuda.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fblock_hip.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fblock_opencl.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fblock$(EXEEXT): $(filters_fblock_OBJECTS) $(filters_fblock_DEPENDENCIES) $(EXTRA_filters_fblock_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fblock$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fblock_OBJECTS) $(filters_fblock_LDADD) $(LIBS) filters/fblock_pick_matrix.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fmatrix_print.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fmatrix_cpu.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fmatrix_cuda.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fmatrix_hip.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fblock_pick_matrix$(EXEEXT): $(filters_fblock_pick_matrix_OBJECTS) $(filters_fblock_pick_matrix_DEPENDENCIES) $(EXTRA_filters_fblock_pick_matrix_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fblock_pick_matrix$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fblock_pick_matrix_OBJECTS) $(filters_fblock_pick_matrix_LDADD) $(LIBS) filters/fblock_pick_variable.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fvariable_cuda.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fblock_pick_variable$(EXEEXT): $(filters_fblock_pick_variable_OBJECTS) $(filters_fblock_pick_variable_DEPENDENCIES) $(EXTRA_filters_fblock_pick_variable_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fblock_pick_variable$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fblock_pick_variable_OBJECTS) $(filters_fblock_pick_variable_LDADD) $(LIBS) filters/fmatrix.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fmatrix$(EXEEXT): $(filters_fmatrix_OBJECTS) $(filters_fmatrix_DEPENDENCIES) $(EXTRA_filters_fmatrix_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fmatrix$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fmatrix_OBJECTS) $(filters_fmatrix_LDADD) $(LIBS) filters/fmatrix_pick_variable.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fmatrix_pick_variable$(EXEEXT): $(filters_fmatrix_pick_variable_OBJECTS) $(filters_fmatrix_pick_variable_DEPENDENCIES) $(EXTRA_filters_fmatrix_pick_variable_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fmatrix_pick_variable$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fmatrix_pick_variable_OBJECTS) $(filters_fmatrix_pick_variable_LDADD) $(LIBS) filters/fmatrix_pick_vector.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fvector_cpu.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fvector_cuda.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fvector_hip.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fmatrix_pick_vector$(EXEEXT): $(filters_fmatrix_pick_vector_OBJECTS) $(filters_fmatrix_pick_vector_DEPENDENCIES) $(EXTRA_filters_fmatrix_pick_vector_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fmatrix_pick_vector$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fmatrix_pick_vector_OBJECTS) $(filters_fmatrix_pick_vector_LDADD) $(LIBS) filters/fmultiple_manual.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fmultiple_cuda.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fmultiple_hip.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fmultiple_manual$(EXEEXT): $(filters_fmultiple_manual_OBJECTS) $(filters_fmultiple_manual_DEPENDENCIES) $(EXTRA_filters_fmultiple_manual_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fmultiple_manual$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fmultiple_manual_OBJECTS) $(filters_fmultiple_manual_LDADD) $(LIBS) filters/fmultiple_submit.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fmultiple_submit$(EXEEXT): $(filters_fmultiple_submit_OBJECTS) $(filters_fmultiple_submit_DEPENDENCIES) $(EXTRA_filters_fmultiple_submit_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fmultiple_submit$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fmultiple_submit_OBJECTS) $(filters_fmultiple_submit_LDADD) $(LIBS) filters/fmultiple_submit_implicit.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fmultiple_submit_implicit$(EXEEXT): $(filters_fmultiple_submit_implicit_OBJECTS) $(filters_fmultiple_submit_implicit_DEPENDENCIES) $(EXTRA_filters_fmultiple_submit_implicit_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fmultiple_submit_implicit$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fmultiple_submit_implicit_OBJECTS) $(filters_fmultiple_submit_implicit_LDADD) $(LIBS) filters/fmultiple_submit_readonly.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fmultiple_submit_readonly$(EXEEXT): $(filters_fmultiple_submit_readonly_OBJECTS) $(filters_fmultiple_submit_readonly_DEPENDENCIES) $(EXTRA_filters_fmultiple_submit_readonly_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fmultiple_submit_readonly$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fmultiple_submit_readonly_OBJECTS) $(filters_fmultiple_submit_readonly_LDADD) $(LIBS) filters/fmultiple_submit_readonly_downgrade.$(OBJEXT): \ filters/$(am__dirstamp) filters/$(DEPDIR)/$(am__dirstamp) filters/fmultiple_submit_readonly_downgrade$(EXEEXT): $(filters_fmultiple_submit_readonly_downgrade_OBJECTS) $(filters_fmultiple_submit_readonly_downgrade_DEPENDENCIES) $(EXTRA_filters_fmultiple_submit_readonly_downgrade_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fmultiple_submit_readonly_downgrade$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fmultiple_submit_readonly_downgrade_OBJECTS) $(filters_fmultiple_submit_readonly_downgrade_LDADD) $(LIBS) filters/fndim.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/ftensor_print.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/f4d_cpu.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/f4d_cuda.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/f4d_hip.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fndim$(EXEEXT): $(filters_fndim_OBJECTS) $(filters_fndim_DEPENDENCIES) $(EXTRA_filters_fndim_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fndim$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fndim_OBJECTS) $(filters_fndim_LDADD) $(LIBS) filters/fndim_1d_pick_variable.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fndim_1d_pick_variable$(EXEEXT): $(filters_fndim_1d_pick_variable_OBJECTS) $(filters_fndim_1d_pick_variable_DEPENDENCIES) $(EXTRA_filters_fndim_1d_pick_variable_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fndim_1d_pick_variable$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fndim_1d_pick_variable_OBJECTS) $(filters_fndim_1d_pick_variable_LDADD) $(LIBS) filters/fndim_2d_pick_vector.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fndim_2d_pick_vector$(EXEEXT): $(filters_fndim_2d_pick_vector_OBJECTS) $(filters_fndim_2d_pick_vector_DEPENDENCIES) $(EXTRA_filters_fndim_2d_pick_vector_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fndim_2d_pick_vector$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fndim_2d_pick_vector_OBJECTS) $(filters_fndim_2d_pick_vector_LDADD) $(LIBS) filters/fndim_3d_pick_matrix.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fndim_3d_pick_matrix$(EXEEXT): $(filters_fndim_3d_pick_matrix_OBJECTS) $(filters_fndim_3d_pick_matrix_DEPENDENCIES) $(EXTRA_filters_fndim_3d_pick_matrix_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fndim_3d_pick_matrix$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fndim_3d_pick_matrix_OBJECTS) $(filters_fndim_3d_pick_matrix_LDADD) $(LIBS) filters/fndim_4d_pick_block.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fndim_4d_pick_block$(EXEEXT): $(filters_fndim_4d_pick_block_OBJECTS) $(filters_fndim_4d_pick_block_DEPENDENCIES) $(EXTRA_filters_fndim_4d_pick_block_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fndim_4d_pick_block$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fndim_4d_pick_block_OBJECTS) $(filters_fndim_4d_pick_block_LDADD) $(LIBS) filters/fndim_5d_pick_tensor.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/f5d_print.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/ftensor_cpu.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/ftensor_cuda.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/ftensor_hip.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fndim_5d_pick_tensor$(EXEEXT): $(filters_fndim_5d_pick_tensor_OBJECTS) $(filters_fndim_5d_pick_tensor_DEPENDENCIES) $(EXTRA_filters_fndim_5d_pick_tensor_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fndim_5d_pick_tensor$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fndim_5d_pick_tensor_OBJECTS) $(filters_fndim_5d_pick_tensor_LDADD) $(LIBS) filters/fndim_pick_ndim.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/f3d_cpu.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/f3d_cuda.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/f3d_hip.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fndim_pick_ndim$(EXEEXT): $(filters_fndim_pick_ndim_OBJECTS) $(filters_fndim_pick_ndim_DEPENDENCIES) $(EXTRA_filters_fndim_pick_ndim_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fndim_pick_ndim$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fndim_pick_ndim_OBJECTS) $(filters_fndim_pick_ndim_LDADD) $(LIBS) filters/fndim_pick_variable.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fndim_pick_variable$(EXEEXT): $(filters_fndim_pick_variable_OBJECTS) $(filters_fndim_pick_variable_DEPENDENCIES) $(EXTRA_filters_fndim_pick_variable_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fndim_pick_variable$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fndim_pick_variable_OBJECTS) $(filters_fndim_pick_variable_LDADD) $(LIBS) filters/fndim_to_block.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fndim_to_block$(EXEEXT): $(filters_fndim_to_block_OBJECTS) $(filters_fndim_to_block_DEPENDENCIES) $(EXTRA_filters_fndim_to_block_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fndim_to_block$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fndim_to_block_OBJECTS) $(filters_fndim_to_block_LDADD) $(LIBS) filters/fndim_to_matrix.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fndim_to_matrix$(EXEEXT): $(filters_fndim_to_matrix_OBJECTS) $(filters_fndim_to_matrix_DEPENDENCIES) $(EXTRA_filters_fndim_to_matrix_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fndim_to_matrix$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fndim_to_matrix_OBJECTS) $(filters_fndim_to_matrix_LDADD) $(LIBS) filters/fndim_to_tensor.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fndim_to_tensor$(EXEEXT): $(filters_fndim_to_tensor_OBJECTS) $(filters_fndim_to_tensor_DEPENDENCIES) $(EXTRA_filters_fndim_to_tensor_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fndim_to_tensor$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fndim_to_tensor_OBJECTS) $(filters_fndim_to_tensor_LDADD) $(LIBS) filters/fndim_to_variable.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fndim_to_variable$(EXEEXT): $(filters_fndim_to_variable_OBJECTS) $(filters_fndim_to_variable_DEPENDENCIES) $(EXTRA_filters_fndim_to_variable_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fndim_to_variable$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fndim_to_variable_OBJECTS) $(filters_fndim_to_variable_LDADD) $(LIBS) filters/fndim_to_vector.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fndim_to_vector$(EXEEXT): $(filters_fndim_to_vector_OBJECTS) $(filters_fndim_to_vector_DEPENDENCIES) $(EXTRA_filters_fndim_to_vector_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fndim_to_vector$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fndim_to_vector_OBJECTS) $(filters_fndim_to_vector_LDADD) $(LIBS) filters/fread.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fread$(EXEEXT): $(filters_fread_OBJECTS) $(filters_fread_DEPENDENCIES) $(EXTRA_filters_fread_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fread$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fread_OBJECTS) $(filters_fread_LDADD) $(LIBS) filters/frecursive.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/frecursive$(EXEEXT): $(filters_frecursive_OBJECTS) $(filters_frecursive_DEPENDENCIES) $(EXTRA_filters_frecursive_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/frecursive$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_frecursive_OBJECTS) $(filters_frecursive_LDADD) $(LIBS) filters/ftensor.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/ftensor$(EXEEXT): $(filters_ftensor_OBJECTS) $(filters_ftensor_DEPENDENCIES) $(EXTRA_filters_ftensor_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/ftensor$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_ftensor_OBJECTS) $(filters_ftensor_LDADD) $(LIBS) filters/ftensor_pick_block.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/ftensor_pick_block$(EXEEXT): $(filters_ftensor_pick_block_OBJECTS) $(filters_ftensor_pick_block_DEPENDENCIES) $(EXTRA_filters_ftensor_pick_block_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/ftensor_pick_block$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_ftensor_pick_block_OBJECTS) $(filters_ftensor_pick_block_LDADD) $(LIBS) filters/ftensor_pick_variable.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/ftensor_pick_variable$(EXEEXT): $(filters_ftensor_pick_variable_OBJECTS) $(filters_ftensor_pick_variable_DEPENDENCIES) $(EXTRA_filters_ftensor_pick_variable_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/ftensor_pick_variable$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_ftensor_pick_variable_OBJECTS) $(filters_ftensor_pick_variable_LDADD) $(LIBS) filters/fvector.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fvector$(EXEEXT): $(filters_fvector_OBJECTS) $(filters_fvector_DEPENDENCIES) $(EXTRA_filters_fvector_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fvector$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fvector_OBJECTS) $(filters_fvector_LDADD) $(LIBS) filters/fvector_pick_variable.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/fvector_pick_variable$(EXEEXT): $(filters_fvector_pick_variable_OBJECTS) $(filters_fvector_pick_variable_DEPENDENCIES) $(EXTRA_filters_fvector_pick_variable_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/fvector_pick_variable$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_fvector_pick_variable_OBJECTS) $(filters_fvector_pick_variable_LDADD) $(LIBS) filters/shadow.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/shadow$(EXEEXT): $(filters_shadow_OBJECTS) $(filters_shadow_DEPENDENCIES) $(EXTRA_filters_shadow_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/shadow$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_shadow_OBJECTS) $(filters_shadow_LDADD) $(LIBS) filters/shadow2d.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/shadow2d$(EXEEXT): $(filters_shadow2d_OBJECTS) $(filters_shadow2d_DEPENDENCIES) $(EXTRA_filters_shadow2d_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/shadow2d$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_shadow2d_OBJECTS) $(filters_shadow2d_LDADD) $(LIBS) filters/shadow3d.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/shadow3d$(EXEEXT): $(filters_shadow3d_OBJECTS) $(filters_shadow3d_DEPENDENCIES) $(EXTRA_filters_shadow3d_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/shadow3d$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_shadow3d_OBJECTS) $(filters_shadow3d_LDADD) $(LIBS) filters/shadow4d.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/shadow4d$(EXEEXT): $(filters_shadow4d_OBJECTS) $(filters_shadow4d_DEPENDENCIES) $(EXTRA_filters_shadow4d_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/shadow4d$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_shadow4d_OBJECTS) $(filters_shadow4d_LDADD) $(LIBS) filters/shadownd.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/shadownd$(EXEEXT): $(filters_shadownd_OBJECTS) $(filters_shadownd_DEPENDENCIES) $(EXTRA_filters_shadownd_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/shadownd$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_shadownd_OBJECTS) $(filters_shadownd_LDADD) $(LIBS) fortran/$(am__dirstamp): @$(MKDIR_P) fortran @: > fortran/$(am__dirstamp) fortran/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) fortran/$(DEPDIR) @: > fortran/$(DEPDIR)/$(am__dirstamp) fortran/hello_c.$(OBJEXT): fortran/$(am__dirstamp) \ fortran/$(DEPDIR)/$(am__dirstamp) fortran/hello.$(OBJEXT): fortran/$(am__dirstamp) \ fortran/$(DEPDIR)/$(am__dirstamp) fortran/hello$(EXEEXT): $(fortran_hello_OBJECTS) $(fortran_hello_DEPENDENCIES) $(EXTRA_fortran_hello_DEPENDENCIES) fortran/$(am__dirstamp) @rm -f fortran/hello$(EXEEXT) $(AM_V_F77LD)$(F77LINK) $(fortran_hello_OBJECTS) $(fortran_hello_LDADD) $(LIBS) fortran90/$(am__dirstamp): @$(MKDIR_P) fortran90 @: > fortran90/$(am__dirstamp) fortran90/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) fortran90/$(DEPDIR) @: > fortran90/$(DEPDIR)/$(am__dirstamp) fortran90/mod_types.$(OBJEXT): fortran90/$(am__dirstamp) \ fortran90/$(DEPDIR)/$(am__dirstamp) fortran90/starpu_mod.$(OBJEXT): fortran90/$(am__dirstamp) \ fortran90/$(DEPDIR)/$(am__dirstamp) fortran90/mod_interface.$(OBJEXT): fortran90/$(am__dirstamp) \ fortran90/$(DEPDIR)/$(am__dirstamp) fortran90/mod_compute.$(OBJEXT): fortran90/$(am__dirstamp) \ fortran90/$(DEPDIR)/$(am__dirstamp) fortran90/marshalling.$(OBJEXT): fortran90/$(am__dirstamp) \ fortran90/$(DEPDIR)/$(am__dirstamp) fortran90/f90_example.$(OBJEXT): fortran90/$(am__dirstamp) \ fortran90/$(DEPDIR)/$(am__dirstamp) fortran90/f90_example$(EXEEXT): $(fortran90_f90_example_OBJECTS) $(fortran90_f90_example_DEPENDENCIES) $(EXTRA_fortran90_f90_example_DEPENDENCIES) fortran90/$(am__dirstamp) @rm -f fortran90/f90_example$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(fortran90_f90_example_OBJECTS) $(fortran90_f90_example_LDADD) $(LIBS) gl_interop/$(am__dirstamp): @$(MKDIR_P) gl_interop @: > gl_interop/$(am__dirstamp) gl_interop/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) gl_interop/$(DEPDIR) @: > gl_interop/$(DEPDIR)/$(am__dirstamp) gl_interop/gl_interop.$(OBJEXT): gl_interop/$(am__dirstamp) \ gl_interop/$(DEPDIR)/$(am__dirstamp) gl_interop/gl_interop$(EXEEXT): $(gl_interop_gl_interop_OBJECTS) $(gl_interop_gl_interop_DEPENDENCIES) $(EXTRA_gl_interop_gl_interop_DEPENDENCIES) gl_interop/$(am__dirstamp) @rm -f gl_interop/gl_interop$(EXEEXT) $(AM_V_CCLD)$(LINK) $(gl_interop_gl_interop_OBJECTS) $(gl_interop_gl_interop_LDADD) $(LIBS) gl_interop/gl_interop_idle.$(OBJEXT): gl_interop/$(am__dirstamp) \ gl_interop/$(DEPDIR)/$(am__dirstamp) gl_interop/gl_interop_idle$(EXEEXT): $(gl_interop_gl_interop_idle_OBJECTS) $(gl_interop_gl_interop_idle_DEPENDENCIES) $(EXTRA_gl_interop_gl_interop_idle_DEPENDENCIES) gl_interop/$(am__dirstamp) @rm -f gl_interop/gl_interop_idle$(EXEEXT) $(AM_V_CCLD)$(LINK) $(gl_interop_gl_interop_idle_OBJECTS) $(gl_interop_gl_interop_idle_LDADD) $(LIBS) heat/$(am__dirstamp): @$(MKDIR_P) heat @: > heat/$(am__dirstamp) heat/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) heat/$(DEPDIR) @: > heat/$(DEPDIR)/$(am__dirstamp) heat/heat.$(OBJEXT): heat/$(am__dirstamp) \ heat/$(DEPDIR)/$(am__dirstamp) heat/dw_factolu.$(OBJEXT): heat/$(am__dirstamp) \ heat/$(DEPDIR)/$(am__dirstamp) heat/dw_factolu_tag.$(OBJEXT): heat/$(am__dirstamp) \ heat/$(DEPDIR)/$(am__dirstamp) heat/dw_factolu_grain.$(OBJEXT): heat/$(am__dirstamp) \ heat/$(DEPDIR)/$(am__dirstamp) heat/dw_sparse_cg.$(OBJEXT): heat/$(am__dirstamp) \ heat/$(DEPDIR)/$(am__dirstamp) heat/heat_display.$(OBJEXT): heat/$(am__dirstamp) \ heat/$(DEPDIR)/$(am__dirstamp) heat/lu_kernels_model.$(OBJEXT): heat/$(am__dirstamp) \ heat/$(DEPDIR)/$(am__dirstamp) heat/dw_sparse_cg_kernels.$(OBJEXT): heat/$(am__dirstamp) \ heat/$(DEPDIR)/$(am__dirstamp) heat/dw_factolu_kernels.$(OBJEXT): heat/$(am__dirstamp) \ heat/$(DEPDIR)/$(am__dirstamp) heat/heat$(EXEEXT): $(heat_heat_OBJECTS) $(heat_heat_DEPENDENCIES) $(EXTRA_heat_heat_DEPENDENCIES) heat/$(am__dirstamp) @rm -f heat/heat$(EXEEXT) $(AM_V_CCLD)$(LINK) $(heat_heat_OBJECTS) $(heat_heat_LDADD) $(LIBS) incrementer/incrementer.$(OBJEXT): incrementer/$(am__dirstamp) \ incrementer/$(DEPDIR)/$(am__dirstamp) incrementer/incrementer$(EXEEXT): $(incrementer_incrementer_OBJECTS) $(incrementer_incrementer_DEPENDENCIES) $(EXTRA_incrementer_incrementer_DEPENDENCIES) incrementer/$(am__dirstamp) @rm -f incrementer/incrementer$(EXEEXT) $(AM_V_CCLD)$(LINK) $(incrementer_incrementer_OBJECTS) $(incrementer_incrementer_LDADD) $(LIBS) interface/$(am__dirstamp): @$(MKDIR_P) interface @: > interface/$(am__dirstamp) interface/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) interface/$(DEPDIR) @: > interface/$(DEPDIR)/$(am__dirstamp) interface/complex.$(OBJEXT): interface/$(am__dirstamp) \ interface/$(DEPDIR)/$(am__dirstamp) interface/complex_interface.$(OBJEXT): interface/$(am__dirstamp) \ interface/$(DEPDIR)/$(am__dirstamp) interface/complex_filters.$(OBJEXT): interface/$(am__dirstamp) \ interface/$(DEPDIR)/$(am__dirstamp) interface/complex_kernels.$(OBJEXT): interface/$(am__dirstamp) \ interface/$(DEPDIR)/$(am__dirstamp) interface/complex_kernels_opencl.$(OBJEXT): interface/$(am__dirstamp) \ interface/$(DEPDIR)/$(am__dirstamp) interface/complex$(EXEEXT): $(interface_complex_OBJECTS) $(interface_complex_DEPENDENCIES) $(EXTRA_interface_complex_DEPENDENCIES) interface/$(am__dirstamp) @rm -f interface/complex$(EXEEXT) $(AM_V_CCLD)$(LINK) $(interface_complex_OBJECTS) $(interface_complex_LDADD) $(LIBS) interface/complex_dev_handle/$(am__dirstamp): @$(MKDIR_P) interface/complex_dev_handle @: > interface/complex_dev_handle/$(am__dirstamp) interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) interface/complex_dev_handle/$(DEPDIR) @: > interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp) interface/complex_dev_handle/complex_dev_handle.$(OBJEXT): \ interface/complex_dev_handle/$(am__dirstamp) \ interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp) interface/complex_dev_handle/complex_dev_handle_interface.$(OBJEXT): \ interface/complex_dev_handle/$(am__dirstamp) \ interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp) interface/complex_dev_handle/complex_dev_handle_filters.$(OBJEXT): \ interface/complex_dev_handle/$(am__dirstamp) \ interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp) interface/complex_dev_handle/complex_dev_handle_kernels.$(OBJEXT): \ interface/complex_dev_handle/$(am__dirstamp) \ interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp) interface/complex_dev_handle/complex_dev_handle_kernels_opencl.$(OBJEXT): \ interface/complex_dev_handle/$(am__dirstamp) \ interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp) interface/complex_dev_handle/complex_dev_handle$(EXEEXT): $(interface_complex_dev_handle_complex_dev_handle_OBJECTS) $(interface_complex_dev_handle_complex_dev_handle_DEPENDENCIES) $(EXTRA_interface_complex_dev_handle_complex_dev_handle_DEPENDENCIES) interface/complex_dev_handle/$(am__dirstamp) @rm -f interface/complex_dev_handle/complex_dev_handle$(EXEEXT) $(AM_V_CCLD)$(LINK) $(interface_complex_dev_handle_complex_dev_handle_OBJECTS) $(interface_complex_dev_handle_complex_dev_handle_LDADD) $(LIBS) loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) @rm -f loader$(EXEEXT) $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) lu/$(am__dirstamp): @$(MKDIR_P) lu @: > lu/$(am__dirstamp) lu/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) lu/$(DEPDIR) @: > lu/$(DEPDIR)/$(am__dirstamp) lu/lu_example_complex_double.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/zlu.$(OBJEXT): lu/$(am__dirstamp) lu/$(DEPDIR)/$(am__dirstamp) lu/zlu_pivot.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/zlu_kernels.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/blas_complex.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/lu_example_complex_double$(EXEEXT): $(lu_lu_example_complex_double_OBJECTS) $(lu_lu_example_complex_double_DEPENDENCIES) $(EXTRA_lu_lu_example_complex_double_DEPENDENCIES) lu/$(am__dirstamp) @rm -f lu/lu_example_complex_double$(EXEEXT) $(AM_V_CCLD)$(LINK) $(lu_lu_example_complex_double_OBJECTS) $(lu_lu_example_complex_double_LDADD) $(LIBS) lu/lu_example_complex_float.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/clu.$(OBJEXT): lu/$(am__dirstamp) lu/$(DEPDIR)/$(am__dirstamp) lu/clu_pivot.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/clu_kernels.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/lu_example_complex_float$(EXEEXT): $(lu_lu_example_complex_float_OBJECTS) $(lu_lu_example_complex_float_DEPENDENCIES) $(EXTRA_lu_lu_example_complex_float_DEPENDENCIES) lu/$(am__dirstamp) @rm -f lu/lu_example_complex_float$(EXEEXT) $(AM_V_CCLD)$(LINK) $(lu_lu_example_complex_float_OBJECTS) $(lu_lu_example_complex_float_LDADD) $(LIBS) lu/lu_example_double.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/dlu.$(OBJEXT): lu/$(am__dirstamp) lu/$(DEPDIR)/$(am__dirstamp) lu/dlu_pivot.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/dlu_kernels.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/lu_example_double$(EXEEXT): $(lu_lu_example_double_OBJECTS) $(lu_lu_example_double_DEPENDENCIES) $(EXTRA_lu_lu_example_double_DEPENDENCIES) lu/$(am__dirstamp) @rm -f lu/lu_example_double$(EXEEXT) $(AM_V_CCLD)$(LINK) $(lu_lu_example_double_OBJECTS) $(lu_lu_example_double_LDADD) $(LIBS) lu/lu_example_float.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/slu.$(OBJEXT): lu/$(am__dirstamp) lu/$(DEPDIR)/$(am__dirstamp) lu/slu_pivot.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/slu_kernels.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/lu_example_float$(EXEEXT): $(lu_lu_example_float_OBJECTS) $(lu_lu_example_float_DEPENDENCIES) $(EXTRA_lu_lu_example_float_DEPENDENCIES) lu/$(am__dirstamp) @rm -f lu/lu_example_float$(EXEEXT) $(AM_V_CCLD)$(LINK) $(lu_lu_example_float_OBJECTS) $(lu_lu_example_float_LDADD) $(LIBS) lu/zlu_implicit.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/zlu_implicit_pivot.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/lu_implicit_example_complex_double$(EXEEXT): $(lu_lu_implicit_example_complex_double_OBJECTS) $(lu_lu_implicit_example_complex_double_DEPENDENCIES) $(EXTRA_lu_lu_implicit_example_complex_double_DEPENDENCIES) lu/$(am__dirstamp) @rm -f lu/lu_implicit_example_complex_double$(EXEEXT) $(AM_V_CCLD)$(LINK) $(lu_lu_implicit_example_complex_double_OBJECTS) $(lu_lu_implicit_example_complex_double_LDADD) $(LIBS) lu/clu_implicit.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/clu_implicit_pivot.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/lu_implicit_example_complex_float$(EXEEXT): $(lu_lu_implicit_example_complex_float_OBJECTS) $(lu_lu_implicit_example_complex_float_DEPENDENCIES) $(EXTRA_lu_lu_implicit_example_complex_float_DEPENDENCIES) lu/$(am__dirstamp) @rm -f lu/lu_implicit_example_complex_float$(EXEEXT) $(AM_V_CCLD)$(LINK) $(lu_lu_implicit_example_complex_float_OBJECTS) $(lu_lu_implicit_example_complex_float_LDADD) $(LIBS) lu/dlu_implicit.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/dlu_implicit_pivot.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/lu_implicit_example_double$(EXEEXT): $(lu_lu_implicit_example_double_OBJECTS) $(lu_lu_implicit_example_double_DEPENDENCIES) $(EXTRA_lu_lu_implicit_example_double_DEPENDENCIES) lu/$(am__dirstamp) @rm -f lu/lu_implicit_example_double$(EXEEXT) $(AM_V_CCLD)$(LINK) $(lu_lu_implicit_example_double_OBJECTS) $(lu_lu_implicit_example_double_LDADD) $(LIBS) lu/slu_implicit.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/slu_implicit_pivot.$(OBJEXT): lu/$(am__dirstamp) \ lu/$(DEPDIR)/$(am__dirstamp) lu/lu_implicit_example_float$(EXEEXT): $(lu_lu_implicit_example_float_OBJECTS) $(lu_lu_implicit_example_float_DEPENDENCIES) $(EXTRA_lu_lu_implicit_example_float_DEPENDENCIES) lu/$(am__dirstamp) @rm -f lu/lu_implicit_example_float$(EXEEXT) $(AM_V_CCLD)$(LINK) $(lu_lu_implicit_example_float_OBJECTS) $(lu_lu_implicit_example_float_LDADD) $(LIBS) mandelbrot/$(am__dirstamp): @$(MKDIR_P) mandelbrot @: > mandelbrot/$(am__dirstamp) mandelbrot/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) mandelbrot/$(DEPDIR) @: > mandelbrot/$(DEPDIR)/$(am__dirstamp) mandelbrot/mandelbrot-mandelbrot.$(OBJEXT): \ mandelbrot/$(am__dirstamp) \ mandelbrot/$(DEPDIR)/$(am__dirstamp) mandelbrot/mandelbrot$(EXEEXT): $(mandelbrot_mandelbrot_OBJECTS) $(mandelbrot_mandelbrot_DEPENDENCIES) $(EXTRA_mandelbrot_mandelbrot_DEPENDENCIES) mandelbrot/$(am__dirstamp) @rm -f mandelbrot/mandelbrot$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mandelbrot_mandelbrot_OBJECTS) $(mandelbrot_mandelbrot_LDADD) $(LIBS) matvecmult/$(am__dirstamp): @$(MKDIR_P) matvecmult @: > matvecmult/$(am__dirstamp) matvecmult/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) matvecmult/$(DEPDIR) @: > matvecmult/$(DEPDIR)/$(am__dirstamp) matvecmult/matvecmult.$(OBJEXT): matvecmult/$(am__dirstamp) \ matvecmult/$(DEPDIR)/$(am__dirstamp) matvecmult/matvecmult$(EXEEXT): $(matvecmult_matvecmult_OBJECTS) $(matvecmult_matvecmult_DEPENDENCIES) $(EXTRA_matvecmult_matvecmult_DEPENDENCIES) matvecmult/$(am__dirstamp) @rm -f matvecmult/matvecmult$(EXEEXT) $(AM_V_CCLD)$(LINK) $(matvecmult_matvecmult_OBJECTS) $(matvecmult_matvecmult_LDADD) $(LIBS) mlr/$(am__dirstamp): @$(MKDIR_P) mlr @: > mlr/$(am__dirstamp) mlr/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) mlr/$(DEPDIR) @: > mlr/$(DEPDIR)/$(am__dirstamp) mlr/mlr.$(OBJEXT): mlr/$(am__dirstamp) mlr/$(DEPDIR)/$(am__dirstamp) mlr/mlr$(EXEEXT): $(mlr_mlr_OBJECTS) $(mlr_mlr_DEPENDENCIES) $(EXTRA_mlr_mlr_DEPENDENCIES) mlr/$(am__dirstamp) @rm -f mlr/mlr$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mlr_mlr_OBJECTS) $(mlr_mlr_LDADD) $(LIBS) mult/$(am__dirstamp): @$(MKDIR_P) mult @: > mult/$(am__dirstamp) mult/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) mult/$(DEPDIR) @: > mult/$(DEPDIR)/$(am__dirstamp) mult/dgemm.$(OBJEXT): mult/$(am__dirstamp) \ mult/$(DEPDIR)/$(am__dirstamp) mult/dgemm$(EXEEXT): $(mult_dgemm_OBJECTS) $(mult_dgemm_DEPENDENCIES) $(EXTRA_mult_dgemm_DEPENDENCIES) mult/$(am__dirstamp) @rm -f mult/dgemm$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mult_dgemm_OBJECTS) $(mult_dgemm_LDADD) $(LIBS) mult/dgemm_layout.$(OBJEXT): mult/$(am__dirstamp) \ mult/$(DEPDIR)/$(am__dirstamp) mult/dgemm_layout$(EXEEXT): $(mult_dgemm_layout_OBJECTS) $(mult_dgemm_layout_DEPENDENCIES) $(EXTRA_mult_dgemm_layout_DEPENDENCIES) mult/$(am__dirstamp) @rm -f mult/dgemm_layout$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mult_dgemm_layout_OBJECTS) $(mult_dgemm_layout_LDADD) $(LIBS) mult/sgemm.$(OBJEXT): mult/$(am__dirstamp) \ mult/$(DEPDIR)/$(am__dirstamp) mult/sgemm$(EXEEXT): $(mult_sgemm_OBJECTS) $(mult_sgemm_DEPENDENCIES) $(EXTRA_mult_sgemm_DEPENDENCIES) mult/$(am__dirstamp) @rm -f mult/sgemm$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mult_sgemm_OBJECTS) $(mult_sgemm_LDADD) $(LIBS) mult/sgemm_layout.$(OBJEXT): mult/$(am__dirstamp) \ mult/$(DEPDIR)/$(am__dirstamp) mult/sgemm_layout$(EXEEXT): $(mult_sgemm_layout_OBJECTS) $(mult_sgemm_layout_DEPENDENCIES) $(EXTRA_mult_sgemm_layout_DEPENDENCIES) mult/$(am__dirstamp) @rm -f mult/sgemm_layout$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mult_sgemm_layout_OBJECTS) $(mult_sgemm_layout_LDADD) $(LIBS) native_fortran/$(am__dirstamp): @$(MKDIR_P) native_fortran @: > native_fortran/$(am__dirstamp) native_fortran/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) native_fortran/$(DEPDIR) @: > native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_dynbuf_cl.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/fstarpu_mod.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_dynbuf.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_dynbuf$(EXEEXT): $(native_fortran_nf_dynbuf_OBJECTS) $(native_fortran_nf_dynbuf_DEPENDENCIES) $(EXTRA_native_fortran_nf_dynbuf_DEPENDENCIES) native_fortran/$(am__dirstamp) @rm -f native_fortran/nf_dynbuf$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_dynbuf_OBJECTS) $(native_fortran_nf_dynbuf_LDADD) $(LIBS) native_fortran/nf_types.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_compute.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_example.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_example$(EXEEXT): $(native_fortran_nf_example_OBJECTS) $(native_fortran_nf_example_DEPENDENCIES) $(EXTRA_native_fortran_nf_example_DEPENDENCIES) native_fortran/$(am__dirstamp) @rm -f native_fortran/nf_example$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_example_OBJECTS) $(native_fortran_nf_example_LDADD) $(LIBS) native_fortran/nf_codelets.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_matrix.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_matrix$(EXEEXT): $(native_fortran_nf_matrix_OBJECTS) $(native_fortran_nf_matrix_DEPENDENCIES) $(EXTRA_native_fortran_nf_matrix_DEPENDENCIES) native_fortran/$(am__dirstamp) @rm -f native_fortran/nf_matrix$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_matrix_OBJECTS) $(native_fortran_nf_matrix_LDADD) $(LIBS) native_fortran/nf_partition_cl.$(OBJEXT): \ native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_partition.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_partition$(EXEEXT): $(native_fortran_nf_partition_OBJECTS) $(native_fortran_nf_partition_DEPENDENCIES) $(EXTRA_native_fortran_nf_partition_DEPENDENCIES) native_fortran/$(am__dirstamp) @rm -f native_fortran/nf_partition$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_partition_OBJECTS) $(native_fortran_nf_partition_LDADD) $(LIBS) native_fortran/nf_sched_ctx_cl.$(OBJEXT): \ native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_sched_ctx.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_sched_ctx$(EXEEXT): $(native_fortran_nf_sched_ctx_OBJECTS) $(native_fortran_nf_sched_ctx_DEPENDENCIES) $(EXTRA_native_fortran_nf_sched_ctx_DEPENDENCIES) native_fortran/$(am__dirstamp) @rm -f native_fortran/nf_sched_ctx$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_sched_ctx_OBJECTS) $(native_fortran_nf_sched_ctx_LDADD) $(LIBS) native_fortran/nf_varbuf_cl.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_varbuf.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_varbuf$(EXEEXT): $(native_fortran_nf_varbuf_OBJECTS) $(native_fortran_nf_varbuf_DEPENDENCIES) $(EXTRA_native_fortran_nf_varbuf_DEPENDENCIES) native_fortran/$(am__dirstamp) @rm -f native_fortran/nf_varbuf$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_varbuf_OBJECTS) $(native_fortran_nf_varbuf_LDADD) $(LIBS) native_fortran/nf_vector.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_vector$(EXEEXT): $(native_fortran_nf_vector_OBJECTS) $(native_fortran_nf_vector_DEPENDENCIES) $(EXTRA_native_fortran_nf_vector_DEPENDENCIES) native_fortran/$(am__dirstamp) @rm -f native_fortran/nf_vector$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_vector_OBJECTS) $(native_fortran_nf_vector_LDADD) $(LIBS) openmp/$(am__dirstamp): @$(MKDIR_P) openmp @: > openmp/$(am__dirstamp) openmp/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) openmp/$(DEPDIR) @: > openmp/$(DEPDIR)/$(am__dirstamp) openmp/vector_scal_omp-vector_scal_omp.$(OBJEXT): \ openmp/$(am__dirstamp) openmp/$(DEPDIR)/$(am__dirstamp) openmp/vector_scal_omp$(EXEEXT): $(openmp_vector_scal_omp_OBJECTS) $(openmp_vector_scal_omp_DEPENDENCIES) $(EXTRA_openmp_vector_scal_omp_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/vector_scal_omp$(EXEEXT) $(AM_V_CCLD)$(openmp_vector_scal_omp_LINK) $(openmp_vector_scal_omp_OBJECTS) $(openmp_vector_scal_omp_LDADD) $(LIBS) parallel_workers/$(am__dirstamp): @$(MKDIR_P) parallel_workers @: > parallel_workers/$(am__dirstamp) parallel_workers/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) parallel_workers/$(DEPDIR) @: > parallel_workers/$(DEPDIR)/$(am__dirstamp) parallel_workers/parallel_workers-parallel_workers.$(OBJEXT): \ parallel_workers/$(am__dirstamp) \ parallel_workers/$(DEPDIR)/$(am__dirstamp) parallel_workers/parallel_workers$(EXEEXT): $(parallel_workers_parallel_workers_OBJECTS) $(parallel_workers_parallel_workers_DEPENDENCIES) $(EXTRA_parallel_workers_parallel_workers_DEPENDENCIES) parallel_workers/$(am__dirstamp) @rm -f parallel_workers/parallel_workers$(EXEEXT) $(AM_V_CCLD)$(parallel_workers_parallel_workers_LINK) $(parallel_workers_parallel_workers_OBJECTS) $(parallel_workers_parallel_workers_LDADD) $(LIBS) parallel_workers/parallel_workers_func-parallel_workers_func.$(OBJEXT): \ parallel_workers/$(am__dirstamp) \ parallel_workers/$(DEPDIR)/$(am__dirstamp) parallel_workers/parallel_workers_func$(EXEEXT): $(parallel_workers_parallel_workers_func_OBJECTS) $(parallel_workers_parallel_workers_func_DEPENDENCIES) $(EXTRA_parallel_workers_parallel_workers_func_DEPENDENCIES) parallel_workers/$(am__dirstamp) @rm -f parallel_workers/parallel_workers_func$(EXEEXT) $(AM_V_CCLD)$(parallel_workers_parallel_workers_func_LINK) $(parallel_workers_parallel_workers_func_OBJECTS) $(parallel_workers_parallel_workers_func_LDADD) $(LIBS) parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.$(OBJEXT): \ parallel_workers/$(am__dirstamp) \ parallel_workers/$(DEPDIR)/$(am__dirstamp) parallel_workers/parallel_workers_oldapi$(EXEEXT): $(parallel_workers_parallel_workers_oldapi_OBJECTS) $(parallel_workers_parallel_workers_oldapi_DEPENDENCIES) $(EXTRA_parallel_workers_parallel_workers_oldapi_DEPENDENCIES) parallel_workers/$(am__dirstamp) @rm -f parallel_workers/parallel_workers_oldapi$(EXEEXT) $(AM_V_CCLD)$(parallel_workers_parallel_workers_oldapi_LINK) $(parallel_workers_parallel_workers_oldapi_OBJECTS) $(parallel_workers_parallel_workers_oldapi_LDADD) $(LIBS) perf_monitoring/$(am__dirstamp): @$(MKDIR_P) perf_monitoring @: > perf_monitoring/$(am__dirstamp) perf_monitoring/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) perf_monitoring/$(DEPDIR) @: > perf_monitoring/$(DEPDIR)/$(am__dirstamp) perf_monitoring/perf_counters_01.$(OBJEXT): \ perf_monitoring/$(am__dirstamp) \ perf_monitoring/$(DEPDIR)/$(am__dirstamp) perf_monitoring/perf_counters_01$(EXEEXT): $(perf_monitoring_perf_counters_01_OBJECTS) $(perf_monitoring_perf_counters_01_DEPENDENCIES) $(EXTRA_perf_monitoring_perf_counters_01_DEPENDENCIES) perf_monitoring/$(am__dirstamp) @rm -f perf_monitoring/perf_counters_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perf_monitoring_perf_counters_01_OBJECTS) $(perf_monitoring_perf_counters_01_LDADD) $(LIBS) perf_monitoring/perf_counters_02.$(OBJEXT): \ perf_monitoring/$(am__dirstamp) \ perf_monitoring/$(DEPDIR)/$(am__dirstamp) perf_monitoring/perf_counters_02$(EXEEXT): $(perf_monitoring_perf_counters_02_OBJECTS) $(perf_monitoring_perf_counters_02_DEPENDENCIES) $(EXTRA_perf_monitoring_perf_counters_02_DEPENDENCIES) perf_monitoring/$(am__dirstamp) @rm -f perf_monitoring/perf_counters_02$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perf_monitoring_perf_counters_02_OBJECTS) $(perf_monitoring_perf_counters_02_LDADD) $(LIBS) perf_steering/$(am__dirstamp): @$(MKDIR_P) perf_steering @: > perf_steering/$(am__dirstamp) perf_steering/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) perf_steering/$(DEPDIR) @: > perf_steering/$(DEPDIR)/$(am__dirstamp) perf_steering/perf_knobs_01.$(OBJEXT): perf_steering/$(am__dirstamp) \ perf_steering/$(DEPDIR)/$(am__dirstamp) perf_steering/perf_knobs_01$(EXEEXT): $(perf_steering_perf_knobs_01_OBJECTS) $(perf_steering_perf_knobs_01_DEPENDENCIES) $(EXTRA_perf_steering_perf_knobs_01_DEPENDENCIES) perf_steering/$(am__dirstamp) @rm -f perf_steering/perf_knobs_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perf_steering_perf_knobs_01_OBJECTS) $(perf_steering_perf_knobs_01_LDADD) $(LIBS) perf_steering/perf_knobs_02.$(OBJEXT): perf_steering/$(am__dirstamp) \ perf_steering/$(DEPDIR)/$(am__dirstamp) perf_steering/perf_knobs_02$(EXEEXT): $(perf_steering_perf_knobs_02_OBJECTS) $(perf_steering_perf_knobs_02_DEPENDENCIES) $(EXTRA_perf_steering_perf_knobs_02_DEPENDENCIES) perf_steering/$(am__dirstamp) @rm -f perf_steering/perf_knobs_02$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perf_steering_perf_knobs_02_OBJECTS) $(perf_steering_perf_knobs_02_LDADD) $(LIBS) perf_steering/perf_knobs_03.$(OBJEXT): perf_steering/$(am__dirstamp) \ perf_steering/$(DEPDIR)/$(am__dirstamp) perf_steering/perf_knobs_03$(EXEEXT): $(perf_steering_perf_knobs_03_OBJECTS) $(perf_steering_perf_knobs_03_DEPENDENCIES) $(EXTRA_perf_steering_perf_knobs_03_DEPENDENCIES) perf_steering/$(am__dirstamp) @rm -f perf_steering/perf_knobs_03$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perf_steering_perf_knobs_03_OBJECTS) $(perf_steering_perf_knobs_03_LDADD) $(LIBS) pi/$(am__dirstamp): @$(MKDIR_P) pi @: > pi/$(am__dirstamp) pi/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) pi/$(DEPDIR) @: > pi/$(DEPDIR)/$(am__dirstamp) pi/pi.$(OBJEXT): pi/$(am__dirstamp) pi/$(DEPDIR)/$(am__dirstamp) pi/SobolQRNG/$(am__dirstamp): @$(MKDIR_P) pi/SobolQRNG @: > pi/SobolQRNG/$(am__dirstamp) pi/SobolQRNG/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) pi/SobolQRNG/$(DEPDIR) @: > pi/SobolQRNG/$(DEPDIR)/$(am__dirstamp) pi/SobolQRNG/sobol_gold.$(OBJEXT): pi/SobolQRNG/$(am__dirstamp) \ pi/SobolQRNG/$(DEPDIR)/$(am__dirstamp) pi/SobolQRNG/sobol_primitives.$(OBJEXT): pi/SobolQRNG/$(am__dirstamp) \ pi/SobolQRNG/$(DEPDIR)/$(am__dirstamp) pi/pi_kernel.$(OBJEXT): pi/$(am__dirstamp) \ pi/$(DEPDIR)/$(am__dirstamp) pi/SobolQRNG/sobol_gpu.$(OBJEXT): pi/SobolQRNG/$(am__dirstamp) \ pi/SobolQRNG/$(DEPDIR)/$(am__dirstamp) pi/pi$(EXEEXT): $(pi_pi_OBJECTS) $(pi_pi_DEPENDENCIES) $(EXTRA_pi_pi_DEPENDENCIES) pi/$(am__dirstamp) @rm -f pi/pi$(EXEEXT) $(AM_V_CCLD)$(LINK) $(pi_pi_OBJECTS) $(pi_pi_LDADD) $(LIBS) pi/pi_redux.$(OBJEXT): pi/$(am__dirstamp) pi/$(DEPDIR)/$(am__dirstamp) pi/pi_redux_kernel.$(OBJEXT): pi/$(am__dirstamp) \ pi/$(DEPDIR)/$(am__dirstamp) pi/pi_redux$(EXEEXT): $(pi_pi_redux_OBJECTS) $(pi_pi_redux_DEPENDENCIES) $(EXTRA_pi_pi_redux_DEPENDENCIES) pi/$(am__dirstamp) @rm -f pi/pi_redux$(EXEEXT) $(AM_V_CCLD)$(LINK) $(pi_pi_redux_OBJECTS) $(pi_pi_redux_LDADD) $(LIBS) pipeline/$(am__dirstamp): @$(MKDIR_P) pipeline @: > pipeline/$(am__dirstamp) pipeline/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) pipeline/$(DEPDIR) @: > pipeline/$(DEPDIR)/$(am__dirstamp) pipeline/pipeline.$(OBJEXT): pipeline/$(am__dirstamp) \ pipeline/$(DEPDIR)/$(am__dirstamp) pipeline/pipeline$(EXEEXT): $(pipeline_pipeline_OBJECTS) $(pipeline_pipeline_DEPENDENCIES) $(EXTRA_pipeline_pipeline_DEPENDENCIES) pipeline/$(am__dirstamp) @rm -f pipeline/pipeline$(EXEEXT) $(AM_V_CCLD)$(LINK) $(pipeline_pipeline_OBJECTS) $(pipeline_pipeline_LDADD) $(LIBS) ppm_downscaler/$(am__dirstamp): @$(MKDIR_P) ppm_downscaler @: > ppm_downscaler/$(am__dirstamp) ppm_downscaler/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) ppm_downscaler/$(DEPDIR) @: > ppm_downscaler/$(DEPDIR)/$(am__dirstamp) ppm_downscaler/ppm_downscaler.$(OBJEXT): \ ppm_downscaler/$(am__dirstamp) \ ppm_downscaler/$(DEPDIR)/$(am__dirstamp) ppm_downscaler/ppm_downscaler$(EXEEXT): $(ppm_downscaler_ppm_downscaler_OBJECTS) $(ppm_downscaler_ppm_downscaler_DEPENDENCIES) $(EXTRA_ppm_downscaler_ppm_downscaler_DEPENDENCIES) ppm_downscaler/$(am__dirstamp) @rm -f ppm_downscaler/ppm_downscaler$(EXEEXT) $(AM_V_CCLD)$(LINK) $(ppm_downscaler_ppm_downscaler_OBJECTS) $(ppm_downscaler_ppm_downscaler_LDADD) $(LIBS) ppm_downscaler/yuv_downscaler.$(OBJEXT): \ ppm_downscaler/$(am__dirstamp) \ ppm_downscaler/$(DEPDIR)/$(am__dirstamp) ppm_downscaler/yuv_downscaler$(EXEEXT): $(ppm_downscaler_yuv_downscaler_OBJECTS) $(ppm_downscaler_yuv_downscaler_DEPENDENCIES) $(EXTRA_ppm_downscaler_yuv_downscaler_DEPENDENCIES) ppm_downscaler/$(am__dirstamp) @rm -f ppm_downscaler/yuv_downscaler$(EXEEXT) $(AM_V_CCLD)$(LINK) $(ppm_downscaler_yuv_downscaler_OBJECTS) $(ppm_downscaler_yuv_downscaler_LDADD) $(LIBS) profiling/$(am__dirstamp): @$(MKDIR_P) profiling @: > profiling/$(am__dirstamp) profiling/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) profiling/$(DEPDIR) @: > profiling/$(DEPDIR)/$(am__dirstamp) profiling/profiling.$(OBJEXT): profiling/$(am__dirstamp) \ profiling/$(DEPDIR)/$(am__dirstamp) profiling/profiling$(EXEEXT): $(profiling_profiling_OBJECTS) $(profiling_profiling_DEPENDENCIES) $(EXTRA_profiling_profiling_DEPENDENCIES) profiling/$(am__dirstamp) @rm -f profiling/profiling$(EXEEXT) $(AM_V_CCLD)$(LINK) $(profiling_profiling_OBJECTS) $(profiling_profiling_LDADD) $(LIBS) reductions/$(am__dirstamp): @$(MKDIR_P) reductions @: > reductions/$(am__dirstamp) reductions/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) reductions/$(DEPDIR) @: > reductions/$(DEPDIR)/$(am__dirstamp) reductions/dot_product.$(OBJEXT): reductions/$(am__dirstamp) \ reductions/$(DEPDIR)/$(am__dirstamp) reductions/dot_product_kernels.$(OBJEXT): reductions/$(am__dirstamp) \ reductions/$(DEPDIR)/$(am__dirstamp) reductions/dot_product$(EXEEXT): $(reductions_dot_product_OBJECTS) $(reductions_dot_product_DEPENDENCIES) $(EXTRA_reductions_dot_product_DEPENDENCIES) reductions/$(am__dirstamp) @rm -f reductions/dot_product$(EXEEXT) $(AM_V_CCLD)$(LINK) $(reductions_dot_product_OBJECTS) $(reductions_dot_product_LDADD) $(LIBS) reductions/minmax_reduction.$(OBJEXT): reductions/$(am__dirstamp) \ reductions/$(DEPDIR)/$(am__dirstamp) reductions/minmax_reduction$(EXEEXT): $(reductions_minmax_reduction_OBJECTS) $(reductions_minmax_reduction_DEPENDENCIES) $(EXTRA_reductions_minmax_reduction_DEPENDENCIES) reductions/$(am__dirstamp) @rm -f reductions/minmax_reduction$(EXEEXT) $(AM_V_CCLD)$(LINK) $(reductions_minmax_reduction_OBJECTS) $(reductions_minmax_reduction_LDADD) $(LIBS) sched_ctx/$(am__dirstamp): @$(MKDIR_P) sched_ctx @: > sched_ctx/$(am__dirstamp) sched_ctx/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) sched_ctx/$(DEPDIR) @: > sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/dummy_sched_with_ctx.$(OBJEXT): sched_ctx/$(am__dirstamp) \ sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/dummy_sched_with_ctx$(EXEEXT): $(sched_ctx_dummy_sched_with_ctx_OBJECTS) $(sched_ctx_dummy_sched_with_ctx_DEPENDENCIES) $(EXTRA_sched_ctx_dummy_sched_with_ctx_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/dummy_sched_with_ctx$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_ctx_dummy_sched_with_ctx_OBJECTS) $(sched_ctx_dummy_sched_with_ctx_LDADD) $(LIBS) sched_ctx/gpu_partition.$(OBJEXT): sched_ctx/$(am__dirstamp) \ sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/axpy_partition_gpu.$(OBJEXT): sched_ctx/$(am__dirstamp) \ sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/gpu_partition$(EXEEXT): $(sched_ctx_gpu_partition_OBJECTS) $(sched_ctx_gpu_partition_DEPENDENCIES) $(EXTRA_sched_ctx_gpu_partition_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/gpu_partition$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_ctx_gpu_partition_OBJECTS) $(sched_ctx_gpu_partition_LDADD) $(LIBS) sched_ctx/nested_sched_ctxs-nested_sched_ctxs.$(OBJEXT): \ sched_ctx/$(am__dirstamp) sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/nested_sched_ctxs$(EXEEXT): $(sched_ctx_nested_sched_ctxs_OBJECTS) $(sched_ctx_nested_sched_ctxs_DEPENDENCIES) $(EXTRA_sched_ctx_nested_sched_ctxs_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/nested_sched_ctxs$(EXEEXT) $(AM_V_CCLD)$(sched_ctx_nested_sched_ctxs_LINK) $(sched_ctx_nested_sched_ctxs_OBJECTS) $(sched_ctx_nested_sched_ctxs_LDADD) $(LIBS) sched_ctx/parallel_code-parallel_code.$(OBJEXT): \ sched_ctx/$(am__dirstamp) sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/parallel_code$(EXEEXT): $(sched_ctx_parallel_code_OBJECTS) $(sched_ctx_parallel_code_DEPENDENCIES) $(EXTRA_sched_ctx_parallel_code_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/parallel_code$(EXEEXT) $(AM_V_CCLD)$(sched_ctx_parallel_code_LINK) $(sched_ctx_parallel_code_OBJECTS) $(sched_ctx_parallel_code_LDADD) $(LIBS) sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.$(OBJEXT): \ sched_ctx/$(am__dirstamp) sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/parallel_tasks_reuse_handle$(EXEEXT): $(sched_ctx_parallel_tasks_reuse_handle_OBJECTS) $(sched_ctx_parallel_tasks_reuse_handle_DEPENDENCIES) $(EXTRA_sched_ctx_parallel_tasks_reuse_handle_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/parallel_tasks_reuse_handle$(EXEEXT) $(AM_V_CCLD)$(sched_ctx_parallel_tasks_reuse_handle_LINK) $(sched_ctx_parallel_tasks_reuse_handle_OBJECTS) $(sched_ctx_parallel_tasks_reuse_handle_LDADD) $(LIBS) sched_ctx/prio.$(OBJEXT): sched_ctx/$(am__dirstamp) \ sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/prio$(EXEEXT): $(sched_ctx_prio_OBJECTS) $(sched_ctx_prio_DEPENDENCIES) $(EXTRA_sched_ctx_prio_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/prio$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_ctx_prio_OBJECTS) $(sched_ctx_prio_LDADD) $(LIBS) sched_ctx/sched_ctx.$(OBJEXT): sched_ctx/$(am__dirstamp) \ sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/sched_ctx$(EXEEXT): $(sched_ctx_sched_ctx_OBJECTS) $(sched_ctx_sched_ctx_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/sched_ctx$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_ctx_sched_ctx_OBJECTS) $(sched_ctx_sched_ctx_LDADD) $(LIBS) sched_ctx/sched_ctx_delete.$(OBJEXT): sched_ctx/$(am__dirstamp) \ sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/sched_ctx_delete$(EXEEXT): $(sched_ctx_sched_ctx_delete_OBJECTS) $(sched_ctx_sched_ctx_delete_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_delete_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/sched_ctx_delete$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_ctx_sched_ctx_delete_OBJECTS) $(sched_ctx_sched_ctx_delete_LDADD) $(LIBS) sched_ctx/sched_ctx_empty.$(OBJEXT): sched_ctx/$(am__dirstamp) \ sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/sched_ctx_empty$(EXEEXT): $(sched_ctx_sched_ctx_empty_OBJECTS) $(sched_ctx_sched_ctx_empty_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_empty_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/sched_ctx_empty$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_ctx_sched_ctx_empty_OBJECTS) $(sched_ctx_sched_ctx_empty_LDADD) $(LIBS) sched_ctx/sched_ctx_remove.$(OBJEXT): sched_ctx/$(am__dirstamp) \ sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/sched_ctx_remove$(EXEEXT): $(sched_ctx_sched_ctx_remove_OBJECTS) $(sched_ctx_sched_ctx_remove_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_remove_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/sched_ctx_remove$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_ctx_sched_ctx_remove_OBJECTS) $(sched_ctx_sched_ctx_remove_LDADD) $(LIBS) sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.$(OBJEXT): \ sched_ctx/$(am__dirstamp) sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/sched_ctx_without_sched_policy$(EXEEXT): $(sched_ctx_sched_ctx_without_sched_policy_OBJECTS) $(sched_ctx_sched_ctx_without_sched_policy_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_without_sched_policy_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/sched_ctx_without_sched_policy$(EXEEXT) $(AM_V_CCLD)$(sched_ctx_sched_ctx_without_sched_policy_LINK) $(sched_ctx_sched_ctx_without_sched_policy_OBJECTS) $(sched_ctx_sched_ctx_without_sched_policy_LDADD) $(LIBS) sched_ctx/sched_ctx_without_sched_policy_awake.$(OBJEXT): \ sched_ctx/$(am__dirstamp) sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/sched_ctx_without_sched_policy_awake$(EXEEXT): $(sched_ctx_sched_ctx_without_sched_policy_awake_OBJECTS) $(sched_ctx_sched_ctx_without_sched_policy_awake_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_without_sched_policy_awake_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/sched_ctx_without_sched_policy_awake$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_ctx_sched_ctx_without_sched_policy_awake_OBJECTS) $(sched_ctx_sched_ctx_without_sched_policy_awake_LDADD) $(LIBS) sched_ctx/two_cpu_contexts.$(OBJEXT): sched_ctx/$(am__dirstamp) \ sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/two_cpu_contexts$(EXEEXT): $(sched_ctx_two_cpu_contexts_OBJECTS) $(sched_ctx_two_cpu_contexts_DEPENDENCIES) $(EXTRA_sched_ctx_two_cpu_contexts_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/two_cpu_contexts$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_ctx_two_cpu_contexts_OBJECTS) $(sched_ctx_two_cpu_contexts_LDADD) $(LIBS) scheduler/dummy_modular_sched.$(OBJEXT): scheduler/$(am__dirstamp) \ scheduler/$(DEPDIR)/$(am__dirstamp) scheduler/dummy_modular_sched$(EXEEXT): $(scheduler_dummy_modular_sched_OBJECTS) $(scheduler_dummy_modular_sched_DEPENDENCIES) $(EXTRA_scheduler_dummy_modular_sched_DEPENDENCIES) scheduler/$(am__dirstamp) @rm -f scheduler/dummy_modular_sched$(EXEEXT) $(AM_V_CCLD)$(LINK) $(scheduler_dummy_modular_sched_OBJECTS) $(scheduler_dummy_modular_sched_LDADD) $(LIBS) scheduler/dummy_sched.$(OBJEXT): scheduler/$(am__dirstamp) \ scheduler/$(DEPDIR)/$(am__dirstamp) scheduler/dummy_sched$(EXEEXT): $(scheduler_dummy_sched_OBJECTS) $(scheduler_dummy_sched_DEPENDENCIES) $(EXTRA_scheduler_dummy_sched_DEPENDENCIES) scheduler/$(am__dirstamp) @rm -f scheduler/dummy_sched$(EXEEXT) $(AM_V_CCLD)$(LINK) $(scheduler_dummy_sched_OBJECTS) $(scheduler_dummy_sched_LDADD) $(LIBS) scheduler/heteroprio_test.$(OBJEXT): scheduler/$(am__dirstamp) \ scheduler/$(DEPDIR)/$(am__dirstamp) scheduler/heteroprio_test$(EXEEXT): $(scheduler_heteroprio_test_OBJECTS) $(scheduler_heteroprio_test_DEPENDENCIES) $(EXTRA_scheduler_heteroprio_test_DEPENDENCIES) scheduler/$(am__dirstamp) @rm -f scheduler/heteroprio_test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(scheduler_heteroprio_test_OBJECTS) $(scheduler_heteroprio_test_LDADD) $(LIBS) spmd/$(am__dirstamp): @$(MKDIR_P) spmd @: > spmd/$(am__dirstamp) spmd/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) spmd/$(DEPDIR) @: > spmd/$(DEPDIR)/$(am__dirstamp) spmd/vector_scal_spmd.$(OBJEXT): spmd/$(am__dirstamp) \ spmd/$(DEPDIR)/$(am__dirstamp) spmd/vector_scal_spmd$(EXEEXT): $(spmd_vector_scal_spmd_OBJECTS) $(spmd_vector_scal_spmd_DEPENDENCIES) $(EXTRA_spmd_vector_scal_spmd_DEPENDENCIES) spmd/$(am__dirstamp) @rm -f spmd/vector_scal_spmd$(EXEEXT) $(AM_V_CCLD)$(LINK) $(spmd_vector_scal_spmd_OBJECTS) $(spmd_vector_scal_spmd_LDADD) $(LIBS) spmv/$(am__dirstamp): @$(MKDIR_P) spmv @: > spmv/$(am__dirstamp) spmv/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) spmv/$(DEPDIR) @: > spmv/$(DEPDIR)/$(am__dirstamp) spmv/dw_block_spmv.$(OBJEXT): spmv/$(am__dirstamp) \ spmv/$(DEPDIR)/$(am__dirstamp) spmv/dw_block_spmv_kernels.$(OBJEXT): spmv/$(am__dirstamp) \ spmv/$(DEPDIR)/$(am__dirstamp) spmv/matrix_market/$(am__dirstamp): @$(MKDIR_P) spmv/matrix_market @: > spmv/matrix_market/$(am__dirstamp) spmv/matrix_market/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) spmv/matrix_market/$(DEPDIR) @: > spmv/matrix_market/$(DEPDIR)/$(am__dirstamp) spmv/matrix_market/mm_to_bcsr.$(OBJEXT): \ spmv/matrix_market/$(am__dirstamp) \ spmv/matrix_market/$(DEPDIR)/$(am__dirstamp) spmv/matrix_market/mmio.$(OBJEXT): spmv/matrix_market/$(am__dirstamp) \ spmv/matrix_market/$(DEPDIR)/$(am__dirstamp) spmv/dw_block_spmv$(EXEEXT): $(spmv_dw_block_spmv_OBJECTS) $(spmv_dw_block_spmv_DEPENDENCIES) $(EXTRA_spmv_dw_block_spmv_DEPENDENCIES) spmv/$(am__dirstamp) @rm -f spmv/dw_block_spmv$(EXEEXT) $(AM_V_CCLD)$(LINK) $(spmv_dw_block_spmv_OBJECTS) $(spmv_dw_block_spmv_LDADD) $(LIBS) spmv/spmv.$(OBJEXT): spmv/$(am__dirstamp) \ spmv/$(DEPDIR)/$(am__dirstamp) spmv/spmv_kernels.$(OBJEXT): spmv/$(am__dirstamp) \ spmv/$(DEPDIR)/$(am__dirstamp) spmv/spmv_cuda.$(OBJEXT): spmv/$(am__dirstamp) \ spmv/$(DEPDIR)/$(am__dirstamp) spmv/spmv$(EXEEXT): $(spmv_spmv_OBJECTS) $(spmv_spmv_DEPENDENCIES) $(EXTRA_spmv_spmv_DEPENDENCIES) spmv/$(am__dirstamp) @rm -f spmv/spmv$(EXEEXT) $(AM_V_CCLD)$(LINK) $(spmv_spmv_OBJECTS) $(spmv_spmv_LDADD) $(LIBS) subgraphs/$(am__dirstamp): @$(MKDIR_P) subgraphs @: > subgraphs/$(am__dirstamp) subgraphs/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) subgraphs/$(DEPDIR) @: > subgraphs/$(DEPDIR)/$(am__dirstamp) subgraphs/manual.$(OBJEXT): subgraphs/$(am__dirstamp) \ subgraphs/$(DEPDIR)/$(am__dirstamp) subgraphs/codelets.$(OBJEXT): subgraphs/$(am__dirstamp) \ subgraphs/$(DEPDIR)/$(am__dirstamp) subgraphs/manual$(EXEEXT): $(subgraphs_manual_OBJECTS) $(subgraphs_manual_DEPENDENCIES) $(EXTRA_subgraphs_manual_DEPENDENCIES) subgraphs/$(am__dirstamp) @rm -f subgraphs/manual$(EXEEXT) $(AM_V_CCLD)$(LINK) $(subgraphs_manual_OBJECTS) $(subgraphs_manual_LDADD) $(LIBS) subgraphs/partition.$(OBJEXT): subgraphs/$(am__dirstamp) \ subgraphs/$(DEPDIR)/$(am__dirstamp) subgraphs/partition$(EXEEXT): $(subgraphs_partition_OBJECTS) $(subgraphs_partition_DEPENDENCIES) $(EXTRA_subgraphs_partition_DEPENDENCIES) subgraphs/$(am__dirstamp) @rm -f subgraphs/partition$(EXEEXT) $(AM_V_CCLD)$(LINK) $(subgraphs_partition_OBJECTS) $(subgraphs_partition_LDADD) $(LIBS) subgraphs/plan.$(OBJEXT): subgraphs/$(am__dirstamp) \ subgraphs/$(DEPDIR)/$(am__dirstamp) subgraphs/plan$(EXEEXT): $(subgraphs_plan_OBJECTS) $(subgraphs_plan_DEPENDENCIES) $(EXTRA_subgraphs_plan_DEPENDENCIES) subgraphs/$(am__dirstamp) @rm -f subgraphs/plan$(EXEEXT) $(AM_V_CCLD)$(LINK) $(subgraphs_plan_OBJECTS) $(subgraphs_plan_LDADD) $(LIBS) tag_example/$(am__dirstamp): @$(MKDIR_P) tag_example @: > tag_example/$(am__dirstamp) tag_example/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) tag_example/$(DEPDIR) @: > tag_example/$(DEPDIR)/$(am__dirstamp) tag_example/tag_example.$(OBJEXT): tag_example/$(am__dirstamp) \ tag_example/$(DEPDIR)/$(am__dirstamp) tag_example/tag_example$(EXEEXT): $(tag_example_tag_example_OBJECTS) $(tag_example_tag_example_DEPENDENCIES) $(EXTRA_tag_example_tag_example_DEPENDENCIES) tag_example/$(am__dirstamp) @rm -f tag_example/tag_example$(EXEEXT) $(AM_V_CCLD)$(LINK) $(tag_example_tag_example_OBJECTS) $(tag_example_tag_example_LDADD) $(LIBS) tag_example/tag_example2.$(OBJEXT): tag_example/$(am__dirstamp) \ tag_example/$(DEPDIR)/$(am__dirstamp) tag_example/tag_example2$(EXEEXT): $(tag_example_tag_example2_OBJECTS) $(tag_example_tag_example2_DEPENDENCIES) $(EXTRA_tag_example_tag_example2_DEPENDENCIES) tag_example/$(am__dirstamp) @rm -f tag_example/tag_example2$(EXEEXT) $(AM_V_CCLD)$(LINK) $(tag_example_tag_example2_OBJECTS) $(tag_example_tag_example2_LDADD) $(LIBS) tag_example/tag_example3.$(OBJEXT): tag_example/$(am__dirstamp) \ tag_example/$(DEPDIR)/$(am__dirstamp) tag_example/tag_example3$(EXEEXT): $(tag_example_tag_example3_OBJECTS) $(tag_example_tag_example3_DEPENDENCIES) $(EXTRA_tag_example_tag_example3_DEPENDENCIES) tag_example/$(am__dirstamp) @rm -f tag_example/tag_example3$(EXEEXT) $(AM_V_CCLD)$(LINK) $(tag_example_tag_example3_OBJECTS) $(tag_example_tag_example3_LDADD) $(LIBS) tag_example/tag_example4.$(OBJEXT): tag_example/$(am__dirstamp) \ tag_example/$(DEPDIR)/$(am__dirstamp) tag_example/tag_example4$(EXEEXT): $(tag_example_tag_example4_OBJECTS) $(tag_example_tag_example4_DEPENDENCIES) $(EXTRA_tag_example_tag_example4_DEPENDENCIES) tag_example/$(am__dirstamp) @rm -f tag_example/tag_example4$(EXEEXT) $(AM_V_CCLD)$(LINK) $(tag_example_tag_example4_OBJECTS) $(tag_example_tag_example4_LDADD) $(LIBS) tag_example/tag_restartable.$(OBJEXT): tag_example/$(am__dirstamp) \ tag_example/$(DEPDIR)/$(am__dirstamp) tag_example/tag_restartable$(EXEEXT): $(tag_example_tag_restartable_OBJECTS) $(tag_example_tag_restartable_DEPENDENCIES) $(EXTRA_tag_example_tag_restartable_DEPENDENCIES) tag_example/$(am__dirstamp) @rm -f tag_example/tag_restartable$(EXEEXT) $(AM_V_CCLD)$(LINK) $(tag_example_tag_restartable_OBJECTS) $(tag_example_tag_restartable_LDADD) $(LIBS) transactions/$(am__dirstamp): @$(MKDIR_P) transactions @: > transactions/$(am__dirstamp) transactions/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) transactions/$(DEPDIR) @: > transactions/$(DEPDIR)/$(am__dirstamp) transactions/trs_inc.$(OBJEXT): transactions/$(am__dirstamp) \ transactions/$(DEPDIR)/$(am__dirstamp) transactions/trs_inc$(EXEEXT): $(transactions_trs_inc_OBJECTS) $(transactions_trs_inc_DEPENDENCIES) $(EXTRA_transactions_trs_inc_DEPENDENCIES) transactions/$(am__dirstamp) @rm -f transactions/trs_inc$(EXEEXT) $(AM_V_CCLD)$(LINK) $(transactions_trs_inc_OBJECTS) $(transactions_trs_inc_LDADD) $(LIBS) transactions/trs_sgemm.$(OBJEXT): transactions/$(am__dirstamp) \ transactions/$(DEPDIR)/$(am__dirstamp) transactions/trs_sgemm$(EXEEXT): $(transactions_trs_sgemm_OBJECTS) $(transactions_trs_sgemm_DEPENDENCIES) $(EXTRA_transactions_trs_sgemm_DEPENDENCIES) transactions/$(am__dirstamp) @rm -f transactions/trs_sgemm$(EXEEXT) $(AM_V_CCLD)$(LINK) $(transactions_trs_sgemm_OBJECTS) $(transactions_trs_sgemm_LDADD) $(LIBS) worker_collections/$(am__dirstamp): @$(MKDIR_P) worker_collections @: > worker_collections/$(am__dirstamp) worker_collections/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) worker_collections/$(DEPDIR) @: > worker_collections/$(DEPDIR)/$(am__dirstamp) worker_collections/worker_list_example.$(OBJEXT): \ worker_collections/$(am__dirstamp) \ worker_collections/$(DEPDIR)/$(am__dirstamp) worker_collections/worker_list_example$(EXEEXT): $(worker_collections_worker_list_example_OBJECTS) $(worker_collections_worker_list_example_DEPENDENCIES) $(EXTRA_worker_collections_worker_list_example_DEPENDENCIES) worker_collections/$(am__dirstamp) @rm -f worker_collections/worker_list_example$(EXEEXT) $(AM_V_CCLD)$(LINK) $(worker_collections_worker_list_example_OBJECTS) $(worker_collections_worker_list_example_LDADD) $(LIBS) worker_collections/worker_tree_example.$(OBJEXT): \ worker_collections/$(am__dirstamp) \ worker_collections/$(DEPDIR)/$(am__dirstamp) worker_collections/worker_tree_example$(EXEEXT): $(worker_collections_worker_tree_example_OBJECTS) $(worker_collections_worker_tree_example_DEPENDENCIES) $(EXTRA_worker_collections_worker_tree_example_DEPENDENCIES) worker_collections/$(am__dirstamp) @rm -f worker_collections/worker_tree_example$(EXEEXT) $(AM_V_CCLD)$(LINK) $(worker_collections_worker_tree_example_OBJECTS) $(worker_collections_worker_tree_example_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) -rm -f api/*.$(OBJEXT) -rm -f axpy/*.$(OBJEXT) -rm -f basic_examples/*.$(OBJEXT) -rm -f binary/*.$(OBJEXT) -rm -f callback/*.$(OBJEXT) -rm -f cg/*.$(OBJEXT) -rm -f cholesky/*.$(OBJEXT) -rm -f cholesky/*.lo -rm -f common/*.$(OBJEXT) -rm -f cpp/*.$(OBJEXT) -rm -f dependency/*.$(OBJEXT) -rm -f filters/*.$(OBJEXT) -rm -f filters/custom_mf/*.$(OBJEXT) -rm -f fortran/*.$(OBJEXT) -rm -f fortran90/*.$(OBJEXT) -rm -f gl_interop/*.$(OBJEXT) -rm -f heat/*.$(OBJEXT) -rm -f incrementer/*.$(OBJEXT) -rm -f interface/*.$(OBJEXT) -rm -f interface/complex_dev_handle/*.$(OBJEXT) -rm -f lu/*.$(OBJEXT) -rm -f mandelbrot/*.$(OBJEXT) -rm -f matvecmult/*.$(OBJEXT) -rm -f mlr/*.$(OBJEXT) -rm -f mult/*.$(OBJEXT) -rm -f native_fortran/*.$(OBJEXT) -rm -f openmp/*.$(OBJEXT) -rm -f parallel_workers/*.$(OBJEXT) -rm -f perf_monitoring/*.$(OBJEXT) -rm -f perf_steering/*.$(OBJEXT) -rm -f pi/*.$(OBJEXT) -rm -f pi/SobolQRNG/*.$(OBJEXT) -rm -f pipeline/*.$(OBJEXT) -rm -f ppm_downscaler/*.$(OBJEXT) -rm -f profiling/*.$(OBJEXT) -rm -f profiling_tool/*.$(OBJEXT) -rm -f profiling_tool/*.lo -rm -f reductions/*.$(OBJEXT) -rm -f sched_ctx/*.$(OBJEXT) -rm -f sched_ctx_utils/*.$(OBJEXT) -rm -f scheduler/*.$(OBJEXT) -rm -f scheduler/*.lo -rm -f spmd/*.$(OBJEXT) -rm -f spmv/*.$(OBJEXT) -rm -f spmv/matrix_market/*.$(OBJEXT) -rm -f subgraphs/*.$(OBJEXT) -rm -f tag_example/*.$(OBJEXT) -rm -f transactions/*.$(OBJEXT) -rm -f worker_collections/*.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/bcsr_data_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/block_data_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/coo_data_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/csr_data_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/matrix_data_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/multiformat_data_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/tensor_data_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/variable_data_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/vector_data_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@api/$(DEPDIR)/void_data_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@axpy/$(DEPDIR)/axpy.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@axpy/$(DEPDIR)/axpy_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/block.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/block_cpu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/block_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/dynamic_handles.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/hello_world.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/hooks.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/mult.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/multiformat.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/multiformat_conversion_codelets.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/multiformat_conversion_codelets_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/multiformat_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/ndim.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/task_insert_color.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/topology.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/variable.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/variable_kernels_cpu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/variable_kernels_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/vector_scal.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/vector_scal_c.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/vector_scal_cpu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic_examples/$(DEPDIR)/vector_scal_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@binary/$(DEPDIR)/binary.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@callback/$(DEPDIR)/callback.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@callback/$(DEPDIR)/prologue.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cg/$(DEPDIR)/cg.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_compil.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_grain_tag.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_implicit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_models.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_tag.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_tile_tag.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/libmy_dmda.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/blas.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cpp/$(DEPDIR)/add_vectors.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cpp/$(DEPDIR)/add_vectors_cpp11.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cpp/$(DEPDIR)/add_vectors_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cpp/$(DEPDIR)/incrementer_cpp.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@dependency/$(DEPDIR)/sequential_consistency.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@dependency/$(DEPDIR)/task_end_dep.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@dependency/$(DEPDIR)/task_end_dep_add.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/alloc.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/f3d_cpu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/f4d_cpu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/f5d_print.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fblock.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fblock_cpu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fblock_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fblock_pick_matrix.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fblock_pick_variable.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fblock_print.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmatrix.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmatrix_cpu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmatrix_pick_variable.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmatrix_pick_vector.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmatrix_print.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmultiple_manual.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmultiple_submit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmultiple_submit_implicit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmultiple_submit_readonly.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fmultiple_submit_readonly_downgrade.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_1d_pick_variable.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_2d_pick_vector.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_3d_pick_matrix.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_4d_pick_block.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_5d_pick_tensor.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_pick_ndim.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_pick_variable.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_to_block.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_to_matrix.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_to_tensor.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_to_variable.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fndim_to_vector.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fread.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/frecursive.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/ftensor.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/ftensor_cpu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/ftensor_pick_block.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/ftensor_pick_variable.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/ftensor_print.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fvector.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fvector_cpu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/fvector_pick_variable.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/shadow.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/shadow2d.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/shadow3d.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/shadow4d.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/shadownd.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/custom_mf/$(DEPDIR)/conversion_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/custom_mf/$(DEPDIR)/custom_conversion_codelets.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/custom_mf/$(DEPDIR)/custom_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/custom_mf/$(DEPDIR)/custom_mf_filter.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/custom_mf/$(DEPDIR)/custom_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@fortran/$(DEPDIR)/hello_c.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@fortran90/$(DEPDIR)/marshalling.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@gl_interop/$(DEPDIR)/gl_interop.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@gl_interop/$(DEPDIR)/gl_interop_idle.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/dw_factolu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/dw_factolu_grain.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/dw_factolu_kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/dw_factolu_tag.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/dw_sparse_cg.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/dw_sparse_cg_kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/heat.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/heat_display.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@heat/$(DEPDIR)/lu_kernels_model.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@incrementer/$(DEPDIR)/incrementer.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@incrementer/$(DEPDIR)/incrementer_kernels_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@interface/$(DEPDIR)/complex.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@interface/$(DEPDIR)/complex_filters.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@interface/$(DEPDIR)/complex_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@interface/$(DEPDIR)/complex_kernels_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_filters.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_kernels_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/blas_complex.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/clu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/clu_implicit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/clu_implicit_pivot.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/clu_kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/clu_pivot.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/dlu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/dlu_implicit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/dlu_implicit_pivot.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/dlu_kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/dlu_pivot.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/lu_example_complex_double.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/lu_example_complex_float.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/lu_example_double.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/lu_example_float.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/slu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/slu_implicit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/slu_implicit_pivot.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/slu_kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/slu_pivot.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/zlu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/zlu_implicit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/zlu_implicit_pivot.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/zlu_kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lu/$(DEPDIR)/zlu_pivot.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@matvecmult/$(DEPDIR)/matvecmult.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mlr/$(DEPDIR)/mlr.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mult/$(DEPDIR)/dgemm.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mult/$(DEPDIR)/dgemm_layout.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mult/$(DEPDIR)/sgemm.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mult/$(DEPDIR)/sgemm_layout.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perf_monitoring/$(DEPDIR)/perf_counters_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perf_monitoring/$(DEPDIR)/perf_counters_02.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perf_steering/$(DEPDIR)/perf_knobs_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perf_steering/$(DEPDIR)/perf_knobs_02.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perf_steering/$(DEPDIR)/perf_knobs_03.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@pi/$(DEPDIR)/pi.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@pi/$(DEPDIR)/pi_redux.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@pi/SobolQRNG/$(DEPDIR)/sobol_gold.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@pi/SobolQRNG/$(DEPDIR)/sobol_primitives.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@pipeline/$(DEPDIR)/pipeline.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@ppm_downscaler/$(DEPDIR)/ppm_downscaler.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@ppm_downscaler/$(DEPDIR)/yuv_downscaler.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@profiling/$(DEPDIR)/profiling.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@profiling_tool/$(DEPDIR)/libprofiling_tool.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@reductions/$(DEPDIR)/dot_product.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@reductions/$(DEPDIR)/minmax_reduction.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/dummy_sched_with_ctx.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/gpu_partition.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/prio.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_delete.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_empty.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_remove.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy_awake.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/two_cpu_contexts.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@scheduler/$(DEPDIR)/dummy_modular_sched.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@scheduler/$(DEPDIR)/dummy_sched.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@scheduler/$(DEPDIR)/heteroprio_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@scheduler/$(DEPDIR)/libdummy_sched.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@spmd/$(DEPDIR)/vector_scal_spmd.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@spmv/$(DEPDIR)/dw_block_spmv.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@spmv/$(DEPDIR)/dw_block_spmv_kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@spmv/$(DEPDIR)/spmv.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@spmv/$(DEPDIR)/spmv_kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@spmv/matrix_market/$(DEPDIR)/mm_to_bcsr.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@spmv/matrix_market/$(DEPDIR)/mmio.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@subgraphs/$(DEPDIR)/codelets.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@subgraphs/$(DEPDIR)/manual.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@subgraphs/$(DEPDIR)/partition.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@subgraphs/$(DEPDIR)/plan.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@tag_example/$(DEPDIR)/tag_example.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@tag_example/$(DEPDIR)/tag_example2.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@tag_example/$(DEPDIR)/tag_example3.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@tag_example/$(DEPDIR)/tag_example4.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@tag_example/$(DEPDIR)/tag_restartable.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@transactions/$(DEPDIR)/trs_inc.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@transactions/$(DEPDIR)/trs_sgemm.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@worker_collections/$(DEPDIR)/worker_list_example.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@worker_collections/$(DEPDIR)/worker_tree_example.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .F.o: $(AM_V_PPF77)$(PPF77COMPILE) -c -o $@ $< .F.obj: $(AM_V_PPF77)$(PPF77COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .F.lo: $(AM_V_PPF77)$(LTPPF77COMPILE) -c -o $@ $< .F.f: $(F77COMPILE) -F $< .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< loader-loader.o: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c loader-loader.obj: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` mandelbrot/mandelbrot-mandelbrot.o: mandelbrot/mandelbrot.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(mandelbrot_mandelbrot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT mandelbrot/mandelbrot-mandelbrot.o -MD -MP -MF mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Tpo -c -o mandelbrot/mandelbrot-mandelbrot.o `test -f 'mandelbrot/mandelbrot.c' || echo '$(srcdir)/'`mandelbrot/mandelbrot.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Tpo mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mandelbrot/mandelbrot.c' object='mandelbrot/mandelbrot-mandelbrot.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(mandelbrot_mandelbrot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mandelbrot/mandelbrot-mandelbrot.o `test -f 'mandelbrot/mandelbrot.c' || echo '$(srcdir)/'`mandelbrot/mandelbrot.c mandelbrot/mandelbrot-mandelbrot.obj: mandelbrot/mandelbrot.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(mandelbrot_mandelbrot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT mandelbrot/mandelbrot-mandelbrot.obj -MD -MP -MF mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Tpo -c -o mandelbrot/mandelbrot-mandelbrot.obj `if test -f 'mandelbrot/mandelbrot.c'; then $(CYGPATH_W) 'mandelbrot/mandelbrot.c'; else $(CYGPATH_W) '$(srcdir)/mandelbrot/mandelbrot.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Tpo mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mandelbrot/mandelbrot.c' object='mandelbrot/mandelbrot-mandelbrot.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(mandelbrot_mandelbrot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mandelbrot/mandelbrot-mandelbrot.obj `if test -f 'mandelbrot/mandelbrot.c'; then $(CYGPATH_W) 'mandelbrot/mandelbrot.c'; else $(CYGPATH_W) '$(srcdir)/mandelbrot/mandelbrot.c'; fi` openmp/vector_scal_omp-vector_scal_omp.o: openmp/vector_scal_omp.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(openmp_vector_scal_omp_CFLAGS) $(CFLAGS) -MT openmp/vector_scal_omp-vector_scal_omp.o -MD -MP -MF openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Tpo -c -o openmp/vector_scal_omp-vector_scal_omp.o `test -f 'openmp/vector_scal_omp.c' || echo '$(srcdir)/'`openmp/vector_scal_omp.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Tpo openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='openmp/vector_scal_omp.c' object='openmp/vector_scal_omp-vector_scal_omp.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(openmp_vector_scal_omp_CFLAGS) $(CFLAGS) -c -o openmp/vector_scal_omp-vector_scal_omp.o `test -f 'openmp/vector_scal_omp.c' || echo '$(srcdir)/'`openmp/vector_scal_omp.c openmp/vector_scal_omp-vector_scal_omp.obj: openmp/vector_scal_omp.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(openmp_vector_scal_omp_CFLAGS) $(CFLAGS) -MT openmp/vector_scal_omp-vector_scal_omp.obj -MD -MP -MF openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Tpo -c -o openmp/vector_scal_omp-vector_scal_omp.obj `if test -f 'openmp/vector_scal_omp.c'; then $(CYGPATH_W) 'openmp/vector_scal_omp.c'; else $(CYGPATH_W) '$(srcdir)/openmp/vector_scal_omp.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Tpo openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='openmp/vector_scal_omp.c' object='openmp/vector_scal_omp-vector_scal_omp.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(openmp_vector_scal_omp_CFLAGS) $(CFLAGS) -c -o openmp/vector_scal_omp-vector_scal_omp.obj `if test -f 'openmp/vector_scal_omp.c'; then $(CYGPATH_W) 'openmp/vector_scal_omp.c'; else $(CYGPATH_W) '$(srcdir)/openmp/vector_scal_omp.c'; fi` parallel_workers/parallel_workers-parallel_workers.o: parallel_workers/parallel_workers.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_CFLAGS) $(CFLAGS) -MT parallel_workers/parallel_workers-parallel_workers.o -MD -MP -MF parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Tpo -c -o parallel_workers/parallel_workers-parallel_workers.o `test -f 'parallel_workers/parallel_workers.c' || echo '$(srcdir)/'`parallel_workers/parallel_workers.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Tpo parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parallel_workers/parallel_workers.c' object='parallel_workers/parallel_workers-parallel_workers.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_CFLAGS) $(CFLAGS) -c -o parallel_workers/parallel_workers-parallel_workers.o `test -f 'parallel_workers/parallel_workers.c' || echo '$(srcdir)/'`parallel_workers/parallel_workers.c parallel_workers/parallel_workers-parallel_workers.obj: parallel_workers/parallel_workers.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_CFLAGS) $(CFLAGS) -MT parallel_workers/parallel_workers-parallel_workers.obj -MD -MP -MF parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Tpo -c -o parallel_workers/parallel_workers-parallel_workers.obj `if test -f 'parallel_workers/parallel_workers.c'; then $(CYGPATH_W) 'parallel_workers/parallel_workers.c'; else $(CYGPATH_W) '$(srcdir)/parallel_workers/parallel_workers.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Tpo parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parallel_workers/parallel_workers.c' object='parallel_workers/parallel_workers-parallel_workers.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_CFLAGS) $(CFLAGS) -c -o parallel_workers/parallel_workers-parallel_workers.obj `if test -f 'parallel_workers/parallel_workers.c'; then $(CYGPATH_W) 'parallel_workers/parallel_workers.c'; else $(CYGPATH_W) '$(srcdir)/parallel_workers/parallel_workers.c'; fi` parallel_workers/parallel_workers_func-parallel_workers_func.o: parallel_workers/parallel_workers_func.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_func_CFLAGS) $(CFLAGS) -MT parallel_workers/parallel_workers_func-parallel_workers_func.o -MD -MP -MF parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Tpo -c -o parallel_workers/parallel_workers_func-parallel_workers_func.o `test -f 'parallel_workers/parallel_workers_func.c' || echo '$(srcdir)/'`parallel_workers/parallel_workers_func.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Tpo parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parallel_workers/parallel_workers_func.c' object='parallel_workers/parallel_workers_func-parallel_workers_func.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_func_CFLAGS) $(CFLAGS) -c -o parallel_workers/parallel_workers_func-parallel_workers_func.o `test -f 'parallel_workers/parallel_workers_func.c' || echo '$(srcdir)/'`parallel_workers/parallel_workers_func.c parallel_workers/parallel_workers_func-parallel_workers_func.obj: parallel_workers/parallel_workers_func.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_func_CFLAGS) $(CFLAGS) -MT parallel_workers/parallel_workers_func-parallel_workers_func.obj -MD -MP -MF parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Tpo -c -o parallel_workers/parallel_workers_func-parallel_workers_func.obj `if test -f 'parallel_workers/parallel_workers_func.c'; then $(CYGPATH_W) 'parallel_workers/parallel_workers_func.c'; else $(CYGPATH_W) '$(srcdir)/parallel_workers/parallel_workers_func.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Tpo parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parallel_workers/parallel_workers_func.c' object='parallel_workers/parallel_workers_func-parallel_workers_func.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_func_CFLAGS) $(CFLAGS) -c -o parallel_workers/parallel_workers_func-parallel_workers_func.obj `if test -f 'parallel_workers/parallel_workers_func.c'; then $(CYGPATH_W) 'parallel_workers/parallel_workers_func.c'; else $(CYGPATH_W) '$(srcdir)/parallel_workers/parallel_workers_func.c'; fi` parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.o: parallel_workers/parallel_workers_oldapi.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_oldapi_CFLAGS) $(CFLAGS) -MT parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.o -MD -MP -MF parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Tpo -c -o parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.o `test -f 'parallel_workers/parallel_workers_oldapi.c' || echo '$(srcdir)/'`parallel_workers/parallel_workers_oldapi.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Tpo parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parallel_workers/parallel_workers_oldapi.c' object='parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_oldapi_CFLAGS) $(CFLAGS) -c -o parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.o `test -f 'parallel_workers/parallel_workers_oldapi.c' || echo '$(srcdir)/'`parallel_workers/parallel_workers_oldapi.c parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.obj: parallel_workers/parallel_workers_oldapi.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_oldapi_CFLAGS) $(CFLAGS) -MT parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.obj -MD -MP -MF parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Tpo -c -o parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.obj `if test -f 'parallel_workers/parallel_workers_oldapi.c'; then $(CYGPATH_W) 'parallel_workers/parallel_workers_oldapi.c'; else $(CYGPATH_W) '$(srcdir)/parallel_workers/parallel_workers_oldapi.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Tpo parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parallel_workers/parallel_workers_oldapi.c' object='parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(parallel_workers_parallel_workers_oldapi_CFLAGS) $(CFLAGS) -c -o parallel_workers/parallel_workers_oldapi-parallel_workers_oldapi.obj `if test -f 'parallel_workers/parallel_workers_oldapi.c'; then $(CYGPATH_W) 'parallel_workers/parallel_workers_oldapi.c'; else $(CYGPATH_W) '$(srcdir)/parallel_workers/parallel_workers_oldapi.c'; fi` sched_ctx/nested_sched_ctxs-nested_sched_ctxs.o: sched_ctx/nested_sched_ctxs.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_nested_sched_ctxs_CFLAGS) $(CFLAGS) -MT sched_ctx/nested_sched_ctxs-nested_sched_ctxs.o -MD -MP -MF sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Tpo -c -o sched_ctx/nested_sched_ctxs-nested_sched_ctxs.o `test -f 'sched_ctx/nested_sched_ctxs.c' || echo '$(srcdir)/'`sched_ctx/nested_sched_ctxs.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Tpo sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/nested_sched_ctxs.c' object='sched_ctx/nested_sched_ctxs-nested_sched_ctxs.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_nested_sched_ctxs_CFLAGS) $(CFLAGS) -c -o sched_ctx/nested_sched_ctxs-nested_sched_ctxs.o `test -f 'sched_ctx/nested_sched_ctxs.c' || echo '$(srcdir)/'`sched_ctx/nested_sched_ctxs.c sched_ctx/nested_sched_ctxs-nested_sched_ctxs.obj: sched_ctx/nested_sched_ctxs.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_nested_sched_ctxs_CFLAGS) $(CFLAGS) -MT sched_ctx/nested_sched_ctxs-nested_sched_ctxs.obj -MD -MP -MF sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Tpo -c -o sched_ctx/nested_sched_ctxs-nested_sched_ctxs.obj `if test -f 'sched_ctx/nested_sched_ctxs.c'; then $(CYGPATH_W) 'sched_ctx/nested_sched_ctxs.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/nested_sched_ctxs.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Tpo sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/nested_sched_ctxs.c' object='sched_ctx/nested_sched_ctxs-nested_sched_ctxs.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_nested_sched_ctxs_CFLAGS) $(CFLAGS) -c -o sched_ctx/nested_sched_ctxs-nested_sched_ctxs.obj `if test -f 'sched_ctx/nested_sched_ctxs.c'; then $(CYGPATH_W) 'sched_ctx/nested_sched_ctxs.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/nested_sched_ctxs.c'; fi` sched_ctx/parallel_code-parallel_code.o: sched_ctx/parallel_code.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_code_CFLAGS) $(CFLAGS) -MT sched_ctx/parallel_code-parallel_code.o -MD -MP -MF sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Tpo -c -o sched_ctx/parallel_code-parallel_code.o `test -f 'sched_ctx/parallel_code.c' || echo '$(srcdir)/'`sched_ctx/parallel_code.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Tpo sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/parallel_code.c' object='sched_ctx/parallel_code-parallel_code.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_code_CFLAGS) $(CFLAGS) -c -o sched_ctx/parallel_code-parallel_code.o `test -f 'sched_ctx/parallel_code.c' || echo '$(srcdir)/'`sched_ctx/parallel_code.c sched_ctx/parallel_code-parallel_code.obj: sched_ctx/parallel_code.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_code_CFLAGS) $(CFLAGS) -MT sched_ctx/parallel_code-parallel_code.obj -MD -MP -MF sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Tpo -c -o sched_ctx/parallel_code-parallel_code.obj `if test -f 'sched_ctx/parallel_code.c'; then $(CYGPATH_W) 'sched_ctx/parallel_code.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/parallel_code.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Tpo sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/parallel_code.c' object='sched_ctx/parallel_code-parallel_code.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_code_CFLAGS) $(CFLAGS) -c -o sched_ctx/parallel_code-parallel_code.obj `if test -f 'sched_ctx/parallel_code.c'; then $(CYGPATH_W) 'sched_ctx/parallel_code.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/parallel_code.c'; fi` sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.o: sched_ctx/parallel_tasks_reuse_handle.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_tasks_reuse_handle_CFLAGS) $(CFLAGS) -MT sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.o -MD -MP -MF sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Tpo -c -o sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.o `test -f 'sched_ctx/parallel_tasks_reuse_handle.c' || echo '$(srcdir)/'`sched_ctx/parallel_tasks_reuse_handle.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Tpo sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/parallel_tasks_reuse_handle.c' object='sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_tasks_reuse_handle_CFLAGS) $(CFLAGS) -c -o sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.o `test -f 'sched_ctx/parallel_tasks_reuse_handle.c' || echo '$(srcdir)/'`sched_ctx/parallel_tasks_reuse_handle.c sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.obj: sched_ctx/parallel_tasks_reuse_handle.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_tasks_reuse_handle_CFLAGS) $(CFLAGS) -MT sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.obj -MD -MP -MF sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Tpo -c -o sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.obj `if test -f 'sched_ctx/parallel_tasks_reuse_handle.c'; then $(CYGPATH_W) 'sched_ctx/parallel_tasks_reuse_handle.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/parallel_tasks_reuse_handle.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Tpo sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/parallel_tasks_reuse_handle.c' object='sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_parallel_tasks_reuse_handle_CFLAGS) $(CFLAGS) -c -o sched_ctx/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.obj `if test -f 'sched_ctx/parallel_tasks_reuse_handle.c'; then $(CYGPATH_W) 'sched_ctx/parallel_tasks_reuse_handle.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/parallel_tasks_reuse_handle.c'; fi` sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.o: sched_ctx/sched_ctx_without_sched_policy.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_sched_ctx_without_sched_policy_CFLAGS) $(CFLAGS) -MT sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.o -MD -MP -MF sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Tpo -c -o sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.o `test -f 'sched_ctx/sched_ctx_without_sched_policy.c' || echo '$(srcdir)/'`sched_ctx/sched_ctx_without_sched_policy.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Tpo sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/sched_ctx_without_sched_policy.c' object='sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_sched_ctx_without_sched_policy_CFLAGS) $(CFLAGS) -c -o sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.o `test -f 'sched_ctx/sched_ctx_without_sched_policy.c' || echo '$(srcdir)/'`sched_ctx/sched_ctx_without_sched_policy.c sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.obj: sched_ctx/sched_ctx_without_sched_policy.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_sched_ctx_without_sched_policy_CFLAGS) $(CFLAGS) -MT sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.obj -MD -MP -MF sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Tpo -c -o sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.obj `if test -f 'sched_ctx/sched_ctx_without_sched_policy.c'; then $(CYGPATH_W) 'sched_ctx/sched_ctx_without_sched_policy.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/sched_ctx_without_sched_policy.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Tpo sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_ctx/sched_ctx_without_sched_policy.c' object='sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_ctx_sched_ctx_without_sched_policy_CFLAGS) $(CFLAGS) -c -o sched_ctx/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.obj `if test -f 'sched_ctx/sched_ctx_without_sched_policy.c'; then $(CYGPATH_W) 'sched_ctx/sched_ctx_without_sched_policy.c'; else $(CYGPATH_W) '$(srcdir)/sched_ctx/sched_ctx_without_sched_policy.c'; fi` .cpp.o: @am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< .cpp.obj: @am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .cpp.lo: @am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< .f90.o: $(AM_V_FC)$(FCCOMPILE) -c -o $@ $< .f90.obj: $(AM_V_FC)$(FCCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .f90.lo: $(AM_V_FC)$(LTFCCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs -rm -rf api/.libs api/_libs -rm -rf axpy/.libs axpy/_libs -rm -rf basic_examples/.libs basic_examples/_libs -rm -rf binary/.libs binary/_libs -rm -rf callback/.libs callback/_libs -rm -rf cg/.libs cg/_libs -rm -rf cholesky/.libs cholesky/_libs -rm -rf cpp/.libs cpp/_libs -rm -rf dependency/.libs dependency/_libs -rm -rf filters/.libs filters/_libs -rm -rf filters/custom_mf/.libs filters/custom_mf/_libs -rm -rf fortran/.libs fortran/_libs -rm -rf fortran90/.libs fortran90/_libs -rm -rf gl_interop/.libs gl_interop/_libs -rm -rf heat/.libs heat/_libs -rm -rf incrementer/.libs incrementer/_libs -rm -rf interface/.libs interface/_libs -rm -rf interface/complex_dev_handle/.libs interface/complex_dev_handle/_libs -rm -rf lu/.libs lu/_libs -rm -rf mandelbrot/.libs mandelbrot/_libs -rm -rf matvecmult/.libs matvecmult/_libs -rm -rf mlr/.libs mlr/_libs -rm -rf mult/.libs mult/_libs -rm -rf native_fortran/.libs native_fortran/_libs -rm -rf openmp/.libs openmp/_libs -rm -rf parallel_workers/.libs parallel_workers/_libs -rm -rf perf_monitoring/.libs perf_monitoring/_libs -rm -rf perf_steering/.libs perf_steering/_libs -rm -rf pi/.libs pi/_libs -rm -rf pipeline/.libs pipeline/_libs -rm -rf ppm_downscaler/.libs ppm_downscaler/_libs -rm -rf profiling/.libs profiling/_libs -rm -rf profiling_tool/.libs profiling_tool/_libs -rm -rf reductions/.libs reductions/_libs -rm -rf sched_ctx/.libs sched_ctx/_libs -rm -rf scheduler/.libs scheduler/_libs -rm -rf spmd/.libs spmd/_libs -rm -rf spmv/.libs spmv/_libs -rm -rf subgraphs/.libs subgraphs/_libs -rm -rf tag_example/.libs tag_example/_libs -rm -rf transactions/.libs transactions/_libs -rm -rf worker_collections/.libs worker_collections/_libs install-nobase_STARPU_OPENCL_DATADATA: $(nobase_STARPU_OPENCL_DATA_DATA) @$(NORMAL_INSTALL) @list='$(nobase_STARPU_OPENCL_DATA_DATA)'; test -n "$(STARPU_OPENCL_DATAdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_OPENCL_DATAdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(STARPU_OPENCL_DATAdir)" || exit 1; \ fi; \ $(am__nobase_list) | while read dir files; do \ xfiles=; for file in $$files; do \ if test -f "$$file"; then xfiles="$$xfiles $$file"; \ else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ test -z "$$xfiles" || { \ test "x$$dir" = x. || { \ echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir'"; \ $(MKDIR_P) "$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir"; }; \ echo " $(INSTALL_DATA) $$xfiles '$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir'"; \ $(INSTALL_DATA) $$xfiles "$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir" || exit $$?; }; \ done uninstall-nobase_STARPU_OPENCL_DATADATA: @$(NORMAL_UNINSTALL) @list='$(nobase_STARPU_OPENCL_DATA_DATA)'; test -n "$(STARPU_OPENCL_DATAdir)" || list=; \ $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ dir='$(DESTDIR)$(STARPU_OPENCL_DATAdir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: $(check_PROGRAMS) @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all $(check_PROGRAMS) @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? scheduler/schedulers.sh.log: scheduler/schedulers.sh @p='scheduler/schedulers.sh'; \ b='scheduler/schedulers.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) scheduler/schedulers_context.sh.log: scheduler/schedulers_context.sh @p='scheduler/schedulers_context.sh'; \ b='scheduler/schedulers_context.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mult/sgemm.sh.log: mult/sgemm.sh @p='mult/sgemm.sh'; \ b='mult/sgemm.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) profiling_tool/prof.sh.log: profiling_tool/prof.sh @p='profiling_tool/prof.sh'; \ b='profiling_tool/prof.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) scheduler/libdummy_sched.sh.log: scheduler/libdummy_sched.sh @p='scheduler/libdummy_sched.sh'; \ b='scheduler/libdummy_sched.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) heat/heat.sh.log: heat/heat.sh @p='heat/heat.sh'; \ b='heat/heat.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) lu/lu.sh.log: lu/lu.sh @p='lu/lu.sh'; \ b='lu/lu.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) cholesky/cholesky.sh.log: cholesky/cholesky.sh @p='cholesky/cholesky.sh'; \ b='cholesky/cholesky.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) cholesky/cholesky_julia.sh.log: cholesky/cholesky_julia.sh @p='cholesky/cholesky_julia.sh'; \ b='cholesky/cholesky_julia.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/prio.log: sched_ctx/prio$(EXEEXT) @p='sched_ctx/prio$(EXEEXT)'; \ b='sched_ctx/prio'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) scheduler/dummy_sched.log: scheduler/dummy_sched$(EXEEXT) @p='scheduler/dummy_sched$(EXEEXT)'; \ b='scheduler/dummy_sched'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) scheduler/dummy_modular_sched.log: scheduler/dummy_modular_sched$(EXEEXT) @p='scheduler/dummy_modular_sched$(EXEEXT)'; \ b='scheduler/dummy_modular_sched'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) worker_collections/worker_list_example.log: worker_collections/worker_list_example$(EXEEXT) @p='worker_collections/worker_list_example$(EXEEXT)'; \ b='worker_collections/worker_list_example'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) api/bcsr_data_interface.log: api/bcsr_data_interface$(EXEEXT) @p='api/bcsr_data_interface$(EXEEXT)'; \ b='api/bcsr_data_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) api/block_data_interface.log: api/block_data_interface$(EXEEXT) @p='api/block_data_interface$(EXEEXT)'; \ b='api/block_data_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) api/coo_data_interface.log: api/coo_data_interface$(EXEEXT) @p='api/coo_data_interface$(EXEEXT)'; \ b='api/coo_data_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) api/csr_data_interface.log: api/csr_data_interface$(EXEEXT) @p='api/csr_data_interface$(EXEEXT)'; \ b='api/csr_data_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) api/matrix_data_interface.log: api/matrix_data_interface$(EXEEXT) @p='api/matrix_data_interface$(EXEEXT)'; \ b='api/matrix_data_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) api/multiformat_data_interface.log: api/multiformat_data_interface$(EXEEXT) @p='api/multiformat_data_interface$(EXEEXT)'; \ b='api/multiformat_data_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) api/tensor_data_interface.log: api/tensor_data_interface$(EXEEXT) @p='api/tensor_data_interface$(EXEEXT)'; \ b='api/tensor_data_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) api/variable_data_interface.log: api/variable_data_interface$(EXEEXT) @p='api/variable_data_interface$(EXEEXT)'; \ b='api/variable_data_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) api/vector_data_interface.log: api/vector_data_interface$(EXEEXT) @p='api/vector_data_interface$(EXEEXT)'; \ b='api/vector_data_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) api/void_data_interface.log: api/void_data_interface$(EXEEXT) @p='api/void_data_interface$(EXEEXT)'; \ b='api/void_data_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic_examples/hello_world.log: basic_examples/hello_world$(EXEEXT) @p='basic_examples/hello_world$(EXEEXT)'; \ b='basic_examples/hello_world'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic_examples/hooks.log: basic_examples/hooks$(EXEEXT) @p='basic_examples/hooks$(EXEEXT)'; \ b='basic_examples/hooks'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic_examples/topology.log: basic_examples/topology$(EXEEXT) @p='basic_examples/topology$(EXEEXT)'; \ b='basic_examples/topology'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic_examples/vector_scal.log: basic_examples/vector_scal$(EXEEXT) @p='basic_examples/vector_scal$(EXEEXT)'; \ b='basic_examples/vector_scal'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic_examples/mult.log: basic_examples/mult$(EXEEXT) @p='basic_examples/mult$(EXEEXT)'; \ b='basic_examples/mult'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic_examples/block.log: basic_examples/block$(EXEEXT) @p='basic_examples/block$(EXEEXT)'; \ b='basic_examples/block'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic_examples/variable.log: basic_examples/variable$(EXEEXT) @p='basic_examples/variable$(EXEEXT)'; \ b='basic_examples/variable'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic_examples/multiformat.log: basic_examples/multiformat$(EXEEXT) @p='basic_examples/multiformat$(EXEEXT)'; \ b='basic_examples/multiformat'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic_examples/dynamic_handles.log: basic_examples/dynamic_handles$(EXEEXT) @p='basic_examples/dynamic_handles$(EXEEXT)'; \ b='basic_examples/dynamic_handles'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic_examples/task_insert_color.log: basic_examples/task_insert_color$(EXEEXT) @p='basic_examples/task_insert_color$(EXEEXT)'; \ b='basic_examples/task_insert_color'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic_examples/ndim.log: basic_examples/ndim$(EXEEXT) @p='basic_examples/ndim$(EXEEXT)'; \ b='basic_examples/ndim'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mlr/mlr.log: mlr/mlr$(EXEEXT) @p='mlr/mlr$(EXEEXT)'; \ b='mlr/mlr'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) cpp/incrementer_cpp.log: cpp/incrementer_cpp$(EXEEXT) @p='cpp/incrementer_cpp$(EXEEXT)'; \ b='cpp/incrementer_cpp'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) cpp/add_vectors.log: cpp/add_vectors$(EXEEXT) @p='cpp/add_vectors$(EXEEXT)'; \ b='cpp/add_vectors'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) cpp/add_vectors_interface.log: cpp/add_vectors_interface$(EXEEXT) @p='cpp/add_vectors_interface$(EXEEXT)'; \ b='cpp/add_vectors_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/alloc.log: filters/alloc$(EXEEXT) @p='filters/alloc$(EXEEXT)'; \ b='filters/alloc'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fread.log: filters/fread$(EXEEXT) @p='filters/fread$(EXEEXT)'; \ b='filters/fread'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fvector.log: filters/fvector$(EXEEXT) @p='filters/fvector$(EXEEXT)'; \ b='filters/fvector'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fvector_pick_variable.log: filters/fvector_pick_variable$(EXEEXT) @p='filters/fvector_pick_variable$(EXEEXT)'; \ b='filters/fvector_pick_variable'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/ftensor.log: filters/ftensor$(EXEEXT) @p='filters/ftensor$(EXEEXT)'; \ b='filters/ftensor'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/ftensor_pick_block.log: filters/ftensor_pick_block$(EXEEXT) @p='filters/ftensor_pick_block$(EXEEXT)'; \ b='filters/ftensor_pick_block'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/ftensor_pick_variable.log: filters/ftensor_pick_variable$(EXEEXT) @p='filters/ftensor_pick_variable$(EXEEXT)'; \ b='filters/ftensor_pick_variable'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fblock.log: filters/fblock$(EXEEXT) @p='filters/fblock$(EXEEXT)'; \ b='filters/fblock'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fblock_pick_matrix.log: filters/fblock_pick_matrix$(EXEEXT) @p='filters/fblock_pick_matrix$(EXEEXT)'; \ b='filters/fblock_pick_matrix'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fblock_pick_variable.log: filters/fblock_pick_variable$(EXEEXT) @p='filters/fblock_pick_variable$(EXEEXT)'; \ b='filters/fblock_pick_variable'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fmatrix.log: filters/fmatrix$(EXEEXT) @p='filters/fmatrix$(EXEEXT)'; \ b='filters/fmatrix'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fmatrix_pick_vector.log: filters/fmatrix_pick_vector$(EXEEXT) @p='filters/fmatrix_pick_vector$(EXEEXT)'; \ b='filters/fmatrix_pick_vector'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fmatrix_pick_variable.log: filters/fmatrix_pick_variable$(EXEEXT) @p='filters/fmatrix_pick_variable$(EXEEXT)'; \ b='filters/fmatrix_pick_variable'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fndim.log: filters/fndim$(EXEEXT) @p='filters/fndim$(EXEEXT)'; \ b='filters/fndim'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fndim_pick_ndim.log: filters/fndim_pick_ndim$(EXEEXT) @p='filters/fndim_pick_ndim$(EXEEXT)'; \ b='filters/fndim_pick_ndim'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fndim_5d_pick_tensor.log: filters/fndim_5d_pick_tensor$(EXEEXT) @p='filters/fndim_5d_pick_tensor$(EXEEXT)'; \ b='filters/fndim_5d_pick_tensor'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fndim_4d_pick_block.log: filters/fndim_4d_pick_block$(EXEEXT) @p='filters/fndim_4d_pick_block$(EXEEXT)'; \ b='filters/fndim_4d_pick_block'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fndim_3d_pick_matrix.log: filters/fndim_3d_pick_matrix$(EXEEXT) @p='filters/fndim_3d_pick_matrix$(EXEEXT)'; \ b='filters/fndim_3d_pick_matrix'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fndim_2d_pick_vector.log: filters/fndim_2d_pick_vector$(EXEEXT) @p='filters/fndim_2d_pick_vector$(EXEEXT)'; \ b='filters/fndim_2d_pick_vector'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fndim_1d_pick_variable.log: filters/fndim_1d_pick_variable$(EXEEXT) @p='filters/fndim_1d_pick_variable$(EXEEXT)'; \ b='filters/fndim_1d_pick_variable'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fndim_pick_variable.log: filters/fndim_pick_variable$(EXEEXT) @p='filters/fndim_pick_variable$(EXEEXT)'; \ b='filters/fndim_pick_variable'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fndim_to_tensor.log: filters/fndim_to_tensor$(EXEEXT) @p='filters/fndim_to_tensor$(EXEEXT)'; \ b='filters/fndim_to_tensor'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fndim_to_block.log: filters/fndim_to_block$(EXEEXT) @p='filters/fndim_to_block$(EXEEXT)'; \ b='filters/fndim_to_block'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fndim_to_matrix.log: filters/fndim_to_matrix$(EXEEXT) @p='filters/fndim_to_matrix$(EXEEXT)'; \ b='filters/fndim_to_matrix'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fndim_to_vector.log: filters/fndim_to_vector$(EXEEXT) @p='filters/fndim_to_vector$(EXEEXT)'; \ b='filters/fndim_to_vector'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fndim_to_variable.log: filters/fndim_to_variable$(EXEEXT) @p='filters/fndim_to_variable$(EXEEXT)'; \ b='filters/fndim_to_variable'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fmultiple_manual.log: filters/fmultiple_manual$(EXEEXT) @p='filters/fmultiple_manual$(EXEEXT)'; \ b='filters/fmultiple_manual'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fmultiple_submit.log: filters/fmultiple_submit$(EXEEXT) @p='filters/fmultiple_submit$(EXEEXT)'; \ b='filters/fmultiple_submit'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fmultiple_submit_readonly.log: filters/fmultiple_submit_readonly$(EXEEXT) @p='filters/fmultiple_submit_readonly$(EXEEXT)'; \ b='filters/fmultiple_submit_readonly'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fmultiple_submit_readonly_downgrade.log: filters/fmultiple_submit_readonly_downgrade$(EXEEXT) @p='filters/fmultiple_submit_readonly_downgrade$(EXEEXT)'; \ b='filters/fmultiple_submit_readonly_downgrade'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/fmultiple_submit_implicit.log: filters/fmultiple_submit_implicit$(EXEEXT) @p='filters/fmultiple_submit_implicit$(EXEEXT)'; \ b='filters/fmultiple_submit_implicit'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/frecursive.log: filters/frecursive$(EXEEXT) @p='filters/frecursive$(EXEEXT)'; \ b='filters/frecursive'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/shadow.log: filters/shadow$(EXEEXT) @p='filters/shadow$(EXEEXT)'; \ b='filters/shadow'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/shadow2d.log: filters/shadow2d$(EXEEXT) @p='filters/shadow2d$(EXEEXT)'; \ b='filters/shadow2d'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/shadow3d.log: filters/shadow3d$(EXEEXT) @p='filters/shadow3d$(EXEEXT)'; \ b='filters/shadow3d'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/shadow4d.log: filters/shadow4d$(EXEEXT) @p='filters/shadow4d$(EXEEXT)'; \ b='filters/shadow4d'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/shadownd.log: filters/shadownd$(EXEEXT) @p='filters/shadownd$(EXEEXT)'; \ b='filters/shadownd'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) tag_example/tag_example.log: tag_example/tag_example$(EXEEXT) @p='tag_example/tag_example$(EXEEXT)'; \ b='tag_example/tag_example'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) tag_example/tag_example2.log: tag_example/tag_example2$(EXEEXT) @p='tag_example/tag_example2$(EXEEXT)'; \ b='tag_example/tag_example2'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) tag_example/tag_example3.log: tag_example/tag_example3$(EXEEXT) @p='tag_example/tag_example3$(EXEEXT)'; \ b='tag_example/tag_example3'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) tag_example/tag_example4.log: tag_example/tag_example4$(EXEEXT) @p='tag_example/tag_example4$(EXEEXT)'; \ b='tag_example/tag_example4'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) tag_example/tag_restartable.log: tag_example/tag_restartable$(EXEEXT) @p='tag_example/tag_restartable$(EXEEXT)'; \ b='tag_example/tag_restartable'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) transactions/trs_inc.log: transactions/trs_inc$(EXEEXT) @p='transactions/trs_inc$(EXEEXT)'; \ b='transactions/trs_inc'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) spmd/vector_scal_spmd.log: spmd/vector_scal_spmd$(EXEEXT) @p='spmd/vector_scal_spmd$(EXEEXT)'; \ b='spmd/vector_scal_spmd'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) spmv/spmv.log: spmv/spmv$(EXEEXT) @p='spmv/spmv$(EXEEXT)'; \ b='spmv/spmv'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) callback/callback.log: callback/callback$(EXEEXT) @p='callback/callback$(EXEEXT)'; \ b='callback/callback'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) callback/prologue.log: callback/prologue$(EXEEXT) @p='callback/prologue$(EXEEXT)'; \ b='callback/prologue'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) incrementer/incrementer.log: incrementer/incrementer$(EXEEXT) @p='incrementer/incrementer$(EXEEXT)'; \ b='incrementer/incrementer'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) binary/binary.log: binary/binary$(EXEEXT) @p='binary/binary$(EXEEXT)'; \ b='binary/binary'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) interface/complex.log: interface/complex$(EXEEXT) @p='interface/complex$(EXEEXT)'; \ b='interface/complex'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) interface/complex_dev_handle/complex_dev_handle.log: interface/complex_dev_handle/complex_dev_handle$(EXEEXT) @p='interface/complex_dev_handle/complex_dev_handle$(EXEEXT)'; \ b='interface/complex_dev_handle/complex_dev_handle'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) matvecmult/matvecmult.log: matvecmult/matvecmult$(EXEEXT) @p='matvecmult/matvecmult$(EXEEXT)'; \ b='matvecmult/matvecmult'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) profiling/profiling.log: profiling/profiling$(EXEEXT) @p='profiling/profiling$(EXEEXT)'; \ b='profiling/profiling'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perf_monitoring/perf_counters_01.log: perf_monitoring/perf_counters_01$(EXEEXT) @p='perf_monitoring/perf_counters_01$(EXEEXT)'; \ b='perf_monitoring/perf_counters_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perf_monitoring/perf_counters_02.log: perf_monitoring/perf_counters_02$(EXEEXT) @p='perf_monitoring/perf_counters_02$(EXEEXT)'; \ b='perf_monitoring/perf_counters_02'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perf_steering/perf_knobs_01.log: perf_steering/perf_knobs_01$(EXEEXT) @p='perf_steering/perf_knobs_01$(EXEEXT)'; \ b='perf_steering/perf_knobs_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perf_steering/perf_knobs_02.log: perf_steering/perf_knobs_02$(EXEEXT) @p='perf_steering/perf_knobs_02$(EXEEXT)'; \ b='perf_steering/perf_knobs_02'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perf_steering/perf_knobs_03.log: perf_steering/perf_knobs_03$(EXEEXT) @p='perf_steering/perf_knobs_03$(EXEEXT)'; \ b='perf_steering/perf_knobs_03'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) scheduler/heteroprio_test.log: scheduler/heteroprio_test$(EXEEXT) @p='scheduler/heteroprio_test$(EXEEXT)'; \ b='scheduler/heteroprio_test'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/sched_ctx.log: sched_ctx/sched_ctx$(EXEEXT) @p='sched_ctx/sched_ctx$(EXEEXT)'; \ b='sched_ctx/sched_ctx'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/sched_ctx_empty.log: sched_ctx/sched_ctx_empty$(EXEEXT) @p='sched_ctx/sched_ctx_empty$(EXEEXT)'; \ b='sched_ctx/sched_ctx_empty'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/sched_ctx_remove.log: sched_ctx/sched_ctx_remove$(EXEEXT) @p='sched_ctx/sched_ctx_remove$(EXEEXT)'; \ b='sched_ctx/sched_ctx_remove'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/sched_ctx_delete.log: sched_ctx/sched_ctx_delete$(EXEEXT) @p='sched_ctx/sched_ctx_delete$(EXEEXT)'; \ b='sched_ctx/sched_ctx_delete'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/two_cpu_contexts.log: sched_ctx/two_cpu_contexts$(EXEEXT) @p='sched_ctx/two_cpu_contexts$(EXEEXT)'; \ b='sched_ctx/two_cpu_contexts'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/dummy_sched_with_ctx.log: sched_ctx/dummy_sched_with_ctx$(EXEEXT) @p='sched_ctx/dummy_sched_with_ctx$(EXEEXT)'; \ b='sched_ctx/dummy_sched_with_ctx'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) worker_collections/worker_tree_example.log: worker_collections/worker_tree_example$(EXEEXT) @p='worker_collections/worker_tree_example$(EXEEXT)'; \ b='worker_collections/worker_tree_example'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) reductions/dot_product.log: reductions/dot_product$(EXEEXT) @p='reductions/dot_product$(EXEEXT)'; \ b='reductions/dot_product'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) reductions/minmax_reduction.log: reductions/minmax_reduction$(EXEEXT) @p='reductions/minmax_reduction$(EXEEXT)'; \ b='reductions/minmax_reduction'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) dependency/task_end_dep.log: dependency/task_end_dep$(EXEEXT) @p='dependency/task_end_dep$(EXEEXT)'; \ b='dependency/task_end_dep'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) dependency/task_end_dep_add.log: dependency/task_end_dep_add$(EXEEXT) @p='dependency/task_end_dep_add$(EXEEXT)'; \ b='dependency/task_end_dep_add'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) dependency/sequential_consistency.log: dependency/sequential_consistency$(EXEEXT) @p='dependency/sequential_consistency$(EXEEXT)'; \ b='dependency/sequential_consistency'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) subgraphs/manual.log: subgraphs/manual$(EXEEXT) @p='subgraphs/manual$(EXEEXT)'; \ b='subgraphs/manual'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) subgraphs/partition.log: subgraphs/partition$(EXEEXT) @p='subgraphs/partition$(EXEEXT)'; \ b='subgraphs/partition'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) subgraphs/plan.log: subgraphs/plan$(EXEEXT) @p='subgraphs/plan$(EXEEXT)'; \ b='subgraphs/plan'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) cpp/add_vectors_cpp11.log: cpp/add_vectors_cpp11$(EXEEXT) @p='cpp/add_vectors_cpp11$(EXEEXT)'; \ b='cpp/add_vectors_cpp11'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) fortran/hello.log: fortran/hello$(EXEEXT) @p='fortran/hello$(EXEEXT)'; \ b='fortran/hello'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basic_examples/vector_scal_fortran.log: basic_examples/vector_scal_fortran$(EXEEXT) @p='basic_examples/vector_scal_fortran$(EXEEXT)'; \ b='basic_examples/vector_scal_fortran'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) fortran90/f90_example.log: fortran90/f90_example$(EXEEXT) @p='fortran90/f90_example$(EXEEXT)'; \ b='fortran90/f90_example'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) native_fortran/nf_vector.log: native_fortran/nf_vector$(EXEEXT) @p='native_fortran/nf_vector$(EXEEXT)'; \ b='native_fortran/nf_vector'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) native_fortran/nf_matrix.log: native_fortran/nf_matrix$(EXEEXT) @p='native_fortran/nf_matrix$(EXEEXT)'; \ b='native_fortran/nf_matrix'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) native_fortran/nf_example.log: native_fortran/nf_example$(EXEEXT) @p='native_fortran/nf_example$(EXEEXT)'; \ b='native_fortran/nf_example'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) native_fortran/nf_dynbuf.log: native_fortran/nf_dynbuf$(EXEEXT) @p='native_fortran/nf_dynbuf$(EXEEXT)'; \ b='native_fortran/nf_dynbuf'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) native_fortran/nf_varbuf.log: native_fortran/nf_varbuf$(EXEEXT) @p='native_fortran/nf_varbuf$(EXEEXT)'; \ b='native_fortran/nf_varbuf'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) native_fortran/nf_sched_ctx.log: native_fortran/nf_sched_ctx$(EXEEXT) @p='native_fortran/nf_sched_ctx$(EXEEXT)'; \ b='native_fortran/nf_sched_ctx'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) native_fortran/nf_partition.log: native_fortran/nf_partition$(EXEEXT) @p='native_fortran/nf_partition$(EXEEXT)'; \ b='native_fortran/nf_partition'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mult/sgemm.log: mult/sgemm$(EXEEXT) @p='mult/sgemm$(EXEEXT)'; \ b='mult/sgemm'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mult/dgemm.log: mult/dgemm$(EXEEXT) @p='mult/dgemm$(EXEEXT)'; \ b='mult/dgemm'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) lu/lu_example_float.log: lu/lu_example_float$(EXEEXT) @p='lu/lu_example_float$(EXEEXT)'; \ b='lu/lu_example_float'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) lu/lu_example_double.log: lu/lu_example_double$(EXEEXT) @p='lu/lu_example_double$(EXEEXT)'; \ b='lu/lu_example_double'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) lu/lu_implicit_example_float.log: lu/lu_implicit_example_float$(EXEEXT) @p='lu/lu_implicit_example_float$(EXEEXT)'; \ b='lu/lu_implicit_example_float'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) lu/lu_implicit_example_double.log: lu/lu_implicit_example_double$(EXEEXT) @p='lu/lu_implicit_example_double$(EXEEXT)'; \ b='lu/lu_implicit_example_double'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) cholesky/cholesky_tag.log: cholesky/cholesky_tag$(EXEEXT) @p='cholesky/cholesky_tag$(EXEEXT)'; \ b='cholesky/cholesky_tag'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) cholesky/cholesky_tile_tag.log: cholesky/cholesky_tile_tag$(EXEEXT) @p='cholesky/cholesky_tile_tag$(EXEEXT)'; \ b='cholesky/cholesky_tile_tag'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) cholesky/cholesky_implicit.log: cholesky/cholesky_implicit$(EXEEXT) @p='cholesky/cholesky_implicit$(EXEEXT)'; \ b='cholesky/cholesky_implicit'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) cholesky/cholesky_compil.log: cholesky/cholesky_compil$(EXEEXT) @p='cholesky/cholesky_compil$(EXEEXT)'; \ b='cholesky/cholesky_compil'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) axpy/axpy.log: axpy/axpy$(EXEEXT) @p='axpy/axpy$(EXEEXT)'; \ b='axpy/axpy'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) cholesky/cholesky_grain_tag.log: cholesky/cholesky_grain_tag$(EXEEXT) @p='cholesky/cholesky_grain_tag$(EXEEXT)'; \ b='cholesky/cholesky_grain_tag'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) heat/heat.log: heat/heat$(EXEEXT) @p='heat/heat$(EXEEXT)'; \ b='heat/heat'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) cg/cg.log: cg/cg$(EXEEXT) @p='cg/cg$(EXEEXT)'; \ b='cg/cg'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) pipeline/pipeline.log: pipeline/pipeline$(EXEEXT) @p='pipeline/pipeline$(EXEEXT)'; \ b='pipeline/pipeline'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) transactions/trs_sgemm.log: transactions/trs_sgemm$(EXEEXT) @p='transactions/trs_sgemm$(EXEEXT)'; \ b='transactions/trs_sgemm'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) lu/lu_example_complex_float.log: lu/lu_example_complex_float$(EXEEXT) @p='lu/lu_example_complex_float$(EXEEXT)'; \ b='lu/lu_example_complex_float'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) lu/lu_example_complex_double.log: lu/lu_example_complex_double$(EXEEXT) @p='lu/lu_example_complex_double$(EXEEXT)'; \ b='lu/lu_example_complex_double'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) lu/lu_implicit_example_complex_float.log: lu/lu_implicit_example_complex_float$(EXEEXT) @p='lu/lu_implicit_example_complex_float$(EXEEXT)'; \ b='lu/lu_implicit_example_complex_float'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) lu/lu_implicit_example_complex_double.log: lu/lu_implicit_example_complex_double$(EXEEXT) @p='lu/lu_implicit_example_complex_double$(EXEEXT)'; \ b='lu/lu_implicit_example_complex_double'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) spmv/dw_block_spmv.log: spmv/dw_block_spmv$(EXEEXT) @p='spmv/dw_block_spmv$(EXEEXT)'; \ b='spmv/dw_block_spmv'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/vector_scal_omp.log: openmp/vector_scal_omp$(EXEEXT) @p='openmp/vector_scal_omp$(EXEEXT)'; \ b='openmp/vector_scal_omp'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/sched_ctx_without_sched_policy.log: sched_ctx/sched_ctx_without_sched_policy$(EXEEXT) @p='sched_ctx/sched_ctx_without_sched_policy$(EXEEXT)'; \ b='sched_ctx/sched_ctx_without_sched_policy'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/nested_sched_ctxs.log: sched_ctx/nested_sched_ctxs$(EXEEXT) @p='sched_ctx/nested_sched_ctxs$(EXEEXT)'; \ b='sched_ctx/nested_sched_ctxs'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/sched_ctx_without_sched_policy_awake.log: sched_ctx/sched_ctx_without_sched_policy_awake$(EXEEXT) @p='sched_ctx/sched_ctx_without_sched_policy_awake$(EXEEXT)'; \ b='sched_ctx/sched_ctx_without_sched_policy_awake'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/parallel_tasks_reuse_handle.log: sched_ctx/parallel_tasks_reuse_handle$(EXEEXT) @p='sched_ctx/parallel_tasks_reuse_handle$(EXEEXT)'; \ b='sched_ctx/parallel_tasks_reuse_handle'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/parallel_code.log: sched_ctx/parallel_code$(EXEEXT) @p='sched_ctx/parallel_code$(EXEEXT)'; \ b='sched_ctx/parallel_code'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) parallel_workers/parallel_workers.log: parallel_workers/parallel_workers$(EXEEXT) @p='parallel_workers/parallel_workers$(EXEEXT)'; \ b='parallel_workers/parallel_workers'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) parallel_workers/parallel_workers_func.log: parallel_workers/parallel_workers_func$(EXEEXT) @p='parallel_workers/parallel_workers_func$(EXEEXT)'; \ b='parallel_workers/parallel_workers_func'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) parallel_workers/parallel_workers_oldapi.log: parallel_workers/parallel_workers_oldapi$(EXEEXT) @p='parallel_workers/parallel_workers_oldapi$(EXEEXT)'; \ b='parallel_workers/parallel_workers_oldapi'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/gpu_partition.log: sched_ctx/gpu_partition$(EXEEXT) @p='sched_ctx/gpu_partition$(EXEEXT)'; \ b='sched_ctx/gpu_partition'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) check-recursive all-am: Makefile $(PROGRAMS) $(LTLIBRARIES) $(DATA) $(HEADERS) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(examplebindir)" "$(DESTDIR)$(pkglibdir)" "$(DESTDIR)$(STARPU_OPENCL_DATAdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-recursive install-exec: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -rm -f api/$(DEPDIR)/$(am__dirstamp) -rm -f api/$(am__dirstamp) -rm -f axpy/$(DEPDIR)/$(am__dirstamp) -rm -f axpy/$(am__dirstamp) -rm -f basic_examples/$(DEPDIR)/$(am__dirstamp) -rm -f basic_examples/$(am__dirstamp) -rm -f binary/$(DEPDIR)/$(am__dirstamp) -rm -f binary/$(am__dirstamp) -rm -f callback/$(DEPDIR)/$(am__dirstamp) -rm -f callback/$(am__dirstamp) -rm -f cg/$(DEPDIR)/$(am__dirstamp) -rm -f cg/$(am__dirstamp) -rm -f cholesky/$(DEPDIR)/$(am__dirstamp) -rm -f cholesky/$(am__dirstamp) -rm -f common/$(DEPDIR)/$(am__dirstamp) -rm -f common/$(am__dirstamp) -rm -f cpp/$(DEPDIR)/$(am__dirstamp) -rm -f cpp/$(am__dirstamp) -rm -f dependency/$(DEPDIR)/$(am__dirstamp) -rm -f dependency/$(am__dirstamp) -rm -f filters/$(DEPDIR)/$(am__dirstamp) -rm -f filters/$(am__dirstamp) -rm -f filters/custom_mf/$(DEPDIR)/$(am__dirstamp) -rm -f filters/custom_mf/$(am__dirstamp) -rm -f fortran/$(DEPDIR)/$(am__dirstamp) -rm -f fortran/$(am__dirstamp) -rm -f fortran90/$(DEPDIR)/$(am__dirstamp) -rm -f fortran90/$(am__dirstamp) -rm -f gl_interop/$(DEPDIR)/$(am__dirstamp) -rm -f gl_interop/$(am__dirstamp) -rm -f heat/$(DEPDIR)/$(am__dirstamp) -rm -f heat/$(am__dirstamp) -rm -f incrementer/$(DEPDIR)/$(am__dirstamp) -rm -f incrementer/$(am__dirstamp) -rm -f interface/$(DEPDIR)/$(am__dirstamp) -rm -f interface/$(am__dirstamp) -rm -f interface/complex_dev_handle/$(DEPDIR)/$(am__dirstamp) -rm -f interface/complex_dev_handle/$(am__dirstamp) -rm -f lu/$(DEPDIR)/$(am__dirstamp) -rm -f lu/$(am__dirstamp) -rm -f mandelbrot/$(DEPDIR)/$(am__dirstamp) -rm -f mandelbrot/$(am__dirstamp) -rm -f matvecmult/$(DEPDIR)/$(am__dirstamp) -rm -f matvecmult/$(am__dirstamp) -rm -f mlr/$(DEPDIR)/$(am__dirstamp) -rm -f mlr/$(am__dirstamp) -rm -f mult/$(DEPDIR)/$(am__dirstamp) -rm -f mult/$(am__dirstamp) -rm -f native_fortran/$(DEPDIR)/$(am__dirstamp) -rm -f native_fortran/$(am__dirstamp) -rm -f openmp/$(DEPDIR)/$(am__dirstamp) -rm -f openmp/$(am__dirstamp) -rm -f parallel_workers/$(DEPDIR)/$(am__dirstamp) -rm -f parallel_workers/$(am__dirstamp) -rm -f perf_monitoring/$(DEPDIR)/$(am__dirstamp) -rm -f perf_monitoring/$(am__dirstamp) -rm -f perf_steering/$(DEPDIR)/$(am__dirstamp) -rm -f perf_steering/$(am__dirstamp) -rm -f pi/$(DEPDIR)/$(am__dirstamp) -rm -f pi/$(am__dirstamp) -rm -f pi/SobolQRNG/$(DEPDIR)/$(am__dirstamp) -rm -f pi/SobolQRNG/$(am__dirstamp) -rm -f pipeline/$(DEPDIR)/$(am__dirstamp) -rm -f pipeline/$(am__dirstamp) -rm -f ppm_downscaler/$(DEPDIR)/$(am__dirstamp) -rm -f ppm_downscaler/$(am__dirstamp) -rm -f profiling/$(DEPDIR)/$(am__dirstamp) -rm -f profiling/$(am__dirstamp) -rm -f profiling_tool/$(DEPDIR)/$(am__dirstamp) -rm -f profiling_tool/$(am__dirstamp) -rm -f reductions/$(DEPDIR)/$(am__dirstamp) -rm -f reductions/$(am__dirstamp) -rm -f sched_ctx/$(DEPDIR)/$(am__dirstamp) -rm -f sched_ctx/$(am__dirstamp) -rm -f sched_ctx_utils/$(DEPDIR)/$(am__dirstamp) -rm -f sched_ctx_utils/$(am__dirstamp) -rm -f scheduler/$(DEPDIR)/$(am__dirstamp) -rm -f scheduler/$(am__dirstamp) -rm -f spmd/$(DEPDIR)/$(am__dirstamp) -rm -f spmd/$(am__dirstamp) -rm -f spmv/$(DEPDIR)/$(am__dirstamp) -rm -f spmv/$(am__dirstamp) -rm -f spmv/matrix_market/$(DEPDIR)/$(am__dirstamp) -rm -f spmv/matrix_market/$(am__dirstamp) -rm -f subgraphs/$(DEPDIR)/$(am__dirstamp) -rm -f subgraphs/$(am__dirstamp) -rm -f tag_example/$(DEPDIR)/$(am__dirstamp) -rm -f tag_example/$(am__dirstamp) -rm -f transactions/$(DEPDIR)/$(am__dirstamp) -rm -f transactions/$(am__dirstamp) -rm -f worker_collections/$(DEPDIR)/$(am__dirstamp) -rm -f worker_collections/$(am__dirstamp) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) clean: clean-recursive clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ clean-libtool clean-local clean-noinstPROGRAMS \ clean-pkglibLTLIBRARIES mostlyclean-am distclean: distclean-recursive -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f api/$(DEPDIR)/bcsr_data_interface.Po -rm -f api/$(DEPDIR)/block_data_interface.Po -rm -f api/$(DEPDIR)/coo_data_interface.Po -rm -f api/$(DEPDIR)/csr_data_interface.Po -rm -f api/$(DEPDIR)/matrix_data_interface.Po -rm -f api/$(DEPDIR)/multiformat_data_interface.Po -rm -f api/$(DEPDIR)/tensor_data_interface.Po -rm -f api/$(DEPDIR)/variable_data_interface.Po -rm -f api/$(DEPDIR)/vector_data_interface.Po -rm -f api/$(DEPDIR)/void_data_interface.Po -rm -f axpy/$(DEPDIR)/axpy.Po -rm -f axpy/$(DEPDIR)/axpy_opencl.Po -rm -f basic_examples/$(DEPDIR)/block.Po -rm -f basic_examples/$(DEPDIR)/block_cpu.Po -rm -f basic_examples/$(DEPDIR)/block_opencl.Po -rm -f basic_examples/$(DEPDIR)/dynamic_handles.Po -rm -f basic_examples/$(DEPDIR)/hello_world.Po -rm -f basic_examples/$(DEPDIR)/hooks.Po -rm -f basic_examples/$(DEPDIR)/mult.Po -rm -f basic_examples/$(DEPDIR)/multiformat.Po -rm -f basic_examples/$(DEPDIR)/multiformat_conversion_codelets.Po -rm -f basic_examples/$(DEPDIR)/multiformat_conversion_codelets_opencl.Po -rm -f basic_examples/$(DEPDIR)/multiformat_opencl.Po -rm -f basic_examples/$(DEPDIR)/ndim.Po -rm -f basic_examples/$(DEPDIR)/task_insert_color.Po -rm -f basic_examples/$(DEPDIR)/topology.Po -rm -f basic_examples/$(DEPDIR)/variable.Po -rm -f basic_examples/$(DEPDIR)/variable_kernels_cpu.Po -rm -f basic_examples/$(DEPDIR)/variable_kernels_opencl.Po -rm -f basic_examples/$(DEPDIR)/vector_scal.Po -rm -f basic_examples/$(DEPDIR)/vector_scal_c.Po -rm -f basic_examples/$(DEPDIR)/vector_scal_cpu.Po -rm -f basic_examples/$(DEPDIR)/vector_scal_opencl.Po -rm -f binary/$(DEPDIR)/binary.Po -rm -f callback/$(DEPDIR)/callback.Po -rm -f callback/$(DEPDIR)/prologue.Po -rm -f cg/$(DEPDIR)/cg.Po -rm -f cholesky/$(DEPDIR)/cholesky_compil.Po -rm -f cholesky/$(DEPDIR)/cholesky_grain_tag.Po -rm -f cholesky/$(DEPDIR)/cholesky_implicit.Po -rm -f cholesky/$(DEPDIR)/cholesky_kernels.Po -rm -f cholesky/$(DEPDIR)/cholesky_models.Po -rm -f cholesky/$(DEPDIR)/cholesky_tag.Po -rm -f cholesky/$(DEPDIR)/cholesky_tile_tag.Po -rm -f cholesky/$(DEPDIR)/libmy_dmda.Plo -rm -f common/$(DEPDIR)/blas.Po -rm -f cpp/$(DEPDIR)/add_vectors.Po -rm -f cpp/$(DEPDIR)/add_vectors_cpp11.Po -rm -f cpp/$(DEPDIR)/add_vectors_interface.Po -rm -f cpp/$(DEPDIR)/incrementer_cpp.Po -rm -f dependency/$(DEPDIR)/sequential_consistency.Po -rm -f dependency/$(DEPDIR)/task_end_dep.Po -rm -f dependency/$(DEPDIR)/task_end_dep_add.Po -rm -f filters/$(DEPDIR)/alloc.Po -rm -f filters/$(DEPDIR)/f3d_cpu.Po -rm -f filters/$(DEPDIR)/f4d_cpu.Po -rm -f filters/$(DEPDIR)/f5d_print.Po -rm -f filters/$(DEPDIR)/fblock.Po -rm -f filters/$(DEPDIR)/fblock_cpu.Po -rm -f filters/$(DEPDIR)/fblock_opencl.Po -rm -f filters/$(DEPDIR)/fblock_pick_matrix.Po -rm -f filters/$(DEPDIR)/fblock_pick_variable.Po -rm -f filters/$(DEPDIR)/fblock_print.Po -rm -f filters/$(DEPDIR)/fmatrix.Po -rm -f filters/$(DEPDIR)/fmatrix_cpu.Po -rm -f filters/$(DEPDIR)/fmatrix_pick_variable.Po -rm -f filters/$(DEPDIR)/fmatrix_pick_vector.Po -rm -f filters/$(DEPDIR)/fmatrix_print.Po -rm -f filters/$(DEPDIR)/fmultiple_manual.Po -rm -f filters/$(DEPDIR)/fmultiple_submit.Po -rm -f filters/$(DEPDIR)/fmultiple_submit_implicit.Po -rm -f filters/$(DEPDIR)/fmultiple_submit_readonly.Po -rm -f filters/$(DEPDIR)/fmultiple_submit_readonly_downgrade.Po -rm -f filters/$(DEPDIR)/fndim.Po -rm -f filters/$(DEPDIR)/fndim_1d_pick_variable.Po -rm -f filters/$(DEPDIR)/fndim_2d_pick_vector.Po -rm -f filters/$(DEPDIR)/fndim_3d_pick_matrix.Po -rm -f filters/$(DEPDIR)/fndim_4d_pick_block.Po -rm -f filters/$(DEPDIR)/fndim_5d_pick_tensor.Po -rm -f filters/$(DEPDIR)/fndim_pick_ndim.Po -rm -f filters/$(DEPDIR)/fndim_pick_variable.Po -rm -f filters/$(DEPDIR)/fndim_to_block.Po -rm -f filters/$(DEPDIR)/fndim_to_matrix.Po -rm -f filters/$(DEPDIR)/fndim_to_tensor.Po -rm -f filters/$(DEPDIR)/fndim_to_variable.Po -rm -f filters/$(DEPDIR)/fndim_to_vector.Po -rm -f filters/$(DEPDIR)/fread.Po -rm -f filters/$(DEPDIR)/frecursive.Po -rm -f filters/$(DEPDIR)/ftensor.Po -rm -f filters/$(DEPDIR)/ftensor_cpu.Po -rm -f filters/$(DEPDIR)/ftensor_pick_block.Po -rm -f filters/$(DEPDIR)/ftensor_pick_variable.Po -rm -f filters/$(DEPDIR)/ftensor_print.Po -rm -f filters/$(DEPDIR)/fvector.Po -rm -f filters/$(DEPDIR)/fvector_cpu.Po -rm -f filters/$(DEPDIR)/fvector_pick_variable.Po -rm -f filters/$(DEPDIR)/shadow.Po -rm -f filters/$(DEPDIR)/shadow2d.Po -rm -f filters/$(DEPDIR)/shadow3d.Po -rm -f filters/$(DEPDIR)/shadow4d.Po -rm -f filters/$(DEPDIR)/shadownd.Po -rm -f filters/custom_mf/$(DEPDIR)/conversion_opencl.Po -rm -f filters/custom_mf/$(DEPDIR)/custom_conversion_codelets.Po -rm -f filters/custom_mf/$(DEPDIR)/custom_interface.Po -rm -f filters/custom_mf/$(DEPDIR)/custom_mf_filter.Po -rm -f filters/custom_mf/$(DEPDIR)/custom_opencl.Po -rm -f fortran/$(DEPDIR)/hello_c.Po -rm -f fortran90/$(DEPDIR)/marshalling.Po -rm -f gl_interop/$(DEPDIR)/gl_interop.Po -rm -f gl_interop/$(DEPDIR)/gl_interop_idle.Po -rm -f heat/$(DEPDIR)/dw_factolu.Po -rm -f heat/$(DEPDIR)/dw_factolu_grain.Po -rm -f heat/$(DEPDIR)/dw_factolu_kernels.Po -rm -f heat/$(DEPDIR)/dw_factolu_tag.Po -rm -f heat/$(DEPDIR)/dw_sparse_cg.Po -rm -f heat/$(DEPDIR)/dw_sparse_cg_kernels.Po -rm -f heat/$(DEPDIR)/heat.Po -rm -f heat/$(DEPDIR)/heat_display.Po -rm -f heat/$(DEPDIR)/lu_kernels_model.Po -rm -f incrementer/$(DEPDIR)/incrementer.Po -rm -f incrementer/$(DEPDIR)/incrementer_kernels_opencl.Po -rm -f interface/$(DEPDIR)/complex.Po -rm -f interface/$(DEPDIR)/complex_filters.Po -rm -f interface/$(DEPDIR)/complex_interface.Po -rm -f interface/$(DEPDIR)/complex_kernels_opencl.Po -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle.Po -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_filters.Po -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_interface.Po -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_kernels_opencl.Po -rm -f lu/$(DEPDIR)/blas_complex.Po -rm -f lu/$(DEPDIR)/clu.Po -rm -f lu/$(DEPDIR)/clu_implicit.Po -rm -f lu/$(DEPDIR)/clu_implicit_pivot.Po -rm -f lu/$(DEPDIR)/clu_kernels.Po -rm -f lu/$(DEPDIR)/clu_pivot.Po -rm -f lu/$(DEPDIR)/dlu.Po -rm -f lu/$(DEPDIR)/dlu_implicit.Po -rm -f lu/$(DEPDIR)/dlu_implicit_pivot.Po -rm -f lu/$(DEPDIR)/dlu_kernels.Po -rm -f lu/$(DEPDIR)/dlu_pivot.Po -rm -f lu/$(DEPDIR)/lu_example_complex_double.Po -rm -f lu/$(DEPDIR)/lu_example_complex_float.Po -rm -f lu/$(DEPDIR)/lu_example_double.Po -rm -f lu/$(DEPDIR)/lu_example_float.Po -rm -f lu/$(DEPDIR)/slu.Po -rm -f lu/$(DEPDIR)/slu_implicit.Po -rm -f lu/$(DEPDIR)/slu_implicit_pivot.Po -rm -f lu/$(DEPDIR)/slu_kernels.Po -rm -f lu/$(DEPDIR)/slu_pivot.Po -rm -f lu/$(DEPDIR)/zlu.Po -rm -f lu/$(DEPDIR)/zlu_implicit.Po -rm -f lu/$(DEPDIR)/zlu_implicit_pivot.Po -rm -f lu/$(DEPDIR)/zlu_kernels.Po -rm -f lu/$(DEPDIR)/zlu_pivot.Po -rm -f mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Po -rm -f matvecmult/$(DEPDIR)/matvecmult.Po -rm -f mlr/$(DEPDIR)/mlr.Po -rm -f mult/$(DEPDIR)/dgemm.Po -rm -f mult/$(DEPDIR)/dgemm_layout.Po -rm -f mult/$(DEPDIR)/sgemm.Po -rm -f mult/$(DEPDIR)/sgemm_layout.Po -rm -f openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Po -rm -f parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Po -rm -f parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Po -rm -f parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Po -rm -f perf_monitoring/$(DEPDIR)/perf_counters_01.Po -rm -f perf_monitoring/$(DEPDIR)/perf_counters_02.Po -rm -f perf_steering/$(DEPDIR)/perf_knobs_01.Po -rm -f perf_steering/$(DEPDIR)/perf_knobs_02.Po -rm -f perf_steering/$(DEPDIR)/perf_knobs_03.Po -rm -f pi/$(DEPDIR)/pi.Po -rm -f pi/$(DEPDIR)/pi_redux.Po -rm -f pi/SobolQRNG/$(DEPDIR)/sobol_gold.Po -rm -f pi/SobolQRNG/$(DEPDIR)/sobol_primitives.Po -rm -f pipeline/$(DEPDIR)/pipeline.Po -rm -f ppm_downscaler/$(DEPDIR)/ppm_downscaler.Po -rm -f ppm_downscaler/$(DEPDIR)/yuv_downscaler.Po -rm -f profiling/$(DEPDIR)/profiling.Po -rm -f profiling_tool/$(DEPDIR)/libprofiling_tool.Plo -rm -f reductions/$(DEPDIR)/dot_product.Po -rm -f reductions/$(DEPDIR)/minmax_reduction.Po -rm -f sched_ctx/$(DEPDIR)/dummy_sched_with_ctx.Po -rm -f sched_ctx/$(DEPDIR)/gpu_partition.Po -rm -f sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Po -rm -f sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Po -rm -f sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Po -rm -f sched_ctx/$(DEPDIR)/prio.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_delete.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_empty.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_remove.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy_awake.Po -rm -f sched_ctx/$(DEPDIR)/two_cpu_contexts.Po -rm -f sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po -rm -f scheduler/$(DEPDIR)/dummy_modular_sched.Po -rm -f scheduler/$(DEPDIR)/dummy_sched.Po -rm -f scheduler/$(DEPDIR)/heteroprio_test.Po -rm -f scheduler/$(DEPDIR)/libdummy_sched.Plo -rm -f spmd/$(DEPDIR)/vector_scal_spmd.Po -rm -f spmv/$(DEPDIR)/dw_block_spmv.Po -rm -f spmv/$(DEPDIR)/dw_block_spmv_kernels.Po -rm -f spmv/$(DEPDIR)/spmv.Po -rm -f spmv/$(DEPDIR)/spmv_kernels.Po -rm -f spmv/matrix_market/$(DEPDIR)/mm_to_bcsr.Po -rm -f spmv/matrix_market/$(DEPDIR)/mmio.Po -rm -f subgraphs/$(DEPDIR)/codelets.Po -rm -f subgraphs/$(DEPDIR)/manual.Po -rm -f subgraphs/$(DEPDIR)/partition.Po -rm -f subgraphs/$(DEPDIR)/plan.Po -rm -f tag_example/$(DEPDIR)/tag_example.Po -rm -f tag_example/$(DEPDIR)/tag_example2.Po -rm -f tag_example/$(DEPDIR)/tag_example3.Po -rm -f tag_example/$(DEPDIR)/tag_example4.Po -rm -f tag_example/$(DEPDIR)/tag_restartable.Po -rm -f transactions/$(DEPDIR)/trs_inc.Po -rm -f transactions/$(DEPDIR)/trs_sgemm.Po -rm -f worker_collections/$(DEPDIR)/worker_list_example.Po -rm -f worker_collections/$(DEPDIR)/worker_tree_example.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-examplebinPROGRAMS \ install-nobase_STARPU_OPENCL_DATADATA install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-pkglibLTLIBRARIES install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f api/$(DEPDIR)/bcsr_data_interface.Po -rm -f api/$(DEPDIR)/block_data_interface.Po -rm -f api/$(DEPDIR)/coo_data_interface.Po -rm -f api/$(DEPDIR)/csr_data_interface.Po -rm -f api/$(DEPDIR)/matrix_data_interface.Po -rm -f api/$(DEPDIR)/multiformat_data_interface.Po -rm -f api/$(DEPDIR)/tensor_data_interface.Po -rm -f api/$(DEPDIR)/variable_data_interface.Po -rm -f api/$(DEPDIR)/vector_data_interface.Po -rm -f api/$(DEPDIR)/void_data_interface.Po -rm -f axpy/$(DEPDIR)/axpy.Po -rm -f axpy/$(DEPDIR)/axpy_opencl.Po -rm -f basic_examples/$(DEPDIR)/block.Po -rm -f basic_examples/$(DEPDIR)/block_cpu.Po -rm -f basic_examples/$(DEPDIR)/block_opencl.Po -rm -f basic_examples/$(DEPDIR)/dynamic_handles.Po -rm -f basic_examples/$(DEPDIR)/hello_world.Po -rm -f basic_examples/$(DEPDIR)/hooks.Po -rm -f basic_examples/$(DEPDIR)/mult.Po -rm -f basic_examples/$(DEPDIR)/multiformat.Po -rm -f basic_examples/$(DEPDIR)/multiformat_conversion_codelets.Po -rm -f basic_examples/$(DEPDIR)/multiformat_conversion_codelets_opencl.Po -rm -f basic_examples/$(DEPDIR)/multiformat_opencl.Po -rm -f basic_examples/$(DEPDIR)/ndim.Po -rm -f basic_examples/$(DEPDIR)/task_insert_color.Po -rm -f basic_examples/$(DEPDIR)/topology.Po -rm -f basic_examples/$(DEPDIR)/variable.Po -rm -f basic_examples/$(DEPDIR)/variable_kernels_cpu.Po -rm -f basic_examples/$(DEPDIR)/variable_kernels_opencl.Po -rm -f basic_examples/$(DEPDIR)/vector_scal.Po -rm -f basic_examples/$(DEPDIR)/vector_scal_c.Po -rm -f basic_examples/$(DEPDIR)/vector_scal_cpu.Po -rm -f basic_examples/$(DEPDIR)/vector_scal_opencl.Po -rm -f binary/$(DEPDIR)/binary.Po -rm -f callback/$(DEPDIR)/callback.Po -rm -f callback/$(DEPDIR)/prologue.Po -rm -f cg/$(DEPDIR)/cg.Po -rm -f cholesky/$(DEPDIR)/cholesky_compil.Po -rm -f cholesky/$(DEPDIR)/cholesky_grain_tag.Po -rm -f cholesky/$(DEPDIR)/cholesky_implicit.Po -rm -f cholesky/$(DEPDIR)/cholesky_kernels.Po -rm -f cholesky/$(DEPDIR)/cholesky_models.Po -rm -f cholesky/$(DEPDIR)/cholesky_tag.Po -rm -f cholesky/$(DEPDIR)/cholesky_tile_tag.Po -rm -f cholesky/$(DEPDIR)/libmy_dmda.Plo -rm -f common/$(DEPDIR)/blas.Po -rm -f cpp/$(DEPDIR)/add_vectors.Po -rm -f cpp/$(DEPDIR)/add_vectors_cpp11.Po -rm -f cpp/$(DEPDIR)/add_vectors_interface.Po -rm -f cpp/$(DEPDIR)/incrementer_cpp.Po -rm -f dependency/$(DEPDIR)/sequential_consistency.Po -rm -f dependency/$(DEPDIR)/task_end_dep.Po -rm -f dependency/$(DEPDIR)/task_end_dep_add.Po -rm -f filters/$(DEPDIR)/alloc.Po -rm -f filters/$(DEPDIR)/f3d_cpu.Po -rm -f filters/$(DEPDIR)/f4d_cpu.Po -rm -f filters/$(DEPDIR)/f5d_print.Po -rm -f filters/$(DEPDIR)/fblock.Po -rm -f filters/$(DEPDIR)/fblock_cpu.Po -rm -f filters/$(DEPDIR)/fblock_opencl.Po -rm -f filters/$(DEPDIR)/fblock_pick_matrix.Po -rm -f filters/$(DEPDIR)/fblock_pick_variable.Po -rm -f filters/$(DEPDIR)/fblock_print.Po -rm -f filters/$(DEPDIR)/fmatrix.Po -rm -f filters/$(DEPDIR)/fmatrix_cpu.Po -rm -f filters/$(DEPDIR)/fmatrix_pick_variable.Po -rm -f filters/$(DEPDIR)/fmatrix_pick_vector.Po -rm -f filters/$(DEPDIR)/fmatrix_print.Po -rm -f filters/$(DEPDIR)/fmultiple_manual.Po -rm -f filters/$(DEPDIR)/fmultiple_submit.Po -rm -f filters/$(DEPDIR)/fmultiple_submit_implicit.Po -rm -f filters/$(DEPDIR)/fmultiple_submit_readonly.Po -rm -f filters/$(DEPDIR)/fmultiple_submit_readonly_downgrade.Po -rm -f filters/$(DEPDIR)/fndim.Po -rm -f filters/$(DEPDIR)/fndim_1d_pick_variable.Po -rm -f filters/$(DEPDIR)/fndim_2d_pick_vector.Po -rm -f filters/$(DEPDIR)/fndim_3d_pick_matrix.Po -rm -f filters/$(DEPDIR)/fndim_4d_pick_block.Po -rm -f filters/$(DEPDIR)/fndim_5d_pick_tensor.Po -rm -f filters/$(DEPDIR)/fndim_pick_ndim.Po -rm -f filters/$(DEPDIR)/fndim_pick_variable.Po -rm -f filters/$(DEPDIR)/fndim_to_block.Po -rm -f filters/$(DEPDIR)/fndim_to_matrix.Po -rm -f filters/$(DEPDIR)/fndim_to_tensor.Po -rm -f filters/$(DEPDIR)/fndim_to_variable.Po -rm -f filters/$(DEPDIR)/fndim_to_vector.Po -rm -f filters/$(DEPDIR)/fread.Po -rm -f filters/$(DEPDIR)/frecursive.Po -rm -f filters/$(DEPDIR)/ftensor.Po -rm -f filters/$(DEPDIR)/ftensor_cpu.Po -rm -f filters/$(DEPDIR)/ftensor_pick_block.Po -rm -f filters/$(DEPDIR)/ftensor_pick_variable.Po -rm -f filters/$(DEPDIR)/ftensor_print.Po -rm -f filters/$(DEPDIR)/fvector.Po -rm -f filters/$(DEPDIR)/fvector_cpu.Po -rm -f filters/$(DEPDIR)/fvector_pick_variable.Po -rm -f filters/$(DEPDIR)/shadow.Po -rm -f filters/$(DEPDIR)/shadow2d.Po -rm -f filters/$(DEPDIR)/shadow3d.Po -rm -f filters/$(DEPDIR)/shadow4d.Po -rm -f filters/$(DEPDIR)/shadownd.Po -rm -f filters/custom_mf/$(DEPDIR)/conversion_opencl.Po -rm -f filters/custom_mf/$(DEPDIR)/custom_conversion_codelets.Po -rm -f filters/custom_mf/$(DEPDIR)/custom_interface.Po -rm -f filters/custom_mf/$(DEPDIR)/custom_mf_filter.Po -rm -f filters/custom_mf/$(DEPDIR)/custom_opencl.Po -rm -f fortran/$(DEPDIR)/hello_c.Po -rm -f fortran90/$(DEPDIR)/marshalling.Po -rm -f gl_interop/$(DEPDIR)/gl_interop.Po -rm -f gl_interop/$(DEPDIR)/gl_interop_idle.Po -rm -f heat/$(DEPDIR)/dw_factolu.Po -rm -f heat/$(DEPDIR)/dw_factolu_grain.Po -rm -f heat/$(DEPDIR)/dw_factolu_kernels.Po -rm -f heat/$(DEPDIR)/dw_factolu_tag.Po -rm -f heat/$(DEPDIR)/dw_sparse_cg.Po -rm -f heat/$(DEPDIR)/dw_sparse_cg_kernels.Po -rm -f heat/$(DEPDIR)/heat.Po -rm -f heat/$(DEPDIR)/heat_display.Po -rm -f heat/$(DEPDIR)/lu_kernels_model.Po -rm -f incrementer/$(DEPDIR)/incrementer.Po -rm -f incrementer/$(DEPDIR)/incrementer_kernels_opencl.Po -rm -f interface/$(DEPDIR)/complex.Po -rm -f interface/$(DEPDIR)/complex_filters.Po -rm -f interface/$(DEPDIR)/complex_interface.Po -rm -f interface/$(DEPDIR)/complex_kernels_opencl.Po -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle.Po -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_filters.Po -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_interface.Po -rm -f interface/complex_dev_handle/$(DEPDIR)/complex_dev_handle_kernels_opencl.Po -rm -f lu/$(DEPDIR)/blas_complex.Po -rm -f lu/$(DEPDIR)/clu.Po -rm -f lu/$(DEPDIR)/clu_implicit.Po -rm -f lu/$(DEPDIR)/clu_implicit_pivot.Po -rm -f lu/$(DEPDIR)/clu_kernels.Po -rm -f lu/$(DEPDIR)/clu_pivot.Po -rm -f lu/$(DEPDIR)/dlu.Po -rm -f lu/$(DEPDIR)/dlu_implicit.Po -rm -f lu/$(DEPDIR)/dlu_implicit_pivot.Po -rm -f lu/$(DEPDIR)/dlu_kernels.Po -rm -f lu/$(DEPDIR)/dlu_pivot.Po -rm -f lu/$(DEPDIR)/lu_example_complex_double.Po -rm -f lu/$(DEPDIR)/lu_example_complex_float.Po -rm -f lu/$(DEPDIR)/lu_example_double.Po -rm -f lu/$(DEPDIR)/lu_example_float.Po -rm -f lu/$(DEPDIR)/slu.Po -rm -f lu/$(DEPDIR)/slu_implicit.Po -rm -f lu/$(DEPDIR)/slu_implicit_pivot.Po -rm -f lu/$(DEPDIR)/slu_kernels.Po -rm -f lu/$(DEPDIR)/slu_pivot.Po -rm -f lu/$(DEPDIR)/zlu.Po -rm -f lu/$(DEPDIR)/zlu_implicit.Po -rm -f lu/$(DEPDIR)/zlu_implicit_pivot.Po -rm -f lu/$(DEPDIR)/zlu_kernels.Po -rm -f lu/$(DEPDIR)/zlu_pivot.Po -rm -f mandelbrot/$(DEPDIR)/mandelbrot-mandelbrot.Po -rm -f matvecmult/$(DEPDIR)/matvecmult.Po -rm -f mlr/$(DEPDIR)/mlr.Po -rm -f mult/$(DEPDIR)/dgemm.Po -rm -f mult/$(DEPDIR)/dgemm_layout.Po -rm -f mult/$(DEPDIR)/sgemm.Po -rm -f mult/$(DEPDIR)/sgemm_layout.Po -rm -f openmp/$(DEPDIR)/vector_scal_omp-vector_scal_omp.Po -rm -f parallel_workers/$(DEPDIR)/parallel_workers-parallel_workers.Po -rm -f parallel_workers/$(DEPDIR)/parallel_workers_func-parallel_workers_func.Po -rm -f parallel_workers/$(DEPDIR)/parallel_workers_oldapi-parallel_workers_oldapi.Po -rm -f perf_monitoring/$(DEPDIR)/perf_counters_01.Po -rm -f perf_monitoring/$(DEPDIR)/perf_counters_02.Po -rm -f perf_steering/$(DEPDIR)/perf_knobs_01.Po -rm -f perf_steering/$(DEPDIR)/perf_knobs_02.Po -rm -f perf_steering/$(DEPDIR)/perf_knobs_03.Po -rm -f pi/$(DEPDIR)/pi.Po -rm -f pi/$(DEPDIR)/pi_redux.Po -rm -f pi/SobolQRNG/$(DEPDIR)/sobol_gold.Po -rm -f pi/SobolQRNG/$(DEPDIR)/sobol_primitives.Po -rm -f pipeline/$(DEPDIR)/pipeline.Po -rm -f ppm_downscaler/$(DEPDIR)/ppm_downscaler.Po -rm -f ppm_downscaler/$(DEPDIR)/yuv_downscaler.Po -rm -f profiling/$(DEPDIR)/profiling.Po -rm -f profiling_tool/$(DEPDIR)/libprofiling_tool.Plo -rm -f reductions/$(DEPDIR)/dot_product.Po -rm -f reductions/$(DEPDIR)/minmax_reduction.Po -rm -f sched_ctx/$(DEPDIR)/dummy_sched_with_ctx.Po -rm -f sched_ctx/$(DEPDIR)/gpu_partition.Po -rm -f sched_ctx/$(DEPDIR)/nested_sched_ctxs-nested_sched_ctxs.Po -rm -f sched_ctx/$(DEPDIR)/parallel_code-parallel_code.Po -rm -f sched_ctx/$(DEPDIR)/parallel_tasks_reuse_handle-parallel_tasks_reuse_handle.Po -rm -f sched_ctx/$(DEPDIR)/prio.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_delete.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_empty.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_remove.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy-sched_ctx_without_sched_policy.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_without_sched_policy_awake.Po -rm -f sched_ctx/$(DEPDIR)/two_cpu_contexts.Po -rm -f sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po -rm -f scheduler/$(DEPDIR)/dummy_modular_sched.Po -rm -f scheduler/$(DEPDIR)/dummy_sched.Po -rm -f scheduler/$(DEPDIR)/heteroprio_test.Po -rm -f scheduler/$(DEPDIR)/libdummy_sched.Plo -rm -f spmd/$(DEPDIR)/vector_scal_spmd.Po -rm -f spmv/$(DEPDIR)/dw_block_spmv.Po -rm -f spmv/$(DEPDIR)/dw_block_spmv_kernels.Po -rm -f spmv/$(DEPDIR)/spmv.Po -rm -f spmv/$(DEPDIR)/spmv_kernels.Po -rm -f spmv/matrix_market/$(DEPDIR)/mm_to_bcsr.Po -rm -f spmv/matrix_market/$(DEPDIR)/mmio.Po -rm -f subgraphs/$(DEPDIR)/codelets.Po -rm -f subgraphs/$(DEPDIR)/manual.Po -rm -f subgraphs/$(DEPDIR)/partition.Po -rm -f subgraphs/$(DEPDIR)/plan.Po -rm -f tag_example/$(DEPDIR)/tag_example.Po -rm -f tag_example/$(DEPDIR)/tag_example2.Po -rm -f tag_example/$(DEPDIR)/tag_example3.Po -rm -f tag_example/$(DEPDIR)/tag_example4.Po -rm -f tag_example/$(DEPDIR)/tag_restartable.Po -rm -f transactions/$(DEPDIR)/trs_inc.Po -rm -f transactions/$(DEPDIR)/trs_sgemm.Po -rm -f worker_collections/$(DEPDIR)/worker_list_example.Po -rm -f worker_collections/$(DEPDIR)/worker_tree_example.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-examplebinPROGRAMS \ uninstall-nobase_STARPU_OPENCL_DATADATA \ uninstall-pkglibLTLIBRARIES .MAKE: $(am__recursive_targets) all check check-am install install-am \ install-exec install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ am--depfiles check check-TESTS check-am clean \ clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ clean-libtool clean-local clean-noinstPROGRAMS \ clean-pkglibLTLIBRARIES cscopelist-am ctags ctags-am distclean \ distclean-compile distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-examplebinPROGRAMS install-exec \ install-exec-am install-html install-html-am install-info \ install-info-am install-man \ install-nobase_STARPU_OPENCL_DATADATA install-pdf \ install-pdf-am install-pkglibLTLIBRARIES install-ps \ install-ps-am install-strip installcheck installcheck-am \ installdirs installdirs-am maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ recheck tags tags-am uninstall uninstall-am \ uninstall-examplebinPROGRAMS \ uninstall-nobase_STARPU_OPENCL_DATADATA \ uninstall-pkglibLTLIBRARIES .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # # Test loading goes through a lot of launchers: # # - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. # either mpirun or starpu_tcpipexec # # - $(LOADER), i.e. tests/loader, is then called to implement timeout, running # gdb, etc. But if it detects that the test is a .sh script, it just executes # it # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # to run the program through e.g. valgrind.sh # # When the program is a shell script, additionally: # # - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) # # - $(MS_LAUNCHER) is called to run the test through starpu_msexec # # - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program # through it. # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # export LAUNCHER @HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL export MS_LAUNCHER LAUNCHER ?= MS_LAUNCHER ?= @STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr export LSAN_OPTIONS export TSAN_OPTIONS clean-local: -rm -rf mult/sgemm.traces lu/lu.traces @STARPU_HAVE_ICC_TRUE@.icc.o: @STARPU_HAVE_ICC_TRUE@ $(V_icc) $(ICC) $(ICC_ARGS) -x c $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) $< -c -o $@ # - link over source file to build our own object fortran90/starpu_mod.f90: @$(MKDIR_P) $(dir $@) $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ native_fortran/fstarpu_mod.f90: @$(MKDIR_P) $(dir $@) $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ # Fortran90 example # - express the creation of .mod along .o @STARPU_HAVE_FC_TRUE@starpu_mod.mod: fortran90/starpu_mod.o @STARPU_HAVE_FC_TRUE@mod_types.mod: fortran90/mod_types.o @STARPU_HAVE_FC_TRUE@mod_compute.mod: fortran90/mod_compute.o @STARPU_HAVE_FC_TRUE@mod_interface.mod: fortran90/mod_interface.o # - list explicit dependences to control proper module files dependencies @STARPU_HAVE_FC_TRUE@fortran90/mod_compute.o: mod_types.mod mod_interface.mod starpu_mod.mod @STARPU_HAVE_FC_TRUE@fortran90/f90_example.o: mod_types.mod mod_interface.mod mod_compute.mod starpu_mod.mod # Native Fortran example # - express the creation of .mod along .o @STARPU_HAVE_FC_TRUE@fstarpu_mod.mod: native_fortran/fstarpu_mod.o @STARPU_HAVE_FC_TRUE@nf_codelets.mod: native_fortran/nf_codelets.o @STARPU_HAVE_FC_TRUE@nf_compute.mod: native_fortran/nf_compute.o @STARPU_HAVE_FC_TRUE@nf_dynbuf_cl.mod: native_fortran/nf_dynbuf_cl.o @STARPU_HAVE_FC_TRUE@nf_partition_cl.mod: native_fortran/nf_partition_cl.o @STARPU_HAVE_FC_TRUE@nf_sched_ctx_cl.mod: native_fortran/nf_sched_ctx_cl.o @STARPU_HAVE_FC_TRUE@nf_types.mod: native_fortran/nf_types.o @STARPU_HAVE_FC_TRUE@nf_varbuf_cl.mod: native_fortran/nf_varbuf_cl.o # - list explicit dependences to control proper module files dependencies @STARPU_HAVE_FC_TRUE@native_fortran/nf_codelets.o: fstarpu_mod.mod @STARPU_HAVE_FC_TRUE@native_fortran/nf_compute.o: nf_types.mod fstarpu_mod.mod @STARPU_HAVE_FC_TRUE@native_fortran/nf_dynbuf_cl.o: fstarpu_mod.mod @STARPU_HAVE_FC_TRUE@native_fortran/nf_dynbuf.o: nf_dynbuf_cl.mod fstarpu_mod.mod @STARPU_HAVE_FC_TRUE@native_fortran/nf_example.o: nf_types.mod nf_compute.mod fstarpu_mod.mod @STARPU_HAVE_FC_TRUE@native_fortran/nf_matrix.o: nf_codelets.mod fstarpu_mod.mod @STARPU_HAVE_FC_TRUE@native_fortran/nf_partition_cl.o: fstarpu_mod.mod @STARPU_HAVE_FC_TRUE@native_fortran/nf_partition.o: nf_partition_cl.mod fstarpu_mod.mod @STARPU_HAVE_FC_TRUE@native_fortran/nf_sched_ctx_cl.o: fstarpu_mod.mod @STARPU_HAVE_FC_TRUE@native_fortran/nf_sched_ctx.o: nf_sched_ctx_cl.mod fstarpu_mod.mod @STARPU_HAVE_FC_TRUE@native_fortran/nf_varbuf_cl.o: fstarpu_mod.mod @STARPU_HAVE_FC_TRUE@native_fortran/nf_varbuf.o: nf_varbuf_cl.mod fstarpu_mod.mod @STARPU_HAVE_FC_TRUE@native_fortran/nf_vector.o: nf_codelets.mod fstarpu_mod.mod # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/examples/README.txt000066400000000000000000000055761507764646700173410ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # audio This applies a simple band filter over audio files axpy This computes the AXPY BLAS over a big vector basic_examples This contains very trivial examples: hello world, scaling a vector, etc. binary This shows how to store and load compiled OpenCL kernels on and from the file system callback This shows how to use task callbacks cg This computes a Conjugate Gradient cholesky This computes a Cholesky factorization common This holds common code for BLAS kernels cpp This shows how to use StarPU from C++ filters This contains several partitioning examples fortran90 This shows how to use StarPU from Fortran90 gl_interop This shows how interoperation can be done between StarPU CUDA computations and OpenGL rendering heat This uses a finite element method to compute heat propagation thanks to an LU factorization or a conjugate gradient incrementer This just increments a variable interface This shows how to implement a user-defined data type, here simply complex floats lu This computes an LU factorization mandelbrot This computes and outputs the mandelbrot set matvecmult This computes a matrix-vector multiplication mult This computes a matrix-matrix multiplication openmp This shows how to use an OpenMP code inside a StarPU parallel task pi This computes Pi thanks to random numbers pipeline This shows how to submit a pipeline to StarPU with limited buffer use, and avoiding submitted all the tasks at once ppm_downscaler This downscales PPM pictures profiling This examplifies how to get profiling information on executed tasks reductions This examplifies how to use value reductions sched_ctx This examplifies how to use scheduling contexts sched_ctx_utils This is just common code for scheduling contexts scheduler This examplifies how to implement a user-defined scheduler spmd This shows how to define a parallel task spmv This computes a sparse matrix-vector multiplication stencil This computes a dumb 3D stencil with 1D subdomain decomposition tag_example This examplifies how to use tags for dependencies top This examplifies how to enrich StarPU-top with information worker_collections This examplifies how to use worker collections starpu-1.4.9+dfsg/examples/api/000077500000000000000000000000001507764646700163775ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/api/bcsr_data_interface.c000066400000000000000000000030011507764646700224770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ // This program checks that the implementation of the BCSR data // interface only uses StarPU's public API #define starpu_interface_bcsr_ops my_starpu_interface_bcsr_ops #define starpu_bcsr_data_register my_starpu_bcsr_data_register #define starpu_bcsr_get_nnz my_starpu_bcsr_get_nnz #define starpu_bcsr_get_nrow my_starpu_bcsr_get_nrow #define starpu_bcsr_get_firstentry my_starpu_bcsr_get_firstentry #define starpu_bcsr_get_r my_starpu_bcsr_get_r #define starpu_bcsr_get_c my_starpu_bcsr_get_c #define starpu_bcsr_get_elemsize my_starpu_bcsr_get_elemsize #define starpu_bcsr_get_local_nzval my_starpu_bcsr_get_local_nzval #define starpu_bcsr_get_local_colind my_starpu_bcsr_get_local_colind #define starpu_bcsr_get_local_rowptr my_starpu_bcsr_get_local_rowptr #include "../../src/datawizard/interfaces/bcsr_interface.c" int main() { return 0; } starpu-1.4.9+dfsg/examples/api/block_data_interface.c000066400000000000000000000027201507764646700226470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ // This program checks that the implementation of the block data // interface only uses StarPU's public API #define starpu_interface_block_ops my_starpu_interface_block_ops #define starpu_block_data_register my_starpu_block_data_register #define starpu_block_ptr_register my_starpu_block_ptr_register #define starpu_block_get_nx my_starpu_block_get_nx #define starpu_block_get_ny my_starpu_block_get_ny #define starpu_block_get_nz my_starpu_block_get_nz #define starpu_block_get_local_ldy my_starpu_block_get_local_ldy #define starpu_block_get_local_ldz my_starpu_block_get_local_ldz #define starpu_block_get_local_ptr my_starpu_block_get_local_ptr #define starpu_block_get_elemsize my_starpu_block_get_elemsize #include "../../src/datawizard/interfaces/block_interface.c" int main() { return 0; } starpu-1.4.9+dfsg/examples/api/coo_data_interface.c000066400000000000000000000017521507764646700223410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ // This program checks that the implementation of the COO data // interface only uses StarPU's public API #define starpu_interface_coo_ops my_starpu_interface_coo_ops #define starpu_coo_data_register my_starpu_coo_data_register #include "../../src/datawizard/interfaces/coo_interface.c" int main() { return 0; } starpu-1.4.9+dfsg/examples/api/csr_data_interface.c000066400000000000000000000026171507764646700223510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ // This program checks that the implementation of the CSR data // interface only uses StarPU's public API #define starpu_interface_csr_ops my_starpu_interface_csr_ops #define starpu_csr_data_register my_starpu_csr_data_register #define starpu_csr_get_nnz my_starpu_csr_get_nnz #define starpu_csr_get_nrow my_starpu_csr_get_nrow #define starpu_csr_get_firstentry my_starpu_csr_get_firstentry #define starpu_csr_get_elemsize my_starpu_csr_get_elemsize #define starpu_csr_get_local_nzval my_starpu_csr_get_local_nzval #define starpu_csr_get_local_colind my_starpu_csr_get_local_colind #define starpu_csr_get_local_rowptr my_starpu_csr_get_local_rowptr #include "../../src/datawizard/interfaces/csr_interface.c" int main() { return 0; } starpu-1.4.9+dfsg/examples/api/matrix_data_interface.c000066400000000000000000000030131507764646700230550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ // This program checks that the implementation of the matrix data // interface only uses StarPU's public API #define starpu_interface_matrix_ops my_starpu_interface_matrix_ops #define starpu_matrix_data_register my_starpu_matrix_data_register #define starpu_matrix_data_register_allocsize my_starpu_matrix_data_register_allocsize #define starpu_matrix_ptr_register my_starpu_matrix_data_ptr_register #define starpu_matrix_get_nx my_starpu_matrix_get_nx #define starpu_matrix_get_ny my_starpu_matrix_get_ny #define starpu_matrix_get_local_ld my_starpu_matrix_get_local_ld #define starpu_matrix_get_local_ptr my_starpu_matrix_get_local_ptr #define starpu_matrix_get_elemsize my_starpu_matrix_get_elemsize #define starpu_matrix_get_allocsize my_starpu_matrix_get_allocsize #include "../../src/datawizard/interfaces/matrix_interface.c" int main() { return 0; } starpu-1.4.9+dfsg/examples/api/multiformat_data_interface.c000066400000000000000000000020321507764646700241140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ // This program checks that the implementation of the multiformat data // interface only uses StarPU's public API #define starpu_interface_multiformat_ops my_starpu_interface_multiformat_ops #define starpu_multiformat_data_register my_starpu_multiformat_data_register #include "../../src/datawizard/interfaces/multiformat_interface.c" int main() { return 0; } starpu-1.4.9+dfsg/examples/api/tensor_data_interface.c000066400000000000000000000031431507764646700230670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ // This program checks that the implementation of the tensor data // interface only uses StarPU's public API #define starpu_interface_tensor_ops my_starpu_interface_tensor_ops #define starpu_tensor_data_register my_starpu_tensor_data_register #define starpu_tensor_ptr_register my_starpu_tensor_data_ptr_register #define starpu_tensor_get_nx my_starpu_tensor_get_nx #define starpu_tensor_get_ny my_starpu_tensor_get_ny #define starpu_tensor_get_nz my_starpu_tensor_get_nz #define starpu_tensor_get_nt my_starpu_tensor_get_nt #define starpu_tensor_get_local_ldy my_starpu_tensor_get_local_ldy #define starpu_tensor_get_local_ldz my_starpu_tensor_get_local_ldz #define starpu_tensor_get_local_ldt my_starpu_tensor_get_local_ldt #define starpu_tensor_get_local_ptr my_starpu_tensor_get_local_ptr #define starpu_tensor_get_elemsize my_starpu_tensor_get_elemsize #include "../../src/datawizard/interfaces/tensor_interface.c" int main() { return 0; } starpu-1.4.9+dfsg/examples/api/variable_data_interface.c000066400000000000000000000023311507764646700233400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ // This program checks that the implementation of the variable data // interface only uses StarPU's public API #define starpu_interface_variable_ops my_starpu_interface_variable_ops #define starpu_variable_data_register my_starpu_variable_data_register #define starpu_variable_ptr_register my_starpu_variable_ptr_register #define starpu_variable_get_local_ptr my_starpu_variable_get_local_ptr #define starpu_variable_get_elemsize my_starpu_variable_get_elemsize #include "../../src/datawizard/interfaces/variable_interface.c" int main() { return 0; } starpu-1.4.9+dfsg/examples/api/vector_data_interface.c000066400000000000000000000026251507764646700230630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ // This program checks that the implementation of the vector data // interface only uses StarPU's public API #define starpu_interface_vector_ops my_starpu_interface_vector_ops #define starpu_vector_data_register my_starpu_vector_data_register #define starpu_vector_data_register_allocsize my_starpu_vector_data_register_allocsize #define starpu_vector_ptr_register my_starpu_vector_data_ptr_register #define starpu_vector_get_nx my_starpu_vector_get_nx #define starpu_vector_get_local_ptr my_starpu_vector_get_local_ptr #define starpu_vector_get_elemsize my_starpu_vector_get_elemsize #define starpu_vector_get_allocsize my_starpu_vector_get_allocsize #include "../../src/datawizard/interfaces/vector_interface.c" int main() { return 0; } starpu-1.4.9+dfsg/examples/api/void_data_interface.c000066400000000000000000000017601507764646700225210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ // This program checks that the implementation of the void data // interface only uses StarPU's public API #define starpu_interface_void_ops my_starpu_interface_void_ops #define starpu_void_data_register my_starpu_void_data_register #include "../../src/datawizard/interfaces/void_interface.c" int main() { return 0; } starpu-1.4.9+dfsg/examples/axpy/000077500000000000000000000000001507764646700166075ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/axpy/axpy.c000066400000000000000000000136121507764646700177370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This creates two dumb vectors, splits them into chunks, and for each pair of * chunk, run axpy on them. */ #include #include #include #include #include #include #ifdef STARPU_USE_CUDA #include #endif #include "axpy.h" #define AXPY STARPU_SAXPY #define CUBLASAXPY cublasSaxpy #define N (16*1024*1024) #define NBLOCKS 8 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define EPSILON 1e-6 TYPE *_vec_x, *_vec_y; TYPE _alpha = 3.41; /* descriptors for StarPU */ starpu_data_handle_t _handle_y, _handle_x; void axpy_cpu(void *descr[], void *arg) { TYPE alpha = *((TYPE *)arg); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); TYPE *block_x = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *block_y = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); AXPY((int)n, alpha, block_x, 1, block_y, 1); } #ifdef STARPU_USE_CUDA void axpy_gpu(void *descr[], void *arg) { TYPE alpha = *((TYPE *)arg); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); TYPE *block_x = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *block_y = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); cublasStatus_t status = CUBLASAXPY(starpu_cublas_get_local_handle(), (int)n, &alpha, block_x, 1, block_y, 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif #ifdef STARPU_USE_OPENCL extern void axpy_opencl(void *buffers[], void *args); #endif static struct starpu_perfmodel axpy_model = { .type = STARPU_HISTORY_BASED, .symbol = "axpy" }; static struct starpu_codelet axpy_cl = { .cpu_funcs = {axpy_cpu}, .cpu_funcs_name = {"axpy_cpu"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {axpy_gpu}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, #ifdef STARPU_USE_OPENCL .opencl_funcs = {axpy_opencl}, #elif defined(STARPU_SIMGRID) .opencl_funcs = {(void*)1}, #endif .opencl_flags = {STARPU_OPENCL_ASYNC}, .nbuffers = 2, .modes = {STARPU_R, STARPU_RW}, .name = "axpy", .model = &axpy_model }; static int check(void) { int i; for (i = 0; i < N; i++) { TYPE expected_value = _alpha * _vec_x[i] + 4.0; if (fabs(_vec_y[i] - expected_value) > expected_value * EPSILON) { FPRINTF(stderr,"at %d, %f*%f+%f=%f, expected %f\n", i, _alpha, _vec_x[i], 4.0, _vec_y[i], expected_value); return EXIT_FAILURE; } } return EXIT_SUCCESS; } #ifdef STARPU_USE_OPENCL struct starpu_opencl_program opencl_program; #endif int main(void) { int ret, exit_value = 0; /* Initialize StarPU */ ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("examples/axpy/axpy_opencl_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif starpu_cublas_init(); /* This is equivalent to vec_a = malloc(N*sizeof(TYPE)); vec_b = malloc(N*sizeof(TYPE)); */ starpu_malloc((void **)&_vec_x, N*sizeof(TYPE)); assert(_vec_x); starpu_malloc((void **)&_vec_y, N*sizeof(TYPE)); assert(_vec_y); unsigned i; for (i = 0; i < N; i++) { _vec_x[i] = 1.0f; /*(TYPE)starpu_drand48(); */ _vec_y[i] = 4.0f; /*(TYPE)starpu_drand48(); */ } FPRINTF(stderr, "BEFORE x[0] = %2.2f\n", _vec_x[0]); FPRINTF(stderr, "BEFORE y[0] = %2.2f\n", _vec_y[0]); /* Declare the data to StarPU */ starpu_vector_data_register(&_handle_x, STARPU_MAIN_RAM, (uintptr_t)_vec_x, N, sizeof(TYPE)); starpu_vector_data_register(&_handle_y, STARPU_MAIN_RAM, (uintptr_t)_vec_y, N, sizeof(TYPE)); /* Divide the vector into blocks */ struct starpu_data_filter block_filter = { .filter_func = starpu_vector_filter_block, .nchildren = NBLOCKS }; starpu_data_partition(_handle_x, &block_filter); starpu_data_partition(_handle_y, &block_filter); double start; double end; start = starpu_timing_now(); unsigned b; for (b = 0; b < NBLOCKS; b++) { struct starpu_task *task = starpu_task_create(); task->cl = &axpy_cl; task->cl_arg = &_alpha; task->cl_arg_size = sizeof(_alpha); task->handles[0] = starpu_data_get_sub_data(_handle_x, 1, b); task->handles[1] = starpu_data_get_sub_data(_handle_y, 1, b); task->tag_id = b; ret = starpu_task_submit(task); if (ret == -ENODEV) { exit_value = 77; goto enodev; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); enodev: starpu_data_unpartition(_handle_x, STARPU_MAIN_RAM); starpu_data_unpartition(_handle_y, STARPU_MAIN_RAM); starpu_data_unregister(_handle_x); starpu_data_unregister(_handle_y); end = starpu_timing_now(); double timing = end - start; FPRINTF(stderr, "timing -> %2.2f us %2.2f MB/s\n", timing, 3*N*sizeof(TYPE)/timing); FPRINTF(stderr, "AFTER y[0] = %2.2f (ALPHA = %2.2f)\n", _vec_y[0], _alpha); if (exit_value != 77) exit_value = check(); starpu_free_noflag((void *)_vec_x, N*sizeof(TYPE)); starpu_free_noflag((void *)_vec_y, N*sizeof(TYPE)); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif /* Stop StarPU */ starpu_shutdown(); return exit_value; } starpu-1.4.9+dfsg/examples/axpy/axpy.h000066400000000000000000000014461507764646700177460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef AXPY_H__ #define AXPY_H__ #define TYPE float #endif /* AXPY_H__ */ starpu-1.4.9+dfsg/examples/axpy/axpy_opencl.c000066400000000000000000000045671507764646700213100ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* OpenCL codelet for axpy */ #include #include "axpy.h" extern struct starpu_opencl_program opencl_program; void axpy_opencl(void *buffers[], void *_args) { TYPE *alpha = _args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); cl_mem x = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); unsigned x_offset = STARPU_VECTOR_GET_OFFSET(buffers[0]); cl_mem y = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[1]); unsigned y_offset = STARPU_VECTOR_GET_OFFSET(buffers[1]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "_axpy_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(x), &x); err|= clSetKernelArg(kernel, 1, sizeof(x_offset), &x_offset); err|= clSetKernelArg(kernel, 2, sizeof(y), &y); err|= clSetKernelArg(kernel, 3, sizeof(y_offset), &y_offset); err|= clSetKernelArg(kernel, 4, sizeof(n), &n); err|= clSetKernelArg(kernel, 5, sizeof(*alpha), alpha); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local=global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/examples/axpy/axpy_opencl_kernel.cl000066400000000000000000000021071507764646700230100ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* OpenCL kernel implementing axpy */ #include "axpy.h" __kernel void _axpy_opencl(__global TYPE *x, unsigned x_offset, __global TYPE *y, unsigned y_offset, unsigned nx, TYPE alpha) { const int i = get_global_id(0); x = (__global char*) x + x_offset; y = (__global char*) y + y_offset; if (i < nx) y[i] = alpha * x[i] + y[i]; } starpu-1.4.9+dfsg/examples/basic_examples/000077500000000000000000000000001507764646700206055ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/basic_examples/block.c000066400000000000000000000073321507764646700220500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void cpu_codelet(void *descr[], void *_args); #ifdef STARPU_USE_CUDA extern void cuda_codelet(void *descr[], void *_args); #endif #ifdef STARPU_USE_HIP extern void hip_codelet(void *descr[], void *_args); #endif #ifdef STARPU_USE_OPENCL extern void opencl_codelet(void *descr[], void *_args); struct starpu_opencl_program opencl_code; #endif typedef void (*device_func)(void **, void *); int execute_on(uint32_t where, device_func func, float *block, int pnx, int pny, int pnz, float multiplier) { struct starpu_codelet cl; starpu_data_handle_t block_handle; int i; starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, pnx, pnx*pny, pnx, pny, pnz, sizeof(float)); starpu_codelet_init(&cl); cl.where = where; cl.cuda_funcs[0] = func; cl.hip_funcs[0] = func; cl.cpu_funcs[0] = func; cl.opencl_funcs[0] = func; cl.nbuffers = 1; cl.modes[0] = STARPU_RW, cl.model = NULL; cl.name = "block_scale"; struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->callback_func = NULL; task->handles[0] = block_handle; task->cl_arg = &multiplier; task->cl_arg_size = sizeof(multiplier); int ret = starpu_task_submit(task); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); task->destroy = 0; starpu_task_destroy(task); return 1; } starpu_task_wait_for_all(); /* update the array in RAM */ starpu_data_unregister(block_handle); for(i=0 ; i void cpu_codelet(void *descr[], void *_args) { float *block = (float *)STARPU_BLOCK_GET_PTR(descr[0]); int nx = (int)STARPU_BLOCK_GET_NX(descr[0]); int ny = (int)STARPU_BLOCK_GET_NY(descr[0]); int nz = (int)STARPU_BLOCK_GET_NZ(descr[0]); unsigned ldy = STARPU_BLOCK_GET_LDY(descr[0]); unsigned ldz = STARPU_BLOCK_GET_LDZ(descr[0]); float *multiplier = (float *)_args; int i, j, k; for(k=0; k static __global__ void cuda_block(float *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier) { int i, j, k; for(k=0; k>>(block, nx, ny, nz, ldy, ldz, *multiplier); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } starpu-1.4.9+dfsg/examples/basic_examples/block_hip.hip000066400000000000000000000032321507764646700232410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include static __global__ void hip_block(float *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float multiplier) { int i, j, k; for(k=0; k #define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr) \ do \ { \ int check_err; \ check_err = clSetKernelArg(kernel, n, size, ptr); \ if (check_err != CL_SUCCESS) \ STARPU_OPENCL_REPORT_ERROR(check_err); \ } while (0) extern struct starpu_opencl_program opencl_code; void opencl_codelet(void *descr[], void *_args) { cl_kernel kernel; cl_command_queue queue; cl_event event; int id, devid, err; cl_mem block = (cl_mem)STARPU_BLOCK_GET_DEV_HANDLE(descr[0]); int nx = (int)STARPU_BLOCK_GET_NX(descr[0]); int ny = (int)STARPU_BLOCK_GET_NY(descr[0]); int nz = (int)STARPU_BLOCK_GET_NZ(descr[0]); int ldy = (int)STARPU_BLOCK_GET_LDY(descr[0]); int ldz = (int) STARPU_BLOCK_GET_LDZ(descr[0]); float *multiplier = (float *)_args; id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_code, "block", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); CHECK_CL_SET_KERNEL_ARG(kernel, 0, sizeof(block), &block); CHECK_CL_SET_KERNEL_ARG(kernel, 1, sizeof(nx), &nx); CHECK_CL_SET_KERNEL_ARG(kernel, 2, sizeof(ny), &ny); CHECK_CL_SET_KERNEL_ARG(kernel, 3, sizeof(nz), &nz); CHECK_CL_SET_KERNEL_ARG(kernel, 4, sizeof(ldy), &ldy); CHECK_CL_SET_KERNEL_ARG(kernel, 5, sizeof(ldz), &ldz); CHECK_CL_SET_KERNEL_ARG(kernel, 6, sizeof(*multiplier), multiplier); { size_t global=nx*ny*nz; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/examples/basic_examples/block_opencl_kernel.cl000066400000000000000000000015731507764646700251250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void block(__global float *b, int nx, int ny, int nz, int ldy, int ldz, float multiplier) { const int i = get_global_id(0); if (i < (nz*ldz)+(ny*ldy)+nx) b[i] = b[i] * multiplier; } starpu-1.4.9+dfsg/examples/basic_examples/dynamic_handles.c000066400000000000000000000141351507764646700240770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void dummy_small_kernel(void *descr[], void *cl_arg) { int nb_data; int i; starpu_codelet_unpack_args(cl_arg, &nb_data); assert(nb_data == 1); FPRINTF(stderr, "Number of data: %d\n", nb_data); for(i=0 ; isynchronous = 1; task->cl = &dummy_small_cl; starpu_codelet_pack_args(&task->cl_arg, &task->cl_arg_size, STARPU_VALUE, &(task->cl->nbuffers), sizeof(task->cl->nbuffers), 0); task->dyn_handles = malloc(sizeof(*task->dyn_handles)); task->dyn_handles[0] = handle; task->cl_arg_free = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* This tests a large constant number of arguments with starpu_task_submit */ task2 = starpu_task_create(); task2->synchronous = 1; task2->cl = &dummy_big_cl; task2->cl_arg_free = 1; starpu_codelet_pack_args(&task2->cl_arg, &task2->cl_arg_size, STARPU_VALUE, &(task2->cl->nbuffers), sizeof(task2->cl->nbuffers), 0); task2->dyn_handles = malloc(task2->cl->nbuffers * sizeof(*(task2->dyn_handles))); task2->dyn_modes = malloc(task2->cl->nbuffers * sizeof(*(task2->dyn_modes))); for(i=0 ; icl->nbuffers ; i++) { task2->dyn_handles[i] = handle; task2->dyn_modes[i] = STARPU_RW; } ret = starpu_task_submit(task2); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* This tests a large variable number of arguments with starpu_task_submit */ task3 = starpu_task_create(); task3->synchronous = 1; task3->cl = &dummy_variable_cl; task3->cl_arg_free = 1; starpu_codelet_pack_args(&task3->cl_arg, &task3->cl_arg_size, STARPU_VALUE, &(dummy_big_cl.nbuffers), sizeof(dummy_big_cl.nbuffers), 0); task3->dyn_handles = malloc(dummy_big_cl.nbuffers * sizeof(*(task3->dyn_handles))); task3->dyn_modes = malloc(dummy_big_cl.nbuffers * sizeof(*(task3->dyn_modes))); task3->nbuffers = dummy_big_cl.nbuffers; for(i=0 ; idyn_handles[i] = handle; task3->dyn_modes[i] = STARPU_RW; } ret = starpu_task_submit(task3); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* This tests a small number of arguments with starpu_task_insert */ ret = starpu_task_insert(&dummy_small_cl, STARPU_VALUE, &(dummy_small_cl.nbuffers), sizeof(dummy_small_cl.nbuffers), STARPU_RW, handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); /* This tests a large constant number of arguments with starpu_task_insert */ descrs = malloc(dummy_big_cl.nbuffers * sizeof(struct starpu_data_descr)); for(i=0 ; icl_arg) * - how to declare a callback function that is called once the task has been * executed * - how to specify if starpu_task_submit is a blocking or non-blocking * operation (task->synchronous) */ #include #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) /* When the task is done, task->callback_func(task->callback_arg) is called. Any * callback function must have the prototype void (*)(void *). * NB: Callback are NOT allowed to perform potentially blocking operations */ void callback_func(void *callback_arg) { FPRINTF(stdout, "Callback function got argument %p\n", callback_arg); } /* Every implementation of a codelet must have this prototype, the first * argument (buffers) describes the buffers/streams that are managed by the * DSM; the second arguments references read-only data that is passed as an * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there * are no data input/output managed by the DSM (cl.nbuffers = 0) */ struct params { int i; float f; }; void cpu_func(void *buffers[], void *cl_arg) { (void)buffers; struct params *params = (struct params *) cl_arg; FPRINTF(stdout, "Hello world (params = {%i, %f})\n", params->i, params->f); } int main(void) { struct starpu_codelet cl; struct starpu_task *task; struct params params = {1, 2.0f}; int ret; /* initialize StarPU : passing a NULL argument means that we use * default configuration for the scheduling policies and the number of * processors/accelerators */ ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* create a new task that is non-blocking by default : the task is not * submitted to the scheduler until the starpu_task_submit function is * called */ task = starpu_task_create(); starpu_codelet_init(&cl); /* this codelet may only be executed on a CPU, and its cpu * implementation is function "cpu_func" */ cl.cpu_funcs[0] = cpu_func; cl.cpu_funcs_name[0] = "cpu_func"; /* the codelet does not manipulate any data that is managed * by our DSM */ cl.nbuffers = 0; cl.name="hello"; /* the task uses codelet "cl" */ task->cl = &cl; /* It is possible to pass buffers that are not managed by the DSM to the * kernels: the second argument of the "cpu_func" function is a pointer to a * buffer that contains information for the codelet (cl_arg stands for * codelet argument). In the case of accelerators, it is possible that * the codelet is given a pointer to a copy of that buffer: this buffer * is read-only so that any modification is not passed to other copies * of the buffer. For this reason, a buffer passed as a codelet * argument (cl_arg) is NOT a valid synchronization medium! */ task->cl_arg = ¶ms; task->cl_arg_size = sizeof(params); /* once the task has been executed, callback_func(0x42) * will be called on a CPU */ task->callback_func = callback_func; task->callback_arg = (void*) (uintptr_t) 0x42; /* starpu_task_submit will be a blocking call */ task->synchronous = 1; /* submit the task to StarPU */ ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* terminate StarPU: statistics and other debug outputs are not * guaranteed to be generated unless this function is called. Once it * is called, it is not possible to submit tasks anymore, and the user * is responsible for making sure all tasks have already been executed: * calling starpu_shutdown() before the termination of all the tasks * results in an undefined behaviour */ starpu_shutdown(); return 0; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/basic_examples/hooks.c000066400000000000000000000026251507764646700221010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 21 static int check_malloc = 0; static int check_free = 0; int malloc_hook(unsigned dst_node, void **A, size_t dim, int flags) { int ret = 0; *A = malloc(dim); if (!*A) ret = -ENOMEM; check_malloc++; return ret; } int free_hook(unsigned dst_node, void *A, size_t dim, int flags) { free(A); check_free++; return 0; } int main(void) { int* vector; int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc_set_hooks(malloc_hook, free_hook); starpu_malloc((void **)&vector, NX*sizeof(int)); starpu_free_noflag(vector, NX*sizeof(int)); STARPU_ASSERT(check_malloc == 1 && check_free == 1); starpu_shutdown(); } starpu-1.4.9+dfsg/examples/basic_examples/mult.c000066400000000000000000000347111507764646700217400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example shows a simple implementation of a blocked matrix * multiplication. Note that this is NOT intended to be an efficient * implementation of sgemm! In this example, we show: * - how to declare dense matrices (starpu_matrix_data_register) * - how to manipulate matrices within codelets (eg. descr[0].blas.ld) * - how to use filters to partition the matrices into blocks * (starpu_data_partition and starpu_data_map_filters) * - how to unpartition data (starpu_data_unpartition) and how to stop * monitoring data (starpu_data_unregister) * - how to manipulate subsets of data (starpu_data_get_sub_data) * - how to construct an autocalibrated performance model (starpu_perfmodel) * - how to submit asynchronous tasks */ #include #include #include #include #include static float *A, *B, *C, *Cref; static starpu_data_handle_t A_handle, B_handle, C_handle; static unsigned nslicesx = 4; static unsigned nslicesy = 4; #ifdef STARPU_QUICK_CHECK static unsigned xdim = 512; static unsigned ydim = 512; static unsigned zdim = 256; #else static unsigned xdim = 1024; static unsigned ydim = 1024; static unsigned zdim = 512; #endif extern void hip_mult(void *descr[], void *arg); extern void cuda_mult(void *descr[], void *arg); /* * That program should compute C = A * B * * A of size (z,y) * B of size (x,z) * C of size (x,y) * * |---------------| * z | B | * |---------------| * z x * |----| |---------------| * | | | | * | | | | * | A | y | C | * | | | | * | | | | * |----| |---------------| * * Note: we use FORTRAN ordering. */ /* * The codelet is passed 3 matrices, the "descr" union-type field gives a * description of the layout of those 3 matrices in the local memory (ie. RAM * in the case of CPU, GPU frame buffer in the case of GPU etc.). Since we have * registered data with the "matrix" data interface, we use the matrix macros. */ void cpu_mult(void *descr[], void *arg) { (void)arg; float *subA, *subB, *subC; uint32_t nxC, nyC, nyA; uint32_t ldA, ldB, ldC; /* ptr gives a pointer to the first element of the local copy */ subA = (float *)STARPU_MATRIX_GET_PTR(descr[0]); subB = (float *)STARPU_MATRIX_GET_PTR(descr[1]); subC = (float *)STARPU_MATRIX_GET_PTR(descr[2]); /* * Note: STARPU_MATRIX_GET_NX/NY is different from X/Y of the FORTRAN * ordering: * - nx is the number of consecutive elements (thus the number of rows * in FORTRAN order) * - ny is the number of series that are separated by ld elements (thus * the number of columns in FORTRAN order) * - ld stands for leading dimension * * NB: in case some filters were used, the leading dimension is not * guaranteed to be the same in main memory (on the original matrix) * and on the accelerator! */ nxC = STARPU_MATRIX_GET_NX(descr[2]); nyC = STARPU_MATRIX_GET_NY(descr[2]); nyA = STARPU_MATRIX_GET_NY(descr[0]); ldA = STARPU_MATRIX_GET_LD(descr[0]); ldB = STARPU_MATRIX_GET_LD(descr[1]); ldC = STARPU_MATRIX_GET_LD(descr[2]); /* we use a FORTRAN-ordering! */ unsigned i,j,k; for (i = 0; i < nyC; i++) /* iterate over columns of C */ { for (j = 0; j < nxC; j++) /* iterate over rows of C */ { float sum = 0.0; for (k = 0; k < nyA; k++) { sum += subA[j+k*ldA]*subB[k+i*ldB]; } subC[j + i*ldC] = sum; } } } static void init_problem_data(void) { unsigned i,j; /* we initialize matrices A, B and C in the usual way */ starpu_malloc_flags((void **)&A, zdim*ydim*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_malloc_flags((void **)&B, xdim*zdim*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_malloc_flags((void **)&C, xdim*ydim*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); Cref = (float *) malloc(xdim*ydim*sizeof(float)); assert(A); assert(B); assert(C); assert(Cref); /* fill the A and B matrices */ starpu_srand48(2009); for (j=0; j < ydim; j++) { for (i=0; i < zdim; i++) { A[j+i*ydim] = (float)(starpu_drand48()); } } for (j=0; j < zdim; j++) { for (i=0; i < xdim; i++) { B[j+i*zdim] = (float)(starpu_drand48()); } } for (j=0; j < ydim; j++) { for (i=0; i < xdim; i++) { C[j+i*ydim] = (float)(0); Cref[j+i*ydim] = (float)(0); } } } static void partition_mult_data(void) { /* note that we assume a FORTRAN ordering here! */ /* The BLAS data interface is described by 4 parameters: * - the location of the first element of the matrix to monitor (3rd * argument) * - the number of elements between columns, aka leading dimension * (4th arg) * - the number of (contiguous) elements per column, ie. contiguous * elements (5th arg) * - the number of columns (6th arg) * The first elements is a pointer to the data_handle that will be * associated to the matrix, and the second elements gives the memory * node in which resides the matrix: 0 means that the 3rd argument is * an address in main memory. */ starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, ydim, ydim, zdim, sizeof(float)); starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, zdim, zdim, xdim, sizeof(float)); starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C, ydim, ydim, xdim, sizeof(float)); /* A filter is a method to partition a data into disjoint chunks, it is * described by the means of the "struct starpu_data_filter" structure that * contains a function that is applied on a data handle to partition it * into smaller chunks, and an argument that is passed to the function * (eg. the number of blocks to create here). */ /* StarPU supplies some basic filters such as the partition of a matrix * into blocks, note that we are using a FORTRAN ordering so that the * name of the filters are a bit misleading */ struct starpu_data_filter vert = { .filter_func = starpu_matrix_filter_vertical_block, .nchildren = nslicesx }; struct starpu_data_filter horiz = { .filter_func = starpu_matrix_filter_block, .nchildren = nslicesy }; /* * Illustration with nslicex = 4 and nslicey = 2, it is possible to access * sub-data by using the "starpu_data_get_sub_data" method, which takes a data handle, * the number of filters to apply, and the indexes for each filters, for * instance: * * A' handle is starpu_data_get_sub_data(A_handle, 1, 1); * B' handle is starpu_data_get_sub_data(B_handle, 1, 2); * C' handle is starpu_data_get_sub_data(C_handle, 2, 2, 1); * * Note that here we applied 2 filters recursively onto C. * * "starpu_data_get_sub_data(C_handle, 1, 3)" would return a handle to the 4th column * of blocked matrix C for example. * * |---|---|---|---| * | | | B'| | B * |---|---|---|---| * 0 1 2 3 * |----| |---|---|---|---| * | | | | | | | * | | 0 | | | | | * |----| |---|---|---|---| * | A' | | | | C'| | * | | | | | | | * |----| |---|---|---|---| * A C * * IMPORTANT: applying filters is equivalent to partitioning a piece of * data in a hierarchical manner, so that memory consistency is enforced * for each of the elements independently. The tasks should therefore NOT * access inner nodes (eg. one column of C or the whole C) but only the * leafs of the tree (ie. blocks here). Manipulating inner nodes is only * possible by disapplying the filters (using starpu_data_unpartition), to * enforce memory consistency. */ starpu_data_partition(B_handle, &vert); starpu_data_partition(A_handle, &horiz); /* starpu_data_map_filters is a variable-arity function, the first argument * is the handle of the data to partition, the second argument is the * number of filters to apply recursively. Filters are applied in the * same order as the arguments. * This would be equivalent to starpu_data_partition(C_handle, &vert) and * then applying horiz on each sub-data (ie. each column of C) */ starpu_data_map_filters(C_handle, 2, &vert, &horiz); } static struct starpu_perfmodel mult_perf_model = { .type = STARPU_HISTORY_BASED, .symbol = "mult_perf_model" }; static struct starpu_codelet cl = { /* CPU implementation of the codelet */ .cpu_funcs = {cpu_mult}, .cpu_funcs_name = {"cpu_mult"}, #ifdef STARPU_USE_HIP /* HIP implementation of the codelet */ .hip_funcs = {hip_mult}, .hip_flags = {STARPU_HIP_ASYNC}, #endif #ifdef STARPU_USE_CUDA /* CUDA implementation of the codelet */ .cuda_funcs = {cuda_mult}, .cuda_flags = {STARPU_CUDA_ASYNC}, .where = STARPU_CUDA, #endif /* the codelet manipulates 3 buffers that are managed by the DSM */ .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W}, /* in case the scheduling policy may use performance models */ .model = &mult_perf_model }; static int launch_tasks(void) { int ret; /* partition the work into slices */ unsigned taskx, tasky; for (taskx = 0; taskx < nslicesx; taskx++) { for (tasky = 0; tasky < nslicesy; tasky++) { /* C[taskx, tasky] = A[tasky] B[taskx] */ /* by default, starpu_task_create() returns an * asynchronous task (ie. task->synchronous = 0) */ struct starpu_task *task = starpu_task_create(); /* this task implements codelet "cl" */ task->cl = &cl; /* * |---|---|---|---| * | | * | | | B * |---|---|---|---| * X * |----| |---|---|---|---| * |****| Y | |***| | | * |****| | |***| | | * |----| |---|---|---|---| * | | | | | | | * | | | | | | | * |----| |---|---|---|---| * A C */ /* there was a single filter applied to matrices A * (respectively B) so we grab the handle to the chunk * identified by "tasky" (respectively "taskx). The "1" * tells StarPU that there is a single argument to the * variable-arity function starpu_data_get_sub_data */ task->handles[0] = starpu_data_get_sub_data(A_handle, 1, tasky); task->handles[1] = starpu_data_get_sub_data(B_handle, 1, taskx); /* 2 filters were applied on matrix C, so we give * starpu_data_get_sub_data 2 arguments. The order of the arguments * must match the order in which the filters were * applied. * NB: starpu_data_get_sub_data(C_handle, 1, k) would have returned * a handle to the column number k of matrix C. * NB2: starpu_data_get_sub_data(C_handle, 2, taskx, tasky) is * equivalent to * starpu_data_get_sub_data(starpu_data_get_sub_data(C_handle, 1, taskx), 1, tasky)*/ task->handles[2] = starpu_data_get_sub_data(C_handle, 2, taskx, tasky); /* this is not a blocking call since task->synchronous = 0 */ ret = starpu_task_submit(task); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } return 0; } void check_result(float* C_gpu, float* C_ref, uint32_t ldC) { unsigned i,j; for (i = 0; i < ydim; i++) { for (j = 0; j < xdim; j++) { if(C_gpu[j + i*ldC]-C_ref[j + i*ldC] > 1e-6*C_ref[j + i*ldC]) { printf("| Cref[%u,%u]=%f - Cgpu[%u,%u]=%f | Error in the computation of C: the difference between the two is bigger than 1e-6 * the reference" , i, j, C_ref[j + i*ldC], i, j, C_gpu[j + i*ldC]); exit(1); } } } printf("SUCCESSFUL COMPUTATION\n"); } int main(void) { int ret; /* start the runtime */ ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* initialize matrices A, B and C and register them to StarPU */ init_problem_data(); /* partition matrices into blocks that can be manipulated by the * codelets */ partition_mult_data(); /* submit all tasks in an asynchronous fashion */ ret = launch_tasks(); if (ret == -ENODEV) goto enodev; /* cpu compution to check */ /* ============================================= */ uint32_t ldA = ydim; uint32_t ldB = zdim; uint32_t ldC = ydim; unsigned i,j,k; for (i = 0; i < ydim; i++) { for (j = 0; j < xdim; j++) { float sum = 0.0; for (k = 0; k < zdim; k++) { sum += A[j+k*ldA]*B[k+i*ldB]; } Cref[j + i*ldC] = sum; } } /* ============================================= */ /* wait for termination */ starpu_task_wait_for_all(); /* remove the filters applied by the means of starpu_data_map_filters; now * it's not possible to manipulate a subset of C using starpu_data_get_sub_data until * starpu_data_map_filters is called again on C_handle. * The second argument is the memory node where the different subsets * should be reassembled, 0 = main memory (RAM) */ starpu_data_unpartition(A_handle, STARPU_MAIN_RAM); starpu_data_unpartition(B_handle, STARPU_MAIN_RAM); starpu_data_unpartition(C_handle, STARPU_MAIN_RAM); /* stop monitoring matrix C : after this, it is not possible to pass C * (or any subset of C) as a codelet input/output. This also implements * a barrier so that the piece of data is put back into main memory in * case it was only available on a GPU for instance. */ starpu_data_unregister(A_handle); starpu_data_unregister(B_handle); starpu_data_unregister(C_handle); /* Comment to remove printing of results */ check_result(C, Cref, ldC); starpu_free_flags(A, zdim*ydim*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_free_flags(B, xdim*zdim*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_free_flags(C, xdim*ydim*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); free(Cref); starpu_shutdown(); return 0; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/basic_examples/mult_cuda.cu000066400000000000000000000104041507764646700231120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example shows a simple implementation of a blocked matrix * multiplication. Note that this is NOT intended to be an efficient * implementation of sgemm! In this example, we show: * - how to declare dense matrices (starpu_matrix_data_register) * - how to manipulate matrices within codelets (eg. descr[0].blas.ld) * - how to use filters to partition the matrices into blocks * (starpu_data_partition and starpu_data_map_filters) * - how to unpartition data (starpu_data_unpartition) and how to stop * monitoring data (starpu_data_unregister) * - how to manipulate subsets of data (starpu_data_get_sub_data) * - how to construct an autocalibrated performance model (starpu_perfmodel) * - how to submit asynchronous tasks */ #include #include #include #include #include #define THREADS_PER_BLOCK 256 /* * That program should compute C = A * B * * A of size (z,y) * B of size (x,z) * C of size (x,y) |---------------| z | B | |---------------| z x |----| |---------------| | | | | | | | | | A | y | C | | | | | | | | | |----| |---------------| * Note: we use FORTRAN ordering. */ /* * The codelet is passed 3 matrices, the "descr" union-type field gives a * description of the layout of those 3 matrices in the local memory (ie. RAM * in the case of CPU, GPU frame buffer in the case of GPU etc.). Since we have * registered data with the "matrix" data interface, we use the matrix macros. */ static __global__ void cuda_mult_kernel(uint32_t nxC, uint32_t nyC, uint32_t nyA, uint32_t ldA, uint32_t ldB, uint32_t ldC, float * subA, float * subB, float * subC ) { uint32_t id, i, j, k; float sum; id = blockIdx.x * blockDim.x + threadIdx.x; i = id % nxC; j = id / nxC; if (j >= nyC) { return; } sum = 0.; for (k = 0 ; k < nyA ; k++) { sum += subA[i + k*ldA] * subB[k + j*ldB]; } subC[i + j*ldC] = sum; } extern "C" void cuda_mult(void *descr[], void *arg) { (void)arg; float *d_subA, *d_subB, *d_subC; uint32_t nxC, nyC, nyA; uint32_t ldA, ldB, ldC; uint32_t nblocks; /* ptr gives a pointer to the first element of the local copy */ d_subA = (float *)STARPU_MATRIX_GET_PTR(descr[0]); d_subB = (float *)STARPU_MATRIX_GET_PTR(descr[1]); d_subC = (float *)STARPU_MATRIX_GET_PTR(descr[2]); /* * Note: STARPU_MATRIX_GET_NX/NY is different from X/Y of the FORTRAN * ordering: * - nx is the number of consecutive elements (thus the number of rows * in FORTRAN order) * - ny is the number of series that are separated by ld elements (thus * the number of columns in FORTRAN order) * - ld stands for leading dimension * * NB: in case some filters were used, the leading dimension is not * guaranteed to be the same in main memory (on the original matrix) * and on the accelerator! */ nxC = STARPU_MATRIX_GET_NX(descr[2]); nyC = STARPU_MATRIX_GET_NY(descr[2]); nyA = STARPU_MATRIX_GET_NY(descr[0]); ldA = STARPU_MATRIX_GET_LD(descr[0]); ldB = STARPU_MATRIX_GET_LD(descr[1]); ldC = STARPU_MATRIX_GET_LD(descr[2]); nblocks = (nxC * nyC + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK; cuda_mult_kernel <<< nblocks, THREADS_PER_BLOCK, 0, starpu_cuda_get_local_stream() >>> (nxC, nyC, nyA, ldA, ldB, ldC, d_subA, d_subB, d_subC); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/basic_examples/mult_hip.hip000066400000000000000000000104031507764646700231260ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example shows a simple implementation of a blocked matrix * multiplication. Note that this is NOT intended to be an efficient * implementation of sgemm! In this example, we show: * - how to declare dense matrices (starpu_matrix_data_register) * - how to manipulate matrices within codelets (eg. descr[0].blas.ld) * - how to use filters to partition the matrices into blocks * (starpu_data_partition and starpu_data_map_filters) * - how to unpartition data (starpu_data_unpartition) and how to stop * monitoring data (starpu_data_unregister) * - how to manipulate subsets of data (starpu_data_get_sub_data) * - how to construct an autocalibrated performance model (starpu_perfmodel) * - how to submit asynchronous tasks */ #include #include #include #include #include #define THREADS_PER_BLOCK 256 /* * That program should compute C = A * B * * A of size (z,y) * B of size (x,z) * C of size (x,y) |---------------| z | B | |---------------| z x |----| |---------------| | | | | | | | | | A | y | C | | | | | | | | | |----| |---------------| * Note: we use FORTRAN ordering. */ /* * The codelet is passed 3 matrices, the "descr" union-type field gives a * description of the layout of those 3 matrices in the local memory (ie. RAM * in the case of CPU, GPU frame buffer in the case of GPU etc.). Since we have * registered data with the "matrix" data interface, we use the matrix macros. */ static __global__ void hip_mult_kernel(uint32_t nxC, uint32_t nyC, uint32_t nyA, uint32_t ldA, uint32_t ldB, uint32_t ldC, float * subA, float * subB, float * subC ) { uint32_t id, i, j, k; float sum; id = blockIdx.x * blockDim.x + threadIdx.x; i = id % nxC; j = id / nxC; if (j >= nyC) { return; } sum = 0.; for (k = 0 ; k < nyA ; k++) { sum += subA[i + k*ldA] * subB[k + j*ldB]; } subC[i + j*ldC] = sum; } extern "C" void hip_mult(void *descr[], void *arg) { (void)arg; float *subA, *subB, *subC; uint32_t nxC, nyC, nyA; uint32_t ldA, ldB, ldC; uint32_t nblocks; /* ptr gives a pointer to the first element of the local copy */ subA = (float *)STARPU_MATRIX_GET_PTR(descr[0]); subB = (float *)STARPU_MATRIX_GET_PTR(descr[1]); subC = (float *)STARPU_MATRIX_GET_PTR(descr[2]); /* * Note: STARPU_MATRIX_GET_NX/NY is different from X/Y of the FORTRAN * ordering: * - nx is the number of consecutive elements (thus the number of rows * in FORTRAN order) * - ny is the number of series that are separated by ld elements (thus * the number of columns in FORTRAN order) * - ld stands for leading dimension * * NB: in case some filters were used, the leading dimension is not * guaranteed to be the same in main memory (on the original matrix) * and on the accelerator! */ nxC = STARPU_MATRIX_GET_NX(descr[2]); nyC = STARPU_MATRIX_GET_NY(descr[2]); nyA = STARPU_MATRIX_GET_NY(descr[0]); ldA = STARPU_MATRIX_GET_LD(descr[0]); ldB = STARPU_MATRIX_GET_LD(descr[1]); ldC = STARPU_MATRIX_GET_LD(descr[2]); nblocks = (nxC * nyC + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK; hipLaunchKernelGGL(hip_mult_kernel, nblocks, THREADS_PER_BLOCK, 0, starpu_hip_get_local_stream(), nxC, nyC, nyA, ldA, ldB, ldC, subA, subB, subC ); hipError_t status = hipGetLastError(); if (status != hipSuccess) STARPU_HIP_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/basic_examples/multiformat.c000066400000000000000000000160471507764646700233240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "multiformat_types.h" static int ncpu = 0; #ifdef STARPU_USE_CUDA static int ncuda = 0; #endif #ifdef STARPU_USE_OPENCL static int nopencl = 0; #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) static struct point array_of_structs[N_ELEMENTS]; static starpu_data_handle_t array_of_structs_handle; void multiformat_scal_cpu_func(void *buffers[], void *args) { struct point *aos; unsigned int n, i; (void)args; aos = (struct point *) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); for (i = 0; i < n; i++) { aos[i].x *= aos[i].y; } } #ifdef STARPU_USE_CUDA extern struct starpu_codelet cpu_to_cuda_cl; extern struct starpu_codelet cuda_to_cpu_cl; #endif #ifdef STARPU_USE_OPENCL extern struct starpu_codelet cpu_to_opencl_cl; extern struct starpu_codelet opencl_to_cpu_cl; #endif static struct starpu_multiformat_data_interface_ops format_ops = { #ifdef STARPU_USE_CUDA .cuda_elemsize = 2* sizeof(float), .cpu_to_cuda_cl = &cpu_to_cuda_cl, .cuda_to_cpu_cl = &cuda_to_cpu_cl, #endif #ifdef STARPU_USE_OPENCL .opencl_elemsize = 2 * sizeof(float), .cpu_to_opencl_cl = &cpu_to_opencl_cl, .opencl_to_cpu_cl = &opencl_to_cpu_cl, #endif .cpu_elemsize = sizeof(struct point), }; #ifdef STARPU_USE_CUDA extern void multiformat_scal_cuda_func(void *buffers[], void *arg); #endif #ifdef STARPU_USE_OPENCL extern void multiformat_scal_opencl_func(void *buffers[], void *arg); #endif #ifdef STARPU_USE_CPU static struct starpu_codelet cpu_cl = { .cpu_funcs = {multiformat_scal_cpu_func}, .cpu_funcs_name = {"multiformat_scal_cpu_func"}, .nbuffers = 1, .modes = { STARPU_RW }, .name = "codelet_real" }; #endif /* !STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA static struct starpu_codelet cuda_cl = { .cuda_funcs = { multiformat_scal_cuda_func }, .nbuffers = 1, .modes = { STARPU_RW }, .name = "cuda_codelet" }; #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL static struct starpu_codelet opencl_cl = { .opencl_funcs = { multiformat_scal_opencl_func }, .nbuffers = 1, .modes = { STARPU_RW }, .name = "opencl_codelet" }; #endif /* !STARPU_USE_OPENCL */ /* * Main functions */ static void init_problem_data(void) { int i; for (i = 0; i < N_ELEMENTS; i++) { array_of_structs[i].x = 1.0 + i; array_of_structs[i].y = 42.0; } } static void register_data(void) { starpu_multiformat_data_register(&array_of_structs_handle, STARPU_MAIN_RAM, &array_of_structs, N_ELEMENTS, &format_ops); } static int create_and_submit_task(unsigned int dev) { struct starpu_task *task = starpu_task_create(); switch (dev) { #ifdef STARPU_USE_CPU case STARPU_CPU: task->cl = &cpu_cl; break; #endif #ifdef STARPU_USE_CUDA case STARPU_CUDA: task->cl = &cuda_cl; break; #endif #ifdef STARPU_USE_OPENCL case STARPU_OPENCL: task->cl = &opencl_cl; break; #endif default: assert(0); } task->synchronous = 1; task->handles[0] = array_of_structs_handle; task->cl_arg = NULL; task->cl_arg_size = 0; return starpu_task_submit(task); } static void create_and_submit_tasks(void) { #ifdef STARPU_USE_CUDA if (ncuda > 0) { int err; err = create_and_submit_task(STARPU_CUDA); if (err != 0) { FPRINTF(stderr, "Cuda : %s\n", strerror(-err)); return; } } #endif #ifdef STARPU_USE_CPU if (ncpu > 0) { int err; err = create_and_submit_task(STARPU_CPU); if (err != 0) { FPRINTF(stderr, "CPU : %s\n", strerror(-err)); return; } } #endif #ifdef STARPU_USE_OPENCL if (nopencl > 0) { int err; err = create_and_submit_task(STARPU_OPENCL); if (err != 0) { FPRINTF(stderr, "OpenCL : %s\n", strerror(-err)); return; } } #endif /* !STARPU_USE_OPENCL */ } static void unregister_data(void) { starpu_data_unregister(array_of_structs_handle); } static void print_it(void) { int i; for (i = 0; i < N_ELEMENTS; i++) { FPRINTF(stderr, "(%.2f %.2f) ", array_of_structs[i].x, array_of_structs[i].y); } FPRINTF(stderr, "\n"); } static int check_it(void) { int i; for (i = 0; i < N_ELEMENTS; i++) { float expected_value = i + 1.0; #ifdef STARPU_USE_CUDA if (ncuda > 0) expected_value *= array_of_structs[i].y; #endif #ifdef STARPU_USE_OPENCL if (nopencl > 0) expected_value *= array_of_structs[i].y; #endif expected_value *= array_of_structs[i].y; if (array_of_structs[i].x != expected_value) return EXIT_FAILURE; } return EXIT_SUCCESS; } #ifdef STARPU_USE_OPENCL struct starpu_opencl_program opencl_program; struct starpu_opencl_program opencl_conversion_program; #endif static int gpus_available(void) { #ifdef STARPU_USE_CUDA if (ncuda > 0) return 1; #endif #ifdef STARPU_USE_OPENCL if (nopencl > 0) return 1; #endif return 0; } int main(void) { #ifdef STARPU_USE_CPU int ret; struct starpu_conf conf; starpu_conf_init(&conf); /* this example doesn't support Master-Slave */ conf.nmpi_ms = 0; conf.ntcpip_ms = 0; #if defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1) conf.ncuda = 0; #endif ret = starpu_init(&conf); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ncpu = starpu_cpu_worker_get_count(); #ifdef STARPU_USE_CUDA ncuda = starpu_cuda_worker_get_count(); #endif #ifdef STARPU_USE_OPENCL nopencl = starpu_opencl_worker_get_count(); #endif if (ncpu == 0 || !gpus_available()) { starpu_shutdown(); return 77; } #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("examples/basic_examples/multiformat_opencl_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); ret = starpu_opencl_load_opencl_from_file("examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl", &opencl_conversion_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif init_problem_data(); print_it(); register_data(); create_and_submit_tasks(); unregister_data(); print_it(); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); ret = starpu_opencl_unload_opencl(&opencl_conversion_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif starpu_shutdown(); return check_it(); #else /* Without the CPU, there is no point in using the multiformat * interface, so this test is pointless. */ return 77; #endif } starpu-1.4.9+dfsg/examples/basic_examples/multiformat_conversion_codelets.c000066400000000000000000000045601507764646700274500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "multiformat_types.h" #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #ifdef STARPU_USE_CUDA void cuda_to_cpu(void *buffers[], void *arg) { (void)arg; struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]); struct point *dst = STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); int i; for (i = 0; i < n; i++) { dst[i].x = src->x[i]; dst[i].y = src->y[i]; } } extern void cpu_to_cuda_cuda_func(void *buffers[], void *args); struct starpu_codelet cpu_to_cuda_cl = { .cuda_funcs = {cpu_to_cuda_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "codelet_cpu_to_cuda" }; struct starpu_codelet cuda_to_cpu_cl = { .cpu_funcs = {cuda_to_cpu}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "codelet_cude_to_cpu" }; #endif #ifdef STARPU_USE_OPENCL void opencl_to_cpu(void *buffers[], void *arg) { (void)arg; FPRINTF(stderr, "User Entering %s\n", __starpu_func__); struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]); struct point *dst = STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); int i; for (i = 0; i < n; i++) { dst[i].x = src->x[i]; dst[i].y = src->y[i]; } } extern void cpu_to_opencl_opencl_func(void *buffers[], void *args); struct starpu_codelet cpu_to_opencl_cl = { .opencl_funcs = {cpu_to_opencl_opencl_func}, .opencl_flags = {STARPU_OPENCL_ASYNC}, .nbuffers = 1, .modes = {STARPU_RW}, }; struct starpu_codelet opencl_to_cpu_cl = { .cpu_funcs = {opencl_to_cpu}, .nbuffers = 1, .modes = {STARPU_RW}, }; #endif starpu-1.4.9+dfsg/examples/basic_examples/multiformat_conversion_codelets_cuda.cu000066400000000000000000000031221507764646700306220ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "multiformat_types.h" static __global__ void cpu_to_cuda_cuda(struct point *src, struct struct_of_arrays *dst, unsigned n) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) { dst->x[i] = src[i].x; dst->y[i] = src[i].y; } } extern "C" void cpu_to_cuda_cuda_func(void *buffers[], void *_args) { struct point *src; struct struct_of_arrays *dst; src = (struct point *) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); dst = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]); int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); unsigned threads_per_block = 64; unsigned nblocks = (n + threads_per_block-1) / threads_per_block; cpu_to_cuda_cuda<<>>(src, dst, n); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/basic_examples/multiformat_conversion_codelets_opencl.c000066400000000000000000000045771507764646700310200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include extern struct starpu_opencl_program opencl_conversion_program; void cpu_to_opencl_opencl_func(void *buffers[], void *args) { (void) args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); cl_mem src = (cl_mem) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); cl_mem dst = (cl_mem) STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_conversion_program, "cpu_to_opencl_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(src), &src); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 1, sizeof(dst), &dst); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 2, sizeof(n), &n); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local = global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/examples/basic_examples/multiformat_conversion_codelets_opencl_kernel.cl000066400000000000000000000017031507764646700325200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "multiformat_types.h" __kernel void cpu_to_opencl_opencl(__global struct point *src, __global struct struct_of_arrays *dst, unsigned int n) { const unsigned int i = get_global_id(0); if (i < n) { dst->x[i] = src[i].x; dst->y[i] = src[i].y; } } starpu-1.4.9+dfsg/examples/basic_examples/multiformat_cuda.cu000066400000000000000000000033251507764646700245000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "multiformat_types.h" #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) static __global__ void multiformat_cuda(struct struct_of_arrays *soa, unsigned n) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) soa->x[i] *= soa->y[i]; } extern "C" void multiformat_scal_cuda_func(void *buffers[], void *_args) { (void) _args; FPRINTF(stderr, "Running the cuda kernel (%s)\n", __starpu_func__); unsigned int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); struct struct_of_arrays *soa; soa = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]); unsigned threads_per_block = 64; unsigned nblocks = (n + threads_per_block-1) / threads_per_block; multiformat_cuda<<>>(soa, n); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } starpu-1.4.9+dfsg/examples/basic_examples/multiformat_opencl.c000066400000000000000000000044121507764646700246550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include extern struct starpu_opencl_program opencl_program; void multiformat_scal_opencl_func(void *buffers[], void *args) { (void) args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); cl_mem val = (cl_mem)STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "multiformat_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(val), &val); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 1, sizeof(n), &n); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local = global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/examples/basic_examples/multiformat_opencl_kernel.cl000066400000000000000000000015711507764646700263740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "multiformat_types.h" __kernel void multiformat_opencl(__global struct struct_of_arrays *soa, int nx) { const int i = get_global_id(0); if (i < nx) soa->x[i] *= soa->y[i]; } starpu-1.4.9+dfsg/examples/basic_examples/multiformat_types.h000066400000000000000000000015571507764646700245550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef MULTIFORMAT_TYPES_H #define MULTIFORMAT_TYPES_H #define N_ELEMENTS 10 struct struct_of_arrays { float x[N_ELEMENTS]; float y[N_ELEMENTS]; }; struct point { float x, y; }; #endif starpu-1.4.9+dfsg/examples/basic_examples/ndim.c000066400000000000000000000053011507764646700216770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 2 #define NY 3 #define NZ 2 #define NT 2 void arr4d_cpu_func(void *buffers[], void *args) { (void)args; int *arr4d = (int *) STARPU_NDIM_GET_PTR(buffers[0]); int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]); unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); int nx = nn[0]; int ny = nn[1]; int nz = nn[2]; int nt = nn[3]; unsigned ldy = ldn[1]; unsigned ldz = ldn[2]; unsigned ldt = ldn[3]; int i, j, k, l; for (l = 0; l < nt; l++) { for (k = 0; k < nz; k++) { for (j = 0; j < ny; j++) { for (i = 0; i < nx; i++) { arr4d[(l*ldt)+(k*ldz)+(j*ldy)+i] *= 10; } } } } } struct starpu_codelet arr4d_cl = { .cpu_funcs = {arr4d_cpu_func}, .cpu_funcs_name = {"arr4d_cpu_func"}, .nbuffers = 1, .modes = { STARPU_RW }, .name = "arr4d_cl" }; int main(void) { int ret; int arr4d[NX*NY*NZ*NT]; int val = 0; int i, j, k, l; starpu_data_handle_t arr4d_handle; unsigned nn[4] = {NX, NY, NZ, NT}; unsigned ldn[4] = {1, NX, NX*NY, NX*NY*NZ}; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); for (l = 0; l < NT; l++) for (k = 0; k < NZ; k++) for (j = 0; j < NY; j++) for (i = 0; i < NX; i++) arr4d[(l*NX*NY*NZ)+(k*NX*NY)+(j*NX)+i] = val++; starpu_ndim_data_register(&arr4d_handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(arr4d[0])); ret = starpu_task_insert(&arr4d_cl, STARPU_RW, arr4d_handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_data_unregister(arr4d_handle); for (l = 0; l < NT; l++) { fprintf(stderr, "------\n"); for (k = 0; k < NZ; k++) { for (j = 0; j < NY; j++) { fprintf(stderr, "|\t"); for (i = 0; i < NX; i++) fprintf(stderr, "%d\t", arr4d[(l*NX*NY*NZ)+(k*NX*NY)+(j*NX)+i]); fprintf(stderr, " |"); } fprintf(stderr, "\n"); } fprintf(stderr, "------\n"); } starpu_shutdown(); return 0; enodev: starpu_data_unregister(arr4d_handle); starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/basic_examples/task_insert_color.c000066400000000000000000000046631507764646700245060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void func(void *descr[], void *_args) { int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); (void)_args; *x *= 2; } struct starpu_codelet mycodelet = { .modes = { STARPU_RW }, .cpu_funcs = {func}, .cpu_funcs_name = {"func"}, .nbuffers = 1 }; struct starpu_codelet mycodelet_color = { .modes = { STARPU_RW }, .cpu_funcs = {func}, .cpu_funcs_name = {"func"}, .nbuffers = 1, .color = 0x0000FF, }; int main(void) { int value=42; starpu_data_handle_t handle; int ret; ret = starpu_init(NULL); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); // In the trace file, the following task should be green (executed on CPU) ret = starpu_task_insert(&mycodelet, STARPU_RW, handle, STARPU_NAME, "mytask_green", 0); if (STARPU_UNLIKELY(ret == -ENODEV)) { starpu_data_unregister(handle); goto enodev; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); // In the trace file, the following task will be red as specified by STARPU_TASK_COLOR ret = starpu_task_insert(&mycodelet, STARPU_RW, handle, STARPU_NAME, "mytask_red", STARPU_TASK_COLOR, 0xFF0000, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); // In the trace file, the following task will be blue as specified by the field color of mycodelet_color ret = starpu_task_insert(&mycodelet_color, STARPU_RW, handle, STARPU_NAME, "mytask_blue", 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); starpu_data_unregister(handle); starpu_shutdown(); return 0; enodev: return 77; } starpu-1.4.9+dfsg/examples/basic_examples/topology.c000066400000000000000000000021361507764646700226270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) int main(void) { int ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_worker_display_names(stdout, STARPU_CPU_WORKER); starpu_topology_print(stdout); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/basic_examples/variable.c000066400000000000000000000055751507764646700225520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #ifdef STARPU_QUICK_CHECK static unsigned niter = 500; #elif !defined(STARPU_LONG_CHECK) static unsigned niter = 5000; #else static unsigned niter = 50000; #endif extern void cpu_codelet(void *descr[], void *_args); #ifdef STARPU_USE_CUDA extern void cuda_codelet(void *descr[], void *_args); #endif #ifdef STARPU_USE_OPENCL extern void opencl_codelet(void *descr[], void *_args); struct starpu_opencl_program opencl_program; #endif int main(int argc, char **argv) { unsigned i; float foo; starpu_data_handle_t float_array_handle; struct starpu_codelet cl; int ret; ret = starpu_init(NULL); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (argc == 2) niter = atoi(argv[1]); foo = 0.0f; starpu_variable_data_register(&float_array_handle, STARPU_MAIN_RAM /* home node */, (uintptr_t)&foo, sizeof(float)); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("examples/basic_examples/variable_kernels_opencl_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif starpu_codelet_init(&cl); cl.cpu_funcs[0] = cpu_codelet; cl.cpu_funcs_name[0] = "cpu_codelet"; #ifdef STARPU_USE_CUDA cl.cuda_funcs[0] = cuda_codelet; #endif #ifdef STARPU_USE_OPENCL cl.opencl_funcs[0] = opencl_codelet; #endif cl.nbuffers = 1; cl.modes[0] = STARPU_RW; cl.model = NULL; cl.name = "variable_inc"; for (i = 0; i < niter; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->callback_func = NULL; task->handles[0] = float_array_handle; ret = starpu_task_submit(task); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); starpu_data_unregister(float_array_handle); goto enodev; } } starpu_task_wait_for_all(); /* update the array in RAM */ starpu_data_unregister(float_array_handle); FPRINTF(stderr, "variable -> %f\n", foo); FPRINTF(stderr, "result is %scorrect\n", foo==niter?"":"IN"); starpu_shutdown(); return (foo == niter) ? EXIT_SUCCESS:EXIT_FAILURE; enodev: return 77; } starpu-1.4.9+dfsg/examples/basic_examples/variable_kernels.cu000066400000000000000000000021541507764646700244500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include static __global__ void cuda_variable(float * tab) { *tab += 1.0f; return; } extern "C" void cuda_codelet(void *descr[], void *_args) { float *val = (float *)STARPU_VARIABLE_GET_PTR(descr[0]); cuda_variable<<<1,1, 0, starpu_cuda_get_local_stream()>>>(val); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } starpu-1.4.9+dfsg/examples/basic_examples/variable_kernels_cpu.c000066400000000000000000000015161507764646700251330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include void cpu_codelet(void *descr[], void *_args) { (void)_args; float *val = (float *)STARPU_VARIABLE_GET_PTR(descr[0]); *val += 1.0f; } starpu-1.4.9+dfsg/examples/basic_examples/variable_kernels_opencl.c000066400000000000000000000030731507764646700256240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include extern struct starpu_opencl_program opencl_program; void opencl_codelet(void *descr[], void *_args) { cl_mem val = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]); cl_kernel kernel; cl_command_queue queue; cl_event event; int id, devid, err; (void)_args; id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "variable", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(val), &val); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=1; size_t local=1; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/examples/basic_examples/variable_kernels_opencl_kernel.cl000066400000000000000000000014571507764646700273440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void variable(__global float* input) { const int i = get_global_id(0); if (i == 0) input[i] = input[i] + 1.0f; } starpu-1.4.9+dfsg/examples/basic_examples/vector_scal.c000066400000000000000000000151111507764646700232540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example demonstrates how to use StarPU to scale an array by a factor. * It shows how to manipulate data with StarPU's data management library. * 1- how to declare a piece of data to StarPU (starpu_vector_data_register) * 2- how to describe which data are accessed by a task (task->handles[0]) * 3- how a kernel can manipulate the data (buffers[0].vector.ptr) */ #include #include #include #include #define NX 204800 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void scal_cpu_func(void *buffers[], void *_args); extern void scal_cpu_func_icc(void *buffers[], void *_args); extern void scal_sse_func(void *buffers[], void *_args); extern void scal_sse_func_icc(void *buffers[], void *_args); extern void scal_cuda_func(void *buffers[], void *_args); extern void scal_hip_func(void *buffers[], void *_args); extern void scal_opencl_func(void *buffers[], void *_args); static struct starpu_perfmodel vector_scal_model = { .type = STARPU_HISTORY_BASED, .symbol = "vector_scal" }; static struct starpu_perfmodel vector_scal_energy_model = { .type = STARPU_HISTORY_BASED, .symbol = "vector_scal_energy" }; static struct starpu_codelet cl = { /* CPU implementation of the codelet */ .cpu_funcs = { scal_cpu_func #if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__) , scal_cpu_func_icc #endif #ifdef __SSE__ , scal_sse_func #if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__) , scal_sse_func_icc #endif #endif }, .cpu_funcs_name = { "scal_cpu_func", #if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__) "scal_cpu_func_icc", #endif #ifdef __SSE__ "scal_sse_func", #if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__) "scal_sse_func_icc" #endif #endif }, #ifdef STARPU_USE_CUDA /* CUDA implementation of the codelet */ .cuda_funcs = {scal_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL /* OpenCL implementation of the codelet */ .opencl_funcs = {scal_opencl_func}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif #ifdef STARPU_USE_HIP /* HIP implementation of the codelet */ .hip_funcs = {scal_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .model = &vector_scal_model, .energy_model = &vector_scal_energy_model }; #ifdef STARPU_USE_OPENCL struct starpu_opencl_program opencl_program; #endif static int approximately_equal(float a, float b) { #ifdef STARPU_HAVE_NEARBYINTF int ai = (int) nearbyintf(a * 1000.0); int bi = (int) nearbyintf(b * 1000.0); #elif defined(STARPU_HAVE_RINTF) int ai = (int) rintf(a * 1000.0); int bi = (int) rintf(b * 1000.0); #else #error "Please define either nearbyintf or rintf." #endif return ai == bi; } int main(void) { /* We consider a vector of float that is initialized just as any of C data */ float vector[NX]; unsigned i; for (i = 0; i < NX; i++) vector[i] = (i+1.0f); /* Initialize StarPU with default configuration */ int ret = starpu_init(NULL); if (ret == -ENODEV) goto enodev; FPRINTF(stderr, "[BEFORE] 1-th element : %3.2f\n", vector[1]); FPRINTF(stderr, "[BEFORE] (NX-1)th element: %3.2f\n", vector[NX-1]); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("examples/basic_examples/vector_scal_opencl_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif /* Tell StaPU to associate the "vector" vector with the "vector_handle" * identifier. When a task needs to access a piece of data, it should * refer to the handle that is associated to it. * In the case of the "vector" data interface: * - the first argument of the registration method is a pointer to the * handle that should describe the data * - the second argument is the memory node where the data (ie. "vector") * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as * opposed to an address on a GPU for instance. * - the third argument is the address of the vector in RAM * - the fourth argument is the number of elements in the vector * - the fifth argument is the size of each element. */ starpu_data_handle_t vector_handle; starpu_memory_pin(vector, sizeof(vector)); starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); float factor = 3.14; /* create a synchronous task: any call to starpu_task_submit will block * until it is terminated */ struct starpu_task *task = starpu_task_create(); task->synchronous = 1; task->cl = &cl; /* the codelet manipulates one buffer in RW mode */ task->handles[0] = vector_handle; /* an argument is passed to the codelet, beware that this is a * READ-ONLY buffer and that the codelet may be given a pointer to a * COPY of the argument */ task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); /* execute the task on any eligible computational resource */ ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* StarPU does not need to manipulate the array anymore so we can stop * monitoring it */ starpu_data_unregister(vector_handle); starpu_memory_unpin(vector, sizeof(vector)); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); ret = approximately_equal(vector[1], (1+1.0f) * factor) && approximately_equal(vector[NX-1], (NX-1+1.0f) * factor); FPRINTF(stderr, "[AFTER] 1-th element : %3.2f (should be %3.2f)\n", vector[1], (1+1.0f) * factor); FPRINTF(stderr, "[AFTER] (NX-1)-th element: %3.2f (should be %3.2f)\n", vector[NX-1], (NX-1+1.0f) * factor); FPRINTF(stderr, "[AFTER] Computation is%s correct\n", ret?"":" NOT"); return (ret ? EXIT_SUCCESS : EXIT_FAILURE); enodev: return 77; } starpu-1.4.9+dfsg/examples/basic_examples/vector_scal_c.c000066400000000000000000000074171507764646700235700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example demonstrates how to use StarPU to scale an array by a factor. * It shows how to manipulate data with StarPU's data management library. * 1- how to declare a piece of data to StarPU (starpu_vector_data_register) * 2- how to describe which data are accessed by a task (task->handles[0]) * 3- how a kernel can manipulate the data (buffers[0].vector.ptr) * * This is a variant of vector_scal.c which shows it can be integrated with fortran. */ #include #include extern void scal_cpu_func(void *buffers[], void *_args); extern void scal_cuda_func(void *buffers[], void *_args); static struct starpu_perfmodel vector_scal_model = { .type = STARPU_HISTORY_BASED, .symbol = "vector_scal_model" }; static struct starpu_codelet cl = { .modes = { STARPU_RW }, /* CPU implementation of the codelet */ .cpu_funcs = {scal_cpu_func}, .cpu_funcs_name = {"scal_cpu_func"}, #ifdef STARPU_USE_CUDA /* CUDA implementation of the codelet */ .cuda_funcs = {scal_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 1, .model = &vector_scal_model }; int compute_(int *F_NX, float *vector) { int NX = *F_NX; int ret; /* Initialize StarPU with default configuration */ ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Tell StaPU to associate the "vector" vector with the "vector_handle" * identifier. When a task needs to access a piece of data, it should * refer to the handle that is associated to it. * In the case of the "vector" data interface: * - the first argument of the registration method is a pointer to the * handle that should describe the data * - the second argument is the memory node where the data (ie. "vector") * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as * opposed to an address on a GPU for instance. * - the third argument is the address of the vector in RAM * - the fourth argument is the number of elements in the vector * - the fifth argument is the size of each element. */ starpu_data_handle_t vector_handle; starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); float factor = 3.14; /* create a synchronous task: any call to starpu_task_submit will block * until it is terminated */ struct starpu_task *task = starpu_task_create(); task->synchronous = 1; task->cl = &cl; /* the codelet manipulates one buffer in RW mode */ task->handles[0] = vector_handle; /* an argument is passed to the codelet, beware that this is a * READ-ONLY buffer and that the codelet may be given a pointer to a * COPY of the argument */ task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); /* execute the task on any eligible computational resource */ ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* StarPU does not need to manipulate the array anymore so we can stop * monitoring it */ starpu_data_unregister(vector_handle); /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); return ret; } starpu-1.4.9+dfsg/examples/basic_examples/vector_scal_cpu.c000066400000000000000000000015631507764646700241310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example complements vector_scal.c: here we implement a CPU version. */ #include "vector_scal_cpu_template.h" VECTOR_SCAL_CPU_FUNC(scal_cpu_func) VECTOR_SCAL_SSE_FUNC(scal_sse_func) starpu-1.4.9+dfsg/examples/basic_examples/vector_scal_cpu_icc.icc000066400000000000000000000016331507764646700252610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example complements vector_scal.c: here we implement a CPU version, * meant to be compiled by icc. */ #include "vector_scal_cpu_template.h" VECTOR_SCAL_CPU_FUNC(scal_cpu_func_icc) VECTOR_SCAL_SSE_FUNC(scal_sse_func_icc) starpu-1.4.9+dfsg/examples/basic_examples/vector_scal_cpu_template.h000066400000000000000000000112261507764646700260260ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example complements vector_scal.c: here we implement a CPU version. */ #ifndef __VECTOR_SCAL_CPU_TEMPLATE_H__ #define __VECTOR_SCAL_CPU_TEMPLATE_H__ #include #ifdef __SSE__ #include #endif /* This kernel takes a buffer and scales it by a constant factor */ #define VECTOR_SCAL_CPU_FUNC(func_name) \ void func_name(void *buffers[], void *cl_arg) \ { \ unsigned i; \ float *factor = (float *) cl_arg; \ \ /* \ * The "buffers" array matches the task->handles array: for instance \ * task->handles[0] is a handle that corresponds to a data with \ * vector "interface", so that the first entry of the array in the \ * codelet is a pointer to a structure describing such a vector (ie. \ * struct starpu_vector_interface *). Here, we therefore manipulate \ * the buffers[0] element as a vector: nx gives the number of elements \ * in the array, ptr gives the location of the array (that was possibly\ * migrated/replicated), and elemsize gives the size of each elements. \ */ \ \ struct starpu_vector_interface *vector = (struct starpu_vector_interface *) buffers[0]; \ \ /* length of the vector */ \ unsigned n = STARPU_VECTOR_GET_NX(vector); \ \ /* get a pointer to the local copy of the vector : note that we have to\ * cast it in (float *) since a vector could contain any type of \ * elements so that the .ptr field is actually a uintptr_t */ \ float *val = (float *)STARPU_VECTOR_GET_PTR(vector); \ \ /* scale the vector */ \ for (i = 0; i < n; i++) \ val[i] *= *factor; \ } #ifdef __SSE__ #define VECTOR_SCAL_SSE_FUNC(func_name) \ void func_name(void *buffers[], void *cl_arg) \ { \ float *vector = (float *) STARPU_VECTOR_GET_PTR(buffers[0]); \ unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); \ unsigned int n_iterations = n/4; \ \ __m128 *VECTOR = (__m128*) vector; \ __m128 FACTOR STARPU_ATTRIBUTE_ALIGNED(16); \ float factor = *(float *) cl_arg; \ FACTOR = _mm_set1_ps(factor); \ \ unsigned int i; \ for (i = 0; i < n_iterations; i++) \ VECTOR[i] = _mm_mul_ps(FACTOR, VECTOR[i]); \ \ unsigned int remainder = n%4; \ if (remainder != 0) \ { \ unsigned int start = 4 * n_iterations; \ for (i = start; i < start+remainder; ++i) \ { \ vector[i] = factor * vector[i]; \ } \ } \ } #else /* !__SSE__ */ #define VECTOR_SCAL_SSE_FUNC(func_name) #endif /* !__SSE__ */ #endif /* !__VECTOR_SCAL_CPU_TEMPLATE_H__ */ starpu-1.4.9+dfsg/examples/basic_examples/vector_scal_cuda.cu000066400000000000000000000031701507764646700244370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example complements vector_scal.c: here we implement a CUDA version. */ #include static __global__ void vector_mult_cuda(unsigned n, float *val, float factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) val[i] *= factor; } extern "C" void scal_cuda_func(void *buffers[], void *_args) { float *factor = (float *)_args; /* length of the vector */ unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the vector pointer */ float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]); unsigned threads_per_block = 64; unsigned nblocks = (n + threads_per_block-1) / threads_per_block; vector_mult_cuda<<>>(n, val, *factor); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/basic_examples/vector_scal_fortran.F000066400000000000000000000021661507764646700247600ustar00rootroot00000000000000C StarPU --- Runtime system for heterogeneous multicore architectures. C C Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria C C StarPU is free software; you can redistribute it and/or modify C it under the terms of the GNU Lesser General Public License as published by C the Free Software Foundation; either version 2.1 of the License, or (at C your option) any later version. C C StarPU is distributed in the hope that it will be useful, but C WITHOUT ANY WARRANTY; without even the implied warranty of C MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. C C See the GNU Lesser General Public License in COPYING.LGPL for more details. C PROGRAM VECTOR_SCAL INTEGER,PARAMETER :: F_NX=2048 REAL,DIMENSION(F_NX) :: VECTOR INTEGER :: I DO I=1,F_NX,1 VECTOR(I)=1.0 ENDDO WRITE (*,*) ' BEFORE : First element was ', VECTOR(1) WRITE (*,*) ' BEFORE : Last element was ', VECTOR(F_NX) CALL COMPUTE(F_NX, VECTOR) WRITE (*,*) ' AFTER : First element is ', VECTOR(1) WRITE (*,*) ' AFTER : Last element is ', VECTOR(F_NX) END PROGRAM starpu-1.4.9+dfsg/examples/basic_examples/vector_scal_hip.hip000066400000000000000000000031321507764646700244520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example complements vector_scal.c: here we implement a HIP version. */ #include static __global__ void vector_mult_hip(unsigned n, float *val, float factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) val[i] *= factor; } extern "C" void scal_hip_func(void *buffers[], void *_args) { float *factor = (float *)_args; /* length of the vector */ unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the vector pointer */ float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]); unsigned threads_per_block = 64; unsigned nblocks = (n + threads_per_block-1) / threads_per_block; hipLaunchKernelGGL(vector_mult_hip, nblocks, threads_per_block, 0, starpu_hip_get_local_stream(), n, val, *factor); hipError_t status = hipGetLastError(); if (status != hipSuccess) STARPU_HIP_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/basic_examples/vector_scal_opencl.c000066400000000000000000000042101507764646700246120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example complements vector_scal.c: here we implement a OpenCL version. */ #include extern struct starpu_opencl_program opencl_program; void scal_opencl_func(void *buffers[], void *_args) { float *factor = _args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; /* length of the vector */ unsigned int n = STARPU_VECTOR_GET_NX(buffers[0]); /* OpenCL copy of the vector pointer */ cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "vector_mult_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(n), &n); err |= clSetKernelArg(kernel, 1, sizeof(val), &val); err |= clSetKernelArg(kernel, 2, sizeof(*factor), factor); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local=global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/examples/basic_examples/vector_scal_opencl_kernel.cl000066400000000000000000000015631507764646700263360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void vector_mult_opencl(unsigned int nx, __global float* val, float factor) { const int i = get_global_id(0); if (i < nx) { val[i] *= factor; } } starpu-1.4.9+dfsg/examples/binary/000077500000000000000000000000001507764646700171125ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/binary/binary.c000066400000000000000000000114241507764646700205440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This shows how to load OpenCL programs, either from a .cl file, or from a * string containing the program itself. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #ifdef STARPU_USE_OPENCL extern void opencl_codelet(void *descr[], void *_args); struct starpu_opencl_program opencl_program; #endif struct starpu_codelet cl = { #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_codelet}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW} }; int compute(char *file_name, int load_as_file, int with_malloc) { float float_array[4] STARPU_ATTRIBUTE_ALIGNED(16) = { 0.0f, 0.0f, 0.0f, 0.0f}; starpu_data_handle_t float_array_handle; unsigned i; int ret = 0; #ifdef STARPU_QUICK_CHECK unsigned niter = 50; #else unsigned niter = 500; #endif starpu_vector_data_register(&float_array_handle, STARPU_MAIN_RAM, (uintptr_t)&float_array, 4, sizeof(float)); #ifdef STARPU_USE_OPENCL if (load_as_file) { ret = starpu_opencl_compile_opencl_from_file(file_name, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_compile_opencl_from_file"); ret = starpu_opencl_load_binary_opencl(file_name, &opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_binary_opencl"); } else if (with_malloc) { char *located_file_name; char *located_dir_name; char *opencl_program_source; starpu_opencl_load_program_source_malloc(file_name, &located_file_name, &located_dir_name, &opencl_program_source); ret = starpu_opencl_compile_opencl_from_string(opencl_program_source, "incrementer", NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_compile_opencl_from_file"); ret = starpu_opencl_load_binary_opencl("incrementer", &opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_binary_opencl"); free(located_file_name); free(located_dir_name); free(opencl_program_source); } else { char located_file_name[1024]; char located_dir_name[1024]; char opencl_program_source[16384]; starpu_opencl_load_program_source(file_name, located_file_name, located_dir_name, opencl_program_source); ret = starpu_opencl_compile_opencl_from_string(opencl_program_source, "incrementer", NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_compile_opencl_from_file"); ret = starpu_opencl_load_binary_opencl("incrementer", &opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_binary_opencl"); } #endif for (i = 0; i < niter; i++) { ret = starpu_task_insert(&cl, STARPU_RW, float_array_handle, STARPU_TAG_ONLY, (starpu_tag_t) i, 0); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); exit(0); } } starpu_task_wait_for_all(); /* update the array in RAM */ starpu_data_unregister(float_array_handle); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif FPRINTF(stderr, "array -> %f, %f, %f, %f\n", float_array[0], float_array[1], float_array[2], float_array[3]); if (float_array[0] != niter || float_array[0] != float_array[1] + float_array[2] + float_array[3]) { FPRINTF(stderr, "Incorrect result\n"); ret = 1; } return ret; } int main(void) { int ret = 0; struct starpu_conf conf; starpu_conf_init(&conf); conf.ncuda = 0; ret = starpu_init(&conf); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "This application requires an OpenCL worker.\n"); return 77; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_opencl_worker_get_count() == 0) { FPRINTF(stderr, "This application requires an OpenCL worker.\n"); starpu_shutdown(); return 77; } ret = compute("examples/incrementer/incrementer_kernels_opencl_kernel.cl", 1, -1); if (ret == 0) ret = compute("examples/incrementer/incrementer_kernels_opencl_kernel.cl", 0, 0); else FPRINTF(stderr, "Error when calling compute %d\n", ret); if (ret == 0) ret = compute("examples/incrementer/incrementer_kernels_opencl_kernel.cl", 0, 1); else FPRINTF(stderr, "Error when calling compute %d\n", ret); starpu_shutdown(); return ret; } starpu-1.4.9+dfsg/examples/callback/000077500000000000000000000000001507764646700173625ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/callback/callback.c000066400000000000000000000043121507764646700212620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is an example of using a callback. We submit a task, whose callback * submits another task (without any callback). */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) starpu_data_handle_t handle; void cpu_codelet(void *descr[], void *_args) { (void)_args; int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); *val += 1; } struct starpu_codelet cl = { .modes = { STARPU_RW }, .cpu_funcs = {cpu_codelet}, .cpu_funcs_name = {"cpu_codelet"}, .nbuffers = 1, .name = "callback" }; void callback_func(void *callback_arg) { int ret; (void)callback_arg; struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = handle; ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } int main(void) { int v=40; int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(int)); struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->callback_func = callback_func; task->callback_arg = NULL; task->handles[0] = handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_wait_for_all(); starpu_data_unregister(handle); FPRINTF(stderr, "v -> %d\n", v); starpu_shutdown(); return (v == 42) ? 0 : 1; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/callback/prologue.c000066400000000000000000000056211507764646700213660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is an example of using a prologue callback. We submit a task, whose * prologue callback (i.e. before task gets scheduled) prints a value, and * whose pop_prologue callback (i.e. after task gets scheduled, but before task * execution) prints another value. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) starpu_data_handle_t handle; void cpu_codelet(void *descr[], void *_args) { (void)_args; int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); *val += 1; printf("task executing \n"); } struct starpu_codelet cl = { .modes = { STARPU_RW }, .cpu_funcs = {cpu_codelet}, .cpu_funcs_name = {"cpu_codelet"}, .nbuffers = 1, .name = "callback" }; void prologue_callback_func(void *callback_arg) { double *x = (double*)callback_arg; printf("x = %lf\n", *x); STARPU_ASSERT(*x == -999.0); } void pop_prologue_callback_func(void *args) { unsigned val = (uintptr_t) args; printf("pop_prologue_callback val %u \n", val); STARPU_ASSERT(val == 5); } int main(void) { int v=40; int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(int)); double x = -999.0; struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->prologue_callback_func = prologue_callback_func; task->prologue_callback_arg = &x; task->prologue_callback_pop_func = pop_prologue_callback_func; task->prologue_callback_pop_arg = (void*) 5; task->handles[0] = handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_insert(&cl, STARPU_RW, handle, STARPU_PROLOGUE_CALLBACK, prologue_callback_func, STARPU_PROLOGUE_CALLBACK_ARG_NFREE, &x, STARPU_PROLOGUE_CALLBACK_POP, pop_prologue_callback_func, STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE, 5, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); enodev: starpu_data_unregister(handle); FPRINTF(stderr, "v -> %d\n", v); starpu_shutdown(); return (ret == -ENODEV) ? 77 : 0; } starpu-1.4.9+dfsg/examples/cg/000077500000000000000000000000001507764646700162175ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/cg/cg.c000066400000000000000000000215571507764646700167660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include /* * Conjugate Gradient * * Input: * - matrix A * - vector b * - vector x (starting value) * - int i_max, error tolerance eps < 1. * Output: * - vector x * * Pseudo code: * * i <- 0 * r <- b - Ax * d <- r * delta_new <- dot(r,r) * delta_0 <- delta_new * * while (i < i_max && delta_new > eps^2 delta_0) * { * q <- Ad * alpha <- delta_new/dot(d, q) * x <- x + alpha d * * If (i is divisible by 50) * r <- b - Ax * else * r <- r - alpha q * * delta_old <- delta_new * delta_new <- dot(r,r) * beta <- delta_new/delta_old * d <- r + beta d * i <- i + 1 * } * * The dot() operations makes use of reduction to optimize parallelism. * */ #include "cg.h" static int copy_handle(starpu_data_handle_t dst, starpu_data_handle_t src, unsigned nblocks); #define HANDLE_TYPE_VECTOR starpu_data_handle_t #define HANDLE_TYPE_MATRIX starpu_data_handle_t #define TASK_INSERT(cl, ...) starpu_task_insert(cl, ##__VA_ARGS__) #define GET_VECTOR_BLOCK(v, i) starpu_data_get_sub_data(v, 1, i) #define GET_MATRIX_BLOCK(m, i, j) starpu_data_get_sub_data(m, 2, i, j) #define BARRIER() #define GET_DATA_HANDLE(handle) #define FPRINTF_SERVER FPRINTF #include "cg_kernels.c" static TYPE *A, *b, *x; static TYPE *r, *d, *q; static int copy_handle(starpu_data_handle_t dst, starpu_data_handle_t src, unsigned nb) { unsigned block; for (block = 0; block < nb; block++) starpu_data_cpy(starpu_data_get_sub_data(dst, 1, block), starpu_data_get_sub_data(src, 1, block), 1, NULL, NULL); return 0; } /* * Generate Input data */ static void generate_random_problem(void) { int i, j; starpu_malloc((void **)&A, n*n*sizeof(TYPE)); starpu_malloc((void **)&b, n*sizeof(TYPE)); starpu_malloc((void **)&x, n*sizeof(TYPE)); assert(A && b && x); for (j = 0; j < n; j++) { b[j] = (TYPE)1.0; x[j] = (TYPE)0.0; /* We take Hilbert matrix that is not well conditioned but definite positive: H(i,j) = 1/(1+i+j) */ for (i = 0; i < n; i++) { A[n*j + i] = (TYPE)(1.0/(1.0+i+j)); } } /* Internal vectors */ starpu_malloc((void **)&r, n*sizeof(TYPE)); starpu_malloc((void **)&d, n*sizeof(TYPE)); starpu_malloc((void **)&q, n*sizeof(TYPE)); assert(r && d && q); memset(r, 0, n*sizeof(TYPE)); memset(d, 0, n*sizeof(TYPE)); memset(q, 0, n*sizeof(TYPE)); } static void free_data(void) { starpu_free_noflag(A, n*n*sizeof(TYPE)); starpu_free_noflag(b, n*sizeof(TYPE)); starpu_free_noflag(x, n*sizeof(TYPE)); starpu_free_noflag(r, n*sizeof(TYPE)); starpu_free_noflag(d, n*sizeof(TYPE)); starpu_free_noflag(q, n*sizeof(TYPE)); } static void register_data(void) { starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, n, n, n, sizeof(TYPE)); starpu_vector_data_register(&b_handle, STARPU_MAIN_RAM, (uintptr_t)b, n, sizeof(TYPE)); starpu_vector_data_register(&x_handle, STARPU_MAIN_RAM, (uintptr_t)x, n, sizeof(TYPE)); starpu_vector_data_register(&r_handle, STARPU_MAIN_RAM, (uintptr_t)r, n, sizeof(TYPE)); starpu_vector_data_register(&d_handle, STARPU_MAIN_RAM, (uintptr_t)d, n, sizeof(TYPE)); starpu_vector_data_register(&q_handle, STARPU_MAIN_RAM, (uintptr_t)q, n, sizeof(TYPE)); starpu_variable_data_register(&dtq_handle, STARPU_MAIN_RAM, (uintptr_t)&dtq, sizeof(TYPE)); starpu_variable_data_register(&rtr_handle, STARPU_MAIN_RAM, (uintptr_t)&rtr, sizeof(TYPE)); if (use_reduction) { starpu_data_set_reduction_methods(q_handle, &accumulate_vector_cl, &bzero_vector_cl); starpu_data_set_reduction_methods(r_handle, &accumulate_vector_cl, &bzero_vector_cl); starpu_data_set_reduction_methods(dtq_handle, &accumulate_variable_cl, &bzero_variable_cl); starpu_data_set_reduction_methods(rtr_handle, &accumulate_variable_cl, &bzero_variable_cl); } } static void unregister_data(void) { starpu_data_unpartition(A_handle, STARPU_MAIN_RAM); starpu_data_unpartition(b_handle, STARPU_MAIN_RAM); starpu_data_unpartition(x_handle, STARPU_MAIN_RAM); starpu_data_unpartition(r_handle, STARPU_MAIN_RAM); starpu_data_unpartition(d_handle, STARPU_MAIN_RAM); starpu_data_unpartition(q_handle, STARPU_MAIN_RAM); starpu_data_unregister(A_handle); starpu_data_unregister(b_handle); starpu_data_unregister(x_handle); starpu_data_unregister(r_handle); starpu_data_unregister(d_handle); starpu_data_unregister(q_handle); starpu_data_unregister(dtq_handle); starpu_data_unregister(rtr_handle); } /* * Data partitioning filters */ struct starpu_data_filter vector_filter; struct starpu_data_filter matrix_filter_1; struct starpu_data_filter matrix_filter_2; static void partition_data(void) { assert(n % nblocks == 0); /* * Partition the A matrix */ /* Partition into contiguous parts */ matrix_filter_1.filter_func = starpu_matrix_filter_block; matrix_filter_1.nchildren = nblocks; /* Partition into non-contiguous parts */ matrix_filter_2.filter_func = starpu_matrix_filter_vertical_block; matrix_filter_2.nchildren = nblocks; /* A is in FORTRAN ordering, starpu_data_get_sub_data(A_handle, 2, i, * j) designates the block in column i and row j. */ starpu_data_map_filters(A_handle, 2, &matrix_filter_1, &matrix_filter_2); /* * Partition the vectors */ vector_filter.filter_func = starpu_vector_filter_block; vector_filter.nchildren = nblocks; starpu_data_partition(b_handle, &vector_filter); starpu_data_partition(x_handle, &vector_filter); starpu_data_partition(r_handle, &vector_filter); starpu_data_partition(d_handle, &vector_filter); starpu_data_partition(q_handle, &vector_filter); } /* * Debug */ #if 0 static void display_vector(starpu_data_handle_t handle, TYPE *ptr) { unsigned block_size = n / nblocks; unsigned b, ind; for (b = 0; b < nblocks; b++) { starpu_data_acquire(starpu_data_get_sub_data(handle, 1, b), STARPU_R); for (ind = 0; ind < block_size; ind++) { FPRINTF(stderr, "%2.2e ", ptr[b*block_size + ind]); } FPRINTF(stderr, "| "); starpu_data_release(starpu_data_get_sub_data(handle, 1, b)); } FPRINTF(stderr, "\n"); } static void display_matrix(void) { unsigned i, j; for (i = 0; i < n; i++) { for (j = 0; j < n; j++) { FPRINTF(stderr, "%2.2e ", A[j*n + i]); } FPRINTF(stderr, "\n"); } } #endif static void display_x_result(void) { unsigned j, i; starpu_data_handle_t sub; FPRINTF(stderr, "Computed X vector:\n"); unsigned block_size = n / nblocks; for (j = 0; j < nblocks; j++) { sub = starpu_data_get_sub_data(x_handle, 1, j); starpu_data_acquire(sub, STARPU_R); for (i = 0; i < block_size; i++) { FPRINTF(stderr, "% 02.2e\n", x[j*block_size + i]); } starpu_data_release(sub); } } static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-help") == 0) { FPRINTF_SERVER(stderr, "usage: %s [-h] [-nblocks #blocks] [-display-result] [-n problem_size] [-no-reduction] [-maxiter i]\n", argv[0]); exit(-1); } } parse_common_args(argc, argv); } int main(int argc, char **argv) { int ret; double start, end; /* Not supported yet */ if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) return 77; parse_args(argc, argv); ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() + starpu_opencl_worker_get_count() == 0) { starpu_shutdown(); return 77; } starpu_cublas_init(); FPRINTF(stderr, "************** PARAMETERS ***************\n"); FPRINTF(stderr, "Problem size (-n): %lld\n", n); FPRINTF(stderr, "Maximum number of iterations (-maxiter): %d\n", i_max); FPRINTF(stderr, "Number of blocks (-nblocks): %u\n", nblocks); FPRINTF(stderr, "Reduction (-no-reduction): %s\n", use_reduction ? "enabled" : "disabled"); start = starpu_timing_now(); generate_random_problem(); register_data(); partition_data(); end = starpu_timing_now(); FPRINTF(stderr, "Problem initialization timing : %2.2f seconds\n", (end-start)/1e6); ret = cg(); if (ret == -ENODEV) { ret = 77; goto enodev; } starpu_task_wait_for_all(); if (display_result) { display_x_result(); } enodev: unregister_data(); free_data(); starpu_cublas_shutdown(); starpu_shutdown(); return ret; } starpu-1.4.9+dfsg/examples/cg/cg.h000066400000000000000000000027271507764646700167710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_EXAMPLE_CG_H__ #define __STARPU_EXAMPLE_CG_H__ #include #include #include #ifdef STARPU_USE_CUDA #include #endif #define DOUBLE #ifdef DOUBLE #define TYPE double #define GEMV STARPU_DGEMV #define DOT STARPU_DDOT #define AXPY STARPU_DAXPY #define SCAL STARPU_DSCAL #define cublasdot cublasDdot #define cublasscal cublasDscal #define cublasaxpy cublasDaxpy #define cublasgemv cublasDgemv #define cublasscal cublasDscal #else #define TYPE float #define GEMV STARPU_SGEMV #define DOT STARPU_SDOT #define AXPY STARPU_SAXPY #define SCAL STARPU_SSCAL #define cublasdot cublasSdot #define cublasscal cublasSscal #define cublasaxpy cublasSaxpy #define cublasgemv cublasSgemv #define cublasscal cublasSscal #endif #endif /* __STARPU_EXAMPLE_CG_H__ */ starpu-1.4.9+dfsg/examples/cg/cg_kernels.c000066400000000000000000000516401507764646700205050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * Standard BLAS kernels used by CG */ #include "cg.h" #include #include #ifdef STARPU_USE_CUDA #include #include static const TYPE gp1 = 1.0; static const TYPE gm1 = -1.0; #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) static unsigned nblocks = 8; #ifdef STARPU_QUICK_CHECK static int i_max = 5; static int long long n = 2048; #elif !defined(STARPU_LONG_CHECK) static int long long n = 4096; static int i_max = 100; #else static int long long n = 4096; static int i_max = 1000; #endif static double eps = (10e-14); int use_reduction = 1; int display_result = 0; HANDLE_TYPE_MATRIX A_handle; HANDLE_TYPE_VECTOR b_handle; HANDLE_TYPE_VECTOR x_handle; HANDLE_TYPE_VECTOR r_handle; HANDLE_TYPE_VECTOR d_handle; HANDLE_TYPE_VECTOR q_handle; starpu_data_handle_t dtq_handle; starpu_data_handle_t rtr_handle; TYPE dtq, rtr; #if 0 static void print_vector_from_descr(unsigned nx, TYPE *v) { unsigned i; for (i = 0; i < nx; i++) { fprintf(stderr, "%2.2e ", v[i]); } fprintf(stderr, "\n"); } static void print_matrix_from_descr(unsigned nx, unsigned ny, unsigned ld, TYPE *mat) { unsigned i, j; for (j = 0; j < nx; j++) { for (i = 0; i < ny; i++) { fprintf(stderr, "%2.2e ", mat[j+i*ld]); } fprintf(stderr, "\n"); } } #endif static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl) { (void)task; (void)nimpl; enum starpu_worker_archtype type = starpu_worker_get_type(workerid); if (type == STARPU_CPU_WORKER || type == STARPU_OPENCL_WORKER) return 1; #ifdef STARPU_USE_CUDA #ifdef STARPU_SIMGRID /* We don't know, let's assume it can */ return 1; #else /* Cuda device */ const struct cudaDeviceProp *props; props = starpu_cuda_get_device_properties(workerid); if (props->major >= 2 || props->minor >= 3) /* At least compute capability 1.3, supports doubles */ return 1; #endif #endif /* Old card, does not support doubles */ return 0; } /* * Reduction accumulation methods */ #ifdef STARPU_USE_CUDA static void accumulate_variable_cuda(void *descr[], void *cl_arg) { (void)cl_arg; TYPE *v_dst = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); TYPE *v_src = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]); cublasStatus_t status = cublasaxpy(starpu_cublas_get_local_handle(), 1, &gp1, v_src, 1, v_dst, 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif void accumulate_variable_cpu(void *descr[], void *cl_arg) { (void)cl_arg; TYPE *v_dst = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); TYPE *v_src = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]); *v_dst = *v_dst + *v_src; } static struct starpu_perfmodel accumulate_variable_model = { .type = STARPU_HISTORY_BASED, .symbol = "accumulate_variable" }; struct starpu_codelet accumulate_variable_cl = { .can_execute = can_execute, .cpu_funcs = {accumulate_variable_cpu}, .cpu_funcs_name = {"accumulate_variable_cpu"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {accumulate_variable_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, .nbuffers = 2, .model = &accumulate_variable_model, .name = "accumulate_variable" }; #ifdef STARPU_USE_CUDA static void accumulate_vector_cuda(void *descr[], void *cl_arg) { (void)cl_arg; TYPE *v_dst = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *v_src = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); cublasStatus_t status = cublasaxpy(starpu_cublas_get_local_handle(), nx, &gp1, v_src, 1, v_dst, 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif void accumulate_vector_cpu(void *descr[], void *cl_arg) { (void)cl_arg; TYPE *v_dst = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *v_src = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); AXPY(nx, (TYPE)1.0, v_src, 1, v_dst, 1); } static struct starpu_perfmodel accumulate_vector_model = { .type = STARPU_HISTORY_BASED, .symbol = "accumulate_vector" }; struct starpu_codelet accumulate_vector_cl = { .can_execute = can_execute, .cpu_funcs = {accumulate_vector_cpu}, .cpu_funcs_name = {"accumulate_vector_cpu"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {accumulate_vector_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, .nbuffers = 2, .model = &accumulate_vector_model, .name = "accumulate_vector" }; /* * Reduction initialization methods */ #ifdef STARPU_USE_CUDA extern void zero_vector(TYPE *x, unsigned nelems); static void bzero_variable_cuda(void *descr[], void *cl_arg) { (void)cl_arg; TYPE *v = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); size_t size = STARPU_VARIABLE_GET_ELEMSIZE(descr[0]); cudaMemsetAsync(v, 0, size, starpu_cuda_get_local_stream()); } #endif void bzero_variable_cpu(void *descr[], void *cl_arg) { (void)cl_arg; TYPE *v = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); *v = (TYPE)0.0; } static struct starpu_perfmodel bzero_variable_model = { .type = STARPU_HISTORY_BASED, .symbol = "bzero_variable" }; struct starpu_codelet bzero_variable_cl = { .can_execute = can_execute, .cpu_funcs = {bzero_variable_cpu}, .cpu_funcs_name = {"bzero_variable_cpu"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {bzero_variable_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .modes = {STARPU_W}, .nbuffers = 1, .model = &bzero_variable_model, .name = "bzero_variable" }; #ifdef STARPU_USE_CUDA static void bzero_vector_cuda(void *descr[], void *cl_arg) { (void)cl_arg; TYPE *v = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); size_t elemsize = STARPU_VECTOR_GET_ELEMSIZE(descr[0]); cudaMemsetAsync(v, 0, nx * elemsize, starpu_cuda_get_local_stream()); } #endif void bzero_vector_cpu(void *descr[], void *cl_arg) { (void)cl_arg; TYPE *v = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); memset(v, 0, nx*sizeof(TYPE)); } static struct starpu_perfmodel bzero_vector_model = { .type = STARPU_HISTORY_BASED, .symbol = "bzero_vector" }; struct starpu_codelet bzero_vector_cl = { .can_execute = can_execute, .cpu_funcs = {bzero_vector_cpu}, .cpu_funcs_name = {"bzero_vector_cpu"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {bzero_vector_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .modes = {STARPU_W}, .nbuffers = 1, .model = &bzero_vector_model, .name = "bzero_vector" }; /* * DOT kernel : s = dot(v1, v2) */ #ifdef STARPU_USE_CUDA static void dot_kernel_cuda(void *descr[], void *cl_arg) { (void)cl_arg; TYPE *dot = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[2]); unsigned nx = STARPU_VECTOR_GET_NX(descr[1]); cublasHandle_t handle = starpu_cublas_get_local_handle(); cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); cublasStatus_t status = cublasdot(handle, nx, v1, 1, v2, 1, dot); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_HOST); } #endif void dot_kernel_cpu(void *descr[], void *cl_arg) { (void)cl_arg; TYPE *dot = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[2]); unsigned nx = STARPU_VECTOR_GET_NX(descr[1]); TYPE local_dot; /* Note that we explicitly cast the result of the DOT kernel because * some BLAS library will return a double for sdot for instance. */ local_dot = (TYPE)DOT(nx, v1, 1, v2, 1); *dot = *dot + local_dot; } static struct starpu_perfmodel dot_kernel_model = { .type = STARPU_HISTORY_BASED, .symbol = "dot_kernel" }; static struct starpu_codelet dot_kernel_cl = { .can_execute = can_execute, .cpu_funcs = {dot_kernel_cpu}, .cpu_funcs_name = {"dot_kernel_cpu"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dot_kernel_cuda}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 3, .model = &dot_kernel_model, .name = "dot_kernel" }; int dot_kernel(HANDLE_TYPE_VECTOR v1, HANDLE_TYPE_VECTOR v2, starpu_data_handle_t s, unsigned nb) { int ret; /* Blank the accumulation variable */ if (use_reduction) starpu_data_invalidate_submit(s); else { ret = TASK_INSERT(&bzero_variable_cl, STARPU_W, s, 0); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "TASK_INSERT"); } unsigned block; for (block = 0; block < nb; block++) { ret = TASK_INSERT(&dot_kernel_cl, use_reduction?STARPU_REDUX:STARPU_RW, s, STARPU_R, GET_VECTOR_BLOCK(v1, block), STARPU_R, GET_VECTOR_BLOCK(v2, block), STARPU_TAG_ONLY, (starpu_tag_t) block, 0); STARPU_CHECK_RETURN_VALUE(ret, "TASK_INSERT"); } return 0; } /* * SCAL kernel : v1 = p1 v1 */ #ifdef STARPU_USE_CUDA static void scal_kernel_cuda(void *descr[], void *cl_arg) { TYPE p1; starpu_codelet_unpack_args(cl_arg, &p1); TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); /* v1 = p1 v1 */ TYPE alpha = p1; cublasStatus_t status = cublasscal(starpu_cublas_get_local_handle(), nx, &alpha, v1, 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif void scal_kernel_cpu(void *descr[], void *cl_arg) { TYPE alpha; starpu_codelet_unpack_args(cl_arg, &alpha); TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); /* v1 = alpha v1 */ SCAL(nx, alpha, v1, 1); } static struct starpu_perfmodel scal_kernel_model = { .type = STARPU_HISTORY_BASED, .symbol = "scal_kernel" }; static struct starpu_codelet scal_kernel_cl = { .can_execute = can_execute, .cpu_funcs = {scal_kernel_cpu}, .cpu_funcs_name = {"scal_kernel_cpu"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {scal_kernel_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 1, .model = &scal_kernel_model, .name = "scal_kernel" }; /* * GEMV kernel : v1 = p1 * v1 + p2 * M v2 */ #ifdef STARPU_USE_CUDA static void gemv_kernel_cuda(void *descr[], void *cl_arg) { TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[2]); TYPE *M = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned ld = STARPU_MATRIX_GET_LD(descr[1]); unsigned nx = STARPU_MATRIX_GET_NX(descr[1]); unsigned ny = STARPU_MATRIX_GET_NY(descr[1]); TYPE alpha, beta; starpu_codelet_unpack_args(cl_arg, &beta, &alpha); /* Compute v1 = alpha M v2 + beta v1 */ cublasStatus_t status = cublasgemv(starpu_cublas_get_local_handle(), CUBLAS_OP_N, nx, ny, &alpha, M, ld, v2, 1, &beta, v1, 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif void gemv_kernel_cpu(void *descr[], void *cl_arg) { TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[2]); TYPE *M = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned ld = STARPU_MATRIX_GET_LD(descr[1]); unsigned nx = STARPU_MATRIX_GET_NX(descr[1]); unsigned ny = STARPU_MATRIX_GET_NY(descr[1]); TYPE alpha, beta; starpu_codelet_unpack_args(cl_arg, &beta, &alpha); int worker_size = starpu_combined_worker_get_size(); if (worker_size > 1) { /* Parallel CPU task */ unsigned i = starpu_combined_worker_get_rank(); unsigned bs = (ny + worker_size - 1)/worker_size; unsigned new_nx = STARPU_MIN(nx, bs*(i+1)) - bs*i; nx = new_nx; v1 = &v1[bs*i]; M = &M[bs*i]; } /* Compute v1 = alpha M v2 + beta v1 */ GEMV("N", nx, ny, alpha, M, ld, v2, 1, beta, v1, 1); } static struct starpu_perfmodel gemv_kernel_model = { .type = STARPU_HISTORY_BASED, .symbol = "gemv_kernel" }; static struct starpu_codelet gemv_kernel_cl = { .can_execute = can_execute, .type = STARPU_SPMD, .max_parallelism = INT_MAX, .cpu_funcs = {gemv_kernel_cpu}, .cpu_funcs_name = {"gemv_kernel_cpu"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {gemv_kernel_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 3, .model = &gemv_kernel_model, .name = "gemv_kernel" }; int gemv_kernel(HANDLE_TYPE_VECTOR v1, HANDLE_TYPE_MATRIX matrix, HANDLE_TYPE_VECTOR v2, TYPE p1, TYPE p2, unsigned nb) { unsigned b1, b2; int ret; for (b2 = 0; b2 < nb; b2++) { ret = TASK_INSERT(&scal_kernel_cl, STARPU_RW, GET_VECTOR_BLOCK(v1, b2), STARPU_VALUE, &p1, sizeof(p1), STARPU_TAG_ONLY, (starpu_tag_t) b2, 0); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "TASK_INSERT"); } for (b2 = 0; b2 < nb; b2++) { for (b1 = 0; b1 < nb; b1++) { TYPE one = 1.0; ret = TASK_INSERT(&gemv_kernel_cl, use_reduction?STARPU_REDUX:STARPU_RW, GET_VECTOR_BLOCK(v1, b2), STARPU_R, GET_MATRIX_BLOCK(matrix, b2, b1), STARPU_R, GET_VECTOR_BLOCK(v2, b1), STARPU_VALUE, &one, sizeof(one), STARPU_VALUE, &p2, sizeof(p2), STARPU_TAG_ONLY, ((starpu_tag_t)b2) * nb + b1, 0); STARPU_CHECK_RETURN_VALUE(ret, "TASK_INSERT"); } } return 0; } /* * AXPY + SCAL kernel : v1 = p1 * v1 + p2 * v2 */ #ifdef STARPU_USE_CUDA static void scal_axpy_kernel_cuda(void *descr[], void *cl_arg) { TYPE p1, p2; starpu_codelet_unpack_args(cl_arg, &p1, &p2); TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); /* Compute v1 = p1 * v1 + p2 * v2. * v1 = p1 v1 * v1 = v1 + p2 v2 */ cublasStatus_t status; status = cublasscal(starpu_cublas_get_local_handle(), nx, &p1, v1, 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); status = cublasaxpy(starpu_cublas_get_local_handle(), nx, &p2, v2, 1, v1, 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif void scal_axpy_kernel_cpu(void *descr[], void *cl_arg) { TYPE p1, p2; starpu_codelet_unpack_args(cl_arg, &p1, &p2); TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); /* Compute v1 = p1 * v1 + p2 * v2. * v1 = p1 v1 * v1 = v1 + p2 v2 */ SCAL(nx, p1, v1, 1); AXPY(nx, p2, v2, 1, v1, 1); } static struct starpu_perfmodel scal_axpy_kernel_model = { .type = STARPU_HISTORY_BASED, .symbol = "scal_axpy_kernel" }; static struct starpu_codelet scal_axpy_kernel_cl = { .can_execute = can_execute, .cpu_funcs = {scal_axpy_kernel_cpu}, .cpu_funcs_name = {"scal_axpy_kernel_cpu"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {scal_axpy_kernel_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 2, .model = &scal_axpy_kernel_model, .name = "scal_axpy_kernel" }; int scal_axpy_kernel(HANDLE_TYPE_VECTOR v1, TYPE p1, HANDLE_TYPE_VECTOR v2, TYPE p2, unsigned nb) { unsigned block; for (block = 0; block < nb; block++) { int ret; ret = TASK_INSERT(&scal_axpy_kernel_cl, STARPU_RW, GET_VECTOR_BLOCK(v1, block), STARPU_R, GET_VECTOR_BLOCK(v2, block), STARPU_VALUE, &p1, sizeof(p1), STARPU_VALUE, &p2, sizeof(p2), STARPU_TAG_ONLY, (starpu_tag_t) block, 0); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "TASK_INSERT"); } return 0; } /* * AXPY kernel : v1 = v1 + p1 * v2 */ #ifdef STARPU_USE_CUDA static void axpy_kernel_cuda(void *descr[], void *cl_arg) { TYPE p1; starpu_codelet_unpack_args(cl_arg, &p1); TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); /* Compute v1 = v1 + p1 * v2. */ cublasStatus_t status = cublasaxpy(starpu_cublas_get_local_handle(), nx, &p1, v2, 1, v1, 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif void axpy_kernel_cpu(void *descr[], void *cl_arg) { TYPE p1; starpu_codelet_unpack_args(cl_arg, &p1); TYPE *v1 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); TYPE *v2 = (TYPE *)STARPU_VECTOR_GET_PTR(descr[1]); unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); /* Compute v1 = p1 * v1 + p2 * v2. */ AXPY(nx, p1, v2, 1, v1, 1); } static struct starpu_perfmodel axpy_kernel_model = { .type = STARPU_HISTORY_BASED, .symbol = "axpy_kernel" }; static struct starpu_codelet axpy_kernel_cl = { .can_execute = can_execute, .cpu_funcs = {axpy_kernel_cpu}, .cpu_funcs_name = {"axpy_kernel_cpu"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {axpy_kernel_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 2, .model = &axpy_kernel_model, .name = "axpy_kernel" }; int axpy_kernel(HANDLE_TYPE_VECTOR v1, HANDLE_TYPE_VECTOR v2, TYPE p1, unsigned nb) { unsigned block; for (block = 0; block < nb; block++) { int ret; ret = TASK_INSERT(&axpy_kernel_cl, STARPU_RW, GET_VECTOR_BLOCK(v1, block), STARPU_R, GET_VECTOR_BLOCK(v2, block), STARPU_VALUE, &p1, sizeof(p1), STARPU_TAG_ONLY, (starpu_tag_t) block, 0); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "TASK_INSERT"); } return 0; } /* * Main loop */ int cg(void) { TYPE delta_new, delta_0, error, delta_old, alpha, beta; double start, end, timing; int i = 0, ret; /* r <- b */ ret = copy_handle(r_handle, b_handle, nblocks); if (ret == -ENODEV) return ret; /* r <- r - A x */ ret = gemv_kernel(r_handle, A_handle, x_handle, 1.0, -1.0, nblocks); if (ret == -ENODEV) return ret; /* d <- r */ ret = copy_handle(d_handle, r_handle, nblocks); if (ret == -ENODEV) return ret; /* delta_new = dot(r,r) */ ret = dot_kernel(r_handle, r_handle, rtr_handle, nblocks); if (ret == -ENODEV) return ret; GET_DATA_HANDLE(rtr_handle); starpu_data_acquire(rtr_handle, STARPU_R); delta_new = rtr; delta_0 = delta_new; starpu_data_release(rtr_handle); FPRINTF_SERVER(stderr, "Delta limit: %e\n", (double) (eps*eps*delta_0)); FPRINTF_SERVER(stderr, "**************** INITIAL ****************\n"); FPRINTF_SERVER(stderr, "Delta 0: %e\n", delta_new); BARRIER(); start = starpu_timing_now(); while ((i < i_max) && ((double)delta_new > (double)(eps*eps*delta_0))) { starpu_iteration_push(i); /* q <- A d */ gemv_kernel(q_handle, A_handle, d_handle, 0.0, 1.0, nblocks); /* dtq <- dot(d,q) */ dot_kernel(d_handle, q_handle, dtq_handle, nblocks); /* alpha = delta_new / dtq */ GET_DATA_HANDLE(dtq_handle); starpu_data_acquire(dtq_handle, STARPU_R); alpha = delta_new / dtq; starpu_data_release(dtq_handle); /* x <- x + alpha d */ axpy_kernel(x_handle, d_handle, alpha, nblocks); if ((i % 50) == 0) { /* r <- b */ copy_handle(r_handle, b_handle, nblocks); /* r <- r - A x */ gemv_kernel(r_handle, A_handle, x_handle, 1.0, -1.0, nblocks); } else { /* r <- r - alpha q */ axpy_kernel(r_handle, q_handle, -alpha, nblocks); } /* delta_new = dot(r,r) */ dot_kernel(r_handle, r_handle, rtr_handle, nblocks); GET_DATA_HANDLE(rtr_handle); starpu_data_acquire(rtr_handle, STARPU_R); delta_old = delta_new; delta_new = rtr; beta = delta_new / delta_old; starpu_data_release(rtr_handle); /* d <- beta d + r */ scal_axpy_kernel(d_handle, beta, r_handle, 1.0, nblocks); if ((i % 10) == 0) { /* We here take the error as ||r||_2 / (n||b||_2) */ error = sqrt(delta_new/delta_0)/(1.0*n); FPRINTF_SERVER(stderr, "*****************************************\n"); FPRINTF_SERVER(stderr, "iter %d DELTA %e - %e\n", i, delta_new, error); } starpu_iteration_pop(); i++; } BARRIER(); end = starpu_timing_now(); timing = end - start; error = sqrt(delta_new/delta_0)/(1.0*n); FPRINTF_SERVER(stderr, "*****************************************\n"); FPRINTF_SERVER(stderr, "iter %d DELTA %e - %e\n", i, delta_new, error); FPRINTF_SERVER(stderr, "Total timing : %2.2f seconds\n", timing/1e6); FPRINTF_SERVER(stderr, "Seconds per iteration : %2.2e seconds\n", timing/1e6/i); FPRINTF_SERVER(stderr, "Number of iterations per second : %2.2e it/s\n", i/(timing/1e6)); return 0; } void parse_common_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-n") == 0) { n = (int long long)atoi(argv[++i]); continue; } if (strcmp(argv[i], "-display-result") == 0) { display_result = 1; continue; } if (strcmp(argv[i], "-maxiter") == 0) { i_max = atoi(argv[++i]); if (i_max <= 0) { FPRINTF_SERVER(stderr, "the number of iterations must be positive, not %d\n", i_max); exit(EXIT_FAILURE); } continue; } if (strcmp(argv[i], "-nblocks") == 0) { nblocks = atoi(argv[++i]); continue; } if (strcmp(argv[i], "-no-reduction") == 0) { use_reduction = 0; continue; } } } starpu-1.4.9+dfsg/examples/cholesky/000077500000000000000000000000001507764646700174475ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/cholesky/cholesky.h000066400000000000000000000250431507764646700214450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DW_CHOLESKY_H__ #define __DW_CHOLESKY_H__ #include #include #include #ifdef STARPU_USE_CUDA #include #include #endif #include #include #ifdef STARPU_HAVE_VALGRIND_H #include #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define PRINTF(fmt, ...) do { if (!getenv("STARPU_SSILENT")) {printf(fmt, ## __VA_ARGS__); }} while(0) #define NMAXBLOCKS 128 #define TAG_POTRF(k) ((starpu_tag_t)((1ULL<<60) | (unsigned long long)(k))) #define TAG_TRSM(k,j) ((starpu_tag_t)(((3ULL<<60) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(j)))) #define TAG_GEMM(k,i,j) ((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) \ | ((unsigned long long)(i)<<16) \ | (unsigned long long)(j)))) #define TAG_POTRF_AUX(k, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) | (1ULL<<56) | (unsigned long long)(k))) #define TAG_TRSM_AUX(k,j, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) \ | ((3ULL<<56) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(j)))) #define TAG_GEMM_AUX(k,i,j, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) \ | ((4ULL<<56) | ((unsigned long long)(k)<<32) \ | ((unsigned long long)(i)<<16) \ | (unsigned long long)(j)))) #define BLOCKSIZE (size_p/nblocks_p) #define BLAS3_FLOP(n1,n2,n3) \ (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3)) /* This is from magma -- Innovative Computing Laboratory -- Electrical Engineering and Computer Science Department -- University of Tennessee -- (C) Copyright 2009 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Tennessee, Knoxville nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.))) #define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n)) * (double)(__n) - (1. / 6.))) #define FLOPS_SPOTRF(__n) (FMULS_POTRF((__n)) + FADDS_POTRF((__n))) #define FMULS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.)) #define FADDS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.)) #define FMULS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? FMULS_TRMM_2((__m), (__n)) :*/ FMULS_TRMM_2((__n), (__m))) #define FADDS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? FADDS_TRMM_2((__m), (__n)) :*/ FADDS_TRMM_2((__n), (__m))) #define FMULS_TRSM FMULS_TRMM #define FADDS_TRSM FMULS_TRMM #define FLOPS_STRSM(__m, __n) (FMULS_TRSM((__m), (__n)) + FADDS_TRSM((__m), (__n))) #define FMULS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) #define FADDS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) #define FLOPS_SSYRK(__k, __n) (FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n))) #define FMULS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) #define FADDS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) #define FLOPS_SGEMM(__m, __n, __k) (FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k))) /* End of magma code */ static unsigned size_p; static unsigned nblocks_p; static unsigned nbigblocks_p; static inline void init_sizes(void) { int power = starpu_cpu_worker_get_count() + 32 * starpu_cuda_worker_get_count(); int power_cbrt = cbrt(power); #ifndef STARPU_LONG_CHECK power_cbrt /= 2; #endif if (power_cbrt < 1) power_cbrt = 1; #ifdef STARPU_QUICK_CHECK if (!size_p) size_p = 320*2*power_cbrt; if (!nblocks_p) nblocks_p = 2*power_cbrt; if (!nbigblocks_p) nbigblocks_p = power_cbrt; #else if (!size_p) size_p = 960*8*power_cbrt; if (!nblocks_p) nblocks_p = 8*power_cbrt; if (!nbigblocks_p) nbigblocks_p = 4*power_cbrt; #endif } static unsigned pinned_p = 1; static unsigned noprio_p = 0; static unsigned check_p = 0; static unsigned bound_p = 0; static unsigned bound_deps_p = 0; static unsigned bound_lp_p = 0; static unsigned bound_mps_p = 0; static unsigned with_ctxs_p = 0; static unsigned with_noctxs_p = 0; static unsigned chole1_p = 0; static unsigned chole2_p = 0; extern struct starpu_perfmodel chol_model_potrf; extern struct starpu_perfmodel chol_model_trsm; extern struct starpu_perfmodel chol_model_syrk; extern struct starpu_perfmodel chol_model_gemm; extern struct starpu_codelet cl_potrf; extern struct starpu_codelet cl_trsm; extern struct starpu_codelet cl_syrk; extern struct starpu_codelet cl_gemm; extern struct starpu_codelet cl_potrf_gpu; extern struct starpu_codelet cl_trsm_gpu; extern struct starpu_codelet cl_syrk_gpu; extern struct starpu_codelet cl_gemm_gpu; extern struct starpu_codelet cl_potrf_cpu; extern struct starpu_codelet cl_trsm_cpu; extern struct starpu_codelet cl_syrk_cpu; extern struct starpu_codelet cl_gemm_cpu; extern starpu_data_handle_t scratch; void cholesky_kernel_init(int nb); void cholesky_kernel_fini(void); void chol_cpu_codelet_update_potrf(void **, void *); void chol_cpu_codelet_update_trsm(void **, void *); void chol_cpu_codelet_update_syrk(void **, void *); void chol_cpu_codelet_update_gemm(void **, void *); double cpu_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double cpu_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double cpu_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double cpu_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); #ifdef STARPU_USE_CUDA void chol_cublas_codelet_update_potrf(void *descr[], void *_args); void chol_cublas_codelet_update_trsm(void *descr[], void *_args); void chol_cublas_codelet_update_syrk(void *descr[], void *_args); void chol_cublas_codelet_update_gemm(void *descr[], void *_args); double cuda_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double cuda_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double cuda_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double cuda_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); #endif void initialize_chol_model(struct starpu_perfmodel* model, char* symbol, double (*cpu_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned), double (*cuda_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned)); static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-with_ctxs") == 0) { with_ctxs_p = 1; break; } else if (strcmp(argv[i], "-with_noctxs") == 0) { with_noctxs_p = 1; break; } else if (strcmp(argv[i], "-chole1") == 0) { chole1_p = 1; break; } else if (strcmp(argv[i], "-chole2") == 0) { chole2_p = 1; break; } else if (strcmp(argv[i], "-size") == 0) { char *argptr; size_p = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-nblocks") == 0) { char *argptr; nblocks_p = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-nbigblocks") == 0) { char *argptr; nbigblocks_p = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-no-pin") == 0) { pinned_p = 0; } else if (strcmp(argv[i], "-no-prio") == 0) { noprio_p = 1; } else if (strcmp(argv[i], "-commute") == 0) { cl_syrk.modes[1] |= STARPU_COMMUTE; cl_gemm.modes[2] |= STARPU_COMMUTE; } else if (strcmp(argv[i], "-bound") == 0) { bound_p = 1; } else if (strcmp(argv[i], "-bound-lp") == 0) { bound_lp_p = 1; } else if (strcmp(argv[i], "-bound-mps") == 0) { bound_mps_p = 1; } else if (strcmp(argv[i], "-bound-deps") == 0) { bound_deps_p = 1; } else if (strcmp(argv[i], "-check") == 0) { check_p = 1; } else /* if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i],"--help") == 0) */ { fprintf(stderr,"usage : %s [-size size] [-nblocks nblocks] [-no-pin] [-no-prio] [-bound] [-bound-deps] [-bound-lp] [-check]\n", argv[0]); fprintf(stderr,"Currently selected: %ux%u and %ux%u blocks\n", size_p, size_p, nblocks_p, nblocks_p); exit(0); } } #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) size_p = 16; #endif } #endif /* __DW_CHOLESKY_H__ */ starpu-1.4.9+dfsg/examples/cholesky/cholesky.sh000077500000000000000000000045051507764646700216330ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # ROOT=${0%.sh} [ -z "$STARPU_SCHED" ] || STARPU_SCHEDS="$STARPU_SCHED" #[ -n "$STARPU_SCHEDS" ] || STARPU_SCHEDS=`$(dirname $0)/../../tools/starpu_sched_display` [ -n "$STARPU_SCHEDS" ] || STARPU_SCHEDS="dmdas modular-dmdas modular-heft2 modular-heft modular-heft-prio modular-heteroprio dmdap dmdar dmda dmdasd modular-dmdap modular-dmdar modular-dmda prio lws" [ -n "$STARPU_HOSTNAME" ] || export STARPU_HOSTNAME=mirage unset MALLOC_PERTURB_ INCR=2 STOP=32 if [ -n "$STARPU_SIMGRID" ] then INCR=4 STOP=14 # These use the thread factory, and are thus much longer if [ -n "$STARPU_QUICK_CHECK" ] then INCR=8 STOP=10 fi if [ -n "$STARPU_LONG_CHECK" ] then INCR=4 STOP=32 fi fi ( echo -n "#" for STARPU_SCHED in $STARPU_SCHEDS ; do echo -n " $STARPU_SCHED" done echo $MS_LAUNCHER $STARPU_LAUNCH ${ROOT}_implicit -size $((10 * 960)) -nblocks 10 -check [ $? = 0 ] || exit 1 for size in `seq 2 $INCR $STOP` ; do echo -n "$((size * 960))" for STARPU_SCHED in $STARPU_SCHEDS do export STARPU_SCHED GFLOPS=`$MS_LAUNCHER $STARPU_LAUNCH ${ROOT}_implicit -size $((size * 960)) -nblocks $size 2> /dev/null | grep -v GFlop/s | cut -d ' ' -f 3` [ -n "$GFLOPS" ] || GFLOPS='""' echo -n " $GFLOPS" done echo done ) | tee cholesky.output [ -n "$TERMINAL" ] || TERMINAL=eps [ -n "$OUTFILE" ] || OUTFILE=cholesky.eps cat > cholesky.gp << EOF set terminal $TERMINAL set output "$OUTFILE" set key top left set xlabel "size" set ylabel "GFlop/s" plot \\ EOF N=2 COMMA="" for STARPU_SCHED in $STARPU_SCHEDS do echo "$COMMA'cholesky.output' using 1:$N with lines title '$STARPU_SCHED' \\" >> cholesky.gp N=$(($N + 1)) COMMA=", " done gnuplot cholesky.gp #gv $OUTFILE true starpu-1.4.9+dfsg/examples/cholesky/cholesky_compil.c000066400000000000000000000305641507764646700230070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This version of the Cholesky factorization can include an * externally-compiler-generated loop nest, which allows to play with * compiler-side optimizations. */ /* Note: this is using fortran ordering, i.e. column-major ordering, i.e. * elements with consecutive row number are consecutive in memory */ #include "cholesky.h" #include "../sched_ctx_utils/sched_ctx_utils.h" #include #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA) #include "magma.h" #endif #include "starpu_cusolver.h" /* * code to bootstrap the factorization * and construct the DAG */ static void callback_turn_spmd_on(void *arg) { (void)arg; cl_gemm.type = STARPU_SPMD; } static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks) { double start; double end; unsigned long nelems = starpu_matrix_get_nx(dataA); unsigned long nn = nelems/nblocks; unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN; if (bound_p || bound_lp_p || bound_mps_p) starpu_bound_start(bound_deps_p, 0); starpu_fxt_start_profiling(); start = starpu_timing_now(); #define min(x,y) (x m) { mat[m+n*size] = 0.0f; /* debug */ } } } float *test_mat = malloc((size_t)size*size*sizeof(float)); STARPU_ASSERT(test_mat); STARPU_SSYRK("L", "N", size, size, 1.0f, mat, size, 0.0f, test_mat, size); FPRINTF(stderr, "comparing results ...\n"); #ifdef PRINT_OUTPUT for (m = 0; m < size; m++) { for (n = 0; n < size; n++) { if (n <= m) { FPRINTF(stdout, "%2.2f\t", test_mat[m +n*size]); } else { FPRINTF(stdout, ".\t"); } } FPRINTF(stdout, "\n"); } #endif for (m = 0; m < size; m++) { for (n = 0; n < size; n++) { if (n <= m) { float orig = (1.0f/(1.0f+m+n)) + ((m == n)?1.0f*size:0.0f); float err = fabsf(test_mat[m +n*size] - orig) / orig; if (err > 0.0001) { FPRINTF(stderr, "Error[%llu, %llu] --> %2.6f != %2.6f (err %2.6f)\n", m, n, test_mat[m +n*size], orig, err); assert(0); } } } } free(test_mat); } starpu_free_flags(mat, (size_t)size*size*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED|STARPU_MALLOC_SIMULATION_UNIQUE); #endif } int main(int argc, char **argv) { #ifdef STARPU_HAVE_MAGMA magma_init(); #endif int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); //starpu_fxt_stop_profiling(); init_sizes(); parse_args(argc, argv); if(with_ctxs_p || with_noctxs_p || chole1_p || chole2_p) parse_args_ctx(argc, argv); #ifdef STARPU_USE_CUDA initialize_chol_model(&chol_model_potrf,"chol_model_potrf",cpu_chol_task_potrf_cost,cuda_chol_task_potrf_cost); initialize_chol_model(&chol_model_trsm,"chol_model_trsm",cpu_chol_task_trsm_cost,cuda_chol_task_trsm_cost); initialize_chol_model(&chol_model_syrk,"chol_model_syrk",cpu_chol_task_syrk_cost,cuda_chol_task_syrk_cost); initialize_chol_model(&chol_model_gemm,"chol_model_gemm",cpu_chol_task_gemm_cost,cuda_chol_task_gemm_cost); #else initialize_chol_model(&chol_model_potrf,"chol_model_potrf",cpu_chol_task_potrf_cost,NULL); initialize_chol_model(&chol_model_trsm,"chol_model_trsm",cpu_chol_task_trsm_cost,NULL); initialize_chol_model(&chol_model_syrk,"chol_model_syrk",cpu_chol_task_syrk_cost,NULL); initialize_chol_model(&chol_model_gemm,"chol_model_gemm",cpu_chol_task_gemm_cost,NULL); #endif starpu_cublas_init(); starpu_cusolver_init(); if(with_ctxs_p) { construct_contexts(); start_2benchs(execute_cholesky); } else if(with_noctxs_p) start_2benchs(execute_cholesky); else if(chole1_p) start_1stbench(execute_cholesky); else if(chole2_p) start_2ndbench(execute_cholesky); else execute_cholesky(size_p, nblocks_p); starpu_cusolver_shutdown(); starpu_cublas_shutdown(); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/cholesky/cholesky_compiled.c000066400000000000000000000023051507764646700233100ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This is the base code, just like can be read in Chameleon */ /* A source-to-source compiler can very easily produce this kind of code, with rewritten loops etc */ unsigned k, m, n; for (k = 0; k < nblocks; k++) { POTRF(A(k,k), (2*nblocks - 2*k)); for (m = k+1; m < nblocks; m++) TRSM(A(k,k), A(m,k), (2*nblocks - 2*k - m)); for (n = k+1; n < nblocks; n++) { SYRK(A(n,k), A(n, n), (2*nblocks - 2*k - n)); for (m = n+1; m < nblocks; m++) GEMM(A(m,k), A(n,k), A(m,n), (2*nblocks - 2*k - n - m)); } } starpu-1.4.9+dfsg/examples/cholesky/cholesky_grain_tag.c000066400000000000000000000311411507764646700234470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This version of the Cholesky factorization uses explicit dependency * declaration through dependency tags. * It also uses data partitioning to split the matrix into submatrices. * It also changes the partitioning during execution: when called first, * cholesky_grain_rec splits the matrix with a big granularity (nblocks) and * processes nbigblocks blocks, before calling itself again, to process the * remainder of the matrix with a smaller granularity. */ /* Note: this is using fortran ordering, i.e. column-major ordering, i.e. * elements with consecutive row number are consecutive in memory */ #include "cholesky.h" #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA) #include "magma.h" #endif #include "starpu_cusolver.h" /* * Some useful functions */ static struct starpu_task *create_task(starpu_tag_t id) { struct starpu_task *task = starpu_task_create(); task->cl_arg = NULL; task->use_tag = 1; task->tag_id = id; return task; } /* * Create the codelets */ static struct starpu_task * create_task_potrf(starpu_data_handle_t dataA, unsigned k, unsigned reclevel) { /* FPRINTF(stdout, "task potrf k = %d TAG = %llx\n", k, (TAG_POTRF(k))); */ struct starpu_task *task = create_task(TAG_POTRF_AUX(k, reclevel)); task->cl = &cl_potrf; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) /* Temporary data to save libcusolver from allocating/deallocating memory */ task->handles[1] = scratch; #endif /* this is an important task */ if (!noprio_p) task->priority = STARPU_MAX_PRIO; /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_POTRF_AUX(k, reclevel), 1, TAG_GEMM_AUX(k-1, k, k, reclevel)); } int n = starpu_matrix_get_nx(task->handles[0]); task->flops = FLOPS_SPOTRF(n); return task; } static int create_task_trsm(starpu_data_handle_t dataA, unsigned k, unsigned m, unsigned reclevel) { int ret; struct starpu_task *task = create_task(TAG_TRSM_AUX(k, m, reclevel)); task->cl = &cl_trsm; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, m, k); if (!noprio_p && (m == k+1)) { task->priority = STARPU_MAX_PRIO; } /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_TRSM_AUX(k, m, reclevel), 2, TAG_POTRF_AUX(k, reclevel), TAG_GEMM_AUX(k-1, m, k, reclevel)); } else { starpu_tag_declare_deps(TAG_TRSM_AUX(k, m, reclevel), 1, TAG_POTRF_AUX(k, reclevel)); } int nx = starpu_matrix_get_nx(task->handles[0]); task->flops = FLOPS_STRSM(nx, nx); ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } static int create_task_gemm(starpu_data_handle_t dataA, unsigned k, unsigned m, unsigned n, unsigned reclevel) { int ret; /* FPRINTF(stdout, "task gemm k,n,m = %d,%d,%d TAG = %llx\nx", k,m,n, TAG_GEMM_AUX(k,m,n)); */ struct starpu_task *task = create_task(TAG_GEMM_AUX(k, m, n, reclevel)); if (m == n) { task->cl = &cl_syrk; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, n, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, n, n); int nx = starpu_matrix_get_nx(task->handles[0]); task->flops = FLOPS_SSYRK(nx, nx); } else { task->cl = &cl_gemm; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, n, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, m, k); task->handles[2] = starpu_data_get_sub_data(dataA, 2, m, n); int nx = starpu_matrix_get_nx(task->handles[0]); task->flops = FLOPS_SGEMM(nx, nx, nx); } if ((n == k + 1) && (m == k +1)) { task->priority = STARPU_MAX_PRIO; } /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_GEMM_AUX(k, m, n, reclevel), 3, TAG_GEMM_AUX(k-1, m, n, reclevel), TAG_TRSM_AUX(k, n, reclevel), TAG_TRSM_AUX(k, m, reclevel)); } else { starpu_tag_declare_deps(TAG_GEMM_AUX(k, m, n, reclevel), 2, TAG_TRSM_AUX(k, n, reclevel), TAG_TRSM_AUX(k, m, reclevel)); } ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } /* * code to bootstrap the factorization * and construct the DAG */ static int cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned nbigblocks, unsigned reclevel) { int ret; /* create a new codelet */ struct starpu_task *entry_task = NULL; /* create all the DAG nodes */ unsigned k, m, n; starpu_data_handle_t dataA; /* monitor and partition the A matrix into blocks : * one block is now determined by 2 unsigned (i,j) */ starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float)); starpu_data_set_sequential_consistency_flag(dataA, 0); /* Split into blocks of complete rows first */ struct starpu_data_filter f = { .filter_func = starpu_matrix_filter_block, .nchildren = nblocks }; /* Then split rows into tiles */ struct starpu_data_filter f2 = { /* Note: here "vertical" is for row-major, we are here using column-major. */ .filter_func = starpu_matrix_filter_vertical_block, .nchildren = nblocks }; starpu_data_map_filters(dataA, 2, &f, &f2); cholesky_kernel_init(size / nblocks); for (k = 0; k < nbigblocks; k++) { starpu_iteration_push(k); struct starpu_task *task = create_task_potrf(dataA, k, reclevel); /* we defer the launch of the first task */ if (k == 0) { entry_task = task; } else { ret = starpu_task_submit(task); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } for (m = k+1; m m) { mat[m+n*size_p] = 0.0f; /* debug */ } } } float *test_mat = malloc((size_t)size_p*size_p*sizeof(float)); STARPU_ASSERT(test_mat); STARPU_SSYRK("L", "N", size_p, size_p, 1.0f, mat, size_p, 0.0f, test_mat, size_p); FPRINTF(stderr, "comparing results ...\n"); #ifdef PRINT_OUTPUT for (m = 0; m < size_p; m++) { for (n = 0; n < size_p; n++) { if (n <= m) { FPRINTF(stdout, "%2.2f\t", test_mat[m +n*size_p]); } else { FPRINTF(stdout, ".\t"); } } FPRINTF(stdout, "\n"); } #endif for (m = 0; m < size_p; m++) { for (n = 0; n < size_p; n++) { if (n <= m) { float orig = (1.0f/(1.0f+m+n)) + ((m == n)?1.0f*size_p:0.0f); float err = fabsf(test_mat[m +n*size_p] - orig) / orig; if (err > 0.0001) { FPRINTF(stderr, "Error[%llu, %llu] --> %2.6f != %2.6f (err %2.6f)\n", m, n, test_mat[m +n*size_p], orig, err); assert(0); } } } } free(test_mat); } #endif shutdown_system(&mat, size_p, pinned_p); return ret; } starpu-1.4.9+dfsg/examples/cholesky/cholesky_implicit.c000066400000000000000000000257331507764646700233400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This version of the Cholesky factorization uses implicit dependency computation. * The whole algorithm thus appears clearly in the task submission loop in _cholesky(). */ /* Note: this is using fortran ordering, i.e. column-major ordering, i.e. * elements with consecutive row number are consecutive in memory */ #include "cholesky.h" #include "../sched_ctx_utils/sched_ctx_utils.h" #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA) #include "magma.h" #endif #include "starpu_cusolver.h" /* * code to bootstrap the factorization * and construct the DAG */ static void callback_turn_spmd_on(void *arg) { (void)arg; cl_gemm.type = STARPU_SPMD; } static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks) { double start; double end; unsigned k,m,n; unsigned long nx = starpu_matrix_get_nx(dataA); unsigned long nn = nx/nblocks; unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN; if (bound_p || bound_lp_p || bound_mps_p) starpu_bound_start(bound_deps_p, 0); starpu_fxt_start_profiling(); start = starpu_timing_now(); /* create all the DAG nodes */ for (k = 0; k < nblocks; k++) { int ret; starpu_iteration_push(k); starpu_data_handle_t sdatakk = starpu_data_get_sub_data(dataA, 2, k, k); ret = starpu_task_insert(&cl_potrf, STARPU_PRIORITY, noprio_p ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO, STARPU_RW, sdatakk, #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) STARPU_SCRATCH, scratch, #endif STARPU_CALLBACK, (k == 3*nblocks/4)?callback_turn_spmd_on:NULL, STARPU_FLOPS, (double) FLOPS_SPOTRF(nn), STARPU_NAME, "POTRF", STARPU_TAG_ONLY, TAG_POTRF(k), 0); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); for (m = k+1; m m) { mat[m+n*size] = 0.0f; /* debug */ } } } float *test_mat = malloc((size_t)size*size*sizeof(float)); STARPU_ASSERT(test_mat); STARPU_SSYRK("L", "N", size, size, 1.0f, mat, size, 0.0f, test_mat, size); FPRINTF(stderr, "comparing results ...\n"); #ifdef PRINT_OUTPUT for (m = 0; m < size; m++) { for (n = 0; n < size; n++) { if (n <= m) { FPRINTF(stdout, "%2.2f\t", test_mat[m +n*size]); } else { FPRINTF(stdout, ".\t"); } } FPRINTF(stdout, "\n"); } #endif for (m = 0; m < size; m++) { for (n = 0; n < size; n++) { if (n <= m) { float orig = (1.0f/(1.0f+m+n)) + ((m == n)?1.0f*size:0.0f); float err = fabsf(test_mat[m +n*size] - orig) / orig; if (err > 0.0001) { FPRINTF(stderr, "Error[%llu, %llu] --> %2.6f != %2.6f (err %2.6f)\n", m, n, test_mat[m +n*size], orig, err); assert(0); } } } } free(test_mat); } starpu_free_flags(mat, (size_t)size*size*sizeof(float), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED|STARPU_MALLOC_SIMULATION_UNIQUE); #endif } int main(int argc, char **argv) { #ifdef STARPU_HAVE_MAGMA magma_init(); #endif int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); //starpu_fxt_stop_profiling(); init_sizes(); parse_args(argc, argv); if(with_ctxs_p || with_noctxs_p || chole1_p || chole2_p) parse_args_ctx(argc, argv); #ifdef STARPU_USE_CUDA initialize_chol_model(&chol_model_potrf,"chol_model_potrf",cpu_chol_task_potrf_cost,cuda_chol_task_potrf_cost); initialize_chol_model(&chol_model_trsm,"chol_model_trsm",cpu_chol_task_trsm_cost,cuda_chol_task_trsm_cost); initialize_chol_model(&chol_model_syrk,"chol_model_syrk",cpu_chol_task_syrk_cost,cuda_chol_task_syrk_cost); initialize_chol_model(&chol_model_gemm,"chol_model_gemm",cpu_chol_task_gemm_cost,cuda_chol_task_gemm_cost); #else initialize_chol_model(&chol_model_potrf,"chol_model_potrf",cpu_chol_task_potrf_cost,NULL); initialize_chol_model(&chol_model_trsm,"chol_model_trsm",cpu_chol_task_trsm_cost,NULL); initialize_chol_model(&chol_model_syrk,"chol_model_syrk",cpu_chol_task_syrk_cost,NULL); initialize_chol_model(&chol_model_gemm,"chol_model_gemm",cpu_chol_task_gemm_cost,NULL); #endif starpu_cublas_init(); starpu_cusolver_init(); if(with_ctxs_p) { construct_contexts(); start_2benchs(execute_cholesky); } else if(with_noctxs_p) start_2benchs(execute_cholesky); else if(chole1_p) start_1stbench(execute_cholesky); else if(chole2_p) start_2ndbench(execute_cholesky); else execute_cholesky(size_p, nblocks_p); starpu_cusolver_shutdown(); starpu_cublas_shutdown(); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/cholesky/cholesky_julia.sh000077500000000000000000000014301507764646700230110ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # ROOT=${0%cholesky_julia.sh} STARPU_SCHED_LIB=$ROOT/.libs/libmy_dmda.so STARPU_SCHED=mydm $ROOT/cholesky_tag starpu-1.4.9+dfsg/examples/cholesky/cholesky_kernels.c000066400000000000000000000336341507764646700231700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * Standard kernels for the Cholesky factorization */ #include #include "cholesky.h" #include "../common/blas.h" #if defined(STARPU_USE_CUDA) #include #include "starpu_cusolver.h" #if defined(STARPU_HAVE_MAGMA) #include "magma.h" #include "magma_lapack.h" #endif #endif /* * GEMM */ #if defined(STARPU_USE_CUDA) static const float p1 = 1.0; static const float m1 = -1.0; #endif starpu_data_handle_t scratch = NULL; static inline void chol_common_cpu_codelet_update_gemm(void *descr[], int s, void *_args) { (void)_args; /* printf("gemm\n"); */ float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); float *right = (float *)STARPU_MATRIX_GET_PTR(descr[1]); float *center = (float *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned dx = STARPU_MATRIX_GET_NY(descr[2]); unsigned dy = STARPU_MATRIX_GET_NX(descr[2]); unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); switch (s) { case 0: { /* CPU kernel */ int worker_size = starpu_combined_worker_get_size(); if (worker_size == 1) { /* Sequential CPU kernel */ STARPU_SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, right, ld12, 1.0f, center, ld22); } else { /* Parallel CPU kernel */ unsigned rank = starpu_combined_worker_get_rank(); unsigned block_size = (dx + worker_size - 1)/worker_size; unsigned new_dx = STARPU_MIN(dx, block_size*(rank+1)) - block_size*rank; float *new_left = &left[block_size*rank]; float *new_center = ¢er[block_size*rank]; STARPU_SGEMM("N", "T", dy, new_dx, dz, -1.0f, new_left, ld21, right, ld12, 1.0f, new_center, ld22); } break; } #ifdef STARPU_USE_CUDA case 1: { /* CUDA kernel */ cublasStatus_t status = cublasSgemm(starpu_cublas_get_local_handle(), CUBLAS_OP_N, CUBLAS_OP_T, dy, dx, dz, &m1, left, ld21, right, ld12, &p1, center, ld22); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; } #endif default: STARPU_ABORT(); break; } } void chol_cpu_codelet_update_gemm(void *descr[], void *_args) { chol_common_cpu_codelet_update_gemm(descr, 0, _args); } #ifdef STARPU_USE_CUDA void chol_cublas_codelet_update_gemm(void *descr[], void *_args) { chol_common_cpu_codelet_update_gemm(descr, 1, _args); } #endif /* STARPU_USE_CUDA */ /* * SYRK */ static inline void chol_common_cpu_codelet_update_syrk(void *descr[], int s, void *_args) { (void)_args; /* printf("syrk\n"); */ float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); float *center = (float *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned dx = STARPU_MATRIX_GET_NY(descr[1]); unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld22 = STARPU_MATRIX_GET_LD(descr[1]); switch (s) { case 0: { /* CPU kernel */ STARPU_SSYRK("L", "N", dx, dz, -1.0f, left, ld21, 1.0f, center, ld22); break; } #ifdef STARPU_USE_CUDA case 1: { /* CUDA kernel */ cublasStatus_t status = cublasSsyrk(starpu_cublas_get_local_handle(), CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, dx, dz, &m1, left, ld21, &p1, center, ld22); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; } #endif default: STARPU_ABORT(); break; } } void chol_cpu_codelet_update_syrk(void *descr[], void *_args) { chol_common_cpu_codelet_update_syrk(descr, 0, _args); } #ifdef STARPU_USE_CUDA void chol_cublas_codelet_update_syrk(void *descr[], void *_args) { chol_common_cpu_codelet_update_syrk(descr, 1, _args); } #endif /* STARPU_USE_CUDA */ /* * TRSM */ static inline void chol_common_codelet_update_trsm(void *descr[], int s, void *_args) { (void)_args; /* printf("trsm\n"); */ float *sub11; float *sub21; sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); sub21 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); unsigned nx21 = STARPU_MATRIX_GET_NY(descr[1]); unsigned ny21 = STARPU_MATRIX_GET_NX(descr[1]); #ifdef STARPU_USE_CUDA cublasStatus_t status; #endif switch (s) { case 0: STARPU_STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21); break; #ifdef STARPU_USE_CUDA case 1: status = cublasStrsm(starpu_cublas_get_local_handle(), CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_T, CUBLAS_DIAG_NON_UNIT, nx21, ny21, &p1, sub11, ld11, sub21, ld21); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; #endif default: STARPU_ABORT(); break; } } void chol_cpu_codelet_update_trsm(void *descr[], void *_args) { chol_common_codelet_update_trsm(descr, 0, _args); } #ifdef STARPU_USE_CUDA void chol_cublas_codelet_update_trsm(void *descr[], void *_args) { chol_common_codelet_update_trsm(descr, 1, _args); } #endif /* * POTRF */ static inline void chol_common_codelet_update_potrf(void *descr[], int s, void *_args) { (void)_args; /* printf("potrf\n"); */ float *sub11; sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); unsigned nx = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld = STARPU_MATRIX_GET_LD(descr[0]); unsigned z; switch (s) { case 0: #ifdef STARPU_MKL STARPU_SPOTRF("L", nx, sub11, ld); #else /* * - alpha 11 <- lambda 11 = sqrt(alpha11) * - alpha 21 <- l 21 = alpha 21 / lambda 11 * - A22 <- A22 - l21 trans(l21) */ for (z = 0; z < nx; z++) { float lambda11; lambda11 = sqrt(sub11[z+z*ld]); sub11[z+z*ld] = lambda11; STARPU_ASSERT(lambda11 != 0.0f); STARPU_SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1); STARPU_SSYR("L", nx - z - 1, -1.0f, &sub11[(z+1)+z*ld], 1, &sub11[(z+1)+(z+1)*ld], ld); } #endif break; #ifdef STARPU_USE_CUDA case 1: #ifdef STARPU_HAVE_LIBCUSOLVER { cusolverStatus_t sstatus; float *workspace = (float *)STARPU_VARIABLE_GET_PTR(descr[1]); int Lwork = STARPU_VARIABLE_GET_ELEMSIZE(descr[1]) / sizeof(float); sstatus = cusolverDnSpotrf(starpu_cusolverDn_get_local_handle(), CUBLAS_FILL_MODE_LOWER, nx, sub11, ld, workspace, Lwork, NULL); if (sstatus != CUSOLVER_STATUS_SUCCESS) STARPU_CUSOLVER_REPORT_ERROR(sstatus); } #elif defined(STARPU_HAVE_MAGMA) { int ret; int info; #if (MAGMA_VERSION_MAJOR > 1) || (MAGMA_VERSION_MAJOR == 1 && MAGMA_VERSION_MINOR >= 4) cudaStream_t stream = starpu_cuda_get_local_stream(); cublasSetKernelStream(stream); magmablasSetKernelStream(stream); #else starpu_cublas_set_stream(); #endif ret = magma_spotrf_gpu(MagmaLower, nx, sub11, ld, &info); if (ret != MAGMA_SUCCESS) { fprintf(stderr, "Error in Magma: %d\n", ret); STARPU_ABORT(); } #if (MAGMA_VERSION_MAJOR > 1) || (MAGMA_VERSION_MAJOR == 1 && MAGMA_VERSION_MINOR >= 4) cudaError_t cures = cudaStreamSynchronize(stream); #else cudaError_t cures = cudaDeviceSynchronize(); #endif STARPU_ASSERT(!cures); } #else { float *lambda11; cublasStatus_t status; cudaStream_t stream = starpu_cuda_get_local_stream(); cublasHandle_t handle = starpu_cublas_get_local_handle(); cudaHostAlloc((void **)&lambda11, sizeof(float), 0); for (z = 0; z < nx; z++) { cudaMemcpyAsync(lambda11, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, stream); cudaStreamSynchronize(stream); STARPU_ASSERT(*lambda11 != 0.0f); *lambda11 = sqrt(*lambda11); /* cublasSetVector(1, sizeof(float), lambda11, sizeof(float), &sub11[z+z*ld], sizeof(float)); */ cudaMemcpyAsync(&sub11[z+z*ld], lambda11, sizeof(float), cudaMemcpyHostToDevice, stream); float scal = 1.0f/(*lambda11); status = cublasSscal(handle, nx - z - 1, &scal, &sub11[(z+1)+z*ld], 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); status = cublasSsyr(handle, CUBLAS_FILL_MODE_UPPER, nx - z - 1, &m1, &sub11[(z+1)+z*ld], 1, &sub11[(z+1)+(z+1)*ld], ld); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } cudaStreamSynchronize(stream); cudaFreeHost(lambda11); } #endif break; #endif default: STARPU_ABORT(); break; } } void chol_cpu_codelet_update_potrf(void *descr[], void *_args) { chol_common_codelet_update_potrf(descr, 0, _args); } #ifdef STARPU_USE_CUDA void chol_cublas_codelet_update_potrf(void *descr[], void *_args) { chol_common_codelet_update_potrf(descr, 1, _args); } #endif/* STARPU_USE_CUDA */ struct starpu_perfmodel chol_model_potrf; struct starpu_perfmodel chol_model_trsm; struct starpu_perfmodel chol_model_syrk; struct starpu_perfmodel chol_model_gemm; struct starpu_codelet cl_potrf = { .type = STARPU_SEQ, .cpu_funcs = {chol_cpu_codelet_update_potrf}, .cpu_funcs_name = {"chol_cpu_codelet_update_potrf"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_potrf}, # if defined(STARPU_HAVE_LIBCUSOLVER) .cuda_flags = {STARPU_CUDA_ASYNC}, # endif #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) .nbuffers = 2, #else .nbuffers = 1, #endif .modes = { STARPU_RW #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) , STARPU_SCRATCH | STARPU_NOFOOTPRINT #endif }, .model = &chol_model_potrf, .color = 0xffff00, }; struct starpu_codelet cl_trsm = { .type = STARPU_SEQ, .cpu_funcs = {chol_cpu_codelet_update_trsm}, .cpu_funcs_name = {"chol_cpu_codelet_update_trsm"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_trsm}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = { STARPU_R, STARPU_RW }, .model = &chol_model_trsm, .color = 0x8080ff, }; struct starpu_codelet cl_syrk = { .type = STARPU_SEQ, .max_parallelism = INT_MAX, .cpu_funcs = {chol_cpu_codelet_update_syrk}, .cpu_funcs_name = {"chol_cpu_codelet_update_syrk"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_syrk}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = { STARPU_R, STARPU_RW }, .model = &chol_model_syrk, .color = 0x00ff00, }; struct starpu_codelet cl_gemm = { .type = STARPU_SEQ, .max_parallelism = INT_MAX, .cpu_funcs = {chol_cpu_codelet_update_gemm}, .cpu_funcs_name = {"chol_cpu_codelet_update_gemm"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_gemm}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 3, .modes = { STARPU_R, STARPU_R, STARPU_RW }, .model = &chol_model_gemm, .color = 0x00c000, }; struct starpu_codelet cl_potrf_gpu = { #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_potrf}, # if defined(STARPU_HAVE_LIBCUSOLVER) .cuda_flags = {STARPU_CUDA_ASYNC}, # endif #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) .nbuffers = 2, #else .nbuffers = 1, #endif .modes = { STARPU_RW #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) , STARPU_SCRATCH | STARPU_NOFOOTPRINT #endif }, .model = &chol_model_potrf, .color = 0xffff00, }; struct starpu_codelet cl_trsm_gpu = { #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_trsm}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = { STARPU_R, STARPU_RW }, .model = &chol_model_trsm, .color = 0x8080ff, }; struct starpu_codelet cl_gemm_gpu = { #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_gemm}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 3, .modes = { STARPU_R, STARPU_R, STARPU_RW }, .model = &chol_model_gemm, .color = 0x00ff00, }; struct starpu_codelet cl_potrf_cpu = { .type = STARPU_SEQ, .cpu_funcs = {chol_cpu_codelet_update_potrf}, .cpu_funcs_name = {"chol_cpu_codelet_update_potrf"}, #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) .nbuffers = 2, #else .nbuffers = 1, #endif .modes = { STARPU_RW #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) , STARPU_SCRATCH | STARPU_NOFOOTPRINT #endif }, .model = &chol_model_potrf, .color = 0xffff00, }; struct starpu_codelet cl_trsm_cpu = { .type = STARPU_SEQ, .cpu_funcs = {chol_cpu_codelet_update_trsm}, .cpu_funcs_name = {"chol_cpu_codelet_update_trsm"}, .nbuffers = 2, .modes = { STARPU_R, STARPU_RW }, .model = &chol_model_trsm, .color = 0x8080ff, }; struct starpu_codelet cl_gemm_cpu = { .type = STARPU_SEQ, .max_parallelism = INT_MAX, .cpu_funcs = {chol_cpu_codelet_update_gemm}, .cpu_funcs_name = {"chol_cpu_codelet_update_gemm"}, .nbuffers = 3, .modes = { STARPU_R, STARPU_R, STARPU_RW }, .model = &chol_model_gemm, .color = 0x00ff00, }; void cholesky_kernel_init(int nb) { #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) int Lwork = 0; if (starpu_cuda_worker_get_count()) cusolverDnSpotrf_bufferSize(starpu_cusolverDn_get_local_handle(), CUBLAS_FILL_MODE_LOWER, nb, NULL, nb, &Lwork); starpu_variable_data_register(&scratch, -1, 0, Lwork * sizeof(float)); #endif } void cholesky_kernel_fini(void) { #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) starpu_data_unregister(scratch); #endif } starpu-1.4.9+dfsg/examples/cholesky/cholesky_models.c000066400000000000000000000122111507764646700227740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2023 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2011 Télécom-SudParis * Copyright (C) 2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * Example of a cost model for BLAS operations. This is really just an * example! */ /* * As a convention, in that file, buffers[0] is represented by A, * buffers[1] is B ... */ /* * Number of flops of Gemm */ #include #include #include "cholesky.h" /* #define USE_PERTURBATION 1 */ #ifdef USE_PERTURBATION #define PERTURB(a) ((starpu_drand48()*2.0f*(AMPL) + 1.0f - (AMPL))*(a)) #else #define PERTURB(a) (a) #endif double cpu_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/1000.0f*0.894/0.79176); #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cpu_chol_task_potrf_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } double cuda_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/50.0f/10.75/5.088633/0.9883); #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cuda_chol_task_potrf_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } double cpu_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/7706.674/0.95/0.9965); #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cpu_chol_task_trsm_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } double cuda_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/50.0f/10.75/87.29520); #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cuda_chol_task_trsm_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } double cpu_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/50.0f/10.75/8.0760)/2; #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cpu_chol_task_syrk_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } double cuda_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/50.0f/10.75/76.30666)/2; #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cuda_chol_task_syrk_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } double cpu_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/50.0f/10.75/8.0760); #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cpu_chol_task_gemm_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } double cuda_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/50.0f/10.75/76.30666); #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cuda_chol_task_gemm_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } void initialize_chol_model(struct starpu_perfmodel* model, char * symbol, double (*cpu_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned), double (*cuda_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned)) { struct starpu_perfmodel_per_arch *per_arch; model->symbol = symbol; model->type = STARPU_HISTORY_BASED; starpu_perfmodel_init(model); per_arch = starpu_perfmodel_get_model_per_devices(model, 0, STARPU_CPU_WORKER, 0, 1, -1); per_arch->cost_function = cpu_cost_function; // We could also call directly: // starpu_perfmodel_set_per_devices_cost_function(model, 0, cpu_cost_function, STARPU_CPU_WORKER, 0, 1, -1); if(starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) != 0) { per_arch = starpu_perfmodel_get_model_per_devices(model, 0, STARPU_CUDA_WORKER, 0, 1, -1); per_arch->cost_function = cuda_cost_function; } } starpu-1.4.9+dfsg/examples/cholesky/cholesky_tag.c000066400000000000000000000264341507764646700223000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This version of the Cholesky factorization uses explicit dependency * declaration through dependency tags. * It also uses data partitioning to split the matrix into submatrices */ /* Note: this is using fortran ordering, i.e. column-major ordering, i.e. * elements with consecutive row number are consecutive in memory */ #include "cholesky.h" #include #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA) #include "magma.h" #endif #include /* * Some useful functions */ static struct starpu_task *create_task(starpu_tag_t id) { struct starpu_task *task = starpu_task_create(); task->cl_arg = NULL; task->use_tag = 1; task->tag_id = id; return task; } /* * Create the codelets */ static struct starpu_task * create_task_potrf(starpu_data_handle_t dataA, unsigned k) { /* FPRINTF(stdout, "task potrf k = %d TAG = %llx\n", k, (TAG_POTRF(k))); */ struct starpu_task *task = create_task(TAG_POTRF(k)); task->cl = &cl_potrf; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) /* Temporary data to save libcusolver from allocating/deallocating memory */ task->handles[1] = scratch; #endif /* this is an important task */ if (!noprio_p) task->priority = STARPU_MAX_PRIO; /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_POTRF(k), 1, TAG_GEMM(k-1, k, k)); } int n = starpu_matrix_get_nx(task->handles[0]); task->flops = FLOPS_SPOTRF(n); return task; } static int create_task_trsm(starpu_data_handle_t dataA, unsigned k, unsigned m) { int ret; struct starpu_task *task = create_task(TAG_TRSM(k, m)); task->cl = &cl_trsm; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, m, k); if (!noprio_p && (m == k+1)) { task->priority = STARPU_MAX_PRIO; } /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_TRSM(k, m), 2, TAG_POTRF(k), TAG_GEMM(k-1, m, k)); } else { starpu_tag_declare_deps(TAG_TRSM(k, m), 1, TAG_POTRF(k)); } int nx = starpu_matrix_get_nx(task->handles[0]); task->flops = FLOPS_STRSM(nx, nx); ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } static int create_task_gemm(starpu_data_handle_t dataA, unsigned k, unsigned m, unsigned n) { int ret; /* FPRINTF(stdout, "task gemm k,n,m = %d,%d,%d TAG = %llx\n", k,m,n, TAG_GEMM(k,m,n)); */ struct starpu_task *task = create_task(TAG_GEMM(k, m, n)); if (m == n) { task->cl = &cl_syrk; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, n, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, n, n); int nx = starpu_matrix_get_nx(task->handles[0]); task->flops = FLOPS_SSYRK(nx, nx); } else { task->cl = &cl_gemm; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, n, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, m, k); task->handles[2] = starpu_data_get_sub_data(dataA, 2, m, n); int nx = starpu_matrix_get_nx(task->handles[0]); task->flops = FLOPS_SGEMM(nx, nx, nx); } if (!noprio_p && (n == k + 1) && (m == k +1)) { task->priority = STARPU_MAX_PRIO; } /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_GEMM(k, m, n), 3, TAG_GEMM(k-1, m, n), TAG_TRSM(k, n), TAG_TRSM(k, m)); } else { starpu_tag_declare_deps(TAG_GEMM(k, m, n), 2, TAG_TRSM(k, n), TAG_TRSM(k, m)); } ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } /* * code to bootstrap the factorization * and construct the DAG */ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks) { int ret; double start; double end; struct starpu_task *entry_task = NULL; /* create all the DAG nodes */ unsigned k, m, n; start = starpu_timing_now(); for (k = 0; k < nblocks; k++) { starpu_iteration_push(k); struct starpu_task *task = create_task_potrf(dataA, k); /* we defer the launch of the first task */ if (k == 0) { entry_task = task; } else { ret = starpu_task_submit(task); if (ret == -ENODEV) { starpu_data_unpartition(dataA, STARPU_MAIN_RAM); return 77; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } for (m = k+1; m m) { mat[m+n*size_p] = 0.0f; /* debug */ } } } float *test_mat = malloc(size_p*size_p*sizeof(float)); STARPU_ASSERT(test_mat); STARPU_SSYRK("L", "N", size_p, size_p, 1.0f, mat, size_p, 0.0f, test_mat, size_p); FPRINTF(stderr, "comparing results ...\n"); #ifdef PRINT_OUTPUT for (m = 0; m < size_p; m++) { for (n = 0; n < size_p; n++) { if (n <= m) { FPRINTF(stdout, "%2.2f\t", test_mat[m +n*size_p]); } else { FPRINTF(stdout, ".\t"); } } FPRINTF(stdout, "\n"); } #endif for (m = 0; m < size_p; m++) { for (n = 0; n < size_p; n++) { if (n <= m) { float orig = (1.0f/(1.0f+m+n)) + ((m == n)?1.0f*size_p:0.0f); float err = fabsf(test_mat[m +n*size_p] - orig) / orig; if (err > 0.0001) { FPRINTF(stderr, "Error[%llu, %llu] --> %2.6f != %2.6f (err %2.6f)\n", m, n, test_mat[m +n*size_p], orig, err); assert(0); } } } } free(test_mat); } #endif shutdown_system(&mat, size_p, pinned_p); return ret; } starpu-1.4.9+dfsg/examples/cholesky/cholesky_tile_tag.c000066400000000000000000000207111507764646700233050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This version of the Cholesky factorization uses explicit dependency * declaration through dependency tags. * It also directly registers matrix tiles instead of using partitioning. */ /* Note: this is using fortran ordering, i.e. column-major ordering, i.e. * elements with consecutive row number are consecutive in memory */ #include "cholesky.h" #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA) #include "magma.h" #endif #include "starpu_cusolver.h" /* A [ m ] [ n ] */ float *A[NMAXBLOCKS][NMAXBLOCKS]; starpu_data_handle_t A_state[NMAXBLOCKS][NMAXBLOCKS]; /* * Some useful functions */ static struct starpu_task *create_task(starpu_tag_t id) { struct starpu_task *task = starpu_task_create(); task->cl_arg = NULL; task->use_tag = 1; task->tag_id = id; return task; } /* * Create the codelets */ static struct starpu_task * create_task_potrf(unsigned k, unsigned nblocks) { (void)nblocks; /* FPRINTF(stdout, "task potrf k = %d TAG = %llx\n", k, (TAG_POTRF(k))); */ struct starpu_task *task = create_task(TAG_POTRF(k)); task->cl = &cl_potrf; /* which sub-data is manipulated ? */ task->handles[0] = A_state[k][k]; #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) /* Temporary data to save libcusolver from allocating/deallocating memory */ task->handles[1] = scratch; #endif /* this is an important task */ task->priority = STARPU_MAX_PRIO; /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_POTRF(k), 1, TAG_GEMM(k-1, k, k)); } int n = starpu_matrix_get_nx(task->handles[0]); task->flops = FLOPS_SPOTRF(n); return task; } static int create_task_trsm(unsigned k, unsigned m) { int ret; struct starpu_task *task = create_task(TAG_TRSM(m, k)); task->cl = &cl_trsm; /* which sub-data is manipulated ? */ task->handles[0] = A_state[k][k]; task->handles[1] = A_state[m][k]; if (m == k+1) { task->priority = STARPU_MAX_PRIO; } /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_TRSM(m, k), 2, TAG_POTRF(k), TAG_GEMM(k-1, m, k)); } else { starpu_tag_declare_deps(TAG_TRSM(m, k), 1, TAG_POTRF(k)); } int n = starpu_matrix_get_nx(task->handles[0]); task->flops = FLOPS_STRSM(n, n); ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } static int create_task_gemm(unsigned k, unsigned m, unsigned n) { int ret; /* FPRINTF(stdout, "task gemm k,n,m = %d,%d,%d TAG = %llx\n", k,m,n, TAG_GEMM(k,m,n)); */ struct starpu_task *task = create_task(TAG_GEMM(k, m, n)); if (m == n) { task->cl = &cl_syrk; /* which sub-data is manipulated ? */ task->handles[0] = A_state[n][k]; task->handles[1] = A_state[n][n]; int nx = starpu_matrix_get_nx(task->handles[0]); task->flops = FLOPS_SSYRK(nx, nx); } else { task->cl = &cl_gemm; /* which sub-data is manipulated ? */ task->handles[0] = A_state[n][k]; task->handles[1] = A_state[m][k]; task->handles[2] = A_state[m][n]; int nx = starpu_matrix_get_nx(task->handles[0]); task->flops = FLOPS_SGEMM(nx, nx, nx); } if (!noprio_p && (n == k + 1) && (m == k +1)) { task->priority = STARPU_MAX_PRIO; } /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_GEMM(k, m, n), 3, TAG_GEMM(k-1, m, n), TAG_TRSM(n, k), TAG_TRSM(m, k)); } else { starpu_tag_declare_deps(TAG_GEMM(k, m, n), 2, TAG_TRSM(n, k), TAG_TRSM(m, k)); } ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } /* * code to bootstrap the factorization * and construct the DAG */ static int cholesky_no_stride(void) { int ret; double start; double end; struct starpu_task *entry_task = NULL; /* create all the DAG nodes */ unsigned k, m, n; for (k = 0; k < nblocks_p; k++) { starpu_iteration_push(k); struct starpu_task *task = create_task_potrf(k, nblocks_p); /* we defer the launch of the first task */ if (k == 0) { entry_task = task; } else { ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } for (m = k+1; m #include #include #include #include #include /* for fpclassify() checks on knob values */ #ifndef DBL_MIN #define DBL_MIN __DBL_MIN__ #endif #ifndef DBL_MAX #define DBL_MAX __DBL_MAX__ #endif struct _starpu_dmda_data { double alpha; double beta; double _gamma; double idle_power; starpu_st_fifo_taskq_t queue_array[STARPU_NMAXWORKERS]; }; /* The dmda scheduling policy uses * * alpha * T_computation + beta * T_communication + gamma * Consumption * * Here are the default values of alpha, beta, gamma */ #define _STARPU_SCHED_ALPHA_DEFAULT 1.0 #define _STARPU_SCHED_BETA_DEFAULT 1.0 #define _STARPU_SCHED_GAMMA_DEFAULT 1000.0 static void initialize_dmda_policy(unsigned sched_ctx_id) { fprintf(stderr, "HELLO FROM MY_DM\n"); struct _starpu_dmda_data *dt; dt = calloc(1, sizeof(struct _starpu_dmda_data)); assert(dt); starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)dt); dt->alpha = starpu_getenv_float_default("STARPU_SCHED_ALPHA", _STARPU_SCHED_ALPHA_DEFAULT); dt->beta = starpu_getenv_float_default("STARPU_SCHED_BETA", _STARPU_SCHED_BETA_DEFAULT); /* data->_gamma: cost of one Joule in us. If gamma is set to 10^6, then one Joule cost 1s */ dt->_gamma = starpu_getenv_float_default("STARPU_SCHED_GAMMA", _STARPU_SCHED_GAMMA_DEFAULT); /* data->idle_power: Idle power of the whole machine in Watt */ dt->idle_power = starpu_getenv_float_default("STARPU_IDLE_POWER", 0.0); } static void deinitialize_dmda_policy(unsigned sched_ctx_id) { struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); free(dt); } static void dmda_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) { struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); unsigned i; for (i = 0; i < nworkers; i++) { int workerid = workerids[i]; /* if the worker has already belonged to this context the queue and the synchronization variables have been already initialized */ dt->queue_array[workerid] = starpu_st_fifo_taskq_create(); } } static void dmda_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) { struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); unsigned i; for (i = 0; i < nworkers; i++) { int workerid = workerids[i]; starpu_st_fifo_taskq_destroy(dt->queue_array[workerid]); dt->queue_array[workerid] = NULL; } } static int dm_push_task(struct starpu_task *task) { /* Julia version should look like this: * * best_worker = -1 * best_implem = -1 * best_EFT = 0 * for worker in workers: * for implem in implems: * if !worker_can_execute_task_impl(worker, task, implem) * continue * end * EFT = EFT(task, worker, implem) * if best_worker == -1 || EFT < best_EFT * best_worker = worker * best_implem = implem * best_EFT = EFT * end * end * end * push!(data.queue[worker], task, impl) */ unsigned sched_ctx_id = task->sched_ctx; struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); int best = -1; double best_exp_end_of_task = 0.0; unsigned best_impl = 0; double predicted = 0.0; double predicted_transfer = 0.0; struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; double now = starpu_timing_now(); // Find couple (worker, implem) that minimizes EFT(task, worker, implem) workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { unsigned nimpl; unsigned impl_mask; unsigned worker = workers->get_next(workers, &it); starpu_st_fifo_taskq_t fifo = dt->queue_array[worker]; double exp_start = starpu_st_fifo_exp_start_get(fifo); double pipeline_len = starpu_st_fifo_pipeline_len_get(fifo); double exp_len = starpu_st_fifo_exp_len_get(fifo); /* Sometimes workers didn't take the tasks as early as we expected */ double new_exp_start = isnan(exp_start) ? now + pipeline_len : STARPU_MAX(exp_start, now); if (!starpu_worker_can_execute_task_impl(worker, task, &impl_mask)) continue; for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { if (!(impl_mask & (1U << nimpl))) { /* no one on that queue may execute this task */ continue; } // todo: handle case where no calibration or no model double local_length = starpu_task_worker_expected_length(task, worker, sched_ctx_id, nimpl); double local_penalty = starpu_task_expected_data_transfer_time_for(task, worker); double exp_end = new_exp_start + exp_len + local_length; if (best == -1 || exp_end < best_exp_end_of_task) { /* a better solution was found */ best_exp_end_of_task = exp_end; best = worker; best_impl = nimpl; predicted = local_length; predicted_transfer = local_penalty; } } } STARPU_ASSERT(best >= 0); // Set task implem. starpu_task_set_implementation(task, best_impl); // Update expected start of the next task in the queue and expected end of the last task in the queue // This code should be generated automatically. starpu_st_fifo_taskq_t fifo = dt->queue_array[best]; double exp_start = starpu_st_fifo_exp_start_get(fifo); double pipeline_len = starpu_st_fifo_pipeline_len_get(fifo); double exp_len = starpu_st_fifo_exp_len_get(fifo); now = starpu_timing_now(); starpu_worker_lock(best); double new_exp_start = isnan(exp_start) ? now + pipeline_len : STARPU_MAX(exp_start, now); starpu_st_fifo_exp_start_set(fifo, new_exp_start); double new_exp_end = new_exp_start + exp_len; starpu_st_fifo_exp_end_set(fifo, new_exp_end); if ((now + predicted_transfer) < new_exp_end) { /* We may hope that the transfer will be finished by * the start of the task. */ predicted_transfer = 0.0; } else { /* The transfer will not be finished by then, take the * remainder into account */ predicted_transfer = (now + predicted_transfer) - new_exp_end; } double new_exp_len = exp_len; if(!isnan(predicted_transfer)) new_exp_len += predicted_transfer; if(!isnan(predicted)) new_exp_len += predicted; starpu_st_fifo_exp_len_set(fifo, new_exp_len); starpu_st_fifo_exp_end_set(fifo, new_exp_start + new_exp_len); starpu_worker_unlock(best); // Not sure what's the purpose of this. task->predicted = predicted; task->predicted_transfer = predicted_transfer; // Prefetch if (starpu_get_prefetch_flag()) starpu_prefetch_task_input_for(task, best); // Push task to worker queue starpu_worker_lock(best); starpu_st_fifo_taskq_push_back_task(fifo, task); starpu_st_fifo_ntasks_inc(fifo, 1); starpu_st_fifo_nprocessed_inc(fifo, 1); #if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) starpu_wake_worker_locked(best); #endif starpu_push_task_end(task); starpu_worker_unlock(best); starpu_sched_ctx_list_task_counters_increment(sched_ctx_id, best); return 0; } static struct starpu_task *dmda_pop_task(unsigned sched_ctx_id) { /* Julia version should look like this: * * return pop!(data.queue[worker]) */ struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); struct starpu_task *task; unsigned workerid = starpu_worker_get_id_check(); starpu_st_fifo_taskq_t fifo = dt->queue_array[workerid]; /* Take the opportunity to update start time */ double new_exp_start = STARPU_MAX(starpu_timing_now(), starpu_st_fifo_exp_start_get(fifo)); double new_exp_end = new_exp_start + starpu_st_fifo_exp_end_get(fifo); starpu_st_fifo_exp_start_set(fifo, new_exp_start); starpu_st_fifo_exp_end_set(fifo, new_exp_end); task = starpu_st_fifo_taskq_pop_local_task(fifo); if (task) { double transfer_model = task->predicted_transfer; if (!isnan(transfer_model)) { /* We now start the transfer, move it from predicted to pipelined */ double new_exp_len = starpu_st_fifo_exp_len_get(fifo); new_exp_len -= transfer_model; double new_pipeline_len = starpu_st_fifo_pipeline_len_get(fifo); new_pipeline_len += transfer_model; starpu_st_fifo_exp_len_set(fifo, new_exp_len); starpu_st_fifo_pipeline_len_set(fifo, new_pipeline_len); new_exp_start = starpu_timing_now() + new_pipeline_len; new_exp_end = new_exp_start + new_exp_len; starpu_st_fifo_exp_start_set(fifo, new_exp_start); starpu_st_fifo_exp_end_set(fifo, new_exp_end); } starpu_sched_ctx_list_task_counters_decrement(sched_ctx_id, workerid); } return task; } // This code should be generated automatically. static void dmda_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id) { unsigned workerid = starpu_worker_get_id_check(); struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); starpu_st_fifo_taskq_t fifo = dt->queue_array[workerid]; const double now = starpu_timing_now(); /* Once the task is executing, we can update the predicted amount * of work. */ starpu_worker_lock_self(); double model = task->predicted; double transfer_model = task->predicted_transfer; if(!isnan(transfer_model)) { /* The transfer is over, remove it from pipelined */ starpu_st_fifo_pipeline_len_inc(fifo, -transfer_model); } if(!isnan(model)) { /* We now start the computation, move it from predicted to pipelined */ starpu_st_fifo_exp_len_inc(fifo, -model); starpu_st_fifo_pipeline_len_inc(fifo, model); starpu_st_fifo_exp_start_set(fifo, starpu_timing_now() + starpu_st_fifo_pipeline_len_get(fifo)); starpu_st_fifo_exp_end_set(fifo, starpu_st_fifo_exp_start_get(fifo) + starpu_st_fifo_exp_len_get(fifo)); } /* Take the opportunity to update start time */ starpu_st_fifo_exp_start_set(fifo, STARPU_MAX(now + starpu_st_fifo_pipeline_len_get(fifo), starpu_st_fifo_exp_start_get(fifo))); starpu_st_fifo_exp_end_set(fifo, starpu_st_fifo_exp_start_get(fifo) + starpu_st_fifo_exp_len_get(fifo)); starpu_worker_unlock_self(); } // This code should be generated automatically. static void dmda_post_exec_hook(struct starpu_task * task, unsigned sched_ctx_id) { struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); unsigned workerid = starpu_worker_get_id_check(); starpu_st_fifo_taskq_t fifo = dt->queue_array[workerid]; starpu_worker_lock_self(); if(!isnan(task->predicted)) /* The execution is over, remove it from pipelined */ starpu_st_fifo_pipeline_len_inc(fifo, -task->predicted); starpu_st_fifo_exp_start_set(fifo, STARPU_MAX(starpu_timing_now() + starpu_st_fifo_pipeline_len_get(fifo), starpu_st_fifo_exp_start_get(fifo))); starpu_st_fifo_exp_end_set(fifo, starpu_st_fifo_exp_start_get(fifo) + starpu_st_fifo_exp_len_get(fifo)); starpu_worker_unlock_self(); } struct starpu_sched_policy my_dm_policy = { .init_sched = initialize_dmda_policy, .deinit_sched = deinitialize_dmda_policy, .add_workers = dmda_add_workers, .remove_workers = dmda_remove_workers, .push_task = dm_push_task, .simulate_push_task = NULL, .pop_task = dmda_pop_task, .pre_exec_hook = dmda_pre_exec_hook, .post_exec_hook = dmda_post_exec_hook, .policy_name = "mydm", .policy_description = "performance model", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; struct starpu_sched_policy *predefined_policies[] = { &my_dm_policy }; struct starpu_sched_policy *starpu_get_sched_lib_policy(const char *name) { if (!strcmp(name, "mydm")) return &my_dm_policy; return NULL; } struct starpu_sched_policy **starpu_get_sched_lib_policies(void) { return predefined_policies; } starpu-1.4.9+dfsg/examples/cholesky/libmy_dmda.h000066400000000000000000000015341507764646700217240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MY_DMDA_H__ #define __STARPU_MY_DMDA_H__ #include extern struct starpu_sched_policy my_dm_policy; #endif /* __STARPU_MY_DMDA_H__ */ starpu-1.4.9+dfsg/examples/common/000077500000000000000000000000001507764646700171165ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/common/blas.c000066400000000000000000000422641507764646700202130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "blas.h" /* * This files contains BLAS wrappers for the different BLAS implementations * (eg. REFBLAS, ATLAS, GOTOBLAS ...). We assume a Fortran orientation as most * libraries do not supply C-based ordering. */ #ifdef STARPU_ATLAS inline void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc) { enum CBLAS_TRANSPOSE ta = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans; enum CBLAS_TRANSPOSE tb = (toupper(transb[0]) == 'N')?CblasNoTrans:CblasTrans; cblas_sgemm(CblasColMajor, ta, tb, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); } inline void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, double alpha, double *A, int lda, double *B, int ldb, double beta, double *C, int ldc) { enum CBLAS_TRANSPOSE ta = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans; enum CBLAS_TRANSPOSE tb = (toupper(transb[0]) == 'N')?CblasNoTrans:CblasTrans; cblas_dgemm(CblasColMajor, ta, tb, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); } inline void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda, float *X, int incX, float beta, float *Y, int incY) { enum CBLAS_TRANSPOSE ta = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans; cblas_sgemv(CblasColMajor, ta, M, N, alpha, A, lda, X, incX, beta, Y, incY); } inline void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda, double *X, int incX, double beta, double *Y, int incY) { enum CBLAS_TRANSPOSE ta = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans; cblas_dgemv(CblasColMajor, ta, M, N, alpha, A, lda, X, incX, beta, Y, incY); } inline float STARPU_SASUM(int N, float *X, int incX) { return cblas_sasum(N, X, incX); } inline double STARPU_DASUM(int N, double *X, int incX) { return cblas_dasum(N, X, incX); } void STARPU_SSCAL(int N, float alpha, float *X, int incX) { cblas_sscal(N, alpha, X, incX); } void STARPU_DSCAL(int N, double alpha, double *X, int incX) { cblas_dscal(N, alpha, X, incX); } void STARPU_STRSM (const char *side, const char *uplo, const char *transa, const char *diag, const int m, const int n, const float alpha, const float *A, const int lda, float *B, const int ldb) { enum CBLAS_SIDE side_ = (toupper(side[0]) == 'L')?CblasLeft:CblasRight; enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; enum CBLAS_TRANSPOSE transa_ = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans; enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit; cblas_strsm(CblasColMajor, side_, uplo_, transa_, diag_, m, n, alpha, A, lda, B, ldb); } void STARPU_DTRSM (const char *side, const char *uplo, const char *transa, const char *diag, const int m, const int n, const double alpha, const double *A, const int lda, double *B, const int ldb) { enum CBLAS_SIDE side_ = (toupper(side[0]) == 'L')?CblasLeft:CblasRight; enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; enum CBLAS_TRANSPOSE transa_ = (toupper(transa[0]) == 'N')?CblasNoTrans:CblasTrans; enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit; cblas_dtrsm(CblasColMajor, side_, uplo_, transa_, diag_, m, n, alpha, A, lda, B, ldb); } void STARPU_SSYR (const char *uplo, const int n, const float alpha, const float *x, const int incx, float *A, const int lda) { enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; cblas_ssyr(CblasColMajor, uplo_, n, alpha, x, incx, A, lda); } void STARPU_SSYRK (const char *uplo, const char *trans, const int n, const int k, const float alpha, const float *A, const int lda, const float beta, float *C, const int ldc) { enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; enum CBLAS_TRANSPOSE trans_ = (toupper(trans[0]) == 'N')?CblasNoTrans:CblasTrans; cblas_ssyrk(CblasColMajor, uplo_, trans_, n, k, alpha, A, lda, beta, C, ldc); } void STARPU_SGER(const int m, const int n, const float alpha, const float *x, const int incx, const float *y, const int incy, float *A, const int lda) { cblas_sger(CblasColMajor, m, n, alpha, x, incx, y, incy, A, lda); } void STARPU_DGER(const int m, const int n, const double alpha, const double *x, const int incx, const double *y, const int incy, double *A, const int lda) { cblas_dger(CblasColMajor, m, n, alpha, x, incx, y, incy, A, lda); } void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, const int n, const float *A, const int lda, float *x, const int incx) { enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; enum CBLAS_TRANSPOSE trans_ = (toupper(trans[0]) == 'N')?CblasNoTrans:CblasTrans; enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit; cblas_strsv(CblasColMajor, uplo_, trans_, diag_, n, A, lda, x, incx); } void STARPU_STRMM(const char *side, const char *uplo, const char *transA, const char *diag, const int m, const int n, const float alpha, const float *A, const int lda, float *B, const int ldb) { enum CBLAS_SIDE side_ = (toupper(side[0]) == 'L')?CblasLeft:CblasRight; enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; enum CBLAS_TRANSPOSE transA_ = (toupper(transA[0]) == 'N')?CblasNoTrans:CblasTrans; enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit; cblas_strmm(CblasColMajor, side_, uplo_, transA_, diag_, m, n, alpha, A, lda, B, ldb); } void STARPU_DTRMM(const char *side, const char *uplo, const char *transA, const char *diag, const int m, const int n, const double alpha, const double *A, const int lda, double *B, const int ldb) { enum CBLAS_SIDE side_ = (toupper(side[0]) == 'L')?CblasLeft:CblasRight; enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; enum CBLAS_TRANSPOSE transA_ = (toupper(transA[0]) == 'N')?CblasNoTrans:CblasTrans; enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit; cblas_dtrmm(CblasColMajor, side_, uplo_, transA_, diag_, m, n, alpha, A, lda, B, ldb); } void STARPU_STRMV(const char *uplo, const char *transA, const char *diag, const int n, const float *A, const int lda, float *X, const int incX) { enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; enum CBLAS_TRANSPOSE transA_ = (toupper(transA[0]) == 'N')?CblasNoTrans:CblasTrans; enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit; cblas_strmv(CblasColMajor, uplo_, transA_, diag_, n, A, lda, X, incX); } void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incY) { cblas_saxpy(n, alpha, X, incX, Y, incY); } void STARPU_DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY) { cblas_daxpy(n, alpha, X, incX, Y, incY); } int STARPU_ISAMAX (const int n, float *X, const int incX) { int retVal; retVal = cblas_isamax(n, X, incX); return retVal; } int STARPU_IDAMAX (const int n, double *X, const int incX) { int retVal; retVal = cblas_idamax(n, X, incX); return retVal; } float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy) { return cblas_sdot(n, x, incx, y, incy); } double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy) { return cblas_ddot(n, x, incx, y, incy); } void STARPU_SSWAP(const int n, float *x, const int incx, float *y, const int incy) { cblas_sswap(n, x, incx, y, incy); } void STARPU_DSWAP(const int n, double *x, const int incx, double *y, const int incy) { cblas_dswap(n, x, incx, y, incy); } #elif defined(STARPU_GOTO) || defined(STARPU_OPENBLAS) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL) || defined(STARPU_ARMPL) inline void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc) { sgemm_(transa, transb, &M, &N, &K, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } inline void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, double alpha, double *A, int lda, double *B, int ldb, double beta, double *C, int ldc) { dgemm_(transa, transb, &M, &N, &K, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } inline void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda, float *X, int incX, float beta, float *Y, int incY) { sgemv_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); } inline void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda, double *X, int incX, double beta, double *Y, int incY) { dgemv_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); } inline float STARPU_SASUM(int N, float *X, int incX) { return sasum_(&N, X, &incX); } inline double STARPU_DASUM(int N, double *X, int incX) { return dasum_(&N, X, &incX); } void STARPU_SSCAL(int N, float alpha, float *X, int incX) { sscal_(&N, &alpha, X, &incX); } void STARPU_DSCAL(int N, double alpha, double *X, int incX) { dscal_(&N, &alpha, X, &incX); } void STARPU_STRSM (const char *side, const char *uplo, const char *transa, const char *diag, const int m, const int n, const float alpha, const float *A, const int lda, float *B, const int ldb) { strsm_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb); } void STARPU_DTRSM (const char *side, const char *uplo, const char *transa, const char *diag, const int m, const int n, const double alpha, const double *A, const int lda, double *B, const int ldb) { dtrsm_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb); } void STARPU_SSYR (const char *uplo, const int n, const float alpha, const float *x, const int incx, float *A, const int lda) { ssyr_(uplo, &n, &alpha, x, &incx, A, &lda); } void STARPU_SSYRK (const char *uplo, const char *trans, const int n, const int k, const float alpha, const float *A, const int lda, const float beta, float *C, const int ldc) { ssyrk_(uplo, trans, &n, &k, &alpha, A, &lda, &beta, C, &ldc); } void STARPU_SGER(const int m, const int n, const float alpha, const float *x, const int incx, const float *y, const int incy, float *A, const int lda) { sger_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); } void STARPU_DGER(const int m, const int n, const double alpha, const double *x, const int incx, const double *y, const int incy, double *A, const int lda) { dger_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); } void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, const int n, const float *A, const int lda, float *x, const int incx) { strsv_(uplo, trans, diag, &n, A, &lda, x, &incx); } void STARPU_STRMM(const char *side, const char *uplo, const char *transA, const char *diag, const int m, const int n, const float alpha, const float *A, const int lda, float *B, const int ldb) { strmm_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb); } void STARPU_DTRMM(const char *side, const char *uplo, const char *transA, const char *diag, const int m, const int n, const double alpha, const double *A, const int lda, double *B, const int ldb) { dtrmm_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb); } void STARPU_STRMV(const char *uplo, const char *transA, const char *diag, const int n, const float *A, const int lda, float *X, const int incX) { strmv_(uplo, transA, diag, &n, A, &lda, X, &incX); } void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incY) { saxpy_(&n, &alpha, X, &incX, Y, &incY); } void STARPU_DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY) { daxpy_(&n, &alpha, X, &incX, Y, &incY); } int STARPU_ISAMAX (const int n, float *X, const int incX) { int retVal; retVal = isamax_ (&n, X, &incX); return retVal; } int STARPU_IDAMAX (const int n, double *X, const int incX) { int retVal; retVal = idamax_ (&n, X, &incX); return retVal; } float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy) { float retVal = 0; /* GOTOBLAS will return a FLOATRET which is a double, not a float */ retVal = (float)sdot_(&n, x, &incx, y, &incy); return retVal; } double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy) { return ddot_(&n, x, &incx, y, &incy); } void STARPU_SSWAP(const int n, float *X, const int incX, float *Y, const int incY) { sswap_(&n, X, &incX, Y, &incY); } void STARPU_DSWAP(const int n, double *X, const int incX, double *Y, const int incY) { dswap_(&n, X, &incX, Y, &incY); } #if defined(STARPU_MKL) || defined(STARPU_ARMPL) void STARPU_SPOTRF(const char*uplo, const int n, float *a, const int lda) { int info = 0; spotrf_(uplo, &n, a, &lda, &info); } void STARPU_DPOTRF(const char*uplo, const int n, double *a, const int lda) { int info = 0; dpotrf_(uplo, &n, a, &lda, &info); } #endif #elif defined(STARPU_SIMGRID) inline void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc) { } inline void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, double alpha, double *A, int lda, double *B, int ldb, double beta, double *C, int ldc) { } inline void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda, float *X, int incX, float beta, float *Y, int incY) { } inline void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda, double *X, int incX, double beta, double *Y, int incY) { } inline float STARPU_SASUM(int N, float *X, int incX) { return 0.; } inline double STARPU_DASUM(int N, double *X, int incX) { return 0.; } void STARPU_SSCAL(int N, float alpha, float *X, int incX) { } void STARPU_DSCAL(int N, double alpha, double *X, int incX) { } void STARPU_STRSM (const char *side, const char *uplo, const char *transa, const char *diag, const int m, const int n, const float alpha, const float *A, const int lda, float *B, const int ldb) { } void STARPU_DTRSM (const char *side, const char *uplo, const char *transa, const char *diag, const int m, const int n, const double alpha, const double *A, const int lda, double *B, const int ldb) { } void STARPU_SSYR (const char *uplo, const int n, const float alpha, const float *x, const int incx, float *A, const int lda) { } void STARPU_SSYRK (const char *uplo, const char *trans, const int n, const int k, const float alpha, const float *A, const int lda, const float beta, float *C, const int ldc) { } void STARPU_SGER(const int m, const int n, const float alpha, const float *x, const int incx, const float *y, const int incy, float *A, const int lda) { } void STARPU_DGER(const int m, const int n, const double alpha, const double *x, const int incx, const double *y, const int incy, double *A, const int lda) { } void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, const int n, const float *A, const int lda, float *x, const int incx) { } void STARPU_STRMM(const char *side, const char *uplo, const char *transA, const char *diag, const int m, const int n, const float alpha, const float *A, const int lda, float *B, const int ldb) { } void STARPU_DTRMM(const char *side, const char *uplo, const char *transA, const char *diag, const int m, const int n, const double alpha, const double *A, const int lda, double *B, const int ldb) { } void STARPU_STRMV(const char *uplo, const char *transA, const char *diag, const int n, const float *A, const int lda, float *X, const int incX) { } void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incY) { } void STARPU_DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY) { } int STARPU_ISAMAX (const int n, float *X, const int incX) { return 0; } int STARPU_IDAMAX (const int n, double *X, const int incX) { return 0; } float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy) { return 0.; } double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy) { return 0.; } void STARPU_SSWAP(const int n, float *X, const int incX, float *Y, const int incY) { } void STARPU_DSWAP(const int n, double *X, const int incX, double *Y, const int incY) { } void STARPU_SPOTRF(const char*uplo, const int n, float *a, const int lda) { } void STARPU_DPOTRF(const char*uplo, const int n, double *a, const int lda) { } #else #error "no BLAS lib available..." #endif starpu-1.4.9+dfsg/examples/common/blas.h000066400000000000000000000214701507764646700202140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __BLAS_H__ #define __BLAS_H__ #include #if defined(STARPU_ATLAS) || defined(STARPU_HAVE_CBLAS_H) #include #endif void STARPU_SGEMM(char *transa, char *transb, int M, int N, int K, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc); void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, double alpha, double *A, int lda, double *B, int ldb, double beta, double *C, int ldc); void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda, float *X, int incX, float beta, float *Y, int incY); void STARPU_DGEMV(char *transa, int M, int N, double alpha, double *A, int lda, double *X, int incX, double beta, double *Y, int incY); float STARPU_SASUM(int N, float *X, int incX); double STARPU_DASUM(int N, double *X, int incX); void STARPU_SSCAL(int N, float alpha, float *X, int incX); void STARPU_DSCAL(int N, double alpha, double *X, int incX); void STARPU_STRSM (const char *side, const char *uplo, const char *transa, const char *diag, const int m, const int n, const float alpha, const float *A, const int lda, float *B, const int ldb); void STARPU_DTRSM (const char *side, const char *uplo, const char *transa, const char *diag, const int m, const int n, const double alpha, const double *A, const int lda, double *B, const int ldb); void STARPU_DGEMM(char *transa, char *transb, int M, int N, int K, double alpha, double *A, int lda, double *B, int ldb, double beta, double *C, int ldc); void STARPU_SSYR (const char *uplo, const int n, const float alpha, const float *x, const int incx, float *A, const int lda); void STARPU_SSYRK (const char *uplo, const char *trans, const int n, const int k, const float alpha, const float *A, const int lda, const float beta, float *C, const int ldc); void STARPU_SGER (const int m, const int n, const float alpha, const float *x, const int incx, const float *y, const int incy, float *A, const int lda); void STARPU_DGER(const int m, const int n, const double alpha, const double *x, const int incx, const double *y, const int incy, double *A, const int lda); void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, const int n, const float *A, const int lda, float *x, const int incx); void STARPU_STRMM(const char *side, const char *uplo, const char *transA, const char *diag, const int m, const int n, const float alpha, const float *A, const int lda, float *B, const int ldb); void STARPU_DTRMM(const char *side, const char *uplo, const char *transA, const char *diag, const int m, const int n, const double alpha, const double *A, const int lda, double *B, const int ldb); void STARPU_STRMV(const char *uplo, const char *transA, const char *diag, const int n, const float *A, const int lda, float *X, const int incX); void STARPU_SAXPY(const int n, const float alpha, float *X, const int incX, float *Y, const int incy); void STARPU_DAXPY(const int n, const double alpha, double *X, const int incX, double *Y, const int incY); int STARPU_ISAMAX(const int n, float *X, const int incX); int STARPU_IDAMAX(const int n, double *X, const int incX); float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy); double STARPU_DDOT(const int n, const double *x, const int incx, const double *y, const int incy); void STARPU_SSWAP(const int n, float *x, const int incx, float *y, const int incy); void STARPU_DSWAP(const int n, double *x, const int incx, double *y, const int incy); #if defined(STARPU_MKL) || defined(STARPU_ARMPL) void STARPU_SPOTRF(const char*uplo, const int n, float *a, const int lda); void STARPU_DPOTRF(const char*uplo, const int n, double *a, const int lda); #endif #if defined(STARPU_GOTO) || defined(STARPU_OPENBLAS) || defined(STARPU_SYSTEM_BLAS) || defined(STARPU_MKL) || defined(STARPU_ARMPL) #ifdef _STARPU_F2C_COMPATIBILITY /* for compatibility with F2C, FLOATRET may not be a float but a double in GOTOBLAS */ /* Don't know how to detect this automatically */ #define _STARPU_FLOATRET double #else #define _STARPU_FLOATRET float #endif extern void sgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, const float *alpha, const float *A, const int *lda, const float *B, const int *ldb, const float *beta, float *C, const int *ldc); extern void dgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, const double *alpha, const double *A, const int *lda, const double *B, const int *ldb, const double *beta, double *C, const int *ldc); extern void sgemv_(const char *trans, const int *m, const int *n, const float *alpha, const float *a, const int *lda, const float *x, const int *incx, const float *beta, float *y, const int *incy); extern void dgemv_(const char *trans, const int *m, const int *n, const double *alpha, const double *a, const int *lda, const double *x, const int *incx, const double *beta, double *y, const int *incy); extern void ssyr_(const char *uplo, const int *n, const float *alpha, const float *x, const int *incx, float *A, const int *lda); extern void ssyrk_(const char *uplo, const char *trans, const int *n, const int *k, const float *alpha, const float *A, const int *lda, const float *beta, float *C, const int *ldc); extern void strsm_(const char *side, const char *uplo, const char *transa, const char *diag, const int *m, const int *n, const float *alpha, const float *A, const int *lda, float *B, const int *ldb); extern void dtrsm_(const char *side, const char *uplo, const char *transa, const char *diag, const int *m, const int *n, const double *alpha, const double *A, const int *lda, double *B, const int *ldb); extern _STARPU_FLOATRET sasum_ (const int *n, const float *x, const int *incx); extern double dasum_(const int *n, const double *x, const int *incx); extern void sscal_(const int *n, const float *alpha, float *x, const int *incx); extern void dscal_(const int *n, const double *alpha, double *x, const int *incx); extern void sger_(const int *m, const int *n, const float *alpha, const float *x, const int *incx, const float *y, const int *incy, float *A, const int *lda); extern void dger_(const int *m, const int *n, const double *alpha, const double *x, const int *incx, const double *y, const int *incy, double *A, const int *lda); extern void strsv_(const char *uplo, const char *trans, const char *diag, const int *n, const float *A, const int *lda, float *x, const int *incx); extern void strmm_(const char *side, const char *uplo, const char *transA, const char *diag, const int *m, const int *n, const float *alpha, const float *A, const int *lda, float *B, const int *ldb); extern void dtrmm_(const char *side, const char *uplo, const char *transA, const char *diag, const int *m, const int *n, const double *alpha, const double *A, const int *lda, double *B, const int *ldb); extern void strmv_(const char *uplo, const char *transA, const char *diag, const int *n, const float *A, const int *lda, float *X, const int *incX); extern void saxpy_(const int *n, const float *alpha, const float *X, const int *incX, float *Y, const int *incy); extern void daxpy_(const int *n, const double *alpha, const double *X, const int *incX, double *Y, const int *incy); extern int isamax_(const int *n, const float *X, const int *incX); extern int idamax_(const int *n, const double *X, const int *incX); extern _STARPU_FLOATRET sdot_(const int *n, const float *x, const int *incx, const float *y, const int *incy); extern double ddot_(const int *n, const double *x, const int *incx, const double *y, const int *incy); extern void sswap_(const int *n, float *x, const int *incx, float *y, const int *incy); extern void dswap_(const int *n, double *x, const int *incx, double *y, const int *incy); #if (defined STARPU_MKL) || (defined STARPU_ARMPL) extern void spotrf_(const char*uplo, const int *n, float *a, const int *lda, int *info); extern void dpotrf_(const char*uplo, const int *n, double *a, const int *lda, int *info); #endif #endif #endif /* __BLAS_H__ */ starpu-1.4.9+dfsg/examples/common/blas_model.c000066400000000000000000000024351507764646700213670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "blas_model.h" #include /* * As a convention, in that file, descr[0] is represented by A, * descr[1] is B ... */ /* * Number of flops of Gemm */ double gemm_cost(struct starpu_task *task, unsigned nimpl) { /* C = A * B */ uint32_t nxC, nyC, nxA; nxC = starpu_matrix_get_nx(task->descr[2].handle); nyC = starpu_matrix_get_ny(task->descr[2].handle); nxA = starpu_matrix_get_nx(task->descr[0].handle); /* printf("nxC %d nxC %d nxA %d\n", nxC, nyC, nxA); */ double cost = ((double)nxC)*((double)nyC)*((double)nxA/1000.0f/4.11f); /* printf("cost %e \n", cost); */ return cost; } starpu-1.4.9+dfsg/examples/common/blas_model.h000066400000000000000000000031461507764646700213740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __BLAS_MODEL_H__ #define __BLAS_MODEL_H__ #include double gemm_cost(struct starpu_task *task, unsigned nimpl); static struct starpu_perfmodel starpu_sgemm_model = { .type = STARPU_HISTORY_BASED, #ifdef STARPU_ATLAS .symbol = "sgemm_atlas" #elif defined(STARPU_GOTO) .symbol = "sgemm_goto" #elif defined(STARPU_OPENBLAS) .symbol = "sgemm_openblas" #else .symbol = "sgemm" #endif }; static struct starpu_perfmodel starpu_sgemm_model_common = { .cost_function = gemm_cost, .type = STARPU_COMMON, }; static struct starpu_perfmodel starpu_dgemm_model = { .type = STARPU_HISTORY_BASED, #ifdef STARPU_ATLAS .symbol = "dgemm_atlas" #elif defined(STARPU_GOTO) .symbol = "dgemm_goto" #elif defined(STARPU_OPENBLAS) .symbol = "dgemm_openblas" #else .symbol = "dgemm" #endif }; static struct starpu_perfmodel starpu_dgemm_model_common = { .cost_function = gemm_cost, .type = STARPU_COMMON, }; #endif /* __BLAS_MODEL_H__ */ starpu-1.4.9+dfsg/examples/cpp/000077500000000000000000000000001507764646700164105ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/cpp/Makefile_add_vectors.mk000066400000000000000000000017351507764646700230410ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROG = add_vectors SRCCXX = add_vectors.cpp CXX = g++ CXXFLAGS = -g -DPRINT_OUTPUT $(shell pkg-config --cflags starpu-1.3) LDLIBS = $(shell pkg-config --libs starpu-1.3) OBJS = $(SRCCXX:%.cpp=%.o) .phony: all clean all: $(PROG) $(PROG): $(OBJS) $(CXX) $(LDFLAGS) -o $@ $^ $(LDLIBS) clean: rm -fv *.o $(PROG) starpu-1.4.9+dfsg/examples/cpp/Makefile_add_vectors_cpp11.mk000066400000000000000000000017641507764646700240470ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROG = add_vectors_cpp11 SRCCXX = add_vectors_cpp11.cpp CXX = g++ CXXFLAGS = -g -std=c++11 -DPRINT_OUTPUT $(shell pkg-config --cflags starpu-1.3) LDLIBS = $(shell pkg-config --libs starpu-1.3) OBJS = $(SRCCXX:%.cpp=%.o) .phony: all clean all: $(PROG) $(PROG): $(OBJS) $(CXX) $(LDFLAGS) -o $@ $^ $(LDLIBS) clean: rm -fv *.o $(PROG) starpu-1.4.9+dfsg/examples/cpp/add_vectors.cpp000066400000000000000000000114461507764646700214170ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is a small example of a C++ program using starpu. We here just * add two std::vector without copying them (0 copy). */ #include #include #ifdef PRINT_OUTPUT #include #endif #include void cpu_kernel_add_vectors(void *buffers[], void *cl_arg) { // get the current task starpu_task* task = starpu_task_get_current(); // get the user data (pointers to the vec_A, vec_B, vec_C std::vector) void* u_data0 = starpu_data_get_user_data(task->handles[0]); assert(u_data0); void* u_data1 = starpu_data_get_user_data(task->handles[1]); assert(u_data1); void* u_data2 = starpu_data_get_user_data(task->handles[2]); assert(u_data2); // cast void* in std::vector* std::vector* vec_A = static_cast*>(u_data0); std::vector* vec_B = static_cast*>(u_data1); std::vector* vec_C = static_cast*>(u_data2); // all the std::vector have to have the same size assert(vec_A->size() == vec_B->size() && vec_B->size() == vec_C->size()); // performs the vector addition (vec_C[] = vec_A[] + vec_B[]) for (size_t i = 0; i < vec_C->size(); i++) (*vec_C)[i] = (*vec_A)[i] + (*vec_B)[i]; } #define VEC_SIZE 1024 int main(int argc, char **argv) { std::vector vec_A(VEC_SIZE, 2); // all the vector is initialized to 2 std::vector vec_B(VEC_SIZE, 3); // all the vector is initialized to 3 std::vector vec_C(VEC_SIZE, 0); // all the vector is initialized to 0 struct starpu_conf conf; starpu_conf_init(&conf); /* starpu_data_get_user_data cannot work in master-slave */ conf.nmpi_ms = 0; conf.ntcpip_ms = 0; // initialize StarPU with default configuration int ret = starpu_init(&conf); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* StarPU can overwrite object if NUMA transfers are made */ if (starpu_memory_nodes_get_numa_count() > 1) { starpu_shutdown(); return 77; } // StarPU data registering starpu_data_handle_t spu_vec_A; starpu_data_handle_t spu_vec_B; starpu_data_handle_t spu_vec_C; // give the data of the vector to StarPU (C array) starpu_vector_data_register(&spu_vec_A, STARPU_MAIN_RAM, (uintptr_t)&vec_A[0], vec_A.size(), sizeof(char)); starpu_vector_data_register(&spu_vec_B, STARPU_MAIN_RAM, (uintptr_t)&vec_B[0], vec_B.size(), sizeof(char)); starpu_vector_data_register(&spu_vec_C, STARPU_MAIN_RAM, (uintptr_t)&vec_C[0], vec_C.size(), sizeof(char)); // pass the pointer to the C++ vector object to StarPU starpu_data_set_user_data(spu_vec_A, (void*)&vec_A); starpu_data_set_user_data(spu_vec_B, (void*)&vec_B); starpu_data_set_user_data(spu_vec_C, (void*)&vec_C); // create the StarPU codelet starpu_codelet cl; starpu_codelet_init(&cl); cl.cpu_funcs [0] = cpu_kernel_add_vectors; cl.cpu_funcs_name[0] = "cpu_kernel_add_vectors"; cl.nbuffers = 3; cl.modes [0] = STARPU_R; cl.modes [1] = STARPU_R; cl.modes [2] = STARPU_W; cl.name = "add_vectors"; // submit a new StarPU task to execute ret = starpu_task_insert(&cl, STARPU_R, spu_vec_A, STARPU_R, spu_vec_B, STARPU_W, spu_vec_C, 0); if (ret == -ENODEV) { // StarPU data unregistering starpu_data_unregister(spu_vec_C); starpu_data_unregister(spu_vec_B); starpu_data_unregister(spu_vec_A); // terminate StarPU, no task can be submitted after starpu_shutdown(); return 77; } STARPU_CHECK_RETURN_VALUE(ret, "task_submit::add_vectors"); // wait the task starpu_task_wait_for_all(); // StarPU data unregistering starpu_data_unregister(spu_vec_C); starpu_data_unregister(spu_vec_B); starpu_data_unregister(spu_vec_A); // terminate StarPU, no task can be submitted after starpu_shutdown(); // check results bool fail = false; int i = 0; while (!fail && i < VEC_SIZE) fail = vec_C[i++] != 5; if (fail) { #ifdef PRINT_OUTPUT std::cout << "Example failed..." << std::endl; #endif return EXIT_FAILURE; } else { #ifdef PRINT_OUTPUT std::cout << "Example successfully passed!" << std::endl; #endif return EXIT_SUCCESS; } } starpu-1.4.9+dfsg/examples/cpp/add_vectors_cpp11.cpp000066400000000000000000000114451507764646700224220ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is a small example of a C++ program using starpu. We here just * add two std::vector without copying them (0 copy). */ #include #include #ifdef PRINT_OUTPUT #include #endif #include #if !defined(STARPU_HAVE_CXX11) int main(int argc, char **argv) { return 77; } #else void cpu_kernel_add_vectors(void *buffers[], void *cl_arg) { // get the current task auto task = starpu_task_get_current(); // get the user data (pointers to the vec_A, vec_B, vec_C std::vector) auto u_data0 = starpu_data_get_user_data(task->handles[0]); assert(u_data0); auto u_data1 = starpu_data_get_user_data(task->handles[1]); assert(u_data1); auto u_data2 = starpu_data_get_user_data(task->handles[2]); assert(u_data2); // cast void* in std::vector* auto vec_A = static_cast*>(u_data0); auto vec_B = static_cast*>(u_data1); auto vec_C = static_cast*>(u_data2); // all the std::vector have to have the same size assert(vec_A->size() == vec_B->size() && vec_B->size() == vec_C->size()); // performs the vector addition (vec_C[] = vec_A[] + vec_B[]) for (size_t i = 0; i < vec_C->size(); i++) (*vec_C)[i] = (*vec_A)[i] + (*vec_B)[i]; } int main(int argc, char **argv) { constexpr int vec_size = 1024; std::vector vec_A(vec_size, 2); // all the vector is initialized to 2 std::vector vec_B(vec_size, 3); // all the vector is initialized to 3 std::vector vec_C(vec_size, 0); // all the vector is initialized to 0 struct starpu_conf conf; starpu_conf_init(&conf); /* starpu_data_get_user_data cannot work in master-slave */ conf.nmpi_ms = 0; conf.ntcpip_ms = 0; // initialize StarPU with default configuration auto ret = starpu_init(&conf); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_memory_nodes_get_numa_count() > 1) { starpu_shutdown(); return 77; } // StarPU data registering starpu_data_handle_t spu_vec_A; starpu_data_handle_t spu_vec_B; starpu_data_handle_t spu_vec_C; // give the data of the vector to StarPU (C array) starpu_vector_data_register(&spu_vec_A, STARPU_MAIN_RAM, (uintptr_t)vec_A.data(), vec_A.size(), sizeof(char)); starpu_vector_data_register(&spu_vec_B, STARPU_MAIN_RAM, (uintptr_t)vec_B.data(), vec_B.size(), sizeof(char)); starpu_vector_data_register(&spu_vec_C, STARPU_MAIN_RAM, (uintptr_t)vec_C.data(), vec_C.size(), sizeof(char)); // pass the pointer to the C++ vector object to StarPU starpu_data_set_user_data(spu_vec_A, (void*)&vec_A); starpu_data_set_user_data(spu_vec_B, (void*)&vec_B); starpu_data_set_user_data(spu_vec_C, (void*)&vec_C); // create the StarPU codelet starpu_codelet cl; starpu_codelet_init(&cl); cl.cpu_funcs [0] = cpu_kernel_add_vectors; cl.cpu_funcs_name[0] = "cpu_kernel_add_vectors"; cl.nbuffers = 3; cl.modes [0] = STARPU_R; cl.modes [1] = STARPU_R; cl.modes [2] = STARPU_W; cl.name = "add_vectors"; // submit a new StarPU task to execute ret = starpu_task_insert(&cl, STARPU_R, spu_vec_A, STARPU_R, spu_vec_B, STARPU_W, spu_vec_C, 0); if (ret == -ENODEV) { // StarPU data unregistering starpu_data_unregister(spu_vec_C); starpu_data_unregister(spu_vec_B); starpu_data_unregister(spu_vec_A); // terminate StarPU, no task can be submitted after starpu_shutdown(); return 77; } STARPU_CHECK_RETURN_VALUE(ret, "task_submit::add_vectors"); // wait the task starpu_task_wait_for_all(); // StarPU data unregistering starpu_data_unregister(spu_vec_C); starpu_data_unregister(spu_vec_B); starpu_data_unregister(spu_vec_A); // terminate StarPU, no task can be submitted after starpu_shutdown(); // check results auto fail = false; auto i = 0; while (!fail && i < vec_size) fail = vec_C[i++] != 5; if (fail) { #ifdef PRINT_OUTPUT std::cout << "Example failed..." << std::endl; #endif return EXIT_FAILURE; } else { #ifdef PRINT_OUTPUT std::cout << "Example successfully passed!" << std::endl; #endif return EXIT_SUCCESS; } } #endif starpu-1.4.9+dfsg/examples/cpp/add_vectors_interface.cpp000066400000000000000000000440401507764646700234330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is a small example of a C++ program using STL and starpu. We here just * add two std::vector with duplicating vectors. StarPU achieves data * transfers between objects. */ #if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNU_MINOR < 9)) int main(int argc, char **argv) { return 77; } #else #include #include #ifdef PRINT_OUTPUT #include #endif #include #define MY_TYPE char, my_allocator /* create an allocator to put data on the correct NUMA node */ template class my_allocator { public: typedef size_t size_type; typedef ptrdiff_t difference_type; typedef T* pointer; typedef const T* const_pointer; typedef T& reference; typedef const T& const_reference; typedef T value_type; my_allocator() { this->node = STARPU_MAIN_RAM; } my_allocator(const my_allocator& a) { node = a.get_node(); } explicit my_allocator(const unsigned thenode) { this->node = thenode; } pointer allocate(size_type n, const void * = 0) { T* t = (T*) starpu_malloc_on_node(this->node, n * sizeof(T)); return t; } void deallocate(void* p, size_type n) { if (p) { starpu_free_on_node(this->node, (uintptr_t) p, n * sizeof(T)); } } unsigned get_node() const { return node; } pointer address(reference x) const { return &x; } const_pointer address(const_reference x) const { return &x; } my_allocator& operator=(const my_allocator&ref) { node = ref.node; return *this; } void construct(pointer p, const T& val) { new ((T*) p) T(val); } void destroy(pointer p) { p->~T(); } size_type max_size() const { return size_type(-1); } template struct rebind { typedef my_allocator other; }; template explicit my_allocator(const my_allocator&ref) { node = ref.node; } template my_allocator& operator=(const my_allocator&ref) { node = ref.node; return *this; } private: unsigned node; }; /* * Create a new interface to catch C++ vector and make appropriate data transfers */ struct vector_cpp_interface { enum starpu_data_interface_id id; uintptr_t ptr; uint32_t nx; size_t elemsize; std::vector* vec; uint32_t slice_base; }; #define VECTOR_CPP_GET_VEC(interface) ({ (((struct vector_cpp_interface *)(interface))->vec); }) static int vector_interface_copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); #if __cplusplus >= 201103L static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s = { .can_copy = NULL, .ram_to_ram = NULL, .ram_to_cuda = NULL, .ram_to_hip = NULL, .ram_to_opencl = NULL, .ram_to_max_fpga = NULL, .cuda_to_ram = NULL, .cuda_to_cuda = NULL, .hip_to_ram = NULL, .hip_to_hip = NULL, .opencl_to_ram = NULL, .opencl_to_opencl = NULL, .max_fpga_to_ram = NULL, .ram_to_cuda_async = NULL, .cuda_to_ram_async = NULL, .cuda_to_cuda_async = NULL, .ram_to_hip_async = NULL, .hip_to_ram_async = NULL, .hip_to_hip_async = NULL, .ram_to_opencl_async = NULL, .opencl_to_ram_async = NULL, .opencl_to_opencl_async = NULL, .ram_to_max_fpga_async = NULL, .max_fpga_to_ram_async = NULL, .any_to_any = vector_interface_copy_any_to_any, }; #else static const struct starpu_data_copy_methods vector_cpp_copy_data_methods_s = { NULL, // can_copy NULL, // ram_to_ram NULL, // ram_to_cuda NULL, // ram_to_hip NULL, // ram_to_opencl NULL, // ram_to_max_fpga NULL, // cuda_to_ram NULL, // cuda_to_cuda NULL, // hip_to_ram NULL, // hip_to_hip NULL, // opencl_to_ram NULL, // opencl_to_opencl NULL, // max_fpga_to_ram NULL, // ram_to_cuda_async NULL, // cuda_to_ram_async NULL, // cuda_to_cuda_async NULL, // ram_to_hip_async NULL, // hip_to_ram_async NULL, // hip_to_hip_async NULL, // ram_to_opencl_async NULL, // opencl_to_ram_async NULL, // opencl_to_opencl_async NULL, // ram_to_max_fpga_asyn NULL, // max_fpga_to_ram_asyn vector_interface_copy_any_to_any, }; #endif static void register_vector_cpp_handle(starpu_data_handle_t handle, int home_node, void *data_interface); static starpu_ssize_t allocate_vector_cpp_buffer_on_node(void *data_interface_, unsigned dst_node); static void *vector_cpp_to_pointer(void *data_interface, unsigned node); static void free_vector_cpp_buffer_on_node(void *data_interface, unsigned node); static void free_vector_cpp_buffer_on_node(void *data_interface, unsigned node); static size_t vector_cpp_interface_get_size(starpu_data_handle_t handle); static uint32_t footprint_vector_cpp_interface_crc32(starpu_data_handle_t handle); static int vector_cpp_compare(void *data_interface_a, void *data_interface_b); static void display_vector_cpp_interface(starpu_data_handle_t handle, FILE *f); static int pack_vector_cpp_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); static int peek_vector_cpp_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static int unpack_vector_cpp_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static starpu_ssize_t vector_cpp_describe(void *data_interface, char *buf, size_t size); #if __cplusplus >= 201103L static struct starpu_data_interface_ops interface_vector_cpp_ops = { .register_data_handle = register_vector_cpp_handle, .unregister_data_handle = NULL, .allocate_data_on_node = allocate_vector_cpp_buffer_on_node, .free_data_on_node = free_vector_cpp_buffer_on_node, .cache_data_on_node = NULL, .reuse_data_on_node = NULL, .map_data = NULL, .unmap_data = NULL, .update_map = NULL, .init = NULL, .copy_methods = &vector_cpp_copy_data_methods_s, .handle_to_pointer = NULL, .to_pointer = vector_cpp_to_pointer, .get_size = vector_cpp_interface_get_size, .get_alloc_size = NULL, .get_max_size = NULL, .footprint = footprint_vector_cpp_interface_crc32, .alloc_footprint = NULL, .compare = vector_cpp_compare, .alloc_compare = NULL, .display = display_vector_cpp_interface, .describe = vector_cpp_describe, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct vector_cpp_interface), .is_multiformat = 0, .dontcache = 0, .get_mf_ops = NULL, .pack_data = pack_vector_cpp_handle, .peek_data = peek_vector_cpp_handle, .unpack_data = unpack_vector_cpp_handle, .pack_meta = NULL, .unpack_meta = NULL, .free_meta = NULL, .name = (char *) "VECTOR_CPP_INTERFACE" }; #else static struct starpu_data_interface_ops interface_vector_cpp_ops = { register_vector_cpp_handle, NULL, allocate_vector_cpp_buffer_on_node, free_vector_cpp_buffer_on_node, NULL, NULL, NULL, NULL, NULL, &vector_cpp_copy_data_methods_s, vector_cpp_to_pointer, vector_cpp_interface_get_size, NULL, NULL, footprint_vector_cpp_interface_crc32, NULL, vector_cpp_compare, NULL, display_vector_cpp_interface, vector_cpp_describe, STARPU_UNKNOWN_INTERFACE_ID, sizeof(struct vector_cpp_interface), 0, 0, NULL, pack_vector_cpp_handle, peek_vector_cpp_handle, unpack_vector_cpp_handle, NULL, NULL, NULL, (char *) "VECTOR_CPP_INTERFACE" }; #endif static void *vector_cpp_to_pointer(void *data_interface, unsigned node) { (void) node; struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) data_interface; return (void*) vector_interface->ptr; } static void register_vector_cpp_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) data_interface; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct vector_cpp_interface *local_interface = (struct vector_cpp_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->ptr = vector_interface->ptr; local_interface->vec = vector_interface->vec; } else { local_interface->ptr = 0; local_interface->vec = NULL; } local_interface->id = vector_interface->id; local_interface->nx = vector_interface->nx; local_interface->elemsize = vector_interface->elemsize; local_interface->slice_base = vector_interface->slice_base; } } /* declare a new data with the vector interface */ void vector_cpp_data_register(starpu_data_handle_t *handleptr, int home_node, std::vector* vec, uint32_t nx, size_t elemsize) { #if __cplusplus >= 201103L struct vector_cpp_interface vector = { .id = STARPU_UNKNOWN_INTERFACE_ID, .ptr = (uintptr_t) &(*vec)[0], .nx = nx, .elemsize = elemsize, .vec = vec, .slice_base = 0 }; #else struct vector_cpp_interface vector = { STARPU_UNKNOWN_INTERFACE_ID, (uintptr_t) &(*vec)[0], (uintptr_t) &(*vec)[0], 0, nx, elemsize, vec, 0 }; #endif starpu_data_register(handleptr, home_node, &vector, &interface_vector_cpp_ops); } /* offer an access to the data parameters */ uint32_t vector_cpp_get_nx(starpu_data_handle_t handle) { struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return vector_interface->nx; } static uint32_t footprint_vector_cpp_interface_crc32(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(vector_cpp_get_nx(handle), 0); } static int vector_cpp_compare(void *data_interface_a, void *data_interface_b) { struct vector_cpp_interface *vector_a = (struct vector_cpp_interface *) data_interface_a; struct vector_cpp_interface *vector_b = (struct vector_cpp_interface *) data_interface_b; /* Two vectors are considered compatible if they have the same size */ return ((vector_a->nx == vector_b->nx) && (vector_a->elemsize == vector_b->elemsize)); } static void display_vector_cpp_interface(starpu_data_handle_t handle, FILE *f) { struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); fprintf(f, "%u\t", vector_interface->nx); } static int pack_vector_cpp_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) starpu_data_get_interface_on_node(handle, node); *count = vector_interface->nx*vector_interface->elemsize; if (ptr != NULL) { *ptr = (void*) starpu_malloc_on_node_flags(node, *count, 0); memcpy(*ptr, (void*)vector_interface->ptr, vector_interface->elemsize*vector_interface->nx); } return 0; } static int peek_vector_cpp_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) starpu_data_get_interface_on_node(handle, node); STARPU_ASSERT(count == vector_interface->elemsize * vector_interface->nx); memcpy((void*)vector_interface->ptr, ptr, count); return 0; } static int unpack_vector_cpp_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { peek_vector_cpp_handle(handle, node, ptr, count); starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); return 0; } static size_t vector_cpp_interface_get_size(starpu_data_handle_t handle) { size_t size; struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); size = vector_interface->nx*vector_interface->elemsize; return size; } size_t vector_cpp_get_elemsize(starpu_data_handle_t handle) { struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return vector_interface->elemsize; } /* memory allocation/deallocation primitives for the vector interface */ /* returns the size of the allocated area */ static starpu_ssize_t allocate_vector_cpp_buffer_on_node(void *data_interface_, unsigned dst_node) { struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) data_interface_; uint32_t nx = vector_interface->nx; size_t elemsize = vector_interface->elemsize; starpu_ssize_t allocated_memory; const my_allocator allocator(dst_node); std::vector * vec = new std::vector(nx, 0, allocator); vector_interface->vec = vec; if (!vector_interface->vec) return -ENOMEM; allocated_memory = nx*elemsize; /* update the data properly in consequence */ vector_interface->ptr = (uintptr_t) &((*vec)[0]); return allocated_memory; } static void free_vector_cpp_buffer_on_node(void *data_interface, unsigned node) { struct vector_cpp_interface *vector_interface = (struct vector_cpp_interface *) data_interface; delete vector_interface->vec; vector_interface->vec = NULL; vector_interface->ptr = 0; } static int vector_interface_copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct vector_cpp_interface *src_vector = (struct vector_cpp_interface *) src_interface; struct vector_cpp_interface *dst_vector = (struct vector_cpp_interface *) dst_interface; int ret; ret = starpu_interface_copy(src_vector->ptr, 0, src_node, dst_vector->ptr, 0, dst_node, src_vector->nx*src_vector->elemsize, async_data); return ret; } static starpu_ssize_t vector_cpp_describe(void *data_interface, char *buf, size_t size) { struct vector_cpp_interface *vector = (struct vector_cpp_interface *) data_interface; return snprintf(buf, size, "V%ux%u", (unsigned) vector->nx, (unsigned) vector->elemsize); } /* * End of interface */ /* Kernel using STL objects */ void cpu_kernel_add_vectors(void *buffers[], void *cl_arg) { std::vector* vec_A = VECTOR_CPP_GET_VEC(buffers[0]); std::vector* vec_B = VECTOR_CPP_GET_VEC(buffers[1]); std::vector* vec_C = VECTOR_CPP_GET_VEC(buffers[2]); // all the std::vector have to have the same size assert(vec_A->size() == vec_B->size() && vec_B->size() == vec_C->size()); // performs the vector addition (vec_C[] = vec_A[] + vec_B[]) for (size_t i = 0; i < vec_C->size(); i++) (*vec_C)[i] = (*vec_A)[i] + (*vec_B)[i]; } #define VEC_SIZE 1024 int main(int argc, char **argv) { struct starpu_conf conf; bool fail; starpu_conf_init(&conf); /* _starpu_src_common_execute_kernel doesn't support this yet */ conf.nmpi_ms = 0; conf.ntcpip_ms = 0; // initialize StarPU with default configuration int ret = starpu_init(&conf); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); { /* Test data transfers between NUMA nodes if available */ unsigned last_numa_node = starpu_memory_nodes_get_numa_count() - 1; const my_allocator allocator_main_ram(STARPU_MAIN_RAM); const my_allocator allocator_last_numa(last_numa_node); std::vector vec_A(VEC_SIZE, 2, allocator_main_ram); // all the vector is initialized to 2 std::vector vec_B(VEC_SIZE, 3, allocator_main_ram); // all the vector is initialized to 3 std::vector vec_C(VEC_SIZE, 0, allocator_last_numa); // all the vector is initialized to 0 // StarPU data registering starpu_data_handle_t spu_vec_A; starpu_data_handle_t spu_vec_B; starpu_data_handle_t spu_vec_C; // give the data of the vector to StarPU (C array) vector_cpp_data_register(&spu_vec_A, STARPU_MAIN_RAM, &vec_A, vec_A.size(), sizeof(char)); vector_cpp_data_register(&spu_vec_B, STARPU_MAIN_RAM, &vec_B, vec_B.size(), sizeof(char)); vector_cpp_data_register(&spu_vec_C, last_numa_node, &vec_C, vec_C.size(), sizeof(char)); // create the StarPU codelet starpu_codelet cl; starpu_codelet_init(&cl); cl.cpu_funcs [0] = cpu_kernel_add_vectors; cl.cpu_funcs_name[0] = "cpu_kernel_add_vectors"; cl.nbuffers = 3; cl.modes [0] = STARPU_R; cl.modes [1] = STARPU_R; cl.modes [2] = STARPU_W; cl.name = "add_vectors"; // submit a new StarPU task to execute ret = starpu_task_insert(&cl, STARPU_R, spu_vec_A, STARPU_R, spu_vec_B, STARPU_W, spu_vec_C, 0); if (ret == -ENODEV) { // StarPU data unregistering starpu_data_unregister(spu_vec_C); starpu_data_unregister(spu_vec_B); starpu_data_unregister(spu_vec_A); // terminate StarPU, no task can be submitted after starpu_shutdown(); return 77; } STARPU_CHECK_RETURN_VALUE(ret, "task_submit::add_vectors"); // wait the task starpu_task_wait_for_all(); // StarPU data unregistering starpu_data_unregister(spu_vec_C); starpu_data_unregister(spu_vec_B); starpu_data_unregister(spu_vec_A); // check results fail = false; int i = 0; while (!fail && i < VEC_SIZE) fail = vec_C[i++] != 5; } // terminate StarPU, no task can be submitted after starpu_shutdown(); if (fail) { #ifdef PRINT_OUTPUT std::cout << "Example failed..." << std::endl; #endif return EXIT_FAILURE; } else { #ifdef PRINT_OUTPUT std::cout << "Example successfully passed!" << std::endl; #endif return EXIT_SUCCESS; } } #endif starpu-1.4.9+dfsg/examples/cpp/incrementer_cpp.cpp000066400000000000000000000066771507764646700223110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is a small example of a C++ program using starpu. We here just * increment two values of a vector several times. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #ifdef STARPU_USE_CUDA extern "C" void cuda_codelet(void *descr[], __attribute__ ((unused)) void *_args); #endif #ifdef STARPU_USE_OPENCL extern "C" void opencl_codelet(void *descr[], __attribute__ ((unused)) void *_args); struct starpu_opencl_program opencl_program; #endif extern "C" void cpu_codelet(void *descr[], __attribute__ ((unused)) void *_args) { float *val = (float *)STARPU_VECTOR_GET_PTR(descr[0]); val[0] += 1.0f; val[1] += 1.0f; } int main(int argc, char **argv) { int ret = 0; starpu_data_handle_t float_array_handle; float float_array[4] __attribute__ ((aligned (16))) = { 0.0f, 0.0f, 0.0f, 0.0f}; struct starpu_codelet cl; unsigned i; unsigned niter = 50; struct starpu_conf conf; starpu_conf_init(&conf); ret = starpu_init(&conf); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_vector_data_register(&float_array_handle, STARPU_MAIN_RAM, (uintptr_t)&float_array, 4, sizeof(float)); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("examples/incrementer/incrementer_kernels_opencl_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif starpu_codelet_init(&cl); cl.cpu_funcs[0] = cpu_codelet; cl.cpu_funcs_name[0] = "cpu_codelet"; #ifdef STARPU_USE_CUDA cl.cuda_funcs[0] = cuda_codelet; cl.cuda_flags[0] = STARPU_CUDA_ASYNC; #endif #ifdef STARPU_USE_OPENCL cl.opencl_funcs[0] = opencl_codelet; cl.opencl_flags[0] = STARPU_OPENCL_ASYNC; #endif cl.nbuffers = 1; cl.modes[0] = STARPU_RW; cl.name = "incrementer"; for (i = 0; i < niter; i++) { ret = starpu_task_insert(&cl, STARPU_RW, float_array_handle, STARPU_TAG_ONLY, (starpu_tag_t) i, 0); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); exit(77); } } starpu_task_wait_for_all(); /* update the array in RAM */ starpu_data_unregister(float_array_handle); FPRINTF(stderr, "array -> %f, %f, %f, %f\n", float_array[0], float_array[1], float_array[2], float_array[3]); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif starpu_shutdown(); if (float_array[0] != niter || float_array[0] != float_array[1] + float_array[2] + float_array[3]) { FPRINTF(stderr, "Incorrect result\n"); return EXIT_FAILURE; } return EXIT_SUCCESS; } starpu-1.4.9+dfsg/examples/dependency/000077500000000000000000000000001507764646700177445ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/dependency/sequential_consistency.c000066400000000000000000000114311507764646700247030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void cpu_codeletA(void *descr[], void *args); void cpu_codeletB(void *descr[], void *args); void cpu_codeletC(void *descr[], void *args); struct starpu_codelet clA = { .cpu_funcs = {cpu_codeletA}, .cpu_funcs_name = {"cpu_codeletA"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "codeletA" }; struct starpu_codelet clB = { .cpu_funcs = {cpu_codeletB}, .cpu_funcs_name = {"cpu_codeletB"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "codeletB" }; struct starpu_codelet clC = { .cpu_funcs = {cpu_codeletC}, .cpu_funcs_name = {"cpu_codeletC"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "codeletC" }; void cpu_codeletA(void *descr[], void *args) { int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); starpu_data_handle_t value_handle; starpu_tag_t tagHoldC; int ret; unsigned char handle_sequential_consistency[] = {0}; FPRINTF(stderr, "[Task A] Value = %d\n", *val); starpu_codelet_unpack_args(args, &value_handle, &tagHoldC); // With several data, one would need to use a dynamically // allocated array for the sequential consistency, // the array could be freed immediately after calling // starpu_task_insert() ret = starpu_task_insert(&clB, STARPU_RW, value_handle, STARPU_CALLBACK_WITH_ARG_NFREE, starpu_tag_notify_from_apps, tagHoldC, STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, handle_sequential_consistency, STARPU_NAME, "taskB", 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); *val *= 2; } void cpu_codeletB(void *descr[], void *args) { (void)args; int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); FPRINTF(stderr, "[Task B] Value = %d\n", *val); STARPU_ASSERT_MSG(*val == 24, "Incorrect value %d (expected 24)\n", *val); *val += 1; } void cpu_codeletC(void *descr[], void *args) { (void)args; int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); FPRINTF(stderr, "[Task C] Value = %d\n", *val); STARPU_ASSERT_MSG(*val == 25, "Incorrect value %d (expected 25)\n", *val); *val *= 2; } /* * Submit taskA and hold it * Submit taskC and hold it * Release taskA * Execute taskA --> submit taskB * Execute taskB --> callback: release taskC * * All three tasks use the same data in RW, taskB is submitted after * taskC, so taskB should normally only execute after taskC but as the * sequential consistency for (taskB, data) is unset, taskB can * execute straight away */ int main(void) { int value=12; int ret; starpu_data_handle_t value_handle; starpu_tag_t tagHoldA = 42; starpu_tag_t tagHoldC = 84; starpu_tag_t tagA = 421; starpu_tag_t tagC = 842; struct starpu_conf conf; if (sizeof(starpu_tag_t) > sizeof(void*)) { // Can't pass a tag_t through callback arg :/ return 77; } starpu_conf_init(&conf); conf.nmpi_ms = 0; conf.ntcpip_ms = 0; ret = starpu_init(&conf); if (STARPU_UNLIKELY(ret == -ENODEV)) { return 77; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() < 1) { FPRINTF(stderr, "This application requires at least 1 cpu worker\n"); starpu_shutdown(); return 77; } starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); starpu_tag_declare_deps_array(tagA, 1, &tagHoldA); starpu_tag_declare_deps_array(tagC, 1, &tagHoldC); ret = starpu_task_insert(&clA, STARPU_TAG, tagA, STARPU_RW, value_handle, STARPU_VALUE, &value_handle, sizeof(starpu_data_handle_t), STARPU_VALUE, &tagHoldC, sizeof(starpu_tag_t), STARPU_NAME, "taskA", 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&clC, STARPU_TAG, tagC, STARPU_RW, value_handle, STARPU_NAME, "taskC", 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); // Release taskA (we want to make sure it will execute after taskC has been submitted) starpu_tag_notify_from_apps(tagHoldA); starpu_data_unregister(value_handle); STARPU_ASSERT_MSG(value == 50, "Incorrect value %d (expected 50)\n", value); starpu_shutdown(); FPRINTF(stderr, "Value = %d\n", value); return ret; } starpu-1.4.9+dfsg/examples/dependency/task_end_dep.c000066400000000000000000000062551507764646700225400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This shows how to defer termination of a task until the termination of * another task. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define INIT 12 void cpu_codelet2(void *descr[], void *args) { int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); (void)args; STARPU_ASSERT(*val == 2*INIT); starpu_sleep(0.1); STARPU_ASSERT(*val == 2*INIT); *val *= 2; } struct starpu_codelet cl2 = { .cpu_funcs = {cpu_codelet2}, .cpu_funcs_name = {"cpu_codelet2"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "codelet2" }; void cpu_codelet(void *descr[], void *args) { (void)args; int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); struct starpu_task *task = starpu_task_get_current(); int ret; ret = starpu_task_insert(&cl2, STARPU_RW, task->handles[0], STARPU_TASK_END_DEPS_ARRAY, 1, &task, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); STARPU_ASSERT(*val == INIT); starpu_sleep(0.1); STARPU_ASSERT(*val == INIT); *val *= 2; } struct starpu_codelet cl = { .cpu_funcs = {cpu_codelet}, .cpu_funcs_name = {"cpu_codelet"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "codelet" }; int main(void) { int value=INIT; int ret; starpu_data_handle_t value_handle; struct starpu_conf conf; struct starpu_task *task; starpu_conf_init(&conf); conf.nmpi_ms = 0; conf.ntcpip_ms = 0; ret = starpu_init(&conf); if (STARPU_UNLIKELY(ret == -ENODEV)) { return 77; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() < 1) { FPRINTF(stderr, "This application requires at least 1 cpu worker\n"); starpu_shutdown(); return 77; } starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); task = starpu_task_build(&cl, STARPU_RW, value_handle, 0); STARPU_ASSERT(task); task->detach = 0; ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); starpu_data_set_sequential_consistency_flag(value_handle, 0); starpu_data_acquire_on_node(value_handle, STARPU_MAIN_RAM, STARPU_R); /* Waiting for the main task should have also waited for the subtask */ STARPU_ASSERT(value == 2*2*INIT); starpu_data_release_on_node(value_handle, STARPU_MAIN_RAM); starpu_data_unregister(value_handle); STARPU_ASSERT(value == 2*2*INIT); starpu_shutdown(); FPRINTF(stderr, "Value = %d\n", value); return ret; } starpu-1.4.9+dfsg/examples/dependency/task_end_dep_add.c000066400000000000000000000050031507764646700233360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This shows how to defer termination of a task thanks to * starpu_task_end_dep_add. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define INIT 12 void cpu_codelet2(void *descr[], void *args) { (void)descr; (void)args; } struct starpu_codelet cl2 = { .cpu_funcs = {cpu_codelet2}, .cpu_funcs_name = {"cpu_codelet2"}, .name = "codelet2" }; void cpu_codelet(void *descr[], void *args) { (void)args; int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); struct starpu_task *task; int ret; task = starpu_task_get_current(); starpu_task_end_dep_add(task, 1); ret = starpu_task_insert(&cl2, STARPU_CALLBACK_WITH_ARG_NFREE, starpu_task_end_dep_release, task, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); STARPU_ASSERT(*val == INIT); *val *= 2; } struct starpu_codelet cl = { .cpu_funcs = {cpu_codelet}, .cpu_funcs_name = {"cpu_codelet"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "codelet" }; int main(void) { int value=INIT; int ret; starpu_data_handle_t value_handle; struct starpu_conf conf; starpu_conf_init(&conf); conf.nmpi_ms = 0; conf.ntcpip_ms = 0; ret = starpu_init(&conf); if (STARPU_UNLIKELY(ret == -ENODEV)) { return 77; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() < 1) { FPRINTF(stderr, "This application requires at least 1 cpu worker\n"); starpu_shutdown(); return 77; } starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); ret = starpu_task_insert(&cl, STARPU_RW, value_handle, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_data_unregister(value_handle); STARPU_ASSERT(value == 2*INIT); starpu_shutdown(); FPRINTF(stderr, "Value = %d\n", value); return ret; } starpu-1.4.9+dfsg/examples/filters/000077500000000000000000000000001507764646700172765ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/filters/alloc.c000066400000000000000000000054301507764646700205360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "starpu.h" #define NPARTS 4 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void init_cpu(void* buffers[], void *args) { double *v = (double*)STARPU_VECTOR_GET_PTR(buffers[0]); unsigned nx = STARPU_VECTOR_GET_NX(buffers[0]); unsigned i; for (i=0; i #include "custom_types.h" #include "custom_interface.h" static __global__ void custom_cuda(struct point *aop, unsigned n, float *x, float *y) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) { x[i] = aop[i].x; y[i] = aop[i].y; } } extern "C" void cpu_to_cuda_cuda_func(void *buffers[], void *_args) { (void) _args; unsigned int n = CUSTOM_GET_NX(buffers[0]); float *x = (float*) CUSTOM_GET_X_PTR(buffers[0]); float *y = (float*) CUSTOM_GET_Y_PTR(buffers[0]); struct point *aop; aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]); unsigned threads_per_block = 64; unsigned nblocks = (n + threads_per_block-1) / threads_per_block; custom_cuda<<>>(aop, n, x, y); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/filters/custom_mf/conversion_opencl.c000066400000000000000000000045411507764646700251670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "custom_types.h" #include "custom_interface.h" extern struct starpu_opencl_program _opencl_conversion_program; void cpu_to_opencl_opencl_func(void *buffers[], void *args) { (void) args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; unsigned n = CUSTOM_GET_NX(buffers[0]); n*=2; struct point *aop; aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &_opencl_conversion_program, "custom_opencl_conversion", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); void *x = CUSTOM_GET_OPENCL_X_PTR(buffers[0]); if (starpu_opencl_set_kernel_args(&err, &kernel, sizeof(aop), &aop, sizeof(x), &x, sizeof(n), &n, 0) != 3) { STARPU_OPENCL_REPORT_ERROR(err); assert(0); } { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local = global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel( queue, kernel, 1, /* work_dim */ NULL, /* global_work_offset */ &global, /* global_work_size */ &local, /* local_work_size */ 0, /* num_events_in_wait_list */ NULL, /* event_wait_list */ NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/examples/filters/custom_mf/conversion_opencl.cl000066400000000000000000000020061507764646700253350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "custom_types.h" /* * The first n/2 values of x are actual xs. The last N/2 values are ys. */ __kernel void custom_opencl_conversion(__global struct point *aop, __global float *x, int nx) { const int i = get_global_id(0); if (i < nx/2) x[i] = aop[i].x; else if (i < nx) x[i] = aop[i-nx/2].y; } starpu-1.4.9+dfsg/examples/filters/custom_mf/cuda.cu000066400000000000000000000027071507764646700225450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "custom_types.h" #include "custom_interface.h" static __global__ void scal_cuda(unsigned n, float *x, float *y) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) x[i] *= y[i]; } extern "C" void custom_scal_cuda_func(void *buffers[], void *_args) { (void) _args; unsigned int n = CUSTOM_GET_NX(buffers[0]); float *x = (float*) CUSTOM_GET_X_PTR(buffers[0]); float *y = (float*) CUSTOM_GET_Y_PTR(buffers[0]); unsigned threads_per_block = 64; unsigned nblocks = (n + threads_per_block-1) / threads_per_block; scal_cuda<<>>(n, x, y); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/filters/custom_mf/custom_conversion_codelets.c000066400000000000000000000045151507764646700271040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "custom_interface.h" #include "custom_types.h" #ifdef STARPU_USE_CUDA void cuda_to_cpu(void *buffers[], void *arg) { (void)arg; int n = CUSTOM_GET_NX(buffers[0]); float *x = (float*) CUSTOM_GET_X_PTR(buffers[0]); float *y = (float*) CUSTOM_GET_Y_PTR(buffers[0]); struct point *aop; aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]); int i; for (i = 0; i < n; i++) { aop[i].x = x[i]; aop[i].y = y[i]; } return; } extern void cpu_to_cuda_cuda_func(void *buffers[], void *args); struct starpu_codelet cpu_to_cuda_cl = { .cuda_funcs = {cpu_to_cuda_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, .modes = { STARPU_RW }, .nbuffers = 1, .name = "codelet_cpu_to_cuda" }; struct starpu_codelet cuda_to_cpu_cl = { .cpu_funcs = {cuda_to_cpu}, .modes = { STARPU_RW }, .nbuffers = 1, .name = "codelet_cuda_to_cpu" }; #endif #ifdef STARPU_USE_OPENCL void opencl_to_cpu_cpu_func(void *buffers[], void *arg) { (void)arg; int n = CUSTOM_GET_NX(buffers[0]); float *x = (float *) CUSTOM_GET_OPENCL_X_PTR(buffers[0]); struct point *aop; aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]); int i; for (i = 0; i < n; i++) { aop[i].x = x[i]; aop[i].y = x[i+n]; } } extern void cpu_to_opencl_opencl_func(void *buffers[], void *arg); struct starpu_codelet cpu_to_opencl_cl = { .opencl_funcs = { cpu_to_opencl_opencl_func }, .opencl_flags = {STARPU_OPENCL_ASYNC}, .modes = { STARPU_RW }, .nbuffers = 1, .name = "codelet_cpu_to_opencl" }; struct starpu_codelet opencl_to_cpu_cl = { .cpu_funcs = { opencl_to_cpu_cpu_func }, .modes = { STARPU_RW }, .nbuffers = 1, .name = "codelet_opencl_to_cpu" }; #endif /* !STARPU_USE_OPENCL */ starpu-1.4.9+dfsg/examples/filters/custom_mf/custom_interface.c000066400000000000000000000337501507764646700250000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "custom_interface.h" #include "custom_types.h" #ifdef STARPU_USE_CUDA static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream); static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream); static int copy_cuda_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream); #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event); static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event); #endif /* !STARPU_USE_OPENCL */ static const struct starpu_data_copy_methods custom_copy_data_methods_s = { .ram_to_ram = NULL, #ifdef STARPU_USE_CUDA .ram_to_cuda = NULL, .cuda_to_ram = NULL, .ram_to_cuda_async = copy_ram_to_cuda_async, .cuda_to_ram_async = copy_cuda_to_ram_async, .cuda_to_cuda = copy_cuda_to_cuda, .cuda_to_cuda_async = copy_cuda_to_cuda_async, #endif #ifdef STARPU_USE_OPENCL .ram_to_opencl = copy_ram_to_opencl, .opencl_to_ram = copy_opencl_to_ram, .opencl_to_opencl = copy_opencl_to_opencl, .ram_to_opencl_async = copy_ram_to_opencl_async, .opencl_to_ram_async = copy_opencl_to_ram_async, #endif }; static void register_custom_handle(starpu_data_handle_t handle, int home_node, void *data_interface); static starpu_ssize_t allocate_custom_buffer_on_node(void *data_interface_, unsigned dst_node); static void* custom_to_pointer(void *data_interface, unsigned node); static void free_custom_buffer_on_node(void *data_interface, unsigned node); static size_t custom_interface_get_size(starpu_data_handle_t handle); static uint32_t footprint_custom_interface_crc32(starpu_data_handle_t handle); static void display_custom_interface(starpu_data_handle_t handle, FILE *f); static uint32_t custom_get_nx(starpu_data_handle_t handle); static struct starpu_multiformat_data_interface_ops*get_mf_ops(void *data_interface) { struct custom_data_interface *custom; custom = (struct custom_data_interface *) data_interface; return custom->ops; } static struct starpu_data_interface_ops interface_custom_ops = { .register_data_handle = register_custom_handle, .allocate_data_on_node = allocate_custom_buffer_on_node, .to_pointer = custom_to_pointer, .free_data_on_node = free_custom_buffer_on_node, .copy_methods = &custom_copy_data_methods_s, .get_size = custom_interface_get_size, .footprint = footprint_custom_interface_crc32, .compare = NULL, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct custom_data_interface), .display = display_custom_interface, .is_multiformat = 1, .get_mf_ops = get_mf_ops }; static void register_custom_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct custom_data_interface *custom_interface; custom_interface = (struct custom_data_interface *) data_interface; int node; int nnodes = starpu_memory_nodes_get_count(); for (node = 0; node < nnodes; node++) { struct custom_data_interface *local_interface = (struct custom_data_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->cpu_ptr = custom_interface->cpu_ptr; #ifdef STARPU_USE_CUDA local_interface->cuda_ptr = custom_interface->cuda_ptr; #endif #ifdef STARPU_USE_OPENCL local_interface->opencl_ptr = custom_interface->opencl_ptr; #endif } else { local_interface->cpu_ptr = NULL; #ifdef STARPU_USE_CUDA local_interface->cuda_ptr = NULL; #endif #ifdef STARPU_USE_OPENCL local_interface->opencl_ptr = NULL; #endif } local_interface->nx = custom_interface->nx; local_interface->ops = custom_interface->ops; } } static starpu_ssize_t allocate_custom_buffer_on_node(void *data_interface, unsigned node) { starpu_ssize_t size = 0; struct custom_data_interface *custom_interface; custom_interface = (struct custom_data_interface *) data_interface; size = custom_interface->nx * custom_interface->ops->cpu_elemsize; custom_interface->cpu_ptr = (void*) starpu_malloc_on_node(node, size); if (!custom_interface->cpu_ptr) goto fail_cpu; #ifdef STARPU_USE_CUDA custom_interface->cuda_ptr = (void*) starpu_malloc_on_node(node, size); if (!custom_interface->cuda_ptr) goto fail_cuda; #endif #ifdef STARPU_USE_OPENCL custom_interface->opencl_ptr = (void*) starpu_malloc_on_node(node, size); if (!custom_interface->opencl_ptr) goto fail_opencl; #endif return size #ifdef STARPU_USE_CUDA +size #endif #ifdef STARPU_USE_OPENCL +size #endif ; #ifdef STARPU_USE_OPENCL fail_opencl: #ifdef STARPU_USE_CUDA starpu_free_on_node(node, (uintptr_t) custom_interface->cuda_ptr, size); #endif #endif #ifdef STARPU_USE_CUDA fail_cuda: #endif starpu_free_on_node(node, (uintptr_t) custom_interface->cpu_ptr, size); fail_cpu: return -ENOMEM; } static void free_custom_buffer_on_node(void *data_interface, unsigned node) { struct custom_data_interface *custom_interface = (struct custom_data_interface *) data_interface; size_t size = custom_interface->nx * custom_interface->ops->cpu_elemsize; starpu_free_on_node(node, (uintptr_t) custom_interface->cpu_ptr, size); custom_interface->cpu_ptr = NULL; #ifdef STARPU_USE_CUDA starpu_free_on_node(node, (uintptr_t) custom_interface->cuda_ptr, size); custom_interface->cuda_ptr = NULL; #endif #ifdef STARPU_USE_OPENCL starpu_free_on_node(node, (uintptr_t) custom_interface->opencl_ptr, size); custom_interface->opencl_ptr = NULL; #endif } static void* custom_to_pointer(void *data, unsigned node) { struct custom_data_interface *data_interface = data; switch(starpu_node_get_kind(node)) { case STARPU_CPU_RAM: return data_interface->cpu_ptr; #ifdef STARPU_USE_CUDA case STARPU_CUDA_RAM: return data_interface->cuda_ptr; #endif #ifdef STARPU_USE_OPENCL case STARPU_OPENCL_RAM: return data_interface->opencl_ptr; #endif default: assert(0); } } static size_t custom_interface_get_size(starpu_data_handle_t handle) { size_t size; struct custom_data_interface *data_interface; data_interface = (struct custom_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); size = data_interface->nx * data_interface->ops->cpu_elemsize; return size; } static uint32_t footprint_custom_interface_crc32(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(custom_get_nx(handle), 0); } static void display_custom_interface(starpu_data_handle_t handle, FILE *f) { struct custom_data_interface *ci = (struct custom_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); fprintf(f, "Custom interface of size %u", ci->nx); } static uint32_t custom_get_nx(starpu_data_handle_t handle) { struct custom_data_interface *data_interface; data_interface = (struct custom_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return data_interface->nx; } void custom_data_register(starpu_data_handle_t *handle, int home_node, void *ptr, uint32_t nx, struct starpu_multiformat_data_interface_ops *format_ops) { struct custom_data_interface custom = { .cpu_ptr = ptr, #ifdef STARPU_USE_CUDA .cuda_ptr = NULL, #endif #ifdef STARPU_USE_OPENCL .opencl_ptr = NULL, #endif .nx = nx, .ops = format_ops }; starpu_data_register(handle, home_node, &custom, &interface_custom_ops); } #ifdef STARPU_USE_CUDA static int copy_cuda_common_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream, enum cudaMemcpyKind kind) { (void)src_node; (void)dst_node; struct custom_data_interface *src_custom, *dst_custom; src_custom = (struct custom_data_interface *) src_interface; dst_custom = (struct custom_data_interface *) dst_interface; starpu_ssize_t size = 0; cudaError_t err; switch (kind) { case cudaMemcpyHostToDevice: { size = src_custom->nx * src_custom->ops->cpu_elemsize; if (dst_custom->cpu_ptr == NULL) { err = cudaMalloc(&dst_custom->cpu_ptr, size); assert(err == cudaSuccess); } err = cudaMemcpyAsync(dst_custom->cpu_ptr, src_custom->cpu_ptr, size, kind, stream); assert(err == cudaSuccess); err = cudaMalloc(&dst_custom->cuda_ptr, size); assert(err == cudaSuccess); break; } case cudaMemcpyDeviceToHost: size = 2*src_custom->nx*sizeof(float); if (dst_custom->cuda_ptr == NULL) { dst_custom->cuda_ptr = malloc(size); if (dst_custom->cuda_ptr == NULL) return -ENOMEM; } err = cudaMemcpyAsync(dst_custom->cuda_ptr, src_custom->cuda_ptr, size, kind, stream); assert(err == cudaSuccess); break; default: assert(0); } return 0; } static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream) { return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice); } static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream) { return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost); } static int copy_cuda_to_cuda(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { (void)src_interface; (void)src_node; (void)dst_interface; (void)dst_node; assert(0); } static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream) { (void)src_interface; (void)src_node; (void)dst_interface; (void)dst_node; (void)stream; assert(0); } #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL static int copy_ram_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { (void) src_interface; (void) src_node; (void) dst_interface; (void) dst_node; return 0; } static int copy_opencl_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { (void) src_interface; (void) src_node; (void) dst_interface; (void) dst_node; return 0; } static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { (void) src_interface; (void) src_node; (void) dst_interface; (void) dst_node; return 0; } static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event) { (void)event; starpu_ssize_t size; struct custom_data_interface *src_custom, *dst_custom; src_custom = (struct custom_data_interface *) src_interface; dst_custom = (struct custom_data_interface *) dst_interface; /* * Opencl stuff. */ cl_context context; cl_command_queue queue; int id = starpu_worker_get_id_check(); int devid = starpu_worker_get_devid(id); starpu_opencl_get_queue(devid, &queue); starpu_opencl_get_context(devid, &context); /* Real stuff */ int err; cl_int ret; size = src_custom->nx * 2 * sizeof(float); if (dst_custom->cpu_ptr == NULL) { ret = starpu_opencl_allocate_memory(devid, (cl_mem*)&dst_custom->cpu_ptr, size, CL_MEM_READ_WRITE); assert(ret == CL_SUCCESS); } err = starpu_opencl_copy_ram_to_opencl(src_custom->cpu_ptr, src_node, dst_custom->cpu_ptr, dst_node, size, 0, NULL, &ret); assert(err == 0); return 0; } static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event) { (void)event; starpu_ssize_t size; struct custom_data_interface *src_custom, *dst_custom; src_custom = (struct custom_data_interface *) src_interface; dst_custom = (struct custom_data_interface *) dst_interface; /* * Opencl stuff. */ cl_context context; cl_command_queue queue; int id = starpu_worker_get_id_check(); int devid = starpu_worker_get_devid(id); starpu_opencl_get_queue(devid, &queue); starpu_opencl_get_context(devid, &context); /* real stuff */ int err; cl_int ret; size = src_custom->nx * 2 * sizeof(float); if (!dst_custom->opencl_ptr) { dst_custom->opencl_ptr = malloc(size); assert(dst_custom->opencl_ptr != NULL); } err = starpu_opencl_copy_opencl_to_ram(src_custom->opencl_ptr, src_node, dst_custom->opencl_ptr, dst_node, size, 0, NULL, &ret); assert(err == 0); return 0; } #endif /* !STARPU_USE_OPENCL */ starpu-1.4.9+dfsg/examples/filters/custom_mf/custom_interface.h000066400000000000000000000032641507764646700250020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __CUSTOM_INTERFACE_H__ #define __CUSTOM_INTERFACE_H__ #include struct custom_data_interface { void *cpu_ptr; void *cuda_ptr; void *opencl_ptr; struct starpu_multiformat_data_interface_ops *ops; uint32_t nx; }; void custom_data_register(starpu_data_handle_t *handle, int home_node, void *ptr, uint32_t nx, struct starpu_multiformat_data_interface_ops* ops); #define CUSTOM_GET_NX(interface) (((struct custom_data_interface*)(interface))->nx) #define CUSTOM_GET_CPU_PTR(interface) (((struct custom_data_interface*)(interface))->cpu_ptr) #ifdef STARPU_USE_CUDA #define CUSTOM_GET_X_PTR(interface) (((struct custom_data_interface*)(interface))->cuda_ptr) #define CUSTOM_GET_Y_PTR(interface) \ (((struct custom_data_interface*)(interface))->cuda_ptr)+ \ CUSTOM_GET_NX((interface)) #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL #define CUSTOM_GET_OPENCL_X_PTR(interface) (((struct custom_data_interface *)(interface))->opencl_ptr) #endif #endif /* ! __CUSTOM_INTERFACE_H__ */ starpu-1.4.9+dfsg/examples/filters/custom_mf/custom_mf_filter.c000066400000000000000000000161701507764646700250040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "custom_interface.h" #include "custom_types.h" #define N 12 #define DEBUG 1 #ifdef STARPU_USE_CUDA static unsigned int _ncuda; #endif #ifdef STARPU_USE_OPENCL static unsigned int _nopencl; #endif static struct point _array_of_structs[N]; static starpu_data_handle_t _handle; static unsigned int _nchunks = 6; #ifdef STARPU_USE_CUDA extern struct starpu_codelet cpu_to_cuda_cl; extern struct starpu_codelet cuda_to_cpu_cl; #endif #ifdef STARPU_USE_OPENCL extern struct starpu_codelet cpu_to_opencl_cl; extern struct starpu_codelet opencl_to_cpu_cl; #endif static struct starpu_multiformat_data_interface_ops format_ops = { #ifdef STARPU_USE_CUDA .cuda_elemsize = sizeof(struct struct_of_arrays), .cpu_to_cuda_cl = &cpu_to_cuda_cl, .cuda_to_cpu_cl = &cuda_to_cpu_cl, #endif #ifdef STARPU_USE_OPENCL .opencl_elemsize = sizeof(struct struct_of_arrays), .cpu_to_opencl_cl = &cpu_to_opencl_cl, .opencl_to_cpu_cl = &opencl_to_cpu_cl, #endif .cpu_elemsize = sizeof(struct point), }; static void custom_filter(void *father, void *child, struct starpu_data_filter *f, unsigned id, unsigned nchunks) { (void)f; struct custom_data_interface *custom_father, *custom_child; custom_father = (struct custom_data_interface *) father; custom_child = (struct custom_data_interface *) child; assert(N % nchunks == 0); // XXX starpu_ssize_t chunk_size = N/nchunks; if (custom_father->cpu_ptr) { struct point *tmp = (struct point *) custom_father->cpu_ptr; tmp += id * chunk_size; custom_child->cpu_ptr = tmp; } #ifdef STARPU_USE_CUDA else if (custom_father->cuda_ptr) { struct struct_of_arrays *soa_father, *soa_child; soa_father = (struct struct_of_arrays*) custom_father->cuda_ptr; soa_child = (struct struct_of_arrays*) custom_child->cuda_ptr; soa_child->x = soa_father->x + chunk_size; soa_child->y = soa_father->y + chunk_size; } #endif #ifdef STARPU_USE_OPENCL else if (custom_father->opencl_ptr) { struct struct_of_arrays *soa_father, *soa_child; soa_father = (struct struct_of_arrays*) custom_father->opencl_ptr; soa_child = (struct struct_of_arrays*) custom_child->opencl_ptr; soa_child->x = soa_father->x + chunk_size; soa_child->y = soa_father->y + chunk_size; } #endif /* !STARPU_USE_OPENCL */ custom_child->ops = custom_father->ops; custom_child->nx = chunk_size; } static void register_and_partition_data(void) { int i; for (i = 0; i < N; i++) { _array_of_structs[i].x = i+1.0; _array_of_structs[i].y = 42.0; } custom_data_register(&_handle, STARPU_MAIN_RAM, &_array_of_structs, N, &format_ops); struct starpu_data_filter f = { .filter_func = custom_filter, .nchildren = _nchunks, .get_nchildren = NULL, .get_child_ops = NULL }; starpu_data_partition(_handle, &f); } static void unpartition_and_unregister_data(void) { starpu_data_unpartition(_handle, STARPU_MAIN_RAM); starpu_data_unregister(_handle); } static void custom_scal_cpu_func(void *buffers[], void *args) { struct point *aos; unsigned int n, i; (void)args; aos = CUSTOM_GET_CPU_PTR(buffers[0]); n = CUSTOM_GET_NX(buffers[0]); for (i = 0; i < n; i++) aos[i].x *= aos[i].y; } #ifdef STARPU_USE_CUDA extern void custom_scal_cuda_func(void *buffers[], void *args); #endif static struct starpu_codelet cpu_cl = { .cpu_funcs = { custom_scal_cpu_func}, .nbuffers = 1, .modes = { STARPU_RW }, .name = "codelet_real" }; #ifdef STARPU_USE_CUDA static struct starpu_codelet cuda_cl = { .cuda_funcs = { custom_scal_cuda_func }, .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 1, .modes = { STARPU_RW }, .name = "cuda_codelet" }; #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL extern void custom_scal_opencl_func(void *buffers[], void *args); static struct starpu_codelet opencl_cl = { .opencl_funcs = { custom_scal_opencl_func }, .opencl_flags = {STARPU_OPENCL_ASYNC}, .nbuffers = 1, .modes = { STARPU_RW }, .name = "opencl_codelet" }; #endif /* !STARPU_USE_OPENCL */ static int create_and_submit_tasks(void) { int err; unsigned int i; for (i = 0; i < _nchunks; i++) { struct starpu_task *task = starpu_task_create(); switch (i%3) { case 0: task->cl = &cpu_cl; break; case 1: #ifdef STARPU_USE_CUDA if (_ncuda > 0) task->cl = &cuda_cl; else #endif task->cl = &cpu_cl; break; case 2: #ifdef STARPU_USE_OPENCL if (_nopencl > 0) task->cl = &opencl_cl; else #endif task->cl = &cpu_cl; break; default: /* We should never get here */ assert(0); } task->handles[0] = starpu_data_get_sub_data(_handle, 1, i); err = starpu_task_submit(task); if (err != 0) return err; } err = starpu_task_wait_for_all(); if (err != 0) return err; return 0; } #if DEBUG static void print_it(void) { int i; for (i = 0; i < N; i++) { FPRINTF(stderr, "(%.2f, %.2f) ", _array_of_structs[i].x, _array_of_structs[i].y); } FPRINTF(stderr, "\n"); } #endif static int check_it(void) { int i; for (i = 0; i < N; i++) { float expected_value = (i + 1.0)*42.0; if (_array_of_structs[i].x != expected_value) return EXIT_FAILURE; } return EXIT_SUCCESS; } #ifdef STARPU_USE_OPENCL struct starpu_opencl_program _opencl_program; struct starpu_opencl_program _opencl_conversion_program; #endif /* !STARPU_USE_OPENCL */ int main(void) { #ifndef STARPU_USE_CPU return 77; #else int err; err = starpu_init(NULL); if (err == -ENODEV) goto enodev; #ifdef STARPU_USE_CUDA _ncuda = starpu_cuda_worker_get_count(); #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL _nopencl = starpu_opencl_worker_get_count(); if (_nopencl > 0) { char *f1 = "examples/filters/custom_mf/custom_opencl.cl"; char *f2 = "examples/filters/custom_mf/conversion_opencl.cl"; err = starpu_opencl_load_opencl_from_file(f1, &_opencl_program, NULL); assert(err == 0); err = starpu_opencl_load_opencl_from_file(f2, &_opencl_conversion_program, NULL); assert(err == 0); } #endif /* !STARPU_USE_OPENCL */ register_and_partition_data(); #if DEBUG print_it(); #endif err = create_and_submit_tasks(); if (err != 0) { FPRINTF(stderr, "create_submit_task : %s\n", strerror(-err)); return EXIT_FAILURE; } unpartition_and_unregister_data(); #if DEBUG print_it(); #endif #ifdef STARPU_USE_OPENCL if (_nopencl > 0) { err = starpu_opencl_unload_opencl(&_opencl_program); assert(err == 0); err = starpu_opencl_unload_opencl(&_opencl_conversion_program); assert(err == 0); } #endif /* !STARPU_USE_OPENCL */ starpu_shutdown(); print_it(); return check_it(); enodev: return 77; #endif } starpu-1.4.9+dfsg/examples/filters/custom_mf/custom_opencl.c000066400000000000000000000044731507764646700243200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "custom_types.h" #include "custom_interface.h" extern struct starpu_opencl_program _opencl_program; void custom_scal_opencl_func(void *buffers[], void *args) { (void) args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; unsigned n = CUSTOM_GET_NX(buffers[0]); struct point *aop; aop = (struct point *) CUSTOM_GET_CPU_PTR(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &_opencl_program, "custom_scal_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); void *x = CUSTOM_GET_OPENCL_X_PTR(buffers[0]); if (starpu_opencl_set_kernel_args(&err, &kernel, sizeof(aop), &aop, sizeof(x), &x, sizeof(n), &n, 0) != 3) { STARPU_OPENCL_REPORT_ERROR(err); assert(0); } { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local = global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel( queue, kernel, 1, /* work_dim */ NULL, /* global_work_offset */ &global, /* global_work_size */ &local, /* local_work_size */ 0, /* num_events_in_wait_list */ NULL, /* event_wait_list */ NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/examples/filters/custom_mf/custom_opencl.cl000066400000000000000000000015701507764646700244670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "custom_types.h" __kernel void custom_scal_opencl(__global struct point *aop, __global float *x, int nx) { const int i = get_global_id(0); if (i < nx) x[i] *= x[i+nx]; } starpu-1.4.9+dfsg/examples/filters/custom_mf/custom_types.h000066400000000000000000000016621507764646700242060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __CUSTOM_TYPES_H__ #define __CUSTOM_TYPES_H__ struct struct_of_arrays { float *x, *y; }; struct point { float x, y; }; #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #endif starpu-1.4.9+dfsg/examples/filters/f3d_cpu.c000066400000000000000000000022411507764646700207640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include void f3d_cpu_func(void *buffers[], void *cl_arg) { int i, j, k; int *factor = (int *) cl_arg; int *arr3d = (int *)STARPU_NDIM_GET_PTR(buffers[0]); int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]); unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); int nx = nn[0]; int ny = nn[1]; int nz = nn[2]; unsigned ldy = ldn[1]; unsigned ldz = ldn[2]; for(k=0; k static __global__ void f3d_cuda(int *arr3d, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor) { int i, j, k; for(k=0; k>>(arr3d, nx, ny, nz, ldy, ldz, *factor); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/filters/f3d_hip.hip000066400000000000000000000033401507764646700213140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* dumb HIP kernel to fill a 3D matrix */ #include static __global__ void f3d_hip(int *arr3d, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor) { int i, j, k; for(k=0; k void f4d_cpu_func(void *buffers[], void *cl_arg) { int i, j, k, l; int *factor = (int *) cl_arg; int *arr4d = (int *)STARPU_NDIM_GET_PTR(buffers[0]); int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]); unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); int nx = nn[0]; int ny = nn[1]; int nz = nn[2]; int nt = nn[3]; unsigned ldy = ldn[1]; unsigned ldz = ldn[2]; unsigned ldt = ldn[3]; for(l=0; l static __global__ void f4d_cuda(int *arr4d, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt, float factor) { int i, j, k, l; for(l=0; l>>(arr4d, nx, ny, nz, nt, ldy, ldz, ldt, *factor); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/filters/f4d_hip.hip000066400000000000000000000036621507764646700213240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* dumb HIP kernel to fill a 4D matrix */ #include static __global__ void f4d_hip(int *arr4d, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt, float factor) { int i, j, k, l; for(l=0; l #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void print_5darr(int *arr5d, int nx, int ny, int nz, int nt, int ng, unsigned ldy, unsigned ldz, unsigned ldt, unsigned ldg) { int i, j, k, l, m; FPRINTF(stderr, "5dim array=%p nx=%d ny=%d nz=%d nt=%d ng=%d ldy=%u ldz=%u ldt=%u ldg=%u\n", arr5d, nx, ny, nz, nt, ng, ldy, ldz, ldt, ldg); for(m=0 ; m #define NX 5 #define NY 4 #define NZ 3 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void block_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void block_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void block_hip_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_OPENCL extern void opencl_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_OPENCL struct starpu_opencl_program opencl_program; #endif extern void generate_block_data(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz); extern void print_block(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz); extern void print_block_data(starpu_data_handle_t block_handle); int main(void) { int *block; int i, j, k; int ret; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {block_cpu_func}, .cpu_funcs_name = {"block_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {block_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {block_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_func}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "block_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&block, NX*NY*NZ*sizeof(int)); assert(block); generate_block_data(block, NX, NY, NZ, NX, NX*NY); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("examples/filters/fblock_opencl_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif /* Declare data to StarPU */ starpu_block_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)block, NX, NX*NY, NX, NY, NZ, sizeof(int)); FPRINTF(stderr, "IN Block\n"); print_block_data(handle); /* Partition the block in PARTS sub-blocks */ struct starpu_data_filter f = { .filter_func = starpu_block_filter_block, .nchildren = PARTS }; starpu_data_partition(handle, &f); FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); for(i=0 ; icl = &cl; task->synchronous = 1; task->callback_func = NULL; task->handles[0] = starpu_data_get_sub_data(handle, 1, i); task->cl_arg = &multiplier; task->cl_arg_size = sizeof(multiplier); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* Unpartition the data, unregister it from StarPU and shutdown */ starpu_data_unpartition(handle, STARPU_MAIN_RAM); print_block_data(handle); starpu_data_unregister(handle); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif /* Print result block */ FPRINTF(stderr, "OUT Block\n"); print_block(block, NX, NY, NZ, NX, NX*NY); starpu_free_noflag(block, NX*NY*NZ*sizeof(int)); starpu_shutdown(); return 0; enodev: FPRINTF(stderr, "WARNING: No one can execute this task\n"); starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fblock_cpu.c000066400000000000000000000023701507764646700215530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* dumb kernel to fill a 3D matrix */ #include void block_cpu_func(void *buffers[], void *cl_arg) { int i, j, k; int *factor = (int *) cl_arg; int *block = (int *)STARPU_BLOCK_GET_PTR(buffers[0]); int nx = (int)STARPU_BLOCK_GET_NX(buffers[0]); int ny = (int)STARPU_BLOCK_GET_NY(buffers[0]); int nz = (int)STARPU_BLOCK_GET_NZ(buffers[0]); unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); for(k=0; k static __global__ void fblock_cuda(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor) { int i, j, k; for(k=0; k>>(block, nx, ny, nz, ldy, ldz, *factor); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/filters/fblock_hip.hip000066400000000000000000000034201507764646700220770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* dumb HIP kernel to fill a 3D matrix */ #include static __global__ void fblock_hip(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor) { int i, j, k; for(k=0; k #define CHECK_CL_SET_KERNEL_ARG(kernel, n, size, ptr) \ do \ { \ int check_err; \ check_err = clSetKernelArg(kernel, n, size, ptr); \ if (check_err != CL_SUCCESS) \ STARPU_OPENCL_REPORT_ERROR(check_err); \ } while (0) extern struct starpu_opencl_program opencl_program; void opencl_func(void *buffers[], void *cl_arg) { int id, devid, err; cl_kernel kernel; cl_command_queue queue; int *factor = cl_arg; cl_mem block = (cl_mem)STARPU_BLOCK_GET_DEV_HANDLE(buffers[0]); unsigned offset = STARPU_BLOCK_GET_OFFSET(buffers[0]); int nx = (int)STARPU_BLOCK_GET_NX(buffers[0]); int ny = (int)STARPU_BLOCK_GET_NY(buffers[0]); int nz = (int)STARPU_BLOCK_GET_NZ(buffers[0]); unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "fblock_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); CHECK_CL_SET_KERNEL_ARG(kernel, 0, sizeof(block), &block); CHECK_CL_SET_KERNEL_ARG(kernel, 1, sizeof(offset), &offset); CHECK_CL_SET_KERNEL_ARG(kernel, 2, sizeof(nx), &nx); CHECK_CL_SET_KERNEL_ARG(kernel, 3, sizeof(ny), &ny); CHECK_CL_SET_KERNEL_ARG(kernel, 4, sizeof(nz), &nz); CHECK_CL_SET_KERNEL_ARG(kernel, 5, sizeof(ldy), &ldy); CHECK_CL_SET_KERNEL_ARG(kernel, 6, sizeof(ldz), &ldz); CHECK_CL_SET_KERNEL_ARG(kernel, 7, sizeof(*factor), factor); { size_t global[3]={nx,ny,nz}; err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/examples/filters/fblock_opencl_kernel.cl000066400000000000000000000022151507764646700237560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* dumb OpenCL kernel to fill a 3D matrix */ __kernel void fblock_opencl(__global int* block, unsigned offset, int nx, int ny, int nz, unsigned ldy, unsigned ldz, int factor) { const int idx = get_global_id(0); const int idy = get_global_id(1); const int idz = get_global_id(2); if (idx >= nx) return; if (idy >= ny) return; if (idz >= nz) return; block = (__global int*) ((__global char *)block + offset); int i = idz*ldz + idy*ldy + idx; block[i] = factor; } starpu-1.4.9+dfsg/examples/filters/fblock_pick_matrix.c000066400000000000000000000074111507764646700232770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 5 #define NY 4 #define NZ 3 #define PARTS 2 #define POS 1 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void matrix_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void matrix_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void matrix_hip_func(void *buffers[], void *cl_arg); #endif extern void generate_block_data(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz); extern void print_block_data(starpu_data_handle_t block_handle); extern void print_matrix_data(starpu_data_handle_t matrix_handle); int main(void) { int *block; int i, j, k; int ret; int factor = 2; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {matrix_cpu_func}, .cpu_funcs_name = {"matrix_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {matrix_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {matrix_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "block_pick_matrix_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&block, NX*NY*NZ*sizeof(int)); assert(block); generate_block_data(block, NX, NY, NZ, NX, NX*NY); /* Declare data to StarPU */ starpu_block_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)block, NX, NX*NY, NX, NY, NZ, sizeof(int)); FPRINTF(stderr, "IN Block: \n"); print_block_data(handle); /* Partition the block in PARTS sub-matrices */ struct starpu_data_filter f = { .filter_func = starpu_block_filter_pick_matrix_y, .filter_arg_ptr = (void*)(uintptr_t) POS, .nchildren = PARTS, /* the children use a matrix interface*/ .get_child_ops = starpu_block_filter_pick_matrix_child_ops }; starpu_data_partition(handle, &f); FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); for(i=0 ; icl = &cl; task->synchronous = 1; task->callback_func = NULL; task->handles[0] = matrix_handle; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result matrix */ FPRINTF(stderr, "OUT Matrix %d: \n", i); print_matrix_data(matrix_handle); } /* Unpartition the data, unregister it from StarPU and shutdown */ starpu_data_unpartition(handle, STARPU_MAIN_RAM); FPRINTF(stderr, "OUT Block: \n"); print_block_data(handle); starpu_data_unregister(handle); starpu_free_noflag(block, NX*NY*NZ*sizeof(int)); starpu_shutdown(); return 0; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fblock_pick_variable.c000066400000000000000000000073121507764646700235600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 5 #define NY 4 #define NZ 3 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void cpu_func(void *buffers[], void *cl_arg) { int *factor = (int *) cl_arg; /* local copy of the variable pointer */ int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); *val *= *factor; } #ifdef STARPU_USE_CUDA extern void variable_cuda_func(void *buffers[], void *cl_arg); #endif extern void generate_block_data(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz); extern void print_block_data(starpu_data_handle_t block_handle); int main(void) { int *block; int i, j, k; int ret; int factor = 2; uint32_t pos[3] = {1,2,1}; starpu_data_handle_t handle; starpu_data_handle_t var_handle; struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {variable_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "block_pick_variable_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&block, NX*NY*NZ*sizeof(int)); assert(block); generate_block_data(block, NX, NY, NZ, NX, NX*NY); /* Declare data to StarPU */ starpu_block_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)block, NX, NX*NY, NX, NY, NZ, sizeof(int)); FPRINTF(stderr, "IN Block: \n"); print_block_data(handle); /* Pick a variable in the block */ struct starpu_data_filter f_var = { .filter_func = starpu_block_filter_pick_variable, .filter_arg_ptr = (void*)pos, .nchildren = 1, /* the children use a variable interface*/ .get_child_ops = starpu_block_filter_pick_variable_child_ops }; starpu_data_partition_plan(handle, &f_var, &var_handle); FPRINTF(stderr, "Sub Variable:\n"); int *variable = (int *)starpu_variable_get_local_ptr(var_handle); starpu_data_acquire(var_handle, STARPU_R); FPRINTF(stderr, "%5d ", *variable); starpu_data_release(var_handle); FPRINTF(stderr,"\n"); /* Submit the task */ struct starpu_task *task = starpu_task_create(); FPRINTF(stderr,"Dealing with sub-variable\n"); task->handles[0] = var_handle; task->cl = &cl; task->synchronous = 1; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result variable */ FPRINTF(stderr,"OUT Variable:\n"); starpu_data_acquire(var_handle, STARPU_R); FPRINTF(stderr, "%5d ", *variable); starpu_data_release(var_handle); FPRINTF(stderr,"\n"); starpu_data_partition_clean(handle, 1, &var_handle); /* Unpartition the data, unregister it from StarPU and shutdown */ //starpu_data_unpartition(handle, STARPU_MAIN_RAM); FPRINTF(stderr, "OUT Block: \n"); print_block_data(handle); starpu_data_unregister(handle); starpu_free_noflag(block, NX*NY*NZ*sizeof(int)); starpu_shutdown(); return 0; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fblock_print.c000066400000000000000000000044661507764646700221300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void print_block(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz) { int i, j, k; FPRINTF(stderr, "block=%p nx=%d ny=%d nz=%d ldy=%u ldz=%u\n", block, nx, ny, nz, ldy, ldz); for(k=0 ; k #define NX 5 #define NY 4 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void matrix_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void matrix_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void matrix_hip_func(void *buffers[], void *cl_arg); #endif extern void generate_matrix_data(int *matrix, int nx, int ny, unsigned ld); extern void print_matrix_data(starpu_data_handle_t matrix_handle); int main(void) { unsigned j; int *matrix; int ret, i; int factor = 12; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {matrix_cpu_func}, .cpu_funcs_name = {"matrix_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {matrix_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {matrix_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "matrix_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&matrix, NX*NY*sizeof(int)); generate_matrix_data(matrix, NX, NY, NX); /* Declare data to StarPU */ starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0])); FPRINTF(stderr,"IN Matrix: \n"); print_matrix_data(handle); /* Partition the matrix in PARTS sub-matrices */ struct starpu_data_filter f = { .filter_func = starpu_matrix_filter_block, .nchildren = PARTS }; starpu_data_partition(handle, &f); /* Submit a task on each sub-vector */ for (i=0; ihandles[0] = starpu_data_get_sub_data(handle, 1, i); task->cl = &cl; task->synchronous = 1; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* Unpartition the data, unregister it from StarPU and shutdown */ starpu_data_unpartition(handle, STARPU_MAIN_RAM); FPRINTF(stderr,"OUT Matrix: \n"); print_matrix_data(handle); starpu_data_unregister(handle); starpu_free_noflag(matrix, NX*NY*sizeof(int)); starpu_shutdown(); return ret; enodev: FPRINTF(stderr, "WARNING: No one can execute this task\n"); starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fmatrix_cpu.c000066400000000000000000000022631507764646700217660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* dumb kernel to fill a 2D matrix */ #include void matrix_cpu_func(void *buffers[], void *cl_arg) { int i, j; int *factor = (int *) cl_arg; /* length of the matrix */ int nx = (int)STARPU_MATRIX_GET_NX(buffers[0]); int ny = (int)STARPU_MATRIX_GET_NY(buffers[0]); unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); /* local copy of the matrix pointer */ int *matrix = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); for(j=0; j static __global__ void fmatrix_cuda(int *matrix, int nx, int ny, unsigned ld, float factor) { int i, j; for(j=0; j>>(matrix, nx, ny, ld, *factor); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/filters/fmatrix_hip.hip000066400000000000000000000030321507764646700223100ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* dumb HIP kernel to fill a 2D matrix */ #include static __global__ void fmatrix_hip(int *matrix, int nx, int ny, unsigned ld, float factor) { int i, j; for(j=0; j #include #define NX 10 #define NY 21 #define PARTSX 2 #define PARTSY 3 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void variable_cpu_func(void *buffers[], void *cl_arg) { int *factor = (int *) cl_arg; /* local copy of the variable pointer */ int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); *val *= *factor; } #ifdef STARPU_USE_CUDA extern void variable_cuda_func(void *buffers[], void *cl_arg); #endif extern void generate_matrix_data(int *matrix, int nx, int ny, unsigned ld); extern void print_matrix_data(starpu_data_handle_t matrix_handle); int main(void) { int *matrix; int ret, i, j; int factor = 12; uint32_t pos[2]; starpu_data_handle_t handle; struct starpu_codelet cl_r = { .cpu_funcs = {variable_cpu_func}, .cpu_funcs_name = {"variable_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {variable_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_R}, .name = "matrix_pick_variable_scal_r" }; struct starpu_codelet cl_rw = { .cpu_funcs = {variable_cpu_func}, .cpu_funcs_name = {"variable_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {variable_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "matrix_pick_variable_scal_rw" }; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&matrix, NX*NY*sizeof(int)); generate_matrix_data(matrix, NX, NY, NX); /* Declare data to StarPU */ starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0])); FPRINTF(stderr, "IN Matrix: \n"); print_matrix_data(handle); /* Partition the matrix in PARTS sub-matrices */ struct starpu_data_filter f_matrix_vert = { .filter_func = starpu_matrix_filter_block, .nchildren = PARTSX }; struct starpu_data_filter f_matrix_horiz = { .filter_func = starpu_matrix_filter_vertical_block, .nchildren = PARTSY }; starpu_data_map_filters(handle, 2, &f_matrix_vert, &f_matrix_horiz); starpu_data_handle_t sub_matrix_handle; int nn; for(nn=0; nn<=10; nn++) { int indxi = starpu_drand48()*(PARTSX); int indxj = starpu_drand48()*(PARTSY); sub_matrix_handle = starpu_data_get_sub_data(handle, 2, indxi, indxj); FPRINTF(stderr, "sub Matrix: \n"); print_matrix_data(sub_matrix_handle); starpu_data_handle_t var_handle; pos[0] = starpu_drand48()*(NX/PARTSX); pos[1] = starpu_drand48()*(NY/PARTSY); /* Pick a variable in the matrix */ struct starpu_data_filter f_var = { .filter_func = starpu_matrix_filter_pick_variable, .filter_arg_ptr = (void*)pos, .nchildren = 1, /* the children use a variable interface*/ .get_child_ops = starpu_matrix_filter_pick_variable_child_ops }; starpu_data_partition_plan(sub_matrix_handle, &f_var, &var_handle); FPRINTF(stderr, "Sub Variable:\n"); int *variable = (int *)starpu_variable_get_local_ptr(var_handle); starpu_data_acquire(var_handle, STARPU_R); FPRINTF(stderr, "%5d ", *variable); starpu_data_release(var_handle); FPRINTF(stderr,"\n"); /* Submit the task */ struct starpu_task *task = starpu_task_create(); FPRINTF(stderr,"Dealing with sub-variable\n"); task->handles[0] = var_handle; if(starpu_drand48()>=0.2) task->cl = &cl_r; else task->cl = &cl_rw; task->synchronous = 1; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result variable */ FPRINTF(stderr,"OUT Variable:\n"); starpu_data_acquire(var_handle, STARPU_R); FPRINTF(stderr, "%5d ", *variable); starpu_data_release(var_handle); FPRINTF(stderr,"\n"); starpu_data_partition_clean(sub_matrix_handle, 1, &var_handle); } /* Unpartition the data, unregister it from StarPU and shutdown */ starpu_data_unpartition(handle, STARPU_MAIN_RAM); FPRINTF(stderr,"OUT Matrix: \n"); print_matrix_data(handle); starpu_data_unregister(handle); starpu_free_noflag(matrix, NX*NY*sizeof(int)); starpu_shutdown(); return ret; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fmatrix_pick_vector.c000066400000000000000000000077441507764646700235200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 5 #define NY 4 #define PARTS 2 #define POS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void vector_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void vector_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void vector_hip_func(void *buffers[], void *cl_arg); #endif extern void generate_matrix_data(int *matrix, int nx, int ny, unsigned ld); extern void print_matrix_data(starpu_data_handle_t matrix_handle); int main(void) { int *matrix; int ret, i, j; int factor = 12; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {vector_cpu_func}, .cpu_funcs_name = {"vector_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {vector_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {vector_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "matrix_pick_vector_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&matrix, NX*NY*sizeof(int)); generate_matrix_data(matrix, NX, NY, NX); /* Declare data to StarPU */ starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0])); FPRINTF(stderr, "IN Matrix: \n"); print_matrix_data(handle); /* Partition the matrix in PARTS sub-vectors */ struct starpu_data_filter f = { .filter_func = starpu_matrix_filter_pick_vector_y, .filter_arg_ptr = (void*)(uintptr_t) POS, .nchildren = PARTS, /* the children use a vector interface*/ .get_child_ops = starpu_matrix_filter_pick_vector_child_ops }; starpu_data_partition(handle, &f); FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); for(i=0 ; ihandles[0] = vector_handle; task->cl = &cl; task->synchronous = 1; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result vector */ FPRINTF(stderr,"OUT Vector %d: \n", i); starpu_data_acquire(vector_handle, STARPU_R); for(j=0 ; j #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void print_matrix(int *matrix, int nx, int ny, unsigned ld) { int i, j; FPRINTF(stderr, "matrix=%p nx=%d ny=%d ld=%u\n", matrix, nx, ny, ld); for(j=0 ; j static __global__ void _fmultiple_check_scale_cuda(int *val, int nx, int ny, unsigned ld, int start, int factor) { int i, j; for(j=0; j>>(val, nx, ny, ld, start, factor); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } static __global__ void _fmultiple_check_cuda(int *val, int nx, int ny, unsigned ld, int start, int factor) { int i, j; for(j=0; j>>(val, nx, ny, ld, start, factor); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/filters/fmultiple_hip.hip000066400000000000000000000050701507764646700226430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* dumb HIP kernel to check the matrix values and scale it up */ #include static __global__ void _fmultiple_check_scale_hip(int *val, int nx, int ny, unsigned ld, int start, int factor) { int i, j; for(j=0; j #define NX 6 #define NY 6 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void matrix_fill(void *buffers[], void *cl_arg) { unsigned i, j; (void)cl_arg; /* length of the matrix */ unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); for(j=0; j= 0); for (i = 1; i < nbuffers; i++) STARPU_ASSERT(starpu_task_get_current_data_node(i) == node); } struct starpu_codelet cl_switch = { #if 1 /* Check for the values */ .cpu_funcs = {empty}, #else /* For production code: we do not need to actually execute anything */ .where = STARPU_NOWHERE, #endif .nbuffers = STARPU_VARIABLE_NBUFFERS, .name = "switch", }; int main(void) { unsigned n=1; int matrix[NX][NY]; int ret, i; /* We haven't taken care otherwise */ STARPU_ASSERT((NX%PARTS) == 0); STARPU_ASSERT((NY%PARTS) == 0); starpu_data_handle_t handle; starpu_data_handle_t vert_handle[PARTS]; starpu_data_handle_t horiz_handle[PARTS]; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* force to execute task on the home_node, here it is STARPU_MAIN_RAM */ cl_switch.specific_nodes = 1; for (i = 0; i < STARPU_NMAXBUFS; i++) cl_switch.nodes[i] = STARPU_MAIN_RAM; /* Declare the whole matrix to StarPU */ starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0][0])); /* Also declare the vertical slices to StarPU */ for (i = 0; i < PARTS; i++) { starpu_matrix_data_register(&vert_handle[i], STARPU_MAIN_RAM, (uintptr_t)&matrix[0][i*(NX/PARTS)], NX, NX/PARTS, NY, sizeof(matrix[0][0])); /* But make it invalid for now, we'll access data through the whole matrix first */ starpu_data_invalidate(vert_handle[i]); } /* And the horizontal slices to StarPU */ for (i = 0; i < PARTS; i++) { starpu_matrix_data_register(&horiz_handle[i], STARPU_MAIN_RAM, (uintptr_t)&matrix[i*(NY/PARTS)][0], NX, NX, NY/PARTS, sizeof(matrix[0][0])); starpu_data_invalidate(horiz_handle[i]); } /* Fill the matrix */ ret = starpu_task_insert(&cl_fill, STARPU_W, handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Now switch to vertical view of the matrix */ struct starpu_data_descr vert_descr[PARTS]; for (i = 0; i < PARTS; i++) { vert_descr[i].handle = vert_handle[i]; vert_descr[i].mode = STARPU_W; } ret = starpu_task_insert(&cl_switch, STARPU_RW, handle, STARPU_DATA_MODE_ARRAY, vert_descr, PARTS, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* And make sure we don't accidentally access the matrix through the whole-matrix handle */ starpu_data_invalidate_submit(handle); /* Check the values of the vertical slices */ for (i = 0; i < PARTS; i++) { int factor = 1; int start = i*(NX/PARTS); ret = starpu_task_insert(&cl_check_scale, STARPU_RW, vert_handle[i], STARPU_VALUE, &start, sizeof(start), STARPU_VALUE, &factor, sizeof(factor), 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* Now switch back to total view of the matrix */ for (i = 0; i < PARTS; i++) vert_descr[i].mode = STARPU_RW; ret = starpu_task_insert(&cl_switch, STARPU_DATA_MODE_ARRAY, vert_descr, PARTS, STARPU_W, handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* And make sure we don't accidentally access the matrix through the vertical slices */ for (i = 0; i < PARTS; i++) starpu_data_invalidate_submit(vert_handle[i]); /* And switch to horizontal view of the matrix */ struct starpu_data_descr horiz_descr[PARTS]; for (i = 0; i < PARTS; i++) { horiz_descr[i].handle = horiz_handle[i]; horiz_descr[i].mode = STARPU_W; } ret = starpu_task_insert(&cl_switch, STARPU_RW, handle, STARPU_DATA_MODE_ARRAY, horiz_descr, PARTS, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* And make sure we don't accidentally access the matrix through the whole-matrix handle */ starpu_data_invalidate_submit(handle); /* Check the values of the horizontal slices */ for (i = 0; i < PARTS; i++) { int factor = 2; int start = factor*100*i*(NY/PARTS); ret = starpu_task_insert(&cl_check_scale, STARPU_RW, horiz_handle[i], STARPU_VALUE, &start, sizeof(start), STARPU_VALUE, &factor, sizeof(factor), 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* * Unregister data from StarPU and shutdown It does not really matter * which view is active at unregistration here, since all views cover * the whole matrix, so it will be completely updated in the main memory. */ for (i = 0; i < PARTS; i++) { starpu_data_unregister(vert_handle[i]); starpu_data_unregister(horiz_handle[i]); } starpu_data_unregister(handle); starpu_shutdown(); return ret; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fmultiple_submit.c000066400000000000000000000144341507764646700230340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This examplifies how to access the same matrix with different partitioned * views, doing the coherency through partition planning. * We first run a kernel on the whole matrix to fill it, then run a kernel on * each vertical slice to check the value and multiply it by two, then run a * kernel on each horizontal slice to do the same. */ #include #define NX 6 #define NY 6 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void matrix_fill(void *buffers[], void *cl_arg) { unsigned i, j; (void)cl_arg; /* length of the matrix */ unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); for(j=0; j #define NX 6 #define NY 6 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void matrix_fill(void *buffers[], void *cl_arg) { unsigned i, j; (void)cl_arg; /* length of the matrix */ unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); for(j=0; j #define NX 6 #define NY 6 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void matrix_fill(void *buffers[], void *cl_arg) { unsigned i, j; (void)cl_arg; /* length of the matrix */ unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); for(j=0; j #define NX 6 #define NY 6 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void matrix_fill(void *buffers[], void *cl_arg) { unsigned i, j; /* length of the matrix */ unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); unsigned offset_i, offset_j; starpu_codelet_unpack_args(cl_arg, &offset_i, &offset_j); for(j=0; j #define NX 6 #define NY 5 #define NZ 4 #define NT 3 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void f4d_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void f4d_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void f4d_hip_func(void *buffers[], void *cl_arg); #endif extern void generate_tensor_data(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); extern void print_tensor(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); extern void print_4dim_data(starpu_data_handle_t ndim_handle); int main(void) { int *arr4d; int i, j, k, l; int ret; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {f4d_cpu_func}, #ifdef STARPU_USE_CUDA .cuda_funcs = {f4d_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {f4d_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "ndim_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&arr4d, NX*NY*NZ*NT*sizeof(int)); assert(arr4d); generate_tensor_data(arr4d, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); unsigned nn[4] = {NX, NY, NZ, NT}; unsigned ldn[4] = {1, NX, NX*NY, NX*NY*NZ}; /* Declare data to StarPU */ starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(int)); FPRINTF(stderr, "IN Ndim Array\n"); print_4dim_data(handle); /* Partition the ndim array in PARTS sub-ndimarrays */ struct starpu_data_filter f = { .filter_func = starpu_ndim_filter_block, .filter_arg = 0, //Partition the array along X dimension .nchildren = PARTS }; starpu_data_partition(handle, &f); FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); for(i=0 ; icl = &cl; task->synchronous = 1; task->callback_func = NULL; task->handles[0] = starpu_data_get_sub_data(handle, 1, i); task->cl_arg = &multiplier; task->cl_arg_size = sizeof(multiplier); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* Unpartition the data, unregister it from StarPU and shutdown */ starpu_data_unpartition(handle, STARPU_MAIN_RAM); print_4dim_data(handle); starpu_data_unregister(handle); /* Print result ndim array*/ FPRINTF(stderr, "OUT Ndim Array\n"); print_tensor(arr4d, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); starpu_free_noflag(arr4d, NX*NY*NZ*NT*sizeof(int)); starpu_shutdown(); return 0; enodev: FPRINTF(stderr, "WARNING: No one can execute this task\n"); starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fndim_1d_pick_variable.c000066400000000000000000000070731507764646700240050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 21 #define PARTS 3 #define POS 5 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void cpu_func(void *buffers[], void *cl_arg) { int *factor = (int *) cl_arg; /* local copy of the variable pointer */ int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); *val *= *factor; } int main(void) { int i; int *arr1d; starpu_data_handle_t handle; int factor = 10; int ret; struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "arr1d_pick_variable_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&arr1d, NX*sizeof(int)); FPRINTF(stderr,"IN 1-dim Array: \n"); for(i=0 ; ihandles[0] = variable_handle; task->cl = &cl; task->synchronous = 1; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); FPRINTF(stderr,"OUT Variable %d: \n", i); starpu_data_acquire(variable_handle, STARPU_R); FPRINTF(stderr, "%5d ", *variable); starpu_data_release(variable_handle); FPRINTF(stderr,"\n"); } starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unregister(handle); FPRINTF(stderr,"OUT 1-dim Array: \n"); for(i=0 ; i #define NX 5 #define NY 4 #define PARTS 2 #define POS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void vector_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void vector_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void vector_hip_func(void *buffers[], void *cl_arg); #endif extern void generate_matrix_data(int *matrix, int nx, int ny, unsigned ld); extern void print_2dim_data(starpu_data_handle_t ndim_handle); int main(void) { int *arr2d; int ret, i, j; int factor = 12; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {vector_cpu_func}, .cpu_funcs_name = {"vector_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {vector_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {vector_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "arr2d_pick_vector_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&arr2d, NX*NY*sizeof(int)); generate_matrix_data(arr2d, NX, NY, NX); unsigned nn[2] = {NX, NY}; unsigned ldn[2] = {1, NX}; /* Declare data to StarPU */ starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr2d, ldn, nn, 2, sizeof(int)); FPRINTF(stderr, "IN 2-dim Array: \n"); print_2dim_data(handle); /* Partition the 2-dim array in PARTS sub-vectors */ struct starpu_data_filter f = { .filter_func = starpu_ndim_filter_2d_pick_vector, .filter_arg = 1, //Partition the array along Y dimension .filter_arg_ptr = (void*)(uintptr_t) POS, .nchildren = PARTS, /* the children use a vector interface*/ .get_child_ops = starpu_ndim_filter_pick_vector_child_ops }; starpu_data_partition(handle, &f); FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); for(i=0 ; ihandles[0] = vector_handle; task->cl = &cl; task->synchronous = 1; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result vector */ FPRINTF(stderr,"OUT Vector %d: \n", i); starpu_data_acquire(vector_handle, STARPU_R); for(j=0 ; j #define NX 5 #define NY 4 #define NZ 3 #define PARTS 2 #define POS 1 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void matrix_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void matrix_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void matrix_hip_func(void *buffers[], void *cl_arg); #endif extern void generate_block_data(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz); extern void print_3dim_data(starpu_data_handle_t ndim_handle); extern void print_matrix_data(starpu_data_handle_t matrix_handle); int main(void) { int *arr3d; int i, j, k; int ret; int factor = 2; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {matrix_cpu_func}, .cpu_funcs_name = {"matrix_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {matrix_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {matrix_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "arr3d_pick_matrix_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&arr3d, NX*NY*NZ*sizeof(int)); assert(arr3d); generate_block_data(arr3d, NX, NY, NZ, NX, NX*NY); unsigned nn[3] = {NX, NY, NZ}; unsigned ldn[3] = {1, NX, NX*NY}; /* Declare data to StarPU */ starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr3d, ldn, nn, 3, sizeof(int)); FPRINTF(stderr, "IN 3-dim Array: \n"); print_3dim_data(handle); /* Partition the 3-dim array in PARTS sub-matrices */ struct starpu_data_filter f = { .filter_func = starpu_ndim_filter_3d_pick_matrix, .filter_arg = 1, //Partition the array along Y dimension .filter_arg_ptr = (void*)(uintptr_t) POS, .nchildren = PARTS, /* the children use a matrix interface*/ .get_child_ops = starpu_ndim_filter_pick_matrix_child_ops }; starpu_data_partition(handle, &f); FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); for(i=0 ; icl = &cl; task->synchronous = 1; task->callback_func = NULL; task->handles[0] = matrix_handle; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result matrix */ FPRINTF(stderr, "OUT Matrix %d: \n", i); print_matrix_data(matrix_handle); } /* Unpartition the data, unregister it from StarPU and shutdown */ starpu_data_unpartition(handle, STARPU_MAIN_RAM); FPRINTF(stderr, "OUT 3-dim Array: \n"); print_3dim_data(handle); starpu_data_unregister(handle); starpu_free_noflag(arr3d, NX*NY*NZ*sizeof(int)); starpu_shutdown(); return 0; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fndim_4d_pick_block.c000066400000000000000000000076741507764646700233240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 6 #define NY 5 #define NZ 4 #define NT 3 #define PARTS 2 #define POS 1 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void block_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void block_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void block_hip_func(void *buffers[], void *cl_arg); #endif extern void generate_tensor_data(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); extern void print_4dim_data(starpu_data_handle_t ndim_handle); extern void print_block_data(starpu_data_handle_t block_handle); int main(void) { int *arr4d; int i, j, k, l; int ret; int factor = 2; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {block_cpu_func}, .cpu_funcs_name = {"block_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {block_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {block_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "arr4d_pick_block_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&arr4d, NX*NY*NZ*NT*sizeof(int)); assert(arr4d); generate_tensor_data(arr4d, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); unsigned nn[4] = {NX, NY, NZ, NT}; unsigned ldn[4] = {1, NX, NX*NY, NX*NY*NZ}; /* Declare data to StarPU */ starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(int)); FPRINTF(stderr, "IN 4-dim Array: \n"); print_4dim_data(handle); /* Partition the 4-dim array in PARTS sub-blocks */ struct starpu_data_filter f = { .filter_func = starpu_ndim_filter_4d_pick_block, .filter_arg = 2, //Partition the array along Z dimension .filter_arg_ptr = (void*)(uintptr_t) POS, .nchildren = PARTS, /* the children use a block interface*/ .get_child_ops = starpu_ndim_filter_pick_block_child_ops }; starpu_data_partition(handle, &f); FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); for(i=0 ; icl = &cl; task->synchronous = 1; task->callback_func = NULL; task->handles[0] = block_handle; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result block */ FPRINTF(stderr, "OUT Block %d: \n", i); print_block_data(block_handle); } /* Unpartition the data, unregister it from StarPU and shutdown */ starpu_data_unpartition(handle, STARPU_MAIN_RAM); FPRINTF(stderr, "OUT 4-dim Array: \n"); print_4dim_data(handle); starpu_data_unregister(handle); starpu_free_noflag(arr4d, NX*NY*NZ*NT*sizeof(int)); starpu_shutdown(); return 0; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fndim_5d_pick_tensor.c000066400000000000000000000100331507764646700235240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 6 #define NY 5 #define NZ 4 #define NT 3 #define NG 2 #define PARTS 2 #define POS 1 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void tensor_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void tensor_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void tensor_hip_func(void *buffers[], void *cl_arg); #endif extern void generate_5dim_data(int *arr5d, int nx, int ny, int nz, int nt, int ng, unsigned ldy, unsigned ldz, unsigned ldt, unsigned ldg); extern void print_5dim_data(starpu_data_handle_t ndim_handle); extern void print_tensor_data(starpu_data_handle_t ndim_handle); int main(void) { int *arr5d; int i, j, k, l, m; int ret; int factor = 2; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {tensor_cpu_func}, .cpu_funcs_name = {"tensor_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {tensor_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {tensor_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "arr5d_pick_tensor_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&arr5d, NX*NY*NZ*NT*NG*sizeof(int)); assert(arr5d); generate_5dim_data(arr5d, NX, NY, NZ, NT, NG, NX, NX*NY, NX*NY*NZ, NX*NY*NZ*NT); unsigned nn[5] = {NX, NY, NZ, NT, NG}; unsigned ldn[5] = {1, NX, NX*NY, NX*NY*NZ, NX*NY*NZ*NT}; /* Declare data to StarPU */ starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr5d, ldn, nn, 5, sizeof(int)); FPRINTF(stderr, "IN 5-dim Array: \n"); print_5dim_data(handle); /* Partition the 5-dim array in PARTS tensors */ struct starpu_data_filter f = { .filter_func = starpu_ndim_filter_5d_pick_tensor, .filter_arg = 3, //Partition the array along T dimension .filter_arg_ptr = (void*)(uintptr_t) POS, .nchildren = PARTS, /* the children use a tensor interface*/ .get_child_ops = starpu_ndim_filter_pick_tensor_child_ops }; starpu_data_partition(handle, &f); FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); for(i=0 ; icl = &cl; task->synchronous = 1; task->callback_func = NULL; task->handles[0] = tensor_handle; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result tensor */ FPRINTF(stderr, "OUT Tensor %d: \n", i); print_tensor_data(tensor_handle); } /* Unpartition the data, unregister it from StarPU and shutdown */ starpu_data_unpartition(handle, STARPU_MAIN_RAM); FPRINTF(stderr, "OUT 5-dim Array: \n"); print_5dim_data(handle); starpu_data_unregister(handle); starpu_free_noflag(arr5d, NX*NY*NZ*NT*NG*sizeof(int)); starpu_shutdown(); return 0; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fndim_pick_ndim.c000066400000000000000000000074751507764646700225710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 6 #define NY 5 #define NZ 4 #define NT 3 #define PARTS 2 #define POS 1 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void f3d_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void f3d_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void f3d_hip_func(void *buffers[], void *cl_arg); #endif extern void generate_tensor_data(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); extern void print_4dim_data(starpu_data_handle_t ndim_handle); extern void print_3dim_data(starpu_data_handle_t ndim_handle); int main(void) { int *arr4d; int i, j, k, l; int ret; int factor = 2; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {f3d_cpu_func}, #ifdef STARPU_USE_CUDA .cuda_funcs = {f3d_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {f3d_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "arr4d_pick_arr3d_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&arr4d, NX*NY*NZ*NT*sizeof(int)); assert(arr4d); generate_tensor_data(arr4d, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); unsigned nn[4] = {NX, NY, NZ, NT}; unsigned ldn[4] = {1, NX, NX*NY, NX*NY*NZ}; /* Declare data to StarPU */ starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(int)); FPRINTF(stderr, "IN 4-dim Array: \n"); print_4dim_data(handle); /* Partition the 4-dim array in PARTS sub 3-dim arrays */ struct starpu_data_filter f = { .filter_func = starpu_ndim_filter_pick_ndim, .filter_arg = 2, //Partition the array along Z dimension .filter_arg_ptr = (void*)(uintptr_t) POS, .nchildren = PARTS }; starpu_data_partition(handle, &f); FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); for(i=0 ; icl = &cl; task->synchronous = 1; task->callback_func = NULL; task->handles[0] = sub3d_handle; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result 3-dim array */ FPRINTF(stderr, "OUT 3-dim Array %d: \n", i); print_3dim_data(sub3d_handle); } /* Unpartition the data, unregister it from StarPU and shutdown */ starpu_data_unpartition(handle, STARPU_MAIN_RAM); FPRINTF(stderr, "OUT 4-dim Array: \n"); print_4dim_data(handle); starpu_data_unregister(handle); starpu_free_noflag(arr4d, NX*NY*NZ*NT*sizeof(int)); starpu_shutdown(); return 0; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fndim_pick_variable.c000066400000000000000000000074411507764646700234200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 6 #define NY 5 #define NZ 4 #define NT 3 #define NG 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void cpu_func(void *buffers[], void *cl_arg) { int *factor = (int *) cl_arg; /* local copy of the variable pointer */ int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); *val *= *factor; } extern void generate_5dim_data(int *arr5d, int nx, int ny, int nz, int nt, int ng, unsigned ldy, unsigned ldz, unsigned ldt, unsigned ldg); extern void print_5dim_data(starpu_data_handle_t ndim_handle); int main(void) { int *arr5d; int i, j, k, l, m; int ret; int factor = 2; uint32_t pos[5] = {1,2,1,2,1}; starpu_data_handle_t handle; starpu_data_handle_t var_handle; struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "arr5d_pick_variable_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&arr5d, NX*NY*NZ*NT*NG*sizeof(int)); assert(arr5d); generate_5dim_data(arr5d, NX, NY, NZ, NT, NG, NX, NX*NY, NX*NY*NZ, NX*NY*NZ*NT); unsigned nn[5] = {NX, NY, NZ, NT, NG}; unsigned ldn[5] = {1, NX, NX*NY, NX*NY*NZ, NX*NY*NZ*NT}; /* Declare data to StarPU */ starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr5d, ldn, nn, 5, sizeof(int)); FPRINTF(stderr, "IN 5-dim Array: \n"); print_5dim_data(handle); /* Pick a variable in the 5-dim array */ struct starpu_data_filter f_var = { .filter_func = starpu_ndim_filter_pick_variable, .filter_arg_ptr = (void*)pos, .nchildren = 1, /* the children use a variable interface*/ .get_child_ops = starpu_ndim_filter_pick_variable_child_ops }; starpu_data_partition_plan(handle, &f_var, &var_handle); FPRINTF(stderr, "Sub Variable:\n"); int *variable = (int *)starpu_variable_get_local_ptr(var_handle); starpu_data_acquire(var_handle, STARPU_R); FPRINTF(stderr, "%5d ", *variable); starpu_data_release(var_handle); FPRINTF(stderr,"\n"); /* Submit the task */ struct starpu_task *task = starpu_task_create(); FPRINTF(stderr,"Dealing with sub-variable\n"); task->handles[0] = var_handle; task->cl = &cl; task->synchronous = 1; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result variable */ FPRINTF(stderr,"OUT Variable:\n"); starpu_data_acquire(var_handle, STARPU_R); FPRINTF(stderr, "%5d ", *variable); starpu_data_release(var_handle); FPRINTF(stderr,"\n"); starpu_data_partition_clean(handle, 1, &var_handle); /* Unpartition the data, unregister it from StarPU and shutdown */ //starpu_data_unpartition(handle, STARPU_MAIN_RAM); FPRINTF(stderr, "OUT 5-dim Array: \n"); print_5dim_data(handle); starpu_data_unregister(handle); starpu_free_noflag(arr5d, NX*NY*NZ*NT*NG*sizeof(int)); starpu_shutdown(); return 0; enodev: FPRINTF(stderr, "WARNING: No one can execute this task\n"); starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fndim_to_block.c000066400000000000000000000075461507764646700224270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 5 #define NY 4 #define NZ 3 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void block_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void block_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void block_hip_func(void *buffers[], void *cl_arg); #endif extern void generate_block_data(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz); extern void print_3dim_data(starpu_data_handle_t ndim_handle); extern void print_block_data(starpu_data_handle_t block_handle); int main(void) { int *arr3d; int i, j, k; int ret; int factor = 2; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {block_cpu_func}, .cpu_funcs_name = {"block_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {block_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {block_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "arr3d_to_matrix_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&arr3d, NX*NY*NZ*sizeof(int)); assert(arr3d); generate_block_data(arr3d, NX, NY, NZ, NX, NX*NY); unsigned nn[3] = {NX, NY, NZ}; unsigned ldn[3] = {1, NX, NX*NY}; /* Declare data to StarPU */ starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr3d, ldn, nn, 3, sizeof(int)); FPRINTF(stderr, "IN 3-dim Array: \n"); print_3dim_data(handle); /* Partition the 3-dim array in PARTS sub-blocks */ struct starpu_data_filter f = { .filter_func = starpu_ndim_filter_to_block, .filter_arg = 0, //Partition the array along X dimension .nchildren = PARTS, /* the children use a block interface*/ .get_child_ops = starpu_ndim_filter_to_block_child_ops }; starpu_data_partition(handle, &f); FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); for(i=0 ; icl = &cl; task->synchronous = 1; task->callback_func = NULL; task->handles[0] = block_handle; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result block */ FPRINTF(stderr, "OUT Block %d: \n", i); print_block_data(block_handle); } /* Unpartition the data, unregister it from StarPU and shutdown */ starpu_data_unpartition(handle, STARPU_MAIN_RAM); FPRINTF(stderr, "OUT 3-dim Array: \n"); print_3dim_data(handle); starpu_data_unregister(handle); starpu_free_noflag(arr3d, NX*NY*NZ*sizeof(int)); starpu_shutdown(); return 0; enodev: FPRINTF(stderr, "WARNING: No one can execute this task\n"); starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fndim_to_matrix.c000066400000000000000000000073551507764646700226370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 5 #define NY 4 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void matrix_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void matrix_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void matrix_hip_func(void *buffers[], void *cl_arg); #endif extern void generate_matrix_data(int *matrix, int nx, int ny, unsigned ld); extern void print_2dim_data(starpu_data_handle_t ndim_handle); extern void print_matrix_data(starpu_data_handle_t matrix_handle); int main(void) { int *arr2d; int ret, i, j, k; int factor = 12; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {matrix_cpu_func}, .cpu_funcs_name = {"matrix_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {matrix_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {matrix_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "arr2d_to_matrix_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&arr2d, NX*NY*sizeof(int)); generate_matrix_data(arr2d, NX, NY, NX); unsigned nn[2] = {NX, NY}; unsigned ldn[2] = {1, NX}; /* Declare data to StarPU */ starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr2d, ldn, nn, 2, sizeof(int)); FPRINTF(stderr, "IN 2-dim Array: \n"); print_2dim_data(handle); /* Partition the 2-dim array in PARTS sub-matrices */ struct starpu_data_filter f = { .filter_func = starpu_ndim_filter_to_matrix, .filter_arg = 1, //Partition the array along Y dimension .nchildren = PARTS, /* the children use a matrix interface*/ .get_child_ops = starpu_ndim_filter_to_matrix_child_ops }; starpu_data_partition(handle, &f); FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); for(i=0 ; icl = &cl; task->synchronous = 1; task->callback_func = NULL; task->handles[0] = matrix_handle; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result matrix */ FPRINTF(stderr, "OUT Matrix %d: \n", i); print_matrix_data(matrix_handle); } /* Unpartition the data, unregister it from StarPU and shutdown */ starpu_data_unpartition(handle, STARPU_MAIN_RAM); FPRINTF(stderr,"OUT 2-dim Array: \n"); print_2dim_data(handle); starpu_data_unregister(handle); starpu_free_noflag(arr2d, NX*NY*sizeof(int)); starpu_shutdown(); return 0; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fndim_to_tensor.c000066400000000000000000000077261507764646700226470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 6 #define NY 5 #define NZ 4 #define NT 3 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void tensor_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void tensor_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void tensor_hip_func(void *buffers[], void *cl_arg); #endif extern void generate_tensor_data(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); extern void print_4dim_data(starpu_data_handle_t ndim_handle); extern void print_tensor_data(starpu_data_handle_t tensor_handle); int main(void) { int *arr4d; int i, j, k, l; int ret; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {tensor_cpu_func}, .cpu_funcs_name = {"tensor_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {tensor_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {tensor_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "arr4d_to_tensor_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&arr4d, NX*NY*NZ*NT*sizeof(int)); assert(arr4d); generate_tensor_data(arr4d, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); unsigned nn[4] = {NX, NY, NZ, NT}; unsigned ldn[4] = {1, NX, NX*NY, NX*NY*NZ}; /* Declare data to StarPU */ starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(int)); FPRINTF(stderr, "IN 4-dim Array: \n"); print_4dim_data(handle); /* Partition the 4-dim array in PARTS sub-tensors */ struct starpu_data_filter f = { .filter_func = starpu_ndim_filter_to_tensor, .filter_arg = 0, //Partition the array along X dimension .nchildren = PARTS, /* the children use a tensor interface*/ .get_child_ops = starpu_ndim_filter_to_tensor_child_ops }; starpu_data_partition(handle, &f); FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); for(i=0 ; icl = &cl; task->synchronous = 1; task->callback_func = NULL; task->handles[0] = tensor_handle; task->cl_arg = &multiplier; task->cl_arg_size = sizeof(multiplier); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result tensor*/ FPRINTF(stderr, "OUT Tensor %d: \n", i); print_tensor_data(tensor_handle); } /* Unpartition the data, unregister it from StarPU and shutdown */ starpu_data_unpartition(handle, STARPU_MAIN_RAM); FPRINTF(stderr, "OUT 4-dim Array: \n"); print_4dim_data(handle); starpu_data_unregister(handle); starpu_free_noflag(arr4d, NX*NY*NZ*NT*sizeof(int)); starpu_shutdown(); return 0; enodev: FPRINTF(stderr, "WARNING: No one can execute this task\n"); starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fndim_to_variable.c000066400000000000000000000064531507764646700231160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define PARTS 1 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void cpu_func(void *buffers[], void *cl_arg) { int *factor = (int *) cl_arg; /* local copy of the variable pointer */ int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); *val *= *factor; } int main(void) { int i; int arr0d; starpu_data_handle_t handle; int factor = 10; int ret; struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "arr0d_to_variable_scal" }; FPRINTF(stderr,"IN 0-dim Array: \n"); arr0d = 1; FPRINTF(stderr, "%5d ", arr0d); FPRINTF(stderr,"\n"); ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Declare data to StarPU */ starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&arr0d, NULL, NULL, 0, sizeof(int)); /* Transfer the 0-dim array to a variable */ struct starpu_data_filter f = { .filter_func = starpu_ndim_filter_to_variable, .nchildren = PARTS, /* the children use a variable interface*/ .get_child_ops = starpu_ndim_filter_to_variable_child_ops }; starpu_data_partition(handle, &f); /* Submit a task on the variable */ for (i=0; ihandles[0] = variable_handle; task->cl = &cl; task->synchronous = 1; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result variable */ FPRINTF(stderr,"OUT Variable %d: \n", i); starpu_data_acquire(variable_handle, STARPU_R); FPRINTF(stderr, "%5d ", *variable); starpu_data_release(variable_handle); FPRINTF(stderr,"\n"); } starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unregister(handle); starpu_shutdown(); FPRINTF(stderr,"OUT 0-dim Array: \n"); FPRINTF(stderr, "%5d ", arr0d); FPRINTF(stderr,"\n"); return 0; enodev: FPRINTF(stderr, "WARNING: No one can execute this task\n"); starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/fndim_to_vector.c000066400000000000000000000076041507764646700226320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 21 #define PARTS 3 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void vector_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void vector_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void vector_hip_func(void *buffers[], void *cl_arg); #endif int main(void) { int i, j; int *arr1d; int factor = 10; int ret; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {vector_cpu_func}, .cpu_funcs_name = {"vector_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {vector_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {vector_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "arr1d_to_vector_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&arr1d, NX*sizeof(int)); FPRINTF(stderr,"IN 1-dim Array: \n"); for(i=0 ; ihandles[0] = vector_handle; task->cl = &cl; task->synchronous = 1; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result vector */ FPRINTF(stderr,"OUT Vector %d: \n", i); starpu_data_acquire(vector_handle, STARPU_R); for(j=0 ; j #define NX 20 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void display_func(void *buffers[], void *cl_arg) { unsigned i; /* length of the vector */ unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the vector pointer */ int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); FPRINTF(stderr, "vector with n=%u : ", n); for (i = 0; i < n; i++) FPRINTF(stderr, "%5d ", val[i]); FPRINTF(stderr, "\n"); } void cpu_func(void *buffers[], void *cl_arg) { unsigned i; /* length of the vector */ unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the vector pointer */ int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); FPRINTF(stderr, "computing on vector with n=%u\n", n); for (i = 0; i < n; i++) val[i] *= 2; } int main(void) { int i; int vector[NX]; starpu_data_handle_t handle; starpu_data_handle_t subhandles[PARTS]; int ret; struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "vector_scal" }; struct starpu_codelet print_cl = { .cpu_funcs = {display_func}, .cpu_funcs_name = {"display_func"}, .nbuffers = 1, .modes = {STARPU_R}, .name = "vector_display" }; for(i=0 ; i #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void cpu_codelet(void *buffers[], void *cl_arg) { unsigned i, j; int factor; starpu_codelet_unpack_args(cl_arg, &factor, 0); /* length of the matrix */ unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); /* local copy of the matrix pointer */ int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); FPRINTF(stderr, "computing on matrix with nx=%u, ny=%u, ld=%u\n", nx, ny, ld); for(j=0; j #define NX 6 #define NY 5 #define NZ 4 #define NT 3 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void tensor_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void tensor_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void tensor_hip_func(void *buffers[], void *cl_arg); #endif extern void generate_tensor_data(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); extern void print_tensor(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); extern void print_tensor_data(starpu_data_handle_t tensor_handle); int main(void) { int *tensor; int i, j, k, l; int ret; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {tensor_cpu_func}, .cpu_funcs_name = {"tensor_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {tensor_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {tensor_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "tensor_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&tensor, NX*NY*NZ*NT*sizeof(int)); assert(tensor); generate_tensor_data(tensor, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); /* Declare data to StarPU */ starpu_tensor_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)tensor, NX, NX*NY, NX*NY*NZ, NX, NY, NZ, NT, sizeof(int)); FPRINTF(stderr, "IN Tensor\n"); print_tensor_data(handle); /* Partition the tensor in PARTS sub-tensors */ struct starpu_data_filter f = { .filter_func = starpu_tensor_filter_block, .nchildren = PARTS }; starpu_data_partition(handle, &f); FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); for(i=0 ; icl = &cl; task->synchronous = 1; task->callback_func = NULL; task->handles[0] = starpu_data_get_sub_data(handle, 1, i); task->cl_arg = &multiplier; task->cl_arg_size = sizeof(multiplier); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* Unpartition the data, unregister it from StarPU and shutdown */ starpu_data_unpartition(handle, STARPU_MAIN_RAM); print_tensor_data(handle); starpu_data_unregister(handle); /* Print result tensor */ FPRINTF(stderr, "OUT Tensor\n"); print_tensor(tensor, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); starpu_free_noflag(tensor, NX*NY*NZ*NT*sizeof(int)); starpu_shutdown(); return 0; enodev: FPRINTF(stderr, "WARNING: No one can execute this task\n"); starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/ftensor_cpu.c000066400000000000000000000026241507764646700217750ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* dumb kernel to fill a 4D matrix */ #include void tensor_cpu_func(void *buffers[], void *cl_arg) { int i, j, k, l; int *factor = (int *) cl_arg; int *tensor = (int *)STARPU_TENSOR_GET_PTR(buffers[0]); int nx = (int)STARPU_TENSOR_GET_NX(buffers[0]); int ny = (int)STARPU_TENSOR_GET_NY(buffers[0]); int nz = (int)STARPU_TENSOR_GET_NZ(buffers[0]); int nt = (int)STARPU_TENSOR_GET_NT(buffers[0]); unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]); unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]); for(l=0; l static __global__ void ftensor_cuda(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt, float factor) { int i, j, k, l; for(l=0; l>>(tensor, nx, ny, nz, nt, ldy, ldz, ldt, *factor); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/filters/ftensor_hip.hip000066400000000000000000000040421507764646700223200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* dumb HIP kernel to fill a 4D matrix */ #include static __global__ void ftensor_hip(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt, float factor) { int i, j, k, l; for(l=0; l #define NX 6 #define NY 5 #define NZ 4 #define NT 3 #define PARTS 2 #define POS 1 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void block_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void block_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void block_hip_func(void *buffers[], void *cl_arg); #endif extern void generate_tensor_data(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); extern void print_tensor_data(starpu_data_handle_t tensor_handle); extern void print_block_data(starpu_data_handle_t block_handle); int main(void) { int *tensor; int i, j, k, l; int ret; int factor = 2; starpu_data_handle_t handle; struct starpu_codelet cl = { .cpu_funcs = {block_cpu_func}, .cpu_funcs_name = {"block_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {block_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {block_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "tensor_pick_block_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&tensor, NX*NY*NZ*NT*sizeof(int)); assert(tensor); generate_tensor_data(tensor, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); /* Declare data to StarPU */ starpu_tensor_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)tensor, NX, NX*NY, NX*NY*NZ, NX, NY, NZ, NT, sizeof(int)); FPRINTF(stderr, "IN Tensor: \n"); print_tensor_data(handle); /* Partition the tensor in PARTS sub-blocks */ struct starpu_data_filter f = { .filter_func = starpu_tensor_filter_pick_block_z, .filter_arg_ptr = (void*)(uintptr_t) POS, .nchildren = PARTS, /* the children use a block interface*/ .get_child_ops = starpu_tensor_filter_pick_block_child_ops }; starpu_data_partition(handle, &f); FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle)); for(i=0 ; icl = &cl; task->synchronous = 1; task->callback_func = NULL; task->handles[0] = block_handle; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result block */ FPRINTF(stderr, "OUT Block %d: \n", i); print_block_data(block_handle); } /* Unpartition the data, unregister it from StarPU and shutdown */ starpu_data_unpartition(handle, STARPU_MAIN_RAM); FPRINTF(stderr, "OUT Tensor: \n"); print_tensor_data(handle); starpu_data_unregister(handle); starpu_free_noflag(tensor, NX*NY*NZ*NT*sizeof(int)); starpu_shutdown(); return 0; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/ftensor_pick_variable.c000066400000000000000000000074541507764646700240070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 6 #define NY 5 #define NZ 4 #define NT 3 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void cpu_func(void *buffers[], void *cl_arg) { int *factor = (int *) cl_arg; /* local copy of the variable pointer */ int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); *val *= *factor; } #ifdef STARPU_USE_CUDA extern void variable_cuda_func(void *buffers[], void *cl_arg); #endif extern void generate_tensor_data(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt); extern void print_tensor_data(starpu_data_handle_t tensor_handle); int main(void) { int *tensor; int i, j, k, l; int ret; int factor = 2; uint32_t pos[4] = {1,2,1,2}; starpu_data_handle_t handle; starpu_data_handle_t var_handle; struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {variable_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "tensor_pick_variable_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&tensor, NX*NY*NZ*NT*sizeof(int)); assert(tensor); generate_tensor_data(tensor, NX, NY, NZ, NT, NX, NX*NY, NX*NY*NZ); /* Declare data to StarPU */ starpu_tensor_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)tensor, NX, NX*NY, NX*NY*NZ, NX, NY, NZ, NT, sizeof(int)); FPRINTF(stderr, "IN Tensor: \n"); print_tensor_data(handle); /* Pick a variable in the tensor */ struct starpu_data_filter f_var = { .filter_func = starpu_tensor_filter_pick_variable, .filter_arg_ptr = (void*)pos, .nchildren = 1, /* the children use a variable interface*/ .get_child_ops = starpu_tensor_filter_pick_variable_child_ops }; starpu_data_partition_plan(handle, &f_var, &var_handle); FPRINTF(stderr, "Sub Variable:\n"); int *variable = (int *)starpu_variable_get_local_ptr(var_handle); starpu_data_acquire(var_handle, STARPU_R); FPRINTF(stderr, "%5d ", *variable); starpu_data_release(var_handle); FPRINTF(stderr,"\n"); /* Submit the task */ struct starpu_task *task = starpu_task_create(); FPRINTF(stderr,"Dealing with sub-variable\n"); task->handles[0] = var_handle; task->cl = &cl; task->synchronous = 1; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result variable */ FPRINTF(stderr,"OUT Variable:\n"); starpu_data_acquire(var_handle, STARPU_R); FPRINTF(stderr, "%5d ", *variable); starpu_data_release(var_handle); FPRINTF(stderr,"\n"); starpu_data_partition_clean(handle, 1, &var_handle); /* Unpartition the data, unregister it from StarPU and shutdown */ //starpu_data_unpartition(handle, STARPU_MAIN_RAM); FPRINTF(stderr, "OUT Tensor: \n"); print_tensor_data(handle); starpu_data_unregister(handle); starpu_free_noflag(tensor, NX*NY*NZ*NT*sizeof(int)); starpu_shutdown(); return 0; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/filters/ftensor_print.c000066400000000000000000000052251507764646700223420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void print_tensor(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt) { int i, j, k, l; FPRINTF(stderr, "tensor=%p nx=%d ny=%d nz=%d nt=%d ldy=%u ldz=%u ldt=%u\n", tensor, nx, ny, nz, nt, ldy, ldz, ldt); for(l=0 ; l static __global__ void fvariable_cuda(int *val, int factor) { *val *= factor; } extern "C" void variable_cuda_func(void *buffers[], void *_args) { int *factor = (int *)_args; int *val = (int *) STARPU_VARIABLE_GET_PTR(buffers[0]); fvariable_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>>(val, *factor); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/filters/fvector.c000066400000000000000000000064221507764646700211160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This examplifies how to use partitioning filters. We here just split a * vector into slices, and run a dumb kernel on them. */ #include #define NX 21 #define PARTS 3 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern void vector_cpu_func(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA extern void vector_cuda_func(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_HIP extern void vector_hip_func(void *buffers[], void *cl_arg); #endif int main(void) { int i; int* vector; starpu_data_handle_t handle; int factor=1; int ret; struct starpu_codelet cl = { .cpu_funcs = {vector_cpu_func}, .cpu_funcs_name = {"vector_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {vector_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {vector_hip_func}, .hip_flags = {STARPU_HIP_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "vector_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&vector, NX*sizeof(int)); for(i=0 ; ihandles[0] = sub_handle; task->cl = &cl; task->synchronous = 1; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* Unpartition the data, unregister it from StarPU and shutdown */ starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unregister(handle); FPRINTF(stderr,"OUT Vector: "); for(i=0 ; i void vector_cpu_func(void *buffers[], void *cl_arg) { int i; int *factor = (int *) cl_arg; /* length of the vector */ int n = (int)STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the vector pointer */ int *vector = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); for (i = 0; i < n; i++) vector[i] *= *factor; } starpu-1.4.9+dfsg/examples/filters/fvector_cuda.cu000066400000000000000000000024721507764646700223000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* dumb CUDA kernel to fill a 1D matrix */ #include static __global__ void fvector_cuda(int *vector, int n, float factor) { int i; for (i = 0; i < n; i++) vector[i] *= factor; } extern "C" void vector_cuda_func(void *buffers[], void *_args) { int *factor = (int *)_args; int *vector = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); int n = (int)STARPU_VECTOR_GET_NX(buffers[0]); fvector_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>>(vector, n, *factor); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/filters/fvector_hip.hip000066400000000000000000000025021507764646700223070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* dumb HIP kernel to fill a 1D matrix */ #include static __global__ void fvector_hip(int *vector, int n, float factor) { int i; for (i = 0; i < n; i++) vector[i] *= factor; } extern "C" void vector_hip_func(void *buffers[], void *_args) { int *factor = (int *)_args; int *vector = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); int n = (int)STARPU_VECTOR_GET_NX(buffers[0]); hipLaunchKernelGGL(fvector_hip, 1, 1, 0, starpu_hip_get_local_stream(), vector, n, *factor); hipError_t status = hipGetLastError(); if (status != hipSuccess) STARPU_HIP_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/filters/fvector_pick_variable.c000066400000000000000000000070011507764646700237630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 21 #define PARTS 3 #define POS 5 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void cpu_func(void *buffers[], void *cl_arg) { int *factor = (int *) cl_arg; /* local copy of the variable pointer */ int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); *val *= *factor; } int main(void) { int i; int* vector; starpu_data_handle_t handle; int factor = 10; int ret; struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "vector_pick_variable_scal" }; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&vector, NX*sizeof(int)); FPRINTF(stderr,"IN Vector: \n"); for(i=0 ; ihandles[0] = sub_handle; task->cl = &cl; task->synchronous = 1; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Print result variable */ FPRINTF(stderr,"OUT Variable %d: \n", i); starpu_data_acquire(sub_handle, STARPU_R); FPRINTF(stderr, "%5d ", *variable); starpu_data_release(sub_handle); FPRINTF(stderr,"\n"); } starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unregister(handle); FPRINTF(stderr,"OUT Vector: \n"); for(i=0 ; i /* Shadow width */ #define SHADOW 2 #define NX 30 #define PARTS 3 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void cpu_func(void *buffers[], void *cl_arg) { (void)cl_arg; unsigned i; /* length of the shadowed source vector */ unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the shadowed source vector pointer */ int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); /* length of the destination vector */ unsigned n2 = STARPU_VECTOR_GET_NX(buffers[1]); /* local copy of the destination vector pointer */ int *val2 = (int *)STARPU_VECTOR_GET_PTR(buffers[1]); /* If things go right, sizes should match */ STARPU_ASSERT(n == n2); for (i = 0; i < n; i++) val2[i] = val[i]; } #ifdef STARPU_USE_CUDA void cuda_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source vector */ unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the shadowed source vector pointer */ int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); /* length of the destination vector */ unsigned n2 = STARPU_VECTOR_GET_NX(buffers[1]); /* local copy of the destination vector pointer */ int *val2 = (int *)STARPU_VECTOR_GET_PTR(buffers[1]); /* If things go right, sizes should match */ STARPU_ASSERT(n == n2); cudaMemcpyAsync(val2, val, n*sizeof(*val), cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); } #endif int main(void) { unsigned j; int vector[NX + 2*SHADOW]; int vector2[NX + PARTS*2*SHADOW]; starpu_data_handle_t handle, handle2; int ret, i; struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 2, .modes = {STARPU_R, STARPU_W} }; for(i=0 ; ihandles[0] = sub_handle; task->handles[1] = sub_handle2; task->cl = &cl; task->synchronous = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unpartition(handle2, STARPU_MAIN_RAM); starpu_data_unregister(handle); starpu_data_unregister(handle2); starpu_shutdown(); FPRINTF(stderr,"OUT Vector: "); for(i=0 ; i /* Shadow width */ #define SHADOWX 3 #define SHADOWY 2 #define NX 20 #define NY 30 #define PARTSX 2 #define PARTSY 3 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void cpu_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source matrix */ unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); unsigned n = STARPU_MATRIX_GET_NX(buffers[0]); unsigned m = STARPU_MATRIX_GET_NY(buffers[0]); /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); /* length of the destination matrix */ unsigned ld2 = STARPU_MATRIX_GET_LD(buffers[1]); unsigned n2 = STARPU_MATRIX_GET_NX(buffers[1]); unsigned m2 = STARPU_MATRIX_GET_NY(buffers[1]); /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_MATRIX_GET_PTR(buffers[1]); unsigned i, j; /* If things go right, sizes should match */ STARPU_ASSERT(n == n2); STARPU_ASSERT(m == m2); for (j = 0; j < m; j++) for (i = 0; i < n; i++) val2[j*ld2+i] = val[j*ld+i]; } #ifdef STARPU_USE_CUDA void cuda_func(void *buffers[], void *cl_arg) { (void)cl_arg; cudaError_t cures; /* length of the shadowed source matrix */ unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); unsigned n = STARPU_MATRIX_GET_NX(buffers[0]); unsigned m = STARPU_MATRIX_GET_NY(buffers[0]); /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); /* length of the destination matrix */ unsigned ld2 = STARPU_MATRIX_GET_LD(buffers[1]); unsigned n2 = STARPU_MATRIX_GET_NX(buffers[1]); unsigned m2 = STARPU_MATRIX_GET_NY(buffers[1]); /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_MATRIX_GET_PTR(buffers[1]); /* If things go right, sizes should match */ STARPU_ASSERT(n == n2); STARPU_ASSERT(m == m2); cures = cudaMemcpy2DAsync(val2, ld2*sizeof(*val2), val, ld*sizeof(*val), n*sizeof(*val), m, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); } #endif int main(void) { unsigned i, j, k, l; int matrix[NY + 2*SHADOWY][NX + 2*SHADOWX]; int matrix2[NY + PARTSY*2*SHADOWY][NX + PARTSX*2*SHADOWX]; starpu_data_handle_t handle, handle2; int ret; struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 2, .modes = {STARPU_R, STARPU_W} }; memset(matrix, -1, sizeof(matrix)); for(j=1 ; j<=NY ; j++) for(i=1 ; i<=NX ; i++) matrix[SHADOWY+j-1][SHADOWX+i-1] = i+j; /* Copy borders */ for (j = SHADOWY ; jhandles[0] = sub_handle; task->handles[1] = sub_handle2; task->cl = &cl; task->synchronous = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unpartition(handle2, STARPU_MAIN_RAM); starpu_data_unregister(handle); starpu_data_unregister(handle2); starpu_shutdown(); FPRINTF(stderr,"OUT Matrix:\n"); for(j=0 ; j /* Shadow width */ #define SHADOWX 2 #define SHADOWY 3 #define SHADOWZ 4 #define NX 12 #define NY 9 #define NZ 6 #define PARTSX 4 #define PARTSY 3 #define PARTSZ 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void cpu_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source matrix */ unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); unsigned x = STARPU_BLOCK_GET_NX(buffers[0]); unsigned y = STARPU_BLOCK_GET_NY(buffers[0]); unsigned z = STARPU_BLOCK_GET_NZ(buffers[0]); /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_BLOCK_GET_PTR(buffers[0]); /* length of the destination matrix */ unsigned ldy2 = STARPU_BLOCK_GET_LDY(buffers[1]); unsigned ldz2 = STARPU_BLOCK_GET_LDZ(buffers[1]); unsigned x2 = STARPU_BLOCK_GET_NX(buffers[1]); unsigned y2 = STARPU_BLOCK_GET_NY(buffers[1]); unsigned z2 = STARPU_BLOCK_GET_NZ(buffers[1]); /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_BLOCK_GET_PTR(buffers[1]); unsigned i, j, k; /* If things go right, sizes should match */ STARPU_ASSERT(x == x2); STARPU_ASSERT(y == y2); STARPU_ASSERT(z == z2); for (k = 0; k < z; k++) for (j = 0; j < y; j++) for (i = 0; i < x; i++) val2[k*ldz2+j*ldy2+i] = val[k*ldz+j*ldy+i]; } #ifdef STARPU_USE_CUDA void cuda_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source matrix */ unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); unsigned x = STARPU_BLOCK_GET_NX(buffers[0]); unsigned y = STARPU_BLOCK_GET_NY(buffers[0]); unsigned z = STARPU_BLOCK_GET_NZ(buffers[0]); /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_BLOCK_GET_PTR(buffers[0]); /* length of the destination matrix */ unsigned ldy2 = STARPU_BLOCK_GET_LDY(buffers[1]); unsigned ldz2 = STARPU_BLOCK_GET_LDZ(buffers[1]); unsigned x2 = STARPU_BLOCK_GET_NX(buffers[1]); unsigned y2 = STARPU_BLOCK_GET_NY(buffers[1]); unsigned z2 = STARPU_BLOCK_GET_NZ(buffers[1]); /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_BLOCK_GET_PTR(buffers[1]); unsigned k; cudaError_t cures; /* If things go right, sizes should match */ STARPU_ASSERT(x == x2); STARPU_ASSERT(y == y2); STARPU_ASSERT(z == z2); for (k = 0; k < z; k++) { cures = cudaMemcpy2DAsync(val2+k*ldz2, ldy2*sizeof(*val2), val+k*ldz, ldy*sizeof(*val), x*sizeof(*val), y, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); STARPU_ASSERT(!cures); } } #endif int main(void) { unsigned i, j, k, l, m, n; int matrix[NZ + 2*SHADOWZ][NY + 2*SHADOWY][NX + 2*SHADOWX]; int matrix2[NZ + PARTSZ*2*SHADOWZ][NY + PARTSY*2*SHADOWY][NX + PARTSX*2*SHADOWX]; starpu_data_handle_t handle, handle2; int ret; struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 2, .modes = {STARPU_R, STARPU_W} }; memset(matrix, -1, sizeof(matrix)); for(k=1 ; k<=NZ ; k++) for(j=1 ; j<=NY ; j++) for(i=1 ; i<=NX ; i++) matrix[SHADOWZ+k-1][SHADOWY+j-1][SHADOWX+i-1] = i+j+k; /* Copy planes */ for (k = SHADOWZ ; khandles[0] = sub_handle; task->handles[1] = sub_handle2; task->cl = &cl; task->synchronous = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } } starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unpartition(handle2, STARPU_MAIN_RAM); starpu_data_unregister(handle); starpu_data_unregister(handle2); starpu_shutdown(); FPRINTF(stderr,"OUT Matrix:\n"); for(k=0 ; k /* Shadow width */ #define SHADOWX 2 #define SHADOWY 2 #define SHADOWZ 1 #define SHADOWT 1 #define NX 6 #define NY 6 #define NZ 2 #define NT 2 #define PARTSX 2 #define PARTSY 2 #define PARTSZ 2 #define PARTST 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void cpu_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source matrix */ unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]); unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]); unsigned x = STARPU_TENSOR_GET_NX(buffers[0]); unsigned y = STARPU_TENSOR_GET_NY(buffers[0]); unsigned z = STARPU_TENSOR_GET_NZ(buffers[0]); unsigned t = STARPU_TENSOR_GET_NT(buffers[0]); /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_TENSOR_GET_PTR(buffers[0]); /* length of the destination matrix */ unsigned ldy2 = STARPU_TENSOR_GET_LDY(buffers[1]); unsigned ldz2 = STARPU_TENSOR_GET_LDZ(buffers[1]); unsigned ldt2 = STARPU_TENSOR_GET_LDT(buffers[1]); unsigned x2 = STARPU_TENSOR_GET_NX(buffers[1]); unsigned y2 = STARPU_TENSOR_GET_NY(buffers[1]); unsigned z2 = STARPU_TENSOR_GET_NZ(buffers[1]); unsigned t2 = STARPU_TENSOR_GET_NT(buffers[1]); /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_TENSOR_GET_PTR(buffers[1]); unsigned i, j, k, l; /* If things go right, sizes should match */ STARPU_ASSERT(x == x2); STARPU_ASSERT(y == y2); STARPU_ASSERT(z == z2); STARPU_ASSERT(t == t2); for (l = 0; l < t; l++) for (k = 0; k < z; k++) for (j = 0; j < y; j++) for (i = 0; i < x; i++) val2[l*ldt2+k*ldz2+j*ldy2+i] = val[l*ldt+k*ldz+j*ldy+i]; } #ifdef STARPU_USE_CUDA void cuda_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source matrix*/ unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]); unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]); unsigned x = STARPU_TENSOR_GET_NX(buffers[0]); unsigned y = STARPU_TENSOR_GET_NY(buffers[0]); unsigned z = STARPU_TENSOR_GET_NZ(buffers[0]); unsigned t = STARPU_TENSOR_GET_NT(buffers[0]); /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_TENSOR_GET_PTR(buffers[0]); /* length of the destination matrix */ unsigned ldy2 = STARPU_TENSOR_GET_LDY(buffers[1]); unsigned ldz2 = STARPU_TENSOR_GET_LDZ(buffers[1]); unsigned ldt2 = STARPU_TENSOR_GET_LDT(buffers[1]); unsigned x2 = STARPU_TENSOR_GET_NX(buffers[1]); unsigned y2 = STARPU_TENSOR_GET_NY(buffers[1]); unsigned z2 = STARPU_TENSOR_GET_NZ(buffers[1]); unsigned t2 = STARPU_TENSOR_GET_NT(buffers[1]); /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_TENSOR_GET_PTR(buffers[1]); unsigned k, l; cudaError_t cures; /* If things go right, sizes should match */ STARPU_ASSERT(x == x2); STARPU_ASSERT(y == y2); STARPU_ASSERT(z == z2); STARPU_ASSERT(t == t2); for (l = 0; l < t; l++) { for (k = 0; k < z; k++) { cures = cudaMemcpy2DAsync(val2+k*ldz2+l*ldt2, ldy2*sizeof(*val2), val+k*ldz+l*ldt, ldy*sizeof(*val), x*sizeof(*val), y, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); STARPU_ASSERT(!cures); } } } #endif int main(void) { unsigned i, j, k, l, m, n, p, q; int matrix[NT + 2*SHADOWT][NZ + 2*SHADOWZ][NY + 2*SHADOWY][NX + 2*SHADOWX]; int matrix2[NT + PARTST*2*SHADOWT][NZ + PARTSZ*2*SHADOWZ][NY + PARTSY*2*SHADOWY][NX + PARTSX*2*SHADOWX]; starpu_data_handle_t handle, handle2; int ret; struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 2, .modes = {STARPU_R, STARPU_W} }; memset(matrix, -1, sizeof(matrix)); for(l=1 ; l<=NT ; l++) for(k=1 ; k<=NZ ; k++) for(j=1 ; j<=NY ; j++) for(i=1 ; i<=NX ; i++) matrix[SHADOWT+l-1][SHADOWZ+k-1][SHADOWY+j-1][SHADOWX+i-1] = i+j+k+l; /*copy cubes*/ for (l = SHADOWT ; lhandles[0] = sub_handle; task->handles[1] = sub_handle2; task->cl = &cl; task->synchronous = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } } } starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unpartition(handle2, STARPU_MAIN_RAM); starpu_data_unregister(handle); starpu_data_unregister(handle2); starpu_shutdown(); FPRINTF(stderr,"OUT Matrix:\n"); for(l=0 ; l /* Shadow width */ #define SHADOWX 2 #define SHADOWY 2 #define SHADOWZ 1 #define SHADOWT 1 #define SHADOWG 1 #define NX 6 #define NY 6 #define NZ 2 #define NT 2 #define NG 2 #define PARTSX 2 #define PARTSY 2 #define PARTSZ 2 #define PARTST 2 #define PARTSG 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void cpu_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source matrix */ unsigned *nn = STARPU_NDIM_GET_NN(buffers[0]); unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); unsigned x = nn[0]; unsigned y = nn[1]; unsigned z = nn[2]; unsigned t = nn[3]; unsigned g = nn[4]; unsigned ldy = ldn[1]; unsigned ldz = ldn[2]; unsigned ldt = ldn[3]; unsigned ldg = ldn[4]; /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_NDIM_GET_PTR(buffers[0]); /* length of the destination matrix */ unsigned *nn2 = STARPU_NDIM_GET_NN(buffers[1]); unsigned *ldn2 = STARPU_NDIM_GET_LDN(buffers[1]); unsigned x2 = nn2[0]; unsigned y2 = nn2[1]; unsigned z2 = nn2[2]; unsigned t2 = nn2[3]; unsigned g2 = nn2[4]; unsigned ldy2 = ldn2[1]; unsigned ldz2 = ldn2[2]; unsigned ldt2 = ldn2[3]; unsigned ldg2 = ldn2[4]; /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_NDIM_GET_PTR(buffers[1]); unsigned i, j, k, l, m; /* If things go right, sizes should match */ STARPU_ASSERT(x == x2); STARPU_ASSERT(y == y2); STARPU_ASSERT(z == z2); STARPU_ASSERT(t == t2); STARPU_ASSERT(g == g2); for(m = 0; m < g; m++) for (l = 0; l < t; l++) for (k = 0; k < z; k++) for (j = 0; j < y; j++) for (i = 0; i < x; i++) val2[m*ldg2+l*ldt2+k*ldz2+j*ldy2+i] = val[m*ldg+l*ldt+k*ldz+j*ldy+i]; } #ifdef STARPU_USE_CUDA void cuda_func(void *buffers[], void *cl_arg) { (void)cl_arg; /* length of the shadowed source matrix */ unsigned *nn = STARPU_NDIM_GET_NN(buffers[0]); unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); unsigned x = nn[0]; unsigned y = nn[1]; unsigned z = nn[2]; unsigned t = nn[3]; unsigned g = nn[4]; unsigned ldy = ldn[1]; unsigned ldz = ldn[2]; unsigned ldt = ldn[3]; unsigned ldg = ldn[4]; /* local copy of the shadowed source matrix pointer */ int *val = (int *)STARPU_NDIM_GET_PTR(buffers[0]); /* length of the destination matrix */ unsigned *nn2 = STARPU_NDIM_GET_NN(buffers[1]); unsigned *ldn2 = STARPU_NDIM_GET_LDN(buffers[1]); unsigned x2 = nn2[0]; unsigned y2 = nn2[1]; unsigned z2 = nn2[2]; unsigned t2 = nn2[3]; unsigned g2 = nn2[4]; unsigned ldy2 = ldn2[1]; unsigned ldz2 = ldn2[2]; unsigned ldt2 = ldn2[3]; unsigned ldg2 = ldn2[4]; /* local copy of the destination matrix pointer */ int *val2 = (int *)STARPU_NDIM_GET_PTR(buffers[1]); unsigned k, l, m; cudaError_t cures; /* If things go right, sizes should match */ STARPU_ASSERT(x == x2); STARPU_ASSERT(y == y2); STARPU_ASSERT(z == z2); STARPU_ASSERT(t == t2); STARPU_ASSERT(g == g2); for(m = 0; m < g; m++) { for (l = 0; l < t; l++) { for (k = 0; k < z; k++) { cures = cudaMemcpy2DAsync(val2+k*ldz2+l*ldt2+m*ldg2, ldy2*sizeof(*val2), val+k*ldz+l*ldt+m*ldg, ldy*sizeof(*val), x*sizeof(*val), y, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); STARPU_ASSERT(!cures); } } } } #endif int main(void) { unsigned i, j, k, l, m, n, p, q, r, s; int matrix[NG + 2*SHADOWG][NT + 2*SHADOWT][NZ + 2*SHADOWZ][NY + 2*SHADOWY][NX + 2*SHADOWX]; int matrix2[NG + PARTSG*2*SHADOWG][NT + PARTST*2*SHADOWT][NZ + PARTSZ*2*SHADOWZ][NY + PARTSY*2*SHADOWY][NX + PARTSX*2*SHADOWX]; starpu_data_handle_t handle, handle2; int ret; struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 2, .modes = {STARPU_R, STARPU_W} }; memset(matrix, -1, sizeof(matrix)); for(m=1 ; m<=NG ; m++) for(l=1 ; l<=NT ; l++) for(k=1 ; k<=NZ ; k++) for(j=1 ; j<=NY ; j++) for(i=1 ; i<=NX ; i++) matrix[SHADOWG+m-1][SHADOWT+l-1][SHADOWZ+k-1][SHADOWY+j-1][SHADOWX+i-1] = i+j+k+l+m; /*copy tensors*/ for(m=SHADOWG ; mhandles[0] = sub_handle; task->handles[1] = sub_handle2; task->cl = &cl; task->synchronous = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } } } } starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unpartition(handle2, STARPU_MAIN_RAM); starpu_data_unregister(handle); starpu_data_unregister(handle2); starpu_shutdown(); FPRINTF(stderr,"OUT Matrix:\n"); for(m=0 ; m mesh%elt(i) CALL starpu_register_element_c(numpar%Neq_max,elt%Np,elt%Ng,elt%ro,elt%dro, & elt%basis,elt%ro_h,elt%dro_h,elt%basis_h) ENDDO !Compute DO it = 1,it_tot ! compute new dro for each element DO i = 1,Nelt elt => mesh%elt(i) CALL starpu_loop_element_task_c(numpar%coeff,elt%ro_h,elt%dro_h,elt%basis_h) ENDDO ! sync (if needed by the algorithm) CALL starpu_task_wait_for_all() ! - - - - - ! copy dro to ro for each element DO i = 1,Nelt elt => mesh%elt(i) CALL starpu_copy_element_task_c(elt%ro_h,elt%dro_h) ENDDO ! sync (if needed by the algorithm) CALL starpu_task_wait_for_all() ENDDO !Unregistration of elements DO i = 1,Nelt elt => mesh%elt(i) CALL starpu_unregister_element_c(elt%ro_h,elt%dro_h,elt%basis_h) ENDDO !Terminate StarPU, no task can be submitted after CALL starpu_shutdown() !Check data with StarPU WRITE(6,'(a)') " " WRITE(6,'(a)') " %%%% RESULTS STARPU %%%% " WRITE(6,'(a)') " " DO i = 1,Nelt WRITE(6,'(a,i4,a)') " elt ", i , " ; elt%ro = " WRITE(6,'(10(1x,F11.2))') mesh%elt(i)%ro WRITE(6,'(a)') " ------------------------ " ENDDO !Same compute without StarPU DO i = 1,Nelt elt => mesh%elt(i) CALL init_element(elt%ro,elt%dro,elt%basis,numpar%Neq_max,elt%Np,elt%Ng,i) ENDDO DO it = 1, it_tot DO i = 1,Nelt elt => mesh%elt(i) CALL loop_element_cpu(elt%ro,elt%dro,elt%basis,numpar%coeff,numpar%Neq_max,elt%Ng,elt%Np) elt%ro = elt%ro + elt%dro ENDDO ENDDO WRITE(6,'(a)') " " WRITE(6,'(a)') " %%%% RESULTS VERIFICATION %%%% " WRITE(6,'(a)') " " DO i = 1,Nelt WRITE(6,'(a,i4,a)') " elt ", i , " ; elt%ro = " WRITE(6,'(10(1x,F11.2))') mesh%elt(i)%ro WRITE(6,'(a)') " ------------------------ " ENDDO WRITE(6,'(a)') " " !Deallocation DO i = 1,Nelt elt => mesh%elt(i) DEALLOCATE(elt%ro) DEALLOCATE(elt%dro) DEALLOCATE(elt%basis) ENDDO DEALLOCATE(mesh%elt) END PROGRAM f90_example starpu-1.4.9+dfsg/examples/fortran90/marshalling.c000066400000000000000000000115421507764646700221220ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2015-2015 ONERA * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Helper functions to initialize StarPU and register element matrices */ #include //--------------------------------------------------------------// void starpu_register_element_c(int Neq_max,int Np, int Ng,double **ro, double **dro, double **basis, void **ro_h, void **dro_h, void **basis_h) { starpu_data_handle_t ro_handle; starpu_data_handle_t dro_handle; starpu_data_handle_t basis_handle; starpu_matrix_data_register(&ro_handle, 0, (uintptr_t)ro,Neq_max,Neq_max,Np, sizeof(double)); starpu_matrix_data_register(&dro_handle, 0, (uintptr_t)dro,Neq_max,Neq_max,Np, sizeof(double)); starpu_matrix_data_register(&basis_handle, 0, (uintptr_t)basis,Np,Np,Ng, sizeof(double)); *ro_h = ro_handle; *dro_h = dro_handle; *basis_h = basis_handle; } void starpu_unregister_element_c(void **ro_h, void **dro_h, void **basis_h) { starpu_data_handle_t ro_handle = *ro_h; starpu_data_handle_t dro_handle = *dro_h; starpu_data_handle_t basis_handle = *basis_h; starpu_data_unregister(ro_handle); starpu_data_unregister(dro_handle); starpu_data_unregister(basis_handle); } //--------------------------------------------------------------// void loop_element_cpu_fortran(double coeff, int Neq_max, int Np, int Ng, void *ro_ptr, void *dro_ptr, void *basis_ptr, void *cl_arg); void loop_element_cpu_func(void *buffers[], void *cl_arg); struct starpu_codelet cl_loop_element = { .cpu_funcs = {loop_element_cpu_func}, .nbuffers = 3, .modes = {STARPU_R,STARPU_RW,STARPU_R}, .name = "LOOP_ELEMENT" }; void loop_element_cpu_func(void *buffers[], void *cl_arg) { double coeff; double **ro = (double **) STARPU_MATRIX_GET_PTR(buffers[0]); int Neq_max = STARPU_MATRIX_GET_NX(buffers[0]); double **dro = (double **) STARPU_MATRIX_GET_PTR(buffers[1]); double **basis = (double **) STARPU_MATRIX_GET_PTR(buffers[2]); int Np = STARPU_MATRIX_GET_NX(buffers[2]); int Ng = STARPU_MATRIX_GET_NY(buffers[2]); starpu_codelet_unpack_args(cl_arg, &coeff); void *ro_ptr = &ro; void *dro_ptr = &dro; void *basis_ptr = &basis; loop_element_cpu_fortran(coeff,Neq_max,Np,Ng, ro_ptr,dro_ptr,basis_ptr,cl_arg); } void starpu_loop_element_task_c(double coeff, void **ro_h, void **dro_h, void **basis_h) { int ret; starpu_data_handle_t ro_handle = *ro_h; starpu_data_handle_t dro_handle = *dro_h; starpu_data_handle_t basis_handle = *basis_h; /* execute the task on any eligible computational resource */ ret = starpu_task_insert(&cl_loop_element, STARPU_VALUE, &coeff, sizeof(double), STARPU_R, ro_handle, STARPU_RW, dro_handle, STARPU_R, basis_handle, 0); /* verification */ STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } //--------------------------------------------------------------// void copy_element_cpu_fortran(int Neq_max, int Np, void *ro_ptr, void *dro_ptr); void copy_element_cpu_func(void *buffers[], void *cl_arg); struct starpu_codelet cl_copy_element = { .cpu_funcs = {copy_element_cpu_func}, .nbuffers = 2, .modes = {STARPU_RW,STARPU_R}, .name = "COPY_ELEMENT" }; void copy_element_cpu_func(void *buffers[], void *cl_arg) { (void)cl_arg; double **ro = (double **) STARPU_MATRIX_GET_PTR(buffers[0]); int Neq_max = STARPU_MATRIX_GET_NX(buffers[0]); int Np = STARPU_MATRIX_GET_NY(buffers[0]); double **dro = (double **) STARPU_MATRIX_GET_PTR(buffers[1]); void *ro_ptr = &ro; void *dro_ptr = &dro; copy_element_cpu_fortran(Neq_max,Np,ro_ptr,dro_ptr); } void starpu_copy_element_task_c(void **ro_h, void **dro_h) { int ret; starpu_data_handle_t ro_handle = *ro_h; starpu_data_handle_t dro_handle = *dro_h; /* execute the task on any eligible computational resource */ ret = starpu_task_insert(&cl_copy_element, STARPU_RW, ro_handle, STARPU_R, dro_handle, 0); /* verification */ STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } //--------------------------------------------------------------// int starpu_my_init_c() { /* Initialize StarPU with default configuration */ int ret; struct starpu_conf conf; starpu_conf_init(&conf); conf.sched_policy_name = "dmda"; ret = starpu_init(&conf); /* int ret = starpu_init(NULL); */ return ret; } starpu-1.4.9+dfsg/examples/fortran90/mod_compute.f90000066400000000000000000000100051507764646700223010ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! Copyright (C) 2015-2015 ONERA ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! ! Computation kernels for the simulation MODULE mod_compute USE mod_types USE starpu_mod USE mod_interface USE iso_c_binding IMPLICIT NONE CONTAINS !--------------------------------------------------------------! SUBROUTINE init_element(ro,dro,basis,Neq_max,Np,Ng,i) INTEGER(KIND=C_INT),INTENT(IN) :: Neq_max,Np,Ng,i REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(INOUT) :: ro,basis,dro !Local variables INTEGER(KIND=C_INT) :: n,nb,neq DO nb=1,Np DO neq= 1,Neq_max ro(neq,nb) = 0.01*(nb+neq)*i END DO END DO DO nb=1,Np DO neq= 1,Neq_max dro(neq,nb) = 0.05*(nb-neq)*i END DO END DO DO n=1,Ng DO nb=1,Np basis(nb,n) = 0.05*(n+nb)*i END DO END DO END SUBROUTINE init_element !--------------------------------------------------------------! RECURSIVE SUBROUTINE loop_element_cpu_fortran(coeff,Neq_max,Np,Ng, & & ro_ptr,dro_ptr,basis_ptr) BIND(C) INTEGER(KIND=C_INT),VALUE :: Neq_max,Np,Ng REAL(KIND=C_DOUBLE),VALUE :: coeff TYPE(C_PTR) :: ro_ptr,dro_ptr,basis_ptr !Local variables REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER :: ro,dro,basis CALL C_F_POINTER(ro_ptr,ro,[Neq_max,Np]) CALL C_F_POINTER(dro_ptr,dro,[Neq_max,Np]) CALL C_F_POINTER(basis_ptr,basis,[Np,Ng]) CALL loop_element_cpu(ro,dro,basis,coeff,Neq_max,Ng,Np) END SUBROUTINE loop_element_cpu_fortran !--------------------------------------------------------------! RECURSIVE SUBROUTINE loop_element_cpu(ro,dro,basis,coeff,Neq_max,Ng,Np) REAL(KIND=C_DOUBLE),INTENT(IN) :: coeff INTEGER(KIND=C_INT),INTENT(IN) :: Neq_max,Ng,Np REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(IN) :: ro,basis REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(INOUT) :: dro !Local variables REAL(KIND=C_DOUBLE) :: coeff2,r INTEGER(KIND=C_INT) :: n,nb,neq DO n=1,Ng r = 0. DO nb=1,Np DO neq= 1,Neq_max r = r + basis(nb,n) * ro(neq,nb) ENDDO ENDDO coeff2 = r + coeff DO nb=1,Np DO neq = 1,Neq_max dro(neq,nb) = coeff2 + dro(neq,nb) ENDDO ENDDO ENDDO END SUBROUTINE loop_element_cpu !--------------------------------------------------------------! RECURSIVE SUBROUTINE copy_element_cpu_fortran(Neq_max,Np, & & ro_ptr,dro_ptr) BIND(C) INTEGER(KIND=C_INT),VALUE :: Neq_max,Np TYPE(C_PTR) :: ro_ptr,dro_ptr !Local variables REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER :: ro,dro CALL C_F_POINTER(ro_ptr,ro,[Neq_max,Np]) CALL C_F_POINTER(dro_ptr,dro,[Neq_max,Np]) CALL copy_element_cpu(ro,dro) END SUBROUTINE copy_element_cpu_fortran !--------------------------------------------------------------! RECURSIVE SUBROUTINE copy_element_cpu(ro,dro) REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(INOUT) :: ro REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(IN) :: dro ro = ro + dro END SUBROUTINE copy_element_cpu END MODULE mod_compute starpu-1.4.9+dfsg/examples/fortran90/mod_interface.f90000066400000000000000000000043451507764646700225770ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! Copyright (C) 2015-2015 ONERA ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! ! Fortran module interface for StarPU initialization and element registration MODULE mod_interface INTERFACE FUNCTION starpu_my_init_c() BIND(C) USE iso_c_binding INTEGER(KIND=C_INT) :: starpu_my_init_c END FUNCTION starpu_my_init_c END INTERFACE INTERFACE SUBROUTINE starpu_register_element_c(Neq,Np,Ng,ro,dro,basis,ro_h,dro_h,basis_h) BIND(C) USE iso_c_binding INTEGER(KIND=C_INT),VALUE :: Neq,Np,Ng REAL(KIND=C_DOUBLE),DIMENSION(Neq,Np) :: ro,dro REAL(KIND=C_DOUBLE),DIMENSION(Np,Ng) :: basis TYPE(C_PTR), INTENT(OUT) :: ro_h, dro_h, basis_h END SUBROUTINE starpu_register_element_c END INTERFACE INTERFACE SUBROUTINE starpu_unregister_element_c( & ro_h,dro_h,basis_h) BIND(C) USE iso_c_binding TYPE(C_PTR), INTENT(IN) :: ro_h, dro_h, basis_h END SUBROUTINE starpu_unregister_element_c END INTERFACE INTERFACE SUBROUTINE starpu_loop_element_task_c(coeff, & ro_h,dro_h,basis_h) BIND(C) USE iso_c_binding REAL(KIND=C_DOUBLE),VALUE :: coeff TYPE(C_PTR), INTENT(IN) :: ro_h, dro_h, basis_h END SUBROUTINE starpu_loop_element_task_c END INTERFACE INTERFACE SUBROUTINE starpu_copy_element_task_c( & ro_h,dro_h) BIND(C) USE iso_c_binding TYPE(C_PTR), INTENT(IN) :: ro_h, dro_h END SUBROUTINE starpu_copy_element_task_c END INTERFACE END MODULE mod_interface starpu-1.4.9+dfsg/examples/fortran90/mod_types.f90000066400000000000000000000024371507764646700220030ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! Copyright (C) 2015-2015 ONERA ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! MODULE mod_types USE iso_c_binding TYPE type_numpar REAL(KIND=C_DOUBLE) :: coeff INTEGER(KIND=C_INT) :: Neq_max END TYPE type_numpar TYPE type_mesh_elt INTEGER(KIND=C_INT) :: Ng, Np REAL(KIND=C_DOUBLE),POINTER,DIMENSION(:,:) :: ro, dro REAL(KIND=C_DOUBLE),POINTER,DIMENSION(:,:) :: basis TYPE(C_PTR) :: ro_h, dro_h, basis_h END TYPE type_mesh_elt TYPE type_mesh TYPE(type_mesh_elt), POINTER, DIMENSION(:) :: elt END TYPE type_mesh END MODULE mod_types starpu-1.4.9+dfsg/examples/fortran90/starpu_mod.f90000066400000000000000000000077151507764646700221610ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! MODULE starpu_mod ! == starpu.h == ! starpu_conf_init INTERFACE SUBROUTINE starpu_conf_init(conf) BIND(C) USE iso_c_binding TYPE(C_PTR), VALUE :: conf END SUBROUTINE starpu_conf_init END INTERFACE ! starpu_init INTERFACE FUNCTION starpu_init(conf) BIND(C) USE iso_c_binding TYPE(C_PTR), VALUE :: conf INTEGER(KIND=C_INT) :: starpu_init END FUNCTION starpu_init END INTERFACE ! starpu_initialize ! starpu_pause INTERFACE SUBROUTINE starpu_pause() BIND(C) USE iso_c_binding END SUBROUTINE starpu_pause END INTERFACE ! starpu_resume INTERFACE SUBROUTINE starpu_resume() BIND(C) USE iso_c_binding END SUBROUTINE starpu_resume END INTERFACE ! starpu_shutdown INTERFACE SUBROUTINE starpu_shutdown() BIND(C) USE iso_c_binding END SUBROUTINE starpu_shutdown END INTERFACE ! starpu_topology_print ! starpu_asynchronous_copy_disabled INTERFACE SUBROUTINE starpu_asynchronous_copy_disabled() BIND(C) USE iso_c_binding END SUBROUTINE starpu_asynchronous_copy_disabled END INTERFACE ! starpu_asynchronous_cuda_copy_disabled INTERFACE SUBROUTINE starpu_asynchronous_cuda_copy_disabled() BIND(C) USE iso_c_binding END SUBROUTINE starpu_asynchronous_cuda_copy_disabled END INTERFACE ! starpu_asynchronous_opencl_copy_disabled INTERFACE SUBROUTINE starpu_asynchronous_opencl_copy_disabled() BIND(C) USE iso_c_binding END SUBROUTINE starpu_asynchronous_opencl_copy_disabled END INTERFACE ! starpu_display_stats INTERFACE SUBROUTINE starpu_display_stats() BIND(C) USE iso_c_binding END SUBROUTINE starpu_display_stats END INTERFACE ! starpu_get_version INTERFACE SUBROUTINE starpu_get_version(major,minor,release) BIND(C) USE iso_c_binding INTEGER(KIND=C_INT), INTENT(OUT) :: major,minor,release END SUBROUTINE starpu_get_version END INTERFACE ! starpu_cpu_worker_get_count INTERFACE FUNCTION starpu_cpu_worker_get_count() BIND(C) USE iso_c_binding INTEGER(KIND=C_INT) :: starpu_cpu_worker_get_count END FUNCTION starpu_cpu_worker_get_count END INTERFACE ! == starpu_task.h == ! starpu_tag_declare_deps ! starpu_tag_declare_deps_array ! starpu_task_declare_deps_array ! starpu_tag_wait ! starpu_tag_wait_array ! starpu_tag_notify_from_apps ! starpu_tag_restart ! starpu_tag_remove ! starpu_task_init ! starpu_task_clean ! starpu_task_create ! starpu_task_destroy ! starpu_task_set_destroy ! starpu_task_submit ! starpu_task_submit_to_ctx ! starpu_task_finished ! starpu_task_wait ! starpu_task_wait_for_all INTERFACE SUBROUTINE starpu_task_wait_for_all() BIND(C) USE iso_c_binding END SUBROUTINE starpu_task_wait_for_all END INTERFACE ! starpu_task_wait_for_n_submitted ! starpu_task_wait_for_all_in_ctx ! starpu_task_wait_for_n_submitted_in_ctx ! starpu_task_wait_for_no_ready ! starpu_task_nready ! starpu_task_nsubmitted ! starpu_codelet_init ! starpu_codelet_display_stats ! starpu_task_get_current ! starpu_parallel_task_barrier_init ! starpu_parallel_task_barrier_init_n ! starpu_task_dup ! starpu_task_set_implementation ! starpu_task_get_implementation END MODULE starpu_mod starpu-1.4.9+dfsg/examples/gl_interop/000077500000000000000000000000001507764646700177705ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/gl_interop/gl_interop.c000066400000000000000000000070101507764646700222740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example demonstrates how to use StarPU combined with OpenGL rendering, * which needs: * * - initializing GLUT first, * - enabling it at initialization, * - running the corresponding CUDA worker in the GLUT thread (here, the main * thread). */ #include #include #if (defined(STARPU_USE_CUDA) && defined(STARPU_OPENGL_RENDER)) #include void dummy(void *buffers[], void *cl_arg) { float *v = (float *) STARPU_VECTOR_GET_PTR(buffers[0]); printf("Codelet running\n"); cudaMemsetAsync(v, 0, STARPU_VECTOR_GET_NX(buffers[0]) * sizeof(float), starpu_cuda_get_local_stream()); cudaStreamSynchronize(starpu_cuda_get_local_stream()); printf("Codelet done\n"); } struct starpu_codelet cl = { .cuda_funcs = { dummy }, .nbuffers = 1, .modes = { STARPU_W }, }; void foo(void) { } void display(float i) { glClear(GL_COLOR_BUFFER_BIT); glColor3f(1, 1, 1); glBegin(GL_LINES); glVertex2f(-i, -i); glVertex2f(i, i); glEnd(); glFinish(); glutPostRedisplay(); glutMainLoopEvent(); } void callback_func(void *foo) { printf("Callback running, rendering\n"); float i = 1.; while (i > 0) { starpu_usleep(100000); display(i); i -= 0.1; } printf("rendering done\n"); /* Tell it was already the last submitted task */ starpu_drivers_request_termination(); } #endif int main(int argc, char **argv) { #if !(defined(STARPU_USE_CUDA) && defined(STARPU_OPENGL_RENDER)) return 77; #else struct starpu_conf conf; int cuda_device = 0; int cuda_devices[] = { cuda_device }; struct starpu_driver drivers[] = { { .type = STARPU_CUDA_WORKER, .id.cuda_id = cuda_device } }; int ret; struct starpu_task *task; starpu_data_handle_t handle; glutInit(&argc, argv); glutInitDisplayMode (GLUT_SINGLE | GLUT_RGB); glutInitWindowPosition(0, 0); glutInitWindowSize(300,200); glutCreateWindow("StarPU OpenGL interoperability test"); glClearColor (0.5, 0.5, 0.5, 0.0); /* Enable OpenGL interoperability */ starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncuda = 1; conf.cuda_opengl_interoperability = cuda_devices; conf.n_cuda_opengl_interoperability = sizeof(cuda_devices) / sizeof(*cuda_devices); conf.not_launched_drivers = drivers; conf.n_not_launched_drivers = sizeof(drivers) / sizeof(*drivers); ret = starpu_init(&conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_vector_data_register(&handle, -1, 0, 10, sizeof(float)); /* Submit just one dumb task */ task = starpu_task_create(); task->cl = &cl; task->handles[0] = handle; task->callback_func = callback_func; task->callback_arg = NULL; ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* And run the driver inside main, which will run the task */ printf("running the driver\n"); starpu_driver_run(&drivers[0]); printf("finished running the driver\n"); starpu_shutdown(); return 0; #endif } starpu-1.4.9+dfsg/examples/gl_interop/gl_interop_idle.c000066400000000000000000000077721507764646700233100ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example demonstrates how to use StarPU combined with OpenGL rendering, * which needs: * * - initializing GLUT first, * - enabling it at initialization, * - running the corresponding CUDA worker in the GLUT thread (here, the main * thread). * * The difference with gl_interop.c is that this version runs StarPU Tasks in * the glut idle handler. */ #include #include #if (defined(STARPU_USE_CUDA) && defined(STARPU_OPENGL_RENDER)) #include void dummy(void *buffers[], void *cl_arg) { float *v = (float *) STARPU_VECTOR_GET_PTR(buffers[0]); printf("Codelet running\n"); cudaMemsetAsync(v, 0, STARPU_VECTOR_GET_NX(buffers[0]) * sizeof(float), starpu_cuda_get_local_stream()); cudaStreamSynchronize(starpu_cuda_get_local_stream()); printf("Codelet done\n"); } struct starpu_codelet cl = { .cuda_funcs = { dummy }, .nbuffers = 1, .modes = { STARPU_W }, }; void foo(void) { } void display(float i) { glClear(GL_COLOR_BUFFER_BIT); glColor3f(1, 1, 1); glBegin(GL_LINES); glVertex2f(-i, -i); glVertex2f(i, i); glEnd(); glFinish(); glutPostRedisplay(); } static int cuda_devices[] = { 0 }; static struct starpu_driver drivers[] = { { .type = STARPU_CUDA_WORKER } }; void callback_func(void *foo) { printf("Callback running, rendering\n"); float i = 1.; while (i > 0) { starpu_usleep(100000); display(i); i -= 0.1; } printf("rendering done\n"); /* Tell it was already the last submitted task */ starpu_drivers_request_termination(); /* And terminate StarPU */ starpu_driver_deinit(&drivers[0]); starpu_shutdown(); exit(0); } static void idle(void) { starpu_driver_run_once(&drivers[0]); } #endif int main(int argc, char **argv) { #if !(defined(STARPU_USE_CUDA) && defined(STARPU_OPENGL_RENDER)) return 77; #else struct starpu_conf conf; int ret; struct starpu_task *task; starpu_data_handle_t handle; int cuda_device = 0; cuda_devices[0] = cuda_device; drivers[0].id.cuda_id = cuda_device; glutInit(&argc, argv); glutInitDisplayMode (GLUT_SINGLE | GLUT_RGB); glutInitWindowPosition(0, 0); glutInitWindowSize(300,200); glutCreateWindow("StarPU OpenGL interoperability test"); glClearColor (0.5, 0.5, 0.5, 0.0); /* Enable OpenGL interoperability */ starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncuda = 1; conf.cuda_opengl_interoperability = cuda_devices; conf.n_cuda_opengl_interoperability = sizeof(cuda_devices) / sizeof(*cuda_devices); conf.not_launched_drivers = drivers; conf.n_not_launched_drivers = sizeof(drivers) / sizeof(*drivers); ret = starpu_init(&conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_vector_data_register(&handle, -1, 0, 10, sizeof(float)); /* Submit just one dumb task */ task = starpu_task_create(); task->cl = &cl; task->handles[0] = handle; task->callback_func = callback_func; task->callback_arg = NULL; ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* And run the driver inside main, which will run the task */ printf("running the driver\n"); /* Initialize it */ starpu_driver_init(&drivers[0]); /* Register driver loop content as idle handler */ glutIdleFunc(idle); /* Now run the glut loop */ glutMainLoop(); /* And deinitialize driver */ starpu_driver_deinit(&drivers[0]); printf("finished running the driver\n"); starpu_shutdown(); return 0; #endif } starpu-1.4.9+dfsg/examples/heat/000077500000000000000000000000001507764646700165475ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/heat/dw_factolu.c000066400000000000000000000565421507764646700210560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This implements an LU factorization. * The task graph is submitted through continuation: the rest of the graph is * submitted as appropriate in the tasks' callback. */ #include "dw_factolu.h" #ifdef STARPU_HAVE_HELGRIND_H #include #endif #ifndef ANNOTATE_HAPPENS_BEFORE #define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0) #endif #ifndef ANNOTATE_HAPPENS_AFTER #define ANNOTATE_HAPPENS_AFTER(obj) ((void)0) #endif #if 0 #define debug(fmt, ...) fprintf(stderr, fmt, ## __VA_ARGS__) #else #define debug(fmt, ...) #endif struct starpu_perfmodel model_getrf; struct starpu_perfmodel model_trsm_ll; struct starpu_perfmodel model_trsm_ru; struct starpu_perfmodel model_gemm; static unsigned *advance_11; /* size nblocks, whether the 11 task is done */ static unsigned *advance_12_21; /* size nblocks*nblocks */ static unsigned *advance_22; /* array of nblocks *nblocks*nblocks */ static double start; static double end; static unsigned no_prio = 0; static struct starpu_codelet cl_getrf = { .cpu_funcs = {dw_cpu_codelet_update_getrf}, .cpu_funcs_name = {"dw_cpu_codelet_update_getrf"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dw_cublas_codelet_update_getrf}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .model = &model_getrf }; static struct starpu_codelet cl_trsm_ll = { .cpu_funcs = {dw_cpu_codelet_update_trsm_ll}, .cpu_funcs_name = {"dw_cpu_codelet_update_trsm_ll"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dw_cublas_codelet_update_trsm_ll}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = {STARPU_R, STARPU_RW}, .model = &model_trsm_ll }; static struct starpu_codelet cl_trsm_ru = { .cpu_funcs = {dw_cpu_codelet_update_trsm_ru}, .cpu_funcs_name = {"dw_cpu_codelet_update_trsm_ru"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dw_cublas_codelet_update_trsm_ru}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = {STARPU_R, STARPU_RW}, .model = &model_trsm_ru }; static struct starpu_codelet cl_gemm = { .cpu_funcs = {dw_cpu_codelet_update_gemm}, .cpu_funcs_name = {"dw_cpu_codelet_update_gemm"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dw_cublas_codelet_update_gemm}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_RW}, .model = &model_gemm }; #define STARTED 0x01 #define DONE 0x11 /* * Upgraded Callbacks : break the pipeline design ! */ void dw_callback_v2_codelet_update_gemm(void *argcb) { int ret; cl_args *args = argcb; unsigned k = args->k; unsigned i = args->i; unsigned j = args->j; unsigned nblocks = args->nblocks; debug("ugemm %d %d %d\n", k, i, j); /* we did task 22k,i,j */ advance_22[k*nblocks*nblocks + i + j*nblocks] = DONE; if ((i == j) && (i == k+1)) { /* we now reduce the LU22 part (recursion appears there) */ cl_args *ugetrfarg = malloc(sizeof(cl_args)); struct starpu_task *task = starpu_task_create(); task->callback_func = dw_callback_v2_codelet_update_getrf; task->callback_arg = ugetrfarg; task->cl = &cl_getrf; task->cl_arg = ugetrfarg; task->cl_arg_size = sizeof(*ugetrfarg); task->handles[0] = starpu_data_get_sub_data(args->dataA, 2, k+1, k+1); ugetrfarg->dataA = args->dataA; ugetrfarg->i = k + 1; ugetrfarg->nblocks = args->nblocks; /* schedule the codelet */ if (!no_prio) task->priority = STARPU_MAX_PRIO; debug("ugemm %d %d %d start ugetrf %d\n", k, i, j, k + 1); ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* 11k+1 + 22k,k+1,j => 21 k+1,j */ if (i == k + 1 && j > k + 1) { uint8_t dep; /* 11 k+1*/ dep = advance_11[(k+1)]; if (dep & DONE) { /* try to push the task */ uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[(k+1) + j*nblocks], STARTED); if ((u & STARTED) == 0) { /* we are the only one that should launch that task */ cl_args *utrsmrua = malloc(sizeof(cl_args)); struct starpu_task *task_trsm_ru = starpu_task_create(); task_trsm_ru->callback_func = dw_callback_v2_codelet_update_trsm_ru; task_trsm_ru->callback_arg = utrsmrua; task_trsm_ru->cl = &cl_trsm_ru; task_trsm_ru->cl_arg = utrsmrua; task_trsm_ru->cl_arg_size = sizeof(*utrsmrua); utrsmrua->i = k+1; utrsmrua->k = j; utrsmrua->nblocks = args->nblocks; utrsmrua->dataA = args->dataA; task_trsm_ru->handles[0] = starpu_data_get_sub_data(args->dataA, 2, utrsmrua->i, utrsmrua->i); task_trsm_ru->handles[1] = starpu_data_get_sub_data(args->dataA, 2, utrsmrua->i, utrsmrua->k); debug("ugemm %d %d %d start utrsmru %d %d\n", k, i, j, k+1, j); ret = starpu_task_submit(task_trsm_ru); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } } /* 11k + 22k-1,i,k => 12 k,i */ if (j == k + 1 && i > k + 1) { uint8_t dep; /* 11 k+1*/ dep = advance_11[(k+1)]; if (dep & DONE) { /* try to push the task */ uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[(k+1)*nblocks + i], STARTED); if ((u & STARTED) == 0) { /* we are the only one that should launch that task */ cl_args *utrsmlla = malloc(sizeof(cl_args)); struct starpu_task *task_trsm_ll = starpu_task_create(); task_trsm_ll->callback_func = dw_callback_v2_codelet_update_trsm_ll; task_trsm_ll->callback_arg = utrsmlla; task_trsm_ll->cl = &cl_trsm_ll; task_trsm_ll->cl_arg = utrsmlla; task_trsm_ll->cl_arg_size = sizeof(*utrsmlla); utrsmlla->i = k+1; utrsmlla->k = i; utrsmlla->nblocks = args->nblocks; utrsmlla->dataA = args->dataA; task_trsm_ll->handles[0] = starpu_data_get_sub_data(args->dataA, 2, utrsmlla->i, utrsmlla->i); task_trsm_ll->handles[1] = starpu_data_get_sub_data(args->dataA, 2, utrsmlla->k, utrsmlla->i); debug("ugemm %d %d %d start utrsmll %d %d\n", k, i, j, k+1, i); ret = starpu_task_submit(task_trsm_ll); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } } free(args); } void dw_callback_v2_codelet_update_trsm_ll(void *argcb) { int ret; cl_args *args = argcb; /* now launch the update of LU22 */ unsigned i = args->i; unsigned k = args->k; unsigned nblocks = args->nblocks; debug("utrsmll %d %d\n", i, k); /* we did task 21i,k */ advance_12_21[i*nblocks + k] = DONE; unsigned slicey; for (slicey = i+1; slicey < nblocks; slicey++) { /* can we launch 22 i,args->k,slicey ? */ /* deps : 21 args->k, slicey */ uint8_t dep; dep = advance_12_21[i + slicey*nblocks]; if (dep & DONE) { /* perhaps we may schedule the 22 i,args->k,slicey task */ uint8_t u = STARPU_ATOMIC_OR(&advance_22[i*nblocks*nblocks + slicey*nblocks + k], STARTED); if ((u & STARTED) == 0) { /* update that square matrix */ cl_args *ugemma = malloc(sizeof(cl_args)); struct starpu_task *task_gemm = starpu_task_create(); task_gemm->callback_func = dw_callback_v2_codelet_update_gemm; task_gemm->callback_arg = ugemma; task_gemm->cl = &cl_gemm; task_gemm->cl_arg = ugemma; task_gemm->cl_arg_size = sizeof(*ugemma); ugemma->k = i; ugemma->i = k; ugemma->j = slicey; ugemma->dataA = args->dataA; ugemma->nblocks = nblocks; task_gemm->handles[0] = starpu_data_get_sub_data(args->dataA, 2, ugemma->i, ugemma->k); task_gemm->handles[1] = starpu_data_get_sub_data(args->dataA, 2, ugemma->k, ugemma->j); task_gemm->handles[2] = starpu_data_get_sub_data(args->dataA, 2, ugemma->i, ugemma->j); /* schedule that codelet */ if (!no_prio && (slicey == i+1)) task_gemm->priority = STARPU_MAX_PRIO; debug("utrsmll %d %d start ugemm %d %d %d\n", i, k, i, k, slicey); ret = starpu_task_submit(task_gemm); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } } free(argcb); } void dw_callback_v2_codelet_update_trsm_ru(void *argcb) { int ret; cl_args *args = argcb; /* now launch the update of LU22 */ unsigned i = args->i; unsigned k = args->k; unsigned nblocks = args->nblocks; /* we did task 21i,k */ advance_12_21[i + k*nblocks] = DONE; debug("utrsmru %d %d\n", i, k); unsigned slicex; for (slicex = i+1; slicex < nblocks; slicex++) { /* can we launch 22 i,slicex,k ? */ /* deps : 12 slicex k */ uint8_t dep; dep = advance_12_21[i*nblocks + slicex]; if (dep & DONE) { /* perhaps we may schedule the 22 i,args->k,slicey task */ uint8_t u = STARPU_ATOMIC_OR(&advance_22[i*nblocks*nblocks + k*nblocks + slicex], STARTED); if ((u & STARTED) == 0) { /* update that square matrix */ cl_args *ugemma = malloc(sizeof(cl_args)); struct starpu_task *task_gemm = starpu_task_create(); task_gemm->callback_func = dw_callback_v2_codelet_update_gemm; task_gemm->callback_arg = ugemma; task_gemm->cl = &cl_gemm; task_gemm->cl_arg = ugemma; task_gemm->cl_arg_size = sizeof(*ugemma); ugemma->k = i; ugemma->i = slicex; ugemma->j = k; ugemma->dataA = args->dataA; ugemma->nblocks = nblocks; task_gemm->handles[0] = starpu_data_get_sub_data(args->dataA, 2, ugemma->i, ugemma->k); task_gemm->handles[1] = starpu_data_get_sub_data(args->dataA, 2, ugemma->k, ugemma->j); task_gemm->handles[2] = starpu_data_get_sub_data(args->dataA, 2, ugemma->i, ugemma->j); /* schedule that codelet */ if (!no_prio && (slicex == i+1)) task_gemm->priority = STARPU_MAX_PRIO; debug("utrsmru %d %d start ugemm %d %d %d\n", i, k, i, slicex, k); ret = starpu_task_submit(task_gemm); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } } free(argcb); } void dw_callback_v2_codelet_update_getrf(void *argcb) { /* in case there remains work, go on */ cl_args *args = argcb; unsigned nblocks = args->nblocks; unsigned i = args->i; debug("ugetrf %d\n", i); /* we did task 11k */ advance_11[i] = DONE; if (i == nblocks - 1) { /* we are done */ free(argcb); return; } else { /* put new tasks */ unsigned slice; for (slice = i + 1; slice < nblocks; slice++) { /* can we launch 12i,slice ? */ uint8_t deps12; if (i == 0) { deps12 = DONE; } else { deps12 = advance_22[(i-1)*nblocks*nblocks + slice + i*nblocks]; } if (deps12 & DONE) { /* we may perhaps launch the task 12i,slice */ uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i*nblocks + slice], STARTED); if ((u & STARTED) == 0) { int ret; /* we are the only one that should launch that task */ cl_args *utrsmlla = malloc(sizeof(cl_args)); struct starpu_task *task_trsm_ll = starpu_task_create(); task_trsm_ll->callback_func = dw_callback_v2_codelet_update_trsm_ll; task_trsm_ll->callback_arg = utrsmlla; task_trsm_ll->cl = &cl_trsm_ll; task_trsm_ll->cl_arg = utrsmlla; task_trsm_ll->cl_arg_size = sizeof(*utrsmlla); utrsmlla->i = i; utrsmlla->k = slice; utrsmlla->nblocks = args->nblocks; utrsmlla->dataA = args->dataA; task_trsm_ll->handles[0] = starpu_data_get_sub_data(args->dataA, 2, utrsmlla->i, utrsmlla->i); task_trsm_ll->handles[1] = starpu_data_get_sub_data(args->dataA, 2, utrsmlla->k, utrsmlla->i); if (!no_prio && (slice == i +1)) task_trsm_ll->priority = STARPU_MAX_PRIO; debug("ugetrf %d start utrsmll %d %d\n", i, i, slice); ret = starpu_task_submit(task_trsm_ll); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } /* can we launch 21i,slice ? */ if (i == 0) { deps12 = DONE; } else { deps12 = advance_22[(i-1)*nblocks*nblocks + slice*nblocks + i]; } if (deps12 & DONE) { /* we may perhaps launch the task 12i,slice */ uint8_t u = STARPU_ATOMIC_OR(&advance_12_21[i + slice*nblocks], STARTED); if ((u & STARTED) == 0) { int ret; /* we are the only one that should launch that task */ cl_args *utrsmrua = malloc(sizeof(cl_args)); struct starpu_task *task_trsm_ru = starpu_task_create(); task_trsm_ru->callback_func = dw_callback_v2_codelet_update_trsm_ru; task_trsm_ru->callback_arg = utrsmrua; task_trsm_ru->cl = &cl_trsm_ru; task_trsm_ru->cl_arg = utrsmrua; task_trsm_ru->cl_arg_size = sizeof(*utrsmrua); utrsmrua->i = i; utrsmrua->k = slice; utrsmrua->nblocks = args->nblocks; utrsmrua->dataA = args->dataA; task_trsm_ru->handles[0] = starpu_data_get_sub_data(args->dataA, 2, utrsmrua->i, utrsmrua->i); task_trsm_ru->handles[1] = starpu_data_get_sub_data(args->dataA, 2, utrsmrua->i, utrsmrua->k); if (!no_prio && (slice == i +1)) task_trsm_ru->priority = STARPU_MAX_PRIO; debug("ugetrf %d start utrsmru %d %d\n", i, i, slice); ret = starpu_task_submit(task_trsm_ru); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } } } free(argcb); } /* * Callbacks */ void dw_callback_codelet_update_getrf(void *argcb) { /* in case there remains work, go on */ cl_args *args = argcb; if (args->i == args->nblocks - 1) { /* we are done */ free(argcb); return; } else { /* put new tasks */ unsigned nslices; nslices = args->nblocks - 1 - args->i; unsigned *remaining = malloc(sizeof(unsigned)); *remaining = 2*nslices; unsigned slice; for (slice = args->i + 1; slice < args->nblocks; slice++) { int ret; /* update slice from utrsmll */ cl_args *utrsmlla = malloc(sizeof(cl_args)); /* update slice from utrsmru */ cl_args *utrsmrua = malloc(sizeof(cl_args)); struct starpu_task *task_trsm_ll = starpu_task_create(); task_trsm_ll->callback_func = dw_callback_codelet_update_trsm_ll_21; task_trsm_ll->callback_arg = utrsmlla; task_trsm_ll->cl = &cl_trsm_ll; task_trsm_ll->cl_arg = utrsmlla; task_trsm_ll->cl_arg_size = sizeof(*utrsmlla); struct starpu_task *task_trsm_ru = starpu_task_create(); task_trsm_ru->callback_func = dw_callback_codelet_update_trsm_ll_21; task_trsm_ru->callback_arg = utrsmrua; task_trsm_ru->cl = &cl_trsm_ru; task_trsm_ru->cl_arg = utrsmrua; task_trsm_ru->cl_arg_size = sizeof(*utrsmrua); utrsmlla->i = args->i; utrsmlla->k = slice; utrsmlla->nblocks = args->nblocks; utrsmlla->dataA = args->dataA; utrsmlla->remaining = remaining; utrsmrua->i = args->i; utrsmrua->k = slice; utrsmrua->nblocks = args->nblocks; utrsmrua->dataA = args->dataA; utrsmrua->remaining = remaining; task_trsm_ll->handles[0] = starpu_data_get_sub_data(args->dataA, 2, utrsmlla->i, utrsmlla->i); task_trsm_ll->handles[1] = starpu_data_get_sub_data(args->dataA, 2, utrsmlla->k, utrsmlla->i); task_trsm_ru->handles[0] = starpu_data_get_sub_data(args->dataA, 2, utrsmrua->i, utrsmrua->i); task_trsm_ru->handles[1] = starpu_data_get_sub_data(args->dataA, 2, utrsmrua->i, utrsmrua->k); ret = starpu_task_submit(task_trsm_ll); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(task_trsm_ru); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } free(remaining); } } void dw_callback_codelet_update_gemm(void *argcb) { cl_args *args = argcb; unsigned remaining = STARPU_ATOMIC_ADD(args->remaining, (-1)); ANNOTATE_HAPPENS_BEFORE(args->remaining); if (remaining == 0) { int ret; ANNOTATE_HAPPENS_AFTER(args->remaining); /* all worker already used the counter */ free(args->remaining); /* we now reduce the LU22 part (recursion appears there) */ cl_args *ugetrfarg = malloc(sizeof(cl_args)); struct starpu_task *task = starpu_task_create(); task->callback_func = dw_callback_codelet_update_getrf; task->callback_arg = ugetrfarg; task->cl = &cl_getrf; task->cl_arg = ugetrfarg; task->cl_arg_size = sizeof(*ugetrfarg); task->handles[0] = starpu_data_get_sub_data(args->dataA, 2, args->k + 1, args->k + 1); ugetrfarg->dataA = args->dataA; ugetrfarg->i = args->k + 1; ugetrfarg->nblocks = args->nblocks; /* schedule the codelet */ ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } free(args); } void dw_callback_codelet_update_trsm_ll_21(void *argcb) { cl_args *args = argcb; unsigned remaining = STARPU_ATOMIC_ADD(args->remaining, -1); ANNOTATE_HAPPENS_BEFORE(args->remaining); if (remaining == 0) { ANNOTATE_HAPPENS_AFTER(args->remaining); /* now launch the update of LU22 */ unsigned i = args->i; unsigned nblocks = args->nblocks; /* the number of tasks to be done */ unsigned *remaining_tasks = malloc(sizeof(unsigned)); *remaining_tasks = (nblocks - 1 - i)*(nblocks - 1 - i); unsigned slicey, slicex; for (slicey = i+1; slicey < nblocks; slicey++) { for (slicex = i+1; slicex < nblocks; slicex++) { int ret; /* update that square matrix */ cl_args *ugemma = malloc(sizeof(cl_args)); struct starpu_task *task_gemm = starpu_task_create(); task_gemm->callback_func = dw_callback_codelet_update_gemm; task_gemm->callback_arg = ugemma; task_gemm->cl = &cl_gemm; task_gemm->cl_arg = ugemma; task_gemm->cl_arg_size = sizeof(*ugemma); ugemma->k = i; ugemma->i = slicex; ugemma->j = slicey; ugemma->dataA = args->dataA; ugemma->nblocks = nblocks; ugemma->remaining = remaining_tasks; task_gemm->handles[0] = starpu_data_get_sub_data(args->dataA, 2, ugemma->i, ugemma->k); task_gemm->handles[1] = starpu_data_get_sub_data(args->dataA, 2, ugemma->k, ugemma->j); task_gemm->handles[2] = starpu_data_get_sub_data(args->dataA, 2, ugemma->i, ugemma->j); /* schedule that codelet */ ret = starpu_task_submit(task_gemm); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } free(remaining_tasks); } } /* * code to bootstrap the factorization */ void dw_codelet_facto(starpu_data_handle_t dataA, unsigned nblocks) { int ret; cl_args *args = malloc(sizeof(cl_args)); args->i = 0; args->nblocks = nblocks; args->dataA = dataA; start = starpu_timing_now(); /* inject a new task with this codelet into the system */ struct starpu_task *task = starpu_task_create(); task->callback_func = dw_callback_codelet_update_getrf; task->callback_arg = args; task->cl = &cl_getrf; task->cl_arg = args; task->handles[0] = starpu_data_get_sub_data(dataA, 2, 0, 0); /* schedule the codelet */ ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_wait_for_all(); end = starpu_timing_now(); double timing = end - start; unsigned n = starpu_matrix_get_nx(dataA); double flop = (2.0f*n*n*n)/3.0f; PRINTF("# size\tms\tGFlop/s\n"); PRINTF("%u\t%.0f\t%.1f\n", n, timing/1000, flop/timing/1000.0f); } void dw_codelet_facto_v2(starpu_data_handle_t dataA, unsigned nblocks) { advance_11 = calloc(nblocks, sizeof(*advance_11)); STARPU_ASSERT(advance_11); advance_12_21 = calloc(nblocks*nblocks, sizeof(*advance_12_21)); STARPU_ASSERT(advance_12_21); advance_22 = calloc(nblocks*nblocks*nblocks, sizeof(*advance_22)); STARPU_ASSERT(advance_22); cl_args *args = calloc(1, sizeof(cl_args)); args->i = 0; args->nblocks = nblocks; args->dataA = dataA; start = starpu_timing_now(); /* inject a new task with this codelet into the system */ struct starpu_task *task = starpu_task_create(); task->callback_func = dw_callback_v2_codelet_update_getrf; task->callback_arg = args; task->cl = &cl_getrf; task->cl_arg = args; task->cl_arg_size = sizeof(*args); task->handles[0] = starpu_data_get_sub_data(dataA, 2, 0, 0); /* schedule the codelet */ int ret = starpu_task_submit(task); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); exit(0); } starpu_task_wait_for_all(); end = starpu_timing_now(); double timing = end - start; unsigned n = starpu_matrix_get_nx(dataA); double flop = (2.0f*n*n*n)/3.0f; PRINTF("# size\tms\tGFlop/s\n"); PRINTF("%u\t%.0f\t%.1f\n", n, timing/1000, flop/timing/1000.0f); free(advance_11); free(advance_12_21); free(advance_22); } void initialize_system(float **A, float **B, unsigned dim, unsigned pinned) { int ret; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_ATLAS char * symbol_getrf = "lu_model_getrf_atlas"; char * symbol_trsm_ll = "lu_model_trsm_ll_atlas"; char * symbol_trsm_ru = "lu_model_trsm_ru_atlas"; char * symbol_gemm = "lu_model_gemm_atlas"; #elif defined(STARPU_GOTO) char * symbol_getrf = "lu_model_getrf_goto"; char * symbol_trsm_ll = "lu_model_trsm_ll_goto"; char * symbol_trsm_ru = "lu_model_trsm_ru_goto"; char * symbol_gemm = "lu_model_gemm_goto"; #elif defined(STARPU_OPENBLAS) char * symbol_getrf = "lu_model_getrf_openblas"; char * symbol_trsm_ll = "lu_model_trsm_ll_openblas"; char * symbol_trsm_ru = "lu_model_trsm_ru_openblas"; char * symbol_gemm = "lu_model_gemm_openblas"; #else char * symbol_getrf = "lu_model_getrf"; char * symbol_trsm_ll = "lu_model_trsm_ll"; char * symbol_trsm_ru = "lu_model_trsm_ru"; char * symbol_gemm = "lu_model_gemm"; #endif initialize_lu_kernels_model(&model_getrf,symbol_getrf,task_getrf_cost,task_getrf_cost_cpu,task_getrf_cost_cuda); initialize_lu_kernels_model(&model_trsm_ll,symbol_trsm_ll,task_trsm_ll_cost,task_trsm_ll_cost_cpu,task_trsm_ll_cost_cuda); initialize_lu_kernels_model(&model_trsm_ru,symbol_trsm_ru,task_trsm_ru_cost,task_trsm_ru_cost_cpu,task_trsm_ru_cost_cuda); initialize_lu_kernels_model(&model_gemm,symbol_gemm,task_gemm_cost,task_gemm_cost_cpu,task_gemm_cost_cuda); starpu_cublas_init(); if (pinned) { starpu_malloc((void **)A, (size_t)dim*dim*sizeof(float)); starpu_malloc((void **)B, (size_t)dim*sizeof(float)); } else { *A = malloc((size_t)dim*dim*sizeof(float)); STARPU_ASSERT(*A); *B = malloc((size_t)dim*sizeof(float)); STARPU_ASSERT(*B); } } void free_system(float *A, float *B, unsigned dim, unsigned pinned) { if (pinned) { starpu_free_noflag(A, (size_t)dim*dim*sizeof(float)); starpu_free_noflag(B, (size_t)dim*sizeof(float)); } else { free(A); free(B); } } void dw_factoLU(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned version, unsigned _no_prio) { #ifdef CHECK_RESULTS FPRINTF(stderr, "Checking results ...\n"); float *Asaved; Asaved = malloc((size_t)ld*ld*sizeof(float)); memcpy(Asaved, matA, (size_t)ld*ld*sizeof(float)); #endif no_prio = _no_prio; starpu_data_handle_t dataA; /* monitor and partition the A matrix into blocks : * one block is now determined by 2 unsigned (i,j) */ starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float)); struct starpu_data_filter f = { .filter_func = starpu_matrix_filter_vertical_block, .nchildren = nblocks }; struct starpu_data_filter f2 = { .filter_func = starpu_matrix_filter_block, .nchildren = nblocks }; starpu_data_map_filters(dataA, 2, &f, &f2); switch (version) { case 1: dw_codelet_facto(dataA, nblocks); break; default: case 2: dw_codelet_facto_v2(dataA, nblocks); break; } /* gather all the data */ starpu_data_unpartition(dataA, STARPU_MAIN_RAM); starpu_data_unregister(dataA); #ifdef CHECK_RESULTS compare_A_LU(Asaved, matA, size, ld); #endif } starpu-1.4.9+dfsg/examples/heat/dw_factolu.h000066400000000000000000000113261507764646700210520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DW_FACTO_LU_H__ #define __DW_FACTO_LU_H__ #include #include #include #include #ifdef STARPU_USE_CUDA #include #include #endif #include "../common/blas.h" #include "lu_kernels_model.h" #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define PRINTF(fmt, ...) do { if (!getenv("STARPU_SSILENT")) {printf(fmt, ## __VA_ARGS__); }} while(0) #define BLAS3_FLOP(n1,n2,n3) \ (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3)) typedef struct { starpu_data_handle_t dataA; unsigned i; unsigned j; unsigned k; unsigned nblocks; unsigned *remaining; } cl_args; #ifdef CHECK_RESULTS static void compare_A_LU(float *A, float *LU, unsigned size, unsigned ld) { unsigned i,j; float *L; float *U; L = malloc(size*size*sizeof(float)); U = malloc(size*size*sizeof(float)); memset(L, 0, size*size*sizeof(float)); memset(U, 0, size*size*sizeof(float)); /* only keep the lower part */ for (j = 0; j < size; j++) { for (i = 0; i < j; i++) { L[j+i*size] = LU[j+i*ld]; } /* diag i = j */ L[j+j*size] = LU[j+j*ld]; U[j+j*size] = 1.0f; for (i = j+1; i < size; i++) { U[j+i*size] = LU[j+i*ld]; } } #if 0 /* display L */ FPRINTF(stdout, "(LU): \n"); for (j = 0; j < size; j++) { for (i = 0; i < size; i++) { /* if (i <= j) { */ FPRINTF(stdout, "%2.2f\t", LU[j +i*size]); /* } else { FPRINTF(stdout, ".\t"); } */ } FPRINTF(stdout, "\n"); } /* display L */ FPRINTF(stdout, "L: \n"); for (j = 0; j < size; j++) { for (i = 0; i < size; i++) { /* if (i <= j) { */ FPRINTF(stdout, "%2.2f\t", L[j +i*size]); /* } else { FPRINTF(stdout, ".\t"); } */ } FPRINTF(stdout, "\n"); } /* display U */ FPRINTF(stdout, "U: \n"); for (j = 0; j < size; j++) { for (i = 0; i < size; i++) { /* if (i <= j) { */ FPRINTF(stdout, "%2.2f\t", U[j +i*size]); /* } else { FPRINTF(stdout, ".\t"); } */ } FPRINTF(stdout, "\n"); } #endif /* now A_err = L, compute L*U */ STARPU_STRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size); float max_err = 0.0f; for (i = 0; i < size ; i++) { for (j = 0; j < size; j++) { max_err = STARPU_MAX(max_err, fabs(L[j+i*size] - A[j+i*ld])); } } #if 0 /* display A */ FPRINTF(stdout, "A: \n"); for (j = 0; j < size; j++) { for (i = 0; i < size; i++) { /* if (i <= j) { */ FPRINTF(stdout, "%2.2f\t", A[j +i*size]); /* } else { FPRINTF(stdout, ".\t"); } */ } FPRINTF(stdout, "\n"); } /* display LU */ FPRINTF(stdout, "LU: \n"); for (j = 0; j < size; j++) { for (i = 0; i < size; i++) { /* if (i <= j) { */ FPRINTF(stdout, "%2.2f\t", L[j +i*size]); /* } else { FPRINTF(stdout, ".\t"); } */ } FPRINTF(stdout, "\n"); } #endif FPRINTF(stdout, "max error between A and L*U = %f \n", max_err); } #endif /* CHECK_RESULTS */ void dw_cpu_codelet_update_getrf(void **, void *); void dw_cpu_codelet_update_trsm_ll(void **, void *); void dw_cpu_codelet_update_trsm_ru(void **, void *); void dw_cpu_codelet_update_gemm(void **, void *); #ifdef STARPU_USE_CUDA void dw_cublas_codelet_update_getrf(void *descr[], void *_args); void dw_cublas_codelet_update_trsm_ll(void *descr[], void *_args); void dw_cublas_codelet_update_trsm_ru(void *descr[], void *_args); void dw_cublas_codelet_update_gemm(void *descr[], void *_args); #endif void dw_callback_codelet_update_getrf(void *); void dw_callback_codelet_update_trsm_ll_21(void *); void dw_callback_codelet_update_gemm(void *); void dw_callback_v2_codelet_update_getrf(void *); void dw_callback_v2_codelet_update_trsm_ll(void *); void dw_callback_v2_codelet_update_trsm_ru(void *); void dw_callback_v2_codelet_update_gemm(void *); extern struct starpu_perfmodel model_getrf; extern struct starpu_perfmodel model_trsm_ll; extern struct starpu_perfmodel model_trsm_ru; extern struct starpu_perfmodel model_gemm; #endif /* __DW_FACTO_LU_H__ */ starpu-1.4.9+dfsg/examples/heat/dw_factolu_grain.c000066400000000000000000000241301507764646700222220ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This implements an LU factorization. * The task graph is submitted through dependency tags. * It also changes the partitioning during execution: when called first, * dw_factoLU_grain_inner splits the matrix with a big granularity (nblocks) * and processes nbigblocks blocks, before calling itself again, to process the * remainder of the matrix with a smaller granularity. */ #include "dw_factolu.h" #define TAG_GETRF(k, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) | (1ULL<<56) | (unsigned long long)(k))) #define TAG_TRSM_LL(k,i, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) | ((2ULL<<56) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(i)))) #define TAG_TRSM_RU(k,j, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) | ((3ULL<<56) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(j)))) #define TAG_GEMM(k,i,j, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) | ((4ULL<<56) | ((unsigned long long)(k)<<32) \ | ((unsigned long long)(i)<<16) \ | (unsigned long long)(j)))) /* * Construct the DAG */ static struct starpu_task *create_task(starpu_tag_t id) { struct starpu_task *task = starpu_task_create(); task->cl_arg = NULL; task->use_tag = 1; task->tag_id = id; return task; } static struct starpu_codelet cl_getrf = { .modes = { STARPU_RW }, .cpu_funcs = {dw_cpu_codelet_update_getrf}, .cpu_funcs_name = {"dw_cpu_codelet_update_getrf"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dw_cublas_codelet_update_getrf}, #endif .nbuffers = 1, .model = &model_getrf }; static struct starpu_task *create_task_getrf(starpu_data_handle_t dataA, unsigned k, unsigned tag_prefix) { /* FPRINTF(stdout, "task 11 k = %d TAG = %llx\n", k, (TAG_GETRF(k))); */ struct starpu_task *task = create_task(TAG_GETRF(k, tag_prefix)); task->cl = &cl_getrf; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); /* this is an important task */ task->priority = STARPU_MAX_PRIO; /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_GETRF(k, tag_prefix), 1, TAG_GEMM(k-1, k, k, tag_prefix)); } return task; } static struct starpu_codelet cl_trsm_ll = { .modes = { STARPU_R, STARPU_RW }, .cpu_funcs = {dw_cpu_codelet_update_trsm_ll}, .cpu_funcs_name = {"dw_cpu_codelet_update_trsm_ll"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dw_cublas_codelet_update_trsm_ll}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .model = &model_trsm_ll }; static void create_task_trsm_ll(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned tag_prefix) { int ret; /* FPRINTF(stdout, "task 12 k,i = %d,%d TAG = %llx\n", k,i, TAG_TRSM_LL(k,i)); */ struct starpu_task *task = create_task(TAG_TRSM_LL(k, i, tag_prefix)); task->cl = &cl_trsm_ll; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, i, k); if (i == k+1) { task->priority = STARPU_MAX_PRIO; } /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_TRSM_LL(k, i, tag_prefix), 2, TAG_GETRF(k, tag_prefix), TAG_GEMM(k-1, i, k, tag_prefix)); } else { starpu_tag_declare_deps(TAG_TRSM_LL(k, i, tag_prefix), 1, TAG_GETRF(k, tag_prefix)); } ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } static struct starpu_codelet cl_trsm_ru = { .modes = { STARPU_R, STARPU_RW }, .cpu_funcs = {dw_cpu_codelet_update_trsm_ru}, .cpu_funcs_name = {"dw_cpu_codelet_update_trsm_ru"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dw_cublas_codelet_update_trsm_ru}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .model = &model_trsm_ru }; static void create_task_trsm_ru(starpu_data_handle_t dataA, unsigned k, unsigned j, unsigned tag_prefix) { int ret; struct starpu_task *task = create_task(TAG_TRSM_RU(k, j, tag_prefix)); task->cl = &cl_trsm_ru; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, j); if (j == k+1) { task->priority = STARPU_MAX_PRIO; } /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_TRSM_RU(k, j, tag_prefix), 2, TAG_GETRF(k, tag_prefix), TAG_GEMM(k-1, k, j, tag_prefix)); } else { starpu_tag_declare_deps(TAG_TRSM_RU(k, j, tag_prefix), 1, TAG_GETRF(k, tag_prefix)); } ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } static struct starpu_codelet cl_gemm = { .modes = { STARPU_R, STARPU_R, STARPU_RW }, .cpu_funcs = {dw_cpu_codelet_update_gemm}, .cpu_funcs_name = {"dw_cpu_codelet_update_gemm"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dw_cublas_codelet_update_gemm}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 3, .model = &model_gemm }; static void create_task_gemm(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j, unsigned tag_prefix) { int ret; /* FPRINTF(stdout, "task 22 k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG_GEMM(k,i,j)); */ struct starpu_task *task = create_task(TAG_GEMM(k, i, j, tag_prefix)); task->cl = &cl_gemm; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, i, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, j); task->handles[2] = starpu_data_get_sub_data(dataA, 2, i, j); if ((i == k + 1) && (j == k +1)) { task->priority = STARPU_MAX_PRIO; } /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_GEMM(k, i, j, tag_prefix), 3, TAG_GEMM(k-1, i, j, tag_prefix), TAG_TRSM_LL(k, i, tag_prefix), TAG_TRSM_RU(k, j, tag_prefix)); } else { starpu_tag_declare_deps(TAG_GEMM(k, i, j, tag_prefix), 2, TAG_TRSM_LL(k, i, tag_prefix), TAG_TRSM_RU(k, j, tag_prefix)); } ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } static void dw_factoLU_grain_inner(float *matA, unsigned size, unsigned inner_size, unsigned ld, unsigned blocksize, unsigned tag_prefix) { int ret; /* * (re)partition data */ starpu_data_handle_t dataA; starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float)); STARPU_ASSERT((size % blocksize) == 0); STARPU_ASSERT((inner_size % blocksize) == 0); unsigned nblocks = size / blocksize; unsigned maxk = inner_size / blocksize; struct starpu_data_filter f = { .filter_func = starpu_matrix_filter_vertical_block, .nchildren = nblocks }; struct starpu_data_filter f2 = { .filter_func = starpu_matrix_filter_block, .nchildren = nblocks }; starpu_data_map_filters(dataA, 2, &f, &f2); /* * submit tasks */ struct starpu_task *entry_task = NULL; /* create all the DAG nodes */ unsigned i,j,k; /* if maxk < nblocks we'll stop before the LU decomposition is totally done */ for (k = 0; k < maxk; k++) { struct starpu_task *task = create_task_getrf(dataA, k, tag_prefix); /* we defer the launch of the first task */ if (k == 0) { entry_task = task; } else { ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } for (i = k+1; i #endif #ifdef STARPU_USE_CUDA #include static const float p1 = 1.0; static const float m1 = -1.0; #endif unsigned count_getrf_per_worker[STARPU_NMAXWORKERS] = {0}; unsigned count_trsm_ll_per_worker[STARPU_NMAXWORKERS] = {0}; unsigned count_trsm_ru_per_worker[STARPU_NMAXWORKERS] = {0}; unsigned count_gemm_per_worker[STARPU_NMAXWORKERS] = {0}; unsigned count_total_per_worker[STARPU_NMAXWORKERS] = {0}; unsigned count_getrf_total = 0; unsigned count_trsm_ll_total = 0; unsigned count_trsm_ru_total = 0; unsigned count_gemm_total = 0; void display_stat_heat(void) { unsigned nworkers = starpu_worker_get_count(); FPRINTF(stderr, "STATS : \n"); unsigned worker; for (worker = 0; worker < nworkers; worker++) { count_total_per_worker[worker] = count_getrf_per_worker[worker] + count_trsm_ll_per_worker[worker] + count_trsm_ru_per_worker[worker] + count_gemm_per_worker[worker]; count_getrf_total += count_getrf_per_worker[worker]; count_trsm_ll_total += count_trsm_ll_per_worker[worker]; count_trsm_ru_total += count_trsm_ru_per_worker[worker]; count_gemm_total += count_gemm_per_worker[worker]; } FPRINTF(stderr, "\t11 (diagonal block LU)\n"); for (worker = 0; worker < nworkers; worker++) { if (count_total_per_worker[worker]) { char name[64]; starpu_worker_get_name(worker, name, sizeof(name)); FPRINTF(stderr, "\t\t%s -> %u / %u (%2.2f %%)\n", name, count_getrf_per_worker[worker], count_getrf_total, (100.0*count_getrf_per_worker[worker])/count_getrf_total); } } FPRINTF(stderr, "\t12 (TRSM)\n"); for (worker = 0; worker < nworkers; worker++) { if (count_total_per_worker[worker]) { char name[64]; starpu_worker_get_name(worker, name, sizeof(name)); FPRINTF(stderr, "\t\t%s -> %u / %u (%2.2f %%)\n", name, count_trsm_ll_per_worker[worker], count_trsm_ll_total, (100.0*count_trsm_ll_per_worker[worker])/count_trsm_ll_total); } } FPRINTF(stderr, "\t21 (TRSM)\n"); for (worker = 0; worker < nworkers; worker++) { if (count_total_per_worker[worker]) { char name[64]; starpu_worker_get_name(worker, name, sizeof(name)); FPRINTF(stderr, "\t\t%s -> %u / %u (%2.2f %%)\n", name, count_trsm_ru_per_worker[worker], count_trsm_ru_total, (100.0*count_trsm_ru_per_worker[worker])/count_trsm_ru_total); } } FPRINTF(stderr, "\t22 (SGEMM)\n"); for (worker = 0; worker < nworkers; worker++) { if (count_total_per_worker[worker]) { char name[64]; starpu_worker_get_name(worker, name, sizeof(name)); FPRINTF(stderr, "\t\t%s -> %u / %u (%2.2f %%)\n", name, count_gemm_per_worker[worker], count_gemm_total, (100.0*count_gemm_per_worker[worker])/count_gemm_total); } } } /* * GEMM */ static inline void dw_common_cpu_codelet_update_gemm(void *descr[], int s, void *_args) { (void)_args; float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); float *right = (float *)STARPU_MATRIX_GET_PTR(descr[1]); float *center = (float *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned dx = STARPU_MATRIX_GET_NX(descr[2]); unsigned dy = STARPU_MATRIX_GET_NY(descr[2]); unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld12 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); #ifdef STARPU_USE_CUDA cublasStatus_t status; #endif switch (s) { case 0: STARPU_SGEMM("N", "N", dy, dx, dz, -1.0f, left, ld21, right, ld12, 1.0f, center, ld22); break; #ifdef STARPU_USE_CUDA case 1: status = cublasSgemm(starpu_cublas_get_local_handle(), CUBLAS_OP_N, CUBLAS_OP_N, dx, dy, dz, &m1, left, ld21, right, ld12, &p1, center, ld22); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; #endif default: STARPU_ABORT(); break; } } void dw_cpu_codelet_update_gemm(void *descr[], void *_args) { dw_common_cpu_codelet_update_gemm(descr, 0, _args); int id = starpu_worker_get_id_check(); count_gemm_per_worker[id]++; } #ifdef STARPU_USE_CUDA void dw_cublas_codelet_update_gemm(void *descr[], void *_args) { dw_common_cpu_codelet_update_gemm(descr, 1, _args); int id = starpu_worker_get_id_check(); count_gemm_per_worker[id]++; } #endif /* STARPU_USE_CUDA */ /* * TRSM_LL */ static inline void dw_common_codelet_update_trsm_ll(void *descr[], int s, void *_args) { (void)_args; float *sub11; float *sub12; sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); sub12 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); unsigned nx12 = STARPU_MATRIX_GET_NX(descr[1]); unsigned ny12 = STARPU_MATRIX_GET_NY(descr[1]); #ifdef STARPU_USE_CUDA cublasStatus_t status; #endif /* solve L11 U12 = A12 (find U12) */ switch (s) { case 0: STARPU_STRSM("L", "L", "N", "N", nx12, ny12, 1.0f, sub11, ld11, sub12, ld12); break; #ifdef STARPU_USE_CUDA case 1: status = cublasStrsm(starpu_cublas_get_local_handle(), CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, CUBLAS_DIAG_NON_UNIT, ny12, nx12, &p1, sub11, ld11, sub12, ld12); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; #endif default: STARPU_ABORT(); break; } } void dw_cpu_codelet_update_trsm_ll(void *descr[], void *_args) { dw_common_codelet_update_trsm_ll(descr, 0, _args); int id = starpu_worker_get_id_check(); count_trsm_ll_per_worker[id]++; } #ifdef STARPU_USE_CUDA void dw_cublas_codelet_update_trsm_ll(void *descr[], void *_args) { dw_common_codelet_update_trsm_ll(descr, 1, _args); int id = starpu_worker_get_id_check(); count_trsm_ll_per_worker[id]++; } #endif /* STARPU_USE_CUDA */ /* * TRSM_RU */ static inline void dw_common_codelet_update_trsm_ru(void *descr[], int s, void *_args) { (void)_args; float *sub11; float *sub21; sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); sub21 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); unsigned nx21 = STARPU_MATRIX_GET_NX(descr[1]); unsigned ny21 = STARPU_MATRIX_GET_NY(descr[1]); #ifdef STARPU_USE_CUDA cublasStatus_t status; #endif switch (s) { case 0: STARPU_STRSM("R", "U", "N", "U", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21); break; #ifdef STARPU_USE_CUDA case 1: status = cublasStrsm(starpu_cublas_get_local_handle(), CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, CUBLAS_DIAG_UNIT, ny21, nx21, &p1, sub11, ld11, sub21, ld21); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; #endif default: STARPU_ABORT(); break; } } void dw_cpu_codelet_update_trsm_ru(void *descr[], void *_args) { dw_common_codelet_update_trsm_ru(descr, 0, _args); int id = starpu_worker_get_id_check(); count_trsm_ru_per_worker[id]++; } #ifdef STARPU_USE_CUDA void dw_cublas_codelet_update_trsm_ru(void *descr[], void *_args) { dw_common_codelet_update_trsm_ru(descr, 1, _args); int id = starpu_worker_get_id_check(); count_trsm_ru_per_worker[id]++; } #endif /* * GETRF */ static inline void debug_print(float *tab, unsigned ld, unsigned n) { unsigned j,i; for (j = 0; j < n; j++) { for (i = 0; i < n; i++) { FPRINTF(stderr, "%2.2f\t", tab[(size_t)j+(size_t)i*ld]); } FPRINTF(stderr, "\n"); } FPRINTF(stderr, "\n"); } static inline void dw_common_codelet_update_getrf(void *descr[], int s, void *_args) { (void)_args; float *sub11; sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]); unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]); unsigned long z; #ifdef STARPU_USE_CUDA cudaStream_t stream; cublasStatus_t status; #endif switch (s) { case 0: for (z = 0; z < nx; z++) { float pivot; pivot = sub11[z+z*ld]; #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) { if (fpclassify(pivot) == FP_ZERO) /* Running in valgrind, don't care about the result */ pivot = 1.0f; } else #endif STARPU_ASSERT(fpclassify(pivot) != FP_ZERO); STARPU_SSCAL(nx - z - 1, (1.0f/pivot), &sub11[z+(z+1)*ld], ld); STARPU_SGER(nx - z - 1, nx - z - 1, -1.0f, &sub11[z+(z+1)*ld], ld, &sub11[(z+1)+z*ld], 1, &sub11[(z+1) + (z+1)*ld],ld); } break; #ifdef STARPU_USE_CUDA case 1: /* TODO: Use cusolver */ stream = starpu_cuda_get_local_stream(); for (z = 0; z < nx; z++) { float pivot; cudaMemcpyAsync(&pivot, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, stream); cudaStreamSynchronize(stream); #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) { if (fpclassify(pivot) == FP_ZERO) /* Running in valgrind, don't care about the result */ pivot = 1.0f; } else #endif STARPU_ASSERT(fpclassify(pivot) != FP_ZERO); float scal = 1.0f/pivot; status = cublasSscal(starpu_cublas_get_local_handle(), nx - z - 1, &scal, &sub11[z+(z+1)*ld], ld); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); status = cublasSger(starpu_cublas_get_local_handle(), nx - z - 1, nx - z - 1, &m1, &sub11[z+(z+1)*ld], ld, &sub11[(z+1)+z*ld], 1, &sub11[(z+1) + (z+1)*ld],ld); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } cudaStreamSynchronize(stream); break; #endif default: STARPU_ABORT(); break; } } void dw_cpu_codelet_update_getrf(void *descr[], void *_args) { dw_common_codelet_update_getrf(descr, 0, _args); int id = starpu_worker_get_id_check(); count_getrf_per_worker[id]++; } #ifdef STARPU_USE_CUDA void dw_cublas_codelet_update_getrf(void *descr[], void *_args) { dw_common_codelet_update_getrf(descr, 1, _args); int id = starpu_worker_get_id_check(); count_getrf_per_worker[id]++; } #endif /* STARPU_USE_CUDA */ starpu-1.4.9+dfsg/examples/heat/dw_factolu_tag.c000066400000000000000000000200331507764646700216730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This implements an LU factorization. * The task graph is submitted through dependency tags. */ #include "dw_factolu.h" #define TAG_GETRF(k) ((starpu_tag_t)((1ULL<<60) | (unsigned long long)(k))) #define TAG_TRSM_LL(k,i) ((starpu_tag_t)(((2ULL<<60) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(i)))) #define TAG_TRSM_RU(k,j) ((starpu_tag_t)(((3ULL<<60) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(j)))) #define TAG_GEMM(k,i,j) ((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) \ | ((unsigned long long)(i)<<16) \ | (unsigned long long)(j)))) static unsigned no_prio = 0; /* * Construct the DAG */ static struct starpu_task *create_task(starpu_tag_t id) { struct starpu_task *task = starpu_task_create(); task->cl_arg = NULL; task->use_tag = 1; task->tag_id = id; return task; } static struct starpu_codelet cl_getrf = { .modes = { STARPU_RW }, .cpu_funcs = {dw_cpu_codelet_update_getrf}, .cpu_funcs_name = {"dw_cpu_codelet_update_getrf"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dw_cublas_codelet_update_getrf}, #endif .nbuffers = 1, .model = &model_getrf }; static struct starpu_task *create_task_getrf(starpu_data_handle_t dataA, unsigned k) { /* printf("task 11 k = %d TAG = %llx\n", k, (TAG_GETRF(k))); */ struct starpu_task *task = create_task(TAG_GETRF(k)); task->cl = &cl_getrf; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); /* this is an important task */ if (!no_prio) task->priority = STARPU_MAX_PRIO; /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_GETRF(k), 1, TAG_GEMM(k-1, k, k)); } return task; } static struct starpu_codelet cl_trsm_ll = { .modes = { STARPU_R, STARPU_RW }, .cpu_funcs = {dw_cpu_codelet_update_trsm_ll}, .cpu_funcs_name = {"dw_cpu_codelet_update_trsm_ll"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dw_cublas_codelet_update_trsm_ll}, #endif .nbuffers = 2, .model = &model_trsm_ll }; static void create_task_trsm_ll(starpu_data_handle_t dataA, unsigned k, unsigned i) { int ret; /* printf("task 12 k,i = %d,%d TAG = %llx\n", k,i, TAG_TRSM_LL(k,i)); */ struct starpu_task *task = create_task(TAG_TRSM_LL(k, i)); task->cl = &cl_trsm_ll; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, i, k); if (!no_prio && (i == k+1)) { task->priority = STARPU_MAX_PRIO; } /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_TRSM_LL(k, i), 2, TAG_GETRF(k), TAG_GEMM(k-1, i, k)); } else { starpu_tag_declare_deps(TAG_TRSM_LL(k, i), 1, TAG_GETRF(k)); } ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } static struct starpu_codelet cl_trsm_ru = { .modes = { STARPU_R, STARPU_RW }, .cpu_funcs = {dw_cpu_codelet_update_trsm_ru}, .cpu_funcs_name = {"dw_cpu_codelet_update_trsm_ru"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dw_cublas_codelet_update_trsm_ru}, #endif .nbuffers = 2, .model = &model_trsm_ru }; static void create_task_trsm_ru(starpu_data_handle_t dataA, unsigned k, unsigned j) { int ret; struct starpu_task *task = create_task(TAG_TRSM_RU(k, j)); task->cl = &cl_trsm_ru; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, j); if (!no_prio && (j == k+1)) { task->priority = STARPU_MAX_PRIO; } /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_TRSM_RU(k, j), 2, TAG_GETRF(k), TAG_GEMM(k-1, k, j)); } else { starpu_tag_declare_deps(TAG_TRSM_RU(k, j), 1, TAG_GETRF(k)); } ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } static struct starpu_codelet cl_gemm = { .modes = { STARPU_R, STARPU_R, STARPU_RW }, .cpu_funcs = {dw_cpu_codelet_update_gemm}, .cpu_funcs_name = {"dw_cpu_codelet_update_gemm"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dw_cublas_codelet_update_gemm}, #endif .nbuffers = 3, .model = &model_gemm }; static void create_task_gemm(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j) { int ret; /* printf("task 22 k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG_GEMM(k,i,j)); */ struct starpu_task *task = create_task(TAG_GEMM(k, i, j)); task->cl = &cl_gemm; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, i, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, j); task->handles[2] = starpu_data_get_sub_data(dataA, 2, i, j); if (!no_prio && (i == k + 1) && (j == k +1)) { task->priority = STARPU_MAX_PRIO; } /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_GEMM(k, i, j), 3, TAG_GEMM(k-1, i, j), TAG_TRSM_LL(k, i), TAG_TRSM_RU(k, j)); } else { starpu_tag_declare_deps(TAG_GEMM(k, i, j), 2, TAG_TRSM_LL(k, i), TAG_TRSM_RU(k, j)); } ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* * code to bootstrap the factorization */ static void dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks) { int ret; double start; double end; struct starpu_task *entry_task = NULL; /* create all the DAG nodes */ unsigned i,j,k; for (k = 0; k < nblocks; k++) { struct starpu_task *task = create_task_getrf(dataA, k); /* we defer the launch of the first task */ if (k == 0) { entry_task = task; } else { ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } for (i = k+1; iuse_tag = 1; task->tag_id = id; return task; } static void create_data(float **_nzvalA, float **_vecb, float **_vecx, uint32_t *_nnz, uint32_t *_nrow, uint32_t **_colind, uint32_t **_rowptr) { /* we need a sparse symmetric (definite positive ?) matrix and a "dense" vector */ /* example of 3-band matrix */ float *nzval; uint32_t nnz; uint32_t *colind; uint32_t *rowptr; nnz = 3*_size-2; nzval = malloc(nnz*sizeof(float)); colind = malloc(nnz*sizeof(uint32_t)); rowptr = malloc(_size*sizeof(uint32_t)); assert(nzval); assert(colind); assert(rowptr); /* fill the matrix */ unsigned row; unsigned pos = 0; for (row = 0; row < _size; row++) { rowptr[row] = pos; if (row > 0) { nzval[pos] = 1.0f; colind[pos] = row-1; pos++; } nzval[pos] = 5.0f; colind[pos] = row; pos++; if (row < _size - 1) { nzval[pos] = 1.0f; colind[pos] = row+1; pos++; } } *_nnz = nnz; *_nrow = _size; *_nzvalA = nzval; *_colind = colind; *_rowptr = rowptr; STARPU_ASSERT(pos == nnz); /* initiate the 2 vectors */ float *invec, *outvec; invec = malloc(_size*sizeof(float)); assert(invec); outvec = malloc(_size*sizeof(float)); assert(outvec); /* fill those */ unsigned ind; for (ind = 0; ind < _size; ind++) { invec[ind] = 2.0f; outvec[ind] = 0.0f; } *_vecb = invec; *_vecx = outvec; } void init_problem(void) { /* create the sparse input matrix */ float *nzval; float *vecb; float *vecx; uint32_t nnz; uint32_t nrow; uint32_t *colind; uint32_t *rowptr; create_data(&nzval, &vecb, &vecx, &nnz, &nrow, &colind, &rowptr); conjugate_gradient(nzval, vecb, vecx, nnz, nrow, colind, rowptr); } /* * cg initialization phase */ static struct starpu_codelet cl1 = { .cpu_funcs = { cpu_codelet_func_1 }, .cpu_funcs_name = { "cpu_codelet_func_1" }, .nbuffers = 4, .modes = { STARPU_R, STARPU_R, STARPU_W, STARPU_R }, }; static struct starpu_codelet cl2 = { .cpu_funcs = { cpu_codelet_func_2 }, .cpu_funcs_name = { "cpu_codelet_func_2" }, .nbuffers = 2, .modes = { STARPU_W, STARPU_R }, }; static struct starpu_codelet cl3 = { .cpu_funcs = { cpu_codelet_func_3 }, .cpu_funcs_name = { "cpu_codelet_func_3" }, #ifdef STARPU_USE_CUDA .cuda_funcs = { cublas_codelet_func_3 }, #endif .nbuffers = 1, .modes = { STARPU_R }, }; void init_cg(struct cg_problem *problem) { int ret; problem->i = 0; /* r = b - A x */ struct starpu_task *task1 = create_task(1UL); task1->cl = &cl1; task1->handles[0] = problem->ds_matrixA; task1->handles[1] = problem->ds_vecx; task1->handles[2] = problem->ds_vecr; task1->handles[3] = problem->ds_vecb; /* d = r */ struct starpu_task *task2 = create_task(2UL); task2->cl = &cl2; task2->handles[0] = problem->ds_vecd; task2->handles[1] = problem->ds_vecr; starpu_tag_declare_deps((starpu_tag_t)2UL, 1, (starpu_tag_t)1UL); /* delta_new = trans(r) r */ struct starpu_task *task3 = create_task(3UL); task3->cl = &cl3; task3->cl_arg = problem; task3->cl_arg_size = sizeof(*problem); task3->handles[0] = problem->ds_vecr; task3->callback_func = iteration_cg; task3->callback_arg = problem; /* XXX 3 should only depend on 1 ... */ starpu_tag_declare_deps((starpu_tag_t)3UL, 1, (starpu_tag_t)2UL); /* launch the computation now */ ret = starpu_task_submit(task1); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); exit(0); } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(task2); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(task3); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* * the inner iteration of the cg algorithm * the codelet code launcher is its own callback ! */ static struct starpu_codelet cl4 = { .cpu_funcs = { cpu_codelet_func_4 }, .cpu_funcs_name = { "cpu_codelet_func_4" }, .nbuffers = 3, .modes = { STARPU_R, STARPU_R, STARPU_W }, }; static struct starpu_codelet cl5 = { .cpu_funcs = { cpu_codelet_func_5 }, .cpu_funcs_name = { "cpu_codelet_func_5" }, #ifdef STARPU_USE_CUDA .cuda_funcs = { cublas_codelet_func_5 }, #endif .nbuffers = 2, .modes = { STARPU_R, STARPU_R }, }; static struct starpu_codelet cl6 = { .cpu_funcs = { cpu_codelet_func_6 }, .cpu_funcs_name = { "cpu_codelet_func_6" }, #ifdef STARPU_USE_CUDA .cuda_funcs = { cublas_codelet_func_6 }, .cuda_flags = { STARPU_CUDA_ASYNC }, #endif .nbuffers = 2, .modes = { STARPU_RW, STARPU_R }, }; static struct starpu_codelet cl7 = { .cpu_funcs = { cpu_codelet_func_7 }, .cpu_funcs_name = { "cpu_codelet_func_7" }, #ifdef STARPU_USE_CUDA .cuda_funcs = { cublas_codelet_func_7 }, .cuda_flags = { STARPU_CUDA_ASYNC }, #endif .nbuffers = 2, .modes = { STARPU_RW, STARPU_R }, }; static struct starpu_codelet cl8 = { .cpu_funcs = { cpu_codelet_func_8 }, .cpu_funcs_name = { "cpu_codelet_func_8" }, #ifdef STARPU_USE_CUDA .cuda_funcs = { cublas_codelet_func_8 }, #endif .nbuffers = 1, .modes = { STARPU_R }, }; static struct starpu_codelet cl9 = { .cpu_funcs = { cpu_codelet_func_9 }, .cpu_funcs_name = { "cpu_codelet_func_9" }, #ifdef STARPU_USE_CUDA .cuda_funcs = { cublas_codelet_func_9 }, .cuda_flags = { STARPU_CUDA_ASYNC }, #endif .nbuffers = 2, .modes = { STARPU_RW, STARPU_R }, }; void launch_new_cg_iteration(struct cg_problem *problem) { int ret; unsigned iter = problem->i; unsigned long long maskiter = ((unsigned long long)iter*1024); /* q = A d */ struct starpu_task *task4 = create_task(maskiter | 4UL); task4->cl = &cl4; task4->handles[0] = problem->ds_matrixA; task4->handles[1] = problem->ds_vecd; task4->handles[2] = problem->ds_vecq; /* alpha = delta_new / (trans(d) q)*/ struct starpu_task *task5 = create_task(maskiter | 5UL); task5->cl = &cl5; task5->cl_arg = problem; task5->cl_arg_size = sizeof(*problem); task5->handles[0] = problem->ds_vecd; task5->handles[1] = problem->ds_vecq; starpu_tag_declare_deps((starpu_tag_t)(maskiter | 5UL), 1, (starpu_tag_t)(maskiter | 4UL)); /* x = x + alpha d */ struct starpu_task *task6 = create_task(maskiter | 6UL); task6->cl = &cl6; task6->cl_arg = problem; task6->cl_arg_size = sizeof(*problem); task6->handles[0] = problem->ds_vecx; task6->handles[1] = problem->ds_vecd; starpu_tag_declare_deps((starpu_tag_t)(maskiter | 6UL), 1, (starpu_tag_t)(maskiter | 5UL)); /* r = r - alpha q */ struct starpu_task *task7 = create_task(maskiter | 7UL); task7->cl = &cl7; task7->cl_arg = problem; task7->cl_arg_size = sizeof(*problem); task7->handles[0] = problem->ds_vecr; task7->handles[1] = problem->ds_vecq; starpu_tag_declare_deps((starpu_tag_t)(maskiter | 7UL), 1, (starpu_tag_t)(maskiter | 6UL)); /* update delta_* and compute beta */ struct starpu_task *task8 = create_task(maskiter | 8UL); task8->cl = &cl8; task8->cl_arg = problem; task8->cl_arg_size = sizeof(*problem); task8->handles[0] = problem->ds_vecr; starpu_tag_declare_deps((starpu_tag_t)(maskiter | 8UL), 1, (starpu_tag_t)(maskiter | 7UL)); /* d = r + beta d */ struct starpu_task *task9 = create_task(maskiter | 9UL); task9->cl = &cl9; task9->cl_arg = problem; task9->cl_arg_size = sizeof(*problem); task9->handles[0] = problem->ds_vecd; task9->handles[1] = problem->ds_vecr; starpu_tag_declare_deps((starpu_tag_t)(maskiter | 9UL), 1, (starpu_tag_t)(maskiter | 8UL)); task9->callback_func = iteration_cg; task9->callback_arg = problem; /* launch the computation now */ ret = starpu_task_submit(task4); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(task5); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(task6); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(task7); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(task8); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(task9); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } void iteration_cg(void *problem) { struct cg_problem *pb = problem; FPRINTF(stdout, "i : %d (MAX %d)\n\tdelta_new %f (%f)\n", pb->i, MAXITER, pb->delta_new, sqrt(pb->delta_new / pb->size)); if ((pb->i < MAXITER) && (pb->delta_new > pb->epsilon)) { if (pb->i % 1000 == 0) FPRINTF(stdout, "i : %d\n\tdelta_new %f (%f)\n", pb->i, pb->delta_new, sqrt(pb->delta_new / pb->size)); pb->i++; /* we did not reach the stop condition yet */ launch_new_cg_iteration(problem); } else { /* we may stop */ FPRINTF(stdout, "We are done ... after %d iterations \n", pb->i - 1); FPRINTF(stdout, "i : %d\n\tdelta_new %2.5f\n", pb->i, pb->delta_new); sem_post(pb->sem); } } /* * initializing the problem */ void conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz, unsigned nrow, uint32_t *colind, uint32_t *rowptr) { /* first register all the data structures to StarPU */ starpu_data_handle_t ds_matrixA; starpu_data_handle_t ds_vecx, ds_vecb; starpu_data_handle_t ds_vecr, ds_vecd, ds_vecq; /* first the user-allocated data */ starpu_csr_data_register(&ds_matrixA, STARPU_MAIN_RAM, nnz, nrow, (uintptr_t)nzvalA, colind, rowptr, 0, sizeof(float)); starpu_vector_data_register(&ds_vecx, STARPU_MAIN_RAM, (uintptr_t)vecx, nrow, sizeof(float)); starpu_vector_data_register(&ds_vecb, STARPU_MAIN_RAM, (uintptr_t)vecb, nrow, sizeof(float)); /* then allocate the algorithm intern data */ float *ptr_vecr, *ptr_vecd, *ptr_vecq; unsigned i; starpu_malloc((void **)&ptr_vecr, nrow*sizeof(float)); starpu_malloc((void **)&ptr_vecd, nrow*sizeof(float)); starpu_malloc((void **)&ptr_vecq, nrow*sizeof(float)); for (i = 0; i < nrow; i++) { ptr_vecr[i] = 0.0f; ptr_vecd[i] = 0.0f; ptr_vecq[i] = 0.0f; } FPRINTF(stdout, "nrow = %u \n", nrow); /* and register them as well */ starpu_vector_data_register(&ds_vecr, STARPU_MAIN_RAM, (uintptr_t)ptr_vecr, nrow, sizeof(float)); starpu_vector_data_register(&ds_vecd, STARPU_MAIN_RAM, (uintptr_t)ptr_vecd, nrow, sizeof(float)); starpu_vector_data_register(&ds_vecq, STARPU_MAIN_RAM, (uintptr_t)ptr_vecq, nrow, sizeof(float)); /* we now have the complete problem */ struct cg_problem problem; problem.ds_matrixA = ds_matrixA; problem.ds_vecx = ds_vecx; problem.ds_vecb = ds_vecb; problem.ds_vecr = ds_vecr; problem.ds_vecd = ds_vecd; problem.ds_vecq = ds_vecq; problem.epsilon = EPSILON; problem.size = nrow; problem.delta_old = 1.0; problem.delta_new = 1.0; /* just to make sure we do at least one iteration */ /* we need a semaphore to synchronize with callbacks */ sem_t sem; sem_init(&sem, 0, 0U); problem.sem = &sem; init_cg(&problem); sem_wait(&sem); sem_destroy(&sem); starpu_task_wait_for_all(); print_results(vecx, nrow); starpu_data_unregister(ds_matrixA); starpu_data_unregister(ds_vecx); starpu_data_unregister(ds_vecb); starpu_data_unregister(ds_vecr); starpu_data_unregister(ds_vecd); starpu_data_unregister(ds_vecq); starpu_free_noflag(ptr_vecr, nrow*sizeof(float)); starpu_free_noflag(ptr_vecd, nrow*sizeof(float)); starpu_free_noflag(ptr_vecq, nrow*sizeof(float)); } void do_conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz, unsigned nrow, uint32_t *colind, uint32_t *rowptr) { /* start the runtime */ int ret; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_cublas_init(); conjugate_gradient(nzvalA, vecb, vecx, nnz, nrow, colind, rowptr); starpu_shutdown(); } starpu-1.4.9+dfsg/examples/heat/dw_sparse_cg.h000066400000000000000000000061741507764646700213700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DW_SPARSE_CG_H__ #define __DW_SPARSE_CG_H__ #include #include #include #include #include #include #include #include #include #include "../common/blas.h" #define MAXITER 100000 #define EPSILON 0.0000001f /* code parameters */ static uint32_t _size = 33554432; static unsigned _usecpu = 0; static unsigned _blocks = 512; static unsigned _grids = 8; struct cg_problem { starpu_data_handle_t ds_matrixA; starpu_data_handle_t ds_vecx; starpu_data_handle_t ds_vecb; starpu_data_handle_t ds_vecr; starpu_data_handle_t ds_vecd; starpu_data_handle_t ds_vecq; sem_t *sem; float alpha; float beta; float delta_0; float delta_old; float delta_new; float epsilon; int i; unsigned size; }; /* some useful functions */ static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-size") == 0) { char *argptr; _size = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-block") == 0) { char *argptr; _blocks = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-grid") == 0) { char *argptr; _grids = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-cpu") == 0) { _usecpu = 1; } } } static void print_results(float *result, unsigned size) { printf("**** RESULTS **** \n"); unsigned i; for (i = 0; i < STARPU_MIN(size, 16); i++) { printf("%u -> %f\n", i, result[i]); } } void cpu_codelet_func_1(void *descr[], void *arg); void cpu_codelet_func_2(void *descr[], void *arg); void cublas_codelet_func_3(void *descr[], void *arg); void cpu_codelet_func_3(void *descr[], void *arg); void cpu_codelet_func_4(void *descr[], void *arg); void cpu_codelet_func_5(void *descr[], void *arg); void cublas_codelet_func_5(void *descr[], void *arg); void cublas_codelet_func_6(void *descr[], void *arg); void cpu_codelet_func_6(void *descr[], void *arg); void cublas_codelet_func_7(void *descr[], void *arg); void cpu_codelet_func_7(void *descr[], void *arg); void cublas_codelet_func_8(void *descr[], void *arg); void cpu_codelet_func_8(void *descr[], void *arg); void cublas_codelet_func_9(void *descr[], void *arg); void cpu_codelet_func_9(void *descr[], void *arg); void iteration_cg(void *problem); void conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz, unsigned nrow, uint32_t *colind, uint32_t *rowptr); #endif /* __DW_SPARSE_CG_H__ */ starpu-1.4.9+dfsg/examples/heat/dw_sparse_cg_kernels.c000066400000000000000000000240311507764646700230760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "dw_sparse_cg.h" #ifdef STARPU_USE_CUDA #include #endif /* * Algorithm : * * i = 0 * r = b - A x * (d = A x ; r = r - d) * d = r * delta_new = trans(r) r * delta_0 = delta_new * * while (i < i_max && delta_new > eps^2 delta_0) * { * q = A d * alpha = delta_new / (trans(d) q) * x = x + alpha d * if (i is divisible by 50) * r = b - A x * else * r = r - alpha q * delta_old = delta_new * delta_new = trans(r) r * beta = delta_new / delta_old * d = r + beta d * i = i + 1 * } */ /* * compute r = b - A x * * descr[0] = A, descr[1] = x, descr [2] = r, descr[3] = b */ void cpu_codelet_func_1(void *descr[], void *arg) { (void)arg; float *nzval = (float *)STARPU_CSR_GET_NZVAL(descr[0]); uint32_t *colind = STARPU_CSR_GET_COLIND(descr[0]); uint32_t *rowptr = STARPU_CSR_GET_ROWPTR(descr[0]); uint32_t firstentry = STARPU_CSR_GET_ELEMSIZE(descr[0]); float *vecx = (float *)STARPU_VECTOR_GET_PTR(descr[1]); float *vecr = (float *)STARPU_VECTOR_GET_PTR(descr[2]); float *vecb = (float *)STARPU_VECTOR_GET_PTR(descr[3]); uint32_t nrow; nrow = STARPU_CSR_GET_NROW(descr[0]); unsigned row; for (row = 0; row < nrow; row++) { float tmp = 0.0f; unsigned index; unsigned firstindex = rowptr[row] - firstentry; unsigned lastindex = rowptr[row+1] - firstentry; for (index = firstindex; index < lastindex; index++) { unsigned col; col = colind[index]; tmp += nzval[index]*vecx[col]; } vecr[row] = vecb[row] - tmp; } } /* * compute d = r * descr[0] = d, descr[1] = r */ void cpu_codelet_func_2(void *descr[], void *arg) { (void)arg; /* simply copy r into d */ uint32_t nx = STARPU_VECTOR_GET_NX(descr[0]); size_t elemsize = STARPU_VECTOR_GET_ELEMSIZE(descr[0]); STARPU_ASSERT(STARPU_VECTOR_GET_NX(descr[0]) == STARPU_VECTOR_GET_NX(descr[1])); STARPU_ASSERT(STARPU_VECTOR_GET_ELEMSIZE(descr[0]) == STARPU_VECTOR_GET_ELEMSIZE(descr[1])); float *src = (float *)STARPU_VECTOR_GET_PTR(descr[1]); float *dst = (float *)STARPU_VECTOR_GET_PTR(descr[0]); memcpy(dst, src, nx*elemsize); } /* * compute delta_new = trans(r) r * delta_0 = delta_new * * args = &delta_new, &delta_0 */ void cpu_codelet_func_3(void *descr[], void *arg) { struct cg_problem *pb = arg; float dot; float *vec; int size; /* get the vector */ vec = (float *)STARPU_VECTOR_GET_PTR(descr[0]); size = (int)STARPU_VECTOR_GET_NX(descr[0]); dot = STARPU_SDOT(size, vec, 1, vec, 1); fprintf(stderr, "func 3 : DOT = %f\n", dot); pb->delta_new = dot; pb->delta_0 = dot; } #ifdef STARPU_USE_CUDA void cublas_codelet_func_3(void *descr[], void *arg) { struct cg_problem *pb = arg; float dot; float *vec; uint32_t size; /* get the vector */ vec = (float *)STARPU_VECTOR_GET_PTR(descr[0]); size = STARPU_VECTOR_GET_NX(descr[0]); cublasStatus_t status = cublasSdot (starpu_cublas_get_local_handle(), size, vec, 1, vec, 1, &dot); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); cudaStreamSynchronize(starpu_cuda_get_local_stream()); pb->delta_new = dot; pb->delta_0 = dot; } #endif /* * compute q with : q = A d * * descr[0] = A, descr[1] = d, descr [2] = q */ void cpu_codelet_func_4(void *descr[], void *arg) { (void)arg; float *nzval = (float *)STARPU_CSR_GET_NZVAL(descr[0]); uint32_t *colind = STARPU_CSR_GET_COLIND(descr[0]); uint32_t *rowptr = STARPU_CSR_GET_ROWPTR(descr[0]); uint32_t firstentry = STARPU_CSR_GET_FIRSTENTRY(descr[0]); float *vecd = (float *)STARPU_VECTOR_GET_PTR(descr[1]); float *vecq = (float *)STARPU_VECTOR_GET_PTR(descr[2]); uint32_t nrow; nrow = STARPU_CSR_GET_NROW(descr[0]); unsigned row; for (row = 0; row < nrow; row++) { float tmp = 0.0f; unsigned index; unsigned firstindex = rowptr[row] - firstentry; unsigned lastindex = rowptr[row+1] - firstentry; for (index = firstindex; index < lastindex; index++) { unsigned col; col = colind[index]; tmp += nzval[index]*vecd[col]; } vecq[row] = tmp; } } /* * compute alpha = delta_new / (trans(d) q) * * descr[0] = d, descr[1] = q * args = &alpha, &delta_new */ void cpu_codelet_func_5(void *descr[], void *arg) { float dot; struct cg_problem *pb = arg; float *vecd, *vecq; uint32_t size; /* get the vector */ vecd = (float *)STARPU_VECTOR_GET_PTR(descr[0]); vecq = (float *)STARPU_VECTOR_GET_PTR(descr[1]); STARPU_ASSERT(STARPU_VECTOR_GET_NX(descr[0]) == STARPU_VECTOR_GET_NX(descr[1])); size = STARPU_VECTOR_GET_NX(descr[0]); dot = STARPU_SDOT(size, vecd, 1, vecq, 1); pb->alpha = pb->delta_new / dot; } #ifdef STARPU_USE_CUDA void cublas_codelet_func_5(void *descr[], void *arg) { float dot; struct cg_problem *pb = arg; float *vecd, *vecq; uint32_t size; /* get the vector */ vecd = (float *)STARPU_VECTOR_GET_PTR(descr[0]); vecq = (float *)STARPU_VECTOR_GET_PTR(descr[1]); STARPU_ASSERT(STARPU_VECTOR_GET_NX(descr[0]) == STARPU_VECTOR_GET_NX(descr[1])); size = STARPU_VECTOR_GET_NX(descr[0]); cublasStatus_t status = cublasSdot (starpu_cublas_get_local_handle(), size, vecd, 1, vecq, 1, &dot); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); cudaStreamSynchronize(starpu_cuda_get_local_stream()); pb->alpha = pb->delta_new / dot; } #endif /* * compute x = x + alpha d * * descr[0] : x, descr[1] : d * args = &alpha */ void cpu_codelet_func_6(void *descr[], void *arg) { struct cg_problem *pb = arg; float *vecx, *vecd; uint32_t size; /* get the vector */ vecx = (float *)STARPU_VECTOR_GET_PTR(descr[0]); vecd = (float *)STARPU_VECTOR_GET_PTR(descr[1]); size = STARPU_VECTOR_GET_NX(descr[0]); STARPU_SAXPY(size, pb->alpha, vecd, 1, vecx, 1); } #ifdef STARPU_USE_CUDA void cublas_codelet_func_6(void *descr[], void *arg) { struct cg_problem *pb = arg; float *vecx, *vecd; uint32_t size; /* get the vector */ vecx = (float *)STARPU_VECTOR_GET_PTR(descr[0]); vecd = (float *)STARPU_VECTOR_GET_PTR(descr[1]); size = STARPU_VECTOR_GET_NX(descr[0]); cublasStatus_t status = cublasSaxpy (starpu_cublas_get_local_handle(), size, &pb->alpha, vecd, 1, vecx, 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif /* * compute r = r - alpha q * * descr[0] : r, descr[1] : q * args = &alpha */ void cpu_codelet_func_7(void *descr[], void *arg) { struct cg_problem *pb = arg; float *vecr, *vecq; uint32_t size; /* get the vector */ vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]); vecq = (float *)STARPU_VECTOR_GET_PTR(descr[1]); size = STARPU_VECTOR_GET_NX(descr[0]); STARPU_SAXPY(size, -pb->alpha, vecq, 1, vecr, 1); } #ifdef STARPU_USE_CUDA void cublas_codelet_func_7(void *descr[], void *arg) { struct cg_problem *pb = arg; float *vecr, *vecq; uint32_t size; /* get the vector */ vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]); vecq = (float *)STARPU_VECTOR_GET_PTR(descr[1]); size = STARPU_VECTOR_GET_NX(descr[0]); float scal = -pb->alpha; cublasStatus_t status = cublasSaxpy (starpu_cublas_get_local_handle(), size, &scal, vecq, 1, vecr, 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif /* * compute delta_old = delta_new * delta_new = trans(r) r * beta = delta_new / delta_old * * descr[0] = r * args = &delta_old, &delta_new, &beta */ void cpu_codelet_func_8(void *descr[], void *arg) { float dot; struct cg_problem *pb = arg; float *vecr; uint32_t size; /* get the vector */ vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]); size = STARPU_VECTOR_GET_NX(descr[0]); dot = STARPU_SDOT(size, vecr, 1, vecr, 1); pb->delta_old = pb->delta_new; pb->delta_new = dot; pb->beta = pb->delta_new/pb->delta_old; } #ifdef STARPU_USE_CUDA void cublas_codelet_func_8(void *descr[], void *arg) { float dot; struct cg_problem *pb = arg; float *vecr; uint32_t size; /* get the vector */ vecr = (float *)STARPU_VECTOR_GET_PTR(descr[0]); size = STARPU_VECTOR_GET_NX(descr[0]); cublasStatus_t status = cublasSdot(starpu_cublas_get_local_handle(), size, vecr, 1, vecr, 1, &dot); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); cudaStreamSynchronize(starpu_cuda_get_local_stream()); pb->delta_old = pb->delta_new; pb->delta_new = dot; pb->beta = pb->delta_new/pb->delta_old; } #endif /* * compute d = r + beta d * * descr[0] : d, descr[1] : r * args = &beta * */ void cpu_codelet_func_9(void *descr[], void *arg) { struct cg_problem *pb = arg; float *vecd, *vecr; uint32_t size; /* get the vector */ vecd = (float *)STARPU_VECTOR_GET_PTR(descr[0]); vecr = (float *)STARPU_VECTOR_GET_PTR(descr[1]); size = STARPU_VECTOR_GET_NX(descr[0]); /* d = beta d */ STARPU_SSCAL(size, pb->beta, vecd, 1); /* d = r + d */ STARPU_SAXPY (size, 1.0f, vecr, 1, vecd, 1); } #ifdef STARPU_USE_CUDA void cublas_codelet_func_9(void *descr[], void *arg) { struct cg_problem *pb = arg; float *vecd, *vecr; uint32_t size; /* get the vector */ vecd = (float *)STARPU_VECTOR_GET_PTR(descr[0]); vecr = (float *)STARPU_VECTOR_GET_PTR(descr[1]); size = STARPU_VECTOR_GET_NX(descr[0]); /* d = beta d */ cublasStatus_t status; status = cublasSscal(starpu_cublas_get_local_handle(), size, &pb->beta, vecd, 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); /* d = r + d */ float scal = 1.0f; status = cublasSaxpy (starpu_cublas_get_local_handle(), size, &scal, vecr, 1, vecd, 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif starpu-1.4.9+dfsg/examples/heat/heat.c000066400000000000000000000462071507764646700176450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * heat propagation simulation through either direct LU factorization or * iterative conjugate gradient. */ #include "heat.h" /* default values */ #ifdef STARPU_QUICK_CHECK static unsigned ntheta = 8+2; static unsigned nthick = 8+2; #else static unsigned ntheta = 32+2; static unsigned nthick = 32+2; #endif static unsigned nblocks = 16; static unsigned nbigblocks = 8; static unsigned shape = 0; static unsigned pinned = 0; static unsigned check = 0; static unsigned version = 2; static unsigned use_cg = 0; /* use a LU decomposition of CG ? */ static unsigned no_prio = 0; extern void do_conjugate_gradient(float *nzvalA, float *vecb, float *vecx, uint32_t nnz, unsigned nrow, uint32_t *colind, uint32_t *rowptr); static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-cg") == 0) { use_cg = 1; } if (strcmp(argv[i], "-shape") == 0) { char *argptr; shape = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-nthick") == 0) { char *argptr; nthick = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-ntheta") == 0) { char *argptr; ntheta = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-nblocks") == 0) { char *argptr; nblocks = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-nbigblocks") == 0) { char *argptr; nbigblocks = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-v1") == 0) { version = 1; } if (strcmp(argv[i], "-v2") == 0) { version = 2; } if (strcmp(argv[i], "-v3") == 0) { version = 3; } if (strcmp(argv[i], "-v4") == 0) { version = 4; } if (strcmp(argv[i], "-pin") == 0) { pinned = 1; } if (strcmp(argv[i], "-check") == 0) { check = 1; } if (strcmp(argv[i], "-no-prio") == 0) { no_prio = 1; } if (strcmp(argv[i], "-size") == 0) { char *argptr; unsigned size = strtol(argv[++i], &argptr, 10); nthick = 130; ntheta = (size/128) + 2; STARPU_ASSERT((nthick - 2)*(ntheta - 2) == size); } if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-help") == 0) { printf("usage : %s [-v1|-v2|-v3|-v4] [-pin] [-nthick number] [-ntheta number] [-shape [0|1|2]] [-cg] [-size number] [-no-prio]\n", argv[0]); } } } /* * The Finite element method code * * B C * ********** * * 0 * * * * * * * * * 1 * * ********** * A D */ static inline float diff_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr, unsigned theta_psi, unsigned thick_psi, unsigned xy, point *pmesh) { float xa,ya,xb,yb,xc,yc; float tmp; assert(theta_tr + 2 <= ntheta); assert(thick_tr + 2 <= nthick); /* A */ xa = pmesh[NODE_NUMBER(theta_tr, thick_tr)].x; ya = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y; /* B */ if (side_tr) { /* lower D is actually B here */ xb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x; yb = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y; } else { /* upper */ xb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x; yb = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y; } xc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x; yc = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y; /* now look for the actual psi node */ if (NODE_NUMBER(theta_tr, thick_tr) == NODE_NUMBER(theta_psi, thick_psi)) { /* A nothing to do */ } else if (NODE_NUMBER(theta_tr+1, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi)) { /* psi matches C */ /* swap A and C coordinates */ tmp = xa; xa = xc; xc = tmp; tmp = ya; ya = yc; yc = tmp; } else if (side_tr && (NODE_NUMBER(theta_tr+1, thick_tr) == NODE_NUMBER(theta_psi, thick_psi))) { /* psi is D (that was stored in C) XXX */ tmp = xa; xa = xb; xb = tmp; tmp = ya; ya = yb; yb = tmp; } else if (!side_tr && (NODE_NUMBER(theta_tr, thick_tr+1) == NODE_NUMBER(theta_psi, thick_psi))) { /* psi is C */ tmp = xa; xa = xb; xb = tmp; tmp = ya; ya = yb; yb = tmp; } else { /* the psi node is not a node of the current triangle */ return 0.0f; } /* now the triangle should have A as the psi node */ float denom; float value; denom = (xa - xb)*(yc - ya) - (xc - xb)*(ya - yb); switch (xy) { case X: value = (yc - yb)/denom; break; case Y: value = -(xc - xb)/denom; break; default: assert(0); } return value; } static inline float diff_y_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr, unsigned theta_psi, unsigned thick_psi, point *pmesh) { return diff_psi(theta_tr, thick_tr, side_tr, theta_psi, thick_psi, Y, pmesh); } static inline float diff_x_psi(unsigned theta_tr, unsigned thick_tr, unsigned side_tr, unsigned theta_psi, unsigned thick_psi, point *pmesh) { return diff_psi(theta_tr, thick_tr, side_tr, theta_psi, thick_psi, X, pmesh); } static inline float surface_triangle(unsigned theta_tr, unsigned thick_tr, unsigned side_tr, point *pmesh) { float surface; float tmp; float xi, xj, xk, yi, yj, yk; STARPU_ASSERT(theta_tr + 2 <= ntheta); STARPU_ASSERT(thick_tr + 2 <= nthick); xi = pmesh[NODE_NUMBER(theta_tr, thick_tr)].x; yi = pmesh[NODE_NUMBER(theta_tr, thick_tr)].y; xj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].x; yj = pmesh[NODE_NUMBER(theta_tr+1, thick_tr+1)].y; if (side_tr) { /* lower */ xk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].x; yk = pmesh[NODE_NUMBER(theta_tr+1, thick_tr)].y; } else { xk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].x; yk = pmesh[NODE_NUMBER(theta_tr, thick_tr+1)].y; } tmp = (xi - xj)*(yk -yj) - (xk - xj)*(yi -yj); surface = 0.5*fabs(tmp); return surface; } static inline float integral_triangle(int theta_tr, int thick_tr, unsigned side_tr, unsigned theta_i, unsigned thick_i, unsigned theta_j, unsigned thick_j, point *pmesh) { float surface; float value; float dxi, dxj, dyi, dyj; if (theta_tr < 0) return 0.0f; if (theta_tr + 2 > (int)ntheta) return 0.0f; if (thick_tr < 0) return 0.0f; if (thick_tr + 2 > (int)nthick) return 0.0f; dxi = diff_x_psi(theta_tr, thick_tr, side_tr, theta_i, thick_i, pmesh); dyi = diff_y_psi(theta_tr, thick_tr, side_tr, theta_i, thick_i, pmesh); dxj = diff_x_psi(theta_tr, thick_tr, side_tr, theta_j, thick_j, pmesh); dyj = diff_y_psi(theta_tr, thick_tr, side_tr, theta_j, thick_j, pmesh); surface = surface_triangle(theta_tr, thick_tr, side_tr, pmesh); value = (dxi*dxj + dyi*dyj)*surface; return value; } static inline float integrale_sum(unsigned theta_i, unsigned thick_i, unsigned theta_j, unsigned thick_j, point *pmesh) { float integral = 0.0f; integral += integral_triangle(theta_i - 1, thick_i - 1, 1, theta_i, thick_i, theta_j, thick_j, pmesh); integral += integral_triangle(theta_i - 1, thick_i - 1, 0, theta_i, thick_i, theta_j, thick_j, pmesh); integral += integral_triangle(theta_i - 1, thick_i, 1, theta_i, thick_i, theta_j, thick_j, pmesh); integral += integral_triangle(theta_i, thick_i, 0, theta_i, thick_i, theta_j, thick_j, pmesh); integral += integral_triangle(theta_i, thick_i, 1, theta_i, thick_i, theta_j, thick_j, pmesh); integral += integral_triangle(theta_i, thick_i - 1, 0, theta_i, thick_i, theta_j, thick_j, pmesh); return integral; } static float compute_A_value(unsigned i, unsigned j, point *pmesh) { float value = 0.0f; unsigned thick_i, thick_j; unsigned theta_i, theta_j; /* add all contributions from all connex triangles */ thick_i = NODE_TO_THICK(i); thick_j = NODE_TO_THICK(j); theta_i = NODE_TO_THETA(i); theta_j = NODE_TO_THETA(j); /* Compute the Sum of all the integral over all triangles */ if ((abs((int)thick_i - (int)thick_j) <= 1) && (abs((int)theta_i - (int)theta_j) <= 1)) { if ((theta_j == theta_i -1) && (thick_j == thick_i +1)) goto done; if ((theta_j == theta_i + 1) && (thick_j == thick_i - 1)) goto done; /* this may not be a null entry */ value += integrale_sum(theta_i, thick_i, theta_j, thick_j, pmesh); } done: return value; } #define TRANSLATE(k) (RefArray[(k)]) #define TRANSLATEBACK(k) (RefArrayBack[(k)]) static void solve_system(unsigned size, unsigned subsize, float *result, int *RefArray, float *Bformer, float *A, float *B) { unsigned i; /* solve the actual problem LU X = B */ /* solve LX' = Y with X' = UX */ /* solve UX = X' */ FPRINTF(stderr, "Solving the problem ...\n"); float *savedB = NULL; float *LUB = NULL; if (check) { savedB = malloc(subsize*sizeof(float)); memcpy(savedB, B, subsize*sizeof(float)); LUB = malloc(subsize*sizeof(float)); } /* L */ STARPU_STRSV("L", "N", "N", subsize, A, subsize, B, 1); /* U */ STARPU_STRSV("U", "N", "U", subsize, A, subsize, B, 1); STARPU_ASSERT(DIM == size); if (check) { /* compute the error on (LUB - savedB) which should be 0 */ /* LUB = B */ memcpy(LUB, B, subsize*sizeof(float)); /* LUB = U * LUB */ STARPU_STRMV("U", "N", "U", subsize, A, subsize, LUB, 1); /* LUB = L * LUB */ STARPU_STRMV("L", "N", "N", subsize, A, subsize, LUB, 1); /* LUB -= B */ STARPU_SAXPY(subsize, -1.0f, savedB, 1, LUB, 1); /* check if LUB is close to the 0 vector */ int maxind = STARPU_ISAMAX(subsize, LUB, 1); FPRINTF(stderr, "max error (LUX - B) = %e\n",LUB[maxind - 1]); float sum = STARPU_SASUM(subsize, LUB, 1); FPRINTF(stderr,"avg. error %e\n", sum/subsize); free(LUB); free(savedB); } /* now display back the ACTUAL result */ for (i = 0; i < subsize; i++) { result[TRANSLATE(i)] = B[i]; } for (i = subsize ; i < size; i++) { result[TRANSLATE(i)] = Bformer[TRANSLATE(i)]; } } unsigned compute_pivot_array(int *RefArray, int *RefArrayBack, unsigned size) { unsigned k; unsigned index = 0; unsigned theta, thick; unsigned newsize; for (k = 0; k < size; k++) { RefArray[k] = k; RefArrayBack[k] = k; } /* first inner nodes */ for (theta = 1; theta < ntheta - 1 ; theta++) { for (thick = 1; thick < nthick - 1; thick++) { /* inner nodes are unknown */ RefArrayBack[NODE_NUMBER(theta, thick)] = index; RefArray[index] = NODE_NUMBER(theta, thick); index++; } } newsize = index; for (theta=0; theta < ntheta; theta++) { /* Lower boundary "South" */ RefArrayBack[NODE_NUMBER(theta, 0)] = index; RefArray[index++] = NODE_NUMBER(theta, 0); /* Upper boundary "North" */ RefArrayBack[NODE_NUMBER(theta, nthick-1)] = index; RefArray[index++] = NODE_NUMBER(theta, nthick-1); } for (thick = 1; thick < nthick -1; thick++) { /* "West "*/ RefArrayBack[NODE_NUMBER(0, thick)] = index; RefArray[index++] = NODE_NUMBER(0, thick); /* "East" */ RefArrayBack[NODE_NUMBER(ntheta-1, thick)] = index; RefArray[index++] = NODE_NUMBER(ntheta-1, thick); } assert(index == size); return newsize; } void build_mesh(point *mesh) { unsigned theta, thick; /* first build the mesh by determining all points positions */ for (theta = 0; theta < ntheta; theta++) { float angle; angle = (ntheta - 1 - theta) * Pi/(ntheta-1); for (thick = 0; thick < nthick; thick++) { float r; r = thick * (RMAX - RMIN)/(nthick - 1) + RMIN; switch (shape) { default: case 0: mesh[NODE_NUMBER(theta,thick)].x = r*cosf(angle); mesh[NODE_NUMBER(theta,thick)].y = r*sinf(angle); break; case 1: mesh[NODE_NUMBER(theta,thick)].x = -100 + RMIN+((RMAX-RMIN)*theta)/(ntheta - 1); mesh[NODE_NUMBER(theta,thick)].y = RMIN+((RMAX-RMIN)*thick)/(nthick - 1); break; case 2: mesh[NODE_NUMBER(theta,thick)].x = r*(2.0f*theta/(ntheta - 1)- 1.0f); mesh[NODE_NUMBER(theta,thick)].y = r*(2.0f*thick/(nthick - 1)- 1.0f); break; } } } } static unsigned long build_neighbour_vector(unsigned long*neighbours, unsigned node, int *RefArray, int *RefArrayBack) { /* where is that point in the former space ? */ int former = TRANSLATE(node); int former_thick, former_theta; former_thick= (int)NODE_TO_THICK(former); former_theta = (int)NODE_TO_THETA(former); /* do a list of all the possible neighbours */ unsigned nneighbours = 0; int dtheta, dthick; for (dthick = -1; dthick <= 1; dthick++) { if ((former_thick + dthick) >= 0 && (former_thick + dthick) <= (int)nthick) { for (dtheta = -1; dtheta <= 1; dtheta++) { if ((former_theta + dtheta) >= 0 && (former_theta + dtheta) <= (int)ntheta) { /* we got a possible neighbour */ unsigned pnode = NODE_NUMBER((former_theta + dtheta), (former_thick + dthick)); neighbours[nneighbours++] = TRANSLATEBACK(pnode); } } } } unsigned i; /* order that list */ for (i = 0; i < nneighbours; i++) { /* find the i^th smallest entry for position i */ unsigned index; unsigned min , min_index; min = neighbours[i]; min_index = i; for (index = i+1; index < nneighbours; index++) { STARPU_ASSERT(neighbours[i] != neighbours[index]); if (neighbours[index] < min) { min = neighbours[index]; min_index = index; } } /* swap values */ neighbours[min_index] = neighbours[i]; neighbours[i] = min; } return nneighbours; } static void build_sparse_stiffness_matrix_B(point *pmesh, float *B, float *Bformer, unsigned size, unsigned newsize, int *RefArray, int *RefArrayBack) { unsigned i,j; /* first give the value of known nodes (at boundaries) */ for (i = 0; i < size; i++) { Bformer[i] = 0.0f; } for (i = 0; i < nthick; i++) { Bformer[i] = 200.0f; Bformer[size-1-i] = 200.0f; } for (i = 1; i < ntheta-1; i++) { Bformer[i*nthick] = 200.0f; Bformer[(i+1)*nthick-1] = 100.0f; } /* now the actual stiffness (reordered) matrix*/ for (j = 0 ; j < newsize ; j++) { unsigned long neighbour; unsigned long nneighbours; unsigned long neighbours[9]; nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack); B[j] = Bformer[TRANSLATE(j)]; for (neighbour = 0; neighbour < nneighbours; neighbour++) { unsigned n = neighbours[neighbour]; if (n >= newsize) { B[j] -= compute_A_value(TRANSLATE(n), TRANSLATE(j), pmesh)*Bformer[TRANSLATE(n)]; } } } } static unsigned build_sparse_stiffness_matrix_A(point *pmesh, float **nzval, uint32_t **colind, uint32_t *rowptr, unsigned newsize, int *RefArray, int *RefArrayBack) { unsigned j; unsigned pos = 0; *nzval = NULL; *colind = NULL; /* now the actual stiffness (reordered) matrix*/ for (j = 0 ; j < newsize ; j++) { rowptr[j] = pos; unsigned long neighbour; unsigned long nneighbours; unsigned long neighbours[9]; nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack); for (neighbour = 0; neighbour < nneighbours; neighbour++) { unsigned nodeneighbour = neighbours[neighbour]; if (nodeneighbour < newsize) { float val; val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh); if (val != 0.0f) { *nzval = realloc(*nzval, (pos+1)*sizeof(float)); STARPU_ASSERT(*nzval); *colind = realloc(*colind, (pos+1)*sizeof(uint32_t)); STARPU_ASSERT(*colind); (*nzval)[pos] = val; (*colind)[pos] = nodeneighbour; pos++; } } } } rowptr[newsize] = pos; return pos; } static void build_dense_stiffness_matrix_A(point *pmesh, float *A, unsigned newsize, int *RefArray, int *RefArrayBack) { unsigned long j; /* touch all the memory */ memset(A, 0, newsize*newsize*sizeof(float)); /* now the actual stiffness (reordered) matrix*/ for (j = 0 ; j < newsize ; j++) { unsigned long neighbour; unsigned long nneighbours; unsigned long neighbours[9]; nneighbours = build_neighbour_vector(&neighbours[0], j, RefArray, RefArrayBack); for (neighbour = 0; neighbour < nneighbours; neighbour++) { unsigned long nodeneighbour = neighbours[neighbour]; if (nodeneighbour < newsize) { float val; val = compute_A_value(TRANSLATE(j), TRANSLATE(nodeneighbour), pmesh); A[j+ (unsigned long)newsize*nodeneighbour] = val; } } } } int main(int argc, char **argv) { float *A; float *B; unsigned newsize; float *result; int *RefArray, *RefArrayBack; point *pmesh; float *Bformer; parse_args(argc, argv); pmesh = malloc(DIM*sizeof(point)); RefArray = malloc(DIM*sizeof(int)); RefArrayBack = malloc(DIM*sizeof(int)); Bformer = malloc(DIM*sizeof(float)); result = calloc(DIM, sizeof(float)); build_mesh(pmesh); /* now simplify that problem given the boundary conditions * to do so, we remove the already known variables from the system * by pivoting the various know variable, RefArray keep track of that * pivoting */ newsize = compute_pivot_array(RefArray, RefArrayBack, DIM); /* we can either use a direct method (LU decomposition here) or an * iterative method (conjugate gradient here) */ if (use_cg) { unsigned nnz; float *nzval; uint32_t *colind; uint32_t *rowptr; rowptr = malloc((newsize+1)*sizeof(uint32_t)); B = malloc(newsize*sizeof(float)); build_sparse_stiffness_matrix_B(pmesh, B, Bformer, DIM, newsize, RefArray, RefArrayBack); nnz = build_sparse_stiffness_matrix_A(pmesh, &nzval, &colind, rowptr, newsize, RefArray, RefArrayBack); do_conjugate_gradient(nzval, B, result, nnz, newsize, colind, rowptr); /* XXX */ memcpy(B, result, newsize*sizeof(float)); /* now display back the ACTUAL result */ unsigned i; for (i = 0; i < newsize; i++) { result[TRANSLATE(i)] = B[i]; } for (i = newsize ; i < DIM; i++) { result[TRANSLATE(i)] = Bformer[TRANSLATE(i)]; } free(nzval); free(colind); free(rowptr); free(B); } else { /* unfortunately CUDA does not allow late memory registration, * we need to do the malloc using CUDA itself ... */ initialize_system(&A, &B, newsize, pinned); /* then build the stiffness matrix A */ build_sparse_stiffness_matrix_B(pmesh, B, Bformer, DIM, newsize, RefArray, RefArrayBack); build_dense_stiffness_matrix_A(pmesh, A, newsize, RefArray, RefArrayBack); FPRINTF(stderr, "Problem size : %ux%u (%ux%u) (%lu MB)\n", newsize, newsize, DIM, DIM, ((unsigned long)newsize*newsize*4UL)/(1024*1024)); STARPU_ASSERT(newsize % nblocks == 0); switch (version) { case 1: case 2: dw_factoLU(A, newsize, newsize, nblocks, version, no_prio); break; case 3: dw_factoLU_tag(A, newsize, newsize, nblocks, no_prio); break; case 4: dw_factoLU_grain(A, newsize, newsize, nblocks, nbigblocks); break; default: STARPU_ABORT(); } display_stat_heat(); if (check) solve_system(DIM, newsize, result, RefArray, Bformer, A, B); starpu_cublas_shutdown(); starpu_shutdown(); free_system(A, B, newsize, pinned); } #ifdef STARPU_OPENGL_RENDER const char *display = getenv("DISPLAY"); if (display && display[0]) opengl_render(ntheta, nthick, result, pmesh, argc, argv); #endif free(pmesh); free(RefArray); free(RefArrayBack); free(Bformer); free(result); return 0; } starpu-1.4.9+dfsg/examples/heat/heat.h000066400000000000000000000043341507764646700176450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __HEAT_H__ #define __HEAT_H__ #include #include #include #include #include /* needed for STARPU_OPENGL_RENDER */ #include #include #ifdef STARPU_OPENGL_RENDER #include #include #include #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define PRINTF(fmt, ...) do { if (!getenv("STARPU_SSILENT")) {printf(fmt, ## __VA_ARGS__); }} while(0) #define X 0 #define Y 1 #define DIM ntheta*nthick #define RMIN (150.0f) #define RMAX (200.0f) #define Pi (3.141592f) #define NODE_NUMBER(theta, thick) ((unsigned long)((thick)+(theta)*nthick)) #define NODE_TO_THICK(n) ((n) % nthick) #define NODE_TO_THETA(n) ((n) / nthick) typedef struct point_t { float x; float y; } point; extern void dw_factoLU(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned version, unsigned no_prio); extern void dw_factoLU_tag(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio); extern void dw_factoLU_grain(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned nbigblocks); extern void initialize_system(float **A, float **B, unsigned dim, unsigned pinned); extern void free_system(float *A, float *B, unsigned dim, unsigned pinned); void display_stat_heat(void); #ifdef STARPU_OPENGL_RENDER extern void opengl_render(unsigned _ntheta, unsigned _nthick, float *_result, point *_pmesh, int argc_, char **argv_); #endif #endif /* __HEAT_H__ */ starpu-1.4.9+dfsg/examples/heat/heat.sh000077500000000000000000000023121507764646700200250ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Test various LU options set -e PREFIX=$(dirname $0) $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -shape 0 $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -shape 1 # sometimes lead to pivot being 0 #$MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -shape 2 $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -cg # TODO: FIXME # segfault #$MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -v1 # (actually the default...) $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -v2 # hang #$MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -v3 # hang #$MS_LAUNCHER $STARPU_LAUNCH $PREFIX/heat -v4 starpu-1.4.9+dfsg/examples/heat/heat_display.c000066400000000000000000000137761507764646700213770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "heat.h" #ifdef STARPU_OPENGL_RENDER /* * Just some dummy OpenGL code to display our results * */ static float minval, maxval; static unsigned ntheta; static unsigned nthick; static float *result; static unsigned printmesh =0; static point *pmesh; float xmin, xmax, ymin, ymax; float xcenter, ycenter; static void generate_graph(void) { unsigned theta, thick; for (theta = 0; theta < ntheta-1; theta++) { for (thick = 0; thick < nthick-1; thick++) { unsigned nodeA = NODE_NUMBER(theta, thick); unsigned nodeB = NODE_NUMBER(theta, thick+1); unsigned nodeC = NODE_NUMBER(theta+1, thick+1); unsigned nodeD = NODE_NUMBER(theta+1, thick); float colorA_R, colorB_R, colorC_R, colorD_R; float colorA_G, colorB_G, colorC_G, colorD_G; float colorA_B, colorB_B, colorC_B, colorD_B; if (maxval == minval) { colorA_R = 1.0f; colorA_G = 1.0f; colorA_B = 1.0f; colorB_R = 1.0f; colorB_G = 1.0f; colorB_B = 1.0f; colorC_R = 1.0f; colorC_G = 1.0f; colorC_B = 1.0f; colorD_R = 1.0f; colorD_G = 1.0f; colorD_B = 1.0f; } else { float amplitude = maxval - minval; float coeffA, coeffB, coeffC, coeffD; coeffA = (result[nodeA] - minval)/amplitude; coeffB = (result[nodeB] - minval)/amplitude; coeffC = (result[nodeC] - minval)/amplitude; coeffD = (result[nodeD] - minval)/amplitude; colorA_R = coeffA>0.5f?1.0f:(2.0*coeffA)*1.0f; colorB_R = coeffB>0.5f?1.0f:(2.0*coeffB)*1.0f; colorC_R = coeffC>0.5f?1.0f:(2.0*coeffC)*1.0f; colorD_R = coeffD>0.5f?1.0f:(2.0*coeffD)*1.0f; colorA_B = 0.0f; colorB_B = 0.0f; colorC_B = 0.0f; colorD_B = 0.0f; colorA_G = coeffA<0.5f?1.0f:2.0*(1 - coeffA)*1.0f; colorB_G = coeffB<0.5f?1.0f:2.0*(1 - coeffB)*1.0f; colorC_G = coeffC<0.5f?1.0f:2.0*(1 - coeffC)*1.0f; colorD_G = coeffD<0.5f?1.0f:2.0*(1 - coeffD)*1.0f; } if (printmesh) { glColor3f (0.0f, 0.0f, 0.0f); glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); glLineWidth(3.0f); glBegin(GL_POLYGON); glVertex3f(pmesh[nodeA].x, pmesh[nodeA].y, 2.0f); glVertex3f(pmesh[nodeD].x, pmesh[nodeD].y, 2.0f); glVertex3f(pmesh[nodeC].x, pmesh[nodeC].y, 2.0f); glVertex3f(pmesh[nodeA].x, pmesh[nodeA].y, 2.0f); glEnd(); glBegin(GL_POLYGON); glVertex3f(pmesh[nodeA].x, pmesh[nodeA].y, 1.0f); glVertex3f(pmesh[nodeC].x, pmesh[nodeC].y, 1.0f); glVertex3f(pmesh[nodeB].x, pmesh[nodeB].y, 1.0f); glVertex3f(pmesh[nodeA].x, pmesh[nodeA].y, 1.0f); glEnd(); } glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); glBegin(GL_POLYGON); glColor3f (colorA_R, colorA_G, colorA_B); glVertex3f(pmesh[nodeA].x, pmesh[nodeA].y, 0.0f); glColor3f (colorD_R, colorD_G, colorD_B); glVertex3f(pmesh[nodeD].x, pmesh[nodeD].y, 0.0f); glColor3f (colorC_R, colorC_G, colorC_B); glVertex3f(pmesh[nodeC].x, pmesh[nodeC].y, 0.0f); glEnd(); glBegin(GL_POLYGON); glColor3f (colorA_R, colorA_G, colorA_B); glVertex3f(pmesh[nodeA].x, pmesh[nodeA].y, 0.0f); glColor3f (colorC_R, colorC_G, colorC_B); glVertex3f(pmesh[nodeC].x, pmesh[nodeC].y, 0.0f); glColor3f (colorB_R, colorB_G, colorB_B); glVertex3f(pmesh[nodeB].x, pmesh[nodeB].y, 0.0f); glEnd(); } } } static void display(void) { glClear (GL_COLOR_BUFFER_BIT); glLoadIdentity (); /* clear the matrix */ float amplitude = STARPU_MAX(xmax - xmin, ymax - ymin); float factor = 1.0/amplitude; glScalef (factor, factor, factor); /* modeling transformation */ gluLookAt (xcenter, ycenter, 30.0f, xcenter, ycenter, 0.0f, 0.0f, 1.0f, 0.0f); /* printf("factor %f\n", factor); glRotatef(-0,0.0,0.0,0.0); */ generate_graph(); glFlush (); } static void pressKey(unsigned char key, int x, int y) { switch (key) { case 'q': exit(0); default: printmesh = !printmesh; display(); break; } } static void reshape (int w, int h) { glViewport (0, 0, (GLsizei) w, (GLsizei) h); glMatrixMode (GL_PROJECTION); glLoadIdentity (); glFrustum (xmin, xmax, ymin, ymax, 5.0f, 5.0f); glMatrixMode (GL_MODELVIEW); } void find_limits(void) { minval = 100000000.0f; maxval = -10000000.0f; unsigned i; for (i = 0; i < DIM; i++) { /* find min */ minval = STARPU_MIN(result[i], minval); /* find max */ maxval = STARPU_MAX(result[i], maxval); } xmin = 10000000.0f; xmax = -10000000.0f; ymin = 10000000.0f; ymax = -10000000.0f; unsigned theta, thick; for (theta = 0; theta < ntheta; theta++) { for (thick = 0; thick < nthick; thick++) { point *p = &pmesh[NODE_NUMBER(theta, thick)]; if (p->x < xmin) xmin = p->x; if (p->x > xmax) xmax = p->x; if (p->y < ymin) ymin = p->y; if (p->y > ymax) ymax = p->y; } } ycenter = (ymin + ymax)/2; xcenter = (xmin + xmax)/2; } void opengl_render(unsigned _ntheta, unsigned _nthick, float *_result, point *_pmesh, int argc_, char **argv_) { FPRINTF(stderr, "OpenGL rendering ... \n"); ntheta = _ntheta; nthick = _nthick; result = _result; printmesh = 0; pmesh = _pmesh; find_limits(); glutInit(&argc_, argv_); glutInitDisplayMode (GLUT_SINGLE | GLUT_RGB); glutInitWindowSize (800, 800); glutInitWindowPosition (100, 100); glutCreateWindow ("Temperature"); /* init */ glClearColor (0.0, 0.0, 0.0, 0.0); glShadeModel (GL_MODELVIEW); glutKeyboardFunc(pressKey); glutDisplayFunc(display); glutReshapeFunc(reshape); glutMainLoop(); } #endif /* STARPU_OPENGL_RENDER */ starpu-1.4.9+dfsg/examples/heat/lu_kernels_model.c000066400000000000000000000132341507764646700222410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "lu_kernels_model.h" /* * As a convention, in that file, buffers[0] is represented by A, * buffers[1] is B ... */ /* * Number of flops of Gemm */ /* #define USE_PERTURBATION 1 */ #ifdef USE_PERTURBATION #define PERTURB(a) ((starpu_drand48()*2.0f*(AMPL) + 1.0f - (AMPL))*(a)) #else #define PERTURB(a) (a) #endif /* * * Generic models * */ double task_getrf_cost(struct starpu_task *task, unsigned nimpl) { (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = ((n*n*n)/537.5); return PERTURB(cost); } double task_trsm_ll_cost(struct starpu_task *task, unsigned nimpl) { (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); /* double cost = ((n*n*n)/1744.695); */ double cost = ((n*n*n)/3210.80); /* fprintf(stderr, "task TRSM_LL predicts %e\n", cost); */ return PERTURB(cost); } double task_trsm_ru_cost(struct starpu_task *task, unsigned nimpl) { (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); /* double cost = ((n*n*n)/1744.695); */ double cost = ((n*n*n)/3691.53); /* fprintf(stderr, "task TRSM_RU predicts %e\n", cost); */ return PERTURB(cost); } double task_gemm_cost(struct starpu_task *task, unsigned nimpl) { (void)nimpl; uint32_t nx, ny, nz; nx = starpu_matrix_get_nx(task->handles[2]); ny = starpu_matrix_get_ny(task->handles[2]); nz = starpu_matrix_get_ny(task->handles[0]); double cost = ((nx*ny*nz)/4110.0); return PERTURB(cost); } /* * * Models for CUDA * */ double task_getrf_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = ((n*n*n)/1853.7806); /* printf("CUDA task GETRF ; predict %e\n", cost); */ return PERTURB(cost); } double task_trsm_ll_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = ((n*n*n)/42838.5718); /* printf("CUDA task TRSM_LL ; predict %e\n", cost); */ return PERTURB(cost); } double task_trsm_ru_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = ((n*n*n)/49208.667); /* printf("CUDA task TRSM_RU ; predict %e\n", cost); */ return PERTURB(cost); } double task_gemm_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t nx, ny, nz; nx = starpu_matrix_get_nx(task->handles[2]); ny = starpu_matrix_get_ny(task->handles[2]); nz = starpu_matrix_get_ny(task->handles[0]); double cost = ((nx*ny*nz)/57523.560); /* printf("CUDA task GEMM ; predict %e\n", cost); */ return PERTURB(cost); } /* * * Models for CPUs * */ double task_getrf_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = ((n*n*n)/537.5); /* printf("CPU task GETRF ; predict %e\n", cost); */ return PERTURB(cost); } double task_trsm_ll_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = ((n*n*n)/6668.224); /* printf("CPU task TRSM_LL ; predict %e\n", cost); */ return PERTURB(cost); } double task_trsm_ru_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = ((n*n*n)/6793.8423); /* printf("CPU task TRSM_RU ; predict %e\n", cost); */ return PERTURB(cost); } double task_gemm_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t nx, ny, nz; nx = starpu_matrix_get_nx(task->handles[2]); ny = starpu_matrix_get_ny(task->handles[2]); nz = starpu_matrix_get_ny(task->handles[0]); double cost = ((nx*ny*nz)/4203.0175); /* printf("CPU task GEMM ; predict %e\n", cost); */ return PERTURB(cost); } void initialize_lu_kernels_model(struct starpu_perfmodel* model, char * symbol, double (*cost_function)(struct starpu_task *, unsigned), double (*cpu_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned), double (*cuda_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned)) { (void)cost_function; model->symbol = symbol; model->type = STARPU_HISTORY_BASED; starpu_perfmodel_init(model); starpu_perfmodel_set_per_devices_cost_function(model, 0, cpu_cost_function, STARPU_CPU_WORKER, 0, 1, -1); if(starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) != 0) { starpu_perfmodel_set_per_devices_cost_function(model, 0, cuda_cost_function, STARPU_CUDA_WORKER, 0, 1, -1); } } starpu-1.4.9+dfsg/examples/heat/lu_kernels_model.h000066400000000000000000000044051507764646700222460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __LU_KERNELS_MODEL_H__ #define __LU_KERNELS_MODEL_H__ #include double task_getrf_cost(struct starpu_task *task, unsigned nimpl); double task_trsm_ll_cost(struct starpu_task *task, unsigned nimpl); double task_trsm_ru_cost(struct starpu_task *task, unsigned nimpl); double task_gemm_cost(struct starpu_task *task, unsigned nimpl); double task_getrf_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double task_trsm_ll_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double task_trsm_ru_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double task_gemm_cost_cuda(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double task_getrf_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double task_trsm_ll_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double task_trsm_ru_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double task_gemm_cost_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); void initialize_lu_kernels_model(struct starpu_perfmodel* model, char * symbol, double (*cost_function)(struct starpu_task *, unsigned), double (*cpu_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned), double (*cuda_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned)); #endif /* __LU_KERNELS_MODEL_H__ */ starpu-1.4.9+dfsg/examples/incrementer/000077500000000000000000000000001507764646700201415ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/incrementer/incrementer.c000066400000000000000000000071151507764646700226240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is just a small example which increments two values of a vector several times. */ #include #ifdef STARPU_QUICK_CHECK static unsigned niter = 500; #elif !defined(STARPU_LONG_CHECK) static unsigned niter = 5000; #else static unsigned niter = 50000; #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #ifdef STARPU_USE_CUDA extern void cuda_codelet(void *descr[], void *_args); #endif #ifdef STARPU_USE_OPENCL extern void opencl_codelet(void *descr[], void *_args); struct starpu_opencl_program opencl_program; #endif void cpu_codelet(void *descr[], void *_args) { (void)_args; float *val = (float *)STARPU_VECTOR_GET_PTR(descr[0]); val[0] += 1.0f; val[1] += 1.0f; } int main(int argc, char **argv) { int ret = 0; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_QUICK_CHECK niter /= 100; #endif if (argc == 2) niter = atoi(argv[1]); float float_array[4] STARPU_ATTRIBUTE_ALIGNED(16) = { 0.0f, 0.0f, 0.0f, 0.0f}; starpu_data_handle_t float_array_handle; starpu_vector_data_register(&float_array_handle, STARPU_MAIN_RAM /* home node */, (uintptr_t)&float_array, 4, sizeof(float)); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("examples/incrementer/incrementer_kernels_opencl_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif struct starpu_codelet cl = { .cpu_funcs = {cpu_codelet}, .cpu_funcs_name = {"cpu_codelet"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_codelet}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_codelet}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .name = "increment" }; double start; double end; start = starpu_timing_now(); unsigned i; for (i = 0; i < niter; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->callback_func = NULL; task->handles[0] = float_array_handle; ret = starpu_task_submit(task); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); exit(0); } } starpu_task_wait_for_all(); /* update the array in RAM */ starpu_data_unregister(float_array_handle); end = starpu_timing_now(); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif FPRINTF(stderr, "array -> %f, %f, %f, %f\n", float_array[0], float_array[1], float_array[2], float_array[3]); if (float_array[0] != niter || float_array[0] != float_array[1] + float_array[2] + float_array[3]) { FPRINTF(stderr, "Incorrect result\n"); ret = 1; } double timing = end - start; FPRINTF(stderr, "%u elems took %f ms\n", niter, timing/1000); starpu_shutdown(); return ret; } starpu-1.4.9+dfsg/examples/incrementer/incrementer_kernels.cu000066400000000000000000000022201507764646700245240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* CUDA kernel for incrementation */ #include static __global__ void cuda_incrementer(float * tab) { tab[0] = tab[0] + 1.0f; tab[2] = tab[2] + 1.0f; return; } extern "C" void cuda_codelet(void *descr[], void *_args) { (void)_args; float *val = (float *)STARPU_VECTOR_GET_PTR(descr[0]); cuda_incrementer<<<1,1, 0, starpu_cuda_get_local_stream()>>>(val); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/incrementer/incrementer_kernels_opencl.c000066400000000000000000000034241507764646700257060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* OpenCL codelet for incrementation */ #include extern struct starpu_opencl_program opencl_program; void opencl_codelet(void *descr[], void *_args) { (void)_args; cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]); cl_kernel kernel; cl_command_queue queue; int id, devid, err; id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "incrementer", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(val), &val); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=4; size_t local, s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local=global; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/examples/incrementer/incrementer_kernels_opencl_kernel.cl000066400000000000000000000015441507764646700274230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* OpenCL kernel for incrementation */ __kernel void incrementer(__global float* input) { const int i = get_global_id(0); if (i == 0 || i == 3) input[i] = input[i] + 1.0f; } starpu-1.4.9+dfsg/examples/interface/000077500000000000000000000000001507764646700175665ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/interface/complex.c000066400000000000000000000212531507764646700214040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "complex_interface.h" #include "complex_codelet.h" void copy_complex_codelet_cpu(void *descr[], void *_args) { int i; int nx = STARPU_COMPLEX_GET_NX(descr[0]); double *i_real = STARPU_COMPLEX_GET_REAL(descr[0]); double *i_imaginary = STARPU_COMPLEX_GET_IMAGINARY(descr[0]); double *o_real = STARPU_COMPLEX_GET_REAL(descr[1]); double *o_imaginary = STARPU_COMPLEX_GET_IMAGINARY(descr[1]); for(i=0 ; imajor >= 2 || props->minor >= 3) { /* At least compute capability 1.3, supports doubles */ return 1; } else { /* Old card does not support doubles */ return 0; } #endif #else return 1; #endif } #ifdef STARPU_USE_CUDA extern void copy_complex_codelet_cuda(void *descr[], void *_args); #endif #ifdef STARPU_USE_OPENCL extern void copy_complex_codelet_opencl(void *buffers[], void *args); #endif struct starpu_codelet cl_copy = { .cpu_funcs = {copy_complex_codelet_cpu}, #ifdef STARPU_USE_CUDA .cuda_funcs = {copy_complex_codelet_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {copy_complex_codelet_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .nbuffers = 2, .modes = {STARPU_R, STARPU_W}, .can_execute = can_execute, .name = "cl_copy" }; #ifdef STARPU_USE_OPENCL struct starpu_opencl_program opencl_program; #endif int main(void) { int ret = 0; starpu_data_handle_t handle1; starpu_data_handle_t handle2; starpu_data_handle_t handle3; starpu_data_handle_t handle4; double real = 45.0; double imaginary = 12.0; double copy_real = 78.0; double copy_imaginary = 78.0; int compare; int *compare_ptr = &compare; starpu_data_handle_t vectorh; struct starpu_vector_interface *vectori; double *vector; // When using master-slave MPI mode, it is necessary for the slaves to know about the complex interface starpu_complex_data_register_ops(); ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("examples/interface/complex_kernels.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1); starpu_complex_data_register(&handle2, STARPU_MAIN_RAM, ©_real, ©_imaginary, 1); /* Create a vector of two complexs. */ starpu_complex_data_register(&handle3, -1, 0, 0, 2); starpu_complex_data_register(&handle4, -1, 0, 0, 1); ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* Compare two different complexs. */ ret = starpu_task_insert(&cl_compare, STARPU_R, handle1, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); if (compare != 0) { _FPRINTF(stderr, "Complex numbers should NOT be similar\n"); goto end; } /* Copy one into the other. */ ret = starpu_task_insert(&cl_copy, STARPU_R, handle1, STARPU_W, handle2, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* And compare again. */ ret = starpu_task_insert(&cl_compare, STARPU_R, handle1, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); if (compare != 1) { _FPRINTF(stderr, "Complex numbers should be similar\n"); } /* Put another value again */ starpu_data_acquire(handle2, STARPU_W); copy_real = 78.0; copy_imaginary = 77.0; starpu_data_release(handle2); /* Split it in two pieces (thus one complex each). */ struct starpu_data_filter f = { .filter_func = starpu_complex_filter_block, .nchildren = 2, }; starpu_data_partition(handle3, &f); /* Copy the two complexs into each part */ ret = starpu_task_insert(&cl_copy, STARPU_R, handle1, STARPU_W, starpu_data_get_sub_data(handle3, 1, 0), 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&cl_copy, STARPU_R, handle2, STARPU_W, starpu_data_get_sub_data(handle3, 1, 1), 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* Gather the two pieces. */ starpu_data_unpartition(handle3, STARPU_MAIN_RAM); /* Show it. */ ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle3", strlen("handle3")+1, STARPU_R, handle3, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* Get the real and imaginary vectors. */ struct starpu_data_filter fcanon = { .filter_func = starpu_complex_filter_canonical, .nchildren = 2, .get_child_ops = starpu_complex_filter_canonical_child_ops, }; starpu_data_partition(handle3, &fcanon); /* Check the corresponding data. */ vectorh = starpu_data_get_sub_data(handle3, 1, 0); starpu_data_acquire(vectorh, STARPU_R); vectori = starpu_data_get_interface_on_node(vectorh, STARPU_MAIN_RAM); vector = (double*) vectori->ptr; STARPU_ASSERT_MSG(vector[0] == 45., "Bogus value: %f instead of %f", vector[0], 45.); STARPU_ASSERT_MSG(vector[1] == 78., "Bogus value: %f instead of %f", vector[1], 78.); starpu_data_release(vectorh); vectorh = starpu_data_get_sub_data(handle3, 1, 1); starpu_data_acquire(vectorh, STARPU_R); vectori = starpu_data_get_interface_on_node(vectorh, STARPU_MAIN_RAM); vector = (double*) vectori->ptr; STARPU_ASSERT_MSG(vector[0] == 12., "Bogus value: %f instead of %f", vector[0], 12.); STARPU_ASSERT_MSG(vector[1] == 77., "Bogus value: %f instead of %f", vector[1], 77.); starpu_data_release(vectorh); starpu_data_unpartition(handle3, STARPU_MAIN_RAM); /* Use helper starpu_data_cpy */ ret = starpu_data_cpy(handle4, handle1, 0, NULL, NULL); if (ret == -ENODEV) goto end; ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle4", strlen("handle4")+1, STARPU_R, handle4, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* Compare two different complexs. */ ret = starpu_task_insert(&cl_compare, STARPU_R, handle1, STARPU_R, handle4, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); if (compare != 1) { _FPRINTF(stderr, "Complex numbers should be similar\n"); goto end; } end: #ifdef STARPU_USE_OPENCL { int ret2 = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret2, "starpu_opencl_unload_opencl"); } #endif starpu_data_unregister(handle1); starpu_data_unregister(handle2); starpu_data_unregister(handle3); starpu_data_unregister(handle4); starpu_shutdown(); if (ret == -ENODEV) return 77; else return !compare; } starpu-1.4.9+dfsg/examples/interface/complex_codelet.h000066400000000000000000000054731507764646700231160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "complex_interface.h" #ifndef __COMPLEX_CODELET_H #define __COMPLEX_CODELET_H #define _FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) /* Dumb performance model for simgrid */ static double complex_cost_function(struct starpu_task *task, unsigned nimpl) { (void) task; (void) nimpl; return 0.000001; } static struct starpu_perfmodel complex_model = { .type = STARPU_COMMON, .cost_function = complex_cost_function, .symbol = "complex" }; void compare_complex_codelet(void *descr[], void *_args) { int nx1 = STARPU_COMPLEX_GET_NX(descr[0]); double *real1 = STARPU_COMPLEX_GET_REAL(descr[0]); double *imaginary1 = STARPU_COMPLEX_GET_IMAGINARY(descr[0]); int nx2 = STARPU_COMPLEX_GET_NX(descr[1]); double *real2 = STARPU_COMPLEX_GET_REAL(descr[1]); double *imaginary2 = STARPU_COMPLEX_GET_IMAGINARY(descr[1]); int *compare; starpu_codelet_unpack_args(_args, &compare); *compare = (nx1 == nx2); if (nx1 == nx2) { int i; for(i=0 ; i #include "complex_dev_handle_interface.h" #include "complex_dev_handle_codelet.h" void copy_complex_dev_handle_codelet_cpu(void *descr[], void *_args) { int i; int nx = STARPU_COMPLEX_DEV_HANDLE_GET_NX(descr[0]); double *i_real = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_REAL(descr[0]); double *i_imaginary = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_IMAGINARY(descr[0]); double *o_real = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_REAL(descr[1]); double *o_imaginary = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_IMAGINARY(descr[1]); for(i=0 ; imajor >= 2 || props->minor >= 3) { /* At least compute capability 1.3, supports doubles */ return 1; } else { /* Old card does not support doubles */ return 0; } #endif #else return 1; #endif } #ifdef STARPU_USE_CUDA extern void copy_complex_dev_handle_codelet_cuda(void *descr[], void *_args); #endif #ifdef STARPU_USE_OPENCL extern void copy_complex_dev_handle_codelet_opencl(void *buffers[], void *args); #endif struct starpu_codelet cl_dev_handle_copy = { .cpu_funcs = {copy_complex_dev_handle_codelet_cpu}, #ifdef STARPU_USE_CUDA .cuda_funcs = {copy_complex_dev_handle_codelet_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {copy_complex_dev_handle_codelet_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .nbuffers = 2, .modes = {STARPU_R, STARPU_W}, .can_execute = can_execute, .name = "cl_dev_handle_copy" }; #ifdef STARPU_USE_OPENCL struct starpu_opencl_program opencl_program; #endif int main(void) { int ret = 0; starpu_data_handle_t handle1; starpu_data_handle_t handle2; starpu_data_handle_t handle3; starpu_data_handle_t handle4; double real = 45.0; double imaginary = 12.0; double copy_real = 78.0; double copy_imaginary = 78.0; int compare; int *compare_ptr = &compare; starpu_data_handle_t vectorh; struct starpu_vector_interface *vectori; double *vector; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("examples/interface/complex_dev_handle/complex_dev_handle_kernels.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif starpu_complex_dev_handle_data_register(&handle1, STARPU_MAIN_RAM, (uintptr_t)&real, (uintptr_t)&imaginary, 1); starpu_complex_dev_handle_data_register(&handle2, STARPU_MAIN_RAM, (uintptr_t)©_real, (uintptr_t)©_imaginary, 1); /* Create a vector of two complexs. */ starpu_complex_dev_handle_data_register(&handle3, -1, 0, 0, 2); starpu_complex_dev_handle_data_register(&handle4, -1, 0, 0, 1); ret = starpu_task_insert(&cl_dev_handle_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&cl_dev_handle_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* Compare two different complexs. */ ret = starpu_task_insert(&cl_dev_handle_compare, STARPU_R, handle1, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); if (compare != 0) { _FPRINTF(stderr, "Complex numbers should NOT be similar\n"); goto end; } /* Copy one into the other. */ ret = starpu_task_insert(&cl_dev_handle_copy, STARPU_R, handle1, STARPU_W, handle2, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&cl_dev_handle_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&cl_dev_handle_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* And compare again. */ ret = starpu_task_insert(&cl_dev_handle_compare, STARPU_R, handle1, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); if (compare != 1) { _FPRINTF(stderr, "Complex numbers should be similar\n"); } /* Put another value again */ starpu_data_acquire(handle2, STARPU_W); copy_real = 78.0; copy_imaginary = 77.0; starpu_data_release(handle2); /* Split it in two pieces (thus one complex each). */ struct starpu_data_filter f = { .filter_func = starpu_complex_dev_handle_filter_block, .nchildren = 2, }; starpu_data_partition(handle3, &f); /* Copy the two complexs into each part */ ret = starpu_task_insert(&cl_dev_handle_copy, STARPU_R, handle1, STARPU_W, starpu_data_get_sub_data(handle3, 1, 0), 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&cl_dev_handle_copy, STARPU_R, handle2, STARPU_W, starpu_data_get_sub_data(handle3, 1, 1), 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* Gather the two pieces. */ starpu_data_unpartition(handle3, STARPU_MAIN_RAM); /* Show it. */ ret = starpu_task_insert(&cl_dev_handle_display, STARPU_VALUE, "handle3", strlen("handle3")+1, STARPU_R, handle3, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* Get the real and imaginary vectors. */ struct starpu_data_filter fcanon = { .filter_func = starpu_complex_dev_handle_filter_canonical, .nchildren = 2, .get_child_ops = starpu_complex_dev_handle_filter_canonical_child_ops, }; starpu_data_partition(handle3, &fcanon); /* Check the corresponding data. */ vectorh = starpu_data_get_sub_data(handle3, 1, 0); starpu_data_acquire(vectorh, STARPU_R); vectori = starpu_data_get_interface_on_node(vectorh, STARPU_MAIN_RAM); vector = (double*) vectori->ptr; STARPU_ASSERT_MSG(vector[0] == 45., "Bogus value: %f instead of %f", vector[0], 45.); STARPU_ASSERT_MSG(vector[1] == 78., "Bogus value: %f instead of %f", vector[1], 78.); starpu_data_release(vectorh); vectorh = starpu_data_get_sub_data(handle3, 1, 1); starpu_data_acquire(vectorh, STARPU_R); vectori = starpu_data_get_interface_on_node(vectorh, STARPU_MAIN_RAM); vector = (double*) vectori->ptr; STARPU_ASSERT_MSG(vector[0] == 12., "Bogus value: %f instead of %f", vector[0], 12.); STARPU_ASSERT_MSG(vector[1] == 77., "Bogus value: %f instead of %f", vector[1], 77.); starpu_data_release(vectorh); starpu_data_unpartition(handle3, STARPU_MAIN_RAM); /* Use helper starpu_data_cpy */ starpu_data_cpy(handle4, handle1, 0, NULL, NULL); ret = starpu_task_insert(&cl_dev_handle_display, STARPU_VALUE, "handle4", strlen("handle4")+1, STARPU_R, handle4, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* Compare two different complexs. */ ret = starpu_task_insert(&cl_dev_handle_compare, STARPU_R, handle1, STARPU_R, handle4, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); if (compare != 1) { _FPRINTF(stderr, "Complex numbers should be similar\n"); goto end; } end: #ifdef STARPU_USE_OPENCL { int ret2 = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret2, "starpu_opencl_unload_opencl"); } #endif starpu_data_unregister(handle1); starpu_data_unregister(handle2); starpu_data_unregister(handle3); starpu_data_unregister(handle4); starpu_shutdown(); if (ret == -ENODEV) return 77; else return !compare; } starpu-1.4.9+dfsg/examples/interface/complex_dev_handle/complex_dev_handle_codelet.h000066400000000000000000000064031507764646700311010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "complex_dev_handle_interface.h" #ifndef __COMPLEX_DEV_HANDLE_CODELET_H #define __COMPLEX_DEV_HANDLE_CODELET_H #define _FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) /* Dumb performance model for simgrid */ static double complex_dev_handle_cost_function(struct starpu_task *task, unsigned nimpl) { (void) task; (void) nimpl; return 0.000001; } static struct starpu_perfmodel complex_dev_handle_model = { .type = STARPU_COMMON, .cost_function = complex_dev_handle_cost_function, .symbol = "complex_dev_handle" }; void compare_complex_dev_handle_codelet(void *descr[], void *_args) { int nx1 = STARPU_COMPLEX_DEV_HANDLE_GET_NX(descr[0]); double* real1 = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_REAL(descr[0]); double* imaginary1 = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_IMAGINARY(descr[0]); int nx2 = STARPU_COMPLEX_DEV_HANDLE_GET_NX(descr[1]); double* real2 = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_REAL(descr[1]); double* imaginary2 = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_IMAGINARY(descr[1]); int *compare; starpu_codelet_unpack_args(_args, &compare); *compare = (nx1 == nx2); if (nx1 == nx2) { int i; for(i=0 ; i #include "complex_dev_handle_interface.h" void starpu_complex_dev_handle_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) { struct starpu_complex_dev_handle_interface *complex_dev_handle_father = father_interface; struct starpu_complex_dev_handle_interface *complex_dev_handle_child = child_interface; uint32_t nx = complex_dev_handle_father->nx; size_t elemsize = sizeof(double); STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx); uint32_t child_nx; size_t offset; /* Compute the split */ starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1, &child_nx, &offset); complex_dev_handle_child->nx = child_nx; if (complex_dev_handle_father->dev_handle_real) { if (complex_dev_handle_father->ptr_real) { complex_dev_handle_child->ptr_real = complex_dev_handle_father->ptr_real + offset; complex_dev_handle_child->ptr_imaginary = complex_dev_handle_father->ptr_imaginary + offset; } complex_dev_handle_child->dev_handle_real = complex_dev_handle_father->dev_handle_real; complex_dev_handle_child->offset_real = complex_dev_handle_father->offset_real + offset; complex_dev_handle_child->dev_handle_imaginary = complex_dev_handle_father->dev_handle_imaginary; complex_dev_handle_child->offset_imaginary = complex_dev_handle_father->offset_imaginary + offset; } } void starpu_complex_dev_handle_filter_canonical(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) { struct starpu_complex_dev_handle_interface *complex_dev_handle_father = father_interface; struct starpu_vector_interface *vector_child = child_interface; STARPU_ASSERT_MSG(nchunks == 2, "complex_dev_handle can only be split into two pieces"); STARPU_ASSERT_MSG(id < 2, "complex_dev_handle has only two pieces"); vector_child->id = STARPU_VECTOR_INTERFACE_ID; vector_child->nx = complex_dev_handle_father->nx; vector_child->elemsize = sizeof(double); vector_child->slice_base = 0; vector_child->allocsize = vector_child->nx * vector_child->elemsize; if (complex_dev_handle_father->dev_handle_real) { if (complex_dev_handle_father->ptr_real) { if (id == 0) vector_child->ptr = complex_dev_handle_father->ptr_real; else vector_child->ptr = complex_dev_handle_father->ptr_imaginary; } if (id == 0) { vector_child->dev_handle = complex_dev_handle_father->dev_handle_real; vector_child->offset = complex_dev_handle_father->offset_real; } else { vector_child->dev_handle = complex_dev_handle_father->dev_handle_imaginary; vector_child->offset = complex_dev_handle_father->offset_imaginary; } } } struct starpu_data_interface_ops *starpu_complex_dev_handle_filter_canonical_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned child) { return &starpu_interface_vector_ops; } starpu-1.4.9+dfsg/examples/interface/complex_dev_handle/complex_dev_handle_interface.c000066400000000000000000000305451507764646700314210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "complex_dev_handle_interface.h" uintptr_t starpu_complex_dev_handle_get_ptr_real(starpu_data_handle_t handle) { struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return complex_dev_handle_interface->ptr_real; } uintptr_t starpu_complex_dev_handle_get_ptr_imaginary(starpu_data_handle_t handle) { struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return complex_dev_handle_interface->ptr_imaginary; } int starpu_complex_dev_handle_get_nx(starpu_data_handle_t handle) { struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return complex_dev_handle_interface->nx; } uintptr_t starpu_complex_dev_handle_get_dev_handle_real(starpu_data_handle_t handle) { struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return complex_dev_handle_interface->dev_handle_real; } uintptr_t starpu_complex_dev_handle_get_dev_handle_imaginary(starpu_data_handle_t handle) { struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return complex_dev_handle_interface->dev_handle_imaginary; } size_t starpu_complex_dev_handle_get_offset_real(starpu_data_handle_t handle) { struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return complex_dev_handle_interface->offset_real; } size_t starpu_complex_dev_handle_get_offset_imaginary(starpu_data_handle_t handle) { struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return complex_dev_handle_interface->offset_imaginary; } static void complex_dev_handle_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) data_interface; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_complex_dev_handle_interface *local_interface = (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, node); local_interface->nx = complex_dev_handle_interface->nx; if (node == home_node) { local_interface->ptr_real = complex_dev_handle_interface->ptr_real; local_interface->dev_handle_real = complex_dev_handle_interface->dev_handle_real; local_interface->offset_real = complex_dev_handle_interface->offset_real; local_interface->ptr_imaginary = complex_dev_handle_interface->ptr_imaginary; local_interface->dev_handle_imaginary = complex_dev_handle_interface->dev_handle_imaginary; local_interface->offset_imaginary = complex_dev_handle_interface->offset_imaginary; } else { local_interface->ptr_real = 0; local_interface->dev_handle_real = 0; local_interface->offset_real = 0; local_interface->ptr_imaginary = 0; local_interface->dev_handle_imaginary = 0; local_interface->offset_imaginary = 0; } } } static starpu_ssize_t complex_dev_handle_allocate_data_on_node(void *data_interface, unsigned node) { struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) data_interface; uintptr_t addr_real = 0, addr_imaginary = 0, dev_handle_real, dev_handle_imaginary; starpu_ssize_t requested_memory = complex_dev_handle_interface->nx * sizeof(double); dev_handle_real = starpu_malloc_on_node(node, requested_memory); if (!dev_handle_real) goto fail_real; dev_handle_imaginary = starpu_malloc_on_node(node, requested_memory); if (!dev_handle_imaginary) goto fail_imaginary; if (starpu_node_get_kind(node) != STARPU_OPENCL_RAM) { addr_real = dev_handle_real; addr_imaginary = dev_handle_imaginary; } /* update the data properly in consequence */ complex_dev_handle_interface->ptr_real = addr_real; complex_dev_handle_interface->dev_handle_real = dev_handle_real; complex_dev_handle_interface->offset_real = 0; complex_dev_handle_interface->ptr_imaginary = addr_imaginary; complex_dev_handle_interface->dev_handle_imaginary = dev_handle_imaginary; complex_dev_handle_interface->offset_imaginary = 0; return 2*requested_memory; fail_imaginary: starpu_free_on_node(node, dev_handle_real, requested_memory); fail_real: return -ENOMEM; } static void complex_dev_handle_free_data_on_node(void *data_interface, unsigned node) { struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) data_interface; starpu_ssize_t requested_memory = complex_dev_handle_interface->nx * sizeof(double); starpu_free_on_node(node, (uintptr_t) complex_dev_handle_interface->dev_handle_real, requested_memory); complex_dev_handle_interface->ptr_real = 0; complex_dev_handle_interface->dev_handle_real = 0; starpu_free_on_node(node, (uintptr_t) complex_dev_handle_interface->dev_handle_imaginary, requested_memory); complex_dev_handle_interface->ptr_imaginary = 0; complex_dev_handle_interface->dev_handle_imaginary = 0; } static size_t complex_dev_handle_get_size(starpu_data_handle_t handle) { size_t size; struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); size = complex_dev_handle_interface->nx * 2 * sizeof(double); return size; } static uint32_t complex_dev_handle_footprint(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(starpu_complex_dev_handle_get_nx(handle), 0); } static int complex_dev_handle_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, node); *count = complex_dev_handle_get_size(handle); if (ptr != NULL) { char *real = (void *)complex_dev_handle_interface->ptr_real; char *imaginary = (void *)complex_dev_handle_interface->ptr_imaginary; *ptr = (void*) starpu_malloc_on_node_flags(node, *count, 0); char *data = (char*) *ptr; memcpy(data, real, complex_dev_handle_interface->nx*sizeof(double)); memcpy(data+complex_dev_handle_interface->nx*sizeof(double), imaginary, complex_dev_handle_interface->nx*sizeof(double)); } return 0; } static int complex_dev_handle_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { char *data = ptr; STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) starpu_data_get_interface_on_node(handle, node); STARPU_ASSERT(count == 2 * complex_dev_handle_interface->nx * sizeof(double)); char *real = (void *)complex_dev_handle_interface->ptr_real; char *imaginary = (void *)complex_dev_handle_interface->ptr_imaginary; memcpy(real, data, complex_dev_handle_interface->nx*sizeof(double)); memcpy(imaginary, data+complex_dev_handle_interface->nx*sizeof(double), complex_dev_handle_interface->nx*sizeof(double)); return 0; } static int complex_dev_handle_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { complex_dev_handle_peek_data(handle, node, ptr, count); starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0); return 0; } static starpu_ssize_t complex_dev_handle_describe(void *data_interface, char *buf, size_t size) { struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = (struct starpu_complex_dev_handle_interface *) data_interface; return snprintf(buf, size, "Complex_dev_handle%d", complex_dev_handle_interface->nx); } static int complex_dev_handle_compare(void *data_interface_a, void *data_interface_b) { struct starpu_complex_dev_handle_interface *complex_dev_handle_a = (struct starpu_complex_dev_handle_interface *) data_interface_a; struct starpu_complex_dev_handle_interface *complex_dev_handle_b = (struct starpu_complex_dev_handle_interface *) data_interface_b; return (complex_dev_handle_a->nx == complex_dev_handle_b->nx); } int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct starpu_complex_dev_handle_interface *src_complex_dev_handle = src_interface; struct starpu_complex_dev_handle_interface *dst_complex_dev_handle = dst_interface; int ret = 0; if (starpu_interface_copy(src_complex_dev_handle->dev_handle_real, src_complex_dev_handle->offset_real, src_node, dst_complex_dev_handle->dev_handle_real, dst_complex_dev_handle->offset_real, dst_node, src_complex_dev_handle->nx*sizeof(double), async_data)) ret = -EAGAIN; if (starpu_interface_copy(src_complex_dev_handle->dev_handle_imaginary, src_complex_dev_handle->offset_imaginary, src_node, dst_complex_dev_handle->dev_handle_imaginary, dst_complex_dev_handle->offset_imaginary, dst_node, src_complex_dev_handle->nx*sizeof(double), async_data)) ret = -EAGAIN; return ret; } const struct starpu_data_copy_methods complex_dev_handle_copy_methods = { .any_to_any = copy_any_to_any }; struct starpu_data_interface_ops interface_complex_dev_handle_ops = { .register_data_handle = complex_dev_handle_register_data_handle, .allocate_data_on_node = complex_dev_handle_allocate_data_on_node, .free_data_on_node = complex_dev_handle_free_data_on_node, .copy_methods = &complex_dev_handle_copy_methods, .get_size = complex_dev_handle_get_size, .footprint = complex_dev_handle_footprint, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct starpu_complex_dev_handle_interface), .to_pointer = NULL, .pack_data = complex_dev_handle_pack_data, .peek_data = complex_dev_handle_peek_data, .unpack_data = complex_dev_handle_unpack_data, .describe = complex_dev_handle_describe, .compare = complex_dev_handle_compare }; void starpu_complex_dev_handle_data_register(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr_real, uintptr_t ptr_imaginary, int nx) { struct starpu_complex_dev_handle_interface complex_dev_handle = { .ptr_real = ptr_real, .dev_handle_real = ptr_real, .ptr_imaginary = ptr_imaginary, .dev_handle_imaginary = ptr_imaginary, .nx = nx }; starpu_data_register(handleptr, home_node, &complex_dev_handle, &interface_complex_dev_handle_ops); } void starpu_complex_dev_handle_ptr_register(starpu_data_handle_t handle, int node, uintptr_t ptr_real, uintptr_t ptr_imaginary, uintptr_t dev_handle_real, uintptr_t dev_handle_imaginary, size_t offset_real, size_t offset_imaginary) { struct starpu_complex_dev_handle_interface *complex_dev_handle_interface = starpu_data_get_interface_on_node(handle, node); starpu_data_ptr_register(handle, node); complex_dev_handle_interface->ptr_real = ptr_real; complex_dev_handle_interface->dev_handle_real = dev_handle_real; complex_dev_handle_interface->offset_real = offset_real; complex_dev_handle_interface->ptr_imaginary = ptr_imaginary; complex_dev_handle_interface->dev_handle_imaginary = dev_handle_imaginary; complex_dev_handle_interface->offset_imaginary = offset_imaginary; } starpu-1.4.9+dfsg/examples/interface/complex_dev_handle/complex_dev_handle_interface.h000066400000000000000000000070021507764646700314160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __COMPLEX_DEV_HANDLE_INTERFACE_H #define __COMPLEX_DEV_HANDLE_INTERFACE_H /* interface for complex numbers supporting opencl*/ struct starpu_complex_dev_handle_interface { int nx; uintptr_t ptr_real; uintptr_t dev_handle_real; size_t offset_real; uintptr_t ptr_imaginary; uintptr_t dev_handle_imaginary; size_t offset_imaginary; }; void starpu_complex_dev_handle_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr_real, uintptr_t ptr_imaginary, int nx); void starpu_complex_dev_handle_ptr_register(starpu_data_handle_t handle, int node, uintptr_t ptr_real, uintptr_t ptr_imaginary, uintptr_t dev_handle_real, uintptr_t dev_handle_imaginary, size_t offset_real, size_t offset_imaginary); int starpu_complex_dev_handle_get_nx(starpu_data_handle_t handle); uintptr_t starpu_complex_dev_handle_get_ptr_real(starpu_data_handle_t handle); uintptr_t starpu_complex_dev_handle_get_dev_handle_real(starpu_data_handle_t handle); size_t starpu_complex_dev_handle_get_offset_real(starpu_data_handle_t handle); uintptr_t starpu_complex_dev_handle_get_ptr_imaginary(starpu_data_handle_t handle); uintptr_t starpu_complex_dev_handle_get_dev_handle_imaginary(starpu_data_handle_t handle); size_t starpu_complex_dev_handle_get_offset_imaginary(starpu_data_handle_t handle); #define STARPU_COMPLEX_DEV_HANDLE_GET_NX(interface) (((struct starpu_complex_dev_handle_interface *)(interface))->nx) #define STARPU_COMPLEX_DEV_HANDLE_GET_PTR_REAL(interface) (((struct starpu_complex_dev_handle_interface *)(interface))->ptr_real) #define STARPU_COMPLEX_DEV_HANDLE_GET_DEV_HANDLE_REAL(interface) (((struct starpu_complex_dev_handle_interface *)(interface))->dev_handle_real) #define STARPU_COMPLEX_DEV_HANDLE_GET_OFFSET_REAL(interface) (((struct starpu_complex_dev_handle_interface *)(interface))->offset_real) #define STARPU_COMPLEX_DEV_HANDLE_GET_PTR_IMAGINARY(interface) (((struct starpu_complex_dev_handle_interface *)(interface))->ptr_imaginary) #define STARPU_COMPLEX_DEV_HANDLE_GET_DEV_HANDLE_IMAGINARY(interface) (((struct starpu_complex_dev_handle_interface *)(interface))->dev_handle_imaginary) #define STARPU_COMPLEX_DEV_HANDLE_GET_OFFSET_IMAGINARY(interface) (((struct starpu_complex_dev_handle_interface *)(interface))->offset_imaginary) /* Split complex vector into smaller complex vectors */ void starpu_complex_dev_handle_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nchunks); /* Split complex into two simple vectors */ void starpu_complex_dev_handle_filter_canonical(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nchunks); struct starpu_data_interface_ops *starpu_complex_dev_handle_filter_canonical_child_ops(struct starpu_data_filter *f, unsigned child); #endif /* __COMPLEX_DEV_HANDLE_INTERFACE_H */ starpu-1.4.9+dfsg/examples/interface/complex_dev_handle/complex_dev_handle_kernels.cl000066400000000000000000000026241507764646700312750ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Use the "double" type */ #pragma OPENCL EXTENSION cl_khr_fp64 : enable __kernel void complex_copy_opencl(__global double *o_real, unsigned o_real_offset, __global double *o_imaginary, unsigned o_imaginary_offset, __global double *i_real, unsigned i_real_offset, __global double *i_imaginary, unsigned i_imaginary_offset, unsigned nx) { const int i = get_global_id(0); if (i < nx) { o_real = (__global char*) o_real + o_real_offset; o_imaginary = (__global char*) o_imaginary + o_imaginary_offset; i_real = (__global char*) i_real + i_real_offset; i_imaginary = (__global char*) i_imaginary + i_imaginary_offset; o_real[i] = i_real[i]; o_imaginary[i] = i_imaginary[i]; } } starpu-1.4.9+dfsg/examples/interface/complex_dev_handle/complex_dev_handle_kernels.cu000066400000000000000000000035121507764646700313030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "complex_dev_handle_interface.h" static __global__ void complex_copy_cuda(double *o_real, double *o_imaginary, double *i_real, double *i_imaginary, unsigned n) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) { o_real[i] = i_real[i]; o_imaginary[i] = i_imaginary[i]; } } extern "C" void copy_complex_dev_handle_codelet_cuda(void *descr[], void *_args) { (void)_args; int nx = STARPU_COMPLEX_DEV_HANDLE_GET_NX(descr[0]); double *i_real = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_REAL(descr[0]); double *i_imaginary = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_IMAGINARY(descr[0]); double *o_real = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_REAL(descr[1]); double *o_imaginary = (double*)STARPU_COMPLEX_DEV_HANDLE_GET_PTR_IMAGINARY(descr[1]); unsigned threads_per_block = 64; unsigned nblocks = (nx + threads_per_block-1) / threads_per_block; complex_copy_cuda<<>>(o_real, o_imaginary, i_real, i_imaginary, nx); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/interface/complex_dev_handle/complex_dev_handle_kernels_opencl.c000066400000000000000000000063201507764646700324560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "complex_dev_handle_interface.h" extern struct starpu_opencl_program opencl_program; void copy_complex_dev_handle_codelet_opencl(void *buffers[], void *_args) { (void) _args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; /* length of the vector */ unsigned n = STARPU_COMPLEX_DEV_HANDLE_GET_NX(buffers[0]); /* OpenCL copy of the vector pointer */ cl_mem i_real = (cl_mem) STARPU_COMPLEX_DEV_HANDLE_GET_DEV_HANDLE_REAL(buffers[0]); unsigned i_real_offset = STARPU_COMPLEX_DEV_HANDLE_GET_OFFSET_REAL(buffers[0]); cl_mem i_imaginary = (cl_mem) STARPU_COMPLEX_DEV_HANDLE_GET_DEV_HANDLE_IMAGINARY(buffers[0]); unsigned i_imaginary_offset = STARPU_COMPLEX_DEV_HANDLE_GET_OFFSET_IMAGINARY(buffers[0]); cl_mem o_real = (cl_mem) STARPU_COMPLEX_DEV_HANDLE_GET_DEV_HANDLE_REAL(buffers[1]); unsigned o_real_offset = STARPU_COMPLEX_DEV_HANDLE_GET_OFFSET_REAL(buffers[1]); cl_mem o_imaginary = (cl_mem) STARPU_COMPLEX_DEV_HANDLE_GET_DEV_HANDLE_IMAGINARY(buffers[1]); unsigned o_imaginary_offset = STARPU_COMPLEX_DEV_HANDLE_GET_OFFSET_IMAGINARY(buffers[1]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "complex_copy_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(o_real), &o_real); err|= clSetKernelArg(kernel, 1, sizeof(o_real_offset), &o_real_offset); err|= clSetKernelArg(kernel, 2, sizeof(o_imaginary), &o_imaginary); err|= clSetKernelArg(kernel, 3, sizeof(o_imaginary_offset), &o_imaginary_offset); err|= clSetKernelArg(kernel, 4, sizeof(i_real), &i_real); err|= clSetKernelArg(kernel, 5, sizeof(i_real_offset), &i_real_offset); err|= clSetKernelArg(kernel, 6, sizeof(i_imaginary), &i_imaginary); err|= clSetKernelArg(kernel, 7, sizeof(i_imaginary_offset), &i_imaginary_offset); err|= clSetKernelArg(kernel, 8, sizeof(n), &n); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local=global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/examples/interface/complex_filters.c000066400000000000000000000052671507764646700231430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "complex_interface.h" void starpu_complex_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) { struct starpu_complex_interface *complex_father = father_interface; struct starpu_complex_interface *complex_child = child_interface; uint32_t nx = complex_father->nx; size_t elemsize = sizeof(double); STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx); uint32_t child_nx; size_t offset; /* Compute the split */ starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1, &child_nx, &offset); complex_child->nx = child_nx; if (complex_father->real) { complex_child->real = (void*) ((uintptr_t) complex_father->real + offset); complex_child->imaginary = (void*) ((uintptr_t) complex_father->imaginary + offset); } } void starpu_complex_filter_canonical(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) { struct starpu_complex_interface *complex_father = father_interface; struct starpu_vector_interface *vector_child = child_interface; STARPU_ASSERT_MSG(nchunks == 2, "complex can only be split into two pieces"); STARPU_ASSERT_MSG(id < 2, "complex has only two pieces"); vector_child->id = STARPU_VECTOR_INTERFACE_ID; if (id == 0) vector_child->ptr = (uintptr_t) complex_father->real; else vector_child->ptr = (uintptr_t) complex_father->imaginary; /* the complex interface doesn't support dev_handle/offset */ vector_child->dev_handle = vector_child->ptr; vector_child->offset = 0; vector_child->nx = complex_father->nx; vector_child->elemsize = sizeof(double); vector_child->slice_base = 0; vector_child->allocsize = vector_child->nx * vector_child->elemsize; } struct starpu_data_interface_ops *starpu_complex_filter_canonical_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned child) { return &starpu_interface_vector_ops; } starpu-1.4.9+dfsg/examples/interface/complex_interface.c000066400000000000000000000221771507764646700234320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "complex_interface.h" double *starpu_complex_get_real(starpu_data_handle_t handle) { struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return complex_interface->real; } double *starpu_complex_get_imaginary(starpu_data_handle_t handle) { struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return complex_interface->imaginary; } int starpu_complex_get_nx(starpu_data_handle_t handle) { struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return complex_interface->nx; } static void complex_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_complex_interface *local_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node); local_interface->nx = complex_interface->nx; if (node == home_node) { local_interface->real = complex_interface->real; local_interface->imaginary = complex_interface->imaginary; } else { local_interface->real = NULL; local_interface->imaginary = NULL; } } } static starpu_ssize_t complex_allocate_data_on_node(void *data_interface, unsigned node) { struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; double *addr_real = NULL; double *addr_imaginary = NULL; starpu_ssize_t requested_memory = complex_interface->nx * sizeof(complex_interface->real[0]); addr_real = (double*) starpu_malloc_on_node(node, requested_memory); if (!addr_real) goto fail_real; addr_imaginary = (double*) starpu_malloc_on_node(node, requested_memory); if (!addr_imaginary) goto fail_imaginary; /* update the data properly in consequence */ complex_interface->real = addr_real; complex_interface->imaginary = addr_imaginary; return 2*requested_memory; fail_imaginary: starpu_free_on_node(node, (uintptr_t) addr_real, requested_memory); fail_real: return -ENOMEM; } static void complex_free_data_on_node(void *data_interface, unsigned node) { struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; starpu_ssize_t requested_memory = complex_interface->nx * sizeof(complex_interface->real[0]); starpu_free_on_node(node, (uintptr_t) complex_interface->real, requested_memory); complex_interface->real = NULL; starpu_free_on_node(node, (uintptr_t) complex_interface->imaginary, requested_memory); complex_interface->imaginary = NULL; } static size_t complex_get_size(starpu_data_handle_t handle) { size_t size; struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); size = complex_interface->nx * 2 * sizeof(double); return size; } static uint32_t complex_footprint(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(starpu_complex_get_nx(handle), 0); } static int complex_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node); *count = complex_get_size(handle); if (ptr != NULL) { char *data; data = (void*) starpu_malloc_on_node_flags(node, *count, 0); *ptr = data; memcpy(data, complex_interface->real, complex_interface->nx*sizeof(double)); memcpy(data+complex_interface->nx*sizeof(double), complex_interface->imaginary, complex_interface->nx*sizeof(double)); } return 0; } static int complex_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { char *data = ptr; STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) starpu_data_get_interface_on_node(handle, node); STARPU_ASSERT(count == 2 * complex_interface->nx * sizeof(double)); memcpy(complex_interface->real, data, complex_interface->nx*sizeof(double)); memcpy(complex_interface->imaginary, data+complex_interface->nx*sizeof(double), complex_interface->nx*sizeof(double)); return 0; } static int complex_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { complex_peek_data(handle, node, ptr, count); starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0); return 0; } static starpu_ssize_t complex_describe(void *data_interface, char *buf, size_t size) { struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; return snprintf(buf, size, "Complex%d", complex_interface->nx); } static int complex_compare(void *data_interface_a, void *data_interface_b) { struct starpu_complex_interface *complex_a = (struct starpu_complex_interface *) data_interface_a; struct starpu_complex_interface *complex_b = (struct starpu_complex_interface *) data_interface_b; return (complex_a->nx == complex_b->nx); } #define _pack(dst, src) do { memcpy(dst, &src, sizeof(src)); dst += sizeof(src); } while(0) #define _unpack(dst, src) do { memcpy(&dst, src, sizeof(dst)); src += sizeof(dst); } while(0) static starpu_ssize_t complex_size_meta(struct starpu_complex_interface *complex_interface) { return sizeof(complex_interface->real) + sizeof(complex_interface->imaginary) + sizeof(complex_interface->nx); } static int complex_pack_meta(void *data_interface, void **ptr, starpu_ssize_t *count) { struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) data_interface; *count = complex_size_meta(complex_interface); *ptr = calloc(1, *count); char *cur = *ptr; _pack(cur, complex_interface->real); _pack(cur, complex_interface->imaginary); _pack(cur, complex_interface->nx); return 0; } static int complex_unpack_meta(void **data_interface, void *ptr, starpu_ssize_t *count) { *data_interface = calloc(1, sizeof(struct starpu_complex_interface)); struct starpu_complex_interface *complex_interface = (struct starpu_complex_interface *) (*data_interface); char *cur = ptr; _unpack(complex_interface->real, cur); _unpack(complex_interface->imaginary, cur); _unpack(complex_interface->nx, cur); *count = complex_size_meta(complex_interface); return 0; } int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct starpu_complex_interface *src_complex = src_interface; struct starpu_complex_interface *dst_complex = dst_interface; int ret = 0; if (starpu_interface_copy((uintptr_t) src_complex->real, 0, src_node, (uintptr_t) dst_complex->real, 0, dst_node, src_complex->nx*sizeof(src_complex->real[0]), async_data)) ret = -EAGAIN; if (starpu_interface_copy((uintptr_t) src_complex->imaginary, 0, src_node, (uintptr_t) dst_complex->imaginary, 0, dst_node, src_complex->nx*sizeof(src_complex->imaginary[0]), async_data)) ret = -EAGAIN; return ret; } const struct starpu_data_copy_methods complex_copy_methods = { .any_to_any = copy_any_to_any }; struct starpu_data_interface_ops interface_complex_ops = { .register_data_handle = complex_register_data_handle, .allocate_data_on_node = complex_allocate_data_on_node, .free_data_on_node = complex_free_data_on_node, .copy_methods = &complex_copy_methods, .get_size = complex_get_size, .footprint = complex_footprint, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct starpu_complex_interface), .to_pointer = NULL, .pack_data = complex_pack_data, .peek_data = complex_peek_data, .unpack_data = complex_unpack_data, .describe = complex_describe, .compare = complex_compare, .pack_meta = complex_pack_meta, .unpack_meta = complex_unpack_meta, .free_meta = NULL }; void starpu_complex_data_register_ops() { starpu_data_register_ops(&interface_complex_ops); } void starpu_complex_data_register(starpu_data_handle_t *handleptr, int home_node, double *real, double *imaginary, int nx) { struct starpu_complex_interface complex = { .real = real, .imaginary = imaginary, .nx = nx }; starpu_data_register(handleptr, home_node, &complex, &interface_complex_ops); } starpu-1.4.9+dfsg/examples/interface/complex_interface.h000066400000000000000000000040531507764646700234300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __COMPLEX_INTERFACE_H #define __COMPLEX_INTERFACE_H /* interface for complex numbers */ struct starpu_complex_interface { double *real; double *imaginary; int nx; }; void starpu_complex_data_register(starpu_data_handle_t *handle, int home_node, double *real, double *imaginary, int nx); void starpu_complex_data_register_ops(); double *starpu_complex_get_real(starpu_data_handle_t handle); double *starpu_complex_get_imaginary(starpu_data_handle_t handle); int starpu_complex_get_nx(starpu_data_handle_t handle); #define STARPU_COMPLEX_GET_REAL(interface) (((struct starpu_complex_interface *)(interface))->real) #define STARPU_COMPLEX_GET_IMAGINARY(interface) (((struct starpu_complex_interface *)(interface))->imaginary) #define STARPU_COMPLEX_GET_NX(interface) (((struct starpu_complex_interface *)(interface))->nx) /* Split complex vector into smaller complex vectors */ void starpu_complex_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nchunks); /* Split complex into two simple vectors */ void starpu_complex_filter_canonical(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nchunks); struct starpu_data_interface_ops *starpu_complex_filter_canonical_child_ops(struct starpu_data_filter *f, unsigned child); #endif /* __COMPLEX_INTERFACE_H */ starpu-1.4.9+dfsg/examples/interface/complex_kernels.cl000066400000000000000000000020711507764646700233000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Use the "double" type */ #pragma OPENCL EXTENSION cl_khr_fp64 : enable __kernel void complex_copy_opencl(__global double *o_real, __global double *o_imaginary, __global double *i_real, __global double *i_imaginary, unsigned nx) { const int i = get_global_id(0); if (i < nx) { o_real[i] = i_real[i]; o_imaginary[i] = i_imaginary[i]; } } starpu-1.4.9+dfsg/examples/interface/complex_kernels.cu000066400000000000000000000033111507764646700233070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "complex_interface.h" static __global__ void complex_copy_cuda(double *o_real, double *o_imaginary, double *i_real, double *i_imaginary, unsigned n) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) { o_real[i] = i_real[i]; o_imaginary[i] = i_imaginary[i]; } } extern "C" void copy_complex_codelet_cuda(void *descr[], void *_args) { (void)_args; int nx = STARPU_COMPLEX_GET_NX(descr[0]); double *i_real = STARPU_COMPLEX_GET_REAL(descr[0]); double *i_imaginary = STARPU_COMPLEX_GET_IMAGINARY(descr[0]); double *o_real = STARPU_COMPLEX_GET_REAL(descr[1]); double *o_imaginary = STARPU_COMPLEX_GET_IMAGINARY(descr[1]); unsigned threads_per_block = 64; unsigned nblocks = (nx + threads_per_block-1) / threads_per_block; complex_copy_cuda<<>>(o_real, o_imaginary, i_real, i_imaginary, nx); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/interface/complex_kernels_opencl.c000066400000000000000000000047051507764646700244720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "complex_interface.h" extern struct starpu_opencl_program opencl_program; void copy_complex_codelet_opencl(void *buffers[], void *_args) { (void) _args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; /* length of the vector */ unsigned n = STARPU_COMPLEX_GET_NX(buffers[0]); /* OpenCL copy of the vector pointer */ cl_mem i_real = (cl_mem) STARPU_COMPLEX_GET_REAL(buffers[0]); cl_mem i_imaginary = (cl_mem) STARPU_COMPLEX_GET_IMAGINARY(buffers[0]); cl_mem o_real = (cl_mem) STARPU_COMPLEX_GET_REAL(buffers[1]); cl_mem o_imaginary = (cl_mem) STARPU_COMPLEX_GET_IMAGINARY(buffers[1]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "complex_copy_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(o_real), &o_real); err|= clSetKernelArg(kernel, 1, sizeof(o_imaginary), &o_imaginary); err|= clSetKernelArg(kernel, 2, sizeof(i_real), &i_real); err|= clSetKernelArg(kernel, 3, sizeof(i_imaginary), &i_imaginary); err|= clSetKernelArg(kernel, 4, sizeof(n), &n); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local=global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/examples/loader.c000066400000000000000000000274611507764646700172520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) #include #else #include #endif #ifdef STARPU_QUICK_CHECK /* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s add some extra times for tests which run with all schedulers */ #define DEFAULT_TIMEOUT 100 #elif !defined(STARPU_LONG_CHECK) /* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ #define DEFAULT_TIMEOUT 300 #else /* Long checks can be very long */ #define DEFAULT_TIMEOUT 1000 #endif #define AUTOTEST_SKIPPED_TEST 77 static pid_t child_pid = 0; static int timeout; #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) static int mygettimeofday(struct timeval *tv, void *tz) { if (tv) { FILETIME ft; unsigned long long res; GetSystemTimeAsFileTime(&ft); /* 100-nanosecond intervals since January 1, 1601 */ res = ft.dwHighDateTime; res <<= 32; res |= ft.dwLowDateTime; res /= 10; /* Now we have microseconds */ res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; /* Now we are based on epoch */ tv->tv_sec = res / 1000000ULL; tv->tv_usec = res % 1000000ULL; } } #else #define mygettimeofday(tv,tz) gettimeofday(tv,tz) #endif #ifdef STARPU_GDB_PATH static int try_launch_gdb(const char *exe, const char *core) { # define GDB_COMMANDS \ "-ex", "py-list", \ "-ex", "starpu-tasks", \ "-ex", "starpu-workers", \ "-ex", "starpu-print-datas-summary", \ "-ex", "starpu-memusage", \ "-ex", "starpu-print-archs", \ "-ex", "starpu-print-registered-models", \ "-ex", "bt full", \ "-ex", "py-bt", \ "-ex", "thread apply all bt full", \ "-ex", "thread apply all py-bt", \ int err; pid_t pid; struct stat st; const char *top_builddir; char *gdb; err = stat(core, &st); if (err != 0) { fprintf(stderr, "while looking for core file of %s: %s: %m\n", exe, core); return -1; } if (!(st.st_mode & S_IFREG)) { fprintf(stderr, "%s: not a regular file\n", core); return -1; } top_builddir = getenv("top_builddir"); pid = fork(); switch (pid) { case 0: /* kid */ if (top_builddir != NULL) { /* Run gdb with Libtool. */ gdb = alloca(strlen(top_builddir) + sizeof("/libtool") + 1); strcpy(gdb, top_builddir); strcat(gdb, "/libtool"); err = execl(gdb, "gdb", "--mode=execute", STARPU_GDB_PATH, "--batch", GDB_COMMANDS exe, core, NULL); } else { /* Run gdb directly */ gdb = STARPU_GDB_PATH; err = execl(gdb, "gdb", "--batch", GDB_COMMANDS exe, core, NULL); } if (err != 0) { fprintf(stderr, "while launching `%s': %m\n", gdb); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); break; case -1: fprintf(stderr, "fork: %m\n"); return -1; default: /* parent */ { pid_t who; int status; who = waitpid(pid, &status, 0); if (who != pid) fprintf(stderr, "while waiting for gdb " "process %d: %m\n", pid); } } return 0; # undef GDB_COMMANDS } #endif /* STARPU_GDB_PATH */ static void launch_gdb(const char *exe) { #ifdef STARPU_GDB_PATH char s[32]; snprintf(s, sizeof(s), "core.%d", child_pid); if (try_launch_gdb(exe, s) < 0) try_launch_gdb(exe, "core"); #endif /* STARPU_GDB_PATH */ } static char *test_name; static void test_cleaner(int sig) { pid_t child_gid; int status; (void) sig; // send signal to all loader family members fprintf(stderr, "[error] test %s has been blocked for %d seconds. Mark it as failed\n", test_name, timeout); child_gid = getpgid(child_pid); kill(-child_gid, SIGQUIT); waitpid(child_pid, &status, 0); launch_gdb(test_name); raise(SIGALRM); exit(EXIT_FAILURE); } static void forwardsig(int sig) { pid_t child_gid; child_gid = getpgid(child_pid); kill(-child_gid, sig); } static int _decode(char **src, char *motif, const char *value) { char *found; found = strstr(*src, motif); if (found == NULL) return 0; char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); strncpy(new_src, *src, found - *src); strcat(new_src, value); strcat(new_src, found+strlen(motif)); *src = new_src; return 1; } static void decode(char **src, char *motif, const char *value) { if (*src) { if (strstr(*src, motif) && value == NULL) { fprintf(stderr, "error: $%s undefined\n", motif); exit(EXIT_FAILURE); } int d = _decode(src, motif, value); while (d) d = _decode(src, motif, value); } } int main(int argc, char *argv[]) { int child_exit_status; char *test_args; char *launcher; char *launcher_args; char *libtool; char *cflags; const char *top_builddir = getenv("top_builddir"); struct sigaction sa; int ret; struct timeval start; struct timeval end; double timing; int x=1; int asan = 0, lsan = 0, tsan = 0, usan = 0; (void) argc; test_args = NULL; timeout = 0; launcher=getenv("STARPU_CHECK_LAUNCHER"); launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); cflags = getenv("CFLAGS"); if (cflags) { if (strstr(cflags, "-fsanitize=address")) asan = 1; if (strstr(cflags, "-fsanitize=leak")) lsan = 1; if (strstr(cflags, "-fsanitize=thread")) tsan = 1; if (strstr(cflags, "-fsanitize=undefined")) usan = 1; } if (argv[x] && strcmp(argv[x], "-t") == 0) { timeout = strtol(argv[x+1], NULL, 10); x += 2; } else if (getenv("STARPU_TIMEOUT_ENV")) { /* get user-defined iter_max value */ timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); } else if (timeout <= 0) { timeout = DEFAULT_TIMEOUT; if ((launcher && strstr(launcher, "valgrind")) || (launcher && strstr(launcher, "helgrind")) || tsan) timeout *= 20; if (asan || usan || lsan || (launcher && strstr(launcher, "compute-sanitizer"))) timeout *= 5; if (timeout > 1750) timeout = 1750; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG timeout *= 20; #endif #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE /* compare values between the 2 values of timeout */ if (getenv("MPIEXEC_TIMEOUT")) { int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); if (mpiexec_timeout != timeout) fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); } #endif if (argv[x] && strcmp(argv[x], "-p") == 0) { test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); x += 3; } else { test_name = argv[x]; x += 1; } if (!test_name) { fprintf(stderr, "[error] Need name of program to start\n"); exit(EXIT_FAILURE); } size_t len = strlen(test_name); if (len >= 3 && test_name[len-3] == '.' && test_name[len-2] == 's' && test_name[len-1] == 'h') { /* This is a shell script, don't run ourself on bash, but make * the script call us for each program invocation */ char *launch = NULL; if (top_builddir == NULL) // this may fail if .libs is in the directory path setenv("STARPU_LAUNCH", argv[0], 1); else { launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); strcpy(launch, top_builddir); strcat(launch, "/tests/loader"); setenv("STARPU_LAUNCH", launch, 1); } execvp(test_name, argv+x-1); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); free(launch); exit(EXIT_FAILURE); } if (strstr(test_name, "spmv/dw_block_spmv")) { test_args = (char *) calloc(512, sizeof(char)); snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); } else if (strstr(test_name, "starpu_perfmodel_display")) { if (x >= argc) test_args = strdup("-l"); } else if (strstr(test_name, "starpu_perfmodel_plot")) { if (x >= argc) test_args = strdup("-l"); } /* get launcher program */ if (launcher_args) launcher_args=strdup(launcher_args); if (top_builddir == NULL) { fprintf(stderr, "warning: $top_builddir undefined, " "so $STARPU_CHECK_LAUNCHER ignored\n"); launcher = NULL; launcher_args = NULL; libtool = NULL; } else { libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); strcpy(libtool, top_builddir); strcat(libtool, "/libtool"); } if (launcher) { const char *top_srcdir = getenv("top_srcdir"); decode(&launcher, "@top_srcdir@", top_srcdir); decode(&launcher_args, "@top_srcdir@", top_srcdir); } setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); /* set SIGALARM handler */ sa.sa_flags = SA_RESETHAND | SA_NODEFER; sigemptyset(&sa.sa_mask); sa.sa_handler = test_cleaner; if (-1 == sigaction(SIGALRM, &sa, NULL)) perror("sigaction"); signal(SIGINT, forwardsig); signal(SIGHUP, forwardsig); signal(SIGPIPE, forwardsig); signal(SIGTERM, forwardsig); child_pid = fork(); if (child_pid == 0) { char *launcher_argv[100]; int i=0; setpgid(0, 0); /* "Launchers" such as Valgrind need to be inserted * after the Libtool-generated wrapper scripts, hence * this special-case. */ if (launcher && top_builddir != NULL) { launcher_argv[i++] = libtool; launcher_argv[i++] = "--mode=execute"; launcher_argv[i++] = launcher; if (launcher_args) { launcher_argv[i++] = strtok(launcher_args, " "); while (launcher_argv[i-1]) { launcher_argv[i++] = strtok(NULL, " "); } } } launcher_argv[i++] = test_name; if (test_args) launcher_argv[i++] = test_args; else while (argv[x]) { launcher_argv[i++] = argv[x++]; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG launcher_argv[i++] = "--cfg=contexts/factory:thread"; #endif #endif launcher_argv[i++] = NULL; execvp(*launcher_argv, launcher_argv); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); exit(EXIT_FAILURE); } if (child_pid == -1) { fprintf(stderr, "[error] fork. test marked as failed\n"); exit(EXIT_FAILURE); } free(test_args); free(libtool); ret = EXIT_SUCCESS; gettimeofday(&start, NULL); alarm(timeout); if (child_pid == waitpid(child_pid, &child_exit_status, 0)) { if (WIFEXITED(child_exit_status)) { int status = WEXITSTATUS(child_exit_status); if (status == EXIT_SUCCESS) { alarm(0); } else { if (status != AUTOTEST_SKIPPED_TEST) fprintf(stdout, "`%s' exited with return code %d\n", test_name, status); ret = status; } } else if (WIFSIGNALED(child_exit_status)) { fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", test_name, WTERMSIG(child_exit_status)); launch_gdb(test_name); ret = EXIT_FAILURE; } else { fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", test_name); ret = EXIT_FAILURE; } } gettimeofday(&end, NULL); timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); return ret; } starpu-1.4.9+dfsg/examples/lu/000077500000000000000000000000001507764646700162465ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/lu/blas_complex.c000066400000000000000000000147421507764646700210720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "blas_complex.h" /* * This file contains BLAS wrappers for the different BLAS implementations * (eg. REFBLAS, STARPU_ATLAS, GOTOBLAS ...). We assume a Fortran orientation as most * libraries do not supply C-based ordering. */ #ifdef STARPU_ATLAS #error not implemented #elif defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS) #error not implemented #elif defined(STARPU_OPENBLAS) || defined(STARPU_MKL) inline void CGEMM(char *transa, char *transb, int M, int N, int K, complex float alpha, complex float *A, int lda, complex float *B, int ldb, complex float beta, complex float *C, int ldc) { cgemm_(transa, transb, &M, &N, &K, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } inline void ZGEMM(char *transa, char *transb, int M, int N, int K, complex double alpha, complex double *A, int lda, complex double *B, int ldb, complex double beta, complex double *C, int ldc) { zgemm_(transa, transb, &M, &N, &K, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } inline void CGEMV(char *transa, int M, int N, complex float alpha, complex float *A, int lda, complex float *X, int incX, complex float beta, complex float *Y, int incY) { cgemv_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); } inline void ZGEMV(char *transa, int M, int N, complex double alpha, complex double *A, int lda, complex double *X, int incX, complex double beta, complex double *Y, int incY) { zgemv_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); } inline float SCASUM(int N, complex float *X, int incX) { return scasum_(&N, X, &incX); } inline double DZASUM(int N, complex double *X, int incX) { return dzasum_(&N, X, &incX); } void CSCAL(int N, complex float alpha, complex float *X, int incX) { cscal_(&N, &alpha, X, &incX); } void ZSCAL(int N, complex double alpha, complex double *X, int incX) { zscal_(&N, &alpha, X, &incX); } void CTRSM (const char *side, const char *uplo, const char *transa, const char *diag, const int m, const int n, const complex float alpha, const complex float *A, const int lda, complex float *B, const int ldb) { ctrsm_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb); } void ZTRSM (const char *side, const char *uplo, const char *transa, const char *diag, const int m, const int n, const complex double alpha, const complex double *A, const int lda, complex double *B, const int ldb) { ztrsm_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb); } void CSYR (const char *uplo, const int n, const complex float alpha, const complex float *x, const int incx, complex float *A, const int lda) { csyr_(uplo, &n, &alpha, x, &incx, A, &lda); } void CSYRK (const char *uplo, const char *trans, const int n, const int k, const complex float alpha, const complex float *A, const int lda, const complex float beta, complex float *C, const int ldc) { csyrk_(uplo, trans, &n, &k, &alpha, A, &lda, &beta, C, &ldc); } void CGERU(const int m, const int n, const complex float alpha, const complex float *x, const int incx, const complex float *y, const int incy, complex float *A, const int lda) { cgeru_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); } void ZGERU(const int m, const int n, const complex double alpha, const complex double *x, const int incx, const complex double *y, const int incy, complex double *A, const int lda) { zgeru_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); } void CTRSV (const char *uplo, const char *trans, const char *diag, const int n, const complex float *A, const int lda, complex float *x, const int incx) { ctrsv_(uplo, trans, diag, &n, A, &lda, x, &incx); } void CTRMM(const char *side, const char *uplo, const char *transA, const char *diag, const int m, const int n, const complex float alpha, const complex float *A, const int lda, complex float *B, const int ldb) { ctrmm_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb); } void ZTRMM(const char *side, const char *uplo, const char *transA, const char *diag, const int m, const int n, const complex double alpha, const complex double *A, const int lda, complex double *B, const int ldb) { ztrmm_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb); } void CTRMV(const char *uplo, const char *transA, const char *diag, const int n, const complex float *A, const int lda, complex float *X, const int incX) { ctrmv_(uplo, transA, diag, &n, A, &lda, X, &incX); } void CAXPY(const int n, const complex float alpha, complex float *X, const int incX, complex float *Y, const int incY) { caxpy_(&n, &alpha, X, &incX, Y, &incY); } void ZAXPY(const int n, const complex double alpha, complex double *X, const int incX, complex double *Y, const int incY) { zaxpy_(&n, &alpha, X, &incX, Y, &incY); } int ICAMAX (const int n, complex float *X, const int incX) { int retVal; retVal = icamax_ (&n, X, &incX); return retVal; } int IZAMAX (const int n, complex double *X, const int incX) { int retVal; retVal = izamax_ (&n, X, &incX); return retVal; } complex float CDOTU(const int n, const complex float *x, const int incx, const complex float *y, const int incy) { complex float retVal = 0; /* GOTOBLAS will return a FLOATRET which is a double, not a float */ retVal = (float)cdotu_(&n, x, &incx, y, &incy); return retVal; } complex double ZDOTU(const int n, const complex double *x, const int incx, const complex double *y, const int incy) { return zdotu_(&n, x, &incx, y, &incy); } void CSWAP(const int n, complex float *X, const int incX, complex float *Y, const int incY) { cswap_(&n, X, &incX, Y, &incY); } void ZSWAP(const int n, complex double *X, const int incX, complex double *Y, const int incY) { zswap_(&n, X, &incX, Y, &incY); } #else #error "no BLAS lib available..." #endif starpu-1.4.9+dfsg/examples/lu/blas_complex.h000066400000000000000000000216441507764646700210760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __BLAS_H__ #define __BLAS_H__ #include #if defined(STARPU_MKL) #define MKLcomplex8 complex float #define MKLcomplex16 complex double #endif void CGEMM(char *transa, char *transb, int M, int N, int K, complex float alpha, complex float *A, int lda, complex float *B, int ldb, complex float beta, complex float *C, int ldc); void ZGEMM(char *transa, char *transb, int M, int N, int K, complex double alpha, complex double *A, int lda, complex double *B, int ldb, complex double beta, complex double *C, int ldc); void CGEMV(char *transa, int M, int N, complex float alpha, complex float *A, int lda, complex float *X, int incX, complex float beta, complex float *Y, int incY); void ZGEMV(char *transa, int M, int N, complex double alpha, complex double *A, int lda, complex double *X, int incX, complex double beta, complex double *Y, int incY); float SCASUM(int N, complex float *X, int incX); double DZASUM(int N, complex double *X, int incX); void CSCAL(int N, complex float alpha, complex float *X, int incX); void ZSCAL(int N, complex double alpha, complex double *X, int incX); void CTRSM (const char *side, const char *uplo, const char *transa, const char *diag, const int m, const int n, const complex float alpha, const complex float *A, const int lda, complex float *B, const int ldb); void ZTRSM (const char *side, const char *uplo, const char *transa, const char *diag, const int m, const int n, const complex double alpha, const complex double *A, const int lda, complex double *B, const int ldb); void CSYR (const char *uplo, const int n, const complex float alpha, const complex float *x, const int incx, complex float *A, const int lda); void CSYRK (const char *uplo, const char *trans, const int n, const int k, const complex float alpha, const complex float *A, const int lda, const complex float beta, complex float *C, const int ldc); void CGERU (const int m, const int n, const complex float alpha, const complex float *x, const int incx, const complex float *y, const int incy, complex float *A, const int lda); void ZGERU(const int m, const int n, const complex double alpha, const complex double *x, const int incx, const complex double *y, const int incy, complex double *A, const int lda); void CTRSV (const char *uplo, const char *trans, const char *diag, const int n, const complex float *A, const int lda, complex float *x, const int incx); void CTRMM(const char *side, const char *uplo, const char *transA, const char *diag, const int m, const int n, const complex float alpha, const complex float *A, const int lda, complex float *B, const int ldb); void ZTRMM(const char *side, const char *uplo, const char *transA, const char *diag, const int m, const int n, const complex double alpha, const complex double *A, const int lda, complex double *B, const int ldb); void CTRMV(const char *uplo, const char *transA, const char *diag, const int n, const complex float *A, const int lda, complex float *X, const int incX); void CAXPY(const int n, const complex float alpha, complex float *X, const int incX, complex float *Y, const int incy); void ZAXPY(const int n, const complex double alpha, complex double *X, const int incX, complex double *Y, const int incY); int ICAMAX (const int n, complex float *X, const int incX); int IZAMAX (const int n, complex double *X, const int incX); complex float CDOTU(const int n, const complex float *x, const int incx, const complex float *y, const int incy); complex double ZDOTU(const int n, const complex double *x, const int incx, const complex double *y, const int incy); void CSWAP(const int n, complex float *x, const int incx, complex float *y, const int incy); void ZSWAP(const int n, complex double *x, const int incx, complex double *y, const int incy); #if defined(STARPU_GOTO) || defined(STARPU_SYSTEM_BLAS) #error not implemented #elif defined(STARPU_OPENBLAS) || defined(STARPU_MKL) extern void cgemm_ (const char *transa, const char *transb, const int *m, const int *n, const int *k, const complex float *alpha, const complex float *A, const int *lda, const complex float *B, const int *ldb, const complex float *beta, complex float *C, const int *ldc); extern void zgemm_ (const char *transa, const char *transb, const int *m, const int *n, const int *k, const complex double *alpha, const complex double *A, const int *lda, const complex double *B, const int *ldb, const complex double *beta, complex double *C, const int *ldc); extern void cgemv_(const char *trans, int *m, int *n, complex float *alpha, void *a, int *lda, void *x, int *incx, complex float *beta, void *y, int *incy); extern void zgemv_(const char *trans, int *m, int *n, complex double *alpha, void *a, int *lda, void *x, int *incx, complex double *beta, void *y, int *incy); extern void csyr_ (const char *uplo, const int *n, const complex float *alpha, const complex float *x, const int *incx, complex float *A, const int *lda); extern void csyrk_ (const char *uplo, const char *trans, const int *n, const int *k, const complex float *alpha, const complex float *A, const int *lda, const complex float *beta, complex float *C, const int *ldc); extern void ctrsm_ (const char *side, const char *uplo, const char *transa, const char *diag, const int *m, const int *n, const complex float *alpha, const complex float *A, const int *lda, complex float *B, const int *ldb); extern void ztrsm_ (const char *side, const char *uplo, const char *transa, const char *diag, const int *m, const int *n, const complex double *alpha, const complex double *A, const int *lda, complex double *B, const int *ldb); extern complex double scasum_ (const int *n, const complex float *x, const int *incx); extern complex double dzasum_ (const int *n, const complex double *x, const int *incx); extern void cscal_ (const int *n, const complex float *alpha, complex float *x, const int *incx); extern void zscal_ (const int *n, const complex double *alpha, complex double *x, const int *incx); extern void cgeru_(const int *m, const int *n, const complex float *alpha, const complex float *x, const int *incx, const complex float *y, const int *incy, complex float *A, const int *lda); extern void zgeru_(const int *m, const int *n, const complex double *alpha, const complex double *x, const int *incx, const complex double *y, const int *incy, complex double *A, const int *lda); extern void ctrsv_ (const char *uplo, const char *trans, const char *diag, const int *n, const complex float *A, const int *lda, complex float *x, const int *incx); extern void ctrmm_(const char *side, const char *uplo, const char *transA, const char *diag, const int *m, const int *n, const complex float *alpha, const complex float *A, const int *lda, complex float *B, const int *ldb); extern void ztrmm_(const char *side, const char *uplo, const char *transA, const char *diag, const int *m, const int *n, const complex double *alpha, const complex double *A, const int *lda, complex double *B, const int *ldb); extern void ctrmv_(const char *uplo, const char *transA, const char *diag, const int *n, const complex float *A, const int *lda, complex float *X, const int *incX); extern void caxpy_(const int *n, const complex float *alpha, complex float *X, const int *incX, complex float *Y, const int *incy); extern void zaxpy_(const int *n, const complex double *alpha, complex double *X, const int *incX, complex double *Y, const int *incy); extern int icamax_(const int *n, complex float *X, const int *incX); extern int izamax_(const int *n, complex double *X, const int *incX); /* for some reason, FLOATRET is not a float but a double in GOTOBLAS */ extern complex double cdotu_(const int *n, const complex float *x, const int *incx, const complex float *y, const int *incy); extern complex double zdotu_(const int *n, const complex double *x, const int *incx, const complex double *y, const int *incy); extern void cswap_(const int *n, complex float *x, const int *incx, complex float *y, const int *incy); extern void zswap_(const int *n, complex double *x, const int *incx, complex double *y, const int *incy); #endif #endif /* __BLAS_COMPLEX_H__ */ starpu-1.4.9+dfsg/examples/lu/clu.c000066400000000000000000000014221507764646700171740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Complex float LU version, explicit dependencies */ #include "complex_float.h" #include "xlu.c" starpu-1.4.9+dfsg/examples/lu/clu_implicit.c000066400000000000000000000014331507764646700210700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Complex float LU version, implicit dependencies */ #include "complex_float.h" #include "xlu_implicit.c" starpu-1.4.9+dfsg/examples/lu/clu_implicit_pivot.c000066400000000000000000000014671507764646700223200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Complex float LU version, implicit dependencies, and partial pivoting */ #include "complex_float.h" #include "xlu_implicit_pivot.c" starpu-1.4.9+dfsg/examples/lu/clu_kernels.c000066400000000000000000000014031507764646700207160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Complex float LU kernels */ #include "complex_float.h" #include "xlu_kernels.c" starpu-1.4.9+dfsg/examples/lu/clu_pivot.c000066400000000000000000000014271507764646700204220ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Complex float LU kernels with partial pivoting */ #include "complex_float.h" #include "xlu_pivot.c" starpu-1.4.9+dfsg/examples/lu/complex_double.h000066400000000000000000000030461507764646700214230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Complex double macros */ #include #include "blas_complex.h" #define TYPE complex double #define CUBLAS_TYPE cuDoubleComplex #define STARPU_LU(name) starpu_zlu_##name #define COMPLEX_LU #define CUBLAS_GEMM cublasZgemm #define CUBLAS_TRSM cublasZtrsm #define CUBLAS_SCAL cublasZscal #define CUBLAS_GER cublasZgeru #define CUBLAS_SWAP cublasZswap #define CUBLAS_IAMAX cublasIzamax #define CUSOLVER_GETRF cusolverDnZgetrf #define CUSOLVER_GETRF_BUFFERSIZE cusolverDnZgetrf_bufferSize #define CPU_GEMM ZGEMM #define CPU_TRSM ZTRSM #define CPU_SCAL ZSCAL #define CPU_GER ZGERU #define CPU_SWAP ZSWAP #define CPU_TRMM ZTRMM #define CPU_AXPY ZAXPY #define CPU_ASUM DZASUM #define CPU_IAMAX IZAMAX #define PIVOT_THRESHHOLD 10e-5 #define CAN_EXECUTE .can_execute = can_execute, #define ISZERO(f) (fpclassify(creal(f)) == FP_ZERO && fpclassify(cimag(f)) == FP_ZERO) starpu-1.4.9+dfsg/examples/lu/complex_float.h000066400000000000000000000030021507764646700212460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Complex float macros */ #include #include "blas_complex.h" #define TYPE complex float #define CUBLAS_TYPE cuComplex #define STARPU_LU(name) starpu_clu_##name #define COMPLEX_LU #define CUBLAS_GEMM cublasCgemm #define CUBLAS_TRSM cublasCtrsm #define CUBLAS_SCAL cublasCscal #define CUBLAS_GER cublasCgeru #define CUBLAS_SWAP cublasCswap #define CUBLAS_IAMAX cublasIcamax #define CUSOLVER_GETRF cusolverDnCgetrf #define CUSOLVER_GETRF_BUFFERSIZE cusolverDnCgetrf_bufferSize #define CPU_GEMM CGEMM #define CPU_TRSM CTRSM #define CPU_SCAL CSCAL #define CPU_GER CGERU #define CPU_SWAP CSWAP #define CPU_TRMM CTRMM #define CPU_AXPY CAXPY #define CPU_ASUM SCASUM #define CPU_IAMAX ICAMAX #define PIVOT_THRESHHOLD 10e-5 #define CAN_EXECUTE #define ISZERO(f) (fpclassify(creal(f)) == FP_ZERO && fpclassify(cimag(f)) == FP_ZERO) starpu-1.4.9+dfsg/examples/lu/dlu.c000066400000000000000000000014141507764646700171760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Real double LU version, explicit dependencies */ #include "lu-double.h" #include "xlu.c" starpu-1.4.9+dfsg/examples/lu/dlu_implicit.c000066400000000000000000000014251507764646700210720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Real double LU version, implicit dependencies */ #include "lu-double.h" #include "xlu_implicit.c" starpu-1.4.9+dfsg/examples/lu/dlu_implicit_pivot.c000066400000000000000000000014611507764646700223130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Real double LU version, implicit dependencies with partial pivoting */ #include "lu-double.h" #include "xlu_implicit_pivot.c" starpu-1.4.9+dfsg/examples/lu/dlu_kernels.c000066400000000000000000000013751507764646700207270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Real double LU kernels */ #include "lu-double.h" #include "xlu_kernels.c" starpu-1.4.9+dfsg/examples/lu/dlu_pivot.c000066400000000000000000000014211507764646700204150ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Real double LU kernels with partial pivoting */ #include "lu-double.h" #include "xlu_pivot.c" starpu-1.4.9+dfsg/examples/lu/lu-double.h000066400000000000000000000027421507764646700203140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Real double macros */ #define TYPE double #define CUBLAS_TYPE TYPE #define STARPU_LU(name) starpu_dlu_##name #define CUBLAS_GEMM cublasDgemm #define CUBLAS_TRSM cublasDtrsm #define CUBLAS_SCAL cublasDscal #define CUBLAS_GER cublasDger #define CUBLAS_SWAP cublasDswap #define CUBLAS_IAMAX cublasIdamax #define CUSOLVER_GETRF cusolverDnDgetrf #define CUSOLVER_GETRF_BUFFERSIZE cusolverDnDgetrf_bufferSize #define CPU_GEMM STARPU_DGEMM #define CPU_TRSM STARPU_DTRSM #define CPU_SCAL STARPU_DSCAL #define CPU_GER STARPU_DGER #define CPU_SWAP STARPU_DSWAP #define CPU_TRMM STARPU_DTRMM #define CPU_AXPY STARPU_DAXPY #define CPU_ASUM STARPU_DASUM #define CPU_IAMAX STARPU_IDAMAX #define PIVOT_THRESHHOLD 10e-10 #define CAN_EXECUTE .can_execute = can_execute, #define ISZERO(f) (fpclassify(f) == FP_ZERO) starpu-1.4.9+dfsg/examples/lu/lu-float.h000066400000000000000000000027021507764646700201430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Real float macros */ #define TYPE float #define CUBLAS_TYPE TYPE #define STARPU_LU(name) starpu_slu_##name #define CUBLAS_GEMM cublasSgemm #define CUBLAS_TRSM cublasStrsm #define CUBLAS_SCAL cublasSscal #define CUBLAS_GER cublasSger #define CUBLAS_SWAP cublasSswap #define CUBLAS_IAMAX cublasIsamax #define CUSOLVER_GETRF cusolverDnSgetrf #define CUSOLVER_GETRF_BUFFERSIZE cusolverDnSgetrf_bufferSize #define CPU_GEMM STARPU_SGEMM #define CPU_TRSM STARPU_STRSM #define CPU_SCAL STARPU_SSCAL #define CPU_GER STARPU_SGER #define CPU_SWAP STARPU_SSWAP #define CPU_TRMM STARPU_STRMM #define CPU_AXPY STARPU_SAXPY #define CPU_ASUM STARPU_SASUM #define CPU_IAMAX STARPU_ISAMAX #define PIVOT_THRESHHOLD 10e-5 #define CAN_EXECUTE #define ISZERO(f) (fpclassify(f) == FP_ZERO) starpu-1.4.9+dfsg/examples/lu/lu.sh000077500000000000000000000040331507764646700172250ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Test various LU options set -e PREFIX=$(dirname $0) rm -rf $PREFIX/lu.traces mkdir -p $PREFIX/lu.traces export STARPU_FXT_PREFIX=$PREFIX/lu.traces export STARPU_FXT_TRACE=1 if [ "$STARPU_QUICK_CHECK" = 1 ] then SIDE=16 else SIDE=160 fi $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $(($SIDE * 4)) -nblocks 4 -piv $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $(($SIDE * 4)) -nblocks 4 -no-stride $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $(($SIDE * 4)) -nblocks 4 -bound $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $(($SIDE * 2)) -nblocks 2 -bounddeps -directory $STARPU_FXT_PREFIX $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/lu_implicit_example_float -size $(($SIDE * 2)) -nblocks 2 -bound -bounddeps -bounddepsprio -directory $STARPU_FXT_PREFIX $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/lu_example_float -size $(($SIDE * 4)) -nblocks 4 -piv $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/lu_example_float -size $(($SIDE * 4)) -nblocks 4 -no-stride $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/lu_example_float -size $(($SIDE * 4)) -nblocks 4 -bound $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/lu_example_float -size $(($SIDE * 2)) -nblocks 2 -bounddeps -directory $PREFIX/lu.traces $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/lu_example_float -size $(($SIDE * 2)) -nblocks 2 -bound -bounddeps -bounddepsprio -directory $STARPU_FXT_PREFIX starpu-1.4.9+dfsg/examples/lu/lu_example.c000066400000000000000000000242341507764646700205520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Main body for the LU factorization: matrix initialization and result * checking */ #include #include #include #include #include #include #include "xlu.h" #include "xlu_kernels.h" #ifdef STARPU_HAVE_VALGRIND_H #include #endif #include "starpu_cusolver.h" static unsigned long size = 0; static unsigned nblocks = 0; static unsigned check = 0; static unsigned pivot = 0; static unsigned no_stride = 0; static unsigned profile = 0; static unsigned no_prio=0; unsigned bound = 0; unsigned bounddeps = 0; unsigned boundprio = 0; char *directory = NULL; #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) TYPE *A, *A_saved; /* in case we use non-strided blocks */ TYPE **A_blocks; static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-size") == 0) { char *argptr; size = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-nblocks") == 0) { char *argptr; nblocks = strtol(argv[++i], &argptr, 10); } #ifndef STARPU_SIMGRID else if (strcmp(argv[i], "-check") == 0) { check = 1; } else if (strcmp(argv[i], "-piv") == 0) { pivot = 1; } else if (strcmp(argv[i], "-no-stride") == 0) { no_stride = 1; } #endif else if (strcmp(argv[i], "-profile") == 0) { profile = 1; } else if (strcmp(argv[i], "-bound") == 0) { bound = 1; } else if (strcmp(argv[i], "-bounddeps") == 0) { bound = 1; bounddeps = 1; } else if (strcmp(argv[i], "-bounddepsprio") == 0) { bound = 1; bounddeps = 1; boundprio = 1; } else if (strcmp(argv[i], "-directory") == 0) { free(directory); directory = strdup(argv[++i]); } else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { fprintf(stderr,"usage: lu [-size n] [-nblocks b] [-piv] [-no-stride] [-profile] [-bound] [-bounddeps] [-bounddepsprio] [-directory d]\n"); fprintf(stderr,"Default is size %lu and nblocks %u\n", size, nblocks); exit(0); } } } static void display_matrix(TYPE *m, unsigned n, unsigned ld, char *str) { (void)m; (void)n; (void)ld; (void)str; #if 0 FPRINTF(stderr, "***********\n"); FPRINTF(stderr, "Display matrix %s\n", str); unsigned i,j; for (j = 0; j < n; j++) { for (i = 0; i < n; i++) { FPRINTF(stderr, "%2.2f\t", m[i+j*ld]); } FPRINTF(stderr, "\n"); } FPRINTF(stderr, "***********\n"); #endif } void copy_blocks_into_matrix(void) { unsigned blocksize = (size/nblocks); unsigned i, j; unsigned bi, bj; for (bj = 0; bj < nblocks; bj++) for (bi = 0; bi < nblocks; bi++) { for (j = 0; j < blocksize; j++) for (i = 0; i < blocksize; i++) { A[(i+bi*blocksize) + (j + bj*blocksize)*size] = A_blocks[bi+nblocks*bj][i + j * blocksize]; } starpu_free_noflag(A_blocks[bi+nblocks*bj], (size_t)blocksize*blocksize*sizeof(TYPE)); } } void copy_matrix_into_blocks(void) { unsigned blocksize = (size/nblocks); unsigned i, j; unsigned bi, bj; for (bj = 0; bj < nblocks; bj++) for (bi = 0; bi < nblocks; bi++) { starpu_malloc((void **)&A_blocks[bi+nblocks*bj], (size_t)blocksize*blocksize*sizeof(TYPE)); for (j = 0; j < blocksize; j++) for (i = 0; i < blocksize; i++) { A_blocks[bi+nblocks*bj][i + j * blocksize] = A[(i+bi*blocksize) + (j + bj*blocksize)*size]; } } } static void init_matrix(void) { /* allocate matrix */ #ifdef STARPU_SIMGRID A = (void*) 1; #else starpu_malloc_flags((void **)&A, (size_t)size*size*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); #endif STARPU_ASSERT(A); starpu_srand48((long int)time(NULL)); /* starpu_srand48(0); */ #ifndef STARPU_SIMGRID /* initialize matrix content */ unsigned long i,j; for (j = 0; j < size; j++) { for (i = 0; i < size; i++) { A[i + j*size] = (TYPE)starpu_drand48(); #ifdef COMPLEX_LU /* also randomize the imaginary component for complex number cases */ A[i + j*size] += (TYPE)(I*starpu_drand48()); #endif if (i == j) { A[i + j*size] += 1; A[i + j*size] *= size; } } } #endif } static void save_matrix(void) { A_saved = malloc((size_t)size*size*sizeof(TYPE)); STARPU_ASSERT(A_saved); memcpy(A_saved, A, (size_t)size*size*sizeof(TYPE)); } static double frobenius_norm(TYPE *v, unsigned n) { double sum2 = 0.0; /* compute sqrt(Sum(|x|^2)) */ unsigned i,j; for (j = 0; j < n; j++) for (i = 0; i < n; i++) { double a = fabsl((double)v[i+n*j]); sum2 += a*a; } return sqrt(sum2); } static void pivot_saved_matrix(unsigned *ipiv) { unsigned k; for (k = 0; k < size; k++) { if (k != ipiv[k]) { /* FPRINTF(stderr, "SWAP %d and %d\n", k, ipiv[k]); */ CPU_SWAP(size, &A_saved[k*size], 1, &A_saved[ipiv[k]*size], 1); } } } static void check_result(void) { unsigned i,j; TYPE *L, *U; L = malloc((size_t)size*size*sizeof(TYPE)); U = malloc((size_t)size*size*sizeof(TYPE)); memset(L, 0, size*size*sizeof(TYPE)); memset(U, 0, size*size*sizeof(TYPE)); /* only keep the lower part */ for (j = 0; j < size; j++) { for (i = 0; i < j; i++) { L[j+i*size] = A[j+i*size]; } /* diag i = j */ L[j+j*size] = A[j+j*size]; U[j+j*size] = 1.0; for (i = j+1; i < size; i++) { U[j+i*size] = A[j+i*size]; } } display_matrix(L, size, size, "L"); display_matrix(U, size, size, "U"); /* now A_err = L, compute L*U */ CPU_TRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size); display_matrix(A_saved, size, size, "P A_saved"); display_matrix(L, size, size, "LU"); /* compute "LU - A" in L*/ CPU_AXPY(size*size, -1.0, A_saved, 1, L, 1); display_matrix(L, size, size, "Residuals"); #ifdef COMPLEX_LU double err = CPU_ASUM(size*size, L, 1); int max = CPU_IAMAX(size*size, L, 1); TYPE l_max = L[max]; FPRINTF(stderr, "Avg error : %e\n", err/(size*size)); FPRINTF(stderr, "Max error : %e\n", sqrt(creal(l_max)*creal(l_max)+cimag(l_max)*cimag(l_max))); #else TYPE err = CPU_ASUM(size*size, L, 1); int max = CPU_IAMAX(size*size, L, 1); FPRINTF(stderr, "Avg error : %e\n", err/(size*size)); FPRINTF(stderr, "Max error : %e\n", L[max]); #endif double residual = frobenius_norm(L, size); double matnorm = frobenius_norm(A_saved, size); FPRINTF(stderr, "||%sA-LU|| / (||A||*N) : %e\n", pivot?"P":"", residual/(matnorm*size)); if (residual/(matnorm*size) > PIVOT_THRESHHOLD) exit(-1); free(L); free(U); free(A_saved); } int main(int argc, char **argv) { int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int power = starpu_cpu_worker_get_count() + 32 * starpu_cuda_worker_get_count(); int power_cbrt = cbrt(power); #ifndef STARPU_LONG_CHECK power_cbrt /= 2; #endif if (power_cbrt < 1) power_cbrt = 1; #ifdef STARPU_QUICK_CHECK if (!size) size = 320*2*power_cbrt; if (!nblocks) nblocks = 2*power_cbrt; #else if (!size) size = 960*8*power_cbrt; if (!nblocks) nblocks = 8*power_cbrt; #endif parse_args(argc, argv); #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) size = 16; #endif starpu_cublas_init(); starpu_cusolver_init(); init_matrix(); #ifndef STARPU_SIMGRID unsigned *ipiv = NULL; if (check) save_matrix(); display_matrix(A, size, size, "A"); if (profile) starpu_profiling_status_set(STARPU_PROFILING_ENABLE); /* Factorize the matrix (in place) */ if (pivot) { ipiv = malloc(size*sizeof(unsigned)); if (no_stride) { /* in case the LU decomposition uses non-strided blocks, we _copy_ the matrix into smaller blocks */ A_blocks = malloc(nblocks*nblocks*sizeof(TYPE *)); copy_matrix_into_blocks(); ret = STARPU_LU(lu_decomposition_pivot_no_stride)(A_blocks, ipiv, size, size, nblocks, no_prio); copy_blocks_into_matrix(); free(A_blocks); } else { double start; double end; start = starpu_timing_now(); ret = STARPU_LU(lu_decomposition_pivot)(A, ipiv, size, size, nblocks, no_prio); end = starpu_timing_now(); double timing = end - start; unsigned n = size; double flop = (2.0f*n*n*n)/3.0f; FPRINTF(stderr, "Synthetic GFlop/s (TOTAL) : \n"); FPRINTF(stdout, "%u %6.2f\n", n, (flop/timing/1000.0f)); } } else #endif { ret = STARPU_LU(lu_decomposition)(A, size, size, nblocks, no_prio); } if (profile) { FPRINTF(stderr, "Setting profile\n"); starpu_profiling_status_set(STARPU_PROFILING_DISABLE); starpu_profiling_bus_helper_display_summary(); } if (bound) { if (bounddeps) { if (!directory) directory = strdup("."); char filename[256]; snprintf(filename, sizeof(filename), "%s/%s", directory, "lu.pl"); FILE *f = fopen(filename, "w"); starpu_bound_print_lp(f); FPRINTF(stderr,"system printed to %s\n", filename); fclose(f); snprintf(filename, sizeof(filename), "%s/%s", directory, "lu.mps"); f = fopen(filename, "w"); starpu_bound_print_mps(f); FPRINTF(stderr,"system printed to %s\n", filename); fclose(f); snprintf(filename, sizeof(filename), "%s/%s", directory, "lu.dot"); f = fopen(filename, "w"); starpu_bound_print_dot(f); FPRINTF(stderr,"system printed to %s\n", filename); fclose(f); } } #ifndef STARPU_SIMGRID if (check) { FPRINTF(stderr, "Checking result\n"); if (pivot) { pivot_saved_matrix(ipiv); } check_result(); } if (pivot) free(ipiv); #endif #ifndef STARPU_SIMGRID starpu_free_flags(A, (size_t)size*size*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); #endif starpu_cusolver_shutdown(); starpu_cublas_shutdown(); starpu_shutdown(); free(directory); if (ret == -ENODEV) return 77; else return 0; } starpu-1.4.9+dfsg/examples/lu/lu_example_complex_double.c000066400000000000000000000014451507764646700236320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Main body for the LU factorization, complex double version */ #include "complex_double.h" #include "lu_example.c" starpu-1.4.9+dfsg/examples/lu/lu_example_complex_float.c000066400000000000000000000014431507764646700234630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Main body for the LU factorization, complex float version */ #include "complex_float.h" #include "lu_example.c" starpu-1.4.9+dfsg/examples/lu/lu_example_double.c000066400000000000000000000014351507764646700221020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Main body for the LU factorization, real double version */ #include "lu-double.h" #include "lu_example.c" starpu-1.4.9+dfsg/examples/lu/lu_example_float.c000066400000000000000000000014331507764646700217330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Main body for the LU factorization, real float version */ #include "lu-float.h" #include "lu_example.c" starpu-1.4.9+dfsg/examples/lu/slu.c000066400000000000000000000014121507764646700172130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Real float LU version, explicit dependencies */ #include "lu-float.h" #include "xlu.c" starpu-1.4.9+dfsg/examples/lu/slu_implicit.c000066400000000000000000000014231507764646700211070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Real float LU version, implicit dependencies */ #include "lu-float.h" #include "xlu_implicit.c" starpu-1.4.9+dfsg/examples/lu/slu_implicit_pivot.c000066400000000000000000000014571507764646700223370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Real float LU version, implicit dependencies with partial pivoting */ #include "lu-float.h" #include "xlu_implicit_pivot.c" starpu-1.4.9+dfsg/examples/lu/slu_kernels.c000066400000000000000000000013731507764646700207440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Real float LU kernels */ #include "lu-float.h" #include "xlu_kernels.c" starpu-1.4.9+dfsg/examples/lu/slu_pivot.c000066400000000000000000000014171507764646700204410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Real float LU kernels with partial pivoting */ #include "lu-float.h" #include "xlu_pivot.c" starpu-1.4.9+dfsg/examples/lu/xlu.c000066400000000000000000000166061507764646700172330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* LU StarPU implementation using explicit tag dependencies */ #include "xlu.h" #include "xlu_kernels.h" #include "starpu_cusolver.h" /* * Construct the DAG */ static struct starpu_task *create_task(starpu_tag_t id) { struct starpu_task *task = starpu_task_create(); task->cl_arg = NULL; task->use_tag = 1; task->tag_id = id; return task; } static struct starpu_task *create_task_getrf(starpu_data_handle_t dataA, unsigned k, unsigned no_prio, int nblocks) { /* printf("task GETRF k = %d TAG = %llx\n", k, (TAG_GETRF(k))); */ struct starpu_task *task = create_task(TAG_GETRF(k)); task->cl = &cl_getrf; task->color = 0xffff00; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) task->handles[1] = scratch; #endif /* this is an important task */ if (!no_prio) task->priority = 3*nblocks - 3*k; /* Bottom-level-based prio */ /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_GETRF(k), 1, TAG_GEMM(k-1, k, k)); } return task; } static int create_task_trsm_ll(starpu_data_handle_t dataA, unsigned k, unsigned j, unsigned no_prio, int nblocks) { int ret; /* printf("task TRSM_LL k,i = %d,%d TAG = %llx\n", k,i, TAG_TRSM_LL(k,i)); */ struct starpu_task *task = create_task(TAG_TRSM_LL(k, j)); task->cl = &cl_trsm_ll; task->color = 0x8080ff; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k); if (!no_prio) task->priority = 3*nblocks - (2*k + j); /* Bottom-level-based prio */ /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_TRSM_LL(k, j), 2, TAG_GETRF(k), TAG_GEMM(k-1, k, j)); } else { starpu_tag_declare_deps(TAG_TRSM_LL(k, j), 1, TAG_GETRF(k)); } ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } static int create_task_trsm_ru(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned no_prio, int nblocks) { int ret; struct starpu_task *task = create_task(TAG_TRSM_RU(k, i)); task->cl = &cl_trsm_ru; task->color = 0x8080c0; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, i); if (!no_prio) task->priority = 3*nblocks - (2*k + i); /* Bottom-level-based prio */ /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_TRSM_RU(k, i), 2, TAG_GETRF(k), TAG_GEMM(k-1, i, k)); } else { starpu_tag_declare_deps(TAG_TRSM_RU(k, i), 1, TAG_GETRF(k)); } ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } static int create_task_gemm(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j, unsigned no_prio, int nblocks) { int ret; /* printf("task GEMM k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG_GEMM(k,i,j)); */ struct starpu_task *task = create_task(TAG_GEMM(k, i, j)); task->cl = &cl_gemm; task->color = 0x00ff00; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, i); /* produced by TAG_TRSM_RU(k, i) */ task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k); /* produced by TAG_TRSM_LL(k, j) */ task->handles[2] = starpu_data_get_sub_data(dataA, 2, j, i); /* produced by TAG_GEMM(k-1, i, j) */ if (!no_prio) task->priority = 3*nblocks - (k + i + j); /* Bottom-level-based prio */ /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_GEMM(k, i, j), 3, TAG_GEMM(k-1, i, j), TAG_TRSM_LL(k, j), TAG_TRSM_RU(k, i)); } else { starpu_tag_declare_deps(TAG_GEMM(k, i, j), 2, TAG_TRSM_LL(k, j), TAG_TRSM_RU(k, i)); } ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } /* * code to bootstrap the factorization */ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks, unsigned no_prio) { int ret; double start; double end; struct starpu_task *entry_task = NULL; /* create all the DAG nodes */ unsigned i,j,k; if (bound) starpu_bound_start(bounddeps, boundprio); for (k = 0; k < nblocks; k++) { starpu_iteration_push(k); struct starpu_task *task = create_task_getrf(dataA, k, no_prio, nblocks); /* we defer the launch of the first task */ if (k == 0) { entry_task = task; } else { ret = starpu_task_submit(task); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } for (i = k+1; i #include #define TAG_GETRF(k) ((starpu_tag_t)((1ULL<<60) | (unsigned long long)(k))) #define TAG_TRSM_LL(k,i) ((starpu_tag_t)(((2ULL<<60) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(i)))) #define TAG_TRSM_RU(k,j) ((starpu_tag_t)(((3ULL<<60) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(j)))) #define TAG_GEMM(k,i,j) ((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) \ | ((unsigned long long)(i)<<16) \ | (unsigned long long)(j)))) #define PIVOT(k,i) ((starpu_tag_t)(((5ULL<<60) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(i)))) #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define PRINTF(fmt, ...) do { if (!getenv("STARPU_SSILENT")) {printf(fmt, ## __VA_ARGS__); }} while(0) #define BLAS3_FLOP(n1,n2,n3) \ (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3)) #ifdef CHECK_RESULTS static void compare_A_LU(float *A, float *LU, unsigned size, unsigned ld) { unsigned i,j; float *L; float *U; L = malloc(size*size*sizeof(float)); U = malloc(size*size*sizeof(float)); memset(L, 0, size*size*sizeof(float)); memset(U, 0, size*size*sizeof(float)); /* only keep the lower part */ for (j = 0; j < size; j++) { for (i = 0; i < j; i++) { L[j+i*size] = LU[j+i*ld]; } /* diag i = j */ L[j+j*size] = LU[j+j*ld]; U[j+j*size] = 1.0f; for (i = j+1; i < size; i++) { U[j+i*size] = LU[j+i*ld]; } } /* now A_err = L, compute L*U */ STARPU_STRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size); float max_err = 0.0f; for (i = 0; i < size ; i++) { for (j = 0; j < size; j++) { max_err = STARPU_MAX(max_err, fabs(L[j+i*size] - A[j+i*ld])); } } FPRINTF(stdout, "max error between A and L*U = %f \n", max_err); } #endif /* CHECK_RESULTS */ void dw_cpu_codelet_update_getrf(void **, void *); void dw_cpu_codelet_update_trsm_ll(void **, void *); void dw_cpu_codelet_update_trsm_ru(void **, void *); void dw_cpu_codelet_update_gemm(void **, void *); #ifdef STARPU_USE_CUDA void dw_cublas_codelet_update_getrf(void *descr[], void *_args); void dw_cublas_codelet_update_trsm_ll(void *descr[], void *_args); void dw_cublas_codelet_update_trsm_ru(void *descr[], void *_args); void dw_cublas_codelet_update_gemm(void *descr[], void *_args); #endif void dw_callback_codelet_update_getrf(void *); void dw_callback_codelet_update_trsm_ll_21(void *); void dw_callback_codelet_update_gemm(void *); void dw_callback_v2_codelet_update_getrf(void *); void dw_callback_v2_codelet_update_trsm_ll(void *); void dw_callback_v2_codelet_update_trsm_ru(void *); void dw_callback_v2_codelet_update_gemm(void *); extern struct starpu_perfmodel model_getrf; extern struct starpu_perfmodel model_trsm_ll; extern struct starpu_perfmodel model_trsm_ru; extern struct starpu_perfmodel model_gemm; extern unsigned bound; extern unsigned bounddeps; extern unsigned boundprio; extern starpu_data_handle_t scratch; void lu_kernel_init(int nb); void lu_kernel_fini(void); struct piv_s { unsigned *piv; /* complete pivot array */ unsigned first; /* first element */ unsigned last; /* last element */ }; int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio); int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio); int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks, unsigned no_prio); #endif /* __XLU_H__ */ starpu-1.4.9+dfsg/examples/lu/xlu_implicit.c000066400000000000000000000140031507764646700211120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* LU StarPU implementation using implicit task dependencies. */ #include "xlu.h" #include "xlu_kernels.h" #include "starpu_cusolver.h" static int create_task_getrf(starpu_data_handle_t dataA, unsigned k, unsigned no_prio, int nblocks) { int ret; struct starpu_task *task = starpu_task_create(); task->cl = &cl_getrf; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) task->handles[1] = scratch; #endif task->tag_id = TAG_GETRF(k); task->color = 0xffff00; /* this is an important task */ if (!no_prio) task->priority = 3*nblocks - 3*k; /* Bottom-level-based prio */ ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } static int create_task_trsm_ll(starpu_data_handle_t dataA, unsigned k, unsigned j, unsigned no_prio, int nblocks) { int ret; struct starpu_task *task = starpu_task_create(); task->cl = &cl_trsm_ll; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k); task->tag_id = TAG_TRSM_LL(k,j); task->color = 0x8080ff; if (!no_prio) task->priority = 3*nblocks - (2*k + j); /* Bottom-level-based prio */ ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } static int create_task_trsm_ru(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned no_prio, int nblocks) { int ret; struct starpu_task *task = starpu_task_create(); task->cl = &cl_trsm_ru; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k); task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, i); task->tag_id = TAG_TRSM_RU(k,i); task->color = 0x8080c0; if (!no_prio) task->priority = 3*nblocks - (2*k + i); /* Bottom-level-based prio */ ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } static int create_task_gemm(starpu_data_handle_t dataA, unsigned k, unsigned i, unsigned j, unsigned no_prio, int nblocks) { int ret; struct starpu_task *task = starpu_task_create(); task->cl = &cl_gemm; task->color = 0x00ff00; /* which sub-data is manipulated ? */ task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, i); task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k); task->handles[2] = starpu_data_get_sub_data(dataA, 2, j, i); task->tag_id = TAG_GEMM(k,i,j); if (!no_prio) task->priority = 3*nblocks - (k + i + j); /* Bottom-level-based prio */ ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } /* * code to bootstrap the factorization */ static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks, unsigned no_prio) { double start; double end; /* create all the DAG nodes */ unsigned i,j,k; if (bound) starpu_bound_start(bounddeps, boundprio); start = starpu_timing_now(); for (k = 0; k < nblocks; k++) { int ret; starpu_iteration_push(k); ret = create_task_getrf(dataA, k, no_prio, nblocks); if (ret == -ENODEV) return ret; for (i = k+1; icl = &cl_pivot; task->color = 0xc0c000; /* which sub-data is manipulated ? */ task->handles[0] = get_block(dataAp, nblocks, k, i); task->tag_id = PIVOT(k, i); task->cl_arg = &piv_description[k]; /* this is an important task */ if (!no_prio) task->priority = 3*nblocks - (2*k + i); /* Bottom-level-based prio */ ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } static int create_task_getrf_pivot(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, struct piv_s *piv_description, starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) { int ret; struct starpu_task *task = starpu_task_create(); task->cl = &cl_getrf_pivot; task->color = 0xffff00; task->cl_arg = &piv_description[k]; /* which sub-data is manipulated ? */ task->handles[0] = get_block(dataAp, nblocks, k, k); task->tag_id = TAG_GETRF(k); /* this is an important task */ if (!no_prio) task->priority = 3*nblocks - 3*k; /* Bottom-level-based prio */ ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } static int create_task_trsm_ll(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned j, starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) { int ret; struct starpu_task *task = starpu_task_create(); task->cl = &cl_trsm_ll; task->color = 0x8080ff; /* which sub-data is manipulated ? */ task->handles[0] = get_block(dataAp, nblocks, k, k); task->handles[1] = get_block(dataAp, nblocks, j, k); task->tag_id = TAG_TRSM_LL(k,j); if (!no_prio) task->priority = 3*nblocks - (2*k + j); /* Bottom-level-based prio */ ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } static int create_task_trsm_ru(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) { int ret; struct starpu_task *task = starpu_task_create(); task->cl = &cl_trsm_ru; task->color = 0x8080c0; /* which sub-data is manipulated ? */ task->handles[0] = get_block(dataAp, nblocks, k, k); task->handles[1] = get_block(dataAp, nblocks, k, i); task->tag_id = TAG_TRSM_RU(k,i); if (!no_prio) task->priority = 3*nblocks - (2*k + i); /* Bottom-level-based prio */ ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } static int create_task_gemm(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, unsigned j, starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) { int ret; struct starpu_task *task = starpu_task_create(); task->cl = &cl_gemm; task->color = 0x00ff00; /* which sub-data is manipulated ? */ task->handles[0] = get_block(dataAp, nblocks, k, i); task->handles[1] = get_block(dataAp, nblocks, j, k); task->handles[2] = get_block(dataAp, nblocks, j, i); task->tag_id = TAG_GEMM(k,i,j); if (!no_prio) task->priority = 3*nblocks - (k + i + j); /* Bottom-level-based prio */ ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } /* * code to bootstrap the factorization */ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp, struct piv_s *piv_description, unsigned nblocks, starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), double *timing, unsigned no_prio) { double start; double end; /* create all the DAG nodes */ unsigned i,j,k; if (bound) starpu_bound_start(bounddeps, boundprio); start = starpu_timing_now(); for (k = 0; k < nblocks; k++) { int ret; starpu_iteration_push(k); ret = create_task_getrf_pivot(dataAp, nblocks, k, piv_description, get_block, no_prio); if (ret == -ENODEV) return ret; for (i = 0; i < nblocks; i++) { if (i != k) { ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block, no_prio); if (ret == -ENODEV) return ret; } } for (i = k+1; i #include #ifdef STARPU_USE_CUDA #include #include "starpu_cusolver.h" #endif #define str(s) #s #define xstr(s) str(s) #define STARPU_LU_STR(name) xstr(STARPU_LU(name)) #ifdef STARPU_USE_CUDA static const TYPE p1 = 1.0f; static const TYPE m1 = -1.0f; #endif starpu_data_handle_t scratch = NULL; /* * GEMM */ static inline void STARPU_LU(common_gemm)(void *descr[], int s, void *_args) { (void)_args; TYPE *right = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *left = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *center = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned dx = STARPU_MATRIX_GET_NX(descr[2]); unsigned dy = STARPU_MATRIX_GET_NY(descr[2]); unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld12 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); #ifdef STARPU_USE_CUDA cublasStatus_t status; #endif switch (s) { case 0: CPU_GEMM("N", "N", dy, dx, dz, (TYPE)-1.0, right, ld21, left, ld12, (TYPE)1.0, center, ld22); break; #ifdef STARPU_USE_CUDA case 1: { status = CUBLAS_GEMM(starpu_cublas_get_local_handle(), CUBLAS_OP_N, CUBLAS_OP_N, dx, dy, dz, (CUBLAS_TYPE *)&m1, (CUBLAS_TYPE *)right, ld21, (CUBLAS_TYPE *)left, ld12, (CUBLAS_TYPE *)&p1, (CUBLAS_TYPE *)center, ld22); if (STARPU_UNLIKELY(status != CUBLAS_STATUS_SUCCESS)) STARPU_CUBLAS_REPORT_ERROR(status); break; } #endif default: STARPU_ABORT(); break; } } void STARPU_LU(cpu_gemm)(void *descr[], void *_args) { STARPU_LU(common_gemm)(descr, 0, _args); } #ifdef STARPU_USE_CUDA void STARPU_LU(cublas_gemm)(void *descr[], void *_args) { STARPU_LU(common_gemm)(descr, 1, _args); } #endif /* STARPU_USE_CUDA */ static struct starpu_perfmodel STARPU_LU(model_gemm) = { .type = STARPU_HISTORY_BASED, #ifdef STARPU_ATLAS .symbol = STARPU_LU_STR(lu_model_gemm_atlas) #elif defined(STARPU_GOTO) .symbol = STARPU_LU_STR(lu_model_gemm_goto) #elif defined(STARPU_OPENBLAS) .symbol = STARPU_LU_STR(lu_model_gemm_openblas) #else .symbol = STARPU_LU_STR(lu_model_gemm) #endif }; #ifdef STARPU_USE_CUDA static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl) { (void)task; (void)nimpl; enum starpu_worker_archtype type = starpu_worker_get_type(workerid); if (type == STARPU_CPU_WORKER) return 1; #ifdef STARPU_SIMGRID /* We don't know, let's assume it can */ return 1; #else /* Cuda device */ const struct cudaDeviceProp *props; props = starpu_cuda_get_device_properties(workerid); if (props->major >= 2 || props->minor >= 3) { /* At least compute capability 1.3, supports doubles */ return 1; } else { /* Old card does not support doubles */ return 0; } #endif } #endif #define STRINGIFY_(x) #x #define STRINGIFY(x) STRINGIFY_(x) struct starpu_codelet cl_gemm = { .cpu_funcs = {STARPU_LU(cpu_gemm)}, .cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_gemm))}, #ifdef STARPU_USE_CUDA .cuda_funcs = {STARPU_LU(cublas_gemm)}, CAN_EXECUTE #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_RW}, .model = &STARPU_LU(model_gemm) }; /* * TRSM_LL */ static inline void STARPU_LU(common_trsmll)(void *descr[], int s, void *_args) { (void)_args; TYPE *sub11; TYPE *sub12; sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); sub12 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); unsigned nx12 = STARPU_MATRIX_GET_NX(descr[1]); unsigned ny12 = STARPU_MATRIX_GET_NY(descr[1]); #ifdef STARPU_USE_CUDA cublasStatus_t status; #endif /* solve L11 U12 = A12 (find U12) */ switch (s) { case 0: CPU_TRSM("L", "L", "N", "N", nx12, ny12, (TYPE)1.0, sub11, ld11, sub12, ld12); break; #ifdef STARPU_USE_CUDA case 1: status = CUBLAS_TRSM(starpu_cublas_get_local_handle(), CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, CUBLAS_DIAG_NON_UNIT, ny12, nx12, (CUBLAS_TYPE*)&p1, (CUBLAS_TYPE*)sub11, ld11, (CUBLAS_TYPE*)sub12, ld12); if (STARPU_UNLIKELY(status != CUBLAS_STATUS_SUCCESS)) STARPU_CUBLAS_REPORT_ERROR(status); break; #endif default: STARPU_ABORT(); break; } } void STARPU_LU(cpu_trsmll)(void *descr[], void *_args) { STARPU_LU(common_trsmll)(descr, 0, _args); } #ifdef STARPU_USE_CUDA void STARPU_LU(cublas_trsmll)(void *descr[], void *_args) { STARPU_LU(common_trsmll)(descr, 1, _args); } #endif /* STARPU_USE_CUDA */ static struct starpu_perfmodel STARPU_LU(model_trsm_ll) = { .type = STARPU_HISTORY_BASED, #ifdef STARPU_ATLAS .symbol = STARPU_LU_STR(lu_model_trsm_ll_atlas) #elif defined(STARPU_GOTO) .symbol = STARPU_LU_STR(lu_model_trsm_ll_goto) #elif defined(STARPU_OPENBLAS) .symbol = STARPU_LU_STR(lu_model_trsm_ll_openblas) #else .symbol = STARPU_LU_STR(lu_model_trsm_ll) #endif }; struct starpu_codelet cl_trsm_ll = { .cpu_funcs = {STARPU_LU(cpu_trsmll)}, .cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_trsmll))}, #ifdef STARPU_USE_CUDA .cuda_funcs = {STARPU_LU(cublas_trsmll)}, CAN_EXECUTE #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = {STARPU_R, STARPU_RW}, .model = &STARPU_LU(model_trsm_ll) }; /* * TRSM_RU */ static inline void STARPU_LU(common_trsmru)(void *descr[], int s, void *_args) { (void)_args; TYPE *sub11; TYPE *sub21; sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); sub21 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); unsigned nx21 = STARPU_MATRIX_GET_NX(descr[1]); unsigned ny21 = STARPU_MATRIX_GET_NY(descr[1]); #ifdef STARPU_USE_CUDA cublasStatus_t status; #endif switch (s) { case 0: CPU_TRSM("R", "U", "N", "U", nx21, ny21, (TYPE)1.0, sub11, ld11, sub21, ld21); break; #ifdef STARPU_USE_CUDA case 1: status = CUBLAS_TRSM(starpu_cublas_get_local_handle(), CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, CUBLAS_DIAG_UNIT, ny21, nx21, (CUBLAS_TYPE*)&p1, (CUBLAS_TYPE*)sub11, ld11, (CUBLAS_TYPE*)sub21, ld21); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; #endif default: STARPU_ABORT(); break; } } void STARPU_LU(cpu_trsmru)(void *descr[], void *_args) { STARPU_LU(common_trsmru)(descr, 0, _args); } #ifdef STARPU_USE_CUDA void STARPU_LU(cublas_trsmru)(void *descr[], void *_args) { STARPU_LU(common_trsmru)(descr, 1, _args); } #endif static struct starpu_perfmodel STARPU_LU(model_trsm_ru) = { .type = STARPU_HISTORY_BASED, #ifdef STARPU_ATLAS .symbol = STARPU_LU_STR(lu_model_trsm_ru_atlas) #elif defined(STARPU_GOTO) .symbol = STARPU_LU_STR(lu_model_trsm_ru_goto) #elif defined(STARPU_OPENBLAS) .symbol = STARPU_LU_STR(lu_model_trsm_ru_openblas) #else .symbol = STARPU_LU_STR(lu_model_trsm_ru) #endif }; struct starpu_codelet cl_trsm_ru = { .cpu_funcs = {STARPU_LU(cpu_trsmru)}, .cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_trsmru))}, #ifdef STARPU_USE_CUDA .cuda_funcs = {STARPU_LU(cublas_trsmru)}, CAN_EXECUTE #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = {STARPU_R, STARPU_RW}, .model = &STARPU_LU(model_trsm_ru) }; /* * GETRF */ static inline void STARPU_LU(common_getrf)(void *descr[], int s, void *_args) { (void)_args; TYPE *sub11; sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]); unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]); unsigned long z; #ifdef STARPU_USE_CUDA cublasStatus_t status; cublasHandle_t handle; cudaStream_t stream; #endif switch (s) { case 0: for (z = 0; z < nx; z++) { TYPE pivot; pivot = sub11[z+z*ld]; STARPU_ASSERT(!ISZERO(pivot)); CPU_SCAL(nx - z - 1, (1.0/pivot), &sub11[z+(z+1)*ld], ld); CPU_GER(nx - z - 1, nx - z - 1, -1.0, &sub11[(z+1)+z*ld], 1, &sub11[z+(z+1)*ld], ld, &sub11[(z+1) + (z+1)*ld],ld); } break; #ifdef STARPU_USE_CUDA case 1: #ifdef STARPU_HAVE_LIBCUSOLVER { cusolverStatus_t sstatus; CUBLAS_TYPE *cublas_sub11 = (CUBLAS_TYPE *)sub11; CUBLAS_TYPE *workspace = (CUBLAS_TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]); sstatus = CUSOLVER_GETRF(starpu_cusolverDn_get_local_handle(), nx, nx, cublas_sub11, ld, workspace, NULL, NULL); if (sstatus != CUSOLVER_STATUS_SUCCESS) STARPU_CUSOLVER_REPORT_ERROR(sstatus); } #else handle = starpu_cublas_get_local_handle(); stream = starpu_cuda_get_local_stream(); for (z = 0; z < nx; z++) { TYPE pivot; TYPE inv_pivot; cudaMemcpyAsync(&pivot, &sub11[z+z*ld], sizeof(TYPE), cudaMemcpyDeviceToHost, stream); cudaStreamSynchronize(stream); STARPU_ASSERT(!ISZERO(pivot)); inv_pivot = 1.0/pivot; status = CUBLAS_SCAL(handle, nx - z - 1, (CUBLAS_TYPE*)&inv_pivot, (CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); status = CUBLAS_GER(handle, nx - z - 1, nx - z - 1, (CUBLAS_TYPE*)&m1, (CUBLAS_TYPE*)&sub11[(z+1)+z*ld], 1, (CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld, (CUBLAS_TYPE*)&sub11[(z+1) + (z+1)*ld],ld); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } cudaStreamSynchronize(stream); #endif break; #endif default: STARPU_ABORT(); break; } } void STARPU_LU(cpu_getrf)(void *descr[], void *_args) { STARPU_LU(common_getrf)(descr, 0, _args); } #ifdef STARPU_USE_CUDA void STARPU_LU(cublas_getrf)(void *descr[], void *_args) { STARPU_LU(common_getrf)(descr, 1, _args); } #endif /* STARPU_USE_CUDA */ static struct starpu_perfmodel STARPU_LU(model_getrf) = { .type = STARPU_HISTORY_BASED, #ifdef STARPU_ATLAS .symbol = STARPU_LU_STR(lu_model_getrf_atlas) #elif defined(STARPU_GOTO) .symbol = STARPU_LU_STR(lu_model_getrf_goto) #elif defined(STARPU_OPENBLAS) .symbol = STARPU_LU_STR(lu_model_getrf_openblas) #else .symbol = STARPU_LU_STR(lu_model_getrf) #endif }; struct starpu_codelet cl_getrf = { .cpu_funcs = {STARPU_LU(cpu_getrf)}, .cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_getrf))}, #ifdef STARPU_USE_CUDA .cuda_funcs = {STARPU_LU(cublas_getrf)}, CAN_EXECUTE #if defined(STARPU_HAVE_LIBCUSOLVER) .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) .nbuffers = 2, #else .nbuffers = 1, #endif .modes = { STARPU_RW #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) , STARPU_SCRATCH | STARPU_NOFOOTPRINT #endif }, .model = &STARPU_LU(model_getrf) }; /* * GETRF with pivoting */ static inline void STARPU_LU(common_getrf_pivot)(void *descr[], int s, void *_args) { TYPE *sub11; sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]); unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]); unsigned long z; struct piv_s *piv = _args; unsigned *ipiv = piv->piv; unsigned first = piv->first; #ifdef STARPU_USE_CUDA cublasStatus_t status; cublasHandle_t handle; cudaStream_t stream; #endif switch (s) { case 0: for (z = 0; z < nx; z++) { TYPE pivot; pivot = sub11[z+z*ld]; if (fabs((double)(pivot)) < PIVOT_THRESHHOLD) { /* find the pivot */ int piv_ind = CPU_IAMAX(nx - z, &sub11[z*(ld+1)], ld); ipiv[z + first] = piv_ind + z + first; /* swap if needed */ if (piv_ind != 0) { CPU_SWAP(nx, &sub11[z*ld], 1, &sub11[(z+piv_ind)*ld], 1); } pivot = sub11[z+z*ld]; } STARPU_ASSERT(!ISZERO(pivot)); CPU_SCAL(nx - z - 1, (1.0/pivot), &sub11[z+(z+1)*ld], ld); CPU_GER(nx - z - 1, nx - z - 1, -1.0, &sub11[(z+1)+z*ld], 1, &sub11[z+(z+1)*ld], ld, &sub11[(z+1) + (z+1)*ld],ld); } break; #ifdef STARPU_USE_CUDA case 1: handle = starpu_cublas_get_local_handle(); stream = starpu_cuda_get_local_stream(); for (z = 0; z < nx; z++) { TYPE pivot; TYPE inv_pivot; cudaMemcpyAsync(&pivot, &sub11[z+z*ld], sizeof(TYPE), cudaMemcpyDeviceToHost, stream); cudaStreamSynchronize(stream); if (fabs((double)(pivot)) < PIVOT_THRESHHOLD) { /* find the pivot */ int piv_ind; status = CUBLAS_IAMAX(handle, nx - z, (CUBLAS_TYPE*)&sub11[z*(ld+1)], ld, &piv_ind); piv_ind -= 1; if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); ipiv[z + first] = piv_ind + z + first; /* swap if needed */ if (piv_ind != 0) { status = CUBLAS_SWAP(handle, nx, (CUBLAS_TYPE*)&sub11[z*ld], 1, (CUBLAS_TYPE*)&sub11[(z+piv_ind)*ld], 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } cudaMemcpyAsync(&pivot, &sub11[z+z*ld], sizeof(TYPE), cudaMemcpyDeviceToHost, stream); cudaStreamSynchronize(stream); } STARPU_ASSERT(!ISZERO(pivot)); inv_pivot = 1.0/pivot; status = CUBLAS_SCAL(handle, nx - z - 1, (CUBLAS_TYPE*)&inv_pivot, (CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); status = CUBLAS_GER(handle, nx - z - 1, nx - z - 1, (CUBLAS_TYPE*)&m1, (CUBLAS_TYPE*)&sub11[(z+1)+z*ld], 1, (CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld, (CUBLAS_TYPE*)&sub11[(z+1) + (z+1)*ld],ld); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } cudaStreamSynchronize(stream); break; #endif default: STARPU_ABORT(); break; } } void STARPU_LU(cpu_getrf_pivot)(void *descr[], void *_args) { STARPU_LU(common_getrf_pivot)(descr, 0, _args); } #ifdef STARPU_USE_CUDA void STARPU_LU(cublas_getrf_pivot)(void *descr[], void *_args) { STARPU_LU(common_getrf_pivot)(descr, 1, _args); } #endif /* STARPU_USE_CUDA */ static struct starpu_perfmodel STARPU_LU(model_getrf_pivot) = { .type = STARPU_HISTORY_BASED, #ifdef STARPU_ATLAS .symbol = STARPU_LU_STR(lu_model_getrf_pivot_atlas) #elif defined(STARPU_GOTO) .symbol = STARPU_LU_STR(lu_model_getrf_pivot_goto) #elif defined(STARPU_OPENBLAS) .symbol = STARPU_LU_STR(lu_model_getrf_pivot_openblas) #else .symbol = STARPU_LU_STR(lu_model_getrf_pivot) #endif }; struct starpu_codelet cl_getrf_pivot = { .cpu_funcs = {STARPU_LU(cpu_getrf_pivot)}, // It uses shared-memory cl_arg //.cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_getrf_pivot))}, #ifdef STARPU_USE_CUDA .cuda_funcs = {STARPU_LU(cublas_getrf_pivot)}, CAN_EXECUTE #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .nbuffers = 1, .modes = { STARPU_RW }, .model = &STARPU_LU(model_getrf_pivot) }; /* * Pivoting */ static inline void STARPU_LU(common_pivot)(void *descr[], int s, void *_args) { TYPE *matrix; matrix = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]); unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]); unsigned row; struct piv_s *piv = _args; unsigned *ipiv = piv->piv; unsigned first = piv->first; #ifdef STARPU_USE_CUDA cublasStatus_t status; cublasHandle_t handle; #endif switch (s) { case 0: for (row = 0; row < nx; row++) { unsigned rowpiv = ipiv[row+first] - first; if (rowpiv != row) { CPU_SWAP(nx, &matrix[row*ld], 1, &matrix[rowpiv*ld], 1); } } break; #ifdef STARPU_USE_CUDA case 1: handle = starpu_cublas_get_local_handle(); for (row = 0; row < nx; row++) { unsigned rowpiv = ipiv[row+first] - first; if (rowpiv != row) { status = CUBLAS_SWAP(handle, nx, (CUBLAS_TYPE*)&matrix[row*ld], 1, (CUBLAS_TYPE*)&matrix[rowpiv*ld], 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } } break; #endif default: STARPU_ABORT(); break; } } void STARPU_LU(cpu_pivot)(void *descr[], void *_args) { STARPU_LU(common_pivot)(descr, 0, _args); } #ifdef STARPU_USE_CUDA void STARPU_LU(cublas_pivot)(void *descr[], void *_args) { STARPU_LU(common_pivot)(descr, 1, _args); } #endif /* STARPU_USE_CUDA */ static struct starpu_perfmodel STARPU_LU(model_pivot) = { .type = STARPU_HISTORY_BASED, #ifdef STARPU_ATLAS .symbol = STARPU_LU_STR(lu_model_pivot_atlas) #elif defined(STARPU_GOTO) .symbol = STARPU_LU_STR(lu_model_pivot_goto) #elif defined(STARPU_OPENBLAS) .symbol = STARPU_LU_STR(lu_model_pivot_openblas) #else .symbol = STARPU_LU_STR(lu_model_pivot) #endif }; struct starpu_codelet cl_pivot = { .cpu_funcs = {STARPU_LU(cpu_pivot)}, // It uses shared-memory cl_arg //.cpu_funcs_name = {STRINGIFY(STARPU_LU(cpu_pivot))}, #ifdef STARPU_USE_CUDA .cuda_funcs = {STARPU_LU(cublas_pivot)}, CAN_EXECUTE #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 1, .modes = {STARPU_RW}, .model = &STARPU_LU(model_pivot) }; void lu_kernel_init(int nb) { #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) int Lwork = 0; if (starpu_cuda_worker_get_count()) CUSOLVER_GETRF_BUFFERSIZE(starpu_cusolverDn_get_local_handle(), nb, nb, NULL, nb, &Lwork); starpu_variable_data_register(&scratch, -1, 0, Lwork * sizeof(TYPE)); #endif } void lu_kernel_fini(void) { #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_LIBCUSOLVER) starpu_data_unregister(scratch); #endif } starpu-1.4.9+dfsg/examples/lu/xlu_kernels.h000066400000000000000000000033401507764646700207520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __XLU_KERNELS_H__ #define __XLU_KERNELS_H__ #include void STARPU_LU(cpu_pivot)(void *descr[], void *_args); void STARPU_LU(cpu_getrf_pivot)(void *descr[], void *_args); void STARPU_LU(cpu_getrf)(void *descr[], void *_args); void STARPU_LU(cpu_trsmll)(void *descr[], void *_args); void STARPU_LU(cpu_trsmru)(void *descr[], void *_args); void STARPU_LU(cpu_gemm)(void *descr[], void *_args); #ifdef STARPU_USE_CUDA void STARPU_LU(cublas_pivot)(void *descr[], void *_args); void STARPU_LU(cublas_getrf_pivot)(void *descr[], void *_args); void STARPU_LU(cublas_getrf)(void *descr[], void *_args); void STARPU_LU(cublas_trsmll)(void *descr[], void *_args); void STARPU_LU(cublas_trsmru)(void *descr[], void *_args); void STARPU_LU(cublas_gemm)(void *descr[], void *_args); #endif extern struct starpu_codelet cl_getrf; extern struct starpu_codelet cl_getrf_pivot; extern struct starpu_codelet cl_trsm_ll; extern struct starpu_codelet cl_trsm_ru; extern struct starpu_codelet cl_gemm; extern struct starpu_codelet cl_pivot; #endif /* __XLU_KERNELS_H__ */ starpu-1.4.9+dfsg/examples/lu/xlu_pivot.c000066400000000000000000000306541507764646700204530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* LU Kernels with partial pivoting */ #include "xlu.h" #include "xlu_kernels.h" /* * Construct the DAG */ static struct starpu_task *create_task(starpu_tag_t id) { struct starpu_task *task = starpu_task_create(); task->cl_arg = NULL; task->use_tag = 1; task->tag_id = id; return task; } static int create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks, struct piv_s *piv_description, unsigned k, unsigned i, starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) { int ret; struct starpu_task *task = create_task(PIVOT(k, i)); task->cl = &cl_pivot; task->color = 0xc0c000; /* which sub-data is manipulated ? */ task->handles[0] = get_block(dataAp, nblocks, k, i); task->cl_arg = &piv_description[k]; /* this is an important task */ if (!no_prio) task->priority = 3*nblocks - (2*k + i); /* Bottom-level-based prio */ /* enforce dependencies ... */ if (k == 0) { starpu_tag_declare_deps(PIVOT(k, i), 1, TAG_GETRF(k)); } else { if (i > k) { starpu_tag_declare_deps(PIVOT(k, i), 2, TAG_GETRF(k), TAG_GEMM(k-1, i, k)); } else { starpu_tag_t *tags = malloc((nblocks - k)*sizeof(starpu_tag_t)); tags[0] = TAG_GETRF(k); unsigned ind, ind2; for (ind = k + 1, ind2 = 0; ind < nblocks; ind++, ind2++) { tags[1 + ind2] = TAG_GEMM(k-1, ind, k); } /* perhaps we could do better ... :/ */ starpu_tag_declare_deps_array(PIVOT(k, i), (nblocks-k), tags); free(tags); } } ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } static struct starpu_task *create_task_getrf_pivot(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, struct piv_s *piv_description, starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) { struct starpu_task *task = create_task(TAG_GETRF(k)); task->cl = &cl_getrf_pivot; task->color = 0xffff00; task->cl_arg = &piv_description[k]; /* which sub-data is manipulated ? */ task->handles[0] = get_block(dataAp, nblocks, k, k); /* this is an important task */ if (!no_prio) task->priority = 3*nblocks - 3*k; /* Bottom-level-based prio */ /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_GETRF(k), 1, TAG_GEMM(k-1, k, k)); } return task; } static int create_task_trsm_ll(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned j, starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) { int ret; /* printf("task trsm_ll k,i = %d,%d TAG = %llx\n", k,i, TAG_TRSM_LL(k,i)); */ struct starpu_task *task = create_task(TAG_TRSM_LL(k, j)); task->cl = &cl_trsm_ll; task->color = 0x8080ff; /* which sub-data is manipulated ? */ task->handles[0] = get_block(dataAp, nblocks, k, k); task->handles[1] = get_block(dataAp, nblocks, j, k); if (!no_prio) task->priority = 3*nblocks - (2*k + j); /* Bottom-level-based prio */ /* enforce dependencies ... */ #if 0 starpu_tag_declare_deps(TAG_TRSM_LL(k, i), 1, PIVOT(k, i)); #endif if (k > 0) { starpu_tag_declare_deps(TAG_TRSM_LL(k, j), 2, TAG_GETRF(k), TAG_GEMM(k-1, k, j)); } else { starpu_tag_declare_deps(TAG_TRSM_LL(k, j), 1, TAG_GETRF(k)); } ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } static int create_task_trsm_ru(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) { int ret; struct starpu_task *task = create_task(TAG_TRSM_RU(k, i)); task->cl = &cl_trsm_ru; task->color = 0x8080c0; /* which sub-data is manipulated ? */ task->handles[0] = get_block(dataAp, nblocks, k, k); task->handles[1] = get_block(dataAp, nblocks, k, i); if (!no_prio) task->priority = 3*nblocks - (2*k + i); /* Bottom-level-based prio */ /* enforce dependencies ... */ starpu_tag_declare_deps(TAG_TRSM_RU(k, i), 1, PIVOT(k, i)); ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } static int create_task_gemm(starpu_data_handle_t *dataAp, unsigned nblocks, unsigned k, unsigned i, unsigned j, starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), unsigned no_prio) { int ret; /* printf("task gemm k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG_GEMM(k,i,j)); */ struct starpu_task *task = create_task(TAG_GEMM(k, i, j)); task->cl = &cl_gemm; task->color = 0x00ff00; /* which sub-data is manipulated ? */ task->handles[0] = get_block(dataAp, nblocks, k, i); /* produced by TAG_TRSM_RU(k, i) */ task->handles[1] = get_block(dataAp, nblocks, j, k); /* produced by TAG_TRSM_LL(k, j) */ task->handles[2] = get_block(dataAp, nblocks, j, i); /* produced by TAG_GEMM(k-1, i, j) */ if (!no_prio) task->priority = 3*nblocks - (k + i + j); /* Bottom-level-based prio */ /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_GEMM(k, i, j), 3, TAG_GEMM(k-1, i, j), TAG_TRSM_LL(k, j), TAG_TRSM_RU(k, i)); } else { starpu_tag_declare_deps(TAG_GEMM(k, i, j), 2, TAG_TRSM_LL(k, j), TAG_TRSM_RU(k, i)); } ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return ret; } /* * code to bootstrap the factorization */ static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp, struct piv_s *piv_description, unsigned nblocks, starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), double *timing, unsigned no_prio) { int ret; double start; double end; struct starpu_task *entry_task = NULL; /* create all the DAG nodes */ unsigned i,j,k; if (bound) starpu_bound_start(bounddeps, boundprio); for (k = 0; k < nblocks; k++) { starpu_iteration_push(k); struct starpu_task *task = create_task_getrf_pivot(dataAp, nblocks, k, piv_description, get_block, no_prio); /* we defer the launch of the first task */ if (k == 0) { entry_task = task; } else { ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); if (ret == -ENODEV) return ret; } for (i = 0; i < nblocks; i++) { if (i != k) { ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block, no_prio); if (ret == -ENODEV) return ret; } } for (i = k+1; i #include #include #ifdef STARPU_HAVE_X11 #include #include int use_x11_p = 1; #endif #ifdef STARPU_HAVE_HELGRIND_H #include #endif #ifndef ANNOTATE_HAPPENS_BEFORE #define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0) #endif #ifndef ANNOTATE_HAPPENS_AFTER #define ANNOTATE_HAPPENS_AFTER(obj) ((void)0) #endif int demo_p = 0; static double demozoom_p = 0.05; /* NB: The X11 code is inspired from the http://locklessinc.com/articles/mandelbrot/ article */ static int nblocks_p = 20; static int height_p = 400; static int width_p = 640; static int maxIt_p = 20000; /* max number of iteration in the Mandelbrot function */ static int niter_p = -1; /* number of loops in case we don't use X11, -1 means infinite */ static int use_spmd_p = 0; static double leftX_p = -0.745; static double rightX_p = -0.74375; static double topY_p = .15; static double bottomY_p = .14875; /* * X11 window management */ #ifdef STARPU_HAVE_X11 /* X11 data */ static Display *dpy_p; static Window win_p; static XImage *bitmap_p; static GC gc_p; static KeySym Left_p=-1, Right_p, Down_p, Up_p, Alt_p; static void exit_x11(void) { XDestroyImage(bitmap_p); XDestroyWindow(dpy_p, win_p); XCloseDisplay(dpy_p); } static void init_x11(int width, int height, unsigned *buffer) { /* Attempt to open the display */ dpy_p = XOpenDisplay(NULL); /* Failure */ if (!dpy_p) exit(0); unsigned long white = WhitePixel(dpy_p, DefaultScreen(dpy_p)); unsigned long black = BlackPixel(dpy_p, DefaultScreen(dpy_p)); win_p = XCreateSimpleWindow(dpy_p, DefaultRootWindow(dpy_p), 0, 0, width, height, 0, black, white); /* We want to be notified when the window appears */ XSelectInput(dpy_p, win_p, StructureNotifyMask); /* Make it appear */ XMapWindow(dpy_p, win_p); XTextProperty tp; char name[128] = "Mandelbrot - StarPU"; char *n = name; Status st = XStringListToTextProperty(&n, 1, &tp); if (st) XSetWMName(dpy_p, win_p, &tp); /* Wait for the MapNotify event */ XFlush(dpy_p); int depth = DefaultDepth(dpy_p, DefaultScreen(dpy_p)); Visual *visual = DefaultVisual(dpy_p, DefaultScreen(dpy_p)); /* Make bitmap */ bitmap_p = XCreateImage(dpy_p, visual, depth, ZPixmap, 0, (char *)buffer, width, height, 32, 0); /* Init GC */ gc_p = XCreateGC(dpy_p, win_p, 0, NULL); XSetForeground(dpy_p, gc_p, black); XSelectInput(dpy_p, win_p, ExposureMask | KeyPressMask | StructureNotifyMask); Atom wmDeleteMessage; wmDeleteMessage = XInternAtom(dpy_p, "WM_DELETE_WINDOW", False); XSetWMProtocols(dpy_p, win_p, &wmDeleteMessage, 1); Left_p = XStringToKeysym ("Left"); Right_p = XStringToKeysym ("Right"); Up_p = XStringToKeysym ("Up"); Down_p = XStringToKeysym ("Down"); Alt_p = XStringToKeysym ("Alt"); } static int handle_events(void) { XEvent event; XNextEvent(dpy_p, &event); if (event.type == KeyPress) { KeySym key; char text[255]; XLookupString(&event.xkey,text,255,&key,0); if (key == Left_p) { double widthX = rightX_p - leftX_p; leftX_p -= 0.25*widthX; rightX_p -= 0.25*widthX; } else if (key == Right_p) { double widthX = rightX_p - leftX_p; leftX_p += 0.25*widthX; rightX_p += 0.25*widthX; } else if (key == Up_p) { double heightY = topY_p - bottomY_p; topY_p += 0.25*heightY; bottomY_p += 0.25*heightY; } else if (key == Down_p) { double heightY = topY_p - bottomY_p; topY_p -= 0.25*heightY; bottomY_p -= 0.25*heightY; } else { double widthX = rightX_p - leftX_p; double heightY = topY_p - bottomY_p; if (text[0] == '-') { /* Zoom out */ leftX_p -= 0.125*widthX; rightX_p += 0.125*widthX; topY_p += 0.125*heightY; bottomY_p -= 0.125*heightY; } else if (text[0] == '+') { /* Zoom in */ leftX_p += 0.125*widthX; rightX_p -= 0.125*widthX; topY_p -= 0.125*heightY; bottomY_p += 0.125*heightY; } } if (text[0]=='q') { return -1; } } if (event.type==ButtonPress) { /* tell where the mouse Button was Pressed */ printf("You pressed a button at (%i,%i)\n", event.xbutton.x,event.xbutton.y); } return 0; } #endif /* * OpenCL kernel */ #ifdef STARPU_USE_OPENCL char *mandelbrot_opencl_src = "\ #pragma OPENCL EXTENSION cl_khr_fp64 : enable\n \ #define MIN(a,b) (((a)<(b))? (a) : (b)) \n \ __kernel void mandelbrot_kernel(__global unsigned* a, \n \ double leftX, double topY, \n \ double stepX, double stepY, \n \ int maxIt, int iby, int block_size, int width) \n \ { \n \ size_t id_x = get_global_id(0); \n \ size_t id_y = get_global_id(1); \n \ if ((id_x < width) && (id_y < block_size)) \n \ { \n \ double xc = leftX + id_x * stepX; \n \ double yc = topY - (id_y + iby*block_size) * stepY; \n \ int it; \n \ double x,y; \n \ x = y = (double)0.0; \n \ for (it=0;it 4.0) break; \n \ double twoxy = (double)2.0*x*y; \n \ x = x2 - y2 + xc; \n \ y = twoxy + yc; \n \ } \n \ unsigned int v = MIN((1024*((float)(it)/(2000))), 256); \n \ a[id_x + width * id_y] = (v<<16|(255-v)<<8); \n \ } \n \ }"; static struct starpu_opencl_program opencl_programs; static void compute_block_opencl(void *descr[], void *cl_arg) { int iby, block_size; double stepX, stepY; int *pcnt; /* unused for CUDA tasks */ starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt); cl_mem data = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]); cl_kernel kernel; cl_command_queue queue; cl_int err; int id = starpu_worker_get_id_check(); int devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_programs, "mandelbrot_kernel", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); clSetKernelArg(kernel, 0, sizeof(data), &data); clSetKernelArg(kernel, 1, sizeof(leftX_p), &leftX_p); clSetKernelArg(kernel, 2, sizeof(topY_p), &topY_p); clSetKernelArg(kernel, 3, sizeof(stepX), &stepX); clSetKernelArg(kernel, 4, sizeof(stepY), &stepY); clSetKernelArg(kernel, 5, sizeof(maxIt_p), &maxIt_p); clSetKernelArg(kernel, 6, sizeof(iby), &iby); clSetKernelArg(kernel, 7, sizeof(block_size), &block_size); clSetKernelArg(kernel, 8, sizeof(width_p), &width_p); unsigned dim = 16; size_t local[2] = {dim, 1}; size_t global[2] = {width_p, block_size}; err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); starpu_opencl_release_kernel(kernel); } #endif /* * CPU kernel */ static void compute_block(void *descr[], void *cl_arg) { int iby, block_size; double stepX, stepY; int *pcnt; /* unused for sequential tasks */ starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt); unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); int local_iy; for (local_iy = 0; local_iy < block_size; local_iy++) { int ix, iy; iy = iby*block_size + local_iy; for (ix = 0; ix < width_p; ix++) { double cx = leftX_p + ix * stepX; double cy = topY_p - iy * stepY; /* Z = X+I*Y */ double x = 0; double y = 0; int it; for (it = 0; it < maxIt_p; it++) { double x2 = x*x; double y2 = y*y; /* Stop iterations when |Z| > 2 */ if (x2 + y2 > 4.0) break; double twoxy = 2.0*x*y; /* Z = Z^2 + C */ x = x2 - y2 + cx; y = twoxy + cy; } unsigned int v = STARPU_MIN((1024*((float)(it)/(2000))), 256); data[ix + local_iy*width_p] = (v<<16|(255-v)<<8); } } } static void compute_block_spmd(void *descr[], void *cl_arg) { int iby, block_size; double stepX, stepY; int *pcnt; starpu_codelet_unpack_args(cl_arg, &iby, &block_size, &stepX, &stepY, &pcnt); unsigned *data = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); while (1) { int ix, iy; /* global coordinates */ int local_iy; /* current line */ local_iy = STARPU_ATOMIC_ADD((unsigned int *)pcnt, 1) - 1; ANNOTATE_HAPPENS_BEFORE(pcnt); if (local_iy >= block_size) { ANNOTATE_HAPPENS_AFTER(pcnt); break; } iy = iby*block_size + local_iy; for (ix = 0; ix < width_p; ix++) { double cx = leftX_p + ix * stepX; double cy = topY_p - iy * stepY; /* Z = X+I*Y */ double x = 0; double y = 0; int it; for (it = 0; it < maxIt_p; it++) { double x2 = x*x; double y2 = y*y; /* Stop iterations when |Z| > 2 */ if (x2 + y2 > 4.0) break; double twoxy = 2.0*x*y; /* Z = Z^2 + C */ x = x2 - y2 + cx; y = twoxy + cy; } unsigned int v = STARPU_MIN((1024*((float)(it)/(2000))), 256); data[ix + local_iy*width_p] = (v<<16|(255-v)<<8); } } } static struct starpu_codelet spmd_mandelbrot_cl = { .type = STARPU_SPMD, .max_parallelism = INT_MAX, .cpu_funcs = {compute_block_spmd}, #ifdef STARPU_USE_OPENCL .opencl_funcs = {compute_block_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .nbuffers = 1 }; static struct starpu_codelet mandelbrot_cl = { .type = STARPU_SEQ, .cpu_funcs = {compute_block}, #ifdef STARPU_USE_OPENCL .opencl_funcs = {compute_block_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .nbuffers = 1 }; static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-h") == 0) { fprintf(stderr, "Usage: %s [-h] [ -width 800] [-height 600] [-nblocks 16] [-no-x11] [-pos leftx:rightx:bottomy:topy] [-niter 1000] [-spmd] [-demo] [-demozoom 0.2]\n", argv[0]); exit(-1); } if (strcmp(argv[i], "-width") == 0) { char *argptr; width_p = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-height") == 0) { char *argptr; height_p = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-nblocks") == 0) { char *argptr; nblocks_p = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-niter") == 0) { char *argptr; niter_p = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-pos") == 0) { int ret = sscanf(argv[++i], "%lf:%lf:%lf:%lf", &leftX_p, &rightX_p, &bottomY_p, &topY_p); assert(ret == 4); } if (strcmp(argv[i], "-demo") == 0) { demo_p = 1; leftX_p = -50.22749575062760; rightX_p = 48.73874621262927; topY_p = -49.35016705749115; bottomY_p = 49.64891691946615; } if (strcmp(argv[i], "-demozoom") == 0) { char *argptr; demozoom_p = strtof(argv[++i], &argptr); } if (strcmp(argv[i], "-no-x11") == 0) { #ifdef STARPU_HAVE_X11 use_x11_p = 0; #endif } if (strcmp(argv[i], "-spmd") == 0) { use_spmd_p = 1; } } } int main(int argc, char **argv) { int ret; parse_args(argc, argv); /* We don't use CUDA in that example */ struct starpu_conf conf; starpu_conf_init(&conf); conf.ncuda = 0; if (use_spmd_p) { conf.sched_policy_name = "peager"; } ret = starpu_init(&conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned *buffer; starpu_malloc((void **)&buffer, height_p*width_p*sizeof(unsigned)); #ifdef STARPU_HAVE_X11 if (use_x11_p) init_x11(width_p, height_p, buffer); #endif int block_size = height_p/nblocks_p; STARPU_ASSERT((height_p % nblocks_p) == 0); #ifdef STARPU_USE_OPENCL starpu_opencl_load_opencl_from_string(mandelbrot_opencl_src, &opencl_programs, NULL); #endif starpu_data_handle_t block_handles[nblocks_p]; int iby; for (iby = 0; iby < nblocks_p; iby++) { unsigned *data = &buffer[iby*block_size*width_p]; starpu_vector_data_register(&block_handles[iby], STARPU_MAIN_RAM, (uintptr_t)data, block_size*width_p, sizeof(unsigned)); } unsigned iter = 0; double start, end; start = starpu_timing_now(); while (niter_p-- != 0) { double stepX = (rightX_p - leftX_p)/width_p; double stepY = (topY_p - bottomY_p)/height_p; /* In case we have a SPMD task, each worker will grab tasks in * a greedy and select which piece of image to compute by * incrementing a counter shared by all the workers within the * parallel task. */ int per_block_cnt[nblocks_p]; starpu_iteration_push(niter_p); for (iby = 0; iby < nblocks_p; iby++) { per_block_cnt[iby] = 0; int *pcnt = &per_block_cnt[iby]; ret = starpu_task_insert(use_spmd_p?&spmd_mandelbrot_cl:&mandelbrot_cl, STARPU_VALUE, &iby, sizeof(iby), STARPU_VALUE, &block_size, sizeof(block_size), STARPU_VALUE, &stepX, sizeof(stepX), STARPU_VALUE, &stepY, sizeof(stepY), STARPU_W, block_handles[iby], STARPU_VALUE, &pcnt, sizeof(int *), STARPU_TAG_ONLY, ((starpu_tag_t)niter_p)*nblocks_p + iby, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } for (iby = 0; iby < nblocks_p; iby++) { #ifdef STARPU_HAVE_X11 if (use_x11_p) { starpu_data_acquire(block_handles[iby], STARPU_R); XPutImage(dpy_p, win_p, gc_p, bitmap_p, 0, iby*block_size, 0, iby*block_size, width_p, block_size); starpu_data_release(block_handles[iby]); } #endif } starpu_iteration_pop(); if (demo_p) { /* Zoom in */ double zoom_factor = demozoom_p; double widthX = rightX_p - leftX_p; double heightY = topY_p - bottomY_p; iter++; /* If the window is too small, we reset the demo and display some statistics */ if ((fabs(widthX) < 1e-12) || (fabs(heightY) < 1e-12)) { leftX_p = -50.22749575062760; rightX_p = 48.73874621262927; topY_p = -49.35016705749115; bottomY_p = 49.64891691946615; end = starpu_timing_now(); double timing = end - start; fprintf(stderr, "Time to generate %u frames : %f s\n", iter, timing/1000000.0); fprintf(stderr, "Average FPS: %f\n", ((double)iter*1e+6)/timing); /* Reset counters */ iter = 0; start = starpu_timing_now(); } else { leftX_p += (zoom_factor/2)*widthX; rightX_p -= (zoom_factor/2)*widthX; topY_p -= (zoom_factor/2)*heightY; bottomY_p += (zoom_factor/2)*heightY; } } #ifdef STARPU_HAVE_X11 else if (use_x11_p && handle_events()) break; #endif } #ifdef STARPU_HAVE_X11 if (use_x11_p) exit_x11(); #endif for (iby = 0; iby < nblocks_p; iby++) starpu_data_unregister(block_handles[iby]); /* starpu_data_free_pinned_if_possible(buffer); */ starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/matvecmult/000077500000000000000000000000001507764646700200075ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/matvecmult/matvecmult.c000066400000000000000000000143331507764646700223400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #ifdef STARPU_USE_OPENCL struct starpu_opencl_program opencl_code; void opencl_codelet(void *descr[], void *_args) { (void)_args; cl_kernel kernel; cl_command_queue queue; int id, devid, err, n; cl_mem matrix = (cl_mem)STARPU_MATRIX_GET_DEV_HANDLE(descr[0]); cl_mem vector = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[1]); cl_mem mult = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[2]); int nx = STARPU_MATRIX_GET_NX(descr[0]); int ny = STARPU_MATRIX_GET_NY(descr[0]); int ld = STARPU_MATRIX_GET_LD(descr[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_code, "matVecMult", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); n=0; err = clSetKernelArg(kernel, n++, sizeof(matrix), &matrix); err |= clSetKernelArg(kernel, n++, sizeof(vector), &vector); err |= clSetKernelArg(kernel, n++, sizeof(nx), (void*)&nx); err |= clSetKernelArg(kernel, n++, sizeof(ny), (void*)&ny); err |= clSetKernelArg(kernel, n++, sizeof(mult), &mult); err |= clSetKernelArg(kernel, n++, sizeof(ld), (void*)&ld); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=nx*ny; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } #endif void fillArray(float* pfData, int iSize) { int i; const float fScale = 1.0f / (float)RAND_MAX; for (i = 0; i < iSize; ++i) { pfData[i] = fScale * rand(); } } #if 0 void printArray(float* pfData, int iSize) { int i; for (i = 0; i < iSize; ++i) { FPRINTF(stderr, "%f ", pfData[i]); } FPRINTF(stderr, "\n"); } #endif void matVecMult(const float *matrix, const float *vector, int width, int height, float *mult) { int i, j; for (i = 0; i < height; ++i) { double sum = 0; for (j = 0; j < width; ++j) { double a = matrix[i * width + j]; double b = vector[j]; sum += a * b; } mult[i] = (float)sum; } } int compareL2fe(const float* reference, const float* data, const unsigned int len, const float epsilon) { float error = 0; float ref = 0; unsigned int i; for(i = 0; i < len; ++i) { float diff = reference[i] - data[i]; error += diff * diff; ref += reference[i] * reference[i]; } float normRef = sqrtf(ref); if (fabs(ref) < 1e-7) return 1; float normError = sqrtf(error); error = normError / normRef; return error < epsilon ? 0 : 1; } static struct starpu_perfmodel starpu_matvecmult_model = { .type = STARPU_HISTORY_BASED, .symbol = "matvecmult" }; static struct starpu_codelet cl = { #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_codelet}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_RW}, .model = &starpu_matvecmult_model }; int main(void) { struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.nopencl = 1; /* int width=1100; */ /* int height=244021; */ int width=20; int height=4; float *matrix, *vector, *mult; float *correctResult; unsigned int mem_size_matrix, mem_size_vector, mem_size_mult; starpu_data_handle_t matrix_handle, vector_handle, mult_handle; int ret, submit; ret = starpu_init(&conf); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "This application requires an OpenCL worker.\n"); return 77; } mem_size_matrix = width * height * sizeof(float); starpu_malloc((void **)&matrix, mem_size_matrix); mem_size_vector = width * sizeof(float); starpu_malloc((void **)&vector, mem_size_vector); mem_size_mult = height * sizeof(float); starpu_malloc((void **)&mult, mem_size_mult); correctResult = (float*)malloc(mem_size_mult); assert(matrix); assert(vector); assert(mult); assert(correctResult); fillArray(matrix, width*height); fillArray(vector, width); fillArray(mult, height); matVecMult(matrix, vector, width, height, correctResult); starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix, width, width, height, sizeof(float)); starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, width, sizeof(float)); starpu_vector_data_register(&mult_handle, STARPU_MAIN_RAM, (uintptr_t)mult, height, sizeof(float)); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("examples/matvecmult/matvecmult_kernel.cl", &opencl_code, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->callback_func = NULL; task->handles[0] = matrix_handle; task->handles[1] = vector_handle; task->handles[2] = mult_handle; submit = starpu_task_submit(task); if (STARPU_UNLIKELY(submit == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task. This application requires an OpenCL worker.\n"); } else { starpu_task_wait_for_all(); } starpu_data_unregister(matrix_handle); starpu_data_unregister(vector_handle); starpu_data_unregister(mult_handle); if (STARPU_LIKELY(submit != -ENODEV)) { int res = compareL2fe(correctResult, mult, height, 1e-6f); FPRINTF(stdout, "TEST %s\n\n", (res == 0) ? "PASSED" : "FAILED !!!"); } #if 0 printArray(matrix, width*height); printArray(vector, width); printArray(mult, height); #endif starpu_free_noflag(matrix, mem_size_matrix); starpu_free_noflag(vector, mem_size_vector); starpu_free_noflag(mult, mem_size_mult); free(correctResult); starpu_shutdown(); return (submit == -ENODEV) ? 77 : 0; } starpu-1.4.9+dfsg/examples/matvecmult/matvecmult_kernel.cl000066400000000000000000000017001507764646700240460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void matVecMult(const __global float *A, const __global float *X, int n, int m, __global float *Y, int ld) { const int i = get_global_id(0); if (i < m) { float val = 0; int j; for (j = 0; j < n; j++) val += A[i*ld+j] * X[j]; Y[i] = val; } } starpu-1.4.9+dfsg/examples/mlr/000077500000000000000000000000001507764646700164205ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/mlr/mlr.c000066400000000000000000000137011507764646700173600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This examples demonstrates how to use multiple linear regression * models. * * First, there is mlr_codelet__init codelet for which we know the * parameters, but not the their exponents and relations. This tasks * should be benchmarked and analyzed to find the model, using * "tools/starpu_mlr_analysis" script as a template. * * For the second (codelet cl_model_final), it is assumed that the * analysis has already been performed and that the duration of the * codelet mlr_codelet_final will be computed using the following * equation: * * T = a + b * (M^2*N) + c * (N^3*K) * * where M, N, K are the parameters of the task, exponents are coming * from model->combinations[..][..] and finally a, b, c are * coefficients which mostly depend on the machine speed. * * These coefficients are going to be automatically computed using * least square method. * */ #include #include #include #include #ifdef STARPU_QUICK_CHECK #define NTASKS 10 #else #define NTASKS 1000 #endif static long sum; /* Performance function of the task, which is in this case very simple, as the parameter values just need to be written in the array "parameters" */ static void cl_params(struct starpu_task *task, double *parameters) { int m, n, k; int* vector_mn; vector_mn = (int*)STARPU_VECTOR_GET_PTR(task->interfaces[0]); m = vector_mn[0]; n = vector_mn[1]; starpu_codelet_unpack_args(task->cl_arg, &k); parameters[0] = m; parameters[1] = n; parameters[2] = k; } /* Function of the task that will be executed. In this case running dummy cycles, just to make sure task duration is significant */ void cpu_func(void *buffers[], void *cl_arg) { long i; int m,n,k; int* vector_mn; vector_mn = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); m = vector_mn[0]; n = vector_mn[1]; starpu_codelet_unpack_args(cl_arg, &k); for(i=0; i < (long) (m*m*n); i++) STARPU_ATOMIC_ADD(&sum, i); for(i=0; i < (long) (n*n*n*k); i++) STARPU_ATOMIC_ADD(&sum, i); } /* ############################################ */ /* Start of the part specific to multiple linear regression perfmodels */ /* Defining perfmodel, number of parameters and their names Initially * application developer only knows these parameters. The execution of * this codelet will generate traces that can be analyzed using * "tools/starpu_mlr_analysis" as a template to obtain the parameters * combinations and exponents. */ static const char * parameters_names[] = { "M", "N", "K", }; static struct starpu_perfmodel cl_model_init = { .type = STARPU_MULTIPLE_REGRESSION_BASED, .symbol = "mlr_init", .parameters = cl_params, .nparameters = 3, .parameters_names = parameters_names, }; /* Defining the equation for modeling duration of the task. The * parameters combinations and exponents are computed externally * offline, for example using "tools/starpu_mlr_analysis" tool as a * template. */ /* M^2 * N^1 * K^0 */ static unsigned combi1 [3] = { 2, 1, 0 }; /* M^0 * N^3 * K^1 */ static unsigned combi2 [3] = { 0, 3, 1 }; static unsigned *combinations[] = { combi1, combi2 }; static struct starpu_perfmodel cl_model_final = { .type = STARPU_MULTIPLE_REGRESSION_BASED, .symbol = "mlr_final", .parameters = cl_params, .nparameters = 3, .parameters_names = parameters_names, .ncombinations = 2, .combinations = combinations, }; /* End of the part specific to multiple linear regression perfmodels */ /* ############################################ */ static struct starpu_codelet cl_init = { .cpu_funcs = { cpu_func }, .cpu_funcs_name = { "cpu_func" }, .nbuffers = 1, .modes = {STARPU_R}, .model = &cl_model_init, }; static struct starpu_codelet cl_final = { .cpu_funcs = { cpu_func }, .cpu_funcs_name = { "cpu_func" }, .nbuffers = 1, .modes = {STARPU_R}, .model = &cl_model_final, }; int main(void) { /* Initialization */ unsigned i; int ret; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return 77; sum=0; int* vector_mn = calloc(2, sizeof(int)); starpu_data_handle_t vector_mn_handle; starpu_vector_data_register(&vector_mn_handle, STARPU_MAIN_RAM, (uintptr_t)vector_mn, 2, sizeof(int)); /* Giving pseudo-random values to the M,N,K parameters and inserting tasks */ for (i = 0; i < 42; i++) { int j; int m,n,k; m = (int) ((rand() % 10)+1); n = (int) ((rand() % 10)+1); k = (int) ((rand() % 10)+1); /* To illustrate the usage, M and N are stored in a data handle */ starpu_data_acquire(vector_mn_handle, STARPU_W); vector_mn[0] = m; vector_mn[1] = n; starpu_data_release(vector_mn_handle); for (j = 0; j < NTASKS; j++) { ret = starpu_task_insert(&cl_init, STARPU_R, vector_mn_handle, STARPU_VALUE, &k, sizeof(int), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&cl_final, STARPU_R, vector_mn_handle, STARPU_VALUE, &k, sizeof(int), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } } starpu_data_unregister(vector_mn_handle); free(vector_mn); starpu_shutdown(); ret = starpu_init(NULL); if (ret == -ENODEV) return 77; starpu_perfmodel_dump_xml(stdout, &cl_model_final); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/mult/000077500000000000000000000000001507764646700166075ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/mult/dgemm.c000066400000000000000000000013261507764646700200460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "double.h" #include "xgemm.c" starpu-1.4.9+dfsg/examples/mult/dgemm_layout.c000066400000000000000000000013351507764646700214430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "double.h" #include "xgemm_layout.c" starpu-1.4.9+dfsg/examples/mult/double.h000066400000000000000000000020301507764646700202250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define TYPE double #define EPSILON 0.000000000001 #define CUBLAS_GEMM cublasDgemm #define HIPBLAS_GEMM hipblasDgemm #define CPU_GEMM STARPU_DGEMM #define CPU_ASUM STARPU_DASUM #define CPU_IAMAX STARPU_IDAMAX #define STARPU_GEMM(name) starpu_dgemm_##name #define str(s) #s #define xstr(s) str(s) #define STARPU_GEMM_STR(name) xstr(STARPU_GEMM(name)) starpu-1.4.9+dfsg/examples/mult/sgemm.c000066400000000000000000000013261507764646700200650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "simple.h" #include "xgemm.c" starpu-1.4.9+dfsg/examples/mult/sgemm.sh000077500000000000000000000076531507764646700202710ustar00rootroot00000000000000#!/bin/sh -x # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2018-2018 Federal University of Rio Grande do Sul (UFRGS) # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Test parsing of FxT traces # Testing another specific scheduler, no need to run this [ -z "$STARPU_SCHED" -o "$STARPU_SCHED" = dmdas ] || exit 77 # XXX: Also see tests/overlap/overlap.sh set -e PREFIX=$(dirname $0) rm -rf $PREFIX/sgemm.traces mkdir -p $PREFIX/sgemm.traces export STARPU_FXT_PREFIX=$PREFIX/sgemm.traces STARPU_FXT_TRACE=1 STARPU_SCHED=dmdas $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/sgemm -check if [ -x $PREFIX/../../tools/starpu_fxt_tool ]; then $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_plot -o $STARPU_FXT_PREFIX -s starpu_sgemm_gemm -i $STARPU_FXT_PREFIX/prof_file_${USER}_0 [ -f $STARPU_FXT_PREFIX/starpu_starpu_sgemm_gemm.gp -a -f $STARPU_FXT_PREFIX/starpu_starpu_sgemm_gemm.data -a -f $STARPU_FXT_PREFIX/starpu_starpu_sgemm_gemm.data ] # Generate paje, dag, data, etc. $STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_tool -d $STARPU_FXT_PREFIX -memory-states -label-deps -i $STARPU_FXT_PREFIX/prof_file_${USER}_0 $PREFIX/../../tools/starpu_paje_sort $STARPU_FXT_PREFIX/paje.trace ! type pj_dump || pj_dump -e 0 < $STARPU_FXT_PREFIX/paje.trace $PREFIX/../../tools/starpu_codelet_profile $STARPU_FXT_PREFIX/distrib.data starpu_sgemm_gemm [ -f $STARPU_FXT_PREFIX/distrib.data.gp ] data=`ls $STARPU_FXT_PREFIX/distrib.data.[0-9]*` [ -n "$data" ] $STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_data_trace -d $STARPU_FXT_PREFIX $STARPU_FXT_PREFIX/prof_file_${USER}_0 starpu_sgemm_gemm [ -f $STARPU_FXT_PREFIX/data_trace.gp ] $STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_stats -i $STARPU_FXT_PREFIX/prof_file_${USER}_0 $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/../../tools/starpu_tasks_rec_complete $STARPU_FXT_PREFIX/tasks.rec $STARPU_FXT_PREFIX/tasks2.rec python3 $PREFIX/../../tools/starpu_trace_state_stats.py $STARPU_FXT_PREFIX/trace.rec ! type gnuplot || ( $PREFIX/../../tools/starpu_workers_activity -d $STARPU_FXT_PREFIX $STARPU_FXT_PREFIX/activity.data && [ -f $STARPU_FXT_PREFIX/activity.eps ] ) # needs some R packages $PREFIX/../../tools/starpu_paje_draw_histogram $STARPU_FXT_PREFIX/paje.trace || true $PREFIX/../../tools/starpu_paje_state_stats $STARPU_FXT_PREFIX/paje.trace || true $PREFIX/../../tools/starpu_paje_summary $STARPU_FXT_PREFIX/paje.trace || true $PREFIX/../../tools/starpu_codelet_histo_profile $STARPU_FXT_PREFIX/distrib.data || true [ -f $STARPU_FXT_PREFIX/distrib.data.starpu_sgemm_gemm.0.492beed5.33177600.pdf ] || true if [ -x $PREFIX/../../tools/starpu_replay ]; then $STARPU_LAUNCH $PREFIX/../../tools/starpu_replay $STARPU_FXT_PREFIX/tasks.rec fi [ ! -x $PREFIX/../../tools/starpu_perfmodel_recdump ] || $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_recdump $STARPU_FXT_PREFIX/tasks.rec -o $STARPU_FXT_PREFIX/perfs2.rec [ -f $STARPU_FXT_PREFIX/perfs2.rec ] fi [ ! -x $PREFIX/../../tools/starpu_perfmodel_display ] || $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_display -s starpu_sgemm_gemm [ ! -x $PREFIX/../../tools/starpu_perfmodel_display ] || $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_display -x -s starpu_sgemm_gemm [ ! -x $PREFIX/../../tools/starpu_perfmodel_recdump ] || $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_recdump -o $STARPU_FXT_PREFIX/perfs.rec [ -f $STARPU_FXT_PREFIX/perfs.rec ] starpu-1.4.9+dfsg/examples/mult/sgemm_layout.c000066400000000000000000000013351507764646700214620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "simple.h" #include "xgemm_layout.c" starpu-1.4.9+dfsg/examples/mult/simple.h000066400000000000000000000020201507764646700202430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define TYPE float #define EPSILON 0.000001 #define CUBLAS_GEMM cublasSgemm #define HIPBLAS_GEMM hipblasSgemm #define CPU_GEMM STARPU_SGEMM #define CPU_ASUM STARPU_SASUM #define CPU_IAMAX STARPU_ISAMAX #define STARPU_GEMM(name) starpu_sgemm_##name #define str(s) #s #define xstr(s) str(s) #define STARPU_GEMM_STR(name) xstr(STARPU_GEMM(name)) starpu-1.4.9+dfsg/examples/mult/xgemm.c000066400000000000000000000335031507764646700200740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * Copyright (C) 2017-2017 Erwan Leria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * Simple parallel GEMM implementation: partition the output matrix in the two * dimensions, and the input matrices in the corresponding dimension, and * perform the output computations in parallel. */ #include "xgemm.h" static void init_problem_data(void) { #ifndef STARPU_SIMGRID unsigned i,j; #endif starpu_malloc_flags((void **)&A, zdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_malloc_flags((void **)&B, xdim*zdim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_malloc_flags((void **)&C, xdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); #ifndef STARPU_SIMGRID /* fill the A and B matrices */ for (j=0; j < ydim; j++) { for (i=0; i < zdim; i++) { A[j+i*ydim] = (TYPE)(starpu_drand48()); } } for (j=0; j < zdim; j++) { for (i=0; i < xdim; i++) { B[j+i*zdim] = (TYPE)(starpu_drand48()); } } for (j=0; j < ydim; j++) { for (i=0; i < xdim; i++) { C[j+i*ydim] = (TYPE)(0); } } #endif } static void partition_mult_data(void) { unsigned x, y, z; starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, ydim, ydim, zdim, sizeof(TYPE)); starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, zdim, zdim, xdim, sizeof(TYPE)); starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C, ydim, ydim, xdim, sizeof(TYPE)); struct starpu_data_filter vert; memset(&vert, 0, sizeof(vert)); vert.filter_func = starpu_matrix_filter_vertical_block; vert.nchildren = nslicesx; struct starpu_data_filter horiz; memset(&horiz, 0, sizeof(horiz)); horiz.filter_func = starpu_matrix_filter_block; horiz.nchildren = nslicesy; if (tiled) { struct starpu_data_filter vertA; memset(&vertA, 0, sizeof(vertA)); vertA.filter_func = starpu_matrix_filter_vertical_block; vertA.nchildren = nslicesz; struct starpu_data_filter horizB; memset(&horizB, 0, sizeof(horizB)); horizB.filter_func = starpu_matrix_filter_block; horizB.nchildren = nslicesz; starpu_data_map_filters(A_handle, 2, &vertA, &horiz); starpu_data_map_filters(B_handle, 2, &vert, &horizB); starpu_data_map_filters(C_handle, 2, &vert, &horiz); for (y = 0; y < nslicesy; y++) for (z = 0; z < nslicesz; z++) starpu_data_set_coordinates(starpu_data_get_sub_data(A_handle, 2, z, y), 2, z, y); for (x = 0; x < nslicesx; x++) for (z = 0; z < nslicesz; z++) starpu_data_set_coordinates(starpu_data_get_sub_data(B_handle, 2, x, z), 2, x, z); } else { starpu_data_partition(B_handle, &vert); starpu_data_partition(A_handle, &horiz); starpu_data_map_filters(C_handle, 2, &vert, &horiz); for (y = 0; y < nslicesy; y++) starpu_data_set_coordinates(starpu_data_get_sub_data(A_handle, 1, y), 2, 0, y); for (x = 0; x < nslicesx; x++) starpu_data_set_coordinates(starpu_data_get_sub_data(B_handle, 1, x), 2, x, 0); } for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) starpu_data_set_coordinates(starpu_data_get_sub_data(C_handle, 2, x, y), 2, x, y); } #ifdef STARPU_USE_CUDA static void cublas_mult(void *descr[], void *arg, const TYPE *beta) { (void)arg; TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); cublasStatus_t status = CUBLAS_GEMM(starpu_cublas_get_local_handle(), CUBLAS_OP_N, CUBLAS_OP_N, nxC, nyC, nyA, &p1_cuda, subA, ldA, subB, ldB, beta, subC, ldC); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif #ifdef STARPU_HAVE_BLAS void cpu_mult(void *descr[], void *arg, TYPE beta) { (void)arg; TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); int worker_size = starpu_combined_worker_get_size(); if (worker_size == 1) { /* Sequential CPU task */ CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, beta, subC, ldC); } else { /* Parallel CPU task */ unsigned rank = starpu_combined_worker_get_rank(); unsigned block_size = (nyC + worker_size - 1)/worker_size; unsigned new_nyC = STARPU_MIN(nyC, block_size*(rank+1)) - block_size*rank; STARPU_ASSERT(nyC == STARPU_MATRIX_GET_NY(descr[1])); TYPE *new_subB = &subB[block_size*rank]; TYPE *new_subC = &subC[block_size*rank]; CPU_GEMM("N", "N", nxC, new_nyC, nyA, (TYPE)1.0, subA, ldA, new_subB, ldB, beta, new_subC, ldC); } } #endif static struct starpu_codelet cl_gemm0 = { #ifdef STARPU_HAVE_BLAS .type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */ .max_parallelism = INT_MAX, .cpu_funcs = {cpu_gemm0}, .cpu_funcs_name = {"cpu_gemm0"}, #endif #ifdef STARPU_USE_CUDA .cuda_funcs = {cublas_gemm0}, #elif defined(STARPU_USE_HIP) && defined(STARPU_USE_HIPBLAS) .hip_funcs = {hipblas_gemm0}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .hip_flags = {STARPU_HIP_ASYNC}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W}, .model = &starpu_gemm_model }; static struct starpu_codelet cl_gemm = { #ifdef STARPU_HAVE_BLAS .type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */ .max_parallelism = INT_MAX, .cpu_funcs = {cpu_gemm}, .cpu_funcs_name = {"cpu_gemm"}, #endif #ifdef STARPU_USE_CUDA .cuda_funcs = {cublas_gemm}, #elif defined(STARPU_USE_HIP) && defined(STARPU_USE_HIPBLAS) .hip_funcs = {hipblas_gemm}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .hip_flags = {STARPU_HIP_ASYNC}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_RW}, .model = &starpu_gemm_model }; static void parse_args(int argc, char **argv) { int i; int size_set = 0; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-3d") == 0) { tiled = 1; } else if (strcmp(argv[i], "-nblocks") == 0) { char *argptr; nslicesx = strtol(argv[++i], &argptr, 10); nslicesy = nslicesx; nslicesz = nslicesx; if (nslicesx == 0) { fprintf(stderr, "the number of blocks in X cannot be 0!\n"); exit(EXIT_FAILURE); } } else if (strcmp(argv[i], "-nblocksx") == 0) { char *argptr; nslicesx = strtol(argv[++i], &argptr, 10); if (nslicesx == 0) { fprintf(stderr, "the number of blocks in X cannot be 0!\n"); exit(EXIT_FAILURE); } } else if (strcmp(argv[i], "-nblocksy") == 0) { char *argptr; nslicesy = strtol(argv[++i], &argptr, 10); if (nslicesy == 0) { fprintf(stderr, "the number of blocks in Y cannot be 0!\n"); exit(EXIT_FAILURE); } } else if (strcmp(argv[i], "-nblocksz") == 0) { char *argptr; nslicesz = strtol(argv[++i], &argptr, 10); if (nslicesz == 0) { fprintf(stderr, "the number of blocks in Z cannot be 0!\n"); exit(EXIT_FAILURE); } } else if (strcmp(argv[i], "-x") == 0) { char *argptr; xdim = strtol(argv[++i], &argptr, 10); if (xdim == 0) { fprintf(stderr, "the X dimension cannot be 0!\n"); exit(EXIT_FAILURE); } size_set = 1; } else if (strcmp(argv[i], "-xy") == 0) { char *argptr; xdim = ydim = strtol(argv[++i], &argptr, 10); if (xdim == 0) { fprintf(stderr, "the XY dimensions cannot be 0!\n"); exit(EXIT_FAILURE); } size_set = 1; } else if (strcmp(argv[i], "-xyz") == 0) { char *argptr; xdim = ydim = zdim = strtol(argv[++i], &argptr, 10); size_set = 1; } else if (strcmp(argv[i], "-y") == 0) { char *argptr; ydim = strtol(argv[++i], &argptr, 10); if (ydim == 0) { fprintf(stderr, "the Y dimension cannot be 0!\n"); exit(EXIT_FAILURE); } size_set = 1; } else if (strcmp(argv[i], "-z") == 0) { char *argptr; zdim = strtol(argv[++i], &argptr, 10); if (zdim == 0) { fprintf(stderr, "the Z dimension cannot be 0!\n"); exit(EXIT_FAILURE); } size_set = 1; } else if (strcmp(argv[i], "-size") == 0) { char *argptr; xdim = ydim = zdim = strtol(argv[++i], &argptr, 10); if (xdim == 0) { fprintf(stderr, "the size cannot be 0!\n"); exit(EXIT_FAILURE); } size_set = 1; } else if (strcmp(argv[i], "-iter") == 0) { char *argptr; niter = strtol(argv[++i], &argptr, 10); if (niter == 0) { fprintf(stderr, "the number of iterations cannot be 0!\n"); exit(EXIT_FAILURE); } } else if (strcmp(argv[i], "-nsleeps") == 0) { char *argptr; nsleeps = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-bound") == 0) { bound = 1; } else if (strcmp(argv[i], "-hostname") == 0) { print_hostname = 1; } else if (strcmp(argv[i], "-check") == 0) { check = 1; } else if (strcmp(argv[i], "-spmd") == 0) { cl_gemm0.type = STARPU_SPMD; } else if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) { fprintf(stderr,"Usage: %s [-3d] [-nblocks n] [-nblocksx x] [-nblocksy y] [-nblocksz z] [-x x] [-y y] [-xy n] [-z z] [-xyz n] [-size size] [-iter iter] [-bound] [-check] [-spmd] [-hostname] [-nsleeps nsleeps]\n", argv[0]); if (tiled) fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%ux%u blocks (size %ux%u length %u), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, nslicesz, xdim / nslicesx, ydim / nslicesy, zdim / nslicesz, niter, nsleeps); else fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%u blocks (size %ux%u length %u), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, xdim / nslicesx, ydim / nslicesy, zdim, niter, nsleeps); exit(EXIT_SUCCESS); } else { fprintf(stderr,"Unrecognized option %s\n", argv[i]); exit(EXIT_FAILURE); } } #ifndef STARPU_SIMGRID if (check && !size_set) { /* Check is sequential, reduce its default duration */ xdim /= 2; ydim /= 2; } #endif #ifdef STARPU_QUICK_CHECK niter /= 10; if(niter==0) niter=1; #endif } static int run_data(void) { PRINTF("# "); if (print_hostname) PRINTF("node\t"); PRINTF("x\ty\tz\tms\tGFlop/s"); if (bound) PRINTF("\tTms\tTGFlop/s\tTims\tTiGFlop/s"); PRINTF("\n"); unsigned sleeps; for (sleeps = 0; sleeps < nsleeps; sleeps++) { if (bound) starpu_bound_start(0, 0); starpu_fxt_start_profiling(); double start = starpu_timing_now(); unsigned x, y, z, iter; for (iter = 0; iter < niter; iter++) { if (tiled) { for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) { starpu_data_handle_t Ctile = starpu_data_get_sub_data(C_handle, 2, x, y); for (z = 0; z < nslicesz; z++) { struct starpu_codelet *cl = z == 0 ? &cl_gemm0 : &cl_gemm; int ret = starpu_task_insert(cl, cl->modes[0], starpu_data_get_sub_data(A_handle, 2, z, y), cl->modes[1], starpu_data_get_sub_data(B_handle, 2, x, z), cl->modes[2], Ctile, STARPU_FLOPS, (double) (2ULL * (xdim/nslicesx) * (ydim/nslicesy) * (zdim/nslicesz)), 0); if (ret == -ENODEV) { check = 0; return 77; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } starpu_data_wont_use(Ctile); } } else { for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) { int ret = starpu_task_insert(&cl_gemm0, cl_gemm0.modes[0], starpu_data_get_sub_data(A_handle, 1, y), cl_gemm0.modes[1], starpu_data_get_sub_data(B_handle, 1, x), cl_gemm0.modes[2], starpu_data_get_sub_data(C_handle, 2, x, y), STARPU_FLOPS, (double) (2ULL * (xdim/nslicesx) * (ydim/nslicesy) * zdim), 0); if (ret == -ENODEV) { check = 0; return 77; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_data_wont_use(starpu_data_get_sub_data(C_handle, 2, x, y)); } } starpu_task_wait_for_all(); } double end = starpu_timing_now(); starpu_fxt_stop_profiling(); if (bound) starpu_bound_stop(); double timing = end - start; double min, min_int; double flops = 2.0*((unsigned long long)(niter))*((unsigned long long)xdim) *((unsigned long long)ydim)*((unsigned long long)zdim); if (bound) starpu_bound_compute(&min, &min_int, 1); if (print_hostname) { char hostname[255]; gethostname(hostname, 255); PRINTF("%s\t", hostname); } PRINTF("%u\t%u\t%u\t%.0f\t%.1f", xdim, ydim, zdim, timing/(niter)/1000.0, flops/timing/1000.0); if (bound) PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0); PRINTF("\n"); if (sleeps < nsleeps-1) { sleep(10); } } return 0; } starpu-1.4.9+dfsg/examples/mult/xgemm.h000066400000000000000000000135051507764646700201010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2017-2017 Erwan Leria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef TYPE #error "Do not compile xgemm.c directly, compile sgemm.c or dgemm.c" #endif #include #include #include #include #include #include #include #ifdef STARPU_HAVE_BLAS #include #endif #ifdef STARPU_USE_CUDA #include #include static const TYPE p1_cuda = 1.0; static const TYPE v0_cuda = 0.0; #endif #ifdef STARPU_USE_HIP #include #include static const TYPE p1_hip = 1.0; static const TYPE v0_hip = 0.0; #endif #ifdef STARPU_QUICK_CHECK static unsigned niter = 2; #else static unsigned niter = 10; #endif static unsigned nsleeps = 1; static unsigned nslicesx = 4; static unsigned nslicesy = 4; static unsigned nslicesz = 4; #if defined(STARPU_QUICK_CHECK) && !defined(STARPU_SIMGRID) static unsigned xdim = 256; static unsigned ydim = 256; static unsigned zdim = 64; #else static unsigned xdim = 960*4; static unsigned ydim = 960*4; static unsigned zdim = 960*4; #endif static unsigned check = 0; static unsigned bound = 0; static unsigned print_hostname = 0; static unsigned tiled = 0; #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define PRINTF(fmt, ...) do { if (!getenv("STARPU_SSILENT")) {printf(fmt, ## __VA_ARGS__); fflush(stdout); }} while(0) static TYPE *A, *B, *C; static starpu_data_handle_t A_handle, B_handle, C_handle; #ifdef STARPU_HAVE_BLAS static int check_output(void) { /* compute C = C - AB */ CPU_GEMM("N", "N", ydim, xdim, zdim, (TYPE)-1.0f, A, ydim, B, zdim, (TYPE)1.0f, C, ydim); /* make sure C = 0 */ TYPE err; err = CPU_ASUM(xdim*ydim, C, 1); if (err < EPSILON*xdim*ydim*zdim) { FPRINTF(stderr, "Results are OK\n"); return 0; } else { int max; max = CPU_IAMAX(xdim*ydim, C, 1); FPRINTF(stderr, "There were errors ... err = %f\n", err); FPRINTF(stderr, "Max error : %e\n", C[max]); return 1; } } #endif static int clean_problem_data(int enodev) { int ret = enodev; starpu_data_unpartition(C_handle, STARPU_MAIN_RAM); starpu_data_unpartition(B_handle, STARPU_MAIN_RAM); starpu_data_unpartition(A_handle, STARPU_MAIN_RAM); starpu_data_unregister(A_handle); starpu_data_unregister(B_handle); starpu_data_unregister(C_handle); #ifdef STARPU_HAVE_BLAS #ifndef STARPU_SIMGRID if (!enodev && check) ret = check_output(); #endif #endif starpu_free_flags(A, zdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_free_flags(B, xdim*zdim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_free_flags(C, xdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); return ret; } #ifdef STARPU_USE_CUDA static void cublas_mult(void *descr[], void *arg, const TYPE *beta); static void cublas_gemm0(void *descr[], void *arg) { cublas_mult(descr, arg, &v0_cuda); } static void cublas_gemm(void *descr[], void *arg) { cublas_mult(descr, arg, &p1_cuda); } #endif #ifdef STARPU_USE_HIP #ifdef STARPU_USE_HIPBLAS static void hipblas_mult(void *descr[], void *arg, const TYPE *beta) { (void)arg; TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); hipblasStatus_t status = HIPBLAS_GEMM(starpu_hipblas_get_local_handle(), HIPBLAS_OP_N, HIPBLAS_OP_N, nxC, nyC, nyA, &p1_hip, subA, ldA, subB, ldB, beta, subC, ldC); if (status != HIPBLAS_STATUS_SUCCESS) STARPU_HIPBLAS_REPORT_ERROR(status); } static void hipblas_gemm0(void *descr[], void *arg) { hipblas_mult(descr, arg, &v0_hip); } static void hipblas_gemm(void *descr[], void *arg) { hipblas_mult(descr, arg, &p1_hip); } #endif #endif #ifdef STARPU_HAVE_BLAS void cpu_mult(void *descr[], void *arg, TYPE beta); void cpu_gemm0(void *descr[], void *arg) { cpu_mult(descr, arg, 0.); } void cpu_gemm(void *descr[], void *arg) { cpu_mult(descr, arg, 1.); } #endif static struct starpu_perfmodel starpu_gemm_model = { .type = STARPU_HISTORY_BASED, .symbol = STARPU_GEMM_STR(gemm) }; static void parse_args(int argc, char **argv); static void init_problem_data(void); static void partition_mult_data(void); static int run_data(void); int main(int argc, char **argv) { parse_args(argc, argv); starpu_fxt_autostart_profiling(0); int ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_cublas_init(); starpu_hipblas_init(); init_problem_data(); partition_mult_data(); ret = run_data(); ret = clean_problem_data(ret); starpu_cublas_shutdown(); starpu_hipblas_shutdown(); starpu_shutdown(); return ret; } starpu-1.4.9+dfsg/examples/mult/xgemm_layout.c000066400000000000000000000765511507764646700215030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2017-2017 Erwan Leria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * Simple parallel GEMM implementation: partition the output matrix in the two * dimensions, and the input matrices in the corresponding dimension, and * perform the output computations in parallel. */ #include "xgemm.h" static unsigned invalidate_c_tile = 0; static unsigned random_task_order = 0; static unsigned recursive_matrix_layout = 0; static unsigned random_data_access = 0; static unsigned count_do_schedule = 1; static unsigned sparse_matrix = 0; /* % de chance qu'une tâche soit créé avec sparse matrix. */ static int chance_to_be_created = 100; static TYPE **Cscratch; static void init_problem_data(void) { #ifndef STARPU_SIMGRID unsigned i,j; #endif starpu_malloc_flags((void **)&A, zdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_malloc_flags((void **)&B, xdim*zdim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_malloc_flags((void **)&C, xdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); #ifndef STARPU_SIMGRID /* fill the A and B matrices */ for (j=0; j < ydim; j++) { for (i=0; i < zdim; i++) { A[j+i*ydim] = (TYPE)(starpu_drand48()); } } for (j=0; j < zdim; j++) { for (i=0; i < xdim; i++) { B[j+i*zdim] = (TYPE)(starpu_drand48()); } } for (j=0; j < ydim; j++) { for (i=0; i < xdim; i++) { C[j+i*ydim] = (TYPE)(0); } } #endif if (!tiled) { unsigned x; unsigned ncuda = starpu_cuda_worker_get_count(); Cscratch = malloc(sizeof(TYPE*) * ncuda); for(x = 0; x < ncuda; x++) { unsigned worker = starpu_worker_get_by_type(STARPU_CUDA_WORKER, x); unsigned node = starpu_worker_get_memory_node(worker); Cscratch[x] = (TYPE*) starpu_malloc_on_node(node, (xdim / nslicesx) * (ydim / nslicesy) * sizeof(TYPE)); } } } void nop(void *descr[], void *arg) { (void) descr; (void) arg; } static struct starpu_codelet redux_cl = { .where = STARPU_NOWHERE, .cpu_funcs = {nop}, .cpu_funcs_name = {"nop"}, .cuda_funcs = {nop}, .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = {STARPU_RW | STARPU_COMMUTE, STARPU_R}, .model = &starpu_perfmodel_nop }; static struct starpu_codelet init_cl = { .where = STARPU_NOWHERE, .cpu_funcs = {nop}, .cpu_funcs_name = {"nop"}, .cuda_funcs = {nop}, .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 1, .modes = {STARPU_W}, .model = &starpu_perfmodel_nop }; static void partition_mult_data(void) { unsigned x, y, z; starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, ydim, ydim, zdim, sizeof(TYPE)); starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, zdim, zdim, xdim, sizeof(TYPE)); starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C, ydim, ydim, xdim, sizeof(TYPE)); starpu_data_set_reduction_methods(C_handle, &redux_cl, &init_cl); struct starpu_data_filter vert; memset(&vert, 0, sizeof(vert)); vert.filter_func = starpu_matrix_filter_vertical_block; vert.nchildren = nslicesx; struct starpu_data_filter horiz; memset(&horiz, 0, sizeof(horiz)); horiz.filter_func = starpu_matrix_filter_block; horiz.nchildren = nslicesy; if (tiled) { struct starpu_data_filter vertA; memset(&vertA, 0, sizeof(vertA)); vertA.filter_func = starpu_matrix_filter_vertical_block; vertA.nchildren = nslicesz; struct starpu_data_filter horizB; memset(&horizB, 0, sizeof(horizB)); horizB.filter_func = starpu_matrix_filter_block; horizB.nchildren = nslicesz; starpu_data_map_filters(A_handle, 2, &vertA, &horiz); starpu_data_map_filters(B_handle, 2, &vert, &horizB); starpu_data_map_filters(C_handle, 2, &vert, &horiz); for (y = 0; y < nslicesy; y++) for (z = 0; z < nslicesz; z++) starpu_data_set_coordinates(starpu_data_get_sub_data(A_handle, 2, z, y), 2, z, y); for (x = 0; x < nslicesx; x++) for (z = 0; z < nslicesz; z++) starpu_data_set_coordinates(starpu_data_get_sub_data(B_handle, 2, x, z), 2, x, z); } else { starpu_data_partition(B_handle, &vert); starpu_data_partition(A_handle, &horiz); starpu_data_map_filters(C_handle, 2, &vert, &horiz); for (y = 0; y < nslicesy; y++) starpu_data_set_coordinates(starpu_data_get_sub_data(A_handle, 1, y), 2, 0, y); for (x = 0; x < nslicesx; x++) starpu_data_set_coordinates(starpu_data_get_sub_data(B_handle, 1, x), 2, x, 0); } for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) starpu_data_set_coordinates(starpu_data_get_sub_data(C_handle, 2, x, y), 2, x, y); } #ifdef STARPU_USE_CUDA static void cublas_mult2d(void *descr[], void *arg, const TYPE *beta) { (void)arg; TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned worker = starpu_worker_get_id_check(); unsigned devid = starpu_worker_get_devid(worker); TYPE *subC = Cscratch[devid]; unsigned nxC = STARPU_MATRIX_GET_NY(descr[1]); unsigned nyC = STARPU_MATRIX_GET_NX(descr[0]); unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); unsigned ldC = nxC; cudaStream_t stream = starpu_cuda_get_local_stream(); cublasStatus_t status = CUBLAS_GEMM(starpu_cublas_get_local_handle(), CUBLAS_OP_N, CUBLAS_OP_N, nxC, nyC, nyA, &p1_cuda, subA, ldA, subB, ldB, beta, subC, ldC); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif #ifdef STARPU_USE_CUDA static void cublas_mult(void *descr[], void *arg, const TYPE *beta) { (void)arg; TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); cudaStream_t stream = starpu_cuda_get_local_stream(); if (nxC == ldC) cudaMemsetAsync(subC, 0, sizeof(*subC) * nxC * nyC, stream); else { unsigned i; for (i = 0; i < nyC; i++) cudaMemsetAsync(subC + i*ldC, 0, sizeof(*subC) * nxC, stream); } cublasStatus_t status = CUBLAS_GEMM(starpu_cublas_get_local_handle(), CUBLAS_OP_N, CUBLAS_OP_N, nxC, nyC, nyA, &p1_cuda, subA, ldA, subB, ldB, beta, subC, ldC); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif #ifdef STARPU_USE_CUDA static void cublas_gemm2d(void *descr[], void *arg) { cublas_mult2d(descr, arg, &v0_cuda); } #endif #ifdef STARPU_HAVE_BLAS void cpu_mult2d(void *descr[], void *arg, TYPE beta) { (void)arg; TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned nxC = STARPU_MATRIX_GET_NY(descr[1]); unsigned nyC = STARPU_MATRIX_GET_NX(descr[0]); unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); unsigned ldC = nxC; TYPE subC[nxC*nyC]; int worker_size = starpu_combined_worker_get_size(); if (worker_size == 1) { /* Sequential CPU task */ CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, beta, subC, ldC); } else { /* Parallel CPU task */ unsigned rank = starpu_combined_worker_get_rank(); unsigned block_size = (nyC + worker_size - 1)/worker_size; unsigned new_nyC = STARPU_MIN(nyC, block_size*(rank+1)) - block_size*rank; STARPU_ASSERT(nyC == STARPU_MATRIX_GET_NY(descr[1])); TYPE *new_subB = &subB[block_size*rank]; TYPE *new_subC = &subC[block_size*rank]; CPU_GEMM("N", "N", nxC, new_nyC, nyA, (TYPE)1.0, subA, ldA, new_subB, ldB, beta, new_subC, ldC); } } #endif #ifdef STARPU_HAVE_BLAS void cpu_mult(void *descr[], void *arg, TYPE beta) { (void)arg; TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); int worker_size = starpu_combined_worker_get_size(); if (nxC == ldC) memset(subC, 0, sizeof(*subC) * nxC * nyC); else { unsigned i; for (i = 0; i < nyC; i++) memset(subC + i*ldC, 0, sizeof(*subC) * nxC); } if (worker_size == 1) { /* Sequential CPU task */ CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, beta, subC, ldC); } else { /* Parallel CPU task */ unsigned rank = starpu_combined_worker_get_rank(); unsigned block_size = (nyC + worker_size - 1)/worker_size; unsigned new_nyC = STARPU_MIN(nyC, block_size*(rank+1)) - block_size*rank; STARPU_ASSERT(nyC == STARPU_MATRIX_GET_NY(descr[1])); TYPE *new_subB = &subB[block_size*rank]; TYPE *new_subC = &subC[block_size*rank]; CPU_GEMM("N", "N", nxC, new_nyC, nyA, (TYPE)1.0, subA, ldA, new_subB, ldB, beta, new_subC, ldC); } } #endif #ifdef STARPU_HAVE_BLAS void cpu_gemm2d(void *descr[], void *arg) { cpu_mult2d(descr, arg, 0.); } #endif /* Codelet for 2D matrix */ static struct starpu_codelet cl_gemm2d = { #ifdef STARPU_HAVE_BLAS .type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */ .max_parallelism = INT_MAX, .cpu_funcs = {cpu_gemm2d}, .cpu_funcs_name = {"cpu_gemm2d"}, #endif #ifdef STARPU_USE_CUDA .cuda_funcs = {cublas_gemm2d}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = {STARPU_R, STARPU_R}, .model = &starpu_gemm_model }; /* Codelet for 3D matrix z = 0 */ static struct starpu_codelet cl_gemm0 = { #ifdef STARPU_HAVE_BLAS .type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */ .max_parallelism = INT_MAX, .cpu_funcs = {cpu_gemm0}, .cpu_funcs_name = {"cpu_gemm0"}, #endif #ifdef STARPU_USE_CUDA .cuda_funcs = {cublas_gemm0}, #elif defined(STARPU_USE_HIP) && defined(STARPU_USE_HIPBLAS) .hip_funcs = {hipblas_gemm0}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .hip_flags = {STARPU_HIP_ASYNC}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_R}, .model = &starpu_gemm_model }; /* Codelet for 3D matrix z = 1, 2, 3 */ static struct starpu_codelet cl_gemm = { #ifdef STARPU_HAVE_BLAS .type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */ .max_parallelism = INT_MAX, .cpu_funcs = {cpu_gemm}, .cpu_funcs_name = {"cpu_gemm"}, #endif #ifdef STARPU_USE_CUDA .cuda_funcs = {cublas_gemm}, #elif defined(STARPU_USE_HIP) && defined(STARPU_USE_HIPBLAS) .hip_funcs = {hipblas_gemm}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .hip_flags = {STARPU_HIP_ASYNC}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_REDUX}, .model = &starpu_gemm_model }; /** INVALIDATE_C_TILE Pour choisir de mettre ou non les RW dans les codelets gemm en 3D. To randomize tasks or their order RANDOM_TASK_ORDER (only for 2D matrix) RECURSIVE_MATRIX_LAYOUT (only for 2D matrix) RANDOM_DATA_ACCESS (only for 2D matrix) COUNT_DO_SCHEDULE do schedule for HFP pris en compte ou non SPARSE_MATRIX 0 by default. Something else than 0 correspond to the percentage of chance of a task to be created. So SPARSE_MATRIX=10 means you a 10% of the tasks (on average). Fix STARPU_RAND_SEED if you want to have similar results among different schedulers! */ static void parse_args(int argc, char **argv) { int i; int size_set = 0; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-3d") == 0) { tiled = 1; } else if (strcmp(argv[i], "-nblocks") == 0) { char *argptr; nslicesx = strtol(argv[++i], &argptr, 10); nslicesy = nslicesx; nslicesz = nslicesx; if (nslicesx == 0) { fprintf(stderr, "the number of blocks in X cannot be 0!\n"); exit(EXIT_FAILURE); } } else if (strcmp(argv[i], "-nblocksx") == 0) { char *argptr; nslicesx = strtol(argv[++i], &argptr, 10); if (nslicesx == 0) { fprintf(stderr, "the number of blocks in X cannot be 0!\n"); exit(EXIT_FAILURE); } } else if (strcmp(argv[i], "-nblocksy") == 0) { char *argptr; nslicesy = strtol(argv[++i], &argptr, 10); if (nslicesy == 0) { fprintf(stderr, "the number of blocks in Y cannot be 0!\n"); exit(EXIT_FAILURE); } } else if (strcmp(argv[i], "-nblocksz") == 0) { char *argptr; nslicesz = strtol(argv[++i], &argptr, 10); if (nslicesz == 0) { fprintf(stderr, "the number of blocks in Z cannot be 0!\n"); exit(EXIT_FAILURE); } } else if (strcmp(argv[i], "-x") == 0) { char *argptr; xdim = strtol(argv[++i], &argptr, 10); if (xdim == 0) { fprintf(stderr, "the X dimension cannot be 0!\n"); exit(EXIT_FAILURE); } size_set = 1; } else if (strcmp(argv[i], "-xy") == 0) { char *argptr; xdim = ydim = strtol(argv[++i], &argptr, 10); if (xdim == 0) { fprintf(stderr, "the XY dimensions cannot be 0!\n"); exit(EXIT_FAILURE); } size_set = 1; } else if (strcmp(argv[i], "-xyz") == 0) { char *argptr; xdim = ydim = zdim = strtol(argv[++i], &argptr, 10); size_set = 1; } else if (strcmp(argv[i], "-xyz") == 0) { char *argptr; xdim = ydim = zdim = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-y") == 0) { char *argptr; ydim = strtol(argv[++i], &argptr, 10); if (ydim == 0) { fprintf(stderr, "the Y dimension cannot be 0!\n"); exit(EXIT_FAILURE); } size_set = 1; } else if (strcmp(argv[i], "-z") == 0) { char *argptr; zdim = strtol(argv[++i], &argptr, 10); if (zdim == 0) { fprintf(stderr, "the Z dimension cannot be 0!\n"); exit(EXIT_FAILURE); } size_set = 1; } else if (strcmp(argv[i], "-size") == 0) { char *argptr; xdim = ydim = zdim = strtol(argv[++i], &argptr, 10); if (xdim == 0) { fprintf(stderr, "the size cannot be 0!\n"); exit(EXIT_FAILURE); } size_set = 1; } else if (strcmp(argv[i], "-iter") == 0) { char *argptr; niter = strtol(argv[++i], &argptr, 10); if (niter == 0) { fprintf(stderr, "the number of iterations cannot be 0!\n"); exit(EXIT_FAILURE); } } else if (strcmp(argv[i], "-nsleeps") == 0) { char *argptr; nsleeps = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-bound") == 0) { bound = 1; } else if (strcmp(argv[i], "-invalidate-c-tile") == 0) { invalidate_c_tile = 1; } else if (strcmp(argv[i], "-random-task-order") == 0) { random_task_order = 1; } else if (strcmp(argv[i], "-random-data-access") == 0) { random_data_access = 1; } else if (strcmp(argv[i], "-recursive-matrix-layout") == 0) { recursive_matrix_layout = 1; } else if (strcmp(argv[i], "-no-count-do-schedule") == 0) { count_do_schedule = 0; } else if (strcmp(argv[i], "-sparse-matrix") == 0) { char *argptr; sparse_matrix = strtol(argv[++i], &argptr, 10); if (sparse_matrix > 100) { fprintf(stderr, "incorrect value %u for sparse-matrix parameter!\n", sparse_matrix); exit(EXIT_FAILURE); } if (sparse_matrix != 0) { chance_to_be_created = sparse_matrix; } } else if (strcmp(argv[i], "-hostname") == 0) { print_hostname = 1; } else if (strcmp(argv[i], "-check") == 0) { check = 1; } else if (strcmp(argv[i], "-spmd") == 0) { cl_gemm0.type = STARPU_SPMD; } else if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) { fprintf(stderr,"Usage: %s [-3d] [-nblocks n] [-nblocksx x] [-nblocksy y] [-nblocksz z] [-x x] [-y y] [-xy n] [-z z] [-xyz n] [-size size] [-iter iter] [-bound] [-check] [-spmd] [-hostname] [-nsleeps nsleeps]\n", argv[0]); if (tiled) fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%ux%u blocks (size %ux%u length %u), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, nslicesz, xdim / nslicesx, ydim / nslicesy, zdim / nslicesz, niter, nsleeps); else fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%u blocks (size %ux%u length %u), %u iterations, %u sleeps\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, xdim / nslicesx, ydim / nslicesy, zdim, niter, nsleeps); exit(EXIT_SUCCESS); } else { fprintf(stderr,"Unrecognized option %s\n", argv[i]); exit(EXIT_FAILURE); } } #ifndef STARPU_SIMGRID if (check && !size_set) { /* Check is sequential, reduce its default duration */ xdim /= 2; ydim /= 2; } #endif #ifdef STARPU_QUICK_CHECK niter /= 10; if(niter==0) niter=1; #endif } #define check_evicted(main_handle, i1, i2) do { \ if (index++ < next_evicted) \ continue; \ int is_allocated; \ starpu_data_handle_t sub_handle = starpu_data_get_sub_data(main_handle, 2, i1, i2); \ starpu_data_query_status(sub_handle, node, &is_allocated, NULL, NULL); \ if (is_allocated && starpu_data_can_evict(sub_handle, node, is_prefetch)) \ { \ next_evicted = index; \ FPRINTF(stderr,"evicting %p\n", sub_handle); \ return sub_handle; \ } \ } while(0) /* Don't do this at home, kids, this is really dumb! */ starpu_data_handle_t dumb_victim_selector(starpu_data_handle_t *toload, unsigned node, enum starpu_is_prefetch is_prefetch) { static unsigned next_evicted; // index of next data to evict, to avoid getting stuck. Yes this is awful. unsigned index = 0; if (tiled) { if (next_evicted == nslicesy*nslicesz + nslicesx+nslicesz + nslicesx*nslicesy) next_evicted = 0; unsigned x, y, z; for (y = 0; y < nslicesy; y++) for (z = 0; z < nslicesz; z++) check_evicted(A_handle, z, y); for (x = 0; x < nslicesx; x++) for (z = 0; z < nslicesz; z++) check_evicted(B_handle, x, z); for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) check_evicted(C_handle, x, y); } else { if (next_evicted == 3*nslicesx*nslicesy) next_evicted = 0; unsigned x, y; for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) check_evicted(A_handle, 1, y); for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) check_evicted(B_handle, 1, x); for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) check_evicted(C_handle, x, y); } FPRINTF(stderr,"uh, no evictable data\n"); next_evicted = 0; return NULL; } int data_evict_from_non_cpus(starpu_data_handle_t handle) { int global_ret=0; unsigned nodeid; for (nodeid = 0; nodeid < starpu_memory_nodes_get_count(); nodeid++) { if (starpu_node_get_kind(nodeid) != STARPU_CPU_RAM) { int ret = starpu_data_evict_from_node(handle, nodeid); if (ret != 0) global_ret = ret; } } return global_ret; } #define SCHEDULE_WAIT() do { \ if (count_do_schedule == 0) \ { \ starpu_do_schedule(); \ start = starpu_timing_now(); \ starpu_resume(); \ starpu_task_wait_for_all(); \ end = starpu_timing_now(); \ } \ else \ { \ start = starpu_timing_now(); \ starpu_do_schedule(); \ starpu_resume(); \ starpu_task_wait_for_all(); \ end = starpu_timing_now(); \ }} while(0) static int run_data(void) { PRINTF("# "); if (print_hostname) PRINTF("node\t"); PRINTF("x\ty\tz\tms\tGFlops\tDeviance"); if (bound) PRINTF("\tTms\tTGFlops\tTims\tTiGFlops\tTDeviance"); PRINTF("\n"); starpu_seed(0); unsigned sleeps; for(sleeps = 0; sleeps < nsleeps; sleeps++) { if (bound) starpu_bound_start(0, 0); starpu_fxt_start_profiling(); double start, end; //start = starpu_timing_now(); /* Moved before starpu_resume so we don't start time during scheduling */ double timing = 0; double timing_square = 0; /* Matrice 3D */ if (tiled) { unsigned iter; for (iter = 0; iter < niter; iter++) { starpu_pause(); /* To get all tasks at once */ unsigned x,y; for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) { starpu_data_handle_t Ctile = starpu_data_get_sub_data(C_handle, 2, x, y); if (invalidate_c_tile == 1) { starpu_data_invalidate(Ctile); /* Modifie les perfs pour DMDAR, à N>35 cela plombe ces performances au niveau de EAGER. La raison est l'allocation. */ } unsigned z; for (z = 0; z < nslicesz; z++) { /* Ajout pour sparse matrix. */ if (random()%100 < chance_to_be_created) { struct starpu_codelet *cl; cl = (z == 0) ? &cl_gemm0 : &cl_gemm; int ret = starpu_task_insert(cl, cl->modes[0], starpu_data_get_sub_data(A_handle, 2, z, y), cl->modes[1], starpu_data_get_sub_data(B_handle, 2, x, z), cl->modes[2], Ctile, STARPU_FLOPS, (double) (2ULL * (xdim/nslicesx) * (ydim/nslicesy) * (zdim/nslicesz)), 0); if (ret == -ENODEV) { check = 0; starpu_resume(); return 77; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } } starpu_data_wont_use(Ctile); } SCHEDULE_WAIT(); if (niter > 1) { if (iter != 0) { timing += end - start; timing_square += (end-start) * (end-start); } for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) { data_evict_from_non_cpus(starpu_data_get_sub_data(C_handle, 2, x, y)); unsigned z; for (z = 0; z < nslicesz; z++) { data_evict_from_non_cpus(starpu_data_get_sub_data(A_handle, 2, z, y)); data_evict_from_non_cpus(starpu_data_get_sub_data(B_handle, 2, x, z)); } } } else { timing = end - start; } } } else if (random_task_order == 1 && recursive_matrix_layout == 0 && random_data_access == 0) { /* Randomize the order in which task are sent, but the tasks are the same */ unsigned tab_x[nslicesx][nslicesx]; unsigned tab_y[nslicesy][nslicesy]; unsigned iter; for (iter = 0; iter < niter; iter++) { unsigned i, j; for (i=0; i < nslicesx; i++) for (j = 0; j < nslicesx; j++) tab_x[i][j] = i; for (i=0; i < nslicesy; i++) for (j = 0; j < nslicesy; j++) tab_y[i][j] = j; //Shuffle for(i=0; i 1) { if (iter != 0) { timing += end - start; timing_square += (end-start) * (end-start); } for (i = 0; i < nslicesx; i++) for (j = 0; j < nslicesy; j++) { data_evict_from_non_cpus(starpu_data_get_sub_data(A_handle, 1, j)); data_evict_from_non_cpus(starpu_data_get_sub_data(B_handle, 1, i)); } } else { timing = end - start; } } //End if RANDOM_TASK_ORDER == 1 } else if (recursive_matrix_layout == 1 && random_data_access == 0) { /* Tasks arrive in a "Z-order" */ unsigned tab_x[nslicesx][nslicesx]; unsigned tab_y[nslicesy][nslicesy]; unsigned iter; for (iter = 0; iter < niter; iter++) { unsigned i, j; for (i= 0; i < nslicesx; i++) for (j = 0; j < nslicesx; j++) tab_x[i][j] = i; for (i= 0; i < nslicesy; i++) for (j = 0; j < nslicesy; j++) tab_y[i][j] = j; for (i= 0; i < nslicesx; i++) { int x_z_layout, x_z_layout_i; int i_bis = 0; for (j = 0; j < nslicesx; j++) { if (i_bis%2 == 1) { x_z_layout_i = nslicesx/2; } if (j >= 4) { x_z_layout = (j/4)*2; } tab_x[i][j] = j%2 + x_z_layout + x_z_layout_i; } x_z_layout = 0; x_z_layout_i = 0; if (i%2 == 1) { i_bis++; } } for (i= 0; i < nslicesy; i++) { int y_z_layout_i = 0; int i_bis = 0; int y_z_layout = 0; for (j = 0; j < nslicesy; j++) { int j_bis = 0; if (i >= 4) { y_z_layout_i = 4*(i/4); } if (j_bis%2 == 1) { y_z_layout = 1; } if (i%2 == 1) { y_z_layout += 2; } tab_y[i][j] = y_z_layout + y_z_layout_i; if (j%2 == 1) { j_bis++; } y_z_layout = 0; y_z_layout_i = 0; } y_z_layout = 0; if (i%2 == 1) { i_bis++; } } starpu_pause(); for (i = 0; i < nslicesx; i++) { for (j = 0; j < nslicesy; j++) { if (random()%100 < chance_to_be_created) { int ret = starpu_task_insert(&cl_gemm2d, cl_gemm2d.modes[0], starpu_data_get_sub_data(A_handle, 1, tab_y[i][j]), cl_gemm2d.modes[1], starpu_data_get_sub_data(B_handle, 1, tab_x[i][j]), cl_gemm2d.modes[2], starpu_data_get_sub_data(C_handle, 2, tab_x[i][j], tab_y[i][j]), STARPU_FLOPS, (double) (2ULL * (xdim/nslicesx) * (ydim/nslicesy) * zdim), 0); if (ret == -ENODEV) { starpu_resume(); return 77; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_data_invalidate_submit(starpu_data_get_sub_data(C_handle, 2, tab_x[i][j], tab_y[i][j])); } } } SCHEDULE_WAIT(); if (iter != 0) { timing += end - start; timing_square += (end-start) * (end-start); } } //End If RECURSIVE_MATRIX_LAYOUT == 1 } /* This is the random 2D matrix operation we use */ else if (random_data_access == 1) { /* Each task takes as data a random line and a random column from A and B */ unsigned iter; for (iter = 0; iter < niter; iter++) { starpu_pause(); unsigned x, y; for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) { if (random()%100 < chance_to_be_created) { int ret = starpu_task_insert(&cl_gemm2d, cl_gemm2d.modes[0], starpu_data_get_sub_data(A_handle, 1, random()%nslicesy), cl_gemm2d.modes[1], starpu_data_get_sub_data(B_handle, 1, random()%nslicesx), cl_gemm2d.modes[2], starpu_data_get_sub_data(C_handle, 2, x, y), STARPU_FLOPS, (double) (2ULL * (xdim/nslicesx) * (ydim/nslicesy) * zdim), 0); if (ret == -ENODEV) { starpu_resume(); return 77; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_data_invalidate_submit(starpu_data_get_sub_data(C_handle, 2, x, y)); } } SCHEDULE_WAIT(); /* If I have more than 1 iteration I want the mean timing, else I don't */ if (niter > 1) { if (iter != 0) { timing += end - start; timing_square += (end-start) * (end-start); } for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) { data_evict_from_non_cpus(starpu_data_get_sub_data(A_handle, 1, y)); data_evict_from_non_cpus(starpu_data_get_sub_data(B_handle, 1, x)); } } else { timing = end - start; } } } else { /* Normal execution of xgemm */ unsigned iter; for (iter = 0; iter < niter; iter++) { starpu_pause(); unsigned x,y; for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) { if (random()%100 < chance_to_be_created) { int ret = starpu_task_insert(&cl_gemm2d, cl_gemm2d.modes[0], starpu_data_get_sub_data(A_handle, 1, y), cl_gemm2d.modes[1], starpu_data_get_sub_data(B_handle, 1, x), cl_gemm2d.modes[2], starpu_data_get_sub_data(C_handle, 2, x, y), STARPU_FLOPS, (double) (2ULL * (xdim/nslicesx) * (ydim/nslicesy) * zdim), 0); if (ret == -ENODEV) { starpu_resume(); return 77; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_data_invalidate_submit(starpu_data_get_sub_data(C_handle, 2, x, y)); } } SCHEDULE_WAIT(); if (niter > 1) { if (iter != 0) { timing += end - start; timing_square += (end-start) * (end-start); } for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) { data_evict_from_non_cpus(starpu_data_get_sub_data(A_handle, 1, y)); data_evict_from_non_cpus(starpu_data_get_sub_data(B_handle, 1, x)); } } else { timing = end - start; } } /* End of normal execution of 2D matrix. */ } starpu_fxt_stop_profiling(); if (bound) starpu_bound_stop(); double min, min_int; if (bound) starpu_bound_compute(&min, &min_int, 1); if (print_hostname) { char hostname[255]; gethostname(hostname, 255); PRINTF("%s\t", hostname); } /* Don't count first iteration */ niter--; if (niter+1 > 1) /* We also print the deviance */ { double flops = 2.0 * ((unsigned long long)(niter)) * ((unsigned long long)xdim) * ((unsigned long long)ydim) * ((unsigned long long)zdim); /* Cas sparse je divise les flops */ if (sparse_matrix != 0) { flops = (flops*sparse_matrix)/100; } double average = timing/niter; double deviation = sqrt(fabs(timing_square / niter - average*average)); PRINTF("%u\t%u\t%u\t%.0f\t%.1f\t%f", xdim, ydim, zdim, timing/niter/1000.0, flops/timing/1000.0, flops/niter/(average*average)*deviation/1000.0); if (bound) PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f\t%f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0, flops/niter/(average*average)*deviation/1000.0); PRINTF("\n"); } else /* We don't */ { double flops = 2.0 * ((unsigned long long)(niter+1)) * ((unsigned long long)xdim) * ((unsigned long long)ydim) * ((unsigned long long)zdim); PRINTF("%u\t%u\t%u\t%.0f\t%.1f\t%f", xdim, ydim, zdim, timing/(niter+1)/1000.0, flops/timing/1000.0, 0.0); if (bound) PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f\t%f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0, 0.0); PRINTF("\n"); } if (sleeps < nsleeps-1) { sleep(10); } } return 0; } starpu-1.4.9+dfsg/examples/native_fortran/000077500000000000000000000000001507764646700206475ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/native_fortran/Makefile_nf_dynbuf.mk000066400000000000000000000025701507764646700247530ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2015-2015 ONERA # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROG = nf_dynbuf STARPU_VERSION=1.3 FSTARPU_MOD = $(shell pkg-config --variable=starpu_includedir starpu-$(STARPU_VERSION))/fstarpu_mod.f90 SRCSF = nf_dynbuf_cl.f90 \ nf_dynbuf.f90 FC = gfortran FCFLAGS = -fdefault-real-8 -J. -g LDLIBS = $(shell pkg-config --libs starpu-$(STARPU_VERSION)) OBJS = fstarpu_mod.o $(SRCSF:%.f90=%.o) .phony: all clean all: $(PROG) $(PROG): $(OBJS) $(FC) $(LDFLAGS) -o $@ $^ $(LDLIBS) fstarpu_mod.o: $(FSTARPU_MOD) $(FC) $(FCFLAGS) -c -o $@ $< %.o: %.f90 $(FC) $(FCFLAGS) -c -o $@ $< clean: rm -fv *.o *.mod $(PROG) # modfiles generation dependences nf_dynbuf_cl.o: nf_dynbuf_cl.f90 fstarpu_mod.o nf_dynbuf.o: nf_dynbuf.f90 nf_types.o fstarpu_mod.o starpu-1.4.9+dfsg/examples/native_fortran/Makefile_nf_example.mk000066400000000000000000000026401507764646700251150ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2015-2015 ONERA # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROG = nf_example STARPU_VERSION=1.3 FSTARPU_MOD = $(shell pkg-config --variable=starpu_includedir starpu-$(STARPU_VERSION))/fstarpu_mod.f90 SRCSF = nf_types.f90 \ nf_compute.f90 \ nf_example.f90 FC = gfortran FCFLAGS = -fdefault-real-8 -J. -g LDLIBS = $(shell pkg-config --libs starpu-$(STARPU_VERSION)) OBJS = fstarpu_mod.o $(SRCSF:%.f90=%.o) .phony: all clean all: $(PROG) $(PROG): $(OBJS) $(FC) $(LDFLAGS) -o $@ $^ $(LDLIBS) fstarpu_mod.o: $(FSTARPU_MOD) $(FC) $(FCFLAGS) -c -o $@ $< %.o: %.f90 $(FC) $(FCFLAGS) -c -o $@ $< clean: rm -fv *.o *.mod $(PROG) # modfiles generation dependences nf_compute.o: nf_compute.f90 nf_types.o fstarpu_mod.o nf_example.o: nf_example.f90 nf_types.o nf_compute.o fstarpu_mod.o starpu-1.4.9+dfsg/examples/native_fortran/Makefile_nf_matrix.mk000066400000000000000000000026501507764646700247670ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROG = nf_matrix STARPU_VERSION=1.3 FSTARPU_MOD = $(shell pkg-config --variable=starpu_includedir starpu-$(STARPU_VERSION))/fstarpu_mod.f90 SRCSF = nf_matrix.f90 \ nf_codelets.f90 FC = gfortran CC = gcc CFLAGS = -g $(shell pkg-config --cflags starpu-$(STARPU_VERSION)) FCFLAGS = -fdefault-real-8 -J. -g LDLIBS = $(shell pkg-config --libs starpu-$(STARPU_VERSION)) OBJS = $(SRCSC:%.c=%.o) fstarpu_mod.o $(SRCSF:%.f90=%.o) .phony: all clean all: $(PROG) $(PROG): $(OBJS) $(FC) $(LDFLAGS) -o $@ $^ $(LDLIBS) %.o: %.c $(CC) $(CFLAGS) -c -o $@ $< fstarpu_mod.o: $(FSTARPU_MOD) $(FC) $(FCFLAGS) -c -o $@ $< %.o: %.f90 $(FC) $(FCFLAGS) -c -o $@ $< clean: rm -fv *.o *.mod $(PROG) nf_matrix.o: nf_matrix.f90 nf_codelets.o fstarpu_mod.o nf_codelets.o: fstarpu_mod.o starpu-1.4.9+dfsg/examples/native_fortran/Makefile_nf_partition.mk000066400000000000000000000027221507764646700254740ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROG = nf_partition STARPU_VERSION=1.3 FSTARPU_MOD = $(shell pkg-config --variable=starpu_includedir starpu-$(STARPU_VERSION))/fstarpu_mod.f90 SRCSF = nf_partition_cl.f90 \ nf_partition.f90 FC = gfortran CC = gcc CFLAGS = -g $(shell pkg-config --cflags starpu-$(STARPU_VERSION)) FCFLAGS = -fdefault-real-8 -J. -g LDLIBS = $(shell pkg-config --libs starpu-$(STARPU_VERSION)) OBJS = $(SRCSC:%.c=%.o) fstarpu_mod.o $(SRCSF:%.f90=%.o) .phony: all clean all: $(PROG) $(PROG): $(OBJS) $(FC) $(LDFLAGS) -o $@ $^ $(LDLIBS) %.o: %.c $(CC) $(CFLAGS) -c -o $@ $< fstarpu_mod.o: $(FSTARPU_MOD) $(FC) $(FCFLAGS) -c -o $@ $< %.o: %.f90 $(FC) $(FCFLAGS) -c -o $@ $< clean: rm -fv *.o *.mod $(PROG) nf_parition_cl.o: nf_partition_cl.f90 fstarpu_mod.o nf_partition.o: nf_partition.f90 nf_parition_cl.o fstarpu_mod.o starpu-1.4.9+dfsg/examples/native_fortran/Makefile_nf_sched_ctx.mk000066400000000000000000000025401507764646700254250ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROG = nf_sched_ctx STARPU_VERSION=1.3 FSTARPU_MOD = $(shell pkg-config --variable=starpu_includedir starpu-$(STARPU_VERSION))/fstarpu_mod.f90 SRCSF = nf_sched_ctx_cl.f90 \ nf_sched_ctx.f90 FC = gfortran FCFLAGS = -fdefault-real-8 -J. -g LDLIBS = $(shell pkg-config --libs starpu-$(STARPU_VERSION)) OBJS = fstarpu_mod.o $(SRCSF:%.f90=%.o) .phony: all clean all: $(PROG) $(PROG): $(OBJS) $(FC) $(LDFLAGS) -o $@ $^ $(LDLIBS) fstarpu_mod.o: $(FSTARPU_MOD) $(FC) $(FCFLAGS) -c -o $@ $< %.o: %.f90 $(FC) $(FCFLAGS) -c -o $@ $< clean: rm -fv *.o *.mod $(PROG) # modfiles generation dependences nf_sched_ctx_cl.o: nf_sched_ctx_cl.f90 fstarpu_mod.o nf_sched_ctx.o: nf_sched_ctx.f90 fstarpu_mod.o starpu-1.4.9+dfsg/examples/native_fortran/Makefile_nf_varbuf.mk000066400000000000000000000025261507764646700247520ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROG = nf_varbuf STARPU_VERSION=1.3 FSTARPU_MOD = $(shell pkg-config --variable=starpu_includedir starpu-$(STARPU_VERSION))/fstarpu_mod.f90 SRCSF = nf_varbuf_cl.f90 \ nf_varbuf.f90 FC = gfortran FCFLAGS = -fdefault-real-8 -J. -g LDLIBS = $(shell pkg-config --libs starpu-$(STARPU_VERSION)) OBJS = fstarpu_mod.o $(SRCSF:%.f90=%.o) .phony: all clean all: $(PROG) $(PROG): $(OBJS) $(FC) $(LDFLAGS) -o $@ $^ $(LDLIBS) fstarpu_mod.o: $(FSTARPU_MOD) $(FC) $(FCFLAGS) -c -o $@ $< %.o: %.f90 $(FC) $(FCFLAGS) -c -o $@ $< clean: rm -fv *.o *.mod $(PROG) # modfiles generation dependences nf_varbuf_cl.o: nf_varbuf_cl.f90 fstarpu_mod.o nf_varbuf.o: nf_varbuf.f90 nf_types.o fstarpu_mod.o starpu-1.4.9+dfsg/examples/native_fortran/Makefile_nf_vector.mk000066400000000000000000000026501507764646700247650ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROG = nf_vector STARPU_VERSION=1.3 FSTARPU_MOD = $(shell pkg-config --variable=starpu_includedir starpu-$(STARPU_VERSION))/fstarpu_mod.f90 SRCSF = nf_vector.f90 \ nf_codelets.f90 FC = gfortran CC = gcc CFLAGS = -g $(shell pkg-config --cflags starpu-$(STARPU_VERSION)) FCFLAGS = -fdefault-real-8 -J. -g LDLIBS = $(shell pkg-config --libs starpu-$(STARPU_VERSION)) OBJS = $(SRCSC:%.c=%.o) fstarpu_mod.o $(SRCSF:%.f90=%.o) .phony: all clean all: $(PROG) $(PROG): $(OBJS) $(FC) $(LDFLAGS) -o $@ $^ $(LDLIBS) %.o: %.c $(CC) $(CFLAGS) -c -o $@ $< fstarpu_mod.o: $(FSTARPU_MOD) $(FC) $(FCFLAGS) -c -o $@ $< %.o: %.f90 $(FC) $(FCFLAGS) -c -o $@ $< clean: rm -fv *.o *.mod $(PROG) nf_vector.o: nf_vector.f90 nf_codelets.o fstarpu_mod.o nf_codelets.o: fstarpu_mod.o starpu-1.4.9+dfsg/examples/native_fortran/fstarpu_mod.f90000066400000000000000000005046651507764646700235320ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! !> @ingroup API_Fortran !> @brief Fortran API module fstarpu_mod use iso_c_binding implicit none ! Note: Constants truly are intptr_t, but are declared as c_ptr to be ! readily usable in c_ptr arrays to mimic variadic functions. ! Note: Bitwise or operator is provided by the .ior. overloaded operator type(c_ptr), bind(C) :: FSTARPU_R type(c_ptr), bind(C) :: FSTARPU_W type(c_ptr), bind(C) :: FSTARPU_RW type(c_ptr), bind(C) :: FSTARPU_SCRATCH type(c_ptr), bind(C) :: FSTARPU_REDUX type(c_ptr), bind(C) :: FSTARPU_MPI_REDUX type(c_ptr), bind(C) :: FSTARPU_COMMUTE type(c_ptr), bind(C) :: FSTARPU_SSEND type(c_ptr), bind(C) :: FSTARPU_LOCALITY type(c_ptr), bind(C) :: FSTARPU_DATA_ARRAY type(c_ptr), bind(C) :: FSTARPU_DATA_MODE_ARRAY type(c_ptr), bind(C) :: FSTARPU_CL_ARGS type(c_ptr), bind(C) :: FSTARPU_CL_ARGS_NFREE type(c_ptr), bind(C) :: FSTARPU_TASK_DEPS_ARRAY type(c_ptr), bind(C) :: FSTARPU_CALLBACK type(c_ptr), bind(C) :: FSTARPU_CALLBACK_WITH_ARG type(c_ptr), bind(C) :: FSTARPU_CALLBACK_WITH_ARG_NFREE type(c_ptr), bind(C) :: FSTARPU_CALLBACK_ARG type(c_ptr), bind(C) :: FSTARPU_CALLBACK_ARG_NFREE type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_ARG type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP_ARG type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE type(c_ptr), bind(C) :: FSTARPU_PRIORITY type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_NODE type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_DATA type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_WORKER type(c_ptr), bind(C) :: FSTARPU_WORKER_ORDER type(c_ptr), bind(C) :: FSTARPU_EXECUTE_WHERE type(c_ptr), bind(C) :: FSTARPU_HYPERVISOR_TAG type(c_ptr), bind(C) :: FSTARPU_POSSIBLY_PARALLEL type(c_ptr), bind(C) :: FSTARPU_FLOPS type(c_ptr), bind(C) :: FSTARPU_TAG type(c_ptr), bind(C) :: FSTARPU_TAG_ONLY type(c_ptr), bind(C) :: FSTARPU_NAME type(c_ptr), bind(C) :: FSTARPU_TASK_COLOR type(c_ptr), bind(C) :: FSTARPU_TASK_SYNCHRONOUS type(c_ptr), bind(C) :: FSTARPU_HANDLES_SEQUENTIAL_CONSISTENCY type(c_ptr), bind(C) :: FSTARPU_TASK_END_DEP type(c_ptr), bind(C) :: FSTARPU_NODE_SELECTION_POLICY type(c_ptr), bind(C) :: FSTARPU_TASK_SCHED_DATA type(c_ptr), bind(C) :: FSTARPU_VALUE type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX type(c_ptr), bind(C) :: FSTARPU_CPU_WORKER type(c_ptr), bind(C) :: FSTARPU_CUDA_WORKER type(c_ptr), bind(C) :: FSTARPU_OPENCL_WORKER type(c_ptr), bind(C) :: FSTARPU_ANY_WORKER integer(c_int), bind(C) :: FSTARPU_NMAXBUFS type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_NAME type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_STRUCT type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_MIN_PRIO type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_MAX_PRIO type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_HIERARCHY_LEVEL type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_NESTED type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_AWAKE_WORKERS type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_INIT type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_USER_DATA type(c_ptr), bind(C) :: FSTARPU_NOWHERE type(c_ptr), bind(C) :: FSTARPU_CPU type(c_ptr), bind(C) :: FSTARPU_CUDA type(c_ptr), bind(C) :: FSTARPU_OPENCL type(c_ptr), bind(C) :: FSTARPU_CODELET_SIMGRID_EXECUTE type(c_ptr), bind(C) :: FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT type(c_ptr), bind(C) :: FSTARPU_CUDA_ASYNC type(c_ptr), bind(C) :: FSTARPU_OPENCL_ASYNC !type(c_ptr), bind(C) :: FSTARPU_PER_WORKER !type(c_ptr), bind(C) :: FSTARPU_PER_ARCH !type(c_ptr), bind(C) :: FSTARPU_PER_COMMON type(c_ptr), bind(C) :: FSTARPU_HISTORY_BASED type(c_ptr), bind(C) :: FSTARPU_REGRESSION_BASED type(c_ptr), bind(C) :: FSTARPU_NL_REGRESSION_BASED type(c_ptr), bind(C) :: FSTARPU_MULTIPLE_REGRESSION_BASED type(c_ptr), bind(C) :: FSTARPU_SEQ type(c_ptr), bind(C) :: FSTARPU_SPMD type(c_ptr), bind(C) :: FSTARPU_FORKJOIN ! (some) portable iso_c_binding types type(c_ptr), bind(C) :: FSTARPU_SZ_C_DOUBLE type(c_ptr), bind(C) :: FSTARPU_SZ_C_FLOAT type(c_ptr), bind(C) :: FSTARPU_SZ_C_CHAR type(c_ptr), bind(C) :: FSTARPU_SZ_C_INT type(c_ptr), bind(C) :: FSTARPU_SZ_C_INTPTR_T type(c_ptr), bind(C) :: FSTARPU_SZ_C_PTR type(c_ptr), bind(C) :: FSTARPU_SZ_C_SIZE_T ! (some) native Fortran types type(c_ptr), bind(C) :: FSTARPU_SZ_CHARACTER type(c_ptr), bind(C) :: FSTARPU_SZ_INTEGER type(c_ptr), bind(C) :: FSTARPU_SZ_INT4 type(c_ptr), bind(C) :: FSTARPU_SZ_INT8 type(c_ptr), bind(C) :: FSTARPU_SZ_REAL type(c_ptr), bind(C) :: FSTARPU_SZ_REAL4 type(c_ptr), bind(C) :: FSTARPU_SZ_REAL8 type(c_ptr), bind(C) :: FSTARPU_SZ_DOUBLE_PRECISION type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX4 type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX8 integer(c_int), bind(C), target :: FSTARPU_DEFAULT_PRIO interface operator (.ior.) procedure or_cptrs end interface operator (.ior.) interface ! == starpu.h == ! void starpu_conf_init(struct starpu_conf *conf); subroutine fstarpu_conf_init (conf) bind(C,name="starpu_conf_init") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: conf end subroutine fstarpu_conf_init function fstarpu_conf_allocate () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_conf_allocate end function fstarpu_conf_allocate subroutine fstarpu_conf_free (conf) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: conf end subroutine fstarpu_conf_free subroutine fstarpu_conf_set_sched_policy_name (conf, policy_name) bind(C) use iso_c_binding, only: c_ptr, c_char type(c_ptr), value, intent(in) :: conf character(c_char), intent(in) :: policy_name end subroutine fstarpu_conf_set_sched_policy_name subroutine fstarpu_conf_set_min_prio (conf, min_prio) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: conf integer(c_int), value, intent(in) :: min_prio end subroutine fstarpu_conf_set_min_prio subroutine fstarpu_conf_set_max_prio (conf, max_prio) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: conf integer(c_int), value, intent(in) :: max_prio end subroutine fstarpu_conf_set_max_prio subroutine fstarpu_conf_set_ncpu (conf, ncpu) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: conf integer(c_int), value, intent(in) :: ncpu end subroutine fstarpu_conf_set_ncpu subroutine fstarpu_conf_set_ncuda (conf, ncuda) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: conf integer(c_int), value, intent(in) :: ncuda end subroutine fstarpu_conf_set_ncuda subroutine fstarpu_conf_set_nopencl (conf, nopencl) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: conf integer(c_int), value, intent(in) :: nopencl end subroutine fstarpu_conf_set_nopencl ! starpu_init: see fstarpu_init ! starpu_initialize: see fstarpu_init ! void starpu_pause(void); subroutine fstarpu_pause() bind(C,name="starpu_pause") end subroutine fstarpu_pause ! void starpu_resume(void); subroutine fstarpu_resume() bind(C,name="starpu_resume") end subroutine fstarpu_resume ! int starpu_is_paused(void); function fstarpu_is_paused() bind(C,name="starpu_is_paused") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_is_paused end function fstarpu_is_paused ! void starpu_shutdown(void); subroutine fstarpu_shutdown () bind(C,name="starpu_shutdown") end subroutine fstarpu_shutdown ! starpu_topology_print subroutine fstarpu_topology_print () bind(C) end subroutine fstarpu_topology_print ! int starpu_asynchronous_copy_disabled(void); function fstarpu_asynchronous_copy_disabled() bind(C,name="starpu_asynchronous_copy_disabled") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_asynchronous_copy_disabled end function fstarpu_asynchronous_copy_disabled ! int starpu_asynchronous_cuda_copy_disabled(void); function fstarpu_asynchronous_cuda_copy_disabled() bind(C,name="starpu_asynchronous_cuda_copy_disabled") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_asynchronous_cuda_copy_disabled end function fstarpu_asynchronous_cuda_copy_disabled ! int starpu_asynchronous_opencl_copy_disabled(void); function fstarpu_asynchronous_opencl_copy_disabled() bind(C,name="starpu_asynchronous_opencl_copy_disabled") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_asynchronous_opencl_copy_disabled end function fstarpu_asynchronous_opencl_copy_disabled ! void starpu_display_stats(); subroutine fstarpu_display_stats() bind(C,name="starpu_display_stats") end subroutine fstarpu_display_stats ! void starpu_get_version(int *major, int *minor, int *release); subroutine fstarpu_get_version(major,minor,release) bind(C,name="starpu_get_version") use iso_c_binding, only: c_int integer(c_int), intent(out) :: major,minor,release end subroutine fstarpu_get_version ! == starpu_worker.h == ! unsigned starpu_worker_get_count(void); function fstarpu_worker_get_count() bind(C,name="starpu_worker_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_get_count end function fstarpu_worker_get_count ! unsigned starpu_combined_worker_get_count(void); function fstarpu_combined_worker_get_count() bind(C,name="starpu_combined_worker_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_combined_worker_get_count end function fstarpu_combined_worker_get_count ! unsigned starpu_worker_is_combined_worker(int id); function fstarpu_worker_is_combined_worker(id) bind(C,name="starpu_worker_is_combined_worker") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_is_combined_worker integer(c_int), value, intent(in) :: id end function fstarpu_worker_is_combined_worker ! unsigned starpu_cpu_worker_get_count(void); function fstarpu_cpu_worker_get_count() bind(C,name="starpu_cpu_worker_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_cpu_worker_get_count end function fstarpu_cpu_worker_get_count ! unsigned starpu_cuda_worker_get_count(void); function fstarpu_cuda_worker_get_count() bind(C,name="starpu_cuda_worker_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_cuda_worker_get_count end function fstarpu_cuda_worker_get_count ! unsigned starpu_opencl_worker_get_count(void); function fstarpu_opencl_worker_get_count() bind(C,name="starpu_opencl_worker_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_opencl_worker_get_count end function fstarpu_opencl_worker_get_count ! int starpu_worker_get_id(void); function fstarpu_worker_get_id() bind(C,name="starpu_worker_get_id") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_get_id end function fstarpu_worker_get_id ! _starpu_worker_get_id_check ! starpu_worker_get_id_check ! int starpu_worker_get_bindid(int workerid); function fstarpu_worker_get_bindid(id) bind(C,name="starpu_worker_get_bindid") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_get_bindid integer(c_int), value, intent(in) :: id end function fstarpu_worker_get_bindid ! int starpu_combined_worker_get_id(void); function fstarpu_combined_worker_get_id() bind(C,name="starpu_combined_worker_get_id") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_combined_worker_get_id end function fstarpu_combined_worker_get_id ! int starpu_combined_worker_get_size(void); function fstarpu_combined_worker_get_size() bind(C,name="starpu_combined_worker_get_size") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_combined_worker_get_size end function fstarpu_combined_worker_get_size ! int starpu_combined_worker_get_rank(void); function fstarpu_combined_worker_get_rank() bind(C,name="starpu_combined_worker_get_rank") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_combined_worker_get_rank end function fstarpu_combined_worker_get_rank ! enum starpu_worker_archtype starpu_worker_get_type(int id); function fstarpu_worker_get_type(id) bind(C) use iso_c_binding, only: c_int, c_ptr type(c_ptr) :: fstarpu_worker_get_type ! C function returns c_intptr_t integer(c_int),value,intent(in) :: id end function fstarpu_worker_get_type ! int starpu_worker_get_count_by_type(enum starpu_worker_archtype type); function fstarpu_worker_get_count_by_type(typeid) bind(C) use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_worker_get_count_by_type type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func end function fstarpu_worker_get_count_by_type ! int starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize); function fstarpu_worker_get_ids_by_type(typeid, workerids, maxsize) bind(C) use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_worker_get_ids_by_type type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func integer(c_int),intent(out) :: workerids(*) integer(c_int),value,intent(in) :: maxsize end function fstarpu_worker_get_ids_by_type ! int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num); function fstarpu_worker_get_by_type(typeid, num) bind(C) use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_worker_get_by_type type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func integer(c_int),value,intent(in) :: num end function fstarpu_worker_get_by_type ! int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid); function fstarpu_worker_get_by_devid(typeid, devid) bind(C) use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_worker_get_by_devid type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func integer(c_int),value,intent(in) :: devid end function fstarpu_worker_get_by_devid ! void starpu_worker_get_name(int id, char *dst, size_t maxlen); subroutine fstarpu_worker_get_name(id, dst, maxlen) bind(C,name="starpu_worker_get_name") use iso_c_binding, only: c_int, c_char, c_size_t integer(c_int),value,intent(in) :: id character(c_char),intent(out) :: dst(*) integer(c_size_t),value,intent(in) :: maxlen end subroutine fstarpu_worker_get_name ! int starpu_worker_get_devid(int id); function fstarpu_worker_get_devid(id) bind(C,name="starpu_worker_get_devid") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_get_devid integer(c_int), value, intent(in) :: id end function fstarpu_worker_get_devid ! struct starpu_tree* starpu_workers_get_tree(void); ! unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx); ! unsigned starpu_worker_is_blocked(int workerid); function fstarpu_worker_is_blocked(id) bind(C,name="starpu_worker_is_blocked") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_is_blocked integer(c_int), value, intent(in) :: id end function fstarpu_worker_is_blocked ! unsigned starpu_worker_is_slave_somewhere(int workerid); function fstarpu_worker_is_slave_somewhere(id) bind(C,name="starpu_worker_is_slave_somewhere") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_is_slave_somewhere integer(c_int), value, intent(in) :: id end function fstarpu_worker_is_slave_somewhere ! char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type); subroutine fstarpu_worker_get_type_as_string(typeid,dst,maxlen) bind(C) use iso_c_binding, only: c_ptr, c_char, c_size_t type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func character(c_char),intent(out) :: dst(*) integer(c_size_t),value,intent(in) :: maxlen end subroutine fstarpu_worker_get_type_as_string ! int starpu_bindid_get_workerids(int bindid, int **workerids); ! == starpu_task.h == function fstarpu_task_create_sync (handle, mode) bind(C,name="starpu_task_create_sync") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_task_create_sync type(c_ptr), value, intent(in) :: handle type(c_ptr), value, intent(in) :: mode end function fstarpu_task_create_sync ! void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array); subroutine fstarpu_tag_declare_deps_array(id,ndeps,tag_array) bind(C,name="starpu_tag_declare_deps_array") use iso_c_binding, only: c_int, c_long_long integer(c_int), value, intent(in) :: id integer(c_int), value, intent(in) :: ndeps integer(c_long_long), intent(in) :: tag_array(*) end subroutine fstarpu_tag_declare_deps_array ! void starpu_task_declare_deps(starpu_tag_t id, unsigned ndeps, ...); subroutine fstarpu_task_declare_deps(task,ndeps,root_task) bind(C,name="starpu_task_declare_deps") use iso_c_binding, only: c_int, c_ptr type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: ndeps type(c_ptr), value, intent(in) :: root_task end subroutine fstarpu_task_declare_deps ! void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]); subroutine fstarpu_task_declare_deps_array(task,ndeps,task_array) bind(C,name="starpu_task_declare_deps_array") use iso_c_binding, only: c_int, c_ptr type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: ndeps type(c_ptr), intent(in) :: task_array(*) end subroutine fstarpu_task_declare_deps_array ! void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps) subroutine fstarpu_task_end_dep_add(task, nb_deps) & bind(C,name="starpu_task_end_dep_add") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: nb_deps end subroutine fstarpu_task_end_dep_add ! void starpu_task_end_dep_release(struct starpu_task *t) subroutine fstarpu_task_end_dep_release(task) & bind(C,name="starpu_task_end_dep_release") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: task end subroutine fstarpu_task_end_dep_release ! int starpu_tag_wait(starpu_tag_t id); function fstarpu_tag_wait(id) bind(C,name="starpu_tag_wait") use iso_c_binding, only: c_int, c_long_long integer(c_int) :: fstarpu_tag_wait integer(c_long_long), value, intent(in) :: id end function fstarpu_tag_wait ! int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id); function fstarpu_tag_wait_array(ntags,tag_array) bind(C,name="starpu_tag_wait_array") use iso_c_binding, only: c_int, c_long_long integer(c_int) :: fstarpu_tag_wait_array integer(c_int), value, intent(in) :: ntags integer(c_long_long), intent(in) :: tag_array(*) end function fstarpu_tag_wait_array ! void starpu_tag_notify_from_apps(starpu_tag_t id); subroutine fstarpu_tag_notify_from_apps(id) bind(C,name="starpu_tag_notify_from_apps") use iso_c_binding, only: c_long_long integer(c_long_long), value, intent(in) :: id end subroutine fstarpu_tag_notify_from_apps ! void starpu_tag_restart(starpu_tag_t id); subroutine fstarpu_tag_restart(id) bind(C,name="starpu_tag_restart") use iso_c_binding, only: c_long_long integer(c_long_long), value, intent(in) :: id end subroutine fstarpu_tag_restart ! void starpu_tag_remove(starpu_tag_t id); subroutine fstarpu_tag_remove(id) bind(C,name="starpu_tag_remove") use iso_c_binding, only: c_long_long integer(c_long_long), value, intent(in) :: id end subroutine fstarpu_tag_remove ! struct starpu_task *starpu_tag_get_task(starpu_tag_t id); function fstarpu_tag_get_task(id) bind(C,name="starpu_tag_get_task") use iso_c_binding, only: c_ptr, c_long_long type(c_ptr) :: fstarpu_tag_get_task integer(c_long_long), value, intent(in) :: id end function fstarpu_tag_get_task ! void starpu_task_init(struct starpu_task *task); subroutine fstarpu_task_init (task) bind(C,name="starpu_task_init") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: task end subroutine fstarpu_task_init ! void starpu_task_clean(struct starpu_task *task); subroutine fstarpu_task_clean (task) bind(C,name="starpu_task_clean") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: task end subroutine fstarpu_task_clean ! struct starpu_task *starpu_task_create(void) STARPU_ATTRIBUTE_MALLOC; function fstarpu_task_create () bind(C,name="starpu_task_create") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_task_create end function fstarpu_task_create ! void starpu_task_destroy(struct starpu_task *task); subroutine fstarpu_task_destroy (task) bind(C,name="starpu_task_destroy") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: task end subroutine fstarpu_task_destroy ! void starpu_task_set_destroy(struct starpu_task *task); subroutine fstarpu_task_set_destroy (task) bind(C,name="starpu_task_set_destroy") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: task end subroutine fstarpu_task_set_destroy ! int starpu_task_submit(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; function fstarpu_task_submit (task) bind(C,name="starpu_task_submit") use iso_c_binding, only: c_int,c_ptr integer(c_int) :: fstarpu_task_submit type(c_ptr), value, intent(in) :: task end function fstarpu_task_submit ! int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id); function fstarpu_task_submit_to_ctx (task,sched_ctx_id) bind(C,name="starpu_task_submit_to_ctx") use iso_c_binding, only: c_int,c_ptr integer(c_int) :: fstarpu_task_submit_to_ctx type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_task_submit_to_ctx ! int starpu_task_finished(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; function fstarpu_task_finished (task) bind(C,name="starpu_task_finished") use iso_c_binding, only: c_int,c_ptr integer(c_int) :: fstarpu_task_finished type(c_ptr), value, intent(in) :: task end function fstarpu_task_finished ! int starpu_task_wait(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; function fstarpu_task_wait (task) bind(C,name="starpu_task_wait") use iso_c_binding, only: c_int,c_ptr integer(c_int) :: fstarpu_task_wait type(c_ptr), value, intent(in) :: task end function fstarpu_task_wait ! int starpu_task_wait_array(struct starpu_task **tasks, unsigned nb_tasks) STARPU_WARN_UNUSED_RESULT; function fstarpu_task_wait_array(task_array,ntasks) bind(C,name="starpu_task_wait_array") use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_task_wait_array integer(c_int), value, intent(in) :: ntasks type(c_ptr), intent(in) :: task_array end function fstarpu_task_wait_array ! int starpu_task_wait_for_all(void); subroutine fstarpu_task_wait_for_all () bind(C,name="starpu_task_wait_for_all") end subroutine fstarpu_task_wait_for_all ! int starpu_task_wait_for_n_submitted(unsigned n); subroutine fstarpu_task_wait_for_n_submitted (n) bind(C,name="starpu_task_wait_for_n_submitted") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: n end subroutine fstarpu_task_wait_for_n_submitted ! int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx_id); subroutine fstarpu_task_wait_for_all_in_ctx (ctx) bind(C,name="starpu_task_wait_for_all_in_ctx") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: ctx end subroutine fstarpu_task_wait_for_all_in_ctx ! int starpu_task_wait_for_n_submitted_in_ctx(unsigned sched_ctx_id, unsigned n); subroutine fstarpu_task_wait_for_n_submitted_in_ctx (ctx,n) bind(C,name="starpu_task_wait_for_n_submitted_in_ctx") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: ctx integer(c_int), value, intent(in) :: n end subroutine fstarpu_task_wait_for_n_submitted_in_ctx ! int starpu_task_wait_for_no_ready(void); function fstarpu_task_wait_for_no_ready () bind(C,name="starpu_task_wait_for_no_ready") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_task_wait_for_no_ready end function fstarpu_task_wait_for_no_ready ! int starpu_task_nready(void); function fstarpu_task_nready () bind(C,name="starpu_task_nready") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_task_nready end function fstarpu_task_nready ! int starpu_task_nsubmitted(void); function fstarpu_task_nsubmitted () bind(C,name="starpu_task_nsubmitted") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_task_nsubmitted end function fstarpu_task_nsubmitted ! void starpu_do_schedule(void); subroutine fstarpu_do_schedule () bind(C,name="starpu_do_schedule") end subroutine fstarpu_do_schedule ! starpu_codelet_init subroutine fstarpu_codelet_init (codelet) bind(C,name="starpu_codelet_init") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: codelet end subroutine fstarpu_codelet_init ! starpu_codelet_display_stats subroutine fstarpu_codelet_display_stats (codelet) bind(C,name="starpu_codelet_display_stats") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: codelet end subroutine fstarpu_codelet_display_stats ! struct starpu_task *starpu_task_get_current(void); function fstarpu_task_get_current () bind(C,name="starpu_task_get_current") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_task_get_current end function fstarpu_task_get_current ! void starpu_parallel_task_barrier_init(struct starpu_task *task, int workerid); subroutine fstarpu_parallel_task_barrier_init_init (task,id) & bind(C,name="starpu_parallel_task_barrier_init_init") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: id end subroutine fstarpu_parallel_task_barrier_init_init ! void starpu_parallel_task_barrier_init_n(struct starpu_task *task, int worker_size); subroutine fstarpu_parallel_task_barrier_init_n_init_n (task,sz) & bind(C,name="starpu_parallel_task_barrier_init_n_init_n") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sz end subroutine fstarpu_parallel_task_barrier_init_n_init_n ! struct starpu_task *starpu_task_dup(struct starpu_task *task); function fstarpu_task_dup (task) bind(C,name="starpu_task_dup") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_task_dup type(c_ptr), value, intent(in) :: task end function fstarpu_task_dup ! void starpu_task_set_implementation(struct starpu_task *task, unsigned impl); subroutine fstarpu_task_set_implementation (task,impl) & bind(C,name="starpu_task_set_implementation") use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: impl end subroutine fstarpu_task_set_implementation ! unsigned starpu_task_get_implementation(struct starpu_task *task); function fstarpu_task_get_implementation (task) & bind(C,name="starpu_task_get_implementation") use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: task integer(c_int) :: fstarpu_task_get_implementation end function fstarpu_task_get_implementation ! -- function fstarpu_codelet_allocate () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_codelet_allocate end function fstarpu_codelet_allocate subroutine fstarpu_codelet_free (cl) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl end subroutine fstarpu_codelet_free subroutine fstarpu_codelet_set_name (cl, cl_name) bind(C) use iso_c_binding, only: c_ptr, c_char type(c_ptr), value, intent(in) :: cl character(c_char), intent(in) :: cl_name end subroutine fstarpu_codelet_set_name subroutine fstarpu_codelet_set_color (cl, cl_color) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: cl integer(c_int), value, intent(in) :: cl_color end subroutine fstarpu_codelet_set_color subroutine fstarpu_codelet_set_model (cl, cl_perfmodel) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: cl_perfmodel end subroutine fstarpu_codelet_set_model subroutine fstarpu_codelet_set_energy_model (cl, cl_perfmodel) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: cl_perfmodel end subroutine fstarpu_codelet_set_energy_model subroutine fstarpu_codelet_add_cpu_func (cl, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: cl type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_codelet_add_cpu_func subroutine fstarpu_codelet_add_cuda_func (cl, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: cl type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_codelet_add_cuda_func subroutine fstarpu_codelet_add_cuda_flags (cl, flags) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: flags ! C function expects an intptr_t end subroutine fstarpu_codelet_add_cuda_flags subroutine fstarpu_codelet_add_opencl_func (cl, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: cl type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_codelet_add_opencl_func subroutine fstarpu_codelet_add_opencl_flags (cl, flags) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: flags ! C function expects an intptr_t end subroutine fstarpu_codelet_add_opencl_flags subroutine fstarpu_codelet_add_buffer (cl, mode) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: mode ! C function expects an intptr_t end subroutine fstarpu_codelet_add_buffer subroutine fstarpu_codelet_set_variable_nbuffers (cl) bind(C) use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: cl end subroutine fstarpu_codelet_set_variable_nbuffers subroutine fstarpu_codelet_set_nbuffers (cl, nbuffers) bind(C) use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: cl integer(c_int), value, intent(in) :: nbuffers end subroutine fstarpu_codelet_set_nbuffers subroutine fstarpu_codelet_set_flags (cl, flags) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: flags ! C function expects an intptr_t end subroutine fstarpu_codelet_set_flags subroutine fstarpu_codelet_set_where (cl, where) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: where ! C function expects an intptr_t end subroutine fstarpu_codelet_set_where subroutine fstarpu_codelet_set_type (cl, type_constant) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: type_constant ! C function expects an intptr_t end subroutine fstarpu_codelet_set_type subroutine fstarpu_codelet_set_max_parallelism (cl, max_parallelism) bind(C) use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: cl integer(c_int), value, intent(in) :: max_parallelism end subroutine fstarpu_codelet_set_max_parallelism function fstarpu_perfmodel_allocate () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_perfmodel_allocate end function fstarpu_perfmodel_allocate subroutine fstarpu_perfmodel_free (model) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: model end subroutine fstarpu_perfmodel_free subroutine fstarpu_perfmodel_set_symbol (model, model_symbol) bind(C) use iso_c_binding, only: c_ptr, c_char type(c_ptr), value, intent(in) :: model character(c_char), intent(in) :: model_symbol end subroutine fstarpu_perfmodel_set_symbol subroutine fstarpu_perfmodel_set_type (model, type) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: model type(c_ptr), value, intent(in) :: type ! C function expects an intptr_t end subroutine fstarpu_perfmodel_set_type ! == starpu_data_interface.h == ! uintptr_t starpu_malloc_on_node_flags(unsigned dst_node, size_t size, int flags); ! uintptr_t starpu_malloc_on_node(unsigned dst_node, size_t size); function fstarpu_malloc_on_node(node,sz) bind(C,name="starpu_malloc_on_node") use iso_c_binding, only: c_int,c_intptr_t,c_size_t integer(c_intptr_t) :: fstarpu_malloc_on_node integer(c_int), value, intent(in) :: node integer(c_size_t), value, intent(in) :: sz end function fstarpu_malloc_on_node ! void starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags); ! void starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size); subroutine fstarpu_free_on_node(node,addr,sz) bind(C,name="starpu_free_on_node") use iso_c_binding, only: c_int,c_intptr_t,c_size_t integer(c_int), value, intent(in) :: node integer(c_intptr_t), value, intent(in) :: addr integer(c_size_t), value, intent(in) :: sz end subroutine fstarpu_free_on_node ! void starpu_malloc_on_node_set_default_flags(unsigned node, int flags); ! int starpu_data_interface_get_next_id(void); ! void starpu_data_register(starpu_data_handle_t *handleptr, unsigned home_node, void *data_interface, struct starpu_data_interface_ops *ops); ! void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node); subroutine fstarpug_data_ptr_register (dh,node) bind(C,name="starpu_data_ptr_register") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end subroutine fstarpug_data_ptr_register ! void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc); subroutine fstarpu_data_register_same (dh_dst,dh_src) bind(C,name="starpu_data_register_same") use iso_c_binding, only: c_ptr type(c_ptr), intent(out) :: dh_dst type(c_ptr), value, intent(in) :: dh_src end subroutine fstarpu_data_register_same ! void *starpu_data_handle_to_pointer(starpu_data_handle_t handle, unsigned node); function fstarpu_data_handle_to_pointer (dh,node) bind(C,name="starpu_data_handle_to_pointer") use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_handle_to_pointer type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end function fstarpu_data_handle_to_pointer ! void *starpu_data_get_local_ptr(starpu_data_handle_t handle); function fstarpu_data_get_local_ptr (dh) bind(C,name="starpu_data_get_local_ptr") use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_get_local_ptr type(c_ptr), value, intent(in) :: dh end function fstarpu_data_get_local_ptr ! void *starpu_data_get_interface_on_node(starpu_data_handle_t handle, unsigned memory_node); ! == starpu_data_interface.h: tensor == ! void starpu_tensor_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t ldt, uint32_t nx, uint32_t ny, uint32_t nz, uint32_t nt, size_t elemsize); subroutine fstarpu_tensor_data_register(dh, home_node, ptr, ldy, ldz, ldt, nx, ny, nz, nt, elt_size) & bind(C,name="starpu_tensor_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: home_node type(c_ptr), value, intent(in) :: ptr integer(c_int), value, intent(in) :: ldy integer(c_int), value, intent(in) :: ldz integer(c_int), value, intent(in) :: ldt integer(c_int), value, intent(in) :: nx integer(c_int), value, intent(in) :: ny integer(c_int), value, intent(in) :: nz integer(c_int), value, intent(in) :: nt integer(c_size_t), value, intent(in) :: elt_size end subroutine fstarpu_tensor_data_register ! void starpu_tensor_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz, uint32_t ldt); subroutine fstarpu_tensor_ptr_register(dh, node, ptr, dev_handle, offset, ldy, ldz, ldt) & bind(C,name="starpu_tensor_ptr_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: node type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: dev_handle integer(c_size_t), value, intent(in) :: offset integer(c_int), value, intent(in) :: ldy integer(c_int), value, intent(in) :: ldz integer(c_int), value, intent(in) :: ldt end subroutine fstarpu_tensor_ptr_register function fstarpu_tensor_get_ptr(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_tensor_get_ptr type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_ptr function fstarpu_tensor_get_ldy(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_ldy type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_ldy function fstarpu_tensor_get_ldz(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_ldz type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_ldz function fstarpu_tensor_get_ldt(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_ldt type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_ldt function fstarpu_tensor_get_nx(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_nx type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_nx function fstarpu_tensor_get_ny(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_ny type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_ny function fstarpu_tensor_get_nz(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_nz type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_nz function fstarpu_tensor_get_nt(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_nt type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_nt ! == starpu_data_interface.h: block == ! void starpu_block_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, uint32_t ny, uint32_t nz, size_t elemsize); subroutine fstarpu_block_data_register(dh, home_node, ptr, ldy, ldz, nx, ny, nz, elt_size) & bind(C,name="starpu_block_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: home_node type(c_ptr), value, intent(in) :: ptr integer(c_int), value, intent(in) :: ldy integer(c_int), value, intent(in) :: ldz integer(c_int), value, intent(in) :: nx integer(c_int), value, intent(in) :: ny integer(c_int), value, intent(in) :: nz integer(c_size_t), value, intent(in) :: elt_size end subroutine fstarpu_block_data_register ! void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz); subroutine fstarpu_block_ptr_register(dh, node, ptr, dev_handle, offset, ldy, ldz) & bind(C,name="starpu_block_ptr_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: node type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: dev_handle integer(c_size_t), value, intent(in) :: offset integer(c_int), value, intent(in) :: ldy integer(c_int), value, intent(in) :: ldz end subroutine fstarpu_block_ptr_register function fstarpu_block_get_ptr(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_block_get_ptr type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_ptr function fstarpu_block_get_ldy(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_block_get_ldy type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_ldy function fstarpu_block_get_ldz(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_block_get_ldz type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_ldz function fstarpu_block_get_nx(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_block_get_nx type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_nx function fstarpu_block_get_ny(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_block_get_ny type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_ny function fstarpu_block_get_nz(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_block_get_nz type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_nz ! == starpu_data_interface.h: matrix == ! void starpu_matrix_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize); subroutine fstarpu_matrix_data_register(dh, home_node, ptr, ld, nx, ny, elt_size) & bind(C,name="starpu_matrix_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: home_node type(c_ptr), value, intent(in) :: ptr integer(c_int), value, intent(in) :: ld integer(c_int), value, intent(in) :: nx integer(c_int), value, intent(in) :: ny integer(c_size_t), value, intent(in) :: elt_size end subroutine fstarpu_matrix_data_register ! void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld); subroutine fstarpu_matrix_ptr_register(dh, node, ptr, dev_handle, offset, ld) & bind(C,name="starpu_matrix_ptr_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: node type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: dev_handle integer(c_size_t), value, intent(in) :: offset integer(c_int), value, intent(in) :: ld end subroutine fstarpu_matrix_ptr_register function fstarpu_matrix_get_ptr(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_matrix_get_ptr type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_matrix_get_ptr function fstarpu_matrix_get_ld(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_matrix_get_ld type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_matrix_get_ld function fstarpu_matrix_get_nx(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_matrix_get_nx type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_matrix_get_nx function fstarpu_matrix_get_ny(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_matrix_get_ny type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_matrix_get_ny ! == starpu_data_interface.h: vector == ! void starpu_vector_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t nx, size_t elemsize); subroutine fstarpu_vector_data_register(dh, home_node, ptr,nx, elt_size) & bind(C,name="starpu_vector_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: home_node type(c_ptr), value, intent(in) :: ptr integer(c_int), value, intent(in) :: nx integer(c_size_t), value, intent(in) :: elt_size end subroutine fstarpu_vector_data_register ! void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset); subroutine fstarpu_vector_ptr_register(dh, node, ptr, dev_handle, offset) & bind(C,name="starpu_vector_ptr_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: node type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: dev_handle integer(c_size_t), value, intent(in) :: offset end subroutine fstarpu_vector_ptr_register function fstarpu_vector_get_ptr(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_vector_get_ptr type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_vector_get_ptr function fstarpu_vector_get_nx(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_vector_get_nx type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_vector_get_nx ! == starpu_data_interface.h: variable == ! void starpu_variable_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, size_t size); subroutine fstarpu_variable_data_register(dh, home_node, ptr, elt_size) & bind(C,name="starpu_variable_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: home_node type(c_ptr), value, intent(in) :: ptr integer(c_size_t), value, intent(in) :: elt_size end subroutine fstarpu_variable_data_register ! void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset); subroutine fstarpu_variable_ptr_register(dh, node, ptr, dev_handle, offset) & bind(C,name="starpu_variable_ptr_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: node type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: dev_handle integer(c_size_t), value, intent(in) :: offset end subroutine fstarpu_variable_ptr_register function fstarpu_variable_get_ptr(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_variable_get_ptr type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_variable_get_ptr ! == starpu_data_interface.h: void == ! void starpu_void_data_register(starpu_data_handle_t *handle); subroutine fstarpu_void_data_register(dh) & bind(C,name="starpu_void_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh end subroutine fstarpu_void_data_register ! == starpu_data_filter.h == function fstarpu_data_filter_allocate () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_data_filter_allocate end function fstarpu_data_filter_allocate subroutine fstarpu_data_filter_free (filter) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: filter end subroutine fstarpu_data_filter_free ! Note: use fstarpu_df_alloc_ prefix instead of fstarpu_data_filter_allocate_ ! to fit within the Fortran id length limit */ function fstarpu_df_alloc_bcsr_filter_canonical_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_bcsr_filter_canonical_block end function fstarpu_df_alloc_bcsr_filter_canonical_block function fstarpu_df_alloc_csr_filter_vertical_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_csr_filter_vertical_block end function fstarpu_df_alloc_csr_filter_vertical_block function fstarpu_df_alloc_matrix_filter_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_matrix_filter_block end function fstarpu_df_alloc_matrix_filter_block function fstarpu_df_alloc_matrix_filter_block_shadow () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_matrix_filter_block_shadow end function fstarpu_df_alloc_matrix_filter_block_shadow function fstarpu_df_alloc_matrix_filter_vertical_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_matrix_filter_vertical_block end function fstarpu_df_alloc_matrix_filter_vertical_block function fstarpu_df_alloc_matrix_filter_vertical_block_shadow () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_matrix_filter_vertical_block_shadow end function fstarpu_df_alloc_matrix_filter_vertical_block_shadow function fstarpu_df_alloc_vector_filter_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_vector_filter_block end function fstarpu_df_alloc_vector_filter_block function fstarpu_df_alloc_vector_filter_block_shadow () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_vector_filter_block_shadow end function fstarpu_df_alloc_vector_filter_block_shadow function fstarpu_df_alloc_vector_filter_list () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_vector_filter_list end function fstarpu_df_alloc_vector_filter_list function fstarpu_df_alloc_vector_filter_divide_in_2 () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_vector_filter_divide_in_2 end function fstarpu_df_alloc_vector_filter_divide_in_2 function fstarpu_df_alloc_block_filter_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_block_filter_block end function fstarpu_df_alloc_block_filter_block function fstarpu_df_alloc_block_filter_block_shadow () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_block_filter_block_shadow end function fstarpu_df_alloc_block_filter_block_shadow function fstarpu_df_alloc_block_filter_vertical_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_block_filter_vertical_block end function fstarpu_df_alloc_block_filter_vertical_block function fstarpu_df_alloc_block_filter_vertical_block_shadow () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_block_filter_vertical_block_shadow end function fstarpu_df_alloc_block_filter_vertical_block_shadow subroutine fstarpu_data_filter_set_filter_func (filter, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: filter type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_data_filter_set_filter_func subroutine fstarpu_data_filter_set_nchildren (filter, nchildren) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: filter integer(c_int), value, intent(in) :: nchildren end subroutine fstarpu_data_filter_set_nchildren subroutine fstarpu_data_filter_set_get_nchildren_func (filter, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: filter type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_data_filter_set_get_nchildren_func subroutine fstarpu_data_filter_set_get_child_ops_func (filter, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: filter type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_data_filter_set_get_child_ops_func subroutine fstarpu_data_filter_set_filter_arg (filter, filter_arg) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: filter integer(c_int), value, intent(in) :: filter_arg end subroutine fstarpu_data_filter_set_filter_arg subroutine fstarpu_data_filter_set_filter_arg_ptr (filter, filter_arg_ptr) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: filter_arg_ptr end subroutine fstarpu_data_filter_set_filter_arg_ptr ! void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f); subroutine fstarpu_data_partition (dh,filter) bind(C,name="starpu_data_partition") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: filter end subroutine fstarpu_data_partition ! void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node); subroutine fstarpu_data_unpartition (root_dh,gathering_node) bind(C,name="starpu_data_unpartition") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: root_dh integer(c_int), value, intent(in) :: gathering_node end subroutine fstarpu_data_unpartition ! void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children); subroutine fstarpu_data_partition_plan (dh,filter,children) & bind(C,name="starpu_data_partition_plan") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: filter type(c_ptr), intent(in) :: children(*) end subroutine fstarpu_data_partition_plan ! void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); subroutine fstarpu_data_partition_submit (dh,nparts,children) & bind(C,name="starpu_data_partition_submit") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) end subroutine fstarpu_data_partition_submit ! void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); subroutine fstarpu_data_partition_readonly_submit (dh,nparts,children) & bind(C,name="starpu_data_partition_readonly_submit") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) end subroutine fstarpu_data_partition_readonly_submit ! void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); subroutine fstarpu_data_partition_readwrite_upgrade_submit (dh,nparts,children) & bind(C,name="starpu_data_partition_readwrite_upgrade_submit") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) end subroutine fstarpu_data_partition_readwrite_upgrade_submit ! void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); subroutine fstarpu_data_unpartition_submit (dh,nparts,children,gathering_node) & bind(C,name="starpu_data_unpartition_submit") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) integer(c_int), value, intent(in) :: gathering_node end subroutine fstarpu_data_unpartition_submit ! void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); subroutine fstarpu_data_unpartition_readonly_submit (dh,nparts,children,gathering_node) & bind(C,name="starpu_data_unpartition_readonly_submit") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) integer(c_int), value, intent(in) :: gathering_node end subroutine fstarpu_data_unpartition_readonly_submit ! void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children); subroutine fstarpu_data_partition_clean (dh,nparts,children) & bind(C,name="starpu_data_partition_clean") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) end subroutine fstarpu_data_partition_clean ! int starpu_data_get_nb_children(starpu_data_handle_t handle); function fstarpu_data_get_nb_children(dh) bind(C,name="starpu_data_get_nb_children") use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_data_get_nb_children type(c_ptr), value, intent(in) :: dh end function fstarpu_data_get_nb_children ! starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i); function fstarpu_data_get_child(dh,i) bind(C,name="starpu_data_get_child") use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_get_child type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: i end function fstarpu_data_get_child ! starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... ); ! . see: fstarpu_data_get_sub_data ! starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa); ! . see: fstarpu_data_get_sub_data ! note: defined in filters.c function fstarpu_data_get_sub_data (root_dh,depth,indices) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_get_sub_data type(c_ptr), value, intent(in) :: root_dh integer(c_int), value, intent(in) :: depth integer(c_int), intent(in) :: indices(*) end function fstarpu_data_get_sub_data ! void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...); ! . see fstarpu_data_map_filters ! void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa); ! . see fstarpu_data_map_filters ! note: defined in filters.c subroutine fstarpu_data_map_filters (root_dh,nfilters,filters) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: root_dh integer(c_int), value, intent(in) :: nfilters type(c_ptr), intent(in) :: filters(*) end subroutine fstarpu_data_map_filters ! void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_matrix_filter_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_matrix_filter_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_matrix_filter_block ! void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_matrix_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_matrix_filter_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_matrix_filter_block_shadow ! void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_matrix_filter_vertical_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_matrix_filter_vertical_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_matrix_filter_vertical_block ! void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_matrix_filter_vertical_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_matrix_filter_vertical_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_matrix_filter_vertical_block_shadow ! void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_vector_filter_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_vector_filter_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_vector_filter_block ! void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_vector_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_vector_filter_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_vector_filter_block_shadow ! void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_vector_filter_list_long (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_vector_filter_list_long") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_vector_filter_list_long ! void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_vector_filter_list (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_vector_filter_list") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_vector_filter_list ! void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_vector_divide_in_2 (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_vector_divide_in_2") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_vector_divide_in_2 ! void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_block ! void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_block_shadow ! void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_vertical_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_vertical_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_vertical_block ! void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_vertical_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_vertical_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_vertical_block_shadow ! void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_depth_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_depth_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_depth_block ! void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_depth_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_depth_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_depth_block_shadow ! == starpu_data.h == ! void starpu_data_unregister(starpu_data_handle_t handle); subroutine fstarpu_data_unregister (dh) bind(C,name="starpu_data_unregister") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_unregister ! void starpu_data_unregister_no_coherency(starpu_data_handle_t handle); subroutine fstarpu_data_unregister_no_coherency (dh) bind(C,name="starpu_data_unregister_no_coherency") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_unregister_no_coherency ! void starpu_data_unregister_submit(starpu_data_handle_t handle); subroutine fstarpu_data_unregister_submit (dh) bind(C,name="starpu_data_unregister_submit") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_unregister_submit ! void starpu_data_deinitialize(starpu_data_handle_t handle); subroutine fstarpu_data_deinitialize (dh) bind(C,name="starpu_data_deinitialize") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_deinitialize ! void starpu_data_deinitialize_submit(starpu_data_handle_t handle); subroutine fstarpu_data_deinitialize_submit (dh) bind(C,name="starpu_data_deinitialize_submit") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_deinitialize_submit ! void starpu_data_invalidate(starpu_data_handle_t handle); subroutine fstarpu_data_invalidate (dh) bind(C,name="starpu_data_invalidate") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_invalidate ! void starpu_data_invalidate_submit(starpu_data_handle_t handle); subroutine fstarpu_data_invalidate_submit (dh) bind(C,name="starpu_data_invalidate_submit") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_invalidate_submit ! void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important); subroutine fstarpu_data_advise_as_important (dh,is_important) bind(C,name="starpu_data_advise_as_important") use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: is_important end subroutine fstarpu_data_advise_as_important ! starpu_data_acquire: see fstarpu_data_acquire subroutine fstarpu_data_acquire (dh, mode) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: mode ! C function expects an intptr_t end subroutine fstarpu_data_acquire ! int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode); ! int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); ! int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); ! int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); ! int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); ! void starpu_data_release(starpu_data_handle_t handle); subroutine fstarpu_data_release (dh) bind(C,name="starpu_data_release") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_release ! void starpu_data_release_on_node(starpu_data_handle_t handle, int node); subroutine fstarpu_data_release_on_node (dh, node) bind(C,name="starpu_data_release_on_node") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end subroutine fstarpu_data_release_on_node ! starpu_arbiter_t starpu_arbiter_create(void) STARPU_ATTRIBUTE_MALLOC; function fstarpu_arbiter_create () bind(C,name="starpu_arbiter_create") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_arbiter_create end function fstarpu_arbiter_create ! void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter); subroutine fstarpu_data_assign_arbiter (dh,arbiter) bind(C,name="starpu_data_assign_arbiter") use iso_c_binding, only: c_ptr type(c_ptr), intent(out) :: dh type(c_ptr), value, intent(in) :: arbiter end subroutine fstarpu_data_assign_arbiter ! void starpu_arbiter_destroy(starpu_arbiter_t arbiter); subroutine fstarpu_arbiter_destroy (arbiter) bind(C,name="starpu_arbiter_destroy") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: arbiter end subroutine fstarpu_arbiter_destroy ! void starpu_data_display_memory_stats(); subroutine fstarpu_display_memory_stats() bind(C,name="starpu_display_memory_stats") end subroutine fstarpu_display_memory_stats ! int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node); subroutine fstarpu_data_request_allocation (dh, node) & bind(C,name="starpu_data_request_allocation") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end subroutine fstarpu_data_request_allocation ! int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); subroutine fstarpu_data_fetch_on_node (dh, node, async) & bind(C,name="starpu_data_fetch_on_node") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node integer(c_int), value, intent(in) :: async end subroutine fstarpu_data_fetch_on_node ! int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); subroutine fstarpu_data_prefetch_on_node (dh, node, async) & bind(C,name="starpu_data_prefetch_on_node") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node integer(c_int), value, intent(in) :: async end subroutine fstarpu_data_prefetch_on_node ! int starpu_data_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); subroutine fstarpu_data_prefetch_on_node_prio (dh, node, async, prio) & bind(C,name="starpu_data_prefetch_on_node_prio") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node integer(c_int), value, intent(in) :: async integer(c_int), value, intent(in) :: prio end subroutine fstarpu_data_prefetch_on_node_prio ! int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); subroutine fstarpu_data_idle_prefetch_on_node (dh, node, async) & bind(C,name="starpu_data_idle_prefetch_on_node") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node integer(c_int), value, intent(in) :: async end subroutine fstarpu_data_idle_prefetch_on_node ! int starpu_data_idle_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); subroutine fstarpu_data_idle_prefetch_on_node_prio (dh, node, async, prio) & bind(C,name="starpu_data_idle_prefetch_on_node_prio") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node integer(c_int), value, intent(in) :: async integer(c_int), value, intent(in) :: prio end subroutine fstarpu_data_idle_prefetch_on_node_prio !unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node); function fstarpu_data_is_on_node(dh, node) & bind(C,name="starpu_data_is_on_node") use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_data_is_on_node type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end function fstarpu_data_is_on_node ! void starpu_data_wont_use(starpu_data_handle_t handle); subroutine fstarpu_data_wont_use (dh) bind(c,name="starpu_data_wont_use") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_wont_use ! unsigned starpu_worker_get_memory_node(unsigned workerid); function fstarpu_worker_get_memory_node(id) bind(C,name="starpu_worker_get_memory_node") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_get_memory_node integer(c_int), value, intent(in) :: id end function fstarpu_worker_get_memory_node ! unsigned starpu_memory_nodes_get_count(void); function fstarpu_memory_nodes_get_count() bind(C,name="starpu_memory_nodes_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_memory_nodes_get_count end function fstarpu_memory_nodes_get_count ! enum starpu_node_kind starpu_node_get_kind(unsigned node); ! void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask); ! void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag); ! unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle); ! unsigned starpu_data_get_default_sequential_consistency_flag(void); ! void starpu_data_set_default_sequential_consistency_flag(unsigned flag); ! void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested); ! void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl); subroutine fstarpu_data_set_reduction_methods (dh,redux_cl,init_cl) bind(C,name="starpu_data_set_reduction_methods") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: redux_cl type(c_ptr), value, intent(in) :: init_cl end subroutine fstarpu_data_set_reduction_methods ! void starpu_data_set_reduction_methods_with_args(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, void *redux_args, struct starpu_codelet *init_cl, void *init_args) subroutine fstarpu_data_set_reduction_methods_with_args (dh,redux_cl,redux_args,init_cl,init_args) & bind(C,name="starpu_data_set_reduction_methods_with_args") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: redux_cl type(c_ptr), value, intent(in) :: redux_args type(c_ptr), value, intent(in) :: init_cl type(c_ptr), value, intent(in) :: init_args end subroutine fstarpu_data_set_reduction_methods_with_args ! struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle); ! unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node); function fstarpu_data_test_if_allocated_on_node(dh,mem_node) bind(C,name="starpu_data_test_if_allocated_on_node") use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_data_test_if_allocated_on_node type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: mem_node end function fstarpu_data_test_if_allocated_on_node ! void starpu_memchunk_tidy(unsigned memory_node); subroutine fstarpu_memchunk_tidy (mem_node) bind(c,name="starpu_memchunk_tidy") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: mem_node end subroutine fstarpu_memchunk_tidy ! == starpu_task_util.h == ! starpu_data_handle_t *fstarpu_data_handle_array_alloc(int nb); function fstarpu_data_handle_array_alloc (nb) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_handle_array_alloc integer(c_int), value, intent(in) :: nb end function fstarpu_data_handle_array_alloc ! void fstarpu_data_handle_array_free(starpu_data_handle_t *handles); subroutine fstarpu_data_handle_array_free (handles) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: handles end subroutine fstarpu_data_handle_array_free ! void fstarpu_data_handle_array_set(starpu_data_handle_t *handles, int i, starpu_data_handle_t handle); subroutine fstarpu_data_handle_array_set (handles, i, handle) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: handles integer(c_int), value, intent(in) :: i type(c_ptr), value, intent(in) :: handle end subroutine fstarpu_data_handle_array_set ! struct starpu_data_descr *fstarpu_data_descr_array_alloc(int nb); function fstarpu_data_descr_array_alloc (nb) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_descr_array_alloc integer(c_int), value, intent(in) :: nb end function fstarpu_data_descr_array_alloc ! struct starpu_data_descr *fstarpu_data_descr_alloc(void); function fstarpu_data_descr_alloc () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_data_descr_alloc end function fstarpu_data_descr_alloc ! void fstarpu_data_descr_array_free(struct starpu_data_descr *descrs); subroutine fstarpu_data_descr_array_free (descrs) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: descrs end subroutine fstarpu_data_descr_array_free ! void fstarpu_data_descr_free(struct starpu_data_descr *descr); subroutine fstarpu_data_descrg_free (descr) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: descr end subroutine fstarpu_data_descrg_free ! void fstarpu_data_descr_array_set(struct starpu_data_descr *descrs, int i, starpu_data_handle_t handle, intptr_t mode); subroutine fstarpu_data_descr_array_set (descrs, i, handle, mode) bind(C) use iso_c_binding, only: c_ptr, c_int, c_intptr_t type(c_ptr), value, intent(in) :: descrs integer(c_int), value, intent(in) :: i type(c_ptr), value, intent(in) :: handle type(c_ptr), value, intent(in) :: mode ! C func expects c_intptr_t end subroutine fstarpu_data_descr_array_set ! void fstarpu_data_descr_set(struct starpu_data_descr *descr, starpu_data_handle_t handle, intptr_t mode); subroutine fstarpu_data_descr_set (descr, handle, mode) bind(C) use iso_c_binding, only: c_ptr, c_intptr_t type(c_ptr), value, intent(in) :: descr type(c_ptr), value, intent(in) :: handle type(c_ptr), value, intent(in) :: mode ! C func expects c_intptr_t end subroutine fstarpu_data_descr_set subroutine fstarpu_task_insert(arglist) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), dimension(*), intent(in) :: arglist end subroutine fstarpu_task_insert subroutine fstarpu_insert_task(arglist) bind(C,name="fstarpu_task_insert") use iso_c_binding, only: c_ptr type(c_ptr), dimension(*), intent(in) :: arglist end subroutine fstarpu_insert_task subroutine fstarpu_unpack_arg(cl_arg,bufferlist) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl_arg type(c_ptr), dimension(*), intent(in) :: bufferlist end subroutine fstarpu_unpack_arg ! void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg) subroutine fstarpu_create_sync_task(sync_tag, ndeps, tag_array, callback, callback_arg) & bind(C,name="starpu_create_sync_task") use iso_c_binding, only: c_int, c_long_long, c_ptr, c_funptr integer(c_int), value, intent(in) :: sync_tag integer(c_int), value, intent(in) :: ndeps integer(c_long_long), intent(in) :: tag_array(*) type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: callback_arg end subroutine fstarpu_create_sync_task ! == starpu_sched_ctx.h == ! starpu_sched_ctx_create: see fstarpu_sched_ctx_create function fstarpu_sched_ctx_create(workers_array,nworkers,ctx_name, arglist) bind(C) use iso_c_binding, only: c_int, c_char, c_ptr integer(c_int) :: fstarpu_sched_ctx_create integer(c_int), intent(in) :: workers_array(*) integer(c_int), value, intent(in) :: nworkers character(c_char), intent(in) :: ctx_name type(c_ptr), dimension(*), intent(in) :: arglist end function fstarpu_sched_ctx_create ! unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const char *sched_ctx_name, int min_ncpus, int max_ncpus, int min_ngpus, int max_ngpus, unsigned allow_overlap); function fstarpu_sched_ctx_create_inside_interval(policy_name, sched_ctx_name, & min_ncpus, max_ncpus, min_ngpus, max_ngpus, allow_overlap) & bind(C,name="starpu_sched_ctx_create_inside_interval") use iso_c_binding, only: c_int, c_char integer(c_int) :: fstarpu_sched_ctx_create_inside_interval character(c_char), intent(in) :: policy_name character(c_char), intent(in) :: sched_ctx_name integer(c_int), value, intent(in) :: min_ncpus integer(c_int), value, intent(in) :: max_ncpus integer(c_int), value, intent(in) :: min_ngpus integer(c_int), value, intent(in) :: max_ngpus integer(c_int), value, intent(in) :: allow_overlap end function fstarpu_sched_ctx_create_inside_interval ! void starpu_sched_ctx_register_close_callback(unsigned sched_ctx_id, void (*close_callback)(unsigned sched_ctx_id, void* args), void *args); subroutine fstarpu_sched_ctx_register_close_callback (sched_ctx_id, close_callback, args) & bind(c,name="starpu_sched_ctx_register_close_callback") use iso_c_binding, only: c_ptr, c_funptr, c_int integer(c_int), value, intent(in) :: sched_ctx_id type(c_funptr), value, intent(in) :: close_callback type(c_ptr), value, intent(in) :: args end subroutine fstarpu_sched_ctx_register_close_callback ! void starpu_sched_ctx_add_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_add_workers(workerids,nworkers,ctx) bind(C,name="starpu_sched_ctx_add_workers") use iso_c_binding, only: c_int integer(c_int), intent(in) :: workerids (*) integer(c_int), value, intent(in) :: nworkers integer(c_int), value, intent(in) :: ctx end subroutine fstarpu_sched_ctx_add_workers ! void starpu_sched_ctx_remove_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_remove_workers(workerids,nworkers,ctx) bind(C,name="starpu_sched_ctx_remove_workers") use iso_c_binding, only: c_int integer(c_int), intent(in) :: workerids (*) integer(c_int), value, intent(in) :: nworkers integer(c_int), value, intent(in) :: ctx end subroutine fstarpu_sched_ctx_remove_workers ! starpu_sched_ctx_display_workers: see fstarpu_sched_ctx_display_workers subroutine fstarpu_sched_ctx_display_workers (ctx) bind(C) use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: ctx end subroutine fstarpu_sched_ctx_display_workers ! void starpu_sched_ctx_delete(unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_delete (ctx) bind(C,name="starpu_sched_ctx_delete") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: ctx end subroutine fstarpu_sched_ctx_delete ! void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor); subroutine fstarpu_sched_ctx_set_inheritor (ctx,inheritor) bind(C,name="starpu_sched_ctx_set_inheritor") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: ctx integer(c_int), value, intent(in) :: inheritor end subroutine fstarpu_sched_ctx_set_inheritor ! unsigned starpu_sched_ctx_get_inheritor(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_inheritor (ctx) bind(C,name="starpu_sched_ctx_get_inheritor") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_inheritor integer(c_int), value, intent(in) :: ctx end function fstarpu_sched_ctx_get_inheritor ! unsigned starpu_sched_ctx_get_hierarchy_level(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_hierarchy_level (ctx) bind(C,name="starpu_sched_ctx_get_hierarchy_level") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_hierarchy_level integer(c_int), value, intent(in) :: ctx end function fstarpu_sched_ctx_get_hierarchy_level ! void starpu_sched_ctx_set_context(unsigned *sched_ctx_id); subroutine fstarpu_sched_ctx_set_context (ctx_ptr) bind(C,name="starpu_sched_ctx_set_context") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: ctx_ptr end subroutine fstarpu_sched_ctx_set_context ! unsigned starpu_sched_ctx_get_context(void); function fstarpu_sched_ctx_get_context () bind(C,name="starpu_sched_ctx_get_context") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_context end function fstarpu_sched_ctx_get_context ! void starpu_sched_ctx_stop_task_submission(void); subroutine fstarpu_sched_ctx_stop_task_submission () bind(c,name="starpu_sched_ctx_stop_task_submission") use iso_c_binding end subroutine fstarpu_sched_ctx_stop_task_submission ! void starpu_sched_ctx_finished_submit(unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_finished_submit (sched_ctx_id) bind(c,name="starpu_sched_ctx_finished_submit") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_finished_submit ! unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids); ! unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids); ! unsigned starpu_sched_ctx_get_nworkers(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_nworkers (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_nworkers") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_nworkers integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_nworkers ! unsigned starpu_sched_ctx_get_nshared_workers(unsigned sched_ctx_id, unsigned sched_ctx_id2); function fstarpu_sched_ctx_get_nshared_workers (sched_ctx_id, sched_ctx_id2) & bind(c,name="starpu_sched_ctx_get_nshared_workers") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_nshared_workers integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: sched_ctx_id2 end function fstarpu_sched_ctx_get_nshared_workers ! unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id); function fstarpu_sched_ctx_contains_worker (workerid, sched_ctx_id) & bind(c,name="starpu_sched_ctx_contains_worker") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_contains_worker integer(c_int), value, intent(in) :: workerid integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_contains_worker ! unsigned starpu_sched_ctx_contains_type_of_worker(enum starpu_worker_archtype arch, unsigned sched_ctx_id); function fstarpu_sched_ctx_contains_type_of_worker (arch, sched_ctx_id) & bind(c,name="starpu_sched_ctx_contains_type_of_worker") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_contains_type_of_worker integer(c_int), value, intent(in) :: arch integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_contains_type_of_worker ! unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id); function fstarpu_sched_ctx_worker_get_id (sched_ctx_id) & bind(c,name="starpu_sched_ctx_worker_get_id") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_worker_get_id integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_worker_get_id ! unsigned starpu_sched_ctx_get_ctx_for_task(struct starpu_task *task); function fstarpu_sched_ctx_get_ctx_for_task (task) & bind(c,name="starpu_sched_ctx_get_ctx_for_task") use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_sched_ctx_get_ctx_for_task type(c_ptr), value, intent(in) :: task end function fstarpu_sched_ctx_get_ctx_for_task ! unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid); function fstarpu_sched_ctx_overlapping_ctxs_on_worker (workerid) & bind(c,name="starpu_sched_ctx_overlapping_ctxs_on_worker") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_overlapping_ctxs_on_worker integer(c_int), value, intent(in) :: workerid end function fstarpu_sched_ctx_overlapping_ctxs_on_worker ! int starpu_sched_get_min_priority(void); function fstarpu_sched_get_min_priority () & bind(c,name="starpu_sched_get_min_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_get_min_priority end function fstarpu_sched_get_min_priority ! int starpu_sched_get_max_priority(void); function fstarpu_sched_get_max_priority () & bind(c,name="starpu_sched_get_max_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_get_max_priority end function fstarpu_sched_get_max_priority ! int starpu_sched_set_min_priority(int min_prio); function fstarpu_sched_set_min_priority (min_prio) & bind(c,name="starpu_sched_set_min_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_set_min_priority integer(c_int), value, intent(in) :: min_prio end function fstarpu_sched_set_min_priority ! int starpu_sched_set_max_priority(int max_prio); function fstarpu_sched_set_max_priority (max_prio) & bind(c,name="starpu_sched_set_max_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_set_max_priority integer(c_int), value, intent(in) :: max_prio end function fstarpu_sched_set_max_priority ! int starpu_sched_ctx_get_min_priority(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_min_priority (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_min_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_min_priority integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_min_priority ! int starpu_sched_ctx_get_max_priority(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_max_priority (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_max_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_max_priority integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_max_priority ! int starpu_sched_ctx_set_min_priority(unsigned sched_ctx_id, int min_prio); function fstarpu_sched_ctx_set_min_priority (sched_ctx_id, min_prio) & bind(c,name="starpu_sched_ctx_set_min_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_set_min_priority integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: min_prio end function fstarpu_sched_ctx_set_min_priority ! int starpu_sched_ctx_set_max_priority(unsigned sched_ctx_id, int max_prio); function fstarpu_sched_ctx_set_max_priority (sched_ctx_id, max_prio) & bind(c,name="starpu_sched_ctx_set_max_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_set_max_priority integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: max_prio end function fstarpu_sched_ctx_set_max_priority ! int starpu_sched_ctx_min_priority_is_set(unsigned sched_ctx_id); function fstarpu_sched_ctx_min_priority_is_set (sched_ctx_id) & bind(c,name="starpu_sched_ctx_min_priority_is_set") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_min_priority_is_set integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_min_priority_is_set ! int starpu_sched_ctx_max_priority_is_set(unsigned sched_ctx_id); function fstarpu_sched_ctx_max_priority_is_set (sched_ctx_id) & bind(c,name="starpu_sched_ctx_max_priority_is_set") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_max_priority_is_set integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_max_priority_is_set ! void *starpu_sched_ctx_get_user_data(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_user_data(sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_user_data") use iso_c_binding, only: c_int, c_ptr integer(c_int), value, intent(in) :: sched_ctx_id type(c_ptr) :: fstarpu_sched_ctx_get_user_data end function fstarpu_sched_ctx_get_user_data ! struct starpu_worker_collection *starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type type) STARPU_ATTRIBUTE_MALLOC; ! void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_delete_worker_collection (sched_ctx_id) & bind(c,name="starpu_sched_ctx_delete_worker_collection") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_delete_worker_collection ! struct starpu_worker_collection *starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id); ! void starpu_sched_ctx_set_policy_data(unsigned sched_ctx_id, void *policy_data); subroutine fstarpu_sched_ctx_set_policy_data (sched_ctx_id, policy_data) & bind(c,name="starpu_sched_ctx_set_policy_data") use iso_c_binding, only: c_int, c_ptr integer(c_int), value, intent(in) :: sched_ctx_id type(c_ptr), value, intent(in) :: policy_data end subroutine fstarpu_sched_ctx_set_policy_data ! void *starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_policy_data (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_policy_data") use iso_c_binding, only: c_int, c_ptr type(c_ptr) :: fstarpu_sched_ctx_get_policy_data integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_policy_data ! void *starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void *param, unsigned sched_ctx_id); function fstarpu_sched_ctx_exec_parallel_code (func, param, sched_ctx_id) & bind(c,name="starpu_sched_ctx_exec_parallel_code") use iso_c_binding, only: c_int, c_funptr, c_ptr type(c_ptr) :: fstarpu_sched_ctx_exec_parallel_code type(c_funptr), value, intent(in) :: func type(c_ptr), value, intent(in) :: param integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_exec_parallel_code ! int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_nready_tasks (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_nready_tasks") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_nready_tasks integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_nready_tasks ! double starpu_sched_ctx_get_nready_flops(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_nready_flops (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_nready_flops") use iso_c_binding, only: c_int, c_double real(c_double) :: fstarpu_sched_ctx_get_nready_flops integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_nready_flops ! void starpu_sched_ctx_list_task_counters_increment(unsigned sched_ctx_id, int workerid); subroutine fstarpu_sched_ctx_list_task_counters_increment (sched_ctx_id, workerid) & bind(c,name="starpu_sched_ctx_list_task_counters_increment") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: workerid end subroutine fstarpu_sched_ctx_list_task_counters_increment ! void starpu_sched_ctx_list_task_counters_decrement(unsigned sched_ctx_id, int workerid); subroutine fstarpu_sched_ctx_list_task_counters_decrement (sched_ctx_id, workerid) & bind(c,name="starpu_sched_ctx_list_task_counters_decrement") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: workerid end subroutine fstarpu_sched_ctx_list_task_counters_decrement ! void starpu_sched_ctx_list_task_counters_reset(unsigned sched_ctx_id, int workerid); subroutine fstarpu_sched_ctx_list_task_counters_reset (sched_ctx_id, workerid) & bind(c,name="starpu_sched_ctx_list_task_counters_reset") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: workerid end subroutine fstarpu_sched_ctx_list_task_counters_reset ! void starpu_sched_ctx_list_task_counters_increment_all(struct starpu_task *task, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_list_task_counters_increment_all (task, sched_ctx_id) & bind(c,name="starpu_sched_ctx_list_task_counters_increment_all") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_list_task_counters_increment_all ! void starpu_sched_ctx_list_task_counters_decrement_all(struct starpu_task *task, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_list_task_counters_decrement_all (task, sched_ctx_id) & bind(c,name="starpu_sched_ctx_list_task_counters_decrement_all") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_list_task_counters_decrement_all ! void starpu_sched_ctx_list_task_counters_reset_all(struct starpu_task *task, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_list_task_counters_reset_all (task, sched_ctx_id) & bind(c,name="starpu_sched_ctx_list_task_counters_reset_all") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_list_task_counters_reset_all ! unsigned starpu_sched_ctx_get_priority(int worker, unsigned sched_ctx_id); function fstarpu_sched_ctx_get_priority (worker, sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_priority integer(c_int), value, intent(in) :: worker integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_priority ! void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids); ! void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid); subroutine fstarpu_sched_ctx_bind_current_thread_to_cpuid (cpuid) & bind(c,name="starpu_sched_ctx_bind_current_thread_to_cpuid") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: cpuid end subroutine fstarpu_sched_ctx_bind_current_thread_to_cpuid ! int starpu_sched_ctx_book_workers_for_task(unsigned sched_ctx_id, int *workerids, int nworkers); function fstarpu_sched_ctx_book_workers_for_task (sched_ctx_id, workerids, nworkers) & bind(c,name="starpu_sched_ctx_book_workers_for_task") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_book_workers_for_task integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), intent(in) :: workerids(*) integer(c_int), value, intent(in) :: nworkers end function fstarpu_sched_ctx_book_workers_for_task ! void starpu_sched_ctx_unbook_workers_for_task(unsigned sched_ctx_id, int master); subroutine fstarpu_sched_ctx_unbook_workers_for_task (sched_ctx_id, master) & bind(c,name="starpu_sched_ctx_unbook_workers_for_task") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: master end subroutine fstarpu_sched_ctx_unbook_workers_for_task ! unsigned starpu_sched_ctx_worker_is_master_for_child_ctx(int workerid, unsigned sched_ctx_id); function fstarpu_sched_ctx_worker_is_master_for_child_ctx (workerid, sched_ctx_id) & bind(c,name="starpu_sched_ctx_worker_is_master_for_child_ctx") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_worker_is_master_for_child_ctx integer(c_int), value, intent(in) :: workerid integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_worker_is_master_for_child_ctx ! unsigned starpu_sched_ctx_master_get_context(int masterid); function fstarpu_sched_ctx_master_get_context (masterid) & bind(c,name="starpu_sched_ctx_master_get_context") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_master_get_context integer(c_int), value, intent(in) :: masterid end function fstarpu_sched_ctx_master_get_context ! void starpu_sched_ctx_revert_task_counters(unsigned sched_ctx_id, double flops); subroutine fstarpu_sched_ctx_revert_task_counters (sched_ctx_id, flops) & bind(c,name="starpu_sched_ctx_revert_task_counters") use iso_c_binding, only: c_int, c_double integer(c_int), value, intent(in) :: sched_ctx_id real(c_double), value, intent(in) :: flops end subroutine fstarpu_sched_ctx_revert_task_counters ! void starpu_sched_ctx_move_task_to_ctx(struct starpu_task *task, unsigned sched_ctx, unsigned manage_mutex); subroutine fstarpu_sched_ctx_move_task_to_ctx (task, sched_ctx, manage_mutex) & bind(c,name="starpu_sched_ctx_move_task_to_ctx") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sched_ctx integer(c_int), value, intent(in) :: manage_mutex end subroutine fstarpu_sched_ctx_move_task_to_ctx ! int starpu_sched_ctx_get_worker_rank(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_worker_rank (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_worker_rank") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_worker_rank integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_worker_rank ! unsigned starpu_sched_ctx_has_starpu_scheduler(unsigned sched_ctx_id, unsigned *awake_workers); ! void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_call_pushed_task_cb (workerid, sched_ctx_id) & bind(c,name="starpu_sched_ctx_call_pushed_task_cb") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: workerid integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_call_pushed_task_cb ! == starpu_fxt.h == ! void starpu_fxt_options_init(struct starpu_fxt_options *options); subroutine fstarpu_fxt_options_init (fxt_options) bind(C,name="starpu_fxt_options_init") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: fxt_options end subroutine fstarpu_fxt_options_init ! void starpu_fxt_generate_trace(struct starpu_fxt_options *options); subroutine fstarpu_fxt_generate_trace (fxt_options) bind(C,name="starpu_fxt_generate_trace") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: fxt_options end subroutine fstarpu_fxt_generate_trace ! void starpu_fxt_autostart_profiling(int autostart); subroutine fstarpu_fxt_autostart_profiling (autostart) bind(c,name="starpu_fxt_autostart_profiling") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: autostart end subroutine fstarpu_fxt_autostart_profiling ! void starpu_fxt_start_profiling(void); subroutine fstarpu_fxt_start_profiling () bind(c,name="starpu_fxt_start_profiling") use iso_c_binding end subroutine fstarpu_fxt_start_profiling ! void starpu_fxt_stop_profiling(void); subroutine fstarpu_fxt_stop_profiling () bind(c,name="starpu_fxt_stop_profiling") use iso_c_binding end subroutine fstarpu_fxt_stop_profiling ! void starpu_fxt_write_data_trace(char *filename_in); subroutine fstarpu_fxt_write_data_trace (filename) bind(c,name="starpu_fxt_write_data_trace") use iso_c_binding, only: c_char character(c_char), intent(in) :: filename end subroutine fstarpu_fxt_write_data_trace ! void starpu_fxt_trace_user_event(unsigned long code); subroutine fstarpu_trace_user_event (code) bind(c,name="starpu_trace_user_event") use iso_c_binding, only: c_long integer(c_long), value, intent(in) :: code end subroutine fstarpu_trace_user_event ! double starpu_timing_now(void) function fstarpu_timing_now () bind(C,name="starpu_timing_now") use iso_c_binding, only: c_double real(c_double) :: fstarpu_timing_now end function fstarpu_timing_now ! == starpu_cuda.h == ! cudaStream_t starpu_cuda_get_local_stream(void); function fstarpu_cuda_get_local_stream () bind(C,name="starpu_cuda_get_local_stream") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_cuda_get_local_stream end function fstarpu_cuda_get_local_stream ! == starpu_stdlib.h == ! int starpu_malloc(void **A, size_t dim); function fstarpu_malloc (ptr, len) bind(C,name="starpu_malloc") use iso_c_binding, only: c_ptr, c_long, c_int type(c_ptr), intent(out) :: ptr integer(c_long), value, intent(in) :: len integer(c_int) :: fstarpu_malloc end function fstarpu_malloc ! int starpu_free_noflag(void *A, size_t dim); function fstarpu_free_noflag (ptr, len) bind(C,name="starpu_free_noflag") use iso_c_binding, only: c_ptr, c_long, c_int type(c_ptr), value, intent(in) :: ptr integer(c_long), value, intent(in) :: len integer(c_int) :: fstarpu_free_noflag end function fstarpu_free_noflag ! int starpu_memory_pin(void *addr, size_t size); function fstarpu_memory_pin (ptr, len) bind(C,name="starpu_memory_pin") use iso_c_binding, only: c_ptr, c_long, c_int type(c_ptr), value, intent(in) :: ptr integer(c_long), value, intent(in) :: len integer(c_int) :: fstarpu_memory_pin end function fstarpu_memory_pin ! int starpu_memory_unpin(void *addr, size_t size); function fstarpu_memory_unpin (ptr, len) bind(C,name="starpu_memory_unpin") use iso_c_binding, only: c_ptr, c_long, c_int type(c_ptr), value, intent(in) :: ptr integer(c_long), value, intent(in) :: len integer(c_int) :: fstarpu_memory_unpin end function fstarpu_memory_unpin ! int starpu_sleep(float nb_sec); subroutine fstarpu_sleep (nb_sec) bind(C,name="starpu_sleep") use iso_c_binding, only: c_float real(c_float), value, intent(in) :: nb_sec end subroutine fstarpu_sleep ! int starpu_usleep(float nb_sec); subroutine fstarpu_usleep (nb_sec) bind(C,name="starpu_usleep") use iso_c_binding, only: c_float real(c_float), value, intent(in) :: nb_sec end subroutine fstarpu_usleep ! void starpu_cublas_init(void); subroutine fstarpu_cublas_init () bind(C,name="starpu_cublas_init") end subroutine fstarpu_cublas_init ! void starpu_cublas_shutdown(void); subroutine fstarpu_cublas_shutdown () bind(C,name="starpu_cublas_shutdown") end subroutine fstarpu_cublas_shutdown end interface contains function or_cptrs(op1,op2) type(c_ptr) :: or_cptrs type(c_ptr),intent(in) :: op1,op2 integer(c_intptr_t) :: i_op1,i_op2 i_op1 = transfer(op1,0_c_intptr_t) i_op2 = transfer(op2,0_c_intptr_t) or_cptrs = transfer(ior(i_op1,i_op2), C_NULL_PTR) end function function ip_to_p(i) bind(C) use iso_c_binding, only: c_ptr,c_intptr_t,C_NULL_PTR type(c_ptr) :: ip_to_p integer(c_intptr_t), value, intent(in) :: i ip_to_p = transfer(i,C_NULL_PTR) end function ip_to_p function p_to_ip(p) bind(C) use iso_c_binding, only: c_ptr,c_intptr_t integer(c_intptr_t) :: p_to_ip type(c_ptr), value, intent(in) :: p p_to_ip = transfer(p,0_c_intptr_t) end function p_to_ip function sz_to_p(sz) bind(C) use iso_c_binding, only: c_ptr,c_size_t,c_intptr_t type(c_ptr) :: sz_to_p integer(c_size_t), value, intent(in) :: sz sz_to_p = ip_to_p(int(sz,kind=c_intptr_t)) end function sz_to_p function fstarpu_init (conf) bind(C) use iso_c_binding integer(c_int) :: fstarpu_init type(c_ptr), value, intent(in) :: conf real(c_double) :: FSTARPU_SZ_C_DOUBLE_dummy real(c_float) :: FSTARPU_SZ_C_FLOAT_dummy character(c_char) :: FSTARPU_SZ_C_CHAR_dummy integer(c_int) :: FSTARPU_SZ_C_INT_dummy integer(c_intptr_t) :: FSTARPU_SZ_C_INTPTR_T_dummy type(c_ptr) :: FSTARPU_SZ_C_PTR_dummy integer(c_size_t) :: FSTARPU_SZ_C_SIZE_T_dummy character :: FSTARPU_SZ_CHARACTER_dummy integer :: FSTARPU_SZ_INTEGER_dummy integer(4) :: FSTARPU_SZ_INT4_dummy integer(8) :: FSTARPU_SZ_INT8_dummy real :: FSTARPU_SZ_REAL_dummy real(4) :: FSTARPU_SZ_REAL4_dummy real(8) :: FSTARPU_SZ_REAL8_dummy double precision :: FSTARPU_SZ_DOUBLE_PRECISION_dummy complex :: FSTARPU_SZ_COMPLEX_dummy complex(4) :: FSTARPU_SZ_COMPLEX4_dummy complex(8) :: FSTARPU_SZ_COMPLEX8_dummy ! Note: Referencing global C constants from Fortran has ! been found unreliable on some architectures, notably ! on Darwin. The get_integer/get_pointer_constant ! scheme is a workaround to that issue. interface ! These functions are not exported to the end user function fstarpu_get_constant(s) bind(C) use iso_c_binding, only: c_ptr,c_char type(c_ptr) :: fstarpu_get_constant ! C function returns an intptr_t character(kind=c_char) :: s end function fstarpu_get_constant function fstarpu_init_internal (conf) bind(C,name="starpu_init") use iso_c_binding, only: c_ptr,c_int integer(c_int) :: fstarpu_init_internal type(c_ptr), value :: conf end function fstarpu_init_internal end interface ! Initialize Fortran constants from C peers FSTARPU_R = fstarpu_get_constant(C_CHAR_"FSTARPU_R"//C_NULL_CHAR) FSTARPU_W = fstarpu_get_constant(C_CHAR_"FSTARPU_W"//C_NULL_CHAR) FSTARPU_RW = fstarpu_get_constant(C_CHAR_"FSTARPU_RW"//C_NULL_CHAR) FSTARPU_SCRATCH = fstarpu_get_constant(C_CHAR_"FSTARPU_SCRATCH"//C_NULL_CHAR) FSTARPU_REDUX = fstarpu_get_constant(C_CHAR_"FSTARPU_REDUX"//C_NULL_CHAR) FSTARPU_MPI_REDUX = fstarpu_get_constant(C_CHAR_"FSTARPU_MPI_REDUX"//C_NULL_CHAR) FSTARPU_COMMUTE = fstarpu_get_constant(C_CHAR_"FSTARPU_COMMUTE"//C_NULL_CHAR) FSTARPU_SSEND = fstarpu_get_constant(C_CHAR_"FSTARPU_SSEND"//C_NULL_CHAR) FSTARPU_LOCALITY = fstarpu_get_constant(C_CHAR_"FSTARPU_LOCALITY"//C_NULL_CHAR) FSTARPU_DATA_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_DATA_ARRAY"//C_NULL_CHAR) FSTARPU_DATA_MODE_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_DATA_MODE_ARRAY"//C_NULL_CHAR) FSTARPU_CL_ARGS = fstarpu_get_constant(C_CHAR_"FSTARPU_CL_ARGS"//C_NULL_CHAR) FSTARPU_CL_ARGS_NFREE = fstarpu_get_constant(C_CHAR_"FSTARPU_CL_ARGS_NFREE"//C_NULL_CHAR) FSTARPU_TASK_DEPS_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_TASK_DEPS_ARRAY"//C_NULL_CHAR) FSTARPU_CALLBACK = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK"//C_NULL_CHAR) FSTARPU_CALLBACK_WITH_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_WITH_ARG"//C_NULL_CHAR) FSTARPU_CALLBACK_WITH_ARG_NFREE = & fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_WITH_ARG_NFREE"//C_NULL_CHAR) FSTARPU_CALLBACK_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_ARG"//C_NULL_CHAR) FSTARPU_CALLBACK_ARG_NFREE = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_ARG_NFREE"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_ARG"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE = & fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK_POP = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK_POP_ARG = & fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP_ARG"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE = & fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE"//C_NULL_CHAR) FSTARPU_PRIORITY = fstarpu_get_constant(C_CHAR_"FSTARPU_PRIORITY"//C_NULL_CHAR) FSTARPU_EXECUTE_ON_NODE = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_NODE"//C_NULL_CHAR) FSTARPU_EXECUTE_ON_DATA = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_DATA"//C_NULL_CHAR) FSTARPU_EXECUTE_ON_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_WORKER"//C_NULL_CHAR) FSTARPU_WORKER_ORDER = fstarpu_get_constant(C_CHAR_"FSTARPU_WORKER_ORDER"//C_NULL_CHAR) FSTARPU_EXECUTE_WHERE = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_WHERE"//C_NULL_CHAR) FSTARPU_HYPERVISOR_TAG = fstarpu_get_constant(C_CHAR_"FSTARPU_HYPERVISOR_TAG"//C_NULL_CHAR) FSTARPU_POSSIBLY_PARALLEL = fstarpu_get_constant(C_CHAR_"FSTARPU_POSSIBLY_PARALLEL"//C_NULL_CHAR) FSTARPU_FLOPS = fstarpu_get_constant(C_CHAR_"FSTARPU_FLOPS"//C_NULL_CHAR) FSTARPU_TAG = fstarpu_get_constant(C_CHAR_"FSTARPU_TAG"//C_NULL_CHAR) FSTARPU_TAG_ONLY = fstarpu_get_constant(C_CHAR_"FSTARPU_TAG_ONLY"//C_NULL_CHAR) FSTARPU_NAME = fstarpu_get_constant(C_CHAR_"FSTARPU_NAME"//C_NULL_CHAR) FSTARPU_NODE_SELECTION_POLICY = fstarpu_get_constant(C_CHAR_"FSTARPU_NODE_SELECTION_POLICY"//C_NULL_CHAR) FSTARPU_TASK_SCHED_DATA = fstarpu_get_constant(C_CHAR_"FSTARPU_TASK_SCHED_DATA"//C_NULL_CHAR) FSTARPU_VALUE = fstarpu_get_constant(C_CHAR_"FSTARPU_VALUE"//C_NULL_CHAR) FSTARPU_SCHED_CTX = fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX"//C_NULL_CHAR) FSTARPU_CPU_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_CPU_WORKER"//C_NULL_CHAR) FSTARPU_CUDA_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA_WORKER"//C_NULL_CHAR) FSTARPU_OPENCL_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL_WORKER"//C_NULL_CHAR) FSTARPU_ANY_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_ANY_WORKER"//C_NULL_CHAR) FSTARPU_NMAXBUFS = int(p_to_ip(fstarpu_get_constant(C_CHAR_"FSTARPU_NMAXBUFS"//C_NULL_CHAR)),c_int) FSTARPU_SCHED_CTX_POLICY_NAME = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_NAME"//C_NULL_CHAR) FSTARPU_SCHED_CTX_POLICY_STRUCT = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_STRUCT"//C_NULL_CHAR) FSTARPU_SCHED_CTX_POLICY_MIN_PRIO = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_MIN_PRIO"//C_NULL_CHAR) FSTARPU_SCHED_CTX_POLICY_MAX_PRIO = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_MAX_PRIO"//C_NULL_CHAR) FSTARPU_SCHED_CTX_HIERARCHY_LEVEL = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_HIERARCHY_LEVEL"//C_NULL_CHAR) FSTARPU_SCHED_CTX_NESTED = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_NESTED"//C_NULL_CHAR) FSTARPU_SCHED_CTX_AWAKE_WORKERS = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_AWAKE_WORKERS"//C_NULL_CHAR) FSTARPU_SCHED_CTX_POLICY_INIT = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_INIT"//C_NULL_CHAR) FSTARPU_SCHED_CTX_USER_DATA = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_USER_DATA"//C_NULL_CHAR) FSTARPU_NOWHERE = & fstarpu_get_constant(C_CHAR_"FSTARPU_NOWHERE"//C_NULL_CHAR) FSTARPU_CPU = & fstarpu_get_constant(C_CHAR_"FSTARPU_CPU"//C_NULL_CHAR) FSTARPU_CUDA = & fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA"//C_NULL_CHAR) FSTARPU_OPENCL = & fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL"//C_NULL_CHAR) FSTARPU_CODELET_SIMGRID_EXECUTE = & fstarpu_get_constant(C_CHAR_"FSTARPU_CODELET_SIMGRID_EXECUTE"//C_NULL_CHAR) FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT = & fstarpu_get_constant(C_CHAR_"FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT"//C_NULL_CHAR) FSTARPU_CUDA_ASYNC = & fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA_ASYNC"//C_NULL_CHAR) FSTARPU_OPENCL_ASYNC = & fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL_ASYNC"//C_NULL_CHAR) !FSTARPU_PER_WORKER = & ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_WORKER"//C_NULL_CHAR) !FSTARPU_PER_ARCH = & ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_ARCH"//C_NULL_CHAR) !FSTARPU_PER_COMMON = & ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_COMMON"//C_NULL_CHAR) FSTARPU_HISTORY_BASED = & fstarpu_get_constant(C_CHAR_"FSTARPU_HISTORY_BASED"//C_NULL_CHAR) FSTARPU_REGRESSION_BASED = & fstarpu_get_constant(C_CHAR_"FSTARPU_REGRESSION_BASED"//C_NULL_CHAR) FSTARPU_NL_REGRESSION_BASED = & fstarpu_get_constant(C_CHAR_"FSTARPU_NL_REGRESSION_BASED"//C_NULL_CHAR) FSTARPU_MULTIPLE_REGRESSION_BASED = & fstarpu_get_constant(C_CHAR_"FSTARPU_MULTIPLE_REGRESSION_BASED"//C_NULL_CHAR) FSTARPU_SEQ = & fstarpu_get_constant(C_CHAR_"FSTARPU_SEQ"//C_NULL_CHAR) FSTARPU_SPMD = & fstarpu_get_constant(C_CHAR_"FSTARPU_SPMD"//C_NULL_CHAR) FSTARPU_FORKJOIN = & fstarpu_get_constant(C_CHAR_"FSTARPU_FORKJOIN"//C_NULL_CHAR) ! Initialize size constants as 'c_ptr' FSTARPU_SZ_C_DOUBLE = sz_to_p(c_sizeof(FSTARPU_SZ_C_DOUBLE_dummy)) FSTARPU_SZ_C_FLOAT = sz_to_p(c_sizeof(FSTARPU_SZ_C_FLOAT_dummy)) FSTARPU_SZ_C_CHAR = sz_to_p(c_sizeof(FSTARPU_SZ_C_CHAR_dummy)) FSTARPU_SZ_C_INT = sz_to_p(c_sizeof(FSTARPU_SZ_C_INT_dummy)) FSTARPU_SZ_C_INTPTR_T = sz_to_p(c_sizeof(FSTARPU_SZ_C_INTPTR_T_dummy)) FSTARPU_SZ_C_PTR = sz_to_p(c_sizeof(FSTARPU_SZ_C_PTR_dummy)) FSTARPU_SZ_C_SIZE_T = sz_to_p(c_sizeof(FSTARPU_SZ_C_SIZE_T_dummy)) FSTARPU_SZ_CHARACTER = sz_to_p(c_sizeof(FSTARPU_SZ_CHARACTER_dummy)) FSTARPU_SZ_INTEGER = sz_to_p(c_sizeof(FSTARPU_SZ_INTEGER_dummy)) FSTARPU_SZ_INT4 = sz_to_p(c_sizeof(FSTARPU_SZ_INT4_dummy)) FSTARPU_SZ_INT8 = sz_to_p(c_sizeof(FSTARPU_SZ_INT8_dummy)) FSTARPU_SZ_REAL = sz_to_p(c_sizeof(FSTARPU_SZ_REAL_dummy)) FSTARPU_SZ_REAL4 = sz_to_p(c_sizeof(FSTARPU_SZ_REAL4_dummy)) FSTARPU_SZ_REAL8 = sz_to_p(c_sizeof(FSTARPU_SZ_REAL8_dummy)) FSTARPU_SZ_DOUBLE_PRECISION = sz_to_p(c_sizeof(FSTARPU_SZ_DOUBLE_PRECISION_dummy)) FSTARPU_SZ_COMPLEX = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX_dummy)) FSTARPU_SZ_COMPLEX4 = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX4_dummy)) FSTARPU_SZ_COMPLEX8 = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX8_dummy)) FSTARPU_SZ_COMPLEX8 = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX8_dummy)) FSTARPU_DEFAULT_PRIO = int(p_to_ip(& fstarpu_get_constant(C_CHAR_"FSTARPU_DEFAULT_PRIO"//C_NULL_CHAR)),c_int) ! Initialize StarPU if (c_associated(conf)) then fstarpu_init = fstarpu_init_internal(conf) else fstarpu_init = fstarpu_init_internal(C_NULL_PTR) end if end function fstarpu_init function fstarpu_csizet_to_cptr(i) bind(C) use iso_c_binding type(c_ptr) :: fstarpu_csizet_to_cptr integer(c_size_t) :: i fstarpu_csizet_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR) end function fstarpu_csizet_to_cptr function fstarpu_int_to_cptr(i) bind(C) use iso_c_binding type(c_ptr) :: fstarpu_int_to_cptr integer(c_int) :: i fstarpu_int_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR) end function fstarpu_int_to_cptr function fstarpu_long_to_cptr(i) bind(C) use iso_c_binding type(c_ptr) :: fstarpu_long_to_cptr integer(c_long) :: i fstarpu_long_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR) end function fstarpu_long_to_cptr ! Note: do not add binding declarations here in 'CONTAINS' ! section, because the compiler generates empty functions for ! them. ! Instead, put binding declarations in the 'INTERFACE' section ! above. end module fstarpu_mod starpu-1.4.9+dfsg/examples/native_fortran/nf_codelets.f90000066400000000000000000000074321507764646700234620ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! module nf_codelets contains ! 'cl_vec' codelet routine ! ! Note: codelet routines must: ! . be declared recursive (~ 'reentrant routine') ! . be declared with the 'bind(C)' attribute for proper C interfacing recursive subroutine cl_cpu_func_vec (buffers, cl_args) bind(C) use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module implicit none type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused real(8), dimension(:), pointer :: va integer, dimension(:), pointer :: vb integer :: nx_va,nx_vb,i write(*,*) "task -->" ! get 'va' number of elements nx_va = fstarpu_vector_get_nx(buffers, 0) write(*,*) "nx_va" write(*,*) nx_va ! get 'vb' number of elements nx_vb = fstarpu_vector_get_nx(buffers, 1) write(*,*) "nx_vb" write(*,*) nx_vb ! get 'va' converted Fortran pointer call c_f_pointer(fstarpu_vector_get_ptr(buffers, 0), va, shape=[nx_va]) write(*,*) "va" do i=1,nx_va write(*,*) i,va(i) end do ! get 'vb' converted Fortran pointer call c_f_pointer(fstarpu_vector_get_ptr(buffers, 1), vb, shape=[nx_vb]) write(*,*) "vb" do i=1,nx_vb write(*,*) i,vb(i) end do write(*,*) "task <--" end subroutine cl_cpu_func_vec ! 'cl_mat' codelet routine recursive subroutine cl_cpu_func_mat (buffers, cl_args) bind(C) use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module implicit none type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused real(8), dimension(:,:), pointer :: ma integer, dimension(:,:), pointer :: mb integer :: ld_ma,nx_ma,ny_ma integer :: ld_mb,nx_mb,ny_mb integer :: i,j write(*,*) "task -->" ld_ma = fstarpu_matrix_get_ld(buffers, 0) nx_ma = fstarpu_matrix_get_nx(buffers, 0) ny_ma = fstarpu_matrix_get_ny(buffers, 0) write(*,*) "ld_ma" write(*,*) ld_ma write(*,*) "nx_ma" write(*,*) nx_ma write(*,*) "ny_ma" write(*,*) ny_ma ld_mb = fstarpu_matrix_get_ld(buffers, 1) nx_mb = fstarpu_matrix_get_nx(buffers, 1) ny_mb = fstarpu_matrix_get_ny(buffers, 1) write(*,*) "ld_mb" write(*,*) ld_mb write(*,*) "nx_mb" write(*,*) nx_mb write(*,*) "ny_mb" write(*,*) ny_mb call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), ma, shape=[ld_ma,ny_ma]) write(*,*) "ma" do i=1,nx_ma do j=1,ny_ma write(*,*) i,j,ma(i,j) end do write(*,*) '-' end do call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 1), mb, shape=[ld_mb,ny_mb]) write(*,*) "mb" do i=1,nx_mb do j=1,ny_mb write(*,*) i,j,mb(i,j) end do write(*,*) '-' end do write(*,*) "task <--" end subroutine cl_cpu_func_mat end module nf_codelets starpu-1.4.9+dfsg/examples/native_fortran/nf_compute.f90000066400000000000000000000105211507764646700233250ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! Copyright (C) 2015-2015 ONERA ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! ! Computation kernels for the simulation MODULE nf_compute USE nf_types USE fstarpu_mod USE iso_c_binding IMPLICIT NONE CONTAINS !--------------------------------------------------------------! SUBROUTINE init_element(ro,dro,basis,Neq_max,Np,Ng,i) INTEGER(KIND=C_INT),INTENT(IN) :: Neq_max,Np,Ng,i REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(INOUT) :: ro,basis,dro !Local variables INTEGER(KIND=C_INT) :: n,nb,neq DO nb=1,Np DO neq= 1,Neq_max ro(neq,nb) = 0.01*(nb+neq)*i END DO END DO DO nb=1,Np DO neq= 1,Neq_max dro(neq,nb) = 0.05*(nb-neq)*i END DO END DO DO n=1,Ng DO nb=1,Np basis(nb,n) = 0.05*(n+nb)*i END DO END DO END SUBROUTINE init_element !--------------------------------------------------------------! RECURSIVE SUBROUTINE loop_element_cpu_fortran(buffers, cl_args) BIND(C) TYPE(C_PTR), VALUE, INTENT(IN) :: buffers, cl_args INTEGER(KIND=C_INT) :: Neq_max,Np,Ng REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER :: ro,dro,basis REAL(KIND=C_DOUBLE),TARGET :: coeff Neq_max = fstarpu_matrix_get_nx(buffers, 0) Np = fstarpu_matrix_get_nx(buffers, 2) Ng = fstarpu_matrix_get_ny(buffers, 2) CALL fstarpu_unpack_arg(cl_args,(/ c_loc(coeff) /)) CALL c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), ro, shape=[Neq_max,Np]) CALL c_f_pointer(fstarpu_matrix_get_ptr(buffers, 1), dro, shape=[Neq_max,Np]) CALL c_f_pointer(fstarpu_matrix_get_ptr(buffers, 2), basis, shape=[Np,Ng]) CALL loop_element_cpu(ro,dro,basis,coeff,Neq_max,Ng,Np) END SUBROUTINE loop_element_cpu_fortran !--------------------------------------------------------------! RECURSIVE SUBROUTINE loop_element_cpu(ro,dro,basis,coeff,Neq_max,Ng,Np) REAL(KIND=C_DOUBLE),INTENT(IN) :: coeff INTEGER(KIND=C_INT),INTENT(IN) :: Neq_max,Ng,Np REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(IN) :: ro,basis REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(INOUT) :: dro !Local variables REAL(KIND=C_DOUBLE) :: coeff2,r INTEGER(KIND=C_INT) :: n,nb,neq DO n=1,Ng r = 0. DO nb=1,Np DO neq= 1,Neq_max r = r + basis(nb,n) * ro(neq,nb) ENDDO ENDDO coeff2 = r + coeff DO nb=1,Np DO neq = 1,Neq_max dro(neq,nb) = coeff2 + dro(neq,nb) ENDDO ENDDO ENDDO END SUBROUTINE loop_element_cpu !--------------------------------------------------------------! RECURSIVE SUBROUTINE copy_element_cpu_fortran(buffers, cl_args) BIND(C) TYPE(C_PTR), VALUE, INTENT(IN) :: buffers, cl_args INTEGER(KIND=C_INT) :: Neq_max,Np REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER :: ro,dro Neq_max = fstarpu_matrix_get_nx(buffers, 0) Np = fstarpu_matrix_get_ny(buffers, 0) CALL c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), ro, shape=[Neq_max,Np]) CALL c_f_pointer(fstarpu_matrix_get_ptr(buffers, 1), dro, shape=[Neq_max,Np]) CALL copy_element_cpu(ro,dro) END SUBROUTINE copy_element_cpu_fortran !--------------------------------------------------------------! RECURSIVE SUBROUTINE copy_element_cpu(ro,dro) REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(INOUT) :: ro REAL(KIND=C_DOUBLE),DIMENSION(:,:),POINTER,INTENT(IN) :: dro ro = ro + dro END SUBROUTINE copy_element_cpu END MODULE nf_compute starpu-1.4.9+dfsg/examples/native_fortran/nf_dynbuf.f90000066400000000000000000000054401507764646700231440ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! program nf_dynbuf use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module use nf_dynbuf_cl implicit none type(c_ptr) :: cl_dynbuf_big ! a pointer for the codelet structure type(c_ptr) :: dh_var type(c_ptr) :: descrs_var integer(c_int),target :: nbuffers integer(c_int) :: err ! return status for fstarpu_init integer(c_int) :: ncpu ! number of cpus workers integer(c_int),target :: var integer(c_int) :: i var = 42 ! initialize StarPU with default settings err = fstarpu_init(C_NULL_PTR) if (err == -19) then stop 77 end if ! stop there if no CPU worker available ncpu = fstarpu_cpu_worker_get_count() if (ncpu == 0) then call fstarpu_shutdown() stop 77 end if ! allocate an empty codelet structure cl_dynbuf_big = fstarpu_codelet_allocate() call fstarpu_codelet_set_name(cl_dynbuf_big, C_CHAR_"dummy_big_kernel"//C_NULL_CHAR) call fstarpu_codelet_add_cpu_func(cl_dynbuf_big, C_FUNLOC(cl_cpu_func_dynbuf_big)) write(*,*) "FSTARPU_NMAXBUFS",FSTARPU_NMAXBUFS nbuffers = FSTARPU_NMAXBUFS+1 call fstarpu_codelet_set_nbuffers(cl_dynbuf_big, nbuffers) call fstarpu_variable_data_register(dh_var, 0, c_loc(var), c_sizeof(var)) descrs_var = fstarpu_data_descr_array_alloc(nbuffers) do i=0,nbuffers-1 call fstarpu_data_descr_array_set(descrs_var, i, dh_var, FSTARPU_RW) end do call fstarpu_task_insert((/ cl_dynbuf_big, & FSTARPU_VALUE, c_loc(nbuffers), FSTARPU_SZ_C_INT, & FSTARPU_DATA_MODE_ARRAY, descrs_var, c_loc(nbuffers), & C_NULL_PTR /)) call fstarpu_task_wait_for_all() call fstarpu_data_descr_array_free(descrs_var) call fstarpu_data_unregister(dh_var) ! free codelet structure call fstarpu_codelet_free(cl_dynbuf_big) ! shut StarPU down call fstarpu_shutdown() end program nf_dynbuf starpu-1.4.9+dfsg/examples/native_fortran/nf_dynbuf_cl.f90000066400000000000000000000027621507764646700236260ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! module nf_dynbuf_cl contains recursive subroutine cl_cpu_func_dynbuf_big (buffers, cl_args) bind(C) use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module implicit none type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused integer(c_int),target :: nb_data integer(c_int),pointer :: val integer(c_int) :: i call fstarpu_unpack_arg(cl_args,(/ c_loc(nb_data) /)) write(*,*) "number of data:", nb_data do i=0,nb_data-1 call c_f_pointer(fstarpu_variable_get_ptr(buffers, i), val) write(*,*) "i:", i, ", val:", val if (val /= 42) then stop 1 end if end do end subroutine cl_cpu_func_dynbuf_big end module nf_dynbuf_cl starpu-1.4.9+dfsg/examples/native_fortran/nf_example.f90000066400000000000000000000150001507764646700233010ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! Copyright (C) 2015-2015 ONERA ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! ! This is an example of Fortran90 program making use of StarPU. ! It registers a few matrices for each element of a domain, performs ! update computations on them, and checks the result. PROGRAM f90_example USE nf_types USE fstarpu_mod USE nf_compute USE iso_c_binding IMPLICIT NONE TYPE(type_mesh) :: mesh TYPE(type_numpar),TARGET :: numpar TYPE(type_mesh_elt),POINTER :: elt => NULL() INTEGER(KIND=C_INT) :: i,Nelt,res,cpus INTEGER(KIND=C_INT) :: starpu_maj,starpu_min,starpu_rev INTEGER(KIND=C_INT) :: it,it_tot INTEGER(KIND=C_INT), PARAMETER :: loop_color = INT(Z'7FFF00', KIND=C_INT) INTEGER(KIND=C_INT), PARAMETER :: copy_color = INT(Z'3F7FFF', KIND=C_INT) REAL(KIND=C_DOUBLE),TARGET :: flops INTEGER(C_INT), TARGET :: max_prio TYPE(C_PTR) :: cl_loop_element = C_NULL_PTR ! loop codelet TYPE(C_PTR) :: cl_copy_element = C_NULL_PTR ! copy codelet !Initialization with arbitrary data Nelt = 2 it_tot = 2 numpar%Neq_max = 5 numpar%coeff = 1.0 ALLOCATE(mesh%elt(Nelt)) DO i = 1,Nelt elt => mesh%elt(i) elt%Ng = 4 elt%Np = 2 ALLOCATE(elt%ro(numpar%Neq_max,elt%Np)) ALLOCATE(elt%dro(numpar%Neq_max,elt%Np)) ALLOCATE(elt%basis(elt%Np,elt%Ng)) CALL init_element(elt%ro,elt%dro,elt%basis,numpar%Neq_max,elt%Np,elt%Ng,i) ENDDO !Initialization of StarPU res = fstarpu_init(C_NULL_PTR) IF (res == -19) THEN STOP 77 END IF CALL fstarpu_get_version(starpu_maj,starpu_min,starpu_rev) WRITE(6,'(a,i4,a,i4,a,i4)') "StarPU version: ", starpu_maj , "." , starpu_min , "." , starpu_rev cpus = fstarpu_cpu_worker_get_count() IF (cpus == 0) THEN CALL fstarpu_shutdown() STOP 77 END IF max_prio = fstarpu_sched_get_max_priority() cl_loop_element = fstarpu_codelet_allocate() CALL fstarpu_codelet_add_cpu_func(cl_loop_element, C_FUNLOC(loop_element_cpu_fortran)) CALL fstarpu_codelet_add_buffer(cl_loop_element, FSTARPU_R) CALL fstarpu_codelet_add_buffer(cl_loop_element, FSTARPU_RW) CALL fstarpu_codelet_add_buffer(cl_loop_element, FSTARPU_R) CALL fstarpu_codelet_set_name(cl_loop_element, C_CHAR_"LOOP_ELEMENT"//C_NULL_CHAR) CALL fstarpu_codelet_set_color(cl_loop_element, loop_color) cl_copy_element = fstarpu_codelet_allocate() CALL fstarpu_codelet_add_cpu_func(cl_copy_element, C_FUNLOC(copy_element_cpu_fortran)) CALL fstarpu_codelet_add_buffer(cl_copy_element, FSTARPU_RW) CALL fstarpu_codelet_add_buffer(cl_copy_element, FSTARPU_R) CALL fstarpu_codelet_set_name(cl_copy_element, C_CHAR_"COPY_ELEMENT"//C_NULL_CHAR) CALL fstarpu_codelet_set_color(cl_copy_element, copy_color) !Registration of elements DO i = 1,Nelt elt => mesh%elt(i) call fstarpu_matrix_data_register(elt%ro_h, 0, c_loc(elt%ro), numpar%Neq_max, numpar%Neq_max, elt%Np, c_sizeof(elt%ro(1,1))) call fstarpu_matrix_data_register(elt%dro_h, 0, c_loc(elt%dro), numpar%Neq_max, numpar%Neq_max, elt%Np, c_sizeof(elt%dro(1,1))) call fstarpu_matrix_data_register(elt%basis_h, 0, c_loc(elt%basis), elt%Np, elt%Np, elt%Ng, c_sizeof(elt%basis(1,1))) ENDDO !Compute DO it = 1,it_tot ! compute new dro for each element DO i = 1,Nelt elt => mesh%elt(i) flops = elt%Ng * ( (elt%Np * numpar%Neq_max * 2) + 1 + elt%Np * numpar%Neq_max) CALL fstarpu_task_insert((/ cl_loop_element, & FSTARPU_VALUE, c_loc(numpar%coeff), FSTARPU_SZ_C_DOUBLE, & FSTARPU_R, elt%ro_h, & FSTARPU_RW, elt%dro_h, & FSTARPU_R, elt%basis_h, & FSTARPU_FLOPS, c_loc(flops), & FSTARPU_PRIORITY, c_loc(FSTARPU_DEFAULT_PRIO), & C_NULL_PTR /)) ENDDO ! sync (if needed by the algorithm) CALL fstarpu_task_wait_for_all() ! - - - - - ! copy dro to ro for each element DO i = 1,Nelt elt => mesh%elt(i) CALL fstarpu_task_insert((/ cl_copy_element, & FSTARPU_RW, elt%ro_h, & FSTARPU_R, elt%dro_h, & FSTARPU_PRIORITY, c_loc(max_prio), & C_NULL_PTR /)) ENDDO ! sync (if needed by the algorithm) CALL fstarpu_task_wait_for_all() ENDDO !Unregistration of elements DO i = 1,Nelt elt => mesh%elt(i) CALL fstarpu_data_unregister(elt%ro_h) CALL fstarpu_data_unregister(elt%dro_h) CALL fstarpu_data_unregister(elt%basis_h) ENDDO !Terminate StarPU, no task can be submitted after CALL fstarpu_shutdown() !Check data with StarPU WRITE(6,'(a)') " " WRITE(6,'(a)') " %%%% RESULTS STARPU %%%% " WRITE(6,'(a)') " " DO i = 1,Nelt WRITE(6,'(a,i4,a)') " elt ", i , " ; elt%ro = " WRITE(6,'(10(1x,F11.2))') mesh%elt(i)%ro WRITE(6,'(a)') " ------------------------ " ENDDO !Same compute without StarPU DO i = 1,Nelt elt => mesh%elt(i) CALL init_element(elt%ro,elt%dro,elt%basis,numpar%Neq_max,elt%Np,elt%Ng,i) ENDDO DO it = 1, it_tot DO i = 1,Nelt elt => mesh%elt(i) CALL loop_element_cpu(elt%ro,elt%dro,elt%basis,numpar%coeff,numpar%Neq_max,elt%Ng,elt%Np) elt%ro = elt%ro + elt%dro ENDDO ENDDO WRITE(6,'(a)') " " WRITE(6,'(a)') " %%%% RESULTS VERIFICATION %%%% " WRITE(6,'(a)') " " DO i = 1,Nelt WRITE(6,'(a,i4,a)') " elt ", i , " ; elt%ro = " WRITE(6,'(10(1x,F11.2))') mesh%elt(i)%ro WRITE(6,'(a)') " ------------------------ " ENDDO WRITE(6,'(a)') " " !Deallocation CALL fstarpu_codelet_free(cl_loop_element) CALL fstarpu_codelet_free(cl_copy_element) DO i = 1,Nelt elt => mesh%elt(i) DEALLOCATE(elt%ro) DEALLOCATE(elt%dro) DEALLOCATE(elt%basis) ENDDO DEALLOCATE(mesh%elt) END PROGRAM f90_example starpu-1.4.9+dfsg/examples/native_fortran/nf_matrix.f90000066400000000000000000000077131507764646700231660ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! program nf_matrix use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module use nf_codelets implicit none real(8), dimension(:,:), allocatable, target :: ma integer, dimension(:,:), allocatable, target :: mb integer :: i,j type(c_ptr) :: cl_mat ! a pointer for the codelet structure type(c_ptr) :: dh_ma ! a pointer for the 'ma' vector data handle type(c_ptr) :: dh_mb ! a pointer for the 'mb' vector data handle integer(c_int) :: err ! return status for fstarpu_init integer(c_int) :: ncpu ! number of cpus workers real(c_double) :: start_time ! start clock in usec real(c_double) :: end_time ! end clock in usec allocate(ma(5,6)) do i=1,5 do j=1,6 ma(i,j) = (i*10)+j end do end do allocate(mb(7,8)) do i=1,7 do j=1,8 mb(i,j) = (i*10)+j end do end do ! initialize StarPU with default settings err = fstarpu_init(C_NULL_PTR) if (err == -19) then stop 77 end if ! stop there if no CPU worker available ncpu = fstarpu_cpu_worker_get_count() if (ncpu == 0) then call fstarpu_shutdown() stop 77 end if ! collect the start clock time start_time = fstarpu_timing_now() ! allocate an empty codelet structure cl_mat = fstarpu_codelet_allocate() ! set the codelet name call fstarpu_codelet_set_name(cl_mat, C_CHAR_"my_mat_codelet"//C_NULL_CHAR) ! add a CPU implementation function to the codelet call fstarpu_codelet_add_cpu_func(cl_mat, C_FUNLOC(cl_cpu_func_mat)) ! add a Read-only mode data buffer to the codelet call fstarpu_codelet_add_buffer(cl_mat, FSTARPU_R) ! add a Read-Write mode data buffer to the codelet call fstarpu_codelet_add_buffer(cl_mat, FSTARPU_RW) ! register 'ma', a vector of real(8) elements !dh_ma = fstarpu_matrix_data_register(c_loc(ma), 5, 5, 6, c_sizeof(ma(1,1)), 0) call fstarpu_matrix_data_register(dh_ma, 0, c_loc(ma), 5, 5, 6, c_sizeof(ma(1,1))) ! register 'mb', a vector of integer elements call fstarpu_matrix_data_register(dh_mb, 0, c_loc(mb), 7, 7, 8, c_sizeof(mb(1,1))) ! insert a task with codelet cl_mat, and vectors 'ma' and 'mb' ! ! Note: The array argument must follow the layout: ! (/ ! , ! [ [, ! [ [, ! [ [ #include #include #ifdef STARPU_QUICK_CHECK #define NX 2048 #else #define NX 2048000 #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void scal_cpu_func(void *buffers[], void *_args) { unsigned i; float *factor = _args, f = *factor; struct starpu_vector_interface *vector = buffers[0]; unsigned n = STARPU_VECTOR_GET_NX(vector); float *val = (float *)STARPU_VECTOR_GET_PTR(vector); FPRINTF(stderr, "running task with %d CPUs.\n", starpu_combined_worker_get_size()); #pragma omp parallel for num_threads(starpu_combined_worker_get_size()) for (i = 0; i < n; i++) { float v = val[i]; int j; for (j = 0; j < 100; j++) v = v * f; val[i] = v; } } static struct starpu_perfmodel vector_scal_model = { .type = STARPU_HISTORY_BASED, .symbol = "vector_scal_parallel" }; static struct starpu_codelet cl = { .modes = { STARPU_RW }, .type = STARPU_FORKJOIN, .max_parallelism = INT_MAX, .cpu_funcs = {scal_cpu_func}, .cpu_funcs_name = {"scal_cpu_func"}, .nbuffers = 1, .model = &vector_scal_model, }; int main(void) { struct starpu_conf conf; float *vector; unsigned i; int ret; starpu_conf_init(&conf); /* Most OpenMP implementations do not support concurrent parallel * sections, so only enable one combined worker at a time. */ conf.single_combined_worker = 1; conf.sched_policy_name = "pheft"; ret = starpu_init(&conf); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&vector, NX*sizeof(float)); for (i = 0; i < NX; i++) vector[i] = (i+1.0f); FPRINTF(stderr, "BEFORE: First element was %f\n", vector[0]); FPRINTF(stderr, "BEFORE: Last element was %f\n", vector[NX-1]); starpu_data_handle_t vector_handle; starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); float factor = 1.001; for (i = 0; i < 100; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = vector_handle; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_data_unregister(vector_handle); FPRINTF(stderr, "AFTER: First element is %f\n", vector[0]); FPRINTF(stderr, "AFTER: Last element is %f\n", vector[NX-1]); starpu_free_noflag(vector, NX*sizeof(float)); /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); return 0; enodev: starpu_data_unregister(vector_handle); starpu_free_noflag(vector, NX*sizeof(float)); starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/examples/parallel_workers/000077500000000000000000000000001507764646700211765ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/parallel_workers/parallel_workers.c000066400000000000000000000077031507764646700247210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #if !defined(STARPU_PARALLEL_WORKER) int main(void) { return 77; } #else #ifdef STARPU_QUICK_CHECK #define NTASKS 8 #else #define NTASKS 32 #endif #define SIZE 4000 /* Codelet SUM */ static void sum_cpu(void * descr[], void *cl_arg) { double * v_dst = (double *) STARPU_VECTOR_GET_PTR(descr[0]); double * v_src0 = (double *) STARPU_VECTOR_GET_PTR(descr[1]); double * v_src1 = (double *) STARPU_VECTOR_GET_PTR(descr[1]); int size; starpu_codelet_unpack_args(cl_arg, &size); fprintf(stderr, "sum_cpu\n"); int i, k; #pragma omp parallel fprintf(stderr, "hello from the task %d\n", omp_get_thread_num()); for (k=0;k<10;k++) { #pragma omp parallel for for (i=0; i 6) ? 1 : 0, /* Note that this mode requires that you put a prologue callback managing this on all tasks to be taken into account. */ STARPU_PROLOGUE_CALLBACK_POP, &starpu_parallel_worker_openmp_prologue, 0); if (ret == -ENODEV) goto out; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } out: /* wait for all tasks at the end*/ starpu_task_wait_for_all(); starpu_data_unregister(handle1); starpu_data_unregister(handle2); starpu_parallel_worker_shutdown(parallel_workers); starpu_shutdown(); return (ret == -ENODEV) ? 77 : 0 ; enodev: starpu_shutdown(); return 77; } #endif starpu-1.4.9+dfsg/examples/parallel_workers/parallel_workers_func.c000066400000000000000000000050321507764646700257250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #if !defined(STARPU_PARALLEL_WORKER) int main(void) { return 77; } #else static void display_cpu(void *descr[], void *cl_arg) { (void)descr; (void)cl_arg; #pragma omp parallel { #ifdef __linux__ fprintf(stderr, "thread %d on cpu %d\n", omp_get_thread_num(), sched_getcpu()); #endif } } static struct starpu_codelet display_cl = { .cpu_funcs = {display_cpu, NULL}, .nbuffers = 0, }; void bind_func(void *arg) { (void) arg; int workerid = starpu_worker_get_id_check(); if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) { struct starpu_task *task = starpu_task_get_current(); int sched_ctx = task->sched_ctx; int *cpuids = NULL; int ncpuids = 0; starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids); omp_set_num_threads(ncpuids); #pragma omp parallel { starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]); } free(cpuids); } return; } int main(void) { int ret, i; struct starpu_parallel_worker_config *parallel_workers; setenv("STARPU_NMPI_MS","0",1); ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); parallel_workers = starpu_parallel_worker_init(HWLOC_OBJ_SOCKET, STARPU_PARALLEL_WORKER_POLICY_NAME, "dmdas", STARPU_PARALLEL_WORKER_CREATE_FUNC, &bind_func, STARPU_PARALLEL_WORKER_CREATE_FUNC_ARG, NULL, 0); if (parallel_workers == NULL) goto enodev; starpu_parallel_worker_print(parallel_workers); ret = starpu_task_insert(&display_cl, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* wait for all tasks at the end*/ starpu_task_wait_for_all(); starpu_parallel_worker_shutdown(parallel_workers); starpu_shutdown(); return 0; enodev: starpu_shutdown(); return 77; } #endif starpu-1.4.9+dfsg/examples/parallel_workers/parallel_workers_oldapi.c000066400000000000000000000027621507764646700262510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #if !defined(STARPU_PARALLEL_WORKER) int main(void) { return 77; } #else int main(void) { int ret; struct starpu_cluster_machine *clusters; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* We regroup resources under each sockets into a parallel worker. We express a partition * of one socket to create two internal parallel workers */ clusters = starpu_cluster_machine(HWLOC_OBJ_SOCKET, STARPU_CLUSTER_POLICY_NAME, "dmdas", STARPU_PARALLEL_WORKER_PARTITION_ONE, STARPU_PARALLEL_WORKER_NEW, STARPU_PARALLEL_WORKER_NB, 2, STARPU_PARALLEL_WORKER_NCORES, 1, 0); if (clusters != NULL) { starpu_cluster_print(clusters); starpu_uncluster_machine(clusters); } starpu_shutdown(); return 0; } #endif starpu-1.4.9+dfsg/examples/perf_monitoring/000077500000000000000000000000001507764646700210275ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/perf_monitoring/perf_counters_01.c000066400000000000000000000076251507764646700243630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include static void print_scope(const enum starpu_perf_counter_scope scope) { int nb = starpu_perf_counter_nb(scope); int i; printf("scope %s\n", starpu_perf_counter_scope_id_to_name(scope)); for (i=0; i #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) /* global counters */ static int id_g_total_submitted; static int id_g_peak_submitted; static int id_g_peak_ready; /* per worker counters */ static int id_w_total_executed; static int id_w_cumul_execution_time; /* per_codelet counters */ static int id_c_total_submitted; static int id_c_peak_submitted; static int id_c_peak_ready; static int id_c_total_executed; static int id_c_cumul_execution_time; void g_listener_cb(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context) { (void) listener; (void) context; int64_t g_total_submitted = starpu_perf_counter_sample_get_int64_value(sample, id_g_total_submitted); int64_t g_peak_submitted = starpu_perf_counter_sample_get_int64_value(sample, id_g_peak_submitted); int64_t g_peak_ready = starpu_perf_counter_sample_get_int64_value(sample, id_g_peak_ready); printf("global: g_total_submitted = %"PRId64", g_peak_submitted = %"PRId64", g_peak_ready = %"PRId64"\n", g_total_submitted, g_peak_submitted, g_peak_ready); } void w_listener_cb(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context) { (void) listener; (void) context; int workerid = starpu_worker_get_id(); int64_t w_total_executed = starpu_perf_counter_sample_get_int64_value(sample, id_w_total_executed); double w_cumul_execution_time = starpu_perf_counter_sample_get_double_value(sample, id_w_cumul_execution_time); printf("worker[%d]: w_total_executed = %"PRId64", w_cumul_execution_time = %lf\n", workerid, w_total_executed, w_cumul_execution_time); } void c_listener_cb(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context) { (void) listener; struct starpu_codelet *cl = context; int64_t c_total_submitted = starpu_perf_counter_sample_get_int64_value(sample, id_c_total_submitted); int64_t c_peak_submitted = starpu_perf_counter_sample_get_int64_value(sample, id_c_peak_submitted); int64_t c_peak_ready = starpu_perf_counter_sample_get_int64_value(sample, id_c_peak_ready); int64_t c_total_executed = starpu_perf_counter_sample_get_int64_value(sample, id_c_total_executed); double c_cumul_execution_time = starpu_perf_counter_sample_get_double_value(sample, id_c_cumul_execution_time); if (cl->name != NULL) { printf("codelet[%s]: c_total_submitted = %"PRId64", c_peak_submitted = %"PRId64", c_peak_ready = %"PRId64", c_total_executed = %"PRId64", c_cumul_execution_time = %lf\n", cl->name, c_total_submitted, c_peak_submitted, c_peak_ready, c_total_executed, c_cumul_execution_time); } else { printf("codelet[%p]: c_total_submitted = %"PRId64", c_peak_submitted = %"PRId64", c_peak_ready = %"PRId64", c_total_executed = %"PRId64", c_cumul_execution_time = %lf\n", cl, c_total_submitted, c_peak_submitted, c_peak_ready, c_total_executed, c_cumul_execution_time); } } void func(void *buffers[], void *cl_args) { int *int_vector = (int*)STARPU_VECTOR_GET_PTR(buffers[0]); int NX = (int)STARPU_VECTOR_GET_NX(buffers[0]); const int niters; starpu_codelet_unpack_args(cl_args, &niters); int i; for (i=0; i #include #include static void print_scope(const enum starpu_perf_knob_scope scope) { int nb = starpu_perf_knob_nb(scope); int i; printf("scope %s\n", starpu_perf_knob_scope_id_to_name(scope)); for (i=0; i #include #include int main(int argc, char **argv) { int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); { const char * const knob_name = "starpu.global.g_calibrate_knob"; const char * const knob_scope_name = "global"; const char * const knob_type_name = "int32"; int32_t val, val_save; const int scope_id = starpu_perf_knob_scope_name_to_id(knob_scope_name); const int id = starpu_perf_knob_name_to_id(scope_id, knob_name); STARPU_ASSERT(starpu_perf_knob_get_type_id(id) == starpu_perf_knob_type_name_to_id(knob_type_name)); printf("%s:\n", knob_name); val_save = val = starpu_perf_knob_get_global_int32_value(id); printf("- %d\n", val); starpu_perf_knob_set_global_int32_value(id, 1); val = starpu_perf_knob_get_global_int32_value(id); printf("- %d\n", val); STARPU_ASSERT(val == 1); starpu_perf_knob_set_global_int32_value(id, 0); val = starpu_perf_knob_get_global_int32_value(id); printf("- %d\n", val); STARPU_ASSERT(val == 0); starpu_perf_knob_set_global_int32_value(id, val_save); val = starpu_perf_knob_get_global_int32_value(id); printf("- %d\n", val); STARPU_ASSERT(val == val_save); } { const char * const knob_name = "starpu.global.g_enable_catch_signal_knob"; const char * const knob_scope_name = "global"; const char * const knob_type_name = "int32"; int32_t val, val_save; const int scope_id = starpu_perf_knob_scope_name_to_id(knob_scope_name); const int id = starpu_perf_knob_name_to_id(scope_id, knob_name); STARPU_ASSERT(starpu_perf_knob_get_type_id(id) == starpu_perf_knob_type_name_to_id(knob_type_name)); printf("%s:\n", knob_name); val_save = val = starpu_perf_knob_get_global_int32_value(id); printf("- %d\n", val); starpu_perf_knob_set_global_int32_value(id, 1); val = starpu_perf_knob_get_global_int32_value(id); printf("- %d\n", val); STARPU_ASSERT(val == 1); starpu_perf_knob_set_global_int32_value(id, 0); val = starpu_perf_knob_get_global_int32_value(id); printf("- %d\n", val); STARPU_ASSERT(val == 0); starpu_perf_knob_set_global_int32_value(id, val_save); val = starpu_perf_knob_get_global_int32_value(id); printf("- %d\n", val); STARPU_ASSERT(val == val_save); } { const char * const knob_name = "starpu.worker.w_bind_to_pu_knob"; const char * const knob_scope_name = "per_worker"; const char * const knob_type_name = "int32"; int32_t val; const int scope_id = starpu_perf_knob_scope_name_to_id(knob_scope_name); const int id = starpu_perf_knob_name_to_id(scope_id, knob_name); STARPU_ASSERT(starpu_perf_knob_get_type_id(id) == starpu_perf_knob_type_name_to_id(knob_type_name)); printf("%s:\n", knob_name); unsigned int ncpu = starpu_cpu_worker_get_count(); unsigned int i; for (i=0; i= 0); printf("- %u: %d\n", i, val); } } { const char * const knob_name = "starpu.task.s_max_priority_cap_knob"; const char * const knob_scope_name = "per_scheduler"; const char * const knob_type_name = "int32"; int32_t val; const int scope_id = starpu_perf_knob_scope_name_to_id(knob_scope_name); const int id = starpu_perf_knob_name_to_id(scope_id, knob_name); STARPU_ASSERT(starpu_perf_knob_get_type_id(id) == starpu_perf_knob_type_name_to_id(knob_type_name)); printf("%s:\n", knob_name); val = starpu_perf_knob_get_per_scheduler_int32_value(id, "prio"); printf("- %d\n", val); } { const char * const knob_name = "starpu.task.s_min_priority_cap_knob"; const char * const knob_scope_name = "per_scheduler"; const char * const knob_type_name = "int32"; int32_t val; const int scope_id = starpu_perf_knob_scope_name_to_id(knob_scope_name); const int id = starpu_perf_knob_name_to_id(scope_id, knob_name); STARPU_ASSERT(starpu_perf_knob_get_type_id(id) == starpu_perf_knob_type_name_to_id(knob_type_name)); printf("%s:\n", knob_name); val = starpu_perf_knob_get_per_scheduler_int32_value(id, "prio"); printf("- %d\n", val); } starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/perf_steering/perf_knobs_03.c000066400000000000000000000126661507764646700232730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #define NTASKS 100 volatile int task_count[2]; void cpu_func(void *buffer[], void *cl_arg) { (void)buffer; (void)cl_arg; int workerid = starpu_worker_get_id(); STARPU_ASSERT(workerid == 0 || workerid == 1); task_count[workerid]++; } int main(int argc, char **argv) { int ret; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = 2; { const char *sched_pol_name = starpu_getenv("STARPU_SCHED"); if (sched_pol_name != NULL && strcmp(sched_pol_name, "prio") != 0) { fprintf(stderr, "example uses 'prio' scheduling policy.\n"); return 77; } } conf.sched_policy_name = "prio"; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() != 2 || starpu_cuda_worker_get_count() != 0 || starpu_opencl_worker_get_count() != 0 || starpu_mpi_ms_worker_get_count() != 0) { starpu_shutdown(); fprintf(stderr, "example needs exactly two cpu cores.\n"); return 77; } { const char * const max_prio_knob_name = "starpu.task.s_max_priority_cap_knob"; const char * const min_prio_knob_name = "starpu.task.s_min_priority_cap_knob"; const char * const knob_scope_name = "per_scheduler"; const char * const knob_type_name = "int32"; int32_t max_prio_val; int32_t min_prio_val; const int scope_id = starpu_perf_knob_scope_name_to_id(knob_scope_name); const int max_prio_id = starpu_perf_knob_name_to_id(scope_id, max_prio_knob_name); STARPU_ASSERT(starpu_perf_knob_get_type_id(max_prio_id) == starpu_perf_knob_type_name_to_id(knob_type_name)); const int min_prio_id = starpu_perf_knob_name_to_id(scope_id, min_prio_knob_name); STARPU_ASSERT(starpu_perf_knob_get_type_id(min_prio_id) == starpu_perf_knob_type_name_to_id(knob_type_name)); printf("%s:\n", max_prio_knob_name); max_prio_val = starpu_perf_knob_get_per_scheduler_int32_value(max_prio_id, "prio"); printf("- %d\n", max_prio_val); printf("%s:\n", min_prio_knob_name); min_prio_val = starpu_perf_knob_get_per_scheduler_int32_value(min_prio_id, "prio"); printf("- %d\n", min_prio_val); STARPU_ASSERT(max_prio_val >= min_prio_val); if (min_prio_val > 0) { starpu_perf_knob_set_per_scheduler_int32_value(min_prio_id, "prio", 0); starpu_perf_knob_set_per_scheduler_int32_value(max_prio_id, "prio", 0); } else { starpu_perf_knob_set_per_scheduler_int32_value(max_prio_id, "prio", 0); starpu_perf_knob_set_per_scheduler_int32_value(min_prio_id, "prio", 0); } printf("%s:\n", max_prio_knob_name); max_prio_val = starpu_perf_knob_get_per_scheduler_int32_value(max_prio_id, "prio"); printf("- %d\n", max_prio_val); printf("%s:\n", min_prio_knob_name); min_prio_val = starpu_perf_knob_get_per_scheduler_int32_value(min_prio_id, "prio"); printf("- %d\n", min_prio_val); STARPU_ASSERT(max_prio_val == 0); STARPU_ASSERT(min_prio_val == 0); } { const char * const knob_name = "starpu.worker.w_enable_worker_knob"; const char * const knob_scope_name = "per_worker"; const char * const knob_type_name = "int32"; int32_t val; const int scope_id = starpu_perf_knob_scope_name_to_id(knob_scope_name); const int id = starpu_perf_knob_name_to_id(scope_id, knob_name); STARPU_ASSERT(starpu_perf_knob_get_type_id(id) == starpu_perf_knob_type_name_to_id(knob_type_name)); struct starpu_codelet cl = { .cpu_funcs = {cpu_func} }; task_count[0] = 0; task_count[1] = 0; val = starpu_perf_knob_get_per_worker_int32_value(id, 0); STARPU_ASSERT(val == 1); val = starpu_perf_knob_get_per_worker_int32_value(id, 1); STARPU_ASSERT(val == 1); starpu_perf_knob_set_per_worker_int32_value(id, 1, 0); val = starpu_perf_knob_get_per_worker_int32_value(id, 1); STARPU_ASSERT(val == 0); int i; for (i=0; i select the number of tasks\n"); fprintf(stderr,"-nshot select the number of shot per task\n"); exit(0); } } } static struct starpu_perfmodel model = { .type = STARPU_HISTORY_BASED, .size_base = size_base, .symbol = "monte_carlo_pi" }; static struct starpu_codelet pi_cl = { .cpu_funcs = {cpu_kernel}, .cpu_funcs_name = {"cpu_kernel"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_kernel}, #endif .nbuffers = 2, .modes = {STARPU_R, STARPU_W}, .model = &model }; int main(int argc, char **argv) { unsigned i; int ret; parse_args(argc, argv); ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Initialize the random number generator */ unsigned *sobol_qrng_directions = malloc(n_dimensions*n_directions*sizeof(unsigned)); STARPU_ASSERT(sobol_qrng_directions); initSobolDirectionVectors(n_dimensions, sobol_qrng_directions); /* Any worker may use that array now */ starpu_data_handle_t sobol_qrng_direction_handle; starpu_vector_data_register(&sobol_qrng_direction_handle, STARPU_MAIN_RAM, (uintptr_t)sobol_qrng_directions, n_dimensions*n_directions, sizeof(unsigned)); unsigned *cnt_array = calloc(ntasks, sizeof(unsigned)); STARPU_ASSERT(cnt_array); starpu_data_handle_t cnt_array_handle; starpu_vector_data_register(&cnt_array_handle, STARPU_MAIN_RAM, (uintptr_t)cnt_array, ntasks, sizeof(unsigned)); /* Use a write-through policy : when the data is modified on an * accelerator, we know that it will only be modified once and be * accessed by the CPU later on */ starpu_data_set_wt_mask(cnt_array_handle, (1<cl = &pi_cl; STARPU_ASSERT(starpu_data_get_sub_data(cnt_array_handle, 1, i)); task->handles[0] = sobol_qrng_direction_handle; task->handles[1] = starpu_data_get_sub_data(cnt_array_handle, 1, i); ret = starpu_task_submit(task); STARPU_ASSERT(!ret); } starpu_task_wait_for_all(); /* Get the cnt_array back in main memory */ starpu_data_unpartition(cnt_array_handle, STARPU_MAIN_RAM); starpu_data_unregister(cnt_array_handle); starpu_data_unregister(sobol_qrng_direction_handle); /* Count the total number of entries */ unsigned long total_cnt = 0; for (i = 0; i < ntasks; i++) total_cnt += cnt_array[i]; end = starpu_timing_now(); double timing = end - start; unsigned long total_shot_cnt = ntasks * nshot_per_task; /* Total surface : Pi * r^ 2 = Pi*1^2, total square surface : 2^2 = 4, probability to impact the disk: pi/4 */ FPRINTF(stderr, "Pi approximation : %f (%lu / %lu)\n", ((TYPE)total_cnt*4)/(total_shot_cnt), total_cnt, total_shot_cnt); FPRINTF(stderr, "Total time : %f ms\n", timing/1000.0); FPRINTF(stderr, "Speed : %f GShot/s\n", total_shot_cnt/(1e3*timing)); if (!getenv("STARPU_SSILENT")) starpu_codelet_display_stats(&pi_cl); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/pi/pi.h000066400000000000000000000016051507764646700170210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __PI_H__ #define __PI_H__ #include #include #define TYPE float /* extern "C" void cuda_kernel(void *descr[], void *cl_arg); */ static int n_dimensions = 100; #endif /* __PI_H__ */ starpu-1.4.9+dfsg/examples/pi/pi_kernel.cu000066400000000000000000000107511507764646700205430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* First draw a series of coordinates, then count how many fall inside the * circle quarter */ #include "SobolQRNG/sobol_gpu.h" #include "pi.h" #define MAXNBLOCKS 128 #define MAXTHREADSPERBLOCK 256 static __global__ void monte_carlo(TYPE *random_numbers_x, TYPE *random_numbers_y, unsigned n, unsigned *output_cnt) { __shared__ unsigned scnt[MAXTHREADSPERBLOCK]; /* Do we have a successful shot ? */ const int tid = threadIdx.x + blockIdx.x*blockDim.x; const int nthreads = gridDim.x * blockDim.x; /* Blank the shared mem buffer */ if (threadIdx.x < MAXTHREADSPERBLOCK) scnt[threadIdx.x] = 0; __syncthreads(); int ind; for (ind = tid; ind < n; ind += nthreads) { TYPE x = random_numbers_x[ind]; TYPE y = random_numbers_y[ind]; TYPE dist = (x*x + y*y); unsigned success = (dist <= 1.0f)?1:0; scnt[threadIdx.x] += success; } __syncthreads(); /* Perform a reduction to compute the sum on each thread within that block */ /* NB: We assume that the number of threads per block is a power of 2 ! */ unsigned s; for (s = blockDim.x/2; s!=0; s>>=1) { if (threadIdx.x < s) scnt[threadIdx.x] += scnt[threadIdx.x + s]; __syncthreads(); } /* report the number of successful shots in the block */ if (threadIdx.x == 0) output_cnt[blockIdx.x] = scnt[0]; __syncthreads(); } static __global__ void sum_per_block_cnt(unsigned *output_cnt, unsigned *cnt) { __shared__ unsigned accumulator[MAXNBLOCKS]; unsigned i; /* Load the values from global mem */ for (i = 0; i < blockDim.x; i++) accumulator[i] = output_cnt[i]; __syncthreads(); /* Perform a reduction in shared memory */ unsigned s; for (s = blockDim.x/2; s!=0; s>>=1) { if (threadIdx.x < s) accumulator[threadIdx.x] += accumulator[threadIdx.x + s]; __syncthreads(); } /* Save the result in global memory */ if (threadIdx.x == 0) *cnt = accumulator[0]; } extern "C" void cuda_kernel(void *descr[], void *cl_arg) { cudaError_t cures; unsigned *directions = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned long long *nshot_per_task = (unsigned long long *) cl_arg; unsigned nx = *nshot_per_task; /* Generate Random numbers */ float *random_numbers; cudaMalloc((void **)&random_numbers, 2*nx*sizeof(float)); STARPU_ASSERT(random_numbers); sobolGPU(2*nx/n_dimensions, n_dimensions, directions, random_numbers); cudaStreamSynchronize(starpu_cuda_get_local_stream()); TYPE *random_numbers_x = &random_numbers[0]; TYPE *random_numbers_y = &random_numbers[nx]; unsigned *cnt = (unsigned *)STARPU_VECTOR_GET_PTR(descr[1]); /* How many blocks do we use ? */ unsigned nblocks = 128; // TODO STARPU_ASSERT(nblocks <= MAXNBLOCKS); unsigned *per_block_cnt; cudaMalloc((void **)&per_block_cnt, nblocks*sizeof(unsigned)); STARPU_ASSERT((nx % nblocks) == 0); /* How many threads per block ? At most 256, but no more threads than * there are entries to process per block. */ unsigned nthread_per_block = STARPU_MIN(MAXTHREADSPERBLOCK, (nx / nblocks)); /* each entry of per_block_cnt contains the number of successful shots * in the corresponding block. */ monte_carlo<<>>(random_numbers_x, random_numbers_y, nx, per_block_cnt); cures = cudaGetLastError(); if (cures != cudaSuccess) STARPU_CUDA_REPORT_ERROR(cures); /* Note that we do not synchronize between kernel calls because there is an implicit serialization */ /* compute the total number of successful shots by adding the elements * of the per_block_cnt array */ sum_per_block_cnt<<<1, nblocks, 0, starpu_cuda_get_local_stream()>>>(per_block_cnt, cnt); cures = cudaGetLastError(); if (cures != cudaSuccess) STARPU_CUDA_REPORT_ERROR(cures); cures = cudaStreamSynchronize(starpu_cuda_get_local_stream()); if (cures) STARPU_CUDA_REPORT_ERROR(cures); cudaFree(per_block_cnt); cudaFree(random_numbers); } starpu-1.4.9+dfsg/examples/pi/pi_redux.c000066400000000000000000000254411507764646700202270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This computes Pi by using drawing random coordinates (thanks to the sobol * generator) and check whether they fall within one quarter of a circle. The * proportion gives an approximation of Pi. For each task, we draw a number of * coordinates, and we gather the number of successful draws. * * This version uses reduction to optimize gathering the number of successful * draws. */ #include #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define PI 3.14159265358979323846 #if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CURAND) #warning CURAND is required to run that example on CUDA devices #endif #ifdef STARPU_HAVE_CURAND #include #include #endif static unsigned long long nshot_per_task = 16*1024*1024ULL; /* default value */ static unsigned long ntasks = 1024; static unsigned long ntasks_warmup = 0; static unsigned use_redux = 1; static unsigned do_warmup = 0; /* * Initialization of the Random Number Generators (RNG) */ #ifdef STARPU_HAVE_CURAND /* RNG for the CURAND library */ static curandGenerator_t curandgens[STARPU_NMAXWORKERS]; #endif /* state for the erand48 function : note the huge padding to avoid false-sharing */ #define PADDING 1024 static unsigned short xsubi[STARPU_NMAXWORKERS*PADDING]; static starpu_drand48_data randbuffer[STARPU_NMAXWORKERS*PADDING]; /* Function to initialize the random number generator in the current worker */ static void init_rng(void *arg) { (void)arg; #ifdef STARPU_HAVE_CURAND curandStatus_t res; #endif int workerid = starpu_worker_get_id_check(); switch (starpu_worker_get_type(workerid)) { case STARPU_CPU_WORKER: /* create a seed */ starpu_srand48_r((long int)workerid, &randbuffer[PADDING*workerid]); xsubi[0 + PADDING*workerid] = (unsigned short)workerid; xsubi[1 + PADDING*workerid] = (unsigned short)workerid; xsubi[2 + PADDING*workerid] = (unsigned short)workerid; break; #ifdef STARPU_HAVE_CURAND case STARPU_CUDA_WORKER: /* Create a RNG */ res = curandCreateGenerator(&curandgens[workerid], CURAND_RNG_PSEUDO_DEFAULT); STARPU_ASSERT(res == CURAND_STATUS_SUCCESS); /* Seed it with worker's id */ res = curandSetPseudoRandomGeneratorSeed(curandgens[workerid], (unsigned long long)workerid); STARPU_ASSERT(res == CURAND_STATUS_SUCCESS); break; #endif default: STARPU_ABORT(); break; } } /* The amount of work does not depend on the data size at all :) */ static size_t size_base(struct starpu_task *task, unsigned nimpl) { (void)task; (void)nimpl; return nshot_per_task; } static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-ntasks") == 0) { char *argptr; ntasks = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-nshot") == 0) { char *argptr; nshot_per_task = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-noredux") == 0) { use_redux = 0; } if (strcmp(argv[i], "-warmup") == 0) { do_warmup = 1; ntasks_warmup = 8; /* arbitrary number of warmup tasks */ } if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { fprintf(stderr, "Usage: %s [-ntasks n] [-noredux] [-warmup] [-h]\n", argv[0]); exit(-1); } } } /* * Monte-carlo kernel */ void pi_func_cpu(void *descr[], void *cl_arg) { (void)cl_arg; int workerid = starpu_worker_get_id_check(); unsigned short *worker_xsub; worker_xsub = &xsubi[PADDING*workerid]; starpu_drand48_data *buffer; buffer = &randbuffer[PADDING*workerid]; unsigned long local_cnt = 0; /* Fill the scratchpad with random numbers */ unsigned i; for (i = 0; i < nshot_per_task; i++) { double randx, randy; starpu_erand48_r(worker_xsub, buffer, &randx); starpu_erand48_r(worker_xsub, buffer, &randy); double x = (2.0*randx - 1.0); double y = (2.0*randy - 1.0); double dist = x*x + y*y; if (dist < 1.0) local_cnt++; } /* Put the contribution of that task into the counter */ unsigned long *cnt = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[1]); *cnt = *cnt + local_cnt; } extern void pi_redux_cuda_kernel(float *x, float *y, unsigned n, unsigned long *shot_cnt); #ifdef STARPU_HAVE_CURAND static void pi_func_cuda(void *descr[], void *cl_arg) { (void)cl_arg; curandStatus_t res; int workerid = starpu_worker_get_id_check(); /* CURAND is a bit silly: it assumes that any error is fatal. Calling * cudaGetLastError resets the last error value. */ (void) cudaGetLastError(); /* Fill the scratchpad with random numbers. Note that both x and y * arrays are in stored the same vector. */ float *scratchpad_xy = (float *)STARPU_VECTOR_GET_PTR(descr[0]); res = curandGenerateUniform(curandgens[workerid], scratchpad_xy, 2*nshot_per_task); STARPU_ASSERT(res == CURAND_STATUS_SUCCESS); float *x = &scratchpad_xy[0]; float *y = &scratchpad_xy[nshot_per_task]; unsigned long *shot_cnt = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[1]); pi_redux_cuda_kernel(x, y, nshot_per_task, shot_cnt); } #endif static struct starpu_perfmodel pi_model = { .type = STARPU_HISTORY_BASED, .size_base = size_base, .symbol = "monte_carlo_pi_scratch" }; static struct starpu_codelet pi_cl = { .cpu_funcs = {pi_func_cpu}, .cpu_funcs_name = {"pi_func_cpu"}, #ifdef STARPU_HAVE_CURAND .cuda_funcs = {pi_func_cuda}, #endif .nbuffers = 2, .modes = {STARPU_SCRATCH, STARPU_RW}, .model = &pi_model }; static struct starpu_perfmodel pi_model_redux = { .type = STARPU_HISTORY_BASED, .size_base = size_base, .symbol = "monte_carlo_pi_scratch_redux" }; static struct starpu_codelet pi_cl_redux = { .cpu_funcs = {pi_func_cpu}, .cpu_funcs_name = {"pi_func_cpu"}, #ifdef STARPU_HAVE_CURAND .cuda_funcs = {pi_func_cuda}, #endif .nbuffers = 2, .modes = {STARPU_SCRATCH, STARPU_REDUX}, .model = &pi_model_redux }; /* * Codelets to implement reduction */ void init_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; unsigned long *val = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[0]); *val = 0; } #ifdef STARPU_HAVE_CURAND static void init_cuda_func(void *descr[], void *cl_arg) { (void)cl_arg; unsigned long *val = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[0]); cudaMemsetAsync(val, 0, sizeof(unsigned long), starpu_cuda_get_local_stream()); } #endif static struct starpu_codelet init_codelet = { .cpu_funcs = {init_cpu_func}, .cpu_funcs_name = {"init_cpu_func"}, #ifdef STARPU_HAVE_CURAND .cuda_funcs = {init_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .modes = {STARPU_W}, .nbuffers = 1 }; #ifdef STARPU_HAVE_CURAND /* Dummy implementation of the addition of two unsigned longs in CUDA */ static void redux_cuda_func(void *descr[], void *cl_arg) { (void)cl_arg; unsigned long *d_a = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[0]); unsigned long *d_b = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[1]); unsigned long h_a, h_b; cudaMemcpyAsync(&h_a, d_a, sizeof(h_a), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); cudaMemcpyAsync(&h_b, d_b, sizeof(h_b), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); cudaStreamSynchronize(starpu_cuda_get_local_stream()); h_a += h_b; cudaMemcpyAsync(d_a, &h_a, sizeof(h_a), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); } #endif void redux_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; unsigned long *a = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[0]); unsigned long *b = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[1]); *a = *a + *b; } static struct starpu_codelet redux_codelet = { .cpu_funcs = {redux_cpu_func}, .cpu_funcs_name = {"redux_cpu_func"}, #ifdef STARPU_HAVE_CURAND .cuda_funcs = {redux_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, .nbuffers = 2 }; /* * Main program */ int main(int argc, char **argv) { unsigned i; int ret; /* Not supported yet */ if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) return 77; parse_args(argc, argv); ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Launch a Random Number Generator (RNG) on each worker */ starpu_execute_on_each_worker(init_rng, NULL, STARPU_CPU|STARPU_CUDA); /* Create a scratchpad data */ starpu_data_handle_t xy_scratchpad_handle; starpu_vector_data_register(&xy_scratchpad_handle, -1, (uintptr_t)NULL, 2*nshot_per_task, sizeof(float)); /* Create a variable that will be used to count the number of shots * that actually hit the unit circle when shooting randomly in * [-1,1]^2. */ unsigned long shot_cnt = 0; starpu_data_handle_t shot_cnt_handle; starpu_variable_data_register(&shot_cnt_handle, STARPU_MAIN_RAM, (uintptr_t)&shot_cnt, sizeof(shot_cnt)); starpu_data_set_reduction_methods(shot_cnt_handle, &redux_codelet, &init_codelet); double start; double end; for (i = 0; i < ntasks_warmup; i++) { struct starpu_task *task = starpu_task_create(); task->cl = use_redux?&pi_cl_redux:&pi_cl; task->handles[0] = xy_scratchpad_handle; task->handles[1] = shot_cnt_handle; ret = starpu_task_submit(task); STARPU_ASSERT(!ret); } start = starpu_timing_now(); for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = use_redux?&pi_cl_redux:&pi_cl; task->handles[0] = xy_scratchpad_handle; task->handles[1] = shot_cnt_handle; ret = starpu_task_submit(task); STARPU_ASSERT(!ret); } starpu_data_unregister(shot_cnt_handle); starpu_data_unregister(xy_scratchpad_handle); end = starpu_timing_now(); double timing = end - start; /* Total surface : Pi * r^ 2 = Pi*1^2, total square surface : 2^2 = 4, * probability to impact the disk: pi/4 */ unsigned long total = (ntasks + ntasks_warmup)*nshot_per_task; double pi_approx = ((double)shot_cnt*4.0)/total; FPRINTF(stderr, "Reductions? %s\n", use_redux?"yes":"no"); FPRINTF(stderr, "Pi approximation : %f (%lu / %lu)\n", pi_approx, shot_cnt, total); FPRINTF(stderr, "Error %e \n", pi_approx - PI); FPRINTF(stderr, "Total time : %f ms\n", timing/1000.0); FPRINTF(stderr, "Speed : %f GShot/s\n", total/(1e3*timing)); starpu_shutdown(); if (fabs(pi_approx - PI) > 1.0) return 1; return 0; } starpu-1.4.9+dfsg/examples/pi/pi_redux_kernel.cu000066400000000000000000000075321507764646700217550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This counts how many fall inside the circle quarter */ #include #define MAXNBLOCKS 128 #define MAXTHREADSPERBLOCK 256 static __global__ void monte_carlo(float *x, float *y, unsigned n, unsigned long *output_cnt) { __shared__ unsigned scnt[MAXTHREADSPERBLOCK]; /* Do we have a successful shot ? */ const int tid = threadIdx.x + blockIdx.x*blockDim.x; const int nthreads = gridDim.x * blockDim.x; /* Blank the shared mem buffer */ if (threadIdx.x < MAXTHREADSPERBLOCK) scnt[threadIdx.x] = 0; __syncthreads(); int ind; for (ind = tid; ind < n; ind += nthreads) { float xval = (2.0f * x[ind] - 1.0f); float yval = (2.0f * y[ind] - 1.0f); float dist = (xval*xval + yval*yval); unsigned long success = (dist <= 1.0f)?1:0; scnt[threadIdx.x] += success; } __syncthreads(); /* Perform a reduction to compute the sum on each thread within that block */ /* NB: We assume that the number of threads per block is a power of 2 ! */ unsigned long s; for (s = blockDim.x/2; s!=0; s>>=1) { if (threadIdx.x < s) scnt[threadIdx.x] += scnt[threadIdx.x + s]; __syncthreads(); } /* report the number of successful shots in the block */ if (threadIdx.x == 0) output_cnt[blockIdx.x] = scnt[0]; __syncthreads(); } static __global__ void sum_per_block_cnt(unsigned long *output_cnt, unsigned long *cnt) { __shared__ unsigned long accumulator[MAXNBLOCKS]; unsigned i; /* Load the values from global mem */ for (i = 0; i < blockDim.x; i++) accumulator[i] = output_cnt[i]; __syncthreads(); /* Perform a reduction in shared memory */ unsigned s; for (s = blockDim.x/2; s!=0; s>>=1) { if (threadIdx.x < s) accumulator[threadIdx.x] += accumulator[threadIdx.x + s]; __syncthreads(); } /* Save the result in global memory */ if (threadIdx.x == 0) *cnt = *cnt + accumulator[0]; } extern "C" void pi_redux_cuda_kernel(float *x, float *y, unsigned n, unsigned long *shot_cnt) { cudaError_t cures; /* How many blocks do we use ? */ unsigned nblocks = 128; // TODO STARPU_ASSERT(nblocks <= MAXNBLOCKS); STARPU_ASSERT((n % nblocks) == 0); unsigned long *per_block_cnt; cudaMalloc((void **)&per_block_cnt, nblocks*sizeof(unsigned long)); /* How many threads per block ? At most 256, but no more threads than * there are entries to process per block. */ unsigned nthread_per_block = STARPU_MIN(MAXTHREADSPERBLOCK, (n / nblocks)); /* each entry of per_block_cnt contains the number of successful shots * in the corresponding block. */ monte_carlo<<>>(x, y, n, per_block_cnt); cures = cudaGetLastError(); if (cures != cudaSuccess) STARPU_CUDA_REPORT_ERROR(cures); /* Note that we do not synchronize between kernel calls because there is an implicit serialization */ /* compute the total number of successful shots by adding the elements * of the per_block_cnt array */ sum_per_block_cnt<<<1, nblocks, 0, starpu_cuda_get_local_stream()>>>(per_block_cnt, shot_cnt); cures = cudaGetLastError(); if (cures != cudaSuccess) STARPU_CUDA_REPORT_ERROR(cures); cures = cudaStreamSynchronize(starpu_cuda_get_local_stream()); if (cures) STARPU_CUDA_REPORT_ERROR(cures); cudaFree(per_block_cnt); } starpu-1.4.9+dfsg/examples/pipeline/000077500000000000000000000000001507764646700174335ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/pipeline/pipeline.c000066400000000000000000000152701507764646700214110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This examples shows how to submit a pipeline to StarPU with limited buffer * use, and avoiding submitted all the tasks at once. * * This is a dumb example pipeline, depicted here: * * x--\ * >==axpy-->sum * y--/ * * x and y produce vectors full of x and y values, axpy multiplies them, and sum * sums it up. We thus have 3 temporary buffers */ #include #include #include #include #ifdef STARPU_USE_CUDA #include #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) /* Vector size */ #ifdef STARPU_QUICK_CHECK #define N 16 #else #define N 1048576 #endif /* Number of iteration buffers, and thus overlapped pipeline iterations */ #define K 16 /* Number of concurrently submitted pipeline iterations */ #define C 64 /* Number of iterations */ #define L 256 /* X / Y codelets */ void pipeline_cpu_x(void *descr[], void *args) { float x; float *val = (float *) STARPU_VECTOR_GET_PTR(descr[0]); int n = STARPU_VECTOR_GET_NX(descr[0]); int i; starpu_codelet_unpack_args(args, &x); for (i = 0; i < n ; i++) val[i] = x; } static struct starpu_perfmodel pipeline_model_x = { .type = STARPU_HISTORY_BASED, .symbol = "pipeline_model_x" }; static struct starpu_codelet pipeline_codelet_x = { .cpu_funcs = {pipeline_cpu_x}, .cpu_funcs_name = {"pipeline_cpu_x"}, .nbuffers = 1, .modes = {STARPU_W}, .model = &pipeline_model_x }; /* axpy codelets */ void pipeline_cpu_axpy(void *descr[], void *arg) { (void)arg; float *x = (float *) STARPU_VECTOR_GET_PTR(descr[0]); float *y = (float *) STARPU_VECTOR_GET_PTR(descr[1]); int n = STARPU_VECTOR_GET_NX(descr[0]); STARPU_SAXPY(n, 1., x, 1, y, 1); } #ifdef STARPU_USE_CUDA void pipeline_cublas_axpy(void *descr[], void *arg) { (void)arg; float *x = (float *) STARPU_VECTOR_GET_PTR(descr[0]); float *y = (float *) STARPU_VECTOR_GET_PTR(descr[1]); int n = STARPU_VECTOR_GET_NX(descr[0]); float alpha = 1.; cublasStatus_t status = cublasSaxpy(starpu_cublas_get_local_handle(), n, &alpha, x, 1, y, 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif static struct starpu_perfmodel pipeline_model_axpy = { .type = STARPU_HISTORY_BASED, .symbol = "pipeline_model_axpy" }; static struct starpu_codelet pipeline_codelet_axpy = { .cpu_funcs = {pipeline_cpu_axpy}, .cpu_funcs_name = {"pipeline_cpu_axpy"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {pipeline_cublas_axpy}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 2, .modes = {STARPU_R, STARPU_RW}, .model = &pipeline_model_axpy }; /* sum codelet */ void pipeline_cpu_sum(void *descr[], void *arg) { (void)arg; float *x = (float *) STARPU_VECTOR_GET_PTR(descr[0]); int n = STARPU_VECTOR_GET_NX(descr[0]); float y; y = STARPU_SASUM(n, x, 1); FPRINTF(stderr,"CPU finished with %f\n", y); } #ifdef STARPU_USE_CUDA void pipeline_cublas_sum(void *descr[], void *arg) { (void)arg; float *x = (float *) STARPU_VECTOR_GET_PTR(descr[0]); int n = STARPU_VECTOR_GET_NX(descr[0]); float y; cublasStatus_t status = cublasSasum(starpu_cublas_get_local_handle(), n, x, 1, &y); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); FPRINTF(stderr,"CUBLAS finished with %f\n", y); } #endif static struct starpu_perfmodel pipeline_model_sum = { .type = STARPU_HISTORY_BASED, .symbol = "pipeline_model_sum" }; static struct starpu_codelet pipeline_codelet_sum = { .cpu_funcs = {pipeline_cpu_sum}, .cpu_funcs_name = {"pipeline_cpu_sum"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {pipeline_cublas_sum}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .nbuffers = 1, .modes = {STARPU_R}, .model = &pipeline_model_sum }; static void release_sem(void *arg) { sem_post(arg); }; int main(void) { int ret = 0; int k, l, c; starpu_data_handle_t buffersX[K], buffersY[K], buffersP[K]; sem_t sems[C]; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_cublas_init(); /* Initialize the K temporary buffers. No need to allocate it ourselves * Since it's the X and Y kernels which will fill the initial values. */ for (k = 0; k < K; k++) { starpu_vector_data_register(&buffersX[k], -1, 0, N, sizeof(float)); starpu_vector_data_register(&buffersY[k], -1, 0, N, sizeof(float)); starpu_vector_data_register(&buffersP[k], -1, 0, N, sizeof(float)); } /* Initialize way to wait for the C previous concurrent stages */ for (c = 0; c < C; c++) sem_init(&sems[c], 0, 0); /* Submits the l pipeline stages */ for (l = 0; l < L; l++) { float x = l; float y = 2*l; /* First wait for the C previous concurrent stages */ if (l >= C) { starpu_do_schedule(); sem_wait(&sems[l%C]); } /* Now submit the next stage */ ret = starpu_task_insert(&pipeline_codelet_x, STARPU_W, buffersX[l%K], STARPU_VALUE, &x, sizeof(x), STARPU_TAG_ONLY, (starpu_tag_t) (100*l), 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert x"); ret = starpu_task_insert(&pipeline_codelet_x, STARPU_W, buffersY[l%K], STARPU_VALUE, &y, sizeof(y), STARPU_TAG_ONLY, (starpu_tag_t) (100*l+1), 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert y"); ret = starpu_task_insert(&pipeline_codelet_axpy, STARPU_R, buffersX[l%K], STARPU_RW, buffersY[l%K], STARPU_TAG_ONLY, (starpu_tag_t) l, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert axpy"); ret = starpu_task_insert(&pipeline_codelet_sum, STARPU_R, buffersY[l%K], STARPU_CALLBACK_WITH_ARG_NFREE, release_sem, &sems[l%C], STARPU_TAG_ONLY, (starpu_tag_t) l, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert sum"); } starpu_task_wait_for_all(); enodev: for (k = 0; k < K; k++) { starpu_data_unregister(buffersX[k]); starpu_data_unregister(buffersY[k]); starpu_data_unregister(buffersP[k]); } starpu_shutdown(); return (ret == -ENODEV ? 77 : 0); } starpu-1.4.9+dfsg/examples/ppm_downscaler/000077500000000000000000000000001507764646700206435ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/ppm_downscaler/ppm_downscaler.c000066400000000000000000000115211507764646700240240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This uses a dummy algorithm to downscale a ppm file. */ /* TODO: turn this into StarPU. */ #include "ppm_downscaler.h" #include #ifdef STARPU_HAVE_MALLOC_H #include #endif #include #include #include struct ppm_image *allocate_new_ppm(int ncols, int nlines, int coldepth) { struct ppm_image *ppm = (struct ppm_image *) malloc(sizeof(struct ppm_image)); assert(ppm); ppm->ncols = ncols; ppm->nlines = nlines; ppm->coldepth = coldepth; #ifdef STARPU_HAVE_MEMALIGN ppm->data = (struct ppm_color *) memalign(16384, ncols*nlines*sizeof(struct ppm_color)); #else ppm->data = (struct ppm_color *) malloc(ncols*nlines*sizeof(struct ppm_color)); #endif assert(ppm->data); return ppm; } struct ppm_image *file_to_ppm(char *filename) { int ret; struct ppm_image *ppm = (struct ppm_image *) malloc(sizeof(struct ppm_image)); assert(ppm); FILE *file = fopen(filename, "r"); assert(file); /* read the file's dimensions */ ret = fscanf(file, "P6\n%d %d\n%d\n", &ppm->ncols, &ppm->nlines, &ppm->coldepth); if (ret != 3) { fclose(file); fprintf(stderr, "file %s is not valid\n", filename); exit(-1); } /* allocate a buffer for the image */ #ifdef STARPU_HAVE_MEMALIGN ppm->data = (struct ppm_color *) memalign(16384, ppm->ncols*ppm->nlines*sizeof(struct ppm_color)); #else ppm->data = (struct ppm_color *) malloc(ppm->ncols*ppm->nlines*sizeof(struct ppm_color)); #endif assert(ppm->data); ret = fread(ppm->data, sizeof(struct ppm_color), ppm->ncols*ppm->nlines, file); STARPU_ASSERT(ret == ppm->ncols*ppm->nlines); int i; for (i = 0; i < ppm->ncols*ppm->nlines; i++) { /* fprintf(stderr, "READ (index %d) -> r %d g %d b %d\n", i, ppm->data[i].r, ppm->data[i].g, ppm->data[i].b); */ } fclose(file); return ppm; } void ppm_to_file(struct ppm_image *ppm, char *filename) { FILE *file = fopen(filename, "w+"); assert(file); /* read the file's dimensions */ fprintf(file, "P6\n%d %d\n%d\n", ppm->ncols, ppm->nlines, ppm->coldepth); fwrite(&ppm->data[0], sizeof(struct ppm_color), ppm->ncols*ppm->nlines, file); fclose(file); } char *filename_in = "serpents.ppm"; char *filename_out = "serpents.small.ppm"; void parse_args(int argc, char **argv) { if (argc == 3) { filename_in = argv[1]; filename_out = argv[2]; } } /* what is the downscaling factor ? */ #define FACTOR 2 void dummy_downscale(struct ppm_image *input_ppm, struct ppm_image *output_ppm) { struct ppm_color *in = input_ppm->data; struct ppm_color *out = output_ppm->data; int line, col; for (line = 0; line < output_ppm->nlines; line++) { for (col = 0; col < output_ppm->ncols; col++) { unsigned sum_r = 0, sum_g = 0, sum_b = 0; unsigned big_col = col*FACTOR; unsigned big_line = line*FACTOR; /* compute the average value of all components */ unsigned i, j; for (i = 0; i < FACTOR; i++) { for (j = 0; j < FACTOR; j++) { unsigned index = (big_col + i)+(big_line + j)*input_ppm->ncols; /* fprintf(stderr, "(col %d, line %d) i %d j %d index %d -> r %d g %d b %d\n", col, line, i, j, index, in[index].r, in[index].g, in[index].b); */ sum_r += (unsigned)in[index].r; sum_g += (unsigned)in[index].g; sum_b += (unsigned)in[index].b; } } out[col + line*output_ppm->ncols].r = (unsigned char)(sum_r/(FACTOR*FACTOR)); out[col + line*output_ppm->ncols].g = (unsigned char)(sum_g/(FACTOR*FACTOR)); out[col + line*output_ppm->ncols].b = (unsigned char)(sum_b/(FACTOR*FACTOR)); /* fprintf(stderr, "col %d line %d -> sum_r = %d out -> %d\n", col, line, sum_r, out[col + line*FACTOR].r); */ } } } int main(int argc, char **argv) { struct ppm_image *input_ppm, *output_ppm; parse_args(argc, argv); input_ppm = file_to_ppm(filename_in); fprintf(stderr, "Read input ppm file : ncols = %d, nlines = %d, coldept = %d\n", input_ppm->nlines, input_ppm->ncols, input_ppm->coldepth); assert(input_ppm->nlines % FACTOR == 0); assert(input_ppm->ncols % FACTOR == 0); output_ppm = allocate_new_ppm(input_ppm->ncols/FACTOR, input_ppm->nlines/FACTOR, input_ppm->coldepth); dummy_downscale(input_ppm, output_ppm); ppm_to_file(output_ppm, filename_out); free(input_ppm); free(output_ppm); return 0; } starpu-1.4.9+dfsg/examples/ppm_downscaler/ppm_downscaler.h000066400000000000000000000016301507764646700240310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* we make the assumption that there are 256 color levels at most */ struct ppm_color { unsigned char r; unsigned char g; unsigned char b; }; struct ppm_image { int nlines; int ncols; int coldepth; struct ppm_color *data; }; starpu-1.4.9+dfsg/examples/ppm_downscaler/yuv_downscaler.c000066400000000000000000000226021507764646700240550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This uses a dummy algorithm to downscale a 1920x1080 yuv film. * Each frame is split in horizontal stripes which are processed in parallel. */ #include #include #include #include #include #include #include "yuv_downscaler.h" static double start; static double end; static const char *filename_in_default = "hugefile.2s.yuv"; static const char *filename_out_default = "hugefile.2s.out.yuv"; static char filename_in[1024]; static char filename_out[1024]; void parse_args(int argc, char **argv) { if (argc == 3) { strncpy(filename_in, argv[1], 1023); strncpy(filename_out, argv[2], 1023); } else { strncpy(filename_in, filename_in_default, 1023); strncpy(filename_out, filename_out_default, 1023); } } #define FRAMESIZE sizeof(struct yuv_frame) #define NEW_FRAMESIZE sizeof(struct yuv_new_frame) void ds_kernel_cpu(void *descr[], void *arg) { (void)arg; uint8_t *input = (uint8_t *)STARPU_MATRIX_GET_PTR(descr[0]); const unsigned input_ld = STARPU_MATRIX_GET_LD(descr[0]); uint8_t *output = (uint8_t *)STARPU_MATRIX_GET_PTR(descr[1]); const unsigned output_ld = STARPU_MATRIX_GET_LD(descr[1]); const unsigned ncols = STARPU_MATRIX_GET_NX(descr[0]); const unsigned nlines = STARPU_MATRIX_GET_NY(descr[0]); unsigned line, col; for (line = 0; line < nlines; line+=FACTOR) for (col = 0; col < ncols; col+=FACTOR) { unsigned sum = 0; unsigned lline, lcol; for (lline = 0; lline < FACTOR; lline++) for (lcol = 0; lcol < FACTOR; lcol++) { unsigned in_index = (lcol + col) + (lline + line)*input_ld; sum += input[in_index]; } unsigned out_index = (col / FACTOR) + (line / FACTOR)*output_ld; output[out_index] = (uint8_t)(sum/(FACTOR*FACTOR)); } } static struct starpu_codelet ds_codelet = { .cpu_funcs = {ds_kernel_cpu}, .cpu_funcs_name = {"ds_kernel_cpu"}, .nbuffers = 2, /* input -> output */ .modes = {STARPU_R, STARPU_W}, .model = NULL }; /* each block contains BLOCK_HEIGHT consecutive lines */ static struct starpu_data_filter filter_y = { .filter_func = starpu_matrix_filter_block, .nchildren= HEIGHT/BLOCK_HEIGHT }; static struct starpu_data_filter filter_uv = { .filter_func = starpu_matrix_filter_block, .nchildren = (HEIGHT/2)/BLOCK_HEIGHT }; int main(int argc, char **argv) { int ret; size_t sret; assert(HEIGHT % (2*BLOCK_HEIGHT) == 0); assert(HEIGHT % FACTOR == 0); parse_args(argc, argv); /* fprintf(stderr, "Reading input file ...\n"); */ /* how many frames ? */ struct stat stbuf; ret = stat(filename_in, &stbuf); assert(ret); size_t filesize = stbuf.st_size; unsigned nframes = filesize/FRAMESIZE; /* fprintf(stderr, "filesize %lx (FRAME SIZE %lx NEW SIZE %lx); nframes %d\n", filesize, FRAMESIZE, NEW_FRAMESIZE, nframes); */ assert((filesize % sizeof(struct yuv_frame)) == 0); struct yuv_frame *yuv_in_buffer = (struct yuv_frame *) malloc(nframes*FRAMESIZE); assert(yuv_in_buffer); /* fprintf(stderr, "Alloc output file ...\n"); */ struct yuv_new_frame *yuv_out_buffer = (struct yuv_new_frame *) calloc(nframes, NEW_FRAMESIZE); assert(yuv_out_buffer); /* fetch input data */ FILE *f_in = fopen(filename_in, "r"); if (!f_in) { fprintf(stderr, "couldn't open input file %s\n", filename_in); exit(EXIT_FAILURE); } /* allocate room for an output buffer */ FILE *f_out = fopen(filename_out, "w+"); if (!f_out) { fprintf(stderr, "couldn't open output file %s\n", filename_out); exit(EXIT_FAILURE); } sret = fread(yuv_in_buffer, FRAMESIZE, nframes, f_in); assert(sret == nframes); starpu_data_handle_t *frame_y_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t)); starpu_data_handle_t *frame_u_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t)); starpu_data_handle_t *frame_v_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t)); starpu_data_handle_t *new_frame_y_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t)); starpu_data_handle_t *new_frame_u_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t)); starpu_data_handle_t *new_frame_v_handle = (starpu_data_handle_t *) calloc(nframes, sizeof(starpu_data_handle_t)); ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* register and partition all layers */ unsigned frame; for (frame = 0; frame < nframes; frame++) { /* register Y layer */ starpu_matrix_data_register(&frame_y_handle[frame], STARPU_MAIN_RAM, (uintptr_t)&yuv_in_buffer[frame].y, WIDTH, WIDTH, HEIGHT, sizeof(uint8_t)); starpu_data_partition(frame_y_handle[frame], &filter_y); starpu_matrix_data_register(&new_frame_y_handle[frame], STARPU_MAIN_RAM, (uintptr_t)&yuv_out_buffer[frame].y, NEW_WIDTH, NEW_WIDTH, NEW_HEIGHT, sizeof(uint8_t)); starpu_data_partition(new_frame_y_handle[frame], &filter_y); /* register U layer */ starpu_matrix_data_register(&frame_u_handle[frame], STARPU_MAIN_RAM, (uintptr_t)&yuv_in_buffer[frame].u, WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t)); starpu_data_partition(frame_u_handle[frame], &filter_uv); starpu_matrix_data_register(&new_frame_u_handle[frame], STARPU_MAIN_RAM, (uintptr_t)&yuv_out_buffer[frame].u, NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t)); starpu_data_partition(new_frame_u_handle[frame], &filter_uv); /* register V layer */ starpu_matrix_data_register(&frame_v_handle[frame], STARPU_MAIN_RAM, (uintptr_t)&yuv_in_buffer[frame].v, WIDTH/2, WIDTH/2, HEIGHT/2, sizeof(uint8_t)); starpu_data_partition(frame_v_handle[frame], &filter_uv); starpu_matrix_data_register(&new_frame_v_handle[frame], STARPU_MAIN_RAM, (uintptr_t)&yuv_out_buffer[frame].v, NEW_WIDTH/2, NEW_WIDTH/2, NEW_HEIGHT/2, sizeof(uint8_t)); starpu_data_partition(new_frame_v_handle[frame], &filter_uv); } /* how many tasks are there ? */ unsigned nblocks_y = filter_y.nchildren; unsigned nblocks_uv = filter_uv.nchildren; unsigned ntasks = (nblocks_y + 2*nblocks_uv)*nframes; fprintf(stderr, "Start computation: there will be %u tasks for %u frames\n", ntasks, nframes); start = starpu_timing_now(); /* do the computation */ for (frame = 0; frame < nframes; frame++) { starpu_iteration_push(frame); unsigned blocky; for (blocky = 0; blocky < nblocks_y; blocky++) { struct starpu_task *task = starpu_task_create(); task->cl = &ds_codelet; /* input */ task->handles[0] = starpu_data_get_sub_data(frame_y_handle[frame], 1, blocky); /* output */ task->handles[1] = starpu_data_get_sub_data(new_frame_y_handle[frame], 1, blocky); ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } unsigned blocku; for (blocku = 0; blocku < nblocks_uv; blocku++) { struct starpu_task *task = starpu_task_create(); task->cl = &ds_codelet; /* input */ task->handles[0] = starpu_data_get_sub_data(frame_u_handle[frame], 1, blocku); /* output */ task->handles[1] = starpu_data_get_sub_data(new_frame_u_handle[frame], 1, blocku); ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } unsigned blockv; for (blockv = 0; blockv < nblocks_uv; blockv++) { struct starpu_task *task = starpu_task_create(); task->cl = &ds_codelet; /* input */ task->handles[0] = starpu_data_get_sub_data(frame_v_handle[frame], 1, blockv); /* output */ task->handles[1] = starpu_data_get_sub_data(new_frame_v_handle[frame], 1, blockv); ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_iteration_pop(); } /* make sure all output buffers are sync'ed */ for (frame = 0; frame < nframes; frame++) { starpu_data_unregister(frame_y_handle[frame]); starpu_data_unregister(frame_u_handle[frame]); starpu_data_unregister(frame_v_handle[frame]); starpu_data_unregister(new_frame_y_handle[frame]); starpu_data_unregister(new_frame_u_handle[frame]); starpu_data_unregister(new_frame_v_handle[frame]); } free(frame_y_handle); free(frame_u_handle); free(frame_v_handle); free(new_frame_y_handle); free(new_frame_u_handle); free(new_frame_v_handle); /* There is an implicit barrier: the unregister methods will block * until the computation is done and that the result was put back into * memory. */ end = starpu_timing_now(); double timing = end - start; printf("# s\tFPS\n"); printf("%f\t%f\n", timing/1000000, (1000000*nframes)/timing); fwrite(yuv_out_buffer, NEW_FRAMESIZE, nframes, f_out); /* partition the layers into smaller parts */ starpu_shutdown(); if (fclose(f_in) != 0) fprintf(stderr, "Could not close %s properly\n", filename_in); if (fclose(f_out) != 0) fprintf(stderr, "Could not close %s properly\n", filename_out); return 0; } starpu-1.4.9+dfsg/examples/ppm_downscaler/yuv_downscaler.h000066400000000000000000000021241507764646700240570ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define WIDTH 1920 #define HEIGHT 1080 #define FACTOR 2 #define NEW_WIDTH (WIDTH/FACTOR) #define NEW_HEIGHT (HEIGHT/FACTOR) #define BLOCK_HEIGHT 20 #include struct yuv_frame { uint8_t y[WIDTH*HEIGHT]; uint8_t u[(WIDTH*HEIGHT)/4]; uint8_t v[(WIDTH*HEIGHT)/4]; }; struct yuv_new_frame { uint8_t y[NEW_WIDTH*NEW_HEIGHT]; uint8_t u[(NEW_WIDTH*NEW_HEIGHT)/4]; uint8_t v[(NEW_WIDTH*NEW_HEIGHT)/4]; }; starpu-1.4.9+dfsg/examples/profiling/000077500000000000000000000000001507764646700176175ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/profiling/profiling.c000066400000000000000000000107171507764646700217620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This examplifies how to get task execution profiling from the application. */ #include #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #ifdef STARPU_QUICK_CHECK static unsigned niter = 50; #else static unsigned niter = 500; #endif void sleep_codelet(void *descr[], void *arg) { (void)descr; (void)arg; starpu_usleep(1000); } int main(int argc, char **argv) { int ret; if (argc == 2) niter = atoi(argv[1]); ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Enable profiling */ starpu_profiling_status_set(STARPU_PROFILING_ENABLE); #ifdef STARPU_QUICK_CHECK /* We should observe at least 50ms in the sleep time reported by every * worker. */ starpu_usleep(50000); #else /* We should observe at least 500ms in the sleep time reported by every * worker. */ starpu_usleep(500000); #endif struct starpu_codelet cl = { .cpu_funcs = {sleep_codelet}, .cpu_funcs_name = {"sleep_codelet"}, .cuda_funcs = {sleep_codelet}, .opencl_funcs = {sleep_codelet}, .nbuffers = 0, .name = "sleep" }; struct starpu_task **tasks = (struct starpu_task **) malloc(niter*sizeof(struct starpu_task *)); assert(tasks); unsigned i; for (i = 0; i < niter; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl; /* We will destroy the task structure by hand so that we can * query the profiling info before the task is destroyed. */ task->destroy = 0; tasks[i] = task; ret = starpu_task_submit(task); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); exit(0); } } starpu_task_wait_for_all(); double delay_sum = 0.0; double length_sum = 0.0; for (i = 0; i < niter; i++) { struct starpu_task *task = tasks[i]; struct starpu_profiling_task_info *info = task->profiling_info; /* How much time did it take before the task started ? */ delay_sum += starpu_timing_timespec_delay_us(&info->submit_time, &info->start_time); /* How long was the task execution ? */ length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); /* We don't need the task structure anymore */ starpu_task_destroy(task); } free(tasks); if (niter) { FPRINTF(stderr, "Avg. delay : %2.2lf us\n", (delay_sum)/niter); FPRINTF(stderr, "Avg. length : %2.2lf us\n", (length_sum)/niter); } /* Display the occupancy of all workers during the test */ unsigned worker; for (worker = 0; worker < starpu_worker_get_count(); worker++) { struct starpu_profiling_worker_info worker_info; ret = starpu_profiling_worker_get_info(worker, &worker_info); STARPU_ASSERT(!ret); double total_time = starpu_timing_timespec_to_us(&worker_info.total_time); double executing_time = starpu_timing_timespec_to_us(&worker_info.executing_time); double sleeping_time = starpu_timing_timespec_to_us(&worker_info.sleeping_time); double overhead_time = total_time - executing_time - sleeping_time; float executing_ratio = 100.0*executing_time/total_time; float sleeping_ratio = 100.0*sleeping_time/total_time; float overhead_ratio = 100.0 - executing_ratio - sleeping_ratio; char workername[128]; starpu_worker_get_name(worker, workername, 128); FPRINTF(stderr, "Worker %s:\n", workername); FPRINTF(stderr, "\t%d task(s)\n", worker_info.executed_tasks); FPRINTF(stderr, "\ttotal time : %.2lf ms\n", total_time*1e-3); FPRINTF(stderr, "\texec time : %.2lf ms (%.2f %%)\n", executing_time*1e-3, executing_ratio); FPRINTF(stderr, "\tblocked time : %.2lf ms (%.2f %%)\n", sleeping_time*1e-3, sleeping_ratio); FPRINTF(stderr, "\toverhead time: %.2lf ms (%.2f %%)\n", overhead_time*1e-3, overhead_ratio); } starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/profiling_tool/000077500000000000000000000000001507764646700206545ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/profiling_tool/libprofiling_tool.c000066400000000000000000000052331507764646700245400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2022,2023 École de Technologie Supérieure (ETS, Montréal) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include void myfunction_cb(struct starpu_prof_tool_info *prof_info, union starpu_prof_tool_event_info *event_info, struct starpu_prof_tool_api_info *api_info) { if (NULL != prof_info) { printf("CALLBACK CALLED %d\n", prof_info->event_type); } else { printf("CALLBACK CALLED NULL INFO\n"); return; } switch (prof_info->event_type) { case starpu_prof_tool_event_driver_init: printf("init driver\n"); break; case starpu_prof_tool_event_driver_init_start: printf("begin init driver\n"); break; case starpu_prof_tool_event_driver_init_end: printf("end init driver\n"); break; case starpu_prof_tool_event_start_cpu_exec: printf("Start exec fun %p on device %d\n", prof_info->fun_ptr, prof_info->device_number); break; case starpu_prof_tool_event_end_cpu_exec: printf("End exec fun %p on device %d\n", prof_info->fun_ptr, prof_info->device_number); break; case starpu_prof_tool_event_start_transfer: printf("Start transfer on memnode %ud\n", prof_info->memnode); break; case starpu_prof_tool_event_end_transfer: printf("End transfer on memnode %ud\n", prof_info->memnode); break; default: printf("Unknown callback %d\n", prof_info->event_type); break; } } /* Mandatory */ void starpu_prof_tool_library_register(starpu_prof_tool_entry_register_func reg, starpu_prof_tool_entry_register_func unreg) { enum starpu_prof_tool_command info = 0; reg(starpu_prof_tool_event_driver_init, &myfunction_cb, info); reg(starpu_prof_tool_event_driver_init_start, &myfunction_cb, info); reg(starpu_prof_tool_event_driver_init_end, &myfunction_cb, info); reg(starpu_prof_tool_event_start_cpu_exec, &myfunction_cb, info); reg(starpu_prof_tool_event_end_cpu_exec, &myfunction_cb, info); reg(starpu_prof_tool_event_start_transfer, &myfunction_cb, info); reg(starpu_prof_tool_event_end_transfer, &myfunction_cb, info); fprintf(stderr,"REGISTER LIBRARY\n"); } starpu-1.4.9+dfsg/examples/profiling_tool/prof.sh000077500000000000000000000016601507764646700221640ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2022-2022 École de Technologie Supérieure (ETS, Montréal) # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # ROOT=${0%/prof.sh} if test -x $ROOT/../basic_examples/hello_world then STARPU_PROF_TOOL=$ROOT/.libs/libprofiling_tool.so $ROOT/../basic_examples/hello_world else exit 77 fi starpu-1.4.9+dfsg/examples/reductions/000077500000000000000000000000001507764646700200055ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/reductions/dot_product.c000066400000000000000000000275301507764646700225060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This computes the dot product of a big vector, using data reduction to * optimize the dot reduction. */ #include #include #include #include #ifdef STARPU_USE_CUDA #include #include #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) static float *_x; static float *_y; static starpu_data_handle_t *_x_handles; static starpu_data_handle_t *_y_handles; #ifdef STARPU_USE_OPENCL static struct starpu_opencl_program _opencl_program; #endif #ifdef STARPU_QUICK_CHECK static unsigned _nblocks = 128; #else static unsigned _nblocks = 4096; #endif static unsigned _entries_per_block = 1024; static DOT_TYPE _dot = 0.0f; static starpu_data_handle_t _dot_handle; #ifdef STARPU_USE_CUDA static int cublas_version; #endif static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl) { (void)task; (void)nimpl; enum starpu_worker_archtype type = starpu_worker_get_type(workerid); if (type == STARPU_CPU_WORKER || type == STARPU_OPENCL_WORKER) return 1; #ifdef STARPU_USE_CUDA #ifdef STARPU_SIMGRID /* We don't know, let's assume it can */ return 1; #else /* Cuda device */ const struct cudaDeviceProp *props; props = starpu_cuda_get_device_properties(workerid); if (props->major >= 2 || props->minor >= 3) /* At least compute capability 1.3, supports doubles */ return 1; #endif #endif /* Old card, does not support doubles */ return 0; } /* * Codelet to create a neutral element */ void init_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; DOT_TYPE *dot = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); *dot = 0.0f; } #ifdef STARPU_USE_CUDA void init_cuda_func(void *descr[], void *cl_arg) { (void)cl_arg; DOT_TYPE *dot = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); cudaMemsetAsync(dot, 0, sizeof(DOT_TYPE), starpu_cuda_get_local_stream()); } #endif #ifdef STARPU_USE_OPENCL void init_opencl_func(void *buffers[], void *cl_arg) { (void)cl_arg; cl_int err; cl_command_queue queue; cl_mem dot = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[0]); starpu_opencl_get_current_queue(&queue); DOT_TYPE zero = (DOT_TYPE) 0.0; err = clEnqueueWriteBuffer(queue, dot, CL_TRUE, 0, sizeof(DOT_TYPE), &zero, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } #endif static struct starpu_codelet init_codelet = { .can_execute = can_execute, .cpu_funcs = {init_cpu_func}, .cpu_funcs_name = {"init_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {init_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {init_opencl_func}, #endif .modes = {STARPU_W}, .nbuffers = 1, .name = "init", }; /* * Codelet to perform the reduction of two elements */ void redux_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; DOT_TYPE *dota = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); DOT_TYPE *dotb = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]); *dota = *dota + *dotb; } #ifdef STARPU_USE_CUDA extern void redux_cuda_func(void *descr[], void *_args); #endif #ifdef STARPU_USE_OPENCL void redux_opencl_func(void *buffers[], void *args) { (void)args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_mem dota = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[0]); cl_mem dotb = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[1]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &_opencl_program, "_redux_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(dota), &dota); err|= clSetKernelArg(kernel, 1, sizeof(dotb), &dotb); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=1; size_t local=1; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } #endif static struct starpu_codelet redux_codelet = { .can_execute = can_execute, .cpu_funcs = {redux_cpu_func}, .cpu_funcs_name = {"redux_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {redux_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {redux_opencl_func}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, .nbuffers = 2, .name = "redux" }; /* * Dot product codelet */ void dot_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; float *local_x = (float *)STARPU_VECTOR_GET_PTR(descr[0]); float *local_y = (float *)STARPU_VECTOR_GET_PTR(descr[1]); DOT_TYPE *dot = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[2]); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); DOT_TYPE local_dot = 0.0; unsigned i; for (i = 0; i < n; i++) { local_dot += (DOT_TYPE)local_x[i]*(DOT_TYPE)local_y[i]; } *dot = *dot + local_dot; } #ifdef STARPU_USE_CUDA void dot_cuda_func(void *descr[], void *cl_arg) { (void)cl_arg; DOT_TYPE current_dot; float local_dot; float *local_x = (float *)STARPU_VECTOR_GET_PTR(descr[0]); float *local_y = (float *)STARPU_VECTOR_GET_PTR(descr[1]); DOT_TYPE *dot = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[2]); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); cudaMemcpyAsync(¤t_dot, dot, sizeof(DOT_TYPE), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); cudaStreamSynchronize(starpu_cuda_get_local_stream()); cublasStatus_t status = cublasSdot(starpu_cublas_get_local_handle(), n, local_x, 1, local_y, 1, &local_dot); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); cudaStreamSynchronize(starpu_cuda_get_local_stream()); /* FPRINTF(stderr, "current_dot %f local dot %f -> %f\n", current_dot, local_dot, current_dot + local_dot); */ current_dot += local_dot; cudaMemcpyAsync(dot, ¤t_dot, sizeof(DOT_TYPE), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } #endif #ifdef STARPU_USE_OPENCL void dot_opencl_func(void *buffers[], void *cl_arg) { (void)cl_arg; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_mem x = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); cl_mem y = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[1]); cl_mem dot = (cl_mem) STARPU_VARIABLE_GET_PTR(buffers[2]); unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &_opencl_program, "_dot_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(x), &x); err|= clSetKernelArg(kernel, 1, sizeof(y), &y); err|= clSetKernelArg(kernel, 2, sizeof(dot), &dot); err|= clSetKernelArg(kernel, 3, sizeof(n), &n); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=1; size_t local=1; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } #endif static struct starpu_codelet dot_codelet = { .can_execute = can_execute, .cpu_funcs = {dot_cpu_func}, .cpu_funcs_name = {"dot_cpu_func"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dot_cuda_func}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {dot_opencl_func}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_REDUX}, .name = "dot" }; /* * Tasks initialization */ int main(void) { int ret; /* Not supported yet */ if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) return 77; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("examples/reductions/dot_product_opencl_kernels.cl", &_opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif #ifdef STARPU_USE_CUDA unsigned devices = starpu_cuda_worker_get_count(); if (devices) { cublasHandle_t handle; cublasCreate(&handle); cublasGetVersion(handle, &cublas_version); cublasDestroy(handle); if (cublas_version >= 7050) starpu_cublas_init(); else /* Disable the sdot cublas kernel, it is bogus with a * non-blocking stream (Nvidia bugid 1669886) */ dot_codelet.cuda_funcs[0] = NULL; } #endif unsigned long nelems = _nblocks*_entries_per_block; size_t size = nelems*sizeof(float); starpu_malloc((void **)&_x, size*sizeof(float)); starpu_malloc((void **)&_y, size*sizeof(float)); _x_handles = (starpu_data_handle_t *) calloc(_nblocks, sizeof(starpu_data_handle_t)); _y_handles = (starpu_data_handle_t *) calloc(_nblocks, sizeof(starpu_data_handle_t)); assert(_x && _y); starpu_srand48(0); DOT_TYPE reference_dot = 0.0; unsigned long i; for (i = 0; i < nelems; i++) { _x[i] = (float)starpu_drand48(); _y[i] = (float)starpu_drand48(); reference_dot += (DOT_TYPE)_x[i]*(DOT_TYPE)_y[i]; } unsigned block; for (block = 0; block < _nblocks; block++) { starpu_vector_data_register(&_x_handles[block], STARPU_MAIN_RAM, (uintptr_t)&_x[_entries_per_block*block], _entries_per_block, sizeof(float)); starpu_vector_data_register(&_y_handles[block], STARPU_MAIN_RAM, (uintptr_t)&_y[_entries_per_block*block], _entries_per_block, sizeof(float)); } starpu_variable_data_register(&_dot_handle, STARPU_MAIN_RAM, (uintptr_t)&_dot, sizeof(DOT_TYPE)); /* * Compute dot product with StarPU */ starpu_data_set_reduction_methods(_dot_handle, &redux_codelet, &init_codelet); for (block = 0; block < _nblocks; block++) { struct starpu_task *task = starpu_task_create(); task->cl = &dot_codelet; task->destroy = 1; task->handles[0] = _x_handles[block]; task->handles[1] = _y_handles[block]; task->handles[2] = _dot_handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_ASSERT(!ret); } for (block = 0; block < _nblocks; block++) { starpu_data_unregister(_x_handles[block]); starpu_data_unregister(_y_handles[block]); } starpu_data_unregister(_dot_handle); FPRINTF(stderr, "Reference : %e vs. %e (Delta %e)\n", reference_dot, _dot, reference_dot - _dot); #ifdef STARPU_USE_CUDA if (cublas_version >= 7050) starpu_cublas_shutdown(); #endif #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&_opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif starpu_shutdown(); starpu_free_noflag(_x, size*sizeof(float)); starpu_free_noflag(_y, size*sizeof(float)); free(_x_handles); free(_y_handles); if (fabs(reference_dot - _dot) < reference_dot * 1e-6) return EXIT_SUCCESS; else { FPRINTF(stderr, "ERROR: fabs(%e - %e) >= %e * 1e-6\n", reference_dot, _dot, reference_dot); return EXIT_FAILURE; } enodev: starpu_shutdown(); FPRINTF(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return 77; } starpu-1.4.9+dfsg/examples/reductions/dot_product.h000066400000000000000000000014261507764646700225070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef DOT_PRODUCT_H__ #define DOT_PRODUCT_H__ #define DOT_TYPE double #endif /* DOT_PRODUCT_H__ */ starpu-1.4.9+dfsg/examples/reductions/dot_product_kernels.cu000066400000000000000000000023541507764646700244130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Trivial dot reduction CUDA kernel */ #include #define DOT_TYPE double static __global__ void cuda_redux(DOT_TYPE *dota, DOT_TYPE *dotb) { *dota = *dota + *dotb; return; } extern "C" void redux_cuda_func(void *descr[], void *_args) { (void)_args; DOT_TYPE *dota = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); DOT_TYPE *dotb = (DOT_TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]); cuda_redux<<<1,1, 0, starpu_cuda_get_local_stream()>>>(dota, dotb); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/reductions/dot_product_opencl_kernels.cl000066400000000000000000000022321507764646700257350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Trivial dot reduction OpenCL kernel */ #include "dot_product.h" #pragma OPENCL EXTENSION cl_khr_fp64 : enable __kernel void _redux_opencl(__global DOT_TYPE *dota, __global DOT_TYPE *dotb) { *dota += *dotb; } __kernel void _dot_opencl(__global float *x, __global float *y, __global DOT_TYPE *dot, unsigned n) { /* FIXME: real parallel implementation */ unsigned i; __local double tmp; tmp = 0.0; for (i = 0; i < n ; i++) tmp += x[i]*y[i]; *dot += tmp; } starpu-1.4.9+dfsg/examples/reductions/minmax_reduction.c000066400000000000000000000132201507764646700235140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This computes the minimum and maximum values of a big vector, using data * reduction to optimize the computation. */ #include #include #include #include #ifdef STARPU_QUICK_CHECK static unsigned _nblocks = 512; static unsigned _entries_per_bock = 64; #else static unsigned _nblocks = 8192; static unsigned _entries_per_bock = 1024; #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define TYPE double #define TYPE_MAX DBL_MAX #define TYPE_MIN DBL_MIN static TYPE *_x; static starpu_data_handle_t *_x_handles; /* The first element (resp. second) stores the min element (resp. max). */ static TYPE _minmax[2]; static starpu_data_handle_t _minmax_handle; /* * Codelet to create a neutral element */ void minmax_neutral_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; TYPE *array = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); /* Initialize current min to the greatest possible value. */ array[0] = TYPE_MAX; /* Initialize current max to the smallest possible value. */ array[1] = TYPE_MIN; } static struct starpu_codelet minmax_init_codelet = { .cpu_funcs = {minmax_neutral_cpu_func}, .cpu_funcs_name = {"minmax_neutral_cpu_func"}, .modes = {STARPU_W}, .nbuffers = 1, .name = "init" }; /* * Codelet to perform the reduction of two elements */ void minmax_redux_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; TYPE *array_dst = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]); TYPE *array_src = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]); /* Compute the min value */ TYPE min_dst = array_dst[0]; TYPE min_src = array_src[0]; array_dst[0] = STARPU_MIN(min_dst, min_src); /* Compute the max value */ TYPE max_dst = array_dst[1]; TYPE max_src = array_src[1]; array_dst[1] = STARPU_MAX(max_dst, max_src); } static struct starpu_codelet minmax_redux_codelet = { .cpu_funcs = {minmax_redux_cpu_func}, .cpu_funcs_name = {"minmax_redux_cpu_func"}, .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, .nbuffers = 2, .name = "redux" }; /* * Compute max/min within a vector and update the min/max value */ void minmax_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; /* The array containing the values */ TYPE *local_array = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); TYPE *minmax = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]); TYPE local_min = minmax[0]; TYPE local_max = minmax[1]; /* Compute the min and the max elements in the array */ unsigned i; for (i = 0; i < n; i++) { TYPE val = local_array[i]; local_min = STARPU_MIN(local_min, val); local_max = STARPU_MAX(local_max, val); } minmax[0] = local_min; minmax[1] = local_max; } static struct starpu_codelet minmax_codelet = { .cpu_funcs = {minmax_cpu_func}, .cpu_funcs_name = {"minmax_cpu_func"}, .nbuffers = 2, .modes = {STARPU_R, STARPU_REDUX}, .name = "minmax" }; /* * Tasks initialization */ int main(void) { unsigned long i; int ret; /* Not supported yet */ if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) return 77; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned long nelems = _nblocks*_entries_per_bock; size_t size = nelems*sizeof(TYPE); starpu_malloc((void **)&_x, size*sizeof(TYPE)); _x_handles = (starpu_data_handle_t *) malloc(_nblocks * sizeof(starpu_data_handle_t)); assert(_x && _x_handles); /* Initialize the vector with random values */ starpu_srand48(0); for (i = 0; i < nelems; i++) _x[i] = (TYPE)starpu_drand48(); unsigned block; for (block = 0; block < _nblocks; block++) { uintptr_t block_start = (uintptr_t)&_x[_entries_per_bock*block]; starpu_vector_data_register(&_x_handles[block], STARPU_MAIN_RAM, block_start, _entries_per_bock, sizeof(TYPE)); } /* Initialize current min */ _minmax[0] = TYPE_MAX; /* Initialize current max */ _minmax[1] = TYPE_MIN; starpu_variable_data_register(&_minmax_handle, STARPU_MAIN_RAM, (uintptr_t)_minmax, 2*sizeof(TYPE)); /* Set the methods to define neutral elements and to perform the reduction operation */ starpu_data_set_reduction_methods(_minmax_handle, &minmax_redux_codelet, &minmax_init_codelet); for (block = 0; block < _nblocks; block++) { struct starpu_task *task = starpu_task_create(); task->cl = &minmax_codelet; task->handles[0] = _x_handles[block]; task->handles[1] = _minmax_handle; ret = starpu_task_submit(task); if (ret) { STARPU_ASSERT(ret == -ENODEV); FPRINTF(stderr, "This test can only run on CPUs, but there are no CPU workers (this is not a bug).\n"); return 77; } } for (block = 0; block < _nblocks; block++) { starpu_data_unregister(_x_handles[block]); } starpu_data_unregister(_minmax_handle); FPRINTF(stderr, "Min : %e\n", _minmax[0]); FPRINTF(stderr, "Max : %e\n", _minmax[1]); STARPU_ASSERT(_minmax[0] <= _minmax[1]); starpu_free_noflag(_x, size*sizeof(TYPE)); free(_x_handles); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/sched_ctx/000077500000000000000000000000001507764646700175725ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/sched_ctx/axpy_partition_gpu.cu000066400000000000000000000045761507764646700240640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This creates two dumb vectors, splits them into chunks, and for each pair of * chunk, run axpy on them. */ #include #include "axpy_partition_gpu.h" #include //This code demonstrates how to transform a kernel to execute on a given set of GPU SMs. // Original kernel __global__ void saxpy(int n, float a, float *x, float *y) { int i = blockIdx.x*blockDim.x + threadIdx.x; if (i>>(__P_HKARGS,n,a,x,y); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/sched_ctx/axpy_partition_gpu.h000066400000000000000000000105711507764646700236740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This creates two dumb vectors, splits them into chunks, and for each pair of * chunk, run axpy on them. */ #pragma once __device__ static uint get_smid(void) { #if defined(__CUDACC__) uint ret; asm("mov.u32 %0, %smid;" : "=r"(ret)); return ret; #else return 0; #endif } #define __P_HKARGS dimGrid, active_blocks ,occupancy, block_assignment_d, mapping_start #define __P_KARGS dim3 blocks, int active_blocks, int occupancy, unsigned int* block_assignment, int mapping_start #define __P_DARGS blocks,blockid #define __P_BEGIN \ __shared__ unsigned int block_start; \ int smid = get_smid(); \ if(threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0) \ { \ block_start = atomicDec(&block_assignment[smid],0xDEADBEEF); \ } \ __syncthreads(); \ \ if(block_start > active_blocks) \ { \ return; \ } #define __P_LOOPXY \ dim3 blockid; \ blockid.z = 0; \ \ int gridDim_sum = blocks.x*blocks.y; \ int startBlock = block_start + (smid - mapping_start) * occupancy; \ int blockid_sum; \ for(blockid_sum = startBlock; blockid_sum < gridDim_sum; blockid_sum +=active_blocks) \ { \ blockid.x = blockid_sum % blocks.x; \ blockid.y = blockid_sum / blocks.x; #define __P_LOOPEND } // Needed if shared memory is used #define __P_LOOPEND_SAFE __syncthreads(); } #define __P_LOOPX \ dim3 blockid; \ blockid.z = 0; \ blockid.y = 0; \ int gridDim_sum = blocks.x; \ int startBlock = (smid-mapping_start) + block_start*(active_blocks/occupancy); \ int blockid_sum; \ for(blockid_sum = startBlock; blockid_sum < gridDim_sum; blockid_sum +=active_blocks) \ { \ blockid.x = blockid_sum; // int startBlock = block_start + (smid - mapping_start) * occupancy; \ //////////// HOST side functions template static void buildPartitionedBlockMapping(F cudaFun, int threads, int shmem, int mapping_start, int allocation, int &width, int &active_blocks, unsigned int *block_assignment_d,cudaStream_t current_stream = #ifdef cudaStreamPerThread cudaStreamPerThread #else NULL #endif ) { int occupancy; int nb_SM = 13; //TODO: replace with call int mapping_end = mapping_start + allocation - 1; // exclusive unsigned int block_assignment[15]; #if CUDART_VERSION >= 6050 cudaOccupancyMaxActiveBlocksPerMultiprocessor(&occupancy,cudaFun,threads,shmem); #else occupancy = 4; #endif width = occupancy * nb_SM; // Physical wrapper grid size. Fits GPU exactly active_blocks = occupancy*allocation; // The total number of blocks doing work int i; for(i = 0; i < mapping_start; i++) block_assignment[i] = (unsigned) -1; for(i = mapping_start; i <= mapping_end; i++) { block_assignment[i] = occupancy - 1; } for(i = mapping_end+1; i < nb_SM; i++) block_assignment[i] = (unsigned) -1; cudaMemcpyAsync((void*)block_assignment_d,block_assignment,sizeof(block_assignment),cudaMemcpyHostToDevice, current_stream); //cudaMemcpy((void*)block_assignment_d,block_assignment,sizeof(block_assignment),cudaMemcpyHostToDevice); //cudaDeviceSynchronize(); } #define __P_HOSTSETUP(KERNEL,GRIDDIM,BLOCKSIZE,SHMEMSIZE,MAPPING_START,MAPPING_END,STREAM) \ unsigned int* block_assignment_d; cudaMalloc((void**) &block_assignment_d,15*sizeof(unsigned int)); \ int width = 0; \ int active_blocks = 0; \ buildPartitionedBlockMapping(KERNEL,BLOCKSIZE,SHMEMSIZE,(MAPPING_START),(MAPPING_END)-(MAPPING_START), \ width, active_blocks, block_assignment_d,STREAM); \ int occupancy = active_blocks/((MAPPING_END)-(MAPPING_START)); \ dim3 dimGrid = (GRIDDIM); \ int mapping_start = (MAPPING_START); starpu-1.4.9+dfsg/examples/sched_ctx/dummy_sched_with_ctx.c000066400000000000000000000126471507764646700241620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is an example of an application-defined scheduler run inside a * scheduling context. * This is a mere eager scheduler with a centralized list of tasks to schedule: * when a task becomes ready (push) it is put on the list. When a device * becomes ready (pop), a task is taken from the list. */ #include #include #ifdef STARPU_QUICK_CHECK #define NTASKS 320 #elif !defined(STARPU_LONG_CHECK) #define NTASKS 3200 #else #define NTASKS 32000 #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) struct dummy_sched_data { struct starpu_task_list sched_list; starpu_pthread_mutex_t policy_mutex; }; static void init_dummy_sched(unsigned sched_ctx_id) { struct dummy_sched_data *data = (struct dummy_sched_data*)malloc(sizeof(struct dummy_sched_data)); /* Create a linked-list of tasks and a condition variable to protect it */ starpu_task_list_init(&data->sched_list); starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data); STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL); FPRINTF(stderr, "Initialising Dummy scheduler\n"); } static void deinit_dummy_sched(unsigned sched_ctx_id) { struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); STARPU_ASSERT(starpu_task_list_empty(&data->sched_list)); STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex); free(data); FPRINTF(stderr, "Destroying Dummy scheduler\n"); } static int push_task_dummy(struct starpu_task *task) { unsigned sched_ctx_id = task->sched_ctx; struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); /* NB: In this simplistic strategy, we assume that the context in which we push task has at least one worker*/ /* lock all workers when pushing tasks on a list where all of them would pop for tasks */ STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); starpu_task_list_push_front(&data->sched_list, task); starpu_push_task_end(task); STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); /*if there are no tasks block */ /* wake people waiting for a task */ struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { unsigned worker; worker = workers->get_next(workers, &it); starpu_pthread_mutex_t *sched_mutex; starpu_pthread_cond_t *sched_cond; starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond); STARPU_PTHREAD_MUTEX_LOCK(sched_mutex); STARPU_PTHREAD_COND_SIGNAL(sched_cond); STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex); } return 0; } /* The mutex associated to the calling worker is already taken by StarPU */ static struct starpu_task *pop_task_dummy(unsigned sched_ctx_id) { /* NB: In this simplistic strategy, we assume that all workers are able * to execute all tasks, otherwise, it would have been necessary to go * through the entire list until we find a task that is executable from * the calling worker. So we just take the head of the list and give it * to the worker. */ struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); #ifdef STARPU_NON_BLOCKING_DRIVERS if (starpu_task_list_empty(&data->sched_list)) return NULL; #endif STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); struct starpu_task *task = NULL; if (!starpu_task_list_empty(&data->sched_list)) task = starpu_task_list_pop_back(&data->sched_list); STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); return task; } static struct starpu_sched_policy dummy_sched_policy = { .init_sched = init_dummy_sched, .add_workers = NULL, .remove_workers = NULL, .deinit_sched = deinit_dummy_sched, .push_task = push_task_dummy, .pop_task = pop_task_dummy, .post_exec_hook = NULL, .policy_name = "dummy", .policy_description = "dummy scheduling strategy", .worker_type = STARPU_WORKER_LIST, }; int main(void) { int ntasks = NTASKS; int ret; /* struct starpu_conf conf; */ /* starpu_conf_init(&conf); */ /* conf.sched_policy = &dummy_sched_policy, */ ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned sched_ctx = starpu_sched_ctx_create(NULL, -1, "dummy", STARPU_SCHED_CTX_POLICY_STRUCT, &dummy_sched_policy, 0); #ifdef STARPU_QUICK_CHECK ntasks /= 100; #endif starpu_sched_ctx_set_context(&sched_ctx); int i; for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->cl_arg = NULL; ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/sched_ctx/gpu_partition.c000066400000000000000000000143011507764646700226210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This creates two dumb vectors & run axpy on them. */ #include #include #include #include #include #include #define N 512*512 #define NITER 100 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define EPSILON 1e-6 float *_vec_x[NITER], *_vec_y[NITER]; float _alpha = 3.41; /* descriptors for StarPU */ starpu_data_handle_t _handle_y[NITER], _handle_x[NITER]; void axpy_cpu(void *descr[], void *arg) { float alpha = *((float *)arg); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); float *block_x = (float *)STARPU_VECTOR_GET_PTR(descr[0]); float *block_y = (float *)STARPU_VECTOR_GET_PTR(descr[1]); unsigned i; for(i = 0; i < n; i++) block_y[i] = alpha * block_x[i] + block_y[i]; } #ifdef STARPU_USE_CUDA extern void cuda_axpy(void *descr[], void *_args); #endif static struct starpu_perfmodel axpy_model = { .type = STARPU_HISTORY_BASED, .symbol = "axpy" }; static struct starpu_codelet axpy_cl = { /* .cpu_funcs = {axpy_cpu}, */ /* .cpu_funcs_name = {"axpy_cpu"}, */ #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_axpy}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = {STARPU_R, STARPU_RW}, .name = "axpy", .model = &axpy_model }; static int check(int niter) { int i; for (i = 0; i < N; i++) { float expected_value = _alpha * _vec_x[niter][i] + 4.0; if (fabs(_vec_y[niter][i] - expected_value) > expected_value * EPSILON) { FPRINTF(stderr,"[error for iter %d, indice %d], obtained value %f NOT expected value %f (%f*%f+%f)\n", niter, i, _vec_y[niter][i], expected_value, _alpha, _vec_x[niter][i], 4.0); return EXIT_FAILURE; } } return EXIT_SUCCESS; } int main(void) { int ret, exit_value = 0; int iter; #ifdef STARPU_USE_CUDA int ncuda = 0; int gpu_devid = -1; #endif #ifdef STARPU_DEVEL #warning temporary fix: skip test as cuda computation fails #endif return 77; #ifndef STARPU_HAVE_SETENV return 77; #else /* Have separate threads for streams */ setenv("STARPU_CUDA_THREAD_PER_WORKER", "1", 1); setenv("STARPU_NWORKER_PER_CUDA", "2", 1); setenv("STARPU_NCUDA", "1", 1); #endif /* Initialize StarPU */ ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_CUDA ncuda = starpu_worker_get_devids(STARPU_CUDA_WORKER, &gpu_devid, 1); FPRINTF(stderr, "gpu_devid found %d \n", gpu_devid); #endif if (ncuda == 0) { starpu_shutdown(); return 77; } for(iter = 0; iter < NITER; iter++) { /* This is equivalent to vec_a = malloc(N*sizeof(float)); vec_b = malloc(N*sizeof(float)); */ starpu_malloc((void **)&_vec_x[iter], N*sizeof(float)); assert(_vec_x[iter]); starpu_malloc((void **)&_vec_y[iter], N*sizeof(float)); assert(_vec_y[iter]); unsigned i; for (i = 0; i < N; i++) { _vec_x[iter][i] = 1.0f; /*(float)starpu_drand48(); */ _vec_y[iter][i] = 4.0f; /*(float)starpu_drand48(); */ } /* Declare the data to StarPU */ starpu_vector_data_register(&_handle_x[iter], STARPU_MAIN_RAM, (uintptr_t)_vec_x[iter], N, sizeof(float)); starpu_vector_data_register(&_handle_y[iter], STARPU_MAIN_RAM, (uintptr_t)_vec_y[iter], N, sizeof(float)); } double start; double end; #ifdef STARPU_USE_CUDA unsigned nworkers = starpu_worker_get_count(); int stream_workerids[nworkers]; int nstreams = starpu_worker_get_stream_workerids(gpu_devid, stream_workerids, STARPU_CUDA_WORKER); int s; for(s = 0; s < nstreams; s++) FPRINTF(stderr, "stream w %d \n", stream_workerids[s]); int ncpus = starpu_cpu_worker_get_count(); int workers[ncpus+nstreams]; starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workers, ncpus); unsigned sched_ctxs[nstreams]; int nsms[nstreams]; nsms[0] = 6; nsms[1] = 7; for(s = 0; s < nstreams; s++) { sched_ctxs[s] = starpu_sched_ctx_create(&stream_workerids[s], 1, "subctx", STARPU_SCHED_CTX_CUDA_NSMS, nsms[s], 0); workers[ncpus+s] = stream_workerids[s]; } unsigned sched_ctx1 = starpu_sched_ctx_create(workers, ncpus+nstreams, "ctx1", STARPU_SCHED_CTX_SUB_CTXS, sched_ctxs, nstreams, STARPU_SCHED_CTX_POLICY_NAME, "dmdas", 0); FPRINTF(stderr, "parent ctx %u\n", sched_ctx1); starpu_sched_ctx_set_context(&sched_ctx1); #endif start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { struct starpu_task *task = starpu_task_create(); task->cl = &axpy_cl; task->cl_arg = &_alpha; task->cl_arg_size = sizeof(_alpha); task->handles[0] = _handle_x[iter]; task->handles[1] = _handle_y[iter]; ret = starpu_task_submit(task); if (ret == -ENODEV) { exit_value = 77; goto enodev; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); enodev: for(iter = 0; iter < NITER; iter++) { starpu_data_unregister(_handle_x[iter]); starpu_data_unregister(_handle_y[iter]); } end = starpu_timing_now(); double timing = end - start; FPRINTF(stderr, "timing -> %2.2f us %2.2f MB/s\n", timing, 3*N*sizeof(float)/timing); // FPRINTF(stderr, "AFTER y[0] = %2.2f (ALPHA = %2.2f)\n", _vec_y[iter][0], _alpha); if (exit_value != 77) { for(iter = 0; iter < NITER; iter++) { exit_value = check(iter); if(exit_value != EXIT_SUCCESS) break; } } for(iter = 0; iter < NITER; iter++) { starpu_free_noflag((void *)_vec_x[iter], N*sizeof(float)); starpu_free_noflag((void *)_vec_y[iter], N*sizeof(float)); } /* Stop StarPU */ starpu_shutdown(); return exit_value; } starpu-1.4.9+dfsg/examples/sched_ctx/nested_sched_ctxs.c000066400000000000000000000135751507764646700234420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #ifdef STARPU_QUICK_CHECK #define NTASKS 64 #else #define NTASKS 100 #endif int tasks_executed[2]; int parallel_code(int sched_ctx) { int i; int t = 0; int *cpuids = NULL; int ncpuids = 0; starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids); // printf("execute task of %d threads \n", ncpuids); #pragma omp parallel num_threads(ncpuids) { starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]); #ifdef __linux__ // printf("cpu = %d ctx%d nth = %d\n", sched_getcpu(), sched_ctx, omp_get_num_threads()); #endif #pragma omp for for(i = 0; i < NTASKS; i++) t++; } free(cpuids); return t; } static void sched_ctx_func(void *descr[], void *arg) { (void)descr; unsigned sched_ctx = (uintptr_t)arg; int t = parallel_code(sched_ctx); if (sched_ctx > 0 && sched_ctx < 3) { STARPU_ATOMIC_ADD(&tasks_executed[sched_ctx-1], t); } //printf("w %d executed %d it \n", w, n); } static struct starpu_codelet sched_ctx_codelet = { .cpu_funcs = {sched_ctx_func}, .model = NULL, .nbuffers = 0, .name = "sched_ctx" }; int main(void) { tasks_executed[0] = 0; tasks_executed[1] = 0; int ntasks = NTASKS; int ret, j, k; unsigned ncpus = 0; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int nprocs1 = 1; int nprocs2 = 1; int *procs1, *procs2; #ifdef STARPU_USE_CPU ncpus = starpu_cpu_worker_get_count(); procs1 = (int*)malloc(ncpus*sizeof(int)); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, ncpus); if (ncpus > 1) { nprocs1 = ncpus/2; nprocs2 = nprocs1; k = 0; procs2 = (int*)malloc(nprocs2*sizeof(int)); for(j = nprocs1; j < nprocs1+nprocs2; j++) procs2[k++] = procs1[j]; } else { procs2 = (int*)malloc(nprocs2*sizeof(int)); procs2[0] = procs1[0]; } #endif if (ncpus == 0) { #ifdef STARPU_USE_CPU free(procs1); free(procs2); #endif starpu_shutdown(); return 77; } /*create contexts however you want*/ unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); unsigned sched_ctx2 = starpu_sched_ctx_create(procs2, nprocs2, "ctx2", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); /*indicate what to do with the resources when context 2 finishes (it depends on your application)*/ starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1); int nprocs3 = nprocs1/2; int nprocs4 = nprocs3; int nprocs5 = nprocs2/2; int nprocs6 = nprocs5; int *procs3 = NULL; int *procs4 = NULL; int *procs5 = NULL; int *procs6 = NULL; if (nprocs3) procs3 = malloc(nprocs3 * sizeof(*procs3)); if (nprocs4) procs4 = malloc(nprocs4 * sizeof(*procs4)); if (nprocs5) procs5 = malloc(nprocs5 * sizeof(*procs5)); if (nprocs6) procs6 = malloc(nprocs6 * sizeof(*procs6)); k = 0; for(j = 0; j < nprocs3; j++) procs3[k++] = procs1[j]; k = 0; for(j = nprocs3; j < nprocs3+nprocs4; j++) procs4[k++] = procs1[j]; k = 0; for(j = 0; j < nprocs5; j++) procs5[k++] = procs2[j]; k = 0; for(j = nprocs5; j < nprocs5+nprocs6; j++) procs6[k++] = procs2[j]; int sched_ctx3 = -1; int sched_ctx4 = -1; int sched_ctx5 = -1; int sched_ctx6 = -1; if (nprocs3) sched_ctx3 = starpu_sched_ctx_create(procs3, nprocs3, "ctx3", STARPU_SCHED_CTX_NESTED, sched_ctx1, 0); if (nprocs4) sched_ctx4 = starpu_sched_ctx_create(procs4, nprocs4, "ctx4", STARPU_SCHED_CTX_NESTED, sched_ctx1, 0); if (nprocs5) sched_ctx5 = starpu_sched_ctx_create(procs5, nprocs5, "ctx5", STARPU_SCHED_CTX_NESTED, sched_ctx2, 0); if (nprocs6) sched_ctx6 = starpu_sched_ctx_create(procs6, nprocs6, "ctx6", STARPU_SCHED_CTX_NESTED, sched_ctx2, 0); int i; for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &sched_ctx_codelet; task->cl_arg = (void*)(uintptr_t) sched_ctx1; task->possibly_parallel = 1; /*submit tasks to context*/ ret = starpu_task_submit_to_ctx(task,sched_ctx1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &sched_ctx_codelet; task->cl_arg = (void*)(uintptr_t) sched_ctx2; task->possibly_parallel = 1; /*submit tasks to context*/ ret = starpu_task_submit_to_ctx(task,sched_ctx2); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* tell starpu when you finished submitting tasks to this context in order to allow moving resources from this context to the inheritor one when its corresponding tasks finished executing */ /* wait for all tasks at the end*/ starpu_task_wait_for_all(); if (nprocs3) { starpu_sched_ctx_delete(sched_ctx3); free(procs3); } if (nprocs4) { starpu_sched_ctx_delete(sched_ctx4); free(procs4); } if (nprocs5) { starpu_sched_ctx_delete(sched_ctx5); free(procs5); } if (nprocs6) { starpu_sched_ctx_delete(sched_ctx6); free(procs6); } starpu_sched_ctx_delete(sched_ctx1); starpu_sched_ctx_delete(sched_ctx2); printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_executed[0], NTASKS*NTASKS); printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_executed[1], NTASKS*NTASKS); #ifdef STARPU_USE_CPU free(procs1); free(procs2); #endif starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/sched_ctx/parallel_code.c000066400000000000000000000054071507764646700225320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef STARPU_USE_CPU #include #ifdef STARPU_QUICK_CHECK #define NTASKS 4 #else #define NTASKS 10 #endif int parallel_code(unsigned *sched_ctx) { int i; int t = 0; int *cpuids = NULL; int ncpuids = 0; starpu_sched_ctx_get_available_cpuids(*sched_ctx, &cpuids, &ncpuids); /* printf("execute task of %d threads \n", ncpuids); */ omp_set_num_threads(ncpuids); #pragma omp parallel { starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]); #ifdef __linux__ /* printf("cpu = %d ctx%d nth = %d\n", sched_getcpu(), *sched_ctx, omp_get_num_threads()); */ #endif #pragma omp for for(i = 0; i < NTASKS; i++) { #pragma omp atomic t++; } } free(cpuids); return t; } void *th(void* p) { unsigned* sched_ctx = (unsigned*)p; void* ret; ret = starpu_sched_ctx_exec_parallel_code((void*)parallel_code, p, *sched_ctx); pthread_exit(ret); } int main(void) { int ret; void* tasks_executed; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int nprocs1; int *procs1; unsigned ncpus = starpu_cpu_worker_get_count(); procs1 = (int*)malloc(ncpus*sizeof(int)); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, ncpus); nprocs1 = ncpus; unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); /* This is the interesting part, we can launch a code to hijack the context and use its cores to do something else entirely thanks to this */ pthread_t mp; STARPU_PTHREAD_CREATE(&mp, NULL, th, &sched_ctx1); STARPU_PTHREAD_JOIN(mp, &tasks_executed); /* Finished, delete the context and print the amount of executed tasks */ starpu_sched_ctx_delete(sched_ctx1); printf("ctx%u: tasks starpu executed %ld out of %d\n", sched_ctx1, (intptr_t)tasks_executed, NTASKS); starpu_shutdown(); free(procs1); return 0; } #else /* STARPU_USE_CPU */ int main(int argc, char **argv) { /* starpu_sched_ctx_exec_parallel_code() requires a CPU worker has parallel region master */ return 77; /* STARPU_TEST_SKIPPED */ } #endif /* STARPU_USE_CPU */ starpu-1.4.9+dfsg/examples/sched_ctx/parallel_tasks_reuse_handle.c000066400000000000000000000135411507764646700254610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #ifdef STARPU_QUICK_CHECK #define NTASKS 64 #define SIZE 40 #define LOOPS 4 #else #define NTASKS 100 #define SIZE 400 #define LOOPS 10 #endif #define N_NESTED_CTXS 2 struct context { int ncpus; int *cpus; unsigned id; }; /* Helper for the task that will initiate everything */ void parallel_task_prologue_init_once_and_for_all(void * sched_ctx_) { fprintf(stderr, "%p: %s -->\n", (void*)pthread_self(), __func__); int sched_ctx = *(int *)sched_ctx_; int *cpuids = NULL; int ncpuids = 0; starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids); #pragma omp parallel num_threads(ncpuids) { starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]); } omp_set_num_threads(ncpuids); free(cpuids); fprintf(stderr, "%p: %s <--\n", (void*)pthread_self(), __func__); return; } void noop(void * buffers[], void * cl_arg) { (void)buffers; (void)cl_arg; } static struct starpu_codelet init_parallel_worker_cl= { .cpu_funcs = {noop}, .nbuffers = 0, .name = "init_parallel_worker" }; /* function called to initialize the parallel "workers" */ void parallel_task_init_one_context(unsigned * context_id) { struct starpu_task * t; int ret; t = starpu_task_build(&init_parallel_worker_cl, STARPU_SCHED_CTX, *context_id, 0); t->destroy = 1; t->prologue_callback_pop_func=parallel_task_prologue_init_once_and_for_all; if (t->prologue_callback_pop_arg_free) free(t->prologue_callback_pop_arg); t->prologue_callback_pop_arg=context_id; t->prologue_callback_pop_arg_free=0; ret = starpu_task_submit(t); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } struct context main_context; struct context *contexts; void parallel_task_init() { /* Context creation */ main_context.ncpus = starpu_cpu_worker_get_count(); main_context.cpus = (int *) malloc(main_context.ncpus*sizeof(int)); fprintf(stderr, "ncpus : %d \n",main_context.ncpus); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, main_context.cpus, main_context.ncpus); main_context.id = starpu_sched_ctx_create(main_context.cpus, main_context.ncpus,"main_ctx", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); /* Initialize nested contexts */ contexts = malloc(sizeof(struct context)*N_NESTED_CTXS); int cpus_per_context = main_context.ncpus/N_NESTED_CTXS; int i; for(i = 0; i < N_NESTED_CTXS; i++) { contexts[i].ncpus = cpus_per_context; if (i == N_NESTED_CTXS-1) contexts[i].ncpus += main_context.ncpus%N_NESTED_CTXS; contexts[i].cpus = main_context.cpus+i*cpus_per_context; } for(i = 0; i < N_NESTED_CTXS; i++) contexts[i].id = starpu_sched_ctx_create(contexts[i].cpus, contexts[i].ncpus,"nested_ctx", STARPU_SCHED_CTX_NESTED,main_context.id, 0); for (i = 0; i < N_NESTED_CTXS; i++) { parallel_task_init_one_context(&contexts[i].id); } starpu_task_wait_for_all(); starpu_sched_ctx_set_context(&main_context.id); } void parallel_task_deinit() { int i; for (i=0; idestroy = 1; t->possibly_parallel = 1; ret=starpu_task_submit(t); if (ret == -ENODEV) goto out; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } out: /* wait for all tasks at the end*/ starpu_task_wait_for_all(); starpu_data_unregister(handle1); starpu_data_unregister(handle2); parallel_task_deinit(); starpu_free_noflag(array1, SIZE*sizeof(double)); starpu_free_noflag(array2, SIZE*sizeof(double)); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/sched_ctx/prio.c000066400000000000000000000041061507764646700207100ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) int main(void) { int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned sched_ctx1 = starpu_sched_ctx_create(NULL, -1, "ctx1", STARPU_SCHED_CTX_POLICY_NAME, "prio", 0); FPRINTF(stderr, "min prio %d\n", starpu_sched_ctx_get_min_priority(sched_ctx1)); FPRINTF(stderr, "max prio %d\n", starpu_sched_ctx_get_max_priority(sched_ctx1)); unsigned sched_ctx2 = starpu_sched_ctx_create(NULL, -1, "ctx2", STARPU_SCHED_CTX_POLICY_NAME, "prio", STARPU_SCHED_CTX_POLICY_MIN_PRIO, -12, STARPU_SCHED_CTX_POLICY_MAX_PRIO, 32, 0); FPRINTF(stderr, "min prio %d\n", starpu_sched_ctx_get_min_priority(sched_ctx2)); FPRINTF(stderr, "max prio %d\n", starpu_sched_ctx_get_max_priority(sched_ctx2)); if (starpu_sched_ctx_get_min_priority(sched_ctx2) != -12) { FPRINTF(stderr, "Error with min priority: %d != %d\n", starpu_sched_ctx_get_min_priority(sched_ctx2), -12); ret = 1; } if (starpu_sched_ctx_get_max_priority(sched_ctx2) != 32) { FPRINTF(stderr, "Error with max priority: %d != %d\n", starpu_sched_ctx_get_max_priority(sched_ctx2), 32); ret = 1; } starpu_sched_ctx_delete(sched_ctx1); starpu_sched_ctx_delete(sched_ctx2); starpu_shutdown(); return ret; } starpu-1.4.9+dfsg/examples/sched_ctx/sched_ctx.c000066400000000000000000000112341507764646700217030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #ifdef STARPU_HAVE_VALGRIND_H #include #endif #ifdef STARPU_QUICK_CHECK #define NTASKS 64 #else #define NTASKS 1000 #endif int tasks_executed = 0; int ctx1_tasks_executed = 0; int ctx2_tasks_executed = 0; int cpu_tasks_executed = 0; int gpu_tasks_executed = 0; static void sched_ctx_cpu_func(void *descr[], void *arg) { (void)descr; (void)arg; (void)STARPU_ATOMIC_ADD(&tasks_executed,1); (void)STARPU_ATOMIC_ADD(&ctx1_tasks_executed,1); (void)STARPU_ATOMIC_ADD(&cpu_tasks_executed,1); } static void sched_ctx2_cpu_func(void *descr[], void *arg) { (void)descr; (void)arg; (void)STARPU_ATOMIC_ADD(&tasks_executed,1); (void)STARPU_ATOMIC_ADD(&ctx2_tasks_executed,1); (void)STARPU_ATOMIC_ADD(&cpu_tasks_executed,1); } static void sched_ctx2_cuda_func(void *descr[], void *arg) { (void)descr; (void)arg; (void)STARPU_ATOMIC_ADD(&tasks_executed,1); (void)STARPU_ATOMIC_ADD(&ctx2_tasks_executed,1); (void)STARPU_ATOMIC_ADD(&gpu_tasks_executed,1); } static struct starpu_codelet sched_ctx_codelet1 = { .cpu_funcs = {sched_ctx_cpu_func}, .model = NULL, .nbuffers = 0, .name = "sched_ctx" }; static struct starpu_codelet sched_ctx_codelet2 = { .cpu_funcs = {sched_ctx2_cpu_func}, .cuda_funcs = {sched_ctx2_cuda_func}, .model = NULL, .nbuffers = 0, .name = "sched_ctx" }; int main(void) { int ntasks = NTASKS; int ret; int nprocs1 = 0; int nprocs2 = 0; int procs1[STARPU_NMAXWORKERS], procs2[STARPU_NMAXWORKERS]; char *sched = getenv("STARPU_SCHED"); ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) ntasks = 8; #endif #ifdef STARPU_USE_CPU nprocs1 = starpu_cpu_worker_get_count(); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, nprocs1); #endif // if there is no cpu, skip if (nprocs1 == 0) goto enodev; #ifdef STARPU_USE_CUDA nprocs2 = starpu_cuda_worker_get_count(); starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, procs2, nprocs2); #endif if (nprocs2 == 0) { nprocs2 = 1; procs2[0] = procs1[0]; } /*create contexts however you want*/ unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", STARPU_SCHED_CTX_POLICY_NAME, sched?sched:"eager", 0); unsigned sched_ctx2 = starpu_sched_ctx_create(procs2, nprocs2, "ctx2", STARPU_SCHED_CTX_POLICY_NAME, sched?sched:"eager", 0); /*indicate what to do with the resources when context 2 finishes (it depends on your application)*/ starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1); starpu_sched_ctx_display_workers(sched_ctx2, stderr); int i; for (i = 0; i < ntasks/2; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &sched_ctx_codelet1; task->cl_arg = NULL; /*submit tasks to context*/ ret = starpu_task_submit_to_ctx(task,sched_ctx1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* tell starpu when you finished submitting tasks to this context in order to allow moving resources from this context to the inheritor one when its corresponding tasks finished executing */ starpu_sched_ctx_finished_submit(sched_ctx1); for (i = 0; i < ntasks/2; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &sched_ctx_codelet2; task->cl_arg = NULL; ret = starpu_task_submit_to_ctx(task,sched_ctx2); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_sched_ctx_finished_submit(sched_ctx2); /* wait for all tasks at the end*/ starpu_task_wait_for_all(); starpu_sched_ctx_add_workers(procs1, nprocs1, sched_ctx2); starpu_sched_ctx_delete(sched_ctx1); starpu_sched_ctx_delete(sched_ctx2); printf("tasks executed %d out of %d\n", tasks_executed, ntasks); printf("tasks executed on ctx1: %d\n", ctx1_tasks_executed); printf("tasks executed on ctx2: %d\n", ctx2_tasks_executed); printf("tasks executed on CPU: %d\n", cpu_tasks_executed); printf("tasks executed on GPU: %d\n", gpu_tasks_executed); enodev: starpu_shutdown(); return nprocs1 == 0 ? 77 : 0; } starpu-1.4.9+dfsg/examples/sched_ctx/sched_ctx_delete.c000066400000000000000000000027461507764646700232350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include int main(void) { int ret; int nprocs = 0; int procs[STARPU_NMAXWORKERS]; unsigned sched_ctx1, sched_ctx2; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_worker_get_count_by_type(STARPU_CPU_WORKER) == 0) { // Needs at least 1 CPU worker starpu_shutdown(); return 77; } nprocs = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, nprocs); sched_ctx1 = starpu_sched_ctx_create(procs, nprocs, "ctx1", 0); sched_ctx2 = starpu_sched_ctx_create(procs, nprocs, "ctx2", 0); starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1); starpu_sched_ctx_delete(sched_ctx1); starpu_sched_ctx_delete(sched_ctx2); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/sched_ctx/sched_ctx_empty.c000066400000000000000000000034311507764646700231210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) static void cpu_func(void *descr[], void *arg) { (void)descr; (void)arg; FPRINTF(stdout, "Hello world\n"); } static struct starpu_codelet codelet = { .cpu_funcs = {cpu_func}, .nbuffers = 0, .name = "codelet" }; int main(void) { int ret; int nprocs = 0; int procs[STARPU_NMAXWORKERS]; unsigned sched_ctx_id; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); nprocs = starpu_cpu_worker_get_count(); // if there is no cpu, skip if (nprocs == 0) goto enodev; sched_ctx_id = starpu_sched_ctx_create(NULL, 0, "ctx", 0); starpu_sched_ctx_set_context(&sched_ctx_id); ret = starpu_task_insert(&codelet, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, nprocs); starpu_sched_ctx_add_workers(procs, nprocs, sched_ctx_id); starpu_task_wait_for_all(); starpu_sched_ctx_delete(sched_ctx_id); enodev: starpu_shutdown(); return nprocs == 0 ? 77 : 0; } starpu-1.4.9+dfsg/examples/sched_ctx/sched_ctx_remove.c000066400000000000000000000113501507764646700232570ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #ifdef STARPU_HAVE_VALGRIND_H #include #endif #ifdef STARPU_QUICK_CHECK #define NTASKS 64 #else #define NTASKS 1000 #endif int tasks_executed = 0; int ctx1_tasks_executed = 0; int ctx2_tasks_executed = 0; int cpu_tasks_executed = 0; int gpu_tasks_executed = 0; static void sched_ctx_cpu_func(void *descr[], void *arg) { (void)descr; (void)arg; (void)STARPU_ATOMIC_ADD(&tasks_executed,1); (void)STARPU_ATOMIC_ADD(&ctx1_tasks_executed,1); (void)STARPU_ATOMIC_ADD(&cpu_tasks_executed,1); } static void sched_ctx2_cpu_func(void *descr[], void *arg) { (void)descr; (void)arg; (void)STARPU_ATOMIC_ADD(&tasks_executed,1); (void)STARPU_ATOMIC_ADD(&ctx2_tasks_executed,1); (void)STARPU_ATOMIC_ADD(&cpu_tasks_executed,1); } static void sched_ctx2_cuda_func(void *descr[], void *arg) { (void)descr; (void)arg; (void)STARPU_ATOMIC_ADD(&tasks_executed,1); (void)STARPU_ATOMIC_ADD(&ctx2_tasks_executed,1); (void)STARPU_ATOMIC_ADD(&gpu_tasks_executed,1); } static struct starpu_codelet sched_ctx_codelet1 = { .cpu_funcs = {sched_ctx_cpu_func}, .model = NULL, .nbuffers = 0, .name = "sched_ctx" }; static struct starpu_codelet sched_ctx_codelet2 = { .cpu_funcs = {sched_ctx2_cpu_func}, .cuda_funcs = {sched_ctx2_cuda_func}, .model = NULL, .nbuffers = 0, .name = "sched_ctx" }; int main(void) { int ntasks = NTASKS; int ret; int nprocs1 = 0; int nprocs2 = 0; int procs1[STARPU_NMAXWORKERS], procs2[STARPU_NMAXWORKERS]; char *sched = getenv("STARPU_SCHED"); ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) ntasks = 8; #endif #ifdef STARPU_USE_CPU nprocs1 = starpu_cpu_worker_get_count(); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, nprocs1); #endif // if there is not enough cpu, skip if (nprocs1 <= 1) goto enodev; #ifdef STARPU_USE_CUDA nprocs2 = starpu_cuda_worker_get_count(); starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, procs2, nprocs2); #endif if (nprocs2 == 0) { nprocs2 = 1; procs2[0] = procs1[0]; } /*create contexts however you want*/ unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", STARPU_SCHED_CTX_POLICY_NAME, sched?sched:"eager", 0); unsigned sched_ctx2 = starpu_sched_ctx_create(procs2, nprocs2, "ctx2", STARPU_SCHED_CTX_POLICY_NAME, sched?sched:"eager", 0); /*indicate what to do with the resources when context 2 finishes (it depends on your application)*/ starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1); starpu_sched_ctx_display_workers(sched_ctx2, stderr); int i; for (i = 0; i < ntasks/2; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &sched_ctx_codelet1; task->cl_arg = NULL; /*submit tasks to context*/ ret = starpu_task_submit_to_ctx(task,sched_ctx1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* tell starpu when you finished submitting tasks to this context in order to allow moving resources from this context to the inheritor one when its corresponding tasks finished executing */ starpu_sched_ctx_finished_submit(sched_ctx1); starpu_sched_ctx_add_workers(procs1, nprocs1/2, sched_ctx2); starpu_sched_ctx_remove_workers(procs1, nprocs1/2, sched_ctx1); for (i = 0; i < ntasks/2; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &sched_ctx_codelet2; task->cl_arg = NULL; ret = starpu_task_submit_to_ctx(task,sched_ctx2); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_sched_ctx_finished_submit(sched_ctx2); /* wait for all tasks at the end*/ starpu_task_wait_for_all(); starpu_sched_ctx_delete(sched_ctx1); starpu_sched_ctx_delete(sched_ctx2); printf("tasks executed %d out of %d\n", tasks_executed, ntasks); printf("tasks executed on ctx1: %d\n", ctx1_tasks_executed); printf("tasks executed on ctx2: %d\n", ctx2_tasks_executed); printf("tasks executed on CPU: %d\n", cpu_tasks_executed); printf("tasks executed on GPU: %d\n", gpu_tasks_executed); enodev: starpu_shutdown(); return nprocs1 <= 1 ? 77 : 0; } starpu-1.4.9+dfsg/examples/sched_ctx/sched_ctx_without_sched_policy.c000066400000000000000000000104641507764646700262170ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #ifndef STARPU_QUICK_CHECK #define NTASKS 64 #else #define NTASKS 10 #endif int tasks_executed[2]; int parallel_code(int sched_ctx) { int i; int t = 0; int *cpuids = NULL; int ncpuids = 0; starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids); // printf("execute task of %d threads \n", ncpuids); #pragma omp parallel num_threads(ncpuids) reduction(+:t) { starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]); #ifdef __linux__ // printf("cpu = %d ctx%d nth = %d\n", sched_getcpu(), sched_ctx, omp_get_num_threads()); #endif #pragma omp for for(i = 0; i < NTASKS; i++) t++; } free(cpuids); return t; } static void sched_ctx_func(void *descr[], void *arg) { (void)descr; unsigned sched_ctx = (uintptr_t)arg; tasks_executed[sched_ctx-1] += parallel_code(sched_ctx); } static struct starpu_codelet sched_ctx_codelet = { .cpu_funcs = {sched_ctx_func}, .model = NULL, .nbuffers = 0, .name = "sched_ctx" }; int main(void) { tasks_executed[0] = 0; tasks_executed[1] = 0; int ntasks = NTASKS; int ret, j, k; unsigned ncpus = 0; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int nprocs1 = 1; int nprocs2 = 1; int *procs1, *procs2; #ifdef STARPU_USE_CUDA int ncuda = 0; int *procscuda; ncuda = starpu_cuda_worker_get_count(); procscuda = (int*)malloc(ncuda*sizeof(int)); starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, procscuda, ncuda); #endif #ifdef STARPU_USE_CPU ncpus = starpu_cpu_worker_get_count(); procs1 = (int*)malloc(ncpus*sizeof(int)); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, ncpus); if(ncpus > 1) { nprocs1 = ncpus/2; nprocs2 = ncpus-nprocs1; k = 0; procs2 = (int*)malloc(nprocs2*sizeof(int)); for(j = nprocs1; j < nprocs1+nprocs2; j++) procs2[k++] = procs1[j]; } else { procs2 = (int*)malloc(nprocs2*sizeof(int)); procs2[0] = procs1[0]; } #endif if (ncpus == 0) goto enodev; #ifdef STARPU_USE_CUDA if (ncuda > 0 && nprocs1 > 1) { procs1[nprocs1-1] = procscuda[0]; } #endif /*create contexts however you want*/ unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", 0); unsigned sched_ctx2 = starpu_sched_ctx_create(procs2, nprocs2, "ctx2", 0); starpu_sched_ctx_display_workers(sched_ctx1, stderr); starpu_sched_ctx_display_workers(sched_ctx2, stderr); int i; for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &sched_ctx_codelet; task->cl_arg = (void*)(uintptr_t) sched_ctx1; /*submit tasks to context*/ ret = starpu_task_submit_to_ctx(task,sched_ctx1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &sched_ctx_codelet; task->cl_arg = (void*)(uintptr_t) sched_ctx2; /*submit tasks to context*/ ret = starpu_task_submit_to_ctx(task,sched_ctx2); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* tell starpu when you finished submitting tasks to this context in order to allow moving resources from this context to the inheritor one when its corresponding tasks finished executing */ /* wait for all tasks at the end*/ starpu_task_wait_for_all(); starpu_sched_ctx_delete(sched_ctx1); starpu_sched_ctx_delete(sched_ctx2); printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_executed[0], NTASKS*NTASKS); printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_executed[1], NTASKS*NTASKS); enodev: #ifdef STARPU_USE_CPU free(procs1); free(procs2); #endif starpu_shutdown(); return ncpus == 0 ? 77 : 0; } starpu-1.4.9+dfsg/examples/sched_ctx/sched_ctx_without_sched_policy_awake.c000066400000000000000000000076441507764646700273750ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #ifdef STARPU_QUICK_CHECK #define NTASKS 64 #else #define NTASKS 100 #endif int tasks_executed[2][STARPU_NMAXWORKERS]; int parallel_code(int sched_ctx) { int i; int t = 0; int workerid = starpu_worker_get_id(); for(i = 0; i < NTASKS; i++) t++; tasks_executed[sched_ctx-1][workerid] = t; // printf("executed %d tasks on worker %d of sched_ctx %d \n", t, workerid, sched_ctx); return t; } static void sched_ctx_func(void *descr[], void *arg) { (void)descr; unsigned sched_ctx = (uintptr_t)arg; parallel_code(sched_ctx); } static struct starpu_codelet sched_ctx_codelet = { .cpu_funcs = {sched_ctx_func}, .model = NULL, .nbuffers = 0, .name = "sched_ctx" }; int main(void) { int i; for(i = 0; i < STARPU_NMAXWORKERS; i++) { tasks_executed[0][i] = 0; tasks_executed[1][i] = 0; } int ntasks = NTASKS; int ret, j, k; unsigned ncpus = 0; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int nprocs1 = 1; int nprocs2 = 1; int *procs1, *procs2; #ifdef STARPU_USE_CPU ncpus = starpu_cpu_worker_get_count(); procs1 = (int*)malloc(ncpus*sizeof(int)); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs1, ncpus); if(ncpus > 1) { nprocs1 = ncpus/2; nprocs2 = ncpus-nprocs1; k = 0; procs2 = (int*)malloc(nprocs2*sizeof(int)); for(j = nprocs1; j < nprocs1+nprocs2; j++) procs2[k++] = procs1[j]; } else { procs2 = (int*)malloc(nprocs2*sizeof(int)); procs2[0] = procs1[0]; } #endif if (ncpus == 0) goto enodev; /*create contexts however you want*/ unsigned sched_ctx1 = starpu_sched_ctx_create(procs1, nprocs1, "ctx1", STARPU_SCHED_CTX_AWAKE_WORKERS, 0); unsigned sched_ctx2 = starpu_sched_ctx_create(procs2, nprocs2, "ctx2", STARPU_SCHED_CTX_AWAKE_WORKERS, 0); for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &sched_ctx_codelet; task->cl_arg = (void*)(uintptr_t) sched_ctx1; /*submit tasks to context*/ ret = starpu_task_submit_to_ctx(task,sched_ctx1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &sched_ctx_codelet; task->cl_arg = (void*)(uintptr_t) sched_ctx2; /*submit tasks to context*/ ret = starpu_task_submit_to_ctx(task,sched_ctx2); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* tell starpu when you finished submitting tasks to this context in order to allow moving resources from this context to the inheritor one when its corresponding tasks finished executing */ /* wait for all tasks at the end*/ starpu_task_wait_for_all(); starpu_sched_ctx_delete(sched_ctx1); starpu_sched_ctx_delete(sched_ctx2); int tasks_per_ctx[2]; tasks_per_ctx[0] = 0; tasks_per_ctx[1] = 0; for(i = 0; i < STARPU_NMAXWORKERS; i++) { tasks_per_ctx[0] += tasks_executed[0][i]; tasks_per_ctx[1] += tasks_executed[1][i]; } printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx1, tasks_per_ctx[0]/nprocs1, NTASKS); printf("ctx%u: tasks starpu executed %d out of %d\n", sched_ctx2, tasks_per_ctx[1]/nprocs2, NTASKS); enodev: #ifdef STARPU_USE_CPU free(procs1); free(procs2); #endif starpu_shutdown(); return ncpus == 0 ? 77 : 0; } starpu-1.4.9+dfsg/examples/sched_ctx/two_cpu_contexts.c000066400000000000000000000067351507764646700233600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include /* This example case follows the same pattern its native Fortran version nf_sched_ctx.f90 */ static void sched_ctx_cpu_func(void *descr[], void *cl_args) { (void)descr; int task_id; starpu_codelet_unpack_args(cl_args, &task_id); printf("task: %d, workerid: %d\n", task_id, starpu_worker_get_id()); } static struct starpu_codelet sched_ctx_codelet = { .cpu_funcs = {sched_ctx_cpu_func}, .model = NULL, .nbuffers = 0, .name = "sched_ctx" }; int main(void) { int ncpu; int nprocs1; int nprocs2; int *procs = NULL; int *procs1 = NULL; int *procs2 = NULL; int i; int n = 20; int ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ncpu = starpu_cpu_worker_get_count(); /* actually we really need at least 2 CPU workers such to allocate 2 * non overlapping contexts */ if (ncpu < 2) { starpu_shutdown(); return 77; } procs = calloc(ncpu, sizeof(int)); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, ncpu); nprocs1 = ncpu / 2; procs1 = calloc(nprocs1, sizeof(int)); for (i=0; i unsigned size1; unsigned size2; unsigned nblocks1; unsigned nblocks2; unsigned cpu1; unsigned cpu2; unsigned gpu; unsigned gpu1; unsigned gpu2; struct params { unsigned id; unsigned ctx; int the_other_ctx; int *procs; int nprocs; void (*bench)(unsigned, unsigned); unsigned size; unsigned nblocks; }; struct retvals { double flops; double avg_timing; }; #define NSAMPLES 1 int first = 1; starpu_pthread_mutex_t mut; struct retvals rv[2]; struct params p1, p2; starpu_pthread_key_t key; void init() { size1 = 4*1024; size2 = 4*1024; nblocks1 = 16; nblocks2 = 16; cpu1 = 0; cpu2 = 0; gpu = 0; gpu1 = 0; gpu2 = 0; rv[0].flops = 0.0; rv[1].flops = 0.0; rv[1].avg_timing = 0.0; p1.ctx = 0; p2.ctx = 0; p1.id = 0; p2.id = 1; STARPU_PTHREAD_KEY_CREATE(&key, NULL); } void update_sched_ctx_timing_results(double flops, double avg_timing) { unsigned *id = STARPU_PTHREAD_GETSPECIFIC(key); rv[*id].flops += flops; rv[*id].avg_timing += avg_timing; } void* start_bench(void *val) { struct params *p = (struct params*)val; int i; STARPU_PTHREAD_SETSPECIFIC(key, &p->id); if(p->ctx != 0) starpu_sched_ctx_set_context(&p->ctx); for(i = 0; i < NSAMPLES; i++) p->bench(p->size, p->nblocks); if(p->ctx != 0) { STARPU_PTHREAD_MUTEX_LOCK(&mut); if(first) { starpu_sched_ctx_delete(p->ctx); } first = 0; STARPU_PTHREAD_MUTEX_UNLOCK(&mut); } rv[p->id].flops /= NSAMPLES; rv[p->id].avg_timing /= NSAMPLES; return NULL; } void start_2benchs(void (*bench)(unsigned, unsigned)) { p1.bench = bench; p1.size = size1; printf("size %u\n", size1); p1.nblocks = nblocks1; p2.bench = bench; p2.size = size2; printf("size %u\n", size2); p2.nblocks = nblocks2; starpu_pthread_t tid[2]; STARPU_PTHREAD_MUTEX_INIT(&mut, NULL); double start; double end; start = starpu_timing_now(); STARPU_PTHREAD_CREATE(&tid[0], NULL, (void*)start_bench, (void*)&p1); STARPU_PTHREAD_CREATE(&tid[1], NULL, (void*)start_bench, (void*)&p2); STARPU_PTHREAD_JOIN(tid[0], NULL); STARPU_PTHREAD_JOIN(tid[1], NULL); end = starpu_timing_now(); STARPU_PTHREAD_MUTEX_DESTROY(&mut); double timing = end - start; timing /= 1000000; printf("%2.2f %2.2f ", rv[0].flops, rv[1].flops); printf("%2.2f %2.2f %2.2f\n", rv[0].avg_timing, rv[1].avg_timing, timing); } void start_1stbench(void (*bench)(unsigned, unsigned)) { p1.bench = bench; p1.size = size1; p1.nblocks = nblocks1; double start; double end; start = starpu_timing_now(); start_bench((void*)&p1); end = starpu_timing_now(); STARPU_PTHREAD_MUTEX_DESTROY(&mut); double timing = end - start; timing /= 1000000; printf("%2.2f ", rv[0].flops); printf("%2.2f %2.2f\n", rv[0].avg_timing, timing); } void start_2ndbench(void (*bench)(unsigned, unsigned)) { p2.bench = bench; p2.size = size2; p2.nblocks = nblocks2; double start; double end; start = starpu_timing_now(); start_bench((void*)&p2); end = starpu_timing_now(); STARPU_PTHREAD_MUTEX_DESTROY(&mut); double timing = end - start; timing /= 1000000; printf("%2.2f ", rv[1].flops); printf("%2.2f %2.2f\n", rv[1].avg_timing, timing); } void construct_contexts() { unsigned nprocs1 = cpu1 + gpu + gpu1; unsigned nprocs2 = cpu2 + gpu + gpu2; unsigned n_all_gpus = gpu + gpu1 + gpu2; int procs[nprocs1]; unsigned i; int k = 0; for(i = 0; i < gpu; i++) { procs[k++] = i; printf("%u ", i); } for(i = gpu; i < gpu + gpu1; i++) { procs[k++] = i; printf("%u ", i); } for(i = n_all_gpus; i < n_all_gpus + cpu1; i++) { procs[k++] = i; printf("%u ", i); } printf("\n "); p1.ctx = starpu_sched_ctx_create(procs, nprocs1, "sched_ctx1", STARPU_SCHED_CTX_POLICY_NAME, "heft", 0); p2.the_other_ctx = (int)p1.ctx; p1.procs = procs; p1.nprocs = nprocs1; int procs2[nprocs2]; k = 0; for(i = 0; i < gpu; i++) { procs2[k++] = i; printf("%u ", i); } for(i = gpu + gpu1; i < gpu + gpu1 + gpu2; i++) { procs2[k++] = i; printf("%u ", i); } for(i = n_all_gpus + cpu1; i < n_all_gpus + cpu1 + cpu2; i++) { procs2[k++] = i; printf("%u ", i); } printf("\n"); p2.ctx = starpu_sched_ctx_create(procs2, nprocs2, "sched_ctx2", STARPU_SCHED_CTX_POLICY_NAME, "heft", 0); p1.the_other_ctx = (int)p2.ctx; p2.procs = procs2; starpu_sched_ctx_set_inheritor(p1.ctx, p2.ctx); starpu_sched_ctx_set_inheritor(p2.ctx, p1.ctx); p2.nprocs = nprocs2; } void parse_args_ctx(int argc, char **argv) { init(); int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-size1") == 0) { char *argptr; size1 = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-nblocks1") == 0) { char *argptr; nblocks1 = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-size2") == 0) { char *argptr; size2 = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-nblocks2") == 0) { char *argptr; nblocks2 = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-cpu1") == 0) { char *argptr; cpu1 = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-cpu2") == 0) { char *argptr; cpu2 = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-gpu") == 0) { char *argptr; gpu = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-gpu1") == 0) { char *argptr; gpu1 = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-gpu2") == 0) { char *argptr; gpu2 = strtol(argv[++i], &argptr, 10); } } } starpu-1.4.9+dfsg/examples/sched_ctx_utils/sched_ctx_utils.h000066400000000000000000000021271507764646700243510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include void parse_args_ctx(int argc, char **argv); void update_sched_ctx_timing_results(double gflops, double timing); void construct_contexts(); void start_2benchs(void (*bench)(unsigned size, unsigned nblocks)); void start_1stbench(void (*bench)(unsigned size, unsigned nblocks)); void start_2ndbench(void (*bench)(unsigned size, unsigned nblocks)); starpu-1.4.9+dfsg/examples/scheduler/000077500000000000000000000000001507764646700176045ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/scheduler/dummy_modular_sched.c000066400000000000000000000155111507764646700237770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is an example of an application-defined scheduler. * This is a mere eager scheduler with a centralized list of tasks to schedule: * when a task becomes ready (push) it is put on the list. When a device * becomes ready (pop), a task is taken from the list. */ #include #include #include #ifdef STARPU_QUICK_CHECK #define NTASKS 320 #elif !defined(STARPU_LONG_CHECK) #define NTASKS 3200 #else #define NTASKS 32000 #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) struct dummy_sched_params { int verbose; }; struct dummy_sched_data { int verbose; struct starpu_task_list sched_list; starpu_pthread_mutex_t policy_mutex; }; static void dummy_deinit_data(struct starpu_sched_component * component) { struct dummy_sched_data *data = component->data; STARPU_ASSERT(starpu_task_list_empty(&data->sched_list)); if (data->verbose) fprintf(stderr, "Destroying Dummy scheduler\n"); STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex); free(data); } static int dummy_push_task(struct starpu_sched_component *component, struct starpu_task *task) { struct dummy_sched_data *data = component->data; if (data->verbose) fprintf(stderr, "pushing task %p\n", task); /* NB: In this simplistic strategy, we assume that the context in which we push task has at least one worker*/ /* lock all workers when pushing tasks on a list where all of them would pop for tasks */ STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); starpu_task_list_push_front(&data->sched_list, task); starpu_push_task_end(task); STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); /* Tell below that they can now pull */ component->can_pull(component); return 0; } static struct starpu_task *dummy_pull_task(struct starpu_sched_component *component, struct starpu_sched_component *to) { struct dummy_sched_data *data = component->data; if (data->verbose) fprintf(stderr, "%p pulling for a task\n", to); #ifdef STARPU_NON_BLOCKING_DRIVERS if (starpu_task_list_empty(&data->sched_list)) return NULL; #endif STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); struct starpu_task *task = NULL; if (!starpu_task_list_empty(&data->sched_list)) task = starpu_task_list_pop_back(&data->sched_list); STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); return task; } static int dummy_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to) { struct dummy_sched_data *data = component->data; int didwork = 0; if (data->verbose) fprintf(stderr, "%p tells me I can push to him\n", to); struct starpu_task *task; task = starpu_sched_component_pump_to(component, to, &didwork); if (task) { if (data->verbose) fprintf(stderr, "oops, %p couldn't take our task\n", to); /* Oops, we couldn't push everything, put back this task */ STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); starpu_task_list_push_back(&data->sched_list, task); STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); } else { if (data->verbose) { if (didwork) fprintf(stderr, "pushed some tasks to %p\n", to); else fprintf(stderr, "I didn't have anything for %p\n", to); } } /* There is room now */ return didwork || starpu_sched_component_can_push(component, to); } static int dummy_can_pull(struct starpu_sched_component * component) { struct dummy_sched_data *data = component->data; if (data->verbose) fprintf(stderr,"telling below they can pull\n"); return starpu_sched_component_can_pull(component); } struct starpu_sched_component *dummy_create(struct starpu_sched_tree *tree, struct dummy_sched_params *params) { struct starpu_sched_component *component = starpu_sched_component_create(tree, "dummy"); struct dummy_sched_data *data = malloc(sizeof(*data)); STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL); /* Create a linked-list of tasks and a condition variable to protect it */ starpu_task_list_init(&data->sched_list); data->verbose = params->verbose; component->data = data; component->push_task = dummy_push_task; component->pull_task = dummy_pull_task; component->can_push = dummy_can_push; component->can_pull = dummy_can_pull; component->deinit_data = dummy_deinit_data; return component; } static void init_dummy_sched(unsigned sched_ctx_id) { FPRINTF(stderr, "Initialising Dummy scheduler\n"); struct dummy_sched_params params = { .verbose = 0, }; starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) dummy_create, ¶ms, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO, sched_ctx_id); } static void deinit_dummy_sched(unsigned sched_ctx_id) { struct starpu_sched_tree *t = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id); starpu_sched_tree_destroy(t); } static struct starpu_sched_policy dummy_sched_policy = { .init_sched = init_dummy_sched, .deinit_sched = deinit_dummy_sched, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "dummy", .policy_description = "dummy modular scheduling strategy", .worker_type = STARPU_WORKER_LIST, }; int main(void) { int ntasks = NTASKS; int ret; struct starpu_conf conf; char *sched = getenv("STARPU_SCHED"); if (sched && sched[0]) /* Testing a specific scheduler, no need to run this */ return 77; starpu_conf_init(&conf); conf.sched_policy = &dummy_sched_policy, ret = starpu_init(&conf); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_QUICK_CHECK ntasks /= 100; #endif starpu_codelet_nop.model = &starpu_perfmodel_nop; int i; for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->cl_arg = NULL; ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/scheduler/dummy_sched.c000066400000000000000000000121151507764646700222510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is an example of an application-defined scheduler. * This is a mere eager scheduler with a centralized list of tasks to schedule: * when a task becomes ready (push) it is put on the list. When a device * becomes ready (pop), a task is taken from the list. */ #include #include #ifdef STARPU_QUICK_CHECK #define NTASKS 320 #elif !defined(STARPU_LONG_CHECK) #define NTASKS 3200 #else #define NTASKS 32000 #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) struct dummy_sched_data { struct starpu_task_list sched_list; starpu_pthread_mutex_t policy_mutex; }; static void init_dummy_sched(unsigned sched_ctx_id) { struct dummy_sched_data *data = (struct dummy_sched_data*)malloc(sizeof(struct dummy_sched_data)); /* Create a linked-list of tasks and a condition variable to protect it */ starpu_task_list_init(&data->sched_list); starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data); STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL); FPRINTF(stderr, "Initialising Dummy scheduler\n"); } static void deinit_dummy_sched(unsigned sched_ctx_id) { struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); STARPU_ASSERT(starpu_task_list_empty(&data->sched_list)); STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex); free(data); FPRINTF(stderr, "Destroying Dummy scheduler\n"); } static int push_task_dummy(struct starpu_task *task) { unsigned sched_ctx_id = task->sched_ctx; struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); /* NB: In this simplistic strategy, we assume that the context in which we push task has at least one worker*/ /* lock all workers when pushing tasks on a list where all of them would pop for tasks */ STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); starpu_task_list_push_front(&data->sched_list, task); starpu_push_task_end(task); STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); /*if there are no tasks block */ /* wake people waiting for a task */ struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); starpu_wake_worker_relax_light(worker); } return 0; } /* The mutex associated to the calling worker is already taken by StarPU */ static struct starpu_task *pop_task_dummy(unsigned sched_ctx_id) { /* NB: In this simplistic strategy, we assume that all workers are able * to execute all tasks, otherwise, it would have been necessary to go * through the entire list until we find a task that is executable from * the calling worker. So we just take the head of the list and give it * to the worker. */ struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); #ifdef STARPU_NON_BLOCKING_DRIVERS if (starpu_task_list_empty(&data->sched_list)) return NULL; #endif STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); struct starpu_task *task = NULL; if (!starpu_task_list_empty(&data->sched_list)) task = starpu_task_list_pop_back(&data->sched_list); STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); return task; } static struct starpu_sched_policy dummy_sched_policy = { .init_sched = init_dummy_sched, .deinit_sched = deinit_dummy_sched, .push_task = push_task_dummy, .pop_task = pop_task_dummy, .policy_name = "dummy", .policy_description = "dummy scheduling strategy", .worker_type = STARPU_WORKER_LIST, }; int main(void) { int ntasks = NTASKS; int ret; struct starpu_conf conf; char *sched = getenv("STARPU_SCHED"); if (sched && sched[0]) /* Testing a specific scheduler, no need to run this */ return 77; starpu_conf_init(&conf); conf.sched_policy = &dummy_sched_policy, ret = starpu_init(&conf); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_QUICK_CHECK ntasks /= 100; #endif starpu_codelet_nop.model = &starpu_perfmodel_nop; int i; for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->cl_arg = NULL; ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/scheduler/heteroprio_test.c000066400000000000000000000153471507764646700232010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is an example making use of the heteroprio scheduler, it shows how * priorities are taken into account. */ #include #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void initSchedulerCallback(unsigned sched_ctx) { // CPU uses 3 buckets #ifdef STARPU_USE_CPU if (starpu_cpu_worker_get_count()) { starpu_heteroprio_set_nb_prios(0, STARPU_CPU_WORKER, 3); // It uses direct mapping idx => idx unsigned idx; for(idx = 0; idx < 3; ++idx) { starpu_heteroprio_set_mapping(sched_ctx, STARPU_CPU_WORKER, idx, idx); starpu_heteroprio_set_faster_arch(sched_ctx, STARPU_CPU_WORKER, idx); } } #endif #ifdef STARPU_USE_OPENCL // OpenCL is enabled and uses 2 buckets starpu_heteroprio_set_nb_prios(sched_ctx, STARPU_OPENCL_WORKER, 2); // OpenCL will first look to priority 2 int prio2 = starpu_cpu_worker_get_count() ? 2 : 1; starpu_heteroprio_set_mapping(sched_ctx, STARPU_OPENCL_WORKER, 0, prio2); // For this bucket OpenCL is the fastest starpu_heteroprio_set_faster_arch(sched_ctx, STARPU_OPENCL_WORKER, prio2); // And CPU is 4 times slower #ifdef STARPU_USE_CPU starpu_heteroprio_set_arch_slow_factor(sched_ctx, STARPU_CPU_WORKER, 2, 4.0f); #endif int prio1 = starpu_cpu_worker_get_count() ? 1 : 0; starpu_heteroprio_set_mapping(sched_ctx, STARPU_OPENCL_WORKER, 1, prio1); // We let the CPU as the fastest and tell that OpenCL is 1.7 times slower starpu_heteroprio_set_arch_slow_factor(sched_ctx, STARPU_OPENCL_WORKER, prio1, 1.7f); #endif } void callback_a_cpu(void *buffers[], void *cl_arg) { (void)buffers; (void)cl_arg; starpu_usleep(100000); FPRINTF(stderr, "[COMMUTE_LOG] callback %s\n", __FUNCTION__); fflush(stderr); } void callback_b_cpu(void *buffers[], void *cl_arg) { (void)buffers; (void)cl_arg; starpu_usleep(100000); FPRINTF(stderr, "[COMMUTE_LOG] callback %s\n", __FUNCTION__); fflush(stderr); } void callback_c_cpu(void *buffers[], void *cl_arg) { (void)buffers; (void)cl_arg; starpu_usleep(100000); FPRINTF(stderr, "[COMMUTE_LOG] callback %s\n", __FUNCTION__); fflush(stderr); } #ifdef STARPU_USE_OPENCL void callback_a_opencl(void *buffers[], void *cl_arg) { (void)buffers; (void)cl_arg; starpu_usleep(100000); FPRINTF(stderr, "[COMMUTE_LOG] callback %s\n", __FUNCTION__); fflush(stderr); } void callback_b_opencl(void *buffers[], void *cl_arg) { (void)buffers; (void)cl_arg; starpu_usleep(100000); FPRINTF(stderr, "[COMMUTE_LOG] callback %s\n", __FUNCTION__); fflush(stderr); } void callback_c_opencl(void *buffers[], void *cl_arg) { (void)buffers; (void)cl_arg; starpu_usleep(100000); FPRINTF(stderr, "[COMMUTE_LOG] callback %s\n", __FUNCTION__); fflush(stderr); } #endif int main(void) { int ret; struct starpu_conf conf; int ncpus, nopencls; ret = starpu_conf_init(&conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_conf_init"); assert(ret == 0); conf.sched_policy_name = "heteroprio"; conf.sched_policy_callback = &initSchedulerCallback; ret = starpu_init(&conf); if (ret == -ENODEV) return 77; ncpus = starpu_cpu_worker_get_count(); nopencls = starpu_opencl_worker_get_count(); FPRINTF(stderr, "Worker = %u\n", starpu_worker_get_count()); FPRINTF(stderr, "Worker CPU = %d\n", ncpus); FPRINTF(stderr, "Worker OpenCL = %d\n", nopencls); if (ncpus + nopencls == 0) { FPRINTF(stderr, "Needs at least one CPU or OpenCL device\n"); starpu_shutdown(); return 77; } struct starpu_codelet codeleteA; { memset(&codeleteA, 0, sizeof(codeleteA)); codeleteA.nbuffers = 2; codeleteA.modes[0] = STARPU_RW; codeleteA.modes[1] = STARPU_RW; codeleteA.name = "codeleteA"; #ifdef STARPU_USE_CPU codeleteA.cpu_funcs[0] = callback_a_cpu; #endif #ifdef STARPU_USE_OPENCL codeleteA.opencl_funcs[0] = callback_a_opencl; #endif } struct starpu_codelet codeleteB; { memset(&codeleteB, 0, sizeof(codeleteB)); codeleteB.nbuffers = 2; codeleteB.modes[0] = STARPU_RW; codeleteB.modes[1] = STARPU_RW; codeleteB.name = "codeleteB"; codeleteB.cpu_funcs[0] = callback_b_cpu; #ifdef STARPU_USE_OPENCL codeleteB.opencl_funcs[0] = callback_b_opencl; #endif } struct starpu_codelet codeleteC; { memset(&codeleteC, 0, sizeof(codeleteC)); codeleteC.nbuffers = 2; codeleteC.modes[0] = STARPU_RW; codeleteC.modes[1] = STARPU_RW; codeleteC.name = "codeleteC"; codeleteC.cpu_funcs[0] = callback_c_cpu; #ifdef STARPU_USE_OPENCL codeleteC.opencl_funcs[0] = callback_c_opencl; #endif } const int nbHandles = 10; FPRINTF(stderr, "Nb handles = %d\n", nbHandles); starpu_data_handle_t handles[nbHandles]; memset(handles, 0, sizeof(handles[0])*nbHandles); int dataA[nbHandles]; int idx; for(idx = 0; idx < nbHandles; ++idx) { dataA[idx] = idx; } int idxHandle; for(idxHandle = 0; idxHandle < nbHandles; ++idxHandle) { starpu_variable_data_register(&handles[idxHandle], 0, (uintptr_t)&dataA[idxHandle], sizeof(dataA[idxHandle])); } const int nbTasks = 4; FPRINTF(stderr, "Submit %d tasks \n", nbTasks); int prio2 = starpu_cpu_worker_get_count() ? 2 : 1; int idxTask; for(idxTask = 0; idxTask < nbTasks; ++idxTask) { ret = starpu_task_insert(&codeleteA, STARPU_PRIORITY, 0, (STARPU_RW), handles[(idxTask*2)%nbHandles], (STARPU_RW), handles[(idxTask*3+1)%nbHandles], 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&codeleteB, STARPU_PRIORITY, 1, (STARPU_RW), handles[(idxTask*2 +1)%nbHandles], (STARPU_RW), handles[(idxTask*2)%nbHandles], 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&codeleteC, STARPU_PRIORITY, prio2, (STARPU_RW), handles[(idxTask)%nbHandles], (STARPU_RW), handles[(idxTask*idxTask)%nbHandles], 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } FPRINTF(stderr, "Wait task\n"); starpu_task_wait_for_all(); FPRINTF(stderr, "Release data\n"); for(idxHandle = 0 ; idxHandle < nbHandles ; ++idxHandle) { starpu_data_unregister(handles[idxHandle]); } FPRINTF(stderr, "Shutdown\n"); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/scheduler/libdummy_sched.c000066400000000000000000000107611507764646700227450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is an example of an application-defined scheduler. * This is a mere eager scheduler with a centralized list of tasks to schedule: * when a task becomes ready (push) it is put on the list. When a device * becomes ready (pop), a task is taken from the list. */ #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) struct dummy_sched_data { struct starpu_task_list sched_list; starpu_pthread_mutex_t policy_mutex; }; static void init_dummy_sched(unsigned sched_ctx_id) { struct dummy_sched_data *data = (struct dummy_sched_data*)malloc(sizeof(struct dummy_sched_data)); /* Create a linked-list of tasks and a condition variable to protect it */ starpu_task_list_init(&data->sched_list); starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data); STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL); FPRINTF(stderr, "Initialising Dummy scheduler\n"); } static void deinit_dummy_sched(unsigned sched_ctx_id) { struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); STARPU_ASSERT(starpu_task_list_empty(&data->sched_list)); STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex); free(data); FPRINTF(stderr, "Destroying Dummy scheduler\n"); } static int push_task_dummy(struct starpu_task *task) { unsigned sched_ctx_id = task->sched_ctx; struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); /* NB: In this simplistic strategy, we assume that the context in which we push task has at least one worker*/ /* lock all workers when pushing tasks on a list where all of them would pop for tasks */ STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); starpu_task_list_push_front(&data->sched_list, task); starpu_push_task_end(task); STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); /*if there are no tasks block */ /* wake people waiting for a task */ struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); starpu_wake_worker_relax_light(worker); } return 0; } /* The mutex associated to the calling worker is already taken by StarPU */ static struct starpu_task *pop_task_dummy(unsigned sched_ctx_id) { /* NB: In this simplistic strategy, we assume that all workers are able * to execute all tasks, otherwise, it would have been necessary to go * through the entire list until we find a task that is executable from * the calling worker. So we just take the head of the list and give it * to the worker. */ struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); #ifdef STARPU_NON_BLOCKING_DRIVERS if (starpu_task_list_empty(&data->sched_list)) return NULL; #endif STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); struct starpu_task *task = NULL; if (!starpu_task_list_empty(&data->sched_list)) task = starpu_task_list_pop_back(&data->sched_list); STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); return task; } static struct starpu_sched_policy dummy_sched_policy = { .init_sched = init_dummy_sched, .deinit_sched = deinit_dummy_sched, .push_task = push_task_dummy, .pop_task = pop_task_dummy, .policy_name = "dummy", .policy_description = "dummy scheduling strategy", .worker_type = STARPU_WORKER_LIST, }; struct starpu_sched_policy *starpu_get_sched_lib_policy(const char *name) { if (!strcmp(name, "dummy")) return &dummy_sched_policy; return NULL; } struct starpu_sched_policy *predefined_policies[] = { &dummy_sched_policy }; struct starpu_sched_policy **starpu_get_sched_lib_policies(void) { return predefined_policies; } starpu-1.4.9+dfsg/examples/scheduler/libdummy_sched.sh000077500000000000000000000015651507764646700231420ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # ROOT=${0%/libdummy_sched.sh} if test -x $ROOT/../incrementer/incrementer then STARPU_SCHED_LIB=$ROOT/.libs/libdummy_sched.so STARPU_SCHED=dummy $ROOT/../incrementer/incrementer else exit 77 fi starpu-1.4.9+dfsg/examples/scheduler/schedulers.sh000077500000000000000000000037151507764646700223120ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # check_success() { if [ $1 -eq 77 ] ; then ( echo SKIP: STARPU_SCHED=$sched $basedir/../cholesky/cholesky_tag >&9 ) 2> /dev/null || true echo "skip" >&2 exit $1 elif [ $1 -ne 0 ] ; then ( echo FAIL: STARPU_SCHED=$sched $basedir/../cholesky/cholesky_tag >&9 ) 2> /dev/null || true echo "failure" >&2 exit $1 else ( echo PASS: STARPU_SCHED=$sched $basedir/../cholesky/cholesky_tag >&9 ) 2> /dev/null || true fi } basedir=$(dirname $0) if test ! -x $basedir/../cholesky/cholesky_tag then echo "Application $basedir/../cholesky/cholesky_tag unavailable" exit 77 fi if [ -n "$STARPU_SCHED" ] then SCHEDULERS=$STARPU_SCHED else SCHEDULERS=`$basedir/../../tools/starpu_sched_display | grep -v heteroprio` fi if [ "$STARPU_QUICK_CHECK" = 1 ] then SIDE=32 else SIDE=320 fi run() { sched=$1 echo "cholesky.$sched" STARPU_SCHED=$sched $STARPU_SUB_PARALLEL $MS_LAUNCHER $STARPU_LAUNCH $basedir/../cholesky/cholesky_tag -size $(($SIDE*3)) -nblocks 3 check_success $? } if [ -n "$STARPU_SUB_PARALLEL" ] then for sched in $SCHEDULERS do run $sched & done RESULT=0 while true do wait -n RET=$? if [ $RET = 127 ] ; then break ; fi if [ $RET != 0 -a $RET != 77 ] ; then RESULT=1 ; fi done exit $RESULT else for sched in $SCHEDULERS do run $sched done fi starpu-1.4.9+dfsg/examples/scheduler/schedulers_context.sh000077500000000000000000000036341507764646700240560ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # check_success() { if [ $1 -eq 77 ] ; then ( echo SKIP: STARPU_SCHED=$sched $basedir/../sched_ctx/sched_ctx >&9 ) 2> /dev/null || true echo "skip" >&2 exit $1 elif [ $1 -ne 0 ] ; then ( echo FAIL: STARPU_SCHED=$sched $basedir/../sched_ctx/sched_ctx >&9 ) 2> /dev/null || true echo "failure" >&2 exit $1 else ( echo PASS: STARPU_SCHED=$sched $basedir/../sched_ctx/sched_ctx >&9 ) 2> /dev/null || true fi } basedir=$(dirname $0) if test ! -x $basedir/../sched_ctx/sched_ctx then echo "Application $basedir/../sched_ctx/sched_ctx unavailable" exit 77 fi if [ -n "$STARPU_SCHED" ] then SCHEDULERS="$STARPU_SCHED" else SCHEDULERS=`$basedir/../../tools/starpu_sched_display | grep -v pheft | grep -v peager | grep -v heteroprio | grep -v modular-gemm` fi run() { sched=$1 echo "sched_ctx.$sched" STARPU_SCHED=$sched $STARPU_SUB_PARALLEL $MS_LAUNCHER $STARPU_LAUNCH $basedir/../sched_ctx/sched_ctx check_success $? } if [ -n "$STARPU_SUB_PARALLEL" ] then for sched in $SCHEDULERS do run $sched & done RESULT=0 while true do wait -n RET=$? if [ $RET = 127 ] ; then break ; fi if [ $RET != 0 -a $RET != 77 ] ; then RESULT=1 ; fi done exit $RESULT else for sched in $SCHEDULERS do run $sched done fi starpu-1.4.9+dfsg/examples/spmd/000077500000000000000000000000001507764646700165715ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/spmd/vector_scal_spmd.c000066400000000000000000000103771507764646700222740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This shows how to implement an spmd parallel StarPU task: scal_cpu_func is * called in parallel over several cores, and has to split the work accordingly. * This is a mere vector scaling example. */ /* gcc build: * * gcc -O2 -g vector_scal.c -o vector_scal $(pkg-config --cflags starpu-1.0) $(pkg-config --libs starpu-1.0) * */ #include #include #include #define MIN(a,b) ((a)<(b)?(a):(b)) #define NX 204800 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #ifdef STARPU_QUICK_CHECK #define ITER 10 #else #define ITER 100 #endif static int get_first_element_rank(int nel, int rank, int nb_workers) { if(rank == 0) return 0; /* We get the number of bigger parts which stand before the part */ int nb_big_parts = MIN(nel % nb_workers, rank); return nb_big_parts * (nel / nb_workers + 1) + (rank - nb_big_parts) * (nel / nb_workers); } void scal_cpu_func(void *buffers[], void *_args) { int i; float *factor = _args, f = *factor; struct starpu_vector_interface *vector = buffers[0]; int n = STARPU_VECTOR_GET_NX(vector); float *val = (float *)STARPU_VECTOR_GET_PTR(vector); int nb_workers = starpu_combined_worker_get_size(); int rank = starpu_combined_worker_get_rank(); if (rank == 0) FPRINTF(stderr, "running task with %d CPUs.\n", starpu_combined_worker_get_size()); /* We add 1 to the (nel_total % nb_workers) first workers, thus we get an evenly split data. */ int nel_worker = (n / nb_workers) + ((rank < (n % nb_workers)) ? 1 : 0); int begin = get_first_element_rank(n, rank, nb_workers); for (i = 0; i < nel_worker; i++) { rank = i + begin; float v = val[rank]; int j; for (j = 0; j < 100; j++) v = v * f; val[rank] = v; } } static struct starpu_perfmodel vector_scal_model = { .type = STARPU_HISTORY_BASED, .symbol = "vector_scal_parallel" }; static struct starpu_codelet cl = { .modes = { STARPU_RW }, .type = STARPU_SPMD, .max_parallelism = INT_MAX, .cpu_funcs = {scal_cpu_func}, .cpu_funcs_name = {"scal_cpu_func"}, .nbuffers = 1, .model = &vector_scal_model, }; int main(void) { struct starpu_conf conf; float *vector; unsigned i; int ret; starpu_conf_init(&conf); conf.single_combined_worker = 1; conf.sched_policy_name = "pheft"; { ret = starpu_init(NULL); if (ret == -ENODEV) return 77; conf.ncpus = starpu_cpu_worker_get_count(); conf.ncpus /= 2; starpu_shutdown(); } ret = starpu_init(&conf); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&vector, NX*sizeof(float)); for (i = 0; i < NX; i++) vector[i] = (i+1.0f); FPRINTF(stderr, "BEFORE: First element was %f\n", vector[0]); FPRINTF(stderr, "BEFORE: Last element was %f\n", vector[NX-1]); starpu_data_handle_t vector_handle; starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); float factor = 1.001; for (i = 0; i < ITER; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = vector_handle; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) { ret = 77; break; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_data_unregister(vector_handle); FPRINTF(stderr, "AFTER: First element is %f\n", vector[0]); FPRINTF(stderr, "AFTER: Last element is %f\n", vector[NX-1]); starpu_free_noflag(vector, NX*sizeof(float)); /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); return ret; } starpu-1.4.9+dfsg/examples/spmv/000077500000000000000000000000001507764646700166135ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/spmv/dw_block_spmv.c000066400000000000000000000204661507764646700216200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This computes an SPMV on a BCSR sparse matrix. It simply splits the matrix * into its blocks, thus turning the problem into mere matrix-vector products * (GEMV) which can be run in parallel. */ #include "dw_block_spmv.h" #include "matrix_market/mm_to_bcsr.h" #ifdef STARPU_HAVE_HELGRIND_H #include #endif #ifndef ANNOTATE_HAPPENS_BEFORE #define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0) #endif #ifndef ANNOTATE_HAPPENS_AFTER #define ANNOTATE_HAPPENS_AFTER(obj) ((void)0) #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) static double start; static double end; static sem_t sem; static unsigned c = 256; static unsigned r = 256; static int remainingtasks = -1; static starpu_data_handle_t sparse_matrix; static starpu_data_handle_t vector_in, vector_out; static uint32_t size; static char *inputfile; static bcsr_t *bcsr_matrix; static float *vector_in_ptr; static float *vector_out_ptr; void create_data(void) { /* read the input file */ bcsr_matrix = mm_file_to_bcsr(inputfile, c, r); /* declare the corresponding block CSR to the runtime */ starpu_bcsr_data_register(&sparse_matrix, STARPU_MAIN_RAM, bcsr_matrix->nnz_blocks, bcsr_matrix->nrows_blocks, (uintptr_t)bcsr_matrix->val, bcsr_matrix->colind, bcsr_matrix->rowptr, 0, bcsr_matrix->r, bcsr_matrix->c, sizeof(float)); size = c*r*starpu_bcsr_get_nnz(sparse_matrix); /* printf("size = %d \n ", size); */ /* initiate the 2 vectors */ starpu_malloc((void **)&vector_in_ptr, size*sizeof(float)); assert(vector_in_ptr); starpu_malloc((void **)&vector_out_ptr, size*sizeof(float)); assert(vector_out_ptr); /* fill those */ unsigned ind; for (ind = 0; ind < size; ind++) { vector_in_ptr[ind] = 2.0f; vector_out_ptr[ind] = 0.0f; } starpu_vector_data_register(&vector_in, STARPU_MAIN_RAM, (uintptr_t)vector_in_ptr, size, sizeof(float)); starpu_vector_data_register(&vector_out, STARPU_MAIN_RAM, (uintptr_t)vector_out_ptr, size, sizeof(float)); } void unregister_data(void) { starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM); starpu_data_unregister(sparse_matrix); starpu_data_unpartition(vector_in, STARPU_MAIN_RAM); starpu_data_unregister(vector_in); starpu_data_unpartition(vector_out, STARPU_MAIN_RAM); starpu_data_unregister(vector_out); } void init_problem_callback(void *arg) { unsigned *remaining = arg; unsigned val = STARPU_ATOMIC_ADD(remaining, -1); ANNOTATE_HAPPENS_BEFORE(&remaining); /* if (val < 10) printf("callback %d remaining \n", val); */ if (val == 0) { ANNOTATE_HAPPENS_AFTER(&remaining); printf("DONE ...\n"); end = starpu_timing_now(); sem_post(&sem); } } void call_filters(void) { struct starpu_data_filter bcsr_f; struct starpu_data_filter vector_in_f, vector_out_f; bcsr_f.filter_func = starpu_bcsr_filter_canonical_block; bcsr_f.get_nchildren = starpu_bcsr_filter_canonical_block_get_nchildren; /* the children use a matrix interface ! */ bcsr_f.get_child_ops = starpu_bcsr_filter_canonical_block_child_ops; vector_in_f.filter_func = starpu_vector_filter_block; vector_in_f.nchildren = size/c; vector_in_f.get_nchildren = NULL; vector_in_f.get_child_ops = NULL; vector_out_f.filter_func = starpu_vector_filter_block; vector_out_f.nchildren = size/r; vector_out_f.get_nchildren = NULL; vector_out_f.get_child_ops = NULL; starpu_data_partition(sparse_matrix, &bcsr_f); starpu_data_partition(vector_in, &vector_in_f); starpu_data_partition(vector_out, &vector_out_f); } #define NSPMV 32 unsigned totaltasks; struct starpu_codelet cl = { .cpu_funcs = { cpu_block_spmv}, .cpu_funcs_name = { "cpu_block_spmv" }, #ifdef STARPU_USE_CUDA .cuda_funcs = {cublas_block_spmv}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_RW} }; void launch_spmv_codelets(void) { struct starpu_task *task_tab; uint8_t *is_entry_tab; /* we call one codelet per block */ unsigned nblocks = starpu_bcsr_get_nnz(sparse_matrix); unsigned nrows = starpu_bcsr_get_nrow(sparse_matrix); remainingtasks = NSPMV*nblocks; totaltasks = remainingtasks; unsigned taskid = 0; task_tab = calloc(totaltasks, sizeof(struct starpu_task)); STARPU_ASSERT(task_tab); is_entry_tab = calloc(totaltasks, sizeof(uint8_t)); STARPU_ASSERT(is_entry_tab); printf("there will be %d codelets\n", remainingtasks); uint32_t *rowptr = starpu_bcsr_get_local_rowptr(sparse_matrix); uint32_t *colind = starpu_bcsr_get_local_colind(sparse_matrix); start = starpu_timing_now(); unsigned loop; for (loop = 0; loop < NSPMV; loop++) { unsigned row; unsigned part = 0; for (row = 0; row < nrows; row++) { unsigned index; if (rowptr[row] == rowptr[row+1]) { continue; } for (index = rowptr[row]; index < rowptr[row+1]; index++, part++) { struct starpu_task *task = &task_tab[taskid]; starpu_task_init(task); task->use_tag = 1; task->tag_id = taskid; task->callback_func = init_problem_callback; task->callback_arg = &remainingtasks; task->cl = &cl; task->cl_arg = NULL; unsigned i = colind[index]; unsigned j = row; task->handles[0] = starpu_data_get_sub_data(sparse_matrix, 1, part); task->handles[1] = starpu_data_get_sub_data(vector_in, 1, i); task->handles[2] = starpu_data_get_sub_data(vector_out, 1, j); /* all tasks in the same row are dependent so that we don't wait too much for data * we need to wait on the previous task if we are not the first task of a row */ if (index != rowptr[row & ~0x3]) { /* this is not the first task in the row */ starpu_tag_declare_deps((starpu_tag_t)taskid, 1, (starpu_tag_t)(taskid-1)); is_entry_tab[taskid] = 0; } else { /* this is an entry task */ is_entry_tab[taskid] = 1; } taskid++; } } } printf("start submitting tasks !\n"); /* submit ALL tasks now */ unsigned nchains = 0; unsigned task; for (task = 0; task < totaltasks; task++) { int ret; if (is_entry_tab[task]) { nchains++; } ret = starpu_task_submit(&task_tab[task]); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } printf("end of task submission (there was %u chains for %u tasks : ratio %u tasks per chain) !\n", nchains, totaltasks, totaltasks/nchains); free(is_entry_tab); } void init_problem(void) { /* create the sparse input matrix */ create_data(); /* create a new codelet that will perform a SpMV on it */ call_filters(); } void print_results(void) { unsigned row; for (row = 0; row < STARPU_MIN(size, 16); row++) { printf("%2.2f\t%2.2f\n", vector_in_ptr[row], vector_out_ptr[row]); } } int main(int argc, char *argv[]) { int ret; if (argc < 2) { FPRINTF(stderr, "usage : %s filename [tile size]\n", argv[0]); exit(-1); } if (argc == 3) { /* third argument is the tile size */ char *argptr; r = strtol(argv[2], &argptr, 10); c = r; } inputfile = argv[1]; /* start the runtime */ ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_cublas_init(); sem_init(&sem, 0, 0U); init_problem(); launch_spmv_codelets(); sem_wait(&sem); sem_destroy(&sem); unregister_data(); print_results(); double totalflop = 2.0*c*r*totaltasks; double timing = end - start; FPRINTF(stderr, "Computation took (in ms)\n"); FPRINTF(stdout, "%2.2f\n", timing/1000); FPRINTF(stderr, "Flop %e\n", totalflop); FPRINTF(stderr, "GFlop/s : %2.2f\n", totalflop/timing/1000); starpu_free_noflag(vector_in_ptr, size*sizeof(float)); starpu_free_noflag(vector_out_ptr, size*sizeof(float)); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/spmv/dw_block_spmv.h000066400000000000000000000021171507764646700216160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DW_BLOCK_SPMV_H__ #define __DW_BLOCK_SPMV_H__ #include #include #include #include #include #include #include #include void cpu_block_spmv(void *descr[], void *_args); #ifdef STARPU_USE_CUDA void cublas_block_spmv(void *descr[], void *_args); #endif /* STARPU_USE_CUDA */ #endif /* __DW_BLOCK_SPMV_H__ */ starpu-1.4.9+dfsg/examples/spmv/dw_block_spmv_kernels.c000066400000000000000000000040521507764646700233340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * Standard GEMV kernel (on one matrix block of the sparse matrix) */ #include "dw_block_spmv.h" /* * U22 */ #ifdef STARPU_USE_CUDA #include static const float p1 = 1.0; static const float m1 = -1.0; #endif static inline void common_block_spmv(void *descr[], int s, void *_args) { /* printf("22\n"); */ float *block = (float *)STARPU_MATRIX_GET_PTR(descr[0]); float *in = (float *)STARPU_VECTOR_GET_PTR(descr[1]); float *out = (float *)STARPU_VECTOR_GET_PTR(descr[2]); unsigned dx = STARPU_MATRIX_GET_NX(descr[0]); unsigned dy = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld = STARPU_MATRIX_GET_LD(descr[0]); switch (s) { case 0: cblas_sgemv(CblasRowMajor, CblasNoTrans, dx, dy, 1.0f, block, ld, in, 1, 1.0f, out, 1); break; #ifdef STARPU_USE_CUDA case 1: { cublasStatus_t status = cublasSgemv (starpu_cublas_get_local_handle(), CUBLAS_OP_T, dx, dy, &p1, block, ld, in, 1, &p1, out, 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; } #endif default: STARPU_ABORT(); break; } } void cpu_block_spmv(void *descr[], void *_args) { /* printf("CPU CODELET \n"); */ common_block_spmv(descr, 0, _args); } #ifdef STARPU_USE_CUDA void cublas_block_spmv(void *descr[], void *_args) { /* printf("CUBLAS CODELET \n"); */ common_block_spmv(descr, 1, _args); } #endif /* STARPU_USE_CUDA */ starpu-1.4.9+dfsg/examples/spmv/matrix_market/000077500000000000000000000000001507764646700214625ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/spmv/matrix_market/examples/000077500000000000000000000000001507764646700233005ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/spmv/matrix_market/examples/fidapm05.mtx000066400000000000000000000330101507764646700254340ustar00rootroot00000000000000%%MatrixMarket matrix coordinate real general 42 42 520 1 1 1.9555555555555e+00 2 1 -1.9999999999999e-01 10 1 -1.0666666666667e+00 11 1 -3.5555555555556e-01 19 1 1.3322676295502e-15 20 1 1.1111111111111e-01 25 1 1.3333333333333e-01 26 1 4.4444444444444e-03 27 1 1.3333333333333e-02 1 2 -1.9999999999999e-01 2 2 1.2444444444444e+00 3 2 -2.0000000000001e-01 4 2 -3.3333333333334e-02 10 2 -3.5555555555555e-01 11 2 -4.0000000000000e-01 12 2 -3.5555555555555e-01 13 2 1.1111111111111e-01 19 2 1.1111111111111e-01 20 2 -6.6666666666669e-02 21 2 1.1111111111111e-01 22 2 -2.2222222222222e-02 25 2 3.3333333333333e-02 26 2 1.1111111111111e-03 27 2 6.6666666666666e-03 28 2 3.3333333333334e-02 29 2 1.1111111111111e-03 30 2 6.6666666666669e-03 2 3 -2.0000000000001e-01 3 3 1.9555555555555e+00 4 3 -1.9999999999999e-01 11 3 -3.5555555555555e-01 12 3 -1.0666666666667e+00 13 3 -3.5555555555556e-01 20 3 1.1111111111111e-01 21 3 3.5527136788005e-15 22 3 1.1111111111111e-01 28 3 1.3333333333333e-01 29 3 4.4444444444444e-03 30 3 4.0000000000000e-02 2 4 -3.3333333333334e-02 3 4 -1.9999999999999e-01 4 4 1.2444444444444e+00 5 4 -2.0000000000001e-01 6 4 -3.3333333333337e-02 11 4 1.1111111111111e-01 12 4 -3.5555555555554e-01 13 4 -4.0000000000000e-01 14 4 -3.5555555555555e-01 15 4 1.1111111111112e-01 20 4 -2.2222222222222e-02 21 4 1.1111111111111e-01 22 4 -6.6666666666664e-02 23 4 1.1111111111111e-01 24 4 -2.2222222222223e-02 28 4 3.3333333333331e-02 29 4 1.1111111111111e-03 30 4 1.3333333333333e-02 31 4 3.3333333333334e-02 32 4 1.1111111111111e-03 33 4 1.3333333333334e-02 4 5 -2.0000000000001e-01 5 5 1.9555555555555e+00 6 5 -1.9999999999997e-01 13 5 -3.5555555555555e-01 14 5 -1.0666666666667e+00 15 5 -3.5555555555557e-01 22 5 1.1111111111111e-01 23 5 7.5495165674511e-15 24 5 1.1111111111111e-01 31 5 1.3333333333333e-01 32 5 4.4444444444444e-03 33 5 6.6666666666666e-02 4 6 -3.3333333333337e-02 5 6 -1.9999999999997e-01 6 6 1.2444444444445e+00 7 6 -2.0000000000003e-01 8 6 -3.3333333333331e-02 13 6 1.1111111111111e-01 14 6 -3.5555555555555e-01 15 6 -4.0000000000001e-01 16 6 -3.5555555555554e-01 17 6 1.1111111111111e-01 22 6 -2.2222222222223e-02 23 6 1.1111111111111e-01 24 6 -6.6666666666670e-02 31 6 3.3333333333334e-02 32 6 1.1111111111111e-03 33 6 2.0000000000000e-02 34 6 3.3333333333332e-02 35 6 1.1111111111110e-03 36 6 1.9999999999999e-02 40 6 1.1111111111111e-01 41 6 -2.2222222222222e-02 6 7 -2.0000000000003e-01 7 7 1.9555555555555e+00 8 7 -1.9999999999998e-01 15 7 -3.5555555555554e-01 16 7 -1.0666666666666e+00 17 7 -3.5555555555554e-01 24 7 1.1111111111110e-01 34 7 1.3333333333333e-01 35 7 4.4444444444444e-03 36 7 9.3333333333331e-02 40 7 -6.6613381477509e-15 41 7 1.1111111111112e-01 6 8 -3.3333333333331e-02 7 8 -1.9999999999998e-01 8 8 1.2444444444445e+00 9 8 -2.0000000000005e-01 15 8 1.1111111111111e-01 16 8 -3.5555555555558e-01 17 8 -3.9999999999998e-01 18 8 -3.5555555555556e-01 24 8 -2.2222222222222e-02 34 8 3.3333333333333e-02 35 8 1.1111111111112e-03 36 8 2.6666666666666e-02 37 8 3.3333333333334e-02 38 8 1.1111111111111e-03 39 8 2.6666666666667e-02 40 8 1.1111111111111e-01 41 8 -6.6666666666668e-02 42 8 1.1111111111111e-01 8 9 -2.0000000000005e-01 9 9 1.9555555555556e+00 17 9 -3.5555555555552e-01 18 9 -1.0666666666667e+00 37 9 1.3333333333333e-01 38 9 4.4444444444443e-03 39 9 1.2000000000000e-01 41 9 1.1111111111111e-01 42 9 3.1086244689504e-15 1 10 -1.0666666666667e+00 2 10 -3.5555555555555e-01 10 10 5.6888888888889e+00 11 10 -1.0666666666667e+00 19 10 -1.0666666666667e+00 20 10 -3.5555555555555e-01 25 10 1.1102230246252e-16 26 10 1.7777777777778e-02 27 10 1.3877787807814e-17 1 11 -3.5555555555556e-01 2 11 -4.0000000000000e-01 3 11 -3.5555555555555e-01 4 11 1.1111111111111e-01 10 11 -1.0666666666667e+00 11 11 3.9111111111110e+00 12 11 -1.0666666666666e+00 13 11 -5.3290705182007e-15 19 11 -3.5555555555555e-01 20 11 -3.9999999999999e-01 21 11 -3.5555555555555e-01 22 11 1.1111111111111e-01 26 11 4.4444444444443e-03 27 11 5.5511151231258e-17 28 11 5.5511151231258e-17 29 11 4.4444444444446e-03 30 11 -3.4694469519536e-17 2 12 -3.5555555555555e-01 3 12 -1.0666666666667e+00 4 12 -3.5555555555554e-01 11 12 -1.0666666666666e+00 12 12 5.6888888888888e+00 13 12 -1.0666666666667e+00 20 12 -3.5555555555555e-01 21 12 -1.0666666666667e+00 22 12 -3.5555555555555e-01 28 12 2.2204460492503e-16 29 12 1.7777777777778e-02 30 12 1.9428902930940e-16 2 13 1.1111111111111e-01 3 13 -3.5555555555556e-01 4 13 -4.0000000000000e-01 5 13 -3.5555555555555e-01 6 13 1.1111111111111e-01 11 13 -5.3290705182007e-15 12 13 -1.0666666666667e+00 13 13 3.9111111111111e+00 14 13 -1.0666666666667e+00 15 13 -2.6645352591004e-15 20 13 1.1111111111111e-01 21 13 -3.5555555555556e-01 22 13 -3.9999999999999e-01 23 13 -3.5555555555555e-01 24 13 1.1111111111111e-01 28 13 2.2204460492503e-16 29 13 4.4444444444443e-03 30 13 1.6653345369377e-16 31 13 1.1102230246252e-16 32 13 4.4444444444446e-03 33 13 6.9388939039072e-17 4 14 -3.5555555555555e-01 5 14 -1.0666666666667e+00 6 14 -3.5555555555555e-01 13 14 -1.0666666666667e+00 14 14 5.6888888888888e+00 15 14 -1.0666666666666e+00 22 14 -3.5555555555555e-01 23 14 -1.0666666666667e+00 24 14 -3.5555555555554e-01 31 14 -7.7715611723761e-16 32 14 1.7777777777778e-02 33 14 -1.6653345369377e-16 4 15 1.1111111111112e-01 5 15 -3.5555555555557e-01 6 15 -4.0000000000001e-01 7 15 -3.5555555555554e-01 8 15 1.1111111111111e-01 13 15 -2.6645352591004e-15 14 15 -1.0666666666666e+00 15 15 3.9111111111110e+00 16 15 -1.0666666666667e+00 22 15 1.1111111111112e-01 23 15 -3.5555555555557e-01 24 15 -3.9999999999999e-01 31 15 4.4408920985006e-16 32 15 4.4444444444444e-03 34 15 -4.7184478546569e-16 35 15 4.4444444444444e-03 36 15 -3.1918911957973e-16 40 15 -3.5555555555555e-01 41 15 1.1111111111111e-01 6 16 -3.5555555555554e-01 7 16 -1.0666666666666e+00 8 16 -3.5555555555558e-01 15 16 -1.0666666666667e+00 16 16 5.6888888888888e+00 17 16 -1.0666666666667e+00 24 16 -3.5555555555553e-01 35 16 1.7777777777777e-02 36 16 3.8857805861880e-16 40 16 -1.0666666666666e+00 41 16 -3.5555555555558e-01 6 17 1.1111111111111e-01 7 17 -3.5555555555554e-01 8 17 -3.9999999999998e-01 9 17 -3.5555555555552e-01 16 17 -1.0666666666667e+00 17 17 3.9111111111111e+00 18 17 -1.0666666666667e+00 24 17 1.1111111111111e-01 34 17 -2.2204460492503e-16 35 17 4.4444444444441e-03 36 17 -2.2204460492503e-16 37 17 -1.3877787807815e-16 38 17 4.4444444444448e-03 39 17 -2.7755575615629e-16 40 17 -3.5555555555553e-01 41 17 -3.9999999999997e-01 42 17 -3.5555555555551e-01 8 18 -3.5555555555556e-01 9 18 -1.0666666666667e+00 17 18 -1.0666666666667e+00 18 18 5.6888888888888e+00 37 18 -6.6613381477509e-16 38 18 1.7777777777778e-02 39 18 -3.3306690738755e-16 41 18 -3.5555555555555e-01 42 18 -1.0666666666667e+00 1 19 1.3322676295502e-15 2 19 1.1111111111111e-01 10 19 -1.0666666666667e+00 11 19 -3.5555555555555e-01 19 19 1.9555555555556e+00 20 19 -2.0000000000000e-01 25 19 -1.3333333333333e-01 26 19 -2.2222222222222e-02 27 19 -1.3333333333333e-02 1 20 1.1111111111111e-01 2 20 -6.6666666666669e-02 3 20 1.1111111111111e-01 4 20 -2.2222222222222e-02 10 20 -3.5555555555555e-01 11 20 -3.9999999999999e-01 12 20 -3.5555555555555e-01 13 20 1.1111111111111e-01 19 20 -2.0000000000000e-01 20 20 1.2444444444444e+00 21 20 -2.0000000000001e-01 22 20 -3.3333333333331e-02 25 20 -3.3333333333334e-02 26 20 -5.5555555555556e-03 27 20 -6.6666666666667e-03 28 20 -3.3333333333334e-02 29 20 -5.5555555555557e-03 30 20 -6.6666666666669e-03 2 21 1.1111111111111e-01 3 21 3.5527136788005e-15 4 21 1.1111111111111e-01 11 21 -3.5555555555555e-01 12 21 -1.0666666666667e+00 13 21 -3.5555555555556e-01 20 21 -2.0000000000001e-01 21 21 1.9555555555556e+00 22 21 -2.0000000000000e-01 28 21 -1.3333333333333e-01 29 21 -2.2222222222222e-02 30 21 -4.0000000000000e-02 2 22 -2.2222222222222e-02 3 22 1.1111111111111e-01 4 22 -6.6666666666664e-02 5 22 1.1111111111111e-01 6 22 -2.2222222222223e-02 11 22 1.1111111111111e-01 12 22 -3.5555555555555e-01 13 22 -3.9999999999999e-01 14 22 -3.5555555555555e-01 15 22 1.1111111111112e-01 20 22 -3.3333333333331e-02 21 22 -2.0000000000000e-01 22 22 1.2444444444444e+00 23 22 -2.0000000000001e-01 24 22 -3.3333333333335e-02 28 22 -3.3333333333332e-02 29 22 -5.5555555555553e-03 30 22 -1.3333333333333e-02 31 22 -3.3333333333334e-02 32 22 -5.5555555555556e-03 33 22 -1.3333333333333e-02 4 23 1.1111111111111e-01 5 23 7.5495165674511e-15 6 23 1.1111111111111e-01 13 23 -3.5555555555555e-01 14 23 -1.0666666666667e+00 15 23 -3.5555555555557e-01 22 23 -2.0000000000001e-01 23 23 1.9555555555555e+00 24 23 -1.9999999999998e-01 31 23 -1.3333333333333e-01 32 23 -2.2222222222222e-02 33 23 -6.6666666666667e-02 4 24 -2.2222222222223e-02 5 24 1.1111111111111e-01 6 24 -6.6666666666670e-02 7 24 1.1111111111110e-01 8 24 -2.2222222222222e-02 13 24 1.1111111111111e-01 14 24 -3.5555555555554e-01 15 24 -3.9999999999999e-01 16 24 -3.5555555555553e-01 17 24 1.1111111111111e-01 22 24 -3.3333333333335e-02 23 24 -1.9999999999998e-01 24 24 1.2444444444444e+00 31 24 -3.3333333333334e-02 32 24 -5.5555555555556e-03 33 24 -2.0000000000000e-02 34 24 -3.3333333333331e-02 35 24 -5.5555555555552e-03 36 24 -1.9999999999999e-02 40 24 -2.0000000000004e-01 41 24 -3.3333333333329e-02 1 25 1.3333333333333e-01 2 25 3.3333333333333e-02 10 25 1.1102230246252e-16 19 25 -1.3333333333333e-01 20 25 -3.3333333333334e-02 25 25 0.0000000000000e+00 1 26 4.4444444444444e-03 2 26 1.1111111111111e-03 10 26 1.7777777777778e-02 11 26 4.4444444444443e-03 19 26 -2.2222222222222e-02 20 26 -5.5555555555556e-03 26 26 0.0000000000000e+00 1 27 1.3333333333333e-02 2 27 6.6666666666666e-03 10 27 1.3877787807814e-17 11 27 5.5511151231258e-17 19 27 -1.3333333333333e-02 20 27 -6.6666666666667e-03 27 27 0.0000000000000e+00 2 28 3.3333333333334e-02 3 28 1.3333333333333e-01 4 28 3.3333333333331e-02 11 28 5.5511151231258e-17 12 28 2.2204460492503e-16 13 28 2.2204460492503e-16 20 28 -3.3333333333334e-02 21 28 -1.3333333333333e-01 22 28 -3.3333333333332e-02 28 28 0.0000000000000e+00 2 29 1.1111111111111e-03 3 29 4.4444444444444e-03 4 29 1.1111111111111e-03 11 29 4.4444444444446e-03 12 29 1.7777777777778e-02 13 29 4.4444444444443e-03 20 29 -5.5555555555557e-03 21 29 -2.2222222222222e-02 22 29 -5.5555555555553e-03 29 29 0.0000000000000e+00 2 30 6.6666666666669e-03 3 30 4.0000000000000e-02 4 30 1.3333333333333e-02 11 30 -3.4694469519536e-17 12 30 1.9428902930940e-16 13 30 1.6653345369377e-16 20 30 -6.6666666666669e-03 21 30 -4.0000000000000e-02 22 30 -1.3333333333333e-02 30 30 0.0000000000000e+00 4 31 3.3333333333334e-02 5 31 1.3333333333333e-01 6 31 3.3333333333334e-02 13 31 1.1102230246252e-16 14 31 -7.7715611723761e-16 15 31 4.4408920985006e-16 22 31 -3.3333333333334e-02 23 31 -1.3333333333333e-01 24 31 -3.3333333333334e-02 31 31 0.0000000000000e+00 4 32 1.1111111111111e-03 5 32 4.4444444444444e-03 6 32 1.1111111111111e-03 13 32 4.4444444444446e-03 14 32 1.7777777777778e-02 15 32 4.4444444444444e-03 22 32 -5.5555555555556e-03 23 32 -2.2222222222222e-02 24 32 -5.5555555555556e-03 32 32 0.0000000000000e+00 4 33 1.3333333333334e-02 5 33 6.6666666666666e-02 6 33 2.0000000000000e-02 13 33 6.9388939039072e-17 14 33 -1.6653345369377e-16 22 33 -1.3333333333333e-02 23 33 -6.6666666666667e-02 24 33 -2.0000000000000e-02 33 33 0.0000000000000e+00 6 34 3.3333333333332e-02 7 34 1.3333333333333e-01 8 34 3.3333333333333e-02 15 34 -4.7184478546569e-16 17 34 -2.2204460492503e-16 24 34 -3.3333333333331e-02 34 34 0.0000000000000e+00 40 34 -1.3333333333333e-01 41 34 -3.3333333333334e-02 6 35 1.1111111111110e-03 7 35 4.4444444444444e-03 8 35 1.1111111111112e-03 15 35 4.4444444444444e-03 16 35 1.7777777777777e-02 17 35 4.4444444444441e-03 24 35 -5.5555555555552e-03 35 35 0.0000000000000e+00 40 35 -2.2222222222222e-02 41 35 -5.5555555555555e-03 6 36 1.9999999999999e-02 7 36 9.3333333333331e-02 8 36 2.6666666666666e-02 15 36 -3.1918911957973e-16 16 36 3.8857805861880e-16 17 36 -2.2204460492503e-16 24 36 -1.9999999999999e-02 36 36 0.0000000000000e+00 40 36 -9.3333333333331e-02 41 36 -2.6666666666667e-02 8 37 3.3333333333334e-02 9 37 1.3333333333333e-01 17 37 -1.3877787807815e-16 18 37 -6.6613381477509e-16 37 37 0.0000000000000e+00 41 37 -3.3333333333334e-02 42 37 -1.3333333333333e-01 8 38 1.1111111111111e-03 9 38 4.4444444444443e-03 17 38 4.4444444444448e-03 18 38 1.7777777777778e-02 38 38 0.0000000000000e+00 41 38 -5.5555555555556e-03 42 38 -2.2222222222222e-02 8 39 2.6666666666667e-02 9 39 1.2000000000000e-01 17 39 -2.7755575615629e-16 18 39 -3.3306690738755e-16 39 39 0.0000000000000e+00 41 39 -2.6666666666667e-02 42 39 -1.2000000000000e-01 6 40 1.1111111111111e-01 7 40 -6.6613381477509e-15 8 40 1.1111111111111e-01 15 40 -3.5555555555555e-01 16 40 -1.0666666666666e+00 17 40 -3.5555555555553e-01 24 40 -2.0000000000004e-01 34 40 -1.3333333333333e-01 35 40 -2.2222222222222e-02 36 40 -9.3333333333331e-02 40 40 1.9555555555555e+00 41 40 -1.9999999999998e-01 6 41 -2.2222222222222e-02 7 41 1.1111111111112e-01 8 41 -6.6666666666668e-02 9 41 1.1111111111111e-01 15 41 1.1111111111111e-01 16 41 -3.5555555555558e-01 17 41 -3.9999999999997e-01 18 41 -3.5555555555555e-01 24 41 -3.3333333333329e-02 34 41 -3.3333333333334e-02 35 41 -5.5555555555555e-03 36 41 -2.6666666666667e-02 37 41 -3.3333333333334e-02 38 41 -5.5555555555556e-03 39 41 -2.6666666666667e-02 40 41 -1.9999999999998e-01 41 41 1.2444444444445e+00 42 41 -2.0000000000005e-01 8 42 1.1111111111111e-01 9 42 3.1086244689504e-15 17 42 -3.5555555555551e-01 18 42 -1.0666666666667e+00 37 42 -1.3333333333333e-01 38 42 -2.2222222222222e-02 39 42 -1.2000000000000e-01 41 42 -2.0000000000005e-01 42 42 1.9555555555556e+00 starpu-1.4.9+dfsg/examples/spmv/matrix_market/mm_to_bcsr.c000066400000000000000000000206131507764646700237540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "mm_to_bcsr.h" /* Some debug functions */ static void print_block(tmp_block_t *block, unsigned r, unsigned c) { printf(" **** block %u %u **** \n", block->i, block->j); unsigned i, j; for (j = 0; j < r; j++) { for (i = 0; i < c; i++) { printf("%2.2f\t", block->val[i + j*c]); } printf("\n"); } } static void print_all_blocks(tmp_block_t *block_list, unsigned r, unsigned c) { tmp_block_t *current_block = block_list; while(current_block) { print_block(current_block, r, c); current_block = current_block->next; } } static void print_bcsr(bcsr_t *bcsr) { fprintf(stderr, "** BSCR **\n"); fprintf(stderr, "non zero - blocks = %u\n", bcsr->nnz_blocks); fprintf(stderr, "nrows - blocks = %u\n", bcsr->nrows_blocks); fprintf(stderr, "block size : c %u r %u\n", bcsr->c, bcsr->r); } static unsigned count_blocks(tmp_block_t *block_list) { unsigned count = 0; tmp_block_t *current_block = block_list; while(current_block) { count++; current_block = current_block->next; } return count; } static unsigned count_row_blocks(tmp_block_t *block_list) { unsigned maxrow = 0; tmp_block_t *current_block = block_list; while(current_block) { if (current_block->j > maxrow) maxrow = current_block->j; current_block = current_block->next; } return (maxrow+1); } /* Find the block that corresponds to (i,j) if it exists in the list */ static tmp_block_t *search_block(tmp_block_t *block_list, unsigned i, unsigned j) { tmp_block_t *current_block = block_list; /* printf("search %d %d\n", i, j); */ while (current_block) { if ((current_block->i == i) && (current_block->j == j)) { /* we found the block */ return current_block; } current_block = current_block->next; }; /* no entry was found ... */ return NULL; } static tmp_block_t *create_block(unsigned c, unsigned r) { tmp_block_t *block; block = malloc(sizeof(tmp_block_t)); block->val = calloc(c*r, sizeof(float)); return block; } /* determine if next block is bigger in lexical order */ static unsigned next_block_is_bigger(tmp_block_t *block, unsigned i, unsigned j) { tmp_block_t *next = block->next; if (next) { /* we evaluate lexical order */ if (next->j < j) return 0; if (next->j > j) return 1; /* next->j == j */ return (next->i > i); } /* this is the last block, so it's bigger */ return 1; } /* we insert a block in the list, directly at the appropriate place */ static void insert_block(tmp_block_t *block, tmp_block_t **block_list, unsigned i, unsigned j) { /* insert block at the beginning of the list */ /*block->next = *block_list; *block_list = block; */ /* insert the block in lexicographical order */ /* first find an element that is bigger, then insert the block just before it */ tmp_block_t *current_block = *block_list; if (!current_block) { /* list was empty */ *block_list = block; block->next = NULL; return; } while (current_block) { if (next_block_is_bigger(current_block, i, j)) { /* insert block here */ block->next = current_block->next; current_block->next = block; return; } current_block = current_block->next; }; /* should not be reached ! */ } /* we add an element to the list of blocks, it is either added to an existing block or in a block specifically created if there was none */ static void insert_elem(tmp_block_t **block_list, unsigned abs_i, unsigned abs_j, float val, unsigned c, unsigned r) { /* we are looking for the block that contains (abs_i, abs_j) (abs = absolute) */ unsigned i,j; i = abs_i / c; j = abs_j / r; tmp_block_t *block; block = search_block(*block_list, i, j); if (!block) { /* the block does not exist yet */ /* create it */ block = create_block(c, r); block->i = i; block->j = j; /* printf("create block %d %d !\n", i, j); */ /* insert it in the block list */ insert_block(block, block_list, i, j); } /* now insert the value in the corresponding block */ unsigned local_i, local_j, local_index; local_i = abs_i % c; local_j = abs_j % r; local_index = local_j * c + local_i; block->val[local_index] = val; } /* transform a list of values (with coordinates) into a list of blocks that are easily processed into BCSR */ static tmp_block_t * mm_to_blocks(int nz, unsigned *I, unsigned *J, float *val, unsigned c, unsigned r) { int elem; /* at first, the list of block is empty */ tmp_block_t *block_list = NULL; for (elem = 0; elem < nz; elem++) { insert_elem(&block_list, I[elem], J[elem], val[elem], c, r); } return block_list; } static void fill_bcsr(tmp_block_t *block_list, unsigned c, unsigned r, bcsr_t *bcsr) { unsigned block = 0; unsigned current_offset = 0; size_t block_size = c*r*sizeof(float); tmp_block_t *current_block = block_list; while(current_block) { /* copy the val from the block to the contiguous area in the BCSR */ memcpy(&bcsr->val[current_offset], current_block->val, block_size); /* write the the index of the block * XXX should it be in blocks ? */ bcsr->colind[block] = current_block->i; if ((bcsr->rowptr[current_block->j] == 0) && (current_block->j != 0)) { /* this is the first element of the line */ bcsr->rowptr[current_block->j] = block; } block++; current_offset = block*c*r; current_block = current_block->next; }; /* for all lines where there were no block at all (XXX), fill the 0 in rowptr */ /* the first row must start at 0 ? */ bcsr->rowptr[0] = 0; unsigned row; for (row = 1; row < bcsr->nrows_blocks; row++) { if (bcsr->rowptr[row] == 0) bcsr->rowptr[row] = bcsr->rowptr[row-1]; } bcsr->rowptr[bcsr->nrows_blocks] = bcsr->nnz_blocks; } static bcsr_t * blocks_to_bcsr(tmp_block_t *block_list, unsigned c, unsigned r) { unsigned nblocks; /* print_all_blocks(block_list, r, c); */ nblocks = count_blocks(block_list); bcsr_t *bcsr = malloc(sizeof(bcsr_t)); bcsr->nnz_blocks = nblocks; bcsr->r = r; bcsr->c = c; unsigned nrows_blocks = count_row_blocks(block_list); bcsr->nrows_blocks = nrows_blocks; bcsr->val = malloc(nblocks*r*c*sizeof(float)); bcsr->colind = malloc(nblocks*sizeof(unsigned)); bcsr->rowptr = calloc((nrows_blocks + 1), sizeof(unsigned)); fill_bcsr(block_list, c, r, bcsr); return bcsr; } bcsr_t *mm_to_bcsr(unsigned nz, unsigned *I, unsigned *J, float *val, unsigned c, unsigned r) { bcsr_t *bcsr; tmp_block_t *block_list; block_list = mm_to_blocks(nz, I, J, val, c, r); bcsr = blocks_to_bcsr(block_list, c, r); print_bcsr(bcsr); return bcsr; } bcsr_t *mm_file_to_bcsr(char *filename, unsigned c, unsigned r) { FILE *f; MM_typecode matcode; int M, N; int nz; int i; unsigned *I, *J; float *val; bcsr_t *bcsr; if ((f = fopen(filename, "r")) == NULL) { fprintf(stderr, "File <%s> not found\n", filename); exit(1); } if (mm_read_banner(f, &matcode) != 0) { printf("Could not process Matrix Market banner.\n"); exit(1); } /* This is how one can screen matrix types if their application */ /* only supports a subset of the Matrix Market data types. */ if (mm_is_complex(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode)) { printf("Sorry, this application does not support "); printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode)); exit(1); } /* find out size of sparse matrix .... */ if ((mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) exit(1); /* reseve memory for matrices */ I = malloc(nz * sizeof(unsigned)); J = malloc(nz * sizeof(unsigned)); /* XXX float ! */ val = (float *) malloc(nz * sizeof(float)); for (i=0; i #include #include #include #include "mmio.h" /* convert a matrix stored in a file with the matrix market format into the * BCSR format */ typedef struct tmp_block { /* we have a linked list of blocks */ struct tmp_block *next; /* column i, row j*/ unsigned i, j; float *val; } tmp_block_t; typedef struct { unsigned r,c; unsigned nnz_blocks; unsigned nrows_blocks; float *val; uint32_t *colind; uint32_t *rowptr; } bcsr_t; /* directly read input from a file */ bcsr_t *mm_file_to_bcsr(char *filename, unsigned c, unsigned r); /* read the matrix as a set of valuated coordinates */ bcsr_t *mm_to_bcsr(unsigned nz, unsigned *I_, unsigned *J, float *val, unsigned c, unsigned r); starpu-1.4.9+dfsg/examples/spmv/matrix_market/mmio.c000066400000000000000000000301621507764646700225710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * Matrix Market I/O library for ANSI C * * See http://math.nist.gov/MatrixMarket for details. * * */ #include #include #include #include #include #include "mmio.h" int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, double **val_, int **I_, int **J_) { FILE *f; MM_typecode matcode; int M, N, nz; int i; double *val; int *I, *J; if ((f = fopen(fname, "r")) == NULL) { fprintf(stderr, "File <%s> not found\n", fname); return -1; } if (mm_read_banner(f, &matcode) != 0) { fprintf(stderr, "mm_read_unsymetric: Could not process Matrix Market banner "); fprintf(stderr, " in file [%s]\n", fname); return -1; } if (!(mm_is_real(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode))) { fprintf(stderr, "Sorry, this application does not support "); fprintf(stderr, "Market Market type: [%s]\n", mm_typecode_to_str(matcode)); return -1; } /* find out size of sparse matrix: M, N, nz .... */ if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0) { fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n"); return -1; } *M_ = M; *N_ = N; *nz_ = nz; /* reseve memory for matrices */ I = (int *) malloc(nz * sizeof(int)); J = (int *) malloc(nz * sizeof(int)); val = (double *) malloc(nz * sizeof(double)); *val_ = val; *I_ = I; *J_ = J; /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ for (i=0; i 0) { nzval[pos] = LOWER_BAND; colind[pos] = row-1; pos++; } nzval[pos] = MIDDLE_BAND; colind[pos] = row; pos++; if (row < size - 1) { nzval[pos] = UPPER_BAND; colind[pos] = row+1; pos++; } } STARPU_ASSERT(pos == nnz); rowptr[size] = nnz; /* initiate the 2 vectors */ starpu_malloc((void **)&vector_in_ptr, size*sizeof(float)); starpu_malloc((void **)&vector_out_ptr, size*sizeof(float)); starpu_malloc((void **)&vector_exp_out_ptr, size*sizeof(float)); assert(vector_in_ptr && vector_out_ptr && vector_exp_out_ptr); /* fill them */ for (ind = 0; ind < size; ind++) { vector_in_ptr[ind] = ind % 100; vector_out_ptr[ind] = 0.0f; } /* * Register the CSR matrix and the 2 vectors */ starpu_csr_data_register(&sparse_matrix, STARPU_MAIN_RAM, nnz, size, (uintptr_t)nzval, colind, rowptr, 0, sizeof(float)); starpu_vector_data_register(&vector_in, STARPU_MAIN_RAM, (uintptr_t)vector_in_ptr, size, sizeof(float)); starpu_vector_data_register(&vector_out, STARPU_MAIN_RAM, (uintptr_t)vector_out_ptr, size, sizeof(float)); /* * Partition the CSR matrix and the output vector */ csr_f.nchildren = nblocks; vector_f.nchildren = nblocks; starpu_data_partition(sparse_matrix, &csr_f); starpu_data_partition(vector_out, &vector_f); /* * If we use OpenCL, we need to compile the SpMV kernel */ #ifdef STARPU_USE_OPENCL compile_spmv_opencl_kernel(); #endif start = starpu_timing_now(); /* * Create and submit StarPU tasks */ for (part = 0; part < nblocks; part++) { struct starpu_task *task = starpu_task_create(); task->cl = &spmv_cl; task->handles[0] = starpu_data_get_sub_data(sparse_matrix, 1, part); task->handles[1] = vector_in; task->handles[2] = starpu_data_get_sub_data(vector_out, 1, part); ret = starpu_task_submit(task); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); exit(0); } } starpu_task_wait_for_all(); end = starpu_timing_now(); /* * Unregister the CSR matrix and the output vector */ starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM); starpu_data_unpartition(vector_out, STARPU_MAIN_RAM); /* * Unregister data */ starpu_data_unregister(sparse_matrix); starpu_data_unregister(vector_in); starpu_data_unregister(vector_out); /* * Display the result */ for (row = 0; row < STARPU_MIN(size, 16); row++) { FPRINTF(stdout, "%2.2f\t%2.2f\n", vector_in_ptr[row], vector_out_ptr[row]); } /* Check the result */ memset(vector_exp_out_ptr, 0, sizeof(vector_exp_out_ptr[0])*size); for (row = 0; row < size; row++) { if (row > 0) vector_exp_out_ptr[row] += LOWER_BAND * vector_in_ptr[row-1]; vector_exp_out_ptr[row] += MIDDLE_BAND * vector_in_ptr[row]; if (row < size-1) vector_exp_out_ptr[row] += UPPER_BAND * vector_in_ptr[row+1]; } for (row = 0; row < size; row++) { if (vector_out_ptr[row] != vector_exp_out_ptr[row]) { FPRINTF(stderr, "check failed at %u: %f vs expected %f\n", row, vector_out_ptr[row], vector_exp_out_ptr[row]); exit(EXIT_FAILURE); } } starpu_free_noflag(nzval, nnz*sizeof(float)); starpu_free_noflag(colind, nnz*sizeof(uint32_t)); starpu_free_noflag(rowptr, (size+1)*sizeof(uint32_t)); starpu_free_noflag(vector_in_ptr, size*sizeof(float)); starpu_free_noflag(vector_out_ptr, size*sizeof(float)); starpu_free_noflag(vector_exp_out_ptr, size*sizeof(float)); /* * Stop StarPU */ starpu_shutdown(); timing = end - start; FPRINTF(stderr, "Computation took (in ms)\n"); FPRINTF(stdout, "%2.2f\n", timing/1000); return 0; } starpu-1.4.9+dfsg/examples/spmv/spmv.h000066400000000000000000000023371507764646700177560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __SPMV_H__ #define __SPMV_H__ #include #include #include #include #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #ifdef STARPU_USE_CUDA void spmv_kernel_cuda(void *descr[], void *args); #endif #ifdef STARPU_USE_OPENCL void spmv_kernel_opencl(void *descr[], void *args); void compile_spmv_opencl_kernel(void); #endif void spmv_kernel_cpu(void *descr[], void *arg); #endif /* __SPMV_H__ */ starpu-1.4.9+dfsg/examples/spmv/spmv_cuda.cu000066400000000000000000000064761507764646700211420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* CUDA kernel for SPMV */ #include #define MIN(a,b) ((a)<(b)?(a):(b)) extern "C" __global__ void spmv_kernel(uint32_t nnz, uint32_t nrow, float *nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, uint32_t elemsize, float *vecin, uint32_t nx_in, uint32_t elemsize1, float * vecout, uint32_t nx_out, uint32_t elemsize2) { /* only one dimension is used here */ unsigned nthreads = gridDim.x*blockDim.x; unsigned threadid = threadIdx.x + blockIdx.x*blockDim.x; unsigned rowstart = threadid * ((nrow + (nthreads - 1))/nthreads); unsigned rowend = MIN(nrow, (threadid+1) * ((nrow + (nthreads - 1))/nthreads)); unsigned row; for (row = rowstart; row < rowend; row++) { float tmp = 0.0f; unsigned index; unsigned firstindex = rowptr[row] - firstentry; unsigned lastindex = rowptr[row+1] - firstentry; for (index = firstindex; index < lastindex; index++) { tmp += nzval[index]*vecin[colind[index]]; } vecout[row] = tmp; } } extern "C" __global__ void spmv_kernel_3(uint32_t nnz, uint32_t nrow, float *nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, float *vecin, uint32_t nx_in, float * vecout, uint32_t nx_out) { /* only one dimension is used here */ unsigned block_rowstart = blockIdx.x*( (nrow + gridDim.x - 1)/gridDim.x ); unsigned block_rowend = MIN((blockIdx.x+1)*( (nrow + gridDim.x - 1)/gridDim.x ), nrow); unsigned row; for (row = block_rowstart + threadIdx.x; row < block_rowend; row+=blockDim.x) { float tmp = 0.0f; unsigned index; unsigned firstindex = rowptr[row] - firstentry; unsigned lastindex = rowptr[row+1] - firstentry; for (index = firstindex; index < lastindex; index++) { tmp += nzval[index]*vecin[colind[index]]; } vecout[row] = tmp; } } extern "C" void spmv_kernel_cuda(void *descr[], void *args) { uint32_t nnz = STARPU_CSR_GET_NNZ(descr[0]); uint32_t nrow = STARPU_CSR_GET_NROW(descr[0]); float *nzval = (float *)STARPU_CSR_GET_NZVAL(descr[0]); uint32_t *colind = STARPU_CSR_GET_COLIND(descr[0]); uint32_t *rowptr = STARPU_CSR_GET_ROWPTR(descr[0]); uint32_t firstentry = STARPU_CSR_GET_FIRSTENTRY(descr[0]); float *vecin = (float *)STARPU_VECTOR_GET_PTR(descr[1]); uint32_t nx_in = STARPU_VECTOR_GET_NX(descr[1]); float *vecout = (float *)STARPU_VECTOR_GET_PTR(descr[2]); uint32_t nx_out = STARPU_VECTOR_GET_NX(descr[2]); dim3 dimBlock(8, 1); dim3 dimGrid(512, 1); spmv_kernel_3<<>> (nnz, nrow, nzval, colind, rowptr, firstentry, vecin, nx_in, vecout, nx_out); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/spmv/spmv_kernels.c000066400000000000000000000102711507764646700214700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* OpenCL codelet for SPMV */ #include "spmv.h" #ifdef STARPU_USE_OPENCL struct starpu_opencl_program opencl_codelet; void spmv_kernel_opencl(void *descr[], void *args) { cl_kernel kernel; cl_command_queue queue; int id, devid, err, n; (void)args; int nnz = (int) STARPU_CSR_GET_NNZ(descr[0]); int nrow = (int) STARPU_CSR_GET_NROW(descr[0]); cl_mem nzval = (cl_mem)STARPU_CSR_GET_NZVAL(descr[0]); cl_mem colind = (cl_mem)STARPU_CSR_GET_COLIND(descr[0]); cl_mem rowptr = (cl_mem)STARPU_CSR_GET_ROWPTR(descr[0]); int firstentry = STARPU_CSR_GET_FIRSTENTRY(descr[0]); cl_mem vecin = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[1]); int nx_in = (int)STARPU_VECTOR_GET_NX(descr[1]); cl_mem vecout = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[2]); int nx_out = (int)STARPU_VECTOR_GET_NX(descr[2]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_codelet, "spmv", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); n=0; err = clSetKernelArg(kernel, n++, sizeof(nnz), &nnz); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, n++, sizeof(nrow), &nrow); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, n++, sizeof(nzval), &nzval); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, n++, sizeof(colind), &colind); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, n++, sizeof(rowptr), &rowptr); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, n++, sizeof(firstentry), &firstentry); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, n++, sizeof(vecin), &vecin); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, n++, sizeof(nx_in), &nx_in); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, n++, sizeof(vecout), &vecout); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, n++, sizeof(nx_out), &nx_out); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=nrow; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } void compile_spmv_opencl_kernel(void) { int ret; ret = starpu_opencl_load_opencl_from_file("examples/spmv/spmv_opencl.cl", &opencl_codelet, NULL); if (ret) { FPRINTF(stderr, "Failed to compile OpenCL codelet\n"); exit(ret); } } #endif void spmv_kernel_cpu(void *descr[], void *arg) { (void)arg; float *nzval = (float *)STARPU_CSR_GET_NZVAL(descr[0]); uint32_t *colind = STARPU_CSR_GET_COLIND(descr[0]); uint32_t *rowptr = STARPU_CSR_GET_ROWPTR(descr[0]); float *vecin = (float *)STARPU_VECTOR_GET_PTR(descr[1]); float *vecout = (float *)STARPU_VECTOR_GET_PTR(descr[2]); uint32_t firstelem = STARPU_CSR_GET_FIRSTENTRY(descr[0]); uint32_t nrow; nrow = STARPU_CSR_GET_NROW(descr[0]); STARPU_ASSERT(nrow == STARPU_VECTOR_GET_NX(descr[2])); unsigned row; for (row = 0; row < nrow; row++) { float tmp = 0.0f; unsigned index; unsigned firstindex = rowptr[row] - firstelem; unsigned lastindex = rowptr[row+1] - firstelem; for (index = firstindex; index < lastindex; index++) { unsigned col; col = colind[index]; tmp += nzval[index]*vecin[col]; } vecout[row] = tmp; } } starpu-1.4.9+dfsg/examples/spmv/spmv_opencl.cl000066400000000000000000000025171507764646700214650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* OpenCL kernel for SPMV */ __kernel void spmv(int nnz, int nrow, __global float* nzval, __global unsigned* colind, __global unsigned* rowptr, int firstentry, __global float *vecin, int nx_in, __global float *vecout, int nx_out) { const int row = get_global_id(0); if (row < nrow) { float tmp = 0.0f; unsigned index; unsigned firstindex = rowptr[row] - firstentry; unsigned lastindex = rowptr[row+1] - firstentry; for (index = firstindex; index < lastindex; index++) { unsigned col; col = colind[index]; tmp += nzval[index]*vecin[col]; } vecout[row] = tmp; } } starpu-1.4.9+dfsg/examples/stencil/000077500000000000000000000000001507764646700172675ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/stencil/0.5.out000066400000000000000000000241551507764646700203310ustar00rootroot00000000000000| 0 0 2 0 2 1 2 1 0 1 2 1 0 1 0 1 0 0 1 0 2 0 2 1 0 1 1 0 2 0 2 1 0 2 0 0 1 0 2 2 0 1 0 1 2 1 2 1 0 0 2 0 1 0 0 1 0 1 2 0 2 2 1 0 | 0 0 0 2 2 2 1 2 1 1 1 1 1 0 1 0 0 0 0 1 0 2 1 1 1 1 1 2 0 2 2 2 0 2 0 0 0 0 2 2 0 0 0 2 2 2 2 0 0 0 0 2 0 0 0 0 1 2 2 2 2 2 2 0 | * 0 0 2 2 2 2 * 1 1 1 1 * * * * 0 0 * * * * * 1 1 * * * * * * * * 0 * * * 2 * * * * * * * * * * * * * * * * * * * * * * * * * * | 0 * * * * * * 1 * * * * 1 1 0 0 * * 0 1 1 1 1 * * 1 1 1 2 2 2 2 2 * 0 0 0 * 2 2 0 0 0 2 2 2 2 0 0 0 0 0 0 0 0 1 1 1 2 2 2 2 2 0 | 0 1 2 2 2 2 2 1 1 1 1 1 1 1 0 0 0 1 0 1 1 1 1 1 1 1 1 1 2 2 2 2 2 0 0 0 0 0 2 2 0 0 2 2 2 2 2 0 0 0 0 0 0 0 0 1 1 1 2 2 2 2 0 0 | 0 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 0 0 0 0 0 0 2 0 0 2 2 2 2 2 0 0 0 0 0 0 0 0 1 1 1 2 2 2 0 0 0 | 0 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 0 0 0 0 0 0 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 1 1 1 1 2 2 2 0 0 0 | 0 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 0 0 2 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 0 0 0 0 0 0 0 2 2 2 2 2 0 0 0 0 0 0 0 2 1 1 0 2 2 2 0 0 0 | 0 2 2 2 2 * * * * * 0 1 * * * * * 2 * * * * * * * * * 2 2 2 2 * * * * * * * * * 0 0 2 2 2 2 2 * * * 0 0 * 2 * * * * 2 2 2 * * * | * * * * * 0 0 0 0 0 * * 1 1 1 1 1 * 2 1 0 1 2 1 1 2 2 * * * * 2 1 1 1 1 0 0 0 0 * * * 2 2 * * 0 0 0 * * 0 * 2 1 1 2 * * * 2 1 0 | 2 2 2 2 2 0 0 0 0 0 1 1 1 1 1 0 0 2 2 1 1 1 1 1 1 2 2 2 2 2 2 0 1 1 0 1 0 0 0 0 1 2 2 * * 2 2 2 0 2 1 1 0 0 2 2 1 2 2 2 2 2 1 0 | 2 2 0 2 0 0 0 0 0 1 0 0 1 1 1 0 0 2 2 1 1 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 2 2 2 0 0 2 2 2 1 1 0 0 0 0 1 1 0 2 2 2 2 2 1 | 2 2 0 0 0 0 0 0 1 0 0 0 1 1 1 0 0 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 0 1 2 0 0 0 0 1 2 2 2 2 2 0 1 2 1 1 1 0 0 0 0 1 1 0 2 2 2 2 2 2 | * 0 0 0 0 0 0 0 0 0 0 * * * * * 0 * * * * * * * * * * 2 2 2 2 0 0 2 2 0 0 0 0 1 2 2 2 2 2 0 2 1 1 1 1 0 0 0 * * * * * * 2 2 2 * | 0 * 0 0 0 0 0 0 * * * 0 1 1 1 0 * 1 1 1 1 1 1 1 2 2 2 * * * * * * * * * 0 0 * * * * * * * * * * * * * * * * 0 1 1 0 2 2 * * * 2 | 0 0 * * * * * * 0 0 0 0 1 1 1 0 0 1 1 2 1 1 1 1 2 2 2 2 2 2 2 0 0 2 2 0 * * 0 0 2 2 2 2 2 0 1 1 1 1 1 0 0 1 1 1 1 0 0 2 1 2 2 2 | 0 0 0 0 0 0 0 0 0 0 0 0 1 1 2 0 0 2 2 0 1 1 1 2 2 2 2 2 2 2 2 0 0 2 2 1 0 2 0 2 0 2 2 2 2 0 1 1 1 1 1 0 1 1 1 1 1 0 0 1 1 2 2 2 | 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 2 2 2 2 0 1 1 2 2 0 2 1 1 2 0 0 0 2 2 0 0 0 2 2 1 2 2 2 2 0 1 1 1 1 0 1 1 1 1 1 2 0 0 1 1 2 2 2 | 1 0 0 0 0 0 0 0 0 0 0 * * * * * * 0 0 * * * * * * * 0 0 1 1 0 0 2 0 0 0 0 0 2 2 2 2 2 2 2 0 1 1 1 1 0 * * * * 2 2 0 * 1 1 2 2 2 | * * * * * 0 0 2 0 * * 0 1 1 1 1 0 * * 0 2 2 2 2 2 0 * * * * * * * * 2 0 0 0 * * * * 2 2 * * * * * * * 1 1 1 1 * * * 0 * * * * * | 2 0 0 0 0 * * * * 0 0 0 1 1 1 1 0 2 2 0 2 2 2 2 2 0 0 0 1 1 0 0 0 1 * * * * 2 2 2 2 * * 2 2 1 1 1 1 1 1 1 1 1 2 2 0 0 1 1 2 2 2 | 0 0 0 0 0 0 0 0 0 0 0 0 2 1 1 1 1 0 2 0 2 2 2 2 2 0 0 2 1 1 0 0 0 2 2 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 2 0 0 0 1 1 1 1 1 | 0 1 0 0 0 0 0 0 0 0 0 0 2 1 1 1 1 0 0 0 2 2 2 2 2 0 0 2 1 1 1 0 0 2 2 2 0 0 2 2 2 2 2 2 2 1 2 1 1 1 1 1 1 1 1 2 0 0 0 1 1 1 1 1 | 2 0 0 0 0 0 0 0 0 2 0 0 0 1 1 1 1 0 0 2 2 2 * * * * 0 2 2 1 2 0 0 0 2 0 0 0 0 2 2 2 2 2 1 1 2 1 1 1 1 1 1 1 * * * 2 0 1 0 1 1 1 | * * 0 0 0 0 2 0 * * * * * * 1 1 * * * * * * 2 2 0 0 * * * * * * 0 0 0 1 0 0 0 0 2 * * * * * * * 1 1 1 1 * * 1 2 0 * * * * * 1 * | 2 0 * * * * * * 2 2 2 0 0 1 * * 1 0 2 2 2 2 2 2 0 0 2 2 2 1 0 0 * * * * * * * * * 2 2 2 1 1 1 1 * * * * 1 1 1 0 0 0 0 0 0 1 * 1 | 2 0 0 1 0 0 2 0 2 2 2 0 0 1 1 1 1 1 2 2 2 2 2 2 0 0 2 2 2 2 0 0 0 0 0 2 1 2 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 1 1 1 | 2 0 0 1 0 0 0 2 2 2 2 0 0 1 1 1 1 1 2 2 2 2 2 2 0 0 2 2 2 2 0 0 0 0 0 2 2 2 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 1 1 1 | 1 0 0 1 0 0 0 2 2 2 2 0 0 1 1 1 1 1 2 2 2 2 2 2 0 0 2 2 2 2 0 0 0 0 2 2 2 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 | 1 0 0 1 0 0 0 2 2 2 2 0 0 1 1 1 1 1 2 2 2 2 2 2 0 0 2 2 2 2 0 0 0 0 2 2 2 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 | 1 0 0 0 0 0 0 2 2 2 0 * * * * * * * * 2 2 * * * * * * * 2 2 2 0 0 0 2 2 2 0 0 0 2 2 2 2 1 1 1 1 1 1 * * * * * * * * 1 1 1 1 1 1 | * * * * * * * * * * * 0 0 0 1 1 1 1 2 * * 2 2 2 0 0 2 2 * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 1 1 0 0 * * * * * * | 0 0 0 0 0 0 0 2 2 2 1 2 1 2 1 1 1 1 1 2 2 1 1 2 0 2 2 2 2 2 0 0 0 0 0 2 2 2 0 2 2 2 2 2 0 0 0 1 1 1 1 1 1 1 1 0 0 2 0 1 1 1 1 2 | 0 0 0 0 0 0 2 2 2 2 2 1 2 2 1 1 1 1 0 1 0 1 0 2 2 2 2 0 2 2 2 0 1 0 2 2 2 2 2 2 2 2 2 0 0 0 0 1 1 1 1 1 1 1 1 0 2 1 0 1 0 0 2 2 | 0 0 0 0 0 0 2 2 2 2 2 1 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 2 2 2 2 2 2 2 1 2 2 0 0 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 2 2 | 0 0 0 0 0 2 2 0 * * * * * * * * * 1 * 0 0 0 * * * * * * * * * 0 0 * * 1 1 1 2 2 2 1 1 2 0 0 2 1 * * 1 1 1 * * * * * * * 0 0 1 2 | * * * * * * * * 2 2 2 1 0 0 2 0 2 * 2 * * * 0 0 0 0 0 0 2 2 2 * * 2 2 * * * * * * * * * * * * * 1 1 * * * 1 1 1 1 1 1 0 * * * * | 0 0 1 0 0 0 0 0 2 2 2 1 0 2 0 2 1 2 2 0 0 0 0 0 0 0 0 2 2 2 2 0 0 2 2 1 1 1 2 2 2 1 2 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 2 2 | 2 0 0 0 0 0 0 0 2 2 1 1 2 0 2 1 2 2 2 0 0 0 0 0 0 0 0 2 2 2 2 0 2 2 2 1 1 1 2 2 2 2 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 2 2 2 | 0 0 0 0 0 0 0 2 2 2 1 1 2 2 2 1 2 2 2 1 0 0 0 0 0 0 2 2 2 2 2 0 0 2 1 1 1 1 2 2 2 2 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 | 0 0 0 2 0 2 0 * * * * * * * * 1 * 2 2 0 0 0 2 0 0 0 2 2 0 2 2 1 2 1 0 1 1 1 2 2 2 2 0 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 | * * * * * * * 2 2 0 1 0 2 2 2 * 2 * * * * * * * * * * * * * * * * * * * * * * * * * * 1 * * * * * * 1 1 * * * * * * * * * * * * | 0 1 0 0 0 2 0 2 2 0 1 1 2 2 2 2 2 2 2 0 0 0 0 0 0 0 2 0 0 2 2 2 2 0 1 1 1 1 1 2 2 2 0 * 2 2 2 0 1 0 * * 1 1 1 1 1 1 1 0 0 0 0 0 | 0 1 1 0 0 2 2 2 2 1 1 1 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 2 2 2 2 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 2 1 0 1 1 1 1 1 1 1 1 1 1 0 0 0 | 0 1 1 0 0 2 2 2 2 2 2 1 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 2 1 1 1 0 1 0 0 0 1 1 1 2 2 2 2 2 2 2 2 2 2 1 0 0 0 0 1 1 0 1 1 1 1 2 1 0 | 1 1 1 0 0 2 2 2 2 2 2 * * * 2 2 2 2 2 0 0 0 0 0 2 0 1 0 1 1 1 1 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 2 2 0 0 0 0 0 0 0 1 1 1 1 2 1 1 | * * * * * * * * * * * 2 2 2 * * * * * * 0 0 0 * * * * * * * * * * * * * * * * * * 2 2 2 2 1 0 0 2 0 0 0 0 0 0 0 * * * * * * * * | 1 2 1 0 0 2 2 1 1 2 2 2 2 2 0 1 2 2 1 0 * * * 0 0 2 0 1 1 1 1 1 0 0 0 0 0 1 1 2 2 * * * * * * * * * * * 0 0 * * 0 2 1 1 1 2 1 1 | 1 2 0 0 0 1 1 1 1 2 2 2 2 2 2 0 2 2 0 0 1 1 0 0 0 0 0 1 1 1 1 1 0 0 0 2 2 2 2 2 2 2 2 2 2 1 0 0 0 0 0 0 * * 0 1 0 0 1 1 1 2 1 1 | 1 2 0 0 0 1 1 1 1 1 1 2 2 2 2 2 0 2 0 0 1 1 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 2 2 2 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 1 | 1 2 2 0 0 1 1 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 2 2 2 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 2 2 1 | 1 2 2 0 * 1 1 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 2 2 2 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 2 2 2 | * * * * 0 * * * * * * * * * 2 * * * * * * * * * * * * * * * * * * * * * 2 2 2 2 2 2 2 2 2 2 0 0 0 0 2 0 0 0 * * * * * * * * * * | 2 2 2 0 0 1 1 1 1 1 1 2 2 2 * 2 2 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 * * * * * * 2 * * 2 * * * 0 * * * * 1 0 0 1 1 1 1 2 2 2 | 2 2 2 0 0 1 1 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 2 2 2 2 2 2 * 2 2 * 0 0 0 * 0 0 0 1 0 0 0 1 1 1 1 2 2 2 | 2 2 2 0 0 1 1 1 1 1 1 2 2 2 2 2 0 0 0 0 0 2 2 0 0 0 0 1 1 1 1 1 1 1 1 0 2 2 2 2 2 2 0 2 2 2 2 0 0 0 0 0 1 1 1 1 0 1 1 1 1 2 2 2 | 2 2 0 0 0 1 1 1 1 1 1 2 2 2 2 2 0 0 0 0 0 2 2 2 2 2 2 2 1 2 1 1 1 1 1 1 2 2 2 2 2 2 2 0 2 2 0 0 0 0 0 1 1 1 1 0 0 0 1 1 1 2 2 0 | 2 * 0 0 0 1 1 1 1 1 0 1 2 2 2 2 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 2 0 0 0 0 2 2 0 0 2 0 1 1 1 1 1 0 0 0 1 1 1 2 0 0 | * 2 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 2 0 0 0 0 0 1 1 * * * * * * * * * 0 * * | 2 2 0 0 0 2 1 1 1 1 1 1 2 2 2 2 0 0 0 0 0 2 2 2 2 2 2 2 0 2 2 1 1 1 1 1 2 2 2 2 0 * 0 * * * * * * * * * 1 1 1 0 0 0 1 1 2 * 2 0 | 2 2 0 0 0 0 1 1 1 1 1 1 2 2 2 2 0 0 0 0 0 2 2 2 2 2 2 2 2 0 1 1 1 1 1 1 2 1 2 0 0 0 * 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 1 1 2 1 0 1 | 2 0 0 0 0 0 1 1 1 2 2 2 2 2 2 2 0 0 0 0 2 2 2 2 1 2 1 0 0 0 1 1 1 1 1 1 1 1 0 2 0 0 0 0 0 0 0 0 0 0 1 1 2 2 2 0 0 0 1 1 1 0 0 0 | * * 0 0 0 0 2 1 1 2 2 1 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 0 0 0 1 2 1 1 2 1 1 1 2 0 0 0 0 0 0 0 0 0 0 0 1 1 2 2 2 0 0 0 1 2 1 0 0 0 | 2 1 * * * * * * * * * * * * * * * * * * * * * * * * * * 0 * * * * * * * * * * 0 * 0 1 0 0 0 0 0 0 0 0 1 2 2 2 2 * * * * * * * * | 2 1 0 0 0 2 2 1 1 2 1 2 2 2 2 2 0 0 0 0 0 2 1 1 1 1 1 0 * 1 1 2 2 0 1 0 1 0 1 * 2 * * * 0 0 0 0 0 * * * * * * * 1 2 0 2 1 0 0 0 | 2 1 0 0 0 0 2 1 1 2 0 2 2 2 2 2 0 0 0 0 0 2 1 1 1 1 1 0 0 1 1 2 2 1 0 0 1 1 1 1 2 1 0 0 * * * * * 0 0 2 2 2 2 1 1 0 0 2 2 0 0 0 | 2 1 0 0 0 0 2 1 1 2 0 0 2 1 2 2 0 0 0 0 0 0 1 1 1 1 0 0 0 1 1 1 1 1 0 0 1 1 1 1 2 2 0 0 0 2 0 2 0 0 0 2 2 2 2 2 2 0 2 2 2 0 0 0 | 0 0 * 0 0 0 0 1 1 1 1 2 2 2 2 2 0 0 0 0 0 0 1 1 1 1 0 0 0 1 2 2 1 1 0 0 1 1 1 1 2 2 0 0 0 2 1 2 0 0 0 2 2 2 2 2 2 2 2 2 2 0 0 0 | * * 0 * 0 * * * * * * * * * * * * * 0 0 0 * * * * * * 0 0 * 2 2 * * 0 * * * * * * * 0 0 0 2 1 1 0 0 0 2 2 2 2 2 * * * * * * * * | 0 0 0 0 * 0 0 1 1 1 1 2 1 1 2 1 2 0 * * * 0 1 1 1 1 0 * * 1 * * 2 1 * 0 1 1 1 0 1 2 * * * * * * * * * * * * * * 2 2 2 2 2 0 2 1 | 0 0 0 0 0 0 0 1 1 1 1 2 1 2 1 1 2 2 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 0 0 0 2 0 0 2 0 0 0 0 2 2 1 0 0 0 1 2 2 2 2 2 2 2 2 2 2 0 1 | 1 0 0 0 0 0 1 1 1 1 1 0 0 0 1 2 2 1 2 1 2 1 0 1 1 0 1 0 0 0 2 2 2 2 0 2 2 0 0 0 0 0 0 0 0 2 2 0 0 0 0 1 2 2 2 2 1 1 2 2 2 2 0 1 | * * 0 0 0 1 * * * * * * 0 0 0 0 0 0 1 1 1 1 1 1 1 1 * 0 0 2 2 2 2 2 2 2 2 0 0 2 2 1 1 0 0 2 2 2 2 2 0 1 2 2 2 1 1 1 2 2 2 2 * * | 1 0 * * * * 0 2 1 1 1 0 * * * * * * * * * * * * * * 0 * * * * * * * * * * * * * * * * * 0 2 2 2 2 0 0 * * 2 1 1 * * * * * * 2 1 | 1 0 1 1 1 1 0 2 1 1 0 0 0 0 0 0 0 0 1 1 1 2 0 1 1 1 0 0 0 1 2 2 2 2 2 0 2 2 2 2 1 1 1 2 * * * * * * * 0 0 * * * 1 1 2 2 2 0 0 1 | 1 1 0 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 0 0 1 1 0 0 0 0 2 2 2 2 0 0 2 2 2 2 1 1 2 2 0 2 2 2 2 0 0 0 0 1 1 1 1 2 2 2 2 0 0 1 | 2 1 1 1 2 2 1 1 1 1 0 0 0 0 0 0 0 0 2 0 2 2 0 0 0 0 0 0 0 0 2 2 1 1 0 0 0 2 2 2 1 1 2 0 2 0 2 2 2 0 0 0 0 1 1 1 1 1 1 2 2 0 2 1 | 1 1 1 2 2 2 1 1 1 1 0 1 0 2 2 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 2 2 2 1 1 1 0 0 0 0 2 2 0 0 0 0 1 1 1 1 1 2 2 0 2 2 2 | starpu-1.4.9+dfsg/examples/stencil/0.out000066400000000000000000000300321507764646700201550ustar00rootroot00000000000000| 0 0 2 1 0 1 1 1 0 1 1 0 1 1 0 0 1 2 0 2 0 0 1 0 2 1 0 2 1 0 2 1 2 0 0 1 0 1 2 0 1 2 2 0 0 1 2 0 1 0 1 0 0 1 2 0 2 0 0 1 2 1 0 2 | * * 2 0 * 0 * 0 * * 0 * * * * * * 0 * * 0 * * 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 * * * * * * * | 0 0 * * 1 * 1 * 2 2 * 0 2 0 1 1 2 * 1 2 * 1 2 * 2 0 1 2 0 1 0 1 2 1 0 1 2 2 2 0 0 0 1 2 0 0 0 0 1 1 0 2 0 2 1 0 * 2 0 2 2 0 1 0 | 0 2 1 2 1 1 1 0 2 1 1 2 0 1 2 0 1 1 0 1 2 0 1 1 0 2 0 1 1 0 2 0 1 2 0 0 1 0 0 0 0 1 2 0 1 2 0 0 0 0 1 0 0 0 2 1 0 1 0 0 0 1 2 2 | * * * 1 * 0 2 1 1 * 0 0 0 0 * * * * 0 * 0 * 0 * * 1 * * * * * * * * * * * * 0 0 * * * * 0 * * * * * * * 0 * * * * * * 0 0 * * * | 1 0 1 * 2 * * * * 2 * * * * 1 1 0 2 * 2 * 1 * 1 0 * 2 0 0 0 1 0 0 2 2 0 2 2 * * 2 1 0 1 * 0 0 2 1 2 1 0 * 0 1 0 1 0 2 * * 2 1 0 | 0 0 2 1 1 0 0 2 2 0 1 0 2 1 2 0 1 0 1 1 0 2 2 1 0 0 2 0 1 0 2 2 0 1 2 0 0 0 2 0 2 1 2 0 2 0 1 0 0 1 0 0 2 0 2 0 0 0 2 0 0 2 2 1 | * 1 1 2 * 1 0 0 2 2 0 0 0 2 0 0 2 0 * * 0 * * * * 1 0 * 0 * * 0 0 * * * * * * 2 * * * * 0 * * 0 * * * * 0 * * * * * * * * * * * | 1 * * * 1 * * * * * * * * * * * * * 2 1 * 2 2 1 2 * * 2 * 0 0 * * 1 0 0 1 2 0 * 2 0 2 1 * 2 1 * 0 2 0 1 * 0 0 1 0 1 2 0 1 0 2 1 | 1 2 1 2 1 1 0 1 0 0 0 2 0 0 1 1 0 0 2 0 2 0 0 0 1 1 2 0 1 2 1 1 0 0 0 0 2 1 0 0 0 1 2 0 2 2 2 0 0 2 0 1 2 0 2 0 1 1 0 1 0 0 0 1 | * * 0 0 1 1 1 2 1 0 0 1 0 2 2 0 1 0 0 0 * * 0 * * * 0 0 1 2 0 2 1 2 * * * 1 * * 0 * * 0 * * 0 2 * * 0 1 0 1 2 0 0 0 1 * * 2 * * | 1 0 * * * * * * 0 2 * * * * * * * * * * 2 1 * 2 1 0 * * * * * * * * 0 2 0 * 1 2 * 0 1 * 2 1 * * 1 2 * * * * * * * * * 2 1 * 1 2 | 0 1 1 1 2 1 0 0 * * 0 2 0 2 1 0 0 0 1 0 2 1 1 0 2 1 2 2 0 2 1 1 0 2 2 0 2 0 2 0 1 0 0 0 0 0 2 0 1 0 2 0 0 2 1 0 2 1 2 0 1 1 0 0 | 0 * * 1 2 1 2 1 2 0 0 0 1 1 0 2 0 2 0 0 0 * * 0 1 0 2 0 2 0 0 1 1 1 1 0 0 * * * 2 0 0 0 * * * * 2 0 0 * 0 0 1 2 0 0 2 2 2 * * * | * 0 2 * * * * 0 * 0 0 2 * * * * * * * * * 2 1 * * * * * * * * * * * * * * 2 0 2 * * * * 1 1 0 2 * * * 1 * * * * * * * * * 1 2 1 | 1 0 2 0 1 2 2 * 2 * * * 0 0 0 2 0 0 1 0 0 0 0 2 2 1 0 0 2 1 1 1 0 2 0 1 1 0 0 2 1 0 2 0 0 0 0 1 0 1 2 1 0 0 0 1 1 0 0 2 2 2 2 0 | 0 0 2 1 0 2 0 0 2 1 0 2 0 1 0 0 1 0 2 1 1 1 0 0 2 0 1 2 2 1 0 2 0 1 0 1 2 0 1 0 2 1 2 1 0 2 0 2 0 2 2 0 1 2 0 0 1 0 2 0 0 0 2 1 | * * * * * * * 0 2 2 0 2 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | 1 0 2 0 1 2 0 * * * * * 0 1 1 0 0 0 0 1 0 0 0 2 1 0 2 1 2 0 1 1 1 0 2 0 0 1 0 0 2 0 0 0 0 2 2 1 0 0 0 2 1 0 0 2 2 2 2 2 1 2 0 2 | 0 1 2 2 0 1 0 2 0 1 1 0 2 0 1 1 1 2 0 2 0 0 1 0 1 1 0 1 0 2 1 1 2 1 0 0 2 1 0 0 0 2 0 0 2 1 2 0 1 0 0 0 0 1 0 1 0 2 2 0 2 0 0 0 | * * * * * * * * 0 0 1 1 * * 0 * * * * * * * * * * * * 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 * * * * * * * * | 2 0 1 2 0 1 2 2 * * * * 2 2 * 1 0 2 2 0 1 1 0 0 2 0 1 * 0 0 0 0 2 0 2 0 1 0 0 1 0 1 0 2 0 1 1 2 0 1 0 2 0 1 2 * 1 0 0 2 1 1 1 0 | 2 1 0 0 2 0 2 2 2 0 1 2 0 0 2 0 1 0 0 1 1 0 2 0 2 1 2 0 1 0 0 0 0 1 1 2 2 2 2 2 0 0 0 2 0 2 0 1 1 1 2 1 0 1 0 0 0 0 2 2 0 1 0 1 | * * * * 0 * * * * * 0 * 0 0 * * * * * * * * * * * 2 2 0 2 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * 0 2 * * * * * | 2 0 2 0 * 2 0 2 2 1 * 2 * * 1 1 1 0 0 2 1 1 1 2 1 * * * * * * 1 0 0 0 2 1 0 0 0 0 0 0 2 2 1 0 2 0 0 1 2 1 2 1 1 2 * * 0 2 2 0 0 | 0 2 0 0 0 0 0 0 2 2 2 2 0 0 1 1 2 0 0 0 0 0 0 0 2 1 0 1 1 0 0 2 1 1 2 0 2 2 2 0 1 2 0 2 1 2 1 2 0 2 1 2 1 1 0 1 2 0 0 0 0 2 0 0 | * * * * * 0 0 * 1 * * 2 0 * * * 0 0 * 0 0 0 * 0 * 1 0 2 2 0 0 * * * 0 1 * 0 * 0 * * * * 0 * 0 1 * * * * * * * * * 2 0 * * 1 * * | 0 0 0 1 0 * * 2 * 2 1 * * 2 1 2 * * 1 * * * 1 * 1 * * * * * * 0 2 0 * * 2 * 1 * 2 0 1 2 * 2 * * 1 2 1 0 1 2 0 1 0 * * 0 0 * 2 0 | 1 2 1 0 1 0 1 2 0 0 1 0 1 0 2 0 1 1 1 1 0 0 2 1 0 1 2 1 2 0 0 1 1 0 1 0 0 0 2 1 0 0 2 0 2 1 0 0 1 1 1 2 0 2 0 0 1 2 2 1 1 0 0 2 | * 2 * * * * 1 1 2 2 0 * * * * * * * 0 0 1 0 0 0 2 1 0 1 0 2 0 * * * 0 0 0 2 0 2 0 * 0 * 2 1 1 0 0 1 0 * * * * * * * * 0 1 1 * * | 0 * 2 0 2 0 * * * * * 0 1 0 2 1 2 1 * * * * * * * * * * * * * 2 0 1 * * * * * * * 1 * 2 * * * * * * * 2 2 1 2 1 0 0 0 * * * 0 1 | 1 2 2 0 1 2 1 2 0 0 0 0 1 0 0 1 0 0 1 2 2 1 0 0 0 2 2 1 1 0 1 2 0 2 0 1 2 2 0 2 0 0 0 1 0 2 0 0 0 0 0 2 1 1 2 1 1 0 1 1 0 2 2 0 | 1 * 0 0 2 1 1 2 2 0 1 0 1 0 0 * 0 2 0 1 2 0 * * * * 0 0 2 0 * 0 2 1 2 0 0 1 1 0 1 1 0 * 0 2 1 0 0 2 1 0 * * * * * 2 0 0 0 0 1 1 | * 2 * * * * * * * * * * * * * 2 * * * * * * 2 1 0 1 * * * * 2 * * * * * * * * * * * * 2 * * * * * * * * 0 0 1 0 0 * * * * * * * | 0 2 0 0 1 0 2 2 0 0 1 0 0 0 1 1 0 2 0 0 0 2 2 0 1 0 1 1 2 1 0 2 2 1 0 1 2 1 0 1 2 1 0 1 0 0 2 1 1 0 2 1 0 0 0 2 2 0 0 1 0 0 2 1 | 2 2 0 2 1 0 1 2 1 2 0 1 0 * 2 0 0 0 0 1 0 2 0 2 0 1 1 2 1 0 2 0 1 0 2 2 0 0 2 2 1 1 0 0 2 2 0 1 2 0 1 0 1 * * * 0 0 2 0 0 0 0 2 | * * * * * * * * * * 0 1 * 1 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 0 0 * * * * * * * * | 0 0 0 2 0 0 0 2 1 2 * * 1 2 0 1 0 2 0 2 0 2 2 0 1 0 0 1 0 2 1 0 0 2 0 0 0 0 0 2 0 0 1 2 1 0 0 2 0 1 0 2 0 2 1 0 2 2 1 0 2 0 1 1 | 2 1 0 1 2 0 0 0 2 0 0 2 1 1 2 0 0 2 0 2 1 0 0 0 1 2 0 0 2 0 1 0 0 0 1 2 0 1 2 1 2 1 0 2 1 0 0 2 1 1 1 0 0 0 0 2 1 0 0 2 0 2 1 0 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 * * * * * * * * * * * * * * * * * | 0 2 0 0 0 2 1 0 1 2 0 2 1 0 2 0 2 2 0 0 2 0 0 0 2 1 2 1 1 1 1 2 2 0 0 2 2 0 0 1 2 2 0 1 0 2 * 0 0 1 1 2 0 1 2 2 2 1 2 0 0 1 2 1 | 1 2 1 1 2 0 0 0 1 2 0 0 0 2 0 1 1 0 1 0 0 0 0 0 0 0 2 2 2 1 1 1 2 1 2 0 1 0 0 1 0 2 0 2 0 0 2 1 2 1 2 2 0 0 1 2 0 0 0 0 2 1 1 1 | * 2 * * 2 1 * * * * * * * * * * * 2 * * * * * 2 1 2 0 * * * * * * * 0 * 2 * * * * * * * * * * * * * * * 0 * * * * * * 0 2 0 0 2 | 1 * 2 1 * * 1 2 0 0 2 1 2 1 0 0 0 * 0 0 0 2 0 * * * * 0 2 0 1 0 0 1 * 2 * 1 0 2 1 2 2 2 0 2 1 0 0 0 1 2 * 2 1 2 0 0 1 * * * * * | 2 1 1 2 1 1 0 0 0 0 1 2 0 0 0 0 1 2 0 0 0 2 1 1 0 1 0 0 1 0 2 0 2 1 0 2 0 0 2 0 1 0 1 1 2 0 2 0 2 1 0 1 2 1 1 0 1 2 1 0 0 0 0 1 | 0 1 2 0 * 1 * * * * * * * 0 * * 1 0 2 * * * * * * 0 0 0 * * * * * * 0 2 1 * * * * * * * * * * * 0 1 0 0 * 0 2 0 1 2 * * 0 2 1 2 | * * * * 1 * 1 2 1 2 1 0 1 * 1 2 * * * 2 1 0 0 0 1 * * * 1 0 1 0 2 0 * * * 0 1 2 0 0 2 1 2 0 0 0 * * * * 2 * * * * * 0 0 * * * * | 2 2 0 0 0 2 0 0 1 1 1 0 1 0 2 0 0 1 0 0 2 2 1 2 0 2 2 1 0 1 0 2 2 2 0 0 0 2 0 2 2 0 1 2 0 1 1 1 2 0 0 0 0 2 1 0 0 2 2 1 0 1 0 2 | 0 0 0 2 0 1 2 * * * 0 * * * 2 2 1 0 * * * * * * 0 * * 2 1 1 2 * * * * * 0 * * * 2 1 1 0 * * * * 1 1 0 0 0 * * 0 1 0 0 0 * 2 0 0 | * * * * * * * 0 2 1 * 2 0 1 * * * * 1 0 2 0 0 2 * 0 2 * * * * 0 1 1 0 2 * 1 1 2 * * * * 1 0 2 1 * * * * * 0 0 * * * * * 2 * * * | 1 0 0 0 2 2 0 0 2 1 0 1 1 0 2 1 2 1 2 0 0 1 2 1 0 0 2 2 1 1 0 0 1 0 1 1 0 2 0 1 2 0 1 2 0 2 0 0 1 2 0 0 2 0 2 0 0 0 0 0 1 0 0 2 | 1 0 2 0 0 1 2 * * * * * 2 0 2 * 0 0 0 1 0 * * * * * 1 2 0 1 0 1 2 2 * * 1 2 0 1 2 0 2 2 * 0 0 * 0 2 0 * 2 0 0 0 0 0 2 0 1 0 2 0 | * * * * * * * 2 1 0 2 1 * * * 1 * * * * * 0 0 1 0 0 * * * * * * * * 1 0 * * * * * * * * 0 * * 1 * * * 1 * * * * * * * * * * * * | 1 2 2 2 1 2 1 0 2 1 2 0 2 0 2 0 1 1 1 2 0 0 2 0 2 1 0 2 1 0 2 1 2 1 0 0 0 0 0 0 0 1 0 1 1 0 1 2 0 1 2 0 0 0 2 0 2 0 2 1 0 2 0 0 | 2 0 0 2 0 0 2 * 1 * * 0 0 2 0 1 0 2 1 1 2 1 2 0 0 0 1 2 0 2 2 0 2 0 2 1 0 0 0 0 1 1 * 2 * 1 2 1 0 1 0 0 0 1 0 2 0 2 2 1 2 0 1 0 | * * * 2 * * * 0 * 0 1 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 * 0 * * * * * * * * * * * * * * * * * * * | 0 1 0 * 2 0 0 1 2 0 2 2 1 1 1 2 1 1 0 0 0 0 2 0 0 2 0 1 1 2 2 0 0 2 0 1 2 0 2 2 0 0 2 2 0 1 0 1 2 0 2 0 0 1 2 0 2 1 1 0 2 2 0 1 | 2 0 0 0 2 1 0 0 1 1 0 2 2 0 2 1 0 2 0 0 2 1 0 2 0 1 0 1 2 1 1 2 2 1 0 2 1 2 1 0 2 1 0 0 0 1 0 0 0 0 2 1 2 0 1 0 0 0 0 0 1 2 0 2 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | 1 0 1 2 2 1 0 1 0 1 0 1 2 0 1 0 1 0 1 0 1 1 0 0 0 2 2 1 0 2 0 2 0 2 1 0 2 0 1 0 0 0 0 2 0 1 0 2 2 1 2 0 2 0 0 2 0 1 1 2 1 0 2 0 | 1 0 1 0 0 0 2 1 0 2 1 2 2 1 0 2 2 0 0 0 1 1 0 0 1 2 0 1 2 0 0 0 0 0 0 0 1 1 2 0 2 1 0 2 0 1 0 1 2 2 0 0 0 2 0 1 2 1 2 0 0 2 1 2 | * * * * * * * * * * * * * * * * * * * * * * * * * 0 * * * 1 1 * 0 * * * 1 0 1 0 * * * * * * 1 * * * * * * * * * 0 1 0 * * * * * | 0 1 2 0 2 0 0 1 1 1 2 2 0 1 2 0 0 2 1 0 1 1 0 2 2 * 2 1 0 * * 2 * 0 0 1 * * * * 1 2 0 1 1 2 * 0 0 0 0 0 2 0 2 1 * * * 1 1 2 0 2 | 2 1 0 0 1 0 2 0 2 0 1 1 1 0 2 0 2 0 2 0 0 2 0 2 0 0 1 2 2 2 1 0 1 0 0 1 2 0 2 1 2 2 1 0 1 1 0 2 0 1 1 0 1 0 1 0 0 1 2 0 0 1 0 1 | 0 * 0 * * * * * * * 1 * * * * * 0 * 0 * * * * * * * * 1 * 1 1 0 0 0 0 0 1 * 0 * * * * * * * * 0 * * * * * * 0 0 0 2 0 0 * 2 0 0 | * 2 * 2 2 0 1 2 1 0 * 2 0 1 1 0 * 1 * 1 1 0 1 0 1 0 2 * 2 * * * * * * * * 2 * 1 1 2 0 0 0 1 0 * 0 2 0 0 0 1 * * * * * * 2 * * * | 1 0 1 0 0 0 0 1 2 0 0 0 2 1 1 1 2 0 0 0 0 2 1 1 0 0 1 1 2 0 2 0 2 2 2 1 0 0 2 2 0 1 1 1 0 2 1 0 2 0 0 2 0 2 0 0 0 2 1 0 0 2 2 1 | 0 1 2 0 2 0 * * * 0 * 2 * * * * * * 0 0 0 * * 0 * * * 0 1 0 2 2 0 2 2 2 1 1 0 0 * * * 1 0 0 1 1 0 2 * * * * * * 0 2 1 1 1 2 0 1 | * * * * * * 2 1 0 * 0 * 0 0 0 1 0 2 * * * 0 1 * 1 1 1 * * * * * * * * * * * * * 1 0 2 * * * * * * * 1 2 2 1 1 2 * * * * * * * * | 2 1 2 0 0 0 1 0 0 0 2 1 0 1 1 1 2 0 2 0 1 0 1 2 1 0 0 0 0 0 0 2 0 0 0 1 2 1 1 0 1 2 0 1 2 0 1 1 2 0 1 0 0 2 2 0 1 0 0 2 2 0 1 0 | 0 0 0 2 0 * * * 1 0 * 2 2 2 0 2 1 2 0 2 * * * * * * * 1 0 0 2 1 2 0 0 2 0 1 0 2 0 1 0 0 1 0 0 0 1 0 2 1 * * * * * * 1 0 0 1 0 2 | * * * * * 0 2 1 * * 1 * * * * * * * * * 0 1 1 0 0 1 0 * * * * * * * * * * * * * * * * * * * * * * * * * 2 2 0 1 0 1 * * * * * * | 1 1 0 2 0 2 0 1 0 1 0 1 0 0 0 2 0 0 0 2 1 2 1 2 2 0 0 1 0 2 0 1 2 0 0 1 2 2 2 1 1 0 1 0 1 0 0 1 2 1 0 0 2 1 2 0 2 0 0 2 1 0 2 0 | 0 0 1 0 0 0 1 2 0 2 1 0 * 0 1 2 0 0 0 * * * * * * 0 2 0 1 0 2 0 1 1 2 0 1 1 0 1 0 2 1 0 2 0 1 1 1 2 2 2 0 0 2 0 2 2 2 2 2 1 0 1 | * * * * * * * * * * * * 1 * * * * * * 0 1 1 0 0 0 * * * * * 1 2 2 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | 1 1 0 2 2 0 0 2 2 2 2 1 2 0 0 0 0 1 0 0 0 0 0 0 0 2 1 0 1 0 * * * * * 0 1 2 2 2 1 0 0 2 0 2 2 1 1 1 0 2 1 0 0 1 2 1 0 1 2 1 2 1 | 2 0 0 0 1 0 2 0 1 2 0 0 1 0 0 1 0 2 0 1 2 1 0 1 1 1 0 2 0 2 1 0 2 0 2 0 2 0 1 0 2 0 2 2 1 1 2 2 1 1 0 0 2 1 0 0 2 1 0 1 2 1 1 0 | * * 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 * 2 0 1 * 1 * * * * * * * * * * * * * 0 0 * * * * * * * * * * * * | 0 0 * 1 1 1 0 0 2 0 1 0 1 2 0 0 0 0 2 1 0 0 1 2 0 2 2 0 0 1 * 2 * * * 2 * 2 1 2 0 1 1 1 0 2 0 1 0 2 * * 1 0 2 1 0 0 1 2 1 2 0 1 | 0 0 2 2 0 2 1 2 0 1 0 1 2 0 2 1 1 0 1 1 1 0 0 1 2 0 0 1 1 1 0 1 2 2 0 0 1 0 0 0 1 0 0 1 2 1 0 0 1 1 2 0 0 1 2 0 1 2 1 1 0 0 2 0 | 2 * 0 * * * * * * * * * * * * * 2 0 * * * * * * * * * * * * 1 0 0 1 1 0 0 * 1 * * * 0 * * * * 2 1 0 * 0 2 0 * * * * * * * * * * | * 2 * 1 0 0 1 2 2 0 0 1 0 1 1 0 * * 0 1 1 0 2 1 2 0 1 1 2 1 * * * * * * * 2 * 2 0 1 * 1 2 0 2 * * * 2 * * * 1 0 0 0 0 1 2 2 2 0 | 0 2 1 0 2 2 2 0 2 2 1 2 1 0 1 2 0 1 2 2 0 1 1 0 2 0 0 1 2 1 0 0 2 2 0 0 0 1 0 0 2 0 2 1 2 0 0 1 1 1 2 0 1 1 0 1 1 2 0 0 0 2 0 0 | 2 * 0 2 2 * * * * * * * * * * * * 0 0 2 0 * * * * * * * * * * 0 0 0 0 2 1 1 2 * * * * * 0 * 0 2 1 0 2 1 * * 0 0 * * 0 1 2 0 2 2 | * 1 * * * 0 0 0 1 0 0 2 0 2 1 0 0 * * * * 0 0 2 0 2 0 1 0 2 0 * * * * * * * * 1 2 1 2 1 * 2 * * * * * * 1 2 * * 2 1 * * * * * * | 2 1 0 0 1 0 2 0 2 0 0 0 0 2 1 0 2 1 2 1 0 0 0 1 1 0 0 1 0 0 2 2 2 2 1 2 2 1 0 1 0 2 0 1 2 1 0 0 2 1 0 2 1 0 1 0 0 2 1 2 2 1 2 1 | 0 * 1 2 2 * * 2 0 0 0 1 0 1 2 0 * * 1 0 2 0 2 * 2 * 2 0 * 0 2 1 2 1 1 2 1 0 0 0 2 0 1 0 2 0 2 0 1 0 * 2 2 0 2 0 0 2 1 * * 0 0 1 | * 1 * * * 0 1 * * * * * * * * * 0 1 * * * * * 0 * 0 * * 1 * * * * * * * * * * * * * * * * * * * * * 0 * * * * * * * * 2 0 * * * | 0 0 1 2 1 1 2 1 2 1 1 0 1 0 1 0 2 2 0 1 2 1 1 2 1 0 1 0 0 2 2 1 0 2 0 0 1 0 0 2 0 0 2 0 0 0 0 2 1 0 1 2 2 0 0 1 0 1 0 1 2 0 0 0 | 0 1 1 0 2 1 1 0 0 2 1 1 2 2 2 2 0 0 0 0 2 1 2 0 1 1 1 0 * 0 1 0 0 0 2 2 2 2 1 2 0 2 0 2 0 2 0 0 0 1 0 * 1 0 0 0 0 1 * * * 2 2 0 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * 2 * * * * * * * * * * * * * * * * * * * * * * 2 * * * * * * 0 0 1 * * * | 0 1 2 2 0 1 0 0 0 0 1 2 1 2 0 1 0 1 1 1 2 0 1 2 2 2 0 2 0 1 1 2 1 2 0 1 0 0 1 0 0 1 1 0 2 0 2 0 2 0 0 0 0 0 2 1 2 1 0 2 1 0 0 1 | 0 0 0 2 0 2 2 0 2 1 2 0 0 0 1 2 0 2 0 1 1 2 0 1 0 2 1 2 0 1 2 0 1 0 1 1 0 0 1 2 0 1 0 2 0 2 0 0 1 0 2 0 2 1 2 0 0 0 0 1 0 0 2 2 | starpu-1.4.9+dfsg/examples/stencil/1.out000066400000000000000000000231411507764646700201610ustar00rootroot00000000000000| 2 0 2 0 2 0 2 0 2 0 1 1 1 1 1 0 2 0 2 0 2 0 2 0 2 1 2 1 1 1 1 0 2 0 2 0 2 0 2 0 2 1 0 1 1 1 1 2 0 2 0 2 0 2 0 2 1 0 1 1 1 1 0 0 | 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 2 0 2 0 2 0 2 0 2 1 2 1 1 1 1 1 1 0 2 0 2 0 2 0 2 1 1 0 1 1 1 1 0 2 0 2 0 2 0 2 1 1 0 1 1 1 0 0 0 | * * * 0 * 0 * 0 0 * 1 1 1 1 1 * * 0 * * * * * 1 * 1 1 1 1 1 1 1 * * * * * * * * * 1 1 1 1 1 1 * * * * * * * * * 1 1 1 1 1 0 0 0 | 0 0 2 * 2 * 2 * * 2 * * 1 * * 2 2 * 2 0 2 0 2 * 2 * * * 1 1 * * 0 0 2 0 2 0 2 0 1 * * * 1 * * 0 0 2 0 2 0 2 1 1 * * * 1 * * * * | 0 0 0 2 0 2 0 0 0 2 1 1 * 1 1 2 2 2 0 2 0 2 2 2 1 1 1 1 * * 1 1 0 0 0 2 0 2 0 0 1 1 1 1 * 1 1 0 0 0 2 0 2 1 1 1 1 1 1 * 1 0 0 0 | 0 0 0 0 2 0 0 0 0 2 1 1 1 1 1 2 2 2 2 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 2 0 0 0 1 1 1 1 1 1 1 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 2 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 2 0 0 0 0 0 0 1 1 1 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 2 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 0 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 * * 2 * 0 0 0 0 * * * * * 1 1 * * * * * 2 2 1 1 1 1 1 2 1 * * * 0 0 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 * 0 * * * * 0 0 1 1 1 * * 1 1 0 0 1 * * * * * * * * * 1 1 0 * * | 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 2 2 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 2 2 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 0 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 2 2 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 2 2 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 2 2 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 * 1 0 0 0 2 0 0 * * * * 1 1 1 1 0 * * * 2 2 1 1 1 1 1 1 1 1 0 0 0 | * * * 0 0 0 * * * * 2 2 2 1 1 2 2 2 * * * * * * * * * * * * * 1 * * * * * * * 0 0 1 1 * * * * * 0 2 2 * * * * 1 * * * * * * * * | 0 2 0 * * * 0 0 2 2 * * * * * * * * 2 2 2 2 2 2 2 1 1 0 0 1 1 1 1 0 0 0 2 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 1 1 * 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 1 2 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 2 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 1 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 * * * 2 2 2 1 1 2 2 2 2 2 2 2 * * * * * * * * 1 * * * * * * * * * * * * 1 1 * * * * * * * * * * * * * 1 1 * * * * | * * * * * * * 0 2 2 * * * * * * * * * * * * 2 2 2 2 2 0 0 1 * 1 1 0 0 0 0 0 0 0 0 1 1 * * 1 1 0 0 0 2 2 2 2 1 1 1 1 * * 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 * 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 0 0 1 1 1 1 * * * * * * * * 1 1 1 1 1 1 * * * * 2 2 2 1 1 1 1 1 1 1 0 0 0 | * * * * * * * 0 * * * * 2 1 1 2 2 2 2 2 2 2 2 * * * * * * * * * * 0 0 0 0 0 0 0 1 * * 1 * * * 0 0 0 2 * * * 1 1 1 1 1 * * * * * | 0 0 0 0 0 0 0 0 2 2 2 2 * * * 2 2 2 * * * * * 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 * 1 1 1 0 0 0 2 2 2 2 * * * * * 1 1 2 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 * * * 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 * * * * * 1 1 * 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 * * * * * * * * * 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 * * * * * * * * * * * * * 0 1 1 1 1 * * 1 * * * * * * * * * * * * * * * * 0 | * 0 0 0 0 0 0 0 2 2 * * * * * * * * 2 2 * * * * * * 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 * | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 * * 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 * * * * * 0 0 0 0 0 0 * * * * * * * * * 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 * * * 1 1 1 1 1 * * * * * * 0 1 1 1 1 1 1 1 0 * * * * * * * * * * * * * * * 0 | * * 0 0 0 0 0 0 2 2 * * * * * * * * * 2 2 * * * * 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 * | 0 0 * * * * * * * * 2 2 2 1 1 2 2 2 2 * * 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 1 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 0 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 0 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 * * * * * * 0 0 0 0 0 0 * * * * * * * * * 0 0 2 2 2 2 1 1 1 1 1 * 0 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 * * * * 2 2 2 2 2 2 2 2 * * * 2 1 1 1 1 1 * * * 0 * * 0 1 1 1 1 1 1 1 0 * * * 2 * * * * * * * 1 * * 0 0 | 0 0 0 0 0 0 0 0 2 2 * * 2 1 1 2 * * 2 2 2 2 * * 2 2 2 2 1 1 1 1 1 0 0 0 * 0 0 0 1 1 1 1 1 1 1 0 0 0 2 * 2 2 1 1 1 1 1 1 0 0 * * | * * * 0 0 0 0 * * * 2 2 2 1 1 2 2 2 * * * * 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 0 0 0 0 | 0 0 0 * * * * 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 0 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 1 0 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 * * * * * * * 0 0 0 0 0 * * * * * * * * * 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 * * 2 2 2 2 2 2 2 2 2 * * * 2 1 1 1 1 1 0 * * * * * 0 1 1 1 1 1 1 1 0 * * * 2 * * * * * * * * * 0 0 0 | 0 0 0 0 0 0 0 0 2 * * * * 1 1 * * * * 2 2 * * * 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 * 2 2 1 1 1 1 1 0 0 * * 0 | * * * 0 0 0 0 * * 2 2 2 2 1 1 2 2 2 2 * * 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 * | 0 0 0 * * * * 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 * * * * * * * 0 0 0 2 2 2 2 1 1 1 1 1 0 0 0 0 0 | 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 * * * * * * * * * * * * * * * * 1 1 1 1 1 1 1 * * * * * * * * * * * * * 0 0 0 0 | * 0 0 0 0 0 0 0 2 * * * * * * * * * * * * * * * 2 2 2 2 1 1 1 1 0 0 0 0 0 0 0 0 1 0 2 0 1 1 1 0 0 0 2 2 2 2 1 1 1 1 1 0 * * * * | 0 _ _ _ _ _ _ _ _ 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 2 1 1 1 0 0 0 2 2 2 2 2 2 1 1 1 0 0 0 0 0 | starpu-1.4.9+dfsg/examples/stencil/2.out000066400000000000000000000227361507764646700201730ustar00rootroot00000000000000| 2 0 2 0 2 0 2 0 2 0 0 0 0 2 0 2 0 2 1 1 1 1 1 1 1 2 2 2 2 2 1 0 0 0 0 1 0 0 0 0 0 0 0 2 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 0 2 0 2 0 0 0 0 0 2 2 0 2 1 1 1 1 1 1 1 1 2 2 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | * * * * * * * * * * * * * * * * * 1 1 1 1 1 1 1 1 * * * * * * 0 0 0 0 * * * * * * * * 1 1 1 * * * * * * * * * * 1 1 1 * * * * * | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 * 1 1 1 1 1 * * 2 2 2 2 2 1 * * * * 0 0 0 0 0 0 0 0 * * * 1 2 2 2 2 2 2 2 2 1 * * * 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 * 1 1 1 * 1 1 2 2 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 * 1 * 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 * 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | * * 2 2 2 2 2 0 0 0 0 0 * 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 * * * * | 0 2 * * * * * * * * * * 0 * * * * 1 1 1 1 1 1 1 1 1 1 * * * * * * * * * * * * * * * * * 1 1 * * * * * * * * * * 1 1 * * 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 * * 1 1 1 1 1 1 * * 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 * * 1 2 2 2 2 2 2 2 2 1 * * 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 * * 1 1 * * 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 * * 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | * * * 2 2 2 * * * * * * * * * 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 * * * 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 * * * * * * | 0 2 2 * * * 2 0 0 0 0 0 0 2 2 * * 1 1 1 1 1 1 1 1 1 1 2 2 * * * * * * * * * * * 0 0 0 * 1 1 1 2 2 2 2 2 2 2 2 1 * * 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 * * 1 1 1 1 1 1 1 1 * * 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 * * 1 2 2 2 * * * * * * 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 * * 1 1 1 1 * * 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * * * * 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 * 1 * * 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 * 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 * * | * * * 2 2 * * * * * * * * * * 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 * * * * * * * * * * * * 1 1 1 1 2 2 2 2 2 2 2 2 1 1 * * * * * 0 0 | 0 2 2 * * 2 2 0 0 0 0 0 0 2 2 * * 1 1 1 1 1 1 1 1 1 1 2 2 * * 0 0 0 0 0 0 0 0 0 0 0 0 * * 1 1 2 2 2 2 2 2 2 2 1 * 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 0 0 0 0 0 0 2 2 2 1 * 1 1 1 1 1 1 1 1 1 * * 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 2 2 2 2 2 2 2 * * 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 0 0 0 0 0 2 2 2 1 1 * * 1 1 1 1 1 * * 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * * 2 2 2 2 2 * 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 0 0 0 0 0 2 2 2 1 1 1 1 * * 1 1 * 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 * * * * * 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 0 0 0 0 0 2 2 2 1 1 1 1 1 1 * * 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 0 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | * * 2 2 2 2 2 2 * * * * * * 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 * * * * * * | 0 2 * 2 2 2 * * 0 0 0 0 0 2 * 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 * * * * * * * * * * * * * * 1 1 1 2 2 2 2 2 2 2 2 1 1 * 1 0 0 0 0 0 | 0 2 2 * * * 2 2 0 0 0 0 0 2 2 * * 1 1 1 1 1 1 1 1 1 1 2 2 * 1 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 2 2 2 2 2 2 2 2 1 * 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 0 0 0 0 0 2 2 2 1 * 1 1 1 1 1 1 1 1 1 * * 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 2 2 2 2 2 2 2 * * 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 0 0 0 0 0 2 2 2 1 1 * 1 1 1 1 1 1 1 * 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * * 2 2 2 2 2 * 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 0 0 0 0 0 2 2 2 1 1 1 * * 1 1 1 * * 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 * * 2 * * 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 * 1 * 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 * 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 * 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | * * 2 2 2 2 2 2 * * * * * * 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 * * * 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 * * * * * * | 0 2 * * 2 2 * * 2 0 0 0 0 2 * * 1 1 1 1 1 1 1 1 1 1 1 2 2 2 * * * * * * * * * * 0 0 0 * 1 1 1 2 2 2 2 2 2 2 2 1 1 * 1 0 0 0 0 0 | 0 2 2 2 * * 2 2 2 0 0 0 0 2 2 2 * 1 1 1 1 1 1 1 1 1 1 2 * * 1 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 2 2 2 2 2 2 2 2 1 * 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 * 1 1 1 1 1 1 1 1 1 * 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 2 2 2 2 2 2 2 * * 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 * * 1 1 1 1 1 1 * 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 * * 2 2 2 2 * * 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 * 1 1 1 * * 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 * * 2 * 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 * 1 * 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 * 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 * 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | * * * 2 2 2 2 2 * 0 0 * * * 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 * * * | 0 2 2 * * * * * 2 * * 0 0 2 * * * 1 1 1 1 1 1 1 1 1 1 2 2 2 * * * * * * * * * * * * * 1 1 1 1 2 2 2 2 2 2 2 2 1 1 * * * * 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 * 1 1 1 1 1 1 1 1 1 * * * 1 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 2 2 2 2 2 2 2 2 1 * 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 * * 1 1 1 1 1 * * 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 * * 1 2 2 2 2 2 * * * * 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 * 1 1 1 * 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 * * 2 * * * 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 * 1 * 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 * 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 * 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | * * * 2 2 2 2 2 * 0 0 0 * * * 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 * * * * * 2 * * * 0 2 2 * * 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 * * * * * 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 * * * * * * * | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 * * 1 1 1 1 1 1 1 * * * * * * * * * * * 0 0 0 0 0 * * 1 1 1 2 2 2 2 2 2 2 2 1 * 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 * * 1 1 1 1 * 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 * * 1 2 * * * * * * * * 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 * 1 * * 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 * * 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 * 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | * * * * * * * * * 0 0 0 * * * * 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 * | 0 2 2 2 2 2 2 2 2 * * * 0 2 2 2 * * 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 * * * * 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 * * 1 1 1 1 1 * * * * * * * * 0 0 0 0 0 0 * * * 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 * * 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 * * 1 * * 1 1 2 2 2 1 0 0 * * * * * * 0 0 0 * * * * * * * * * * * * * * * 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 * 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | 0 2 2 2 2 2 2 2 2 0 0 0 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 0 0 0 0 0 | starpu-1.4.9+dfsg/examples/stencil/3.out000066400000000000000000000231411507764646700201630ustar00rootroot00000000000000| 2 0 2 0 2 0 2 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 1 1 1 1 1 1 2 1 0 0 0 0 0 0 0 0 0 2 2 2 0 2 0 1 2 1 1 1 1 1 2 2 2 | 2 2 0 2 0 2 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 0 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 0 2 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 * * * * * * * * * * 2 * 2 * * * * * * * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * * 0 0 0 0 * * * * 2 2 2 * 1 1 1 1 1 1 1 * * 2 2 | * * * * 0 0 0 0 0 0 0 0 0 0 * 0 * 0 2 0 2 0 2 2 * * * 1 1 1 1 1 1 1 * * * * * 0 0 * * * * 0 0 0 2 * * * 2 * * * * * * * 1 2 * * | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 2 2 1 1 1 * * * * * * * 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 * 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 * * * * * * * * * * * * 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 * * * * * * * * * * * * * * 0 * 2 2 2 2 2 1 1 1 1 1 1 1 1 1 * * * 1 1 1 0 0 0 0 0 0 0 0 0 * 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 * * 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * * 2 2 * * * * * * * * * * 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * * 2 2 1 1 * * * * * * 2 2 2 | * 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 * * * * 1 1 1 1 1 1 * * 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 * | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 * 2 2 2 2 2 2 1 1 1 1 1 1 1 * * * * * * * * * * * * * * * * * 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 * * * * * * * * * * * * * * 0 * 2 2 2 2 2 1 1 * * * * * 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 * 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 * 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 2 2 * * 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * 2 2 2 1 1 1 1 * * * * 2 2 2 | 2 * 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 2 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 * 2 * * * * * 1 1 1 1 * 2 2 | * 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 * 2 1 1 1 1 1 1 1 1 2 * 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 * | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 * * * * * * 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 * * * * * * * * * * * 1 1 1 1 0 0 * * * * * * * 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 * * * * * * * * * * * * * * * 2 2 2 2 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 * * 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 * 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 * 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * * 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 * * * * * * * * * * * 2 2 | * 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 * 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 * | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 * * * * * * * * * * * * * * * * * 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 * * * * 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 * * * * * 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 * * * * * * * * * * * 2 2 2 2 * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 * * * * 0 0 0 0 0 0 0 0 0 0 0 * 2 2 * 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 * 2 2 1 1 1 1 1 1 * * 2 2 2 | 2 2 * 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * * 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 * * * * * * * * 1 1 * 2 2 | * * 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 * 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 * | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 * * * * * * * * * * * * 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 * * * * * * * * 1 1 1 1 1 1 1 0 0 0 0 0 * * * * 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 * 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 * * 2 2 2 * * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 * * * * * * * * * * * * * 0 0 * 2 * 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 * 2 2 1 1 1 1 * * * * 2 2 2 | 2 2 * 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 * * * * * * 1 1 1 1 * 2 2 | 2 * 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 * 2 | * 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 * | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 * * * * * * * * * * * * * * * * * * * * * * 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 * * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 * * 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 * 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 * * * * * * * * * * * * * * 2 2 * 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 * 2 1 1 * * * * * * 2 2 2 | 2 2 2 * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 * * * 1 1 1 1 1 1 * 2 2 | 2 2 * 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 * 2 | 2 * 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 * * * * * * * * * * * * 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 * | * 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 * * * * * * * * * 1 1 1 1 1 1 1 0 0 0 0 0 * * * * 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 * 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 * * 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 * * * * * * * * 2 * 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 * * * * * * 1 1 1 2 2 2 | 2 2 2 * * * * * * * 0 0 0 0 0 0 0 0 * 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 * * * * 2 2 | 2 2 * 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 * 2 | 2 * 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 1 1 * * * * * * * * * * * * * * * * * * * * * * 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 * | * 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 * * * 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 * 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * 2 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 2 2 1 1 1 1 1 1 * * * * * * * * * * 0 0 0 0 0 0 0 0 2 2 * 2 2 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 2 2 * * * * * * 1 1 1 1 1 1 1 1 1 0 * 0 0 0 0 0 0 0 2 2 2 * * 1 1 1 1 1 1 1 1 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 * * * * * * 2 2 2 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 * * 0 0 0 0 0 2 2 2 2 2 * * * * * * * * 2 2 2 | 2 2 2 * * * * * * * * * * * 0 0 0 0 2 2 2 2 * 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 * * * * * * 2 2 2 2 1 1 1 1 1 1 1 1 * 2 2 | 2 2 * 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 * 2 2 2 1 1 1 1 1 1 1 1 2 * 2 | 2 * 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 * 2 * * * * * * * * 1 2 2 * | _ 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 * 2 1 1 1 1 1 1 1 * * 2 2 | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 _ _ | starpu-1.4.9+dfsg/examples/stencil/4.out000066400000000000000000000237521507764646700201740ustar00rootroot00000000000000| 2 0 2 0 2 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 2 2 2 0 2 0 2 0 2 0 0 0 0 2 0 | 0 2 0 2 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 2 2 2 2 0 2 0 2 0 0 0 0 0 0 2 | * * * * * * 0 * * * * 2 * 2 * 2 * 2 * 2 * 2 * * * * * * * * * * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * * * 2 2 * * * * * * * * * * * * | 2 0 2 0 0 0 * 0 0 0 0 * 0 * 0 * 0 * 0 * 0 * 0 2 0 2 0 2 0 2 0 1 * * 1 1 1 1 1 1 1 1 1 1 1 1 * 0 0 2 * * 2 2 0 2 0 0 0 0 0 0 0 0 | 0 2 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 * 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | * * * 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 * * * * * * * | 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * * * * 2 2 2 2 * * * 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 1 1 * 1 1 1 1 1 1 1 1 * 1 1 * 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 1 1 1 1 * 1 1 1 1 1 1 1 1 * * 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 * 1 1 1 1 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 * * * * * * | 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * 1 1 1 1 1 1 1 1 * * 1 1 1 1 1 * * 1 1 1 1 * * * * 2 2 2 2 * * * * 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 2 * 1 1 1 1 1 1 1 1 1 * * 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 2 2 1 * 1 1 1 1 1 1 1 1 1 1 * * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 * * 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 * * * * * * * * * | 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * 2 2 2 1 1 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 1 1 1 1 * 1 1 1 1 1 * * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 1 1 1 1 * * 1 1 * 1 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 1 1 1 1 1 * * 1 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 * * * * * * * * * | 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * 2 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 1 * 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 1 * 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 1 * 1 1 * * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * * 1 1 1 1 1 * * 1 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 * * * * * * * * * | 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * 2 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 1 * 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 1 * 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 1 * 1 1 * * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * 1 1 1 1 1 1 * * 1 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 0 * * * * * * * * | 0 0 0 0 0 0 * * * * * * * * * * * * * * * * 2 2 2 1 1 * * 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * * 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 1 * 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 1 * 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * 1 1 1 1 1 1 * * * 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 0 0 * * * * * * * | 0 0 0 0 0 0 * * * * * * * * * * * * * * * * 2 2 2 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * * * 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 * * 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 1 * 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * 1 1 1 1 1 1 * 1 * 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 * * * * * * | 0 0 0 0 * * * * * * * * * * * * * * * * * * 2 2 2 1 1 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * * * * 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 * * 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * 1 1 1 1 1 1 * 1 * 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 * * * * * | 0 0 0 0 * * * * * * * * * * * * * * * * * * 2 2 2 1 1 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * * * * * 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * 1 1 1 1 1 * * 1 * 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 * * * * * | 0 0 0 * * * * * * * * * * * * * * * * * * * 2 2 2 1 1 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * * * * * 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * 1 1 1 1 1 * 1 1 * 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 * * * * | 0 0 * * * * * * * * * * * * * * * * * * * * 2 2 2 1 1 * 1 1 1 1 1 * * 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * * * * * * 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 * 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 * 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 * 1 1 1 1 1 * 1 1 * 1 1 1 1 1 * 1 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 * * * | 0 * * * * * * * * * * * * * * * * * * * * * 2 2 2 1 1 * 1 1 1 1 1 * * 1 1 1 1 1 * 1 1 1 1 1 * * * * 2 2 2 2 * * * * * * * 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 * 2 2 1 1 1 * 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 0 0 2 * 2 2 * 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 * 2 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 1 * 1 1 0 0 2 2 * * 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 * 1 1 1 1 1 _ _ _ _ _ _ _ _ 1 1 1 1 1 * 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 1 1 1 1 0 0 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 | starpu-1.4.9+dfsg/examples/stencil/6.out000066400000000000000000000251711507764646700201730ustar00rootroot00000000000000| 2 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 2 0 2 0 2 0 2 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 | 2 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 2 0 2 0 2 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 | 2 * * * * * * 2 * 2 * 2 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 1 1 1 1 1 1 1 1 | * 0 0 0 0 0 0 * 0 * 0 * 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 2 0 2 0 1 0 1 1 * 1 1 1 1 1 1 1 1 1 1 * | 2 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 2 0 1 0 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 | 2 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 1 * 1 1 1 1 1 1 * 1 1 | 2 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 | * 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 1 1 * 1 1 * 1 1 1 * | 2 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 1 1 * * 1 1 1 * 1 | 2 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 * 1 1 1 1 1 1 1 1 1 * 1 1 | 2 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 1 1 1 1 * 1 1 1 | * 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 1 * 1 1 1 * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 * 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * 1 * 1 1 1 * 1 1 | 2 0 0 0 * * * * * * * * * * * * 2 * 2 * 2 * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 | * * * * 0 0 0 0 0 0 0 0 0 0 0 0 * 0 * 0 * 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 * 1 1 * 1 1 1 * 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 0 0 * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 * * 1 1 1 * 1 1 1 | * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 | 2 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 | * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 * 1 1 * 1 1 1 * * 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 * * 1 1 1 * 1 1 1 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 | 2 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 | * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * 1 1 1 * * 1 1 1 * * 1 1 1 1 * 1 1 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 1 * 1 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * * 1 1 1 * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 | * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 * 1 1 * 1 1 1 * 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 * * 1 1 1 * 1 1 1 | * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 | 2 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 | * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * * 1 1 1 * 1 1 1 1 * 1 1 1 1 * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 * 1 1 * 1 1 1 1 * 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * 1 1 1 1 * 1 1 1 * * 1 1 1 1 * 1 1 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * * 1 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 | * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 * * 1 1 1 1 1 * 1 1 1 * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 * 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * * 1 1 1 * 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 | * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 | 2 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 | * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 * 1 1 1 _ _ _ _ _ _ 1 1 1 * * | 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 _ _ _ _ _ _ _ _ _ _ _ _ 1 1 | starpu-1.4.9+dfsg/examples/stencil/Makefile.am000066400000000000000000000066741507764646700213400ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk include $(top_srcdir)/make/starpu-loader.mk if STARPU_SIMGRID LOADER_BIN = $(LAUNCHER) endif AM_CFLAGS += $(APP_CFLAGS) AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ LIBS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) LIBS += $(STARPU_EXPORTED_LIBS) if STARPU_USE_MPI LIBS += $(top_builddir)/mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la AM_CPPFLAGS += -I$(top_srcdir)/mpi/include LAUNCHER = $(STARPU_MPIEXEC) LAUNCHER_ENV = $(MPI_RUN_ENV) if STARPU_SIMGRID NVCCFLAGS += --compiler-options -fPIC endif endif CC = $(CC_OR_MPICC) ##################################### # What to install and what to check # ##################################### check_PROGRAMS = $(STARPU_EXAMPLES) if !STARPU_SIMGRID if STARPU_USE_MPI if STARPU_MPI_CHECK TESTS = $(STARPU_EXAMPLES) endif else TESTS = $(STARPU_EXAMPLES) endif endif ################### # stencil example # ################### STARPU_EXAMPLES = \ stencil \ implicit_stencil examplebindir = $(libdir)/starpu/examples/stencil examplebin_PROGRAMS = \ stencil \ implicit_stencil stencil_SOURCES = \ life.c \ stencil-kernels.c \ stencil-tasks.c \ stencil-blocks.c \ stencil.c noinst_HEADERS = \ stencil.h \ implicit-stencil.h \ shadow.h if STARPU_USE_CUDA stencil_SOURCES += \ life_cuda.cu \ shadow.cu endif if STARPU_USE_OPENCL stencil_SOURCES += \ life_opencl.c \ shadow_opencl.c endif implicit_stencil_SOURCES = \ life.c \ implicit-stencil-kernels.c \ implicit-stencil-tasks.c \ implicit-stencil-blocks.c \ implicit-stencil.c if STARPU_USE_CUDA implicit_stencil_SOURCES += \ life_cuda.cu \ shadow.cu endif if STARPU_USE_OPENCL implicit_stencil_SOURCES += \ life_opencl.c \ shadow_opencl.c endif outs = \ 0.5.out \ 0.out \ 1.out \ 2.out \ 3.out \ 4.out \ 6.out \ mpi.out EXTRA_DIST = $(outs) results run README pics: $(outs:.out=.xpm) CLEANFILES = *.gcno *.gcda *.xpm starpu_idle_microsec.log .out.out2: $(GREP) '^|' $< | tr -d ' ' > $@ .out2.xpm: ( width=$$(expr $$(head -n 1 < $< | wc -c) - 1) ; \ height=`wc -l < $<` ; \ echo "/* XPM */" ; \ echo "static char * test_xpm[] = {" ; \ echo "\"$$width $$height 9 1\"," ; \ echo "\"_ c None\"," ; \ echo "\"0 c #FF0000\"," ; \ echo "\"1 c #00FF00\"," ; \ echo "\"2 c #0000FF\"," ; \ echo "\"3 c #FFFF00\"," ; \ echo "\"4 c #FF00FF\"," ; \ echo "\"5 c #00FFFF\"," ; \ echo "\"| c #FFFFFF\"," ; \ echo "\"* c #000000\"," ; \ < $< $(SED) -e 's/^/"/' -e 's/$$/",/' | $(SED) -e '$$s/",$$/"};/' ) > $@ view: feh --zoom 800 -F 0.xpm 0.5.xpm 1.xpm 2.xpm 3.xpm 4.xpm 6.xpm mpi.xpm starpu-1.4.9+dfsg/examples/stencil/Makefile.in000066400000000000000000001724231507764646700213450ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ noinst_PROGRAMS = $(am__EXEEXT_2) # Make tests run through mpiexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec # switch off local socket usage #MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 @STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader @STARPU_USE_MPI_TRUE@am__append_8 = $(top_builddir)/mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la @STARPU_USE_MPI_TRUE@am__append_9 = -I$(top_srcdir)/mpi/include @STARPU_SIMGRID_TRUE@@STARPU_USE_MPI_TRUE@am__append_10 = --compiler-options -fPIC check_PROGRAMS = $(am__EXEEXT_1) @STARPU_MPI_CHECK_TRUE@@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_TRUE@TESTS = $(am__EXEEXT_1) @STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_FALSE@TESTS = $(am__EXEEXT_1) examplebin_PROGRAMS = stencil$(EXEEXT) implicit_stencil$(EXEEXT) @STARPU_USE_CUDA_TRUE@am__append_11 = \ @STARPU_USE_CUDA_TRUE@ life_cuda.cu \ @STARPU_USE_CUDA_TRUE@ shadow.cu @STARPU_USE_OPENCL_TRUE@am__append_12 = \ @STARPU_USE_OPENCL_TRUE@ life_opencl.c \ @STARPU_USE_OPENCL_TRUE@ shadow_opencl.c @STARPU_USE_CUDA_TRUE@am__append_13 = \ @STARPU_USE_CUDA_TRUE@ life_cuda.cu \ @STARPU_USE_CUDA_TRUE@ shadow.cu @STARPU_USE_OPENCL_TRUE@am__append_14 = \ @STARPU_USE_OPENCL_TRUE@ life_opencl.c \ @STARPU_USE_OPENCL_TRUE@ shadow_opencl.c subdir = examples/stencil ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__EXEEXT_1 = stencil$(EXEEXT) implicit_stencil$(EXEEXT) am__installdirs = "$(DESTDIR)$(examplebindir)" @STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_2 = loader$(EXEEXT) PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) am__implicit_stencil_SOURCES_DIST = life.c implicit-stencil-kernels.c \ implicit-stencil-tasks.c implicit-stencil-blocks.c \ implicit-stencil.c life_cuda.cu shadow.cu life_opencl.c \ shadow_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_1 = life_cuda.$(OBJEXT) \ @STARPU_USE_CUDA_TRUE@ shadow.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_2 = life_opencl.$(OBJEXT) \ @STARPU_USE_OPENCL_TRUE@ shadow_opencl.$(OBJEXT) am_implicit_stencil_OBJECTS = life.$(OBJEXT) \ implicit-stencil-kernels.$(OBJEXT) \ implicit-stencil-tasks.$(OBJEXT) \ implicit-stencil-blocks.$(OBJEXT) implicit-stencil.$(OBJEXT) \ $(am__objects_1) $(am__objects_2) implicit_stencil_OBJECTS = $(am_implicit_stencil_OBJECTS) implicit_stencil_LDADD = $(LDADD) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = loader_SOURCES = loader.c loader_OBJECTS = loader-loader.$(OBJEXT) loader_LDADD = $(LDADD) am__stencil_SOURCES_DIST = life.c stencil-kernels.c stencil-tasks.c \ stencil-blocks.c stencil.c life_cuda.cu shadow.cu \ life_opencl.c shadow_opencl.c am_stencil_OBJECTS = life.$(OBJEXT) stencil-kernels.$(OBJEXT) \ stencil-tasks.$(OBJEXT) stencil-blocks.$(OBJEXT) \ stencil.$(OBJEXT) $(am__objects_1) $(am__objects_2) stencil_OBJECTS = $(am_stencil_OBJECTS) stencil_LDADD = $(LDADD) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/implicit-stencil-blocks.Po \ ./$(DEPDIR)/implicit-stencil-kernels.Po \ ./$(DEPDIR)/implicit-stencil-tasks.Po \ ./$(DEPDIR)/implicit-stencil.Po ./$(DEPDIR)/life.Po \ ./$(DEPDIR)/life_opencl.Po ./$(DEPDIR)/loader-loader.Po \ ./$(DEPDIR)/shadow_opencl.Po ./$(DEPDIR)/stencil-blocks.Po \ ./$(DEPDIR)/stencil-kernels.Po ./$(DEPDIR)/stencil-tasks.Po \ ./$(DEPDIR)/stencil.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = $(implicit_stencil_SOURCES) loader.c $(stencil_SOURCES) DIST_SOURCES = $(am__implicit_stencil_SOURCES_DIST) loader.c \ $(am__stencil_SOURCES_DIST) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac HEADERS = $(noinst_HEADERS) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) AM_RECURSIVE_TARGETS = check recheck TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-loader.mk \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk README DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = $(CC_OR_MPICC) CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) \ $(STARPU_EXPORTED_LIBS) $(am__append_8) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) $(am__append_10) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ @STARPU_USE_MPI_TRUE@LAUNCHER_ENV = $(MPI_RUN_ENV) LAUNCHER_ENV = $(am__append_4) $(am__append_6) @STARPU_USE_MPI_TRUE@LAUNCHER = $(STARPU_MPIEXEC) LAUNCHER = $(am__append_3) $(am__append_5) AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(APP_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile # When GNU parallel is available and -j is passed to make, run tests through # parallel, using a "starpu" semaphore. # Also make test shell scripts run its tests through parallel, using a # "substarpu" semaphore. This brings some overload, but only one level. @HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') @STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 @STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # @STARPU_SIMGRID_TRUE@LOADER_BIN = $(LAUNCHER) @STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ @STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) @STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" @STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) AM_TESTS_FD_REDIRECT = 9>&2 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ \ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) $(am__append_9) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ ################### # stencil example # ################### STARPU_EXAMPLES = \ stencil \ implicit_stencil examplebindir = $(libdir)/starpu/examples/stencil stencil_SOURCES = life.c stencil-kernels.c stencil-tasks.c \ stencil-blocks.c stencil.c $(am__append_11) $(am__append_12) noinst_HEADERS = \ stencil.h \ implicit-stencil.h \ shadow.h implicit_stencil_SOURCES = life.c implicit-stencil-kernels.c \ implicit-stencil-tasks.c implicit-stencil-blocks.c \ implicit-stencil.c $(am__append_13) $(am__append_14) outs = \ 0.5.out \ 0.out \ 1.out \ 2.out \ 3.out \ 4.out \ 6.out \ mpi.out EXTRA_DIST = $(outs) results run README CLEANFILES = *.gcno *.gcda *.xpm starpu_idle_microsec.log all: all-am .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .out .out2 .test .test$(EXEEXT) .trs .xpm $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign examples/stencil/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign examples/stencil/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-checkPROGRAMS: @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list install-examplebinPROGRAMS: $(examplebin_PROGRAMS) @$(NORMAL_INSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ fi; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ while read p p1; do if test -f $$p \ || test -f $$p1 \ ; then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' \ -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ } \ ; done uninstall-examplebinPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' \ `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(examplebindir)" && rm -f $$files clean-examplebinPROGRAMS: @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list clean-noinstPROGRAMS: @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list implicit_stencil$(EXEEXT): $(implicit_stencil_OBJECTS) $(implicit_stencil_DEPENDENCIES) $(EXTRA_implicit_stencil_DEPENDENCIES) @rm -f implicit_stencil$(EXEEXT) $(AM_V_CCLD)$(LINK) $(implicit_stencil_OBJECTS) $(implicit_stencil_LDADD) $(LIBS) loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) @rm -f loader$(EXEEXT) $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) stencil$(EXEEXT): $(stencil_OBJECTS) $(stencil_DEPENDENCIES) $(EXTRA_stencil_DEPENDENCIES) @rm -f stencil$(EXEEXT) $(AM_V_CCLD)$(LINK) $(stencil_OBJECTS) $(stencil_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/implicit-stencil-blocks.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/implicit-stencil-kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/implicit-stencil-tasks.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/implicit-stencil.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/life.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/life_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shadow_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stencil-blocks.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stencil-kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stencil-tasks.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stencil.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< loader-loader.o: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c loader-loader.obj: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-am TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-am CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-am cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: $(check_PROGRAMS) @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all $(check_PROGRAMS) @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? stencil.log: stencil$(EXEEXT) @p='stencil$(EXEEXT)'; \ b='stencil'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) implicit_stencil.log: implicit_stencil$(EXEEXT) @p='implicit_stencil$(EXEEXT)'; \ b='implicit_stencil'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: check-am all-am: Makefile $(PROGRAMS) $(HEADERS) installdirs: for dir in "$(DESTDIR)$(examplebindir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ clean-libtool clean-noinstPROGRAMS mostlyclean-am distclean: distclean-am -rm -f ./$(DEPDIR)/implicit-stencil-blocks.Po -rm -f ./$(DEPDIR)/implicit-stencil-kernels.Po -rm -f ./$(DEPDIR)/implicit-stencil-tasks.Po -rm -f ./$(DEPDIR)/implicit-stencil.Po -rm -f ./$(DEPDIR)/life.Po -rm -f ./$(DEPDIR)/life_opencl.Po -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f ./$(DEPDIR)/shadow_opencl.Po -rm -f ./$(DEPDIR)/stencil-blocks.Po -rm -f ./$(DEPDIR)/stencil-kernels.Po -rm -f ./$(DEPDIR)/stencil-tasks.Po -rm -f ./$(DEPDIR)/stencil.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-examplebinPROGRAMS install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f ./$(DEPDIR)/implicit-stencil-blocks.Po -rm -f ./$(DEPDIR)/implicit-stencil-kernels.Po -rm -f ./$(DEPDIR)/implicit-stencil-tasks.Po -rm -f ./$(DEPDIR)/implicit-stencil.Po -rm -f ./$(DEPDIR)/life.Po -rm -f ./$(DEPDIR)/life_opencl.Po -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f ./$(DEPDIR)/shadow_opencl.Po -rm -f ./$(DEPDIR)/stencil-blocks.Po -rm -f ./$(DEPDIR)/stencil-kernels.Po -rm -f ./$(DEPDIR)/stencil-tasks.Po -rm -f ./$(DEPDIR)/stencil.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-examplebinPROGRAMS .MAKE: check-am install-am install-strip .PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ check-am clean clean-checkPROGRAMS clean-examplebinPROGRAMS \ clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ ctags ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am \ install-examplebinPROGRAMS install-exec install-exec-am \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip installcheck installcheck-am \ installdirs maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ uninstall uninstall-am uninstall-examplebinPROGRAMS .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # # Test loading goes through a lot of launchers: # # - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. # either mpirun or starpu_tcpipexec # # - $(LOADER), i.e. tests/loader, is then called to implement timeout, running # gdb, etc. But if it detects that the test is a .sh script, it just executes # it # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # to run the program through e.g. valgrind.sh # # When the program is a shell script, additionally: # # - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) # # - $(MS_LAUNCHER) is called to run the test through starpu_msexec # # - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program # through it. # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # export LAUNCHER @HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL export MS_LAUNCHER LAUNCHER ?= MS_LAUNCHER ?= @STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr export LSAN_OPTIONS export TSAN_OPTIONS pics: $(outs:.out=.xpm) .out.out2: $(GREP) '^|' $< | tr -d ' ' > $@ .out2.xpm: ( width=$$(expr $$(head -n 1 < $< | wc -c) - 1) ; \ height=`wc -l < $<` ; \ echo "/* XPM */" ; \ echo "static char * test_xpm[] = {" ; \ echo "\"$$width $$height 9 1\"," ; \ echo "\"_ c None\"," ; \ echo "\"0 c #FF0000\"," ; \ echo "\"1 c #00FF00\"," ; \ echo "\"2 c #0000FF\"," ; \ echo "\"3 c #FFFF00\"," ; \ echo "\"4 c #FF00FF\"," ; \ echo "\"5 c #00FFFF\"," ; \ echo "\"| c #FFFFFF\"," ; \ echo "\"* c #000000\"," ; \ < $< $(SED) -e 's/^/"/' -e 's/$$/",/' | $(SED) -e '$$s/",$$/"};/' ) > $@ view: feh --zoom 800 -F 0.xpm 0.5.xpm 1.xpm 2.xpm 3.xpm 4.xpm 6.xpm mpi.xpm # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/examples/stencil/README000066400000000000000000000031341507764646700201500ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # This is a sample 3D stencil application (here just using the game of life rules for simplicity), split on the z axis. This is a suggest order of read: life.c life.cu: Heart of the stencil computation: compute a new state from an old one. shadow.cu shadow.h: Perform replication of data on X and Y edges, to fold the domain on itself through mere replication of the source state. stencil.h: Declarations stencil-kernels.c: Computation Kernels stencil-blocks.c: Manage block and tags allocation stencil-tasks.c: Schedule tasks for updates and saves stencil.c: Main application *.out: various results according to beta value (communication vs computation penalty ratio), run make pics or make view to get pictures. mpi.out: results on MPI. results: a few results You can also use the implicit distributed flavour of this application (e.g. with communications between processes automatically inferred by StarPU-MPI), which is called implicit_stencil. starpu-1.4.9+dfsg/examples/stencil/implicit-stencil-blocks.c000066400000000000000000000277001507764646700241650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "implicit-stencil.h" #include /* Manage block and tags allocation */ static struct block_description *blocks; static unsigned sizex, sizey, sizez; static unsigned nbz; static unsigned *block_sizes_z; /* * Tags for various codelet completion */ /* * common tag format: */ static starpu_tag_t tag_common(int z, int dir, int type) { return (((((starpu_tag_t)type) << 4) | ((dir+1)/2)) << 32)|(starpu_tag_t)z; } /* Completion of last update tasks */ starpu_tag_t TAG_FINISH(int z) { z = (z + nbz)%nbz; starpu_tag_t tag = tag_common(z, 0, 1); return tag; } /* Completion of the save codelet for MPI send/recv */ starpu_tag_t TAG_START(int z, int dir) { z = (z + nbz)%nbz; starpu_tag_t tag = tag_common(z, dir, 2); return tag; } /* * common MPI tag format: */ static int mpi_tag_common(int z, int dir, int layer_or_boundary, int buffer) { return (z<<12) | (layer_or_boundary << 8) | ((((1+dir)/2))<<4) | buffer; } int MPI_TAG_LAYERS(int z, int buffer) { z = (z + nbz)%nbz; /* No direction for layers ; layer is 0 */ int tag = mpi_tag_common(z, 0, 0, buffer); return tag; } int MPI_TAG_BOUNDARIES(int z, int dir, int buffer) { z = (z + nbz)%nbz; int tag = mpi_tag_common(z, dir, 1, buffer); return tag; } /* * Block descriptors */ /* Compute the size of the different blocks */ static void compute_block_sizes(void) { block_sizes_z = (unsigned *) malloc(nbz*sizeof(unsigned)); STARPU_ASSERT(block_sizes_z); /* Perhaps the last chunk is smaller */ unsigned default_block_size = (sizez+nbz-1)/nbz; unsigned remaining = sizez; unsigned b; for (b = 0; b < nbz; b++) { block_sizes_z[b] = MIN(default_block_size, remaining); remaining -= block_sizes_z[b]; } STARPU_ASSERT(remaining == 0); } unsigned get_block_size(int bz) { return block_sizes_z[bz]; } struct block_description *get_block_description(int z) { z = (z + nbz)%nbz; STARPU_ASSERT(&blocks[z]); return &blocks[z]; } int get_block_mpi_node(int z) { z = (z + nbz)%nbz; return blocks[z].mpi_node; } void create_blocks_array(unsigned _sizex, unsigned _sizey, unsigned _sizez, unsigned _nbz) { /* Store the parameters */ nbz = _nbz; sizex = _sizex; sizey = _sizey; sizez = _sizez; /* Create a grid of block descriptors */ blocks = (struct block_description *) calloc(nbz, sizeof(struct block_description)); STARPU_ASSERT(blocks); /* What is the size of the different blocks ? */ compute_block_sizes(); unsigned bz; for (bz = 0; bz < nbz; bz++) { struct block_description * block = get_block_description(bz); /* Which block is it ? */ block->bz = bz; /* For simplicity, we store which are the neighbours blocks */ block->boundary_blocks[B] = get_block_description((bz-1+nbz)%nbz); block->boundary_blocks[T] = get_block_description((bz+1)%nbz); } } void free_blocks_array() { free(blocks); free(block_sizes_z); } /* * Initialization of the blocks */ void assign_blocks_to_workers(int rank) { unsigned bz; /* NB: perhaps we could count a GPU as multiple workers */ /* how many workers are there ? */ /*unsigned nworkers = starpu_worker_get_count();*/ /* how many blocks are on that MPI node ? */ // unsigned nblocks = 0; // for (bz = 0; bz < nbz; bz++) // { // struct block_description *block = // get_block_description(bz); // // if (block->mpi_node == rank) // nblocks++; // } /* how many blocks per worker ? */ /*unsigned nblocks_per_worker = (nblocks + nworkers - 1)/nworkers;*/ /* we now attribute up to nblocks_per_worker blocks per workers */ unsigned attributed = 0; for (bz = 0; bz < nbz; bz++) { struct block_description *block = get_block_description(bz); if (block->mpi_node == rank) { unsigned workerid; /* Manage initial block distribution between CPU and GPU */ #if 0 #if 1 /* GPUs then CPUs */ if (attributed < 3*18) workerid = attributed / 18; else workerid = 3+ (attributed - 3*18) / 2; #else /* GPUs interleaved with CPUs */ if ((attributed % 20) <= 1) workerid = 3 + attributed / 20; else if (attributed < 60) workerid = attributed / 20; else workerid = (attributed - 60)/2 + 6; #endif #else /* Only GPUS */ workerid = (attributed / 21) % 3; #endif /*= attributed/nblocks_per_worker;*/ block->preferred_worker = workerid; attributed++; } } } void assign_blocks_to_mpi_nodes(int world_size) { unsigned nzblocks_per_process = (nbz + world_size - 1) / world_size; unsigned bz; for (bz = 0; bz < nbz; bz++) { struct block_description *block = get_block_description(bz); block->mpi_node = bz / nzblocks_per_process; } } static size_t allocated = 0; static void allocate_block_on_node(starpu_data_handle_t *handleptr, unsigned bz, TYPE **ptr, unsigned nx, unsigned ny, unsigned nz) { int ret; size_t block_size = nx*ny*nz*sizeof(TYPE); /* Allocate memory */ #if 1 ret = starpu_malloc_flags((void **)ptr, block_size, STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); STARPU_ASSERT(ret == 0); #else *ptr = malloc(block_size); STARPU_ASSERT(*ptr); #endif allocated += block_size; //#ifndef STARPU_SIMGRID // /* Fill the blocks with 0 */ // memset(*ptr, 0, block_size); //#endif /* Register it to StarPU */ starpu_block_data_register(handleptr, STARPU_MAIN_RAM, (uintptr_t)*ptr, nx, nx*ny, nx, ny, nz, sizeof(TYPE)); starpu_data_set_coordinates(*handleptr, 1, bz); } static void free_block_on_node(starpu_data_handle_t handleptr, unsigned nx, unsigned ny, unsigned nz) { void *ptr = (void *) starpu_block_get_local_ptr(handleptr); size_t block_size = nx*ny*nz*sizeof(TYPE); starpu_data_unregister(handleptr); starpu_free_flags(ptr, block_size, STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); } void display_memory_consumption(int rank, double time) { FPRINTF(stderr, "%lu B of memory were allocated on node %d in %f ms\n", (unsigned long)allocated, rank, time/1000); } void allocate_memory_on_node(int rank) { unsigned bz; /* Correctly allocate and declare all data handles to StarPU. */ for (bz = 0; bz < nbz; bz++) { struct block_description *block = get_block_description(bz); int node = block->mpi_node; unsigned size_bz = block_sizes_z[bz]; if (node == rank) { /* Main blocks */ allocate_block_on_node(&block->layers_handle[0], bz, &block->layers[0], (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K)); allocate_block_on_node(&block->layers_handle[1], bz, &block->layers[1], (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K)); /* Boundary blocks : Top */ allocate_block_on_node(&block->boundaries_handle[T][0], bz, &block->boundaries[T][0], (sizex + 2*K), (sizey + 2*K), K); allocate_block_on_node(&block->boundaries_handle[T][1], bz, &block->boundaries[T][1], (sizex + 2*K), (sizey + 2*K), K); /* Boundary blocks : Bottom */ allocate_block_on_node(&block->boundaries_handle[B][0], bz, &block->boundaries[B][0], (sizex + 2*K), (sizey + 2*K), K); allocate_block_on_node(&block->boundaries_handle[B][1], bz, &block->boundaries[B][1], (sizex + 2*K), (sizey + 2*K), K); } /* Register void blocks to StarPU, that StarPU-MPI will request to * neighbour nodes if needed for the local computation */ else { /* Main blocks */ starpu_block_data_register(&block->layers_handle[0], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K), sizeof(TYPE)); starpu_block_data_register(&block->layers_handle[1], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K), sizeof(TYPE)); /* Boundary blocks : Top */ starpu_block_data_register(&block->boundaries_handle[T][0], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), K, sizeof(TYPE)); starpu_block_data_register(&block->boundaries_handle[T][1], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), K, sizeof(TYPE)); /* Boundary blocks : Bottom */ starpu_block_data_register(&block->boundaries_handle[B][0], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), K, sizeof(TYPE)); starpu_block_data_register(&block->boundaries_handle[B][1], -1, (uintptr_t) NULL, (sizex + 2*K), (sizex + 2*K)*(sizey + 2*K), (sizex + 2*K), (sizey + 2*K), K, sizeof(TYPE)); } #if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE) /* Register all data to StarPU-MPI, even the ones that are not * allocated on the local node. */ /* Main blocks */ starpu_mpi_data_register(block->layers_handle[0], MPI_TAG_LAYERS(bz, 0), node); starpu_mpi_data_register(block->layers_handle[1], MPI_TAG_LAYERS(bz, 1), node); /* Boundary blocks : Top */ starpu_mpi_data_register(block->boundaries_handle[T][0], MPI_TAG_BOUNDARIES(bz, T, 0), node); starpu_mpi_data_register(block->boundaries_handle[T][1], MPI_TAG_BOUNDARIES(bz, T, 1), node); /* Boundary blocks : Bottom */ starpu_mpi_data_register(block->boundaries_handle[B][0], MPI_TAG_BOUNDARIES(bz, B, 0), node); starpu_mpi_data_register(block->boundaries_handle[B][1], MPI_TAG_BOUNDARIES(bz, B, 1), node); #endif } /* Initialize all the data in parallel */ for (bz = 0; bz < nbz; bz++) { struct block_description *block = get_block_description(bz); int node = block->mpi_node; if (node == rank) { /* Set all the data to 0 */ create_task_memset(sizex, sizey, bz); /* Initialize the first layer with some random data */ create_task_initlayer(sizex, sizey, bz); } } starpu_task_wait_for_all(); } void free_memory_on_node(int rank) { unsigned bz; for (bz = 0; bz < nbz; bz++) { struct block_description *block = get_block_description(bz); int node = block->mpi_node; /* Main blocks */ if (node == rank) { free_block_on_node(block->layers_handle[0], (sizex + 2*K), (sizey + 2*K), K); free_block_on_node(block->layers_handle[1], (sizex + 2*K), (sizey + 2*K), K); } else { starpu_data_unregister(block->layers_handle[0]); starpu_data_unregister(block->layers_handle[1]); } /* Boundary blocks : Top */ if (node == rank) { free_block_on_node(block->boundaries_handle[T][0], (sizex + 2*K), (sizey + 2*K), K); free_block_on_node(block->boundaries_handle[T][1], (sizex + 2*K), (sizey + 2*K), K); } else { starpu_data_unregister(block->boundaries_handle[T][0]); starpu_data_unregister(block->boundaries_handle[T][1]); } /* Boundary blocks : Bottom */ if (node == rank) { free_block_on_node(block->boundaries_handle[B][0], (sizex + 2*K), (sizey + 2*K), K); free_block_on_node(block->boundaries_handle[B][1], (sizex + 2*K), (sizey + 2*K), K); } else { starpu_data_unregister(block->boundaries_handle[B][0]); starpu_data_unregister(block->boundaries_handle[B][1]); } } } /* check how many cells are alive */ void check(int rank) { unsigned bz; for (bz = 0; bz < nbz; bz++) { struct block_description *block = get_block_description(bz); int node = block->mpi_node; /* Main blocks */ if (node == rank) { unsigned size_bz = block_sizes_z[bz]; #ifdef LIFE unsigned x, y, z; unsigned sum = 0; for (x = 0; x < sizex; x++) for (y = 0; y < sizey; y++) for (z = 0; z < size_bz; z++) sum += block->layers[0][(K+x)+(K+y)*(sizex + 2*K)+(K+z)*(sizex+2*K)*(sizey+2*K)]; printf("block %u got %u/%u alive\n", bz, sum, sizex*sizey*size_bz); #endif } } } starpu-1.4.9+dfsg/examples/stencil/implicit-stencil-kernels.c000066400000000000000000000611661507764646700243570ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "implicit-stencil.h" /* Computation Kernels */ /* * There are three codeletets: * * - cl_update, which takes a block and the boundaries of its neighbours, loads * the boundaries into the block and perform some update loops: * * comp. buffer save. buffers comp. buffer save. buffers comp. buffer * | ... | * | | +------------------+ +------------------+ * | #N+1 | | #N+1 bottom copy====>#N+1 bottom copy | * +-------------+ +------------------+ +------------------+ * | #N top copy | | #N top copy | | | * +-------------+ +------------------+ | | * | #N | * ... * | | +----------------+ +----------------------+ * | | | #N bottom copy | | block #N bottom copy | * ^ +------------------+ +----------------+ +----------------------+ * | | #N-1 top copy <====#N-1 top copy | | block #N-1 | * | +------------------+ +----------------+ | | * Z ... * * - save_cl_top, which take a block and its top boundary, and saves the top of * the block into the boundary (to be given as bottom of the neighbour above * this block). * * comp. buffer save. buffers comp. buffer save. buffers comp. buffer * | ... | * | | +------------------+ +------------------+ * | #N+1 | | #N+1 bottom copy | | #N+1 bottom copy | * +-------------+ +------------------+ +------------------+ * | #N top copy | | #N top copy <==== | * +-------------+ +------------------+ |..................| * | #N | * ... * | | +----------------+ +----------------------+ * | | | #N bottom copy | | block #N bottom copy | * ^ +------------------+ +----------------+ +----------------------+ * | | #N-1 top copy | | #N-1 top copy | | block #N-1 | * | +------------------+ +----------------+ | | * Z ... * * - save_cl_bottom, same for the bottom * comp. buffer save. buffers comp. buffer save. buffers comp. buffer * | ... | * | | +------------------+ +------------------+ * | #N+1 | | #N+1 bottom copy | | #N+1 bottom copy | * +-------------+ +------------------+ +------------------+ * | #N top copy | | #N top copy | | | * +-------------+ +------------------+ | | * | #N | * ... * |..................| +----------------+ +----------------------+ * | ====>#N bottom copy | | block #N bottom copy | * ^ +------------------+ +----------------+ +----------------------+ * | | #N-1 top copy | | #N-1 top copy | | block #N-1 | * | +------------------+ +----------------+ | | * Z ... * * The idea is that the computation buffers thus don't have to move, only their * boundaries are copied to buffers that do move (be it CPU/GPU, GPU/GPU or via * MPI) * * For each of the buffers above, there are two (0/1) buffers to make new/old switch costless. */ #if 0 # define DEBUG(fmt, ...) fprintf(stderr,fmt,##__VA_ARGS__) #else # define DEBUG(fmt, ...) (void) 0 #endif /* Record which GPU ran which block, for nice pictures */ int who_runs_what_len; int *who_runs_what; int *who_runs_what_index; double *last_tick; /* Achieved iterations */ static int achieved_iter; /* Record how many updates each worker performed */ unsigned update_per_worker[STARPU_NMAXWORKERS]; static void record_who_runs_what(struct block_description *block) { double now, now2, diff, delta = get_ticks() * 1000; int workerid = starpu_worker_get_id_check(); now = starpu_timing_now(); now2 = now - start; diff = now2 - last_tick[block->bz]; while (diff >= delta) { last_tick[block->bz] += delta; diff = now2 - last_tick[block->bz]; if (who_runs_what_index[block->bz] < who_runs_what_len) who_runs_what[block->bz + (who_runs_what_index[block->bz]++) * get_nbz()] = -1; } if (who_runs_what_index[block->bz] < who_runs_what_len) who_runs_what[block->bz + (who_runs_what_index[block->bz]++) * get_nbz()] = global_workerid(workerid); } static void check_load(struct starpu_block_interface *block, struct starpu_block_interface *boundary) { /* Sanity checks */ STARPU_ASSERT(block->nx == boundary->nx); STARPU_ASSERT(block->ny == boundary->ny); STARPU_ASSERT(boundary->nz == K); /* NB: this is not fully guaranteed ... but it's *very* likely and that * makes our life much simpler */ STARPU_ASSERT(block->ldy == boundary->ldy); STARPU_ASSERT(block->ldz == boundary->ldz); } /* * Load a neighbour's boundary into block, CPU version */ static void load_subblock_from_buffer_cpu(void *_block, void *_boundary, unsigned firstz) { struct starpu_block_interface *block = (struct starpu_block_interface *)_block; struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; check_load(block, boundary); /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; unsigned offset = firstz*block->ldz; TYPE *block_data = (TYPE *)block->ptr; TYPE *boundary_data = (TYPE *)boundary->ptr; memcpy(&block_data[offset], boundary_data, boundary_size); } /* * Load a neighbour's boundary into block, CUDA version */ #ifdef STARPU_USE_CUDA static void load_subblock_from_buffer_cuda(void *_block, void *_boundary, unsigned firstz) { struct starpu_block_interface *block = (struct starpu_block_interface *)_block; struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; check_load(block, boundary); /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; unsigned offset = firstz*block->ldz; TYPE *block_data = (TYPE *)block->ptr; TYPE *boundary_data = (TYPE *)boundary->ptr; cudaMemcpyAsync(&block_data[offset], boundary_data, boundary_size, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); } /* * cl_update (CUDA version) */ static void update_func_cuda(void *descr[], void *arg) { unsigned z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); int workerid = starpu_worker_get_id_check(); DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); if (block->bz == 0) FPRINTF(stderr,"!!! DO update_func_cuda z %u CUDA%d !!!\n", block->bz, workerid); else DEBUG("!!! DO update_func_cuda z %u CUDA%d !!!\n", block->bz, workerid); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int rank = 0; MPI_Comm_rank(MPI_COMM_WORLD, &rank); DEBUG("!!! RANK %d !!!\n", rank); #endif DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); unsigned block_size_z = get_block_size(block->bz); unsigned i; update_per_worker[workerid]++; record_who_runs_what(block); /* * Load neighbours' boundaries : TOP */ /* The offset along the z axis is (block_size_z + K) */ load_subblock_from_buffer_cuda(descr[0], descr[2], block_size_z+K); load_subblock_from_buffer_cuda(descr[1], descr[3], block_size_z+K); /* * Load neighbours' boundaries : BOTTOM */ load_subblock_from_buffer_cuda(descr[0], descr[4], 0); load_subblock_from_buffer_cuda(descr[1], descr[5], 0); /* * Stencils ... do the actual work here :) TODO */ for (i=1; i<=K; i++) { struct starpu_block_interface *oldb = descr[i%2], *newb = descr[(i+1)%2]; TYPE *old = (void*) oldb->ptr, *newer = (void*) newb->ptr; /* Shadow data */ cuda_shadow_host(block->bz, old, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i); /* And perform actual computation */ #ifdef LIFE cuda_life_update_host(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i); #else cudaMemcpyAsync(newer, old, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer), cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); #endif /* LIFE */ } } #endif /* STARPU_USE_CUDA */ /* * Load a neighbour's boundary into block, OpenCL version */ #ifdef STARPU_USE_OPENCL static void load_subblock_from_buffer_opencl(struct starpu_block_interface *block, struct starpu_block_interface *boundary, unsigned firstz) { check_load(block, boundary); /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; unsigned offset = firstz*block->ldz; cl_mem block_data = (cl_mem)block->dev_handle; cl_mem boundary_data = (cl_mem)boundary->dev_handle; cl_command_queue cq; starpu_opencl_get_current_queue(&cq); cl_int ret = clEnqueueCopyBuffer(cq, boundary_data, block_data, 0, offset, boundary_size, 0, NULL, NULL); if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret); } /* * cl_update (OpenCL version) */ static void update_func_opencl(void *descr[], void *arg) { unsigned z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); int workerid = starpu_worker_get_id_check(); DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); if (block->bz == 0) FPRINTF(stderr,"!!! DO update_func_opencl z %u OPENCL%d !!!\n", block->bz, workerid); else DEBUG("!!! DO update_func_opencl z %u OPENCL%d !!!\n", block->bz, workerid); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int rank = 0; MPI_Comm_rank(MPI_COMM_WORLD, &rank); DEBUG("!!! RANK %d !!!\n", rank); #endif DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); unsigned block_size_z = get_block_size(block->bz); unsigned i; update_per_worker[workerid]++; record_who_runs_what(block); cl_command_queue cq; starpu_opencl_get_current_queue(&cq); /* * Load neighbours' boundaries : TOP */ /* The offset along the z axis is (block_size_z + K) */ load_subblock_from_buffer_opencl(descr[0], descr[2], block_size_z+K); load_subblock_from_buffer_opencl(descr[1], descr[3], block_size_z+K); /* * Load neighbours' boundaries : BOTTOM */ load_subblock_from_buffer_opencl(descr[0], descr[4], 0); load_subblock_from_buffer_opencl(descr[1], descr[5], 0); /* * Stencils ... do the actual work here :) TODO */ for (i=1; i<=K; i++) { struct starpu_block_interface *oldb = descr[i%2], *newb = descr[(i+1)%2]; TYPE *old = (void*) oldb->dev_handle, *newer = (void*) newb->dev_handle; /* Shadow data */ opencl_shadow_host(block->bz, old, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i); /* And perform actual computation */ #ifdef LIFE opencl_life_update_host(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i); #else cl_event event; cl_int ret = clEnqueueCopyBuffer(cq, old, newer, 0, 0, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer), 0, NULL, &event); if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret); #endif /* LIFE */ } } #endif /* STARPU_USE_OPENCL */ /* * cl_update (CPU version) */ void update_func_cpu(void *descr[], void *arg) { unsigned zz; starpu_codelet_unpack_args(arg, &zz); struct block_description *block = get_block_description(zz); int workerid = starpu_worker_get_id_check(); DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); if (block->bz == 0) DEBUG("!!! DO update_func_cpu z %u worker%d !!!\n", block->bz, workerid); else DEBUG("!!! DO update_func_cpu z %u worker%d !!!\n", block->bz, workerid); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int rank = 0; MPI_Comm_rank(MPI_COMM_WORLD, &rank); DEBUG("!!! RANK %d !!!\n", rank); #endif DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); unsigned block_size_z = get_block_size(block->bz); unsigned i; update_per_worker[workerid]++; record_who_runs_what(block); /* * Load neighbours' boundaries : TOP */ /* The offset along the z axis is (block_size_z + K) */ load_subblock_from_buffer_cpu(descr[0], descr[2], block_size_z+K); load_subblock_from_buffer_cpu(descr[1], descr[3], block_size_z+K); /* * Load neighbours' boundaries : BOTTOM */ load_subblock_from_buffer_cpu(descr[0], descr[4], 0); load_subblock_from_buffer_cpu(descr[1], descr[5], 0); /* * Stencils ... do the actual work here :) TODO */ for (i=1; i<=K; i++) { struct starpu_block_interface *oldb = (struct starpu_block_interface *) descr[i%2], *newb = (struct starpu_block_interface *) descr[(i+1)%2]; TYPE *old = (TYPE*) oldb->ptr, *newer = (TYPE*) newb->ptr; /* Shadow data */ unsigned ldy = oldb->ldy, ldz = oldb->ldz; unsigned nx = oldb->nx, ny = oldb->ny, nz = oldb->nz; unsigned x, y, z; unsigned stepx = 1; unsigned stepy = 1; unsigned stepz = 1; unsigned idx = 0; unsigned idy = 0; unsigned idz = 0; TYPE *ptr = old; # include "shadow.h" /* And perform actual computation */ #ifdef LIFE life_update(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i); #else memcpy(newer, old, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer)); #endif /* LIFE */ } } /* Performance model and codelet structure */ static struct starpu_perfmodel cl_update_model = { .type = STARPU_HISTORY_BASED, .symbol = "cl_update" }; struct starpu_codelet cl_update = { .cpu_funcs = {update_func_cpu}, #ifdef STARPU_USE_CUDA .cuda_funcs = {update_func_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {update_func_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .model = &cl_update_model, .nbuffers = 6, .modes = {STARPU_RW, STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R} }; /* * Save the block internal boundaries to give them to our neighbours. */ /* CPU version */ static void load_subblock_into_buffer_cpu(void *_block, void *_boundary, unsigned firstz) { struct starpu_block_interface *block = (struct starpu_block_interface *)_block; struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; check_load(block, boundary); /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; unsigned offset = firstz*block->ldz; TYPE *block_data = (TYPE *)block->ptr; TYPE *boundary_data = (TYPE *)boundary->ptr; memcpy(boundary_data, &block_data[offset], boundary_size); } /* CUDA version */ #ifdef STARPU_USE_CUDA static void load_subblock_into_buffer_cuda(void *_block, void *_boundary, unsigned firstz) { struct starpu_block_interface *block = (struct starpu_block_interface *)_block; struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; check_load(block, boundary); /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; unsigned offset = firstz*block->ldz; TYPE *block_data = (TYPE *)block->ptr; TYPE *boundary_data = (TYPE *)boundary->ptr; cudaMemcpyAsync(boundary_data, &block_data[offset], boundary_size, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); } #endif /* STARPU_USE_CUDA */ /* OPENCL version */ #ifdef STARPU_USE_OPENCL static void load_subblock_into_buffer_opencl(struct starpu_block_interface *block, struct starpu_block_interface *boundary, unsigned firstz) { check_load(block, boundary); /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; unsigned offset = firstz*block->ldz; cl_mem block_data = (cl_mem)block->dev_handle; cl_mem boundary_data = (cl_mem)boundary->dev_handle; cl_command_queue cq; starpu_opencl_get_current_queue(&cq); cl_int ret = clEnqueueCopyBuffer(cq, block_data, boundary_data, offset, 0, boundary_size, 0, NULL, NULL); if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret); } #endif /* STARPU_USE_OPENCL */ /* Record how many top/bottom saves each worker performed */ unsigned top_per_worker[STARPU_NMAXWORKERS]; unsigned bottom_per_worker[STARPU_NMAXWORKERS]; /* top save, CPU version */ void dummy_func_top_cpu(void *descr[], void *arg) { unsigned z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); int workerid = starpu_worker_get_id_check(); top_per_worker[workerid]++; DEBUG("DO SAVE Bottom block %d\n", block->bz); /* The offset along the z axis is (block_size_z + K)- K */ unsigned block_size_z = get_block_size(block->bz); load_subblock_into_buffer_cpu(descr[0], descr[2], block_size_z); load_subblock_into_buffer_cpu(descr[1], descr[3], block_size_z); } /* bottom save, CPU version */ void dummy_func_bottom_cpu(void *descr[], void *arg) { unsigned z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); STARPU_ASSERT(block); int workerid = starpu_worker_get_id_check(); bottom_per_worker[workerid]++; DEBUG("DO SAVE Top block %d\n", block->bz); load_subblock_into_buffer_cpu(descr[0], descr[2], K); load_subblock_into_buffer_cpu(descr[1], descr[3], K); } /* top save, CUDA version */ #ifdef STARPU_USE_CUDA static void dummy_func_top_cuda(void *descr[], void *arg) { unsigned z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); int workerid = starpu_worker_get_id_check(); top_per_worker[workerid]++; DEBUG("DO SAVE Top block %d\n", block->bz); /* The offset along the z axis is (block_size_z + K)- K */ unsigned block_size_z = get_block_size(block->bz); load_subblock_into_buffer_cuda(descr[0], descr[2], block_size_z); load_subblock_into_buffer_cuda(descr[1], descr[3], block_size_z); } /* bottom save, CUDA version */ static void dummy_func_bottom_cuda(void *descr[], void *arg) { unsigned z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); (void) block; int workerid = starpu_worker_get_id_check(); bottom_per_worker[workerid]++; DEBUG("DO SAVE Bottom block %d on CUDA\n", block->bz); load_subblock_into_buffer_cuda(descr[0], descr[2], K); load_subblock_into_buffer_cuda(descr[1], descr[3], K); } #endif /* STARPU_USE_CUDA */ /* top save, OpenCL version */ #ifdef STARPU_USE_OPENCL static void dummy_func_top_opencl(void *descr[], void *arg) { unsigned z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); int workerid = starpu_worker_get_id_check(); top_per_worker[workerid]++; DEBUG("DO SAVE Top block %d\n", block->bz); /* The offset along the z axis is (block_size_z + K)- K */ unsigned block_size_z = get_block_size(block->bz); load_subblock_into_buffer_opencl(descr[0], descr[2], block_size_z); load_subblock_into_buffer_opencl(descr[1], descr[3], block_size_z); } /* bottom save, OPENCL version */ static void dummy_func_bottom_opencl(void *descr[], void *arg) { unsigned z; starpu_codelet_unpack_args(arg, &z); struct block_description *block = get_block_description(z); (void) block; int workerid = starpu_worker_get_id_check(); bottom_per_worker[workerid]++; DEBUG("DO SAVE Bottom block %d on OPENCL\n", block->bz); load_subblock_into_buffer_opencl(descr[0], descr[2], K); load_subblock_into_buffer_opencl(descr[1], descr[3], K); } #endif /* STARPU_USE_OPENCL */ /* Performance models and codelet for save */ static struct starpu_perfmodel save_cl_bottom_model = { .type = STARPU_HISTORY_BASED, .symbol = "save_cl_bottom" }; static struct starpu_perfmodel save_cl_top_model = { .type = STARPU_HISTORY_BASED, .symbol = "save_cl_top" }; struct starpu_codelet save_cl_bottom = { .cpu_funcs = {dummy_func_bottom_cpu}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dummy_func_bottom_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {dummy_func_bottom_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .model = &save_cl_bottom_model, .nbuffers = 4, .modes = {STARPU_R, STARPU_R, STARPU_W, STARPU_W} }; struct starpu_codelet save_cl_top = { .cpu_funcs = {dummy_func_top_cpu}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dummy_func_top_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {dummy_func_top_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .model = &save_cl_top_model, .nbuffers = 4, .modes = {STARPU_R, STARPU_R, STARPU_W, STARPU_W} }; /* Memset a block's buffers */ void memset_func(void *descr[], void *arg) { (void)descr; unsigned sizex, sizey, bz; starpu_codelet_unpack_args(arg, &sizex, &sizey, &bz); struct block_description *block = get_block_description(bz); unsigned size_bz = get_block_size(bz); unsigned x,y,z; for (x = 0; x < sizex + 2*K; x++) { for (y = 0; y < sizey + 2*K; y++) { /* Main blocks */ for (z = 0; z < size_bz + 2*K; z++) { block->layers[0][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; block->layers[1][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; } for (z = 0; z < K; z++) { /* Boundary blocks : Top */ block->boundaries[T][0][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; block->boundaries[T][1][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; /* Boundary blocks : Bottom */ block->boundaries[B][0][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; block->boundaries[B][1][(x)+(y)*(sizex + 2*K)+(z)*(sizex+2*K)*(sizey+2*K)] = 0; } } } //memset(block->layers[0], 0, (sizex + 2*K)*(sizey + 2*K)*(size_bz + 2*K)*sizeof(block->layers[0])); //memset(block->layers[1], 0, (sizex + 2*K)*(sizey + 2*K)*(size_bz + 2*K)*sizeof(block->layers[1])); //memset(block->boundaries[T][0], 0, (sizex + 2*K)*(sizey + 2*K)*K*sizeof(block->boundaries[T][0])); //memset(block->boundaries[T][1], 0, (sizex + 2*K)*(sizey + 2*K)*K*sizeof(block->boundaries[T][1])); //memset(block->boundaries[B][0], 0, (sizex + 2*K)*(sizey + 2*K)*K*sizeof(block->boundaries[B][0])); //memset(block->boundaries[B][1], 0, (sizex + 2*K)*(sizey + 2*K)*K*sizeof(block->boundaries[B][1])); } static double memset_cost_function(struct starpu_task *task, unsigned nimpl) { (void) task; (void) nimpl; return 0.000001; } static struct starpu_perfmodel memset_model = { .type = STARPU_COMMON, .cost_function = memset_cost_function, .symbol = "memset" }; struct starpu_codelet cl_memset = { .cpu_funcs = {memset_func}, .cpu_funcs_name = {"memset_func"}, .model = &memset_model, .nbuffers = 6, .modes = {STARPU_W, STARPU_W, STARPU_W, STARPU_W, STARPU_W, STARPU_W} }; /* Initialize a block's layer */ static void initlayer_func(void *descr[], void *arg) { (void)descr; unsigned sizex, sizey, bz; starpu_codelet_unpack_args(arg, &sizex, &sizey, &bz); struct block_description *block = get_block_description(bz); unsigned size_bz = get_block_size(bz); /* Initialize layer with some random data */ unsigned x, y, z; unsigned sum = 0; for (x = 0; x < sizex; x++) for (y = 0; y < sizey; y++) for (z = 0; z < size_bz; z++) sum += block->layers[0][(K+x)+(K+y)*(sizex + 2*K)+(K+z)*(sizex+2*K)*(sizey+2*K)] = (int)((x/7.+y/13.+(bz*size_bz + z)/17.) * 10.) % 2; } static double initlayer_cost_function(struct starpu_task *task, unsigned nimpl) { (void) task; (void) nimpl; return 0.000001; } static struct starpu_perfmodel initlayer_model = { .type = STARPU_COMMON, .cost_function = initlayer_cost_function, .symbol = "initlayer" }; struct starpu_codelet cl_initlayer = { .cpu_funcs = {initlayer_func}, .model = &initlayer_model, .nbuffers = 1, .modes = {STARPU_W} }; starpu-1.4.9+dfsg/examples/stencil/implicit-stencil-tasks.c000066400000000000000000000133471507764646700240370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "implicit-stencil.h" #define BIND_LAST 1 /* * Schedule tasks for updates and saves */ /* * NB: iter = 0: initialization phase, TAG_U(z, 0) = TAG_INIT * * dir is -1 or +1. */ #if 0 # define DEBUG(fmt, ...) fprintf(stderr,fmt,##__VA_ARGS__) #else # define DEBUG(fmt, ...) #endif #if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE) #include #undef starpu_task_insert #define starpu_task_insert(...) starpu_mpi_task_insert(MPI_COMM_WORLD, __VA_ARGS__) #endif /* * Schedule initialization tasks */ void create_task_memset(unsigned sizex, unsigned sizey, unsigned z) { struct block_description *descr = get_block_description(z); int ret = starpu_task_insert(&cl_memset, STARPU_VALUE, &sizex, sizeof(unsigned), STARPU_VALUE, &sizey, sizeof(unsigned), STARPU_VALUE, &z, sizeof(unsigned), STARPU_W, descr->layers_handle[0], STARPU_W, descr->layers_handle[1], STARPU_W, descr->boundaries_handle[T][0], STARPU_W, descr->boundaries_handle[T][1], STARPU_W, descr->boundaries_handle[B][0], STARPU_W, descr->boundaries_handle[B][1], 0); if (ret) { FPRINTF(stderr, "Could not submit task memset: %d\n", ret); if (ret == -ENODEV) exit(77); STARPU_ABORT(); } } void create_task_initlayer(unsigned sizex, unsigned sizey, unsigned z) { struct block_description *descr = get_block_description(z); int ret = starpu_task_insert(&cl_initlayer, STARPU_VALUE, &sizex, sizeof(unsigned), STARPU_VALUE, &sizey, sizeof(unsigned), STARPU_VALUE, &z, sizeof(unsigned), STARPU_W, descr->layers_handle[0], 0); if (ret) { FPRINTF(stderr, "Could not submit task initlayer: %d\n", ret); if (ret == -ENODEV) exit(77); STARPU_ABORT(); } } /* * Schedule saving boundaries of blocks to communication buffers */ static void create_task_save_local(unsigned z, int dir) { struct block_description *descr = get_block_description(z); struct starpu_codelet *codelet; int ret; codelet = (dir == -1)?&save_cl_bottom:&save_cl_top; ret = starpu_task_insert(codelet, STARPU_VALUE, &z, sizeof(unsigned), STARPU_R, descr->layers_handle[0], STARPU_R, descr->layers_handle[1], STARPU_W, descr->boundaries_handle[(1-dir)/2][0], STARPU_W, descr->boundaries_handle[(1-dir)/2][1], STARPU_PRIORITY, STARPU_MAX_PRIO, 0); if (ret) { FPRINTF(stderr, "Could not submit task save: %d\n", ret); if (ret == -ENODEV) exit(77); STARPU_ABORT(); } } /* * Schedule update computation in computation buffer */ void create_task_update(unsigned iter, unsigned z, int local_rank) { STARPU_ASSERT(iter != 0); unsigned old_layer = (K*(iter-1)) % 2; unsigned new_layer = (old_layer + 1) % 2; struct block_description *descr = get_block_description(z); struct block_description *bottom_neighbour = descr->boundary_blocks[B]; struct block_description *top_neighbour = descr->boundary_blocks[T]; struct starpu_codelet *codelet = &cl_update; // Simple-level prio //int prio = ((bottom_neighbour->mpi_node != local_rank) || (top_neighbour->mpi_node != local_rank)) ? STARPU_MAX_PRIO : STARPU_DEFAULT_PRIO; // Two-level prio int prio = ((bottom_neighbour->mpi_node != local_rank) || (top_neighbour->mpi_node != local_rank)) ? STARPU_MAX_PRIO : ((bottom_neighbour->boundary_blocks[B]->mpi_node != local_rank) || (top_neighbour->boundary_blocks[T]->mpi_node != local_rank)) ? STARPU_MAX_PRIO-1 : STARPU_DEFAULT_PRIO; int ret = starpu_task_insert(codelet, STARPU_VALUE, &z, sizeof(unsigned), STARPU_RW, descr->layers_handle[old_layer], STARPU_RW, descr->layers_handle[new_layer], STARPU_R, bottom_neighbour->boundaries_handle[T][old_layer], STARPU_R, bottom_neighbour->boundaries_handle[T][new_layer], STARPU_R, top_neighbour->boundaries_handle[B][old_layer], STARPU_R, top_neighbour->boundaries_handle[B][new_layer], STARPU_PRIORITY, prio, 0); if (ret) { FPRINTF(stderr, "Could not submit task update block: %d\n", ret); if (ret == -ENODEV) exit(77); STARPU_ABORT(); } } /* * Create all the tasks */ void create_tasks(int rank) { int iter; int bz; int niter = get_niter(); int nbz = get_nbz(); for (iter = 0; iter <= niter; iter++) { for (bz = 0; bz < nbz; bz++) { if ((iter > 0) && ((get_block_mpi_node(bz) == rank)|| (get_block_mpi_node(bz+1) == rank)|| (get_block_mpi_node(bz-1) == rank))) create_task_update(iter, bz, rank); } for (bz = 0; bz < nbz; bz++) { if (iter != niter) { int node_z = get_block_mpi_node(bz); int node_z_and_b = get_block_mpi_node(bz-1); int node_z_and_t = get_block_mpi_node(bz+1); if ((node_z == rank) || ((node_z != node_z_and_b) && (node_z_and_b == rank))) create_task_save_local(bz, +1); if ((node_z == rank) || ((node_z != node_z_and_t) && (node_z_and_t == rank))) create_task_save_local(bz, -1); } } } } starpu-1.4.9+dfsg/examples/stencil/implicit-stencil.c000066400000000000000000000232261507764646700227110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "implicit-stencil.h" #ifdef STARPU_HAVE_VALGRIND_H #include #endif /* Main application */ /* default parameter values */ static unsigned bind_tasks = 0; static unsigned ticks = 1000; #ifdef STARPU_QUICK_CHECK static unsigned niter = 4; #define SIZE 16 #define NBZ 8 #else static unsigned niter = 32; #define SIZE 128 #define NBZ 64 #endif /* Problem size */ static unsigned sizex = SIZE; static unsigned sizey = SIZE; static unsigned sizez = NBZ*SIZE; /* Number of blocks (scattered over the different MPI processes) */ unsigned nbz = NBZ; double start; double begin, end; double timing; /* * Initialization */ unsigned get_bind_tasks(void) { return bind_tasks; } unsigned get_nbz(void) { return nbz; } unsigned get_niter(void) { return niter; } unsigned get_ticks(void) { return ticks; } static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-b") == 0) { bind_tasks = 1; } if (strcmp(argv[i], "-nbz") == 0) { nbz = atoi(argv[++i]); } if (strcmp(argv[i], "-sizex") == 0) { sizex = atoi(argv[++i]); } if (strcmp(argv[i], "-sizey") == 0) { sizey = atoi(argv[++i]); } if (strcmp(argv[i], "-sizez") == 0) { sizez = atoi(argv[++i]); } if (strcmp(argv[i], "-niter") == 0) { niter = atoi(argv[++i]); } if (strcmp(argv[i], "-ticks") == 0) { ticks = atoi(argv[++i]); } if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { fprintf(stderr, "Usage : %s [options...]\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, "-b bind tasks on CPUs/GPUs\n"); fprintf(stderr, "-nbz Number of blocks on Z axis (%u by default)\n", nbz); fprintf(stderr, "-size[xyz] Domain size on x/y/z axis (%ux%ux%u by default)\n", sizex, sizey, sizez); fprintf(stderr, "-niter Number of iterations (%u by default)\n", niter); fprintf(stderr, "-ticks How often to put ticks in the output (ms, %u by default)\n", ticks); exit(0); } } #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) { sizex = sizey = 3; nbz = 10; sizez = nbz*3; } #endif } static void init_problem(int argc, char **argv, int rank, int world_size) { parse_args(argc, argv); create_blocks_array(sizex, sizey, sizez, nbz); /* Select the MPI process which should compute the different blocks */ assign_blocks_to_mpi_nodes(world_size); assign_blocks_to_workers(rank); /* Allocate the different memory blocks, if used by the MPI process */ start = starpu_timing_now(); allocate_memory_on_node(rank); end = starpu_timing_now(); timing = end - begin; display_memory_consumption(rank, timing); who_runs_what_len = 2*niter; who_runs_what = (int *) calloc(nbz * who_runs_what_len, sizeof(*who_runs_what)); who_runs_what_index = (int *) calloc(nbz, sizeof(*who_runs_what_index)); last_tick = (double *) calloc(nbz, sizeof(*last_tick)); } static void free_problem(int rank) { free_memory_on_node(rank); free_blocks_array(); free(who_runs_what); free(who_runs_what_index); free(last_tick); } /* * Main body */ void func(unsigned task_per_worker[STARPU_NMAXWORKERS]) { unsigned total = 0; int worker; for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) total += task_per_worker[worker]; for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) { if (task_per_worker[worker]) { char name[64]; starpu_worker_get_name(worker, name, sizeof(name)); FPRINTF(stderr,"\t%s -> %u (%2.2f%%)\n", name, task_per_worker[worker], (100.0*task_per_worker[worker])/total); } } } unsigned global_workerid(unsigned local_workerid) { #if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); unsigned workers_per_node = starpu_worker_get_count(); return (local_workerid + rank*workers_per_node); #else return local_workerid; #endif } int main(int argc, char **argv) { int rank; int world_size; int ret; #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int thread_support; if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support)) { FPRINTF(stderr, "MPI_Init_thread failed\n"); } if (thread_support == MPI_THREAD_FUNNELED) FPRINTF(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); if (thread_support < MPI_THREAD_FUNNELED) FPRINTF(stderr,"Warning: MPI does not have thread support!\n"); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &world_size); #else rank = 0; world_size = 1; #endif if (rank == 0) { FPRINTF(stderr, "Running on %d nodes\n", world_size); fflush(stderr); } struct starpu_conf conf; starpu_conf_init(&conf); /*nbz is a global variable, this example doesn't support Master-Slave*/ conf.nmpi_ms = 0; conf.ntcpip_ms = 0; ret = starpu_init(&conf); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #endif #ifdef STARPU_USE_OPENCL opencl_life_init(); opencl_shadow_init(); #endif /*STARPU_USE_OPENCL*/ init_problem(argc, argv, rank, world_size); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int barrier_ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_ASSERT(barrier_ret == MPI_SUCCESS); #endif if (rank == 0) FPRINTF(stderr, "GO !\n"); start = starpu_timing_now(); begin = starpu_timing_now(); create_tasks(rank); //starpu_tag_notify_from_apps(TAG_INIT_TASK); //wait_end_tasks(rank); starpu_task_wait_for_all(); end = starpu_timing_now(); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) barrier_ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_ASSERT(barrier_ret == MPI_SUCCESS); #endif #if 0 check(rank); #endif /*display_debug(nbz, niter, rank);*/ /* timing in us */ timing = end - begin; double min_timing = timing; double max_timing = timing; double sum_timing = timing; #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int reduce_ret; reduce_ret = MPI_Reduce(&timing, &min_timing, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); reduce_ret = MPI_Reduce(&timing, &max_timing, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); reduce_ret = MPI_Reduce(&timing, &sum_timing, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); /* XXX we should do a gather instead, here we assume that non initialized values are still 0 */ int *who_runs_what_tmp = malloc(nbz * who_runs_what_len * sizeof(*who_runs_what)); reduce_ret = MPI_Reduce(who_runs_what, who_runs_what_tmp, nbz * who_runs_what_len, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); memcpy(who_runs_what, who_runs_what_tmp, nbz * who_runs_what_len * sizeof(*who_runs_what)); free(who_runs_what_tmp); /* XXX we should do a gather instead, here we assume that non initialized values are still 0 */ int *who_runs_what_index_tmp = malloc(nbz * sizeof(*who_runs_what_index)); reduce_ret = MPI_Reduce(who_runs_what_index, who_runs_what_index_tmp, nbz, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); memcpy(who_runs_what_index, who_runs_what_index_tmp, nbz * sizeof(*who_runs_what_index)); free(who_runs_what_index_tmp); #endif if (rank == 0) { #if 1 FPRINTF(stderr, "update:\n"); func(update_per_worker); FPRINTF(stderr, "top:\n"); func(top_per_worker); FPRINTF(stderr, "bottom:\n"); func(bottom_per_worker); #endif #if 1 unsigned nzblocks_per_process = (nbz + world_size - 1) / world_size; int iter; for (iter = 0; iter < who_runs_what_len; iter++) { unsigned last, bz; last = 1; for (bz = 0; bz < nbz; bz++) { if ((bz % nzblocks_per_process) == 0) FPRINTF(stderr, "| "); if (who_runs_what_index[bz] <= iter) FPRINTF(stderr,"_ "); else { last = 0; if (who_runs_what[bz + iter * nbz] == -1) FPRINTF(stderr,"* "); else FPRINTF(stderr, "%d ", who_runs_what[bz + iter * nbz]); } } FPRINTF(stderr, "\n"); if (last) break; } #endif fflush(stderr); FPRINTF(stdout, "Computation took: %f ms on %d MPI processes\n", max_timing/1000, world_size); FPRINTF(stdout, "\tMIN : %f ms\n", min_timing/1000); FPRINTF(stdout, "\tMAX : %f ms\n", max_timing/1000); FPRINTF(stdout, "\tAVG : %f ms\n", sum_timing/(world_size*1000)); } free_problem(rank); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) starpu_mpi_shutdown(); #endif starpu_shutdown(); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) MPI_Finalize(); #endif #ifdef STARPU_USE_OPENCL opencl_life_free(); opencl_shadow_free(); #endif /*STARPU_USE_OPENCL*/ return 0; } starpu-1.4.9+dfsg/examples/stencil/implicit-stencil.h000066400000000000000000000111561507764646700227150ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __IMPLICIT_STENCIL_H__ #define __IMPLICIT_STENCIL_H__ #include #include #include #ifndef __CUDACC__ #if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE) #include #include #endif #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define LIFE #ifdef LIFE #define TYPE unsigned char extern void life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter); #else #define TYPE float #endif #define K 1 #define NDIRS 2 /* Split only on the z axis to make things simple */ typedef enum { B = 0, T = 1 } direction; /* Description of a domain block */ struct block_description { /* Which MPI node should process that block ? */ int mpi_node; unsigned preferred_worker; unsigned bz; /* For each of the following buffers, there are two (0/1) buffers to * make new/old switch costless. */ /* This is the computation buffer for this block, it includes * neighbours' border to make computation easier */ TYPE *layers[2]; starpu_data_handle_t layers_handle[2]; /* This is the "save" buffer, i.e. a copy of our neighbour's border. * This one is used for CPU/GPU or MPI communication (rather than the * whole domain block) */ TYPE *boundaries[NDIRS][2]; starpu_data_handle_t boundaries_handle[NDIRS][2]; /* Shortcut pointer to the neighbours */ struct block_description *boundary_blocks[NDIRS]; }; #define TAG_INIT_TASK ((starpu_tag_t)1) starpu_tag_t TAG_FINISH(int z); starpu_tag_t TAG_START(int z, int dir); int MPI_TAG0(int z, int iter, int dir); int MPI_TAG1(int z, int iter, int dir); #define MIN(a,b) ((a)<(b)?(a):(b)) void create_blocks_array(unsigned sizex, unsigned sizey, unsigned sizez, unsigned nbz); void free_blocks_array(); struct block_description *get_block_description(int z); void assign_blocks_to_mpi_nodes(int world_size); void allocate_memory_on_node(int rank); void assign_blocks_to_workers(int rank); void create_tasks(int rank); void wait_end_tasks(int rank); void check(int rank); void free_memory_on_node(int rank); void display_memory_consumption(int rank, double time); int get_block_mpi_node(int z); unsigned get_block_size(int z); unsigned get_bind_tasks(void); unsigned get_nbz(void); unsigned get_niter(void); unsigned get_ticks(void); unsigned global_workerid(unsigned local_workerid); void create_task_memset(unsigned sizex, unsigned sizey, unsigned z); void create_task_initlayer(unsigned sizex, unsigned sizey, unsigned z); void create_task_update(unsigned iter, unsigned z, int local_rank); void create_task_save(unsigned iter, unsigned z, int dir, int local_rank); extern int starpu_mpi_initialize(void); extern int starpu_mpi_shutdown(void); /* kernels */ extern struct starpu_codelet cl_update; extern struct starpu_codelet save_cl_bottom; extern struct starpu_codelet save_cl_top; extern struct starpu_codelet cl_memset; extern struct starpu_codelet cl_initlayer; extern unsigned update_per_worker[STARPU_NMAXWORKERS]; extern unsigned top_per_worker[STARPU_NMAXWORKERS]; extern unsigned bottom_per_worker[STARPU_NMAXWORKERS]; extern double start; extern int who_runs_what_len; extern int *who_runs_what; extern int *who_runs_what_index; extern double *last_tick; #ifndef _externC #define _externC #endif _externC void cuda_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter); _externC void cuda_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i); _externC void opencl_shadow_init(void); _externC void opencl_shadow_free(void); _externC void opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i); _externC void opencl_life_init(void); _externC void opencl_life_free(void); _externC void opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter); #endif /* __IMPLICIT_STENCIL_H__ */ starpu-1.4.9+dfsg/examples/stencil/life.c000066400000000000000000000026631507764646700203610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "stencil.h" /* Heart of the stencil computation: compute a new state from an old one. */ void life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter) { (void)bz; int x, y, z, num, alive; for (z = iter; z < nz - iter; z++) { for (y = K; y < ny - K; y++) { for (x = K; x < nx - K; x++) { num = 0 + old[x+(y+1)*ldy+(z+0)*ldz] + old[x+(y+1)*ldy+(z+1)*ldz] + old[x+(y+0)*ldy+(z+1)*ldz] + old[x+(y-1)*ldy+(z+1)*ldz] + old[x+(y-1)*ldy+(z+0)*ldz] + old[x+(y-1)*ldy+(z-1)*ldz] + old[x+(y+0)*ldy+(z-1)*ldz] + old[x+(y+1)*ldy+(z-1)*ldz] ; alive = old[x+y*ldy+z*ldz]; alive = (alive && num == 2) || num == 3; newp[x+y*ldy+z*ldz] = alive; } } } } starpu-1.4.9+dfsg/examples/stencil/life_cuda.cu000066400000000000000000000061361507764646700215410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define _externC extern "C" #include "stencil.h" /* Heart of the stencil computation: compute a new state from an old one. */ extern "C" __global__ void cuda_life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter) { unsigned idx = threadIdx.x + blockIdx.x * blockDim.x; unsigned idy = threadIdx.y + blockIdx.y * blockDim.y; //unsigned idz = threadIdx.z + blockIdx.z * blockDim.z; unsigned idz = 0; unsigned stepx = blockDim.x * gridDim.x; unsigned stepy = blockDim.y * gridDim.y; //unsigned stepz = blockDim.z * gridDim.z; unsigned stepz = 1; unsigned x, y, z; unsigned num, alive; for (z = iter + idz; z < nz - iter; z += stepz) for (y = K + idy; y < ny - K; y += stepy) { for (x = K + idx; x < nx - K; x += stepx) { unsigned index = x + y*ldy + z*ldz; num = 0 + old[index+1*ldy+0*ldz] + old[index+1*ldy+1*ldz] + old[index+0*ldy+1*ldz] + old[index-1*ldy+1*ldz] + old[index-1*ldy+0*ldz] + old[index-1*ldy-1*ldz] + old[index+0*ldy-1*ldz] + old[index+1*ldy-1*ldz] ; alive = old[index]; alive = (alive && num == 2) || num == 3; newp[index] = alive; } } } extern "C" void cuda_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter) { unsigned max_parallelism = 512; unsigned threads_per_dim_x = max_parallelism; while (threads_per_dim_x / 2 >= nx) threads_per_dim_x /= 2; unsigned threads_per_dim_y = max_parallelism / threads_per_dim_x; while (threads_per_dim_y / 2 >= ny) threads_per_dim_y /= 2; #if 0 unsigned threads_per_dim_z = 4; dim3 dimBlock(threads_per_dim_x, threads_per_dim_y, threads_per_dim_z); dim3 dimGrid(nx / threads_per_dim_x, ny / threads_per_dim_y, nz / threads_per_dim_z); #else dim3 dimBlock(threads_per_dim_x, threads_per_dim_y); dim3 dimGrid((nx + threads_per_dim_x-1) / threads_per_dim_x, (ny + threads_per_dim_y-1) / threads_per_dim_y); #endif cuda_life_update <<>> (bz, old, newp, nx, ny, nz, ldy, ldz, iter); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/stencil/life_opencl.c000066400000000000000000000070741507764646700217220ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Heart of the stencil computation: compute a new state from an old one. */ /* #define _externC extern "C" */ #include #define CL_TARGET_OPENCL_VERSION 100 #ifdef __APPLE__ #include #else #include #endif #include #define str(x) #x #define clsrc(t,k) "__kernel void\n\ #define TYPE " str(t) "\n\ #define K " str(k) "\n\ life_update(int bz, __global const TYPE *old, __global TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter)\n\ {\n \ unsigned idx = get_global_id(0);\n \ unsigned idy = get_global_id(1);\n \ //unsigned idz = threadIdx.z + blockIdx.z * blockDim.z;\n \ unsigned idz = 0;\n \ unsigned stepx = get_global_size(0);\n \ unsigned stepy = get_global_size(1);\n \ //unsigned stepz = blockDim.z * gridDim.z;\n \ unsigned stepz = 1;\n \ unsigned x, y, z;\n \ unsigned num, alive;\n \ \n \ for (z = iter + idz; z < nz - iter; z += stepz)\n \ for (y = K + idy; y < ny - K; y += stepy) \n \ {\n \ for (x = K + idx; x < nx - K; x += stepx) \ {\n \ unsigned index = x + y*ldy + z*ldz;\n \ num = 0\n \ + old[index+1*ldy+0*ldz]\n \ + old[index+1*ldy+1*ldz]\n \ + old[index+0*ldy+1*ldz]\n \ + old[index-1*ldy+1*ldz]\n \ + old[index-1*ldy+0*ldz]\n \ + old[index-1*ldy-1*ldz]\n \ + old[index+0*ldy-1*ldz]\n \ + old[index+1*ldy-1*ldz]\n \ ;\n \ alive = old[index];\n \ alive = (alive && num == 2) || num == 3;\n \ newp[index] = alive;\n \ }\n \ }\n \ }" static const char * src = clsrc(TYPE,K); static struct starpu_opencl_program program; void opencl_life_init(void) { starpu_opencl_load_opencl_from_string(src, &program, NULL); } void opencl_life_free(void) { int ret = starpu_opencl_unload_opencl(&program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); } void opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter) { #if 0 size_t dim[] = {nx, ny, nz}; #else size_t dim[] = {nx, ny, 1}; #endif int devid,id; cl_int err; id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); cl_kernel kernel; cl_command_queue cq; err = starpu_opencl_load_kernel(&kernel, &cq, &program, "life_update", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); clSetKernelArg(kernel, 0, sizeof(bz), &bz); clSetKernelArg(kernel, 1, sizeof(old), &old); clSetKernelArg(kernel, 2, sizeof(newp), &newp); clSetKernelArg(kernel, 3, sizeof(nx), &nx); clSetKernelArg(kernel, 4, sizeof(ny), &ny); clSetKernelArg(kernel, 5, sizeof(nz), &nz); clSetKernelArg(kernel, 6, sizeof(ldy), &ldy); clSetKernelArg(kernel, 7, sizeof(ldz), &ldz); clSetKernelArg(kernel, 8, sizeof(iter), &iter); err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu-1.4.9+dfsg/examples/stencil/loader.c000066400000000000000000000274611507764646700207130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) #include #else #include #endif #ifdef STARPU_QUICK_CHECK /* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s add some extra times for tests which run with all schedulers */ #define DEFAULT_TIMEOUT 100 #elif !defined(STARPU_LONG_CHECK) /* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ #define DEFAULT_TIMEOUT 300 #else /* Long checks can be very long */ #define DEFAULT_TIMEOUT 1000 #endif #define AUTOTEST_SKIPPED_TEST 77 static pid_t child_pid = 0; static int timeout; #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) static int mygettimeofday(struct timeval *tv, void *tz) { if (tv) { FILETIME ft; unsigned long long res; GetSystemTimeAsFileTime(&ft); /* 100-nanosecond intervals since January 1, 1601 */ res = ft.dwHighDateTime; res <<= 32; res |= ft.dwLowDateTime; res /= 10; /* Now we have microseconds */ res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; /* Now we are based on epoch */ tv->tv_sec = res / 1000000ULL; tv->tv_usec = res % 1000000ULL; } } #else #define mygettimeofday(tv,tz) gettimeofday(tv,tz) #endif #ifdef STARPU_GDB_PATH static int try_launch_gdb(const char *exe, const char *core) { # define GDB_COMMANDS \ "-ex", "py-list", \ "-ex", "starpu-tasks", \ "-ex", "starpu-workers", \ "-ex", "starpu-print-datas-summary", \ "-ex", "starpu-memusage", \ "-ex", "starpu-print-archs", \ "-ex", "starpu-print-registered-models", \ "-ex", "bt full", \ "-ex", "py-bt", \ "-ex", "thread apply all bt full", \ "-ex", "thread apply all py-bt", \ int err; pid_t pid; struct stat st; const char *top_builddir; char *gdb; err = stat(core, &st); if (err != 0) { fprintf(stderr, "while looking for core file of %s: %s: %m\n", exe, core); return -1; } if (!(st.st_mode & S_IFREG)) { fprintf(stderr, "%s: not a regular file\n", core); return -1; } top_builddir = getenv("top_builddir"); pid = fork(); switch (pid) { case 0: /* kid */ if (top_builddir != NULL) { /* Run gdb with Libtool. */ gdb = alloca(strlen(top_builddir) + sizeof("/libtool") + 1); strcpy(gdb, top_builddir); strcat(gdb, "/libtool"); err = execl(gdb, "gdb", "--mode=execute", STARPU_GDB_PATH, "--batch", GDB_COMMANDS exe, core, NULL); } else { /* Run gdb directly */ gdb = STARPU_GDB_PATH; err = execl(gdb, "gdb", "--batch", GDB_COMMANDS exe, core, NULL); } if (err != 0) { fprintf(stderr, "while launching `%s': %m\n", gdb); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); break; case -1: fprintf(stderr, "fork: %m\n"); return -1; default: /* parent */ { pid_t who; int status; who = waitpid(pid, &status, 0); if (who != pid) fprintf(stderr, "while waiting for gdb " "process %d: %m\n", pid); } } return 0; # undef GDB_COMMANDS } #endif /* STARPU_GDB_PATH */ static void launch_gdb(const char *exe) { #ifdef STARPU_GDB_PATH char s[32]; snprintf(s, sizeof(s), "core.%d", child_pid); if (try_launch_gdb(exe, s) < 0) try_launch_gdb(exe, "core"); #endif /* STARPU_GDB_PATH */ } static char *test_name; static void test_cleaner(int sig) { pid_t child_gid; int status; (void) sig; // send signal to all loader family members fprintf(stderr, "[error] test %s has been blocked for %d seconds. Mark it as failed\n", test_name, timeout); child_gid = getpgid(child_pid); kill(-child_gid, SIGQUIT); waitpid(child_pid, &status, 0); launch_gdb(test_name); raise(SIGALRM); exit(EXIT_FAILURE); } static void forwardsig(int sig) { pid_t child_gid; child_gid = getpgid(child_pid); kill(-child_gid, sig); } static int _decode(char **src, char *motif, const char *value) { char *found; found = strstr(*src, motif); if (found == NULL) return 0; char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); strncpy(new_src, *src, found - *src); strcat(new_src, value); strcat(new_src, found+strlen(motif)); *src = new_src; return 1; } static void decode(char **src, char *motif, const char *value) { if (*src) { if (strstr(*src, motif) && value == NULL) { fprintf(stderr, "error: $%s undefined\n", motif); exit(EXIT_FAILURE); } int d = _decode(src, motif, value); while (d) d = _decode(src, motif, value); } } int main(int argc, char *argv[]) { int child_exit_status; char *test_args; char *launcher; char *launcher_args; char *libtool; char *cflags; const char *top_builddir = getenv("top_builddir"); struct sigaction sa; int ret; struct timeval start; struct timeval end; double timing; int x=1; int asan = 0, lsan = 0, tsan = 0, usan = 0; (void) argc; test_args = NULL; timeout = 0; launcher=getenv("STARPU_CHECK_LAUNCHER"); launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); cflags = getenv("CFLAGS"); if (cflags) { if (strstr(cflags, "-fsanitize=address")) asan = 1; if (strstr(cflags, "-fsanitize=leak")) lsan = 1; if (strstr(cflags, "-fsanitize=thread")) tsan = 1; if (strstr(cflags, "-fsanitize=undefined")) usan = 1; } if (argv[x] && strcmp(argv[x], "-t") == 0) { timeout = strtol(argv[x+1], NULL, 10); x += 2; } else if (getenv("STARPU_TIMEOUT_ENV")) { /* get user-defined iter_max value */ timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); } else if (timeout <= 0) { timeout = DEFAULT_TIMEOUT; if ((launcher && strstr(launcher, "valgrind")) || (launcher && strstr(launcher, "helgrind")) || tsan) timeout *= 20; if (asan || usan || lsan || (launcher && strstr(launcher, "compute-sanitizer"))) timeout *= 5; if (timeout > 1750) timeout = 1750; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG timeout *= 20; #endif #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE /* compare values between the 2 values of timeout */ if (getenv("MPIEXEC_TIMEOUT")) { int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); if (mpiexec_timeout != timeout) fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); } #endif if (argv[x] && strcmp(argv[x], "-p") == 0) { test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); x += 3; } else { test_name = argv[x]; x += 1; } if (!test_name) { fprintf(stderr, "[error] Need name of program to start\n"); exit(EXIT_FAILURE); } size_t len = strlen(test_name); if (len >= 3 && test_name[len-3] == '.' && test_name[len-2] == 's' && test_name[len-1] == 'h') { /* This is a shell script, don't run ourself on bash, but make * the script call us for each program invocation */ char *launch = NULL; if (top_builddir == NULL) // this may fail if .libs is in the directory path setenv("STARPU_LAUNCH", argv[0], 1); else { launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); strcpy(launch, top_builddir); strcat(launch, "/tests/loader"); setenv("STARPU_LAUNCH", launch, 1); } execvp(test_name, argv+x-1); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); free(launch); exit(EXIT_FAILURE); } if (strstr(test_name, "spmv/dw_block_spmv")) { test_args = (char *) calloc(512, sizeof(char)); snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); } else if (strstr(test_name, "starpu_perfmodel_display")) { if (x >= argc) test_args = strdup("-l"); } else if (strstr(test_name, "starpu_perfmodel_plot")) { if (x >= argc) test_args = strdup("-l"); } /* get launcher program */ if (launcher_args) launcher_args=strdup(launcher_args); if (top_builddir == NULL) { fprintf(stderr, "warning: $top_builddir undefined, " "so $STARPU_CHECK_LAUNCHER ignored\n"); launcher = NULL; launcher_args = NULL; libtool = NULL; } else { libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); strcpy(libtool, top_builddir); strcat(libtool, "/libtool"); } if (launcher) { const char *top_srcdir = getenv("top_srcdir"); decode(&launcher, "@top_srcdir@", top_srcdir); decode(&launcher_args, "@top_srcdir@", top_srcdir); } setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); /* set SIGALARM handler */ sa.sa_flags = SA_RESETHAND | SA_NODEFER; sigemptyset(&sa.sa_mask); sa.sa_handler = test_cleaner; if (-1 == sigaction(SIGALRM, &sa, NULL)) perror("sigaction"); signal(SIGINT, forwardsig); signal(SIGHUP, forwardsig); signal(SIGPIPE, forwardsig); signal(SIGTERM, forwardsig); child_pid = fork(); if (child_pid == 0) { char *launcher_argv[100]; int i=0; setpgid(0, 0); /* "Launchers" such as Valgrind need to be inserted * after the Libtool-generated wrapper scripts, hence * this special-case. */ if (launcher && top_builddir != NULL) { launcher_argv[i++] = libtool; launcher_argv[i++] = "--mode=execute"; launcher_argv[i++] = launcher; if (launcher_args) { launcher_argv[i++] = strtok(launcher_args, " "); while (launcher_argv[i-1]) { launcher_argv[i++] = strtok(NULL, " "); } } } launcher_argv[i++] = test_name; if (test_args) launcher_argv[i++] = test_args; else while (argv[x]) { launcher_argv[i++] = argv[x++]; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG launcher_argv[i++] = "--cfg=contexts/factory:thread"; #endif #endif launcher_argv[i++] = NULL; execvp(*launcher_argv, launcher_argv); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); exit(EXIT_FAILURE); } if (child_pid == -1) { fprintf(stderr, "[error] fork. test marked as failed\n"); exit(EXIT_FAILURE); } free(test_args); free(libtool); ret = EXIT_SUCCESS; gettimeofday(&start, NULL); alarm(timeout); if (child_pid == waitpid(child_pid, &child_exit_status, 0)) { if (WIFEXITED(child_exit_status)) { int status = WEXITSTATUS(child_exit_status); if (status == EXIT_SUCCESS) { alarm(0); } else { if (status != AUTOTEST_SKIPPED_TEST) fprintf(stdout, "`%s' exited with return code %d\n", test_name, status); ret = status; } } else if (WIFSIGNALED(child_exit_status)) { fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", test_name, WTERMSIG(child_exit_status)); launch_gdb(test_name); ret = EXIT_FAILURE; } else { fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", test_name); ret = EXIT_FAILURE; } } gettimeofday(&end, NULL); timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); return ret; } starpu-1.4.9+dfsg/examples/stencil/mpi.out000066400000000000000000000260141507764646700206100ustar00rootroot00000000000000Warning: MPI only has funneled thread support, not serialized, hoping this will work Running on 2 nodes Warning: MPI only has funneled thread support, not serialized, hoping this will work 9 MB of memory were allocated on node 1 9 MB of memory were allocated on node 0 GO ! update: CPU 0 -> 2048 (100.00%) top: CPU 0 -> 2048 (100.00%) bottom: CPU 0 -> 2048 (100.00%) | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | * 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 | * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * | 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 | 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 0 | * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 | * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * | 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 | 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 | * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 | * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * | 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 | 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 0 | * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 | * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * | 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 | 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 | * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 | * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * | 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 | 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 | * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 | * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * | 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 | 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 | * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 | * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * | 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 | 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 | * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 | * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * | 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 | 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 | * 1 1 1 * 1 1 1 1 * 1 1 1 * * 1 1 * * 1 1 1 * 1 1 1 1 * 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 | * * * * * * * * * * * * 0 0 0 0 0 0 0 0 * * * * * * * * * * * * | 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 | * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 | * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * | 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 | 0 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 0 | * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 | * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * | 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 | 0 0 0 0 * * * * * * * * * * * * * * * * * * * * * * * * * 0 0 0 | * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 | * * * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * * * * | 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 | 0 0 0 0 0 0 0 0 0 * * * * * * * * * * * * * * 0 0 0 0 0 0 0 0 0 | * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | * 1 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 * 1 1 1 1 * 1 1 1 1 * 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 * * 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 1 * 1 1 1 * 1 1 1 1 * 1 1 1 1 1 1 * 1 1 1 1 * 1 1 1 * 1 1 1 | * * * * * * * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 * * * * * * | 1 1 1 1 * 1 1 1 * 1 1 1 1 _ _ _ _ _ _ 1 1 1 1 * 1 1 1 * 1 1 1 1 | 0 0 0 0 0 0 0 * * * * * * * * * * * * * * * * * * * 0 0 0 0 0 0 | * 1 1 1 1 * 1 1 1 _ _ _ _ _ _ _ _ _ _ _ _ _ _ 1 1 1 * 1 1 1 1 * | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 * 1 1 1 1 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 1 1 1 1 * 1 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | 1 1 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 1 1 | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ starpu-1.4.9+dfsg/examples/stencil/results000066400000000000000000001422401507764646700207160ustar00rootroot00000000000000B=0 | 0 1 1 1 2 0 0 1 0 1 1 0 2 0 0 2 1 0 2 1 0 2 0 1 2 0 2 1 2 0 0 1 0 1 1 2 0 1 0 2 0 1 0 0 2 0 1 2 0 2 2 1 0 0 2 1 0 0 2 0 0 1 2 1 | 0 0 0 1 2 1 1 0 0 0 1 1 0 2 1 0 1 0 0 1 1 0 1 0 1 0 0 1 0 2 0 2 1 0 0 0 1 1 2 0 0 2 0 0 1 2 0 1 2 0 2 0 1 0 2 0 1 2 0 1 2 0 2 2 | 2 0 1 2 1 2 0 2 0 1 0 2 2 1 0 0 2 0 0 0 2 0 0 0 1 0 1 0 1 0 2 1 2 0 2 2 0 2 2 1 0 0 1 0 0 1 2 1 0 0 0 2 2 0 1 0 2 1 2 0 1 0 0 1 | 0 0 1 0 0 2 1 0 0 1 2 1 1 2 1 2 1 0 2 2 1 0 0 0 0 1 2 1 0 0 1 2 1 0 0 1 0 2 0 1 2 1 2 1 0 1 0 2 0 0 0 0 2 1 0 0 2 0 1 0 1 2 1 2 | 1 0 0 0 0 2 1 2 2 0 2 1 2 0 1 1 0 0 1 2 0 1 2 1 1 0 0 2 2 1 0 1 2 0 0 0 0 2 1 0 2 0 0 2 0 1 1 0 2 1 2 2 0 1 1 1 2 0 0 0 0 2 0 2 | 0 0 1 1 0 1 1 2 1 2 2 1 0 1 0 2 2 0 1 2 1 0 1 0 1 0 0 2 1 0 2 1 0 0 2 0 0 1 0 1 2 0 2 0 1 2 0 1 1 0 2 0 0 2 1 0 1 0 0 1 2 0 2 0 | 2 0 1 2 1 0 0 0 1 0 2 0 1 2 0 0 0 2 0 1 0 0 0 0 1 1 0 2 1 0 0 1 0 2 1 2 0 2 2 0 2 1 1 1 0 0 0 0 1 2 2 1 1 0 0 0 2 0 2 1 2 0 2 1 | 1 0 0 0 0 0 0 0 1 0 0 0 1 0 1 1 0 0 2 0 2 0 0 2 0 0 2 1 2 2 2 0 1 1 0 1 1 1 1 1 0 1 0 2 2 2 0 1 0 1 0 2 0 1 1 0 0 1 2 1 0 2 1 2 | 2 0 1 2 0 2 0 1 0 2 1 0 0 0 0 0 2 2 1 0 2 2 1 0 2 0 1 0 0 2 0 0 2 0 1 1 0 0 0 2 2 1 2 0 1 2 0 2 0 0 2 2 1 0 0 2 0 2 1 0 1 0 2 0 | 0 0 2 1 2 0 2 2 1 0 0 2 1 2 0 1 0 2 1 2 0 2 0 0 1 0 0 0 1 1 0 1 2 1 2 1 1 0 1 2 1 0 2 0 1 2 0 0 0 2 2 2 0 2 0 2 0 0 0 0 1 0 2 0 | 0 0 0 1 2 0 0 1 0 1 2 2 0 0 1 1 0 2 1 0 2 2 1 1 0 0 2 0 1 2 0 0 0 0 1 2 0 0 0 2 1 0 0 2 0 2 1 2 0 0 2 1 2 0 2 1 2 1 2 1 0 2 0 1 | 1 1 1 1 2 2 0 1 2 1 0 0 0 1 2 2 0 1 2 1 2 0 1 1 2 0 1 1 0 2 0 1 0 1 2 0 2 0 2 0 1 0 2 0 2 2 1 2 0 1 2 0 0 0 1 2 0 0 0 0 0 2 2 0 | 0 2 0 0 2 0 1 1 1 0 0 1 2 0 1 1 2 0 0 0 0 1 0 0 2 0 1 0 0 1 0 0 1 2 1 0 2 2 1 1 0 2 1 2 0 2 1 0 0 0 1 0 2 1 2 1 0 2 0 1 2 1 0 0 | 1 1 0 0 1 1 1 0 1 2 2 0 1 2 2 0 1 2 0 2 1 1 2 2 1 0 0 1 2 2 2 1 2 0 2 0 0 0 0 2 2 0 0 1 0 0 1 0 0 2 0 1 2 0 0 1 1 2 0 1 0 2 1 2 | 1 2 2 1 0 0 0 1 0 2 0 0 2 1 1 1 1 1 0 1 2 1 2 1 0 2 2 0 2 0 2 1 0 0 1 2 0 1 2 0 1 2 0 0 1 0 2 1 1 2 0 1 0 2 2 0 0 0 0 0 1 0 0 1 | 0 0 2 0 2 2 0 1 2 0 1 0 2 0 2 0 1 0 1 2 2 1 0 1 2 2 1 0 0 1 0 1 0 2 0 1 0 0 2 1 2 0 0 0 1 1 1 1 1 0 0 0 2 0 1 0 0 0 2 2 0 2 0 0 | 0 0 1 0 0 0 1 0 0 1 2 0 2 0 2 0 2 1 0 1 0 1 0 2 0 0 0 1 0 0 1 2 1 0 2 2 1 1 2 0 2 1 1 2 2 2 0 0 2 1 2 0 0 0 2 1 1 2 2 0 1 0 1 0 | 2 1 1 0 2 2 0 1 0 2 1 1 0 0 1 0 0 0 2 1 0 1 0 2 2 2 0 1 2 2 0 2 0 0 0 2 0 1 2 0 1 2 2 0 1 0 0 1 1 0 2 0 2 1 0 2 0 1 1 0 1 0 0 0 | 2 0 2 0 2 1 1 0 1 0 1 2 1 0 2 0 0 1 0 2 0 2 0 2 0 1 0 1 0 2 0 1 2 0 0 0 1 0 1 0 0 2 1 2 1 0 2 1 2 0 1 2 2 0 2 0 0 0 1 1 2 0 2 2 | 2 1 0 2 0 2 1 2 0 2 0 0 1 0 0 1 0 0 0 0 2 1 2 0 2 1 2 2 0 2 1 0 2 0 2 2 2 1 0 2 2 1 0 2 1 0 0 0 1 0 0 1 2 1 0 0 0 2 1 1 1 0 2 1 | 2 2 0 0 1 0 0 1 0 1 0 0 0 2 0 2 1 0 1 2 2 0 2 0 2 0 2 0 0 2 1 0 0 1 0 0 2 1 2 2 1 1 2 0 1 2 2 0 2 0 0 2 0 0 0 1 0 2 0 1 2 2 0 1 | 2 0 2 1 2 0 2 2 1 2 0 0 0 2 1 0 0 2 2 0 0 1 2 2 0 2 2 0 0 0 1 2 1 0 0 0 2 0 2 2 2 0 0 1 2 0 1 1 0 1 2 1 0 0 0 0 1 0 0 2 1 1 0 1 | 0 0 1 1 1 0 1 0 2 1 0 0 2 1 2 0 1 1 2 0 1 1 0 0 1 2 0 1 2 0 0 0 2 0 1 0 1 0 0 2 1 0 2 1 2 0 0 2 0 2 1 0 1 2 2 2 1 0 2 0 1 0 1 2 | 2 2 0 2 1 0 2 0 0 0 0 0 1 2 0 1 0 2 0 1 0 0 2 0 2 0 2 1 0 2 1 0 2 2 0 1 0 1 0 1 2 0 0 1 2 1 0 0 1 2 1 2 0 1 0 1 2 2 0 0 1 2 1 1 | 0 2 0 0 1 0 1 2 0 0 0 2 1 0 2 2 2 1 0 1 2 0 0 1 1 2 1 0 1 1 0 2 1 2 0 0 0 1 0 1 1 0 0 0 1 0 2 2 1 0 0 2 0 2 1 2 0 2 0 0 1 1 0 2 | 2 1 1 2 0 2 0 0 0 0 1 2 2 0 0 2 0 2 1 0 2 0 2 0 1 1 0 2 1 0 1 0 0 0 2 2 0 2 1 0 0 2 2 2 0 1 2 1 0 0 2 0 1 0 0 0 0 1 2 1 0 2 0 2 | 1 0 2 1 0 0 0 1 0 1 0 2 2 0 1 1 0 2 0 0 2 1 1 2 1 0 0 0 2 2 2 2 1 1 1 2 1 0 1 2 0 2 0 2 0 0 2 0 1 0 0 2 1 0 0 2 1 2 0 2 0 0 1 0 | 1 1 2 2 1 2 0 2 0 0 2 0 2 0 1 0 1 0 0 2 1 0 1 2 1 1 2 0 2 1 2 1 0 0 2 2 0 2 0 0 0 2 0 0 0 1 0 1 0 2 0 1 1 2 2 0 0 2 2 0 2 0 0 0 | 0 2 0 1 0 0 1 0 1 2 1 1 2 0 0 0 0 2 2 1 0 1 0 2 0 2 0 1 2 0 2 0 2 0 0 2 1 0 2 2 1 0 2 1 2 0 0 0 1 1 1 1 2 1 1 0 1 2 1 2 1 2 1 2 | 0 2 0 0 0 1 1 0 1 1 2 0 0 2 2 0 2 2 2 2 2 0 2 0 1 2 0 0 0 0 1 1 0 2 0 1 2 0 2 1 0 2 2 0 0 2 2 0 1 0 2 2 1 2 0 0 2 2 1 0 0 0 0 1 | 2 0 0 2 0 2 2 0 2 0 0 0 1 0 0 0 1 0 2 2 2 0 2 0 0 2 0 2 0 1 2 0 2 1 0 2 1 2 0 0 2 0 0 1 0 2 2 2 0 1 0 0 2 0 0 0 1 1 0 1 0 2 0 1 | 0 2 2 0 0 2 0 2 0 0 2 1 1 0 0 0 2 0 1 2 0 2 2 1 0 2 0 0 1 0 2 2 0 0 0 1 1 2 1 1 0 2 0 1 0 0 2 1 0 2 1 0 1 0 2 0 2 2 2 2 1 1 0 2 | 0 1 2 1 0 1 0 2 1 1 2 0 0 2 0 0 1 1 0 2 1 0 2 2 2 0 1 0 0 0 0 0 0 0 2 2 1 1 0 1 2 0 1 0 0 1 0 1 0 2 2 0 0 0 1 0 2 2 0 2 0 2 1 0 | 1 0 1 0 2 0 2 2 0 2 1 1 0 2 0 0 0 2 2 0 2 2 0 1 0 0 1 0 2 1 0 0 0 1 2 0 0 2 0 0 0 0 2 2 1 1 2 2 1 0 2 1 1 0 0 1 1 0 2 0 1 1 0 2 | 0 2 0 2 1 1 0 2 0 1 2 0 2 1 2 2 2 2 0 0 0 1 1 0 2 0 1 2 0 1 2 2 0 0 0 1 2 0 0 0 2 0 1 0 2 1 2 0 0 0 0 0 2 0 0 2 0 2 2 2 0 0 0 2 | 1 2 0 2 1 0 2 0 1 1 2 0 0 2 1 2 0 2 0 1 2 2 1 0 2 0 2 0 2 2 0 1 2 2 0 0 2 0 0 2 0 1 0 0 2 2 1 0 1 2 0 0 2 2 0 0 0 0 2 2 0 1 2 1 | 1 0 1 1 0 1 0 2 1 0 1 1 1 1 1 0 0 0 2 2 1 0 0 1 1 0 0 2 0 2 0 0 2 0 0 0 0 2 2 0 0 1 1 0 2 1 1 2 1 0 2 1 1 0 0 0 0 0 1 2 0 0 0 0 | 2 0 0 2 1 0 2 0 1 2 0 2 0 1 2 1 0 0 0 1 0 2 0 2 0 2 1 0 0 0 2 0 0 1 0 2 2 2 1 2 2 0 1 0 1 1 0 2 0 0 0 1 2 0 1 2 2 1 1 0 1 1 1 1 | 1 0 2 0 1 0 0 0 0 0 0 0 0 2 2 2 2 0 2 0 0 0 1 0 2 1 0 0 0 2 0 1 2 2 2 0 2 0 1 1 2 2 2 2 2 0 1 0 2 2 2 0 1 1 0 1 2 2 0 1 1 2 0 2 | 2 0 0 0 0 1 2 1 1 0 1 1 1 0 2 1 0 2 0 1 0 2 1 2 2 1 0 2 0 1 1 0 2 0 0 2 2 0 0 2 0 0 2 1 0 1 0 1 0 1 2 1 2 0 0 2 0 2 0 2 0 2 0 0 | 1 0 1 2 2 0 1 2 1 0 0 2 1 2 2 1 0 1 0 0 0 1 1 0 0 2 1 0 0 2 1 1 1 0 0 0 0 0 0 2 1 1 0 2 2 1 1 0 2 2 2 0 1 0 0 0 2 0 0 2 1 1 0 2 | 1 1 1 0 1 2 0 2 1 2 0 2 0 2 1 0 2 0 2 0 0 0 1 2 0 2 0 2 2 0 0 1 1 2 1 2 0 1 2 0 0 1 0 0 2 0 1 0 2 1 0 0 2 2 0 2 0 2 2 2 1 2 0 1 | 1 1 0 2 2 1 2 0 0 1 0 2 2 0 2 0 1 0 2 1 0 0 0 0 2 0 2 1 0 1 2 1 0 0 1 2 0 1 0 1 0 1 1 0 0 2 2 2 2 0 1 1 0 0 0 0 2 0 1 2 2 1 2 0 | 0 0 0 0 1 0 2 2 1 2 1 0 2 2 1 2 1 0 2 0 1 2 2 0 1 0 2 0 0 0 2 1 0 2 1 0 1 0 0 0 0 0 0 1 0 2 0 1 0 1 0 2 1 0 0 0 2 0 2 2 2 0 1 2 | 0 1 0 2 0 0 1 0 1 1 0 0 0 1 2 2 0 1 0 2 2 1 0 0 2 0 1 1 2 2 2 0 1 1 2 2 1 1 2 0 2 1 2 2 2 0 0 2 2 0 1 0 0 0 0 0 2 0 1 1 2 2 1 0 | 0 0 0 2 1 0 2 2 1 0 1 1 1 0 1 2 2 2 1 0 0 2 0 2 2 1 0 1 0 1 2 1 0 0 0 0 1 0 1 0 2 2 0 1 2 2 0 1 0 0 0 2 2 1 0 0 2 0 0 0 2 0 1 0 | 0 1 0 0 0 0 0 1 0 0 0 1 0 1 1 2 0 2 0 1 1 2 2 0 1 1 0 1 2 1 0 1 0 2 1 0 1 1 0 2 1 1 0 2 2 1 1 0 2 2 0 0 0 0 2 1 0 2 0 0 0 1 2 2 | 1 1 2 1 2 2 0 0 2 1 2 0 2 0 0 0 0 0 0 1 1 2 2 2 1 0 1 0 2 0 0 2 0 1 2 1 0 2 1 1 0 1 0 2 0 1 1 0 2 0 1 2 2 0 1 2 0 2 0 0 1 0 2 0 | 2 0 0 0 1 0 2 0 2 0 0 0 2 1 1 0 1 2 0 2 1 1 1 1 1 0 2 1 1 2 0 2 0 2 1 2 0 2 1 0 0 2 1 1 0 2 0 2 0 0 1 0 0 0 1 0 2 0 2 0 1 0 2 0 | 2 0 2 0 1 0 0 1 1 0 0 0 1 2 2 1 0 2 1 1 0 1 0 2 1 0 0 0 1 2 0 0 0 0 1 2 1 2 1 0 0 0 0 0 1 2 0 1 0 2 2 0 0 0 2 1 0 1 2 0 0 1 0 0 | 0 1 0 1 2 0 0 1 0 2 0 0 2 1 2 1 0 0 0 2 1 0 2 0 0 0 0 2 2 0 0 0 2 1 0 2 2 0 1 2 2 2 0 2 2 0 2 2 2 1 0 2 1 0 1 2 1 2 0 2 0 1 0 1 | 2 0 2 0 1 2 0 1 0 0 2 1 1 0 0 0 1 0 1 2 0 2 1 0 1 1 2 2 0 0 2 0 0 0 2 1 0 0 1 2 2 0 0 0 0 2 0 2 2 2 1 0 0 0 0 1 0 0 2 1 1 2 0 2 | 0 0 1 0 2 2 1 0 2 2 0 1 0 1 1 0 0 1 1 2 0 0 2 0 0 0 2 1 2 0 0 0 1 2 0 1 0 1 2 1 0 0 1 0 2 1 0 0 1 0 2 0 1 1 0 2 1 1 0 2 2 0 2 1 | 1 0 1 1 0 1 2 0 2 1 0 0 1 0 0 0 0 0 0 0 0 2 0 2 0 0 0 2 1 0 2 2 0 2 1 1 1 2 0 1 0 0 0 1 2 0 1 2 1 2 2 0 2 0 1 1 2 1 2 0 1 2 1 0 | 0 2 0 1 0 1 2 0 1 0 1 0 2 0 0 1 0 0 2 0 2 0 2 0 1 2 0 0 1 2 1 1 0 0 0 1 1 0 1 0 2 2 0 1 2 2 0 2 1 2 2 0 0 2 1 0 0 2 0 2 1 2 0 0 | 2 2 2 0 0 0 0 2 1 0 0 0 2 2 1 2 0 1 0 2 0 0 1 0 2 0 1 2 0 0 0 2 0 2 0 1 1 2 1 0 2 2 2 1 2 2 2 1 2 2 0 1 1 1 2 0 2 0 0 2 2 0 1 0 | 0 0 2 0 1 2 2 1 0 0 2 0 2 1 1 0 1 0 0 1 1 1 2 1 1 0 1 2 0 2 0 1 0 0 0 1 0 2 0 0 1 0 0 0 1 0 1 2 0 1 0 1 0 0 0 1 0 2 0 0 2 2 0 2 | 0 2 1 0 1 1 0 2 0 2 2 1 2 1 1 1 0 0 1 2 2 2 0 1 0 0 1 2 1 0 2 0 0 1 0 2 0 0 0 2 2 1 0 0 0 2 1 2 0 2 1 0 1 0 1 0 2 0 2 0 1 2 2 2 | 0 1 0 2 2 1 2 0 2 0 0 1 2 1 0 0 2 2 0 0 2 0 2 1 0 1 2 2 0 1 1 0 0 0 2 1 1 0 2 0 0 0 0 0 1 0 0 0 0 0 0 2 1 2 2 1 1 0 0 1 0 1 2 0 | 2 1 0 0 1 1 2 2 0 1 0 2 0 2 1 0 2 0 1 0 0 2 0 1 0 0 0 1 0 0 0 0 0 1 1 0 0 2 0 1 0 1 0 2 1 0 2 2 2 0 0 2 1 1 2 1 2 1 1 0 1 2 0 1 | 1 0 0 2 1 0 2 0 2 1 2 2 1 0 1 2 0 1 1 1 2 1 0 0 0 0 2 1 0 2 0 1 2 2 0 0 0 0 2 2 1 0 1 0 0 2 0 2 0 0 2 0 2 1 0 2 1 0 0 1 0 0 0 0 | 0 1 0 2 0 2 1 0 1 2 0 0 2 0 0 2 2 0 1 2 1 2 1 0 0 2 2 0 1 2 2 0 0 2 0 1 2 0 1 2 0 1 0 2 0 1 0 2 0 0 0 0 0 0 1 2 1 1 0 0 1 1 0 2 | 0 2 1 0 2 0 0 1 0 1 2 2 1 2 0 1 0 2 0 1 0 0 0 0 1 0 0 2 2 2 1 2 2 1 0 0 2 1 0 1 2 0 0 1 0 0 2 2 1 0 0 2 0 2 1 0 2 0 0 0 2 0 2 1 | 0 0 1 2 0 2 2 2 1 0 2 0 2 2 0 0 1 0 1 0 2 2 0 0 0 1 2 2 1 1 1 0 0 1 2 1 0 1 0 1 2 2 0 2 0 2 2 0 2 0 2 2 0 2 1 2 0 0 2 0 2 2 0 0 B=1 | 0 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 0 0 0 0 2 2 2 1 0 1 2 2 2 2 2 1 2 1 0 0 0 0 1 0 0 2 0 2 0 1 1 1 2 2 2 2 1 1 1 0 0 0 0 2 0 1 1 1 | 0 0 0 2 0 2 0 2 0 0 1 1 1 1 1 1 0 0 0 0 2 2 2 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 0 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 0 2 1 1 1 | 0 0 0 0 2 0 2 0 0 0 1 1 1 1 1 1 0 0 0 0 2 2 2 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 0 2 1 1 1 | 0 0 0 0 0 2 0 0 0 0 1 1 1 1 1 1 0 0 0 0 2 2 2 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 0 2 1 1 1 | 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 2 2 2 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 2 2 1 1 1 | 0 0 0 0 0 0 2 0 2 1 1 1 1 1 1 1 2 2 0 2 2 2 2 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 2 2 1 1 1 | 0 0 0 0 0 0 0 2 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 2 2 1 1 1 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 2 2 1 1 1 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 2 2 1 1 1 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 1 0 0 0 0 2 2 1 1 1 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 0 0 0 0 0 0 2 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 2 1 1 2 2 2 2 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 2 2 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 2 2 0 | 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 2 2 0 | 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 2 2 0 | 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 2 2 0 | 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 2 1 1 1 0 0 0 0 0 0 0 2 2 0 | 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 2 1 1 1 1 0 0 0 0 0 0 0 2 2 0 | 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 2 1 1 1 1 1 0 0 0 0 0 0 0 2 2 0 | 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 1 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 2 2 2 2 | 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 | 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 | 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 | 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 | 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 | 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 | 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 | 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 0 0 2 2 2 2 2 | 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 0 0 2 2 2 2 2 | 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 0 0 2 2 2 2 2 | 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 0 0 2 2 2 2 2 | 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 0 0 2 2 2 2 2 | 0 0 0 0 0 0 2 1 0 0 0 0 0 1 1 1 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 1 0 2 2 2 1 2 2 2 2 2 2 | 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 1 2 2 2 2 2 2 2 1 0 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 0 0 0 0 2 2 2 | 0 1 1 0 0 0 1 1 1 1 0 0 0 0 0 1 2 2 2 2 2 2 2 0 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 0 1 2 2 2 2 0 0 0 0 2 2 2 | 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 1 0 2 2 2 2 2 0 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 0 2 2 2 2 0 0 0 0 2 2 2 | 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 2 0 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 0 2 2 2 2 0 0 0 0 2 2 2 | 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 2 0 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 2 0 0 0 0 2 2 2 | 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 2 0 0 0 0 2 2 2 | 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 2 0 0 0 0 2 2 2 | 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 0 0 0 0 2 2 2 | 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 0 0 0 0 2 2 2 | 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 0 0 0 0 2 2 2 | 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 2 2 2 2 2 2 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 0 0 0 0 2 2 2 | 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 1 2 2 2 2 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 2 2 0 0 0 0 0 2 2 2 B=2 | 2 0 2 0 2 0 0 0 2 2 2 0 0 0 0 2 0 2 1 1 1 1 1 1 1 1 2 0 2 0 2 0 2 1 1 1 1 0 2 0 2 0 2 0 2 0 1 2 1 1 1 1 1 0 0 0 2 0 2 0 2 0 2 0 | 0 2 0 2 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 0 2 0 2 0 1 1 1 1 0 0 2 0 2 0 2 0 1 1 1 1 1 1 1 1 0 0 0 0 2 0 2 0 2 0 2 | 2 0 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 0 2 0 1 1 1 1 1 0 0 0 2 0 2 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 0 2 0 2 0 | 0 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 0 2 1 1 1 1 1 0 0 0 0 2 0 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 0 2 0 2 | 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 0 2 0 | 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 0 2 | 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 2 2 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 2 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 0 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 | 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 B=3 | 0 0 2 0 2 0 0 0 2 2 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 0 2 0 2 0 1 0 1 2 1 1 1 1 1 2 1 2 0 2 0 0 | 0 0 0 2 0 0 0 0 2 2 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 0 2 0 1 0 1 1 1 1 1 1 1 1 1 2 1 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 2 0 2 0 2 0 2 0 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 0 1 0 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 2 0 2 0 2 0 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 0 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 2 0 2 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 2 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 | 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 0 0 0 0 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 0 0 B=4 | 2 0 2 0 2 0 2 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 2 1 2 2 2 2 2 2 2 | 2 2 0 2 0 2 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 0 2 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 | 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 2 2 2 2 2 2 2 2 B=6 | 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 2 0 2 0 2 0 2 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 2 0 2 0 2 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 2 0 2 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 2 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 starpu-1.4.9+dfsg/examples/stencil/run000077500000000000000000000021141507764646700200170ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # [ -z "$N" ] && N=1 export N [ -z "$STARPU_SCHED_BETA" ] && STARPU_SCHED_BETA="2" export STARPU_SCHED_BETA echo sched $STARPU_SCHED 1>&2 echo sched_beta $STARPU_SCHED_BETA 1>&2 echo prefetch $STARPU_PREFETCH 1>&2 echo calibrate $STARPU_CALIBRATE 1>&2 echo ncpus $STARPU_NCPUS 1>&2 echo ncuda $STARPU_NCUDA 1>&2 echo N $N ./stencil -nbz $(($N * 64)) -sizex 128 -sizey 128 -sizez $(( $((16 * $N)) * 128 )) -niter 64 "$@" starpu-1.4.9+dfsg/examples/stencil/shadow.cu000066400000000000000000000044241507764646700211110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define _externC extern "C" #include "stencil.h" /* Perform replication of data on X and Y edges, to fold the domain on itself through mere replication of the source state. */ extern "C" __global__ void cuda_shadow( int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i) { unsigned idx = threadIdx.x + blockIdx.x * blockDim.x; unsigned idy = threadIdx.y + blockIdx.y * blockDim.y; //unsigned idz = threadIdx.z + blockIdx.z * blockDim.z; unsigned idz = 0; unsigned stepx = blockDim.x * gridDim.x; unsigned stepy = blockDim.y * gridDim.y; //unsigned stepz = blockDim.z * gridDim.z; unsigned stepz = 1; unsigned x, y, z; #include "shadow.h" } extern "C" void cuda_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i) { unsigned max_parallelism = 512; unsigned threads_per_dim_x = max_parallelism; while (threads_per_dim_x / 2 >= nx) threads_per_dim_x /= 2; unsigned threads_per_dim_y = max_parallelism / threads_per_dim_x; while (threads_per_dim_y / 2 >= ny) threads_per_dim_y /= 2; #if 0 unsigned threads_per_dim_z = 4; dim3 dimBlock(threads_per_dim_x, threads_per_dim_y, threads_per_dim_z); dim3 dimGrid(nx / threads_per_dim_x, ny / threads_per_dim_y, nz / threads_per_dim_z); #else dim3 dimBlock(threads_per_dim_x, threads_per_dim_y); dim3 dimGrid((nx + threads_per_dim_x-1) / threads_per_dim_x, (ny + threads_per_dim_y-1) / threads_per_dim_y); #endif cuda_shadow <<>> (bz, ptr, nx, ny, nz, ldy, ldz, i); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/examples/stencil/shadow.h000066400000000000000000000033171507764646700207310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * Perform replication of data on X and Y edges, to fold the domain on * itself through mere replication of the source state. */ /* TODO: rather use a dummy for loop, to assign the job to the threads that will work on it? */ if (idy == 0) for (z = i-1 + idz; z < nz-(i-1); z += stepz) for (x = K + idx; x < nx-K; x += stepx) { unsigned index = x+z*ldz; ptr[index+(K-1)*ldy] = ptr[index+(ny-K-1)*ldy]; ptr[index+(ny-K)*ldy] = ptr[index+K*ldy]; } if (idx == 0) for (z = i-1 + idz; z < nz-(i-1); z += stepz) for (y = K + idy; y < ny-K; y += stepy) { unsigned index = y*ldy+z*ldz; ptr[(K-1)+index] = ptr[(nx-K-1)+index]; ptr[(nx-K)+index] = ptr[K+index]; } if (idx == 0 && idy == 0) for (z = i-1 + idz; z < nz-(i-1); z += stepz) { unsigned index = z*ldz; ptr[K-1+(K-1)*ldy+index] = ptr[(nx-K-1)+(ny-K-1)*ldy+index]; ptr[(nx-K)+(K-1)*ldy+index] = ptr[K+(ny-K-1)*ldy+index]; ptr[(K-1)+(ny-K)*ldy+index] = ptr[(nx-K-1)+K*ldy+index]; ptr[(nx-K)+(ny-K)*ldy+index] = ptr[K+K*ldy+index]; } starpu-1.4.9+dfsg/examples/stencil/shadow_opencl.c000066400000000000000000000070031507764646700222600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "stencil.h" /* Perform replication of data on X and Y edges, to fold the domain on * itself through mere replication of the source state. */ #define str(x) #x #define clsrc(t,k) "__kernel void\n\ #define TYPE " str(t) "\n\ #define K " str(k) "\n\ shadow(int bz, __global TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i)\n\ {\n\ unsigned idx = get_global_id(0);\n\ unsigned idy = get_global_id(1);\n\ //unsigned idz = threadIdx.z + blockIdx.z * blockDim.z;\n\ unsigned idz = 0;\n\ unsigned stepx = get_global_size(0);\n\ unsigned stepy = get_global_size(1);\n\ //unsigned stepz = blockDim.z * gridDim.z;\n\ unsigned stepz = 1;\n\ unsigned x, y, z;\n\ if (idy == 0)\n\ for (z = i-1 + idz; z < nz-(i-1); z += stepz)\n\ for (x = K + idx; x < nx-K; x += stepx) \ {\n \ unsigned index = x+z*ldz;\n\ ptr[index+(K-1)*ldy] = ptr[index+(ny-K-1)*ldy];\n\ ptr[index+(ny-K)*ldy] = ptr[index+K*ldy];\n\ }\n\ \n\ if (idx == 0)\n\ for (z = i-1 + idz; z < nz-(i-1); z += stepz)\n\ for (y = K + idy; y < ny-K; y += stepy) \ {\n \ unsigned index = y*ldy+z*ldz;\n\ ptr[(K-1)+index] = ptr[(nx-K-1)+index];\n\ ptr[(nx-K)+index] = ptr[K+index];\n\ }\n\ \n\ if (idx == 0 && idy == 0)\n\ for (z = i-1 + idz; z < nz-(i-1); z += stepz) \ {\n \ unsigned index = z*ldz;\n\ ptr[K-1+(K-1)*ldy+index] = ptr[(nx-K-1)+(ny-K-1)*ldy+index];\n\ ptr[(nx-K)+(K-1)*ldy+index] = ptr[K+(ny-K-1)*ldy+index];\n\ ptr[(K-1)+(ny-K)*ldy+index] = ptr[(nx-K-1)+K*ldy+index];\n\ ptr[(nx-K)+(ny-K)*ldy+index] = ptr[K+K*ldy+index];\n\ }\n\ }" static const char * src = clsrc(TYPE,K); static struct starpu_opencl_program program; void opencl_shadow_init(void) { starpu_opencl_load_opencl_from_string(src, &program, NULL); } void opencl_shadow_free(void) { int ret = starpu_opencl_unload_opencl(&program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); } void opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i) { #if 0 size_t dim[] = {nx, ny, nz}; #else size_t dim[] = {nx, ny, 1}; #endif int devid,id; id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); cl_kernel kernel; cl_command_queue cq; cl_int err; err = starpu_opencl_load_kernel(&kernel, &cq, &program, "shadow", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); clSetKernelArg(kernel, 0, sizeof(bz), &bz); clSetKernelArg(kernel, 1, sizeof(ptr), &ptr); clSetKernelArg(kernel, 2, sizeof(nx), &nx); clSetKernelArg(kernel, 3, sizeof(ny), &ny); clSetKernelArg(kernel, 4, sizeof(nz), &nz); clSetKernelArg(kernel, 5, sizeof(ldy), &ldy); clSetKernelArg(kernel, 6, sizeof(ldz), &ldz); clSetKernelArg(kernel, 7, sizeof(i), &i); err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, dim, NULL, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu-1.4.9+dfsg/examples/stencil/stencil-blocks.c000066400000000000000000000233641507764646700223570ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "stencil.h" #include /* Manage block and tags allocation */ static struct block_description *blocks; static unsigned sizex, sizey, sizez; static unsigned nbz; static unsigned *block_sizes_z; /* * Tags for various codelet completion */ /* * common tag format: */ static starpu_tag_t tag_common(int z, int dir, int type) { return (((((starpu_tag_t)type) << 4) | ((dir+1)/2)) << 32)|(starpu_tag_t)z; } /* Completion of last update tasks */ starpu_tag_t TAG_FINISH(int z) { z = (z + nbz)%nbz; starpu_tag_t tag = tag_common(z, 0, 1); return tag; } /* Completion of the save codelet for MPI send/recv */ starpu_tag_t TAG_START(int z, int dir) { z = (z + nbz)%nbz; starpu_tag_t tag = tag_common(z, dir, 2); return tag; } /* * common MPI tag format: * iter is actually not needed for coherency, but it makes debugging easier */ static int mpi_tag_common(int z, int iter, int dir, int buffer) { return (((((iter << 12)|z)<<4) | ((1+dir)/2))<<4)|buffer; } int MPI_TAG0(int z, int iter, int dir) { z = (z + nbz)%nbz; int tag = mpi_tag_common(z, iter, dir, 0); return tag; } int MPI_TAG1(int z, int iter, int dir) { z = (z + nbz)%nbz; int tag = mpi_tag_common(z, iter, dir, 1); return tag; } /* * Block descriptors */ /* Compute the size of the different blocks */ static void compute_block_sizes(void) { block_sizes_z = (unsigned *) malloc(nbz*sizeof(unsigned)); STARPU_ASSERT(block_sizes_z); /* Perhaps the last chunk is smaller */ unsigned default_block_size = (sizez+nbz-1)/nbz; unsigned remaining = sizez; unsigned b; for (b = 0; b < nbz; b++) { block_sizes_z[b] = MIN(default_block_size, remaining); remaining -= block_sizes_z[b]; } STARPU_ASSERT(remaining == 0); } unsigned get_block_size(int bz) { return block_sizes_z[bz]; } struct block_description *get_block_description(int z) { z = (z + nbz)%nbz; STARPU_ASSERT(&blocks[z]); return &blocks[z]; } int get_block_mpi_node(int z) { z = (z + nbz)%nbz; return blocks[z].mpi_node; } void create_blocks_array(unsigned _sizex, unsigned _sizey, unsigned _sizez, unsigned _nbz) { /* Store the parameters */ nbz = _nbz; sizex = _sizex; sizey = _sizey; sizez = _sizez; /* Create a grid of block descriptors */ blocks = (struct block_description *) calloc(nbz, sizeof(struct block_description)); STARPU_ASSERT(blocks); /* What is the size of the different blocks ? */ compute_block_sizes(); unsigned bz; for (bz = 0; bz < nbz; bz++) { struct block_description * block = get_block_description(bz); /* Which block is it ? */ block->bz = bz; /* For simplicity, we store which are the neighbours blocks */ block->boundary_blocks[B] = get_block_description((bz-1+nbz)%nbz); block->boundary_blocks[T] = get_block_description((bz+1)%nbz); } } void free_blocks_array() { free(blocks); free(block_sizes_z); } /* * Initialization of the blocks */ void assign_blocks_to_workers(int rank) { unsigned bz; /* NB: perhaps we could count a GPU as multiple workers */ /* how many workers are there ? */ /*unsigned nworkers = starpu_worker_get_count();*/ /* how many blocks are on that MPI node ? */ // unsigned nblocks = 0; // for (bz = 0; bz < nbz; bz++) // { // struct block_description *block = // get_block_description(bz); // // if (block->mpi_node == rank) // nblocks++; // } /* how many blocks per worker ? */ /*unsigned nblocks_per_worker = (nblocks + nworkers - 1)/nworkers;*/ /* we now attribute up to nblocks_per_worker blocks per workers */ unsigned attributed = 0; for (bz = 0; bz < nbz; bz++) { struct block_description *block = get_block_description(bz); if (block->mpi_node == rank) { unsigned workerid; /* Manage initial block distribution between CPU and GPU */ #if 0 #if 1 /* GPUs then CPUs */ if (attributed < 3*18) workerid = attributed / 18; else workerid = 3+ (attributed - 3*18) / 2; #else /* GPUs interleaved with CPUs */ if ((attributed % 20) <= 1) workerid = 3 + attributed / 20; else if (attributed < 60) workerid = attributed / 20; else workerid = (attributed - 60)/2 + 6; #endif #else /* Only GPUS */ workerid = (attributed / 21) % 3; #endif /*= attributed/nblocks_per_worker;*/ block->preferred_worker = workerid; attributed++; } } } void assign_blocks_to_mpi_nodes(int world_size) { unsigned nzblocks_per_process = (nbz + world_size - 1) / world_size; unsigned bz; for (bz = 0; bz < nbz; bz++) { struct block_description *block = get_block_description(bz); block->mpi_node = bz / nzblocks_per_process; } } static size_t allocated = 0; static void allocate_block_on_node(starpu_data_handle_t *handleptr, unsigned bz, TYPE **ptr, unsigned nx, unsigned ny, unsigned nz) { int ret; size_t block_size = nx*ny*nz*sizeof(TYPE); /* Allocate memory */ #if 1 ret = starpu_malloc_flags((void **)ptr, block_size, STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); STARPU_ASSERT(ret == 0); #else *ptr = malloc(block_size); STARPU_ASSERT(*ptr); #endif allocated += block_size; #ifndef STARPU_SIMGRID /* Fill the blocks with 0 */ memset(*ptr, 0, block_size); #endif /* Register it to StarPU */ starpu_block_data_register(handleptr, STARPU_MAIN_RAM, (uintptr_t)*ptr, nx, nx*ny, nx, ny, nz, sizeof(TYPE)); starpu_data_set_coordinates(*handleptr, 1, bz); } static void free_block_on_node(starpu_data_handle_t handleptr, unsigned nx, unsigned ny, unsigned nz) { void *ptr = (void *) starpu_block_get_local_ptr(handleptr); size_t block_size = nx*ny*nz*sizeof(TYPE); starpu_data_unregister(handleptr); starpu_free_flags(ptr, block_size, STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); } void display_memory_consumption(int rank) { FPRINTF(stderr, "%lu B of memory were allocated on node %d\n", (unsigned long) allocated, rank); } void allocate_memory_on_node(int rank) { unsigned bz; for (bz = 0; bz < nbz; bz++) { struct block_description *block = get_block_description(bz); int node = block->mpi_node; /* Main blocks */ if (node == rank) { unsigned size_bz = block_sizes_z[bz]; allocate_block_on_node(&block->layers_handle[0], bz, &block->layers[0], (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K)); #ifndef STARPU_SIMGRID #ifdef LIFE unsigned x, y, z; unsigned sum = 0; for (x = 0; x < sizex; x++) for (y = 0; y < sizey; y++) for (z = 0; z < size_bz; z++) /* Just random data */ sum += block->layers[0][(K+x)+(K+y)*(sizex + 2*K)+(K+z)*(sizex+2*K)*(sizey+2*K)] = (int)((x/7.+y/13.+(bz*size_bz + z)/17.) * 10.) % 2; /* printf("block %d starts with %d/%d alive\n", bz, sum, sizex*sizey*size_bz);*/ #endif #endif allocate_block_on_node(&block->layers_handle[1], bz, &block->layers[1], (sizex + 2*K), (sizey + 2*K), (size_bz + 2*K)); } /* Boundary blocks : Top */ int top_node = block->boundary_blocks[T]->mpi_node; if ((node == rank) || (top_node == rank)) { allocate_block_on_node(&block->boundaries_handle[T][0], bz, &block->boundaries[T][0], (sizex + 2*K), (sizey + 2*K), K); allocate_block_on_node(&block->boundaries_handle[T][1], bz, &block->boundaries[T][1], (sizex + 2*K), (sizey + 2*K), K); } /* Boundary blocks : Bottom */ int bottom_node = block->boundary_blocks[B]->mpi_node; if ((node == rank) || (bottom_node == rank)) { allocate_block_on_node(&block->boundaries_handle[B][0], bz, &block->boundaries[B][0], (sizex + 2*K), (sizey + 2*K), K); allocate_block_on_node(&block->boundaries_handle[B][1], bz, &block->boundaries[B][1], (sizex + 2*K), (sizey + 2*K), K); } } } void free_memory_on_node(int rank) { unsigned bz; for (bz = 0; bz < nbz; bz++) { struct block_description *block = get_block_description(bz); int node = block->mpi_node; /* Main blocks */ if (node == rank) { free_block_on_node(block->layers_handle[0], (sizex + 2*K), (sizey + 2*K), K); free_block_on_node(block->layers_handle[1], (sizex + 2*K), (sizey + 2*K), K); } /* Boundary blocks : Top */ int top_node = block->boundary_blocks[T]->mpi_node; if ((node == rank) || (top_node == rank)) { free_block_on_node(block->boundaries_handle[T][0], (sizex + 2*K), (sizey + 2*K), K); free_block_on_node(block->boundaries_handle[T][1], (sizex + 2*K), (sizey + 2*K), K); } /* Boundary blocks : Bottom */ int bottom_node = block->boundary_blocks[B]->mpi_node; if ((node == rank) || (bottom_node == rank)) { free_block_on_node(block->boundaries_handle[B][0], (sizex + 2*K), (sizey + 2*K), K); free_block_on_node(block->boundaries_handle[B][1], (sizex + 2*K), (sizey + 2*K), K); } } } /* check how many cells are alive */ void check(int rank) { unsigned bz; for (bz = 0; bz < nbz; bz++) { struct block_description *block = get_block_description(bz); int node = block->mpi_node; /* Main blocks */ if (node == rank) { #ifdef LIFE unsigned size_bz = block_sizes_z[bz]; unsigned x, y, z; unsigned sum = 0; for (x = 0; x < sizex; x++) for (y = 0; y < sizey; y++) for (z = 0; z < size_bz; z++) sum += block->layers[0][(K+x)+(K+y)*(sizex + 2*K)+(K+z)*(sizex+2*K)*(sizey+2*K)]; printf("block %u got %u/%u alive\n", bz, sum, sizex*sizey*size_bz); #endif } } } starpu-1.4.9+dfsg/examples/stencil/stencil-kernels.c000066400000000000000000000521261507764646700225430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "stencil.h" /* Computation Kernels */ /* * There are three codeletets: * * - cl_update, which takes a block and the boundaries of its neighbours, loads * the boundaries into the block and perform some update loops: * * comp. buffer save. buffers comp. buffer save. buffers comp. buffer * | ... | * | | +------------------+ +------------------+ * | #N+1 | | #N+1 bottom copy====>#N+1 bottom copy | * +-------------+ +------------------+ +------------------+ * | #N top copy | | #N top copy | | | * +-------------+ +------------------+ | | * | #N | * ... * | | +----------------+ +----------------------+ * | | | #N bottom copy | | block #N bottom copy | * ^ +------------------+ +----------------+ +----------------------+ * | | #N-1 top copy <====#N-1 top copy | | block #N-1 | * | +------------------+ +----------------+ | | * Z ... * * - save_cl_top, which take a block and its top boundary, and saves the top of * the block into the boundary (to be given as bottom of the neighbour above * this block). * * comp. buffer save. buffers comp. buffer save. buffers comp. buffer * | ... | * | | +------------------+ +------------------+ * | #N+1 | | #N+1 bottom copy | | #N+1 bottom copy | * +-------------+ +------------------+ +------------------+ * | #N top copy | | #N top copy <==== | * +-------------+ +------------------+ |..................| * | #N | * ... * | | +----------------+ +----------------------+ * | | | #N bottom copy | | block #N bottom copy | * ^ +------------------+ +----------------+ +----------------------+ * | | #N-1 top copy | | #N-1 top copy | | block #N-1 | * | +------------------+ +----------------+ | | * Z ... * * - save_cl_bottom, same for the bottom * comp. buffer save. buffers comp. buffer save. buffers comp. buffer * | ... | * | | +------------------+ +------------------+ * | #N+1 | | #N+1 bottom copy | | #N+1 bottom copy | * +-------------+ +------------------+ +------------------+ * | #N top copy | | #N top copy | | | * +-------------+ +------------------+ | | * | #N | * ... * |..................| +----------------+ +----------------------+ * | ====>#N bottom copy | | block #N bottom copy | * ^ +------------------+ +----------------+ +----------------------+ * | | #N-1 top copy | | #N-1 top copy | | block #N-1 | * | +------------------+ +----------------+ | | * Z ... * * The idea is that the computation buffers thus don't have to move, only their * boundaries are copied to buffers that do move (be it CPU/GPU, GPU/GPU or via * MPI) * * For each of the buffers above, there are two (0/1) buffers to make new/old switch costless. */ #if 0 # define DEBUG(fmt, ...) fprintf(stderr,fmt,##__VA_ARGS__) #else # define DEBUG(fmt, ...) (void) 0 #endif /* Record which GPU ran which block, for nice pictures */ int who_runs_what_len; int *who_runs_what; int *who_runs_what_index; double *last_tick; /* Achieved iterations */ static int achieved_iter; /* Record how many updates each worker performed */ unsigned update_per_worker[STARPU_NMAXWORKERS]; static void record_who_runs_what(struct block_description *block) { double now, now2, diff, delta = get_ticks() * 1000; int workerid = starpu_worker_get_id_check(); now = starpu_timing_now(); now2 = now - start; diff = now2 - last_tick[block->bz]; while (diff >= delta) { last_tick[block->bz] += delta; diff = now2 - last_tick[block->bz]; if (who_runs_what_index[block->bz] < who_runs_what_len) who_runs_what[block->bz + (who_runs_what_index[block->bz]++) * get_nbz()] = -1; } if (who_runs_what_index[block->bz] < who_runs_what_len) who_runs_what[block->bz + (who_runs_what_index[block->bz]++) * get_nbz()] = global_workerid(workerid); } static void check_load(struct starpu_block_interface *block, struct starpu_block_interface *boundary) { /* Sanity checks */ STARPU_ASSERT(block->nx == boundary->nx); STARPU_ASSERT(block->ny == boundary->ny); STARPU_ASSERT(boundary->nz == K); /* NB: this is not fully guaranteed ... but it's *very* likely and that * makes our life much simpler */ STARPU_ASSERT(block->ldy == boundary->ldy); STARPU_ASSERT(block->ldz == boundary->ldz); } /* * Load a neighbour's boundary into block, CPU version */ static void load_subblock_from_buffer_cpu(void *_block, void *_boundary, unsigned firstz) { struct starpu_block_interface *block = (struct starpu_block_interface *)_block; struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; check_load(block, boundary); /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; unsigned offset = firstz*block->ldz; TYPE *block_data = (TYPE *)block->ptr; TYPE *boundary_data = (TYPE *)boundary->ptr; memcpy(&block_data[offset], boundary_data, boundary_size); } /* * Load a neighbour's boundary into block, CUDA version */ #ifdef STARPU_USE_CUDA static void load_subblock_from_buffer_cuda(void *_block, void *_boundary, unsigned firstz) { struct starpu_block_interface *block = (struct starpu_block_interface *)_block; struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; check_load(block, boundary); /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; unsigned offset = firstz*block->ldz; TYPE *block_data = (TYPE *)block->ptr; TYPE *boundary_data = (TYPE *)boundary->ptr; cudaMemcpyAsync(&block_data[offset], boundary_data, boundary_size, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); } /* * cl_update (CUDA version) */ static void update_func_cuda(void *descr[], void *arg) { struct block_description *block = arg; int workerid = starpu_worker_get_id_check(); DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); if (block->bz == 0) FPRINTF(stderr,"!!! DO update_func_cuda z %u CUDA%d !!!\n", block->bz, workerid); else DEBUG("!!! DO update_func_cuda z %u CUDA%d !!!\n", block->bz, workerid); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int rank = 0; MPI_Comm_rank(MPI_COMM_WORLD, &rank); DEBUG("!!! RANK %d !!!\n", rank); #endif DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); unsigned block_size_z = get_block_size(block->bz); unsigned i; update_per_worker[workerid]++; record_who_runs_what(block); /* * Load neighbours' boundaries : TOP */ /* The offset along the z axis is (block_size_z + K) */ load_subblock_from_buffer_cuda(descr[0], descr[2], block_size_z+K); load_subblock_from_buffer_cuda(descr[1], descr[3], block_size_z+K); /* * Load neighbours' boundaries : BOTTOM */ load_subblock_from_buffer_cuda(descr[0], descr[4], 0); load_subblock_from_buffer_cuda(descr[1], descr[5], 0); /* * Stencils ... do the actual work here :) TODO */ for (i=1; i<=K; i++) { struct starpu_block_interface *oldb = descr[i%2], *newb = descr[(i+1)%2]; TYPE *old = (void*) oldb->ptr, *newer = (void*) newb->ptr; /* Shadow data */ cuda_shadow_host(block->bz, old, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i); /* And perform actual computation */ #ifdef LIFE cuda_life_update_host(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i); #else cudaMemcpyAsync(newer, old, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer), cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); #endif /* LIFE */ } } #endif /* STARPU_USE_CUDA */ /* * Load a neighbour's boundary into block, OpenCL version */ #ifdef STARPU_USE_OPENCL static void load_subblock_from_buffer_opencl(struct starpu_block_interface *block, struct starpu_block_interface *boundary, unsigned firstz) { check_load(block, boundary); /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; unsigned offset = firstz*block->ldz; cl_mem block_data = (cl_mem)block->dev_handle; cl_mem boundary_data = (cl_mem)boundary->dev_handle; cl_command_queue cq; starpu_opencl_get_current_queue(&cq); cl_int ret = clEnqueueCopyBuffer(cq, boundary_data, block_data, 0, offset, boundary_size, 0, NULL, NULL); if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret); } /* * cl_update (OpenCL version) */ static void update_func_opencl(void *descr[], void *arg) { struct block_description *block = arg; int workerid = starpu_worker_get_id_check(); DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); if (block->bz == 0) FPRINTF(stderr,"!!! DO update_func_opencl z %u OPENCL%d !!!\n", block->bz, workerid); else DEBUG("!!! DO update_func_opencl z %u OPENCL%d !!!\n", block->bz, workerid); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int rank = 0; MPI_Comm_rank(MPI_COMM_WORLD, &rank); DEBUG("!!! RANK %d !!!\n", rank); #endif DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); unsigned block_size_z = get_block_size(block->bz); unsigned i; update_per_worker[workerid]++; record_who_runs_what(block); cl_command_queue cq; starpu_opencl_get_current_queue(&cq); /* * Load neighbours' boundaries : TOP */ /* The offset along the z axis is (block_size_z + K) */ load_subblock_from_buffer_opencl(descr[0], descr[2], block_size_z+K); load_subblock_from_buffer_opencl(descr[1], descr[3], block_size_z+K); /* * Load neighbours' boundaries : BOTTOM */ load_subblock_from_buffer_opencl(descr[0], descr[4], 0); load_subblock_from_buffer_opencl(descr[1], descr[5], 0); /* * Stencils ... do the actual work here :) TODO */ for (i=1; i<=K; i++) { struct starpu_block_interface *oldb = descr[i%2], *newb = descr[(i+1)%2]; TYPE *old = (void*) oldb->dev_handle, *newer = (void*) newb->dev_handle; /* Shadow data */ opencl_shadow_host(block->bz, old, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i); /* And perform actual computation */ #ifdef LIFE opencl_life_update_host(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i); #else cl_event event; cl_int ret = clEnqueueCopyBuffer(cq, old, newer, 0, 0, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer), 0, NULL, &event); if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret); #endif /* LIFE */ } } #endif /* STARPU_USE_OPENCL */ /* * cl_update (CPU version) */ void update_func_cpu(void *descr[], void *arg) { struct block_description *block = (struct block_description *) arg; int workerid = starpu_worker_get_id_check(); DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); if (block->bz == 0) FPRINTF(stderr,"!!! DO update_func_cpu z %u worker%d !!!\n", block->bz, workerid); else DEBUG("!!! DO update_func_cpu z %u worker%d !!!\n", block->bz, workerid); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int rank = 0; MPI_Comm_rank(MPI_COMM_WORLD, &rank); DEBUG("!!! RANK %d !!!\n", rank); #endif DEBUG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); unsigned block_size_z = get_block_size(block->bz); unsigned i; update_per_worker[workerid]++; record_who_runs_what(block); /* * Load neighbours' boundaries : TOP */ /* The offset along the z axis is (block_size_z + K) */ load_subblock_from_buffer_cpu(descr[0], descr[2], block_size_z+K); load_subblock_from_buffer_cpu(descr[1], descr[3], block_size_z+K); /* * Load neighbours' boundaries : BOTTOM */ load_subblock_from_buffer_cpu(descr[0], descr[4], 0); load_subblock_from_buffer_cpu(descr[1], descr[5], 0); /* * Stencils ... do the actual work here :) TODO */ for (i=1; i<=K; i++) { struct starpu_block_interface *oldb = (struct starpu_block_interface *) descr[i%2], *newb = (struct starpu_block_interface *) descr[(i+1)%2]; TYPE *old = (TYPE*) oldb->ptr, *newer = (TYPE*) newb->ptr; /* Shadow data */ unsigned ldy = oldb->ldy, ldz = oldb->ldz; unsigned nx = oldb->nx, ny = oldb->ny, nz = oldb->nz; unsigned x, y, z; unsigned stepx = 1; unsigned stepy = 1; unsigned stepz = 1; unsigned idx = 0; unsigned idy = 0; unsigned idz = 0; TYPE *ptr = old; # include "shadow.h" /* And perform actual computation */ #ifdef LIFE life_update(block->bz, old, newer, oldb->nx, oldb->ny, oldb->nz, oldb->ldy, oldb->ldz, i); #else memcpy(newer, old, oldb->nx * oldb->ny * oldb->nz * sizeof(*newer)); #endif /* LIFE */ } } /* Performance model and codelet structure */ static struct starpu_perfmodel cl_update_model = { .type = STARPU_HISTORY_BASED, .symbol = "cl_update" }; struct starpu_codelet cl_update = { .cpu_funcs = {update_func_cpu}, #ifdef STARPU_USE_CUDA .cuda_funcs = {update_func_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {update_func_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .model = &cl_update_model, .nbuffers = 6, .modes = {STARPU_RW, STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R} }; /* * Save the block internal boundaries to give them to our neighbours. */ /* CPU version */ static void load_subblock_into_buffer_cpu(void *_block, void *_boundary, unsigned firstz) { struct starpu_block_interface *block = (struct starpu_block_interface *)_block; struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; check_load(block, boundary); /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; unsigned offset = firstz*block->ldz; TYPE *block_data = (TYPE *)block->ptr; TYPE *boundary_data = (TYPE *)boundary->ptr; memcpy(boundary_data, &block_data[offset], boundary_size); } /* CUDA version */ #ifdef STARPU_USE_CUDA static void load_subblock_into_buffer_cuda(void *_block, void *_boundary, unsigned firstz) { struct starpu_block_interface *block = (struct starpu_block_interface *)_block; struct starpu_block_interface *boundary = (struct starpu_block_interface *)_boundary; check_load(block, boundary); /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; unsigned offset = firstz*block->ldz; TYPE *block_data = (TYPE *)block->ptr; TYPE *boundary_data = (TYPE *)boundary->ptr; cudaMemcpyAsync(boundary_data, &block_data[offset], boundary_size, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); } #endif /* STARPU_USE_CUDA */ /* OPENCL version */ #ifdef STARPU_USE_OPENCL static void load_subblock_into_buffer_opencl(struct starpu_block_interface *block, struct starpu_block_interface *boundary, unsigned firstz) { check_load(block, boundary); /* We do a contiguous memory transfer */ size_t boundary_size = K*block->ldz*block->elemsize; unsigned offset = firstz*block->ldz; cl_mem block_data = (cl_mem)block->dev_handle; cl_mem boundary_data = (cl_mem)boundary->dev_handle; cl_command_queue cq; starpu_opencl_get_current_queue(&cq); cl_int ret = clEnqueueCopyBuffer(cq, block_data, boundary_data, offset, 0, boundary_size, 0, NULL, NULL); if (ret != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(ret); } #endif /* STARPU_USE_OPENCL */ /* Record how many top/bottom saves each worker performed */ unsigned top_per_worker[STARPU_NMAXWORKERS]; unsigned bottom_per_worker[STARPU_NMAXWORKERS]; /* top save, CPU version */ void dummy_func_top_cpu(void *descr[], void *arg) { struct block_description *block = (struct block_description *) arg; int workerid = starpu_worker_get_id_check(); top_per_worker[workerid]++; DEBUG("DO SAVE Bottom block %d\n", block->bz); /* The offset along the z axis is (block_size_z + K)- K */ unsigned block_size_z = get_block_size(block->bz); load_subblock_into_buffer_cpu(descr[0], descr[2], block_size_z); load_subblock_into_buffer_cpu(descr[1], descr[3], block_size_z); } /* bottom save, CPU version */ void dummy_func_bottom_cpu(void *descr[], void *arg) { struct block_description *block = (struct block_description *) arg; (void) block; int workerid = starpu_worker_get_id_check(); bottom_per_worker[workerid]++; DEBUG("DO SAVE Top block %d\n", block->bz); load_subblock_into_buffer_cpu(descr[0], descr[2], K); load_subblock_into_buffer_cpu(descr[1], descr[3], K); } /* top save, CUDA version */ #ifdef STARPU_USE_CUDA static void dummy_func_top_cuda(void *descr[], void *arg) { struct block_description *block = (struct block_description *) arg; int workerid = starpu_worker_get_id_check(); top_per_worker[workerid]++; DEBUG("DO SAVE Top block %d\n", block->bz); /* The offset along the z axis is (block_size_z + K)- K */ unsigned block_size_z = get_block_size(block->bz); load_subblock_into_buffer_cuda(descr[0], descr[2], block_size_z); load_subblock_into_buffer_cuda(descr[1], descr[3], block_size_z); } /* bottom save, CUDA version */ static void dummy_func_bottom_cuda(void *descr[], void *arg) { struct block_description *block = (struct block_description *) arg; (void) block; int workerid = starpu_worker_get_id_check(); bottom_per_worker[workerid]++; DEBUG("DO SAVE Bottom block %d on CUDA\n", block->bz); load_subblock_into_buffer_cuda(descr[0], descr[2], K); load_subblock_into_buffer_cuda(descr[1], descr[3], K); } #endif /* STARPU_USE_CUDA */ /* top save, OpenCL version */ #ifdef STARPU_USE_OPENCL static void dummy_func_top_opencl(void *descr[], void *arg) { struct block_description *block = (struct block_description *) arg; (void) block; int workerid = starpu_worker_get_id_check(); top_per_worker[workerid]++; DEBUG("DO SAVE Top block %d\n", block->bz); /* The offset along the z axis is (block_size_z + K)- K */ unsigned block_size_z = get_block_size(block->bz); load_subblock_into_buffer_opencl(descr[0], descr[2], block_size_z); load_subblock_into_buffer_opencl(descr[1], descr[3], block_size_z); } /* bottom save, OPENCL version */ static void dummy_func_bottom_opencl(void *descr[], void *arg) { struct block_description *block = (struct block_description *) arg; (void) block; int workerid = starpu_worker_get_id_check(); bottom_per_worker[workerid]++; DEBUG("DO SAVE Bottom block %d on OPENCL\n", block->bz); load_subblock_into_buffer_opencl(descr[0], descr[2], K); load_subblock_into_buffer_opencl(descr[1], descr[3], K); } #endif /* STARPU_USE_OPENCL */ /* Performance models and codelet for save */ static struct starpu_perfmodel save_cl_bottom_model = { .type = STARPU_HISTORY_BASED, .symbol = "save_cl_bottom" }; static struct starpu_perfmodel save_cl_top_model = { .type = STARPU_HISTORY_BASED, .symbol = "save_cl_top" }; struct starpu_codelet save_cl_bottom = { .cpu_funcs = {dummy_func_bottom_cpu}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dummy_func_bottom_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {dummy_func_bottom_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .model = &save_cl_bottom_model, .nbuffers = 4, .modes = {STARPU_R, STARPU_R, STARPU_W, STARPU_W} }; struct starpu_codelet save_cl_top = { .cpu_funcs = {dummy_func_top_cpu}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dummy_func_top_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {dummy_func_top_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .model = &save_cl_top_model, .nbuffers = 4, .modes = {STARPU_R, STARPU_R, STARPU_W, STARPU_W} }; starpu-1.4.9+dfsg/examples/stencil/stencil-tasks.c000066400000000000000000000236011507764646700222210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "stencil.h" #define BIND_LAST 1 /* * Schedule tasks for updates and saves */ /* * NB: iter = 0: initialization phase, TAG_U(z, 0) = TAG_INIT * * dir is -1 or +1. */ #if 0 # define DEBUG(fmt, ...) fprintf(stderr,fmt,##__VA_ARGS__) #else # define DEBUG(fmt, ...) #endif /* * SAVE */ /* R(z) = R(z+d) = local, just call the save kernel */ static void create_task_save_local(unsigned iter, unsigned z, int dir) { struct starpu_task *save_task = starpu_task_create(); struct block_description *descr = get_block_description(z); save_task->cl = (dir == -1)?&save_cl_bottom:&save_cl_top; save_task->cl_arg = descr; /* Saving our border... */ save_task->handles[0] = descr->layers_handle[0]; save_task->handles[1] = descr->layers_handle[1]; /* ... to the neighbour's copy */ struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2]; save_task->handles[2] = neighbour->boundaries_handle[(1-dir)/2][0]; save_task->handles[3] = neighbour->boundaries_handle[(1-dir)/2][1]; /* Bind */ if (iter <= BIND_LAST) save_task->execute_on_a_specific_worker = get_bind_tasks(); save_task->workerid = descr->preferred_worker; int ret = starpu_task_submit(save_task); if (ret) { FPRINTF(stderr, "Could not submit task save: %d\n", ret); if (ret == -ENODEV) exit(77); STARPU_ABORT(); } } /* R(z) = local & R(z+d) != local */ /* We need to send our save over MPI */ static void send_done(void *arg) { uintptr_t z = (uintptr_t) arg; (void) z; DEBUG("DO SEND %d\n", (int)z); } #if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE) /* Post MPI send */ static void create_task_save_mpi_send(unsigned iter, unsigned z, int dir, int local_rank) { struct block_description *descr = get_block_description(z); STARPU_ASSERT(descr->mpi_node == local_rank); struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2]; int dest = neighbour->mpi_node; STARPU_ASSERT(neighbour->mpi_node != local_rank); /* Send neighbour's border copy to the neighbour */ starpu_data_handle_t handle0 = neighbour->boundaries_handle[(1-dir)/2][0]; starpu_data_handle_t handle1 = neighbour->boundaries_handle[(1-dir)/2][1]; int ret; ret = starpu_mpi_isend_detached(handle0, dest, MPI_TAG0(z, iter, dir), MPI_COMM_WORLD, send_done, (void*)(uintptr_t)z); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); ret = starpu_mpi_isend_detached(handle1, dest, MPI_TAG1(z, iter, dir), MPI_COMM_WORLD, send_done, (void*)(uintptr_t)z); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } /* R(z) != local & R(z+d) = local */ /* We need to receive over MPI */ static void recv_done(void *arg) { uintptr_t z = (uintptr_t) arg; (void) z; DEBUG("DO RECV %d\n", (int)z); } /* Post MPI recv */ static void create_task_save_mpi_recv(unsigned iter, unsigned z, int dir, int local_rank) { struct block_description *descr = get_block_description(z); STARPU_ASSERT(descr->mpi_node != local_rank); struct block_description *neighbour = descr->boundary_blocks[(1+dir)/2]; int source = descr->mpi_node; STARPU_ASSERT(neighbour->mpi_node == local_rank); /* Receive our neighbour's border in our neighbour copy */ starpu_data_handle_t handle0 = neighbour->boundaries_handle[(1-dir)/2][0]; starpu_data_handle_t handle1 = neighbour->boundaries_handle[(1-dir)/2][1]; int ret; ret = starpu_mpi_irecv_detached(handle0, source, MPI_TAG0(z, iter, dir), MPI_COMM_WORLD, recv_done, (void*)(uintptr_t)z); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); ret = starpu_mpi_irecv_detached(handle1, source, MPI_TAG1(z, iter, dir), MPI_COMM_WORLD, recv_done, (void*)(uintptr_t)z); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); } #endif /* STARPU_USE_MPI */ /* * Schedule saving boundaries of blocks to communication buffers */ void create_task_save(unsigned iter, unsigned z, int dir, int local_rank) { int node_z = get_block_mpi_node(z); int node_z_and_d = get_block_mpi_node(z+dir); #if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE) if (node_z == local_rank) { /* Save data from update */ create_task_save_local(iter, z, dir); if (node_z_and_d != local_rank) { /* R(z) = local & R(z+d) != local, We have to send the data */ create_task_save_mpi_send(iter, z, dir, local_rank); } } else { /* node_z != local_rank, this MPI node doesn't have the saved data */ if (node_z_and_d == local_rank) { create_task_save_mpi_recv(iter, z, dir, local_rank); } else { /* R(z) != local & R(z+d) != local We don't have the saved data and don't need it, we shouldn't even have been called! */ STARPU_ABORT(); } } #else /* !STARPU_USE_MPI */ STARPU_ASSERT((node_z == local_rank) && (node_z_and_d == local_rank)); create_task_save_local(iter, z, dir); #endif /* STARPU_USE_MPI */ } /* * Schedule update computation in computation buffer */ void create_task_update(unsigned iter, unsigned z, int local_rank) { (void)local_rank; // unneeded parameter, we keep it to have a similar function prototype to the implicit case STARPU_ASSERT(iter != 0); struct starpu_task *task = starpu_task_create(); unsigned niter = get_niter(); /* We are going to synchronize with the last tasks */ if (iter == niter) { task->use_tag = 1; task->tag_id = TAG_FINISH(z); } unsigned old_layer = (K*(iter-1)) % 2; unsigned new_layer = (old_layer + 1) % 2; struct block_description *descr = get_block_description(z); task->handles[0] = descr->layers_handle[new_layer]; task->handles[1] = descr->layers_handle[old_layer]; task->handles[2] = descr->boundaries_handle[T][new_layer]; task->handles[3] = descr->boundaries_handle[T][old_layer]; task->handles[4] = descr->boundaries_handle[B][new_layer]; task->handles[5] = descr->boundaries_handle[B][old_layer]; task->cl = &cl_update; task->cl_arg = descr; if (iter <= BIND_LAST) task->execute_on_a_specific_worker = get_bind_tasks(); task->workerid = descr->preferred_worker; int ret = starpu_task_submit(task); if (ret) { FPRINTF(stderr, "Could not submit task update block: %d\n", ret); if (ret == -ENODEV) exit(77); STARPU_ABORT(); } } /* Dummy empty codelet taking one buffer */ void null_func(void *descr[], void *arg) { (void)descr; (void)arg; } static double null_cost_function(struct starpu_task *task, unsigned nimpl) { (void) task; (void) nimpl; return 0.000001; } static struct starpu_perfmodel null_model = { .type = STARPU_COMMON, .cost_function = null_cost_function, .symbol = "null" }; static struct starpu_codelet null = { .modes = { STARPU_W, STARPU_W }, .cpu_funcs = {null_func}, .cpu_funcs_name = {"null_func"}, .cuda_funcs = {null_func}, .opencl_funcs = {null_func}, .nbuffers = 2, .model = &null_model, .name = "start" }; void create_start_task(int z, int dir) { /* Dumb task depending on the init task and simulating writing the neighbour buffers, to avoid communications and computation running before we start measuring time */ struct starpu_task *wait_init = starpu_task_create(); struct block_description *descr = get_block_description(z); starpu_tag_t tag_init = TAG_INIT_TASK; wait_init->cl = &null; wait_init->use_tag = 1; wait_init->tag_id = TAG_START(z, dir); wait_init->handles[0] = descr->boundaries_handle[(1 + dir) / 2][0]; wait_init->handles[1] = descr->boundaries_handle[(1 + dir) / 2][1]; starpu_tag_declare_deps_array(wait_init->tag_id, 1, &tag_init); int ret = starpu_task_submit(wait_init); if (ret) { FPRINTF(stderr, "Could not submit task initial wait: %d\n", ret); if (ret == -ENODEV) exit(77); STARPU_ABORT(); } } /* * Create all the tasks */ void create_tasks(int rank) { int iter; int bz; int niter = get_niter(); int nbz = get_nbz(); for (bz = 0; bz < nbz; bz++) { if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank)) create_start_task(bz, +1); if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank)) create_start_task(bz, -1); } for (iter = 0; iter <= niter; iter++) { starpu_iteration_push(iter); for (bz = 0; bz < nbz; bz++) { if ((iter > 0) && (get_block_mpi_node(bz) == rank)) create_task_update(iter, bz, rank); } for (bz = 0; bz < nbz; bz++) { if (iter != niter) { if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz+1) == rank)) create_task_save(iter, bz, +1, rank); if ((get_block_mpi_node(bz) == rank) || (get_block_mpi_node(bz-1) == rank)) create_task_save(iter, bz, -1, rank); } } starpu_iteration_pop(); } } /* * Wait for termination */ void wait_end_tasks(int rank) { int bz; int nbz = get_nbz(); for (bz = 0; bz < nbz; bz++) { if (get_block_mpi_node(bz) == rank) { /* Wait for the task producing block "bz" */ starpu_tag_wait(TAG_FINISH(bz)); /* Get the result back to memory */ struct block_description *block = get_block_description(bz); starpu_data_acquire(block->layers_handle[0], STARPU_R); starpu_data_acquire(block->layers_handle[1], STARPU_R); /* the data_acquire here is done to make sure * the data is sent back to the ram memory, we * can safely do a data_release, to avoid the * data_unregister to block later on */ starpu_data_release(block->layers_handle[0]); starpu_data_release(block->layers_handle[1]); } } } starpu-1.4.9+dfsg/examples/stencil/stencil.c000066400000000000000000000231121507764646700210730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "stencil.h" #ifdef STARPU_HAVE_VALGRIND_H #include #endif /* Main application */ /* default parameter values */ static unsigned bind_tasks = 0; static unsigned ticks = 1000; #ifdef STARPU_QUICK_CHECK static unsigned niter = 4; #define SIZE 16 #define NBZ 8 #else static unsigned niter = 32; #define SIZE 128 #define NBZ 64 #endif /* Problem size */ static unsigned sizex = SIZE; static unsigned sizey = SIZE; static unsigned sizez = NBZ*SIZE; /* Number of blocks (scattered over the different MPI processes) */ unsigned nbz = NBZ; /* * Initialization */ unsigned get_bind_tasks(void) { return bind_tasks; } unsigned get_nbz(void) { return nbz; } unsigned get_niter(void) { return niter; } unsigned get_ticks(void) { return ticks; } static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-b") == 0) { bind_tasks = 1; } if (strcmp(argv[i], "-nbz") == 0) { nbz = atoi(argv[++i]); } if (strcmp(argv[i], "-sizex") == 0) { sizex = atoi(argv[++i]); } if (strcmp(argv[i], "-sizey") == 0) { sizey = atoi(argv[++i]); } if (strcmp(argv[i], "-sizez") == 0) { sizez = atoi(argv[++i]); } if (strcmp(argv[i], "-niter") == 0) { niter = atoi(argv[++i]); } if (strcmp(argv[i], "-ticks") == 0) { ticks = atoi(argv[++i]); } if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { fprintf(stderr, "Usage : %s [options...]\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, "-b bind tasks on CPUs/GPUs\n"); fprintf(stderr, "-nbz Number of blocks on Z axis (%u by default)\n", nbz); fprintf(stderr, "-size[xyz] Domain size on x/y/z axis (%ux%ux%u by default)\n", sizex, sizey, sizez); fprintf(stderr, "-niter Number of iterations (%u by default)\n", niter); fprintf(stderr, "-ticks How often to put ticks in the output (ms, %u by default)\n", ticks); exit(0); } } #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) { sizex = sizey = 3; nbz = 10; sizez = nbz*3; } #endif } static void init_problem(int argc, char **argv, int rank, int world_size) { parse_args(argc, argv); create_blocks_array(sizex, sizey, sizez, nbz); /* Select the MPI process which should compute the different blocks */ assign_blocks_to_mpi_nodes(world_size); assign_blocks_to_workers(rank); /* Allocate the different memory blocks, if used by the MPI process */ allocate_memory_on_node(rank); display_memory_consumption(rank); who_runs_what_len = 2*niter; who_runs_what = (int *) calloc(nbz * who_runs_what_len, sizeof(*who_runs_what)); who_runs_what_index = (int *) calloc(nbz, sizeof(*who_runs_what_index)); last_tick = (double *) calloc(nbz, sizeof(*last_tick)); } static void free_problem(int rank) { free_memory_on_node(rank); free_blocks_array(); free(who_runs_what); free(who_runs_what_index); free(last_tick); } /* * Main body */ double start; double begin, end; double timing; void func(unsigned task_per_worker[STARPU_NMAXWORKERS]) { unsigned total = 0; int worker; for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) total += task_per_worker[worker]; for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) { if (task_per_worker[worker]) { char name[64]; starpu_worker_get_name(worker, name, sizeof(name)); FPRINTF(stderr,"\t%s -> %u (%2.2f%%)\n", name, task_per_worker[worker], (100.0*task_per_worker[worker])/total); } } } unsigned global_workerid(unsigned local_workerid) { #if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); unsigned workers_per_node = starpu_worker_get_count(); return (local_workerid + rank*workers_per_node); #else return local_workerid; #endif } int main(int argc, char **argv) { int rank; int world_size; int ret; #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int thread_support; if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support)) { FPRINTF(stderr, "MPI_Init_thread failed\n"); } if (thread_support == MPI_THREAD_FUNNELED) FPRINTF(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); if (thread_support < MPI_THREAD_FUNNELED) FPRINTF(stderr,"Warning: MPI does not have thread support!\n"); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &world_size); #else rank = 0; world_size = 1; #endif if (rank == 0) { FPRINTF(stderr, "Running on %d nodes\n", world_size); fflush(stderr); } struct starpu_conf conf; starpu_conf_init(&conf); /*nbz is a global variable, this example doesn't support Master-Slave*/ conf.nmpi_ms = 0; conf.ntcpip_ms = 0; ret = starpu_init(&conf); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #endif #ifdef STARPU_USE_OPENCL opencl_life_init(); opencl_shadow_init(); #endif /*STARPU_USE_OPENCL*/ init_problem(argc, argv, rank, world_size); create_tasks(rank); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int barrier_ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_ASSERT(barrier_ret == MPI_SUCCESS); #endif if (rank == 0) FPRINTF(stderr, "GO !\n"); start = starpu_timing_now(); begin = starpu_timing_now(); starpu_tag_notify_from_apps(TAG_INIT_TASK); wait_end_tasks(rank); end = starpu_timing_now(); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) barrier_ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_ASSERT(barrier_ret == MPI_SUCCESS); #endif #if 0 check(rank); #endif /*display_debug(nbz, niter, rank);*/ #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) starpu_mpi_shutdown(); #endif /* timing in us */ timing = end - begin; double min_timing = timing; double max_timing = timing; double sum_timing = timing; #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) int reduce_ret; reduce_ret = MPI_Reduce(&timing, &min_timing, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); reduce_ret = MPI_Reduce(&timing, &max_timing, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); reduce_ret = MPI_Reduce(&timing, &sum_timing, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); /* XXX we should do a gather instead, here we assume that non initialized values are still 0 */ int *who_runs_what_tmp = malloc(nbz * who_runs_what_len * sizeof(*who_runs_what)); reduce_ret = MPI_Reduce(who_runs_what, who_runs_what_tmp, nbz * who_runs_what_len, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); memcpy(who_runs_what, who_runs_what_tmp, nbz * who_runs_what_len * sizeof(*who_runs_what)); free(who_runs_what_tmp); /* XXX we should do a gather instead, here we assume that non initialized values are still 0 */ int *who_runs_what_index_tmp = malloc(nbz * sizeof(*who_runs_what_index)); reduce_ret = MPI_Reduce(who_runs_what_index, who_runs_what_index_tmp, nbz, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); memcpy(who_runs_what_index, who_runs_what_index_tmp, nbz * sizeof(*who_runs_what_index)); free(who_runs_what_index_tmp); #endif if (rank == 0) { #if 1 FPRINTF(stderr, "update:\n"); func(update_per_worker); FPRINTF(stderr, "top:\n"); func(top_per_worker); FPRINTF(stderr, "bottom:\n"); func(bottom_per_worker); #endif #if 1 unsigned nzblocks_per_process = (nbz + world_size - 1) / world_size; int iter; for (iter = 0; iter < who_runs_what_len; iter++) { starpu_iteration_push(iter); unsigned last, bz; last = 1; for (bz = 0; bz < nbz; bz++) { if ((bz % nzblocks_per_process) == 0) FPRINTF(stderr, "| "); if (who_runs_what_index[bz] <= iter) FPRINTF(stderr,"_ "); else { last = 0; if (who_runs_what[bz + iter * nbz] == -1) FPRINTF(stderr,"* "); else FPRINTF(stderr, "%d ", who_runs_what[bz + iter * nbz]); } } FPRINTF(stderr, "\n"); starpu_iteration_pop(); if (last) break; } #endif fflush(stderr); FPRINTF(stdout, "Computation took: %f ms on %d MPI processes\n", max_timing/1000, world_size); FPRINTF(stdout, "\tMIN : %f ms\n", min_timing/1000); FPRINTF(stdout, "\tMAX : %f ms\n", max_timing/1000); FPRINTF(stdout, "\tAVG : %f ms\n", sum_timing/(world_size*1000)); } free_problem(rank); starpu_shutdown(); #if defined(STARPU_USE_MPI) && !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MPI_MASTER_SLAVE) MPI_Finalize(); #endif #ifdef STARPU_USE_OPENCL opencl_life_free(); opencl_shadow_free(); #endif /*STARPU_USE_OPENCL*/ return 0; } starpu-1.4.9+dfsg/examples/stencil/stencil.h000066400000000000000000000105461507764646700211070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STENCIL_H__ #define __STENCIL_H__ #include #include #include #ifndef __CUDACC__ #if defined(STARPU_USE_MPI) && !defined(STARPU_USE_MPI_MASTER_SLAVE) #include #include #endif #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define LIFE #ifdef LIFE #define TYPE unsigned char extern void life_update(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter); #else #define TYPE float #endif #define K 1 #define NDIRS 2 /* Split only on the z axis to make things simple */ typedef enum { B = 0, T = 1 } direction; /* Description of a domain block */ struct block_description { /* Which MPI node should process that block ? */ int mpi_node; unsigned preferred_worker; unsigned bz; /* For each of the following buffers, there are two (0/1) buffers to * make new/old switch costless. */ /* This is the computation buffer for this block, it includes * neighbours' border to make computation easier */ TYPE *layers[2]; starpu_data_handle_t layers_handle[2]; /* This is the "save" buffer, i.e. a copy of our neighbour's border. * This one is used for CPU/GPU or MPI communication (rather than the * whole domain block) */ TYPE *boundaries[NDIRS][2]; starpu_data_handle_t boundaries_handle[NDIRS][2]; /* Shortcut pointer to the neighbours */ struct block_description *boundary_blocks[NDIRS]; }; #define TAG_INIT_TASK ((starpu_tag_t)1) starpu_tag_t TAG_FINISH(int z); starpu_tag_t TAG_START(int z, int dir); int MPI_TAG0(int z, int iter, int dir); int MPI_TAG1(int z, int iter, int dir); #define MIN(a,b) ((a)<(b)?(a):(b)) void create_blocks_array(unsigned sizex, unsigned sizey, unsigned sizez, unsigned nbz); void free_blocks_array(); struct block_description *get_block_description(int z); void assign_blocks_to_mpi_nodes(int world_size); void allocate_memory_on_node(int rank); void assign_blocks_to_workers(int rank); void create_tasks(int rank); void wait_end_tasks(int rank); void check(int rank); void free_memory_on_node(int rank); void display_memory_consumption(int rank); int get_block_mpi_node(int z); unsigned get_block_size(int z); unsigned get_bind_tasks(void); unsigned get_nbz(void); unsigned get_niter(void); unsigned get_ticks(void); unsigned global_workerid(unsigned local_workerid); void create_task_update(unsigned iter, unsigned z, int local_rank); void create_task_save(unsigned iter, unsigned z, int dir, int local_rank); extern int starpu_mpi_initialize(void); extern int starpu_mpi_shutdown(void); /* kernels */ extern struct starpu_codelet cl_update; extern struct starpu_codelet save_cl_bottom; extern struct starpu_codelet save_cl_top; extern unsigned update_per_worker[STARPU_NMAXWORKERS]; extern unsigned top_per_worker[STARPU_NMAXWORKERS]; extern unsigned bottom_per_worker[STARPU_NMAXWORKERS]; extern double start; extern int who_runs_what_len; extern int *who_runs_what; extern int *who_runs_what_index; extern double *last_tick; #ifndef _externC #define _externC #endif _externC void cuda_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter); _externC void cuda_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i); _externC void opencl_shadow_init(void); _externC void opencl_shadow_free(void); _externC void opencl_shadow_host(int bz, TYPE *ptr, int nx, int ny, int nz, int ldy, int ldz, int i); _externC void opencl_life_init(void); _externC void opencl_life_free(void); _externC void opencl_life_update_host(int bz, const TYPE *old, TYPE *newp, int nx, int ny, int nz, int ldy, int ldz, int iter); #endif /* __STENCIL_H__ */ starpu-1.4.9+dfsg/examples/subgraphs/000077500000000000000000000000001507764646700176245ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/subgraphs/codelets.c000066400000000000000000000045751507764646700216050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void matrix_fill(void *buffers[], void *cl_arg) { unsigned i, j; (void)cl_arg; /* length of the matrix */ unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); for(j=0; j #define NX 6 #define NY 6 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern struct starpu_codelet cl_fill; extern struct starpu_codelet cl_check_scale; void empty(void *buffers[], void *cl_arg) { /* This doesn't need to do anything, it's simply used to make coherency * between the two views, by simply running on the home node of the * data, thus getting back all data pieces there. */ (void)buffers; (void)cl_arg; /* This check is just for testsuite */ int node = starpu_task_get_current_data_node(0); unsigned i; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(starpu_task_get_current()); STARPU_ASSERT(node >= 0); for (i = 1; i < nbuffers; i++) STARPU_ASSERT(starpu_task_get_current_data_node(i) == node); } struct starpu_codelet cl_switch = { .cpu_funcs = {empty}, .nbuffers = STARPU_VARIABLE_NBUFFERS, .name = "switch", }; int do_starpu_init() { int ret, i; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* force to execute task on the home_node, here it is STARPU_MAIN_RAM */ cl_switch.specific_nodes = 1; for(i = 0; i < STARPU_NMAXBUFS; i++) cl_switch.nodes[i] = STARPU_MAIN_RAM; return 0; } void do_init_sub_data(int matrix[NX][NY], starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS], void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts), int x, int y, int nx, int ny, int ld) { int i; for (i = 0; i < PARTS; i++) { starpu_matrix_data_register(&sub_handle[i], STARPU_MAIN_RAM, (uintptr_t)&matrix[i*x][i*y], nx, ny, ld, sizeof(matrix[0][0])); /* But make it invalid for now, we'll access data through the whole matrix first */ starpu_data_invalidate(sub_handle[i]); } } int do_apply_sub_graph(starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS], void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts), int factor, int start) { int i, ret; /* Now switch to vertical view of the matrix */ struct starpu_data_descr descr[PARTS]; for (i = 0; i < PARTS; i++) { descr[i].handle = sub_handle[i]; descr[i].mode = STARPU_W; } ret = starpu_task_insert(&cl_switch, STARPU_RW, handle, STARPU_DATA_MODE_ARRAY, descr, PARTS, 0); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* And make sure we don't accidentally access the matrix through the whole-matrix handle */ starpu_data_invalidate_submit(handle); /* Check the values of the vertical slices */ for (i = 0; i < PARTS; i++) { int xstart = i*start; ret = starpu_task_insert(&cl_check_scale, STARPU_RW, sub_handle[i], STARPU_VALUE, &xstart, sizeof(xstart), STARPU_VALUE, &factor, sizeof(factor), 0); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } return 0; } int do_clean_sub_graph(starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS]) { int i, ret; struct starpu_data_descr descr[PARTS]; /* Now switch back to total view of the matrix */ for (i = 0; i < PARTS; i++) { descr[i].handle = sub_handle[i]; descr[i].mode = STARPU_RW; } ret = starpu_task_insert(&cl_switch, STARPU_DATA_MODE_ARRAY, descr, PARTS, STARPU_W, handle, 0); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* And make sure we don't accidentally access the matrix through the sub slices */ for (i = 0; i < PARTS; i++) starpu_data_invalidate_submit(sub_handle[i]); return 0; } void do_clean_sub_data(starpu_data_handle_t sub_handle[PARTS]) { int i; for (i = 0; i < PARTS; i++) { starpu_data_unregister(sub_handle[i]); } } #include "main.h" starpu-1.4.9+dfsg/examples/subgraphs/partition.c000066400000000000000000000046661507764646700220150ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 6 #define NY 6 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern struct starpu_codelet cl_fill; extern struct starpu_codelet cl_check_scale; int do_starpu_init() { int ret, i; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); return 0; } void do_init_sub_data(int matrix[NX][NY], starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS], void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts), int x, int y, int nx, int ny, int ld) { // nothing to do } int do_apply_sub_graph(starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS], void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts), int factor, int start) { int i, ret; struct starpu_data_filter f = { .filter_func = filter_func, .nchildren = PARTS }; starpu_data_partition(handle, &f); /* Check the values of the slices */ for (i = 0; i < PARTS; i++) { int xstart = i*start; ret = starpu_task_insert(&cl_check_scale, STARPU_RW, starpu_data_get_sub_data(handle, 1, i), STARPU_VALUE, &xstart, sizeof(xstart), STARPU_VALUE, &factor, sizeof(factor), 0); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_data_unpartition(handle, STARPU_MAIN_RAM); return 0; } int do_clean_sub_graph(starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS]) { // nothing to do return 0; } void do_clean_sub_data(starpu_data_handle_t sub_handle[PARTS]) { // nothing to do } #include "main.h" starpu-1.4.9+dfsg/examples/subgraphs/plan.c000066400000000000000000000046171507764646700207320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define NX 6 #define NY 6 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) extern struct starpu_codelet cl_fill; extern struct starpu_codelet cl_check_scale; int do_starpu_init() { int ret, i; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); return 0; } void do_init_sub_data(int matrix[NX][NY], starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS], void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts), int x, int y, int nx, int ny, int ld) { struct starpu_data_filter f = { .filter_func = filter_func, .nchildren = PARTS }; starpu_data_partition_plan(handle, &f, sub_handle); } int do_apply_sub_graph(starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS], void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts), int factor, int start) { int i, ret; /* Check the values of the slices */ for (i = 0; i < PARTS; i++) { int xstart = i*start; ret = starpu_task_insert(&cl_check_scale, STARPU_RW, sub_handle[i], STARPU_VALUE, &xstart, sizeof(xstart), STARPU_VALUE, &factor, sizeof(factor), 0); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } return 0; } int do_clean_sub_graph(starpu_data_handle_t handle, starpu_data_handle_t sub_handle[PARTS]) { starpu_data_partition_clean(handle, PARTS, sub_handle); return 0; } void do_clean_sub_data(starpu_data_handle_t sub_handle[PARTS]) { // nothing to do } #include "main.h" starpu-1.4.9+dfsg/examples/tag_example/000077500000000000000000000000001507764646700201145ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/tag_example/tag_example.c000066400000000000000000000122521507764646700225500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This example shows how to use tags to define a grid of dependencies, shaped this way: * * ... ... * v v * ... -> task (i, j) --> task (i, j+1) --> ... * v v * ... -> task (i+1,j) --> task (i+1,j+1) --> ... * v v * ... ... */ #include #include #include #include #include #ifdef STARPU_HAVE_HELGRIND_H #include #endif #ifndef ANNOTATE_HAPPENS_BEFORE #define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0) #endif #ifndef ANNOTATE_HAPPENS_AFTER #define ANNOTATE_HAPPENS_AFTER(obj) ((void)0) #endif #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define TAG(i, j, iter) ((starpu_tag_t) (((uint64_t)(iter)<<48) | ((uint64_t)(j)<<24) | (i))) #ifdef STARPU_QUICK_CHECK #define Ni 32 #define Nj 32 #define Nk 32 #else #define Ni 64 #define Nj 32 #define Nk 128 #endif static unsigned ni = Ni, nj = Nj, nk = Nk; static unsigned callback_cnt; static unsigned iter = 0; static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-iter") == 0) { char *argptr; nk = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-i") == 0) { char *argptr; ni = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-j") == 0) { char *argptr; nj = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-h") == 0) { printf("usage : %s [-iter iter] [-i i] [-j j]\n", argv[0]); } } } void callback_cpu(void *argcb); static void express_deps(unsigned i, unsigned j, unsigned iter); static void tag_cleanup_grid(unsigned piter) { unsigned i,j; for (j = 0; j < nj; j++) for (i = 0; i < ni; i++) { starpu_tag_remove(TAG(i,j,piter)); } } static int create_task_grid(unsigned piter) { unsigned i, j; int ret; /* FPRINTF(stderr, "start iter %d...\n", piter); */ callback_cnt = (ni*nj); /* create non-entry tasks */ for (j = 0; j < nj; j++) for (i = 1; i < ni; i++) { /* create a new task */ struct starpu_task *task = starpu_task_create(); task->callback_func = callback_cpu; /* jb->argcb = &coords[i][j]; */ task->cl = &starpu_codelet_nop; task->cl_arg = NULL; task->use_tag = 1; task->tag_id = TAG(i, j, piter); /* express deps : (i,j) depends on (i-1, j-1) & (i-1, j+1) */ express_deps(i, j, piter); ret = starpu_task_submit(task); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* create entry tasks */ for (j = 0; j < nj; j++) { /* create a new task */ struct starpu_task *task = starpu_task_create(); task->callback_func = callback_cpu; task->cl = &starpu_codelet_nop; task->cl_arg = NULL; task->use_tag = 1; /* this is an entry task */ task->tag_id = TAG(0, j, piter); ret = starpu_task_submit(task); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } return 0; } void callback_cpu(void *argcb) { (void)argcb; unsigned newcnt = STARPU_ATOMIC_ADD(&callback_cnt, -1); ANNOTATE_HAPPENS_BEFORE(&callback_cnt); if (newcnt == 0) { ANNOTATE_HAPPENS_AFTER(&callback_cnt); if (++iter < nk) { /* cleanup old grids ... */ if (iter > 2) tag_cleanup_grid(iter-2); /* create a new iteration */ create_task_grid(iter); } } } static void express_deps(unsigned i, unsigned j, unsigned piter) { if (j > 0) { /* (i,j-1) exists */ if (j < nj - 1) { /* (i,j+1) exists */ starpu_tag_declare_deps(TAG(i,j,piter), 2, TAG(i-1,j-1,piter), TAG(i-1,j+1,piter)); } else { /* (i,j+1) does not exist */ starpu_tag_declare_deps(TAG(i,j,piter), 1, TAG(i-1,j-1,piter)); } } else { /* (i, (j-1) does not exist */ if (j < nj - 1) { /* (i,j+1) exists */ starpu_tag_declare_deps(TAG(i,j,piter), 1, TAG(i-1,j+1,piter)); } else { /* (i,j+1) does not exist */ STARPU_ABORT(); } } } int main(int argc, char **argv) { int ret; #ifdef STARPU_HAVE_HELGRIND_H if (RUNNING_ON_VALGRIND) { ni /= 2; nj /= 2; nk /= 2; } #endif ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); parse_args(argc, argv); FPRINTF(stderr, "ITER: %u\n", nk); ret = create_task_grid(0); if (ret == 0) starpu_task_wait_for_all(); tag_cleanup_grid(nk-2); tag_cleanup_grid(nk-1); starpu_shutdown(); FPRINTF(stderr, "TEST DONE ...\n"); return ret; } starpu-1.4.9+dfsg/examples/tag_example/tag_example2.c000066400000000000000000000056171507764646700226410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This example shows how to submit a series of tasks in a chain of dependency: * * ... -> task (i) --> task (i+1) --> ... * * This is repeated several times */ #include #include #include #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define TAG(i, iter) ((starpu_tag_t) (((uint64_t)iter)<<32 | (i))) #define Ni 64 #define Nk 256 static unsigned ni = Ni, nk = Nk; static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-iter") == 0) { char *argptr; nk = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-i") == 0) { char *argptr; ni = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-h") == 0) { printf("usage : %s [-iter iter] [-i i]\n", argv[0]); } } } void callback_cpu(void *argcb); static void tag_cleanup_grid(unsigned iter) { unsigned i; for (i = 0; i < ni; i++) starpu_tag_remove(TAG(i,iter)); } static int create_task_grid(unsigned iter) { unsigned i; /* FPRINTF(stderr, "start iter %d ni %d...\n", iter, ni); */ for (i = 0; i < ni; i++) { int ret; /* create a new task */ struct starpu_task *task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->cl_arg = NULL; task->use_tag = 1; task->tag_id = TAG(i, iter); if (i != 0) starpu_tag_declare_deps(TAG(i,iter), 1, TAG(i-1,iter)); ret = starpu_task_submit(task); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } return 0; } int main(int argc , char **argv) { unsigned i; int ret; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_QUICK_CHECK ni /= 4; nk /= 16; #endif parse_args(argc, argv); FPRINTF(stderr, "ITER : %u\n", nk); for (i = 0; i < nk; i++) { ret = create_task_grid(i); if (ret == 77) goto enodev; starpu_tag_wait(TAG(ni-1, i)); /* cleanup old grids ... */ if (i > 1) tag_cleanup_grid(i-1); } starpu_task_wait_for_all(); enodev: tag_cleanup_grid(nk-1); starpu_shutdown(); FPRINTF(stderr, "TEST DONE ...\n"); return ret; } starpu-1.4.9+dfsg/examples/tag_example/tag_example3.c000066400000000000000000000057161507764646700226420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This example shows how to submit a series of tasks in a chain of dependency: * * ... -> task (i) --> task (i+1) --> ... * * but here submitted in reverse order. * * This is repeated several times */ #include #include #include #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define TAG(i, iter) ((starpu_tag_t) (((uint64_t)iter)<<32 | (i))) #define Ni 64 #define Nk 256 static unsigned ni = Ni, nk = Nk; static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-iter") == 0) { char *argptr; nk = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-i") == 0) { char *argptr; ni = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-h") == 0) { printf("usage : %s [-iter iter] [-i i]\n", argv[0]); } } } void callback_cpu(void *argcb); static void tag_cleanup_grid(unsigned iter) { unsigned i; for (i = 0; i < ni; i++) starpu_tag_remove(TAG(i,iter)); } static int create_task_grid(unsigned iter) { int i; /* FPRINTF(stderr, "start iter %d ni %d...\n", iter, ni); */ for (i = ni - 1; i > 0; i--) { int ret; /* create a new task */ struct starpu_task *task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->cl_arg = NULL; task->use_tag = 1; task->tag_id = TAG(i, iter); if (i != 1) starpu_tag_declare_deps(TAG(i,iter), 1, TAG(i-1,iter)); ret = starpu_task_submit(task); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } return 0; } void cpu_codelet(void *descr[], void *_args) { (void)descr; (void)_args; } int main(int argc, char **argv) { unsigned i; int ret; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_QUICK_CHECK ni /= 4; nk /= 16; #endif parse_args(argc, argv); FPRINTF(stderr, "ITER : %u\n", nk); for (i = 0; i < nk; i++) { ret = create_task_grid(i); if (ret == 77) goto enodev; starpu_tag_wait(TAG(ni-1, i)); /* cleanup old grids ... */ if (i > 1) tag_cleanup_grid(i-1); } enodev: starpu_shutdown(); FPRINTF(stderr, "TEST DONE ...\n"); return ret; } starpu-1.4.9+dfsg/examples/tag_example/tag_example4.c000066400000000000000000000067711507764646700226450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This example shows how to make a task depend on either of a series of tasks. * * For each i, we submit i tasks of type A, which fill the i-th variable, and i * tasks of type B, which check that the i-th variable is filled. Thanks to * tag dependency, B tasks are scheduled as soon as one of the corresponding A * task is finished. */ #include #include #include #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define TAG(i, iter) ((starpu_tag_t) (((uint64_t)i)<<32 | (iter))) void cpu_codelet_A(void *descr[], void *_args) { (void)descr; int *arg = _args; STARPU_ATOMIC_OR(arg, 1); fprintf(stderr,"A"); } void cpu_codelet_B(void *descr[], void *_args) { (void)descr; int *arg = _args; if (*arg != 1) exit(EXIT_FAILURE); fprintf(stderr,"B"); } struct starpu_codelet cl_A = { .cpu_funcs = { cpu_codelet_A}, .cuda_funcs = { cpu_codelet_A}, .opencl_funcs = { cpu_codelet_A}, .nbuffers = 0, .name = "dummyA" }; struct starpu_codelet cl_B = { .cpu_funcs = { cpu_codelet_B}, .cuda_funcs = { cpu_codelet_B}, .opencl_funcs = { cpu_codelet_B}, .nbuffers = 0, .name = "dummyB" }; #define Ni 64 static unsigned ni = Ni; static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-iter") == 0) { char *argptr; ni = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-h") == 0) { printf("usage : %s [-iter iter]\n", argv[0]); } } } int main(int argc, char **argv) { unsigned i, j; int ret; ret = starpu_init(NULL); if (ret == -ENODEV) exit(77); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); parse_args(argc, argv); FPRINTF(stderr, "ITER : %u\n", ni); { int array[ni]; memset(array, 0, sizeof(array)); for (i = 1; i < ni; i++) { for (j = 1; j < i; j++) { struct starpu_task *task_A = starpu_task_create(); task_A->cl = &cl_A; task_A->cl_arg = &array[i]; task_A->use_tag = 1; task_A->tag_id = TAG(0, i); ret = starpu_task_submit(task_A); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } for (j = 1; j < i; j++) { struct starpu_task *task_B = starpu_task_create(); task_B->cl = &cl_B; task_B->cl_arg = &array[i]; task_B->use_tag = 1; task_B->tag_id = TAG(j, i); starpu_tag_declare_deps(TAG(j, i), 1, TAG(0, i)); ret = starpu_task_submit(task_B); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } starpu_task_wait_for_all(); } for (i = 1; i < ni; i++) { for (j = 0; j < i; j++) starpu_tag_remove(TAG(j, i)); } enodev: starpu_shutdown(); FPRINTF(stderr, "TEST DONE ...\n"); if (ret == -ENODEV) return 77; else return 0; } starpu-1.4.9+dfsg/examples/tag_example/tag_restartable.c000066400000000000000000000071721507764646700234320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example shows how to reuse a tag. * We define a series of dependency chains, shaped this way: * * ... ... * v v * ... task (i, j) task (i, j+1) ... * v v * ... task (i+1,j) task (i+1,j+1) ... * v v * ... ... * * And this grid is used several times, by waiting for the completion of a * chain before starting it over. */ #include #include #include #include #include #include #include #define Nrolls 4 #define SLEEP 1 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define TAG(i, iter) ((starpu_tag_t) (((uint64_t)((iter)%Nrolls))<<32 | (i))) #define Ni 64 #define Nk 256 static unsigned ni = Ni, nk = Nk; struct starpu_task **tasks[Nrolls]; static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-iter") == 0) { char *argptr; nk = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-i") == 0) { char *argptr; ni = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-h") == 0) { printf("usage : %s [-iter iter] [-i i]\n", argv[0]); } } } void callback_cpu(void *argcb); static void create_task_grid(unsigned iter) { unsigned i; FPRINTF(stderr, "init iter %u ni %u...\n", iter, ni); for (i = 0; i < ni; i++) { /* create a new task */ struct starpu_task *task = tasks[iter][i] = starpu_task_create(); task->cl = &starpu_codelet_nop; task->use_tag = 1; task->tag_id = TAG(i, iter); task->detach = 1; task->destroy = 0; if (i != 0) starpu_tag_declare_deps(TAG(i,iter), 1, TAG(i-1,iter)); } } static int start_task_grid(unsigned iter) { unsigned i; /* FPRINTF(stderr, "start grid %d ni %d...\n", iter, ni); */ for (i = 0; i < ni; i++) { int ret; ret = starpu_task_submit(tasks[iter][i]); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } return 0; } int main(int argc, char **argv) { unsigned i, j; int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_QUICK_CHECK ni /= 4; nk /= 16; #endif parse_args(argc, argv); FPRINTF(stderr, "ITER : %u\n", nk); for (i = 0; i < Nrolls; i++) { tasks[i] = (struct starpu_task **) malloc(ni * sizeof(*tasks[i])); create_task_grid(i); } for (i = 0; i < nk; i++) { ret = start_task_grid(i % Nrolls); if (ret == 77) goto enodev; if (i+1 >= Nrolls) /* Wait before re-using same tasks & tags */ starpu_tag_wait(TAG(ni-1, i + 1)); } starpu_shutdown(); FPRINTF(stderr, "TEST DONE ...\n"); enodev: for (i = 0; i < Nrolls; i++) { for (j = 0; j < ni; j++) starpu_task_destroy(tasks[i][j]); free(tasks[i]); } return ret; } starpu-1.4.9+dfsg/examples/transactions/000077500000000000000000000000001507764646700203365ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/transactions/trs_inc.c000066400000000000000000000067741507764646700221610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is just a small example which increments two values of a vector several times. */ #include #ifdef STARPU_QUICK_CHECK static unsigned niter = 500; #elif !defined(STARPU_LONG_CHECK) static unsigned niter = 5000; #else static unsigned niter = 50000; #endif #define DO_TRANS_MOD 10 #define DO_START_MOD 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) static int _do_start_transaction(int val) { if ((val / DO_TRANS_MOD) % DO_START_MOD == 0) { return 0; } else { return 1; } } int do_start_transaction(void *descr, void *arg) { int val = (int)(intptr_t)arg; int ret = _do_start_transaction(val); return ret; } void cpu_func(void *descr[], void *_args) { (void)_args; int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); *val += 1; } int main(int argc, char **argv) { int ret = 0; double start; double end; struct starpu_conf conf; starpu_conf_init(&conf); conf.nmpi_ms = 0; conf.ntcpip_ms = 0; ret = starpu_init(&conf); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (argc == 2) niter = atoi(argv[1]); int value = 0; starpu_data_handle_t handle; starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); struct starpu_codelet cl = { .cpu_funcs = { cpu_func }, .cpu_funcs_name = { "cpu_func" }, .nbuffers = STARPU_VARIABLE_NBUFFERS, .name = "trs_increment" }; start = starpu_timing_now(); struct starpu_transaction *transaction = starpu_transaction_open(do_start_transaction, (void*)(intptr_t)0); if (transaction == NULL) { starpu_cublas_shutdown(); starpu_shutdown(); return 77; /* transaction begin task submit failed with ENODEV */ } int simulated_transaction_status = _do_start_transaction(0); int expected_result = 0; unsigned i; for (i = 0; i < niter; i++) { if (i>0 && (i%DO_TRANS_MOD == 0)) { starpu_transaction_next_epoch(transaction, (void*)(intptr_t)i); simulated_transaction_status = _do_start_transaction(i); } if (simulated_transaction_status) { expected_result ++; } ret = starpu_task_insert(&cl, STARPU_RW, handle, STARPU_TRANSACTION, transaction, 0); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); starpu_data_unregister(handle); goto enodev; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } starpu_transaction_close(transaction); starpu_task_wait_for_all(); starpu_data_unregister(handle); end = starpu_timing_now(); if (value != expected_result) { FPRINTF(stderr, "Incorrect result, value = %d, expected %d\n", value, expected_result); ret = 1; } double timing = end - start; FPRINTF(stderr, "%u,%f,%d\n", niter, timing/1000, value); enodev: starpu_shutdown(); return (ret == -ENODEV ? 77 : ret); } starpu-1.4.9+dfsg/examples/transactions/trs_sgemm.c000066400000000000000000000300311507764646700224770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2017-2017 Erwan Leria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * Simple parallel GEMM implementation: partition the output matrix in the two * dimensions, and the input matrices in the corresponding dimension, and * perform the output computations in parallel. */ #define TYPE float #define CUBLAS_GEMM cublasSgemm #define CPU_GEMM STARPU_SGEMM #define CPU_ASUM STARPU_SASUM #define CPU_IAMAX STARPU_ISAMAX #define STARPU_GEMM(name) starpu_sgemm_##name #define str(s) #s #define xstr(s) str(s) #define STARPU_GEMM_STR(name) xstr(STARPU_GEMM(name)) #include #include #include #include #include #include #include static int do_start_trs_mod=2; static int _do_start_transaction(int val) { if (do_start_trs_mod == 0) { return 0; } return ((val+1) % do_start_trs_mod == 0); } int do_start_transaction(void *descr, void *arg) { int val = (int)(intptr_t)arg; int ret = _do_start_transaction(val); return ret; } #ifdef STARPU_USE_CUDA #include #include static const TYPE p1 = 1.0; static const TYPE m1 = -1.0; static const TYPE v0 = 0.0; #endif static unsigned niter = 10; static unsigned nslicesx = 4; static unsigned nslicesy = 4; #if defined(STARPU_QUICK_CHECK) && !defined(STARPU_SIMGRID) static unsigned xdim = 256; static unsigned ydim = 256; static unsigned zdim = 64; #else static unsigned xdim = 960*4; static unsigned ydim = 960*4; static unsigned zdim = 960*4; #endif static unsigned check = 0; static TYPE *A, *B, *C; static starpu_data_handle_t A_handle, B_handle, C_handle; #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define PRINTF(fmt, ...) do { if (!getenv("STARPU_SSILENT")) {printf(fmt, ## __VA_ARGS__); }} while(0) static void check_output(void) { /* compute C = C - AB */ CPU_GEMM("N", "N", ydim, xdim, zdim, (TYPE)-1.0f, A, ydim, B, zdim, (TYPE)1.0f, C, ydim); /* make sure C = 0 */ TYPE err; err = CPU_ASUM(xdim*ydim, C, 1); if (err < xdim*ydim*0.001) { FPRINTF(stderr, "Results are OK\n"); } else { int max; max = CPU_IAMAX(xdim*ydim, C, 1); FPRINTF(stderr, "There were errors ... err = %f\n", err); FPRINTF(stderr, "Max error : %e\n", C[max]); } } static void init_problem_data(void) { #ifndef STARPU_SIMGRID unsigned i,j; #endif starpu_malloc_flags((void **)&A, zdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_malloc_flags((void **)&B, xdim*zdim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_malloc_flags((void **)&C, xdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); #ifndef STARPU_SIMGRID /* fill the A and B matrices */ for (j=0; j < ydim; j++) { for (i=0; i < zdim; i++) { A[j+i*ydim] = (TYPE)(starpu_drand48()); } } for (j=0; j < zdim; j++) { for (i=0; i < xdim; i++) { B[j+i*zdim] = (TYPE)(starpu_drand48()); } } for (j=0; j < ydim; j++) { for (i=0; i < xdim; i++) { C[j+i*ydim] = (TYPE)(0); } } #endif } static void partition_mult_data(void) { starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, ydim, ydim, zdim, sizeof(TYPE)); starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, zdim, zdim, xdim, sizeof(TYPE)); starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C, ydim, ydim, xdim, sizeof(TYPE)); struct starpu_data_filter vert; memset(&vert, 0, sizeof(vert)); vert.filter_func = starpu_matrix_filter_vertical_block; vert.nchildren = nslicesx; struct starpu_data_filter horiz; memset(&horiz, 0, sizeof(horiz)); horiz.filter_func = starpu_matrix_filter_block; horiz.nchildren = nslicesy; starpu_data_partition(B_handle, &vert); starpu_data_partition(A_handle, &horiz); starpu_data_map_filters(C_handle, 2, &vert, &horiz); } #ifdef STARPU_USE_CUDA static void cublas_mult(void *descr[], void *arg) { (void)arg; TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); cublasStatus_t status = CUBLAS_GEMM(starpu_cublas_get_local_handle(), CUBLAS_OP_N, CUBLAS_OP_N, nxC, nyC, nyA, &p1, subA, ldA, subB, ldB, &v0, subC, ldC); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif void cpu_mult(void *descr[], void *arg) { (void)arg; TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); int worker_size = starpu_combined_worker_get_size(); if (worker_size == 1) { /* Sequential CPU task */ CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, (TYPE)0.0, subC, ldC); } else { /* Parallel CPU task */ unsigned rank = starpu_combined_worker_get_rank(); unsigned block_size = (nyC + worker_size - 1)/worker_size; unsigned new_nyC = STARPU_MIN(nyC, block_size*(rank+1)) - block_size*rank; STARPU_ASSERT(nyC == STARPU_MATRIX_GET_NY(descr[1])); TYPE *new_subB = &subB[block_size*rank]; TYPE *new_subC = &subC[block_size*rank]; CPU_GEMM("N", "N", nxC, new_nyC, nyA, (TYPE)1.0, subA, ldA, new_subB, ldB, (TYPE)0.0, new_subC, ldC); } } static struct starpu_perfmodel starpu_gemm_model = { .type = STARPU_HISTORY_BASED, .symbol = STARPU_GEMM_STR(gemm) }; static struct starpu_codelet cl = { .type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */ .max_parallelism = INT_MAX, .cpu_funcs = {cpu_mult}, .cpu_funcs_name = {"cpu_mult"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cublas_mult}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = STARPU_VARIABLE_NBUFFERS, /* required for transactions */ /* .modes = {STARPU_R, STARPU_R, STARPU_RW}, */ .model = &starpu_gemm_model }; static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-nblocks") == 0) { char *argptr; nslicesx = strtol(argv[++i], &argptr, 10); nslicesy = nslicesx; } else if (strcmp(argv[i], "-nblocksx") == 0) { char *argptr; nslicesx = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-nblocksy") == 0) { char *argptr; nslicesy = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-x") == 0) { char *argptr; xdim = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-y") == 0) { char *argptr; ydim = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-z") == 0) { char *argptr; zdim = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-size") == 0) { char *argptr; xdim = ydim = zdim = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-iter") == 0) { char *argptr; niter = strtol(argv[++i], &argptr, 10); } /* Modulo operand to decide which iterations to confirm or cancel. * * An iteration will be confirmed if ((iter+1) % MOD) == 0, and * cancelled otherwise. As a special value, if MOD == 0, all * iterations will be cancelled. */ else if (strcmp(argv[i], "-mod") == 0) { char *argptr; do_start_trs_mod = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-check") == 0) { check = 1; } else if (strcmp(argv[i], "-spmd") == 0) { cl.type = STARPU_SPMD; } else if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) { fprintf(stderr,"Usage: %s [-nblocks n] [-nblocksx x] [-nblocksy y] [-x x] [-y y] [-z z] [-size size] [-iter iter] [-check] [-spmd] [-mod start_trs_mod]\n", argv[0]); fprintf(stderr,"Currently selected: %ux%u * %ux%u and %ux%u blocks, %u iterations, transaction confirmation modulo = %d\n", zdim, ydim, xdim, zdim, nslicesx, nslicesy, niter, do_start_trs_mod); exit(EXIT_SUCCESS); } else { fprintf(stderr,"Unrecognized option %s", argv[i]); exit(EXIT_FAILURE); } } } int main(int argc, char **argv) { double start, end; int ret; struct starpu_conf conf; starpu_conf_init(&conf); conf.nmpi_ms = 0; conf.ntcpip_ms = 0; parse_args(argc, argv); starpu_fxt_autostart_profiling(0); ret = starpu_init(&conf); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_cublas_init(); init_problem_data(); partition_mult_data(); starpu_fxt_start_profiling(); start = starpu_timing_now(); unsigned x, y, iter; struct starpu_transaction *transaction = starpu_transaction_open(do_start_transaction, (void*)(intptr_t)0); if (transaction == NULL) { starpu_cublas_shutdown(); starpu_shutdown(); return 77; /* transaction begin task submit failed with ENODEV */ } int simulated_transaction_status = _do_start_transaction(0); int n_cancelled_iter=0; for (iter = 0; iter < niter; iter++) { if (iter > 0) { starpu_transaction_next_epoch(transaction, (void*)(intptr_t)iter); simulated_transaction_status = _do_start_transaction(iter); } if (simulated_transaction_status == 0) { n_cancelled_iter++; } for (x = 0; x < nslicesx; x++) { for (y = 0; y < nslicesy; y++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = starpu_data_get_sub_data(A_handle, 1, y); task->handles[1] = starpu_data_get_sub_data(B_handle, 1, x); task->handles[2] = starpu_data_get_sub_data(C_handle, 2, x, y); task->modes[0] = STARPU_R; task->modes[1] = STARPU_R; task->modes[2] = STARPU_RW; task->nbuffers = 3; task->transaction = transaction; task->flops = 2ULL * (xdim/nslicesx) * (ydim/nslicesy) * zdim; ret = starpu_task_submit(task); if (ret == -ENODEV) { ret = 77; goto enodev; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_data_wont_use(starpu_data_get_sub_data(C_handle, 2, x, y)); } } } starpu_transaction_close(transaction); starpu_task_wait_for_all(); end = starpu_timing_now(); starpu_fxt_stop_profiling(); double timing = end - start; PRINTF("# x\ty\tz\ts\tniter\tncancelled"); PRINTF("\n"); PRINTF("%u\t%u\t%u\t%.3f\t%d\t%d", xdim, ydim, zdim, timing/1.0e6, niter, n_cancelled_iter); PRINTF("\n"); enodev: starpu_data_unpartition(C_handle, STARPU_MAIN_RAM); starpu_data_unpartition(B_handle, STARPU_MAIN_RAM); starpu_data_unpartition(A_handle, STARPU_MAIN_RAM); starpu_data_unregister(A_handle); starpu_data_unregister(B_handle); starpu_data_unregister(C_handle); if (check) check_output(); starpu_free_flags(A, zdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_free_flags(B, xdim*zdim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_free_flags(C, xdim*ydim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_cublas_shutdown(); starpu_shutdown(); return ret; } starpu-1.4.9+dfsg/examples/worker_collections/000077500000000000000000000000001507764646700215355ustar00rootroot00000000000000starpu-1.4.9+dfsg/examples/worker_collections/worker_list_example.c000066400000000000000000000047731507764646700257730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This shows how to manipulate worker lists. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) int main() { int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int procs[STARPU_NMAXWORKERS]; unsigned ncpus = starpu_cpu_worker_get_count(); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, ncpus); struct starpu_worker_collection *co = (struct starpu_worker_collection*)malloc(sizeof(struct starpu_worker_collection)); co->has_next = starpu_worker_list.has_next; co->get_next = starpu_worker_list.get_next; co->add = starpu_worker_list.add; co->remove = starpu_worker_list.remove; co->init = starpu_worker_list.init; co->deinit = starpu_worker_list.deinit; co->init_iterator = starpu_worker_list.init_iterator; co->type = STARPU_WORKER_LIST; FPRINTF(stderr, "ncpus %u\n", ncpus); double start_time; double end_time; start_time = starpu_timing_now(); co->init(co); end_time = starpu_timing_now(); double timing = (end_time - start_time) / 1000; unsigned i; for(i = 0; i < ncpus; i++) { int added = co->add(co, procs[i]); FPRINTF(stderr, "added proc %d to the tree \n", added); } struct starpu_sched_ctx_iterator it; int pu; co->init_iterator(co, &it); while(co->has_next(co, &it)) { pu = co->get_next(co, &it); FPRINTF(stderr, "pu = %d out of %u workers \n", pu, co->nworkers); } for(i = 0; i < 6; i++) { co->remove(co, i); FPRINTF(stderr, "remove %u out of %u workers\n", i, co->nworkers); } while(co->has_next(co, &it)) { pu = co->get_next(co, &it); FPRINTF(stderr, "pu = %d out of %u workers\n", pu, co->nworkers); } FPRINTF(stderr, "timing init = %lf \n", timing); co->deinit(co); free(co); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/examples/worker_collections/worker_tree_example.c000066400000000000000000000053051507764646700257470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This shows how to manipulate worker trees. */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #if !defined(STARPU_HAVE_HWLOC) #warning hwloc is not enabled. Skipping test int main(int argc, char **argv) { return 77; } #else int main() { int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int procs[STARPU_NMAXWORKERS]; unsigned ncpus = starpu_cpu_worker_get_count(); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, ncpus); struct starpu_worker_collection *co = (struct starpu_worker_collection*)calloc(1, sizeof(struct starpu_worker_collection)); co->has_next = starpu_worker_tree.has_next; co->get_next = starpu_worker_tree.get_next; co->add = starpu_worker_tree.add; co->remove = starpu_worker_tree.remove; co->init = starpu_worker_tree.init; co->deinit = starpu_worker_tree.deinit; co->init_iterator = starpu_worker_tree.init_iterator; co->type = STARPU_WORKER_TREE; FPRINTF(stderr, "ncpus %u \n", ncpus); double start_time; double end_time; start_time = starpu_timing_now(); co->init(co); end_time = starpu_timing_now(); double timing = (end_time - start_time) / 1000; unsigned i; for(i = 0; i < ncpus; i++) { int added = co->add(co, procs[i]); FPRINTF(stderr, "added proc %d to the tree \n", added); } struct starpu_sched_ctx_iterator it; int pu; co->init_iterator(co, &it); while(co->has_next(co, &it)) { pu = co->get_next(co, &it); FPRINTF(stderr, "pu = %d out of %u workers \n", pu, co->nworkers); } unsigned six = 6; if (six < ncpus) six = ncpus/2; for(i = 0; i < six; i++) { co->remove(co, i); FPRINTF(stderr, "remove %u out of %u workers\n", i, co->nworkers); } while(co->has_next(co, &it)) { pu = co->get_next(co, &it); FPRINTF(stderr, "pu = %d out of %u workers \n", pu, co->nworkers); } FPRINTF(stderr, "timing init = %lf \n", timing); co->deinit(co); starpu_shutdown(); free(co); return 0; } #endif starpu-1.4.9+dfsg/include/000077500000000000000000000000001507764646700154335ustar00rootroot00000000000000starpu-1.4.9+dfsg/include/fstarpu_mod.f90000066400000000000000000005046651507764646700203160ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! !> @ingroup API_Fortran !> @brief Fortran API module fstarpu_mod use iso_c_binding implicit none ! Note: Constants truly are intptr_t, but are declared as c_ptr to be ! readily usable in c_ptr arrays to mimic variadic functions. ! Note: Bitwise or operator is provided by the .ior. overloaded operator type(c_ptr), bind(C) :: FSTARPU_R type(c_ptr), bind(C) :: FSTARPU_W type(c_ptr), bind(C) :: FSTARPU_RW type(c_ptr), bind(C) :: FSTARPU_SCRATCH type(c_ptr), bind(C) :: FSTARPU_REDUX type(c_ptr), bind(C) :: FSTARPU_MPI_REDUX type(c_ptr), bind(C) :: FSTARPU_COMMUTE type(c_ptr), bind(C) :: FSTARPU_SSEND type(c_ptr), bind(C) :: FSTARPU_LOCALITY type(c_ptr), bind(C) :: FSTARPU_DATA_ARRAY type(c_ptr), bind(C) :: FSTARPU_DATA_MODE_ARRAY type(c_ptr), bind(C) :: FSTARPU_CL_ARGS type(c_ptr), bind(C) :: FSTARPU_CL_ARGS_NFREE type(c_ptr), bind(C) :: FSTARPU_TASK_DEPS_ARRAY type(c_ptr), bind(C) :: FSTARPU_CALLBACK type(c_ptr), bind(C) :: FSTARPU_CALLBACK_WITH_ARG type(c_ptr), bind(C) :: FSTARPU_CALLBACK_WITH_ARG_NFREE type(c_ptr), bind(C) :: FSTARPU_CALLBACK_ARG type(c_ptr), bind(C) :: FSTARPU_CALLBACK_ARG_NFREE type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_ARG type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP_ARG type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE type(c_ptr), bind(C) :: FSTARPU_PRIORITY type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_NODE type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_DATA type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_WORKER type(c_ptr), bind(C) :: FSTARPU_WORKER_ORDER type(c_ptr), bind(C) :: FSTARPU_EXECUTE_WHERE type(c_ptr), bind(C) :: FSTARPU_HYPERVISOR_TAG type(c_ptr), bind(C) :: FSTARPU_POSSIBLY_PARALLEL type(c_ptr), bind(C) :: FSTARPU_FLOPS type(c_ptr), bind(C) :: FSTARPU_TAG type(c_ptr), bind(C) :: FSTARPU_TAG_ONLY type(c_ptr), bind(C) :: FSTARPU_NAME type(c_ptr), bind(C) :: FSTARPU_TASK_COLOR type(c_ptr), bind(C) :: FSTARPU_TASK_SYNCHRONOUS type(c_ptr), bind(C) :: FSTARPU_HANDLES_SEQUENTIAL_CONSISTENCY type(c_ptr), bind(C) :: FSTARPU_TASK_END_DEP type(c_ptr), bind(C) :: FSTARPU_NODE_SELECTION_POLICY type(c_ptr), bind(C) :: FSTARPU_TASK_SCHED_DATA type(c_ptr), bind(C) :: FSTARPU_VALUE type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX type(c_ptr), bind(C) :: FSTARPU_CPU_WORKER type(c_ptr), bind(C) :: FSTARPU_CUDA_WORKER type(c_ptr), bind(C) :: FSTARPU_OPENCL_WORKER type(c_ptr), bind(C) :: FSTARPU_ANY_WORKER integer(c_int), bind(C) :: FSTARPU_NMAXBUFS type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_NAME type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_STRUCT type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_MIN_PRIO type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_MAX_PRIO type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_HIERARCHY_LEVEL type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_NESTED type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_AWAKE_WORKERS type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_INIT type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_USER_DATA type(c_ptr), bind(C) :: FSTARPU_NOWHERE type(c_ptr), bind(C) :: FSTARPU_CPU type(c_ptr), bind(C) :: FSTARPU_CUDA type(c_ptr), bind(C) :: FSTARPU_OPENCL type(c_ptr), bind(C) :: FSTARPU_CODELET_SIMGRID_EXECUTE type(c_ptr), bind(C) :: FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT type(c_ptr), bind(C) :: FSTARPU_CUDA_ASYNC type(c_ptr), bind(C) :: FSTARPU_OPENCL_ASYNC !type(c_ptr), bind(C) :: FSTARPU_PER_WORKER !type(c_ptr), bind(C) :: FSTARPU_PER_ARCH !type(c_ptr), bind(C) :: FSTARPU_PER_COMMON type(c_ptr), bind(C) :: FSTARPU_HISTORY_BASED type(c_ptr), bind(C) :: FSTARPU_REGRESSION_BASED type(c_ptr), bind(C) :: FSTARPU_NL_REGRESSION_BASED type(c_ptr), bind(C) :: FSTARPU_MULTIPLE_REGRESSION_BASED type(c_ptr), bind(C) :: FSTARPU_SEQ type(c_ptr), bind(C) :: FSTARPU_SPMD type(c_ptr), bind(C) :: FSTARPU_FORKJOIN ! (some) portable iso_c_binding types type(c_ptr), bind(C) :: FSTARPU_SZ_C_DOUBLE type(c_ptr), bind(C) :: FSTARPU_SZ_C_FLOAT type(c_ptr), bind(C) :: FSTARPU_SZ_C_CHAR type(c_ptr), bind(C) :: FSTARPU_SZ_C_INT type(c_ptr), bind(C) :: FSTARPU_SZ_C_INTPTR_T type(c_ptr), bind(C) :: FSTARPU_SZ_C_PTR type(c_ptr), bind(C) :: FSTARPU_SZ_C_SIZE_T ! (some) native Fortran types type(c_ptr), bind(C) :: FSTARPU_SZ_CHARACTER type(c_ptr), bind(C) :: FSTARPU_SZ_INTEGER type(c_ptr), bind(C) :: FSTARPU_SZ_INT4 type(c_ptr), bind(C) :: FSTARPU_SZ_INT8 type(c_ptr), bind(C) :: FSTARPU_SZ_REAL type(c_ptr), bind(C) :: FSTARPU_SZ_REAL4 type(c_ptr), bind(C) :: FSTARPU_SZ_REAL8 type(c_ptr), bind(C) :: FSTARPU_SZ_DOUBLE_PRECISION type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX4 type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX8 integer(c_int), bind(C), target :: FSTARPU_DEFAULT_PRIO interface operator (.ior.) procedure or_cptrs end interface operator (.ior.) interface ! == starpu.h == ! void starpu_conf_init(struct starpu_conf *conf); subroutine fstarpu_conf_init (conf) bind(C,name="starpu_conf_init") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: conf end subroutine fstarpu_conf_init function fstarpu_conf_allocate () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_conf_allocate end function fstarpu_conf_allocate subroutine fstarpu_conf_free (conf) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: conf end subroutine fstarpu_conf_free subroutine fstarpu_conf_set_sched_policy_name (conf, policy_name) bind(C) use iso_c_binding, only: c_ptr, c_char type(c_ptr), value, intent(in) :: conf character(c_char), intent(in) :: policy_name end subroutine fstarpu_conf_set_sched_policy_name subroutine fstarpu_conf_set_min_prio (conf, min_prio) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: conf integer(c_int), value, intent(in) :: min_prio end subroutine fstarpu_conf_set_min_prio subroutine fstarpu_conf_set_max_prio (conf, max_prio) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: conf integer(c_int), value, intent(in) :: max_prio end subroutine fstarpu_conf_set_max_prio subroutine fstarpu_conf_set_ncpu (conf, ncpu) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: conf integer(c_int), value, intent(in) :: ncpu end subroutine fstarpu_conf_set_ncpu subroutine fstarpu_conf_set_ncuda (conf, ncuda) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: conf integer(c_int), value, intent(in) :: ncuda end subroutine fstarpu_conf_set_ncuda subroutine fstarpu_conf_set_nopencl (conf, nopencl) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: conf integer(c_int), value, intent(in) :: nopencl end subroutine fstarpu_conf_set_nopencl ! starpu_init: see fstarpu_init ! starpu_initialize: see fstarpu_init ! void starpu_pause(void); subroutine fstarpu_pause() bind(C,name="starpu_pause") end subroutine fstarpu_pause ! void starpu_resume(void); subroutine fstarpu_resume() bind(C,name="starpu_resume") end subroutine fstarpu_resume ! int starpu_is_paused(void); function fstarpu_is_paused() bind(C,name="starpu_is_paused") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_is_paused end function fstarpu_is_paused ! void starpu_shutdown(void); subroutine fstarpu_shutdown () bind(C,name="starpu_shutdown") end subroutine fstarpu_shutdown ! starpu_topology_print subroutine fstarpu_topology_print () bind(C) end subroutine fstarpu_topology_print ! int starpu_asynchronous_copy_disabled(void); function fstarpu_asynchronous_copy_disabled() bind(C,name="starpu_asynchronous_copy_disabled") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_asynchronous_copy_disabled end function fstarpu_asynchronous_copy_disabled ! int starpu_asynchronous_cuda_copy_disabled(void); function fstarpu_asynchronous_cuda_copy_disabled() bind(C,name="starpu_asynchronous_cuda_copy_disabled") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_asynchronous_cuda_copy_disabled end function fstarpu_asynchronous_cuda_copy_disabled ! int starpu_asynchronous_opencl_copy_disabled(void); function fstarpu_asynchronous_opencl_copy_disabled() bind(C,name="starpu_asynchronous_opencl_copy_disabled") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_asynchronous_opencl_copy_disabled end function fstarpu_asynchronous_opencl_copy_disabled ! void starpu_display_stats(); subroutine fstarpu_display_stats() bind(C,name="starpu_display_stats") end subroutine fstarpu_display_stats ! void starpu_get_version(int *major, int *minor, int *release); subroutine fstarpu_get_version(major,minor,release) bind(C,name="starpu_get_version") use iso_c_binding, only: c_int integer(c_int), intent(out) :: major,minor,release end subroutine fstarpu_get_version ! == starpu_worker.h == ! unsigned starpu_worker_get_count(void); function fstarpu_worker_get_count() bind(C,name="starpu_worker_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_get_count end function fstarpu_worker_get_count ! unsigned starpu_combined_worker_get_count(void); function fstarpu_combined_worker_get_count() bind(C,name="starpu_combined_worker_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_combined_worker_get_count end function fstarpu_combined_worker_get_count ! unsigned starpu_worker_is_combined_worker(int id); function fstarpu_worker_is_combined_worker(id) bind(C,name="starpu_worker_is_combined_worker") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_is_combined_worker integer(c_int), value, intent(in) :: id end function fstarpu_worker_is_combined_worker ! unsigned starpu_cpu_worker_get_count(void); function fstarpu_cpu_worker_get_count() bind(C,name="starpu_cpu_worker_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_cpu_worker_get_count end function fstarpu_cpu_worker_get_count ! unsigned starpu_cuda_worker_get_count(void); function fstarpu_cuda_worker_get_count() bind(C,name="starpu_cuda_worker_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_cuda_worker_get_count end function fstarpu_cuda_worker_get_count ! unsigned starpu_opencl_worker_get_count(void); function fstarpu_opencl_worker_get_count() bind(C,name="starpu_opencl_worker_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_opencl_worker_get_count end function fstarpu_opencl_worker_get_count ! int starpu_worker_get_id(void); function fstarpu_worker_get_id() bind(C,name="starpu_worker_get_id") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_get_id end function fstarpu_worker_get_id ! _starpu_worker_get_id_check ! starpu_worker_get_id_check ! int starpu_worker_get_bindid(int workerid); function fstarpu_worker_get_bindid(id) bind(C,name="starpu_worker_get_bindid") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_get_bindid integer(c_int), value, intent(in) :: id end function fstarpu_worker_get_bindid ! int starpu_combined_worker_get_id(void); function fstarpu_combined_worker_get_id() bind(C,name="starpu_combined_worker_get_id") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_combined_worker_get_id end function fstarpu_combined_worker_get_id ! int starpu_combined_worker_get_size(void); function fstarpu_combined_worker_get_size() bind(C,name="starpu_combined_worker_get_size") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_combined_worker_get_size end function fstarpu_combined_worker_get_size ! int starpu_combined_worker_get_rank(void); function fstarpu_combined_worker_get_rank() bind(C,name="starpu_combined_worker_get_rank") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_combined_worker_get_rank end function fstarpu_combined_worker_get_rank ! enum starpu_worker_archtype starpu_worker_get_type(int id); function fstarpu_worker_get_type(id) bind(C) use iso_c_binding, only: c_int, c_ptr type(c_ptr) :: fstarpu_worker_get_type ! C function returns c_intptr_t integer(c_int),value,intent(in) :: id end function fstarpu_worker_get_type ! int starpu_worker_get_count_by_type(enum starpu_worker_archtype type); function fstarpu_worker_get_count_by_type(typeid) bind(C) use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_worker_get_count_by_type type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func end function fstarpu_worker_get_count_by_type ! int starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize); function fstarpu_worker_get_ids_by_type(typeid, workerids, maxsize) bind(C) use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_worker_get_ids_by_type type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func integer(c_int),intent(out) :: workerids(*) integer(c_int),value,intent(in) :: maxsize end function fstarpu_worker_get_ids_by_type ! int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num); function fstarpu_worker_get_by_type(typeid, num) bind(C) use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_worker_get_by_type type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func integer(c_int),value,intent(in) :: num end function fstarpu_worker_get_by_type ! int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid); function fstarpu_worker_get_by_devid(typeid, devid) bind(C) use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_worker_get_by_devid type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func integer(c_int),value,intent(in) :: devid end function fstarpu_worker_get_by_devid ! void starpu_worker_get_name(int id, char *dst, size_t maxlen); subroutine fstarpu_worker_get_name(id, dst, maxlen) bind(C,name="starpu_worker_get_name") use iso_c_binding, only: c_int, c_char, c_size_t integer(c_int),value,intent(in) :: id character(c_char),intent(out) :: dst(*) integer(c_size_t),value,intent(in) :: maxlen end subroutine fstarpu_worker_get_name ! int starpu_worker_get_devid(int id); function fstarpu_worker_get_devid(id) bind(C,name="starpu_worker_get_devid") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_get_devid integer(c_int), value, intent(in) :: id end function fstarpu_worker_get_devid ! struct starpu_tree* starpu_workers_get_tree(void); ! unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx); ! unsigned starpu_worker_is_blocked(int workerid); function fstarpu_worker_is_blocked(id) bind(C,name="starpu_worker_is_blocked") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_is_blocked integer(c_int), value, intent(in) :: id end function fstarpu_worker_is_blocked ! unsigned starpu_worker_is_slave_somewhere(int workerid); function fstarpu_worker_is_slave_somewhere(id) bind(C,name="starpu_worker_is_slave_somewhere") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_is_slave_somewhere integer(c_int), value, intent(in) :: id end function fstarpu_worker_is_slave_somewhere ! char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type); subroutine fstarpu_worker_get_type_as_string(typeid,dst,maxlen) bind(C) use iso_c_binding, only: c_ptr, c_char, c_size_t type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func character(c_char),intent(out) :: dst(*) integer(c_size_t),value,intent(in) :: maxlen end subroutine fstarpu_worker_get_type_as_string ! int starpu_bindid_get_workerids(int bindid, int **workerids); ! == starpu_task.h == function fstarpu_task_create_sync (handle, mode) bind(C,name="starpu_task_create_sync") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_task_create_sync type(c_ptr), value, intent(in) :: handle type(c_ptr), value, intent(in) :: mode end function fstarpu_task_create_sync ! void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array); subroutine fstarpu_tag_declare_deps_array(id,ndeps,tag_array) bind(C,name="starpu_tag_declare_deps_array") use iso_c_binding, only: c_int, c_long_long integer(c_int), value, intent(in) :: id integer(c_int), value, intent(in) :: ndeps integer(c_long_long), intent(in) :: tag_array(*) end subroutine fstarpu_tag_declare_deps_array ! void starpu_task_declare_deps(starpu_tag_t id, unsigned ndeps, ...); subroutine fstarpu_task_declare_deps(task,ndeps,root_task) bind(C,name="starpu_task_declare_deps") use iso_c_binding, only: c_int, c_ptr type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: ndeps type(c_ptr), value, intent(in) :: root_task end subroutine fstarpu_task_declare_deps ! void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]); subroutine fstarpu_task_declare_deps_array(task,ndeps,task_array) bind(C,name="starpu_task_declare_deps_array") use iso_c_binding, only: c_int, c_ptr type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: ndeps type(c_ptr), intent(in) :: task_array(*) end subroutine fstarpu_task_declare_deps_array ! void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps) subroutine fstarpu_task_end_dep_add(task, nb_deps) & bind(C,name="starpu_task_end_dep_add") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: nb_deps end subroutine fstarpu_task_end_dep_add ! void starpu_task_end_dep_release(struct starpu_task *t) subroutine fstarpu_task_end_dep_release(task) & bind(C,name="starpu_task_end_dep_release") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: task end subroutine fstarpu_task_end_dep_release ! int starpu_tag_wait(starpu_tag_t id); function fstarpu_tag_wait(id) bind(C,name="starpu_tag_wait") use iso_c_binding, only: c_int, c_long_long integer(c_int) :: fstarpu_tag_wait integer(c_long_long), value, intent(in) :: id end function fstarpu_tag_wait ! int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id); function fstarpu_tag_wait_array(ntags,tag_array) bind(C,name="starpu_tag_wait_array") use iso_c_binding, only: c_int, c_long_long integer(c_int) :: fstarpu_tag_wait_array integer(c_int), value, intent(in) :: ntags integer(c_long_long), intent(in) :: tag_array(*) end function fstarpu_tag_wait_array ! void starpu_tag_notify_from_apps(starpu_tag_t id); subroutine fstarpu_tag_notify_from_apps(id) bind(C,name="starpu_tag_notify_from_apps") use iso_c_binding, only: c_long_long integer(c_long_long), value, intent(in) :: id end subroutine fstarpu_tag_notify_from_apps ! void starpu_tag_restart(starpu_tag_t id); subroutine fstarpu_tag_restart(id) bind(C,name="starpu_tag_restart") use iso_c_binding, only: c_long_long integer(c_long_long), value, intent(in) :: id end subroutine fstarpu_tag_restart ! void starpu_tag_remove(starpu_tag_t id); subroutine fstarpu_tag_remove(id) bind(C,name="starpu_tag_remove") use iso_c_binding, only: c_long_long integer(c_long_long), value, intent(in) :: id end subroutine fstarpu_tag_remove ! struct starpu_task *starpu_tag_get_task(starpu_tag_t id); function fstarpu_tag_get_task(id) bind(C,name="starpu_tag_get_task") use iso_c_binding, only: c_ptr, c_long_long type(c_ptr) :: fstarpu_tag_get_task integer(c_long_long), value, intent(in) :: id end function fstarpu_tag_get_task ! void starpu_task_init(struct starpu_task *task); subroutine fstarpu_task_init (task) bind(C,name="starpu_task_init") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: task end subroutine fstarpu_task_init ! void starpu_task_clean(struct starpu_task *task); subroutine fstarpu_task_clean (task) bind(C,name="starpu_task_clean") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: task end subroutine fstarpu_task_clean ! struct starpu_task *starpu_task_create(void) STARPU_ATTRIBUTE_MALLOC; function fstarpu_task_create () bind(C,name="starpu_task_create") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_task_create end function fstarpu_task_create ! void starpu_task_destroy(struct starpu_task *task); subroutine fstarpu_task_destroy (task) bind(C,name="starpu_task_destroy") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: task end subroutine fstarpu_task_destroy ! void starpu_task_set_destroy(struct starpu_task *task); subroutine fstarpu_task_set_destroy (task) bind(C,name="starpu_task_set_destroy") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: task end subroutine fstarpu_task_set_destroy ! int starpu_task_submit(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; function fstarpu_task_submit (task) bind(C,name="starpu_task_submit") use iso_c_binding, only: c_int,c_ptr integer(c_int) :: fstarpu_task_submit type(c_ptr), value, intent(in) :: task end function fstarpu_task_submit ! int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id); function fstarpu_task_submit_to_ctx (task,sched_ctx_id) bind(C,name="starpu_task_submit_to_ctx") use iso_c_binding, only: c_int,c_ptr integer(c_int) :: fstarpu_task_submit_to_ctx type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_task_submit_to_ctx ! int starpu_task_finished(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; function fstarpu_task_finished (task) bind(C,name="starpu_task_finished") use iso_c_binding, only: c_int,c_ptr integer(c_int) :: fstarpu_task_finished type(c_ptr), value, intent(in) :: task end function fstarpu_task_finished ! int starpu_task_wait(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; function fstarpu_task_wait (task) bind(C,name="starpu_task_wait") use iso_c_binding, only: c_int,c_ptr integer(c_int) :: fstarpu_task_wait type(c_ptr), value, intent(in) :: task end function fstarpu_task_wait ! int starpu_task_wait_array(struct starpu_task **tasks, unsigned nb_tasks) STARPU_WARN_UNUSED_RESULT; function fstarpu_task_wait_array(task_array,ntasks) bind(C,name="starpu_task_wait_array") use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_task_wait_array integer(c_int), value, intent(in) :: ntasks type(c_ptr), intent(in) :: task_array end function fstarpu_task_wait_array ! int starpu_task_wait_for_all(void); subroutine fstarpu_task_wait_for_all () bind(C,name="starpu_task_wait_for_all") end subroutine fstarpu_task_wait_for_all ! int starpu_task_wait_for_n_submitted(unsigned n); subroutine fstarpu_task_wait_for_n_submitted (n) bind(C,name="starpu_task_wait_for_n_submitted") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: n end subroutine fstarpu_task_wait_for_n_submitted ! int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx_id); subroutine fstarpu_task_wait_for_all_in_ctx (ctx) bind(C,name="starpu_task_wait_for_all_in_ctx") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: ctx end subroutine fstarpu_task_wait_for_all_in_ctx ! int starpu_task_wait_for_n_submitted_in_ctx(unsigned sched_ctx_id, unsigned n); subroutine fstarpu_task_wait_for_n_submitted_in_ctx (ctx,n) bind(C,name="starpu_task_wait_for_n_submitted_in_ctx") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: ctx integer(c_int), value, intent(in) :: n end subroutine fstarpu_task_wait_for_n_submitted_in_ctx ! int starpu_task_wait_for_no_ready(void); function fstarpu_task_wait_for_no_ready () bind(C,name="starpu_task_wait_for_no_ready") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_task_wait_for_no_ready end function fstarpu_task_wait_for_no_ready ! int starpu_task_nready(void); function fstarpu_task_nready () bind(C,name="starpu_task_nready") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_task_nready end function fstarpu_task_nready ! int starpu_task_nsubmitted(void); function fstarpu_task_nsubmitted () bind(C,name="starpu_task_nsubmitted") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_task_nsubmitted end function fstarpu_task_nsubmitted ! void starpu_do_schedule(void); subroutine fstarpu_do_schedule () bind(C,name="starpu_do_schedule") end subroutine fstarpu_do_schedule ! starpu_codelet_init subroutine fstarpu_codelet_init (codelet) bind(C,name="starpu_codelet_init") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: codelet end subroutine fstarpu_codelet_init ! starpu_codelet_display_stats subroutine fstarpu_codelet_display_stats (codelet) bind(C,name="starpu_codelet_display_stats") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: codelet end subroutine fstarpu_codelet_display_stats ! struct starpu_task *starpu_task_get_current(void); function fstarpu_task_get_current () bind(C,name="starpu_task_get_current") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_task_get_current end function fstarpu_task_get_current ! void starpu_parallel_task_barrier_init(struct starpu_task *task, int workerid); subroutine fstarpu_parallel_task_barrier_init_init (task,id) & bind(C,name="starpu_parallel_task_barrier_init_init") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: id end subroutine fstarpu_parallel_task_barrier_init_init ! void starpu_parallel_task_barrier_init_n(struct starpu_task *task, int worker_size); subroutine fstarpu_parallel_task_barrier_init_n_init_n (task,sz) & bind(C,name="starpu_parallel_task_barrier_init_n_init_n") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sz end subroutine fstarpu_parallel_task_barrier_init_n_init_n ! struct starpu_task *starpu_task_dup(struct starpu_task *task); function fstarpu_task_dup (task) bind(C,name="starpu_task_dup") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_task_dup type(c_ptr), value, intent(in) :: task end function fstarpu_task_dup ! void starpu_task_set_implementation(struct starpu_task *task, unsigned impl); subroutine fstarpu_task_set_implementation (task,impl) & bind(C,name="starpu_task_set_implementation") use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: impl end subroutine fstarpu_task_set_implementation ! unsigned starpu_task_get_implementation(struct starpu_task *task); function fstarpu_task_get_implementation (task) & bind(C,name="starpu_task_get_implementation") use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: task integer(c_int) :: fstarpu_task_get_implementation end function fstarpu_task_get_implementation ! -- function fstarpu_codelet_allocate () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_codelet_allocate end function fstarpu_codelet_allocate subroutine fstarpu_codelet_free (cl) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl end subroutine fstarpu_codelet_free subroutine fstarpu_codelet_set_name (cl, cl_name) bind(C) use iso_c_binding, only: c_ptr, c_char type(c_ptr), value, intent(in) :: cl character(c_char), intent(in) :: cl_name end subroutine fstarpu_codelet_set_name subroutine fstarpu_codelet_set_color (cl, cl_color) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: cl integer(c_int), value, intent(in) :: cl_color end subroutine fstarpu_codelet_set_color subroutine fstarpu_codelet_set_model (cl, cl_perfmodel) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: cl_perfmodel end subroutine fstarpu_codelet_set_model subroutine fstarpu_codelet_set_energy_model (cl, cl_perfmodel) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: cl_perfmodel end subroutine fstarpu_codelet_set_energy_model subroutine fstarpu_codelet_add_cpu_func (cl, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: cl type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_codelet_add_cpu_func subroutine fstarpu_codelet_add_cuda_func (cl, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: cl type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_codelet_add_cuda_func subroutine fstarpu_codelet_add_cuda_flags (cl, flags) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: flags ! C function expects an intptr_t end subroutine fstarpu_codelet_add_cuda_flags subroutine fstarpu_codelet_add_opencl_func (cl, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: cl type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_codelet_add_opencl_func subroutine fstarpu_codelet_add_opencl_flags (cl, flags) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: flags ! C function expects an intptr_t end subroutine fstarpu_codelet_add_opencl_flags subroutine fstarpu_codelet_add_buffer (cl, mode) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: mode ! C function expects an intptr_t end subroutine fstarpu_codelet_add_buffer subroutine fstarpu_codelet_set_variable_nbuffers (cl) bind(C) use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: cl end subroutine fstarpu_codelet_set_variable_nbuffers subroutine fstarpu_codelet_set_nbuffers (cl, nbuffers) bind(C) use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: cl integer(c_int), value, intent(in) :: nbuffers end subroutine fstarpu_codelet_set_nbuffers subroutine fstarpu_codelet_set_flags (cl, flags) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: flags ! C function expects an intptr_t end subroutine fstarpu_codelet_set_flags subroutine fstarpu_codelet_set_where (cl, where) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: where ! C function expects an intptr_t end subroutine fstarpu_codelet_set_where subroutine fstarpu_codelet_set_type (cl, type_constant) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: type_constant ! C function expects an intptr_t end subroutine fstarpu_codelet_set_type subroutine fstarpu_codelet_set_max_parallelism (cl, max_parallelism) bind(C) use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: cl integer(c_int), value, intent(in) :: max_parallelism end subroutine fstarpu_codelet_set_max_parallelism function fstarpu_perfmodel_allocate () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_perfmodel_allocate end function fstarpu_perfmodel_allocate subroutine fstarpu_perfmodel_free (model) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: model end subroutine fstarpu_perfmodel_free subroutine fstarpu_perfmodel_set_symbol (model, model_symbol) bind(C) use iso_c_binding, only: c_ptr, c_char type(c_ptr), value, intent(in) :: model character(c_char), intent(in) :: model_symbol end subroutine fstarpu_perfmodel_set_symbol subroutine fstarpu_perfmodel_set_type (model, type) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: model type(c_ptr), value, intent(in) :: type ! C function expects an intptr_t end subroutine fstarpu_perfmodel_set_type ! == starpu_data_interface.h == ! uintptr_t starpu_malloc_on_node_flags(unsigned dst_node, size_t size, int flags); ! uintptr_t starpu_malloc_on_node(unsigned dst_node, size_t size); function fstarpu_malloc_on_node(node,sz) bind(C,name="starpu_malloc_on_node") use iso_c_binding, only: c_int,c_intptr_t,c_size_t integer(c_intptr_t) :: fstarpu_malloc_on_node integer(c_int), value, intent(in) :: node integer(c_size_t), value, intent(in) :: sz end function fstarpu_malloc_on_node ! void starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags); ! void starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size); subroutine fstarpu_free_on_node(node,addr,sz) bind(C,name="starpu_free_on_node") use iso_c_binding, only: c_int,c_intptr_t,c_size_t integer(c_int), value, intent(in) :: node integer(c_intptr_t), value, intent(in) :: addr integer(c_size_t), value, intent(in) :: sz end subroutine fstarpu_free_on_node ! void starpu_malloc_on_node_set_default_flags(unsigned node, int flags); ! int starpu_data_interface_get_next_id(void); ! void starpu_data_register(starpu_data_handle_t *handleptr, unsigned home_node, void *data_interface, struct starpu_data_interface_ops *ops); ! void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node); subroutine fstarpug_data_ptr_register (dh,node) bind(C,name="starpu_data_ptr_register") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end subroutine fstarpug_data_ptr_register ! void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc); subroutine fstarpu_data_register_same (dh_dst,dh_src) bind(C,name="starpu_data_register_same") use iso_c_binding, only: c_ptr type(c_ptr), intent(out) :: dh_dst type(c_ptr), value, intent(in) :: dh_src end subroutine fstarpu_data_register_same ! void *starpu_data_handle_to_pointer(starpu_data_handle_t handle, unsigned node); function fstarpu_data_handle_to_pointer (dh,node) bind(C,name="starpu_data_handle_to_pointer") use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_handle_to_pointer type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end function fstarpu_data_handle_to_pointer ! void *starpu_data_get_local_ptr(starpu_data_handle_t handle); function fstarpu_data_get_local_ptr (dh) bind(C,name="starpu_data_get_local_ptr") use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_get_local_ptr type(c_ptr), value, intent(in) :: dh end function fstarpu_data_get_local_ptr ! void *starpu_data_get_interface_on_node(starpu_data_handle_t handle, unsigned memory_node); ! == starpu_data_interface.h: tensor == ! void starpu_tensor_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t ldt, uint32_t nx, uint32_t ny, uint32_t nz, uint32_t nt, size_t elemsize); subroutine fstarpu_tensor_data_register(dh, home_node, ptr, ldy, ldz, ldt, nx, ny, nz, nt, elt_size) & bind(C,name="starpu_tensor_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: home_node type(c_ptr), value, intent(in) :: ptr integer(c_int), value, intent(in) :: ldy integer(c_int), value, intent(in) :: ldz integer(c_int), value, intent(in) :: ldt integer(c_int), value, intent(in) :: nx integer(c_int), value, intent(in) :: ny integer(c_int), value, intent(in) :: nz integer(c_int), value, intent(in) :: nt integer(c_size_t), value, intent(in) :: elt_size end subroutine fstarpu_tensor_data_register ! void starpu_tensor_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz, uint32_t ldt); subroutine fstarpu_tensor_ptr_register(dh, node, ptr, dev_handle, offset, ldy, ldz, ldt) & bind(C,name="starpu_tensor_ptr_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: node type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: dev_handle integer(c_size_t), value, intent(in) :: offset integer(c_int), value, intent(in) :: ldy integer(c_int), value, intent(in) :: ldz integer(c_int), value, intent(in) :: ldt end subroutine fstarpu_tensor_ptr_register function fstarpu_tensor_get_ptr(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_tensor_get_ptr type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_ptr function fstarpu_tensor_get_ldy(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_ldy type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_ldy function fstarpu_tensor_get_ldz(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_ldz type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_ldz function fstarpu_tensor_get_ldt(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_ldt type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_ldt function fstarpu_tensor_get_nx(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_nx type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_nx function fstarpu_tensor_get_ny(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_ny type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_ny function fstarpu_tensor_get_nz(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_nz type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_nz function fstarpu_tensor_get_nt(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_nt type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_nt ! == starpu_data_interface.h: block == ! void starpu_block_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, uint32_t ny, uint32_t nz, size_t elemsize); subroutine fstarpu_block_data_register(dh, home_node, ptr, ldy, ldz, nx, ny, nz, elt_size) & bind(C,name="starpu_block_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: home_node type(c_ptr), value, intent(in) :: ptr integer(c_int), value, intent(in) :: ldy integer(c_int), value, intent(in) :: ldz integer(c_int), value, intent(in) :: nx integer(c_int), value, intent(in) :: ny integer(c_int), value, intent(in) :: nz integer(c_size_t), value, intent(in) :: elt_size end subroutine fstarpu_block_data_register ! void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz); subroutine fstarpu_block_ptr_register(dh, node, ptr, dev_handle, offset, ldy, ldz) & bind(C,name="starpu_block_ptr_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: node type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: dev_handle integer(c_size_t), value, intent(in) :: offset integer(c_int), value, intent(in) :: ldy integer(c_int), value, intent(in) :: ldz end subroutine fstarpu_block_ptr_register function fstarpu_block_get_ptr(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_block_get_ptr type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_ptr function fstarpu_block_get_ldy(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_block_get_ldy type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_ldy function fstarpu_block_get_ldz(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_block_get_ldz type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_ldz function fstarpu_block_get_nx(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_block_get_nx type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_nx function fstarpu_block_get_ny(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_block_get_ny type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_ny function fstarpu_block_get_nz(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_block_get_nz type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_nz ! == starpu_data_interface.h: matrix == ! void starpu_matrix_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize); subroutine fstarpu_matrix_data_register(dh, home_node, ptr, ld, nx, ny, elt_size) & bind(C,name="starpu_matrix_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: home_node type(c_ptr), value, intent(in) :: ptr integer(c_int), value, intent(in) :: ld integer(c_int), value, intent(in) :: nx integer(c_int), value, intent(in) :: ny integer(c_size_t), value, intent(in) :: elt_size end subroutine fstarpu_matrix_data_register ! void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld); subroutine fstarpu_matrix_ptr_register(dh, node, ptr, dev_handle, offset, ld) & bind(C,name="starpu_matrix_ptr_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: node type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: dev_handle integer(c_size_t), value, intent(in) :: offset integer(c_int), value, intent(in) :: ld end subroutine fstarpu_matrix_ptr_register function fstarpu_matrix_get_ptr(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_matrix_get_ptr type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_matrix_get_ptr function fstarpu_matrix_get_ld(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_matrix_get_ld type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_matrix_get_ld function fstarpu_matrix_get_nx(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_matrix_get_nx type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_matrix_get_nx function fstarpu_matrix_get_ny(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_matrix_get_ny type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_matrix_get_ny ! == starpu_data_interface.h: vector == ! void starpu_vector_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t nx, size_t elemsize); subroutine fstarpu_vector_data_register(dh, home_node, ptr,nx, elt_size) & bind(C,name="starpu_vector_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: home_node type(c_ptr), value, intent(in) :: ptr integer(c_int), value, intent(in) :: nx integer(c_size_t), value, intent(in) :: elt_size end subroutine fstarpu_vector_data_register ! void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset); subroutine fstarpu_vector_ptr_register(dh, node, ptr, dev_handle, offset) & bind(C,name="starpu_vector_ptr_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: node type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: dev_handle integer(c_size_t), value, intent(in) :: offset end subroutine fstarpu_vector_ptr_register function fstarpu_vector_get_ptr(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_vector_get_ptr type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_vector_get_ptr function fstarpu_vector_get_nx(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_vector_get_nx type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_vector_get_nx ! == starpu_data_interface.h: variable == ! void starpu_variable_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, size_t size); subroutine fstarpu_variable_data_register(dh, home_node, ptr, elt_size) & bind(C,name="starpu_variable_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: home_node type(c_ptr), value, intent(in) :: ptr integer(c_size_t), value, intent(in) :: elt_size end subroutine fstarpu_variable_data_register ! void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset); subroutine fstarpu_variable_ptr_register(dh, node, ptr, dev_handle, offset) & bind(C,name="starpu_variable_ptr_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: node type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: dev_handle integer(c_size_t), value, intent(in) :: offset end subroutine fstarpu_variable_ptr_register function fstarpu_variable_get_ptr(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_variable_get_ptr type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_variable_get_ptr ! == starpu_data_interface.h: void == ! void starpu_void_data_register(starpu_data_handle_t *handle); subroutine fstarpu_void_data_register(dh) & bind(C,name="starpu_void_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh end subroutine fstarpu_void_data_register ! == starpu_data_filter.h == function fstarpu_data_filter_allocate () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_data_filter_allocate end function fstarpu_data_filter_allocate subroutine fstarpu_data_filter_free (filter) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: filter end subroutine fstarpu_data_filter_free ! Note: use fstarpu_df_alloc_ prefix instead of fstarpu_data_filter_allocate_ ! to fit within the Fortran id length limit */ function fstarpu_df_alloc_bcsr_filter_canonical_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_bcsr_filter_canonical_block end function fstarpu_df_alloc_bcsr_filter_canonical_block function fstarpu_df_alloc_csr_filter_vertical_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_csr_filter_vertical_block end function fstarpu_df_alloc_csr_filter_vertical_block function fstarpu_df_alloc_matrix_filter_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_matrix_filter_block end function fstarpu_df_alloc_matrix_filter_block function fstarpu_df_alloc_matrix_filter_block_shadow () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_matrix_filter_block_shadow end function fstarpu_df_alloc_matrix_filter_block_shadow function fstarpu_df_alloc_matrix_filter_vertical_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_matrix_filter_vertical_block end function fstarpu_df_alloc_matrix_filter_vertical_block function fstarpu_df_alloc_matrix_filter_vertical_block_shadow () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_matrix_filter_vertical_block_shadow end function fstarpu_df_alloc_matrix_filter_vertical_block_shadow function fstarpu_df_alloc_vector_filter_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_vector_filter_block end function fstarpu_df_alloc_vector_filter_block function fstarpu_df_alloc_vector_filter_block_shadow () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_vector_filter_block_shadow end function fstarpu_df_alloc_vector_filter_block_shadow function fstarpu_df_alloc_vector_filter_list () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_vector_filter_list end function fstarpu_df_alloc_vector_filter_list function fstarpu_df_alloc_vector_filter_divide_in_2 () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_vector_filter_divide_in_2 end function fstarpu_df_alloc_vector_filter_divide_in_2 function fstarpu_df_alloc_block_filter_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_block_filter_block end function fstarpu_df_alloc_block_filter_block function fstarpu_df_alloc_block_filter_block_shadow () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_block_filter_block_shadow end function fstarpu_df_alloc_block_filter_block_shadow function fstarpu_df_alloc_block_filter_vertical_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_block_filter_vertical_block end function fstarpu_df_alloc_block_filter_vertical_block function fstarpu_df_alloc_block_filter_vertical_block_shadow () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_block_filter_vertical_block_shadow end function fstarpu_df_alloc_block_filter_vertical_block_shadow subroutine fstarpu_data_filter_set_filter_func (filter, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: filter type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_data_filter_set_filter_func subroutine fstarpu_data_filter_set_nchildren (filter, nchildren) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: filter integer(c_int), value, intent(in) :: nchildren end subroutine fstarpu_data_filter_set_nchildren subroutine fstarpu_data_filter_set_get_nchildren_func (filter, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: filter type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_data_filter_set_get_nchildren_func subroutine fstarpu_data_filter_set_get_child_ops_func (filter, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: filter type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_data_filter_set_get_child_ops_func subroutine fstarpu_data_filter_set_filter_arg (filter, filter_arg) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: filter integer(c_int), value, intent(in) :: filter_arg end subroutine fstarpu_data_filter_set_filter_arg subroutine fstarpu_data_filter_set_filter_arg_ptr (filter, filter_arg_ptr) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: filter_arg_ptr end subroutine fstarpu_data_filter_set_filter_arg_ptr ! void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f); subroutine fstarpu_data_partition (dh,filter) bind(C,name="starpu_data_partition") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: filter end subroutine fstarpu_data_partition ! void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node); subroutine fstarpu_data_unpartition (root_dh,gathering_node) bind(C,name="starpu_data_unpartition") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: root_dh integer(c_int), value, intent(in) :: gathering_node end subroutine fstarpu_data_unpartition ! void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children); subroutine fstarpu_data_partition_plan (dh,filter,children) & bind(C,name="starpu_data_partition_plan") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: filter type(c_ptr), intent(in) :: children(*) end subroutine fstarpu_data_partition_plan ! void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); subroutine fstarpu_data_partition_submit (dh,nparts,children) & bind(C,name="starpu_data_partition_submit") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) end subroutine fstarpu_data_partition_submit ! void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); subroutine fstarpu_data_partition_readonly_submit (dh,nparts,children) & bind(C,name="starpu_data_partition_readonly_submit") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) end subroutine fstarpu_data_partition_readonly_submit ! void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); subroutine fstarpu_data_partition_readwrite_upgrade_submit (dh,nparts,children) & bind(C,name="starpu_data_partition_readwrite_upgrade_submit") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) end subroutine fstarpu_data_partition_readwrite_upgrade_submit ! void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); subroutine fstarpu_data_unpartition_submit (dh,nparts,children,gathering_node) & bind(C,name="starpu_data_unpartition_submit") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) integer(c_int), value, intent(in) :: gathering_node end subroutine fstarpu_data_unpartition_submit ! void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); subroutine fstarpu_data_unpartition_readonly_submit (dh,nparts,children,gathering_node) & bind(C,name="starpu_data_unpartition_readonly_submit") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) integer(c_int), value, intent(in) :: gathering_node end subroutine fstarpu_data_unpartition_readonly_submit ! void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children); subroutine fstarpu_data_partition_clean (dh,nparts,children) & bind(C,name="starpu_data_partition_clean") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) end subroutine fstarpu_data_partition_clean ! int starpu_data_get_nb_children(starpu_data_handle_t handle); function fstarpu_data_get_nb_children(dh) bind(C,name="starpu_data_get_nb_children") use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_data_get_nb_children type(c_ptr), value, intent(in) :: dh end function fstarpu_data_get_nb_children ! starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i); function fstarpu_data_get_child(dh,i) bind(C,name="starpu_data_get_child") use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_get_child type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: i end function fstarpu_data_get_child ! starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... ); ! . see: fstarpu_data_get_sub_data ! starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa); ! . see: fstarpu_data_get_sub_data ! note: defined in filters.c function fstarpu_data_get_sub_data (root_dh,depth,indices) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_get_sub_data type(c_ptr), value, intent(in) :: root_dh integer(c_int), value, intent(in) :: depth integer(c_int), intent(in) :: indices(*) end function fstarpu_data_get_sub_data ! void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...); ! . see fstarpu_data_map_filters ! void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa); ! . see fstarpu_data_map_filters ! note: defined in filters.c subroutine fstarpu_data_map_filters (root_dh,nfilters,filters) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: root_dh integer(c_int), value, intent(in) :: nfilters type(c_ptr), intent(in) :: filters(*) end subroutine fstarpu_data_map_filters ! void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_matrix_filter_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_matrix_filter_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_matrix_filter_block ! void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_matrix_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_matrix_filter_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_matrix_filter_block_shadow ! void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_matrix_filter_vertical_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_matrix_filter_vertical_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_matrix_filter_vertical_block ! void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_matrix_filter_vertical_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_matrix_filter_vertical_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_matrix_filter_vertical_block_shadow ! void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_vector_filter_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_vector_filter_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_vector_filter_block ! void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_vector_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_vector_filter_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_vector_filter_block_shadow ! void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_vector_filter_list_long (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_vector_filter_list_long") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_vector_filter_list_long ! void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_vector_filter_list (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_vector_filter_list") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_vector_filter_list ! void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_vector_divide_in_2 (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_vector_divide_in_2") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_vector_divide_in_2 ! void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_block ! void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_block_shadow ! void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_vertical_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_vertical_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_vertical_block ! void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_vertical_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_vertical_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_vertical_block_shadow ! void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_depth_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_depth_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_depth_block ! void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_depth_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_depth_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_depth_block_shadow ! == starpu_data.h == ! void starpu_data_unregister(starpu_data_handle_t handle); subroutine fstarpu_data_unregister (dh) bind(C,name="starpu_data_unregister") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_unregister ! void starpu_data_unregister_no_coherency(starpu_data_handle_t handle); subroutine fstarpu_data_unregister_no_coherency (dh) bind(C,name="starpu_data_unregister_no_coherency") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_unregister_no_coherency ! void starpu_data_unregister_submit(starpu_data_handle_t handle); subroutine fstarpu_data_unregister_submit (dh) bind(C,name="starpu_data_unregister_submit") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_unregister_submit ! void starpu_data_deinitialize(starpu_data_handle_t handle); subroutine fstarpu_data_deinitialize (dh) bind(C,name="starpu_data_deinitialize") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_deinitialize ! void starpu_data_deinitialize_submit(starpu_data_handle_t handle); subroutine fstarpu_data_deinitialize_submit (dh) bind(C,name="starpu_data_deinitialize_submit") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_deinitialize_submit ! void starpu_data_invalidate(starpu_data_handle_t handle); subroutine fstarpu_data_invalidate (dh) bind(C,name="starpu_data_invalidate") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_invalidate ! void starpu_data_invalidate_submit(starpu_data_handle_t handle); subroutine fstarpu_data_invalidate_submit (dh) bind(C,name="starpu_data_invalidate_submit") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_invalidate_submit ! void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important); subroutine fstarpu_data_advise_as_important (dh,is_important) bind(C,name="starpu_data_advise_as_important") use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: is_important end subroutine fstarpu_data_advise_as_important ! starpu_data_acquire: see fstarpu_data_acquire subroutine fstarpu_data_acquire (dh, mode) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: mode ! C function expects an intptr_t end subroutine fstarpu_data_acquire ! int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode); ! int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); ! int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); ! int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); ! int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); ! void starpu_data_release(starpu_data_handle_t handle); subroutine fstarpu_data_release (dh) bind(C,name="starpu_data_release") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_release ! void starpu_data_release_on_node(starpu_data_handle_t handle, int node); subroutine fstarpu_data_release_on_node (dh, node) bind(C,name="starpu_data_release_on_node") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end subroutine fstarpu_data_release_on_node ! starpu_arbiter_t starpu_arbiter_create(void) STARPU_ATTRIBUTE_MALLOC; function fstarpu_arbiter_create () bind(C,name="starpu_arbiter_create") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_arbiter_create end function fstarpu_arbiter_create ! void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter); subroutine fstarpu_data_assign_arbiter (dh,arbiter) bind(C,name="starpu_data_assign_arbiter") use iso_c_binding, only: c_ptr type(c_ptr), intent(out) :: dh type(c_ptr), value, intent(in) :: arbiter end subroutine fstarpu_data_assign_arbiter ! void starpu_arbiter_destroy(starpu_arbiter_t arbiter); subroutine fstarpu_arbiter_destroy (arbiter) bind(C,name="starpu_arbiter_destroy") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: arbiter end subroutine fstarpu_arbiter_destroy ! void starpu_data_display_memory_stats(); subroutine fstarpu_display_memory_stats() bind(C,name="starpu_display_memory_stats") end subroutine fstarpu_display_memory_stats ! int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node); subroutine fstarpu_data_request_allocation (dh, node) & bind(C,name="starpu_data_request_allocation") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end subroutine fstarpu_data_request_allocation ! int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); subroutine fstarpu_data_fetch_on_node (dh, node, async) & bind(C,name="starpu_data_fetch_on_node") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node integer(c_int), value, intent(in) :: async end subroutine fstarpu_data_fetch_on_node ! int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); subroutine fstarpu_data_prefetch_on_node (dh, node, async) & bind(C,name="starpu_data_prefetch_on_node") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node integer(c_int), value, intent(in) :: async end subroutine fstarpu_data_prefetch_on_node ! int starpu_data_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); subroutine fstarpu_data_prefetch_on_node_prio (dh, node, async, prio) & bind(C,name="starpu_data_prefetch_on_node_prio") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node integer(c_int), value, intent(in) :: async integer(c_int), value, intent(in) :: prio end subroutine fstarpu_data_prefetch_on_node_prio ! int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); subroutine fstarpu_data_idle_prefetch_on_node (dh, node, async) & bind(C,name="starpu_data_idle_prefetch_on_node") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node integer(c_int), value, intent(in) :: async end subroutine fstarpu_data_idle_prefetch_on_node ! int starpu_data_idle_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); subroutine fstarpu_data_idle_prefetch_on_node_prio (dh, node, async, prio) & bind(C,name="starpu_data_idle_prefetch_on_node_prio") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node integer(c_int), value, intent(in) :: async integer(c_int), value, intent(in) :: prio end subroutine fstarpu_data_idle_prefetch_on_node_prio !unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node); function fstarpu_data_is_on_node(dh, node) & bind(C,name="starpu_data_is_on_node") use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_data_is_on_node type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end function fstarpu_data_is_on_node ! void starpu_data_wont_use(starpu_data_handle_t handle); subroutine fstarpu_data_wont_use (dh) bind(c,name="starpu_data_wont_use") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_wont_use ! unsigned starpu_worker_get_memory_node(unsigned workerid); function fstarpu_worker_get_memory_node(id) bind(C,name="starpu_worker_get_memory_node") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_get_memory_node integer(c_int), value, intent(in) :: id end function fstarpu_worker_get_memory_node ! unsigned starpu_memory_nodes_get_count(void); function fstarpu_memory_nodes_get_count() bind(C,name="starpu_memory_nodes_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_memory_nodes_get_count end function fstarpu_memory_nodes_get_count ! enum starpu_node_kind starpu_node_get_kind(unsigned node); ! void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask); ! void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag); ! unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle); ! unsigned starpu_data_get_default_sequential_consistency_flag(void); ! void starpu_data_set_default_sequential_consistency_flag(unsigned flag); ! void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested); ! void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl); subroutine fstarpu_data_set_reduction_methods (dh,redux_cl,init_cl) bind(C,name="starpu_data_set_reduction_methods") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: redux_cl type(c_ptr), value, intent(in) :: init_cl end subroutine fstarpu_data_set_reduction_methods ! void starpu_data_set_reduction_methods_with_args(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, void *redux_args, struct starpu_codelet *init_cl, void *init_args) subroutine fstarpu_data_set_reduction_methods_with_args (dh,redux_cl,redux_args,init_cl,init_args) & bind(C,name="starpu_data_set_reduction_methods_with_args") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: redux_cl type(c_ptr), value, intent(in) :: redux_args type(c_ptr), value, intent(in) :: init_cl type(c_ptr), value, intent(in) :: init_args end subroutine fstarpu_data_set_reduction_methods_with_args ! struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle); ! unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node); function fstarpu_data_test_if_allocated_on_node(dh,mem_node) bind(C,name="starpu_data_test_if_allocated_on_node") use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_data_test_if_allocated_on_node type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: mem_node end function fstarpu_data_test_if_allocated_on_node ! void starpu_memchunk_tidy(unsigned memory_node); subroutine fstarpu_memchunk_tidy (mem_node) bind(c,name="starpu_memchunk_tidy") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: mem_node end subroutine fstarpu_memchunk_tidy ! == starpu_task_util.h == ! starpu_data_handle_t *fstarpu_data_handle_array_alloc(int nb); function fstarpu_data_handle_array_alloc (nb) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_handle_array_alloc integer(c_int), value, intent(in) :: nb end function fstarpu_data_handle_array_alloc ! void fstarpu_data_handle_array_free(starpu_data_handle_t *handles); subroutine fstarpu_data_handle_array_free (handles) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: handles end subroutine fstarpu_data_handle_array_free ! void fstarpu_data_handle_array_set(starpu_data_handle_t *handles, int i, starpu_data_handle_t handle); subroutine fstarpu_data_handle_array_set (handles, i, handle) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: handles integer(c_int), value, intent(in) :: i type(c_ptr), value, intent(in) :: handle end subroutine fstarpu_data_handle_array_set ! struct starpu_data_descr *fstarpu_data_descr_array_alloc(int nb); function fstarpu_data_descr_array_alloc (nb) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_descr_array_alloc integer(c_int), value, intent(in) :: nb end function fstarpu_data_descr_array_alloc ! struct starpu_data_descr *fstarpu_data_descr_alloc(void); function fstarpu_data_descr_alloc () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_data_descr_alloc end function fstarpu_data_descr_alloc ! void fstarpu_data_descr_array_free(struct starpu_data_descr *descrs); subroutine fstarpu_data_descr_array_free (descrs) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: descrs end subroutine fstarpu_data_descr_array_free ! void fstarpu_data_descr_free(struct starpu_data_descr *descr); subroutine fstarpu_data_descrg_free (descr) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: descr end subroutine fstarpu_data_descrg_free ! void fstarpu_data_descr_array_set(struct starpu_data_descr *descrs, int i, starpu_data_handle_t handle, intptr_t mode); subroutine fstarpu_data_descr_array_set (descrs, i, handle, mode) bind(C) use iso_c_binding, only: c_ptr, c_int, c_intptr_t type(c_ptr), value, intent(in) :: descrs integer(c_int), value, intent(in) :: i type(c_ptr), value, intent(in) :: handle type(c_ptr), value, intent(in) :: mode ! C func expects c_intptr_t end subroutine fstarpu_data_descr_array_set ! void fstarpu_data_descr_set(struct starpu_data_descr *descr, starpu_data_handle_t handle, intptr_t mode); subroutine fstarpu_data_descr_set (descr, handle, mode) bind(C) use iso_c_binding, only: c_ptr, c_intptr_t type(c_ptr), value, intent(in) :: descr type(c_ptr), value, intent(in) :: handle type(c_ptr), value, intent(in) :: mode ! C func expects c_intptr_t end subroutine fstarpu_data_descr_set subroutine fstarpu_task_insert(arglist) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), dimension(*), intent(in) :: arglist end subroutine fstarpu_task_insert subroutine fstarpu_insert_task(arglist) bind(C,name="fstarpu_task_insert") use iso_c_binding, only: c_ptr type(c_ptr), dimension(*), intent(in) :: arglist end subroutine fstarpu_insert_task subroutine fstarpu_unpack_arg(cl_arg,bufferlist) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl_arg type(c_ptr), dimension(*), intent(in) :: bufferlist end subroutine fstarpu_unpack_arg ! void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg) subroutine fstarpu_create_sync_task(sync_tag, ndeps, tag_array, callback, callback_arg) & bind(C,name="starpu_create_sync_task") use iso_c_binding, only: c_int, c_long_long, c_ptr, c_funptr integer(c_int), value, intent(in) :: sync_tag integer(c_int), value, intent(in) :: ndeps integer(c_long_long), intent(in) :: tag_array(*) type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: callback_arg end subroutine fstarpu_create_sync_task ! == starpu_sched_ctx.h == ! starpu_sched_ctx_create: see fstarpu_sched_ctx_create function fstarpu_sched_ctx_create(workers_array,nworkers,ctx_name, arglist) bind(C) use iso_c_binding, only: c_int, c_char, c_ptr integer(c_int) :: fstarpu_sched_ctx_create integer(c_int), intent(in) :: workers_array(*) integer(c_int), value, intent(in) :: nworkers character(c_char), intent(in) :: ctx_name type(c_ptr), dimension(*), intent(in) :: arglist end function fstarpu_sched_ctx_create ! unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const char *sched_ctx_name, int min_ncpus, int max_ncpus, int min_ngpus, int max_ngpus, unsigned allow_overlap); function fstarpu_sched_ctx_create_inside_interval(policy_name, sched_ctx_name, & min_ncpus, max_ncpus, min_ngpus, max_ngpus, allow_overlap) & bind(C,name="starpu_sched_ctx_create_inside_interval") use iso_c_binding, only: c_int, c_char integer(c_int) :: fstarpu_sched_ctx_create_inside_interval character(c_char), intent(in) :: policy_name character(c_char), intent(in) :: sched_ctx_name integer(c_int), value, intent(in) :: min_ncpus integer(c_int), value, intent(in) :: max_ncpus integer(c_int), value, intent(in) :: min_ngpus integer(c_int), value, intent(in) :: max_ngpus integer(c_int), value, intent(in) :: allow_overlap end function fstarpu_sched_ctx_create_inside_interval ! void starpu_sched_ctx_register_close_callback(unsigned sched_ctx_id, void (*close_callback)(unsigned sched_ctx_id, void* args), void *args); subroutine fstarpu_sched_ctx_register_close_callback (sched_ctx_id, close_callback, args) & bind(c,name="starpu_sched_ctx_register_close_callback") use iso_c_binding, only: c_ptr, c_funptr, c_int integer(c_int), value, intent(in) :: sched_ctx_id type(c_funptr), value, intent(in) :: close_callback type(c_ptr), value, intent(in) :: args end subroutine fstarpu_sched_ctx_register_close_callback ! void starpu_sched_ctx_add_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_add_workers(workerids,nworkers,ctx) bind(C,name="starpu_sched_ctx_add_workers") use iso_c_binding, only: c_int integer(c_int), intent(in) :: workerids (*) integer(c_int), value, intent(in) :: nworkers integer(c_int), value, intent(in) :: ctx end subroutine fstarpu_sched_ctx_add_workers ! void starpu_sched_ctx_remove_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_remove_workers(workerids,nworkers,ctx) bind(C,name="starpu_sched_ctx_remove_workers") use iso_c_binding, only: c_int integer(c_int), intent(in) :: workerids (*) integer(c_int), value, intent(in) :: nworkers integer(c_int), value, intent(in) :: ctx end subroutine fstarpu_sched_ctx_remove_workers ! starpu_sched_ctx_display_workers: see fstarpu_sched_ctx_display_workers subroutine fstarpu_sched_ctx_display_workers (ctx) bind(C) use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: ctx end subroutine fstarpu_sched_ctx_display_workers ! void starpu_sched_ctx_delete(unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_delete (ctx) bind(C,name="starpu_sched_ctx_delete") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: ctx end subroutine fstarpu_sched_ctx_delete ! void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor); subroutine fstarpu_sched_ctx_set_inheritor (ctx,inheritor) bind(C,name="starpu_sched_ctx_set_inheritor") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: ctx integer(c_int), value, intent(in) :: inheritor end subroutine fstarpu_sched_ctx_set_inheritor ! unsigned starpu_sched_ctx_get_inheritor(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_inheritor (ctx) bind(C,name="starpu_sched_ctx_get_inheritor") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_inheritor integer(c_int), value, intent(in) :: ctx end function fstarpu_sched_ctx_get_inheritor ! unsigned starpu_sched_ctx_get_hierarchy_level(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_hierarchy_level (ctx) bind(C,name="starpu_sched_ctx_get_hierarchy_level") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_hierarchy_level integer(c_int), value, intent(in) :: ctx end function fstarpu_sched_ctx_get_hierarchy_level ! void starpu_sched_ctx_set_context(unsigned *sched_ctx_id); subroutine fstarpu_sched_ctx_set_context (ctx_ptr) bind(C,name="starpu_sched_ctx_set_context") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: ctx_ptr end subroutine fstarpu_sched_ctx_set_context ! unsigned starpu_sched_ctx_get_context(void); function fstarpu_sched_ctx_get_context () bind(C,name="starpu_sched_ctx_get_context") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_context end function fstarpu_sched_ctx_get_context ! void starpu_sched_ctx_stop_task_submission(void); subroutine fstarpu_sched_ctx_stop_task_submission () bind(c,name="starpu_sched_ctx_stop_task_submission") use iso_c_binding end subroutine fstarpu_sched_ctx_stop_task_submission ! void starpu_sched_ctx_finished_submit(unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_finished_submit (sched_ctx_id) bind(c,name="starpu_sched_ctx_finished_submit") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_finished_submit ! unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids); ! unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids); ! unsigned starpu_sched_ctx_get_nworkers(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_nworkers (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_nworkers") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_nworkers integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_nworkers ! unsigned starpu_sched_ctx_get_nshared_workers(unsigned sched_ctx_id, unsigned sched_ctx_id2); function fstarpu_sched_ctx_get_nshared_workers (sched_ctx_id, sched_ctx_id2) & bind(c,name="starpu_sched_ctx_get_nshared_workers") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_nshared_workers integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: sched_ctx_id2 end function fstarpu_sched_ctx_get_nshared_workers ! unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id); function fstarpu_sched_ctx_contains_worker (workerid, sched_ctx_id) & bind(c,name="starpu_sched_ctx_contains_worker") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_contains_worker integer(c_int), value, intent(in) :: workerid integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_contains_worker ! unsigned starpu_sched_ctx_contains_type_of_worker(enum starpu_worker_archtype arch, unsigned sched_ctx_id); function fstarpu_sched_ctx_contains_type_of_worker (arch, sched_ctx_id) & bind(c,name="starpu_sched_ctx_contains_type_of_worker") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_contains_type_of_worker integer(c_int), value, intent(in) :: arch integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_contains_type_of_worker ! unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id); function fstarpu_sched_ctx_worker_get_id (sched_ctx_id) & bind(c,name="starpu_sched_ctx_worker_get_id") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_worker_get_id integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_worker_get_id ! unsigned starpu_sched_ctx_get_ctx_for_task(struct starpu_task *task); function fstarpu_sched_ctx_get_ctx_for_task (task) & bind(c,name="starpu_sched_ctx_get_ctx_for_task") use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_sched_ctx_get_ctx_for_task type(c_ptr), value, intent(in) :: task end function fstarpu_sched_ctx_get_ctx_for_task ! unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid); function fstarpu_sched_ctx_overlapping_ctxs_on_worker (workerid) & bind(c,name="starpu_sched_ctx_overlapping_ctxs_on_worker") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_overlapping_ctxs_on_worker integer(c_int), value, intent(in) :: workerid end function fstarpu_sched_ctx_overlapping_ctxs_on_worker ! int starpu_sched_get_min_priority(void); function fstarpu_sched_get_min_priority () & bind(c,name="starpu_sched_get_min_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_get_min_priority end function fstarpu_sched_get_min_priority ! int starpu_sched_get_max_priority(void); function fstarpu_sched_get_max_priority () & bind(c,name="starpu_sched_get_max_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_get_max_priority end function fstarpu_sched_get_max_priority ! int starpu_sched_set_min_priority(int min_prio); function fstarpu_sched_set_min_priority (min_prio) & bind(c,name="starpu_sched_set_min_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_set_min_priority integer(c_int), value, intent(in) :: min_prio end function fstarpu_sched_set_min_priority ! int starpu_sched_set_max_priority(int max_prio); function fstarpu_sched_set_max_priority (max_prio) & bind(c,name="starpu_sched_set_max_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_set_max_priority integer(c_int), value, intent(in) :: max_prio end function fstarpu_sched_set_max_priority ! int starpu_sched_ctx_get_min_priority(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_min_priority (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_min_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_min_priority integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_min_priority ! int starpu_sched_ctx_get_max_priority(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_max_priority (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_max_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_max_priority integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_max_priority ! int starpu_sched_ctx_set_min_priority(unsigned sched_ctx_id, int min_prio); function fstarpu_sched_ctx_set_min_priority (sched_ctx_id, min_prio) & bind(c,name="starpu_sched_ctx_set_min_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_set_min_priority integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: min_prio end function fstarpu_sched_ctx_set_min_priority ! int starpu_sched_ctx_set_max_priority(unsigned sched_ctx_id, int max_prio); function fstarpu_sched_ctx_set_max_priority (sched_ctx_id, max_prio) & bind(c,name="starpu_sched_ctx_set_max_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_set_max_priority integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: max_prio end function fstarpu_sched_ctx_set_max_priority ! int starpu_sched_ctx_min_priority_is_set(unsigned sched_ctx_id); function fstarpu_sched_ctx_min_priority_is_set (sched_ctx_id) & bind(c,name="starpu_sched_ctx_min_priority_is_set") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_min_priority_is_set integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_min_priority_is_set ! int starpu_sched_ctx_max_priority_is_set(unsigned sched_ctx_id); function fstarpu_sched_ctx_max_priority_is_set (sched_ctx_id) & bind(c,name="starpu_sched_ctx_max_priority_is_set") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_max_priority_is_set integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_max_priority_is_set ! void *starpu_sched_ctx_get_user_data(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_user_data(sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_user_data") use iso_c_binding, only: c_int, c_ptr integer(c_int), value, intent(in) :: sched_ctx_id type(c_ptr) :: fstarpu_sched_ctx_get_user_data end function fstarpu_sched_ctx_get_user_data ! struct starpu_worker_collection *starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type type) STARPU_ATTRIBUTE_MALLOC; ! void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_delete_worker_collection (sched_ctx_id) & bind(c,name="starpu_sched_ctx_delete_worker_collection") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_delete_worker_collection ! struct starpu_worker_collection *starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id); ! void starpu_sched_ctx_set_policy_data(unsigned sched_ctx_id, void *policy_data); subroutine fstarpu_sched_ctx_set_policy_data (sched_ctx_id, policy_data) & bind(c,name="starpu_sched_ctx_set_policy_data") use iso_c_binding, only: c_int, c_ptr integer(c_int), value, intent(in) :: sched_ctx_id type(c_ptr), value, intent(in) :: policy_data end subroutine fstarpu_sched_ctx_set_policy_data ! void *starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_policy_data (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_policy_data") use iso_c_binding, only: c_int, c_ptr type(c_ptr) :: fstarpu_sched_ctx_get_policy_data integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_policy_data ! void *starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void *param, unsigned sched_ctx_id); function fstarpu_sched_ctx_exec_parallel_code (func, param, sched_ctx_id) & bind(c,name="starpu_sched_ctx_exec_parallel_code") use iso_c_binding, only: c_int, c_funptr, c_ptr type(c_ptr) :: fstarpu_sched_ctx_exec_parallel_code type(c_funptr), value, intent(in) :: func type(c_ptr), value, intent(in) :: param integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_exec_parallel_code ! int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_nready_tasks (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_nready_tasks") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_nready_tasks integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_nready_tasks ! double starpu_sched_ctx_get_nready_flops(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_nready_flops (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_nready_flops") use iso_c_binding, only: c_int, c_double real(c_double) :: fstarpu_sched_ctx_get_nready_flops integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_nready_flops ! void starpu_sched_ctx_list_task_counters_increment(unsigned sched_ctx_id, int workerid); subroutine fstarpu_sched_ctx_list_task_counters_increment (sched_ctx_id, workerid) & bind(c,name="starpu_sched_ctx_list_task_counters_increment") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: workerid end subroutine fstarpu_sched_ctx_list_task_counters_increment ! void starpu_sched_ctx_list_task_counters_decrement(unsigned sched_ctx_id, int workerid); subroutine fstarpu_sched_ctx_list_task_counters_decrement (sched_ctx_id, workerid) & bind(c,name="starpu_sched_ctx_list_task_counters_decrement") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: workerid end subroutine fstarpu_sched_ctx_list_task_counters_decrement ! void starpu_sched_ctx_list_task_counters_reset(unsigned sched_ctx_id, int workerid); subroutine fstarpu_sched_ctx_list_task_counters_reset (sched_ctx_id, workerid) & bind(c,name="starpu_sched_ctx_list_task_counters_reset") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: workerid end subroutine fstarpu_sched_ctx_list_task_counters_reset ! void starpu_sched_ctx_list_task_counters_increment_all(struct starpu_task *task, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_list_task_counters_increment_all (task, sched_ctx_id) & bind(c,name="starpu_sched_ctx_list_task_counters_increment_all") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_list_task_counters_increment_all ! void starpu_sched_ctx_list_task_counters_decrement_all(struct starpu_task *task, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_list_task_counters_decrement_all (task, sched_ctx_id) & bind(c,name="starpu_sched_ctx_list_task_counters_decrement_all") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_list_task_counters_decrement_all ! void starpu_sched_ctx_list_task_counters_reset_all(struct starpu_task *task, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_list_task_counters_reset_all (task, sched_ctx_id) & bind(c,name="starpu_sched_ctx_list_task_counters_reset_all") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_list_task_counters_reset_all ! unsigned starpu_sched_ctx_get_priority(int worker, unsigned sched_ctx_id); function fstarpu_sched_ctx_get_priority (worker, sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_priority integer(c_int), value, intent(in) :: worker integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_priority ! void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids); ! void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid); subroutine fstarpu_sched_ctx_bind_current_thread_to_cpuid (cpuid) & bind(c,name="starpu_sched_ctx_bind_current_thread_to_cpuid") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: cpuid end subroutine fstarpu_sched_ctx_bind_current_thread_to_cpuid ! int starpu_sched_ctx_book_workers_for_task(unsigned sched_ctx_id, int *workerids, int nworkers); function fstarpu_sched_ctx_book_workers_for_task (sched_ctx_id, workerids, nworkers) & bind(c,name="starpu_sched_ctx_book_workers_for_task") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_book_workers_for_task integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), intent(in) :: workerids(*) integer(c_int), value, intent(in) :: nworkers end function fstarpu_sched_ctx_book_workers_for_task ! void starpu_sched_ctx_unbook_workers_for_task(unsigned sched_ctx_id, int master); subroutine fstarpu_sched_ctx_unbook_workers_for_task (sched_ctx_id, master) & bind(c,name="starpu_sched_ctx_unbook_workers_for_task") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: master end subroutine fstarpu_sched_ctx_unbook_workers_for_task ! unsigned starpu_sched_ctx_worker_is_master_for_child_ctx(int workerid, unsigned sched_ctx_id); function fstarpu_sched_ctx_worker_is_master_for_child_ctx (workerid, sched_ctx_id) & bind(c,name="starpu_sched_ctx_worker_is_master_for_child_ctx") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_worker_is_master_for_child_ctx integer(c_int), value, intent(in) :: workerid integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_worker_is_master_for_child_ctx ! unsigned starpu_sched_ctx_master_get_context(int masterid); function fstarpu_sched_ctx_master_get_context (masterid) & bind(c,name="starpu_sched_ctx_master_get_context") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_master_get_context integer(c_int), value, intent(in) :: masterid end function fstarpu_sched_ctx_master_get_context ! void starpu_sched_ctx_revert_task_counters(unsigned sched_ctx_id, double flops); subroutine fstarpu_sched_ctx_revert_task_counters (sched_ctx_id, flops) & bind(c,name="starpu_sched_ctx_revert_task_counters") use iso_c_binding, only: c_int, c_double integer(c_int), value, intent(in) :: sched_ctx_id real(c_double), value, intent(in) :: flops end subroutine fstarpu_sched_ctx_revert_task_counters ! void starpu_sched_ctx_move_task_to_ctx(struct starpu_task *task, unsigned sched_ctx, unsigned manage_mutex); subroutine fstarpu_sched_ctx_move_task_to_ctx (task, sched_ctx, manage_mutex) & bind(c,name="starpu_sched_ctx_move_task_to_ctx") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sched_ctx integer(c_int), value, intent(in) :: manage_mutex end subroutine fstarpu_sched_ctx_move_task_to_ctx ! int starpu_sched_ctx_get_worker_rank(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_worker_rank (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_worker_rank") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_worker_rank integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_worker_rank ! unsigned starpu_sched_ctx_has_starpu_scheduler(unsigned sched_ctx_id, unsigned *awake_workers); ! void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_call_pushed_task_cb (workerid, sched_ctx_id) & bind(c,name="starpu_sched_ctx_call_pushed_task_cb") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: workerid integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_call_pushed_task_cb ! == starpu_fxt.h == ! void starpu_fxt_options_init(struct starpu_fxt_options *options); subroutine fstarpu_fxt_options_init (fxt_options) bind(C,name="starpu_fxt_options_init") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: fxt_options end subroutine fstarpu_fxt_options_init ! void starpu_fxt_generate_trace(struct starpu_fxt_options *options); subroutine fstarpu_fxt_generate_trace (fxt_options) bind(C,name="starpu_fxt_generate_trace") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: fxt_options end subroutine fstarpu_fxt_generate_trace ! void starpu_fxt_autostart_profiling(int autostart); subroutine fstarpu_fxt_autostart_profiling (autostart) bind(c,name="starpu_fxt_autostart_profiling") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: autostart end subroutine fstarpu_fxt_autostart_profiling ! void starpu_fxt_start_profiling(void); subroutine fstarpu_fxt_start_profiling () bind(c,name="starpu_fxt_start_profiling") use iso_c_binding end subroutine fstarpu_fxt_start_profiling ! void starpu_fxt_stop_profiling(void); subroutine fstarpu_fxt_stop_profiling () bind(c,name="starpu_fxt_stop_profiling") use iso_c_binding end subroutine fstarpu_fxt_stop_profiling ! void starpu_fxt_write_data_trace(char *filename_in); subroutine fstarpu_fxt_write_data_trace (filename) bind(c,name="starpu_fxt_write_data_trace") use iso_c_binding, only: c_char character(c_char), intent(in) :: filename end subroutine fstarpu_fxt_write_data_trace ! void starpu_fxt_trace_user_event(unsigned long code); subroutine fstarpu_trace_user_event (code) bind(c,name="starpu_trace_user_event") use iso_c_binding, only: c_long integer(c_long), value, intent(in) :: code end subroutine fstarpu_trace_user_event ! double starpu_timing_now(void) function fstarpu_timing_now () bind(C,name="starpu_timing_now") use iso_c_binding, only: c_double real(c_double) :: fstarpu_timing_now end function fstarpu_timing_now ! == starpu_cuda.h == ! cudaStream_t starpu_cuda_get_local_stream(void); function fstarpu_cuda_get_local_stream () bind(C,name="starpu_cuda_get_local_stream") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_cuda_get_local_stream end function fstarpu_cuda_get_local_stream ! == starpu_stdlib.h == ! int starpu_malloc(void **A, size_t dim); function fstarpu_malloc (ptr, len) bind(C,name="starpu_malloc") use iso_c_binding, only: c_ptr, c_long, c_int type(c_ptr), intent(out) :: ptr integer(c_long), value, intent(in) :: len integer(c_int) :: fstarpu_malloc end function fstarpu_malloc ! int starpu_free_noflag(void *A, size_t dim); function fstarpu_free_noflag (ptr, len) bind(C,name="starpu_free_noflag") use iso_c_binding, only: c_ptr, c_long, c_int type(c_ptr), value, intent(in) :: ptr integer(c_long), value, intent(in) :: len integer(c_int) :: fstarpu_free_noflag end function fstarpu_free_noflag ! int starpu_memory_pin(void *addr, size_t size); function fstarpu_memory_pin (ptr, len) bind(C,name="starpu_memory_pin") use iso_c_binding, only: c_ptr, c_long, c_int type(c_ptr), value, intent(in) :: ptr integer(c_long), value, intent(in) :: len integer(c_int) :: fstarpu_memory_pin end function fstarpu_memory_pin ! int starpu_memory_unpin(void *addr, size_t size); function fstarpu_memory_unpin (ptr, len) bind(C,name="starpu_memory_unpin") use iso_c_binding, only: c_ptr, c_long, c_int type(c_ptr), value, intent(in) :: ptr integer(c_long), value, intent(in) :: len integer(c_int) :: fstarpu_memory_unpin end function fstarpu_memory_unpin ! int starpu_sleep(float nb_sec); subroutine fstarpu_sleep (nb_sec) bind(C,name="starpu_sleep") use iso_c_binding, only: c_float real(c_float), value, intent(in) :: nb_sec end subroutine fstarpu_sleep ! int starpu_usleep(float nb_sec); subroutine fstarpu_usleep (nb_sec) bind(C,name="starpu_usleep") use iso_c_binding, only: c_float real(c_float), value, intent(in) :: nb_sec end subroutine fstarpu_usleep ! void starpu_cublas_init(void); subroutine fstarpu_cublas_init () bind(C,name="starpu_cublas_init") end subroutine fstarpu_cublas_init ! void starpu_cublas_shutdown(void); subroutine fstarpu_cublas_shutdown () bind(C,name="starpu_cublas_shutdown") end subroutine fstarpu_cublas_shutdown end interface contains function or_cptrs(op1,op2) type(c_ptr) :: or_cptrs type(c_ptr),intent(in) :: op1,op2 integer(c_intptr_t) :: i_op1,i_op2 i_op1 = transfer(op1,0_c_intptr_t) i_op2 = transfer(op2,0_c_intptr_t) or_cptrs = transfer(ior(i_op1,i_op2), C_NULL_PTR) end function function ip_to_p(i) bind(C) use iso_c_binding, only: c_ptr,c_intptr_t,C_NULL_PTR type(c_ptr) :: ip_to_p integer(c_intptr_t), value, intent(in) :: i ip_to_p = transfer(i,C_NULL_PTR) end function ip_to_p function p_to_ip(p) bind(C) use iso_c_binding, only: c_ptr,c_intptr_t integer(c_intptr_t) :: p_to_ip type(c_ptr), value, intent(in) :: p p_to_ip = transfer(p,0_c_intptr_t) end function p_to_ip function sz_to_p(sz) bind(C) use iso_c_binding, only: c_ptr,c_size_t,c_intptr_t type(c_ptr) :: sz_to_p integer(c_size_t), value, intent(in) :: sz sz_to_p = ip_to_p(int(sz,kind=c_intptr_t)) end function sz_to_p function fstarpu_init (conf) bind(C) use iso_c_binding integer(c_int) :: fstarpu_init type(c_ptr), value, intent(in) :: conf real(c_double) :: FSTARPU_SZ_C_DOUBLE_dummy real(c_float) :: FSTARPU_SZ_C_FLOAT_dummy character(c_char) :: FSTARPU_SZ_C_CHAR_dummy integer(c_int) :: FSTARPU_SZ_C_INT_dummy integer(c_intptr_t) :: FSTARPU_SZ_C_INTPTR_T_dummy type(c_ptr) :: FSTARPU_SZ_C_PTR_dummy integer(c_size_t) :: FSTARPU_SZ_C_SIZE_T_dummy character :: FSTARPU_SZ_CHARACTER_dummy integer :: FSTARPU_SZ_INTEGER_dummy integer(4) :: FSTARPU_SZ_INT4_dummy integer(8) :: FSTARPU_SZ_INT8_dummy real :: FSTARPU_SZ_REAL_dummy real(4) :: FSTARPU_SZ_REAL4_dummy real(8) :: FSTARPU_SZ_REAL8_dummy double precision :: FSTARPU_SZ_DOUBLE_PRECISION_dummy complex :: FSTARPU_SZ_COMPLEX_dummy complex(4) :: FSTARPU_SZ_COMPLEX4_dummy complex(8) :: FSTARPU_SZ_COMPLEX8_dummy ! Note: Referencing global C constants from Fortran has ! been found unreliable on some architectures, notably ! on Darwin. The get_integer/get_pointer_constant ! scheme is a workaround to that issue. interface ! These functions are not exported to the end user function fstarpu_get_constant(s) bind(C) use iso_c_binding, only: c_ptr,c_char type(c_ptr) :: fstarpu_get_constant ! C function returns an intptr_t character(kind=c_char) :: s end function fstarpu_get_constant function fstarpu_init_internal (conf) bind(C,name="starpu_init") use iso_c_binding, only: c_ptr,c_int integer(c_int) :: fstarpu_init_internal type(c_ptr), value :: conf end function fstarpu_init_internal end interface ! Initialize Fortran constants from C peers FSTARPU_R = fstarpu_get_constant(C_CHAR_"FSTARPU_R"//C_NULL_CHAR) FSTARPU_W = fstarpu_get_constant(C_CHAR_"FSTARPU_W"//C_NULL_CHAR) FSTARPU_RW = fstarpu_get_constant(C_CHAR_"FSTARPU_RW"//C_NULL_CHAR) FSTARPU_SCRATCH = fstarpu_get_constant(C_CHAR_"FSTARPU_SCRATCH"//C_NULL_CHAR) FSTARPU_REDUX = fstarpu_get_constant(C_CHAR_"FSTARPU_REDUX"//C_NULL_CHAR) FSTARPU_MPI_REDUX = fstarpu_get_constant(C_CHAR_"FSTARPU_MPI_REDUX"//C_NULL_CHAR) FSTARPU_COMMUTE = fstarpu_get_constant(C_CHAR_"FSTARPU_COMMUTE"//C_NULL_CHAR) FSTARPU_SSEND = fstarpu_get_constant(C_CHAR_"FSTARPU_SSEND"//C_NULL_CHAR) FSTARPU_LOCALITY = fstarpu_get_constant(C_CHAR_"FSTARPU_LOCALITY"//C_NULL_CHAR) FSTARPU_DATA_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_DATA_ARRAY"//C_NULL_CHAR) FSTARPU_DATA_MODE_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_DATA_MODE_ARRAY"//C_NULL_CHAR) FSTARPU_CL_ARGS = fstarpu_get_constant(C_CHAR_"FSTARPU_CL_ARGS"//C_NULL_CHAR) FSTARPU_CL_ARGS_NFREE = fstarpu_get_constant(C_CHAR_"FSTARPU_CL_ARGS_NFREE"//C_NULL_CHAR) FSTARPU_TASK_DEPS_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_TASK_DEPS_ARRAY"//C_NULL_CHAR) FSTARPU_CALLBACK = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK"//C_NULL_CHAR) FSTARPU_CALLBACK_WITH_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_WITH_ARG"//C_NULL_CHAR) FSTARPU_CALLBACK_WITH_ARG_NFREE = & fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_WITH_ARG_NFREE"//C_NULL_CHAR) FSTARPU_CALLBACK_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_ARG"//C_NULL_CHAR) FSTARPU_CALLBACK_ARG_NFREE = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_ARG_NFREE"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_ARG"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE = & fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK_POP = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK_POP_ARG = & fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP_ARG"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE = & fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE"//C_NULL_CHAR) FSTARPU_PRIORITY = fstarpu_get_constant(C_CHAR_"FSTARPU_PRIORITY"//C_NULL_CHAR) FSTARPU_EXECUTE_ON_NODE = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_NODE"//C_NULL_CHAR) FSTARPU_EXECUTE_ON_DATA = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_DATA"//C_NULL_CHAR) FSTARPU_EXECUTE_ON_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_WORKER"//C_NULL_CHAR) FSTARPU_WORKER_ORDER = fstarpu_get_constant(C_CHAR_"FSTARPU_WORKER_ORDER"//C_NULL_CHAR) FSTARPU_EXECUTE_WHERE = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_WHERE"//C_NULL_CHAR) FSTARPU_HYPERVISOR_TAG = fstarpu_get_constant(C_CHAR_"FSTARPU_HYPERVISOR_TAG"//C_NULL_CHAR) FSTARPU_POSSIBLY_PARALLEL = fstarpu_get_constant(C_CHAR_"FSTARPU_POSSIBLY_PARALLEL"//C_NULL_CHAR) FSTARPU_FLOPS = fstarpu_get_constant(C_CHAR_"FSTARPU_FLOPS"//C_NULL_CHAR) FSTARPU_TAG = fstarpu_get_constant(C_CHAR_"FSTARPU_TAG"//C_NULL_CHAR) FSTARPU_TAG_ONLY = fstarpu_get_constant(C_CHAR_"FSTARPU_TAG_ONLY"//C_NULL_CHAR) FSTARPU_NAME = fstarpu_get_constant(C_CHAR_"FSTARPU_NAME"//C_NULL_CHAR) FSTARPU_NODE_SELECTION_POLICY = fstarpu_get_constant(C_CHAR_"FSTARPU_NODE_SELECTION_POLICY"//C_NULL_CHAR) FSTARPU_TASK_SCHED_DATA = fstarpu_get_constant(C_CHAR_"FSTARPU_TASK_SCHED_DATA"//C_NULL_CHAR) FSTARPU_VALUE = fstarpu_get_constant(C_CHAR_"FSTARPU_VALUE"//C_NULL_CHAR) FSTARPU_SCHED_CTX = fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX"//C_NULL_CHAR) FSTARPU_CPU_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_CPU_WORKER"//C_NULL_CHAR) FSTARPU_CUDA_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA_WORKER"//C_NULL_CHAR) FSTARPU_OPENCL_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL_WORKER"//C_NULL_CHAR) FSTARPU_ANY_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_ANY_WORKER"//C_NULL_CHAR) FSTARPU_NMAXBUFS = int(p_to_ip(fstarpu_get_constant(C_CHAR_"FSTARPU_NMAXBUFS"//C_NULL_CHAR)),c_int) FSTARPU_SCHED_CTX_POLICY_NAME = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_NAME"//C_NULL_CHAR) FSTARPU_SCHED_CTX_POLICY_STRUCT = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_STRUCT"//C_NULL_CHAR) FSTARPU_SCHED_CTX_POLICY_MIN_PRIO = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_MIN_PRIO"//C_NULL_CHAR) FSTARPU_SCHED_CTX_POLICY_MAX_PRIO = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_MAX_PRIO"//C_NULL_CHAR) FSTARPU_SCHED_CTX_HIERARCHY_LEVEL = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_HIERARCHY_LEVEL"//C_NULL_CHAR) FSTARPU_SCHED_CTX_NESTED = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_NESTED"//C_NULL_CHAR) FSTARPU_SCHED_CTX_AWAKE_WORKERS = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_AWAKE_WORKERS"//C_NULL_CHAR) FSTARPU_SCHED_CTX_POLICY_INIT = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_INIT"//C_NULL_CHAR) FSTARPU_SCHED_CTX_USER_DATA = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_USER_DATA"//C_NULL_CHAR) FSTARPU_NOWHERE = & fstarpu_get_constant(C_CHAR_"FSTARPU_NOWHERE"//C_NULL_CHAR) FSTARPU_CPU = & fstarpu_get_constant(C_CHAR_"FSTARPU_CPU"//C_NULL_CHAR) FSTARPU_CUDA = & fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA"//C_NULL_CHAR) FSTARPU_OPENCL = & fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL"//C_NULL_CHAR) FSTARPU_CODELET_SIMGRID_EXECUTE = & fstarpu_get_constant(C_CHAR_"FSTARPU_CODELET_SIMGRID_EXECUTE"//C_NULL_CHAR) FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT = & fstarpu_get_constant(C_CHAR_"FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT"//C_NULL_CHAR) FSTARPU_CUDA_ASYNC = & fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA_ASYNC"//C_NULL_CHAR) FSTARPU_OPENCL_ASYNC = & fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL_ASYNC"//C_NULL_CHAR) !FSTARPU_PER_WORKER = & ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_WORKER"//C_NULL_CHAR) !FSTARPU_PER_ARCH = & ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_ARCH"//C_NULL_CHAR) !FSTARPU_PER_COMMON = & ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_COMMON"//C_NULL_CHAR) FSTARPU_HISTORY_BASED = & fstarpu_get_constant(C_CHAR_"FSTARPU_HISTORY_BASED"//C_NULL_CHAR) FSTARPU_REGRESSION_BASED = & fstarpu_get_constant(C_CHAR_"FSTARPU_REGRESSION_BASED"//C_NULL_CHAR) FSTARPU_NL_REGRESSION_BASED = & fstarpu_get_constant(C_CHAR_"FSTARPU_NL_REGRESSION_BASED"//C_NULL_CHAR) FSTARPU_MULTIPLE_REGRESSION_BASED = & fstarpu_get_constant(C_CHAR_"FSTARPU_MULTIPLE_REGRESSION_BASED"//C_NULL_CHAR) FSTARPU_SEQ = & fstarpu_get_constant(C_CHAR_"FSTARPU_SEQ"//C_NULL_CHAR) FSTARPU_SPMD = & fstarpu_get_constant(C_CHAR_"FSTARPU_SPMD"//C_NULL_CHAR) FSTARPU_FORKJOIN = & fstarpu_get_constant(C_CHAR_"FSTARPU_FORKJOIN"//C_NULL_CHAR) ! Initialize size constants as 'c_ptr' FSTARPU_SZ_C_DOUBLE = sz_to_p(c_sizeof(FSTARPU_SZ_C_DOUBLE_dummy)) FSTARPU_SZ_C_FLOAT = sz_to_p(c_sizeof(FSTARPU_SZ_C_FLOAT_dummy)) FSTARPU_SZ_C_CHAR = sz_to_p(c_sizeof(FSTARPU_SZ_C_CHAR_dummy)) FSTARPU_SZ_C_INT = sz_to_p(c_sizeof(FSTARPU_SZ_C_INT_dummy)) FSTARPU_SZ_C_INTPTR_T = sz_to_p(c_sizeof(FSTARPU_SZ_C_INTPTR_T_dummy)) FSTARPU_SZ_C_PTR = sz_to_p(c_sizeof(FSTARPU_SZ_C_PTR_dummy)) FSTARPU_SZ_C_SIZE_T = sz_to_p(c_sizeof(FSTARPU_SZ_C_SIZE_T_dummy)) FSTARPU_SZ_CHARACTER = sz_to_p(c_sizeof(FSTARPU_SZ_CHARACTER_dummy)) FSTARPU_SZ_INTEGER = sz_to_p(c_sizeof(FSTARPU_SZ_INTEGER_dummy)) FSTARPU_SZ_INT4 = sz_to_p(c_sizeof(FSTARPU_SZ_INT4_dummy)) FSTARPU_SZ_INT8 = sz_to_p(c_sizeof(FSTARPU_SZ_INT8_dummy)) FSTARPU_SZ_REAL = sz_to_p(c_sizeof(FSTARPU_SZ_REAL_dummy)) FSTARPU_SZ_REAL4 = sz_to_p(c_sizeof(FSTARPU_SZ_REAL4_dummy)) FSTARPU_SZ_REAL8 = sz_to_p(c_sizeof(FSTARPU_SZ_REAL8_dummy)) FSTARPU_SZ_DOUBLE_PRECISION = sz_to_p(c_sizeof(FSTARPU_SZ_DOUBLE_PRECISION_dummy)) FSTARPU_SZ_COMPLEX = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX_dummy)) FSTARPU_SZ_COMPLEX4 = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX4_dummy)) FSTARPU_SZ_COMPLEX8 = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX8_dummy)) FSTARPU_SZ_COMPLEX8 = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX8_dummy)) FSTARPU_DEFAULT_PRIO = int(p_to_ip(& fstarpu_get_constant(C_CHAR_"FSTARPU_DEFAULT_PRIO"//C_NULL_CHAR)),c_int) ! Initialize StarPU if (c_associated(conf)) then fstarpu_init = fstarpu_init_internal(conf) else fstarpu_init = fstarpu_init_internal(C_NULL_PTR) end if end function fstarpu_init function fstarpu_csizet_to_cptr(i) bind(C) use iso_c_binding type(c_ptr) :: fstarpu_csizet_to_cptr integer(c_size_t) :: i fstarpu_csizet_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR) end function fstarpu_csizet_to_cptr function fstarpu_int_to_cptr(i) bind(C) use iso_c_binding type(c_ptr) :: fstarpu_int_to_cptr integer(c_int) :: i fstarpu_int_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR) end function fstarpu_int_to_cptr function fstarpu_long_to_cptr(i) bind(C) use iso_c_binding type(c_ptr) :: fstarpu_long_to_cptr integer(c_long) :: i fstarpu_long_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR) end function fstarpu_long_to_cptr ! Note: do not add binding declarations here in 'CONTAINS' ! section, because the compiler generates empty functions for ! them. ! Instead, put binding declarations in the 'INTERFACE' section ! above. end module fstarpu_mod starpu-1.4.9+dfsg/include/omp.h000066400000000000000000000114361507764646700164040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_OPENMP_OMP_H__ #define __STARPU_OPENMP_OMP_H__ #if defined STARPU_OPENMP typedef starpu_omp_lock_t omp_lock_t; typedef starpu_omp_nest_lock_t omp_nest_lock_t; enum omp_sched_value { omp_sched_undefined = 0, omp_sched_static = 1, omp_sched_dynamic = 2, omp_sched_guided = 3, omp_sched_auto = 4, omp_sched_runtime = 5 }; enum omp_proc_bind_value { omp_proc_bind_undefined = -1, omp_proc_bind_false = 0, omp_proc_bind_true = 1, omp_proc_bind_master = 2, omp_proc_bind_close = 3, omp_proc_bind_spread = 4 }; #ifdef __cplusplus extern "C" { #define __STARPU_OMP_NOTHROW throw() #else #define __STARPU_OMP_NOTHROW __attribute__((__nothrow__)) #endif extern void omp_set_num_threads(int threads) __STARPU_OMP_NOTHROW; extern int omp_get_num_threads() __STARPU_OMP_NOTHROW; extern int omp_get_thread_num() __STARPU_OMP_NOTHROW; extern int omp_get_max_threads() __STARPU_OMP_NOTHROW; extern int omp_get_num_procs(void) __STARPU_OMP_NOTHROW; extern int omp_in_parallel(void) __STARPU_OMP_NOTHROW; extern void omp_set_dynamic(int dynamic_threads) __STARPU_OMP_NOTHROW; extern int omp_get_dynamic(void) __STARPU_OMP_NOTHROW; extern void omp_set_nested(int nested) __STARPU_OMP_NOTHROW; extern int omp_get_nested(void) __STARPU_OMP_NOTHROW; extern int omp_get_cancellation(void) __STARPU_OMP_NOTHROW; extern void omp_set_schedule(enum omp_sched_value kind, int modifier) __STARPU_OMP_NOTHROW; extern void omp_get_schedule(enum omp_sched_value *kind, int *modifier) __STARPU_OMP_NOTHROW; extern int omp_get_thread_limit(void) __STARPU_OMP_NOTHROW; extern void omp_set_max_active_levels(int max_levels) __STARPU_OMP_NOTHROW; extern int omp_get_max_active_levels(void) __STARPU_OMP_NOTHROW; extern int omp_get_level(void) __STARPU_OMP_NOTHROW; extern int omp_get_ancestor_thread_num(int level) __STARPU_OMP_NOTHROW; extern int omp_get_team_size(int level) __STARPU_OMP_NOTHROW; extern int omp_get_active_level(void) __STARPU_OMP_NOTHROW; extern int omp_in_final(void) __STARPU_OMP_NOTHROW; extern enum omp_proc_bind_value omp_get_proc_bind(void) __STARPU_OMP_NOTHROW; extern int omp_get_num_places(void) __STARPU_OMP_NOTHROW; extern int omp_get_place_num_procs(int place_num) __STARPU_OMP_NOTHROW; extern void omp_get_place_proc_ids(int place_num, int *ids) __STARPU_OMP_NOTHROW; extern int omp_get_place_num(void) __STARPU_OMP_NOTHROW; extern int omp_get_partition_num_places(void) __STARPU_OMP_NOTHROW; extern void omp_get_partition_place_nums(int *place_nums) __STARPU_OMP_NOTHROW; extern void omp_set_default_device(int device_num) __STARPU_OMP_NOTHROW; extern int omp_get_default_device(void) __STARPU_OMP_NOTHROW; extern int omp_get_num_devices(void) __STARPU_OMP_NOTHROW; extern int omp_get_num_teams(void) __STARPU_OMP_NOTHROW; extern int omp_get_team_num(void) __STARPU_OMP_NOTHROW; extern int omp_is_initial_device(void) __STARPU_OMP_NOTHROW; extern int omp_get_initial_device(void) __STARPU_OMP_NOTHROW; extern int omp_get_max_task_priority(void) __STARPU_OMP_NOTHROW; extern void omp_init_lock(omp_lock_t *lock) __STARPU_OMP_NOTHROW; extern void omp_destroy_lock(omp_lock_t *lock) __STARPU_OMP_NOTHROW; extern void omp_set_lock(omp_lock_t *lock) __STARPU_OMP_NOTHROW; extern void omp_unset_lock(omp_lock_t *lock) __STARPU_OMP_NOTHROW; extern int omp_test_lock(omp_lock_t *lock) __STARPU_OMP_NOTHROW; extern void omp_init_nest_lock(omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; extern void omp_destroy_nest_lock(omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; extern void omp_set_nest_lock(omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; extern void omp_unset_nest_lock(omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; extern int omp_test_nest_lock(omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; extern void omp_atomic_fallback_inline_begin(void) __STARPU_OMP_NOTHROW; extern void omp_atomic_fallback_inline_end(void) __STARPU_OMP_NOTHROW; extern double omp_get_wtime(void) __STARPU_OMP_NOTHROW; extern double omp_get_wtick(void) __STARPU_OMP_NOTHROW; extern void *omp_get_local_cuda_stream(void) __STARPU_OMP_NOTHROW; #ifdef __cplusplus } #endif #endif /* STARPU_USE_OPENMP && !STARPU_DONT_INCLUDE_OPENMP_HEADERS */ #endif /* __STARPU_OPENMP_OMP_H__ */ starpu-1.4.9+dfsg/include/pthread_win32/000077500000000000000000000000001507764646700201045ustar00rootroot00000000000000starpu-1.4.9+dfsg/include/pthread_win32/pthread.h000066400000000000000000000333271507764646700217140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This is a minimal pthread implementation based on windows functions. * It is *not* intended to be complete - just complete enough to get * StarPU running. */ #ifndef __STARPU_PTHREAD_H__ #define __STARPU_PTHREAD_H__ /* TODO: * pthread_rwlock_* * pthread_spinlock_* */ #include #include #include #ifndef STARPU_CONFIGURE #include #endif #ifdef STARPU_HAVE_UNISTD_H #include #endif #include #include #include #ifdef __CYGWIN32__ #include #define unixErrno() cygwin_internal(CW_GET_ERRNO_FROM_WINERROR, (GetLastError()) #else #define unixErrno() EIO #endif #if 0 #define setSystemErrno() \ do { \ fprintf(stderr, "%s:%d: win %d\n", __FILE__, __LINE__, GetLastError()); \ errno = unixErrno(); \ } \ while (0) #define winPthreadAssertWindows(expr) \ do { \ if (!(expr)) \ { \ fprintf(stderr, "%s:%d: %d\n", __FILE__, __LINE__, unixErrno()); \ return unixErrno(); \ } \ } \ while (0) #define winPthreadAssertPthread(expr) \ do { \ int ret = (expr); \ if (ret) \ { \ fprintf(stderr, "%s:%d: %d\n", __FILE__, __LINE__, ret); \ return ret; \ } \ } \ while (0) #define winPthreadAssert(expr) \ do { \ if (!(expr)) \ { \ fprintf(stderr, "%s:%d: %d\n", __FILE__, __LINE__, errno); \ return EIO; \ } \ } \ while (0) #else #define setSystemErrno() errno = unixErrno() #define winPthreadAssertWindows(expr) \ do { \ if (!(expr)) { return unixErrno(); } \ } \ while (0) #define winPthreadAssertPthread(expr) \ do { \ int ret = (expr); \ if (ret) return ret; \ } \ while (0) #define winPthreadAssert(expr) \ do { \ if (!(expr)) return EIO; \ } \ while (0) #endif #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ /*********** * threads * ***********/ typedef DWORD pthread_attr_t; typedef HANDLE pthread_t; static __inline pthread_t pthread_self(void) { return GetCurrentThread(); } static __inline int pthread_equal(pthread_t t1, pthread_t t2) { return t1 == t2; } static __inline int pthread_attr_init(pthread_attr_t *attr) { *attr = 0; return 0; } #define PTHREAD_CREATE_DETACHED 1 static __inline int pthread_attr_setdetachstate(pthread_attr_t *attr, int yes) { (void)attr; (void)yes; /* not supported, ignore */ return 0; } static __inline int pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize) { (void)attr; (void)stacksize; /* not supported, ignore */ return 0; } static __inline int pthread_attr_destroy(pthread_attr_t *attr) { (void)attr; return 0; } /* "real" cleanup handling not yet implemented */ typedef struct { void (*routine)(void *); void *arg; } __pthread_cleanup_handler; void pthread_cleanup_push(void (*routine)(void *), void *arg); #define pthread_cleanup_push(routine, arg) \ do { \ __pthread_cleanup_handler __cleanup_handler = {routine, arg}; void pthread_cleanup_pop(int execute); #define pthread_cleanup_pop(execute) \ if (execute) __cleanup_handler.routine(__cleanup_handler.arg); \ } \ while (0) \ ; static __inline int pthread_create( pthread_t *thread, const pthread_attr_t *attr, void *(*fun)(void *), void *arg) { if (attr && *attr) return EINVAL; winPthreadAssertWindows(*thread = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)fun, arg, 0, NULL)); return 0; } static __inline int pthread_setcancelstate(int state, int *oldstate) { (void)state; (void)oldstate; /* not yet implemented */ return 0; } static __inline int pthread_cancel(pthread_t thread) { /* This is quite harsh */ winPthreadAssertWindows(TerminateThread(thread, 0)); return 0; } static __inline void pthread_exit(void *res) { ExitThread((DWORD)(DWORD_PTR)res); } static __inline int pthread_join(pthread_t thread, void **res) { again: switch (WaitForSingleObject(thread, INFINITE)) { default: case WAIT_FAILED: return unixErrno(); case WAIT_ABANDONED: case WAIT_OBJECT_0: break; case WAIT_TIMEOUT: goto again; } if (res) { DWORD _res; if (GetExitCodeThread(thread, &_res)) *res = (void *)(DWORD_PTR)_res; } return 0; } /*********** * mutexes * ***********/ #define PTHREAD_MUTEX_INITIALIZER NULL typedef HANDLE pthread_mutex_t; #define PTHREAD_MUTEX_RECURSIVE 1 #define PTHREAD_MUTEX_ERRORCHECK 2 typedef int pthread_mutexattr_t; static __inline int pthread_mutexattr_init(pthread_mutexattr_t *attr) { *attr = PTHREAD_MUTEX_ERRORCHECK; return 0; } static __inline int pthread_mutexattr_destroy(pthread_mutexattr_t *attr) { *attr = -1; return 0; } static __inline int pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type) { if (type != PTHREAD_MUTEX_RECURSIVE && type != PTHREAD_MUTEX_ERRORCHECK) return EINVAL; *attr = type; return 0; } static __inline int pthread_mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr) { /* TODO: we could use CreateMutex and ReleaseMutex to support recursivity */ if (attr && *attr != PTHREAD_MUTEX_ERRORCHECK) return EINVAL; winPthreadAssertWindows(*mutex = CreateSemaphore(NULL, 1, 1, NULL)); return 0; } static __inline int pthread_mutex_unlock(pthread_mutex_t *mutex) { winPthreadAssertWindows(ReleaseSemaphore(*mutex, 1, NULL)); return 0; } static __inline int pthread_mutex_lock(pthread_mutex_t *mutex); static __inline int __pthread_mutex_alloc_concurrently(pthread_mutex_t *mutex) { HANDLE mutex_init_mutex; /* Get access to one global named mutex to serialize mutex initialization */ winPthreadAssertWindows((mutex_init_mutex = CreateSemaphore(NULL, 1, 1, "StarPU mutex init"))); winPthreadAssertPthread(pthread_mutex_lock(&mutex_init_mutex)); /* Now we are the one that can initialize it */ if (!*mutex) winPthreadAssertPthread(pthread_mutex_init(mutex, NULL)); winPthreadAssertPthread(pthread_mutex_unlock(&mutex_init_mutex)); winPthreadAssertWindows(CloseHandle(mutex_init_mutex)); return 0; } static __inline int pthread_mutex_lock(pthread_mutex_t *mutex) { if (!*mutex) __pthread_mutex_alloc_concurrently(mutex); again: switch (WaitForSingleObject(*mutex, INFINITE)) { default: case WAIT_FAILED: return unixErrno(); case WAIT_ABANDONED: case WAIT_OBJECT_0: return 0; case WAIT_TIMEOUT: goto again; } } static __inline int pthread_mutex_trylock(pthread_mutex_t *mutex) { if (!*mutex) __pthread_mutex_alloc_concurrently(mutex); switch (WaitForSingleObject(*mutex, 0)) { default: case WAIT_FAILED: return unixErrno(); case WAIT_ABANDONED: case WAIT_OBJECT_0: return 0; case WAIT_TIMEOUT: return EBUSY; } } static __inline int pthread_mutex_destroy(pthread_mutex_t *mutex) { winPthreadAssertWindows(CloseHandle(*mutex)); *mutex = INVALID_HANDLE_VALUE; return 0; } /******************************************** * rwlock * * VERY LAZY, don't even look at it please! * * Should be fine unoptimized for now. * * TODO: FIXME, using conds for instance? * ********************************************/ #define PTHREAD_RWLOCK_INITIALIZER NULL typedef pthread_mutex_t pthread_rwlock_t; typedef int pthread_rwlockattr_t; #define pthread_rwlock_init(lock, attr) pthread_mutex_init(lock, NULL) #define pthread_rwlock_wrlock(lock) pthread_mutex_lock(lock) #define pthread_rwlock_trywrlock(lock) pthread_mutex_trylock(lock) #define pthread_rwlock_rdlock(lock) pthread_mutex_lock(lock) #define pthread_rwlock_tryrdlock(lock) pthread_mutex_trylock(lock) #define pthread_rwlock_unlock(lock) pthread_mutex_unlock(lock) #define pthread_rwlock_destroy(lock) pthread_mutex_destroy(lock) /************** * conditions * **************/ typedef struct { HANDLE sem; volatile unsigned nbwait; } pthread_cond_t; #define PTHREAD_COND_INITIALIZER \ { \ NULL, 0 \ } #if !defined(STARPU_HAVE_STRUCT_TIMESPEC) || defined(_MSC_VER) #ifndef STARPU_TIMESPEC_DEFINED #define STARPU_TIMESPEC_DEFINED 1 struct timespec { time_t tv_sec; /* Seconds */ long tv_nsec; /* Nanoseconds */ }; #endif /* STARPU_TIMESPEC_DEFINED */ #endif /* STARPU_HAVE_STRUCT_TIMESPEC */ typedef unsigned pthread_condattr_t; static __inline int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) { if (attr) return EINVAL; winPthreadAssertWindows(cond->sem = CreateSemaphore(NULL, 0, MAXLONG, NULL)); cond->nbwait = 0; return 0; } static __inline int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec *time) { if (!cond->sem) winPthreadAssertPthread(pthread_cond_init(cond, NULL)); cond->nbwait++; winPthreadAssertPthread(pthread_mutex_unlock(mutex)); again: switch (WaitForSingleObject(cond->sem, time->tv_sec * 1000 + time->tv_nsec / 1000)) { default: case WAIT_FAILED: { int error = unixErrno(); winPthreadAssertPthread(pthread_mutex_lock(mutex)); return error; } case WAIT_TIMEOUT: goto again; case WAIT_ABANDONED: case WAIT_OBJECT_0: break; } winPthreadAssertPthread(pthread_mutex_lock(mutex)); cond->nbwait--; return 0; } static __inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) { if (!cond->sem) winPthreadAssertPthread(pthread_cond_init(cond, NULL)); cond->nbwait++; winPthreadAssertPthread(pthread_mutex_unlock(mutex)); again: switch (WaitForSingleObject(cond->sem, INFINITE)) { case WAIT_FAILED: { int error; error = unixErrno(); winPthreadAssertPthread(pthread_mutex_lock(mutex)); return error; } case WAIT_TIMEOUT: goto again; case WAIT_ABANDONED: case WAIT_OBJECT_0: break; } winPthreadAssertPthread(pthread_mutex_lock(mutex)); cond->nbwait--; return 0; } static __inline int pthread_cond_signal(pthread_cond_t *cond) { if (!cond->sem) winPthreadAssertPthread(pthread_cond_init(cond, NULL)); if (cond->nbwait) ReleaseSemaphore(cond->sem, 1, NULL); return 0; } static __inline int pthread_cond_broadcast(pthread_cond_t *cond) { if (!cond->sem) winPthreadAssertPthread(pthread_cond_init(cond, NULL)); ReleaseSemaphore(cond->sem, cond->nbwait, NULL); return 0; } static __inline int pthread_cond_destroy(pthread_cond_t *cond) { if (cond->sem) { winPthreadAssertWindows(CloseHandle(cond->sem)); cond->sem = NULL; } return 0; } /******* * TLS * *******/ typedef DWORD pthread_key_t; #define PTHREAD_ONCE_INIT \ { \ PTHREAD_MUTEX_INITIALIZER, 0 \ } typedef struct { pthread_mutex_t mutex; unsigned done; } pthread_once_t; static __inline int pthread_once(pthread_once_t *once, void (*oncefun)(void)) { winPthreadAssertPthread(pthread_mutex_lock(&once->mutex)); if (!once->done) { oncefun(); once->done = 1; } winPthreadAssertPthread(pthread_mutex_unlock(&once->mutex)); return 0; } static __inline int pthread_key_create(pthread_key_t *key, void (*freefun)(void *)) { (void)freefun; pthread_key_t res; winPthreadAssertWindows((res = TlsAlloc()) != 0xFFFFFFFF); *key = res; return 0; } static __inline int pthread_key_delete(pthread_key_t key) { winPthreadAssertWindows(TlsFree(key)); return 0; } static __inline void *pthread_getspecific(pthread_key_t key) { return TlsGetValue(key); } static __inline int pthread_setspecific(pthread_key_t key, const void *data) { winPthreadAssertWindows(TlsSetValue(key, (LPVOID)data)); return 0; } #ifdef __cplusplus } #endif /* __cplusplus */ #endif /* __STARPU_PTHREAD_H__ */ starpu-1.4.9+dfsg/include/pthread_win32/semaphore.h000066400000000000000000000034461507764646700222470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This is a minimal pthread implementation based on windows functions. * It is *not* intended to be complete - just complete enough to get * StarPU running. */ #ifndef __STARPU_SEMAPHORE_H__ #define __STARPU_SEMAPHORE_H__ #include "pthread.h" /************** * semaphores * **************/ typedef HANDLE sem_t; static __inline int sem_init(sem_t *sem, int pshared, unsigned int value) { (void)pshared; winPthreadAssertWindows(*sem = CreateSemaphore(NULL, value, MAXLONG, NULL)); return 0; } static __inline int do_sem_wait(sem_t *sem, DWORD timeout) { switch (WaitForSingleObject(*sem, timeout)) { default: case WAIT_FAILED: setSystemErrno(); return -1; case WAIT_TIMEOUT: errno = EAGAIN; return -1; case WAIT_ABANDONED: case WAIT_OBJECT_0: return 0; } } #define sem_wait(sem) do_sem_wait(sem, INFINITE) #define sem_trywait(sem) do_sem_wait(sem, 0) static __inline int sem_post(sem_t *sem) { winPthreadAssertWindows(ReleaseSemaphore(*sem, 1, NULL)); return 0; } static __inline int sem_destroy(sem_t *sem) { winPthreadAssertWindows(CloseHandle(*sem)); return 0; } #endif /* __STARPU_SEMAPHORE_H__ */ starpu-1.4.9+dfsg/include/schedulers/000077500000000000000000000000001507764646700175745ustar00rootroot00000000000000starpu-1.4.9+dfsg/include/schedulers/starpu_heteroprio.h000066400000000000000000000112571507764646700235310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_SCHEDULER_HETEROPRIO_H__ #define __STARPU_SCHEDULER_HETEROPRIO_H__ #ifdef __cplusplus extern "C" { #endif /** @defgroup API_HeteroPrio Heteroprio Scheduler @brief This is the interface for the heteroprio scheduler @{ */ #define STARPU_HETEROPRIO_MAX_PREFETCH 2 #if STARPU_HETEROPRIO_MAX_PREFETCH <= 0 #error STARPU_HETEROPRIO_MAX_PREFETCH == 1 means no prefetch so STARPU_HETEROPRIO_MAX_PREFETCH must >= 1 #endif #define STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT 28 /** todo */ enum starpu_autoheteroprio_priority_ordering_policy { STARPU_HETEROPRIO_NOD_TIME_COMBINATION, // 0 STARPU_HETEROPRIO_BEST_NODS_SCORE, STARPU_HETEROPRIO_BEST_NODS, STARPU_HETEROPRIO_URT_PURE, STARPU_HETEROPRIO_URT, STARPU_HETEROPRIO_URT_2, // 5 STARPU_HETEROPRIO_URT_DOT_DIFF_PURE, STARPU_HETEROPRIO_URT_DOT_DIFF_PURE_2, STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE, STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE_2, STARPU_HETEROPRIO_URT_DOT_DIFF_2, // 10 STARPU_HETEROPRIO_URT_DOT_DIFF_3, STARPU_HETEROPRIO_URT_DOT_DIFF_4, STARPU_HETEROPRIO_URT_DOT_DIFF_5, STARPU_HETEROPRIO_URT_DOT_DIFF_6, STARPU_HETEROPRIO_URT_DOT_DIFF_7, // 15 STARPU_HETEROPRIO_URT_DOT_DIFF_8, STARPU_HETEROPRIO_URT_DOT_DIFF_9, STARPU_HETEROPRIO_URT_DOT_DIFF_10, STARPU_HETEROPRIO_URT_DOT_DIFF_11, STARPU_HETEROPRIO_URTS_PER_SECONDS, // 20 STARPU_HETEROPRIO_URTS_PER_SECONDS_2, STARPU_HETEROPRIO_URTS_PER_SECONDS_DIFF, STARPU_HETEROPRIO_URTS_TIME_RELEASED_DIFF, STARPU_HETEROPRIO_URTS_TIME_COMBINATION, STARPU_HETEROPRIO_NODS_PER_SECOND, STARPU_HETEROPRIO_NODS_TIME_RELEASED, STARPU_HETEROPRIO_NODS_TIME_RELEASED_DIFF }; static const char starpu_autoheteroprio_priority_ordering_policy_names[STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT][64] = { "STARPU_HETEROPRIO_NOD_TIME_COMBINATION", "STARPU_HETEROPRIO_BEST_NODS_SCORE", "STARPU_HETEROPRIO_BEST_NODS", "STARPU_HETEROPRIO_URT_PURE", "STARPU_HETEROPRIO_URT", "STARPU_HETEROPRIO_URT_2", "STARPU_HETEROPRIO_URT_DOT_DIFF_PURE", "STARPU_HETEROPRIO_URT_DOT_DIFF_PURE_2", "STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE", "STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE_2", "STARPU_HETEROPRIO_URT_DOT_DIFF_2", "STARPU_HETEROPRIO_URT_DOT_DIFF_3", "STARPU_HETEROPRIO_URT_DOT_DIFF_4", "STARPU_HETEROPRIO_URT_DOT_DIFF_5", "STARPU_HETEROPRIO_URT_DOT_DIFF_6", "STARPU_HETEROPRIO_URT_DOT_DIFF_7", "STARPU_HETEROPRIO_URT_DOT_DIFF_8", "STARPU_HETEROPRIO_URT_DOT_DIFF_9", "STARPU_HETEROPRIO_URT_DOT_DIFF_10", "STARPU_HETEROPRIO_URT_DOT_DIFF_11", "STARPU_HETEROPRIO_URTS_PER_SECONDS", "STARPU_HETEROPRIO_URTS_PER_SECONDS_2", "STARPU_HETEROPRIO_URTS_PER_SECONDS_DIFF", "STARPU_HETEROPRIO_URTS_TIME_RELEASED_DIFF", "STARPU_HETEROPRIO_URTS_TIME_COMBINATION", "STARPU_HETEROPRIO_NODS_PER_SECOND", "STARPU_HETEROPRIO_NODS_TIME_RELEASED", "STARPU_HETEROPRIO_NODS_TIME_RELEASED_DIFF", }; /** Set if heteroprio should use data locality or not */ void starpu_heteroprio_set_use_locality(unsigned sched_ctx_id, unsigned use_locality); /** Tell how many prio there are for a given arch */ void starpu_heteroprio_set_nb_prios(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned max_prio); /** Set the mapping for a given arch prio=>bucket */ void starpu_heteroprio_set_mapping(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned source_prio, unsigned dest_bucket_id); /** Tell which arch is the faster for the tasks of a bucket (optional) */ void starpu_heteroprio_set_faster_arch(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id); /** Tell how slow is a arch for the tasks of a bucket (optional) */ void starpu_heteroprio_set_arch_slow_factor(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id, float slow_factor); /** One memory node will be one wgroup */ void starpu_heteroprio_map_wgroup_memory_nodes(unsigned sched_ctx_id); /** Print the current setup groups */ void starpu_heteroprio_print_wgroups(FILE *stream, unsigned sched_ctx_id); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_SCHEDULER_HETEROPRIO_H__ */ starpu-1.4.9+dfsg/include/schedulers/starpu_scheduler_toolbox.h000066400000000000000000000171141507764646700250730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_SCHEDULER_TOOLBOX_FIFO_QUEUES_H__ #define __STARPU_SCHEDULER_TOOLBOX_FIFO_QUEUES_H__ #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Scheduler_Toolbox Scheduler Toolbox @brief This is the interface for the scheduler toolbox The definitions of the different queue types below (e.g ::starpu_st_fifo_taskq_t) are private and are thus not available outside the StarPU source directory. Hence when defining your own scheduler outside of StarPU source directory, you should use the functions below. Look for example in the scheduler defined in examples/cholesky/libmy_dmda.c @{ */ /** Opaque type for FIFO task queue */ typedef struct starpu_st_fifo_taskq *starpu_st_fifo_taskq_t; /** Create a FIFO task queue */ starpu_st_fifo_taskq_t starpu_st_fifo_taskq_create(void) STARPU_ATTRIBUTE_MALLOC; void starpu_st_fifo_taskq_init(starpu_st_fifo_taskq_t fifo); void starpu_st_fifo_taskq_destroy(starpu_st_fifo_taskq_t fifo); int starpu_st_fifo_taskq_empty(starpu_st_fifo_taskq_t fifo); double starpu_st_fifo_taskq_get_exp_len_prev_task_list(starpu_st_fifo_taskq_t fifo_queue, struct starpu_task *task, int workerid, int nimpl, int *fifo_ntasks); /** get the number of tasks currently in the queue */ unsigned starpu_st_fifo_ntasks_get(starpu_st_fifo_taskq_t fifo); /** increase by n the number of tasks currently in the queue */ void starpu_st_fifo_ntasks_inc(starpu_st_fifo_taskq_t fifo, int n); /** get the number of tasks currently in the queue corresponding to each priority */ unsigned *starpu_st_fifo_ntasks_per_priority_get(starpu_st_fifo_taskq_t fifo); /** get the number of tasks that were processed */ unsigned starpu_st_fifo_nprocessed_get(starpu_st_fifo_taskq_t fifo); /** increase by n the number of tasks that were processed */ void starpu_st_fifo_nprocessed_inc(starpu_st_fifo_taskq_t fifo, int n); /** only meaningful if the queue is only used by a single worker */ /** Get the expected start date of next item to do in the queue (i.e. not started yet). This is thus updated when we start it. */ double starpu_st_fifo_exp_start_get(starpu_st_fifo_taskq_t fifo); /** Set the expected start date of next item to do in the queue (i.e. not started yet). */ void starpu_st_fifo_exp_start_set(starpu_st_fifo_taskq_t fifo, double exp_start); /** get the expected end date of last task in the queue */ double starpu_st_fifo_exp_end_get(starpu_st_fifo_taskq_t fifo); /** set the expected end date of last task in the queue */ void starpu_st_fifo_exp_end_set(starpu_st_fifo_taskq_t fifo, double exp_end); /** get the expected duration of the set of tasks in the queue */ double starpu_st_fifo_exp_len_get(starpu_st_fifo_taskq_t fifo); /** set the expected duration of the set of tasks in the queue */ void starpu_st_fifo_exp_len_set(starpu_st_fifo_taskq_t fifo, double exp_len); /** increase or decrease the expected duration of the set of tasks in the queue */ void starpu_st_fifo_exp_len_inc(starpu_st_fifo_taskq_t fifo, double exp_len); /** get the expected duration of the set of tasks in the queue corresponding to each priority */ double *starpu_st_fifo_exp_len_per_priority_get(starpu_st_fifo_taskq_t fifo); /** get the expected duration of what is already pushed to the worker */ double starpu_st_fifo_pipeline_len_get(starpu_st_fifo_taskq_t fifo); /** set the expected duration of what is already pushed to the worker */ void starpu_st_fifo_pipeline_len_set(starpu_st_fifo_taskq_t fifo, double pipeline_len); /** increase the expected duration of what is already pushed to the worker (the value can be negative) */ void starpu_st_fifo_pipeline_len_inc(starpu_st_fifo_taskq_t fifo, double pipeline_len); int starpu_st_fifo_taskq_push_sorted_task(starpu_st_fifo_taskq_t fifo_queue, struct starpu_task *task); int starpu_st_fifo_taskq_push_task(starpu_st_fifo_taskq_t fifo, struct starpu_task *task); int starpu_st_fifo_taskq_push_back_task(starpu_st_fifo_taskq_t fifo_queue, struct starpu_task *task); int starpu_st_fifo_taskq_pop_this_task(starpu_st_fifo_taskq_t fifo_queue, int workerid, struct starpu_task *task); struct starpu_task *starpu_st_fifo_taskq_pop_task(starpu_st_fifo_taskq_t fifo, int workerid); /** This is the same as starpu_st_fifo_taskq_pop_task(), but without checking that the worker will be able to execute this task. This is useful when the scheduler has already checked it. */ struct starpu_task *starpu_st_fifo_taskq_pop_local_task(starpu_st_fifo_taskq_t fifo); /** Pop the first task that can be executed on the calling driver and taking into account readiness of data */ struct starpu_task *starpu_st_fifo_taskq_pop_first_ready_task(starpu_st_fifo_taskq_t fifo_queue, unsigned workerid, int num_priorities); /** Opaque type for PRIO task queue */ typedef struct starpu_st_prio_deque *starpu_st_prio_deque_t; /** all _starpu_prio_deque_pop/deque_task function return a task or a NULL pointer if none are available * in O(lg(nb priorities)) */ void starpu_st_prio_deque_init(starpu_st_prio_deque_t pdeque); void starpu_st_prio_deque_destroy(starpu_st_prio_deque_t pdeque); /** return 0 iff the struct starpu_st_prio_deque is not empty */ int starpu_st_prio_deque_is_empty(starpu_st_prio_deque_t pdeque); int starpu_st_prio_deque_push_back_task(starpu_st_prio_deque_t pdeque, struct starpu_task *task); /** push a task in O(lg(nb priorities)) */ int starpu_st_prio_deque_push_front_task(starpu_st_prio_deque_t pdeque, struct starpu_task *task); /** deque a task of the higher priority available from the front of the list for the highest priority */ struct starpu_task *starpu_st_prio_deque_pop_task_for_worker(starpu_st_prio_deque_t pdeque, int workerid, struct starpu_task **skipped); /** return a task that can be executed by workerid from the back of the list for the highest priority */ struct starpu_task *starpu_st_prio_deque_deque_task_for_worker(starpu_st_prio_deque_t pdeque, int workerid, struct starpu_task **skipped); struct starpu_task *starpu_st_prio_deque_deque_first_ready_task(starpu_st_prio_deque_t pdeque, unsigned workerid); struct starpu_task *starpu_st_prio_deque_pop_task(starpu_st_prio_deque_t pdeque); struct starpu_task *starpu_st_prio_deque_highest_task(starpu_st_prio_deque_t pdeque); struct starpu_task *starpu_st_prio_deque_pop_back_task(starpu_st_prio_deque_t pdeque); int starpu_st_prio_deque_pop_this_task(starpu_st_prio_deque_t pdeque, int workerid, struct starpu_task *task); void starpu_st_prio_deque_erase(starpu_st_prio_deque_t pdeque, struct starpu_task *task); int starpu_st_normalize_prio(int priority, int num_priorities, unsigned sched_ctx_id); int starpu_st_non_ready_buffers_count(struct starpu_task *task, unsigned worker); void starpu_st_non_ready_buffers_size(struct starpu_task *task, unsigned worker, size_t *non_readyp, size_t *non_loadingp, size_t *non_allocatedp); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_SCHEDULER_TOOLBOX_FIFO_QUEUES_H__ */ starpu-1.4.9+dfsg/include/starpu.h000066400000000000000000000661461507764646700171370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_H__ #define __STARPU_H__ #include #ifndef _MSC_VER #include #else #include typedef unsigned char uint8_t; typedef unsigned short uint16_t; typedef unsigned int uint32_t; typedef unsigned long long uint64_t; typedef UINT_PTR uintptr_t; typedef char int8_t; typedef short int16_t; typedef int int32_t; typedef long long int64_t; typedef INT_PTR intptr_t; #endif #include #ifdef STARPU_HAVE_WINDOWS #include #endif #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__) && !defined(__HIPCC__) #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef BUILDING_STARPU #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Initialization_and_Termination Initialization and Termination @{ */ /** Structure passed to the starpu_init() function to configure StarPU. It has to be initialized with starpu_conf_init(). When the default value is used, StarPU automatically selects the number of processing units and takes the default scheduling policy. The environment variables overwrite the equivalent parameters unless starpu_conf::precedence_over_environment_variables is set. */ struct starpu_conf { /** @private Will be initialized by starpu_conf_init(). Should not be set by hand. */ int magic; /** @private Tell starpu_init() if MPI will be initialized later. */ int will_use_mpi; /** Name of the scheduling policy. This can also be specified with the environment variable \ref STARPU_SCHED. (default = NULL). */ const char *sched_policy_name; /** Definition of the scheduling policy. This field is ignored if starpu_conf::sched_policy_name is set. (default = NULL) */ struct starpu_sched_policy *sched_policy; /** Callback function that can later be used by the scheduler. The scheduler can retrieve this function by calling starpu_sched_ctx_get_sched_policy_callback() */ void (*sched_policy_callback)(unsigned); /** For all parameters specified in this structure that can also be set with environment variables, by default, StarPU chooses the value of the environment variable against the value set in starpu_conf. Setting the parameter starpu_conf::precedence_over_environment_variables to 1 allows to give precedence to the value set in the structure over the environment variable. */ int precedence_over_environment_variables; /** Number of CPU cores that StarPU can use. This can also be specified with the environment variable \ref STARPU_NCPU. (default = \c -1) */ int ncpus; /** Number of CPU cores to that StarPU should leave aside. They can then be used by application threads, by calling starpu_get_next_bindid() to get their ID, and starpu_bind_thread_on() to bind the current thread to them. */ int reserve_ncpus; /** Number of CUDA devices that StarPU can use. This can also be specified with the environment variable \ref STARPU_NCUDA. (default = \c -1) */ int ncuda; /** Number of HIP devices that StarPU can use. This can also be specified with the environment variable \ref STARPU_NHIP. (default = \c -1) */ int nhip; /** Number of OpenCL devices that StarPU can use. This can also be specified with the environment variable \ref STARPU_NOPENCL. (default = \c -1) */ int nopencl; /** Number of Maxeler FPGA devices that StarPU can use. This can also be specified with the environment variable \ref STARPU_NMAX_FPGA. (default = -1) */ int nmax_fpga; /** Number of MPI Master Slave devices that StarPU can use. This can also be specified with the environment variable \ref STARPU_NMPI_MS. (default = \c -1) */ int nmpi_ms; /** Number of TCP/IP Master Slave devices that StarPU can use. This can also be specified with the environment variable \ref STARPU_NTCPIP_MS. (default = \c -1) */ int ntcpip_ms; /** If this flag is set, the starpu_conf::workers_bindid array indicates where the different workers are bound, otherwise StarPU automatically selects where to bind the different workers. This can also be specified with the environment variable \ref STARPU_WORKERS_CPUID. (default = \c 0) */ unsigned use_explicit_workers_bindid; /** If the starpu_conf::use_explicit_workers_bindid flag is set, this array indicates where to bind the different workers. The i-th entry of the starpu_conf::workers_bindid indicates the logical identifier of the processor which should execute the i-th worker. Note that the logical ordering of the CPUs is either determined by the OS, or provided by the \c hwloc library in case it is available. */ unsigned workers_bindid[STARPU_NMAXWORKERS]; /** If this flag is set, the CUDA workers will be attached to the CUDA devices specified in the starpu_conf::workers_cuda_gpuid array. Otherwise, StarPU affects the CUDA devices in a round-robin fashion. This can also be specified with the environment variable \ref STARPU_WORKERS_CUDAID. (default = \c 0) */ unsigned use_explicit_workers_cuda_gpuid; /** If the starpu_conf::use_explicit_workers_cuda_gpuid flag is set, this array contains the logical identifiers of the CUDA devices (as used by \c cudaGetDevice()). */ unsigned workers_cuda_gpuid[STARPU_NMAXWORKERS]; /** If this flag is set, the HIP workers will be attached to the HIP devices specified in the starpu_conf::workers_hip_gpuid array. Otherwise, StarPU affects the HIP devices in a round-robin fashion. This can also be specified with the environment variable \ref STARPU_WORKERS_HIPID. (default = \c 0) */ unsigned use_explicit_workers_hip_gpuid; /** If the starpu_conf::use_explicit_workers_hip_gpuid flag is set, this array contains the logical identifiers of the HIP devices (as used by \c hipGetDevice()). */ unsigned workers_hip_gpuid[STARPU_NMAXWORKERS]; /** If this flag is set, the OpenCL workers will be attached to the OpenCL devices specified in the starpu_conf::workers_opencl_gpuid array. Otherwise, StarPU affects the OpenCL devices in a round-robin fashion. This can also be specified with the environment variable \ref STARPU_WORKERS_OPENCLID. (default = \c 0) */ unsigned use_explicit_workers_opencl_gpuid; /** If the starpu_conf::use_explicit_workers_opencl_gpuid flag is set, this array contains the logical identifiers of the OpenCL devices to be used. */ unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS]; /** If this flag is set, the Maxeler FPGA workers will be attached to the Maxeler FPGA devices specified in the starpu_conf::workers_max_fpga_deviceid array. Otherwise, StarPU affects the Maxeler FPGA devices in a round-robin fashion. This can also be specified with the environment variable \ref STARPU_WORKERS_MAX_FPGAID. (default = 0) */ unsigned use_explicit_workers_max_fpga_deviceid; /** If the starpu_conf::use_explicit_workers_max_fpga_deviceid flag is set, this array contains the logical identifiers of the Maxeler FPGA devices to be used. */ unsigned workers_max_fpga_deviceid[STARPU_NMAXWORKERS]; #ifdef STARPU_USE_MAX_FPGA /** This allows to specify the Maxeler file(s) to be loaded on Maxeler FPGAs. This is an array of starpu_max_load, the last of which shall have file set to NULL. In order to use all available devices, starpu_max_load::engine_id_pattern can be set to "*", but only the last non-NULL entry can be set so. If this is not set, it is assumed that the basic static SLiC interface is used. */ struct starpu_max_load *max_fpga_load; #else void *max_fpga_load; #endif /** If this flag is set, the MPI Master Slave workers will be attached to the MPI Master Slave devices specified in the array starpu_conf::workers_mpi_ms_deviceid. Otherwise, StarPU affects the MPI Master Slave devices in a round-robin fashion. (default = \c 0) */ unsigned use_explicit_workers_mpi_ms_deviceid; /** If the flag starpu_conf::use_explicit_workers_mpi_ms_deviceid is set, the array contains the logical identifiers of the MPI Master Slave devices to be used. */ unsigned workers_mpi_ms_deviceid[STARPU_NMAXWORKERS]; /** If this flag is set, StarPU will recalibrate the bus. If this value is equal to -1, the default value is used. This can also be specified with the environment variable \ref STARPU_BUS_CALIBRATE. (default = \c 0) */ int bus_calibrate; /** If this flag is set, StarPU will calibrate the performance models when executing tasks. If this value is equal to -1, the default value is used. If the value is equal to 1, it will force continuing calibration. If the value is equal to 2, the existing performance models will be overwritten. This can also be specified with the environment variable \ref STARPU_CALIBRATE. (default = \c 0) */ int calibrate; /** This flag should be set to 1 to enforce data locality when choosing a worker to execute a task. This can also be specified with the environment variable \ref STARPU_DATA_LOCALITY_ENFORCE. This can also be specified at compilation time by giving to the configure script the option \ref enable-data-locality-enforce "--enable-data-locality-enforce". (default = \c 0) */ int data_locality_enforce; /** By default, StarPU executes parallel tasks concurrently. Some parallel libraries (e.g. most OpenMP implementations) however do not support concurrent calls to parallel code. In such case, setting this flag makes StarPU only start one parallel task at a time (but other CPU and GPU tasks are not affected and can be run concurrently). The parallel task scheduler will however still try varying combined worker sizes to look for the most efficient ones. This can also be specified with the environment variable \ref STARPU_SINGLE_COMBINED_WORKER. (default = \c 0) */ int single_combined_worker; /** This flag should be set to 1 to disable asynchronous copies between CPUs and all accelerators. The AMD implementation of OpenCL is known to fail when copying data asynchronously. When using this implementation, it is therefore necessary to disable asynchronous data transfers. This can also be specified with the environment variable \ref STARPU_DISABLE_ASYNCHRONOUS_COPY. This can also be specified at compilation time by giving to the configure script the option \ref disable-asynchronous-copy "--disable-asynchronous-copy". (default = \c 0) */ int disable_asynchronous_copy; /** This flag should be set to 1 to disable asynchronous copies between CPUs and CUDA accelerators. This can also be specified with the environment variable \ref STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY. This can also be specified at compilation time by giving to the configure script the option \ref disable-asynchronous-cuda-copy "--disable-asynchronous-cuda-copy". (default = \c 0) */ int disable_asynchronous_cuda_copy; /** This flag should be set to 1 to disable asynchronous copies between CPUs and HIP accelerators. This can also be specified with the environment variable \ref STARPU_DISABLE_ASYNCHRONOUS_HIP_COPY. This can also be specified at compilation time by giving to the configure script the option \ref disable-asynchronous-hip-copy "--disable-asynchronous-hip-copy". (default = \c 0) */ int disable_asynchronous_hip_copy; /** This flag should be set to 1 to disable asynchronous copies between CPUs and OpenCL accelerators. The AMD implementation of OpenCL is known to fail when copying data asynchronously. When using this implementation, it is therefore necessary to disable asynchronous data transfers. This can also be specified with the environment variable \ref STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY. This can also be specified at compilation time by giving to the configure script the option \ref disable-asynchronous-opencl-copy "--disable-asynchronous-opencl-copy". (default = \c 0) */ int disable_asynchronous_opencl_copy; /** This flag should be set to 1 to disable asynchronous copies between CPUs and MPI Master Slave devices. This can also be specified with the environment variable \ref STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY. This can also be specified at compilation time by giving to the configure script the option \ref disable-asynchronous-mpi-master-slave-copy "--disable-asynchronous-mpi-master-slave-copy". (default = \c 0). */ int disable_asynchronous_mpi_ms_copy; /** This flag should be set to 1 to disable asynchronous copies between CPUs and TCP/IP Master Slave devices. This can also be specified with the environment variable \ref STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY. This can also be specified at compilation time by giving to the configure script the option \ref disable-asynchronous-tcpip-master-slave-copy "--disable-asynchronous-tcpip-master-slave-copy". (default = \c 0). */ int disable_asynchronous_tcpip_ms_copy; /** This flag should be set to 1 to disable asynchronous copies between CPUs and Maxeler FPGA devices. This can also be specified with the environment variable \ref STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY. This can also be specified at compilation time by giving to the configure script the option \ref disable-asynchronous-fpga-copy "--disable-asynchronous-fpga-copy". (default = 0). */ int disable_asynchronous_max_fpga_copy; /** This flag should be set to 1 to disable memory mapping support between memory nodes. This can also be specified with the environment variable \ref STARPU_ENABLE_MAP. */ int enable_map; /** Enable CUDA/OpenGL interoperation on these CUDA devices. This can be set to an array of CUDA device identifiers for which \c cudaGLSetGLDevice() should be called instead of \c cudaSetDevice(). Its size is specified by the starpu_conf::n_cuda_opengl_interoperability field below (default = NULL) */ unsigned *cuda_opengl_interoperability; /** Size of the array starpu_conf::cuda_opengl_interoperability */ unsigned n_cuda_opengl_interoperability; /** Array of drivers that should not be launched by StarPU. The application will run in one of its own threads. (default = NULL) */ struct starpu_driver *not_launched_drivers; /** The number of StarPU drivers that should not be launched by StarPU, i.e number of elements of the array starpu_conf::not_launched_drivers. (default = \c 0) */ unsigned n_not_launched_drivers; /** Specify the buffer size used for FxT tracing. Starting from FxT version 0.2.12, the buffer will automatically be flushed when it fills in, but it may still be interesting to specify a bigger value to avoid any flushing (which would disturb the trace). */ uint64_t trace_buffer_size; /** Set the minimum priority used by priorities-aware schedulers. This also can be specified with the environment variable \ref STARPU_MIN_PRIO */ int global_sched_ctx_min_priority; /** Set the maximum priority used by priorities-aware schedulers. This also can be specified with the environment variable \ref STARPU_MAX_PRIO */ int global_sched_ctx_max_priority; #ifdef STARPU_WORKER_CALLBACKS void (*callback_worker_going_to_sleep)(unsigned workerid); void (*callback_worker_waking_up)(unsigned workerid); #endif /** Specify if StarPU should catch \c SIGINT, \c SIGSEGV and \c SIGTRAP signals to make sure final actions (e.g dumping FxT trace files) are done even though the application has crashed. By default (value = \c 1), signals are caught. It should be disabled on systems which already catch these signals for their own needs (e.g JVM) This can also be specified with the environment variable \ref STARPU_CATCH_SIGNALS. */ int catch_signals; /** Specify whether StarPU should automatically start to collect performance counters after initialization */ unsigned start_perf_counter_collection; /** Minimum spinning backoff of drivers (default = \c 1) */ unsigned driver_spinning_backoff_min; /** Maximum spinning backoff of drivers. (default = \c 32) */ unsigned driver_spinning_backoff_max; /** Specify if CUDA workers should do only fast allocations when running the datawizard progress of other memory nodes. This will pass the interval value _STARPU_DATAWIZARD_ONLY_FAST_ALLOC to the allocation method. Default value is 0, allowing CUDA workers to do slow allocations. This can also be specified with the environment variable \ref STARPU_CUDA_ONLY_FAST_ALLOC_OTHER_MEMNODES. */ int cuda_only_fast_alloc_other_memnodes; }; /** Initialize the \p conf structure with the default values. In case some configuration parameters are already specified through environment variables, starpu_conf_init() initializes the fields of \p conf according to the environment variables. For instance if \ref STARPU_CALIBRATE is set, its value is put in the field starpu_conf::calibrate of \p conf. Upon successful completion, this function returns 0. Otherwise, -EINVAL indicates that the argument was NULL. */ int starpu_conf_init(struct starpu_conf *conf); /** Set fields of \p conf so that no worker is enabled, i.e. set starpu_conf::ncpus = 0, starpu_conf::ncuda = 0, etc. This allows to portably enable only a given type of worker:
    starpu_conf_noworker(&conf);
    conf.ncpus = -1;
    See \ref ConfigurationAndInitialization for more details. */ int starpu_conf_noworker(struct starpu_conf *conf); /** StarPU initialization method, must be called prior to any other StarPU call. It is possible to specify StarPU’s configuration (e.g. scheduling policy, number of cores, ...) by passing a non-NULL \p conf. Default configuration is used if \p conf is NULL. Upon successful completion, this function returns 0. Otherwise, -ENODEV indicates that no worker was available (and thus StarPU was not initialized). See \ref SubmittingATask for more details. */ int starpu_init(struct starpu_conf *conf) STARPU_WARN_UNUSED_RESULT; /** Similar to starpu_init(), but also take the \p argc and \p argv as defined by the application, which is necessary when running in Simgrid mode or MPI Master Slave mode. Do not call starpu_init() and starpu_initialize() in the same program. See \ref SubmittingATask for more details. */ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv); /** Return 1 if StarPU is already initialized. See \ref ConfigurationAndInitialization for more details. */ int starpu_is_initialized(void); /** Wait for starpu_init() call to finish. See \ref ConfigurationAndInitialization for more details. */ void starpu_wait_initialized(void); /** StarPU termination method, must be called at the end of the application: statistics and other post-mortem debugging information are not guaranteed to be available until this method has been called. See \ref SubmittingATask for more details. */ void starpu_shutdown(void); /** Suspend the processing of new tasks by workers. It can be used in a program where StarPU is used during only a part of the execution. Without this call, the workers continue to poll for new tasks in a tight loop, wasting CPU time. The symmetric call to starpu_resume() should be used to unfreeze the workers. See \ref KernelThreadsStartedByStarPU and \ref PauseResume for more details. */ void starpu_pause(void); /** Symmetrical call to starpu_pause(), used to resume the workers polling for new tasks. This would be typically called only once having submitted all tasks. See \ref KernelThreadsStartedByStarPU and \ref PauseResume for more details. */ void starpu_resume(void); /** Return !0 if task processing by workers is currently paused, 0 otherwise. See \ref StarPUEatsCPUs for more details. */ int starpu_is_paused(void); /** Value to be passed to starpu_get_next_bindid() and starpu_bind_thread_on() when binding a thread which will significantly eat CPU time, and should thus have its own dedicated CPU. */ #define STARPU_THREAD_ACTIVE (1 << 0) /** Return a PU binding ID which can be used to bind threads with starpu_bind_thread_on(). \p flags can be set to ::STARPU_THREAD_ACTIVE or 0. When \p npreferred is set to non-zero, \p preferred is an array of size \p npreferred in which a preference of PU binding IDs can be set. By default StarPU will return the first PU available for binding. See \ref KernelThreadsStartedByStarPU and \ref cpuWorkers for more details. */ unsigned starpu_get_next_bindid(unsigned flags, unsigned *preferred, unsigned npreferred); /** Bind the calling thread on the given \p cpuid (which should have been obtained with starpu_get_next_bindid()). Return -1 if a thread was already bound to this PU (but binding will still have been done, and a warning will have been printed), so the caller can tell the user how to avoid the issue. \p name should be set to a unique string so that different calls with the same name for the same \p cpuid does not produce a warning. See \ref KernelThreadsStartedByStarPU and \ref cpuWorkers for more details. */ int starpu_bind_thread_on(int cpuid, unsigned flags, const char *name); /** Bind the calling thread on the cores corresponding to the \p workerid . \p workerid can be a basic worker or a combined worker. This can be used e.g. before initializing a library which records at initialization time the thread binding to be used when running kernels. See \ref KernelThreadsStartedByStarPU and \ref cpuWorkers for more details. */ void starpu_bind_thread_on_worker(unsigned workerid); /** Bind the calling thread back to the core reserved for the main thread. This can be used e.g. after initializing a library which records at initialization time the thread binding to be used when running kernels. See \ref KernelThreadsStartedByStarPU and \ref cpuWorkers for more details. */ void starpu_bind_thread_on_main(void); /** Bind the calling thread on the given \p cpuid This can be used e.g. after initializing a library which records at initialization time the thread binding to be used when running kernels. See \ref KernelThreadsStartedByStarPU and \ref cpuWorkers for more details. */ void starpu_bind_thread_on_cpu(int cpuid); /** Return the OS number of a given \p cpuid StarPU uses logical numbering (as define by hwloc) all along, but in case interaction is needed with another binding tool that uses numbering as defined by the OS, we need to convert from hwloc logical numbering to hwloc physical numbering. */ int starpu_cpu_os_index(int cpuid); /** Print a description of the topology on \p f. See \ref ConfigurationAndInitialization for more details. */ void starpu_topology_print(FILE *f); /** Return 1 if asynchronous data transfers between CPU and accelerators are disabled. See \ref Basic for more details. */ int starpu_asynchronous_copy_disabled(void); /** Return 1 if asynchronous data transfers between CPU and CUDA accelerators are disabled. See \ref cudaWorkers for more details. */ int starpu_asynchronous_cuda_copy_disabled(void); /** Return 1 if asynchronous data transfers between CPU and HIP accelerators are disabled. See \ref hipWorkers for more details. */ int starpu_asynchronous_hip_copy_disabled(void); /** Return 1 if asynchronous data transfers between CPU and OpenCL accelerators are disabled. See \ref openclWorkers for more details. */ int starpu_asynchronous_opencl_copy_disabled(void); /** Return 1 if asynchronous data transfers between CPU and Maxeler FPGA devices are disabled. See \ref maxfpgaWorkers for more details. */ int starpu_asynchronous_max_fpga_copy_disabled(void); /** Return 1 if asynchronous data transfers between CPU and MPI Slave devices are disabled. See \ref mpimsWorkers for more details. */ int starpu_asynchronous_mpi_ms_copy_disabled(void); /** Return 1 if asynchronous data transfers between CPU and TCP/IP Slave devices are disabled. See \ref tcpipmsWorkers for more details. */ int starpu_asynchronous_tcpip_ms_copy_disabled(void); /** Return 1 if asynchronous data transfers with a given kind of memory are disabled. */ int starpu_asynchronous_copy_disabled_for(enum starpu_node_kind kind); /** Return 1 if memory mapping support between memory nodes is enabled. See \ref Basic for more details. */ int starpu_map_enabled(void); /** Call starpu_profiling_bus_helper_display_summary() and starpu_profiling_worker_helper_display_summary(). See \ref DataStatistics for more details. */ void starpu_display_stats(void); /** @} */ /** @defgroup API_Versioning Versioning @{ */ /** Return as 3 integers the version of StarPU used when running the application. See \ref ConfigurationAndInitialization for more details. */ void starpu_get_version(int *major, int *minor, int *release); /** @} */ #ifdef __cplusplus } #endif #include "starpu_deprecated_api.h" #endif /* __STARPU_H__ */ starpu-1.4.9+dfsg/include/starpu_bitmap.h000066400000000000000000000201031507764646700204520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_BITMAP_H__ #define __STARPU_BITMAP_H__ #include #include #include #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Bitmap Bitmap @brief This is the interface for the bitmap utilities provided by StarPU. @{ */ #ifndef _STARPU_LONG_BIT #define _STARPU_LONG_BIT ((int)(sizeof(unsigned long) * 8)) #endif #define _STARPU_BITMAP_SIZE ((STARPU_NMAXWORKERS - 1) / _STARPU_LONG_BIT) + 1 /** create a empty starpu_bitmap */ static inline struct starpu_bitmap *starpu_bitmap_create(void) STARPU_ATTRIBUTE_MALLOC; /** zero a starpu_bitmap */ static inline void starpu_bitmap_init(struct starpu_bitmap *b); /** free \p b */ static inline void starpu_bitmap_destroy(struct starpu_bitmap *b); /** set bit \p e in \p b */ static inline void starpu_bitmap_set(struct starpu_bitmap *b, int e); /** unset bit \p e in \p b */ static inline void starpu_bitmap_unset(struct starpu_bitmap *b, int e); /** unset all bits in \p b */ static inline void starpu_bitmap_unset_all(struct starpu_bitmap *b); /** return true iff bit \p e is set in \p b */ static inline int starpu_bitmap_get(struct starpu_bitmap *b, int e); /** Basically compute \c starpu_bitmap_unset_all(\p a) ; \p a = \p b & \p c; */ static inline void starpu_bitmap_unset_and(struct starpu_bitmap *a, struct starpu_bitmap *b, struct starpu_bitmap *c); /** Basically compute \p a |= \p b */ static inline void starpu_bitmap_or(struct starpu_bitmap *a, struct starpu_bitmap *b); /** return 1 iff \p e is set in \p b1 AND \p e is set in \p b2 */ static inline int starpu_bitmap_and_get(struct starpu_bitmap *b1, struct starpu_bitmap *b2, int e); /** return the number of set bits in \p b */ static inline int starpu_bitmap_cardinal(struct starpu_bitmap *b); /** return the index of the first set bit of \p b, -1 if none */ static inline int starpu_bitmap_first(struct starpu_bitmap *b); /** return the position of the last set bit of \p b, -1 if none */ static inline int starpu_bitmap_last(struct starpu_bitmap *b); /** return the position of set bit right after \p e in \p b, -1 if none */ static inline int starpu_bitmap_next(struct starpu_bitmap *b, int e); /** todo */ static inline int starpu_bitmap_has_next(struct starpu_bitmap *b, int e); /** @} */ /** todo */ struct starpu_bitmap { unsigned long bits[_STARPU_BITMAP_SIZE]; int cardinal; }; #ifdef _STARPU_DEBUG_BITMAP static int _starpu_check_bitmap(struct starpu_bitmap *b) { int card = b->cardinal; int i = starpu_bitmap_first(b); int j; for (j = 0; j < card; j++) { if (i == -1) return 0; int tmp = starpu_bitmap_next(b, i); if (tmp == i) return 0; i = tmp; } if (i != -1) return 0; return 1; } #else #define _starpu_check_bitmap(b) 1 #endif static int _starpu_count_bit_static(unsigned long e) { #if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__) >= 4) return __builtin_popcountl(e); #else int c = 0; while (e) { c += e & 1; e >>= 1; } return c; #endif } static inline struct starpu_bitmap *starpu_bitmap_create(void) { return (struct starpu_bitmap *)calloc(1, sizeof(struct starpu_bitmap)); } static inline void starpu_bitmap_init(struct starpu_bitmap *b) { memset(b, 0, sizeof(*b)); } static inline void starpu_bitmap_destroy(struct starpu_bitmap *b) { free(b); } static inline void starpu_bitmap_set(struct starpu_bitmap *b, int e) { if (!starpu_bitmap_get(b, e)) b->cardinal++; else return; STARPU_ASSERT(e / _STARPU_LONG_BIT < _STARPU_BITMAP_SIZE); b->bits[e / _STARPU_LONG_BIT] |= (1ul << (e % _STARPU_LONG_BIT)); STARPU_ASSERT(_starpu_check_bitmap(b)); } static inline void starpu_bitmap_unset(struct starpu_bitmap *b, int e) { if (starpu_bitmap_get(b, e)) b->cardinal--; else return; STARPU_ASSERT(e / _STARPU_LONG_BIT < _STARPU_BITMAP_SIZE); if (e / _STARPU_LONG_BIT > _STARPU_BITMAP_SIZE) return; b->bits[e / _STARPU_LONG_BIT] &= ~(1ul << (e % _STARPU_LONG_BIT)); STARPU_ASSERT(_starpu_check_bitmap(b)); } static inline void starpu_bitmap_unset_all(struct starpu_bitmap *b) { memset(b->bits, 0, _STARPU_BITMAP_SIZE * sizeof(unsigned long)); } static inline void starpu_bitmap_unset_and(struct starpu_bitmap *a, struct starpu_bitmap *b, struct starpu_bitmap *c) { a->cardinal = 0; int i; for (i = 0; i < _STARPU_BITMAP_SIZE; i++) { a->bits[i] = b->bits[i] & c->bits[i]; a->cardinal += _starpu_count_bit_static(a->bits[i]); } } static inline int starpu_bitmap_get(struct starpu_bitmap *b, int e) { STARPU_ASSERT(e / _STARPU_LONG_BIT < _STARPU_BITMAP_SIZE); if (e / _STARPU_LONG_BIT >= _STARPU_BITMAP_SIZE) return 0; return (b->bits[e / _STARPU_LONG_BIT] & (1ul << (e % _STARPU_LONG_BIT))) ? 1 : 0; } static inline void starpu_bitmap_or(struct starpu_bitmap *a, struct starpu_bitmap *b) { int i; a->cardinal = 0; for (i = 0; i < _STARPU_BITMAP_SIZE; i++) { a->bits[i] |= b->bits[i]; a->cardinal += _starpu_count_bit_static(a->bits[i]); } } static inline int starpu_bitmap_and_get(struct starpu_bitmap *b1, struct starpu_bitmap *b2, int e) { return starpu_bitmap_get(b1, e) && starpu_bitmap_get(b2, e); } static inline int starpu_bitmap_cardinal(struct starpu_bitmap *b) { return b->cardinal; } static inline int _starpu_get_first_bit_rank(unsigned long ms) { STARPU_ASSERT(ms != 0); #if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)) return __builtin_ffsl(ms) - 1; #else unsigned long m = 1ul; int i = 0; while (!(m & ms)) i++, m <<= 1; return i; #endif } static inline int _starpu_get_last_bit_rank(unsigned long l) { STARPU_ASSERT(l != 0); #if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)) return 8 * sizeof(l) - __builtin_clzl(l); #else int ibit = _STARPU_LONG_BIT - 1; while ((!(1ul << ibit)) & l) ibit--; STARPU_ASSERT(ibit >= 0); return ibit; #endif } static inline int starpu_bitmap_first(struct starpu_bitmap *b) { int i = 0; while (i < _STARPU_BITMAP_SIZE && !b->bits[i]) i++; if (i == _STARPU_BITMAP_SIZE) return -1; int nb_long = i; unsigned long ms = b->bits[i]; return (nb_long * _STARPU_LONG_BIT) + _starpu_get_first_bit_rank(ms); } static inline int starpu_bitmap_has_next(struct starpu_bitmap *b, int e) { int nb_long = (e + 1) / _STARPU_LONG_BIT; int nb_bit = (e + 1) % _STARPU_LONG_BIT; unsigned long mask = (~0ul) << nb_bit; if (b->bits[nb_long] & mask) return 1; for (nb_long++; nb_long < _STARPU_BITMAP_SIZE; nb_long++) if (b->bits[nb_long]) return 1; return 0; } static inline int starpu_bitmap_last(struct starpu_bitmap *b) { if (b->cardinal == 0) return -1; int ilong; for (ilong = _STARPU_BITMAP_SIZE - 1; ilong >= 0; ilong--) { if (b->bits[ilong]) break; } STARPU_ASSERT(ilong >= 0); unsigned long l = b->bits[ilong]; return ilong * _STARPU_LONG_BIT + _starpu_get_last_bit_rank(l); } static inline int starpu_bitmap_next(struct starpu_bitmap *b, int e) { int nb_long = e / _STARPU_LONG_BIT; int nb_bit = e % _STARPU_LONG_BIT; unsigned long rest = nb_bit == _STARPU_LONG_BIT - 1 ? 0 : (~0ul << (nb_bit + 1)) & b->bits[nb_long]; if (nb_bit != (_STARPU_LONG_BIT - 1) && rest) { int i = _starpu_get_first_bit_rank(rest); STARPU_ASSERT(i >= 0 && i < _STARPU_LONG_BIT); return (nb_long * _STARPU_LONG_BIT) + i; } for (nb_long++; nb_long < _STARPU_BITMAP_SIZE; nb_long++) if (b->bits[nb_long]) return nb_long * _STARPU_LONG_BIT + _starpu_get_first_bit_rank(b->bits[nb_long]); return -1; } #ifdef __cplusplus } #endif #endif /* __STARPU_BITMAP_H__ */ starpu-1.4.9+dfsg/include/starpu_bound.h000066400000000000000000000055431507764646700203200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_BOUND_H__ #define __STARPU_BOUND_H__ #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Theoretical_Lower_Bound_on_Execution_Time Theoretical Lower Bound on Execution Time @brief Compute theoretical upper computation efficiency bound corresponding to some actual execution. @{ */ /** Start recording tasks (resets stats). \p deps tells whether dependencies should be recorded too (this is quite expensive) See \ref TheoreticalLowerBoundOnExecutionTime for more details. */ void starpu_bound_start(int deps, int prio); /** Stop recording tasks See \ref TheoreticalLowerBoundOnExecutionTime for more details. */ void starpu_bound_stop(void); /** Emit the DAG that was recorded on \p output. See \ref TheoreticalLowerBoundOnExecutionTime for more details. */ void starpu_bound_print_dot(FILE *output); /** Get theoretical upper bound (in ms) (needs glpk support detected by configure script). It returns 0 if some performance models are not calibrated. \p integer permits to choose between integer solving (which takes a long time but is correct), and relaxed solving (which provides an approximate solution). See \ref TheoreticalLowerBoundOnExecutionTime for more details. */ void starpu_bound_compute(double *res, double *integer_res, int integer); /** Emit the Linear Programming system on \p output for the recorded tasks, in the lp format See \ref TheoreticalLowerBoundOnExecutionTime for more details. */ void starpu_bound_print_lp(FILE *output); /** Emit the Linear Programming system on \p output for the recorded tasks, in the mps format See \ref TheoreticalLowerBoundOnExecutionTime for more details. */ void starpu_bound_print_mps(FILE *output); /** Emit on \p output the statistics of actual execution vs theoretical upper bound. \p integer permits to choose between integer solving (which takes a long time but is correct), and relaxed solving (which provides an approximate solution). See \ref TheoreticalLowerBoundOnExecutionTime for more details. */ void starpu_bound_print(FILE *output, int integer); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_BOUND_H__ */ starpu-1.4.9+dfsg/include/starpu_config.h.in000066400000000000000000000245261507764646700210650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is the public config.h file, installed along libstarpu. * * It should only contain the build-time #defines which have an effect on the * API & ABI. */ #ifndef __STARPU_CONFIG_PUBLIC_H__ #define __STARPU_CONFIG_PUBLIC_H__ /** Define the major version of StarPU. This is the version used when compiling the application. @ingroup API_Versioning */ #undef STARPU_MAJOR_VERSION /** @ingroup API_Versioning Define the minor version of StarPU. This is the version used when compiling the application. */ #undef STARPU_MINOR_VERSION /** Define the release version of StarPU. This is the version used when compiling the application. @ingroup API_Versioning */ #undef STARPU_RELEASE_VERSION #undef STARPU_USE_CPU /** Defined when StarPU has been installed with CUDA support. It should be used in your code to detect the availability of CUDA. @ingroup API_CUDA_Extensions */ #undef STARPU_USE_CUDA /** Defined when StarPU is testing the CUDA0 driver. */ #undef STARPU_USE_CUDA0 /** Defined when StarPU is testing the CUDA1 driver. */ #undef STARPU_USE_CUDA1 /** Defined when StarPU has been installed with HIP support. It should be used in your code to detect the availability of HIP. @ingroup API_HIP_Extensions */ #undef STARPU_USE_HIP /** Defined when StarPU has been installed with HIP BLAS support. It should be used in your code to detect the availability of HIP BLAS. @ingroup API_HIP_Extensions */ #undef STARPU_USE_HIPBLAS /** Defined when StarPU has been installed with NVidia-ML support. It should be used in your code to detect the availability of NVML-related functions. @ingroup API_CUDA_Extensions */ #undef STARPU_HAVE_NVML_H /** Defined when StarPU has been installed with OpenCL support. It should be used in your code to detect the availability of OpenCL as shown in \ref FullSourceCodeVectorScal. @ingroup API_OpenCL_Extensions */ #undef STARPU_USE_OPENCL /** Defined when StarPU has been installed with FPGA support. It should be used in your code to detect the availability of FPGA. @ingroup API_Max_FPGA_Extensions */ #undef STARPU_USE_MAX_FPGA /** Defined when StarPU has been installed with MPI Master Slave support. It should be used in your code to detect the availability of MPI Master Slave. @ingroup API_MPI_Support */ #undef STARPU_USE_MPI_MASTER_SLAVE /** Defined when StarPU has been installed with TCP/IP Master Slave support. It should be used in your code to detect the availability of TCP/IP Master Slave. */ #undef STARPU_USE_TCPIP_MASTER_SLAVE /** Defined when StarPU has been installed with OpenMP Runtime support. It should be used in your code to detect the availability of the runtime support for OpenMP. @ingroup API_OpenMP_Runtime_Support */ #undef STARPU_OPENMP #undef STARPU_BUBBLE #undef STARPU_PARALLEL_WORKER #undef STARPU_SIMGRID #undef STARPU_SIMGRID_MC #undef STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT #undef STARPU_HAVE_SIMGRID_MSG_H #undef STARPU_HAVE_MSG_MSG_H #undef STARPU_HAVE_SIMGRID_ACTOR_H #undef STARPU_HAVE_SIMGRID_SEMAPHORE_H #undef STARPU_HAVE_SIMGRID_MUTEX_H #undef STARPU_HAVE_SIMGRID_COND_H #undef STARPU_HAVE_SIMGRID_BARRIER_H #undef STARPU_HAVE_XBT_SYNCHRO_H #undef STARPU_HAVE_VALGRIND_H #undef STARPU_HAVE_MEMCHECK_H #undef STARPU_VALGRIND_FULL #undef STARPU_SANITIZE_LEAK #undef STARPU_NON_BLOCKING_DRIVERS /* workers must call callbacks on sleep/wake-up */ #undef STARPU_WORKER_CALLBACKS #undef STARPU_HAVE_ICC /** Defined when StarPU has been installed with MPI support. It should be used in your code to detect the availability of MPI. @ingroup API_MPI_Support */ #undef STARPU_USE_MPI #undef STARPU_USE_MPI_MPI #undef STARPU_USE_MPI_NMAD #undef STARPU_USE_MPI_FT #undef STARPU_USE_MPI_FT_STATS #undef STARPU_ATLAS #undef STARPU_GOTO #undef STARPU_OPENBLAS #undef STARPU_MKL #undef STARPU_ARMPL #undef STARPU_SYSTEM_BLAS #undef STARPU_HAVE_CBLAS_H #undef STARPU_HAVE_BLAS /** Define the directory in which the OpenCL codelets of the applications provided with StarPU have been installed. @ingroup API_OpenCL_Extensions */ #undef STARPU_OPENCL_DATADIR #undef STARPU_HAVE_LIBCUBLASLT #undef STARPU_HAVE_LIBCUSPARSE #undef STARPU_HAVE_LIBCUSOLVER #undef STARPU_HAVE_MAGMA #undef STARPU_OPENGL_RENDER #undef STARPU_USE_GTK #undef STARPU_HAVE_X11 #undef STARPU_PAPI #undef STARPU_HAVE_POSIX_MEMALIGN #undef STARPU_HAVE_MEMALIGN #undef STARPU_HAVE_MALLOC_H #undef STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP #undef STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8 #undef STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP #undef STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP_8 #undef STARPU_HAVE_SYNC_FETCH_AND_ADD #undef STARPU_HAVE_SYNC_FETCH_AND_ADD_8 #undef STARPU_HAVE_SYNC_FETCH_AND_OR #undef STARPU_HAVE_SYNC_FETCH_AND_OR_8 #undef STARPU_HAVE_SYNC_LOCK_TEST_AND_SET #undef STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N #undef STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N_8 #undef STARPU_HAVE_ATOMIC_EXCHANGE_N #undef STARPU_HAVE_ATOMIC_EXCHANGE_N_8 #undef STARPU_HAVE_ATOMIC_FETCH_ADD #undef STARPU_HAVE_ATOMIC_FETCH_ADD_8 #undef STARPU_HAVE_ATOMIC_FETCH_OR #undef STARPU_HAVE_ATOMIC_FETCH_OR_8 #undef STARPU_HAVE_ATOMIC_TEST_AND_SET #undef STARPU_HAVE_SYNC_SYNCHRONIZE #undef STARPU_DEVEL #undef STARPU_MODEL_DEBUG #undef STARPU_NO_ASSERT #undef STARPU_DEBUG #undef STARPU_VERBOSE #undef STARPU_GDB_PATH #undef STARPU_HAVE_FFTW #undef STARPU_HAVE_FFTWF #undef STARPU_HAVE_FFTWL #undef STARPU_HAVE_CUFFTDOUBLECOMPLEX #undef STARPU_HAVE_CURAND /** Define the maximum number of memory nodes managed by StarPU. The default value can be modified at configure by using the option \ref enable-maxnodes "--enable-maxnodes". Reducing it allows to considerably reduce memory used by StarPU data structures. @ingroup API_Workers */ #undef STARPU_MAXNODES /** Define the maximum number of buffers that tasks will be able to take as parameters. The default value is 8, it can be changed by using the configure option \ref enable-maxbuffers "--enable-maxbuffers". @ingroup API_Codelet_And_Tasks */ #undef STARPU_NMAXBUFS /** Define the maximum number of fxt mpi files that can be read when generating traces. The default value is 64, it can be changed by using the configure option \ref enable-fxt-max-files "--enable-fxt-max-files". @ingroup API_MPI_Support */ #undef STARPU_FXT_MAX_FILES /** Define the maximum number of CPU workers managed by StarPU. The default value can be modified at configure by using the option \ref enable-maxcpus "--enable-maxcpus". @ingroup API_Workers */ #undef STARPU_MAXCPUS /** Define the maximum number of NUMA nodes managed by StarPU. The default value can be modified at configure by using the option \ref enable-maxnumanodes "--enable-maxnumanodes". @ingroup API_Workers */ #undef STARPU_MAXNUMANODES /** Define the maximum number of CUDA devices that are supported by StarPU. @ingroup API_CUDA_Extensions */ #undef STARPU_MAXCUDADEVS /** Define the maximum number of OpenCL devices that are supported by StarPU. @ingroup API_OpenCL_Extensions */ #undef STARPU_MAXOPENCLDEVS /** Define the maximum number of Maxeler FPGA devices that are supported by StarPU. @ingroup API_Max_FPGA_Extensions */ #undef STARPU_MAXMAXFPGADEVS /** Define the maximum number of HIP devices that are supported by StarPU. @ingroup API_HIP_Extensions */ #undef STARPU_MAXHIPDEVS /** Define the maximum number of workers managed by StarPU. @ingroup API_Workers */ #undef STARPU_NMAXWORKERS /** Define the maximum number of scheduling contexts managed by StarPU. The default value can be modified at configure by using the option \ref enable-max-sched-ctxs "--enable-max-sched-ctxs". @ingroup API_Scheduling_Policy */ #undef STARPU_NMAX_SCHED_CTXS /** Define the maximum number of implementations per architecture. The default value can be modified at configure by using the option \ref enable-maximplementations "--enable-maximplementations". @ingroup API_Scheduling_Policy */ #undef STARPU_MAXIMPLEMENTATIONS #undef STARPU_USE_SC_HYPERVISOR #undef STARPU_SC_HYPERVISOR_DEBUG #undef STARPU_HAVE_GLPK_H #undef STARPU_HAVE_CUDA_MEMCPY_PEER #undef STARPU_HAVE_LIBNUMA #undef STARPU_HAVE_WINDOWS #undef STARPU_LINUX_SYS #undef STARPU_HAVE_SETENV #undef STARPU_HAVE_UNSETENV #undef STARPU_HAVE_UNISTD_H #undef STARPU_HAVE_HDF5 #undef STARPU_HAVE_MPI_COMM_CREATE_GROUP #undef STARPU_USE_FXT #undef STARPU_FXT_LOCK_TRACES #ifdef _MSC_VER typedef long starpu_ssize_t; #define __starpu_func__ __FUNCTION__ #else #include typedef ssize_t starpu_ssize_t; #define __starpu_func__ __func__ #endif #if defined(c_plusplus) || defined(__cplusplus) /* inline is part of C++ */ #define __starpu_inline inline #elif defined(_MSC_VER) || defined(__HP_cc) #define __starpu_inline __inline #else #define __starpu_inline __inline__ #endif #undef STARPU_QUICK_CHECK #undef STARPU_LONG_CHECK #undef STARPU_USE_DRAND48 #undef STARPU_USE_ERAND48_R #undef STARPU_HAVE_NEARBYINTF #undef STARPU_HAVE_RINTF #undef STARPU_HAVE_HWLOC #undef STARPU_HAVE_PTHREAD_SPIN_LOCK #undef STARPU_HAVE_PTHREAD_BARRIER #undef STARPU_HAVE_PTHREAD_SETNAME_NP #undef STARPU_HAVE_STRUCT_TIMESPEC #undef STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO #undef STARPU_PTHREAD_COND_INITIALIZER_ZERO #undef STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO /** This is only for building examples */ #undef STARPU_HAVE_HELGRIND_H /** Enable Fortran to C MPI interface */ #undef HAVE_MPI_COMM_F2C #undef STARPU_HAVE_DARWIN #undef STARPU_HAVE_CXX11 #undef STARPU_HAVE_STRERROR_R #undef STARPU_HAVE_STATEMENT_EXPRESSIONS #undef STARPU_PERF_MODEL_DIR #undef STARPU_PYTHON_HAVE_NUMPY #undef STARPU_PROF_TOOL #endif starpu-1.4.9+dfsg/include/starpu_cublas.h000066400000000000000000000035651507764646700204640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_CUBLAS_H__ #define __STARPU_CUBLAS_H__ #ifdef __cplusplus extern "C" { #endif /** @ingroup API_CUDA_Extensions @{ */ /** Initialize CUBLAS on every CUDA device. The CUBLAS library must be initialized prior to any CUBLAS call. Calling starpu_cublas_init() will initialize CUBLAS on every CUDA device controlled by StarPU. This call blocks until CUBLAS has been properly initialized on every device. See \ref CUDA-specificOptimizations for more details. */ void starpu_cublas_init(void); /** Set the proper CUBLAS stream for CUBLAS v1. This must be called from the CUDA codelet before calling CUBLAS v1 kernels, so that they are queued on the proper CUDA stream. When using one thread per CUDA worker, this function does not do anything since the CUBLAS stream does not change, and is set once by starpu_cublas_init(). See \ref CUDA-specificOptimizations for more details. */ void starpu_cublas_set_stream(void); /** Synchronously deinitialize the CUBLAS library on every CUDA device. See \ref CUDA-specificOptimizations for more details. */ void starpu_cublas_shutdown(void); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_CUBLAS_H__ */ starpu-1.4.9+dfsg/include/starpu_cublasLt.h000066400000000000000000000033471507764646700207620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_CUBLASLT_H__ #define __STARPU_CUBLASLT_H__ #ifdef STARPU_USE_CUDA #ifdef STARPU_HAVE_LIBCUBLASLT #include #endif #endif #ifdef __cplusplus extern "C" { #endif /** @ingroup API_CUDA_Extensions @{ */ /** Initialize CUBLASLT on every CUDA device controlled by StarPU. This call blocks until CUBLASLT has been properly initialized on every device. See \ref CUDA-specificOptimizations for more details. */ void starpu_cublasLt_init(void); /** Synchronously deinitialize the CUBLASLT library on every CUDA device. See \ref CUDA-specificOptimizations for more details. */ void starpu_cublasLt_shutdown(void); #ifdef STARPU_USE_CUDA #ifdef STARPU_HAVE_LIBCUBLASLT /** Return the CUBLASLT handle to be used to queue CUBLASLT kernels. It is properly initialized and configured for multistream by starpu_cublasLt_init(). See \ref CUDA-specificOptimizations for more details. */ cublasLtHandle_t starpu_cublasLt_get_local_handle(void); #endif #endif /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_CUBLASLT_H__ */ starpu-1.4.9+dfsg/include/starpu_cublas_v2.h000066400000000000000000000023241507764646700210630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_CUBLAS_V2_H__ #define __STARPU_CUBLAS_V2_H__ #ifdef STARPU_USE_CUDA #include #ifdef __cplusplus extern "C" { #endif /** @ingroup API_CUDA_Extensions @{ */ /** Return the CUBLAS handle to be used to queue CUBLAS kernels. It is properly initialized and configured for multistream by starpu_cublas_init(). See \ref CUDA-specificOptimizations for more details. */ cublasHandle_t starpu_cublas_get_local_handle(void); /** @} */ #ifdef __cplusplus } #endif #endif #endif /* __STARPU_CUBLAS_V2_H__ */ starpu-1.4.9+dfsg/include/starpu_cuda.h000066400000000000000000000134001507764646700201140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_CUDA_H__ #define __STARPU_CUDA_H__ #include #ifdef STARPU_USE_CUDA #include #include #include #ifdef STARPU_HAVE_NVML_H #include #endif #ifdef __cplusplus extern "C" { #endif /** @defgroup API_CUDA_Extensions CUDA Extensions @{ */ /** Report a CUBLAS error. See \ref CUDASupport for more details. */ void starpu_cublas_report_error(const char *func, const char *file, int line, int status); /** Call starpu_cublas_report_error(), passing the current function, file and line position. */ #define STARPU_CUBLAS_REPORT_ERROR(status) starpu_cublas_report_error(__starpu_func__, __FILE__, __LINE__, status) /** Report a CUDA error. See \ref CUDASupport for more details. */ void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status); /** Call starpu_cuda_report_error(), passing the current function, file and line position. */ #define STARPU_CUDA_REPORT_ERROR(status) starpu_cuda_report_error(__starpu_func__, __FILE__, __LINE__, status) /** Return the current worker’s CUDA stream. StarPU provides a stream for every CUDA device controlled by StarPU. This function is only provided for convenience so that programmers can easily use asynchronous operations within codelets without having to create a stream by hand. Note that the application is not forced to use the stream provided by starpu_cuda_get_local_stream() and may also create its own streams. Synchronizing with cudaDeviceSynchronize() is allowed, but will reduce the likelihood of having all transfers overlapped. See \ref CUDA-specificOptimizations for more details. */ cudaStream_t starpu_cuda_get_local_stream(void); /** Return a pointer to device properties for worker \p workerid (assumed to be a CUDA worker). See \ref EnablingImplementationAccordingToCapabilities for more details. */ const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid); /** Copy \p ssize bytes from the pointer \p src_ptr on \p src_node to the pointer \p dst_ptr on \p dst_node. The function first tries to copy the data asynchronous (unless \p stream is NULL). If the asynchronous copy fails or if \p stream is NULL, it copies the data synchronously. The function returns -EAGAIN if the asynchronous launch was successful. It returns 0 if the synchronous copy was successful, or fails otherwise. See \ref CUDASupport for more details. */ int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind); /** Copy \p numblocks blocks of \p blocksize bytes from the pointer \p src_ptr on \p src_node to the pointer \p dst_ptr on \p dst_node. The blocks start at addresses which are ld_src (resp. ld_dst) bytes apart in the source (resp. destination) interface. The function first tries to copy the data asynchronous (unless \p stream is NULL). If the asynchronous copy fails or if \p stream is NULL, it copies the data synchronously. The function returns -EAGAIN if the asynchronous launch was successful. It returns 0 if the synchronous copy was successful, or fails otherwise. See \ref CUDASupport for more details. */ int starpu_cuda_copy2d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, cudaStream_t stream, enum cudaMemcpyKind kind); /** Copy \p numblocks_1 * \p numblocks_2 blocks of \p blocksize bytes from the pointer \p src_ptr on \p src_node to the pointer \p dst_ptr on \p dst_node. The blocks are grouped by \p numblocks_1 blocks whose start addresses are ld1_src (resp. ld1_dst) bytes apart in the source (resp. destination) interface. The function first tries to copy the data asynchronous (unless \p stream is NULL). If the asynchronous copy fails or if \p stream is NULL, it copies the data synchronously. The function returns -EAGAIN if the asynchronous launch was successful. It returns 0 if the synchronous copy was successful, or fails otherwise. See \ref CUDASupport for more details. */ int starpu_cuda_copy3d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t blocksize, size_t numblocks_1, size_t ld1_src, size_t ld1_dst, size_t numblocks_2, size_t ld2_src, size_t ld2_dst, cudaStream_t stream, enum cudaMemcpyKind kind); /** Call cudaSetDevice(\p devid) or cudaGLSetGLDevice(\p devid), according to whether \p devid is among the field starpu_conf::cuda_opengl_interoperability. See \ref CUDASupport for more details. */ void starpu_cuda_set_device(unsigned devid); #ifdef STARPU_HAVE_NVML_H /** Return the nvml device for a CUDA device See \ref CUDASupport for more details. */ nvmlDevice_t starpu_cuda_get_nvmldev(unsigned devid); #endif /** @} */ #ifdef __cplusplus } #endif #endif /* STARPU_USE_CUDA */ #endif /* __STARPU_CUDA_H__ */ starpu-1.4.9+dfsg/include/starpu_cusolver.h000066400000000000000000000051621507764646700210500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_CUSOLVER_H__ #define __STARPU_CUSOLVER_H__ #ifdef STARPU_USE_CUDA #include #include #include #endif #ifdef __cplusplus extern "C" { #endif /** @ingroup API_CUDA_Extensions @{ */ /** Initialize CUSOLVER on every CUDA device controlled by StarPU. This call blocks until CUSOLVER has been properly initialized on every device. See \ref CUDA-specificOptimizations */ void starpu_cusolver_init(void); /** Synchronously deinitialize the CUSOLVER library on every CUDA device. See \ref CUDA-specificOptimizations */ void starpu_cusolver_shutdown(void); #ifdef STARPU_USE_CUDA /** Return the CUSOLVER Dense handle to be used to queue CUSOLVER kernels. It is properly initialized and configured for multistream by starpu_cusolver_init(). See \ref CUDA-specificOptimizations */ cusolverDnHandle_t starpu_cusolverDn_get_local_handle(void); /** Return the CUSOLVER Sparse handle to be used to queue CUSOLVER kernels. It is properly initialized and configured for multistream by starpu_cusolver_init(). See \ref CUDA-specificOptimizations */ cusolverSpHandle_t starpu_cusolverSp_get_local_handle(void); /** Return the CUSOLVER Refactorization handle to be used to queue CUSOLVER kernels. It is properly initialized and configured for multistream by starpu_cusolver_init(). See \ref CUDA-specificOptimizations */ cusolverRfHandle_t starpu_cusolverRf_get_local_handle(void); /** Report a CUSOLVER error. See \ref CUDASupport for more details. */ void starpu_cusolver_report_error(const char *func, const char *file, int line, cusolverStatus_t status); /** Call starpu_cusolver_report_error(), passing the current function, file and line position. */ #define STARPU_CUSOLVER_REPORT_ERROR(status) starpu_cusolver_report_error(__starpu_func__, __FILE__, __LINE__, status) #endif /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_CUSOLVER_H__ */ starpu-1.4.9+dfsg/include/starpu_cusparse.h000066400000000000000000000032331507764646700210300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_CUSPARSE_H__ #define __STARPU_CUSPARSE_H__ #ifdef STARPU_USE_CUDA #include #endif #ifdef __cplusplus extern "C" { #endif /** @ingroup API_CUDA_Extensions @{ */ /** Initialize CUSPARSE on every CUDA device controlled by StarPU. This call blocks until CUSPARSE has been properly initialized on every device. See \ref CUDA-specificOptimizations for more details. */ void starpu_cusparse_init(void); /** Synchronously deinitialize the CUSPARSE library on every CUDA device. See \ref CUDA-specificOptimizations for more details. */ void starpu_cusparse_shutdown(void); #ifdef STARPU_USE_CUDA /** Return the CUSPARSE handle to be used to queue CUSPARSE kernels. It is properly initialized and configured for multistream by starpu_cusparse_init(). See \ref CUDA-specificOptimizations for more details. */ cusparseHandle_t starpu_cusparse_get_local_handle(void); #endif /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_CUSPARSE_H__ */ starpu-1.4.9+dfsg/include/starpu_data.h000066400000000000000000000761631507764646700201300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_DATA_H__ #define __STARPU_DATA_H__ #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Data_Management Data Management @brief Data management facilities provided by StarPU. We show how to use existing data interfaces in \ref API_Data_Interfaces, but developers can design their own data interfaces if required. @{ */ struct _starpu_data_state; /** StarPU uses ::starpu_data_handle_t as an opaque handle to manage a piece of data. Once a piece of data has been registered to StarPU, it is associated to a ::starpu_data_handle_t which keeps track of the state of the piece of data over the entire machine, so that we can maintain data consistency and locate data replicates for instance. See \ref DataInterface for more details. */ typedef struct _starpu_data_state *starpu_data_handle_t; /** Describe a StarPU data access mode Note: when adding a flag here, update _starpu_detect_implicit_data_deps_with_handle Note: other STARPU_* values in include/starpu_task_util.h */ enum starpu_data_access_mode { STARPU_NONE = 0, /**< todo */ STARPU_R = (1 << 0), /**< read-only mode */ STARPU_W = (1 << 1), /**< write-only mode */ STARPU_RW = (STARPU_R | STARPU_W), /**< read-write mode. Equivalent to ::STARPU_R|::STARPU_W */ STARPU_SCRATCH = (1 << 2), /**< A temporary buffer is allocated for the task, but StarPU does not enforce data consistency---i.e. each device has its own buffer, independently from each other (even for CPUs), and no data transfer is ever performed. This is useful for temporary variables to avoid allocating/freeing buffers inside each task. Currently, no behavior is defined concerning the relation with the ::STARPU_R and ::STARPU_W modes and the value provided at registration --- i.e., the value of the scratch buffer is undefined at entry of the codelet function. It is being considered for future extensions at least to define the initial value. For now, data to be used in ::STARPU_SCRATCH mode should be registered with node -1 and a NULL pointer, since the value of the provided buffer is simply ignored for now. See \ref ScratchData for more details. */ STARPU_REDUX = (1 << 3), /**< Reduction mode. StarPU will allocate on the fly a per-worker buffer, so that various tasks that access the same data in ::STARPU_REDUX mode can execute in parallel. When a task accesses the data without ::STARPU_REDUX, StarPU will automatically reduce the different contributions. Codelets contributing to these reductions with ::STARPU_REDUX must be registered with ::STARPU_RW | ::STARPU_COMMUTE access modes. See \ref DataReduction for more details. */ STARPU_COMMUTE = (1 << 4), /**< ::STARPU_COMMUTE can be passed along ::STARPU_W or ::STARPU_RW to express that StarPU can let tasks commute, which is useful e.g. when bringing a contribution into some data, which can be done in any order (but still require sequential consistency against reads or non-commutative writes). See \ref DataCommute for more details. */ STARPU_SSEND = (1 << 5), /**< used in starpu_mpi_task_insert() to specify the data has to be sent using a synchronous and non-blocking mode (see starpu_mpi_issend()) */ STARPU_LOCALITY = (1 << 6), /**< used to tell the scheduler which data is the most important for the task, and should thus be used to try to group tasks on the same core or cache, etc. For now only the ws and lws schedulers take this flag into account, and only when rebuild with \c USE_LOCALITY flag defined in the src/sched_policies/work_stealing_policy.c source code. TODO add extended description in documentation. */ STARPU_MPI_REDUX = (1 << 7), /**< Inter-node reduction only. This is similar to ::STARPU_REDUX, except that StarPU will allocate a per-node buffer only, i.e. parallelism will be achieved between nodes, but not within each node. This is useful when the per-worker buffers allocated with ::STARPU_REDUX consume too much memory. See \ref MPIMpiRedux for more details. */ STARPU_NOPLAN = (1 << 8), /**< Disable automatic submission of asynchronous partitioning/unpartitioning, only use internally by StarPU */ STARPU_UNMAP = (1 << 9), /**< Request unmapping the destination replicate, only use internally by StarPU */ STARPU_NOFOOTPRINT = (1 << 10), /**< Ignore this data for the footprint computation. See \ref ScratchData */ STARPU_ACCESS_MODE_MAX = (1 << 11) /**< The purpose of ::STARPU_ACCESS_MODE_MAX is to be the maximum of this enum. */ }; struct starpu_data_interface_ops; /** Set the name of the data, to be shown in various profiling tools. See \ref CreatingAGanttDiagram for more details. */ void starpu_data_set_name(starpu_data_handle_t handle, const char *name); /** Set the coordinates of the data, to be shown in various profiling tools. \p dimensions is the size of the \p dims array. This can be for instance the tile coordinates within a big matrix. See \ref CreatingAGanttDiagram for more details. */ void starpu_data_set_coordinates_array(starpu_data_handle_t handle, unsigned dimensions, int dims[]); /** Set the coordinates of the data, to be shown in various profiling tools. \p dimensions is the number of subsequent \c int parameters. This can be for instance the tile coordinates within a big matrix. See \ref CreatingAGanttDiagram for more details. */ void starpu_data_set_coordinates(starpu_data_handle_t handle, unsigned dimensions, ...); /** Get the coordinates of the data, as set by a previous call to starpu_data_set_coordinates_array() or starpu_data_set_coordinates() \p dimensions is the size of the \p dims array. This returns the actual number of returned coordinates. See \ref CreatingAGanttDiagram for more details. */ unsigned starpu_data_get_coordinates_array(starpu_data_handle_t handle, unsigned dimensions, int dims[]); /** Unregister a data \p handle from StarPU. If the data was automatically allocated by StarPU because the home node was -1, all automatically allocated buffers are freed. Otherwise, a valid copy of the data is put back into the home node in the buffer that was initially registered. Using a data handle that has been unregistered from StarPU results in an undefined behaviour. In case we do not need to update the value of the data in the home node, we can use the function starpu_data_unregister_no_coherency() instead. See \ref TaskSubmission for more details. */ void starpu_data_unregister(starpu_data_handle_t handle); /** Similar to starpu_data_unregister(), except that StarPU does not put back a valid copy into the home node, in the buffer that was initially registered. See \ref DataManagementAllocation for more details. */ void starpu_data_unregister_no_coherency(starpu_data_handle_t handle); /** Destroy the data \p handle once it is no longer needed by any submitted task. No coherency is provided. This is not safe to call starpu_data_unregister_submit() on a handle that comes from the registration of a non-NULL application home buffer, since the moment when the unregistration will happen is unknown to the application. Only calling starpu_shutdown() allows to be sure that the data was really unregistered. See \ref TemporaryData for more details. */ void starpu_data_unregister_submit(starpu_data_handle_t handle); /** Deinitialize all replicates of the data \p handle immediately. After data deinitialization, the first access to \p handle must be performed in ::STARPU_W mode. Accessing an deinitialized data in ::STARPU_R mode results in undefined behaviour. See \ref DataManagementAllocation for more details. */ void starpu_data_deinitialize(starpu_data_handle_t handle); /** Submit deinitialization of the data \p handle after completion of previously submitted tasks. See \ref DataManagementAllocation for more details. */ void starpu_data_deinitialize_submit(starpu_data_handle_t handle); /** Destroy all replicates of the data \p handle immediately. After data invalidation, the first access to \p handle must be performed in ::STARPU_W mode. Accessing an invalidated data in ::STARPU_R mode results in undefined behaviour. See \ref DataManagementAllocation for more details. This is the same as starpu_data_deinitialize(), plus explicitly releasing the buffers. */ void starpu_data_invalidate(starpu_data_handle_t handle); /** Submit invalidation of the data \p handle after completion of previously submitted tasks. See \ref DataManagementAllocation for more details. This is the same as starpu_data_deinitialize_submit(), plus explicitly releasing the buffers. */ void starpu_data_invalidate_submit(starpu_data_handle_t handle); /** Specify that the data \p handle can be discarded without impacting the application. */ void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important); /** @name Access registered data from the application @{ */ /** This macro can be used to acquire data, but not require it to be available on a given node, only enforce R/W dependencies. This can for instance be used to wait for tasks which produce the data, but without requesting a fetch to the main memory. */ #define STARPU_ACQUIRE_NO_NODE -1 /** Similar to ::STARPU_ACQUIRE_NO_NODE, but will lock the data on all nodes, preventing them from being evicted for instance. This is mostly useful inside StarPU only. */ #define STARPU_ACQUIRE_NO_NODE_LOCK_ALL -2 /** The application must call this function prior to accessing registered data from main memory outside tasks. StarPU ensures that the application will get an up-to-date copy of \p handle in main memory located where the data was originally registered, and that all concurrent accesses (e.g. from tasks) will be consistent with the access mode specified with \p mode. starpu_data_release() must be called once the application no longer needs to access the piece of data. Note that implicit data dependencies are also enforced by starpu_data_acquire(), i.e. starpu_data_acquire() will wait for all tasks scheduled to work on the data, unless they have been disabled explicitly by calling starpu_data_set_default_sequential_consistency_flag() or starpu_data_set_sequential_consistency_flag(). starpu_data_acquire() is a blocking call, so that it cannot be called from tasks or from their callbacks (in that case, starpu_data_acquire() returns -EDEADLK). Upon successful completion, this function returns 0. See \ref DataAccess for more details. */ int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_data_access_mode mode); /** Similar to starpu_data_acquire(), except that the data will be available on the given memory node instead of main memory. ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an explicit node number. See \ref DataAccess for more details. */ int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode); /** Asynchronous equivalent of starpu_data_acquire(). When the data specified in \p handle is available in the access \p mode, the \p callback function is executed. The application may access the requested data during the execution of \p callback. The \p callback function must call starpu_data_release() once the application no longer needs to access the piece of data. Note that implicit data dependencies are also enforced by starpu_data_acquire_cb() in case they are not disabled. Contrary to starpu_data_acquire(), this function is non-blocking and may be called from task callbacks. Upon successful completion, this function returns 0. See \ref DataAccess for more details. */ int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); /** Similar to starpu_data_acquire_cb(), except that the data will be available on the given memory node instead of main memory. ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an explicit node number. See \ref DataAccess for more details. */ int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); /** Similar to starpu_data_acquire_cb() with the possibility of enabling or disabling data dependencies. When the data specified in \p handle is available in the access \p mode, the \p callback function is executed. The application may access the requested data during the execution of this \p callback. The \p callback function must call starpu_data_release() once the application no longer needs to access the piece of data. Note that implicit data dependencies are also enforced by starpu_data_acquire_cb_sequential_consistency() in case they are not disabled specifically for the given \p handle or by the parameter \p sequential_consistency. Similarly to starpu_data_acquire_cb(), this function is non-blocking and may be called from task callbacks. Upon successful completion, this function returns 0. See \ref DataAccess for more details. */ int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); /** Similar to starpu_data_acquire_cb_sequential_consistency(), except that the data will be available on the given memory node instead of main memory. ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an explicit node number. See \ref DataAccess for more details. */ int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); /** Similar to starpu_data_acquire_on_node_cb_sequential_consistency(), except that the \e pre_sync_jobid and \e post_sync_jobid parameters can be used to retrieve the jobid of the synchronization tasks. \e pre_sync_jobid happens just before the acquisition, and \e post_sync_jobid happens just after the release. \p callback_acquired is called when the data is acquired in terms of semantic, but the data is not fetched yet. It is given a pointer to the node, which it can modify if it wishes so. This is a very internal interface, subject to changes, do not use this. */ int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback_acquired)(void *arg, int *node, enum starpu_data_access_mode mode), void (*callback)(void *arg), void *arg, int sequential_consistency, int quick, long *pre_sync_jobid, long *post_sync_jobid, int prio); /** The application can call this function instead of starpu_data_acquire() so as to acquire the data like starpu_data_acquire(), but only if all previously-submitted tasks have completed, in which case starpu_data_acquire_try() returns 0. StarPU will have ensured that the application will get an up-to-date copy of \p handle in main memory located where the data was originally registered. starpu_data_release() must be called once the application no longer needs to access the piece of data. See \ref DataAccess for more details. */ int starpu_data_acquire_try(starpu_data_handle_t handle, enum starpu_data_access_mode mode); /** Similar to starpu_data_acquire_try(), except that the data will be available on the given memory node instead of main memory. ::STARPU_ACQUIRE_NO_NODE and ::STARPU_ACQUIRE_NO_NODE_LOCK_ALL can be used instead of an explicit node number. See \ref DataAccess for more details. */ int starpu_data_acquire_on_node_try(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode); #ifdef __GCC__ /** STARPU_DATA_ACQUIRE_CB() is the same as starpu_data_acquire_cb(), except that the code to be executed in a callback is directly provided as a macro parameter, and the data \p handle is automatically released after it. This permits to easily execute code which depends on the value of some registered data. This is non-blocking too and may be called from task callbacks. */ #define STARPU_DATA_ACQUIRE_CB(handle, mode, code) \ do \ { \ void callback(void *arg) \ { \ code; \ starpu_data_release(handle); \ } \ starpu_data_acquire_cb(handle, mode, callback, NULL); \ } \ while (0) #endif /** Release the piece of data acquired by the application either by starpu_data_acquire() or by starpu_data_acquire_cb(). See \ref DataAccess for more details. */ void starpu_data_release(starpu_data_handle_t handle); /** Similar to starpu_data_release(), except that the data was made available on the given memory \p node instead of main memory. The \p node parameter must be exactly the same as the corresponding \c starpu_data_acquire_on_node* call. See \ref DataAccess for more details. */ void starpu_data_release_on_node(starpu_data_handle_t handle, int node); /** Partly release the piece of data acquired by the application either by starpu_data_acquire() or by starpu_data_acquire_cb(), switching the acquisition down to \p down_to_mode. For now, only releasing from ::STARPU_RW or ::STARPU_W acquisition down to ::STARPU_R is supported, or down to the same acquisition. ::STARPU_NONE can also be passed as \p down_to_mode, in which case this is equivalent to calling starpu_data_release(). See \ref DataAccess for more details. */ void starpu_data_release_to(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode); /** Similar to starpu_data_release_to(), except that the data was made available on the given memory \p node instead of main memory. The \p node parameter must be exactly the same as the corresponding \c starpu_data_acquire_on_node* call. See \ref DataAccess for more details. */ void starpu_data_release_to_on_node(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode, int node); /** @} */ /** This is an arbiter, which implements an advanced but centralized management of concurrent data accesses, see \ref ConcurrentDataAccess for the details. */ typedef struct starpu_arbiter *starpu_arbiter_t; /** Create a data access arbiter, see \ref ConcurrentDataAccess for the details */ starpu_arbiter_t starpu_arbiter_create(void) STARPU_ATTRIBUTE_MALLOC; /** Make access to \p handle managed by \p arbiter, see \ref ConcurrentDataAccess for the details. */ void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter); /** Destroy the \p arbiter. This must only be called after all data assigned to it have been unregistered. See \ref ConcurrentDataAccess for the details. */ void starpu_arbiter_destroy(starpu_arbiter_t arbiter); /** Explicitly ask StarPU to allocate room for a piece of data on the specified memory \p node. See \ref DataPrefetch for more details. */ int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node); /** Prefetch levels Data requests are ordered by priorities, but also by prefetching level, between data that a task wants now, and data that we will probably want "soon". */ enum starpu_is_prefetch { /** A task really needs it now! */ STARPU_FETCH = 0, /** A task will need it soon */ STARPU_TASK_PREFETCH = 1, /** It is a good idea to have it asap */ STARPU_PREFETCH = 2, /** Get this here when you have time to */ STARPU_IDLEFETCH = 3, STARPU_NFETCH }; /** Issue a fetch request for the data \p handle to \p node, i.e. requests that the data be replicated to the given node as soon as possible, so that it is available there for tasks. If \p async is 0, the call will block until the transfer is achieved, else the call will return immediately, after having just queued the request. In the latter case, the request will asynchronously wait for the completion of any task writing on the data. See \ref DataPrefetch for more details. */ int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); /** Issue a prefetch request for the data \p handle to \p node, i.e. requests that the data be replicated to \p node when there is room for it, so that it is available there for tasks. If \p async is 0, the call will block until the transfer is achieved, else the call will return immediately, after having just queued the request. In the latter case, the request will asynchronously wait for the completion of any task writing on the data. See \ref DataPrefetch for more details. */ int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); /** See \ref DataPrefetch for more details. */ int starpu_data_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); /** Issue an idle prefetch request for the data \p handle to \p node, i.e. requests that the data be replicated to \p node, so that it is available there for tasks, but only when the bus is really idle. If \p async is 0, the call will block until the transfer is achieved, else the call will return immediately, after having just queued the request. In the latter case, the request will asynchronously wait for the completion of any task writing on the data. See \ref DataPrefetch for more details. */ int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); /** See \ref DataPrefetch for more details. */ int starpu_data_idle_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); /** Check whether a valid copy of \p handle is currently available on memory node \p node (or a transfer request for getting so is ongoing). See \ref SchedulingHelpers for more details. */ unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node); /** Advise StarPU that \p handle will not be used in the close future, and is thus a good candidate for eviction from GPUs. StarPU will thus write its value back to its home node when the bus is idle, and select this data in priority for eviction when memory gets low. See \ref DataPrefetch for more details. */ void starpu_data_wont_use(starpu_data_handle_t handle); /** Advise StarPU to evict \p handle from the memory node \p node StarPU will thus write its value back to its home node, before evicting it. This may however fail if e.g. some task is still working on it. If the eviction was successful, 0 is returned ; -1 is returned otherwise. See \ref DataPrefetch for more details. */ int starpu_data_evict_from_node(starpu_data_handle_t handle, unsigned node); /** Set the write-through mask of the data \p handle (and its children), i.e. a bitmask of nodes where the data should be always replicated after modification. It also prevents the data from being evicted from these nodes when memory gets scarse. When the data is modified, it is automatically transferred into those memory nodes. For instance a 1<<0 write-through mask means that the CUDA workers will commit their changes in main memory (node 0). See \ref DataManagementAllocation for more details. */ void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask); /** @name Implicit Data Dependencies In this section, we describe how StarPU makes it possible to insert implicit task dependencies in order to enforce sequential data consistency. When this data consistency is enabled on a specific data handle, any data access will appear as sequentially consistent from the application. For instance, if the application submits two tasks that access the same piece of data in read-only mode, and then a third task that access it in write mode, dependencies will be added between the two first tasks and the third one. Implicit data dependencies are also inserted in the case of data accesses from the application. @{ */ /** Set the data consistency mode associated to a data handle. The consistency mode set using this function has the priority over the default mode which can be set with starpu_data_set_default_sequential_consistency_flag(). See \ref SequentialConsistency and \ref DataManagementAllocation for more details. */ void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag); /** Get the data consistency mode associated to the data handle \p handle. See \ref SequentialConsistency for more details. */ unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle); /** Return the default sequential consistency flag. See \ref SequentialConsistency for more details. */ unsigned starpu_data_get_default_sequential_consistency_flag(void); /** Set the default sequential consistency flag. If a non-zero value is passed, a sequential data consistency will be enforced for all handles registered after this function call, otherwise it is disabled. By default, StarPU enables sequential data consistency. It is also possible to select the data consistency mode of a specific data handle with the function starpu_data_set_sequential_consistency_flag(). See \ref SequentialConsistency for more details. */ void starpu_data_set_default_sequential_consistency_flag(unsigned flag); /** @} */ /** Set whether this data should be elligible to be evicted to disk storage (1) or not (0). The default is 1. See \ref OOCDataRegistration for more details. */ void starpu_data_set_ooc_flag(starpu_data_handle_t handle, unsigned flag); /** Get whether this data was set to be elligible to be evicted to disk storage (1) or not (0). See \ref OOCDataRegistration for more details. */ unsigned starpu_data_get_ooc_flag(starpu_data_handle_t handle); /** Query the status of \p handle on the specified \p memory_node. \p is_allocated tells whether memory was allocated there for the data. \p is_valid tells whether the actual value is available there. \p is_loading tells whether the actual value is getting loaded there. \p is_requested tells whether the actual value is requested to be loaded there by some fetch/prefetch/idlefetch request. See \ref DataPrefetch for more details. */ void starpu_data_query_status2(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_loading, int *is_requested); /** Same as starpu_data_query_status2(), but without the is_loading parameter. See \ref DataPrefetch for more details. */ void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested); struct starpu_codelet; /** Set the codelets to be used for \p handle when it is accessed in the mode ::STARPU_REDUX. Per-worker buffers will be initialized with the codelet \p init_cl (which has to take one handle with ::STARPU_W), and reduction between per-worker buffers will be done with the codelet \p redux_cl (which has to take a first accumulation handle with ::STARPU_RW|::STARPU_COMMUTE, and a second contribution handle with ::STARPU_R). See \ref DataReduction and \ref TemporaryData for more details. */ void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl); /** Same as starpu_data_set_reduction_methods() but allows to pass arguments to the reduction and init tasks */ void starpu_data_set_reduction_methods_with_args(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, void *redux_cl_arg, struct starpu_codelet *init_cl, void *init_cl_arg); struct starpu_data_interface_ops *starpu_data_get_interface_ops(starpu_data_handle_t handle); /** See \ref DataPrefetch for more details. */ unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node); /** See \ref DataPrefetch for more details. */ unsigned starpu_data_test_if_mapped_on_node(starpu_data_handle_t handle, unsigned memory_node); /** See \ref DataPrefetch for more details. */ void starpu_memchunk_tidy(unsigned memory_node); /** Set the field \c user_data for the \p handle to \p user_data . It can then be retrieved with starpu_data_get_user_data(). \p user_data can be any application-defined value, for instance a pointer to an object-oriented container for the data. See \ref DataHandlesHelpers for more details. */ void starpu_data_set_user_data(starpu_data_handle_t handle, void *user_data); /** Retrieve the field \c user_data previously set for the \p handle. See \ref DataHandlesHelpers for more details. */ void *starpu_data_get_user_data(starpu_data_handle_t handle); /** Set the field \c sched_data for the \p handle to \p sched_data . It can then be retrieved with starpu_data_get_sched_data(). \p sched_data can be any scheduler-defined value. See \ref DataHandlesHelpers for more details. */ void starpu_data_set_sched_data(starpu_data_handle_t handle, void *sched_data); /** Retrieve the field \c sched_data previously set for the \p handle. See \ref DataHandlesHelpers for more details. */ void *starpu_data_get_sched_data(starpu_data_handle_t handle); /** Check whether data \p handle can be evicted now from node \p node. See \ref DataPrefetch for more details. */ int starpu_data_can_evict(starpu_data_handle_t handle, unsigned node, enum starpu_is_prefetch is_prefetch); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_DATA_H__ */ starpu-1.4.9+dfsg/include/starpu_data_filters.h000066400000000000000000001340561507764646700216540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_DATA_FILTERS_H__ #define __STARPU_DATA_FILTERS_H__ #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Data_Partition Data Partition @{ */ struct starpu_data_interface_ops; /** Describe a data partitioning operation, to be given to starpu_data_partition(). See \ref DefiningANewDataFilter for more details. */ struct starpu_data_filter { /** Fill the \p child_interface structure with interface information for the \p i -th child of the parent \p father_interface (among \p nparts). The \p filter structure is provided, allowing to inspect the starpu_data_filter::filter_arg and starpu_data_filter::filter_arg_ptr parameters. The details of what needs to be filled in \p child_interface vary according to the data interface, but generally speaking:
    • id is usually just copied over from the father, when the sub data has the same structure as the father, e.g. a subvector is a vector, a submatrix is a matrix, etc. This is however not the case for instance when dividing a BCSR matrix into its dense blocks, which then are matrices.
    • nx, ny and alike are usually divided by the number of subdata, depending how the subdivision is done (e.g. nx division vs ny division for vertical matrix division vs horizontal matrix division).
    • ld for matrix interfaces are usually just copied over: the leading dimension (ld) usually does not change.
    • elemsize is usually just copied over.
    • ptr, the pointer to the data, has to be computed according to \p i and the father's ptr, so as to point to the start of the sub data. This should however be done only if the father has ptr different from NULL: in the OpenCL case notably, the dev_handle and offset fields are used instead.
    • dev_handle should be just copied over from the parent.
    • offset has to be computed according to \p i and the father's offset, so as to provide the offset of the start of the sub data. This is notably used for the OpenCL case.
    */ void (*filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts); unsigned nchildren; /**< Number of parts to partition the data into. */ /** Return the number of children. This can be used instead of starpu_data_filter::nchildren when the number of children depends on the actual data (e.g. the number of blocks in a sparse matrix). */ unsigned (*get_nchildren)(struct starpu_data_filter *, starpu_data_handle_t initial_handle); /** When children use different data interface, return which interface is used by child number \p id. */ struct starpu_data_interface_ops *(*get_child_ops)(struct starpu_data_filter *, unsigned id); unsigned filter_arg; /**< Additional parameter for the filter function */ /** Additional pointer parameter for the filter function, such as the sizes of the different parts. */ void *filter_arg_ptr; }; /** @name Basic API @{ */ /** Request the partitioning of \p initial_handle into several subdata according to the filter \p f. Here an example of how to use the function. \code{.c} struct starpu_data_filter f = { .filter_func = starpu_matrix_filter_block, .nchildren = nslicesx }; starpu_data_partition(A_handle, &f); \endcode See \ref PartitioningData for more details. */ void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f); /** Unapply the filter which has been applied to \p root_data, thus unpartitioning the data. The pieces of data are collected back into one big piece in the \p gathering_node (usually ::STARPU_MAIN_RAM). Tasks working on the partitioned data will be waited for by starpu_data_unpartition(). Here an example of how to use the function. \code{.c} starpu_data_unpartition(A_handle, STARPU_MAIN_RAM); \endcode See \ref PartitioningData for more details. */ void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node); /** Return the \p i -th child of the given \p handle, which must have been partitioned beforehand. See \ref PartitioningData for more details. */ starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i); /** Return the number of children \p handle has been partitioned into. See \ref PartitioningData for more details. */ int starpu_data_get_nb_children(starpu_data_handle_t handle); /** After partitioning a StarPU data by applying a filter, starpu_data_get_sub_data() can be used to get handles for each of the data portions. \p root_data is the parent data that was partitioned. \p depth is the number of filters to traverse (in case several filters have been applied, to e.g. partition in row blocks, and then in column blocks), and the subsequent parameters are the indexes. The function returns a handle to the subdata. Here an example of how to use the function. \code{.c} h = starpu_data_get_sub_data(A_handle, 1, taskx); \endcode See \ref PartitioningData for more details. */ starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ...); /** Similar to starpu_data_get_sub_data() but use a \c va_list for the parameter list. See \ref PartitioningData for more details. */ starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa); /** Apply \p nfilters filters to the handle designated by \p root_handle recursively. \p nfilters pointers to variables of the type starpu_data_filter should be given. See \ref PartitioningData for more details. */ void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...); /** Apply \p nfilters filters to the handle designated by \p root_handle recursively. Use a \p va_list of pointers to variables of the type starpu_data_filter. See \ref PartitioningData for more details. */ void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa); /** Apply \p nfilters filters to the handle designated by \p root_handle recursively. The pointer of the filter list \p filters of the type starpu_data_filter should be given. See \ref PartitioningData for more details. */ void starpu_data_map_filters_parray(starpu_data_handle_t root_handle, int nfilters, struct starpu_data_filter **filters); /** Apply \p nfilters filters to the handle designated by \p root_handle recursively. The list of filter \p filters of the type starpu_data_filter should be given. See \ref PartitioningData for more details. */ void starpu_data_map_filters_array(starpu_data_handle_t root_handle, int nfilters, struct starpu_data_filter *filters); /** @} */ /** @name Asynchronous API @{ */ /** Plan to partition \p initial_handle into several subdata according to the filter \p f. The handles are returned into the \p children array, which has to be the same size as the number of parts described in \p f. Here is an example of how to use the function: \code{.c} starpu_data_handle_t children[nslicesx]; struct starpu_data_filter f = { .filter_func = starpu_matrix_filter_block, .nchildren = nslicesx }; starpu_data_partition_plan(A_handle, &f, children); \endcode See \ref AsynchronousPartitioning for more details. */ void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children); /** Submit the actual partitioning of \p initial_handle into the \p nparts \p children handles. This call is asynchronous, it only submits that the partitioning should be done, so that the \p children handles can now be used to submit tasks, and \p initial_handle can not be used to submit tasks any more (to guarantee coherency). For instance, \code{.c} starpu_data_partition_submit(A_handle, nslicesx, children); \endcode See \ref AsynchronousPartitioning for more details. */ void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); /** Similar to starpu_data_partition_submit(), but do not invalidate \p initial_handle. This allows to continue using it, but the application has to be careful not to write to \p initial_handle or \p children handles, only read from them, since the coherency is otherwise not guaranteed. This thus allows to submit various tasks which concurrently read from various partitions of the data. When the application wants to write to \p initial_handle again, it should call starpu_data_unpartition_submit(), which will properly add dependencies between the reads on the \p children and the writes to be submitted. If instead the application wants to write to \p children handles, it should call starpu_data_partition_readwrite_upgrade_submit(), which will correctly add dependencies between the reads on the \p initial_handle and the writes to be submitted. See \ref AsynchronousPartitioning for more details. */ void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); /** Similar to starpu_data_partition_readonly_submit(), but allow to specify the coherency to be used for the main data \p initial_handle. See \ref AsynchronousPartitioning for more details. */ void starpu_data_partition_readonly_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency); /** Assume that a partitioning of \p initial_handle has already been submitted in readonly mode through starpu_data_partition_readonly_submit(), and will upgrade that partitioning into read-write mode for the \p children, by invalidating \p initial_handle, and adding the necessary dependencies. See \ref AsynchronousPartitioning for more details. */ void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); /** Assume that a partitioning of \p initial_handle has already been submitted in read-write mode through starpu_data_partition_submit(), and will downgrade that partitioning into read-only mode for the \p children, fetching data back to the \p initial_handle, and adding the necessary dependencies. See \ref AsynchronousPartitioning for more details. */ void starpu_data_partition_readonly_downgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); /** Assuming that \p initial_handle is partitioned into \p children, submit an unpartitionning of \p initial_handle, i.e. submit a gathering of the pieces on the requested \p gathering_node memory node, and submit an invalidation of the children. See \ref AsynchronousPartitioning for more details. */ void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); /** Similar to starpu_data_partition_submit(), but do not invalidate \p initial_handle. This allows to continue using it, but the application has to be careful not to write to \p initial_handle or \p children handles, only read from them, since the coherency is otherwise not guaranteed. This thus allows to submit various tasks which concurrently read from various partitions of the data. See \ref AsynchronousPartitioning for more details. */ void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); /** Clear the partition planning established between \p root_data and \p children with starpu_data_partition_plan(). This will notably submit an unregister all the \p children, which can thus not be used any more afterwards. See \ref AsynchronousPartitioning for more details. */ void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children); /** Similar to starpu_data_partition_clean() but the root data will be gathered on the given node. See \ref AsynchronousPartitioning for more details. */ void starpu_data_partition_clean_node(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children, int gather_node); /** Similar to starpu_data_unpartition_submit_sequential_consistency() but allow to specify a callback function for the unpartitiong task. See \ref AsynchronousPartitioning for more details. */ void starpu_data_unpartition_submit_sequential_consistency_cb(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node, int sequential_consistency, void (*callback_func)(void *), void *callback_arg); /** Similar to starpu_data_partition_submit() but also allow to specify the coherency to be used for the main data \p initial_handle through the parameter \p sequential_consistency. See \ref AsynchronousPartitioning for more details. */ void starpu_data_partition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency); /** Similar to starpu_data_unpartition_submit() but also allow to specify the coherency to be used for the main data \p initial_handle through the parameter \p sequential_consistency. See \ref AsynchronousPartitioning for more details. */ void starpu_data_unpartition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node, int sequential_consistency); /** @} */ /** @name Predefined BCSR Filter Functions Predefined partitioning functions for BCSR data. Examples on how to use them are shown in \ref PartitioningData. @{ */ /** Partition a block-sparse matrix into dense matrices. starpu_data_filter::get_child_ops needs to be set to starpu_bcsr_filter_canonical_block_child_ops() and starpu_data_filter::get_nchildren set to starpu_bcsr_filter_canonical_block_get_nchildren(). See \ref BCSRDataInterface for more details. */ void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Return the number of children obtained with starpu_bcsr_filter_canonical_block(). See \ref BCSRDataInterface for more details. */ unsigned starpu_bcsr_filter_canonical_block_get_nchildren(struct starpu_data_filter *f, starpu_data_handle_t handle); /** Return the child_ops of the partition obtained with starpu_bcsr_filter_canonical_block(). See \ref BCSRDataInterface for more details. */ struct starpu_data_interface_ops *starpu_bcsr_filter_canonical_block_child_ops(struct starpu_data_filter *f, unsigned child); /** Partition a block-sparse matrix into block-sparse matrices. The split is done along the leading dimension, i.e. along adjacent nnz blocks. See \ref BCSRDataInterface for more details. */ void starpu_bcsr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** @} */ /** @name Predefined CSR Filter Functions Predefined partitioning functions for CSR data. Examples on how to use them are shown in \ref PartitioningData. @{ */ /** Partition a block-sparse matrix into vertical block-sparse matrices. See \ref CSRDataInterface for more details. */ void starpu_csr_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** @} */ /** @name Predefined Matrix Filter Functions Predefined partitioning functions for matrix data. Examples on how to use them are shown in \ref PartitioningData. Note: this is using the C element order which is row-major, i.e. elements with consecutive x coordinates are consecutive in memory. @{ */ /** Partition a dense Matrix along the x dimension, thus getting (x/\p nparts ,y) matrices. If \p nparts does not divide x, the last submatrix contains the remainder. See \ref MatrixDataInterface for more details. */ void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a dense Matrix along the x dimension, with a shadow border starpu_data_filter::filter_arg_ptr, thus getting ((x-2*shadow)/\p nparts +2*shadow,y) matrices. If \p nparts does not divide x-2*shadow, the last submatrix contains the remainder. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. A usage example is available in examples/filters/shadow2d.c See \ref MatrixDataInterface for more details. */ void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a dense Matrix along the y dimension, thus getting (x,y/\p nparts) matrices. If \p nparts does not divide y, the last submatrix contains the remainder. See \ref MatrixDataInterface for more details. */ void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a dense Matrix along the y dimension, with a shadow border starpu_data_filter::filter_arg_ptr, thus getting (x,(y-2*shadow)/\p nparts +2*shadow) matrices. If \p nparts does not divide y-2*shadow, the last submatrix contains the remainder. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. A usage example is available in examples/filters/shadow2d.c See \ref MatrixDataInterface for more details. */ void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Pick \p nparts contiguous vectors from a matrix along the Y dimension. The starting position on Y-axis is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_matrix_filter_pick_vector_child_ops(). A usage example is available in examples/filters/fmatrix_pick_vector.c See \ref MatrixDataInterface for more details. */ void starpu_matrix_filter_pick_vector_y(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Return the child_ops of the partition obtained with starpu_matrix_filter_pick_vector_y(). See \ref MatrixDataInterface for more details. */ struct starpu_data_interface_ops *starpu_matrix_filter_pick_vector_child_ops(struct starpu_data_filter *f, unsigned child); /** Pick \p nparts contiguous variables from a matrix. The starting position is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_matrix_filter_pick_variable_child_ops(). A usage example is available in examples/filters/fmatrix_pick_variable.c See \ref MatrixDataInterface for more details. */ void starpu_matrix_filter_pick_variable(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Return the child_ops of the partition obtained with starpu_matrix_filter_pick_variable(). See \ref MatrixDataInterface for more details. */ struct starpu_data_interface_ops *starpu_matrix_filter_pick_variable_child_ops(struct starpu_data_filter *f, unsigned child); /** @} */ /** @name Predefined Vector Filter Functions Predefined partitioning functions for vector data. Examples on how to use them are shown in \ref PartitioningData. @{ */ /** Return in \p child_interface the \p id th element of the vector represented by \p father_interface once partitioned in \p nparts chunks of equal size. See \ref VectorDataInterface for more details. */ void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Return in \p child_interface the \p id th element of the vector represented by \p father_interface once partitioned in \p nparts chunks of equal size with a shadow border starpu_data_filter::filter_arg_ptr, thus getting a vector of size (n-2*shadow)/nparts+2*shadow. The starpu_data_filter::filter_arg_ptr field of \p f must be the shadow size casted into \c void*. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. An usage example is available in examples/filters/shadow.c See \ref VectorDataInterface for more details. */ void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Return in \p child_interface the \p id th element of the vector represented by \p father_interface once partitioned into \p nparts chunks according to the starpu_data_filter::filter_arg_ptr field of \p f. The starpu_data_filter::filter_arg_ptr field must point to an array of \p nparts long elements, each of which specifies the number of elements in each chunk of the partition. See \ref VectorDataInterface for more details. */ void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Return in \p child_interface the \p id th element of the vector represented by \p father_interface once partitioned into \p nparts chunks according to the starpu_data_filter::filter_arg_ptr field of \p f. The starpu_data_filter::filter_arg_ptr field must point to an array of \p nparts uint32_t elements, each of which specifies the number of elements in each chunk of the partition. See \ref VectorDataInterface for more details. */ void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Return in \p child_interface the \p id th element of the vector represented by \p father_interface once partitioned in 2 chunks of equal size, ignoring nparts. Thus, \p id must be 0 or 1. See \ref VectorDataInterface for more details. */ void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Pick \p nparts contiguous variables from a vector. The starting position is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_vector_filter_pick_variable_child_ops(). A usage example is available in examples/filters/fvector_pick_variable.c See \ref VectorDataInterface for more details. */ void starpu_vector_filter_pick_variable(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Return the child_ops of the partition obtained with starpu_vector_filter_pick_variable(). See \ref VectorDataInterface for more details. */ struct starpu_data_interface_ops *starpu_vector_filter_pick_variable_child_ops(struct starpu_data_filter *f, unsigned child); /** @} */ /** @name Predefined Block Filter Functions Predefined partitioning functions for block data. Examples on how to use them are shown in \ref PartitioningData. An example is available in \c examples/filters/shadow3d.c Note: this is using the C element order which is row-major, i.e. elements with consecutive x coordinates are consecutive in memory. @{ */ /** Partition a block along the X dimension, thus getting (x/\p nparts ,y,z) 3D matrices. If \p nparts does not divide x, the last submatrix contains the remainder. See \ref BlockDataInterface for more details. */ void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a block along the X dimension, with a shadow border starpu_data_filter::filter_arg_ptr, thus getting ((x-2*shadow)/\p nparts +2*shadow,y,z) blocks. If \p nparts does not divide x, the last submatrix contains the remainder. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. See \ref BlockDataInterface for more details. */ void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a block along the Y dimension, thus getting (x,y/\p nparts ,z) blocks. If \p nparts does not divide y, the last submatrix contains the remainder. See \ref BlockDataInterface for more details. */ void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a block along the Y dimension, with a shadow border starpu_data_filter::filter_arg_ptr, thus getting (x,(y-2*shadow)/\p nparts +2*shadow,z) 3D matrices. If \p nparts does not divide y, the last submatrix contains the remainder. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. See \ref BlockDataInterface for more details. */ void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a block along the Z dimension, thus getting (x,y,z/\p nparts) blocks. If \p nparts does not divide z, the last submatrix contains the remainder. See \ref BlockDataInterface for more details. */ void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a block along the Z dimension, with a shadow border starpu_data_filter::filter_arg_ptr, thus getting (x,y,(z-2*shadow)/\p nparts +2*shadow) blocks. If \p nparts does not divide z, the last submatrix contains the remainder. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. See \ref BlockDataInterface for more details. */ void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Pick \p nparts contiguous matrices from a block along the Z dimension. The starting position on Z-axis is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_block_filter_pick_matrix_child_ops(). A usage example is available in examples/filters/fblock_pick_matrix.c See \ref BlockDataInterface for more details. */ void starpu_block_filter_pick_matrix_z(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Pick \p nparts contiguous matrices from a block along the Y dimension. The starting position on Y-axis is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_block_filter_pick_matrix_child_ops(). A usage example is available in examples/filters/fblock_pick_matrix.c See \ref BlockDataInterface for more details. */ void starpu_block_filter_pick_matrix_y(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Return the child_ops of the partition obtained with starpu_block_filter_pick_matrix_z() and starpu_block_filter_pick_matrix_y(). See \ref BlockDataInterface for more details. */ struct starpu_data_interface_ops *starpu_block_filter_pick_matrix_child_ops(struct starpu_data_filter *f, unsigned child); /** Pick \p nparts contiguous variables from a block. The starting position is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_block_filter_pick_variable_child_ops(). A usage example is available in examples/filters/fblock_pick_variable.c See \ref BlockDataInterface for more details. */ void starpu_block_filter_pick_variable(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Return the child_ops of the partition obtained with starpu_block_filter_pick_variable(). See \ref BlockDataInterface for more details. */ struct starpu_data_interface_ops *starpu_block_filter_pick_variable_child_ops(struct starpu_data_filter *f, unsigned child); /** @} */ /** @name Predefined Tensor Filter Functions Predefined partitioning functions for tensor data. @{ */ /** Partition a tensor along the X dimension, thus getting (x/\p nparts ,y,z,t) tensors. If \p nparts does not divide x, the last submatrix contains the remainder. See \ref TensorDataInterface for more details. */ void starpu_tensor_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a tensor along the X dimension, with a shadow border starpu_data_filter::filter_arg_ptr, thus getting ((x-2*shadow)/\p nparts +2*shadow,y,z,t) tensors. If \p nparts does not divide x, the last submatrix contains the remainder. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. See \ref TensorDataInterface for more details. */ void starpu_tensor_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a tensor along the Y dimension, thus getting (x,y/\p nparts ,z,t) tensors. If \p nparts does not divide y, the last submatrix contains the remainder. See \ref TensorDataInterface for more details. */ void starpu_tensor_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a tensor along the Y dimension, with a shadow border starpu_data_filter::filter_arg_ptr, thus getting (x,(y-2*shadow)/\p nparts +2*shadow,z,t) tensors. If \p nparts does not divide y, the last submatrix contains the remainder. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. See \ref TensorDataInterface for more details. */ void starpu_tensor_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a tensor along the Z dimension, thus getting (x,y,z/\p nparts,t) tensors. If \p nparts does not divide z, the last submatrix contains the remainder. See \ref TensorDataInterface for more details. */ void starpu_tensor_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a tensor along the Z dimension, with a shadow border starpu_data_filter::filter_arg_ptr, thus getting (x,y,(z-2*shadow)/\p nparts +2*shadow,t) tensors. If \p nparts does not divide z, the last submatrix contains the remainder. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. See \ref TensorDataInterface for more details. */ void starpu_tensor_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a tensor along the T dimension, thus getting (x,y,z,t/\p nparts) tensors. If \p nparts does not divide t, the last submatrix contains the remainder. See \ref TensorDataInterface for more details. */ void starpu_tensor_filter_time_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a tensor along the T dimension, with a shadow border starpu_data_filter::filter_arg_ptr, thus getting (x,y,z,(t-2*shadow)/\p nparts +2*shadow) tensors. If \p nparts does not divide t, the last submatrix contains the remainder. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. See \ref TensorDataInterface for more details. */ void starpu_tensor_filter_time_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Pick \p nparts contiguous blocks from a tensor along the T dimension. The starting position on T-axis is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_tensor_filter_pick_block_child_ops(). A usage example is available in examples/filters/ftensor_pick_block.c See \ref TensorDataInterface for more details. */ void starpu_tensor_filter_pick_block_t(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Pick \p nparts contiguous blocks from a tensor along the Z dimension. The starting position on Z-axis is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_tensor_filter_pick_block_child_ops(). A usage example is available in examples/filters/ftensor_pick_block.c See \ref TensorDataInterface for more details. */ void starpu_tensor_filter_pick_block_z(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Pick \p nparts contiguous blocks from a tensor along the Y dimension. The starting position on Y-axis is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_tensor_filter_pick_block_child_ops(). A usage example is available in examples/filters/ftensor_pick_block.c See \ref TensorDataInterface for more details. */ void starpu_tensor_filter_pick_block_y(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Return the child_ops of the partition obtained with starpu_tensor_filter_pick_block_t(), starpu_tensor_filter_pick_block_z() and starpu_tensor_filter_pick_block_y(). See \ref TensorDataInterface for more details. */ struct starpu_data_interface_ops *starpu_tensor_filter_pick_block_child_ops(struct starpu_data_filter *f, unsigned child); /** Pick \p nparts contiguous variables from a tensor. The starting position is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_tensor_filter_pick_variable_child_ops(). A usage example is available in examples/filters/ftensor_pick_variable.c See \ref TensorDataInterface for more details. */ void starpu_tensor_filter_pick_variable(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Return the child_ops of the partition obtained with starpu_tensor_filter_pick_variable(). See \ref TensorDataInterface for more details. */ struct starpu_data_interface_ops *starpu_tensor_filter_pick_variable_child_ops(struct starpu_data_filter *f, unsigned child); /** @} */ /** @name Predefined Ndim Filter Functions Predefined partitioning functions for ndim array data. @{ */ /** Partition a ndim array along the given dimension set in starpu_data_filter::filter_arg. If \p nparts does not divide the element number on dimension, the last submatrix contains the remainder. See \ref NdimDataInterface for more details. */ void starpu_ndim_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a ndim array along the given dimension set in starpu_data_filter::filter_arg, with a shadow border starpu_data_filter::filter_arg_ptr. If \p nparts does not divide the element number on dimension, the last submatrix contains the remainder. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. See \ref NdimDataInterface for more details. */ void starpu_ndim_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a 4-dim array into \p nparts tensors along the given dimension set in starpu_data_filter::filter_arg. starpu_data_filter::get_child_ops needs to be set to starpu_ndim_filter_to_tensor_child_ops(). A usage example is available in examples/filters/fndim_to_tensor.c See \ref NdimDataInterface for more details. */ void starpu_ndim_filter_to_tensor(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a 3-dim array into \p nparts blocks along the given dimension set in starpu_data_filter::filter_arg. starpu_data_filter::get_child_ops needs to be set to starpu_ndim_filter_to_block_child_ops(). A usage example is available in examples/filters/fndim_to_block.c See \ref NdimDataInterface for more details. */ void starpu_ndim_filter_to_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a 2-dim array into \p nparts matrices along the given dimension set in starpu_data_filter::filter_arg. starpu_data_filter::get_child_ops needs to be set to starpu_ndim_filter_to_matrix_child_ops(). A usage example is available in examples/filters/fndim_to_matrix.c See \ref NdimDataInterface for more details. */ void starpu_ndim_filter_to_matrix(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Partition a 1-dim array into \p nparts vectors. starpu_data_filter::get_child_ops needs to be set to starpu_ndim_filter_to_vector_child_ops(). A usage example is available in examples/filters/fndim_to_vector.c See \ref NdimDataInterface for more details. */ void starpu_ndim_filter_to_vector(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Transfer a 0-dim array to a variable. starpu_data_filter::get_child_ops needs to be set to starpu_ndim_filter_to_variable_child_ops(). A usage example is available in examples/filters/fndim_to_variable.c See \ref NdimDataInterface for more details. */ void starpu_ndim_filter_to_variable(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Pick \p nparts contiguous (n-1)dim arrays from a ndim array along the given dimension set in starpu_data_filter::filter_arg. The starting position is set in starpu_data_filter::filter_arg_ptr. A usage example is available in examples/filters/fndim_pick_ndim.c See \ref NdimDataInterface for more details. */ void starpu_ndim_filter_pick_ndim(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Pick \p nparts contiguous tensors from a 5-dim array along the given dimension set in starpu_data_filter::filter_arg. The starting position is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_ndim_filter_pick_tensor_child_ops(). A usage example is available in examples/filters/fndim_5d_pick_tensor.c See \ref NdimDataInterface for more details. */ void starpu_ndim_filter_5d_pick_tensor(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Pick \p nparts contiguous blocks from a 4-dim array along the given dimension set in starpu_data_filter::filter_arg. The starting position is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_ndim_filter_pick_block_child_ops(). A usage example is available in examples/filters/fndim_4d_pick_block.c See \ref NdimDataInterface for more details. */ void starpu_ndim_filter_4d_pick_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Pick \p nparts contiguous matrices from a 3-dim array along the given dimension set in starpu_data_filter::filter_arg. The starting position is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_ndim_filter_pick_matrix_child_ops(). A usage example is available in examples/filters/fndim_3d_pick_matrix.c See \ref NdimDataInterface for more details. */ void starpu_ndim_filter_3d_pick_matrix(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Pick \p nparts contiguous vectors from a 2-dim array along the given dimension set in starpu_data_filter::filter_arg. The starting position is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_ndim_filter_pick_vector_child_ops(). A usage example is available in examples/filters/fndim_2d_pick_vector.c See \ref NdimDataInterface for more details. */ void starpu_ndim_filter_2d_pick_vector(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Pick \p nparts contiguous variables from a 1-dim array. The starting position is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_ndim_filter_pick_variable_child_ops(). A usage example is available in examples/filters/fndim_1d_pick_variable.c See \ref NdimDataInterface for more details. */ void starpu_ndim_filter_1d_pick_variable(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Pick \p nparts contiguous variables from a ndim array. The starting position is set in starpu_data_filter::filter_arg_ptr. starpu_data_filter::get_child_ops needs to be set to starpu_ndim_filter_pick_variable_child_ops(). A usage example is available in examples/filters/fndim_pick_variable.c See \ref NdimDataInterface for more details. */ void starpu_ndim_filter_pick_variable(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); /** Return the child_ops of the partition obtained with starpu_ndim_filter_pick_tensor(). See \ref NdimDataInterface for more details. */ struct starpu_data_interface_ops *starpu_ndim_filter_pick_tensor_child_ops(struct starpu_data_filter *f, unsigned child); /** Return the child_ops of the partition obtained with starpu_ndim_filter_pick_block(). See \ref NdimDataInterface for more details. */ struct starpu_data_interface_ops *starpu_ndim_filter_pick_block_child_ops(struct starpu_data_filter *f, unsigned child); /** Return the child_ops of the partition obtained with starpu_ndim_filter_pick_matrix(). See \ref NdimDataInterface for more details. */ struct starpu_data_interface_ops *starpu_ndim_filter_pick_matrix_child_ops(struct starpu_data_filter *f, unsigned child); /** Return the child_ops of the partition obtained with starpu_ndim_filter_pick_vector(). See \ref NdimDataInterface for more details. */ struct starpu_data_interface_ops *starpu_ndim_filter_pick_vector_child_ops(struct starpu_data_filter *f, unsigned child); /** Return the child_ops of the partition obtained with starpu_ndim_filter_pick_variable(). See \ref NdimDataInterface for more details. */ struct starpu_data_interface_ops *starpu_ndim_filter_pick_variable_child_ops(struct starpu_data_filter *f, unsigned child); /** Return the child_ops of the partition obtained with starpu_ndim_filter_to_tensor(). See \ref NdimDataInterface for more details. */ struct starpu_data_interface_ops *starpu_ndim_filter_to_tensor_child_ops(struct starpu_data_filter *f, unsigned child); /** Return the child_ops of the partition obtained with starpu_ndim_filter_to_block(). See \ref NdimDataInterface for more details. */ struct starpu_data_interface_ops *starpu_ndim_filter_to_block_child_ops(struct starpu_data_filter *f, unsigned child); /** Return the child_ops of the partition obtained with starpu_ndim_filter_to_matrix(). See \ref NdimDataInterface for more details. */ struct starpu_data_interface_ops *starpu_ndim_filter_to_matrix_child_ops(struct starpu_data_filter *f, unsigned child); /** Return the child_ops of the partition obtained with starpu_ndim_filter_to_vector(). See \ref NdimDataInterface for more details. */ struct starpu_data_interface_ops *starpu_ndim_filter_to_vector_child_ops(struct starpu_data_filter *f, unsigned child); /** Return the child_ops of the partition obtained with starpu_ndim_filter_to_variable(). See \ref NdimDataInterface for more details. */ struct starpu_data_interface_ops *starpu_ndim_filter_to_variable_child_ops(struct starpu_data_filter *f, unsigned child); /** Given an integer \p n, \p n the number of parts it must be divided in, \p id the part currently considered, determines the \p chunk_size and the \p offset, taking into account the size of the elements stored in the data structure \p elemsize and \p blocksize, which is most often 1. See \ref DefiningANewDataFilter for more details. */ void starpu_filter_nparts_compute_chunk_size_and_offset(unsigned n, unsigned nparts, size_t elemsize, unsigned id, unsigned blocksize, unsigned *chunk_size, size_t *offset); /** @} */ /** @} */ #ifdef __cplusplus } #endif #endif starpu-1.4.9+dfsg/include/starpu_data_interfaces.h000066400000000000000000003273741507764646700223360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_DATA_INTERFACES_H__ #define __STARPU_DATA_INTERFACES_H__ #include #ifdef STARPU_USE_CUDA /* to use CUDA streams */ #include typedef cudaStream_t starpu_cudaStream_t; #endif #ifdef STARPU_USE_HIP /* to use HIP streams */ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wundef" #pragma GCC diagnostic ignored "-Wunused-result" #pragma GCC diagnostic ignored "-Wdeprecated-declarations" #ifndef __cplusplus #pragma GCC diagnostic ignored "-Wimplicit-int" #endif #pragma GCC diagnostic ignored "-Wreturn-type" #include #pragma GCC diagnostic pop typedef hipStream_t starpu_hipStream_t; #endif #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Data_Interfaces Data Interfaces @brief Data management is done at a high-level in StarPU: rather than accessing a mere list of contiguous buffers, the tasks may manipulate data that are described by a high-level construct which we call data interface. An example of data interface is the "vector" interface which describes a contiguous data array on a specific memory node. This interface is a simple structure containing the number of elements in the array, the size of the elements, and the address of the array in the appropriate address space (this address may be invalid if there is no valid copy of the array in the memory node). More information on the data interfaces provided by StarPU are given in \ref API_Data_Interfaces. When a piece of data managed by StarPU is used by a task, the task implementation is given a pointer to an interface describing a valid copy of the data that is accessible from the current processing unit. Every worker is associated to a memory node which is a logical abstraction of the address space from which the processing unit gets its data. For instance, the memory node associated to the different CPU workers represents main memory (RAM), the memory node associated to a GPU is DRAM embedded on the device. Every memory node is identified by a logical index which is accessible from the function starpu_worker_get_memory_node(). When registering a piece of data to StarPU, the specified memory node indicates where the piece of data initially resides (we also call this memory node the home node of a piece of data). In the case of NUMA systems, functions starpu_memory_nodes_numa_devid_to_id() and starpu_memory_nodes_numa_id_to_devid() can be used to convert from NUMA node numbers as seen by the Operating System and NUMA node numbers as seen by StarPU. There are several ways to register a memory region so that it can be managed by StarPU. StarPU provides data interfaces for vectors, 2D matrices, 3D matrices as well as BCSR and CSR sparse matrices. Each data interface is provided with a set of field access functions. The ones using a void * parameter aimed to be used in codelet implementations (see for example the code in \ref VectorScalingUsingStarPUAPI). Applications can provide their own interface as shown in \ref DefiningANewDataInterface. @{ */ /** Define the per-interface methods. If the starpu_data_copy_methods::any_to_any method is provided, it will be used by default if no specific method is provided. It can still be useful to provide more specific method in case of e.g. available particular CUDA, HIP or OpenCL support. See \ref DefiningANewDataInterface_copy for more details. */ struct starpu_data_copy_methods { /** If defined, allow the interface to declare whether it supports transferring from \p src_interface on node \p src_node to \p dst_interface on node \p dst_node, run from node \p handling_node. If not defined, it is assumed that the interface supports all transfers. */ int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node); /** Define how to copy data from the \p src_interface interface on the \p src_node CPU node to the \p dst_interface interface on the \p dst_node CPU node. Return 0 on success. */ int (*ram_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); /** Define how to copy data from the \p src_interface interface on the \p src_node CPU node to the \p dst_interface interface on the \p dst_node CUDA node. Return 0 on success. */ int (*ram_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); /** Define how to copy data from the \p src_interface interface on the \p src_node CPU node to the \p dst_interface interface on the \p dst_node HIP node. Return 0 on success. */ int (*ram_to_hip)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); /** Define how to copy data from the \p src_interface interface on the \p src_node CPU node to the \p dst_interface interface on the \p dst_node OpenCL node. Return 0 on success. */ int (*ram_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); /** Define how to copy data from the \p src_interface interface on the \p src_node CPU node to the \p dst_interface interface on the \p dst_node FPGA node. Return 0 on success. */ int (*ram_to_max_fpga)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); /** Define how to copy data from the \p src_interface interface on the \p src_node CUDA node to the \p dst_interface interface on the \p dst_node CPU node. Return 0 on success. */ int (*cuda_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); /** Define how to copy data from the \p src_interface interface on the \p src_node CUDA node to the \p dst_interface interface on the \p dst_node CUDA node. Return 0 on success. */ int (*cuda_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); /** Define how to copy data from the \p src_interface interface on the \p src_node HIP node to the \p dst_interface interface on the \p dst_node CPU node. Return 0 on success. */ int (*hip_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); /** Define how to copy data from the \p src_interface interface on the \p src_node HIP node to the \p dst_interface interface on the \p dst_node HIP node. Return 0 on success. */ int (*hip_to_hip)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); /** Define how to copy data from the \p src_interface interface on the \p src_node OpenCL node to the \p dst_interface interface on the \p dst_node CPU node. Return 0 on success. */ int (*opencl_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); /** Define how to copy data from the \p src_interface interface on the \p src_node OpenCL node to the \p dst_interface interface on the \p dst_node OpenCL node. Return 0 on success. */ int (*opencl_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); /** Define how to copy data from the \p src_interface interface on the \p src_node FPGA node to the \p dst_interface interface on the \p dst_node CPU node. Return 0 on success. */ int (*max_fpga_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node); #ifdef STARPU_USE_CUDA /** Define how to copy data from the \p src_interface interface on the \p src_node CPU node to the \p dst_interface interface on the \p dst_node CUDA node, using the given stream. Must return 0 if the transfer was actually completed completely synchronously, or -EAGAIN if at least some transfers are still ongoing and should be awaited for by the core. */ int (*ram_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream); /** Define how to copy data from the \p src_interface interface on the \p src_node CUDA node to the \p dst_interface interface on the \p dst_node CPU node, using the given stream. Must return 0 if the transfer was actually completed completely synchronously, or -EAGAIN if at least some transfers are still ongoing and should be awaited for by the core. */ int (*cuda_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream); /** Define how to copy data from the \p src_interface interface on the \p src_node CUDA node to the \p dst_interface interface on the \p dst_node CUDA node, using the given stream. Must return 0 if the transfer was actually completed completely synchronously, or -EAGAIN if at least some transfers are still ongoing and should be awaited for by the core. */ int (*cuda_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_cudaStream_t stream); #else int (*ram_to_cuda_async)(void); int (*cuda_to_ram_async)(void); int (*cuda_to_cuda_async)(void); #endif #ifdef STARPU_USE_HIP /** Define how to copy data from the \p src_interface interface on the \p src_node CPU node to the \p dst_interface interface on the \p dst_node HIP node, using the given stream. Must return 0 if the transfer was actually completed completely synchronously, or -EAGAIN if at least some transfers are still ongoing and should be awaited for by the core. */ int (*ram_to_hip_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_hipStream_t stream); /** Define how to copy data from the \p src_interface interface on the \p src_node HIP node to the \p dst_interface interface on the \p dst_node CPU node, using the given stream. Must return 0 if the transfer was actually completed completely synchronously, or -EAGAIN if at least some transfers are still ongoing and should be awaited for by the core. */ int (*hip_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_hipStream_t stream); /** Define how to copy data from the \p src_interface interface on the \p src_node HIP node to the \p dst_interface interface on the \p dst_node HIP node, using the given stream. Must return 0 if the transfer was actually completed completely synchronously, or -EAGAIN if at least some transfers are still ongoing and should be awaited for by the core. */ int (*hip_to_hip_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, starpu_hipStream_t stream); #else int (*ram_to_hip_async)(void); int (*hip_to_ram_async)(void); int (*hip_to_hip_async)(void); #endif #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__) && !defined(__HIPCC__) /** Define how to copy data from the \p src_interface interface on the \p src_node CPU node to the \p dst_interface interface on the \p dst_node OpenCL node, by recording in \p event, a pointer to a cl_event, the event of the last submitted transfer. Must return 0 if the transfer was actually completed completely synchronously, or -EAGAIN if at least some transfers are still ongoing and should be awaited for by the core. */ int (*ram_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event); /** Define how to copy data from the \p src_interface interface on the \p src_node OpenCL node to the \p dst_interface interface on the \p dst_node CPU node, by recording in \p event, a pointer to a cl_event, the event of the last submitted transfer. Must return 0 if the transfer was actually completed completely synchronously, or -EAGAIN if at least some transfers are still ongoing and should be awaited for by the core. */ int (*opencl_to_ram_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event); /** Define how to copy data from the \p src_interface interface on the \p src_node OpenCL node to the \p dst_interface interface on the \p dst_node OpenCL node, by recording in \p event, a pointer to a cl_event, the event of the last submitted transfer. Must return 0 if the transfer was actually completed completely synchronously, or -EAGAIN if at least some transfers are still ongoing and should be awaited for by the core. */ int (*opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event); #else int (*ram_to_opencl_async)(void); int (*opencl_to_ram_async)(void); int (*opencl_to_opencl_async)(void); #endif /** Define how to copy data from the \p src_interface interface on the \p src_node CPU node to the \p dst_interface interface on the \p dst_node FPGA node. Must return 0 if the transfer was actually completed completely synchronously, or -EAGAIN if at least some transfers are still ongoing and should be awaited for by the core. */ int (*ram_to_max_fpga_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); /** Define how to copy data from the \p src_interface interface on the \p src_node FPGA node to the \p dst_interface interface on the \p dst_node CPU node. Must return 0 if the transfer was actually completed completely synchronously, or -EAGAIN if at least some transfers are still ongoing and should be awaited for by the core. */ int (*max_fpga_to_ram_async)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node); /** Define how to copy data from the \p src_interface interface on the \p src_node node to the \p dst_interface interface on the \p dst_node node. This is meant to be implemented through the starpu_interface_copy() helper, to which async_data should be passed as such, and will be used to manage asynchronicity. This must return -EAGAIN if any of the starpu_interface_copy() calls has returned -EAGAIN (i.e. at least some transfer is still ongoing), and return 0 otherwise. This can only be implemented if the interface has ready-to-send data blocks. If the interface is more involved than this, i.e. it needs to collect pieces of data before transferring, starpu_data_interface_ops::pack_data and starpu_data_interface_ops::peek_data should be implemented instead, and the core will just transfer the resulting data buffer. */ int (*any_to_any)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); }; /** Identifier for all predefined StarPU data interfaces */ enum starpu_data_interface_id { STARPU_UNKNOWN_INTERFACE_ID = -1, /**< Unknown interface */ STARPU_MATRIX_INTERFACE_ID = 0, /**< Identifier for the matrix data interface */ STARPU_BLOCK_INTERFACE_ID = 1, /**< Identifier for the block data interface*/ STARPU_VECTOR_INTERFACE_ID = 2, /**< Identifier for the vector data interface*/ STARPU_CSR_INTERFACE_ID = 3, /**< Identifier for the CSR data interface*/ STARPU_BCSR_INTERFACE_ID = 4, /**< Identifier for the BCSR data interface*/ STARPU_VARIABLE_INTERFACE_ID = 5, /**< Identifier for the variable data interface*/ STARPU_VOID_INTERFACE_ID = 6, /**< Identifier for the void data interface*/ STARPU_MULTIFORMAT_INTERFACE_ID = 7, /**< Identifier for the multiformat data interface*/ STARPU_COO_INTERFACE_ID = 8, /**< Identifier for the COO data interface*/ STARPU_TENSOR_INTERFACE_ID = 9, /**< Identifier for the tensor data interface*/ STARPU_NDIM_INTERFACE_ID = 10, /**< Identifier for the ndim array data interface*/ STARPU_MAX_INTERFACE_ID = 11 /**< Maximum number of data interfaces */ }; /** Per-interface data management methods. */ struct starpu_data_interface_ops { /** Register an existing interface into a data handle. This iterates over all memory nodes to initialize all fields of the data interface on each of them. Since data is not allocated yet except on the home node, pointers should be left as NULL except on the \p home_node (if >= 0), for which the pointers should be copied from the given \p data_interface, which was filled with the application's pointers. This method is mandatory. See \ref DefiningANewDataInterface_registration for more details. */ void (*register_data_handle)(starpu_data_handle_t handle, int home_node, void *data_interface); /** Unregister a data handle. This iterates over all memory nodes to free any pointer in the data interface on each of them. At this point, free_data_on_node has been already called on each of them. This just clears anything that would still be left. See \ref DefiningANewDataInterface_registration for more details. */ void (*unregister_data_handle)(starpu_data_handle_t handle); /** Allocate data for the interface on a given node. This should use starpu_malloc_on_node() to perform the allocation(s), and fill the pointers in the data interface. It should return the size of the allocated memory, or -ENOMEM if memory could not be allocated. Note that the memory node can be CPU memory, GPU memory, or even disk area. The result returned by starpu_malloc_on_node() should be just stored as uintptr_t without trying to interpret it since it may be a GPU pointer, a disk descriptor, etc. This method is mandatory to be able to support memory nodes. See \ref DefiningANewDataInterface_pointers for more details. */ starpu_ssize_t (*allocate_data_on_node)(void *data_interface, unsigned node); /** Free data of the interface on a given node. This method is mandatory to be able to support memory nodes. See \ref DefiningANewDataInterface_pointers for more details. */ void (*free_data_on_node)(void *data_interface, unsigned node); /** Cache the buffers from the given node to a caching interface. This method is optional, mostly useful when also making starpu_data_interface_ops::unregister_data_handle check that pointers are NULL. \p src_interface is an interface that already has buffers allocated, but which we don't need any more. \p cached_interface is a new interface into which the buffer pointers should be transferred, for later reuse when allocating data of the same kind. Usually we can just memcpy over the set of pointers and descriptions (this is what StarPU does when this method is not implemented), but if unregister_data_handle checks that pointers are NULL, we need to additionally clear the pointers in \p src_interface. Also, it is not useful to copy the whole interface, only the pointers need to be copied (essentially the pointers that starpu_data_interface_ops::reuse_data_on_node will then transfer into a new handle interface), as well as the properties that starpu_data_interface_ops::compare (or starpu_data_interface_ops::alloc_compare if defined) needs for comparing interfaces for caching compatibility. When this method is not defined, StarPU will just copy the \p cached_interface into \p src_interface. See \ref VariableSizeDataInterface and \ref DefiningANewDataInterface_pointers for more details. */ void (*cache_data_on_node)(void *cached_interface, void *src_interface, unsigned node); /** Reuse on the given node the buffers of the provided interface This method is optional, mostly useful when also defining alloc_footprint to share tiles of the same allocation size but different shapes, or when the interface contains pointers which are initialized at registration (e.g. nn array in the ndim interface) \p cached_interface is an already-allocated buffer that we want to reuse, and \p new_data_interface is an interface in which we want to install that already-allocated buffer. Usually we can just memcpy over the set of pointers and descriptions. But e.g. with 2D tiles the ld value may not be correct, and memcpy would wrongly overwrite it in new_data_interface, i.e. reusing a vertical tile allocation for a horizontal tile, or vice-versa. reuse_data_on_node should thus copy over pointers, and define fields that are usually set by allocate_data_on_node (e.g. ld). See \ref VariableSizeDataInterface and \ref DefiningANewDataInterface_pointers for more details. */ void (*reuse_data_on_node)(void *dst_data_interface, const void *cached_interface, unsigned node); /** Map data from a source to a destination. Define function starpu_interface_map() to set this field. See \ref DefiningANewDataInterface_pointers for more details. */ int (*map_data)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); /** Unmap data from a source to a destination. Define function starpu_interface_unmap() to set this field. See \ref DefiningANewDataInterface_pointers for more details. */ int (*unmap_data)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); /** Update map data from a source to a destination. Define function starpu_interface_update_map() to set this field. See \ref DefiningANewDataInterface_pointers for more details. */ int (*update_map)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); /** Initialize the interface. This method is optional. It is called when initializing the handler on all the memory nodes. */ void (*init)(void *data_interface); /** Struct with pointer to functions for performing ram/cuda/opencl synchronous and asynchronous transfers. This field is mandatory to be able to support memory nodes, except disk nodes which can be supported by just implementing starpu_data_interface_ops::pack_data and starpu_data_interface_ops::unpack_data. */ const struct starpu_data_copy_methods *copy_methods; /** @deprecated Use starpu_data_interface_ops::to_pointer instead. Return the current pointer (if any) for the handle on the given node. This method is only required if starpu_data_interface_ops::to_pointer is not implemented. */ void *(*handle_to_pointer)(starpu_data_handle_t handle, unsigned node); /** Return the current pointer (if any) for the given interface on the given node. This method is only required for starpu_data_handle_to_pointer() and starpu_data_get_local_ptr(), and for disk support. */ void *(*to_pointer)(void *data_interface, unsigned node); /** Return an estimation of the size of data, for performance models and tracing feedback. */ size_t (*get_size)(starpu_data_handle_t handle); /** Return an estimation of the size of allocated data, for allocation management. If not specified, the starpu_data_interface_ops::get_size method is used instead. */ size_t (*get_alloc_size)(starpu_data_handle_t handle); /** Return the maximum size that the data may need to increase to. For instance, in the case of compressed matrix tiles this is the size when the block is fully dense. This is currently only used for feedback tools. */ size_t (*get_max_size)(starpu_data_handle_t handle); /** Return a 32bit footprint which characterizes the data size and layout (nx, ny, ld, elemsize, etc.), required for indexing performance models. starpu_hash_crc32c_be() and alike can be used to produce this 32bit value from various types of values. */ uint32_t (*footprint)(starpu_data_handle_t handle); /** Return a 32bit footprint which characterizes the data allocation, to be used for indexing allocation cache. If not specified, the starpu_data_interface_ops::footprint method is used instead. If specified, alloc_compare should be set to provide the strict comparison, and reuse_data_on_node should be set to provide correct buffer reuse. */ uint32_t (*alloc_footprint)(starpu_data_handle_t handle); /** Compare the data size and layout of two interfaces (nx, ny, ld, elemsize, etc.), to be used for indexing performance models. It should return 1 if the two interfaces size and layout match computation-wise, and 0 otherwise. It does *not* compare the actual content of the interfaces. */ int (*compare)(void *data_interface_a, void *data_interface_b); /** Compare the data allocation of two interfaces etc.), to be used for indexing allocation cache. It should return 1 if the two interfaces are allocation-compatible, i.e. basically have the same alloc_size, and 0 otherwise. If not specified, the starpu_data_interface_ops::compare method is used instead. */ int (*alloc_compare)(void *data_interface_a, void *data_interface_b); /** Dump the sizes of a handle to a file. This is required for performance models */ void (*display)(starpu_data_handle_t handle, FILE *f); /** Describe the data into a string in a brief way, such as one letter to describe the type of data, and the data dimensions. This is required for tracing feedback. */ starpu_ssize_t (*describe)(void *data_interface, char *buf, size_t size); /** An identifier that is unique to each interface. */ enum starpu_data_interface_id interfaceid; /** Size of the interface data descriptor. */ size_t interface_size; /** */ char is_multiformat; /** If set to non-zero, StarPU will never try to reuse an allocated buffer for a different handle. This can be notably useful for application-defined interfaces which have a dynamic size, and for which it thus does not make sense to reuse the buffer since will probably not have the proper size. */ char dontcache; /** */ struct starpu_multiformat_data_interface_ops *(*get_mf_ops)(void *data_interface); /** Pack the data handle into a contiguous buffer at the address allocated with starpu_malloc_flags(ptr, size, 0) (and thus returned in \p ptr) and set the size of the newly created buffer in \p count. If \p ptr is NULL, the function should not copy the data in the buffer but just set count to the size of the buffer which would have been allocated. The special value -1 indicates the size is yet unknown. This method (and starpu_data_interface_ops::unpack_data) is required for disk support if the starpu_data_copy_methods::any_to_any method is not implemented (because the in-memory data layout is too complex). This is also required for MPI support if there is no registered MPI data type. */ int (*pack_data)(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); /** Read the data handle from the contiguous buffer at the address \p ptr of size \p count. */ int (*peek_data)(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); /** Unpack the data handle from the contiguous buffer at the address \p ptr of size \p count. The memory at the address \p ptr should be freed after the data unpacking operation. */ int (*unpack_data)(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); /** Pack the interface into a contiguous buffer and set the size of the newly created buffer in \p count. This function is used in master slave mode for data interfaces with a dynamic content. */ int (*pack_meta)(void *data_interface, void **ptr, starpu_ssize_t *count); /** Unpack the interface from the given buffer and set the size of the unpacked data in \p count. This function is used in master slave mode for data interfaces with a dynamic content. */ int (*unpack_meta)(void **data_interface, void *ptr, starpu_ssize_t *count); /** Free the allocated memory by a previous call to unpack_meta() */ int (*free_meta)(void *data_interface); /** Name of the interface */ char *name; }; /** @name Basic API @{ */ /** Register a piece of data into the handle located at the \p handleptr address. The \p data_interface buffer contains the initial description of the data in the \p home_node. The \p ops argument is a pointer to a structure describing the different methods used to manipulate this type of interface. See starpu_data_interface_ops for more details on this structure. If \p home_node is -1, StarPU will automatically allocate the memory when it is used for the first time in write-only mode. Once such data handle has been automatically allocated, it is possible to access it using any access mode. Note that StarPU supplies a set of predefined types of interface (e.g. vector or matrix) which can be registered by the means of helper functions (e.g. starpu_vector_data_register() or starpu_matrix_data_register()). See \ref DefiningANewDataInterface_registration for more details. */ void starpu_data_register(starpu_data_handle_t *handleptr, int home_node, void *data_interface, struct starpu_data_interface_ops *ops); /** Register the given data interface operations. If the field starpu_data_interface_ops::field is set to ::STARPU_UNKNOWN_INTERFACE_ID, then a new identifier will be set by calling starpu_data_interface_get_next_id(). The function is automatically called when registering a piece of data with starpu_data_register(). It is only necessary to call it beforehand for some specific cases (such as the usmaster slave mode). */ void starpu_data_register_ops(struct starpu_data_interface_ops *ops); /** Register that a buffer for \p handle on \p node will be set. This is typically used by starpu_*_ptr_register helpers before setting the interface pointers for this node, to tell the core that that is now allocated. See \ref DefiningANewDataInterface_pointers for more details. */ void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node); /** Register a new piece of data into the handle \p handledst with the same interface as the handle \p handlesrc. See \ref DataHandlesHelpers for more details. */ void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc); /** Return the pointer associated with \p handle on node \p node or NULL if handle’s interface does not support this operation or data for this \p handle is not allocated on that \p node. See \ref DataPointers for more details. */ void *starpu_data_handle_to_pointer(starpu_data_handle_t handle, unsigned node); /** Return the local pointer associated with \p handle or NULL if \p handle’s interface does not have any data allocated locally. See \ref DataPointers for more details. */ void *starpu_data_get_local_ptr(starpu_data_handle_t handle); /** Return the interface associated with \p handle on \p memory_node. See \ref DefiningANewDataInterface_pack for more details. */ void *starpu_data_get_interface_on_node(starpu_data_handle_t handle, unsigned memory_node); /** Return the unique identifier of the interface associated with the given \p handle. See \ref DefiningANewDataInterface_helpers for more details. */ enum starpu_data_interface_id starpu_data_get_interface_id(starpu_data_handle_t handle); /** Execute the packing operation of the interface of the data registered at \p handle (see starpu_data_interface_ops). This packing operation must allocate a buffer large enough at \p ptr on node \p node and copy into the newly allocated buffer the data associated to \p handle. \p count will be set to the size of the allocated buffer. If \p ptr is NULL, the function should not copy the data in the buffer but just set \p count to the size of the buffer which would have been allocated. The special value -1 indicates the size is yet unknown. See \ref DataHandlesHelpers for more details. */ int starpu_data_pack_node(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); /** Like starpu_data_pack_node(), but for the local memory node. See \ref DataHandlesHelpers for more details. */ int starpu_data_pack(starpu_data_handle_t handle, void **ptr, starpu_ssize_t *count); /** Read in handle's \p node replicate the data located at \p ptr of size \p count as described by the interface of the data. The interface registered at \p handle must define a peeking operation (see starpu_data_interface_ops). See \ref DataHandlesHelpers for more details. */ int starpu_data_peek_node(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); /** Read in handle's local replicate the data located at \p ptr of size \p count as described by the interface of the data. The interface registered at \p handle must define a peeking operation (see starpu_data_interface_ops). See \ref DataHandlesHelpers for more details. */ int starpu_data_peek(starpu_data_handle_t handle, void *ptr, size_t count); /** Unpack in handle the data located at \p ptr of size \p count allocated on node \p node as described by the interface of the data. The interface registered at \p handle must define an unpacking operation (see starpu_data_interface_ops). See \ref DataHandlesHelpers for more details. */ int starpu_data_unpack_node(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); /** Unpack in handle the data located at \p ptr of size \p count as described by the interface of the data. The interface registered at \p handle must define a unpacking operation (see starpu_data_interface_ops). See \ref DataHandlesHelpers for more details. */ int starpu_data_unpack(starpu_data_handle_t handle, void *ptr, size_t count); /** Return the size of the data associated with \p handle. See \ref DataHandlesHelpers for more details. */ size_t starpu_data_get_size(starpu_data_handle_t handle); /** Return the size of the allocated data associated with \p handle. See \ref DataHandlesHelpers for more details. */ size_t starpu_data_get_alloc_size(starpu_data_handle_t handle); /** Return the maximum size that the \p handle data may need to increase to. See \ref DataHandlesHelpers for more details. */ starpu_ssize_t starpu_data_get_max_size(starpu_data_handle_t handle); /** See \ref DataHandlesHelpers for more details. */ int starpu_data_get_home_node(starpu_data_handle_t handle); /** Print basic information on \p handle on \p node. See \ref DataHandlesHelpers for more details. */ void starpu_data_print(starpu_data_handle_t handle, unsigned node, FILE *stream); /** Return the next available id for a newly created data interface (\ref DefiningANewDataInterface). */ int starpu_data_interface_get_next_id(void); /** Copy \p size bytes from byte offset \p src_offset of \p src on \p src_node to byte offset \p dst_offset of \p dst on \p dst_node. This is to be used in the starpu_data_copy_methods::any_to_any copy method, which is provided with \p async_data to be passed to starpu_interface_copy(). this returns -EAGAIN if the transfer is still ongoing, or 0 if the transfer is already completed. See \ref DefiningANewDataInterface_copy for more details. */ int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, void *async_data); /** Copy \p numblocks blocks of \p blocksize bytes from byte offset \p src_offset of \p src on \p src_node to byte offset \p dst_offset of \p dst on \p dst_node. The blocks start at addresses which are ld_src (resp. ld_dst) bytes apart in the source (resp. destination) interface. If blocksize == ld_src == ld_dst, the transfer is optimized into a single starpu_interface_copy call. This is to be used in the starpu_data_copy_methods::any_to_any copy method for 2D data, which is provided with \p async_data to be passed to starpu_interface_copy(). this returns -EAGAIN if the transfer is still ongoing, or 0 if the transfer is already completed. See \ref DefiningANewDataInterface_copy for more details. */ int starpu_interface_copy2d(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, void *async_data); /** Copy \p numblocks_1 * \p numblocks_2 blocks of \p blocksize bytes from byte offset \p src_offset of \p src on \p src_node to byte offset \p dst_offset of \p dst on \p dst_node. The blocks are grouped by \p numblocks_1 blocks whose start addresses are ld1_src (resp. ld1_dst) bytes apart in the source (resp. destination) interface. Such groups are grouped by numblocks_2 groups whose start addresses are ld2_src (resp. ld2_dst) bytes apart in the source (resp. destination) interface. If the blocks are contiguous, the transfers will be optimized. This is to be used in the starpu_data_copy_methods::any_to_any copy method for 3D data, which is provided with \p async_data to be passed to starpu_interface_copy(). this returns -EAGAIN if the transfer is still ongoing, or 0 if the transfer is already completed. See \ref DefiningANewDataInterface_copy for more details. */ int starpu_interface_copy3d(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks1, size_t ld1_src, size_t ld1_dst, size_t numblocks2, size_t ld2_src, size_t ld2_dst, void *async_data); /** Copy \p numblocks_1 * \p numblocks_2 * \p numblocks_3 blocks of \p blocksize bytes from byte offset \p src_offset of \p src on \p src_node to byte offset \p dst_offset of \p dst on \p dst_node. The blocks are grouped by \p numblocks_1 blocks whose start addresses are ld1_src (resp. ld1_dst) bytes apart in the source (resp. destination) interface. Such groups are grouped by numblocks_2 groups whose start addresses are ld2_src (resp. ld2_dst) bytes apart in the source (resp. destination) interface. Such groups are grouped by numblocks_3 groups whose start addresses are ld3_src (resp. ld3_dst) bytes apart in the source (resp. destination) interface. If the blocks are contiguous, the transfers will be optimized. This is to be used in the starpu_data_copy_methods::any_to_any copy method for 4D data, which is provided with \p async_data to be passed to starpu_interface_copy(). this returns -EAGAIN if the transfer is still ongoing, or 0 if the transfer is already completed. See \ref DefiningANewDataInterface_copy for more details. */ int starpu_interface_copy4d(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks1, size_t ld1_src, size_t ld1_dst, size_t numblocks2, size_t ld2_src, size_t ld2_dst, size_t numblocks3, size_t ld3_src, size_t ld3_dst, void *async_data); /** Copy \p nn[1] * \p nn[2]...* \p nn[ndim-1] blocks of \p nn[0] * \p elemsize bytes from byte offset \p src_offset of \p src on \p src_node to byte offset \p dst_offset of \p dst on \p dst_node. The blocks are grouped by \p nn[i] blocks (i = 1, 2, ... ndim-1) whose start addresses are ldn_src[i] * \p elemsize (resp. ld1_dst[i] * \p elemsize) bytes apart in the source (resp. destination) interface. If the blocks are contiguous, the transfers will be optimized. This is to be used in the starpu_data_copy_methods::any_to_any copy method for Ndim data, which is provided with \p async_data to be passed to starpu_interface_copy(). this returns -EAGAIN if the transfer is still ongoing, or 0 if the transfer is already completed. See \ref DefiningANewDataInterface_copy for more details. */ int starpu_interface_copynd(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t elemsize, size_t ndim, uint32_t *nn, uint32_t *ldn_src, uint32_t *ldn_dst, void *async_data); /** When an asynchronous implementation of the data transfer is implemented, the call to the underlying CUDA, OpenCL, etc. call should be surrounded by calls to starpu_interface_start_driver_copy_async() and starpu_interface_end_driver_copy_async(), so that it is recorded in offline execution traces, and the timing of the submission is checked. \p start must point to a variable whose value will be passed unchanged to starpu_interface_end_driver_copy_async(). See \ref DefiningANewDataInterface_copy for more details. */ void starpu_interface_start_driver_copy_async(unsigned src_node, unsigned dst_node, double *start); /** See starpu_interface_start_driver_copy_async(). See \ref DefiningANewDataInterface_copy for more details. */ void starpu_interface_end_driver_copy_async(unsigned src_node, unsigned dst_node, double start); /** Record in offline execution traces the copy of \p size bytes from node \p src_node to node \p dst_node. See \ref DefiningANewDataInterface_copy for more details. */ void starpu_interface_data_copy(unsigned src_node, unsigned dst_node, size_t size); /** Allocate \p size bytes on node \p dst_node with the given allocation \p flags (such as ::STARPU_MALLOC_PINNED, ::STARPU_MALLOC_COUNT, etc.). This returns 0 if allocation failed, the allocation method should then return -ENOMEM as allocated size. Deallocation must be done with starpu_free_on_node_flags(). See \ref VariableSizeDataInterface for more details. */ uintptr_t starpu_malloc_on_node_flags(unsigned dst_node, size_t size, int flags); /** Allocate \p size bytes on node \p dst_node with the default allocation flags. This returns 0 if allocation failed, the allocation method should then return -ENOMEM as allocated size. Deallocation must be done with starpu_free_on_node(). See \ref DefiningANewDataInterface_allocation for more details. */ uintptr_t starpu_malloc_on_node(unsigned dst_node, size_t size); /** Free \p addr of \p size bytes on node \p dst_node which was previously allocated with starpu_malloc_on_node_flags() with the given allocation \p flags. See \ref VariableSizeDataInterface for more details. */ void starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags); /** Free \p addr of \p size bytes on node \p dst_node which was previously allocated with starpu_malloc_on_node(). See \ref DefiningANewDataInterface_allocation for more details. */ void starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size); /** Define the default flags for allocations performed by starpu_malloc_on_node() and starpu_free_on_node(). The default is \ref STARPU_MALLOC_PINNED | \ref STARPU_MALLOC_COUNT. See \ref HowToLimitMemoryPerNode for more details. */ void starpu_malloc_on_node_set_default_flags(unsigned node, int flags); /** @} */ /** @name MAP API @{ */ /** Used to set starpu_data_interface_ops::map_data. See \ref DefiningANewDataInterface_pointers for more details. */ uintptr_t starpu_interface_map(uintptr_t src, size_t src_offset, unsigned src_node, unsigned dst_node, size_t size, int *ret); /** Used to set starpu_data_interface_ops::unmap_data. See \ref DefiningANewDataInterface_pointers for more details. */ int starpu_interface_unmap(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, unsigned dst_node, size_t size); /** Used to set starpu_data_interface_ops::update_map. See \ref DefiningANewDataInterface_pointers for more details. */ int starpu_interface_update_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size); /** @} */ /** @name Accessing Matrix Data Interfaces @{ */ extern struct starpu_data_interface_ops starpu_interface_matrix_ops; /** Matrix interface for dense matrices */ struct starpu_matrix_interface { enum starpu_data_interface_id id; /**< Identifier of the interface */ uintptr_t ptr; /**< local pointer of the matrix */ uintptr_t dev_handle; /**< device handle of the matrix */ size_t offset; /**< offset in the matrix */ uint32_t nx; /**< number of elements on the x-axis of the matrix */ uint32_t ny; /**< number of elements on the y-axis of the matrix */ uint32_t ld; /**< number of elements between each row of the matrix. Maybe be equal to starpu_matrix_interface::nx when there is no padding. */ size_t elemsize; /**< size of the elements of the matrix */ size_t allocsize; /**< size actually currently allocated */ }; /** Register the \p nx x \p ny 2D matrix of \p elemsize-byte elements pointed by \p ptr and initialize \p handle to represent it. \p ld specifies the number of elements between rows. a value greater than \p nx adds padding, which can be useful for alignment purposes. Here an example of how to use the function. \code{.c} float *matrix; starpu_data_handle_t matrix_handle; matrix = (float*)malloc(width * height * sizeof(float)); starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t)matrix, width, width, height, sizeof(float)); \endcode See \ref MatrixDataInterface for more details. */ void starpu_matrix_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize); /** Similar to starpu_matrix_data_register, but additionally specifies which allocation size should be used instead of the initial nx*ny*elemsize. See \ref VariableSizeDataInterface for more details. */ void starpu_matrix_data_register_allocsize(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize, size_t allocsize); /** Register into the \p handle that to store data on node \p node it should use the buffer located at \p ptr, or device handle \p dev_handle and offset \p offset (for OpenCL, notably), with \p ld elements between rows. */ void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld); /** Return the number of elements on the x-axis of the matrix designated by \p handle. */ uint32_t starpu_matrix_get_nx(starpu_data_handle_t handle); /** Return the number of elements on the y-axis of the matrix designated by \p handle. */ uint32_t starpu_matrix_get_ny(starpu_data_handle_t handle); /** Return the number of elements between each row of the matrix designated by \p handle. Maybe be equal to nx when there is no padding. */ uint32_t starpu_matrix_get_local_ld(starpu_data_handle_t handle); /** Return the local pointer associated with \p handle. */ uintptr_t starpu_matrix_get_local_ptr(starpu_data_handle_t handle); /** Return the size of the elements registered into the matrix designated by \p handle. */ size_t starpu_matrix_get_elemsize(starpu_data_handle_t handle); /** Return the allocated size of the matrix designated by \p handle. */ size_t starpu_matrix_get_allocsize(starpu_data_handle_t handle); #if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG) #define STARPU_MATRIX_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_matrix_interface *)(interface))->id) == STARPU_MATRIX_INTERFACE_ID, "Error. The given data is not a matrix.") #define STARPU_MATRIX_GET_PTR(interface) ( \ { \ STARPU_MATRIX_CHECK(interface); \ (((struct starpu_matrix_interface *)(interface))->ptr); \ }) #define STARPU_MATRIX_GET_DEV_HANDLE(interface) ( \ { \ STARPU_MATRIX_CHECK(interface); \ (((struct starpu_matrix_interface *)(interface))->dev_handle); \ }) #define STARPU_MATRIX_GET_OFFSET(interface) ( \ { \ STARPU_MATRIX_CHECK(interface); \ (((struct starpu_matrix_interface *)(interface))->offset); \ }) #define STARPU_MATRIX_GET_NX(interface) ( \ { \ STARPU_MATRIX_CHECK(interface); \ (((struct starpu_matrix_interface *)(interface))->nx); \ }) #define STARPU_MATRIX_GET_NY(interface) ( \ { \ STARPU_MATRIX_CHECK(interface); \ (((struct starpu_matrix_interface *)(interface))->ny); \ }) #define STARPU_MATRIX_GET_LD(interface) ( \ { \ STARPU_MATRIX_CHECK(interface); \ (((struct starpu_matrix_interface *)(interface))->ld); \ }) #define STARPU_MATRIX_GET_ELEMSIZE(interface) ( \ { \ STARPU_MATRIX_CHECK(interface); \ (((struct starpu_matrix_interface *)(interface))->elemsize); \ }) #define STARPU_MATRIX_GET_ALLOCSIZE(interface) ( \ { \ STARPU_MATRIX_CHECK(interface); \ (((struct starpu_matrix_interface *)(interface))->allocsize); \ }) #else /** Return a pointer to the matrix designated by \p interface, valid on CPUs and CUDA devices only. For OpenCL devices, the device handle and offset need to be used instead. */ #define STARPU_MATRIX_GET_PTR(interface) (((struct starpu_matrix_interface *)(interface))->ptr) /** Return a device handle for the matrix designated by \p interface, to be used with OpenCL. The offset returned by ::STARPU_MATRIX_GET_OFFSET has to be used in addition to this. */ #define STARPU_MATRIX_GET_DEV_HANDLE(interface) (((struct starpu_matrix_interface *)(interface))->dev_handle) /** Return the offset in the matrix designated by \p interface, to be used with the device handle. */ #define STARPU_MATRIX_GET_OFFSET(interface) (((struct starpu_matrix_interface *)(interface))->offset) /** Return the number of elements on the x-axis of the matrix designated by \p interface. */ #define STARPU_MATRIX_GET_NX(interface) (((struct starpu_matrix_interface *)(interface))->nx) /** Return the number of elements on the y-axis of the matrix designated by \p interface. */ #define STARPU_MATRIX_GET_NY(interface) (((struct starpu_matrix_interface *)(interface))->ny) /** Return the number of elements between each row of the matrix designated by \p interface. May be equal to nx when there is no padding. */ #define STARPU_MATRIX_GET_LD(interface) (((struct starpu_matrix_interface *)(interface))->ld) /** Return the size of the elements registered into the matrix designated by \p interface. */ #define STARPU_MATRIX_GET_ELEMSIZE(interface) (((struct starpu_matrix_interface *)(interface))->elemsize) /** Return the allocated size of the matrix designated by \p interface. */ #define STARPU_MATRIX_GET_ALLOCSIZE(interface) (((struct starpu_matrix_interface *)(interface))->allocsize) #endif /** Set the number of elements on the x-axis of the matrix designated by \p interface. */ #define STARPU_MATRIX_SET_NX(interface, newnx) \ do { \ STARPU_MATRIX_CHECK(interface); \ (((struct starpu_matrix_interface *)(interface))->nx) = (newnx); \ } \ while (0) /** Set the number of elements on the y-axis of the matrix designated by \p interface. */ #define STARPU_MATRIX_SET_NY(interface, newny) \ do { \ STARPU_MATRIX_CHECK(interface); \ (((struct starpu_matrix_interface *)(interface))->ny) = (newny); \ } \ while (0) /** Set the number of elements between each row of the matrix designated by \p interface. May be set to the same value as nx when there is no padding. */ #define STARPU_MATRIX_SET_LD(interface, newld) \ do { \ STARPU_MATRIX_CHECK(interface); \ (((struct starpu_matrix_interface *)(interface))->ld) = (newld); \ } \ while (0) /** @} */ /** @name Accessing COO Data Interfaces @{ */ extern struct starpu_data_interface_ops starpu_interface_coo_ops; /** COO Matrices */ struct starpu_coo_interface { enum starpu_data_interface_id id; /**< identifier of the interface */ uint32_t *columns; /**< column array of the matrix */ uint32_t *rows; /**< row array of the matrix */ uintptr_t values; /**< values of the matrix */ uint32_t nx; /**< number of elements on the x-axis of the matrix */ uint32_t ny; /**< number of elements on the y-axis of the matrix */ uint32_t n_values; /**< number of values registered in the matrix */ size_t elemsize; /**< size of the elements of the matrix */ }; /** Register the \p nx x \p ny 2D matrix given in the COO format, using the \p columns, \p rows, \p values arrays, which must have \p n_values elements of size \p elemsize. Initialize \p handleptr. See \ref COODataInterface for more details. */ void starpu_coo_data_register(starpu_data_handle_t *handleptr, int home_node, uint32_t nx, uint32_t ny, uint32_t n_values, uint32_t *columns, uint32_t *rows, uintptr_t values, size_t elemsize); /** Return a pointer to the column array of the matrix designated by \p interface. */ #define STARPU_COO_GET_COLUMNS(interface) (((struct starpu_coo_interface *)(interface))->columns) /** Return a device handle for the column array of the matrix designated by \p interface, to be used with OpenCL. The offset returned by ::STARPU_COO_GET_OFFSET has to be used in addition to this. */ #define STARPU_COO_GET_COLUMNS_DEV_HANDLE(interface) (((struct starpu_coo_interface *)(interface))->columns) /** Return a pointer to the rows array of the matrix designated by \p interface. */ #define STARPU_COO_GET_ROWS(interface) (((struct starpu_coo_interface *)(interface))->rows) /** Return a device handle for the row array of the matrix designated by \p interface, to be used on OpenCL. The offset returned by ::STARPU_COO_GET_OFFSET has to be used in addition to this. */ #define STARPU_COO_GET_ROWS_DEV_HANDLE(interface) (((struct starpu_coo_interface *)(interface))->rows) /** Return a pointer to the values array of the matrix designated by \p interface. */ #define STARPU_COO_GET_VALUES(interface) (((struct starpu_coo_interface *)(interface))->values) /** Return a device handle for the value array of the matrix designated by \p interface, to be used on OpenCL. The offset returned by ::STARPU_COO_GET_OFFSET has to be used in addition to this. */ #define STARPU_COO_GET_VALUES_DEV_HANDLE(interface) (((struct starpu_coo_interface *)(interface))->values) /** Return the offset in the arrays of the COO matrix designated by \p interface. */ #define STARPU_COO_GET_OFFSET 0 /** Return the number of elements on the x-axis of the matrix designated by \p interface. */ #define STARPU_COO_GET_NX(interface) (((struct starpu_coo_interface *)(interface))->nx) /** Return the number of elements on the y-axis of the matrix designated by \p interface. */ #define STARPU_COO_GET_NY(interface) (((struct starpu_coo_interface *)(interface))->ny) /** Return the number of values registered in the matrix designated by \p interface. */ #define STARPU_COO_GET_NVALUES(interface) (((struct starpu_coo_interface *)(interface))->n_values) /** Return the size of the elements registered into the matrix designated by \p interface. */ #define STARPU_COO_GET_ELEMSIZE(interface) (((struct starpu_coo_interface *)(interface))->elemsize) /** @} */ /** @name Block Data Interface @{ */ extern struct starpu_data_interface_ops starpu_interface_block_ops; /* TODO: rename to 3dmatrix? */ /* TODO: add allocsize support */ /** Block interface for 3D dense blocks */ struct starpu_block_interface { enum starpu_data_interface_id id; /**< identifier of the interface */ uintptr_t ptr; /**< local pointer of the block */ uintptr_t dev_handle; /**< device handle of the block. */ size_t offset; /**< offset in the block. */ uint32_t nx; /**< number of elements on the x-axis of the block. */ uint32_t ny; /**< number of elements on the y-axis of the block. */ uint32_t nz; /**< number of elements on the z-axis of the block. */ uint32_t ldy; /**< number of elements between two lines */ uint32_t ldz; /**< number of elements between two planes */ size_t elemsize; /**< size of the elements of the block. */ }; /** Register the \p nx x \p ny x \p nz 3D matrix of \p elemsize byte elements pointed by \p ptr and initialize \p handle to represent it. Again, \p ldy and \p ldz specify the number of elements between rows and between z planes. Here an example of how to use the function. \code{.c} float *block; starpu_data_handle_t block_handle; block = (float*)malloc(nx*ny*nz*sizeof(float)); starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, nx, nx*ny, nx, ny, nz, sizeof(float)); \endcode See \ref BlockDataInterface for more details. */ void starpu_block_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, uint32_t ny, uint32_t nz, size_t elemsize); /** Register into the \p handle that to store data on node \p node it should use the buffer located at \p ptr, or device handle \p dev_handle and offset \p offset (for OpenCL, notably), with \p ldy elements between rows and \p ldz elements between z planes. */ void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz); /** Return the number of elements on the x-axis of the block designated by \p handle. */ uint32_t starpu_block_get_nx(starpu_data_handle_t handle); /** Return the number of elements on the y-axis of the block designated by \p handle. */ uint32_t starpu_block_get_ny(starpu_data_handle_t handle); /** Return the number of elements on the z-axis of the block designated by \p handle. */ uint32_t starpu_block_get_nz(starpu_data_handle_t handle); /** Return the number of elements between each row of the block designated by \p handle, in the format of the current memory node. */ uint32_t starpu_block_get_local_ldy(starpu_data_handle_t handle); /** Return the number of elements between each z plane of the block designated by \p handle, in the format of the current memory node. */ uint32_t starpu_block_get_local_ldz(starpu_data_handle_t handle); /** Return the local pointer associated with \p handle. */ uintptr_t starpu_block_get_local_ptr(starpu_data_handle_t handle); /** Return the size of the elements of the block designated by \p handle. */ size_t starpu_block_get_elemsize(starpu_data_handle_t handle); #if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG) #define STARPU_BLOCK_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_block_interface *)(interface))->id) == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block.") #define STARPU_BLOCK_GET_PTR(interface) ( \ { \ STARPU_BLOCK_CHECK(interface); \ (((struct starpu_block_interface *)(interface))->ptr); \ }) #define STARPU_BLOCK_GET_DEV_HANDLE(interface) ( \ { \ STARPU_BLOCK_CHECK(interface); \ (((struct starpu_block_interface *)(interface))->dev_handle); \ }) #define STARPU_BLOCK_GET_OFFSET(interface) ( \ { \ STARPU_BLOCK_CHECK(interface); \ (((struct starpu_block_interface *)(interface))->offset); \ }) #define STARPU_BLOCK_GET_NX(interface) ( \ { \ STARPU_BLOCK_CHECK(interface); \ (((struct starpu_block_interface *)(interface))->nx); \ }) #define STARPU_BLOCK_GET_NY(interface) ( \ { \ STARPU_BLOCK_CHECK(interface); \ (((struct starpu_block_interface *)(interface))->ny); \ }) #define STARPU_BLOCK_GET_NZ(interface) ( \ { \ STARPU_BLOCK_CHECK(interface); \ (((struct starpu_block_interface *)(interface))->nz); \ }) #define STARPU_BLOCK_GET_LDY(interface) ( \ { \ STARPU_BLOCK_CHECK(interface); \ (((struct starpu_block_interface *)(interface))->ldy); \ }) #define STARPU_BLOCK_GET_LDZ(interface) ( \ { \ STARPU_BLOCK_CHECK(interface); \ (((struct starpu_block_interface *)(interface))->ldz); \ }) #define STARPU_BLOCK_GET_ELEMSIZE(interface) ( \ { \ STARPU_BLOCK_CHECK(interface); \ (((struct starpu_block_interface *)(interface))->elemsize); \ }) #else /** Return a pointer to the block designated by \p interface. */ #define STARPU_BLOCK_GET_PTR(interface) (((struct starpu_block_interface *)(interface))->ptr) /** Return a device handle for the block designated by \p interface, to be used on OpenCL. The offset returned by ::STARPU_BLOCK_GET_OFFSET has to be used in addition to this. */ #define STARPU_BLOCK_GET_DEV_HANDLE(interface) (((struct starpu_block_interface *)(interface))->dev_handle) /** Return the offset in the block designated by \p interface, to be used with the device handle. */ #define STARPU_BLOCK_GET_OFFSET(interface) (((struct starpu_block_interface *)(interface))->offset) /** Return the number of elements on the x-axis of the block designated by \p interface. */ #define STARPU_BLOCK_GET_NX(interface) (((struct starpu_block_interface *)(interface))->nx) /** Return the number of elements on the y-axis of the block designated by \p interface. */ #define STARPU_BLOCK_GET_NY(interface) (((struct starpu_block_interface *)(interface))->ny) /** Return the number of elements on the z-axis of the block designated by \p interface. */ #define STARPU_BLOCK_GET_NZ(interface) (((struct starpu_block_interface *)(interface))->nz) /** Return the number of elements between each row of the block designated by \p interface. May be equal to nx when there is no padding. */ #define STARPU_BLOCK_GET_LDY(interface) (((struct starpu_block_interface *)(interface))->ldy) /** Return the number of elements between each z plane of the block designated by \p interface. May be equal to nx*ny when there is no padding. */ #define STARPU_BLOCK_GET_LDZ(interface) (((struct starpu_block_interface *)(interface))->ldz) /** Return the size of the elements of the block designated by \p interface. */ #define STARPU_BLOCK_GET_ELEMSIZE(interface) (((struct starpu_block_interface *)(interface))->elemsize) #endif /** @} */ /** @name Tensor Data Interface @{ */ extern struct starpu_data_interface_ops starpu_interface_tensor_ops; /* TODO: rename to 4dtensor? */ /* TODO: add allocsize support */ /** Tensor interface for 4D dense tensors */ struct starpu_tensor_interface { enum starpu_data_interface_id id; /**< identifier of the interface */ uintptr_t ptr; /**< local pointer of the tensor */ uintptr_t dev_handle; /**< device handle of the tensor. */ size_t offset; /**< offset in the tensor. */ uint32_t nx; /**< number of elements on the x-axis of the tensor. */ uint32_t ny; /**< number of elements on the y-axis of the tensor. */ uint32_t nz; /**< number of elements on the z-axis of the tensor. */ uint32_t nt; /**< number of elements on the t-axis of the tensor. */ uint32_t ldy; /**< number of elements between two lines */ uint32_t ldz; /**< number of elements between two planes */ uint32_t ldt; /**< number of elements between two cubes */ size_t elemsize; /**< size of the elements of the tensor. */ }; /** Register the \p nx x \p ny x \p nz x \p nt 4D tensor of \p elemsize byte elements pointed by \p ptr and initialize \p handle to represent it. Again, \p ldy, \p ldz, and \p ldt specify the number of elements between rows, between z planes and between t cubes. Here an example of how to use the function. \code{.c} float *tensor; starpu_data_handle_t tensor_handle; tensor = (float*)malloc(nx*ny*nz*nt*sizeof(float)); starpu_tensor_data_register(&tensor_handle, STARPU_MAIN_RAM, (uintptr_t)tensor, nx, nx*ny, nx*ny*nz, nx, ny, nz, nt, sizeof(float)); \endcode See \ref TensorDataInterface for more details. */ void starpu_tensor_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t ldt, uint32_t nx, uint32_t ny, uint32_t nz, uint32_t nt, size_t elemsize); /** Register into the \p handle that to store data on node \p node it should use the buffer located at \p ptr, or device handle \p dev_handle and offset \p offset (for OpenCL, notably), with \p ldy elements between rows, and \p ldz elements between z planes, and \p ldt elements between t cubes. */ void starpu_tensor_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz, uint32_t ldt); /** Return the number of elements on the x-axis of the tensor designated by \p handle. */ uint32_t starpu_tensor_get_nx(starpu_data_handle_t handle); /** Return the number of elements on the y-axis of the tensor designated by \p handle. */ uint32_t starpu_tensor_get_ny(starpu_data_handle_t handle); /** Return the number of elements on the z-axis of the tensor designated by \p handle. */ uint32_t starpu_tensor_get_nz(starpu_data_handle_t handle); /** Return the number of elements on the t-axis of the tensor designated by \p handle. */ uint32_t starpu_tensor_get_nt(starpu_data_handle_t handle); /** Return the number of elements between each row of the tensor designated by \p handle, in the format of the current memory node. */ uint32_t starpu_tensor_get_local_ldy(starpu_data_handle_t handle); /** Return the number of elements between each z plane of the tensor designated by \p handle, in the format of the current memory node. */ uint32_t starpu_tensor_get_local_ldz(starpu_data_handle_t handle); /** Return the number of elements between each t cubes of the tensor designated by \p handle, in the format of the current memory node. */ uint32_t starpu_tensor_get_local_ldt(starpu_data_handle_t handle); /** Return the local pointer associated with \p handle. */ uintptr_t starpu_tensor_get_local_ptr(starpu_data_handle_t handle); /** Return the size of the elements of the tensor designated by \p handle. */ size_t starpu_tensor_get_elemsize(starpu_data_handle_t handle); #if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG) #define STARPU_TENSOR_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_tensor_interface *)(interface))->id) == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a tensor.") #define STARPU_TENSOR_GET_PTR(interface) ( \ { \ STARPU_TENSOR_CHECK(interface); \ (((struct starpu_tensor_interface *)(interface))->ptr); \ }) #define STARPU_TENSOR_GET_DEV_HANDLE(interface) ( \ { \ STARPU_TENSOR_CHECK(interface); \ (((struct starpu_tensor_interface *)(interface))->dev_handle); \ }) #define STARPU_TENSOR_GET_OFFSET(interface) ( \ { \ STARPU_TENSOR_CHECK(interface); \ (((struct starpu_tensor_interface *)(interface))->offset); \ }) #define STARPU_TENSOR_GET_NX(interface) ( \ { \ STARPU_TENSOR_CHECK(interface); \ (((struct starpu_tensor_interface *)(interface))->nx); \ }) #define STARPU_TENSOR_GET_NY(interface) ( \ { \ STARPU_TENSOR_CHECK(interface); \ (((struct starpu_tensor_interface *)(interface))->ny); \ }) #define STARPU_TENSOR_GET_NZ(interface) ( \ { \ STARPU_TENSOR_CHECK(interface); \ (((struct starpu_tensor_interface *)(interface))->nz); \ }) #define STARPU_TENSOR_GET_NT(interface) ( \ { \ STARPU_TENSOR_CHECK(interface); \ (((struct starpu_tensor_interface *)(interface))->nt); \ }) #define STARPU_TENSOR_GET_LDY(interface) ( \ { \ STARPU_TENSOR_CHECK(interface); \ (((struct starpu_tensor_interface *)(interface))->ldy); \ }) #define STARPU_TENSOR_GET_LDZ(interface) ( \ { \ STARPU_TENSOR_CHECK(interface); \ (((struct starpu_tensor_interface *)(interface))->ldz); \ }) #define STARPU_TENSOR_GET_LDT(interface) ( \ { \ STARPU_TENSOR_CHECK(interface); \ (((struct starpu_tensor_interface *)(interface))->ldt); \ }) #define STARPU_TENSOR_GET_ELEMSIZE(interface) ( \ { \ STARPU_TENSOR_CHECK(interface); \ (((struct starpu_tensor_interface *)(interface))->elemsize); \ }) #else /** Return a pointer to the tensor designated by \p interface. */ #define STARPU_TENSOR_GET_PTR(interface) (((struct starpu_tensor_interface *)(interface))->ptr) /** Return a device handle for the tensor designated by \p interface, to be used on OpenCL. The offset returned by ::STARPU_TENSOR_GET_OFFSET has to be used in addition to this. */ #define STARPU_TENSOR_GET_DEV_HANDLE(interface) (((struct starpu_tensor_interface *)(interface))->dev_handle) /** Return the offset in the tensor designated by \p interface, to be used with the device handle. */ #define STARPU_TENSOR_GET_OFFSET(interface) (((struct starpu_tensor_interface *)(interface))->offset) /** Return the number of elements on the x-axis of the tensor designated by \p interface. */ #define STARPU_TENSOR_GET_NX(interface) (((struct starpu_tensor_interface *)(interface))->nx) /** Return the number of elements on the y-axis of the tensor designated by \p interface. */ #define STARPU_TENSOR_GET_NY(interface) (((struct starpu_tensor_interface *)(interface))->ny) /** Return the number of elements on the z-axis of the tensor designated by \p interface. */ #define STARPU_TENSOR_GET_NZ(interface) (((struct starpu_tensor_interface *)(interface))->nz) /** Return the number of elements on the t-axis of the tensor designated by \p interface. */ #define STARPU_TENSOR_GET_NT(interface) (((struct starpu_tensor_interface *)(interface))->nt) /** Return the number of elements between each row of the tensor designated by \p interface. May be equal to nx when there is no padding. */ #define STARPU_TENSOR_GET_LDY(interface) (((struct starpu_tensor_interface *)(interface))->ldy) /** Return the number of elements between each z plane of the tensor designated by \p interface. May be equal to nx*ny when there is no padding. */ #define STARPU_TENSOR_GET_LDZ(interface) (((struct starpu_tensor_interface *)(interface))->ldz) /** Return the number of elements between each t cubes of the tensor designated by \p interface. May be equal to nx*ny*nz when there is no padding. */ #define STARPU_TENSOR_GET_LDT(interface) (((struct starpu_tensor_interface *)(interface))->ldt) /** Return the size of the elements of the tensor designated by \p interface. */ #define STARPU_TENSOR_GET_ELEMSIZE(interface) (((struct starpu_tensor_interface *)(interface))->elemsize) #endif /** @} */ /** @name Ndim Array Data Interface @{ */ extern struct starpu_data_interface_ops starpu_interface_ndim_ops; /** ndim interface for ndim array */ struct starpu_ndim_interface { enum starpu_data_interface_id id; /**< identifier of the interface */ uintptr_t ptr; /**< local pointer of the ndim */ uintptr_t dev_handle; /**< device handle of the ndim. */ size_t offset; /**< offset in the ndim. */ size_t allocsize; /**< size actually currently allocated. */ uint32_t *nn; /**< array of element number on each dimension */ uint32_t *ldn; /**< array of element number between two units on each dimension */ size_t ndim; /**< size of the dimension. */ size_t elemsize; /**< size of the elements of the ndim. */ }; /** Register the \p nn[0] x \p nn[1] x ... \p ndim-dimension matrix of \p elemsize byte elements pointed by \p ptr and initialize \p handle to represent it. Again, \p ldn, specifies the number of elements between two units on each dimension. Here an example of how to use the function. \code{.c} float *ndim_arr; size_t arrsize = 1; int i; for (i = 0; i < ndim; i++) arrsize = arrsize * nn[i]; starpu_data_handle_t ndim_handle; ndim_arr = (float*)malloc(arrsize*sizeof(float)); starpu_ndim_data_register(&ndim_handle, STARPU_MAIN_RAM, (uintptr_t)ndim_arr, ldn, nn, ndim, sizeof(float)); \endcode See \ref NdimDataInterface for more details. */ void starpu_ndim_data_register(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, uint32_t *ldn, uint32_t *nn, size_t ndim, size_t elemsize); /** Register into the \p handle that to store data on node \p node it should use the buffer located at \p ptr, or device handle \p dev_handle and offset \p offset (for OpenCL, notably), with \p ldn elements between two units on each dimension. */ void starpu_ndim_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t *ldn); /** Return the number of elements on each dimension of the ndim array designated by \p handle. */ uint32_t *starpu_ndim_get_nn(starpu_data_handle_t handle); /** Return the number of elements on the i-axis of the ndim array designated by \p handle. When i=0, it means x-axis, when i=1, it means y-axis, when i=2, it means z-axis, etc. */ uint32_t starpu_ndim_get_ni(starpu_data_handle_t handle, size_t i); /** Return the number of elements between two units on each dimension of the ndim array designated by \p handle, in the format of the current memory node. */ uint32_t *starpu_ndim_get_local_ldn(starpu_data_handle_t handle); /** Return the number of elements between two units i-axis dimension of the ndim array designated by \p handle, in the format of the current memory node. */ uint32_t starpu_ndim_get_local_ldi(starpu_data_handle_t handle, size_t i); /** Return the local pointer associated with \p handle. */ uintptr_t starpu_ndim_get_local_ptr(starpu_data_handle_t handle); /** Return the dimension size. */ size_t starpu_ndim_get_ndim(starpu_data_handle_t handle); /** Return the size of the elements of the ndim array designated by \p handle. */ size_t starpu_ndim_get_elemsize(starpu_data_handle_t handle); #if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG) #define STARPU_NDIM_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_ndim_interface *)(interface))->id) == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim.") #define STARPU_NDIM_GET_PTR(interface) ( \ { \ STARPU_NDIM_CHECK(interface); \ (((struct starpu_ndim_interface *)(interface))->ptr); \ }) #define STARPU_NDIM_GET_DEV_HANDLE(interface) ( \ { \ STARPU_NDIM_CHECK(interface); \ (((struct starpu_ndim_interface *)(interface))->dev_handle); \ }) #define STARPU_NDIM_GET_OFFSET(interface) ( \ { \ STARPU_NDIM_CHECK(interface); \ (((struct starpu_ndim_interface *)(interface))->offset); \ }) #define STARPU_NDIM_GET_NN(interface) ( \ { \ STARPU_NDIM_CHECK(interface); \ (((struct starpu_ndim_interface *)(interface))->nn); \ }) #define STARPU_NDIM_GET_LDN(interface) ( \ { \ STARPU_NDIM_CHECK(interface); \ (((struct starpu_ndim_interface *)(interface))->ldn); \ }) #define STARPU_NDIM_GET_NDIM(interface) ( \ { \ STARPU_NDIM_CHECK(interface); \ (((struct starpu_ndim_interface *)(interface))->ndim); \ }) #define STARPU_NDIM_GET_ELEMSIZE(interface) ( \ { \ STARPU_NDIM_CHECK(interface); \ (((struct starpu_ndim_interface *)(interface))->elemsize); \ }) #else /** Return a pointer to the ndim array designated by \p interface. */ #define STARPU_NDIM_GET_PTR(interface) (((struct starpu_ndim_interface *)(interface))->ptr) /** Return a device handle for the ndim array designated by \p interface, to be used on OpenCL. The offset returned by ::STARPU_NDIM_GET_OFFSET has to be used in addition to this. */ #define STARPU_NDIM_GET_DEV_HANDLE(interface) (((struct starpu_ndim_interface *)(interface))->dev_handle) /** Return the offset in the ndim designated by \p interface, to be used with the device handle. */ #define STARPU_NDIM_GET_OFFSET(interface) (((struct starpu_ndim_interface *)(interface))->offset) /** Return the number of elements on each dimension of the ndim array designated by \p interface. */ #define STARPU_NDIM_GET_NN(interface) (((struct starpu_ndim_interface *)(interface))->nn) /** Return the number of elements between each two units on each dimension of the ndim array designated by \p interface. May be equal to nx when there is no padding. */ #define STARPU_NDIM_GET_LDN(interface) (((struct starpu_ndim_interface *)(interface))->ldn) /** Return the dimension size of the ndim array designated by \p interface. */ #define STARPU_NDIM_GET_NDIM(interface) (((struct starpu_ndim_interface *)(interface))->ndim) /** Return the size of the elements of the ndim array designated by \p interface. */ #define STARPU_NDIM_GET_ELEMSIZE(interface) (((struct starpu_ndim_interface *)(interface))->elemsize) #endif /** @} */ /** @name Vector Data Interface @{ */ extern struct starpu_data_interface_ops starpu_interface_vector_ops; /** todo */ struct starpu_vector_interface { enum starpu_data_interface_id id; /**< Identifier of the interface */ uintptr_t ptr; /**< local pointer of the vector */ uintptr_t dev_handle; /**< device handle of the vector. */ size_t offset; /**< offset in the vector */ uint32_t nx; /**< number of elements on the x-axis of the vector */ size_t elemsize; /**< size of the elements of the vector */ uint32_t slice_base; /**< vector slice base, used by the StarPU OpenMP runtime support */ size_t allocsize; /**< size actually currently allocated */ }; /** Register the \p nx \p elemsize-byte elements pointed to by \p ptr and initialize \p handle to represent it. Here an example of how to use the function. \code{.c} float vector[NX]; starpu_data_handle_t vector_handle; starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NX, sizeof(vector[0])); \endcode See \ref VectorDataInterface for more details. */ void starpu_vector_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t nx, size_t elemsize); /** Similar to starpu_vector_data_register, but additionally specifies which allocation size should be used instead of the initial nx*elemsize. See \ref VariableSizeDataInterface for more details. */ void starpu_vector_data_register_allocsize(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, uint32_t nx, size_t elemsize, size_t allocsize); /** Register into the \p handle that to store data on node \p node it should use the buffer located at \p ptr, or device handle \p dev_handle and offset \p offset (for OpenCL, notably) */ void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset); /** Return the number of elements registered into the array designated by \p handle. */ uint32_t starpu_vector_get_nx(starpu_data_handle_t handle); /** Return the size of each element of the array designated by \p handle. */ size_t starpu_vector_get_elemsize(starpu_data_handle_t handle); /** Return the allocated size of the array designated by \p handle. */ size_t starpu_vector_get_allocsize(starpu_data_handle_t handle); /** Return the local pointer associated with \p handle. */ uintptr_t starpu_vector_get_local_ptr(starpu_data_handle_t handle); #if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG) #define STARPU_VECTOR_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_vector_interface *)(interface))->id) == STARPU_VECTOR_INTERFACE_ID, "Error. The given data is not a vector.") #define STARPU_VECTOR_GET_PTR(interface) ( \ { \ STARPU_VECTOR_CHECK(interface); \ (((struct starpu_vector_interface *)(interface))->ptr); \ }) #define STARPU_VECTOR_GET_DEV_HANDLE(interface) ( \ { \ STARPU_VECTOR_CHECK(interface); \ (((struct starpu_vector_interface *)(interface))->dev_handle); \ }) #define STARPU_VECTOR_GET_OFFSET(interface) ( \ { \ STARPU_VECTOR_CHECK(interface); \ (((struct starpu_vector_interface *)(interface))->offset); \ }) #define STARPU_VECTOR_GET_NX(interface) ( \ { \ STARPU_VECTOR_CHECK(interface); \ (((struct starpu_vector_interface *)(interface))->nx); \ }) #define STARPU_VECTOR_GET_ELEMSIZE(interface) ( \ { \ STARPU_VECTOR_CHECK(interface); \ (((struct starpu_vector_interface *)(interface))->elemsize); \ }) #define STARPU_VECTOR_GET_ALLOCSIZE(interface) ( \ { \ STARPU_VECTOR_CHECK(interface); \ (((struct starpu_vector_interface *)(interface))->allocsize); \ }) #define STARPU_VECTOR_GET_SLICE_BASE(interface) ( \ { \ STARPU_VECTOR_CHECK(interface); \ (((struct starpu_vector_interface *)(interface))->slice_base); \ }) #else /** Return a pointer to the array designated by \p interface, valid on CPUs and CUDA only. For OpenCL, the device handle and offset need to be used instead. */ #define STARPU_VECTOR_GET_PTR(interface) (((struct starpu_vector_interface *)(interface))->ptr) /** Return a device handle for the array designated by \p interface, to be used with OpenCL. the offset returned by ::STARPU_VECTOR_GET_OFFSET has to be used in addition to this. */ #define STARPU_VECTOR_GET_DEV_HANDLE(interface) (((struct starpu_vector_interface *)(interface))->dev_handle) /** Return the offset in the array designated by \p interface, to be used with the device handle. */ #define STARPU_VECTOR_GET_OFFSET(interface) (((struct starpu_vector_interface *)(interface))->offset) /** Return the number of elements registered into the array designated by \p interface. */ #define STARPU_VECTOR_GET_NX(interface) (((struct starpu_vector_interface *)(interface))->nx) /** Return the size of each element of the array designated by \p interface. */ #define STARPU_VECTOR_GET_ELEMSIZE(interface) (((struct starpu_vector_interface *)(interface))->elemsize) /** Return the size of each element of the array designated by \p interface. */ #define STARPU_VECTOR_GET_ALLOCSIZE(interface) (((struct starpu_vector_interface *)(interface))->allocsize) /** Return the OpenMP slice base annotation of each element of the array designated by \p interface. */ #define STARPU_VECTOR_GET_SLICE_BASE(interface) (((struct starpu_vector_interface *)(interface))->slice_base) #endif /** Set the number of elements registered into the array designated by \p interface. */ #define STARPU_VECTOR_SET_NX(interface, newnx) \ do { \ STARPU_VECTOR_CHECK(interface); \ (((struct starpu_vector_interface *)(interface))->nx) = (newnx); \ } \ while (0) /** @} */ /** @name Variable Data Interface @{ */ extern struct starpu_data_interface_ops starpu_interface_variable_ops; /** Variable interface for a single data (not a vector, a matrix, a list, ...) */ struct starpu_variable_interface { enum starpu_data_interface_id id; /**< Identifier of the interface */ uintptr_t ptr; /**< local pointer of the variable */ uintptr_t dev_handle; /**< device handle of the variable. */ size_t offset; /**< offset in the variable */ size_t elemsize; /**< size of the variable */ }; /** Register the \p size byte element pointed to by \p ptr, which is typically a scalar or a pointer to an application-specific structure, and initialize \p handle to represent this data item. Here an example of how to use the function. \code{.c} float var = 42.0; starpu_data_handle_t var_handle; starpu_variable_data_register(&var_handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); \endcode See \ref VariableDataInterface for more details. */ void starpu_variable_data_register(starpu_data_handle_t *handle, int home_node, uintptr_t ptr, size_t size); /** Register into the \p handle that to store data on node \p node it should use the buffer located at \p ptr, or device handle \p dev_handle and offset \p offset (for OpenCL, notably) */ void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset); /** Return the size of the variable designated by \p handle. */ size_t starpu_variable_get_elemsize(starpu_data_handle_t handle); /** Return a pointer to the variable designated by \p handle. */ uintptr_t starpu_variable_get_local_ptr(starpu_data_handle_t handle); #if defined(STARPU_HAVE_STATEMENT_EXPRESSIONS) && defined(STARPU_DEBUG) #define STARPU_VARIABLE_CHECK(interface) STARPU_ASSERT_MSG((((struct starpu_variable_interface *)(interface))->id) == STARPU_VARIABLE_INTERFACE_ID, "Error. The given data is not a variable.") #define STARPU_VARIABLE_GET_PTR(interface) ( \ { \ STARPU_VARIABLE_CHECK(interface); \ (((struct starpu_variable_interface *)(interface))->ptr); \ }) #define STARPU_VARIABLE_GET_OFFSET(interface) ( \ { \ STARPU_VARIABLE_CHECK(interface); \ (((struct starpu_variable_interface *)(interface))->offset); \ }) #define STARPU_VARIABLE_GET_ELEMSIZE(interface) ( \ { \ STARPU_VARIABLE_CHECK(interface); \ (((struct starpu_variable_interface *)(interface))->elemsize); \ }) #define STARPU_VARIABLE_GET_DEV_HANDLE(interface) ( \ { \ STARPU_VARIABLE_CHECK(interface); \ (((struct starpu_variable_interface *)(interface))->ptr); \ }) #else /** Return a pointer to the variable designated by \p interface. */ #define STARPU_VARIABLE_GET_PTR(interface) (((struct starpu_variable_interface *)(interface))->ptr) /** Return the offset in the variable designated by \p interface, to be used with the device handle. */ #define STARPU_VARIABLE_GET_OFFSET(interface) (((struct starpu_variable_interface *)(interface))->offset) /** Return the size of the variable designated by \p interface. */ #define STARPU_VARIABLE_GET_ELEMSIZE(interface) (((struct starpu_variable_interface *)(interface))->elemsize) /** Return a device handle for the variable designated by \p interface, to be used with OpenCL. The offset returned by ::STARPU_VARIABLE_GET_OFFSET has to be used in addition to this. */ #define STARPU_VARIABLE_GET_DEV_HANDLE(interface) (((struct starpu_variable_interface *)(interface))->ptr) #endif /** @} */ /** @name Void Data Interface @{ */ extern struct starpu_data_interface_ops starpu_interface_void_ops; /** Register a void interface. There is no data really associated to that interface, but it may be used as a synchronization mechanism. It also permits to express an abstract piece of data that is managed by the application internally: this makes it possible to forbid the concurrent execution of different tasks accessing the same void data in read-write concurrently. See \ref DataHandlesHelpers for more details. */ void starpu_void_data_register(starpu_data_handle_t *handle); /** @} */ /** @name CSR Data Interface @{ */ extern struct starpu_data_interface_ops starpu_interface_csr_ops; /** CSR interface for sparse matrices (compressed sparse row representation) */ struct starpu_csr_interface { enum starpu_data_interface_id id; /**< Identifier of the interface */ uint32_t nnz; /**< number of non-zero entries */ uint32_t nrow; /**< number of rows */ uintptr_t nzval; /**< non-zero values */ uint32_t *colind; /**< position of non-zero entries on the row */ uint32_t *rowptr; /**< index (in nzval) of the first entry of the row */ uint32_t *ram_colind; /**< position of non-zero entries on the row (stored in RAM) */ uint32_t *ram_rowptr; /**< index (in nzval) of the first entry of the row (stored in RAM) */ uint32_t firstentry; /**< k for k-based indexing (0 or 1 usually). also useful when partitioning the matrix. */ size_t elemsize; /**< size of the elements of the matrix */ }; /** Register a CSR (Compressed Sparse Row Representation) sparse matrix. See \ref CSRDataInterface for more details. */ void starpu_csr_data_register(starpu_data_handle_t *handle, int home_node, uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, size_t elemsize); /** Return the number of non-zero values in the matrix designated by \p handle. */ uint32_t starpu_csr_get_nnz(starpu_data_handle_t handle); /** Return the size of the row pointer array of the matrix designated by \p handle. */ uint32_t starpu_csr_get_nrow(starpu_data_handle_t handle); /** Return the index at which all arrays (the column indexes, the row pointers...) of the matrix designated by \p handle. */ uint32_t starpu_csr_get_firstentry(starpu_data_handle_t handle); /** Return a local pointer to the non-zero values of the matrix designated by \p handle. */ uintptr_t starpu_csr_get_local_nzval(starpu_data_handle_t handle); /** Return a local pointer to the column index of the matrix designated by \p handle. */ uint32_t *starpu_csr_get_local_colind(starpu_data_handle_t handle); /** Return a local pointer to the row pointer array of the matrix designated by \p handle. */ uint32_t *starpu_csr_get_local_rowptr(starpu_data_handle_t handle); /** Return the size of the elements registered into the matrix designated by \p handle. */ size_t starpu_csr_get_elemsize(starpu_data_handle_t handle); /** Return the number of non-zero values in the matrix designated by \p interface. */ #define STARPU_CSR_GET_NNZ(interface) (((struct starpu_csr_interface *)(interface))->nnz) /** Return the size of the row pointer array of the matrix designated by \p interface. */ #define STARPU_CSR_GET_NROW(interface) (((struct starpu_csr_interface *)(interface))->nrow) /** Return a pointer to the non-zero values of the matrix designated by \p interface. */ #define STARPU_CSR_GET_NZVAL(interface) (((struct starpu_csr_interface *)(interface))->nzval) /** Return a device handle for the array of non-zero values in the matrix designated by \p interface. The offset returned by ::STARPU_CSR_GET_OFFSET has to used in addition to this. */ #define STARPU_CSR_GET_NZVAL_DEV_HANDLE(interface) (((struct starpu_csr_interface *)(interface))->nnz) /** Return a pointer to the column index of the matrix designated by \p interface. */ #define STARPU_CSR_GET_COLIND(interface) (((struct starpu_csr_interface *)(interface))->colind) /** Return a RAM pointer to the column index of the matrix designated by \p interface. */ #define STARPU_CSR_GET_RAM_COLIND(interface) (((struct starpu_csr_interface *)(interface))->ram_colind) /** Return a device handle for the column index of the matrix designated by \p interface. The offset returned by ::STARPU_CSR_GET_OFFSET has to be used in addition to this. */ #define STARPU_CSR_GET_COLIND_DEV_HANDLE(interface) (((struct starpu_csr_interface *)(interface))->colind) /** Return a pointer to the row pointer array of the matrix designated by \p interface. */ #define STARPU_CSR_GET_ROWPTR(interface) (((struct starpu_csr_interface *)(interface))->rowptr) /** Return a RAM pointer to the row pointer array of the matrix designated by \p interface. */ #define STARPU_CSR_GET_RAM_ROWPTR(interface) (((struct starpu_csr_interface *)(interface))->ram_rowptr) /** Return a device handle for the row pointer array of the matrix designated by \p interface. The offset returned by ::STARPU_CSR_GET_OFFSET has to be used in addition to this. */ #define STARPU_CSR_GET_ROWPTR_DEV_HANDLE(interface) (((struct starpu_csr_interface *)(interface))->rowptr) /** Return the offset in the arrays (colind, rowptr, nzval) of the matrix designated by \p interface, to be used with the device handles. */ #define STARPU_CSR_GET_OFFSET 0 /** Return the index at which all arrays (the column indexes, the row pointers...) of the \p interface start. */ #define STARPU_CSR_GET_FIRSTENTRY(interface) (((struct starpu_csr_interface *)(interface))->firstentry) /** Return the size of the elements registered into the matrix designated by \p interface. */ #define STARPU_CSR_GET_ELEMSIZE(interface) (((struct starpu_csr_interface *)(interface))->elemsize) /** @} */ /** @name BCSR Data Interface @{ */ extern struct starpu_data_interface_ops starpu_interface_bcsr_ops; /** BCSR interface for sparse matrices (blocked compressed sparse row representation) Note: when a BCSR matrix is partitioned, nzval, colind, and rowptr point into the corresponding father arrays. The rowptr content is thus the same as the father's. Firstentry is used to offset this so it becomes valid for the child arrays. */ struct starpu_bcsr_interface { enum starpu_data_interface_id id; /**< Identifier of the interface */ uint32_t nnz; /**< number of non-zero BLOCKS */ uint32_t nrow; /**< number of rows (in terms of BLOCKS) */ uintptr_t nzval; /**< non-zero values: nnz blocks of r*c elements */ uint32_t *colind; /**< array of nnz elements, colind[i] is the block-column index for block i in nzval */ uint32_t *rowptr; /**< array of nrow+1 * elements, rowptr[i] is * the block-index (in * nzval) of the first block * of row i. By convention, * rowptr[nrow] is the * number of blocks, this * allows an easier access * of the matrix's elements * for the kernels. */ uint32_t *ram_colind; /**< array of nnz elements (stored in RAM) */ uint32_t *ram_rowptr; /**< array of nrow+1 elements (stored in RAM) */ uint32_t firstentry; /**< k for k-based indexing (0 or 1 usually). Also useful when partitioning the matrix. */ uint32_t r; /**< height of the blocks */ uint32_t c; /**< width of the blocks */ size_t elemsize; /**< size of the elements of the matrix */ }; /** This variant of starpu_data_register() uses the BCSR (Blocked Compressed Sparse Row Representation) sparse matrix interface. Register the sparse matrix made of \p nnz non-zero blocks of elements of size \p elemsize stored in \p nzval and initializes \p handle to represent it. Blocks have size \p r * \p c. \p nrow is the number of rows (in terms of blocks), \p colind is an array of nnz elements, colind[i] is the block-column index for block i in \p nzval, \p rowptr is an array of nrow+1 elements, rowptr[i] is the block-index (in \p nzval) of the first block of row i. By convention, rowptr[nrow] is the number of blocks, this allows an easier access of the matrix's elements for the kernels. \p firstentry is the index of the first entry of the given arrays (usually 0 or 1). Here an example with the following matrix: \code | 0 1 0 0 | \endcode \code | 2 3 0 0 | \endcode \code | 4 5 8 9 | \endcode \code | 6 7 10 11 | \endcode \code nzval = [0, 1, 2, 3] ++ [4, 5, 6, 7] ++ [8, 9, 10, 11] \endcode \code colind = [0, 0, 1] \endcode \code rowptr = [0, 1, 3] \endcode \code r = c = 2 \endcode which translates into the following code \code{.c} int R = 2; // Size of the blocks int C = 2; int NROWS = 2; int NNZ_BLOCKS = 3; // out of 4 int NZVAL_SIZE = (R*C*NNZ_BLOCKS); int nzval[NZVAL_SIZE] = { 0, 1, 2, 3, // First block 4, 5, 6, 7, // Second block 8, 9, 10, 11 // Third block }; uint32_t colind[NNZ_BLOCKS] = { 0, // block-column index for first block in nzval 0, // block-column index for second block in nzval 1 // block-column index for third block in nzval }; uint32_t rowptr[NROWS+1] = { 0, // block-index in nzval of the first block of the first row. 1, // block-index in nzval of the first block of the second row. NNZ_BLOCKS // number of blocks, to allow an easier element's access for the kernels }; starpu_data_handle_t bcsr_handle; starpu_bcsr_data_register(&bcsr_handle, STARPU_MAIN_RAM, NNZ_BLOCKS, NROWS, (uintptr_t) nzval, colind, rowptr, 0, // firstentry R, C, sizeof(nzval[0])); \endcode See \ref BCSRDataInterface for more details. */ void starpu_bcsr_data_register(starpu_data_handle_t *handle, int home_node, uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, uint32_t r, uint32_t c, size_t elemsize); /** Return the number of non-zero elements in the matrix designated by \p handle. */ uint32_t starpu_bcsr_get_nnz(starpu_data_handle_t handle); /** Return the number of rows (in terms of blocks of size r*c) in the matrix designated by \p handle. */ uint32_t starpu_bcsr_get_nrow(starpu_data_handle_t handle); /** Return the index at which all arrays (the column indexes, the row pointers...) of the matrix desginated by \p handle. */ uint32_t starpu_bcsr_get_firstentry(starpu_data_handle_t handle); /** Return a pointer to the non-zero values of the matrix designated by \p handle. */ uintptr_t starpu_bcsr_get_local_nzval(starpu_data_handle_t handle); /** Return a pointer to the column index, which holds the positions of the non-zero entries in the matrix designated by \p handle. */ uint32_t *starpu_bcsr_get_local_colind(starpu_data_handle_t handle); /** Return the row pointer array of the matrix designated by \p handle. */ uint32_t *starpu_bcsr_get_local_rowptr(starpu_data_handle_t handle); /** Return the number of rows in a block. */ uint32_t starpu_bcsr_get_r(starpu_data_handle_t handle); /** Return the number of columns in a block. */ uint32_t starpu_bcsr_get_c(starpu_data_handle_t handle); /** Return the size of the elements in the matrix designated by \p handle. */ size_t starpu_bcsr_get_elemsize(starpu_data_handle_t handle); /** Return the number of non-zero values in the matrix designated by \p interface. */ #define STARPU_BCSR_GET_NNZ(interface) (((struct starpu_bcsr_interface *)(interface))->nnz) /** Return the number of block rows in the matrix designated by \p interface. */ #define STARPU_BCSR_GET_NROW(interface) (((struct starpu_bcsr_interface *)(interface))->nrow) /** Return a pointer to the non-zero values of the matrix designated by \p interface. */ #define STARPU_BCSR_GET_NZVAL(interface) (((struct starpu_bcsr_interface *)(interface))->nzval) /** Return a device handle for the array of non-zero values in the matrix designated by \p interface. The offset returned by ::STARPU_BCSR_GET_OFFSET has to be used in addition to this. */ #define STARPU_BCSR_GET_NZVAL_DEV_HANDLE(interface) (((struct starpu_bcsr_interface *)(interface))->nnz) /** Return a pointer to the column index of the matrix designated by \p interface. */ #define STARPU_BCSR_GET_COLIND(interface) (((struct starpu_bcsr_interface *)(interface))->colind) /** Return a RAM pointer to the column index of the matrix designated by \p interface. */ #define STARPU_BCSR_GET_RAM_COLIND(interface) (((struct starpu_bcsr_interface *)(interface))->ram_colind) /** Return a device handle for the column index of the matrix designated by \p interface. The offset returned by ::STARPU_BCSR_GET_OFFSET has to be used in addition to this. */ #define STARPU_BCSR_GET_COLIND_DEV_HANDLE(interface) (((struct starpu_bcsr_interface *)(interface))->colind) /** Return a pointer to the row pointer array of the matrix designated by \p interface. */ #define STARPU_BCSR_GET_ROWPTR(interface) (((struct starpu_bcsr_interface *)(interface))->rowptr) /** Return a RAM pointer to the row pointer array of the matrix designated by \p interface. */ #define STARPU_BCSR_GET_RAM_ROWPTR(interface) (((struct starpu_bcsr_interface *)(interface))->ram_rowptr) /** Return a device handle for the row pointer array of the matrix designated by \p interface. The offset returned by ::STARPU_BCSR_GET_OFFSET has to be used in addition to this. */ #define STARPU_BCSR_GET_ROWPTR_DEV_HANDLE(interface) (((struct starpu_bcsr_interface *)(interface))->rowptr) /** Return the base of the indexing (0 or 1 usually) in the matrix designated by \p interface. */ #define STARPU_BCSR_GET_FIRSTENTRY(interface) (((struct starpu_bcsr_interface *)(interface))->firstentry) /** Return the height of blocks in the matrix designated by \p interface. */ #define STARPU_BCSR_GET_R(interface) (((struct starpu_bcsr_interface *)(interface))->r) /** Return the width of blocks in the matrix designated by \p interface. */ #define STARPU_BCSR_GET_C(interface) (((struct starpu_bcsr_interface *)(interface))->c) /** Return the size of elements in the matrix designated by \p interface. */ #define STARPU_BCSR_GET_ELEMSIZE(interface) (((struct starpu_bcsr_interface *)(interface))->elemsize) /** Return the offset in the arrays (coling, rowptr, nzval) of the matrix designated by \p interface, to be used with the device handles. */ #define STARPU_BCSR_GET_OFFSET 0 /** @} */ /** @name Multiformat Data Interface @{ */ /** Multiformat operations */ struct starpu_multiformat_data_interface_ops { size_t cpu_elemsize; /**< size of each element on CPUs */ size_t opencl_elemsize; /**< size of each element on OpenCL devices */ struct starpu_codelet *cpu_to_opencl_cl; /**< pointer to a codelet which converts from CPU to OpenCL */ struct starpu_codelet *opencl_to_cpu_cl; /**< pointer to a codelet which converts from OpenCL to CPU */ size_t cuda_elemsize; /**< size of each element on CUDA devices */ struct starpu_codelet *cpu_to_cuda_cl; /**< pointer to a codelet which converts from CPU to CUDA */ struct starpu_codelet *cuda_to_cpu_cl; /**< pointer to a codelet which converts from CUDA to CPU */ }; /** todo */ struct starpu_multiformat_interface { enum starpu_data_interface_id id; void *cpu_ptr; void *cuda_ptr; void *hip_ptr; void *opencl_ptr; uint32_t nx; struct starpu_multiformat_data_interface_ops *ops; }; /** Register a piece of data that can be represented in different ways, depending upon the processing unit that manipulates it. It allows the programmer, for instance, to use an array of structures when working on a CPU, and a structure of arrays when working on a GPU. \p nobjects is the number of elements in the data. \p format_ops describes the format. See \ref TheMultiformatInterface for more details. */ void starpu_multiformat_data_register(starpu_data_handle_t *handle, int home_node, void *ptr, uint32_t nobjects, struct starpu_multiformat_data_interface_ops *format_ops); /** Return the local pointer to the data with CPU format. */ #define STARPU_MULTIFORMAT_GET_CPU_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->cpu_ptr) /** Return the local pointer to the data with CUDA format. */ #define STARPU_MULTIFORMAT_GET_CUDA_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->cuda_ptr) /** Return the local pointer to the data with HIP format. */ #define STARPU_MULTIFORMAT_GET_HIP_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->hip_ptr) /** Return the local pointer to the data with OpenCL format. */ #define STARPU_MULTIFORMAT_GET_OPENCL_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->opencl_ptr) /** Return the number of elements in the data. */ #define STARPU_MULTIFORMAT_GET_NX(interface) (((struct starpu_multiformat_interface *)(interface))->nx) /** @} */ /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_DATA_INTERFACES_H__ */ starpu-1.4.9+dfsg/include/starpu_deprecated_api.h000066400000000000000000000140501507764646700221330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_DEPRECATED_API_H__ #define __STARPU_DEPRECATED_API_H__ #ifdef __cplusplus extern "C" { #endif #if defined(STARPU_USE_DEPRECATED_API) || defined(STARPU_USE_DEPRECATED_ONE_ZERO_API) #warning Your application is using deprecated types. You may want to update to use the latest API, by using tools/dev/rename.sh. #endif /* defined(STARPU_USE_DEPRECATED_API) || defined(STARPU_USE_DEPRECATED_ONE_ZERO_API) */ #define starpu_permodel_history_based_expected_perf starpu_perfmodel_history_based_expected_perf #ifdef STARPU_USE_DEPRECATED_ONE_ZERO_API #define starpu_allocate_buffer_on_node starpu_malloc_on_node #define starpu_free_buffer_on_node starpu_free_on_node #define starpu_helper_cublas_init starpu_cublas_init #define starpu_helper_cublas_shutdown starpu_cublas_shutdown #define starpu_canonical_block_filter_bcsr starpu_bcsr_filter_canonical_block #define starpu_vertical_block_filter_func_csr starpu_csr_filter_vertical_block #define starpu_block_filter_func starpu_matrix_filter_block #define starpu_block_shadow_filter_func starpu_matrix_filter_block_shadow #define starpu_vertical_block_filter_func starpu_matrix_filter_vertical_block #define starpu_vertical_block_shadow_filter_func starpu_matrix_filter_vertical_block_shadow #define starpu_block_filter_func_vector starpu_vector_filter_block #define starpu_block_shadow_filter_func_vector starpu_vector_filter_block_shadow #define starpu_vector_list_filter_func starpu_vector_filter_list #define starpu_vector_divide_in_2_filter_func starpu_vector_filter_divide_in_2 #define starpu_block_filter_func_block starpu_block_filter_block #define starpu_block_shadow_filter_func_block starpu_block_filter_block_shadow #define starpu_vertical_block_filter_func_block starpu_block_filter_vertical_block #define starpu_vertical_block_shadow_filter_func_block starpu_block_filter_vertical_block_shadow #define starpu_depth_block_filter_func_block starpu_block_filter_depth_block #define starpu_depth_block_shadow_filter_func_block starpu_block_filter_depth_block_shadow #define starpu_display_codelet_stats starpu_codelet_display_stats #define starpu_access_mode starpu_data_access_mode #define starpu_buffer_descr starpu_data_descr #define starpu_memory_display_stats starpu_data_display_memory_stats #define starpu_handle_to_pointer starpu_data_handle_to_pointer #define starpu_handle_get_local_ptr starpu_data_get_local_ptr #define starpu_crc32_be_n starpu_hash_crc32c_be_n #define starpu_crc32_be starpu_hash_crc32c_be #define starpu_crc32_string starpu_hash_crc32c_string #define starpu_perf_archtype starpu_perfmodel_archtype #define starpu_history_based_expected_perf starpu_perfmodel_history_based_expected_perf #define starpu_task_profiling_info starpu_profiling_task_info #define starpu_worker_profiling_info starpu_profiling_worker_info #define starpu_bus_profiling_info starpu_profiling_bus_info #define starpu_set_profiling_id starpu_profiling_set_id #define starpu_worker_get_profiling_info starpu_profiling_worker_get_info #define starpu_bus_profiling_helper_display_summary starpu_profiling_bus_helper_display_summary #define starpu_worker_profiling_helper_display_summary starpu_profiling_worker_helper_display_summary #define starpu_archtype starpu_worker_archtype #define starpu_handle_get_interface_id starpu_data_get_interface_id #define starpu_handle_get_size starpu_data_get_size #define starpu_handle_pack_data starpu_data_pack #define starpu_handle_unpack_data starpu_data_unpack #endif /* STARPU_USE_DEPRECATED_ONE_ZERO_API */ #ifdef STARPU_USE_DEPRECATED_API typedef starpu_data_handle_t starpu_data_handle; typedef struct starpu_block_interface starpu_block_interface_t; typedef struct starpu_matrix_interface starpu_matrix_interface_t; typedef struct starpu_vector_interface starpu_vector_interface_t; typedef struct starpu_variable_interface starpu_variable_interface_t; typedef struct starpu_csr_interface starpu_csr_interface_t; typedef struct starpu_bcsr_interface starpu_bcsr_interface_t; typedef struct starpu_multiformat_interface starpu_multiformat_interface_t; #define starpu_machine_topology_s starpu_machine_topology #define starpu_htbl32_node_s starpu_htbl32_node #define starpu_history_list_t starpu_history_list #define starpu_buffer_descr_t starpu_buffer_descr #define starpu_regression_model_t starpu_regression_model #define starpu_per_arch_perfmodel_t starpu_per_arch_perfmodel #define starpu_perfmodel_t starpu_perfmodel #define starpu_sched_policy_s starpu_sched_policy #define starpu_data_interface_ops_t starpu_data_interface_ops typedef struct starpu_buffer_descr starpu_buffer_descr; typedef struct starpu_codelet starpu_codelet; typedef struct starpu_codelet starpu_codelet_t; typedef enum starpu_access_mode starpu_access_mode; #define starpu_print_bus_bandwidth starpu_bus_print_bandwidth #define starpu_get_handle_interface_id starpu_handle_get_interface_id #define starpu_get_current_task starpu_task_get_current #define starpu_unpack_cl_args starpu_codelet_unpack_args #define starpu_pack_cl_args starpu_codelet_pack_args #define starpu_task_deinit starpu_task_clean #endif /* STARPU_USE_DEPRECATED_API */ #ifdef __cplusplus } #endif #endif /* __STARPU_DEPRECATED_API_H__ */ starpu-1.4.9+dfsg/include/starpu_disk.h000066400000000000000000000170011507764646700201330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_DISK_H__ #define __STARPU_DISK_H__ #include #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Out_Of_Core Out Of Core @{ */ /** Set of functions to manipulate data on disk. See \ref DiskFunctions for more details. */ struct starpu_disk_ops { /** Connect a disk memory at location \p parameter with size \p size, and return a base as void*, which will be passed by StarPU to all other methods. */ void *(*plug)(void *parameter, starpu_ssize_t size); /** Disconnect a disk memory \p base. */ void (*unplug)(void *base); /** Measure the bandwidth and the latency for the disk \p node and save it. Returns 1 if it could measure it. */ int (*bandwidth)(unsigned node, void *base); /** Create a new location for data of size \p size. Return an opaque object pointer. */ void *(*alloc)(void *base, size_t size); /** Free a data \p obj previously allocated with starpu_disk_ops::alloc. */ void (*free)(void *base, void *obj, size_t size); /** Open an existing location of data, at a specific position \p pos dependent on the backend. */ void *(*open)(void *base, void *pos, size_t size); /** Close, without deleting it, a location of data \p obj. */ void (*close)(void *base, void *obj, size_t size); /** Read \p size bytes of data from \p obj in \p base, at offset \p offset, and put into \p buf. Return the actual number of read bytes. */ int (*read)(void *base, void *obj, void *buf, off_t offset, size_t size); /** Write \p size bytes of data to \p obj in \p base, at offset \p offset, from \p buf. Return 0 on success. */ int (*write)(void *base, void *obj, const void *buf, off_t offset, size_t size); /** Read all data from \p obj of \p base, from offset 0. Returns it in an allocated buffer \p ptr, of size \p size */ int (*full_read)(void *base, void *obj, void **ptr, size_t *size, unsigned dst_node); /** Write data in \p ptr to \p obj of \p base, from offset 0, and truncate \p obj to \p size, so that a \c full_read will get it. */ int (*full_write)(void *base, void *obj, void *ptr, size_t size); /** Asynchronously write \p size bytes of data to \p obj in \p base, at offset \p offset, from \p buf. Return a void* pointer that StarPU will pass to \c xxx_request methods for testing for the completion. */ void *(*async_write)(void *base, void *obj, void *buf, off_t offset, size_t size); /** Asynchronously read \p size bytes of data from \p obj in \p base, at offset \p offset, and put into \p buf. Return a void* pointer that StarPU will pass to \c xxx_request methods for testing for the completion. */ void *(*async_read)(void *base, void *obj, void *buf, off_t offset, size_t size); /** Read all data from \p obj of \p base, from offset 0. Return it in an allocated buffer \p ptr, of size \p size */ void *(*async_full_read)(void *base, void *obj, void **ptr, size_t *size, unsigned dst_node); /** Write data in \p ptr to \p obj of \p base, from offset 0, and truncate \p obj to \p size, so that a starpu_disk_ops::full_read will get it. */ void *(*async_full_write)(void *base, void *obj, void *ptr, size_t size); /** Copy from offset \p offset_src of disk object \p obj_src in \p base_src to offset \p offset_dst of disk object \p obj_dst in \p base_dst. Return a void* pointer that StarPU will pass to \c xxx_request methods for testing for the completion. */ void *(*copy)(void *base_src, void *obj_src, off_t offset_src, void *base_dst, void *obj_dst, off_t offset_dst, size_t size); /** Wait for completion of request \p async_channel returned by a previous asynchronous read, write or copy. */ void (*wait_request)(void *async_channel); /** Test for completion of request \p async_channel returned by a previous asynchronous read, write or copy. Return 1 on completion, 0 otherwise. */ int (*test_request)(void *async_channel); /** Free the request allocated by a previous asynchronous read, write or copy. */ void (*free_request)(void *async_channel); /* TODO: readv, writev, read2d, write2d, etc. */ }; /** Use the stdio library (fwrite, fread...) to read/write on disk. Warning: It creates one file per allocation ! Do not support asynchronous transfers. */ extern struct starpu_disk_ops starpu_disk_stdio_ops; /** Use the HDF5 library. It doesn't support multiple opening from different processes. You may only allow one process to write in the HDF5 file. If HDF5 library is not compiled with --thread-safe you can't open more than one HDF5 file at the same time. */ extern struct starpu_disk_ops starpu_disk_hdf5_ops; /** Use the unistd library (write, read...) to read/write on disk. Warning: It creates one file per allocation ! */ extern struct starpu_disk_ops starpu_disk_unistd_ops; /** Use the unistd library (write, read...) to read/write on disk with the O_DIRECT flag. Warning: It creates one file per allocation ! Only available on Linux systems. */ extern struct starpu_disk_ops starpu_disk_unistd_o_direct_ops; /** Use the leveldb created by Google. More information at https://code.google.com/p/leveldb/ Do not support asynchronous transfers. */ extern struct starpu_disk_ops starpu_disk_leveldb_ops; /** Close an existing data opened with starpu_disk_open(). See \ref OutOfCore_Introduction for more details. */ void starpu_disk_close(unsigned node, void *obj, size_t size); /** Open an existing file memory in a disk node. \p size is the size of the file. \p pos is the specific position dependent on the backend, given to the \c open method of the disk operations. Return an opaque object pointer. See \ref OutOfCore_Introduction for more details. */ void *starpu_disk_open(unsigned node, void *pos, size_t size); /** Register a disk memory node with a set of functions to manipulate data. The \c plug member of \p func will be passed \p parameter, and return a \c base which will be passed to all \p func methods.
    SUCCESS: return the disk node.
    FAIL: return an error code.
    \p size must be at least \ref STARPU_DISK_SIZE_MIN bytes ! \p size being negative means infinite size. See \ref OutOfCore_Introduction for more details. */ int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, starpu_ssize_t size); /** Minimum size of a registered disk. The size of a disk is the last parameter of the function starpu_disk_register(). */ #define STARPU_DISK_SIZE_MIN (16 * 1024 * 1024) /** Contain the node number of the disk swap, if set up through the \ref STARPU_DISK_SWAP variable. */ extern int starpu_disk_swap_node; /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_DISK_H__ */ starpu-1.4.9+dfsg/include/starpu_driver.h000066400000000000000000000063101507764646700204750ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_DRIVER_H__ #define __STARPU_DRIVER_H__ #include #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__) && !defined(__HIPCC__) #include #endif #if defined(STARPU_USE_MAX_FPGA) #include #endif #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Running_Drivers Running Drivers @{ */ /** Pre-initialize drivers So as to register information on device types, memory types, etc. Only use internally by StarPU. */ void starpu_drivers_preinit(void); /** structure for designating a given driver. See \ref UsingTheDriverAPI for more details. */ struct starpu_driver { /** Type of the driver. Only ::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER and ::STARPU_OPENCL_WORKER are currently supported. */ enum starpu_worker_archtype type; /** Identifier of the driver. */ union { unsigned cpu_id; unsigned cuda_id; unsigned hip_id; #if defined(STARPU_USE_OPENCL) && !defined(__CUDACC__) && !defined(__HIPCC__) cl_device_id opencl_id; #endif } id; }; /** Initialize the given driver, run it until it receives a request to terminate, deinitialize it and return 0 on success. Return -EINVAL if starpu_driver::type is not a valid StarPU device type (::STARPU_CPU_WORKER, ::STARPU_CUDA_WORKER or ::STARPU_OPENCL_WORKER). This is the same as using the following functions: calling starpu_driver_init(), then calling starpu_driver_run_once() in a loop, and finally starpu_driver_deinit(). See \ref UsingTheDriverAPI for more details. */ int starpu_driver_run(struct starpu_driver *d); /** Notify all running drivers that they should terminate. See \ref UsingTheDriverAPI for more details. */ void starpu_drivers_request_termination(void); /** Initialize the given driver. Return 0 on success, -EINVAL if starpu_driver::type is not a valid ::starpu_worker_archtype. See \ref UsingTheDriverAPI for more details. */ int starpu_driver_init(struct starpu_driver *d); /** Run the driver once, then return 0 on success, -EINVAL if starpu_driver::type is not a valid ::starpu_worker_archtype. See \ref UsingTheDriverAPI for more details. */ int starpu_driver_run_once(struct starpu_driver *d); /** Deinitialize the given driver. Return 0 on success, -EINVAL if starpu_driver::type is not a valid ::starpu_worker_archtype. See \ref UsingTheDriverAPI for more details. */ int starpu_driver_deinit(struct starpu_driver *d); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_DRIVER_H__ */ starpu-1.4.9+dfsg/include/starpu_expert.h000066400000000000000000000026161507764646700205160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_EXPERT_H__ #define __STARPU_EXPERT_H__ #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Expert_Mode Expert Mode @{ */ /** Wake all the workers, so they can inspect data requests and task submissions again. */ void starpu_wake_all_blocked_workers(void); /** Register a progression hook, to be called when workers are idle. */ int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg); /** Unregister a given progression hook. */ void starpu_progression_hook_deregister(int hook_id); int starpu_idle_hook_register(unsigned (*func)(void *arg), void *arg); void starpu_idle_hook_deregister(int hook_id); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_H__ */ starpu-1.4.9+dfsg/include/starpu_fxt.h000066400000000000000000000125521507764646700200100ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2013-2013 Joris Pablo * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_FXT_H__ #define __STARPU_FXT_H__ #include #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_FxT_Support FxT Support @{ */ /** todo */ struct starpu_fxt_codelet_event { char symbol[2048]; int workerid; char perfmodel_archname[256]; uint32_t hash; size_t size; float time; }; /** Store information related to clock synchronizations: mainly the offset to apply to each time. */ struct starpu_fxt_mpi_offset { uint64_t local_time_start; /**< node time for the barrier at the beginning of the program */ int64_t offset_start; /**< offset to apply to node time, computed at the beginning of the program */ uint64_t local_time_end; /**< node time for the barrier at the end of the program (optional) */ int64_t offset_end; /**< offset to apply to node time, computed at the end of the program (optional) */ int nb_barriers; /**< number of barriers to synchronize clocks during the execution of the program (can be 0, 1 or 2) */ }; /** todo */ struct starpu_fxt_options { unsigned per_task_colour; unsigned no_events; unsigned no_counter; unsigned no_bus; unsigned no_flops; unsigned ninputfiles; unsigned no_smooth; unsigned no_acquire; unsigned memory_states; unsigned internal; unsigned label_deps; unsigned use_task_color; char *filenames[STARPU_FXT_MAX_FILES]; char *out_paje_path; char *distrib_time_path; char *activity_path; char *sched_tasks_path; char *dag_path; char *tasks_path; char *data_path; char *papi_path; char *comms_path; char *number_events_path; char *anim_path; char *states_path; char *dir; char worker_names[STARPU_NMAXWORKERS][256]; int nworkers; struct starpu_perfmodel_arch worker_archtypes[STARPU_NMAXWORKERS]; /** In case we are going to gather multiple traces (e.g in the case of MPI processes), we may need to prefix the name of the containers. */ char *file_prefix; /** In case we are going to gather multiple traces (e.g in the case of MPI processes), we may need to synchronize clocks and apply an offset. */ struct starpu_fxt_mpi_offset file_offset; /** In case we are going to gather multiple traces (e.g in the case of MPI processes), this variable stores the MPI rank of the trace file. */ int file_rank; /** In case we want to dump the list of codelets to an external tool */ struct starpu_fxt_codelet_event **dumped_codelets; /** In case we want to dump the list of codelets to an external tool, number of dumped codelets. */ long dumped_codelets_count; }; void starpu_fxt_options_init(struct starpu_fxt_options *options); void starpu_fxt_options_shutdown(struct starpu_fxt_options *options); void starpu_fxt_generate_trace(struct starpu_fxt_options *options); /** Determine whether profiling should be started by starpu_init(), or only when starpu_fxt_start_profiling() is called. \p autostart should be 1 to do so, or 0 to prevent it. This function has to be called before starpu_init(). See \ref LimitingScopeTrace for more details. */ void starpu_fxt_autostart_profiling(int autostart); /** Start recording the trace. The trace is by default started from starpu_init() call, but can be paused by using starpu_fxt_stop_profiling(), in which case starpu_fxt_start_profiling() should be called to resume recording events. See \ref LimitingScopeTrace for more details. */ void starpu_fxt_start_profiling(void); /** Stop recording the trace. The trace is by default stopped when calling starpu_shutdown(). starpu_fxt_stop_profiling() can however be used to stop it earlier. starpu_fxt_start_profiling() can then be called to start recording it again, etc. See \ref LimitingScopeTrace for more details. */ void starpu_fxt_stop_profiling(void); void starpu_fxt_write_data_trace(char *filename_in); void starpu_fxt_write_data_trace_in_dir(char *filename_in, char *dir); /** Wrapper to get value of env variable STARPU_FXT_TRACE */ int starpu_fxt_is_enabled(void); /** Add an event in the execution trace if FxT is enabled. See \ref CreatingAGanttDiagram for more details. */ void starpu_fxt_trace_user_event(unsigned long code); /** Add a string event in the execution trace if FxT is enabled. See \ref CreatingAGanttDiagram for more details. */ void starpu_fxt_trace_user_event_string(const char *s); /** Add a string event in the execution trace if FxT is enabled even during initialization. */ void starpu_fxt_trace_user_meta_string(const char *s); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_FXT_H__ */ starpu-1.4.9+dfsg/include/starpu_hash.h000066400000000000000000000046751507764646700201410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_HASH_H__ #define __STARPU_HASH_H__ #include #include #ifdef __cplusplus extern "C" { #endif /** @ingroup API_Data_Interfaces @{ */ /** Compute the CRC of a byte buffer seeded by the \p inputcrc current state. The return value should be considered as the new current state for future CRC computation. This is used for computing data size footprint. See \ref DefiningANewDataInterface_footprint for more details. */ uint32_t starpu_hash_crc32c_be_n(const void *input, size_t n, uint32_t inputcrc); /** Compute the CRC of a pointer value seeded by the \p inputcrc current state. The return value should be considered as the new current state for future CRC computation. This is used for computing data size footprint. See \ref DefiningANewDataInterface_footprint for more details. */ uint32_t starpu_hash_crc32c_be_ptr(void *input, uint32_t inputcrc); /** Compute the CRC of a 32bit number seeded by the \p inputcrc current state. The return value should be considered as the new current state for future CRC computation. This is used for computing data size footprint. See \ref DefiningANewDataInterface_footprint for more details. */ uint32_t starpu_hash_crc32c_be(uint32_t input, uint32_t inputcrc); /** Compute the CRC of a string seeded by the \p inputcrc current state. The return value should be considered as the new current state for future CRC computation. This is used for computing data size footprint. See \ref DefiningANewDataInterface_footprint for more details. */ uint32_t starpu_hash_crc32c_string(const char *str, uint32_t inputcrc); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_HASH_H__ */ starpu-1.4.9+dfsg/include/starpu_helper.h000066400000000000000000000230441507764646700204640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_HELPER_H__ #define __STARPU_HELPER_H__ #include #ifdef STARPU_HAVE_HWLOC #include #endif #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Miscellaneous_Helpers Miscellaneous Helpers @{ */ /** Return the min of the two parameters. */ #define STARPU_MIN(a, b) ((a) < (b) ? (a) : (b)) /** Return the max of the two parameters. */ #define STARPU_MAX(a, b) ((a) < (b) ? (b) : (a)) /** Define a value which can be used to mark pointers as invalid values. */ #define STARPU_POISON_PTR ((void *)0xdeadbeef) extern int _starpu_silent; /** Retrieve the value of an environment variable. See \ref ExecutionConfigurationThroughEnvironmentVariables for more details. */ char *starpu_getenv(const char *str); /** Same as starpu_get_env_string_var_default() */ #define starpu_getenv_string_var_default(s, ss, d) starpu_get_env_string_var_default(s, ss, d) /** If the environment variable \c str is defined and its value is contained in the array \c strings, return the array position. Raise an error if the environment variable \c str is defined with a value not in \c strings Return \c defvalue if the environment variable \c str is not defined. See \ref ExecutionConfigurationThroughEnvironmentVariables for more details. */ int starpu_get_env_string_var_default(const char *str, const char *strings[], int defvalue); /** Same as starpu_get_env_size_default() */ #define starpu_getenv_size_default(s, d) starpu_get_env_size_default(s, d) /** If the environment variable \c str is defined with a well-defined size value, return the value as a size in bytes. Expected size qualifiers are b, B, k, K, m, M, g, G. The default qualifier is K. If the environment variable \c str is not defined or is empty, return \c defval Raise an error if the value of the environment variable \c str is not well-defined. See \ref ExecutionConfigurationThroughEnvironmentVariables for more details. */ int starpu_get_env_size_default(const char *str, int defval); /** Same as starpu_get_env_number() */ #define starpu_getenv_number(s) starpu_get_env_number(s) /** Return the integer value of the environment variable named \p str. Return 0 otherwise (the variable does not exist or has a non-integer value). */ static __starpu_inline int starpu_get_env_number(const char *str) { char *strval; strval = starpu_getenv(str); if (strval) { /* the env variable was actually set */ long int val; char *pcheck; val = strtol(strval, &pcheck, 10); if (*pcheck) { fprintf(stderr, "The %s environment variable must contain an integer\n", str); STARPU_ABORT(); } /* fprintf(stderr, "ENV %s WAS %d\n", str, val); */ STARPU_ASSERT_MSG(val >= 0, "The value for the environment variable '%s' cannot be negative", str); return (int)val; } else { /* there is no such env variable */ /* fprintf("There was no %s ENV\n", str); */ return -1; } } /** Same as starpu_get_env_number_default() */ #define starpu_getenv_number_default(s, d) starpu_get_env_number_default(s, d) static __starpu_inline int starpu_get_env_number_default(const char *str, int defval) { int ret = starpu_get_env_number(str); if (ret == -1) ret = defval; return ret; } /** Same as starpu_get_env_float_default() */ #define starpu_getenv_float_default(s, d) starpu_get_env_float_default(s, d) static __starpu_inline float starpu_get_env_float_default(const char *str, float defval) { char *strval; strval = starpu_getenv(str); if (strval) { /* the env variable was actually set */ float val; char *pcheck; val = strtof(strval, &pcheck); if (*pcheck) { fprintf(stderr, "The %s environment variable must contain a float\n", str); STARPU_ABORT(); } /* fprintf(stderr, "ENV %s WAS %f\n", str, val); */ return val; } else { /* there is no such env variable */ /* fprintf("There was no %s ENV\n", str); */ return defval; } } /** Execute the given function \p func on a subset of workers. When calling this method, the offloaded function \p func is executed by every StarPU worker that are eligible to execute the function. The argument \p arg is passed to the offloaded function. The argument \p where specifies on which types of processing units the function should be executed. Similarly to the field starpu_codelet::where, it is possible to specify that the function should be executed on every CUDA device and every CPU by passing ::STARPU_CPU|::STARPU_CUDA. This function blocks until \p func has been executed on every appropriate processing units, and thus may not be called from a callback function for instance. See \ref HowToInitializeAComputationLibraryOnceForEachWorker for more details. */ void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t where); /** Same as starpu_execute_on_each_worker(), except that the task name is specified in the argument \p name. See \ref HowToInitializeAComputationLibraryOnceForEachWorker for more details. */ void starpu_execute_on_each_worker_ex(void (*func)(void *), void *arg, uint32_t where, const char *name); /** Call \p func(\p arg) on every worker in the \p workers array. \p num_workers indicates the number of workers in this array. This function is synchronous, but the different workers may execute the function in parallel. See \ref HowToInitializeAComputationLibraryOnceForEachWorker for more details. */ void starpu_execute_on_specific_workers(void (*func)(void *), void *arg, unsigned num_workers, unsigned *workers, const char *name); /** Return the current date in micro-seconds. See \ref Preparing for more details. */ double starpu_timing_now(void); /** Copy the content of \p src_handle into \p dst_handle. The parameter \p asynchronous indicates whether the function should block or not. In the case of an asynchronous call, it is possible to synchronize with the termination of this operation either by the means of implicit dependencies (if enabled) or by calling starpu_task_wait_for_all(). If \p callback_func is not NULL, this callback function is executed after the handle has been copied, and it is given the pointer \p callback_arg as argument. See \ref DataHandlesHelpers for more details. */ int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void *), void *callback_arg); /** Like starpu_data_cpy(), copy the content of \p src_handle into \p dst_handle, but additionally take a \p priority parameter to sort it among the whole task graph. See \ref DataHandlesHelpers for more details. */ int starpu_data_cpy_priority(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void *), void *callback_arg, int priority); /** Create a copy of \p src_handle, and return a new handle in \p dst_handle, which is to be used only for read accesses. This allows StarPU to optimize it by not actually copying the data whenever possible (e.g. it may possibly simply return src_handle itself). The parameter \p asynchronous indicates whether the function should block or not. In the case of an asynchronous call, it is possible to synchronize with the termination of this operation either by the means of implicit dependencies (if enabled) or by calling starpu_task_wait_for_all(). If \p callback_func is not NULL, this callback function is executed after the handle has been copied, and it is given the pointer \p callback_arg as argument. See \ref DataHandlesHelpers for more details. */ int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous); /** Call hwloc-ps or lstopo to display binding of each process and thread running on the machine.
    Use the environment variable \ref STARPU_DISPLAY_BINDINGS to automatically call this function at the beginning of the execution of StarPU. See \ref MiscellaneousAndDebug for more details. */ void starpu_display_bindings(void); /** If \c hwloc is used, convert the given \p logical_index of a PU to the OS index of this PU. If \c hwloc is not used, return \p logical_index. See \ref HardwareTopology for more details. */ int starpu_get_pu_os_index(unsigned logical_index); /** Return a bitmap representing logical indexes of NUMA nodes where the buffer targeted by \p ptr is allocated. An error is notified by a negative result. See \ref HardwareTopology for more details. */ long starpu_get_memory_location_bitmap(void *ptr, size_t size); #ifdef STARPU_HAVE_HWLOC /** Get the hwloc topology used by StarPU. One can use this pointer to get information about topology, but not to change settings related to topology. See \ref HardwareTopology for more details. */ hwloc_topology_t starpu_get_hwloc_topology(void); #endif /** @} */ #ifdef __cplusplus } #endif #endif // __STARPU_HELPER_H__ starpu-1.4.9+dfsg/include/starpu_hip.h000066400000000000000000000125271507764646700177710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_HIP_H__ #define __STARPU_HIP_H__ #include #ifdef STARPU_USE_HIP #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wundef" #pragma GCC diagnostic ignored "-Wunused-result" #pragma GCC diagnostic ignored "-Wdeprecated-declarations" #ifndef __cplusplus #pragma GCC diagnostic ignored "-Wimplicit-int" #endif #pragma GCC diagnostic ignored "-Wreturn-type" #ifdef STARPU_USE_HIPBLAS #include #endif #include #include #pragma GCC diagnostic pop #ifdef __cplusplus extern "C" { #endif /** @defgroup API_HIP_Extensions HIP Extensions @{ */ /** Report a HIPBLAS error. */ void starpu_hipblas_report_error(const char *func, const char *file, int line, int status); /** Call starpu_hipblas_report_error(), passing the current function, file and line position. */ #define STARPU_HIPBLAS_REPORT_ERROR(status) starpu_hipblas_report_error(__starpu_func__, __FILE__, __LINE__, status) /** Report a HIP error. */ void starpu_hip_report_error(const char *func, const char *file, int line, hipError_t status); /** Call starpu_hip_report_error(), passing the current function, file and line position. */ #define STARPU_HIP_REPORT_ERROR(status) starpu_hip_report_error(__starpu_func__, __FILE__, __LINE__, status) /** Return the current worker’s HIP stream. StarPU provides a stream for every HIP device controlled by StarPU. This function is only provided for convenience so that programmers can easily use asynchronous operations within codelets without having to create a stream by hand. Note that the application is not forced to use the stream provided by starpu_hip_get_local_stream() and may also create its own streams. Synchronizing with hipDeviceSynchronize() is allowed, but will reduce the likelihood of having all transfers overlapped. */ hipStream_t starpu_hip_get_local_stream(void); /** Return a pointer to device properties for worker \p workerid (assumed to be a HIP worker). */ const struct hipDeviceProp_t *starpu_hip_get_device_properties(unsigned workerid); /** Copy \p ssize bytes from the pointer \p src_ptr on \p src_node to the pointer \p dst_ptr on \p dst_node. The function first tries to copy the data asynchronous (unless \p stream is NULL). If the asynchronous copy fails or if \p stream is NULL, it copies the data synchronously. The function returns -EAGAIN if the asynchronous launch was successful. It returns 0 if the synchronous copy was successful, or fails otherwise. */ int starpu_hip_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, hipStream_t stream, hipMemcpyKind kind); /** Copy \p numblocks blocks of \p blocksize bytes from the pointer \p src_ptr on \p src_node to the pointer \p dst_ptr on \p dst_node. The blocks start at addresses which are ld_src (resp. ld_dst) bytes apart in the source (resp. destination) interface. The function first tries to copy the data asynchronous (unless \p stream is NULL). If the asynchronous copy fails or if \p stream is NULL, it copies the data synchronously. The function returns -EAGAIN if the asynchronous launch was successful. It returns 0 if the synchronous copy was successful, or fails otherwise. */ int starpu_hip_copy2d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, hipStream_t stream, hipMemcpyKind kind); /** Copy \p numblocks_1 * \p numblocks_2 blocks of \p blocksize bytes from the pointer \p src_ptr on \p src_node to the pointer \p dst_ptr on \p dst_node. The blocks are grouped by \p numblocks_1 blocks whose start addresses are ld1_src (resp. ld1_dst) bytes apart in the source (resp. destination) interface. The function first tries to copy the data asynchronous (unless \p stream is NULL). If the asynchronous copy fails or if \p stream is NULL, it copies the data synchronously. The function returns -EAGAIN if the asynchronous launch was successful. It returns 0 if the synchronous copy was successful, or fails otherwise. */ int starpu_hip_copy3d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t blocksize, size_t numblocks_1, size_t ld1_src, size_t ld1_dst, size_t numblocks_2, size_t ld2_src, size_t ld2_dst, hipStream_t stream, hipMemcpyKind kind); /** Call hipSetDevice(\p devid). */ void starpu_hip_set_device(int devid); /** @} */ #ifdef __cplusplus } #endif #endif /* STARPU_USE_HIP */ #endif /* __STARPU_HIP_H__ */ starpu-1.4.9+dfsg/include/starpu_hipblas.h000066400000000000000000000033041507764646700206240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_HIPBLAS_H__ #define __STARPU_HIPBLAS_H__ #ifdef STARPU_USE_HIP #ifdef STARPU_USE_HIPBLAS #include #endif #endif #ifdef __cplusplus extern "C" { #endif /** @ingroup API_HIP_Extensions @{ */ /** Initialize HIPBLAS on every HIPdevice. The HIPBLAS library must be initialized prior to any HIPBLAS call. Calling starpu_hipblas_init() will initialize HIPBLAS on every HIP device controlled by StarPU. This call blocks until HIPBLAS has been properly initialized on every device. */ void starpu_hipblas_init(void); #ifdef STARPU_USE_HIP #ifdef STARPU_USE_HIPBLAS /** Return the HIPBLAS handle to be used to queue HIPBLAS kernels. It is properly initialized and configured for multistream by starpu_hipblas_init(). */ hipblasHandle_t starpu_hipblas_get_local_handle(void); #endif #endif /** Synchronously deinitialize the HIPBLAS library on every HIP device. */ void starpu_hipblas_shutdown(void); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_HIPBLAS_H__ */ starpu-1.4.9+dfsg/include/starpu_max_fpga.h000066400000000000000000000033331507764646700207660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MAX_FPGA_H__ #define __STARPU_MAX_FPGA_H__ #include #if defined STARPU_USE_MAX_FPGA #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Max_FPGA_Extensions Maxeler FPGA Extensions @{ */ /** This specifies a Maxeler file to be loaded on some engines. */ struct starpu_max_load { max_file_t *file; /**< Provide the file to be loaded */ const char *engine_id_pattern; /**< Provide the engine(s) on which to be loaded, following the Maxeler engine naming, i.e. typically "*:0", "*:1", etc. In an array of struct starpu_max_load, only one can have the "*" specification. */ }; /** Maxeler engine of the current worker. See \ref MaxFPGAExample for more details. */ max_engine_t *starpu_max_fpga_get_local_engine(void); /** @} */ #ifdef __cplusplus } #endif #endif /* STARPU_USE_MAX_FPGA */ #endif /* __STARPU_MAX_FPGA_H__ */ starpu-1.4.9+dfsg/include/starpu_mod.f90000066400000000000000000000077151507764646700201420ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! MODULE starpu_mod ! == starpu.h == ! starpu_conf_init INTERFACE SUBROUTINE starpu_conf_init(conf) BIND(C) USE iso_c_binding TYPE(C_PTR), VALUE :: conf END SUBROUTINE starpu_conf_init END INTERFACE ! starpu_init INTERFACE FUNCTION starpu_init(conf) BIND(C) USE iso_c_binding TYPE(C_PTR), VALUE :: conf INTEGER(KIND=C_INT) :: starpu_init END FUNCTION starpu_init END INTERFACE ! starpu_initialize ! starpu_pause INTERFACE SUBROUTINE starpu_pause() BIND(C) USE iso_c_binding END SUBROUTINE starpu_pause END INTERFACE ! starpu_resume INTERFACE SUBROUTINE starpu_resume() BIND(C) USE iso_c_binding END SUBROUTINE starpu_resume END INTERFACE ! starpu_shutdown INTERFACE SUBROUTINE starpu_shutdown() BIND(C) USE iso_c_binding END SUBROUTINE starpu_shutdown END INTERFACE ! starpu_topology_print ! starpu_asynchronous_copy_disabled INTERFACE SUBROUTINE starpu_asynchronous_copy_disabled() BIND(C) USE iso_c_binding END SUBROUTINE starpu_asynchronous_copy_disabled END INTERFACE ! starpu_asynchronous_cuda_copy_disabled INTERFACE SUBROUTINE starpu_asynchronous_cuda_copy_disabled() BIND(C) USE iso_c_binding END SUBROUTINE starpu_asynchronous_cuda_copy_disabled END INTERFACE ! starpu_asynchronous_opencl_copy_disabled INTERFACE SUBROUTINE starpu_asynchronous_opencl_copy_disabled() BIND(C) USE iso_c_binding END SUBROUTINE starpu_asynchronous_opencl_copy_disabled END INTERFACE ! starpu_display_stats INTERFACE SUBROUTINE starpu_display_stats() BIND(C) USE iso_c_binding END SUBROUTINE starpu_display_stats END INTERFACE ! starpu_get_version INTERFACE SUBROUTINE starpu_get_version(major,minor,release) BIND(C) USE iso_c_binding INTEGER(KIND=C_INT), INTENT(OUT) :: major,minor,release END SUBROUTINE starpu_get_version END INTERFACE ! starpu_cpu_worker_get_count INTERFACE FUNCTION starpu_cpu_worker_get_count() BIND(C) USE iso_c_binding INTEGER(KIND=C_INT) :: starpu_cpu_worker_get_count END FUNCTION starpu_cpu_worker_get_count END INTERFACE ! == starpu_task.h == ! starpu_tag_declare_deps ! starpu_tag_declare_deps_array ! starpu_task_declare_deps_array ! starpu_tag_wait ! starpu_tag_wait_array ! starpu_tag_notify_from_apps ! starpu_tag_restart ! starpu_tag_remove ! starpu_task_init ! starpu_task_clean ! starpu_task_create ! starpu_task_destroy ! starpu_task_set_destroy ! starpu_task_submit ! starpu_task_submit_to_ctx ! starpu_task_finished ! starpu_task_wait ! starpu_task_wait_for_all INTERFACE SUBROUTINE starpu_task_wait_for_all() BIND(C) USE iso_c_binding END SUBROUTINE starpu_task_wait_for_all END INTERFACE ! starpu_task_wait_for_n_submitted ! starpu_task_wait_for_all_in_ctx ! starpu_task_wait_for_n_submitted_in_ctx ! starpu_task_wait_for_no_ready ! starpu_task_nready ! starpu_task_nsubmitted ! starpu_codelet_init ! starpu_codelet_display_stats ! starpu_task_get_current ! starpu_parallel_task_barrier_init ! starpu_parallel_task_barrier_init_n ! starpu_task_dup ! starpu_task_set_implementation ! starpu_task_get_implementation END MODULE starpu_mod starpu-1.4.9+dfsg/include/starpu_opencl.h000066400000000000000000000324771507764646700204770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_OPENCL_H__ #define __STARPU_OPENCL_H__ #include #ifdef STARPU_USE_OPENCL #ifndef CL_TARGET_OPENCL_VERSION #define CL_TARGET_OPENCL_VERSION 100 #endif #ifdef __APPLE__ #include #else #include #endif #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_OpenCL_Extensions OpenCL Extensions @{ */ /** Store the OpenCL programs as compiled for the different OpenCL devices. */ struct starpu_opencl_program { /** Store each program for each OpenCL device. */ cl_program programs[STARPU_MAXOPENCLDEVS]; }; /** @name Writing OpenCL kernels @{ */ /** Return the OpenCL context of the device designated by \p devid in \p context. See \ref OpenCLSupport for more details. */ void starpu_opencl_get_context(int devid, cl_context *context); /** Return the cl_device_id corresponding to \p devid in \p device. See \ref OpenCLSupport for more details. */ void starpu_opencl_get_device(int devid, cl_device_id *device); /** Return the command queue of the device designated by \p devid into \p queue. See \ref OpenCLSupport for more details. */ void starpu_opencl_get_queue(int devid, cl_command_queue *queue); /** Return the context of the current worker. See \ref OpenCLSupport for more details. */ void starpu_opencl_get_current_context(cl_context *context); /** Return the computation kernel command queue of the current worker. See \ref OpenCLSupport for more details. */ void starpu_opencl_get_current_queue(cl_command_queue *queue); /** Set the arguments of a given kernel. The list of arguments must be given as (size_t size_of_the_argument, cl_mem * pointer_to_the_argument). The last argument must be 0. Return the number of arguments that were successfully set. In case of failure, return the id of the argument that could not be set and \p err is set to the error returned by OpenCL. Otherwise, return the number of arguments that were set. Here an example: \code{.c} int n; cl_int err; cl_kernel kernel; n = starpu_opencl_set_kernel_args(&err, 2, &kernel, sizeof(foo), &foo, sizeof(bar), &bar, 0); if (n != 2) fprintf(stderr, "Error : %d\n", err); \endcode See \ref OpenCLSupport for more details. */ int starpu_opencl_set_kernel_args(cl_int *err, cl_kernel *kernel, ...); /** @} */ /** @name Compiling OpenCL kernels Source codes for OpenCL kernels can be stored in a file or in a string. StarPU provides functions to build the program executable for each available OpenCL device as a cl_program object. This program executable can then be loaded within a specific queue as explained in the next section. These are only helpers, Applications can also fill a starpu_opencl_program array by hand for more advanced use (e.g. different programs on the different OpenCL devices, for relocation purpose for instance). @{ */ /** Store the contents of the file \p source_file_name in the buffer \p opencl_program_source. The file \p source_file_name can be located in the current directory, or in the directory specified by the environment variable \ref STARPU_OPENCL_PROGRAM_DIR, or in the directory share/starpu/opencl of the installation directory of StarPU, or in the source directory of StarPU. When the file is found, \p located_file_name is the full name of the file as it has been located on the system, \p located_dir_name the directory where it has been located. Otherwise, they are both set to the empty string. See \ref OpenCLSupport for more details. */ void starpu_opencl_load_program_source(const char *source_file_name, char *located_file_name, char *located_dir_name, char *opencl_program_source); /** Similar to function starpu_opencl_load_program_source() but allocate the buffers \p located_file_name, \p located_dir_name and \p opencl_program_source. See \ref OpenCLSupport for more details. */ void starpu_opencl_load_program_source_malloc(const char *source_file_name, char **located_file_name, char **located_dir_name, char **opencl_program_source); /** Compile the OpenCL kernel stored in the file \p source_file_name with the given options \p build_options and store the result in the directory $STARPU_HOME/.starpu/opencl with the same filename as \p source_file_name. The compilation is done for every OpenCL device, and the filename is suffixed with the vendor id and the device id of the OpenCL device. See \ref OpenCLSupport for more details. */ int starpu_opencl_compile_opencl_from_file(const char *source_file_name, const char *build_options); /** Compile the OpenCL kernel in the string \p opencl_program_source with the given options \p build_options and store the result in the directory $STARPU_HOME/.starpu/opencl with the filename \p file_name. The compilation is done for every OpenCL device, and the filename is suffixed with the vendor id and the device id of the OpenCL device. See \ref OpenCLSupport for more details. */ int starpu_opencl_compile_opencl_from_string(const char *opencl_program_source, const char *file_name, const char *build_options); /** Compile the binary OpenCL kernel identified with \p kernel_id. For every OpenCL device, the binary OpenCL kernel will be loaded from the file $STARPU_HOME/.starpu/opencl/\.\.vendor_id_\_device_id_\. See \ref OpenCLSupport for more details. */ int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs); /** Compile an OpenCL source code stored in a file. See \ref OpenCLSupport for more details. */ int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, const char *build_options); /** Compile an OpenCL source code stored in a string. See \ref OpenCLSupport for more details. */ int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs, const char *build_options); /** Unload an OpenCL compiled code. See \ref OpenCLSupport for more details. */ int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs); /** @} */ /** @name Loading OpenCL kernels @{ */ /** Create a kernel \p kernel for device \p devid, on its computation command queue returned in \p queue, using program \p opencl_programs and name \p kernel_name. See \ref OpenCLSupport for more details. */ int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, struct starpu_opencl_program *opencl_programs, const char *kernel_name, int devid); /** Release the given \p kernel, to be called after kernel execution. See \ref OpenCLSupport for more details. */ int starpu_opencl_release_kernel(cl_kernel kernel); /** @} */ /** @name OpenCL Statistics @{ */ /** Collect statistics on a kernel execution. After termination of the kernels, the OpenCL codelet should call this function with the event returned by \c clEnqueueNDRangeKernel(), to let StarPU collect statistics about the kernel execution (used cycles, consumed energy). See \ref OpenCL-specificOptimizations for more details. */ int starpu_opencl_collect_stats(cl_event event); /** @} */ /** @name OpenCL Utilities @{ */ /** Return the error message in English corresponding to \p status, an OpenCL error code. See \ref OpenCLSupport for more details. */ const char *starpu_opencl_error_string(cl_int status); /** Given a valid error status, print the corresponding error message on \c stdout, along with the function name \p func, the filename \p file, the line number \p line and the message \p msg. See \ref OpenCLSupport for more details. */ void starpu_opencl_display_error(const char *func, const char *file, int line, const char *msg, cl_int status); /** Call the function starpu_opencl_display_error() with the error \p status, the current function name, current file and line number, and a empty message. */ #define STARPU_OPENCL_DISPLAY_ERROR(status) starpu_opencl_display_error(__starpu_func__, __FILE__, __LINE__, NULL, status) /** Call the function starpu_opencl_display_error() and abort. */ static __starpu_inline void starpu_opencl_report_error(const char *func, const char *file, int line, const char *msg, cl_int status) { starpu_opencl_display_error(func, file, line, msg, status); assert(0); } /** Call the function starpu_opencl_report_error() with the error \p status, the current function name, current file and line number, and a empty message. */ #define STARPU_OPENCL_REPORT_ERROR(status) starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, NULL, status) /** Call the function starpu_opencl_report_error() with \p msg and \p status, the current function name, current file and line number. */ #define STARPU_OPENCL_REPORT_ERROR_WITH_MSG(msg, status) starpu_opencl_report_error(__starpu_func__, __FILE__, __LINE__, msg, status) /** Allocate \p size bytes of memory, stored in \p addr. \p flags must be a valid combination of \c cl_mem_flags values. See \ref DefiningANewDataInterface_allocation for more details. */ cl_int starpu_opencl_allocate_memory(int devid, cl_mem *addr, size_t size, cl_mem_flags flags); /** Copy \p size bytes from the given \p ptr on RAM \p src_node to the given \p buffer on OpenCL \p dst_node. \p offset is the offset, in bytes, in \p buffer. if \p event is NULL, the copy is synchronous, i.e the queue is synchronised before returning. If not NULL, \p event can be used after the call to wait for this particular copy to complete. This function returns CL_SUCCESS if the copy was successful, or a valid OpenCL error code otherwise. The integer pointed to by \p ret is set to -EAGAIN if the asynchronous launch was successful, or to 0 if \p event was NULL. See \ref DefiningANewDataInterface_copy for more details. */ cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node, cl_mem buffer, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret); /** Copy \p size bytes asynchronously from the given \p buffer on OpenCL \p src_node to the given \p ptr on RAM \p dst_node. \p offset is the offset, in bytes, in \p buffer. if \p event is NULL, the copy is synchronous, i.e the queue is synchronised before returning. If not NULL, \p event can be used after the call to wait for this particular copy to complete. This function returns CL_SUCCESS if the copy was successful, or a valid OpenCL error code otherwise. The integer pointed to by \p ret is set to -EAGAIN if the asynchronous launch was successful, or to 0 if \p event was NULL. See \ref DefiningANewDataInterface_copy for more details. */ cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node, void *ptr, unsigned dst_node, size_t size, size_t offset, cl_event *event, int *ret); /** Copy \p size bytes asynchronously from byte offset \p src_offset of \p src on OpenCL \p src_node to byte offset \p dst_offset of \p dst on OpenCL \p dst_node. if \p event is NULL, the copy is synchronous, i.e. the queue is synchronised before returning. If not NULL, \p event can be used after the call to wait for this particular copy to complete. This function returns CL_SUCCESS if the copy was successful, or a valid OpenCL error code otherwise. The integer pointed to by \p ret is set to -EAGAIN if the asynchronous launch was successful, or to 0 if \p event was NULL. See \ref DefiningANewDataInterface_copy for more details. */ cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node, size_t src_offset, cl_mem dst, unsigned dst_node, size_t dst_offset, size_t size, cl_event *event, int *ret); /** Copy \p size bytes from byte offset \p src_offset of \p src on \p src_node to byte offset \p dst_offset of \p dst on \p dst_node. if \p event is NULL, the copy is synchronous, i.e. the queue is synchronised before returning. If not NULL, \p event can be used after the call to wait for this particular copy to complete. The function returns -EAGAIN if the asynchronous launch was successful. It returns 0 if the synchronous copy was successful, or fails otherwise. See \ref DefiningANewDataInterface_copy for more details. */ cl_int starpu_opencl_copy_async_sync(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, cl_event *event); /** @} */ /** @} */ #ifdef __cplusplus } #endif #endif /* STARPU_USE_OPENCL */ #endif /* __STARPU_OPENCL_H__ */ starpu-1.4.9+dfsg/include/starpu_openmp.h000066400000000000000000001250401507764646700205020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_OPENMP_H__ #define __STARPU_OPENMP_H__ #include /** @defgroup API_OpenMP_Runtime_Support OpenMP Runtime Support @brief API for implementing OpenMP runtimes on top of StarPU. @{ */ #if defined STARPU_OPENMP /** Opaque Simple Lock object (\anchor SimpleLock) for inter-task synchronization operations. \sa starpu_omp_init_lock() \sa starpu_omp_destroy_lock() \sa starpu_omp_set_lock() \sa starpu_omp_unset_lock() \sa starpu_omp_test_lock() */ typedef struct { void *internal; /**< opaque pointer for internal use */ } starpu_omp_lock_t; /** Opaque Nestable Lock object (\anchor NestableLock) for inter-task synchronization operations. \sa starpu_omp_init_nest_lock() \sa starpu_omp_destroy_nest_lock() \sa starpu_omp_set_nest_lock() \sa starpu_omp_unset_nest_lock() \sa starpu_omp_test_nest_lock() */ typedef struct { void *internal; /**< opaque pointer for internal use */ } starpu_omp_nest_lock_t; /** Set of constants for selecting the for loop iteration scheduling algorithm (\anchor OMPFor) as defined by the OpenMP specification. \sa starpu_omp_for() \sa starpu_omp_for_inline_first() \sa starpu_omp_for_inline_next() \sa starpu_omp_for_alt() \sa starpu_omp_for_inline_first_alt() \sa starpu_omp_for_inline_next_alt() */ enum starpu_omp_sched_value { starpu_omp_sched_undefined = 0, /**< Undefined iteration scheduling algorithm. */ starpu_omp_sched_static = 1, /**< \b Static iteration scheduling algorithm.*/ starpu_omp_sched_dynamic = 2, /**< \b Dynamic iteration scheduling algorithm.*/ starpu_omp_sched_guided = 3, /**< \b Guided iteration scheduling algorithm.*/ starpu_omp_sched_auto = 4, /**< \b Automatically chosen iteration scheduling algorithm.*/ starpu_omp_sched_runtime = 5 /**< Choice of iteration scheduling algorithm deferred at \b runtime.*/ }; /** Set of constants for selecting the processor binding method, as defined in the OpenMP specification. \sa starpu_omp_get_proc_bind() */ enum starpu_omp_proc_bind_value { starpu_omp_proc_bind_undefined = -1, /**< Undefined processor binding method.*/ starpu_omp_proc_bind_false = 0, /**< Team threads may be moved between places at any time.*/ starpu_omp_proc_bind_true = 1, /**< Team threads may not be moved between places.*/ starpu_omp_proc_bind_master = 2, /**< Assign every thread in the team to the same place as the \b master thread.*/ starpu_omp_proc_bind_close = 3, /**< Assign every thread in the team to a place \b close to the parent thread.*/ starpu_omp_proc_bind_spread = 4 /**< Assign team threads as a sparse distribution over the selected places.*/ }; /** Set of attributes used for creating a new parallel region. \sa starpu_omp_parallel_region() */ struct starpu_omp_parallel_region_attr { /** ::starpu_codelet (\ref API_Codelet_And_Tasks) to use for the parallel region implicit tasks. The codelet must provide a CPU implementation function. */ struct starpu_codelet cl; /** Array of zero or more ::starpu_data_handle_t data handle to be passed to the parallel region implicit tasks. */ starpu_data_handle_t *handles; /** Optional pointer to an inline argument to be passed to the region implicit tasks. */ void *cl_arg; /** Size of the optional inline argument to be passed to the region implicit tasks, or 0 if unused. */ size_t cl_arg_size; /** Boolean indicating whether the optional inline argument should be automatically freed (true), or not (false). */ unsigned cl_arg_free; /** Boolean indicating whether the \b if clause of the corresponding pragma omp parallel is true or false. */ int if_clause; /** Integer indicating the requested number of threads in the team of the newly created parallel region, or 0 to let the runtime choose the number of threads alone. This attribute may be ignored by the runtime system if the requested number of threads is higher than the number of threads that the runtime can create. */ int num_threads; }; /** Set of attributes used for creating a new task region. \sa starpu_omp_task_region() */ struct starpu_omp_task_region_attr { /** ::starpu_codelet (\ref API_Codelet_And_Tasks) to use for the task region explicit task. The codelet must provide a CPU implementation function or an accelerator implementation for offloaded target regions. */ struct starpu_codelet cl; /** Array of zero or more ::starpu_data_handle_t data handle to be passed to the task region explicit tasks. */ starpu_data_handle_t *handles; /** Optional pointer to an inline argument to be passed to the region implicit tasks. */ void *cl_arg; /** Size of the optional inline argument to be passed to the region implicit tasks, or 0 if unused. */ size_t cl_arg_size; /** Boolean indicating whether the optional inline argument should be automatically freed (true), or not (false). */ unsigned cl_arg_free; int priority; /** Boolean indicating whether the \b if clause of the corresponding pragma omp task is true or false. */ int if_clause; /** Boolean indicating whether the \b final clause of the corresponding pragma omp task is true or false. */ int final_clause; /** Boolean indicating whether the \b untied clause of the corresponding pragma omp task is true or false. */ int untied_clause; /** Boolean indicating whether the \b mergeable clause of the corresponding pragma omp task is true or false. */ int mergeable_clause; /** taskloop attribute */ int is_loop; int nogroup_clause; int collapse; int num_tasks; unsigned long long nb_iterations; unsigned long long grainsize; unsigned long long begin_i; unsigned long long end_i; unsigned long long chunk; }; #ifdef __cplusplus extern "C" { #define __STARPU_OMP_NOTHROW throw() #else #define __STARPU_OMP_NOTHROW __attribute__((__nothrow__)) #endif /** @name Initialisation @{ */ /** Initialize StarPU and its OpenMP Runtime support. See \ref OMPInitExit for more details. */ extern int starpu_omp_init(void) __STARPU_OMP_NOTHROW; /** Shutdown StarPU and its OpenMP Runtime support. See \ref OMPInitExit for more details. */ extern void starpu_omp_shutdown(void) __STARPU_OMP_NOTHROW; /** @} */ /** @name Parallel \anchor ORS_Parallel @{ */ /** Generate and launch an OpenMP parallel region and return after its completion. \p attr specifies the attributes for the generated parallel region. If this function is called from inside another, generating, parallel region, the generated parallel region is nested within the generating parallel region. This function can be used to implement \#pragma omp parallel. See \ref OMPParallel for more details. */ extern void starpu_omp_parallel_region(const struct starpu_omp_parallel_region_attr *attr) __STARPU_OMP_NOTHROW; /** Execute a function only on the master thread of the OpenMP parallel region it is called from. When called from a thread that is not the master of the parallel region it is called from, this function does nothing. \p f is the function to be called. \p arg is an argument passed to function \p f. This function can be used to implement \#pragma omp master. See \ref OMPSingle for more details. */ extern void starpu_omp_master(void (*f)(void *arg), void *arg) __STARPU_OMP_NOTHROW; /** Determine whether the calling thread is the master of the OpenMP parallel region it is called from or not. This function can be used to implement \#pragma omp master without code outlining. \return !0 if called by the region's master thread. \return 0 if not called by the region's master thread. See \ref OMPSingle for more details. */ extern int starpu_omp_master_inline(void) __STARPU_OMP_NOTHROW; /** @} */ /** @name Synchronization \anchor ORS_Synchronization @{ */ /** Wait until each participating thread of the innermost OpenMP parallel region has reached the barrier and each explicit OpenMP task bound to this region has completed its execution. This function can be used to implement \#pragma omp barrier. See \ref OMPBarrier for more details. */ extern void starpu_omp_barrier(void) __STARPU_OMP_NOTHROW; /** Wait until no other thread is executing within the context of the selected critical section, then proceeds to the exclusive execution of a function within the critical section. \p f is the function to be executed in the critical section. \p arg is an argument passed to function \p f. \p name is the name of the selected critical section. If name == NULL, the selected critical section is the unique anonymous critical section. This function can be used to implement \#pragma omp critical. See \ref OMPCritical for more details. */ extern void starpu_omp_critical(void (*f)(void *arg), void *arg, const char *name) __STARPU_OMP_NOTHROW; /** Wait until execution can proceed exclusively within the context of the selected critical section. \p name is the name of the selected critical section. If name == NULL, the selected critical section is the unique anonymous critical section. This function together with #starpu_omp_critical_inline_end can be used to implement \#pragma omp critical without code outlining. See \ref OMPCritical for more details. */ extern void starpu_omp_critical_inline_begin(const char *name) __STARPU_OMP_NOTHROW; /** End the exclusive execution within the context of the selected critical section. \p name is the name of the selected critical section. If name==NULL, the selected critical section is the unique anonymous critical section. This function together with #starpu_omp_critical_inline_begin can be used to implement \#pragma omp critical without code outlining. See \ref OMPCritical for more details. */ extern void starpu_omp_critical_inline_end(const char *name) __STARPU_OMP_NOTHROW; /** @} */ /** @name Worksharing \anchor ORS_Worksharing @{ */ /** Ensure that a single participating thread of the innermost OpenMP parallel region executes a function. \p f is the function to be executed by a single thread. \p arg is an argument passed to function \p f. \p nowait is a flag indicating whether an implicit barrier is requested after the single section (nowait==0) or not (nowait==!0). This function can be used to implement \#pragma omp single. See \ref OMPSingle for more details. */ extern void starpu_omp_single(void (*f)(void *arg), void *arg, int nowait) __STARPU_OMP_NOTHROW; /** Decide whether the current thread is elected to run the following single section among the participating threads of the innermost OpenMP parallel region. This function can be used to implement \#pragma omp single without code outlining. \return !0 if the calling thread has won the election. \return 0 if the calling thread has lost the election. See \ref OMPSingle for more details. */ extern int starpu_omp_single_inline(void) __STARPU_OMP_NOTHROW; /** Execute \p f on a single task of the current parallel region task, and then broadcast the contents of the memory block pointed by the copyprivate pointer \p data and of size \p data_size to the corresponding \p data pointed memory blocks of all the other participating region tasks. This function can be used to implement \#pragma omp single with a copyprivate clause. \sa starpu_omp_single_copyprivate_inline \sa starpu_omp_single_copyprivate_inline_begin \sa starpu_omp_single_copyprivate_inline_end See \ref OMPSingle for more details. */ extern void starpu_omp_single_copyprivate(void (*f)(void *arg, void *data, unsigned long long data_size), void *arg, void *data, unsigned long long data_size) __STARPU_OMP_NOTHROW; /** Elect one task among the tasks of the current parallel region task to execute the following single section, and then broadcast the copyprivate pointer \p data to all the other participating region tasks. This function can be used to implement \#pragma omp single with a copyprivate clause without code outlining. \sa starpu_omp_single_copyprivate_inline \sa starpu_omp_single_copyprivate_inline_end See \ref OMPSingle for more details. */ extern void *starpu_omp_single_copyprivate_inline_begin(void *data) __STARPU_OMP_NOTHROW; /** Complete the execution of a single section and return the broadcasted copyprivate pointer for tasks that lost the election and NULL for the task that won the election. This function can be used to implement \#pragma omp single with a copyprivate clause without code outlining. Return the copyprivate pointer for tasks that lost the election and therefore did not execute the code of the single section. Return NULL for the task that won the election and executed the code of the single section. \sa starpu_omp_single_copyprivate_inline \sa starpu_omp_single_copyprivate_inline_begin See \ref OMPSingle for more details. */ extern void starpu_omp_single_copyprivate_inline_end(void) __STARPU_OMP_NOTHROW; /** Execute a parallel loop together with the other threads participating to the innermost parallel region. \p f is the function to be executed iteratively. \p arg is an argument passed to function \p f. \p nb_iterations is the number of iterations to be performed by the parallel loop. \p chunk is the number of consecutive iterations that should be affected to the same thread when scheduling the loop workshares, it follows the semantics of the \c modifier argument in OpenMP \#pragma omp for specification. \p schedule is the scheduling mode according to the OpenMP specification. \p ordered is a flag indicating whether the loop region may contain an ordered section (ordered==!0) or not (ordered==0). \p nowait is a flag indicating whether an implicit barrier is requested after the for section (nowait==0) or not (nowait==!0). The function \p f will be called with arguments \p _first_i, the first iteration to perform, \p _nb_i, the number of consecutive iterations to perform before returning, \p arg, the free \p arg argument. This function can be used to implement \#pragma omp for. See \ref OMPFor for more details. */ extern void starpu_omp_for(void (*f)(unsigned long long _first_i, unsigned long long _nb_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait) __STARPU_OMP_NOTHROW; /** Decide whether the current thread should start to execute a parallel loop section. See #starpu_omp_for for the argument description. This function together with #starpu_omp_for_inline_next can be used to implement \#pragma omp for without code outlining. \return !0 if the calling thread participates to the loop region and should execute a first chunk of iterations. In that case, \p *_first_i will be set to the first iteration of the chunk to perform and \p *_nb_i will be set to the number of iterations of the chunk to perform. \return 0 if the calling thread does not participate to the loop region because all the available iterations have been affected to the other threads of the parallel region. \sa starpu_omp_for See \ref OMPFor for more details. */ extern int starpu_omp_for_inline_first(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) __STARPU_OMP_NOTHROW; /** Decide whether the current thread should continue to execute a parallel loop section. See #starpu_omp_for for the argument description. This function together with #starpu_omp_for_inline_first can be used to implement \#pragma omp for without code outlining. \return !0 if the calling thread should execute a next chunk of iterations. In that case, \p *_first_i will be set to the first iteration of the chunk to perform and \p *_nb_i will be set to the number of iterations of the chunk to perform. \return 0 if the calling thread does not participate anymore to the loop region because all the available iterations have been affected to the other threads of the parallel region. \sa starpu_omp_for See \ref OMPFor for more details. */ extern int starpu_omp_for_inline_next(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) __STARPU_OMP_NOTHROW; /** Alternative implementation of a parallel loop. Differ from #starpu_omp_for in the expected arguments of the loop function \c f. The function \p f will be called with arguments \p _begin_i, the first iteration to perform, \p _end_i, the first iteration not to perform before returning, \p arg, the free \p arg argument. This function can be used to implement \#pragma omp for. \sa starpu_omp_for See \ref OMPFor for more details. */ extern void starpu_omp_for_alt(void (*f)(unsigned long long _begin_i, unsigned long long _end_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait) __STARPU_OMP_NOTHROW; /** Inline version of the alternative implementation of a parallel loop. This function together with #starpu_omp_for_inline_next_alt can be used to implement \#pragma omp for without code outlining. \sa starpu_omp_for \sa starpu_omp_for_alt \sa starpu_omp_for_inline_first See \ref OMPFor for more details. */ extern int starpu_omp_for_inline_first_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i) __STARPU_OMP_NOTHROW; /** Inline version of the alternative implementation of a parallel loop. This function together with #starpu_omp_for_inline_first_alt can be used to implement \#pragma omp for without code outlining. \sa starpu_omp_for \sa starpu_omp_for_alt \sa starpu_omp_for_inline_next See \ref OMPFor for more details. */ extern int starpu_omp_for_inline_next_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i) __STARPU_OMP_NOTHROW; /** Ensure that a function is sequentially executed once for each iteration in order within a parallel loop, by the thread that own the iteration. \p f is the function to be executed by the thread that own the current iteration. \p arg is an argument passed to function \p f. This function can be used to implement \#pragma omp ordered. See \ref OMPFor for more details. */ extern void starpu_omp_ordered(void (*f)(void *arg), void *arg) __STARPU_OMP_NOTHROW; /** Wait until all the iterations of a parallel loop below the iteration owned by the current thread have been executed. This function together with #starpu_omp_ordered_inline_end can be used to implement \#pragma omp ordered without code code outlining. See \ref OMPFor for more details. */ extern void starpu_omp_ordered_inline_begin(void) __STARPU_OMP_NOTHROW; /** Notify that the ordered section for the current iteration has been completed. This function together with #starpu_omp_ordered_inline_begin can be used to implement \#pragma omp ordered without code code outlining. See \ref OMPFor for more details. */ extern void starpu_omp_ordered_inline_end(void) __STARPU_OMP_NOTHROW; /** Ensure that each function of a given array of functions is executed by one and only one thread. \p nb_sections is the number of functions in the array \p section_f. \p section_f is the array of functions to be executed as sections. \p section_arg is an array of arguments to be passed to the corresponding function. \p nowait is a flag indicating whether an implicit barrier is requested after the execution of all the sections (nowait==0) or not (nowait==!0). This function can be used to implement \#pragma omp sections and \#pragma omp section. See \ref OMPSections for more details. */ extern void starpu_omp_sections(unsigned long long nb_sections, void (**section_f)(void *arg), void **section_arg, int nowait) __STARPU_OMP_NOTHROW; /** Alternative implementation of sections. Differ from #starpu_omp_sections in that all the sections are combined within a single function in this version. \p section_f is the function implementing the combined sections. The function \p section_f will be called with arguments \p section_num, the section number to be executed, \p arg, the entry of \p section_arg corresponding to this section. This function can be used to implement \#pragma omp sections and \#pragma omp section. \sa starpu_omp_sections See \ref OMPSections for more details. */ extern void starpu_omp_sections_combined(unsigned long long nb_sections, void (*section_f)(unsigned long long section_num, void *arg), void *section_arg, int nowait) __STARPU_OMP_NOTHROW; /** @} */ /** @name Task \anchor ORS_Task @{ */ /** Generate an explicit child task. The execution of the generated task is asynchronous with respect to the calling code unless specified otherwise. \p attr specifies the attributes for the generated task region. This function can be used to implement \#pragma omp task. See \ref OMPTaskExplicit for more details. */ extern void starpu_omp_task_region(const struct starpu_omp_task_region_attr *attr) __STARPU_OMP_NOTHROW; /** Wait for the completion of the tasks generated by the current task. This function does not wait for the descendants of the tasks generated by the current task. This function can be used to implement \#pragma omp taskwait. See \ref OMPTaskSyncs for more details. */ extern void starpu_omp_taskwait(void) __STARPU_OMP_NOTHROW; /** Launch a function and wait for the completion of every descendant task generated during the execution of the function. This function can be used to implement \#pragma omp taskgroup. \sa starpu_omp_taskgroup_inline_begin \sa starpu_omp_taskgroup_inline_end See \ref OMPTaskSyncs for more details. */ extern void starpu_omp_taskgroup(void (*f)(void *arg), void *arg) __STARPU_OMP_NOTHROW; /** Launch a function and gets ready to wait for the completion of every descendant task generated during the dynamic scope of the taskgroup. This function can be used to implement \#pragma omp taskgroup without code outlining. \sa starpu_omp_taskgroup \sa starpu_omp_taskgroup_inline_end See \ref OMPTaskSyncs for more details. */ extern void starpu_omp_taskgroup_inline_begin(void) __STARPU_OMP_NOTHROW; /** Wait for the completion of every descendant task generated during the dynamic scope of the taskgroup. This function can be used to implement \#pragma omp taskgroup without code outlining. \sa starpu_omp_taskgroup \sa starpu_omp_taskgroup_inline_begin See \ref OMPTaskSyncs for more details. */ extern void starpu_omp_taskgroup_inline_end(void) __STARPU_OMP_NOTHROW; extern void starpu_omp_taskloop_inline_begin(struct starpu_omp_task_region_attr *attr) __STARPU_OMP_NOTHROW; extern void starpu_omp_taskloop_inline_end(const struct starpu_omp_task_region_attr *attr) __STARPU_OMP_NOTHROW; /** @} */ /** @name API \anchor ORS_API @{ */ /** Set ICVS nthreads_var for the parallel regions to be created with the current region. Note: The StarPU OpenMP runtime support currently ignores this setting for nested parallel regions. \sa starpu_omp_get_num_threads \sa starpu_omp_get_thread_num \sa starpu_omp_get_max_threads \sa starpu_omp_get_num_procs See \ref OMPStandard for more details. */ extern void starpu_omp_set_num_threads(int threads) __STARPU_OMP_NOTHROW; /** Return the number of threads of the current region. \return the number of threads of the current region. \sa starpu_omp_set_num_threads \sa starpu_omp_get_thread_num \sa starpu_omp_get_max_threads \sa starpu_omp_get_num_procs See \ref OMPStandard for more details. */ extern int starpu_omp_get_num_threads(void) __STARPU_OMP_NOTHROW; /** Return the rank of the current thread among the threads of the current region. \return the rank of the current thread in the current region. \sa starpu_omp_set_num_threads \sa starpu_omp_get_num_threads \sa starpu_omp_get_max_threads \sa starpu_omp_get_num_procs See \ref OMPStandard for more details. */ extern int starpu_omp_get_thread_num(void) __STARPU_OMP_NOTHROW; /** Return the maximum number of threads that can be used to create a region from the current region. \return the maximum number of threads that can be used to create a region from the current region. \sa starpu_omp_set_num_threads \sa starpu_omp_get_num_threads \sa starpu_omp_get_thread_num \sa starpu_omp_get_num_procs See \ref OMPStandard for more details. */ extern int starpu_omp_get_max_threads(void) __STARPU_OMP_NOTHROW; /** Return the number of StarPU CPU workers. \return the number of StarPU CPU workers. \sa starpu_omp_set_num_threads \sa starpu_omp_get_num_threads \sa starpu_omp_get_thread_num \sa starpu_omp_get_max_threads See \ref OMPStandard for more details. */ extern int starpu_omp_get_num_procs(void) __STARPU_OMP_NOTHROW; /** Return whether it is called from the scope of a parallel region or not. \return !0 if called from a parallel region scope. \return 0 otherwise. See \ref OMPStandard for more details. */ extern int starpu_omp_in_parallel(void) __STARPU_OMP_NOTHROW; /** Enable (1) or disable (0) dynamically adjusting the number of parallel threads. Note: The StarPU OpenMP runtime support currently ignores the argument of this function. \sa starpu_omp_get_dynamic See \ref OMPStandard for more details. */ extern void starpu_omp_set_dynamic(int dynamic_threads) __STARPU_OMP_NOTHROW; /** Return the state of dynamic thread number adjustment. \return !0 if dynamic thread number adjustment is enabled. \return 0 otherwise. \sa starpu_omp_set_dynamic See \ref OMPStandard for more details. */ extern int starpu_omp_get_dynamic(void) __STARPU_OMP_NOTHROW; /** Enable (1) or disable (0) nested parallel regions. Note: The StarPU OpenMP runtime support currently ignores the argument of this function. \sa starpu_omp_get_nested \sa starpu_omp_get_max_active_levels \sa starpu_omp_set_max_active_levels \sa starpu_omp_get_level \sa starpu_omp_get_active_level See \ref OMPStandard for more details. */ extern void starpu_omp_set_nested(int nested) __STARPU_OMP_NOTHROW; /** Return whether nested parallel sections are enabled or not. \return !0 if nested parallel sections are enabled. \return 0 otherwise. \sa starpu_omp_set_nested \sa starpu_omp_get_max_active_levels \sa starpu_omp_set_max_active_levels \sa starpu_omp_get_level \sa starpu_omp_get_active_level See \ref OMPStandard for more details. */ extern int starpu_omp_get_nested(void) __STARPU_OMP_NOTHROW; /** Return the state of the cancel ICVS var. See \ref OMPStandard for more details. */ extern int starpu_omp_get_cancellation(void) __STARPU_OMP_NOTHROW; /** Set the default scheduling kind for upcoming loops within the current parallel section. \p kind is the scheduler kind, \p modifier complements the scheduler kind with information such as the chunk size, in accordance with the OpenMP specification. \sa starpu_omp_get_schedule See \ref OMPFor for more details. */ extern void starpu_omp_set_schedule(enum starpu_omp_sched_value kind, int modifier) __STARPU_OMP_NOTHROW; /** Return the kind and the modifier of the current default loop scheduler. \sa starpu_omp_set_schedule See \ref OMPStandard for more details. */ extern void starpu_omp_get_schedule(enum starpu_omp_sched_value *kind, int *modifier) __STARPU_OMP_NOTHROW; /** Return the number of StarPU CPU workers. \return the number of StarPU CPU workers. See \ref OMPStandard for more details. */ extern int starpu_omp_get_thread_limit(void) __STARPU_OMP_NOTHROW; /** Set the maximum number of allowed active parallel section levels. Note: The StarPU OpenMP runtime support currently ignores the argument of this function and assume \p max_levels equals 1 instead. \sa starpu_omp_set_nested \sa starpu_omp_get_nested \sa starpu_omp_get_max_active_levels \sa starpu_omp_get_level \sa starpu_omp_get_active_level See \ref OMPStandard for more details. */ extern void starpu_omp_set_max_active_levels(int max_levels) __STARPU_OMP_NOTHROW; /** Return the current maximum number of allowed active parallel section levels \return the current maximum number of allowed active parallel section levels. \sa starpu_omp_set_nested \sa starpu_omp_get_nested \sa starpu_omp_set_max_active_levels \sa starpu_omp_get_level \sa starpu_omp_get_active_level See \ref OMPStandard for more details. */ extern int starpu_omp_get_max_active_levels(void) __STARPU_OMP_NOTHROW; /** Return the nesting level of the current parallel section. \return the nesting level of the current parallel section. \sa starpu_omp_set_nested \sa starpu_omp_get_nested \sa starpu_omp_get_max_active_levels \sa starpu_omp_set_max_active_levels \sa starpu_omp_get_active_level See \ref OMPStandard for more details. */ extern int starpu_omp_get_level(void) __STARPU_OMP_NOTHROW; /** Return the number of the ancestor of the current parallel section. \return the number of the ancestor of the current parallel section. See \ref OMPStandard for more details. */ extern int starpu_omp_get_ancestor_thread_num(int level) __STARPU_OMP_NOTHROW; /** Return the size of the team of the current parallel section. \return the size of the team of the current parallel section. See \ref OMPStandard for more details. */ extern int starpu_omp_get_team_size(int level) __STARPU_OMP_NOTHROW; /** Return the nestinglevel of the current innermost active parallel section. \return the nestinglevel of the current innermost active parallel section. \sa starpu_omp_set_nested \sa starpu_omp_get_nested \sa starpu_omp_get_max_active_levels \sa starpu_omp_set_max_active_levels \sa starpu_omp_get_level See \ref OMPStandard for more details. */ extern int starpu_omp_get_active_level(void) __STARPU_OMP_NOTHROW; /** Check whether the current task is final or not. \return !0 if called from a final task. \return 0 otherwise. See \ref OMPStandard for more details. */ extern int starpu_omp_in_final(void) __STARPU_OMP_NOTHROW; /** Return the proc_bind setting of the current parallel region. \return the proc_bind setting of the current parallel region. See \ref OMPStandard for more details. */ extern enum starpu_omp_proc_bind_value starpu_omp_get_proc_bind(void) __STARPU_OMP_NOTHROW; /** Return the number of places available to the execution environment in the place list. \return the number of places available to the execution environment in the place list. See \ref OMPStandard for more details. */ extern int starpu_omp_get_num_places(void) __STARPU_OMP_NOTHROW; /** Return the number of processors available to the execution environment in the specified place. \return the number of processors available to the execution environment in the specified place. See \ref OMPStandard for more details. */ extern int starpu_omp_get_place_num_procs(int place_num) __STARPU_OMP_NOTHROW; /** Return the numerical identifiers of the processors available to the execution environment in the specified place. See \ref OMPStandard for more details. */ extern void starpu_omp_get_place_proc_ids(int place_num, int *ids) __STARPU_OMP_NOTHROW; /** Return the place number of the place to which the encountering thread is bound. \return the place number of the place to which the encountering thread is bound. See \ref OMPStandard for more details. */ extern int starpu_omp_get_place_num(void) __STARPU_OMP_NOTHROW; /** Return the number of places in the place partition of the innermost implicit task. \return the number of places in the place partition of the innermost implicit task. See \ref OMPStandard for more details. */ extern int starpu_omp_get_partition_num_places(void) __STARPU_OMP_NOTHROW; /** Return the list of place numbers corresponding to the places in the place-partition-var ICV of the innermost implicit task. See \ref OMPStandard for more details. */ extern void starpu_omp_get_partition_place_nums(int *place_nums) __STARPU_OMP_NOTHROW; /** Set the number of the device to use as default. Note: The StarPU OpenMP runtime support currently ignores the argument of this function. \sa starpu_omp_get_default_device \sa starpu_omp_is_initial_device See \ref OMPStandard for more details. */ extern void starpu_omp_set_default_device(int device_num) __STARPU_OMP_NOTHROW; /** Return the number of the device used as default. \return the number of the device used as default. \sa starpu_omp_set_default_device \sa starpu_omp_is_initial_device See \ref OMPStandard for more details. */ extern int starpu_omp_get_default_device(void) __STARPU_OMP_NOTHROW; /** Return the number of the devices. \return the number of the devices. See \ref OMPStandard for more details. */ extern int starpu_omp_get_num_devices(void) __STARPU_OMP_NOTHROW; /** Return the number of teams in the current teams region. \return the number of teams in the current teams region. \sa starpu_omp_get_num_teams See \ref OMPStandard for more details. */ extern int starpu_omp_get_num_teams(void) __STARPU_OMP_NOTHROW; /** Return the team number of the calling thread. \return the team number of the calling thread. \sa starpu_omp_get_num_teams See \ref OMPStandard for more details. */ extern int starpu_omp_get_team_num(void) __STARPU_OMP_NOTHROW; /** Check whether the current device is the initial device or not. See \ref OMPStandard for more details. */ extern int starpu_omp_is_initial_device(void) __STARPU_OMP_NOTHROW; /** Return a device number that represents the host device. \return a device number that represents the host device. See \ref OMPStandard for more details. */ extern int starpu_omp_get_initial_device(void) __STARPU_OMP_NOTHROW; /** Return the maximum value that can be specified in the priority clause. \return !0 if called from the host device. \return 0 otherwise. \sa starpu_omp_set_default_device \sa starpu_omp_get_default_device See \ref OMPStandard for more details. */ extern int starpu_omp_get_max_task_priority(void) __STARPU_OMP_NOTHROW; /** Initialize an opaque lock object. \sa starpu_omp_destroy_lock \sa starpu_omp_set_lock \sa starpu_omp_unset_lock \sa starpu_omp_test_lock See \ref OMPSimpleLock for more details. */ extern void starpu_omp_init_lock(starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW; /** Destroy an opaque lock object. \sa starpu_omp_init_lock \sa starpu_omp_set_lock \sa starpu_omp_unset_lock \sa starpu_omp_test_lock See \ref OMPSimpleLock for more details. */ extern void starpu_omp_destroy_lock(starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW; /** Lock an opaque lock object. If the lock is already locked, the function will block until it succeeds in exclusively acquiring the lock. \sa starpu_omp_init_lock \sa starpu_omp_destroy_lock \sa starpu_omp_unset_lock \sa starpu_omp_test_lock See \ref OMPSimpleLock for more details. */ extern void starpu_omp_set_lock(starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW; /** Unlock a previously locked lock object. The behaviour of this function is unspecified if it is called on an unlocked lock object. \sa starpu_omp_init_lock \sa starpu_omp_destroy_lock \sa starpu_omp_set_lock \sa starpu_omp_test_lock See \ref OMPSimpleLock for more details. */ extern void starpu_omp_unset_lock(starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW; /** Unblockingly attempt to lock a lock object and return whether it succeeded or not. \return !0 if the function succeeded in acquiring the lock. \return 0 if the lock was already locked. \sa starpu_omp_init_lock \sa starpu_omp_destroy_lock \sa starpu_omp_set_lock \sa starpu_omp_unset_lock See \ref OMPSimpleLock for more details. */ extern int starpu_omp_test_lock(starpu_omp_lock_t *lock) __STARPU_OMP_NOTHROW; /** Initialize an opaque lock object supporting nested locking operations. \sa starpu_omp_destroy_nest_lock \sa starpu_omp_set_nest_lock \sa starpu_omp_unset_nest_lock \sa starpu_omp_test_nest_lock See \ref OMPNestableLock for more details. */ extern void starpu_omp_init_nest_lock(starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; /** Destroy an opaque lock object supporting nested locking operations. \sa starpu_omp_init_nest_lock \sa starpu_omp_set_nest_lock \sa starpu_omp_unset_nest_lock \sa starpu_omp_test_nest_lock See \ref OMPNestableLock for more details. */ extern void starpu_omp_destroy_nest_lock(starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; /** Lock an opaque lock object supporting nested locking operations. If the lock is already locked by another task, the function will block until it succeeds in exclusively acquiring the lock. If the lock is already taken by the current task, the function will increase the nested locking level of the lock object. \sa starpu_omp_init_nest_lock \sa starpu_omp_destroy_nest_lock \sa starpu_omp_unset_nest_lock \sa starpu_omp_test_nest_lock See \ref OMPNestableLock for more details. */ extern void starpu_omp_set_nest_lock(starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; /** Unlock a previously locked lock object supporting nested locking operations. If the lock has been locked multiple times in nested fashion, the nested locking level is decreased and the lock remains locked. Otherwise, if the lock has only been locked once, it becomes unlocked. The behaviour of this function is unspecified if it is called on an unlocked lock object. The behaviour of this function is unspecified if it is called from a different task than the one that locked the lock object. \sa starpu_omp_init_nest_lock \sa starpu_omp_destroy_nest_lock \sa starpu_omp_set_nest_lock \sa starpu_omp_test_nest_lock See \ref OMPNestableLock for more details. */ extern void starpu_omp_unset_nest_lock(starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; /** Unblocking attempt to lock an opaque lock object supporting nested locking operations and returns whether it succeeded or not. If the lock is already locked by another task, the function will return without having acquired the lock. If the lock is already taken by the current task, the function will increase the nested locking level of the lock object. \return !0 if the function succeeded in acquiring the lock. \return 0 if the lock was already locked. \sa starpu_omp_init_nest_lock \sa starpu_omp_destroy_nest_lock \sa starpu_omp_set_nest_lock \sa starpu_omp_unset_nest_lock See \ref OMPNestableLock for more details. */ extern int starpu_omp_test_nest_lock(starpu_omp_nest_lock_t *lock) __STARPU_OMP_NOTHROW; /** Implement the entry point of a fallback global atomic region. Block until it succeeds in acquiring exclusive access to the global atomic region. \sa starpu_omp_atomic_fallback_inline_end */ extern void starpu_omp_atomic_fallback_inline_begin(void) __STARPU_OMP_NOTHROW; /** Implement the exit point of a fallback global atomic region. Release the exclusive access to the global atomic region. \sa starpu_omp_atomic_fallback_inline_begin */ extern void starpu_omp_atomic_fallback_inline_end(void) __STARPU_OMP_NOTHROW; /** Return the elapsed wallclock time in seconds. \return the elapsed wallclock time in seconds. \sa starpu_omp_get_wtick See \ref OMPStandard for more details. */ extern double starpu_omp_get_wtime(void) __STARPU_OMP_NOTHROW; /** Return the precision of the time used by \p starpu_omp_get_wtime(). \return the precision of the time used by \p starpu_omp_get_wtime(). \sa starpu_omp_get_wtime See \ref OMPStandard for more details. */ extern double starpu_omp_get_wtick(void) __STARPU_OMP_NOTHROW; /** Enable setting additional vector metadata needed by the OpenMP Runtime Support. \p handle is vector data handle. \p slice_base is the base of an array slice, expressed in number of vector elements from the array base. \sa STARPU_VECTOR_GET_SLICE_BASE */ extern void starpu_omp_vector_annotate(starpu_data_handle_t handle, uint32_t slice_base) __STARPU_OMP_NOTHROW; /** Only use internally by StarPU. */ extern struct starpu_arbiter *starpu_omp_get_default_arbiter(void) __STARPU_OMP_NOTHROW; /** Register a handle for ptr->handle data lookup. \sa starpu_omp_handle_unregister \sa starpu_omp_data_lookup See \ref OMPDataDependencies for more details. */ extern void starpu_omp_handle_register(starpu_data_handle_t handle) __STARPU_OMP_NOTHROW; /** Unregister a handle from ptr->handle data lookup. \sa starpu_omp_handle_register \sa starpu_omp_data_lookup See \ref OMPDataDependencies for more details. */ extern void starpu_omp_handle_unregister(starpu_data_handle_t handle) __STARPU_OMP_NOTHROW; /** Return the handle corresponding to the data pointed to by the \p ptr host pointer. \return the handle or \c NULL if not found. See \ref OMPDataDependencies for more details. */ extern starpu_data_handle_t starpu_omp_data_lookup(const void *ptr) __STARPU_OMP_NOTHROW; /** @} */ #ifdef __cplusplus } #endif #endif /* STARPU_USE_OPENMP && !STARPU_DONT_INCLUDE_OPENMP_HEADERS */ /** @} */ #endif /* __STARPU_OPENMP_H__ */ starpu-1.4.9+dfsg/include/starpu_parallel_worker.h000066400000000000000000000155451507764646700224010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_PARALLEL_WORKERS_UTIL_H__ #define __STARPU_PARALLEL_WORKERS_UTIL_H__ #include #ifdef STARPU_PARALLEL_WORKER #ifdef STARPU_HAVE_HWLOC #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Parallel_Worker Parallel Workers @{ */ /** Used when calling starpu_parallel_worker_init() */ #define STARPU_PARALLEL_WORKER_MIN_NB (1 << STARPU_MODE_SHIFT) /** Used when calling starpu_parallel_worker_init() */ #define STARPU_PARALLEL_WORKER_MAX_NB (2 << STARPU_MODE_SHIFT) /** Used when calling starpu_parallel_worker_init() */ #define STARPU_PARALLEL_WORKER_NB (3 << STARPU_MODE_SHIFT) /** Used when calling starpu_parallel_worker_init() */ #define STARPU_PARALLEL_WORKER_PREFERE_MIN (4 << STARPU_MODE_SHIFT) /** Used when calling starpu_parallel_worker_init() */ #define STARPU_PARALLEL_WORKER_KEEP_HOMOGENEOUS (5 << STARPU_MODE_SHIFT) /** Used when calling starpu_parallel_worker_init() */ #define STARPU_PARALLEL_WORKER_POLICY_NAME (6 << STARPU_MODE_SHIFT) /** Used when calling starpu_parallel_worker_init() */ #define STARPU_PARALLEL_WORKER_POLICY_STRUCT (7 << STARPU_MODE_SHIFT) /** Used when calling starpu_parallel_worker_init() */ #define STARPU_PARALLEL_WORKER_CREATE_FUNC (8 << STARPU_MODE_SHIFT) /** Used when calling starpu_parallel_worker_init() */ #define STARPU_PARALLEL_WORKER_CREATE_FUNC_ARG (9 << STARPU_MODE_SHIFT) /** Used when calling starpu_parallel_worker_init() */ #define STARPU_PARALLEL_WORKER_TYPE (10 << STARPU_MODE_SHIFT) /** Used when calling starpu_parallel_worker_init() */ #define STARPU_PARALLEL_WORKER_AWAKE_WORKERS (11 << STARPU_MODE_SHIFT) /** Used when calling starpu_parallel_worker_init() */ #define STARPU_PARALLEL_WORKER_PARTITION_ONE (12 << STARPU_MODE_SHIFT) /** Used when calling starpu_parallel_worker_init() */ #define STARPU_PARALLEL_WORKER_NEW (13 << STARPU_MODE_SHIFT) /** Used when calling starpu_parallel_worker_init() */ #define STARPU_PARALLEL_WORKER_NCORES (14 << STARPU_MODE_SHIFT) /** These represent the default available functions to enforce parallel_worker use by the sub-runtime */ enum starpu_parallel_worker_types { STARPU_PARALLEL_WORKER_OPENMP, /**< todo */ STARPU_PARALLEL_WORKER_INTEL_OPENMP_MKL, /**< todo */ STARPU_PARALLEL_WORKER_GNU_OPENMP_MKL, /**< todo */ }; /** Parallel_Worker configuration */ struct starpu_parallel_worker_config; /** Create parallel_workers on the machine with the given parameters. See \ref CreatingParallel for more details. This returns NULL if too many parallel workers were created. The --enable-max-sched-ctxs configure option can be used to increase the limitation. */ struct starpu_parallel_worker_config *starpu_parallel_worker_init(hwloc_obj_type_t parallel_worker_level, ...); /** Delete the given parallel_workers configuration */ int starpu_parallel_worker_shutdown(struct starpu_parallel_worker_config *parallel_workers); /** Print the given parallel_workers configuration. See \ref CreatingParallel for more details. */ int starpu_parallel_worker_print(struct starpu_parallel_worker_config *parallel_workers); /** Prologue functions */ void starpu_parallel_worker_openmp_prologue(void *); #define starpu_parallel_worker_intel_openmp_mkl_prologue starpu_parallel_worker_openmp_prologue #ifdef STARPU_MKL void starpu_parallel_worker_gnu_openmp_mkl_prologue(void *); #endif /* STARPU_MKL */ #define STARPU_CLUSTER_MIN_NB STARPU_PARALLEL_WORKER_MIN_NB /**< @deprecated Use ::STARPU_PARALLEL_WORKER_MIN_NB */ #define STARPU_CLUSTER_MAX_NB STARPU_PARALLEL_WORKER_MAX_NB /**< @deprecated Use ::STARPU_PARALLEL_WORKER_MAX_NB */ #define STARPU_CLUSTER_NB STARPU_PARALLEL_WORKER_NB /**< @deprecated Use ::STARPU_PARALLEL_WORKER_NB */ #define STARPU_CLUSTER_PREFERE_MIN STARPU_PARALLEL_WORKER_PREFERE_MIN /**< @deprecated Use ::STARPU_PARALLEL_WORKER_PREFERE_MIN */ #define STARPU_CLUSTER_KEEP_HOMOGENEOUS STARPU_PARALLEL_WORKER_KEEP_HOMOGENEOUS /**< @deprecated Use ::STARPU_PARALLEL_WORKER_KEEP_HOMOGENEOUS */ #define STARPU_CLUSTER_POLICY_NAME STARPU_PARALLEL_WORKER_POLICY_NAME /**< @deprecated Use ::STARPU_PARALLEL_WORKER_POLICY_NAME */ #define STARPU_CLUSTER_POLICY_STRUCT STARPU_PARALLEL_WORKER_POLICY_STRUCT /**< @deprecated Use ::STARPU_PARALLEL_WORKER_POLICY_STRUCT */ #define STARPU_CLUSTER_CREATE_FUNC STARPU_PARALLEL_WORKER_CREATE_FUNC /**< @deprecated Use ::STARPU_PARALLEL_WORKER_CREATE_FUNC */ #define STARPU_CLUSTER_CREATE_FUNC_ARG STARPU_PARALLEL_WORKER_CREATE_FUNC_ARG /**< @deprecated Use ::STARPU_PARALLEL_WORKER_CREATE_FUNC_ARG */ #define STARPU_CLUSTER_TYPE STARPU_PARALLEL_WORKER_TYPE /**< @deprecated Use ::STARPU_PARALLEL_WORKER_TYPE */ #define STARPU_CLUSTER_AWAKE_WORKERS STARPU_PARALLEL_WORKER_AWAKE_WORKERS /**< @deprecated Use ::STARPU_PARALLEL_WORKER_AWAKE_WORKERS */ #define STARPU_CLUSTER_PARTITION_ONE STARPU_PARALLEL_WORKER_PARTITION_ONE /**< @deprecated Use ::STARPU_PARALLEL_WORKER_PARTITION_ONE */ #define STARPU_CLUSTER_NEW STARPU_PARALLEL_WORKER_NEW /**< @deprecated Use ::STARPU_PARALLEL_WORKER_NEW */ #define STARPU_CLUSTER_NCORES STARPU_PARALLEL_WORKER_NCORES /**< @deprecated Use ::STARPU_PARALLEL_WORKER_NCORES */ /** @deprecated Use ::starpu_parallel_worker_types */ enum starpu_cluster_types { STARPU_CLUSTER_OPENMP, /**< deprecated */ STARPU_CLUSTER_INTEL_OPENMP_MKL, /**< deprecated */ #ifdef STARPU_MKL STARPU_CLUSTER_GNU_OPENMP_MKL, /**< deprecated */ #endif }; /** @deprecated Use starpu_parallel_worker_config */ struct starpu_cluster_machine; /** @deprecated Use starpu_parallel_worker_init() */ struct starpu_cluster_machine *starpu_cluster_machine(hwloc_obj_type_t cluster_level, ...) STARPU_DEPRECATED; /** @deprecated Use starpu_parallel_worker_shutdown() */ int starpu_uncluster_machine(struct starpu_cluster_machine *clusters) STARPU_DEPRECATED; /** @deprecated Use starpu_parallel_worker_print() */ int starpu_cluster_print(struct starpu_cluster_machine *clusters) STARPU_DEPRECATED; /** @} */ #ifdef __cplusplus } #endif #endif #endif #endif /* __STARPU_PARALLEL_WORKERS_UTIL_H__ */ starpu-1.4.9+dfsg/include/starpu_perf_monitoring.h000066400000000000000000000153111507764646700224040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_PERF_MONITORING_H__ #define __STARPU_PERF_MONITORING_H__ #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Perf_Monitoring Performance Monitoring Counters @brief API to access performance monitoring counters. @{ */ /** @name API \anchor PM_API @{ */ /** Enum of all possible performance counter scopes. */ enum starpu_perf_counter_scope { starpu_perf_counter_scope_undefined = 0, /**< undefined scope */ starpu_perf_counter_scope_global = 2, /**< global scope */ starpu_perf_counter_scope_per_worker = 4, /**< per-worker scope */ starpu_perf_counter_scope_per_codelet = 6 /**< per-codelet scope */ }; /** Enum of all possible performance counter value type. */ enum starpu_perf_counter_type { starpu_perf_counter_type_undefined = 0, /**< undefined value type */ starpu_perf_counter_type_int32 = 1, /**< signed 32-bit integer value */ starpu_perf_counter_type_int64 = 2, /**< signed 64-bit integer value */ starpu_perf_counter_type_float = 3, /**< 32-bit single precision floating-point value */ starpu_perf_counter_type_double = 4 /**< 64-bit double precision floating-point value */ }; struct starpu_perf_counter_listener; struct starpu_perf_counter_sample; struct starpu_perf_counter_set; /** Start collecting performance counter values. */ void starpu_perf_counter_collection_start(void); /** Stop collecting performance counter values. */ void starpu_perf_counter_collection_stop(void); /** @} */ /** @name Scope Related Routines @{ */ /** Translate scope name constant string to scope id. */ int starpu_perf_counter_scope_name_to_id(const char *name); /** Translate scope id to scope name constant string. */ const char *starpu_perf_counter_scope_id_to_name(enum starpu_perf_counter_scope scope); /** @} */ /** @name Type Related Routines @{ */ /** Translate type name constant string to type id. */ int starpu_perf_counter_type_name_to_id(const char *name); /** Translate type id to type name constant string. */ const char *starpu_perf_counter_type_id_to_name(enum starpu_perf_counter_type type); /** @} */ /** @name Counter Related Routines @{ */ /** Return the number of performance counters for the given scope. */ int starpu_perf_counter_nb(enum starpu_perf_counter_scope scope); /** Translate a performance counter name to its id. */ int starpu_perf_counter_name_to_id(enum starpu_perf_counter_scope scope, const char *name); /** Translate a performance counter rank in its scope to its counter id. */ int starpu_perf_counter_nth_to_id(enum starpu_perf_counter_scope scope, int nth); /** Translate a counter id to its name constant string. */ const char *starpu_perf_counter_id_to_name(int id); /** Return the counter's type id. */ int starpu_perf_counter_get_type_id(int id); /** Return the counter's help string. */ const char *starpu_perf_counter_get_help_string(int id); /** @} */ /** @name Listener Related Routines @{ */ /** Display the list of counters defined in the given scope. */ void starpu_perf_counter_list_avail(enum starpu_perf_counter_scope scope); /** Display the list of counters defined in all scopes. */ void starpu_perf_counter_list_all_avail(void); /** Allocate a new performance counter set. */ struct starpu_perf_counter_set *starpu_perf_counter_set_alloc(enum starpu_perf_counter_scope scope); /** Free a performance counter set. */ void starpu_perf_counter_set_free(struct starpu_perf_counter_set *set); /** Enable a given counter in the set. */ void starpu_perf_counter_set_enable_id(struct starpu_perf_counter_set *set, int id); /** Disable a given counter in the set. */ void starpu_perf_counter_set_disable_id(struct starpu_perf_counter_set *set, int id); /** Initialize a new performance counter listener. */ struct starpu_perf_counter_listener *starpu_perf_counter_listener_init(struct starpu_perf_counter_set *set, void (*callback)(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context), void *user_arg); /** End a performance counter listener. */ void starpu_perf_counter_listener_exit(struct starpu_perf_counter_listener *listener); /** Set a listener for the global scope. */ void starpu_perf_counter_set_global_listener(struct starpu_perf_counter_listener *listener); /** Set a listener for the per_worker scope on a given worker. */ void starpu_perf_counter_set_per_worker_listener(unsigned workerid, struct starpu_perf_counter_listener *listener); /** Set a common listener for all workers. */ void starpu_perf_counter_set_all_per_worker_listeners(struct starpu_perf_counter_listener *listener); /** Set a per_codelet listener for a codelet. */ void starpu_perf_counter_set_per_codelet_listener(struct starpu_codelet *cl, struct starpu_perf_counter_listener *listener); /** Unset the global listener. */ void starpu_perf_counter_unset_global_listener(void); /** Unset the per_worker listener. */ void starpu_perf_counter_unset_per_worker_listener(unsigned workerid); /** Unset all per_worker listeners. */ void starpu_perf_counter_unset_all_per_worker_listeners(void); /** Unset a per_codelet listener. */ void starpu_perf_counter_unset_per_codelet_listener(struct starpu_codelet *cl); /** @} */ /** @name Sample Related Routines @{ */ /** Read an int32 counter value from a sample. */ int32_t starpu_perf_counter_sample_get_int32_value(struct starpu_perf_counter_sample *sample, const int counter_id); /** Read an int64 counter value from a sample. */ int64_t starpu_perf_counter_sample_get_int64_value(struct starpu_perf_counter_sample *sample, const int counter_id); /** Read a float counter value from a sample. */ float starpu_perf_counter_sample_get_float_value(struct starpu_perf_counter_sample *sample, const int counter_id); /** Read a double counter value from a sample. */ double starpu_perf_counter_sample_get_double_value(struct starpu_perf_counter_sample *sample, const int counter_id); /** @} */ /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_PERF_MONITORING_H__ */ starpu-1.4.9+dfsg/include/starpu_perf_steering.h000066400000000000000000000156531507764646700220500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_PERF_STEERING_H__ #define __STARPU_PERF_STEERING_H__ #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Perf_Steering Performance Steering Knobs @brief API to access performance steering counters. @{ */ /** @name API \anchor PM_API @{ */ /** Enum of all possible performance knob scopes. */ enum starpu_perf_knob_scope { starpu_perf_knob_scope_undefined = 0, /**< undefined scope */ starpu_perf_knob_scope_global = 1, /**< global scope */ starpu_perf_knob_scope_per_worker = 3, /**< per-worker scope */ starpu_perf_knob_scope_per_scheduler = 5 /**< per-scheduler scope */ }; /** Enum of all possible performance knob value type. */ enum starpu_perf_knob_type { starpu_perf_knob_type_undefined = 0, /**< undefined value type */ starpu_perf_knob_type_int32 = 1, /**< signed 32-bit integer value */ starpu_perf_knob_type_int64 = 2, /**< signed 64-bit integer value */ starpu_perf_knob_type_float = 3, /**< 32-bit single precision floating-point value */ starpu_perf_knob_type_double = 4 /**< 64-bit double precision floating-point value */ }; /** @} */ /** @name Scope Related Routines @{ */ /** Translate scope name constant string to scope id. */ int starpu_perf_knob_scope_name_to_id(const char *name); /** Translate scope id to scope name constant string. */ const char *starpu_perf_knob_scope_id_to_name(enum starpu_perf_knob_scope scope); /** @} */ /** @name Type Related Routines @{ */ /** Translate type name constant string to type id. */ int starpu_perf_knob_type_name_to_id(const char *name); /** Translate type id to type name constant string. */ const char *starpu_perf_knob_type_id_to_name(enum starpu_perf_knob_type type); /** @} */ /** @name Performance Steering Knob Related Routines @{ */ /** Return the number of performance steering knobs for the given scope. */ int starpu_perf_knob_nb(enum starpu_perf_knob_scope scope); /** Translate a performance knob name to its id. */ int starpu_perf_knob_name_to_id(enum starpu_perf_knob_scope scope, const char *name); /** Translate a performance knob name to its id. */ int starpu_perf_knob_nth_to_id(enum starpu_perf_knob_scope scope, int nth); /** Translate a performance knob rank in its scope to its knob id. */ const char *starpu_perf_knob_id_to_name(int id); /** Translate a knob id to its name constant string. */ int starpu_perf_knob_get_type_id(int id); /** Return the knob's help string. */ const char *starpu_perf_knob_get_help_string(int id); /** Display the list of knobs defined in the given scope. */ void starpu_perf_knob_list_avail(enum starpu_perf_knob_scope scope); /** Display the list of knobs defined in all scopes. */ void starpu_perf_knob_list_all_avail(void); /** Get knob value for Global scope. */ int32_t starpu_perf_knob_get_global_int32_value(const int knob_id); /** Get knob value for Global scope. */ int64_t starpu_perf_knob_get_global_int64_value(const int knob_id); /** Get knob value for Global scope. */ float starpu_perf_knob_get_global_float_value(const int knob_id); /** Get knob value for Global scope. */ double starpu_perf_knob_get_global_double_value(const int knob_id); /** Set int32 knob value for Global scope. */ void starpu_perf_knob_set_global_int32_value(const int knob_id, int32_t new_value); /** Set int64 knob value for Global scope. */ void starpu_perf_knob_set_global_int64_value(const int knob_id, int64_t new_value); /** Set float knob value for Global scope. */ void starpu_perf_knob_set_global_float_value(const int knob_id, float new_value); /** Set double knob value for Global scope. */ void starpu_perf_knob_set_global_double_value(const int knob_id, double new_value); /** Get int32 value for Per_worker scope. */ int32_t starpu_perf_knob_get_per_worker_int32_value(const int knob_id, unsigned workerid); /** Get int64 value for Per_worker scope. */ int64_t starpu_perf_knob_get_per_worker_int64_value(const int knob_id, unsigned workerid); /** Get float value for Per_worker scope. */ float starpu_perf_knob_get_per_worker_float_value(const int knob_id, unsigned workerid); /** Get double value for Per_worker scope. */ double starpu_perf_knob_get_per_worker_double_value(const int knob_id, unsigned workerid); /** Set int32 value for Per_worker scope. */ void starpu_perf_knob_set_per_worker_int32_value(const int knob_id, unsigned workerid, int32_t new_value); /** Set int64 value for Per_worker scope. */ void starpu_perf_knob_set_per_worker_int64_value(const int knob_id, unsigned workerid, int64_t new_value); /** Set float value for Per_worker scope. */ void starpu_perf_knob_set_per_worker_float_value(const int knob_id, unsigned workerid, float new_value); /** Set double value for Per_worker scope. */ void starpu_perf_knob_set_per_worker_double_value(const int knob_id, unsigned workerid, double new_value); /** Get int32 value for per_scheduler scope. */ int32_t starpu_perf_knob_get_per_scheduler_int32_value(const int knob_id, const char *sched_policy_name); /** Get int64 value for per_scheduler scope. */ int64_t starpu_perf_knob_get_per_scheduler_int64_value(const int knob_id, const char *sched_policy_name); /** Get float value for per_scheduler scope. */ float starpu_perf_knob_get_per_scheduler_float_value(const int knob_id, const char *sched_policy_name); /** Get double value for per_scheduler scope. */ double starpu_perf_knob_get_per_scheduler_double_value(const int knob_id, const char *sched_policy_name); /** Set int32 value for per_scheduler scope. */ void starpu_perf_knob_set_per_scheduler_int32_value(const int knob_id, const char *sched_policy_name, int32_t new_value); /** Set int64 value for per_scheduler scope. */ void starpu_perf_knob_set_per_scheduler_int64_value(const int knob_id, const char *sched_policy_name, int64_t new_value); /** Set float value for per_scheduler scope. */ void starpu_perf_knob_set_per_scheduler_float_value(const int knob_id, const char *sched_policy_name, float new_value); /** Set double value for per_scheduler scope. */ void starpu_perf_knob_set_per_scheduler_double_value(const int knob_id, const char *sched_policy_name, double new_value); /** @} */ /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_PERF_STEERING_H__ */ starpu-1.4.9+dfsg/include/starpu_perfmodel.h000066400000000000000000000444071507764646700211700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_PERFMODEL_H__ #define __STARPU_PERFMODEL_H__ #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Performance_Model Performance Model @{ */ struct starpu_task; struct starpu_data_descr; /** todo */ struct starpu_perfmodel_device { enum starpu_worker_archtype type; /**< type of the device */ int devid; /**< identifier of the precise device */ int ncores; /**< number of execution in parallel, minus 1 */ }; /** todo */ struct starpu_perfmodel_arch { int ndevices; /**< number of the devices for the given arch */ struct starpu_perfmodel_device *devices; /**< list of the devices for the given arch */ }; /** todo */ struct starpu_perfmodel_history_entry { double mean; /**< mean_n = 1/n sum */ double deviation; /**< n dev_n = sum2 - 1/n (sum)^2 */ double sum; /**< sum of samples (in µs) */ double sum2; /**< sum of samples^2 */ unsigned nsample; /**< number of samples */ unsigned nerror; uint32_t footprint; /**< data footprint */ size_t size; /**< in bytes */ double flops; /**< Provided by the application */ double duration; starpu_tag_t tag; double *parameters; }; /** todo */ struct starpu_perfmodel_history_list { struct starpu_perfmodel_history_list *next; struct starpu_perfmodel_history_entry *entry; }; /** todo */ struct starpu_perfmodel_regression_model { double sumlny; /**< sum of ln(measured) */ double sumlnx; /**< sum of ln(size) */ double sumlnx2; /**< sum of ln(size)^2 */ unsigned long minx; /**< minimum size */ unsigned long maxx; /**< maximum size */ double sumlnxlny; /**< sum of ln(size)*ln(measured) */ double alpha; /**< estimated = alpha * size ^ beta */ double beta; /**< estimated = alpha * size ^ beta */ unsigned valid; /**< whether the linear regression model is valid (i.e. enough measures) */ double a; /**< estimated = a size ^b + c */ double b; /**< estimated = a size ^b + c */ double c; /**< estimated = a size ^b + c */ unsigned nl_valid; /**< whether the non-linear regression model is valid (i.e. enough measures) */ unsigned nsample; /**< number of sample values for non-linear regression */ double *coeff; /**< list of computed coefficients for multiple linear regression model */ unsigned ncoeff; /**< number of coefficients for multiple linear regression model */ unsigned multi_valid; /**< whether the multiple linear regression model is valid */ }; struct starpu_perfmodel_history_table; #define starpu_per_arch_perfmodel starpu_perfmodel_per_arch STARPU_DEPRECATED typedef double (*starpu_perfmodel_per_arch_cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl); typedef size_t (*starpu_perfmodel_per_arch_size_base)(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl); /** information about the performance model of a given arch. */ struct starpu_perfmodel_per_arch { /** Used by ::STARPU_PER_ARCH, must point to functions which take a task, the target arch and implementation number (as mere conveniency, since the array is already indexed by these), and must return a task duration estimation in micro-seconds. */ starpu_perfmodel_per_arch_cost_function cost_function; /** Same as in structure starpu_perfmodel, but per-arch, in case it depends on the architecture-specific implementation. */ starpu_perfmodel_per_arch_size_base size_base; /** \private The history of performance measurements. */ struct starpu_perfmodel_history_table *history; /** \private Used by ::STARPU_HISTORY_BASED, ::STARPU_NL_REGRESSION_BASED and ::STARPU_MULTIPLE_REGRESSION_BASED, records all execution history measures. */ struct starpu_perfmodel_history_list *list; /** \private Used by ::STARPU_REGRESSION_BASED, ::STARPU_NL_REGRESSION_BASED and ::STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated factors of the regression. */ struct starpu_perfmodel_regression_model regression; char debug_path[256]; }; /** todo */ enum starpu_perfmodel_type { STARPU_PERFMODEL_INVALID = 0, STARPU_PER_WORKER, /**< Application-provided per-worker cost model function */ STARPU_PER_ARCH, /**< Application-provided per-arch cost model function */ STARPU_COMMON, /**< Application-provided common cost model function, with per-arch factor */ STARPU_HISTORY_BASED, /**< Automatic history-based cost model */ STARPU_REGRESSION_BASED, /**< Automatic linear regression-based cost model (alpha * size ^ beta) */ STARPU_NL_REGRESSION_BASED, /**< Automatic non-linear regression-based cost model (a * size ^ b + c) */ STARPU_MULTIPLE_REGRESSION_BASED /**< Automatic multiple linear regression-based cost model. Application provides parameters, their combinations and exponents. */ }; struct _starpu_perfmodel_state; typedef struct _starpu_perfmodel_state *starpu_perfmodel_state_t; /** Contain all information about a performance model. At least the type and symbol fields have to be filled when defining a performance model for a codelet. For compatibility, make sure to initialize the whole structure to zero, either by using explicit memset, or by letting the compiler implicitly do it in e.g. static storage case. If not provided, other fields have to be zero. */ struct starpu_perfmodel { /** type of performance model
    • ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED, ::STARPU_NL_REGRESSION_BASED: No other fields needs to be provided, this is purely history-based.
    • ::STARPU_MULTIPLE_REGRESSION_BASED: Need to provide fields starpu_perfmodel::nparameters (number of different parameters), starpu_perfmodel::ncombinations (number of parameters combinations-tuples) and table starpu_perfmodel::combinations which defines exponents of the equation. Function cl_perf_func also needs to define how to extract parameters from the task.
    • ::STARPU_PER_ARCH: either field starpu_perfmodel::arch_cost_function has to be filled with a function that returns the cost in micro-seconds on the arch given as parameter, or field starpu_perfmodel::per_arch has to be filled with functions which return the cost in micro-seconds.
    • ::STARPU_COMMON: field starpu_perfmodel::cost_function has to be filled with a function that returns the cost in micro-seconds on a CPU, timing on other archs will be determined by multiplying by an arch-specific factor.
    */ enum starpu_perfmodel_type type; /** Used by ::STARPU_COMMON. Take a task and implementation number, and must return a task duration estimation in micro-seconds. */ double (*cost_function)(struct starpu_task *, unsigned nimpl); /** Used by ::STARPU_PER_ARCH. Take a task, an arch and implementation number, and must return a task duration estimation in micro-seconds on that arch. */ double (*arch_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch *arch, unsigned nimpl); /** Used by ::STARPU_PER_WORKER. Take a task, a worker id and implementation number, and must return a task duration estimation in micro-seconds on that worker. */ double (*worker_cost_function)(struct starpu_task *, unsigned workerid, unsigned nimpl); /** Used by ::STARPU_HISTORY_BASED, ::STARPU_REGRESSION_BASED and ::STARPU_NL_REGRESSION_BASED. If not NULL, take a task and implementation number, and return the size to be used as index to distinguish histories and as a base for regressions. */ size_t (*size_base)(struct starpu_task *, unsigned nimpl); /** Used by ::STARPU_HISTORY_BASED. If not NULL, take a task and return the footprint to be used as index to distinguish histories. The default is to use the starpu_task_data_footprint() function. */ uint32_t (*footprint)(struct starpu_task *); /** symbol name for the performance model, which will be used as file name to store the model. It must be set otherwise the model will be ignored. */ const char *symbol; /** name of the file storing the performance model. It is non NULL if the model has been loaded or stored in a file. */ char *path; /** \private Whether the performance model is already loaded from the disk. */ unsigned is_loaded; /** \private */ unsigned benchmarking; /** \private */ unsigned is_init; void (*parameters)(struct starpu_task *task, double *parameters); /** \private Names of parameters used for multiple linear regression models (M, N, K) */ const char **parameters_names; /** \private Number of parameters used for multiple linear regression models */ unsigned nparameters; /** \private Table of combinations of parameters (and the exponents) used for multiple linear regression models */ unsigned **combinations; /** \private Number of combination of parameters used for multiple linear regression models */ unsigned ncombinations; /** \private */ starpu_perfmodel_state_t state; }; /** Initialize the \p model performance model structure. This is automatically called when e.g. submitting a task using a codelet using this performance model. */ void starpu_perfmodel_init(struct starpu_perfmodel *model); /** Deinitialize the \p model performance model structure. You need to call this before deallocating the structure. You will probably want to call starpu_perfmodel_unload_model() before calling this function, to save the perfmodel. */ int starpu_perfmodel_deinit(struct starpu_perfmodel *model); /** starpu_energy_start - start counting hardware events in an event set - \p workerid is the worker on which calibration is to be performed (in the case of GPUs, use -1 for CPUs) - \p archi is the type of architecture on which calibration will be run See \ref MeasuringEnergyandPower for more details. */ int starpu_energy_start(int workerid, enum starpu_worker_archtype archi); /** starpu_energy_stop - stop counting hardware events in an event set - \p model is the energy performance model to be filled with the result - \p task is a task specimen, so the performance model folds the result according to the parameter sizes of the task. - \p nimpl is the implementation number run during calibration - \p ntasks is the number of tasks run during calibration - \p workerid is the worker on which calibration was performed (in the case of GPUs, use -1 for CPUs) - \p archi is the type of architecture on which calibration was run See \ref MeasuringEnergyandPower for more details. */ int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task, unsigned nimpl, unsigned ntasks, int workerid, enum starpu_worker_archtype archi); /** Load the performance model found in the file named \p filename. \p model has to be completely zero, and will be filled with the information stored in the given file. */ int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model); /** Load a given performance model. \p model has to be completely zero, and will be filled with the information stored in $STARPU_HOME/.starpu. The function is intended to be used by external tools that want to read the performance model files. */ int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model); /** Unload \p model which has been previously loaded through the function starpu_perfmodel_load_symbol() */ int starpu_perfmodel_unload_model(struct starpu_perfmodel *model); /** Save the performance model in its file. */ void starpu_save_history_based_model(struct starpu_perfmodel *model); /** Fills \p path (supposed to be \p maxlen long) with the full path to the performance model file for symbol \p symbol. This path can later on be used for instance with starpu_perfmodel_load_file() . */ void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen); /** Dump performance model \p model to output stream \p output, in XML format. See \ref PerformanceModelExample for more details. */ void starpu_perfmodel_dump_xml(FILE *output, struct starpu_perfmodel *model); /** Free internal memory used for sampling management. It should only be called by an application which is not calling starpu_shutdown() as this function already calls it. See for example tools/starpu_perfmodel_display.c. */ void starpu_perfmodel_free_sampling(void); /** Return the architecture type of the worker \p workerid. */ struct starpu_perfmodel_arch *starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id); int starpu_perfmodel_get_narch_combs(void); int starpu_perfmodel_arch_comb_add(int ndevices, struct starpu_perfmodel_device *devices); int starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device *devices); struct starpu_perfmodel_arch *starpu_perfmodel_arch_comb_fetch(int comb); struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_arch(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned impl); struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_devices(struct starpu_perfmodel *model, int impl, ...); int starpu_perfmodel_set_per_devices_cost_function(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_cost_function func, ...); int starpu_perfmodel_set_per_devices_size_base(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_size_base func, ...); /** Return the path to the debugging information for the performance model. */ void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl); const char *starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype); /** Return the architecture name for \p arch */ void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl); /** Return the estimated time in µs of a task with the given model and the given footprint. */ double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, uint32_t footprint); /** If starpu_init() is not used, starpu_perfmodel_initialize() should be used called calling starpu_perfmodel_* functions. */ void starpu_perfmodel_initialize(void); /** Print a list of all performance models on \p output */ int starpu_perfmodel_list(FILE *output); void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output); int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output); int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output); int starpu_perfmodel_list_combs(FILE *output, struct starpu_perfmodel *model); /** Feed the performance model \p model with one explicit measurement (in µs or J), in addition to measurements done by StarPU itself. This can be useful when the application already has an existing set of measurements done in good conditions, that StarPU could benefit from instead of doing on-line measurements. An example of use can be seen in \ref PerformanceModelExample. Note that this records only one measurement, and StarPU would ignore the first measurement (since it is usually disturbed by library loading etc.). Make sure to call this function several times to record all your measurements. You can also call starpu_perfmodel_update_history_n() to directly provide an average performed on several tasks. See \ref PerformanceModelCalibration for more details. */ void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured); /** Feed the performance model \p model with an explicit average measurement (in µs or J). This is similar to starpu_perfmodel_update_history(), but records a batch of \p number measurements provided as the average of the measurements \p average_measured. */ void starpu_perfmodel_update_history_n(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double average_measured, unsigned number); /** Print the directory name storing performance models on \p output */ void starpu_perfmodel_directory(FILE *output); /** Print a matrix of bus bandwidths on \p f. */ void starpu_bus_print_bandwidth(FILE *f); /** Print the affinity devices on \p f. */ void starpu_bus_print_affinity(FILE *f); /** Print on \p f the name of the files containing the matrix of bus bandwidths, the affinity devices and the latency. */ void starpu_bus_print_filenames(FILE *f); /** Return the bandwidth of data transfer between two memory nodes. See \ref SchedulingHelpers for more details. */ double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node); /** Return the latency of data transfer between two memory nodes. See \ref SchedulingHelpers for more details. */ double starpu_transfer_latency(unsigned src_node, unsigned dst_node); /** Return the estimated time to transfer a given size between two memory nodes. See \ref SchedulingHelpers for more details. */ double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size); /** Performance model which just always return 1µs. */ extern struct starpu_perfmodel starpu_perfmodel_nop; /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_PERFMODEL_H__ */ starpu-1.4.9+dfsg/include/starpu_profiling.h000066400000000000000000000312771507764646700212050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_PROFILING_H__ #define __STARPU_PROFILING_H__ #include #include #include #ifdef STARPU_PAPI #include #endif #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Profiling Profiling @{ */ /** Used when calling the function starpu_profiling_status_set() to disable profiling. */ #define STARPU_PROFILING_DISABLE 0 /** Used when calling the function starpu_profiling_status_set() to enable profiling. */ #define STARPU_PROFILING_ENABLE 1 /** Information about the execution of a task. It is accessible from the field starpu_task::profiling_info if profiling was enabled. */ struct starpu_profiling_task_info { /** Date of task submission (relative to the initialization of StarPU). */ struct timespec submit_time; /** Time when the task was submitted to the scheduler. */ struct timespec push_start_time; /** Time when the scheduler finished with the task submission. */ struct timespec push_end_time; /** Time when the scheduler started to be requested for a task, and eventually gave that task. */ struct timespec pop_start_time; /** Time when the scheduler finished providing the task for execution. */ struct timespec pop_end_time; /** Time when the worker started fetching input data. */ struct timespec acquire_data_start_time; /** Time when the worker finished fetching input data. */ struct timespec acquire_data_end_time; /** Date of task execution beginning (relative to the initialization of StarPU). */ struct timespec start_time; /** Date of task execution termination (relative to the initialization of StarPU). */ struct timespec end_time; /** Time when the worker started releasing data. */ struct timespec release_data_start_time; /** Time when the worker finished releasing data. */ struct timespec release_data_end_time; /** Time when the worker started the application callback for the task. */ struct timespec callback_start_time; /** Time when the worker finished the application callback for the task. */ struct timespec callback_end_time; /* TODO add expected length, expected start/end ? */ /** Identifier of the worker which has executed the task. */ int workerid; /** Number of cycles used by the task, only available in the MoviSim */ uint64_t used_cycles; /** Number of cycles stalled within the task, only available in the MoviSim */ uint64_t stall_cycles; /** Energy consumed by the task, in Joules */ double energy_consumed; #ifdef STARPU_PAPI /** PAPI Events **/ long long int papi_values[PAPI_MAX_HWCTRS]; int papi_event_set; #endif }; /** Profiling information associated to a worker. The timing is provided since the previous call to starpu_profiling_worker_get_info(). The executing_time, callback_time, waiting_time, sleeping_time, and scheduling_time are exclusive to each other, i.e. they can be added up, their sum is smaller than total_time. The difference between total_time and the sum is the uncategorized runtime overhead. */ struct starpu_profiling_worker_info { /** Starting date for the reported profiling measurements. */ struct timespec start_time; /** Duration of the profiling measurement interval. */ struct timespec total_time; /** Time spent by the worker to execute tasks during the profiling measurement interval. */ struct timespec executing_time; /** Time spent by the worker to execute callbacks, while not executing a * task, during the profiling measurement interval. */ struct timespec callback_time; /** Time spent by the worker waiting for a data transfer to finish, * while not executing a task or a callback, during the profiling * measurement interval. */ struct timespec waiting_time; /** Time spent idling by the worker because no task were available, and * not executing a task or a callback or waiting for a data transfer to * finish, during the profiling measurement interval. */ struct timespec sleeping_time; /** Time spent by the worker scheduling tasks, while not executing a * task or a callback or waiting for a data transfer to finish, and there * are tasks to be scheduled, during the profiling measurement interval. */ struct timespec scheduling_time; /** Time spent by the worker to execute tasks during the profiling measurement interval. * Normally always equal to executing_time. */ struct timespec all_executing_time; /** Time spent by the worker to execute callbacks during the profiling measurement interval. * Normally always greater than callback_time. */ struct timespec all_callback_time; /** Time spent by the worker waiting for a data transfer to finish during the profiling measurement interval. * Normally always greater than waiting_time. */ struct timespec all_waiting_time; /** Time spent idling by the worker because no task were available during the profiling measurement interval. * Normally always greater than sleeping_time. */ struct timespec all_sleeping_time; /** Time spent by the worker scheduling tasks during the profiling measurement interval. * Normally always greater than scheduling_time. */ struct timespec all_scheduling_time; /** Number of tasks executed by the worker during the profiling measurement interval. */ int executed_tasks; /** Number of cycles used by the worker, only available in the MoviSim */ uint64_t used_cycles; /** Number of cycles stalled within the worker, only available in the MoviSim */ uint64_t stall_cycles; /** Energy consumed by the worker, in Joules */ double energy_consumed; /* TODO: add wasted time due to failed tasks */ double flops; }; /** todo */ struct starpu_profiling_bus_info { /** Time of bus profiling startup. */ struct timespec start_time; /** Total time of bus profiling. */ struct timespec total_time; /** Number of bytes transferred during profiling. */ int long long transferred_bytes; /** Number of transfers during profiling. */ int transfer_count; }; /** Reset performance counters and enable profiling if the environment variable \ref STARPU_PROFILING is set to a positive value. See \ref EnablingOn-linePerformanceMonitoring for more details. */ void starpu_profiling_init(void); /** Set the ID used for profiling trace filename. Has to be called before starpu_init(). See \ref TraceMpi for more details. */ void starpu_profiling_set_id(int new_id); /** Set the profiling status. Profiling is activated by passing \ref STARPU_PROFILING_ENABLE in \p status. Passing \ref STARPU_PROFILING_DISABLE disables profiling. Calling this function resets all profiling measurements. When profiling is enabled, the field starpu_task::profiling_info points to a valid structure starpu_profiling_task_info containing information about the execution of the task. Negative return values indicate an error, otherwise the previous status is returned. See \ref EnablingOn-linePerformanceMonitoring for more details. */ int starpu_profiling_status_set(int status); /** Return the current profiling status or a negative value in case there was an error. See \ref EnablingOn-linePerformanceMonitoring for more details. */ int starpu_profiling_status_get(void); #ifdef BUILDING_STARPU #include #ifdef __GNUC__ extern int _starpu_profiling; #define starpu_profiling_status_get() ( \ { \ int __ret; \ ANNOTATE_HAPPENS_AFTER(&_starpu_profiling); \ __ret = _starpu_profiling; \ ANNOTATE_HAPPENS_BEFORE(&_starpu_profiling); \ __ret; \ }) #endif #endif /** Get the profiling info associated to the worker identified by \p workerid, and reset the profiling measurements. If the argument \p worker_info is NULL, only reset the counters associated to worker \p workerid. Upon successful completion, this function returns 0. Otherwise, a negative value is returned. See \ref Per-workerFeedback for more details. */ int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worker_info *worker_info); /** Return the number of buses in the machine. See \ref HardwareTopology for more details. */ int starpu_bus_get_count(void); /** Return the identifier of the bus between \p src and \p dst. See \ref HardwareTopology for more details. */ int starpu_bus_get_id(int src, int dst); /** Return the source point of bus \p busid. See \ref HardwareTopology for more details. */ int starpu_bus_get_src(int busid); /** Return the destination point of bus \p busid. See \ref HardwareTopology for more details. */ int starpu_bus_get_dst(int busid); /** See \ref HardwareTopology for more details. */ void starpu_bus_set_direct(int busid, int direct); /** See \ref HardwareTopology for more details. */ int starpu_bus_get_direct(int busid); /** See \ref HardwareTopology for more details. */ void starpu_bus_set_ngpus(int busid, int ngpus); /** See \ref HardwareTopology for more details. */ int starpu_bus_get_ngpus(int busid); /** See _starpu_profiling_bus_helper_display_summary in src/profiling/profiling_helpers.c for a usage example. Note that calling starpu_bus_get_profiling_info() resets the counters to zero. See \ref FeedBackFigures for more details. */ int starpu_bus_get_profiling_info(int busid, struct starpu_profiling_bus_info *bus_info); /* Some helper functions to manipulate profiling API output */ /* Reset timespec */ static __starpu_inline void starpu_timespec_clear(struct timespec *tsp) { tsp->tv_sec = 0; tsp->tv_nsec = 0; } #define STARPU_NS_PER_S 1000000000 /* Computes result = a + b */ static __starpu_inline void starpu_timespec_add(struct timespec *a, struct timespec *b, struct timespec *result) { result->tv_sec = a->tv_sec + b->tv_sec; result->tv_nsec = a->tv_nsec + b->tv_nsec; if (result->tv_nsec >= STARPU_NS_PER_S) { ++(result)->tv_sec; result->tv_nsec -= STARPU_NS_PER_S; } } /* Computes res += b */ static __starpu_inline void starpu_timespec_accumulate(struct timespec *result, struct timespec *a) { result->tv_sec += a->tv_sec; result->tv_nsec += a->tv_nsec; if (result->tv_nsec >= STARPU_NS_PER_S) { ++(result)->tv_sec; result->tv_nsec -= STARPU_NS_PER_S; } } /* Computes result = a - b */ static __starpu_inline void starpu_timespec_sub(const struct timespec *a, const struct timespec *b, struct timespec *result) { result->tv_sec = a->tv_sec - b->tv_sec; result->tv_nsec = a->tv_nsec - b->tv_nsec; if ((result)->tv_nsec < 0) { --(result)->tv_sec; result->tv_nsec += STARPU_NS_PER_S; } } #define starpu_timespec_cmp(a, b, CMP) \ (((a)->tv_sec == (b)->tv_sec) ? ((a)->tv_nsec CMP(b)->tv_nsec) : ((a)->tv_sec CMP(b)->tv_sec)) /** Return the time elapsed between \p start and \p end in microseconds. See \ref Per-taskFeedback for more details. */ double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end); /** Convert the given timespec \p ts into microseconds. See \ref Per-taskFeedback for more details. */ double starpu_timing_timespec_to_us(struct timespec *ts); /** Display statistics about the bus on \c stderr. if the environment variable \ref STARPU_BUS_STATS is defined. The function is called automatically by starpu_shutdown(). See \ref DataStatistics for more details. */ void starpu_profiling_bus_helper_display_summary(void); /** Display statistic about the workers on \c stderr if the environment variable \ref STARPU_WORKER_STATS is defined. The function is called automatically by starpu_shutdown(). See \ref DataStatistics for more details. */ void starpu_profiling_worker_helper_display_summary(void); /** Display statistics about the current data handles registered within StarPU. StarPU must have been configured with the configure option \ref enable-memory-stats "--enable-memory-stats" (see \ref MemoryFeedback). See \ref MemoryFeedback for more details. */ void starpu_data_display_memory_stats(void); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_PROFILING_H__ */ starpu-1.4.9+dfsg/include/starpu_profiling_tool.h000066400000000000000000000074351507764646700222410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2022,2023 École de Technologie Supérieure (ETS, Montréal) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_PROFILING_TOOL_H__ #define __STARPU_PROFILING_TOOL_H__ #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Profiling_Tool Profiling Tool @{ */ /** Event type */ enum starpu_prof_tool_event { starpu_prof_tool_event_none = 0, starpu_prof_tool_event_init, starpu_prof_tool_event_terminate, starpu_prof_tool_event_init_begin, starpu_prof_tool_event_init_end, starpu_prof_tool_event_driver_init, starpu_prof_tool_event_driver_deinit, starpu_prof_tool_event_driver_init_start, starpu_prof_tool_event_driver_init_end, starpu_prof_tool_event_start_cpu_exec, starpu_prof_tool_event_end_cpu_exec, starpu_prof_tool_event_start_gpu_exec, starpu_prof_tool_event_end_gpu_exec, starpu_prof_tool_event_start_transfer, starpu_prof_tool_event_end_transfer, starpu_prof_tool_event_user_start, starpu_prof_tool_event_user_end }; /** todo */ enum starpu_prof_tool_driver_type { starpu_prof_tool_driver_cpu, starpu_prof_tool_driver_gpu, starpu_prof_tool_driver_hip, starpu_prof_tool_driver_ocl }; /** todo */ enum starpu_prof_tool_command { starpu_prof_tool_command_reg = 0, starpu_prof_tool_command_toggle = 1, starpu_prof_tool_command_toggle_per_thread = 2 }; /** General information */ struct starpu_prof_tool_info { struct starpu_conf *conf; enum starpu_prof_tool_event event_type; unsigned int starpu_version[3]; int thread_id; int worker_id; int device_number; enum starpu_prof_tool_driver_type driver_type; // not sure unsigned memnode; unsigned bytes_to_transfer; unsigned bytes_transfered; void* fun_ptr; /* NULL when not relevant (driver init etc) */ /* int valid_bytes; int version; starpu_device_t device_type; int device_number; starpu_ssize_t async; starpu_ssize_t async_queue; const char* src_file; const char* func_name; int line_no, end_line_no; int func_line_no, func_end_line_no;*/ }; /** Event info */ union starpu_prof_tool_event_info { enum starpu_prof_tool_event event_type; /* starpu_data_event_info data_event; starpu_launch_event_info launch_event; starpu_other_event_info other_event;*/ }; /** API info */ struct starpu_prof_tool_api_info { /*acc_device_api device_api; int valid_bytes; acc_device_t device_type; int vendor; const void* device_handle; const void* context_handle; const void* async_handle;*/ }; typedef void (*starpu_prof_tool_cb_func)(struct starpu_prof_tool_info*, union starpu_prof_tool_event_info*, struct starpu_prof_tool_api_info*); /** Register / unregister events */ typedef void (*starpu_prof_tool_entry_register_func)(enum starpu_prof_tool_event event_type, starpu_prof_tool_cb_func cb, enum starpu_prof_tool_command info); /** A function with this signature must be implemented by external tools that want to use the callbacks */ typedef void (*starpu_prof_tool_entry_func)(starpu_prof_tool_entry_register_func reg, starpu_prof_tool_entry_register_func unreg); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_PROFILING_TOOL_H__ */ starpu-1.4.9+dfsg/include/starpu_rand.h000066400000000000000000000060571507764646700201360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_RAND_H__ #define __STARPU_RAND_H__ #include #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Random_Functions Random Functions @{ */ #ifdef STARPU_SIMGRID /* In simgrid mode, force using seed 0 by default to get reproducible behavior by default */ #define starpu_seed(seed) starpu_getenv_number_default("STARPU_RAND_SEED", 0) #else #define starpu_seed(seed) starpu_getenv_number_default("STARPU_RAND_SEED", (seed)) #endif #ifdef STARPU_USE_DRAND48 #define starpu_srand48(seed) srand48(starpu_seed(seed)) #define starpu_drand48() drand48() #define starpu_lrand48() lrand48() #define starpu_erand48(xsubi) erand48(xsubi) #ifdef STARPU_USE_ERAND48_R typedef struct drand48_data starpu_drand48_data; #define starpu_srand48_r(seed, buffer) srand48_r(starpu_seed(seed), buffer) #define starpu_drand48_r(buffer, result) drand48_r(buffer, result) #define starpu_lrand48_r(buffer, result) lrand48_r(buffer, result) #define starpu_erand48_r(xsubi, buffer, result) erand48_r(xsubi, buffer, result) #else typedef int starpu_drand48_data; #define starpu_srand48_r(seed, buffer) srand48(starpu_seed(seed)) #define starpu_drand48_r(buffer, result) \ do { \ *(result) = drand48(); \ } \ while (0) #define starpu_lrand48_r(buffer, result) \ do { \ *(result) = lrand48(); \ } \ while (0) #define starpu_erand48_r(xsubi, buffer, result) \ do { \ (void)buffer; \ *(result) = erand48(xsubi); \ } \ while (0) #endif #else typedef int starpu_drand48_data; #define starpu_srand48(seed) srand(starpu_seed(seed)) #define starpu_drand48() (double)(rand()) / RAND_MAX #define starpu_lrand48() rand() #define starpu_erand48(xsubi) starpu_drand48() #define starpu_srand48_r(seed, buffer) srand(starpu_seed(seed)) #define starpu_erand48_r(xsubi, buffer, result) \ do { \ (void)xsubi; \ (void)buffer; \ *(result) = ((double)(rand()) / RAND_MAX); \ } \ while (0) #endif /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_RAND_H__ */ starpu-1.4.9+dfsg/include/starpu_sched_component.h000066400000000000000000001016551507764646700223620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * Copyright (C) 2017-2017 Arthur Chevalier * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_SCHED_COMPONENT_H__ #define __STARPU_SCHED_COMPONENT_H__ #ifdef STARPU_HAVE_HWLOC #include #endif #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Modularized_Scheduler Modularized Scheduler Interface @{ */ /** flags for starpu_sched_component::properties */ enum starpu_sched_component_properties { STARPU_SCHED_COMPONENT_HOMOGENEOUS = (1 << 0), /**< indicate that all workers have the same starpu_worker_archtype */ STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE = (1 << 1) /**< indicate that all workers have the same memory component */ }; /** indicate if component is homogeneous */ #define STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS(component) ((component)->properties & STARPU_SCHED_COMPONENT_HOMOGENEOUS) /** indicate if all workers have the same memory component */ #define STARPU_SCHED_COMPONENT_IS_SINGLE_MEMORY_NODE(component) ((component)->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE) /** Structure for a scheduler module. A scheduler is a tree-like structure of them, some parts of scheduler can be shared by several contexes to perform some local optimisations, so, for all components, a list of parent is defined by \c sched_ctx_id. They embed there specialised method in a pseudo object-style, so calls are like component->push_task(component,task) */ struct starpu_sched_component { /** The tree containing the component*/ struct starpu_sched_tree *tree; /** set of underlying workers */ struct starpu_bitmap workers; /** subset of starpu_sched_component::workers that is currently available in the context The push method should take this value into account, it is set with: component->workers UNION tree->workers UNION component->child[i]->workers_in_ctx iff exist x such as component->children[i]->parents[x] == component */ struct starpu_bitmap workers_in_ctx; /** private data */ void *data; char *name; /** number of compoments's children */ unsigned nchildren; /** vector of component's children */ struct starpu_sched_component **children; /** number of component's parents */ unsigned nparents; /** vector of component's parents */ struct starpu_sched_component **parents; /** add a child to component */ void (*add_child)(struct starpu_sched_component *component, struct starpu_sched_component *child); /** remove a child from component */ void (*remove_child)(struct starpu_sched_component *component, struct starpu_sched_component *child); void (*add_parent)(struct starpu_sched_component *component, struct starpu_sched_component *parent); void (*remove_parent)(struct starpu_sched_component *component, struct starpu_sched_component *parent); /** push a task in the scheduler module. this function is called to push a task on component subtree, this can either perform a recursive call on a child or store the task in the component, then it will be returned by a further pull_task call. the caller must ensure that component is able to execute task. This method must either return 0 if it the task was properly stored or passed over to a child component, or return a value different from 0 if the task could not be consumed (e.g. the queue is full). */ int (*push_task)(struct starpu_sched_component *, struct starpu_task *); /** pop a task from the scheduler module. this function is called by workers to get a task from their parents. this function should first return a locally stored task or perform a recursive call on the parents. the task returned by this function should be executable by the caller */ struct starpu_task *(*pull_task)(struct starpu_sched_component *from, struct starpu_sched_component *to); /** This function is called by a component which implements a queue, allowing it to signify to its parents that an empty slot is available in its queue. This should return 1 if some tasks could be pushed The basic implementation of this function is a recursive call to its parents, the user has to specify a personally-made function to catch those calls. */ int (*can_push)(struct starpu_sched_component *from, struct starpu_sched_component *to); /** This function allow a component to wake up a worker. It is currently called by component which implements a queue, to signify to its children that a task have been pushed in its local queue, and is available to be popped by a worker, for example. This should return 1 if some some container or worker could (or will) pull some tasks. The basic implementation of this function is a recursive call to its children, until at least one worker have been woken up. */ int (*can_pull)(struct starpu_sched_component *component); /** This function is called when starpu_do_schedule() is called by the application. */ void (*do_schedule)(struct starpu_sched_component *component); int (*notify)(struct starpu_sched_component *component, int message_ID, void *arg); /** heuristic to compute load of scheduler module. Basically the number of tasks divided by the sum of relatives speedup of workers available in context. estimated_load(component) = sum(estimated_load(component_children)) + nb_local_tasks / average(relative_speedup(underlying_worker)) */ double (*estimated_load)(struct starpu_sched_component *component); /** return the time when a worker will enter in starvation. This function is relevant only if the task->predicted member has been set. */ double (*estimated_end)(struct starpu_sched_component *component); /** called by starpu_sched_component_destroy. Should free data allocated during creation */ void (*deinit_data)(struct starpu_sched_component *component); /** this function is called for each component when workers are added or removed from a context */ void (*notify_change_workers)(struct starpu_sched_component *component); int properties; #ifdef STARPU_HAVE_HWLOC /** the hwloc object associated to scheduler module. points to the part of topology that is binded to this component, eg: a numa node for a ws component that would balance load between underlying sockets */ hwloc_obj_t obj; #else void *obj; #endif }; /** The actual scheduler */ struct starpu_sched_tree { /** entry module of the scheduler */ struct starpu_sched_component *root; /** set of workers available in this context, this value is used to mask workers in modules */ struct starpu_bitmap workers; /** context id of the scheduler */ unsigned sched_ctx_id; /** lock used to protect the scheduler, it is taken in read mode pushing a task and in write mode for adding or removing workers */ starpu_pthread_mutex_t lock; }; /** @name Scheduling Tree API @{ */ /** create a empty initialized starpu_sched_tree. See \ref ImplementAModularizedScheduler for more details. */ struct starpu_sched_tree *starpu_sched_tree_create(unsigned sched_ctx_id) STARPU_ATTRIBUTE_MALLOC; /** destroy tree and free all non shared component in it. See \ref ImplementAModularizedScheduler for more details. */ void starpu_sched_tree_destroy(struct starpu_sched_tree *tree); /** calls starpu_sched_tree_destroy, ready for use for starpu_sched_policy::deinit_sched field. See \ref ImplementAModularizedScheduler for more details. */ void starpu_sched_tree_deinitialize(unsigned sched_ctx_id); /** See \ref ImplementAModularizedScheduler for more details. */ struct starpu_sched_tree *starpu_sched_tree_get(unsigned sched_ctx_id); /** recursively set all starpu_sched_component::workers, do not take into account shared parts (except workers). See \ref ImplementAModularizedScheduler for more details. */ void starpu_sched_tree_update_workers(struct starpu_sched_tree *t); /** recursively set all starpu_sched_component::workers_in_ctx, do not take into account shared parts (except workers) See \ref ImplementAModularizedScheduler for more details. */ void starpu_sched_tree_update_workers_in_ctx(struct starpu_sched_tree *t); /** compatibility with starpu_sched_policy interface. See \ref ImplementAModularizedScheduler for more details. */ int starpu_sched_tree_push_task(struct starpu_task *task); /** compatibility with starpu_sched_policy interface. See \ref ImplementAModularizedScheduler for more details. */ struct starpu_task *starpu_sched_tree_pop_task(unsigned sched_ctx); /** Push a task to a component. This is a helper for component->push_task(component, task) plus tracing. */ int starpu_sched_component_push_task(struct starpu_sched_component *from, struct starpu_sched_component *to, struct starpu_task *task); /** Pull a task from a component. This is a helper for component->pull_task(component) plus tracing. */ struct starpu_task *starpu_sched_component_pull_task(struct starpu_sched_component *from, struct starpu_sched_component *to); struct starpu_task *starpu_sched_component_pump_to(struct starpu_sched_component *component, struct starpu_sched_component *to, int *success); struct starpu_task *starpu_sched_component_pump_downstream(struct starpu_sched_component *component, int *success); int starpu_sched_component_send_can_push_to_parents(struct starpu_sched_component *component); /** compatibility with starpu_sched_policy interface */ void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers); /** compatibility with starpu_sched_policy interface */ void starpu_sched_tree_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers); /** Run the do_schedule method of the components. This is a helper for starpu_sched_policy::do_schedule. */ void starpu_sched_tree_do_schedule(unsigned sched_ctx_id); /** Attach component \p child to parent \p parent. Some component may accept only one child, others accept several (e.g. MCT) */ void starpu_sched_component_connect(struct starpu_sched_component *parent, struct starpu_sched_component *child); /** @} */ /** @name Generic Scheduling Component API @{ */ typedef struct starpu_sched_component *(*starpu_sched_component_create_t)(struct starpu_sched_tree *tree, void *data); /** allocate and initialize component field with defaults values : .pop_task make recursive call on father .estimated_load compute relative speedup and tasks in sub tree .estimated_end return the minimum of recursive call on children .add_child is starpu_sched_component_add_child .remove_child is starpu_sched_component_remove_child .notify_change_workers does nothing .deinit_data does nothing */ struct starpu_sched_component *starpu_sched_component_create(struct starpu_sched_tree *tree, const char *name) STARPU_ATTRIBUTE_MALLOC; /** free data allocated by starpu_sched_component_create and call component->deinit_data(component) set to NULL the member starpu_sched_component::fathers[sched_ctx_id] of all child if its equal to \p component */ void starpu_sched_component_destroy(struct starpu_sched_component *component); /** recursively destroy non shared parts of a \p component 's tree */ void starpu_sched_component_destroy_rec(struct starpu_sched_component *component); void starpu_sched_component_add_child(struct starpu_sched_component *component, struct starpu_sched_component *child); /** return true iff \p component can execute \p task, this function take into account the workers available in the scheduling context */ int starpu_sched_component_can_execute_task(struct starpu_sched_component *component, struct starpu_task *task); /** return a non NULL value if \p component can execute \p task. write the execution prediction length for the best implementation of the best worker available and write this at \p length address. this result is more relevant if starpu_sched_component::is_homogeneous is non NULL. if a worker need to be calibrated for an implementation, nan is set to \p length. */ int STARPU_WARN_UNUSED_RESULT starpu_sched_component_execute_preds(struct starpu_sched_component *component, struct starpu_task *task, double *length); /** return the average time to transfer \p task data to underlying \p component workers. */ double starpu_sched_component_transfer_length(struct starpu_sched_component *component, struct starpu_task *task); void starpu_sched_component_prefetch_on_node(struct starpu_sched_component *component, struct starpu_task *task); /** @} */ /** @name Worker Component API @{ */ /** return the struct starpu_sched_component corresponding to \p workerid. Undefined if \p workerid is not a valid workerid */ struct starpu_sched_component *starpu_sched_component_worker_get(unsigned sched_ctx, int workerid); struct starpu_sched_component *starpu_sched_component_worker_new(unsigned sched_ctx, int workerid); /** Create a combined worker that pushes tasks in parallel to workers \p workers (size \p nworkers). */ struct starpu_sched_component *starpu_sched_component_parallel_worker_create(struct starpu_sched_tree *tree, unsigned nworkers, unsigned *workers); /** return the workerid of \p worker_component, undefined if starpu_sched_component_is_worker(worker_component) == 0 */ int starpu_sched_component_worker_get_workerid(struct starpu_sched_component *worker_component); /** return true iff \p component is a worker component */ int starpu_sched_component_is_worker(struct starpu_sched_component *component); /** return true iff \p component is a simple worker component */ int starpu_sched_component_is_simple_worker(struct starpu_sched_component *component); /** return true iff \p component is a combined worker component */ int starpu_sched_component_is_combined_worker(struct starpu_sched_component *component); /** compatibility with starpu_sched_policy interface update predictions for workers */ void starpu_sched_component_worker_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id); /** compatibility with starpu_sched_policy interface */ void starpu_sched_component_worker_post_exec_hook(struct starpu_task *task, unsigned sched_ctx_id); /** @} */ /** @name Flow-control Fifo Component API These can be used as methods of components. Note: they are not to be called directly, one should really call the methods of the components. @{ */ /** default function for the pull component method, just call pull of parents until one of them returns a task */ struct starpu_task *starpu_sched_component_parents_pull_task(struct starpu_sched_component *component, struct starpu_sched_component *to); /** default function for the can_push component method, just call can_push of parents until one of them returns non-zero */ int starpu_sched_component_can_push(struct starpu_sched_component *component, struct starpu_sched_component *to); /** default function for the can_pull component method, just call can_pull of children until one of them returns non-zero */ int starpu_sched_component_can_pull(struct starpu_sched_component *component); /** function for the can_pull component method, call can_pull of all children */ int starpu_sched_component_can_pull_all(struct starpu_sched_component *component); /** default function for the estimated_load component method, just sum up the loads of the children of the component. */ double starpu_sched_component_estimated_load(struct starpu_sched_component *component); /** function that can be used for the estimated_end component method, compute the minimum completion time of the children. */ double starpu_sched_component_estimated_end_min(struct starpu_sched_component *component); /** function that can be used for the estimated_end component method, compute the minimum completion time of the children, and add to it an estimation of how existing queued work, plus the exp_len work, can be completed. This is typically used instead of starpu_sched_component_estimated_end_min when the component contains a queue of tasks, which thus needs to be added to the estimations. */ double starpu_sched_component_estimated_end_min_add(struct starpu_sched_component *component, double exp_len); /** default function for the estimated_end component method, compute the average completion time of the children. */ double starpu_sched_component_estimated_end_average(struct starpu_sched_component *component); /** todo */ struct starpu_sched_component_fifo_data { unsigned ntasks_threshold; double exp_len_threshold; int ready; int exp; }; /** Return a struct starpu_sched_component with a fifo. A stable sort is performed according to tasks priorities. A push_task call on this component does not perform recursive calls, underlying components will have to call pop_task to get it. starpu_sched_component::estimated_end function compute the estimated length by dividing the sequential length by the number of underlying workers. */ struct starpu_sched_component *starpu_sched_component_fifo_create(struct starpu_sched_tree *tree, struct starpu_sched_component_fifo_data *fifo_data) STARPU_ATTRIBUTE_MALLOC; /** return true iff \p component is a fifo component */ int starpu_sched_component_is_fifo(struct starpu_sched_component *component); /** @} */ /** @name Flow-control Prio Component API @{ */ /** todo */ struct starpu_sched_component_prio_data { unsigned ntasks_threshold; double exp_len_threshold; int ready; int exp; }; struct starpu_sched_component *starpu_sched_component_prio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_prio_data *prio_data) STARPU_ATTRIBUTE_MALLOC; int starpu_sched_component_is_prio(struct starpu_sched_component *component); /** @} */ /** @name Resource-mapping Work-Stealing Component API @{ */ /** return a component that perform a work stealing scheduling. Tasks are pushed in a round robin way. estimated_end return the average of expected length of fifos, starting at the average of the expected_end of his children. When a worker have to steal a task, it steal a task in a round robin way, and get the last pushed task of the higher priority. */ struct starpu_sched_component *starpu_sched_component_work_stealing_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; /** return true iff \p component is a work stealing component */ int starpu_sched_component_is_work_stealing(struct starpu_sched_component *component); /** undefined if there is no work stealing component in the scheduler. If any, \p task is pushed in a default way if the caller is the application, and in the caller's fifo if its a worker. */ int starpu_sched_tree_work_stealing_push_task(struct starpu_task *task); /** @} */ /** @name Resource-mapping Random Component API @{ */ /** create a component that perform a random scheduling */ struct starpu_sched_component *starpu_sched_component_random_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; /** return true iff \p component is a random component */ int starpu_sched_component_is_random(struct starpu_sched_component *); /** @} */ /** @name Resource-mapping Eager Component API @{ */ struct starpu_sched_component *starpu_sched_component_eager_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; int starpu_sched_component_is_eager(struct starpu_sched_component *); /** @} */ /** @name Resource-mapping Eager Prio Component API @{ */ struct starpu_sched_component *starpu_sched_component_eager_prio_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; int starpu_sched_component_is_eager_prio(struct starpu_sched_component *); /** @} */ /** @name Resource-mapping Eager-Calibration Component API @{ */ struct starpu_sched_component *starpu_sched_component_eager_calibration_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; int starpu_sched_component_is_eager_calibration(struct starpu_sched_component *); /** @} */ /** @name Resource-mapping MCT Component API @{ */ /** todo */ struct starpu_sched_component_mct_data { double alpha; double beta; double _gamma; double idle_power; int nolock; }; /** create a component with mct_data parameters. the mct component does not do anything but pushing tasks on no_perf_model_component and calibrating_component */ struct starpu_sched_component *starpu_sched_component_mct_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data *mct_data) STARPU_ATTRIBUTE_MALLOC; int starpu_sched_component_is_mct(struct starpu_sched_component *component); /** @} */ /** @name Resource-mapping Heft Component API @{ */ struct starpu_sched_component *starpu_sched_component_heft_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data *mct_data) STARPU_ATTRIBUTE_MALLOC; int starpu_sched_component_is_heft(struct starpu_sched_component *component); /** @} */ /** @name Resource-mapping Heteroprio Component API @{ */ /** todo */ struct starpu_sched_component_heteroprio_data { struct starpu_sched_component_mct_data *mct; unsigned batch; }; struct starpu_sched_component *starpu_sched_component_heteroprio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_heteroprio_data *params) STARPU_ATTRIBUTE_MALLOC; int starpu_sched_component_is_heteroprio(struct starpu_sched_component *component); /** @} */ /** @name Special-purpose Best_Implementation Component API @{ */ /** Select the implementation that offer the shortest computation length for the first worker that can execute the task. Or an implementation that need to be calibrated. Also set starpu_task::predicted and starpu_task::predicted_transfer for memory component of the first suitable workerid. If starpu_sched_component::push method is called and starpu_sched_component::nchild > 1 the result is undefined. */ struct starpu_sched_component *starpu_sched_component_best_implementation_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; /** @} */ /** @name Special-purpose Perfmodel_Select Component API @{ */ /** todo */ struct starpu_sched_component_perfmodel_select_data { struct starpu_sched_component *calibrator_component; struct starpu_sched_component *no_perfmodel_component; struct starpu_sched_component *perfmodel_component; }; struct starpu_sched_component *starpu_sched_component_perfmodel_select_create(struct starpu_sched_tree *tree, struct starpu_sched_component_perfmodel_select_data *perfmodel_select_data) STARPU_ATTRIBUTE_MALLOC; int starpu_sched_component_is_perfmodel_select(struct starpu_sched_component *component); /** @} */ /** @name Staged pull Component API @{ */ struct starpu_sched_component *starpu_sched_component_stage_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; int starpu_sched_component_is_stage(struct starpu_sched_component *component); /** @} */ /** @name User-choice push Component API @{ */ struct starpu_sched_component *starpu_sched_component_userchoice_create(struct starpu_sched_tree *tree, void *arg) STARPU_ATTRIBUTE_MALLOC; int starpu_sched_component_is_userchoice(struct starpu_sched_component *component); /** @} */ /** @name Recipe Component API @{ */ /** parameters for starpu_sched_component_composed_component_create */ struct starpu_sched_component_composed_recipe; /** return an empty recipe for a composed component, it should not be used without modification. See \ref ImplementAModularizedScheduler for more details. */ struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create(void) STARPU_ATTRIBUTE_MALLOC; /** return a recipe to build a composed component with a \p create_component */ struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create_singleton(struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void *arg), void *arg) STARPU_ATTRIBUTE_MALLOC; /** add \p create_component under all previous components in recipe */ void starpu_sched_component_composed_recipe_add(struct starpu_sched_component_composed_recipe *recipe, struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void *arg), void *arg); /** destroy composed_sched_component, this should be done after starpu_sched_component_composed_component_create was called */ void starpu_sched_component_composed_recipe_destroy(struct starpu_sched_component_composed_recipe *); /** create a component that behave as all component of recipe where linked. Except that you can not use starpu_sched_component_is_foo function if recipe contain a single create_foo arg_foo pair, create_foo(arg_foo) is returned instead of a composed component */ struct starpu_sched_component *starpu_sched_component_composed_component_create(struct starpu_sched_tree *tree, struct starpu_sched_component_composed_recipe *recipe) STARPU_ATTRIBUTE_MALLOC; #ifdef STARPU_HAVE_HWLOC /** Define how build a scheduler according to topology. Each level (except for hwloc_machine_composed_sched_component) can be NULL, then the level is just skipped. Bugs everywhere, do not rely on. */ struct starpu_sched_component_specs { /** the composed component to put on the top of the scheduler this member must not be NULL as it is the root of the topology */ struct starpu_sched_component_composed_recipe *hwloc_machine_composed_sched_component; /** the composed component to put for each memory component */ struct starpu_sched_component_composed_recipe *hwloc_component_composed_sched_component; /** the composed component to put for each socket */ struct starpu_sched_component_composed_recipe *hwloc_socket_composed_sched_component; /** the composed component to put for each cache */ struct starpu_sched_component_composed_recipe *hwloc_cache_composed_sched_component; /** a function that return a starpu_sched_component_composed_recipe to put on top of a worker of type \p archtype. NULL is a valid return value, then no component will be added on top */ struct starpu_sched_component_composed_recipe *(*worker_composed_sched_component)(enum starpu_worker_archtype archtype); /** this flag is a dirty hack because of the poor expressivity of this interface. As example, if you want to build a heft component with a fifo component per numa component, and you also have GPUs, if this flag is set, GPUs will share those fifos. If this flag is not set, a new fifo will be built for each of them (if they have the same starpu_perf_arch and the same numa component it will be shared. it indicates if heterogeneous workers should be brothers or cousins, as example, if a gpu and a cpu should share or not there numa node */ int mix_heterogeneous_workers; }; /** build a scheduler for \p sched_ctx_id according to \p s and the hwloc topology of the machine. */ struct starpu_sched_tree *starpu_sched_component_make_scheduler(unsigned sched_ctx_id, struct starpu_sched_component_specs s); #endif /* STARPU_HAVE_HWLOC */ /** @name Basic API @{ */ #define STARPU_SCHED_SIMPLE_DECIDE_MASK (3 << 0) /** Request to create downstream queues per worker, i.e. the scheduling decision-making component will choose exactly which workers tasks should got to. */ #define STARPU_SCHED_SIMPLE_DECIDE_WORKERS (1 << 0) /** Request to create downstream queues per memory nodes, i.e. the scheduling decision-making component will choose which memory node tasks will go to. */ #define STARPU_SCHED_SIMPLE_DECIDE_MEMNODES (2 << 0) /** Request to create downstream queues per computation arch, i.e. the scheduling decision-making component will choose whether tasks go to CPUs, or CUDA, or OpenCL, etc. */ #define STARPU_SCHED_SIMPLE_DECIDE_ARCHS (3 << 0) /** Request to create the scheduling decision-making component even if there is only one available choice. This is useful for instance when the decision-making component will store tasks itself (and not use STARPU_SCHED_SIMPLE_FIFO_ABOVE) to decide in which order tasks should be passed below. */ #define STARPU_SCHED_SIMPLE_DECIDE_ALWAYS (1 << 3) /** Request to add a perfmodel selector above the scheduling decision-making component. That way, only tasks with a calibrated performance model will be given to the component, other tasks will go to an eager branch that will distributed tasks so that their performance models will get calibrated. In other words, this is needed when using a component which needs performance models for tasks. */ #define STARPU_SCHED_SIMPLE_PERFMODEL (1 << 4) /** Request that a component be added just above workers, that chooses the best task implementation. */ #define STARPU_SCHED_SIMPLE_IMPL (1 << 5) /** Request to create a fifo above the scheduling decision-making component, otherwise tasks will be pushed directly to the component. This is useful to store tasks if there is a fifo below which limits the number of tasks to be scheduld in advance. The scheduling decision-making component can also store tasks itself, in which case this flag is not useful. */ #define STARPU_SCHED_SIMPLE_FIFO_ABOVE (1 << 6) /** Request that the fifo above be sorted by priorities */ #define STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO (1 << 7) /** Request to create fifos below the scheduling decision-making component, otherwise tasks will be pulled directly from workers. This is useful to be able to schedule a (tunable) small number of tasks in advance only. */ #define STARPU_SCHED_SIMPLE_FIFOS_BELOW (1 << 8) /** Request that the fifos below be sorted by priorities */ #define STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO (1 << 9) /** Request that the fifos below be pulled rather ready tasks */ #define STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY (1 << 10) /** Request that the fifos below have no size limit */ #define STARPU_SCHED_SIMPLE_FIFOS_BELOW_NOLIMIT (1 << 16) /** Request that work between workers using the same fifo below be distributed using a work stealing component. */ #define STARPU_SCHED_SIMPLE_WS_BELOW (1 << 11) /** Request to not only choose between simple workers, but also choose between combined workers. */ #define STARPU_SCHED_SIMPLE_COMBINED_WORKERS (1 << 12) /** Request that the fifos below keep track of expected duration, start and end time of theirs elements */ #define STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP (1 << 13) /** Request to prepend a component before the decision component. This should be used alone and followed by the component creation function pointer and its data. */ #define STARPU_SCHED_SIMPLE_PRE_DECISION (1 << 14) /** Create a simple modular scheduler tree around a scheduling decision-making component \p component. The details of what should be built around \p component is described by \p flags. The different STARPU_SCHED_SIMPL_DECIDE_* flags are mutually exclusive. \p data is passed to the \p create_decision_component function when creating the decision component. See \ref ImplementAModularizedScheduler for more details. */ void starpu_sched_component_initialize_simple_scheduler(starpu_sched_component_create_t create_decision_component, void *data, unsigned flags, unsigned sched_ctx_id); /** Create a simple modular scheduler tree around several scheduling decision-making components. The parameters are similar to starpu_sched_component_initialize_simple_scheduler, but per scheduling decision, for instance: starpu_sched_component_initialize_simple_schedulers(sched_ctx_id, 2, create1, data1, flags1, create2, data2, flags2); The different flags parameters must be coherent: same decision flags. They must not include the perfmodel flag (not supported yet). */ void starpu_sched_component_initialize_simple_schedulers(unsigned sched_ctx_id, unsigned ndecisions, ...); /** @} */ #define STARPU_COMPONENT_MUTEX_LOCK(m) \ do \ { \ const int _relaxed_state = starpu_worker_get_relax_state(); \ if (!_relaxed_state) \ starpu_worker_relax_on(); \ STARPU_PTHREAD_MUTEX_LOCK((m)); \ if (!_relaxed_state) \ starpu_worker_relax_off(); \ } \ while (0) #define STARPU_COMPONENT_MUTEX_TRYLOCK(m) STARPU_PTHREAD_MUTEX_TRYLOCK((m)) #define STARPU_COMPONENT_MUTEX_UNLOCK(m) STARPU_PTHREAD_MUTEX_UNLOCK((m)) /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_SCHED_COMPONENT_H__ */ starpu-1.4.9+dfsg/include/starpu_sched_ctx.h000066400000000000000000000367771507764646700211720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2017-2017 Arthur Chevalier * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_SCHED_CTX_H__ #define __STARPU_SCHED_CTX_H__ #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Scheduling_Contexts Scheduling Contexts @brief StarPU permits on one hand grouping workers in combined workers in order to execute a parallel task and on the other hand grouping tasks in bundles that will be executed by a single specified worker. In contrast when we group workers in scheduling contexts we submit starpu tasks to them and we schedule them with the policy assigned to the context. Scheduling contexts can be created, deleted and modified dynamically. @{ */ /** @name Scheduling Contexts Basic API @{ */ /** Used when calling starpu_sched_ctx_create() to specify a name for a scheduling policy */ #define STARPU_SCHED_CTX_POLICY_NAME (1 << 16) /** Used when calling starpu_sched_ctx_create() to specify a pointer to a scheduling policy */ #define STARPU_SCHED_CTX_POLICY_STRUCT (2 << 16) /** Used when calling starpu_sched_ctx_create() to specify a minimum scheduler priority value. */ #define STARPU_SCHED_CTX_POLICY_MIN_PRIO (3 << 16) /** Used when calling starpu_sched_ctx_create() to specify a maximum scheduler priority value. */ #define STARPU_SCHED_CTX_POLICY_MAX_PRIO (4 << 16) #define STARPU_SCHED_CTX_HIERARCHY_LEVEL (5 << 16) #define STARPU_SCHED_CTX_NESTED (6 << 16) /** Used when calling starpu_sched_ctx_create() to specify ??? */ #define STARPU_SCHED_CTX_AWAKE_WORKERS (7 << 16) /** Used when calling starpu_sched_ctx_create() to specify a function pointer allowing to initialize the scheduling policy. */ #define STARPU_SCHED_CTX_POLICY_INIT (8 << 16) /** Used when calling starpu_sched_ctx_create() to specify a pointer to some user data related to the context being created. */ #define STARPU_SCHED_CTX_USER_DATA (9 << 16) /** Used when calling starpu_sched_ctx_create() in order to create a context on the NVIDIA GPU to specify the number of SMs the context should have */ #define STARPU_SCHED_CTX_CUDA_NSMS (10 << 16) /** Used when calling starpu_sched_ctx_create() to specify a list of sub contexts of the current context. */ #define STARPU_SCHED_CTX_SUB_CTXS (11 << 16) /** Create a scheduling context with the given parameters (see below) and assign the workers in \p workerids_ctx to execute the tasks submitted to it. The return value represents the identifier of the context that has just been created. It will be further used to indicate the context the tasks will be submitted to. The return value should be at most ::STARPU_NMAX_SCHED_CTXS. The arguments following the name of the scheduling context can be of the following types:
    • ::STARPU_SCHED_CTX_POLICY_NAME, followed by the name of a predefined scheduling policy. Use an empty string to create the context with the default scheduling policy.
    • ::STARPU_SCHED_CTX_POLICY_STRUCT, followed by a pointer to a custom scheduling policy (struct starpu_sched_policy *)
    • ::STARPU_SCHED_CTX_POLICY_MIN_PRIO, followed by a integer representing the minimum priority value to be defined for the scheduling policy.
    • ::STARPU_SCHED_CTX_POLICY_MAX_PRIO, followed by a integer representing the maximum priority value to be defined for the scheduling policy.
    • ::STARPU_SCHED_CTX_POLICY_INIT, followed by a function pointer (ie. void init_sched(void)) allowing to initialize the scheduling policy.
    • ::STARPU_SCHED_CTX_USER_DATA, followed by a pointer to a custom user data structure, to be retrieved by \ref starpu_sched_ctx_get_user_data().
    See \ref CreatingAContext for more details. */ unsigned starpu_sched_ctx_create(int *workerids_ctx, int nworkers_ctx, const char *sched_ctx_name, ...); /** Create a context indicating an approximate interval of resources */ unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const char *sched_ctx_name, int min_ncpus, int max_ncpus, int min_ngpus, int max_ngpus, unsigned allow_overlap); /** Execute the callback whenever the last task of the context finished executing, it is called with the parameters \p sched_ctx and any other parameter needed by the application (packed in \p args) */ void starpu_sched_ctx_register_close_callback(unsigned sched_ctx_id, void (*close_callback)(unsigned sched_ctx_id, void *args), void *args); /** Add dynamically the workers in \p workerids_ctx to the context \p sched_ctx_id. The last argument cannot be greater than ::STARPU_NMAX_SCHED_CTXS. See \ref ModifyingAContext for more details. */ void starpu_sched_ctx_add_workers(int *workerids_ctx, unsigned nworkers_ctx, unsigned sched_ctx_id); /** Remove the workers in \p workerids_ctx from the context \p sched_ctx_id. The last argument cannot be greater than ::STARPU_NMAX_SCHED_CTXS. See \ref ModifyingAContext for more details. */ void starpu_sched_ctx_remove_workers(int *workerids_ctx, unsigned nworkers_ctx, unsigned sched_ctx_id); /** Print on the file \p f the worker names belonging to the context \p sched_ctx_id */ void starpu_sched_ctx_display_workers(unsigned sched_ctx_id, FILE *f); /** Delete scheduling context \p sched_ctx_id and transfer remaining workers to the inheritor scheduling context. See \ref DeletingAContext for more details. */ void starpu_sched_ctx_delete(unsigned sched_ctx_id); /** Indicate that the context \p inheritor will inherit the resources of the context \p sched_ctx_id when \p sched_ctx_id will be deleted. See \ref DeletingAContext for more details. */ void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor); unsigned starpu_sched_ctx_get_inheritor(unsigned sched_ctx_id); unsigned starpu_sched_ctx_get_hierarchy_level(unsigned sched_ctx_id); /** Set the scheduling context the subsequent tasks will be submitted to. See \ref SubmittingTasksToAContext and \ref TmpCTXS for more details. */ void starpu_sched_ctx_set_context(unsigned *sched_ctx_id); /** Return the scheduling context the tasks are currently submitted to, or ::STARPU_NMAX_SCHED_CTXS if no default context has been defined by calling the function starpu_sched_ctx_set_context(). */ unsigned starpu_sched_ctx_get_context(void); /** Stop submitting tasks from the empty context list until the next time the context has time to check the empty context list. See \ref EmptyingAContext for more details. */ void starpu_sched_ctx_stop_task_submission(void); /** Indicate starpu that the application finished submitting to this context in order to move the workers to the inheritor as soon as possible. See \ref DeletingAContext for more details. */ void starpu_sched_ctx_finished_submit(unsigned sched_ctx_id); /** Return the list of workers in the array \p workerids, the return value is the number of workers. The user should free the \p workerids table after finishing using it (it is allocated inside the function with the proper size) */ unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids); /** Return the list of workers in the array \p workerids, the return value is the number of workers. This list is provided in raw order, i.e. not sorted by tree or list order, and the user should not free the \p workerids table. This function is thus much less costly than starpu_sched_ctx_get_workers_list(). */ unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids); /** Return the number of workers managed by the specified context (Usually needed to verify if it manages any workers or if it should be blocked) */ unsigned starpu_sched_ctx_get_nworkers(unsigned sched_ctx_id); /** Return the number of workers shared by two contexts. */ unsigned starpu_sched_ctx_get_nshared_workers(unsigned sched_ctx_id, unsigned sched_ctx_id2); /** Return 1 if the worker belongs to the context and 0 otherwise */ unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id); unsigned starpu_sched_ctx_contains_type_of_worker(enum starpu_worker_archtype arch, unsigned sched_ctx_id); /** Return the workerid if the worker belongs to the context and -1 otherwise. If the thread calling this function is not a worker the function returns -1 as it calls the function starpu_worker_get_id(). */ unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id); unsigned starpu_sched_ctx_get_ctx_for_task(struct starpu_task *task); unsigned starpu_worker_get_sched_ctx_id_stream(unsigned stream_workerid); /** Check if a worker is shared between several contexts */ unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid); /** Return the user data pointer associated to the scheduling context. */ void *starpu_sched_ctx_get_user_data(unsigned sched_ctx_id); void starpu_sched_ctx_set_user_data(unsigned sched_ctx_id, void *user_data); /** Allocate the scheduling policy data (private information of the scheduler like queues, variables, additional condition variables) the context. See \ref DefiningANewBasicSchedulingPolicy for more details. */ void starpu_sched_ctx_set_policy_data(unsigned sched_ctx_id, void *policy_data); /** Return the scheduling policy data (private information of the scheduler) of the contexts previously assigned to. See \ref DefiningANewBasicSchedulingPolicy for more details. */ void *starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id); struct starpu_sched_policy *starpu_sched_ctx_get_sched_policy(unsigned sched_ctx_id); /** Execute any parallel code on the workers of the sched_ctx (workers are blocked) */ void *starpu_sched_ctx_exec_parallel_code(void *(*func)(void *), void *param, unsigned sched_ctx_id); int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id); double starpu_sched_ctx_get_nready_flops(unsigned sched_ctx_id); void starpu_sched_ctx_list_task_counters_increment(unsigned sched_ctx_id, int workerid); void starpu_sched_ctx_list_task_counters_decrement(unsigned sched_ctx_id, int workerid); void starpu_sched_ctx_list_task_counters_reset(unsigned sched_ctx_id, int workerid); void starpu_sched_ctx_list_task_counters_increment_all_ctx_locked(struct starpu_task *task, unsigned sched_ctx_id); void starpu_sched_ctx_list_task_counters_decrement_all_ctx_locked(struct starpu_task *task, unsigned sched_ctx_id); void starpu_sched_ctx_list_task_counters_reset_all(struct starpu_task *task, unsigned sched_ctx_id); void starpu_sched_ctx_set_priority(int *workers, int nworkers, unsigned sched_ctx_id, unsigned priority); unsigned starpu_sched_ctx_get_priority(int worker, unsigned sched_ctx_id); void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids); void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid); int starpu_sched_ctx_book_workers_for_task(unsigned sched_ctx_id, int *workerids, int nworkers); void starpu_sched_ctx_unbook_workers_for_task(unsigned sched_ctx_id, int master); /** Return the first context (child of sched_ctx_id) where the workerid is master */ unsigned starpu_sched_ctx_worker_is_master_for_child_ctx(int workerid, unsigned sched_ctx_id); /** Return the context id of masterid if it master of a context. If not, return ::STARPU_NMAX_SCHED_CTXS. */ unsigned starpu_sched_ctx_master_get_context(int masterid); void starpu_sched_ctx_revert_task_counters_ctx_locked(unsigned sched_ctx_id, double flops); void starpu_sched_ctx_move_task_to_ctx_locked(struct starpu_task *task, unsigned sched_ctx, unsigned with_repush); int starpu_sched_ctx_get_worker_rank(unsigned sched_ctx_id); /** Return the function associated with the scheduler context \p sched_ctx_id which was given through the field starpu_conf::sched_policy_callback */ void (*starpu_sched_ctx_get_sched_policy_callback(unsigned sched_ctx_id))(unsigned); unsigned starpu_sched_ctx_has_starpu_scheduler(unsigned sched_ctx_id, unsigned *awake_workers); int starpu_sched_ctx_get_stream_worker(unsigned sub_ctx); int starpu_sched_ctx_get_nsms(unsigned sched_ctx); void starpu_sched_ctx_get_sms_interval(int stream_workerid, int *start, int *end); /** @} */ /** @name Scheduling Context Priorities @{ */ /** Return the current minimum priority level supported by the scheduling policy of the given scheduler context. */ int starpu_sched_ctx_get_min_priority(unsigned sched_ctx_id); /** Return the current maximum priority level supported by the scheduling policy of the given scheduler context. */ int starpu_sched_ctx_get_max_priority(unsigned sched_ctx_id); /** Define the minimum task priority level supported by the scheduling policy of the given scheduler context. The default minimum priority level is the same as the default priority level which is 0 by convention. The application may access that value by calling the function starpu_sched_ctx_get_min_priority(). This function should only be called from the initialization method of the scheduling policy, and should not be used directly from the application. */ int starpu_sched_ctx_set_min_priority(unsigned sched_ctx_id, int min_prio); /** Define the maximum priority level supported by the scheduling policy of the given scheduler context. The default maximum priority level is 1. The application may access that value by calling the starpu_sched_ctx_get_max_priority() function. This function should only be called from the initialization method of the scheduling policy, and should not be used directly from the application. */ int starpu_sched_ctx_set_max_priority(unsigned sched_ctx_id, int max_prio); int starpu_sched_ctx_min_priority_is_set(unsigned sched_ctx_id); int starpu_sched_ctx_max_priority_is_set(unsigned sched_ctx_id); /** Provided for legacy reasons. */ #define STARPU_MIN_PRIO (starpu_sched_get_min_priority()) /** Provided for legacy reasons. */ #define STARPU_MAX_PRIO (starpu_sched_get_max_priority()) /** By convention, the default priority level should be 0 so that we can statically allocate tasks with a default priority. */ #define STARPU_DEFAULT_PRIO 0 /** @} */ /** @name Scheduling Context Worker Collection @{ */ /** Create a worker collection of the type indicated by the last parameter for the context specified through the first parameter. */ struct starpu_worker_collection *starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type type) STARPU_ATTRIBUTE_MALLOC; /** Delete the worker collection of the specified scheduling context */ void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id); /** Return the worker collection managed by the indicated context */ struct starpu_worker_collection *starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id); /** @} */ /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_SCHED_CTX_H__ */ starpu-1.4.9+dfsg/include/starpu_sched_ctx_hypervisor.h000066400000000000000000000057441507764646700234520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_SCHED_CTX_HYPERVISOR_H__ #define __STARPU_SCHED_CTX_HYPERVISOR_H__ #ifdef __cplusplus extern "C" { #endif /** @ingroup API_Scheduling_Contexts @{ */ /** @name Scheduling Context Link with Hypervisor @{ */ /** Performance counters used by the starpu to indicate the hypervisor how the application and the resources are executing. */ struct starpu_sched_ctx_performance_counters { /** Inform the hypervisor for how long a worker has been idle in the specified context */ void (*notify_idle_cycle)(unsigned sched_ctx_id, int worker, double idle_time); /** Inform the hypervisor that a task executing a specified number of instructions has been popped from the worker */ void (*notify_poped_task)(unsigned sched_ctx_id, int worker); /** Notify the hypervisor that a task has been scheduled on the queue of the worker corresponding to the specified context */ void (*notify_pushed_task)(unsigned sched_ctx_id, int worker); /** Notify the hypervisor that a task has just been executed */ void (*notify_post_exec_task)(struct starpu_task *task, size_t data_size, uint32_t footprint, int hypervisor_tag, double flops); /** Notify the hypervisor that a task has just been submitted */ void (*notify_submitted_job)(struct starpu_task *task, uint32_t footprint, size_t data_size); void (*notify_empty_ctx)(unsigned sched_ctx_id, struct starpu_task *task); /** Notify the hypervisor that the context was deleted */ void (*notify_delete_context)(unsigned sched_ctx); }; /** Indicate to starpu the pointer to the performance counter */ void starpu_sched_ctx_set_perf_counters(unsigned sched_ctx_id, void *perf_counters); /** Callback that lets the scheduling policy tell the hypervisor that a task was pushed on a worker */ void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id); /** Allow the hypervisor to let starpu know it's initialised */ void starpu_sched_ctx_notify_hypervisor_exists(void); /** Ask starpu if it is informed if the hypervisor is initialised */ unsigned starpu_sched_ctx_check_if_hypervisor_exists(void); void starpu_sched_ctx_update_start_resizing_sample(unsigned sched_ctx_id, double start_sample); /** @} */ /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_SCHED_CTX_HYPERVISOR_H__ */ starpu-1.4.9+dfsg/include/starpu_scheduler.h000066400000000000000000000504531507764646700211670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_SCHEDULER_H__ #define __STARPU_SCHEDULER_H__ #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Scheduling_Policy Scheduling Policy @brief TODO. While StarPU comes with a variety of scheduling policies (see \ref TaskSchedulingPolicy), it may sometimes be desirable to implement custom policies to address specific problems. The API described below allows users to write their own scheduling policy. @{ */ struct starpu_task; /** Contain all the methods that implement a scheduling policy. An application may specify which scheduling strategy in the field starpu_conf::sched_policy passed to the function starpu_init(). For each task going through the scheduler, the following methods get called in the given order:
    • starpu_sched_policy::submit_hook when the task is submitted
    • starpu_sched_policy::push_task when the task becomes ready. The scheduler is here given the task
    • starpu_sched_policy::pop_task when the worker is idle. The scheduler here gives back the task to the core. It must not access this task any more
    • starpu_sched_policy::pre_exec_hook right before the worker actually starts the task computation (after transferring any missing data).
    • starpu_sched_policy::post_exec_hook right after the worker actually completes the task computation.
    For each task not going through the scheduler (because starpu_task::execute_on_a_specific_worker was set), these get called:
    • starpu_sched_policy::submit_hook when the task is submitted
    • starpu_sched_policy::push_task_notify when the task becomes ready. This is just a notification, the scheduler does not have to do anything about the task.
    • starpu_sched_policy::pre_exec_hook right before the worker actually starts the task computation (after transferring any missing data).
    • starpu_sched_policy::post_exec_hook right after the worker actually completes the task computation.
    */ struct starpu_sched_policy { /** Initialize the scheduling policy, called before any other method. */ void (*init_sched)(unsigned sched_ctx_id); /** Cleanup the scheduling policy */ void (*deinit_sched)(unsigned sched_ctx_id); /** Insert a task into the scheduler, called when the task becomes ready for execution. This must call starpu_push_task_end() once it has effectively pushed the task to a queue (to note the time when this was done in the task), but before releasing mutexes (so that the task hasn't been already taken by a worker). */ int (*push_task)(struct starpu_task *); double (*simulate_push_task)(struct starpu_task *); /** Notify the scheduler that a task was pushed on a given worker. This method is called when a task that was explicitly assigned to a worker becomes ready and is about to be executed by the worker. This method therefore permits to keep the state of the scheduler coherent even when StarPU bypasses the scheduling strategy. Note: to get an estimation of the task duration, \p perf_workerid needs to be used rather than \p workerid, for the case of parallel tasks. */ void (*push_task_notify)(struct starpu_task *, int workerid, int perf_workerid, unsigned sched_ctx_id); /** Get a task from the scheduler. If this method returns NULL, the worker will start sleeping. If later on some task are pushed for this worker, starpu_wake_worker() must be called to wake the worker so it can call the pop_task() method again. The mutex associated to the worker is already taken when this method is called. This method may release it (e.g. for scalability reasons when doing work stealing), but it must acquire it again before taking the decision whether to return a task or NULL, so the atomicity of deciding to return NULL and making the worker actually sleep is preserved. Otherwise in simgrid or blocking driver mode the worker might start sleeping while a task has just been pushed for it. If this method is defined as NULL, the worker will only execute tasks from its local queue. In this case, the push_task method should use the starpu_push_local_task method to assign tasks to the different workers. */ struct starpu_task *(*pop_task)(unsigned sched_ctx_id); /** Optional field. This method is called when a task is submitted. */ void (*submit_hook)(struct starpu_task *task); /** Optional field. This method is called every time a task is starting. */ void (*pre_exec_hook)(struct starpu_task *, unsigned sched_ctx_id); /** Optional field. This method is called every time a task has been executed. */ void (*post_exec_hook)(struct starpu_task *, unsigned sched_ctx_id); /** Optional field. This method is called when it is a good time to start scheduling tasks. This is notably called when the application calls starpu_task_wait_for_all() or starpu_do_schedule() explicitly. */ void (*do_schedule)(unsigned sched_ctx_id); /** Initialize scheduling structures corresponding to each worker used by the policy. */ void (*add_workers)(unsigned sched_ctx_id, int *workerids, unsigned nworkers); /** Deinitialize scheduling structures corresponding to each worker used by the policy. */ void (*remove_workers)(unsigned sched_ctx_id, int *workerids, unsigned nworkers); /** Whether this scheduling policy does data prefetching, and thus the core should not try to do it opportunistically. */ int prefetches; /** Optional field. Name of the policy. */ const char *policy_name; /** Optional field. Human readable description of the policy. */ const char *policy_description; enum starpu_worker_collection_type worker_type; }; /** Return an NULL-terminated array of all the predefined scheduling policies. See \ref TaskSchedulingPolicy for more details. */ struct starpu_sched_policy **starpu_sched_get_predefined_policies(void); /** Allow an external library to return a scheduling policy to be loaded dynamically. See \ref UsingaNewSchedulingPolicy for more details. */ struct starpu_sched_policy *starpu_get_sched_lib_policy(const char *name); /** Allow an external library to return a list of scheduling policies to be loaded dynamically. See \ref UsingaNewSchedulingPolicy for more details. */ struct starpu_sched_policy **starpu_get_sched_lib_policies(void); /** Return the scheduler policy of the default context. See \ref TaskSchedulingPolicy for more details. */ struct starpu_sched_policy *starpu_sched_get_sched_policy_in_ctx(unsigned sched_ctx_id); /** Return the scheduler policy of the given context. See \ref TaskSchedulingPolicy for more details. */ struct starpu_sched_policy *starpu_sched_get_sched_policy(void); /** When there is no available task for a worker, StarPU blocks this worker on a condition variable. This function specifies which condition variable (and the associated mutex) should be used to block (and to wake up) a worker. Note that multiple workers may use the same condition variable. For instance, in the case of a scheduling strategy with a single task queue, the same condition variable would be used to block and wake up all workers. */ void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond); /** Return the job identifier associated with the task. See \ref TraceSchedTaskDetails for more details. */ unsigned long starpu_task_get_job_id(struct starpu_task *task); /** TODO: check if this is correct Return the current minimum priority level supported by the scheduling policy. See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_sched_get_min_priority(void); /** TODO: check if this is correct Return the current maximum priority level supported by the scheduling policy. See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_sched_get_max_priority(void); /** TODO: check if this is correct Define the minimum task priority level supported by the scheduling policy. The default minimum priority level is the same as the default priority level which is 0 by convention. The application may access that value by calling the function starpu_sched_get_min_priority(). This function should only be called from the initialization method of the scheduling policy, and should not be used directly from the application. See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_sched_set_min_priority(int min_prio); /** TODO: check if this is correct Define the maximum priority level supported by the scheduling policy. The default maximum priority level is 1. The application may access that value by calling the function starpu_sched_get_max_priority(). This function should only be called from the initialization method of the scheduling policy, and should not be used directly from the application. See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_sched_set_max_priority(int max_prio); /** Check if the worker specified by workerid can execute the codelet. Schedulers need to call it before assigning a task to a worker, otherwise the task may fail to execute. See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl); /** Check if the worker specified by workerid can execute the codelet and return which implementation numbers can be used. Schedulers need to call it before assigning a task to a worker, otherwise the task may fail to execute. This should be preferred rather than calling starpu_worker_can_execute_task() for each and every implementation. It can also be used with impl_mask == NULL to check for at least one implementation without determining which. See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_worker_can_execute_task_impl(unsigned workerid, struct starpu_task *task, unsigned *impl_mask); /** Check if the worker specified by workerid can execute the codelet and return the first implementation which can be used. Schedulers need to call it before assigning a task to a worker, otherwise the task may fail to execute. This should be preferred rather than calling starpu_worker_can_execute_task() for each and every implementation. It can also be used with impl_mask == NULL to check for at least one implementation without determining which. See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_worker_can_execute_task_first_impl(unsigned workerid, struct starpu_task *task, unsigned *nimpl); /** The scheduling policy may put tasks directly into a worker’s local queue so that it is not always necessary to create its own queue when the local queue is sufficient. \p back is ignored: the task priority is used to order tasks in this queue. See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_push_local_task(int workerid, struct starpu_task *task, int back); /** Must be called by a scheduler to notify that the given task has just been pushed. See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_push_task_end(struct starpu_task *task); /** Whether \ref STARPU_PREFETCH was set. See \ref SchedulingHelpers for more details. */ int starpu_get_prefetch_flag(void); /** Prefetch data for a given p task on a given p node with a given priority. See \ref SchedulingHelpers for more details. */ int starpu_prefetch_task_input_on_node_prio(struct starpu_task *task, unsigned node, int prio); /** Prefetch data for a given p task on a given p node. See \ref SchedulingHelpers for more details. */ int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node); /** Prefetch data for a given p task on a given p node when the bus is idle with a given priority. See \ref SchedulingHelpers for more details. */ int starpu_idle_prefetch_task_input_on_node_prio(struct starpu_task *task, unsigned node, int prio); /** Prefetch data for a given p task on a given p node when the bus is idle. See \ref SchedulingHelpers for more details. */ int starpu_idle_prefetch_task_input_on_node(struct starpu_task *task, unsigned node); /** Prefetch data for a given p task on a given p worker with a given priority. See \ref SchedulingHelpers for more details. */ int starpu_prefetch_task_input_for_prio(struct starpu_task *task, unsigned worker, int prio); /** Prefetch data for a given p task on a given p worker. See \ref SchedulingHelpers for more details. */ int starpu_prefetch_task_input_for(struct starpu_task *task, unsigned worker); /** Prefetch data for a given p task on a given p worker when the bus is idle with a given priority. See \ref SchedulingHelpers for more details. */ int starpu_idle_prefetch_task_input_for_prio(struct starpu_task *task, unsigned worker, int prio); /** Prefetch data for a given p task on a given p worker when the bus is idle. See \ref SchedulingHelpers for more details. */ int starpu_idle_prefetch_task_input_for(struct starpu_task *task, unsigned worker); /** Return the footprint for a given task, taking into account user-provided perfmodel footprint or size_base functions. See \ref PerformanceModelExample for more details. */ uint32_t starpu_task_footprint(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl); /** Return the raw footprint for the data of a given task (without taking into account user-provided functions). See \ref PerformanceModelExample for more details. */ uint32_t starpu_task_data_footprint(struct starpu_task *task); /** Return expected task duration in micro-seconds on a given architecture \p arch using given implementation \p nimpl. See \ref SchedulingHelpers for more details. */ double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl); /** Same as starpu_task_expected_length() but for a precise worker. See \ref SchedulingHelpers for more details. */ double starpu_task_worker_expected_length(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl); /** Return expected task duration in micro-seconds, averaged over the different workers driven by the scheduler \p sched_ctx_id Note: this is not just the average of the durations using the number of processing units as coefficients, but their efficiency at processing the task, thus the harmonic average of the durations. See \ref SchedulingHelpers for more details. */ double starpu_task_expected_length_average(struct starpu_task *task, unsigned sched_ctx_id); /** Return an estimated speedup factor relative to CPU speed. See \ref SchedulingHelpers for more details. */ double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch *perf_arch); /** Return expected data transfer time in micro-seconds for the given \p memory_node. Prefer using starpu_task_expected_data_transfer_time_for() which is more precise. See \ref SchedulingHelpers for more details. */ double starpu_task_expected_data_transfer_time(unsigned memory_node, struct starpu_task *task); /** Return expected data transfer time in micro-seconds for the given \p worker. See \ref SchedulingHelpers for more details. */ double starpu_task_expected_data_transfer_time_for(struct starpu_task *task, unsigned worker); /** Predict the transfer time (in micro-seconds) to move \p handle to a memory node. See \ref SchedulingHelpers for more details. */ double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned memory_node, enum starpu_data_access_mode mode); /** Return expected energy use in J. See \ref SchedulingHelpers for more details. */ double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl); /** Same as starpu_task_expected_energy but for a precise worker. See \ref SchedulingHelpers for more details. */ double starpu_task_worker_expected_energy(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl); /** Return expected task energy use in J, averaged over the different workers driven by the scheduler \p sched_ctx_id Note: this is not just the average of the energy uses using the number of processing units as coefficients, but their efficiency at processing the task, thus the harmonic average of the energy uses. See \ref SchedulingHelpers for more details. */ double starpu_task_expected_energy_average(struct starpu_task *task, unsigned sched_ctx_id); /** Return expected conversion time in ms (multiformat interface only). See \ref SchedulingHelpers for more details. */ double starpu_task_expected_conversion_time(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl); typedef void (*starpu_notify_ready_soon_func)(void *data, struct starpu_task *task, double delay); /** Register a callback to be called when it is determined when a task will be ready an estimated amount of time from now, because its last dependency has just started and we know how long it will take. See \ref SchedulingHelpers for more details. */ void starpu_task_notify_ready_soon_register(starpu_notify_ready_soon_func f, void *data); /** The scheduling policies indicates if the worker may pop tasks from the list of other workers or if there is a central list with task for all the workers. See \ref DefiningANewBasicSchedulingPolicy for more details. */ void starpu_sched_ctx_worker_shares_tasks_lists(int workerid, int sched_ctx_id); /** The scheduling policy should call this when it makes a scheduling decision for a task. This will possibly stop execution at this point, and then the programmer can inspect local variables etc. to determine why this scheduling decision was done. See \ref STARPU_TASK_BREAK_ON_SCHED See \ref DefiningANewBasicSchedulingPolicy for more details. */ void starpu_sched_task_break(struct starpu_task *task); /** @name Worker operations @{ */ /** Wake up \p workerid while temporarily entering the current worker relax state if needed during the waiting process. Return 1 if \p workerid has been woken up or its state_keep_awake flag has been set to \c 1, and \c 0 otherwise (if \p workerid was not in the STATE_SLEEPING or in the STATE_SCHEDULING). See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_wake_worker_relax(int workerid); /** Must be called to wake up a worker that is sleeping on the cond. Return 0 whenever the worker is not in a sleeping state or has the state_keep_awake flag on. See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_wake_worker_no_relax(int workerid); /** Version of starpu_wake_worker_no_relax() which assumes that the sched mutex is locked See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_wake_worker_locked(int workerid); /** Light version of starpu_wake_worker_relax() which, when possible, speculatively set keep_awake on the target worker without waiting for the worker to enter the relax state. See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_wake_worker_relax_light(int workerid); /** @} */ /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_SCHEDULER_H__ */ starpu-1.4.9+dfsg/include/starpu_simgrid_wrap.h000066400000000000000000000020031507764646700216640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_SIMGRID_WRAP_H__ #define __STARPU_SIMGRID_WRAP_H__ #include #ifdef STARPU_SIMGRID #ifndef main #define main starpu_main #ifdef __cplusplus extern "C" int starpu_main(int argc, char *argv[]); extern "C" int starpu_main(int argc, char **argv); #endif #endif #endif #endif /* __STARPU_SIMGRID_WRAP_H__ */ starpu-1.4.9+dfsg/include/starpu_sink.h000066400000000000000000000016641507764646700201550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_SINK_H__ #define __STARPU_SINK_H__ #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Sink Sink @{ */ void starpu_sink_common_worker(int argc, char **argv); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_SINK_H__ */ starpu-1.4.9+dfsg/include/starpu_stdlib.h000066400000000000000000000300221507764646700204600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2022-2022 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_STDLIB_H__ #define __STARPU_STDLIB_H__ #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Standard_Memory_Library Standard Memory Library @{ */ /** Value passed to the function starpu_malloc_flags() to indicate the memory allocation should be pinned. */ #define STARPU_MALLOC_PINNED ((1ULL) << 1) /** Value passed to the function starpu_malloc_flags() to indicate the memory allocation should be in the limit defined by the environment variables \ref STARPU_LIMIT_CUDA_devid_MEM, \ref STARPU_LIMIT_CUDA_MEM, \ref STARPU_LIMIT_OPENCL_devid_MEM, \ref STARPU_LIMIT_OPENCL_MEM, \ref STARPU_LIMIT_HIP_MEM, \ref STARPU_LIMIT_HIP_devid_MEM and \ref STARPU_LIMIT_CPU_MEM (see Section \ref HowToLimitMemoryPerNode). If no memory is available, it tries to reclaim memory from StarPU. Memory allocated this way needs to be freed by calling the function starpu_free_flags() with the same flag. */ #define STARPU_MALLOC_COUNT ((1ULL) << 2) /** Value passed to the function starpu_malloc_flags() along ::STARPU_MALLOC_COUNT to indicate that while the memory allocation should be kept in the limits defined for ::STARPU_MALLOC_COUNT, no reclaiming should be performed by starpu_malloc_flags() itself, thus potentially overflowing the memory node a bit. StarPU will reclaim memory after next task termination, according to the \ref STARPU_MINIMUM_AVAILABLE_MEM, \ref STARPU_TARGET_AVAILABLE_MEM, \ref STARPU_MINIMUM_CLEAN_BUFFERS, and \ref STARPU_TARGET_CLEAN_BUFFERS environment variables. If ::STARPU_MEMORY_WAIT is set, no overflowing will happen, starpu_malloc_flags() will wait for other eviction mechanisms to release enough memory. */ #define STARPU_MALLOC_NORECLAIM ((1ULL) << 3) /** Value passed to starpu_memory_allocate() to specify that the function should wait for the requested amount of memory to become available, and atomically allocate it. */ #define STARPU_MEMORY_WAIT ((1ULL) << 4) /** Value passed to starpu_memory_allocate() to specify that the function should allocate the amount of memory, even if that means overflowing the total size of the memory node. */ #define STARPU_MEMORY_OVERFLOW ((1ULL) << 5) /** Value passed to the function starpu_malloc_flags() to indicate that when StarPU is using simgrid, the allocation can be "folded", i.e. a memory area is allocated, but its content is actually a replicate of the same memory area, to avoid having to actually allocate that much memory . This thus allows to have a memory area that does not actually consumes memory, to which one can read from and write to normally, but get bogus values. */ #define STARPU_MALLOC_SIMULATION_FOLDED ((1ULL) << 6) /** Value passed to the function starpu_malloc_flags() to indicate that when StarPU is using simgrid, the allocation for that size could be unique. Different from only STARPU_MALLOC_SIMULATION_FOLDED, the same address will be given for all mallocs of that particular size. */ #define STARPU_MALLOC_SIMULATION_UNIQUE ((1ULL)<<7) /** @deprecated Equivalent to starpu_malloc(). This macro is provided to avoid breaking old codes. */ #define starpu_data_malloc_pinned_if_possible starpu_malloc /** @deprecated Equivalent to starpu_free(). This macro is provided to avoid breaking old codes. */ #define starpu_data_free_pinned_if_possible starpu_free /** Set an alignment constraints for starpu_malloc() allocations. \p align must be a power of two. This is for instance called automatically by the OpenCL driver to specify its own alignment constraints. See \ref DataManagementAllocation for more details. */ void starpu_malloc_set_align(size_t align); /** Allocate data of the given size \p dim in main memory, and return the pointer to the allocated data through \p A. It will also try to pin it in CUDA or OpenCL, so that data transfers from this buffer can be asynchronous, and thus permit data transfer and computation overlapping. The allocated buffer must be freed thanks to the starpu_free_noflag() function. See \ref DataManagementAllocation for more details. */ int starpu_malloc(void **A, size_t dim); /** @deprecated Free memory which has previously been allocated with starpu_malloc(). This function is deprecated, one should use starpu_free_noflag(). The function does nothing if the pointer is \c NULL. See \ref DataManagementAllocation for more details. */ int starpu_free(void *A) STARPU_DEPRECATED; /** Perform a memory allocation based on the constraints defined by the given flag. See \ref HowToLimitMemoryPerNode for more details. */ int starpu_malloc_flags(void **A, size_t dim, int flags); /** Free memory by specifying its size. The given flags should be consistent with the ones given to starpu_malloc_flags() when allocating the memory. The function does nothing if the pointer is \c NULL. See \ref HowToLimitMemoryPerNode for more details. */ int starpu_free_flags(void *A, size_t dim, int flags); /** Free memory by specifying its size. Should be used for memory allocated with starpu_malloc(). The function does nothing if the pointer is \c NULL. See \ref DataManagementAllocation for more details. */ int starpu_free_noflag(void *A, size_t dim); typedef int (*starpu_malloc_hook)(unsigned dst_node, void **A, size_t dim, int flags); typedef int (*starpu_free_hook)(unsigned dst_node, void *A, size_t dim, int flags); /** Set allocation functions to be used by StarPU. By default, StarPU will use \c malloc() (or \c cudaHostAlloc() if CUDA GPUs are used) for all its data handle allocations. The application can specify another allocation primitive by calling this. The malloc_hook should pass the allocated pointer through the \c A parameter, and return 0 on success. On allocation failure, it should return -ENOMEM. The \c flags parameter contains ::STARPU_MALLOC_PINNED if the memory should be pinned by the hook for GPU transfer efficiency. The hook can use starpu_memory_pin() to achieve this. The \c dst_node parameter is the starpu memory node, one can convert it to an hwloc logical id with starpu_memory_nodes_numa_id_to_hwloclogid() or to an OS NUMA number with starpu_memory_nodes_numa_devid_to_id(). See \ref DataManagementAllocation for more details. */ void starpu_malloc_set_hooks(starpu_malloc_hook malloc_hook, starpu_free_hook free_hook); /** Pin the given memory area, so that CPU-GPU transfers can be done asynchronously with DMAs. The memory must be unpinned with starpu_memory_unpin() before being freed. Return 0 on success, -1 on error. See \ref DataManagementAllocation for more details. */ int starpu_memory_pin(void *addr, size_t size); /** Unpin the given memory area previously pinned with starpu_memory_pin(). Return 0 on success, -1 on error. See \ref DataManagementAllocation for more details. */ int starpu_memory_unpin(void *addr, size_t size); /** If a memory limit is defined on the given node (see Section \ref HowToLimitMemoryPerNode), return the amount of total memory on the node. Otherwise return -1. See \ref HowToLimitMemoryPerNode for more details. */ starpu_ssize_t starpu_memory_get_total(unsigned node); /** If a memory limit is defined on the given node (see Section \ref HowToLimitMemoryPerNode), return the amount of available memory on the node. Otherwise return -1. See \ref HowToLimitMemoryPerNode for more details. */ starpu_ssize_t starpu_memory_get_available(unsigned node); /** Return the amount of used memory on the node. See \ref DataManagementAllocation for more details. */ size_t starpu_memory_get_used(unsigned node); /** Return the amount of total memory on all memory nodes for whose a memory limit is defined (see Section \ref DataManagementAllocation). */ starpu_ssize_t starpu_memory_get_total_all_nodes(void); /** Return the amount of available memory on all memory nodes for whose a memory limit is defined (see Section \ref DataManagementAllocation). */ starpu_ssize_t starpu_memory_get_available_all_nodes(void); /** Return the amount of used memory on all memory nodes. See \ref DataManagementAllocation for more details. */ size_t starpu_memory_get_used_all_nodes(void); /** If a memory limit is defined on the given node (see Section \ref HowToLimitMemoryPerNode), try to allocate some of it. This does not actually allocate memory, but only accounts for it. This can be useful when the application allocates data another way, but want StarPU to be aware of the allocation size e.g. for memory reclaiming. By default, return -ENOMEM if there is not enough room on the given node. \p flags can be either ::STARPU_MEMORY_WAIT or ::STARPU_MEMORY_OVERFLOW to change this. See \ref HowToLimitMemoryPerNode for more details. */ int starpu_memory_allocate(unsigned node, size_t size, int flags); /** If a memory limit is defined on the given node (see Section \ref HowToLimitMemoryPerNode), free some of it. This does not actually free memory, but only accounts for it, like starpu_memory_allocate(). The amount does not have to be exactly the same as what was passed to starpu_memory_allocate(), only the eventual amount needs to be the same, i.e. one call to starpu_memory_allocate() can be followed by several calls to starpu_memory_deallocate() to declare the deallocation piece by piece. See \ref HowToLimitMemoryPerNode for more details. */ void starpu_memory_deallocate(unsigned node, size_t size); /** If a memory limit is defined on the given node (see Section \ref HowToLimitMemoryPerNode), this will wait for \p size bytes to become available on \p node. Of course, since another thread may be allocating memory concurrently, this does not necessarily mean that this amount will be actually available, just that it was reached. To atomically wait for some amount of memory and reserve it, starpu_memory_allocate() should be used with the ::STARPU_MEMORY_WAIT flag. See \ref HowToLimitMemoryPerNode for more details. */ void starpu_memory_wait_available(unsigned node, size_t size); /** Sleep for the given \p nb_sec seconds. Similar to calling Unix' \c sleep function, except that it takes a float to allow sub-second sleeping, and when StarPU is compiled in SimGrid mode it does not really sleep but just makes SimGrid record that the thread has taken some time to sleep. See \ref Helpers for more details. */ void starpu_sleep(float nb_sec); /** Sleep for the given \p nb_micro_sec micro-seconds. In simgrid mode, this only sleeps within virtual time. See \ref Helpers for more details. */ void starpu_usleep(float nb_micro_sec); /** Account for \p joules J being used. This is support in simgrid mode, to record how much energy was used, and will show up in further call to starpu_energy_used(). See \ref Energy-basedScheduling fore more details. */ void starpu_energy_use(float joules); /** Return the amount of energy having been used in J. This account the amounts passed to starpu_energy_use(), but also the static energy use set by the \ref STARPU_IDLE_POWER environment variable. See \ref Energy-basedScheduling fore more details. */ double starpu_energy_used(void); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_STDLIB_H__ */ starpu-1.4.9+dfsg/include/starpu_task.h000066400000000000000000002242211507764646700201470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2011 Télécom Sud Paris * Copyright (C) 2016 Uppsala University * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_TASK_H__ #define __STARPU_TASK_H__ #include #include #ifdef STARPU_USE_CUDA #include #endif #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Codelet_And_Tasks Codelet And Tasks @brief API to manipulate codelets and tasks. @{ */ /** To be used when setting the field starpu_codelet::where to specify that the codelet has no computation part, and thus does not need to be scheduled, and data does not need to be actually loaded. This is thus essentially used for synchronization tasks. */ #define STARPU_NOWHERE ((1ULL) << 0) /** Convert from enum starpu_worker_archtype to worker type mask for use in "where" fields */ #define STARPU_WORKER_TO_MASK(worker_archtype) (1ULL << (worker_archtype + 1)) /** To be used when setting the field starpu_codelet::where (or starpu_task::where) to specify the codelet (or the task) may be executed on a CPU processing unit. */ #define STARPU_CPU STARPU_WORKER_TO_MASK(STARPU_CPU_WORKER) /** To be used when setting the field starpu_codelet::where (or starpu_task::where) to specify the codelet (or the task) may be executed on a CUDA processing unit. */ #define STARPU_CUDA STARPU_WORKER_TO_MASK(STARPU_CUDA_WORKER) /** To be used when setting the field starpu_codelet::where (or starpu_task::where) to specify the codelet (or the task) may be executed on a HIP processing unit. */ #define STARPU_HIP STARPU_WORKER_TO_MASK(STARPU_HIP_WORKER) /** To be used when setting the field starpu_codelet::where (or starpu_task::where) to specify the codelet (or the task) may be executed on a OpenCL processing unit. */ #define STARPU_OPENCL STARPU_WORKER_TO_MASK(STARPU_OPENCL_WORKER) /** To be used when setting the field starpu_codelet::where (or starpu_task::where) to specify the codelet (or the task) may be executed on a MAX FPGA. */ #define STARPU_MAX_FPGA STARPU_WORKER_TO_MASK(STARPU_MAX_FPGA_WORKER) /** To be used when setting the field starpu_codelet::where (or starpu_task::where) to specify the codelet (or the task) may be executed on a MPI Slave processing unit. */ #define STARPU_MPI_MS STARPU_WORKER_TO_MASK(STARPU_MPI_MS_WORKER) /** To be used when setting the field starpu_codelet::where (or starpu_task::where) to specify the codelet (or the task) may be executed on a TCP/IP Slave processing unit. */ #define STARPU_TCPIP_MS STARPU_WORKER_TO_MASK(STARPU_TCPIP_MS_WORKER) /** Value to be set in starpu_codelet::flags to execute the codelet functions even in simgrid mode. */ #define STARPU_CODELET_SIMGRID_EXECUTE (1 << 0) /** Value to be set in starpu_codelet::flags to execute the codelet functions even in simgrid mode, and later inject the measured timing inside the simulation. */ #define STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT (1 << 1) /** Value to be set in starpu_codelet::flags to make starpu_task_submit() not submit automatic asynchronous partitioning/unpartitioning. */ #define STARPU_CODELET_NOPLANS (1 << 2) /** Value to be set in starpu_codelet::cuda_flags to allow asynchronous CUDA kernel execution. This requires to use the proper CUDA stream, see \ref CUDA-specificOptimizations */ #define STARPU_CUDA_ASYNC (1 << 0) /** Value to be set in starpu_codelet::hip_flags to allow asynchronous HIP kernel execution. This requires to use the proper HIP stream */ #define STARPU_HIP_ASYNC (1 << 0) /** Value to be set in starpu_codelet::opencl_flags to allow asynchronous OpenCL kernel execution. This requires to use proper queueing, see \ref OpenCL-specificOptimizations */ #define STARPU_OPENCL_ASYNC (1 << 0) /** To be used as memory node number for the main CPU memory node. */ #define STARPU_MAIN_RAM 0 /** Describe the type of parallel task. See \ref ParallelTasks for details. */ enum starpu_codelet_type { STARPU_SEQ = 0, /**< (default) for classical sequential tasks. */ STARPU_SPMD, /**< for a parallel task whose threads are handled by StarPU, the code has to use starpu_combined_worker_get_size() and starpu_combined_worker_get_rank() to distribute the work. */ STARPU_FORKJOIN /**< for a parallel task whose threads are started by the codelet function, which has to use starpu_combined_worker_get_size() to determine how many threads should be started. */ }; /** todo */ enum starpu_task_status { STARPU_TASK_INIT, /**< The task has just been initialized. */ #define STARPU_TASK_INIT 0 #define STARPU_TASK_INVALID STARPU_TASK_INIT /**< old name for STARPU_TASK_INIT */ STARPU_TASK_BLOCKED, /**< The task has just been submitted, and its dependencies has not been checked yet. */ STARPU_TASK_READY, /**< The task is ready for execution. */ STARPU_TASK_RUNNING, /**< The task is running on some worker. */ STARPU_TASK_FINISHED, /**< The task is finished executing. */ STARPU_TASK_BLOCKED_ON_TAG, /**< The task is waiting for a tag. */ STARPU_TASK_BLOCKED_ON_TASK, /**< The task is waiting for a task. */ STARPU_TASK_BLOCKED_ON_DATA, /**< The task is waiting for some data. */ STARPU_TASK_STOPPED /**< The task is stopped. */ }; /** CPU implementation of a codelet. */ typedef void (*starpu_cpu_func_t)(void **, void *); /** CUDA implementation of a codelet. */ typedef void (*starpu_cuda_func_t)(void **, void *); /** HIP implementation of a codelet. */ typedef void (*starpu_hip_func_t)(void **, void *); /** OpenCL implementation of a codelet. */ typedef void (*starpu_opencl_func_t)(void **, void *); /** Maxeler FPGA implementation of a codelet. */ typedef void (*starpu_max_fpga_func_t)(void **, void *); /** @ingroup API_Bubble Hierarchical Dags Bubble decision function */ typedef int (*starpu_bubble_func_t)(struct starpu_task *t, void *arg); /** @ingroup API_Bubble Hierarchical Dags Bubble DAG generation function */ typedef void (*starpu_bubble_gen_dag_func_t)(struct starpu_task *t, void *arg); /** @deprecated Setting the field starpu_codelet::cpu_func with this macro indicates the codelet will have several implementations. The use of this macro is deprecated. One should always only define the field starpu_codelet::cpu_funcs. */ #define STARPU_MULTIPLE_CPU_IMPLEMENTATIONS ((starpu_cpu_func_t)-1) /** @deprecated Setting the field starpu_codelet::cuda_func with this macro indicates the codelet will have several implementations. The use of this macro is deprecated. One should always only define the field starpu_codelet::cuda_funcs. */ #define STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS ((starpu_cuda_func_t)-1) /** @deprecated Setting the field starpu_codelet::hip_func with this macro indicates the codelet will have several implementations. The use of this macro is deprecated. One should always only define the field starpu_codelet::hip_funcs. */ #define STARPU_MULTIPLE_HIP_IMPLEMENTATIONS ((starpu_hip_func_t)-1) /** @deprecated Setting the field starpu_codelet::opencl_func with this macro indicates the codelet will have several implementations. The use of this macro is deprecated. One should always only define the field starpu_codelet::opencl_funcs. */ #define STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS ((starpu_opencl_func_t)-1) /** Value to set in starpu_codelet::nbuffers to specify that the codelet can accept a variable number of buffers, specified in starpu_task::nbuffers. */ #define STARPU_VARIABLE_NBUFFERS (-1) /** Value to be set in the starpu_codelet::nodes field to request StarPU to put the data in local memory of the worker running the task (this is the default behavior). */ #define STARPU_SPECIFIC_NODE_LOCAL (-1) /** Value to be set in the starpu_codelet::nodes field to request StarPU to put the data in CPU-accessible memory (and let StarPU choose the NUMA node). */ #define STARPU_SPECIFIC_NODE_CPU (-2) /** Value to be set in the starpu_codelet::nodes field to request StarPU to put the data in some slow memory. */ #define STARPU_SPECIFIC_NODE_SLOW (-3) /** Value to be set in the starpu_codelet::nodes field to request StarPU to put the data in some fast memory. */ #define STARPU_SPECIFIC_NODE_FAST (-4) /** Value to be set in the starpu_codelet::nodes field to let StarPU decide whether to put the data in the local memory of the worker running the task, or in CPU-accessible memory (and let StarPU choose the NUMA node). */ #define STARPU_SPECIFIC_NODE_LOCAL_OR_CPU (-5) /** Value to be set in the starpu_codelet::nodes field to make StarPU not actually put the data in any particular memory, i.e. the task will only get the sequential consistency dependencies, but not actually trigger any data transfer. */ #define STARPU_SPECIFIC_NODE_NONE (-6) struct starpu_transaction; struct _starpu_trs_epoch; typedef struct _starpu_trs_epoch *starpu_trs_epoch_t; struct starpu_task; /** The codelet structure describes a kernel that is possibly implemented on various targets. For compatibility, make sure to initialize the whole structure to zero, either by using explicit memset, or the function starpu_codelet_init(), or by letting the compiler implicitly do it in e.g. static storage case. Note that the codelet structure needs to exist until the task is terminated. If dynamic codelet allocation is desired, release should be done no sooner than the starpu_task::callback_func callback time. If the application wants to make the structure constant, it needs to be filled exactly as StarPU expects: - starpu_codelet::cpu_funcs, starpu_codelet::cuda_funcs, etc. must be used instead of the deprecated starpu_codelet::cpu_func, starpu_codelet::cuda_func, etc. - the starpu_codelet::where field must be set. and additionally, starpu_codelet::checked must be set to 1 to tell StarPU that the conditions above are properly met. Also, the \ref STARPU_CODELET_PROFILING environment variable must be set to 0. An example is provided in tests/main/const_codelet.c */ struct starpu_codelet { /** Optional field to indicate which types of processing units are able to execute the codelet. The different values ::STARPU_CPU, ::STARPU_CUDA, ::STARPU_HIP, ::STARPU_OPENCL can be combined to specify on which types of processing units the codelet can be executed. ::STARPU_CPU|::STARPU_CUDA for instance indicates that the codelet is implemented for both CPU cores and CUDA devices while ::STARPU_OPENCL indicates that it is only available on OpenCL devices. If the field is unset, its value will be automatically set based on the availability of the XXX_funcs fields defined below. It can also be set to ::STARPU_NOWHERE to specify that no computation has to be actually done. */ uint32_t where; /** Define a function which should return 1 if the worker designated by \p workerid can execute the \p nimpl -th implementation of \p task, 0 otherwise. */ int (*can_execute)(unsigned workerid, struct starpu_task *task, unsigned nimpl); /** Optional field to specify the type of the codelet. The default is ::STARPU_SEQ, i.e. usual sequential implementation. Other values (::STARPU_SPMD or ::STARPU_FORKJOIN) declare that a parallel implementation is also available. See \ref ParallelTasks for details. */ enum starpu_codelet_type type; /** Optional field. If a parallel implementation is available, this denotes the maximum combined worker size that StarPU will use to execute parallel tasks for this codelet. */ int max_parallelism; /** @deprecated Optional field which has been made deprecated. One should use instead the field starpu_codelet::cpu_funcs. */ starpu_cpu_func_t cpu_func STARPU_DEPRECATED; /** @deprecated Optional field which has been made deprecated. One should use instead the starpu_codelet::cuda_funcs field. */ starpu_cuda_func_t cuda_func STARPU_DEPRECATED; /** @deprecated Optional field which has been made deprecated. One should use instead the starpu_codelet::opencl_funcs field. */ starpu_opencl_func_t opencl_func STARPU_DEPRECATED; /** Optional array of function pointers to the CPU implementations of the codelet. The functions prototype must be: \code{.c} void cpu_func(void *buffers[], void *cl_arg) \endcode The first argument being the array of data managed by the data management library, and the second argument is a pointer to the argument passed from the field starpu_task::cl_arg. If the field starpu_codelet::where is set, then the field tarpu_codelet::cpu_funcs is ignored if ::STARPU_CPU does not appear in the field starpu_codelet::where, it must be non-NULL otherwise. */ starpu_cpu_func_t cpu_funcs[STARPU_MAXIMPLEMENTATIONS]; /** Optional array of function pointers to the CUDA implementations of the codelet. The functions must be host-functions written in the CUDA runtime API. Their prototype must be: \code{.c} void cuda_func(void *buffers[], void *cl_arg) \endcode If the field starpu_codelet::where is set, then the field starpu_codelet::cuda_funcs is ignored if ::STARPU_CUDA does not appear in the field starpu_codelet::where, it must be non-NULL otherwise. */ starpu_cuda_func_t cuda_funcs[STARPU_MAXIMPLEMENTATIONS]; /** Optional array of flags for CUDA execution. They specify some semantic details about CUDA kernel execution, such as asynchronous execution. */ char cuda_flags[STARPU_MAXIMPLEMENTATIONS]; /** Optional array of function pointers to the HIP implementations of the codelet. The functions must be host-functions written in the HIP runtime API. Their prototype must be: \code{.c} void hip_func(void *buffers[], void *cl_arg) \endcode If the field starpu_codelet::where is set, then the field starpu_codelet::hip_funcs is ignored if ::STARPU_HIP does not appear in the field starpu_codelet::where, it must be non-NULL otherwise. */ starpu_hip_func_t hip_funcs[STARPU_MAXIMPLEMENTATIONS]; /** Optional array of flags for HIP execution. They specify some semantic details about HIP kernel execution, such as asynchronous execution. */ char hip_flags[STARPU_MAXIMPLEMENTATIONS]; /** Optional array of function pointers to the OpenCL implementations of the codelet. The functions prototype must be: \code{.c} void opencl_func(void *buffers[], void *cl_arg) \endcode If the field starpu_codelet::where field is set, then the field starpu_codelet::opencl_funcs is ignored if ::STARPU_OPENCL does not appear in the field starpu_codelet::where, it must be non-NULL otherwise. */ starpu_opencl_func_t opencl_funcs[STARPU_MAXIMPLEMENTATIONS]; /** Optional array of flags for OpenCL execution. They specify some semantic details about OpenCL kernel execution, such as asynchronous execution. */ char opencl_flags[STARPU_MAXIMPLEMENTATIONS]; /** Optional array of function pointers to the Maxeler FPGA implementations of the codelet. The functions prototype must be: \code{.c} void fpga_func(void *buffers[], void *cl_arg) \endcode The first argument being the array of data managed by the data management library, and the second argument is a pointer to the argument passed from the field starpu_task::cl_arg. If the field starpu_codelet::where is set, then the field starpu_codelet::max_fpga_funcs is ignored if ::STARPU_MAX_FPGA does not appear in the field starpu_codelet::where, it must be non-NULL otherwise. */ starpu_max_fpga_func_t max_fpga_funcs[STARPU_MAXIMPLEMENTATIONS]; /** Optional array of strings which provide the name of the CPU functions referenced in the array starpu_codelet::cpu_funcs. This can be used when running on MPI MS devices for StarPU to simply look up the MPI MS function implementation through its name. */ const char *cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS]; /** Optional function to decide if the task is to be transformed into a bubble */ starpu_bubble_func_t bubble_func; /** Optional function to transform the task into a new graph */ starpu_bubble_gen_dag_func_t bubble_gen_dag_func; /** Specify the number of arguments taken by the codelet. These arguments are managed by the DSM and are accessed from the void *buffers[] array. The constant argument passed with the field starpu_task::cl_arg is not counted in this number. This value should not be above \ref STARPU_NMAXBUFS. It may be set to \ref STARPU_VARIABLE_NBUFFERS to specify that the number of buffers and their access modes will be set in starpu_task::nbuffers and starpu_task::modes or starpu_task::dyn_modes, which thus permits to define codelets with a varying number of data. */ int nbuffers; /** Is an array of ::starpu_data_access_mode. It describes the required access modes to the data needed by the codelet (e.g. ::STARPU_RW). The number of entries in this array must be specified in the field starpu_codelet::nbuffers, and should not exceed \ref STARPU_NMAXBUFS. If insufficient, this value can be set with the configure option \ref enable-maxbuffers "--enable-maxbuffers". */ enum starpu_data_access_mode modes[STARPU_NMAXBUFS]; /** Is an array of ::starpu_data_access_mode. It describes the required access modes to the data needed by the codelet (e.g. ::STARPU_RW). The number of entries in this array must be specified in the field starpu_codelet::nbuffers. This field should be used for codelets having a number of data greater than \ref STARPU_NMAXBUFS (see \ref SettingManyDataHandlesForATask). When defining a codelet, one should either define this field or the field starpu_codelet::modes defined above. */ enum starpu_data_access_mode *dyn_modes; /** Default value is 0. If this flag is set, StarPU will not systematically send all data to the memory node where the task will be executing, it will read the starpu_codelet::nodes or starpu_codelet::dyn_nodes array to determine, for each data, on which memory node to send it. */ unsigned specific_nodes; /** Optional field. When starpu_codelet::specific_nodes is 1, this specifies the memory nodes where each data should be sent to for task execution. This can be a specific memory node (>= 0), or any of ::STARPU_SPECIFIC_NODE_LOCAL, ::STARPU_SPECIFIC_NODE_CPU, ::STARPU_SPECIFIC_NODE_SLOW, :STARPU_SPECIFIC_NODE_FASTSTARPU_SPECIFIC_NODE_FAST, ::STARPU_SPECIFIC_NODE_LOCAL_OR_CPU, ::STARPU_SPECIFIC_NODE_NONE. The number of entries in this array is starpu_codelet::nbuffers, and should not exceed \ref STARPU_NMAXBUFS. */ int nodes[STARPU_NMAXBUFS]; /** Optional field. When starpu_codelet::specific_nodes is 1, this specifies the memory nodes where each data should be sent to for task execution. The number of entries in this array is starpu_codelet::nbuffers. This field should be used for codelets having a number of data greater than \ref STARPU_NMAXBUFS (see \ref SettingManyDataHandlesForATask). When defining a codelet, one should either define this field or the field starpu_codelet::nodes defined above. */ int *dyn_nodes; /** Optional pointer to the task duration performance model associated to this codelet. This optional field is ignored when set to NULL or when its field starpu_perfmodel::symbol is not set. */ struct starpu_perfmodel *model; /** Optional pointer to the task energy consumption performance model associated to this codelet (in J). This optional field is ignored when set to NULL or when its field starpu_perfmodel::symbol is not set. In the case of parallel codelets, this has to account for all processing units involved in the parallel execution. */ struct starpu_perfmodel *energy_model; /** Optional array for statistics collected at runtime: this is filled by StarPU and should not be accessed directly, but for example by calling the function starpu_codelet_display_stats() (See starpu_codelet_display_stats() for details). */ unsigned long per_worker_stats[STARPU_NMAXWORKERS]; /** Optional name of the codelet. This can be useful for debugging purposes. */ const char *name; /** Optional color of the codelet. This can be useful for debugging purposes. Value 0 acts like if this field wasn't specified. Color representation is hex triplet (for example: 0xff0000 is red, 0x0000ff is blue, 0xffa500 is orange, ...). */ unsigned color; /** Optional field, the default value is NULL. This is a function pointer of prototype void (*f)(void *) which specifies a possible callback. If this pointer is non-NULL, the callback function is executed on the host after the execution of the task. If the task defines a callback, the codelet callback is not called, unless called within the task callback function. The callback is passed the value contained in the starpu_task::callback_arg field. No callback is executed if the field is set to NULL. */ void (*callback_func)(void *); /** Various flags for the codelet. */ int flags; struct starpu_perf_counter_sample *perf_counter_sample; struct starpu_perf_counter_sample_cl_values *perf_counter_values; /** Whether _starpu_codelet_check_deprecated_fields was already done or not. */ int checked; }; /** Codelet with empty function defined for all drivers */ extern struct starpu_codelet starpu_codelet_nop; /** Describe a data handle along with an access mode. */ struct starpu_data_descr { starpu_data_handle_t handle; /**< data */ enum starpu_data_access_mode mode; /**< access mode */ }; /** Describe a task that can be offloaded on the various processing units managed by StarPU. It instantiates a codelet. It can either be allocated dynamically with the function starpu_task_create(), or declared statically. In the latter case, the programmer has to zero the structure starpu_task and to fill the different fields properly. The indicated default values correspond to the configuration of a task allocated with starpu_task_create(). */ struct starpu_task { /** Optional name of the task. This can be useful for debugging purposes. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_NAME followed by the const char *. */ const char *name; /** Optional file name where the task was submitted. This can be useful for debugging purposes. */ const char *file; /** Optional line number where the task was submitted. This can be useful for debugging purposes. */ int line; /** Pointer to the corresponding structure starpu_codelet. This describes where the kernel should be executed, and supplies the appropriate implementations. When set to NULL, no code is executed during the tasks, such empty tasks can be useful for synchronization purposes. */ struct starpu_codelet *cl; /** When set, specify where the task is allowed to be executed. When unset, take the value of starpu_codelet::where. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_EXECUTE_WHERE followed by an unsigned long long. */ int32_t where; /** Specify the number of buffers. This is only used when starpu_codelet::nbuffers is \ref STARPU_VARIABLE_NBUFFERS. With starpu_task_insert() and alike this is automatically computed when using ::STARPU_DATA_ARRAY and alike. */ int nbuffers; /** Keep dyn_handles, dyn_interfaces and dyn_modes before the equivalent static arrays, so we can detect dyn_handles being NULL while nbuffers being bigger that STARPU_NMAXBUFS (otherwise the overflow would put a non-NULL) */ /** Array of ::starpu_data_handle_t. Specify the handles to the different pieces of data accessed by the task. The number of entries in this array must be specified in the field starpu_codelet::nbuffers. This field should be used for tasks having a number of data greater than \ref STARPU_NMAXBUFS (see \ref SettingManyDataHandlesForATask). When defining a task, one should either define this field or the field starpu_task::handles defined below. With starpu_task_insert() and alike this is automatically filled when using ::STARPU_DATA_ARRAY and alike. */ starpu_data_handle_t *dyn_handles; /** Array of data pointers to the memory node where execution will happen, managed by the DSM. Is used when the field starpu_task::dyn_handles is defined. This is filled by StarPU. */ void **dyn_interfaces; /** Used only when starpu_codelet::nbuffers is \ref STARPU_VARIABLE_NBUFFERS. Array of ::starpu_data_access_mode which describes the required access modes to the data needed by the codelet (e.g. ::STARPU_RW). The number of entries in this array must be specified in the field starpu_codelet::nbuffers. This field should be used for codelets having a number of data greater than \ref STARPU_NMAXBUFS (see \ref SettingManyDataHandlesForATask). When defining a codelet, one should either define this field or the field starpu_task::modes defined below. With starpu_task_insert() and alike this is automatically filled when using ::STARPU_DATA_MODE_ARRAY and alike. */ enum starpu_data_access_mode *dyn_modes; /** Array of ::starpu_data_handle_t. Specify the handles to the different pieces of data accessed by the task. The number of entries in this array must be specified in the field starpu_codelet::nbuffers, and should not exceed \ref STARPU_NMAXBUFS. If insufficient, this value can be set with the configure option \ref enable-maxbuffers "--enable-maxbuffers". With starpu_task_insert() and alike this is automatically filled when using ::STARPU_R and alike. */ starpu_data_handle_t handles[STARPU_NMAXBUFS]; /** Array of Data pointers to the memory node where execution will happen, managed by the DSM. This is filled by StarPU. */ void *interfaces[STARPU_NMAXBUFS]; /** Used only when starpu_codelet::nbuffers is \ref STARPU_VARIABLE_NBUFFERS. Array of ::starpu_data_access_mode which describes the required access modes to the data needed by the codelet (e.g. ::STARPU_RW). The number of entries in this array must be specified in the field starpu_task::nbuffers, and should not exceed \ref STARPU_NMAXBUFS. If insufficient, this value can be set with the configure option \ref enable-maxbuffers "--enable-maxbuffers". With starpu_task_insert() and alike this is automatically filled when using ::STARPU_DATA_MODE_ARRAY and alike. */ enum starpu_data_access_mode modes[STARPU_NMAXBUFS]; /** Optional pointer to an array of characters which allows to define the sequential consistency for each handle for the current task. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_HANDLES_SEQUENTIAL_CONSISTENCY followed by an unsigned char * */ unsigned char *handles_sequential_consistency; /** Optional pointer which is passed to the codelet through the second argument of the codelet implementation (e.g. starpu_codelet::cpu_func or starpu_codelet::cuda_func). The default value is NULL. Note that the pointer is passed unchanged to most drivers, so the application has to ensure the liveness of the pointed data, by using static memory or dynamic allocation (starpu_task::cl_arg_free can be used for convenience in that case). For the master/slave drivers however, the content pointed by cl_arg is copied to the slave, so the size of the data must be set in starpu_task::cl_arg_size. starpu_codelet_pack_args() and starpu_codelet_unpack_args() are helpers that can can be used to respectively pack and unpack data into and from it and update starpu_task::cl_arg_size accordingly. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_CL_ARGS followed by a void* and a size_t. */ void *cl_arg; /** Optional field. For some specific drivers, the pointer starpu_task::cl_arg cannot not be directly given to the driver function. A buffer of size starpu_task::cl_arg_size needs to be allocated on the driver. This buffer is then filled with the starpu_task::cl_arg_size bytes starting at address starpu_task::cl_arg. In this case, the argument given to the codelet is therefore not the starpu_task::cl_arg pointer, but the address of the buffer in local store (LS) instead. This field is ignored for CPU, CUDA and OpenCL codelets, where the starpu_task::cl_arg pointer is given as such. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_CL_ARGS followed by a void* and a size_t. */ size_t cl_arg_size; /** Optional pointer which points to the return value of submitted task. The default value is NULL. starpu_codelet_pack_arg() and starpu_codelet_unpack_arg() can be used to respectively pack and unpack the return value into and form it. starpu_task::cl_ret can be used for MPI support. The only requirement is that the size of the return value must be set in starpu_task::cl_ret_size . */ void *cl_ret; /** Optional field. The buffer of starpu_codelet_pack_arg() and starpu_codelet_unpack_arg() can be allocated with the starpu_task::cl_ret_size bytes starting at address starpu_task::cl_ret. starpu_task::cl_ret_size can be used for MPI support. */ size_t cl_ret_size; /** Optional field, the default value is NULL. This is a function pointer of prototype void (*f)(void *) which specifies a possible callback. If this pointer is non-NULL, the callback function is executed on the host after the execution of the task. Contrary to starpu_task::callback_func, it is called before releasing tasks which depend on this task, so those cannot be already executing. The callback is passed the value contained in the starpu_task::epilogue_callback_arg field. No callback is executed if the field is set to NULL. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_EPILOGUE_CALLBACK followed by the function pointer. */ void (*epilogue_callback_func)(void *); /** Optional field, the default value is NULL. This is the pointer passed to the epilogue callback function. This field is ignored if the field starpu_task::epilogue_callback_func is set to NULL. */ void *epilogue_callback_arg; /** Optional field, the default value is NULL. This is a function pointer of prototype void (*f)(void *) which specifies a possible callback. If this pointer is non-NULL, the callback function is executed on the host after the execution of the task. Contrary to starpu_task::epilogue_callback, it is called after releasing tasks which depend on this task, so those might already be executing. The callback is passed the value contained in the starpu_task::callback_arg field. No callback is executed if the field is set to NULL. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_CALLBACK followed by the function pointer, or thanks to ::STARPU_CALLBACK_WITH_ARG (or ::STARPU_CALLBACK_WITH_ARG_NFREE) followed by the function pointer and the argument. */ void (*callback_func)(void *); /** Optional field, the default value is NULL. This is the pointer passed to the callback function. This field is ignored if the field starpu_task::callback_func is set to NULL. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_CALLBACK_ARG followed by the argument pointer, or thanks to ::STARPU_CALLBACK_WITH_ARG or ::STARPU_CALLBACK_WITH_ARG_NFREE followed by the function pointer and the argument. */ void *callback_arg; /** Optional field, the default value is NULL. This is a function pointer of prototype void (*f)(void *) which specifies a possible callback. If this pointer is non-NULL, the callback function is executed on the host when the task becomes ready for execution, before getting scheduled. The callback is passed the value contained in the starpu_task::prologue_callback_arg field. No callback is executed if the field is set to NULL. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_PROLOGUE_CALLBACK followed by the function pointer. */ void (*prologue_callback_func)(void *); /** Optional field, the default value is NULL. This is the pointer passed to the prologue callback function. This field is ignored if the field starpu_task::prologue_callback_func is set to NULL. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_PROLOGUE_CALLBACK_ARG followed by the argument */ void *prologue_callback_arg; /** Optional field, the default value is NULL. This is a function pointer of prototype void (*f)(void*) which specifies a possible callback. If this pointer is non-NULL, the callback function is executed on the host when the task is pop-ed from the scheduler, just before getting executed. The callback is passed the value contained in the starpu_task::prologue_callback_pop_arg field. No callback is executed if the field is set to NULL. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_PROLOGUE_CALLBACK_POP followed by the function pointer. */ void (*prologue_callback_pop_func)(void *); /** Optional field, the default value is NULL. This is the pointer passed to the prologue_callback_pop function. This field is ignored if the field starpu_task::prologue_callback_pop_func is set to NULL. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_PROLOGUE_CALLBACK_POP_ARG followed by the argument. */ void *prologue_callback_pop_arg; /** Transaction to which the task belongs, if any */ struct starpu_transaction *transaction; /** Transaction epoch to which the task belongs, if any */ starpu_trs_epoch_t trs_epoch; /** Optional field. Contain the tag associated to the task if the field starpu_task::use_tag is set, ignored otherwise. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_TAG followed by a starpu_tag_t. */ starpu_tag_t tag_id; /** Optional field. In case starpu_task::cl_arg was allocated by the application through malloc(), setting starpu_task::cl_arg_free to 1 makes StarPU automatically call free(cl_arg) when destroying the task. This saves the user from defining a callback just for that. With starpu_task_insert() and alike this is set to 1 when using ::STARPU_CL_ARGS. */ unsigned cl_arg_free : 1; /** Optional field. In case starpu_task::cl_ret was allocated by the application through malloc(), setting starpu_task::cl_ret_free to 1 makes StarPU automatically call free(cl_ret) when destroying the task. */ unsigned cl_ret_free : 1; /** Optional field. In case starpu_task::callback_arg was allocated by the application through malloc(), setting starpu_task::callback_arg_free to 1 makes StarPU automatically call free(callback_arg) when destroying the task. With starpu_task_insert() and alike, this is set to 1 when using ::STARPU_CALLBACK_ARG or ::STARPU_CALLBACK_WITH_ARG, or set to 0 when using ::STARPU_CALLBACK_ARG_NFREE */ unsigned callback_arg_free : 1; /** Optional field. In case starpu_task::epilogue_callback_arg was allocated by the application through malloc(), setting starpu_task::epilogue_callback_arg_free to 1 makes StarPU automatically call free(epilogue_callback_arg) when destroying the task. */ unsigned epilogue_callback_arg_free : 1; /** Optional field. In case starpu_task::prologue_callback_arg was allocated by the application through malloc(), setting starpu_task::prologue_callback_arg_free to 1 makes StarPU automatically call free(prologue_callback_arg) when destroying the task. With starpu_task_insert() and alike this is set to 1 when using ::STARPU_PROLOGUE_CALLBACK_ARG, or set to 0 when using ::STARPU_PROLOGUE_CALLBACK_ARG_NFREE */ unsigned prologue_callback_arg_free : 1; /** Optional field. In case starpu_task::prologue_callback_pop_arg was allocated by the application through malloc(), setting starpu_task::prologue_callback_pop_arg_free to 1 makes StarPU automatically call free(prologue_callback_pop_arg) when destroying the task. With starpu_task_insert() and alike this is set to 1 when using ::STARPU_PROLOGUE_CALLBACK_POP_ARG, or set to 0 when using ::STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE */ unsigned prologue_callback_pop_arg_free : 1; /** Optional field, the default value is 0. If set, this flag indicates that the task should be associated with the tag contained in the starpu_task::tag_id field. Tag allow the application to synchronize with the task and to express task dependencies easily. With starpu_task_insert() and alike this is set to 1 when using ::STARPU_TAG. */ unsigned use_tag : 1; /** If this flag is set (which is the default), sequential consistency is enforced for the data parameters of this task for which sequential consistency is enabled. Clearing this flag permits to disable sequential consistency for this task, even if data have it enabled. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_SEQUENTIAL_CONSISTENCY followed by an unsigned. */ unsigned sequential_consistency : 1; /** If this flag is set, the function starpu_task_submit() is blocking and returns only when the task has been executed (or if no worker is able to process the task). Otherwise, starpu_task_submit() returns immediately. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_TASK_SYNCHRONOUS followed an int. */ unsigned synchronous : 1; /** Default value is 0. If this flag is set, StarPU will bypass the scheduler and directly affect this task to the worker specified by the field starpu_task::workerid. With starpu_task_insert() and alike this is set to 1 when using ::STARPU_EXECUTE_ON_WORKER. */ unsigned execute_on_a_specific_worker : 1; /** Optional field, default value is 1. If this flag is set, it is not possible to synchronize with the task by the means of starpu_task_wait() later on. Internal data structures are only guaranteed to be freed once starpu_task_wait() is called if the flag is not set. With starpu_task_insert() and alike this is set to 1. */ unsigned detach : 1; /** Optional value. Default value is 0 for starpu_task_init(), and 1 for starpu_task_create(). If this flag is set, the task structure will automatically be freed, either after the execution of the callback if the task is detached, or during starpu_task_wait() otherwise. If this flag is not set, dynamically allocated data structures will not be freed until starpu_task_destroy() is called explicitly. Setting this flag for a statically allocated task structure will result in undefined behaviour. The flag is set to 1 when the task is created by calling starpu_task_create(). Note that starpu_task_wait_for_all() will not free any task. With starpu_task_insert() and alike this is set to 1. Calling starpu_task_set_destroy() can be used to set this field to 1 after submission. Indeed this function will manage concurrency against the termination of the task. */ unsigned destroy : 1; /** Optional field. If this flag is set, the task will be re-submitted to StarPU once it has been executed. This flag must not be set if the flag starpu_task::destroy is set. This flag must be set before making another task depend on this one. With starpu_task_insert() and alike this is set to 0. */ unsigned regenerate : 1; /** do not allocate a submitorder id for this task With starpu_task_insert() and alike this can be specified thanks to ::STARPU_TASK_NO_SUBMITORDER followed by an unsigned. */ unsigned no_submitorder : 1; /** @private This is only used for tasks that use multiformat handle. This should only be used by StarPU. */ unsigned char mf_skip; /** Whether this task has failed and will thus have to be retried Set by StarPU. */ unsigned char failed; /** Whether the scheduler has pushed the task on some queue Set by StarPU. */ unsigned char scheduled; /** Whether the scheduler has prefetched the task's data Set by StarPU. */ unsigned char prefetched; /** Optional field. If the field starpu_task::execute_on_a_specific_worker is set, this field indicates the identifier of the worker that should process this task (as returned by starpu_worker_get_id()). This field is ignored if the field starpu_task::execute_on_a_specific_worker is set to 0. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_EXECUTE_ON_WORKER followed by an int. */ unsigned workerid; /** Optional field. If the field starpu_task::execute_on_a_specific_worker is set, this field indicates the per-worker consecutive order in which tasks should be executed on the worker. Tasks will be executed in consecutive starpu_task::workerorder values, thus ignoring the availability order or task priority. See \ref StaticScheduling for more details. This field is ignored if the field starpu_task::execute_on_a_specific_worker is set to 0. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_WORKER_ORDER followed by an unsigned. */ unsigned workerorder; /** Optional field. If the field starpu_task::workerids_len is different from 0, this field indicates an array of bits (stored as uint32_t values) which indicate the set of workers which are allowed to execute the task. starpu_task::workerid takes precedence over this. With starpu_task_insert() and alike, this can be specified along the field workerids_len thanks to ::STARPU_TASK_WORKERIDS followed by a number of workers and an array of bits which size is the number of workers. */ uint32_t *workerids; /** Optional field. This provides the number of uint32_t values in the starpu_task::workerids array. With starpu_task_insert() and alike, this can be specified along the field workerids thanks to ::STARPU_TASK_WORKERIDS followed by a number of workers and an array of bits which size is the number of workers. */ unsigned workerids_len; /** Optional field, the default value is ::STARPU_DEFAULT_PRIO. This field indicates a level of priority for the task. This is an integer value that must be set between the return values of the function starpu_sched_get_min_priority() for the least important tasks, and that of the function starpu_sched_get_max_priority() for the most important tasks (included). The ::STARPU_MIN_PRIO and ::STARPU_MAX_PRIO macros are provided for convenience and respectively return the value of starpu_sched_get_min_priority() and starpu_sched_get_max_priority(). Default priority is ::STARPU_DEFAULT_PRIO, which is always defined as 0 in order to allow static task initialization. Scheduling strategies that take priorities into account can use this parameter to take better scheduling decisions, but the scheduling policy may also ignore it. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_PRIORITY followed by an unsigned long long. */ int priority; /** Current state of the task. Call starpu_task_status_get_as_string() to get the status as a string. Set by StarPU. */ enum starpu_task_status status; /** @private This field is set when initializing a task. The function starpu_task_submit() will fail if the field does not have the correct value. This will hence avoid submitting tasks which have not been properly initialised. */ int magic; /** Allow to get the type of task, for filtering out tasks in profiling outputs, whether it is really internal to StarPU (::STARPU_TASK_TYPE_INTERNAL), a data acquisition synchronization task (::STARPU_TASK_TYPE_DATA_ACQUIRE), or a normal task (::STARPU_TASK_TYPE_NORMAL) Set by StarPU. */ unsigned type; /** color of the task to be used in dag.dot. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_TASK_COLOR followed by an int. */ unsigned color; /** Scheduling context. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_SCHED_CTX followed by an unsigned. */ unsigned sched_ctx; /** Help the hypervisor monitor the execution of this task. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_HYPERVISOR_TAG followed by an int. */ int hypervisor_tag; /** TODO: related with sched contexts and parallel tasks With starpu_task_insert() and alike this can be specified thanks to ::STARPU_POSSIBLY_PARALLEL followed by an unsigned. */ unsigned possibly_parallel; /** Optional field. The bundle that includes this task. If no bundle is used, this should be NULL. */ starpu_task_bundle_t bundle; /** Optional field. Profiling information for the task. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_TASK_PROFILING_INFO followed by a pointer to the appropriate struct. */ struct starpu_profiling_task_info *profiling_info; /** The application can set this to the number of floating points operations that the task will have to achieve. StarPU will measure the time that the task takes, and divide the two to get the GFlop/s achieved by the task. This will allow getting GFlops/s curves from the tool starpu_perfmodel_plot, and is useful for the hypervisor load balancing. With starpu_task_insert() and alike this can be specified thanks to ::STARPU_FLOPS followed by a double. */ double flops; /** Output field. Predicted duration of the task in microseconds. This field is only set if the scheduling strategy uses performance models. Set by StarPU. */ double predicted; /** Output field. Predicted data transfer duration for the task in microseconds. This field is only valid if the scheduling strategy uses performance models. Set by StarPU. */ double predicted_transfer; double predicted_start; /** @private A pointer to the previous task. This should only be used by StarPU schedulers. */ struct starpu_task *prev; /** @private A pointer to the next task. This should only be used by StarPU schedulers. */ struct starpu_task *next; /** @private This is private to StarPU, do not modify. */ void *starpu_private; #ifdef STARPU_OPENMP /** @private This is private to StarPU, do not modify. */ struct starpu_omp_task *omp_task; #else void *omp_task; #endif /** When using hierarchical dags, the job identifier of the bubble task which created the current task */ unsigned long bubble_parent; /** When using hierarchical dags, a pointer to the bubble decision function */ starpu_bubble_func_t bubble_func; /** When using hierarchical dags, a pointer to an argument to be given when calling the bubble decision function */ void *bubble_func_arg; /** When using hierarchical dags, a pointer to the bubble DAG generation function */ starpu_bubble_gen_dag_func_t bubble_gen_dag_func; /** When using hierarchical dags, a pointer to an argument to be given when calling the bubble DAG generation function */ void *bubble_gen_dag_func_arg; /** @private This is private to StarPU, do not modify. */ unsigned nb_termination_call_required; /** This field is managed by the scheduler, is it allowed to do whatever with it. Typically, some area would be allocated on push, and released on pop. With starpu_task_insert() and alike this is set when using ::STARPU_TASK_SCHED_DATA. */ void *sched_data; }; /** To be used in the starpu_task::type field, for normal application tasks. */ #define STARPU_TASK_TYPE_NORMAL 0 /** To be used in the starpu_task::type field, for StarPU-internal tasks. */ #define STARPU_TASK_TYPE_INTERNAL (1 << 0) /** To be used in the starpu_task::type field, for StarPU-internal data acquisition tasks. */ #define STARPU_TASK_TYPE_DATA_ACQUIRE (1 << 1) /* Note: remember to update starpu_task_init and starpu_task_ft_create_retry * as well */ /** Value to be used to initialize statically allocated tasks. This is equivalent to initializing a structure starpu_task with the function starpu_task_init(). */ #define STARPU_TASK_INITIALIZER \ { \ .cl = NULL, \ .where = -1, \ .cl_arg = NULL, \ .cl_arg_size = 0, \ .cl_ret = NULL, \ .cl_ret_size = 0, \ .callback_func = NULL, \ .callback_arg = NULL, \ .epilogue_callback_func = NULL, \ .epilogue_callback_arg = NULL, \ .priority = STARPU_DEFAULT_PRIO, \ .use_tag = 0, \ .sequential_consistency = 1, \ .synchronous = 0, \ .execute_on_a_specific_worker = 0, \ .workerorder = 0, \ .bundle = NULL, \ .detach = 1, \ .destroy = 0, \ .regenerate = 0, \ .status = STARPU_TASK_INIT, \ .profiling_info = NULL, \ .predicted = NAN, \ .predicted_transfer = NAN, \ .predicted_start = NAN, \ .starpu_private = NULL, \ .magic = 42, \ .type = 0, \ .color = 0, \ .sched_ctx = STARPU_NMAX_SCHED_CTXS, \ .hypervisor_tag = 0, \ .flops = 0.0, \ .scheduled = 0, \ .prefetched = 0, \ .dyn_handles = NULL, \ .dyn_interfaces = NULL, \ .dyn_modes = NULL, \ .name = NULL, \ .possibly_parallel = 0 \ } /** Return the number of buffers for \p task, i.e. starpu_codelet::nbuffers, or starpu_task::nbuffers if the former is \ref STARPU_VARIABLE_NBUFFERS. */ #define STARPU_TASK_GET_NBUFFERS(task) ((unsigned)((task)->cl->nbuffers == STARPU_VARIABLE_NBUFFERS ? ((task)->nbuffers) : ((task)->cl->nbuffers))) /** Return the \p i -th data handle of \p task. If \p task is defined with a static or dynamic number of handles, will either return the \p i -th element of the field starpu_task::handles or the \p i -th element of the field starpu_task::dyn_handles (see \ref SettingManyDataHandlesForATask) */ #define STARPU_TASK_GET_HANDLE(task, i) (((task)->dyn_handles) ? (task)->dyn_handles[i] : (task)->handles[i]) /** Return all the data handles of \p task. If \p task is defined with a static or dynamic number of handles, will either return all the element of the field starpu_task::handles or all the elements of the field starpu_task::dyn_handles (see \ref SettingManyDataHandlesForATask) */ #define STARPU_TASK_GET_HANDLES(task) (((task)->dyn_handles) ? (task)->dyn_handles : (task)->handles) /** Set the \p i -th data handle of \p task with \p handle. If \p task is defined with a static or dynamic number of handles, will either set the \p i -th element of the field starpu_task::handles or the \p i -th element of the field starpu_task::dyn_handles (see \ref SettingManyDataHandlesForATask) */ #define STARPU_TASK_SET_HANDLE(task, handle, i) \ do { \ if ((task)->dyn_handles) \ (task)->dyn_handles[i] = handle; \ else \ (task)->handles[i] = handle; \ } \ while (0) /** Return the access mode of the \p i -th data handle of \p codelet. If \p codelet is defined with a static or dynamic number of handles, will either return the \p i -th element of the field starpu_codelet::modes or the \p i -th element of the field starpu_codelet::dyn_modes (see \ref SettingManyDataHandlesForATask) */ #define STARPU_CODELET_GET_MODE(codelet, i) \ (((codelet)->dyn_modes) ? (codelet)->dyn_modes[i] : (assert(i < STARPU_NMAXBUFS), (codelet)->modes[i])) /** Set the access mode of the \p i -th data handle of \p codelet. If \p codelet is defined with a static or dynamic number of handles, will either set the \p i -th element of the field starpu_codelet::modes or the \p i -th element of the field starpu_codelet::dyn_modes (see \ref SettingManyDataHandlesForATask) */ #define STARPU_CODELET_SET_MODE(codelet, mode, i) \ do { \ if ((codelet)->dyn_modes) \ (codelet)->dyn_modes[i] = mode; \ else \ (codelet)->modes[i] = mode; \ } \ while (0) /** Return the access mode of the \p i -th data handle of \p task. If \p task is defined with a static or dynamic number of handles, will either return the \p i -th element of the field starpu_task::modes or the \p i -th element of the field starpu_task::dyn_modes (see \ref SettingManyDataHandlesForATask) */ #define STARPU_TASK_GET_MODE(task, i) \ ((task)->cl->nbuffers == STARPU_VARIABLE_NBUFFERS || (task)->dyn_modes ? (((task)->dyn_modes) ? (task)->dyn_modes[i] : (task)->modes[i]) : STARPU_CODELET_GET_MODE((task)->cl, i)) /** Set the access mode of the \p i -th data handle of \p task. If \p task is defined with a static or dynamic number of handles, will either set the \p i -th element of the field starpu_task::modes or the \p i -th element of the field starpu_task::dyn_modes (see \ref SettingManyDataHandlesForATask) */ #define STARPU_TASK_SET_MODE(task, mode, i) \ do { \ if ((task)->cl->nbuffers == STARPU_VARIABLE_NBUFFERS || (task)->cl->nbuffers > STARPU_NMAXBUFS) \ if ((task)->dyn_modes) \ (task)->dyn_modes[i] = mode; \ else \ (task)->modes[i] = mode; \ else \ { \ enum starpu_data_access_mode cl_mode = STARPU_CODELET_GET_MODE((task)->cl, i); \ STARPU_ASSERT_MSG(cl_mode == mode, \ "Task <%s> can't set its %d-th buffer mode to %d as the codelet it derives from uses %d", \ (task)->cl->name, i, mode, cl_mode); \ } \ } \ while (0) /** Return the target node of the \p i -th data handle of \p codelet. If \p node is defined with a static or dynamic number of handles, will either return the \p i -th element of the field starpu_codelet::nodes or the \p i -th element of the field starpu_codelet::dyn_nodes (see \ref SettingManyDataHandlesForATask) */ #define STARPU_CODELET_GET_NODE(codelet, i) (((codelet)->dyn_nodes) ? (codelet)->dyn_nodes[i] : (codelet)->nodes[i]) /** Set the target node of the \p i -th data handle of \p codelet. If \p codelet is defined with a static or dynamic number of handles, will either set the \p i -th element of the field starpu_codelet::nodes or the \p i -th element of the field starpu_codelet::dyn_nodes (see \ref SettingManyDataHandlesForATask) */ #define STARPU_CODELET_SET_NODE(codelet, __node, i) \ do { \ if ((codelet)->dyn_nodes) \ (codelet)->dyn_nodes[i] = __node; \ else \ (codelet)->nodes[i] = __node; \ } \ while (0) /** Initialize \p task with default values. This function is implicitly called by starpu_task_create(). By default, tasks initialized with starpu_task_init() must be deinitialized explicitly with starpu_task_clean(). Tasks can also be initialized statically, using ::STARPU_TASK_INITIALIZER. See \ref PerformanceModelCalibration for more details. */ void starpu_task_init(struct starpu_task *task); /** Release all the structures automatically allocated to execute \p task, but not the task structure itself and values set by the user remain unchanged. It is thus useful for statically allocated tasks for instance. It is also useful when users want to execute the same operation several times with as least overhead as possible. It is called automatically by starpu_task_destroy(). It has to be called only after explicitly waiting for the task or after starpu_shutdown() (waiting for the callback is not enough, since StarPU still manipulates the task after calling the callback). See \ref PerformanceModelCalibration for more details. */ void starpu_task_clean(struct starpu_task *task); /** Allocate a task structure and initialize it with default values. Tasks allocated dynamically with starpu_task_create() are automatically freed when the task is terminated. This means that the task pointer can not be used any more once the task is submitted, since it can be executed at any time (unless dependencies make it wait) and thus freed at any time. If the field starpu_task::destroy is explicitly unset, the resources used by the task have to be freed by calling starpu_task_destroy(). See \ref SubmittingATask for more details. */ struct starpu_task *starpu_task_create(void) STARPU_ATTRIBUTE_MALLOC; /** Allocate a task structure that does nothing but accesses data \p handle with mode \p mode. This allows to synchronize with the task graph, according to the sequential consistency, against tasks submitted before or after submitting this task. One can then use starpu_task_declare_deps_array() or starpu_task_end_dep_add() / starpu_task_end_dep_release() to add dependencies against this task before submitting it. See \ref SynchronizationTasks for more details. */ struct starpu_task *starpu_task_create_sync(starpu_data_handle_t handle, enum starpu_data_access_mode mode) STARPU_ATTRIBUTE_MALLOC; /** Free the resource allocated during starpu_task_create() and associated with \p task. This function is called automatically after the execution of a task when the field starpu_task::destroy is set, which is the default for tasks created by starpu_task_create(). Calling this function on a statically allocated task results in an undefined behaviour. See \ref Per-taskFeedback and \ref PerformanceModelExample for more details. */ void starpu_task_destroy(struct starpu_task *task); /** Tell StarPU to free the resources associated with \p task when the task is over. This is equivalent to having set task->destroy = 1 before submission, the difference is that this can be called after submission and properly deals with concurrency with the task execution. See \ref WaitingForTasks for more details. */ void starpu_task_set_destroy(struct starpu_task *task); /** Submit \p task to StarPU. Calling this function does not mean that the task will be executed immediately as there can be data or task (tag) dependencies that are not fulfilled yet: StarPU will take care of scheduling this task with respect to such dependencies. This function returns immediately if the field starpu_task::synchronous is set to 0, and block until the termination of the task otherwise. It is also possible to synchronize the application with asynchronous tasks by the means of tags, using the function starpu_tag_wait() function for instance. In case of success, this function returns 0, a return value of -ENODEV means that there is no worker able to process this task (e.g. there is no GPU available and this task is only implemented for CUDA devices). starpu_task_submit() can be called from anywhere, including codelet functions and callbacks, provided that the field starpu_task::synchronous is set to 0. See \ref SubmittingATask for more details. */ int starpu_task_submit(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; #ifdef STARPU_USE_FXT static inline int starpu_task_submit_line(struct starpu_task *task, const char *file, int line) { task->file = file; task->line = line; return starpu_task_submit(task); } #define starpu_task_submit(task) starpu_task_submit_line((task), __FILE__, __LINE__) #endif /** Submit \p task to StarPU with dependency bypass. This can only be called on behalf of another task which has already taken the proper dependencies, e.g. this task is just an attempt of doing the actual computation of that task. See \ref TaskRetry for more details. */ int starpu_task_submit_nodeps(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; /** Submit \p task to the context \p sched_ctx_id. By default, starpu_task_submit() submits the task to a global context that is created automatically by StarPU. See \ref SubmittingTasksToAContext for more details. */ int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id); /** Return 1 if \p task is terminated. See \ref WaitingForTasks for more details. */ int starpu_task_finished(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; /** Block until \p task has been executed. It is not possible to synchronize with a task more than once. It is not possible to wait for synchronous or detached tasks. Upon successful completion, this function returns 0. Otherwise, -EINVAL indicates that the specified task was either synchronous or detached. See \ref SubmittingATask for more details. */ int starpu_task_wait(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; /** Allow to wait for an array of tasks. Upon successful completion, this function returns 0. Otherwise, -EINVAL indicates that one of the tasks was either synchronous or detached. See \ref WaitingForTasks for more details. */ int starpu_task_wait_array(struct starpu_task **tasks, unsigned nb_tasks) STARPU_WARN_UNUSED_RESULT; /** Block until all the tasks that were submitted (to the current context or the global one if there is no current context) are terminated. It does not destroy these tasks. See \ref SubmittingATask for more details. */ int starpu_task_wait_for_all(void); /** Block until there are \p n submitted tasks left (to the current context or the global one if there is no current context) to be executed. It does not destroy these tasks. See \ref HowtoReuseMemory for more details. */ int starpu_task_wait_for_n_submitted(unsigned n); /** Wait until all the tasks that were already submitted to the context \p sched_ctx_id have been terminated. See \ref WaitingForTasks for more details. */ int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx_id); /** Wait until there are \p n tasks submitted left to be executed that were already submitted to the context \p sched_ctx_id. See \ref WaitingForTasks for more details. */ int starpu_task_wait_for_n_submitted_in_ctx(unsigned sched_ctx_id, unsigned n); /** Wait until there is no more ready task. See \ref WaitingForTasks for more details. */ int starpu_task_wait_for_no_ready(void); /** Return the number of submitted tasks which are ready for execution are already executing. It thus does not include tasks waiting for dependencies. See \ref WaitingForTasks for more details. */ int starpu_task_nready(void); /** Return the number of submitted tasks which have not completed yet. See \ref WaitingForTasks for more details. */ int starpu_task_nsubmitted(void); /** Set the iteration number for all the tasks to be submitted after this call. This is typically called at the beginning of a task submission loop. This number will then show up in tracing tools. A corresponding starpu_iteration_pop() call must be made to match the call to starpu_iteration_push(), at the end of the same task submission loop, typically. Nested calls to starpu_iteration_push() and starpu_iteration_pop() are allowed, to describe a loop nest for instance, provided that they match properly. See \ref CreatingAGanttDiagram for more details. */ void starpu_iteration_push(unsigned long iteration); /** Drop the iteration number for submitted tasks. This must match a previous call to starpu_iteration_push(), and is typically called at the end of a task submission loop. See \ref CreatingAGanttDiagram for more details. */ void starpu_iteration_pop(void); /** See \ref GraphScheduling for more details. */ void starpu_do_schedule(void); /** Initialize \p cl with default values. Codelets should preferably be initialized statically as shown in \ref DefiningACodelet. However such a initialisation is not always possible, e.g. when using C++. See \ref DefiningACodelet for more details. */ void starpu_codelet_init(struct starpu_codelet *cl); /** Output on \c stderr some statistics on the codelet \p cl. See \ref Per-codeletFeedback for more details. */ void starpu_codelet_display_stats(struct starpu_codelet *cl); /** Return the task currently executed by the worker, or NULL if it is called either from a thread that is not a task or simply because there is no task being executed at the moment. See \ref Per-taskFeedback for more details. */ struct starpu_task *starpu_task_get_current(void); /** Return the memory node number of parameter \p i of the task currently executed, or -1 if it is called either from a thread that is not a task or simply because there is no task being executed at the moment. Usually, the returned memory node number is simply the memory node for the current worker. That may however be different when using e.g. starpu_codelet::specific_nodes. See \ref SpecifyingATargetNode for more details. */ int starpu_task_get_current_data_node(unsigned i); /** Return the name of the performance model of \p task. See \ref PerformanceModelExample for more details. */ const char *starpu_task_get_model_name(struct starpu_task *task); /** Return the name of \p task, i.e. either its starpu_task::name field, or the name of the corresponding performance model. See \ref TraceTaskDetails for more details. */ const char *starpu_task_get_name(struct starpu_task *task); /** Allocate a task structure which is the exact duplicate of \p task. See \ref OtherTaskUtility for more details. */ struct starpu_task *starpu_task_dup(struct starpu_task *task); /** This function should be called by schedulers to specify the codelet implementation to be executed when executing \p task. See \ref SchedulingHelpers for more details. */ void starpu_task_set_implementation(struct starpu_task *task, unsigned impl); /** Return the codelet implementation to be executed when executing \p task. See \ref SchedulingHelpers for more details. */ unsigned starpu_task_get_implementation(struct starpu_task *task); /** Create and submit an empty task that unlocks a tag once all its dependencies are fulfilled. See \ref SynchronizationTasks for more details. */ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg); /** Create and submit an empty task with the given callback. See \ref SynchronizationTasks for more details. */ void starpu_create_callback_task(void (*callback)(void *), void *callback_arg); /** Function to be used as a prologue callback to enable fault tolerance for the task. This prologue will create a try-task, i.e a duplicate of the task, which will to the actual computation. The prologue argument can be set to a check_ft function that will be called on termination of the duplicate, which can check the result of the task, and either confirm success, or resubmit another attempt. If it is not set, the default implementation is to just resubmit a new try-task. See \ref TaskRetry for more details. */ void starpu_task_ft_prologue(void *check_ft); /** Create a try-task for a \p meta_task, given a \p template_task task template. The meta task can be passed as template on the first call, but since it is mangled by starpu_task_ft_create_retry(), further calls (typically made by the check_ft callback) need to be passed the previous try-task as template task. \p check_ft is similar to the prologue argument of starpu_task_ft_prologue(), and is typically set to the very function calling starpu_task_ft_create_retry(). The try-task is returned, and can be modified (e.g. to change scheduling parameters) before being submitted with starpu_task_submit_nodeps(). See \ref TaskRetry for more details. */ struct starpu_task *starpu_task_ft_create_retry(const struct starpu_task *meta_task, const struct starpu_task *template_task, void (*check_ft)(void *)); /** Record that this task failed, and should thus be retried. This is usually called from the task codelet function itself, after checking the result and noticing that the computation went wrong, and thus the task should be retried. The performance of this task execution will not be recorded for performance models. This can only be called for a task whose data access modes are either ::STARPU_R and ::STARPU_W. */ void starpu_task_ft_failed(struct starpu_task *task); /** Notify that the try-task was successful and thus the meta-task was successful. See \ref TaskRetry for more details. */ void starpu_task_ft_success(struct starpu_task *meta_task); /** Set the function to call when the watchdog detects that StarPU has not finished any task for \ref STARPU_WATCHDOG_TIMEOUT seconds. See \ref WatchdogSupport for more details. */ void starpu_task_watchdog_set_hook(void (*hook)(void *), void *hook_arg); /** Return the given status as a string */ char *starpu_task_status_get_as_string(enum starpu_task_status status); /** Specify a minimum number of submitted tasks allowed at a given time, this allows to control the task submission flow. The value can also be specified with the environment variable \ref STARPU_LIMIT_MIN_SUBMITTED_TASKS. See \ref HowToReduceTheMemoryFootprintOfInternalDataStructures for more details. */ void starpu_set_limit_min_submitted_tasks(int limit_min); /** Specify a maximum number of submitted tasks allowed at a given time, this allows to control the task submission flow. The value can also be specified with the environment variable \ref STARPU_LIMIT_MAX_SUBMITTED_TASKS. See \ref HowToReduceTheMemoryFootprintOfInternalDataStructures for more details. */ void starpu_set_limit_max_submitted_tasks(int limit_min); /** @} */ /** @defgroup API_Transactions Transactions @{ */ /** Function to open a new transaction object and start the first transaction epoch. @return A pointer to an initializes struct starpu_transaction or \c NULL if submitting the transaction begin task failed with \c ENODEV. See \ref TransactionsCreation for more details. */ struct starpu_transaction *starpu_transaction_open(int (*do_start_func)(void *buffer, void *arg), void *do_start_arg); /** Function to mark the end of the current transaction epoch and start a new epoch. See \ref TransactionsEpochNext for more details. */ void starpu_transaction_next_epoch(struct starpu_transaction *p_trs, void *do_start_arg); /** Function to mark the end of the last transaction epoch and free the transaction object. See \ref TransactionsClosing for more details. */ void starpu_transaction_close(struct starpu_transaction *p_trs); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_TASK_H__ */ starpu-1.4.9+dfsg/include/starpu_task_bundle.h000066400000000000000000000065001507764646700214760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_TASK_BUNDLE_H__ #define __STARPU_TASK_BUNDLE_H__ #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Task_Bundles Task Bundles @{ */ struct starpu_task; struct starpu_perfmodel_arch; /** Opaque structure describing a list of tasks that should be scheduled on the same worker whenever it’s possible. It must be considered as a hint given to the scheduler as there is no guarantee that they will be executed on the same worker. */ typedef struct _starpu_task_bundle *starpu_task_bundle_t; /** Factory function creating and initializing \p bundle, when the call returns, memory needed is allocated and \p bundle is ready to use. */ void starpu_task_bundle_create(starpu_task_bundle_t *bundle); /** Insert \p task in \p bundle. Until \p task is removed from \p bundle its expected length and data transfer time will be considered along those of the other tasks of bundle. This function must not be called if \p bundle is already closed and/or \p task is already submitted. On success, it returns 0. There are two cases of error : if \p bundle is already closed it returns -EPERM, if \p task was already submitted it returns -EINVAL. */ int starpu_task_bundle_insert(starpu_task_bundle_t bundle, struct starpu_task *task); /** Remove \p task from \p bundle. Of course \p task must have been previously inserted in \p bundle. This function must not be called if \p bundle is already closed and/or \p task is already submitted. Doing so would result in undefined behaviour. On success, it returns 0. If \p bundle is already closed it returns -ENOENT. */ int starpu_task_bundle_remove(starpu_task_bundle_t bundle, struct starpu_task *task); /** Inform the runtime that the user will not modify \p bundle anymore, it means no more inserting or removing task. Thus the runtime can destroy it when possible. */ void starpu_task_bundle_close(starpu_task_bundle_t bundle); /** Return the expected duration of \p bundle in micro-seconds. */ double starpu_task_bundle_expected_length(starpu_task_bundle_t bundle, struct starpu_perfmodel_arch *arch, unsigned nimpl); /** Return the time (in micro-seconds) expected to transfer all data used within \p bundle. */ double starpu_task_bundle_expected_data_transfer_time(starpu_task_bundle_t bundle, unsigned memory_node); /** Return the expected energy consumption of \p bundle in J. */ double starpu_task_bundle_expected_energy(starpu_task_bundle_t bundle, struct starpu_perfmodel_arch *arch, unsigned nimpl); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_TASK_BUNDLE_H__ */ starpu-1.4.9+dfsg/include/starpu_task_dep.h000066400000000000000000000247261507764646700210070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_TASK_DEP_H__ #define __STARPU_TASK_DEP_H__ #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Explicit_Dependencies Explicit Dependencies @{ */ /** Declare task dependencies between a \p task and an array of tasks of length \p ndeps. This function must be called prior to the submission of the task, but it may called after the submission or the execution of the tasks in the array, provided the tasks are still valid (i.e. they were not automatically destroyed). Calling this function on a task that was already submitted or with an entry of \p task_array that is no longer a valid task results in an undefined behaviour. If \p ndeps is 0, no dependency is added. It is possible to call starpu_task_declare_deps_array() several times on the same task, in this case, the dependencies are added. It is possible to have redundancy in the task dependencies. See \ref TasksAndTagsDependencies for more details. */ void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]); /** Declare task dependencies between a \p task and an series of \p ndeps tasks, similarly to starpu_task_declare_deps_array(), but the tasks are passed after \p ndeps, which indicates how many tasks \p task shall be made to depend on. If \p ndeps is 0, no dependency is added. See \ref TasksAndTagsDependencies for more details. */ void starpu_task_declare_deps(struct starpu_task *task, unsigned ndeps, ...); /** Declare task end dependencies between a \p task and an array of tasks of length \p ndeps. \p task will appear as terminated not only when \p task is termination, but also when the tasks of \p task_array have terminated. This function must be called prior to the termination of the task, but it may called after the submission or the execution of the tasks in the array, provided the tasks are still valid (i.e. they were not automatically destroyed). Calling this function on a task that was already terminated or with an entry of \p task_array that is no longer a valid task results in an undefined behaviour. If \p ndeps is 0, no dependency is added. It is possible to call starpu_task_declare_end_deps_array() several times on the same task, in this case, the dependencies are added. It is currently not implemented to have redundancy in the task dependencies. See \ref TasksAndTagsDependencies for more details. */ void starpu_task_declare_end_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]); /** Declare task end dependencies between a \p task and an series of \p ndeps tasks, similarly to starpu_task_declare_end_deps_array(), but the tasks are passed after \p ndeps, which indicates how many tasks \p task 's termination shall be made to depend on. If \p ndeps is 0, no dependency is added. See \ref TasksAndTagsDependencies for more details. */ void starpu_task_declare_end_deps(struct starpu_task *task, unsigned ndeps, ...); /** Fill \p task_array with the list of tasks which are direct children of \p task. \p ndeps is the size of \p task_array. This function returns the number of direct children. \p task_array can be set to NULL if \p ndeps is 0, which allows to compute the number of children before allocating an array to store them. This function can only be called if \p task has not completed yet, otherwise the results are undefined. The result may also be outdated if some additional dependency has been added in the meanwhile. See \ref GettingTaskChildren for more details. */ int starpu_task_get_task_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]); /** Behave like starpu_task_get_task_succs(), except that it only reports tasks which will go through the scheduler, thus avoiding tasks with not codelet, or with explicit placement. See \ref GettingTaskChildren for more details. */ int starpu_task_get_task_scheduled_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]); /** Add \p nb_deps end dependencies to the task \p t. This means the task will not terminate until the required number of calls to the function starpu_task_end_dep_release() has been made. See \ref TasksAndTagsDependencies for more details. */ void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps); /** Unlock 1 end dependency to the task \p t. This function must be called after starpu_task_end_dep_add(). See \ref TasksAndTagsDependencies for more details. */ void starpu_task_end_dep_release(struct starpu_task *t); /** Define a task logical identifier. It is possible to associate a task with a unique tag chosen by the application, and to express dependencies between tasks by the means of those tags. To do so, fill the field starpu_task::tag_id with a tag number (can be arbitrary) and set the field starpu_task::use_tag to 1. If starpu_tag_declare_deps() is called with this tag number, the task will not be started until the tasks which holds the declared dependency tags are completed. */ typedef uint64_t starpu_tag_t; /** Specify the dependencies of the task identified by tag \p id. The first argument specifies the tag which is configured, the second argument gives the number of tag(s) on which \p id depends. The following arguments are the tags which have to be terminated to unlock the task. This function must be called before the associated task is submitted to StarPU with starpu_task_submit(). WARNING! Use with caution. Because of the variable arity of starpu_tag_declare_deps(), note that the last arguments must be of type ::starpu_tag_t : constant values typically need to be explicitly casted. Otherwise, due to integer sizes and argument passing on the stack, the C compiler might consider the tag 0x200000003 instead of 0x2 and 0x3 when calling starpu_tag_declare_deps(0x1, 2, 0x2, 0x3). Using the starpu_tag_declare_deps_array() function avoids this hazard. \code{.c} // Tag 0x1 depends on tags 0x32 and 0x52 starpu_tag_declare_deps((starpu_tag_t)0x1, 2, (starpu_tag_t)0x32, (starpu_tag_t)0x52); \endcode See \ref TasksAndTagsDependencies for more details. */ void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...); /** Similar to starpu_tag_declare_deps(), except that its does not take a variable number of arguments but an \p array of tags of size \p ndeps. \code{.c} // Tag 0x1 depends on tags 0x32 and 0x52 starpu_tag_t tag_array[2] = {0x32, 0x52}; starpu_tag_declare_deps_array((starpu_tag_t)0x1, 2, tag_array); \endcode See \ref TasksAndTagsDependencies for more details. */ void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array); /** Block until the task associated to tag \p id has been executed. This is a blocking call which must therefore not be called within tasks or callbacks, but only from the application directly. It is possible to synchronize with the same tag multiple times, as long as the starpu_tag_remove() function is not called. Note that it is still possible to synchronize with a tag associated to a task for which the structure starpu_task was freed (e.g. if the field starpu_task::destroy was enabled). See \ref WaitingForTasks for more details. */ int starpu_tag_wait(starpu_tag_t id); /** Similar to starpu_tag_wait() except that it blocks until all the \p ntags tags contained in the array \p id are terminated. See \ref WaitingForTasks for more details. */ int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id); /** Clear the already notified status of a tag which is not associated with a task. Before that, calling starpu_tag_notify_from_apps() again will not notify the successors. After that, the next call to starpu_tag_notify_from_apps() will notify the successors. See \ref TasksAndTagsDependencies for more details. */ void starpu_tag_restart(starpu_tag_t id); /** Release the resources associated to tag \p id. It can be called once the corresponding task has been executed and when there is no other tag that depend on this tag anymore. See \ref TasksAndTagsDependencies for more details. */ void starpu_tag_remove(starpu_tag_t id); /** Explicitly unlock tag \p id. It may be useful in the case of applications which execute part of their computation outside StarPU tasks (e.g. third-party libraries). It is also provided as a convenient tool for the programmer, for instance to entirely construct the task DAG before actually giving StarPU the opportunity to execute the tasks. When called several times on the same tag, notification will be done only on first call, thus implementing "OR" dependencies, until the tag is restarted using starpu_tag_restart(). See \ref TasksAndTagsDependencies for more details. */ void starpu_tag_notify_from_apps(starpu_tag_t id); /** Atomically call starpu_tag_notify_from_apps() and starpu_tag_restart() on tag \p id. This is useful with cyclic graphs, when we want to safely trigger its startup. See \ref TasksAndTagsDependencies for more details. */ void starpu_tag_notify_restart_from_apps(starpu_tag_t id); /** Return the task associated to the tag \p id. See \ref TasksAndTagsDependencies for more details. */ struct starpu_task *starpu_tag_get_task(starpu_tag_t id); /** Calls starpu_tag_remove() for all tags. The current implementation requires that no starpu_tag_wait_array() is currently pending. See \ref TasksAndTagsDependencies for more details. */ void starpu_tag_clear(void); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_TASK_DEP_H__ */ starpu-1.4.9+dfsg/include/starpu_task_list.h000066400000000000000000000102231507764646700211750ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_TASK_LIST_H__ #define __STARPU_TASK_LIST_H__ #include #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Task_Lists Task Lists @{ */ /* NOTE: this needs to have at least the same size as lists in src/common/list.h */ #ifdef BUILDING_STARPU #define STARPU_TASK_LIST_INLINE extern inline #else /** Store a double-chained list of tasks */ struct starpu_task_list { struct starpu_task *head; /**< head of the list */ struct starpu_task *tail; /**< tail of the list */ }; #define STARPU_TASK_LIST_INLINE extern #endif /** Initialize a list structure. See \ref SchedulingHelpers for more details. */ STARPU_TASK_LIST_INLINE void starpu_task_list_init(struct starpu_task_list *list); /** Push \p task at the front of \p list. See \ref SchedulingHelpers for more details. */ STARPU_TASK_LIST_INLINE void starpu_task_list_push_front(struct starpu_task_list *list, struct starpu_task *task); /** Push \p task at the back of \p list. See \ref SchedulingHelpers for more details. */ STARPU_TASK_LIST_INLINE void starpu_task_list_push_back(struct starpu_task_list *list, struct starpu_task *task); /** Get the front of \p list (without removing it). See \ref SchedulingHelpers for more details. */ STARPU_TASK_LIST_INLINE struct starpu_task *starpu_task_list_front(const struct starpu_task_list *list); /** Get the back of \p list (without removing it). See \ref SchedulingHelpers for more details. */ STARPU_TASK_LIST_INLINE struct starpu_task *starpu_task_list_back(const struct starpu_task_list *list); /** Test if \p list is empty. See \ref SchedulingHelpers for more details. */ STARPU_TASK_LIST_INLINE int starpu_task_list_empty(const struct starpu_task_list *list); /** Remove \p task from \p list. See \ref SchedulingHelpers for more details. */ STARPU_TASK_LIST_INLINE void starpu_task_list_erase(struct starpu_task_list *list, struct starpu_task *task); /** Remove the element at the front of \p list. See \ref SchedulingHelpers for more details. */ STARPU_TASK_LIST_INLINE struct starpu_task *starpu_task_list_pop_front(struct starpu_task_list *list); /** Remove the element at the back of \p list. See \ref SchedulingHelpers for more details. */ STARPU_TASK_LIST_INLINE struct starpu_task *starpu_task_list_pop_back(struct starpu_task_list *list); /** Get the first task of \p list. See \ref SchedulingHelpers for more details. */ STARPU_TASK_LIST_INLINE struct starpu_task *starpu_task_list_begin(const struct starpu_task_list *list); /** Get the end of \p list. See \ref SchedulingHelpers for more details. */ STARPU_TASK_LIST_INLINE struct starpu_task *starpu_task_list_end(const struct starpu_task_list *list STARPU_ATTRIBUTE_UNUSED); /** Get the next task of \p list. This is not erase-safe. See \ref SchedulingHelpers for more details. */ STARPU_TASK_LIST_INLINE struct starpu_task *starpu_task_list_next(const struct starpu_task *task); /** Test whether the given task \p look is contained in the \p list. See \ref SchedulingHelpers for more details. */ STARPU_TASK_LIST_INLINE int starpu_task_list_ismember(const struct starpu_task_list *list, const struct starpu_task *look); /** Move list from one head \p lsrc to another \p ldst. See \ref SchedulingHelpers for more details. */ STARPU_TASK_LIST_INLINE void starpu_task_list_move(struct starpu_task_list *ldst, struct starpu_task_list *lsrc); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_TASK_LIST_H__ */ starpu-1.4.9+dfsg/include/starpu_task_util.h000066400000000000000000000566011507764646700212110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifndef __STARPU_TASK_UTIL_H__ #define __STARPU_TASK_UTIL_H__ #include #include #include #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Insert_Task Task Insert Utility @{ */ /* NOTE: when adding a value here, please make sure to update both * src/util/starpu_task_insert_utils.c (in two places) and * mpi/src/starpu_mpi_task_insert.c and mpi/src/starpu_mpi_task_insert_fortran.c */ #define STARPU_MODE_SHIFT 17 /** Used when calling starpu_task_insert(), must be followed by a pointer to a constant value and the size of the constant */ #define STARPU_VALUE (1 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a pointer to a callback function */ #define STARPU_CALLBACK (2 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by two pointers: one to a callback function, and the other to be given as an argument to the callback function; this is equivalent to using both ::STARPU_CALLBACK and ::STARPU_CALLBACK_ARG. */ #define STARPU_CALLBACK_WITH_ARG (3 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a pointer to be given as an argument to the callback function */ #define STARPU_CALLBACK_ARG (4 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a integer defining a priority level */ #define STARPU_PRIORITY (5 << STARPU_MODE_SHIFT) /** \ingroup API_MPI_Support Used when calling starpu_mpi_task_insert(), must be followed by a integer value which specified the node on which to execute the codelet. */ #define STARPU_EXECUTE_ON_NODE (6 << STARPU_MODE_SHIFT) /** \ingroup API_MPI_Support Used when calling starpu_mpi_task_insert(), must be followed by a data handle to specify that the node owning the given data will execute the codelet. */ #define STARPU_EXECUTE_ON_DATA (7 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by an array of handles and the number of elements in the array (as int). This is equivalent to passing the handles as separate parameters with ::STARPU_R, ::STARPU_W or ::STARPU_RW. */ #define STARPU_DATA_ARRAY (8 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by an array of struct starpu_data_descr and the number of elements in the array (as int). This is equivalent to passing the handles with the corresponding modes. */ #define STARPU_DATA_MODE_ARRAY (9 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a tag. */ #define STARPU_TAG (10 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a tag. */ #define STARPU_HYPERVISOR_TAG (11 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by an amount of floating point operations, as a double. Users MUST explicitly cast into double, otherwise parameter passing will not work. */ #define STARPU_FLOPS (12 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by the id of the scheduling context to which to submit the task to. */ #define STARPU_SCHED_CTX (13 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a pointer to a prologue callback function */ #define STARPU_PROLOGUE_CALLBACK (14 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a pointer to be given as an argument to the prologue callback function */ #define STARPU_PROLOGUE_CALLBACK_ARG (15 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a pointer to a prologue callback pop function */ #define STARPU_PROLOGUE_CALLBACK_POP (16 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a pointer to be given as an argument to the prologue callback pop function */ #define STARPU_PROLOGUE_CALLBACK_POP_ARG (17 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by an integer value specifying the worker on which to execute the task (as specified by starpu_task::execute_on_a_specific_worker) */ #define STARPU_EXECUTE_ON_WORKER (18 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by an unsigned long long value specifying the mask of worker on which to execute the task (as specified by starpu_task::where) */ #define STARPU_EXECUTE_WHERE (19 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a tag stored in starpu_task::tag_id. Leave starpu_task::use_tag as 0. */ #define STARPU_TAG_ONLY (20 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by an unsigned stored in starpu_task::possibly_parallel. */ #define STARPU_POSSIBLY_PARALLEL (21 << STARPU_MODE_SHIFT) /** used when calling starpu_task_insert(), must be followed by an integer value specifying the worker order in which to execute the tasks (as specified by starpu_task::workerorder) */ #define STARPU_WORKER_ORDER (22 << STARPU_MODE_SHIFT) /** \ingroup API_MPI_Support Used when calling starpu_mpi_task_insert(), must be followed by a identifier to a node selection policy. This is needed when several nodes own data in ::STARPU_W mode. */ #define STARPU_NODE_SELECTION_POLICY (23 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a char * stored in starpu_task::name. */ #define STARPU_NAME (24 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a memory buffer containing the arguments to be given to the task, and by the size of the arguments. The memory buffer should be the result of a previous call to starpu_codelet_pack_args(), and will be freed (i.e. starpu_task::cl_arg_free will be set to 1) */ #define STARPU_CL_ARGS (25 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), similarly to ::STARPU_CL_ARGS, must be followed by a memory buffer containing the arguments to be given to the task, and by the size of the arguments. The memory buffer should be the result of a previous call to starpu_codelet_pack_args(), and will NOT be freed (i.e. starpu_task::cl_arg_free will be set to 0) */ #define STARPU_CL_ARGS_NFREE (26 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a number of tasks as int, and an array containing these tasks. The function starpu_task_declare_deps_array() will be called with the given values. */ #define STARPU_TASK_DEPS_ARRAY (27 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by an integer representing a color */ #define STARPU_TASK_COLOR (28 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by an array of characters representing the sequential consistency for each buffer of the task. */ #define STARPU_HANDLES_SEQUENTIAL_CONSISTENCY (29 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by an integer stating if the task is synchronous or not */ #define STARPU_TASK_SYNCHRONOUS (30 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a number of tasks as int, and an array containing these tasks. The function starpu_task_declare_end_deps_array() will be called with the given values. */ #define STARPU_TASK_END_DEPS_ARRAY (31 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by an integer which will be given to starpu_task_end_dep_add() */ #define STARPU_TASK_END_DEP (32 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by an unsigned being a number of workers, and an array of bits which size is the number of workers, the array indicates the set of workers which are allowed to execute the task. */ #define STARPU_TASK_WORKERIDS (33 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by an unsigned which sets the sequential consistency for the data parameters of the task. */ #define STARPU_SEQUENTIAL_CONSISTENCY (34 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert() and alike, must be followed by a pointer to a struct starpu_profiling_task_info */ #define STARPU_TASK_PROFILING_INFO (35 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert() and alike, must be followed by an unsigned specifying not to allocate a submitorder id for the task */ #define STARPU_TASK_NO_SUBMITORDER (36 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), similarly to ::STARPU_CALLBACK_ARG, must be followed by a pointer to be given as an argument to the callback function, the argument will not be freed, i.e starpu_task::callback_arg_free will be set to 0 */ #define STARPU_CALLBACK_ARG_NFREE (37 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), similarly to ::STARPU_CALLBACK_WITH_ARG, must be followed by two pointers: one to a callback function, and the other to be given as an argument to the callback function; this is equivalent to using both ::STARPU_CALLBACK and ::STARPU_CALLBACK_ARG_NFREE. */ #define STARPU_CALLBACK_WITH_ARG_NFREE (38 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), similarly to ::STARPU_PROLOGUE_CALLBACK_ARG, must be followed by a pointer to be given as an argument to the prologue callback function, the argument will not be freed, i.e starpu_task::prologue_callback_arg_free will be set to 0 */ #define STARPU_PROLOGUE_CALLBACK_ARG_NFREE (39 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), similarly to ::STARPU_PROLOGUE_CALLBACK_POP_ARG, must be followed by a pointer to be given as an argument to the prologue callback pop function, the argument will not be freed, i.e starpu_task::prologue_callback_pop_arg_free will be set to 0 */ #define STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE (40 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert() and alike, must be followed by a void* specifying the value to be set in starpu_task::sched_data */ #define STARPU_TASK_SCHED_DATA (41 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert() and alike, must be followed by a struct starpu_transaction * specifying the value to be set in the transaction field of the task. */ #define STARPU_TRANSACTION (42 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a char * stored in starpu_task::file. This is automatically set when FXT is enabled. */ #define STARPU_TASK_FILE (43 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by an int stored in starpu_task::line. This is automatically set when FXT is enabled. */ #define STARPU_TASK_LINE (44 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a pointer to a epilogue callback function */ #define STARPU_EPILOGUE_CALLBACK (45 << STARPU_MODE_SHIFT) /** Used when calling starpu_task_insert(), must be followed by a pointer to be given as an argument to the epilogue callback function */ #define STARPU_EPILOGUE_CALLBACK_ARG (46 << STARPU_MODE_SHIFT) /** \ingroup API_Bubble Used when calling starpu_task_insert(), must be followed by a pointer to a bubble decision function ::starpu_bubble_func_t */ #define STARPU_BUBBLE_FUNC (47 << STARPU_MODE_SHIFT) /** \ingroup API_Bubble Used when calling starpu_task_insert(), must be followed by a pointer which will be passed to the function defined in starpu_codelet::bubble_func */ #define STARPU_BUBBLE_FUNC_ARG (48 << STARPU_MODE_SHIFT) /** \ingroup API_Bubble Used when calling starpu_task_insert(), must be followed by a pointer to a bubble DAG generation function ::starpu_bubble_gen_dag_func_t */ #define STARPU_BUBBLE_GEN_DAG_FUNC (49 << STARPU_MODE_SHIFT) /** \ingroup API_Bubble Used when calling starpu_task_insert(), must be followed by a pointer which will be passed to the function defined in starpu_codelet::bubble_gen_dag_func */ #define STARPU_BUBBLE_GEN_DAG_FUNC_ARG (50 << STARPU_MODE_SHIFT) /** \ingroup API_Bubble Used when calling starpu_task_insert(), must be followed by a pointer to a task. The task will be set as the bubble parent task when using the offline tracing tool. */ #define STARPU_BUBBLE_PARENT (51 << STARPU_MODE_SHIFT) /** This has to be the last mode value plus 1 */ #define STARPU_SHIFTED_MODE_MAX (52 << STARPU_MODE_SHIFT) /** Set the given \p task corresponding to \p cl with the following arguments. The argument list must be zero-terminated. The arguments following the codelet are the same as the ones for the function starpu_task_insert(). If some arguments of type ::STARPU_VALUE are given, the parameter starpu_task::cl_arg_free will be set to 1. See \ref OtherTaskUtility for more details. */ int starpu_task_set(struct starpu_task *task, struct starpu_codelet *cl, ...); #ifdef STARPU_USE_FXT #define starpu_task_set(task, cl, ...) starpu_task_set(task, cl, STARPU_TASK_FILE, __FILE__, STARPU_TASK_LINE, __LINE__, ##__VA_ARGS__) #endif /** Create a task corresponding to \p cl with the following arguments. The argument list must be zero-terminated. The arguments following the codelet are the same as the ones for the function starpu_task_insert(). If some arguments of type ::STARPU_VALUE are given, the parameter starpu_task::cl_arg_free will be set to 1. See \ref OtherTaskUtility for more details. */ struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...); #ifdef STARPU_USE_FXT #define starpu_task_build(cl, ...) starpu_task_build(cl, STARPU_TASK_FILE, __FILE__, STARPU_TASK_LINE, __LINE__, ##__VA_ARGS__) #endif /** Create and submit a task corresponding to \p cl with the following given arguments. The argument list must be zero-terminated. The arguments following the codelet can be of the following types:
    • ::STARPU_R, ::STARPU_W, ::STARPU_RW, ::STARPU_SCRATCH, ::STARPU_REDUX an access mode followed by a data handle;
    • ::STARPU_DATA_ARRAY followed by an array of data handles and its number of elements;
    • ::STARPU_DATA_MODE_ARRAY followed by an array of struct starpu_data_descr, i.e data handles with their associated access modes, and its number of elements;
    • ::STARPU_EXECUTE_ON_WORKER, ::STARPU_WORKER_ORDER followed by an integer value specifying the worker on which to execute the task (as specified by starpu_task::execute_on_a_specific_worker)
    • the specific values ::STARPU_VALUE, ::STARPU_CALLBACK, ::STARPU_CALLBACK_ARG, ::STARPU_CALLBACK_WITH_ARG, ::STARPU_PRIORITY, ::STARPU_TAG, ::STARPU_TAG_ONLY, ::STARPU_FLOPS, ::STARPU_SCHED_CTX, ::STARPU_CL_ARGS, ::STARPU_CL_ARGS_NFREE, ::STARPU_TASK_DEPS_ARRAY, ::STARPU_TASK_COLOR, ::STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, ::STARPU_TASK_SYNCHRONOUS, ::STARPU_TASK_END_DEP followed by the appropriated objects as defined elsewhere.
    When using ::STARPU_DATA_ARRAY, the access mode of the data handles is not defined, it will be taken from the codelet starpu_codelet::modes or starpu_codelet::dyn_modes field. One should use ::STARPU_DATA_MODE_ARRAY to define the data handles along with the access modes. Parameters to be passed to the codelet implementation are defined through the type ::STARPU_VALUE. The function starpu_codelet_unpack_args() must be called within the codelet implementation to retrieve them. See \ref InsertTaskUtility for more details. */ int starpu_task_insert(struct starpu_codelet *cl, ...); #ifdef STARPU_USE_FXT #define starpu_task_insert(cl, ...) starpu_task_insert(cl, STARPU_TASK_FILE, __FILE__, STARPU_TASK_LINE, __LINE__, ##__VA_ARGS__) #endif /** Identical to starpu_task_insert(). Kept to avoid breaking old codes. */ int starpu_insert_task(struct starpu_codelet *cl, ...); #ifdef STARPU_USE_FXT #define starpu_insert_task(cl, ...) starpu_insert_task(cl, STARPU_TASK_FILE, __FILE__, STARPU_TASK_LINE, __LINE__, ##__VA_ARGS__) #endif /** Assuming that there are already \p current_buffer data handles passed to the task, and if *allocated_buffers is not 0, the task->dyn_handles array has size \p *allocated_buffers, this function makes room for \p room other data handles, allocating or reallocating task->dyn_handles as necessary and updating \p *allocated_buffers accordingly. One can thus start with *allocated_buffers equal to 0 and current_buffer equal to 0, then make room by calling this function, then store handles with STARPU_TASK_SET_HANDLE(), make room again with this function, store yet more handles, etc. See \ref OtherTaskUtility for more details. */ void starpu_task_insert_data_make_room(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int current_buffer, int room); /** Store data handle \p handle into task \p task with mode \p arg_type, updating \p *allocated_buffers and \p *current_buffer accordingly. See \ref OtherTaskUtility for more details. */ void starpu_task_insert_data_process_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int arg_type, starpu_data_handle_t handle); /** Store \p nb_handles data handles \p handles into task \p task, updating \p *allocated_buffers and \p *current_buffer accordingly. See \ref OtherTaskUtility for more details. */ void starpu_task_insert_data_process_array_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int nb_handles, starpu_data_handle_t *handles); /** Store \p nb_descrs data handles described by \p descrs into task \p task, updating \p *allocated_buffers and \p *current_buffer accordingly. See \ref OtherTaskUtility for more details. */ void starpu_task_insert_data_process_mode_array_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int nb_descrs, struct starpu_data_descr *descrs); /** Pack arguments of type ::STARPU_VALUE into a buffer which can be given to a codelet and later unpacked with the function starpu_codelet_unpack_args(). Instead of calling starpu_codelet_pack_args(), one can also call starpu_codelet_pack_arg_init(), then starpu_codelet_pack_arg() for each data, then starpu_codelet_pack_arg_fini(). See \ref InsertTaskUtility for more details. */ void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...); /** Structure to be used for starpu_codelet_pack_arg_init() & co, and starpu_codelet_unpack_arg_init() & co. The contents is public, however users should not directly access it, but only use as a parameter to the appropriate functions. */ struct starpu_codelet_pack_arg_data { char *arg_buffer; size_t arg_buffer_size; size_t arg_buffer_used; size_t current_offset; int nargs; }; /** Initialize struct starpu_codelet_pack_arg before calling starpu_codelet_pack_arg() and starpu_codelet_pack_arg_fini(). This will simply initialize the content of the structure. See \ref InsertTaskUtility for more details. */ void starpu_codelet_pack_arg_init(struct starpu_codelet_pack_arg_data *state); /** Pack one argument into struct starpu_codelet_pack_arg \p state. That structure has to be initialized before with starpu_codelet_pack_arg_init(), and after all starpu_codelet_pack_arg() calls performed, starpu_codelet_pack_arg_fini() has to be used to get the \p cl_arg and \p cl_arg_size to be put in the task. See \ref InsertTaskUtility for more details. */ void starpu_codelet_pack_arg(struct starpu_codelet_pack_arg_data *state, const void *ptr, size_t ptr_size); /** Finish packing data, after calling starpu_codelet_pack_arg_init() once and starpu_codelet_pack_arg() several times. See \ref InsertTaskUtility for more details. */ void starpu_codelet_pack_arg_fini(struct starpu_codelet_pack_arg_data *state, void **cl_arg, size_t *cl_arg_size); /** Retrieve the arguments of type ::STARPU_VALUE associated to a task automatically created using the function starpu_task_insert(). If any parameter's value is 0, unpacking will stop there and ignore the remaining parameters. See \ref InsertTaskUtility for more details. */ void starpu_codelet_unpack_args(void *cl_arg, ...); /** Initialize \p state with \p cl_arg and \p cl_arg_size. This has to be called before calling starpu_codelet_unpack_arg(). See \ref InsertTaskUtility for more details. */ void starpu_codelet_unpack_arg_init(struct starpu_codelet_pack_arg_data *state, void *cl_arg, size_t cl_arg_size); /** Unpack the next argument of size \p size from \p state into \p ptr with a copy. \p state has to be initialized before with starpu_codelet_unpack_arg_init(). See \ref InsertTaskUtility for more details. */ void starpu_codelet_unpack_arg(struct starpu_codelet_pack_arg_data *state, void *ptr, size_t size); /** Unpack the next argument of unknown size from \p state into \p ptr with a copy. \p ptr is allocated before copying in it the value of the argument. The size of the argument is returned in \p size. \p has to be initialized before with starpu_codelet_unpack_arg_init(). See \ref InsertTaskUtility for more details. */ void starpu_codelet_dup_arg(struct starpu_codelet_pack_arg_data *state, void **ptr, size_t *size); /** Unpack the next argument of unknown size from \p state into \p ptr. \p ptr will be a pointer to the memory of the argument. The size of the argument is returned in \p size. \p has to be initialized before with starpu_codelet_unpack_arg_init(). See \ref InsertTaskUtility for more details. */ void starpu_codelet_pick_arg(struct starpu_codelet_pack_arg_data *state, void **ptr, size_t *size); /** Finish unpacking data, after calling starpu_codelet_unpack_arg_init() once and starpu_codelet_unpack_arg() or starpu_codelet_dup_arg() or starpu_codelet_pick_arg() several times. See \ref InsertTaskUtility for more details. */ void starpu_codelet_unpack_arg_fini(struct starpu_codelet_pack_arg_data *state); /** Call this function during unpacking to skip saving the argument in ptr. See \ref InsertTaskUtility for more details. */ void starpu_codelet_unpack_discard_arg(struct starpu_codelet_pack_arg_data *state); /** Similar to starpu_codelet_unpack_args(), but if any parameter is 0, copy the part of \p cl_arg that has not been read in \p buffer which can then be used in a later call to one of the unpack functions. See \ref InsertTaskUtility for more details. */ void starpu_codelet_unpack_args_and_copyleft(void *cl_arg, void *buffer, size_t buffer_size, ...); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_TASK_UTIL_H__ */ starpu-1.4.9+dfsg/include/starpu_thread.h000066400000000000000000000431611507764646700204560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ // The documentation for this file is in doc/doxygen/chapters/api/threads.doxy #ifndef __STARPU_THREAD_H__ #define __STARPU_THREAD_H__ #include #include #ifdef STARPU_SIMGRID #include #ifdef STARPU_HAVE_SIMGRID_MUTEX_H #include #include #elif defined(STARPU_HAVE_XBT_SYNCHRO_H) #include #else #include #endif #ifdef STARPU_HAVE_SIMGRID_ACTOR_H #include #endif #ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H #include #endif #ifdef STARPU_HAVE_SIMGRID_MUTEX_H #include #endif #ifdef STARPU_HAVE_SIMGRID_COND_H #include #endif #ifdef STARPU_HAVE_SIMGRID_BARRIER_H #include #endif #ifdef STARPU_HAVE_SIMGRID_HOST_H #include #endif #ifdef STARPU_HAVE_SIMGRID_MSG_H #include #elif defined(STARPU_HAVE_MSG_MSG_H) #include #endif #elif !defined(_MSC_VER) || defined(BUILDING_STARPU) #include #include #endif #include #ifdef __cplusplus extern "C" { #endif /* * Encapsulation of the pthread_create function. */ #ifdef STARPU_SIMGRID #ifdef STARPU_HAVE_SIMGRID_ACTOR_H typedef sg_actor_t starpu_pthread_t; #else typedef msg_process_t starpu_pthread_t; #endif typedef struct { size_t stacksize; } starpu_pthread_attr_t; #ifdef STARPU_HAVE_SIMGRID_ACTOR_H typedef sg_host_t starpu_sg_host_t; #else typedef msg_host_t starpu_sg_host_t; #endif int starpu_pthread_equal(starpu_pthread_t t1, starpu_pthread_t t2); starpu_pthread_t starpu_pthread_self(void); int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine)(void *), void *arg, starpu_sg_host_t host); int starpu_pthread_create(starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine)(void *), void *arg); starpu_pthread_t _starpu_simgrid_actor_create(const char *name, xbt_main_func_t code, starpu_sg_host_t host, int argc, char *argv[]); int starpu_pthread_join(starpu_pthread_t thread, void **retval); int starpu_pthread_detach(starpu_pthread_t thread); int starpu_pthread_exit(void *retval) STARPU_ATTRIBUTE_NORETURN; int starpu_pthread_attr_init(starpu_pthread_attr_t *attr); int starpu_pthread_attr_destroy(starpu_pthread_attr_t *attr); int starpu_pthread_attr_setdetachstate(starpu_pthread_attr_t *attr, int detachstate); int starpu_pthread_attr_setstacksize(starpu_pthread_attr_t *attr, size_t stacksize); #elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* STARPU_SIMGRID */ typedef pthread_t starpu_pthread_t; typedef pthread_attr_t starpu_pthread_attr_t; #define starpu_pthread_equal pthread_equal #define starpu_pthread_self pthread_self #define starpu_pthread_create pthread_create #define starpu_pthread_create_on(name, thread, attr, routine, arg, where) starpu_pthread_create(thread, attr, routine, arg) #define starpu_pthread_join pthread_join #define starpu_pthread_detach pthread_detach #define starpu_pthread_exit pthread_exit #define starpu_pthread_attr_init pthread_attr_init #define starpu_pthread_attr_destroy pthread_attr_destroy #define starpu_pthread_attr_setdetachstate pthread_attr_setdetachstate #define starpu_pthread_attr_setstacksize pthread_attr_setstacksize #endif /* STARPU_SIMGRID, _MSC_VER */ #ifdef STARPU_HAVE_PTHREAD_SETNAME_NP #ifdef STARPU_HAVE_DARWIN #define starpu_pthread_setname(name) pthread_setname_np(name) #else #define starpu_pthread_setname(name) pthread_setname_np(pthread_self(), name) #endif #else #define starpu_pthread_setname(name) #endif /* * Encapsulation of the pthread_mutex_* functions. */ #ifdef STARPU_SIMGRID #ifdef STARPU_HAVE_SIMGRID_MUTEX_H typedef sg_mutex_t starpu_pthread_mutex_t; #else typedef xbt_mutex_t starpu_pthread_mutex_t; #endif typedef int starpu_pthread_mutexattr_t; #define STARPU_PTHREAD_MUTEX_INITIALIZER NULL int starpu_pthread_mutex_init(starpu_pthread_mutex_t *mutex, const starpu_pthread_mutexattr_t *mutexattr); int starpu_pthread_mutex_destroy(starpu_pthread_mutex_t *mutex); int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex); int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex); int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex); int starpu_pthread_mutexattr_gettype(const starpu_pthread_mutexattr_t *attr, int *type); int starpu_pthread_mutexattr_settype(starpu_pthread_mutexattr_t *attr, int type); int starpu_pthread_mutexattr_destroy(starpu_pthread_mutexattr_t *attr); int starpu_pthread_mutexattr_init(starpu_pthread_mutexattr_t *attr); #elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */ typedef pthread_mutex_t starpu_pthread_mutex_t; typedef pthread_mutexattr_t starpu_pthread_mutexattr_t; #define starpu_pthread_mutex_init pthread_mutex_init #define starpu_pthread_mutex_destroy pthread_mutex_destroy #define starpu_pthread_mutexattr_gettype pthread_mutexattr_gettype #define starpu_pthread_mutexattr_settype pthread_mutexattr_settype #define starpu_pthread_mutexattr_destroy pthread_mutexattr_destroy #define starpu_pthread_mutexattr_init pthread_mutexattr_init #ifdef STARPU_FXT_LOCK_TRACES int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex); int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex); int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex); #else #define starpu_pthread_mutex_lock pthread_mutex_lock #define starpu_pthread_mutex_unlock pthread_mutex_unlock #define starpu_pthread_mutex_trylock pthread_mutex_trylock #endif #define STARPU_PTHREAD_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER #endif /* STARPU_SIMGRID, _MSC_VER */ #if !defined(_MSC_VER) || defined(BUILDING_STARPU) int starpu_pthread_mutex_lock_sched(starpu_pthread_mutex_t *mutex); int starpu_pthread_mutex_unlock_sched(starpu_pthread_mutex_t *mutex); int starpu_pthread_mutex_trylock_sched(starpu_pthread_mutex_t *mutex); void starpu_pthread_mutex_check_sched(starpu_pthread_mutex_t *mutex, char *file, int line); #endif /* * Encapsulation of the pthread_key_* functions. */ #ifdef STARPU_SIMGRID typedef int starpu_pthread_key_t; int starpu_pthread_key_create(starpu_pthread_key_t *key, void (*destr_function)(void *)); int starpu_pthread_key_delete(starpu_pthread_key_t key); int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer); void *starpu_pthread_getspecific(starpu_pthread_key_t key); #elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */ typedef pthread_key_t starpu_pthread_key_t; #define starpu_pthread_key_create pthread_key_create #define starpu_pthread_key_delete pthread_key_delete #define starpu_pthread_setspecific pthread_setspecific #define starpu_pthread_getspecific pthread_getspecific #endif /* STARPU_SIMGRID, _MSC_VER */ /* * Encapsulation of the pthread_cond_* functions. */ #ifdef STARPU_SIMGRID #ifdef STARPU_HAVE_SIMGRID_COND_H typedef sg_cond_t starpu_pthread_cond_t; #else typedef xbt_cond_t starpu_pthread_cond_t; #endif typedef int starpu_pthread_condattr_t; #define STARPU_PTHREAD_COND_INITIALIZER NULL int starpu_pthread_cond_init(starpu_pthread_cond_t *cond, starpu_pthread_condattr_t *cond_attr); int starpu_pthread_cond_signal(starpu_pthread_cond_t *cond); int starpu_pthread_cond_broadcast(starpu_pthread_cond_t *cond); int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex); int starpu_pthread_cond_timedwait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex, const struct timespec *abstime); int starpu_pthread_cond_destroy(starpu_pthread_cond_t *cond); #elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */ typedef pthread_cond_t starpu_pthread_cond_t; typedef pthread_condattr_t starpu_pthread_condattr_t; #define STARPU_PTHREAD_COND_INITIALIZER PTHREAD_COND_INITIALIZER #define starpu_pthread_cond_init pthread_cond_init #define starpu_pthread_cond_signal pthread_cond_signal #define starpu_pthread_cond_broadcast pthread_cond_broadcast #ifdef STARPU_FXT_LOCK_TRACES int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex); #else #define starpu_pthread_cond_wait pthread_cond_wait #endif #define starpu_pthread_cond_timedwait pthread_cond_timedwait #define starpu_pthread_cond_destroy pthread_cond_destroy #endif /* STARPU_SIMGRID, _MSC_VER */ /* * Encapsulation of the pthread_rwlock_* functions. */ #ifdef STARPU_SIMGRID #ifdef STARPU_HAVE_SIMGRID_MUTEX_H typedef sg_mutex_t starpu_pthread_rwlock_t; #else typedef xbt_mutex_t starpu_pthread_rwlock_t; #endif typedef int starpu_pthread_rwlockattr_t; #define STARPU_PTHREAD_RWLOCK_INITIALIZER NULL int starpu_pthread_rwlock_init(starpu_pthread_rwlock_t *rwlock, const starpu_pthread_rwlockattr_t *attr); int starpu_pthread_rwlock_destroy(starpu_pthread_rwlock_t *rwlock); int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock); int starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock); int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock); int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock); int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock); #elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */ typedef pthread_rwlock_t starpu_pthread_rwlock_t; typedef pthread_rwlockattr_t starpu_pthread_rwlockattr_t; #define STARPU_PTHREAD_RWLOCK_INITIALIZER PTHREAD_RWLOCK_INITIALIZER #define starpu_pthread_rwlock_init pthread_rwlock_init #define starpu_pthread_rwlock_destroy pthread_rwlock_destroy #ifdef STARPU_FXT_LOCK_TRACES int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock); int starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock); int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock); int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock); int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock); #else #define starpu_pthread_rwlock_rdlock pthread_rwlock_rdlock #define starpu_pthread_rwlock_tryrdlock pthread_rwlock_tryrdlock #define starpu_pthread_rwlock_wrlock pthread_rwlock_wrlock #define starpu_pthread_rwlock_trywrlock pthread_rwlock_trywrlock #define starpu_pthread_rwlock_unlock pthread_rwlock_unlock #endif #endif /* STARPU_SIMGRID, _MSC_VER */ /* * Encapsulation of the pthread_barrier_* functions. */ #if defined(STARPU_SIMGRID) || (!defined(STARPU_HAVE_PTHREAD_BARRIER) && (!defined(_MSC_VER) || defined(BUILDING_STARPU))) #if defined(STARPU_SIMGRID) && (defined(STARPU_HAVE_SIMGRID_BARRIER_H) || defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT) || defined(xbt_barrier_init)) #ifdef STARPU_HAVE_SIMGRID_BARRIER_H typedef sg_bar_t starpu_pthread_barrier_t; #else typedef xbt_bar_t starpu_pthread_barrier_t; #endif typedef int starpu_pthread_barrierattr_t; #ifdef SG_BARRIER_SERIAL_THREAD #define STARPU_PTHREAD_BARRIER_SERIAL_THREAD SG_BARRIER_SERIAL_THREAD #else #define STARPU_PTHREAD_BARRIER_SERIAL_THREAD -1 #endif #else typedef struct { starpu_pthread_mutex_t mutex; starpu_pthread_cond_t cond; starpu_pthread_cond_t cond_destroy; unsigned count; unsigned done; unsigned busy; } starpu_pthread_barrier_t; typedef int starpu_pthread_barrierattr_t; #define STARPU_PTHREAD_BARRIER_SERIAL_THREAD -1 #endif int starpu_pthread_barrier_init(starpu_pthread_barrier_t *barrier, const starpu_pthread_barrierattr_t *attr, unsigned count); int starpu_pthread_barrier_destroy(starpu_pthread_barrier_t *barrier); int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier); #elif !defined(_MSC_VER) /* STARPU_SIMGRID, !STARPU_HAVE_PTHREAD_BARRIER */ typedef pthread_barrier_t starpu_pthread_barrier_t; typedef pthread_barrierattr_t starpu_pthread_barrierattr_t; #define starpu_pthread_barrier_init pthread_barrier_init #define starpu_pthread_barrier_destroy pthread_barrier_destroy #ifdef STARPU_FXT_LOCK_TRACES int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier); #else #define starpu_pthread_barrier_wait pthread_barrier_wait #endif #define STARPU_PTHREAD_BARRIER_SERIAL_THREAD PTHREAD_BARRIER_SERIAL_THREAD #endif /* STARPU_SIMGRID, !STARPU_HAVE_PTHREAD_BARRIER, _MSC_VER */ /* * Encapsulation of the pthread_spin_* functions. */ #if defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK) typedef struct { #ifdef STARPU_SIMGRID int taken; #elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) unsigned taken STARPU_ATTRIBUTE_ALIGNED(16); #else /* we only have a trivial implementation yet ! */ uint32_t taken STARPU_ATTRIBUTE_ALIGNED(16); #endif } starpu_pthread_spinlock_t; int starpu_pthread_spin_init(starpu_pthread_spinlock_t *lock, int pshared); int starpu_pthread_spin_destroy(starpu_pthread_spinlock_t *lock); int starpu_pthread_spin_lock(starpu_pthread_spinlock_t *lock); int starpu_pthread_spin_trylock(starpu_pthread_spinlock_t *lock); int starpu_pthread_spin_unlock(starpu_pthread_spinlock_t *lock); #elif !defined(_MSC_VER) /* !(defined(STARPU_SIMGRID) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK)) */ typedef pthread_spinlock_t starpu_pthread_spinlock_t; #define starpu_pthread_spin_init pthread_spin_init #define starpu_pthread_spin_destroy pthread_spin_destroy #define starpu_pthread_spin_lock pthread_spin_lock #define starpu_pthread_spin_trylock pthread_spin_trylock #define starpu_pthread_spin_unlock pthread_spin_unlock #endif /* !(defined(STARPU_SIMGRID) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK)) */ /* * Other needed pthread definitions */ #if defined(_MSC_VER) && !defined(BUILDING_STARPU) typedef void *starpu_pthread_rwlock_t; typedef void *starpu_pthread_mutex_t; typedef void *starpu_pthread_cond_t; typedef void *starpu_pthread_barrier_t; #endif /* _MSC_VER */ /* * Simgrid-specific register/wait synchronization * * Producers create a "queue" object, and when they have produced something, * they call either queue_signal or queue_broadcast in order to wake either one * or all consumers waiting on the queue. * * starpu_pthread_queue_init(&global_queue1->queue); * while (1) { * element = compute(); * push(element, global_queue1); * starpu_pthread_queue_signal(global_queue1); * } * starpu_pthread_queue_destroy(&global_queue1->queue); * * Consumers create a "wait" object, then queue_register on as many queues they * want. In their consumption loop, they wait_reset, then test for availability * on all producers, and if none was available, call wait_wait to actually wait * for producers. On termination, consumers have to queue_unregister before * destroying the "wait" object: * * starpu_pthread_wait_t wait; * * starpu_pthread_wait_init(&wait); * starpu_pthread_queue_register(&wait, &global_queue1->queue); * starpu_pthread_queue_register(&wait, &global_queue2->queue); * * while (1) { * int sleep = 1; * starpu_pthread_wait_reset(&wait); * if (global_queue1->navailable) * { * work(global_queue1); * sleep = 0; * } * if (global_queue2->navailable) * { * work(global_queue2); * sleep = 0; * } * if (sleep) * starpu_pthread_wait_wait(&wait); * } * starpu_pthread_queue_unregister(&wait, &global_queue1->queue); * starpu_pthread_queue_unregister(&wait, &global_queue2->queue); * starpu_pthread_wait_destroy(&wait); */ #ifdef STARPU_SIMGRID typedef struct { starpu_pthread_mutex_t mutex; starpu_pthread_cond_t cond; unsigned block; } starpu_pthread_wait_t; typedef struct { starpu_pthread_mutex_t mutex; starpu_pthread_wait_t **queue; unsigned allocqueue; unsigned nqueue; } starpu_pthread_queue_t; int starpu_pthread_queue_init(starpu_pthread_queue_t *q); int starpu_pthread_queue_signal(starpu_pthread_queue_t *q); int starpu_pthread_queue_broadcast(starpu_pthread_queue_t *q); int starpu_pthread_queue_destroy(starpu_pthread_queue_t *q); int starpu_pthread_wait_init(starpu_pthread_wait_t *w); int starpu_pthread_queue_register(starpu_pthread_wait_t *w, starpu_pthread_queue_t *q); int starpu_pthread_queue_unregister(starpu_pthread_wait_t *w, starpu_pthread_queue_t *q); int starpu_pthread_wait_reset(starpu_pthread_wait_t *w); int starpu_pthread_wait_wait(starpu_pthread_wait_t *w); int starpu_pthread_wait_timedwait(starpu_pthread_wait_t *w, const struct timespec *abstime); int starpu_pthread_wait_destroy(starpu_pthread_wait_t *w); #endif /* * Encapsulation of the semaphore functions. */ #ifdef STARPU_SIMGRID #ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H typedef sg_sem_t starpu_sem_t; #else typedef msg_sem_t starpu_sem_t; #endif int starpu_sem_destroy(starpu_sem_t *sem); int starpu_sem_getvalue(starpu_sem_t *sem, int *retval); int starpu_sem_init(starpu_sem_t *sem, int pshared, unsigned value); int starpu_sem_post(starpu_sem_t *sem); int starpu_sem_trywait(starpu_sem_t *sem); int starpu_sem_wait(starpu_sem_t *sem); #elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */ typedef sem_t starpu_sem_t; #define starpu_sem_destroy sem_destroy #define starpu_sem_getvalue sem_getvalue #define starpu_sem_init sem_init #define starpu_sem_post sem_post int starpu_sem_trywait(starpu_sem_t *sem); int starpu_sem_wait(starpu_sem_t *sem); #endif #ifdef __cplusplus } #endif #endif /* __STARPU_THREAD_H__ */ starpu-1.4.9+dfsg/include/starpu_thread_util.h000066400000000000000000000614051507764646700215140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ // The documentation for this file is in doc/doxygen/chapters/api/threads.doxy #ifndef __STARPU_THREAD_UTIL_H__ #define __STARPU_THREAD_UTIL_H__ #include #include #include #ifdef __cplusplus extern "C" { #endif #if !(defined(_MSC_VER) && !defined(BUILDING_STARPU)) /* * Encapsulation of the starpu_pthread_create_* functions. */ #define STARPU_PTHREAD_CREATE_ON(name, thread, attr, routine, arg, where) \ do { \ int p_ret = starpu_pthread_create_on((name), (thread), (attr), (routine), (arg), (where)); \ if (STARPU_UNLIKELY(p_ret != 0)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_create_on: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #define STARPU_PTHREAD_CREATE(thread, attr, routine, arg) \ do { \ int p_ret = starpu_pthread_create((thread), (attr), (routine), (arg)); \ if (STARPU_UNLIKELY(p_ret != 0)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_create: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #define STARPU_PTHREAD_JOIN(thread, retval) \ do { \ int p_ret = starpu_pthread_join((thread), (retval)); \ if (STARPU_UNLIKELY(p_ret != 0)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_join: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) /* * Encapsulation of the starpu_pthread_mutex_* functions. */ #define _STARPU_PTHREAD_MUTEX_INIT(mutex, attr) \ do { \ int p_ret = starpu_pthread_mutex_init((mutex), (attr)); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_mutex_init: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #ifdef STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO #define STARPU_PTHREAD_MUTEX_INIT(mutex, attr) \ do { \ if (!attr) \ memset(mutex, 0, sizeof(*mutex)); \ else \ _STARPU_PTHREAD_MUTEX_INIT(mutex, attr); \ } \ while (0) #define STARPU_PTHREAD_MUTEX_INIT0(mutex, attr) \ do { \ if (attr) \ _STARPU_PTHREAD_MUTEX_INIT(mutex, attr); \ } \ while (0) #else #define STARPU_PTHREAD_MUTEX_INIT(mutex, attr) _STARPU_PTHREAD_MUTEX_INIT(mutex, attr) #define STARPU_PTHREAD_MUTEX_INIT0(mutex, attr) _STARPU_PTHREAD_MUTEX_INIT(mutex, attr) #endif #define STARPU_PTHREAD_MUTEX_DESTROY(mutex) \ do { \ int p_ret = starpu_pthread_mutex_destroy(mutex); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_mutex_destroy: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #ifdef STARPU_DEBUG #define _STARPU_CHECK_NOT_SCHED_MUTEX(mutex, file, line) \ starpu_pthread_mutex_check_sched((mutex), file, line) #else #define _STARPU_CHECK_NOT_SCHED_MUTEX(mutex, file, line) #endif #define STARPU_PTHREAD_MUTEX_LOCK(mutex) \ do { \ int p_ret = starpu_pthread_mutex_lock(mutex); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_mutex_lock: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ _STARPU_CHECK_NOT_SCHED_MUTEX(mutex, __FILE__, __LINE__); \ } \ while (0) #define STARPU_PTHREAD_MUTEX_LOCK_SCHED(mutex) \ do { \ int p_ret = starpu_pthread_mutex_lock_sched(mutex); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_mutex_lock_sched: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #define STARPU_PTHREAD_MUTEX_TRYLOCK(mutex) \ _starpu_pthread_mutex_trylock(mutex, __FILE__, __LINE__) static STARPU_INLINE int _starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex, char *file, int line) { int p_ret = starpu_pthread_mutex_trylock(mutex); if (STARPU_UNLIKELY(p_ret != 0 && p_ret != EBUSY)) { fprintf(stderr, "%s:%d starpu_pthread_mutex_trylock: %s\n", file, line, strerror(p_ret)); STARPU_ABORT(); } _STARPU_CHECK_NOT_SCHED_MUTEX(mutex, file, line); return p_ret; } #define STARPU_PTHREAD_MUTEX_TRYLOCK_SCHED(mutex) \ _starpu_pthread_mutex_trylock_sched(mutex, __FILE__, __LINE__) static STARPU_INLINE int _starpu_pthread_mutex_trylock_sched(starpu_pthread_mutex_t *mutex, char *file, int line) { int p_ret = starpu_pthread_mutex_trylock_sched(mutex); if (STARPU_UNLIKELY(p_ret != 0 && p_ret != EBUSY)) { fprintf(stderr, "%s:%d starpu_pthread_mutex_trylock_sched: %s\n", file, line, strerror(p_ret)); STARPU_ABORT(); } return p_ret; } #define STARPU_PTHREAD_MUTEX_UNLOCK(mutex) \ do { \ _STARPU_CHECK_NOT_SCHED_MUTEX(mutex, __FILE__, __LINE__); \ int p_ret = starpu_pthread_mutex_unlock(mutex); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_mutex_unlock: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #define STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(mutex) \ do { \ int p_ret = starpu_pthread_mutex_unlock_sched(mutex); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_mutex_unlock_sched: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) /* * Encapsulation of the starpu_pthread_key_* functions. */ #define STARPU_PTHREAD_KEY_CREATE(key, destr) \ do { \ int p_ret = starpu_pthread_key_create((key), (destr)); \ if (STARPU_UNLIKELY(p_ret != 0)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_key_create: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ } \ } \ while (0) #define STARPU_PTHREAD_KEY_DELETE(key) \ do { \ int p_ret = starpu_pthread_key_delete((key)); \ if (STARPU_UNLIKELY(p_ret != 0)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_key_delete: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ } \ } \ while (0) #define STARPU_PTHREAD_SETSPECIFIC(key, ptr) \ do { \ int p_ret = starpu_pthread_setspecific((key), (ptr)); \ if (STARPU_UNLIKELY(p_ret != 0)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_setspecific: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ }; \ } \ while (0) #define STARPU_PTHREAD_GETSPECIFIC(key) starpu_pthread_getspecific((key)) /* * Encapsulation of the starpu_pthread_rwlock_* functions. */ #define _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) \ do { \ int p_ret = starpu_pthread_rwlock_init((rwlock), (attr)); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_rwlock_init: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #ifdef STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO #define STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) \ do { \ if (!attr) \ memset(rwlock, 0, sizeof(*rwlock)); \ else \ _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr); \ } \ while (0) #define STARPU_PTHREAD_RWLOCK_INIT0(rwlock, attr) \ do { \ if (attr) \ _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr); \ } \ while (0) #else #define STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) #define STARPU_PTHREAD_RWLOCK_INIT0(rwlock, attr) _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) #endif #define STARPU_PTHREAD_RWLOCK_RDLOCK(rwlock) \ do { \ int p_ret = starpu_pthread_rwlock_rdlock(rwlock); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_rwlock_rdlock: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #define STARPU_PTHREAD_RWLOCK_TRYRDLOCK(rwlock) \ _starpu_pthread_rwlock_tryrdlock(rwlock, __FILE__, __LINE__) static STARPU_INLINE int _starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock, char *file, int line) { int p_ret = starpu_pthread_rwlock_tryrdlock(rwlock); if (STARPU_UNLIKELY(p_ret != 0 && p_ret != EBUSY)) { fprintf(stderr, "%s:%d starpu_pthread_rwlock_tryrdlock: %s\n", file, line, strerror(p_ret)); STARPU_ABORT(); } return p_ret; } #define STARPU_PTHREAD_RWLOCK_WRLOCK(rwlock) \ do { \ int p_ret = starpu_pthread_rwlock_wrlock(rwlock); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_rwlock_wrlock: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #define STARPU_PTHREAD_RWLOCK_TRYWRLOCK(rwlock) \ _starpu_pthread_rwlock_trywrlock(rwlock, __FILE__, __LINE__) static STARPU_INLINE int _starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock, char *file, int line) { int p_ret = starpu_pthread_rwlock_trywrlock(rwlock); if (STARPU_UNLIKELY(p_ret != 0 && p_ret != EBUSY)) { fprintf(stderr, "%s:%d starpu_pthread_rwlock_trywrlock: %s\n", file, line, strerror(p_ret)); STARPU_ABORT(); } return p_ret; } #define STARPU_PTHREAD_RWLOCK_UNLOCK(rwlock) \ do { \ int p_ret = starpu_pthread_rwlock_unlock(rwlock); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_rwlock_unlock: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #define STARPU_PTHREAD_RWLOCK_DESTROY(rwlock) \ do { \ int p_ret = starpu_pthread_rwlock_destroy(rwlock); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_rwlock_destroy: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) /* * Encapsulation of the starpu_pthread_cond_* functions. */ #define _STARPU_PTHREAD_COND_INIT(cond, attr) \ do { \ int p_ret = starpu_pthread_cond_init((cond), (attr)); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_cond_init: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #ifdef STARPU_PTHREAD_COND_INITIALIZER_ZERO #define STARPU_PTHREAD_COND_INIT(cond, attr) \ do { \ if (!attr) \ memset(cond, 0, sizeof(*cond)); \ else \ _STARPU_PTHREAD_COND_INIT(cond, attr); \ } \ while (0) #define STARPU_PTHREAD_COND_INIT0(cond, attr) \ do { \ if (attr) \ _STARPU_PTHREAD_COND_INIT(cond, attr); \ } \ while (0) #else #define STARPU_PTHREAD_COND_INIT(cond, attr) _STARPU_PTHREAD_COND_INIT(cond, attr) #define STARPU_PTHREAD_COND_INIT0(cond, attr) _STARPU_PTHREAD_COND_INIT(cond, attr) #endif #define STARPU_PTHREAD_COND_DESTROY(cond) \ do { \ int p_ret = starpu_pthread_cond_destroy(cond); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_cond_destroy: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #define STARPU_PTHREAD_COND_SIGNAL(cond) \ do { \ int p_ret = starpu_pthread_cond_signal(cond); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_cond_signal: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #define STARPU_PTHREAD_COND_BROADCAST(cond) \ do { \ int p_ret = starpu_pthread_cond_broadcast(cond); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_cond_broadcast: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #define STARPU_PTHREAD_COND_WAIT(cond, mutex) \ do { \ int p_ret = starpu_pthread_cond_wait((cond), (mutex)); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_cond_wait: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) /* pthread_cond_timedwait not yet available on windows, but we don't run simgrid there anyway */ #ifdef STARPU_SIMGRID #define STARPU_PTHREAD_COND_TIMEDWAIT(cond, mutex, abstime) \ _starpu_pthread_cond_timedwait(cond, mutex, abstime, __FILE__, __LINE__) static STARPU_INLINE int _starpu_pthread_cond_timedwait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex, const struct timespec *abstime, char *file, int line) { int p_ret = starpu_pthread_cond_timedwait(cond, mutex, abstime); if (STARPU_UNLIKELY(p_ret != 0 && p_ret != ETIMEDOUT)) { fprintf(stderr, "%s:%d starpu_pthread_cond_timedwait: %s\n", file, line, strerror(p_ret)); STARPU_ABORT(); } return p_ret; } #endif /* * Encapsulation of the starpu_pthread_barrier_* functions. */ #define STARPU_PTHREAD_BARRIER_INIT(barrier, attr, count) \ do { \ int p_ret = starpu_pthread_barrier_init((barrier), (attr), (count)); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_barrier_init: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #define STARPU_PTHREAD_BARRIER_DESTROY(barrier) \ do { \ int p_ret = starpu_pthread_barrier_destroy((barrier)); \ if (STARPU_UNLIKELY(p_ret)) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_barrier_destroy: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #define STARPU_PTHREAD_BARRIER_WAIT(barrier) \ do { \ int p_ret = starpu_pthread_barrier_wait((barrier)); \ if (STARPU_UNLIKELY(!((p_ret == 0) || (p_ret == STARPU_PTHREAD_BARRIER_SERIAL_THREAD)))) \ { \ fprintf(stderr, \ "%s:%d starpu_pthread_barrier_wait: %s\n", \ __FILE__, __LINE__, strerror(p_ret)); \ STARPU_ABORT(); \ } \ } \ while (0) #endif /* _MSC_VER */ #ifdef __cplusplus } #endif #endif /* __STARPU_THREAD_UTIL_H__ */ starpu-1.4.9+dfsg/include/starpu_tree.h000066400000000000000000000030661507764646700201460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_TREE_H__ #define __STARPU_TREE_H__ #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Tree Tree @brief API tree facilities @{ */ /** todo */ struct starpu_tree { struct starpu_tree *nodes; struct starpu_tree *father; int arity; int id; int level; int is_pu; }; void starpu_tree_reset_visited(struct starpu_tree *tree, char *visited); void starpu_tree_prepare_children(unsigned arity, struct starpu_tree *father); void starpu_tree_insert(struct starpu_tree *tree, int id, int level, int is_pu, int arity, struct starpu_tree *father); struct starpu_tree *starpu_tree_get(struct starpu_tree *tree, int id); struct starpu_tree *starpu_tree_get_neighbour(struct starpu_tree *tree, struct starpu_tree *node, char *visited, char *present); void starpu_tree_free(struct starpu_tree *tree); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_TREE_H__ */ starpu-1.4.9+dfsg/include/starpu_util.h000066400000000000000000001064741507764646700201730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_UTIL_H__ #define __STARPU_UTIL_H__ #include #include #include #include #include #include #ifdef __GLIBC__ #include #endif #ifdef STARPU_SIMGRID_MC #include #endif #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Toolbox Toolbox @brief The following macros allow to make GCC extensions portable, and to have a code which can be compiled with any C compiler. @{ */ /** Return true (non-zero) if GCC version \p maj.\p min or later is being used (macro taken from glibc.) */ #if defined __GNUC__ && defined __GNUC_MINOR__ #define STARPU_GNUC_PREREQ(maj, min) \ ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) #else #define STARPU_GNUC_PREREQ(maj, min) 0 #endif /** When building with a GNU C Compiler, allow programmers to mark an expression as unlikely. */ #ifdef __GNUC__ #define STARPU_UNLIKELY(expr) (__builtin_expect(!!(expr), 0)) #else #define STARPU_UNLIKELY(expr) (expr) #endif /** When building with a GNU C Compiler, allow programmers to mark an expression as likely. */ #ifdef __GNUC__ #define STARPU_LIKELY(expr) (__builtin_expect(!!(expr), 1)) #else #define STARPU_LIKELY(expr) (expr) #endif /** When building with a GNU C Compiler, defined to __attribute__((unused)) */ #ifdef __GNUC__ #define STARPU_ATTRIBUTE_UNUSED __attribute__((unused)) #else #define STARPU_ATTRIBUTE_UNUSED #endif /** When building with a GNU C Compiler, defined to __attribute__((noreturn)) */ #ifdef __GNUC__ #define STARPU_ATTRIBUTE_NORETURN __attribute__((noreturn)) #else #define STARPU_ATTRIBUTE_NORETURN #endif /** When building with a GNU C Compiler, defined to __attribute__((visibility ("default"))) */ #ifdef __GNUC__ #define STARPU_ATTRIBUTE_VISIBILITY_DEFAULT __attribute__((visibility("default"))) #else #define STARPU_ATTRIBUTE_VISIBILITY_DEFAULT #endif /** When building with a GNU C Compiler, defined to \#pragma GCC visibility push(hidden) */ #ifdef __GNUC__ #define STARPU_VISIBILITY_PUSH_HIDDEN #pragma GCC visibility push(hidden) #else #define STARPU_VISIBILITY_PUSH_HIDDEN #endif /** When building with a GNU C Compiler, defined to \#pragma GCC visibility pop */ #ifdef __GNUC__ #define STARPU_VISIBILITY_POP #pragma GCC visibility pop #else #define STARPU_VISIBILITY_POP #endif /** When building with a GNU C Compiler, defined to __attribute__((malloc)) */ #ifdef __GNUC__ #define STARPU_ATTRIBUTE_MALLOC __attribute__((malloc)) #else #define STARPU_ATTRIBUTE_MALLOC #endif /** When building with a GNU C Compiler, defined to __attribute__((warn_unused_result)) */ #ifdef __GNUC__ #define STARPU_ATTRIBUTE_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) #else #define STARPU_ATTRIBUTE_WARN_UNUSED_RESULT #endif /** When building with a GNU C Compiler, defined to __attribute__((pure)) */ #ifdef __GNUC__ #define STARPU_ATTRIBUTE_PURE __attribute__((pure)) #else #define STARPU_ATTRIBUTE_PURE #endif /** When building with a GNU C Compiler, defined to__attribute__((aligned(size))) */ #ifdef __GNUC__ #define STARPU_ATTRIBUTE_ALIGNED(size) __attribute__((aligned(size))) #else #define STARPU_ATTRIBUTE_ALIGNED(size) #endif #ifdef __GNUC__ #define STARPU_ATTRIBUTE_FORMAT(type, string, first) __attribute__((format(type, string, first))) #else #define STARPU_ATTRIBUTE_FORMAT(type, string, first) #endif /* Note that if we're compiling C++, then just use the "inline" keyword, since it's part of C++ */ #if defined(c_plusplus) || defined(__cplusplus) #define STARPU_INLINE inline #elif defined(_MSC_VER) || defined(__HP_cc) #define STARPU_INLINE __inline #else #define STARPU_INLINE __inline__ #endif #if STARPU_GNUC_PREREQ(4, 3) #define STARPU_ATTRIBUTE_CALLOC_SIZE(num, size) __attribute__((alloc_size(num, size))) #define STARPU_ATTRIBUTE_ALLOC_SIZE(size) __attribute__((alloc_size(size))) #else #define STARPU_ATTRIBUTE_CALLOC_SIZE(num, size) #define STARPU_ATTRIBUTE_ALLOC_SIZE(size) #endif #if STARPU_GNUC_PREREQ(3, 1) && !defined(BUILDING_STARPU) && !defined(STARPU_USE_DEPRECATED_API) && !defined(STARPU_USE_DEPRECATED_ONE_ZERO_API) #define STARPU_DEPRECATED __attribute__((__deprecated__)) #else #define STARPU_DEPRECATED #endif /* __GNUC__ */ #if STARPU_GNUC_PREREQ(3, 3) #define STARPU_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) #else #define STARPU_WARN_UNUSED_RESULT #endif /* __GNUC__ */ #define STARPU_BACKTRACE_LENGTH 32 #ifdef __GLIBC__ #define STARPU_DUMP_BACKTRACE() \ do { \ void *__ptrs[STARPU_BACKTRACE_LENGTH]; \ int __n = backtrace(__ptrs, STARPU_BACKTRACE_LENGTH); \ backtrace_symbols_fd(__ptrs, __n, 2); \ } \ while (0) #else #define STARPU_DUMP_BACKTRACE() \ do { \ } \ while (0) #endif #ifdef STARPU_SIMGRID_MC #define STARPU_SIMGRID_ASSERT(x) MC_assert(!!(x)) #else #define STARPU_SIMGRID_ASSERT(x) #endif /** Unless StarPU has been configured with the option \ref enable-fast "--enable-fast", this macro will abort if the expression \p x is false. */ #ifdef STARPU_NO_ASSERT #define STARPU_ASSERT(x) \ do { \ if (0) { (void)(x); } \ } \ while (0) #else #if defined(__CUDACC__) || defined(STARPU_HAVE_WINDOWS) #define STARPU_ASSERT(x) \ do { \ if (STARPU_UNLIKELY(!(x))) \ { \ STARPU_DUMP_BACKTRACE(); \ STARPU_SIMGRID_ASSERT(0 && #x); \ *(int *)NULL = 0; \ } \ } \ while (0) #else #define STARPU_ASSERT(x) \ do { \ if (STARPU_UNLIKELY(!(x))) \ { \ STARPU_DUMP_BACKTRACE(); \ STARPU_SIMGRID_ASSERT(0 && #x); \ assert(0 && #x); \ } \ } \ while (0) #endif #endif /** Unless StarPU has been configured with the option \ref enable-fast "--enable-fast", this macro will abort if the pointer \p x is not pointing to valid memory. */ #ifdef STARPU_NO_ASSERT #define STARPU_ASSERT_ACCESSIBLE(x) \ do { \ if (0) { (void)(x); } \ } \ while (0) #else #define STARPU_ASSERT_ACCESSIBLE(ptr) \ do { \ volatile char __c STARPU_ATTRIBUTE_UNUSED = *(char *)(ptr); \ } \ while (0) #endif /** This macro will abort compilation if the expression \p x is false. */ #if STARPU_GNUC_PREREQ(4, 6) && !defined __cplusplus && !defined(__STRICT_ANSI__) #define STARPU_STATIC_ASSERT(x) _Static_assert(x, #x) #else #define STARPU_STATIC_ASSERT(x) STARPU_ASSERT(x) #endif /** This macro will abort if the expression \p x is false. The string \p msg will be displayed. */ #if defined(__INTEL_COMPILER) #pragma warning disable 279 // otherwise icc triggers "warning #279: controlling expression is constant" (probably because of assert(0 && #x)) #endif #if defined(__CUDACC__) || defined(STARPU_HAVE_WINDOWS) #define STARPU_ASSERT_MSG_ALWAYS(x, msg, ...) \ do { \ if (STARPU_UNLIKELY(!(x))) \ { \ STARPU_DUMP_BACKTRACE(); \ fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n\n", __starpu_func__, ##__VA_ARGS__); \ STARPU_SIMGRID_ASSERT(0 && #x); \ *(int *)NULL = 0; \ } \ } \ while (0) #else #define STARPU_ASSERT_MSG_ALWAYS(x, msg, ...) \ do { \ if (STARPU_UNLIKELY(!(x))) \ { \ STARPU_DUMP_BACKTRACE(); \ fprintf(stderr, "\n[starpu][%s][assert failure] " msg "\n\n", __starpu_func__, ##__VA_ARGS__); \ STARPU_SIMGRID_ASSERT(0 && #x); \ assert(0 && #x); \ abort(); \ *(int *)NULL = 0; \ } \ } \ while (0) #endif /** Unless StarPU has been configured with the option \ref enable-fast "--enable-fast", this macro will abort if the expression \p x is false. The string \p msg will be displayed. */ #ifdef STARPU_NO_ASSERT #define STARPU_ASSERT_MSG(x, msg, ...) \ do { \ if (0) \ { \ (void)(x); \ (void)msg; \ } \ } \ while (0) #else #define STARPU_ASSERT_MSG(x, msg, ...) \ STARPU_ASSERT_MSG_ALWAYS(x, msg, ##__VA_ARGS__) #endif #ifdef __APPLE_CC__ #ifdef __clang_analyzer__ #define _starpu_abort() exit(42) #else #define _starpu_abort() *(volatile int *)NULL = 0 #endif #else #define _starpu_abort() abort() #endif /** Abort the program. */ #define STARPU_ABORT() \ do { \ STARPU_DUMP_BACKTRACE(); \ fprintf(stderr, "[starpu][abort][%s()@%s:%d]\n", __starpu_func__, __FILE__, __LINE__); \ _starpu_abort(); \ } \ while (0) /** Print the string '[starpu][abort][name of the calling function:name of the file:line in the file]' followed by the given string \p msg and abort the program */ #define STARPU_ABORT_MSG(msg, ...) \ do { \ STARPU_DUMP_BACKTRACE(); \ fprintf(stderr, "[starpu][abort][%s()@%s:%d] " msg "\n", __starpu_func__, __FILE__, __LINE__, ##__VA_ARGS__); \ _starpu_abort(); \ } \ while (0) #if defined(_MSC_VER) #undef STARPU_HAVE_STRERROR_R #endif #if defined(STARPU_HAVE_STRERROR_R) #if (!defined(__GLIBC__) || !__GLIBC__) || ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && (!defined(_GNU_SOURCE))) /* XSI-compliant version of strerror_r returns an int */ #define starpu_strerror_r(errnum, buf, buflen) \ do \ { \ int _ret = strerror_r((errnum), (buf), (buflen)); \ STARPU_ASSERT(_ret == 0); \ } \ while (0) #else /* GNU-specific version of strerror_r returns a char * */ #define starpu_strerror_r(errnum, buf, buflen) \ do \ { \ char *const _user_buf = (buf); \ const size_t _user_buflen = (buflen); \ /* the GNU-specific behaviour when 'buf' == NULL cannot be emulated with the XSI-compliant version */ \ STARPU_ASSERT((buf) != NULL); \ char *_tmp_buf = strerror_r((errnum), _user_buf, _user_buflen); \ if (_tmp_buf != _user_buf) \ { \ if (_user_buflen > 0) \ { \ strncpy(_user_buf, _tmp_buf, _user_buflen - 1); \ _user_buf[_user_buflen - 1] = '\0'; \ } \ } \ } \ while (0) #endif /* strerror_r ABI version */ #endif /* STARPU_HAVE_STRERROR_R */ /** Abort the program (after displaying \p message) if \p err has a value which is not 0. */ #if defined(STARPU_HAVE_STRERROR_R) #define STARPU_CHECK_RETURN_VALUE(err, message, ...) \ { \ if (STARPU_UNLIKELY(err != 0)) \ { \ char xmessage[256]; \ starpu_strerror_r(-err, xmessage, 256); \ fprintf(stderr, "[starpu] Unexpected value: <%d:%s> returned for " message "\n", err, xmessage, ##__VA_ARGS__); \ STARPU_ABORT(); \ } \ } #else #define STARPU_CHECK_RETURN_VALUE(err, message, ...) \ { \ if (STARPU_UNLIKELY(err != 0)) \ { \ fprintf(stderr, "[starpu] Unexpected value: <%d> returned for " message "\n", err, ##__VA_ARGS__); \ STARPU_ABORT(); \ } \ } #endif /** Abort the program (after displaying \p message) if \p err is different from \p value. */ #if defined(STARPU_HAVE_STRERROR_R) #define STARPU_CHECK_RETURN_VALUE_IS(err, value, message, ...) \ { \ if (STARPU_UNLIKELY(err != value)) \ { \ char xmessage[256]; \ starpu_strerror_r(-err, xmessage, 256); \ fprintf(stderr, "[starpu] Unexpected value: <%d!=%d:%s> returned for " message "\n", err, value, xmessage, ##__VA_ARGS__); \ STARPU_ABORT(); \ } \ } #else #define STARPU_CHECK_RETURN_VALUE_IS(err, value, message, ...) \ { \ if (STARPU_UNLIKELY(err != value)) \ { \ fprintf(stderr, "[starpu] Unexpected value: <%d != %d> returned for " message "\n", err, value, ##__VA_ARGS__); \ STARPU_ABORT(); \ } \ } #endif /* Note: do not use _starpu_cmpxchg / _starpu_xchg / _starpu_cmpxchgl / * _starpu_xchgl / _starpu_cmpxchg64 / _starpu_xchg64, which only * assembly-hand-written fallbacks used when building with an old gcc. * Rather use STARPU_VAL_COMPARE_AND_SWAP and STARPU_VAL_EXCHANGE available on * all platforms with a recent-enough gcc */ #if defined(__i386__) || defined(__x86_64__) static __starpu_inline unsigned _starpu_cmpxchg(unsigned *ptr, unsigned old, unsigned next) { __asm__ __volatile__("lock cmpxchgl %2,%1" : "+a"(old), "+m"(*ptr) : "q"(next) : "memory"); return old; } #define STARPU_HAVE_CMPXCHG static __starpu_inline unsigned _starpu_xchg(unsigned *ptr, unsigned next) { /* Note: xchg is always locked already */ __asm__ __volatile__("xchgl %1,%0" : "+m"(*ptr), "+q"(next) : : "memory"); return next; } #define STARPU_HAVE_XCHG static __starpu_inline uint32_t _starpu_cmpxchg32(uint32_t *ptr, uint32_t old, uint32_t next) { __asm__ __volatile__("lock cmpxchgl %2,%1" : "+a"(old), "+m"(*ptr) : "q"(next) : "memory"); return old; } #define STARPU_HAVE_CMPXCHG32 static __starpu_inline uint32_t _starpu_xchg32(uint32_t *ptr, uint32_t next) { /* Note: xchg is always locked already */ __asm__ __volatile__("xchgl %1,%0" : "+m"(*ptr), "+q"(next) : : "memory"); return next; } #define STARPU_HAVE_XCHG32 #if defined(__i386__) static __starpu_inline unsigned long _starpu_cmpxchgl(unsigned long *ptr, unsigned long old, unsigned long next) { __asm__ __volatile__("lock cmpxchgl %2,%1" : "+a"(old), "+m"(*ptr) : "q"(next) : "memory"); return old; } #define STARPU_HAVE_CMPXCHGL static __starpu_inline unsigned long _starpu_xchgl(unsigned long *ptr, unsigned long next) { /* Note: xchg is always locked already */ __asm__ __volatile__("xchgl %1,%0" : "+m"(*ptr), "+q"(next) : : "memory"); return next; } #define STARPU_HAVE_XCHGL #endif #if defined(__x86_64__) static __starpu_inline unsigned long _starpu_cmpxchgl(unsigned long *ptr, unsigned long old, unsigned long next) { __asm__ __volatile__("lock cmpxchgq %2,%1" : "+a"(old), "+m"(*ptr) : "q"(next) : "memory"); return old; } #define STARPU_HAVE_CMPXCHGL static __starpu_inline unsigned long _starpu_xchgl(unsigned long *ptr, unsigned long next) { /* Note: xchg is always locked already */ __asm__ __volatile__("xchgq %1,%0" : "+m"(*ptr), "+q"(next) : : "memory"); return next; } #define STARPU_HAVE_XCHGL #endif #if defined(__i386__) static __starpu_inline uint64_t _starpu_cmpxchg64(uint64_t *ptr, uint64_t old, uint64_t next) { uint32_t next_hi = next >> 32; uint32_t next_lo = next & 0xfffffffful; __asm__ __volatile__("lock cmpxchg8b %1" : "+A"(old), "+m"(*ptr) : "c"(next_hi), "b"(next_lo) : "memory"); return old; } #define STARPU_HAVE_CMPXCHG64 #endif #if defined(__x86_64__) static __starpu_inline uint64_t _starpu_cmpxchg64(uint64_t *ptr, uint64_t old, uint64_t next) { __asm__ __volatile__("lock cmpxchgq %2,%1" : "+a"(old), "+m"(*ptr) : "q"(next) : "memory"); return old; } #define STARPU_HAVE_CMPXCHG64 static __starpu_inline uint64_t _starpu_xchg64(uint64_t *ptr, uint64_t next) { /* Note: xchg is always locked already */ __asm__ __volatile__("xchgq %1,%0" : "+m"(*ptr), "+q"(next) : : "memory"); return next; } #define STARPU_HAVE_XCHG64 #endif #endif #define STARPU_ATOMIC_SOMETHING(name, expr) \ static __starpu_inline unsigned starpu_atomic_##name(unsigned *ptr, unsigned value) \ { \ unsigned old, next; \ while (1) \ { \ old = *ptr; \ next = expr; \ if (_starpu_cmpxchg(ptr, old, next) == old) \ break; \ }; \ return expr; \ } #define STARPU_ATOMIC_SOMETHINGL(name, expr) \ static __starpu_inline unsigned long starpu_atomic_##name##l(unsigned long *ptr, unsigned long value) \ { \ unsigned long old, next; \ while (1) \ { \ old = *ptr; \ next = expr; \ if (_starpu_cmpxchgl(ptr, old, next) == old) \ break; \ }; \ return expr; \ } #define STARPU_ATOMIC_SOMETHING64(name, expr) \ static __starpu_inline uint64_t starpu_atomic_##name##64(uint64_t * ptr, uint64_t value) \ { \ uint64_t old, next; \ while (1) \ { \ old = *ptr; \ next = expr; \ if (_starpu_cmpxchg64(ptr, old, next) == old) \ break; \ }; \ return expr; \ } /* Atomic addition, returns the new value */ #if defined(STARPU_HAVE_SYNC_FETCH_AND_ADD) #define STARPU_ATOMIC_ADD(ptr, value) (__sync_fetch_and_add((ptr), (value)) + (value)) #define STARPU_ATOMIC_ADDL(ptr, value) (__sync_fetch_and_add((ptr), (value)) + (value)) #elif defined(STARPU_HAVE_ATOMIC_FETCH_ADD) #define STARPU_ATOMIC_ADD(ptr, value) (__atomic_fetch_add((ptr), (value), __ATOMIC_SEQ_CST) + (value)) #define STARPU_ATOMIC_ADDL(ptr, value) (__atomic_fetch_add((ptr), (value), __ATOMIC_SEQ_CST) + (value)) #else #if defined(STARPU_HAVE_CMPXCHG) STARPU_ATOMIC_SOMETHING(add, old + value) #define STARPU_ATOMIC_ADD(ptr, value) starpu_atomic_add(ptr, value) #endif #if defined(STARPU_HAVE_CMPXCHGL) STARPU_ATOMIC_SOMETHINGL(add, old + value) #define STARPU_ATOMIC_ADDL(ptr, value) starpu_atomic_addl(ptr, value) #endif #endif #if defined(STARPU_HAVE_SYNC_FETCH_AND_ADD_8) #define STARPU_ATOMIC_ADD64(ptr, value) (__sync_fetch_and_add((ptr), (value)) + (value)) #elif defined(STARPU_HAVE_ATOMIC_FETCH_ADD_8) #define STARPU_ATOMIC_ADD64(ptr, value) (__atomic_fetch_add((ptr), (value), __ATOMIC_SEQ_CST) + (value)) #else #if defined(STARPU_HAVE_CMPXCHG64) STARPU_ATOMIC_SOMETHING64(add, old + value) #define STARPU_ATOMIC_ADD64(ptr, value) starpu_atomic_add64(ptr, value) #endif #endif /* Atomic OR, returns the *old* value */ #if defined(STARPU_HAVE_SYNC_FETCH_AND_OR) #define STARPU_ATOMIC_OR(ptr, value) (__sync_fetch_and_or((ptr), (value))) #define STARPU_ATOMIC_ORL(ptr, value) (__sync_fetch_and_or((ptr), (value))) #elif defined(STARPU_HAVE_ATOMIC_FETCH_OR) #define STARPU_ATOMIC_OR(ptr, value) (__atomic_fetch_or((ptr), (value), __ATOMIC_SEQ_CST)) #define STARPU_ATOMIC_ORL(ptr, value) (__atomic_fetch_or((ptr), (value), __ATOMIC_SEQ_CST)) #else #if defined(STARPU_HAVE_CMPXCHG) STARPU_ATOMIC_SOMETHING(or, old | value) #define STARPU_ATOMIC_OR(ptr, value) starpu_atomic_or(ptr, value) #endif #if defined(STARPU_HAVE_CMPXCHGL) STARPU_ATOMIC_SOMETHINGL(or, old | value) #define STARPU_ATOMIC_ORL(ptr, value) starpu_atomic_orl(ptr, value) #endif #endif #if defined(STARPU_HAVE_SYNC_FETCH_AND_OR_8) #define STARPU_ATOMIC_OR64(ptr, value) (__sync_fetch_and_or((ptr), (value))) #elif defined(STARPU_HAVE_ATOMIC_FETCH_OR_8) #define STARPU_ATOMIC_OR64(ptr, value) (__atomic_fetch_or((ptr), (value), __ATOMIC_SEQ_CST)) #else #if defined(STARPU_HAVE_CMPXCHG64) STARPU_ATOMIC_SOMETHING64(or, old | value) #define STARPU_ATOMIC_OR64(ptr, value) starpu_atomic_or64(ptr, value) #endif #endif /* Try to replace `old' with `value' at `ptr'. Returns true iff the swap was successful. */ #ifdef STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP #define STARPU_BOOL_COMPARE_AND_SWAP(ptr, old, value) (__sync_bool_compare_and_swap((ptr), (old), (value))) #else #ifdef STARPU_HAVE_CMPXCHG #define STARPU_BOOL_COMPARE_AND_SWAP(ptr, old, value) (_starpu_cmpxchg((ptr), (old), (value)) == (old)) #endif #endif #ifdef STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP #define STARPU_BOOL_COMPARE_AND_SWAP32(ptr, old, value) (__sync_bool_compare_and_swap((ptr), (old), (value))) #else #ifdef STARPU_HAVE_CMPXCHG32 #define STARPU_BOOL_COMPARE_AND_SWAP32(ptr, old, value) (_starpu_cmpxchg32((ptr), (old), (value)) == (old)) #endif #endif #if defined(STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8) #define STARPU_BOOL_COMPARE_AND_SWAP64(ptr, old, value) (__sync_bool_compare_and_swap((ptr), (old), (value))) #elif defined(STARPU_HAVE_ATOMIC_EXCHANGE_N_8) && defined(__GNUC__) static __starpu_inline int starpu_bool_compare_and_swap64(uint64_t *ptr, uint64_t old, uint64_t value) { uint64_t expected = old; return __atomic_compare_exchange_n(ptr, &expected, value, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } #define STARPU_BOOL_COMPARE_AND_SWAP64(ptr, old, value) starpu_bool_compare_and_swap64((ptr), (old), (value)) #else #ifdef STARPU_HAVE_CMPXCHG64 #define STARPU_BOOL_COMPARE_AND_SWAP64(ptr, old, value) (_starpu_cmpxchg64((ptr), (old), (value)) == (old)) #endif #endif #if UINTPTR_MAX == UINT64_MAX #define STARPU_BOOL_COMPARE_AND_SWAP_PTR(ptr, old, value) STARPU_BOOL_COMPARE_AND_SWAP64((uint64_t*) (ptr), (uint64_t) (old), (uint64_t) (value)) #else #define STARPU_BOOL_COMPARE_AND_SWAP_PTR(ptr, old, value) STARPU_BOOL_COMPARE_AND_SWAP32(ptr, old, value) #endif /* Try to replace `old' with `value' at `ptr'. Returns the value actually seen at `ptr'. */ #ifdef STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP #define STARPU_VAL_COMPARE_AND_SWAP(ptr, old, value) (__sync_val_compare_and_swap((ptr), (old), (value))) #else #ifdef STARPU_HAVE_CMPXCHG #define STARPU_VAL_COMPARE_AND_SWAP(ptr, old, value) (_starpu_cmpxchg((ptr), (old), (value))) #endif #endif #ifdef STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP #define STARPU_VAL_COMPARE_AND_SWAP32(ptr, old, value) (__sync_val_compare_and_swap((ptr), (old), (value))) #else #ifdef STARPU_HAVE_CMPXCHG32 #define STARPU_VAL_COMPARE_AND_SWAP32(ptr, old, value) (_starpu_cmpxchg32((ptr), (old), (value))) #endif #endif #if defined(STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP_8) #define STARPU_VAL_COMPARE_AND_SWAP64(ptr, old, value) (__sync_val_compare_and_swap((ptr), (old), (value))) #elif defined(STARPU_HAVE_ATOMIC_EXCHANGE_N_8) && defined(__GNUC__) static __starpu_inline uint64_t starpu_val_compare_and_swap64(uint64_t *ptr, uint64_t old, uint64_t value) { uint64_t expected = old; if (__atomic_compare_exchange_n(ptr, &expected, value, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) return old; else return expected; } #define STARPU_VAL_COMPARE_AND_SWAP64(ptr, old, value) starpu_val_compare_and_swap64((ptr), (old), (value)) #else #ifdef STARPU_HAVE_CMPXCHG64 #define STARPU_VAL_COMPARE_AND_SWAP64(ptr, old, value) (_starpu_cmpxchg64((ptr), (old), (value))) #endif #endif #if UINTPTR_MAX == UINT64_MAX #define STARPU_VAL_COMPARE_AND_SWAP_PTR(ptr, old, value) ((void*)STARPU_VAL_COMPARE_AND_SWAP64((uint64_t*) (ptr), (uint64_t) (old), (uint64_t) (value))) #else #define STARPU_VAL_COMPARE_AND_SWAP_PTR(ptr, old, value) STARPU_VAL_COMPARE_AND_SWAP32(ptr, old, value) #endif #ifdef STARPU_HAVE_ATOMIC_EXCHANGE_N_8 #define STARPU_VAL_EXCHANGE64(ptr, value) STARPU_VAL_EXCHANGE((ptr)(value)) #else #ifdef STARPU_HAVE_XCHG64 #define STARPU_VAL_EXCHANGE64(ptr, value) (_starpu_xchg64((ptr), (value))) #endif #endif #ifdef STARPU_HAVE_ATOMIC_EXCHANGE_N #define STARPU_VAL_EXCHANGE(ptr, value) (__atomic_exchange_n((ptr), (value), __ATOMIC_SEQ_CST)) #define STARPU_VAL_EXCHANGEL(ptr, value) STARPU_VAL_EXCHANGE((ptr)(value)) #define STARPU_VAL_EXCHANGE32(ptr, value) STARPU_VAL_EXCHANGE((ptr)(value)) #else #ifdef STARPU_HAVE_XCHG #define STARPU_VAL_EXCHANGE(ptr, value) (_starpu_xchg((ptr), (value))) #endif #ifdef STARPU_HAVE_XCHGL #define STARPU_VAL_EXCHANGEL(ptr, value) (_starpu_xchgl((ptr), (value))) #endif #ifdef STARPU_HAVE_XCHG32 #define STARPU_VAL_EXCHANGE32(ptr, value) (_starpu_xchg32((ptr), (value))) #endif #endif /* Returns the previous value */ #ifdef STARPU_HAVE_SYNC_LOCK_TEST_AND_SET #define STARPU_TEST_AND_SET(ptr, value) (__sync_lock_test_and_set((ptr), (value))) #define STARPU_RELEASE(ptr) (__sync_lock_release((ptr))) #elif defined(STARPU_HAVE_XCHG) #define STARPU_TEST_AND_SET(ptr, value) (_starpu_xchg((ptr), (value))) #define STARPU_RELEASE(ptr) (_starpu_xchg((ptr), 0)) #endif #ifdef STARPU_HAVE_SYNC_SYNCHRONIZE #define STARPU_SYNCHRONIZE() __sync_synchronize() #elif defined(__i386__) #define STARPU_SYNCHRONIZE() __asm__ __volatile__("lock; addl $0,0(%%esp)" :: \ : "memory") #elif defined(__KNC__) || defined(__KNF__) #define STARPU_SYNCHRONIZE() __asm__ __volatile__("lock; addl $0,0(%%rsp)" :: \ : "memory") #elif defined(__x86_64__) #define STARPU_SYNCHRONIZE() __asm__ __volatile__("mfence" :: \ : "memory") #elif defined(__ppc__) || defined(__ppc64__) #define STARPU_SYNCHRONIZE() __asm__ __volatile__("sync" :: \ : "memory") #endif /** This macro can be used to do a synchronization. */ #if defined(__x86_64__) #define STARPU_RMB() __asm__ __volatile__("lfence" :: \ : "memory") #elif defined(__aarch64__) #define STARPU_RMB() __asm__ __volatile__("dsb ld" :: \ : "memory") #else #define STARPU_RMB() STARPU_SYNCHRONIZE() #endif /** This macro can be used to do a synchronization. */ #if defined(__x86_64__) #define STARPU_WMB() __asm__ __volatile__("sfence" :: \ : "memory") #elif defined(__aarch64__) #define STARPU_WMB() __asm__ __volatile__("dsb st" :: \ : "memory") #else #define STARPU_WMB() STARPU_SYNCHRONIZE() #endif #if defined(__i386__) || defined(__x86_64__) #define STARPU_CACHELINE_SIZE 64 #elif defined(__ppc__) || defined(__ppc64__) || defined(__ia64__) #define STARPU_CACHELINE_SIZE 128 #elif defined(__s390__) || defined(__s390x__) #define STARPU_CACHELINE_SIZE 256 #else /* Conservative default */ #define STARPU_CACHELINE_SIZE 1024 #endif #ifdef _WIN32 /* Try to fetch the system definition of timespec */ #include #include #ifdef HAVE_UNISTD_H #include #endif #include #if !defined(_MSC_VER) || defined(BUILDING_STARPU) #include #endif #if !defined(STARPU_HAVE_STRUCT_TIMESPEC) || (defined(_MSC_VER) && _MSC_VER < 1900) /* If it didn't get defined in the standard places, then define it ourself */ #ifndef STARPU_TIMESPEC_DEFINED #define STARPU_TIMESPEC_DEFINED 1 struct timespec { time_t tv_sec; /* Seconds */ long tv_nsec; /* Nanoseconds */ }; #endif /* STARPU_TIMESPEC_DEFINED */ #endif /* STARPU_HAVE_STRUCT_TIMESPEC */ /* Fetch gettimeofday on mingw/cygwin */ #if defined(__MINGW32__) || defined(__CYGWIN__) #include #endif #else #include #endif /* _WIN32 */ /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_UTIL_H__ */ starpu-1.4.9+dfsg/include/starpu_worker.h000066400000000000000000000605761507764646700205310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_WORKER_H__ #define __STARPU_WORKER_H__ #include #include #include #include #ifdef STARPU_HAVE_HWLOC #include #endif #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Workers Workers @{ */ /** Memory node Type */ enum starpu_node_kind { STARPU_UNUSED = 0, STARPU_CPU_RAM = 1, /**< CPU core */ STARPU_CUDA_RAM = 2, /**< NVIDIA CUDA device */ STARPU_OPENCL_RAM = 3, /**< OpenCL device */ STARPU_MAX_FPGA_RAM = 4, /**< Maxeler FPGA device */ STARPU_DISK_RAM = 5, /**< Disk memory */ STARPU_MPI_MS_RAM = 6, /**< MPI Slave device */ STARPU_TCPIP_MS_RAM = 7, /**< TCPIP Slave device */ STARPU_HIP_RAM = 8, /**< NVIDIA/AMD HIP device */ STARPU_MAX_RAM = 8, /**< Maximum value of memory types */ STARPU_NRAM = 9, /**< Number of memory types */ }; /** Worker Architecture Type The value 4 which was used by the driver SCC is no longer used as renumbering workers would make unusable old performance model files. */ enum starpu_worker_archtype { STARPU_CPU_WORKER = 0, /**< CPU core */ STARPU_CUDA_WORKER = 1, /**< NVIDIA CUDA device */ STARPU_OPENCL_WORKER = 2, /**< OpenCL device */ STARPU_MAX_FPGA_WORKER = 4, /**< Maxeler FPGA device */ STARPU_MPI_MS_WORKER = 5, /**< MPI Slave device */ STARPU_TCPIP_MS_WORKER = 6, /**< TCPIP Slave device */ STARPU_HIP_WORKER = 7, /**< NVIDIA/AMD HIP device */ STARPU_NARCH = 8, /**< Number of arch types */ STARPU_ANY_WORKER = 255 /**< any worker, used in the hypervisor */ }; #define STARPU_UNKNOWN_WORKER ((enum starpu_worker_archtype)-1) /**< Invalid worker value */ /** Structure needed to iterate on the collection */ struct starpu_sched_ctx_iterator { /** The index of the current worker in the collection, needed when iterating on the collection. */ int cursor; void *value; void *possible_value; char visited[STARPU_NMAXWORKERS]; int possibly_parallel; }; /** Types of structures the worker collection can implement */ enum starpu_worker_collection_type { STARPU_WORKER_TREE, /**< The collection is a tree */ STARPU_WORKER_LIST /**< The collection is an array */ }; /** A scheduling context manages a collection of workers that can be memorized using different data structures. Thus, a generic structure is available in order to simplify the choice of its type. Only the list data structure is available but further data structures(like tree) implementations are foreseen. */ struct starpu_worker_collection { /** The workerids managed by the collection */ int *workerids; void *collection_private; /** The number of workers in the collection */ unsigned nworkers; void *unblocked_workers; unsigned nunblocked_workers; void *masters; unsigned nmasters; char present[STARPU_NMAXWORKERS]; char is_unblocked[STARPU_NMAXWORKERS]; char is_master[STARPU_NMAXWORKERS]; /** The type of structure */ enum starpu_worker_collection_type type; /** Check if there is another element in collection */ unsigned (*has_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it); /** Return the next element in the collection */ int (*get_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it); /** Add a new element in the collection */ int (*add)(struct starpu_worker_collection *workers, int worker); /** Remove an element from the collection */ int (*remove)(struct starpu_worker_collection *workers, int worker); /** Initialize the collection */ void (*init)(struct starpu_worker_collection *workers); /** Deinitialize the collection */ void (*deinit)(struct starpu_worker_collection *workers); /** Initialize the cursor if there is one */ void (*init_iterator)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it); void (*init_iterator_for_parallel_tasks)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it, struct starpu_task *task); }; extern struct starpu_worker_collection starpu_worker_list; extern struct starpu_worker_collection starpu_worker_tree; /** Wait for all workers to be initialised. Calling this function is normally not necessary. It is called for example in tools/starpu_machine_display to make sure all workers information are correctly set before printing their information. See \ref PauseResume for more details. */ void starpu_worker_wait_for_initialisation(void); /** Return true if type matches one of StarPU's defined worker architectures. See \ref TopologyWorkers for more details. */ unsigned starpu_worker_archtype_is_valid(enum starpu_worker_archtype type); /** Convert a mask of architectures to a worker archtype. See \ref TopologyWorkers for more details. */ enum starpu_worker_archtype starpu_arch_mask_to_worker_archtype(unsigned mask); /** Return the number of workers (i.e. processing units executing StarPU tasks). The return value should be at most \ref STARPU_NMAXWORKERS. See \ref TopologyWorkers for more details. */ unsigned starpu_worker_get_count(void); /** Return the number of CPUs controlled by StarPU. The return value should be at most \ref STARPU_MAXCPUS. See \ref TopologyWorkers for more details. */ unsigned starpu_cpu_worker_get_count(void); /** Return the number of CUDA devices controlled by StarPU. The return value should be at most \ref STARPU_MAXCUDADEVS. See \ref TopologyWorkers for more details. */ unsigned starpu_cuda_worker_get_count(void); /** Return the number of HIP devices controlled by StarPU. The return value should be at most \ref STARPU_MAXHIPDEVS. See \ref TopologyWorkers for more details. */ unsigned starpu_hip_worker_get_count(void); /** Return the number of OpenCL devices controlled by StarPU. The return value should be at most \ref STARPU_MAXOPENCLDEVS. See \ref TopologyWorkers for more details. */ unsigned starpu_opencl_worker_get_count(void); /** Return the number of MPI Master Slave workers controlled by StarPU. See \ref TopologyWorkers for more details. */ unsigned starpu_mpi_ms_worker_get_count(void); /** Return the number of TCPIP Master Slave workers controlled by StarPU. See \ref TopologyWorkers for more details. */ unsigned starpu_tcpip_ms_worker_get_count(void); /** Return the identifier of the current worker, i.e the one associated to the calling thread. The return value is either \c -1 if the current context is not a StarPU worker (i.e. when called from the application outside a task or a callback), or an integer between \c 0 and starpu_worker_get_count() - \c 1. See \ref HowToInitializeAComputationLibraryOnceForEachWorker for more details. */ int starpu_worker_get_id(void); unsigned _starpu_worker_get_id_check(const char *f, int l); /** Similar to starpu_worker_get_id(), but abort when called from outside a worker (i.e. when starpu_worker_get_id() would return \c -1). See \ref HowToInitializeAComputationLibraryOnceForEachWorker for more details. */ unsigned starpu_worker_get_id_check(void); #define starpu_worker_get_id_check() _starpu_worker_get_id_check(__FILE__, __LINE__) /** See \ref TopologyWorkers for more details. */ int starpu_worker_get_bindid(int workerid); /** See \ref SchedulingHelpers for more details. */ void starpu_sched_find_all_worker_combinations(void); /** Return the type of processing unit associated to the worker \p id. The worker identifier is a value returned by the function starpu_worker_get_id()). The return value indicates the architecture of the worker: ::STARPU_CPU_WORKER for a CPU core, ::STARPU_CUDA_WORKER for a CUDA device, and ::STARPU_OPENCL_WORKER for a OpenCL device. The return value for an invalid identifier is unspecified. See \ref TopologyWorkers for more details. */ enum starpu_worker_archtype starpu_worker_get_type(int id); /** Return the number of workers of \p type. A positive (or NULL) value is returned in case of success, -EINVAL indicates that \p type is not valid otherwise. See \ref TopologyWorkers for more details. */ int starpu_worker_get_count_by_type(enum starpu_worker_archtype type); /** Get the list of identifiers of workers of \p type. Fill the array \p workerids with the identifiers of the \p workers. The argument \p maxsize indicates the size of the array \p workerids. The return value gives the number of identifiers that were put in the array. -ERANGE is returned is \p maxsize is lower than the number of workers with the appropriate type: in that case, the array is filled with the \p maxsize first elements. To avoid such overflows, the value of maxsize can be chosen by the means of the function starpu_worker_get_count_by_type(), or by passing a value greater or equal to \ref STARPU_NMAXWORKERS. See \ref TopologyWorkers for more details. */ unsigned starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, unsigned maxsize); /** Return the identifier of the \p num -th worker that has the specified \p type. If there is no such worker, -1 is returned. See \ref TopologyWorkers for more details. */ int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num); /** Return the identifier of the worker that has the specified \p type and device id \p devid (which may not be the n-th, if some devices are skipped for instance). If there is no such worker, \c -1 is returned. See \ref TopologyWorkers for more details. */ int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid); /** Return true if worker type can execute this task. See \ref SchedulingHelpers for more details. */ unsigned starpu_worker_type_can_execute_task(enum starpu_worker_archtype worker_type, const struct starpu_task *task); /** Get the name of the worker \p id. StarPU associates a unique human readable string to each processing unit. This function copies at most the \p maxlen first bytes of the unique string associated to the worker \p id into the \p dst buffer. The caller is responsible for ensuring that \p dst is a valid pointer to a buffer of \p maxlen bytes at least. Calling this function on an invalid identifier results in an unspecified behaviour. See \ref TopologyWorkers for more details. */ void starpu_worker_get_name(int id, char *dst, size_t maxlen); /** Display on \p output the list (if any) of all workers. See \ref TopologyWorkers for more details. */ void starpu_worker_display_all(FILE *output); /** Display on \p output the list (if any) of all the workers of the given \p type. See \ref TopologyWorkers for more details. */ void starpu_worker_display_names(FILE *output, enum starpu_worker_archtype type); /** Display on \p output the number of workers of the given \p type. See \ref TopologyWorkers for more details. */ void starpu_worker_display_count(FILE *output, enum starpu_worker_archtype type); /** Return the device id of the worker \p id. The worker should be identified with the value returned by the starpu_worker_get_id() function. In the case of a CUDA worker, this device identifier is the logical device identifier exposed by CUDA (used by the function \c cudaGetDevice() for instance). The device identifier of a CPU worker is the logical identifier of the core on which the worker was bound; this identifier is either provided by the OS or by the library hwloc in case it is available. See \ref TopologyWorkers for more details. */ int starpu_worker_get_devid(int id); /** See \ref TopologyWorkers for more details. */ int starpu_worker_get_devnum(int id); /** See \ref TopologyWorkers for more details. */ int starpu_worker_get_subworkerid(int id); /** See \ref TopologyWorkers for more details. */ struct starpu_tree *starpu_workers_get_tree(void); /** See \ref TopologyWorkers for more details. */ unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx); /** Return when the current task is expected to be finished. Note: the returned date should be used with caution since the task might very well end just after this function returns. See \ref Per-taskFeedback for more details. */ void starpu_worker_get_current_task_exp_end(unsigned workerid, struct timespec *date); /** Return whether worker \p workerid is currently blocked in a parallel task. See \ref SchedulingHelpers for more details. */ unsigned starpu_worker_is_blocked_in_parallel(int workerid); /** See \ref SchedulingHelpers for more details. */ unsigned starpu_worker_is_slave_somewhere(int workerid); /** Return worker \p type as a string. See \ref TopologyWorkers for more details. */ const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type); /** Return worker \p type from a string. Returns STARPU_UNKNOWN_WORKER if the string doesn't match a worker type. See \ref TopologyWorkers for more details. */ enum starpu_worker_archtype starpu_worker_get_type_from_string(const char *type); /** Return worker \p type as a string suitable for environment variable names (CPU, CUDA, etc.). See \ref TopologyWorkers for more details. */ const char *starpu_worker_get_type_as_env_var(enum starpu_worker_archtype type); /** See \ref TopologyWorkers for more details. */ int starpu_bindid_get_workerids(int bindid, int **workerids); /** See \ref TopologyWorkers for more details. */ int starpu_worker_get_devids(enum starpu_worker_archtype type, int *devids, int num); /** See \ref TopologyWorkers for more details. */ int starpu_worker_get_stream_workerids(unsigned devid, int *workerids, enum starpu_worker_archtype type); #ifdef STARPU_HAVE_HWLOC /** If StarPU was compiled with \c hwloc support, return a duplicate of the \c hwloc cpuset associated with the worker \p workerid. The returned cpuset is obtained from a \c hwloc_bitmap_dup() function call. It must be freed by the caller using \c hwloc_bitmap_free(). See \ref InteroperabilityHWLOC for more details. */ hwloc_cpuset_t starpu_worker_get_hwloc_cpuset(int workerid); /** If StarPU was compiled with \c hwloc support, return the \c hwloc object corresponding to the worker \p workerid. See \ref SchedulingHelpers for more details. */ hwloc_obj_t starpu_worker_get_hwloc_obj(int workerid); #endif /** See \ref TopologyMemory for more details. */ int starpu_memory_node_get_devid(unsigned node); /** Return the memory node associated to the current worker. See \ref TopologyWorkers for more details. */ unsigned starpu_worker_get_local_memory_node(void); /** Return the identifier of the memory node associated to the worker identified by \p workerid. See \ref TopologyWorkers for more details. */ unsigned starpu_worker_get_memory_node(unsigned workerid); /** Return the number of memory nodes. See \ref TopologyWorkers for more details. */ unsigned starpu_memory_nodes_get_count(void); /** Return the number of memory nodes of a given \p kind. See \ref TopologyWorkers for more details. */ unsigned starpu_memory_nodes_get_count_by_kind(enum starpu_node_kind kind); /** Get the list of memory nodes of kind \p kind. Fill the array \p memory_nodes_ids with the memory nodes numbers. The argument \p maxsize indicates the size of the array \p memory_nodes_ids. The return value gives the number of node numbers that were put in the array. -ERANGE is returned if \p maxsize is lower than the number of memory nodes with the appropriate kind: in that case, the array is filled with the \p maxsize first elements. To avoid such overflows, the value of maxsize can be chosen by the means of function starpu_memory_nodes_get_count_by_kind(), or by passing a value greater or equal to \ref STARPU_MAXNODES. See \ref TopologyWorkers for more details. */ unsigned starpu_memory_node_get_ids_by_type(enum starpu_node_kind kind, unsigned *memory_nodes_ids, unsigned maxsize); /** Return in \p name the name of a memory node (NUMA 0, CUDA 0, etc.) \p size is the size of the \p name array. See \ref TopologyWorkers for more details. */ int starpu_memory_node_get_name(unsigned node, char *name, size_t size); /** Return the number of NUMA nodes used by StarPU. See \ref TopologyWorkers for more details. */ unsigned starpu_memory_nodes_get_numa_count(void); /** Return the identifier of the memory node associated to the NUMA node identified by \p osid by the Operating System. See \ref TopologyWorkers for more details. */ int starpu_memory_nodes_numa_id_to_devid(int osid); /** Return the Operating System identifier of the memory node whose StarPU identifier is \p id. See \ref TopologyWorkers for more details. */ int starpu_memory_nodes_numa_devid_to_id(unsigned id); /** Return the type of \p node as defined by ::starpu_node_kind. For example, when defining a new data interface, this function should be used in the allocation function to determine on which device the memory needs to be allocated. See \ref TopologyWorkers for more details. */ enum starpu_node_kind starpu_node_get_kind(unsigned node); /** Return the type of worker which operates on memory node kind \p node_kind. See \ref TopologyWorkers for more details. */ enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind); /** Return the type of memory node that arch type \p type operates on. See \ref TopologyWorkers for more details. */ enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type); /** @name Scheduling operations @{ */ /** Return \c !0 if current worker has a scheduling operation in progress, and \c 0 otherwise. */ int starpu_worker_sched_op_pending(void); /** Allow other threads and workers to temporarily observe the current worker state, even though it is performing a scheduling operation. Must be called by a worker before performing a potentially blocking call such as acquiring a mutex other than its own sched_mutex. This function increases \c state_relax_refcnt from the current worker. No more than UINT_MAX-1 nested starpu_worker_relax_on() calls should performed on the same worker. This function is automatically called by starpu_worker_lock() to relax the caller worker state while attempting to lock the target worker. See \ref DefiningANewBasicSchedulingPolicy for more details. */ void starpu_worker_relax_on(void); /** Must be called after a potentially blocking call is complete, to restore the relax state in place before the corresponding starpu_worker_relax_on(). Decreases \c state_relax_refcnt. Calls to starpu_worker_relax_on() and starpu_worker_relax_off() must be properly paired. This function is automatically called by starpu_worker_unlock() after the target worker has been unlocked. See \ref DefiningANewBasicSchedulingPolicy for more details. */ void starpu_worker_relax_off(void); /** Return \c !0 if the current worker \c state_relax_refcnt!=0 and \c 0 otherwise. See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_worker_get_relax_state(void); /** Acquire the sched mutex of \p workerid. If the caller is a worker, distinct from \p workerid, the caller worker automatically enters a relax state while acquiring the target worker lock. See \ref DefiningANewBasicSchedulingPolicy for more details. */ void starpu_worker_lock(int workerid); /** Attempt to acquire the sched mutex of \p workerid. Returns \c 0 if successful, \c !0 if \p workerid sched mutex is held or the corresponding worker is not in a relax state. If the caller is a worker, distinct from \p workerid, the caller worker automatically enters relax state if successfully acquiring the target worker lock. See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_worker_trylock(int workerid); /** Release the previously acquired sched mutex of \p workerid. Restore the relax state of the caller worker if needed. See \ref DefiningANewBasicSchedulingPolicy for more details. */ void starpu_worker_unlock(int workerid); /** Acquire the current worker sched mutex. See \ref DefiningANewBasicSchedulingPolicy for more details. */ void starpu_worker_lock_self(void); /** Release the current worker sched mutex. See \ref DefiningANewBasicSchedulingPolicy for more details. */ void starpu_worker_unlock_self(void); #ifdef STARPU_WORKER_CALLBACKS /** If StarPU was compiled with blocking drivers support and worker callbacks support enabled, allow to specify an external resource manager callback to be notified about workers going to sleep. See \ref SchedulingHelpers for more details. */ void starpu_worker_set_going_to_sleep_callback(void (*callback)(unsigned workerid)); /** If StarPU was compiled with blocking drivers support and worker callbacks support enabled, allow to specify an external resource manager callback to be notified about workers waking-up. See \ref SchedulingHelpers for more details. */ void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid)); #endif /** @} */ /** @} */ /** @defgroup API_Parallel_Tasks Parallel Tasks @{ */ /** Return the number of different combined workers. See \ref SchedulingHelpers for more details. */ unsigned starpu_combined_worker_get_count(void); /** See \ref SchedulingHelpers for more details. */ unsigned starpu_worker_is_combined_worker(int id); /** Return the identifier of the current combined worker. See \ref SchedulingHelpers for more details. */ int starpu_combined_worker_get_id(void); /** Return the size of the current combined worker, i.e. the total number of CPUS running the same task in the case of ::STARPU_SPMD parallel tasks, or the total number of threads that the task is allowed to start in the case of ::STARPU_FORKJOIN parallel tasks. See \ref Fork-modeParallelTasks and \ref SPMD-modeParallelTasks for more details. */ int starpu_combined_worker_get_size(void); /** Return the rank of the current thread within the combined worker. Can only be used in ::STARPU_SPMD parallel tasks, to know which part of the task to work on. See \ref SPMD-modeParallelTasks for more details. */ int starpu_combined_worker_get_rank(void); /** Register a new combined worker and get its identifier. See \ref SchedulingHelpers for more details. */ int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[]); /** Get the description of a combined worker. See \ref SchedulingHelpers for more details. \p workerid is the requested combined worker id, \p worker_size returns the number of workers in the combined worker, \p combined_workerid returns the list for worker ids in the combined worker. */ int starpu_combined_worker_get_description(int workerid, int *worker_size, int **combined_workerid); /** Variant of starpu_worker_can_execute_task() compatible with combined workers. See \ref DefiningANewBasicSchedulingPolicy for more details. */ int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl); /** Initialise the barrier for the parallel task, and dispatch the task between the different workers of the given combined worker. See \ref SchedulingHelpers for more details. */ void starpu_parallel_task_barrier_init(struct starpu_task *task, int workerid); /** Initialise the barrier for the parallel task, to be pushed to \p worker_size workers (without having to explicit a given combined worker). See \ref SchedulingHelpers for more details. */ void starpu_parallel_task_barrier_init_n(struct starpu_task *task, int worker_size); /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPU_WORKER_H__ */ starpu-1.4.9+dfsg/julia/000077500000000000000000000000001507764646700151145ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/Makefile.am000066400000000000000000000014511507764646700171510ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-subdirtests.mk SUBDIRS = src if STARPU_BUILD_EXAMPLES SUBDIRS += examples endif EXTRA_DIST = README starpu-1.4.9+dfsg/julia/Makefile.in000066400000000000000000000663701507764646700171750ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_BUILD_EXAMPLES_TRUE@am__append_1 = examples subdir = julia ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = src examples am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/make/starpu-subdirtests.mk README DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = src $(am__append_1) EXTRA_DIST = README all: all-recursive .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign julia/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign julia/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile installdirs: installdirs-recursive installdirs-am: install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ check-am clean clean-generic clean-libtool cscopelist-am ctags \ ctags-am distclean distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ installdirs-am maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ps ps-am tags tags-am uninstall uninstall-am .PRECIOUS: Makefile # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # recheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i recheck || RET=1 ; \ done ; \ exit $$RET showcheckfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/julia/README000066400000000000000000000033431507764646700157770ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # Contents ======== * Installing Julia * Installing StarPU module for Julia * Running Examples Installing Julia ---------------- Julia version 1.3+ is required and can be downloaded from https://julialang.org/downloads/. Installing StarPU module for Julia ---------------------------------- First, build the jlstarpu_c_wrapper library: $ make Then, you need to add the lib/ directory to your library path and the julia/ directory to your Julia load path: $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/src/.lib $ export JULIA_LOAD_PATH=$PWD/src:$JULIA_LOAD_PATH This step can also be done by sourcing the setenv.sh script: $ . setenv.sh Running Examples ---------------- You can find several examples in the examples/ directory. For each example X, three versions are provided: - X.c: Original C+starpu code - X_native.jl: Native Julia version (without StarPU) - X.jl: Julia version using StarPU To run the original C+StarPU code: $ make cstarpu.dat To run the native Julia version: $ make julia_native.dat To run the Julia version using StarPU: $ make julia_generatedc.dat starpu-1.4.9+dfsg/julia/examples/000077500000000000000000000000001507764646700167325ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/examples/Makefile.am000066400000000000000000000066151507764646700207760ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk include $(top_srcdir)/make/starpu-loader.mk BUILT_SOURCES = CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log EXTRA_DIST = \ axpy/axpy.jl \ axpy/axpy.sh \ black_scholes/black_scholes.jl \ callback/callback.jl \ callback/callback.sh \ check_deps/check_deps.jl \ check_deps/check_deps.sh \ cholesky/cholesky_codelets.jl \ cholesky/cholesky_common.jl \ cholesky/cholesky_native.jl \ cholesky/cholesky_implicit.jl \ cholesky/cholesky_tag.jl \ cholesky/cholesky.sh \ dependency/end_dep.jl \ dependency/end_dep.sh \ dependency/tag_dep.jl \ dependency/tag_dep.sh \ dependency/task_dep.sh \ dependency/task_dep.jl \ gemm/gemm.jl \ gemm/gemm_native.jl \ gemm/gemm.sh \ mandelbrot/mandelbrot_native.jl \ mandelbrot/mandelbrot.jl \ mandelbrot/mandelbrot.sh \ mult/mult_native.jl \ mult/mult.jl \ mult/perf.sh \ mult/mult_starpu.sh \ task_insert_color/task_insert_color.jl \ task_insert_color/task_insert_color.sh \ variable/variable.jl \ variable/variable_native.jl \ variable/variable.sh \ vector_scal/vector_scal.jl \ vector_scal/vector_scal.sh examplebindir = $(libdir)/starpu/julia examplebin_PROGRAMS = AM_CFLAGS += $(MAGMA_CFLAGS) $(APP_CFLAGS) AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpujulia-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS) LIBS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) LIBS += -lm check_PROGRAMS = $(LOADER) $(starpu_julia_EXAMPLES) SHELL_TESTS = STARPU_JULIA_EXAMPLES = examplebin_PROGRAMS += $(STARPU_JULIA_EXAMPLES) TESTS = $(SHELL_TESTS) $(STARPU_JULIA_EXAMPLES) ###################### # Examples # ###################### SHELL_TESTS += check_deps/check_deps.sh STARPU_JULIA_EXAMPLES += mult/mult mult_mult_SOURCES = mult/mult.c mult/cpu_mult.c SHELL_TESTS += mult/mult_starpu.sh STARPU_JULIA_EXAMPLES += task_insert_color/task_insert_color SHELL_TESTS += task_insert_color/task_insert_color.sh SHELL_TESTS += variable/variable.sh SHELL_TESTS += vector_scal/vector_scal.sh STARPU_JULIA_EXAMPLES += mandelbrot/mandelbrot mandelbrot_mandelbrot_SOURCES = mandelbrot/mandelbrot.c mandelbrot/cpu_mandelbrot.c mandelbrot/cpu_mandelbrot.h SHELL_TESTS += mandelbrot/mandelbrot.sh STARPU_JULIA_EXAMPLES += callback/callback SHELL_TESTS += callback/callback.sh SHELL_TESTS += dependency/tag_dep.sh SHELL_TESTS += dependency/task_dep.sh SHELL_TESTS += dependency/end_dep.sh if !STARPU_NO_BLAS_LIB SHELL_TESTS += axpy/axpy.sh SHELL_TESTS += cholesky/cholesky.sh SHELL_TESTS += gemm/gemm.sh endif starpu-1.4.9+dfsg/julia/examples/Makefile.in000066400000000000000000002071341507764646700210060ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ noinst_PROGRAMS = $(am__EXEEXT_2) # Make tests run through mpiexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec # switch off local socket usage #MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 @STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader examplebin_PROGRAMS = $(am__EXEEXT_1) check_PROGRAMS = TESTS = $(SHELL_TESTS) $(am__EXEEXT_1) @STARPU_NO_BLAS_LIB_FALSE@am__append_8 = axpy/axpy.sh \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky.sh gemm/gemm.sh subdir = julia/examples ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = execute.sh CONFIG_CLEAN_VPATH_FILES = am__EXEEXT_1 = mult/mult$(EXEEXT) \ task_insert_color/task_insert_color$(EXEEXT) \ mandelbrot/mandelbrot$(EXEEXT) callback/callback$(EXEEXT) am__installdirs = "$(DESTDIR)$(examplebindir)" @STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_2 = loader$(EXEEXT) PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) callback_callback_SOURCES = callback/callback.c am__dirstamp = $(am__leading_dot)dirstamp callback_callback_OBJECTS = callback/callback.$(OBJEXT) callback_callback_LDADD = $(LDADD) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = loader_SOURCES = loader.c loader_OBJECTS = loader-loader.$(OBJEXT) loader_LDADD = $(LDADD) am_mandelbrot_mandelbrot_OBJECTS = mandelbrot/mandelbrot.$(OBJEXT) \ mandelbrot/cpu_mandelbrot.$(OBJEXT) mandelbrot_mandelbrot_OBJECTS = $(am_mandelbrot_mandelbrot_OBJECTS) mandelbrot_mandelbrot_LDADD = $(LDADD) am_mult_mult_OBJECTS = mult/mult.$(OBJEXT) mult/cpu_mult.$(OBJEXT) mult_mult_OBJECTS = $(am_mult_mult_OBJECTS) mult_mult_LDADD = $(LDADD) task_insert_color_task_insert_color_SOURCES = \ task_insert_color/task_insert_color.c task_insert_color_task_insert_color_OBJECTS = \ task_insert_color/task_insert_color.$(OBJEXT) task_insert_color_task_insert_color_LDADD = $(LDADD) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po \ callback/$(DEPDIR)/callback.Po \ mandelbrot/$(DEPDIR)/cpu_mandelbrot.Po \ mandelbrot/$(DEPDIR)/mandelbrot.Po mult/$(DEPDIR)/cpu_mult.Po \ mult/$(DEPDIR)/mult.Po \ task_insert_color/$(DEPDIR)/task_insert_color.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = callback/callback.c loader.c \ $(mandelbrot_mandelbrot_SOURCES) $(mult_mult_SOURCES) \ task_insert_color/task_insert_color.c DIST_SOURCES = callback/callback.c loader.c \ $(mandelbrot_mandelbrot_SOURCES) $(mult_mult_SOURCES) \ task_insert_color/task_insert_color.c am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) AM_RECURSIVE_TARGETS = check recheck TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/execute.sh.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-loader.mk \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ ../src/libstarpujulia-@STARPU_EFFECTIVE_VERSION@.la \ $(STARPU_EXPORTED_LIBS) $(STARPU_OPENCL_LDFLAGS) \ $(STARPU_CUDA_LDFLAGS) -lm LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = $(am__append_4) $(am__append_6) LAUNCHER = $(am__append_3) $(am__append_5) AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(MAGMA_CFLAGS) $(APP_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile # When GNU parallel is available and -j is passed to make, run tests through # parallel, using a "starpu" semaphore. # Also make test shell scripts run its tests through parallel, using a # "substarpu" semaphore. This brings some overload, but only one level. @HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') @STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 @STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ @STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) @STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" @STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) AM_TESTS_FD_REDIRECT = 9>&2 # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # BUILT_SOURCES = CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log EXTRA_DIST = \ axpy/axpy.jl \ axpy/axpy.sh \ black_scholes/black_scholes.jl \ callback/callback.jl \ callback/callback.sh \ check_deps/check_deps.jl \ check_deps/check_deps.sh \ cholesky/cholesky_codelets.jl \ cholesky/cholesky_common.jl \ cholesky/cholesky_native.jl \ cholesky/cholesky_implicit.jl \ cholesky/cholesky_tag.jl \ cholesky/cholesky.sh \ dependency/end_dep.jl \ dependency/end_dep.sh \ dependency/tag_dep.jl \ dependency/tag_dep.sh \ dependency/task_dep.sh \ dependency/task_dep.jl \ gemm/gemm.jl \ gemm/gemm_native.jl \ gemm/gemm.sh \ mandelbrot/mandelbrot_native.jl \ mandelbrot/mandelbrot.jl \ mandelbrot/mandelbrot.sh \ mult/mult_native.jl \ mult/mult.jl \ mult/perf.sh \ mult/mult_starpu.sh \ task_insert_color/task_insert_color.jl \ task_insert_color/task_insert_color.sh \ variable/variable.jl \ variable/variable_native.jl \ variable/variable.sh \ vector_scal/vector_scal.jl \ vector_scal/vector_scal.sh examplebindir = $(libdir)/starpu/julia AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ ###################### # Examples # ###################### SHELL_TESTS = check_deps/check_deps.sh mult/mult_starpu.sh \ task_insert_color/task_insert_color.sh variable/variable.sh \ vector_scal/vector_scal.sh mandelbrot/mandelbrot.sh \ callback/callback.sh dependency/tag_dep.sh \ dependency/task_dep.sh dependency/end_dep.sh $(am__append_8) STARPU_JULIA_EXAMPLES = mult/mult task_insert_color/task_insert_color \ mandelbrot/mandelbrot callback/callback mult_mult_SOURCES = mult/mult.c mult/cpu_mult.c mandelbrot_mandelbrot_SOURCES = mandelbrot/mandelbrot.c mandelbrot/cpu_mandelbrot.c mandelbrot/cpu_mandelbrot.h all: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) all-am .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign julia/examples/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign julia/examples/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): execute.sh: $(top_builddir)/config.status $(srcdir)/execute.sh.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ clean-checkPROGRAMS: @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list install-examplebinPROGRAMS: $(examplebin_PROGRAMS) @$(NORMAL_INSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ fi; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ while read p p1; do if test -f $$p \ || test -f $$p1 \ ; then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' \ -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ } \ ; done uninstall-examplebinPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' \ `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(examplebindir)" && rm -f $$files clean-examplebinPROGRAMS: @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list clean-noinstPROGRAMS: @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list callback/$(am__dirstamp): @$(MKDIR_P) callback @: > callback/$(am__dirstamp) callback/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) callback/$(DEPDIR) @: > callback/$(DEPDIR)/$(am__dirstamp) callback/callback.$(OBJEXT): callback/$(am__dirstamp) \ callback/$(DEPDIR)/$(am__dirstamp) callback/callback$(EXEEXT): $(callback_callback_OBJECTS) $(callback_callback_DEPENDENCIES) $(EXTRA_callback_callback_DEPENDENCIES) callback/$(am__dirstamp) @rm -f callback/callback$(EXEEXT) $(AM_V_CCLD)$(LINK) $(callback_callback_OBJECTS) $(callback_callback_LDADD) $(LIBS) loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) @rm -f loader$(EXEEXT) $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) mandelbrot/$(am__dirstamp): @$(MKDIR_P) mandelbrot @: > mandelbrot/$(am__dirstamp) mandelbrot/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) mandelbrot/$(DEPDIR) @: > mandelbrot/$(DEPDIR)/$(am__dirstamp) mandelbrot/mandelbrot.$(OBJEXT): mandelbrot/$(am__dirstamp) \ mandelbrot/$(DEPDIR)/$(am__dirstamp) mandelbrot/cpu_mandelbrot.$(OBJEXT): mandelbrot/$(am__dirstamp) \ mandelbrot/$(DEPDIR)/$(am__dirstamp) mandelbrot/mandelbrot$(EXEEXT): $(mandelbrot_mandelbrot_OBJECTS) $(mandelbrot_mandelbrot_DEPENDENCIES) $(EXTRA_mandelbrot_mandelbrot_DEPENDENCIES) mandelbrot/$(am__dirstamp) @rm -f mandelbrot/mandelbrot$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mandelbrot_mandelbrot_OBJECTS) $(mandelbrot_mandelbrot_LDADD) $(LIBS) mult/$(am__dirstamp): @$(MKDIR_P) mult @: > mult/$(am__dirstamp) mult/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) mult/$(DEPDIR) @: > mult/$(DEPDIR)/$(am__dirstamp) mult/mult.$(OBJEXT): mult/$(am__dirstamp) \ mult/$(DEPDIR)/$(am__dirstamp) mult/cpu_mult.$(OBJEXT): mult/$(am__dirstamp) \ mult/$(DEPDIR)/$(am__dirstamp) mult/mult$(EXEEXT): $(mult_mult_OBJECTS) $(mult_mult_DEPENDENCIES) $(EXTRA_mult_mult_DEPENDENCIES) mult/$(am__dirstamp) @rm -f mult/mult$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mult_mult_OBJECTS) $(mult_mult_LDADD) $(LIBS) task_insert_color/$(am__dirstamp): @$(MKDIR_P) task_insert_color @: > task_insert_color/$(am__dirstamp) task_insert_color/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) task_insert_color/$(DEPDIR) @: > task_insert_color/$(DEPDIR)/$(am__dirstamp) task_insert_color/task_insert_color.$(OBJEXT): \ task_insert_color/$(am__dirstamp) \ task_insert_color/$(DEPDIR)/$(am__dirstamp) task_insert_color/task_insert_color$(EXEEXT): $(task_insert_color_task_insert_color_OBJECTS) $(task_insert_color_task_insert_color_DEPENDENCIES) $(EXTRA_task_insert_color_task_insert_color_DEPENDENCIES) task_insert_color/$(am__dirstamp) @rm -f task_insert_color/task_insert_color$(EXEEXT) $(AM_V_CCLD)$(LINK) $(task_insert_color_task_insert_color_OBJECTS) $(task_insert_color_task_insert_color_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) -rm -f callback/*.$(OBJEXT) -rm -f mandelbrot/*.$(OBJEXT) -rm -f mult/*.$(OBJEXT) -rm -f task_insert_color/*.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@callback/$(DEPDIR)/callback.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mandelbrot/$(DEPDIR)/cpu_mandelbrot.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mandelbrot/$(DEPDIR)/mandelbrot.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mult/$(DEPDIR)/cpu_mult.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mult/$(DEPDIR)/mult.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@task_insert_color/$(DEPDIR)/task_insert_color.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< loader-loader.o: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c loader-loader.obj: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs -rm -rf callback/.libs callback/_libs -rm -rf mandelbrot/.libs mandelbrot/_libs -rm -rf mult/.libs mult/_libs -rm -rf task_insert_color/.libs task_insert_color/_libs ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-am TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-am CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-am cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: $(check_PROGRAMS) @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all $(check_PROGRAMS) @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? check_deps/check_deps.sh.log: check_deps/check_deps.sh @p='check_deps/check_deps.sh'; \ b='check_deps/check_deps.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mult/mult_starpu.sh.log: mult/mult_starpu.sh @p='mult/mult_starpu.sh'; \ b='mult/mult_starpu.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) task_insert_color/task_insert_color.sh.log: task_insert_color/task_insert_color.sh @p='task_insert_color/task_insert_color.sh'; \ b='task_insert_color/task_insert_color.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) variable/variable.sh.log: variable/variable.sh @p='variable/variable.sh'; \ b='variable/variable.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) vector_scal/vector_scal.sh.log: vector_scal/vector_scal.sh @p='vector_scal/vector_scal.sh'; \ b='vector_scal/vector_scal.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mandelbrot/mandelbrot.sh.log: mandelbrot/mandelbrot.sh @p='mandelbrot/mandelbrot.sh'; \ b='mandelbrot/mandelbrot.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) callback/callback.sh.log: callback/callback.sh @p='callback/callback.sh'; \ b='callback/callback.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) dependency/tag_dep.sh.log: dependency/tag_dep.sh @p='dependency/tag_dep.sh'; \ b='dependency/tag_dep.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) dependency/task_dep.sh.log: dependency/task_dep.sh @p='dependency/task_dep.sh'; \ b='dependency/task_dep.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) dependency/end_dep.sh.log: dependency/end_dep.sh @p='dependency/end_dep.sh'; \ b='dependency/end_dep.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) axpy/axpy.sh.log: axpy/axpy.sh @p='axpy/axpy.sh'; \ b='axpy/axpy.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) cholesky/cholesky.sh.log: cholesky/cholesky.sh @p='cholesky/cholesky.sh'; \ b='cholesky/cholesky.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) gemm/gemm.sh.log: gemm/gemm.sh @p='gemm/gemm.sh'; \ b='gemm/gemm.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mult/mult.log: mult/mult$(EXEEXT) @p='mult/mult$(EXEEXT)'; \ b='mult/mult'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) task_insert_color/task_insert_color.log: task_insert_color/task_insert_color$(EXEEXT) @p='task_insert_color/task_insert_color$(EXEEXT)'; \ b='task_insert_color/task_insert_color'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mandelbrot/mandelbrot.log: mandelbrot/mandelbrot$(EXEEXT) @p='mandelbrot/mandelbrot$(EXEEXT)'; \ b='mandelbrot/mandelbrot'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) callback/callback.log: callback/callback$(EXEEXT) @p='callback/callback$(EXEEXT)'; \ b='callback/callback'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) check-am all-am: Makefile $(PROGRAMS) installdirs: for dir in "$(DESTDIR)$(examplebindir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-am install-exec: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -rm -f callback/$(DEPDIR)/$(am__dirstamp) -rm -f callback/$(am__dirstamp) -rm -f mandelbrot/$(DEPDIR)/$(am__dirstamp) -rm -f mandelbrot/$(am__dirstamp) -rm -f mult/$(DEPDIR)/$(am__dirstamp) -rm -f mult/$(am__dirstamp) -rm -f task_insert_color/$(DEPDIR)/$(am__dirstamp) -rm -f task_insert_color/$(am__dirstamp) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) clean: clean-am clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ clean-libtool clean-noinstPROGRAMS mostlyclean-am distclean: distclean-am -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f callback/$(DEPDIR)/callback.Po -rm -f mandelbrot/$(DEPDIR)/cpu_mandelbrot.Po -rm -f mandelbrot/$(DEPDIR)/mandelbrot.Po -rm -f mult/$(DEPDIR)/cpu_mult.Po -rm -f mult/$(DEPDIR)/mult.Po -rm -f task_insert_color/$(DEPDIR)/task_insert_color.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-examplebinPROGRAMS install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f callback/$(DEPDIR)/callback.Po -rm -f mandelbrot/$(DEPDIR)/cpu_mandelbrot.Po -rm -f mandelbrot/$(DEPDIR)/mandelbrot.Po -rm -f mult/$(DEPDIR)/cpu_mult.Po -rm -f mult/$(DEPDIR)/mult.Po -rm -f task_insert_color/$(DEPDIR)/task_insert_color.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-examplebinPROGRAMS .MAKE: all check check-am install install-am install-exec \ install-strip .PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ check-am clean clean-checkPROGRAMS clean-examplebinPROGRAMS \ clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ ctags ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am \ install-examplebinPROGRAMS install-exec install-exec-am \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip installcheck installcheck-am \ installdirs maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ uninstall uninstall-am uninstall-examplebinPROGRAMS .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # # Test loading goes through a lot of launchers: # # - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. # either mpirun or starpu_tcpipexec # # - $(LOADER), i.e. tests/loader, is then called to implement timeout, running # gdb, etc. But if it detects that the test is a .sh script, it just executes # it # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # to run the program through e.g. valgrind.sh # # When the program is a shell script, additionally: # # - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) # # - $(MS_LAUNCHER) is called to run the test through starpu_msexec # # - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program # through it. # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # export LAUNCHER @HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL export MS_LAUNCHER LAUNCHER ?= MS_LAUNCHER ?= @STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr export LSAN_OPTIONS export TSAN_OPTIONS # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/julia/examples/axpy/000077500000000000000000000000001507764646700177135ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/examples/axpy/axpy.jl000066400000000000000000000050671507764646700212330ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # using StarPU using Printf const EPSILON = 1e-6 function check(alpha, X, Y) for i in 1:length(X) expected_value = alpha * X[i] + 4.0 if abs(Y[i] - expected_value) > expected_value * EPSILON error("at ", i, ", ", alpha, "*", X[i], "+4.0=", Y[i], ", expected ", expected_value) end end end @target STARPU_CPU+STARPU_CUDA @codelet function axpy(X :: Vector{Float32}, Y :: Vector{Float32}, alpha ::Float32) :: Nothing STARPU_SAXPY(length(X), alpha, X, 1, Y, 1) return end function axpy(N, NBLOCKS, alpha, display = true) X = Array(fill(1.0f0, N)) Y = Array(fill(4.0f0, N)) starpu_memory_pin(X) starpu_memory_pin(Y) block_filter = starpu_data_filter(STARPU_VECTOR_FILTER_BLOCK, NBLOCKS) if display println("BEFORE x[0] = ", X[1]) println("BEFORE y[0] = ", Y[1]) end t_start = time_ns() @starpu_block let hX,hY = starpu_data_register(X, Y) starpu_data_partition(hX, block_filter) starpu_data_partition(hY, block_filter) for b in 1:NBLOCKS starpu_task_insert(codelet_name = "axpy", handles = [hX[b], hY[b]], cl_arg = (Float32(alpha),), tag = starpu_tag_t(b), modes = [STARPU_R, STARPU_RW]) end starpu_task_wait_for_all() end t_end = time_ns() timing = (t_end-t_start)/1000 if display @printf("timing -> %d us %.2f MB/s\n", timing, 3*N*4/timing) println("AFTER y[0] = ", Y[1], " (ALPHA=", alpha, ")") end check(alpha, X, Y) starpu_memory_unpin(X) starpu_memory_unpin(Y) end function main() N = 16 * 1024 * 1024 NBLOCKS = 8 alpha = 3.41 starpu_init() starpu_cublas_init() # warmup axpy(10, 1, alpha, false) axpy(N, NBLOCKS, alpha) starpu_shutdown() end main() starpu-1.4.9+dfsg/julia/examples/axpy/axpy.sh000077500000000000000000000013261507764646700212350ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh axpy/axpy.jl starpu-1.4.9+dfsg/julia/examples/black_scholes/000077500000000000000000000000001507764646700215265ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/examples/black_scholes/black_scholes.jl000066400000000000000000000136311507764646700246550ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2019-2019 Mael Keryell # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import Libdl using StarPU @target STARPU_CPU+STARPU_CUDA @codelet function black_scholes(data ::Matrix{Float64}, res ::Matrix{Float64}) :: Float32 widthn ::Int64 = width(data) # data[1,...] -> S # data[2,...] -> K # data[3,...] -> r # data[4,...] -> T # data[4,...] -> sig p ::Float64 = 0.2316419 b1 ::Float64 = 0.31938153 b2 ::Float64 = -0.356563782 b3 ::Float64 = 1.781477937 b4 ::Float64 = -1.821255978 b5 ::Float64 = 1.330274428 @parallel for i = 1:widthn d1 ::Float64 = (log(data[1,i] / data[2,i]) + (data[3,i] + pow(data[5,i], 2.0) * 0.5) * data[4,i]) / (data[5,i] * sqrt(data[4,i])) d2 ::Float64 = (log(data[1,i] / data[2,i]) + (data[3,i] - pow(data[5,i], 2.0) * 0.5) * data[4,i]) / (data[5,i] * sqrt(data[4,i])) f ::Float64 = 0 ff ::Float64 = 0 s1 ::Float64 = 0 s2 ::Float64 = 0 s3 ::Float64 = 0 s4 ::Float64 = 0 s5 ::Float64 = 0 sz ::Float64 = 0 ######## Compute normcdf of d1 normd1p ::Float64 = 0 normd1n ::Float64 = 0 boold1 ::Int64 = (d1 >= 0) + (d1 <= 0) if (boold1 >= 2) normd1p = 0.5 normd1n = 0.5 else tmp1 ::Float64 = abs(d1) f = 1 / sqrt(2 * M_PI) ff = exp(-pow(tmp1, 2.0) / 2) * f s1 = b1 / (1 + p * tmp1) s2 = b2 / pow((1 + p * tmp1), 2.0) s3 = b3 / pow((1 + p * tmp1), 3.0) s4 = b4 / pow((1 + p * tmp1), 4.0) s5 = b5 / pow((1 + p * tmp1), 5.0) sz = ff * (s1 + s2 + s3 + s4 + s5) if (d1 > 0) normd1p = 1 - sz # normcdf(d1) normd1n = sz # normcdf(-d1) else normd1p = sz normd1n = 1 - sz end end ######## ######## Compute normcdf of d2 normd2p ::Float64 = 0 normd2n ::Float64 = 0 boold2 ::Int64 = (d2 >= 0) + (d2 <= 0) if (boold2 >= 2) normd2p = 0.5 normd2n = 0.5 else tmp2 ::Float64 = abs(d2) f = 1 / sqrt(2 * M_PI) ff = exp(-pow(tmp2, 2.0) / 2) * f s1 = b1 / (1 + p * tmp2) s2 = b2 / pow((1 + p * tmp2), 2.0) s3 = b3 / pow((1 + p * tmp2), 3.0) s4 = b4 / pow((1 + p * tmp2), 4.0) s5 = b5 / pow((1 + p * tmp2), 5.0) sz = ff * (s1 + s2 + s3 + s4 + s5) if (d2 > 0) normd2p = 1 - sz # normcdf(d2) normd2n = sz # normcdf(-d2) else normd2p = sz normd2n = 1 - sz end end # normd1p = (1 + erf(d1/sqrt(2.0)))/2.0 # normd1n = (1 + erf(-d1/sqrt(2.0)))/2.0 # normd2p = (1 + erf(d2/sqrt(2.0)))/2.0 # normd2n = (1 + erf(-d2/sqrt(2.0)))/2.0 res[1,i] = data[1,i] * (normd1p) - data[2,i]*exp(-data[3,i]*data[4,i]) * (normd2p) # S * N(d1) - r*exp(-r*T) * norm(d2) res[2,i] = -data[1,i] * (normd1n) + data[2,i]*exp(-data[3,i]*data[4,i]) * (normd2n) # -S * N(-d1) + r*exp(-r*T) * norm(-d2) end return 0 end starpu_init() function black_scholes_starpu(data ::Matrix{Float64}, res ::Matrix{Float64}, nslices ::Int64) vert = StarpuDataFilter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslices) @starpu_block let dat_handle, res_handle = starpu_data_register(data, res) starpu_data_partition(dat_handle, vert) starpu_data_partition(res_handle, vert) #Compute the price of call and put option in the res matrix @starpu_sync_tasks for task in (1:nslices) @starpu_async_cl black_scholes(dat_handle[task], res_handle[task]) [STARPU_RW, STARPU_RW] end end return 0 end function init_data(data, data_nbr); for i in 1:data_nbr data[1,i] = rand(Float64) * 100 data[2,i] = rand(Float64) * 100 data[3,i] = rand(Float64) data[4,i] = rand(Float64) * 10 data[5,i] = rand(Float64) * 10 end return data end function median_times(data_nbr, nslices, nbr_tests) data ::Matrix{Float64} = zeros(5, data_nbr) # data[1,1] = 100.0 # data[2,1] = 100.0 # data[3,1] = 0.05 # data[4,1] = 1.0 # data[5,1] = 0.2 res ::Matrix{Float64} = zeros(2, data_nbr) exec_times ::Vector{Float64} = [0. for i in 1:nbr_tests] for i = 1:nbr_tests init_data(data, data_nbr) tic() black_scholes_starpu(data, res, nslices); t = toq() exec_times[i] = t end sort!(exec_times) # println(data) # println(res) return exec_times[1 + div(nbr_tests - 1, 2)] end function display_times(start_nbr, step_nbr, stop_nbr, nslices, nbr_tests) i = 1 open("black_scholes_times.dat", "w") do f for data_nbr in (start_nbr : step_nbr : stop_nbr) t = median_times(data_nbr, nslices, nbr_tests) println("Number of data:\n$data_nbr\nTimes:\njl: $t\nC: $(mtc[i])\nGen: $(mtcgen[i])") write(f, "$data_nbr $(t)\n") i = i + 1 end end end starpu-1.4.9+dfsg/julia/examples/callback/000077500000000000000000000000001507764646700204665ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/examples/callback/callback.c000066400000000000000000000043121507764646700223660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is an example of using a callback. We submit a task, whose callback * submits another task (without any callback). */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) starpu_data_handle_t handle; void cpu_codelet(void *descr[], void *_args) { (void)_args; int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); *val += 1; } struct starpu_codelet cl = { .modes = { STARPU_RW }, .cpu_funcs = {cpu_codelet}, .cpu_funcs_name = {"cpu_codelet"}, .nbuffers = 1, .name = "callback" }; void callback_func(void *callback_arg) { int ret; (void)callback_arg; struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = handle; ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } int main(void) { int v=40; int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(int)); struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->callback_func = callback_func; task->callback_arg = NULL; task->handles[0] = handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_wait_for_all(); starpu_data_unregister(handle); FPRINTF(stderr, "v -> %d\n", v); starpu_shutdown(); return (v == 42) ? 0 : 1; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/julia/examples/callback/callback.jl000066400000000000000000000037471507764646700225640ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # using StarPU @target STARPU_CPU @codelet function variable(val ::Ref{Int32}) :: Nothing val[] = val[] + 1 return end function callback(args) cl = args[1] handles = args[2] task = starpu_task(cl = cl, handles=handles) starpu_task_submit(task) end function variable_with_starpu(val ::Ref{Int32}) perfmodel = starpu_perfmodel( perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), symbol = "history_perf" ) cl = starpu_codelet( cpu_func = "variable", modes = [STARPU_RW], perfmodel = perfmodel ) @starpu_block let hVal = starpu_data_register(val) starpu_task_insert(codelet_name = "variable", cl = cl, handles = [hVal], callback = callback, callback_arg = (cl, [hVal])) starpu_task_wait_for_all() end end function display() v = Ref(Int32(40)) variable_with_starpu(v) println("variable -> ", v[]) if v[] == 42 println("result is correct") else error("result is incorret") end end # Disable garbage collector because of random segfault/hang when using mutex. # This issue should be solved with Julia release 1.5. GC.enable(false) starpu_init() display() starpu_shutdown() GC.enable(true) starpu-1.4.9+dfsg/julia/examples/callback/callback.sh000077500000000000000000000013361507764646700225640ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh callback/callback.jl starpu-1.4.9+dfsg/julia/examples/check_deps/000077500000000000000000000000001507764646700210225ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/examples/check_deps/check_deps.jl000066400000000000000000000016211507764646700234410ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import Pkg try using CBinding using Clang using ThreadPools catch Pkg.activate((@__DIR__)*"/../..") Pkg.instantiate() using Clang using CBinding using ThreadPools end using StarPU starpu_translate_headers() starpu-1.4.9+dfsg/julia/examples/check_deps/check_deps.sh000077500000000000000000000013431507764646700234520ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh check_deps/check_deps.jl starpu-1.4.9+dfsg/julia/examples/cholesky/000077500000000000000000000000001507764646700205535ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/examples/cholesky/cholesky.sh000077500000000000000000000015671507764646700227440ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh cholesky/cholesky_native.jl -quickcheck $(dirname $0)/../execute.sh cholesky/cholesky_implicit.jl -quickcheck $(dirname $0)/../execute.sh cholesky/cholesky_tag.jl -quickcheck starpu-1.4.9+dfsg/julia/examples/cholesky/cholesky_codelets.jl000066400000000000000000000030331507764646700246040ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # chol_model_potrf = starpu_perfmodel( perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), symbol = "chol_model_potrf" ) chol_model_trsm = starpu_perfmodel( perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), symbol = "chol_model_trsm" ) chol_model_gemm = starpu_perfmodel( perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), symbol = "chol_model_gemm" ) cl_potrf = starpu_codelet( cpu_func = "potrf", cuda_func = "potrf", modes = [STARPU_RW], color = 0xffff00, perfmodel = chol_model_potrf ) cl_trsm = starpu_codelet( cpu_func = "trsm", cuda_func = "trsm", modes = [STARPU_R, STARPU_RW], color = 0x8080ff, perfmodel = chol_model_trsm ) cl_gemm = starpu_codelet( cpu_func = "gemm", cuda_func = "gemm", modes = [STARPU_R, STARPU_R, STARPU_RW], color = 0x00ff00, perfmodel = chol_model_gemm ) starpu-1.4.9+dfsg/julia/examples/cholesky/cholesky_common.jl000066400000000000000000000107531507764646700243010ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Standard kernels for the Cholesky factorization @target STARPU_CPU+STARPU_CUDA @codelet function potrf(sub11 :: Matrix{Float32}) :: Nothing nx :: Int32 = width(sub11) ld :: Int32 = ld(sub11) for z in 0:nx-1 lambda11 :: Float32 = sqrt(sub11[z+1,z+1]) sub11[z+1,z+1] = lambda11 alpha ::Float32 = 1.0f0 / lambda11 X :: Vector{Float32} = view(sub11, z+2:z+2+(nx-z-2), z+1) STARPU_SSCAL(nx-z-1, alpha, X, 1) alpha = -1.0f0 A :: Matrix{Float32} = view(sub11, z+2:z+2+(nx-z-2), z+2:z+2+(nx-z-2)) STARPU_SSYR("L", nx-z-1, alpha, X, 1, A, ld) end return end @target STARPU_CPU+STARPU_CUDA @codelet function trsm(sub11 :: Matrix{Float32}, sub21 :: Matrix{Float32}) :: Nothing ld11 :: Int32 = ld(sub11) ld21 :: Int32 = ld(sub21) nx21 :: Int32 = width(sub21) ny21 :: Int32 = height(sub21) alpha :: Float32 = 1.0f0 STARPU_STRSM("R", "L", "T", "N", nx21, ny21, alpha, sub11, ld11, sub21, ld21) return end @target STARPU_CPU+STARPU_CUDA @codelet function gemm(left :: Matrix{Float32}, right :: Matrix{Float32}, center :: Matrix{Float32}) :: Nothing dx :: Int32 = width(center) dy :: Int32 = height(center) dz :: Int32 = width(left) ld21 :: Int32 = ld(left) ld12 :: Int32 = ld(center) ld22 :: Int32 = ld(right) alpha :: Float32 = -1.0f0 beta :: Float32 = 1.0f0 STARPU_SGEMM("N", "T", dy, dx, dz, alpha, left, ld21, right, ld12, beta, center, ld22) return end @inline function tag_potrf(k) return starpu_tag_t((UInt64(1)<<60) | UInt64(k)) end @inline function tag_trsm(k, j) return starpu_tag_t((UInt64(3)<<60) | (UInt64(k)<<32) | UInt64(j)) end @inline function tag_gemm(k, i, j) return starpu_tag_t((UInt64(4)<<60) | (UInt64(k)<<32) | (UInt64(i)<<16) | UInt64(j)) end function check(mat::Matrix{Float32}) size_p = size(mat, 1) for i in 1:size_p for j in 1:size_p if j > i mat[i, j] = 0.0f0 end end end test_mat ::Matrix{Float32} = zeros(Float32, size_p, size_p) syrk!('L', 'N', 1.0f0, mat, 0.0f0, test_mat) for i in 1:size_p for j in 1:size_p if j <= i orig = (1.0f0/(1.0f0+(i-1)+(j-1))) + ((i == j) ? 1.0f0*size_p : 0.0f0) err = abs(test_mat[i,j] - orig) / orig if err > 0.0001 got = test_mat[i,j] expected = orig error("[$i, $j] -> $got != $expected (err $err)") end end end end println(stderr, "Verification successful !") end function clean_tags(nblocks) for k in 1:nblocks starpu_tag_remove(tag_potrf(k)) for m in k+1:nblocks starpu_tag_remove(tag_trsm(k, m)) for n in k+1:nblocks if n <= m starpu_tag_remove(tag_gemm(k, m, n)) end end end end end function main(size_p :: Int, nblocks :: Int; verify = false, verbose = false) mat :: Matrix{Float32} = zeros(Float32, size_p, size_p) # create a simple definite positive symetric matrix # Hilbert matrix h(i,j) = 1/(i+j+1) for i in 1:size_p for j in 1:size_p mat[i, j] = 1.0f0 / (1.0f0+(i-1)+(j-1)) + ((i == j) ? 1.0f0*size_p : 0.0f0) end end if verbose display(mat) end starpu_memory_pin(mat) t_start = time_ns() cholesky(mat, size_p, nblocks) t_end = time_ns() starpu_memory_unpin(mat) flop = (1.0*size_p*size_p*size_p)/3.0 time_ms = (t_end-t_start) / 1e6 gflops = flop/(time_ms*1000)/1000 println("$size_p\t$time_ms\t$gflops") clean_tags(nblocks) if verbose display(mat) end if verify check(mat) end end starpu-1.4.9+dfsg/julia/examples/cholesky/cholesky_implicit.jl000066400000000000000000000041451507764646700246210ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # using StarPU using LinearAlgebra.BLAS include("cholesky_common.jl") function cholesky(mat :: Matrix{Float32}, size, nblocks) include("cholesky_codelets.jl") horiz = starpu_data_filter(STARPU_MATRIX_FILTER_BLOCK, nblocks) vert = starpu_data_filter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nblocks) @starpu_block let h_mat = starpu_data_register(mat) starpu_data_map_filters(h_mat, horiz, vert) for k in 1:nblocks starpu_iteration_push(k) starpu_task_insert(cl = cl_potrf, handles = [h_mat[k, k]], tag_only = tag11(k)) for m in k+1:nblocks starpu_task_insert(cl = cl_trsm, handles = [h_mat[k, k], h_mat[m, k]], tag_only = tag_trsm(m, k)) end starpu_data_wont_use(h_mat[k, k]) for m in k+1:nblocks for n in k+1:nblocks if n <= m starpu_task_insert(cl = cl_gemm, handles = [h_mat[m, k], h_mat[n, k], h_mat[m, n]], tag_only= tag_gemm(k, m, n)) end end starpu_data_wont_use(h_mat[m, k]) end starpu_iteration_pop() end starpu_task_wait_for_all() end end starpu_init() starpu_cublas_init() println("# size\tms\tGFlops") if length(ARGS) > 0 && ARGS[1] == "-quickcheck" main(1024, 8, verify = true) else for size in 1024:1024:15360 main(size, 16) end end starpu_shutdown() starpu-1.4.9+dfsg/julia/examples/cholesky/cholesky_native.jl000066400000000000000000000045571507764646700243040ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # using LinearAlgebra function check(mat::Matrix{Float32}) size_p = size(mat, 1) for i in 1:size_p for j in 1:size_p if j < i mat[i, j] = 0.0f0 end end end test_mat ::Matrix{Float32} = zeros(Float32, size_p, size_p) BLAS.syrk!('L', 'T', 1.0f0, mat, 0.0f0, test_mat) for i in 1:size_p for j in 1:size_p if j <= i orig = (1.0f0/(1.0f0+(i-1)+(j-1))) + ((i == j) ? 1.0f0*size_p : 0.0f0) err = abs(test_mat[i,j] - orig) / orig if err > 0.0001 got = test_mat[i,j] expected = orig error("[$i, $j] -> $got != $expected (err $err)") end end end end println(stderr, "Verification successful !") end function main(size_p :: Int; verify = false, verbose = false) mat = zeros(Float32, size_p, size_p) # create a simple definite positive symetric matrix # Hilbert matrix h(i,j) = 1/(i+j+1) for i in 1:size_p for j in 1:size_p mat[i, j] = 1.0f0 / (1.0f0+(i-1)+(j-1)) + ((i == j) ? 1.0f0*size_p : 0.0f0) end end if verbose display(mat) end t_start = time_ns() cholesky!(mat) t_end = time_ns() flop = (1.0*size_p*size_p*size_p)/3.0 time_ms = (t_end-t_start) / 1e6 gflops = flop/(time_ms*1000)/1000 println("$size_p\t$time_ms\t$gflops") if verbose display(mat) end if verify check(mat) end end println("# size\tms\tGFlops") if length(ARGS) > 0 && ARGS[1] == "-quickcheck" main(1024, verify = true) else for size in 1024:1024:15360 main(size) end end starpu-1.4.9+dfsg/julia/examples/cholesky/cholesky_tag.jl000066400000000000000000000061021507764646700235550ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # using StarPU using LinearAlgebra.BLAS include("cholesky_common.jl") function cholesky(mat :: Matrix{Float32}, size, nblocks) include("cholesky_codelets.jl") horiz = starpu_data_filter(STARPU_MATRIX_FILTER_BLOCK, nblocks) vert = starpu_data_filter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nblocks) @starpu_block let h_mat = starpu_data_register(mat) starpu_data_set_sequential_consistency_flag(h_mat, 0) starpu_data_map_filters(h_mat, horiz, vert) entry_task = starpu_task(cl = cl_potrf, handles = [h_mat[1, 1]], tag = tag_potrf(1)) for k in 1:nblocks starpu_iteration_push(k) if k > 1 # enforce dependencies... starpu_tag_declare_deps(tag_potrf(k), tag_gemm(k-1, k, k)) starpu_task_insert(cl = cl_potrf, handles = [h_mat[k, k]], tag = tag_potrf(k)) end for m in k+1:nblocks # enforce dependencies... if k > 1 starpu_tag_declare_deps(tag_trsm(k, m), tag_potrf(k), tag_gemm(k-1, m, k)) else starpu_tag_declare_deps(tag_trsm(k, m), tag_potrf(k)) end starpu_task_insert(cl = cl_trsm, handles = [h_mat[k, k], h_mat[m, k]], tag = tag_trsm(k, m)) for n in k+1:nblocks if n <= m # enforce dependencies... if k > 1 starpu_tag_declare_deps(tag_gemm(k, m, n), tag_gemm(k-1, m, n), tag_trsm(k, n), tag_trsm(k, m)) else starpu_tag_declare_deps(tag_gemm(k, m, n), tag_trsm(k, n), tag_trsm(k, m)) end starpu_task_insert(cl = cl_gemm, handles = [h_mat[m, k], h_mat[n, k], h_mat[m, n]], tag = tag_gemm(k, m, n)) end end end starpu_iteration_pop() end starpu_task_submit(entry_task) starpu_tag_wait(tag_potrf(nblocks)) end end starpu_init() starpu_cublas_init() println("# size\tms\tGFlops") if length(ARGS) > 0 && ARGS[1] == "-quickcheck" main(1024, 8, verify = true) else for size in 1024:1024:15360 main(size, 16) end end starpu_shutdown() starpu-1.4.9+dfsg/julia/examples/dependency/000077500000000000000000000000001507764646700210505ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/examples/dependency/end_dep.jl000066400000000000000000000056211507764646700230010ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # using StarPU @target STARPU_CPU @codelet function codeletA() :: Nothing # print("[Task A] Value = ", val[]); # do nothing end @target STARPU_CPU @codelet function codeletB(val ::Ref{Int32}) :: Nothing # println("[Task B] Value = ", val[]); val[] = val[] *2 end function callbackB(task) sleep(1) starpu_task_end_dep_release(task) end @target STARPU_CPU @codelet function codeletC(val ::Ref{Int32}) :: Nothing # println("[Task C] Value = ", val[]); val[] = val[] *2 end function callbackC(task) starpu_task_end_dep_release(task) end function main() value = Ref(Int32(12)) @starpu_block let perfmodel = starpu_perfmodel( perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), symbol = "history_perf" ) clA = starpu_codelet( cpu_func = "codeletA", perfmodel = perfmodel ) clB = starpu_codelet( cpu_func = "codeletB", modes = [STARPU_RW], perfmodel = perfmodel ) clC = starpu_codelet( cpu_func = "codeletC", modes = [STARPU_RW], perfmodel = perfmodel ) handle = starpu_data_register(value) starpu_data_set_sequential_consistency_flag(handle, 0) taskA = starpu_task(cl = clA, detach=0) taskB = starpu_task(cl = clB, handles = [handle], callback=callbackB, callback_arg=taskA) taskC = starpu_task(cl = clC, handles = [handle], callback=callbackC, callback_arg=taskA) starpu_task_end_dep_add(taskA, 2) starpu_task_declare_deps(taskC, taskB) starpu_task_submit(taskA) starpu_task_submit(taskB) starpu_task_submit(taskC) starpu_task_wait(taskA) starpu_data_acquire_on_node(handle, STARPU_MAIN_RAM, STARPU_R); # Waiting for taskA should have also waited for taskB and taskC if value[] != 48 error("Incorrect value $(value[]) (expected 48)") end starpu_data_release_on_node(handle, STARPU_MAIN_RAM); end println("Value = ", value[]) end # Disable garbage collector because of random segfault/hang when using mutex. # This issue should be solved with Julia release 1.5. GC.enable(false) starpu_init() main() starpu_shutdown() GC.enable(true) starpu-1.4.9+dfsg/julia/examples/dependency/end_dep.sh000077500000000000000000000013361507764646700230100ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh dependency/end_dep.jl starpu-1.4.9+dfsg/julia/examples/dependency/tag_dep.jl000066400000000000000000000070071507764646700230060ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # using StarPU @target STARPU_CPU @codelet function codeletA(val ::Ref{Int32}) :: Nothing # print("[Task A] Value = ", val[]); val[] = val[] * 2 end function callbackA(arg) clB = arg[1] handle = arg[2] tagHoldC = arg[3] taskB = starpu_task(cl = clB, handles = [handle], callback = starpu_tag_notify_from_apps, callback_arg = tagHoldC, sequential_consistency=false) starpu_task_submit(taskB) end @target STARPU_CPU @codelet function codeletB(val ::Ref{Int32}) :: Nothing # println("[Task B] Value = ", val[]); val[] = val[] +1 end @target STARPU_CPU @codelet function codeletC(val ::Ref{Int32}) :: Nothing # println("[Task C] Value = ", val[]); val[] = val[] *2 end # Submit taskA and hold it # Submit taskC and hold it # Release taskA # Execute taskA --> callback: submit taskB # Execute taskB --> callback: release taskC # # All three tasks use the same data in RW, taskB is submitted after # taskC, so taskB should normally only execute after taskC but as the # sequential consistency for (taskB, data) is unset, taskB can # execute straightaway function main() value = Ref(Int32(12)) @starpu_block let tagHoldA :: starpu_tag_t = 32 tagHoldC :: starpu_tag_t = 84 tagA :: starpu_tag_t = 421 tagC :: starpu_tag_t = 842 starpu_tag_declare_deps(tagA, tagHoldA) starpu_tag_declare_deps(tagC, tagHoldC) perfmodel = starpu_perfmodel( perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), symbol = "history_perf" ) clA = starpu_codelet( cpu_func = "codeletA", modes = [STARPU_RW], perfmodel = perfmodel ) clB = starpu_codelet( cpu_func = "codeletB", modes = [STARPU_RW], perfmodel = perfmodel ) clC = starpu_codelet( cpu_func = "codeletC", modes = [STARPU_RW], perfmodel = perfmodel ) handle = starpu_data_register(value) taskA = starpu_task(cl = clA, handles = [handle], tag = tagA, callback = callbackA, callback_arg=(clB, handle, tagHoldC)) starpu_task_submit(taskA) taskC = starpu_task(cl = clC, handles = [handle], tag = tagC) starpu_task_submit(taskC) # Release taskA (we want to make sure it will execute after taskC has been submitted) starpu_tag_notify_from_apps(tagHoldA) starpu_task_wait_for_all() end if value[] != 50 error("Incorrect value $(value[]) (expected 50)") end println("Value = ", value[]) end # Disable garbage collector because of random segfault/hang when using mutex. # This issue should be solved with Julia release 1.5. GC.enable(false) starpu_init() main() starpu_shutdown() GC.enable(true) starpu-1.4.9+dfsg/julia/examples/dependency/tag_dep.sh000077500000000000000000000013361507764646700230150ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh dependency/tag_dep.jl starpu-1.4.9+dfsg/julia/examples/dependency/task_dep.jl000066400000000000000000000046361507764646700232020ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # using StarPU @target STARPU_CPU @codelet function codeletA(val ::Ref{Int32}) :: Nothing # print("[Task A] Value = ", val[]); val[] = val[] * 2 end @target STARPU_CPU @codelet function codeletB(val ::Ref{Int32}) :: Nothing # println("[Task B] Value = ", val[]); val[] = val[] +1 end @target STARPU_CPU @codelet function codeletC(val ::Ref{Int32}) :: Nothing # println("[Task C] Value = ", val[]); val[] = val[] *2 end function main() value = Ref(Int32(12)) @starpu_block let perfmodel = starpu_perfmodel( perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), symbol = "history_perf" ) clA = starpu_codelet( cpu_func = "codeletA", modes = [STARPU_RW], perfmodel = perfmodel ) clB = starpu_codelet( cpu_func = "codeletB", modes = [STARPU_RW], perfmodel = perfmodel ) clC = starpu_codelet( cpu_func = "codeletC", modes = [STARPU_RW], perfmodel = perfmodel ) starpu_data_set_default_sequential_consistency_flag(0) handle = starpu_data_register(value) taskA = starpu_task(cl = clA, handles = [handle]) taskB = starpu_task(cl = clB, handles = [handle]) taskC = starpu_task(cl = clC, handles = [handle]) starpu_task_declare_deps(taskA, taskB) starpu_task_declare_deps(taskC, taskA, taskB) starpu_task_submit(taskA) starpu_task_submit(taskB) starpu_task_submit(taskC) starpu_task_wait_for_all() end if value[] != 52 error("Incorrect value $(value[]) (expected 52)") end println("Value = ", value[]) end starpu_init() main() starpu_shutdown() starpu-1.4.9+dfsg/julia/examples/dependency/task_dep.sh000077500000000000000000000013371507764646700232050ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh dependency/task_dep.jl starpu-1.4.9+dfsg/julia/examples/execute.sh.in000077500000000000000000000031751507764646700213460ustar00rootroot00000000000000#!@REALBASH@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # set -x export JULIA_LOAD_PATH=@STARPU_SRC_DIR@/julia/src:$JULIA_LOAD_PATH export STARPU_BUILD_DIR=@STARPU_BUILD_DIR@ export STARPU_SRC_DIR=@STARPU_SRC_DIR@ export STARPU_JULIA_LIB=@STARPU_BUILD_DIR@/julia/src/.libs/libstarpujulia-1.3 export STARPU_JULIA_BUILD=@STARPU_BUILD_DIR@/julia export LD_LIBRARY_PATH=@STARPU_BUILD_DIR@/julia/src/.libs/:$LD_LIBRARY_PATH export JULIA_NUM_THREADS=8 export STARPU_NOPENCL=0 export STARPU_SCHED=dmda srcdir=@STARPU_SRC_DIR@/julia/examples rm -f genc*.c gencuda*.cu genc*.o if test "$1" == "-calllib" then shift pwd rm -f extern_tasks.so make -f @STARPU_BUILD_DIR@/julia/src/dynamic_compiler/Makefile extern_tasks.so SOURCES_CPU=$srcdir/$1 shift export JULIA_TASK_LIB=$PWD/extern_tasks.so fi srcfile=$1 if test ! -f $srcdir/$srcfile then echo "Error. File $srcdir/$srcfile not found" exit 1 fi shift #cd $srcdir/$(dirname $srcfile) #exec @JULIA@ $(basename $srcfile) $* exec @JULIA@ $srcdir/$srcfile $* starpu-1.4.9+dfsg/julia/examples/gemm/000077500000000000000000000000001507764646700176575ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/examples/gemm/gemm.jl000066400000000000000000000103111507764646700211270ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # using StarPU using LinearAlgebra.BLAS @target STARPU_CPU+STARPU_CUDA @codelet function gemm(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, alpha :: Float32, beta :: Float32) :: Nothing M :: Int32 = height(A) N :: Int32 = width(B) K :: Int32 = width(A) lda :: Int32 = ld(A) ldb :: Int32 = ld(B) ldc :: Int32 = ld(C) STARPU_SGEMM("N", "N", M, N, K, alpha, A, lda, B, ldb, beta, C, ldc) return end function multiply_with_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, alpha :: Float32, beta :: Float32, nslicesx, nslicesy) scale= 3 tmin=0 vert = starpu_data_filter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslicesx) horiz = starpu_data_filter(STARPU_MATRIX_FILTER_BLOCK, nslicesy) @starpu_block let hA,hB,hC = starpu_data_register(A, B, C) starpu_data_partition(hB, vert) starpu_data_partition(hA, horiz) starpu_data_map_filters(hC, vert, horiz) tmin=0 for i in (1 : 10 ) t=time_ns() @starpu_sync_tasks begin for taskx in (1 : nslicesx) for tasky in (1 : nslicesy) starpu_task_insert(codelet_name = "gemm", handles = [hA[tasky], hB[taskx], hC[taskx, tasky]], cl_arg = (alpha, beta), modes = [STARPU_R, STARPU_R, STARPU_RW]) end end end t=time_ns()-t if (tmin==0 || tmin>t) tmin=t end end end return tmin end function approximately_equals( A :: Matrix{Cfloat}, B :: Matrix{Cfloat}, eps = 1e-2 ) (height, width) = size(A) for j in (1 : width) for i in (1 : height) if (abs(A[i,j] - B[i,j]) > eps * max(abs(B[i,j]), abs(A[i,j]))) println("A[$i,$j] : $(A[i,j]), B[$i,$j] : $(B[i,j])") return false end end end return true end function check(expected, A, B, C, alpha, beta) for i in 1 : 10 gemm!('N', 'N', alpha, A, B, beta, expected) end height,width = size(C) for i in 1:height for j in 1:width got = C[i, j] exp = expected[i, j] err = abs(exp - got) / exp if err > 0.0001 error("[$i] -> $got != $exp (err $err)") end end end end function compute_times(io,start_dim, step_dim, stop_dim, nslicesx, nslicesy) for dim in (start_dim : step_dim : stop_dim) A = Array(rand(Cfloat, dim, dim)) B = Array(rand(Cfloat, dim, dim)) C = zeros(Float32, dim, dim) C_ref = copy(C) starpu_memory_pin(A) starpu_memory_pin(B) starpu_memory_pin(C) alpha = 4.0f0 beta = 2.0f0 mt = multiply_with_starpu(A, B, C, alpha, beta, nslicesx, nslicesy) gflop = 2 * dim * dim * dim * 1.e-9 gflops = gflop / (mt * 1.e-9) size=dim*dim*dim*4*3/1024/1024 println(io,"$dim $gflops") println("$dim $gflops") starpu_memory_unpin(A) starpu_memory_unpin(B) starpu_memory_unpin(C) check(C_ref, A, B, C, alpha, beta) end end if size(ARGS, 1) < 1 filename="x.dat" else filename=ARGS[1] end starpu_init() starpu_cublas_init() nblock_x = Int32(ceil(sqrt(starpu_worker_get_count()))) nblock_y = nblock_x io=open(filename,"w") compute_times(io,64,512,4096,nblock_x,nblock_y) close(io) starpu_shutdown() starpu-1.4.9+dfsg/julia/examples/gemm/gemm.sh000077500000000000000000000014401507764646700211420ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh gemm/gemm_native.jl export OMP_NUM_THREADS=1 $(dirname $0)/../execute.sh gemm/gemm.jl starpu-1.4.9+dfsg/julia/examples/gemm/gemm_native.jl000066400000000000000000000032531507764646700225040ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # using LinearAlgebra.BLAS function gemm_without_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, alpha :: Float32, beta :: Float32) tmin = 0 for i in (1 : 10 ) t=time_ns() gemm!('N', 'N', alpha, A, B, beta, C) t=time_ns() - t if (tmin==0 || tmin>t) tmin=t end end return tmin end function compute_times(io,start_dim, step_dim, stop_dim) for dim in (start_dim : step_dim : stop_dim) A = Array(rand(Cfloat, dim, dim)) B = Array(rand(Cfloat, dim, dim)) C = zeros(Float32, dim, dim) alpha = 4.0f0 beta = 2.0f0 mt = gemm_without_starpu(A, B, C, alpha, beta) gflop = 2 * dim * dim * dim * 1.e-9 gflops = gflop / (mt * 1.e-9) size=dim*dim*dim*4*3/1024/1024 println(io,"$dim $gflops") println("$dim $gflops") end end if size(ARGS, 1) < 1 filename="x.dat" else filename=ARGS[1] end io=open(filename,"w") compute_times(io,64,512,4096) close(io) starpu-1.4.9+dfsg/julia/examples/loader.c000066400000000000000000000274611507764646700203560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) #include #else #include #endif #ifdef STARPU_QUICK_CHECK /* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s add some extra times for tests which run with all schedulers */ #define DEFAULT_TIMEOUT 100 #elif !defined(STARPU_LONG_CHECK) /* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ #define DEFAULT_TIMEOUT 300 #else /* Long checks can be very long */ #define DEFAULT_TIMEOUT 1000 #endif #define AUTOTEST_SKIPPED_TEST 77 static pid_t child_pid = 0; static int timeout; #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) static int mygettimeofday(struct timeval *tv, void *tz) { if (tv) { FILETIME ft; unsigned long long res; GetSystemTimeAsFileTime(&ft); /* 100-nanosecond intervals since January 1, 1601 */ res = ft.dwHighDateTime; res <<= 32; res |= ft.dwLowDateTime; res /= 10; /* Now we have microseconds */ res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; /* Now we are based on epoch */ tv->tv_sec = res / 1000000ULL; tv->tv_usec = res % 1000000ULL; } } #else #define mygettimeofday(tv,tz) gettimeofday(tv,tz) #endif #ifdef STARPU_GDB_PATH static int try_launch_gdb(const char *exe, const char *core) { # define GDB_COMMANDS \ "-ex", "py-list", \ "-ex", "starpu-tasks", \ "-ex", "starpu-workers", \ "-ex", "starpu-print-datas-summary", \ "-ex", "starpu-memusage", \ "-ex", "starpu-print-archs", \ "-ex", "starpu-print-registered-models", \ "-ex", "bt full", \ "-ex", "py-bt", \ "-ex", "thread apply all bt full", \ "-ex", "thread apply all py-bt", \ int err; pid_t pid; struct stat st; const char *top_builddir; char *gdb; err = stat(core, &st); if (err != 0) { fprintf(stderr, "while looking for core file of %s: %s: %m\n", exe, core); return -1; } if (!(st.st_mode & S_IFREG)) { fprintf(stderr, "%s: not a regular file\n", core); return -1; } top_builddir = getenv("top_builddir"); pid = fork(); switch (pid) { case 0: /* kid */ if (top_builddir != NULL) { /* Run gdb with Libtool. */ gdb = alloca(strlen(top_builddir) + sizeof("/libtool") + 1); strcpy(gdb, top_builddir); strcat(gdb, "/libtool"); err = execl(gdb, "gdb", "--mode=execute", STARPU_GDB_PATH, "--batch", GDB_COMMANDS exe, core, NULL); } else { /* Run gdb directly */ gdb = STARPU_GDB_PATH; err = execl(gdb, "gdb", "--batch", GDB_COMMANDS exe, core, NULL); } if (err != 0) { fprintf(stderr, "while launching `%s': %m\n", gdb); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); break; case -1: fprintf(stderr, "fork: %m\n"); return -1; default: /* parent */ { pid_t who; int status; who = waitpid(pid, &status, 0); if (who != pid) fprintf(stderr, "while waiting for gdb " "process %d: %m\n", pid); } } return 0; # undef GDB_COMMANDS } #endif /* STARPU_GDB_PATH */ static void launch_gdb(const char *exe) { #ifdef STARPU_GDB_PATH char s[32]; snprintf(s, sizeof(s), "core.%d", child_pid); if (try_launch_gdb(exe, s) < 0) try_launch_gdb(exe, "core"); #endif /* STARPU_GDB_PATH */ } static char *test_name; static void test_cleaner(int sig) { pid_t child_gid; int status; (void) sig; // send signal to all loader family members fprintf(stderr, "[error] test %s has been blocked for %d seconds. Mark it as failed\n", test_name, timeout); child_gid = getpgid(child_pid); kill(-child_gid, SIGQUIT); waitpid(child_pid, &status, 0); launch_gdb(test_name); raise(SIGALRM); exit(EXIT_FAILURE); } static void forwardsig(int sig) { pid_t child_gid; child_gid = getpgid(child_pid); kill(-child_gid, sig); } static int _decode(char **src, char *motif, const char *value) { char *found; found = strstr(*src, motif); if (found == NULL) return 0; char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); strncpy(new_src, *src, found - *src); strcat(new_src, value); strcat(new_src, found+strlen(motif)); *src = new_src; return 1; } static void decode(char **src, char *motif, const char *value) { if (*src) { if (strstr(*src, motif) && value == NULL) { fprintf(stderr, "error: $%s undefined\n", motif); exit(EXIT_FAILURE); } int d = _decode(src, motif, value); while (d) d = _decode(src, motif, value); } } int main(int argc, char *argv[]) { int child_exit_status; char *test_args; char *launcher; char *launcher_args; char *libtool; char *cflags; const char *top_builddir = getenv("top_builddir"); struct sigaction sa; int ret; struct timeval start; struct timeval end; double timing; int x=1; int asan = 0, lsan = 0, tsan = 0, usan = 0; (void) argc; test_args = NULL; timeout = 0; launcher=getenv("STARPU_CHECK_LAUNCHER"); launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); cflags = getenv("CFLAGS"); if (cflags) { if (strstr(cflags, "-fsanitize=address")) asan = 1; if (strstr(cflags, "-fsanitize=leak")) lsan = 1; if (strstr(cflags, "-fsanitize=thread")) tsan = 1; if (strstr(cflags, "-fsanitize=undefined")) usan = 1; } if (argv[x] && strcmp(argv[x], "-t") == 0) { timeout = strtol(argv[x+1], NULL, 10); x += 2; } else if (getenv("STARPU_TIMEOUT_ENV")) { /* get user-defined iter_max value */ timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); } else if (timeout <= 0) { timeout = DEFAULT_TIMEOUT; if ((launcher && strstr(launcher, "valgrind")) || (launcher && strstr(launcher, "helgrind")) || tsan) timeout *= 20; if (asan || usan || lsan || (launcher && strstr(launcher, "compute-sanitizer"))) timeout *= 5; if (timeout > 1750) timeout = 1750; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG timeout *= 20; #endif #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE /* compare values between the 2 values of timeout */ if (getenv("MPIEXEC_TIMEOUT")) { int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); if (mpiexec_timeout != timeout) fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); } #endif if (argv[x] && strcmp(argv[x], "-p") == 0) { test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); x += 3; } else { test_name = argv[x]; x += 1; } if (!test_name) { fprintf(stderr, "[error] Need name of program to start\n"); exit(EXIT_FAILURE); } size_t len = strlen(test_name); if (len >= 3 && test_name[len-3] == '.' && test_name[len-2] == 's' && test_name[len-1] == 'h') { /* This is a shell script, don't run ourself on bash, but make * the script call us for each program invocation */ char *launch = NULL; if (top_builddir == NULL) // this may fail if .libs is in the directory path setenv("STARPU_LAUNCH", argv[0], 1); else { launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); strcpy(launch, top_builddir); strcat(launch, "/tests/loader"); setenv("STARPU_LAUNCH", launch, 1); } execvp(test_name, argv+x-1); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); free(launch); exit(EXIT_FAILURE); } if (strstr(test_name, "spmv/dw_block_spmv")) { test_args = (char *) calloc(512, sizeof(char)); snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); } else if (strstr(test_name, "starpu_perfmodel_display")) { if (x >= argc) test_args = strdup("-l"); } else if (strstr(test_name, "starpu_perfmodel_plot")) { if (x >= argc) test_args = strdup("-l"); } /* get launcher program */ if (launcher_args) launcher_args=strdup(launcher_args); if (top_builddir == NULL) { fprintf(stderr, "warning: $top_builddir undefined, " "so $STARPU_CHECK_LAUNCHER ignored\n"); launcher = NULL; launcher_args = NULL; libtool = NULL; } else { libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); strcpy(libtool, top_builddir); strcat(libtool, "/libtool"); } if (launcher) { const char *top_srcdir = getenv("top_srcdir"); decode(&launcher, "@top_srcdir@", top_srcdir); decode(&launcher_args, "@top_srcdir@", top_srcdir); } setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); /* set SIGALARM handler */ sa.sa_flags = SA_RESETHAND | SA_NODEFER; sigemptyset(&sa.sa_mask); sa.sa_handler = test_cleaner; if (-1 == sigaction(SIGALRM, &sa, NULL)) perror("sigaction"); signal(SIGINT, forwardsig); signal(SIGHUP, forwardsig); signal(SIGPIPE, forwardsig); signal(SIGTERM, forwardsig); child_pid = fork(); if (child_pid == 0) { char *launcher_argv[100]; int i=0; setpgid(0, 0); /* "Launchers" such as Valgrind need to be inserted * after the Libtool-generated wrapper scripts, hence * this special-case. */ if (launcher && top_builddir != NULL) { launcher_argv[i++] = libtool; launcher_argv[i++] = "--mode=execute"; launcher_argv[i++] = launcher; if (launcher_args) { launcher_argv[i++] = strtok(launcher_args, " "); while (launcher_argv[i-1]) { launcher_argv[i++] = strtok(NULL, " "); } } } launcher_argv[i++] = test_name; if (test_args) launcher_argv[i++] = test_args; else while (argv[x]) { launcher_argv[i++] = argv[x++]; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG launcher_argv[i++] = "--cfg=contexts/factory:thread"; #endif #endif launcher_argv[i++] = NULL; execvp(*launcher_argv, launcher_argv); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); exit(EXIT_FAILURE); } if (child_pid == -1) { fprintf(stderr, "[error] fork. test marked as failed\n"); exit(EXIT_FAILURE); } free(test_args); free(libtool); ret = EXIT_SUCCESS; gettimeofday(&start, NULL); alarm(timeout); if (child_pid == waitpid(child_pid, &child_exit_status, 0)) { if (WIFEXITED(child_exit_status)) { int status = WEXITSTATUS(child_exit_status); if (status == EXIT_SUCCESS) { alarm(0); } else { if (status != AUTOTEST_SKIPPED_TEST) fprintf(stdout, "`%s' exited with return code %d\n", test_name, status); ret = status; } } else if (WIFSIGNALED(child_exit_status)) { fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", test_name, WTERMSIG(child_exit_status)); launch_gdb(test_name); ret = EXIT_FAILURE; } else { fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", test_name); ret = EXIT_FAILURE; } } gettimeofday(&end, NULL); timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); return ret; } starpu-1.4.9+dfsg/julia/examples/mandelbrot/000077500000000000000000000000001507764646700210615ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/examples/mandelbrot/cpu_mandelbrot.c000066400000000000000000000046541507764646700242340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "cpu_mandelbrot.h" void cpu_mandelbrot(void *descr[], void *cl_arg) { long long *pixels; pixels = (long long int *)STARPU_MATRIX_GET_PTR(descr[0]); struct params *params = (struct params *) cl_arg; long width = STARPU_MATRIX_GET_NY(descr[0]); long height = STARPU_MATRIX_GET_NX(descr[0]); double zoom = width * 0.25296875; double iz = 1. / zoom; float diverge = 4.0; float max_iterations = (width/2) * 0.049715909 * log10(zoom); float imi = 1. / max_iterations; double centerr = params->centerr; double centeri = params->centeri; long offset = params->offset; long dim = params->dim; double cr = 0; double zr = 0; double ci = 0; double zi = 0; long n = 0; double tmp = 0; int ldP = STARPU_MATRIX_GET_LD(descr[0]); long long x,y; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) { cr = centerr + (x - (dim/2)) * iz; zr = cr; ci = centeri + (y+offset - (dim/2)) * iz; zi = ci; for (n = 0; n <= max_iterations; n++) { if (zr*zr + zi*zi>diverge) break; tmp = zr*zr - zi*zi + cr; zi = 2*zr*zi + ci; zr = tmp; } if (n #include #include #include "cpu_mandelbrot.h" void cpu_mandelbrot(void **, void *); void gpu_mandelbrot(void **, void *); static struct starpu_perfmodel model = { .type = STARPU_HISTORY_BASED, .symbol = "history_perf" }; static struct starpu_codelet cl = { .cpu_funcs = {cpu_mandelbrot}, //.cuda_funcs = {gpu_mandelbrot}, .nbuffers = 1, .modes = {STARPU_W}, .model = &model }; void mandelbrot_with_starpu(long long *pixels, struct params *p, long long dim, long long nslicesx) { starpu_data_handle_t pixels_handle; starpu_matrix_data_register(&pixels_handle, STARPU_MAIN_RAM, (uintptr_t)pixels, dim, dim, dim, sizeof(long long)); struct starpu_data_filter horiz = { .filter_func = starpu_matrix_filter_block, .nchildren = nslicesx }; starpu_data_partition(pixels_handle, &horiz); long long taskx; for (taskx = 0; taskx < nslicesx; taskx++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = starpu_data_get_child(pixels_handle, taskx); task->cl_arg = p; task->cl_arg_size = sizeof(*p); if (starpu_task_submit(task)!=0) fprintf(stderr,"submit task error\n"); } starpu_task_wait_for_all(); starpu_data_unpartition(pixels_handle, STARPU_MAIN_RAM); starpu_data_unregister(pixels_handle); } void pixels2img(long long *pixels, long long width, long long height, const char *filename) { FILE *fp = fopen(filename, "w"); if (!fp) return; int MAPPING[16][3] = {{66,30,15},{25,7,26},{9,1,47},{4,4,73},{0,7,100},{12,44,138},{24,82,177},{57,125,209},{134,181,229},{211,236,248},{241,233,191},{248,201,95},{255,170,0},{204,128,0},{153,87,0},{106,52,3}}; fprintf(fp, "P3\n%lld %lld\n255\n", width, height); long long i, j; for (i = 0; i < height; ++i) { for (j = 0; j < width; ++j) { fprintf(fp, "%d %d %d ", MAPPING[pixels[j*width+i]][0], MAPPING[pixels[j*width+i]][1], MAPPING[pixels[j*width+i]][2]); } } fclose(fp); } double min_times(double cr, double ci, long long dim, long long nslices, int gen_images) { long long *pixels = calloc(dim*dim, sizeof(long long)); struct params *p = calloc(nslices, sizeof(struct params)); double t_min = 0; long long i; for (i=0; iexec_t) t_min = exec_t; } if (gen_images == 1) { char filename[64]; snprintf(filename, 64, "out%lld.ppm", dim); pixels2img(pixels,dim,dim,filename); } free(pixels); free(p); return t_min; } void display_times(double cr, double ci, long long start_dim, long long step_dim, long long stop_dim, long long nslices, int gen_images) { long long dim; for (dim = start_dim; dim <= stop_dim; dim += step_dim) { printf("Dimension: %lld...\n", dim); double res = min_times(cr, ci, dim, nslices, gen_images); res = res / dim / dim; // time per pixel printf("%lld %lf\n", dim, res); } } int main(int argc, char **argv) { double cr, ci; long long start_dim, step_dim, stop_dim, nslices; int gen_images; if (argc != 8) { printf("Usage: %s cr ci start_dim step_dim stop_dim nslices(must divide dims) gen_images. Using default parameters\n", argv[0]); cr = -0.800671; ci = -0.158392; start_dim = 32; step_dim = 32; stop_dim = 512; nslices = 4; gen_images = 0; } else { cr = (float) atof(argv[1]); ci = (float) atof(argv[2]); start_dim = atoll(argv[3]); step_dim = atoll(argv[4]); stop_dim = atoll(argv[5]); nslices = atoll(argv[6]); gen_images = atoi(argv[7]); } if (starpu_init(NULL) != EXIT_SUCCESS) { fprintf(stderr, "ERROR\n"); return 77; } display_times(cr, ci, start_dim, step_dim, stop_dim, nslices, gen_images); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/julia/examples/mandelbrot/mandelbrot.jl000066400000000000000000000077421507764646700235510ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import Libdl using StarPU using LinearAlgebra @target STARPU_CPU+STARPU_CUDA @codelet function mandelbrot(pixels ::Matrix{Int64}, centerr ::Float64, centeri ::Float64, offset ::Int64, dim ::Int64 ) :: Nothing height :: Int64 = height(pixels) width :: Int64 = width(pixels) zoom :: Float64 = width * 0.25296875 iz :: Float64 = 1. / zoom diverge :: Float32 = 4.0 max_iterations :: Float32 = ((width/2) * 0.049715909 * log10(zoom)); imi :: Float32 = 1. / max_iterations cr :: Float64 = 0. zr :: Float64 = 0. ci :: Float64 = 0. zi :: Float64 = 0. n :: Int64 = 0 tmp :: Float64 = 0. @parallel for y = 1:height for x = 1:width cr = centerr + (x-1 - (dim / 2)) * iz zr = cr ci = centeri + (y-1+offset - (dim / 2)) * iz zi = ci max_it :: Float64 = max_iterations n = 0 for i = 0:max_it n = i if (zr*zr + zi*zi > diverge) break end tmp = zr*zr - zi*zi + cr zi = 2*zr*zi + ci zr = tmp end if (n < max_iterations) pixels[y,x] = round(15 * n * imi) else pixels[y,x] = 0 end end end return end starpu_init() function mandelbrot_with_starpu(A ::Matrix{Int64}, cr ::Float64, ci ::Float64, dim ::Int64, nslicesx ::Int64) horiz = starpu_data_filter(STARPU_MATRIX_FILTER_BLOCK, nslicesx) @starpu_block let hA = starpu_data_register(A) starpu_data_partition(hA,horiz) @starpu_sync_tasks for taskx in (1 : nslicesx) starpu_task_insert(codelet_name = "mandelbrot", handles = [hA[taskx]], modes = [STARPU_W], cl_arg = (cr, ci, Int64((taskx-1)*dim/nslicesx), dim)) end end end function pixels2img(pixels ::Matrix{Int64}, width ::Int64, height ::Int64, filename ::String) MAPPING = [[66,30,15],[25,7,26],[9,1,47],[4,4,73],[0,7,100],[12,44,138],[24,82,177],[57,125,209],[134,181,229],[211,236,248],[241,233,191],[248,201,95],[255,170,0],[204,128,0],[153,87,0],[106,52,3]] open(filename, "w") do f write(f, "P3\n$width $height\n255\n") for i = 1:height for j = 1:width write(f,"$(MAPPING[1+pixels[i,j]][1]) $(MAPPING[1+pixels[i,j]][2]) $(MAPPING[1+pixels[i,j]][3]) ") end write(f, "\n") end end end function min_times(cr ::Float64, ci ::Float64, dim ::Int64, nslices ::Int64, gen_images) tmin=0; pixels ::Matrix{Int64} = zeros(dim, dim) for i = 1:10 t = time_ns(); mandelbrot_with_starpu(pixels, cr, ci, dim, nslices) t = time_ns()-t if (tmin==0 || tmin>t) tmin=t end end if (gen_images == 1) pixels2img(pixels,dim,dim,"out$(dim).ppm") end return tmin end function display_time(cr ::Float64, ci ::Float64, start_dim ::Int64, step_dim ::Int64, stop_dim ::Int64, nslices ::Int64, gen_images) for dim in (start_dim : step_dim : stop_dim) res = min_times(cr, ci, dim, nslices, gen_images) res=res/dim/dim; # time per pixel println("$(dim) $(res)") end end display_time(-0.800671,-0.158392,32,32,512,4, 0) starpu_shutdown() starpu-1.4.9+dfsg/julia/examples/mandelbrot/mandelbrot.sh000077500000000000000000000015701507764646700235520ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh mandelbrot/mandelbrot.jl $(dirname $0)/../execute.sh mandelbrot/mandelbrot_native.jl $(dirname $0)/../execute.sh -calllib mandelbrot/cpu_mandelbrot.c mandelbrot/mandelbrot.jl starpu-1.4.9+dfsg/julia/examples/mandelbrot/mandelbrot_native.jl000066400000000000000000000072441507764646700251140ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # using LinearAlgebra function mandelbrot(pixels, centerr ::Float64, centeri ::Float64, offset ::Int64, dim ::Int64) :: Nothing height :: Int64, width :: Int64 = size(pixels) zoom :: Float64 = width * 0.25296875 iz :: Float64 = 1. / zoom diverge :: Float32 = 4.0 max_iterations :: Float32 = ((width/2) * 0.049715909 * log10(zoom)); imi :: Float64 = 1. / max_iterations cr :: Float64 = 0. zr :: Float64 = 0. ci :: Float64 = 0. zi :: Float64 = 0. n :: Int64 = 0 tmp :: Float64 = 0. for y = 1:height for x = 1:width cr = centerr + (x-1 - (dim / 2)) * iz zr = cr ci = centeri + (y-1+offset - (dim / 2)) * iz zi = ci n = 0 for i = 0:max_iterations n = i if (zr*zr + zi*zi > diverge) break end tmp = zr*zr - zi*zi + cr zi = 2*zr*zi + ci zr = tmp end if (n < max_iterations) pixels[y,x] = round(15 * n * imi) else pixels[y,x] = 0 end end end return end function mandelbrot_without_starpu(A ::Matrix{Int64}, cr ::Float64, ci ::Float64, dim ::Int64, nslicesx ::Int64) width,height = size(A) step = height / nslicesx for taskx in (1 : nslicesx) start_id = floor(Int64, (taskx-1)*step+1) end_id = floor(Int64, (taskx-1)*step+step) a = view(A, start_id:end_id, :) offset ::Int64 = (taskx-1)*dim/nslicesx mandelbrot(a, cr, ci, offset, dim) end end function pixels2img(pixels ::Matrix{Int64}, width ::Int64, height ::Int64, filename ::String) MAPPING = [[66,30,15],[25,7,26],[9,1,47],[4,4,73],[0,7,100],[12,44,138],[24,82,177],[57,125,209],[134,181,229],[211,236,248],[241,233,191],[248,201,95],[255,170,0],[204,128,0],[153,87,0],[106,52,3]] open(filename, "w") do f write(f, "P3\n$width $height\n255\n") for i = 1:height for j = 1:width write(f,"$(MAPPING[1+pixels[i,j]][1]) $(MAPPING[1+pixels[i,j]][2]) $(MAPPING[1+pixels[i,j]][3]) ") end write(f, "\n") end end end function min_times(cr ::Float64, ci ::Float64, dim ::Int64, nslices ::Int64, gen_images) tmin=0; pixels ::Matrix{Int64} = zeros(dim, dim) for i = 1:10 t = time_ns(); mandelbrot_without_starpu(pixels, cr, ci, dim, nslices) t = time_ns()-t if (tmin==0 || tmin>t) tmin=t end end if (gen_images == 1) pixels2img(pixels,dim,dim,"out$(dim).ppm") end return tmin end function display_time(cr ::Float64, ci ::Float64, start_dim ::Int64, step_dim ::Int64, stop_dim ::Int64, nslices ::Int64, gen_images) for dim in (start_dim : step_dim : stop_dim) res = min_times(cr, ci, dim, nslices, gen_images) res=res/dim/dim; # time per pixel println("$(dim) $(res)") end end display_time(-0.800671,-0.158392,32,32,512,4, 0) starpu-1.4.9+dfsg/julia/examples/mult/000077500000000000000000000000001507764646700177135ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/examples/mult/cpu_mult.c000066400000000000000000000070151507764646700217120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2018-2018 Alexis Juven * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include /* * The codelet is passed 3 matrices, the "descr" union-type field gives a * description of the layout of those 3 matrices in the local memory (ie. RAM * in the case of CPU, GPU frame buffer in the case of GPU etc.). Since we have * registered data with the "matrix" data interface, we use the matrix macros. */ void cpu_mult(void *descr[], void *cl_arg) { int stride; float *subA, *subB, *subC; stride = *((int *)cl_arg); /* .blas.ptr gives a pointer to the first element of the local copy */ subA = (float *)STARPU_MATRIX_GET_PTR(descr[0]); subB = (float *)STARPU_MATRIX_GET_PTR(descr[1]); subC = (float *)STARPU_MATRIX_GET_PTR(descr[2]); /* .blas.nx is the number of rows (consecutive elements) and .blas.ny * is the number of lines that are separated by .blas.ld elements (ld * stands for leading dimension). * NB: in case some filters were used, the leading dimension is not * guaranteed to be the same in main memory (on the original matrix) * and on the accelerator! */ const uint32_t nxC = STARPU_MATRIX_GET_NX(descr[2]); const uint32_t nyC = STARPU_MATRIX_GET_NY(descr[2]); const uint32_t nyA = STARPU_MATRIX_GET_NY(descr[0]); const uint32_t ldA = STARPU_MATRIX_GET_LD(descr[0]); const uint32_t ldB = STARPU_MATRIX_GET_LD(descr[1]); const uint32_t ldC = STARPU_MATRIX_GET_LD(descr[2]); /* we assume a FORTRAN-ordering! */ int i,j,k,ii,jj,kk; for (i = 0; i < nyC*nxC; i++) subC[i] = 0; //fprintf(stderr,"inside cpu_mult %dx%dx%d %d/%d on %d\n",nyC,nyA,nxC,starpu_worker_get_id(),STARPU_NMAXWORKERS,starpu_worker_get_devid(starpu_worker_get_id())); for (i=0;i #include #include #include #include /* * That program should compute C = A * B * * A of size (z,y) * B of size (x,z) * C of size (x,y) |---------------| z | B | |---------------| z x |----| |---------------| | | | | | | | | | A | y | C | | | | | | | | | |----| |---------------| */ //void gpu_mult(void **, void *); void cpu_mult(void **, void *); static struct starpu_perfmodel model = { .type = STARPU_HISTORY_BASED, .symbol = "history_perf" }; static struct starpu_codelet cl = { .cpu_funcs = {cpu_mult}, .cpu_funcs_name = {"cpu_mult"}, //.cuda_funcs = {gpu_mult}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W}, .model = &model }; void multiply_with_starpu(float *A, float *B, float *C, unsigned xdim, unsigned ydim, unsigned zdim, unsigned nslicesx, unsigned nslicesy, int stride) { starpu_data_handle_t A_handle, B_handle, C_handle; starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, ydim, ydim, zdim, sizeof(float)); starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, zdim, zdim, xdim, sizeof(float)); starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C, ydim, ydim, xdim, sizeof(float)); struct starpu_data_filter vert = { .filter_func = starpu_matrix_filter_vertical_block, .nchildren = nslicesx }; struct starpu_data_filter horiz = { .filter_func = starpu_matrix_filter_block, .nchildren = nslicesy }; starpu_data_partition(B_handle, &vert); starpu_data_partition(A_handle, &horiz); starpu_data_map_filters(C_handle, 2, &vert, &horiz); unsigned taskx, tasky; for (taskx = 0; taskx < nslicesx; taskx++) { for (tasky = 0; tasky < nslicesy; tasky++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = starpu_data_get_sub_data(A_handle, 1, tasky); task->handles[1] = starpu_data_get_sub_data(B_handle, 1, taskx); task->handles[2] = starpu_data_get_sub_data(C_handle, 2, taskx, tasky); task->cl_arg = &stride; task->cl_arg_size = sizeof(stride); int ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } starpu_task_wait_for_all(); starpu_data_unpartition(A_handle, STARPU_MAIN_RAM); starpu_data_unpartition(B_handle, STARPU_MAIN_RAM); starpu_data_unpartition(C_handle, STARPU_MAIN_RAM); starpu_data_unregister(A_handle); starpu_data_unregister(B_handle); starpu_data_unregister(C_handle); } void init_rand(float * m, unsigned width, unsigned height) { unsigned i,j; for (j = 0 ; j < height ; j++) { for (i = 0 ; i < width ; i++) { m[j+i*height] = (float)(starpu_drand48()); } } } void init_zero(float * m, unsigned width, unsigned height) { memset(m, 0, sizeof(float) * width * height); } double min_time(unsigned nb_test, unsigned xdim, unsigned ydim, unsigned zdim, unsigned nsclicesx, unsigned nsclicesy, int stride) { unsigned i; float * A = (float *) malloc(zdim*ydim*sizeof(float)); float * B = (float *) malloc(xdim*zdim*sizeof(float)); float * C = (float *) malloc(xdim*ydim*sizeof(float)); double exec_times=-1; for (i = 0 ; i < nb_test ; i++) { double start, stop, exec_t; init_rand(A, zdim, ydim); init_rand(B, xdim, zdim); init_zero(C, xdim, ydim); start = starpu_timing_now(); multiply_with_starpu(A, B, C, xdim, ydim, zdim, nsclicesx, nsclicesy, stride); stop = starpu_timing_now(); exec_t = (stop - start)*1.e3; // Put in ns instead of us if (exec_times<0 || exec_times>exec_t) exec_times= exec_t; } free(A); free(B); free(C); return exec_times; } void display_times(unsigned start_dim, unsigned step_dim, unsigned stop_dim, unsigned nb_tests, unsigned nsclicesx, unsigned nsclicesy, int stride) { unsigned dim; for (dim = start_dim ; dim <= stop_dim ; dim += step_dim) { double t = min_time(nb_tests, dim, dim, dim, nsclicesx, nsclicesy, stride); printf("%f %f\n", dim*dim*4.*3./1024./1024, (2.*dim-1.)*dim*dim/t); } } #define STRIDE_DEFAULT 8 int main(int argc, char * argv[]) { int stride=STRIDE_DEFAULT; if (argc >= 2) stride = atoi(argv[1]); if (stride % 4 != 0) { fprintf(stderr, "STRIDE must be a multiple of 4 (%d)\n", stride); return -1; } if (starpu_init(NULL) != EXIT_SUCCESS) { fprintf(stderr, "ERROR\n"); return 77; } unsigned start_dim = 16*stride; unsigned step_dim = 4*stride; unsigned stop_dim = 128*stride; unsigned nb_tests = 10; unsigned nsclicesx = 2; unsigned nsclicesy = 2; display_times(start_dim, step_dim, stop_dim, nb_tests, nsclicesx, nsclicesy, stride); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/julia/examples/mult/mult.jl000066400000000000000000000115201507764646700212220ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import Libdl using StarPU using LinearAlgebra @target STARPU_CPU+STARPU_CUDA @codelet function matrix_mult(m1 :: Matrix{Float32}, m2 :: Matrix{Float32}, m3 :: Matrix{Float32}, stride ::Int32) :: Nothing width_m2 :: Int32 = width(m2) height_m1 :: Int32 = height(m1) width_m1 :: Int32 = width(m1) # Naive version @parallel for j in (1 : width_m2) @parallel for i in (1 : height_m1) sum :: Float32 = 0. for k in (1 : width_m1) sum = sum + m1[i, k] * m2[k, j] end m3[i, j] = sum end end # ##### Tiled and unrolled version # for l in (1 : width_m2) # for m in (1 : height_m1) # m3[m,l] = 0 # end # end # @parallel for i in (1 : STRIDE : height_m1) # for k in (1 : STRIDE : width_m1 ) # for j in (1 : STRIDE : width_m2 ) # for kk in (k : 4 : k+STRIDE-1) # for jj in (j : 2 : j+STRIDE-1) # alpha00 :: Float32 =m2[kk,jj] # alpha01 :: Float32 =m2[kk,jj+1] # alpha10 :: Float32 =m2[kk+1,jj] # alpha11 :: Float32 =m2[kk+1,jj+1] # alpha20 :: Float32 =m2[kk+2,jj] # alpha21 :: Float32 =m2[kk+2,jj+1] # alpha30 :: Float32 =m2[kk+3,jj] # alpha31 :: Float32 =m2[kk+3,jj+1] # for ii in (i : 1 : i+STRIDE-1) # m3[ii, jj] = m3[ii, jj] + m1[ii, kk] * alpha00 + m1[ii, kk+1] * alpha10 + m1[ii, kk+2] * alpha20 + m1[ii,kk+3]*alpha30 # m3[ii, jj+1] = m3[ii, jj+1] + m1[ii, kk] * alpha01 + m1[ii, kk+1] * alpha11 + m1[ii, kk+2]*alpha21 + m1[ii,kk+3]*alpha31 # end # end # end # end # end # end return end starpu_init() function multiply_with_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, nslicesx, nslicesy, stride) scale= 3 tmin=0 vert = starpu_data_filter(STARPU_MATRIX_FILTER_VERTICAL_BLOCK, nslicesx) horiz = starpu_data_filter(STARPU_MATRIX_FILTER_BLOCK, nslicesy) @starpu_block let hA,hB,hC = starpu_data_register(A, B, C) starpu_data_partition(hB, vert) starpu_data_partition(hA, horiz) starpu_data_map_filters(hC, vert, horiz) tmin=0 for i in (1 : 10 ) t=time_ns() @starpu_sync_tasks begin for taskx in (1 : nslicesx) for tasky in (1 : nslicesy) starpu_task_insert(codelet_name = "matrix_mult", modes = [STARPU_R, STARPU_R, STARPU_W], handles = [hA[tasky], hB[taskx], hC[taskx, tasky]], cl_arg = (Int32(stride),)) end end end t=time_ns()-t if (tmin==0 || tmin>t) tmin=t end end end return tmin end function check(A, B, C) expected = A * B height,width = size(C) for i in 1:height for j in 1:width got = C[i, j] exp = expected[i, j] err = abs(exp - got) / exp if err > 0.0001 error("[$i] -> $got != $exp (err $err)") end end end end function compute_times(io,start_dim, step_dim, stop_dim, nslicesx, nslicesy, stride) for dim in (start_dim : step_dim : stop_dim) A = Array(rand(Cfloat, dim, dim)) B = Array(rand(Cfloat, dim, dim)) C = zeros(Float32, dim, dim) mt = multiply_with_starpu(A, B, C, nslicesx, nslicesy, stride) flops = (2*dim-1)*dim*dim/mt size=dim*dim*4*3/1024/1024 println(io,"$size $flops") println("$size $flops") check(A, B, C) end end if size(ARGS, 1) < 2 stride=4 filename="x.dat" else stride=parse(Int, ARGS[1]) filename=ARGS[2] end io=open(filename,"w") compute_times(io,16*stride,4*stride,128*stride,2,2,stride) close(io) starpu_shutdown() starpu-1.4.9+dfsg/julia/examples/mult/mult_native.jl000066400000000000000000000032761507764646700226010ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import Libdl using StarPU using LinearAlgebra function multiply_without_starpu(A :: Matrix{Float32}, B :: Matrix{Float32}, C :: Matrix{Float32}, nslicesx, nslicesy, stride) tmin = 0 for i in (1 : 10 ) t=time_ns() C = A * B; t=time_ns() - t if (tmin==0 || tmin>t) tmin=t end end return tmin end function compute_times(io,start_dim, step_dim, stop_dim, nslicesx, nslicesy, stride) for dim in (start_dim : step_dim : stop_dim) A = Array(rand(Cfloat, dim, dim)) B = Array(rand(Cfloat, dim, dim)) C = zeros(Float32, dim, dim) mt = multiply_without_starpu(A, B, C, nslicesx, nslicesy, stride) flops = (2*dim-1)*dim*dim/mt size=dim*dim*4*3/1024/1024 println(io,"$size $flops") println("$size $flops") end end if size(ARGS, 1) < 2 stride=4 filename="x.dat" else stride=parse(Int, ARGS[1]) filename=ARGS[2] end io=open(filename,"w") compute_times(io,16*stride,4*stride,128*stride,2,2,stride) close(io) starpu-1.4.9+dfsg/julia/examples/mult/mult_starpu.sh000077500000000000000000000015111507764646700226270ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh mult/mult.jl $(dirname $0)/../execute.sh mult/mult_native.jl $(dirname $0)/../execute.sh -calllib mult/cpu_mult.c mult/mult.jl starpu-1.4.9+dfsg/julia/examples/mult/perf.sh000077500000000000000000000024331507764646700212100ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # stride=72 #stride=4 export STARPU_NOPENCL=0 export STARPU_SCHED=dmda export STARPU_CALIBRATE=1 rm -f ./cstarpu.dat julia_generatedc.dat julia_native.dat julia_calllib.dat $(dirname $0)/mult $stride > ./cstarpu.dat $(dirname $0)/../execute.sh mult/mult.jl $stride julia_generatedc.dat $(dirname $0)/../execute.sh mult/mult_native.jl $stride julia_native.dat $(dirname $0)/../execute.sh -calllib mult/cpu_mult.c mult/mult.jl $stride julia_calllib.dat ( cat < #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void func(void *descr[], void *_args) { int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); (void)_args; *x *= 2; } struct starpu_codelet mycodelet = { .modes = { STARPU_RW }, .cpu_funcs = {func}, .cpu_funcs_name = {"func"}, .nbuffers = 1 }; struct starpu_codelet mycodelet_color = { .modes = { STARPU_RW }, .cpu_funcs = {func}, .cpu_funcs_name = {"func"}, .nbuffers = 1, .color = 0x0000FF, }; int main(void) { int value=42; starpu_data_handle_t handle; int ret; ret = starpu_init(NULL); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); // In the trace file, the following task should be green (executed on CPU) ret = starpu_task_insert(&mycodelet, STARPU_RW, handle, STARPU_NAME, "mytask", 0); if (STARPU_UNLIKELY(ret == -ENODEV)) { starpu_data_unregister(handle); goto enodev; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); // In the trace file, the following task will be red as specified by STARPU_TASK_COLOR ret = starpu_task_insert(&mycodelet, STARPU_RW, handle, STARPU_NAME, "mytask", STARPU_TASK_COLOR, 0xFF0000, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); // In the trace file, the following task will be blue as specified by the field color of mycodelet_color ret = starpu_task_insert(&mycodelet_color, STARPU_RW, handle, STARPU_NAME, "mytask", 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); starpu_data_unregister(handle); starpu_shutdown(); return 0; enodev: return 77; } starpu-1.4.9+dfsg/julia/examples/task_insert_color/task_insert_color.jl000066400000000000000000000040411507764646700265300ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import Libdl using StarPU @target STARPU_CPU @codelet function task_insert_color(val ::Ref{Int32}) :: Nothing val[] = val[] * 2 return end starpu_init() function task_insert_color_with_starpu(val ::Ref{Int32}) @starpu_block let hVal = starpu_data_register(val) perfmodel = starpu_perfmodel( perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), symbol = "history_perf" ) cl1 = starpu_codelet( cpu_func = "task_insert_color", modes = [STARPU_RW], perfmodel = perfmodel ) cl2 = starpu_codelet( cpu_func = "task_insert_color", modes = [STARPU_RW], perfmodel = perfmodel, color = 0x0000FF ) @starpu_sync_tasks begin # In the trace file, the following task should be green (executed on CPU) starpu_task_submit(starpu_task(cl = cl1, handles = [hVal])) # In the trace file, the following task will be blue as specified by the field color of cl2 starpu_task_submit(starpu_task(cl = cl2, handles = [hVal])) # In the trace file, the following tasks will be red as specified in @starpu_async_cl @starpu_async_cl task_insert_color(hVal) [STARPU_RW] () 0xFF0000 end end end foo = Ref(convert(Int32, 42)) task_insert_color_with_starpu(foo) starpu_shutdown() starpu-1.4.9+dfsg/julia/examples/task_insert_color/task_insert_color.sh000077500000000000000000000013601507764646700265410ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh task_insert_color/task_insert_color.jl starpu-1.4.9+dfsg/julia/examples/variable/000077500000000000000000000000001507764646700205175ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/examples/variable/variable.jl000066400000000000000000000024641507764646700226410ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import Libdl using StarPU @target STARPU_CPU @codelet function variable(val ::Ref{Float32}) :: Nothing val[] = val[] + 1 return end starpu_init() function variable_with_starpu(val ::Ref{Float32}, niter) @starpu_block let hVal = starpu_data_register(val) @starpu_sync_tasks for task in (1 : niter) @starpu_async_cl variable(hVal) [STARPU_RW] end end end function display(niter) foo = Ref(0.0f0) variable_with_starpu(foo, niter) println("variable -> ", foo[]) if foo[] == niter println("result is correct") else error("result is incorret") end end display(10) starpu_shutdown() starpu-1.4.9+dfsg/julia/examples/variable/variable.sh000077500000000000000000000014261507764646700226460ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh variable/variable.jl $(dirname $0)/../execute.sh variable/variable_native.jl starpu-1.4.9+dfsg/julia/examples/variable/variable_native.jl000066400000000000000000000021471507764646700242050ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # function variable(val ::Ref{Float32}) :: Nothing val[] = val[] + 1 return end function variable_without_starpu(val ::Ref{Float32}, niter) for i = 1:niter variable(val) end end function display(niter) foo = Ref(0.0f0) variable_without_starpu(foo, niter) println("variable -> ", foo[]) if foo[] == niter println("result is correct") else println("result is incorret") end end display(10) starpu-1.4.9+dfsg/julia/examples/vector_scal/000077500000000000000000000000001507764646700212365ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/examples/vector_scal/vector_scal.jl000066400000000000000000000050761507764646700241010ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import Libdl using StarPU using LinearAlgebra @target STARPU_CPU+STARPU_CUDA @codelet function vector_scal(m::Int32, v :: Vector{Float32}, k :: Float32, l :: Float32) :: Float32 N :: Int32 = length(v) # Naive version @parallel for i in (1 : N) v[i] = v[i] * m + l + k end end starpu_init() function vector_scal_with_starpu(v :: Vector{Float32}, m :: Int32, k :: Float32, l :: Float32) tmin=0 @starpu_block let hV = starpu_data_register(v) tmin=0 for i in (1 : 1) t=time_ns() @starpu_sync_tasks begin starpu_task_insert(codelet_name = "vector_scal", modes = [STARPU_RW], handles = [hV], cl_arg=(m, k, l)) end t=time_ns()-t if (tmin==0 || tmin>t) tmin=t end end end return tmin end function check(ref, res, m, k, l) expected = ref .* m .+ (k+l) for i in 1:length(expected) got = res[i] exp = expected[i] err = abs(exp - got) / exp if err > 0.0001 error("[$i] -> $got != $exp (err $err)") end end end function compute_times(io,start_dim, step_dim, stop_dim) for size in (start_dim : step_dim : stop_dim) V = Array(rand(Cfloat, size)) V_ref = copy(V) starpu_memory_pin(V) m :: Int32 = 10 k :: Float32 = 2. l :: Float32 = 3. println("INPUT ", V[1:10]) mt = vector_scal_with_starpu(V, m, k, l) starpu_memory_unpin(V) println("OUTPUT ", V[1:10]) println(io,"$size $mt") println("$size $mt") check(V_ref, V, m, k, l) end end if size(ARGS, 1) < 1 filename="x.dat" else filename=ARGS[1] end io=open(filename,"w") compute_times(io,1024,1024,4096) close(io) starpu_shutdown() starpu-1.4.9+dfsg/julia/examples/vector_scal/vector_scal.sh000077500000000000000000000015021507764646700240770ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh vector_scal/vector_scal.jl $(dirname $0)/../execute.sh -calllib vector_scal/cpu_vector_scal.c vector_scal/vector_scal.jl starpu-1.4.9+dfsg/julia/src/000077500000000000000000000000001507764646700157035ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/src/Makefile.am000066400000000000000000000036021507764646700177400ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-notests.mk CLEANFILES = *.gcno *.gcda AM_CFLAGS += -fPIC AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_builddir)/src -I$(top_srcdir)/src -I$(top_srcdir)/julia/src $(STARPU_H_CPPFLAGS) LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ `@JULIA@ $(top_srcdir)/julia/src/openblas_ldflags.jl` $(STARPU_EXPORTED_LIBS) SUBDIRS = dynamic_compiler lib_LTLIBRARIES = libstarpujulia-@STARPU_EFFECTIVE_VERSION@.la noinst_HEADERS = libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \ -version-info $(LIBSTARPUJULIA_INTERFACE_CURRENT):$(LIBSTARPUJULIA_INTERFACE_REVISION):$(LIBSTARPUJULIA_INTERFACE_AGE) libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \ callback_wrapper.c \ blas_wrapper.c \ blas.c EXTRA_DIST = blas.h \ blas.jl \ data.jl \ destructible.jl \ globals.jl \ init.jl \ linked_list.jl \ perfmodel.jl \ StarPU.jl \ task_dep.jl \ task.jl \ translate_headers.jl \ utils.jl \ compiler/c.jl \ compiler/cuda.jl \ compiler/expression_manipulation.jl \ compiler/expressions.jl \ compiler/file_generation.jl \ compiler/include.jl \ compiler/parsing.jl \ compiler/utils.jl starpu-1.4.9+dfsg/julia/src/Makefile.in000066400000000000000000001146711507764646700177620ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ subdir = julia/src ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(libdir)" LTLIBRARIES = $(lib_LTLIBRARIES) libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = am_libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ callback_wrapper.lo blas_wrapper.lo blas.lo libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ $(am_libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_LINK = $(LIBTOOL) \ $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS) \ $(LDFLAGS) -o $@ AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/blas.Plo \ ./$(DEPDIR)/blas_wrapper.Plo ./$(DEPDIR)/callback_wrapper.Plo am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = $(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) DIST_SOURCES = \ $(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac HEADERS = $(noinst_HEADERS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/make/starpu-notests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ `@JULIA@ \ $(top_srcdir)/julia/src/openblas_ldflags.jl` \ $(STARPU_EXPORTED_LIBS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_CFLAGS = $(GLOBAL_AM_CFLAGS) -fPIC AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) CLEANFILES = *.gcno *.gcda AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_builddir)/src -I$(top_srcdir)/src -I$(top_srcdir)/julia/src $(STARPU_H_CPPFLAGS) SUBDIRS = dynamic_compiler lib_LTLIBRARIES = libstarpujulia-@STARPU_EFFECTIVE_VERSION@.la noinst_HEADERS = libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \ -version-info $(LIBSTARPUJULIA_INTERFACE_CURRENT):$(LIBSTARPUJULIA_INTERFACE_REVISION):$(LIBSTARPUJULIA_INTERFACE_AGE) libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \ callback_wrapper.c \ blas_wrapper.c \ blas.c EXTRA_DIST = blas.h \ blas.jl \ data.jl \ destructible.jl \ globals.jl \ init.jl \ linked_list.jl \ perfmodel.jl \ StarPU.jl \ task_dep.jl \ task.jl \ translate_headers.jl \ utils.jl \ compiler/c.jl \ compiler/cuda.jl \ compiler/expression_manipulation.jl \ compiler/expressions.jl \ compiler/file_generation.jl \ compiler/include.jl \ compiler/parsing.jl \ compiler/utils.jl all: all-recursive .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign julia/src/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign julia/src/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): install-libLTLIBRARIES: $(lib_LTLIBRARIES) @$(NORMAL_INSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ list2=; for p in $$list; do \ if test -f $$p; then \ list2="$$list2 $$p"; \ else :; fi; \ done; \ test -z "$$list2" || { \ echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ } uninstall-libLTLIBRARIES: @$(NORMAL_UNINSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ for p in $$list; do \ $(am__strip_dir) \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ done clean-libLTLIBRARIES: -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) @list='$(lib_LTLIBRARIES)'; \ locs=`for p in $$list; do echo $$p; done | \ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ sort -u`; \ test -z "$$locs" || { \ echo rm -f $${locs}; \ rm -f $${locs}; \ } libstarpujulia-@STARPU_EFFECTIVE_VERSION@.la: $(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(EXTRA_libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(AM_V_CCLD)$(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_LINK) -rpath $(libdir) $(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpujulia_@STARPU_EFFECTIVE_VERSION@_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blas.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blas_wrapper.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/callback_wrapper.Plo@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile $(LTLIBRARIES) $(HEADERS) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(libdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ mostlyclean-am distclean: distclean-recursive -rm -f ./$(DEPDIR)/blas.Plo -rm -f ./$(DEPDIR)/blas_wrapper.Plo -rm -f ./$(DEPDIR)/callback_wrapper.Plo -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-libLTLIBRARIES install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f ./$(DEPDIR)/blas.Plo -rm -f ./$(DEPDIR)/blas_wrapper.Plo -rm -f ./$(DEPDIR)/callback_wrapper.Plo -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-libLTLIBRARIES .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ am--depfiles check check-am clean clean-generic \ clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \ ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am install-exec \ install-exec-am install-html install-html-am install-info \ install-info-am install-libLTLIBRARIES install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs installdirs-am \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ uninstall-libLTLIBRARIES .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/julia/src/StarPU.jl000066400000000000000000000072071507764646700174160ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # """ __precompile__() """ module StarPU import Libdl using CBinding include("utils.jl") const starpu_wrapper_library_name=fstarpu_task_library_name() include("translate_headers.jl") if !isfile(joinpath(fstarpu_build_dir(), "julia/gen/libstarpu_common.jl")) || !isfile(joinpath(fstarpu_build_dir(), "julia/gen/libstarpu_api.jl")) || mtime(joinpath(@__FILE__, "translate_headers.jl")) > mtime(joinpath(fstarpu_build_dir(), "julia/gen/libstarpu_api.jl")) starpu_translate_headers() end include(joinpath(fstarpu_build_dir(), "julia/gen/libstarpu_common.jl")) include(joinpath(fstarpu_build_dir(), "julia/gen/libstarpu_api.jl")) include("globals.jl") include("compiler/include.jl") include("linked_list.jl") include("destructible.jl") include("perfmodel.jl") include("data.jl") include("blas.jl") include("task.jl") include("task_dep.jl") include("init.jl") # macro export @starpu_filter export @starpu_block export @starpu_async_cl export @starpu_sync_tasks # enum / define export STARPU_CPU export STARPU_CUDA export STARPU_CUDA_ASYNC export STARPU_OPENCL export STARPU_MAIN_RAM export StarpuDataFilterFunc export STARPU_MATRIX_FILTER_VERTICAL_BLOCK, STARPU_MATRIX_FILTER_BLOCK export STARPU_VECTOR_FILTER_BLOCK export STARPU_PERFMODEL_INVALID, STARPU_PER_ARCH, STARPU_COMMON export STARPU_HISTORY_BASED, STARPU_REGRESSION_BASED export STARPU_NL_REGRESSION_BASED, STARPU_MULTIPLE_REGRESSION_BASED export starpu_tag_t export STARPU_NONE,STARPU_R,STARPU_W,STARPU_RW, STARPU_SCRATCH export STARPU_MPI_REDUX, STARPU_REDUX,STARPU_COMMUTE, STARPU_SSEND, STARPU_LOCALITY export STARPU_ACCESS_MODE_MAX # BLAS export STARPU_SAXPY # functions export starpu_cublas_init export starpu_init export starpu_shutdown export starpu_memory_pin export starpu_memory_unpin export starpu_data_access_mode export starpu_data_acquire_on_node export starpu_data_release_on_node export starpu_data_unregister export starpu_data_register export starpu_data_get_sub_data export starpu_data_partition export starpu_data_unpartition export starpu_data_map_filters export starpu_data_wont_use export starpu_task_insert export starpu_task_wait_for_all export starpu_task_submit export starpu_task_end_dep_add export starpu_task_end_dep_release export starpu_task_declare_deps export starpu_task_declare_end_deps export starpu_task_wait_for_n_submitted export starpu_task_destroy export starpu_tag_remove export starpu_tag_wait export starpu_tag_notify_from_apps export starpu_iteration_pop export starpu_iteration_push export starpu_tag_declare_deps export starpu_task export starpu_task_wait export starpu_codelet export starpu_perfmodel export starpu_perfmodel_type export starpu_translate_headers export starpu_data_get_default_sequential_consistency_flag export starpu_data_set_default_sequential_consistency_flag export starpu_data_get_sequential_consistency_flag export starpu_data_set_sequential_consistency_flag export starpu_worker_get_count export starpu_cpu_worker_get_count export starpu_cuda_worker_get_count export starpu_opencl_worker_get_count end starpu-1.4.9+dfsg/julia/src/blas.c000066400000000000000000000145441507764646700170000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "blas.h" inline void STARPU_SGEMM(char *transa, char *transb, BLASINT M, BLASINT N, BLASINT K, float alpha, const float *A, BLASINT lda, const float *B, BLASINT ldb, float beta, float *C, BLASINT ldc) { sgemm_64_(transa, transb, &M, &N, &K, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } inline void STARPU_DGEMM(char *transa, char *transb, BLASINT M, BLASINT N, BLASINT K, double alpha, double *A, BLASINT lda, double *B, BLASINT ldb, double beta, double *C, BLASINT ldc) { dgemm_64_(transa, transb, &M, &N, &K, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } inline void STARPU_SGEMV(char *transa, BLASINT M, BLASINT N, float alpha, float *A, BLASINT lda, float *X, BLASINT incX, float beta, float *Y, BLASINT incY) { sgemv_64_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); } inline void STARPU_DGEMV(char *transa, BLASINT M, BLASINT N, double alpha, double *A, BLASINT lda, double *X, BLASINT incX, double beta, double *Y, BLASINT incY) { dgemv_64_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); } inline float STARPU_SASUM(BLASINT N, float *X, BLASINT incX) { return sasum_64_(&N, X, &incX); } inline double STARPU_DASUM(BLASINT N, double *X, BLASINT incX) { return dasum_64_(&N, X, &incX); } void STARPU_SSCAL(BLASINT N, float alpha, float *X, BLASINT incX) { sscal_64_(&N, &alpha, X, &incX); } void STARPU_DSCAL(BLASINT N, double alpha, double *X, BLASINT incX) { dscal_64_(&N, &alpha, X, &incX); } void STARPU_STRSM (const char *side, const char *uplo, const char *transa, const char *diag, const BLASINT m, const BLASINT n, const float alpha, const float *A, const BLASINT lda, float *B, const BLASINT ldb) { strsm_64_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb); } void STARPU_DTRSM (const char *side, const char *uplo, const char *transa, const char *diag, const BLASINT m, const BLASINT n, const double alpha, const double *A, const BLASINT lda, double *B, const BLASINT ldb) { dtrsm_64_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb); } void STARPU_SSYR (const char *uplo, const BLASINT n, const float alpha, const float *x, const BLASINT incx, float *A, const BLASINT lda) { ssyr_64_(uplo, &n, &alpha, x, &incx, A, &lda); } void STARPU_SSYRK (const char *uplo, const char *trans, const BLASINT n, const BLASINT k, const float alpha, const float *A, const BLASINT lda, const float beta, float *C, const BLASINT ldc) { ssyrk_64_(uplo, trans, &n, &k, &alpha, A, &lda, &beta, C, &ldc); } void STARPU_SGER(const BLASINT m, const BLASINT n, const float alpha, const float *x, const BLASINT incx, const float *y, const BLASINT incy, float *A, const BLASINT lda) { sger_64_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); } void STARPU_DGER(const BLASINT m, const BLASINT n, const double alpha, const double *x, const BLASINT incx, const double *y, const BLASINT incy, double *A, const BLASINT lda) { dger_64_(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); } void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, const BLASINT n, const float *A, const BLASINT lda, float *x, const BLASINT incx) { strsv_64_(uplo, trans, diag, &n, A, &lda, x, &incx); } void STARPU_STRMM(const char *side, const char *uplo, const char *transA, const char *diag, const BLASINT m, const BLASINT n, const float alpha, const float *A, const BLASINT lda, float *B, const BLASINT ldb) { strmm_64_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb); } void STARPU_DTRMM(const char *side, const char *uplo, const char *transA, const char *diag, const BLASINT m, const BLASINT n, const double alpha, const double *A, const BLASINT lda, double *B, const BLASINT ldb) { dtrmm_64_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb); } void STARPU_STRMV(const char *uplo, const char *transA, const char *diag, const BLASINT n, const float *A, const BLASINT lda, float *X, const BLASINT incX) { strmv_64_(uplo, transA, diag, &n, A, &lda, X, &incX); } void STARPU_SAXPY(const BLASINT n, const float alpha, float *X, const BLASINT incX, float *Y, const BLASINT incY) { saxpy_64_(&n, &alpha, X, &incX, Y, &incY); } void STARPU_DAXPY(const BLASINT n, const double alpha, double *X, const BLASINT incX, double *Y, const BLASINT incY) { daxpy_64_(&n, &alpha, X, &incX, Y, &incY); } BLASINT STARPU_ISAMAX (const BLASINT n, float *X, const BLASINT incX) { BLASINT retVal; retVal = isamax_64_ (&n, X, &incX); return retVal; } BLASINT STARPU_IDAMAX (const BLASINT n, double *X, const BLASINT incX) { BLASINT retVal; retVal = idamax_64_ (&n, X, &incX); return retVal; } float STARPU_SDOT(const BLASINT n, const float *x, const BLASINT incx, const float *y, const BLASINT incy) { float retVal = 0; /* GOTOBLAS will return a FLOATRET which is a double, not a float */ retVal = (float)sdot_64_(&n, x, &incx, y, &incy); return retVal; } double STARPU_DDOT(const BLASINT n, const double *x, const BLASINT incx, const double *y, const BLASINT incy) { return ddot_64_(&n, x, &incx, y, &incy); } void STARPU_SSWAP(const BLASINT n, float *X, const BLASINT incX, float *Y, const BLASINT incY) { sswap_64_(&n, X, &incX, Y, &incY); } void STARPU_DSWAP(const BLASINT n, double *X, const BLASINT incX, double *Y, const BLASINT incY) { dswap_64_(&n, X, &incX, Y, &incY); } starpu-1.4.9+dfsg/julia/src/blas.h000066400000000000000000000227451507764646700170070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __BLAS_H__ #define __BLAS_H__ #include #define BLASINT int64_t void STARPU_SGEMM(char *transa, char *transb, BLASINT M, BLASINT N, BLASINT K, float alpha, const float *A, BLASINT lda, const float *B, BLASINT ldb, float beta, float *C, BLASINT ldc); void STARPU_DGEMM(char *transa, char *transb, BLASINT M, BLASINT N, BLASINT K, double alpha, double *A, BLASINT lda, double *B, BLASINT ldb, double beta, double *C, BLASINT ldc); void STARPU_SGEMV(char *transa, BLASINT M, BLASINT N, float alpha, float *A, BLASINT lda, float *X, BLASINT incX, float beta, float *Y, BLASINT incY); void STARPU_DGEMV(char *transa, BLASINT M, BLASINT N, double alpha, double *A, BLASINT lda, double *X, BLASINT incX, double beta, double *Y, BLASINT incY); float STARPU_SASUM(BLASINT N, float *X, BLASINT incX); double STARPU_DASUM(BLASINT N, double *X, BLASINT incX); void STARPU_SSCAL(BLASINT N, float alpha, float *X, BLASINT incX); void STARPU_DSCAL(BLASINT N, double alpha, double *X, BLASINT incX); void STARPU_STRSM (const char *side, const char *uplo, const char *transa, const char *diag, const BLASINT m, const BLASINT n, const float alpha, const float *A, const BLASINT lda, float *B, const BLASINT ldb); void STARPU_DTRSM (const char *side, const char *uplo, const char *transa, const char *diag, const BLASINT m, const BLASINT n, const double alpha, const double *A, const BLASINT lda, double *B, const BLASINT ldb); void STARPU_SSYR (const char *uplo, const BLASINT n, const float alpha, const float *x, const BLASINT incx, float *A, const BLASINT lda); void STARPU_SSYRK (const char *uplo, const char *trans, const BLASINT n, const BLASINT k, const float alpha, const float *A, const BLASINT lda, const float beta, float *C, const BLASINT ldc); void STARPU_SGER (const BLASINT m, const BLASINT n, const float alpha, const float *x, const BLASINT incx, const float *y, const BLASINT incy, float *A, const BLASINT lda); void STARPU_DGER(const BLASINT m, const BLASINT n, const double alpha, const double *x, const BLASINT incx, const double *y, const BLASINT incy, double *A, const BLASINT lda); void STARPU_STRSV (const char *uplo, const char *trans, const char *diag, const BLASINT n, const float *A, const BLASINT lda, float *x, const BLASINT incx); void STARPU_STRMM(const char *side, const char *uplo, const char *transA, const char *diag, const BLASINT m, const BLASINT n, const float alpha, const float *A, const BLASINT lda, float *B, const BLASINT ldb); void STARPU_DTRMM(const char *side, const char *uplo, const char *transA, const char *diag, const BLASINT m, const BLASINT n, const double alpha, const double *A, const BLASINT lda, double *B, const BLASINT ldb); void STARPU_STRMV(const char *uplo, const char *transA, const char *diag, const BLASINT n, const float *A, const BLASINT lda, float *X, const BLASINT incX); void STARPU_SAXPY(const BLASINT n, const float alpha, float *X, const BLASINT incX, float *Y, const BLASINT incy); void STARPU_DAXPY(const BLASINT n, const double alpha, double *X, const BLASINT incX, double *Y, const BLASINT incY); BLASINT STARPU_ISAMAX (const BLASINT n, float *X, const BLASINT incX); BLASINT STARPU_IDAMAX (const BLASINT n, double *X, const BLASINT incX); float STARPU_SDOT(const BLASINT n, const float *x, const BLASINT incx, const float *y, const BLASINT incy); double STARPU_DDOT(const BLASINT n, const double *x, const BLASINT incx, const double *y, const BLASINT incy); void STARPU_SSWAP(const BLASINT n, float *x, const BLASINT incx, float *y, const BLASINT incy); void STARPU_DSWAP(const BLASINT n, double *x, const BLASINT incx, double *y, const BLASINT incy); extern void sgemm_64_ (const char *transa, const char *transb, const BLASINT *m, const BLASINT *n, const BLASINT *k, const float *alpha, const float *A, const BLASINT *lda, const float *B, const BLASINT *ldb, const float *beta, float *C, const BLASINT *ldc); extern void dgemm_64_ (const char *transa, const char *transb, const BLASINT *m, const BLASINT *n, const BLASINT *k, const double *alpha, const double *A, const BLASINT *lda, const double *B, const BLASINT *ldb, const double *beta, double *C, const BLASINT *ldc); extern void sgemv_64_(const char *trans, const BLASINT *m, const BLASINT *n, const float *alpha, const float *a, const BLASINT *lda, const float *x, const BLASINT *incx, const float *beta, float *y, const BLASINT *incy); extern void dgemv_64_(const char *trans, const BLASINT *m, const BLASINT *n, const double *alpha, const double *a, const BLASINT *lda, const double *x, const BLASINT *incx, const double *beta, double *y, const BLASINT *incy); extern void ssyr_64_ (const char *uplo, const BLASINT *n, const float *alpha, const float *x, const BLASINT *incx, float *A, const BLASINT *lda); extern void ssyrk_64_ (const char *uplo, const char *trans, const BLASINT *n, const BLASINT *k, const float *alpha, const float *A, const BLASINT *lda, const float *beta, float *C, const BLASINT *ldc); extern void strsm_64_ (const char *side, const char *uplo, const char *transa, const char *diag, const BLASINT *m, const BLASINT *n, const float *alpha, const float *A, const BLASINT *lda, float *B, const BLASINT *ldb); extern void dtrsm_64_ (const char *side, const char *uplo, const char *transa, const char *diag, const BLASINT *m, const BLASINT *n, const double *alpha, const double *A, const BLASINT *lda, double *B, const BLASINT *ldb); extern double sasum_64_ (const BLASINT *n, const float *x, const BLASINT *incx); extern double dasum_64_ (const BLASINT *n, const double *x, const BLASINT *incx); extern void sscal_64_ (const BLASINT *n, const float *alpha, float *x, const BLASINT *incx); extern void dscal_64_ (const BLASINT *n, const double *alpha, double *x, const BLASINT *incx); extern void sger_64_(const BLASINT *m, const BLASINT *n, const float *alpha, const float *x, const BLASINT *incx, const float *y, const BLASINT *incy, float *A, const BLASINT *lda); extern void dger_64_(const BLASINT *m, const BLASINT *n, const double *alpha, const double *x, const BLASINT *incx, const double *y, const BLASINT *incy, double *A, const BLASINT *lda); extern void strsv_64_ (const char *uplo, const char *trans, const char *diag, const BLASINT *n, const float *A, const BLASINT *lda, float *x, const BLASINT *incx); extern void strmm_64_(const char *side, const char *uplo, const char *transA, const char *diag, const BLASINT *m, const BLASINT *n, const float *alpha, const float *A, const BLASINT *lda, float *B, const BLASINT *ldb); extern void dtrmm_64_(const char *side, const char *uplo, const char *transA, const char *diag, const BLASINT *m, const BLASINT *n, const double *alpha, const double *A, const BLASINT *lda, double *B, const BLASINT *ldb); extern void strmv_64_(const char *uplo, const char *transA, const char *diag, const BLASINT *n, const float *A, const BLASINT *lda, float *X, const BLASINT *incX); extern void saxpy_64_(const BLASINT *n, const float *alpha, const float *X, const BLASINT *incX, float *Y, const BLASINT *incy); extern void daxpy_64_(const BLASINT *n, const double *alpha, const double *X, const BLASINT *incX, double *Y, const BLASINT *incy); extern BLASINT isamax_64_(const BLASINT *n, const float *X, const BLASINT *incX); extern BLASINT idamax_64_(const BLASINT *n, const double *X, const BLASINT *incX); /* for some reason, FLOATRET is not a float but a double in GOTOBLAS */ extern double sdot_64_(const BLASINT *n, const float *x, const BLASINT *incx, const float *y, const BLASINT *incy); extern double ddot_64_(const BLASINT *n, const double *x, const BLASINT *incx, const double *y, const BLASINT *incy); extern void sswap_64_(const BLASINT *n, float *x, const BLASINT *incx, float *y, const BLASINT *incy); extern void dswap_64_(const BLASINT *n, double *x, const BLASINT *incx, double *y, const BLASINT *incy); #endif /* __BLAS_H__ */ starpu-1.4.9+dfsg/julia/src/blas.jl000066400000000000000000000015271507764646700171600ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # @enum STARPU_BLAS begin STARPU_SAXPY end cuda_blas_codelets = Dict(STARPU_SAXPY => "julia_saxpy_cuda_codelet") cpu_blas_codelets = Dict(STARPU_SAXPY => "julia_saxpy_cpu_codelet") starpu-1.4.9+dfsg/julia/src/blas_wrapper.c000066400000000000000000000031431507764646700205310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #if defined(STARPU_ATLAS) || defined(STARPU_OPENBLAS) || defined(STARPU_MKL) void julia_saxpy_cpu_codelet(void *descr[], void *arg) { float alpha = *((float *)arg); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); float *block_x = (float *)STARPU_VECTOR_GET_PTR(descr[0]); float *block_y = (float *)STARPU_VECTOR_GET_PTR(descr[1]); STARPU_SAXPY((int)n, alpha, block_x, 1, block_y, 1); } #endif #ifdef STARPU_USE_CUDA #include void julia_saxpy_cuda_codelet(void *descr[], void *arg) { float alpha = *((float *)arg); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); float *block_x = (float *)STARPU_VECTOR_GET_PTR(descr[0]); float *block_y = (float *)STARPU_VECTOR_GET_PTR(descr[1]); cublasStatus_t status = cublasSaxpy(starpu_cublas_get_local_handle(), (int)n, &alpha, block_x, 1, block_y, 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } #endif starpu-1.4.9+dfsg/julia/src/callback_wrapper.c000066400000000000000000000021511507764646700213420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include void *dummy_function_list[] = { starpu_matrix_filter_vertical_block, starpu_matrix_filter_block, starpu_vector_filter_block, starpu_init, }; void julia_callback_func(void *user_data) { volatile int *signal = (int *) user_data; // wakeup callback *(signal) = 1; // Wait for callback to end. while ((*signal) != 0); } void julia_wait_signal(volatile int *signal) { while ((*signal) == 0); } starpu-1.4.9+dfsg/julia/src/compiler/000077500000000000000000000000001507764646700175155ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/src/compiler/c.jl000066400000000000000000000240531507764646700202720ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # """ Returns the list of instruction that will be added before for loop of shape "for for_index_var in set ..." """ function interval_evaluation_declarations(set :: StarpuExprInterval, for_index_var :: Symbol) decl_pattern = @parse € :: Int64 affect_pattern = @parse € :: Int64 = € interv_size_affect_pattern = @parse € :: Int64 = jlstarpu_interval_size(€, €, €) id = set.id start_var = starpu_parse(Symbol(:start_, id)) start_decl = replace_pattern(affect_pattern, start_var, set.start) index_var = starpu_parse(for_index_var) index_decl = replace_pattern(decl_pattern, index_var) if isa(set.step, StarpuExprValue) stop_var = starpu_parse(Symbol(:stop_, id)) stop_decl = replace_pattern(affect_pattern, stop_var, set.stop) return StarpuExpr[start_decl, stop_decl, index_decl] end step_var = starpu_parse(Symbol(:step_, id)) step_decl = replace_pattern(affect_pattern, step_var, set.step) dim_var = starpu_parse(Symbol(:dim_, id)) dim_decl = replace_pattern(interv_size_affect_pattern, dim_var, start_var, step_var, set.stop) iter_var = starpu_parse(Symbol(:iter_, id)) iter_decl = replace_pattern(decl_pattern, iter_var) return StarpuExpr[start_decl, step_decl, dim_decl, iter_decl, index_decl] end function add_for_loop_declarations(expr :: StarpuExpr) function func_to_apply(x :: StarpuExpr) if !isa(x, StarpuExprFor) return x end interval_decl = interval_evaluation_declarations(x.set, x.iter) return StarpuExprFor(x.iter, x.set, x.body, x.is_independant, interval_decl) end return apply(func_to_apply, expr) end function transform_to_cpu_kernel(expr :: StarpuExprFunction) output = add_for_loop_declarations(expr) output = substitute_args(output) output = substitute_func_calls(output) output = substitute_views(output) output = substitute_indexing(output) output = flatten_blocks(output) return output end function generate_c_struct_param_declaration(codelet_name) scalar_parameters = CODELETS_SCALARS[codelet_name] struct_params_name = CODELETS_PARAMS_STRUCT[codelet_name] output = "struct $struct_params_name {\n" for p in scalar_parameters arg_name = p[1] arg_type = p[2] output *= "\t" * starpu_type_traduction(arg_type) * " $arg_name;\n" end output *= "};\n\n" return output end function flatten_blocks(expr :: StarpuExpr) function func_to_run(x :: StarpuExpr) if !isa(x, StarpuExprBlock) return x end instrs = StarpuExpr[] for sub_expr in x.exprs if isa(sub_expr, StarpuExprBlock) push!(instrs, sub_expr.exprs...) else push!(instrs, sub_expr) end end return StarpuExprBlock(instrs) end return apply(func_to_run, expr) end function substitute_argument_usage(expr :: StarpuExpr, arg_index, buffer_name :: Symbol, arg_name :: Symbol, ptr_name :: Symbol) function func_to_apply(x :: StarpuExpr) if x == StarpuExprVar(arg_name) return StarpuExprVar(ptr_name) end if !(isa(x, StarpuExprCall) && x.func in keys(func_substitution)) return x end if (length(x.args) != 1) error("Invalid arity for function $(x.func)") end if (x.args[1] != StarpuExprVar(ptr_name)) return x end new_func = func_substitution[x.func] new_arg = starpu_parse(:($buffer_name[$arg_index])) return StarpuExprCall(new_func, [new_arg]) end return apply(func_to_apply, expr) end function substitute_args(expr :: StarpuExprFunction) new_body = expr.body func_id = rand_string() buffer_arg_name = Symbol("buffers_", func_id) cl_arg_name = Symbol("cl_arg_", func_id) function_start_affectations = StarpuExpr[] buffer_id = 1 scalar_id = 1 # get scalar parameters and structure name scalar_parameters = CODELETS_SCALARS[string(expr.func)] struct_params_name = CODELETS_PARAMS_STRUCT[string(expr.func)] for i in (1 : length(expr.args)) var_id = rand_string() ptr = Symbol(:ptr_, var_id) var_name = ptr if (expr.args[i].typ <: Vector) func_interface = :STARPU_VECTOR_GET_PTR type_in_arg = eltype(expr.args[i].typ) new_affect = starpu_parse( :($ptr :: Ptr{$type_in_arg} = $func_interface($buffer_arg_name[$buffer_id])) ) push!(function_start_affectations, new_affect) new_body = substitute_argument_usage(new_body, buffer_id, buffer_arg_name, expr.args[i].name, var_name) buffer_id += 1 elseif (expr.args[i].typ <: Matrix) func_interface = :STARPU_MATRIX_GET_PTR ld_name = Symbol("ld_", var_id) post_affect = starpu_parse( :($ld_name :: UInt32 = STARPU_MATRIX_GET_LD($buffer_arg_name[$buffer_id])) ) type_in_arg = eltype(expr.args[i].typ) new_affect = starpu_parse( :($ptr :: Ptr{$type_in_arg} = $func_interface($buffer_arg_name[$buffer_id])) ) push!(function_start_affectations, new_affect) push!(function_start_affectations, post_affect) new_body = substitute_argument_usage(new_body, buffer_id, buffer_arg_name, expr.args[i].name, var_name) buffer_id += 1 elseif (expr.args[i].typ <: Ref) func_interface = :STARPU_VARIABLE_GET_PTR type_in_arg = eltype(expr.args[i].typ) new_affect = starpu_parse( :($ptr :: Ptr{$type_in_arg} = $func_interface($buffer_arg_name[$buffer_id])) ) push!(function_start_affectations, new_affect) new_body = substitute_argument_usage(new_body, buffer_id, buffer_arg_name, expr.args[i].name, Symbol("(*$var_name)")) buffer_id += 1 elseif (expr.args[i].typ <: Number || expr.args[i].typ <: AbstractChar) type_in_arg = eltype(expr.args[i].typ) field_name = scalar_parameters[scalar_id][1] var_name = field_name post_affect = starpu_parse( :($var_name :: $type_in_arg = *($ptr).$field_name)) new_affect = starpu_parse( :($ptr :: Ptr{$struct_params_name} = $cl_arg_name)) push!(function_start_affectations, new_affect) push!(function_start_affectations, post_affect) scalar_id += 1 else error("Task arguments must be either matrix, vector, ref or scalar (got $(expr.args[i].typ))") end end new_args = [ starpu_parse(:($buffer_arg_name :: Ptr{Ptr{Nothing}})), starpu_parse(:($cl_arg_name :: Vector{Nothing})) ] new_body = StarpuExprBlock([function_start_affectations..., new_body.exprs...]) return StarpuExprFunction(expr.ret_type, expr.func, new_args, new_body) end func_substitution = Dict( :width => :STARPU_MATRIX_GET_NY, :height => :STARPU_MATRIX_GET_NX, :ld => :STARPU_MATRIX_GET_LD, :length => :STARPU_VECTOR_GET_NX ) function substitute_func_calls(expr :: StarpuExpr) function func_to_apply(x :: StarpuExpr) if !isa(x, StarpuExprCall) || !(x.func in keys(func_substitution)) return x end return StarpuExprCall(func_substitution[x.func], x.args) end return apply(func_to_apply, expr) end function substitute_views(expr :: StarpuExpr) function func_to_apply(x :: StarpuExpr) if !isa(x, StarpuExprCall) || x.func != :view return x end ref = x.args[1] indexes = map(i -> isa(i, StarpuExprInterval) ? i.start : i, x.args[2:end]) return StarpuExprAddress(StarpuExprRef(ref, indexes)) end return apply(func_to_apply, expr) end function substitute_indexing(expr :: StarpuExpr) function func_to_run(x :: StarpuExpr) if !isa(x, StarpuExprRef) return x end #if !isa(x.ref, StarpuExprVar) # error("Only variable indexing is allowed") #TODO allow more ? #end nb_indexes = length(x.indexes) if (nb_indexes >= 3) error("Indexing with more than 2 indexes is not allowed") # TODO : blocks end if (nb_indexes == 0) return x elseif nb_indexes == 1 new_index = StarpuExprCall(:-, [x.indexes[1], StarpuExprValue(1)]) #TODO : add field "offset" from STARPU_VECTOR_GET interface #TODO : detect when it is a matrix used with one index only return StarpuExprRef(x.ref, [new_index]) elseif nb_indexes == 2 var_name = String(x.ref.name) if !occursin(r"ptr_", var_name) || isempty(var_name[5:end]) error("Invalid variable ($var_name) for multiple index dereferencing") end var_id = var_name[5:end] ld_name = Symbol("ld_", var_id) # TODO : check if this variable is legit (var_name must refer to a matrix) new_index = x.indexes[2] new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)]) new_index = StarpuExprCall(:(*), [new_index, StarpuExprVar(ld_name)]) new_index = StarpuExprCall(:(+), [x.indexes[1], new_index]) new_index = StarpuExprCall(:(-), [new_index, StarpuExprValue(1)]) return StarpuExprRef(x.ref, [new_index]) end end return apply(func_to_run, expr) end starpu-1.4.9+dfsg/julia/src/compiler/cuda.jl000066400000000000000000000515301507764646700207640ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # function is_indep_for_expr(x :: StarpuExpr) return isa(x, StarpuExprFor) && x.is_independant end function extract_init_indep_finish(expr :: StarpuExpr) # TODO : it is not a correct extraction (example : if (cond) {@indep for ...} else {return} would not work) # better use apply() (NOTE :assert_no_indep_for already exists) to find recursively every for loops init = StarpuExpr[] finish = StarpuExpr[] if is_indep_for_expr(expr) return init, StarpuIndepFor(expr), finish end if !isa(expr, StarpuExprBlock) return [expr], nothing, finish end for i in (1 : length(expr.exprs)) if !is_indep_for_expr(expr.exprs[i]) continue end init = expr.exprs[1 : i-1] indep = StarpuIndepFor(expr.exprs[i]) finish = expr.exprs[i+1 : end] if any(is_indep_for_expr, finish) error("Sequence of several independant loops is not allowed") #same it may be tricked by a Block(Indep_for(...)) end return init, indep, finish end return expr.exprs, nothing, finish end function analyse_variable_declarations(expr :: StarpuExpr, already_defined :: Vector{StarpuExprTypedVar} = StarpuExprTypedVar[]) undefined_variables = Symbol[] defined_variable_names = map((x -> x.name), already_defined) defined_variable_types = map((x -> x.typ), already_defined) function func_to_apply(x :: StarpuExpr) if isa(x, StarpuExprFunction) error("No function declaration allowed in this section") end if isa(x, StarpuExprVar) || isa(x, StarpuExprTypedVar) if !(x.name in defined_variable_names) && !(x.name in undefined_variables) push!(undefined_variables, x.name) end return x end if isa(x, StarpuExprAffect) || isa(x, StarpuExprFor) if isa(x, StarpuExprAffect) var = x.var if !isa(var, StarpuExprTypedVar) return x end name = var.name typ = var.typ else name = x.iter typ = Int64 end if name in defined_variable_names error("Multiple definition of variable $name") end filter!((sym -> sym != name), undefined_variables) push!(defined_variable_names, name) push!(defined_variable_types, typ) return x end return x end apply(func_to_apply, expr) defined_variable = map(StarpuExprTypedVar, defined_variable_names, defined_variable_types) return defined_variable, undefined_variables end function find_variable(name :: Symbol, vars :: Vector{StarpuExprTypedVar}) for x in vars if x.name == name return x end end return nothing end function add_device_to_interval_call(expr :: StarpuExpr) function func_to_apply(x :: StarpuExpr) if isa(x, StarpuExprCall) && x.func == :jlstarpu_interval_size return StarpuExprCall(:jlstarpu_interval_size__device, x.args) end return x end return apply(func_to_apply, expr) end function translate_cublas(expr :: StarpuExpr) function func_to_run(x :: StarpuExpr) # STARPU_BLAS => (CUBLAS, TRANS, FILLMODE, ALPHA, SIDE, DIAG) blas_to_cublas = Dict(:STARPU_SGEMM => (:cublasSgemm, [1, 2], [], [6, 11], [], []), :STARPU_DGEMM => (:cublasDgemm, [1, 2], [], [6, 11], [], []), :STARPU_SGEMV => (:cublasSgemv, [1], [], [4,9], [], []), :STARPU_DGEMV => (:cublasDgemv, [1], [], [4,9], [], []), :STARPU_SSCAL => (:cublasSscal, [], [], [2], [], []), :STARPU_DSCAL => (:cublasDscal, [], [], [2], [], []), :STARPU_STRSM => (:cublasStrsm, [3], [2], [7], [1], [4]), :STARPU_DTRSM => (:cublasDtrsm, [3], [2], [7], [1], [4]), :STARPU_SSYR => (:cublasSsyr, [], [1], [3], [], []), :STARPU_SSYRK => (:cublasSsyrk, [2], [1], [5,8], [], []), :STARPU_SGER => (:cublasSger, [], [], [3], [], []), :STARPU_DGER => (:cublasDger, [], [], [3], [], []), :STARPU_STRSV => (:cublasStrsv, [2], [1], [], [], [3]), :STARPU_STRMM => (:cublasStrmm, [3], [2], [7], [1], [4]), :STARPU_DTRMM => (:cublasDtrmm, [3], [2], [7], [1], [4]), :STARPU_STRMV => (:cublasStrmv, [2], [1], [], [], [3]), :STARPU_SAXPY => (:cublasSaxpy, [], [], [2], [], []), :STARPU_DAXPY => (:cublasDaxpy, [], [], [2], [], []), :STARPU_SSWAP => (:cublasSswap, [], [], [], [], []), :STARPU_DSWAP => (:cublasDswap, [], [], [], [], [])) if !(isa(x, StarpuExprCall) && x.func in keys(blas_to_cublas)) return x end new_args = x.args # cublasOperation_t parameters (e.g. StarpuExprValue("N") for i in blas_to_cublas[x.func][2] if !isa(new_args[i], StarpuExprValue) || !isa(new_args[i].value, String) error("Argument $i of ", x.func, " must be a string") end value = new_args[i].value if value == "N" || value == "n" new_args[i] = StarpuExprVar(:CUBLAS_OP_N) elseif value == "T" || value == "t" new_args[i] = StarpuExprVar(:CUBLAS_OP_T) elseif value == "C" || value == "c" new_args[i] = StarpuExprVar(:CUBLAS_OP_C) else error("Unhandled value for rgument $i of ", x.func, ": ", value, "expecting (\"N\", \"T\", or \"C\")") end end # cublasFillMode_t parameters (e.g. StarpuExprValue("L") for i in blas_to_cublas[x.func][3] if !isa(new_args[i], StarpuExprValue) || !isa(new_args[i].value, String) error("Argument $i of ", x.func, " must be a string") end value = new_args[i].value if value == "L" || value == "l" new_args[i] = StarpuExprVar(:CUBLAS_FILL_MODE_LOWER) elseif value == "U" || value == "u" new_args[i] = StarpuExprVar(:CUBLAS_FILL_MODE_UPPER) else error("Unhandled value for rgument $i of ", x.func, ": ", value, "expecting (\"L\" or \"U\")") end end # scalar parameters (alpha, beta, ...): alpha -> &alpha for i in blas_to_cublas[x.func][4] if !isa(new_args[i], StarpuExprVar) error("Argument $i of ", x.func, " must be a variable") end var_name = new_args[i].name new_args[i] = StarpuExprVar(Symbol("&$var_name")) end # cublasSideMode_t parameters (e.g. StarpuExprValue("L") for i in blas_to_cublas[x.func][5] if !isa(new_args[i], StarpuExprValue) || !isa(new_args[i].value, String) error("Argument $i of ", x.func, " must be a string, got: ", new_args[i]) end value = new_args[i].value if value == "L" || value == "l" new_args[i] = StarpuExprVar(:CUBLAS_SIDE_LEFT) elseif value == "R" || value == "r" new_args[i] = StarpuExprVar(:CUBLAS_SIDE_RIGHT) else error("Unhandled value for rgument $i of ", x.func, ": ", value, "expecting (\"L\" or \"R\")") end end # cublasDiag_Typet parameters (e.g. StarpuExprValue("N") for i in blas_to_cublas[x.func][6] if !isa(new_args[i], StarpuExprValue) || !isa(new_args[i].value, String) error("Argument $i of ", x.func, " must be a string") end value = new_args[i].value if value == "N" || value == "n" new_args[i] = StarpuExprVar(:CUBLAS_DIAG_NON_UNIT) elseif value == "U" || value == "u" new_args[i] = StarpuExprVar(:CUBLAS_DIAG_UNIT) else error("Unhandled value for rgument $i of ", x.func, ": ", value, "expecting (\"N\" or \"U\")") end end new_args = [@parse(starpu_cublas_get_local_handle()), x.args...] status_varname = "status"*rand_string() status_var = StarpuExprVar(Symbol("cublasStatus_t "*status_varname)) call_expr = StarpuExprCall(blas_to_cublas[x.func][1], new_args) return StarpuExprBlock([StarpuExprAffect(status_var, call_expr), starpu_parse(Meta.parse("""if $status_varname != CUBLAS_STATUS_SUCCESS STARPU_CUBLAS_REPORT_ERROR($status_varname) end""")), @parse cudaStreamSynchronize(starpu_cuda_get_local_stream())]) end return apply(func_to_run, expr) end function get_all_assignments(cpu_instr) ret = StarpuExpr[] function func_to_run(x :: StarpuExpr) if isa(x, StarpuExprAffect) push!(ret, x) end return x end apply(func_to_run, cpu_instr) return ret end function get_all_buffer_vars(cpu_instr) ret = StarpuExprTypedVar[] assignments = get_all_assignments(cpu_instr) for x in assignments var = x.var expr = x.expr if isa(expr, StarpuExprCall) && expr.func in [:STARPU_MATRIX_GET_PTR, :STARPU_VECTOR_GET_PTR] push!(ret, var) end end return ret end function get_all_buffer_stores(cpu_instr, vars) ret = StarpuExprAffect[] function func_to_run(x :: StarpuExpr) if isa(x, StarpuExprAffect) && isa(x.var, StarpuExprRef) && isa(x.var.ref, StarpuExprVar) && x.var.ref.name in map(x -> x.name, vars) push!(ret, x) end return x end apply(func_to_run, cpu_instr) return ret end function get_all_buffer_refs(cpu_instr, vars) ret = [] current_instr = nothing InstrTy = Union{StarpuExprAffect, StarpuExprCall, StarpuExprCudaCall, StarpuExprFor, StarpuExprIf, StarpuExprIfElse, StarpuExprReturn, StarpuExprBreak, StarpuExprWhile} parent = nothing function func_to_run(x :: StarpuExpr) if isa(x, InstrTy) && !(isa(x, StarpuExprCall) && x.func in [:(+), :(-), :(*), :(/), :(%), :(<), :(<=), :(==), :(!=), :(>=), :(>), :sqrt]) current_instr = x end if isa(x, StarpuExprRef) && isa(x.ref, StarpuExprVar) && x.ref.name in map(x -> x.name, vars) && # var[...] !isa(parent, StarpuExprAddress) && # filter &var[..] !(isa(current_instr, StarpuExprAffect) && current_instr.var == x) # filter lhs ref push!(ret, (current_instr, x)) end parent = x return x end visit_preorder(func_to_run, cpu_instr) return ret end function transform_cuda_device_loadstore(cpu_instr :: StarpuExprBlock) # Get all CUDA buffer pointers buffer_vars = get_all_buffer_vars(cpu_instr) buffer_types = Dict{Symbol, Type}() for var in buffer_vars buffer_types[var.name] = var.typ end # Get all store to a CUDA buffer stores = get_all_buffer_stores(cpu_instr, buffer_vars) # Get all load from CUDA buffer loads = get_all_buffer_refs(cpu_instr, buffer_vars) # Replace each load L: # L: ... buffer[id] # With the following instruction block: # Type varX # cudaMemcpy(&varX, &buffer[id], sizeof(Type), cudaMemcpyDeviceToHost) # L: ... varX for l in loads (instr, ref) = l block = [] buffer = ref.ref.name varX = "var"*rand_string() type = buffer_types[Symbol(buffer)] ctype = starpu_type_traduction(eltype(type)) push!(block, StarpuExprTypedVar(Symbol(varX), eltype(type))) push!(block, StarpuExprCall(:cudaMemcpy, [StarpuExprAddress(StarpuExprVar(Symbol(varX))), StarpuExprAddress(ref), StarpuExprVar(Symbol("sizeof($ctype)")), StarpuExprVar(:cudaMemcpyDeviceToHost)])) push!(block, substitute(instr, ref, StarpuExprVar(Symbol("$varX")))) cpu_instr = substitute(cpu_instr, instr, StarpuExprBlock(block)) end # Replace each Store S: # S: buffer[id] = expr # With the following instruction block: # Type varX # varX = expr # cudaMemcpy(&buffer[id], &varX, sizeof(Type), cudaMemcpyHostToDevice) for s in stores block = [] buffer = s.var.ref.name varX = "var"*rand_string() type = buffer_types[Symbol(buffer)] ctype = starpu_type_traduction(eltype(type)) push!(block, StarpuExprTypedVar(Symbol(varX), eltype(type))) push!(block, StarpuExprAffect(StarpuExprVar(Symbol("$varX")), s.expr)) push!(block, StarpuExprCall(:cudaMemcpy, [StarpuExprAddress(s.var), StarpuExprAddress(StarpuExprVar(Symbol(varX))), StarpuExprVar(Symbol("sizeof($ctype)")), StarpuExprVar(:cudaMemcpyHostToDevice)])) cpu_instr = substitute(cpu_instr, s, StarpuExprBlock(block)) end return cpu_instr end function transform_to_cuda_kernel(func :: StarpuExprFunction) cpu_func = transform_to_cpu_kernel(func) init, indep, finish = extract_init_indep_finish(cpu_func.body) cpu_instr = init kernel = nothing # Generate a CUDA kernel only if there is an independent loop (@parallel macro). if (indep != nothing) prekernel_instr, kernel_args, kernel_instr = analyse_sets(indep) kernel_call = StarpuExprCudaCall(:cudaKernel, (@parse nblocks), (@parse THREADS_PER_BLOCK), StarpuExpr[]) cpu_instr = vcat(cpu_instr, prekernel_instr) kernel_instr = vcat(kernel_instr, indep.body) indep_for_def, indep_for_undef = analyse_variable_declarations(StarpuExprBlock(kernel_instr), kernel_args) prekernel_def, prekernel_undef = analyse_variable_declarations(StarpuExprBlock(cpu_instr), cpu_func.args) for undef_var in indep_for_undef found_var = find_variable(undef_var, prekernel_def) if found_var == nothing # TODO : error then ? continue end push!(kernel_args, found_var) end call_args = map((x -> StarpuExprVar(x.name)), kernel_args) kernelname=Symbol("KERNEL_",func.func); cuda_call = StarpuExprCudaCall(kernelname, (@parse nblocks), (@parse THREADS_PER_BLOCK), call_args) push!(cpu_instr, cuda_call) push!(cpu_instr, @parse cudaStreamSynchronize(starpu_cuda_get_local_stream())) kernel = StarpuExprFunction(Nothing, kernelname, kernel_args, StarpuExprBlock(kernel_instr)) kernel = add_device_to_interval_call(kernel) kernel = flatten_blocks(kernel) end cpu_instr = vcat(cpu_instr, finish) cpu_instr = StarpuExprBlock(cpu_instr) cpu_instr = transform_cuda_device_loadstore(cpu_instr) prekernel_name = Symbol("CUDA_", func.func) prekernel = StarpuExprFunction(Nothing, prekernel_name, cpu_func.args, cpu_instr) prekernel = translate_cublas(prekernel) prekernel = flatten_blocks(prekernel) return prekernel, kernel end struct StarpuIndepFor iters :: Vector{Symbol} sets :: Vector{StarpuExprInterval} body :: StarpuExpr end function assert_no_indep_for(expr :: StarpuExpr) function func_to_run(x :: StarpuExpr) if (isa(x, StarpuExprFor) && x.is_independant) error("Invalid usage of intricated @indep for loops") end return x end return apply(func_to_run, expr) end function StarpuIndepFor(expr :: StarpuExprFor) if !expr.is_independant error("For expression must be prefixed by @indep") end iters = [] sets = [] for_loop = expr while isa(for_loop, StarpuExprFor) && for_loop.is_independant push!(iters, for_loop.iter) push!(sets, for_loop.set) for_loop = for_loop.body while (isa(for_loop, StarpuExprBlock) && length(for_loop.exprs) == 1) for_loop = for_loop.exprs[1] end end return StarpuIndepFor(iters, sets, assert_no_indep_for(for_loop)) end function translate_index_code(dims :: Vector{StarpuExprVar}) ndims = length(dims) if ndims == 0 error("No dimension specified") end prod = StarpuExprValue(1) output = StarpuExpr[] reversed_dim = reverse(dims) thread_index_patern = @parse € :: Int64 = (€ / €) % € thread_id = @parse THREAD_ID for i in (1 : ndims) index_lvalue = StarpuExprVar(Symbol(:kernel_ids__index_, ndims - i + 1)) expr = replace_pattern(thread_index_patern, index_lvalue, thread_id, prod, reversed_dim[i]) push!(output, expr) prod = StarpuExprCall(:(*), [prod, reversed_dim[i]]) end thread_id_pattern = @parse begin € :: Int64 = blockIdx.x * blockDim.x + threadIdx.x if (€ >= €) return end end bound_verif = replace_pattern(thread_id_pattern, thread_id, thread_id, prod) push!(output, bound_verif) return reverse(output) end function kernel_index_declarations(ind_for :: StarpuIndepFor) pre_kernel_instr = StarpuExpr[] kernel_args = StarpuExprTypedVar[] kernel_instr = StarpuExpr[] decl_pattern = @parse € :: Int64 = € interv_size_decl_pattern = @parse € :: Int64 = jlstarpu_interval_size(€, €, €) iter_pattern = @parse € :: Int64 = € + € * € dims = StarpuExprVar[] ker_instr_to_add_later_on = StarpuExpr[] for k in (1 : length(ind_for.sets)) set = ind_for.sets[k] start_var = starpu_parse(Symbol(:kernel_ids__start_, k)) start_decl = replace_pattern(decl_pattern, start_var, set.start) step_var = starpu_parse(Symbol(:kernel_ids__step_, k)) step_decl = replace_pattern(decl_pattern, step_var, set.step) dim_var = starpu_parse(Symbol(:kernel_ids__dim_, k)) dim_decl = replace_pattern(interv_size_decl_pattern, dim_var, start_var, step_var, set.stop) push!(dims, dim_var) push!(pre_kernel_instr, start_decl, step_decl, dim_decl) push!(kernel_args, StarpuExprTypedVar(start_var.name, Int64)) push!(kernel_args, StarpuExprTypedVar(step_var.name, Int64)) push!(kernel_args, StarpuExprTypedVar(dim_var.name, Int64)) iter_var = starpu_parse(ind_for.iters[k]) index_var = starpu_parse(Symbol(:kernel_ids__index_, k)) iter_decl = replace_pattern(iter_pattern, iter_var, start_var, index_var, step_var) push!(ker_instr_to_add_later_on, iter_decl) end return dims, ker_instr_to_add_later_on, pre_kernel_instr , kernel_args, kernel_instr end function analyse_sets(ind_for :: StarpuIndepFor) decl_pattern = @parse € :: Int64 = € nblocks_decl_pattern = @parse € :: Int64 = (€ + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK dims, ker_instr_to_add, pre_kernel_instr, kernel_args, kernel_instr = kernel_index_declarations(ind_for) dim_prod = @parse 1 for d in dims dim_prod = StarpuExprCall(:(*), [dim_prod, d]) end nthreads_var = @parse nthreads nthreads_decl = replace_pattern(decl_pattern, nthreads_var, dim_prod) push!(pre_kernel_instr, nthreads_decl) nblocks_var = @parse nblocks nblocks_decl = replace_pattern(nblocks_decl_pattern, nblocks_var, nthreads_var) push!(pre_kernel_instr, nblocks_decl) index_decomposition = translate_index_code(dims) push!(kernel_instr, index_decomposition...) push!(kernel_instr, ker_instr_to_add...) return pre_kernel_instr, kernel_args, kernel_instr end starpu-1.4.9+dfsg/julia/src/compiler/expression_manipulation.jl000066400000000000000000000275671507764646700250440ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # """ Lenient comparison operator for structures and arrays. """ @generated function ≂(x, y) if x != y || x <: Type :(x == y) elseif !isempty(fieldnames(x)) mapreduce(n -> :(x.$n ≂ y.$n), (a,b)->:($a && $b), fieldnames(x)) elseif x <: Array quote if length(x) != length(y) return false end for i in 1:length(x) if !(x[i] ≂ y[i]) return false end end return true end else :(x == y) end end """ Returns a new expression where every occurrence of expr_to_replace into expr has been replaced by new_expr """ function substitute(expr :: StarpuExpr, expr_to_replace :: StarpuExpr, new_expr :: StarpuExpr) function func_to_apply(x :: StarpuExpr) if (x ≂ expr_to_replace) return new_expr end return x end return apply(func_to_apply, expr) end """ Returns an expression where "€" symbols in expr were replaced by the following expression list. Ex : replace_pattern((@parse € = €), (@parse x), (@parse 1 + 1)) --> (StarpuExpr) "x = 1 + 1" """ function replace_pattern(expr :: StarpuExpr, replace_€ :: StarpuExpr...) replace_index = 0 function func_to_apply(x :: StarpuExpr) if x == @parse € replace_index += 1 return replace_€[replace_index] end if isa(x, StarpuExprTypedVar) && x.name == :€ replace_index += 1 if isa(replace_€[replace_index], StarpuExprVar) return StarpuExprTypedVar(replace_€[replace_index].name, x.typ) end return StarpuExprTypedExpr(replace_€[replace_index], x.typ) end if isa(x, StarpuExprFunction) && x.func == :€ replace_index += 1 if !(isa(replace_€[replace_index], StarpuExprVar)) error("Can only replace a function name by a variable") end return StarpuExprFunction(x.ret_type, replace_€[replace_index].name, x.args, x.body) end return x end return apply(func_to_apply, expr) end import Base.any """ Returns true if one of the sub-expression x in expr is such as cond(x) is true, otherwise, it returns false. """ function any(cond :: Function, expr :: StarpuExpr) err_to_catch = "Catch me, condition is true somewhere !" function func_to_apply(x :: StarpuExpr) if cond(x) error(err_to_catch) # dirty but osef end return x end try apply(func_to_apply, expr) catch err if (isa(err, ErrorException) && err.msg == err_to_catch) return true end throw(err) end return false end import Base.all """ Returns true if every sub-expression x in expr is such as cond(x) is true, otherwise, it returns false. """ function all(cond :: Function, expr :: StarpuExpr) return !any(!cond, expr) end function visit_preorder(func :: Function, expr :: StarpuExprAffect) func(expr) visit_preorder(func, expr.var) visit_preorder(func, expr.expr) return expr end function visit_preorder(func :: Function, expr :: StarpuExprBlock) func(expr) for e in expr.exprs visit_preorder(func, e) end return expr end function visit_preorder(func :: Function, expr :: StarpuExprCall) func(expr) for a in expr.args visit_preorder(func, a) end return expr end function visit_preorder(func :: Function, expr :: StarpuExprCudaCall) func(expr) func(expr.nblocks) func(expr.threads_per_block) for a in expr.args visit_preorder(func, a) end return expr end function visit_preorder(func :: Function, expr :: StarpuExprField) func(expr) func(expr.left) func(expr.field) func(expr.is_an_arrow) return expr end function visit_preorder(func :: Function, expr :: StarpuExprFor) func(expr) for d in expr.set_declarations visit_preorder(func, d) end visit_preorder(func, expr.set) visit_preorder(func, expr.body) return expr end function visit_preorder(func :: Function, expr :: StarpuExprFunction) func(expr) for a in expr.args visit_preorder(func, a) end visit_preorder(func, e.body) return expr end function visit_preorder(func :: Function, expr :: StarpuExprIf) func(expr) visit_preorder(func, expr.cond) visit_preorder(func, expr.then_statement) return expr end function visit_preorder(func :: Function, expr :: StarpuExprIfElse) func(expr) visit_preorder(func, expr.cond) visit_preorder(func, expr.then_statement) visit_preorder(func, expr.else_statement) return expr end function visit_preorder(func :: Function, expr :: StarpuExprInterval) func(expr) visit_preorder(func, expr.start) visit_preorder(func, expr.step) visit_preorder(func, expr.stop) return expr end function visit_preorder(func :: Function, expr :: StarpuExprRef) func(expr) visit_preorder(func, expr.ref) for i in expr.indexes visit_preorder(func, i) end return expr end function visit_preorder(func :: Function, expr :: StarpuExprAddress) func(expr) visit_preorder(func, expr.ref) return expr end function visit_preorder(func :: Function, expr :: StarpuExprBreak) func(expr) return expr end function visit_preorder(func :: Function, expr :: StarpuExprReturn) func(expr) visit_preorder(func, expr.value) return expr end function visit_preorder(func :: Function, expr :: StarpuExpr) func(expr) return expr end function visit_preorder(func :: Function, expr :: StarpuExprTypedExpr) func(expr) visit_preorder(func, expr.expr) return expr end function visit_preorder(func :: Function, expr :: StarpuExprWhile) func(expr) visit_preorder(func, expr.cond) visit_preorder(func, expr.body) return expr end # function substitute_preorder(expr :: StarpuExprAffect, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # var = substitute_preorder(func, expr.var) # expr = substitute_preorder(func, expr.expr) # if var != expr.var || expr != expr.expr # return StarpuExprAffect(var, expr) # end # return expr # end # function substitute_preorder(expr :: StarpuExprBlock, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # modified = false # new_exprs = Vector{StarpuExpr}() # for e in expr.exprs # push!(new_exprs, substitute_preorder(func, e)) # end # if new_exprs != expr.exprs # return StarpuExprBlock(new_exprs) # end # return expr # end # function substitute_preorder(expr :: StarpuExprCall, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # new_args = Vector{StarpuExpr}() # for a in expr.args # push!(new_args, substitute_preorder(func, a)) # end # if new_args != expr.args # return StarpuExprCall(expr.func, new_args) # end # return expr # end # function substitute_preorder(expr :: StarpuExprCudaCall, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # new_args = Vector{StarpuExpr}() # for a in expr.args # push!(new_args, substitute_preorder(func, a)) # end # if new_args != expr.args # return new StarpuExprCudaCall(expr.ker_name, expr.nblocks, expr.threads_per_block, new_args) # end # return expr # end # function substitute_preorder(expr :: StarpuExprField, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # left = substitute_preorder(expr.left, match, replace) # if left != expr.left # return StarpuExprField(left, expr.field, expr.is_an_arrow) # end # return expr # end # function substitute_preorder(expr :: StarpuExprFor, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # new_set_declarations = Vector{StarpuExpr}() # for d in expr.set_declarations # substitute_preorder(func, d) # end # substitute_preorder(expr.set, match :: StarpuExpr, replace :: StarpuExpr) # substitute_preorder(func, expr.body) # return expr # end # function substitute_preorder(expr :: StarpuExprFunction, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # for a in expr.args # substitute_preorder(func, a) # end # substitute_preorder(e.body, match :: StarpuExpr, replace :: StarpuExpr) # return expr # end # function substitute_preorder(expr :: StarpuExprIf, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # substitute_preorder(func, expr.cond) # substitute_preorder(func, expr.then_statement) # return expr # end # function substitute_preorder(expr :: StarpuExprIfElse, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # substitute_preorder(func, expr.cond) # substitute_preorder(func, expr.then_statement) # substitute_preorder(func, expr.else_statement) # return expr # end # function substitute_preorder(expr :: StarpuExprInterval, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # substitute_preorder(func, expr.start) # substitute_preorder(func, expr.step) # substitute_preorder(func, expr.stop) # return expr # end # function substitute_preorder(expr :: StarpuExprRef, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # substitute_preorder(func, expr.ref) # for i in expr.indexes # substitute_preorder(func, i) # end # return expr # end # function substitute_preorder(expr :: StarpuExprAddress, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # substitute_preorder(func, expr.ref) # return expr # end # function substitute_preorder(expr :: StarpuExprBreak, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # return expr # end # function substitute_preorder(expr :: StarpuExprReturn, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # substitute_preorder(func, expr.value) # return expr # end # function substitute_preorder(expr :: StarpuExpr, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # return expr # end # function substitute_preorder(expr :: StarpuExprTypedExpr, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # substitute_preorder(func, expr.expr) # return expr # end # function substitute_preorder(expr :: StarpuExprWhile, match :: StarpuExpr, replace :: StarpuExpr) # if expr == match # return replace # end # substitute_preorder(func, expr.cond) # substitute_preorder(func, expr.body) # return expr # end starpu-1.4.9+dfsg/julia/src/compiler/expressions.jl000066400000000000000000000532401507764646700224320ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # #====================================================== AFFECTATION ======================================================# abstract type StarpuExpr end abstract type StarpuExprTyped <: StarpuExpr end struct StarpuExprTypedVar <: StarpuExprTyped name :: Symbol typ :: Type end struct StarpuExprTypedExpr <: StarpuExprTyped # TODO : remove typed expression ? expr :: StarpuExpr typ :: Type end struct StarpuExprAffect <: StarpuExpr var :: StarpuExpr expr :: StarpuExpr end struct StarpuExprBlock <: StarpuExpr exprs :: Vector{StarpuExpr} end struct StarpuExprCall <: StarpuExpr func :: Symbol args :: Vector{StarpuExpr} end struct StarpuExprCudaCall <: StarpuExpr ker_name :: Symbol nblocks :: StarpuExpr threads_per_block :: StarpuExpr args :: Vector{StarpuExpr} end struct StarpuExprField <: StarpuExpr left :: StarpuExpr field :: Symbol is_an_arrow :: Bool end struct StarpuExprInterval <: StarpuExpr start :: StarpuExpr step :: StarpuExpr stop :: StarpuExpr id :: String function StarpuExprInterval(start :: StarpuExpr, step :: StarpuExpr, stop :: StarpuExpr ; id :: String = rand_string()) return new(start, step, stop, id) end end struct StarpuExprFor <: StarpuExpr iter :: Symbol set:: StarpuExprInterval body :: StarpuExpr is_independant :: Bool set_declarations :: Vector{StarpuExpr} end struct StarpuExprFunction <: StarpuExpr ret_type :: Type func :: Symbol args :: Vector{StarpuExprTypedVar} body :: StarpuExpr end struct StarpuExprIf <: StarpuExpr cond :: StarpuExpr then_statement :: StarpuExpr end struct StarpuExprIfElse <: StarpuExpr cond :: StarpuExpr then_statement :: StarpuExpr else_statement :: StarpuExpr end struct StarpuExprRef <: StarpuExpr ref :: StarpuExpr indexes :: Vector{StarpuExpr} end struct StarpuExprReturn <: StarpuExpr value :: StarpuExpr end struct StarpuExprBreak <: StarpuExpr end struct StarpuExprVar <: StarpuExpr name :: Symbol end struct StarpuExprInvalid <: StarpuExpr end struct StarpuExprValue <: StarpuExpr value :: Any end struct StarpuExprWhile <: StarpuExpr cond :: StarpuExpr body :: StarpuExpr end struct StarpuExprAddress <: StarpuExpr ref :: StarpuExpr end function starpu_parse_affect(x :: Expr) if (x.head != :(=)) error("Invalid \"affectation\" expression") end var = starpu_parse(x.args[1]) expr = starpu_parse(x.args[2]) return StarpuExprAffect(var, expr) end function equals(x :: StarpuExprAffect, y :: StarpuExpr) if typeof(y) != StarpuExprAffect return false end return equals(x.var, y.var) && equals(x.expr, y.expr) end function print(io :: IO, x :: StarpuExprAffect ; indent = 0, restrict = false) print(io, x.var, indent = indent) print(io, " = ") need_to_transtyp = isa(x.var, StarpuExprTypedVar) # transtyping to avoid warning (or errors for cuda) during compilation time if need_to_transtyp print(io, "(", starpu_type_traduction(x.var.typ), ") (") end print(io, x.expr, indent = indent) if need_to_transtyp print(io, ")") end end function apply(func :: Function, expr :: StarpuExprAffect) var = apply(func, expr.var) new_expr = apply(func, expr.expr) return func(StarpuExprAffect(var, new_expr)) end #====================================================== BLOCK (series of instruction, not C variable scoping block) ======================================================# function is_unwanted(x :: Symbol) return false end function is_unwanted(x :: LineNumberNode) return true end function is_unwanted(x :: Expr) return false end function starpu_parse_block(x :: Expr) if (x.head != :block) error("Invalid \"block\" expression") end exprs = map(starpu_parse, filter(!is_unwanted, x.args)) return StarpuExprBlock(exprs) end function print(io :: IO, x :: StarpuExprBlock ; indent = 0, restrict=false) for i in (1 : length(x.exprs)) print(io, x.exprs[i], indent = indent) print(io, ";") if (i != length(x.exprs)) print_newline(io, indent) end end end function apply(func :: Function, expr :: StarpuExprBlock) return func(StarpuExprBlock(map((x -> apply(func, x)), expr.exprs))) end #====================================================== FUNCTION CALL ======================================================# function starpu_parse_call(x :: Expr) if (x.head != :call) error("Invalid \"call\" expression") end func = starpu_parse(x.args[1]) if (x.args[1] == Symbol(":")) return starpu_parse_interval(x) end if (!isa(func, StarpuExprVar)) error("Invalid \"call\" expression : function must be a variable") end args = map(starpu_parse, x.args[2:end]) return StarpuExprCall(func.name, args) end starpu_infix_operators = (:(+), :(*), :(-), :(/), :(<), :(>), :(<=), :(>=), :(!=), :(%)) function print_prefix(io :: IO, x :: StarpuExprCall ; indent = 0, restrict=false) print(io, x.func, "(") for i in (1 : length(x.args)) if (i != 1) print(io, ", ") end print(io, x.args[i], indent = indent) end print(io, ")") end function print_infix(io :: IO, x :: StarpuExprCall ; indent = 0,restrict=false) for i in (1 : length(x.args)) if (i != 1) print(io, " ", x.func, " ") end print(io, "(") print(io, x.args[i], indent = indent) print(io, ")") end end function print(io :: IO, x :: StarpuExprCall ; indent = 0,restrict=false) if (length(x.args) >= 2 && x.func in starpu_infix_operators) print_infix(io, x, indent = indent) else print_prefix(io, x, indent = indent) end end function apply(func :: Function, expr :: StarpuExprCall) return func(StarpuExprCall(expr.func, map((x -> apply(func, x)), expr.args))) end #====================================================== CUDA KERNEL CALL ======================================================# function print(io :: IO, expr :: StarpuExprCudaCall ; indent = 0,restrict=false) print_newline(io, indent) print(io, expr.ker_name) print_newline(io, indent + starpu_indent_size) print(io, "<<< ") print(io, expr.nblocks, indent = indent + 2 * starpu_indent_size) print(io, ", ") print(io, expr.threads_per_block, indent = indent + 2 * starpu_indent_size) print(io, ", 0, starpu_cuda_get_local_stream()") print_newline(io, indent + starpu_indent_size) print(io, ">>> (") for i in (1 : length(expr.args)) if (i != 1) print(io, ", ") if (i % 4 == 1) print_newline(io, indent + 2 * starpu_indent_size + 1) end end print(io, expr.args[i], indent = indent + 2 * starpu_indent_size) end print(io, ");") print_newline(io, indent) print(io, "cudaError_t status = cudaGetLastError();") print_newline(io, indent) print(io, "if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status);") print_newline(io, indent) end function apply(func :: Function, expr :: StarpuExprCudaCall) nblocks = func(expr.nblocks) threads_per_block = func(expr.threads_per_block) args = map((x -> apply(func, x)), expr.args) return StarpuExprCudaCall(expr.ker_name, nblocks, threads_per_block, args) end #====================================================== STRUCTURE FIELDS ======================================================# function starpu_parse_field(x :: Expr) if x.head != :(.) || length(x.args) != 2 error("Invalid parsing of dot expression") end left = starpu_parse(x.args[1]) if (!isa(x.args[2], QuoteNode) || !isa(x.args[2].value, Symbol)) error("Invalid parsing of dot expression") end return StarpuExprField(left, x.args[2].value, false) end function print(io :: IO, x :: StarpuExprField ; indent = 0,restrict=false) print(io, "(") print(io, x.left, indent = indent) print(io, ")", x.is_an_arrow ? "->" : '.', x.field) end function apply(func :: Function, expr :: StarpuExprField) return func(StarpuExprField(func(expr.left), expr.field, expr.is_an_arrow)) end #====================================================== FOR LOOPS ======================================================# function starpu_parse_for(x :: Expr; is_independant = false) if (x.head != :for) error("Invalid \"for\" expression") end affect = x.args[1] if (affect.head != :(=)) error("Invalid \"for\" iterator affectation") end iter = starpu_parse(affect.args[1]) if (!isa(iter, StarpuExprVar)) error("Invalid \"for\" iterator") end set = starpu_parse(affect.args[2]) if (!isa(set, StarpuExprInterval)) error("Set of values in \"for\" loop must be an interval") end body = starpu_parse(x.args[2]) return StarpuExprFor(iter.name, set, body, is_independant, StarpuExpr[]) end function print(io :: IO, x :: StarpuExprFor ; indent = 0,restrict=false) print_newline(io, indent) print(io, "{") indent += starpu_indent_size print_newline(io, indent) print(io, StarpuExprBlock(x.set_declarations), indent = indent) id = x.set.id start = "start_" * id stop = "stop_" * id step = "step_" * id dim = "dim_" * id iter = "iter_" * id print_newline(io, indent, 2) if isa(x.set.step, StarpuExprValue) print(io, "for ($(x.iter) = $start ; ") comparison_op = (x.set.step.value >= 0) ? "<=" : ">=" print(io, "$(x.iter) $comparison_op $stop ; ") print(io, "$(x.iter) += $(x.set.step.value))") else print(io, "for ($iter = 0, $(x.iter) = $start ; ") print(io, "$iter < $dim ; ") print(io, "$iter += 1, $(x.iter) += $step)") end print_newline(io, indent) print(io, "{") indent += starpu_indent_size print_newline(io, indent) print(io, x.body, indent = indent) indent -= starpu_indent_size print_newline(io, indent) print(io, "}") indent -= starpu_indent_size print_newline(io, indent) print(io, "}") print_newline(io, indent) end function apply(func :: Function, expr :: StarpuExprFor) set_declarations = map( (x -> apply(func, x)), expr.set_declarations) set = apply(func, expr.set) body = apply(func, expr.body) return func(StarpuExprFor(expr.iter, set, body, expr.is_independant, set_declarations)) end #====================================================== FUNCTION DECLARATION ======================================================# function starpu_parse_function(x :: Expr) if (x.head != :function) error("Invalid \"function\" expression") end typed_decl = starpu_parse(x.args[1]) if (!isa(typed_decl, StarpuExprTypedExpr)) error("Invalid \"function\" prototype : a return type must me explicited") end prototype = typed_decl.expr if (!isa(prototype, StarpuExprCall)) error("Invalid \"function\" prototype") end arg_list = StarpuExprTypedVar[] for type_arg in prototype.args if (!isa(type_arg, StarpuExprTypedVar)) error("Invalid \"function\" argument list") end push!(arg_list, type_arg) end body = starpu_parse(x.args[2]) return StarpuExprFunction(typed_decl.typ, prototype.func, arg_list, body) end function print(io :: IO, x :: StarpuExprFunction ; indent = 0,restrict=false) print(io, starpu_type_traduction(x.ret_type), " ") print(io, x.func, '(') for i in (1 : length(x.args)) if (i != 1) print(io, ", ") if (i % 4 == 1) print_newline(io, indent + starpu_indent_size + length(String(x.func)) + 13) end end print(io, x.args[i], indent = indent + starpu_indent_size, restrict = true) end print(io, ")") print_newline(io, indent) print(io, "{") print_newline(io, indent + starpu_indent_size) print(io, x.body, indent = indent + starpu_indent_size) print_newline(io, indent) print(io, "}\n\n") print_newline(io, indent) end function apply(func :: Function, expr :: StarpuExprFunction) args = map((x -> apply(func, x)), expr.args) body = apply(func, expr.body) return func(StarpuExprFunction(expr.ret_type, expr.func, args, body)) end #====================================================== IF STATEMENT ======================================================# function starpu_parse_if(x :: Expr) if (x.head != :if) error("Invalid \"if\" expression") end len = length(x.args) if (len < 2) error("Invalid \"if\" statement") end cond = starpu_parse(x.args[1]) then_statement = starpu_parse(x.args[2]) if (len == 2) return StarpuExprIf(cond, then_statement) end else_statement = starpu_parse(x.args[3]) return StarpuExprIfElse(cond, then_statement, else_statement) end function print(io :: IO, x :: Union{StarpuExprIf, StarpuExprIfElse}; indent = 0,restrict=false) print_newline(io, indent) print(io, "if (") print(io, x.cond, indent = indent + starpu_indent_size) print(io, ")") print_newline(io, indent) print(io, "{") print_newline(io, indent + starpu_indent_size) print(io, x.then_statement, indent = indent + starpu_indent_size) print_newline(io, indent) print(io, "}") if (!isa(x, StarpuExprIfElse)) return end print(io, " else") print_newline(io, indent) print(io, "{") print_newline(io, indent + starpu_indent_size) print(io, x.else_statement, indent = indent + starpu_indent_size) print_newline(io, indent) print(io, "}") print_newline(io, indent) end function apply(func :: Function, expr :: StarpuExprIf) cond = apply(func, expr.cond) then_statement = apply(func, expr.then_statement) return func(StarpuExprIf(cond, then_statement)) end function apply(func :: Function, expr :: StarpuExprIfElse) cond = apply(func, expr.cond) then_statement = apply(func, expr.then_statement) else_statement = apply(func, expr.else_statement) return func(StarpuExprIfElse(cond, then_statement, else_statement)) end #====================================================== INTERVALS ======================================================# function starpu_parse_interval(x :: Expr) if (x.head != :(call)) error("Invalid \"interval\" expression") end start = starpu_parse(x.args[2]) steop = starpu_parse(x.args[3]) if (length(x.args) == 3) return StarpuExprInterval(start, StarpuExprValue(1), steop) end stop = starpu_parse(x.args[4]) return StarpuExprInterval(start, steop, stop) end function apply(func :: Function, expr :: StarpuExprInterval) start = apply(func, expr.start) step = apply(func, expr.step) stop = apply(func, expr.stop) return func(StarpuExprInterval(start, step, stop, id = expr.id)) end #====================================================== ARRAYS AND REFERENCES ======================================================# function starpu_parse_ref(x :: Expr) if (x.head != :ref) error("Invalid \"reference\" expression") end ref = starpu_parse(x.args[1]) indexes = map(starpu_parse, x.args[2:end]) #= StarpuExpr[] for i in (2 : length(x.args)) push!(indexes, starpu_parse(x.args[i])) end=# return StarpuExprRef(ref, indexes) end function equals(x :: StarpuExprRef, y :: StarpuExpr) if typeof(y) != StarpuExprRef return false end if !equals(x.ref, y.ref) || length(x.indexes) != length(y.indexes) return false end return all(map(equals, x.indexes, y.indexes)) end function print(io :: IO, x :: StarpuExprRef ; indent = 0,restrict=false) print(io, x.ref, indent = indent) for i in (1 : length(x.indexes)) print(io, "[") print(io, x.indexes[i], indent = indent) print(io, "]") end end function apply(func :: Function, expr :: StarpuExprRef) ref = apply(func, expr.ref) indexes = map((x -> apply(func, x)), expr.indexes) return func(StarpuExprRef(ref, indexes)) end function print(io :: IO, x :: StarpuExprAddress ; indent = 0, restrict=false) print(io, "&") print(io, x.ref, indent = indent) end function apply(func :: Function, expr :: StarpuExprAddress) ref = apply(func, expr.ref) return func(StarpuExprAddress(ref)) end #====================================================== BREAK EXPRESSION ======================================================# function starpu_parse_break(x :: Expr) if (x.head != :break) error("Invalid \"break\" expression") end return StarpuExprBreak() end function print(io :: IO, x :: StarpuExprBreak ; indent = 0) print(io, "break") end function apply(func :: Function, expr :: StarpuExprBreak) return func(StarpuExprBreak()) end #====================================================== RETURN EXPRESSION ======================================================# function starpu_parse_return(x :: Expr) if (x.head != :return) error("Invalid \"return\" expression") end value = starpu_parse(x.args[1]) # Remove type associated to a single, for a return # allows matching with ExprVar if (isa(value, StarpuExprTypedVar)) value = StarpuExprVar(value.name) end return StarpuExprReturn(value) end function print(io :: IO, x :: StarpuExprReturn ; indent = 0,restrict=false) print(io, "return ") print(io, x.value, indent = indent) end function apply(func :: Function, expr :: StarpuExprReturn) return func(StarpuExprReturn(apply(func, expr.value))) end function apply(func :: Function, expr :: StarpuExpr) return func(expr) end print(io :: IO, x :: StarpuExprVar ; indent = 0, restrict = false) = print(io, x.name) function print(io :: IO, x :: StarpuExprValue ; indent = 0,restrict=false) value = x.value if value == nothing return end if isa(value, AbstractString) print(io, '"', value, '"') return end if isa(value, Char) print(io, '\'', value, '\'') return end print(io, value) end print(io :: IO, x :: StarpuExprInvalid ; indent = 0) = print(io, "INVALID") function starpu_parse(raw_value :: Any) return StarpuExprValue(raw_value) end function starpu_parse(sym :: Symbol) return StarpuExprVar(sym) end #====================================================== TYPED EXPRESSION ======================================================# function starpu_parse_typed(x :: Expr) if (x.head != :(::)) error("Invalid type assigned expression") end expr = starpu_parse(x.args[1]) typ = nothing try typ = eval(x.args[2]) :: Type catch print(x.args[2]) error("Invalid type in type assigned expression") end if (isa(expr, StarpuExprVar)) return StarpuExprTypedVar(expr.name, typ) end return StarpuExprTypedExpr(expr, typ) end function starpu_type_traduction(x) if x <: Array return starpu_type_traduction(eltype(x)) * "*" end if x <: Ptr depth = 1 type = eltype(x) while type <: Ptr depth +=1 type = eltype(type) end return starpu_type_traduction(type) * "*"^depth end return starpu_type_traduction_dict[x] end function print(io :: IO, x :: StarpuExprTyped ; indent = 0,restrict=false) if (isa(x, StarpuExprTypedVar)) print(io,starpu_type_traduction(x.typ), " ") #if (restrict) # print(io,"restrict "); #end print(io, x.name) else print(io, x.expr, indent = indent) end end function apply(func :: Function, expr :: StarpuExprTypedExpr) new_expr = apply(func, expr.expr) return func(StarpuExprTypedExpr(new_expr, expr.typ)) end #====================================================== While loop ======================================================# function starpu_parse_while(x :: Expr) if (x.head != :while) error("Invalid \"while\" loop") end len = length(x.args) if (len < 2) error("Invalid \"while\" loop") end cond = starpu_parse(x.args[1]) body = starpu_parse(x.args[2]) return StarpuExprWhile(cond, body) end function print(io :: IO, x :: StarpuExprWhile ; indent = 0) print_newline(io, indent) print(io, "while (") print(io, x.cond, indent = indent + starpu_indent_size) print(io, ")") print_newline(io, indent) print(io, "{") print_newline(io, indent + starpu_indent_size) print(io, x.body, indent = indent + starpu_indent_size) print_newline(io, indent) print(io, "}") print_newline(io, indent) end function apply(func :: Function, expr :: StarpuExprWhile) cond = apply(func, expr.cond) body = apply(func, expr.body) return func(StarpuExprWhile(cond, body)) end starpu-1.4.9+dfsg/julia/src/compiler/file_generation.jl000066400000000000000000000120621507764646700231770ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # const cpu_kernel_file_start = "#include #include #include #include #include \"blas.h\" static inline long long jlstarpu_max(long long a, long long b) { return (a > b) ? a : b; } static inline long long jlstarpu_interval_size(long long start, long long step, long long stop) { if (stop >= start){ return jlstarpu_max(0, (stop - start + 1) / step); } else { return jlstarpu_max(0, (stop - start - 1) / step); } } " const cuda_kernel_file_start = "#include #include #include #include #include #define THREADS_PER_BLOCK 64 __attribute__((unused)) static inline long long jlstarpu_max(long long a, long long b) { return (a > b) ? a : b; } __attribute__((unused)) static inline long long jlstarpu_interval_size(long long start, long long step, long long stop) { if (stop >= start){ return jlstarpu_max(0, (stop - start + 1) / step); } else { return jlstarpu_max(0, (stop - start - 1) / step); } } __attribute__((unused)) __device__ static inline long long jlstarpu_max__device(long long a, long long b) { return (a > b) ? a : b; } __attribute__((unused)) __device__ static inline long long jlstarpu_interval_size__device(long long start, long long step, long long stop) { if (stop >= start){ return jlstarpu_max__device(0, (stop - start + 1) / step); } else { return jlstarpu_max__device(0, (stop - start - 1) / step); } } " """ Opens a new Cuda source file, where generated GPU kernels will be written """ function starpu_new_cuda_kernel_file(file_name :: String) global generated_cuda_kernel_file_name = file_name kernel_file = open(file_name, "w") print(kernel_file, cuda_kernel_file_start) close(kernel_file) return nothing end export target macro target(x) targets = eval(x) return quote starpu_target=$targets global starpu_target end end """ Executes @cuda_kernel and @cpu_kernel """ macro codelet(x) parsed = starpu_parse(x) name=string(x.args[1].args[1].args[1]); cpu_name = name cuda_name = "CUDA_"*name dump(name) parse_scalar_parameters(parsed, name) c_struct_param_decl = generate_c_struct_param_declaration(name) cpu_expr = transform_to_cpu_kernel(parsed) generated_cpu_kernel_file_name=string("genc_",string(x.args[1].args[1].args[1]),".c") generated_cuda_kernel_file_name=string("gencuda_",string(x.args[1].args[1].args[1]),".cu") if (starpu_target & STARPU_CPU != 0) kernel_file = open(generated_cpu_kernel_file_name, "w") debug_print("generating ", generated_cpu_kernel_file_name) print(kernel_file, cpu_kernel_file_start) print(kernel_file, c_struct_param_decl) print(kernel_file, cpu_expr) close(kernel_file) CPU_CODELETS[name]=cpu_name end if (starpu_target & STARPU_CUDA!=0) && STARPU_USE_CUDA == 1 kernel_file = open(generated_cuda_kernel_file_name, "w") debug_print("generating ", generated_cuda_kernel_file_name) print(kernel_file, cuda_kernel_file_start) prekernel, kernel = transform_to_cuda_kernel(parsed) if kernel != nothing print(kernel_file, "__global__ ", kernel) end print(kernel_file, c_struct_param_decl) print(kernel_file, "\nextern \"C\" ", prekernel) close(kernel_file) CUDA_CODELETS[name]=cuda_name end end function parse_scalar_parameters(expr :: StarpuExprFunction, codelet_name) scalar_parameters = [] for i in (1 : length(expr.args)) type = expr.args[i].typ if (type <: Number || type <: AbstractChar) push!(scalar_parameters, (expr.args[i].name, type)) end end CODELETS_SCALARS[codelet_name] = scalar_parameters # declare structure carrying scalar parameters struct_params_name = Symbol("params_", rand_string()) structure_decl_str = "mutable struct " * "$struct_params_name\n" for p in scalar_parameters structure_decl_str *= "$(p[1])::$(p[2])\n" end structure_decl_str *= "end" eval(Meta.parse(structure_decl_str)) # add structure type to dictionnary add_to_dict_str = "starpu_type_traduction_dict[$struct_params_name] = \"struct $struct_params_name\"" eval(Meta.parse(add_to_dict_str)) # save structure name CODELETS_PARAMS_STRUCT[codelet_name] = struct_params_name end starpu-1.4.9+dfsg/julia/src/compiler/include.jl000066400000000000000000000016601507764646700214720ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # export starpu_new_cpu_kernel_file export starpu_new_cuda_kernel_file export @codelet export @target include("utils.jl") include("expressions.jl") include("parsing.jl") include("expression_manipulation.jl") include("c.jl") include("cuda.jl") include("file_generation.jl") starpu-1.4.9+dfsg/julia/src/compiler/parsing.jl000066400000000000000000000041721507764646700215130ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # #====================================================== GLOBAL PARSING ======================================================# starpu_parse_key_word_parsing_function = Dict{Symbol, Function}() """ Translates x Expr into a new StarpuExpr object """ function starpu_parse(x :: Expr) if (x.head == :macrocall) if (x.args[1] != Symbol("@parallel")) error("Only @parallel macro, used before a for loop, is allowed ($(x.args[1]) was found)") end if (length(x.args) != 3) error("Invalid usage of @parallel macro", length(x.args)) end return starpu_parse_for(x.args[3], is_independant = true) end if !(x.head in keys(starpu_parse_key_word_parsing_function)) return StarpuExprInvalid() #TODO error ? end return starpu_parse_key_word_parsing_function[x.head](x) end for kw in (:if, :call, :for, :block, :return, :function, :while, :ref, :break) starpu_parse_key_word_parsing_function[kw] = eval(Symbol(:starpu_parse_, kw)) end starpu_parse_key_word_parsing_function[:(:)] = starpu_parse_interval starpu_parse_key_word_parsing_function[:(::)] = starpu_parse_typed starpu_parse_key_word_parsing_function[:(=)] = starpu_parse_affect starpu_parse_key_word_parsing_function[:(.)] = starpu_parse_field """ Executes the starpu_parse function on the following expression, and returns the obtained StarpuExpr """ macro parse(x) y = Expr(:quote, x) :(starpu_parse($y)) end starpu-1.4.9+dfsg/julia/src/compiler/utils.jl000066400000000000000000000024471507764646700212130ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import Base.print function print_newline(io :: IO, indent = 0, n_lines = 1) for i in (1 : n_lines) print(io, "\n") end for i in (1 : indent) print(io, " ") end end starpu_indent_size = 4 function rand_char() r = rand(UInt) % 62 if (0 <= r < 10) return '0' + r elseif (10 <= r < 36) return 'a' + (r - 10) else return 'A' + (r - 36) end end function rand_string(size = 8) output = "" for i in (1 : size) output *= string(rand_char()) end return output end function system(cmd :: String) ccall((:system, "libc"), Cint, (Cstring,), cmd) end starpu-1.4.9+dfsg/julia/src/data.jl000066400000000000000000000177031507764646700171530ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # const StarpuDataHandlePointer = Ptr{Cvoid} StarpuDataHandle = StarpuDestructible{StarpuDataHandlePointer} @enum(StarpuDataFilterFunc, STARPU_MATRIX_FILTER_VERTICAL_BLOCK = 0, STARPU_MATRIX_FILTER_BLOCK = 1, STARPU_VECTOR_FILTER_BLOCK = 2, ) export starpu_data_filter function starpu_data_filter(filter_func ::StarpuDataFilterFunc, nchildren ::Integer) output = starpu_data_filter(zero) output.nchildren = UInt32(nchildren) if filter_func == STARPU_MATRIX_FILTER_VERTICAL_BLOCK output.filter_func = Libdl.dlsym(starpu_wrapper_library_handle, "starpu_matrix_filter_vertical_block") elseif filter_func == STARPU_MATRIX_FILTER_BLOCK output.filter_func = Libdl.dlsym(starpu_wrapper_library_handle, "starpu_matrix_filter_block") else filter_func == STARPU_VECTOR_FILTER_BLOCK output.filter_func = Libdl.dlsym(starpu_wrapper_library_handle, "starpu_vector_filter_block") end return output end function starpu_memory_pin(data :: Union{Vector{T}, Matrix{T}}) where T starpu_memory_pin(data, sizeof(data))::Cint end function starpu_memory_unpin(data :: Union{Vector{T}, Matrix{T}}) where T starpu_memory_unpin(data, sizeof(data))::Cint end function StarpuNewDataHandle(ptr :: StarpuDataHandlePointer, destr :: Function...) :: StarpuDataHandle return StarpuDestructible(ptr, destr...) end function starpu_data_unregister_pointer(ptr :: StarpuDataHandlePointer) starpu_data_unregister(ptr) end function starpu_data_unregister(handles :: StarpuDataHandle...) for h in handles starpu_execute_destructor!(h, starpu_data_unregister_pointer) end end function starpu_data_register(v :: Vector{T}) where T output = Ref{Ptr{Cvoid}}(0) data_pointer = pointer(v) starpu_vector_data_register(output, STARPU_MAIN_RAM, data_pointer, length(v), sizeof(T)) return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)#, [starpu_data_unregister_pointer]) end function starpu_data_register(m :: Matrix{T}) where T output = Ref{Ptr{Cvoid}}(0) data_pointer = pointer(m) (height, width) = size(m) starpu_matrix_data_register(output, STARPU_MAIN_RAM, data_pointer, height, height, width, sizeof(T)) return StarpuNewDataHandle(output[], starpu_data_unregister_pointer)#, [starpu_data_unregister_pointer]) end function starpu_data_register(block :: Array{T,3}) where T output = Ref{Ptr{Cvoid}}(0) data_pointer = pointer(block) (height, width, depth) = size(block) starpu_block_data_register(output, STARPU_MAIN_RAM, data_pointer, height, height * width, height, width, depth, sizeof(T)) return StarpuNewDataHandle(output[], starpu_data_unregister_pointer) end function starpu_data_register(ref :: Ref{T}) where T output = Ref{Ptr{Cvoid}}(0) starpu_variable_data_register(output, STARPU_MAIN_RAM, ref, sizeof(T)) return StarpuNewDataHandle(output[], starpu_data_unregister_pointer) end function starpu_data_register(x1, x2, next_args...) handle_1 = starpu_data_register(x1) handle_2 = starpu_data_register(x2) next_handles = map(starpu_data_register, next_args) return [handle_1, handle_2, next_handles...] end import Base.getindex function Base.getindex(handle :: StarpuDataHandle, indexes...) output = starpu_data_get_sub_data(handle.object, length(indexes), map(x->x-1, indexes)...) return StarpuNewDataHandle(output) end function starpu_data_unpartition_pointer(ptr :: StarpuDataHandlePointer) starpu_data_unpartition(ptr, STARPU_MAIN_RAM) end function starpu_data_partition(handle :: StarpuDataHandle, filter :: starpu_data_filter) starpu_add_destructor!(handle, starpu_data_unpartition_pointer) starpu_data_partition(handle.object, pointer_from_objref(filter)) end function starpu_data_unpartition(handles :: StarpuDataHandle...) for h in handles starpu_execute_destructor!(h, starpu_data_unpartition_pointer) end return nothing end function starpu_data_map_filters(handle :: StarpuDataHandle, filter :: starpu_data_filter) starpu_add_destructor!(handle, starpu_data_unpartition_pointer) starpu_data_map_filters(handle.object, 1, pointer_from_objref(filter)) end function starpu_data_map_filters(handle :: StarpuDataHandle, filter_1 :: starpu_data_filter, filter_2 :: starpu_data_filter) starpu_add_destructor!(handle, starpu_data_unpartition_pointer) starpu_data_map_filters(handle.object, 2, pointer_from_objref(filter_1), pointer_from_objref(filter_2)) end function starpu_data_get_sequential_consistency_flag(handle :: StarpuDataHandle) return starpu_data_get_sequential_consistency_flag(handle.object) end function starpu_data_set_sequential_consistency_flag(handle :: StarpuDataHandle, flag :: Int) starpu_data_set_sequential_consistency_flag(handle.object, flag) end function starpu_data_acquire_on_node(handle :: StarpuDataHandle, node :: Int, mode) starpu_data_acquire_on_node(handle.object, node, mode) end function starpu_data_release_on_node(handle :: StarpuDataHandle, node :: Int) starpu_data_release_on_node(handle.object, node) end function starpu_data_wont_use(handle :: StarpuDataHandle) starpu_data_wont_use(handle.object) end function repl(x::Symbol) return x end function repl(x::Number) return x end function repl(x :: Expr) if (x.head == :call && x.args[1] == :+) if (x.args[2] == :_) return x.args[3] elseif (x.args[3] == :_) return x.args[2] else return Expr(:call,:+,repl(x.args[2]),repl(x.args[3])) end elseif (x.head == :call && x.args[1] == :-) if (x.args[2] == :_) return Expr(:call,:-,x.args[3]) elseif (x.args[3] == :_) return x.args[2] else return Expr(:call,:-,repl(x.args[2]),repl(x.args[3])) end else return Expr(:call,x.args[1],repl(x.args[2]),repl(x.args[3])) end end """ Declares a subarray. Ex : @starpu_filter ha = A[ _:_+1, : ] """ macro starpu_filter(expr) #dump(expr, maxdepth=20) if (expr.head==Symbol("=")) region = expr.args[2] if (region.head == Symbol("ref")) farray = expr.args[1] println("starpu filter") index = 0 filter2=nothing filter3=nothing if (region.args[2]==Symbol(":")) index = 3 filter2=:(STARPU_MATRIX_FILTER_BLOCK) elseif (region.args[3] == Symbol(":")) index = 2 filter3=:(STARPU_MATRIX_FILTER_VERTICAL_BLOCK) else end ex = repl(region.args[index].args[3]) if (region.args[index].args[2] != Symbol("_")) throw(AssertionError("LHS must be _")) end ret = quote # escape and not global for farray! $(esc(farray)) = starpu_data_register($(esc(region.args[1]))) starpu_data_partition( $(esc(farray)),starpu_data_filter($(esc(filter)),$(esc(ex)))) end return ret else ret = quote $(esc(farray))= starpu_data_register($(esc(region.args[1]))) end dump("coucou"); #dump(region.args[2]) # dump(region.args[2]) # dump(region.args[3]) return ret end end end starpu-1.4.9+dfsg/julia/src/destructible.jl000066400000000000000000000061111507764646700207220ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # """ Object used to store a lot of function which must be applied to and object """ mutable struct StarpuDestructible{T} object :: T destructors :: LinkedList{Function} end starpu_block_list = Vector{LinkedList{StarpuDestructible}}() """ Declares a block of code. Every declared StarpuDestructible in this code will execute its destructors on its object, once the block is exited """ macro starpu_block(expr) quote starpu_enter_new_block() local z=$(esc(expr)) starpu_exit_block() z end end function StarpuDestructible(obj :: T, destructors :: Function...) where T if (isempty(starpu_block_list)) error("Creation of a StarpuDestructible object while not beeing in a @starpu_block") end l = LinkedList{Function}() for destr in destructors add_to_tail!(l, destr) end output = StarpuDestructible{T}(obj, l) add_to_head!(starpu_block_list[end], output) return output end function starpu_enter_new_block() push!(starpu_block_list, LinkedList{StarpuDestructible}()) end function starpu_destruct!(x :: StarpuDestructible) @foreach_asc x.destructors destr begin destr.data(x.object) end empty!(x.destructors) return nothing end function starpu_exit_block() destr_list = pop!(starpu_block_list) @foreach_asc destr_list x begin starpu_destruct!(x.data) end end """ Adds new destructors to the list of function. They will be executed before already stored ones when calling starpu_destruct! """ function starpu_add_destructor!(x :: StarpuDestructible, destrs :: Function...) for d in destrs add_to_head!(x.destructors, d) end return nothing end """ Removes detsructor without executing it """ function starpu_remove_destructor!(x :: StarpuDestructible, destr :: Function) @foreach_asc x.destructors lnk begin if (lnk.data == destr) remove_link!(lnk) break end end return nothing end """ Executes "destr" function. If it was one of the stored destructors, it is removed. This function can be used to allow user to execute a specific action manually (ex : explicit call to starpu_data_unpartition() without unregistering) """ function starpu_execute_destructor!(x :: StarpuDestructible, destr :: Function) starpu_remove_destructor!(x, destr) return destr(x.object) end starpu-1.4.9+dfsg/julia/src/dynamic_compiler/000077500000000000000000000000001507764646700212215ustar00rootroot00000000000000starpu-1.4.9+dfsg/julia/src/dynamic_compiler/Makefile.am000066400000000000000000000033051507764646700232560ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-notests.mk LD=$(CC_OR_NVCC) AM_CPPFLAGS = -I$(abs_top_srcdir)/include/ -I$(abs_top_builddir)/src -I$(abs_top_builddir)/include \ -I$(abs_top_srcdir)/julia/src/ $(STARPU_H_CPPFLAGS) AM_CFLAGS += -fPIC -DSTRIDE=${STRIDE} -mavx -fomit-frame-pointer -march=native -ffast-math LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ LIBS += -L $(top_builddir)/julia/src/libstarpujulia-$(STARPU_EFFECTIVE_VERSION).1.3 CUDA_CFLAGS = $(STARPU_CUDA_CPPFLAGS) -Wno-deprecated-gpu-targets EXTERNLIB=extern_tasks.so GENERATEDLIB=generated_tasks.so C_OBJECTS=$(patsubst %.c,%.o,$(wildcard gen*.c)) if STARPU_USE_CUDA CUDA_OBJECTS=$(patsubst %.cu,%.o,$(wildcard gen*.cu)) else CUDA_OBJECTS= endif %.o: %.c $(CC) -c $(AM_CPPFLAGS) $(AM_CFLAGS) $^ -o $@ %.o: %.cu $(NVCC) -dc $(AM_CPPFLAGS) $(CUDA_CFLAGS) $^ --shared --compiler-options '-fPIC' -o $@ $(LDFLAGS) ${EXTERNLIB}: $(SOURCES_CPU) $(CC) $(AM_CPPFLAGS) $(AM_CFLAGS) -shared -fPIC $(LDFLAGS) $^ -o $@ ${GENERATEDLIB}: $(C_OBJECTS) $(CUDA_OBJECTS) $(LD) -shared $^ -o $@ $(LDFLAGS) starpu-1.4.9+dfsg/julia/src/dynamic_compiler/Makefile.in000066400000000000000000000613301507764646700232710ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ subdir = julia/src/dynamic_compiler ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/make/starpu-notests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = $(CC_OR_NVCC) LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ -L \ $(top_builddir)/julia/src/libstarpujulia-$(STARPU_EFFECTIVE_VERSION).1.3 LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_CFLAGS = $(GLOBAL_AM_CFLAGS) -fPIC -DSTRIDE=${STRIDE} -mavx \ -fomit-frame-pointer -march=native -ffast-math AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) AM_CPPFLAGS = -I$(abs_top_srcdir)/include/ -I$(abs_top_builddir)/src -I$(abs_top_builddir)/include \ -I$(abs_top_srcdir)/julia/src/ $(STARPU_H_CPPFLAGS) CUDA_CFLAGS = $(STARPU_CUDA_CPPFLAGS) -Wno-deprecated-gpu-targets EXTERNLIB = extern_tasks.so GENERATEDLIB = generated_tasks.so C_OBJECTS = $(patsubst %.c,%.o,$(wildcard gen*.c)) @STARPU_USE_CUDA_FALSE@CUDA_OBJECTS = @STARPU_USE_CUDA_TRUE@CUDA_OBJECTS = $(patsubst %.cu,%.o,$(wildcard gen*.cu)) all: all-am .SUFFIXES: .SUFFIXES: .cu .cubin .hip .o $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign julia/src/dynamic_compiler/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign julia/src/dynamic_compiler/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs tags TAGS: ctags CTAGS: cscope cscopelist: distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile installdirs: install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ cscopelist-am ctags-am distclean distclean-generic \ distclean-libtool distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags-am uninstall uninstall-am .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null %.o: %.c $(CC) -c $(AM_CPPFLAGS) $(AM_CFLAGS) $^ -o $@ %.o: %.cu $(NVCC) -dc $(AM_CPPFLAGS) $(CUDA_CFLAGS) $^ --shared --compiler-options '-fPIC' -o $@ $(LDFLAGS) ${EXTERNLIB}: $(SOURCES_CPU) $(CC) $(AM_CPPFLAGS) $(AM_CFLAGS) -shared -fPIC $(LDFLAGS) $^ -o $@ ${GENERATEDLIB}: $(C_OBJECTS) $(CUDA_OBJECTS) $(LD) -shared $^ -o $@ $(LDFLAGS) # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/julia/src/globals.jl000066400000000000000000000027071507764646700176630ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # global starpu_wrapper_library_handle = C_NULL global starpu_tasks_library_handle = C_NULL global starpu_target=STARPU_CPU global generated_cuda_kernel_file_name = "PRINT TO STDOUT" global generated_cpu_kernel_file_name = "PRINT TO STDOUT" global CPU_CODELETS=Dict{String,String}() global CUDA_CODELETS=Dict{String,String}() global CODELETS_SCALARS=Dict{String,Any}() global CODELETS_PARAMS_STRUCT=Dict{String,Any}() global starpu_type_traduction_dict = Dict( Int32 => "int32_t", UInt32 => "uint32_t", Float32 => "float", Int64 => "int64_t", UInt64 => "uint64_t", Float64 => "double", Nothing => "void" ) export starpu_type_traduction_dict global mutex = Threads.SpinLock() # detect CUDA support try STARPU_USE_CUDA == 1 catch global const STARPU_USE_CUDA = 0 end starpu-1.4.9+dfsg/julia/src/init.jl000066400000000000000000000050521507764646700171770ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # """ Must be called before any other starpu function. Field extern_task_path is the shared library path which will be used to find StarpuCodelet cpu and gpu function names """ function starpu_init() debug_print("starpu_init") if (get(ENV,"JULIA_TASK_LIB",0)!=0) global starpu_tasks_library_handle= Libdl.dlopen(ENV["JULIA_TASK_LIB"]) debug_print("Loading external codelet library") ff = Libdl.dlsym(starpu_tasks_library_handle,:starpu_find_function) dump(ff) for k in keys(CPU_CODELETS) CPU_CODELETS[k]=unsafe_string(ccall(ff,Cstring, (Cstring,Cstring),Cstring_from_String(string(k)),Cstring_from_String("cpu"))) if STARPU_USE_CUDA == 1 CUDA_CODELETS[k]=unsafe_string(ccall(ff,Cstring, (Cstring,Cstring),Cstring_from_String(string(k)),Cstring_from_String("gpu"))) end print(k,">>>>",CPU_CODELETS[k],"\n") end else srcdir=get(ENV,"STARPU_JULIA_BUILD",0) if (srcdir == 0) error("Must define environment variable STARPU_JULIA_BUILD") end makefile=string(srcdir, "/src/dynamic_compiler/Makefile") debug_print("generating codelet library with ") debug_print(makefile) run(`make -f $makefile generated_tasks.so`) global starpu_tasks_library_handle=Libdl.dlopen("generated_tasks.so") end global starpu_wrapper_library_handle= Libdl.dlopen(starpu_wrapper_library_name) output = starpu_init(C_NULL) global task_pool = ThreadPools.QueuePool(2) starpu_enter_new_block() return output end """ Must be called at the end of the program """ function starpu_shutdown() debug_print("starpu_shutdown") starpu_exit_block() @starpucall starpu_shutdown Cvoid () lock(mutex) empty!(perfmodel_list) empty!(codelet_list) empty!(task_list) unlock(mutex) return nothing end starpu-1.4.9+dfsg/julia/src/linked_list.jl000066400000000000000000000143301507764646700205340ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # export Link mutable struct Link{T} data :: T previous :: Union{Nothing, Link{T}} next :: Union{Nothing, Link{T}} list function Link{T}(x :: T, l) where {T} output = new() output.data = x output.previous = Nothing() output.next = Nothing() output.list = l return output end end export LinkedList mutable struct LinkedList{T} nelement :: Int64 first :: Union{Nothing, Link{T}} last :: Union{Nothing, Link{T}} function LinkedList{T}() where {T} output = new() output.nelement = 0 output.first = Nothing() output.last = Nothing() return output end end export add_to_head! function add_to_head!(l :: LinkedList{T}, el :: T) where {T} new_first = Link{T}(el, l) old_first = l.first l.first = new_first new_first.next = old_first if (isnothing(old_first)) l.last = new_first else old_first.previous = new_first end l.nelement += 1 return new_first end export add_to_tail! function add_to_tail!(l :: LinkedList{T}, el :: T) where {T} new_last = Link{T}(el, l) old_last = l.last l.last = new_last new_last.previous = old_last if (isnothing(old_last)) l.first = new_last else old_last.next = new_last end l.nelement += 1 return new_last end function LinkedList(v :: Union{Array{T,N}, NTuple{N,T}}) where {N,T} output = LinkedList{T}() for x in v add_to_tail!(output, x) end return output end export remove_link! function remove_link!(lnk :: Link{T}) where {T} if (lnk.list == nothing) return lnk.data end l = lnk.list next = lnk.next previous = lnk.previous if (isnothing(next)) l.last = previous else next.previous = previous end if (isnothing(previous)) l.first = next else previous.next = next end l.nelement -= 1 lnk.list = nothing return lnk.data end export is_linked function is_linked(lnk :: Link) return (lnk.list != nothing) end export foreach_asc macro foreach_asc(list, lnk_iterator, expression) quote $(esc(lnk_iterator)) = $(esc(list)).first while (!isnothing($(esc(lnk_iterator)))) __next_lnk_iterator = $(esc(lnk_iterator)).next $(esc(expression)) $(esc(lnk_iterator)) = __next_lnk_iterator end end end export foreach_desc macro foreach_desc(list, lnk_iterator, expression) quote $(esc(lnk_iterator)) = $(esc(list)).last while (!isnothing($(esc(lnk_iterator)))) __next_lnk_iterator = $(esc(lnk_iterator)).previous $(esc(expression)) $(esc(lnk_iterator)) = __next_lnk_iterator end end end function Base.show(io :: IO, lnk :: Link{T}) where {T} print(io, "Link{$T}{data: ") print(io, lnk.data) print(io, " ; previous: ") if (isnothing(lnk.previous)) print(io, "NONE") else print(io, lnk.previous.data) end print(io, " ; next: ") if (isnothing(lnk.next)) print(io, "NONE") else print(io, lnk.next.data) end print(io, "}") end function Base.show(io :: IO, l :: LinkedList{T}) where {T} print(io, "LinkedList{$T}{") @foreach_asc l lnk begin if (!isnothing(lnk.previous)) print(io, ", ") end print(io, lnk.data) end print(io, "}") end #import Base.start function start(l :: LinkedList) return nothing end #import Base.done function done(l :: LinkedList, state) if (state == nothing) return isnothing(l.first) end return isnothing(state.next) end #import Base.next function next(l :: LinkedList, state) if (state == nothing) next_link = l.first else next_link = state.next end return (next_link.data, next_link) end #import Base.endof function endof(l :: LinkedList) return l.nelement end export index_to_link function index_to_link(l :: LinkedList, ind) if (ind > l.nelement || ind <= 0) error("Invalid index") end lnk = l.first for i in (1:(ind - 1)) lnk = lnk.next end return lnk end import Base.getindex function getindex(l :: LinkedList, ind) return index_to_link(l,ind).data end import Base.setindex! function setindex!(l :: LinkedList{T}, ind, value :: T) where T lnk = index_to_link(l,ind) lnk.data = value end import Base.eltype function eltype(l :: LinkedList{T}) where T return T end import Base.isempty function isempty(l :: LinkedList) return (l.nelement == 0) end import Base.empty! function empty!(l :: LinkedList) @foreach_asc l lnk remove_link!(lnk) end import Base.length function length(l :: LinkedList) return l.nelement end starpu-1.4.9+dfsg/julia/src/perfmodel.jl000066400000000000000000000021061507764646700202060ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # perfmodel_list = Vector{starpu_perfmodel}() function starpu_perfmodel(; perf_type::starpu_perfmodel_type, symbol::String) output = starpu_perfmodel(zero) output.type = perf_type output.symbol = Cstring_from_String(symbol) # Performance models must not be garbage collected before starpu_shutdown # is called. lock(mutex) push!(perfmodel_list, output) unlock(mutex) return output end starpu-1.4.9+dfsg/julia/src/task.jl000066400000000000000000000323511507764646700172000ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # using ThreadPools mutable struct jl_starpu_codelet c_codelet :: starpu_codelet perfmodel :: starpu_perfmodel cpu_func :: Union{String, STARPU_BLAS} cuda_func :: Union{String, STARPU_BLAS} opencl_func :: String modes end global codelet_list = Vector{jl_starpu_codelet}() function starpu_codelet(; cpu_func :: Union{String, STARPU_BLAS, Cvoid} = "", cuda_func :: Union{String, STARPU_BLAS, Cvoid} = "", opencl_func :: String = "", modes = [], perfmodel :: starpu_perfmodel, where_to_execute :: Union{Cvoid, UInt32} = nothing, color :: UInt32 = 0x00000000 ) if (length(modes) > STARPU_NMAXBUFS) error("Codelet has too much buffers ($(length(modes)) but only $STARPU_NMAXBUFS are allowed)") end if (where_to_execute == nothing) real_where = ((cpu_func != nothing) * STARPU_CPU) | ((cuda_func != nothing) * STARPU_CUDA) else real_where = where_to_execute end output = jl_starpu_codelet(starpu_codelet(zero), perfmodel, cpu_func, cuda_func, opencl_func, modes) ## TODO: starpu_codelet_init output.c_codelet.where = real_where for i in 1:length(modes) output.c_codelet.modes[i] = modes[i] end output.c_codelet.nbuffers = length(modes) output.c_codelet.model = pointer_from_objref(perfmodel) output.c_codelet.color = color if typeof(cpu_func) == STARPU_BLAS output.cpu_func = cpu_blas_codelets[cpu_func] output.c_codelet.cpu_func = load_wrapper_function_pointer(output.cpu_func) else output.c_codelet.cpu_func = load_starpu_function_pointer(get(CPU_CODELETS, cpu_func, "")) end if typeof(cuda_func) == STARPU_BLAS output.cuda_func = cuda_blas_codelets[cuda_func] output.c_codelet.cuda_func = load_wrapper_function_pointer(output.cuda_func) output.c_codelet.cuda_flags[1] = STARPU_CUDA_ASYNC else output.c_codelet.cuda_func = load_starpu_function_pointer(get(CUDA_CODELETS, cuda_func, "")) end output.c_codelet.opencl_func = load_starpu_function_pointer("") # Codelets must not be garbage collected before starpu shutdown is called. lock(mutex) push!(codelet_list, output) unlock(mutex) return output end mutable struct jl_starpu_task cl :: jl_starpu_codelet handles :: Vector{StarpuDataHandle} handle_pointers :: Vector{StarpuDataHandlePointer} synchronous :: Bool cl_arg # type depends on codelet callback_signal :: Vector{Cint} callback_function :: Union{Cvoid, Function} callback_arg c_task :: starpu_task end task_list = Vector{jl_starpu_task}() """ starpu_task(; cl :: jl_starpu_codelet, handles :: Vector{StarpuDataHandle}, cl_arg :: Ref) Creates a new task which will run the specified codelet on handle buffers and cl_args data """ function starpu_task(; cl :: Union{Cvoid, jl_starpu_codelet} = nothing, handles :: Vector{StarpuDataHandle} = StarpuDataHandle[], cl_arg = (), callback :: Union{Cvoid, Function} = nothing, callback_arg = nothing, tag :: Union{Cvoid, starpu_tag_t} = nothing, tag_only :: Union{Cvoid, starpu_tag_t} = nothing, sequential_consistency = true, detach = 1, color :: Union{Cvoid, UInt32} = nothing, where :: Union{Cvoid, Int32} = nothing) if (cl == nothing) error("\"cl\" field can't be empty when creating a StarpuTask") end output = jl_starpu_task(cl, handles, map((x -> x.object), handles), false, nothing, Vector{Cint}(undef, 1), callback, callback_arg, starpu_task(zero)) # handle scalar_parameters codelet_name = "" if isa(cl.cpu_func, String) && cl.cpu_func != "" codelet = cl.cpu_func elseif isa(cl.gpu_func, String) && cl.gpu_func != "" codelet = cl.gpu_func end scalar_parameters = get(CODELETS_SCALARS, codelet_name, nothing) if scalar_parameters != nothing nb_scalar_required = length(scalar_parameters) nb_scalar_provided = tuple_len(cl_arg) if (nb_scalar_provided != nb_scalar_required) error("$nb_scalar_provided scalar parameters provided but $nb_scalar_required are required by $codelet_name.") end output.cl_arg = create_param_struct_from_clarg(codelet_name, cl_arg) else output.cl_arg = cl_arg end starpu_task_init(Ref(output.c_task)) output.c_task.cl = pointer_from_objref(cl.c_codelet) output.c_task.synchronous = false output.c_task.sequential_consistency = sequential_consistency output.c_task.detach = detach ## TODO: check num handles equals num codelet buffers for i in 1:length(handles) output.c_task.handles[i] = output.handle_pointers[i] end if tuple_len(cl_arg) > 0 output.c_task.cl_arg = Base.unsafe_convert(Ptr{Cvoid}, Ref(output.cl_arg)) output.c_task.cl_arg_size = sizeof(output.cl_arg) end # callback if output.callback_function != nothing output.callback_signal[1] = 0 output.c_task.callback_arg = Base.unsafe_convert(Ptr{Cvoid}, output.callback_signal) output.c_task.callback_func = load_wrapper_function_pointer("julia_callback_func") end if tag != nothing output.c_task.tag_id = tag output.c_task.use_tag = 1 end if tag_only != nothing output.c_task.tag_id = tag_only end if color != nothing output.c_task.color = color end if where != nothing output.c_task.where = where end # Tasks must not be garbage collected before starpu_task_wait_for_all is called. # This is necessary in particular for tasks created inside callback functions. lock(mutex) push!(task_list, output) unlock(mutex) return output end function create_param_struct_from_clarg(codelet_name, cl_arg) struct_params_name = CODELETS_PARAMS_STRUCT[codelet_name] if struct_params_name == false error("structure name not found in CODELET_PARAMS_STRUCT") end nb_scalar_provided = length(cl_arg) create_struct_param_str = "output = $struct_params_name(" for i in 1:nb_scalar_provided-1 arg = cl_arg[i] create_struct_param_str *= "$arg, " end if (nb_scalar_provided > 0) arg = cl_arg[nb_scalar_provided] create_struct_param_str *= "$arg" end create_struct_param_str *= ")" eval(Meta.parse(create_struct_param_str)) return output end """ Launches task execution, if "synchronous" task field is set to "false", call returns immediately """ function starpu_task_submit(task :: jl_starpu_task) if (length(task.handles) != length(task.cl.modes)) error("Invalid number of handles for task : $(length(task.handles)) where given while codelet has $(task.cl.modes) modes") end starpu_task_submit(Ref(task.c_task)) if task.callback_function != nothing callback_arg = task.callback_arg callback_signal = task.callback_signal callback_function = task.callback_function lock(mutex) put!(task_pool) do # Active waiting loop @starpucall(julia_wait_signal, Cvoid, (Ptr{Cvoid},), Base.unsafe_convert(Ptr{Cvoid}, callback_signal)) # We've received the signal from the pthread, now execute the callback. callback_function(callback_arg) # Tell the pthread that the callback is done. callback_signal[1] = 0 end unlock(mutex) end end function starpu_modes(x :: Symbol) if (x == Symbol("STARPU_RW")) return STARPU_RW elseif (x == Symbol("STARPU_R")) return STARPU_R else return STARPU_W end end default_codelet = Dict{String, jl_starpu_codelet}() default_perfmodel = Dict{String, starpu_perfmodel}() function get_default_perfmodel(name) if name in keys(default_perfmodel) return default_perfmodel[name] end perfmodel = starpu_perfmodel( perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), symbol = name ) default_perfmodel[name] = perfmodel return perfmodel end function get_default_codelet(codelet_name, perfmodel, modes) :: jl_starpu_codelet if codelet_name in keys(default_codelet) return default_codelet[codelet_name] end cl = starpu_codelet( cpu_func = codelet_name in keys(CPU_CODELETS) ? codelet_name : "", cuda_func = codelet_name in keys(CUDA_CODELETS) ? codelet_name : "", modes = modes, perfmodel = perfmodel, ) default_codelet[codelet_name] = cl return cl end function starpu_task_insert(; codelet_name :: Union{Cvoid, String} = nothing, cl :: Union{Cvoid, jl_starpu_codelet} = nothing, perfmodel :: Union{starpu_perfmodel, Cvoid} = nothing, handles :: Vector{StarpuDataHandle} = StarpuDataHandle[], cl_arg = (), callback :: Union{Cvoid, Function} = nothing, callback_arg = nothing, tag :: Union{Cvoid, starpu_tag_t} = nothing, tag_only :: Union{Cvoid, starpu_tag_t} = nothing, sequential_consistency = true, detach = 1, where :: Union{Cvoid, Int32} = nothing, color :: Union{Cvoid, UInt32} = nothing, modes = nothing) if cl == nothing && codelet_name == nothing error("At least one of the two parameters codelet_name or cl must be provided when calling starpu_task_insert.") end if cl == nothing && modes == nothing error("Modes must be defined when calling starpu_task_insert without a codelet.") end if perfmodel == nothing perfmodel = get_default_perfmodel(codelet_name == nothing ? "default" : codelet_name) end if cl == nothing cl = get_default_codelet(codelet_name, perfmodel, modes) end task = starpu_task(cl = cl, handles = handles, cl_arg = cl_arg, callback = callback, callback_arg = callback_arg, tag = tag, tag_only = tag_only, sequential_consistency = sequential_consistency, detach = detach, color = color, where = where) starpu_task_submit(task) end """ Creates and submits an asynchronous task running cl Codelet function. Ex : @starpu_async_cl cl(handle1, handle2) """ macro starpu_async_cl(expr, modes, cl_arg=(), color ::UInt32=0x00000000) if (!isa(expr, Expr) || expr.head != :call) error("Invalid task submit syntax") end if (!isa(expr, Expr)||modes.head != :vect) error("Invalid task submit syntax") end perfmodel = starpu_perfmodel( perf_type = starpu_perfmodel_type(STARPU_HISTORY_BASED), symbol = "history_perf" ) println(CPU_CODELETS[string(expr.args[1])]) cl = starpu_codelet( cpu_func = string(expr.args[1]), cuda_func = string(expr.args[1]), #opencl_func="ocl_matrix_mult", ### TODO: CORRECT ! modes = map((x -> starpu_modes(x)),modes.args), perfmodel = perfmodel, color = color ) handles = Expr(:vect, expr.args[2:end]...) #dump(handles) quote task = starpu_task(cl = $(esc(cl)), handles = $(esc(handles)), cl_arg=$(esc(cl_arg))) starpu_task_submit(task) end end function starpu_task_wait(task :: jl_starpu_task) @threadcall(@starpufunc(:starpu_task_wait), Cint, (Ptr{Cvoid},), Ref(task.c_task)) # starpu_task_wait(Ref(task.c_task)) end """ Blocks until every submitted task has finished. """ function starpu_task_wait_for_all() @threadcall(@starpufunc(:starpu_task_wait_for_all), Cint, ()) lock(mutex) empty!(task_list) unlock(mutex) end """ Blocks until every submitted task has finished. Ex : @starpu_sync_tasks begin [...] starpu_task_submit(task) [...] end TODO : Make the macro only wait for tasks declared inside the following expression. (similar mechanism as @starpu_block) """ macro starpu_sync_tasks(expr) quote $(esc(expr)) starpu_task_wait_for_all() end end function starpu_task_destroy(task :: jl_starpu_task) starpu_task_destroy(Ref(task.c_task)) end starpu-1.4.9+dfsg/julia/src/task_dep.jl000066400000000000000000000045161507764646700200320ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # function starpu_tag_declare_deps(id :: starpu_tag_t, dep :: starpu_tag_t, other_deps :: starpu_tag_t...) v = [dep, other_deps...] starpu_tag_declare_deps_array(id, length(v), pointer(v)) end """ starpu_task_declare_deps(task :: StarpuTask, dep :: StarpuTask [, other_deps :: StarpuTask...]) Declare task dependencies between a task and the following provided ones. This function must be called prior to the submission of the task, but it may called after the submission or the execution of the tasks in the array, provided the tasks are still valid (i.e. they were not automatically destroyed). Calling this function on a task that was already submitted or with an entry of task_array that is no longer a valid task results in an undefined behaviour. """ function starpu_task_declare_deps(task :: jl_starpu_task, dep :: jl_starpu_task, other_deps :: jl_starpu_task...) task_array = [pointer_from_objref(dep.c_task), map((t -> pointer_from_objref(t.c_task)), other_deps)...] starpu_task_declare_deps_array(pointer_from_objref(task.c_task), length(task_array), task_array) end function starpu_task_end_dep_add(task :: jl_starpu_task, nb_deps :: Int) starpu_task_end_dep_add(Ref(task.c_task), nb_deps) end function starpu_task_end_dep_release(task :: jl_starpu_task) starpu_task_end_dep_release(Ref(task.c_task)) end function starpu_task_declare_end_deps(task :: jl_starpu_task, dep :: jl_starpu_task, other_deps :: jl_starpu_task...) task_array = [pointer_from_objref(dep.c_task), map((t -> pointer_from_objref(t.c_task)), other_deps)...] starpu_task_declare_end_deps_array(pointer_from_objref(task.c_task), length(task_array), pointer(task_array)) end starpu-1.4.9+dfsg/julia/src/translate_headers.jl000066400000000000000000000124651507764646700217320ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # using Clang using Clang.LibClang.LLVM_jll function starpu_translate_headers() debug_print("Translating StarPU headers...") if !isdir(joinpath(fstarpu_build_dir(), "julia/gen")) mkdir(joinpath(fstarpu_build_dir(), "julia/gen")) end STARPU_BUILD_INCLUDE=joinpath(fstarpu_build_dir(), "include") STARPU_SRC_INCLUDE=joinpath(fstarpu_src_dir(), "include") STARPU_HEADERS = [joinpath(STARPU_BUILD_INCLUDE, header) for header in readdir(STARPU_BUILD_INCLUDE) if endswith(header, ".h")] if STARPU_SRC_INCLUDE != STARPU_BUILD_INCLUDE for header in readdir(STARPU_SRC_INCLUDE) if endswith(header, ".h") push!(STARPU_HEADERS, joinpath(STARPU_SRC_INCLUDE, header)) end end end LIBCLANG_INCLUDE = joinpath(dirname(LLVM_jll.libclang_path), "..", "include", "clang-c") |> normpath clang_args = ["-I", STARPU_BUILD_INCLUDE, "-I", STARPU_SRC_INCLUDE] for header in find_std_headers() push!(clang_args, "-I") push!(clang_args, header) end only_select_symbols = Set(["starpu_task", "starpu_cublas_init", "starpu_codelet", "starpu_data_filter", "starpu_tag_t", "starpu_perfmodel", "starpu_perfmodel_type", "starpu_data_handle_t", "starpu_init", "starpu_data_acquire_on_node", "starpu_data_release_on_node", "starpu_data_unregister", "starpu_data_partition", "starpu_data_unpartition", "starpu_data_get_sub_data", "starpu_data_map_filters", "starpu_data_get_default_sequential_consistency_flag", "starpu_data_set_default_sequential_consistency_flag", "starpu_data_get_sequential_consistency_flag", "starpu_data_set_sequential_consistency_flag", "starpu_data_wont_use", "starpu_matrix_data_register", "starpu_block_data_register", "starpu_vector_data_register", "starpu_variable_data_register", "starpu_memory_pin", "starpu_memory_unpin", "starpu_task_end_dep_add", "starpu_task_end_dep_release", "starpu_task_init", "starpu_task_destroy", "starpu_task_submit", "starpu_task_wait", "starpu_task_wait_for_n_submitted", "starpu_tag_remove", "starpu_tag_wait", "starpu_tag_declare_deps_array", "starpu_tag_notify_from_apps", "starpu_task_declare_end_deps_array", "starpu_task_declare_deps_array", "starpu_iteration_push", "starpu_iteration_pop", "starpu_worker_get_count", "starpu_cpu_worker_get_count", "starpu_cuda_worker_get_count", "starpu_opencl_worker_get_count", "STARPU_CPU", "STARPU_CUDA", "STARPU_CUDA_ASYNC", "STARPU_OPENCL", "STARPU_MAIN_RAM", "STARPU_NMAXBUFS", "STARPU_USE_CUDA"]) wc = init(; headers = STARPU_HEADERS, output_file = joinpath(fstarpu_build_dir(), "julia/gen/libstarpu_api.jl"), common_file = joinpath(fstarpu_build_dir(), "julia/gen/libstarpu_common.jl"), clang_includes = vcat(LIBCLANG_INCLUDE, CLANG_INCLUDE), clang_args = clang_args, header_library = x->"starpu_wrapper_library_name", clang_diagnostics = false, rewriter = x->x, only_select_symbols = only_select_symbols, fields_align = Dict((:starpu_pthread_spinlock_t,:taken) => 16) ) run(wc) end starpu-1.4.9+dfsg/julia/src/utils.jl000066400000000000000000000062651507764646700174030ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # function fstarpu_task_library_name() x=get(ENV, "STARPU_JULIA_LIB", C_NULL) if (x == C_NULL) error("Environment variable STARPU_JULIA_LIB must be defined") end return x end function fstarpu_build_dir() x=get(ENV, "STARPU_BUILD_DIR", C_NULL) if (x == C_NULL) error("Environment variable STARPU_BUILD_DIR must be defined") end return x end function fstarpu_src_dir() x=get(ENV, "STARPU_SRC_DIR", C_NULL) if (x == C_NULL) error("Environment variable STARPU_SRC_DIR must be defined") end return x end macro starpufunc(symbol) :($symbol, starpu_wrapper_library_name) end """ Used to call a StarPU function compiled inside "libjlstarpu_c_wrapper.so" Works as ccall function """ macro starpucall(func, ret_type, arg_types, args...) return Expr(:call, :ccall, (func, starpu_wrapper_library_name), esc(ret_type), esc(arg_types), map(esc, args)...) end function debug_print(x...) println("\x1b[32m", x..., "\x1b[0m") flush(stdout) end function Cstring_from_String(str :: String) return Cstring(pointer(str)) end tuple_len(::NTuple{N, Any}) where {N} = N function starpu_find_function(name :: String, device :: String ) s=ccall(:starpu_find_function,Cstring, (Cstring,Cstring),Cstring_from_String(name),Cstring_from_String(device)) if s == C_NULL print("NULL STRING\n") error("dead") end return s end function load_starpu_function_pointer(func_name :: String) if (isempty(func_name)) return C_NULL end #func_pointer = ccall(:dlsym,"libdl",Ptr{Cvoid}); func_pointer=Libdl.dlsym(starpu_tasks_library_handle, func_name) if (func_pointer == C_NULL) error("Couldn't find function symbol $func_name into extern library file $starpu_tasks_library") end return func_pointer end function load_wrapper_function_pointer(func_name :: String) if (isempty(func_name)) return C_NULL end func_pointer=Libdl.dlsym(starpu_wrapper_library_handle, func_name) if (func_pointer == C_NULL) error("Couldn't find function symbol $func_name into extern library file $starpu_tasks_library") end return func_pointer end """ Declares a Julia function which is just calling the StarPU function having the same name. """ macro starpu_noparam_function(func_name, ret_type) func = Symbol(func_name) quote export $func global $func() = ccall(($func_name, starpu_wrapper_library_name), $ret_type, ()) :: $ret_type end end starpu-1.4.9+dfsg/m4/000077500000000000000000000000001507764646700143305ustar00rootroot00000000000000starpu-1.4.9+dfsg/m4/acinclude.m4000066400000000000000000000303321507764646700165220ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Check whether the target supports __sync_val_compare_and_swap. AC_DEFUN([STARPU_CHECK_SYNC_VAL_COMPARE_AND_SWAP], [ AC_CACHE_CHECK([whether the target supports __sync_val_compare_and_swap], ac_cv_have_sync_val_compare_and_swap, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;], [bar = __sync_val_compare_and_swap(&foo, 0, 1);])], [ac_cv_have_sync_val_compare_and_swap=yes], [ac_cv_have_sync_val_compare_and_swap=no])]) if test $ac_cv_have_sync_val_compare_and_swap = yes; then AC_DEFINE(STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP, 1, [Define to 1 if the target supports __sync_val_compare_and_swap]) fi]) # Check whether the target supports 64bit __sync_val_compare_and_swap. AC_DEFUN([STARPU_CHECK_SYNC_VAL_COMPARE_AND_SWAP_8], [ AC_CACHE_CHECK([whether the target supports __sync_val_compare_and_swap_8], ac_cv_have_sync_val_compare_and_swap_8, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([#include int64_t foo, bar;], [bar = __sync_val_compare_and_swap(&foo, 0, 1);])], [ac_cv_have_sync_val_compare_and_swap_8=yes], [ac_cv_have_sync_val_compare_and_swap_8=no])]) if test $ac_cv_have_sync_val_compare_and_swap_8 = yes; then AC_DEFINE(STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP_8, 1, [Define to 1 if the target supports __sync_val_compare_and_swap_8]) fi]) # Check whether the target supports __sync_bool_compare_and_swap. AC_DEFUN([STARPU_CHECK_SYNC_BOOL_COMPARE_AND_SWAP], [ AC_CACHE_CHECK([whether the target supports __sync_bool_compare_and_swap], ac_cv_have_sync_bool_compare_and_swap, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;], [bar = __sync_bool_compare_and_swap(&foo, 0, 1);])], [ac_cv_have_sync_bool_compare_and_swap=yes], [ac_cv_have_sync_bool_compare_and_swap=no])]) if test $ac_cv_have_sync_bool_compare_and_swap = yes; then AC_DEFINE(STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP, 1, [Define to 1 if the target supports __sync_bool_compare_and_swap]) fi]) # Check whether the target supports __sync_bool_compare_and_swap_8. AC_DEFUN([STARPU_CHECK_SYNC_BOOL_COMPARE_AND_SWAP_8], [ AC_CACHE_CHECK([whether the target supports __sync_bool_compare_and_swap_8], ac_cv_have_sync_bool_compare_and_swap_8, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([#include int64_t foo, bar;], [bar = __sync_bool_compare_and_swap(&foo, 0, 1);])], [ac_cv_have_sync_bool_compare_and_swap_8=yes], [ac_cv_have_sync_bool_compare_and_swap_8=no])]) if test $ac_cv_have_sync_bool_compare_and_swap_8 = yes; then AC_DEFINE(STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8, 1, [Define to 1 if the target supports __sync_bool_compare_and_swap_8]) fi]) # Check whether the target supports __sync_fetch_and_add. AC_DEFUN([STARPU_CHECK_SYNC_FETCH_AND_ADD], [ AC_CACHE_CHECK([whether the target supports __sync_fetch_and_add], ac_cv_have_sync_fetch_and_add, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;], [bar = __sync_fetch_and_add(&foo, 1);])], [ac_cv_have_sync_fetch_and_add=yes], [ac_cv_have_sync_fetch_and_add=no])]) if test $ac_cv_have_sync_fetch_and_add = yes; then AC_DEFINE(STARPU_HAVE_SYNC_FETCH_AND_ADD, 1, [Define to 1 if the target supports __sync_fetch_and_add]) fi]) # Check whether the target supports __sync_fetch_and_add_8. AC_DEFUN([STARPU_CHECK_SYNC_FETCH_AND_ADD_8], [ AC_CACHE_CHECK([whether the target supports __sync_fetch_and_add_8], ac_cv_have_sync_fetch_and_add_8, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([#include int64_t foo, bar;], [bar = __sync_fetch_and_add(&foo, 1);])], [ac_cv_have_sync_fetch_and_add_8=yes], [ac_cv_have_sync_fetch_and_add_8=no])]) if test $ac_cv_have_sync_fetch_and_add_8 = yes; then AC_DEFINE(STARPU_HAVE_SYNC_FETCH_AND_ADD_8, 1, [Define to 1 if the target supports __sync_fetch_and_add_8]) fi]) # Check whether the target supports __sync_fetch_and_or. AC_DEFUN([STARPU_CHECK_SYNC_FETCH_AND_OR], [ AC_CACHE_CHECK([whether the target supports __sync_fetch_and_or], ac_cv_have_sync_fetch_and_or, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;], [bar = __sync_fetch_and_or(&foo, 1);])], [ac_cv_have_sync_fetch_and_or=yes], [ac_cv_have_sync_fetch_and_or=no])]) if test $ac_cv_have_sync_fetch_and_or = yes; then AC_DEFINE(STARPU_HAVE_SYNC_FETCH_AND_OR, 1, [Define to 1 if the target supports __sync_fetch_and_or]) fi]) # Check whether the target supports __sync_fetch_and_or_8. AC_DEFUN([STARPU_CHECK_SYNC_FETCH_AND_OR_8], [ AC_CACHE_CHECK([whether the target supports __sync_fetch_and_or_8], ac_cv_have_sync_fetch_and_or_8, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([#include int64_t foo, bar;], [bar = __sync_fetch_and_or(&foo, 1);])], [ac_cv_have_sync_fetch_and_or_8=yes], [ac_cv_have_sync_fetch_and_or_8=no])]) if test $ac_cv_have_sync_fetch_and_or_8 = yes; then AC_DEFINE(STARPU_HAVE_SYNC_FETCH_AND_OR_8, 1, [Define to 1 if the target supports __sync_fetch_and_or_8]) fi]) # Check whether the target supports __sync_lock_test_and_set. AC_DEFUN([STARPU_CHECK_SYNC_LOCK_TEST_AND_SET], [ AC_CACHE_CHECK([whether the target supports __sync_lock_test_and_set], ac_cv_have_sync_lock_test_and_set, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;], [bar = __sync_lock_test_and_set(&foo, 1);])], [ac_cv_have_sync_lock_test_and_set=yes], [ac_cv_have_sync_lock_test_and_set=no])]) if test $ac_cv_have_sync_lock_test_and_set = yes; then AC_DEFINE(STARPU_HAVE_SYNC_LOCK_TEST_AND_SET, 1, [Define to 1 if the target supports __sync_lock_test_and_set]) fi]) # Check whether the target supports __atomic_compare_exchange_n. AC_DEFUN([STARPU_CHECK_ATOMIC_COMPARE_EXCHANGE_N], [ AC_CACHE_CHECK([whether the target supports __atomic_compare_exchange_n], ac_cv_have_atomic_compare_exchange_n, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar, baz;], [baz = __atomic_compare_exchange_n(&foo, &bar, 1, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);])], [ac_cv_have_atomic_compare_exchange_n=yes], [ac_cv_have_atomic_compare_exchange_n=no])]) if test $ac_cv_have_atomic_compare_exchange_n = yes; then AC_DEFINE(STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N, 1, [Define to 1 if the target supports __atomic_compare_exchange_n]) fi]) # Check whether the target supports __atomic_compare_exchange_n_8. AC_DEFUN([STARPU_CHECK_ATOMIC_COMPARE_EXCHANGE_N_8], [ AC_CACHE_CHECK([whether the target supports __atomic_compare_exchange_n_8], ac_cv_have_atomic_compare_exchange_n_8, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([#include int64_t foo, bar, baz;], [baz = __atomic_compare_exchange_n(&foo, &bar, 1, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);])], [ac_cv_have_atomic_compare_exchange_n_8=yes], [ac_cv_have_atomic_compare_exchange_n_8=no])]) if test $ac_cv_have_atomic_compare_exchange_n_8 = yes; then AC_DEFINE(STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N_8, 1, [Define to 1 if the target supports __atomic_compare_exchange_n_8]) fi]) # Check whether the target supports __atomic_exchange_n. AC_DEFUN([STARPU_CHECK_ATOMIC_EXCHANGE_N], [ AC_CACHE_CHECK([whether the target supports __atomic_exchange_n], ac_cv_have_atomic_exchange_n, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;], [bar = __atomic_exchange_n(&foo, 1, __ATOMIC_SEQ_CST);])], [ac_cv_have_atomic_exchange_n=yes], [ac_cv_have_atomic_exchange_n=no])]) if test $ac_cv_have_atomic_exchange_n = yes; then AC_DEFINE(STARPU_HAVE_ATOMIC_EXCHANGE_N, 1, [Define to 1 if the target supports __atomic_exchange_n]) fi]) # Check whether the target supports __atomic_exchange_n_8. AC_DEFUN([STARPU_CHECK_ATOMIC_EXCHANGE_N_8], [ AC_CACHE_CHECK([whether the target supports __atomic_exchange_n_8], ac_cv_have_atomic_exchange_n_8, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([#include int64_t foo, bar;], [bar = __atomic_exchange_n(&foo, 1, __ATOMIC_SEQ_CST);])], [ac_cv_have_atomic_exchange_n_8=yes], [ac_cv_have_atomic_exchange_n_8=no])]) if test $ac_cv_have_atomic_exchange_n_8 = yes; then AC_DEFINE(STARPU_HAVE_ATOMIC_EXCHANGE_N_8, 1, [Define to 1 if the target supports __atomic_exchange_n_8]) fi]) # Check whether the target supports __atomic_fetch_add. AC_DEFUN([STARPU_CHECK_ATOMIC_FETCH_ADD], [ AC_CACHE_CHECK([whether the target supports __atomic_fetch_add], ac_cv_have_atomic_fetch_add, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;], [bar = __atomic_fetch_add(&foo, 1, __ATOMIC_SEQ_CST);])], [ac_cv_have_atomic_fetch_add=yes], [ac_cv_have_atomic_fetch_add=no])]) if test $ac_cv_have_atomic_fetch_add = yes; then AC_DEFINE(STARPU_HAVE_ATOMIC_FETCH_ADD, 1, [Define to 1 if the target supports __atomic_fetch_add]) fi]) # Check whether the target supports __atomic_fetch_add_8. AC_DEFUN([STARPU_CHECK_ATOMIC_FETCH_ADD_8], [ AC_CACHE_CHECK([whether the target supports __atomic_fetch_add_8], ac_cv_have_atomic_fetch_add_8, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([#include int64_t foo, bar;], [bar = __atomic_fetch_add(&foo, 1, __ATOMIC_SEQ_CST);])], [ac_cv_have_atomic_fetch_add_8=yes], [ac_cv_have_atomic_fetch_add_8=no])]) if test $ac_cv_have_atomic_fetch_add_8 = yes; then AC_DEFINE(STARPU_HAVE_ATOMIC_FETCH_ADD_8, 1, [Define to 1 if the target supports __atomic_fetch_add_8]) fi]) # Check whether the target supports __atomic_fetch_or. AC_DEFUN([STARPU_CHECK_ATOMIC_FETCH_OR], [ AC_CACHE_CHECK([whether the target supports __atomic_fetch_or], ac_cv_have_atomic_fetch_or, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;], [bar = __atomic_fetch_or(&foo, 1, __ATOMIC_SEQ_CST);])], [ac_cv_have_atomic_fetch_or=yes], [ac_cv_have_atomic_fetch_or=no])]) if test $ac_cv_have_atomic_fetch_or = yes; then AC_DEFINE(STARPU_HAVE_ATOMIC_FETCH_OR, 1, [Define to 1 if the target supports __atomic_fetch_or]) fi]) # Check whether the target supports __atomic_fetch_or_8. AC_DEFUN([STARPU_CHECK_ATOMIC_FETCH_OR_8], [ AC_CACHE_CHECK([whether the target supports __atomic_fetch_or_8], ac_cv_have_atomic_fetch_or_8, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([#include int64_t foo, bar;], [bar = __atomic_fetch_or(&foo, 1, __ATOMIC_SEQ_CST);])], [ac_cv_have_atomic_fetch_or_8=yes], [ac_cv_have_atomic_fetch_or_8=no])]) if test $ac_cv_have_atomic_fetch_or_8 = yes; then AC_DEFINE(STARPU_HAVE_ATOMIC_FETCH_OR_8, 1, [Define to 1 if the target supports __atomic_fetch_or_8]) fi]) # Check whether the target supports __atomic_test_and_set. AC_DEFUN([STARPU_CHECK_ATOMIC_TEST_AND_SET], [ AC_CACHE_CHECK([whether the target supports __atomic_test_and_set], ac_cv_have_atomic_test_and_set, [ AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;], [bar = __atomic_test_and_set(&foo, __ATOMIC_SEQ_CST);])], [ac_cv_have_atomic_test_and_set=yes], [ac_cv_have_atomic_test_and_set=no])]) if test $ac_cv_have_atomic_test_and_set = yes; then AC_DEFINE(STARPU_HAVE_ATOMIC_TEST_AND_SET, 1, [Define to 1 if the target supports __atomic_test_and_set]) fi]) # Check whether the target supports __sync_synchronize. AC_DEFUN([STARPU_CHECK_SYNC_SYNCHRONIZE], [ AC_CACHE_CHECK([whether the target supports __sync_synchronize], ac_cv_have_sync_synchronize, [ AC_LINK_IFELSE([AC_LANG_PROGRAM(, [__sync_synchronize();])], [ac_cv_have_sync_synchronize=yes], [ac_cv_have_sync_synchronize=no])]) if test $ac_cv_have_sync_synchronize = yes; then AC_DEFINE(STARPU_HAVE_SYNC_SYNCHRONIZE, 1, [Define to 1 if the target supports __sync_synchronize]) fi]) starpu-1.4.9+dfsg/m4/ax_cxx_compile_stdcxx.m4000066400000000000000000000330001507764646700211650ustar00rootroot00000000000000# =========================================================================== # http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html # =========================================================================== # # SYNOPSIS # # AX_CXX_COMPILE_STDCXX(VERSION, [ext|noext], [mandatory|optional]) # # DESCRIPTION # # Check for baseline language coverage in the compiler for the specified # version of the C++ standard. If necessary, add switches to CXX and # CXXCPP to enable support. VERSION may be '11' (for the C++11 standard) # or '14' (for the C++14 standard). # # The second argument, if specified, indicates whether you insist on an # extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g. # -std=c++11). If neither is specified, you get whatever works, with # preference for an extended mode. # # The third argument, if specified 'mandatory' or if left unspecified, # indicates that baseline support for the specified C++ standard is # required and that the macro should error out if no mode with that # support is found. If specified 'optional', then configuration proceeds # regardless, after defining HAVE_CXX${VERSION} if and only if a # supporting mode is found. # # LICENSE # # Copyright (c) 2008 Benjamin Kosnik # Copyright (c) 2012 Zack Weinberg # Copyright (c) 2013 Roy Stogner # Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov # Copyright (c) 2015 Paul Norman # Copyright (c) 2015 Moritz Klammler # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. #serial 4 dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro dnl (serial version number 13). AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl m4_if([$1], [11], [], [$1], [14], [], [$1], [17], [m4_fatal([support for C++17 not yet implemented in AX_CXX_COMPILE_STDCXX])], [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl m4_if([$2], [], [], [$2], [ext], [], [$2], [noext], [], [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX])])dnl m4_if([$3], [], [ax_cxx_compile_cxx$1_required=true], [$3], [mandatory], [ax_cxx_compile_cxx$1_required=true], [$3], [optional], [ax_cxx_compile_cxx$1_required=false], [m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])]) AC_LANG_PUSH([C++])dnl ac_success=no AC_CACHE_CHECK(whether $CXX supports C++$1 features by default, ax_cv_cxx_compile_cxx$1, [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], [ax_cv_cxx_compile_cxx$1=yes], [ax_cv_cxx_compile_cxx$1=no])]) if test x$ax_cv_cxx_compile_cxx$1 = xyes; then ac_success=yes fi m4_if([$2], [noext], [], [dnl if test x$ac_success = xno; then for switch in -std=gnu++$1 -std=gnu++0x; do cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, $cachevar, [ac_save_CXX="$CXX" CXX="$CXX $switch" AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], [eval $cachevar=yes], [eval $cachevar=no]) CXX="$ac_save_CXX"]) if eval test x\$$cachevar = xyes; then CXX="$CXX $switch" if test -n "$CXXCPP" ; then CXXCPP="$CXXCPP $switch" fi ac_success=yes break fi done fi]) m4_if([$2], [ext], [], [dnl if test x$ac_success = xno; then dnl HP's aCC needs +std=c++11 according to: dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf dnl Cray's crayCC needs "-h std=c++11" for switch in -std=c++$1 -std=c++0x +std=c++$1 "-h std=c++$1"; do cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, $cachevar, [ac_save_CXX="$CXX" CXX="$CXX $switch" AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], [eval $cachevar=yes], [eval $cachevar=no]) CXX="$ac_save_CXX"]) if eval test x\$$cachevar = xyes; then CXX="$CXX $switch" if test -n "$CXXCPP" ; then CXXCPP="$CXXCPP $switch" fi ac_success=yes break fi done fi]) AC_LANG_POP([C++]) if test x$ax_cxx_compile_cxx$1_required = xtrue; then if test x$ac_success = xno; then AC_MSG_ERROR([*** A compiler with support for C++$1 language features is required.]) fi fi if test x$ac_success = xno; then HAVE_CXX$1=0 AC_MSG_NOTICE([No compiler with C++$1 support was found]) else HAVE_CXX$1=1 AC_DEFINE(HAVE_CXX$1,1, [define if the compiler supports basic C++$1 syntax]) fi AC_SUBST(HAVE_CXX$1) ]) dnl Test body for checking C++11 support m4_define([_AX_CXX_COMPILE_STDCXX_testbody_11], _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 ) dnl Test body for checking C++14 support m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14], _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 ) dnl Tests for new features in C++11 m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[ // If the compiler admits that it is not ready for C++11, why torture it? // Hopefully, this will speed up the test. #ifndef __cplusplus #error "This is not a C++ compiler" #elif __cplusplus < 201103L #error "This is not a C++11 compiler" #else namespace cxx11 { namespace test_static_assert { template struct check { static_assert(sizeof(int) <= sizeof(T), "not big enough"); }; } namespace test_final_override { struct Base { virtual void f() {} }; struct Derived : public Base { virtual void f() override {} }; } namespace test_double_right_angle_brackets { template < typename T > struct check {}; typedef check single_type; typedef check> double_type; typedef check>> triple_type; typedef check>>> quadruple_type; } namespace test_decltype { int f() { int a = 1; decltype(a) b = 2; return a + b; } } namespace test_type_deduction { template < typename T1, typename T2 > struct is_same { static const bool value = false; }; template < typename T > struct is_same { static const bool value = true; }; template < typename T1, typename T2 > auto add(T1 a1, T2 a2) -> decltype(a1 + a2) { return a1 + a2; } int test(const int c, volatile int v) { static_assert(is_same::value == true, ""); static_assert(is_same::value == false, ""); static_assert(is_same::value == false, ""); auto ac = c; auto av = v; auto sumi = ac + av + 'x'; auto sumf = ac + av + 1.0; static_assert(is_same::value == true, ""); static_assert(is_same::value == true, ""); static_assert(is_same::value == true, ""); static_assert(is_same::value == false, ""); static_assert(is_same::value == true, ""); return (sumf > 0.0) ? sumi : add(c, v); } } namespace test_noexcept { int f() { return 0; } int g() noexcept { return 0; } static_assert(noexcept(f()) == false, ""); static_assert(noexcept(g()) == true, ""); } namespace test_constexpr { template < typename CharT > unsigned long constexpr strlen_c_r(const CharT *const s, const unsigned long acc) noexcept { return *s ? strlen_c_r(s + 1, acc + 1) : acc; } template < typename CharT > unsigned long constexpr strlen_c(const CharT *const s) noexcept { return strlen_c_r(s, 0UL); } static_assert(strlen_c("") == 0UL, ""); static_assert(strlen_c("1") == 1UL, ""); static_assert(strlen_c("example") == 7UL, ""); static_assert(strlen_c("another\0example") == 7UL, ""); } namespace test_rvalue_references { template < int N > struct answer { static constexpr int value = N; }; answer<1> f(int&) { return answer<1>(); } answer<2> f(const int&) { return answer<2>(); } answer<3> f(int&&) { return answer<3>(); } void test() { int i = 0; const int c = 0; static_assert(decltype(f(i))::value == 1, ""); static_assert(decltype(f(c))::value == 2, ""); static_assert(decltype(f(0))::value == 3, ""); } } namespace test_uniform_initialization { struct test { static const int zero {}; static const int one {1}; }; static_assert(test::zero == 0, ""); static_assert(test::one == 1, ""); } namespace test_lambdas { void test1() { auto lambda1 = [](){}; auto lambda2 = lambda1; lambda1(); lambda2(); } int test2() { auto a = [](int i, int j){ return i + j; }(1, 2); auto b = []() -> int { return '0'; }(); auto c = [=](){ return a + b; }(); auto d = [&](){ return c; }(); auto e = [a, &b](int x) mutable { const auto identity = [](int y){ return y; }; for (auto i = 0; i < a; ++i) a += b--; return x + identity(a + b); }(0); return a + b + c + d + e; } int test3() { const auto nullary = [](){ return 0; }; const auto unary = [](int x){ return x; }; using nullary_t = decltype(nullary); using unary_t = decltype(unary); const auto higher1st = [](nullary_t f){ return f(); }; const auto higher2nd = [unary](nullary_t f1){ return [unary, f1](unary_t f2){ return f2(unary(f1())); }; }; return higher1st(nullary) + higher2nd(nullary)(unary); } } namespace test_variadic_templates { template struct sum; template struct sum { static constexpr auto value = N0 + sum::value; }; template <> struct sum<> { static constexpr auto value = 0; }; static_assert(sum<>::value == 0, ""); static_assert(sum<1>::value == 1, ""); static_assert(sum<23>::value == 23, ""); static_assert(sum<1, 2>::value == 3, ""); static_assert(sum<5, 5, 11>::value == 21, ""); static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); } // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function // because of this. namespace test_template_alias_sfinae { struct foo {}; template using member = typename T::member_type; template void func(...) {} template void func(member*) {} void test(); void test() { func(0); } } } // namespace cxx11 #endif // __cplusplus >= 201103L ]]) dnl Tests for new features in C++14 m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_14], [[ // If the compiler admits that it is not ready for C++14, why torture it? // Hopefully, this will speed up the test. #ifndef __cplusplus #error "This is not a C++ compiler" #elif __cplusplus < 201402L #error "This is not a C++14 compiler" #else namespace cxx14 { namespace test_polymorphic_lambdas { int test() { const auto lambda = [](auto&&... args){ const auto istiny = [](auto x){ return (sizeof(x) == 1UL) ? 1 : 0; }; const int aretiny[] = { istiny(args)... }; return aretiny[0]; }; return lambda(1, 1L, 1.0f, '1'); } } namespace test_binary_literals { constexpr auto ivii = 0b0000000000101010; static_assert(ivii == 42, "wrong value"); } namespace test_generalized_constexpr { template < typename CharT > constexpr unsigned long strlen_c(const CharT *const s) noexcept { auto length = 0UL; for (auto p = s; *p; ++p) ++length; return length; } static_assert(strlen_c("") == 0UL, ""); static_assert(strlen_c("x") == 1UL, ""); static_assert(strlen_c("test") == 4UL, ""); static_assert(strlen_c("another\0test") == 7UL, ""); } namespace test_lambda_init_capture { int test() { auto x = 0; const auto lambda1 = [a = x](int b){ return a + b; }; const auto lambda2 = [a = lambda1(x)](){ return a; }; return lambda2(); } } namespace test_digit_seperators { constexpr auto ten_million = 100'000'000; static_assert(ten_million == 100000000, ""); } namespace test_return_type_deduction { auto f(int& x) { return x; } decltype(auto) g(int& x) { return x; } template < typename T1, typename T2 > struct is_same { static constexpr auto value = false; }; template < typename T > struct is_same { static constexpr auto value = true; }; int test() { auto x = 0; static_assert(is_same::value, ""); static_assert(is_same::value, ""); return x; } } } // namespace cxx14 #endif // __cplusplus >= 201402L ]]) starpu-1.4.9+dfsg/m4/ax_dlb_callback_arg.m4000066400000000000000000000026231507764646700204730ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Check whether DLB callbacks expect an user argument AC_DEFUN([AX_DLB_CALLBACK_ARG], [AC_MSG_CHECKING([whether DLB callbacks expect an user argument]) AC_CACHE_VAL(ac_cv_dlb_callback_arg,dnl [AC_TRY_COMPILE(dnl [#include dlb_handler_t dlb_handle; void _dlb_callback_disable_cpu(int cpuid, void *arg) { (void)cpuid; (void)arg; } void f(void) { (void)DLB_CallbackSet_sp(dlb_handle, dlb_callback_disable_cpu, (dlb_callback_t)_dlb_callback_disable_cpu, 0); } ],, ac_cv_dlb_callback_arg=yes, ac_cv_dlb_callback_arg=no) ])dnl AC_CACHE_VAL AC_MSG_RESULT([$ac_cv_dlb_callback_arg]) if test $ac_cv_dlb_callback_arg = yes; then AC_DEFINE(STARPURM_HAVE_DLB_CALLBACK_ARG,1,[Define to 1 if DLB callbacks expect an user argument]) fi ]) starpu-1.4.9+dfsg/m4/libs.m4000066400000000000000000000142401507764646700155240ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # STARPU_SEARCH_LIBS(NAME, FUNCTION, SEARCH-LIBS, # [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND], # [OTHER-LIBRARIES]) # # Like AC_SEARCH_LIBS, but puts -l flags into $1_LDFLAGS instead of LIBS, and # AC_SUBSTs it AC_DEFUN([STARPU_SEARCH_LIBS], [dnl _LIBS_SAV="$LIBS" LIBS="" AC_SEARCH_LIBS([$2], [$3], [$4], [$5], [$6]) STARPU_$1_LDFLAGS="$STARPU_$1_LDFLAGS $LIBS" LIBS=$_LIBS_SAV AC_SUBST(STARPU_$1_LDFLAGS) ])dnl # STARPU_CHECK_LIB(NAME, LIBRARY, FUNCTION, # [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND], # [OTHER-LIBRARIES]) # # Like AC_CHECK_LIB, but puts -l flags into $1_LDFLAGS instead of LIBS, and # AC_SUBSTs it AC_DEFUN([STARPU_CHECK_LIB], [dnl _LIBS_SAV="$LIBS" LIBS="" AC_CHECK_LIB([$2], [$3], [$4], [$5], [$6]) STARPU_$1_LDFLAGS="$STARPU_$1_LDFLAGS $LIBS" LIBS=$_LIBS_SAV AC_SUBST(STARPU_$1_LDFLAGS) ])dnl # STARPU_HAVE_LIBRARY(NAME, LIBRARY, # [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND], # [OTHER-LIBRARIES]) # Like AC_HAVE_LIBRARY, but puts -l flags into $1_LDFLAGS instead of LIBS, and # AC_SUBSTs it AC_DEFUN([STARPU_HAVE_LIBRARY], [dnl STARPU_CHECK_LIB([$1], [$2], main, [$3], [$4], [$5]) ])dnl # STARPU_INIT_ZERO(INCLUDES, TYPE, INIT_MACRO) # Checks whether when TYPE is initialized with INIT_MACRO, the content is just # plain zeroes AC_DEFUN([STARPU_INIT_ZERO], [dnl AC_MSG_CHECKING(whether $3 just zeroes) AC_RUN_IFELSE([AC_LANG_PROGRAM( $1, [[$2 var = $3; char *p; for (p = (char*) &var; p < (char*) (&var+1); p++) if (*p != 0) return 1; return 0; ]], )], [AC_DEFINE([STARPU_$3_ZERO], [1], [Define to 1 if `$3' is just zeroes]) AC_MSG_RESULT(yes)], [AC_MSG_RESULT(no)]) ])dnl # IS_SUPPORTED_CFLAG(flag) # ------------------------ # Check if the CFLAGS `flag' is supported by the compiler AC_DEFUN([IS_SUPPORTED_CFLAG], [ AC_REQUIRE([AC_PROG_CC]) AC_MSG_CHECKING([whether C compiler supports $1]) SAVED_CFLAGS="$CFLAGS" CFLAGS="$1" check_mpi="no" AC_LINK_IFELSE( AC_LANG_PROGRAM( [[]], [[AC_LANG_SOURCE([const char *hello = "Hello World";])]] ), [ m4_default_nblank([$2], check_mpi="yes") AC_MSG_RESULT(yes) ], [ AC_MSG_RESULT(no) ] ) if test "$build_mpi_lib" = "no" -a "$build_nmad_lib" = "no" then if test "$check_mpi" = "yes" ; then GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS $1" fi elif test "$check_mpi" = "yes" ; then SAVED_CC="$CC" CC="$MPICC" AC_MSG_CHECKING([whether MPI C compiler supports $1]) AC_LINK_IFELSE( AC_LANG_PROGRAM( [[]], [[AC_LANG_SOURCE([const char *hello = "Hello World";])]] ), [ m4_default_nblank([$2], [GLOBAL_AM_CFLAGS="$GLOBAL_AM_CFLAGS $1"]) AC_MSG_RESULT(yes) ], [ AC_MSG_RESULT(no) ] ) CC="$SAVED_CC" fi CFLAGS="$SAVED_CFLAGS" ]) # IS_SUPPORTED_CXXFLAG(flag) # ------------------------ # Check if the CXXFLAGS `flag' is supported by the compiler AC_DEFUN([IS_SUPPORTED_CXXFLAG], [ AC_REQUIRE([AC_PROG_CXX]) AC_LANG_PUSH([C++]) AC_MSG_CHECKING([whether CXX compiler supports $1]) SAVED_CXXFLAGS="$CXXFLAGS" CXXFLAGS="$1" AC_LINK_IFELSE( AC_LANG_PROGRAM( [[]], [[AC_LANG_SOURCE([const char *hello = "Hello World";])]] ), [ m4_default_nblank([$2], [GLOBAL_AM_CXXFLAGS="$GLOBAL_AM_CXXFLAGS $1"]) AC_MSG_RESULT(yes) ], [ AC_MSG_RESULT(no) ] ) CXXFLAGS="$SAVED_CXXFLAGS" AC_LANG_POP([C++]) ]) # IS_SUPPORTED_FFLAG(flag) # ------------------------ # Check if the FFLAGS `flag' is supported by the compiler AC_DEFUN([IS_SUPPORTED_FFLAG], [ AC_LANG_PUSH([Fortran 77]) AC_MSG_CHECKING([whether Fortran 77 compiler supports $1]) SAVED_FFLAGS="$FFLAGS" FFLAGS="$1" AC_LINK_IFELSE( AC_LANG_PROGRAM( [], [[AC_LANG_SOURCE([])]] ), [ m4_default_nblank([$2], [GLOBAL_AM_FFLAGS="$GLOBAL_AM_FFLAGS $1"]) AC_MSG_RESULT(yes) ], [ AC_MSG_RESULT(no) ] ) FFLAGS="$SAVED_FFLAGS" AC_LANG_POP([Fortran 77]) ]) # IS_SUPPORTED_FCFLAG(flag) # ------------------------ # Check if the FCLAGS `flag' is supported by the compiler AC_DEFUN([IS_SUPPORTED_FCFLAG], [ AC_LANG_PUSH([Fortran]) AC_MSG_CHECKING([whether Fortran compiler supports $1]) SAVED_FCFLAGS="$FCFLAGS" FCFLAGS="$1" check_mpi="no" AC_LINK_IFELSE( AC_LANG_PROGRAM( [], [[AC_LANG_SOURCE([])]] ), [ m4_default_nblank([$2], check_mpi="yes") AC_MSG_RESULT(yes) ], [ AC_MSG_RESULT(no) ] ) if test "$check_mpi" = "yes" ; then SAVED_FC="$FC" FC="$MPIFORT" AC_MSG_CHECKING([whether MPI Fortran compiler supports $1]) AC_LINK_IFELSE( AC_LANG_PROGRAM( [], [[AC_LANG_SOURCE([])]] ), [ m4_default_nblank([$2], [GLOBAL_AM_FCFLAGS="$GLOBAL_AM_FCFLAGS $1"]) AC_MSG_RESULT(yes) ], [ AC_MSG_RESULT(no) ] ) FC="$SAVED_FC" fi FCFLAGS="$SAVED_FCFLAGS" AC_LANG_POP([Fortran]) ]) # IS_SUPPORTED_FLAG(flag) # ------------------------ # Check with C, C++, F77 and F90 that the `flag' is supported by the compiler AC_DEFUN([IS_SUPPORTED_FLAG], [ IS_SUPPORTED_CFLAG($1) IS_SUPPORTED_CXXFLAG($1) IS_SUPPORTED_FFLAG($1) IS_SUPPORTED_FCFLAG($1) ]) AC_DEFUN([IS_SUPPORTED_FLAG_VAR], [ IS_SUPPORTED_CFLAG($1,[$2_CFLAGS="$$2_CFLAGS $1"]) IS_SUPPORTED_CXXFLAG($1,[$2_CXXFLAGS="$$2_CXXFLAGS $1"]) IS_SUPPORTED_FFLAG($1,[$2_FFLAGS="$$2_FFLAGS $1"]) IS_SUPPORTED_FCFLAG($1,[$2_FCFLAGS="$$2_FCFLAGS $1"]) ]) # AC_PYTHON_MODULE(modulename, [action-if-found], [action-if-not-found]) # Check if the given python module is available AC_DEFUN([AC_PYTHON_MODULE], [ echo "import $1" | $PYTHON - 2>/dev/null if test $? -ne 0 ; then $3 else $2 fi ]) starpu-1.4.9+dfsg/m4/libtool.m4000066400000000000000000011316521507764646700162470ustar00rootroot00000000000000# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*- # # Copyright (C) 1996-2001, 2003-2019, 2021-2022 Free Software # Foundation, Inc. # Written by Gordon Matzigkeit, 1996 # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. m4_define([_LT_COPYING], [dnl # Copyright (C) 2014 Free Software Foundation, Inc. # This is free software; see the source for copying conditions. There is NO # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # GNU Libtool is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of of the License, or # (at your option) any later version. # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program or library that is built # using GNU Libtool, you may include this file under the same # distribution terms that you use for the rest of that program. # # GNU Libtool is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . ]) # serial 59 LT_INIT # LT_PREREQ(VERSION) # ------------------ # Complain and exit if this libtool version is less that VERSION. m4_defun([LT_PREREQ], [m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1, [m4_default([$3], [m4_fatal([Libtool version $1 or higher is required], 63)])], [$2])]) # _LT_CHECK_BUILDDIR # ------------------ # Complain if the absolute build directory name contains unusual characters m4_defun([_LT_CHECK_BUILDDIR], [case `pwd` in *\ * | *\ *) AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;; esac ]) # LT_INIT([OPTIONS]) # ------------------ AC_DEFUN([LT_INIT], [AC_PREREQ([2.62])dnl We use AC_PATH_PROGS_FEATURE_CHECK AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl AC_BEFORE([$0], [LT_LANG])dnl AC_BEFORE([$0], [LT_OUTPUT])dnl AC_BEFORE([$0], [LTDL_INIT])dnl m4_require([_LT_CHECK_BUILDDIR])dnl dnl Autoconf doesn't catch unexpanded LT_ macros by default: m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4 dnl unless we require an AC_DEFUNed macro: AC_REQUIRE([LTOPTIONS_VERSION])dnl AC_REQUIRE([LTSUGAR_VERSION])dnl AC_REQUIRE([LTVERSION_VERSION])dnl AC_REQUIRE([LTOBSOLETE_VERSION])dnl m4_require([_LT_PROG_LTMAIN])dnl _LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}]) dnl Parse OPTIONS _LT_SET_OPTIONS([$0], [$1]) # This can be used to rebuild libtool when needed LIBTOOL_DEPS=$ltmain # Always use our own libtool. LIBTOOL='$(SHELL) $(top_builddir)/libtool' AC_SUBST(LIBTOOL)dnl _LT_SETUP # Only expand once: m4_define([LT_INIT]) ])# LT_INIT # Old names: AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT]) AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_PROG_LIBTOOL], []) dnl AC_DEFUN([AM_PROG_LIBTOOL], []) # _LT_PREPARE_CC_BASENAME # ----------------------- m4_defun([_LT_PREPARE_CC_BASENAME], [ # Calculate cc_basename. Skip known compiler wrappers and cross-prefix. func_cc_basename () { for cc_temp in @S|@*""; do case $cc_temp in compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;; distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;; \-*) ;; *) break;; esac done func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` } ])# _LT_PREPARE_CC_BASENAME # _LT_CC_BASENAME(CC) # ------------------- # It would be clearer to call AC_REQUIREs from _LT_PREPARE_CC_BASENAME, # but that macro is also expanded into generated libtool script, which # arranges for $SED and $ECHO to be set by different means. m4_defun([_LT_CC_BASENAME], [m4_require([_LT_PREPARE_CC_BASENAME])dnl AC_REQUIRE([_LT_DECL_SED])dnl AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl func_cc_basename $1 cc_basename=$func_cc_basename_result ]) # _LT_FILEUTILS_DEFAULTS # ---------------------- # It is okay to use these file commands and assume they have been set # sensibly after 'm4_require([_LT_FILEUTILS_DEFAULTS])'. m4_defun([_LT_FILEUTILS_DEFAULTS], [: ${CP="cp -f"} : ${MV="mv -f"} : ${RM="rm -f"} ])# _LT_FILEUTILS_DEFAULTS # _LT_SETUP # --------- m4_defun([_LT_SETUP], [AC_REQUIRE([AC_CANONICAL_HOST])dnl AC_REQUIRE([AC_CANONICAL_BUILD])dnl AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl _LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl dnl _LT_DECL([], [host_alias], [0], [The host system])dnl _LT_DECL([], [host], [0])dnl _LT_DECL([], [host_os], [0])dnl dnl _LT_DECL([], [build_alias], [0], [The build system])dnl _LT_DECL([], [build], [0])dnl _LT_DECL([], [build_os], [0])dnl dnl AC_REQUIRE([AC_PROG_CC])dnl AC_REQUIRE([LT_PATH_LD])dnl AC_REQUIRE([LT_PATH_NM])dnl dnl AC_REQUIRE([AC_PROG_LN_S])dnl test -z "$LN_S" && LN_S="ln -s" _LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl dnl AC_REQUIRE([LT_CMD_MAX_LEN])dnl _LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl _LT_DECL([], [exeext], [0], [Executable file suffix (normally "")])dnl dnl m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_CHECK_SHELL_FEATURES])dnl m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl m4_require([_LT_CMD_RELOAD])dnl m4_require([_LT_DECL_FILECMD])dnl m4_require([_LT_CHECK_MAGIC_METHOD])dnl m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl m4_require([_LT_CMD_OLD_ARCHIVE])dnl m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl m4_require([_LT_WITH_SYSROOT])dnl m4_require([_LT_CMD_TRUNCATE])dnl _LT_CONFIG_LIBTOOL_INIT([ # See if we are running on zsh, and set the options that allow our # commands through without removal of \ escapes INIT. if test -n "\${ZSH_VERSION+set}"; then setopt NO_GLOB_SUBST fi ]) if test -n "${ZSH_VERSION+set}"; then setopt NO_GLOB_SUBST fi _LT_CHECK_OBJDIR m4_require([_LT_TAG_COMPILER])dnl case $host_os in aix3*) # AIX sometimes has problems with the GCC collect2 program. For some # reason, if we set the COLLECT_NAMES environment variable, the problems # vanish in a puff of smoke. if test set != "${COLLECT_NAMES+set}"; then COLLECT_NAMES= export COLLECT_NAMES fi ;; esac # Global variables: ofile=libtool can_build_shared=yes # All known linkers require a '.a' archive for static linking (except MSVC and # ICC, which need '.lib'). libext=a with_gnu_ld=$lt_cv_prog_gnu_ld old_CC=$CC old_CFLAGS=$CFLAGS # Set sane defaults for various variables test -z "$CC" && CC=cc test -z "$LTCC" && LTCC=$CC test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS test -z "$LD" && LD=ld test -z "$ac_objext" && ac_objext=o _LT_CC_BASENAME([$compiler]) # Only perform the check for file, if the check method requires it test -z "$MAGIC_CMD" && MAGIC_CMD=file case $deplibs_check_method in file_magic*) if test "$file_magic_cmd" = '$MAGIC_CMD'; then _LT_PATH_MAGIC fi ;; esac # Use C for the default configuration in the libtool script LT_SUPPORTED_TAG([CC]) _LT_LANG_C_CONFIG _LT_LANG_DEFAULT_CONFIG _LT_CONFIG_COMMANDS ])# _LT_SETUP # _LT_PREPARE_SED_QUOTE_VARS # -------------------------- # Define a few sed substitution that help us do robust quoting. m4_defun([_LT_PREPARE_SED_QUOTE_VARS], [# Backslashify metacharacters that are still active within # double-quoted strings. sed_quote_subst='s/\([["`$\\]]\)/\\\1/g' # Same as above, but do not quote variable references. double_quote_subst='s/\([["`\\]]\)/\\\1/g' # Sed substitution to delay expansion of an escaped shell variable in a # double_quote_subst'ed string. delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' # Sed substitution to delay expansion of an escaped single quote. delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' # Sed substitution to avoid accidental globbing in evaled expressions no_glob_subst='s/\*/\\\*/g' ]) # _LT_PROG_LTMAIN # --------------- # Note that this code is called both from 'configure', and 'config.status' # now that we use AC_CONFIG_COMMANDS to generate libtool. Notably, # 'config.status' has no value for ac_aux_dir unless we are using Automake, # so we pass a copy along to make sure it has a sensible value anyway. m4_defun([_LT_PROG_LTMAIN], [m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl _LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir']) ltmain=$ac_aux_dir/ltmain.sh ])# _LT_PROG_LTMAIN ## ------------------------------------- ## ## Accumulate code for creating libtool. ## ## ------------------------------------- ## # So that we can recreate a full libtool script including additional # tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS # in macros and then make a single call at the end using the 'libtool' # label. # _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS]) # ---------------------------------------- # Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later. m4_define([_LT_CONFIG_LIBTOOL_INIT], [m4_ifval([$1], [m4_append([_LT_OUTPUT_LIBTOOL_INIT], [$1 ])])]) # Initialize. m4_define([_LT_OUTPUT_LIBTOOL_INIT]) # _LT_CONFIG_LIBTOOL([COMMANDS]) # ------------------------------ # Register COMMANDS to be passed to AC_CONFIG_COMMANDS later. m4_define([_LT_CONFIG_LIBTOOL], [m4_ifval([$1], [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS], [$1 ])])]) # Initialize. m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS]) # _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS]) # ----------------------------------------------------- m4_defun([_LT_CONFIG_SAVE_COMMANDS], [_LT_CONFIG_LIBTOOL([$1]) _LT_CONFIG_LIBTOOL_INIT([$2]) ]) # _LT_FORMAT_COMMENT([COMMENT]) # ----------------------------- # Add leading comment marks to the start of each line, and a trailing # full-stop to the whole comment if one is not present already. m4_define([_LT_FORMAT_COMMENT], [m4_ifval([$1], [ m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])], [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.]) )]) ## ------------------------ ## ## FIXME: Eliminate VARNAME ## ## ------------------------ ## # _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?]) # ------------------------------------------------------------------- # CONFIGNAME is the name given to the value in the libtool script. # VARNAME is the (base) name used in the configure script. # VALUE may be 0, 1 or 2 for a computed quote escaped value based on # VARNAME. Any other value will be used directly. m4_define([_LT_DECL], [lt_if_append_uniq([lt_decl_varnames], [$2], [, ], [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name], [m4_ifval([$1], [$1], [$2])]) lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3]) m4_ifval([$4], [lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])]) lt_dict_add_subkey([lt_decl_dict], [$2], [tagged?], [m4_ifval([$5], [yes], [no])])]) ]) # _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION]) # -------------------------------------------------------- m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])]) # lt_decl_tag_varnames([SEPARATOR], [VARNAME1...]) # ------------------------------------------------ m4_define([lt_decl_tag_varnames], [_lt_decl_filter([tagged?], [yes], $@)]) # _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..]) # --------------------------------------------------------- m4_define([_lt_decl_filter], [m4_case([$#], [0], [m4_fatal([$0: too few arguments: $#])], [1], [m4_fatal([$0: too few arguments: $#: $1])], [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)], [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)], [lt_dict_filter([lt_decl_dict], $@)])[]dnl ]) # lt_decl_quote_varnames([SEPARATOR], [VARNAME1...]) # -------------------------------------------------- m4_define([lt_decl_quote_varnames], [_lt_decl_filter([value], [1], $@)]) # lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...]) # --------------------------------------------------- m4_define([lt_decl_dquote_varnames], [_lt_decl_filter([value], [2], $@)]) # lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...]) # --------------------------------------------------- m4_define([lt_decl_varnames_tagged], [m4_assert([$# <= 2])dnl _$0(m4_quote(m4_default([$1], [[, ]])), m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]), m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))]) m4_define([_lt_decl_varnames_tagged], [m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])]) # lt_decl_all_varnames([SEPARATOR], [VARNAME1...]) # ------------------------------------------------ m4_define([lt_decl_all_varnames], [_$0(m4_quote(m4_default([$1], [[, ]])), m4_if([$2], [], m4_quote(lt_decl_varnames), m4_quote(m4_shift($@))))[]dnl ]) m4_define([_lt_decl_all_varnames], [lt_join($@, lt_decl_varnames_tagged([$1], lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl ]) # _LT_CONFIG_STATUS_DECLARE([VARNAME]) # ------------------------------------ # Quote a variable value, and forward it to 'config.status' so that its # declaration there will have the same value as in 'configure'. VARNAME # must have a single quote delimited value for this to work. m4_define([_LT_CONFIG_STATUS_DECLARE], [$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`']) # _LT_CONFIG_STATUS_DECLARATIONS # ------------------------------ # We delimit libtool config variables with single quotes, so when # we write them to config.status, we have to be sure to quote all # embedded single quotes properly. In configure, this macro expands # each variable declared with _LT_DECL (and _LT_TAGDECL) into: # # ='`$ECHO "$" | $SED "$delay_single_quote_subst"`' m4_defun([_LT_CONFIG_STATUS_DECLARATIONS], [m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames), [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])]) # _LT_LIBTOOL_TAGS # ---------------- # Output comment and list of tags supported by the script m4_defun([_LT_LIBTOOL_TAGS], [_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl available_tags='_LT_TAGS'dnl ]) # _LT_LIBTOOL_DECLARE(VARNAME, [TAG]) # ----------------------------------- # Extract the dictionary values for VARNAME (optionally with TAG) and # expand to a commented shell variable setting: # # # Some comment about what VAR is for. # visible_name=$lt_internal_name m4_define([_LT_LIBTOOL_DECLARE], [_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [description])))[]dnl m4_pushdef([_libtool_name], m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])), [0], [_libtool_name=[$]$1], [1], [_libtool_name=$lt_[]$1], [2], [_libtool_name=$lt_[]$1], [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl ]) # _LT_LIBTOOL_CONFIG_VARS # ----------------------- # Produce commented declarations of non-tagged libtool config variables # suitable for insertion in the LIBTOOL CONFIG section of the 'libtool' # script. Tagged libtool config variables (even for the LIBTOOL CONFIG # section) are produced by _LT_LIBTOOL_TAG_VARS. m4_defun([_LT_LIBTOOL_CONFIG_VARS], [m4_foreach([_lt_var], m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)), [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])]) # _LT_LIBTOOL_TAG_VARS(TAG) # ------------------------- m4_define([_LT_LIBTOOL_TAG_VARS], [m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames), [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])]) # _LT_TAGVAR(VARNAME, [TAGNAME]) # ------------------------------ m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])]) # _LT_CONFIG_COMMANDS # ------------------- # Send accumulated output to $CONFIG_STATUS. Thanks to the lists of # variables for single and double quote escaping we saved from calls # to _LT_DECL, we can put quote escaped variables declarations # into 'config.status', and then the shell code to quote escape them in # for loops in 'config.status'. Finally, any additional code accumulated # from calls to _LT_CONFIG_LIBTOOL_INIT is expanded. m4_defun([_LT_CONFIG_COMMANDS], [AC_PROVIDE_IFELSE([LT_OUTPUT], dnl If the libtool generation code has been placed in $CONFIG_LT, dnl instead of duplicating it all over again into config.status, dnl then we will have config.status run $CONFIG_LT later, so it dnl needs to know what name is stored there: [AC_CONFIG_COMMANDS([libtool], [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])], dnl If the libtool generation code is destined for config.status, dnl expand the accumulated commands and init code now: [AC_CONFIG_COMMANDS([libtool], [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])]) ])#_LT_CONFIG_COMMANDS # Initialize. m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT], [ # The HP-UX ksh and POSIX shell print the target directory to stdout # if CDPATH is set. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH sed_quote_subst='$sed_quote_subst' double_quote_subst='$double_quote_subst' delay_variable_subst='$delay_variable_subst' _LT_CONFIG_STATUS_DECLARATIONS LTCC='$LTCC' LTCFLAGS='$LTCFLAGS' compiler='$compiler_DEFAULT' # A function that is used when there is no print builtin or printf. func_fallback_echo () { eval 'cat <<_LTECHO_EOF \$[]1 _LTECHO_EOF' } # Quote evaled strings. for var in lt_decl_all_varnames([[ \ ]], lt_decl_quote_varnames); do case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in *[[\\\\\\\`\\"\\\$]]*) eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes ;; *) eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" ;; esac done # Double-quote double-evaled strings. for var in lt_decl_all_varnames([[ \ ]], lt_decl_dquote_varnames); do case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in *[[\\\\\\\`\\"\\\$]]*) eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes ;; *) eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" ;; esac done _LT_OUTPUT_LIBTOOL_INIT ]) # _LT_GENERATED_FILE_INIT(FILE, [COMMENT]) # ------------------------------------ # Generate a child script FILE with all initialization necessary to # reuse the environment learned by the parent script, and make the # file executable. If COMMENT is supplied, it is inserted after the # '#!' sequence but before initialization text begins. After this # macro, additional text can be appended to FILE to form the body of # the child script. The macro ends with non-zero status if the # file could not be fully written (such as if the disk is full). m4_ifdef([AS_INIT_GENERATED], [m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])], [m4_defun([_LT_GENERATED_FILE_INIT], [m4_require([AS_PREPARE])]dnl [m4_pushdef([AS_MESSAGE_LOG_FD])]dnl [lt_write_fail=0 cat >$1 <<_ASEOF || lt_write_fail=1 #! $SHELL # Generated by $as_me. $2 SHELL=\${CONFIG_SHELL-$SHELL} export SHELL _ASEOF cat >>$1 <<\_ASEOF || lt_write_fail=1 AS_SHELL_SANITIZE _AS_PREPARE exec AS_MESSAGE_FD>&1 _ASEOF test 0 = "$lt_write_fail" && chmod +x $1[]dnl m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT # LT_OUTPUT # --------- # This macro allows early generation of the libtool script (before # AC_OUTPUT is called), incase it is used in configure for compilation # tests. AC_DEFUN([LT_OUTPUT], [: ${CONFIG_LT=./config.lt} AC_MSG_NOTICE([creating $CONFIG_LT]) _LT_GENERATED_FILE_INIT(["$CONFIG_LT"], [# Run this file to recreate a libtool stub with the current configuration.]) cat >>"$CONFIG_LT" <<\_LTEOF lt_cl_silent=false exec AS_MESSAGE_LOG_FD>>config.log { echo AS_BOX([Running $as_me.]) } >&AS_MESSAGE_LOG_FD lt_cl_help="\ '$as_me' creates a local libtool stub from the current configuration, for use in further configure time tests before the real libtool is generated. Usage: $[0] [[OPTIONS]] -h, --help print this help, then exit -V, --version print version number, then exit -q, --quiet do not print progress messages -d, --debug don't remove temporary files Report bugs to ." lt_cl_version="\ m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION]) configured by $[0], generated by m4_PACKAGE_STRING. Copyright (C) 2011 Free Software Foundation, Inc. This config.lt script is free software; the Free Software Foundation gives unlimited permision to copy, distribute and modify it." while test 0 != $[#] do case $[1] in --version | --v* | -V ) echo "$lt_cl_version"; exit 0 ;; --help | --h* | -h ) echo "$lt_cl_help"; exit 0 ;; --debug | --d* | -d ) debug=: ;; --quiet | --q* | --silent | --s* | -q ) lt_cl_silent=: ;; -*) AC_MSG_ERROR([unrecognized option: $[1] Try '$[0] --help' for more information.]) ;; *) AC_MSG_ERROR([unrecognized argument: $[1] Try '$[0] --help' for more information.]) ;; esac shift done if $lt_cl_silent; then exec AS_MESSAGE_FD>/dev/null fi _LTEOF cat >>"$CONFIG_LT" <<_LTEOF _LT_OUTPUT_LIBTOOL_COMMANDS_INIT _LTEOF cat >>"$CONFIG_LT" <<\_LTEOF AC_MSG_NOTICE([creating $ofile]) _LT_OUTPUT_LIBTOOL_COMMANDS AS_EXIT(0) _LTEOF chmod +x "$CONFIG_LT" # configure is writing to config.log, but config.lt does its own redirection, # appending to config.log, which fails on DOS, as config.log is still kept # open by configure. Here we exec the FD to /dev/null, effectively closing # config.log, so it can be properly (re)opened and appended to by config.lt. lt_cl_success=: test yes = "$silent" && lt_config_lt_args="$lt_config_lt_args --quiet" exec AS_MESSAGE_LOG_FD>/dev/null $SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false exec AS_MESSAGE_LOG_FD>>config.log $lt_cl_success || AS_EXIT(1) ])# LT_OUTPUT # _LT_CONFIG(TAG) # --------------- # If TAG is the built-in tag, create an initial libtool script with a # default configuration from the untagged config vars. Otherwise add code # to config.status for appending the configuration named by TAG from the # matching tagged config vars. m4_defun([_LT_CONFIG], [m4_require([_LT_FILEUTILS_DEFAULTS])dnl _LT_CONFIG_SAVE_COMMANDS([ m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl m4_if(_LT_TAG, [C], [ # See if we are running on zsh, and set the options that allow our # commands through without removal of \ escapes. if test -n "${ZSH_VERSION+set}"; then setopt NO_GLOB_SUBST fi cfgfile=${ofile}T trap "$RM \"$cfgfile\"; exit 1" 1 2 15 $RM "$cfgfile" cat <<_LT_EOF >> "$cfgfile" #! $SHELL # Generated automatically by $as_me ($PACKAGE) $VERSION # NOTE: Changes made to this file will be lost: look at ltmain.sh. # Provide generalized library-building support services. # Written by Gordon Matzigkeit, 1996 _LT_COPYING _LT_LIBTOOL_TAGS # Configured defaults for sys_lib_dlsearch_path munging. : \${LT_SYS_LIBRARY_PATH="$configure_time_lt_sys_library_path"} # ### BEGIN LIBTOOL CONFIG _LT_LIBTOOL_CONFIG_VARS _LT_LIBTOOL_TAG_VARS # ### END LIBTOOL CONFIG _LT_EOF cat <<'_LT_EOF' >> "$cfgfile" # ### BEGIN FUNCTIONS SHARED WITH CONFIGURE _LT_PREPARE_MUNGE_PATH_LIST _LT_PREPARE_CC_BASENAME # ### END FUNCTIONS SHARED WITH CONFIGURE _LT_EOF case $host_os in aix3*) cat <<\_LT_EOF >> "$cfgfile" # AIX sometimes has problems with the GCC collect2 program. For some # reason, if we set the COLLECT_NAMES environment variable, the problems # vanish in a puff of smoke. if test set != "${COLLECT_NAMES+set}"; then COLLECT_NAMES= export COLLECT_NAMES fi _LT_EOF ;; esac _LT_PROG_LTMAIN # We use sed instead of cat because bash on DJGPP gets confused if # if finds mixed CR/LF and LF-only lines. Since sed operates in # text mode, it properly converts lines to CR/LF. This bash problem # is reportedly fixed, but why not run on old versions too? $SED '$q' "$ltmain" >> "$cfgfile" \ || (rm -f "$cfgfile"; exit 1) mv -f "$cfgfile" "$ofile" || (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") chmod +x "$ofile" ], [cat <<_LT_EOF >> "$ofile" dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded dnl in a comment (ie after a #). # ### BEGIN LIBTOOL TAG CONFIG: $1 _LT_LIBTOOL_TAG_VARS(_LT_TAG) # ### END LIBTOOL TAG CONFIG: $1 _LT_EOF ])dnl /m4_if ], [m4_if([$1], [], [ PACKAGE='$PACKAGE' VERSION='$VERSION' RM='$RM' ofile='$ofile'], []) ])dnl /_LT_CONFIG_SAVE_COMMANDS ])# _LT_CONFIG # LT_SUPPORTED_TAG(TAG) # --------------------- # Trace this macro to discover what tags are supported by the libtool # --tag option, using: # autoconf --trace 'LT_SUPPORTED_TAG:$1' AC_DEFUN([LT_SUPPORTED_TAG], []) # C support is built-in for now m4_define([_LT_LANG_C_enabled], []) m4_define([_LT_TAGS], []) # LT_LANG(LANG) # ------------- # Enable libtool support for the given language if not already enabled. AC_DEFUN([LT_LANG], [AC_BEFORE([$0], [LT_OUTPUT])dnl m4_case([$1], [C], [_LT_LANG(C)], [C++], [_LT_LANG(CXX)], [Go], [_LT_LANG(GO)], [Java], [_LT_LANG(GCJ)], [Fortran 77], [_LT_LANG(F77)], [Fortran], [_LT_LANG(FC)], [Windows Resource], [_LT_LANG(RC)], [m4_ifdef([_LT_LANG_]$1[_CONFIG], [_LT_LANG($1)], [m4_fatal([$0: unsupported language: "$1"])])])dnl ])# LT_LANG # _LT_LANG(LANGNAME) # ------------------ m4_defun([_LT_LANG], [m4_ifdef([_LT_LANG_]$1[_enabled], [], [LT_SUPPORTED_TAG([$1])dnl m4_append([_LT_TAGS], [$1 ])dnl m4_define([_LT_LANG_]$1[_enabled], [])dnl _LT_LANG_$1_CONFIG($1)])dnl ])# _LT_LANG m4_ifndef([AC_PROG_GO], [ ############################################################ # NOTE: This macro has been submitted for inclusion into # # GNU Autoconf as AC_PROG_GO. When it is available in # # a released version of Autoconf we should remove this # # macro and use it instead. # ############################################################ m4_defun([AC_PROG_GO], [AC_LANG_PUSH(Go)dnl AC_ARG_VAR([GOC], [Go compiler command])dnl AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl _AC_ARG_VAR_LDFLAGS()dnl AC_CHECK_TOOL(GOC, gccgo) if test -z "$GOC"; then if test -n "$ac_tool_prefix"; then AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo]) fi fi if test -z "$GOC"; then AC_CHECK_PROG(GOC, gccgo, gccgo, false) fi ])#m4_defun ])#m4_ifndef # _LT_LANG_DEFAULT_CONFIG # ----------------------- m4_defun([_LT_LANG_DEFAULT_CONFIG], [AC_PROVIDE_IFELSE([AC_PROG_CXX], [LT_LANG(CXX)], [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])]) AC_PROVIDE_IFELSE([AC_PROG_F77], [LT_LANG(F77)], [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])]) AC_PROVIDE_IFELSE([AC_PROG_FC], [LT_LANG(FC)], [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])]) dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal dnl pulling things in needlessly. AC_PROVIDE_IFELSE([AC_PROG_GCJ], [LT_LANG(GCJ)], [AC_PROVIDE_IFELSE([A][M_PROG_GCJ], [LT_LANG(GCJ)], [AC_PROVIDE_IFELSE([LT_PROG_GCJ], [LT_LANG(GCJ)], [m4_ifdef([AC_PROG_GCJ], [m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])]) m4_ifdef([A][M_PROG_GCJ], [m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])]) m4_ifdef([LT_PROG_GCJ], [m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])]) AC_PROVIDE_IFELSE([AC_PROG_GO], [LT_LANG(GO)], [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])]) AC_PROVIDE_IFELSE([LT_PROG_RC], [LT_LANG(RC)], [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])]) ])# _LT_LANG_DEFAULT_CONFIG # Obsolete macros: AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)]) AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)]) AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)]) AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)]) AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_CXX], []) dnl AC_DEFUN([AC_LIBTOOL_F77], []) dnl AC_DEFUN([AC_LIBTOOL_FC], []) dnl AC_DEFUN([AC_LIBTOOL_GCJ], []) dnl AC_DEFUN([AC_LIBTOOL_RC], []) # _LT_TAG_COMPILER # ---------------- m4_defun([_LT_TAG_COMPILER], [AC_REQUIRE([AC_PROG_CC])dnl _LT_DECL([LTCC], [CC], [1], [A C compiler])dnl _LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl _LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl _LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. compiler=$CC ])# _LT_TAG_COMPILER # _LT_COMPILER_BOILERPLATE # ------------------------ # Check for compiler boilerplate output or warnings with # the simple compiler test code. m4_defun([_LT_COMPILER_BOILERPLATE], [m4_require([_LT_DECL_SED])dnl ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" >conftest.$ac_ext eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_compiler_boilerplate=`cat conftest.err` $RM conftest* ])# _LT_COMPILER_BOILERPLATE # _LT_LINKER_BOILERPLATE # ---------------------- # Check for linker boilerplate output or warnings with # the simple link test code. m4_defun([_LT_LINKER_BOILERPLATE], [m4_require([_LT_DECL_SED])dnl ac_outfile=conftest.$ac_objext echo "$lt_simple_link_test_code" >conftest.$ac_ext eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_linker_boilerplate=`cat conftest.err` $RM -r conftest* ])# _LT_LINKER_BOILERPLATE # _LT_REQUIRED_DARWIN_CHECKS # ------------------------- m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[ case $host_os in rhapsody* | darwin*) AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:]) AC_CHECK_TOOL([NMEDIT], [nmedit], [:]) AC_CHECK_TOOL([LIPO], [lipo], [:]) AC_CHECK_TOOL([OTOOL], [otool], [:]) AC_CHECK_TOOL([OTOOL64], [otool64], [:]) _LT_DECL([], [DSYMUTIL], [1], [Tool to manipulate archived DWARF debug symbol files on Mac OS X]) _LT_DECL([], [NMEDIT], [1], [Tool to change global to local symbols on Mac OS X]) _LT_DECL([], [LIPO], [1], [Tool to manipulate fat objects and archives on Mac OS X]) _LT_DECL([], [OTOOL], [1], [ldd/readelf like tool for Mach-O binaries on Mac OS X]) _LT_DECL([], [OTOOL64], [1], [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4]) AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod], [lt_cv_apple_cc_single_mod=no if test -z "$LT_MULTI_MODULE"; then # By default we will add the -single_module flag. You can override # by either setting the environment variable LT_MULTI_MODULE # non-empty at configure time, or by adding -multi_module to the # link flags. rm -rf libconftest.dylib* echo "int foo(void){return 1;}" > conftest.c echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ -dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ -dynamiclib -Wl,-single_module conftest.c 2>conftest.err _lt_result=$? # If there is a non-empty error log, and "single_module" # appears in it, assume the flag caused a linker warning if test -s conftest.err && $GREP single_module conftest.err; then cat conftest.err >&AS_MESSAGE_LOG_FD # Otherwise, if the output was created with a 0 exit code from # the compiler, it worked. elif test -f libconftest.dylib && test 0 = "$_lt_result"; then lt_cv_apple_cc_single_mod=yes else cat conftest.err >&AS_MESSAGE_LOG_FD fi rm -rf libconftest.dylib* rm -f conftest.* fi]) AC_CACHE_CHECK([for -exported_symbols_list linker flag], [lt_cv_ld_exported_symbols_list], [lt_cv_ld_exported_symbols_list=no save_LDFLAGS=$LDFLAGS echo "_main" > conftest.sym LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], [lt_cv_ld_exported_symbols_list=yes], [lt_cv_ld_exported_symbols_list=no]) LDFLAGS=$save_LDFLAGS ]) AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load], [lt_cv_ld_force_load=no cat > conftest.c << _LT_EOF int forced_loaded() { return 2;} _LT_EOF echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD echo "$AR $AR_FLAGS libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD $AR $AR_FLAGS libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD cat > conftest.c << _LT_EOF int main() { return 0;} _LT_EOF echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err _lt_result=$? if test -s conftest.err && $GREP force_load conftest.err; then cat conftest.err >&AS_MESSAGE_LOG_FD elif test -f conftest && test 0 = "$_lt_result" && $GREP forced_load conftest >/dev/null 2>&1; then lt_cv_ld_force_load=yes else cat conftest.err >&AS_MESSAGE_LOG_FD fi rm -f conftest.err libconftest.a conftest conftest.c rm -rf conftest.dSYM ]) case $host_os in rhapsody* | darwin1.[[012]]) _lt_dar_allow_undefined='$wl-undefined ${wl}suppress' ;; darwin1.*) _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; darwin*) case $MACOSX_DEPLOYMENT_TARGET,$host in 10.[[012]],*|,*powerpc*-darwin[[5-8]]*) _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; *) _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; esac ;; esac if test yes = "$lt_cv_apple_cc_single_mod"; then _lt_dar_single_mod='$single_module' fi if test yes = "$lt_cv_ld_exported_symbols_list"; then _lt_dar_export_syms=' $wl-exported_symbols_list,$output_objdir/$libname-symbols.expsym' else _lt_dar_export_syms='~$NMEDIT -s $output_objdir/$libname-symbols.expsym $lib' fi if test : != "$DSYMUTIL" && test no = "$lt_cv_ld_force_load"; then _lt_dsymutil='~$DSYMUTIL $lib || :' else _lt_dsymutil= fi ;; esac ]) # _LT_DARWIN_LINKER_FEATURES([TAG]) # --------------------------------- # Checks for linker and compiler features on darwin m4_defun([_LT_DARWIN_LINKER_FEATURES], [ m4_require([_LT_REQUIRED_DARWIN_CHECKS]) _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_automatic, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported if test yes = "$lt_cv_ld_force_load"; then _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes], [FC], [_LT_TAGVAR(compiler_needs_object, $1)=yes]) else _LT_TAGVAR(whole_archive_flag_spec, $1)='' fi _LT_TAGVAR(link_all_deplibs, $1)=yes _LT_TAGVAR(allow_undefined_flag, $1)=$_lt_dar_allow_undefined case $cc_basename in ifort*|nagfor*) _lt_dar_can_shared=yes ;; *) _lt_dar_can_shared=$GCC ;; esac if test yes = "$_lt_dar_can_shared"; then output_verbose_link_cmd=func_echo_all _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" _LT_TAGVAR(archive_expsym_cmds, $1)="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" _LT_TAGVAR(module_expsym_cmds, $1)="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" m4_if([$1], [CXX], [ if test yes != "$lt_cv_apple_cc_single_mod"; then _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dsymutil" _LT_TAGVAR(archive_expsym_cmds, $1)="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dar_export_syms$_lt_dsymutil" fi ],[]) else _LT_TAGVAR(ld_shlibs, $1)=no fi ]) # _LT_SYS_MODULE_PATH_AIX([TAGNAME]) # ---------------------------------- # Links a minimal program and checks the executable # for the system default hardcoded library path. In most cases, # this is /usr/lib:/lib, but when the MPI compilers are used # the location of the communication and MPI libs are included too. # If we don't find anything, use the default library path according # to the aix ld manual. # Store the results from the different compilers for each TAGNAME. # Allow to override them for all tags through lt_cv_aix_libpath. m4_defun([_LT_SYS_MODULE_PATH_AIX], [m4_require([_LT_DECL_SED])dnl if test set = "${lt_cv_aix_libpath+set}"; then aix_libpath=$lt_cv_aix_libpath else AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])], [AC_LINK_IFELSE([AC_LANG_PROGRAM],[ lt_aix_libpath_sed='[ /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }]' _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi],[]) if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=/usr/lib:/lib fi ]) aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1]) fi ])# _LT_SYS_MODULE_PATH_AIX # _LT_SHELL_INIT(ARG) # ------------------- m4_define([_LT_SHELL_INIT], [m4_divert_text([M4SH-INIT], [$1 ])])# _LT_SHELL_INIT # _LT_PROG_ECHO_BACKSLASH # ----------------------- # Find how we can fake an echo command that does not interpret backslash. # In particular, with Autoconf 2.60 or later we add some code to the start # of the generated configure script that will find a shell with a builtin # printf (that we can use as an echo command). m4_defun([_LT_PROG_ECHO_BACKSLASH], [ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO AC_MSG_CHECKING([how to print strings]) # Test print first, because it will be a builtin if present. if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then ECHO='print -r --' elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then ECHO='printf %s\n' else # Use this function as a fallback that always works. func_fallback_echo () { eval 'cat <<_LTECHO_EOF $[]1 _LTECHO_EOF' } ECHO='func_fallback_echo' fi # func_echo_all arg... # Invoke $ECHO with all args, space-separated. func_echo_all () { $ECHO "$*" } case $ECHO in printf*) AC_MSG_RESULT([printf]) ;; print*) AC_MSG_RESULT([print -r]) ;; *) AC_MSG_RESULT([cat]) ;; esac m4_ifdef([_AS_DETECT_SUGGESTED], [_AS_DETECT_SUGGESTED([ test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || ( ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO PATH=/empty FPATH=/empty; export PATH FPATH test "X`printf %s $ECHO`" = "X$ECHO" \ || test "X`print -r -- $ECHO`" = "X$ECHO" )])]) _LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts]) _LT_DECL([], [ECHO], [1], [An echo program that protects backslashes]) ])# _LT_PROG_ECHO_BACKSLASH # _LT_WITH_SYSROOT # ---------------- AC_DEFUN([_LT_WITH_SYSROOT], [m4_require([_LT_DECL_SED])dnl AC_MSG_CHECKING([for sysroot]) AC_ARG_WITH([sysroot], [AS_HELP_STRING([--with-sysroot@<:@=DIR@:>@], [Search for dependent libraries within DIR (or the compiler's sysroot if not specified).])], [], [with_sysroot=no]) dnl lt_sysroot will always be passed unquoted. We quote it here dnl in case the user passed a directory name. lt_sysroot= case $with_sysroot in #( yes) if test yes = "$GCC"; then lt_sysroot=`$CC --print-sysroot 2>/dev/null` fi ;; #( /*) lt_sysroot=`echo "$with_sysroot" | $SED -e "$sed_quote_subst"` ;; #( no|'') ;; #( *) AC_MSG_RESULT([$with_sysroot]) AC_MSG_ERROR([The sysroot must be an absolute path.]) ;; esac AC_MSG_RESULT([${lt_sysroot:-no}]) _LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl [dependent libraries, and where our libraries should be installed.])]) # _LT_ENABLE_LOCK # --------------- m4_defun([_LT_ENABLE_LOCK], [AC_ARG_ENABLE([libtool-lock], [AS_HELP_STRING([--disable-libtool-lock], [avoid locking (might break parallel builds)])]) test no = "$enable_libtool_lock" || enable_libtool_lock=yes # Some flags need to be propagated to the compiler or linker for good # libtool support. case $host in ia64-*-hpux*) # Find out what ABI is being produced by ac_compile, and set mode # options accordingly. echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then case `$FILECMD conftest.$ac_objext` in *ELF-32*) HPUX_IA64_MODE=32 ;; *ELF-64*) HPUX_IA64_MODE=64 ;; esac fi rm -rf conftest* ;; *-*-irix6*) # Find out what ABI is being produced by ac_compile, and set linker # options accordingly. echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then if test yes = "$lt_cv_prog_gnu_ld"; then case `$FILECMD conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -melf32bsmip" ;; *N32*) LD="${LD-ld} -melf32bmipn32" ;; *64-bit*) LD="${LD-ld} -melf64bmip" ;; esac else case `$FILECMD conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -32" ;; *N32*) LD="${LD-ld} -n32" ;; *64-bit*) LD="${LD-ld} -64" ;; esac fi fi rm -rf conftest* ;; mips64*-*linux*) # Find out what ABI is being produced by ac_compile, and set linker # options accordingly. echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then emul=elf case `$FILECMD conftest.$ac_objext` in *32-bit*) emul="${emul}32" ;; *64-bit*) emul="${emul}64" ;; esac case `$FILECMD conftest.$ac_objext` in *MSB*) emul="${emul}btsmip" ;; *LSB*) emul="${emul}ltsmip" ;; esac case `$FILECMD conftest.$ac_objext` in *N32*) emul="${emul}n32" ;; esac LD="${LD-ld} -m $emul" fi rm -rf conftest* ;; x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ s390*-*linux*|s390*-*tpf*|sparc*-*linux*) # Find out what ABI is being produced by ac_compile, and set linker # options accordingly. Note that the listed cases only cover the # situations where additional linker options are needed (such as when # doing 32-bit compilation for a host where ld defaults to 64-bit, or # vice versa); the common cases where no linker options are needed do # not appear in the list. echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then case `$FILECMD conftest.o` in *32-bit*) case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_i386_fbsd" ;; x86_64-*linux*) case `$FILECMD conftest.o` in *x86-64*) LD="${LD-ld} -m elf32_x86_64" ;; *) LD="${LD-ld} -m elf_i386" ;; esac ;; powerpc64le-*linux*) LD="${LD-ld} -m elf32lppclinux" ;; powerpc64-*linux*) LD="${LD-ld} -m elf32ppclinux" ;; s390x-*linux*) LD="${LD-ld} -m elf_s390" ;; sparc64-*linux*) LD="${LD-ld} -m elf32_sparc" ;; esac ;; *64-bit*) case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_x86_64_fbsd" ;; x86_64-*linux*) LD="${LD-ld} -m elf_x86_64" ;; powerpcle-*linux*) LD="${LD-ld} -m elf64lppc" ;; powerpc-*linux*) LD="${LD-ld} -m elf64ppc" ;; s390*-*linux*|s390*-*tpf*) LD="${LD-ld} -m elf64_s390" ;; sparc*-*linux*) LD="${LD-ld} -m elf64_sparc" ;; esac ;; esac fi rm -rf conftest* ;; *-*-sco3.2v5*) # On SCO OpenServer 5, we need -belf to get full-featured binaries. SAVE_CFLAGS=$CFLAGS CFLAGS="$CFLAGS -belf" AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf, [AC_LANG_PUSH(C) AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no]) AC_LANG_POP]) if test yes != "$lt_cv_cc_needs_belf"; then # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf CFLAGS=$SAVE_CFLAGS fi ;; *-*solaris*) # Find out what ABI is being produced by ac_compile, and set linker # options accordingly. echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then case `$FILECMD conftest.o` in *64-bit*) case $lt_cv_prog_gnu_ld in yes*) case $host in i?86-*-solaris*|x86_64-*-solaris*) LD="${LD-ld} -m elf_x86_64" ;; sparc*-*-solaris*) LD="${LD-ld} -m elf64_sparc" ;; esac # GNU ld 2.21 introduced _sol2 emulations. Use them if available. if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then LD=${LD-ld}_sol2 fi ;; *) if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then LD="${LD-ld} -64" fi ;; esac ;; esac fi rm -rf conftest* ;; esac need_locks=$enable_libtool_lock ])# _LT_ENABLE_LOCK # _LT_PROG_AR # ----------- m4_defun([_LT_PROG_AR], [AC_CHECK_TOOLS(AR, [ar], false) : ${AR=ar} _LT_DECL([], [AR], [1], [The archiver]) # Use ARFLAGS variable as AR's operation code to sync the variable naming with # Automake. If both AR_FLAGS and ARFLAGS are specified, AR_FLAGS should have # higher priority because thats what people were doing historically (setting # ARFLAGS for automake and AR_FLAGS for libtool). FIXME: Make the AR_FLAGS # variable obsoleted/removed. test ${AR_FLAGS+y} || AR_FLAGS=${ARFLAGS-cr} lt_ar_flags=$AR_FLAGS _LT_DECL([], [lt_ar_flags], [0], [Flags to create an archive (by configure)]) # Make AR_FLAGS overridable by 'make ARFLAGS='. Don't try to run-time override # by AR_FLAGS because that was never working and AR_FLAGS is about to die. _LT_DECL([], [AR_FLAGS], [\@S|@{ARFLAGS-"\@S|@lt_ar_flags"}], [Flags to create an archive]) AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file], [lt_cv_ar_at_file=no AC_COMPILE_IFELSE([AC_LANG_PROGRAM], [echo conftest.$ac_objext > conftest.lst lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD' AC_TRY_EVAL([lt_ar_try]) if test 0 -eq "$ac_status"; then # Ensure the archiver fails upon bogus file names. rm -f conftest.$ac_objext libconftest.a AC_TRY_EVAL([lt_ar_try]) if test 0 -ne "$ac_status"; then lt_cv_ar_at_file=@ fi fi rm -f conftest.* libconftest.a ]) ]) if test no = "$lt_cv_ar_at_file"; then archiver_list_spec= else archiver_list_spec=$lt_cv_ar_at_file fi _LT_DECL([], [archiver_list_spec], [1], [How to feed a file listing to the archiver]) ])# _LT_PROG_AR # _LT_CMD_OLD_ARCHIVE # ------------------- m4_defun([_LT_CMD_OLD_ARCHIVE], [_LT_PROG_AR AC_CHECK_TOOL(STRIP, strip, :) test -z "$STRIP" && STRIP=: _LT_DECL([], [STRIP], [1], [A symbol stripping program]) AC_CHECK_TOOL(RANLIB, ranlib, :) test -z "$RANLIB" && RANLIB=: _LT_DECL([], [RANLIB], [1], [Commands used to install an old-style archive]) # Determine commands to create old-style static archives. old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' old_postinstall_cmds='chmod 644 $oldlib' old_postuninstall_cmds= if test -n "$RANLIB"; then case $host_os in bitrig* | openbsd*) old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" ;; *) old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" ;; esac old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" fi case $host_os in darwin*) lock_old_archive_extraction=yes ;; *) lock_old_archive_extraction=no ;; esac _LT_DECL([], [old_postinstall_cmds], [2]) _LT_DECL([], [old_postuninstall_cmds], [2]) _LT_TAGDECL([], [old_archive_cmds], [2], [Commands used to build an old-style archive]) _LT_DECL([], [lock_old_archive_extraction], [0], [Whether to use a lock for old archive extraction]) ])# _LT_CMD_OLD_ARCHIVE # _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, # [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE]) # ---------------------------------------------------------------- # Check whether the given compiler option works AC_DEFUN([_LT_COMPILER_OPTION], [m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_DECL_SED])dnl AC_CACHE_CHECK([$1], [$2], [$2=no m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4]) echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="$3" ## exclude from sc_useless_quotes_in_assignment # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. # The option is referenced via a variable to avoid confusing sed. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&AS_MESSAGE_LOG_FD echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then $2=yes fi fi $RM conftest* ]) if test yes = "[$]$2"; then m4_if([$5], , :, [$5]) else m4_if([$6], , :, [$6]) fi ])# _LT_COMPILER_OPTION # Old name: AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], []) # _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, # [ACTION-SUCCESS], [ACTION-FAILURE]) # ---------------------------------------------------- # Check whether the given linker option works AC_DEFUN([_LT_LINKER_OPTION], [m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_DECL_SED])dnl AC_CACHE_CHECK([$1], [$2], [$2=no save_LDFLAGS=$LDFLAGS LDFLAGS="$LDFLAGS $3" echo "$lt_simple_link_test_code" > conftest.$ac_ext if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then # The linker can only warn and ignore the option if not recognized # So say no if there are warnings if test -s conftest.err; then # Append any errors to the config.log. cat conftest.err 1>&AS_MESSAGE_LOG_FD $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if diff conftest.exp conftest.er2 >/dev/null; then $2=yes fi else $2=yes fi fi $RM -r conftest* LDFLAGS=$save_LDFLAGS ]) if test yes = "[$]$2"; then m4_if([$4], , :, [$4]) else m4_if([$5], , :, [$5]) fi ])# _LT_LINKER_OPTION # Old name: AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], []) # LT_CMD_MAX_LEN #--------------- AC_DEFUN([LT_CMD_MAX_LEN], [AC_REQUIRE([AC_CANONICAL_HOST])dnl # find the maximum length of command line arguments AC_MSG_CHECKING([the maximum length of command line arguments]) AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl i=0 teststring=ABCD case $build_os in msdosdjgpp*) # On DJGPP, this test can blow up pretty badly due to problems in libc # (any single argument exceeding 2000 bytes causes a buffer overrun # during glob expansion). Even if it were fixed, the result of this # check would be larger than it should be. lt_cv_sys_max_cmd_len=12288; # 12K is about right ;; gnu*) # Under GNU Hurd, this test is not required because there is # no limit to the length of command line arguments. # Libtool will interpret -1 as no limit whatsoever lt_cv_sys_max_cmd_len=-1; ;; cygwin* | mingw* | cegcc*) # On Win9x/ME, this test blows up -- it succeeds, but takes # about 5 minutes as the teststring grows exponentially. # Worse, since 9x/ME are not pre-emptively multitasking, # you end up with a "frozen" computer, even though with patience # the test eventually succeeds (with a max line length of 256k). # Instead, let's just punt: use the minimum linelength reported by # all of the supported platforms: 8192 (on NT/2K/XP). lt_cv_sys_max_cmd_len=8192; ;; mint*) # On MiNT this can take a long time and run out of memory. lt_cv_sys_max_cmd_len=8192; ;; amigaos*) # On AmigaOS with pdksh, this test takes hours, literally. # So we just punt and use a minimum line length of 8192. lt_cv_sys_max_cmd_len=8192; ;; bitrig* | darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) # This has been around since 386BSD, at least. Likely further. if test -x /sbin/sysctl; then lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` elif test -x /usr/sbin/sysctl; then lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` else lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs fi # And add a safety zone lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` ;; interix*) # We know the value 262144 and hardcode it with a safety zone (like BSD) lt_cv_sys_max_cmd_len=196608 ;; os2*) # The test takes a long time on OS/2. lt_cv_sys_max_cmd_len=8192 ;; osf*) # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not # nice to cause kernel panics so lets avoid the loop below. # First set a reasonable default. lt_cv_sys_max_cmd_len=16384 # if test -x /sbin/sysconfig; then case `/sbin/sysconfig -q proc exec_disable_arg_limit` in *1*) lt_cv_sys_max_cmd_len=-1 ;; esac fi ;; sco3.2v5*) lt_cv_sys_max_cmd_len=102400 ;; sysv5* | sco5v6* | sysv4.2uw2*) kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` if test -n "$kargmax"; then lt_cv_sys_max_cmd_len=`echo $kargmax | $SED 's/.*[[ ]]//'` else lt_cv_sys_max_cmd_len=32768 fi ;; *) lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` if test -n "$lt_cv_sys_max_cmd_len" && \ test undefined != "$lt_cv_sys_max_cmd_len"; then lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` else # Make teststring a little bigger before we do anything with it. # a 1K string should be a reasonable start. for i in 1 2 3 4 5 6 7 8; do teststring=$teststring$teststring done SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} # If test is not a shell built-in, we'll probably end up computing a # maximum length that is only half of the actual maximum length, but # we can't tell. while { test X`env echo "$teststring$teststring" 2>/dev/null` \ = "X$teststring$teststring"; } >/dev/null 2>&1 && test 17 != "$i" # 1/2 MB should be enough do i=`expr $i + 1` teststring=$teststring$teststring done # Only check the string length outside the loop. lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` teststring= # Add a significant safety factor because C++ compilers can tack on # massive amounts of additional arguments before passing them to the # linker. It appears as though 1/2 is a usable value. lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` fi ;; esac ]) if test -n "$lt_cv_sys_max_cmd_len"; then AC_MSG_RESULT($lt_cv_sys_max_cmd_len) else AC_MSG_RESULT(none) fi max_cmd_len=$lt_cv_sys_max_cmd_len _LT_DECL([], [max_cmd_len], [0], [What is the maximum length of a command?]) ])# LT_CMD_MAX_LEN # Old name: AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], []) # _LT_HEADER_DLFCN # ---------------- m4_defun([_LT_HEADER_DLFCN], [AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl ])# _LT_HEADER_DLFCN # _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE, # ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING) # ---------------------------------------------------------------- m4_defun([_LT_TRY_DLOPEN_SELF], [m4_require([_LT_HEADER_DLFCN])dnl if test yes = "$cross_compiling"; then : [$4] else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF [#line $LINENO "configure" #include "confdefs.h" #if HAVE_DLFCN_H #include #endif #include #ifdef RTLD_GLOBAL # define LT_DLGLOBAL RTLD_GLOBAL #else # ifdef DL_GLOBAL # define LT_DLGLOBAL DL_GLOBAL # else # define LT_DLGLOBAL 0 # endif #endif /* We may have to define LT_DLLAZY_OR_NOW in the command line if we find out it does not work in some platform. */ #ifndef LT_DLLAZY_OR_NOW # ifdef RTLD_LAZY # define LT_DLLAZY_OR_NOW RTLD_LAZY # else # ifdef DL_LAZY # define LT_DLLAZY_OR_NOW DL_LAZY # else # ifdef RTLD_NOW # define LT_DLLAZY_OR_NOW RTLD_NOW # else # ifdef DL_NOW # define LT_DLLAZY_OR_NOW DL_NOW # else # define LT_DLLAZY_OR_NOW 0 # endif # endif # endif # endif #endif /* When -fvisibility=hidden is used, assume the code has been annotated correspondingly for the symbols needed. */ #if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) int fnord () __attribute__((visibility("default"))); #endif int fnord () { return 42; } int main () { void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); int status = $lt_dlunknown; if (self) { if (dlsym (self,"fnord")) status = $lt_dlno_uscore; else { if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; else puts (dlerror ()); } /* dlclose (self); */ } else puts (dlerror ()); return status; }] _LT_EOF if AC_TRY_EVAL(ac_link) && test -s "conftest$ac_exeext" 2>/dev/null; then (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null lt_status=$? case x$lt_status in x$lt_dlno_uscore) $1 ;; x$lt_dlneed_uscore) $2 ;; x$lt_dlunknown|x*) $3 ;; esac else : # compilation failed $3 fi fi rm -fr conftest* ])# _LT_TRY_DLOPEN_SELF # LT_SYS_DLOPEN_SELF # ------------------ AC_DEFUN([LT_SYS_DLOPEN_SELF], [m4_require([_LT_HEADER_DLFCN])dnl if test yes != "$enable_dlopen"; then enable_dlopen=unknown enable_dlopen_self=unknown enable_dlopen_self_static=unknown else lt_cv_dlopen=no lt_cv_dlopen_libs= case $host_os in beos*) lt_cv_dlopen=load_add_on lt_cv_dlopen_libs= lt_cv_dlopen_self=yes ;; mingw* | pw32* | cegcc*) lt_cv_dlopen=LoadLibrary lt_cv_dlopen_libs= ;; cygwin*) lt_cv_dlopen=dlopen lt_cv_dlopen_libs= ;; darwin*) # if libdl is installed we need to link against it AC_CHECK_LIB([dl], [dlopen], [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl],[ lt_cv_dlopen=dyld lt_cv_dlopen_libs= lt_cv_dlopen_self=yes ]) ;; tpf*) # Don't try to run any link tests for TPF. We know it's impossible # because TPF is a cross-compiler, and we know how we open DSOs. lt_cv_dlopen=dlopen lt_cv_dlopen_libs= lt_cv_dlopen_self=no ;; *) AC_CHECK_FUNC([shl_load], [lt_cv_dlopen=shl_load], [AC_CHECK_LIB([dld], [shl_load], [lt_cv_dlopen=shl_load lt_cv_dlopen_libs=-ldld], [AC_CHECK_FUNC([dlopen], [lt_cv_dlopen=dlopen], [AC_CHECK_LIB([dl], [dlopen], [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl], [AC_CHECK_LIB([svld], [dlopen], [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-lsvld], [AC_CHECK_LIB([dld], [dld_link], [lt_cv_dlopen=dld_link lt_cv_dlopen_libs=-ldld]) ]) ]) ]) ]) ]) ;; esac if test no = "$lt_cv_dlopen"; then enable_dlopen=no else enable_dlopen=yes fi case $lt_cv_dlopen in dlopen) save_CPPFLAGS=$CPPFLAGS test yes = "$ac_cv_header_dlfcn_h" && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" save_LDFLAGS=$LDFLAGS wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" save_LIBS=$LIBS LIBS="$lt_cv_dlopen_libs $LIBS" AC_CACHE_CHECK([whether a program can dlopen itself], lt_cv_dlopen_self, [dnl _LT_TRY_DLOPEN_SELF( lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes, lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross) ]) if test yes = "$lt_cv_dlopen_self"; then wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" AC_CACHE_CHECK([whether a statically linked program can dlopen itself], lt_cv_dlopen_self_static, [dnl _LT_TRY_DLOPEN_SELF( lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross) ]) fi CPPFLAGS=$save_CPPFLAGS LDFLAGS=$save_LDFLAGS LIBS=$save_LIBS ;; esac case $lt_cv_dlopen_self in yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; *) enable_dlopen_self=unknown ;; esac case $lt_cv_dlopen_self_static in yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; *) enable_dlopen_self_static=unknown ;; esac fi _LT_DECL([dlopen_support], [enable_dlopen], [0], [Whether dlopen is supported]) _LT_DECL([dlopen_self], [enable_dlopen_self], [0], [Whether dlopen of programs is supported]) _LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0], [Whether dlopen of statically linked programs is supported]) ])# LT_SYS_DLOPEN_SELF # Old name: AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], []) # _LT_COMPILER_C_O([TAGNAME]) # --------------------------- # Check to see if options -c and -o are simultaneously supported by compiler. # This macro does not hard code the compiler like AC_PROG_CC_C_O. m4_defun([_LT_COMPILER_C_O], [m4_require([_LT_DECL_SED])dnl m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_TAG_COMPILER])dnl AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext], [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)], [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&AS_MESSAGE_LOG_FD echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes fi fi chmod u+w . 2>&AS_MESSAGE_LOG_FD $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. $RM -r conftest $RM conftest* ]) _LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1], [Does compiler simultaneously support -c and -o options?]) ])# _LT_COMPILER_C_O # _LT_COMPILER_FILE_LOCKS([TAGNAME]) # ---------------------------------- # Check to see if we can do hard links to lock some files if needed m4_defun([_LT_COMPILER_FILE_LOCKS], [m4_require([_LT_ENABLE_LOCK])dnl m4_require([_LT_FILEUTILS_DEFAULTS])dnl _LT_COMPILER_C_O([$1]) hard_links=nottested if test no = "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" && test no != "$need_locks"; then # do not overwrite the value of need_locks provided by the user AC_MSG_CHECKING([if we can lock with hard links]) hard_links=yes $RM conftest* ln conftest.a conftest.b 2>/dev/null && hard_links=no touch conftest.a ln conftest.a conftest.b 2>&5 || hard_links=no ln conftest.a conftest.b 2>/dev/null && hard_links=no AC_MSG_RESULT([$hard_links]) if test no = "$hard_links"; then AC_MSG_WARN(['$CC' does not support '-c -o', so 'make -j' may be unsafe]) need_locks=warn fi else need_locks=no fi _LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?]) ])# _LT_COMPILER_FILE_LOCKS # _LT_CHECK_OBJDIR # ---------------- m4_defun([_LT_CHECK_OBJDIR], [AC_CACHE_CHECK([for objdir], [lt_cv_objdir], [rm -f .libs 2>/dev/null mkdir .libs 2>/dev/null if test -d .libs; then lt_cv_objdir=.libs else # MS-DOS does not allow filenames that begin with a dot. lt_cv_objdir=_libs fi rmdir .libs 2>/dev/null]) objdir=$lt_cv_objdir _LT_DECL([], [objdir], [0], [The name of the directory that contains temporary libtool files])dnl m4_pattern_allow([LT_OBJDIR])dnl AC_DEFINE_UNQUOTED([LT_OBJDIR], "$lt_cv_objdir/", [Define to the sub-directory where libtool stores uninstalled libraries.]) ])# _LT_CHECK_OBJDIR # _LT_LINKER_HARDCODE_LIBPATH([TAGNAME]) # -------------------------------------- # Check hardcoding attributes. m4_defun([_LT_LINKER_HARDCODE_LIBPATH], [AC_MSG_CHECKING([how to hardcode library paths into programs]) _LT_TAGVAR(hardcode_action, $1)= if test -n "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" || test -n "$_LT_TAGVAR(runpath_var, $1)" || test yes = "$_LT_TAGVAR(hardcode_automatic, $1)"; then # We can hardcode non-existent directories. if test no != "$_LT_TAGVAR(hardcode_direct, $1)" && # If the only mechanism to avoid hardcoding is shlibpath_var, we # have to relink, otherwise we might link with an installed library # when we should be linking with a yet-to-be-installed one ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" && test no != "$_LT_TAGVAR(hardcode_minus_L, $1)"; then # Linking always hardcodes the temporary library directory. _LT_TAGVAR(hardcode_action, $1)=relink else # We can link without hardcoding, and we can hardcode nonexisting dirs. _LT_TAGVAR(hardcode_action, $1)=immediate fi else # We cannot hardcode anything, or else we can only hardcode existing # directories. _LT_TAGVAR(hardcode_action, $1)=unsupported fi AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)]) if test relink = "$_LT_TAGVAR(hardcode_action, $1)" || test yes = "$_LT_TAGVAR(inherit_rpath, $1)"; then # Fast installation is not supported enable_fast_install=no elif test yes = "$shlibpath_overrides_runpath" || test no = "$enable_shared"; then # Fast installation is not necessary enable_fast_install=needless fi _LT_TAGDECL([], [hardcode_action], [0], [How to hardcode a shared library path into an executable]) ])# _LT_LINKER_HARDCODE_LIBPATH # _LT_CMD_STRIPLIB # ---------------- m4_defun([_LT_CMD_STRIPLIB], [m4_require([_LT_DECL_EGREP]) striplib= old_striplib= AC_MSG_CHECKING([whether stripping libraries is possible]) if test -z "$STRIP"; then AC_MSG_RESULT([no]) else if $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then old_striplib="$STRIP --strip-debug" striplib="$STRIP --strip-unneeded" AC_MSG_RESULT([yes]) else case $host_os in darwin*) # FIXME - insert some real tests, host_os isn't really good enough striplib="$STRIP -x" old_striplib="$STRIP -S" AC_MSG_RESULT([yes]) ;; freebsd*) if $STRIP -V 2>&1 | $GREP "elftoolchain" >/dev/null; then old_striplib="$STRIP --strip-debug" striplib="$STRIP --strip-unneeded" AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no]) fi ;; *) AC_MSG_RESULT([no]) ;; esac fi fi _LT_DECL([], [old_striplib], [1], [Commands to strip libraries]) _LT_DECL([], [striplib], [1]) ])# _LT_CMD_STRIPLIB # _LT_PREPARE_MUNGE_PATH_LIST # --------------------------- # Make sure func_munge_path_list() is defined correctly. m4_defun([_LT_PREPARE_MUNGE_PATH_LIST], [[# func_munge_path_list VARIABLE PATH # ----------------------------------- # VARIABLE is name of variable containing _space_ separated list of # directories to be munged by the contents of PATH, which is string # having a format: # "DIR[:DIR]:" # string "DIR[ DIR]" will be prepended to VARIABLE # ":DIR[:DIR]" # string "DIR[ DIR]" will be appended to VARIABLE # "DIRP[:DIRP]::[DIRA:]DIRA" # string "DIRP[ DIRP]" will be prepended to VARIABLE and string # "DIRA[ DIRA]" will be appended to VARIABLE # "DIR[:DIR]" # VARIABLE will be replaced by "DIR[ DIR]" func_munge_path_list () { case x@S|@2 in x) ;; *:) eval @S|@1=\"`$ECHO @S|@2 | $SED 's/:/ /g'` \@S|@@S|@1\" ;; x:*) eval @S|@1=\"\@S|@@S|@1 `$ECHO @S|@2 | $SED 's/:/ /g'`\" ;; *::*) eval @S|@1=\"\@S|@@S|@1\ `$ECHO @S|@2 | $SED -e 's/.*:://' -e 's/:/ /g'`\" eval @S|@1=\"`$ECHO @S|@2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \@S|@@S|@1\" ;; *) eval @S|@1=\"`$ECHO @S|@2 | $SED 's/:/ /g'`\" ;; esac } ]])# _LT_PREPARE_PATH_LIST # _LT_SYS_DYNAMIC_LINKER([TAG]) # ----------------------------- # PORTME Fill in your ld.so characteristics m4_defun([_LT_SYS_DYNAMIC_LINKER], [AC_REQUIRE([AC_CANONICAL_HOST])dnl m4_require([_LT_DECL_EGREP])dnl m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_DECL_OBJDUMP])dnl m4_require([_LT_DECL_SED])dnl m4_require([_LT_CHECK_SHELL_FEATURES])dnl m4_require([_LT_PREPARE_MUNGE_PATH_LIST])dnl AC_MSG_CHECKING([dynamic linker characteristics]) m4_if([$1], [], [ if test yes = "$GCC"; then case $host_os in darwin*) lt_awk_arg='/^libraries:/,/LR/' ;; *) lt_awk_arg='/^libraries:/' ;; esac case $host_os in mingw* | cegcc*) lt_sed_strip_eq='s|=\([[A-Za-z]]:\)|\1|g' ;; *) lt_sed_strip_eq='s|=/|/|g' ;; esac lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` case $lt_search_path_spec in *\;*) # if the path contains ";" then we assume it to be the separator # otherwise default to the standard path separator (i.e. ":") - it is # assumed that no part of a normal pathname contains ";" but that should # okay in the real world where ";" in dirpaths is itself problematic. lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` ;; *) lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` ;; esac # Ok, now we have the path, separated by spaces, we can step through it # and add multilib dir if necessary... lt_tmp_lt_search_path_spec= lt_multi_os_dir=/`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` # ...but if some path component already ends with the multilib dir we assume # that all is fine and trust -print-search-dirs as is (GCC 4.2? or newer). case "$lt_multi_os_dir; $lt_search_path_spec " in "/; "* | "/.; "* | "/./; "* | *"$lt_multi_os_dir "* | *"$lt_multi_os_dir/ "*) lt_multi_os_dir= ;; esac for lt_sys_path in $lt_search_path_spec; do if test -d "$lt_sys_path$lt_multi_os_dir"; then lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path$lt_multi_os_dir" elif test -n "$lt_multi_os_dir"; then test -d "$lt_sys_path" && \ lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" fi done lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' BEGIN {RS = " "; FS = "/|\n";} { lt_foo = ""; lt_count = 0; for (lt_i = NF; lt_i > 0; lt_i--) { if ($lt_i != "" && $lt_i != ".") { if ($lt_i == "..") { lt_count++; } else { if (lt_count == 0) { lt_foo = "/" $lt_i lt_foo; } else { lt_count--; } } } } if (lt_foo != "") { lt_freq[[lt_foo]]++; } if (lt_freq[[lt_foo]] == 1) { print lt_foo; } }'` # AWK program above erroneously prepends '/' to C:/dos/paths # for these hosts. case $host_os in mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ $SED 's|/\([[A-Za-z]]:\)|\1|g'` ;; esac sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` else sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" fi]) library_names_spec= libname_spec='lib$name' soname_spec= shrext_cmds=.so postinstall_cmds= postuninstall_cmds= finish_cmds= finish_eval= shlibpath_var= shlibpath_overrides_runpath=unknown version_type=none dynamic_linker="$host_os ld.so" sys_lib_dlsearch_path_spec="/lib /usr/lib" need_lib_prefix=unknown hardcode_into_libs=no # when you set need_version to no, make sure it does not cause -set_version # flags to be left without arguments need_version=unknown AC_ARG_VAR([LT_SYS_LIBRARY_PATH], [User-defined run-time library search path.]) case $host_os in aix3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname.a' shlibpath_var=LIBPATH # AIX 3 has no versioning support, so we append a major version to the name. soname_spec='$libname$release$shared_ext$major' ;; aix[[4-9]]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no hardcode_into_libs=yes if test ia64 = "$host_cpu"; then # AIX 5 supports IA64 library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH else # With GCC up to 2.95.x, collect2 would create an import file # for dependence libraries. The import file would start with # the line '#! .'. This would cause the generated library to # depend on '.', always an invalid library. This was fixed in # development snapshots of GCC prior to 3.0. case $host_os in aix4 | aix4.[[01]] | aix4.[[01]].*) if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' echo ' yes ' echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then : else can_build_shared=no fi ;; esac # Using Import Files as archive members, it is possible to support # filename-based versioning of shared library archives on AIX. While # this would work for both with and without runtime linking, it will # prevent static linking of such archives. So we do filename-based # shared library versioning with .so extension only, which is used # when both runtime linking and shared linking is enabled. # Unfortunately, runtime linking may impact performance, so we do # not want this to be the default eventually. Also, we use the # versioned .so libs for executables only if there is the -brtl # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. # To allow for filename-based versioning support, we need to create # libNAME.so.V as an archive file, containing: # *) an Import File, referring to the versioned filename of the # archive as well as the shared archive member, telling the # bitwidth (32 or 64) of that shared object, and providing the # list of exported symbols of that shared object, eventually # decorated with the 'weak' keyword # *) the shared object with the F_LOADONLY flag set, to really avoid # it being seen by the linker. # At run time we better use the real file rather than another symlink, # but for link time we create the symlink libNAME.so -> libNAME.so.V case $with_aix_soname,$aix_use_runtimelinking in # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct # soname into executable. Probably we can add versioning support to # collect2, so additional links can be useful in future. aix,yes) # traditional libtool dynamic_linker='AIX unversionable lib.so' # If using run time linking (on AIX 4.2 or later) use lib.so # instead of lib.a to let people know that these are not # typical AIX shared libraries. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' ;; aix,no) # traditional AIX only dynamic_linker='AIX lib.a[(]lib.so.V[)]' # We preserve .a as extension for shared libraries through AIX4.2 # and later when we are not doing run time linking. library_names_spec='$libname$release.a $libname.a' soname_spec='$libname$release$shared_ext$major' ;; svr4,*) # full svr4 only dynamic_linker="AIX lib.so.V[(]$shared_archive_member_spec.o[)]" library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' # We do not specify a path in Import Files, so LIBPATH fires. shlibpath_overrides_runpath=yes ;; *,yes) # both, prefer svr4 dynamic_linker="AIX lib.so.V[(]$shared_archive_member_spec.o[)], lib.a[(]lib.so.V[)]" library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' # unpreferred sharedlib libNAME.a needs extra handling postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' # We do not specify a path in Import Files, so LIBPATH fires. shlibpath_overrides_runpath=yes ;; *,no) # both, prefer aix dynamic_linker="AIX lib.a[(]lib.so.V[)], lib.so.V[(]$shared_archive_member_spec.o[)]" library_names_spec='$libname$release.a $libname.a' soname_spec='$libname$release$shared_ext$major' # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' ;; esac shlibpath_var=LIBPATH fi ;; amigaos*) case $host_cpu in powerpc) # Since July 2007 AmigaOS4 officially supports .so libraries. # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' ;; m68k) library_names_spec='$libname.ixlibrary $libname.a' # Create ${libname}_ixlibrary.a entries in /sys/libs. finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' ;; esac ;; beos*) library_names_spec='$libname$shared_ext' dynamic_linker="$host_os ld.so" shlibpath_var=LIBRARY_PATH ;; bsdi[[45]]*) version_type=linux # correct to gnu/linux during the next big refactor need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" # the default ld.so.conf also contains /usr/contrib/lib and # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow # libtool to hard-code these into programs ;; cygwin* | mingw* | pw32* | cegcc*) version_type=windows shrext_cmds=.dll need_version=no need_lib_prefix=no case $GCC,$cc_basename in yes,*) # gcc library_names_spec='$libname.dll.a' # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \$file`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname~ if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; fi' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' m4_if([$1], [],[ sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"]) ;; mingw* | cegcc*) # MinGW DLLs use traditional 'lib' prefix soname_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' ;; pw32*) # pw32 DLLs use 'pw' prefix rather than 'lib' library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' ;; esac dynamic_linker='Win32 ld.exe' ;; *,cl* | *,icl*) # Native MSVC or ICC libname_spec='$name' soname_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' library_names_spec='$libname.dll.lib' case $build_os in mingw*) sys_lib_search_path_spec= lt_save_ifs=$IFS IFS=';' for lt_path in $LIB do IFS=$lt_save_ifs # Let DOS variable expansion print the short 8.3 style file name. lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" done IFS=$lt_save_ifs # Convert to MSYS style. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'` ;; cygwin*) # Convert to unix form, then to dos form, then back to unix form # but this time dos style (no spaces!) so that the unix form looks # like /cygdrive/c/PROGRA~1:/cygdr... sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` ;; *) sys_lib_search_path_spec=$LIB if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then # It is most probably a Windows format PATH. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` else sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi # FIXME: find the short name or the path components, as spaces are # common. (e.g. "Program Files" -> "PROGRA~1") ;; esac # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \$file`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes dynamic_linker='Win32 link.exe' ;; *) # Assume MSVC and ICC wrapper library_names_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext $libname.lib' dynamic_linker='Win32 ld.exe' ;; esac # FIXME: first we should search . and the directory the executable is in shlibpath_var=PATH ;; darwin* | rhapsody*) dynamic_linker="$host_os dyld" version_type=darwin need_lib_prefix=no need_version=no library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' soname_spec='$libname$release$major$shared_ext' shlibpath_overrides_runpath=yes shlibpath_var=DYLD_LIBRARY_PATH shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' m4_if([$1], [],[ sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"]) sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' ;; dgux*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH ;; freebsd* | dragonfly* | midnightbsd*) # DragonFly does not have aout. When/if they implement a new # versioning mechanism, adjust this. if test -x /usr/bin/objformat; then objformat=`/usr/bin/objformat` else case $host_os in freebsd[[23]].*) objformat=aout ;; *) objformat=elf ;; esac fi version_type=freebsd-$objformat case $version_type in freebsd-elf*) library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' need_version=no need_lib_prefix=no ;; freebsd-*) library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' need_version=yes ;; esac shlibpath_var=LD_LIBRARY_PATH case $host_os in freebsd2.*) shlibpath_overrides_runpath=yes ;; freebsd3.[[01]]* | freebsdelf3.[[01]]*) shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \ freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1) shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; *) # from 4.6 on, and DragonFly shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; esac ;; haiku*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no dynamic_linker="$host_os runtime_loader" library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LIBRARY_PATH shlibpath_overrides_runpath=no sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' hardcode_into_libs=yes ;; hpux9* | hpux10* | hpux11*) # Give a soname corresponding to the major version so that dld.sl refuses to # link against other versions. version_type=sunos need_lib_prefix=no need_version=no case $host_cpu in ia64*) shrext_cmds='.so' hardcode_into_libs=yes dynamic_linker="$host_os dld.so" shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' if test 32 = "$HPUX_IA64_MODE"; then sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" sys_lib_dlsearch_path_spec=/usr/lib/hpux32 else sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" sys_lib_dlsearch_path_spec=/usr/lib/hpux64 fi ;; hppa*64*) shrext_cmds='.sl' hardcode_into_libs=yes dynamic_linker="$host_os dld.sl" shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; *) shrext_cmds='.sl' dynamic_linker="$host_os dld.sl" shlibpath_var=SHLIB_PATH shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' ;; esac # HP-UX runs *really* slowly unless shared libraries are mode 555, ... postinstall_cmds='chmod 555 $lib' # or fails outright, so override atomically: install_override_mode=555 ;; interix[[3-9]]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; irix5* | irix6* | nonstopux*) case $host_os in nonstopux*) version_type=nonstopux ;; *) if test yes = "$lt_cv_prog_gnu_ld"; then version_type=linux # correct to gnu/linux during the next big refactor else version_type=irix fi ;; esac need_lib_prefix=no need_version=no soname_spec='$libname$release$shared_ext$major' library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' case $host_os in irix5* | nonstopux*) libsuff= shlibsuff= ;; *) case $LD in # libtool.m4 will add one of these switches to LD *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") libsuff= shlibsuff= libmagic=32-bit;; *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") libsuff=64 shlibsuff=64 libmagic=64-bit;; *) libsuff= shlibsuff= libmagic=never-match;; esac ;; esac shlibpath_var=LD_LIBRARY${shlibsuff}_PATH shlibpath_overrides_runpath=no sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" hardcode_into_libs=yes ;; # No shared lib support for Linux oldld, aout, or coff. linux*oldld* | linux*aout* | linux*coff*) dynamic_linker=no ;; linux*android*) version_type=none # Android doesn't support versioned libraries. need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext' soname_spec='$libname$release$shared_ext' finish_cmds= shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes dynamic_linker='Android linker' # Don't embed -rpath directories since the linker doesn't support them. _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' ;; # This must be glibc/ELF. linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no # Some binutils ld are patched to set DT_RUNPATH AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath], [lt_cv_shlibpath_overrides_runpath=no save_LDFLAGS=$LDFLAGS save_libdir=$libdir eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \ LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\"" AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null], [lt_cv_shlibpath_overrides_runpath=yes])]) LDFLAGS=$save_LDFLAGS libdir=$save_libdir ]) shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes # Ideally, we could use ldconfig to report *all* directores which are # searched for libraries, however this is still not possible. Aside from not # being certain /sbin/ldconfig is available, command # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, # even though it is searched at run-time. Try to do the best guess by # appending ld.so.conf contents (and includes) to the search path. if test -f /etc/ld.so.conf; then lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" fi # We used to test for /lib/ld.so.1 and disable shared libraries on # powerpc, because MkLinux only supported shared libraries with the # GNU dynamic linker. Since this was broken with cross compilers, # most powerpc-linux boxes support dynamic linking these days and # people can always --disable-shared, the test was removed, and we # assume the GNU/Linux dynamic linker is in use. dynamic_linker='GNU/Linux ld.so' ;; netbsdelf*-gnu) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='NetBSD ld.elf_so' ;; netbsd*) version_type=sunos need_lib_prefix=no need_version=no if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' dynamic_linker='NetBSD (a.out) ld.so' else library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' dynamic_linker='NetBSD ld.elf_so' fi shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; newsos6) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; *nto* | *qnx*) version_type=qnx need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='ldqnx.so' ;; openbsd* | bitrig*) version_type=sunos sys_lib_dlsearch_path_spec=/usr/lib need_lib_prefix=no if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then need_version=no else need_version=yes fi library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; os2*) libname_spec='$name' version_type=windows shrext_cmds=.dll need_version=no need_lib_prefix=no # OS/2 can only load a DLL with a base name of 8 characters or less. soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; v=$($ECHO $release$versuffix | tr -d .-); n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); $ECHO $n$v`$shared_ext' library_names_spec='${libname}_dll.$libext' dynamic_linker='OS/2 ld.exe' shlibpath_var=BEGINLIBPATH sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec postinstall_cmds='base_file=`basename \$file`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname~ if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; fi' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; $ECHO \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' ;; osf3* | osf4* | osf5*) version_type=osf need_lib_prefix=no need_version=no soname_spec='$libname$release$shared_ext$major' library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; rdos*) dynamic_linker=no ;; solaris*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes # ldd complains unless libraries are executable postinstall_cmds='chmod +x $lib' ;; sunos4*) version_type=sunos library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes if test yes = "$with_gnu_ld"; then need_lib_prefix=no fi need_version=yes ;; sysv4 | sysv4.3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH case $host_vendor in sni) shlibpath_overrides_runpath=no need_lib_prefix=no runpath_var=LD_RUN_PATH ;; siemens) need_lib_prefix=no ;; motorola) need_lib_prefix=no need_version=no shlibpath_overrides_runpath=no sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' ;; esac ;; sysv4*MP*) if test -d /usr/nec; then version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' soname_spec='$libname$shared_ext.$major' shlibpath_var=LD_LIBRARY_PATH fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) version_type=sco need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes if test yes = "$with_gnu_ld"; then sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' else sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' case $host_os in sco3.2v5*) sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" ;; esac fi sys_lib_dlsearch_path_spec='/usr/lib' ;; tpf*) # TPF is a cross-target only. Preferred cross-host = GNU/Linux. version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; uts4*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' soname_spec='$libname$release$shared_ext$major' shlibpath_var=LD_LIBRARY_PATH ;; *) dynamic_linker=no ;; esac AC_MSG_RESULT([$dynamic_linker]) test no = "$dynamic_linker" && can_build_shared=no variables_saved_for_relink="PATH $shlibpath_var $runpath_var" if test yes = "$GCC"; then variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" fi if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec fi if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec fi # remember unaugmented sys_lib_dlsearch_path content for libtool script decls... configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec # ... but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" # to be used as default LT_SYS_LIBRARY_PATH value in generated libtool configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH _LT_DECL([], [variables_saved_for_relink], [1], [Variables whose values should be saved in libtool wrapper scripts and restored at link time]) _LT_DECL([], [need_lib_prefix], [0], [Do we need the "lib" prefix for modules?]) _LT_DECL([], [need_version], [0], [Do we need a version for libraries?]) _LT_DECL([], [version_type], [0], [Library versioning type]) _LT_DECL([], [runpath_var], [0], [Shared library runtime path variable]) _LT_DECL([], [shlibpath_var], [0],[Shared library path variable]) _LT_DECL([], [shlibpath_overrides_runpath], [0], [Is shlibpath searched before the hard-coded library search path?]) _LT_DECL([], [libname_spec], [1], [Format of library name prefix]) _LT_DECL([], [library_names_spec], [1], [[List of archive names. First name is the real one, the rest are links. The last name is the one that the linker finds with -lNAME]]) _LT_DECL([], [soname_spec], [1], [[The coded name of the library, if different from the real name]]) _LT_DECL([], [install_override_mode], [1], [Permission mode override for installation of shared libraries]) _LT_DECL([], [postinstall_cmds], [2], [Command to use after installation of a shared archive]) _LT_DECL([], [postuninstall_cmds], [2], [Command to use after uninstallation of a shared archive]) _LT_DECL([], [finish_cmds], [2], [Commands used to finish a libtool library installation in a directory]) _LT_DECL([], [finish_eval], [1], [[As "finish_cmds", except a single script fragment to be evaled but not shown]]) _LT_DECL([], [hardcode_into_libs], [0], [Whether we should hardcode library paths into libraries]) _LT_DECL([], [sys_lib_search_path_spec], [2], [Compile-time system search path for libraries]) _LT_DECL([sys_lib_dlsearch_path_spec], [configure_time_dlsearch_path], [2], [Detected run-time system search path for libraries]) _LT_DECL([], [configure_time_lt_sys_library_path], [2], [Explicit LT_SYS_LIBRARY_PATH set during ./configure time]) ])# _LT_SYS_DYNAMIC_LINKER # _LT_PATH_TOOL_PREFIX(TOOL) # -------------------------- # find a file program that can recognize shared library AC_DEFUN([_LT_PATH_TOOL_PREFIX], [m4_require([_LT_DECL_EGREP])dnl AC_MSG_CHECKING([for $1]) AC_CACHE_VAL(lt_cv_path_MAGIC_CMD, [case $MAGIC_CMD in [[\\/*] | ?:[\\/]*]) lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. ;; *) lt_save_MAGIC_CMD=$MAGIC_CMD lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR dnl $ac_dummy forces splitting on constant user-supplied paths. dnl POSIX.2 word splitting is done only on the output of word expansions, dnl not every word. This closes a longstanding sh security hole. ac_dummy="m4_if([$2], , $PATH, [$2])" for ac_dir in $ac_dummy; do IFS=$lt_save_ifs test -z "$ac_dir" && ac_dir=. if test -f "$ac_dir/$1"; then lt_cv_path_MAGIC_CMD=$ac_dir/"$1" if test -n "$file_magic_test_file"; then case $deplibs_check_method in "file_magic "*) file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` MAGIC_CMD=$lt_cv_path_MAGIC_CMD if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | $EGREP "$file_magic_regex" > /dev/null; then : else cat <<_LT_EOF 1>&2 *** Warning: the command libtool uses to detect shared libraries, *** $file_magic_cmd, produces output that libtool cannot recognize. *** The result is that libtool may fail to recognize shared libraries *** as such. This will affect the creation of libtool libraries that *** depend on shared libraries, but programs linked with such libtool *** libraries will work regardless of this problem. Nevertheless, you *** may want to report the problem to your system manager and/or to *** bug-libtool@gnu.org _LT_EOF fi ;; esac fi break fi done IFS=$lt_save_ifs MAGIC_CMD=$lt_save_MAGIC_CMD ;; esac]) MAGIC_CMD=$lt_cv_path_MAGIC_CMD if test -n "$MAGIC_CMD"; then AC_MSG_RESULT($MAGIC_CMD) else AC_MSG_RESULT(no) fi _LT_DECL([], [MAGIC_CMD], [0], [Used to examine libraries when file_magic_cmd begins with "file"])dnl ])# _LT_PATH_TOOL_PREFIX # Old name: AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], []) # _LT_PATH_MAGIC # -------------- # find a file program that can recognize a shared library m4_defun([_LT_PATH_MAGIC], [_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH) if test -z "$lt_cv_path_MAGIC_CMD"; then if test -n "$ac_tool_prefix"; then _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH) else MAGIC_CMD=: fi fi ])# _LT_PATH_MAGIC # LT_PATH_LD # ---------- # find the pathname to the GNU or non-GNU linker AC_DEFUN([LT_PATH_LD], [AC_REQUIRE([AC_PROG_CC])dnl AC_REQUIRE([AC_CANONICAL_HOST])dnl AC_REQUIRE([AC_CANONICAL_BUILD])dnl m4_require([_LT_DECL_SED])dnl m4_require([_LT_DECL_EGREP])dnl m4_require([_LT_PROG_ECHO_BACKSLASH])dnl AC_ARG_WITH([gnu-ld], [AS_HELP_STRING([--with-gnu-ld], [assume the C compiler uses GNU ld @<:@default=no@:>@])], [test no = "$withval" || with_gnu_ld=yes], [with_gnu_ld=no])dnl ac_prog=ld if test yes = "$GCC"; then # Check if gcc -print-prog-name=ld gives a path. AC_MSG_CHECKING([for ld used by $CC]) case $host in *-*-mingw*) # gcc leaves a trailing carriage return, which upsets mingw ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; *) ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; esac case $ac_prog in # Accept absolute paths. [[\\/]]* | ?:[[\\/]]*) re_direlt='/[[^/]][[^/]]*/\.\./' # Canonicalize the pathname of ld ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` done test -z "$LD" && LD=$ac_prog ;; "") # If it fails, then pretend we aren't using GCC. ac_prog=ld ;; *) # If it is relative, then search for the first ld in PATH. with_gnu_ld=unknown ;; esac elif test yes = "$with_gnu_ld"; then AC_MSG_CHECKING([for GNU ld]) else AC_MSG_CHECKING([for non-GNU ld]) fi AC_CACHE_VAL(lt_cv_path_LD, [if test -z "$LD"; then lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR for ac_dir in $PATH; do IFS=$lt_save_ifs test -z "$ac_dir" && ac_dir=. if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then lt_cv_path_LD=$ac_dir/$ac_prog # Check to see if the program is GNU ld. I'd rather use --version, # but apparently some variants of GNU ld only accept -v. # Break only if it was the GNU/non-GNU ld that we prefer. case `"$lt_cv_path_LD" -v 2>&1 &1 conftest.i cat conftest.i conftest.i >conftest2.i : ${lt_DD:=$DD} AC_PATH_PROGS_FEATURE_CHECK([lt_DD], [dd], [if "$ac_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then cmp -s conftest.i conftest.out \ && ac_cv_path_lt_DD="$ac_path_lt_DD" ac_path_lt_DD_found=: fi]) rm -f conftest.i conftest2.i conftest.out]) ])# _LT_PATH_DD # _LT_CMD_TRUNCATE # ---------------- # find command to truncate a binary pipe m4_defun([_LT_CMD_TRUNCATE], [m4_require([_LT_PATH_DD]) AC_CACHE_CHECK([how to truncate binary pipes], [lt_cv_truncate_bin], [printf 0123456789abcdef0123456789abcdef >conftest.i cat conftest.i conftest.i >conftest2.i lt_cv_truncate_bin= if "$ac_cv_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then cmp -s conftest.i conftest.out \ && lt_cv_truncate_bin="$ac_cv_path_lt_DD bs=4096 count=1" fi rm -f conftest.i conftest2.i conftest.out test -z "$lt_cv_truncate_bin" && lt_cv_truncate_bin="$SED -e 4q"]) _LT_DECL([lt_truncate_bin], [lt_cv_truncate_bin], [1], [Command to truncate a binary pipe]) ])# _LT_CMD_TRUNCATE # _LT_CHECK_MAGIC_METHOD # ---------------------- # how to check for library dependencies # -- PORTME fill in with the dynamic library characteristics m4_defun([_LT_CHECK_MAGIC_METHOD], [m4_require([_LT_DECL_EGREP]) m4_require([_LT_DECL_OBJDUMP]) AC_CACHE_CHECK([how to recognize dependent libraries], lt_cv_deplibs_check_method, [lt_cv_file_magic_cmd='$MAGIC_CMD' lt_cv_file_magic_test_file= lt_cv_deplibs_check_method='unknown' # Need to set the preceding variable on all platforms that support # interlibrary dependencies. # 'none' -- dependencies not supported. # 'unknown' -- same as none, but documents that we really don't know. # 'pass_all' -- all dependencies passed with no checks. # 'test_compile' -- check by making test program. # 'file_magic [[regex]]' -- check by looking for files in library path # that responds to the $file_magic_cmd with a given extended regex. # If you have 'file' or equivalent on your system and you're not sure # whether 'pass_all' will *always* work, you probably want this one. case $host_os in aix[[4-9]]*) lt_cv_deplibs_check_method=pass_all ;; beos*) lt_cv_deplibs_check_method=pass_all ;; bsdi[[45]]*) lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)' lt_cv_file_magic_cmd='$FILECMD -L' lt_cv_file_magic_test_file=/shlib/libc.so ;; cygwin*) # func_win32_libid is a shell function defined in ltmain.sh lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' lt_cv_file_magic_cmd='func_win32_libid' ;; mingw* | pw32*) # Base MSYS/MinGW do not provide the 'file' command needed by # func_win32_libid shell function, so use a weaker test based on 'objdump', # unless we find 'file', for example because we are cross-compiling. if ( file / ) >/dev/null 2>&1; then lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' lt_cv_file_magic_cmd='func_win32_libid' else # Keep this pattern in sync with the one in func_win32_libid. lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' lt_cv_file_magic_cmd='$OBJDUMP -f' fi ;; cegcc*) # use the weaker test based on 'objdump'. See mingw*. lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' lt_cv_file_magic_cmd='$OBJDUMP -f' ;; darwin* | rhapsody*) lt_cv_deplibs_check_method=pass_all ;; freebsd* | dragonfly* | midnightbsd*) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then case $host_cpu in i*86 ) # Not sure whether the presence of OpenBSD here was a mistake. # Let's accept both of them until this is cleared up. lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library' lt_cv_file_magic_cmd=$FILECMD lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` ;; esac else lt_cv_deplibs_check_method=pass_all fi ;; haiku*) lt_cv_deplibs_check_method=pass_all ;; hpux10.20* | hpux11*) lt_cv_file_magic_cmd=$FILECMD case $host_cpu in ia64*) lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64' lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so ;; hppa*64*) [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'] lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl ;; *) lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library' lt_cv_file_magic_test_file=/usr/lib/libc.sl ;; esac ;; interix[[3-9]]*) # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$' ;; irix5* | irix6* | nonstopux*) case $LD in *-32|*"-32 ") libmagic=32-bit;; *-n32|*"-n32 ") libmagic=N32;; *-64|*"-64 ") libmagic=64-bit;; *) libmagic=never-match;; esac lt_cv_deplibs_check_method=pass_all ;; # This must be glibc/ELF. linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) lt_cv_deplibs_check_method=pass_all ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$' fi ;; newos6*) lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' lt_cv_file_magic_cmd=$FILECMD lt_cv_file_magic_test_file=/usr/lib/libnls.so ;; *nto* | *qnx*) lt_cv_deplibs_check_method=pass_all ;; openbsd* | bitrig*) if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' fi ;; osf3* | osf4* | osf5*) lt_cv_deplibs_check_method=pass_all ;; rdos*) lt_cv_deplibs_check_method=pass_all ;; solaris*) lt_cv_deplibs_check_method=pass_all ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) lt_cv_deplibs_check_method=pass_all ;; sysv4 | sysv4.3*) case $host_vendor in motorola) lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` ;; ncr) lt_cv_deplibs_check_method=pass_all ;; sequent) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' ;; sni) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" lt_cv_file_magic_test_file=/lib/libc.so ;; siemens) lt_cv_deplibs_check_method=pass_all ;; pc) lt_cv_deplibs_check_method=pass_all ;; esac ;; tpf*) lt_cv_deplibs_check_method=pass_all ;; os2*) lt_cv_deplibs_check_method=pass_all ;; esac ]) file_magic_glob= want_nocaseglob=no if test "$build" = "$host"; then case $host_os in mingw* | pw32*) if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then want_nocaseglob=yes else file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"` fi ;; esac fi file_magic_cmd=$lt_cv_file_magic_cmd deplibs_check_method=$lt_cv_deplibs_check_method test -z "$deplibs_check_method" && deplibs_check_method=unknown _LT_DECL([], [deplibs_check_method], [1], [Method to check whether dependent libraries are shared objects]) _LT_DECL([], [file_magic_cmd], [1], [Command to use when deplibs_check_method = "file_magic"]) _LT_DECL([], [file_magic_glob], [1], [How to find potential files when deplibs_check_method = "file_magic"]) _LT_DECL([], [want_nocaseglob], [1], [Find potential files using nocaseglob when deplibs_check_method = "file_magic"]) ])# _LT_CHECK_MAGIC_METHOD # LT_PATH_NM # ---------- # find the pathname to a BSD- or MS-compatible name lister AC_DEFUN([LT_PATH_NM], [AC_REQUIRE([AC_PROG_CC])dnl AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM, [if test -n "$NM"; then # Let the user override the test. lt_cv_path_NM=$NM else lt_nm_to_check=${ac_tool_prefix}nm if test -n "$ac_tool_prefix" && test "$build" = "$host"; then lt_nm_to_check="$lt_nm_to_check nm" fi for lt_tmp_nm in $lt_nm_to_check; do lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do IFS=$lt_save_ifs test -z "$ac_dir" && ac_dir=. tmp_nm=$ac_dir/$lt_tmp_nm if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext"; then # Check to see if the nm accepts a BSD-compat flag. # Adding the 'sed 1q' prevents false positives on HP-UX, which says: # nm: unknown option "B" ignored # Tru64's nm complains that /dev/null is an invalid object file # MSYS converts /dev/null to NUL, MinGW nm treats NUL as empty case $build_os in mingw*) lt_bad_file=conftest.nm/nofile ;; *) lt_bad_file=/dev/null ;; esac case `"$tmp_nm" -B $lt_bad_file 2>&1 | $SED '1q'` in *$lt_bad_file* | *'Invalid file or object type'*) lt_cv_path_NM="$tmp_nm -B" break 2 ;; *) case `"$tmp_nm" -p /dev/null 2>&1 | $SED '1q'` in */dev/null*) lt_cv_path_NM="$tmp_nm -p" break 2 ;; *) lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but continue # so that we can try to find one that supports BSD flags ;; esac ;; esac fi done IFS=$lt_save_ifs done : ${lt_cv_path_NM=no} fi]) if test no != "$lt_cv_path_NM"; then NM=$lt_cv_path_NM else # Didn't find any BSD compatible name lister, look for dumpbin. if test -n "$DUMPBIN"; then : # Let the user override the test. else AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :) case `$DUMPBIN -symbols -headers /dev/null 2>&1 | $SED '1q'` in *COFF*) DUMPBIN="$DUMPBIN -symbols -headers" ;; *) DUMPBIN=: ;; esac fi AC_SUBST([DUMPBIN]) if test : != "$DUMPBIN"; then NM=$DUMPBIN fi fi test -z "$NM" && NM=nm AC_SUBST([NM]) _LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface], [lt_cv_nm_interface="BSD nm" echo "int some_variable = 0;" > conftest.$ac_ext (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD) (eval "$ac_compile" 2>conftest.err) cat conftest.err >&AS_MESSAGE_LOG_FD (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD) (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) cat conftest.err >&AS_MESSAGE_LOG_FD (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD) cat conftest.out >&AS_MESSAGE_LOG_FD if $GREP 'External.*some_variable' conftest.out > /dev/null; then lt_cv_nm_interface="MS dumpbin" fi rm -f conftest*]) ])# LT_PATH_NM # Old names: AU_ALIAS([AM_PROG_NM], [LT_PATH_NM]) AU_ALIAS([AC_PROG_NM], [LT_PATH_NM]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AM_PROG_NM], []) dnl AC_DEFUN([AC_PROG_NM], []) # _LT_CHECK_SHAREDLIB_FROM_LINKLIB # -------------------------------- # how to determine the name of the shared library # associated with a specific link library. # -- PORTME fill in with the dynamic library characteristics m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB], [m4_require([_LT_DECL_EGREP]) m4_require([_LT_DECL_OBJDUMP]) m4_require([_LT_DECL_DLLTOOL]) AC_CACHE_CHECK([how to associate runtime and link libraries], lt_cv_sharedlib_from_linklib_cmd, [lt_cv_sharedlib_from_linklib_cmd='unknown' case $host_os in cygwin* | mingw* | pw32* | cegcc*) # two different shell functions defined in ltmain.sh; # decide which one to use based on capabilities of $DLLTOOL case `$DLLTOOL --help 2>&1` in *--identify-strict*) lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib ;; *) lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback ;; esac ;; *) # fallback: assume linklib IS sharedlib lt_cv_sharedlib_from_linklib_cmd=$ECHO ;; esac ]) sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO _LT_DECL([], [sharedlib_from_linklib_cmd], [1], [Command to associate shared and link libraries]) ])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB # _LT_PATH_MANIFEST_TOOL # ---------------------- # locate the manifest tool m4_defun([_LT_PATH_MANIFEST_TOOL], [AC_CHECK_TOOL(MANIFEST_TOOL, mt, :) test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool], [lt_cv_path_mainfest_tool=no echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out cat conftest.err >&AS_MESSAGE_LOG_FD if $GREP 'Manifest Tool' conftest.out > /dev/null; then lt_cv_path_mainfest_tool=yes fi rm -f conftest*]) if test yes != "$lt_cv_path_mainfest_tool"; then MANIFEST_TOOL=: fi _LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl ])# _LT_PATH_MANIFEST_TOOL # _LT_DLL_DEF_P([FILE]) # --------------------- # True iff FILE is a Windows DLL '.def' file. # Keep in sync with func_dll_def_p in the libtool script AC_DEFUN([_LT_DLL_DEF_P], [dnl test DEF = "`$SED -n dnl -e '\''s/^[[ ]]*//'\'' dnl Strip leading whitespace -e '\''/^\(;.*\)*$/d'\'' dnl Delete empty lines and comments -e '\''s/^\(EXPORTS\|LIBRARY\)\([[ ]].*\)*$/DEF/p'\'' dnl -e q dnl Only consider the first "real" line $1`" dnl ])# _LT_DLL_DEF_P # LT_LIB_M # -------- # check for math library AC_DEFUN([LT_LIB_M], [AC_REQUIRE([AC_CANONICAL_HOST])dnl LIBM= case $host in *-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*) # These system don't have libm, or don't need it ;; *-ncr-sysv4.3*) AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM=-lmw) AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm") ;; *) AC_CHECK_LIB(m, cos, LIBM=-lm) ;; esac AC_SUBST([LIBM]) ])# LT_LIB_M # Old name: AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_CHECK_LIBM], []) # _LT_COMPILER_NO_RTTI([TAGNAME]) # ------------------------------- m4_defun([_LT_COMPILER_NO_RTTI], [m4_require([_LT_TAG_COMPILER])dnl _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= if test yes = "$GCC"; then case $cc_basename in nvcc*) _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;; *) _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;; esac _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions], lt_cv_prog_compiler_rtti_exceptions, [-fno-rtti -fno-exceptions], [], [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"]) fi _LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1], [Compiler flag to turn off builtin functions]) ])# _LT_COMPILER_NO_RTTI # _LT_CMD_GLOBAL_SYMBOLS # ---------------------- m4_defun([_LT_CMD_GLOBAL_SYMBOLS], [AC_REQUIRE([AC_CANONICAL_HOST])dnl AC_REQUIRE([AC_PROG_CC])dnl AC_REQUIRE([AC_PROG_AWK])dnl AC_REQUIRE([LT_PATH_NM])dnl AC_REQUIRE([LT_PATH_LD])dnl m4_require([_LT_DECL_SED])dnl m4_require([_LT_DECL_EGREP])dnl m4_require([_LT_TAG_COMPILER])dnl # Check for command to grab the raw symbol name followed by C symbol from nm. AC_MSG_CHECKING([command to parse $NM output from $compiler object]) AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], [ # These are sane defaults that work on at least a few old systems. # [They come from Ultrix. What could be older than Ultrix?!! ;)] # Character class describing NM global symbol codes. symcode='[[BCDEGRST]]' # Regexp to match symbols that can be accessed directly from C. sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)' # Define system-specific variables. case $host_os in aix*) symcode='[[BCDT]]' ;; cygwin* | mingw* | pw32* | cegcc*) symcode='[[ABCDGISTW]]' ;; hpux*) if test ia64 = "$host_cpu"; then symcode='[[ABCDEGRST]]' fi ;; irix* | nonstopux*) symcode='[[BCDEGRST]]' ;; osf*) symcode='[[BCDEGQRST]]' ;; solaris*) symcode='[[BDRT]]' ;; sco3.2v5*) symcode='[[DT]]' ;; sysv4.2uw2*) symcode='[[DT]]' ;; sysv5* | sco5v6* | unixware* | OpenUNIX*) symcode='[[ABDT]]' ;; sysv4) symcode='[[DFNSTU]]' ;; esac # If we're using GNU nm, then use its standard symbol codes. case `$NM -V 2>&1` in *GNU* | *'with BFD'*) symcode='[[ABCDGIRSTW]]' ;; esac if test "$lt_cv_nm_interface" = "MS dumpbin"; then # Gets list of data symbols to import. lt_cv_sys_global_symbol_to_import="$SED -n -e 's/^I .* \(.*\)$/\1/p'" # Adjust the below global symbol transforms to fixup imported variables. lt_cdecl_hook=" -e 's/^I .* \(.*\)$/extern __declspec(dllimport) char \1;/p'" lt_c_name_hook=" -e 's/^I .* \(.*\)$/ {\"\1\", (void *) 0},/p'" lt_c_name_lib_hook="\ -e 's/^I .* \(lib.*\)$/ {\"\1\", (void *) 0},/p'\ -e 's/^I .* \(.*\)$/ {\"lib\1\", (void *) 0},/p'" else # Disable hooks by default. lt_cv_sys_global_symbol_to_import= lt_cdecl_hook= lt_c_name_hook= lt_c_name_lib_hook= fi # Transform an extracted symbol line into a proper C declaration. # Some systems (esp. on ia64) link data and code symbols differently, # so use this general approach. lt_cv_sys_global_symbol_to_cdecl="$SED -n"\ $lt_cdecl_hook\ " -e 's/^T .* \(.*\)$/extern int \1();/p'"\ " -e 's/^$symcode$symcode* .* \(.*\)$/extern char \1;/p'" # Transform an extracted symbol line into symbol name and symbol address lt_cv_sys_global_symbol_to_c_name_address="$SED -n"\ $lt_c_name_hook\ " -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ " -e 's/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/p'" # Transform an extracted symbol line into symbol name with lib prefix and # symbol address. lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="$SED -n"\ $lt_c_name_lib_hook\ " -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ " -e 's/^$symcode$symcode* .* \(lib.*\)$/ {\"\1\", (void *) \&\1},/p'"\ " -e 's/^$symcode$symcode* .* \(.*\)$/ {\"lib\1\", (void *) \&\1},/p'" # Handle CRLF in mingw tool chain opt_cr= case $build_os in mingw*) opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp ;; esac # Try without a prefix underscore, then with it. for ac_symprfx in "" "_"; do # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. symxfrm="\\1 $ac_symprfx\\2 \\2" # Write the raw and C identifiers. if test "$lt_cv_nm_interface" = "MS dumpbin"; then # Fake it for dumpbin and say T for any non-static function, # D for any global variable and I for any imported variable. # Also find C++ and __fastcall symbols from MSVC++ or ICC, # which start with @ or ?. lt_cv_sys_global_symbol_pipe="$AWK ['"\ " {last_section=section; section=\$ 3};"\ " /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ " /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ " /^ *Symbol name *: /{split(\$ 0,sn,\":\"); si=substr(sn[2],2)};"\ " /^ *Type *: code/{print \"T\",si,substr(si,length(prfx))};"\ " /^ *Type *: data/{print \"I\",si,substr(si,length(prfx))};"\ " \$ 0!~/External *\|/{next};"\ " / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ " {if(hide[section]) next};"\ " {f=\"D\"}; \$ 0~/\(\).*\|/{f=\"T\"};"\ " {split(\$ 0,a,/\||\r/); split(a[2],s)};"\ " s[1]~/^[@?]/{print f,s[1],s[1]; next};"\ " s[1]~prfx {split(s[1],t,\"@\"); print f,t[1],substr(t[1],length(prfx))}"\ " ' prfx=^$ac_symprfx]" else lt_cv_sys_global_symbol_pipe="$SED -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" fi lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | $SED '/ __gnu_lto/d'" # Check to see that the pipe works correctly. pipe_works=no rm -f conftest* cat > conftest.$ac_ext <<_LT_EOF #ifdef __cplusplus extern "C" { #endif char nm_test_var; void nm_test_func(void); void nm_test_func(void){} #ifdef __cplusplus } #endif int main(){nm_test_var='a';nm_test_func();return(0);} _LT_EOF if AC_TRY_EVAL(ac_compile); then # Now try to grab the symbols. nlist=conftest.nm $ECHO "$as_me:$LINENO: $NM conftest.$ac_objext | $lt_cv_sys_global_symbol_pipe > $nlist" >&AS_MESSAGE_LOG_FD if eval "$NM" conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist 2>&AS_MESSAGE_LOG_FD && test -s "$nlist"; then # Try sorting and uniquifying the output. if sort "$nlist" | uniq > "$nlist"T; then mv -f "$nlist"T "$nlist" else rm -f "$nlist"T fi # Make sure that we snagged all the symbols we need. if $GREP ' nm_test_var$' "$nlist" >/dev/null; then if $GREP ' nm_test_func$' "$nlist" >/dev/null; then cat <<_LT_EOF > conftest.$ac_ext /* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ #if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE /* DATA imports from DLLs on WIN32 can't be const, because runtime relocations are performed -- see ld's documentation on pseudo-relocs. */ # define LT@&t@_DLSYM_CONST #elif defined __osf__ /* This system does not cope well with relocations in const data. */ # define LT@&t@_DLSYM_CONST #else # define LT@&t@_DLSYM_CONST const #endif #ifdef __cplusplus extern "C" { #endif _LT_EOF # Now generate the symbol file. eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' cat <<_LT_EOF >> conftest.$ac_ext /* The mapping between symbol names and symbols. */ LT@&t@_DLSYM_CONST struct { const char *name; void *address; } lt__PROGRAM__LTX_preloaded_symbols[[]] = { { "@PROGRAM@", (void *) 0 }, _LT_EOF $SED "s/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext cat <<\_LT_EOF >> conftest.$ac_ext {0, (void *) 0} }; /* This works around a problem in FreeBSD linker */ #ifdef FREEBSD_WORKAROUND static const void *lt_preloaded_setup() { return lt__PROGRAM__LTX_preloaded_symbols; } #endif #ifdef __cplusplus } #endif _LT_EOF # Now try linking the two files. mv conftest.$ac_objext conftstm.$ac_objext lt_globsym_save_LIBS=$LIBS lt_globsym_save_CFLAGS=$CFLAGS LIBS=conftstm.$ac_objext CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)" if AC_TRY_EVAL(ac_link) && test -s conftest$ac_exeext; then pipe_works=yes fi LIBS=$lt_globsym_save_LIBS CFLAGS=$lt_globsym_save_CFLAGS else echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD fi else echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD fi else echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD fi else echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD cat conftest.$ac_ext >&5 fi rm -rf conftest* conftst* # Do not use the global_symbol_pipe unless it works. if test yes = "$pipe_works"; then break else lt_cv_sys_global_symbol_pipe= fi done ]) if test -z "$lt_cv_sys_global_symbol_pipe"; then lt_cv_sys_global_symbol_to_cdecl= fi if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then AC_MSG_RESULT(failed) else AC_MSG_RESULT(ok) fi # Response file support. if test "$lt_cv_nm_interface" = "MS dumpbin"; then nm_file_list_spec='@' elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then nm_file_list_spec='@' fi _LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1], [Take the output of nm and produce a listing of raw symbols and C names]) _LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1], [Transform the output of nm in a proper C declaration]) _LT_DECL([global_symbol_to_import], [lt_cv_sys_global_symbol_to_import], [1], [Transform the output of nm into a list of symbols to manually relocate]) _LT_DECL([global_symbol_to_c_name_address], [lt_cv_sys_global_symbol_to_c_name_address], [1], [Transform the output of nm in a C name address pair]) _LT_DECL([global_symbol_to_c_name_address_lib_prefix], [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1], [Transform the output of nm in a C name address pair when lib prefix is needed]) _LT_DECL([nm_interface], [lt_cv_nm_interface], [1], [The name lister interface]) _LT_DECL([], [nm_file_list_spec], [1], [Specify filename containing input files for $NM]) ]) # _LT_CMD_GLOBAL_SYMBOLS # _LT_COMPILER_PIC([TAGNAME]) # --------------------------- m4_defun([_LT_COMPILER_PIC], [m4_require([_LT_TAG_COMPILER])dnl _LT_TAGVAR(lt_prog_compiler_wl, $1)= _LT_TAGVAR(lt_prog_compiler_pic, $1)= _LT_TAGVAR(lt_prog_compiler_static, $1)= m4_if([$1], [CXX], [ # C++ specific cases for pic, static, wl, etc. if test yes = "$GXX"; then _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' case $host_os in aix*) # All AIX code is PIC. if test ia64 = "$host_cpu"; then # AIX 5 now supports IA64 processor _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' fi _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; m68k) # FIXME: we need at least 68020 code to build shared libraries, but # adding the '-m68020' flag to GCC prevents building anything better, # like '-m68040'. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' ;; esac ;; beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. ;; mingw* | cygwin* | os2* | pw32* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style # (--disable-auto-import) libraries m4_if([$1], [GCJ], [], [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) case $host_os in os2*) _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' ;; esac ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' ;; *djgpp*) # DJGPP does not support shared libraries at all _LT_TAGVAR(lt_prog_compiler_pic, $1)= ;; haiku*) # PIC is the default for Haiku. # The "-static" flag exists, but is broken. _LT_TAGVAR(lt_prog_compiler_static, $1)= ;; interix[[3-9]]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; sysv4*MP*) if test -d /usr/nec; then _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic fi ;; hpux*) # PIC is the default for 64-bit PA HP-UX, but not for 32-bit # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag # sets the default TLS model and affects inlining. case $host_cpu in hppa*64*) ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; esac ;; *qnx* | *nto*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; esac else case $host_os in aix[[4-9]]*) # All AIX code is PIC. if test ia64 = "$host_cpu"; then # AIX 5 now supports IA64 processor _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' else _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' fi ;; chorus*) case $cc_basename in cxch68*) # Green Hills C++ Compiler # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" ;; esac ;; mingw* | cygwin* | os2* | pw32* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). m4_if([$1], [GCJ], [], [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) ;; dgux*) case $cc_basename in ec++*) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' ;; ghcx*) # Green Hills C++ Compiler _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' ;; *) ;; esac ;; freebsd* | dragonfly* | midnightbsd*) # FreeBSD uses GNU C++ ;; hpux9* | hpux10* | hpux11*) case $cc_basename in CC*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' if test ia64 != "$host_cpu"; then _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' fi ;; aCC*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' case $host_cpu in hppa*64*|ia64*) # +Z the default ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' ;; esac ;; *) ;; esac ;; interix*) # This is c89, which is MS Visual C++ (no shared libs) # Anyone wants to do a port? ;; irix5* | irix6* | nonstopux*) case $cc_basename in CC*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' # CC pic flag -KPIC is the default. ;; *) ;; esac ;; linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) case $cc_basename in KCC*) # KAI C++ Compiler _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; ecpc* ) # old Intel C++ for x86_64, which still supported -KPIC. _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; icpc* ) # Intel C++, used to be incompatible with GCC. # ICC 10 doesn't accept -KPIC any more. _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; pgCC* | pgcpp*) # Portland Group C++ compiler _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; cxx*) # Compaq C++ # Make sure the PIC flag is empty. It appears that all Alpha # Linux and Compaq Tru64 Unix objects are PIC. _LT_TAGVAR(lt_prog_compiler_pic, $1)= _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL 8.0, 9.0 on PPC and BlueGene _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' ;; *) case `$CC -V 2>&1 | $SED 5q` in *Sun\ C*) # Sun C++ 5.9 _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' ;; esac ;; esac ;; lynxos*) ;; m88k*) ;; mvs*) case $cc_basename in cxx*) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall' ;; *) ;; esac ;; netbsd* | netbsdelf*-gnu) ;; *qnx* | *nto*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' ;; osf3* | osf4* | osf5*) case $cc_basename in KCC*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' ;; RCC*) # Rational C++ 2.4.1 _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' ;; cxx*) # Digital/Compaq C++ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' # Make sure the PIC flag is empty. It appears that all Alpha # Linux and Compaq Tru64 Unix objects are PIC. _LT_TAGVAR(lt_prog_compiler_pic, $1)= _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; *) ;; esac ;; psos*) ;; solaris*) case $cc_basename in CC* | sunCC*) # Sun C++ 4.2, 5.x and Centerline C++ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' ;; gcx*) # Green Hills C++ Compiler _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' ;; *) ;; esac ;; sunos4*) case $cc_basename in CC*) # Sun C++ 4.x _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; lcc*) # Lucid _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' ;; *) ;; esac ;; sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) case $cc_basename in CC*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; esac ;; tandem*) case $cc_basename in NCC*) # NonStop-UX NCC 3.20 _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' ;; *) ;; esac ;; vxworks*) ;; *) _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no ;; esac fi ], [ if test yes = "$GCC"; then _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' case $host_os in aix*) # All AIX code is PIC. if test ia64 = "$host_cpu"; then # AIX 5 now supports IA64 processor _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' fi _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; m68k) # FIXME: we need at least 68020 code to build shared libraries, but # adding the '-m68020' flag to GCC prevents building anything better, # like '-m68040'. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' ;; esac ;; beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. ;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style # (--disable-auto-import) libraries m4_if([$1], [GCJ], [], [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) case $host_os in os2*) _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' ;; esac ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' ;; haiku*) # PIC is the default for Haiku. # The "-static" flag exists, but is broken. _LT_TAGVAR(lt_prog_compiler_static, $1)= ;; hpux*) # PIC is the default for 64-bit PA HP-UX, but not for 32-bit # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag # sets the default TLS model and affects inlining. case $host_cpu in hppa*64*) # +Z the default ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; esac ;; interix[[3-9]]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; msdosdjgpp*) # Just because we use GCC doesn't mean we suddenly get shared libraries # on systems that don't support them. _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no enable_shared=no ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' ;; sysv4*MP*) if test -d /usr/nec; then _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic fi ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; esac case $cc_basename in nvcc*) # Cuda Compiler Driver 2.2 _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker ' if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)" fi ;; esac else # PORTME Check for flag to pass linker flags through the system compiler. case $host_os in aix*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' if test ia64 = "$host_cpu"; then # AIX 5 now supports IA64 processor _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' else _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' fi ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' case $cc_basename in nagfor*) # NAG Fortran compiler _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; esac ;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). m4_if([$1], [GCJ], [], [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) case $host_os in os2*) _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' ;; esac ;; hpux9* | hpux10* | hpux11*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but # not for PA HP-UX. case $host_cpu in hppa*64*|ia64*) # +Z the default ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' ;; esac # Is there a better lt_prog_compiler_static that works with the bundled CC? _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' ;; irix5* | irix6* | nonstopux*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' # PIC (with -KPIC) is the default. _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) case $cc_basename in # old Intel for x86_64, which still supported -KPIC. ecc*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; # flang / f18. f95 an alias for gfortran or flang on Debian flang* | f18* | f95*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; # icc used to be incompatible with GCC. # ICC 10 doesn't accept -KPIC any more. icc* | ifort*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; # Lahey Fortran 8.1. lf95*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared' _LT_TAGVAR(lt_prog_compiler_static, $1)='--static' ;; nagfor*) # NAG Fortran compiler _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; tcc*) # Fabrice Bellard et al's Tiny C Compiler _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group compilers (*not* the Pentium gcc compiler, # which looks to be a dead project) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; ccc*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' # All Alpha code is PIC. _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; xl* | bgxl* | bgf* | mpixl*) # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' ;; *) case `$CC -V 2>&1 | $SED 5q` in *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*) # Sun Fortran 8.3 passes all unrecognized flags to the linker _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' _LT_TAGVAR(lt_prog_compiler_wl, $1)='' ;; *Sun\ F* | *Sun*Fortran*) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' ;; *Sun\ C*) # Sun C 5.9 _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' ;; *Intel*\ [[CF]]*Compiler*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; *Portland\ Group*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; esac ;; esac ;; newsos6) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' ;; osf3* | osf4* | osf5*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' # All OSF/1 code is PIC. _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; rdos*) _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; solaris*) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' case $cc_basename in f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';; *) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';; esac ;; sunos4*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; sysv4 | sysv4.2uw2* | sysv4.3*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; sysv4*MP*) if test -d /usr/nec; then _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' fi ;; sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; unicos*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no ;; uts4*) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; *) _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no ;; esac fi ]) case $host_os in # For platforms that do not support PIC, -DPIC is meaningless: *djgpp*) _LT_TAGVAR(lt_prog_compiler_pic, $1)= ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])" ;; esac AC_CACHE_CHECK([for $compiler option to produce PIC], [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)], [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)]) _LT_TAGVAR(lt_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1) # # Check to make sure the PIC flag actually works. # if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works], [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)], [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [], [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in "" | " "*) ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;; esac], [_LT_TAGVAR(lt_prog_compiler_pic, $1)= _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no]) fi _LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1], [Additional compiler flags for building library objects]) _LT_TAGDECL([wl], [lt_prog_compiler_wl], [1], [How to pass a linker flag through the compiler]) # # Check to make sure the static flag actually works. # wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\" _LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works], _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1), $lt_tmp_static_flag, [], [_LT_TAGVAR(lt_prog_compiler_static, $1)=]) _LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1], [Compiler flag to prevent dynamic linking]) ])# _LT_COMPILER_PIC # _LT_LINKER_SHLIBS([TAGNAME]) # ---------------------------- # See if the linker supports building shared libraries. m4_defun([_LT_LINKER_SHLIBS], [AC_REQUIRE([LT_PATH_LD])dnl AC_REQUIRE([LT_PATH_NM])dnl m4_require([_LT_PATH_MANIFEST_TOOL])dnl m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_DECL_EGREP])dnl m4_require([_LT_DECL_SED])dnl m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl m4_require([_LT_TAG_COMPILER])dnl AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) m4_if([$1], [CXX], [ _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] case $host_os in aix[[4-9]]*) # If we're using GNU nm, then we don't want the "-C" option. # -C means demangle to GNU nm, but means don't demangle to AIX nm. # Without the "-l" option, or with the "-B" option, AIX nm treats # weak defined symbols like other global defined symbols, whereas # GNU nm marks them as "W". # While the 'weak' keyword is ignored in the Export File, we need # it in the Import File for the 'aix-soname' feature, so we have # to replace the "-B" option with "-P" for AIX nm. if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' else _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' fi ;; pw32*) _LT_TAGVAR(export_symbols_cmds, $1)=$ltdll_cmds ;; cygwin* | mingw* | cegcc*) case $cc_basename in cl* | icl*) _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' ;; *) _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] ;; esac ;; linux* | k*bsd*-gnu | gnu*) _LT_TAGVAR(link_all_deplibs, $1)=no ;; *) _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' ;; esac ], [ runpath_var= _LT_TAGVAR(allow_undefined_flag, $1)= _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(archive_cmds, $1)= _LT_TAGVAR(archive_expsym_cmds, $1)= _LT_TAGVAR(compiler_needs_object, $1)=no _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no _LT_TAGVAR(export_dynamic_flag_spec, $1)= _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' _LT_TAGVAR(hardcode_automatic, $1)=no _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_TAGVAR(hardcode_libdir_separator, $1)= _LT_TAGVAR(hardcode_minus_L, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported _LT_TAGVAR(inherit_rpath, $1)=no _LT_TAGVAR(link_all_deplibs, $1)=unknown _LT_TAGVAR(module_cmds, $1)= _LT_TAGVAR(module_expsym_cmds, $1)= _LT_TAGVAR(old_archive_from_new_cmds, $1)= _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)= _LT_TAGVAR(thread_safe_flag_spec, $1)= _LT_TAGVAR(whole_archive_flag_spec, $1)= # include_expsyms should be a list of space-separated symbols to be *always* # included in the symbol list _LT_TAGVAR(include_expsyms, $1)= # exclude_expsyms can be an extended regexp of symbols to exclude # it will be wrapped by ' (' and ')$', so one must not match beginning or # end of line. Example: 'a|bc|.*d.*' will exclude the symbols 'a' and 'bc', # as well as any symbol that contains 'd'. _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out # platforms (ab)use it in PIC code, but their linkers get confused if # the symbol is explicitly referenced. Since portable code cannot # rely on this symbol name, it's probably fine to never include it in # preloaded symbol tables. # Exclude shared library initialization/finalization symbols. dnl Note also adjust exclude_expsyms for C++ above. extract_expsyms_cmds= case $host_os in cygwin* | mingw* | pw32* | cegcc*) # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using # Microsoft Visual C++ or Intel C++ Compiler. if test yes != "$GCC"; then with_gnu_ld=no fi ;; interix*) # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) with_gnu_ld=yes ;; openbsd* | bitrig*) with_gnu_ld=no ;; linux* | k*bsd*-gnu | gnu*) _LT_TAGVAR(link_all_deplibs, $1)=no ;; esac _LT_TAGVAR(ld_shlibs, $1)=yes # On some targets, GNU ld is compatible enough with the native linker # that we're better off using the native interface for both. lt_use_gnu_ld_interface=no if test yes = "$with_gnu_ld"; then case $host_os in aix*) # The AIX port of GNU ld has always aspired to compatibility # with the native linker. However, as the warning in the GNU ld # block says, versions before 2.19.5* couldn't really create working # shared libraries, regardless of the interface used. case `$LD -v 2>&1` in *\ \(GNU\ Binutils\)\ 2.19.5*) ;; *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;; *\ \(GNU\ Binutils\)\ [[3-9]]*) ;; *) lt_use_gnu_ld_interface=yes ;; esac ;; *) lt_use_gnu_ld_interface=yes ;; esac fi if test yes = "$lt_use_gnu_ld_interface"; then # If archive_cmds runs LD, not CC, wlarc should be empty wlarc='$wl' # Set some defaults for GNU ld with shared library support. These # are reset later if shared libraries are not supported. Putting them # here allows them to be overridden if necessary. runpath_var=LD_RUN_PATH _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' # ancient GNU ld didn't support --whole-archive et. al. if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' else _LT_TAGVAR(whole_archive_flag_spec, $1)= fi supports_anon_versioning=no case `$LD -v | $SED -e 's/([[^)]]\+)\s\+//' 2>&1` in *GNU\ gold*) supports_anon_versioning=yes ;; *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11 *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... *\ 2.11.*) ;; # other 2.11 versions *) supports_anon_versioning=yes ;; esac # See if GNU ld supports shared libraries. case $host_os in aix[[3-9]]*) # On AIX/PPC, the GNU linker is very broken if test ia64 != "$host_cpu"; then _LT_TAGVAR(ld_shlibs, $1)=no cat <<_LT_EOF 1>&2 *** Warning: the GNU linker, at least up to release 2.19, is reported *** to be unable to reliably create shared libraries on AIX. *** Therefore, libtool is disabling shared libraries support. If you *** really care for shared libraries, you may want to install binutils *** 2.20 or above, or modify your PATH so that a non-GNU linker is found. *** You will then need to restart the configuration process. _LT_EOF fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='' ;; m68k) _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_minus_L, $1)=yes ;; esac ;; beos*) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then _LT_TAGVAR(allow_undefined_flag, $1)=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. FIXME _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; cygwin* | mingw* | pw32* | cegcc*) # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, # as there is no search path for DLLs. _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-all-symbols' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file, use it as # is; otherwise, prepend EXPORTS... _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; haiku*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' _LT_TAGVAR(link_all_deplibs, $1)=yes ;; os2*) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(allow_undefined_flag, $1)=unsupported shrext_cmds=.dll _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ prefix_cmds="$SED"~ if test EXPORTS = "`$SED 1q $export_symbols`"; then prefix_cmds="$prefix_cmds -e 1d"; fi~ prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes _LT_TAGVAR(file_list_spec, $1)='@' ;; interix[[3-9]]*) _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) tmp_diet=no if test linux-dietlibc = "$host_os"; then case $cc_basename in diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) esac fi if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ && test no = "$tmp_diet" then tmp_addflag=' $pic_flag' tmp_sharedflag='-shared' case $cc_basename,$host_cpu in pgcc*) # Portland Group C compiler _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' tmp_addflag=' $pic_flag' ;; pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group f77 and f90 compilers _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' tmp_addflag=' $pic_flag -Mnomain' ;; ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 tmp_addflag=' -i_dynamic' ;; efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 tmp_addflag=' -i_dynamic -nofor_main' ;; ifc* | ifort*) # Intel Fortran compiler tmp_addflag=' -nofor_main' ;; lf95*) # Lahey Fortran 8.1 _LT_TAGVAR(whole_archive_flag_spec, $1)= tmp_sharedflag='--shared' ;; nagfor*) # NAGFOR 5.3 tmp_sharedflag='-Wl,-shared' ;; xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below) tmp_sharedflag='-qmkshrobj' tmp_addflag= ;; nvcc*) # Cuda Compiler Driver 2.2 _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' _LT_TAGVAR(compiler_needs_object, $1)=yes ;; esac case `$CC -V 2>&1 | $SED 5q` in *Sun\ C*) # Sun C 5.9 _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' _LT_TAGVAR(compiler_needs_object, $1)=yes tmp_sharedflag='-G' ;; *Sun\ F*) # Sun Fortran 8.3 tmp_sharedflag='-G' ;; esac _LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' if test yes = "$supports_anon_versioning"; then _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' fi case $cc_basename in tcc*) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='-rdynamic' ;; xlf* | bgf* | bgxlf* | mpixlf*) # IBM XL Fortran 10.1 on PPC cannot create shared libs itself _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' if test yes = "$supports_anon_versioning"; then _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' fi ;; esac else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' wlarc= else _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' fi ;; solaris*) if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then _LT_TAGVAR(ld_shlibs, $1)=no cat <<_LT_EOF 1>&2 *** Warning: The releases 2.8.* of the GNU linker cannot reliably *** create shared libraries on Solaris systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.9.1 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) case `$LD -v 2>&1` in *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*) _LT_TAGVAR(ld_shlibs, $1)=no cat <<_LT_EOF 1>&2 *** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot *** reliably create shared libraries on SCO systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.16.91.0.3 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF ;; *) # For security reasons, it is highly recommended that you always # use absolute paths for naming shared libraries, and exclude the # DT_RUNPATH tag from executables and libraries. But doing so # requires that you compile everything twice, which is a pain. if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; sunos4*) _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' wlarc= _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac if test no = "$_LT_TAGVAR(ld_shlibs, $1)"; then runpath_var= _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_TAGVAR(export_dynamic_flag_spec, $1)= _LT_TAGVAR(whole_archive_flag_spec, $1)= fi else # PORTME fill in a description of your system's linker (not GNU ld) case $host_os in aix3*) _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=yes _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' # Note: this linker hardcodes the directories in LIBPATH if there # are no directories specified by -L. _LT_TAGVAR(hardcode_minus_L, $1)=yes if test yes = "$GCC" && test -z "$lt_prog_compiler_static"; then # Neither direct hardcoding nor static linking is supported with a # broken collect2. _LT_TAGVAR(hardcode_direct, $1)=unsupported fi ;; aix[[4-9]]*) if test ia64 = "$host_cpu"; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag= else # If we're using GNU nm, then we don't want the "-C" option. # -C means demangle to GNU nm, but means don't demangle to AIX nm. # Without the "-l" option, or with the "-B" option, AIX nm treats # weak defined symbols like other global defined symbols, whereas # GNU nm marks them as "W". # While the 'weak' keyword is ignored in the Export File, we need # it in the Import File for the 'aix-soname' feature, so we have # to replace the "-B" option with "-P" for AIX nm. if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' else _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' fi aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # have runtime linking enabled, and use it for executables. # For shared libraries, we enable/disable runtime linking # depending on the kind of the shared library created - # when "with_aix_soname,aix_use_runtimelinking" is: # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables # "aix,yes" lib.so shared, rtl:yes, for executables # lib.a static archive # "both,no" lib.so.V(shr.o) shared, rtl:yes # lib.a(lib.so.V) shared, rtl:no, for executables # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables # lib.a(lib.so.V) shared, rtl:no # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables # lib.a static archive case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) for ld_flag in $LDFLAGS; do if (test x-brtl = "x$ld_flag" || test x-Wl,-brtl = "x$ld_flag"); then aix_use_runtimelinking=yes break fi done if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then # With aix-soname=svr4, we create the lib.so.V shared archives only, # so we don't have lib.a shared libs to link our executables. # We have to force runtime linking in this case. aix_use_runtimelinking=yes LDFLAGS="$LDFLAGS -Wl,-brtl" fi ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. _LT_TAGVAR(archive_cmds, $1)='' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_TAGVAR(link_all_deplibs, $1)=yes _LT_TAGVAR(file_list_spec, $1)='$wl-f,' case $with_aix_soname,$aix_use_runtimelinking in aix,*) ;; # traditional, no import file svr4,* | *,yes) # use import file # The Import File defines what to hardcode. _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=no ;; esac if test yes = "$GCC"; then case $host_os in aix4.[[012]]|aix4.[[012]].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`$CC -print-prog-name=collect2` if test -f "$collect2name" && strings "$collect2name" | $GREP resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 _LT_TAGVAR(hardcode_direct, $1)=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)= fi ;; esac shared_flag='-shared' if test yes = "$aix_use_runtimelinking"; then shared_flag="$shared_flag "'$wl-G' fi # Need to ensure runtime linking is disabled for the traditional # shared library, or the linker may eventually find shared libraries # /with/ Import File - we do not want to mix them. shared_flag_aix='-shared' shared_flag_svr4='-shared $wl-G' else # not using gcc if test ia64 = "$host_cpu"; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test yes = "$aix_use_runtimelinking"; then shared_flag='$wl-G' else shared_flag='$wl-bM:SRE' fi shared_flag_aix='$wl-bM:SRE' shared_flag_svr4='$wl-G' fi fi _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-bexpall' # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to export. _LT_TAGVAR(always_export_symbols, $1)=yes if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. _LT_TAGVAR(allow_undefined_flag, $1)='-berok' # Determine the default libpath from the value encoded in an # empty executable. _LT_SYS_MODULE_PATH_AIX([$1]) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag else if test ia64 = "$host_cpu"; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $libdir:/usr/lib:/lib' _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an # empty executable. _LT_SYS_MODULE_PATH_AIX([$1]) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. _LT_TAGVAR(no_undefined_flag, $1)=' $wl-bernotok' _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-berok' if test yes = "$with_gnu_ld"; then # We only use this code for GNU lds that support --whole-archive. _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' else # Exported symbols can be pulled into shared objects from archives _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' fi _LT_TAGVAR(archive_cmds_need_lc, $1)=yes _LT_TAGVAR(archive_expsym_cmds, $1)='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' # -brtl affects multiple linker settings, -berok does not and is overridden later compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([[, ]]\\)%-berok\\1%g"`' if test svr4 != "$with_aix_soname"; then # This is similar to how AIX traditionally builds its shared libraries. _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' fi if test aix != "$with_aix_soname"; then _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! $soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' else # used by -dlpreopen to get the symbols _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$MV $output_objdir/$realname.d/$soname $output_objdir' fi _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$RM -r $output_objdir/$realname.d' fi fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='' ;; m68k) _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_minus_L, $1)=yes ;; esac ;; bsdi[[45]]*) _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic ;; cygwin* | mingw* | pw32* | cegcc*) # When not using gcc, we currently assume that we are using # Microsoft Visual C++ or Intel C++ Compiler. # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. case $cc_basename in cl* | icl*) # Native MSVC or ICC _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=yes _LT_TAGVAR(file_list_spec, $1)='@' # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=.dll # FIXME: Setting linknames here is a bad hack. _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then cp "$export_symbols" "$output_objdir/$soname.def"; echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; else $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; fi~ $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ linknames=' # The linker will not automatically build a static lib if we build a DLL. # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' # Don't use ranlib _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ lt_tool_outputfile="@TOOL_OUTPUT@"~ case $lt_outputfile in *.exe|*.EXE) ;; *) lt_outputfile=$lt_outputfile.exe lt_tool_outputfile=$lt_tool_outputfile.exe ;; esac~ if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; $RM "$lt_outputfile.manifest"; fi' ;; *) # Assume MSVC and ICC wrapper _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=.dll # FIXME: Setting linknames here is a bad hack. _LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' # The linker will automatically build a .lib file if we build a DLL. _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' # FIXME: Should let the user specify the lib program. _LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes ;; esac ;; darwin* | rhapsody*) _LT_DARWIN_LINKER_FEATURES($1) ;; dgux*) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor # support. Future versions do this automatically, but an explicit c++rt0.o # does not break anything, and helps significantly (at the cost of a little # extra space). freebsd2.2*) _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; # Unfortunately, older versions of FreeBSD 2 do not have this feature. freebsd2.*) _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; # FreeBSD 3 and greater uses gcc -shared to do shared libraries. freebsd* | dragonfly* | midnightbsd*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; hpux9*) if test yes = "$GCC"; then _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' else _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(hardcode_direct, $1)=yes # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' ;; hpux10*) if test yes,no = "$GCC,$with_gnu_ld"; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' else _LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' fi if test no = "$with_gnu_ld"; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. _LT_TAGVAR(hardcode_minus_L, $1)=yes fi ;; hpux11*) if test yes,no = "$GCC,$with_gnu_ld"; then case $host_cpu in hppa*64*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac else case $host_cpu in hppa*64*) _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) m4_if($1, [], [ # Older versions of the 11.00 compiler do not understand -b yet # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) _LT_LINKER_OPTION([if $CC understands -b], _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b], [_LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags'], [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])], [_LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags']) ;; esac fi if test no = "$with_gnu_ld"; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: case $host_cpu in hppa*64*|ia64*) _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. _LT_TAGVAR(hardcode_minus_L, $1)=yes ;; esac fi ;; irix5* | irix6* | nonstopux*) if test yes = "$GCC"; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' # Try to use the -exported_symbol ld option, if it does not # work, assume that -exports_file does not work either and # implicitly export all symbols. # This should be the same for all languages, so no per-tag cache variable. AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol], [lt_cv_irix_exported_symbol], [save_LDFLAGS=$LDFLAGS LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" AC_LINK_IFELSE( [AC_LANG_SOURCE( [AC_LANG_CASE([C], [[int foo (void) { return 0; }]], [C++], [[int foo (void) { return 0; }]], [Fortran 77], [[ subroutine foo end]], [Fortran], [[ subroutine foo end]])])], [lt_cv_irix_exported_symbol=yes], [lt_cv_irix_exported_symbol=no]) LDFLAGS=$save_LDFLAGS]) if test yes = "$lt_cv_irix_exported_symbol"; then _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' fi _LT_TAGVAR(link_all_deplibs, $1)=no else _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' fi _LT_TAGVAR(archive_cmds_need_lc, $1)='no' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(inherit_rpath, $1)=yes _LT_TAGVAR(link_all_deplibs, $1)=yes ;; linux*) case $cc_basename in tcc*) # Fabrice Bellard et al's Tiny C Compiler _LT_TAGVAR(ld_shlibs, $1)=yes _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' ;; esac ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out else _LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; newsos6) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *nto* | *qnx*) ;; openbsd* | bitrig*) if test -f /usr/libexec/ld.so; then _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=yes if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags $wl-retain-symbols-file,$export_symbols' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' else _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' fi else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; os2*) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(allow_undefined_flag, $1)=unsupported shrext_cmds=.dll _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ prefix_cmds="$SED"~ if test EXPORTS = "`$SED 1q $export_symbols`"; then prefix_cmds="$prefix_cmds -e 1d"; fi~ prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes _LT_TAGVAR(file_list_spec, $1)='@' ;; osf3*) if test yes = "$GCC"; then _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' else _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' fi _LT_TAGVAR(archive_cmds_need_lc, $1)='no' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: ;; osf4* | osf5*) # as osf3* with the addition of -msym flag if test yes = "$GCC"; then _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $pic_flag $libobjs $deplibs $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' else _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ $CC -shared$allow_undefined_flag $wl-input $wl$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~$RM $lib.exp' # Both c and cxx compiler support -rpath directly _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' fi _LT_TAGVAR(archive_cmds_need_lc, $1)='no' _LT_TAGVAR(hardcode_libdir_separator, $1)=: ;; solaris*) _LT_TAGVAR(no_undefined_flag, $1)=' -z defs' if test yes = "$GCC"; then wlarc='$wl' _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl-z ${wl}text $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -shared $pic_flag $wl-z ${wl}text $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' else case `$CC -V 2>&1` in *"Compilers 5.0"*) wlarc='' _LT_TAGVAR(archive_cmds, $1)='$LD -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $LD -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' ;; *) wlarc='$wl' _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' ;; esac fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no case $host_os in solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands '-z linker_flag'. GCC discards it without '$wl', # but is careful enough not to reorder. # Supported since Solaris 2.6 (maybe 2.5.1?) if test yes = "$GCC"; then _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' else _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' fi ;; esac _LT_TAGVAR(link_all_deplibs, $1)=yes ;; sunos4*) if test sequent = "$host_vendor"; then # Use $CC to link under sequent, because it throws in some extra .o # files that make .init and .fini sections work. _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h $soname -o $lib $libobjs $deplibs $compiler_flags' else _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; sysv4) case $host_vendor in sni) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true??? ;; siemens) ## LD is ld it makes a PLAMLIB ## CC just makes a GrossModule. _LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs' _LT_TAGVAR(hardcode_direct, $1)=no ;; motorola) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie ;; esac runpath_var='LD_RUN_PATH' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; sysv4.3*) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport' ;; sysv4*MP*) if test -d /usr/nec; then _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no runpath_var=LD_RUN_PATH hardcode_runpath_var=yes _LT_TAGVAR(ld_shlibs, $1)=yes fi ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no runpath_var='LD_RUN_PATH' if test yes = "$GCC"; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We CANNOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' _LT_TAGVAR(allow_undefined_flag, $1)='$wl-z,nodefs' _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R,$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_TAGVAR(link_all_deplibs, $1)=yes _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Bexport' runpath_var='LD_RUN_PATH' if test yes = "$GCC"; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; uts4*) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) _LT_TAGVAR(ld_shlibs, $1)=no ;; esac if test sni = "$host_vendor"; then case $host in sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Blargedynsym' ;; esac fi fi ]) AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) test no = "$_LT_TAGVAR(ld_shlibs, $1)" && can_build_shared=no _LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld _LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl _LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl _LT_DECL([], [extract_expsyms_cmds], [2], [The commands to extract the exported symbol list from a shared archive]) # # Do we need to explicitly link libc? # case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in x|xyes) # Assume -lc should be added _LT_TAGVAR(archive_cmds_need_lc, $1)=yes if test yes,yes = "$GCC,$enable_shared"; then case $_LT_TAGVAR(archive_cmds, $1) in *'~'*) # FIXME: we may have to deal with multi-command sequences. ;; '$CC '*) # Test whether the compiler implicitly links with -lc since on some # systems, -lgcc has to come before -lc. If gcc already passes -lc # to ld, don't add -lc before -lgcc. AC_CACHE_CHECK([whether -lc should be explicitly linked in], [lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1), [$RM conftest* echo "$lt_simple_compile_test_code" > conftest.$ac_ext if AC_TRY_EVAL(ac_compile) 2>conftest.err; then soname=conftest lib=conftest libobjs=conftest.$ac_objext deplibs= wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1) compiler_flags=-v linker_flags=-v verstring= output_objdir=. libname=conftest lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1) _LT_TAGVAR(allow_undefined_flag, $1)= if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) then lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no else lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes fi _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag else cat conftest.err 1>&5 fi $RM conftest* ]) _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1) ;; esac fi ;; esac _LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0], [Whether or not to add -lc for building shared libraries]) _LT_TAGDECL([allow_libtool_libs_with_static_runtimes], [enable_shared_with_static_runtimes], [0], [Whether or not to disallow shared libs when runtime libs are static]) _LT_TAGDECL([], [export_dynamic_flag_spec], [1], [Compiler flag to allow reflexive dlopens]) _LT_TAGDECL([], [whole_archive_flag_spec], [1], [Compiler flag to generate shared objects directly from archives]) _LT_TAGDECL([], [compiler_needs_object], [1], [Whether the compiler copes with passing no objects directly]) _LT_TAGDECL([], [old_archive_from_new_cmds], [2], [Create an old-style archive from a shared archive]) _LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2], [Create a temporary old-style archive to link instead of a shared archive]) _LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive]) _LT_TAGDECL([], [archive_expsym_cmds], [2]) _LT_TAGDECL([], [module_cmds], [2], [Commands used to build a loadable module if different from building a shared archive.]) _LT_TAGDECL([], [module_expsym_cmds], [2]) _LT_TAGDECL([], [with_gnu_ld], [1], [Whether we are building with GNU ld or not]) _LT_TAGDECL([], [allow_undefined_flag], [1], [Flag that allows shared libraries with undefined symbols to be built]) _LT_TAGDECL([], [no_undefined_flag], [1], [Flag that enforces no undefined symbols]) _LT_TAGDECL([], [hardcode_libdir_flag_spec], [1], [Flag to hardcode $libdir into a binary during linking. This must work even if $libdir does not exist]) _LT_TAGDECL([], [hardcode_libdir_separator], [1], [Whether we need a single "-rpath" flag with a separated argument]) _LT_TAGDECL([], [hardcode_direct], [0], [Set to "yes" if using DIR/libNAME$shared_ext during linking hardcodes DIR into the resulting binary]) _LT_TAGDECL([], [hardcode_direct_absolute], [0], [Set to "yes" if using DIR/libNAME$shared_ext during linking hardcodes DIR into the resulting binary and the resulting library dependency is "absolute", i.e impossible to change by setting $shlibpath_var if the library is relocated]) _LT_TAGDECL([], [hardcode_minus_L], [0], [Set to "yes" if using the -LDIR flag during linking hardcodes DIR into the resulting binary]) _LT_TAGDECL([], [hardcode_shlibpath_var], [0], [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into the resulting binary]) _LT_TAGDECL([], [hardcode_automatic], [0], [Set to "yes" if building a shared library automatically hardcodes DIR into the library and all subsequent libraries and executables linked against it]) _LT_TAGDECL([], [inherit_rpath], [0], [Set to yes if linker adds runtime paths of dependent libraries to runtime path list]) _LT_TAGDECL([], [link_all_deplibs], [0], [Whether libtool must link a program against all its dependency libraries]) _LT_TAGDECL([], [always_export_symbols], [0], [Set to "yes" if exported symbols are required]) _LT_TAGDECL([], [export_symbols_cmds], [2], [The commands to list exported symbols]) _LT_TAGDECL([], [exclude_expsyms], [1], [Symbols that should not be listed in the preloaded symbols]) _LT_TAGDECL([], [include_expsyms], [1], [Symbols that must always be exported]) _LT_TAGDECL([], [prelink_cmds], [2], [Commands necessary for linking programs (against libraries) with templates]) _LT_TAGDECL([], [postlink_cmds], [2], [Commands necessary for finishing linking programs]) _LT_TAGDECL([], [file_list_spec], [1], [Specify filename containing input files]) dnl FIXME: Not yet implemented dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1], dnl [Compiler flag to generate thread safe objects]) ])# _LT_LINKER_SHLIBS # _LT_LANG_C_CONFIG([TAG]) # ------------------------ # Ensure that the configuration variables for a C compiler are suitably # defined. These variables are subsequently used by _LT_CONFIG to write # the compiler configuration to 'libtool'. m4_defun([_LT_LANG_C_CONFIG], [m4_require([_LT_DECL_EGREP])dnl lt_save_CC=$CC AC_LANG_PUSH(C) # Source file extension for C test sources. ac_ext=c # Object file extension for compiled C test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests lt_simple_compile_test_code="int some_variable = 0;" # Code to be used in simple link tests lt_simple_link_test_code='int main(){return(0);}' _LT_TAG_COMPILER # Save the default compiler, since it gets overwritten when the other # tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. compiler_DEFAULT=$CC # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... if test -n "$compiler"; then _LT_COMPILER_NO_RTTI($1) _LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_SYS_DYNAMIC_LINKER($1) _LT_LINKER_HARDCODE_LIBPATH($1) LT_SYS_DLOPEN_SELF _LT_CMD_STRIPLIB # Report what library types will actually be built AC_MSG_CHECKING([if libtool supports shared libraries]) AC_MSG_RESULT([$can_build_shared]) AC_MSG_CHECKING([whether to build shared libraries]) test no = "$can_build_shared" && enable_shared=no # On AIX, shared libraries and static libraries use the same namespace, and # are all built from PIC. case $host_os in aix3*) test yes = "$enable_shared" && enable_static=no if test -n "$RANLIB"; then archive_cmds="$archive_cmds~\$RANLIB \$lib" postinstall_cmds='$RANLIB $lib' fi ;; aix[[4-9]]*) if test ia64 != "$host_cpu"; then case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in yes,aix,yes) ;; # shared object as lib.so file only yes,svr4,*) ;; # shared object as lib.so archive member only yes,*) enable_static=no ;; # shared object in lib.a archive as well esac fi ;; esac AC_MSG_RESULT([$enable_shared]) AC_MSG_CHECKING([whether to build static libraries]) # Make sure either enable_shared or enable_static is yes. test yes = "$enable_shared" || enable_static=yes AC_MSG_RESULT([$enable_static]) _LT_CONFIG($1) fi AC_LANG_POP CC=$lt_save_CC ])# _LT_LANG_C_CONFIG # _LT_LANG_CXX_CONFIG([TAG]) # -------------------------- # Ensure that the configuration variables for a C++ compiler are suitably # defined. These variables are subsequently used by _LT_CONFIG to write # the compiler configuration to 'libtool'. m4_defun([_LT_LANG_CXX_CONFIG], [m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_DECL_EGREP])dnl m4_require([_LT_PATH_MANIFEST_TOOL])dnl if test -n "$CXX" && ( test no != "$CXX" && ( (test g++ = "$CXX" && `g++ -v >/dev/null 2>&1` ) || (test g++ != "$CXX"))); then AC_PROG_CXXCPP else _lt_caught_CXX_error=yes fi AC_LANG_PUSH(C++) _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(allow_undefined_flag, $1)= _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(archive_expsym_cmds, $1)= _LT_TAGVAR(compiler_needs_object, $1)=no _LT_TAGVAR(export_dynamic_flag_spec, $1)= _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_TAGVAR(hardcode_libdir_separator, $1)= _LT_TAGVAR(hardcode_minus_L, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported _LT_TAGVAR(hardcode_automatic, $1)=no _LT_TAGVAR(inherit_rpath, $1)=no _LT_TAGVAR(module_cmds, $1)= _LT_TAGVAR(module_expsym_cmds, $1)= _LT_TAGVAR(link_all_deplibs, $1)=unknown _LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds _LT_TAGVAR(reload_flag, $1)=$reload_flag _LT_TAGVAR(reload_cmds, $1)=$reload_cmds _LT_TAGVAR(no_undefined_flag, $1)= _LT_TAGVAR(whole_archive_flag_spec, $1)= _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no # Source file extension for C++ test sources. ac_ext=cpp # Object file extension for compiled C++ test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # No sense in running all these tests if we already determined that # the CXX compiler isn't working. Some variables (like enable_shared) # are currently assumed to apply to all compilers on this platform, # and will be corrupted by setting them based on a non-working compiler. if test yes != "$_lt_caught_CXX_error"; then # Code to be used in simple compile tests lt_simple_compile_test_code="int some_variable = 0;" # Code to be used in simple link tests lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_CFLAGS=$CFLAGS lt_save_LD=$LD lt_save_GCC=$GCC GCC=$GXX lt_save_with_gnu_ld=$with_gnu_ld lt_save_path_LD=$lt_cv_path_LD if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx else $as_unset lt_cv_prog_gnu_ld fi if test -n "${lt_cv_path_LDCXX+set}"; then lt_cv_path_LD=$lt_cv_path_LDCXX else $as_unset lt_cv_path_LD fi test -z "${LDCXX+set}" || LD=$LDCXX CC=${CXX-"c++"} CFLAGS=$CXXFLAGS compiler=$CC _LT_TAGVAR(compiler, $1)=$CC _LT_CC_BASENAME([$compiler]) if test -n "$compiler"; then # We don't want -fno-exception when compiling C++ code, so set the # no_builtin_flag separately if test yes = "$GXX"; then _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' else _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= fi if test yes = "$GXX"; then # Set up default GNU C++ configuration LT_PATH_LD # Check if GNU C++ uses GNU ld as the underlying linker, since the # archiving commands below assume that GNU ld is being used. if test yes = "$with_gnu_ld"; then _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' # If archive_cmds runs LD, not CC, wlarc should be empty # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to # investigate it a little bit more. (MM) wlarc='$wl' # ancient GNU ld didn't support --whole-archive et. al. if eval "`$CC -print-prog-name=ld` --help 2>&1" | $GREP 'no-whole-archive' > /dev/null; then _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' else _LT_TAGVAR(whole_archive_flag_spec, $1)= fi else with_gnu_ld=no wlarc= # A generic and very simple default shared library creation # command for GNU C++ for the case where it uses the native # linker, instead of GNU ld. If possible, this setting should # overridden to take advantage of the native linker features on # the platform it is being used on. _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' fi # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' else GXX=no with_gnu_ld=no wlarc= fi # PORTME: fill in a description of your system's C++ link characteristics AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) _LT_TAGVAR(ld_shlibs, $1)=yes case $host_os in aix3*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; aix[[4-9]]*) if test ia64 = "$host_cpu"; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag= else aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # have runtime linking enabled, and use it for executables. # For shared libraries, we enable/disable runtime linking # depending on the kind of the shared library created - # when "with_aix_soname,aix_use_runtimelinking" is: # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables # "aix,yes" lib.so shared, rtl:yes, for executables # lib.a static archive # "both,no" lib.so.V(shr.o) shared, rtl:yes # lib.a(lib.so.V) shared, rtl:no, for executables # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables # lib.a(lib.so.V) shared, rtl:no # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables # lib.a static archive case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) for ld_flag in $LDFLAGS; do case $ld_flag in *-brtl*) aix_use_runtimelinking=yes break ;; esac done if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then # With aix-soname=svr4, we create the lib.so.V shared archives only, # so we don't have lib.a shared libs to link our executables. # We have to force runtime linking in this case. aix_use_runtimelinking=yes LDFLAGS="$LDFLAGS -Wl,-brtl" fi ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. _LT_TAGVAR(archive_cmds, $1)='' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_TAGVAR(link_all_deplibs, $1)=yes _LT_TAGVAR(file_list_spec, $1)='$wl-f,' case $with_aix_soname,$aix_use_runtimelinking in aix,*) ;; # no import file svr4,* | *,yes) # use import file # The Import File defines what to hardcode. _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=no ;; esac if test yes = "$GXX"; then case $host_os in aix4.[[012]]|aix4.[[012]].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`$CC -print-prog-name=collect2` if test -f "$collect2name" && strings "$collect2name" | $GREP resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 _LT_TAGVAR(hardcode_direct, $1)=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)= fi esac shared_flag='-shared' if test yes = "$aix_use_runtimelinking"; then shared_flag=$shared_flag' $wl-G' fi # Need to ensure runtime linking is disabled for the traditional # shared library, or the linker may eventually find shared libraries # /with/ Import File - we do not want to mix them. shared_flag_aix='-shared' shared_flag_svr4='-shared $wl-G' else # not using gcc if test ia64 = "$host_cpu"; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test yes = "$aix_use_runtimelinking"; then shared_flag='$wl-G' else shared_flag='$wl-bM:SRE' fi shared_flag_aix='$wl-bM:SRE' shared_flag_svr4='$wl-G' fi fi _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-bexpall' # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to # export. _LT_TAGVAR(always_export_symbols, $1)=yes if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. # The "-G" linker flag allows undefined symbols. _LT_TAGVAR(no_undefined_flag, $1)='-bernotok' # Determine the default libpath from the value encoded in an empty # executable. _LT_SYS_MODULE_PATH_AIX([$1]) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag else if test ia64 = "$host_cpu"; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $libdir:/usr/lib:/lib' _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an # empty executable. _LT_SYS_MODULE_PATH_AIX([$1]) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. _LT_TAGVAR(no_undefined_flag, $1)=' $wl-bernotok' _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-berok' if test yes = "$with_gnu_ld"; then # We only use this code for GNU lds that support --whole-archive. _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' else # Exported symbols can be pulled into shared objects from archives _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' fi _LT_TAGVAR(archive_cmds_need_lc, $1)=yes _LT_TAGVAR(archive_expsym_cmds, $1)='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' # -brtl affects multiple linker settings, -berok does not and is overridden later compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([[, ]]\\)%-berok\\1%g"`' if test svr4 != "$with_aix_soname"; then # This is similar to how AIX traditionally builds its shared # libraries. Need -bnortl late, we may have -brtl in LDFLAGS. _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' fi if test aix != "$with_aix_soname"; then _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! $soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' else # used by -dlpreopen to get the symbols _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$MV $output_objdir/$realname.d/$soname $output_objdir' fi _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$RM -r $output_objdir/$realname.d' fi fi ;; beos*) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then _LT_TAGVAR(allow_undefined_flag, $1)=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. FIXME _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; chorus*) case $cc_basename in *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac ;; cygwin* | mingw* | pw32* | cegcc*) case $GXX,$cc_basename in ,cl* | no,cl* | ,icl* | no,icl*) # Native MSVC or ICC # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=yes _LT_TAGVAR(file_list_spec, $1)='@' # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=.dll # FIXME: Setting linknames here is a bad hack. _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then cp "$export_symbols" "$output_objdir/$soname.def"; echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; else $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; fi~ $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ linknames=' # The linker will not automatically build a static lib if we build a DLL. # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes # Don't use ranlib _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ lt_tool_outputfile="@TOOL_OUTPUT@"~ case $lt_outputfile in *.exe|*.EXE) ;; *) lt_outputfile=$lt_outputfile.exe lt_tool_outputfile=$lt_tool_outputfile.exe ;; esac~ func_to_tool_file "$lt_outputfile"~ if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; $RM "$lt_outputfile.manifest"; fi' ;; *) # g++ # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, # as there is no search path for DLLs. _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-all-symbols' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file, use it as # is; otherwise, prepend EXPORTS... _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; darwin* | rhapsody*) _LT_DARWIN_LINKER_FEATURES($1) ;; os2*) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(allow_undefined_flag, $1)=unsupported shrext_cmds=.dll _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ $ECHO EXPORTS >> $output_objdir/$libname.def~ prefix_cmds="$SED"~ if test EXPORTS = "`$SED 1q $export_symbols`"; then prefix_cmds="$prefix_cmds -e 1d"; fi~ prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ emximp -o $lib $output_objdir/$libname.def' _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes _LT_TAGVAR(file_list_spec, $1)='@' ;; dgux*) case $cc_basename in ec++*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; ghcx*) # Green Hills C++ Compiler # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac ;; freebsd2.*) # C++ shared libraries reported to be fairly broken before # switch to ELF _LT_TAGVAR(ld_shlibs, $1)=no ;; freebsd-elf*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;; freebsd* | dragonfly* | midnightbsd*) # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF # conventions _LT_TAGVAR(ld_shlibs, $1)=yes ;; haiku*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' _LT_TAGVAR(link_all_deplibs, $1)=yes ;; hpux9*) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, # but as the default # location of the library. case $cc_basename in CC*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; aCC*) _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test yes = "$GXX"; then _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' else # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; hpux10*|hpux11*) if test no = "$with_gnu_ld"; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: case $host_cpu in hppa*64*|ia64*) ;; *) _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' ;; esac fi case $host_cpu in hppa*64*|ia64*) _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, # but as the default # location of the library. ;; esac case $cc_basename in CC*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; aCC*) case $host_cpu in hppa*64*) _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; ia64*) _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; esac # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test yes = "$GXX"; then if test no = "$with_gnu_ld"; then case $host_cpu in hppa*64*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; ia64*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; esac fi else # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; interix[[3-9]]*) _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; irix5* | irix6*) case $cc_basename in CC*) # SGI C++ _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' # Archives containing C++ object files must be created using # "CC -ar", where "CC" is the IRIX C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. _LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs' ;; *) if test yes = "$GXX"; then if test no = "$with_gnu_ld"; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' else _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` -o $lib' fi fi _LT_TAGVAR(link_all_deplibs, $1)=yes ;; esac _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(inherit_rpath, $1)=yes ;; linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib $wl-retain-symbols-file,$export_symbols; mv \$templib $lib' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' # Archives containing C++ object files must be created using # "CC -Bstatic", where "CC" is the KAI C++ compiler. _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; icpc* | ecpc* ) # Intel C++ with_gnu_ld=yes # version 8.0 and above of icpc choke on multiply defined symbols # if we add $predep_objects and $postdep_objects, however 7.1 and # earlier do not add the objects themselves. case `$CC -V 2>&1` in *"Version 7."*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' ;; *) # Version 8.0 or newer tmp_idyn= case $host_cpu in ia64*) tmp_idyn=' -i_dynamic';; esac _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' ;; esac _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' ;; pgCC* | pgcpp*) # Portland Group C++ compiler case `$CC -V` in *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*) _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ $RANLIB $oldlib' _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' ;; *) # Version 6 and above use weak symbols _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' ;; esac _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl--rpath $wl$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' ;; cxx*) # Compaq C++ _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib $wl-retain-symbols-file $wl$export_symbols' runpath_var=LD_RUN_PATH _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' ;; xl* | mpixl* | bgxl*) # IBM XL 8.0 on PPC, with GNU ld _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' if test yes = "$supports_anon_versioning"; then _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' fi ;; *) case `$CC -V 2>&1 | $SED 5q` in *Sun\ C*) # Sun C++ 5.9 _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file $wl$export_symbols' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' _LT_TAGVAR(compiler_needs_object, $1)=yes # Not sure whether something based on # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 # would be better. output_verbose_link_cmd='func_echo_all' # Archives containing C++ object files must be created using # "CC -xar", where "CC" is the Sun C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' ;; esac ;; esac ;; lynxos*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; m88k*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; mvs*) case $cc_basename in cxx*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac ;; netbsd*) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' wlarc= _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no fi # Workaround some broken pre-1.5 toolchains output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' ;; *nto* | *qnx*) _LT_TAGVAR(ld_shlibs, $1)=yes ;; openbsd* | bitrig*) if test -f /usr/libexec/ld.so; then _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`"; then _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file,$export_symbols -o $lib' _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' fi output_verbose_link_cmd=func_echo_all else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; osf3* | osf4* | osf5*) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: # Archives containing C++ object files must be created using # the KAI C++ compiler. case $host in osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;; esac ;; RCC*) # Rational C++ 2.4.1 # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; cxx*) case $host in osf3*) _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $soname `test -n "$verstring" && func_echo_all "$wl-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' ;; *) _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ echo "-hidden">> $lib.exp~ $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname $wl-input $wl$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~ $RM $lib.exp' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' ;; esac _LT_TAGVAR(hardcode_libdir_separator, $1)=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test yes,no = "$GXX,$with_gnu_ld"; then _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' case $host in osf3*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' ;; esac _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' else # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; psos*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; sunos4*) case $cc_basename in CC*) # Sun C++ 4.x # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; lcc*) # Lucid # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac ;; solaris*) case $cc_basename in CC* | sunCC*) # Sun C++ 4.2, 5.x and Centerline C++ _LT_TAGVAR(archive_cmds_need_lc,$1)=yes _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G$allow_undefined_flag $wl-M $wl$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no case $host_os in solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands '-z linker_flag'. # Supported since Solaris 2.6 (maybe 2.5.1?) _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' ;; esac _LT_TAGVAR(link_all_deplibs, $1)=yes output_verbose_link_cmd='func_echo_all' # Archives containing C++ object files must be created using # "CC -xar", where "CC" is the Sun C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' ;; gcx*) # Green Hills C++ Compiler _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' # The C++ compiler must be used to create the archive. _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs' ;; *) # GNU C++ compiler with Solaris linker if test yes,no = "$GXX,$with_gnu_ld"; then _LT_TAGVAR(no_undefined_flag, $1)=' $wl-z ${wl}defs' if $CC --version | $GREP -v '^2\.7' > /dev/null; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -shared $pic_flag -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' else # g++ 2.7 appears to require '-G' NOT '-shared' on this # platform. _LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $wl$libdir' case $host_os in solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; *) _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' ;; esac fi ;; esac ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no runpath_var='LD_RUN_PATH' case $cc_basename in CC*) _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; esac ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We CANNOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' _LT_TAGVAR(allow_undefined_flag, $1)='$wl-z,nodefs' _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R,$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_TAGVAR(link_all_deplibs, $1)=yes _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Bexport' runpath_var='LD_RUN_PATH' case $cc_basename in CC*) _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~ '"$_LT_TAGVAR(old_archive_cmds, $1)" _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~ '"$_LT_TAGVAR(reload_cmds, $1)" ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; esac ;; tandem*) case $cc_basename in NCC*) # NonStop-UX NCC 3.20 # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac ;; vxworks*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) test no = "$_LT_TAGVAR(ld_shlibs, $1)" && can_build_shared=no _LT_TAGVAR(GCC, $1)=$GXX _LT_TAGVAR(LD, $1)=$LD ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... _LT_SYS_HIDDEN_LIBDEPS($1) _LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_SYS_DYNAMIC_LINKER($1) _LT_LINKER_HARDCODE_LIBPATH($1) _LT_CONFIG($1) fi # test -n "$compiler" CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS LDCXX=$LD LD=$lt_save_LD GCC=$lt_save_GCC with_gnu_ld=$lt_save_with_gnu_ld lt_cv_path_LDCXX=$lt_cv_path_LD lt_cv_path_LD=$lt_save_path_LD lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld fi # test yes != "$_lt_caught_CXX_error" AC_LANG_POP ])# _LT_LANG_CXX_CONFIG # _LT_FUNC_STRIPNAME_CNF # ---------------------- # func_stripname_cnf prefix suffix name # strip PREFIX and SUFFIX off of NAME. # PREFIX and SUFFIX must not contain globbing or regex special # characters, hashes, percent signs, but SUFFIX may contain a leading # dot (in which case that matches only a dot). # # This function is identical to the (non-XSI) version of func_stripname, # except this one can be used by m4 code that may be executed by configure, # rather than the libtool script. m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl AC_REQUIRE([_LT_DECL_SED]) AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH]) func_stripname_cnf () { case @S|@2 in .*) func_stripname_result=`$ECHO "@S|@3" | $SED "s%^@S|@1%%; s%\\\\@S|@2\$%%"`;; *) func_stripname_result=`$ECHO "@S|@3" | $SED "s%^@S|@1%%; s%@S|@2\$%%"`;; esac } # func_stripname_cnf ])# _LT_FUNC_STRIPNAME_CNF # _LT_SYS_HIDDEN_LIBDEPS([TAGNAME]) # --------------------------------- # Figure out "hidden" library dependencies from verbose # compiler output when linking a shared library. # Parse the compiler output and extract the necessary # objects, libraries and library flags. m4_defun([_LT_SYS_HIDDEN_LIBDEPS], [m4_require([_LT_FILEUTILS_DEFAULTS])dnl AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl # Dependencies to place before and after the object being linked: _LT_TAGVAR(predep_objects, $1)= _LT_TAGVAR(postdep_objects, $1)= _LT_TAGVAR(predeps, $1)= _LT_TAGVAR(postdeps, $1)= _LT_TAGVAR(compiler_lib_search_path, $1)= dnl we can't use the lt_simple_compile_test_code here, dnl because it contains code intended for an executable, dnl not a library. It's possible we should let each dnl tag define a new lt_????_link_test_code variable, dnl but it's only used here... m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF int a; void foo (void) { a = 0; } _LT_EOF ], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF class Foo { public: Foo (void) { a = 0; } private: int a; }; _LT_EOF ], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF subroutine foo implicit none integer*4 a a=0 return end _LT_EOF ], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF subroutine foo implicit none integer a a=0 return end _LT_EOF ], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF public class foo { private int a; public void bar (void) { a = 0; } }; _LT_EOF ], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF package foo func foo() { } _LT_EOF ]) _lt_libdeps_save_CFLAGS=$CFLAGS case "$CC $CFLAGS " in #( *\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; *\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; *\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;; esac dnl Parse the compiler output and extract the necessary dnl objects, libraries and library flags. if AC_TRY_EVAL(ac_compile); then # Parse the compiler output and extract the necessary # objects, libraries and library flags. # Sentinel used to keep track of whether or not we are before # the conftest object file. pre_test_object_deps_done=no for p in `eval "$output_verbose_link_cmd"`; do case $prev$p in -L* | -R* | -l*) # Some compilers place space between "-{L,R}" and the path. # Remove the space. if test x-L = "$p" || test x-R = "$p"; then prev=$p continue fi # Expand the sysroot to ease extracting the directories later. if test -z "$prev"; then case $p in -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; esac fi case $p in =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; esac if test no = "$pre_test_object_deps_done"; then case $prev in -L | -R) # Internal compiler library paths should come after those # provided the user. The postdeps already come after the # user supplied libs so there is no need to process them. if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then _LT_TAGVAR(compiler_lib_search_path, $1)=$prev$p else _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} $prev$p" fi ;; # The "-l" case would never come before the object being # linked, so don't bother handling this case. esac else if test -z "$_LT_TAGVAR(postdeps, $1)"; then _LT_TAGVAR(postdeps, $1)=$prev$p else _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} $prev$p" fi fi prev= ;; *.lto.$objext) ;; # Ignore GCC LTO objects *.$objext) # This assumes that the test object file only shows up # once in the compiler output. if test "$p" = "conftest.$objext"; then pre_test_object_deps_done=yes continue fi if test no = "$pre_test_object_deps_done"; then if test -z "$_LT_TAGVAR(predep_objects, $1)"; then _LT_TAGVAR(predep_objects, $1)=$p else _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p" fi else if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then _LT_TAGVAR(postdep_objects, $1)=$p else _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p" fi fi ;; *) ;; # Ignore the rest. esac done # Clean up. rm -f a.out a.exe else echo "libtool.m4: error: problem compiling $1 test program" fi $RM -f confest.$objext CFLAGS=$_lt_libdeps_save_CFLAGS # PORTME: override above test on systems where it is broken m4_if([$1], [CXX], [case $host_os in interix[[3-9]]*) # Interix 3.5 installs completely hosed .la files for C++, so rather than # hack all around it, let's just trust "g++" to DTRT. _LT_TAGVAR(predep_objects,$1)= _LT_TAGVAR(postdep_objects,$1)= _LT_TAGVAR(postdeps,$1)= ;; esac ]) case " $_LT_TAGVAR(postdeps, $1) " in *" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;; esac _LT_TAGVAR(compiler_lib_search_dirs, $1)= if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | $SED -e 's! -L! !g' -e 's!^ !!'` fi _LT_TAGDECL([], [compiler_lib_search_dirs], [1], [The directories searched by this compiler when creating a shared library]) _LT_TAGDECL([], [predep_objects], [1], [Dependencies to place before and after the objects being linked to create a shared library]) _LT_TAGDECL([], [postdep_objects], [1]) _LT_TAGDECL([], [predeps], [1]) _LT_TAGDECL([], [postdeps], [1]) _LT_TAGDECL([], [compiler_lib_search_path], [1], [The library search path used internally by the compiler when linking a shared library]) ])# _LT_SYS_HIDDEN_LIBDEPS # _LT_LANG_F77_CONFIG([TAG]) # -------------------------- # Ensure that the configuration variables for a Fortran 77 compiler are # suitably defined. These variables are subsequently used by _LT_CONFIG # to write the compiler configuration to 'libtool'. m4_defun([_LT_LANG_F77_CONFIG], [AC_LANG_PUSH(Fortran 77) if test -z "$F77" || test no = "$F77"; then _lt_disable_F77=yes fi _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(allow_undefined_flag, $1)= _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(archive_expsym_cmds, $1)= _LT_TAGVAR(export_dynamic_flag_spec, $1)= _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_TAGVAR(hardcode_libdir_separator, $1)= _LT_TAGVAR(hardcode_minus_L, $1)=no _LT_TAGVAR(hardcode_automatic, $1)=no _LT_TAGVAR(inherit_rpath, $1)=no _LT_TAGVAR(module_cmds, $1)= _LT_TAGVAR(module_expsym_cmds, $1)= _LT_TAGVAR(link_all_deplibs, $1)=unknown _LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds _LT_TAGVAR(reload_flag, $1)=$reload_flag _LT_TAGVAR(reload_cmds, $1)=$reload_cmds _LT_TAGVAR(no_undefined_flag, $1)= _LT_TAGVAR(whole_archive_flag_spec, $1)= _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no # Source file extension for f77 test sources. ac_ext=f # Object file extension for compiled f77 test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # No sense in running all these tests if we already determined that # the F77 compiler isn't working. Some variables (like enable_shared) # are currently assumed to apply to all compilers on this platform, # and will be corrupted by setting them based on a non-working compiler. if test yes != "$_lt_disable_F77"; then # Code to be used in simple compile tests lt_simple_compile_test_code="\ subroutine t return end " # Code to be used in simple link tests lt_simple_link_test_code="\ program t end " # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_GCC=$GCC lt_save_CFLAGS=$CFLAGS CC=${F77-"f77"} CFLAGS=$FFLAGS compiler=$CC _LT_TAGVAR(compiler, $1)=$CC _LT_CC_BASENAME([$compiler]) GCC=$G77 if test -n "$compiler"; then AC_MSG_CHECKING([if libtool supports shared libraries]) AC_MSG_RESULT([$can_build_shared]) AC_MSG_CHECKING([whether to build shared libraries]) test no = "$can_build_shared" && enable_shared=no # On AIX, shared libraries and static libraries use the same namespace, and # are all built from PIC. case $host_os in aix3*) test yes = "$enable_shared" && enable_static=no if test -n "$RANLIB"; then archive_cmds="$archive_cmds~\$RANLIB \$lib" postinstall_cmds='$RANLIB $lib' fi ;; aix[[4-9]]*) if test ia64 != "$host_cpu"; then case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in yes,aix,yes) ;; # shared object as lib.so file only yes,svr4,*) ;; # shared object as lib.so archive member only yes,*) enable_static=no ;; # shared object in lib.a archive as well esac fi ;; esac AC_MSG_RESULT([$enable_shared]) AC_MSG_CHECKING([whether to build static libraries]) # Make sure either enable_shared or enable_static is yes. test yes = "$enable_shared" || enable_static=yes AC_MSG_RESULT([$enable_static]) _LT_TAGVAR(GCC, $1)=$G77 _LT_TAGVAR(LD, $1)=$LD ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... _LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_SYS_DYNAMIC_LINKER($1) _LT_LINKER_HARDCODE_LIBPATH($1) _LT_CONFIG($1) fi # test -n "$compiler" GCC=$lt_save_GCC CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS fi # test yes != "$_lt_disable_F77" AC_LANG_POP ])# _LT_LANG_F77_CONFIG # _LT_LANG_FC_CONFIG([TAG]) # ------------------------- # Ensure that the configuration variables for a Fortran compiler are # suitably defined. These variables are subsequently used by _LT_CONFIG # to write the compiler configuration to 'libtool'. m4_defun([_LT_LANG_FC_CONFIG], [AC_LANG_PUSH(Fortran) if test -z "$FC" || test no = "$FC"; then _lt_disable_FC=yes fi _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(allow_undefined_flag, $1)= _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(archive_expsym_cmds, $1)= _LT_TAGVAR(export_dynamic_flag_spec, $1)= _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_TAGVAR(hardcode_libdir_separator, $1)= _LT_TAGVAR(hardcode_minus_L, $1)=no _LT_TAGVAR(hardcode_automatic, $1)=no _LT_TAGVAR(inherit_rpath, $1)=no _LT_TAGVAR(module_cmds, $1)= _LT_TAGVAR(module_expsym_cmds, $1)= _LT_TAGVAR(link_all_deplibs, $1)=unknown _LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds _LT_TAGVAR(reload_flag, $1)=$reload_flag _LT_TAGVAR(reload_cmds, $1)=$reload_cmds _LT_TAGVAR(no_undefined_flag, $1)= _LT_TAGVAR(whole_archive_flag_spec, $1)= _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no # Source file extension for fc test sources. ac_ext=${ac_fc_srcext-f} # Object file extension for compiled fc test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # No sense in running all these tests if we already determined that # the FC compiler isn't working. Some variables (like enable_shared) # are currently assumed to apply to all compilers on this platform, # and will be corrupted by setting them based on a non-working compiler. if test yes != "$_lt_disable_FC"; then # Code to be used in simple compile tests lt_simple_compile_test_code="\ subroutine t return end " # Code to be used in simple link tests lt_simple_link_test_code="\ program t end " # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_GCC=$GCC lt_save_CFLAGS=$CFLAGS CC=${FC-"f95"} CFLAGS=$FCFLAGS compiler=$CC GCC=$ac_cv_fc_compiler_gnu _LT_TAGVAR(compiler, $1)=$CC _LT_CC_BASENAME([$compiler]) if test -n "$compiler"; then AC_MSG_CHECKING([if libtool supports shared libraries]) AC_MSG_RESULT([$can_build_shared]) AC_MSG_CHECKING([whether to build shared libraries]) test no = "$can_build_shared" && enable_shared=no # On AIX, shared libraries and static libraries use the same namespace, and # are all built from PIC. case $host_os in aix3*) test yes = "$enable_shared" && enable_static=no if test -n "$RANLIB"; then archive_cmds="$archive_cmds~\$RANLIB \$lib" postinstall_cmds='$RANLIB $lib' fi ;; aix[[4-9]]*) if test ia64 != "$host_cpu"; then case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in yes,aix,yes) ;; # shared object as lib.so file only yes,svr4,*) ;; # shared object as lib.so archive member only yes,*) enable_static=no ;; # shared object in lib.a archive as well esac fi ;; esac AC_MSG_RESULT([$enable_shared]) AC_MSG_CHECKING([whether to build static libraries]) # Make sure either enable_shared or enable_static is yes. test yes = "$enable_shared" || enable_static=yes AC_MSG_RESULT([$enable_static]) _LT_TAGVAR(GCC, $1)=$ac_cv_fc_compiler_gnu _LT_TAGVAR(LD, $1)=$LD ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... _LT_SYS_HIDDEN_LIBDEPS($1) _LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_SYS_DYNAMIC_LINKER($1) _LT_LINKER_HARDCODE_LIBPATH($1) _LT_CONFIG($1) fi # test -n "$compiler" GCC=$lt_save_GCC CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS fi # test yes != "$_lt_disable_FC" AC_LANG_POP ])# _LT_LANG_FC_CONFIG # _LT_LANG_GCJ_CONFIG([TAG]) # -------------------------- # Ensure that the configuration variables for the GNU Java Compiler compiler # are suitably defined. These variables are subsequently used by _LT_CONFIG # to write the compiler configuration to 'libtool'. m4_defun([_LT_LANG_GCJ_CONFIG], [AC_REQUIRE([LT_PROG_GCJ])dnl AC_LANG_SAVE # Source file extension for Java test sources. ac_ext=java # Object file extension for compiled Java test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests lt_simple_compile_test_code="class foo {}" # Code to be used in simple link tests lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_CFLAGS=$CFLAGS lt_save_GCC=$GCC GCC=yes CC=${GCJ-"gcj"} CFLAGS=$GCJFLAGS compiler=$CC _LT_TAGVAR(compiler, $1)=$CC _LT_TAGVAR(LD, $1)=$LD _LT_CC_BASENAME([$compiler]) # GCJ did not exist at the time GCC didn't implicitly link libc in. _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds _LT_TAGVAR(reload_flag, $1)=$reload_flag _LT_TAGVAR(reload_cmds, $1)=$reload_cmds ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... if test -n "$compiler"; then _LT_COMPILER_NO_RTTI($1) _LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_LINKER_HARDCODE_LIBPATH($1) _LT_CONFIG($1) fi AC_LANG_RESTORE GCC=$lt_save_GCC CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS ])# _LT_LANG_GCJ_CONFIG # _LT_LANG_GO_CONFIG([TAG]) # -------------------------- # Ensure that the configuration variables for the GNU Go compiler # are suitably defined. These variables are subsequently used by _LT_CONFIG # to write the compiler configuration to 'libtool'. m4_defun([_LT_LANG_GO_CONFIG], [AC_REQUIRE([LT_PROG_GO])dnl AC_LANG_SAVE # Source file extension for Go test sources. ac_ext=go # Object file extension for compiled Go test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests lt_simple_compile_test_code="package main; func main() { }" # Code to be used in simple link tests lt_simple_link_test_code='package main; func main() { }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_CFLAGS=$CFLAGS lt_save_GCC=$GCC GCC=yes CC=${GOC-"gccgo"} CFLAGS=$GOFLAGS compiler=$CC _LT_TAGVAR(compiler, $1)=$CC _LT_TAGVAR(LD, $1)=$LD _LT_CC_BASENAME([$compiler]) # Go did not exist at the time GCC didn't implicitly link libc in. _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds _LT_TAGVAR(reload_flag, $1)=$reload_flag _LT_TAGVAR(reload_cmds, $1)=$reload_cmds ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... if test -n "$compiler"; then _LT_COMPILER_NO_RTTI($1) _LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_LINKER_HARDCODE_LIBPATH($1) _LT_CONFIG($1) fi AC_LANG_RESTORE GCC=$lt_save_GCC CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS ])# _LT_LANG_GO_CONFIG # _LT_LANG_RC_CONFIG([TAG]) # ------------------------- # Ensure that the configuration variables for the Windows resource compiler # are suitably defined. These variables are subsequently used by _LT_CONFIG # to write the compiler configuration to 'libtool'. m4_defun([_LT_LANG_RC_CONFIG], [AC_REQUIRE([LT_PROG_RC])dnl AC_LANG_SAVE # Source file extension for RC test sources. ac_ext=rc # Object file extension for compiled RC test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }' # Code to be used in simple link tests lt_simple_link_test_code=$lt_simple_compile_test_code # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_CFLAGS=$CFLAGS lt_save_GCC=$GCC GCC= CC=${RC-"windres"} CFLAGS= compiler=$CC _LT_TAGVAR(compiler, $1)=$CC _LT_CC_BASENAME([$compiler]) _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes if test -n "$compiler"; then : _LT_CONFIG($1) fi GCC=$lt_save_GCC AC_LANG_RESTORE CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS ])# _LT_LANG_RC_CONFIG # LT_PROG_GCJ # ----------- AC_DEFUN([LT_PROG_GCJ], [m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ], [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ], [AC_CHECK_TOOL(GCJ, gcj,) test set = "${GCJFLAGS+set}" || GCJFLAGS="-g -O2" AC_SUBST(GCJFLAGS)])])[]dnl ]) # Old name: AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([LT_AC_PROG_GCJ], []) # LT_PROG_GO # ---------- AC_DEFUN([LT_PROG_GO], [AC_CHECK_TOOL(GOC, gccgo,) ]) # LT_PROG_RC # ---------- AC_DEFUN([LT_PROG_RC], [AC_CHECK_TOOL(RC, windres,) ]) # Old name: AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([LT_AC_PROG_RC], []) # _LT_DECL_EGREP # -------------- # If we don't have a new enough Autoconf to choose the best grep # available, choose the one first in the user's PATH. m4_defun([_LT_DECL_EGREP], [AC_REQUIRE([AC_PROG_EGREP])dnl AC_REQUIRE([AC_PROG_FGREP])dnl test -z "$GREP" && GREP=grep _LT_DECL([], [GREP], [1], [A grep program that handles long lines]) _LT_DECL([], [EGREP], [1], [An ERE matcher]) _LT_DECL([], [FGREP], [1], [A literal string matcher]) dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too AC_SUBST([GREP]) ]) # _LT_DECL_OBJDUMP # -------------- # If we don't have a new enough Autoconf to choose the best objdump # available, choose the one first in the user's PATH. m4_defun([_LT_DECL_OBJDUMP], [AC_CHECK_TOOL(OBJDUMP, objdump, false) test -z "$OBJDUMP" && OBJDUMP=objdump _LT_DECL([], [OBJDUMP], [1], [An object symbol dumper]) AC_SUBST([OBJDUMP]) ]) # _LT_DECL_DLLTOOL # ---------------- # Ensure DLLTOOL variable is set. m4_defun([_LT_DECL_DLLTOOL], [AC_CHECK_TOOL(DLLTOOL, dlltool, false) test -z "$DLLTOOL" && DLLTOOL=dlltool _LT_DECL([], [DLLTOOL], [1], [DLL creation program]) AC_SUBST([DLLTOOL]) ]) # _LT_DECL_FILECMD # ---------------- # Check for a file(cmd) program that can be used to detect file type and magic m4_defun([_LT_DECL_FILECMD], [AC_CHECK_TOOL([FILECMD], [file], [:]) _LT_DECL([], [FILECMD], [1], [A file(cmd) program that detects file types]) ])# _LD_DECL_FILECMD # _LT_DECL_SED # ------------ # Check for a fully-functional sed program, that truncates # as few characters as possible. Prefer GNU sed if found. m4_defun([_LT_DECL_SED], [AC_PROG_SED test -z "$SED" && SED=sed Xsed="$SED -e 1s/^X//" _LT_DECL([], [SED], [1], [A sed program that does not truncate output]) _LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"], [Sed that helps us avoid accidentally triggering echo(1) options like -n]) ])# _LT_DECL_SED m4_ifndef([AC_PROG_SED], [ ############################################################ # NOTE: This macro has been submitted for inclusion into # # GNU Autoconf as AC_PROG_SED. When it is available in # # a released version of Autoconf we should remove this # # macro and use it instead. # ############################################################ m4_defun([AC_PROG_SED], [AC_MSG_CHECKING([for a sed that does not truncate output]) AC_CACHE_VAL(lt_cv_path_SED, [# Loop through the user's path and test for sed and gsed. # Then use that list of sed's as ones to test for truncation. as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for lt_ac_prog in sed gsed; do for ac_exec_ext in '' $ac_executable_extensions; do if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext" fi done done done IFS=$as_save_IFS lt_ac_max=0 lt_ac_count=0 # Add /usr/xpg4/bin/sed as it is typically found on Solaris # along with /bin/sed that truncates output. for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do test ! -f "$lt_ac_sed" && continue cat /dev/null > conftest.in lt_ac_count=0 echo $ECHO_N "0123456789$ECHO_C" >conftest.in # Check for GNU sed and select it if it is found. if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then lt_cv_path_SED=$lt_ac_sed break fi while true; do cat conftest.in conftest.in >conftest.tmp mv conftest.tmp conftest.in cp conftest.in conftest.nl echo >>conftest.nl $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break cmp -s conftest.out conftest.nl || break # 10000 chars as input seems more than enough test 10 -lt "$lt_ac_count" && break lt_ac_count=`expr $lt_ac_count + 1` if test "$lt_ac_count" -gt "$lt_ac_max"; then lt_ac_max=$lt_ac_count lt_cv_path_SED=$lt_ac_sed fi done done ]) SED=$lt_cv_path_SED AC_SUBST([SED]) AC_MSG_RESULT([$SED]) ])#AC_PROG_SED ])#m4_ifndef # Old name: AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([LT_AC_PROG_SED], []) # _LT_CHECK_SHELL_FEATURES # ------------------------ # Find out whether the shell is Bourne or XSI compatible, # or has some other useful features. m4_defun([_LT_CHECK_SHELL_FEATURES], [if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then lt_unset=unset else lt_unset=false fi _LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl # test EBCDIC or ASCII case `echo X|tr X '\101'` in A) # ASCII based system # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr lt_SP2NL='tr \040 \012' lt_NL2SP='tr \015\012 \040\040' ;; *) # EBCDIC based system lt_SP2NL='tr \100 \n' lt_NL2SP='tr \r\n \100\100' ;; esac _LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl _LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl ])# _LT_CHECK_SHELL_FEATURES # _LT_PATH_CONVERSION_FUNCTIONS # ----------------------------- # Determine what file name conversion functions should be used by # func_to_host_file (and, implicitly, by func_to_host_path). These are needed # for certain cross-compile configurations and native mingw. m4_defun([_LT_PATH_CONVERSION_FUNCTIONS], [AC_REQUIRE([AC_CANONICAL_HOST])dnl AC_REQUIRE([AC_CANONICAL_BUILD])dnl AC_MSG_CHECKING([how to convert $build file names to $host format]) AC_CACHE_VAL(lt_cv_to_host_file_cmd, [case $host in *-*-mingw* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 ;; *-*-cygwin* ) lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 ;; * ) # otherwise, assume *nix lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 ;; esac ;; *-*-cygwin* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin ;; *-*-cygwin* ) lt_cv_to_host_file_cmd=func_convert_file_noop ;; * ) # otherwise, assume *nix lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin ;; esac ;; * ) # unhandled hosts (and "normal" native builds) lt_cv_to_host_file_cmd=func_convert_file_noop ;; esac ]) to_host_file_cmd=$lt_cv_to_host_file_cmd AC_MSG_RESULT([$lt_cv_to_host_file_cmd]) _LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd], [0], [convert $build file names to $host format])dnl AC_MSG_CHECKING([how to convert $build file names to toolchain format]) AC_CACHE_VAL(lt_cv_to_tool_file_cmd, [#assume ordinary cross tools, or native build. lt_cv_to_tool_file_cmd=func_convert_file_noop case $host in *-*-mingw* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 ;; esac ;; esac ]) to_tool_file_cmd=$lt_cv_to_tool_file_cmd AC_MSG_RESULT([$lt_cv_to_tool_file_cmd]) _LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd], [0], [convert $build files to toolchain format])dnl ])# _LT_PATH_CONVERSION_FUNCTIONS starpu-1.4.9+dfsg/m4/ltoptions.m4000066400000000000000000000342751507764646700166400ustar00rootroot00000000000000# Helper functions for option handling. -*- Autoconf -*- # # Copyright (C) 2004-2005, 2007-2009, 2011-2019, 2021-2022 Free # Software Foundation, Inc. # Written by Gary V. Vaughan, 2004 # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. # serial 8 ltoptions.m4 # This is to help aclocal find these macros, as it can't see m4_define. AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) # _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME) # ------------------------------------------ m4_define([_LT_MANGLE_OPTION], [[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])]) # _LT_SET_OPTION(MACRO-NAME, OPTION-NAME) # --------------------------------------- # Set option OPTION-NAME for macro MACRO-NAME, and if there is a # matching handler defined, dispatch to it. Other OPTION-NAMEs are # saved as a flag. m4_define([_LT_SET_OPTION], [m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]), _LT_MANGLE_DEFUN([$1], [$2]), [m4_warning([Unknown $1 option '$2'])])[]dnl ]) # _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET]) # ------------------------------------------------------------ # Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. m4_define([_LT_IF_OPTION], [m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])]) # _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET) # ------------------------------------------------------- # Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME # are set. m4_define([_LT_UNLESS_OPTIONS], [m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option), [m4_define([$0_found])])])[]dnl m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3 ])[]dnl ]) # _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST) # ---------------------------------------- # OPTION-LIST is a space-separated list of Libtool options associated # with MACRO-NAME. If any OPTION has a matching handler declared with # LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about # the unknown option and exit. m4_defun([_LT_SET_OPTIONS], [# Set options m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), [_LT_SET_OPTION([$1], _LT_Option)]) m4_if([$1],[LT_INIT],[ dnl dnl Simply set some default values (i.e off) if boolean options were not dnl specified: _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no ]) _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no ]) dnl dnl If no reference was made to various pairs of opposing options, then dnl we run the default mode handler for the pair. For example, if neither dnl 'shared' nor 'disable-shared' was passed, we enable building of shared dnl archives by default: _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED]) _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC]) _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC]) _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install], [_LT_ENABLE_FAST_INSTALL]) _LT_UNLESS_OPTIONS([LT_INIT], [aix-soname=aix aix-soname=both aix-soname=svr4], [_LT_WITH_AIX_SONAME([aix])]) ]) ])# _LT_SET_OPTIONS ## --------------------------------- ## ## Macros to handle LT_INIT options. ## ## --------------------------------- ## # _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME) # ----------------------------------------- m4_define([_LT_MANGLE_DEFUN], [[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])]) # LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE) # ----------------------------------------------- m4_define([LT_OPTION_DEFINE], [m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl ])# LT_OPTION_DEFINE # dlopen # ------ LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes ]) AU_DEFUN([AC_LIBTOOL_DLOPEN], [_LT_SET_OPTION([LT_INIT], [dlopen]) AC_DIAGNOSE([obsolete], [$0: Remove this warning and the call to _LT_SET_OPTION when you put the 'dlopen' option into LT_INIT's first parameter.]) ]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], []) # win32-dll # --------- # Declare package support for building win32 dll's. LT_OPTION_DEFINE([LT_INIT], [win32-dll], [enable_win32_dll=yes case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) AC_CHECK_TOOL(AS, as, false) AC_CHECK_TOOL(DLLTOOL, dlltool, false) AC_CHECK_TOOL(OBJDUMP, objdump, false) ;; esac test -z "$AS" && AS=as _LT_DECL([], [AS], [1], [Assembler program])dnl test -z "$DLLTOOL" && DLLTOOL=dlltool _LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl test -z "$OBJDUMP" && OBJDUMP=objdump _LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl ])# win32-dll AU_DEFUN([AC_LIBTOOL_WIN32_DLL], [AC_REQUIRE([AC_CANONICAL_HOST])dnl _LT_SET_OPTION([LT_INIT], [win32-dll]) AC_DIAGNOSE([obsolete], [$0: Remove this warning and the call to _LT_SET_OPTION when you put the 'win32-dll' option into LT_INIT's first parameter.]) ]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], []) # _LT_ENABLE_SHARED([DEFAULT]) # ---------------------------- # implement the --enable-shared flag, and supports the 'shared' and # 'disable-shared' LT_INIT options. # DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. m4_define([_LT_ENABLE_SHARED], [m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl AC_ARG_ENABLE([shared], [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@], [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])], [p=${PACKAGE-default} case $enableval in yes) enable_shared=yes ;; no) enable_shared=no ;; *) enable_shared=no # Look at the argument we got. We use all the common list separators. lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, for pkg in $enableval; do IFS=$lt_save_ifs if test "X$pkg" = "X$p"; then enable_shared=yes fi done IFS=$lt_save_ifs ;; esac], [enable_shared=]_LT_ENABLE_SHARED_DEFAULT) _LT_DECL([build_libtool_libs], [enable_shared], [0], [Whether or not to build shared libraries]) ])# _LT_ENABLE_SHARED LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])]) LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])]) # Old names: AC_DEFUN([AC_ENABLE_SHARED], [_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared]) ]) AC_DEFUN([AC_DISABLE_SHARED], [_LT_SET_OPTION([LT_INIT], [disable-shared]) ]) AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AM_ENABLE_SHARED], []) dnl AC_DEFUN([AM_DISABLE_SHARED], []) # _LT_ENABLE_STATIC([DEFAULT]) # ---------------------------- # implement the --enable-static flag, and support the 'static' and # 'disable-static' LT_INIT options. # DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. m4_define([_LT_ENABLE_STATIC], [m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl AC_ARG_ENABLE([static], [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@], [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])], [p=${PACKAGE-default} case $enableval in yes) enable_static=yes ;; no) enable_static=no ;; *) enable_static=no # Look at the argument we got. We use all the common list separators. lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, for pkg in $enableval; do IFS=$lt_save_ifs if test "X$pkg" = "X$p"; then enable_static=yes fi done IFS=$lt_save_ifs ;; esac], [enable_static=]_LT_ENABLE_STATIC_DEFAULT) _LT_DECL([build_old_libs], [enable_static], [0], [Whether or not to build static libraries]) ])# _LT_ENABLE_STATIC LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])]) LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])]) # Old names: AC_DEFUN([AC_ENABLE_STATIC], [_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static]) ]) AC_DEFUN([AC_DISABLE_STATIC], [_LT_SET_OPTION([LT_INIT], [disable-static]) ]) AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AM_ENABLE_STATIC], []) dnl AC_DEFUN([AM_DISABLE_STATIC], []) # _LT_ENABLE_FAST_INSTALL([DEFAULT]) # ---------------------------------- # implement the --enable-fast-install flag, and support the 'fast-install' # and 'disable-fast-install' LT_INIT options. # DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. m4_define([_LT_ENABLE_FAST_INSTALL], [m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl AC_ARG_ENABLE([fast-install], [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@], [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])], [p=${PACKAGE-default} case $enableval in yes) enable_fast_install=yes ;; no) enable_fast_install=no ;; *) enable_fast_install=no # Look at the argument we got. We use all the common list separators. lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, for pkg in $enableval; do IFS=$lt_save_ifs if test "X$pkg" = "X$p"; then enable_fast_install=yes fi done IFS=$lt_save_ifs ;; esac], [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT) _LT_DECL([fast_install], [enable_fast_install], [0], [Whether or not to optimize for fast installation])dnl ])# _LT_ENABLE_FAST_INSTALL LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])]) LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])]) # Old names: AU_DEFUN([AC_ENABLE_FAST_INSTALL], [_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install]) AC_DIAGNOSE([obsolete], [$0: Remove this warning and the call to _LT_SET_OPTION when you put the 'fast-install' option into LT_INIT's first parameter.]) ]) AU_DEFUN([AC_DISABLE_FAST_INSTALL], [_LT_SET_OPTION([LT_INIT], [disable-fast-install]) AC_DIAGNOSE([obsolete], [$0: Remove this warning and the call to _LT_SET_OPTION when you put the 'disable-fast-install' option into LT_INIT's first parameter.]) ]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], []) dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], []) # _LT_WITH_AIX_SONAME([DEFAULT]) # ---------------------------------- # implement the --with-aix-soname flag, and support the `aix-soname=aix' # and `aix-soname=both' and `aix-soname=svr4' LT_INIT options. DEFAULT # is either `aix', `both' or `svr4'. If omitted, it defaults to `aix'. m4_define([_LT_WITH_AIX_SONAME], [m4_define([_LT_WITH_AIX_SONAME_DEFAULT], [m4_if($1, svr4, svr4, m4_if($1, both, both, aix))])dnl shared_archive_member_spec= case $host,$enable_shared in power*-*-aix[[5-9]]*,yes) AC_MSG_CHECKING([which variant of shared library versioning to provide]) AC_ARG_WITH([aix-soname], [AS_HELP_STRING([--with-aix-soname=aix|svr4|both], [shared library versioning (aka "SONAME") variant to provide on AIX, @<:@default=]_LT_WITH_AIX_SONAME_DEFAULT[@:>@.])], [case $withval in aix|svr4|both) ;; *) AC_MSG_ERROR([Unknown argument to --with-aix-soname]) ;; esac lt_cv_with_aix_soname=$with_aix_soname], [AC_CACHE_VAL([lt_cv_with_aix_soname], [lt_cv_with_aix_soname=]_LT_WITH_AIX_SONAME_DEFAULT) with_aix_soname=$lt_cv_with_aix_soname]) AC_MSG_RESULT([$with_aix_soname]) if test aix != "$with_aix_soname"; then # For the AIX way of multilib, we name the shared archive member # based on the bitwidth used, traditionally 'shr.o' or 'shr_64.o', # and 'shr.imp' or 'shr_64.imp', respectively, for the Import File. # Even when GNU compilers ignore OBJECT_MODE but need '-maix64' flag, # the AIX toolchain works better with OBJECT_MODE set (default 32). if test 64 = "${OBJECT_MODE-32}"; then shared_archive_member_spec=shr_64 else shared_archive_member_spec=shr fi fi ;; *) with_aix_soname=aix ;; esac _LT_DECL([], [shared_archive_member_spec], [0], [Shared archive member basename, for filename based shared library versioning on AIX])dnl ])# _LT_WITH_AIX_SONAME LT_OPTION_DEFINE([LT_INIT], [aix-soname=aix], [_LT_WITH_AIX_SONAME([aix])]) LT_OPTION_DEFINE([LT_INIT], [aix-soname=both], [_LT_WITH_AIX_SONAME([both])]) LT_OPTION_DEFINE([LT_INIT], [aix-soname=svr4], [_LT_WITH_AIX_SONAME([svr4])]) # _LT_WITH_PIC([MODE]) # -------------------- # implement the --with-pic flag, and support the 'pic-only' and 'no-pic' # LT_INIT options. # MODE is either 'yes' or 'no'. If omitted, it defaults to 'both'. m4_define([_LT_WITH_PIC], [AC_ARG_WITH([pic], [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@], [try to use only PIC/non-PIC objects @<:@default=use both@:>@])], [lt_p=${PACKAGE-default} case $withval in yes|no) pic_mode=$withval ;; *) pic_mode=default # Look at the argument we got. We use all the common list separators. lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, for lt_pkg in $withval; do IFS=$lt_save_ifs if test "X$lt_pkg" = "X$lt_p"; then pic_mode=yes fi done IFS=$lt_save_ifs ;; esac], [pic_mode=m4_default([$1], [default])]) _LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl ])# _LT_WITH_PIC LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])]) LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])]) # Old name: AU_DEFUN([AC_LIBTOOL_PICMODE], [_LT_SET_OPTION([LT_INIT], [pic-only]) AC_DIAGNOSE([obsolete], [$0: Remove this warning and the call to _LT_SET_OPTION when you put the 'pic-only' option into LT_INIT's first parameter.]) ]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_PICMODE], []) ## ----------------- ## ## LTDL_INIT Options ## ## ----------------- ## m4_define([_LTDL_MODE], []) LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive], [m4_define([_LTDL_MODE], [nonrecursive])]) LT_OPTION_DEFINE([LTDL_INIT], [recursive], [m4_define([_LTDL_MODE], [recursive])]) LT_OPTION_DEFINE([LTDL_INIT], [subproject], [m4_define([_LTDL_MODE], [subproject])]) m4_define([_LTDL_TYPE], []) LT_OPTION_DEFINE([LTDL_INIT], [installable], [m4_define([_LTDL_TYPE], [installable])]) LT_OPTION_DEFINE([LTDL_INIT], [convenience], [m4_define([_LTDL_TYPE], [convenience])]) starpu-1.4.9+dfsg/m4/ltsugar.m4000066400000000000000000000104531507764646700162560ustar00rootroot00000000000000# ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*- # # Copyright (C) 2004-2005, 2007-2008, 2011-2019, 2021-2022 Free Software # Foundation, Inc. # Written by Gary V. Vaughan, 2004 # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. # serial 6 ltsugar.m4 # This is to help aclocal find these macros, as it can't see m4_define. AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) # lt_join(SEP, ARG1, [ARG2...]) # ----------------------------- # Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their # associated separator. # Needed until we can rely on m4_join from Autoconf 2.62, since all earlier # versions in m4sugar had bugs. m4_define([lt_join], [m4_if([$#], [1], [], [$#], [2], [[$2]], [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) m4_define([_lt_join], [m4_if([$#$2], [2], [], [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) # lt_car(LIST) # lt_cdr(LIST) # ------------ # Manipulate m4 lists. # These macros are necessary as long as will still need to support # Autoconf-2.59, which quotes differently. m4_define([lt_car], [[$1]]) m4_define([lt_cdr], [m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], [$#], 1, [], [m4_dquote(m4_shift($@))])]) m4_define([lt_unquote], $1) # lt_append(MACRO-NAME, STRING, [SEPARATOR]) # ------------------------------------------ # Redefine MACRO-NAME to hold its former content plus 'SEPARATOR''STRING'. # Note that neither SEPARATOR nor STRING are expanded; they are appended # to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). # No SEPARATOR is output if MACRO-NAME was previously undefined (different # than defined and empty). # # This macro is needed until we can rely on Autoconf 2.62, since earlier # versions of m4sugar mistakenly expanded SEPARATOR but not STRING. m4_define([lt_append], [m4_define([$1], m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) # lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) # ---------------------------------------------------------- # Produce a SEP delimited list of all paired combinations of elements of # PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list # has the form PREFIXmINFIXSUFFIXn. # Needed until we can rely on m4_combine added in Autoconf 2.62. m4_define([lt_combine], [m4_if(m4_eval([$# > 3]), [1], [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl [[m4_foreach([_Lt_prefix], [$2], [m4_foreach([_Lt_suffix], ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) # lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) # ----------------------------------------------------------------------- # Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited # by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. m4_define([lt_if_append_uniq], [m4_ifdef([$1], [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], [lt_append([$1], [$2], [$3])$4], [$5])], [lt_append([$1], [$2], [$3])$4])]) # lt_dict_add(DICT, KEY, VALUE) # ----------------------------- m4_define([lt_dict_add], [m4_define([$1($2)], [$3])]) # lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) # -------------------------------------------- m4_define([lt_dict_add_subkey], [m4_define([$1($2:$3)], [$4])]) # lt_dict_fetch(DICT, KEY, [SUBKEY]) # ---------------------------------- m4_define([lt_dict_fetch], [m4_ifval([$3], m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) # lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) # ----------------------------------------------------------------- m4_define([lt_if_dict_fetch], [m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], [$5], [$6])]) # lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) # -------------------------------------------------------------- m4_define([lt_dict_filter], [m4_if([$5], [], [], [lt_join(m4_quote(m4_default([$4], [[, ]])), lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl ]) starpu-1.4.9+dfsg/m4/ltversion.m4000066400000000000000000000013121507764646700166140ustar00rootroot00000000000000# ltversion.m4 -- version numbers -*- Autoconf -*- # # Copyright (C) 2004, 2011-2019, 2021-2022 Free Software Foundation, # Inc. # Written by Scott James Remnant, 2004 # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. # @configure_input@ # serial 4245 ltversion.m4 # This file is part of GNU Libtool m4_define([LT_PACKAGE_VERSION], [2.4.7]) m4_define([LT_PACKAGE_REVISION], [2.4.7]) AC_DEFUN([LTVERSION_VERSION], [macro_version='2.4.7' macro_revision='2.4.7' _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) _LT_DECL(, macro_revision, 0) ]) starpu-1.4.9+dfsg/m4/lt~obsolete.m4000066400000000000000000000140071507764646700171460ustar00rootroot00000000000000# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- # # Copyright (C) 2004-2005, 2007, 2009, 2011-2019, 2021-2022 Free # Software Foundation, Inc. # Written by Scott James Remnant, 2004. # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. # serial 5 lt~obsolete.m4 # These exist entirely to fool aclocal when bootstrapping libtool. # # In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN), # which have later been changed to m4_define as they aren't part of the # exported API, or moved to Autoconf or Automake where they belong. # # The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN # in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us # using a macro with the same name in our local m4/libtool.m4 it'll # pull the old libtool.m4 in (it doesn't see our shiny new m4_define # and doesn't know about Autoconf macros at all.) # # So we provide this file, which has a silly filename so it's always # included after everything else. This provides aclocal with the # AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything # because those macros already exist, or will be overwritten later. # We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. # # Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. # Yes, that means every name once taken will need to remain here until # we give up compatibility with versions before 1.7, at which point # we need to keep only those names which we still refer to. # This is to help aclocal find these macros, as it can't see m4_define. AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) starpu-1.4.9+dfsg/m4/pkg.m4000066400000000000000000000121451507764646700153560ustar00rootroot00000000000000# pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- # # Copyright © 2004 Scott James Remnant . # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # PKG_PROG_PKG_CONFIG([MIN-VERSION]) # ---------------------------------- AC_DEFUN([PKG_PROG_PKG_CONFIG], [m4_pattern_forbid([^_?PKG_[A-Z_]+$]) m4_pattern_allow([^PKG_CONFIG(_PATH)?$]) AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])dnl if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then AC_PATH_TOOL([PKG_CONFIG], [pkg-config]) fi if test -n "$PKG_CONFIG"; then _pkg_min_version=m4_default([$1], [0.9.0]) AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version]) if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no]) PKG_CONFIG="" fi fi[]dnl ])# PKG_PROG_PKG_CONFIG # PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) # # Check to see whether a particular set of modules exists. Similar # to PKG_CHECK_MODULES(), but does not set variables or print errors. # # # Similar to PKG_CHECK_MODULES, make sure that the first instance of # this or PKG_CHECK_MODULES is called, or make sure to call # PKG_CHECK_EXISTS manually # -------------------------------------------------------------- AC_DEFUN([PKG_CHECK_EXISTS], [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl if test -n "$PKG_CONFIG" && \ AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then m4_ifval([$2], [$2], [:]) m4_ifvaln([$3], [else $3])dnl fi]) # _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) # --------------------------------------------- m4_define([_PKG_CONFIG], [if test -n "$PKG_CONFIG"; then if test -n "$$1"; then pkg_cv_[]$1="$$1" else PKG_CHECK_EXISTS([$3], [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`], [pkg_failed=yes]) fi else pkg_failed=untried fi[]dnl ])# _PKG_CONFIG # _PKG_SHORT_ERRORS_SUPPORTED # ----------------------------- AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED], [AC_REQUIRE([PKG_PROG_PKG_CONFIG]) if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi[]dnl ])# _PKG_SHORT_ERRORS_SUPPORTED # PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], # [ACTION-IF-NOT-FOUND]) # # # Note that if there is a possibility the first call to # PKG_CHECK_MODULES might not happen, you should be sure to include an # explicit call to PKG_PROG_PKG_CONFIG in your configure.ac # # # -------------------------------------------------------------- AC_DEFUN([PKG_CHECK_MODULES], [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl pkg_failed=no AC_MSG_CHECKING([for $1]) _PKG_CONFIG([$1][_CFLAGS], [cflags], [$2]) _PKG_CONFIG([$1][_LIBS], [libs], [$2]) m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS and $1[]_LIBS to avoid the need to call pkg-config. See the pkg-config man page for more details.]) if test $pkg_failed = yes; then _PKG_SHORT_ERRORS_SUPPORTED if test $_pkg_short_errors_supported = yes; then $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "$2"` else $1[]_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "$2"` fi # Put the nasty error message in config.log where it belongs echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD ifelse([$4], , [AC_MSG_ERROR(dnl [Package requirements ($2) were not met: $$1_PKG_ERRORS Consider adjusting the PKG_CONFIG_PATH environment variable if you installed software in a non-standard prefix. _PKG_TEXT ])], [AC_MSG_RESULT([no]) $4]) elif test $pkg_failed = untried; then ifelse([$4], , [AC_MSG_FAILURE(dnl [The pkg-config script could not be found or is too old. Make sure it is in your PATH or set the PKG_CONFIG environment variable to the full path to pkg-config. _PKG_TEXT To get pkg-config, see .])], [$4]) else $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS $1[]_LIBS=$pkg_cv_[]$1[]_LIBS AC_MSG_RESULT([yes]) ifelse([$3], , :, [$3]) fi[]dnl ])# PKG_CHECK_MODULES starpu-1.4.9+dfsg/make/000077500000000000000000000000001507764646700147255ustar00rootroot00000000000000starpu-1.4.9+dfsg/make/starpu-loader.mk000066400000000000000000000064021507764646700200420ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # noinst_PROGRAMS = # # Test loading goes through a lot of launchers: # # - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. # either mpirun or starpu_tcpipexec # # - $(LOADER), i.e. tests/loader, is then called to implement timeout, running # gdb, etc. But if it detects that the test is a .sh script, it just executes # it # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # to run the program through e.g. valgrind.sh # # When the program is a shell script, additionally: # # - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) # # - $(MS_LAUNCHER) is called to run the test through starpu_msexec # # - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program # through it. # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # export LAUNCHER if HAVE_PARALLEL # When GNU parallel is available and -j is passed to make, run tests through # parallel, using a "starpu" semaphore. # Also make test shell scripts run its tests through parallel, using a # "substarpu" semaphore. This brings some overload, but only one level. STARPU_SUB_PARALLEL=$(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') export STARPU_SUB_PARALLEL endif export MS_LAUNCHER if STARPU_USE_MPI_MASTER_SLAVE # Make tests run through mpiexec LAUNCHER += $(abs_top_srcdir)/tools/starpu_msexec MS_LAUNCHER = $(STARPU_MPIEXEC) LAUNCHER_ENV += $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 endif if STARPU_USE_TCPIP_MASTER_SLAVE LAUNCHER += $(abs_top_srcdir)/tools/starpu_msexec MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 # switch off local socket usage #MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal LAUNCHER_ENV += STARPU_RESERVE_NCPU=2 endif LAUNCHER ?= MS_LAUNCHER ?= if STARPU_HAVE_WINDOWS LOADER_BIN = $(LAUNCHER) $(EXTERNAL) else LOADER ?= ./loader loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) noinst_PROGRAMS += loader endif LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr export LSAN_OPTIONS export TSAN_OPTIONS if STARPU_HAVE_AM111 TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" LOG_COMPILER = $(LOADER_BIN) else TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) endif AM_TESTS_FD_REDIRECT = 9>&2 starpu-1.4.9+dfsg/make/starpu-notests.mk000066400000000000000000000015301507764646700202700ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu.mk recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null starpu-1.4.9+dfsg/make/starpu-subdirtests.mk000066400000000000000000000023231507764646700211450ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # recheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i recheck || RET=1 ; \ done ; \ exit $$RET showcheckfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET starpu-1.4.9+dfsg/make/starpu-tests.mk000066400000000000000000000073271507764646700177450ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # LAUNCHER_ENV = LAUNCHER = include $(top_srcdir)/make/starpu.mk STARPU_MPI_NP ?= 4 # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 if STARPU_SIMGRID STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile else STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) endif showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET if STARPU_SIMGRID export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling export STARPU_HOSTNAME=mirage export MALLOC_PERTURB_=0 env: @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) endif if STARPU_SIMGRID export STARPU_SIMGRID=1 endif if STARPU_QUICK_CHECK export STARPU_QUICK_CHECK=1 endif if STARPU_LONG_CHECK export STARPU_LONG_CHECK=1 endif starpu-1.4.9+dfsg/make/starpu.mk000066400000000000000000000045561507764646700166060ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # AM_CFLAGS = $(GLOBAL_AM_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) if STARPU_USE_CUDA V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) V_nvcc_0 = @echo " NVCC " $@; V_nvcc_1 = V_nvcc = $(V_nvcc_$(V)) if STARPU_COVERITY # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) V_mynvcc_0 = @echo " myNVCC " $@; V_mynvcc_1 = V_mynvcc = $(V_mynvcc_$(V)) .cu.o: @$(MKDIR_P) `dirname $@` $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c else NVCCFLAGS += --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) .cu.cubin: $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) .cu.o: $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) endif endif if STARPU_USE_HIP V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) V_hipcc_0 = @echo " HIPCC " $@; V_hipcc_1 = V_hipcc = $(V_hipcc_$(V)) HIPCCFLAGS += -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ .hip.o: $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) endif V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) starpu-1.4.9+dfsg/min-dgels/000077500000000000000000000000001507764646700156675ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/Makefile000066400000000000000000000017701507764646700173340ustar00rootroot00000000000000CC = gcc LD = /usr/bin/ld -m elf_x86_64 srcdir = . CLAPACK=base ADDITIONAL=additional all: mkdir -p build [ -d "$(CLAPACK)" ] || ( cp -a $(srcdir)/$(CLAPACK) . ; chmod -R +rwX $(CLAPACK) ) cd $(CLAPACK) && $(MAKE) blaslib CC="$(CC)" LD="$(LD)" cd $(CLAPACK) && $(MAKE) f2clib CC="$(CC)" LD="$(LD)" [ -d "$(ADDITIONAL)" ] || ( cp -a $(srcdir)/$(ADDITIONAL) . ; chmod -R +rwX $(ADDITIONAL) ) cd $(ADDITIONAL) && $(CC) -c -fPIC *.c && ar cr ../build/minlibdgels.a *.o && ranlib ../build/minlibdgels.a install: installcheck: uninstall: distuninstallcheck: dvi: clean: -cd $(CLAPACK) && $(MAKE) clean && rm -rf *~ -cd $(ADDITIONAL) && rm -rf *.o *~ rm -rf build *~ distclean: clean [ -f Makefile.in ] || rm -fr $(CLAPACK) $(ADDITIONAL) # This part is needed by StarPU STARPU_SRCDIR = . distdir: cp -fRp $(STARPU_SRCDIR)/* $(distdir) cd $(distdir) && make -f Makefile.in clean check: echo "No checks are implemented for min-dgels" showfailed: @: showcheck: check showsuite: check recheck: check starpu-1.4.9+dfsg/min-dgels/Makefile.in000066400000000000000000000017621507764646700177420ustar00rootroot00000000000000CC = @CC@ LD = @LD@ srcdir = @srcdir@ CLAPACK=base ADDITIONAL=additional all: mkdir -p build [ -d "$(CLAPACK)" ] || ( cp -a $(srcdir)/$(CLAPACK) . ; chmod -R +rwX $(CLAPACK) ) cd $(CLAPACK) && $(MAKE) blaslib CC="$(CC)" LD="$(LD)" cd $(CLAPACK) && $(MAKE) f2clib CC="$(CC)" LD="$(LD)" [ -d "$(ADDITIONAL)" ] || ( cp -a $(srcdir)/$(ADDITIONAL) . ; chmod -R +rwX $(ADDITIONAL) ) cd $(ADDITIONAL) && $(CC) -c -fPIC *.c && ar cr ../build/minlibdgels.a *.o && ranlib ../build/minlibdgels.a install: installcheck: uninstall: distuninstallcheck: dvi: clean: -cd $(CLAPACK) && $(MAKE) clean && rm -rf *~ -cd $(ADDITIONAL) && rm -rf *.o *~ rm -rf build *~ distclean: clean [ -f Makefile.in ] || rm -fr $(CLAPACK) $(ADDITIONAL) # This part is needed by StarPU STARPU_SRCDIR = @srcdir@ distdir: cp -fRp $(STARPU_SRCDIR)/* $(distdir) cd $(distdir) && make -f Makefile.in clean check: echo "No checks are implemented for min-dgels" showfailed: @: showcheck: check showsuite: check recheck: check starpu-1.4.9+dfsg/min-dgels/additional/000077500000000000000000000000001507764646700177775ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/additional/blaswrap.h000066400000000000000000000001741507764646700217650ustar00rootroot00000000000000/* CLAPACK 3.0 BLAS wrapper macros * Feb 5, 2000 */ #ifndef __BLASWRAP_H #define __BLASWRAP_H #endif /* __BLASWRAP_H */ starpu-1.4.9+dfsg/min-dgels/additional/clapack.h000066400000000000000000012720571507764646700215640ustar00rootroot00000000000000/* header file for clapack 3.2.1 */ #ifndef __CLAPACK_H #define __CLAPACK_H #ifdef __cplusplus extern "C" { #endif /* Subroutine */ int _starpu_caxpy_(integer *n, complex *ca, complex *cx, integer * incx, complex *cy, integer *incy); /* Subroutine */ int _starpu_ccopy_(integer *n, complex *cx, integer *incx, complex * cy, integer *incy); /* Complex */ VOID _starpu_cdotc_(complex * ret_val, integer *n, complex *cx, integer *incx, complex *cy, integer *incy); /* Complex */ VOID _starpu_cdotu_(complex * ret_val, integer *n, complex *cx, integer *incx, complex *cy, integer *incy); /* Subroutine */ int _starpu_cgbmv_(char *trans, integer *m, integer *n, integer *kl, integer *ku, complex *alpha, complex *a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, integer *incy); /* Subroutine */ int _starpu_cgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, complex *beta, complex *c__, integer *ldc); /* Subroutine */ int _starpu_cgemv_(char *trans, integer *m, integer *n, complex * alpha, complex *a, integer *lda, complex *x, integer *incx, complex * beta, complex *y, integer *incy); /* Subroutine */ int _starpu_cgerc_(integer *m, integer *n, complex *alpha, complex * x, integer *incx, complex *y, integer *incy, complex *a, integer *lda); /* Subroutine */ int _starpu_cgeru_(integer *m, integer *n, complex *alpha, complex * x, integer *incx, complex *y, integer *incy, complex *a, integer *lda); /* Subroutine */ int _starpu_chbmv_(char *uplo, integer *n, integer *k, complex * alpha, complex *a, integer *lda, complex *x, integer *incx, complex * beta, complex *y, integer *incy); /* Subroutine */ int _starpu_chemm_(char *side, char *uplo, integer *m, integer *n, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, complex *beta, complex *c__, integer *ldc); /* Subroutine */ int _starpu_chemv_(char *uplo, integer *n, complex *alpha, complex * a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, integer *incy); /* Subroutine */ int _starpu_cher_(char *uplo, integer *n, real *alpha, complex *x, integer *incx, complex *a, integer *lda); /* Subroutine */ int _starpu_cher2_(char *uplo, integer *n, complex *alpha, complex * x, integer *incx, complex *y, integer *incy, complex *a, integer *lda); /* Subroutine */ int _starpu_cher2k_(char *uplo, char *trans, integer *n, integer *k, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, real *beta, complex *c__, integer *ldc); /* Subroutine */ int _starpu_cherk_(char *uplo, char *trans, integer *n, integer *k, real *alpha, complex *a, integer *lda, real *beta, complex *c__, integer *ldc); /* Subroutine */ int _starpu_chpmv_(char *uplo, integer *n, complex *alpha, complex * ap, complex *x, integer *incx, complex *beta, complex *y, integer * incy); /* Subroutine */ int _starpu_chpr_(char *uplo, integer *n, real *alpha, complex *x, integer *incx, complex *ap); /* Subroutine */ int _starpu_chpr2_(char *uplo, integer *n, complex *alpha, complex * x, integer *incx, complex *y, integer *incy, complex *ap); /* Subroutine */ int _starpu_crotg_(complex *ca, complex *cb, real *c__, complex *s); /* Subroutine */ int _starpu_cscal_(integer *n, complex *ca, complex *cx, integer * incx); /* Subroutine */ int _starpu__starpu_csrot_(integer *n, complex *cx, integer *incx, complex * cy, integer *incy, real *c__, real *s); /* Subroutine */ int _starpu_csscal_(integer *n, real *sa, complex *cx, integer *incx); /* Subroutine */ int _starpu_cswap_(integer *n, complex *cx, integer *incx, complex * cy, integer *incy); /* Subroutine */ int _starpu_csymm_(char *side, char *uplo, integer *m, integer *n, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, complex *beta, complex *c__, integer *ldc); /* Subroutine */ int _starpu_csyr2k_(char *uplo, char *trans, integer *n, integer *k, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, complex *beta, complex *c__, integer *ldc); /* Subroutine */ int _starpu_csyrk_(char *uplo, char *trans, integer *n, integer *k, complex *alpha, complex *a, integer *lda, complex *beta, complex *c__, integer *ldc); /* Subroutine */ int _starpu_ctbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, complex *a, integer *lda, complex *x, integer *incx); /* Subroutine */ int _starpu_ctbsv_(char *uplo, char *trans, char *diag, integer *n, integer *k, complex *a, integer *lda, complex *x, integer *incx); /* Subroutine */ int _starpu_ctpmv_(char *uplo, char *trans, char *diag, integer *n, complex *ap, complex *x, integer *incx); /* Subroutine */ int _starpu_ctpsv_(char *uplo, char *trans, char *diag, integer *n, complex *ap, complex *x, integer *incx); /* Subroutine */ int _starpu_ctrmm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb); /* Subroutine */ int _starpu_ctrmv_(char *uplo, char *trans, char *diag, integer *n, complex *a, integer *lda, complex *x, integer *incx); /* Subroutine */ int _starpu_ctrsm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb); /* Subroutine */ int _starpu_ctrsv_(char *uplo, char *trans, char *diag, integer *n, complex *a, integer *lda, complex *x, integer *incx); doublereal _starpu_dasum_(integer *n, doublereal *dx, integer *incx); /* Subroutine */ int _starpu_daxpy_(integer *n, doublereal *da, doublereal *dx, integer *incx, doublereal *dy, integer *incy); doublereal _starpu_dcabs1_(doublecomplex *z__); /* Subroutine */ int _starpu_dcopy_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy); doublereal _starpu_ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy); /* Subroutine */ int _starpu_dgbmv_(char *trans, integer *m, integer *n, integer *kl, integer *ku, doublereal *alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); /* Subroutine */ int _starpu_dgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, doublereal *alpha, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, integer *ldc); /* Subroutine */ int _starpu_dgemv_(char *trans, integer *m, integer *n, doublereal * alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); /* Subroutine */ int _starpu_dger_(integer *m, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *a, integer *lda); doublereal _starpu_dnrm2_(integer *n, doublereal *x, integer *incx); /* Subroutine */ int _starpu_drot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy, doublereal *c__, doublereal *s); /* Subroutine */ int _starpu_drotg_(doublereal *da, doublereal *db, doublereal *c__, doublereal *s); /* Subroutine */ int _starpu_drotm_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy, doublereal *dparam); /* Subroutine */ int _starpu_drotmg_(doublereal *dd1, doublereal *dd2, doublereal * dx1, doublereal *dy1, doublereal *dparam); /* Subroutine */ int _starpu_dsbmv_(char *uplo, integer *n, integer *k, doublereal * alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); /* Subroutine */ int _starpu_dscal_(integer *n, doublereal *da, doublereal *dx, integer *incx); doublereal _starpu_dsdot_(integer *n, real *sx, integer *incx, real *sy, integer * incy); /* Subroutine */ int _starpu_dspmv_(char *uplo, integer *n, doublereal *alpha, doublereal *ap, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); /* Subroutine */ int _starpu_dspr_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *ap); /* Subroutine */ int _starpu_dspr2_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *ap); /* Subroutine */ int _starpu_dswap_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy); /* Subroutine */ int _starpu_dsymm_(char *side, char *uplo, integer *m, integer *n, doublereal *alpha, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, integer *ldc); /* Subroutine */ int _starpu_dsymv_(char *uplo, integer *n, doublereal *alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); /* Subroutine */ int _starpu_dsyr_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *a, integer *lda); /* Subroutine */ int _starpu_dsyr2_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *a, integer *lda); /* Subroutine */ int _starpu_dsyr2k_(char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, integer *ldc); /* Subroutine */ int _starpu_dsyrk_(char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, doublereal *c__, integer *ldc); /* Subroutine */ int _starpu_dtbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx); /* Subroutine */ int _starpu_dtbsv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx); /* Subroutine */ int _starpu_dtpmv_(char *uplo, char *trans, char *diag, integer *n, doublereal *ap, doublereal *x, integer *incx); /* Subroutine */ int _starpu_dtpsv_(char *uplo, char *trans, char *diag, integer *n, doublereal *ap, doublereal *x, integer *incx); /* Subroutine */ int _starpu_dtrmm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, integer * lda, doublereal *b, integer *ldb); /* Subroutine */ int _starpu_dtrmv_(char *uplo, char *trans, char *diag, integer *n, doublereal *a, integer *lda, doublereal *x, integer *incx); /* Subroutine */ int _starpu_dtrsm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, integer * lda, doublereal *b, integer *ldb); /* Subroutine */ int _starpu_dtrsv_(char *uplo, char *trans, char *diag, integer *n, doublereal *a, integer *lda, doublereal *x, integer *incx); doublereal _starpu_dzasum_(integer *n, doublecomplex *zx, integer *incx); doublereal _starpu_dznrm2_(integer *n, doublecomplex *x, integer *incx); integer _starpu_icamax_(integer *n, complex *cx, integer *incx); integer _starpu_idamax_(integer *n, doublereal *dx, integer *incx); integer _starpu_isamax_(integer *n, real *sx, integer *incx); integer _starpu_izamax_(integer *n, doublecomplex *zx, integer *incx); logical _starpu_lsame_(char *ca, char *cb); doublereal _starpu_sasum_(integer *n, real *sx, integer *incx); /* Subroutine */ int _starpu_saxpy_(integer *n, real *sa, real *sx, integer *incx, real *sy, integer *incy); doublereal _starpu_scabs1_(complex *z__); doublereal _starpu_scasum_(integer *n, complex *cx, integer *incx); doublereal _starpu_scnrm2_(integer *n, complex *x, integer *incx); /* Subroutine */ int _starpu_scopy_(integer *n, real *sx, integer *incx, real *sy, integer *incy); doublereal _starpu_sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy); doublereal _starpu_sdsdot_(integer *n, real *sb, real *sx, integer *incx, real *sy, integer *incy); /* Subroutine */ int _starpu_sgbmv_(char *trans, integer *m, integer *n, integer *kl, integer *ku, real *alpha, real *a, integer *lda, real *x, integer * incx, real *beta, real *y, integer *incy); /* Subroutine */ int _starpu_sgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, real *alpha, real *a, integer *lda, real *b, integer * ldb, real *beta, real *c__, integer *ldc); /* Subroutine */ int _starpu_sgemv_(char *trans, integer *m, integer *n, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer *incy); /* Subroutine */ int _starpu_sger_(integer *m, integer *n, real *alpha, real *x, integer *incx, real *y, integer *incy, real *a, integer *lda); doublereal _starpu_snrm2_(integer *n, real *x, integer *incx); /* Subroutine */ int _starpu_srot_(integer *n, real *sx, integer *incx, real *sy, integer *incy, real *c__, real *s); /* Subroutine */ int _starpu_srotg_(real *sa, real *sb, real *c__, real *s); /* Subroutine */ int _starpu_srotm_(integer *n, real *sx, integer *incx, real *sy, integer *incy, real *sparam); /* Subroutine */ int _starpu_srotmg_(real *sd1, real *sd2, real *sx1, real *sy1, real *sparam); /* Subroutine */ int _starpu_ssbmv_(char *uplo, integer *n, integer *k, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer *incy); /* Subroutine */ int _starpu_sscal_(integer *n, real *sa, real *sx, integer *incx); /* Subroutine */ int _starpu_sspmv_(char *uplo, integer *n, real *alpha, real *ap, real *x, integer *incx, real *beta, real *y, integer *incy); /* Subroutine */ int _starpu_sspr_(char *uplo, integer *n, real *alpha, real *x, integer *incx, real *ap); /* Subroutine */ int _starpu_sspr2_(char *uplo, integer *n, real *alpha, real *x, integer *incx, real *y, integer *incy, real *ap); /* Subroutine */ int _starpu_sswap_(integer *n, real *sx, integer *incx, real *sy, integer *incy); /* Subroutine */ int _starpu_ssymm_(char *side, char *uplo, integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, real *c__, integer *ldc); /* Subroutine */ int _starpu_ssymv_(char *uplo, integer *n, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer * incy); /* Subroutine */ int _starpu_ssyr_(char *uplo, integer *n, real *alpha, real *x, integer *incx, real *a, integer *lda); /* Subroutine */ int _starpu_ssyr2_(char *uplo, integer *n, real *alpha, real *x, integer *incx, real *y, integer *incy, real *a, integer *lda); /* Subroutine */ int _starpu_ssyr2k_(char *uplo, char *trans, integer *n, integer *k, real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, real *c__, integer *ldc); /* Subroutine */ int _starpu_ssyrk_(char *uplo, char *trans, integer *n, integer *k, real *alpha, real *a, integer *lda, real *beta, real *c__, integer * ldc); /* Subroutine */ int _starpu_stbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, real *a, integer *lda, real *x, integer *incx); /* Subroutine */ int _starpu_stbsv_(char *uplo, char *trans, char *diag, integer *n, integer *k, real *a, integer *lda, real *x, integer *incx); /* Subroutine */ int _starpu_stpmv_(char *uplo, char *trans, char *diag, integer *n, real *ap, real *x, integer *incx); /* Subroutine */ int _starpu_stpsv_(char *uplo, char *trans, char *diag, integer *n, real *ap, real *x, integer *incx); /* Subroutine */ int _starpu_strmm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, integer *ldb); /* Subroutine */ int _starpu_strmv_(char *uplo, char *trans, char *diag, integer *n, real *a, integer *lda, real *x, integer *incx); /* Subroutine */ int _starpu_strsm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, integer *ldb); /* Subroutine */ int _starpu_strsv_(char *uplo, char *trans, char *diag, integer *n, real *a, integer *lda, real *x, integer *incx); /* Subroutine */ int _starpu_xerbla_(char *srname, integer *info); /* Subroutine */ int _starpu_xerbla_array__(char *srname_array__, integer * srname_len__, integer *info, ftnlen srname_array_len); /* Subroutine */ int _starpu_zaxpy_(integer *n, doublecomplex *za, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy); /* Subroutine */ int _starpu_zcopy_(integer *n, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy); /* Double Complex */ VOID _starpu_zdotc_(doublecomplex * ret_val, integer *n, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy); /* Double Complex */ VOID _starpu_zdotu_(doublecomplex * ret_val, integer *n, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy); /* Subroutine */ int _starpu_zdrot_(integer *n, doublecomplex *cx, integer *incx, doublecomplex *cy, integer *incy, doublereal *c__, doublereal *s); /* Subroutine */ int _starpu_zdscal_(integer *n, doublereal *da, doublecomplex *zx, integer *incx); /* Subroutine */ int _starpu_zgbmv_(char *trans, integer *m, integer *n, integer *kl, integer *ku, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex * y, integer *incy); /* Subroutine */ int _starpu_zgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex * c__, integer *ldc); /* Subroutine */ int _starpu_zgemv_(char *trans, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * x, integer *incx, doublecomplex *beta, doublecomplex *y, integer * incy); /* Subroutine */ int _starpu_zgerc_(integer *m, integer *n, doublecomplex *alpha, doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, doublecomplex *a, integer *lda); /* Subroutine */ int _starpu_zgeru_(integer *m, integer *n, doublecomplex *alpha, doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, doublecomplex *a, integer *lda); /* Subroutine */ int _starpu_zhbmv_(char *uplo, integer *n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer * incx, doublecomplex *beta, doublecomplex *y, integer *incy); /* Subroutine */ int _starpu_zhemm_(char *side, char *uplo, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer * ldc); /* Subroutine */ int _starpu_zhemv_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex *y, integer *incy); /* Subroutine */ int _starpu_zher_(char *uplo, integer *n, doublereal *alpha, doublecomplex *x, integer *incx, doublecomplex *a, integer *lda); /* Subroutine */ int _starpu_zher2_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, doublecomplex *a, integer *lda); /* Subroutine */ int _starpu_zher2k_(char *uplo, char *trans, integer *n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * b, integer *ldb, doublereal *beta, doublecomplex *c__, integer *ldc); /* Subroutine */ int _starpu_zherk_(char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublecomplex *a, integer *lda, doublereal *beta, doublecomplex *c__, integer *ldc); /* Subroutine */ int _starpu_zhpmv_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *ap, doublecomplex *x, integer *incx, doublecomplex * beta, doublecomplex *y, integer *incy); /* Subroutine */ int _starpu_zhpr_(char *uplo, integer *n, doublereal *alpha, doublecomplex *x, integer *incx, doublecomplex *ap); /* Subroutine */ int _starpu_zhpr2_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, doublecomplex *ap); /* Subroutine */ int _starpu_zrotg_(doublecomplex *ca, doublecomplex *cb, doublereal * c__, doublecomplex *s); /* Subroutine */ int _starpu_zscal_(integer *n, doublecomplex *za, doublecomplex *zx, integer *incx); /* Subroutine */ int _starpu_zswap_(integer *n, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy); /* Subroutine */ int _starpu_zsymm_(char *side, char *uplo, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer * ldc); /* Subroutine */ int _starpu_zsyr2k_(char *uplo, char *trans, integer *n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer * ldc); /* Subroutine */ int _starpu_zsyrk_(char *uplo, char *trans, integer *n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * beta, doublecomplex *c__, integer *ldc); /* Subroutine */ int _starpu_ztbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx); /* Subroutine */ int _starpu_ztbsv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx); /* Subroutine */ int _starpu_ztpmv_(char *uplo, char *trans, char *diag, integer *n, doublecomplex *ap, doublecomplex *x, integer *incx); /* Subroutine */ int _starpu_ztpsv_(char *uplo, char *trans, char *diag, integer *n, doublecomplex *ap, doublecomplex *x, integer *incx); /* Subroutine */ int _starpu_ztrmm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_ztrmv_(char *uplo, char *trans, char *diag, integer *n, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx); /* Subroutine */ int _starpu_ztrsm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_ztrsv_(char *uplo, char *trans, char *diag, integer *n, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx); /* Subroutine */ int _starpu_cbdsqr_(char *uplo, integer *n, integer *ncvt, integer * nru, integer *ncc, real *d__, real *e, complex *vt, integer *ldvt, complex *u, integer *ldu, complex *c__, integer *ldc, real *rwork, integer *info); /* Subroutine */ int _starpu_cgbbrd_(char *vect, integer *m, integer *n, integer *ncc, integer *kl, integer *ku, complex *ab, integer *ldab, real *d__, real *e, complex *q, integer *ldq, complex *pt, integer *ldpt, complex *c__, integer *ldc, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgbcon_(char *norm, integer *n, integer *kl, integer *ku, complex *ab, integer *ldab, integer *ipiv, real *anorm, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgbequ_(integer *m, integer *n, integer *kl, integer *ku, complex *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_cgbequb_(integer *m, integer *n, integer *kl, integer * ku, complex *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_cgbrfs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, complex *ab, integer *ldab, complex *afb, integer * ldafb, integer *ipiv, complex *b, integer *ldb, complex *x, integer * ldx, real *ferr, real *berr, complex *work, real *rwork, integer * info); /* Subroutine */ int _starpu_cgbrfsx_(char *trans, char *equed, integer *n, integer * kl, integer *ku, integer *nrhs, complex *ab, integer *ldab, complex * afb, integer *ldafb, integer *ipiv, real *r__, real *c__, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real *params, complex *work, real *rwork, integer * info); /* Subroutine */ int _starpu_cgbsv_(integer *n, integer *kl, integer *ku, integer * nrhs, complex *ab, integer *ldab, integer *ipiv, complex *b, integer * ldb, integer *info); /* Subroutine */ int _starpu_cgbsvx_(char *fact, char *trans, integer *n, integer *kl, integer *ku, integer *nrhs, complex *ab, integer *ldab, complex *afb, integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgbsvxx_(char *fact, char *trans, integer *n, integer * kl, integer *ku, integer *nrhs, complex *ab, integer *ldab, complex * afb, integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer *n_err_bnds__, real * err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * params, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgbtf2_(integer *m, integer *n, integer *kl, integer *ku, complex *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_cgbtrf_(integer *m, integer *n, integer *kl, integer *ku, complex *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_cgbtrs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, complex *ab, integer *ldab, integer *ipiv, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, real *scale, integer *m, complex *v, integer *ldv, integer *info); /* Subroutine */ int _starpu_cgebal_(char *job, integer *n, complex *a, integer *lda, integer *ilo, integer *ihi, real *scale, integer *info); /* Subroutine */ int _starpu_cgebd2_(integer *m, integer *n, complex *a, integer *lda, real *d__, real *e, complex *tauq, complex *taup, complex *work, integer *info); /* Subroutine */ int _starpu_cgebrd_(integer *m, integer *n, complex *a, integer *lda, real *d__, real *e, complex *tauq, complex *taup, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgecon_(char *norm, integer *n, complex *a, integer *lda, real *anorm, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgeequ_(integer *m, integer *n, complex *a, integer *lda, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_cgeequb_(integer *m, integer *n, complex *a, integer * lda, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_cgees_(char *jobvs, char *sort, L_fp select, integer *n, complex *a, integer *lda, integer *sdim, complex *w, complex *vs, integer *ldvs, complex *work, integer *lwork, real *rwork, logical * bwork, integer *info); /* Subroutine */ int _starpu_cgeesx_(char *jobvs, char *sort, L_fp select, char * sense, integer *n, complex *a, integer *lda, integer *sdim, complex * w, complex *vs, integer *ldvs, real *rconde, real *rcondv, complex * work, integer *lwork, real *rwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_cgeev_(char *jobvl, char *jobvr, integer *n, complex *a, integer *lda, complex *w, complex *vl, integer *ldvl, complex *vr, integer *ldvr, complex *work, integer *lwork, real *rwork, integer * info); /* Subroutine */ int _starpu_cgeevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, complex *a, integer *lda, complex *w, complex *vl, integer *ldvl, complex *vr, integer *ldvr, integer *ilo, integer *ihi, real *scale, real *abnrm, real *rconde, real *rcondv, complex *work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_cgegs_(char *jobvsl, char *jobvsr, integer *n, complex * a, integer *lda, complex *b, integer *ldb, complex *alpha, complex * beta, complex *vsl, integer *ldvsl, complex *vsr, integer *ldvsr, complex *work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_cgegv_(char *jobvl, char *jobvr, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *alpha, complex *beta, complex *vl, integer *ldvl, complex *vr, integer *ldvr, complex * work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_cgehd2_(integer *n, integer *ilo, integer *ihi, complex * a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cgehrd_(integer *n, integer *ilo, integer *ihi, complex * a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgelq2_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cgelqf_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgels_(char *trans, integer *m, integer *n, integer * nrhs, complex *a, integer *lda, complex *b, integer *ldb, complex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgelsd_(integer *m, integer *n, integer *nrhs, complex * a, integer *lda, complex *b, integer *ldb, real *s, real *rcond, integer *rank, complex *work, integer *lwork, real *rwork, integer * iwork, integer *info); /* Subroutine */ int _starpu_cgelss_(integer *m, integer *n, integer *nrhs, complex * a, integer *lda, complex *b, integer *ldb, real *s, real *rcond, integer *rank, complex *work, integer *lwork, real *rwork, integer * info); /* Subroutine */ int _starpu_cgelsx_(integer *m, integer *n, integer *nrhs, complex * a, integer *lda, complex *b, integer *ldb, integer *jpvt, real *rcond, integer *rank, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgelsy_(integer *m, integer *n, integer *nrhs, complex * a, integer *lda, complex *b, integer *ldb, integer *jpvt, real *rcond, integer *rank, complex *work, integer *lwork, real *rwork, integer * info); /* Subroutine */ int _starpu_cgeql2_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cgeqlf_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgeqp3_(integer *m, integer *n, complex *a, integer *lda, integer *jpvt, complex *tau, complex *work, integer *lwork, real * rwork, integer *info); /* Subroutine */ int _starpu_cgeqpf_(integer *m, integer *n, complex *a, integer *lda, integer *jpvt, complex *tau, complex *work, real *rwork, integer * info); /* Subroutine */ int _starpu_cgeqr2_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cgeqrf_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgerfs_(char *trans, integer *n, integer *nrhs, complex * a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex * b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgerfsx_(char *trans, char *equed, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, real *r__, real *c__, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real * err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * params, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgerq2_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cgerqf_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgesc2_(integer *n, complex *a, integer *lda, complex * rhs, integer *ipiv, integer *jpiv, real *scale); /* Subroutine */ int _starpu_cgesdd_(char *jobz, integer *m, integer *n, complex *a, integer *lda, real *s, complex *u, integer *ldu, complex *vt, integer *ldvt, complex *work, integer *lwork, real *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_cgesv_(integer *n, integer *nrhs, complex *a, integer * lda, integer *ipiv, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cgesvd_(char *jobu, char *jobvt, integer *m, integer *n, complex *a, integer *lda, real *s, complex *u, integer *ldu, complex * vt, integer *ldvt, complex *work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_cgesvx_(char *fact, char *trans, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, char *equed, real *r__, real *c__, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgesvxx_(char *fact, char *trans, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, char *equed, real *r__, real *c__, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real *params, complex *work, real *rwork, integer * info); /* Subroutine */ int _starpu_cgetc2_(integer *n, complex *a, integer *lda, integer * ipiv, integer *jpiv, integer *info); /* Subroutine */ int _starpu_cgetf2_(integer *m, integer *n, complex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_cgetrf_(integer *m, integer *n, complex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_cgetri_(integer *n, complex *a, integer *lda, integer * ipiv, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgetrs_(char *trans, integer *n, integer *nrhs, complex * a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_cggbak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, real *lscale, real *rscale, integer *m, complex *v, integer *ldv, integer *info); /* Subroutine */ int _starpu_cggbal_(char *job, integer *n, complex *a, integer *lda, complex *b, integer *ldb, integer *ilo, integer *ihi, real *lscale, real *rscale, real *work, integer *info); /* Subroutine */ int _starpu_cgges_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, integer *n, complex *a, integer *lda, complex *b, integer * ldb, integer *sdim, complex *alpha, complex *beta, complex *vsl, integer *ldvsl, complex *vsr, integer *ldvsr, complex *work, integer * lwork, real *rwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_cggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, char *sense, integer *n, complex *a, integer *lda, complex *b, integer *ldb, integer *sdim, complex *alpha, complex *beta, complex * vsl, integer *ldvsl, complex *vsr, integer *ldvsr, real *rconde, real *rcondv, complex *work, integer *lwork, real *rwork, integer *iwork, integer *liwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_cggev_(char *jobvl, char *jobvr, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *alpha, complex *beta, complex *vl, integer *ldvl, complex *vr, integer *ldvr, complex * work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_cggevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *alpha, complex *beta, complex *vl, integer *ldvl, complex * vr, integer *ldvr, integer *ilo, integer *ihi, real *lscale, real * rscale, real *abnrm, real *bbnrm, real *rconde, real *rcondv, complex *work, integer *lwork, real *rwork, integer *iwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_cggglm_(integer *n, integer *m, integer *p, complex *a, integer *lda, complex *b, integer *ldb, complex *d__, complex *x, complex *y, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgghrd_(char *compq, char *compz, integer *n, integer * ilo, integer *ihi, complex *a, integer *lda, complex *b, integer *ldb, complex *q, integer *ldq, complex *z__, integer *ldz, integer *info); /* Subroutine */ int _starpu_cgglse_(integer *m, integer *n, integer *p, complex *a, integer *lda, complex *b, integer *ldb, complex *c__, complex *d__, complex *x, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cggqrf_(integer *n, integer *m, integer *p, complex *a, integer *lda, complex *taua, complex *b, integer *ldb, complex *taub, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cggrqf_(integer *m, integer *p, integer *n, complex *a, integer *lda, complex *taua, complex *b, integer *ldb, complex *taub, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cggsvd_(char *jobu, char *jobv, char *jobq, integer *m, integer *n, integer *p, integer *k, integer *l, complex *a, integer * lda, complex *b, integer *ldb, real *alpha, real *beta, complex *u, integer *ldu, complex *v, integer *ldv, complex *q, integer *ldq, complex *work, real *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_cggsvp_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, complex *a, integer *lda, complex *b, integer *ldb, real *tola, real *tolb, integer *k, integer *l, complex *u, integer *ldu, complex *v, integer *ldv, complex *q, integer *ldq, integer *iwork, real *rwork, complex *tau, complex *work, integer * info); /* Subroutine */ int _starpu_cgtcon_(char *norm, integer *n, complex *dl, complex * d__, complex *du, complex *du2, integer *ipiv, real *anorm, real * rcond, complex *work, integer *info); /* Subroutine */ int _starpu_cgtrfs_(char *trans, integer *n, integer *nrhs, complex * dl, complex *d__, complex *du, complex *dlf, complex *df, complex * duf, complex *du2, integer *ipiv, complex *b, integer *ldb, complex * x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgtsv_(integer *n, integer *nrhs, complex *dl, complex * d__, complex *du, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cgtsvx_(char *fact, char *trans, integer *n, integer * nrhs, complex *dl, complex *d__, complex *du, complex *dlf, complex * df, complex *duf, complex *du2, integer *ipiv, complex *b, integer * ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgttrf_(integer *n, complex *dl, complex *d__, complex * du, complex *du2, integer *ipiv, integer *info); /* Subroutine */ int _starpu_cgttrs_(char *trans, integer *n, integer *nrhs, complex * dl, complex *d__, complex *du, complex *du2, integer *ipiv, complex * b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cgtts2_(integer *itrans, integer *n, integer *nrhs, complex *dl, complex *d__, complex *du, complex *du2, integer *ipiv, complex *b, integer *ldb); /* Subroutine */ int _starpu_chbev_(char *jobz, char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, real *w, complex *z__, integer *ldz, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chbevd_(char *jobz, char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, real *rwork, integer *lrwork, integer * iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_chbevx_(char *jobz, char *range, char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, complex *q, integer *ldq, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer * m, real *w, complex *z__, integer *ldz, complex *work, real *rwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_chbgst_(char *vect, char *uplo, integer *n, integer *ka, integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, complex *x, integer *ldx, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chbgv_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, real *w, complex *z__, integer *ldz, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chbgvd_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, real *rwork, integer *lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_chbgvx_(char *jobz, char *range, char *uplo, integer *n, integer *ka, integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, complex *q, integer *ldq, real *vl, real *vu, integer * il, integer *iu, real *abstol, integer *m, real *w, complex *z__, integer *ldz, complex *work, real *rwork, integer *iwork, integer * ifail, integer *info); /* Subroutine */ int _starpu_chbtrd_(char *vect, char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, real *d__, real *e, complex *q, integer * ldq, complex *work, integer *info); /* Subroutine */ int _starpu_checon_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, real *anorm, real *rcond, complex *work, integer * info); /* Subroutine */ int _starpu_cheequb_(char *uplo, integer *n, complex *a, integer * lda, real *s, real *scond, real *amax, complex *work, integer *info); /* Subroutine */ int _starpu_cheev_(char *jobz, char *uplo, integer *n, complex *a, integer *lda, real *w, complex *work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_cheevd_(char *jobz, char *uplo, integer *n, complex *a, integer *lda, real *w, complex *work, integer *lwork, real *rwork, integer *lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_cheevr_(char *jobz, char *range, char *uplo, integer *n, complex *a, integer *lda, real *vl, real *vu, integer *il, integer * iu, real *abstol, integer *m, real *w, complex *z__, integer *ldz, integer *isuppz, complex *work, integer *lwork, real *rwork, integer * lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_cheevx_(char *jobz, char *range, char *uplo, integer *n, complex *a, integer *lda, real *vl, real *vu, integer *il, integer * iu, real *abstol, integer *m, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, real *rwork, integer *iwork, integer * ifail, integer *info); /* Subroutine */ int _starpu_chegs2_(integer *itype, char *uplo, integer *n, complex * a, integer *lda, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_chegst_(integer *itype, char *uplo, integer *n, complex * a, integer *lda, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_chegv_(integer *itype, char *jobz, char *uplo, integer * n, complex *a, integer *lda, complex *b, integer *ldb, real *w, complex *work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_chegvd_(integer *itype, char *jobz, char *uplo, integer * n, complex *a, integer *lda, complex *b, integer *ldb, real *w, complex *work, integer *lwork, real *rwork, integer *lrwork, integer * iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_chegvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, complex *a, integer *lda, complex *b, integer *ldb, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer * m, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, real *rwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_cherfs_(char *uplo, integer *n, integer *nrhs, complex * a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex * b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cherfsx_(char *uplo, char *equed, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real *params, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chesv_(char *uplo, integer *n, integer *nrhs, complex *a, integer *lda, integer *ipiv, complex *b, integer *ldb, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_chesvx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_chesvxx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, char *equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer * n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * nparams, real *params, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chetd2_(char *uplo, integer *n, complex *a, integer *lda, real *d__, real *e, complex *tau, integer *info); /* Subroutine */ int _starpu_chetf2_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_chetrd_(char *uplo, integer *n, complex *a, integer *lda, real *d__, real *e, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_chetrf_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_chetri_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, complex *work, integer *info); /* Subroutine */ int _starpu_chetrs_(char *uplo, integer *n, integer *nrhs, complex * a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_chfrk_(char *transr, char *uplo, char *trans, integer *n, integer *k, real *alpha, complex *a, integer *lda, real *beta, complex *c__); /* Subroutine */ int _starpu_chgeqz_(char *job, char *compq, char *compz, integer *n, integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *t, integer *ldt, complex *alpha, complex *beta, complex *q, integer *ldq, complex *z__, integer *ldz, complex *work, integer *lwork, real * rwork, integer *info); /* Character */ VOID _starpu_chla_transtype__(char *ret_val, ftnlen ret_val_len, integer *trans); /* Subroutine */ int _starpu_chpcon_(char *uplo, integer *n, complex *ap, integer * ipiv, real *anorm, real *rcond, complex *work, integer *info); /* Subroutine */ int _starpu_chpev_(char *jobz, char *uplo, integer *n, complex *ap, real *w, complex *z__, integer *ldz, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chpevd_(char *jobz, char *uplo, integer *n, complex *ap, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, real *rwork, integer *lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_chpevx_(char *jobz, char *range, char *uplo, integer *n, complex *ap, real *vl, real *vu, integer *il, integer *iu, real * abstol, integer *m, real *w, complex *z__, integer *ldz, complex * work, real *rwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_chpgst_(integer *itype, char *uplo, integer *n, complex * ap, complex *bp, integer *info); /* Subroutine */ int _starpu_chpgv_(integer *itype, char *jobz, char *uplo, integer * n, complex *ap, complex *bp, real *w, complex *z__, integer *ldz, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chpgvd_(integer *itype, char *jobz, char *uplo, integer * n, complex *ap, complex *bp, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, real *rwork, integer *lrwork, integer * iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_chpgvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, complex *ap, complex *bp, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, complex * z__, integer *ldz, complex *work, real *rwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_chprfs_(char *uplo, integer *n, integer *nrhs, complex * ap, complex *afp, integer *ipiv, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chpsv_(char *uplo, integer *n, integer *nrhs, complex * ap, integer *ipiv, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_chpsvx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *ap, complex *afp, integer *ipiv, complex *b, integer * ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chptrd_(char *uplo, integer *n, complex *ap, real *d__, real *e, complex *tau, integer *info); /* Subroutine */ int _starpu_chptrf_(char *uplo, integer *n, complex *ap, integer * ipiv, integer *info); /* Subroutine */ int _starpu_chptri_(char *uplo, integer *n, complex *ap, integer * ipiv, complex *work, integer *info); /* Subroutine */ int _starpu_chptrs_(char *uplo, integer *n, integer *nrhs, complex * ap, integer *ipiv, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_chsein_(char *side, char *eigsrc, char *initv, logical * select, integer *n, complex *h__, integer *ldh, complex *w, complex * vl, integer *ldvl, complex *vr, integer *ldvr, integer *mm, integer * m, complex *work, real *rwork, integer *ifaill, integer *ifailr, integer *info); /* Subroutine */ int _starpu_chseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, complex *z__, integer *ldz, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cla_gbamv__(integer *trans, integer *m, integer *n, integer *kl, integer *ku, real *alpha, complex *ab, integer *ldab, complex *x, integer *incx, real *beta, real *y, integer *incy); doublereal _starpu_cla_gbrcond_c__(char *trans, integer *n, integer *kl, integer *ku, complex *ab, integer *ldab, complex *afb, integer *ldafb, integer * ipiv, real *c__, logical *capply, integer *info, complex *work, real * rwork, ftnlen trans_len); doublereal _starpu_cla_gbrcond_x__(char *trans, integer *n, integer *kl, integer *ku, complex *ab, integer *ldab, complex *afb, integer *ldafb, integer * ipiv, complex *x, integer *info, complex *work, real *rwork, ftnlen trans_len); /* Subroutine */ int _starpu_cla_gbrfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, complex *ab, integer *ldab, complex *afb, integer *ldafb, integer * ipiv, logical *colequ, real *c__, complex *b, integer *ldb, complex * y, integer *ldy, real *berr_out__, integer *n_norms__, real *errs_n__, real *errs_c__, complex *res, real *ayb, complex *dy, complex * y_tail__, real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info); doublereal _starpu_cla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * ncols, complex *ab, integer *ldab, complex *afb, integer *ldafb); /* Subroutine */ int _starpu_cla_geamv__(integer *trans, integer *m, integer *n, real *alpha, complex *a, integer *lda, complex *x, integer *incx, real * beta, real *y, integer *incy); doublereal _starpu_cla_gercond_c__(char *trans, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, real *c__, logical *capply, integer *info, complex *work, real *rwork, ftnlen trans_len); doublereal _starpu_cla_gercond_x__(char *trans, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex *x, integer *info, complex *work, real *rwork, ftnlen trans_len); /* Subroutine */ int _starpu_cla_gerfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, logical *colequ, real *c__, complex *b, integer *ldb, complex *y, integer *ldy, real *berr_out__, integer *n_norms__, real *errs_n__, real *errs_c__, complex *res, real *ayb, complex *dy, complex *y_tail__, real *rcond, integer * ithresh, real *rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info); /* Subroutine */ int _starpu_cla_heamv__(integer *uplo, integer *n, real *alpha, complex *a, integer *lda, complex *x, integer *incx, real *beta, real *y, integer *incy); doublereal _starpu_cla_hercond_c__(char *uplo, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, real *c__, logical *capply, integer *info, complex *work, real *rwork, ftnlen uplo_len); doublereal _starpu_cla_hercond_x__(char *uplo, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex *x, integer *info, complex *work, real *rwork, ftnlen uplo_len); /* Subroutine */ int _starpu_cla_herfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, logical *colequ, real *c__, complex *b, integer *ldb, complex *y, integer *ldy, real *berr_out__, integer * n_norms__, real *errs_n__, real *errs_c__, complex *res, real *ayb, complex *dy, complex *y_tail__, real *rcond, integer *ithresh, real * rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_cla_herpvgrw__(char *uplo, integer *n, integer *info, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, real *work, ftnlen uplo_len); /* Subroutine */ int _starpu_cla_lin_berr__(integer *n, integer *nz, integer *nrhs, complex *res, real *ayb, real *berr); doublereal _starpu_cla_porcond_c__(char *uplo, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, real *c__, logical *capply, integer *info, complex *work, real *rwork, ftnlen uplo_len); doublereal _starpu_cla_porcond_x__(char *uplo, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, complex *x, integer *info, complex *work, real *rwork, ftnlen uplo_len); /* Subroutine */ int _starpu_cla_porfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, complex *a, integer *lda, complex *af, integer *ldaf, logical *colequ, real *c__, complex *b, integer *ldb, complex *y, integer *ldy, real *berr_out__, integer *n_norms__, real * errs_n__, real *errs_c__, complex *res, real *ayb, complex *dy, complex *y_tail__, real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_cla_porpvgrw__(char *uplo, integer *ncols, complex *a, integer * lda, complex *af, integer *ldaf, real *work, ftnlen uplo_len); doublereal _starpu_cla_rpvgrw__(integer *n, integer *ncols, complex *a, integer *lda, complex *af, integer *ldaf); /* Subroutine */ int _starpu_cla_syamv__(integer *uplo, integer *n, real *alpha, complex *a, integer *lda, complex *x, integer *incx, real *beta, real *y, integer *incy); doublereal _starpu_cla_syrcond_c__(char *uplo, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, real *c__, logical *capply, integer *info, complex *work, real *rwork, ftnlen uplo_len); doublereal _starpu_cla_syrcond_x__(char *uplo, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex *x, integer *info, complex *work, real *rwork, ftnlen uplo_len); /* Subroutine */ int _starpu_cla_syrfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, logical *colequ, real *c__, complex *b, integer *ldb, complex *y, integer *ldy, real *berr_out__, integer * n_norms__, real *errs_n__, real *errs_c__, complex *res, real *ayb, complex *dy, complex *y_tail__, real *rcond, integer *ithresh, real * rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_cla_syrpvgrw__(char *uplo, integer *n, integer *info, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, real *work, ftnlen uplo_len); /* Subroutine */ int _starpu_cla_wwaddw__(integer *n, complex *x, complex *y, complex *w); /* Subroutine */ int _starpu_clabrd_(integer *m, integer *n, integer *nb, complex *a, integer *lda, real *d__, real *e, complex *tauq, complex *taup, complex *x, integer *ldx, complex *y, integer *ldy); /* Subroutine */ int _starpu_clacgv_(integer *n, complex *x, integer *incx); /* Subroutine */ int _starpu_clacn2_(integer *n, complex *v, complex *x, real *est, integer *kase, integer *isave); /* Subroutine */ int _starpu_clacon_(integer *n, complex *v, complex *x, real *est, integer *kase); /* Subroutine */ int _starpu_clacp2_(char *uplo, integer *m, integer *n, real *a, integer *lda, complex *b, integer *ldb); /* Subroutine */ int _starpu_clacpy_(char *uplo, integer *m, integer *n, complex *a, integer *lda, complex *b, integer *ldb); /* Subroutine */ int _starpu_clacrm_(integer *m, integer *n, complex *a, integer *lda, real *b, integer *ldb, complex *c__, integer *ldc, real *rwork); /* Subroutine */ int _starpu_clacrt_(integer *n, complex *cx, integer *incx, complex * cy, integer *incy, complex *c__, complex *s); /* Complex */ VOID _starpu_cladiv_(complex * ret_val, complex *x, complex *y); /* Subroutine */ int _starpu_claed0_(integer *qsiz, integer *n, real *d__, real *e, complex *q, integer *ldq, complex *qstore, integer *ldqs, real *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_claed7_(integer *n, integer *cutpnt, integer *qsiz, integer *tlvls, integer *curlvl, integer *curpbm, real *d__, complex * q, integer *ldq, real *rho, integer *indxq, real *qstore, integer * qptr, integer *prmptr, integer *perm, integer *givptr, integer * givcol, real *givnum, complex *work, real *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_claed8_(integer *k, integer *n, integer *qsiz, complex * q, integer *ldq, real *d__, real *rho, integer *cutpnt, real *z__, real *dlamda, complex *q2, integer *ldq2, real *w, integer *indxp, integer *indx, integer *indxq, integer *perm, integer *givptr, integer *givcol, real *givnum, integer *info); /* Subroutine */ int _starpu_claein_(logical *rightv, logical *noinit, integer *n, complex *h__, integer *ldh, complex *w, complex *v, complex *b, integer *ldb, real *rwork, real *eps3, real *smlnum, integer *info); /* Subroutine */ int _starpu_claesy_(complex *a, complex *b, complex *c__, complex * rt1, complex *rt2, complex *evscal, complex *cs1, complex *sn1); /* Subroutine */ int _starpu_claev2_(complex *a, complex *b, complex *c__, real *rt1, real *rt2, real *cs1, complex *sn1); /* Subroutine */ int _starpu_clag2z_(integer *m, integer *n, complex *sa, integer * ldsa, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_clags2_(logical *upper, real *a1, complex *a2, real *a3, real *b1, complex *b2, real *b3, real *csu, complex *snu, real *csv, complex *snv, real *csq, complex *snq); /* Subroutine */ int _starpu_clagtm_(char *trans, integer *n, integer *nrhs, real * alpha, complex *dl, complex *d__, complex *du, complex *x, integer * ldx, real *beta, complex *b, integer *ldb); /* Subroutine */ int _starpu_clahef_(char *uplo, integer *n, integer *nb, integer *kb, complex *a, integer *lda, integer *ipiv, complex *w, integer *ldw, integer *info); /* Subroutine */ int _starpu_clahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * info); /* Subroutine */ int _starpu_clahr2_(integer *n, integer *k, integer *nb, complex *a, integer *lda, complex *tau, complex *t, integer *ldt, complex *y, integer *ldy); /* Subroutine */ int _starpu_clahrd_(integer *n, integer *k, integer *nb, complex *a, integer *lda, complex *tau, complex *t, integer *ldt, complex *y, integer *ldy); /* Subroutine */ int _starpu_claic1_(integer *job, integer *j, complex *x, real *sest, complex *w, complex *gamma, real *sestpr, complex *s, complex *c__); /* Subroutine */ int _starpu_clals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, complex *b, integer *ldb, complex *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * difl, real *difr, real *z__, integer *k, real *c__, real *s, real * rwork, integer *info); /* Subroutine */ int _starpu_clalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, complex *b, integer *ldb, complex *bx, integer *ldbx, real *u, integer *ldu, real *vt, integer *k, real *difl, real *difr, real *z__, real *poles, integer *givptr, integer *givcol, integer * ldgcol, integer *perm, real *givnum, real *c__, real *s, real *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_clalsd_(char *uplo, integer *smlsiz, integer *n, integer *nrhs, real *d__, real *e, complex *b, integer *ldb, real *rcond, integer *rank, complex *work, real *rwork, integer *iwork, integer * info); doublereal _starpu_clangb_(char *norm, integer *n, integer *kl, integer *ku, complex * ab, integer *ldab, real *work); doublereal _starpu_clange_(char *norm, integer *m, integer *n, complex *a, integer * lda, real *work); doublereal _starpu_clangt_(char *norm, integer *n, complex *dl, complex *d__, complex *du); doublereal _starpu_clanhb_(char *norm, char *uplo, integer *n, integer *k, complex * ab, integer *ldab, real *work); doublereal _starpu_clanhe_(char *norm, char *uplo, integer *n, complex *a, integer * lda, real *work); doublereal _starpu_clanhf_(char *norm, char *transr, char *uplo, integer *n, complex * a, real *work); doublereal _starpu_clanhp_(char *norm, char *uplo, integer *n, complex *ap, real * work); doublereal _starpu_clanhs_(char *norm, integer *n, complex *a, integer *lda, real * work); doublereal _starpu_clanht_(char *norm, integer *n, real *d__, complex *e); doublereal _starpu_clansb_(char *norm, char *uplo, integer *n, integer *k, complex * ab, integer *ldab, real *work); doublereal _starpu_clansp_(char *norm, char *uplo, integer *n, complex *ap, real * work); doublereal _starpu_clansy_(char *norm, char *uplo, integer *n, complex *a, integer * lda, real *work); doublereal _starpu_clantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, complex *ab, integer *ldab, real *work); doublereal _starpu_clantp_(char *norm, char *uplo, char *diag, integer *n, complex * ap, real *work); doublereal _starpu_clantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, complex *a, integer *lda, real *work); /* Subroutine */ int _starpu_clapll_(integer *n, complex *x, integer *incx, complex * y, integer *incy, real *ssmin); /* Subroutine */ int _starpu_clapmt_(logical *forwrd, integer *m, integer *n, complex *x, integer *ldx, integer *k); /* Subroutine */ int _starpu_claqgb_(integer *m, integer *n, integer *kl, integer *ku, complex *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, char *equed); /* Subroutine */ int _starpu_claqge_(integer *m, integer *n, complex *a, integer *lda, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, char * equed); /* Subroutine */ int _starpu_claqhb_(char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_claqhe_(char *uplo, integer *n, complex *a, integer *lda, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_claqhp_(char *uplo, integer *n, complex *ap, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_claqp2_(integer *m, integer *n, integer *offset, complex *a, integer *lda, integer *jpvt, complex *tau, real *vn1, real *vn2, complex *work); /* Subroutine */ int _starpu_claqps_(integer *m, integer *n, integer *offset, integer *nb, integer *kb, complex *a, integer *lda, integer *jpvt, complex * tau, real *vn1, real *vn2, complex *auxv, complex *f, integer *ldf); /* Subroutine */ int _starpu_claqr0_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, integer *iloz, integer *ihiz, complex *z__, integer *ldz, complex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_claqr1_(integer *n, complex *h__, integer *ldh, complex * s1, complex *s2, complex *v); /* Subroutine */ int _starpu_claqr2_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, complex *h__, integer *ldh, integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * ns, integer *nd, complex *sh, complex *v, integer *ldv, integer *nh, complex *t, integer *ldt, integer *nv, complex *wv, integer *ldwv, complex *work, integer *lwork); /* Subroutine */ int _starpu_claqr3_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, complex *h__, integer *ldh, integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * ns, integer *nd, complex *sh, complex *v, integer *ldv, integer *nh, complex *t, integer *ldt, integer *nv, complex *wv, integer *ldwv, complex *work, integer *lwork); /* Subroutine */ int _starpu_claqr4_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, integer *iloz, integer *ihiz, complex *z__, integer *ldz, complex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_claqr5_(logical *wantt, logical *wantz, integer *kacc22, integer *n, integer *ktop, integer *kbot, integer *nshfts, complex *s, complex *h__, integer *ldh, integer *iloz, integer *ihiz, complex * z__, integer *ldz, complex *v, integer *ldv, complex *u, integer *ldu, integer *nv, complex *wv, integer *ldwv, integer *nh, complex *wh, integer *ldwh); /* Subroutine */ int _starpu_claqsb_(char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_claqsp_(char *uplo, integer *n, complex *ap, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_claqsy_(char *uplo, integer *n, complex *a, integer *lda, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_clar1v_(integer *n, integer *b1, integer *bn, real * lambda, real *d__, real *l, real *ld, real *lld, real *pivmin, real * gaptol, complex *z__, logical *wantnc, integer *negcnt, real *ztz, real *mingma, integer *r__, integer *isuppz, real *nrminv, real * resid, real *rqcorr, real *work); /* Subroutine */ int _starpu_clar2v_(integer *n, complex *x, complex *y, complex *z__, integer *incx, real *c__, complex *s, integer *incc); /* Subroutine */ int _starpu_clarcm_(integer *m, integer *n, real *a, integer *lda, complex *b, integer *ldb, complex *c__, integer *ldc, real *rwork); /* Subroutine */ int _starpu_clarf_(char *side, integer *m, integer *n, complex *v, integer *incv, complex *tau, complex *c__, integer *ldc, complex * work); /* Subroutine */ int _starpu_clarfb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, complex *v, integer *ldv, complex *t, integer *ldt, complex *c__, integer *ldc, complex *work, integer *ldwork); /* Subroutine */ int _starpu_clarfg_(integer *n, complex *alpha, complex *x, integer * incx, complex *tau); /* Subroutine */ int _starpu_clarfp_(integer *n, complex *alpha, complex *x, integer * incx, complex *tau); /* Subroutine */ int _starpu_clarft_(char *direct, char *storev, integer *n, integer * k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt); /* Subroutine */ int _starpu_clarfx_(char *side, integer *m, integer *n, complex *v, complex *tau, complex *c__, integer *ldc, complex *work); /* Subroutine */ int _starpu_clargv_(integer *n, complex *x, integer *incx, complex * y, integer *incy, real *c__, integer *incc); /* Subroutine */ int _starpu_clarnv_(integer *idist, integer *iseed, integer *n, complex *x); /* Subroutine */ int _starpu_clarrv_(integer *n, real *vl, real *vu, real *d__, real * l, real *pivmin, integer *isplit, integer *m, integer *dol, integer * dou, real *minrgp, real *rtol1, real *rtol2, real *w, real *werr, real *wgap, integer *iblock, integer *indexw, real *gers, complex * z__, integer *ldz, integer *isuppz, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_clarscl2_(integer *m, integer *n, real *d__, complex *x, integer *ldx); /* Subroutine */ int _starpu_clartg_(complex *f, complex *g, real *cs, complex *sn, complex *r__); /* Subroutine */ int _starpu_clartv_(integer *n, complex *x, integer *incx, complex * y, integer *incy, real *c__, complex *s, integer *incc); /* Subroutine */ int _starpu_clarz_(char *side, integer *m, integer *n, integer *l, complex *v, integer *incv, complex *tau, complex *c__, integer *ldc, complex *work); /* Subroutine */ int _starpu_clarzb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, integer *l, complex *v, integer *ldv, complex *t, integer *ldt, complex *c__, integer *ldc, complex *work, integer *ldwork); /* Subroutine */ int _starpu_clarzt_(char *direct, char *storev, integer *n, integer * k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt); /* Subroutine */ int _starpu_clascl_(char *type__, integer *kl, integer *ku, real * cfrom, real *cto, integer *m, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_clascl2_(integer *m, integer *n, real *d__, complex *x, integer *ldx); /* Subroutine */ int _starpu_claset_(char *uplo, integer *m, integer *n, complex * alpha, complex *beta, complex *a, integer *lda); /* Subroutine */ int _starpu_clasr_(char *side, char *pivot, char *direct, integer *m, integer *n, real *c__, real *s, complex *a, integer *lda); /* Subroutine */ int _starpu_classq_(integer *n, complex *x, integer *incx, real * scale, real *sumsq); /* Subroutine */ int _starpu_claswp_(integer *n, complex *a, integer *lda, integer * k1, integer *k2, integer *ipiv, integer *incx); /* Subroutine */ int _starpu_clasyf_(char *uplo, integer *n, integer *nb, integer *kb, complex *a, integer *lda, integer *ipiv, complex *w, integer *ldw, integer *info); /* Subroutine */ int _starpu_clatbs_(char *uplo, char *trans, char *diag, char * normin, integer *n, integer *kd, complex *ab, integer *ldab, complex * x, real *scale, real *cnorm, integer *info); /* Subroutine */ int _starpu_clatdf_(integer *ijob, integer *n, complex *z__, integer *ldz, complex *rhs, real *rdsum, real *rdscal, integer *ipiv, integer *jpiv); /* Subroutine */ int _starpu_clatps_(char *uplo, char *trans, char *diag, char * normin, integer *n, complex *ap, complex *x, real *scale, real *cnorm, integer *info); /* Subroutine */ int _starpu_clatrd_(char *uplo, integer *n, integer *nb, complex *a, integer *lda, real *e, complex *tau, complex *w, integer *ldw); /* Subroutine */ int _starpu_clatrs_(char *uplo, char *trans, char *diag, char * normin, integer *n, complex *a, integer *lda, complex *x, real *scale, real *cnorm, integer *info); /* Subroutine */ int _starpu_clatrz_(integer *m, integer *n, integer *l, complex *a, integer *lda, complex *tau, complex *work); /* Subroutine */ int _starpu_clatzm_(char *side, integer *m, integer *n, complex *v, integer *incv, complex *tau, complex *c1, complex *c2, integer *ldc, complex *work); /* Subroutine */ int _starpu_clauu2_(char *uplo, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_clauum_(char *uplo, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_cpbcon_(char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, real *anorm, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cpbequ_(char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, real *s, real *scond, real *amax, integer *info); /* Subroutine */ int _starpu_cpbrfs_(char *uplo, integer *n, integer *kd, integer * nrhs, complex *ab, integer *ldab, complex *afb, integer *ldafb, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real * berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cpbstf_(char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_cpbsv_(char *uplo, integer *n, integer *kd, integer * nrhs, complex *ab, integer *ldab, complex *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_cpbsvx_(char *fact, char *uplo, integer *n, integer *kd, integer *nrhs, complex *ab, integer *ldab, complex *afb, integer * ldafb, char *equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cpbtf2_(char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_cpbtrf_(char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_cpbtrs_(char *uplo, integer *n, integer *kd, integer * nrhs, complex *ab, integer *ldab, complex *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_cpftrf_(char *transr, char *uplo, integer *n, complex *a, integer *info); /* Subroutine */ int _starpu_cpftri_(char *transr, char *uplo, integer *n, complex *a, integer *info); /* Subroutine */ int _starpu_cpftrs_(char *transr, char *uplo, integer *n, integer * nrhs, complex *a, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cpocon_(char *uplo, integer *n, complex *a, integer *lda, real *anorm, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cpoequ_(integer *n, complex *a, integer *lda, real *s, real *scond, real *amax, integer *info); /* Subroutine */ int _starpu_cpoequb_(integer *n, complex *a, integer *lda, real *s, real *scond, real *amax, integer *info); /* Subroutine */ int _starpu_cporfs_(char *uplo, integer *n, integer *nrhs, complex * a, integer *lda, complex *af, integer *ldaf, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cporfsx_(char *uplo, char *equed, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real * err_bnds_comp__, integer *nparams, real *params, complex *work, real * rwork, integer *info); /* Subroutine */ int _starpu_cposv_(char *uplo, integer *n, integer *nrhs, complex *a, integer *lda, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cposvx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, char * equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cposvxx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, char * equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer *n_err_bnds__, real * err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * params, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cpotf2_(char *uplo, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_cpotrf_(char *uplo, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_cpotri_(char *uplo, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_cpotrs_(char *uplo, integer *n, integer *nrhs, complex * a, integer *lda, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cppcon_(char *uplo, integer *n, complex *ap, real *anorm, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cppequ_(char *uplo, integer *n, complex *ap, real *s, real *scond, real *amax, integer *info); /* Subroutine */ int _starpu_cpprfs_(char *uplo, integer *n, integer *nrhs, complex * ap, complex *afp, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cppsv_(char *uplo, integer *n, integer *nrhs, complex * ap, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cppsvx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *ap, complex *afp, char *equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cpptrf_(char *uplo, integer *n, complex *ap, integer * info); /* Subroutine */ int _starpu_cpptri_(char *uplo, integer *n, complex *ap, integer * info); /* Subroutine */ int _starpu_cpptrs_(char *uplo, integer *n, integer *nrhs, complex * ap, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cpstf2_(char *uplo, integer *n, complex *a, integer *lda, integer *piv, integer *rank, real *tol, real *work, integer *info); /* Subroutine */ int _starpu_cpstrf_(char *uplo, integer *n, complex *a, integer *lda, integer *piv, integer *rank, real *tol, real *work, integer *info); /* Subroutine */ int _starpu_cptcon_(integer *n, real *d__, complex *e, real *anorm, real *rcond, real *rwork, integer *info); /* Subroutine */ int _starpu_cpteqr_(char *compz, integer *n, real *d__, real *e, complex *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_cptrfs_(char *uplo, integer *n, integer *nrhs, real *d__, complex *e, real *df, complex *ef, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cptsv_(integer *n, integer *nrhs, real *d__, complex *e, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cptsvx_(char *fact, integer *n, integer *nrhs, real *d__, complex *e, real *df, complex *ef, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cpttrf_(integer *n, real *d__, complex *e, integer *info); /* Subroutine */ int _starpu_cpttrs_(char *uplo, integer *n, integer *nrhs, real *d__, complex *e, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cptts2_(integer *iuplo, integer *n, integer *nrhs, real * d__, complex *e, complex *b, integer *ldb); /* Subroutine */ int _starpu_crot_(integer *n, complex *cx, integer *incx, complex * cy, integer *incy, real *c__, complex *s); /* Subroutine */ int _starpu_cspcon_(char *uplo, integer *n, complex *ap, integer * ipiv, real *anorm, real *rcond, complex *work, integer *info); /* Subroutine */ int _starpu_cspmv_(char *uplo, integer *n, complex *alpha, complex * ap, complex *x, integer *incx, complex *beta, complex *y, integer * incy); /* Subroutine */ int _starpu_cspr_(char *uplo, integer *n, complex *alpha, complex *x, integer *incx, complex *ap); /* Subroutine */ int _starpu_csprfs_(char *uplo, integer *n, integer *nrhs, complex * ap, complex *afp, integer *ipiv, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cspsv_(char *uplo, integer *n, integer *nrhs, complex * ap, integer *ipiv, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cspsvx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *ap, complex *afp, integer *ipiv, complex *b, integer * ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_csptrf_(char *uplo, integer *n, complex *ap, integer * ipiv, integer *info); /* Subroutine */ int _starpu_csptri_(char *uplo, integer *n, complex *ap, integer * ipiv, complex *work, integer *info); /* Subroutine */ int _starpu_csptrs_(char *uplo, integer *n, integer *nrhs, complex * ap, integer *ipiv, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu__starpu_csrscl_(integer *n, real *sa, complex *sx, integer *incx); /* Subroutine */ int _starpu_cstedc_(char *compz, integer *n, real *d__, real *e, complex *z__, integer *ldz, complex *work, integer *lwork, real * rwork, integer *lrwork, integer *iwork, integer *liwork, integer * info); /* Subroutine */ int _starpu_cstegr_(char *jobz, char *range, integer *n, real *d__, real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, complex *z__, integer *ldz, integer *isuppz, real *work, integer *lwork, integer *iwork, integer *liwork, integer * info); /* Subroutine */ int _starpu_cstein_(integer *n, real *d__, real *e, integer *m, real *w, integer *iblock, integer *isplit, complex *z__, integer *ldz, real *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_cstemr_(char *jobz, char *range, integer *n, real *d__, real *e, real *vl, real *vu, integer *il, integer *iu, integer *m, real *w, complex *z__, integer *ldz, integer *nzc, integer *isuppz, logical *tryrac, real *work, integer *lwork, integer *iwork, integer * liwork, integer *info); /* Subroutine */ int _starpu_csteqr_(char *compz, integer *n, real *d__, real *e, complex *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_csycon_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, real *anorm, real *rcond, complex *work, integer * info); /* Subroutine */ int _starpu_csyequb_(char *uplo, integer *n, complex *a, integer * lda, real *s, real *scond, real *amax, complex *work, integer *info); /* Subroutine */ int _starpu_csymv_(char *uplo, integer *n, complex *alpha, complex * a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, integer *incy); /* Subroutine */ int _starpu_csyr_(char *uplo, integer *n, complex *alpha, complex *x, integer *incx, complex *a, integer *lda); /* Subroutine */ int _starpu_csyrfs_(char *uplo, integer *n, integer *nrhs, complex * a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex * b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_csyrfsx_(char *uplo, char *equed, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real *params, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_csysv_(char *uplo, integer *n, integer *nrhs, complex *a, integer *lda, integer *ipiv, complex *b, integer *ldb, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_csysvx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_csysvxx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, char *equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer * n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * nparams, real *params, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_csytf2_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_csytrf_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_csytri_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, complex *work, integer *info); /* Subroutine */ int _starpu_csytrs_(char *uplo, integer *n, integer *nrhs, complex * a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_ctbcon_(char *norm, char *uplo, char *diag, integer *n, integer *kd, complex *ab, integer *ldab, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctbrfs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, complex *ab, integer *ldab, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctbtrs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, complex *ab, integer *ldab, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ctfsm_(char *transr, char *side, char *uplo, char *trans, char *diag, integer *m, integer *n, complex *alpha, complex *a, complex *b, integer *ldb); /* Subroutine */ int _starpu_ctftri_(char *transr, char *uplo, char *diag, integer *n, complex *a, integer *info); /* Subroutine */ int _starpu_ctfttp_(char *transr, char *uplo, integer *n, complex * arf, complex *ap, integer *info); /* Subroutine */ int _starpu_ctfttr_(char *transr, char *uplo, integer *n, complex * arf, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ctgevc_(char *side, char *howmny, logical *select, integer *n, complex *s, integer *lds, complex *p, integer *ldp, complex *vl, integer *ldvl, complex *vr, integer *ldvr, integer *mm, integer *m, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctgex2_(logical *wantq, logical *wantz, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *q, integer *ldq, complex *z__, integer *ldz, integer *j1, integer *info); /* Subroutine */ int _starpu_ctgexc_(logical *wantq, logical *wantz, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *q, integer *ldq, complex *z__, integer *ldz, integer *ifst, integer * ilst, integer *info); /* Subroutine */ int _starpu_ctgsen_(integer *ijob, logical *wantq, logical *wantz, logical *select, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *alpha, complex *beta, complex *q, integer *ldq, complex *z__, integer *ldz, integer *m, real *pl, real *pr, real * dif, complex *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_ctgsja_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, integer *k, integer *l, complex *a, integer * lda, complex *b, integer *ldb, real *tola, real *tolb, real *alpha, real *beta, complex *u, integer *ldu, complex *v, integer *ldv, complex *q, integer *ldq, complex *work, integer *ncycle, integer * info); /* Subroutine */ int _starpu_ctgsna_(char *job, char *howmny, logical *select, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *vl, integer *ldvl, complex *vr, integer *ldvr, real *s, real *dif, integer *mm, integer *m, complex *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_ctgsy2_(char *trans, integer *ijob, integer *m, integer * n, complex *a, integer *lda, complex *b, integer *ldb, complex *c__, integer *ldc, complex *d__, integer *ldd, complex *e, integer *lde, complex *f, integer *ldf, real *scale, real *rdsum, real *rdscal, integer *info); /* Subroutine */ int _starpu_ctgsyl_(char *trans, integer *ijob, integer *m, integer * n, complex *a, integer *lda, complex *b, integer *ldb, complex *c__, integer *ldc, complex *d__, integer *ldd, complex *e, integer *lde, complex *f, integer *ldf, real *scale, real *dif, complex *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_ctpcon_(char *norm, char *uplo, char *diag, integer *n, complex *ap, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctprfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, complex *ap, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctptri_(char *uplo, char *diag, integer *n, complex *ap, integer *info); /* Subroutine */ int _starpu_ctptrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, complex *ap, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ctpttf_(char *transr, char *uplo, integer *n, complex * ap, complex *arf, integer *info); /* Subroutine */ int _starpu_ctpttr_(char *uplo, integer *n, complex *ap, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ctrcon_(char *norm, char *uplo, char *diag, integer *n, complex *a, integer *lda, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctrevc_(char *side, char *howmny, logical *select, integer *n, complex *t, integer *ldt, complex *vl, integer *ldvl, complex *vr, integer *ldvr, integer *mm, integer *m, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctrexc_(char *compq, integer *n, complex *t, integer * ldt, complex *q, integer *ldq, integer *ifst, integer *ilst, integer * info); /* Subroutine */ int _starpu_ctrrfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, complex *a, integer *lda, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctrsen_(char *job, char *compq, logical *select, integer *n, complex *t, integer *ldt, complex *q, integer *ldq, complex *w, integer *m, real *s, real *sep, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_ctrsna_(char *job, char *howmny, logical *select, integer *n, complex *t, integer *ldt, complex *vl, integer *ldvl, complex *vr, integer *ldvr, real *s, real *sep, integer *mm, integer * m, complex *work, integer *ldwork, real *rwork, integer *info); /* Subroutine */ int _starpu_ctrsyl_(char *trana, char *tranb, integer *isgn, integer *m, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *c__, integer *ldc, real *scale, integer *info); /* Subroutine */ int _starpu_ctrti2_(char *uplo, char *diag, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ctrtri_(char *uplo, char *diag, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ctrtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, complex *a, integer *lda, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ctrttf_(char *transr, char *uplo, integer *n, complex *a, integer *lda, complex *arf, integer *info); /* Subroutine */ int _starpu_ctrttp_(char *uplo, integer *n, complex *a, integer *lda, complex *ap, integer *info); /* Subroutine */ int _starpu_ctzrqf_(integer *m, integer *n, complex *a, integer *lda, complex *tau, integer *info); /* Subroutine */ int _starpu_ctzrzf_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cung2l_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cung2r_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cungbr_(char *vect, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cunghr_(integer *n, integer *ilo, integer *ihi, complex * a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cungl2_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cunglq_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_cungql_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_cungqr_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_cungr2_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cungrq_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_cungtr_(char *uplo, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cunm2l_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *info); /* Subroutine */ int _starpu_cunm2r_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *info); /* Subroutine */ int _starpu_cunmbr_(char *vect, char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_cunmhr_(char *side, char *trans, integer *m, integer *n, integer *ilo, integer *ihi, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_cunml2_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *info); /* Subroutine */ int _starpu_cunmlq_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cunmql_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cunmqr_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cunmr2_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *info); /* Subroutine */ int _starpu_cunmr3_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *info); /* Subroutine */ int _starpu_cunmrq_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cunmrz_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_cunmtr_(char *side, char *uplo, char *trans, integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cupgtr_(char *uplo, integer *n, complex *ap, complex * tau, complex *q, integer *ldq, complex *work, integer *info); /* Subroutine */ int _starpu_cupmtr_(char *side, char *uplo, char *trans, integer *m, integer *n, complex *ap, complex *tau, complex *c__, integer *ldc, complex *work, integer *info); /* Subroutine */ int _starpu_dbdsdc_(char *uplo, char *compq, integer *n, doublereal * d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dbdsqr_(char *uplo, integer *n, integer *ncvt, integer * nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer * ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_ddisna_(char *job, integer *m, integer *n, doublereal * d__, doublereal *sep, integer *info); /* Subroutine */ int _starpu_dgbbrd_(char *vect, integer *m, integer *n, integer *ncc, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal * d__, doublereal *e, doublereal *q, integer *ldq, doublereal *pt, integer *ldpt, doublereal *c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgbcon_(char *norm, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, integer *ipiv, doublereal *anorm, doublereal *rcond, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgbequ_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * info); /* Subroutine */ int _starpu_dgbequb_(integer *m, integer *n, integer *kl, integer * ku, doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * info); /* Subroutine */ int _starpu_dgbrfs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgbrfsx_(char *trans, char *equed, integer *n, integer * kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer * ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * nparams, doublereal *params, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgbsv_(integer *n, integer *kl, integer *ku, integer * nrhs, doublereal *ab, integer *ldab, integer *ipiv, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dgbsvx_(char *fact, char *trans, integer *n, integer *kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgbsvxx_(char *fact, char *trans, integer *n, integer * kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, doublereal *params, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgbtf2_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_dgbtrf_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_dgbtrs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, doublereal *ab, integer *ldab, integer *ipiv, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, doublereal *scale, integer *m, doublereal *v, integer * ldv, integer *info); /* Subroutine */ int _starpu_dgebal_(char *job, integer *n, doublereal *a, integer * lda, integer *ilo, integer *ihi, doublereal *scale, integer *info); /* Subroutine */ int _starpu_dgebd2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * taup, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgebrd_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * taup, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgecon_(char *norm, integer *n, doublereal *a, integer * lda, doublereal *anorm, doublereal *rcond, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dgeequ_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer *info); /* Subroutine */ int _starpu_dgeequb_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer *info); /* Subroutine */ int _starpu_dgees_(char *jobvs, char *sort, L_fp select, integer *n, doublereal *a, integer *lda, integer *sdim, doublereal *wr, doublereal *wi, doublereal *vs, integer *ldvs, doublereal *work, integer *lwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_dgeesx_(char *jobvs, char *sort, L_fp select, char * sense, integer *n, doublereal *a, integer *lda, integer *sdim, doublereal *wr, doublereal *wi, doublereal *vs, integer *ldvs, doublereal *rconde, doublereal *rcondv, doublereal *work, integer * lwork, integer *iwork, integer *liwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_dgeev_(char *jobvl, char *jobvr, integer *n, doublereal * a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgeevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, doublereal *a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer *ilo, integer *ihi, doublereal *scale, doublereal *abnrm, doublereal *rconde, doublereal *rcondv, doublereal *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgegs_(char *jobvsl, char *jobvsr, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * alphar, doublereal *alphai, doublereal *beta, doublereal *vsl, integer *ldvsl, doublereal *vsr, integer *ldvsr, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgegv_(char *jobvl, char *jobvr, integer *n, doublereal * a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal *beta, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgehd2_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgehrd_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgejsv_(char *joba, char *jobu, char *jobv, char *jobr, char *jobt, char *jobp, integer *m, integer *n, doublereal *a, integer *lda, doublereal *sva, doublereal *u, integer *ldu, doublereal *v, integer *ldv, doublereal *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgelq2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgelqf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgels_(char *trans, integer *m, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgelsd_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgelss_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgelsx_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * jpvt, doublereal *rcond, integer *rank, doublereal *work, integer * info); /* Subroutine */ int _starpu_dgelsy_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * jpvt, doublereal *rcond, integer *rank, doublereal *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_dgeql2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgeqlf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgeqp3_(integer *m, integer *n, doublereal *a, integer * lda, integer *jpvt, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgeqpf_(integer *m, integer *n, doublereal *a, integer * lda, integer *jpvt, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgeqr2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgeqrf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgerfs_(char *trans, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer * ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgerfsx_(char *trans, char *equed, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, doublereal *params, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgerq2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgerqf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgesc2_(integer *n, doublereal *a, integer *lda, doublereal *rhs, integer *ipiv, integer *jpiv, doublereal *scale); /* Subroutine */ int _starpu_dgesdd_(char *jobz, integer *m, integer *n, doublereal * a, integer *lda, doublereal *s, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgesv_(integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dgesvd_(char *jobu, char *jobvt, integer *m, integer *n, doublereal *a, integer *lda, doublereal *s, doublereal *u, integer * ldu, doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgesvj_(char *joba, char *jobu, char *jobv, integer *m, integer *n, doublereal *a, integer *lda, doublereal *sva, integer *mv, doublereal *v, integer *ldv, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgesvx_(char *fact, char *trans, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dgesvxx_(char *fact, char *trans, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * nparams, doublereal *params, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgetc2_(integer *n, doublereal *a, integer *lda, integer *ipiv, integer *jpiv, integer *info); /* Subroutine */ int _starpu_dgetf2_(integer *m, integer *n, doublereal *a, integer * lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_dgetrf_(integer *m, integer *n, doublereal *a, integer * lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_dgetri_(integer *n, doublereal *a, integer *lda, integer *ipiv, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgetrs_(char *trans, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * ldb, integer *info); /* Subroutine */ int _starpu_dggbak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, integer *m, doublereal *v, integer *ldv, integer *info); /* Subroutine */ int _starpu_dggbal_(char *job, integer *n, doublereal *a, integer * lda, doublereal *b, integer *ldb, integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, doublereal *work, integer * info); /* Subroutine */ int _starpu_dgges_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer *sdim, doublereal *alphar, doublereal *alphai, doublereal *beta, doublereal *vsl, integer *ldvsl, doublereal *vsr, integer *ldvsr, doublereal *work, integer *lwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_dggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, char *sense, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer *sdim, doublereal *alphar, doublereal *alphai, doublereal *beta, doublereal *vsl, integer *ldvsl, doublereal *vsr, integer *ldvsr, doublereal *rconde, doublereal * rcondv, doublereal *work, integer *lwork, integer *iwork, integer * liwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_dggev_(char *jobvl, char *jobvr, integer *n, doublereal * a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal *beta, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dggevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * beta, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, doublereal *abnrm, doublereal *bbnrm, doublereal *rconde, doublereal * rcondv, doublereal *work, integer *lwork, integer *iwork, logical * bwork, integer *info); /* Subroutine */ int _starpu_dggglm_(integer *n, integer *m, integer *p, doublereal * a, integer *lda, doublereal *b, integer *ldb, doublereal *d__, doublereal *x, doublereal *y, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgghrd_(char *compq, char *compz, integer *n, integer * ilo, integer *ihi, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *q, integer *ldq, doublereal *z__, integer * ldz, integer *info); /* Subroutine */ int _starpu_dgglse_(integer *m, integer *n, integer *p, doublereal * a, integer *lda, doublereal *b, integer *ldb, doublereal *c__, doublereal *d__, doublereal *x, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dggqrf_(integer *n, integer *m, integer *p, doublereal * a, integer *lda, doublereal *taua, doublereal *b, integer *ldb, doublereal *taub, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dggrqf_(integer *m, integer *p, integer *n, doublereal * a, integer *lda, doublereal *taua, doublereal *b, integer *ldb, doublereal *taub, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dggsvd_(char *jobu, char *jobv, char *jobq, integer *m, integer *n, integer *p, integer *k, integer *l, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *alpha, doublereal *beta, doublereal *u, integer *ldu, doublereal *v, integer *ldv, doublereal *q, integer *ldq, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dggsvp_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *tola, doublereal *tolb, integer *k, integer *l, doublereal *u, integer *ldu, doublereal *v, integer *ldv, doublereal *q, integer *ldq, integer *iwork, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgsvj0_(char *jobv, integer *m, integer *n, doublereal * a, integer *lda, doublereal *d__, doublereal *sva, integer *mv, doublereal *v, integer *ldv, doublereal *eps, doublereal *sfmin, doublereal *tol, integer *nsweep, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgsvj1_(char *jobv, integer *m, integer *n, integer *n1, doublereal *a, integer *lda, doublereal *d__, doublereal *sva, integer *mv, doublereal *v, integer *ldv, doublereal *eps, doublereal *sfmin, doublereal *tol, integer *nsweep, doublereal *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_dgtcon_(char *norm, integer *n, doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, integer *ipiv, doublereal *anorm, doublereal *rcond, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dgtrfs_(char *trans, integer *n, integer *nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal *dlf, doublereal *df, doublereal *duf, doublereal *du2, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * ferr, doublereal *berr, doublereal *work, integer *iwork, integer * info); /* Subroutine */ int _starpu_dgtsv_(integer *n, integer *nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dgtsvx_(char *fact, char *trans, integer *n, integer * nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal * dlf, doublereal *df, doublereal *duf, doublereal *du2, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dgttrf_(integer *n, doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, integer *ipiv, integer *info); /* Subroutine */ int _starpu_dgttrs_(char *trans, integer *n, integer *nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, integer *ipiv, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dgtts2_(integer *itrans, integer *n, integer *nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, integer *ipiv, doublereal *b, integer *ldb); /* Subroutine */ int _starpu_dhgeqz_(char *job, char *compq, char *compz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *t, integer *ldt, doublereal *alphar, doublereal *alphai, doublereal * beta, doublereal *q, integer *ldq, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dhsein_(char *side, char *eigsrc, char *initv, logical * select, integer *n, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, doublereal *work, integer * ifaill, integer *ifailr, integer *info); /* Subroutine */ int _starpu_dhseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info); logical _starpu_disnan_(doublereal *din); /* Subroutine */ int _starpu_dla_gbamv__(integer *trans, integer *m, integer *n, integer *kl, integer *ku, doublereal *alpha, doublereal *ab, integer * ldab, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); doublereal _starpu_dla_gbrcond__(char *trans, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, integer *cmode, doublereal *c__, integer *info, doublereal *work, integer *iwork, ftnlen trans_len); /* Subroutine */ int _starpu_dla_gbrfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, logical *colequ, doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, doublereal *res, doublereal *ayb, doublereal *dy, doublereal * y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info); doublereal _starpu_dla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * ncols, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb); /* Subroutine */ int _starpu_dla_geamv__(integer *trans, integer *m, integer *n, doublereal *alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); doublereal _starpu_dla_gercond__(char *trans, integer *n, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, integer *cmode, doublereal *c__, integer *info, doublereal *work, integer *iwork, ftnlen trans_len); /* Subroutine */ int _starpu_dla_gerfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer * ldy, doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, doublereal *res, doublereal *ayb, doublereal * dy, doublereal *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info); /* Subroutine */ int _starpu_dla_lin_berr__(integer *n, integer *nz, integer *nrhs, doublereal *res, doublereal *ayb, doublereal *berr); doublereal _starpu_dla_porcond__(char *uplo, integer *n, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *cmode, doublereal *c__, integer *info, doublereal *work, integer *iwork, ftnlen uplo_len); /* Subroutine */ int _starpu_dla_porfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal * af, integer *ldaf, logical *colequ, doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, doublereal *res, doublereal *ayb, doublereal *dy, doublereal * y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_dla_porpvgrw__(char *uplo, integer *ncols, doublereal *a, integer * lda, doublereal *af, integer *ldaf, doublereal *work, ftnlen uplo_len); doublereal _starpu_dla_rpvgrw__(integer *n, integer *ncols, doublereal *a, integer * lda, doublereal *af, integer *ldaf); /* Subroutine */ int _starpu_dla_syamv__(integer *uplo, integer *n, doublereal *alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); doublereal _starpu_dla_syrcond__(char *uplo, integer *n, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, integer *cmode, doublereal *c__, integer *info, doublereal *work, integer *iwork, ftnlen uplo_len); /* Subroutine */ int _starpu_dla_syrfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal * af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer *ldy, doublereal * berr_out__, integer *n_norms__, doublereal *errs_n__, doublereal * errs_c__, doublereal *res, doublereal *ayb, doublereal *dy, doublereal *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_dla_syrpvgrw__(char *uplo, integer *n, integer *info, doublereal * a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, doublereal *work, ftnlen uplo_len); /* Subroutine */ int _starpu_dla_wwaddw__(integer *n, doublereal *x, doublereal *y, doublereal *w); /* Subroutine */ int _starpu_dlabad_(doublereal *small, doublereal *large); /* Subroutine */ int _starpu_dlabrd_(integer *m, integer *n, integer *nb, doublereal * a, integer *lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal *taup, doublereal *x, integer *ldx, doublereal *y, integer *ldy); /* Subroutine */ int _starpu_dlacn2_(integer *n, doublereal *v, doublereal *x, integer *isgn, doublereal *est, integer *kase, integer *isave); /* Subroutine */ int _starpu_dlacon_(integer *n, doublereal *v, doublereal *x, integer *isgn, doublereal *est, integer *kase); /* Subroutine */ int _starpu_dlacpy_(char *uplo, integer *m, integer *n, doublereal * a, integer *lda, doublereal *b, integer *ldb); /* Subroutine */ int _starpu_dladiv_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *p, doublereal *q); /* Subroutine */ int _starpu_dlae2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *rt1, doublereal *rt2); /* Subroutine */ int _starpu_dlaebz_(integer *ijob, integer *nitmax, integer *n, integer *mmax, integer *minp, integer *nbmin, doublereal *abstol, doublereal *reltol, doublereal *pivmin, doublereal *d__, doublereal * e, doublereal *e2, integer *nval, doublereal *ab, doublereal *c__, integer *mout, integer *nab, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlaed0_(integer *icompq, integer *qsiz, integer *n, doublereal *d__, doublereal *e, doublereal *q, integer *ldq, doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlaed1_(integer *n, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlaed2_(integer *k, integer *n, integer *n1, doublereal * d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2, integer *indx, integer *indxc, integer *indxp, integer *coltyp, integer *info); /* Subroutine */ int _starpu_dlaed3_(integer *k, integer *n, integer *n1, doublereal * d__, doublereal *q, integer *ldq, doublereal *rho, doublereal *dlamda, doublereal *q2, integer *indx, integer *ctot, doublereal *w, doublereal *s, integer *info); /* Subroutine */ int _starpu_dlaed4_(integer *n, integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam, integer *info); /* Subroutine */ int _starpu_dlaed5_(integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam); /* Subroutine */ int _starpu_dlaed6_(integer *kniter, logical *orgati, doublereal * rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal * tau, integer *info); /* Subroutine */ int _starpu_dlaed7_(integer *icompq, integer *n, integer *qsiz, integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr, integer * perm, integer *givptr, integer *givcol, doublereal *givnum, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlaed8_(integer *icompq, integer *k, integer *n, integer *qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer *givptr, integer *givcol, doublereal *givnum, integer *indxp, integer *indx, integer *info); /* Subroutine */ int _starpu_dlaed9_(integer *k, integer *kstart, integer *kstop, integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, integer *info); /* Subroutine */ int _starpu_dlaeda_(integer *n, integer *tlvls, integer *curlvl, integer *curpbm, integer *prmptr, integer *perm, integer *givptr, integer *givcol, doublereal *givnum, doublereal *q, integer *qptr, doublereal *z__, doublereal *ztemp, integer *info); /* Subroutine */ int _starpu_dlaein_(logical *rightv, logical *noinit, integer *n, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, doublereal *vr, doublereal *vi, doublereal *b, integer *ldb, doublereal *work, doublereal *eps3, doublereal *smlnum, doublereal * bignum, integer *info); /* Subroutine */ int _starpu_dlaev2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *rt1, doublereal *rt2, doublereal *cs1, doublereal *sn1); /* Subroutine */ int _starpu_dlaexc_(logical *wantq, integer *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, integer *j1, integer *n1, integer *n2, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlag2_(doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *safmin, doublereal *scale1, doublereal * scale2, doublereal *wr1, doublereal *wr2, doublereal *wi); /* Subroutine */ int _starpu_dlag2s_(integer *m, integer *n, doublereal *a, integer * lda, real *sa, integer *ldsa, integer *info); /* Subroutine */ int _starpu_dlags2_(logical *upper, doublereal *a1, doublereal *a2, doublereal *a3, doublereal *b1, doublereal *b2, doublereal *b3, doublereal *csu, doublereal *snu, doublereal *csv, doublereal *snv, doublereal *csq, doublereal *snq); /* Subroutine */ int _starpu_dlagtf_(integer *n, doublereal *a, doublereal *lambda, doublereal *b, doublereal *c__, doublereal *tol, doublereal *d__, integer *in, integer *info); /* Subroutine */ int _starpu_dlagtm_(char *trans, integer *n, integer *nrhs, doublereal *alpha, doublereal *dl, doublereal *d__, doublereal *du, doublereal *x, integer *ldx, doublereal *beta, doublereal *b, integer *ldb); /* Subroutine */ int _starpu_dlagts_(integer *job, integer *n, doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, integer *in, doublereal *y, doublereal *tol, integer *info); /* Subroutine */ int _starpu_dlagv2_(doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * beta, doublereal *csl, doublereal *snl, doublereal *csr, doublereal * snr); /* Subroutine */ int _starpu_dlahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, integer *info); /* Subroutine */ int _starpu_dlahr2_(integer *n, integer *k, integer *nb, doublereal * a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, doublereal *y, integer *ldy); /* Subroutine */ int _starpu_dlahrd_(integer *n, integer *k, integer *nb, doublereal * a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, doublereal *y, integer *ldy); /* Subroutine */ int _starpu_dlaic1_(integer *job, integer *j, doublereal *x, doublereal *sest, doublereal *w, doublereal *gamma, doublereal * sestpr, doublereal *s, doublereal *c__); logical _starpu_dlaisnan_(doublereal *din1, doublereal *din2); /* Subroutine */ int _starpu_dlaln2_(logical *ltrans, integer *na, integer *nw, doublereal *smin, doublereal *ca, doublereal *a, integer *lda, doublereal *d1, doublereal *d2, doublereal *b, integer *ldb, doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, doublereal *scale, doublereal *xnorm, integer *info); /* Subroutine */ int _starpu_dlals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal * poles, doublereal *difl, doublereal *difr, doublereal *z__, integer * k, doublereal *c__, doublereal *s, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal * poles, integer *givptr, integer *givcol, integer *ldgcol, integer * perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlalsd_(char *uplo, integer *smlsiz, integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, doublereal *rcond, integer *rank, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlamrg_(integer *n1, integer *n2, doublereal *a, integer *dtrd1, integer *dtrd2, integer *index); integer _starpu_dlaneg_(integer *n, doublereal *d__, doublereal *lld, doublereal * sigma, doublereal *pivmin, integer *r__); doublereal _starpu_dlangb_(char *norm, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal *work); doublereal _starpu_dlange_(char *norm, integer *m, integer *n, doublereal *a, integer *lda, doublereal *work); doublereal _starpu_dlangt_(char *norm, integer *n, doublereal *dl, doublereal *d__, doublereal *du); doublereal _starpu_dlanhs_(char *norm, integer *n, doublereal *a, integer *lda, doublereal *work); doublereal _starpu_dlansb_(char *norm, char *uplo, integer *n, integer *k, doublereal *ab, integer *ldab, doublereal *work); doublereal _starpu_dlansf_(char *norm, char *transr, char *uplo, integer *n, doublereal *a, doublereal *work); doublereal _starpu_dlansp_(char *norm, char *uplo, integer *n, doublereal *ap, doublereal *work); doublereal _starpu_dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e); doublereal _starpu_dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *work); doublereal _starpu_dlantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, doublereal *ab, integer *ldab, doublereal *work); doublereal _starpu_dlantp_(char *norm, char *uplo, char *diag, integer *n, doublereal *ap, doublereal *work); doublereal _starpu_dlantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, doublereal *a, integer *lda, doublereal *work); /* Subroutine */ int _starpu_dlanv2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *rt1r, doublereal *rt1i, doublereal *rt2r, doublereal *rt2i, doublereal *cs, doublereal *sn); /* Subroutine */ int _starpu_dlapll_(integer *n, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *ssmin); /* Subroutine */ int _starpu_dlapmt_(logical *forwrd, integer *m, integer *n, doublereal *x, integer *ldx, integer *k); doublereal _starpu_dlapy2_(doublereal *x, doublereal *y); doublereal _starpu_dlapy3_(doublereal *x, doublereal *y, doublereal *z__); /* Subroutine */ int _starpu_dlaqgb_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed); /* Subroutine */ int _starpu_dlaqge_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed); /* Subroutine */ int _starpu_dlaqp2_(integer *m, integer *n, integer *offset, doublereal *a, integer *lda, integer *jpvt, doublereal *tau, doublereal *vn1, doublereal *vn2, doublereal *work); /* Subroutine */ int _starpu_dlaqps_(integer *m, integer *n, integer *offset, integer *nb, integer *kb, doublereal *a, integer *lda, integer *jpvt, doublereal *tau, doublereal *vn1, doublereal *vn2, doublereal *auxv, doublereal *f, integer *ldf); /* Subroutine */ int _starpu_dlaqr0_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dlaqr1_(integer *n, doublereal *h__, integer *ldh, doublereal *sr1, doublereal *si1, doublereal *sr2, doublereal *si2, doublereal *v); /* Subroutine */ int _starpu_dlaqr2_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * nv, doublereal *wv, integer *ldwv, doublereal *work, integer *lwork); /* Subroutine */ int _starpu_dlaqr3_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * nv, doublereal *wv, integer *ldwv, doublereal *work, integer *lwork); /* Subroutine */ int _starpu_dlaqr4_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dlaqr5_(logical *wantt, logical *wantz, integer *kacc22, integer *n, integer *ktop, integer *kbot, integer *nshfts, doublereal *sr, doublereal *si, doublereal *h__, integer *ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, doublereal *v, integer * ldv, doublereal *u, integer *ldu, integer *nv, doublereal *wv, integer *ldwv, integer *nh, doublereal *wh, integer *ldwh); /* Subroutine */ int _starpu_dlaqsb_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_dlaqsp_(char *uplo, integer *n, doublereal *ap, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_dlaqsy_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_dlaqtr_(logical *ltran, logical *lreal, integer *n, doublereal *t, integer *ldt, doublereal *b, doublereal *w, doublereal *scale, doublereal *x, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlar1v_(integer *n, integer *b1, integer *bn, doublereal *lambda, doublereal *d__, doublereal *l, doublereal *ld, doublereal * lld, doublereal *pivmin, doublereal *gaptol, doublereal *z__, logical *wantnc, integer *negcnt, doublereal *ztz, doublereal *mingma, integer *r__, integer *isuppz, doublereal *nrminv, doublereal *resid, doublereal *rqcorr, doublereal *work); /* Subroutine */ int _starpu_dlar2v_(integer *n, doublereal *x, doublereal *y, doublereal *z__, integer *incx, doublereal *c__, doublereal *s, integer *incc); /* Subroutine */ int _starpu_dlarf_(char *side, integer *m, integer *n, doublereal *v, integer *incv, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work); /* Subroutine */ int _starpu_dlarfb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, doublereal *v, integer * ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, doublereal *work, integer *ldwork); /* Subroutine */ int _starpu_dlarfg_(integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *tau); /* Subroutine */ int _starpu_dlarfp_(integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *tau); /* Subroutine */ int _starpu_dlarft_(char *direct, char *storev, integer *n, integer * k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, integer *ldt); /* Subroutine */ int _starpu_dlarfx_(char *side, integer *m, integer *n, doublereal * v, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work); /* Subroutine */ int _starpu_dlargv_(integer *n, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *c__, integer *incc); /* Subroutine */ int _starpu_dlarnv_(integer *idist, integer *iseed, integer *n, doublereal *x); /* Subroutine */ int _starpu_dlarra_(integer *n, doublereal *d__, doublereal *e, doublereal *e2, doublereal *spltol, doublereal *tnrm, integer *nsplit, integer *isplit, integer *info); /* Subroutine */ int _starpu_dlarrb_(integer *n, doublereal *d__, doublereal *lld, integer *ifirst, integer *ilast, doublereal *rtol1, doublereal *rtol2, integer *offset, doublereal *w, doublereal *wgap, doublereal *werr, doublereal *work, integer *iwork, doublereal *pivmin, doublereal * spdiam, integer *twist, integer *info); /* Subroutine */ int _starpu_dlarrc_(char *jobt, integer *n, doublereal *vl, doublereal *vu, doublereal *d__, doublereal *e, doublereal *pivmin, integer *eigcnt, integer *lcnt, integer *rcnt, integer *info); /* Subroutine */ int _starpu_dlarrd_(char *range, char *order, integer *n, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *gers, doublereal *reltol, doublereal *d__, doublereal *e, doublereal *e2, doublereal *pivmin, integer *nsplit, integer *isplit, integer *m, doublereal *w, doublereal *werr, doublereal *wl, doublereal *wu, integer *iblock, integer *indexw, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlarre_(char *range, integer *n, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *d__, doublereal *e, doublereal *e2, doublereal *rtol1, doublereal *rtol2, doublereal * spltol, integer *nsplit, integer *isplit, integer *m, doublereal *w, doublereal *werr, doublereal *wgap, integer *iblock, integer *indexw, doublereal *gers, doublereal *pivmin, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dlarrf_(integer *n, doublereal *d__, doublereal *l, doublereal *ld, integer *clstrt, integer *clend, doublereal *w, doublereal *wgap, doublereal *werr, doublereal *spdiam, doublereal * clgapl, doublereal *clgapr, doublereal *pivmin, doublereal *sigma, doublereal *dplus, doublereal *lplus, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlarrj_(integer *n, doublereal *d__, doublereal *e2, integer *ifirst, integer *ilast, doublereal *rtol, integer *offset, doublereal *w, doublereal *werr, doublereal *work, integer *iwork, doublereal *pivmin, doublereal *spdiam, integer *info); /* Subroutine */ int _starpu_dlarrk_(integer *n, integer *iw, doublereal *gl, doublereal *gu, doublereal *d__, doublereal *e2, doublereal *pivmin, doublereal *reltol, doublereal *w, doublereal *werr, integer *info); /* Subroutine */ int _starpu_dlarrr_(integer *n, doublereal *d__, doublereal *e, integer *info); /* Subroutine */ int _starpu_dlarrv_(integer *n, doublereal *vl, doublereal *vu, doublereal *d__, doublereal *l, doublereal *pivmin, integer *isplit, integer *m, integer *dol, integer *dou, doublereal *minrgp, doublereal *rtol1, doublereal *rtol2, doublereal *w, doublereal *werr, doublereal *wgap, integer *iblock, integer *indexw, doublereal *gers, doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlarscl2_(integer *m, integer *n, doublereal *d__, doublereal *x, integer *ldx); /* Subroutine */ int _starpu_dlartg_(doublereal *f, doublereal *g, doublereal *cs, doublereal *sn, doublereal *r__); /* Subroutine */ int _starpu_dlartv_(integer *n, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *c__, doublereal *s, integer *incc); /* Subroutine */ int _starpu_dlaruv_(integer *iseed, integer *n, doublereal *x); /* Subroutine */ int _starpu_dlarz_(char *side, integer *m, integer *n, integer *l, doublereal *v, integer *incv, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work); /* Subroutine */ int _starpu_dlarzb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, integer *l, doublereal *v, integer *ldv, doublereal *t, integer *ldt, doublereal *c__, integer * ldc, doublereal *work, integer *ldwork); /* Subroutine */ int _starpu_dlarzt_(char *direct, char *storev, integer *n, integer * k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, integer *ldt); /* Subroutine */ int _starpu_dlas2_(doublereal *f, doublereal *g, doublereal *h__, doublereal *ssmin, doublereal *ssmax); /* Subroutine */ int _starpu_dlascl_(char *type__, integer *kl, integer *ku, doublereal *cfrom, doublereal *cto, integer *m, integer *n, doublereal *a, integer *lda, integer *info); /* Subroutine */ int _starpu_dlascl2_(integer *m, integer *n, doublereal *d__, doublereal *x, integer *ldx); /* Subroutine */ int _starpu_dlasd0_(integer *n, integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer * ldvt, integer *smlsiz, integer *iwork, doublereal *work, integer * info); /* Subroutine */ int _starpu_dlasd1_(integer *nl, integer *nr, integer *sqre, doublereal *d__, doublereal *alpha, doublereal *beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, integer *idxq, integer * iwork, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlasd2_(integer *nl, integer *nr, integer *sqre, integer *k, doublereal *d__, doublereal *z__, doublereal *alpha, doublereal * beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer * idxq, integer *coltyp, integer *info); /* Subroutine */ int _starpu_dlasd3_(integer *nl, integer *nr, integer *sqre, integer *k, doublereal *d__, doublereal *q, integer *ldq, doublereal *dsigma, doublereal *u, integer *ldu, doublereal *u2, integer *ldu2, doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, integer *idxc, integer *ctot, doublereal *z__, integer *info); /* Subroutine */ int _starpu_dlasd4_(integer *n, integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho, doublereal * sigma, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlasd5_(integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dsigma, doublereal * work); /* Subroutine */ int _starpu_dlasd6_(integer *icompq, integer *nl, integer *nr, integer *sqre, doublereal *d__, doublereal *vf, doublereal *vl, doublereal *alpha, doublereal *beta, integer *idxq, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal *poles, doublereal *difl, doublereal * difr, doublereal *z__, integer *k, doublereal *c__, doublereal *s, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlasd7_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *k, doublereal *d__, doublereal *z__, doublereal *zw, doublereal *vf, doublereal *vfw, doublereal *vl, doublereal *vlw, doublereal *alpha, doublereal *beta, doublereal * dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal *c__, doublereal *s, integer *info); /* Subroutine */ int _starpu_dlasd8_(integer *icompq, integer *k, doublereal *d__, doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal * work, integer *info); /* Subroutine */ int _starpu_dlasda_(integer *icompq, integer *smlsiz, integer *n, integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlasdq_(char *uplo, integer *sqre, integer *n, integer * ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlasdt_(integer *n, integer *lvl, integer *nd, integer * inode, integer *ndiml, integer *ndimr, integer *msub); /* Subroutine */ int _starpu_dlaset_(char *uplo, integer *m, integer *n, doublereal * alpha, doublereal *beta, doublereal *a, integer *lda); /* Subroutine */ int _starpu_dlasq1_(integer *n, doublereal *d__, doublereal *e, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlasq2_(integer *n, doublereal *z__, integer *info); /* Subroutine */ int _starpu_dlasq3_(integer *i0, integer *n0, doublereal *z__, integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig, doublereal *qmax, integer *nfail, integer *iter, integer *ndiv, logical *ieee, integer *ttype, doublereal *dmin1, doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, doublereal *g, doublereal *tau); /* Subroutine */ int _starpu_dlasq4_(integer *i0, integer *n0, doublereal *z__, integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, doublereal *tau, integer *ttype, doublereal *g); /* Subroutine */ int _starpu_dlasq5_(integer *i0, integer *n0, doublereal *z__, integer *pp, doublereal *tau, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2, logical *ieee); /* Subroutine */ int _starpu_dlasq6_(integer *i0, integer *n0, doublereal *z__, integer *pp, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2); /* Subroutine */ int _starpu_dlasr_(char *side, char *pivot, char *direct, integer *m, integer *n, doublereal *c__, doublereal *s, doublereal *a, integer * lda); /* Subroutine */ int _starpu_dlasrt_(char *id, integer *n, doublereal *d__, integer * info); /* Subroutine */ int _starpu_dlassq_(integer *n, doublereal *x, integer *incx, doublereal *scale, doublereal *sumsq); /* Subroutine */ int _starpu_dlasv2_(doublereal *f, doublereal *g, doublereal *h__, doublereal *ssmin, doublereal *ssmax, doublereal *snr, doublereal * csr, doublereal *snl, doublereal *csl); /* Subroutine */ int _starpu_dlaswp_(integer *n, doublereal *a, integer *lda, integer *k1, integer *k2, integer *ipiv, integer *incx); /* Subroutine */ int _starpu_dlasy2_(logical *ltranl, logical *ltranr, integer *isgn, integer *n1, integer *n2, doublereal *tl, integer *ldtl, doublereal * tr, integer *ldtr, doublereal *b, integer *ldb, doublereal *scale, doublereal *x, integer *ldx, doublereal *xnorm, integer *info); /* Subroutine */ int _starpu_dlasyf_(char *uplo, integer *n, integer *nb, integer *kb, doublereal *a, integer *lda, integer *ipiv, doublereal *w, integer * ldw, integer *info); /* Subroutine */ int _starpu_dlat2s_(char *uplo, integer *n, doublereal *a, integer * lda, real *sa, integer *ldsa, integer *info); /* Subroutine */ int _starpu_dlatbs_(char *uplo, char *trans, char *diag, char * normin, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *x, doublereal *scale, doublereal *cnorm, integer *info); /* Subroutine */ int _starpu_dlatdf_(integer *ijob, integer *n, doublereal *z__, integer *ldz, doublereal *rhs, doublereal *rdsum, doublereal *rdscal, integer *ipiv, integer *jpiv); /* Subroutine */ int _starpu_dlatps_(char *uplo, char *trans, char *diag, char * normin, integer *n, doublereal *ap, doublereal *x, doublereal *scale, doublereal *cnorm, integer *info); /* Subroutine */ int _starpu_dlatrd_(char *uplo, integer *n, integer *nb, doublereal * a, integer *lda, doublereal *e, doublereal *tau, doublereal *w, integer *ldw); /* Subroutine */ int _starpu_dlatrs_(char *uplo, char *trans, char *diag, char * normin, integer *n, doublereal *a, integer *lda, doublereal *x, doublereal *scale, doublereal *cnorm, integer *info); /* Subroutine */ int _starpu_dlatrz_(integer *m, integer *n, integer *l, doublereal * a, integer *lda, doublereal *tau, doublereal *work); /* Subroutine */ int _starpu_dlatzm_(char *side, integer *m, integer *n, doublereal * v, integer *incv, doublereal *tau, doublereal *c1, doublereal *c2, integer *ldc, doublereal *work); /* Subroutine */ int _starpu_dlauu2_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info); /* Subroutine */ int _starpu_dlauum_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info); /* Subroutine */ int _starpu_dopgtr_(char *uplo, integer *n, doublereal *ap, doublereal *tau, doublereal *q, integer *ldq, doublereal *work, integer *info); /* Subroutine */ int _starpu_dopmtr_(char *side, char *uplo, char *trans, integer *m, integer *n, doublereal *ap, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dorg2l_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dorg2r_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dorgbr_(char *vect, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorghr_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorgl2_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dorglq_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorgql_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorgqr_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorgr2_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dorgrq_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorgtr_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorm2l_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dorm2r_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dormbr_(char *vect, char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dormhr_(char *side, char *trans, integer *m, integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal * tau, doublereal *c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorml2_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dormlq_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dormql_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dormqr_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dormr2_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dormr3_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, doublereal *a, integer *lda, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dormrq_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dormrz_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, doublereal *a, integer *lda, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dormtr_(char *side, char *uplo, char *trans, integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dpbcon_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, doublereal *anorm, doublereal *rcond, doublereal * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dpbequ_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_dpbrfs_(char *uplo, integer *n, integer *kd, integer * nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * ferr, doublereal *berr, doublereal *work, integer *iwork, integer * info); /* Subroutine */ int _starpu_dpbstf_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_dpbsv_(char *uplo, integer *n, integer *kd, integer * nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dpbsvx_(char *fact, char *uplo, integer *n, integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, char *equed, doublereal *s, doublereal *b, integer * ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dpbtf2_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_dpbtrf_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_dpbtrs_(char *uplo, integer *n, integer *kd, integer * nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dpftrf_(char *transr, char *uplo, integer *n, doublereal *a, integer *info); /* Subroutine */ int _starpu_dpftri_(char *transr, char *uplo, integer *n, doublereal *a, integer *info); /* Subroutine */ int _starpu_dpftrs_(char *transr, char *uplo, integer *n, integer * nrhs, doublereal *a, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dpocon_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *anorm, doublereal *rcond, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dpoequ_(integer *n, doublereal *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_dpoequb_(integer *n, doublereal *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_dporfs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * ferr, doublereal *berr, doublereal *work, integer *iwork, integer * info); /* Subroutine */ int _starpu_dporfsx_(char *uplo, char *equed, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, doublereal *s, doublereal *b, integer *ldb, doublereal *x, integer * ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * nparams, doublereal *params, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dposv_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dposvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, char *equed, doublereal *s, doublereal *b, integer *ldb, doublereal * x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal * berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dposvxx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, char *equed, doublereal *s, doublereal *b, integer *ldb, doublereal * x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal * berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublereal * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dpotf2_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info); /* Subroutine */ int _starpu_dpotrf_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info); /* Subroutine */ int _starpu_dpotri_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info); /* Subroutine */ int _starpu_dpotrs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_dppcon_(char *uplo, integer *n, doublereal *ap, doublereal *anorm, doublereal *rcond, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dppequ_(char *uplo, integer *n, doublereal *ap, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_dpprfs_(char *uplo, integer *n, integer *nrhs, doublereal *ap, doublereal *afp, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dppsv_(char *uplo, integer *n, integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dppsvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *ap, doublereal *afp, char *equed, doublereal *s, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dpptrf_(char *uplo, integer *n, doublereal *ap, integer * info); /* Subroutine */ int _starpu_dpptri_(char *uplo, integer *n, doublereal *ap, integer * info); /* Subroutine */ int _starpu_dpptrs_(char *uplo, integer *n, integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dpstf2_(char *uplo, integer *n, doublereal *a, integer * lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, integer *info); /* Subroutine */ int _starpu_dpstrf_(char *uplo, integer *n, doublereal *a, integer * lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, integer *info); /* Subroutine */ int _starpu_dptcon_(integer *n, doublereal *d__, doublereal *e, doublereal *anorm, doublereal *rcond, doublereal *work, integer *info); /* Subroutine */ int _starpu_dpteqr_(char *compz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_dptrfs_(integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *df, doublereal *ef, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *info); /* Subroutine */ int _starpu_dptsv_(integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dptsvx_(char *fact, integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *df, doublereal *ef, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * info); /* Subroutine */ int _starpu_dpttrf_(integer *n, doublereal *d__, doublereal *e, integer *info); /* Subroutine */ int _starpu_dpttrs_(integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dptts2_(integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb); /* Subroutine */ int _starpu_drscl_(integer *n, doublereal *sa, doublereal *sx, integer *incx); /* Subroutine */ int _starpu_dsbev_(char *jobz, char *uplo, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_dsbevd_(char *jobz, char *uplo, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dsbevx_(char *jobz, char *range, char *uplo, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *q, integer * ldq, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_dsbgst_(char *vect, char *uplo, integer *n, integer *ka, integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * ldbb, doublereal *x, integer *ldx, doublereal *work, integer *info); /* Subroutine */ int _starpu_dsbgv_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * ldbb, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_dsbgvd_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * ldbb, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dsbgvx_(char *jobz, char *range, char *uplo, integer *n, integer *ka, integer *kb, doublereal *ab, integer *ldab, doublereal * bb, integer *ldbb, doublereal *q, integer *ldq, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_dsbtrd_(char *vect, char *uplo, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *d__, doublereal *e, doublereal *q, integer *ldq, doublereal *work, integer *info); /* Subroutine */ int _starpu_dsfrk_(char *transr, char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, doublereal *c__); /* Subroutine */ int _starpu__starpu_dsgesv_(integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *ldb, doublereal * x, integer *ldx, doublereal *work, real *swork, integer *iter, integer *info); /* Subroutine */ int _starpu_dspcon_(char *uplo, integer *n, doublereal *ap, integer * ipiv, doublereal *anorm, doublereal *rcond, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dspev_(char *jobz, char *uplo, integer *n, doublereal * ap, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_dspevd_(char *jobz, char *uplo, integer *n, doublereal * ap, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dspevx_(char *jobz, char *range, char *uplo, integer *n, doublereal *ap, doublereal *vl, doublereal *vu, integer *il, integer * iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_dspgst_(integer *itype, char *uplo, integer *n, doublereal *ap, doublereal *bp, integer *info); /* Subroutine */ int _starpu_dspgv_(integer *itype, char *jobz, char *uplo, integer * n, doublereal *ap, doublereal *bp, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_dspgvd_(integer *itype, char *jobz, char *uplo, integer * n, doublereal *ap, doublereal *bp, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dspgvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, doublereal *ap, doublereal *bp, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu__starpu_dsposv_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * x, integer *ldx, doublereal *work, real *swork, integer *iter, integer *info); /* Subroutine */ int _starpu_dsprfs_(char *uplo, integer *n, integer *nrhs, doublereal *ap, doublereal *afp, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dspsv_(char *uplo, integer *n, integer *nrhs, doublereal *ap, integer *ipiv, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dspsvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *ap, doublereal *afp, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dsptrd_(char *uplo, integer *n, doublereal *ap, doublereal *d__, doublereal *e, doublereal *tau, integer *info); /* Subroutine */ int _starpu_dsptrf_(char *uplo, integer *n, doublereal *ap, integer * ipiv, integer *info); /* Subroutine */ int _starpu_dsptri_(char *uplo, integer *n, doublereal *ap, integer * ipiv, doublereal *work, integer *info); /* Subroutine */ int _starpu_dsptrs_(char *uplo, integer *n, integer *nrhs, doublereal *ap, integer *ipiv, doublereal *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_dstebz_(char *range, char *order, integer *n, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, doublereal *d__, doublereal *e, integer *m, integer *nsplit, doublereal *w, integer *iblock, integer *isplit, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dstedc_(char *compz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dstegr_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dstein_(integer *n, doublereal *d__, doublereal *e, integer *m, doublereal *w, integer *iblock, integer *isplit, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_dstemr_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, integer *m, doublereal *w, doublereal *z__, integer *ldz, integer *nzc, integer *isuppz, logical *tryrac, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dsteqr_(char *compz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_dsterf_(integer *n, doublereal *d__, doublereal *e, integer *info); /* Subroutine */ int _starpu_dstev_(char *jobz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_dstevd_(char *jobz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dstevr_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dstevx_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_dsycon_(char *uplo, integer *n, doublereal *a, integer * lda, integer *ipiv, doublereal *anorm, doublereal *rcond, doublereal * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dsyequb_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *s, doublereal *scond, doublereal *amax, doublereal * work, integer *info); /* Subroutine */ int _starpu_dsyev_(char *jobz, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *w, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dsyevd_(char *jobz, char *uplo, integer *n, doublereal * a, integer *lda, doublereal *w, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dsyevr_(char *jobz, char *range, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer * il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dsyevx_(char *jobz, char *range, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer * il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_dsygs2_(integer *itype, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_dsygst_(integer *itype, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_dsygv_(integer *itype, char *jobz, char *uplo, integer * n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *w, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dsygvd_(integer *itype, char *jobz, char *uplo, integer * n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *w, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dsygvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_dsyrfs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer * ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dsyrfsx_(char *uplo, char *equed, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, doublereal *s, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *berr, integer * n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublereal * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dsysv_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dsysvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer * ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_dsysvxx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, char *equed, doublereal *s, doublereal *b, integer * ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal * rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal * err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, doublereal *params, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dsytd2_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *d__, doublereal *e, doublereal *tau, integer *info); /* Subroutine */ int _starpu_dsytf2_(char *uplo, integer *n, doublereal *a, integer * lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_dsytrd_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *d__, doublereal *e, doublereal *tau, doublereal * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dsytrf_(char *uplo, integer *n, doublereal *a, integer * lda, integer *ipiv, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dsytri_(char *uplo, integer *n, doublereal *a, integer * lda, integer *ipiv, doublereal *work, integer *info); /* Subroutine */ int _starpu_dsytrs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * ldb, integer *info); /* Subroutine */ int _starpu_dtbcon_(char *norm, char *uplo, char *diag, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *rcond, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dtbrfs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dtbtrs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dtfsm_(char *transr, char *side, char *uplo, char *trans, char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, doublereal *b, integer *ldb); /* Subroutine */ int _starpu_dtftri_(char *transr, char *uplo, char *diag, integer *n, doublereal *a, integer *info); /* Subroutine */ int _starpu_dtfttp_(char *transr, char *uplo, integer *n, doublereal *arf, doublereal *ap, integer *info); /* Subroutine */ int _starpu_dtfttr_(char *transr, char *uplo, integer *n, doublereal *arf, doublereal *a, integer *lda, integer *info); /* Subroutine */ int _starpu_dtgevc_(char *side, char *howmny, logical *select, integer *n, doublereal *s, integer *lds, doublereal *p, integer *ldp, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, doublereal *work, integer *info); /* Subroutine */ int _starpu_dtgex2_(logical *wantq, logical *wantz, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * q, integer *ldq, doublereal *z__, integer *ldz, integer *j1, integer * n1, integer *n2, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dtgexc_(logical *wantq, logical *wantz, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * q, integer *ldq, doublereal *z__, integer *ldz, integer *ifst, integer *ilst, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dtgsen_(integer *ijob, logical *wantq, logical *wantz, logical *select, integer *n, doublereal *a, integer *lda, doublereal * b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * beta, doublereal *q, integer *ldq, doublereal *z__, integer *ldz, integer *m, doublereal *pl, doublereal *pr, doublereal *dif, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dtgsja_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, integer *k, integer *l, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *tola, doublereal *tolb, doublereal *alpha, doublereal *beta, doublereal *u, integer *ldu, doublereal *v, integer *ldv, doublereal *q, integer * ldq, doublereal *work, integer *ncycle, integer *info); /* Subroutine */ int _starpu_dtgsna_(char *job, char *howmny, logical *select, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, doublereal *s, doublereal *dif, integer *mm, integer *m, doublereal * work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_dtgsy2_(char *trans, integer *ijob, integer *m, integer * n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *c__, integer *ldc, doublereal *d__, integer *ldd, doublereal *e, integer *lde, doublereal *f, integer *ldf, doublereal * scale, doublereal *rdsum, doublereal *rdscal, integer *iwork, integer *pq, integer *info); /* Subroutine */ int _starpu_dtgsyl_(char *trans, integer *ijob, integer *m, integer * n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *c__, integer *ldc, doublereal *d__, integer *ldd, doublereal *e, integer *lde, doublereal *f, integer *ldf, doublereal * scale, doublereal *dif, doublereal *work, integer *lwork, integer * iwork, integer *info); /* Subroutine */ int _starpu_dtpcon_(char *norm, char *uplo, char *diag, integer *n, doublereal *ap, doublereal *rcond, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dtprfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dtptri_(char *uplo, char *diag, integer *n, doublereal * ap, integer *info); /* Subroutine */ int _starpu_dtptrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_dtpttf_(char *transr, char *uplo, integer *n, doublereal *ap, doublereal *arf, integer *info); /* Subroutine */ int _starpu_dtpttr_(char *uplo, integer *n, doublereal *ap, doublereal *a, integer *lda, integer *info); /* Subroutine */ int _starpu_dtrcon_(char *norm, char *uplo, char *diag, integer *n, doublereal *a, integer *lda, doublereal *rcond, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dtrevc_(char *side, char *howmny, logical *select, integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, doublereal *work, integer *info); /* Subroutine */ int _starpu_dtrexc_(char *compq, integer *n, doublereal *t, integer * ldt, doublereal *q, integer *ldq, integer *ifst, integer *ilst, doublereal *work, integer *info); /* Subroutine */ int _starpu_dtrrfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer * ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dtrsen_(char *job, char *compq, logical *select, integer *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, doublereal *wr, doublereal *wi, integer *m, doublereal *s, doublereal *sep, doublereal *work, integer *lwork, integer *iwork, integer * liwork, integer *info); /* Subroutine */ int _starpu_dtrsna_(char *job, char *howmny, logical *select, integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * ldvl, doublereal *vr, integer *ldvr, doublereal *s, doublereal *sep, integer *mm, integer *m, doublereal *work, integer *ldwork, integer * iwork, integer *info); /* Subroutine */ int _starpu_dtrsyl_(char *trana, char *tranb, integer *isgn, integer *m, integer *n, doublereal *a, integer *lda, doublereal *b, integer * ldb, doublereal *c__, integer *ldc, doublereal *scale, integer *info); /* Subroutine */ int _starpu_dtrti2_(char *uplo, char *diag, integer *n, doublereal * a, integer *lda, integer *info); /* Subroutine */ int _starpu_dtrtri_(char *uplo, char *diag, integer *n, doublereal * a, integer *lda, integer *info); /* Subroutine */ int _starpu_dtrtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer * ldb, integer *info); /* Subroutine */ int _starpu_dtrttf_(char *transr, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *arf, integer *info); /* Subroutine */ int _starpu_dtrttp_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *ap, integer *info); /* Subroutine */ int _starpu_dtzrqf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, integer *info); /* Subroutine */ int _starpu_dtzrzf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); doublereal _starpu_dzsum1_(integer *n, doublecomplex *cx, integer *incx); integer _starpu_icmax1_(integer *n, complex *cx, integer *incx); integer _starpu_ieeeck_(integer *ispec, real *zero, real *one); integer _starpu_ilaclc_(integer *m, integer *n, complex *a, integer *lda); integer _starpu_ilaclr_(integer *m, integer *n, complex *a, integer *lda); integer _starpu_iladiag_(char *diag); integer _starpu_iladlc_(integer *m, integer *n, doublereal *a, integer *lda); integer _starpu_iladlr_(integer *m, integer *n, doublereal *a, integer *lda); integer _starpu_ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, integer *n2, integer *n3, integer *n4); integer _starpu_ilaprec_(char *prec); integer _starpu_ilaslc_(integer *m, integer *n, real *a, integer *lda); integer _starpu_ilaslr_(integer *m, integer *n, real *a, integer *lda); integer _starpu_ilatrans_(char *trans); integer _starpu_ilauplo_(char *uplo); /* Subroutine */ int _starpu_ilaver_(integer *vers_major__, integer *vers_minor__, integer *vers_patch__); integer _starpu_ilazlc_(integer *m, integer *n, doublecomplex *a, integer *lda); integer _starpu_ilazlr_(integer *m, integer *n, doublecomplex *a, integer *lda); integer _starpu_iparmq_(integer *ispec, char *name__, char *opts, integer *n, integer *ilo, integer *ihi, integer *lwork); integer _starpu_izmax1_(integer *n, doublecomplex *cx, integer *incx); logical _starpu_lsamen_(integer *n, char *ca, char *cb); integer _starpu_smaxloc_(real *a, integer *dimm); /* Subroutine */ int _starpu_sbdsdc_(char *uplo, char *compq, integer *n, real *d__, real *e, real *u, integer *ldu, real *vt, integer *ldvt, real *q, integer *iq, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sbdsqr_(char *uplo, integer *n, integer *ncvt, integer * nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real * u, integer *ldu, real *c__, integer *ldc, real *work, integer *info); doublereal _starpu_scsum1_(integer *n, complex *cx, integer *incx); /* Subroutine */ int _starpu_sdisna_(char *job, integer *m, integer *n, real *d__, real *sep, integer *info); /* Subroutine */ int _starpu_sgbbrd_(char *vect, integer *m, integer *n, integer *ncc, integer *kl, integer *ku, real *ab, integer *ldab, real *d__, real * e, real *q, integer *ldq, real *pt, integer *ldpt, real *c__, integer *ldc, real *work, integer *info); /* Subroutine */ int _starpu_sgbcon_(char *norm, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, integer *ipiv, real *anorm, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgbequ_(integer *m, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real * colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_sgbequb_(integer *m, integer *n, integer *kl, integer * ku, real *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_sgbrfs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real * ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgbrfsx_(char *trans, char *equed, integer *n, integer * kl, integer *ku, integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *berr, integer * n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * nparams, real *params, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgbsv_(integer *n, integer *kl, integer *ku, integer * nrhs, real *ab, integer *ldab, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sgbsvx_(char *fact, char *trans, integer *n, integer *kl, integer *ku, integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgbsvxx_(char *fact, char *trans, integer *n, integer * kl, integer *ku, integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real * rpvgrw, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real *params, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgbtf2_(integer *m, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_sgbtrf_(integer *m, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_sgbtrs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, real *ab, integer *ldab, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, real *scale, integer *m, real *v, integer *ldv, integer *info); /* Subroutine */ int _starpu_sgebal_(char *job, integer *n, real *a, integer *lda, integer *ilo, integer *ihi, real *scale, integer *info); /* Subroutine */ int _starpu_sgebd2_(integer *m, integer *n, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *work, integer *info); /* Subroutine */ int _starpu_sgebrd_(integer *m, integer *n, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_sgecon_(char *norm, integer *n, real *a, integer *lda, real *anorm, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgeequ_(integer *m, integer *n, real *a, integer *lda, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_sgeequb_(integer *m, integer *n, real *a, integer *lda, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_sgees_(char *jobvs, char *sort, L_fp select, integer *n, real *a, integer *lda, integer *sdim, real *wr, real *wi, real *vs, integer *ldvs, real *work, integer *lwork, logical *bwork, integer * info); /* Subroutine */ int _starpu_sgeesx_(char *jobvs, char *sort, L_fp select, char * sense, integer *n, real *a, integer *lda, integer *sdim, real *wr, real *wi, real *vs, integer *ldvs, real *rconde, real *rcondv, real * work, integer *lwork, integer *iwork, integer *liwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_sgeev_(char *jobvl, char *jobvr, integer *n, real *a, integer *lda, real *wr, real *wi, real *vl, integer *ldvl, real *vr, integer *ldvr, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgeevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, real *a, integer *lda, real *wr, real *wi, real * vl, integer *ldvl, real *vr, integer *ldvr, integer *ilo, integer * ihi, real *scale, real *abnrm, real *rconde, real *rcondv, real *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgegs_(char *jobvsl, char *jobvsr, integer *n, real *a, integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real *beta, real *vsl, integer *ldvsl, real *vsr, integer *ldvsr, real * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgegv_(char *jobvl, char *jobvr, integer *n, real *a, integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real *beta, real *vl, integer *ldvl, real *vr, integer *ldvr, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgehd2_(integer *n, integer *ilo, integer *ihi, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sgehrd_(integer *n, integer *ilo, integer *ihi, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgejsv_(char *joba, char *jobu, char *jobv, char *jobr, char *jobt, char *jobp, integer *m, integer *n, real *a, integer *lda, real *sva, real *u, integer *ldu, real *v, integer *ldv, real *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgelq2_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sgelqf_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgels_(char *trans, integer *m, integer *n, integer * nrhs, real *a, integer *lda, real *b, integer *ldb, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgelsd_(integer *m, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, real *s, real *rcond, integer * rank, real *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgelss_(integer *m, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, real *s, real *rcond, integer * rank, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgelsx_(integer *m, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer *jpvt, real *rcond, integer *rank, real *work, integer *info); /* Subroutine */ int _starpu_sgelsy_(integer *m, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer *jpvt, real *rcond, integer *rank, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgeql2_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sgeqlf_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgeqp3_(integer *m, integer *n, real *a, integer *lda, integer *jpvt, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgeqpf_(integer *m, integer *n, real *a, integer *lda, integer *jpvt, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sgeqr2_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sgeqrf_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgerfs_(char *trans, integer *n, integer *nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgerfsx_(char *trans, char *equed, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real *params, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgerq2_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sgerqf_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgesc2_(integer *n, real *a, integer *lda, real *rhs, integer *ipiv, integer *jpiv, real *scale); /* Subroutine */ int _starpu_sgesdd_(char *jobz, integer *m, integer *n, real *a, integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt, real *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgesv_(integer *n, integer *nrhs, real *a, integer *lda, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sgesvd_(char *jobu, char *jobvt, integer *m, integer *n, real *a, integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgesvj_(char *joba, char *jobu, char *jobv, integer *m, integer *n, real *a, integer *lda, real *sva, integer *mv, real *v, integer *ldv, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgesvx_(char *fact, char *trans, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, char *equed, real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgesvxx_(char *fact, char *trans, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, char *equed, real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer * n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * nparams, real *params, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgetc2_(integer *n, real *a, integer *lda, integer *ipiv, integer *jpiv, integer *info); /* Subroutine */ int _starpu_sgetf2_(integer *m, integer *n, real *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_sgetrf_(integer *m, integer *n, real *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_sgetri_(integer *n, real *a, integer *lda, integer *ipiv, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgetrs_(char *trans, integer *n, integer *nrhs, real *a, integer *lda, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sggbak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, real *lscale, real *rscale, integer *m, real *v, integer *ldv, integer *info); /* Subroutine */ int _starpu_sggbal_(char *job, integer *n, real *a, integer *lda, real *b, integer *ldb, integer *ilo, integer *ihi, real *lscale, real *rscale, real *work, integer *info); /* Subroutine */ int _starpu_sgges_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, integer *n, real *a, integer *lda, real *b, integer *ldb, integer *sdim, real *alphar, real *alphai, real *beta, real *vsl, integer *ldvsl, real *vsr, integer *ldvsr, real *work, integer *lwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_sggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, char *sense, integer *n, real *a, integer *lda, real *b, integer *ldb, integer *sdim, real *alphar, real *alphai, real *beta, real *vsl, integer *ldvsl, real *vsr, integer *ldvsr, real *rconde, real *rcondv, real *work, integer *lwork, integer *iwork, integer * liwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_sggev_(char *jobvl, char *jobvr, integer *n, real *a, integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real *beta, real *vl, integer *ldvl, real *vr, integer *ldvr, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sggevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, real *a, integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real *beta, real *vl, integer *ldvl, real *vr, integer *ldvr, integer *ilo, integer *ihi, real *lscale, real *rscale, real *abnrm, real *bbnrm, real *rconde, real *rcondv, real *work, integer *lwork, integer *iwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_sggglm_(integer *n, integer *m, integer *p, real *a, integer *lda, real *b, integer *ldb, real *d__, real *x, real *y, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgghrd_(char *compq, char *compz, integer *n, integer * ilo, integer *ihi, real *a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real *z__, integer *ldz, integer *info); /* Subroutine */ int _starpu_sgglse_(integer *m, integer *n, integer *p, real *a, integer *lda, real *b, integer *ldb, real *c__, real *d__, real *x, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sggqrf_(integer *n, integer *m, integer *p, real *a, integer *lda, real *taua, real *b, integer *ldb, real *taub, real * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sggrqf_(integer *m, integer *p, integer *n, real *a, integer *lda, real *taua, real *b, integer *ldb, real *taub, real * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sggsvd_(char *jobu, char *jobv, char *jobq, integer *m, integer *n, integer *p, integer *k, integer *l, real *a, integer *lda, real *b, integer *ldb, real *alpha, real *beta, real *u, integer * ldu, real *v, integer *ldv, real *q, integer *ldq, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sggsvp_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, real *a, integer *lda, real *b, integer *ldb, real *tola, real *tolb, integer *k, integer *l, real *u, integer *ldu, real *v, integer *ldv, real *q, integer *ldq, integer *iwork, real * tau, real *work, integer *info); /* Subroutine */ int _starpu_sgsvj0_(char *jobv, integer *m, integer *n, real *a, integer *lda, real *d__, real *sva, integer *mv, real *v, integer * ldv, real *eps, real *sfmin, real *tol, integer *nsweep, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgsvj1_(char *jobv, integer *m, integer *n, integer *n1, real *a, integer *lda, real *d__, real *sva, integer *mv, real *v, integer *ldv, real *eps, real *sfmin, real *tol, integer *nsweep, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgtcon_(char *norm, integer *n, real *dl, real *d__, real *du, real *du2, integer *ipiv, real *anorm, real *rcond, real * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgtrfs_(char *trans, integer *n, integer *nrhs, real *dl, real *d__, real *du, real *dlf, real *df, real *duf, real *du2, integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real * ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgtsv_(integer *n, integer *nrhs, real *dl, real *d__, real *du, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sgtsvx_(char *fact, char *trans, integer *n, integer * nrhs, real *dl, real *d__, real *du, real *dlf, real *df, real *duf, real *du2, integer *ipiv, real *b, integer *ldb, real *x, integer * ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgttrf_(integer *n, real *dl, real *d__, real *du, real * du2, integer *ipiv, integer *info); /* Subroutine */ int _starpu_sgttrs_(char *trans, integer *n, integer *nrhs, real *dl, real *d__, real *du, real *du2, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sgtts2_(integer *itrans, integer *n, integer *nrhs, real *dl, real *d__, real *du, real *du2, integer *ipiv, real *b, integer * ldb); /* Subroutine */ int _starpu_shgeqz_(char *job, char *compq, char *compz, integer *n, integer *ilo, integer *ihi, real *h__, integer *ldh, real *t, integer *ldt, real *alphar, real *alphai, real *beta, real *q, integer *ldq, real *z__, integer *ldz, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_shsein_(char *side, char *eigsrc, char *initv, logical * select, integer *n, real *h__, integer *ldh, real *wr, real *wi, real *vl, integer *ldvl, real *vr, integer *ldvr, integer *mm, integer *m, real *work, integer *ifaill, integer *ifailr, integer *info); /* Subroutine */ int _starpu_shseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real *wi, real *z__, integer *ldz, real *work, integer *lwork, integer *info); logical _starpu_sisnan_(real *sin__); /* Subroutine */ int _starpu_sla_gbamv__(integer *trans, integer *m, integer *n, integer *kl, integer *ku, real *alpha, real *ab, integer *ldab, real * x, integer *incx, real *beta, real *y, integer *incy); doublereal _starpu_sla_gbrcond__(char *trans, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, integer *cmode, real *c__, integer *info, real *work, integer *iwork, ftnlen trans_len); /* Subroutine */ int _starpu_sla_gbrfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, logical *colequ, real *c__, real *b, integer *ldb, real *y, integer * ldy, real *berr_out__, integer *n_norms__, real *errs_n__, real * errs_c__, real *res, real *ayb, real *dy, real *y_tail__, real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, logical * ignore_cwise__, integer *info); doublereal _starpu_sla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * ncols, real *ab, integer *ldab, real *afb, integer *ldafb); /* Subroutine */ int _starpu_sla_geamv__(integer *trans, integer *m, integer *n, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer *incy); doublereal _starpu_sla_gercond__(char *trans, integer *n, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, integer *cmode, real *c__, integer *info, real *work, integer *iwork, ftnlen trans_len); /* Subroutine */ int _starpu_sla_gerfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *nrhs, real *a, integer *lda, real * af, integer *ldaf, integer *ipiv, logical *colequ, real *c__, real *b, integer *ldb, real *y, integer *ldy, real *berr_out__, integer * n_norms__, real *errs_n__, real *errs_c__, real *res, real *ayb, real *dy, real *y_tail__, real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info); /* Subroutine */ int _starpu_sla_lin_berr__(integer *n, integer *nz, integer *nrhs, real *res, real *ayb, real *berr); doublereal _starpu_sla_porcond__(char *uplo, integer *n, real *a, integer *lda, real * af, integer *ldaf, integer *cmode, real *c__, integer *info, real * work, integer *iwork, ftnlen uplo_len); /* Subroutine */ int _starpu_sla_porfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, real *a, integer *lda, real *af, integer * ldaf, logical *colequ, real *c__, real *b, integer *ldb, real *y, integer *ldy, real *berr_out__, integer *n_norms__, real *errs_n__, real *errs_c__, real *res, real *ayb, real *dy, real *y_tail__, real * rcond, integer *ithresh, real *rthresh, real *dz_ub__, logical * ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_sla_porpvgrw__(char *uplo, integer *ncols, real *a, integer *lda, real *af, integer *ldaf, real *work, ftnlen uplo_len); doublereal _starpu_sla_rpvgrw__(integer *n, integer *ncols, real *a, integer *lda, real *af, integer *ldaf); /* Subroutine */ int _starpu_sla_syamv__(integer *uplo, integer *n, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer *incy); doublereal _starpu_sla_syrcond__(char *uplo, integer *n, real *a, integer *lda, real * af, integer *ldaf, integer *ipiv, integer *cmode, real *c__, integer * info, real *work, integer *iwork, ftnlen uplo_len); /* Subroutine */ int _starpu_sla_syrfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, real *a, integer *lda, real *af, integer * ldaf, integer *ipiv, logical *colequ, real *c__, real *b, integer * ldb, real *y, integer *ldy, real *berr_out__, integer *n_norms__, real *errs_n__, real *errs_c__, real *res, real *ayb, real *dy, real * y_tail__, real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_sla_syrpvgrw__(char *uplo, integer *n, integer *info, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, real *work, ftnlen uplo_len); /* Subroutine */ int _starpu_sla_wwaddw__(integer *n, real *x, real *y, real *w); /* Subroutine */ int _starpu_slabad_(real *small, real *large); /* Subroutine */ int _starpu_slabrd_(integer *m, integer *n, integer *nb, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *x, integer *ldx, real *y, integer *ldy); /* Subroutine */ int _starpu_slacn2_(integer *n, real *v, real *x, integer *isgn, real *est, integer *kase, integer *isave); /* Subroutine */ int _starpu_slacon_(integer *n, real *v, real *x, integer *isgn, real *est, integer *kase); /* Subroutine */ int _starpu_slacpy_(char *uplo, integer *m, integer *n, real *a, integer *lda, real *b, integer *ldb); /* Subroutine */ int _starpu_sladiv_(real *a, real *b, real *c__, real *d__, real *p, real *q); /* Subroutine */ int _starpu_slae2_(real *a, real *b, real *c__, real *rt1, real *rt2); /* Subroutine */ int _starpu_slaebz_(integer *ijob, integer *nitmax, integer *n, integer *mmax, integer *minp, integer *nbmin, real *abstol, real * reltol, real *pivmin, real *d__, real *e, real *e2, integer *nval, real *ab, real *c__, integer *mout, integer *nab, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slaed0_(integer *icompq, integer *qsiz, integer *n, real *d__, real *e, real *q, integer *ldq, real *qstore, integer *ldqs, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slaed1_(integer *n, real *d__, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_slaed2_(integer *k, integer *n, integer *n1, real *d__, real *q, integer *ldq, integer *indxq, real *rho, real *z__, real * dlamda, real *w, real *q2, integer *indx, integer *indxc, integer * indxp, integer *coltyp, integer *info); /* Subroutine */ int _starpu_slaed3_(integer *k, integer *n, integer *n1, real *d__, real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer * indx, integer *ctot, real *w, real *s, integer *info); /* Subroutine */ int _starpu_slaed4_(integer *n, integer *i__, real *d__, real *z__, real *delta, real *rho, real *dlam, integer *info); /* Subroutine */ int _starpu_slaed5_(integer *i__, real *d__, real *z__, real *delta, real *rho, real *dlam); /* Subroutine */ int _starpu_slaed6_(integer *kniter, logical *orgati, real *rho, real *d__, real *z__, real *finit, real *tau, integer *info); /* Subroutine */ int _starpu_slaed7_(integer *icompq, integer *n, integer *qsiz, integer *tlvls, integer *curlvl, integer *curpbm, real *d__, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real * qstore, integer *qptr, integer *prmptr, integer *perm, integer * givptr, integer *givcol, real *givnum, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slaed8_(integer *icompq, integer *k, integer *n, integer *qsiz, real *d__, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *z__, real *dlamda, real *q2, integer *ldq2, real *w, integer *perm, integer *givptr, integer *givcol, real * givnum, integer *indxp, integer *indx, integer *info); /* Subroutine */ int _starpu_slaed9_(integer *k, integer *kstart, integer *kstop, integer *n, real *d__, real *q, integer *ldq, real *rho, real *dlamda, real *w, real *s, integer *lds, integer *info); /* Subroutine */ int _starpu_slaeda_(integer *n, integer *tlvls, integer *curlvl, integer *curpbm, integer *prmptr, integer *perm, integer *givptr, integer *givcol, real *givnum, real *q, integer *qptr, real *z__, real *ztemp, integer *info); /* Subroutine */ int _starpu_slaein_(logical *rightv, logical *noinit, integer *n, real *h__, integer *ldh, real *wr, real *wi, real *vr, real *vi, real *b, integer *ldb, real *work, real *eps3, real *smlnum, real *bignum, integer *info); /* Subroutine */ int _starpu_slaev2_(real *a, real *b, real *c__, real *rt1, real * rt2, real *cs1, real *sn1); /* Subroutine */ int _starpu_slaexc_(logical *wantq, integer *n, real *t, integer * ldt, real *q, integer *ldq, integer *j1, integer *n1, integer *n2, real *work, integer *info); /* Subroutine */ int _starpu_slag2_(real *a, integer *lda, real *b, integer *ldb, real *safmin, real *scale1, real *scale2, real *wr1, real *wr2, real * wi); /* Subroutine */ int _starpu_slag2d_(integer *m, integer *n, real *sa, integer *ldsa, doublereal *a, integer *lda, integer *info); /* Subroutine */ int _starpu_slags2_(logical *upper, real *a1, real *a2, real *a3, real *b1, real *b2, real *b3, real *csu, real *snu, real *csv, real * snv, real *csq, real *snq); /* Subroutine */ int _starpu_slagtf_(integer *n, real *a, real *lambda, real *b, real *c__, real *tol, real *d__, integer *in, integer *info); /* Subroutine */ int _starpu_slagtm_(char *trans, integer *n, integer *nrhs, real * alpha, real *dl, real *d__, real *du, real *x, integer *ldx, real * beta, real *b, integer *ldb); /* Subroutine */ int _starpu_slagts_(integer *job, integer *n, real *a, real *b, real *c__, real *d__, integer *in, real *y, real *tol, integer *info); /* Subroutine */ int _starpu_slagv2_(real *a, integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real *beta, real *csl, real *snl, real * csr, real *snr); /* Subroutine */ int _starpu_slahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer * info); /* Subroutine */ int _starpu_slahr2_(integer *n, integer *k, integer *nb, real *a, integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy); /* Subroutine */ int _starpu_slahrd_(integer *n, integer *k, integer *nb, real *a, integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy); /* Subroutine */ int _starpu_slaic1_(integer *job, integer *j, real *x, real *sest, real *w, real *gamma, real *sestpr, real *s, real *c__); logical _starpu_slaisnan_(real *sin1, real *sin2); /* Subroutine */ int _starpu_slaln2_(logical *ltrans, integer *na, integer *nw, real * smin, real *ca, real *a, integer *lda, real *d1, real *d2, real *b, integer *ldb, real *wr, real *wi, real *x, integer *ldx, real *scale, real *xnorm, integer *info); /* Subroutine */ int _starpu_slals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * difl, real *difr, real *z__, integer *k, real *c__, real *s, real * work, integer *info); /* Subroutine */ int _starpu_slalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, real *b, integer *ldb, real *bx, integer *ldbx, real * u, integer *ldu, real *vt, integer *k, real *difl, real *difr, real * z__, real *poles, integer *givptr, integer *givcol, integer *ldgcol, integer *perm, real *givnum, real *c__, real *s, real *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_slalsd_(char *uplo, integer *smlsiz, integer *n, integer *nrhs, real *d__, real *e, real *b, integer *ldb, real *rcond, integer *rank, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slamrg_(integer *n1, integer *n2, real *a, integer * strd1, integer *strd2, integer *index); integer _starpu_slaneg_(integer *n, real *d__, real *lld, real *sigma, real *pivmin, integer *r__); doublereal _starpu_slangb_(char *norm, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, real *work); doublereal _starpu_slange_(char *norm, integer *m, integer *n, real *a, integer *lda, real *work); doublereal _starpu_slangt_(char *norm, integer *n, real *dl, real *d__, real *du); doublereal _starpu_slanhs_(char *norm, integer *n, real *a, integer *lda, real *work); doublereal _starpu_slansb_(char *norm, char *uplo, integer *n, integer *k, real *ab, integer *ldab, real *work); doublereal _starpu_slansf_(char *norm, char *transr, char *uplo, integer *n, real *a, real *work); doublereal _starpu_slansp_(char *norm, char *uplo, integer *n, real *ap, real *work); doublereal _starpu_slanst_(char *norm, integer *n, real *d__, real *e); doublereal _starpu_slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, real *work); doublereal _starpu_slantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, real *ab, integer *ldab, real *work); doublereal _starpu_slantp_(char *norm, char *uplo, char *diag, integer *n, real *ap, real *work); doublereal _starpu_slantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, real *a, integer *lda, real *work); /* Subroutine */ int _starpu_slanv2_(real *a, real *b, real *c__, real *d__, real * rt1r, real *rt1i, real *rt2r, real *rt2i, real *cs, real *sn); /* Subroutine */ int _starpu_slapll_(integer *n, real *x, integer *incx, real *y, integer *incy, real *ssmin); /* Subroutine */ int _starpu_slapmt_(logical *forwrd, integer *m, integer *n, real *x, integer *ldx, integer *k); doublereal _starpu_slapy2_(real *x, real *y); doublereal _starpu_slapy3_(real *x, real *y, real *z__); /* Subroutine */ int _starpu_slaqgb_(integer *m, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real * colcnd, real *amax, char *equed); /* Subroutine */ int _starpu_slaqge_(integer *m, integer *n, real *a, integer *lda, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, char * equed); /* Subroutine */ int _starpu_slaqp2_(integer *m, integer *n, integer *offset, real *a, integer *lda, integer *jpvt, real *tau, real *vn1, real *vn2, real * work); /* Subroutine */ int _starpu_slaqps_(integer *m, integer *n, integer *offset, integer *nb, integer *kb, real *a, integer *lda, integer *jpvt, real *tau, real *vn1, real *vn2, real *auxv, real *f, integer *ldf); /* Subroutine */ int _starpu_slaqr0_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_slaqr1_(integer *n, real *h__, integer *ldh, real *sr1, real *si1, real *sr2, real *si2, real *v); /* Subroutine */ int _starpu_slaqr2_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, real *h__, integer *ldh, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer *ns, integer *nd, real *sr, real *si, real *v, integer *ldv, integer *nh, real *t, integer *ldt, integer *nv, real *wv, integer *ldwv, real * work, integer *lwork); /* Subroutine */ int _starpu_slaqr3_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, real *h__, integer *ldh, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer *ns, integer *nd, real *sr, real *si, real *v, integer *ldv, integer *nh, real *t, integer *ldt, integer *nv, real *wv, integer *ldwv, real * work, integer *lwork); /* Subroutine */ int _starpu_slaqr4_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_slaqr5_(logical *wantt, logical *wantz, integer *kacc22, integer *n, integer *ktop, integer *kbot, integer *nshfts, real *sr, real *si, real *h__, integer *ldh, integer *iloz, integer *ihiz, real *z__, integer *ldz, real *v, integer *ldv, real *u, integer *ldu, integer *nv, real *wv, integer *ldwv, integer *nh, real *wh, integer * ldwh); /* Subroutine */ int _starpu_slaqsb_(char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_slaqsp_(char *uplo, integer *n, real *ap, real *s, real * scond, real *amax, char *equed); /* Subroutine */ int _starpu_slaqsy_(char *uplo, integer *n, real *a, integer *lda, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_slaqtr_(logical *ltran, logical *lreal, integer *n, real *t, integer *ldt, real *b, real *w, real *scale, real *x, real *work, integer *info); /* Subroutine */ int _starpu_slar1v_(integer *n, integer *b1, integer *bn, real * lambda, real *d__, real *l, real *ld, real *lld, real *pivmin, real * gaptol, real *z__, logical *wantnc, integer *negcnt, real *ztz, real * mingma, integer *r__, integer *isuppz, real *nrminv, real *resid, real *rqcorr, real *work); /* Subroutine */ int _starpu_slar2v_(integer *n, real *x, real *y, real *z__, integer *incx, real *c__, real *s, integer *incc); /* Subroutine */ int _starpu_slarf_(char *side, integer *m, integer *n, real *v, integer *incv, real *tau, real *c__, integer *ldc, real *work); /* Subroutine */ int _starpu_slarfb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, real *v, integer *ldv, real *t, integer *ldt, real *c__, integer *ldc, real *work, integer * ldwork); /* Subroutine */ int _starpu_slarfg_(integer *n, real *alpha, real *x, integer *incx, real *tau); /* Subroutine */ int _starpu_slarfp_(integer *n, real *alpha, real *x, integer *incx, real *tau); /* Subroutine */ int _starpu_slarft_(char *direct, char *storev, integer *n, integer * k, real *v, integer *ldv, real *tau, real *t, integer *ldt); /* Subroutine */ int _starpu_slarfx_(char *side, integer *m, integer *n, real *v, real *tau, real *c__, integer *ldc, real *work); /* Subroutine */ int _starpu_slargv_(integer *n, real *x, integer *incx, real *y, integer *incy, real *c__, integer *incc); /* Subroutine */ int _starpu_slarnv_(integer *idist, integer *iseed, integer *n, real *x); /* Subroutine */ int _starpu_slarra_(integer *n, real *d__, real *e, real *e2, real * spltol, real *tnrm, integer *nsplit, integer *isplit, integer *info); /* Subroutine */ int _starpu_slarrb_(integer *n, real *d__, real *lld, integer * ifirst, integer *ilast, real *rtol1, real *rtol2, integer *offset, real *w, real *wgap, real *werr, real *work, integer *iwork, real * pivmin, real *spdiam, integer *twist, integer *info); /* Subroutine */ int _starpu_slarrc_(char *jobt, integer *n, real *vl, real *vu, real *d__, real *e, real *pivmin, integer *eigcnt, integer *lcnt, integer * rcnt, integer *info); /* Subroutine */ int _starpu_slarrd_(char *range, char *order, integer *n, real *vl, real *vu, integer *il, integer *iu, real *gers, real *reltol, real * d__, real *e, real *e2, real *pivmin, integer *nsplit, integer * isplit, integer *m, real *w, real *werr, real *wl, real *wu, integer * iblock, integer *indexw, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slarre_(char *range, integer *n, real *vl, real *vu, integer *il, integer *iu, real *d__, real *e, real *e2, real *rtol1, real *rtol2, real *spltol, integer *nsplit, integer *isplit, integer * m, real *w, real *werr, real *wgap, integer *iblock, integer *indexw, real *gers, real *pivmin, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slarrf_(integer *n, real *d__, real *l, real *ld, integer *clstrt, integer *clend, real *w, real *wgap, real *werr, real *spdiam, real *clgapl, real *clgapr, real *pivmin, real *sigma, real *dplus, real *lplus, real *work, integer *info); /* Subroutine */ int _starpu_slarrj_(integer *n, real *d__, real *e2, integer *ifirst, integer *ilast, real *rtol, integer *offset, real *w, real *werr, real *work, integer *iwork, real *pivmin, real *spdiam, integer *info); /* Subroutine */ int _starpu_slarrk_(integer *n, integer *iw, real *gl, real *gu, real *d__, real *e2, real *pivmin, real *reltol, real *w, real *werr, integer *info); /* Subroutine */ int _starpu_slarrr_(integer *n, real *d__, real *e, integer *info); /* Subroutine */ int _starpu_slarrv_(integer *n, real *vl, real *vu, real *d__, real * l, real *pivmin, integer *isplit, integer *m, integer *dol, integer * dou, real *minrgp, real *rtol1, real *rtol2, real *w, real *werr, real *wgap, integer *iblock, integer *indexw, real *gers, real *z__, integer *ldz, integer *isuppz, real *work, integer *iwork, integer * info); /* Subroutine */ int _starpu_slarscl2_(integer *m, integer *n, real *d__, real *x, integer *ldx); /* Subroutine */ int _starpu_slartg_(real *f, real *g, real *cs, real *sn, real *r__); /* Subroutine */ int _starpu_slartv_(integer *n, real *x, integer *incx, real *y, integer *incy, real *c__, real *s, integer *incc); /* Subroutine */ int _starpu_slaruv_(integer *iseed, integer *n, real *x); /* Subroutine */ int _starpu_slarz_(char *side, integer *m, integer *n, integer *l, real *v, integer *incv, real *tau, real *c__, integer *ldc, real * work); /* Subroutine */ int _starpu_slarzb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, integer *l, real *v, integer *ldv, real *t, integer *ldt, real *c__, integer *ldc, real * work, integer *ldwork); /* Subroutine */ int _starpu_slarzt_(char *direct, char *storev, integer *n, integer * k, real *v, integer *ldv, real *tau, real *t, integer *ldt); /* Subroutine */ int _starpu_slas2_(real *f, real *g, real *h__, real *ssmin, real * ssmax); /* Subroutine */ int _starpu_slascl_(char *type__, integer *kl, integer *ku, real * cfrom, real *cto, integer *m, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_slascl2_(integer *m, integer *n, real *d__, real *x, integer *ldx); /* Subroutine */ int _starpu_slasd0_(integer *n, integer *sqre, real *d__, real *e, real *u, integer *ldu, real *vt, integer *ldvt, integer *smlsiz, integer *iwork, real *work, integer *info); /* Subroutine */ int _starpu_slasd1_(integer *nl, integer *nr, integer *sqre, real * d__, real *alpha, real *beta, real *u, integer *ldu, real *vt, integer *ldvt, integer *idxq, integer *iwork, real *work, integer * info); /* Subroutine */ int _starpu_slasd2_(integer *nl, integer *nr, integer *sqre, integer *k, real *d__, real *z__, real *alpha, real *beta, real *u, integer * ldu, real *vt, integer *ldvt, real *dsigma, real *u2, integer *ldu2, real *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer *idxq, integer *coltyp, integer *info); /* Subroutine */ int _starpu_slasd3_(integer *nl, integer *nr, integer *sqre, integer *k, real *d__, real *q, integer *ldq, real *dsigma, real *u, integer * ldu, real *u2, integer *ldu2, real *vt, integer *ldvt, real *vt2, integer *ldvt2, integer *idxc, integer *ctot, real *z__, integer * info); /* Subroutine */ int _starpu_slasd4_(integer *n, integer *i__, real *d__, real *z__, real *delta, real *rho, real *sigma, real *work, integer *info); /* Subroutine */ int _starpu_slasd5_(integer *i__, real *d__, real *z__, real *delta, real *rho, real *dsigma, real *work); /* Subroutine */ int _starpu_slasd6_(integer *icompq, integer *nl, integer *nr, integer *sqre, real *d__, real *vf, real *vl, real *alpha, real *beta, integer *idxq, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * difl, real *difr, real *z__, integer *k, real *c__, real *s, real * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slasd7_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *k, real *d__, real *z__, real *zw, real *vf, real *vfw, real *vl, real *vlw, real *alpha, real *beta, real *dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, integer * givptr, integer *givcol, integer *ldgcol, real *givnum, integer * ldgnum, real *c__, real *s, integer *info); /* Subroutine */ int _starpu_slasd8_(integer *icompq, integer *k, real *d__, real * z__, real *vf, real *vl, real *difl, real *difr, integer *lddifr, real *dsigma, real *work, integer *info); /* Subroutine */ int _starpu_slasda_(integer *icompq, integer *smlsiz, integer *n, integer *sqre, real *d__, real *e, real *u, integer *ldu, real *vt, integer *k, real *difl, real *difr, real *z__, real *poles, integer * givptr, integer *givcol, integer *ldgcol, integer *perm, real *givnum, real *c__, real *s, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slasdq_(char *uplo, integer *sqre, integer *n, integer * ncvt, integer *nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real *u, integer *ldu, real *c__, integer *ldc, real * work, integer *info); /* Subroutine */ int _starpu_slasdt_(integer *n, integer *lvl, integer *nd, integer * inode, integer *ndiml, integer *ndimr, integer *msub); /* Subroutine */ int _starpu_slaset_(char *uplo, integer *m, integer *n, real *alpha, real *beta, real *a, integer *lda); /* Subroutine */ int _starpu_slasq1_(integer *n, real *d__, real *e, real *work, integer *info); /* Subroutine */ int _starpu_slasq2_(integer *n, real *z__, integer *info); /* Subroutine */ int _starpu_slasq3_(integer *i0, integer *n0, real *z__, integer *pp, real *dmin__, real *sigma, real *desig, real *qmax, integer *nfail, integer *iter, integer *ndiv, logical *ieee, integer *ttype, real * dmin1, real *dmin2, real *dn, real *dn1, real *dn2, real *g, real * tau); /* Subroutine */ int _starpu_slasq4_(integer *i0, integer *n0, real *z__, integer *pp, integer *n0in, real *dmin__, real *dmin1, real *dmin2, real *dn, real *dn1, real *dn2, real *tau, integer *ttype, real *g); /* Subroutine */ int _starpu_slasq5_(integer *i0, integer *n0, real *z__, integer *pp, real *tau, real *dmin__, real *dmin1, real *dmin2, real *dn, real * dnm1, real *dnm2, logical *ieee); /* Subroutine */ int _starpu_slasq6_(integer *i0, integer *n0, real *z__, integer *pp, real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real * dnm2); /* Subroutine */ int _starpu_slasr_(char *side, char *pivot, char *direct, integer *m, integer *n, real *c__, real *s, real *a, integer *lda); /* Subroutine */ int _starpu_slasrt_(char *id, integer *n, real *d__, integer *info); /* Subroutine */ int _starpu_slassq_(integer *n, real *x, integer *incx, real *scale, real *sumsq); /* Subroutine */ int _starpu_slasv2_(real *f, real *g, real *h__, real *ssmin, real * ssmax, real *snr, real *csr, real *snl, real *csl); /* Subroutine */ int _starpu_slaswp_(integer *n, real *a, integer *lda, integer *k1, integer *k2, integer *ipiv, integer *incx); /* Subroutine */ int _starpu_slasy2_(logical *ltranl, logical *ltranr, integer *isgn, integer *n1, integer *n2, real *tl, integer *ldtl, real *tr, integer * ldtr, real *b, integer *ldb, real *scale, real *x, integer *ldx, real *xnorm, integer *info); /* Subroutine */ int _starpu_slasyf_(char *uplo, integer *n, integer *nb, integer *kb, real *a, integer *lda, integer *ipiv, real *w, integer *ldw, integer *info); /* Subroutine */ int _starpu_slatbs_(char *uplo, char *trans, char *diag, char * normin, integer *n, integer *kd, real *ab, integer *ldab, real *x, real *scale, real *cnorm, integer *info); /* Subroutine */ int _starpu_slatdf_(integer *ijob, integer *n, real *z__, integer * ldz, real *rhs, real *rdsum, real *rdscal, integer *ipiv, integer * jpiv); /* Subroutine */ int _starpu_slatps_(char *uplo, char *trans, char *diag, char * normin, integer *n, real *ap, real *x, real *scale, real *cnorm, integer *info); /* Subroutine */ int _starpu_slatrd_(char *uplo, integer *n, integer *nb, real *a, integer *lda, real *e, real *tau, real *w, integer *ldw); /* Subroutine */ int _starpu_slatrs_(char *uplo, char *trans, char *diag, char * normin, integer *n, real *a, integer *lda, real *x, real *scale, real *cnorm, integer *info); /* Subroutine */ int _starpu_slatrz_(integer *m, integer *n, integer *l, real *a, integer *lda, real *tau, real *work); /* Subroutine */ int _starpu_slatzm_(char *side, integer *m, integer *n, real *v, integer *incv, real *tau, real *c1, real *c2, integer *ldc, real * work); /* Subroutine */ int _starpu_slauu2_(char *uplo, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_slauum_(char *uplo, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_sopgtr_(char *uplo, integer *n, real *ap, real *tau, real *q, integer *ldq, real *work, integer *info); /* Subroutine */ int _starpu_sopmtr_(char *side, char *uplo, char *trans, integer *m, integer *n, real *ap, real *tau, real *c__, integer *ldc, real *work, integer *info); /* Subroutine */ int _starpu_sorg2l_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sorg2r_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sorgbr_(char *vect, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorghr_(integer *n, integer *ilo, integer *ihi, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorgl2_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sorglq_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorgql_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorgqr_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorgr2_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sorgrq_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorgtr_(char *uplo, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorm2l_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info); /* Subroutine */ int _starpu_sorm2r_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info); /* Subroutine */ int _starpu_sormbr_(char *vect, char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sormhr_(char *side, char *trans, integer *m, integer *n, integer *ilo, integer *ihi, real *a, integer *lda, real *tau, real * c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorml2_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info); /* Subroutine */ int _starpu_sormlq_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sormql_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sormqr_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sormr2_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info); /* Subroutine */ int _starpu_sormr3_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info); /* Subroutine */ int _starpu_sormrq_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sormrz_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sormtr_(char *side, char *uplo, char *trans, integer *m, integer *n, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_spbcon_(char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *anorm, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_spbequ_(char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *s, real *scond, real *amax, integer *info); /* Subroutine */ int _starpu_spbrfs_(char *uplo, integer *n, integer *kd, integer * nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_spbstf_(char *uplo, integer *n, integer *kd, real *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_spbsv_(char *uplo, integer *n, integer *kd, integer * nrhs, real *ab, integer *ldab, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_spbsvx_(char *fact, char *uplo, integer *n, integer *kd, integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, char *equed, real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_spbtf2_(char *uplo, integer *n, integer *kd, real *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_spbtrf_(char *uplo, integer *n, integer *kd, real *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_spbtrs_(char *uplo, integer *n, integer *kd, integer * nrhs, real *ab, integer *ldab, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_spftrf_(char *transr, char *uplo, integer *n, real *a, integer *info); /* Subroutine */ int _starpu_spftri_(char *transr, char *uplo, integer *n, real *a, integer *info); /* Subroutine */ int _starpu_spftrs_(char *transr, char *uplo, integer *n, integer * nrhs, real *a, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_spocon_(char *uplo, integer *n, real *a, integer *lda, real *anorm, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_spoequ_(integer *n, real *a, integer *lda, real *s, real *scond, real *amax, integer *info); /* Subroutine */ int _starpu_spoequb_(integer *n, real *a, integer *lda, real *s, real *scond, real *amax, integer *info); /* Subroutine */ int _starpu_sporfs_(char *uplo, integer *n, integer *nrhs, real *a, integer *lda, real *af, integer *ldaf, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sporfsx_(char *uplo, char *equed, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, real *s, real * b, integer *ldb, real *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real *params, real *work, integer *iwork, integer * info); /* Subroutine */ int _starpu_sposv_(char *uplo, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sposvx_(char *fact, char *uplo, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, char *equed, real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sposvxx_(char *fact, char *uplo, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, char *equed, real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer *n_err_bnds__, real * err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * params, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_spotf2_(char *uplo, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_spotrf_(char *uplo, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_spotri_(char *uplo, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_spotrs_(char *uplo, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sppcon_(char *uplo, integer *n, real *ap, real *anorm, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sppequ_(char *uplo, integer *n, real *ap, real *s, real * scond, real *amax, integer *info); /* Subroutine */ int _starpu_spprfs_(char *uplo, integer *n, integer *nrhs, real *ap, real *afp, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sppsv_(char *uplo, integer *n, integer *nrhs, real *ap, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sppsvx_(char *fact, char *uplo, integer *n, integer * nrhs, real *ap, real *afp, char *equed, real *s, real *b, integer * ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_spptrf_(char *uplo, integer *n, real *ap, integer *info); /* Subroutine */ int _starpu_spptri_(char *uplo, integer *n, real *ap, integer *info); /* Subroutine */ int _starpu_spptrs_(char *uplo, integer *n, integer *nrhs, real *ap, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_spstf2_(char *uplo, integer *n, real *a, integer *lda, integer *piv, integer *rank, real *tol, real *work, integer *info); /* Subroutine */ int _starpu_spstrf_(char *uplo, integer *n, real *a, integer *lda, integer *piv, integer *rank, real *tol, real *work, integer *info); /* Subroutine */ int _starpu_sptcon_(integer *n, real *d__, real *e, real *anorm, real *rcond, real *work, integer *info); /* Subroutine */ int _starpu_spteqr_(char *compz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_sptrfs_(integer *n, integer *nrhs, real *d__, real *e, real *df, real *ef, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *work, integer *info); /* Subroutine */ int _starpu_sptsv_(integer *n, integer *nrhs, real *d__, real *e, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sptsvx_(char *fact, integer *n, integer *nrhs, real *d__, real *e, real *df, real *ef, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *info); /* Subroutine */ int _starpu_spttrf_(integer *n, real *d__, real *e, integer *info); /* Subroutine */ int _starpu_spttrs_(integer *n, integer *nrhs, real *d__, real *e, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sptts2_(integer *n, integer *nrhs, real *d__, real *e, real *b, integer *ldb); /* Subroutine */ int _starpu_srscl_(integer *n, real *sa, real *sx, integer *incx); /* Subroutine */ int _starpu_ssbev_(char *jobz, char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *w, real *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_ssbevd_(char *jobz, char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *w, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_ssbevx_(char *jobz, char *range, char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *q, integer *ldq, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real * w, real *z__, integer *ldz, real *work, integer *iwork, integer * ifail, integer *info); /* Subroutine */ int _starpu_ssbgst_(char *vect, char *uplo, integer *n, integer *ka, integer *kb, real *ab, integer *ldab, real *bb, integer *ldbb, real * x, integer *ldx, real *work, integer *info); /* Subroutine */ int _starpu_ssbgv_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, real *ab, integer *ldab, real *bb, integer *ldbb, real * w, real *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_ssbgvd_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, real *ab, integer *ldab, real *bb, integer *ldbb, real * w, real *z__, integer *ldz, real *work, integer *lwork, integer * iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_ssbgvx_(char *jobz, char *range, char *uplo, integer *n, integer *ka, integer *kb, real *ab, integer *ldab, real *bb, integer * ldbb, real *q, integer *ldq, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, real *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_ssbtrd_(char *vect, char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *d__, real *e, real *q, integer *ldq, real *work, integer *info); /* Subroutine */ int _starpu_ssfrk_(char *transr, char *uplo, char *trans, integer *n, integer *k, real *alpha, real *a, integer *lda, real *beta, real * c__); /* Subroutine */ int _starpu_sspcon_(char *uplo, integer *n, real *ap, integer *ipiv, real *anorm, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sspev_(char *jobz, char *uplo, integer *n, real *ap, real *w, real *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_sspevd_(char *jobz, char *uplo, integer *n, real *ap, real *w, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_sspevx_(char *jobz, char *range, char *uplo, integer *n, real *ap, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, real *work, integer * iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_sspgst_(integer *itype, char *uplo, integer *n, real *ap, real *bp, integer *info); /* Subroutine */ int _starpu_sspgv_(integer *itype, char *jobz, char *uplo, integer * n, real *ap, real *bp, real *w, real *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_sspgvd_(integer *itype, char *jobz, char *uplo, integer * n, real *ap, real *bp, real *w, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_sspgvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, real *ap, real *bp, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer * ldz, real *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_ssprfs_(char *uplo, integer *n, integer *nrhs, real *ap, real *afp, integer *ipiv, real *b, integer *ldb, real *x, integer * ldx, real *ferr, real *berr, real *work, integer *iwork, integer * info); /* Subroutine */ int _starpu_sspsv_(char *uplo, integer *n, integer *nrhs, real *ap, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sspsvx_(char *fact, char *uplo, integer *n, integer * nrhs, real *ap, real *afp, integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_ssptrd_(char *uplo, integer *n, real *ap, real *d__, real *e, real *tau, integer *info); /* Subroutine */ int _starpu_ssptrf_(char *uplo, integer *n, real *ap, integer *ipiv, integer *info); /* Subroutine */ int _starpu_ssptri_(char *uplo, integer *n, real *ap, integer *ipiv, real *work, integer *info); /* Subroutine */ int _starpu_ssptrs_(char *uplo, integer *n, integer *nrhs, real *ap, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sstebz_(char *range, char *order, integer *n, real *vl, real *vu, integer *il, integer *iu, real *abstol, real *d__, real *e, integer *m, integer *nsplit, real *w, integer *iblock, integer * isplit, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sstedc_(char *compz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_sstegr_(char *jobz, char *range, integer *n, real *d__, real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, integer *isuppz, real * work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_sstein_(integer *n, real *d__, real *e, integer *m, real *w, integer *iblock, integer *isplit, real *z__, integer *ldz, real * work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_sstemr_(char *jobz, char *range, integer *n, real *d__, real *e, real *vl, real *vu, integer *il, integer *iu, integer *m, real *w, real *z__, integer *ldz, integer *nzc, integer *isuppz, logical *tryrac, real *work, integer *lwork, integer *iwork, integer * liwork, integer *info); /* Subroutine */ int _starpu_ssteqr_(char *compz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_ssterf_(integer *n, real *d__, real *e, integer *info); /* Subroutine */ int _starpu_sstev_(char *jobz, integer *n, real *d__, real *e, real * z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_sstevd_(char *jobz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_sstevr_(char *jobz, char *range, integer *n, real *d__, real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, integer *isuppz, real * work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_sstevx_(char *jobz, char *range, integer *n, real *d__, real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, real *work, integer * iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_ssycon_(char *uplo, integer *n, real *a, integer *lda, integer *ipiv, real *anorm, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_ssyequb_(char *uplo, integer *n, real *a, integer *lda, real *s, real *scond, real *amax, real *work, integer *info); /* Subroutine */ int _starpu_ssyev_(char *jobz, char *uplo, integer *n, real *a, integer *lda, real *w, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_ssyevd_(char *jobz, char *uplo, integer *n, real *a, integer *lda, real *w, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_ssyevr_(char *jobz, char *range, char *uplo, integer *n, real *a, integer *lda, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, integer * isuppz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_ssyevx_(char *jobz, char *range, char *uplo, integer *n, real *a, integer *lda, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, real * work, integer *lwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_ssygs2_(integer *itype, char *uplo, integer *n, real *a, integer *lda, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ssygst_(integer *itype, char *uplo, integer *n, real *a, integer *lda, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ssygv_(integer *itype, char *jobz, char *uplo, integer * n, real *a, integer *lda, real *b, integer *ldb, real *w, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_ssygvd_(integer *itype, char *jobz, char *uplo, integer * n, real *a, integer *lda, real *b, integer *ldb, real *w, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_ssygvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, real *a, integer *lda, real *b, integer *ldb, real * vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_ssyrfs_(char *uplo, integer *n, integer *nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_ssyrfsx_(char *uplo, char *equed, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real * err_bnds_comp__, integer *nparams, real *params, real *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_ssysv_(char *uplo, integer *n, integer *nrhs, real *a, integer *lda, integer *ipiv, real *b, integer *ldb, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_ssysvx_(char *fact, char *uplo, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *lwork, integer *iwork, integer * info); /* Subroutine */ int _starpu_ssysvxx_(char *fact, char *uplo, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, char *equed, real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer *n_err_bnds__, real * err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * params, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_ssytd2_(char *uplo, integer *n, real *a, integer *lda, real *d__, real *e, real *tau, integer *info); /* Subroutine */ int _starpu_ssytf2_(char *uplo, integer *n, real *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_ssytrd_(char *uplo, integer *n, real *a, integer *lda, real *d__, real *e, real *tau, real *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_ssytrf_(char *uplo, integer *n, real *a, integer *lda, integer *ipiv, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_ssytri_(char *uplo, integer *n, real *a, integer *lda, integer *ipiv, real *work, integer *info); /* Subroutine */ int _starpu_ssytrs_(char *uplo, integer *n, integer *nrhs, real *a, integer *lda, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_stbcon_(char *norm, char *uplo, char *diag, integer *n, integer *kd, real *ab, integer *ldab, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_stbrfs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, real *ab, integer *ldab, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_stbtrs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, real *ab, integer *ldab, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_stfsm_(char *transr, char *side, char *uplo, char *trans, char *diag, integer *m, integer *n, real *alpha, real *a, real *b, integer *ldb); /* Subroutine */ int _starpu_stftri_(char *transr, char *uplo, char *diag, integer *n, real *a, integer *info); /* Subroutine */ int _starpu_stfttp_(char *transr, char *uplo, integer *n, real *arf, real *ap, integer *info); /* Subroutine */ int _starpu_stfttr_(char *transr, char *uplo, integer *n, real *arf, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_stgevc_(char *side, char *howmny, logical *select, integer *n, real *s, integer *lds, real *p, integer *ldp, real *vl, integer *ldvl, real *vr, integer *ldvr, integer *mm, integer *m, real *work, integer *info); /* Subroutine */ int _starpu_stgex2_(logical *wantq, logical *wantz, integer *n, real *a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real * z__, integer *ldz, integer *j1, integer *n1, integer *n2, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_stgexc_(logical *wantq, logical *wantz, integer *n, real *a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real * z__, integer *ldz, integer *ifst, integer *ilst, real *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_stgsen_(integer *ijob, logical *wantq, logical *wantz, logical *select, integer *n, real *a, integer *lda, real *b, integer * ldb, real *alphar, real *alphai, real *beta, real *q, integer *ldq, real *z__, integer *ldz, integer *m, real *pl, real *pr, real *dif, real *work, integer *lwork, integer *iwork, integer *liwork, integer * info); /* Subroutine */ int _starpu_stgsja_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, integer *k, integer *l, real *a, integer *lda, real *b, integer *ldb, real *tola, real *tolb, real *alpha, real * beta, real *u, integer *ldu, real *v, integer *ldv, real *q, integer * ldq, real *work, integer *ncycle, integer *info); /* Subroutine */ int _starpu_stgsna_(char *job, char *howmny, logical *select, integer *n, real *a, integer *lda, real *b, integer *ldb, real *vl, integer *ldvl, real *vr, integer *ldvr, real *s, real *dif, integer * mm, integer *m, real *work, integer *lwork, integer *iwork, integer * info); /* Subroutine */ int _starpu_stgsy2_(char *trans, integer *ijob, integer *m, integer * n, real *a, integer *lda, real *b, integer *ldb, real *c__, integer * ldc, real *d__, integer *ldd, real *e, integer *lde, real *f, integer *ldf, real *scale, real *rdsum, real *rdscal, integer *iwork, integer *pq, integer *info); /* Subroutine */ int _starpu_stgsyl_(char *trans, integer *ijob, integer *m, integer * n, real *a, integer *lda, real *b, integer *ldb, real *c__, integer * ldc, real *d__, integer *ldd, real *e, integer *lde, real *f, integer *ldf, real *scale, real *dif, real *work, integer *lwork, integer * iwork, integer *info); /* Subroutine */ int _starpu_stpcon_(char *norm, char *uplo, char *diag, integer *n, real *ap, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_stprfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, real *ap, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_stptri_(char *uplo, char *diag, integer *n, real *ap, integer *info); /* Subroutine */ int _starpu_stptrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, real *ap, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_stpttf_(char *transr, char *uplo, integer *n, real *ap, real *arf, integer *info); /* Subroutine */ int _starpu_stpttr_(char *uplo, integer *n, real *ap, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_strcon_(char *norm, char *uplo, char *diag, integer *n, real *a, integer *lda, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_strevc_(char *side, char *howmny, logical *select, integer *n, real *t, integer *ldt, real *vl, integer *ldvl, real *vr, integer *ldvr, integer *mm, integer *m, real *work, integer *info); /* Subroutine */ int _starpu_strexc_(char *compq, integer *n, real *t, integer *ldt, real *q, integer *ldq, integer *ifst, integer *ilst, real *work, integer *info); /* Subroutine */ int _starpu_strrfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_strsen_(char *job, char *compq, logical *select, integer *n, real *t, integer *ldt, real *q, integer *ldq, real *wr, real *wi, integer *m, real *s, real *sep, real *work, integer *lwork, integer * iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_strsna_(char *job, char *howmny, logical *select, integer *n, real *t, integer *ldt, real *vl, integer *ldvl, real *vr, integer *ldvr, real *s, real *sep, integer *mm, integer *m, real * work, integer *ldwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_strsyl_(char *trana, char *tranb, integer *isgn, integer *m, integer *n, real *a, integer *lda, real *b, integer *ldb, real * c__, integer *ldc, real *scale, integer *info); /* Subroutine */ int _starpu_strti2_(char *uplo, char *diag, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_strtri_(char *uplo, char *diag, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_strtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_strttf_(char *transr, char *uplo, integer *n, real *a, integer *lda, real *arf, integer *info); /* Subroutine */ int _starpu_strttp_(char *uplo, integer *n, real *a, integer *lda, real *ap, integer *info); /* Subroutine */ int _starpu_stzrqf_(integer *m, integer *n, real *a, integer *lda, real *tau, integer *info); /* Subroutine */ int _starpu_stzrzf_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_xerbla_(char *srname, integer *info); /* Subroutine */ int _starpu_xerbla_array__(char *srname_array__, integer * srname_len__, integer *info, ftnlen srname_array_len); /* Subroutine */ int _starpu_zbdsqr_(char *uplo, integer *n, integer *ncvt, integer * nru, integer *ncc, doublereal *d__, doublereal *e, doublecomplex *vt, integer *ldvt, doublecomplex *u, integer *ldu, doublecomplex *c__, integer *ldc, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zcgesv_(integer *n, integer *nrhs, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublecomplex *work, complex *swork, doublereal *rwork, integer *iter, integer *info); /* Subroutine */ int _starpu_zcposv_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublecomplex *work, complex *swork, doublereal *rwork, integer *iter, integer *info); /* Subroutine */ int _starpu_zdrscl_(integer *n, doublereal *sa, doublecomplex *sx, integer *incx); /* Subroutine */ int _starpu_zgbbrd_(char *vect, integer *m, integer *n, integer *ncc, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, doublereal *d__, doublereal *e, doublecomplex *q, integer *ldq, doublecomplex *pt, integer *ldpt, doublecomplex *c__, integer *ldc, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgbcon_(char *norm, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, integer *ipiv, doublereal *anorm, doublereal *rcond, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zgbequ_(integer *m, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * info); /* Subroutine */ int _starpu_zgbequb_(integer *m, integer *n, integer *kl, integer * ku, doublecomplex *ab, integer *ldab, doublereal *r__, doublereal * c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer *info); /* Subroutine */ int _starpu_zgbrfs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex * afb, integer *ldafb, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgbrfsx_(char *trans, char *equed, integer *n, integer * kl, integer *ku, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, integer *ipiv, doublereal *r__, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *berr, integer * n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgbsv_(integer *n, integer *kl, integer *ku, integer * nrhs, doublecomplex *ab, integer *ldab, integer *ipiv, doublecomplex * b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zgbsvx_(char *fact, char *trans, integer *n, integer *kl, integer *ku, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zgbsvxx_(char *fact, char *trans, integer *n, integer * kl, integer *ku, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgbtf2_(integer *m, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zgbtrf_(integer *m, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zgbtrs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, doublecomplex *ab, integer *ldab, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, doublereal *scale, integer *m, doublecomplex *v, integer *ldv, integer *info); /* Subroutine */ int _starpu_zgebal_(char *job, integer *n, doublecomplex *a, integer *lda, integer *ilo, integer *ihi, doublereal *scale, integer *info); /* Subroutine */ int _starpu_zgebd2_(integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, doublecomplex *taup, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zgebrd_(integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, doublecomplex *taup, doublecomplex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_zgecon_(char *norm, integer *n, doublecomplex *a, integer *lda, doublereal *anorm, doublereal *rcond, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgeequ_(integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer *info); /* Subroutine */ int _starpu_zgeequb_(integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer *info); /* Subroutine */ int _starpu_zgees_(char *jobvs, char *sort, L_fp select, integer *n, doublecomplex *a, integer *lda, integer *sdim, doublecomplex *w, doublecomplex *vs, integer *ldvs, doublecomplex *work, integer *lwork, doublereal *rwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_zgeesx_(char *jobvs, char *sort, L_fp select, char * sense, integer *n, doublecomplex *a, integer *lda, integer *sdim, doublecomplex *w, doublecomplex *vs, integer *ldvs, doublereal * rconde, doublereal *rcondv, doublecomplex *work, integer *lwork, doublereal *rwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_zgeev_(char *jobvl, char *jobvr, integer *n, doublecomplex *a, integer *lda, doublecomplex *w, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgeevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, doublecomplex *a, integer *lda, doublecomplex *w, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, integer *ilo, integer *ihi, doublereal *scale, doublereal *abnrm, doublereal *rconde, doublereal *rcondv, doublecomplex *work, integer * lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgegs_(char *jobvsl, char *jobvsr, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *alpha, doublecomplex *beta, doublecomplex *vsl, integer *ldvsl, doublecomplex *vsr, integer *ldvsr, doublecomplex * work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgegv_(char *jobvl, char *jobvr, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *alpha, doublecomplex *beta, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgehd2_(integer *n, integer *ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zgehrd_(integer *n, integer *ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgelq2_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zgelqf_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgels_(char *trans, integer *m, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgelsd_(integer *m, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *s, doublereal *rcond, integer *rank, doublecomplex *work, integer *lwork, doublereal *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_zgelss_(integer *m, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *s, doublereal *rcond, integer *rank, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgelsx_(integer *m, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *jpvt, doublereal *rcond, integer *rank, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgelsy_(integer *m, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *jpvt, doublereal *rcond, integer *rank, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgeql2_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zgeqlf_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgeqp3_(integer *m, integer *n, doublecomplex *a, integer *lda, integer *jpvt, doublecomplex *tau, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgeqpf_(integer *m, integer *n, doublecomplex *a, integer *lda, integer *jpvt, doublecomplex *tau, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgeqr2_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zgeqrf_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgerfs_(char *trans, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgerfsx_(char *trans, char *equed, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, doublereal *r__, doublereal *c__, doublecomplex * b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgerq2_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zgerqf_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgesc2_(integer *n, doublecomplex *a, integer *lda, doublecomplex *rhs, integer *ipiv, integer *jpiv, doublereal *scale); /* Subroutine */ int _starpu_zgesdd_(char *jobz, integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u, integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work, integer *lwork, doublereal *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_zgesv_(integer *n, integer *nrhs, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_zgesvd_(char *jobu, char *jobvt, integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u, integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgesvx_(char *fact, char *trans, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgesvxx_(char *fact, char *trans, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer * n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgetc2_(integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *jpiv, integer *info); /* Subroutine */ int _starpu_zgetf2_(integer *m, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zgetrf_(integer *m, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zgetri_(integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgetrs_(char *trans, integer *n, integer *nrhs, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zggbak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, integer *m, doublecomplex *v, integer *ldv, integer *info); /* Subroutine */ int _starpu_zggbal_(char *job, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, doublereal *work, integer * info); /* Subroutine */ int _starpu_zgges_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *sdim, doublecomplex *alpha, doublecomplex * beta, doublecomplex *vsl, integer *ldvsl, doublecomplex *vsr, integer *ldvsr, doublecomplex *work, integer *lwork, doublereal *rwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_zggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, char *sense, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *sdim, doublecomplex *alpha, doublecomplex *beta, doublecomplex *vsl, integer *ldvsl, doublecomplex *vsr, integer *ldvsr, doublereal *rconde, doublereal * rcondv, doublecomplex *work, integer *lwork, doublereal *rwork, integer *iwork, integer *liwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_zggev_(char *jobvl, char *jobvr, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *alpha, doublecomplex *beta, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zggevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *alpha, doublecomplex *beta, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, doublereal *abnrm, doublereal *bbnrm, doublereal *rconde, doublereal * rcondv, doublecomplex *work, integer *lwork, doublereal *rwork, integer *iwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_zggglm_(integer *n, integer *m, integer *p, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *d__, doublecomplex *x, doublecomplex *y, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgghrd_(char *compq, char *compz, integer *n, integer * ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *q, integer *ldq, doublecomplex *z__, integer *ldz, integer *info); /* Subroutine */ int _starpu_zgglse_(integer *m, integer *n, integer *p, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *c__, doublecomplex *d__, doublecomplex *x, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zggqrf_(integer *n, integer *m, integer *p, doublecomplex *a, integer *lda, doublecomplex *taua, doublecomplex *b, integer *ldb, doublecomplex *taub, doublecomplex *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_zggrqf_(integer *m, integer *p, integer *n, doublecomplex *a, integer *lda, doublecomplex *taua, doublecomplex *b, integer *ldb, doublecomplex *taub, doublecomplex *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_zggsvd_(char *jobu, char *jobv, char *jobq, integer *m, integer *n, integer *p, integer *k, integer *l, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *alpha, doublereal *beta, doublecomplex *u, integer *ldu, doublecomplex *v, integer *ldv, doublecomplex *q, integer *ldq, doublecomplex *work, doublereal *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_zggsvp_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *tola, doublereal *tolb, integer *k, integer *l, doublecomplex *u, integer *ldu, doublecomplex *v, integer *ldv, doublecomplex *q, integer *ldq, integer *iwork, doublereal * rwork, doublecomplex *tau, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zgtcon_(char *norm, integer *n, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, doublecomplex *du2, integer * ipiv, doublereal *anorm, doublereal *rcond, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zgtrfs_(char *trans, integer *n, integer *nrhs, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, doublecomplex *dlf, doublecomplex *df, doublecomplex *duf, doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgtsv_(integer *n, integer *nrhs, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zgtsvx_(char *fact, char *trans, integer *n, integer * nrhs, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, doublecomplex *dlf, doublecomplex *df, doublecomplex *duf, doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zgttrf_(integer *n, doublecomplex *dl, doublecomplex * d__, doublecomplex *du, doublecomplex *du2, integer *ipiv, integer * info); /* Subroutine */ int _starpu_zgttrs_(char *trans, integer *n, integer *nrhs, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zgtts2_(integer *itrans, integer *n, integer *nrhs, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_zhbev_(char *jobz, char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhbevd_(char *jobz, char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zhbevx_(char *jobz, char *range, char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublecomplex *q, integer *ldq, doublereal *vl, doublereal *vu, integer *il, integer * iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal *rwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_zhbgst_(char *vect, char *uplo, integer *n, integer *ka, integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, integer *ldbb, doublecomplex *x, integer *ldx, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhbgv_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, integer *ldbb, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhbgvd_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, integer *ldbb, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, integer * lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zhbgvx_(char *jobz, char *range, char *uplo, integer *n, integer *ka, integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, integer *ldbb, doublecomplex *q, integer *ldq, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal * abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal *rwork, integer *iwork, integer * ifail, integer *info); /* Subroutine */ int _starpu_zhbtrd_(char *vect, char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *d__, doublereal *e, doublecomplex *q, integer *ldq, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zhecon_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublereal *anorm, doublereal *rcond, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zheequb_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zheev_(char *jobz, char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zheevd_(char *jobz, char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zheevr_(char *jobz, char *range, char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal * w, doublecomplex *z__, integer *ldz, integer *isuppz, doublecomplex * work, integer *lwork, doublereal *rwork, integer *lrwork, integer * iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zheevx_(char *jobz, char *range, char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal * w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer * lwork, doublereal *rwork, integer *iwork, integer *ifail, integer * info); /* Subroutine */ int _starpu_zhegs2_(integer *itype, char *uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zhegst_(integer *itype, char *uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zhegv_(integer *itype, char *jobz, char *uplo, integer * n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhegvd_(integer *itype, char *jobz, char *uplo, integer * n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zhegvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *vl, doublereal *vu, integer *il, integer * iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_zherfs_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zherfsx_(char *uplo, char *equed, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhesv_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zhesvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhesvxx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, char *equed, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * nparams, doublereal *params, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhetd2_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, integer *info); /* Subroutine */ int _starpu_zhetf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zhetrd_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zhetrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zhetri_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zhetrs_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zhfrk_(char *transr, char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublecomplex *a, integer *lda, doublereal *beta, doublecomplex *c__); /* Subroutine */ int _starpu_zhgeqz_(char *job, char *compq, char *compz, integer *n, integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *t, integer *ldt, doublecomplex *alpha, doublecomplex * beta, doublecomplex *q, integer *ldq, doublecomplex *z__, integer * ldz, doublecomplex *work, integer *lwork, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zhpcon_(char *uplo, integer *n, doublecomplex *ap, integer *ipiv, doublereal *anorm, doublereal *rcond, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zhpev_(char *jobz, char *uplo, integer *n, doublecomplex *ap, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhpevd_(char *jobz, char *uplo, integer *n, doublecomplex *ap, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, integer * lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zhpevx_(char *jobz, char *range, char *uplo, integer *n, doublecomplex *ap, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal * rwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_zhpgst_(integer *itype, char *uplo, integer *n, doublecomplex *ap, doublecomplex *bp, integer *info); /* Subroutine */ int _starpu_zhpgv_(integer *itype, char *jobz, char *uplo, integer * n, doublecomplex *ap, doublecomplex *bp, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zhpgvd_(integer *itype, char *jobz, char *uplo, integer * n, doublecomplex *ap, doublecomplex *bp, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, doublereal * rwork, integer *lrwork, integer *iwork, integer *liwork, integer * info); /* Subroutine */ int _starpu_zhpgvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, doublecomplex *ap, doublecomplex *bp, doublereal * vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal *rwork, integer *iwork, integer * ifail, integer *info); /* Subroutine */ int _starpu_zhprfs_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, doublecomplex *afp, integer *ipiv, doublecomplex * b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zhpsv_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zhpsvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *ap, doublecomplex *afp, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhptrd_(char *uplo, integer *n, doublecomplex *ap, doublereal *d__, doublereal *e, doublecomplex *tau, integer *info); /* Subroutine */ int _starpu_zhptrf_(char *uplo, integer *n, doublecomplex *ap, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zhptri_(char *uplo, integer *n, doublecomplex *ap, integer *ipiv, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zhptrs_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zhsein_(char *side, char *eigsrc, char *initv, logical * select, integer *n, doublecomplex *h__, integer *ldh, doublecomplex * w, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer *m, doublecomplex *work, doublereal *rwork, integer *ifaill, integer *ifailr, integer *info); /* Subroutine */ int _starpu_zhseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zla_gbamv__(integer *trans, integer *m, integer *n, integer *kl, integer *ku, doublereal *alpha, doublecomplex *ab, integer *ldab, doublecomplex *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); doublereal _starpu_zla_gbrcond_c__(char *trans, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, integer *ipiv, doublereal *c__, logical *capply, integer *info, doublecomplex *work, doublereal *rwork, ftnlen trans_len); doublereal _starpu_zla_gbrcond_x__(char *trans, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, integer *ipiv, doublecomplex *x, integer *info, doublecomplex *work, doublereal *rwork, ftnlen trans_len); /* Subroutine */ int _starpu_zla_gbrfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, integer *ipiv, logical *colequ, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info); doublereal _starpu_zla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * ncols, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer * ldafb); /* Subroutine */ int _starpu_zla_geamv__(integer *trans, integer *m, integer *n, doublereal *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); doublereal _starpu_zla_gercond_c__(char *trans, integer *n, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublereal * c__, logical *capply, integer *info, doublecomplex *work, doublereal * rwork, ftnlen trans_len); doublereal _starpu_zla_gercond_x__(char *trans, integer *n, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex * x, integer *info, doublecomplex *work, doublereal *rwork, ftnlen trans_len); /* Subroutine */ int _starpu_zla_gerfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *nrhs, doublecomplex *a, integer * lda, doublecomplex *af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal * errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * ignore_cwise__, integer *info); /* Subroutine */ int _starpu_zla_heamv__(integer *uplo, integer *n, doublereal *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); doublereal _starpu_zla_hercond_c__(char *uplo, integer *n, doublecomplex *a, integer * lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublereal *c__, logical *capply, integer *info, doublecomplex *work, doublereal * rwork, ftnlen uplo_len); doublereal _starpu_zla_hercond_x__(char *uplo, integer *n, doublecomplex *a, integer * lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex * x, integer *info, doublecomplex *work, doublereal *rwork, ftnlen uplo_len); /* Subroutine */ int _starpu_zla_herfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal * errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_zla_herpvgrw__(char *uplo, integer *n, integer *info, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublereal *work, ftnlen uplo_len); /* Subroutine */ int _starpu_zla_lin_berr__(integer *n, integer *nz, integer *nrhs, doublecomplex *res, doublereal *ayb, doublereal *berr); doublereal _starpu_zla_porcond_c__(char *uplo, integer *n, doublecomplex *a, integer * lda, doublecomplex *af, integer *ldaf, doublereal *c__, logical * capply, integer *info, doublecomplex *work, doublereal *rwork, ftnlen uplo_len); doublereal _starpu_zla_porcond_x__(char *uplo, integer *n, doublecomplex *a, integer * lda, doublecomplex *af, integer *ldaf, doublecomplex *x, integer * info, doublecomplex *work, doublereal *rwork, ftnlen uplo_len); /* Subroutine */ int _starpu_zla_porfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, logical *colequ, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_zla_porpvgrw__(char *uplo, integer *ncols, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, doublereal *work, ftnlen uplo_len); doublereal _starpu_zla_rpvgrw__(integer *n, integer *ncols, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf); /* Subroutine */ int _starpu_zla_syamv__(integer *uplo, integer *n, doublereal *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); doublereal _starpu_zla_syrcond_c__(char *uplo, integer *n, doublecomplex *a, integer * lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublereal *c__, logical *capply, integer *info, doublecomplex *work, doublereal * rwork, ftnlen uplo_len); doublereal _starpu_zla_syrcond_x__(char *uplo, integer *n, doublecomplex *a, integer * lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex * x, integer *info, doublecomplex *work, doublereal *rwork, ftnlen uplo_len); /* Subroutine */ int _starpu_zla_syrfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal * errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_zla_syrpvgrw__(char *uplo, integer *n, integer *info, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublereal *work, ftnlen uplo_len); /* Subroutine */ int _starpu_zla_wwaddw__(integer *n, doublecomplex *x, doublecomplex *y, doublecomplex *w); /* Subroutine */ int _starpu_zlabrd_(integer *m, integer *n, integer *nb, doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, doublecomplex *taup, doublecomplex *x, integer * ldx, doublecomplex *y, integer *ldy); /* Subroutine */ int _starpu_zlacgv_(integer *n, doublecomplex *x, integer *incx); /* Subroutine */ int _starpu_zlacn2_(integer *n, doublecomplex *v, doublecomplex *x, doublereal *est, integer *kase, integer *isave); /* Subroutine */ int _starpu_zlacon_(integer *n, doublecomplex *v, doublecomplex *x, doublereal *est, integer *kase); /* Subroutine */ int _starpu_zlacp2_(char *uplo, integer *m, integer *n, doublereal * a, integer *lda, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_zlacpy_(char *uplo, integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_zlacrm_(integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *b, integer *ldb, doublecomplex *c__, integer *ldc, doublereal *rwork); /* Subroutine */ int _starpu_zlacrt_(integer *n, doublecomplex *cx, integer *incx, doublecomplex *cy, integer *incy, doublecomplex *c__, doublecomplex * s); /* Double Complex */ VOID _starpu_zladiv_(doublecomplex * ret_val, doublecomplex *x, doublecomplex *y); /* Subroutine */ int _starpu_zlaed0_(integer *qsiz, integer *n, doublereal *d__, doublereal *e, doublecomplex *q, integer *ldq, doublecomplex *qstore, integer *ldqs, doublereal *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_zlaed7_(integer *n, integer *cutpnt, integer *qsiz, integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, doublecomplex *q, integer *ldq, doublereal *rho, integer *indxq, doublereal *qstore, integer *qptr, integer *prmptr, integer *perm, integer *givptr, integer *givcol, doublereal *givnum, doublecomplex * work, doublereal *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_zlaed8_(integer *k, integer *n, integer *qsiz, doublecomplex *q, integer *ldq, doublereal *d__, doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, doublecomplex * q2, integer *ldq2, doublereal *w, integer *indxp, integer *indx, integer *indxq, integer *perm, integer *givptr, integer *givcol, doublereal *givnum, integer *info); /* Subroutine */ int _starpu_zlaein_(logical *rightv, logical *noinit, integer *n, doublecomplex *h__, integer *ldh, doublecomplex *w, doublecomplex *v, doublecomplex *b, integer *ldb, doublereal *rwork, doublereal *eps3, doublereal *smlnum, integer *info); /* Subroutine */ int _starpu_zlaesy_(doublecomplex *a, doublecomplex *b, doublecomplex *c__, doublecomplex *rt1, doublecomplex *rt2, doublecomplex *evscal, doublecomplex *cs1, doublecomplex *sn1); /* Subroutine */ int _starpu_zlaev2_(doublecomplex *a, doublecomplex *b, doublecomplex *c__, doublereal *rt1, doublereal *rt2, doublereal *cs1, doublecomplex *sn1); /* Subroutine */ int _starpu_zlag2c_(integer *m, integer *n, doublecomplex *a, integer *lda, complex *sa, integer *ldsa, integer *info); /* Subroutine */ int _starpu_zlags2_(logical *upper, doublereal *a1, doublecomplex * a2, doublereal *a3, doublereal *b1, doublecomplex *b2, doublereal *b3, doublereal *csu, doublecomplex *snu, doublereal *csv, doublecomplex * snv, doublereal *csq, doublecomplex *snq); /* Subroutine */ int _starpu_zlagtm_(char *trans, integer *n, integer *nrhs, doublereal *alpha, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, doublecomplex *x, integer *ldx, doublereal *beta, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_zlahef_(char *uplo, integer *n, integer *nb, integer *kb, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *w, integer *ldw, integer *info); /* Subroutine */ int _starpu_zlahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, integer *ldz, integer *info); /* Subroutine */ int _starpu_zlahr2_(integer *n, integer *k, integer *nb, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *t, integer *ldt, doublecomplex *y, integer *ldy); /* Subroutine */ int _starpu_zlahrd_(integer *n, integer *k, integer *nb, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *t, integer *ldt, doublecomplex *y, integer *ldy); /* Subroutine */ int _starpu_zlaic1_(integer *job, integer *j, doublecomplex *x, doublereal *sest, doublecomplex *w, doublecomplex *gamma, doublereal * sestpr, doublecomplex *s, doublecomplex *c__); /* Subroutine */ int _starpu_zlals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, doublecomplex *b, integer *ldb, doublecomplex *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal *poles, doublereal *difl, doublereal *difr, doublereal * z__, integer *k, doublereal *c__, doublereal *s, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zlalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, doublecomplex *b, integer *ldb, doublecomplex *bx, integer *ldbx, doublereal *u, integer *ldu, doublereal *vt, integer * k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal * poles, integer *givptr, integer *givcol, integer *ldgcol, integer * perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_zlalsd_(char *uplo, integer *smlsiz, integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublecomplex *b, integer *ldb, doublereal *rcond, integer *rank, doublecomplex *work, doublereal * rwork, integer *iwork, integer *info); doublereal _starpu_zlangb_(char *norm, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, doublereal *work); doublereal _starpu_zlange_(char *norm, integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *work); doublereal _starpu_zlangt_(char *norm, integer *n, doublecomplex *dl, doublecomplex * d__, doublecomplex *du); doublereal _starpu_zlanhb_(char *norm, char *uplo, integer *n, integer *k, doublecomplex *ab, integer *ldab, doublereal *work); doublereal _starpu_zlanhe_(char *norm, char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *work); doublereal _starpu_zlanhf_(char *norm, char *transr, char *uplo, integer *n, doublecomplex *a, doublereal *work); doublereal _starpu_zlanhp_(char *norm, char *uplo, integer *n, doublecomplex *ap, doublereal *work); doublereal _starpu_zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, doublereal *work); doublereal _starpu_zlanht_(char *norm, integer *n, doublereal *d__, doublecomplex *e); doublereal _starpu_zlansb_(char *norm, char *uplo, integer *n, integer *k, doublecomplex *ab, integer *ldab, doublereal *work); doublereal _starpu_zlansp_(char *norm, char *uplo, integer *n, doublecomplex *ap, doublereal *work); doublereal _starpu_zlansy_(char *norm, char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *work); doublereal _starpu_zlantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, doublecomplex *ab, integer *ldab, doublereal *work); doublereal _starpu_zlantp_(char *norm, char *uplo, char *diag, integer *n, doublecomplex *ap, doublereal *work); doublereal _starpu_zlantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *work); /* Subroutine */ int _starpu_zlapll_(integer *n, doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, doublereal *ssmin); /* Subroutine */ int _starpu_zlapmt_(logical *forwrd, integer *m, integer *n, doublecomplex *x, integer *ldx, integer *k); /* Subroutine */ int _starpu_zlaqgb_(integer *m, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlaqge_(integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlaqhb_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlaqhe_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlaqhp_(char *uplo, integer *n, doublecomplex *ap, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlaqp2_(integer *m, integer *n, integer *offset, doublecomplex *a, integer *lda, integer *jpvt, doublecomplex *tau, doublereal *vn1, doublereal *vn2, doublecomplex *work); /* Subroutine */ int _starpu_zlaqps_(integer *m, integer *n, integer *offset, integer *nb, integer *kb, doublecomplex *a, integer *lda, integer *jpvt, doublecomplex *tau, doublereal *vn1, doublereal *vn2, doublecomplex * auxv, doublecomplex *f, integer *ldf); /* Subroutine */ int _starpu_zlaqr0_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zlaqr1_(integer *n, doublecomplex *h__, integer *ldh, doublecomplex *s1, doublecomplex *s2, doublecomplex *v); /* Subroutine */ int _starpu_zlaqr2_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, doublecomplex *h__, integer *ldh, integer *iloz, integer *ihiz, doublecomplex *z__, integer *ldz, integer *ns, integer *nd, doublecomplex *sh, doublecomplex *v, integer *ldv, integer *nh, doublecomplex *t, integer *ldt, integer *nv, doublecomplex *wv, integer *ldwv, doublecomplex *work, integer *lwork); /* Subroutine */ int _starpu_zlaqr3_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, doublecomplex *h__, integer *ldh, integer *iloz, integer *ihiz, doublecomplex *z__, integer *ldz, integer *ns, integer *nd, doublecomplex *sh, doublecomplex *v, integer *ldv, integer *nh, doublecomplex *t, integer *ldt, integer *nv, doublecomplex *wv, integer *ldwv, doublecomplex *work, integer *lwork); /* Subroutine */ int _starpu_zlaqr4_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zlaqr5_(logical *wantt, logical *wantz, integer *kacc22, integer *n, integer *ktop, integer *kbot, integer *nshfts, doublecomplex *s, doublecomplex *h__, integer *ldh, integer *iloz, integer *ihiz, doublecomplex *z__, integer *ldz, doublecomplex *v, integer *ldv, doublecomplex *u, integer *ldu, integer *nv, doublecomplex *wv, integer *ldwv, integer *nh, doublecomplex *wh, integer *ldwh); /* Subroutine */ int _starpu_zlaqsb_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlaqsp_(char *uplo, integer *n, doublecomplex *ap, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlaqsy_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlar1v_(integer *n, integer *b1, integer *bn, doublereal *lambda, doublereal *d__, doublereal *l, doublereal *ld, doublereal * lld, doublereal *pivmin, doublereal *gaptol, doublecomplex *z__, logical *wantnc, integer *negcnt, doublereal *ztz, doublereal *mingma, integer *r__, integer *isuppz, doublereal *nrminv, doublereal *resid, doublereal *rqcorr, doublereal *work); /* Subroutine */ int _starpu_zlar2v_(integer *n, doublecomplex *x, doublecomplex *y, doublecomplex *z__, integer *incx, doublereal *c__, doublecomplex *s, integer *incc); /* Subroutine */ int _starpu_zlarcm_(integer *m, integer *n, doublereal *a, integer * lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc, doublereal *rwork); /* Subroutine */ int _starpu_zlarf_(char *side, integer *m, integer *n, doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex *c__, integer * ldc, doublecomplex *work); /* Subroutine */ int _starpu_zlarfb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, doublecomplex *v, integer *ldv, doublecomplex *t, integer *ldt, doublecomplex *c__, integer * ldc, doublecomplex *work, integer *ldwork); /* Subroutine */ int _starpu_zlarfg_(integer *n, doublecomplex *alpha, doublecomplex * x, integer *incx, doublecomplex *tau); /* Subroutine */ int _starpu_zlarfp_(integer *n, doublecomplex *alpha, doublecomplex * x, integer *incx, doublecomplex *tau); /* Subroutine */ int _starpu_zlarft_(char *direct, char *storev, integer *n, integer * k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex * t, integer *ldt); /* Subroutine */ int _starpu_zlarfx_(char *side, integer *m, integer *n, doublecomplex *v, doublecomplex *tau, doublecomplex *c__, integer * ldc, doublecomplex *work); /* Subroutine */ int _starpu_zlargv_(integer *n, doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, doublereal *c__, integer *incc); /* Subroutine */ int _starpu_zlarnv_(integer *idist, integer *iseed, integer *n, doublecomplex *x); /* Subroutine */ int _starpu_zlarrv_(integer *n, doublereal *vl, doublereal *vu, doublereal *d__, doublereal *l, doublereal *pivmin, integer *isplit, integer *m, integer *dol, integer *dou, doublereal *minrgp, doublereal *rtol1, doublereal *rtol2, doublereal *w, doublereal *werr, doublereal *wgap, integer *iblock, integer *indexw, doublereal *gers, doublecomplex *z__, integer *ldz, integer *isuppz, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_zlarscl2_(integer *m, integer *n, doublereal *d__, doublecomplex *x, integer *ldx); /* Subroutine */ int _starpu_zlartg_(doublecomplex *f, doublecomplex *g, doublereal * cs, doublecomplex *sn, doublecomplex *r__); /* Subroutine */ int _starpu_zlartv_(integer *n, doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, doublereal *c__, doublecomplex *s, integer *incc); /* Subroutine */ int _starpu_zlarz_(char *side, integer *m, integer *n, integer *l, doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex * c__, integer *ldc, doublecomplex *work); /* Subroutine */ int _starpu_zlarzb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, integer *l, doublecomplex *v, integer *ldv, doublecomplex *t, integer *ldt, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *ldwork); /* Subroutine */ int _starpu_zlarzt_(char *direct, char *storev, integer *n, integer * k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex * t, integer *ldt); /* Subroutine */ int _starpu_zlascl_(char *type__, integer *kl, integer *ku, doublereal *cfrom, doublereal *cto, integer *m, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_zlascl2_(integer *m, integer *n, doublereal *d__, doublecomplex *x, integer *ldx); /* Subroutine */ int _starpu_zlaset_(char *uplo, integer *m, integer *n, doublecomplex *alpha, doublecomplex *beta, doublecomplex *a, integer * lda); /* Subroutine */ int _starpu_zlasr_(char *side, char *pivot, char *direct, integer *m, integer *n, doublereal *c__, doublereal *s, doublecomplex *a, integer *lda); /* Subroutine */ int _starpu_zlassq_(integer *n, doublecomplex *x, integer *incx, doublereal *scale, doublereal *sumsq); /* Subroutine */ int _starpu_zlaswp_(integer *n, doublecomplex *a, integer *lda, integer *k1, integer *k2, integer *ipiv, integer *incx); /* Subroutine */ int _starpu_zlasyf_(char *uplo, integer *n, integer *nb, integer *kb, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *w, integer *ldw, integer *info); /* Subroutine */ int _starpu_zlat2c_(char *uplo, integer *n, doublecomplex *a, integer *lda, complex *sa, integer *ldsa, integer *info); /* Subroutine */ int _starpu_zlatbs_(char *uplo, char *trans, char *diag, char * normin, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublecomplex *x, doublereal *scale, doublereal *cnorm, integer *info); /* Subroutine */ int _starpu_zlatdf_(integer *ijob, integer *n, doublecomplex *z__, integer *ldz, doublecomplex *rhs, doublereal *rdsum, doublereal * rdscal, integer *ipiv, integer *jpiv); /* Subroutine */ int _starpu_zlatps_(char *uplo, char *trans, char *diag, char * normin, integer *n, doublecomplex *ap, doublecomplex *x, doublereal * scale, doublereal *cnorm, integer *info); /* Subroutine */ int _starpu_zlatrd_(char *uplo, integer *n, integer *nb, doublecomplex *a, integer *lda, doublereal *e, doublecomplex *tau, doublecomplex *w, integer *ldw); /* Subroutine */ int _starpu_zlatrs_(char *uplo, char *trans, char *diag, char * normin, integer *n, doublecomplex *a, integer *lda, doublecomplex *x, doublereal *scale, doublereal *cnorm, integer *info); /* Subroutine */ int _starpu_zlatrz_(integer *m, integer *n, integer *l, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work); /* Subroutine */ int _starpu_zlatzm_(char *side, integer *m, integer *n, doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex * c1, doublecomplex *c2, integer *ldc, doublecomplex *work); /* Subroutine */ int _starpu_zlauu2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_zlauum_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_zpbcon_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *anorm, doublereal * rcond, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zpbequ_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_zpbrfs_(char *uplo, integer *n, integer *kd, integer * nrhs, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer * ldafb, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * rwork, integer *info); /* Subroutine */ int _starpu_zpbstf_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_zpbsv_(char *uplo, integer *n, integer *kd, integer * nrhs, doublecomplex *ab, integer *ldab, doublecomplex *b, integer * ldb, integer *info); /* Subroutine */ int _starpu_zpbsvx_(char *fact, char *uplo, integer *n, integer *kd, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, char *equed, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal * ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zpbtf2_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_zpbtrf_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_zpbtrs_(char *uplo, integer *n, integer *kd, integer * nrhs, doublecomplex *ab, integer *ldab, doublecomplex *b, integer * ldb, integer *info); /* Subroutine */ int _starpu_zpftrf_(char *transr, char *uplo, integer *n, doublecomplex *a, integer *info); /* Subroutine */ int _starpu_zpftri_(char *transr, char *uplo, integer *n, doublecomplex *a, integer *info); /* Subroutine */ int _starpu_zpftrs_(char *transr, char *uplo, integer *n, integer * nrhs, doublecomplex *a, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zpocon_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *anorm, doublereal *rcond, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zpoequ_(integer *n, doublecomplex *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_zpoequb_(integer *n, doublecomplex *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_zporfs_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * rwork, integer *info); /* Subroutine */ int _starpu_zporfsx_(char *uplo, char *equed, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *berr, integer * n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zposv_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zposvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, char *equed, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zposvxx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, char *equed, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zpotf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_zpotrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_zpotri_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_zpotrs_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zppcon_(char *uplo, integer *n, doublecomplex *ap, doublereal *anorm, doublereal *rcond, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zppequ_(char *uplo, integer *n, doublecomplex *ap, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_zpprfs_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, doublecomplex *afp, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zppsv_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zppsvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *ap, doublecomplex *afp, char *equed, doublereal * s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zpptrf_(char *uplo, integer *n, doublecomplex *ap, integer *info); /* Subroutine */ int _starpu_zpptri_(char *uplo, integer *n, doublecomplex *ap, integer *info); /* Subroutine */ int _starpu_zpptrs_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zpstf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, integer *info); /* Subroutine */ int _starpu_zpstrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, integer *info); /* Subroutine */ int _starpu_zptcon_(integer *n, doublereal *d__, doublecomplex *e, doublereal *anorm, doublereal *rcond, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zpteqr_(char *compz, integer *n, doublereal *d__, doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_zptrfs_(char *uplo, integer *n, integer *nrhs, doublereal *d__, doublecomplex *e, doublereal *df, doublecomplex *ef, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * rwork, integer *info); /* Subroutine */ int _starpu_zptsv_(integer *n, integer *nrhs, doublereal *d__, doublecomplex *e, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zptsvx_(char *fact, integer *n, integer *nrhs, doublereal *d__, doublecomplex *e, doublereal *df, doublecomplex *ef, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zpttrf_(integer *n, doublereal *d__, doublecomplex *e, integer *info); /* Subroutine */ int _starpu_zpttrs_(char *uplo, integer *n, integer *nrhs, doublereal *d__, doublecomplex *e, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zptts2_(integer *iuplo, integer *n, integer *nrhs, doublereal *d__, doublecomplex *e, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_zrot_(integer *n, doublecomplex *cx, integer *incx, doublecomplex *cy, integer *incy, doublereal *c__, doublecomplex *s); /* Subroutine */ int _starpu_zspcon_(char *uplo, integer *n, doublecomplex *ap, integer *ipiv, doublereal *anorm, doublereal *rcond, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zspmv_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *ap, doublecomplex *x, integer *incx, doublecomplex * beta, doublecomplex *y, integer *incy); /* Subroutine */ int _starpu_zspr_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *x, integer *incx, doublecomplex *ap); /* Subroutine */ int _starpu_zsprfs_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, doublecomplex *afp, integer *ipiv, doublecomplex * b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zspsv_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zspsvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *ap, doublecomplex *afp, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zsptrf_(char *uplo, integer *n, doublecomplex *ap, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zsptri_(char *uplo, integer *n, doublecomplex *ap, integer *ipiv, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zsptrs_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zstedc_(char *compz, integer *n, doublereal *d__, doublereal *e, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zstegr_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, integer *isuppz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zstein_(integer *n, doublereal *d__, doublereal *e, integer *m, doublereal *w, integer *iblock, integer *isplit, doublecomplex *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_zstemr_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, integer *m, doublereal *w, doublecomplex *z__, integer * ldz, integer *nzc, integer *isuppz, logical *tryrac, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zsteqr_(char *compz, integer *n, doublereal *d__, doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_zsycon_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublereal *anorm, doublereal *rcond, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zsyequb_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zsymv_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex *y, integer *incy); /* Subroutine */ int _starpu_zsyr_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *x, integer *incx, doublecomplex *a, integer *lda); /* Subroutine */ int _starpu_zsyrfs_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zsyrfsx_(char *uplo, char *equed, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zsysv_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zsysvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zsysvxx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, char *equed, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * nparams, doublereal *params, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zsytf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zsytrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zsytri_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zsytrs_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ztbcon_(char *norm, char *uplo, char *diag, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *rcond, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_ztbrfs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * rwork, integer *info); /* Subroutine */ int _starpu_ztbtrs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ztfsm_(char *transr, char *side, char *uplo, char *trans, char *diag, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_ztftri_(char *transr, char *uplo, char *diag, integer *n, doublecomplex *a, integer *info); /* Subroutine */ int _starpu_ztfttp_(char *transr, char *uplo, integer *n, doublecomplex *arf, doublecomplex *ap, integer *info); /* Subroutine */ int _starpu_ztfttr_(char *transr, char *uplo, integer *n, doublecomplex *arf, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ztgevc_(char *side, char *howmny, logical *select, integer *n, doublecomplex *s, integer *lds, doublecomplex *p, integer *ldp, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer * ldvr, integer *mm, integer *m, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_ztgex2_(logical *wantq, logical *wantz, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *q, integer *ldq, doublecomplex *z__, integer *ldz, integer *j1, integer *info); /* Subroutine */ int _starpu_ztgexc_(logical *wantq, logical *wantz, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *q, integer *ldq, doublecomplex *z__, integer *ldz, integer *ifst, integer *ilst, integer *info); /* Subroutine */ int _starpu_ztgsen_(integer *ijob, logical *wantq, logical *wantz, logical *select, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *alpha, doublecomplex * beta, doublecomplex *q, integer *ldq, doublecomplex *z__, integer * ldz, integer *m, doublereal *pl, doublereal *pr, doublereal *dif, doublecomplex *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_ztgsja_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, integer *k, integer *l, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *tola, doublereal *tolb, doublereal *alpha, doublereal *beta, doublecomplex * u, integer *ldu, doublecomplex *v, integer *ldv, doublecomplex *q, integer *ldq, doublecomplex *work, integer *ncycle, integer *info); /* Subroutine */ int _starpu_ztgsna_(char *job, char *howmny, logical *select, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer * ldvr, doublereal *s, doublereal *dif, integer *mm, integer *m, doublecomplex *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_ztgsy2_(char *trans, integer *ijob, integer *m, integer * n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc, doublecomplex *d__, integer *ldd, doublecomplex *e, integer *lde, doublecomplex *f, integer *ldf, doublereal *scale, doublereal *rdsum, doublereal *rdscal, integer * info); /* Subroutine */ int _starpu_ztgsyl_(char *trans, integer *ijob, integer *m, integer * n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc, doublecomplex *d__, integer *ldd, doublecomplex *e, integer *lde, doublecomplex *f, integer *ldf, doublereal *scale, doublereal *dif, doublecomplex *work, integer * lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_ztpcon_(char *norm, char *uplo, char *diag, integer *n, doublecomplex *ap, doublereal *rcond, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_ztprfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublecomplex *ap, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_ztptri_(char *uplo, char *diag, integer *n, doublecomplex *ap, integer *info); /* Subroutine */ int _starpu_ztptrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublecomplex *ap, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ztpttf_(char *transr, char *uplo, integer *n, doublecomplex *ap, doublecomplex *arf, integer *info); /* Subroutine */ int _starpu_ztpttr_(char *uplo, integer *n, doublecomplex *ap, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ztrcon_(char *norm, char *uplo, char *diag, integer *n, doublecomplex *a, integer *lda, doublereal *rcond, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_ztrevc_(char *side, char *howmny, logical *select, integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer *m, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_ztrexc_(char *compq, integer *n, doublecomplex *t, integer *ldt, doublecomplex *q, integer *ldq, integer *ifst, integer * ilst, integer *info); /* Subroutine */ int _starpu_ztrrfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_ztrsen_(char *job, char *compq, logical *select, integer *n, doublecomplex *t, integer *ldt, doublecomplex *q, integer *ldq, doublecomplex *w, integer *m, doublereal *s, doublereal *sep, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_ztrsna_(char *job, char *howmny, logical *select, integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, doublereal *s, doublereal *sep, integer *mm, integer *m, doublecomplex *work, integer *ldwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_ztrsyl_(char *trana, char *tranb, integer *isgn, integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc, doublereal *scale, integer *info); /* Subroutine */ int _starpu_ztrti2_(char *uplo, char *diag, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ztrtri_(char *uplo, char *diag, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ztrtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ztrttf_(char *transr, char *uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *arf, integer *info); /* Subroutine */ int _starpu_ztrttp_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *ap, integer *info); /* Subroutine */ int _starpu_ztzrqf_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, integer *info); /* Subroutine */ int _starpu_ztzrzf_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zung2l_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zung2r_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zungbr_(char *vect, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zunghr_(integer *n, integer *ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zungl2_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zunglq_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zungql_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zungqr_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zungr2_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zungrq_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zungtr_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zunm2l_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zunm2r_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zunmbr_(char *vect, char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_zunmhr_(char *side, char *trans, integer *m, integer *n, integer *ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zunml2_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zunmlq_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zunmql_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zunmqr_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zunmr2_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zunmr3_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer * info); /* Subroutine */ int _starpu_zunmrq_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zunmrz_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_zunmtr_(char *side, char *uplo, char *trans, integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zupgtr_(char *uplo, integer *n, doublecomplex *ap, doublecomplex *tau, doublecomplex *q, integer *ldq, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zupmtr_(char *side, char *uplo, char *trans, integer *m, integer *n, doublecomplex *ap, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_dlamc1_(integer *beta, integer *t, logical *rnd, logical *ieee1); doublereal _starpu_dsecnd_(); /* Subroutine */ int _starpu_ilaver_(integer *vers_major__, integer *vers_minor__, integer *vers_patch__); logical _starpu_lsame_(char *ca, char *cb); doublereal _starpu_second_(); doublereal _starpu_slamch_(char *cmach); /* Subroutine */ int _starpu_slamc1_(integer *beta, integer *t, logical *rnd, logical *ieee1); /* Subroutine */ int _starpu_slamc2_(integer *beta, integer *t, logical *rnd, real * eps, integer *emin, real *rmin, integer *emax, real *rmax); doublereal _starpu_slamc3_(real *a, real *b); /* Subroutine */ int _starpu_slamc4_(integer *emin, real *start, integer *base); /* Subroutine */ int _starpu_slamc5_(integer *beta, integer *p, integer *emin, logical *ieee, integer *emax, real *rmax); doublereal _starpu_dlamch_(char *cmach); /* Subroutine */ int _starpu_dlamc1_(integer *beta, integer *t, logical *rnd, logical *ieee1); /* Subroutine */ int _starpu_dlamc2_(integer *beta, integer *t, logical *rnd, doublereal *eps, integer *emin, doublereal *rmin, integer *emax, doublereal *rmax); doublereal _starpu_dlamc3_(doublereal *a, doublereal *b); /* Subroutine */ int _starpu_dlamc4_(integer *emin, doublereal *start, integer *base); /* Subroutine */ int _starpu_dlamc5_(integer *beta, integer *p, integer *emin, logical *ieee, integer *emax, doublereal *rmax); integer _starpu_ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, integer *n2, integer *n3, integer *n4); #ifdef __cplusplus } #endif #endif /* __CLAPACK_H */ starpu-1.4.9+dfsg/min-dgels/additional/d_lg10.c000066400000000000000000000004431507764646700212120ustar00rootroot00000000000000#include "f2c.h" #define log10e 0.43429448190325182765 #ifdef KR_headers double log(); double d_lg10(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_lg10(doublereal *x) #endif { return( log10e * log(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/additional/d_sign.c000066400000000000000000000004121507764646700214030ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers double d_sign(a,b) doublereal *a, *b; #else double d_sign(doublereal *a, doublereal *b) #endif { double x; x = (*a >= 0 ? *a : - *a); return( *b >= 0 ? x : -x); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/additional/dcopy.c000066400000000000000000000043511507764646700212640ustar00rootroot00000000000000/* dcopy.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dcopy_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy) { /* System generated locals */ integer i__1; /* Local variables */ integer i__, m, ix, iy, mp1; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* copies a vector, x, to a vector, y. */ /* uses unrolled loops for increments equal to one. */ /* jack dongarra, linpack, 3/11/78. */ /* modified 12/3/93, array(1) declarations changed to array(*) */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Parameter adjustments */ --dy; --dx; /* Function Body */ if (*n <= 0) { return 0; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments */ /* not equal to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dy[iy] = dx[ix]; ix += *incx; iy += *incy; /* L10: */ } return 0; /* code for both increments equal to 1 */ /* clean-up loop */ L20: m = *n % 7; if (m == 0) { goto L40; } i__1 = m; for (i__ = 1; i__ <= i__1; ++i__) { dy[i__] = dx[i__]; /* L30: */ } if (*n < 7) { return 0; } L40: mp1 = m + 1; i__1 = *n; for (i__ = mp1; i__ <= i__1; i__ += 7) { dy[i__] = dx[i__]; dy[i__ + 1] = dx[i__ + 1]; dy[i__ + 2] = dx[i__ + 2]; dy[i__ + 3] = dx[i__ + 3]; dy[i__ + 4] = dx[i__ + 4]; dy[i__ + 5] = dx[i__ + 5]; dy[i__ + 6] = dx[i__ + 6]; /* L50: */ } return 0; } /* _starpu_dcopy_ */ starpu-1.4.9+dfsg/min-dgels/additional/dgelq2.c000066400000000000000000000106351507764646700213260ustar00rootroot00000000000000/* dgelq2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgelq2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__, k; doublereal aii; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dlarfp_(integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGELQ2 computes an LQ factorization of a real m by n matrix A: */ /* A = L * Q. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the m by n matrix A. */ /* On exit, the elements on and below the diagonal of the array */ /* contain the m by min(m,n) lower trapezoidal matrix L (L is */ /* lower triangular if m <= n); the elements above the diagonal, */ /* with the array TAU, represent the orthogonal matrix Q as a */ /* product of elementary reflectors (see Further Details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (M) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(k) . . . H(2) H(1), where k = min(m,n). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */ /* and tau in TAU(i). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGELQ2", &i__1); return 0; } k = min(*m,*n); i__1 = k; for (i__ = 1; i__ <= i__1; ++i__) { /* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ i__2 = *n - i__ + 1; /* Computing MIN */ i__3 = i__ + 1; _starpu_dlarfp_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n)* a_dim1] , lda, &tau[i__]); if (i__ < *m) { /* Apply H(i) to A(i+1:m,i:n) from the right */ aii = a[i__ + i__ * a_dim1]; a[i__ + i__ * a_dim1] = 1.; i__2 = *m - i__; i__3 = *n - i__ + 1; _starpu_dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); a[i__ + i__ * a_dim1] = aii; } /* L10: */ } return 0; /* End of DGELQ2 */ } /* _starpu_dgelq2_ */ starpu-1.4.9+dfsg/min-dgels/additional/dgelqf.c000066400000000000000000000163741507764646700214200ustar00rootroot00000000000000/* dgelqf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; /* Subroutine */ int _starpu_dgelqf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, k, ib, nb, nx, iws, nbmin, iinfo; extern /* Subroutine */ int _starpu_dgelq2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGELQF computes an LQ factorization of a real M-by-N matrix A: */ /* A = L * Q. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, the elements on and below the diagonal of the array */ /* contain the m-by-min(m,n) lower trapezoidal matrix L (L is */ /* lower triangular if m <= n); the elements above the diagonal, */ /* with the array TAU, represent the orthogonal matrix Q as a */ /* product of elementary reflectors (see Further Details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,M). */ /* For optimum performance LWORK >= M*NB, where NB is the */ /* optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(k) . . . H(2) H(1), where k = min(m,n). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */ /* and tau in TAU(i). */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; nb = _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1); lwkopt = *m * nb; work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } else if (*lwork < max(1,*m) && ! lquery) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGELQF", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ k = min(*m,*n); if (k == 0) { work[1] = 1.; return 0; } nbmin = 2; nx = 0; iws = *m; if (nb > 1 && nb < k) { /* Determine when to cross over from blocked to unblocked code. */ /* Computing MAX */ i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1); nx = max(i__1,i__2); if (nx < k) { /* Determine if workspace is large enough for blocked code. */ ldwork = *m; iws = ldwork * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: reduce NB and */ /* determine the minimum value of NB. */ nb = *lwork / ldwork; /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, & c_n1); nbmin = max(i__1,i__2); } } } if (nb >= nbmin && nb < k && nx < k) { /* Use blocked code initially */ i__1 = k - nx; i__2 = nb; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = k - i__ + 1; ib = min(i__3,nb); /* Compute the LQ factorization of the current block */ /* A(i:i+ib-1,i:n) */ i__3 = *n - i__ + 1; _starpu_dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ 1], &iinfo); if (i__ + ib <= *m) { /* Form the triangular factor of the block reflector */ /* H = H(i) H(i+1) . . . H(i+ib-1) */ i__3 = *n - i__ + 1; _starpu_dlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &ldwork); /* Apply H to A(i+ib:m,i:n) from the right */ i__3 = *m - i__ - ib + 1; i__4 = *n - i__ + 1; _starpu_dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + 1], &ldwork); } /* L10: */ } } else { i__ = 1; } /* Use unblocked code to factor the last or only block. */ if (i__ <= k) { i__2 = *m - i__ + 1; i__1 = *n - i__ + 1; _starpu_dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] , &iinfo); } work[1] = (doublereal) iws; return 0; /* End of DGELQF */ } /* _starpu_dgelqf_ */ starpu-1.4.9+dfsg/min-dgels/additional/dgels.c000066400000000000000000000360611507764646700212470ustar00rootroot00000000000000/* dgels.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b33 = 0.; static integer c__0 = 0; /* Subroutine */ int _starpu_dgels_(char *trans, integer *m, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; /* Local variables */ integer i__, j, nb, mn; doublereal anrm, bnrm; integer brow; logical tpsd; integer iascl, ibscl; extern logical _starpu_lsame_(char *, char *); integer wsize; doublereal rwork[1]; extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer scllen; doublereal bignum; extern /* Subroutine */ int _starpu_dormlq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); doublereal smlnum; logical lquery; extern /* Subroutine */ int _starpu_dtrtrs_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGELS solves overdetermined or underdetermined real linear systems */ /* involving an M-by-N matrix A, or its transpose, using a QR or LQ */ /* factorization of A. It is assumed that A has full rank. */ /* The following options are provided: */ /* 1. If TRANS = 'N' and m >= n: find the least squares solution of */ /* an overdetermined system, i.e., solve the least squares problem */ /* minimize || B - A*X ||. */ /* 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ /* an underdetermined system A * X = B. */ /* 3. If TRANS = 'T' and m >= n: find the minimum norm solution of */ /* an undetermined system A**T * X = B. */ /* 4. If TRANS = 'T' and m < n: find the least squares solution of */ /* an overdetermined system, i.e., solve the least squares problem */ /* minimize || B - A**T * X ||. */ /* Several right hand side vectors b and solution vectors x can be */ /* handled in a single call; they are stored as the columns of the */ /* M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ /* matrix X. */ /* Arguments */ /* ========= */ /* TRANS (input) CHARACTER*1 */ /* = 'N': the linear system involves A; */ /* = 'T': the linear system involves A**T. */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of */ /* columns of the matrices B and X. NRHS >=0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, */ /* if M >= N, A is overwritten by details of its QR */ /* factorization as returned by DGEQRF; */ /* if M < N, A is overwritten by details of its LQ */ /* factorization as returned by DGELQF. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the matrix B of right hand side vectors, stored */ /* columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ /* if TRANS = 'T'. */ /* On exit, if INFO = 0, B is overwritten by the solution */ /* vectors, stored columnwise: */ /* if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ /* squares solution vectors; the residual sum of squares for the */ /* solution in each column is given by the sum of squares of */ /* elements N+1 to M in that column; */ /* if TRANS = 'N' and m < n, rows 1 to N of B contain the */ /* minimum norm solution vectors; */ /* if TRANS = 'T' and m >= n, rows 1 to M of B contain the */ /* minimum norm solution vectors; */ /* if TRANS = 'T' and m < n, rows 1 to M of B contain the */ /* least squares solution vectors; the residual sum of squares */ /* for the solution in each column is given by the sum of */ /* squares of elements M+1 to N in that column. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= MAX(1,M,N). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* LWORK >= max( 1, MN + max( MN, NRHS ) ). */ /* For optimal performance, */ /* LWORK >= max( 1, MN + max( MN, NRHS )*NB ). */ /* where MN = min(M,N) and NB is the optimum block size. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the i-th diagonal element of the */ /* triangular factor of A is zero, so that A does not have */ /* full rank; the least squares solution could not be */ /* computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --work; /* Function Body */ *info = 0; mn = min(*m,*n); lquery = *lwork == -1; if (! (_starpu_lsame_(trans, "N") || _starpu_lsame_(trans, "T"))) { *info = -1; } else if (*m < 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*lda < max(1,*m)) { *info = -6; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = max(1,*m); if (*ldb < max(i__1,*n)) { *info = -8; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = mn + max(mn,*nrhs); if (*lwork < max(i__1,i__2) && ! lquery) { *info = -10; } } } /* Figure out optimal block size */ if (*info == 0 || *info == -10) { tpsd = TRUE_; if (_starpu_lsame_(trans, "N")) { tpsd = FALSE_; } if (*m >= *n) { nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); if (tpsd) { /* Computing MAX */ i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMQR", "LN", m, nrhs, n, & c_n1); nb = max(i__1,i__2); } else { /* Computing MAX */ i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMQR", "LT", m, nrhs, n, & c_n1); nb = max(i__1,i__2); } } else { nb = _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1); if (tpsd) { /* Computing MAX */ i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMLQ", "LT", n, nrhs, m, & c_n1); nb = max(i__1,i__2); } else { /* Computing MAX */ i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMLQ", "LN", n, nrhs, m, & c_n1); nb = max(i__1,i__2); } } /* Computing MAX */ i__1 = 1, i__2 = mn + max(mn,*nrhs) * nb; wsize = max(i__1,i__2); work[1] = (doublereal) wsize; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGELS ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ /* Computing MIN */ i__1 = min(*m,*n); if (min(i__1,*nrhs) == 0) { i__1 = max(*m,*n); _starpu_dlaset_("Full", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb); return 0; } /* Get machine parameters */ smlnum = _starpu_dlamch_("S") / _starpu_dlamch_("P"); bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); /* Scale A, B if max element outside range [SMLNUM,BIGNUM] */ anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, rwork); iascl = 0; if (anrm > 0. && anrm < smlnum) { /* Scale matrix norm up to SMLNUM */ _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info); iascl = 1; } else if (anrm > bignum) { /* Scale matrix norm down to BIGNUM */ _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info); iascl = 2; } else if (anrm == 0.) { /* Matrix all zero. Return zero solution. */ i__1 = max(*m,*n); _starpu_dlaset_("F", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb); goto L50; } brow = *m; if (tpsd) { brow = *n; } bnrm = _starpu_dlange_("M", &brow, nrhs, &b[b_offset], ldb, rwork); ibscl = 0; if (bnrm > 0. && bnrm < smlnum) { /* Scale matrix norm up to SMLNUM */ _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], ldb, info); ibscl = 1; } else if (bnrm > bignum) { /* Scale matrix norm down to BIGNUM */ _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], ldb, info); ibscl = 2; } if (*m >= *n) { /* compute QR factorization of A */ i__1 = *lwork - mn; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info) ; /* workspace at least N, optimally N*NB */ if (! tpsd) { /* Least-Squares Problem min || A * X - B || */ /* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) */ i__1 = *lwork - mn; _starpu_dormqr_("Left", "Transpose", m, nrhs, n, &a[a_offset], lda, &work[ 1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); /* workspace at least NRHS, optimally NRHS*NB */ /* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ _starpu_dtrtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset] , lda, &b[b_offset], ldb, info); if (*info > 0) { return 0; } scllen = *n; } else { /* Overdetermined system of equations A' * X = B */ /* B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS) */ _starpu_dtrtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset], lda, &b[b_offset], ldb, info); if (*info > 0) { return 0; } /* B(N+1:M,1:NRHS) = ZERO */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = *n + 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = 0.; /* L10: */ } /* L20: */ } /* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */ i__1 = *lwork - mn; _starpu_dormqr_("Left", "No transpose", m, nrhs, n, &a[a_offset], lda, & work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); /* workspace at least NRHS, optimally NRHS*NB */ scllen = *m; } } else { /* Compute LQ factorization of A */ i__1 = *lwork - mn; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info) ; /* workspace at least M, optimally M*NB. */ if (! tpsd) { /* underdetermined system of equations A * X = B */ /* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ _starpu_dtrtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset] , lda, &b[b_offset], ldb, info); if (*info > 0) { return 0; } /* B(M+1:N,1:NRHS) = 0 */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = *m + 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = 0.; /* L30: */ } /* L40: */ } /* B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS) */ i__1 = *lwork - mn; _starpu_dormlq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &work[ 1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); /* workspace at least NRHS, optimally NRHS*NB */ scllen = *n; } else { /* overdetermined system min || A' * X - B || */ /* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */ i__1 = *lwork - mn; _starpu_dormlq_("Left", "No transpose", n, nrhs, m, &a[a_offset], lda, & work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); /* workspace at least NRHS, optimally NRHS*NB */ /* B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS) */ _starpu_dtrtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], lda, &b[b_offset], ldb, info); if (*info > 0) { return 0; } scllen = *m; } } /* Undo scaling */ if (iascl == 1) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] , ldb, info); } else if (iascl == 2) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] , ldb, info); } if (ibscl == 1) { _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] , ldb, info); } else if (ibscl == 2) { _starpu_dlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] , ldb, info); } L50: work[1] = (doublereal) wsize; return 0; /* End of DGELS */ } /* _starpu_dgels_ */ starpu-1.4.9+dfsg/min-dgels/additional/dgemm.c000066400000000000000000000245261507764646700212450ustar00rootroot00000000000000/* dgemm.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, doublereal *alpha, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, integer *ldc) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, l, info; logical nota, notb; doublereal temp; integer ncola; extern logical _starpu_lsame_(char *, char *); integer nrowa, nrowb; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEMM performs one of the matrix-matrix operations */ /* C := alpha*op( A )*op( B ) + beta*C, */ /* where op( X ) is one of */ /* op( X ) = X or op( X ) = X', */ /* alpha and beta are scalars, and A, B and C are matrices, with op( A ) */ /* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. */ /* Arguments */ /* ========== */ /* TRANSA - CHARACTER*1. */ /* On entry, TRANSA specifies the form of op( A ) to be used in */ /* the matrix multiplication as follows: */ /* TRANSA = 'N' or 'n', op( A ) = A. */ /* TRANSA = 'T' or 't', op( A ) = A'. */ /* TRANSA = 'C' or 'c', op( A ) = A'. */ /* Unchanged on exit. */ /* TRANSB - CHARACTER*1. */ /* On entry, TRANSB specifies the form of op( B ) to be used in */ /* the matrix multiplication as follows: */ /* TRANSB = 'N' or 'n', op( B ) = B. */ /* TRANSB = 'T' or 't', op( B ) = B'. */ /* TRANSB = 'C' or 'c', op( B ) = B'. */ /* Unchanged on exit. */ /* M - INTEGER. */ /* On entry, M specifies the number of rows of the matrix */ /* op( A ) and of the matrix C. M must be at least zero. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of the matrix */ /* op( B ) and the number of columns of the matrix C. N must be */ /* at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry, K specifies the number of columns of the matrix */ /* op( A ) and the number of rows of the matrix op( B ). K must */ /* be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */ /* k when TRANSA = 'N' or 'n', and is m otherwise. */ /* Before entry with TRANSA = 'N' or 'n', the leading m by k */ /* part of the array A must contain the matrix A, otherwise */ /* the leading k by m part of the array A must contain the */ /* matrix A. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. When TRANSA = 'N' or 'n' then */ /* LDA must be at least max( 1, m ), otherwise LDA must be at */ /* least max( 1, k ). */ /* Unchanged on exit. */ /* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is */ /* n when TRANSB = 'N' or 'n', and is k otherwise. */ /* Before entry with TRANSB = 'N' or 'n', the leading k by n */ /* part of the array B must contain the matrix B, otherwise */ /* the leading n by k part of the array B must contain the */ /* matrix B. */ /* Unchanged on exit. */ /* LDB - INTEGER. */ /* On entry, LDB specifies the first dimension of B as declared */ /* in the calling (sub) program. When TRANSB = 'N' or 'n' then */ /* LDB must be at least max( 1, k ), otherwise LDB must be at */ /* least max( 1, n ). */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION. */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then C need not be set on input. */ /* Unchanged on exit. */ /* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */ /* Before entry, the leading m by n part of the array C must */ /* contain the matrix C, except when beta is zero, in which */ /* case C need not be set on entry. */ /* On exit, the array C is overwritten by the m by n matrix */ /* ( alpha*op( A )*op( B ) + beta*C ). */ /* LDC - INTEGER. */ /* On entry, LDC specifies the first dimension of C as declared */ /* in the calling (sub) program. LDC must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* Level 3 Blas routine. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Parameters .. */ /* .. */ /* Set NOTA and NOTB as true if A and B respectively are not */ /* transposed and set NROWA, NCOLA and NROWB as the number of rows */ /* and columns of A and the number of rows of B respectively. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; /* Function Body */ nota = _starpu_lsame_(transa, "N"); notb = _starpu_lsame_(transb, "N"); if (nota) { nrowa = *m; ncola = *k; } else { nrowa = *k; ncola = *m; } if (notb) { nrowb = *k; } else { nrowb = *n; } /* Test the input parameters. */ info = 0; if (! nota && ! _starpu_lsame_(transa, "C") && ! _starpu_lsame_( transa, "T")) { info = 1; } else if (! notb && ! _starpu_lsame_(transb, "C") && ! _starpu_lsame_(transb, "T")) { info = 2; } else if (*m < 0) { info = 3; } else if (*n < 0) { info = 4; } else if (*k < 0) { info = 5; } else if (*lda < max(1,nrowa)) { info = 8; } else if (*ldb < max(1,nrowb)) { info = 10; } else if (*ldc < max(1,*m)) { info = 13; } if (info != 0) { _starpu_xerbla_("DGEMM ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { return 0; } /* And if alpha.eq.zero. */ if (*alpha == 0.) { if (*beta == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L10: */ } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L30: */ } /* L40: */ } } return 0; } /* Start the operations. */ if (notb) { if (nota) { /* Form C := alpha*A*B + beta*C. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*beta == 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L50: */ } } else if (*beta != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L60: */ } } i__2 = *k; for (l = 1; l <= i__2; ++l) { if (b[l + j * b_dim1] != 0.) { temp = *alpha * b[l + j * b_dim1]; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1]; /* L70: */ } } /* L80: */ } /* L90: */ } } else { /* Form C := alpha*A'*B + beta*C */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = 0.; i__3 = *k; for (l = 1; l <= i__3; ++l) { temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; /* L100: */ } if (*beta == 0.) { c__[i__ + j * c_dim1] = *alpha * temp; } else { c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ i__ + j * c_dim1]; } /* L110: */ } /* L120: */ } } } else { if (nota) { /* Form C := alpha*A*B' + beta*C */ i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*beta == 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L130: */ } } else if (*beta != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L140: */ } } i__2 = *k; for (l = 1; l <= i__2; ++l) { if (b[j + l * b_dim1] != 0.) { temp = *alpha * b[j + l * b_dim1]; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1]; /* L150: */ } } /* L160: */ } /* L170: */ } } else { /* Form C := alpha*A'*B' + beta*C */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = 0.; i__3 = *k; for (l = 1; l <= i__3; ++l) { temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; /* L180: */ } if (*beta == 0.) { c__[i__ + j * c_dim1] = *alpha * temp; } else { c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ i__ + j * c_dim1]; } /* L190: */ } /* L200: */ } } } return 0; /* End of DGEMM . */ } /* _starpu_dgemm_ */ starpu-1.4.9+dfsg/min-dgels/additional/dgemv.c000066400000000000000000000170561507764646700212560ustar00rootroot00000000000000/* dgemv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgemv_(char *trans, integer *m, integer *n, doublereal * alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, ix, iy, jx, jy, kx, ky, info; doublereal temp; integer lenx, leny; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEMV performs one of the matrix-vector operations */ /* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, */ /* where alpha and beta are scalars, x and y are vectors and A is an */ /* m by n matrix. */ /* Arguments */ /* ========== */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. */ /* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. */ /* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. */ /* Unchanged on exit. */ /* M - INTEGER. */ /* On entry, M specifies the number of rows of the matrix A. */ /* M must be at least zero. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry, the leading m by n part of the array A must */ /* contain the matrix of coefficients. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ /* Before entry, the incremented array X must contain the */ /* vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION. */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then Y need not be set on input. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ /* Before entry with BETA non-zero, the incremented array Y */ /* must contain the vector y. On exit, Y is overwritten by the */ /* updated vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --x; --y; /* Function Body */ info = 0; if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C") ) { info = 1; } else if (*m < 0) { info = 2; } else if (*n < 0) { info = 3; } else if (*lda < max(1,*m)) { info = 6; } else if (*incx == 0) { info = 8; } else if (*incy == 0) { info = 11; } if (info != 0) { _starpu_xerbla_("DGEMV ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { return 0; } /* Set LENX and LENY, the lengths of the vectors x and y, and set */ /* up the start points in X and Y. */ if (_starpu_lsame_(trans, "N")) { lenx = *n; leny = *m; } else { lenx = *m; leny = *n; } if (*incx > 0) { kx = 1; } else { kx = 1 - (lenx - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (leny - 1) * *incy; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through A. */ /* First form y := beta*y. */ if (*beta != 1.) { if (*incy == 1) { if (*beta == 0.) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = 0.; /* L10: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = *beta * y[i__]; /* L20: */ } } } else { iy = ky; if (*beta == 0.) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = 0.; iy += *incy; /* L30: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = *beta * y[iy]; iy += *incy; /* L40: */ } } } } if (*alpha == 0.) { return 0; } if (_starpu_lsame_(trans, "N")) { /* Form y := alpha*A*x + y. */ jx = kx; if (*incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = *alpha * x[jx]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { y[i__] += temp * a[i__ + j * a_dim1]; /* L50: */ } } jx += *incx; /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = *alpha * x[jx]; iy = ky; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { y[iy] += temp * a[i__ + j * a_dim1]; iy += *incy; /* L70: */ } } jx += *incx; /* L80: */ } } } else { /* Form y := alpha*A'*x + y. */ jy = ky; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = 0.; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp += a[i__ + j * a_dim1] * x[i__]; /* L90: */ } y[jy] += *alpha * temp; jy += *incy; /* L100: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = 0.; ix = kx; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp += a[i__ + j * a_dim1] * x[ix]; ix += *incx; /* L110: */ } y[jy] += *alpha * temp; jy += *incy; /* L120: */ } } } return 0; /* End of DGEMV . */ } /* _starpu_dgemv_ */ starpu-1.4.9+dfsg/min-dgels/additional/dgeqr2.c000066400000000000000000000107321507764646700213320ustar00rootroot00000000000000/* dgeqr2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dgeqr2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__, k; doublereal aii; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dlarfp_(integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEQR2 computes a QR factorization of a real m by n matrix A: */ /* A = Q * R. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the m by n matrix A. */ /* On exit, the elements on and above the diagonal of the array */ /* contain the min(m,n) by n upper trapezoidal matrix R (R is */ /* upper triangular if m >= n); the elements below the diagonal, */ /* with the array TAU, represent the orthogonal matrix Q as a */ /* product of elementary reflectors (see Further Details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ /* and tau in TAU(i). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEQR2", &i__1); return 0; } k = min(*m,*n); i__1 = k; for (i__ = 1; i__ <= i__1; ++i__) { /* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ i__2 = *m - i__ + 1; /* Computing MIN */ i__3 = i__ + 1; _starpu_dlarfp_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m)+ i__ * a_dim1] , &c__1, &tau[i__]); if (i__ < *n) { /* Apply H(i) to A(i:m,i+1:n) from the left */ aii = a[i__ + i__ * a_dim1]; a[i__ + i__ * a_dim1] = 1.; i__2 = *m - i__ + 1; i__3 = *n - i__; _starpu_dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); a[i__ + i__ * a_dim1] = aii; } /* L10: */ } return 0; /* End of DGEQR2 */ } /* _starpu_dgeqr2_ */ starpu-1.4.9+dfsg/min-dgels/additional/dgeqrf.c000066400000000000000000000164271507764646700214250ustar00rootroot00000000000000/* dgeqrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; /* Subroutine */ int _starpu_dgeqrf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, k, ib, nb, nx, iws, nbmin, iinfo; extern /* Subroutine */ int _starpu_dgeqr2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEQRF computes a QR factorization of a real M-by-N matrix A: */ /* A = Q * R. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, the elements on and above the diagonal of the array */ /* contain the min(M,N)-by-N upper trapezoidal matrix R (R is */ /* upper triangular if m >= n); the elements below the diagonal, */ /* with the array TAU, represent the orthogonal matrix Q as a */ /* product of min(m,n) elementary reflectors (see Further */ /* Details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,N). */ /* For optimum performance LWORK >= N*NB, where NB is */ /* the optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ /* and tau in TAU(i). */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); lwkopt = *n * nb; work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } else if (*lwork < max(1,*n) && ! lquery) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEQRF", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ k = min(*m,*n); if (k == 0) { work[1] = 1.; return 0; } nbmin = 2; nx = 0; iws = *n; if (nb > 1 && nb < k) { /* Determine when to cross over from blocked to unblocked code. */ /* Computing MAX */ i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1); nx = max(i__1,i__2); if (nx < k) { /* Determine if workspace is large enough for blocked code. */ ldwork = *n; iws = ldwork * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: reduce NB and */ /* determine the minimum value of NB. */ nb = *lwork / ldwork; /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, & c_n1); nbmin = max(i__1,i__2); } } } if (nb >= nbmin && nb < k && nx < k) { /* Use blocked code initially */ i__1 = k - nx; i__2 = nb; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = k - i__ + 1; ib = min(i__3,nb); /* Compute the QR factorization of the current block */ /* A(i:m,i:i+ib-1) */ i__3 = *m - i__ + 1; _starpu_dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ 1], &iinfo); if (i__ + ib <= *n) { /* Form the triangular factor of the block reflector */ /* H = H(i) H(i+1) . . . H(i+ib-1) */ i__3 = *m - i__ + 1; _starpu_dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &ldwork); /* Apply H' to A(i:m,i+ib:n) from the left */ i__3 = *m - i__ + 1; i__4 = *n - i__ - ib + 1; _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + 1], &ldwork); } /* L10: */ } } else { i__ = 1; } /* Use unblocked code to factor the last or only block. */ if (i__ <= k) { i__2 = *m - i__ + 1; i__1 = *n - i__ + 1; _starpu_dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] , &iinfo); } work[1] = (doublereal) iws; return 0; /* End of DGEQRF */ } /* _starpu_dgeqrf_ */ starpu-1.4.9+dfsg/min-dgels/additional/dger.c000066400000000000000000000116031507764646700210650ustar00rootroot00000000000000/* dger.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dger_(integer *m, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, ix, jy, kx, info; doublereal temp; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGER performs the rank 1 operation */ /* A := alpha*x*y' + A, */ /* where alpha is a scalar, x is an m element vector, y is an n element */ /* vector and A is an m by n matrix. */ /* Arguments */ /* ========== */ /* M - INTEGER. */ /* On entry, M specifies the number of rows of the matrix A. */ /* M must be at least zero. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( m - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the m */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the n */ /* element vector y. */ /* Unchanged on exit. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry, the leading m by n part of the array A must */ /* contain the matrix of coefficients. On exit, A is */ /* overwritten by the updated matrix. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ --x; --y; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ info = 0; if (*m < 0) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*incy == 0) { info = 7; } else if (*lda < max(1,*m)) { info = 9; } if (info != 0) { _starpu_xerbla_("DGER ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || *alpha == 0.) { return 0; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through A. */ if (*incy > 0) { jy = 1; } else { jy = 1 - (*n - 1) * *incy; } if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (y[jy] != 0.) { temp = *alpha * y[jy]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] += x[i__] * temp; /* L10: */ } } jy += *incy; /* L20: */ } } else { if (*incx > 0) { kx = 1; } else { kx = 1 - (*m - 1) * *incx; } i__1 = *n; for (j = 1; j <= i__1; ++j) { if (y[jy] != 0.) { temp = *alpha * y[jy]; ix = kx; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] += x[ix] * temp; ix += *incx; /* L30: */ } } jy += *incy; /* L40: */ } } return 0; /* End of DGER . */ } /* _starpu_dger_ */ starpu-1.4.9+dfsg/min-dgels/additional/disnan.c000066400000000000000000000026311507764646700214210ustar00rootroot00000000000000/* disnan.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" logical _starpu_disnan_(doublereal *din) { /* System generated locals */ logical ret_val; /* Local variables */ extern logical _starpu_dlaisnan_(doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DISNAN returns .TRUE. if its argument is NaN, and .FALSE. */ /* otherwise. To be replaced by the Fortran 2003 intrinsic in the */ /* future. */ /* Arguments */ /* ========= */ /* DIN (input) DOUBLE PRECISION */ /* Input to test for NaN. */ /* ===================================================================== */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ ret_val = _starpu_dlaisnan_(din, din); return ret_val; } /* _starpu_disnan_ */ starpu-1.4.9+dfsg/min-dgels/additional/dlabad.c000066400000000000000000000046161507764646700213610ustar00rootroot00000000000000/* dlabad.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlabad_(doublereal *small, doublereal *large) { /* Builtin functions */ double d_lg10(doublereal *), sqrt(doublereal); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLABAD takes as input the values computed by DLAMCH for underflow and */ /* overflow, and returns the square root of each of these values if the */ /* log of LARGE is sufficiently large. This subroutine is intended to */ /* identify machines with a large exponent range, such as the Crays, and */ /* redefine the underflow and overflow limits to be the square roots of */ /* the values computed by DLAMCH. This subroutine is needed because */ /* DLAMCH does not compensate for poor arithmetic in the upper half of */ /* the exponent range, as is found on a Cray. */ /* Arguments */ /* ========= */ /* SMALL (input/output) DOUBLE PRECISION */ /* On entry, the underflow threshold as computed by DLAMCH. */ /* On exit, if LOG10(LARGE) is sufficiently large, the square */ /* root of SMALL, otherwise unchanged. */ /* LARGE (input/output) DOUBLE PRECISION */ /* On entry, the overflow threshold as computed by DLAMCH. */ /* On exit, if LOG10(LARGE) is sufficiently large, the square */ /* root of LARGE, otherwise unchanged. */ /* ===================================================================== */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* If it looks like we're on a Cray, take the square root of */ /* SMALL and LARGE to avoid overflow and underflow problems. */ if (d_lg10(large) > 2e3) { *small = sqrt(*small); *large = sqrt(*large); } return 0; /* End of DLABAD */ } /* _starpu_dlabad_ */ starpu-1.4.9+dfsg/min-dgels/additional/dlaisnan.c000066400000000000000000000035401507764646700217360ustar00rootroot00000000000000/* dlaisnan.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" logical _starpu_dlaisnan_(doublereal *din1, doublereal *din2) { /* System generated locals */ logical ret_val; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This routine is not for general use. It exists solely to avoid */ /* over-optimization in DISNAN. */ /* DLAISNAN checks for NaNs by comparing its two arguments for */ /* inequality. NaN is the only floating-point value where NaN != NaN */ /* returns .TRUE. To check for NaNs, pass the same variable as both */ /* arguments. */ /* A compiler must assume that the two arguments are */ /* not the same variable, and the test will not be optimized away. */ /* Interprocedural or whole-program optimization may delete this */ /* test. The ISNAN functions will be replaced by the correct */ /* Fortran 03 intrinsic once the intrinsic is widely available. */ /* Arguments */ /* ========= */ /* DIN1 (input) DOUBLE PRECISION */ /* DIN2 (input) DOUBLE PRECISION */ /* Two numbers to compare for inequality. */ /* ===================================================================== */ /* .. Executable Statements .. */ ret_val = *din1 != *din2; return ret_val; } /* _starpu_dlaisnan_ */ starpu-1.4.9+dfsg/min-dgels/additional/dlamch.c000066400000000000000000000647461507764646700214140ustar00rootroot00000000000000/* dlamch.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b32 = 0.; doublereal _starpu_dlamch_(char *cmach) { /* Initialized data */ static logical first = TRUE_; /* System generated locals */ integer i__1; doublereal ret_val; /* Builtin functions */ double pow_di(doublereal *, integer *); /* Local variables */ static doublereal t; integer it; static doublereal rnd, eps, base; integer beta; static doublereal emin, prec, emax; integer imin, imax; logical lrnd; static doublereal rmin, rmax; doublereal rmach; extern logical _starpu_lsame_(char *, char *); doublereal small; static doublereal sfmin; extern /* Subroutine */ int _starpu_dlamc2_(integer *, integer *, logical *, doublereal *, integer *, doublereal *, integer *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAMCH determines double precision machine parameters. */ /* Arguments */ /* ========= */ /* CMACH (input) CHARACTER*1 */ /* Specifies the value to be returned by DLAMCH: */ /* = 'E' or 'e', DLAMCH := eps */ /* = 'S' or 's , DLAMCH := sfmin */ /* = 'B' or 'b', DLAMCH := base */ /* = 'P' or 'p', DLAMCH := eps*base */ /* = 'N' or 'n', DLAMCH := t */ /* = 'R' or 'r', DLAMCH := rnd */ /* = 'M' or 'm', DLAMCH := emin */ /* = 'U' or 'u', DLAMCH := rmin */ /* = 'L' or 'l', DLAMCH := emax */ /* = 'O' or 'o', DLAMCH := rmax */ /* where */ /* eps = relative machine precision */ /* sfmin = safe minimum, such that 1/sfmin does not overflow */ /* base = base of the machine */ /* prec = eps*base */ /* t = number of (base) digits in the mantissa */ /* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise */ /* emin = minimum exponent before (gradual) underflow */ /* rmin = underflow threshold - base**(emin-1) */ /* emax = largest exponent before overflow */ /* rmax = overflow threshold - (base**emax)*(1-eps) */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Save statement .. */ /* .. */ /* .. Data statements .. */ /* .. */ /* .. Executable Statements .. */ if (first) { _starpu_dlamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); base = (doublereal) beta; t = (doublereal) it; if (lrnd) { rnd = 1.; i__1 = 1 - it; eps = pow_di(&base, &i__1) / 2; } else { rnd = 0.; i__1 = 1 - it; eps = pow_di(&base, &i__1); } prec = eps * base; emin = (doublereal) imin; emax = (doublereal) imax; sfmin = rmin; small = 1. / rmax; if (small >= sfmin) { /* Use SMALL plus a bit, to avoid the possibility of rounding */ /* causing overflow when computing 1/sfmin. */ sfmin = small * (eps + 1.); } } if (_starpu_lsame_(cmach, "E")) { rmach = eps; } else if (_starpu_lsame_(cmach, "S")) { rmach = sfmin; } else if (_starpu_lsame_(cmach, "B")) { rmach = base; } else if (_starpu_lsame_(cmach, "P")) { rmach = prec; } else if (_starpu_lsame_(cmach, "N")) { rmach = t; } else if (_starpu_lsame_(cmach, "R")) { rmach = rnd; } else if (_starpu_lsame_(cmach, "M")) { rmach = emin; } else if (_starpu_lsame_(cmach, "U")) { rmach = rmin; } else if (_starpu_lsame_(cmach, "L")) { rmach = emax; } else if (_starpu_lsame_(cmach, "O")) { rmach = rmax; } ret_val = rmach; first = FALSE_; return ret_val; /* End of DLAMCH */ } /* _starpu_dlamch_ */ /* *********************************************************************** */ /* Subroutine */ int _starpu_dlamc1_(integer *beta, integer *t, logical *rnd, logical *ieee1) { /* Initialized data */ static logical first = TRUE_; /* System generated locals */ doublereal d__1, d__2; /* Local variables */ doublereal a, b, c__, f, t1, t2; static integer lt; doublereal one, qtr; static logical lrnd; static integer lbeta; doublereal savec; extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); static logical lieee1; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAMC1 determines the machine parameters given by BETA, T, RND, and */ /* IEEE1. */ /* Arguments */ /* ========= */ /* BETA (output) INTEGER */ /* The base of the machine. */ /* T (output) INTEGER */ /* The number of ( BETA ) digits in the mantissa. */ /* RND (output) LOGICAL */ /* Specifies whether proper rounding ( RND = .TRUE. ) or */ /* chopping ( RND = .FALSE. ) occurs in addition. This may not */ /* be a reliable guide to the way in which the machine performs */ /* its arithmetic. */ /* IEEE1 (output) LOGICAL */ /* Specifies whether rounding appears to be done in the IEEE */ /* 'round to nearest' style. */ /* Further Details */ /* =============== */ /* The routine is based on the routine ENVRON by Malcolm and */ /* incorporates suggestions by Gentleman and Marovich. See */ /* Malcolm M. A. (1972) Algorithms to reveal properties of */ /* floating-point arithmetic. Comms. of the ACM, 15, 949-951. */ /* Gentleman W. M. and Marovich S. B. (1974) More on algorithms */ /* that reveal properties of floating point arithmetic units. */ /* Comms. of the ACM, 17, 276-277. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Save statement .. */ /* .. */ /* .. Data statements .. */ /* .. */ /* .. Executable Statements .. */ if (first) { one = 1.; /* LBETA, LIEEE1, LT and LRND are the local values of BETA, */ /* IEEE1, T and RND. */ /* Throughout this routine we use the function DLAMC3 to ensure */ /* that relevant values are stored and not held in registers, or */ /* are not affected by optimizers. */ /* Compute a = 2.0**m with the smallest positive integer m such */ /* that */ /* fl( a + 1.0 ) = a. */ a = 1.; c__ = 1.; /* + WHILE( C.EQ.ONE )LOOP */ L10: if (c__ == one) { a *= 2; c__ = _starpu_dlamc3_(&a, &one); d__1 = -a; c__ = _starpu_dlamc3_(&c__, &d__1); goto L10; } /* + END WHILE */ /* Now compute b = 2.0**m with the smallest positive integer m */ /* such that */ /* fl( a + b ) .gt. a. */ b = 1.; c__ = _starpu_dlamc3_(&a, &b); /* + WHILE( C.EQ.A )LOOP */ L20: if (c__ == a) { b *= 2; c__ = _starpu_dlamc3_(&a, &b); goto L20; } /* + END WHILE */ /* Now compute the base. a and c are neighbouring floating point */ /* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so */ /* their difference is beta. Adding 0.25 to c is to ensure that it */ /* is truncated to beta and not ( beta - 1 ). */ qtr = one / 4; savec = c__; d__1 = -a; c__ = _starpu_dlamc3_(&c__, &d__1); lbeta = (integer) (c__ + qtr); /* Now determine whether rounding or chopping occurs, by adding a */ /* bit less than beta/2 and a bit more than beta/2 to a. */ b = (doublereal) lbeta; d__1 = b / 2; d__2 = -b / 100; f = _starpu_dlamc3_(&d__1, &d__2); c__ = _starpu_dlamc3_(&f, &a); if (c__ == a) { lrnd = TRUE_; } else { lrnd = FALSE_; } d__1 = b / 2; d__2 = b / 100; f = _starpu_dlamc3_(&d__1, &d__2); c__ = _starpu_dlamc3_(&f, &a); if (lrnd && c__ == a) { lrnd = FALSE_; } /* Try and decide whether rounding is done in the IEEE 'round to */ /* nearest' style. B/2 is half a unit in the last place of the two */ /* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit */ /* zero, and SAVEC is odd. Thus adding B/2 to A should not change */ /* A, but adding B/2 to SAVEC should change SAVEC. */ d__1 = b / 2; t1 = _starpu_dlamc3_(&d__1, &a); d__1 = b / 2; t2 = _starpu_dlamc3_(&d__1, &savec); lieee1 = t1 == a && t2 > savec && lrnd; /* Now find the mantissa, t. It should be the integer part of */ /* log to the base beta of a, however it is safer to determine t */ /* by powering. So we find t as the smallest positive integer for */ /* which */ /* fl( beta**t + 1.0 ) = 1.0. */ lt = 0; a = 1.; c__ = 1.; /* + WHILE( C.EQ.ONE )LOOP */ L30: if (c__ == one) { ++lt; a *= lbeta; c__ = _starpu_dlamc3_(&a, &one); d__1 = -a; c__ = _starpu_dlamc3_(&c__, &d__1); goto L30; } /* + END WHILE */ } *beta = lbeta; *t = lt; *rnd = lrnd; *ieee1 = lieee1; first = FALSE_; return 0; /* End of DLAMC1 */ } /* _starpu_dlamc1_ */ /* *********************************************************************** */ /* Subroutine */ int _starpu_dlamc2_(integer *beta, integer *t, logical *rnd, doublereal *eps, integer *emin, doublereal *rmin, integer *emax, doublereal *rmax) { /* Initialized data */ static logical first = TRUE_; static logical iwarn = FALSE_; /* Format strings */ static char fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre" "ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the va" "lue EMIN looks\002,\002 acceptable please comment out \002,/\002" " the IF block as marked within the code of routine\002,\002 DLAM" "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)"; /* System generated locals */ integer i__1; doublereal d__1, d__2, d__3, d__4, d__5; /* Builtin functions */ double pow_di(doublereal *, integer *); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ doublereal a, b, c__; integer i__; static integer lt; doublereal one, two; logical ieee; doublereal half; logical lrnd; static doublereal leps; doublereal zero; static integer lbeta; doublereal rbase; static integer lemin, lemax; integer gnmin; doublereal small; integer gpmin; doublereal third; static doublereal lrmin, lrmax; doublereal sixth; extern /* Subroutine */ int _starpu_dlamc1_(integer *, integer *, logical *, logical *); extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); logical lieee1; extern /* Subroutine */ int _starpu_dlamc4_(integer *, doublereal *, integer *), _starpu_dlamc5_(integer *, integer *, integer *, logical *, integer *, doublereal *); integer ngnmin, ngpmin; /* Fortran I/O blocks */ static cilist io___58 = { 0, 6, 0, fmt_9999, 0 }; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAMC2 determines the machine parameters specified in its argument */ /* list. */ /* Arguments */ /* ========= */ /* BETA (output) INTEGER */ /* The base of the machine. */ /* T (output) INTEGER */ /* The number of ( BETA ) digits in the mantissa. */ /* RND (output) LOGICAL */ /* Specifies whether proper rounding ( RND = .TRUE. ) or */ /* chopping ( RND = .FALSE. ) occurs in addition. This may not */ /* be a reliable guide to the way in which the machine performs */ /* its arithmetic. */ /* EPS (output) DOUBLE PRECISION */ /* The smallest positive number such that */ /* fl( 1.0 - EPS ) .LT. 1.0, */ /* where fl denotes the computed value. */ /* EMIN (output) INTEGER */ /* The minimum exponent before (gradual) underflow occurs. */ /* RMIN (output) DOUBLE PRECISION */ /* The smallest normalized number for the machine, given by */ /* BASE**( EMIN - 1 ), where BASE is the floating point value */ /* of BETA. */ /* EMAX (output) INTEGER */ /* The maximum exponent before overflow occurs. */ /* RMAX (output) DOUBLE PRECISION */ /* The largest positive number for the machine, given by */ /* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point */ /* value of BETA. */ /* Further Details */ /* =============== */ /* The computation of EPS is based on a routine PARANOIA by */ /* W. Kahan of the University of California at Berkeley. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Save statement .. */ /* .. */ /* .. Data statements .. */ /* .. */ /* .. Executable Statements .. */ if (first) { zero = 0.; one = 1.; two = 2.; /* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of */ /* BETA, T, RND, EPS, EMIN and RMIN. */ /* Throughout this routine we use the function DLAMC3 to ensure */ /* that relevant values are stored and not held in registers, or */ /* are not affected by optimizers. */ /* DLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */ _starpu_dlamc1_(&lbeta, <, &lrnd, &lieee1); /* Start to find EPS. */ b = (doublereal) lbeta; i__1 = -lt; a = pow_di(&b, &i__1); leps = a; /* Try some tricks to see whether or not this is the correct EPS. */ b = two / 3; half = one / 2; d__1 = -half; sixth = _starpu_dlamc3_(&b, &d__1); third = _starpu_dlamc3_(&sixth, &sixth); d__1 = -half; b = _starpu_dlamc3_(&third, &d__1); b = _starpu_dlamc3_(&b, &sixth); b = abs(b); if (b < leps) { b = leps; } leps = 1.; /* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ L10: if (leps > b && b > zero) { leps = b; d__1 = half * leps; /* Computing 5th power */ d__3 = two, d__4 = d__3, d__3 *= d__3; /* Computing 2nd power */ d__5 = leps; d__2 = d__4 * (d__3 * d__3) * (d__5 * d__5); c__ = _starpu_dlamc3_(&d__1, &d__2); d__1 = -c__; c__ = _starpu_dlamc3_(&half, &d__1); b = _starpu_dlamc3_(&half, &c__); d__1 = -b; c__ = _starpu_dlamc3_(&half, &d__1); b = _starpu_dlamc3_(&half, &c__); goto L10; } /* + END WHILE */ if (a < leps) { leps = a; } /* Computation of EPS complete. */ /* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). */ /* Keep dividing A by BETA until (gradual) underflow occurs. This */ /* is detected when we cannot recover the previous A. */ rbase = one / lbeta; small = one; for (i__ = 1; i__ <= 3; ++i__) { d__1 = small * rbase; small = _starpu_dlamc3_(&d__1, &zero); /* L20: */ } a = _starpu_dlamc3_(&one, &small); _starpu_dlamc4_(&ngpmin, &one, &lbeta); d__1 = -one; _starpu_dlamc4_(&ngnmin, &d__1, &lbeta); _starpu_dlamc4_(&gpmin, &a, &lbeta); d__1 = -a; _starpu_dlamc4_(&gnmin, &d__1, &lbeta); ieee = FALSE_; if (ngpmin == ngnmin && gpmin == gnmin) { if (ngpmin == gpmin) { lemin = ngpmin; /* ( Non twos-complement machines, no gradual underflow; */ /* e.g., VAX ) */ } else if (gpmin - ngpmin == 3) { lemin = ngpmin - 1 + lt; ieee = TRUE_; /* ( Non twos-complement machines, with gradual underflow; */ /* e.g., IEEE standard followers ) */ } else { lemin = min(ngpmin,gpmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else if (ngpmin == gpmin && ngnmin == gnmin) { if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { lemin = max(ngpmin,ngnmin); /* ( Twos-complement machines, no gradual underflow; */ /* e.g., CYBER 205 ) */ } else { lemin = min(ngpmin,ngnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) { if (gpmin - min(ngpmin,ngnmin) == 3) { lemin = max(ngpmin,ngnmin) - 1 + lt; /* ( Twos-complement machines with gradual underflow; */ /* no known machine ) */ } else { lemin = min(ngpmin,ngnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else { /* Computing MIN */ i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin); lemin = min(i__1,gnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } first = FALSE_; /* ** */ /* Comment out this if block if EMIN is ok */ if (iwarn) { first = TRUE_; s_wsfe(&io___58); do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer)); e_wsfe(); } /* ** */ /* Assume IEEE arithmetic if we found denormalised numbers above, */ /* or if arithmetic seems to round in the IEEE style, determined */ /* in routine DLAMC1. A true IEEE machine should have both things */ /* true; however, faulty machines may have one or the other. */ ieee = ieee || lieee1; /* Compute RMIN by successive division by BETA. We could compute */ /* RMIN as BASE**( EMIN - 1 ), but some machines underflow during */ /* this computation. */ lrmin = 1.; i__1 = 1 - lemin; for (i__ = 1; i__ <= i__1; ++i__) { d__1 = lrmin * rbase; lrmin = _starpu_dlamc3_(&d__1, &zero); /* L30: */ } /* Finally, call DLAMC5 to compute EMAX and RMAX. */ _starpu_dlamc5_(&lbeta, <, &lemin, &ieee, &lemax, &lrmax); } *beta = lbeta; *t = lt; *rnd = lrnd; *eps = leps; *emin = lemin; *rmin = lrmin; *emax = lemax; *rmax = lrmax; return 0; /* End of DLAMC2 */ } /* _starpu_dlamc2_ */ /* *********************************************************************** */ doublereal _starpu_dlamc3_(doublereal *a, doublereal *b) { /* System generated locals */ doublereal ret_val; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAMC3 is intended to force A and B to be stored prior to doing */ /* the addition of A and B , for use in situations where optimizers */ /* might hold one of these in a register. */ /* Arguments */ /* ========= */ /* A (input) DOUBLE PRECISION */ /* B (input) DOUBLE PRECISION */ /* The values A and B. */ /* ===================================================================== */ /* .. Executable Statements .. */ ret_val = *a + *b; return ret_val; /* End of DLAMC3 */ } /* _starpu_dlamc3_ */ /* *********************************************************************** */ /* Subroutine */ int _starpu_dlamc4_(integer *emin, doublereal *start, integer *base) { /* System generated locals */ integer i__1; doublereal d__1; /* Local variables */ doublereal a; integer i__; doublereal b1, b2, c1, c2, d1, d2, one, zero, rbase; extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAMC4 is a service routine for DLAMC2. */ /* Arguments */ /* ========= */ /* EMIN (output) INTEGER */ /* The minimum exponent before (gradual) underflow, computed by */ /* setting A = START and dividing by BASE until the previous A */ /* can not be recovered. */ /* START (input) DOUBLE PRECISION */ /* The starting point for determining EMIN. */ /* BASE (input) INTEGER */ /* The base of the machine. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ a = *start; one = 1.; rbase = one / *base; zero = 0.; *emin = 1; d__1 = a * rbase; b1 = _starpu_dlamc3_(&d__1, &zero); c1 = a; c2 = a; d1 = a; d2 = a; /* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. */ /* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */ L10: if (c1 == a && c2 == a && d1 == a && d2 == a) { --(*emin); a = b1; d__1 = a / *base; b1 = _starpu_dlamc3_(&d__1, &zero); d__1 = b1 * *base; c1 = _starpu_dlamc3_(&d__1, &zero); d1 = zero; i__1 = *base; for (i__ = 1; i__ <= i__1; ++i__) { d1 += b1; /* L20: */ } d__1 = a * rbase; b2 = _starpu_dlamc3_(&d__1, &zero); d__1 = b2 / rbase; c2 = _starpu_dlamc3_(&d__1, &zero); d2 = zero; i__1 = *base; for (i__ = 1; i__ <= i__1; ++i__) { d2 += b2; /* L30: */ } goto L10; } /* + END WHILE */ return 0; /* End of DLAMC4 */ } /* _starpu_dlamc4_ */ /* *********************************************************************** */ /* Subroutine */ int _starpu_dlamc5_(integer *beta, integer *p, integer *emin, logical *ieee, integer *emax, doublereal *rmax) { /* System generated locals */ integer i__1; doublereal d__1; /* Local variables */ integer i__; doublereal y, z__; integer try__, lexp; doublereal oldy; integer uexp, nbits; extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); doublereal recbas; integer exbits, expsum; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAMC5 attempts to compute RMAX, the largest machine floating-point */ /* number, without overflow. It assumes that EMAX + abs(EMIN) sum */ /* approximately to a power of 2. It will fail on machines where this */ /* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */ /* EMAX = 28718). It will also fail if the value supplied for EMIN is */ /* too large (i.e. too close to zero), probably with overflow. */ /* Arguments */ /* ========= */ /* BETA (input) INTEGER */ /* The base of floating-point arithmetic. */ /* P (input) INTEGER */ /* The number of base BETA digits in the mantissa of a */ /* floating-point value. */ /* EMIN (input) INTEGER */ /* The minimum exponent before (gradual) underflow. */ /* IEEE (input) LOGICAL */ /* A logical flag specifying whether or not the arithmetic */ /* system is thought to comply with the IEEE standard. */ /* EMAX (output) INTEGER */ /* The largest exponent before overflow */ /* RMAX (output) DOUBLE PRECISION */ /* The largest machine floating-point number. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* First compute LEXP and UEXP, two powers of 2 that bound */ /* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum */ /* approximately to the bound that is closest to abs(EMIN). */ /* (EMAX is the exponent of the required number RMAX). */ lexp = 1; exbits = 1; L10: try__ = lexp << 1; if (try__ <= -(*emin)) { lexp = try__; ++exbits; goto L10; } if (lexp == -(*emin)) { uexp = lexp; } else { uexp = try__; ++exbits; } /* Now -LEXP is less than or equal to EMIN, and -UEXP is greater */ /* than or equal to EMIN. EXBITS is the number of bits needed to */ /* store the exponent. */ if (uexp + *emin > -lexp - *emin) { expsum = lexp << 1; } else { expsum = uexp << 1; } /* EXPSUM is the exponent range, approximately equal to */ /* EMAX - EMIN + 1 . */ *emax = expsum + *emin - 1; nbits = exbits + 1 + *p; /* NBITS is the total number of bits needed to store a */ /* floating-point number. */ if (nbits % 2 == 1 && *beta == 2) { /* Either there are an odd number of bits used to store a */ /* floating-point number, which is unlikely, or some bits are */ /* not used in the representation of numbers, which is possible, */ /* (e.g. Cray machines) or the mantissa has an implicit bit, */ /* (e.g. IEEE machines, Dec Vax machines), which is perhaps the */ /* most likely. We have to assume the last alternative. */ /* If this is true, then we need to reduce EMAX by one because */ /* there must be some way of representing zero in an implicit-bit */ /* system. On machines like Cray, we are reducing EMAX by one */ /* unnecessarily. */ --(*emax); } if (*ieee) { /* Assume we are on an IEEE machine which reserves one exponent */ /* for infinity and NaN. */ --(*emax); } /* Now create RMAX, the largest machine number, which should */ /* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */ /* First compute 1.0 - BETA**(-P), being careful that the */ /* result is less than 1.0 . */ recbas = 1. / *beta; z__ = *beta - 1.; y = 0.; i__1 = *p; for (i__ = 1; i__ <= i__1; ++i__) { z__ *= recbas; if (y < 1.) { oldy = y; } y = _starpu_dlamc3_(&y, &z__); /* L20: */ } if (y >= 1.) { y = oldy; } /* Now multiply by BETA**EMAX to get RMAX. */ i__1 = *emax; for (i__ = 1; i__ <= i__1; ++i__) { d__1 = y * *beta; y = _starpu_dlamc3_(&d__1, &c_b32); /* L30: */ } *rmax = y; return 0; /* End of DLAMC5 */ } /* _starpu_dlamc5_ */ starpu-1.4.9+dfsg/min-dgels/additional/dlange.c000066400000000000000000000121061507764646700213750ustar00rootroot00000000000000/* dlange.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dlange_(char *norm, integer *m, integer *n, doublereal *a, integer *lda, doublereal *work) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j; doublereal sum, scale; extern logical _starpu_lsame_(char *, char *); doublereal value; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLANGE returns the value of the one norm, or the Frobenius norm, or */ /* the infinity norm, or the element of largest absolute value of a */ /* real matrix A. */ /* Description */ /* =========== */ /* DLANGE returns the value */ /* DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ /* ( */ /* ( norm1(A), NORM = '1', 'O' or 'o' */ /* ( */ /* ( normI(A), NORM = 'I' or 'i' */ /* ( */ /* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ /* where norm1 denotes the one norm of a matrix (maximum column sum), */ /* normI denotes the infinity norm of a matrix (maximum row sum) and */ /* normF denotes the Frobenius norm of a matrix (square root of sum of */ /* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies the value to be returned in DLANGE as described */ /* above. */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. When M = 0, */ /* DLANGE is set to zero. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. When N = 0, */ /* DLANGE is set to zero. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The m by n matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(M,1). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ /* where LWORK >= M when NORM = 'I'; otherwise, WORK is not */ /* referenced. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --work; /* Function Body */ if (min(*m,*n) == 0) { value = 0.; } else if (_starpu_lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ value = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); value = max(d__2,d__3); /* L10: */ } /* L20: */ } } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) norm == '1') { /* Find norm1(A). */ value = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = 0.; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L30: */ } value = max(value,sum); /* L40: */ } } else if (_starpu_lsame_(norm, "I")) { /* Find normI(A). */ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L50: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L60: */ } /* L70: */ } value = 0.; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = value, d__2 = work[i__]; value = max(d__1,d__2); /* L80: */ } } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { /* Find normF(A). */ scale = 0.; sum = 1.; i__1 = *n; for (j = 1; j <= i__1; ++j) { _starpu_dlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); /* L90: */ } value = scale * sqrt(sum); } ret_val = value; return ret_val; /* End of DLANGE */ } /* _starpu_dlange_ */ starpu-1.4.9+dfsg/min-dgels/additional/dlapy2.c000066400000000000000000000033661507764646700213460ustar00rootroot00000000000000/* dlapy2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_dlapy2_(doublereal *x, doublereal *y) { /* System generated locals */ doublereal ret_val, d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal w, z__, xabs, yabs; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary */ /* overflow. */ /* Arguments */ /* ========= */ /* X (input) DOUBLE PRECISION */ /* Y (input) DOUBLE PRECISION */ /* X and Y specify the values x and y. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ xabs = abs(*x); yabs = abs(*y); w = max(xabs,yabs); z__ = min(xabs,yabs); if (z__ == 0.) { ret_val = w; } else { /* Computing 2nd power */ d__1 = z__ / w; ret_val = w * sqrt(d__1 * d__1 + 1.); } return ret_val; /* End of DLAPY2 */ } /* _starpu_dlapy2_ */ starpu-1.4.9+dfsg/min-dgels/additional/dlarf.c000066400000000000000000000130411507764646700212320ustar00rootroot00000000000000/* dlarf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b4 = 1.; static doublereal c_b5 = 0.; static integer c__1 = 1; /* Subroutine */ int _starpu_dlarf_(char *side, integer *m, integer *n, doublereal *v, integer *incv, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work) { /* System generated locals */ integer c_dim1, c_offset; doublereal d__1; /* Local variables */ integer i__; logical applyleft; extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer lastc, lastv; extern integer _starpu_iladlc_(integer *, integer *, doublereal *, integer *), _starpu_iladlr_(integer *, integer *, doublereal *, integer *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARF applies a real elementary reflector H to a real m by n matrix */ /* C, from either the left or the right. H is represented in the form */ /* H = I - tau * v * v' */ /* where tau is a real scalar and v is a real vector. */ /* If tau = 0, then H is taken to be the unit matrix. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': form H * C */ /* = 'R': form C * H */ /* M (input) INTEGER */ /* The number of rows of the matrix C. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. */ /* V (input) DOUBLE PRECISION array, dimension */ /* (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ /* or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ /* The vector v in the representation of H. V is not used if */ /* TAU = 0. */ /* INCV (input) INTEGER */ /* The increment between elements of v. INCV <> 0. */ /* TAU (input) DOUBLE PRECISION */ /* The value tau in the representation of H. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the m by n matrix C. */ /* On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ /* or C * H if SIDE = 'R'. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (N) if SIDE = 'L' */ /* or (M) if SIDE = 'R' */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --v; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ applyleft = _starpu_lsame_(side, "L"); lastv = 0; lastc = 0; if (*tau != 0.) { /* Set up variables for scanning V. LASTV begins pointing to the end */ /* of V. */ if (applyleft) { lastv = *m; } else { lastv = *n; } if (*incv > 0) { i__ = (lastv - 1) * *incv + 1; } else { i__ = 1; } /* Look for the last non-zero row in V. */ while(lastv > 0 && v[i__] == 0.) { --lastv; i__ -= *incv; } if (applyleft) { /* Scan for the last non-zero column in C(1:lastv,:). */ lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); } else { /* Scan for the last non-zero row in C(:,1:lastv). */ lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); } } /* Note that lastc.eq.0 renders the BLAS operations null; no special */ /* case is needed at this level. */ if (applyleft) { /* Form H * C */ if (lastv > 0) { /* w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */ _starpu_dgemv_("Transpose", &lastv, &lastc, &c_b4, &c__[c_offset], ldc, & v[1], incv, &c_b5, &work[1], &c__1); /* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)' */ d__1 = -(*tau); _starpu_dger_(&lastv, &lastc, &d__1, &v[1], incv, &work[1], &c__1, &c__[ c_offset], ldc); } } else { /* Form C * H */ if (lastv > 0) { /* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ _starpu_dgemv_("No transpose", &lastc, &lastv, &c_b4, &c__[c_offset], ldc, &v[1], incv, &c_b5, &work[1], &c__1); /* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)' */ d__1 = -(*tau); _starpu_dger_(&lastc, &lastv, &d__1, &work[1], &c__1, &v[1], incv, &c__[ c_offset], ldc); } } return 0; /* End of DLARF */ } /* _starpu_dlarf_ */ starpu-1.4.9+dfsg/min-dgels/additional/dlarfb.c000066400000000000000000000522171507764646700214040ustar00rootroot00000000000000/* dlarfb.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b14 = 1.; static doublereal c_b25 = -1.; /* Subroutine */ int _starpu_dlarfb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, doublereal *v, integer * ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, doublereal *work, integer *ldwork) { /* System generated locals */ integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, work_offset, i__1, i__2; /* Local variables */ integer i__, j; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); integer lastc; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer lastv; extern integer _starpu_iladlc_(integer *, integer *, doublereal *, integer *), _starpu_iladlr_(integer *, integer *, doublereal *, integer *); char transt[1]; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARFB applies a real block reflector H or its transpose H' to a */ /* real m by n matrix C, from either the left or the right. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply H or H' from the Left */ /* = 'R': apply H or H' from the Right */ /* TRANS (input) CHARACTER*1 */ /* = 'N': apply H (No transpose) */ /* = 'T': apply H' (Transpose) */ /* DIRECT (input) CHARACTER*1 */ /* Indicates how H is formed from a product of elementary */ /* reflectors */ /* = 'F': H = H(1) H(2) . . . H(k) (Forward) */ /* = 'B': H = H(k) . . . H(2) H(1) (Backward) */ /* STOREV (input) CHARACTER*1 */ /* Indicates how the vectors which define the elementary */ /* reflectors are stored: */ /* = 'C': Columnwise */ /* = 'R': Rowwise */ /* M (input) INTEGER */ /* The number of rows of the matrix C. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. */ /* K (input) INTEGER */ /* The order of the matrix T (= the number of elementary */ /* reflectors whose product defines the block reflector). */ /* V (input) DOUBLE PRECISION array, dimension */ /* (LDV,K) if STOREV = 'C' */ /* (LDV,M) if STOREV = 'R' and SIDE = 'L' */ /* (LDV,N) if STOREV = 'R' and SIDE = 'R' */ /* The matrix V. See further details. */ /* LDV (input) INTEGER */ /* The leading dimension of the array V. */ /* If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); */ /* if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); */ /* if STOREV = 'R', LDV >= K. */ /* T (input) DOUBLE PRECISION array, dimension (LDT,K) */ /* The triangular k by k matrix T in the representation of the */ /* block reflector. */ /* LDT (input) INTEGER */ /* The leading dimension of the array T. LDT >= K. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the m by n matrix C. */ /* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDA >= max(1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) */ /* LDWORK (input) INTEGER */ /* The leading dimension of the array WORK. */ /* If SIDE = 'L', LDWORK >= max(1,N); */ /* if SIDE = 'R', LDWORK >= max(1,M). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; work_dim1 = *ldwork; work_offset = 1 + work_dim1; work -= work_offset; /* Function Body */ if (*m <= 0 || *n <= 0) { return 0; } if (_starpu_lsame_(trans, "N")) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } if (_starpu_lsame_(storev, "C")) { if (_starpu_lsame_(direct, "F")) { /* Let V = ( V1 ) (first K rows) */ /* ( V2 ) */ /* where V1 is unit lower triangular. */ if (_starpu_lsame_(side, "L")) { /* Form H * C or H' * C where C = ( C1 ) */ /* ( C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlr_(m, k, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); /* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) */ /* W := C1' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], &c__1); /* L10: */ } /* W := W * V1 */ _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); if (lastv > *k) { /* W := W + C2'*V2 */ i__1 = lastv - *k; _starpu_dgemm_("Transpose", "No transpose", &lastc, k, &i__1, & c_b14, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1], ldv, &c_b14, &work[work_offset], ldwork); } /* W := W * T' or W * T */ _starpu_dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V * W' */ if (lastv > *k) { /* C2 := C2 - V2 * W' */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "Transpose", &i__1, &lastc, k, & c_b25, &v[*k + 1 + v_dim1], ldv, &work[ work_offset], ldwork, &c_b14, &c__[*k + 1 + c_dim1], ldc); } /* W := W * V1' */ _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L20: */ } /* L30: */ } } else if (_starpu_lsame_(side, "R")) { /* Form C * H or C * H' where C = ( C1 C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlr_(n, k, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); /* W := C * V = (C1*V1 + C2*V2) (stored in WORK) */ /* W := C1 */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L40: */ } /* W := W * V1 */ _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); if (lastv > *k) { /* W := W + C2 * V2 */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "No transpose", &lastc, k, &i__1, & c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + 1 + v_dim1], ldv, &c_b14, &work[work_offset], ldwork); } /* W := W * T or W * T' */ _starpu_dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V' */ if (lastv > *k) { /* C2 := C2 - W * V2' */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "Transpose", &lastc, &i__1, k, & c_b25, &work[work_offset], ldwork, &v[*k + 1 + v_dim1], ldv, &c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc); } /* W := W * V1' */ _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; /* L50: */ } /* L60: */ } } } else { /* Let V = ( V1 ) */ /* ( V2 ) (last K rows) */ /* where V2 is unit upper triangular. */ if (_starpu_lsame_(side, "L")) { /* Form H * C or H' * C where C = ( C1 ) */ /* ( C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlr_(m, k, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); /* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) */ /* W := C2' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ j * work_dim1 + 1], &c__1); /* L70: */ } /* W := W * V2 */ _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ work_offset], ldwork); if (lastv > *k) { /* W := W + C1'*V1 */ i__1 = lastv - *k; _starpu_dgemm_("Transpose", "No transpose", &lastc, k, &i__1, & c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, & c_b14, &work[work_offset], ldwork); } /* W := W * T' or W * T */ _starpu_dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V * W' */ if (lastv > *k) { /* C1 := C1 - V1 * W' */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "Transpose", &i__1, &lastc, k, & c_b25, &v[v_offset], ldv, &work[work_offset], ldwork, &c_b14, &c__[c_offset], ldc); } /* W := W * V2' */ _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ work_offset], ldwork); /* C2 := C2 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L80: */ } /* L90: */ } } else if (_starpu_lsame_(side, "R")) { /* Form C * H or C * H' where C = ( C1 C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlr_(n, k, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); /* W := C * V = (C1*V1 + C2*V2) (stored in WORK) */ /* W := C2 */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, & work[j * work_dim1 + 1], &c__1); /* L100: */ } /* W := W * V2 */ _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ work_offset], ldwork); if (lastv > *k) { /* W := W + C1 * V1 */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "No transpose", &lastc, k, &i__1, & c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, & c_b14, &work[work_offset], ldwork); } /* W := W * T or W * T' */ _starpu_dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V' */ if (lastv > *k) { /* C1 := C1 - W * V1' */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "Transpose", &lastc, &i__1, k, & c_b25, &work[work_offset], ldwork, &v[v_offset], ldv, &c_b14, &c__[c_offset], ldc); } /* W := W * V2' */ _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ work_offset], ldwork); /* C2 := C2 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * work_dim1]; /* L110: */ } /* L120: */ } } } } else if (_starpu_lsame_(storev, "R")) { if (_starpu_lsame_(direct, "F")) { /* Let V = ( V1 V2 ) (V1: first K columns) */ /* where V1 is unit upper triangular. */ if (_starpu_lsame_(side, "L")) { /* Form H * C or H' * C where C = ( C1 ) */ /* ( C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlc_(k, m, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); /* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) */ /* W := C1' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], &c__1); /* L130: */ } /* W := W * V1' */ _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); if (lastv > *k) { /* W := W + C2'*V2' */ i__1 = lastv - *k; _starpu_dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b14, &c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + 1], ldv, &c_b14, &work[work_offset], ldwork); } /* W := W * T' or W * T */ _starpu_dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V' * W' */ if (lastv > *k) { /* C2 := C2 - V2' * W' */ i__1 = lastv - *k; _starpu_dgemm_("Transpose", "Transpose", &i__1, &lastc, k, &c_b25, &v[(*k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork, &c_b14, &c__[*k + 1 + c_dim1], ldc); } /* W := W * V1 */ _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L140: */ } /* L150: */ } } else if (_starpu_lsame_(side, "R")) { /* Form C * H or C * H' where C = ( C1 C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlc_(k, n, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); /* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) */ /* W := C1 */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L160: */ } /* W := W * V1' */ _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); if (lastv > *k) { /* W := W + C2 * V2' */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "Transpose", &lastc, k, &i__1, & c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 1) * v_dim1 + 1], ldv, &c_b14, &work[work_offset], ldwork); } /* W := W * T or W * T' */ _starpu_dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V */ if (lastv > *k) { /* C2 := C2 - W * V2 */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "No transpose", &lastc, &i__1, k, & c_b25, &work[work_offset], ldwork, &v[(*k + 1) * v_dim1 + 1], ldv, &c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc); } /* W := W * V1 */ _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; /* L170: */ } /* L180: */ } } } else { /* Let V = ( V1 V2 ) (V2: last K columns) */ /* where V2 is unit lower triangular. */ if (_starpu_lsame_(side, "L")) { /* Form H * C or H' * C where C = ( C1 ) */ /* ( C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlc_(k, m, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); /* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) */ /* W := C2' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ j * work_dim1 + 1], &c__1); /* L190: */ } /* W := W * V2' */ _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); if (lastv > *k) { /* W := W + C1'*V1' */ i__1 = lastv - *k; _starpu_dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, & work[work_offset], ldwork); } /* W := W * T' or W * T */ _starpu_dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V' * W' */ if (lastv > *k) { /* C1 := C1 - V1' * W' */ i__1 = lastv - *k; _starpu_dgemm_("Transpose", "Transpose", &i__1, &lastc, k, &c_b25, &v[v_offset], ldv, &work[work_offset], ldwork, & c_b14, &c__[c_offset], ldc); } /* W := W * V2 */ _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); /* C2 := C2 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L200: */ } /* L210: */ } } else if (_starpu_lsame_(side, "R")) { /* Form C * H or C * H' where C = ( C1 C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlc_(k, n, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); /* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) */ /* W := C2 */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L220: */ } /* W := W * V2' */ _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); if (lastv > *k) { /* W := W + C1 * V1' */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "Transpose", &lastc, k, &i__1, & c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, & c_b14, &work[work_offset], ldwork); } /* W := W * T or W * T' */ _starpu_dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V */ if (lastv > *k) { /* C1 := C1 - W * V1 */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "No transpose", &lastc, &i__1, k, & c_b25, &work[work_offset], ldwork, &v[v_offset], ldv, &c_b14, &c__[c_offset], ldc); } /* W := W * V2 */ _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * work_dim1]; /* L230: */ } /* L240: */ } } } } return 0; /* End of DLARFB */ } /* _starpu_dlarfb_ */ starpu-1.4.9+dfsg/min-dgels/additional/dlarfg.c000066400000000000000000000103401507764646700214000ustar00rootroot00000000000000/* dlarfg.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlarfg_(integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *tau) { /* System generated locals */ integer i__1; doublereal d__1; /* Builtin functions */ double d_sign(doublereal *, doublereal *); /* Local variables */ integer j, knt; doublereal beta; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal xnorm; extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); doublereal safmin, rsafmn; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARFG generates a real elementary reflector H of order n, such */ /* that */ /* H * ( alpha ) = ( beta ), H' * H = I. */ /* ( x ) ( 0 ) */ /* where alpha and beta are scalars, and x is an (n-1)-element real */ /* vector. H is represented in the form */ /* H = I - tau * ( 1 ) * ( 1 v' ) , */ /* ( v ) */ /* where tau is a real scalar and v is a real (n-1)-element */ /* vector. */ /* If the elements of x are all zero, then tau = 0 and H is taken to be */ /* the unit matrix. */ /* Otherwise 1 <= tau <= 2. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the elementary reflector. */ /* ALPHA (input/output) DOUBLE PRECISION */ /* On entry, the value alpha. */ /* On exit, it is overwritten with the value beta. */ /* X (input/output) DOUBLE PRECISION array, dimension */ /* (1+(N-2)*abs(INCX)) */ /* On entry, the vector x. */ /* On exit, it is overwritten with the vector v. */ /* INCX (input) INTEGER */ /* The increment between elements of X. INCX > 0. */ /* TAU (output) DOUBLE PRECISION */ /* The value tau. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --x; /* Function Body */ if (*n <= 1) { *tau = 0.; return 0; } i__1 = *n - 1; xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); if (xnorm == 0.) { /* H = I */ *tau = 0.; } else { /* general case */ d__1 = _starpu_dlapy2_(alpha, &xnorm); beta = -d_sign(&d__1, alpha); safmin = _starpu_dlamch_("S") / _starpu_dlamch_("E"); knt = 0; if (abs(beta) < safmin) { /* XNORM, BETA may be inaccurate; scale X and recompute them */ rsafmn = 1. / safmin; L10: ++knt; i__1 = *n - 1; _starpu_dscal_(&i__1, &rsafmn, &x[1], incx); beta *= rsafmn; *alpha *= rsafmn; if (abs(beta) < safmin) { goto L10; } /* New BETA is at most 1, at least SAFMIN */ i__1 = *n - 1; xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); d__1 = _starpu_dlapy2_(alpha, &xnorm); beta = -d_sign(&d__1, alpha); } *tau = (beta - *alpha) / beta; i__1 = *n - 1; d__1 = 1. / (*alpha - beta); _starpu_dscal_(&i__1, &d__1, &x[1], incx); /* If ALPHA is subnormal, it may lose relative accuracy */ i__1 = knt; for (j = 1; j <= i__1; ++j) { beta *= safmin; /* L20: */ } *alpha = beta; } return 0; /* End of DLARFG */ } /* _starpu_dlarfg_ */ starpu-1.4.9+dfsg/min-dgels/additional/dlarfp.c000066400000000000000000000116101507764646700214120ustar00rootroot00000000000000/* dlarfp.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlarfp_(integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *tau) { /* System generated locals */ integer i__1; doublereal d__1; /* Builtin functions */ double d_sign(doublereal *, doublereal *); /* Local variables */ integer j, knt; doublereal beta; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal xnorm; extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); doublereal safmin, rsafmn; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARFP generates a real elementary reflector H of order n, such */ /* that */ /* H * ( alpha ) = ( beta ), H' * H = I. */ /* ( x ) ( 0 ) */ /* where alpha and beta are scalars, beta is non-negative, and x is */ /* an (n-1)-element real vector. H is represented in the form */ /* H = I - tau * ( 1 ) * ( 1 v' ) , */ /* ( v ) */ /* where tau is a real scalar and v is a real (n-1)-element */ /* vector. */ /* If the elements of x are all zero, then tau = 0 and H is taken to be */ /* the unit matrix. */ /* Otherwise 1 <= tau <= 2. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the elementary reflector. */ /* ALPHA (input/output) DOUBLE PRECISION */ /* On entry, the value alpha. */ /* On exit, it is overwritten with the value beta. */ /* X (input/output) DOUBLE PRECISION array, dimension */ /* (1+(N-2)*abs(INCX)) */ /* On entry, the vector x. */ /* On exit, it is overwritten with the vector v. */ /* INCX (input) INTEGER */ /* The increment between elements of X. INCX > 0. */ /* TAU (output) DOUBLE PRECISION */ /* The value tau. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --x; /* Function Body */ if (*n <= 0) { *tau = 0.; return 0; } i__1 = *n - 1; xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); if (xnorm == 0.) { /* H = [+/-1, 0; I], sign chosen so ALPHA >= 0 */ if (*alpha >= 0.) { /* When TAU.eq.ZERO, the vector is special-cased to be */ /* all zeros in the application routines. We do not need */ /* to clear it. */ *tau = 0.; } else { /* However, the application routines rely on explicit */ /* zero checks when TAU.ne.ZERO, and we must clear X. */ *tau = 2.; i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { x[(j - 1) * *incx + 1] = 0.; } *alpha = -(*alpha); } } else { /* general case */ d__1 = _starpu_dlapy2_(alpha, &xnorm); beta = d_sign(&d__1, alpha); safmin = _starpu_dlamch_("S") / _starpu_dlamch_("E"); knt = 0; if (abs(beta) < safmin) { /* XNORM, BETA may be inaccurate; scale X and recompute them */ rsafmn = 1. / safmin; L10: ++knt; i__1 = *n - 1; _starpu_dscal_(&i__1, &rsafmn, &x[1], incx); beta *= rsafmn; *alpha *= rsafmn; if (abs(beta) < safmin) { goto L10; } /* New BETA is at most 1, at least SAFMIN */ i__1 = *n - 1; xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); d__1 = _starpu_dlapy2_(alpha, &xnorm); beta = d_sign(&d__1, alpha); } *alpha += beta; if (beta < 0.) { beta = -beta; *tau = -(*alpha) / beta; } else { *alpha = xnorm * (xnorm / *alpha); *tau = *alpha / beta; *alpha = -(*alpha); } i__1 = *n - 1; d__1 = 1. / *alpha; _starpu_dscal_(&i__1, &d__1, &x[1], incx); /* If BETA is subnormal, it may lose relative accuracy */ i__1 = knt; for (j = 1; j <= i__1; ++j) { beta *= safmin; /* L20: */ } *alpha = beta; } return 0; /* End of DLARFP */ } /* _starpu_dlarfp_ */ starpu-1.4.9+dfsg/min-dgels/additional/dlarft.c000066400000000000000000000231411507764646700214200ustar00rootroot00000000000000/* dlarft.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b8 = 0.; /* Subroutine */ int _starpu_dlarft_(char *direct, char *storev, integer *n, integer * k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, integer *ldt) { /* System generated locals */ integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; doublereal d__1; /* Local variables */ integer i__, j, prevlastv; doublereal vii; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer lastv; extern /* Subroutine */ int _starpu_dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARFT forms the triangular factor T of a real block reflector H */ /* of order n, which is defined as a product of k elementary reflectors. */ /* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */ /* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */ /* If STOREV = 'C', the vector which defines the elementary reflector */ /* H(i) is stored in the i-th column of the array V, and */ /* H = I - V * T * V' */ /* If STOREV = 'R', the vector which defines the elementary reflector */ /* H(i) is stored in the i-th row of the array V, and */ /* H = I - V' * T * V */ /* Arguments */ /* ========= */ /* DIRECT (input) CHARACTER*1 */ /* Specifies the order in which the elementary reflectors are */ /* multiplied to form the block reflector: */ /* = 'F': H = H(1) H(2) . . . H(k) (Forward) */ /* = 'B': H = H(k) . . . H(2) H(1) (Backward) */ /* STOREV (input) CHARACTER*1 */ /* Specifies how the vectors which define the elementary */ /* reflectors are stored (see also Further Details): */ /* = 'C': columnwise */ /* = 'R': rowwise */ /* N (input) INTEGER */ /* The order of the block reflector H. N >= 0. */ /* K (input) INTEGER */ /* The order of the triangular factor T (= the number of */ /* elementary reflectors). K >= 1. */ /* V (input/output) DOUBLE PRECISION array, dimension */ /* (LDV,K) if STOREV = 'C' */ /* (LDV,N) if STOREV = 'R' */ /* The matrix V. See further details. */ /* LDV (input) INTEGER */ /* The leading dimension of the array V. */ /* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i). */ /* T (output) DOUBLE PRECISION array, dimension (LDT,K) */ /* The k by k triangular factor T of the block reflector. */ /* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */ /* lower triangular. The rest of the array is not used. */ /* LDT (input) INTEGER */ /* The leading dimension of the array T. LDT >= K. */ /* Further Details */ /* =============== */ /* The shape of the matrix V and the storage of the vectors which define */ /* the H(i) is best illustrated by the following example with n = 5 and */ /* k = 3. The elements equal to 1 are not stored; the corresponding */ /* array elements are modified but restored on exit. The rest of the */ /* array is not used. */ /* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */ /* V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */ /* ( v1 1 ) ( 1 v2 v2 v2 ) */ /* ( v1 v2 1 ) ( 1 v3 v3 ) */ /* ( v1 v2 v3 ) */ /* ( v1 v2 v3 ) */ /* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */ /* V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */ /* ( v1 v2 v3 ) ( v2 v2 v2 1 ) */ /* ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */ /* ( 1 v3 ) */ /* ( 1 ) */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; --tau; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; /* Function Body */ if (*n == 0) { return 0; } if (_starpu_lsame_(direct, "F")) { prevlastv = *n; i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { prevlastv = max(i__,prevlastv); if (tau[i__] == 0.) { /* H(i) = I */ i__2 = i__; for (j = 1; j <= i__2; ++j) { t[j + i__ * t_dim1] = 0.; /* L10: */ } } else { /* general case */ vii = v[i__ + i__ * v_dim1]; v[i__ + i__ * v_dim1] = 1.; if (_starpu_lsame_(storev, "C")) { /* Skip any trailing zeros. */ i__2 = i__ + 1; for (lastv = *n; lastv >= i__2; --lastv) { if (v[lastv + i__ * v_dim1] != 0.) { break; } } j = min(lastv,prevlastv); /* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)' * V(i:j,i) */ i__2 = j - i__ + 1; i__3 = i__ - 1; d__1 = -tau[i__]; _starpu_dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b8, &t[ i__ * t_dim1 + 1], &c__1); } else { /* Skip any trailing zeros. */ i__2 = i__ + 1; for (lastv = *n; lastv >= i__2; --lastv) { if (v[i__ + lastv * v_dim1] != 0.) { break; } } j = min(lastv,prevlastv); /* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)' */ i__2 = i__ - 1; i__3 = j - i__ + 1; d__1 = -tau[i__]; _starpu_dgemv_("No transpose", &i__2, &i__3, &d__1, &v[i__ * v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & c_b8, &t[i__ * t_dim1 + 1], &c__1); } v[i__ + i__ * v_dim1] = vii; /* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ i__2 = i__ - 1; _starpu_dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); t[i__ + i__ * t_dim1] = tau[i__]; if (i__ > 1) { prevlastv = max(prevlastv,lastv); } else { prevlastv = lastv; } } /* L20: */ } } else { prevlastv = 1; for (i__ = *k; i__ >= 1; --i__) { if (tau[i__] == 0.) { /* H(i) = I */ i__1 = *k; for (j = i__; j <= i__1; ++j) { t[j + i__ * t_dim1] = 0.; /* L30: */ } } else { /* general case */ if (i__ < *k) { if (_starpu_lsame_(storev, "C")) { vii = v[*n - *k + i__ + i__ * v_dim1]; v[*n - *k + i__ + i__ * v_dim1] = 1.; /* Skip any leading zeros. */ i__1 = i__ - 1; for (lastv = 1; lastv <= i__1; ++lastv) { if (v[lastv + i__ * v_dim1] != 0.) { break; } } j = max(lastv,prevlastv); /* T(i+1:k,i) := */ /* - tau(i) * V(j:n-k+i,i+1:k)' * V(j:n-k+i,i) */ i__1 = *n - *k + i__ - j + 1; i__2 = *k - i__; d__1 = -tau[i__]; _starpu_dgemv_("Transpose", &i__1, &i__2, &d__1, &v[j + (i__ + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], & c__1, &c_b8, &t[i__ + 1 + i__ * t_dim1], & c__1); v[*n - *k + i__ + i__ * v_dim1] = vii; } else { vii = v[i__ + (*n - *k + i__) * v_dim1]; v[i__ + (*n - *k + i__) * v_dim1] = 1.; /* Skip any leading zeros. */ i__1 = i__ - 1; for (lastv = 1; lastv <= i__1; ++lastv) { if (v[i__ + lastv * v_dim1] != 0.) { break; } } j = max(lastv,prevlastv); /* T(i+1:k,i) := */ /* - tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)' */ i__1 = *k - i__; i__2 = *n - *k + i__ - j + 1; d__1 = -tau[i__]; _starpu_dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], ldv, &c_b8, &t[i__ + 1 + i__ * t_dim1], &c__1); v[i__ + (*n - *k + i__) * v_dim1] = vii; } /* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ i__1 = *k - i__; _starpu_dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * t_dim1], &c__1) ; if (i__ > 1) { prevlastv = min(prevlastv,lastv); } else { prevlastv = lastv; } } t[i__ + i__ * t_dim1] = tau[i__]; } /* L40: */ } } return 0; /* End of DLARFT */ } /* _starpu_dlarft_ */ starpu-1.4.9+dfsg/min-dgels/additional/dlascl.c000066400000000000000000000212241507764646700214060ustar00rootroot00000000000000/* dlascl.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlascl_(char *type__, integer *kl, integer *ku, doublereal *cfrom, doublereal *cto, integer *m, integer *n, doublereal *a, integer *lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; /* Local variables */ integer i__, j, k1, k2, k3, k4; doublereal mul, cto1; logical done; doublereal ctoc; extern logical _starpu_lsame_(char *, char *); integer itype; doublereal cfrom1; extern doublereal _starpu_dlamch_(char *); doublereal cfromc; extern logical _starpu_disnan_(doublereal *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum, smlnum; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASCL multiplies the M by N real matrix A by the real scalar */ /* CTO/CFROM. This is done without over/underflow as long as the final */ /* result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that */ /* A may be full, upper triangular, lower triangular, upper Hessenberg, */ /* or banded. */ /* Arguments */ /* ========= */ /* TYPE (input) CHARACTER*1 */ /* TYPE indices the storage type of the input matrix. */ /* = 'G': A is a full matrix. */ /* = 'L': A is a lower triangular matrix. */ /* = 'U': A is an upper triangular matrix. */ /* = 'H': A is an upper Hessenberg matrix. */ /* = 'B': A is a symmetric band matrix with lower bandwidth KL */ /* and upper bandwidth KU and with the only the lower */ /* half stored. */ /* = 'Q': A is a symmetric band matrix with lower bandwidth KL */ /* and upper bandwidth KU and with the only the upper */ /* half stored. */ /* = 'Z': A is a band matrix with lower bandwidth KL and upper */ /* bandwidth KU. */ /* KL (input) INTEGER */ /* The lower bandwidth of A. Referenced only if TYPE = 'B', */ /* 'Q' or 'Z'. */ /* KU (input) INTEGER */ /* The upper bandwidth of A. Referenced only if TYPE = 'B', */ /* 'Q' or 'Z'. */ /* CFROM (input) DOUBLE PRECISION */ /* CTO (input) DOUBLE PRECISION */ /* The matrix A is multiplied by CTO/CFROM. A(I,J) is computed */ /* without over/underflow if the final result CTO*A(I,J)/CFROM */ /* can be represented without over/underflow. CFROM must be */ /* nonzero. */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* The matrix to be multiplied by CTO/CFROM. See TYPE for the */ /* storage type. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* INFO (output) INTEGER */ /* 0 - successful exit */ /* <0 - if INFO = -i, the i-th argument had an illegal value. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; if (_starpu_lsame_(type__, "G")) { itype = 0; } else if (_starpu_lsame_(type__, "L")) { itype = 1; } else if (_starpu_lsame_(type__, "U")) { itype = 2; } else if (_starpu_lsame_(type__, "H")) { itype = 3; } else if (_starpu_lsame_(type__, "B")) { itype = 4; } else if (_starpu_lsame_(type__, "Q")) { itype = 5; } else if (_starpu_lsame_(type__, "Z")) { itype = 6; } else { itype = -1; } if (itype == -1) { *info = -1; } else if (*cfrom == 0. || _starpu_disnan_(cfrom)) { *info = -4; } else if (_starpu_disnan_(cto)) { *info = -5; } else if (*m < 0) { *info = -6; } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { *info = -7; } else if (itype <= 3 && *lda < max(1,*m)) { *info = -9; } else if (itype >= 4) { /* Computing MAX */ i__1 = *m - 1; if (*kl < 0 || *kl > max(i__1,0)) { *info = -2; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = *n - 1; if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && *kl != *ku) { *info = -3; } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { *info = -9; } } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLASCL", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *m == 0) { return 0; } /* Get machine parameters */ smlnum = _starpu_dlamch_("S"); bignum = 1. / smlnum; cfromc = *cfrom; ctoc = *cto; L10: cfrom1 = cfromc * smlnum; if (cfrom1 == cfromc) { /* CFROMC is an inf. Multiply by a correctly signed zero for */ /* finite CTOC, or a NaN if CTOC is infinite. */ mul = ctoc / cfromc; done = TRUE_; cto1 = ctoc; } else { cto1 = ctoc / bignum; if (cto1 == ctoc) { /* CTOC is either 0 or an inf. In both cases, CTOC itself */ /* serves as the correct multiplication factor. */ mul = ctoc; done = TRUE_; cfromc = 1.; } else if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { mul = smlnum; done = FALSE_; cfromc = cfrom1; } else if (abs(cto1) > abs(cfromc)) { mul = bignum; done = FALSE_; ctoc = cto1; } else { mul = ctoc / cfromc; done = TRUE_; } } if (itype == 0) { /* Full matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] *= mul; /* L20: */ } /* L30: */ } } else if (itype == 1) { /* Lower triangular matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = j; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] *= mul; /* L40: */ } /* L50: */ } } else if (itype == 2) { /* Upper triangular matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = min(j,*m); for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] *= mul; /* L60: */ } /* L70: */ } } else if (itype == 3) { /* Upper Hessenberg matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = j + 1; i__2 = min(i__3,*m); for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] *= mul; /* L80: */ } /* L90: */ } } else if (itype == 4) { /* Lower half of a symmetric band matrix */ k3 = *kl + 1; k4 = *n + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = k3, i__4 = k4 - j; i__2 = min(i__3,i__4); for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] *= mul; /* L100: */ } /* L110: */ } } else if (itype == 5) { /* Upper half of a symmetric band matrix */ k1 = *ku + 2; k3 = *ku + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = k1 - j; i__3 = k3; for (i__ = max(i__2,1); i__ <= i__3; ++i__) { a[i__ + j * a_dim1] *= mul; /* L120: */ } /* L130: */ } } else if (itype == 6) { /* Band matrix */ k1 = *kl + *ku + 2; k2 = *kl + 1; k3 = (*kl << 1) + *ku + 1; k4 = *kl + *ku + 1 + *m; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__3 = k1 - j; /* Computing MIN */ i__4 = k3, i__5 = k4 - j; i__2 = min(i__4,i__5); for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { a[i__ + j * a_dim1] *= mul; /* L140: */ } /* L150: */ } } if (! done) { goto L10; } return 0; /* End of DLASCL */ } /* _starpu_dlascl_ */ starpu-1.4.9+dfsg/min-dgels/additional/dlaset.c000066400000000000000000000101001507764646700214070ustar00rootroot00000000000000/* dlaset.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaset_(char *uplo, integer *m, integer *n, doublereal * alpha, doublereal *beta, doublereal *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j; extern logical _starpu_lsame_(char *, char *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASET initializes an m-by-n matrix A to BETA on the diagonal and */ /* ALPHA on the offdiagonals. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies the part of the matrix A to be set. */ /* = 'U': Upper triangular part is set; the strictly lower */ /* triangular part of A is not changed. */ /* = 'L': Lower triangular part is set; the strictly upper */ /* triangular part of A is not changed. */ /* Otherwise: All of the matrix A is set. */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* ALPHA (input) DOUBLE PRECISION */ /* The constant to which the offdiagonal elements are to be set. */ /* BETA (input) DOUBLE PRECISION */ /* The constant to which the diagonal elements are to be set. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On exit, the leading m-by-n submatrix of A is set as follows: */ /* if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, */ /* if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, */ /* otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, */ /* and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ if (_starpu_lsame_(uplo, "U")) { /* Set the strictly upper triangular or trapezoidal part of the */ /* array to ALPHA. */ i__1 = *n; for (j = 2; j <= i__1; ++j) { /* Computing MIN */ i__3 = j - 1; i__2 = min(i__3,*m); for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = *alpha; /* L10: */ } /* L20: */ } } else if (_starpu_lsame_(uplo, "L")) { /* Set the strictly lower triangular or trapezoidal part of the */ /* array to ALPHA. */ i__1 = min(*m,*n); for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = j + 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = *alpha; /* L30: */ } /* L40: */ } } else { /* Set the leading m-by-n submatrix to ALPHA. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = *alpha; /* L50: */ } /* L60: */ } } /* Set the first min(M,N) diagonal elements to BETA. */ i__1 = min(*m,*n); for (i__ = 1; i__ <= i__1; ++i__) { a[i__ + i__ * a_dim1] = *beta; /* L70: */ } return 0; /* End of DLASET */ } /* _starpu_dlaset_ */ starpu-1.4.9+dfsg/min-dgels/additional/dlassq.c000066400000000000000000000064141507764646700214370ustar00rootroot00000000000000/* dlassq.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlassq_(integer *n, doublereal *x, integer *incx, doublereal *scale, doublereal *sumsq) { /* System generated locals */ integer i__1, i__2; doublereal d__1; /* Local variables */ integer ix; doublereal absxi; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASSQ returns the values scl and smsq such that */ /* ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, */ /* where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is */ /* assumed to be non-negative and scl returns the value */ /* scl = max( scale, abs( x( i ) ) ). */ /* scale and sumsq must be supplied in SCALE and SUMSQ and */ /* scl and smsq are overwritten on SCALE and SUMSQ respectively. */ /* The routine makes only one pass through the vector x. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of elements to be used from the vector X. */ /* X (input) DOUBLE PRECISION array, dimension (N) */ /* The vector for which a scaled sum of squares is computed. */ /* x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. */ /* INCX (input) INTEGER */ /* The increment between successive values of the vector X. */ /* INCX > 0. */ /* SCALE (input/output) DOUBLE PRECISION */ /* On entry, the value scale in the equation above. */ /* On exit, SCALE is overwritten with scl , the scaling factor */ /* for the sum of squares. */ /* SUMSQ (input/output) DOUBLE PRECISION */ /* On entry, the value sumsq in the equation above. */ /* On exit, SUMSQ is overwritten with smsq , the basic sum of */ /* squares from which scl has been factored out. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --x; /* Function Body */ if (*n > 0) { i__1 = (*n - 1) * *incx + 1; i__2 = *incx; for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { if (x[ix] != 0.) { absxi = (d__1 = x[ix], abs(d__1)); if (*scale < absxi) { /* Computing 2nd power */ d__1 = *scale / absxi; *sumsq = *sumsq * (d__1 * d__1) + 1; *scale = absxi; } else { /* Computing 2nd power */ d__1 = absxi / *scale; *sumsq += d__1 * d__1; } } /* L10: */ } } return 0; /* End of DLASSQ */ } /* _starpu_dlassq_ */ starpu-1.4.9+dfsg/min-dgels/additional/dnrm2.c000066400000000000000000000042701507764646700211700ustar00rootroot00000000000000/* dnrm2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_dnrm2_(integer *n, doublereal *x, integer *incx) { /* System generated locals */ integer i__1, i__2; doublereal ret_val, d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer ix; doublereal ssq, norm, scale, absxi; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DNRM2 returns the euclidean norm of a vector via the function */ /* name, so that */ /* DNRM2 := sqrt( x'*x ) */ /* -- This version written on 25-October-1982. */ /* Modified on 14-October-1993 to inline the call to DLASSQ. */ /* Sven Hammarling, Nag Ltd. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Parameter adjustments */ --x; /* Function Body */ if (*n < 1 || *incx < 1) { norm = 0.; } else if (*n == 1) { norm = abs(x[1]); } else { scale = 0.; ssq = 1.; /* The following loop is equivalent to this call to the LAPACK */ /* auxiliary routine: */ /* CALL DLASSQ( N, X, INCX, SCALE, SSQ ) */ i__1 = (*n - 1) * *incx + 1; i__2 = *incx; for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { if (x[ix] != 0.) { absxi = (d__1 = x[ix], abs(d__1)); if (scale < absxi) { /* Computing 2nd power */ d__1 = scale / absxi; ssq = ssq * (d__1 * d__1) + 1.; scale = absxi; } else { /* Computing 2nd power */ d__1 = absxi / scale; ssq += d__1 * d__1; } } /* L10: */ } norm = scale * sqrt(ssq); } ret_val = norm; return ret_val; /* End of DNRM2. */ } /* _starpu_dnrm2_ */ starpu-1.4.9+dfsg/min-dgels/additional/dorm2r.c000066400000000000000000000141501507764646700213510ustar00rootroot00000000000000/* dorm2r.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dorm2r_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; /* Local variables */ integer i__, i1, i2, i3, ic, jc, mi, ni, nq; doublereal aii; logical left; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical notran; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORM2R overwrites the general real m by n matrix C with */ /* Q * C if SIDE = 'L' and TRANS = 'N', or */ /* Q'* C if SIDE = 'L' and TRANS = 'T', or */ /* C * Q if SIDE = 'R' and TRANS = 'N', or */ /* C * Q' if SIDE = 'R' and TRANS = 'T', */ /* where Q is a real orthogonal matrix defined as the product of k */ /* elementary reflectors */ /* Q = H(1) H(2) . . . H(k) */ /* as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n */ /* if SIDE = 'R'. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q' from the Left */ /* = 'R': apply Q or Q' from the Right */ /* TRANS (input) CHARACTER*1 */ /* = 'N': apply Q (No transpose) */ /* = 'T': apply Q' (Transpose) */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines */ /* the matrix Q. */ /* If SIDE = 'L', M >= K >= 0; */ /* if SIDE = 'R', N >= K >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,K) */ /* The i-th column must contain the vector which defines the */ /* elementary reflector H(i), for i = 1,2,...,k, as returned by */ /* DGEQRF in the first k columns of its array argument A. */ /* A is modified by the routine but restored on exit. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. */ /* If SIDE = 'L', LDA >= max(1,M); */ /* if SIDE = 'R', LDA >= max(1,N). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGEQRF. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the m by n matrix C. */ /* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (N) if SIDE = 'L', */ /* (M) if SIDE = 'R' */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); /* NQ is the order of Q */ if (left) { nq = *m; } else { nq = *n; } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORM2R", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { return 0; } if (left && ! notran || ! left && notran) { i1 = 1; i2 = *k; i3 = 1; } else { i1 = *k; i2 = 1; i3 = -1; } if (left) { ni = *n; jc = 1; } else { mi = *m; ic = 1; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { if (left) { /* H(i) is applied to C(i:m,1:n) */ mi = *m - i__ + 1; ic = i__; } else { /* H(i) is applied to C(1:m,i:n) */ ni = *n - i__ + 1; jc = i__; } /* Apply H(i) */ aii = a[i__ + i__ * a_dim1]; a[i__ + i__ * a_dim1] = 1.; _starpu_dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ ic + jc * c_dim1], ldc, &work[1]); a[i__ + i__ * a_dim1] = aii; /* L10: */ } return 0; /* End of DORM2R */ } /* _starpu_dorm2r_ */ starpu-1.4.9+dfsg/min-dgels/additional/dorml2.c000066400000000000000000000141061507764646700213440ustar00rootroot00000000000000/* dorml2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dorml2_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; /* Local variables */ integer i__, i1, i2, i3, ic, jc, mi, ni, nq; doublereal aii; logical left; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical notran; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORML2 overwrites the general real m by n matrix C with */ /* Q * C if SIDE = 'L' and TRANS = 'N', or */ /* Q'* C if SIDE = 'L' and TRANS = 'T', or */ /* C * Q if SIDE = 'R' and TRANS = 'N', or */ /* C * Q' if SIDE = 'R' and TRANS = 'T', */ /* where Q is a real orthogonal matrix defined as the product of k */ /* elementary reflectors */ /* Q = H(k) . . . H(2) H(1) */ /* as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n */ /* if SIDE = 'R'. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q' from the Left */ /* = 'R': apply Q or Q' from the Right */ /* TRANS (input) CHARACTER*1 */ /* = 'N': apply Q (No transpose) */ /* = 'T': apply Q' (Transpose) */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines */ /* the matrix Q. */ /* If SIDE = 'L', M >= K >= 0; */ /* if SIDE = 'R', N >= K >= 0. */ /* A (input) DOUBLE PRECISION array, dimension */ /* (LDA,M) if SIDE = 'L', */ /* (LDA,N) if SIDE = 'R' */ /* The i-th row must contain the vector which defines the */ /* elementary reflector H(i), for i = 1,2,...,k, as returned by */ /* DGELQF in the first k rows of its array argument A. */ /* A is modified by the routine but restored on exit. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,K). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGELQF. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the m by n matrix C. */ /* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (N) if SIDE = 'L', */ /* (M) if SIDE = 'R' */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); /* NQ is the order of Q */ if (left) { nq = *m; } else { nq = *n; } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,*k)) { *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORML2", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { return 0; } if (left && notran || ! left && ! notran) { i1 = 1; i2 = *k; i3 = 1; } else { i1 = *k; i2 = 1; i3 = -1; } if (left) { ni = *n; jc = 1; } else { mi = *m; ic = 1; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { if (left) { /* H(i) is applied to C(i:m,1:n) */ mi = *m - i__ + 1; ic = i__; } else { /* H(i) is applied to C(1:m,i:n) */ ni = *n - i__ + 1; jc = i__; } /* Apply H(i) */ aii = a[i__ + i__ * a_dim1]; a[i__ + i__ * a_dim1] = 1.; _starpu_dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ ic + jc * c_dim1], ldc, &work[1]); a[i__ + i__ * a_dim1] = aii; /* L10: */ } return 0; /* End of DORML2 */ } /* _starpu_dorml2_ */ starpu-1.4.9+dfsg/min-dgels/additional/dormlq.c000066400000000000000000000225331507764646700214460ustar00rootroot00000000000000/* dormlq.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; static integer c__65 = 65; /* Subroutine */ int _starpu_dormlq_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, i__5; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ integer i__; doublereal t[4160] /* was [65][64] */; integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; logical left; extern logical _starpu_lsame_(char *, char *); integer nbmin, iinfo; extern /* Subroutine */ int _starpu_dorml2_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); logical notran; integer ldwork; char transt[1]; integer lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORMLQ overwrites the general real M-by-N matrix C with */ /* SIDE = 'L' SIDE = 'R' */ /* TRANS = 'N': Q * C C * Q */ /* TRANS = 'T': Q**T * C C * Q**T */ /* where Q is a real orthogonal matrix defined as the product of k */ /* elementary reflectors */ /* Q = H(k) . . . H(2) H(1) */ /* as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N */ /* if SIDE = 'R'. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q**T from the Left; */ /* = 'R': apply Q or Q**T from the Right. */ /* TRANS (input) CHARACTER*1 */ /* = 'N': No transpose, apply Q; */ /* = 'T': Transpose, apply Q**T. */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines */ /* the matrix Q. */ /* If SIDE = 'L', M >= K >= 0; */ /* if SIDE = 'R', N >= K >= 0. */ /* A (input) DOUBLE PRECISION array, dimension */ /* (LDA,M) if SIDE = 'L', */ /* (LDA,N) if SIDE = 'R' */ /* The i-th row must contain the vector which defines the */ /* elementary reflector H(i), for i = 1,2,...,k, as returned by */ /* DGELQF in the first k rows of its array argument A. */ /* A is modified by the routine but restored on exit. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,K). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGELQF. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the M-by-N matrix C. */ /* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If SIDE = 'L', LWORK >= max(1,N); */ /* if SIDE = 'R', LWORK >= max(1,M). */ /* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ /* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ /* blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); lquery = *lwork == -1; /* NQ is the order of Q and NW is the minimum dimension of WORK */ if (left) { nq = *m; nw = *n; } else { nq = *n; nw = *m; } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,*k)) { *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; } else if (*lwork < max(1,nw) && ! lquery) { *info = -12; } if (*info == 0) { /* Determine the block size. NB may be at most NBMAX, where NBMAX */ /* is used to define the local array T. */ /* Computing MIN */ /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DORMLQ", ch__1, m, n, k, &c_n1); nb = min(i__1,i__2); lwkopt = max(1,nw) * nb; work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORMLQ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { work[1] = 1.; return 0; } nbmin = 2; ldwork = nw; if (nb > 1 && nb < *k) { iws = nw * nb; if (*lwork < iws) { nb = *lwork / ldwork; /* Computing MAX */ /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORMLQ", ch__1, m, n, k, &c_n1); nbmin = max(i__1,i__2); } } else { iws = nw; } if (nb < nbmin || nb >= *k) { /* Use unblocked code */ _starpu_dorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], &iinfo); } else { /* Use blocked code */ if (left && notran || ! left && ! notran) { i1 = 1; i2 = *k; i3 = nb; } else { i1 = (*k - 1) / nb * nb + 1; i2 = 1; i3 = -nb; } if (left) { ni = *n; jc = 1; } else { mi = *m; ic = 1; } if (notran) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__4 = nb, i__5 = *k - i__ + 1; ib = min(i__4,i__5); /* Form the triangular factor of the block reflector */ /* H = H(i) H(i+1) . . . H(i+ib-1) */ i__4 = nq - i__ + 1; _starpu_dlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], t, &c__65); if (left) { /* H or H' is applied to C(i:m,1:n) */ mi = *m - i__ + 1; ic = i__; } else { /* H or H' is applied to C(1:m,i:n) */ ni = *n - i__ + 1; jc = i__; } /* Apply H or H' */ _starpu_dlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], ldc, &work[1], &ldwork); /* L10: */ } } work[1] = (doublereal) lwkopt; return 0; /* End of DORMLQ */ } /* _starpu_dormlq_ */ starpu-1.4.9+dfsg/min-dgels/additional/dormqr.c000066400000000000000000000223061507764646700214520ustar00rootroot00000000000000/* dormqr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; static integer c__65 = 65; /* Subroutine */ int _starpu_dormqr_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, i__5; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ integer i__; doublereal t[4160] /* was [65][64] */; integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; logical left; extern logical _starpu_lsame_(char *, char *); integer nbmin, iinfo; extern /* Subroutine */ int _starpu_dorm2r_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); logical notran; integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORMQR overwrites the general real M-by-N matrix C with */ /* SIDE = 'L' SIDE = 'R' */ /* TRANS = 'N': Q * C C * Q */ /* TRANS = 'T': Q**T * C C * Q**T */ /* where Q is a real orthogonal matrix defined as the product of k */ /* elementary reflectors */ /* Q = H(1) H(2) . . . H(k) */ /* as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N */ /* if SIDE = 'R'. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q**T from the Left; */ /* = 'R': apply Q or Q**T from the Right. */ /* TRANS (input) CHARACTER*1 */ /* = 'N': No transpose, apply Q; */ /* = 'T': Transpose, apply Q**T. */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines */ /* the matrix Q. */ /* If SIDE = 'L', M >= K >= 0; */ /* if SIDE = 'R', N >= K >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,K) */ /* The i-th column must contain the vector which defines the */ /* elementary reflector H(i), for i = 1,2,...,k, as returned by */ /* DGEQRF in the first k columns of its array argument A. */ /* A is modified by the routine but restored on exit. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. */ /* If SIDE = 'L', LDA >= max(1,M); */ /* if SIDE = 'R', LDA >= max(1,N). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGEQRF. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the M-by-N matrix C. */ /* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If SIDE = 'L', LWORK >= max(1,N); */ /* if SIDE = 'R', LWORK >= max(1,M). */ /* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ /* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ /* blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); lquery = *lwork == -1; /* NQ is the order of Q and NW is the minimum dimension of WORK */ if (left) { nq = *m; nw = *n; } else { nq = *n; nw = *m; } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; } else if (*lwork < max(1,nw) && ! lquery) { *info = -12; } if (*info == 0) { /* Determine the block size. NB may be at most NBMAX, where NBMAX */ /* is used to define the local array T. */ /* Computing MIN */ /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, m, n, k, &c_n1); nb = min(i__1,i__2); lwkopt = max(1,nw) * nb; work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORMQR", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { work[1] = 1.; return 0; } nbmin = 2; ldwork = nw; if (nb > 1 && nb < *k) { iws = nw * nb; if (*lwork < iws) { nb = *lwork / ldwork; /* Computing MAX */ /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORMQR", ch__1, m, n, k, &c_n1); nbmin = max(i__1,i__2); } } else { iws = nw; } if (nb < nbmin || nb >= *k) { /* Use unblocked code */ _starpu_dorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], &iinfo); } else { /* Use blocked code */ if (left && ! notran || ! left && notran) { i1 = 1; i2 = *k; i3 = nb; } else { i1 = (*k - 1) / nb * nb + 1; i2 = 1; i3 = -nb; } if (left) { ni = *n; jc = 1; } else { mi = *m; ic = 1; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__4 = nb, i__5 = *k - i__ + 1; ib = min(i__4,i__5); /* Form the triangular factor of the block reflector */ /* H = H(i) H(i+1) . . . H(i+ib-1) */ i__4 = nq - i__ + 1; _starpu_dlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], t, &c__65) ; if (left) { /* H or H' is applied to C(i:m,1:n) */ mi = *m - i__ + 1; ic = i__; } else { /* H or H' is applied to C(1:m,i:n) */ ni = *n - i__ + 1; jc = i__; } /* Apply H or H' */ _starpu_dlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], ldc, &work[1], &ldwork); /* L10: */ } } work[1] = (doublereal) lwkopt; return 0; /* End of DORMQR */ } /* _starpu_dormqr_ */ starpu-1.4.9+dfsg/min-dgels/additional/dscal.c000066400000000000000000000041561507764646700212370ustar00rootroot00000000000000/* dscal.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dscal_(integer *n, doublereal *da, doublereal *dx, integer *incx) { /* System generated locals */ integer i__1, i__2; /* Local variables */ integer i__, m, mp1, nincx; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* * */ /* scales a vector by a constant. */ /* uses unrolled loops for increment equal to one. */ /* jack dongarra, linpack, 3/11/78. */ /* modified 3/93 to return if incx .le. 0. */ /* modified 12/3/93, array(1) declarations changed to array(*) */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Parameter adjustments */ --dx; /* Function Body */ if (*n <= 0 || *incx <= 0) { return 0; } if (*incx == 1) { goto L20; } /* code for increment not equal to 1 */ nincx = *n * *incx; i__1 = nincx; i__2 = *incx; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { dx[i__] = *da * dx[i__]; /* L10: */ } return 0; /* code for increment equal to 1 */ /* clean-up loop */ L20: m = *n % 5; if (m == 0) { goto L40; } i__2 = m; for (i__ = 1; i__ <= i__2; ++i__) { dx[i__] = *da * dx[i__]; /* L30: */ } if (*n < 5) { return 0; } L40: mp1 = m + 1; i__2 = *n; for (i__ = mp1; i__ <= i__2; i__ += 5) { dx[i__] = *da * dx[i__]; dx[i__ + 1] = *da * dx[i__ + 1]; dx[i__ + 2] = *da * dx[i__ + 2]; dx[i__ + 3] = *da * dx[i__ + 3]; dx[i__ + 4] = *da * dx[i__ + 4]; /* L50: */ } return 0; } /* _starpu_dscal_ */ starpu-1.4.9+dfsg/min-dgels/additional/dtrmm.c000066400000000000000000000265551507764646700213030ustar00rootroot00000000000000/* dtrmm.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtrmm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, integer * lda, doublereal *b, integer *ldb) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, k, info; doublereal temp; logical lside; extern logical _starpu_lsame_(char *, char *); integer nrowa; logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nounit; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRMM performs one of the matrix-matrix operations */ /* B := alpha*op( A )*B, or B := alpha*B*op( A ), */ /* where alpha is a scalar, B is an m by n matrix, A is a unit, or */ /* non-unit, upper or lower triangular matrix and op( A ) is one of */ /* op( A ) = A or op( A ) = A'. */ /* Arguments */ /* ========== */ /* SIDE - CHARACTER*1. */ /* On entry, SIDE specifies whether op( A ) multiplies B from */ /* the left or right as follows: */ /* SIDE = 'L' or 'l' B := alpha*op( A )*B. */ /* SIDE = 'R' or 'r' B := alpha*B*op( A ). */ /* Unchanged on exit. */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix A is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANSA - CHARACTER*1. */ /* On entry, TRANSA specifies the form of op( A ) to be used in */ /* the matrix multiplication as follows: */ /* TRANSA = 'N' or 'n' op( A ) = A. */ /* TRANSA = 'T' or 't' op( A ) = A'. */ /* TRANSA = 'C' or 'c' op( A ) = A'. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit triangular */ /* as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* M - INTEGER. */ /* On entry, M specifies the number of rows of B. M must be at */ /* least zero. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of B. N must be */ /* at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. When alpha is */ /* zero then A is not referenced and B need not be set before */ /* entry. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m */ /* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. */ /* Before entry with UPLO = 'U' or 'u', the leading k by k */ /* upper triangular part of the array A must contain the upper */ /* triangular matrix and the strictly lower triangular part of */ /* A is not referenced. */ /* Before entry with UPLO = 'L' or 'l', the leading k by k */ /* lower triangular part of the array A must contain the lower */ /* triangular matrix and the strictly upper triangular part of */ /* A is not referenced. */ /* Note that when DIAG = 'U' or 'u', the diagonal elements of */ /* A are not referenced either, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. When SIDE = 'L' or 'l' then */ /* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' */ /* then LDA must be at least max( 1, n ). */ /* Unchanged on exit. */ /* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */ /* Before entry, the leading m by n part of the array B must */ /* contain the matrix B, and on exit is overwritten by the */ /* transformed matrix. */ /* LDB - INTEGER. */ /* On entry, LDB specifies the first dimension of B as declared */ /* in the calling (sub) program. LDB must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* Level 3 Blas routine. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Parameters .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ lside = _starpu_lsame_(side, "L"); if (lside) { nrowa = *m; } else { nrowa = *n; } nounit = _starpu_lsame_(diag, "N"); upper = _starpu_lsame_(uplo, "U"); info = 0; if (! lside && ! _starpu_lsame_(side, "R")) { info = 1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { info = 2; } else if (! _starpu_lsame_(transa, "N") && ! _starpu_lsame_(transa, "T") && ! _starpu_lsame_(transa, "C")) { info = 3; } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, "N")) { info = 4; } else if (*m < 0) { info = 5; } else if (*n < 0) { info = 6; } else if (*lda < max(1,nrowa)) { info = 9; } else if (*ldb < max(1,*m)) { info = 11; } if (info != 0) { _starpu_xerbla_("DTRMM ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0) { return 0; } /* And when alpha.eq.zero. */ if (*alpha == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = 0.; /* L10: */ } /* L20: */ } return 0; } /* Start the operations. */ if (lside) { if (_starpu_lsame_(transa, "N")) { /* Form B := alpha*A*B. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (k = 1; k <= i__2; ++k) { if (b[k + j * b_dim1] != 0.) { temp = *alpha * b[k + j * b_dim1]; i__3 = k - 1; for (i__ = 1; i__ <= i__3; ++i__) { b[i__ + j * b_dim1] += temp * a[i__ + k * a_dim1]; /* L30: */ } if (nounit) { temp *= a[k + k * a_dim1]; } b[k + j * b_dim1] = temp; } /* L40: */ } /* L50: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { for (k = *m; k >= 1; --k) { if (b[k + j * b_dim1] != 0.) { temp = *alpha * b[k + j * b_dim1]; b[k + j * b_dim1] = temp; if (nounit) { b[k + j * b_dim1] *= a[k + k * a_dim1]; } i__2 = *m; for (i__ = k + 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] += temp * a[i__ + k * a_dim1]; /* L60: */ } } /* L70: */ } /* L80: */ } } } else { /* Form B := alpha*A'*B. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { temp = b[i__ + j * b_dim1]; if (nounit) { temp *= a[i__ + i__ * a_dim1]; } i__2 = i__ - 1; for (k = 1; k <= i__2; ++k) { temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; /* L90: */ } b[i__ + j * b_dim1] = *alpha * temp; /* L100: */ } /* L110: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = b[i__ + j * b_dim1]; if (nounit) { temp *= a[i__ + i__ * a_dim1]; } i__3 = *m; for (k = i__ + 1; k <= i__3; ++k) { temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; /* L120: */ } b[i__ + j * b_dim1] = *alpha * temp; /* L130: */ } /* L140: */ } } } } else { if (_starpu_lsame_(transa, "N")) { /* Form B := alpha*B*A. */ if (upper) { for (j = *n; j >= 1; --j) { temp = *alpha; if (nounit) { temp *= a[j + j * a_dim1]; } i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; /* L150: */ } i__1 = j - 1; for (k = 1; k <= i__1; ++k) { if (a[k + j * a_dim1] != 0.) { temp = *alpha * a[k + j * a_dim1]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] += temp * b[i__ + k * b_dim1]; /* L160: */ } } /* L170: */ } /* L180: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = *alpha; if (nounit) { temp *= a[j + j * a_dim1]; } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; /* L190: */ } i__2 = *n; for (k = j + 1; k <= i__2; ++k) { if (a[k + j * a_dim1] != 0.) { temp = *alpha * a[k + j * a_dim1]; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { b[i__ + j * b_dim1] += temp * b[i__ + k * b_dim1]; /* L200: */ } } /* L210: */ } /* L220: */ } } } else { /* Form B := alpha*B*A'. */ if (upper) { i__1 = *n; for (k = 1; k <= i__1; ++k) { i__2 = k - 1; for (j = 1; j <= i__2; ++j) { if (a[j + k * a_dim1] != 0.) { temp = *alpha * a[j + k * a_dim1]; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { b[i__ + j * b_dim1] += temp * b[i__ + k * b_dim1]; /* L230: */ } } /* L240: */ } temp = *alpha; if (nounit) { temp *= a[k + k * a_dim1]; } if (temp != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; /* L250: */ } } /* L260: */ } } else { for (k = *n; k >= 1; --k) { i__1 = *n; for (j = k + 1; j <= i__1; ++j) { if (a[j + k * a_dim1] != 0.) { temp = *alpha * a[j + k * a_dim1]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] += temp * b[i__ + k * b_dim1]; /* L270: */ } } /* L280: */ } temp = *alpha; if (nounit) { temp *= a[k + k * a_dim1]; } if (temp != 1.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; /* L290: */ } } /* L300: */ } } } } return 0; /* End of DTRMM . */ } /* _starpu_dtrmm_ */ starpu-1.4.9+dfsg/min-dgels/additional/dtrmv.c000066400000000000000000000205511507764646700213020ustar00rootroot00000000000000/* dtrmv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtrmv_(char *uplo, char *trans, char *diag, integer *n, doublereal *a, integer *lda, doublereal *x, integer *incx) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, ix, jx, kx, info; doublereal temp; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nounit; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRMV performs one of the matrix-vector operations */ /* x := A*x, or x := A'*x, */ /* where x is an n element vector and A is an n by n unit, or non-unit, */ /* upper or lower triangular matrix. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' x := A*x. */ /* TRANS = 'T' or 't' x := A'*x. */ /* TRANS = 'C' or 'c' x := A'*x. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading n by n */ /* upper triangular part of the array A must contain the upper */ /* triangular matrix and the strictly lower triangular part of */ /* A is not referenced. */ /* Before entry with UPLO = 'L' or 'l', the leading n by n */ /* lower triangular part of the array A must contain the lower */ /* triangular matrix and the strictly upper triangular part of */ /* A is not referenced. */ /* Note that when DIAG = 'U' or 'u', the diagonal elements of */ /* A are not referenced either, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* max( 1, n ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. On exit, X is overwritten with the */ /* tranformed vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --x; /* Function Body */ info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { info = 2; } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, "N")) { info = 3; } else if (*n < 0) { info = 4; } else if (*lda < max(1,*n)) { info = 6; } else if (*incx == 0) { info = 8; } if (info != 0) { _starpu_xerbla_("DTRMV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } nounit = _starpu_lsame_(diag, "N"); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through A. */ if (_starpu_lsame_(trans, "N")) { /* Form x := A*x. */ if (_starpu_lsame_(uplo, "U")) { if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { temp = x[j]; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { x[i__] += temp * a[i__ + j * a_dim1]; /* L10: */ } if (nounit) { x[j] *= a[j + j * a_dim1]; } } /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = x[jx]; ix = kx; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { x[ix] += temp * a[i__ + j * a_dim1]; ix += *incx; /* L30: */ } if (nounit) { x[jx] *= a[j + j * a_dim1]; } } jx += *incx; /* L40: */ } } } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { if (x[j] != 0.) { temp = x[j]; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { x[i__] += temp * a[i__ + j * a_dim1]; /* L50: */ } if (nounit) { x[j] *= a[j + j * a_dim1]; } } /* L60: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { if (x[jx] != 0.) { temp = x[jx]; ix = kx; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { x[ix] += temp * a[i__ + j * a_dim1]; ix -= *incx; /* L70: */ } if (nounit) { x[jx] *= a[j + j * a_dim1]; } } jx -= *incx; /* L80: */ } } } } else { /* Form x := A'*x. */ if (_starpu_lsame_(uplo, "U")) { if (*incx == 1) { for (j = *n; j >= 1; --j) { temp = x[j]; if (nounit) { temp *= a[j + j * a_dim1]; } for (i__ = j - 1; i__ >= 1; --i__) { temp += a[i__ + j * a_dim1] * x[i__]; /* L90: */ } x[j] = temp; /* L100: */ } } else { jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { temp = x[jx]; ix = jx; if (nounit) { temp *= a[j + j * a_dim1]; } for (i__ = j - 1; i__ >= 1; --i__) { ix -= *incx; temp += a[i__ + j * a_dim1] * x[ix]; /* L110: */ } x[jx] = temp; jx -= *incx; /* L120: */ } } } else { if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[j]; if (nounit) { temp *= a[j + j * a_dim1]; } i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { temp += a[i__ + j * a_dim1] * x[i__]; /* L130: */ } x[j] = temp; /* L140: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[jx]; ix = jx; if (nounit) { temp *= a[j + j * a_dim1]; } i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { ix += *incx; temp += a[i__ + j * a_dim1] * x[ix]; /* L150: */ } x[jx] = temp; jx += *incx; /* L160: */ } } } } return 0; /* End of DTRMV . */ } /* _starpu_dtrmv_ */ starpu-1.4.9+dfsg/min-dgels/additional/dtrsm.c000066400000000000000000000301641507764646700213000ustar00rootroot00000000000000/* dtrsm.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtrsm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, integer * lda, doublereal *b, integer *ldb) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, k, info; doublereal temp; logical lside; extern logical _starpu_lsame_(char *, char *); integer nrowa; logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nounit; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRSM solves one of the matrix equations */ /* op( A )*X = alpha*B, or X*op( A ) = alpha*B, */ /* where alpha is a scalar, X and B are m by n matrices, A is a unit, or */ /* non-unit, upper or lower triangular matrix and op( A ) is one of */ /* op( A ) = A or op( A ) = A'. */ /* The matrix X is overwritten on B. */ /* Arguments */ /* ========== */ /* SIDE - CHARACTER*1. */ /* On entry, SIDE specifies whether op( A ) appears on the left */ /* or right of X as follows: */ /* SIDE = 'L' or 'l' op( A )*X = alpha*B. */ /* SIDE = 'R' or 'r' X*op( A ) = alpha*B. */ /* Unchanged on exit. */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix A is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANSA - CHARACTER*1. */ /* On entry, TRANSA specifies the form of op( A ) to be used in */ /* the matrix multiplication as follows: */ /* TRANSA = 'N' or 'n' op( A ) = A. */ /* TRANSA = 'T' or 't' op( A ) = A'. */ /* TRANSA = 'C' or 'c' op( A ) = A'. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit triangular */ /* as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* M - INTEGER. */ /* On entry, M specifies the number of rows of B. M must be at */ /* least zero. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of B. N must be */ /* at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. When alpha is */ /* zero then A is not referenced and B need not be set before */ /* entry. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m */ /* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. */ /* Before entry with UPLO = 'U' or 'u', the leading k by k */ /* upper triangular part of the array A must contain the upper */ /* triangular matrix and the strictly lower triangular part of */ /* A is not referenced. */ /* Before entry with UPLO = 'L' or 'l', the leading k by k */ /* lower triangular part of the array A must contain the lower */ /* triangular matrix and the strictly upper triangular part of */ /* A is not referenced. */ /* Note that when DIAG = 'U' or 'u', the diagonal elements of */ /* A are not referenced either, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. When SIDE = 'L' or 'l' then */ /* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' */ /* then LDA must be at least max( 1, n ). */ /* Unchanged on exit. */ /* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */ /* Before entry, the leading m by n part of the array B must */ /* contain the right-hand side matrix B, and on exit is */ /* overwritten by the solution matrix X. */ /* LDB - INTEGER. */ /* On entry, LDB specifies the first dimension of B as declared */ /* in the calling (sub) program. LDB must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* Level 3 Blas routine. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Parameters .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ lside = _starpu_lsame_(side, "L"); if (lside) { nrowa = *m; } else { nrowa = *n; } nounit = _starpu_lsame_(diag, "N"); upper = _starpu_lsame_(uplo, "U"); info = 0; if (! lside && ! _starpu_lsame_(side, "R")) { info = 1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { info = 2; } else if (! _starpu_lsame_(transa, "N") && ! _starpu_lsame_(transa, "T") && ! _starpu_lsame_(transa, "C")) { info = 3; } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, "N")) { info = 4; } else if (*m < 0) { info = 5; } else if (*n < 0) { info = 6; } else if (*lda < max(1,nrowa)) { info = 9; } else if (*ldb < max(1,*m)) { info = 11; } if (info != 0) { _starpu_xerbla_("DTRSM ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0) { return 0; } /* And when alpha.eq.zero. */ if (*alpha == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = 0.; /* L10: */ } /* L20: */ } return 0; } /* Start the operations. */ if (lside) { if (_starpu_lsame_(transa, "N")) { /* Form B := alpha*inv( A )*B. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*alpha != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] ; /* L30: */ } } for (k = *m; k >= 1; --k) { if (b[k + j * b_dim1] != 0.) { if (nounit) { b[k + j * b_dim1] /= a[k + k * a_dim1]; } i__2 = k - 1; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ i__ + k * a_dim1]; /* L40: */ } } /* L50: */ } /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*alpha != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] ; /* L70: */ } } i__2 = *m; for (k = 1; k <= i__2; ++k) { if (b[k + j * b_dim1] != 0.) { if (nounit) { b[k + j * b_dim1] /= a[k + k * a_dim1]; } i__3 = *m; for (i__ = k + 1; i__ <= i__3; ++i__) { b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ i__ + k * a_dim1]; /* L80: */ } } /* L90: */ } /* L100: */ } } } else { /* Form B := alpha*inv( A' )*B. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = *alpha * b[i__ + j * b_dim1]; i__3 = i__ - 1; for (k = 1; k <= i__3; ++k) { temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; /* L110: */ } if (nounit) { temp /= a[i__ + i__ * a_dim1]; } b[i__ + j * b_dim1] = temp; /* L120: */ } /* L130: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { temp = *alpha * b[i__ + j * b_dim1]; i__2 = *m; for (k = i__ + 1; k <= i__2; ++k) { temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; /* L140: */ } if (nounit) { temp /= a[i__ + i__ * a_dim1]; } b[i__ + j * b_dim1] = temp; /* L150: */ } /* L160: */ } } } } else { if (_starpu_lsame_(transa, "N")) { /* Form B := alpha*B*inv( A ). */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*alpha != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] ; /* L170: */ } } i__2 = j - 1; for (k = 1; k <= i__2; ++k) { if (a[k + j * a_dim1] != 0.) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ i__ + k * b_dim1]; /* L180: */ } } /* L190: */ } if (nounit) { temp = 1. / a[j + j * a_dim1]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; /* L200: */ } } /* L210: */ } } else { for (j = *n; j >= 1; --j) { if (*alpha != 1.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] ; /* L220: */ } } i__1 = *n; for (k = j + 1; k <= i__1; ++k) { if (a[k + j * a_dim1] != 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ i__ + k * b_dim1]; /* L230: */ } } /* L240: */ } if (nounit) { temp = 1. / a[j + j * a_dim1]; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; /* L250: */ } } /* L260: */ } } } else { /* Form B := alpha*B*inv( A' ). */ if (upper) { for (k = *n; k >= 1; --k) { if (nounit) { temp = 1. / a[k + k * a_dim1]; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; /* L270: */ } } i__1 = k - 1; for (j = 1; j <= i__1; ++j) { if (a[j + k * a_dim1] != 0.) { temp = a[j + k * a_dim1]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] -= temp * b[i__ + k * b_dim1]; /* L280: */ } } /* L290: */ } if (*alpha != 1.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] ; /* L300: */ } } /* L310: */ } } else { i__1 = *n; for (k = 1; k <= i__1; ++k) { if (nounit) { temp = 1. / a[k + k * a_dim1]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; /* L320: */ } } i__2 = *n; for (j = k + 1; j <= i__2; ++j) { if (a[j + k * a_dim1] != 0.) { temp = a[j + k * a_dim1]; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { b[i__ + j * b_dim1] -= temp * b[i__ + k * b_dim1]; /* L330: */ } } /* L340: */ } if (*alpha != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] ; /* L350: */ } } /* L360: */ } } } } return 0; /* End of DTRSM . */ } /* _starpu_dtrsm_ */ starpu-1.4.9+dfsg/min-dgels/additional/dtrtrs.c000066400000000000000000000126071507764646700214730ustar00rootroot00000000000000/* dtrtrs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b12 = 1.; /* Subroutine */ int _starpu_dtrtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer * ldb, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_( char *, integer *); logical nounit; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRTRS solves a triangular system of the form */ /* A * X = B or A**T * X = B, */ /* where A is a triangular matrix of order N, and B is an N-by-NRHS */ /* matrix. A check is made to verify that A is nonsingular. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ /* DIAG (input) CHARACTER*1 */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The triangular matrix A. If UPLO = 'U', the leading N-by-N */ /* upper triangular part of the array A contains the upper */ /* triangular matrix, and the strictly lower triangular part of */ /* A is not referenced. If UPLO = 'L', the leading N-by-N lower */ /* triangular part of the array A contains the lower triangular */ /* matrix, and the strictly upper triangular part of A is not */ /* referenced. If DIAG = 'U', the diagonal elements of A are */ /* also not referenced and are assumed to be 1. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side matrix B. */ /* On exit, if INFO = 0, the solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the i-th diagonal element of A is zero, */ /* indicating that the matrix is singular and the solutions */ /* X have not been computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; nounit = _starpu_lsame_(diag, "N"); if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*nrhs < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTRTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check for singularity. */ if (nounit) { i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (a[*info + *info * a_dim1] == 0.) { return 0; } /* L10: */ } } *info = 0; /* Solve A * x = b or A' * x = b. */ _starpu_dtrsm_("Left", uplo, trans, diag, n, nrhs, &c_b12, &a[a_offset], lda, &b[ b_offset], ldb); return 0; /* End of DTRTRS */ } /* _starpu_dtrtrs_ */ starpu-1.4.9+dfsg/min-dgels/additional/f2c.h000066400000000000000000000111201507764646700206150ustar00rootroot00000000000000/* f2c.h -- Standard Fortran to C header file */ /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ #ifndef F2C_INCLUDE #define F2C_INCLUDE typedef long int integer; typedef unsigned long int uinteger; typedef char *address; typedef short int shortint; typedef float real; typedef double doublereal; typedef struct { real r, i; } complex; typedef struct { doublereal r, i; } doublecomplex; typedef long int logical; typedef short int shortlogical; typedef char logical1; typedef char integer1; #ifdef INTEGER_STAR_8 /* Adjust for integer*8. */ typedef long long longint; /* system-dependent */ typedef unsigned long long ulongint; /* system-dependent */ #define qbit_clear(a,b) ((a) & ~((ulongint)1 << (b))) #define qbit_set(a,b) ((a) | ((ulongint)1 << (b))) #endif #define TRUE_ (1) #define FALSE_ (0) /* Extern is for use with -E */ #ifndef Extern #define Extern extern #endif /* I/O stuff */ #ifdef f2c_i2 /* for -i2 */ typedef short flag; typedef short ftnlen; typedef short ftnint; #else typedef long int flag; typedef long int ftnlen; typedef long int ftnint; #endif /*external read, write*/ typedef struct { flag cierr; ftnint ciunit; flag ciend; char *cifmt; ftnint cirec; } cilist; /*internal read, write*/ typedef struct { flag icierr; char *iciunit; flag iciend; char *icifmt; ftnint icirlen; ftnint icirnum; } icilist; /*open*/ typedef struct { flag oerr; ftnint ounit; char *ofnm; ftnlen ofnmlen; char *osta; char *oacc; char *ofm; ftnint orl; char *oblnk; } olist; /*close*/ typedef struct { flag cerr; ftnint cunit; char *csta; } cllist; /*rewind, backspace, endfile*/ typedef struct { flag aerr; ftnint aunit; } alist; /* inquire */ typedef struct { flag inerr; ftnint inunit; char *infile; ftnlen infilen; ftnint *inex; /*parameters in standard's order*/ ftnint *inopen; ftnint *innum; ftnint *innamed; char *inname; ftnlen innamlen; char *inacc; ftnlen inacclen; char *inseq; ftnlen inseqlen; char *indir; ftnlen indirlen; char *infmt; ftnlen infmtlen; char *inform; ftnint informlen; char *inunf; ftnlen inunflen; ftnint *inrecl; ftnint *innrec; char *inblank; ftnlen inblanklen; } inlist; #define VOID void union Multitype { /* for multiple entry points */ integer1 g; shortint h; integer i; /* longint j; */ real r; doublereal d; complex c; doublecomplex z; }; typedef union Multitype Multitype; /*typedef long int Long;*/ /* No longer used; formerly in Namelist */ struct Vardesc { /* for Namelist */ char *name; char *addr; ftnlen *dims; int type; }; typedef struct Vardesc Vardesc; struct Namelist { char *name; Vardesc **vars; int nvars; }; typedef struct Namelist Namelist; #define abs(x) ((x) >= 0 ? (x) : -(x)) #define dabs(x) (doublereal)abs(x) #define min(a,b) ((a) <= (b) ? (a) : (b)) #define max(a,b) ((a) >= (b) ? (a) : (b)) #define dmin(a,b) (doublereal)min(a,b) #define dmax(a,b) (doublereal)max(a,b) #define bit_test(a,b) ((a) >> (b) & 1) #define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) #define bit_set(a,b) ((a) | ((uinteger)1 << (b))) /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 #ifdef __cplusplus typedef int /* Unknown procedure type */ (*U_fp)(...); typedef shortint (*J_fp)(...); typedef integer (*I_fp)(...); typedef real (*R_fp)(...); typedef doublereal (*D_fp)(...), (*E_fp)(...); typedef /* Complex */ VOID (*C_fp)(...); typedef /* Double Complex */ VOID (*Z_fp)(...); typedef logical (*L_fp)(...); typedef shortlogical (*K_fp)(...); typedef /* Character */ VOID (*H_fp)(...); typedef /* Subroutine */ int (*S_fp)(...); #else typedef int /* Unknown procedure type */ (*U_fp)(); typedef shortint (*J_fp)(); typedef integer (*I_fp)(); typedef real (*R_fp)(); typedef doublereal (*D_fp)(), (*E_fp)(); typedef /* Complex */ VOID (*C_fp)(); typedef /* Double Complex */ VOID (*Z_fp)(); typedef logical (*L_fp)(); typedef shortlogical (*K_fp)(); typedef /* Character */ VOID (*H_fp)(); typedef /* Subroutine */ int (*S_fp)(); #endif /* E_fp is for real functions when -R is not specified */ typedef VOID C_f; /* complex function */ typedef VOID H_f; /* character function */ typedef VOID Z_f; /* double complex function */ typedef doublereal E_f; /* real function with -R not specified */ /* undef any lower-case symbols that your C compiler predefines, e.g.: */ #ifndef Skip_f2c_Undefs #undef cray #undef gcos #undef mc68010 #undef mc68020 #undef mips #undef pdp11 #undef sgi #undef sparc #undef sun #undef sun2 #undef sun3 #undef sun4 #undef u370 #undef u3b #undef u3b2 #undef u3b5 #undef unix #undef vax #endif #endif starpu-1.4.9+dfsg/min-dgels/additional/fio.h000066400000000000000000000055731507764646700207370ustar00rootroot00000000000000#ifndef SYSDEP_H_INCLUDED #include "sysdep1.h" #endif #include "stdio.h" #include "errno.h" #ifndef NULL /* ANSI C */ #include "stddef.h" #endif #ifndef SEEK_SET #define SEEK_SET 0 #define SEEK_CUR 1 #define SEEK_END 2 #endif #ifndef FOPEN #define FOPEN fopen #endif #ifndef FREOPEN #define FREOPEN freopen #endif #ifndef FSEEK #define FSEEK fseek #endif #ifndef FSTAT #define FSTAT fstat #endif #ifndef FTELL #define FTELL ftell #endif #ifndef OFF_T #define OFF_T long #endif #ifndef STAT_ST #define STAT_ST stat #endif #ifndef STAT #define STAT stat #endif #ifdef MSDOS #ifndef NON_UNIX_STDIO #define NON_UNIX_STDIO #endif #endif #ifdef UIOLEN_int typedef int uiolen; #else typedef long uiolen; #endif /*units*/ typedef struct { FILE *ufd; /*0=unconnected*/ char *ufnm; #ifndef MSDOS long uinode; int udev; #endif int url; /*0=sequential*/ flag useek; /*true=can backspace, use dir, ...*/ flag ufmt; flag urw; /* (1 for can read) | (2 for can write) */ flag ublnk; flag uend; flag uwrt; /*last io was write*/ flag uscrtch; } unit; #undef Void #ifdef KR_headers #define Void /*void*/ extern int (*f__getn)(); /* for formatted input */ extern void (*f__putn)(); /* for formatted output */ extern void x_putc(); extern long f__inode(); extern VOID sig_die(); extern int (*f__donewrec)(), t_putc(), x_wSL(); extern int c_sfe(), err__fl(), xrd_SL(), f__putbuf(); #else #define Void void #ifdef __cplusplus extern "C" { #endif extern int (*f__getn)(void); /* for formatted input */ extern void (*f__putn)(int); /* for formatted output */ extern void x_putc(int); extern long f__inode(char*,int*); extern void sig_die(const char*,int); extern void f__fatal(int, const char*); extern int t_runc(alist*); extern int f__nowreading(unit*), f__nowwriting(unit*); extern int fk_open(int,int,ftnint); extern int en_fio(void); extern void f_init(void); extern int (*f__donewrec)(void), t_putc(int), x_wSL(void); extern void b_char(const char*,char*,ftnlen), g_char(const char*,ftnlen,char*); extern int c_sfe(cilist*), z_rnew(void); extern int err__fl(int,int,const char*); extern int xrd_SL(void); extern int f__putbuf(int); #endif extern flag f__init; extern cilist *f__elist; /*active external io list*/ extern flag f__reading,f__external,f__sequential,f__formatted; extern int (*f__doend)(Void); extern FILE *f__cf; /*current file*/ extern unit *f__curunit; /*current unit*/ extern unit f__units[]; #define err(f,m,s) {if(f) errno= m; else f__fatal(m,s); return(m);} #define errfl(f,m,s) return err__fl((int)f,m,s) /*Table sizes*/ #define MXUNIT 100 extern int f__recpos; /*position in current record*/ extern OFF_T f__cursor; /* offset to move to */ extern OFF_T f__hiwater; /* so TL doesn't confuse us */ #ifdef __cplusplus } #endif #define WRITE 1 #define READ 2 #define SEQ 3 #define DIR 4 #define FMT 5 #define UNF 6 #define EXT 7 #define INT 8 #define buf_end(x) (x->_flag & _IONBF ? x->_ptr : x->_base + BUFSIZ) starpu-1.4.9+dfsg/min-dgels/additional/fmt.c000066400000000000000000000205661507764646700207420ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #include "fmt.h" #ifdef __cplusplus extern "C" { #endif #define skip(s) while(*s==' ') s++ #ifdef interdata #define SYLMX 300 #endif #ifdef pdp11 #define SYLMX 300 #endif #ifdef vax #define SYLMX 300 #endif #ifndef SYLMX #define SYLMX 300 #endif #define GLITCH '\2' /* special quote character for stu */ extern flag f__cblank,f__cplus; /*blanks in I and compulsory plus*/ static struct syl f__syl[SYLMX]; int f__parenlvl,f__pc,f__revloc; #ifdef KR_headers #define Const /*nothing*/ #else #define Const const #endif static #ifdef KR_headers char *ap_end(s) char *s; #else const char *ap_end(const char *s) #endif { char quote; quote= *s++; for(;*s;s++) { if(*s!=quote) continue; if(*++s!=quote) return(s); } if(f__elist->cierr) { errno = 100; return(NULL); } f__fatal(100, "bad string"); /*NOTREACHED*/ return 0; } static int #ifdef KR_headers op_gen(a,b,c,d) #else op_gen(int a, int b, int c, int d) #endif { struct syl *p= &f__syl[f__pc]; if(f__pc>=SYLMX) { fprintf(stderr,"format too complicated:\n"); sig_die(f__fmtbuf, 1); } p->op=a; p->p1=b; p->p2.i[0]=c; p->p2.i[1]=d; return(f__pc++); } #ifdef KR_headers static char *f_list(); static char *gt_num(s,n,n1) char *s; int *n, n1; #else static const char *f_list(const char*); static const char *gt_num(const char *s, int *n, int n1) #endif { int m=0,f__cnt=0; char c; for(c= *s;;c = *s) { if(c==' ') { s++; continue; } if(c>'9' || c<'0') break; m=10*m+c-'0'; f__cnt++; s++; } if(f__cnt==0) { if (!n1) s = 0; *n=n1; } else *n=m; return(s); } static #ifdef KR_headers char *f_s(s,curloc) char *s; #else const char *f_s(const char *s, int curloc) #endif { skip(s); if(*s++!='(') { return(NULL); } if(f__parenlvl++ ==1) f__revloc=curloc; if(op_gen(RET1,curloc,0,0)<0 || (s=f_list(s))==NULL) { return(NULL); } skip(s); return(s); } static int #ifdef KR_headers ne_d(s,p) char *s,**p; #else ne_d(const char *s, const char **p) #endif { int n,x,sign=0; struct syl *sp; switch(*s) { default: return(0); case ':': (void) op_gen(COLON,0,0,0); break; case '$': (void) op_gen(NONL, 0, 0, 0); break; case 'B': case 'b': if(*++s=='z' || *s == 'Z') (void) op_gen(BZ,0,0,0); else (void) op_gen(BN,0,0,0); break; case 'S': case 's': if(*(s+1)=='s' || *(s+1) == 'S') { x=SS; s++; } else if(*(s+1)=='p' || *(s+1) == 'P') { x=SP; s++; } else x=S; (void) op_gen(x,0,0,0); break; case '/': (void) op_gen(SLASH,0,0,0); break; case '-': sign=1; case '+': s++; /*OUTRAGEOUS CODING TRICK*/ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (!(s=gt_num(s,&n,0))) { bad: *p = 0; return 1; } switch(*s) { default: return(0); case 'P': case 'p': if(sign) n= -n; (void) op_gen(P,n,0,0); break; case 'X': case 'x': (void) op_gen(X,n,0,0); break; case 'H': case 'h': sp = &f__syl[op_gen(H,n,0,0)]; sp->p2.s = (char*)s + 1; s+=n; break; } break; case GLITCH: case '"': case '\'': sp = &f__syl[op_gen(APOS,0,0,0)]; sp->p2.s = (char*)s; if((*p = ap_end(s)) == NULL) return(0); return(1); case 'T': case 't': if(*(s+1)=='l' || *(s+1) == 'L') { x=TL; s++; } else if(*(s+1)=='r'|| *(s+1) == 'R') { x=TR; s++; } else x=T; if (!(s=gt_num(s+1,&n,0))) goto bad; s--; (void) op_gen(x,n,0,0); break; case 'X': case 'x': (void) op_gen(X,1,0,0); break; case 'P': case 'p': (void) op_gen(P,1,0,0); break; } s++; *p=s; return(1); } static int #ifdef KR_headers e_d(s,p) char *s,**p; #else e_d(const char *s, const char **p) #endif { int i,im,n,w,d,e,found=0,x=0; Const char *sv=s; s=gt_num(s,&n,1); (void) op_gen(STACK,n,0,0); switch(*s++) { default: break; case 'E': case 'e': x=1; case 'G': case 'g': found=1; if (!(s=gt_num(s,&w,0))) { bad: *p = 0; return 1; } if(w==0) break; if(*s=='.') { if (!(s=gt_num(s+1,&d,0))) goto bad; } else d=0; if(*s!='E' && *s != 'e') (void) op_gen(x==1?E:G,w,d,0); /* default is Ew.dE2 */ else { if (!(s=gt_num(s+1,&e,0))) goto bad; (void) op_gen(x==1?EE:GE,w,d,e); } break; case 'O': case 'o': i = O; im = OM; goto finish_I; case 'Z': case 'z': i = Z; im = ZM; goto finish_I; case 'L': case 'l': found=1; if (!(s=gt_num(s,&w,0))) goto bad; if(w==0) break; (void) op_gen(L,w,0,0); break; case 'A': case 'a': found=1; skip(s); if(*s>='0' && *s<='9') { s=gt_num(s,&w,1); if(w==0) break; (void) op_gen(AW,w,0,0); break; } (void) op_gen(A,0,0,0); break; case 'F': case 'f': if (!(s=gt_num(s,&w,0))) goto bad; found=1; if(w==0) break; if(*s=='.') { if (!(s=gt_num(s+1,&d,0))) goto bad; } else d=0; (void) op_gen(F,w,d,0); break; case 'D': case 'd': found=1; if (!(s=gt_num(s,&w,0))) goto bad; if(w==0) break; if(*s=='.') { if (!(s=gt_num(s+1,&d,0))) goto bad; } else d=0; (void) op_gen(D,w,d,0); break; case 'I': case 'i': i = I; im = IM; finish_I: if (!(s=gt_num(s,&w,0))) goto bad; found=1; if(w==0) break; if(*s!='.') { (void) op_gen(i,w,0,0); break; } if (!(s=gt_num(s+1,&d,0))) goto bad; (void) op_gen(im,w,d,0); break; } if(found==0) { f__pc--; /*unSTACK*/ *p=sv; return(0); } *p=s; return(1); } static #ifdef KR_headers char *i_tem(s) char *s; #else const char *i_tem(const char *s) #endif { const char *t; int n,curloc; if(*s==')') return(s); if(ne_d(s,&t)) return(t); if(e_d(s,&t)) return(t); s=gt_num(s,&n,1); if((curloc=op_gen(STACK,n,0,0))<0) return(NULL); return(f_s(s,curloc)); } static #ifdef KR_headers char *f_list(s) char *s; #else const char *f_list(const char *s) #endif { for(;*s!=0;) { skip(s); if((s=i_tem(s))==NULL) return(NULL); skip(s); if(*s==',') s++; else if(*s==')') { if(--f__parenlvl==0) { (void) op_gen(REVERT,f__revloc,0,0); return(++s); } (void) op_gen(GOTO,0,0,0); return(++s); } } return(NULL); } int #ifdef KR_headers pars_f(s) char *s; #else pars_f(const char *s) #endif { f__parenlvl=f__revloc=f__pc=0; if(f_s(s,0) == NULL) { return(-1); } return(0); } #define STKSZ 10 int f__cnt[STKSZ],f__ret[STKSZ],f__cp,f__rp; flag f__workdone, f__nonl; static int #ifdef KR_headers type_f(n) #else type_f(int n) #endif { switch(n) { default: return(n); case RET1: return(RET1); case REVERT: return(REVERT); case GOTO: return(GOTO); case STACK: return(STACK); case X: case SLASH: case APOS: case H: case T: case TL: case TR: return(NED); case F: case I: case IM: case A: case AW: case O: case OM: case L: case E: case EE: case D: case G: case GE: case Z: case ZM: return(ED); } } #ifdef KR_headers integer do_fio(number,ptr,len) ftnint *number; ftnlen len; char *ptr; #else integer do_fio(ftnint *number, char *ptr, ftnlen len) #endif { struct syl *p; int n,i; for(i=0;i<*number;i++,ptr+=len) { loop: switch(type_f((p= &f__syl[f__pc])->op)) { default: fprintf(stderr,"unknown code in do_fio: %d\n%s\n", p->op,f__fmtbuf); err(f__elist->cierr,100,"do_fio"); case NED: if((*f__doned)(p)) { f__pc++; goto loop; } f__pc++; continue; case ED: if(f__cnt[f__cp]<=0) { f__cp--; f__pc++; goto loop; } if(ptr==NULL) return((*f__doend)()); f__cnt[f__cp]--; f__workdone=1; if((n=(*f__doed)(p,ptr,len))>0) errfl(f__elist->cierr,errno,"fmt"); if(n<0) err(f__elist->ciend,(EOF),"fmt"); continue; case STACK: f__cnt[++f__cp]=p->p1; f__pc++; goto loop; case RET1: f__ret[++f__rp]=p->p1; f__pc++; goto loop; case GOTO: if(--f__cnt[f__cp]<=0) { f__cp--; f__rp--; f__pc++; goto loop; } f__pc=1+f__ret[f__rp--]; goto loop; case REVERT: f__rp=f__cp=0; f__pc = p->p1; if(ptr==NULL) return((*f__doend)()); if(!f__workdone) return(0); if((n=(*f__dorevert)()) != 0) return(n); goto loop; case COLON: if(ptr==NULL) return((*f__doend)()); f__pc++; goto loop; case NONL: f__nonl = 1; f__pc++; goto loop; case S: case SS: f__cplus=0; f__pc++; goto loop; case SP: f__cplus = 1; f__pc++; goto loop; case P: f__scale=p->p1; f__pc++; goto loop; case BN: f__cblank=0; f__pc++; goto loop; case BZ: f__cblank=1; f__pc++; goto loop; } } return(0); } int en_fio(Void) { ftnint one=1; return(do_fio(&one,(char *)NULL,(ftnint)0)); } VOID fmt_bg(Void) { f__workdone=f__cp=f__rp=f__pc=f__cursor=0; f__cnt[0]=f__ret[0]=0; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/additional/fmt.h000066400000000000000000000037261507764646700207460ustar00rootroot00000000000000struct syl { int op; int p1; union { int i[2]; char *s;} p2; }; #define RET1 1 #define REVERT 2 #define GOTO 3 #define X 4 #define SLASH 5 #define STACK 6 #define I 7 #define ED 8 #define NED 9 #define IM 10 #define APOS 11 #define H 12 #define TL 13 #define TR 14 #define T 15 #define COLON 16 #define S 17 #define SP 18 #define SS 19 #define P 20 #define BN 21 #define BZ 22 #define F 23 #define E 24 #define EE 25 #define D 26 #define G 27 #define GE 28 #define L 29 #define A 30 #define AW 31 #define O 32 #define NONL 33 #define OM 34 #define Z 35 #define ZM 36 typedef union { real pf; doublereal pd; } ufloat; typedef union { short is; #ifndef KR_headers signed #endif char ic; integer il; #ifdef Allow_TYQUAD longint ili; #endif } Uint; #ifdef KR_headers extern int (*f__doed)(),(*f__doned)(); extern int (*f__dorevert)(); extern int rd_ed(),rd_ned(); extern int w_ed(),w_ned(); extern int signbit_f2c(); extern char *f__fmtbuf; #else #ifdef __cplusplus extern "C" { #define Cextern extern "C" #else #define Cextern extern #endif extern const char *f__fmtbuf; extern int (*f__doed)(struct syl*, char*, ftnlen),(*f__doned)(struct syl*); extern int (*f__dorevert)(void); extern void fmt_bg(void); extern int pars_f(const char*); extern int rd_ed(struct syl*, char*, ftnlen),rd_ned(struct syl*); extern int signbit_f2c(double*); extern int w_ed(struct syl*, char*, ftnlen),w_ned(struct syl*); extern int wrt_E(ufloat*, int, int, int, ftnlen); extern int wrt_F(ufloat*, int, int, ftnlen); extern int wrt_L(Uint*, int, ftnlen); #endif extern int f__pc,f__parenlvl,f__revloc; extern flag f__cblank,f__cplus,f__workdone, f__nonl; extern int f__scale; #ifdef __cplusplus } #endif #define GET(x) if((x=(*f__getn)())<0) return(x) #define VAL(x) (x!='\n'?x:' ') #define PUT(x) (*f__putn)(x) #undef TYQUAD #ifndef Allow_TYQUAD #undef longint #define longint long #else #define TYQUAD 14 #endif #ifdef KR_headers extern char *f__icvt(); #else Cextern char *f__icvt(longint, int*, int*, int); #endif starpu-1.4.9+dfsg/min-dgels/additional/ieeeck.c000066400000000000000000000065711507764646700214010ustar00rootroot00000000000000/* ieeeck.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_ieeeck_(integer *ispec, real *zero, real *one) { /* System generated locals */ integer ret_val; /* Local variables */ real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro, newzro; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* IEEECK is called from the ILAENV to verify that Infinity and */ /* possibly NaN arithmetic is safe (i.e. will not trap). */ /* Arguments */ /* ========= */ /* ISPEC (input) INTEGER */ /* Specifies whether to test just for inifinity arithmetic */ /* or whether to test for infinity and NaN arithmetic. */ /* = 0: Verify infinity arithmetic only. */ /* = 1: Verify infinity and NaN arithmetic. */ /* ZERO (input) REAL */ /* Must contain the value 0.0 */ /* This is passed to prevent the compiler from optimizing */ /* away this code. */ /* ONE (input) REAL */ /* Must contain the value 1.0 */ /* This is passed to prevent the compiler from optimizing */ /* away this code. */ /* RETURN VALUE: INTEGER */ /* = 0: Arithmetic failed to produce the correct answers */ /* = 1: Arithmetic produced the correct answers */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ ret_val = 1; posinf = *one / *zero; if (posinf <= *one) { ret_val = 0; return ret_val; } neginf = -(*one) / *zero; if (neginf >= *zero) { ret_val = 0; return ret_val; } negzro = *one / (neginf + *one); if (negzro != *zero) { ret_val = 0; return ret_val; } neginf = *one / negzro; if (neginf >= *zero) { ret_val = 0; return ret_val; } newzro = negzro + *zero; if (newzro != *zero) { ret_val = 0; return ret_val; } posinf = *one / newzro; if (posinf <= *one) { ret_val = 0; return ret_val; } neginf *= posinf; if (neginf >= *zero) { ret_val = 0; return ret_val; } posinf *= posinf; if (posinf <= *one) { ret_val = 0; return ret_val; } /* Return if we were only asked to check infinity arithmetic */ if (*ispec == 0) { return ret_val; } nan1 = posinf + neginf; nan2 = posinf / neginf; nan3 = posinf / posinf; nan4 = posinf * *zero; nan5 = neginf * negzro; nan6 = nan5 * 0.f; if (nan1 == nan1) { ret_val = 0; return ret_val; } if (nan2 == nan2) { ret_val = 0; return ret_val; } if (nan3 == nan3) { ret_val = 0; return ret_val; } if (nan4 == nan4) { ret_val = 0; return ret_val; } if (nan5 == nan5) { ret_val = 0; return ret_val; } if (nan6 == nan6) { ret_val = 0; return ret_val; } return ret_val; } /* _starpu_ieeeck_ */ starpu-1.4.9+dfsg/min-dgels/additional/iladlc.c000066400000000000000000000046141507764646700214000ustar00rootroot00000000000000/* iladlc.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_iladlc_(integer *m, integer *n, doublereal *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, ret_val, i__1; /* Local variables */ integer i__; /* -- LAPACK auxiliary routine (version 3.2.1) -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ILADLC scans A for its last non-zero column. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The m by n matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Quick test for the common case where one corner is non-zero. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ if (*n == 0) { ret_val = *n; } else if (a[*n * a_dim1 + 1] != 0. || a[*m + *n * a_dim1] != 0.) { ret_val = *n; } else { /* Now scan each column from the end, returning with the first non-zero. */ for (ret_val = *n; ret_val >= 1; --ret_val) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (a[i__ + ret_val * a_dim1] != 0.) { return ret_val; } } } } return ret_val; } /* _starpu_iladlc_ */ starpu-1.4.9+dfsg/min-dgels/additional/iladlr.c000066400000000000000000000046011507764646700214130ustar00rootroot00000000000000/* iladlr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_iladlr_(integer *m, integer *n, doublereal *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, ret_val, i__1; /* Local variables */ integer i__, j; /* -- LAPACK auxiliary routine (version 3.2.1) -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ILADLR scans A for its last non-zero row. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The m by n matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Quick test for the common case where one corner is non-zero. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ if (*m == 0) { ret_val = *m; } else if (a[*m + a_dim1] != 0. || a[*m + *n * a_dim1] != 0.) { ret_val = *m; } else { /* Scan up each column tracking the last zero row seen. */ ret_val = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { if (a[i__ + j * a_dim1] != 0.) { break; } } ret_val = max(ret_val,i__); } } return ret_val; } /* _starpu_iladlr_ */ starpu-1.4.9+dfsg/min-dgels/additional/ilaenv.c000066400000000000000000000467721507764646700214410ustar00rootroot00000000000000/* ilaenv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" #include "string.h" /* Table of constant values */ static integer c__1 = 1; static real c_b163 = 0.f; static real c_b164 = 1.f; static integer c__0 = 0; integer _starpu_ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, integer *n2, integer *n3, integer *n4) { /* System generated locals */ integer ret_val; /* Builtin functions */ /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer s_cmp(char *, char *, ftnlen, ftnlen); /* Local variables */ integer i__; char c1[1], c2[1], c3[1], c4[1]; integer ic, nb, iz, nx; logical cname; integer nbmin; logical sname; extern integer _starpu_ieeeck_(integer *, real *, real *); char subnam[1]; extern integer _starpu_iparmq_(integer *, char *, char *, integer *, integer *, integer *, integer *); ftnlen name_len, opts_len; name_len = strlen (name__); opts_len = strlen (opts); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* January 2007 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ILAENV is called from the LAPACK routines to choose problem-dependent */ /* parameters for the local environment. See ISPEC for a description of */ /* the parameters. */ /* ILAENV returns an INTEGER */ /* if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */ /* if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. */ /* This version provides a set of parameters which should give good, */ /* but not optimal, performance on many of the currently available */ /* computers. Users are encouraged to modify this subroutine to set */ /* the tuning parameters for their particular machine using the option */ /* and problem size information in the arguments. */ /* This routine will not function correctly if it is converted to all */ /* lower case. Converting it to all upper case is allowed. */ /* Arguments */ /* ========= */ /* ISPEC (input) INTEGER */ /* Specifies the parameter to be returned as the value of */ /* ILAENV. */ /* = 1: the optimal blocksize; if this value is 1, an unblocked */ /* algorithm will give the best performance. */ /* = 2: the minimum block size for which the block routine */ /* should be used; if the usable block size is less than */ /* this value, an unblocked routine should be used. */ /* = 3: the crossover point (in a block routine, for N less */ /* than this value, an unblocked routine should be used) */ /* = 4: the number of shifts, used in the nonsymmetric */ /* eigenvalue routines (DEPRECATED) */ /* = 5: the minimum column dimension for blocking to be used; */ /* rectangular blocks must have dimension at least k by m, */ /* where k is given by ILAENV(2,...) and m by ILAENV(5,...) */ /* = 6: the crossover point for the SVD (when reducing an m by n */ /* matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */ /* this value, a QR factorization is used first to reduce */ /* the matrix to a triangular form.) */ /* = 7: the number of processors */ /* = 8: the crossover point for the multishift QR method */ /* for nonsymmetric eigenvalue problems (DEPRECATED) */ /* = 9: maximum size of the subproblems at the bottom of the */ /* computation tree in the divide-and-conquer algorithm */ /* (used by xGELSD and xGESDD) */ /* =10: ieee NaN arithmetic can be trusted not to trap */ /* =11: infinity arithmetic can be trusted not to trap */ /* 12 <= ISPEC <= 16: */ /* xHSEQR or one of its subroutines, */ /* see IPARMQ for detailed explanation */ /* NAME (input) CHARACTER*(*) */ /* The name of the calling subroutine, in either upper case or */ /* lower case. */ /* OPTS (input) CHARACTER*(*) */ /* The character options to the subroutine NAME, concatenated */ /* into a single character string. For example, UPLO = 'U', */ /* TRANS = 'T', and DIAG = 'N' for a triangular routine would */ /* be specified as OPTS = 'UTN'. */ /* N1 (input) INTEGER */ /* N2 (input) INTEGER */ /* N3 (input) INTEGER */ /* N4 (input) INTEGER */ /* Problem dimensions for the subroutine NAME; these may not all */ /* be required. */ /* Further Details */ /* =============== */ /* The following conventions have been used when calling ILAENV from the */ /* LAPACK routines: */ /* 1) OPTS is a concatenation of all of the character options to */ /* subroutine NAME, in the same order that they appear in the */ /* argument list for NAME, even if they are not used in determining */ /* the value of the parameter specified by ISPEC. */ /* 2) The problem dimensions N1, N2, N3, N4 are specified in the order */ /* that they appear in the argument list for NAME. N1 is used */ /* first, N2 second, and so on, and unused problem dimensions are */ /* passed a value of -1. */ /* 3) The parameter value returned by ILAENV is checked for validity in */ /* the calling subroutine. For example, ILAENV is used to retrieve */ /* the optimal blocksize for STRTRI as follows: */ /* NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */ /* IF( NB.LE.1 ) NB = MAX( 1, N ) */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ switch (*ispec) { case 1: goto L10; case 2: goto L10; case 3: goto L10; case 4: goto L80; case 5: goto L90; case 6: goto L100; case 7: goto L110; case 8: goto L120; case 9: goto L130; case 10: goto L140; case 11: goto L150; case 12: goto L160; case 13: goto L160; case 14: goto L160; case 15: goto L160; case 16: goto L160; } /* Invalid value for ISPEC */ ret_val = -1; return ret_val; L10: /* Convert NAME to upper case if the first character is lower case. */ ret_val = 1; s_copy(subnam, name__, (ftnlen)1, name_len); ic = *(unsigned char *)subnam; iz = 'Z'; if (iz == 90 || iz == 122) { /* ASCII character set */ if (ic >= 97 && ic <= 122) { *(unsigned char *)subnam = (char) (ic - 32); for (i__ = 2; i__ <= 6; ++i__) { ic = *(unsigned char *)&subnam[i__ - 1]; if (ic >= 97 && ic <= 122) { *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); } /* L20: */ } } } else if (iz == 233 || iz == 169) { /* EBCDIC character set */ if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && ic <= 169) { *(unsigned char *)subnam = (char) (ic + 64); for (i__ = 2; i__ <= 6; ++i__) { ic = *(unsigned char *)&subnam[i__ - 1]; if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && ic <= 169) { *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); } /* L30: */ } } } else if (iz == 218 || iz == 250) { /* Prime machines: ASCII+128 */ if (ic >= 225 && ic <= 250) { *(unsigned char *)subnam = (char) (ic - 32); for (i__ = 2; i__ <= 6; ++i__) { ic = *(unsigned char *)&subnam[i__ - 1]; if (ic >= 225 && ic <= 250) { *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); } /* L40: */ } } } *(unsigned char *)c1 = *(unsigned char *)subnam; sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D'; cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z'; if (! (cname || sname)) { return ret_val; } s_copy(c2, subnam + 1, (ftnlen)1, (ftnlen)2); s_copy(c3, subnam + 3, (ftnlen)1, (ftnlen)3); s_copy(c4, c3 + 1, (ftnlen)1, (ftnlen)2); switch (*ispec) { case 1: goto L50; case 2: goto L60; case 3: goto L70; } L50: /* ISPEC = 1: block size */ /* In these examples, separate code is provided for setting NB for */ /* real and complex. We assume that NB will take the same value in */ /* single or double precision. */ nb = 1; if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 64; } else { nb = 64; } } else if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen) 1, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 32; } else { nb = 32; } } else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 32; } else { nb = 32; } } else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 32; } else { nb = 32; } } else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 64; } else { nb = 64; } } } else if (s_cmp(c2, "PO", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 64; } else { nb = 64; } } } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 64; } else { nb = 64; } } else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { nb = 32; } else if (sname && s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) { nb = 64; } } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { nb = 64; } else if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { nb = 32; } else if (s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) { nb = 64; } } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) { if (*(unsigned char *)c3 == 'G') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nb = 32; } } else if (*(unsigned char *)c3 == 'M') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nb = 32; } } } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) { if (*(unsigned char *)c3 == 'G') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nb = 32; } } else if (*(unsigned char *)c3 == 'M') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nb = 32; } } } else if (s_cmp(c2, "GB", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { if (*n4 <= 64) { nb = 1; } else { nb = 32; } } else { if (*n4 <= 64) { nb = 1; } else { nb = 32; } } } } else if (s_cmp(c2, "PB", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { if (*n2 <= 64) { nb = 1; } else { nb = 32; } } else { if (*n2 <= 64) { nb = 1; } else { nb = 32; } } } } else if (s_cmp(c2, "TR", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 64; } else { nb = 64; } } } else if (s_cmp(c2, "LA", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "UUM", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 64; } else { nb = 64; } } } else if (sname && s_cmp(c2, "ST", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "EBZ", (ftnlen)1, (ftnlen)3) == 0) { nb = 1; } } ret_val = nb; return ret_val; L60: /* ISPEC = 2: minimum block size */ nbmin = 2; if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, ( ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nbmin = 2; } else { nbmin = 2; } } else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nbmin = 2; } else { nbmin = 2; } } else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nbmin = 2; } else { nbmin = 2; } } else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nbmin = 2; } else { nbmin = 2; } } } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nbmin = 8; } else { nbmin = 8; } } else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { nbmin = 2; } } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { nbmin = 2; } } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) { if (*(unsigned char *)c3 == 'G') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nbmin = 2; } } else if (*(unsigned char *)c3 == 'M') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nbmin = 2; } } } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) { if (*(unsigned char *)c3 == 'G') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nbmin = 2; } } else if (*(unsigned char *)c3 == 'M') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nbmin = 2; } } } ret_val = nbmin; return ret_val; L70: /* ISPEC = 3: crossover point */ nx = 0; if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, ( ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nx = 128; } else { nx = 128; } } else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nx = 128; } else { nx = 128; } } else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nx = 128; } else { nx = 128; } } } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) { if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { nx = 32; } } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { nx = 32; } } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) { if (*(unsigned char *)c3 == 'G') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nx = 128; } } } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) { if (*(unsigned char *)c3 == 'G') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nx = 128; } } } ret_val = nx; return ret_val; L80: /* ISPEC = 4: number of shifts (used by xHSEQR) */ ret_val = 6; return ret_val; L90: /* ISPEC = 5: minimum column dimension (not used) */ ret_val = 2; return ret_val; L100: /* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ ret_val = (integer) ((real) min(*n1,*n2) * 1.6f); return ret_val; L110: /* ISPEC = 7: number of processors (not used) */ ret_val = 1; return ret_val; L120: /* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ ret_val = 50; return ret_val; L130: /* ISPEC = 9: maximum size of the subproblems at the bottom of the */ /* computation tree in the divide-and-conquer algorithm */ /* (used by xGELSD and xGESDD) */ ret_val = 25; return ret_val; L140: /* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap */ /* ILAENV = 0 */ ret_val = 1; if (ret_val == 1) { ret_val = _starpu_ieeeck_(&c__1, &c_b163, &c_b164); } return ret_val; L150: /* ISPEC = 11: infinity arithmetic can be trusted not to trap */ /* ILAENV = 0 */ ret_val = 1; if (ret_val == 1) { ret_val = _starpu_ieeeck_(&c__0, &c_b163, &c_b164); } return ret_val; L160: /* 12 <= ISPEC <= 16: xHSEQR or one of its subroutines. */ ret_val = _starpu_iparmq_(ispec, name__, opts, n1, n2, n3, n4) ; return ret_val; /* End of ILAENV */ } /* _starpu_ilaenv_ */ starpu-1.4.9+dfsg/min-dgels/additional/iparmq.c000066400000000000000000000241731507764646700214430ustar00rootroot00000000000000/* iparmq.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_iparmq_(integer *ispec, char *name__, char *opts, integer *n, integer *ilo, integer *ihi, integer *lwork) { /* System generated locals */ integer ret_val, i__1, i__2; real r__1; /* Builtin functions */ double log(doublereal); integer i_nint(real *); /* Local variables */ integer nh, ns; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* Purpose */ /* ======= */ /* This program sets problem and machine dependent parameters */ /* useful for xHSEQR and its subroutines. It is called whenever */ /* ILAENV is called with 12 <= ISPEC <= 16 */ /* Arguments */ /* ========= */ /* ISPEC (input) integer scalar */ /* ISPEC specifies which tunable parameter IPARMQ should */ /* return. */ /* ISPEC=12: (INMIN) Matrices of order nmin or less */ /* are sent directly to xLAHQR, the implicit */ /* double shift QR algorithm. NMIN must be */ /* at least 11. */ /* ISPEC=13: (INWIN) Size of the deflation window. */ /* This is best set greater than or equal to */ /* the number of simultaneous shifts NS. */ /* Larger matrices benefit from larger deflation */ /* windows. */ /* ISPEC=14: (INIBL) Determines when to stop nibbling and */ /* invest in an (expensive) multi-shift QR sweep. */ /* If the aggressive early deflation subroutine */ /* finds LD converged eigenvalues from an order */ /* NW deflation window and LD.GT.(NW*NIBBLE)/100, */ /* then the next QR sweep is skipped and early */ /* deflation is applied immediately to the */ /* remaining active diagonal block. Setting */ /* IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a */ /* multi-shift QR sweep whenever early deflation */ /* finds a converged eigenvalue. Setting */ /* IPARMQ(ISPEC=14) greater than or equal to 100 */ /* prevents TTQRE from skipping a multi-shift */ /* QR sweep. */ /* ISPEC=15: (NSHFTS) The number of simultaneous shifts in */ /* a multi-shift QR iteration. */ /* ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the */ /* following meanings. */ /* 0: During the multi-shift QR sweep, */ /* xLAQR5 does not accumulate reflections and */ /* does not use matrix-matrix multiply to */ /* update the far-from-diagonal matrix */ /* entries. */ /* 1: During the multi-shift QR sweep, */ /* xLAQR5 and/or xLAQRaccumulates reflections and uses */ /* matrix-matrix multiply to update the */ /* far-from-diagonal matrix entries. */ /* 2: During the multi-shift QR sweep. */ /* xLAQR5 accumulates reflections and takes */ /* advantage of 2-by-2 block structure during */ /* matrix-matrix multiplies. */ /* (If xTRMM is slower than xGEMM, then */ /* IPARMQ(ISPEC=16)=1 may be more efficient than */ /* IPARMQ(ISPEC=16)=2 despite the greater level of */ /* arithmetic work implied by the latter choice.) */ /* NAME (input) character string */ /* Name of the calling subroutine */ /* OPTS (input) character string */ /* This is a concatenation of the string arguments to */ /* TTQRE. */ /* N (input) integer scalar */ /* N is the order of the Hessenberg matrix H. */ /* ILO (input) INTEGER */ /* IHI (input) INTEGER */ /* It is assumed that H is already upper triangular */ /* in rows and columns 1:ILO-1 and IHI+1:N. */ /* LWORK (input) integer scalar */ /* The amount of workspace available. */ /* Further Details */ /* =============== */ /* Little is known about how best to choose these parameters. */ /* It is possible to use different values of the parameters */ /* for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. */ /* It is probably best to choose different parameters for */ /* different matrices and different parameters at different */ /* times during the iteration, but this has not been */ /* implemented --- yet. */ /* The best choices of most of the parameters depend */ /* in an ill-understood way on the relative execution */ /* rate of xLAQR3 and xLAQR5 and on the nature of each */ /* particular eigenvalue problem. Experiment may be the */ /* only practical way to determine which choices are most */ /* effective. */ /* Following is a list of default values supplied by IPARMQ. */ /* These defaults may be adjusted in order to attain better */ /* performance in any particular computational environment. */ /* IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. */ /* Default: 75. (Must be at least 11.) */ /* IPARMQ(ISPEC=13) Recommended deflation window size. */ /* This depends on ILO, IHI and NS, the */ /* number of simultaneous shifts returned */ /* by IPARMQ(ISPEC=15). The default for */ /* (IHI-ILO+1).LE.500 is NS. The default */ /* for (IHI-ILO+1).GT.500 is 3*NS/2. */ /* IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. */ /* IPARMQ(ISPEC=15) Number of simultaneous shifts, NS. */ /* a multi-shift QR iteration. */ /* If IHI-ILO+1 is ... */ /* greater than ...but less ... the */ /* or equal to ... than default is */ /* 0 30 NS = 2+ */ /* 30 60 NS = 4+ */ /* 60 150 NS = 10 */ /* 150 590 NS = ** */ /* 590 3000 NS = 64 */ /* 3000 6000 NS = 128 */ /* 6000 infinity NS = 256 */ /* (+) By default matrices of this order are */ /* passed to the implicit double shift routine */ /* xLAHQR. See IPARMQ(ISPEC=12) above. These */ /* values of NS are used only in case of a rare */ /* xLAHQR failure. */ /* (**) The asterisks (**) indicate an ad-hoc */ /* function increasing from 10 to 64. */ /* IPARMQ(ISPEC=16) Select structured matrix multiply. */ /* (See ISPEC=16 above for details.) */ /* Default: 3. */ /* ================================================================ */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ if (*ispec == 15 || *ispec == 13 || *ispec == 16) { /* ==== Set the number simultaneous shifts ==== */ nh = *ihi - *ilo + 1; ns = 2; if (nh >= 30) { ns = 4; } if (nh >= 60) { ns = 10; } if (nh >= 150) { /* Computing MAX */ r__1 = log((real) nh) / log(2.f); i__1 = 10, i__2 = nh / i_nint(&r__1); ns = max(i__1,i__2); } if (nh >= 590) { ns = 64; } if (nh >= 3000) { ns = 128; } if (nh >= 6000) { ns = 256; } /* Computing MAX */ i__1 = 2, i__2 = ns - ns % 2; ns = max(i__1,i__2); } if (*ispec == 12) { /* ===== Matrices of order smaller than NMIN get sent */ /* . to xLAHQR, the classic double shift algorithm. */ /* . This must be at least 11. ==== */ ret_val = 75; } else if (*ispec == 14) { /* ==== INIBL: skip a multi-shift qr iteration and */ /* . whenever aggressive early deflation finds */ /* . at least (NIBBLE*(window size)/100) deflations. ==== */ ret_val = 14; } else if (*ispec == 15) { /* ==== NSHFTS: The number of simultaneous shifts ===== */ ret_val = ns; } else if (*ispec == 13) { /* ==== NW: deflation window size. ==== */ if (nh <= 500) { ret_val = ns; } else { ret_val = ns * 3 / 2; } } else if (*ispec == 16) { /* ==== IACC22: Whether to accumulate reflections */ /* . before updating the far-from-diagonal elements */ /* . and whether to use 2-by-2 block structure while */ /* . doing it. A small amount of work could be saved */ /* . by making this choice dependent also upon the */ /* . NH=IHI-ILO+1. */ ret_val = 0; if (ns >= 14) { ret_val = 1; } if (ns >= 14) { ret_val = 2; } } else { /* ===== invalid value of ispec ===== */ ret_val = -1; } /* ==== End of IPARMQ ==== */ return ret_val; } /* _starpu_iparmq_ */ starpu-1.4.9+dfsg/min-dgels/additional/lsame.c000066400000000000000000000056461507764646700212570ustar00rootroot00000000000000/* lsame.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" logical _starpu_lsame_(char *ca, char *cb) { /* System generated locals */ logical ret_val; /* Local variables */ integer inta, intb, zcode; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* LSAME returns .TRUE. if CA is the same letter as CB regardless of */ /* case. */ /* Arguments */ /* ========= */ /* CA (input) CHARACTER*1 */ /* CB (input) CHARACTER*1 */ /* CA and CB specify the single characters to be compared. */ /* ===================================================================== */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Test if the characters are equal */ ret_val = *(unsigned char *)ca == *(unsigned char *)cb; if (ret_val) { return ret_val; } /* Now test for equivalence if both characters are alphabetic. */ zcode = 'Z'; /* Use 'Z' rather than 'A' so that ASCII can be detected on Prime */ /* machines, on which ICHAR returns a value with bit 8 set. */ /* ICHAR('A') on Prime machines returns 193 which is the same as */ /* ICHAR('A') on an EBCDIC machine. */ inta = *(unsigned char *)ca; intb = *(unsigned char *)cb; if (zcode == 90 || zcode == 122) { /* ASCII is assumed - ZCODE is the ASCII code of either lower or */ /* upper case 'Z'. */ if (inta >= 97 && inta <= 122) { inta += -32; } if (intb >= 97 && intb <= 122) { intb += -32; } } else if (zcode == 233 || zcode == 169) { /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */ /* upper case 'Z'. */ if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta >= 162 && inta <= 169) { inta += 64; } if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb >= 162 && intb <= 169) { intb += 64; } } else if (zcode == 218 || zcode == 250) { /* ASCII is assumed, on Prime machines - ZCODE is the ASCII code */ /* plus 128 of either lower or upper case 'Z'. */ if (inta >= 225 && inta <= 250) { inta += -32; } if (intb >= 225 && intb <= 250) { intb += -32; } } ret_val = inta == intb; /* RETURN */ /* End of LSAME */ return ret_val; } /* _starpu_lsame_ */ starpu-1.4.9+dfsg/min-dgels/additional/mindgels.h000066400000000000000000000003571507764646700217570ustar00rootroot00000000000000#ifndef DGELS_H #define DGELS_H #include "f2c.h" int _starpu_dgels_(char *trans, integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info); #endif starpu-1.4.9+dfsg/min-dgels/additional/pow_di.c000066400000000000000000000007001507764646700214210ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers double pow_di(ap, bp) doublereal *ap; integer *bp; #else double pow_di(doublereal *ap, integer *bp) #endif { double pow, x; integer n; unsigned long u; pow = 1; x = *ap; n = *bp; if(n != 0) { if(n < 0) { n = -n; x = 1/x; } for(u = n; ; ) { if(u & 01) pow *= x; if(u >>= 1) x *= x; else break; } } return(pow); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/additional/s_cat.c000066400000000000000000000026621507764646700212420ustar00rootroot00000000000000/* Unless compiled with -DNO_OVERWRITE, this variant of s_cat allows the * target of a concatenation to appear on its right-hand side (contrary * to the Fortran 77 Standard, but in accordance with Fortran 90). */ #include "f2c.h" #ifndef NO_OVERWRITE #include "stdio.h" #undef abs #ifdef KR_headers extern char *F77_aloc(); extern void free(); extern void exit_(); #else #undef min #undef max #include "stdlib.h" extern #ifdef __cplusplus "C" #endif char *F77_aloc(ftnlen, const char*); #endif #include "string.h" #endif /* NO_OVERWRITE */ #ifdef __cplusplus extern "C" { #endif VOID #ifdef KR_headers s_cat(lp, rpp, rnp, np, ll) char *lp, *rpp[]; ftnint rnp[], *np; ftnlen ll; #else s_cat(char *lp, char *rpp[], ftnint rnp[], ftnint *np, ftnlen ll) #endif { ftnlen i, nc; char *rp; ftnlen n = *np; #ifndef NO_OVERWRITE ftnlen L, m; char *lp0, *lp1; lp0 = 0; lp1 = lp; L = ll; i = 0; while(i < n) { rp = rpp[i]; m = rnp[i++]; if (rp >= lp1 || rp + m <= lp) { if ((L -= m) <= 0) { n = i; break; } lp1 += m; continue; } lp0 = lp; lp = lp1 = F77_aloc(L = ll, "s_cat"); break; } lp1 = lp; #endif /* NO_OVERWRITE */ for(i = 0 ; i < n ; ++i) { nc = ll; if(rnp[i] < nc) nc = rnp[i]; ll -= nc; rp = rpp[i]; while(--nc >= 0) *lp++ = *rp++; } while(--ll >= 0) *lp++ = ' '; #ifndef NO_OVERWRITE if (lp0) { memcpy(lp0, lp1, L); free(lp1); } #endif } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/additional/sysdep1.h000066400000000000000000000022621507764646700215420ustar00rootroot00000000000000#ifndef SYSDEP_H_INCLUDED #define SYSDEP_H_INCLUDED #undef USE_LARGEFILE #ifndef NO_LONG_LONG #ifdef __sun__ #define USE_LARGEFILE #define OFF_T off64_t #endif #ifdef __linux__ #define USE_LARGEFILE #define OFF_T __off64_t #endif #ifdef _AIX43 #define _LARGE_FILES #define _LARGE_FILE_API #define USE_LARGEFILE #endif /*_AIX43*/ #ifdef __hpux #define _FILE64 #define _LARGEFILE64_SOURCE #define USE_LARGEFILE #endif /*__hpux*/ #ifdef __sgi #define USE_LARGEFILE #endif /*__sgi*/ #ifdef __FreeBSD__ #define OFF_T off_t #define FSEEK fseeko #define FTELL ftello #endif #ifdef USE_LARGEFILE #ifndef OFF_T #define OFF_T off64_t #endif #define _LARGEFILE_SOURCE #define _LARGEFILE64_SOURCE #include #include #define FOPEN fopen64 #define FREOPEN freopen64 #define FSEEK fseeko64 #define FSTAT fstat64 #define FTELL ftello64 #define FTRUNCATE ftruncate64 #define STAT stat64 #define STAT_ST stat64 #endif /*USE_LARGEFILE*/ #endif /*NO_LONG_LONG*/ #ifndef NON_UNIX_STDIO #ifndef USE_LARGEFILE #define _INCLUDE_POSIX_SOURCE /* for HP-UX */ #define _INCLUDE_XOPEN_SOURCE /* for HP-UX */ #include "sys/types.h" #include "sys/stat.h" #endif #endif #endif /*SYSDEP_H_INCLUDED*/ starpu-1.4.9+dfsg/min-dgels/additional/wsfe.c000066400000000000000000000024001507764646700211030ustar00rootroot00000000000000/*write sequential formatted external*/ #include "f2c.h" #include "fio.h" #include "fmt.h" #ifdef __cplusplus extern "C" { #endif int x_wSL(Void) { int n = f__putbuf('\n'); f__hiwater = f__recpos = f__cursor = 0; return(n == 0); } static int xw_end(Void) { int n; if(f__nonl) { f__putbuf(n = 0); fflush(f__cf); } else n = f__putbuf('\n'); f__hiwater = f__recpos = f__cursor = 0; return n; } static int xw_rev(Void) { int n = 0; if(f__workdone) { n = f__putbuf('\n'); f__workdone = 0; } f__hiwater = f__recpos = f__cursor = 0; return n; } #ifdef KR_headers integer s_wsfe(a) cilist *a; /*start*/ #else integer s_wsfe(cilist *a) /*start*/ #endif { int n; if(!f__init) f_init(); f__reading=0; f__sequential=1; f__formatted=1; f__external=1; if(n=c_sfe(a)) return(n); f__elist=a; f__hiwater = f__cursor=f__recpos=0; f__nonl = 0; f__scale=0; f__fmtbuf=a->cifmt; f__cf=f__curunit->ufd; if(pars_f(f__fmtbuf)<0) err(a->cierr,100,"startio"); f__putn= x_putc; f__doed= w_ed; f__doned= w_ned; f__doend=xw_end; f__dorevert=xw_rev; f__donewrec=x_wSL; fmt_bg(); f__cplus=0; f__cblank=f__curunit->ublnk; if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) err(a->cierr,errno,"write start"); return(0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/additional/xerbla.c000066400000000000000000000034261507764646700214250ustar00rootroot00000000000000/* xerbla.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" #include "stdio.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_xerbla_(char *srname, integer *info) { /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* XERBLA is an error handler for the LAPACK routines. */ /* It is called by an LAPACK routine if an input parameter has an */ /* invalid value. A message is printed and execution stops. */ /* Installers may consider modifying the STOP statement in order to */ /* call system-specific exception-handling facilities. */ /* Arguments */ /* ========= */ /* SRNAME (input) CHARACTER*(*) */ /* The name of the routine which called XERBLA. */ /* INFO (input) INTEGER */ /* The position of the invalid parameter in the parameter list */ /* of the calling routine. */ /* ===================================================================== */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ printf("** On entry to %s, parameter number %ld had an illegal value\n", srname, *info); /* End of XERBLA */ return 0; } /* _starpu_xerbla_ */ starpu-1.4.9+dfsg/min-dgels/base/000077500000000000000000000000001507764646700166015ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/BLAS/000077500000000000000000000000001507764646700173225ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/000077500000000000000000000000001507764646700177515ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/Makefile000066400000000000000000000074371507764646700214240ustar00rootroot00000000000000TOPDIR=../.. include $(TOPDIR)/make.inc ####################################################################### # This is the makefile to create a library for the BLAS. # The files are grouped as follows: # # SBLAS1 -- Single precision real BLAS routines # CBLAS1 -- Single precision complex BLAS routines # DBLAS1 -- Double precision real BLAS routines # ZBLAS1 -- Double precision complex BLAS routines # # CB1AUX -- Real BLAS routines called by complex routines # ZB1AUX -- D.P. real BLAS routines called by d.p. complex # routines # # ALLBLAS -- Auxiliary routines for Level 2 and 3 BLAS # # SBLAS2 -- Single precision real BLAS2 routines # CBLAS2 -- Single precision complex BLAS2 routines # DBLAS2 -- Double precision real BLAS2 routines # ZBLAS2 -- Double precision complex BLAS2 routines # # SBLAS3 -- Single precision real BLAS3 routines # CBLAS3 -- Single precision complex BLAS3 routines # DBLAS3 -- Double precision real BLAS3 routines # ZBLAS3 -- Double precision complex BLAS3 routines # # The library can be set up to include routines for any combination # of the four precisions. To create or add to the library, enter make # followed by one or more of the precisions desired. Some examples: # make single # make single complex # make single double complex complex16 # Note that these commands are not safe for parallel builds. # # Alternatively, the commands # make all # or # make # without any arguments creates a library of all four precisions. # The name of the library is held in BLASLIB, which is set in the # top-level make.inc # # To remove the object files after the library is created, enter # make clean # To force the source files to be recompiled, enter, for example, # make single FRC=FRC # #--------------------------------------------------------------------- # # Edward Anderson, University of Tennessee # March 26, 1990 # Susan Ostrouchov, Last updated September 30, 1994 # ejr, May 2006. # ####################################################################### all: $(BLASLIB) #--------------------------------------------------------- # Comment out the next 6 definitions if you already have # the Level 1 BLAS. #--------------------------------------------------------- DBLAS1 = idamax.o dasum.o daxpy.o dcopy.o ddot.o dnrm2.o \ drot.o drotg.o dscal.o dsdot.o dswap.o drotmg.o drotm.o $(DBLAS1): $(FRC) #--------------------------------------------------------------------- # The following line defines auxiliary routines needed by both the # Level 2 and Level 3 BLAS. Comment it out only if you already have # both the Level 2 and 3 BLAS. #--------------------------------------------------------------------- ALLBLAS = lsame.o xerbla.o xerbla_array.o $(ALLBLAS) : $(FRC) #--------------------------------------------------------- # Comment out the next 4 definitions if you already have # the Level 2 BLAS. #--------------------------------------------------------- DBLAS2 = dgemv.o dgbmv.o dsymv.o dsbmv.o dspmv.o \ dtrmv.o dtbmv.o dtpmv.o dtrsv.o dtbsv.o dtpsv.o \ dger.o dsyr.o dspr.o dsyr2.o dspr2.o $(DBLAS2): $(FRC) #--------------------------------------------------------- # Comment out the next 4 definitions if you already have # the Level 3 BLAS. #--------------------------------------------------------- DBLAS3 = dgemm.o dsymm.o dsyrk.o dsyr2k.o dtrmm.o dtrsm.o $(DBLAS3): $(FRC) ALLOBJ= $(DBLAS1) $(DBLAS2) $(DBLAS3) $(ALLBLAS) $(BLASLIB): $(ALLOBJ) $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) $(RANLIB) $@ double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3) $(ARCH) $(ARCHFLAGS) $(BLASLIB) $(DBLAS1) $(ALLBLAS) \ $(DBLAS2) $(DBLAS3) $(RANLIB) $(BLASLIB) FRC: @FRC=$(FRC) clean: rm -f *.o .c.o: $(CC) $(CFLAGS) -c $< -o $@ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dasum.c000066400000000000000000000044621507764646700212340ustar00rootroot00000000000000/* dasum.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_dasum_(integer *n, doublereal *dx, integer *incx) { /* System generated locals */ integer i__1, i__2; doublereal ret_val, d__1, d__2, d__3, d__4, d__5, d__6; /* Local variables */ integer i__, m, mp1; doublereal dtemp; integer nincx; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* takes the sum of the absolute values. */ /* jack dongarra, linpack, 3/11/78. */ /* modified 3/93 to return if incx .le. 0. */ /* modified 12/3/93, array(1) declarations changed to array(*) */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Parameter adjustments */ --dx; /* Function Body */ ret_val = 0.; dtemp = 0.; if (*n <= 0 || *incx <= 0) { return ret_val; } if (*incx == 1) { goto L20; } /* code for increment not equal to 1 */ nincx = *n * *incx; i__1 = nincx; i__2 = *incx; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { dtemp += (d__1 = dx[i__], abs(d__1)); /* L10: */ } ret_val = dtemp; return ret_val; /* code for increment equal to 1 */ /* clean-up loop */ L20: m = *n % 6; if (m == 0) { goto L40; } i__2 = m; for (i__ = 1; i__ <= i__2; ++i__) { dtemp += (d__1 = dx[i__], abs(d__1)); /* L30: */ } if (*n < 6) { goto L60; } L40: mp1 = m + 1; i__2 = *n; for (i__ = mp1; i__ <= i__2; i__ += 6) { dtemp = dtemp + (d__1 = dx[i__], abs(d__1)) + (d__2 = dx[i__ + 1], abs(d__2)) + (d__3 = dx[i__ + 2], abs(d__3)) + (d__4 = dx[i__ + 3], abs(d__4)) + (d__5 = dx[i__ + 4], abs(d__5)) + (d__6 = dx[i__ + 5], abs(d__6)); /* L50: */ } L60: ret_val = dtemp; return ret_val; } /* _starpu_dasum_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/daxpy.c000066400000000000000000000043701507764646700212460ustar00rootroot00000000000000/* daxpy.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_daxpy_(integer *n, doublereal *da, doublereal *dx, integer *incx, doublereal *dy, integer *incy) { /* System generated locals */ integer i__1; /* Local variables */ integer i__, m, ix, iy, mp1; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* constant times a vector plus a vector. */ /* uses unrolled loops for increments equal to one. */ /* jack dongarra, linpack, 3/11/78. */ /* modified 12/3/93, array(1) declarations changed to array(*) */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Parameter adjustments */ --dy; --dx; /* Function Body */ if (*n <= 0) { return 0; } if (*da == 0.) { return 0; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments */ /* not equal to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dy[iy] += *da * dx[ix]; ix += *incx; iy += *incy; /* L10: */ } return 0; /* code for both increments equal to 1 */ /* clean-up loop */ L20: m = *n % 4; if (m == 0) { goto L40; } i__1 = m; for (i__ = 1; i__ <= i__1; ++i__) { dy[i__] += *da * dx[i__]; /* L30: */ } if (*n < 4) { return 0; } L40: mp1 = m + 1; i__1 = *n; for (i__ = mp1; i__ <= i__1; i__ += 4) { dy[i__] += *da * dx[i__]; dy[i__ + 1] += *da * dx[i__ + 1]; dy[i__ + 2] += *da * dx[i__ + 2]; dy[i__ + 3] += *da * dx[i__ + 3]; /* L50: */ } return 0; } /* _starpu_daxpy_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dcabs1.c000066400000000000000000000017511507764646700212560ustar00rootroot00000000000000/* dcabs1.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_dcabs1_(doublecomplex *z__) { /* System generated locals */ doublereal ret_val, d__1, d__2; /* Builtin functions */ double d_imag(doublecomplex *); /* .. Scalar Arguments .. */ /* .. */ /* .. */ /* Purpose */ /* ======= */ /* DCABS1 computes absolute value of a double complex number */ /* .. Intrinsic Functions .. */ ret_val = (d__1 = z__->r, abs(d__1)) + (d__2 = d_imag(z__), abs(d__2)); return ret_val; } /* _starpu_dcabs1_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dcopy.c000066400000000000000000000043511507764646700212360ustar00rootroot00000000000000/* dcopy.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dcopy_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy) { /* System generated locals */ integer i__1; /* Local variables */ integer i__, m, ix, iy, mp1; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* copies a vector, x, to a vector, y. */ /* uses unrolled loops for increments equal to one. */ /* jack dongarra, linpack, 3/11/78. */ /* modified 12/3/93, array(1) declarations changed to array(*) */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Parameter adjustments */ --dy; --dx; /* Function Body */ if (*n <= 0) { return 0; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments */ /* not equal to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dy[iy] = dx[ix]; ix += *incx; iy += *incy; /* L10: */ } return 0; /* code for both increments equal to 1 */ /* clean-up loop */ L20: m = *n % 7; if (m == 0) { goto L40; } i__1 = m; for (i__ = 1; i__ <= i__1; ++i__) { dy[i__] = dx[i__]; /* L30: */ } if (*n < 7) { return 0; } L40: mp1 = m + 1; i__1 = *n; for (i__ = mp1; i__ <= i__1; i__ += 7) { dy[i__] = dx[i__]; dy[i__ + 1] = dx[i__ + 1]; dy[i__ + 2] = dx[i__ + 2]; dy[i__ + 3] = dx[i__ + 3]; dy[i__ + 4] = dx[i__ + 4]; dy[i__ + 5] = dx[i__ + 5]; dy[i__ + 6] = dx[i__ + 6]; /* L50: */ } return 0; } /* _starpu_dcopy_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/ddot.c000066400000000000000000000045371507764646700210600ustar00rootroot00000000000000/* ddot.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy) { /* System generated locals */ integer i__1; doublereal ret_val; /* Local variables */ integer i__, m, ix, iy, mp1; doublereal dtemp; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* forms the dot product of two vectors. */ /* uses unrolled loops for increments equal to one. */ /* jack dongarra, linpack, 3/11/78. */ /* modified 12/3/93, array(1) declarations changed to array(*) */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Parameter adjustments */ --dy; --dx; /* Function Body */ ret_val = 0.; dtemp = 0.; if (*n <= 0) { return ret_val; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments */ /* not equal to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dtemp += dx[ix] * dy[iy]; ix += *incx; iy += *incy; /* L10: */ } ret_val = dtemp; return ret_val; /* code for both increments equal to 1 */ /* clean-up loop */ L20: m = *n % 5; if (m == 0) { goto L40; } i__1 = m; for (i__ = 1; i__ <= i__1; ++i__) { dtemp += dx[i__] * dy[i__]; /* L30: */ } if (*n < 5) { goto L60; } L40: mp1 = m + 1; i__1 = *n; for (i__ = mp1; i__ <= i__1; i__ += 5) { dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + dx[ i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] + dx[i__ + 4] * dy[i__ + 4]; /* L50: */ } L60: ret_val = dtemp; return ret_val; } /* _starpu_ddot_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dgbmv.c000066400000000000000000000232451507764646700212220ustar00rootroot00000000000000/* dgbmv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgbmv_(char *trans, integer *m, integer *n, integer *kl, integer *ku, doublereal *alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; /* Local variables */ integer i__, j, k, ix, iy, jx, jy, kx, ky, kup1, info; doublereal temp; integer lenx, leny; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGBMV performs one of the matrix-vector operations */ /* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, */ /* where alpha and beta are scalars, x and y are vectors and A is an */ /* m by n band matrix, with kl sub-diagonals and ku super-diagonals. */ /* Arguments */ /* ========== */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. */ /* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. */ /* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. */ /* Unchanged on exit. */ /* M - INTEGER. */ /* On entry, M specifies the number of rows of the matrix A. */ /* M must be at least zero. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* KL - INTEGER. */ /* On entry, KL specifies the number of sub-diagonals of the */ /* matrix A. KL must satisfy 0 .le. KL. */ /* Unchanged on exit. */ /* KU - INTEGER. */ /* On entry, KU specifies the number of super-diagonals of the */ /* matrix A. KU must satisfy 0 .le. KU. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry, the leading ( kl + ku + 1 ) by n part of the */ /* array A must contain the matrix of coefficients, supplied */ /* column by column, with the leading diagonal of the matrix in */ /* row ( ku + 1 ) of the array, the first super-diagonal */ /* starting at position 2 in row ku, the first sub-diagonal */ /* starting at position 1 in row ( ku + 2 ), and so on. */ /* Elements in the array A that do not correspond to elements */ /* in the band matrix (such as the top left ku by ku triangle) */ /* are not referenced. */ /* The following program segment will transfer a band matrix */ /* from conventional full matrix storage to band storage: */ /* DO 20, J = 1, N */ /* K = KU + 1 - J */ /* DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) */ /* A( K + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( kl + ku + 1 ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ /* Before entry, the incremented array X must contain the */ /* vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION. */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then Y need not be set on input. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ /* Before entry, the incremented array Y must contain the */ /* vector y. On exit, Y is overwritten by the updated vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --x; --y; /* Function Body */ info = 0; if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C") ) { info = 1; } else if (*m < 0) { info = 2; } else if (*n < 0) { info = 3; } else if (*kl < 0) { info = 4; } else if (*ku < 0) { info = 5; } else if (*lda < *kl + *ku + 1) { info = 8; } else if (*incx == 0) { info = 10; } else if (*incy == 0) { info = 13; } if (info != 0) { _starpu_xerbla_("DGBMV ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { return 0; } /* Set LENX and LENY, the lengths of the vectors x and y, and set */ /* up the start points in X and Y. */ if (_starpu_lsame_(trans, "N")) { lenx = *n; leny = *m; } else { lenx = *m; leny = *n; } if (*incx > 0) { kx = 1; } else { kx = 1 - (lenx - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (leny - 1) * *incy; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through the band part of A. */ /* First form y := beta*y. */ if (*beta != 1.) { if (*incy == 1) { if (*beta == 0.) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = 0.; /* L10: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = *beta * y[i__]; /* L20: */ } } } else { iy = ky; if (*beta == 0.) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = 0.; iy += *incy; /* L30: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = *beta * y[iy]; iy += *incy; /* L40: */ } } } } if (*alpha == 0.) { return 0; } kup1 = *ku + 1; if (_starpu_lsame_(trans, "N")) { /* Form y := alpha*A*x + y. */ jx = kx; if (*incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = *alpha * x[jx]; k = kup1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__4 = min(i__5,i__6); for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { y[i__] += temp * a[k + i__ + j * a_dim1]; /* L50: */ } } jx += *incx; /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = *alpha * x[jx]; iy = ky; k = kup1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__3 = min(i__5,i__6); for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { y[iy] += temp * a[k + i__ + j * a_dim1]; iy += *incy; /* L70: */ } } jx += *incx; if (j > *ku) { ky += *incy; } /* L80: */ } } } else { /* Form y := alpha*A'*x + y. */ jy = ky; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = 0.; k = kup1 - j; /* Computing MAX */ i__3 = 1, i__4 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__2 = min(i__5,i__6); for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) { temp += a[k + i__ + j * a_dim1] * x[i__]; /* L90: */ } y[jy] += *alpha * temp; jy += *incy; /* L100: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = 0.; ix = kx; k = kup1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__4 = min(i__5,i__6); for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { temp += a[k + i__ + j * a_dim1] * x[ix]; ix += *incx; /* L110: */ } y[jy] += *alpha * temp; jy += *incy; if (j > *ku) { kx += *incx; } /* L120: */ } } } return 0; /* End of DGBMV . */ } /* _starpu_dgbmv_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dgemm.c000066400000000000000000000245261507764646700212170ustar00rootroot00000000000000/* dgemm.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, doublereal *alpha, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, integer *ldc) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, l, info; logical nota, notb; doublereal temp; integer ncola; extern logical _starpu_lsame_(char *, char *); integer nrowa, nrowb; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEMM performs one of the matrix-matrix operations */ /* C := alpha*op( A )*op( B ) + beta*C, */ /* where op( X ) is one of */ /* op( X ) = X or op( X ) = X', */ /* alpha and beta are scalars, and A, B and C are matrices, with op( A ) */ /* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. */ /* Arguments */ /* ========== */ /* TRANSA - CHARACTER*1. */ /* On entry, TRANSA specifies the form of op( A ) to be used in */ /* the matrix multiplication as follows: */ /* TRANSA = 'N' or 'n', op( A ) = A. */ /* TRANSA = 'T' or 't', op( A ) = A'. */ /* TRANSA = 'C' or 'c', op( A ) = A'. */ /* Unchanged on exit. */ /* TRANSB - CHARACTER*1. */ /* On entry, TRANSB specifies the form of op( B ) to be used in */ /* the matrix multiplication as follows: */ /* TRANSB = 'N' or 'n', op( B ) = B. */ /* TRANSB = 'T' or 't', op( B ) = B'. */ /* TRANSB = 'C' or 'c', op( B ) = B'. */ /* Unchanged on exit. */ /* M - INTEGER. */ /* On entry, M specifies the number of rows of the matrix */ /* op( A ) and of the matrix C. M must be at least zero. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of the matrix */ /* op( B ) and the number of columns of the matrix C. N must be */ /* at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry, K specifies the number of columns of the matrix */ /* op( A ) and the number of rows of the matrix op( B ). K must */ /* be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */ /* k when TRANSA = 'N' or 'n', and is m otherwise. */ /* Before entry with TRANSA = 'N' or 'n', the leading m by k */ /* part of the array A must contain the matrix A, otherwise */ /* the leading k by m part of the array A must contain the */ /* matrix A. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. When TRANSA = 'N' or 'n' then */ /* LDA must be at least max( 1, m ), otherwise LDA must be at */ /* least max( 1, k ). */ /* Unchanged on exit. */ /* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is */ /* n when TRANSB = 'N' or 'n', and is k otherwise. */ /* Before entry with TRANSB = 'N' or 'n', the leading k by n */ /* part of the array B must contain the matrix B, otherwise */ /* the leading n by k part of the array B must contain the */ /* matrix B. */ /* Unchanged on exit. */ /* LDB - INTEGER. */ /* On entry, LDB specifies the first dimension of B as declared */ /* in the calling (sub) program. When TRANSB = 'N' or 'n' then */ /* LDB must be at least max( 1, k ), otherwise LDB must be at */ /* least max( 1, n ). */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION. */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then C need not be set on input. */ /* Unchanged on exit. */ /* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */ /* Before entry, the leading m by n part of the array C must */ /* contain the matrix C, except when beta is zero, in which */ /* case C need not be set on entry. */ /* On exit, the array C is overwritten by the m by n matrix */ /* ( alpha*op( A )*op( B ) + beta*C ). */ /* LDC - INTEGER. */ /* On entry, LDC specifies the first dimension of C as declared */ /* in the calling (sub) program. LDC must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* Level 3 Blas routine. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Parameters .. */ /* .. */ /* Set NOTA and NOTB as true if A and B respectively are not */ /* transposed and set NROWA, NCOLA and NROWB as the number of rows */ /* and columns of A and the number of rows of B respectively. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; /* Function Body */ nota = _starpu_lsame_(transa, "N"); notb = _starpu_lsame_(transb, "N"); if (nota) { nrowa = *m; ncola = *k; } else { nrowa = *k; ncola = *m; } if (notb) { nrowb = *k; } else { nrowb = *n; } /* Test the input parameters. */ info = 0; if (! nota && ! _starpu_lsame_(transa, "C") && ! _starpu_lsame_( transa, "T")) { info = 1; } else if (! notb && ! _starpu_lsame_(transb, "C") && ! _starpu_lsame_(transb, "T")) { info = 2; } else if (*m < 0) { info = 3; } else if (*n < 0) { info = 4; } else if (*k < 0) { info = 5; } else if (*lda < max(1,nrowa)) { info = 8; } else if (*ldb < max(1,nrowb)) { info = 10; } else if (*ldc < max(1,*m)) { info = 13; } if (info != 0) { _starpu_xerbla_("DGEMM ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { return 0; } /* And if alpha.eq.zero. */ if (*alpha == 0.) { if (*beta == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L10: */ } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L30: */ } /* L40: */ } } return 0; } /* Start the operations. */ if (notb) { if (nota) { /* Form C := alpha*A*B + beta*C. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*beta == 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L50: */ } } else if (*beta != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L60: */ } } i__2 = *k; for (l = 1; l <= i__2; ++l) { if (b[l + j * b_dim1] != 0.) { temp = *alpha * b[l + j * b_dim1]; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1]; /* L70: */ } } /* L80: */ } /* L90: */ } } else { /* Form C := alpha*A'*B + beta*C */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = 0.; i__3 = *k; for (l = 1; l <= i__3; ++l) { temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; /* L100: */ } if (*beta == 0.) { c__[i__ + j * c_dim1] = *alpha * temp; } else { c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ i__ + j * c_dim1]; } /* L110: */ } /* L120: */ } } } else { if (nota) { /* Form C := alpha*A*B' + beta*C */ i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*beta == 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L130: */ } } else if (*beta != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L140: */ } } i__2 = *k; for (l = 1; l <= i__2; ++l) { if (b[j + l * b_dim1] != 0.) { temp = *alpha * b[j + l * b_dim1]; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1]; /* L150: */ } } /* L160: */ } /* L170: */ } } else { /* Form C := alpha*A'*B' + beta*C */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = 0.; i__3 = *k; for (l = 1; l <= i__3; ++l) { temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; /* L180: */ } if (*beta == 0.) { c__[i__ + j * c_dim1] = *alpha * temp; } else { c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ i__ + j * c_dim1]; } /* L190: */ } /* L200: */ } } } return 0; /* End of DGEMM . */ } /* _starpu_dgemm_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dgemv.c000066400000000000000000000170561507764646700212300ustar00rootroot00000000000000/* dgemv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgemv_(char *trans, integer *m, integer *n, doublereal * alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, ix, iy, jx, jy, kx, ky, info; doublereal temp; integer lenx, leny; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEMV performs one of the matrix-vector operations */ /* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, */ /* where alpha and beta are scalars, x and y are vectors and A is an */ /* m by n matrix. */ /* Arguments */ /* ========== */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. */ /* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. */ /* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. */ /* Unchanged on exit. */ /* M - INTEGER. */ /* On entry, M specifies the number of rows of the matrix A. */ /* M must be at least zero. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry, the leading m by n part of the array A must */ /* contain the matrix of coefficients. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ /* Before entry, the incremented array X must contain the */ /* vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION. */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then Y need not be set on input. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ /* Before entry with BETA non-zero, the incremented array Y */ /* must contain the vector y. On exit, Y is overwritten by the */ /* updated vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --x; --y; /* Function Body */ info = 0; if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C") ) { info = 1; } else if (*m < 0) { info = 2; } else if (*n < 0) { info = 3; } else if (*lda < max(1,*m)) { info = 6; } else if (*incx == 0) { info = 8; } else if (*incy == 0) { info = 11; } if (info != 0) { _starpu_xerbla_("DGEMV ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { return 0; } /* Set LENX and LENY, the lengths of the vectors x and y, and set */ /* up the start points in X and Y. */ if (_starpu_lsame_(trans, "N")) { lenx = *n; leny = *m; } else { lenx = *m; leny = *n; } if (*incx > 0) { kx = 1; } else { kx = 1 - (lenx - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (leny - 1) * *incy; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through A. */ /* First form y := beta*y. */ if (*beta != 1.) { if (*incy == 1) { if (*beta == 0.) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = 0.; /* L10: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = *beta * y[i__]; /* L20: */ } } } else { iy = ky; if (*beta == 0.) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = 0.; iy += *incy; /* L30: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = *beta * y[iy]; iy += *incy; /* L40: */ } } } } if (*alpha == 0.) { return 0; } if (_starpu_lsame_(trans, "N")) { /* Form y := alpha*A*x + y. */ jx = kx; if (*incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = *alpha * x[jx]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { y[i__] += temp * a[i__ + j * a_dim1]; /* L50: */ } } jx += *incx; /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = *alpha * x[jx]; iy = ky; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { y[iy] += temp * a[i__ + j * a_dim1]; iy += *incy; /* L70: */ } } jx += *incx; /* L80: */ } } } else { /* Form y := alpha*A'*x + y. */ jy = ky; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = 0.; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp += a[i__ + j * a_dim1] * x[i__]; /* L90: */ } y[jy] += *alpha * temp; jy += *incy; /* L100: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = 0.; ix = kx; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp += a[i__ + j * a_dim1] * x[ix]; ix += *incx; /* L110: */ } y[jy] += *alpha * temp; jy += *incy; /* L120: */ } } } return 0; /* End of DGEMV . */ } /* _starpu_dgemv_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dger.c000066400000000000000000000116031507764646700210370ustar00rootroot00000000000000/* dger.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dger_(integer *m, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, ix, jy, kx, info; doublereal temp; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGER performs the rank 1 operation */ /* A := alpha*x*y' + A, */ /* where alpha is a scalar, x is an m element vector, y is an n element */ /* vector and A is an m by n matrix. */ /* Arguments */ /* ========== */ /* M - INTEGER. */ /* On entry, M specifies the number of rows of the matrix A. */ /* M must be at least zero. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( m - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the m */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the n */ /* element vector y. */ /* Unchanged on exit. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry, the leading m by n part of the array A must */ /* contain the matrix of coefficients. On exit, A is */ /* overwritten by the updated matrix. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ --x; --y; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ info = 0; if (*m < 0) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*incy == 0) { info = 7; } else if (*lda < max(1,*m)) { info = 9; } if (info != 0) { _starpu_xerbla_("DGER ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || *alpha == 0.) { return 0; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through A. */ if (*incy > 0) { jy = 1; } else { jy = 1 - (*n - 1) * *incy; } if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (y[jy] != 0.) { temp = *alpha * y[jy]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] += x[i__] * temp; /* L10: */ } } jy += *incy; /* L20: */ } } else { if (*incx > 0) { kx = 1; } else { kx = 1 - (*m - 1) * *incx; } i__1 = *n; for (j = 1; j <= i__1; ++j) { if (y[jy] != 0.) { temp = *alpha * y[jy]; ix = kx; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] += x[ix] * temp; ix += *incx; /* L30: */ } } jy += *incy; /* L40: */ } } return 0; /* End of DGER . */ } /* _starpu_dger_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dnrm2.c000066400000000000000000000042701507764646700211420ustar00rootroot00000000000000/* dnrm2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_dnrm2_(integer *n, doublereal *x, integer *incx) { /* System generated locals */ integer i__1, i__2; doublereal ret_val, d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer ix; doublereal ssq, norm, scale, absxi; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DNRM2 returns the euclidean norm of a vector via the function */ /* name, so that */ /* DNRM2 := sqrt( x'*x ) */ /* -- This version written on 25-October-1982. */ /* Modified on 14-October-1993 to inline the call to DLASSQ. */ /* Sven Hammarling, Nag Ltd. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Parameter adjustments */ --x; /* Function Body */ if (*n < 1 || *incx < 1) { norm = 0.; } else if (*n == 1) { norm = abs(x[1]); } else { scale = 0.; ssq = 1.; /* The following loop is equivalent to this call to the LAPACK */ /* auxiliary routine: */ /* CALL DLASSQ( N, X, INCX, SCALE, SSQ ) */ i__1 = (*n - 1) * *incx + 1; i__2 = *incx; for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { if (x[ix] != 0.) { absxi = (d__1 = x[ix], abs(d__1)); if (scale < absxi) { /* Computing 2nd power */ d__1 = scale / absxi; ssq = ssq * (d__1 * d__1) + 1.; scale = absxi; } else { /* Computing 2nd power */ d__1 = absxi / scale; ssq += d__1 * d__1; } } /* L10: */ } norm = scale * sqrt(ssq); } ret_val = norm; return ret_val; /* End of DNRM2. */ } /* _starpu_dnrm2_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/drot.c000066400000000000000000000036461507764646700210760ustar00rootroot00000000000000/* drot.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_drot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy, doublereal *c__, doublereal *s) { /* System generated locals */ integer i__1; /* Local variables */ integer i__, ix, iy; doublereal dtemp; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* applies a plane rotation. */ /* jack dongarra, linpack, 3/11/78. */ /* modified 12/3/93, array(1) declarations changed to array(*) */ /* .. Local Scalars .. */ /* .. */ /* Parameter adjustments */ --dy; --dx; /* Function Body */ if (*n <= 0) { return 0; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments not equal */ /* to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dtemp = *c__ * dx[ix] + *s * dy[iy]; dy[iy] = *c__ * dy[iy] - *s * dx[ix]; dx[ix] = dtemp; ix += *incx; iy += *incy; /* L10: */ } return 0; /* code for both increments equal to 1 */ L20: i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dtemp = *c__ * dx[i__] + *s * dy[i__]; dy[i__] = *c__ * dy[i__] - *s * dx[i__]; dx[i__] = dtemp; /* L30: */ } return 0; } /* _starpu_drot_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/drotg.c000066400000000000000000000034001507764646700212310ustar00rootroot00000000000000/* drotg.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b4 = 1.; /* Subroutine */ int _starpu_drotg_(doublereal *da, doublereal *db, doublereal *c__, doublereal *s) { /* System generated locals */ doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ doublereal r__, z__, roe, scale; /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* construct givens plane rotation. */ /* jack dongarra, linpack, 3/11/78. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ roe = *db; if (abs(*da) > abs(*db)) { roe = *da; } scale = abs(*da) + abs(*db); if (scale != 0.) { goto L10; } *c__ = 1.; *s = 0.; r__ = 0.; z__ = 0.; goto L20; L10: /* Computing 2nd power */ d__1 = *da / scale; /* Computing 2nd power */ d__2 = *db / scale; r__ = scale * sqrt(d__1 * d__1 + d__2 * d__2); r__ = d_sign(&c_b4, &roe) * r__; *c__ = *da / r__; *s = *db / r__; z__ = 1.; if (abs(*da) > abs(*db)) { z__ = *s; } if (abs(*db) >= abs(*da) && *c__ != 0.) { z__ = 1. / *c__; } L20: *da = r__; *db = z__; return 0; } /* _starpu_drotg_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/drotm.c000066400000000000000000000116071507764646700212470ustar00rootroot00000000000000/* drotm.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_drotm_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy, doublereal *dparam) { /* Initialized data */ static doublereal zero = 0.; static doublereal two = 2.; /* System generated locals */ integer i__1, i__2; /* Local variables */ integer i__; doublereal w, z__; integer kx, ky; doublereal dh11, dh12, dh21, dh22, dflag; integer nsteps; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */ /* (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN */ /* (DY**T) */ /* DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */ /* LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. */ /* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ /* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */ /* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */ /* H=( ) ( ) ( ) ( ) */ /* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */ /* SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* number of elements in input vector(s) */ /* DX (input/output) DOUBLE PRECISION array, dimension N */ /* double precision vector with N elements */ /* INCX (input) INTEGER */ /* storage spacing between elements of DX */ /* DY (input/output) DOUBLE PRECISION array, dimension N */ /* double precision vector with N elements */ /* INCY (input) INTEGER */ /* storage spacing between elements of DY */ /* DPARAM (input/output) DOUBLE PRECISION array, dimension 5 */ /* DPARAM(1)=DFLAG */ /* DPARAM(2)=DH11 */ /* DPARAM(3)=DH21 */ /* DPARAM(4)=DH12 */ /* DPARAM(5)=DH22 */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Data statements .. */ /* Parameter adjustments */ --dparam; --dy; --dx; /* Function Body */ /* .. */ dflag = dparam[1]; if (*n <= 0 || dflag + two == zero) { goto L140; } if (! (*incx == *incy && *incx > 0)) { goto L70; } nsteps = *n * *incx; if (dflag < 0.) { goto L50; } else if (dflag == 0) { goto L10; } else { goto L30; } L10: dh12 = dparam[4]; dh21 = dparam[3]; i__1 = nsteps; i__2 = *incx; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { w = dx[i__]; z__ = dy[i__]; dx[i__] = w + z__ * dh12; dy[i__] = w * dh21 + z__; /* L20: */ } goto L140; L30: dh11 = dparam[2]; dh22 = dparam[5]; i__2 = nsteps; i__1 = *incx; for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { w = dx[i__]; z__ = dy[i__]; dx[i__] = w * dh11 + z__; dy[i__] = -w + dh22 * z__; /* L40: */ } goto L140; L50: dh11 = dparam[2]; dh12 = dparam[4]; dh21 = dparam[3]; dh22 = dparam[5]; i__1 = nsteps; i__2 = *incx; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { w = dx[i__]; z__ = dy[i__]; dx[i__] = w * dh11 + z__ * dh12; dy[i__] = w * dh21 + z__ * dh22; /* L60: */ } goto L140; L70: kx = 1; ky = 1; if (*incx < 0) { kx = (1 - *n) * *incx + 1; } if (*incy < 0) { ky = (1 - *n) * *incy + 1; } if (dflag < 0.) { goto L120; } else if (dflag == 0) { goto L80; } else { goto L100; } L80: dh12 = dparam[4]; dh21 = dparam[3]; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { w = dx[kx]; z__ = dy[ky]; dx[kx] = w + z__ * dh12; dy[ky] = w * dh21 + z__; kx += *incx; ky += *incy; /* L90: */ } goto L140; L100: dh11 = dparam[2]; dh22 = dparam[5]; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { w = dx[kx]; z__ = dy[ky]; dx[kx] = w * dh11 + z__; dy[ky] = -w + dh22 * z__; kx += *incx; ky += *incy; /* L110: */ } goto L140; L120: dh11 = dparam[2]; dh12 = dparam[4]; dh21 = dparam[3]; dh22 = dparam[5]; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { w = dx[kx]; z__ = dy[ky]; dx[kx] = w * dh11 + z__ * dh12; dy[ky] = w * dh21 + z__ * dh22; kx += *incx; ky += *incy; /* L130: */ } L140: return 0; } /* _starpu_drotm_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/drotmg.c000066400000000000000000000141201507764646700214070ustar00rootroot00000000000000/* drotmg.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_drotmg_(doublereal *dd1, doublereal *dd2, doublereal * dx1, doublereal *dy1, doublereal *dparam) { /* Initialized data */ static doublereal zero = 0.; static doublereal one = 1.; static doublereal two = 2.; static doublereal gam = 4096.; static doublereal gamsq = 16777216.; static doublereal rgamsq = 5.9604645e-8; /* Format strings */ static char fmt_120[] = ""; static char fmt_150[] = ""; static char fmt_180[] = ""; static char fmt_210[] = ""; /* System generated locals */ doublereal d__1; /* Local variables */ doublereal du, dp1, dp2, dq1, dq2, dh11, dh12, dh21, dh22; integer igo; doublereal dflag, dtemp; /* Assigned format variables */ static char *igo_fmt; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */ /* THE SECOND COMPONENT OF THE 2-VECTOR (DSQRT(DD1)*DX1,DSQRT(DD2)* */ /* DY2)**T. */ /* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ /* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */ /* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */ /* H=( ) ( ) ( ) ( ) */ /* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */ /* LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 */ /* RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE */ /* VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) */ /* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */ /* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */ /* OF DD1 AND DD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */ /* Arguments */ /* ========= */ /* DD1 (input/output) DOUBLE PRECISION */ /* DD2 (input/output) DOUBLE PRECISION */ /* DX1 (input/output) DOUBLE PRECISION */ /* DY1 (input) DOUBLE PRECISION */ /* DPARAM (input/output) DOUBLE PRECISION array, dimension 5 */ /* DPARAM(1)=DFLAG */ /* DPARAM(2)=DH11 */ /* DPARAM(3)=DH21 */ /* DPARAM(4)=DH12 */ /* DPARAM(5)=DH22 */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Data statements .. */ /* Parameter adjustments */ --dparam; /* Function Body */ /* .. */ if (! (*dd1 < zero)) { goto L10; } /* GO ZERO-H-D-AND-DX1.. */ goto L60; L10: /* CASE-DD1-NONNEGATIVE */ dp2 = *dd2 * *dy1; if (! (dp2 == zero)) { goto L20; } dflag = -two; goto L260; /* REGULAR-CASE.. */ L20: dp1 = *dd1 * *dx1; dq2 = dp2 * *dy1; dq1 = dp1 * *dx1; if (! (abs(dq1) > abs(dq2))) { goto L40; } dh21 = -(*dy1) / *dx1; dh12 = dp2 / dp1; du = one - dh12 * dh21; if (! (du <= zero)) { goto L30; } /* GO ZERO-H-D-AND-DX1.. */ goto L60; L30: dflag = zero; *dd1 /= du; *dd2 /= du; *dx1 *= du; /* GO SCALE-CHECK.. */ goto L100; L40: if (! (dq2 < zero)) { goto L50; } /* GO ZERO-H-D-AND-DX1.. */ goto L60; L50: dflag = one; dh11 = dp1 / dp2; dh22 = *dx1 / *dy1; du = one + dh11 * dh22; dtemp = *dd2 / du; *dd2 = *dd1 / du; *dd1 = dtemp; *dx1 = *dy1 * du; /* GO SCALE-CHECK */ goto L100; /* PROCEDURE..ZERO-H-D-AND-DX1.. */ L60: dflag = -one; dh11 = zero; dh12 = zero; dh21 = zero; dh22 = zero; *dd1 = zero; *dd2 = zero; *dx1 = zero; /* RETURN.. */ goto L220; /* PROCEDURE..FIX-H.. */ L70: if (! (dflag >= zero)) { goto L90; } if (! (dflag == zero)) { goto L80; } dh11 = one; dh22 = one; dflag = -one; goto L90; L80: dh21 = -one; dh12 = one; dflag = -one; L90: switch (igo) { case 0: goto L120; case 1: goto L150; case 2: goto L180; case 3: goto L210; } /* PROCEDURE..SCALE-CHECK */ L100: L110: if (! (*dd1 <= rgamsq)) { goto L130; } if (*dd1 == zero) { goto L160; } igo = 0; igo_fmt = fmt_120; /* FIX-H.. */ goto L70; L120: /* Computing 2nd power */ d__1 = gam; *dd1 *= d__1 * d__1; *dx1 /= gam; dh11 /= gam; dh12 /= gam; goto L110; L130: L140: if (! (*dd1 >= gamsq)) { goto L160; } igo = 1; igo_fmt = fmt_150; /* FIX-H.. */ goto L70; L150: /* Computing 2nd power */ d__1 = gam; *dd1 /= d__1 * d__1; *dx1 *= gam; dh11 *= gam; dh12 *= gam; goto L140; L160: L170: if (! (abs(*dd2) <= rgamsq)) { goto L190; } if (*dd2 == zero) { goto L220; } igo = 2; igo_fmt = fmt_180; /* FIX-H.. */ goto L70; L180: /* Computing 2nd power */ d__1 = gam; *dd2 *= d__1 * d__1; dh21 /= gam; dh22 /= gam; goto L170; L190: L200: if (! (abs(*dd2) >= gamsq)) { goto L220; } igo = 3; igo_fmt = fmt_210; /* FIX-H.. */ goto L70; L210: /* Computing 2nd power */ d__1 = gam; *dd2 /= d__1 * d__1; dh21 *= gam; dh22 *= gam; goto L200; L220: if (dflag < 0.) { goto L250; } else if (dflag == 0) { goto L230; } else { goto L240; } L230: dparam[3] = dh21; dparam[4] = dh12; goto L260; L240: dparam[2] = dh11; dparam[5] = dh22; goto L260; L250: dparam[2] = dh11; dparam[3] = dh21; dparam[4] = dh12; dparam[5] = dh22; L260: dparam[1] = dflag; return 0; } /* _starpu_drotmg_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dsbmv.c000066400000000000000000000235201507764646700212320ustar00rootroot00000000000000/* dsbmv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dsbmv_(char *uplo, integer *n, integer *k, doublereal * alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, j, l, ix, iy, jx, jy, kx, ky, info; doublereal temp1, temp2; extern logical _starpu_lsame_(char *, char *); integer kplus1; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSBMV performs the matrix-vector operation */ /* y := alpha*A*x + beta*y, */ /* where alpha and beta are scalars, x and y are n element vectors and */ /* A is an n by n symmetric band matrix, with k super-diagonals. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the band matrix A is being supplied as */ /* follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* being supplied. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* being supplied. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry, K specifies the number of super-diagonals of the */ /* matrix A. K must satisfy 0 .le. K. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ /* by n part of the array A must contain the upper triangular */ /* band part of the symmetric matrix, supplied column by */ /* column, with the leading diagonal of the matrix in row */ /* ( k + 1 ) of the array, the first super-diagonal starting at */ /* position 2 in row k, and so on. The top left k by k triangle */ /* of the array A is not referenced. */ /* The following program segment will transfer the upper */ /* triangular part of a symmetric band matrix from conventional */ /* full matrix storage to band storage: */ /* DO 20, J = 1, N */ /* M = K + 1 - J */ /* DO 10, I = MAX( 1, J - K ), J */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ /* by n part of the array A must contain the lower triangular */ /* band part of the symmetric matrix, supplied column by */ /* column, with the leading diagonal of the matrix in row 1 of */ /* the array, the first sub-diagonal starting at position 1 in */ /* row 2, and so on. The bottom right k by k triangle of the */ /* array A is not referenced. */ /* The following program segment will transfer the lower */ /* triangular part of a symmetric band matrix from conventional */ /* full matrix storage to band storage: */ /* DO 20, J = 1, N */ /* M = 1 - J */ /* DO 10, I = J, MIN( N, J + K ) */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( k + 1 ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the */ /* vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION. */ /* On entry, BETA specifies the scalar beta. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the */ /* vector y. On exit, Y is overwritten by the updated vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --x; --y; /* Function Body */ info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (*n < 0) { info = 2; } else if (*k < 0) { info = 3; } else if (*lda < *k + 1) { info = 6; } else if (*incx == 0) { info = 8; } else if (*incy == 0) { info = 11; } if (info != 0) { _starpu_xerbla_("DSBMV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0. && *beta == 1.) { return 0; } /* Set up the start points in X and Y. */ if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } /* Start the operations. In this version the elements of the array A */ /* are accessed sequentially with one pass through A. */ /* First form y := beta*y. */ if (*beta != 1.) { if (*incy == 1) { if (*beta == 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = 0.; /* L10: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = *beta * y[i__]; /* L20: */ } } } else { iy = ky; if (*beta == 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = 0.; iy += *incy; /* L30: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = *beta * y[iy]; iy += *incy; /* L40: */ } } } } if (*alpha == 0.) { return 0; } if (_starpu_lsame_(uplo, "U")) { /* Form y when upper triangle of A is stored. */ kplus1 = *k + 1; if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[j]; temp2 = 0.; l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { y[i__] += temp1 * a[l + i__ + j * a_dim1]; temp2 += a[l + i__ + j * a_dim1] * x[i__]; /* L50: */ } y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; /* L60: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[jx]; temp2 = 0.; ix = kx; iy = ky; l = kplus1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { y[iy] += temp1 * a[l + i__ + j * a_dim1]; temp2 += a[l + i__ + j * a_dim1] * x[ix]; ix += *incx; iy += *incy; /* L70: */ } y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; jx += *incx; jy += *incy; if (j > *k) { kx += *incx; ky += *incy; } /* L80: */ } } } else { /* Form y when lower triangle of A is stored. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[j]; temp2 = 0.; y[j] += temp1 * a[j * a_dim1 + 1]; l = 1 - j; /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__3 = min(i__4,i__2); for (i__ = j + 1; i__ <= i__3; ++i__) { y[i__] += temp1 * a[l + i__ + j * a_dim1]; temp2 += a[l + i__ + j * a_dim1] * x[i__]; /* L90: */ } y[j] += *alpha * temp2; /* L100: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[jx]; temp2 = 0.; y[jy] += temp1 * a[j * a_dim1 + 1]; l = 1 - j; ix = jx; iy = jy; /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__3 = min(i__4,i__2); for (i__ = j + 1; i__ <= i__3; ++i__) { ix += *incx; iy += *incy; y[iy] += temp1 * a[l + i__ + j * a_dim1]; temp2 += a[l + i__ + j * a_dim1] * x[ix]; /* L110: */ } y[jy] += *alpha * temp2; jx += *incx; jy += *incy; /* L120: */ } } } return 0; /* End of DSBMV . */ } /* _starpu_dsbmv_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dscal.c000066400000000000000000000041561507764646700212110ustar00rootroot00000000000000/* dscal.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dscal_(integer *n, doublereal *da, doublereal *dx, integer *incx) { /* System generated locals */ integer i__1, i__2; /* Local variables */ integer i__, m, mp1, nincx; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* * */ /* scales a vector by a constant. */ /* uses unrolled loops for increment equal to one. */ /* jack dongarra, linpack, 3/11/78. */ /* modified 3/93 to return if incx .le. 0. */ /* modified 12/3/93, array(1) declarations changed to array(*) */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Parameter adjustments */ --dx; /* Function Body */ if (*n <= 0 || *incx <= 0) { return 0; } if (*incx == 1) { goto L20; } /* code for increment not equal to 1 */ nincx = *n * *incx; i__1 = nincx; i__2 = *incx; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { dx[i__] = *da * dx[i__]; /* L10: */ } return 0; /* code for increment equal to 1 */ /* clean-up loop */ L20: m = *n % 5; if (m == 0) { goto L40; } i__2 = m; for (i__ = 1; i__ <= i__2; ++i__) { dx[i__] = *da * dx[i__]; /* L30: */ } if (*n < 5) { return 0; } L40: mp1 = m + 1; i__2 = *n; for (i__ = mp1; i__ <= i__2; i__ += 5) { dx[i__] = *da * dx[i__]; dx[i__ + 1] = *da * dx[i__ + 1]; dx[i__ + 2] = *da * dx[i__ + 2]; dx[i__ + 3] = *da * dx[i__ + 3]; dx[i__ + 4] = *da * dx[i__ + 4]; /* L50: */ } return 0; } /* _starpu_dscal_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dsdot.c000066400000000000000000000071511507764646700212360ustar00rootroot00000000000000/* dsdot.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_dsdot_(integer *n, real *sx, integer *incx, real *sy, integer * incy) { /* System generated locals */ integer i__1, i__2; doublereal ret_val; /* Local variables */ integer i__, ns, kx, ky; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* AUTHORS */ /* ======= */ /* Lawson, C. L., (JPL), Hanson, R. J., (SNLA), */ /* Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL) */ /* Purpose */ /* ======= */ /* Compute the inner product of two vectors with extended */ /* precision accumulation and result. */ /* Returns D.P. dot product accumulated in D.P., for S.P. SX and SY */ /* DSDOT = sum for I = 0 to N-1 of SX(LX+I*INCX) * SY(LY+I*INCY), */ /* where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is */ /* defined in a similar way using INCY. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* number of elements in input vector(s) */ /* SX (input) REAL array, dimension(N) */ /* single precision vector with N elements */ /* INCX (input) INTEGER */ /* storage spacing between elements of SX */ /* SY (input) REAL array, dimension(N) */ /* single precision vector with N elements */ /* INCY (input) INTEGER */ /* storage spacing between elements of SY */ /* DSDOT (output) DOUBLE PRECISION */ /* DSDOT double precision dot product (zero if N.LE.0) */ /* REFERENCES */ /* ========== */ /* C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T. */ /* Krogh, Basic linear algebra subprograms for Fortran */ /* usage, Algorithm No. 539, Transactions on Mathematical */ /* Software 5, 3 (September 1979), pp. 308-323. */ /* REVISION HISTORY (YYMMDD) */ /* ========================== */ /* 791001 DATE WRITTEN */ /* 890831 Modified array declarations. (WRB) */ /* 890831 REVISION DATE from Version 3.2 */ /* 891214 Prologue converted to Version 4.0 format. (BAB) */ /* 920310 Corrected definition of LX in DESCRIPTION. (WRB) */ /* 920501 Reformatted the REFERENCES section. (WRB) */ /* 070118 Reformat to LAPACK style (JL) */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Parameter adjustments */ --sy; --sx; /* Function Body */ ret_val = 0.; if (*n <= 0) { return ret_val; } if (*incx == *incy && *incx > 0) { goto L20; } /* Code for unequal or nonpositive increments. */ kx = 1; ky = 1; if (*incx < 0) { kx = (1 - *n) * *incx + 1; } if (*incy < 0) { ky = (1 - *n) * *incy + 1; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { ret_val += (doublereal) sx[kx] * (doublereal) sy[ky]; kx += *incx; ky += *incy; /* L10: */ } return ret_val; /* Code for equal, positive, non-unit increments. */ L20: ns = *n * *incx; i__1 = ns; i__2 = *incx; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { ret_val += (doublereal) sx[i__] * (doublereal) sy[i__]; /* L30: */ } return ret_val; } /* _starpu_dsdot_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dspmv.c000066400000000000000000000173411507764646700212540ustar00rootroot00000000000000/* dspmv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dspmv_(char *uplo, integer *n, doublereal *alpha, doublereal *ap, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy) { /* System generated locals */ integer i__1, i__2; /* Local variables */ integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; doublereal temp1, temp2; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPMV performs the matrix-vector operation */ /* y := alpha*A*x + beta*y, */ /* where alpha and beta are scalars, x and y are n element vectors and */ /* A is an n by n symmetric matrix, supplied in packed form. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the matrix A is supplied in the packed */ /* array AP as follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* supplied in AP. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* supplied in AP. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* AP - DOUBLE PRECISION array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ /* and a( 2, 2 ) respectively, and so on. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ /* and a( 3, 1 ) respectively, and so on. */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION. */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then Y need not be set on input. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the n */ /* element vector y. On exit, Y is overwritten by the updated */ /* vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ --y; --x; --ap; /* Function Body */ info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 6; } else if (*incy == 0) { info = 9; } if (info != 0) { _starpu_xerbla_("DSPMV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0. && *beta == 1.) { return 0; } /* Set up the start points in X and Y. */ if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } /* Start the operations. In this version the elements of the array AP */ /* are accessed sequentially with one pass through AP. */ /* First form y := beta*y. */ if (*beta != 1.) { if (*incy == 1) { if (*beta == 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = 0.; /* L10: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = *beta * y[i__]; /* L20: */ } } } else { iy = ky; if (*beta == 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = 0.; iy += *incy; /* L30: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = *beta * y[iy]; iy += *incy; /* L40: */ } } } } if (*alpha == 0.) { return 0; } kk = 1; if (_starpu_lsame_(uplo, "U")) { /* Form y when AP contains the upper triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[j]; temp2 = 0.; k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { y[i__] += temp1 * ap[k]; temp2 += ap[k] * x[i__]; ++k; /* L50: */ } y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2; kk += j; /* L60: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[jx]; temp2 = 0.; ix = kx; iy = ky; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { y[iy] += temp1 * ap[k]; temp2 += ap[k] * x[ix]; ix += *incx; iy += *incy; /* L70: */ } y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2; jx += *incx; jy += *incy; kk += j; /* L80: */ } } } else { /* Form y when AP contains the lower triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[j]; temp2 = 0.; y[j] += temp1 * ap[kk]; k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { y[i__] += temp1 * ap[k]; temp2 += ap[k] * x[i__]; ++k; /* L90: */ } y[j] += *alpha * temp2; kk += *n - j + 1; /* L100: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[jx]; temp2 = 0.; y[jy] += temp1 * ap[kk]; ix = jx; iy = jy; i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; iy += *incy; y[iy] += temp1 * ap[k]; temp2 += ap[k] * x[ix]; /* L110: */ } y[jy] += *alpha * temp2; jx += *incx; jy += *incy; kk += *n - j + 1; /* L120: */ } } } return 0; /* End of DSPMV . */ } /* _starpu_dspmv_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dspr.c000066400000000000000000000140741507764646700210730ustar00rootroot00000000000000/* dspr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dspr_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *ap) { /* System generated locals */ integer i__1, i__2; /* Local variables */ integer i__, j, k, kk, ix, jx, kx, info; doublereal temp; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPR performs the symmetric rank 1 operation */ /* A := alpha*x*x' + A, */ /* where alpha is a real scalar, x is an n element vector and A is an */ /* n by n symmetric matrix, supplied in packed form. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the matrix A is supplied in the packed */ /* array AP as follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* supplied in AP. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* supplied in AP. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* AP - DOUBLE PRECISION array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ /* and a( 2, 2 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the upper triangular part of the */ /* updated matrix. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ /* and a( 3, 1 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the lower triangular part of the */ /* updated matrix. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --x; /* Function Body */ info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } if (info != 0) { _starpu_xerbla_("DSPR ", &info); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0.) { return 0; } /* Set the start point in X if the increment is not unity. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of the array AP */ /* are accessed sequentially with one pass through AP. */ kk = 1; if (_starpu_lsame_(uplo, "U")) { /* Form A when upper triangle is stored in AP. */ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { temp = *alpha * x[j]; k = kk; i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { ap[k] += x[i__] * temp; ++k; /* L10: */ } } kk += j; /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = *alpha * x[jx]; ix = kx; i__2 = kk + j - 1; for (k = kk; k <= i__2; ++k) { ap[k] += x[ix] * temp; ix += *incx; /* L30: */ } } jx += *incx; kk += j; /* L40: */ } } } else { /* Form A when lower triangle is stored in AP. */ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { temp = *alpha * x[j]; k = kk; i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { ap[k] += x[i__] * temp; ++k; /* L50: */ } } kk = kk + *n - j + 1; /* L60: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = *alpha * x[jx]; ix = jx; i__2 = kk + *n - j; for (k = kk; k <= i__2; ++k) { ap[k] += x[ix] * temp; ix += *incx; /* L70: */ } } jx += *incx; kk = kk + *n - j + 1; /* L80: */ } } } return 0; /* End of DSPR . */ } /* _starpu_dspr_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dspr2.c000066400000000000000000000161421507764646700211530ustar00rootroot00000000000000/* dspr2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dspr2_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *ap) { /* System generated locals */ integer i__1, i__2; /* Local variables */ integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; doublereal temp1, temp2; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPR2 performs the symmetric rank 2 operation */ /* A := alpha*x*y' + alpha*y*x' + A, */ /* where alpha is a scalar, x and y are n element vectors and A is an */ /* n by n symmetric matrix, supplied in packed form. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the matrix A is supplied in the packed */ /* array AP as follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* supplied in AP. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* supplied in AP. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the n */ /* element vector y. */ /* Unchanged on exit. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* AP - DOUBLE PRECISION array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ /* and a( 2, 2 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the upper triangular part of the */ /* updated matrix. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ /* and a( 3, 1 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the lower triangular part of the */ /* updated matrix. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --y; --x; /* Function Body */ info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*incy == 0) { info = 7; } if (info != 0) { _starpu_xerbla_("DSPR2 ", &info); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0.) { return 0; } /* Set up the start points in X and Y if the increments are not both */ /* unity. */ if (*incx != 1 || *incy != 1) { if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } jx = kx; jy = ky; } /* Start the operations. In this version the elements of the array AP */ /* are accessed sequentially with one pass through AP. */ kk = 1; if (_starpu_lsame_(uplo, "U")) { /* Form A when upper triangle is stored in AP. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0. || y[j] != 0.) { temp1 = *alpha * y[j]; temp2 = *alpha * x[j]; k = kk; i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2; ++k; /* L10: */ } } kk += j; /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0. || y[jy] != 0.) { temp1 = *alpha * y[jy]; temp2 = *alpha * x[jx]; ix = kx; iy = ky; i__2 = kk + j - 1; for (k = kk; k <= i__2; ++k) { ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2; ix += *incx; iy += *incy; /* L30: */ } } jx += *incx; jy += *incy; kk += j; /* L40: */ } } } else { /* Form A when lower triangle is stored in AP. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0. || y[j] != 0.) { temp1 = *alpha * y[j]; temp2 = *alpha * x[j]; k = kk; i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2; ++k; /* L50: */ } } kk = kk + *n - j + 1; /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0. || y[jy] != 0.) { temp1 = *alpha * y[jy]; temp2 = *alpha * x[jx]; ix = jx; iy = jy; i__2 = kk + *n - j; for (k = kk; k <= i__2; ++k) { ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2; ix += *incx; iy += *incy; /* L70: */ } } jx += *incx; jy += *incy; kk = kk + *n - j + 1; /* L80: */ } } } return 0; /* End of DSPR2 . */ } /* _starpu_dspr2_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dswap.c000066400000000000000000000045001507764646700212320ustar00rootroot00000000000000/* dswap.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dswap_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy) { /* System generated locals */ integer i__1; /* Local variables */ integer i__, m, ix, iy, mp1; doublereal dtemp; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* interchanges two vectors. */ /* uses unrolled loops for increments equal one. */ /* jack dongarra, linpack, 3/11/78. */ /* modified 12/3/93, array(1) declarations changed to array(*) */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Parameter adjustments */ --dy; --dx; /* Function Body */ if (*n <= 0) { return 0; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments not equal */ /* to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dtemp = dx[ix]; dx[ix] = dy[iy]; dy[iy] = dtemp; ix += *incx; iy += *incy; /* L10: */ } return 0; /* code for both increments equal to 1 */ /* clean-up loop */ L20: m = *n % 3; if (m == 0) { goto L40; } i__1 = m; for (i__ = 1; i__ <= i__1; ++i__) { dtemp = dx[i__]; dx[i__] = dy[i__]; dy[i__] = dtemp; /* L30: */ } if (*n < 3) { return 0; } L40: mp1 = m + 1; i__1 = *n; for (i__ = mp1; i__ <= i__1; i__ += 3) { dtemp = dx[i__]; dx[i__] = dy[i__]; dy[i__] = dtemp; dtemp = dx[i__ + 1]; dx[i__ + 1] = dy[i__ + 1]; dy[i__ + 1] = dtemp; dtemp = dx[i__ + 2]; dx[i__ + 2] = dy[i__ + 2]; dy[i__ + 2] = dtemp; /* L50: */ } return 0; } /* _starpu_dswap_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dsymm.c000066400000000000000000000247611507764646700212600ustar00rootroot00000000000000/* dsymm.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dsymm_(char *side, char *uplo, integer *m, integer *n, doublereal *alpha, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, integer *ldc) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, k, info; doublereal temp1, temp2; extern logical _starpu_lsame_(char *, char *); integer nrowa; logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYMM performs one of the matrix-matrix operations */ /* C := alpha*A*B + beta*C, */ /* or */ /* C := alpha*B*A + beta*C, */ /* where alpha and beta are scalars, A is a symmetric matrix and B and */ /* C are m by n matrices. */ /* Arguments */ /* ========== */ /* SIDE - CHARACTER*1. */ /* On entry, SIDE specifies whether the symmetric matrix A */ /* appears on the left or right in the operation as follows: */ /* SIDE = 'L' or 'l' C := alpha*A*B + beta*C, */ /* SIDE = 'R' or 'r' C := alpha*B*A + beta*C, */ /* Unchanged on exit. */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the symmetric matrix A is to be */ /* referenced as follows: */ /* UPLO = 'U' or 'u' Only the upper triangular part of the */ /* symmetric matrix is to be referenced. */ /* UPLO = 'L' or 'l' Only the lower triangular part of the */ /* symmetric matrix is to be referenced. */ /* Unchanged on exit. */ /* M - INTEGER. */ /* On entry, M specifies the number of rows of the matrix C. */ /* M must be at least zero. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of the matrix C. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */ /* m when SIDE = 'L' or 'l' and is n otherwise. */ /* Before entry with SIDE = 'L' or 'l', the m by m part of */ /* the array A must contain the symmetric matrix, such that */ /* when UPLO = 'U' or 'u', the leading m by m upper triangular */ /* part of the array A must contain the upper triangular part */ /* of the symmetric matrix and the strictly lower triangular */ /* part of A is not referenced, and when UPLO = 'L' or 'l', */ /* the leading m by m lower triangular part of the array A */ /* must contain the lower triangular part of the symmetric */ /* matrix and the strictly upper triangular part of A is not */ /* referenced. */ /* Before entry with SIDE = 'R' or 'r', the n by n part of */ /* the array A must contain the symmetric matrix, such that */ /* when UPLO = 'U' or 'u', the leading n by n upper triangular */ /* part of the array A must contain the upper triangular part */ /* of the symmetric matrix and the strictly lower triangular */ /* part of A is not referenced, and when UPLO = 'L' or 'l', */ /* the leading n by n lower triangular part of the array A */ /* must contain the lower triangular part of the symmetric */ /* matrix and the strictly upper triangular part of A is not */ /* referenced. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. When SIDE = 'L' or 'l' then */ /* LDA must be at least max( 1, m ), otherwise LDA must be at */ /* least max( 1, n ). */ /* Unchanged on exit. */ /* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */ /* Before entry, the leading m by n part of the array B must */ /* contain the matrix B. */ /* Unchanged on exit. */ /* LDB - INTEGER. */ /* On entry, LDB specifies the first dimension of B as declared */ /* in the calling (sub) program. LDB must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION. */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then C need not be set on input. */ /* Unchanged on exit. */ /* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */ /* Before entry, the leading m by n part of the array C must */ /* contain the matrix C, except when beta is zero, in which */ /* case C need not be set on entry. */ /* On exit, the array C is overwritten by the m by n updated */ /* matrix. */ /* LDC - INTEGER. */ /* On entry, LDC specifies the first dimension of C as declared */ /* in the calling (sub) program. LDC must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* Level 3 Blas routine. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Parameters .. */ /* .. */ /* Set NROWA as the number of rows of A. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; /* Function Body */ if (_starpu_lsame_(side, "L")) { nrowa = *m; } else { nrowa = *n; } upper = _starpu_lsame_(uplo, "U"); /* Test the input parameters. */ info = 0; if (! _starpu_lsame_(side, "L") && ! _starpu_lsame_(side, "R")) { info = 1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { info = 2; } else if (*m < 0) { info = 3; } else if (*n < 0) { info = 4; } else if (*lda < max(1,nrowa)) { info = 7; } else if (*ldb < max(1,*m)) { info = 9; } else if (*ldc < max(1,*m)) { info = 12; } if (info != 0) { _starpu_xerbla_("DSYMM ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { return 0; } /* And when alpha.eq.zero. */ if (*alpha == 0.) { if (*beta == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L10: */ } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L30: */ } /* L40: */ } } return 0; } /* Start the operations. */ if (_starpu_lsame_(side, "L")) { /* Form C := alpha*A*B + beta*C. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp1 = *alpha * b[i__ + j * b_dim1]; temp2 = 0.; i__3 = i__ - 1; for (k = 1; k <= i__3; ++k) { c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1]; temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1]; /* L50: */ } if (*beta == 0.) { c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] + *alpha * temp2; } else { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + temp1 * a[i__ + i__ * a_dim1] + *alpha * temp2; } /* L60: */ } /* L70: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { temp1 = *alpha * b[i__ + j * b_dim1]; temp2 = 0.; i__2 = *m; for (k = i__ + 1; k <= i__2; ++k) { c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1]; temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1]; /* L80: */ } if (*beta == 0.) { c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] + *alpha * temp2; } else { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + temp1 * a[i__ + i__ * a_dim1] + *alpha * temp2; } /* L90: */ } /* L100: */ } } } else { /* Form C := alpha*B*A + beta*C. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * a[j + j * a_dim1]; if (*beta == 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = temp1 * b[i__ + j * b_dim1]; /* L110: */ } } else { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + temp1 * b[i__ + j * b_dim1]; /* L120: */ } } i__2 = j - 1; for (k = 1; k <= i__2; ++k) { if (upper) { temp1 = *alpha * a[k + j * a_dim1]; } else { temp1 = *alpha * a[j + k * a_dim1]; } i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1]; /* L130: */ } /* L140: */ } i__2 = *n; for (k = j + 1; k <= i__2; ++k) { if (upper) { temp1 = *alpha * a[j + k * a_dim1]; } else { temp1 = *alpha * a[k + j * a_dim1]; } i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1]; /* L150: */ } /* L160: */ } /* L170: */ } } return 0; /* End of DSYMM . */ } /* _starpu_dsymm_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dsymv.c000066400000000000000000000177441507764646700212740ustar00rootroot00000000000000/* dsymv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dsymv_(char *uplo, integer *n, doublereal *alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, ix, iy, jx, jy, kx, ky, info; doublereal temp1, temp2; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYMV performs the matrix-vector operation */ /* y := alpha*A*x + beta*y, */ /* where alpha and beta are scalars, x and y are n element vectors and */ /* A is an n by n symmetric matrix. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the array A is to be referenced as */ /* follows: */ /* UPLO = 'U' or 'u' Only the upper triangular part of A */ /* is to be referenced. */ /* UPLO = 'L' or 'l' Only the lower triangular part of A */ /* is to be referenced. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading n by n */ /* upper triangular part of the array A must contain the upper */ /* triangular part of the symmetric matrix and the strictly */ /* lower triangular part of A is not referenced. */ /* Before entry with UPLO = 'L' or 'l', the leading n by n */ /* lower triangular part of the array A must contain the lower */ /* triangular part of the symmetric matrix and the strictly */ /* upper triangular part of A is not referenced. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* max( 1, n ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION. */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then Y need not be set on input. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the n */ /* element vector y. On exit, Y is overwritten by the updated */ /* vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --x; --y; /* Function Body */ info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (*n < 0) { info = 2; } else if (*lda < max(1,*n)) { info = 5; } else if (*incx == 0) { info = 7; } else if (*incy == 0) { info = 10; } if (info != 0) { _starpu_xerbla_("DSYMV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0. && *beta == 1.) { return 0; } /* Set up the start points in X and Y. */ if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through the triangular part */ /* of A. */ /* First form y := beta*y. */ if (*beta != 1.) { if (*incy == 1) { if (*beta == 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = 0.; /* L10: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = *beta * y[i__]; /* L20: */ } } } else { iy = ky; if (*beta == 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = 0.; iy += *incy; /* L30: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = *beta * y[iy]; iy += *incy; /* L40: */ } } } } if (*alpha == 0.) { return 0; } if (_starpu_lsame_(uplo, "U")) { /* Form y when A is stored in upper triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[j]; temp2 = 0.; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { y[i__] += temp1 * a[i__ + j * a_dim1]; temp2 += a[i__ + j * a_dim1] * x[i__]; /* L50: */ } y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2; /* L60: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[jx]; temp2 = 0.; ix = kx; iy = ky; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { y[iy] += temp1 * a[i__ + j * a_dim1]; temp2 += a[i__ + j * a_dim1] * x[ix]; ix += *incx; iy += *incy; /* L70: */ } y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2; jx += *incx; jy += *incy; /* L80: */ } } } else { /* Form y when A is stored in lower triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[j]; temp2 = 0.; y[j] += temp1 * a[j + j * a_dim1]; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { y[i__] += temp1 * a[i__ + j * a_dim1]; temp2 += a[i__ + j * a_dim1] * x[i__]; /* L90: */ } y[j] += *alpha * temp2; /* L100: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[jx]; temp2 = 0.; y[jy] += temp1 * a[j + j * a_dim1]; ix = jx; iy = jy; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { ix += *incx; iy += *incy; y[iy] += temp1 * a[i__ + j * a_dim1]; temp2 += a[i__ + j * a_dim1] * x[ix]; /* L110: */ } y[jy] += *alpha * temp2; jx += *incx; jy += *incy; /* L120: */ } } } return 0; /* End of DSYMV . */ } /* _starpu_dsymv_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dsyr.c000066400000000000000000000144251507764646700211040ustar00rootroot00000000000000/* dsyr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dsyr_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, ix, jx, kx, info; doublereal temp; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYR performs the symmetric rank 1 operation */ /* A := alpha*x*x' + A, */ /* where alpha is a real scalar, x is an n element vector and A is an */ /* n by n symmetric matrix. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the array A is to be referenced as */ /* follows: */ /* UPLO = 'U' or 'u' Only the upper triangular part of A */ /* is to be referenced. */ /* UPLO = 'L' or 'l' Only the lower triangular part of A */ /* is to be referenced. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading n by n */ /* upper triangular part of the array A must contain the upper */ /* triangular part of the symmetric matrix and the strictly */ /* lower triangular part of A is not referenced. On exit, the */ /* upper triangular part of the array A is overwritten by the */ /* upper triangular part of the updated matrix. */ /* Before entry with UPLO = 'L' or 'l', the leading n by n */ /* lower triangular part of the array A must contain the lower */ /* triangular part of the symmetric matrix and the strictly */ /* upper triangular part of A is not referenced. On exit, the */ /* lower triangular part of the array A is overwritten by the */ /* lower triangular part of the updated matrix. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* max( 1, n ). */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ --x; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*lda < max(1,*n)) { info = 7; } if (info != 0) { _starpu_xerbla_("DSYR ", &info); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0.) { return 0; } /* Set the start point in X if the increment is not unity. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through the triangular part */ /* of A. */ if (_starpu_lsame_(uplo, "U")) { /* Form A when A is stored in upper triangle. */ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { temp = *alpha * x[j]; i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] += x[i__] * temp; /* L10: */ } } /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = *alpha * x[jx]; ix = kx; i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] += x[ix] * temp; ix += *incx; /* L30: */ } } jx += *incx; /* L40: */ } } } else { /* Form A when A is stored in lower triangle. */ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { temp = *alpha * x[j]; i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] += x[i__] * temp; /* L50: */ } } /* L60: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = *alpha * x[jx]; ix = jx; i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] += x[ix] * temp; ix += *incx; /* L70: */ } } jx += *incx; /* L80: */ } } } return 0; /* End of DSYR . */ } /* _starpu_dsyr_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dsyr2.c000066400000000000000000000166201507764646700211650ustar00rootroot00000000000000/* dsyr2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dsyr2_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, ix, iy, jx, jy, kx, ky, info; doublereal temp1, temp2; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYR2 performs the symmetric rank 2 operation */ /* A := alpha*x*y' + alpha*y*x' + A, */ /* where alpha is a scalar, x and y are n element vectors and A is an n */ /* by n symmetric matrix. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the array A is to be referenced as */ /* follows: */ /* UPLO = 'U' or 'u' Only the upper triangular part of A */ /* is to be referenced. */ /* UPLO = 'L' or 'l' Only the lower triangular part of A */ /* is to be referenced. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the n */ /* element vector y. */ /* Unchanged on exit. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading n by n */ /* upper triangular part of the array A must contain the upper */ /* triangular part of the symmetric matrix and the strictly */ /* lower triangular part of A is not referenced. On exit, the */ /* upper triangular part of the array A is overwritten by the */ /* upper triangular part of the updated matrix. */ /* Before entry with UPLO = 'L' or 'l', the leading n by n */ /* lower triangular part of the array A must contain the lower */ /* triangular part of the symmetric matrix and the strictly */ /* upper triangular part of A is not referenced. On exit, the */ /* lower triangular part of the array A is overwritten by the */ /* lower triangular part of the updated matrix. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* max( 1, n ). */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ --x; --y; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*incy == 0) { info = 7; } else if (*lda < max(1,*n)) { info = 9; } if (info != 0) { _starpu_xerbla_("DSYR2 ", &info); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0.) { return 0; } /* Set up the start points in X and Y if the increments are not both */ /* unity. */ if (*incx != 1 || *incy != 1) { if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } jx = kx; jy = ky; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through the triangular part */ /* of A. */ if (_starpu_lsame_(uplo, "U")) { /* Form A when A is stored in the upper triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0. || y[j] != 0.) { temp1 = *alpha * y[j]; temp2 = *alpha * x[j]; i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * temp1 + y[i__] * temp2; /* L10: */ } } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0. || y[jy] != 0.) { temp1 = *alpha * y[jy]; temp2 = *alpha * x[jx]; ix = kx; iy = ky; i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * temp1 + y[iy] * temp2; ix += *incx; iy += *incy; /* L30: */ } } jx += *incx; jy += *incy; /* L40: */ } } } else { /* Form A when A is stored in the lower triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0. || y[j] != 0.) { temp1 = *alpha * y[j]; temp2 = *alpha * x[j]; i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * temp1 + y[i__] * temp2; /* L50: */ } } /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0. || y[jy] != 0.) { temp1 = *alpha * y[jy]; temp2 = *alpha * x[jx]; ix = jx; iy = jy; i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * temp1 + y[iy] * temp2; ix += *incx; iy += *incy; /* L70: */ } } jx += *incx; jy += *incy; /* L80: */ } } } return 0; /* End of DSYR2 . */ } /* _starpu_dsyr2_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dsyr2k.c000066400000000000000000000266371507764646700213510ustar00rootroot00000000000000/* dsyr2k.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dsyr2k_(char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, integer *ldc) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, l, info; doublereal temp1, temp2; extern logical _starpu_lsame_(char *, char *); integer nrowa; logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYR2K performs one of the symmetric rank 2k operations */ /* C := alpha*A*B' + alpha*B*A' + beta*C, */ /* or */ /* C := alpha*A'*B + alpha*B'*A + beta*C, */ /* where alpha and beta are scalars, C is an n by n symmetric matrix */ /* and A and B are n by k matrices in the first case and k by n */ /* matrices in the second case. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the array C is to be referenced as */ /* follows: */ /* UPLO = 'U' or 'u' Only the upper triangular part of C */ /* is to be referenced. */ /* UPLO = 'L' or 'l' Only the lower triangular part of C */ /* is to be referenced. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + */ /* beta*C. */ /* TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + */ /* beta*C. */ /* TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + */ /* beta*C. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix C. N must be */ /* at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry with TRANS = 'N' or 'n', K specifies the number */ /* of columns of the matrices A and B, and on entry with */ /* TRANS = 'T' or 't' or 'C' or 'c', K specifies the number */ /* of rows of the matrices A and B. K must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */ /* k when TRANS = 'N' or 'n', and is n otherwise. */ /* Before entry with TRANS = 'N' or 'n', the leading n by k */ /* part of the array A must contain the matrix A, otherwise */ /* the leading k by n part of the array A must contain the */ /* matrix A. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. When TRANS = 'N' or 'n' */ /* then LDA must be at least max( 1, n ), otherwise LDA must */ /* be at least max( 1, k ). */ /* Unchanged on exit. */ /* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is */ /* k when TRANS = 'N' or 'n', and is n otherwise. */ /* Before entry with TRANS = 'N' or 'n', the leading n by k */ /* part of the array B must contain the matrix B, otherwise */ /* the leading k by n part of the array B must contain the */ /* matrix B. */ /* Unchanged on exit. */ /* LDB - INTEGER. */ /* On entry, LDB specifies the first dimension of B as declared */ /* in the calling (sub) program. When TRANS = 'N' or 'n' */ /* then LDB must be at least max( 1, n ), otherwise LDB must */ /* be at least max( 1, k ). */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION. */ /* On entry, BETA specifies the scalar beta. */ /* Unchanged on exit. */ /* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading n by n */ /* upper triangular part of the array C must contain the upper */ /* triangular part of the symmetric matrix and the strictly */ /* lower triangular part of C is not referenced. On exit, the */ /* upper triangular part of the array C is overwritten by the */ /* upper triangular part of the updated matrix. */ /* Before entry with UPLO = 'L' or 'l', the leading n by n */ /* lower triangular part of the array C must contain the lower */ /* triangular part of the symmetric matrix and the strictly */ /* upper triangular part of C is not referenced. On exit, the */ /* lower triangular part of the array C is overwritten by the */ /* lower triangular part of the updated matrix. */ /* LDC - INTEGER. */ /* On entry, LDC specifies the first dimension of C as declared */ /* in the calling (sub) program. LDC must be at least */ /* max( 1, n ). */ /* Unchanged on exit. */ /* Level 3 Blas routine. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Parameters .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; /* Function Body */ if (_starpu_lsame_(trans, "N")) { nrowa = *n; } else { nrowa = *k; } upper = _starpu_lsame_(uplo, "U"); info = 0; if (! upper && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { info = 2; } else if (*n < 0) { info = 3; } else if (*k < 0) { info = 4; } else if (*lda < max(1,nrowa)) { info = 7; } else if (*ldb < max(1,nrowa)) { info = 9; } else if (*ldc < max(1,*n)) { info = 12; } if (info != 0) { _starpu_xerbla_("DSYR2K", &info); return 0; } /* Quick return if possible. */ if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { return 0; } /* And when alpha.eq.zero. */ if (*alpha == 0.) { if (upper) { if (*beta == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L10: */ } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L30: */ } /* L40: */ } } } else { if (*beta == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L50: */ } /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L70: */ } /* L80: */ } } } return 0; } /* Start the operations. */ if (_starpu_lsame_(trans, "N")) { /* Form C := alpha*A*B' + alpha*B*A' + C. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*beta == 0.) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L90: */ } } else if (*beta != 1.) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L100: */ } } i__2 = *k; for (l = 1; l <= i__2; ++l) { if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) { temp1 = *alpha * b[j + l * b_dim1]; temp2 = *alpha * a[j + l * a_dim1]; i__3 = j; for (i__ = 1; i__ <= i__3; ++i__) { c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ i__ + l * a_dim1] * temp1 + b[i__ + l * b_dim1] * temp2; /* L110: */ } } /* L120: */ } /* L130: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*beta == 0.) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L140: */ } } else if (*beta != 1.) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L150: */ } } i__2 = *k; for (l = 1; l <= i__2; ++l) { if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) { temp1 = *alpha * b[j + l * b_dim1]; temp2 = *alpha * a[j + l * a_dim1]; i__3 = *n; for (i__ = j; i__ <= i__3; ++i__) { c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ i__ + l * a_dim1] * temp1 + b[i__ + l * b_dim1] * temp2; /* L160: */ } } /* L170: */ } /* L180: */ } } } else { /* Form C := alpha*A'*B + alpha*B'*A + C. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { temp1 = 0.; temp2 = 0.; i__3 = *k; for (l = 1; l <= i__3; ++l) { temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; /* L190: */ } if (*beta == 0.) { c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * temp2; } else { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + *alpha * temp1 + *alpha * temp2; } /* L200: */ } /* L210: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { temp1 = 0.; temp2 = 0.; i__3 = *k; for (l = 1; l <= i__3; ++l) { temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; /* L220: */ } if (*beta == 0.) { c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * temp2; } else { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + *alpha * temp1 + *alpha * temp2; } /* L230: */ } /* L240: */ } } } return 0; /* End of DSYR2K. */ } /* _starpu_dsyr2k_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dsyrk.c000066400000000000000000000234241507764646700212560ustar00rootroot00000000000000/* dsyrk.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dsyrk_(char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, doublereal *c__, integer *ldc) { /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, l, info; doublereal temp; extern logical _starpu_lsame_(char *, char *); integer nrowa; logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYRK performs one of the symmetric rank k operations */ /* C := alpha*A*A' + beta*C, */ /* or */ /* C := alpha*A'*A + beta*C, */ /* where alpha and beta are scalars, C is an n by n symmetric matrix */ /* and A is an n by k matrix in the first case and a k by n matrix */ /* in the second case. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the array C is to be referenced as */ /* follows: */ /* UPLO = 'U' or 'u' Only the upper triangular part of C */ /* is to be referenced. */ /* UPLO = 'L' or 'l' Only the lower triangular part of C */ /* is to be referenced. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. */ /* TRANS = 'T' or 't' C := alpha*A'*A + beta*C. */ /* TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix C. N must be */ /* at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry with TRANS = 'N' or 'n', K specifies the number */ /* of columns of the matrix A, and on entry with */ /* TRANS = 'T' or 't' or 'C' or 'c', K specifies the number */ /* of rows of the matrix A. K must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */ /* k when TRANS = 'N' or 'n', and is n otherwise. */ /* Before entry with TRANS = 'N' or 'n', the leading n by k */ /* part of the array A must contain the matrix A, otherwise */ /* the leading k by n part of the array A must contain the */ /* matrix A. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. When TRANS = 'N' or 'n' */ /* then LDA must be at least max( 1, n ), otherwise LDA must */ /* be at least max( 1, k ). */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION. */ /* On entry, BETA specifies the scalar beta. */ /* Unchanged on exit. */ /* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading n by n */ /* upper triangular part of the array C must contain the upper */ /* triangular part of the symmetric matrix and the strictly */ /* lower triangular part of C is not referenced. On exit, the */ /* upper triangular part of the array C is overwritten by the */ /* upper triangular part of the updated matrix. */ /* Before entry with UPLO = 'L' or 'l', the leading n by n */ /* lower triangular part of the array C must contain the lower */ /* triangular part of the symmetric matrix and the strictly */ /* upper triangular part of C is not referenced. On exit, the */ /* lower triangular part of the array C is overwritten by the */ /* lower triangular part of the updated matrix. */ /* LDC - INTEGER. */ /* On entry, LDC specifies the first dimension of C as declared */ /* in the calling (sub) program. LDC must be at least */ /* max( 1, n ). */ /* Unchanged on exit. */ /* Level 3 Blas routine. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Parameters .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; /* Function Body */ if (_starpu_lsame_(trans, "N")) { nrowa = *n; } else { nrowa = *k; } upper = _starpu_lsame_(uplo, "U"); info = 0; if (! upper && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { info = 2; } else if (*n < 0) { info = 3; } else if (*k < 0) { info = 4; } else if (*lda < max(1,nrowa)) { info = 7; } else if (*ldc < max(1,*n)) { info = 10; } if (info != 0) { _starpu_xerbla_("DSYRK ", &info); return 0; } /* Quick return if possible. */ if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { return 0; } /* And when alpha.eq.zero. */ if (*alpha == 0.) { if (upper) { if (*beta == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L10: */ } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L30: */ } /* L40: */ } } } else { if (*beta == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L50: */ } /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L70: */ } /* L80: */ } } } return 0; } /* Start the operations. */ if (_starpu_lsame_(trans, "N")) { /* Form C := alpha*A*A' + beta*C. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*beta == 0.) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L90: */ } } else if (*beta != 1.) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L100: */ } } i__2 = *k; for (l = 1; l <= i__2; ++l) { if (a[j + l * a_dim1] != 0.) { temp = *alpha * a[j + l * a_dim1]; i__3 = j; for (i__ = 1; i__ <= i__3; ++i__) { c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1]; /* L110: */ } } /* L120: */ } /* L130: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*beta == 0.) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L140: */ } } else if (*beta != 1.) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L150: */ } } i__2 = *k; for (l = 1; l <= i__2; ++l) { if (a[j + l * a_dim1] != 0.) { temp = *alpha * a[j + l * a_dim1]; i__3 = *n; for (i__ = j; i__ <= i__3; ++i__) { c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1]; /* L160: */ } } /* L170: */ } /* L180: */ } } } else { /* Form C := alpha*A'*A + beta*C. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { temp = 0.; i__3 = *k; for (l = 1; l <= i__3; ++l) { temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; /* L190: */ } if (*beta == 0.) { c__[i__ + j * c_dim1] = *alpha * temp; } else { c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ i__ + j * c_dim1]; } /* L200: */ } /* L210: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { temp = 0.; i__3 = *k; for (l = 1; l <= i__3; ++l) { temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; /* L220: */ } if (*beta == 0.) { c__[i__ + j * c_dim1] = *alpha * temp; } else { c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ i__ + j * c_dim1]; } /* L230: */ } /* L240: */ } } } return 0; /* End of DSYRK . */ } /* _starpu_dsyrk_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dtbmv.c000066400000000000000000000261051507764646700212350ustar00rootroot00000000000000/* dtbmv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, j, l, ix, jx, kx, info; doublereal temp; extern logical _starpu_lsame_(char *, char *); integer kplus1; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nounit; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTBMV performs one of the matrix-vector operations */ /* x := A*x, or x := A'*x, */ /* where x is an n element vector and A is an n by n unit, or non-unit, */ /* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' x := A*x. */ /* TRANS = 'T' or 't' x := A'*x. */ /* TRANS = 'C' or 'c' x := A'*x. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry with UPLO = 'U' or 'u', K specifies the number of */ /* super-diagonals of the matrix A. */ /* On entry with UPLO = 'L' or 'l', K specifies the number of */ /* sub-diagonals of the matrix A. */ /* K must satisfy 0 .le. K. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ /* by n part of the array A must contain the upper triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row */ /* ( k + 1 ) of the array, the first super-diagonal starting at */ /* position 2 in row k, and so on. The top left k by k triangle */ /* of the array A is not referenced. */ /* The following program segment will transfer an upper */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = K + 1 - J */ /* DO 10, I = MAX( 1, J - K ), J */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ /* by n part of the array A must contain the lower triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row 1 of */ /* the array, the first sub-diagonal starting at position 1 in */ /* row 2, and so on. The bottom right k by k triangle of the */ /* array A is not referenced. */ /* The following program segment will transfer a lower */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = 1 - J */ /* DO 10, I = J, MIN( N, J + K ) */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Note that when DIAG = 'U' or 'u' the elements of the array A */ /* corresponding to the diagonal elements of the matrix are not */ /* referenced, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( k + 1 ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. On exit, X is overwritten with the */ /* tranformed vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --x; /* Function Body */ info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { info = 2; } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, "N")) { info = 3; } else if (*n < 0) { info = 4; } else if (*k < 0) { info = 5; } else if (*lda < *k + 1) { info = 7; } else if (*incx == 0) { info = 9; } if (info != 0) { _starpu_xerbla_("DTBMV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } nounit = _starpu_lsame_(diag, "N"); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through A. */ if (_starpu_lsame_(trans, "N")) { /* Form x := A*x. */ if (_starpu_lsame_(uplo, "U")) { kplus1 = *k + 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { temp = x[j]; l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { x[i__] += temp * a[l + i__ + j * a_dim1]; /* L10: */ } if (nounit) { x[j] *= a[kplus1 + j * a_dim1]; } } /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = x[jx]; ix = kx; l = kplus1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { x[ix] += temp * a[l + i__ + j * a_dim1]; ix += *incx; /* L30: */ } if (nounit) { x[jx] *= a[kplus1 + j * a_dim1]; } } jx += *incx; if (j > *k) { kx += *incx; } /* L40: */ } } } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { if (x[j] != 0.) { temp = x[j]; l = 1 - j; /* Computing MIN */ i__1 = *n, i__3 = j + *k; i__4 = j + 1; for (i__ = min(i__1,i__3); i__ >= i__4; --i__) { x[i__] += temp * a[l + i__ + j * a_dim1]; /* L50: */ } if (nounit) { x[j] *= a[j * a_dim1 + 1]; } } /* L60: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { if (x[jx] != 0.) { temp = x[jx]; ix = kx; l = 1 - j; /* Computing MIN */ i__4 = *n, i__1 = j + *k; i__3 = j + 1; for (i__ = min(i__4,i__1); i__ >= i__3; --i__) { x[ix] += temp * a[l + i__ + j * a_dim1]; ix -= *incx; /* L70: */ } if (nounit) { x[jx] *= a[j * a_dim1 + 1]; } } jx -= *incx; if (*n - j >= *k) { kx -= *incx; } /* L80: */ } } } } else { /* Form x := A'*x. */ if (_starpu_lsame_(uplo, "U")) { kplus1 = *k + 1; if (*incx == 1) { for (j = *n; j >= 1; --j) { temp = x[j]; l = kplus1 - j; if (nounit) { temp *= a[kplus1 + j * a_dim1]; } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { temp += a[l + i__ + j * a_dim1] * x[i__]; /* L90: */ } x[j] = temp; /* L100: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { temp = x[jx]; kx -= *incx; ix = kx; l = kplus1 - j; if (nounit) { temp *= a[kplus1 + j * a_dim1]; } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { temp += a[l + i__ + j * a_dim1] * x[ix]; ix -= *incx; /* L110: */ } x[jx] = temp; jx -= *incx; /* L120: */ } } } else { if (*incx == 1) { i__3 = *n; for (j = 1; j <= i__3; ++j) { temp = x[j]; l = 1 - j; if (nounit) { temp *= a[j * a_dim1 + 1]; } /* Computing MIN */ i__1 = *n, i__2 = j + *k; i__4 = min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { temp += a[l + i__ + j * a_dim1] * x[i__]; /* L130: */ } x[j] = temp; /* L140: */ } } else { jx = kx; i__3 = *n; for (j = 1; j <= i__3; ++j) { temp = x[jx]; kx += *incx; ix = kx; l = 1 - j; if (nounit) { temp *= a[j * a_dim1 + 1]; } /* Computing MIN */ i__1 = *n, i__2 = j + *k; i__4 = min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { temp += a[l + i__ + j * a_dim1] * x[ix]; ix += *incx; /* L150: */ } x[jx] = temp; jx += *incx; /* L160: */ } } } } return 0; /* End of DTBMV . */ } /* _starpu_dtbmv_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dtbsv.c000066400000000000000000000263601507764646700212460ustar00rootroot00000000000000/* dtbsv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtbsv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, j, l, ix, jx, kx, info; doublereal temp; extern logical _starpu_lsame_(char *, char *); integer kplus1; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nounit; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTBSV solves one of the systems of equations */ /* A*x = b, or A'*x = b, */ /* where b and x are n element vectors and A is an n by n unit, or */ /* non-unit, upper or lower triangular band matrix, with ( k + 1 ) */ /* diagonals. */ /* No test for singularity or near-singularity is included in this */ /* routine. Such tests must be performed before calling this routine. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the equations to be solved as */ /* follows: */ /* TRANS = 'N' or 'n' A*x = b. */ /* TRANS = 'T' or 't' A'*x = b. */ /* TRANS = 'C' or 'c' A'*x = b. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry with UPLO = 'U' or 'u', K specifies the number of */ /* super-diagonals of the matrix A. */ /* On entry with UPLO = 'L' or 'l', K specifies the number of */ /* sub-diagonals of the matrix A. */ /* K must satisfy 0 .le. K. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ /* by n part of the array A must contain the upper triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row */ /* ( k + 1 ) of the array, the first super-diagonal starting at */ /* position 2 in row k, and so on. The top left k by k triangle */ /* of the array A is not referenced. */ /* The following program segment will transfer an upper */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = K + 1 - J */ /* DO 10, I = MAX( 1, J - K ), J */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ /* by n part of the array A must contain the lower triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row 1 of */ /* the array, the first sub-diagonal starting at position 1 in */ /* row 2, and so on. The bottom right k by k triangle of the */ /* array A is not referenced. */ /* The following program segment will transfer a lower */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = 1 - J */ /* DO 10, I = J, MIN( N, J + K ) */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Note that when DIAG = 'U' or 'u' the elements of the array A */ /* corresponding to the diagonal elements of the matrix are not */ /* referenced, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( k + 1 ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element right-hand side vector b. On exit, X is overwritten */ /* with the solution vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --x; /* Function Body */ info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { info = 2; } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, "N")) { info = 3; } else if (*n < 0) { info = 4; } else if (*k < 0) { info = 5; } else if (*lda < *k + 1) { info = 7; } else if (*incx == 0) { info = 9; } if (info != 0) { _starpu_xerbla_("DTBSV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } nounit = _starpu_lsame_(diag, "N"); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are */ /* accessed by sequentially with one pass through A. */ if (_starpu_lsame_(trans, "N")) { /* Form x := inv( A )*x. */ if (_starpu_lsame_(uplo, "U")) { kplus1 = *k + 1; if (*incx == 1) { for (j = *n; j >= 1; --j) { if (x[j] != 0.) { l = kplus1 - j; if (nounit) { x[j] /= a[kplus1 + j * a_dim1]; } temp = x[j]; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__1 = max(i__2,i__3); for (i__ = j - 1; i__ >= i__1; --i__) { x[i__] -= temp * a[l + i__ + j * a_dim1]; /* L10: */ } } /* L20: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { kx -= *incx; if (x[jx] != 0.) { ix = kx; l = kplus1 - j; if (nounit) { x[jx] /= a[kplus1 + j * a_dim1]; } temp = x[jx]; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__1 = max(i__2,i__3); for (i__ = j - 1; i__ >= i__1; --i__) { x[ix] -= temp * a[l + i__ + j * a_dim1]; ix -= *incx; /* L30: */ } } jx -= *incx; /* L40: */ } } } else { if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { l = 1 - j; if (nounit) { x[j] /= a[j * a_dim1 + 1]; } temp = x[j]; /* Computing MIN */ i__3 = *n, i__4 = j + *k; i__2 = min(i__3,i__4); for (i__ = j + 1; i__ <= i__2; ++i__) { x[i__] -= temp * a[l + i__ + j * a_dim1]; /* L50: */ } } /* L60: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { kx += *incx; if (x[jx] != 0.) { ix = kx; l = 1 - j; if (nounit) { x[jx] /= a[j * a_dim1 + 1]; } temp = x[jx]; /* Computing MIN */ i__3 = *n, i__4 = j + *k; i__2 = min(i__3,i__4); for (i__ = j + 1; i__ <= i__2; ++i__) { x[ix] -= temp * a[l + i__ + j * a_dim1]; ix += *incx; /* L70: */ } } jx += *incx; /* L80: */ } } } } else { /* Form x := inv( A')*x. */ if (_starpu_lsame_(uplo, "U")) { kplus1 = *k + 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[j]; l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { temp -= a[l + i__ + j * a_dim1] * x[i__]; /* L90: */ } if (nounit) { temp /= a[kplus1 + j * a_dim1]; } x[j] = temp; /* L100: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[jx]; ix = kx; l = kplus1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { temp -= a[l + i__ + j * a_dim1] * x[ix]; ix += *incx; /* L110: */ } if (nounit) { temp /= a[kplus1 + j * a_dim1]; } x[jx] = temp; jx += *incx; if (j > *k) { kx += *incx; } /* L120: */ } } } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { temp = x[j]; l = 1 - j; /* Computing MIN */ i__1 = *n, i__3 = j + *k; i__4 = j + 1; for (i__ = min(i__1,i__3); i__ >= i__4; --i__) { temp -= a[l + i__ + j * a_dim1] * x[i__]; /* L130: */ } if (nounit) { temp /= a[j * a_dim1 + 1]; } x[j] = temp; /* L140: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { temp = x[jx]; ix = kx; l = 1 - j; /* Computing MIN */ i__4 = *n, i__1 = j + *k; i__3 = j + 1; for (i__ = min(i__4,i__1); i__ >= i__3; --i__) { temp -= a[l + i__ + j * a_dim1] * x[ix]; ix -= *incx; /* L150: */ } if (nounit) { temp /= a[j * a_dim1 + 1]; } x[jx] = temp; jx -= *incx; if (*n - j >= *k) { kx -= *incx; } /* L160: */ } } } } return 0; /* End of DTBSV . */ } /* _starpu_dtbsv_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dtpmv.c000066400000000000000000000205231507764646700212510ustar00rootroot00000000000000/* dtpmv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtpmv_(char *uplo, char *trans, char *diag, integer *n, doublereal *ap, doublereal *x, integer *incx) { /* System generated locals */ integer i__1, i__2; /* Local variables */ integer i__, j, k, kk, ix, jx, kx, info; doublereal temp; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nounit; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTPMV performs one of the matrix-vector operations */ /* x := A*x, or x := A'*x, */ /* where x is an n element vector and A is an n by n unit, or non-unit, */ /* upper or lower triangular matrix, supplied in packed form. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' x := A*x. */ /* TRANS = 'T' or 't' x := A'*x. */ /* TRANS = 'C' or 'c' x := A'*x. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* AP - DOUBLE PRECISION array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular matrix packed sequentially, */ /* column by column, so that AP( 1 ) contains a( 1, 1 ), */ /* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */ /* respectively, and so on. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular matrix packed sequentially, */ /* column by column, so that AP( 1 ) contains a( 1, 1 ), */ /* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */ /* respectively, and so on. */ /* Note that when DIAG = 'U' or 'u', the diagonal elements of */ /* A are not referenced, but are assumed to be unity. */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. On exit, X is overwritten with the */ /* tranformed vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ --x; --ap; /* Function Body */ info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { info = 2; } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, "N")) { info = 3; } else if (*n < 0) { info = 4; } else if (*incx == 0) { info = 7; } if (info != 0) { _starpu_xerbla_("DTPMV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } nounit = _starpu_lsame_(diag, "N"); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of AP are */ /* accessed sequentially with one pass through AP. */ if (_starpu_lsame_(trans, "N")) { /* Form x:= A*x. */ if (_starpu_lsame_(uplo, "U")) { kk = 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { temp = x[j]; k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { x[i__] += temp * ap[k]; ++k; /* L10: */ } if (nounit) { x[j] *= ap[kk + j - 1]; } } kk += j; /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = x[jx]; ix = kx; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { x[ix] += temp * ap[k]; ix += *incx; /* L30: */ } if (nounit) { x[jx] *= ap[kk + j - 1]; } } jx += *incx; kk += j; /* L40: */ } } } else { kk = *n * (*n + 1) / 2; if (*incx == 1) { for (j = *n; j >= 1; --j) { if (x[j] != 0.) { temp = x[j]; k = kk; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { x[i__] += temp * ap[k]; --k; /* L50: */ } if (nounit) { x[j] *= ap[kk - *n + j]; } } kk -= *n - j + 1; /* L60: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { if (x[jx] != 0.) { temp = x[jx]; ix = kx; i__1 = kk - (*n - (j + 1)); for (k = kk; k >= i__1; --k) { x[ix] += temp * ap[k]; ix -= *incx; /* L70: */ } if (nounit) { x[jx] *= ap[kk - *n + j]; } } jx -= *incx; kk -= *n - j + 1; /* L80: */ } } } } else { /* Form x := A'*x. */ if (_starpu_lsame_(uplo, "U")) { kk = *n * (*n + 1) / 2; if (*incx == 1) { for (j = *n; j >= 1; --j) { temp = x[j]; if (nounit) { temp *= ap[kk]; } k = kk - 1; for (i__ = j - 1; i__ >= 1; --i__) { temp += ap[k] * x[i__]; --k; /* L90: */ } x[j] = temp; kk -= j; /* L100: */ } } else { jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { temp = x[jx]; ix = jx; if (nounit) { temp *= ap[kk]; } i__1 = kk - j + 1; for (k = kk - 1; k >= i__1; --k) { ix -= *incx; temp += ap[k] * x[ix]; /* L110: */ } x[jx] = temp; jx -= *incx; kk -= j; /* L120: */ } } } else { kk = 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[j]; if (nounit) { temp *= ap[kk]; } k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { temp += ap[k] * x[i__]; ++k; /* L130: */ } x[j] = temp; kk += *n - j + 1; /* L140: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[jx]; ix = jx; if (nounit) { temp *= ap[kk]; } i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; temp += ap[k] * x[ix]; /* L150: */ } x[jx] = temp; jx += *incx; kk += *n - j + 1; /* L160: */ } } } } return 0; /* End of DTPMV . */ } /* _starpu_dtpmv_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dtpsv.c000066400000000000000000000207731507764646700212660ustar00rootroot00000000000000/* dtpsv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtpsv_(char *uplo, char *trans, char *diag, integer *n, doublereal *ap, doublereal *x, integer *incx) { /* System generated locals */ integer i__1, i__2; /* Local variables */ integer i__, j, k, kk, ix, jx, kx, info; doublereal temp; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nounit; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTPSV solves one of the systems of equations */ /* A*x = b, or A'*x = b, */ /* where b and x are n element vectors and A is an n by n unit, or */ /* non-unit, upper or lower triangular matrix, supplied in packed form. */ /* No test for singularity or near-singularity is included in this */ /* routine. Such tests must be performed before calling this routine. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the equations to be solved as */ /* follows: */ /* TRANS = 'N' or 'n' A*x = b. */ /* TRANS = 'T' or 't' A'*x = b. */ /* TRANS = 'C' or 'c' A'*x = b. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* AP - DOUBLE PRECISION array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular matrix packed sequentially, */ /* column by column, so that AP( 1 ) contains a( 1, 1 ), */ /* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */ /* respectively, and so on. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular matrix packed sequentially, */ /* column by column, so that AP( 1 ) contains a( 1, 1 ), */ /* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */ /* respectively, and so on. */ /* Note that when DIAG = 'U' or 'u', the diagonal elements of */ /* A are not referenced, but are assumed to be unity. */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element right-hand side vector b. On exit, X is overwritten */ /* with the solution vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ --x; --ap; /* Function Body */ info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { info = 2; } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, "N")) { info = 3; } else if (*n < 0) { info = 4; } else if (*incx == 0) { info = 7; } if (info != 0) { _starpu_xerbla_("DTPSV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } nounit = _starpu_lsame_(diag, "N"); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of AP are */ /* accessed sequentially with one pass through AP. */ if (_starpu_lsame_(trans, "N")) { /* Form x := inv( A )*x. */ if (_starpu_lsame_(uplo, "U")) { kk = *n * (*n + 1) / 2; if (*incx == 1) { for (j = *n; j >= 1; --j) { if (x[j] != 0.) { if (nounit) { x[j] /= ap[kk]; } temp = x[j]; k = kk - 1; for (i__ = j - 1; i__ >= 1; --i__) { x[i__] -= temp * ap[k]; --k; /* L10: */ } } kk -= j; /* L20: */ } } else { jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { if (x[jx] != 0.) { if (nounit) { x[jx] /= ap[kk]; } temp = x[jx]; ix = jx; i__1 = kk - j + 1; for (k = kk - 1; k >= i__1; --k) { ix -= *incx; x[ix] -= temp * ap[k]; /* L30: */ } } jx -= *incx; kk -= j; /* L40: */ } } } else { kk = 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { if (nounit) { x[j] /= ap[kk]; } temp = x[j]; k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { x[i__] -= temp * ap[k]; ++k; /* L50: */ } } kk += *n - j + 1; /* L60: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { if (nounit) { x[jx] /= ap[kk]; } temp = x[jx]; ix = jx; i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; x[ix] -= temp * ap[k]; /* L70: */ } } jx += *incx; kk += *n - j + 1; /* L80: */ } } } } else { /* Form x := inv( A' )*x. */ if (_starpu_lsame_(uplo, "U")) { kk = 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[j]; k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { temp -= ap[k] * x[i__]; ++k; /* L90: */ } if (nounit) { temp /= ap[kk + j - 1]; } x[j] = temp; kk += j; /* L100: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[jx]; ix = kx; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { temp -= ap[k] * x[ix]; ix += *incx; /* L110: */ } if (nounit) { temp /= ap[kk + j - 1]; } x[jx] = temp; jx += *incx; kk += j; /* L120: */ } } } else { kk = *n * (*n + 1) / 2; if (*incx == 1) { for (j = *n; j >= 1; --j) { temp = x[j]; k = kk; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { temp -= ap[k] * x[i__]; --k; /* L130: */ } if (nounit) { temp /= ap[kk - *n + j]; } x[j] = temp; kk -= *n - j + 1; /* L140: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { temp = x[jx]; ix = kx; i__1 = kk - (*n - (j + 1)); for (k = kk; k >= i__1; --k) { temp -= ap[k] * x[ix]; ix -= *incx; /* L150: */ } if (nounit) { temp /= ap[kk - *n + j]; } x[jx] = temp; jx -= *incx; kk -= *n - j + 1; /* L160: */ } } } } return 0; /* End of DTPSV . */ } /* _starpu_dtpsv_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dtrmm.c000066400000000000000000000265551507764646700212550ustar00rootroot00000000000000/* dtrmm.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtrmm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, integer * lda, doublereal *b, integer *ldb) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, k, info; doublereal temp; logical lside; extern logical _starpu_lsame_(char *, char *); integer nrowa; logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nounit; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRMM performs one of the matrix-matrix operations */ /* B := alpha*op( A )*B, or B := alpha*B*op( A ), */ /* where alpha is a scalar, B is an m by n matrix, A is a unit, or */ /* non-unit, upper or lower triangular matrix and op( A ) is one of */ /* op( A ) = A or op( A ) = A'. */ /* Arguments */ /* ========== */ /* SIDE - CHARACTER*1. */ /* On entry, SIDE specifies whether op( A ) multiplies B from */ /* the left or right as follows: */ /* SIDE = 'L' or 'l' B := alpha*op( A )*B. */ /* SIDE = 'R' or 'r' B := alpha*B*op( A ). */ /* Unchanged on exit. */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix A is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANSA - CHARACTER*1. */ /* On entry, TRANSA specifies the form of op( A ) to be used in */ /* the matrix multiplication as follows: */ /* TRANSA = 'N' or 'n' op( A ) = A. */ /* TRANSA = 'T' or 't' op( A ) = A'. */ /* TRANSA = 'C' or 'c' op( A ) = A'. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit triangular */ /* as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* M - INTEGER. */ /* On entry, M specifies the number of rows of B. M must be at */ /* least zero. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of B. N must be */ /* at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. When alpha is */ /* zero then A is not referenced and B need not be set before */ /* entry. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m */ /* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. */ /* Before entry with UPLO = 'U' or 'u', the leading k by k */ /* upper triangular part of the array A must contain the upper */ /* triangular matrix and the strictly lower triangular part of */ /* A is not referenced. */ /* Before entry with UPLO = 'L' or 'l', the leading k by k */ /* lower triangular part of the array A must contain the lower */ /* triangular matrix and the strictly upper triangular part of */ /* A is not referenced. */ /* Note that when DIAG = 'U' or 'u', the diagonal elements of */ /* A are not referenced either, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. When SIDE = 'L' or 'l' then */ /* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' */ /* then LDA must be at least max( 1, n ). */ /* Unchanged on exit. */ /* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */ /* Before entry, the leading m by n part of the array B must */ /* contain the matrix B, and on exit is overwritten by the */ /* transformed matrix. */ /* LDB - INTEGER. */ /* On entry, LDB specifies the first dimension of B as declared */ /* in the calling (sub) program. LDB must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* Level 3 Blas routine. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Parameters .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ lside = _starpu_lsame_(side, "L"); if (lside) { nrowa = *m; } else { nrowa = *n; } nounit = _starpu_lsame_(diag, "N"); upper = _starpu_lsame_(uplo, "U"); info = 0; if (! lside && ! _starpu_lsame_(side, "R")) { info = 1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { info = 2; } else if (! _starpu_lsame_(transa, "N") && ! _starpu_lsame_(transa, "T") && ! _starpu_lsame_(transa, "C")) { info = 3; } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, "N")) { info = 4; } else if (*m < 0) { info = 5; } else if (*n < 0) { info = 6; } else if (*lda < max(1,nrowa)) { info = 9; } else if (*ldb < max(1,*m)) { info = 11; } if (info != 0) { _starpu_xerbla_("DTRMM ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0) { return 0; } /* And when alpha.eq.zero. */ if (*alpha == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = 0.; /* L10: */ } /* L20: */ } return 0; } /* Start the operations. */ if (lside) { if (_starpu_lsame_(transa, "N")) { /* Form B := alpha*A*B. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (k = 1; k <= i__2; ++k) { if (b[k + j * b_dim1] != 0.) { temp = *alpha * b[k + j * b_dim1]; i__3 = k - 1; for (i__ = 1; i__ <= i__3; ++i__) { b[i__ + j * b_dim1] += temp * a[i__ + k * a_dim1]; /* L30: */ } if (nounit) { temp *= a[k + k * a_dim1]; } b[k + j * b_dim1] = temp; } /* L40: */ } /* L50: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { for (k = *m; k >= 1; --k) { if (b[k + j * b_dim1] != 0.) { temp = *alpha * b[k + j * b_dim1]; b[k + j * b_dim1] = temp; if (nounit) { b[k + j * b_dim1] *= a[k + k * a_dim1]; } i__2 = *m; for (i__ = k + 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] += temp * a[i__ + k * a_dim1]; /* L60: */ } } /* L70: */ } /* L80: */ } } } else { /* Form B := alpha*A'*B. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { temp = b[i__ + j * b_dim1]; if (nounit) { temp *= a[i__ + i__ * a_dim1]; } i__2 = i__ - 1; for (k = 1; k <= i__2; ++k) { temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; /* L90: */ } b[i__ + j * b_dim1] = *alpha * temp; /* L100: */ } /* L110: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = b[i__ + j * b_dim1]; if (nounit) { temp *= a[i__ + i__ * a_dim1]; } i__3 = *m; for (k = i__ + 1; k <= i__3; ++k) { temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; /* L120: */ } b[i__ + j * b_dim1] = *alpha * temp; /* L130: */ } /* L140: */ } } } } else { if (_starpu_lsame_(transa, "N")) { /* Form B := alpha*B*A. */ if (upper) { for (j = *n; j >= 1; --j) { temp = *alpha; if (nounit) { temp *= a[j + j * a_dim1]; } i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; /* L150: */ } i__1 = j - 1; for (k = 1; k <= i__1; ++k) { if (a[k + j * a_dim1] != 0.) { temp = *alpha * a[k + j * a_dim1]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] += temp * b[i__ + k * b_dim1]; /* L160: */ } } /* L170: */ } /* L180: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = *alpha; if (nounit) { temp *= a[j + j * a_dim1]; } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; /* L190: */ } i__2 = *n; for (k = j + 1; k <= i__2; ++k) { if (a[k + j * a_dim1] != 0.) { temp = *alpha * a[k + j * a_dim1]; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { b[i__ + j * b_dim1] += temp * b[i__ + k * b_dim1]; /* L200: */ } } /* L210: */ } /* L220: */ } } } else { /* Form B := alpha*B*A'. */ if (upper) { i__1 = *n; for (k = 1; k <= i__1; ++k) { i__2 = k - 1; for (j = 1; j <= i__2; ++j) { if (a[j + k * a_dim1] != 0.) { temp = *alpha * a[j + k * a_dim1]; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { b[i__ + j * b_dim1] += temp * b[i__ + k * b_dim1]; /* L230: */ } } /* L240: */ } temp = *alpha; if (nounit) { temp *= a[k + k * a_dim1]; } if (temp != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; /* L250: */ } } /* L260: */ } } else { for (k = *n; k >= 1; --k) { i__1 = *n; for (j = k + 1; j <= i__1; ++j) { if (a[j + k * a_dim1] != 0.) { temp = *alpha * a[j + k * a_dim1]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] += temp * b[i__ + k * b_dim1]; /* L270: */ } } /* L280: */ } temp = *alpha; if (nounit) { temp *= a[k + k * a_dim1]; } if (temp != 1.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; /* L290: */ } } /* L300: */ } } } } return 0; /* End of DTRMM . */ } /* _starpu_dtrmm_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dtrmv.c000066400000000000000000000205511507764646700212540ustar00rootroot00000000000000/* dtrmv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtrmv_(char *uplo, char *trans, char *diag, integer *n, doublereal *a, integer *lda, doublereal *x, integer *incx) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, ix, jx, kx, info; doublereal temp; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nounit; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRMV performs one of the matrix-vector operations */ /* x := A*x, or x := A'*x, */ /* where x is an n element vector and A is an n by n unit, or non-unit, */ /* upper or lower triangular matrix. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' x := A*x. */ /* TRANS = 'T' or 't' x := A'*x. */ /* TRANS = 'C' or 'c' x := A'*x. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading n by n */ /* upper triangular part of the array A must contain the upper */ /* triangular matrix and the strictly lower triangular part of */ /* A is not referenced. */ /* Before entry with UPLO = 'L' or 'l', the leading n by n */ /* lower triangular part of the array A must contain the lower */ /* triangular matrix and the strictly upper triangular part of */ /* A is not referenced. */ /* Note that when DIAG = 'U' or 'u', the diagonal elements of */ /* A are not referenced either, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* max( 1, n ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. On exit, X is overwritten with the */ /* tranformed vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --x; /* Function Body */ info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { info = 2; } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, "N")) { info = 3; } else if (*n < 0) { info = 4; } else if (*lda < max(1,*n)) { info = 6; } else if (*incx == 0) { info = 8; } if (info != 0) { _starpu_xerbla_("DTRMV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } nounit = _starpu_lsame_(diag, "N"); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through A. */ if (_starpu_lsame_(trans, "N")) { /* Form x := A*x. */ if (_starpu_lsame_(uplo, "U")) { if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { temp = x[j]; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { x[i__] += temp * a[i__ + j * a_dim1]; /* L10: */ } if (nounit) { x[j] *= a[j + j * a_dim1]; } } /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = x[jx]; ix = kx; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { x[ix] += temp * a[i__ + j * a_dim1]; ix += *incx; /* L30: */ } if (nounit) { x[jx] *= a[j + j * a_dim1]; } } jx += *incx; /* L40: */ } } } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { if (x[j] != 0.) { temp = x[j]; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { x[i__] += temp * a[i__ + j * a_dim1]; /* L50: */ } if (nounit) { x[j] *= a[j + j * a_dim1]; } } /* L60: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { if (x[jx] != 0.) { temp = x[jx]; ix = kx; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { x[ix] += temp * a[i__ + j * a_dim1]; ix -= *incx; /* L70: */ } if (nounit) { x[jx] *= a[j + j * a_dim1]; } } jx -= *incx; /* L80: */ } } } } else { /* Form x := A'*x. */ if (_starpu_lsame_(uplo, "U")) { if (*incx == 1) { for (j = *n; j >= 1; --j) { temp = x[j]; if (nounit) { temp *= a[j + j * a_dim1]; } for (i__ = j - 1; i__ >= 1; --i__) { temp += a[i__ + j * a_dim1] * x[i__]; /* L90: */ } x[j] = temp; /* L100: */ } } else { jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { temp = x[jx]; ix = jx; if (nounit) { temp *= a[j + j * a_dim1]; } for (i__ = j - 1; i__ >= 1; --i__) { ix -= *incx; temp += a[i__ + j * a_dim1] * x[ix]; /* L110: */ } x[jx] = temp; jx -= *incx; /* L120: */ } } } else { if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[j]; if (nounit) { temp *= a[j + j * a_dim1]; } i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { temp += a[i__ + j * a_dim1] * x[i__]; /* L130: */ } x[j] = temp; /* L140: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[jx]; ix = jx; if (nounit) { temp *= a[j + j * a_dim1]; } i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { ix += *incx; temp += a[i__ + j * a_dim1] * x[ix]; /* L150: */ } x[jx] = temp; jx += *incx; /* L160: */ } } } } return 0; /* End of DTRMV . */ } /* _starpu_dtrmv_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dtrsm.c000066400000000000000000000301641507764646700212520ustar00rootroot00000000000000/* dtrsm.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtrsm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, integer * lda, doublereal *b, integer *ldb) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, k, info; doublereal temp; logical lside; extern logical _starpu_lsame_(char *, char *); integer nrowa; logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nounit; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRSM solves one of the matrix equations */ /* op( A )*X = alpha*B, or X*op( A ) = alpha*B, */ /* where alpha is a scalar, X and B are m by n matrices, A is a unit, or */ /* non-unit, upper or lower triangular matrix and op( A ) is one of */ /* op( A ) = A or op( A ) = A'. */ /* The matrix X is overwritten on B. */ /* Arguments */ /* ========== */ /* SIDE - CHARACTER*1. */ /* On entry, SIDE specifies whether op( A ) appears on the left */ /* or right of X as follows: */ /* SIDE = 'L' or 'l' op( A )*X = alpha*B. */ /* SIDE = 'R' or 'r' X*op( A ) = alpha*B. */ /* Unchanged on exit. */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix A is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANSA - CHARACTER*1. */ /* On entry, TRANSA specifies the form of op( A ) to be used in */ /* the matrix multiplication as follows: */ /* TRANSA = 'N' or 'n' op( A ) = A. */ /* TRANSA = 'T' or 't' op( A ) = A'. */ /* TRANSA = 'C' or 'c' op( A ) = A'. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit triangular */ /* as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* M - INTEGER. */ /* On entry, M specifies the number of rows of B. M must be at */ /* least zero. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of B. N must be */ /* at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. When alpha is */ /* zero then A is not referenced and B need not be set before */ /* entry. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m */ /* when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. */ /* Before entry with UPLO = 'U' or 'u', the leading k by k */ /* upper triangular part of the array A must contain the upper */ /* triangular matrix and the strictly lower triangular part of */ /* A is not referenced. */ /* Before entry with UPLO = 'L' or 'l', the leading k by k */ /* lower triangular part of the array A must contain the lower */ /* triangular matrix and the strictly upper triangular part of */ /* A is not referenced. */ /* Note that when DIAG = 'U' or 'u', the diagonal elements of */ /* A are not referenced either, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. When SIDE = 'L' or 'l' then */ /* LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' */ /* then LDA must be at least max( 1, n ). */ /* Unchanged on exit. */ /* B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */ /* Before entry, the leading m by n part of the array B must */ /* contain the right-hand side matrix B, and on exit is */ /* overwritten by the solution matrix X. */ /* LDB - INTEGER. */ /* On entry, LDB specifies the first dimension of B as declared */ /* in the calling (sub) program. LDB must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* Level 3 Blas routine. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Parameters .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ lside = _starpu_lsame_(side, "L"); if (lside) { nrowa = *m; } else { nrowa = *n; } nounit = _starpu_lsame_(diag, "N"); upper = _starpu_lsame_(uplo, "U"); info = 0; if (! lside && ! _starpu_lsame_(side, "R")) { info = 1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { info = 2; } else if (! _starpu_lsame_(transa, "N") && ! _starpu_lsame_(transa, "T") && ! _starpu_lsame_(transa, "C")) { info = 3; } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, "N")) { info = 4; } else if (*m < 0) { info = 5; } else if (*n < 0) { info = 6; } else if (*lda < max(1,nrowa)) { info = 9; } else if (*ldb < max(1,*m)) { info = 11; } if (info != 0) { _starpu_xerbla_("DTRSM ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0) { return 0; } /* And when alpha.eq.zero. */ if (*alpha == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = 0.; /* L10: */ } /* L20: */ } return 0; } /* Start the operations. */ if (lside) { if (_starpu_lsame_(transa, "N")) { /* Form B := alpha*inv( A )*B. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*alpha != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] ; /* L30: */ } } for (k = *m; k >= 1; --k) { if (b[k + j * b_dim1] != 0.) { if (nounit) { b[k + j * b_dim1] /= a[k + k * a_dim1]; } i__2 = k - 1; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ i__ + k * a_dim1]; /* L40: */ } } /* L50: */ } /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*alpha != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] ; /* L70: */ } } i__2 = *m; for (k = 1; k <= i__2; ++k) { if (b[k + j * b_dim1] != 0.) { if (nounit) { b[k + j * b_dim1] /= a[k + k * a_dim1]; } i__3 = *m; for (i__ = k + 1; i__ <= i__3; ++i__) { b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ i__ + k * a_dim1]; /* L80: */ } } /* L90: */ } /* L100: */ } } } else { /* Form B := alpha*inv( A' )*B. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = *alpha * b[i__ + j * b_dim1]; i__3 = i__ - 1; for (k = 1; k <= i__3; ++k) { temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; /* L110: */ } if (nounit) { temp /= a[i__ + i__ * a_dim1]; } b[i__ + j * b_dim1] = temp; /* L120: */ } /* L130: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { temp = *alpha * b[i__ + j * b_dim1]; i__2 = *m; for (k = i__ + 1; k <= i__2; ++k) { temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; /* L140: */ } if (nounit) { temp /= a[i__ + i__ * a_dim1]; } b[i__ + j * b_dim1] = temp; /* L150: */ } /* L160: */ } } } } else { if (_starpu_lsame_(transa, "N")) { /* Form B := alpha*B*inv( A ). */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*alpha != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] ; /* L170: */ } } i__2 = j - 1; for (k = 1; k <= i__2; ++k) { if (a[k + j * a_dim1] != 0.) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ i__ + k * b_dim1]; /* L180: */ } } /* L190: */ } if (nounit) { temp = 1. / a[j + j * a_dim1]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; /* L200: */ } } /* L210: */ } } else { for (j = *n; j >= 1; --j) { if (*alpha != 1.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] ; /* L220: */ } } i__1 = *n; for (k = j + 1; k <= i__1; ++k) { if (a[k + j * a_dim1] != 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ i__ + k * b_dim1]; /* L230: */ } } /* L240: */ } if (nounit) { temp = 1. / a[j + j * a_dim1]; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; /* L250: */ } } /* L260: */ } } } else { /* Form B := alpha*B*inv( A' ). */ if (upper) { for (k = *n; k >= 1; --k) { if (nounit) { temp = 1. / a[k + k * a_dim1]; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; /* L270: */ } } i__1 = k - 1; for (j = 1; j <= i__1; ++j) { if (a[j + k * a_dim1] != 0.) { temp = a[j + k * a_dim1]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] -= temp * b[i__ + k * b_dim1]; /* L280: */ } } /* L290: */ } if (*alpha != 1.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] ; /* L300: */ } } /* L310: */ } } else { i__1 = *n; for (k = 1; k <= i__1; ++k) { if (nounit) { temp = 1. / a[k + k * a_dim1]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; /* L320: */ } } i__2 = *n; for (j = k + 1; j <= i__2; ++j) { if (a[j + k * a_dim1] != 0.) { temp = a[j + k * a_dim1]; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { b[i__ + j * b_dim1] -= temp * b[i__ + k * b_dim1]; /* L330: */ } } /* L340: */ } if (*alpha != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] ; /* L350: */ } } /* L360: */ } } } } return 0; /* End of DTRSM . */ } /* _starpu_dtrsm_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dtrsv.c000066400000000000000000000210231507764646700212550ustar00rootroot00000000000000/* dtrsv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtrsv_(char *uplo, char *trans, char *diag, integer *n, doublereal *a, integer *lda, doublereal *x, integer *incx) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, ix, jx, kx, info; doublereal temp; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nounit; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRSV solves one of the systems of equations */ /* A*x = b, or A'*x = b, */ /* where b and x are n element vectors and A is an n by n unit, or */ /* non-unit, upper or lower triangular matrix. */ /* No test for singularity or near-singularity is included in this */ /* routine. Such tests must be performed before calling this routine. */ /* Arguments */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the equations to be solved as */ /* follows: */ /* TRANS = 'N' or 'n' A*x = b. */ /* TRANS = 'T' or 't' A'*x = b. */ /* TRANS = 'C' or 'c' A'*x = b. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading n by n */ /* upper triangular part of the array A must contain the upper */ /* triangular matrix and the strictly lower triangular part of */ /* A is not referenced. */ /* Before entry with UPLO = 'L' or 'l', the leading n by n */ /* lower triangular part of the array A must contain the lower */ /* triangular matrix and the strictly upper triangular part of */ /* A is not referenced. */ /* Note that when DIAG = 'U' or 'u', the diagonal elements of */ /* A are not referenced either, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* max( 1, n ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element right-hand side vector b. On exit, X is overwritten */ /* with the solution vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --x; /* Function Body */ info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { info = 1; } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { info = 2; } else if (! _starpu_lsame_(diag, "U") && ! _starpu_lsame_(diag, "N")) { info = 3; } else if (*n < 0) { info = 4; } else if (*lda < max(1,*n)) { info = 6; } else if (*incx == 0) { info = 8; } if (info != 0) { _starpu_xerbla_("DTRSV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } nounit = _starpu_lsame_(diag, "N"); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through A. */ if (_starpu_lsame_(trans, "N")) { /* Form x := inv( A )*x. */ if (_starpu_lsame_(uplo, "U")) { if (*incx == 1) { for (j = *n; j >= 1; --j) { if (x[j] != 0.) { if (nounit) { x[j] /= a[j + j * a_dim1]; } temp = x[j]; for (i__ = j - 1; i__ >= 1; --i__) { x[i__] -= temp * a[i__ + j * a_dim1]; /* L10: */ } } /* L20: */ } } else { jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { if (x[jx] != 0.) { if (nounit) { x[jx] /= a[j + j * a_dim1]; } temp = x[jx]; ix = jx; for (i__ = j - 1; i__ >= 1; --i__) { ix -= *incx; x[ix] -= temp * a[i__ + j * a_dim1]; /* L30: */ } } jx -= *incx; /* L40: */ } } } else { if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { if (nounit) { x[j] /= a[j + j * a_dim1]; } temp = x[j]; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { x[i__] -= temp * a[i__ + j * a_dim1]; /* L50: */ } } /* L60: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { if (nounit) { x[jx] /= a[j + j * a_dim1]; } temp = x[jx]; ix = jx; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { ix += *incx; x[ix] -= temp * a[i__ + j * a_dim1]; /* L70: */ } } jx += *incx; /* L80: */ } } } } else { /* Form x := inv( A' )*x. */ if (_starpu_lsame_(uplo, "U")) { if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[j]; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { temp -= a[i__ + j * a_dim1] * x[i__]; /* L90: */ } if (nounit) { temp /= a[j + j * a_dim1]; } x[j] = temp; /* L100: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[jx]; ix = kx; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { temp -= a[i__ + j * a_dim1] * x[ix]; ix += *incx; /* L110: */ } if (nounit) { temp /= a[j + j * a_dim1]; } x[jx] = temp; jx += *incx; /* L120: */ } } } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { temp = x[j]; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { temp -= a[i__ + j * a_dim1] * x[i__]; /* L130: */ } if (nounit) { temp /= a[j + j * a_dim1]; } x[j] = temp; /* L140: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { temp = x[jx]; ix = kx; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { temp -= a[i__ + j * a_dim1] * x[ix]; ix -= *incx; /* L150: */ } if (nounit) { temp /= a[j + j * a_dim1]; } x[jx] = temp; jx -= *incx; /* L160: */ } } } } return 0; /* End of DTRSV . */ } /* _starpu_dtrsv_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dzasum.c000066400000000000000000000034631507764646700214260ustar00rootroot00000000000000/* dzasum.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_dzasum_(integer *n, doublecomplex *zx, integer *incx) { /* System generated locals */ integer i__1; doublereal ret_val; /* Local variables */ integer i__, ix; doublereal stemp; extern doublereal _starpu_dcabs1_(doublecomplex *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* takes the sum of the absolute values. */ /* jack dongarra, 3/11/78. */ /* modified 3/93 to return if incx .le. 0. */ /* modified 12/3/93, array(1) declarations changed to array(*) */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* Parameter adjustments */ --zx; /* Function Body */ ret_val = 0.; stemp = 0.; if (*n <= 0 || *incx <= 0) { return ret_val; } if (*incx == 1) { goto L20; } /* code for increment not equal to 1 */ ix = 1; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { stemp += _starpu_dcabs1_(&zx[ix]); ix += *incx; /* L10: */ } ret_val = stemp; return ret_val; /* code for increment equal to 1 */ L20: i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { stemp += _starpu_dcabs1_(&zx[i__]); /* L30: */ } ret_val = stemp; return ret_val; } /* _starpu_dzasum_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/dznrm2.c000066400000000000000000000050231507764646700213310ustar00rootroot00000000000000/* dznrm2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_dznrm2_(integer *n, doublecomplex *x, integer *incx) { /* System generated locals */ integer i__1, i__2, i__3; doublereal ret_val, d__1; /* Builtin functions */ double d_imag(doublecomplex *), sqrt(doublereal); /* Local variables */ integer ix; doublereal ssq, temp, norm, scale; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DZNRM2 returns the euclidean norm of a vector via the function */ /* name, so that */ /* DZNRM2 := sqrt( conjg( x' )*x ) */ /* -- This version written on 25-October-1982. */ /* Modified on 14-October-1993 to inline the call to ZLASSQ. */ /* Sven Hammarling, Nag Ltd. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Parameter adjustments */ --x; /* Function Body */ if (*n < 1 || *incx < 1) { norm = 0.; } else { scale = 0.; ssq = 1.; /* The following loop is equivalent to this call to the LAPACK */ /* auxiliary routine: */ /* CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) */ i__1 = (*n - 1) * *incx + 1; i__2 = *incx; for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { i__3 = ix; if (x[i__3].r != 0.) { i__3 = ix; temp = (d__1 = x[i__3].r, abs(d__1)); if (scale < temp) { /* Computing 2nd power */ d__1 = scale / temp; ssq = ssq * (d__1 * d__1) + 1.; scale = temp; } else { /* Computing 2nd power */ d__1 = temp / scale; ssq += d__1 * d__1; } } if (d_imag(&x[ix]) != 0.) { temp = (d__1 = d_imag(&x[ix]), abs(d__1)); if (scale < temp) { /* Computing 2nd power */ d__1 = scale / temp; ssq = ssq * (d__1 * d__1) + 1.; scale = temp; } else { /* Computing 2nd power */ d__1 = temp / scale; ssq += d__1 * d__1; } } /* L10: */ } norm = scale * sqrt(ssq); } ret_val = norm; return ret_val; /* End of DZNRM2. */ } /* _starpu_dznrm2_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/idamax.c000066400000000000000000000037741507764646700213730ustar00rootroot00000000000000/* idamax.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_idamax_(integer *n, doublereal *dx, integer *incx) { /* System generated locals */ integer ret_val, i__1; doublereal d__1; /* Local variables */ integer i__, ix; doublereal dmax__; /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* finds the index of element having max. absolute value. */ /* jack dongarra, linpack, 3/11/78. */ /* modified 3/93 to return if incx .le. 0. */ /* modified 12/3/93, array(1) declarations changed to array(*) */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* Parameter adjustments */ --dx; /* Function Body */ ret_val = 0; if (*n < 1 || *incx <= 0) { return ret_val; } ret_val = 1; if (*n == 1) { return ret_val; } if (*incx == 1) { goto L20; } /* code for increment not equal to 1 */ ix = 1; dmax__ = abs(dx[1]); ix += *incx; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { if ((d__1 = dx[ix], abs(d__1)) <= dmax__) { goto L5; } ret_val = i__; dmax__ = (d__1 = dx[ix], abs(d__1)); L5: ix += *incx; /* L10: */ } return ret_val; /* code for increment equal to 1 */ L20: dmax__ = abs(dx[1]); i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { if ((d__1 = dx[i__], abs(d__1)) <= dmax__) { goto L30; } ret_val = i__; dmax__ = (d__1 = dx[i__], abs(d__1)); L30: ; } return ret_val; } /* _starpu_idamax_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/izamax.c000066400000000000000000000040341507764646700214070ustar00rootroot00000000000000/* izamax.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_izamax_(integer *n, doublecomplex *zx, integer *incx) { /* System generated locals */ integer ret_val, i__1; /* Local variables */ integer i__, ix; doublereal smax; extern doublereal _starpu_dcabs1_(doublecomplex *); /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* finds the index of element having max. absolute value. */ /* jack dongarra, 1/15/85. */ /* modified 3/93 to return if incx .le. 0. */ /* modified 12/3/93, array(1) declarations changed to array(*) */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* Parameter adjustments */ --zx; /* Function Body */ ret_val = 0; if (*n < 1 || *incx <= 0) { return ret_val; } ret_val = 1; if (*n == 1) { return ret_val; } if (*incx == 1) { goto L20; } /* code for increment not equal to 1 */ ix = 1; smax = _starpu_dcabs1_(&zx[1]); ix += *incx; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { if (_starpu_dcabs1_(&zx[ix]) <= smax) { goto L5; } ret_val = i__; smax = _starpu_dcabs1_(&zx[ix]); L5: ix += *incx; /* L10: */ } return ret_val; /* code for increment equal to 1 */ L20: smax = _starpu_dcabs1_(&zx[1]); i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { if (_starpu_dcabs1_(&zx[i__]) <= smax) { goto L30; } ret_val = i__; smax = _starpu_dcabs1_(&zx[i__]); L30: ; } return ret_val; } /* _starpu_izamax_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/lsame.c000066400000000000000000000056011507764646700212200ustar00rootroot00000000000000/* lsame.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" logical _starpu_lsame_(char *ca, char *cb) { /* System generated locals */ logical ret_val; /* Local variables */ integer inta, intb, zcode; /* -- LAPACK auxiliary routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* LSAME returns .TRUE. if CA is the same letter as CB regardless of */ /* case. */ /* Arguments */ /* ========= */ /* CA (input) CHARACTER*1 */ /* CB (input) CHARACTER*1 */ /* CA and CB specify the single characters to be compared. */ /* ===================================================================== */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* Test if the characters are equal */ ret_val = *(unsigned char *)ca == *(unsigned char *)cb; if (ret_val) { return ret_val; } /* Now test for equivalence if both characters are alphabetic. */ zcode = 'Z'; /* Use 'Z' rather than 'A' so that ASCII can be detected on Prime */ /* machines, on which ICHAR returns a value with bit 8 set. */ /* ICHAR('A') on Prime machines returns 193 which is the same as */ /* ICHAR('A') on an EBCDIC machine. */ inta = *(unsigned char *)ca; intb = *(unsigned char *)cb; if (zcode == 90 || zcode == 122) { /* ASCII is assumed - ZCODE is the ASCII code of either lower or */ /* upper case 'Z'. */ if (inta >= 97 && inta <= 122) { inta += -32; } if (intb >= 97 && intb <= 122) { intb += -32; } } else if (zcode == 233 || zcode == 169) { /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */ /* upper case 'Z'. */ if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta >= 162 && inta <= 169) { inta += 64; } if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb >= 162 && intb <= 169) { intb += 64; } } else if (zcode == 218 || zcode == 250) { /* ASCII is assumed, on Prime machines - ZCODE is the ASCII code */ /* plus 128 of either lower or upper case 'Z'. */ if (inta >= 225 && inta <= 250) { inta += -32; } if (intb >= 225 && intb <= 250) { intb += -32; } } ret_val = inta == intb; /* RETURN */ /* End of LSAME */ return ret_val; } /* _starpu_lsame_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/xerbla.c000066400000000000000000000043151507764646700213750ustar00rootroot00000000000000/* xerbla.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" #include "stdio.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_xerbla_(char *srname, integer *info) { /* Format strings */ static char fmt_9999[] = "(\002 ** On entry to \002,a,\002 parameter num" "ber \002,i2,\002 had \002,\002an illegal value\002)"; /* Builtin functions */ integer s_wsfe(cilist *), i_len_trim(char *, ftnlen), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Fortran I/O blocks */ static cilist io___1 = { 0, 6, 0, fmt_9999, 0 }; /* -- LAPACK auxiliary routine (preliminary version) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* XERBLA is an error handler for the LAPACK routines. */ /* It is called by an LAPACK routine if an input parameter has an */ /* invalid value. A message is printed and execution stops. */ /* Installers may consider modifying the STOP statement in order to */ /* call system-specific exception-handling facilities. */ /* Arguments */ /* ========= */ /* SRNAME (input) CHARACTER*(*) */ /* The name of the routine which called XERBLA. */ /* INFO (input) INTEGER */ /* The position of the invalid parameter in the parameter list */ /* of the calling routine. */ /* ===================================================================== */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ printf("** On entry to %s, parameter number %ld had an illegal value\n", srname, *info); /* End of XERBLA */ return 0; } /* _starpu_xerbla_ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/SRC/xerbla_array.c000066400000000000000000000064361507764646700226010ustar00rootroot00000000000000/* _starpu_xerbla_array.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_xerbla_array__(char *srname_array__, integer * srname_len__, integer *info, ftnlen srname_array_len) { /* System generated locals */ integer i__1, i__2, i__3; /* Builtin functions */ /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer i_len(char *, ftnlen); /* Local variables */ integer i__; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); char srname[32]; /* -- LAPACK auxiliary routine (version 3.0) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ /* September 19, 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* XERBLA_ARRAY assists other languages in calling XERBLA, the LAPACK */ /* and BLAS error handler. Rather than taking a Fortran string argument */ /* as the function's name, XERBLA_ARRAY takes an array of single */ /* characters along with the array's length. XERBLA_ARRAY then copies */ /* up to 32 characters of that array into a Fortran string and passes */ /* that to XERBLA. If called with a non-positive SRNAME_LEN, */ /* XERBLA_ARRAY will call XERBLA with a string of all blank characters. */ /* Say some macro or other device makes XERBLA_ARRAY available to C99 */ /* by a name lapack_xerbla and with a common Fortran calling convention. */ /* Then a C99 program could invoke XERBLA via: */ /* { */ /* int flen = strlen(__func__); */ /* lapack_xerbla(__func__, &flen, &info); */ /* } */ /* Providing XERBLA_ARRAY is not necessary for intercepting LAPACK */ /* errors. XERBLA_ARRAY calls XERBLA. */ /* Arguments */ /* ========= */ /* SRNAME_ARRAY (input) CHARACTER(1) array, dimension (SRNAME_LEN) */ /* The name of the routine which called XERBLA_ARRAY. */ /* SRNAME_LEN (input) INTEGER */ /* The length of the name in SRNAME_ARRAY. */ /* INFO (input) INTEGER */ /* The position of the invalid parameter in the parameter list */ /* of the calling routine. */ /* ===================================================================== */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --srname_array__; /* Function Body */ s_copy(srname, "", (ftnlen)32, (ftnlen)0); /* Computing MIN */ i__2 = *srname_len__, i__3 = i_len(srname, (ftnlen)32); i__1 = min(i__2,i__3); for (i__ = 1; i__ <= i__1; ++i__) { *(unsigned char *)&srname[i__ - 1] = *(unsigned char *)& srname_array__[i__]; } _starpu_xerbla_(srname, info); return 0; } /* _starpu_xerbla_array__ */ starpu-1.4.9+dfsg/min-dgels/base/BLAS/WRAP/000077500000000000000000000000001507764646700200735ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/BLAS/WRAP/Makefile000066400000000000000000000007751507764646700215440ustar00rootroot00000000000000TOPDIR=../.. include $(TOPDIR)/make.inc ####################################################################### # This is the makefile to create a wrapper for the CBLAS. ####################################################################### all: libcblaswr.a libcblaswr.a: cblaswr.o $(ARCH) $(ARCHFLAGS) libcblaswr.a cblaswr.o $(RANLIB) libcblaswr.a libfblaswr.a: fblaswr.o $(ARCH) $(ARCHFLAGS) libfblaswr.a fblaswr.o $(RANLIB) libfblaswr.a clean: rm -f *.o *.a .c.o: $(CC) $(CFLAGS) -c $*.c starpu-1.4.9+dfsg/min-dgels/base/BLAS/WRAP/README000066400000000000000000000024271507764646700207600ustar00rootroot00000000000000f2c'd BLAS wrapper The f2c translated BLAS interfaces used by CLAPACK (and other f2c'd codes) unfortunately often don't quite match the Fortran interfaces on various platforms. Consequently, it is difficult to use f2c'd codes with high performance BLAS routines, which may substantially impede the performance of these codes. These simple wrappers provide a way around this difficulty by providing f2c style interfaces (preceded with "f2c_" to avoid name collisions) that call through to an underlying CBLAS or F77 BLAS. f2c.h: The f2c header file blaswrap.h: A header file to be included in f2c codes that will use the wrapper. Just #include it at the top of an f2c generated file. cblaswr.c: A wrapper around the CBLAS interface. This interface is provided, for instance, by ATLAS (see www.netlib.org/atlas) fblaswr.c: A sample wrapper around a conventional Fortran BLAS interface. This works on a Sun platform, but will require substantial tinkering on platforms with different Fortran calling conventions. cblas.h: A header file for the CBLAS interface. fblaswr.h: A header file for the (Sun-style) Fortran BLAS interface [cz]rotg.c: Complex Givens rotation routines. For some reason, C interfaces to these routines were not specified in the CBLAS interface. starpu-1.4.9+dfsg/min-dgels/base/BLAS/WRAP/cblas.h000066400000000000000000000777531507764646700213530ustar00rootroot00000000000000#ifndef CBLAS_H #define CBLAS_H #include #define CBLAS_INDEX size_t enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; int cblas_errprn(int ierr, int info, char *form, ...); /* * =========================================================================== * Prototypes for level 1 BLAS functions (complex are recast as routines) * =========================================================================== */ float cblas_sdsdot(const int N, const float alpha, const float *X, const int incX, const float *Y, const int incY); double cblas_dsdot(const int N, const float *X, const int incX, const float *Y, const int incY); float cblas_sdot(const int N, const float *X, const int incX, const float *Y, const int incY); double cblas_ddot(const int N, const double *X, const int incX, const double *Y, const int incY); /* * Functions having prefixes Z and C only */ void _starpu_cblas_cdotu_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotu); void _starpu_cblas_cdotc_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotc); void _starpu_cblas_zdotu_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotu); void _starpu_cblas_zdotc_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotc); /* * Functions having prefixes S D SC DZ */ float cblas_snrm2(const int N, const float *X, const int incX); float cblas_sasum(const int N, const float *X, const int incX); double cblas_dnrm2(const int N, const double *X, const int incX); double cblas_dasum(const int N, const double *X, const int incX); float cblas_scnrm2(const int N, const void *X, const int incX); float cblas_scasum(const int N, const void *X, const int incX); double cblas_dznrm2(const int N, const void *X, const int incX); double cblas_dzasum(const int N, const void *X, const int incX); /* * Functions having standard 4 prefixes (S D C Z) */ CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX); CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX); CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX); CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX); /* * =========================================================================== * Prototypes for level 0 BLAS routines * =========================================================================== */ void cblas_srotg(float a, float b, float c, float s); void cblas_crotg(complex a, complex b, complex c, float s); void cblas_drotg(double a, double b, double c, double s); void cblas_zrotg(doublecomplex a, doublecomplex b, doublecomplex c, double s); /* * =========================================================================== * Prototypes for level 1 BLAS routines * =========================================================================== */ /* * Routines with standard 4 prefixes (s, d, c, z) */ void cblas_sswap(const int N, float *X, const int incX, float *Y, const int incY); void cblas_scopy(const int N, const float *X, const int incX, float *Y, const int incY); void cblas_saxpy(const int N, const float alpha, const float *X, const int incX, float *Y, const int incY); void cblas_dswap(const int N, double *X, const int incX, double *Y, const int incY); void cblas_dcopy(const int N, const double *X, const int incX, double *Y, const int incY); void cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY); void cblas_cswap(const int N, void *X, const int incX, void *Y, const int incY); void cblas_ccopy(const int N, const void *X, const int incX, void *Y, const int incY); void cblas_caxpy(const int N, const void *alpha, const void *X, const int incX, void *Y, const int incY); void cblas_zswap(const int N, void *X, const int incX, void *Y, const int incY); void cblas_zcopy(const int N, const void *X, const int incX, void *Y, const int incY); void cblas_zaxpy(const int N, const void *alpha, const void *X, const int incX, void *Y, const int incY); /* * Routines with S and D prefix only */ void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P); void cblas_srot(const int N, float *X, const int incX, float *Y, const int incY, const float c, const float s); void cblas_srotm(const int N, float *X, const int incX, float *Y, const int incY, const float *P); void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P); void cblas_drot(const int N, double *X, const int incX, double *Y, const int incY, const double c, const double s); void cblas_drotm(const int N, double *X, const int incX, double *Y, const int incY, const double *P); /* * Routines with S D C Z CS and ZD prefixes */ void cblas_sscal(const int N, const float alpha, float *X, const int incX); void cblas_dscal(const int N, const double alpha, double *X, const int incX); void cblas_cscal(const int N, const void *alpha, void *X, const int incX); void cblas_zscal(const int N, const void *alpha, void *X, const int incX); void cblas_csscal(const int N, const float alpha, void *X, const int incX); void cblas_zdscal(const int N, const double alpha, void *X, const int incX); /* * =========================================================================== * Prototypes for level 2 BLAS * =========================================================================== */ /* * Routines with standard 4 prefixes (S, D, C, Z) */ void cblas_sgemv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, const float *A, const int lda, const float *X, const int incX, const float beta, float *Y, const int incY); void cblas_sgbmv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const float alpha, const float *A, const int lda, const float *X, const int incX, const float beta, float *Y, const int incY); void cblas_strmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const float *A, const int lda, float *X, const int incX); void cblas_stbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const float *A, const int lda, float *X, const int incX); void cblas_stpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const float *Ap, float *X, const int incX); void cblas_strsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const float *A, const int lda, float *X, const int incX); void cblas_stbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const float *A, const int lda, float *X, const int incX); void cblas_stpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const float *Ap, float *X, const int incX); void cblas_dgemv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY); void cblas_dgbmv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY); void cblas_dtrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const double *A, const int lda, double *X, const int incX); void cblas_dtbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const double *A, const int lda, double *X, const int incX); void cblas_dtpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const double *Ap, double *X, const int incX); void cblas_dtrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const double *A, const int lda, double *X, const int incX); void cblas_dtbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const double *A, const int lda, double *X, const int incX); void cblas_dtpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const double *Ap, double *X, const int incX); void cblas_cgemv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_cgbmv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_ctrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *A, const int lda, void *X, const int incX); void cblas_ctbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const void *A, const int lda, void *X, const int incX); void cblas_ctpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *Ap, void *X, const int incX); void cblas_ctrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *A, const int lda, void *X, const int incX); void cblas_ctbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const void *A, const int lda, void *X, const int incX); void cblas_ctpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *Ap, void *X, const int incX); void cblas_zgemv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_zgbmv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_ztrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *A, const int lda, void *X, const int incX); void cblas_ztbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const void *A, const int lda, void *X, const int incX); void cblas_ztpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *Ap, void *X, const int incX); void cblas_ztrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *A, const int lda, void *X, const int incX); void cblas_ztbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const void *A, const int lda, void *X, const int incX); void cblas_ztpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *Ap, void *X, const int incX); /* * Routines with S and D prefixes only */ void cblas_ssymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const float *A, const int lda, const float *X, const int incX, const float beta, float *Y, const int incY); void cblas_ssbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const int K, const float alpha, const float *A, const int lda, const float *X, const int incX, const float beta, float *Y, const int incY); void cblas_sspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const float *Ap, const float *X, const int incX, const float beta, float *Y, const int incY); void cblas_sger(const enum CBLAS_ORDER Order, const int M, const int N, const float alpha, const float *X, const int incX, const float *Y, const int incY, float *A, const int lda); void cblas_ssyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const float *X, const int incX, float *A, const int lda); void cblas_sspr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const float *X, const int incX, float *Ap); void cblas_ssyr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const float *X, const int incX, const float *Y, const int incY, float *A, const int lda); void cblas_sspr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const float *X, const int incX, const float *Y, const int incY, float *A); void cblas_dsymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY); void cblas_dsbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const int K, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY); void cblas_dspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const double *Ap, const double *X, const int incX, const double beta, double *Y, const int incY); void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N, const double alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda); void cblas_dsyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const double *X, const int incX, double *A, const int lda); void cblas_dspr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const double *X, const int incX, double *Ap); void cblas_dsyr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda); void cblas_dspr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const double *X, const int incX, const double *Y, const int incY, double *A); /* * Routines with C and Z prefixes only */ void cblas_chemv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_chbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const int K, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_chpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *Ap, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_cgeru(const enum CBLAS_ORDER Order, const int M, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *A, const int lda); void cblas_cgerc(const enum CBLAS_ORDER Order, const int M, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *A, const int lda); void cblas_cher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const void *X, const int incX, void *A, const int lda); void cblas_chpr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const void *X, const int incX, void *A); void cblas_cher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *A, const int lda); void cblas_chpr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *Ap); void cblas_zhemv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_zhbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const int K, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_zhpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *Ap, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_zgeru(const enum CBLAS_ORDER Order, const int M, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *A, const int lda); void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *A, const int lda); void cblas_zher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const void *X, const int incX, void *A, const int lda); void cblas_zhpr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const void *X, const int incX, void *A); void cblas_zher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *A, const int lda); void cblas_zhpr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *Ap); /* * =========================================================================== * Prototypes for level 3 BLAS * =========================================================================== */ /* * Routines with standard 4 prefixes (S, D, C, Z) */ void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc); void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc); void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float beta, float *C, const int ldc); void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc); void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb); void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb); void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc); void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc); void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double beta, double *C, const int ldc); void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc); void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb); void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb); void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc); void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb); void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb); void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc); void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb); void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb); /* * Routines with prefixes C and Z only */ void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const void *A, const int lda, const float beta, void *C, const int ldc); void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const float beta, void *C, const int ldc); void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const void *A, const int lda, const double beta, void *C, const int ldc); void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const double beta, void *C, const int ldc); int cblas_errprn(int ierr, int info, char *form, ...); #endif starpu-1.4.9+dfsg/min-dgels/base/BLAS/WRAP/cblaswr.c000066400000000000000000001225031507764646700216770ustar00rootroot00000000000000#include "f2c.h" #include "cblas.h" /* #define CBLAS_INDEX size_t enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; */ #define CVT_TRANSPOSE(c) \ (((c) == 'N' || (c) == 'n') ? CblasNoTrans : \ ((c) == 'T' || (c) == 't') ? CblasTrans : \ ((c) == 'C' || (c) == 'c') ? CblasConjTrans : \ -1) #define CVT_UPLO(c) \ (((c) == 'U' || (c) == 'u') ? CblasUpper : \ ((c) == 'L' || (c) == 'l') ? CblasLower : \ -1) #define CVT_DIAG(c) \ (((c) == 'U' || (c) == 'u') ? CblasUnit : \ ((c) == 'N' || (c) == 'n') ? CblasNonUnit : \ -1) #define CVT_SIDE(c) \ (((c) == 'L' || (c) == 'l') ? CblasLeft : \ ((c) == 'R' || (c) == 'r') ? CblasRight : \ -1) /* * =========================================================================== * Prototypes for level 1 BLAS functions (complex are recast as routines) * =========================================================================== */ doublereal f2c_sdot(integer* N, real* X, integer* incX, real* Y, integer* incY) { return cblas_sdot(*N, X, *incX, Y, *incY); } doublereal f2c_ddot(integer* N, doublereal* X, integer* incX, doublereal* Y, integer* incY) { return cblas_ddot(*N, X, *incX, Y, *incY); } /* * Functions having prefixes Z and C only */ void f2c_cdotu(complex* retval, integer* N, complex* X, integer* incX, complex* Y, integer* incY) { _starpu_cblas_cdotu_sub(*N, X, *incX, Y, *incY, retval); } void f2c_cdotc(complex* retval, integer* N, complex* X, integer* incX, complex* Y, integer* incY) { _starpu_cblas_cdotc_sub(*N, X, *incX, Y, *incY, retval); } void f2c_zdotu(doublecomplex* retval, integer* N, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY) { _starpu_cblas_zdotu_sub(*N, X, *incX, Y, *incY, retval); } void f2c_zdotc(doublecomplex* retval, integer* N, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY) { _starpu_cblas_zdotc_sub(*N, X, *incX, Y, *incY, retval); } /* * Functions having prefixes S D SC DZ */ doublereal f2c_snrm2(integer* N, real* X, integer* incX) { return cblas_snrm2(*N, X, *incX); } doublereal f2c_sasum(integer* N, real* X, integer* incX) { return cblas_sasum(*N, X, *incX); } doublereal f2c_dnrm2(integer* N, doublereal* X, integer* incX) { return cblas_dnrm2(*N, X, *incX); } doublereal f2c_dasum(integer* N, doublereal* X, integer* incX) { return cblas_dasum(*N, X, *incX); } doublereal f2c_scnrm2(integer* N, complex* X, integer* incX) { return cblas_scnrm2(*N, X, *incX); } doublereal f2c_scasum(integer* N, complex* X, integer* incX) { return cblas_scasum(*N, X, *incX); } doublereal f2c_dznrm2(integer* N, doublecomplex* X, integer* incX) { return cblas_dznrm2(*N, X, *incX); } doublereal f2c_dzasum(integer* N, doublecomplex* X, integer* incX) { return cblas_dzasum(*N, X, *incX); } /* * Functions having standard 4 prefixes (S D C Z) */ integer f2c_isamax(integer* N, real* X, integer* incX) { if (*N == 0) return 0; return (integer) cblas_isamax(*N, X, *incX) + 1; } integer f2c_idamax(integer* N, doublereal* X, integer* incX) { if (*N == 0) return 0; return (integer) cblas_idamax(*N, X, *incX) + 1; } integer f2c_icamax(integer* N, complex* X, integer* incX) { if (*N == 0) return 0; return (integer) cblas_icamax(*N, X, *incX) + 1; } integer f2c_izamax(integer* N, doublecomplex* X, integer* incX) { if (*N == 0) return 0; return (integer) cblas_izamax(*N, X, *incX) + 1; } /* * =========================================================================== * Prototypes for level 0 BLAS routines * =========================================================================== */ int f2c_srotg(real* a, real* b, real* c, real* s) { cblas_srotg(*a, *b, *c, *s); return 0; } int f2c_crotg(complex* CA, complex* CB, complex* C, real* S) { cblas_crotg(*CA, *CB, *C, *S); return 0; } int f2c_drotg(doublereal* a, doublereal* b, doublereal* c, doublereal* s) { cblas_drotg(*a, *b, *c, *s); return 0; } int f2c_zrotg(doublecomplex* CA, doublecomplex* CB, doublecomplex* C, doublereal* S) { cblas_zrotg(*CA, *CB, *C, *S); return 0; } /* * =========================================================================== * Prototypes for level 1 BLAS routines * =========================================================================== */ /* * Routines with standard 4 prefixes (s, d, c, z) */ int f2c_sswap(integer* N, real* X, integer* incX, real* Y, integer* incY) { cblas_sswap(*N, X, *incX, Y, *incY); return 0; } int f2c_scopy(integer* N, real* X, integer* incX, real* Y, integer* incY) { cblas_scopy(*N, X, *incX, Y, *incY); return 0; } int f2c_saxpy(integer* N, real* alpha, real* X, integer* incX, real* Y, integer* incY) { cblas_saxpy(*N, *alpha, X, *incX, Y, *incY); return 0; } int f2c_dswap(integer* N, doublereal* X, integer* incX, doublereal* Y, integer* incY) { cblas_dswap(*N, X, *incX, Y, *incY); return 0; } int f2c_dcopy(integer* N, doublereal* X, integer* incX, doublereal* Y, integer* incY) { cblas_dcopy(*N, X, *incX, Y, *incY); return 0; } int f2c_daxpy(integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* Y, integer* incY) { cblas_daxpy(*N, *alpha, X, *incX, Y, *incY); return 0; } int f2c_cswap(integer* N, complex* X, integer* incX, complex* Y, integer* incY) { cblas_cswap(*N, X, *incX, Y, *incY); return 0; } int f2c_ccopy(integer* N, complex* X, integer* incX, complex* Y, integer* incY) { cblas_ccopy(*N, X, *incX, Y, *incY); return 0; } int f2c_caxpy(integer* N, complex* alpha, complex* X, integer* incX, complex* Y, integer* incY) { cblas_caxpy(*N, alpha, X, *incX, Y, *incY); return 0; } int f2c_zswap(integer* N, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY) { cblas_zswap(*N, X, *incX, Y, *incY); return 0; } int f2c_zcopy(integer* N, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY) { cblas_zcopy(*N, X, *incX, Y, *incY); return 0; } int f2c_zaxpy(integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY) { cblas_zaxpy(*N, alpha, X, *incX, Y, *incY); return 0; } /* * Routines with S and D prefix only */ int f2c_srot(integer* N, real* X, integer* incX, real* Y, integer* incY, real* c, real* s) { cblas_srot(*N, X, *incX, Y, *incY, *c, *s); return 0; } int f2c_drot(integer* N, doublereal* X, integer* incX, doublereal* Y, integer* incY, doublereal* c, doublereal* s) { cblas_drot(*N, X, *incX, Y, *incY, *c, *s); return 0; } /* * Routines with S D C Z CS and ZD prefixes */ int f2c_sscal(integer* N, real* alpha, real* X, integer* incX) { cblas_sscal(*N, *alpha, X, *incX); return 0; } int f2c_dscal(integer* N, doublereal* alpha, doublereal* X, integer* incX) { cblas_dscal(*N, *alpha, X, *incX); return 0; } int f2c_cscal(integer* N, complex* alpha, complex* X, integer* incX) { cblas_cscal(*N, alpha, X, *incX); return 0; } int f2c_zscal(integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX) { cblas_zscal(*N, alpha, X, *incX); return 0; } int f2c_csscal(integer* N, real* alpha, complex* X, integer* incX) { cblas_csscal(*N, *alpha, X, *incX); return 0; } int f2c_zdscal(integer* N, doublereal* alpha, doublecomplex* X, integer* incX) { cblas_zdscal(*N, *alpha, X, *incX); return 0; } /* * =========================================================================== * Prototypes for level 2 BLAS * =========================================================================== */ /* * Routines with standard 4 prefixes (S, D, C, Z) */ int f2c_sgemv(char* trans, integer* M, integer* N, real* alpha, real* A, integer* lda, real* X, integer* incX, real* beta, real* Y, integer* incY) { cblas_sgemv(CblasColMajor, CVT_TRANSPOSE(*trans), *M, *N, *alpha, A, *lda, X, *incX, *beta, Y, *incY); return 0; } int f2c_sgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, real *alpha, real *A, integer *lda, real *X, integer *incX, real *beta, real *Y, integer *incY) { cblas_sgbmv(CblasColMajor, CVT_TRANSPOSE(*trans), *M, *N, *KL, *KU, *alpha, A, *lda, X, *incX, *beta, Y, *incY); return 0; } int f2c_strmv(char* uplo, char *trans, char* diag, integer *N, real *A, integer *lda, real *X, integer *incX) { cblas_strmv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, A, *lda, X, *incX); return 0; } int f2c_stbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, real* A, integer* lda, real* X, integer* incX) { cblas_stbmv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, *K, A, *lda, X, *incX); return 0; } int f2c_stpmv(char* uplo, char* trans, char* diag, integer* N, real* Ap, real* X, integer* incX) { cblas_stpmv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, Ap, X, *incX); return 0; } int f2c_strsv(char* uplo, char* trans, char* diag, integer* N, real* A, integer* lda, real* X, integer* incX) { cblas_strsv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, A, *lda, X, *incX); return 0; } int f2c_stbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, real* A, integer* lda, real* X, integer* incX) { cblas_stbsv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, *K, A, *lda, X, *incX); return 0; } int f2c_stpsv(char* uplo, char* trans, char* diag, integer* N, real* Ap, real* X, integer* incX) { cblas_stpsv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, Ap, X, *incX); return 0; } int f2c_dgemv(char* trans, integer* M, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* X, integer* incX, doublereal* beta, doublereal* Y, integer* incY) { cblas_dgemv(CblasColMajor, CVT_TRANSPOSE(*trans), *M, *N, *alpha, A, *lda, X, *incX, *beta, Y, *incY); return 0; } int f2c_dgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, doublereal *alpha, doublereal *A, integer *lda, doublereal *X, integer *incX, doublereal *beta, doublereal *Y, integer *incY) { cblas_dgbmv(CblasColMajor, CVT_TRANSPOSE(*trans), *M, *N, *KL, *KU, *alpha, A, *lda, X, *incX, *beta, Y, *incY); return 0; } int f2c_dtrmv(char* uplo, char *trans, char* diag, integer *N, doublereal *A, integer *lda, doublereal *X, integer *incX) { cblas_dtrmv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, A, *lda, X, *incX); return 0; } int f2c_dtbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, doublereal* A, integer* lda, doublereal* X, integer* incX) { cblas_dtbmv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, *K, A, *lda, X, *incX); return 0; } int f2c_dtpmv(char* uplo, char* trans, char* diag, integer* N, doublereal* Ap, doublereal* X, integer* incX) { cblas_dtpmv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, Ap, X, *incX); return 0; } int f2c_dtrsv(char* uplo, char* trans, char* diag, integer* N, doublereal* A, integer* lda, doublereal* X, integer* incX) { cblas_dtrsv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, A, *lda, X, *incX); return 0; } int f2c_dtbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, doublereal* A, integer* lda, doublereal* X, integer* incX) { cblas_dtbsv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, *K, A, *lda, X, *incX); return 0; } int f2c_dtpsv(char* uplo, char* trans, char* diag, integer* N, doublereal* Ap, doublereal* X, integer* incX) { cblas_dtpsv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, Ap, X, *incX); return 0; } int f2c_cgemv(char* trans, integer* M, integer* N, complex* alpha, complex* A, integer* lda, complex* X, integer* incX, complex* beta, complex* Y, integer* incY) { cblas_cgemv(CblasColMajor, CVT_TRANSPOSE(*trans), *M, *N, alpha, A, *lda, X, *incX, beta, Y, *incY); return 0; } int f2c_cgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, complex *alpha, complex *A, integer *lda, complex *X, integer *incX, complex *beta, complex *Y, integer *incY) { cblas_cgbmv(CblasColMajor, CVT_TRANSPOSE(*trans), *M, *N, *KL, *KU, alpha, A, *lda, X, *incX, beta, Y, *incY); return 0; } int f2c_ctrmv(char* uplo, char *trans, char* diag, integer *N, complex *A, integer *lda, complex *X, integer *incX) { cblas_ctrmv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, A, *lda, X, *incX); return 0; } int f2c_ctbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, complex* A, integer* lda, complex* X, integer* incX) { cblas_ctbmv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, *K, A, *lda, X, *incX); return 0; } int f2c_ctpmv(char* uplo, char* trans, char* diag, integer* N, complex* Ap, complex* X, integer* incX) { cblas_ctpmv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, Ap, X, *incX); return 0; } int f2c_ctrsv(char* uplo, char* trans, char* diag, integer* N, complex* A, integer* lda, complex* X, integer* incX) { cblas_ctrsv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, A, *lda, X, *incX); return 0; } int f2c_ctbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, complex* A, integer* lda, complex* X, integer* incX) { cblas_ctbsv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, *K, A, *lda, X, *incX); return 0; } int f2c_ctpsv(char* uplo, char* trans, char* diag, integer* N, complex* Ap, complex* X, integer* incX) { cblas_ctpsv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, Ap, X, *incX); return 0; } int f2c_zgemv(char* trans, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX, doublecomplex* beta, doublecomplex* Y, integer* incY) { cblas_zgemv(CblasColMajor, CVT_TRANSPOSE(*trans), *M, *N, alpha, A, *lda, X, *incX, beta, Y, *incY); return 0; } int f2c_zgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, doublecomplex *alpha, doublecomplex *A, integer *lda, doublecomplex *X, integer *incX, doublecomplex *beta, doublecomplex *Y, integer *incY) { cblas_zgbmv(CblasColMajor, CVT_TRANSPOSE(*trans), *M, *N, *KL, *KU, alpha, A, *lda, X, *incX, beta, Y, *incY); return 0; } int f2c_ztrmv(char* uplo, char *trans, char* diag, integer *N, doublecomplex *A, integer *lda, doublecomplex *X, integer *incX) { cblas_ztrmv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, A, *lda, X, *incX); return 0; } int f2c_ztbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX) { cblas_ztbmv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, *K, A, *lda, X, *incX); return 0; } int f2c_ztpmv(char* uplo, char* trans, char* diag, integer* N, doublecomplex* Ap, doublecomplex* X, integer* incX) { cblas_ztpmv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, Ap, X, *incX); return 0; } int f2c_ztrsv(char* uplo, char* trans, char* diag, integer* N, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX) { cblas_ztrsv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, A, *lda, X, *incX); return 0; } int f2c_ztbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX) { cblas_ztbsv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, *K, A, *lda, X, *incX); return 0; } int f2c_ztpsv(char* uplo, char* trans, char* diag, integer* N, doublecomplex* Ap, doublecomplex* X, integer* incX) { cblas_ztpsv(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *N, Ap, X, *incX); return 0; } /* * Routines with S and D prefixes only */ int f2c_ssymv(char* uplo, integer* N, real* alpha, real* A, integer* lda, real* X, integer* incX, real* beta, real* Y, integer* incY) { cblas_ssymv(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, A, *lda, X, *incX, *beta, Y, *incY); return 0; } int f2c_ssbmv(char* uplo, integer* N, integer* K, real* alpha, real* A, integer* lda, real* X, integer* incX, real* beta, real* Y, integer* incY) { cblas_ssbmv(CblasColMajor, CVT_UPLO(*uplo), *N, *K, *alpha, A, *lda, X, *incX, *beta, Y, *incY); return 0; } int f2c_sspmv(char* uplo, integer* N, real* alpha, real* Ap, real* X, integer* incX, real* beta, real* Y, integer* incY) { cblas_sspmv(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, Ap, X, *incX, *beta, Y, *incY); return 0; } int f2c_sger(integer* M, integer* N, real* alpha, real* X, integer* incX, real* Y, integer* incY, real* A, integer* lda) { cblas_sger(CblasColMajor, *M, *N, *alpha, X, *incX, Y, *incY, A, *lda); return 0; } int f2c_ssyr(char* uplo, integer* N, real* alpha, real* X, integer* incX, real* A, integer* lda) { cblas_ssyr(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, X, *incX, A, *lda); return 0; } int f2c_sspr(char* uplo, integer* N, real* alpha, real* X, integer* incX, real* Ap) { cblas_sspr(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, X, *incX, Ap); return 0; } int f2c_ssyr2(char* uplo, integer* N, real* alpha, real* X, integer* incX, real* Y, integer* incY, real* A, integer* lda) { cblas_ssyr2(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, X, *incX, Y, *incY, A, *lda); return 0; } int f2c_sspr2(char* uplo, integer* N, real* alpha, real* X, integer* incX, real* Y, integer* incY, real* A) { cblas_sspr2(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, X, *incX, Y, *incY, A); return 0; } int f2c_dsymv(char* uplo, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* X, integer* incX, doublereal* beta, doublereal* Y, integer* incY) { cblas_dsymv(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, A, *lda, X, *incX, *beta, Y, *incY); return 0; } int f2c_dsbmv(char* uplo, integer* N, integer* K, doublereal* alpha, doublereal* A, integer* lda, doublereal* X, integer* incX, doublereal* beta, doublereal* Y, integer* incY) { cblas_dsbmv(CblasColMajor, CVT_UPLO(*uplo), *N, *K, *alpha, A, *lda, X, *incX, *beta, Y, *incY); return 0; } int f2c_dspmv(char* uplo, integer* N, doublereal* alpha, doublereal* Ap, doublereal* X, integer* incX, doublereal* beta, doublereal* Y, integer* incY) { cblas_dspmv(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, Ap, X, *incX, *beta, Y, *incY); return 0; } int f2c_dger(integer* M, integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* Y, integer* incY, doublereal* A, integer* lda) { cblas_dger(CblasColMajor, *M, *N, *alpha, X, *incX, Y, *incY, A, *lda); return 0; } int f2c_dsyr(char* uplo, integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* A, integer* lda) { cblas_dsyr(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, X, *incX, A, *lda); return 0; } int f2c_dspr(char* uplo, integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* Ap) { cblas_dspr(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, X, *incX, Ap); return 0; } int f2c_dsyr2(char* uplo, integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* Y, integer* incY, doublereal* A, integer* lda) { cblas_dsyr2(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, X, *incX, Y, *incY, A, *lda); return 0; } int f2c_dspr2(char* uplo, integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* Y, integer* incY, doublereal* A) { cblas_dspr2(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, X, *incX, Y, *incY, A); return 0; } /* * Routines with C and Z prefixes only */ int f2c_chemv(char* uplo, integer* N, complex* alpha, complex* A, integer* lda, complex* X, integer* incX, complex* beta, complex* Y, integer* incY) { cblas_chemv(CblasColMajor, CVT_UPLO(*uplo), *N, alpha, A, *lda, X, *incX, beta, Y, *incY); return 0; } int f2c_chbmv(char* uplo, integer* N, integer* K, complex* alpha, complex* A, integer* lda, complex* X, integer* incX, complex* beta, complex* Y, integer* incY) { cblas_chbmv(CblasColMajor, CVT_UPLO(*uplo), *N, *K, alpha, A, *lda, X, *incX, beta, Y, *incY); return 0; } int f2c_chpmv(char* uplo, integer* N, complex* alpha, complex* Ap, complex* X, integer* incX, complex* beta, complex* Y, integer* incY) { cblas_chpmv(CblasColMajor, CVT_UPLO(*uplo), *N, alpha, Ap, X, *incX, beta, Y, *incY); return 0; } int f2c_cgeru(integer* M, integer* N, complex* alpha, complex* X, integer* incX, complex* Y, integer* incY, complex* A, integer* lda) { cblas_cgeru(CblasColMajor, *M, *N, alpha, X, *incX, Y, *incY, A, *lda); return 0; } int f2c_cgerc(integer* M, integer* N, complex* alpha, complex* X, integer* incX, complex* Y, integer* incY, complex* A, integer* lda) { cblas_cgerc(CblasColMajor, *M, *N, alpha, X, *incX, Y, *incY, A, *lda); return 0; } int f2c_cher(char* uplo, integer* N, real* alpha, complex* X, integer* incX, complex* A, integer* lda) { cblas_cher(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, X, *incX, A, *lda); return 0; } int f2c_chpr(char* uplo, integer* N, real* alpha, complex* X, integer* incX, complex* Ap) { cblas_chpr(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, X, *incX, Ap); return 0; } int f2c_cher2(char* uplo, integer* N, complex* alpha, complex* X, integer* incX, complex* Y, integer* incY, complex* A, integer* lda) { cblas_cher2(CblasColMajor, CVT_UPLO(*uplo), *N, alpha, X, *incX, Y, *incY, A, *lda); return 0; } int f2c_chpr2(char* uplo, integer* N, complex* alpha, complex* X, integer* incX, complex* Y, integer* incY, complex* Ap) { cblas_chpr2(CblasColMajor, CVT_UPLO(*uplo), *N, alpha, X, *incX, Y, *incY, Ap); return 0; } int f2c_zhemv(char* uplo, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX, doublecomplex* beta, doublecomplex* Y, integer* incY) { cblas_zhemv(CblasColMajor, CVT_UPLO(*uplo), *N, alpha, A, *lda, X, *incX, beta, Y, *incY); return 0; } int f2c_zhbmv(char* uplo, integer* N, integer* K, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX, doublecomplex* beta, doublecomplex* Y, integer* incY) { cblas_zhbmv(CblasColMajor, CVT_UPLO(*uplo), *N, *K, alpha, A, *lda, X, *incX, beta, Y, *incY); return 0; } int f2c_zhpmv(char* uplo, integer* N, doublecomplex* alpha, doublecomplex* Ap, doublecomplex* X, integer* incX, doublecomplex* beta, doublecomplex* Y, integer* incY) { cblas_zhpmv(CblasColMajor, CVT_UPLO(*uplo), *N, alpha, Ap, X, *incX, beta, Y, *incY); return 0; } int f2c_zgeru(integer* M, integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY, doublecomplex* A, integer* lda) { cblas_zgeru(CblasColMajor, *M, *N, alpha, X, *incX, Y, *incY, A, *lda); return 0; } int f2c_zgerc(integer* M, integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY, doublecomplex* A, integer* lda) { cblas_zgerc(CblasColMajor, *M, *N, alpha, X, *incX, Y, *incY, A, *lda); return 0; } int f2c_zher(char* uplo, integer* N, doublereal* alpha, doublecomplex* X, integer* incX, doublecomplex* A, integer* lda) { cblas_zher(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, X, *incX, A, *lda); return 0; } int f2c_zhpr(char* uplo, integer* N, doublereal* alpha, doublecomplex* X, integer* incX, doublecomplex* Ap) { cblas_zhpr(CblasColMajor, CVT_UPLO(*uplo), *N, *alpha, X, *incX, Ap); return 0; } int f2c_zher2(char* uplo, integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY, doublecomplex* A, integer* lda) { cblas_zher2(CblasColMajor, CVT_UPLO(*uplo), *N, alpha, X, *incX, Y, *incY, A, *lda); return 0; } int f2c_zhpr2(char* uplo, integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY, doublecomplex* Ap) { cblas_zhpr2(CblasColMajor, CVT_UPLO(*uplo), *N, alpha, X, *incX, Y, *incY, Ap); return 0; } /* * =========================================================================== * Prototypes for level 3 BLAS * =========================================================================== */ /* * Routines with standard 4 prefixes (S, D, C, Z) */ int f2c_sgemm(char* transA, char* transB, integer* M, integer* N, integer* K, real* alpha, real* A, integer* lda, real* B, integer* ldb, real* beta, real* C, integer* ldc) { cblas_sgemm(CblasColMajor, CVT_TRANSPOSE(*transA), CVT_TRANSPOSE(*transB), *M, *N, *K, *alpha, A, *lda, B, *ldb, *beta, C, *ldc); return 0; } int f2c_ssymm(char* side, char* uplo, integer* M, integer* N, real* alpha, real* A, integer* lda, real* B, integer* ldb, real* beta, real* C, integer* ldc) { cblas_ssymm(CblasColMajor, CVT_SIDE(*side), CVT_UPLO(*uplo), *M, *N, *alpha, A, *lda, B, *ldb, *beta, C, *ldc); return 0; } int f2c_ssyrk(char* uplo, char* trans, integer* N, integer* K, real* alpha, real* A, integer* lda, real* beta, real* C, integer* ldc) { cblas_ssyrk(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, *alpha, A, *lda, *beta, C, *ldc); return 0; } int f2c_ssyr2k(char* uplo, char* trans, integer* N, integer* K, real* alpha, real* A, integer* lda, real* B, integer* ldb, real* beta, real* C, integer* ldc) { cblas_ssyr2k(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, *alpha, A, *lda, B, *ldb, *beta, C, *ldc); return 0; } int f2c_strmm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, real* alpha, real* A, integer* lda, real* B, integer* ldb) { cblas_strmm(CblasColMajor, CVT_SIDE(*side), CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *M, *N, *alpha, A, *lda, B, *ldb); return 0; } int f2c_strsm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, real* alpha, real* A, integer* lda, real* B, integer* ldb) { cblas_strsm(CblasColMajor, CVT_SIDE(*side), CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *M, *N, *alpha, A, *lda, B, *ldb); return 0; } int f2c_dgemm(char* transA, char* transB, integer* M, integer* N, integer* K, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb, doublereal* beta, doublereal* C, integer* ldc) { cblas_dgemm(CblasColMajor, CVT_TRANSPOSE(*transA), CVT_TRANSPOSE(*transB), *M, *N, *K, *alpha, A, *lda, B, *ldb, *beta, C, *ldc); return 0; } int f2c_dsymm(char* side, char* uplo, integer* M, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb, doublereal* beta, doublereal* C, integer* ldc) { cblas_dsymm(CblasColMajor, CVT_SIDE(*side), CVT_UPLO(*uplo), *M, *N, *alpha, A, *lda, B, *ldb, *beta, C, *ldc); return 0; } int f2c_dsyrk(char* uplo, char* trans, integer* N, integer* K, doublereal* alpha, doublereal* A, integer* lda, doublereal* beta, doublereal* C, integer* ldc) { cblas_dsyrk(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, *alpha, A, *lda, *beta, C, *ldc); return 0; } int f2c_dsyr2k(char* uplo, char* trans, integer* N, integer* K, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb, doublereal* beta, doublereal* C, integer* ldc) { cblas_dsyr2k(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, *alpha, A, *lda, B, *ldb, *beta, C, *ldc); return 0; } int f2c_dtrmm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb) { cblas_dtrmm(CblasColMajor, CVT_SIDE(*side), CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *M, *N, *alpha, A, *lda, B, *ldb); return 0; } int f2c_dtrsm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb) { cblas_dtrsm(CblasColMajor, CVT_SIDE(*side), CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *M, *N, *alpha, A, *lda, B, *ldb); return 0; } int f2c_cgemm(char* transA, char* transB, integer* M, integer* N, integer* K, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb, complex* beta, complex* C, integer* ldc) { cblas_cgemm(CblasColMajor, CVT_TRANSPOSE(*transA), CVT_TRANSPOSE(*transB), *M, *N, *K, alpha, A, *lda, B, *ldb, beta, C, *ldc); return 0; } int f2c_csymm(char* side, char* uplo, integer* M, integer* N, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb, complex* beta, complex* C, integer* ldc) { cblas_csymm(CblasColMajor, CVT_SIDE(*side), CVT_UPLO(*uplo), *M, *N, alpha, A, *lda, B, *ldb, beta, C, *ldc); return 0; } int f2c_csyrk(char* uplo, char* trans, integer* N, integer* K, complex* alpha, complex* A, integer* lda, complex* beta, complex* C, integer* ldc) { cblas_csyrk(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, alpha, A, *lda, beta, C, *ldc); return 0; } int f2c_csyr2k(char* uplo, char* trans, integer* N, integer* K, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb, complex* beta, complex* C, integer* ldc) { cblas_csyr2k(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, alpha, A, *lda, B, *ldb, beta, C, *ldc); return 0; } int f2c_ctrmm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb) { cblas_ctrmm(CblasColMajor, CVT_SIDE(*side), CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *M, *N, alpha, A, *lda, B, *ldb); return 0; } int f2c_ctrsm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb) { cblas_ctrsm(CblasColMajor, CVT_SIDE(*side), CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *M, *N, alpha, A, *lda, B, *ldb); return 0; } int f2c_zgemm(char* transA, char* transB, integer* M, integer* N, integer* K, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb, doublecomplex* beta, doublecomplex* C, integer* ldc) { cblas_zgemm(CblasColMajor, CVT_TRANSPOSE(*transA), CVT_TRANSPOSE(*transB), *M, *N, *K, alpha, A, *lda, B, *ldb, beta, C, *ldc); return 0; } int f2c_zsymm(char* side, char* uplo, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb, doublecomplex* beta, doublecomplex* C, integer* ldc) { cblas_zsymm(CblasColMajor, CVT_SIDE(*side), CVT_UPLO(*uplo), *M, *N, alpha, A, *lda, B, *ldb, beta, C, *ldc); return 0; } int f2c_zsyrk(char* uplo, char* trans, integer* N, integer* K, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* beta, doublecomplex* C, integer* ldc) { cblas_zsyrk(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, alpha, A, *lda, beta, C, *ldc); return 0; } int f2c_zsyr2k(char* uplo, char* trans, integer* N, integer* K, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb, doublecomplex* beta, doublecomplex* C, integer* ldc) { cblas_zsyr2k(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, alpha, A, *lda, B, *ldb, beta, C, *ldc); return 0; } int f2c_ztrmm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb) { cblas_ztrmm(CblasColMajor, CVT_SIDE(*side), CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *M, *N, alpha, A, *lda, B, *ldb); return 0; } int f2c_ztrsm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb) { cblas_ztrsm(CblasColMajor, CVT_SIDE(*side), CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), CVT_DIAG(*diag), *M, *N, alpha, A, *lda, B, *ldb); return 0; } /* * Routines with prefixes C and Z only */ int f2c_chemm(char* side, char* uplo, integer* M, integer* N, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb, complex* beta, complex* C, integer* ldc) { cblas_chemm(CblasColMajor, CVT_SIDE(*side), CVT_UPLO(*uplo), *M, *N, alpha, A, *lda, B, *ldb, beta, C, *ldc); return 0; } int f2c_cherk(char* uplo, char* trans, integer* N, integer* K, real* alpha, complex* A, integer* lda, real* beta, complex* C, integer* ldc) { cblas_cherk(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, *alpha, A, *lda, *beta, C, *ldc); return 0; } int f2c_cher2k(char* uplo, char* trans, integer* N, integer* K, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb, real* beta, complex* C, integer* ldc) { cblas_cher2k(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, alpha, A, *lda, B, *ldb, *beta, C, *ldc); return 0; } int f2c_zhemm(char* side, char* uplo, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb, doublecomplex* beta, doublecomplex* C, integer* ldc) { cblas_zhemm(CblasColMajor, CVT_SIDE(*side), CVT_UPLO(*uplo), *M, *N, alpha, A, *lda, B, *ldb, beta, C, *ldc); return 0; } int f2c_zherk(char* uplo, char* trans, integer* N, integer* K, doublereal* alpha, doublecomplex* A, integer* lda, doublereal* beta, doublecomplex* C, integer* ldc) { cblas_zherk(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, *alpha, A, *lda, *beta, C, *ldc); return 0; } int f2c_zher2k(char* uplo, char* trans, integer* N, integer* K, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb, doublereal* beta, doublecomplex* C, integer* ldc) { cblas_zher2k(CblasColMajor, CVT_UPLO(*uplo), CVT_TRANSPOSE(*trans), *N, *K, alpha, A, *lda, B, *ldb, *beta, C, *ldc); return 0; } starpu-1.4.9+dfsg/min-dgels/base/BLAS/WRAP/fblaswr.c000066400000000000000000001044341507764646700217050ustar00rootroot00000000000000#include "f2c.h" #include "fblaswr.h" /* * =========================================================================== * Prototypes for level 1 BLAS functions (complex are recast as routines) * =========================================================================== */ doublereal f2c_sdot(integer* N, real* X, integer* incX, real* Y, integer* incY) { return _starpu_sdot_(N, X, incX, Y, incY); } doublereal f2c_ddot(integer* N, doublereal* X, integer* incX, doublereal* Y, integer* incY) { return _starpu_ddot_(N, X, incX, Y, incY); } /* * Functions having prefixes Z and C only */ void f2c_cdotu(complex* retval, integer* N, complex* X, integer* incX, complex* Y, integer* incY) { _starpu_cdotu_(retval, N, X, incX, Y, incY); } void f2c_cdotc(complex* retval, integer* N, complex* X, integer* incX, complex* Y, integer* incY) { _starpu_cdotc_(retval, N, X, incX, Y, incY); } void f2c_zdotu(doublecomplex* retval, integer* N, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY) { _starpu_zdotu_(retval, N, X, incX, Y, incY); } void f2c_zdotc(doublecomplex* retval, integer* N, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY) { _starpu_zdotc_(retval, N, X, incX, Y, incY); } /* * Functions having prefixes S D SC DZ */ doublereal f2c_snrm2(integer* N, real* X, integer* incX) { return _starpu_snrm2_(N, X, incX); } doublereal f2c_sasum(integer* N, real* X, integer* incX) { return _starpu_sasum_(N, X, incX); } doublereal f2c_dnrm2(integer* N, doublereal* X, integer* incX) { return _starpu_dnrm2_(N, X, incX); } doublereal f2c_dasum(integer* N, doublereal* X, integer* incX) { return _starpu_dasum_(N, X, incX); } doublereal f2c_scnrm2(integer* N, complex* X, integer* incX) { return _starpu_scnrm2_(N, X, incX); } doublereal f2c_scasum(integer* N, complex* X, integer* incX) { return _starpu_scasum_(N, X, incX); } doublereal f2c_dznrm2(integer* N, doublecomplex* X, integer* incX) { return _starpu_dznrm2_(N, X, incX); } doublereal f2c_dzasum(integer* N, doublecomplex* X, integer* incX) { return _starpu_dzasum_(N, X, incX); } /* * Functions having standard 4 prefixes (S D C Z) */ integer f2c_isamax(integer* N, real* X, integer* incX) { return _starpu_isamax_(N, X, incX); } integer f2c_idamax(integer* N, doublereal* X, integer* incX) { return _starpu_idamax_(N, X, incX); } integer f2c_icamax(integer* N, complex* X, integer* incX) { return _starpu_icamax_(N, X, incX); } integer f2c_izamax(integer* N, doublecomplex* X, integer* incX) { return _starpu_izamax_(N, X, incX); } /* * =========================================================================== * Prototypes for level 0 BLAS routines * =========================================================================== */ int f2c_srotg(real* a, real* b, real* c, real* s) { _starpu_srotg_(a, b, c, s); return 0; } int f2c_crotg(complex* CA, complex* CB, complex* C, real* S) { _starpu_crotg_(CA, CB, C, S); return 0; } int f2c_drotg(doublereal* a, doublereal* b, doublereal* c, doublereal* s) { _starpu_drotg_(a, b, c, s); return 0; } int f2c_zrotg(doublecomplex* CA, doublecomplex* CB, doublecomplex* C, doublereal* S) { _starpu_zrotg_(CA, CB, C, S); return 0; } /* * =========================================================================== * Prototypes for level 1 BLAS routines * =========================================================================== */ /* * Routines with standard 4 prefixes (s, d, c, z) */ int f2c_sswap(integer* N, real* X, integer* incX, real* Y, integer* incY) { _starpu_sswap_(N, X, incX, Y, incY); return 0; } int f2c_scopy(integer* N, real* X, integer* incX, real* Y, integer* incY) { _starpu_scopy_(N, X, incX, Y, incY); return 0; } int f2c_saxpy(integer* N, real* alpha, real* X, integer* incX, real* Y, integer* incY) { _starpu_saxpy_(N, alpha, X, incX, Y, incY); return 0; } int f2c_dswap(integer* N, doublereal* X, integer* incX, doublereal* Y, integer* incY) { _starpu_dswap_(N, X, incX, Y, incY); return 0; } int f2c_dcopy(integer* N, doublereal* X, integer* incX, doublereal* Y, integer* incY) { _starpu_dcopy_(N, X, incX, Y, incY); return 0; } int f2c_daxpy(integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* Y, integer* incY) { _starpu_daxpy_(N, alpha, X, incX, Y, incY); return 0; } int f2c_cswap(integer* N, complex* X, integer* incX, complex* Y, integer* incY) { _starpu_cswap_(N, X, incX, Y, incY); return 0; } int f2c_ccopy(integer* N, complex* X, integer* incX, complex* Y, integer* incY) { _starpu_ccopy_(N, X, incX, Y, incY); return 0; } int f2c_caxpy(integer* N, complex* alpha, complex* X, integer* incX, complex* Y, integer* incY) { _starpu_caxpy_(N, alpha, X, incX, Y, incY); return 0; } int f2c_zswap(integer* N, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY) { _starpu_zswap_(N, X, incX, Y, incY); return 0; } int f2c_zcopy(integer* N, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY) { _starpu_zcopy_(N, X, incX, Y, incY); return 0; } int f2c_zaxpy(integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY) { _starpu_zaxpy_(N, alpha, X, incX, Y, incY); return 0; } /* * Routines with S and D prefix only */ int f2c_srot(integer* N, real* X, integer* incX, real* Y, integer* incY, real* c, real* s) { _starpu_srot_(N, X, incX, Y, incY, c, s); return 0; } int f2c_drot(integer* N, doublereal* X, integer* incX, doublereal* Y, integer* incY, doublereal* c, doublereal* s) { _starpu_drot_(N, X, incX, Y, incY, c, s); return 0; } /* * Routines with S D C Z CS and ZD prefixes */ int f2c_sscal(integer* N, real* alpha, real* X, integer* incX) { _starpu_sscal_(N, alpha, X, incX); return 0; } int f2c_dscal(integer* N, doublereal* alpha, doublereal* X, integer* incX) { _starpu_dscal_(N, alpha, X, incX); return 0; } int f2c_cscal(integer* N, complex* alpha, complex* X, integer* incX) { _starpu_cscal_(N, alpha, X, incX); return 0; } int f2c_zscal(integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX) { _starpu_zscal_(N, alpha, X, incX); return 0; } int f2c_csscal(integer* N, real* alpha, complex* X, integer* incX) { _starpu_csscal_(N, alpha, X, incX); return 0; } int f2c_zdscal(integer* N, doublereal* alpha, doublecomplex* X, integer* incX) { _starpu_zdscal_(N, alpha, X, incX); return 0; } /* * =========================================================================== * Prototypes for level 2 BLAS * =========================================================================== */ /* * Routines with standard 4 prefixes (S, D, C, Z) */ int f2c_sgemv(char* trans, integer* M, integer* N, real* alpha, real* A, integer* lda, real* X, integer* incX, real* beta, real* Y, integer* incY) { _starpu_sgemv_(trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_sgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, real *alpha, real *A, integer *lda, real *X, integer *incX, real *beta, real *Y, integer *incY) { _starpu_sgbmv_(trans, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_strmv(char* uplo, char *trans, char* diag, integer *N, real *A, integer *lda, real *X, integer *incX) { _starpu_strmv_(uplo, trans, diag, N, A, lda, X, incX); return 0; } int f2c_stbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, real* A, integer* lda, real* X, integer* incX) { _starpu_stbmv_(uplo, trans, diag, N, K, A, lda, X, incX); return 0; } int f2c_stpmv(char* uplo, char* trans, char* diag, integer* N, real* Ap, real* X, integer* incX) { _starpu_stpmv_(uplo, trans, diag, N, Ap, X, incX); return 0; } int f2c_strsv(char* uplo, char* trans, char* diag, integer* N, real* A, integer* lda, real* X, integer* incX) { _starpu_strsv_(uplo, trans, diag, N, A, lda, X, incX); return 0; } int f2c_stbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, real* A, integer* lda, real* X, integer* incX) { _starpu_stbsv_(uplo, trans, diag, N, K, A, lda, X, incX); return 0; } int f2c_stpsv(char* uplo, char* trans, char* diag, integer* N, real* Ap, real* X, integer* incX) { _starpu_stpsv_(uplo, trans, diag, N, Ap, X, incX); return 0; } int f2c_dgemv(char* trans, integer* M, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* X, integer* incX, doublereal* beta, doublereal* Y, integer* incY) { _starpu_dgemv_(trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_dgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, doublereal *alpha, doublereal *A, integer *lda, doublereal *X, integer *incX, doublereal *beta, doublereal *Y, integer *incY) { _starpu_dgbmv_(trans, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_dtrmv(char* uplo, char *trans, char* diag, integer *N, doublereal *A, integer *lda, doublereal *X, integer *incX) { _starpu_dtrmv_(uplo, trans, diag, N, A, lda, X, incX); return 0; } int f2c_dtbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, doublereal* A, integer* lda, doublereal* X, integer* incX) { _starpu_dtbmv_(uplo, trans, diag, N, K, A, lda, X, incX); return 0; } int f2c_dtpmv(char* uplo, char* trans, char* diag, integer* N, doublereal* Ap, doublereal* X, integer* incX) { _starpu_dtpmv_(uplo, trans, diag, N, Ap, X, incX); return 0; } int f2c_dtrsv(char* uplo, char* trans, char* diag, integer* N, doublereal* A, integer* lda, doublereal* X, integer* incX) { _starpu_dtrsv_(uplo, trans, diag, N, A, lda, X, incX); return 0; } int f2c_dtbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, doublereal* A, integer* lda, doublereal* X, integer* incX) { _starpu_dtbsv_(uplo, trans, diag, N, K, A, lda, X, incX); return 0; } int f2c_dtpsv(char* uplo, char* trans, char* diag, integer* N, doublereal* Ap, doublereal* X, integer* incX) { _starpu_dtpsv_(uplo, trans, diag, N, Ap, X, incX); return 0; } int f2c_cgemv(char* trans, integer* M, integer* N, complex* alpha, complex* A, integer* lda, complex* X, integer* incX, complex* beta, complex* Y, integer* incY) { _starpu_cgemv_(trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_cgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, complex *alpha, complex *A, integer *lda, complex *X, integer *incX, complex *beta, complex *Y, integer *incY) { _starpu_cgbmv_(trans, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_ctrmv(char* uplo, char *trans, char* diag, integer *N, complex *A, integer *lda, complex *X, integer *incX) { _starpu_ctrmv_(uplo, trans, diag, N, A, lda, X, incX); return 0; } int f2c_ctbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, complex* A, integer* lda, complex* X, integer* incX) { _starpu_ctbmv_(uplo, trans, diag, N, K, A, lda, X, incX); return 0; } int f2c_ctpmv(char* uplo, char* trans, char* diag, integer* N, complex* Ap, complex* X, integer* incX) { _starpu_ctpmv_(uplo, trans, diag, N, Ap, X, incX); return 0; } int f2c_ctrsv(char* uplo, char* trans, char* diag, integer* N, complex* A, integer* lda, complex* X, integer* incX) { _starpu_ctrsv_(uplo, trans, diag, N, A, lda, X, incX); return 0; } int f2c_ctbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, complex* A, integer* lda, complex* X, integer* incX) { _starpu_ctbsv_(uplo, trans, diag, N, K, A, lda, X, incX); return 0; } int f2c_ctpsv(char* uplo, char* trans, char* diag, integer* N, complex* Ap, complex* X, integer* incX) { _starpu_ctpsv_(uplo, trans, diag, N, Ap, X, incX); return 0; } int f2c_zgemv(char* trans, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX, doublecomplex* beta, doublecomplex* Y, integer* incY) { _starpu_zgemv_(trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_zgbmv(char *trans, integer *M, integer *N, integer *KL, integer *KU, doublecomplex *alpha, doublecomplex *A, integer *lda, doublecomplex *X, integer *incX, doublecomplex *beta, doublecomplex *Y, integer *incY) { _starpu_zgbmv_(trans, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_ztrmv(char* uplo, char *trans, char* diag, integer *N, doublecomplex *A, integer *lda, doublecomplex *X, integer *incX) { _starpu_ztrmv_(uplo, trans, diag, N, A, lda, X, incX); return 0; } int f2c_ztbmv(char* uplo, char* trans, char* diag, integer* N, integer* K, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX) { _starpu_ztbmv_(uplo, trans, diag, N, K, A, lda, X, incX); return 0; } int f2c_ztpmv(char* uplo, char* trans, char* diag, integer* N, doublecomplex* Ap, doublecomplex* X, integer* incX) { _starpu_ztpmv_(uplo, trans, diag, N, Ap, X, incX); return 0; } int f2c_ztrsv(char* uplo, char* trans, char* diag, integer* N, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX) { _starpu_ztrsv_(uplo, trans, diag, N, A, lda, X, incX); return 0; } int f2c_ztbsv(char* uplo, char* trans, char* diag, integer* N, integer* K, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX) { _starpu_ztbsv_(uplo, trans, diag, N, K, A, lda, X, incX); return 0; } int f2c_ztpsv(char* uplo, char* trans, char* diag, integer* N, doublecomplex* Ap, doublecomplex* X, integer* incX) { _starpu_ztpsv_(uplo, trans, diag, N, Ap, X, incX); return 0; } /* * Routines with S and D prefixes only */ int f2c_ssymv(char* uplo, integer* N, real* alpha, real* A, integer* lda, real* X, integer* incX, real* beta, real* Y, integer* incY) { _starpu_ssymv_(uplo, N, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_ssbmv(char* uplo, integer* N, integer* K, real* alpha, real* A, integer* lda, real* X, integer* incX, real* beta, real* Y, integer* incY) { _starpu_ssbmv_(uplo, N, K, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_sspmv(char* uplo, integer* N, real* alpha, real* Ap, real* X, integer* incX, real* beta, real* Y, integer* incY) { _starpu_sspmv_(uplo, N, alpha, Ap, X, incX, beta, Y, incY); return 0; } int f2c_sger(integer* M, integer* N, real* alpha, real* X, integer* incX, real* Y, integer* incY, real* A, integer* lda) { _starpu_sger_(M, N, alpha, X, incX, Y, incY, A, lda); return 0; } int f2c_ssyr(char* uplo, integer* N, real* alpha, real* X, integer* incX, real* A, integer* lda) { _starpu_ssyr_(uplo, N, alpha, X, incX, A, lda); return 0; } int f2c_sspr(char* uplo, integer* N, real* alpha, real* X, integer* incX, real* Ap) { _starpu_sspr_(uplo, N, alpha, X, incX, Ap); return 0; } int f2c_ssyr2(char* uplo, integer* N, real* alpha, real* X, integer* incX, real* Y, integer* incY, real* A, integer* lda) { _starpu_ssyr2_(uplo, N, alpha, X, incX, Y, incY, A, lda); return 0; } int f2c_sspr2(char* uplo, integer* N, real* alpha, real* X, integer* incX, real* Y, integer* incY, real* A) { _starpu_sspr2_(uplo, N, alpha, X, incX, Y, incY, A); return 0; } int f2c_dsymv(char* uplo, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* X, integer* incX, doublereal* beta, doublereal* Y, integer* incY) { _starpu_dsymv_(uplo, N, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_dsbmv(char* uplo, integer* N, integer* K, doublereal* alpha, doublereal* A, integer* lda, doublereal* X, integer* incX, doublereal* beta, doublereal* Y, integer* incY) { _starpu_dsbmv_(uplo, N, K, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_dspmv(char* uplo, integer* N, doublereal* alpha, doublereal* Ap, doublereal* X, integer* incX, doublereal* beta, doublereal* Y, integer* incY) { _starpu_dspmv_(uplo, N, alpha, Ap, X, incX, beta, Y, incY); return 0; } int f2c_dger(integer* M, integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* Y, integer* incY, doublereal* A, integer* lda) { _starpu_dger_(M, N, alpha, X, incX, Y, incY, A, lda); return 0; } int f2c_dsyr(char* uplo, integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* A, integer* lda) { _starpu_dsyr_(uplo, N, alpha, X, incX, A, lda); return 0; } int f2c_dspr(char* uplo, integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* Ap) { _starpu_dspr_(uplo, N, alpha, X, incX, Ap); return 0; } int f2c_dsyr2(char* uplo, integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* Y, integer* incY, doublereal* A, integer* lda) { _starpu_dsyr2_(uplo, N, alpha, X, incX, Y, incY, A, lda); return 0; } int f2c_dspr2(char* uplo, integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* Y, integer* incY, doublereal* A) { _starpu_dspr2_(uplo, N, alpha, X, incX, Y, incY, A); return 0; } /* * Routines with C and Z prefixes only */ int f2c_chemv(char* uplo, integer* N, complex* alpha, complex* A, integer* lda, complex* X, integer* incX, complex* beta, complex* Y, integer* incY) { _starpu_chemv_(uplo, N, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_chbmv(char* uplo, integer* N, integer* K, complex* alpha, complex* A, integer* lda, complex* X, integer* incX, complex* beta, complex* Y, integer* incY) { _starpu_chbmv_(uplo, N, K, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_chpmv(char* uplo, integer* N, complex* alpha, complex* Ap, complex* X, integer* incX, complex* beta, complex* Y, integer* incY) { _starpu_chpmv_(uplo, N, alpha, Ap, X, incX, beta, Y, incY); return 0; } int f2c_cgeru(integer* M, integer* N, complex* alpha, complex* X, integer* incX, complex* Y, integer* incY, complex* A, integer* lda) { _starpu_cgeru_(M, N, alpha, X, incX, Y, incY, A, lda); return 0; } int f2c_cgerc(integer* M, integer* N, complex* alpha, complex* X, integer* incX, complex* Y, integer* incY, complex* A, integer* lda) { _starpu_cgerc_(M, N, alpha, X, incX, Y, incY, A, lda); return 0; } int f2c_cher(char* uplo, integer* N, real* alpha, complex* X, integer* incX, complex* A, integer* lda) { _starpu_cher_(uplo, N, alpha, X, incX, A, lda); return 0; } int f2c_chpr(char* uplo, integer* N, real* alpha, complex* X, integer* incX, complex* Ap) { _starpu_chpr_(uplo, N, alpha, X, incX, Ap); return 0; } int f2c_cher2(char* uplo, integer* N, complex* alpha, complex* X, integer* incX, complex* Y, integer* incY, complex* A, integer* lda) { _starpu_cher2_(uplo, N, alpha, X, incX, Y, incY, A, lda); return 0; } int f2c_chpr2(char* uplo, integer* N, complex* alpha, complex* X, integer* incX, complex* Y, integer* incY, complex* Ap) { _starpu_chpr2_(uplo, N, alpha, X, incX, Y, incY, Ap); return 0; } int f2c_zhemv(char* uplo, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX, doublecomplex* beta, doublecomplex* Y, integer* incY) { _starpu_zhemv_(uplo, N, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_zhbmv(char* uplo, integer* N, integer* K, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX, doublecomplex* beta, doublecomplex* Y, integer* incY) { _starpu_zhbmv_(uplo, N, K, alpha, A, lda, X, incX, beta, Y, incY); return 0; } int f2c_zhpmv(char* uplo, integer* N, doublecomplex* alpha, doublecomplex* Ap, doublecomplex* X, integer* incX, doublecomplex* beta, doublecomplex* Y, integer* incY) { _starpu_zhpmv_(uplo, N, alpha, Ap, X, incX, beta, Y, incY); return 0; } int f2c_zgeru(integer* M, integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY, doublecomplex* A, integer* lda) { _starpu_zgeru_(M, N, alpha, X, incX, Y, incY, A, lda); return 0; } int f2c_zgerc(integer* M, integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY, doublecomplex* A, integer* lda) { _starpu_zgerc_(M, N, alpha, X, incX, Y, incY, A, lda); return 0; } int f2c_zher(char* uplo, integer* N, doublereal* alpha, doublecomplex* X, integer* incX, doublecomplex* A, integer* lda) { _starpu_zher_(uplo, N, alpha, X, incX, A, lda); return 0; } int f2c_zhpr(char* uplo, integer* N, doublereal* alpha, doublecomplex* X, integer* incX, doublecomplex* Ap) { _starpu_zhpr_(uplo, N, alpha, X, incX, Ap); return 0; } int f2c_zher2(char* uplo, integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY, doublecomplex* A, integer* lda) { _starpu_zher2_(uplo, N, alpha, X, incX, Y, incY, A, lda); return 0; } int f2c_zhpr2(char* uplo, integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY, doublecomplex* Ap) { _starpu_zhpr2_(uplo, N, alpha, X, incX, Y, incY, Ap); return 0; } /* * =========================================================================== * Prototypes for level 3 BLAS * =========================================================================== */ /* * Routines with standard 4 prefixes (S, D, C, Z) */ int f2c_sgemm(char* transA, char* transB, integer* M, integer* N, integer* K, real* alpha, real* A, integer* lda, real* B, integer* ldb, real* beta, real* C, integer* ldc) { _starpu_sgemm_(transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } int f2c_ssymm(char* side, char* uplo, integer* M, integer* N, real* alpha, real* A, integer* lda, real* B, integer* ldb, real* beta, real* C, integer* ldc) { _starpu_ssymm_(side, uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } int f2c_ssyrk(char* uplo, char* trans, integer* N, integer* K, real* alpha, real* A, integer* lda, real* beta, real* C, integer* ldc) { _starpu_ssyrk_(uplo, trans, N, K, alpha, A, lda, beta, C, ldc); return 0; } int f2c_ssyr2k(char* uplo, char* trans, integer* N, integer* K, real* alpha, real* A, integer* lda, real* B, integer* ldb, real* beta, real* C, integer* ldc) { _starpu_ssyr2k_(uplo, trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } int f2c_strmm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, real* alpha, real* A, integer* lda, real* B, integer* ldb) { _starpu_strmm_(side, uplo, trans, diag, M, N, alpha, A, lda, B, ldb); return 0; } int f2c_strsm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, real* alpha, real* A, integer* lda, real* B, integer* ldb) { _starpu_strsm_(side, uplo, trans, diag, M, N, alpha, A, lda, B, ldb); return 0; } int f2c_dgemm(char* transA, char* transB, integer* M, integer* N, integer* K, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb, doublereal* beta, doublereal* C, integer* ldc) { _starpu_dgemm_(transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } int f2c_dsymm(char* side, char* uplo, integer* M, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb, doublereal* beta, doublereal* C, integer* ldc) { _starpu_dsymm_(side, uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } int f2c_dsyrk(char* uplo, char* trans, integer* N, integer* K, doublereal* alpha, doublereal* A, integer* lda, doublereal* beta, doublereal* C, integer* ldc) { _starpu_dsyrk_(uplo, trans, N, K, alpha, A, lda, beta, C, ldc); return 0; } int f2c_dsyr2k(char* uplo, char* trans, integer* N, integer* K, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb, doublereal* beta, doublereal* C, integer* ldc) { _starpu_dsyr2k_(uplo, trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } int f2c_dtrmm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb) { _starpu_dtrmm_(side, uplo, trans, diag, M, N, alpha, A, lda, B, ldb); return 0; } int f2c_dtrsm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb) { _starpu_dtrsm_(side, uplo, trans, diag, M, N, alpha, A, lda, B, ldb); return 0; } int f2c_cgemm(char* transA, char* transB, integer* M, integer* N, integer* K, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb, complex* beta, complex* C, integer* ldc) { _starpu_cgemm_(transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } int f2c_csymm(char* side, char* uplo, integer* M, integer* N, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb, complex* beta, complex* C, integer* ldc) { _starpu_csymm_(side, uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } int f2c_csyrk(char* uplo, char* trans, integer* N, integer* K, complex* alpha, complex* A, integer* lda, complex* beta, complex* C, integer* ldc) { _starpu_csyrk_(uplo, trans, N, K, alpha, A, lda, beta, C, ldc); return 0; } int f2c_csyr2k(char* uplo, char* trans, integer* N, integer* K, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb, complex* beta, complex* C, integer* ldc) { _starpu_csyr2k_(uplo, trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } int f2c_ctrmm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb) { _starpu_ctrmm_(side, uplo, trans, diag, M, N, alpha, A, lda, B, ldb); return 0; } int f2c_ctrsm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb) { _starpu_ctrsm_(side, uplo, trans, diag, M, N, alpha, A, lda, B, ldb); return 0; } int f2c_zgemm(char* transA, char* transB, integer* M, integer* N, integer* K, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb, doublecomplex* beta, doublecomplex* C, integer* ldc) { _starpu_zgemm_(transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } int f2c_zsymm(char* side, char* uplo, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb, doublecomplex* beta, doublecomplex* C, integer* ldc) { _starpu_zsymm_(side, uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } int f2c_zsyrk(char* uplo, char* trans, integer* N, integer* K, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* beta, doublecomplex* C, integer* ldc) { _starpu_zsyrk_(uplo, trans, N, K, alpha, A, lda, beta, C, ldc); return 0; } int f2c_zsyr2k(char* uplo, char* trans, integer* N, integer* K, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb, doublecomplex* beta, doublecomplex* C, integer* ldc) { _starpu_zsyr2k_(uplo, trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } int f2c_ztrmm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb) { _starpu_ztrmm_(side, uplo, trans, diag, M, N, alpha, A, lda, B, ldb); return 0; } int f2c_ztrsm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb) { _starpu_ztrsm_(side, uplo, trans, diag, M, N, alpha, A, lda, B, ldb); return 0; } /* * Routines with prefixes C and Z only */ int f2c_chemm(char* side, char* uplo, integer* M, integer* N, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb, complex* beta, complex* C, integer* ldc) { _starpu_chemm_(side, uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } int f2c_cherk(char* uplo, char* trans, integer* N, integer* K, real* alpha, complex* A, integer* lda, real* beta, complex* C, integer* ldc) { _starpu_cherk_(uplo, trans, N, K, alpha, A, lda, beta, C, ldc); return 0; } int f2c_cher2k(char* uplo, char* trans, integer* N, integer* K, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb, real* beta, complex* C, integer* ldc) { _starpu_cher2k_(uplo, trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } int f2c_zhemm(char* side, char* uplo, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb, doublecomplex* beta, doublecomplex* C, integer* ldc) { _starpu_zhemm_(side, uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } int f2c_zherk(char* uplo, char* trans, integer* N, integer* K, doublereal* alpha, doublecomplex* A, integer* lda, doublereal* beta, doublecomplex* C, integer* ldc) { _starpu_zherk_(uplo, trans, N, K, alpha, A, lda, beta, C, ldc); return 0; } int f2c_zher2k(char* uplo, char* trans, integer* N, integer* K, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb, doublereal* beta, doublecomplex* C, integer* ldc) { _starpu_zher2k_(uplo, trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc); return 0; } starpu-1.4.9+dfsg/min-dgels/base/BLAS/WRAP/fblaswr.h000066400000000000000000000517421507764646700217150ustar00rootroot00000000000000real _starpu_sdot_(integer* N, real* X, integer* incX, real* Y, integer* incY); doublereal _starpu_ddot_(integer* N, doublereal* X, integer* incX, doublereal* Y, integer* incY); void _starpu_cdotu_(complex* retval, integer* N, complex* X, integer* incX, complex* Y, integer* incY); void _starpu_cdotc_(complex* retval, integer* N, complex* X, integer* incX, complex* Y, integer* incY); void _starpu_zdotu_(doublecomplex* retval, integer* N, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY); void _starpu_zdotc_(doublecomplex* retval, integer* N, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY); real _starpu_snrm2_(integer* N, real* X, integer* incX); real _starpu_sasum_(integer* N, real* X, integer* incX); doublereal _starpu_dnrm2_(integer* N, doublereal* X, integer* incX); doublereal _starpu_dasum_(integer* N, doublereal* X, integer* incX); real _starpu_scnrm2_(integer* N, complex* X, integer* incX); real _starpu_scasum_(integer* N, complex* X, integer* incX); doublereal _starpu_dznrm2_(integer* N, doublecomplex* X, integer* incX); doublereal _starpu_dzasum_(integer* N, doublecomplex* X, integer* incX); integer _starpu_isamax_(integer* N, real* X, integer* incX); integer _starpu_idamax_(integer* N, doublereal* X, integer* incX); integer _starpu_icamax_(integer* N, complex* X, integer* incX); integer _starpu_izamax_(integer* N, doublecomplex* X, integer* incX); int _starpu_sswap_(integer* N, real* X, integer* incX, real* Y, integer* incY); int _starpu_scopy_(integer* N, real* X, integer* incX, real* Y, integer* incY); int _starpu_saxpy_(integer* N, real* alpha, real* X, integer* incX, real* Y, integer* incY); int _starpu_dswap_(integer* N, doublereal* X, integer* incX, doublereal* Y, integer* incY); int _starpu_dcopy_(integer* N, doublereal* X, integer* incX, doublereal* Y, integer* incY); int _starpu_daxpy_(integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* Y, integer* incY); int _starpu_cswap_(integer* N, complex* X, integer* incX, complex* Y, integer* incY); int _starpu_ccopy_(integer* N, complex* X, integer* incX, complex* Y, integer* incY); int _starpu_caxpy_(integer* N, complex* alpha, complex* X, integer* incX, complex* Y, integer* incY); int _starpu_zswap_(integer* N, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY); int _starpu_zcopy_(integer* N, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY); int _starpu_zaxpy_(integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY); int _starpu_srotg_(real* a, real* b, real* c, real* s); int _starpu_srot_(integer* N, real* X, integer* incX, real* Y, integer* incY, real* c, real* s); int _starpu_crotg_(complex* a, complex* b, complex* c, complex* s); int _starpu_drotg_(doublereal* a, doublereal* b, doublereal* c, doublereal* s); int _starpu_drot_(integer* N, doublereal* X, integer* incX, doublereal* Y, integer* incY, doublereal* c, doublereal* s); int _starpu_zrotg_(doublecomplex* a, doublecomplex* b, doublecomplex* c, doublecomplex* s); int _starpu_sscal_(integer* N, real* alpha, real* X, integer* incX); int _starpu_dscal_(integer* N, doublereal* alpha, doublereal* X, integer* incX); int _starpu_cscal_(integer* N, complex* alpha, complex* X, integer* incX); int _starpu_zscal_(integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX); int _starpu_csscal_(integer* N, real* alpha, complex* X, integer* incX); int _starpu_zdscal_(integer* N, doublereal* alpha, doublecomplex* X, integer* incX); int _starpu_sgemv_(char* trans, integer* M, integer* N, real* alpha, real* A, integer* lda, real* X, integer* incX, real* beta, real* Y, integer* incY); int _starpu_sgbmv_(char *trans, integer *M, integer *N, integer *KL, integer *KU, real *alpha, real *A, integer *lda, real *X, integer *incX, real *beta, real *Y, integer *incY); int _starpu_strmv_(char* uplo, char *trans, char* diag, integer *N, real *A, integer *lda, real *X, integer *incX); int _starpu_stbmv_(char* uplo, char* trans, char* diag, integer* N, integer* K, real* A, integer* lda, real* X, integer* incX); int _starpu_stpmv_(char* uplo, char* trans, char* diag, integer* N, real* Ap, real* X, integer* incX); int _starpu_strsv_(char* uplo, char* trans, char* diag, integer* N, real* A, integer* lda, real* X, integer* incX); int _starpu_stbsv_(char* uplo, char* trans, char* diag, integer* N, integer* K, real* A, integer* lda, real* X, integer* incX); int _starpu_stpsv_(char* uplo, char* trans, char* diag, integer* N, real* Ap, real* X, integer* incX); int _starpu_dgemv_(char* trans, integer* M, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* X, integer* incX, doublereal* beta, doublereal* Y, integer* incY); int _starpu_dgbmv_(char *trans, integer *M, integer *N, integer *KL, integer *KU, doublereal *alpha, doublereal *A, integer *lda, doublereal *X, integer *incX, doublereal *beta, doublereal *Y, integer *incY); int _starpu_dtrmv_(char* uplo, char *trans, char* diag, integer *N, doublereal *A, integer *lda, doublereal *X, integer *incX); int _starpu_dtbmv_(char* uplo, char* trans, char* diag, integer* N, integer* K, doublereal* A, integer* lda, doublereal* X, integer* incX); int _starpu_dtpmv_(char* uplo, char* trans, char* diag, integer* N, doublereal* Ap, doublereal* X, integer* incX); int _starpu_dtrsv_(char* uplo, char* trans, char* diag, integer* N, doublereal* A, integer* lda, doublereal* X, integer* incX); int _starpu_dtbsv_(char* uplo, char* trans, char* diag, integer* N, integer* K, doublereal* A, integer* lda, doublereal* X, integer* incX); int _starpu_dtpsv_(char* uplo, char* trans, char* diag, integer* N, doublereal* Ap, doublereal* X, integer* incX); int _starpu_cgemv_(char* trans, integer* M, integer* N, complex* alpha, complex* A, integer* lda, complex* X, integer* incX, complex* beta, complex* Y, integer* incY); int _starpu_cgbmv_(char *trans, integer *M, integer *N, integer *KL, integer *KU, complex *alpha, complex *A, integer *lda, complex *X, integer *incX, complex *beta, complex *Y, integer *incY); int _starpu_ctrmv_(char* uplo, char *trans, char* diag, integer *N, complex *A, integer *lda, complex *X, integer *incX); int _starpu_ctbmv_(char* uplo, char* trans, char* diag, integer* N, integer* K, complex* A, integer* lda, complex* X, integer* incX); int _starpu_ctpmv_(char* uplo, char* trans, char* diag, integer* N, complex* Ap, complex* X, integer* incX); int _starpu_ctrsv_(char* uplo, char* trans, char* diag, integer* N, complex* A, integer* lda, complex* X, integer* incX); int _starpu_ctbsv_(char* uplo, char* trans, char* diag, integer* N, integer* K, complex* A, integer* lda, complex* X, integer* incX); int _starpu_ctpsv_(char* uplo, char* trans, char* diag, integer* N, complex* Ap, complex* X, integer* incX); int _starpu_zgemv_(char* trans, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX, doublecomplex* beta, doublecomplex* Y, integer* incY); int _starpu_zgbmv_(char *trans, integer *M, integer *N, integer *KL, integer *KU, doublecomplex *alpha, doublecomplex *A, integer *lda, doublecomplex *X, integer *incX, doublecomplex *beta, doublecomplex *Y, integer *incY); int _starpu_ztrmv_(char* uplo, char *trans, char* diag, integer *N, doublecomplex *A, integer *lda, doublecomplex *X, integer *incX); int _starpu_ztbmv_(char* uplo, char* trans, char* diag, integer* N, integer* K, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX); void _starpu_ztpmv_(char* uplo, char* trans, char* diag, integer* N, doublecomplex* Ap, doublecomplex* X, integer* incX); int _starpu_ztrsv_(char* uplo, char* trans, char* diag, integer* N, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX); int _starpu_ztbsv_(char* uplo, char* trans, char* diag, integer* N, integer* K, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX); int _starpu_ztpsv_(char* uplo, char* trans, char* diag, integer* N, doublecomplex* Ap, doublecomplex* X, integer* incX); int _starpu_ssymv_(char* uplo, integer* N, real* alpha, real* A, integer* lda, real* X, integer* incX, real* beta, real* Y, integer* incY); int _starpu_ssbmv_(char* uplo, integer* N, integer* K, real* alpha, real* A, integer* lda, real* X, integer* incX, real* beta, real* Y, integer* incY); int _starpu_sspmv_(char* uplo, integer* N, real* alpha, real* Ap, real* X, integer* incX, real* beta, real* Y, integer* incY); int _starpu_sger_(integer* M, integer* N, real* alpha, real* X, integer* incX, real* Y, integer* incY, real* A, integer* lda); int _starpu_ssyr_(char* uplo, integer* N, real* alpha, real* X, integer* incX, real* A, integer* lda); int _starpu_sspr_(char* uplo, integer* N, real* alpha, real* X, integer* incX, real* Ap); int _starpu_ssyr2_(char* uplo, integer* N, real* alpha, real* X, integer* incX, real* Y, integer* incY, real* A, integer* lda); int _starpu_sspr2_(char* uplo, integer* N, real* alpha, real* X, integer* incX, real* Y, integer* incY, real* A); int _starpu_dsymv_(char* uplo, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* X, integer* incX, doublereal* beta, doublereal* Y, integer* incY); int _starpu_dsbmv_(char* uplo, integer* N, integer* K, doublereal* alpha, doublereal* A, integer* lda, doublereal* X, integer* incX, doublereal* beta, doublereal* Y, integer* incY); int _starpu_dspmv_(char* uplo, integer* N, doublereal* alpha, doublereal* Ap, doublereal* X, integer* incX, doublereal* beta, doublereal* Y, integer* incY); int _starpu_dger_(integer* M, integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* Y, integer* incY, doublereal* A, integer* lda); int _starpu_dsyr_(char* uplo, integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* A, integer* lda); int _starpu_dspr_(char* uplo, integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* Ap); int _starpu_dsyr2_(char* uplo, integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* Y, integer* incY, doublereal* A, integer* lda); int _starpu_dspr2_(char* uplo, integer* N, doublereal* alpha, doublereal* X, integer* incX, doublereal* Y, integer* incY, doublereal* A); int _starpu_chemv_(char* uplo, integer* N, complex* alpha, complex* A, integer* lda, complex* X, integer* incX, complex* beta, complex* Y, integer* incY); int _starpu_chbmv_(char* uplo, integer* N, integer* K, complex* alpha, complex* A, integer* lda, complex* X, integer* incX, complex* beta, complex* Y, integer* incY); int _starpu_chpmv_(char* uplo, integer* N, complex* alpha, complex* Ap, complex* X, integer* incX, complex* beta, complex* Y, integer* incY); int _starpu_cgeru_(integer* M, integer* N, complex* alpha, complex* X, integer* incX, complex* Y, integer* incY, complex* A, integer* lda); int _starpu_cgerc_(integer* M, integer* N, complex* alpha, complex* X, integer* incX, complex* Y, integer* incY, complex* A, integer* lda); int _starpu_cher_(char* uplo, integer* N, real* alpha, complex* X, integer* incX, complex* A, integer* lda); int _starpu_chpr_(char* uplo, integer* N, real* alpha, complex* X, integer* incX, complex* Ap); int _starpu_cher2_(char* uplo, integer* N, complex* alpha, complex* X, integer* incX, complex* Y, integer* incY, complex* A, integer* lda); int _starpu_chpr2_(char* uplo, integer* N, complex* alpha, complex* X, integer* incX, complex* Y, integer* incY, complex* Ap); int _starpu_zhemv_(char* uplo, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX, doublecomplex* beta, doublecomplex* Y, integer* incY); int _starpu_zhbmv_(char* uplo, integer* N, integer* K, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX, doublecomplex* beta, doublecomplex* Y, integer* incY); int _starpu_zhpmv_(char* uplo, integer* N, doublecomplex* alpha, doublecomplex* Ap, doublecomplex* X, integer* incX, doublecomplex* beta, doublecomplex* Y, integer* incY); int _starpu_zgeru_(integer* M, integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY, doublecomplex* A, integer* lda); int _starpu_zgerc_(integer* M, integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY, doublecomplex* A, integer* lda); int _starpu_zher_(char* uplo, integer* N, doublereal* alpha, doublecomplex* X, integer* incX, doublecomplex* A, integer* lda); int _starpu_zhpr_(char* uplo, integer* N, doublereal* alpha, doublecomplex* X, integer* incX, doublecomplex* Ap); int _starpu_zher2_(char* uplo, integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY, doublecomplex* A, integer* lda); int _starpu_zhpr2_(char* uplo, integer* N, doublecomplex* alpha, doublecomplex* X, integer* incX, doublecomplex* Y, integer* incY, doublecomplex* Ap); int _starpu_sgemm_(char* transA, char* transB, integer* M, integer* N, integer* K, real* alpha, real* A, integer* lda, real* B, integer* ldb, real* beta, real* C, integer* ldc); int _starpu_ssymm_(char* side, char* uplo, integer* M, integer* N, real* alpha, real* A, integer* lda, real* B, integer* ldb, real* beta, real* C, integer* ldc); int _starpu_ssyrk_(char* uplo, char* trans, integer* N, integer* K, real* alpha, real* A, integer* lda, real* beta, real* C, integer* ldc); int _starpu_ssyr2k_(char* uplo, char* trans, integer* N, integer* K, real* alpha, real* A, integer* lda, real* B, integer* ldb, real* beta, real* C, integer* ldc); int _starpu_strmm_(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, real* alpha, real* A, integer* lda, real* B, integer* ldb); int _starpu_strsm_(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, real* alpha, real* A, integer* lda, real* B, integer* ldb); int _starpu_dgemm_(char* transA, char* transB, integer* M, integer* N, integer* K, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb, doublereal* beta, doublereal* C, integer* ldc); int _starpu_dsymm_(char* side, char* uplo, integer* M, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb, doublereal* beta, doublereal* C, integer* ldc); int _starpu_dsyrk_(char* uplo, char* trans, integer* N, integer* K, doublereal* alpha, doublereal* A, integer* lda, doublereal* beta, doublereal* C, integer* ldc); int _starpu_dsyr2k_(char* uplo, char* trans, integer* N, integer* K, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb, doublereal* beta, doublereal* C, integer* ldc); int _starpu_dtrmm_(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb); int _starpu_dtrsm_(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb); int _starpu_cgemm_(char* transA, char* transB, integer* M, integer* N, integer* K, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb, complex* beta, complex* C, integer* ldc); int _starpu_csymm_(char* side, char* uplo, integer* M, integer* N, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb, complex* beta, complex* C, integer* ldc); int _starpu_csyrk_(char* uplo, char* trans, integer* N, integer* K, complex* alpha, complex* A, integer* lda, complex* beta, complex* C, integer* ldc); int _starpu_csyr2k_(char* uplo, char* trans, integer* N, integer* K, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb, complex* beta, complex* C, integer* ldc); int _starpu_ctrmm_(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb); int _starpu_ctrsm_(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb); int _starpu_zgemm_(char* transA, char* transB, integer* M, integer* N, integer* K, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb, doublecomplex* beta, doublecomplex* C, integer* ldc); int _starpu_zsymm_(char* side, char* uplo, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb, doublecomplex* beta, doublecomplex* C, integer* ldc); int _starpu_zsyrk_(char* uplo, char* trans, integer* N, integer* K, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* beta, doublecomplex* C, integer* ldc); int _starpu_zsyr2k_(char* uplo, char* trans, integer* N, integer* K, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb, doublecomplex* beta, doublecomplex* C, integer* ldc); int _starpu_ztrmm_(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb); int _starpu_ztrsm_(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb); int _starpu_chemm_(char* side, char* uplo, integer* M, integer* N, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb, complex* beta, complex* C, integer* ldc); int _starpu_cherk_(char* uplo, char* trans, integer* N, integer* K, real* alpha, complex* A, integer* lda, real* beta, complex* C, integer* ldc); int _starpu_cher2k_(char* uplo, char* trans, integer* N, integer* K, complex* alpha, complex* A, integer* lda, complex* B, integer* ldb, real* beta, complex* C, integer* ldc); int _starpu_zhemm_(char* side, char* uplo, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb, doublecomplex* beta, doublecomplex* C, integer* ldc); int _starpu_zherk_(char* uplo, char* trans, integer* N, integer* K, doublereal* alpha, doublecomplex* A, integer* lda, doublereal* beta, doublecomplex* C, integer* ldc); int _starpu_zher2k_(char* uplo, char* trans, integer* N, integer* K, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* B, integer* ldb, doublereal* beta, doublecomplex* C, integer* ldc); starpu-1.4.9+dfsg/min-dgels/base/BLAS/dblat2.in000066400000000000000000000026721507764646700210310ustar00rootroot00000000000000'dblat2.out' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 4 NUMBER OF VALUES OF K 0 1 2 4 VALUES OF K 4 NUMBER OF VALUES OF INCX AND INCY 1 2 -1 -2 VALUES OF INCX AND INCY 3 NUMBER OF VALUES OF ALPHA 0.0 1.0 0.7 VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA 0.0 1.0 0.9 VALUES OF BETA DGEMV T PUT F FOR NO TEST. SAME COLUMNS. DGBMV T PUT F FOR NO TEST. SAME COLUMNS. DSYMV T PUT F FOR NO TEST. SAME COLUMNS. DSBMV T PUT F FOR NO TEST. SAME COLUMNS. DSPMV T PUT F FOR NO TEST. SAME COLUMNS. DTRMV T PUT F FOR NO TEST. SAME COLUMNS. DTBMV T PUT F FOR NO TEST. SAME COLUMNS. DTPMV T PUT F FOR NO TEST. SAME COLUMNS. DTRSV T PUT F FOR NO TEST. SAME COLUMNS. DTBSV T PUT F FOR NO TEST. SAME COLUMNS. DTPSV T PUT F FOR NO TEST. SAME COLUMNS. DGER T PUT F FOR NO TEST. SAME COLUMNS. DSYR T PUT F FOR NO TEST. SAME COLUMNS. DSPR T PUT F FOR NO TEST. SAME COLUMNS. DSYR2 T PUT F FOR NO TEST. SAME COLUMNS. DSPR2 T PUT F FOR NO TEST. SAME COLUMNS. starpu-1.4.9+dfsg/min-dgels/base/BLAS/dblat3.in000066400000000000000000000015621507764646700210270ustar00rootroot00000000000000'dblat3.out' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 3 NUMBER OF VALUES OF ALPHA 0.0 1.0 0.7 VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA 0.0 1.0 1.3 VALUES OF BETA DGEMM T PUT F FOR NO TEST. SAME COLUMNS. DSYMM T PUT F FOR NO TEST. SAME COLUMNS. DTRMM T PUT F FOR NO TEST. SAME COLUMNS. DTRSM T PUT F FOR NO TEST. SAME COLUMNS. DSYRK T PUT F FOR NO TEST. SAME COLUMNS. DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. starpu-1.4.9+dfsg/min-dgels/base/COPYING000066400000000000000000000031071507764646700176350ustar00rootroot00000000000000Copyright (c) 1992-2008 The University of Tennessee. All rights reserved. $COPYRIGHT$ Additional copyrights may follow $HEADER$ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer listed in this license in the documentation and/or other materials provided with the distribution. - Neither the name of the copyright holders nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/000077500000000000000000000000001507764646700176255ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/000077500000000000000000000000001507764646700207665ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/Makefile000066400000000000000000000160201507764646700224250ustar00rootroot00000000000000 TOPDIR=../.. include $(TOPDIR)/make.inc # Unix makefile: see README. # For C++, first "make hadd". # If your compiler does not recognize ANSI C, add # -DKR_headers # to the CFLAGS = line below. # On Sun and other BSD systems that do not provide an ANSI sprintf, add # -DUSE_STRLEN # to the CFLAGS = line below. # On Linux systems, add # -DNON_UNIX_STDIO # to the CFLAGS = line below. For libf2c.so under Linux, also add # -fPIC # to the CFLAGS = line below. .SUFFIXES: .c .o # compile, then strip unnecessary symbols .c.o: $(CC) -c -DSkip_f2c_Undefs $(CFLAGS) $*.c $(LD) -r -x -o $*.xxx $*.o mv $*.xxx $*.o ## Under Solaris (and other systems that do not understand ld -x), ## omit -x in the ld line above. ## If your system does not have the ld command, comment out ## or remove both the ld and mv lines above. MISC = f77vers.o i77vers.o main.o s_rnge.o abort_.o exit_.o getarg_.o iargc_.o\ getenv_.o signal_.o s_stop.o s_paus.o system_.o cabs.o ctype.o\ derf_.o derfc_.o erf_.o erfc_.o sig_die.o uninit.o POW = pow_ci.o pow_dd.o pow_di.o pow_hh.o pow_ii.o pow_ri.o pow_zi.o pow_zz.o CX = c_abs.o c_cos.o c_div.o c_exp.o c_log.o c_sin.o c_sqrt.o DCX = z_abs.o z_cos.o z_div.o z_exp.o z_log.o z_sin.o z_sqrt.o REAL = r_abs.o r_acos.o r_asin.o r_atan.o r_atn2.o r_cnjg.o r_cos.o\ r_cosh.o r_dim.o r_exp.o r_imag.o r_int.o\ r_lg10.o r_log.o r_mod.o r_nint.o r_sign.o\ r_sin.o r_sinh.o r_sqrt.o r_tan.o r_tanh.o DBL = d_abs.o d_acos.o d_asin.o d_atan.o d_atn2.o\ d_cnjg.o d_cos.o d_cosh.o d_dim.o d_exp.o\ d_imag.o d_int.o d_lg10.o d_log.o d_mod.o\ d_nint.o d_prod.o d_sign.o d_sin.o d_sinh.o\ d_sqrt.o d_tan.o d_tanh.o INT = i_abs.o i_dim.o i_dnnt.o i_indx.o i_len.o i_len_trim.o i_mod.o i_nint.o i_sign.o\ lbitbits.o lbitshft.o i_ceiling.o HALF = h_abs.o h_dim.o h_dnnt.o h_indx.o h_len.o h_mod.o h_nint.o h_sign.o CMP = l_ge.o l_gt.o l_le.o l_lt.o hl_ge.o hl_gt.o hl_le.o hl_lt.o EFL = ef1asc_.o ef1cmc_.o CHAR = f77_aloc.o s_cat.o s_cmp.o s_copy.o I77 = backspac.o close.o dfe.o dolio.o due.o endfile.o err.o\ fmt.o fmtlib.o ftell_.o iio.o ilnw.o inquire.o lread.o lwrite.o\ open.o rdfmt.o rewind.o rsfe.o rsli.o rsne.o sfe.o sue.o\ typesize.o uio.o util.o wref.o wrtfmt.o wsfe.o wsle.o wsne.o xwsne.o QINT = pow_qq.o qbitbits.o qbitshft.o ftell64_.o TIME = dtime_.o etime_.o # If you get an error compiling dtime_.c or etime_.c, try adding # -DUSE_CLOCK to the CFLAGS assignment above; if that does not work, # omit $(TIME) from OFILES = assignment below. # To get signed zeros in write statements on IEEE-arithmetic systems, # add -DSIGNED_ZEROS to the CFLAGS assignment below and add signbit.o # to the end of the OFILES = assignment below. # For INTEGER*8 support (which requires system-dependent adjustments to # f2c.h), add $(QINT) to the OFILES = assignment below... OFILES = $(MISC) $(POW) $(CX) $(DCX) $(REAL) $(DBL) $(INT) \ $(HALF) $(CMP) $(EFL) $(CHAR) $(I77) $(TIME) all: f2c.h signal1.h sysdep1.h libminf2c.a libminf2c.a: $(OFILES) $(ARCH) $(ARCHFLAGS) $(F2CLIB) $? $(RANLIB) $(F2CLIB) ## Shared-library variant: the following rule works on Linux ## systems. Details are system-dependent. Under Linux, -fPIC ## must appear in the CFLAGS assignment when making libf2c.so. ## Under Solaris, use -Kpic in CFLAGS and use "ld -G" instead ## of "cc -shared". libf2c.so: $(OFILES) $(CC) -shared -o libf2c.so $(OFILES) ### If your system lacks ranlib, you don't need it; see README. f77vers.o: f77vers.c $(CC) -c f77vers.c i77vers.o: i77vers.c $(CC) -c i77vers.c # To get an "f2c.h" for use with "f2c -C++", first "make hadd" hadd: f2c.h0 f2ch.add cat f2c.h0 f2ch.add >f2c.h # For use with "f2c" and "f2c -A": f2c.h: f2c.h0 cp f2c.h0 f2c.h # You may need to adjust signal1.h and sysdep1.h suitably for your system... signal1.h: signal1.h0 cp signal1.h0 signal1.h sysdep1.h: sysdep1.h0 cp sysdep1.h0 sysdep1.h # If your system lacks onexit() and you are not using an # ANSI C compiler, then you should uncomment the following # two lines (for compiling main.o): #main.o: main.c # $(CC) -c -DNO_ONEXIT -DSkip_f2c_Undefs main.c # On at least some Sun systems, it is more appropriate to # uncomment the following two lines: #main.o: main.c # $(CC) -c -Donexit=on_exit -DSkip_f2c_Undefs main.c clean: rm -f libminf2c.a *.o arith.h signal1.h sysdep1.h backspac.o: fio.h close.o: fio.h dfe.o: fio.h dfe.o: fmt.h due.o: fio.h endfile.o: fio.h rawio.h err.o: fio.h rawio.h fmt.o: fio.h fmt.o: fmt.h iio.o: fio.h iio.o: fmt.h ilnw.o: fio.h ilnw.o: lio.h inquire.o: fio.h lread.o: fio.h lread.o: fmt.h lread.o: lio.h lread.o: fp.h lwrite.o: fio.h lwrite.o: fmt.h lwrite.o: lio.h open.o: fio.h rawio.h rdfmt.o: fio.h rdfmt.o: fmt.h rdfmt.o: fp.h rewind.o: fio.h rsfe.o: fio.h rsfe.o: fmt.h rsli.o: fio.h rsli.o: lio.h rsne.o: fio.h rsne.o: lio.h sfe.o: fio.h signbit.o: arith.h sue.o: fio.h uio.o: fio.h uninit.o: arith.h util.o: fio.h wref.o: fio.h wref.o: fmt.h wref.o: fp.h wrtfmt.o: fio.h wrtfmt.o: fmt.h wsfe.o: fio.h wsfe.o: fmt.h wsle.o: fio.h wsle.o: fmt.h wsle.o: lio.h wsne.o: fio.h wsne.o: lio.h xwsne.o: fio.h xwsne.o: lio.h xwsne.o: fmt.h main.o: signal1.h signal_.o: signal1.h s_paus.o: signal1.h err.o: sysdep1.h fio.h: sysdep1.h util.c: sysdep1.h arith.h: arithchk.c $(CC) $(CFLAGS) -DNO_FPINIT arithchk.c -lm ||\ $(CC) -DNO_LONG_LONG $(CFLAGS) -DNO_FPINIT arithchk.c -lm ./a.out >arith.h rm -f a.out arithchk.o check: xsum Notice README abort_.c arithchk.c backspac.c c_abs.c c_cos.c \ c_div.c c_exp.c c_log.c c_sin.c c_sqrt.c cabs.c close.c comptry.bat \ ctype.c ctype.h \ d_abs.c d_acos.c d_asin.c d_atan.c d_atn2.c d_cnjg.c d_cos.c d_cosh.c \ d_dim.c d_exp.c d_imag.c d_int.c d_lg10.c d_log.c d_mod.c \ d_nint.c d_prod.c d_sign.c d_sin.c d_sinh.c d_sqrt.c d_tan.c \ d_tanh.c derf_.c derfc_.c dfe.c dolio.c dtime_.c due.c ef1asc_.c \ ef1cmc_.c endfile.c erf_.c erfc_.c err.c etime_.c exit_.c f2c.h0 \ f2ch.add f77_aloc.c f77vers.c fio.h fmt.c fmt.h fmtlib.c \ fp.h ftell_.c ftell64_.c i_ceiling.c \ getarg_.c getenv_.c h_abs.c h_dim.c h_dnnt.c h_indx.c h_len.c \ h_mod.c h_nint.c h_sign.c hl_ge.c hl_gt.c hl_le.c hl_lt.c \ i77vers.c i_abs.c i_dim.c i_dnnt.c i_indx.c i_len.c i_len_trim.c i_mod.c \ i_nint.c i_sign.c iargc_.c iio.c ilnw.c inquire.c l_ge.c l_gt.c \ l_le.c l_lt.c lbitbits.c lbitshft.c libf2c.lbc libf2c.sy lio.h \ lread.c lwrite.c main.c makefile.sy makefile.u makefile.vc \ makefile.wat math.hvc mkfile.plan9 open.c pow_ci.c pow_dd.c \ pow_di.c pow_hh.c pow_ii.c pow_qq.c pow_ri.c pow_zi.c pow_zz.c \ qbitbits.c qbitshft.c r_abs.c r_acos.c r_asin.c r_atan.c r_atn2.c \ r_cnjg.c r_cos.c r_cosh.c r_dim.c r_exp.c r_imag.c r_int.c r_lg10.c \ r_log.c r_mod.c r_nint.c r_sign.c r_sin.c r_sinh.c r_sqrt.c \ r_tan.c r_tanh.c rawio.h rdfmt.c rewind.c rsfe.c rsli.c rsne.c \ s_cat.c s_cmp.c s_copy.c s_paus.c s_rnge.c s_stop.c scomptry.bat sfe.c \ sig_die.c signal1.h0 signal_.c signbit.c sue.c sysdep1.h0 system_.c \ typesize.c \ uio.c uninit.c util.c wref.c wrtfmt.c wsfe.c wsle.c wsne.c xwsne.c \ z_abs.c z_cos.c z_div.c z_exp.c z_log.c z_sin.c z_sqrt.c >xsum1.out cmp xsum0.out xsum1.out && mv xsum1.out xsum.out || diff xsum[01].out starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/Notice000066400000000000000000000022741507764646700221370ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/README000066400000000000000000000407541507764646700216600ustar00rootroot00000000000000As shipped, "makefile" is a copy of "makefile.u", a Unix makefile. Variants for other systems have names of the form makefile.* and have initial comments saying how to invoke them. You may wish to copy one of the other makefile.* files to makefile. If you use a C++ compiler, first say make hadd to create a suitable f2c.h from f2c.h0 and f2ch.add. Otherwise, make f2c.h will just copy f2c.h0 to f2c.h . If your compiler does not recognize ANSI C headers, compile with KR_headers defined: either add -DKR_headers to the definition of CFLAGS in the makefile, or insert #define KR_headers at the top of f2c.h . If your system lacks onexit() and you are not using an ANSI C compiler, then you should compile main.c with NO_ONEXIT defined. See the comments about onexit in makefile.u. If your system has a double drem() function such that drem(a,b) is the IEEE remainder function (with double a, b), then you may wish to compile r_mod.c and d_mod.c with IEEE_drem defined. To check for transmission errors, issue the command make check or make -f makefile.u check This assumes you have the xsum program whose source, xsum.c, is distributed as part of "all from f2c/src", and that it is installed somewhere in your search path. If you do not have xsum, you can obtain xsum.c by sending the following E-mail message to netlib@netlib.bell-labs.com send xsum.c from f2c/src For convenience, the f2c.h0 in this directory is a copy of netlib's "f2c.h from f2c". It is best to install f2c.h in a standard place, so "include f2c.h" will work in any directory without further ado. Beware that the makefiles do not cause recompilation when f2c.h is changed. On machines, such as those using a DEC Alpha processor, on which sizeof(short) == 2, sizeof(int) == sizeof(float) == 4, and sizeof(long) == sizeof(double) == 8, it suffices to modify f2c.h by removing the first occurrence of "long " on each line containing "long ". On Unix systems, you can do this by issuing the commands mv f2c.h f2c.h0 sed 's/long int /int /' f2c.h0 >f2c.h On such machines, one can enable INTEGER*8 by uncommenting the typedefs of longint and ulongint in f2c.h and adjusting them, so they read typedef long longint; typedef unsigned long ulongint; and by compiling libf2c with -DAllow_TYQUAD, as discussed below. Most of the routines in libf2c are support routines for Fortran intrinsic functions or for operations that f2c chooses not to do "in line". There are a few exceptions, summarized below -- functions and subroutines that appear to your program as ordinary external Fortran routines. If you use the REAL valued functions listed below (ERF, ERFC, DTIME, and ETIME) with "f2c -R", then you need to compile the corresponding source files with -DREAL=float. To do this, it is perhaps simplest to add "-DREAL=float" to CFLAGS in the makefile. 1. CALL ABORT prints a message and causes a core dump. 2. ERF(r) and DERF(d) and the REAL and DOUBLE PRECISION error functions (with x REAL and d DOUBLE PRECISION); DERF must be declared DOUBLE PRECISION in your program. Both ERF and DERF assume your C library provides the underlying erf() function (which not all systems do). 3. ERFC(r) and DERFC(d) are the complementary error functions: ERFC(r) = 1 - ERF(r) and DERFC(d) = 1.d0 - DERFC(d) (except that their results may be more accurate than explicitly evaluating the above formulae would give). Again, ERFC and r are REAL, and DERFC and d are DOUBLE PRECISION (and must be declared as such in your program), and ERFC and DERFC rely on your system's erfc(). 4. CALL GETARG(n,s), where n is an INTEGER and s is a CHARACTER variable, sets s to the n-th command-line argument (or to all blanks if there are fewer than n command-line arguments); CALL GETARG(0,s) sets s to the name of the program (on systems that support this feature). See IARGC below. 5. CALL GETENV(name, value), where name and value are of type CHARACTER, sets value to the environment value, $name, of name (or to blanks if $name has not been set). 6. NARGS = IARGC() sets NARGS to the number of command-line arguments (an INTEGER value). 7. CALL SIGNAL(n,func), where n is an INTEGER and func is an EXTERNAL procedure, arranges for func to be invoked when n occurs (on systems where this makes sense). If your compiler complains about the signal calls in main.c, s_paus.c, and signal_.c, you may need to adjust signal1.h suitably. See the comments in signal1.h. 8. ETIME(ARR) and DTIME(ARR) are REAL functions that return execution times. ARR is declared REAL ARR(2). The elapsed user and system CPU times are stored in ARR(1) and ARR(2), respectively. ETIME returns the total elapsed CPU time, i.e., ARR(1) + ARR(2). DTIME returns total elapsed CPU time since the previous call on DTIME. 9. CALL SYSTEM(cmd), where cmd is of type CHARACTER, passes cmd to the system's command processor (on systems where this can be done). 10. CALL FLUSH flushes all buffers. 11. FTELL(i) is an INTEGER function that returns the current offset of Fortran unit i (or -1 if unit i is not open). 12. CALL FSEEK(i, offset, whence, *errlab) attemps to move Fortran unit i to the specified offset: absolute offset if whence = 0; relative to the current offset if whence = 1; relative to the end of the file if whence = 2. It branches to label errlab if unit i is not open or if the call otherwise fails. The routines whose objects are makefile.u's $(I77) are for I/O. The following comments apply to them. If your system lacks /usr/include/local.h , then you should create an appropriate local.h in this directory. An appropriate local.h may simply be empty, or it may #define VAX or #define CRAY (or whatever else you must do to make fp.h work right). Alternatively, edit fp.h to suite your machine. If your system lacks /usr/include/fcntl.h , then you should simply create an empty fcntl.h in this directory. If your compiler then complains about creat and open not having a prototype, compile with OPEN_DECL defined. On many systems, open and creat are declared in fcntl.h . If your system's sprintf does not work the way ANSI C specifies -- specifically, if it does not return the number of characters transmitted -- then insert the line #define USE_STRLEN at the end of fmt.h . This is necessary with at least some versions of Sun software. In particular, if you get a warning about an improper pointer/integer combination in compiling wref.c, then you need to compile with -DUSE_STRLEN . If your system's fopen does not like the ANSI binary reading and writing modes "rb" and "wb", then you should compile open.c with NON_ANSI_RW_MODES #defined. If you get error messages about references to cf->_ptr and cf->_base when compiling wrtfmt.c and wsfe.c or to stderr->_flag when compiling err.c, then insert the line #define NON_UNIX_STDIO at the beginning of fio.h, and recompile everything (or at least those modules that contain NON_UNIX_STDIO). Unformatted sequential records consist of a length of record contents, the record contents themselves, and the length of record contents again (for backspace). Prior to 17 Oct. 1991, the length was of type int; now it is of type long, but you can change it back to int by inserting #define UIOLEN_int at the beginning of fio.h. This affects only sue.c and uio.c . If you have a really ancient K&R C compiler that does not understand void, add -Dvoid=int to the definition of CFLAGS in the makefile. On VAX, Cray, or Research Tenth-Edition Unix systems, you may need to add -DVAX, -DCRAY, or -DV10 (respectively) to CFLAGS to make fp.h work correctly. Alternatively, you may need to edit fp.h to suit your machine. If your compiler complains about the signal calls in main.c, s_paus.c, and signal_.c, you may need to adjust signal1.h suitably. See the comments in signal1.h. You may need to supply the following non-ANSI routines: fstat(int fileds, struct stat *buf) is similar to stat(char *name, struct stat *buf), except that the first argument, fileds, is the file descriptor returned by open rather than the name of the file. fstat is used in the system-dependent routine canseek (in the libf2c source file err.c), which is supposed to return 1 if it's possible to issue seeks on the file in question, 0 if it's not; you may need to suitably modify err.c . On non-UNIX systems, you can avoid references to fstat and stat by compiling with NON_UNIX_STDIO defined; in that case, you may need to supply access(char *Name,0), which is supposed to return 0 if file Name exists, nonzero otherwise. char * mktemp(char *buf) is supposed to replace the 6 trailing X's in buf with a unique number and then return buf. The idea is to get a unique name for a temporary file. On non-UNIX systems, you may need to change a few other, e.g.: the form of name computed by mktemp() in endfile.c and open.c; the use of the open(), close(), and creat() system calls in endfile.c, err.c, open.c; and the modes in calls on fopen() and fdopen() (and perhaps the use of fdopen() itself -- it's supposed to return a FILE* corresponding to a given an integer file descriptor) in err.c and open.c (component ufmt of struct unit is 1 for formatted I/O -- text mode on some systems -- and 0 for unformatted I/O -- binary mode on some systems). Compiling with -DNON_UNIX_STDIO omits all references to creat() and almost all references to open() and close(), the exception being in the function f__isdev() (in open.c). If you wish to use translated Fortran that has funny notions of record length for direct unformatted I/O (i.e., that assumes RECL= values in OPEN statements are not bytes but rather counts of some other units -- e.g., 4-character words for VMS), then you should insert an appropriate #define for url_Adjust at the beginning of open.c . For VMS Fortran, for example, #define url_Adjust(x) x *= 4 would suffice. By default, Fortran I/O units 5, 6, and 0 are pre-connected to stdin, stdout, and stderr, respectively. You can change this behavior by changing f_init() in err.c to suit your needs. Note that f2c assumes READ(*... means READ(5... and WRITE(*... means WRITE(6... . Moreover, an OPEN(n,... statement that does not specify a file name (and does not specify STATUS='SCRATCH') assumes FILE='fort.n' . You can change this by editing open.c and endfile.c suitably. Unless you adjust the "#define MXUNIT" line in fio.h, Fortran units 0, 1, ..., 99 are available, i.e., the highest allowed unit number is MXUNIT - 1. Lines protected from compilation by #ifdef Allow_TYQUAD are for a possible extension to 64-bit integers in which integer = int = 32 bits and longint = long = 64 bits. The makefile does not attempt to compile pow_qq.c, qbitbits.c, and qbitshft.c, which are meant for use with INTEGER*8. To use INTEGER*8, you must modify f2c.h to declare longint and ulongint appropriately; then add $(QINT) to the end of the makefile's dependency list for libf2c.a (if makefile is a copy of makefile.u; for the PC makefiles, add pow_qq.obj qbitbits.obj qbitshft.obj to the library's dependency list and adjust libf2c.lbc or libf2c.sy accordingly). Also add -DAllow_TYQUAD to the makefile's CFLAGS assignment. To make longint and ulongint available, it may suffice to add -DINTEGER_STAR_8 to the CFLAGS assignment. Following Fortran 90, s_cat.c and s_copy.c allow the target of a (character string) assignment to be appear on its right-hand, at the cost of some extra overhead for all run-time concatenations. If you prefer the extra efficiency that comes with the Fortran 77 requirement that the left-hand side of a character assignment not be involved in the right-hand side, compile s_cat.c and s_copy.c with -DNO_OVERWRITE . Extensions (Feb. 1993) to NAMELIST processing: 1. Reading a ? instead of &name (the start of a namelist) causes the namelist being sought to be written to stdout (unit 6); to omit this feature, compile rsne.c with -DNo_Namelist_Questions. 2. Reading the wrong namelist name now leads to an error message and an attempt to skip input until the right namelist name is found; to omit this feature, compile rsne.c with -DNo_Bad_Namelist_Skip. 3. Namelist writes now insert newlines before each variable; to omit this feature, compile xwsne.c with -DNo_Extra_Namelist_Newlines. 4. (Sept. 1995) When looking for the &name that starts namelist input, lines whose first non-blank character is something other than &, $, or ? are treated as comment lines and ignored, unless rsne.c is compiled with -DNo_Namelist_Comments. Nonstandard extension (Feb. 1993) to open: for sequential files, ACCESS='APPEND' (or access='anything else starting with "A" or "a"') causes the file to be positioned at end-of-file, so a write will append to the file. Some buggy Fortran programs use unformatted direct I/O to write an incomplete record and later read more from that record than they have written. For records other than the last, the unwritten portion of the record reads as binary zeros. The last record is a special case: attempting to read more from it than was written gives end-of-file -- which may help one find a bug. Some other Fortran I/O libraries treat the last record no differently than others and thus give no help in finding the bug of reading more than was written. If you wish to have this behavior, compile uio.c with -DPad_UDread . If you want to be able to catch write failures (e.g., due to a disk being full) with an ERR= specifier, compile dfe.c, due.c, sfe.c, sue.c, and wsle.c with -DALWAYS_FLUSH. This will lead to slower execution and more I/O, but should make ERR= work as expected, provided fflush returns an error return when its physical write fails. Carriage controls are meant to be interpreted by the UNIX col program (or a similar program). Sometimes it's convenient to use only ' ' as the carriage control character (normal single spacing). If you compile lwrite.c and wsfe.c with -DOMIT_BLANK_CC, formatted external output lines will have an initial ' ' quietly omitted, making use of the col program unnecessary with output that only has ' ' for carriage control. The Fortran 77 Standard leaves it up to the implementation whether formatted writes of floating-point numbers of absolute value < 1 have a zero before the decimal point. By default, libI77 omits such superfluous zeros, but you can cause them to appear by compiling lwrite.c, wref.c, and wrtfmt.c with -DWANT_LEAD_0 . If your (Unix) system lacks a ranlib command, you don't need it. Either comment out the makefile's ranlib invocation, or install a harmless "ranlib" command somewhere in your PATH, such as the one-line shell script exit 0 or (on some systems) exec /usr/bin/ar lts $1 >/dev/null By default, the routines that implement complex and double complex division, c_div.c and z_div.c, call sig_die to print an error message and exit if they see a divisor of 0, as this is sometimes helpful for debugging. On systems with IEEE arithmetic, compiling c_div.c and z_div.c with -DIEEE_COMPLEX_DIVIDE causes them instead to set both the real and imaginary parts of the result to +INFINITY if the numerator is nonzero, or to NaN if it vanishes. Nowadays most Unix and Linux systems have function int ftruncate(int fildes, off_t len); defined in system header file unistd.h that adjusts the length of file descriptor fildes to length len. Unless endfile.c is compiled with -DNO_TRUNCATE, endfile.c #includes "unistd.h" and calls ftruncate() if necessary to shorten files. If your system lacks ftruncate(), compile endfile.c with -DNO_TRUNCATE to make endfile.c use the older and more portable scheme of shortening a file by copying to a temporary file and back again. The initializations for "f2c -trapuv" are done by _uninit_f2c(), whose source is uninit.c, introduced June 2001. On IEEE-arithmetic systems, _uninit_f2c should initialize floating-point variables to signaling NaNs and, at its first invocation, should enable the invalid operation exception. Alas, the rules for distinguishing signaling from quiet NaNs were not specified in the IEEE P754 standard, nor were the precise means of enabling and disabling IEEE-arithmetic exceptions, and these details are thus system dependent. There are #ifdef's in uninit.c that specify them for some popular systems. If yours is not one of these systems, it may take some detective work to discover the appropriate details for your system. Sometimes it helps to look in the standard include directories for header files with relevant-sounding names, such as ieeefp.h, nan.h, or trap.h, and it may be simplest to run experiments to see what distinguishes a signaling from a quiet NaN. (If x is initialized to a signaling NaN and the invalid operation exception is masked off, as it should be by default on IEEE-arithmetic systems, then computing, say, y = x + 1 will yield a quiet NaN.) starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/abort_.c000066400000000000000000000004601507764646700224000ustar00rootroot00000000000000#include "stdio.h" #include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern VOID sig_die(); int abort_() #else extern void sig_die(const char*,int); int abort_(void) #endif { sig_die("Fortran abort routine called", 1); return 0; /* not reached */ } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/arithchk.c000066400000000000000000000120701507764646700227270ustar00rootroot00000000000000/**************************************************************** Copyright (C) 1997, 1998, 2000 Lucent Technologies All Rights Reserved Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the name of Lucent or any of its entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ /* Try to deduce arith.h from arithmetic properties. */ #include #include #include #ifdef NO_FPINIT #define fpinit_ASL() #else #ifndef KR_headers extern #ifdef __cplusplus "C" #endif void fpinit_ASL(void); #endif /*KR_headers*/ #endif /*NO_FPINIT*/ static int dalign; typedef struct Akind { char *name; int kind; } Akind; static Akind IEEE_8087 = { "IEEE_8087", 1 }, IEEE_MC68k = { "IEEE_MC68k", 2 }, IBM = { "IBM", 3 }, VAX = { "VAX", 4 }, CRAY = { "CRAY", 5}; static double t_nan; static Akind * Lcheck(void) { union { double d; long L[2]; } u; struct { double d; long L; } x[2]; if (sizeof(x) > 2*(sizeof(double) + sizeof(long))) dalign = 1; u.L[0] = u.L[1] = 0; u.d = 1e13; if (u.L[0] == 1117925532 && u.L[1] == -448790528) return &IEEE_MC68k; if (u.L[1] == 1117925532 && u.L[0] == -448790528) return &IEEE_8087; if (u.L[0] == -2065213935 && u.L[1] == 10752) return &VAX; if (u.L[0] == 1267827943 && u.L[1] == 704643072) return &IBM; return 0; } static Akind * icheck(void) { union { double d; int L[2]; } u; struct { double d; int L; } x[2]; if (sizeof(x) > 2*(sizeof(double) + sizeof(int))) dalign = 1; u.L[0] = u.L[1] = 0; u.d = 1e13; if (u.L[0] == 1117925532 && u.L[1] == -448790528) return &IEEE_MC68k; if (u.L[1] == 1117925532 && u.L[0] == -448790528) return &IEEE_8087; if (u.L[0] == -2065213935 && u.L[1] == 10752) return &VAX; if (u.L[0] == 1267827943 && u.L[1] == 704643072) return &IBM; return 0; } char *emptyfmt = ""; /* avoid possible warning message with printf("") */ static Akind * ccheck(void) { union { double d; long L; } u; long Cray1; /* Cray1 = 4617762693716115456 -- without overflow on non-Crays */ Cray1 = printf("%s",emptyfmt) < 0 ? 0 : 4617762; if (printf(emptyfmt, Cray1) >= 0) Cray1 = 1000000*Cray1 + 693716; if (printf(emptyfmt, Cray1) >= 0) Cray1 = 1000000*Cray1 + 115456; u.d = 1e13; if (u.L == Cray1) return &CRAY; return 0; } static int fzcheck(void) { double a, b; int i; a = 1.; b = .1; for(i = 155;; b *= b, i >>= 1) { if (i & 1) { a *= b; if (i == 1) break; } } b = a * a; return b == 0.; } static int need_nancheck(void) { double t; errno = 0; t = log(t_nan); if (errno == 0) return 1; errno = 0; t = sqrt(t_nan); return errno == 0; } void get_nanbits(unsigned int *b, int k) { union { double d; unsigned int z[2]; } u, u1, u2; k = 2 - k; u1.z[k] = u2.z[k] = 0x7ff00000; u1.z[1-k] = u2.z[1-k] = 0; u.d = u1.d - u2.d; /* Infinity - Infinity */ b[0] = u.z[0]; b[1] = u.z[1]; } int main(void) { FILE *f; Akind *a = 0; int Ldef = 0; unsigned int nanbits[2]; fpinit_ASL(); #ifdef WRITE_ARITH_H /* for Symantec's buggy "make" */ f = fopen("arith.h", "w"); if (!f) { printf("Cannot open arith.h\n"); return 1; } #else f = stdout; #endif if (sizeof(double) == 2*sizeof(long)) a = Lcheck(); else if (sizeof(double) == 2*sizeof(int)) { Ldef = 1; a = icheck(); } else if (sizeof(double) == sizeof(long)) a = ccheck(); if (a) { fprintf(f, "#define %s\n#define Arith_Kind_ASL %d\n", a->name, a->kind); if (Ldef) fprintf(f, "#define Long int\n#define Intcast (int)(long)\n"); if (dalign) fprintf(f, "#define Double_Align\n"); if (sizeof(char*) == 8) fprintf(f, "#define X64_bit_pointers\n"); #ifndef NO_LONG_LONG if (sizeof(long long) < 8) #endif fprintf(f, "#define NO_LONG_LONG\n"); if (a->kind <= 2) { if (fzcheck()) fprintf(f, "#define Sudden_Underflow\n"); t_nan = -a->kind; if (need_nancheck()) fprintf(f, "#define NANCHECK\n"); if (sizeof(double) == 2*sizeof(unsigned int)) { get_nanbits(nanbits, a->kind); fprintf(f, "#define QNaN0 0x%x\n", nanbits[0]); fprintf(f, "#define QNaN1 0x%x\n", nanbits[1]); } } return 0; } fprintf(f, "/* Unknown arithmetic */\n"); return 1; } #ifdef __sun #ifdef __i386 /* kludge for Intel Solaris */ void fpsetprec(int x) { } #endif #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/backspac.c000066400000000000000000000025171507764646700227060ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers integer f_back(a) alist *a; #else integer f_back(alist *a) #endif { unit *b; OFF_T v, w, x, y, z; uiolen n; FILE *f; f__curunit = b = &f__units[a->aunit]; /* curunit for error messages */ if(a->aunit >= MXUNIT || a->aunit < 0) err(a->aerr,101,"backspace") if(b->useek==0) err(a->aerr,106,"backspace") if(b->ufd == NULL) { fk_open(1, 1, a->aunit); return(0); } if(b->uend==1) { b->uend=0; return(0); } if(b->uwrt) { t_runc(a); if (f__nowreading(b)) err(a->aerr,errno,"backspace") } f = b->ufd; /* may have changed in t_runc() */ if(b->url>0) { x=FTELL(f); y = x % b->url; if(y == 0) x--; x /= b->url; x *= b->url; (void) FSEEK(f,x,SEEK_SET); return(0); } if(b->ufmt==0) { FSEEK(f,-(OFF_T)sizeof(uiolen),SEEK_CUR); if(fread((char *)&n,sizeof(uiolen),1,f)); return (1); FSEEK(f,-(OFF_T)n-2*sizeof(uiolen),SEEK_CUR); return(0); } w = x = FTELL(f); z = 0; loop: while(x) { x -= x < 64 ? x : 64; FSEEK(f,x,SEEK_SET); for(y = x; y < w; y++) { if (getc(f) != '\n') continue; v = FTELL(f); if (v == w) { if (z) goto break2; goto loop; } z = v; } err(a->aerr,(EOF),"backspace") } break2: FSEEK(f, z, SEEK_SET); return 0; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/c_abs.c000066400000000000000000000004201507764646700221750ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern double f__cabs(); double c_abs(z) complex *z; #else extern double f__cabs(double, double); double c_abs(complex *z) #endif { return( f__cabs( z->r, z->i ) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/c_cos.c000066400000000000000000000005421507764646700222210ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers extern double sin(), cos(), sinh(), cosh(); VOID c_cos(r, z) complex *r, *z; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif void c_cos(complex *r, complex *z) #endif { double zi = z->i, zr = z->r; r->r = cos(zr) * cosh(zi); r->i = - sin(zr) * sinh(zi); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/c_div.c000066400000000000000000000016501507764646700222200ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern VOID sig_die(); VOID c_div(c, a, b) complex *a, *b, *c; #else extern void sig_die(const char*,int); void c_div(complex *c, complex *a, complex *b) #endif { double ratio, den; double abr, abi, cr; if( (abr = b->r) < 0.) abr = - abr; if( (abi = b->i) < 0.) abi = - abi; if( abr <= abi ) { if(abi == 0) { #ifdef IEEE_COMPLEX_DIVIDE float af, bf; af = bf = abr; if (a->i != 0 || a->r != 0) af = 1.; c->i = c->r = af / bf; return; #else sig_die("complex division by zero", 1); #endif } ratio = (double)b->r / b->i ; den = b->i * (1 + ratio*ratio); cr = (a->r*ratio + a->i) / den; c->i = (a->i*ratio - a->r) / den; } else { ratio = (double)b->i / b->r ; den = b->r * (1 + ratio*ratio); cr = (a->r + a->i*ratio) / den; c->i = (a->i - a->r*ratio) / den; } c->r = cr; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/c_exp.c000066400000000000000000000005351507764646700222330ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers extern double exp(), cos(), sin(); VOID c_exp(r, z) complex *r, *z; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif void c_exp(complex *r, complex *z) #endif { double expx, zi = z->i; expx = exp(z->r); r->r = expx * cos(zi); r->i = expx * sin(zi); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/c_log.c000066400000000000000000000006001507764646700222110ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers extern double log(), f__cabs(), atan2(); VOID c_log(r, z) complex *r, *z; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif extern double f__cabs(double, double); void c_log(complex *r, complex *z) #endif { double zi, zr; r->i = atan2(zi = z->i, zr = z->r); r->r = log( f__cabs(zr, zi) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/c_sin.c000066400000000000000000000005361507764646700222310ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers extern double sin(), cos(), sinh(), cosh(); VOID c_sin(r, z) complex *r, *z; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif void c_sin(complex *r, complex *z) #endif { double zi = z->i, zr = z->r; r->r = sin(zr) * cosh(zi); r->i = cos(zr) * sinh(zi); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/c_sqrt.c000066400000000000000000000011351507764646700224250ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers extern double sqrt(), f__cabs(); VOID c_sqrt(r, z) complex *r, *z; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif extern double f__cabs(double, double); void c_sqrt(complex *r, complex *z) #endif { double mag, t; double zi = z->i, zr = z->r; if( (mag = f__cabs(zr, zi)) == 0.) r->r = r->i = 0.; else if(zr > 0) { r->r = t = sqrt(0.5 * (mag + zr) ); t = zi / t; r->i = 0.5 * t; } else { t = sqrt(0.5 * (mag - zr) ); if(zi < 0) t = -t; r->i = t; t = zi / t; r->r = 0.5 * t; } } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/cabs.c000066400000000000000000000007561507764646700220520ustar00rootroot00000000000000#ifdef KR_headers extern double sqrt(); double f__cabs(real, imag) double real, imag; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double f__cabs(double real, double imag) #endif { double temp; if(real < 0) real = -real; if(imag < 0) imag = -imag; if(imag > real){ temp = real; real = imag; imag = temp; } if((real+imag) == real) return(real); temp = imag/real; temp = real*sqrt(1.0 + temp*temp); /*overflow!!*/ return(temp); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/close.c000066400000000000000000000025611507764646700222430ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #ifdef KR_headers integer f_clos(a) cllist *a; #else #undef abs #undef min #undef max #include "stdlib.h" #ifdef NON_UNIX_STDIO #ifndef unlink #define unlink remove #endif #else #ifdef MSDOS #include "io.h" #else #ifdef __cplusplus extern "C" int unlink(const char*); #else extern int unlink(const char*); #endif #endif #endif #ifdef __cplusplus extern "C" { #endif integer f_clos(cllist *a) #endif { unit *b; if(a->cunit >= MXUNIT) return(0); b= &f__units[a->cunit]; if(b->ufd==NULL) goto done; if (b->uscrtch == 1) goto Delete; if (!a->csta) goto Keep; switch(*a->csta) { default: Keep: case 'k': case 'K': if(b->uwrt == 1) t_runc((alist *)a); if(b->ufnm) { fclose(b->ufd); free(b->ufnm); } break; case 'd': case 'D': Delete: fclose(b->ufd); if(b->ufnm) { unlink(b->ufnm); /*SYSDEP*/ free(b->ufnm); } } b->ufd=NULL; done: b->uend=0; b->ufnm=NULL; return(0); } void #ifdef KR_headers f_exit() #else f_exit(void) #endif { int i; static cllist xx; if (!xx.cerr) { xx.cerr=1; xx.csta=NULL; for(i=0;i #else /*{*/ #ifndef My_ctype_DEF extern char My_ctype[]; #else /*{*/ char My_ctype[264] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; #endif /*}*/ #define isdigit(x) (My_ctype[(x)+8] & 1) #define isspace(x) (My_ctype[(x)+8] & 2) #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_abs.c000066400000000000000000000003321507764646700222000ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers double d_abs(x) doublereal *x; #else double d_abs(doublereal *x) #endif { if(*x >= 0) return(*x); return(- *x); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_acos.c000066400000000000000000000003651507764646700223660ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double acos(); double d_acos(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_acos(doublereal *x) #endif { return( acos(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_asin.c000066400000000000000000000003651507764646700223730ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double asin(); double d_asin(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_asin(doublereal *x) #endif { return( asin(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_atan.c000066400000000000000000000003651507764646700223640ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double atan(); double d_atan(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_atan(doublereal *x) #endif { return( atan(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_atn2.c000066400000000000000000000004171507764646700223030ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double atan2(); double d_atn2(x,y) doublereal *x, *y; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_atn2(doublereal *x, doublereal *y) #endif { return( atan2(*x,*y) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_cnjg.c000066400000000000000000000003771507764646700223650ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif VOID #ifdef KR_headers d_cnjg(r, z) doublecomplex *r, *z; #else d_cnjg(doublecomplex *r, doublecomplex *z) #endif { doublereal zi = z->i; r->r = z->r; r->i = -zi; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_cos.c000066400000000000000000000003611507764646700222210ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double cos(); double d_cos(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_cos(doublereal *x) #endif { return( cos(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_cosh.c000066400000000000000000000003651507764646700223750ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double cosh(); double d_cosh(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_cosh(doublereal *x) #endif { return( cosh(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_dim.c000066400000000000000000000003501507764646700222040ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers double d_dim(a,b) doublereal *a, *b; #else double d_dim(doublereal *a, doublereal *b) #endif { return( *a > *b ? *a - *b : 0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_exp.c000066400000000000000000000003611507764646700222310ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double exp(); double d_exp(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_exp(doublereal *x) #endif { return( exp(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_imag.c000066400000000000000000000003111507764646700223450ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers double d_imag(z) doublecomplex *z; #else double d_imag(doublecomplex *z) #endif { return(z->i); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_int.c000066400000000000000000000004151507764646700222270ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double floor(); double d_int(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_int(doublereal *x) #endif { return( (*x>0) ? floor(*x) : -floor(- *x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_lg10.c000066400000000000000000000004431507764646700222010ustar00rootroot00000000000000#include "f2c.h" #define log10e 0.43429448190325182765 #ifdef KR_headers double log(); double d_lg10(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_lg10(doublereal *x) #endif { return( log10e * log(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_log.c000066400000000000000000000003611507764646700222160ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double log(); double d_log(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_log(doublereal *x) #endif { return( log(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_mod.c000066400000000000000000000012601507764646700222130ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers #ifdef IEEE_drem double drem(); #else double floor(); #endif double d_mod(x,y) doublereal *x, *y; #else #ifdef IEEE_drem double drem(double, double); #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif #endif double d_mod(doublereal *x, doublereal *y) #endif { #ifdef IEEE_drem double xa, ya, z; if ((ya = *y) < 0.) ya = -ya; z = drem(xa = *x, ya); if (xa > 0) { if (z < 0) z += ya; } else if (z > 0) z -= ya; return z; #else double quotient; if( (quotient = *x / *y) >= 0) quotient = floor(quotient); else quotient = -floor(-quotient); return(*x - (*y) * quotient ); #endif } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_nint.c000066400000000000000000000004311507764646700224030ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double floor(); double d_nint(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_nint(doublereal *x) #endif { return( (*x)>=0 ? floor(*x + .5) : -floor(.5 - *x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_prod.c000066400000000000000000000003171507764646700224020ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers double d_prod(x,y) real *x, *y; #else double d_prod(real *x, real *y) #endif { return( (*x) * (*y) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_sign.c000066400000000000000000000004121507764646700223720ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers double d_sign(a,b) doublereal *a, *b; #else double d_sign(doublereal *a, doublereal *b) #endif { double x; x = (*a >= 0 ? *a : - *a); return( *b >= 0 ? x : -x); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_sin.c000066400000000000000000000003611507764646700222260ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double sin(); double d_sin(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_sin(doublereal *x) #endif { return( sin(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_sinh.c000066400000000000000000000003651507764646700224020ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double sinh(); double d_sinh(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_sinh(doublereal *x) #endif { return( sinh(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_sqrt.c000066400000000000000000000003651507764646700224320ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double sqrt(); double d_sqrt(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_sqrt(doublereal *x) #endif { return( sqrt(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_tan.c000066400000000000000000000003611507764646700222170ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double tan(); double d_tan(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_tan(doublereal *x) #endif { return( tan(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/d_tanh.c000066400000000000000000000003651507764646700223730ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double tanh(); double d_tanh(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double d_tanh(doublereal *x) #endif { return( tanh(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/derf_.c000066400000000000000000000003571507764646700222160ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers double erf(); double derf_(x) doublereal *x; #else extern double erf(double); double derf_(doublereal *x) #endif { return( erf(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/derfc_.c000066400000000000000000000003751507764646700223610ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern double erfc(); double derfc_(x) doublereal *x; #else extern double erfc(double); double derfc_(doublereal *x) #endif { return( erfc(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/dfe.c000066400000000000000000000051001507764646700216640ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #include "fmt.h" #ifdef __cplusplus extern "C" { #endif int y_rsk(Void) { if(f__curunit->uend || f__curunit->url <= f__recpos || f__curunit->url == 1) return 0; do { getc(f__cf); } while(++f__recpos < f__curunit->url); return 0; } int y_getc(Void) { int ch; if(f__curunit->uend) return(-1); if((ch=getc(f__cf))!=EOF) { f__recpos++; if(f__curunit->url>=f__recpos || f__curunit->url==1) return(ch); else return(' '); } if(feof(f__cf)) { f__curunit->uend=1; errno=0; return(-1); } err(f__elist->cierr,errno,"readingd"); } static int y_rev(Void) { if (f__recpos < f__hiwater) f__recpos = f__hiwater; if (f__curunit->url > 1) while(f__recpos < f__curunit->url) (*f__putn)(' '); if (f__recpos) f__putbuf(0); f__recpos = 0; return(0); } static int y_err(Void) { err(f__elist->cierr, 110, "dfe"); } static int y_newrec(Void) { y_rev(); f__hiwater = f__cursor = 0; return(1); } int #ifdef KR_headers c_dfe(a) cilist *a; #else c_dfe(cilist *a) #endif { f__sequential=0; f__formatted=f__external=1; f__elist=a; f__cursor=f__scale=f__recpos=0; f__curunit = &f__units[a->ciunit]; if(a->ciunit>MXUNIT || a->ciunit<0) err(a->cierr,101,"startchk"); if(f__curunit->ufd==NULL && fk_open(DIR,FMT,a->ciunit)) err(a->cierr,104,"dfe"); f__cf=f__curunit->ufd; if(!f__curunit->ufmt) err(a->cierr,102,"dfe") if(!f__curunit->useek) err(a->cierr,104,"dfe") f__fmtbuf=a->cifmt; if(a->cirec <= 0) err(a->cierr,130,"dfe") FSEEK(f__cf,(OFF_T)f__curunit->url * (a->cirec-1),SEEK_SET); f__curunit->uend = 0; return(0); } #ifdef KR_headers integer s_rdfe(a) cilist *a; #else integer s_rdfe(cilist *a) #endif { int n; if(!f__init) f_init(); f__reading=1; if(n=c_dfe(a))return(n); if(f__curunit->uwrt && f__nowreading(f__curunit)) err(a->cierr,errno,"read start"); f__getn = y_getc; f__doed = rd_ed; f__doned = rd_ned; f__dorevert = f__donewrec = y_err; f__doend = y_rsk; if(pars_f(f__fmtbuf)<0) err(a->cierr,100,"read start"); fmt_bg(); return(0); } #ifdef KR_headers integer s_wdfe(a) cilist *a; #else integer s_wdfe(cilist *a) #endif { int n; if(!f__init) f_init(); f__reading=0; if(n=c_dfe(a)) return(n); if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) err(a->cierr,errno,"startwrt"); f__putn = x_putc; f__doed = w_ed; f__doned= w_ned; f__dorevert = y_err; f__donewrec = y_newrec; f__doend = y_rev; if(pars_f(f__fmtbuf)<0) err(a->cierr,100,"startwrt"); fmt_bg(); return(0); } integer e_rdfe(Void) { en_fio(); return 0; } integer e_wdfe(Void) { return en_fio(); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/dolio.c000066400000000000000000000007271507764646700222460ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern int (*f__lioproc)(); integer do_lio(type,number,ptr,len) ftnint *number,*type; char *ptr; ftnlen len; #else extern int (*f__lioproc)(ftnint*, char*, ftnlen, ftnint); integer do_lio(ftnint *type, ftnint *number, char *ptr, ftnlen len) #endif { return((*f__lioproc)(number,ptr,len,*type)); } #ifdef __cplusplus } #endif #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/dtime_.c000066400000000000000000000017141507764646700223760ustar00rootroot00000000000000#include "time.h" #ifdef MSDOS #undef USE_CLOCK #define USE_CLOCK #endif #ifndef REAL #define REAL double #endif #ifndef USE_CLOCK #define _INCLUDE_POSIX_SOURCE /* for HP-UX */ #define _INCLUDE_XOPEN_SOURCE /* for HP-UX */ #include "sys/types.h" #include "sys/times.h" #ifdef __cplusplus extern "C" { #endif #endif #undef Hz #ifdef CLK_TCK #define Hz CLK_TCK #else #ifdef HZ #define Hz HZ #else #define Hz 60 #endif #endif REAL #ifdef KR_headers dtime_(tarray) float *tarray; #else dtime_(float *tarray) #endif { #ifdef USE_CLOCK #ifndef CLOCKS_PER_SECOND #define CLOCKS_PER_SECOND Hz #endif static double t0; double t = clock(); tarray[1] = 0; tarray[0] = (t - t0) / CLOCKS_PER_SECOND; t0 = t; return tarray[0]; #else struct tms t; static struct tms t0; times(&t); tarray[0] = (double)(t.tms_utime - t0.tms_utime) / Hz; tarray[1] = (double)(t.tms_stime - t0.tms_stime) / Hz; t0 = t; return tarray[0] + tarray[1]; #endif } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/due.c000066400000000000000000000031301507764646700217040ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #ifdef __cplusplus extern "C" { #endif int #ifdef KR_headers c_due(a) cilist *a; #else c_due(cilist *a) #endif { if(!f__init) f_init(); f__sequential=f__formatted=f__recpos=0; f__external=1; f__curunit = &f__units[a->ciunit]; if(a->ciunit>=MXUNIT || a->ciunit<0) err(a->cierr,101,"startio"); f__elist=a; if(f__curunit->ufd==NULL && fk_open(DIR,UNF,a->ciunit) ) err(a->cierr,104,"due"); f__cf=f__curunit->ufd; if(f__curunit->ufmt) err(a->cierr,102,"cdue") if(!f__curunit->useek) err(a->cierr,104,"cdue") if(f__curunit->ufd==NULL) err(a->cierr,114,"cdue") if(a->cirec <= 0) err(a->cierr,130,"due") FSEEK(f__cf,(OFF_T)(a->cirec-1)*f__curunit->url,SEEK_SET); f__curunit->uend = 0; return(0); } #ifdef KR_headers integer s_rdue(a) cilist *a; #else integer s_rdue(cilist *a) #endif { int n; f__reading=1; if(n=c_due(a)) return(n); if(f__curunit->uwrt && f__nowreading(f__curunit)) err(a->cierr,errno,"read start"); return(0); } #ifdef KR_headers integer s_wdue(a) cilist *a; #else integer s_wdue(cilist *a) #endif { int n; f__reading=0; if(n=c_due(a)) return(n); if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) err(a->cierr,errno,"write start"); return(0); } integer e_rdue(Void) { if(f__curunit->url==1 || f__recpos==f__curunit->url) return(0); FSEEK(f__cf,(OFF_T)(f__curunit->url-f__recpos),SEEK_CUR); if(FTELL(f__cf)%f__curunit->url) err(f__elist->cierr,200,"syserr"); return(0); } integer e_wdue(Void) { #ifdef ALWAYS_FLUSH if (fflush(f__cf)) err(f__elist->cierr,errno,"write end"); #endif return(e_rdue()); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/ef1asc_.c000066400000000000000000000010111507764646700224240ustar00rootroot00000000000000/* EFL support routine to copy string b to string a */ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif #define M ( (long) (sizeof(long) - 1) ) #define EVEN(x) ( ( (x)+ M) & (~M) ) #ifdef KR_headers extern VOID s_copy(); ef1asc_(a, la, b, lb) ftnint *a, *b; ftnlen *la, *lb; #else extern void s_copy(char*,char*,ftnlen,ftnlen); int ef1asc_(ftnint *a, ftnlen *la, ftnint *b, ftnlen *lb) #endif { s_copy( (char *)a, (char *)b, EVEN(*la), *lb ); return 0; /* ignored return value */ } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/ef1cmc_.c000066400000000000000000000006731507764646700224350ustar00rootroot00000000000000/* EFL support routine to compare two character strings */ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern integer s_cmp(); integer _starpu_ef1cmc_(a, la, b, lb) ftnint *a, *b; ftnlen *la, *lb; #else extern integer s_cmp(char*,char*,ftnlen,ftnlen); integer _starpu_ef1cmc_(ftnint *a, ftnlen *la, ftnint *b, ftnlen *lb) #endif { return( s_cmp( (char *)a, (char *)b, *la, *lb) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/endfile.c000066400000000000000000000054261507764646700225470ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" /* Compile this with -DNO_TRUNCATE if unistd.h does not exist or */ /* if it does not define int truncate(const char *name, off_t). */ #ifdef MSDOS #undef NO_TRUNCATE #define NO_TRUNCATE #endif #ifndef NO_TRUNCATE #include "unistd.h" #endif #ifdef KR_headers extern char *strcpy(); extern FILE *tmpfile(); #else #undef abs #undef min #undef max #include "stdlib.h" #include "string.h" #ifdef __cplusplus extern "C" { #endif #endif extern char *f__r_mode[], *f__w_mode[]; #ifdef KR_headers integer f_end(a) alist *a; #else integer f_end(alist *a) #endif { unit *b; FILE *tf; if(a->aunit>=MXUNIT || a->aunit<0) err(a->aerr,101,"endfile"); b = &f__units[a->aunit]; if(b->ufd==NULL) { char nbuf[10]; sprintf(nbuf,"fort.%ld",(long)a->aunit); if (tf = FOPEN(nbuf, f__w_mode[0])) fclose(tf); return(0); } b->uend=1; return(b->useek ? t_runc(a) : 0); } #ifdef NO_TRUNCATE static int #ifdef KR_headers copy(from, len, to) FILE *from, *to; register long len; #else copy(FILE *from, register long len, FILE *to) #endif { int len1; char buf[BUFSIZ]; while(fread(buf, len1 = len > BUFSIZ ? BUFSIZ : (int)len, 1, from)) { if (!fwrite(buf, len1, 1, to)) return 1; if ((len -= len1) <= 0) break; } return 0; } #endif /* NO_TRUNCATE */ int #ifdef KR_headers t_runc(a) alist *a; #else t_runc(alist *a) #endif { OFF_T loc, len; unit *b; int rc; FILE *bf; #ifdef NO_TRUNCATE FILE *tf; #endif b = &f__units[a->aunit]; if(b->url) return(0); /*don't truncate direct files*/ loc=FTELL(bf = b->ufd); FSEEK(bf,(OFF_T)0,SEEK_END); len=FTELL(bf); if (loc >= len || b->useek == 0) return(0); #ifdef NO_TRUNCATE if (b->ufnm == NULL) return 0; rc = 0; fclose(b->ufd); if (!loc) { if (!(bf = FOPEN(b->ufnm, f__w_mode[b->ufmt]))) rc = 1; if (b->uwrt) b->uwrt = 1; goto done; } if (!(bf = FOPEN(b->ufnm, f__r_mode[0])) || !(tf = tmpfile())) { #ifdef NON_UNIX_STDIO bad: #endif rc = 1; goto done; } if (copy(bf, (long)loc, tf)) { bad1: rc = 1; goto done1; } if (!(bf = FREOPEN(b->ufnm, f__w_mode[0], bf))) goto bad1; rewind(tf); if (copy(tf, (long)loc, bf)) goto bad1; b->uwrt = 1; b->urw = 2; #ifdef NON_UNIX_STDIO if (b->ufmt) { fclose(bf); if (!(bf = FOPEN(b->ufnm, f__w_mode[3]))) goto bad; FSEEK(bf,(OFF_T)0,SEEK_END); b->urw = 3; } #endif done1: fclose(tf); done: f__cf = b->ufd = bf; #else /* NO_TRUNCATE */ if (b->urw & 2) fflush(b->ufd); /* necessary on some Linux systems */ #ifndef FTRUNCATE #define FTRUNCATE ftruncate #endif rc = FTRUNCATE(fileno(b->ufd), loc); /* The following FSEEK is unnecessary on some systems, */ /* but should be harmless. */ FSEEK(b->ufd, (OFF_T)0, SEEK_END); #endif /* NO_TRUNCATE */ if (rc) err(a->aerr,111,"endfile"); return 0; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/erf_.c000066400000000000000000000004161507764646700220460ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifndef REAL #define REAL double #endif #ifdef KR_headers double erf(); REAL erf_(x) real *x; #else extern double erf(double); REAL erf_(real *x) #endif { return( erf((double)*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/erfc_.c000066400000000000000000000004231507764646700222070ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifndef REAL #define REAL double #endif #ifdef KR_headers double erfc(); REAL erfc_(x) real *x; #else extern double erfc(double); REAL erfc_(real *x) #endif { return( erfc((double)*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/err.c000066400000000000000000000144521507764646700217300ustar00rootroot00000000000000#include "sysdep1.h" /* here to get stat64 on some badly designed Linux systems */ #include "f2c.h" #ifdef KR_headers #define Const /*nothing*/ extern char *malloc(); #else #define Const const #undef abs #undef min #undef max #include "stdlib.h" #endif #include "fio.h" #include "fmt.h" /* for struct syl */ /* Compile this with -DNO_ISATTY if unistd.h does not exist or */ /* if it does not define int isatty(int). */ #ifdef NO_ISATTY #define isatty(x) 0 #else #include #endif #ifdef __cplusplus extern "C" { #endif /*global definitions*/ unit f__units[MXUNIT]; /*unit table*/ flag f__init; /*0 on entry, 1 after initializations*/ cilist *f__elist; /*active external io list*/ icilist *f__svic; /*active internal io list*/ flag f__reading; /*1 if reading, 0 if writing*/ flag f__cplus,f__cblank; Const char *f__fmtbuf; flag f__external; /*1 if external io, 0 if internal */ #ifdef KR_headers int (*f__doed)(),(*f__doned)(); int (*f__doend)(),(*f__donewrec)(),(*f__dorevert)(); int (*f__getn)(); /* for formatted input */ void (*f__putn)(); /* for formatted output */ #else int (*f__getn)(void); /* for formatted input */ void (*f__putn)(int); /* for formatted output */ int (*f__doed)(struct syl*, char*, ftnlen),(*f__doned)(struct syl*); int (*f__dorevert)(void),(*f__donewrec)(void),(*f__doend)(void); #endif flag f__sequential; /*1 if sequential io, 0 if direct*/ flag f__formatted; /*1 if formatted io, 0 if unformatted*/ FILE *f__cf; /*current file*/ unit *f__curunit; /*current unit*/ int f__recpos; /*place in current record*/ OFF_T f__cursor, f__hiwater; int f__scale; char *f__icptr; /*error messages*/ Const char *F_err[] = { "error in format", /* 100 */ "illegal unit number", /* 101 */ "formatted io not allowed", /* 102 */ "unformatted io not allowed", /* 103 */ "direct io not allowed", /* 104 */ "sequential io not allowed", /* 105 */ "can't backspace file", /* 106 */ "null file name", /* 107 */ "can't stat file", /* 108 */ "unit not connected", /* 109 */ "off end of record", /* 110 */ "truncation failed in endfile", /* 111 */ "incomprehensible list input", /* 112 */ "out of free space", /* 113 */ "unit not connected", /* 114 */ "read unexpected character", /* 115 */ "bad logical input field", /* 116 */ "bad variable type", /* 117 */ "bad namelist name", /* 118 */ "variable not in namelist", /* 119 */ "no end record", /* 120 */ "variable count incorrect", /* 121 */ "subscript for scalar variable", /* 122 */ "invalid array section", /* 123 */ "substring out of bounds", /* 124 */ "subscript out of bounds", /* 125 */ "can't read file", /* 126 */ "can't write file", /* 127 */ "'new' file exists", /* 128 */ "can't append to file", /* 129 */ "non-positive record number", /* 130 */ "nmLbuf overflow" /* 131 */ }; #define MAXERR (sizeof(F_err)/sizeof(char *)+100) int #ifdef KR_headers f__canseek(f) FILE *f; /*SYSDEP*/ #else f__canseek(FILE *f) /*SYSDEP*/ #endif { #ifdef NON_UNIX_STDIO return !isatty(fileno(f)); #else struct STAT_ST x; if (FSTAT(fileno(f),&x) < 0) return(0); #ifdef S_IFMT switch(x.st_mode & S_IFMT) { case S_IFDIR: case S_IFREG: if(x.st_nlink > 0) /* !pipe */ return(1); else return(0); case S_IFCHR: if(isatty(fileno(f))) return(0); return(1); #ifdef S_IFBLK case S_IFBLK: return(1); #endif } #else #ifdef S_ISDIR /* POSIX version */ if (S_ISREG(x.st_mode) || S_ISDIR(x.st_mode)) { if(x.st_nlink > 0) /* !pipe */ return(1); else return(0); } if (S_ISCHR(x.st_mode)) { if(isatty(fileno(f))) return(0); return(1); } if (S_ISBLK(x.st_mode)) return(1); #else Help! How does fstat work on this system? #endif #endif return(0); /* who knows what it is? */ #endif } void #ifdef KR_headers f__fatal(n,s) char *s; #else f__fatal(int n, const char *s) #endif { if(n<100 && n>=0) perror(s); /*SYSDEP*/ else if(n >= (int)MAXERR || n < -1) { fprintf(stderr,"%s: illegal error number %d\n",s,n); } else if(n == -1) fprintf(stderr,"%s: end of file\n",s); else fprintf(stderr,"%s: %s\n",s,F_err[n-100]); if (f__curunit) { fprintf(stderr,"apparent state: unit %d ", (int)(f__curunit-f__units)); fprintf(stderr, f__curunit->ufnm ? "named %s\n" : "(unnamed)\n", f__curunit->ufnm); } else fprintf(stderr,"apparent state: internal I/O\n"); if (f__fmtbuf) fprintf(stderr,"last format: %s\n",f__fmtbuf); fprintf(stderr,"lately %s %s %s %s",f__reading?"reading":"writing", f__sequential?"sequential":"direct",f__formatted?"formatted":"unformatted", f__external?"external":"internal"); sig_die(" IO", 1); } /*initialization routine*/ VOID f_init(Void) { unit *p; f__init=1; p= &f__units[0]; p->ufd=stderr; p->useek=f__canseek(stderr); p->ufmt=1; p->uwrt=1; p = &f__units[5]; p->ufd=stdin; p->useek=f__canseek(stdin); p->ufmt=1; p->uwrt=0; p= &f__units[6]; p->ufd=stdout; p->useek=f__canseek(stdout); p->ufmt=1; p->uwrt=1; } int #ifdef KR_headers f__nowreading(x) unit *x; #else f__nowreading(unit *x) #endif { OFF_T loc; int ufmt, urw; extern char *f__r_mode[], *f__w_mode[]; if (x->urw & 1) goto done; if (!x->ufnm) goto cantread; ufmt = x->url ? 0 : x->ufmt; loc = FTELL(x->ufd); urw = 3; if (!FREOPEN(x->ufnm, f__w_mode[ufmt|2], x->ufd)) { urw = 1; if(!FREOPEN(x->ufnm, f__r_mode[ufmt], x->ufd)) { cantread: errno = 126; return 1; } } FSEEK(x->ufd,loc,SEEK_SET); x->urw = urw; done: x->uwrt = 0; return 0; } int #ifdef KR_headers f__nowwriting(x) unit *x; #else f__nowwriting(unit *x) #endif { OFF_T loc; int ufmt; extern char *f__w_mode[]; if (x->urw & 2) { if (x->urw & 1) FSEEK(x->ufd, (OFF_T)0, SEEK_CUR); goto done; } if (!x->ufnm) goto cantwrite; ufmt = x->url ? 0 : x->ufmt; if (x->uwrt == 3) { /* just did write, rewind */ if (!(f__cf = x->ufd = FREOPEN(x->ufnm,f__w_mode[ufmt],x->ufd))) goto cantwrite; x->urw = 2; } else { loc=FTELL(x->ufd); if (!(f__cf = x->ufd = FREOPEN(x->ufnm, f__w_mode[ufmt | 2], x->ufd))) { x->ufd = NULL; cantwrite: errno = 127; return(1); } x->urw = 3; FSEEK(x->ufd,loc,SEEK_SET); } done: x->uwrt = 1; return 0; } int #ifdef KR_headers err__fl(f, m, s) int f, m; char *s; #else err__fl(int f, int m, const char *s) #endif { if (!f) f__fatal(m, s); if (f__doend) (*f__doend)(); return errno = m; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/etime_.c000066400000000000000000000015071507764646700223770ustar00rootroot00000000000000#include "time.h" #ifdef MSDOS #undef USE_CLOCK #define USE_CLOCK #endif #ifndef REAL #define REAL double #endif #ifndef USE_CLOCK #define _INCLUDE_POSIX_SOURCE /* for HP-UX */ #define _INCLUDE_XOPEN_SOURCE /* for HP-UX */ #include "sys/types.h" #include "sys/times.h" #ifdef __cplusplus extern "C" { #endif #endif #undef Hz #ifdef CLK_TCK #define Hz CLK_TCK #else #ifdef HZ #define Hz HZ #else #define Hz 60 #endif #endif REAL #ifdef KR_headers etime_(tarray) float *tarray; #else etime_(float *tarray) #endif { #ifdef USE_CLOCK #ifndef CLOCKS_PER_SECOND #define CLOCKS_PER_SECOND Hz #endif double t = clock(); tarray[1] = 0; return tarray[0] = t / CLOCKS_PER_SECOND; #else struct tms t; times(&t); return (tarray[0] = (double)t.tms_utime/Hz) + (tarray[1] = (double)t.tms_stime/Hz); #endif } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/exit_.c000066400000000000000000000010371507764646700222430ustar00rootroot00000000000000/* This gives the effect of subroutine exit(rc) integer*4 rc stop end * with the added side effect of supplying rc as the program's exit code. */ #include "f2c.h" #undef abs #undef min #undef max #ifndef KR_headers #include "stdlib.h" #ifdef __cplusplus extern "C" { #endif #ifdef __cplusplus extern "C" { #endif extern void f_exit(void); #endif void #ifdef KR_headers exit_(rc) integer *rc; #else exit_(integer *rc) #endif { #ifdef NO_ONEXIT f_exit(); #endif exit(*rc); } #ifdef __cplusplus } #endif #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/f2c.h000066400000000000000000000111201507764646700216040ustar00rootroot00000000000000/* f2c.h -- Standard Fortran to C header file */ /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ #ifndef F2C_INCLUDE #define F2C_INCLUDE typedef long int integer; typedef unsigned long int uinteger; typedef char *address; typedef short int shortint; typedef float real; typedef double doublereal; typedef struct { real r, i; } complex; typedef struct { doublereal r, i; } doublecomplex; typedef long int logical; typedef short int shortlogical; typedef char logical1; typedef char integer1; #ifdef INTEGER_STAR_8 /* Adjust for integer*8. */ typedef long long longint; /* system-dependent */ typedef unsigned long long ulongint; /* system-dependent */ #define qbit_clear(a,b) ((a) & ~((ulongint)1 << (b))) #define qbit_set(a,b) ((a) | ((ulongint)1 << (b))) #endif #define TRUE_ (1) #define FALSE_ (0) /* Extern is for use with -E */ #ifndef Extern #define Extern extern #endif /* I/O stuff */ #ifdef f2c_i2 /* for -i2 */ typedef short flag; typedef short ftnlen; typedef short ftnint; #else typedef long int flag; typedef long int ftnlen; typedef long int ftnint; #endif /*external read, write*/ typedef struct { flag cierr; ftnint ciunit; flag ciend; char *cifmt; ftnint cirec; } cilist; /*internal read, write*/ typedef struct { flag icierr; char *iciunit; flag iciend; char *icifmt; ftnint icirlen; ftnint icirnum; } icilist; /*open*/ typedef struct { flag oerr; ftnint ounit; char *ofnm; ftnlen ofnmlen; char *osta; char *oacc; char *ofm; ftnint orl; char *oblnk; } olist; /*close*/ typedef struct { flag cerr; ftnint cunit; char *csta; } cllist; /*rewind, backspace, endfile*/ typedef struct { flag aerr; ftnint aunit; } alist; /* inquire */ typedef struct { flag inerr; ftnint inunit; char *infile; ftnlen infilen; ftnint *inex; /*parameters in standard's order*/ ftnint *inopen; ftnint *innum; ftnint *innamed; char *inname; ftnlen innamlen; char *inacc; ftnlen inacclen; char *inseq; ftnlen inseqlen; char *indir; ftnlen indirlen; char *infmt; ftnlen infmtlen; char *inform; ftnint informlen; char *inunf; ftnlen inunflen; ftnint *inrecl; ftnint *innrec; char *inblank; ftnlen inblanklen; } inlist; #define VOID void union Multitype { /* for multiple entry points */ integer1 g; shortint h; integer i; /* longint j; */ real r; doublereal d; complex c; doublecomplex z; }; typedef union Multitype Multitype; /*typedef long int Long;*/ /* No longer used; formerly in Namelist */ struct Vardesc { /* for Namelist */ char *name; char *addr; ftnlen *dims; int type; }; typedef struct Vardesc Vardesc; struct Namelist { char *name; Vardesc **vars; int nvars; }; typedef struct Namelist Namelist; #define abs(x) ((x) >= 0 ? (x) : -(x)) #define dabs(x) (doublereal)abs(x) #define min(a,b) ((a) <= (b) ? (a) : (b)) #define max(a,b) ((a) >= (b) ? (a) : (b)) #define dmin(a,b) (doublereal)min(a,b) #define dmax(a,b) (doublereal)max(a,b) #define bit_test(a,b) ((a) >> (b) & 1) #define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) #define bit_set(a,b) ((a) | ((uinteger)1 << (b))) /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 #ifdef __cplusplus typedef int /* Unknown procedure type */ (*U_fp)(...); typedef shortint (*J_fp)(...); typedef integer (*I_fp)(...); typedef real (*R_fp)(...); typedef doublereal (*D_fp)(...), (*E_fp)(...); typedef /* Complex */ VOID (*C_fp)(...); typedef /* Double Complex */ VOID (*Z_fp)(...); typedef logical (*L_fp)(...); typedef shortlogical (*K_fp)(...); typedef /* Character */ VOID (*H_fp)(...); typedef /* Subroutine */ int (*S_fp)(...); #else typedef int /* Unknown procedure type */ (*U_fp)(); typedef shortint (*J_fp)(); typedef integer (*I_fp)(); typedef real (*R_fp)(); typedef doublereal (*D_fp)(), (*E_fp)(); typedef /* Complex */ VOID (*C_fp)(); typedef /* Double Complex */ VOID (*Z_fp)(); typedef logical (*L_fp)(); typedef shortlogical (*K_fp)(); typedef /* Character */ VOID (*H_fp)(); typedef /* Subroutine */ int (*S_fp)(); #endif /* E_fp is for real functions when -R is not specified */ typedef VOID C_f; /* complex function */ typedef VOID H_f; /* character function */ typedef VOID Z_f; /* double complex function */ typedef doublereal E_f; /* real function with -R not specified */ /* undef any lower-case symbols that your C compiler predefines, e.g.: */ #ifndef Skip_f2c_Undefs #undef cray #undef gcos #undef mc68010 #undef mc68020 #undef mips #undef pdp11 #undef sgi #undef sparc #undef sun #undef sun2 #undef sun3 #undef sun4 #undef u370 #undef u3b #undef u3b2 #undef u3b5 #undef unix #undef vax #endif #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/f2c.h0000066400000000000000000000111201507764646700216640ustar00rootroot00000000000000/* f2c.h -- Standard Fortran to C header file */ /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ #ifndef F2C_INCLUDE #define F2C_INCLUDE typedef long int integer; typedef unsigned long int uinteger; typedef char *address; typedef short int shortint; typedef float real; typedef double doublereal; typedef struct { real r, i; } complex; typedef struct { doublereal r, i; } doublecomplex; typedef long int logical; typedef short int shortlogical; typedef char logical1; typedef char integer1; #ifdef INTEGER_STAR_8 /* Adjust for integer*8. */ typedef long long longint; /* system-dependent */ typedef unsigned long long ulongint; /* system-dependent */ #define qbit_clear(a,b) ((a) & ~((ulongint)1 << (b))) #define qbit_set(a,b) ((a) | ((ulongint)1 << (b))) #endif #define TRUE_ (1) #define FALSE_ (0) /* Extern is for use with -E */ #ifndef Extern #define Extern extern #endif /* I/O stuff */ #ifdef f2c_i2 /* for -i2 */ typedef short flag; typedef short ftnlen; typedef short ftnint; #else typedef long int flag; typedef long int ftnlen; typedef long int ftnint; #endif /*external read, write*/ typedef struct { flag cierr; ftnint ciunit; flag ciend; char *cifmt; ftnint cirec; } cilist; /*internal read, write*/ typedef struct { flag icierr; char *iciunit; flag iciend; char *icifmt; ftnint icirlen; ftnint icirnum; } icilist; /*open*/ typedef struct { flag oerr; ftnint ounit; char *ofnm; ftnlen ofnmlen; char *osta; char *oacc; char *ofm; ftnint orl; char *oblnk; } olist; /*close*/ typedef struct { flag cerr; ftnint cunit; char *csta; } cllist; /*rewind, backspace, endfile*/ typedef struct { flag aerr; ftnint aunit; } alist; /* inquire */ typedef struct { flag inerr; ftnint inunit; char *infile; ftnlen infilen; ftnint *inex; /*parameters in standard's order*/ ftnint *inopen; ftnint *innum; ftnint *innamed; char *inname; ftnlen innamlen; char *inacc; ftnlen inacclen; char *inseq; ftnlen inseqlen; char *indir; ftnlen indirlen; char *infmt; ftnlen infmtlen; char *inform; ftnint informlen; char *inunf; ftnlen inunflen; ftnint *inrecl; ftnint *innrec; char *inblank; ftnlen inblanklen; } inlist; #define VOID void union Multitype { /* for multiple entry points */ integer1 g; shortint h; integer i; /* longint j; */ real r; doublereal d; complex c; doublecomplex z; }; typedef union Multitype Multitype; /*typedef long int Long;*/ /* No longer used; formerly in Namelist */ struct Vardesc { /* for Namelist */ char *name; char *addr; ftnlen *dims; int type; }; typedef struct Vardesc Vardesc; struct Namelist { char *name; Vardesc **vars; int nvars; }; typedef struct Namelist Namelist; #define abs(x) ((x) >= 0 ? (x) : -(x)) #define dabs(x) (doublereal)abs(x) #define min(a,b) ((a) <= (b) ? (a) : (b)) #define max(a,b) ((a) >= (b) ? (a) : (b)) #define dmin(a,b) (doublereal)min(a,b) #define dmax(a,b) (doublereal)max(a,b) #define bit_test(a,b) ((a) >> (b) & 1) #define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) #define bit_set(a,b) ((a) | ((uinteger)1 << (b))) /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 #ifdef __cplusplus typedef int /* Unknown procedure type */ (*U_fp)(...); typedef shortint (*J_fp)(...); typedef integer (*I_fp)(...); typedef real (*R_fp)(...); typedef doublereal (*D_fp)(...), (*E_fp)(...); typedef /* Complex */ VOID (*C_fp)(...); typedef /* Double Complex */ VOID (*Z_fp)(...); typedef logical (*L_fp)(...); typedef shortlogical (*K_fp)(...); typedef /* Character */ VOID (*H_fp)(...); typedef /* Subroutine */ int (*S_fp)(...); #else typedef int /* Unknown procedure type */ (*U_fp)(); typedef shortint (*J_fp)(); typedef integer (*I_fp)(); typedef real (*R_fp)(); typedef doublereal (*D_fp)(), (*E_fp)(); typedef /* Complex */ VOID (*C_fp)(); typedef /* Double Complex */ VOID (*Z_fp)(); typedef logical (*L_fp)(); typedef shortlogical (*K_fp)(); typedef /* Character */ VOID (*H_fp)(); typedef /* Subroutine */ int (*S_fp)(); #endif /* E_fp is for real functions when -R is not specified */ typedef VOID C_f; /* complex function */ typedef VOID H_f; /* character function */ typedef VOID Z_f; /* double complex function */ typedef doublereal E_f; /* real function with -R not specified */ /* undef any lower-case symbols that your C compiler predefines, e.g.: */ #ifndef Skip_f2c_Undefs #undef cray #undef gcos #undef mc68010 #undef mc68020 #undef mips #undef pdp11 #undef sgi #undef sparc #undef sun #undef sun2 #undef sun3 #undef sun4 #undef u370 #undef u3b #undef u3b2 #undef u3b5 #undef unix #undef vax #endif #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/f2ch.add000066400000000000000000000136641507764646700222740ustar00rootroot00000000000000/* If you are using a C++ compiler, append the following to f2c.h for compiling libF77 and libI77. */ #ifdef __cplusplus extern "C" { extern int abort_(void); extern double c_abs(complex *); extern void c_cos(complex *, complex *); extern void c_div(complex *, complex *, complex *); extern void c_exp(complex *, complex *); extern void c_log(complex *, complex *); extern void c_sin(complex *, complex *); extern void c_sqrt(complex *, complex *); extern double d_abs(double *); extern double d_acos(double *); extern double d_asin(double *); extern double d_atan(double *); extern double d_atn2(double *, double *); extern void d_cnjg(doublecomplex *, doublecomplex *); extern double d_cos(double *); extern double d_cosh(double *); extern double d_dim(double *, double *); extern double d_exp(double *); extern double d_imag(doublecomplex *); extern double d_int(double *); extern double d_lg10(double *); extern double d_log(double *); extern double d_mod(double *, double *); extern double d_nint(double *); extern double d_prod(float *, float *); extern double d_sign(double *, double *); extern double d_sin(double *); extern double d_sinh(double *); extern double d_sqrt(double *); extern double d_tan(double *); extern double d_tanh(double *); extern double derf_(double *); extern double derfc_(double *); extern integer do_fio(ftnint *, char *, ftnlen); extern integer do_lio(ftnint *, ftnint *, char *, ftnlen); extern integer do_uio(ftnint *, char *, ftnlen); extern integer e_rdfe(void); extern integer e_rdue(void); extern integer e_rsfe(void); extern integer e_rsfi(void); extern integer e_rsle(void); extern integer e_rsli(void); extern integer e_rsue(void); extern integer e_wdfe(void); extern integer e_wdue(void); extern integer e_wsfe(void); extern integer e_wsfi(void); extern integer e_wsle(void); extern integer e_wsli(void); extern integer e_wsue(void); extern int ef1asc_(ftnint *, ftnlen *, ftnint *, ftnlen *); extern integer _starpu_ef1cmc_(ftnint *, ftnlen *, ftnint *, ftnlen *); extern double erf(double); extern double erf_(float *); extern double erfc(double); extern double erfc_(float *); extern integer f_back(alist *); extern integer f_clos(cllist *); extern integer f_end(alist *); extern void f_exit(void); extern integer f_inqu(inlist *); extern integer f_open(olist *); extern integer f_rew(alist *); extern int flush_(void); extern void getarg_(integer *, char *, ftnlen); extern void getenv_(char *, char *, ftnlen, ftnlen); extern short h_abs(short *); extern short h_dim(short *, short *); extern short h_dnnt(double *); extern short h_indx(char *, char *, ftnlen, ftnlen); extern short h_len(char *, ftnlen); extern short h_mod(short *, short *); extern short h_nint(float *); extern short h_sign(short *, short *); extern short hl_ge(char *, char *, ftnlen, ftnlen); extern short hl_gt(char *, char *, ftnlen, ftnlen); extern short hl_le(char *, char *, ftnlen, ftnlen); extern short hl_lt(char *, char *, ftnlen, ftnlen); extern integer i_abs(integer *); extern integer i_dim(integer *, integer *); extern integer i_dnnt(double *); extern integer i_indx(char *, char *, ftnlen, ftnlen); extern integer i_len(char *, ftnlen); extern integer i_mod(integer *, integer *); extern integer i_nint(float *); extern integer i_sign(integer *, integer *); extern integer iargc_(void); extern ftnlen l_ge(char *, char *, ftnlen, ftnlen); extern ftnlen l_gt(char *, char *, ftnlen, ftnlen); extern ftnlen l_le(char *, char *, ftnlen, ftnlen); extern ftnlen l_lt(char *, char *, ftnlen, ftnlen); extern void pow_ci(complex *, complex *, integer *); extern double pow_dd(double *, double *); extern double pow_di(double *, integer *); extern short pow_hh(short *, shortint *); extern integer pow_ii(integer *, integer *); extern double pow_ri(float *, integer *); extern void pow_zi(doublecomplex *, doublecomplex *, integer *); extern void pow_zz(doublecomplex *, doublecomplex *, doublecomplex *); extern double r_abs(float *); extern double r_acos(float *); extern double r_asin(float *); extern double r_atan(float *); extern double r_atn2(float *, float *); extern void r_cnjg(complex *, complex *); extern double r_cos(float *); extern double r_cosh(float *); extern double r_dim(float *, float *); extern double r_exp(float *); extern double r_imag(complex *); extern double r_int(float *); extern double r_lg10(float *); extern double r_log(float *); extern double r_mod(float *, float *); extern double r_nint(float *); extern double r_sign(float *, float *); extern double r_sin(float *); extern double r_sinh(float *); extern double r_sqrt(float *); extern double r_tan(float *); extern double r_tanh(float *); extern void s_cat(char *, char **, integer *, integer *, ftnlen); extern integer s_cmp(char *, char *, ftnlen, ftnlen); extern void s_copy(char *, char *, ftnlen, ftnlen); extern int s_paus(char *, ftnlen); extern integer s_rdfe(cilist *); extern integer s_rdue(cilist *); extern integer s_rnge(char *, integer, char *, integer); extern integer s_rsfe(cilist *); extern integer s_rsfi(icilist *); extern integer s_rsle(cilist *); extern integer s_rsli(icilist *); extern integer s_rsne(cilist *); extern integer s_rsni(icilist *); extern integer s_rsue(cilist *); extern int s_stop(char *, ftnlen); extern integer s_wdfe(cilist *); extern integer s_wdue(cilist *); extern integer s_wsfe(cilist *); extern integer s_wsfi(icilist *); extern integer s_wsle(cilist *); extern integer s_wsli(icilist *); extern integer s_wsne(cilist *); extern integer s_wsni(icilist *); extern integer s_wsue(cilist *); extern void sig_die(char *, int); extern integer signal_(integer *, void (*)(int)); extern integer system_(char *, ftnlen); extern double z_abs(doublecomplex *); extern void z_cos(doublecomplex *, doublecomplex *); extern void z_div(doublecomplex *, doublecomplex *, doublecomplex *); extern void z_exp(doublecomplex *, doublecomplex *); extern void z_log(doublecomplex *, doublecomplex *); extern void z_sin(doublecomplex *, doublecomplex *); extern void z_sqrt(doublecomplex *, doublecomplex *); } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/f77_aloc.c000066400000000000000000000012541507764646700225350ustar00rootroot00000000000000#include "f2c.h" #undef abs #undef min #undef max #include "stdio.h" static integer memfailure = 3; #ifdef KR_headers extern char *malloc(); extern void exit_(); char * F77_aloc(Len, whence) integer Len; char *whence; #else #include "stdlib.h" #ifdef __cplusplus extern "C" { #endif #ifdef __cplusplus extern "C" { #endif extern void exit_(integer*); #ifdef __cplusplus } #endif char * F77_aloc(integer Len, const char *whence) #endif { char *rv; unsigned int uLen = (unsigned int) Len; /* for K&R C */ if (!(rv = (char*)malloc(uLen))) { fprintf(stderr, "malloc(%u) failure in %s\n", uLen, whence); exit_(&memfailure); } return rv; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/f77vers.c000066400000000000000000000115051507764646700224370ustar00rootroot00000000000000 char _libf77_version_f2c[] = "\n@(#) LIBF77 VERSION (f2c) 20051004\n"; /* 2.00 11 June 1980. File version.c added to library. 2.01 31 May 1988. s_paus() flushes stderr; names of hl_* fixed [ d]erf[c ] added 8 Aug. 1989: #ifdefs for f2c -i2 added to s_cat.c 29 Nov. 1989: s_cmp returns long (for f2c) 30 Nov. 1989: arg types from f2c.h 12 Dec. 1989: s_rnge allows long names 19 Dec. 1989: getenv_ allows unsorted environment 28 Mar. 1990: add exit(0) to end of main() 2 Oct. 1990: test signal(...) == SIG_IGN rather than & 01 in main 17 Oct. 1990: abort() calls changed to sig_die(...,1) 22 Oct. 1990: separate sig_die from main 25 Apr. 1991: minor, theoretically invisible tweaks to s_cat, sig_die 31 May 1991: make system_ return status 18 Dec. 1991: change long to ftnlen (for -i2) many places 28 Feb. 1992: repair z_sqrt.c (scribbled on input, gave wrong answer) 18 July 1992: for n < 0, repair handling of 0**n in pow_[dr]i.c and m**n in pow_hh.c and pow_ii.c; catch SIGTRAP in main() for error msg before abort 23 July 1992: switch to ANSI prototypes unless KR_headers is #defined 23 Oct. 1992: fix botch in signal_.c (erroneous deref of 2nd arg); change Cabs to f__cabs. 12 March 1993: various tweaks for C++ 2 June 1994: adjust so abnormal terminations invoke f_exit just once 16 Sept. 1994: s_cmp: treat characters as unsigned in comparisons. 19 Sept. 1994: s_paus: flush after end of PAUSE; add -DMSDOS 12 Jan. 1995: pow_[dhiqrz][hiq]: adjust x**i to work on machines that sign-extend right shifts when i is the most negative integer. 26 Jan. 1995: adjust s_cat.c, s_copy.c to permit the left-hand side of character assignments to appear on the right-hand side (unless compiled with -DNO_OVERWRITE). 27 Jan. 1995: minor tweak to s_copy.c: copy forward whenever possible (for better cache behavior). 30 May 1995: added subroutine exit(rc) integer rc. Version not changed. 29 Aug. 1995: add F77_aloc.c; use it in s_cat.c and system_.c. 6 Sept. 1995: fix return type of system_ under -DKR_headers. 19 Dec. 1995: s_cat.c: fix bug when 2nd or later arg overlaps lhs. 19 Mar. 1996: s_cat.c: supply missing break after overlap detection. 13 May 1996: add [lq]bitbits.c and [lq]bitshft.c (f90 bit intrinsics). 19 June 1996: add casts to unsigned in [lq]bitshft.c. 26 Feb. 1997: adjust functions with a complex output argument to permit aliasing it with input arguments. (For now, at least, this is just for possible benefit of g77.) 4 April 1997: [cz]_div.c: tweaks invisible on most systems (that may affect systems using gratuitous extra precision). 19 Sept. 1997: [de]time_.c (Unix systems only): change return type to double. 2 May 1999: getenv_.c: omit environ in favor of getenv(). c_cos.c, c_exp.c, c_sin.c, d_cnjg.c, r_cnjg.c, z_cos.c, z_exp.c, z_log.c, z_sin.c: cope fully with overlapping arguments caused by equivalence. 3 May 1999: "invisible" tweaks to omit compiler warnings in abort_.c, ef1asc_.c, s_rnge.c, s_stop.c. 7 Sept. 1999: [cz]_div.c: arrange for compilation under -DIEEE_COMPLEX_DIVIDE to make these routines avoid calling sig_die when the denominator vanishes; instead, they return pairs of NaNs or Infinities, depending whether the numerator also vanishes or not. VERSION not changed. 15 Nov. 1999: s_rnge.c: add casts for the case of sizeof(ftnint) == sizeof(int) < sizeof(long). 10 March 2000: z_log.c: improve accuracy of Real(log(z)) for, e.g., z near (+-1,eps) with |eps| small. For the old evaluation, compile with -DPre20000310 . 20 April 2000: s_cat.c: tweak argument types to accord with calls by f2c when ftnint and ftnlen are of different sizes (different numbers of bits). 4 July 2000: adjustments to permit compilation by C++ compilers; VERSION string remains unchanged. 29 Sept. 2000: dtime_.c, etime_.c: use floating-point divide. dtime_.d, erf_.c, erfc_.c, etime.c: for use with "f2c -R", compile with -DREAL=float. 23 June 2001: add uninit.c; [fi]77vers.c: make version strings visible as extern char _lib[fi]77_version_f2c[]. 5 July 2001: modify uninit.c for __mc68k__ under Linux. 16 Nov. 2001: uninit.c: Linux Power PC logic supplied by Alan Bain. 18 Jan. 2002: fix glitches in qbit_bits(): wrong return type, missing ~ on y in return value. 14 March 2002: z_log.c: add code to cope with buggy compilers (e.g., some versions of gcc under -O2 or -O3) that do floating-point comparisons against values computed into extended-precision registers on some systems (such as Intel IA32 systems). Compile with -DNO_DOUBLE_EXTENDED to omit the new logic. 4 Oct. 2002: uninit.c: on IRIX systems, omit use of shell variables. 10 Oct 2005: uninit.c: on IA32 Linux systems, leave the rounding precision alone rather than forcing it to 53 bits; compile with -DUNINIT_F2C_PRECISION_53 to get the former behavior. */ starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/fio.h000066400000000000000000000055731507764646700217260ustar00rootroot00000000000000#ifndef SYSDEP_H_INCLUDED #include "sysdep1.h" #endif #include "stdio.h" #include "errno.h" #ifndef NULL /* ANSI C */ #include "stddef.h" #endif #ifndef SEEK_SET #define SEEK_SET 0 #define SEEK_CUR 1 #define SEEK_END 2 #endif #ifndef FOPEN #define FOPEN fopen #endif #ifndef FREOPEN #define FREOPEN freopen #endif #ifndef FSEEK #define FSEEK fseek #endif #ifndef FSTAT #define FSTAT fstat #endif #ifndef FTELL #define FTELL ftell #endif #ifndef OFF_T #define OFF_T long #endif #ifndef STAT_ST #define STAT_ST stat #endif #ifndef STAT #define STAT stat #endif #ifdef MSDOS #ifndef NON_UNIX_STDIO #define NON_UNIX_STDIO #endif #endif #ifdef UIOLEN_int typedef int uiolen; #else typedef long uiolen; #endif /*units*/ typedef struct { FILE *ufd; /*0=unconnected*/ char *ufnm; #ifndef MSDOS long uinode; int udev; #endif int url; /*0=sequential*/ flag useek; /*true=can backspace, use dir, ...*/ flag ufmt; flag urw; /* (1 for can read) | (2 for can write) */ flag ublnk; flag uend; flag uwrt; /*last io was write*/ flag uscrtch; } unit; #undef Void #ifdef KR_headers #define Void /*void*/ extern int (*f__getn)(); /* for formatted input */ extern void (*f__putn)(); /* for formatted output */ extern void x_putc(); extern long f__inode(); extern VOID sig_die(); extern int (*f__donewrec)(), t_putc(), x_wSL(); extern int c_sfe(), err__fl(), xrd_SL(), f__putbuf(); #else #define Void void #ifdef __cplusplus extern "C" { #endif extern int (*f__getn)(void); /* for formatted input */ extern void (*f__putn)(int); /* for formatted output */ extern void x_putc(int); extern long f__inode(char*,int*); extern void sig_die(const char*,int); extern void f__fatal(int, const char*); extern int t_runc(alist*); extern int f__nowreading(unit*), f__nowwriting(unit*); extern int fk_open(int,int,ftnint); extern int en_fio(void); extern void f_init(void); extern int (*f__donewrec)(void), t_putc(int), x_wSL(void); extern void b_char(const char*,char*,ftnlen), g_char(const char*,ftnlen,char*); extern int c_sfe(cilist*), z_rnew(void); extern int err__fl(int,int,const char*); extern int xrd_SL(void); extern int f__putbuf(int); #endif extern flag f__init; extern cilist *f__elist; /*active external io list*/ extern flag f__reading,f__external,f__sequential,f__formatted; extern int (*f__doend)(Void); extern FILE *f__cf; /*current file*/ extern unit *f__curunit; /*current unit*/ extern unit f__units[]; #define err(f,m,s) {if(f) errno= m; else f__fatal(m,s); return(m);} #define errfl(f,m,s) return err__fl((int)f,m,s) /*Table sizes*/ #define MXUNIT 100 extern int f__recpos; /*position in current record*/ extern OFF_T f__cursor; /* offset to move to */ extern OFF_T f__hiwater; /* so TL doesn't confuse us */ #ifdef __cplusplus } #endif #define WRITE 1 #define READ 2 #define SEQ 3 #define DIR 4 #define FMT 5 #define UNF 6 #define EXT 7 #define INT 8 #define buf_end(x) (x->_flag & _IONBF ? x->_ptr : x->_base + BUFSIZ) starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/fmt.c000066400000000000000000000205661507764646700217310ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #include "fmt.h" #ifdef __cplusplus extern "C" { #endif #define skip(s) while(*s==' ') s++ #ifdef interdata #define SYLMX 300 #endif #ifdef pdp11 #define SYLMX 300 #endif #ifdef vax #define SYLMX 300 #endif #ifndef SYLMX #define SYLMX 300 #endif #define GLITCH '\2' /* special quote character for stu */ extern flag f__cblank,f__cplus; /*blanks in I and compulsory plus*/ static struct syl f__syl[SYLMX]; int f__parenlvl,f__pc,f__revloc; #ifdef KR_headers #define Const /*nothing*/ #else #define Const const #endif static #ifdef KR_headers char *ap_end(s) char *s; #else const char *ap_end(const char *s) #endif { char quote; quote= *s++; for(;*s;s++) { if(*s!=quote) continue; if(*++s!=quote) return(s); } if(f__elist->cierr) { errno = 100; return(NULL); } f__fatal(100, "bad string"); /*NOTREACHED*/ return 0; } static int #ifdef KR_headers op_gen(a,b,c,d) #else op_gen(int a, int b, int c, int d) #endif { struct syl *p= &f__syl[f__pc]; if(f__pc>=SYLMX) { fprintf(stderr,"format too complicated:\n"); sig_die(f__fmtbuf, 1); } p->op=a; p->p1=b; p->p2.i[0]=c; p->p2.i[1]=d; return(f__pc++); } #ifdef KR_headers static char *f_list(); static char *gt_num(s,n,n1) char *s; int *n, n1; #else static const char *f_list(const char*); static const char *gt_num(const char *s, int *n, int n1) #endif { int m=0,f__cnt=0; char c; for(c= *s;;c = *s) { if(c==' ') { s++; continue; } if(c>'9' || c<'0') break; m=10*m+c-'0'; f__cnt++; s++; } if(f__cnt==0) { if (!n1) s = 0; *n=n1; } else *n=m; return(s); } static #ifdef KR_headers char *f_s(s,curloc) char *s; #else const char *f_s(const char *s, int curloc) #endif { skip(s); if(*s++!='(') { return(NULL); } if(f__parenlvl++ ==1) f__revloc=curloc; if(op_gen(RET1,curloc,0,0)<0 || (s=f_list(s))==NULL) { return(NULL); } skip(s); return(s); } static int #ifdef KR_headers ne_d(s,p) char *s,**p; #else ne_d(const char *s, const char **p) #endif { int n,x,sign=0; struct syl *sp; switch(*s) { default: return(0); case ':': (void) op_gen(COLON,0,0,0); break; case '$': (void) op_gen(NONL, 0, 0, 0); break; case 'B': case 'b': if(*++s=='z' || *s == 'Z') (void) op_gen(BZ,0,0,0); else (void) op_gen(BN,0,0,0); break; case 'S': case 's': if(*(s+1)=='s' || *(s+1) == 'S') { x=SS; s++; } else if(*(s+1)=='p' || *(s+1) == 'P') { x=SP; s++; } else x=S; (void) op_gen(x,0,0,0); break; case '/': (void) op_gen(SLASH,0,0,0); break; case '-': sign=1; case '+': s++; /*OUTRAGEOUS CODING TRICK*/ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (!(s=gt_num(s,&n,0))) { bad: *p = 0; return 1; } switch(*s) { default: return(0); case 'P': case 'p': if(sign) n= -n; (void) op_gen(P,n,0,0); break; case 'X': case 'x': (void) op_gen(X,n,0,0); break; case 'H': case 'h': sp = &f__syl[op_gen(H,n,0,0)]; sp->p2.s = (char*)s + 1; s+=n; break; } break; case GLITCH: case '"': case '\'': sp = &f__syl[op_gen(APOS,0,0,0)]; sp->p2.s = (char*)s; if((*p = ap_end(s)) == NULL) return(0); return(1); case 'T': case 't': if(*(s+1)=='l' || *(s+1) == 'L') { x=TL; s++; } else if(*(s+1)=='r'|| *(s+1) == 'R') { x=TR; s++; } else x=T; if (!(s=gt_num(s+1,&n,0))) goto bad; s--; (void) op_gen(x,n,0,0); break; case 'X': case 'x': (void) op_gen(X,1,0,0); break; case 'P': case 'p': (void) op_gen(P,1,0,0); break; } s++; *p=s; return(1); } static int #ifdef KR_headers e_d(s,p) char *s,**p; #else e_d(const char *s, const char **p) #endif { int i,im,n,w,d,e,found=0,x=0; Const char *sv=s; s=gt_num(s,&n,1); (void) op_gen(STACK,n,0,0); switch(*s++) { default: break; case 'E': case 'e': x=1; case 'G': case 'g': found=1; if (!(s=gt_num(s,&w,0))) { bad: *p = 0; return 1; } if(w==0) break; if(*s=='.') { if (!(s=gt_num(s+1,&d,0))) goto bad; } else d=0; if(*s!='E' && *s != 'e') (void) op_gen(x==1?E:G,w,d,0); /* default is Ew.dE2 */ else { if (!(s=gt_num(s+1,&e,0))) goto bad; (void) op_gen(x==1?EE:GE,w,d,e); } break; case 'O': case 'o': i = O; im = OM; goto finish_I; case 'Z': case 'z': i = Z; im = ZM; goto finish_I; case 'L': case 'l': found=1; if (!(s=gt_num(s,&w,0))) goto bad; if(w==0) break; (void) op_gen(L,w,0,0); break; case 'A': case 'a': found=1; skip(s); if(*s>='0' && *s<='9') { s=gt_num(s,&w,1); if(w==0) break; (void) op_gen(AW,w,0,0); break; } (void) op_gen(A,0,0,0); break; case 'F': case 'f': if (!(s=gt_num(s,&w,0))) goto bad; found=1; if(w==0) break; if(*s=='.') { if (!(s=gt_num(s+1,&d,0))) goto bad; } else d=0; (void) op_gen(F,w,d,0); break; case 'D': case 'd': found=1; if (!(s=gt_num(s,&w,0))) goto bad; if(w==0) break; if(*s=='.') { if (!(s=gt_num(s+1,&d,0))) goto bad; } else d=0; (void) op_gen(D,w,d,0); break; case 'I': case 'i': i = I; im = IM; finish_I: if (!(s=gt_num(s,&w,0))) goto bad; found=1; if(w==0) break; if(*s!='.') { (void) op_gen(i,w,0,0); break; } if (!(s=gt_num(s+1,&d,0))) goto bad; (void) op_gen(im,w,d,0); break; } if(found==0) { f__pc--; /*unSTACK*/ *p=sv; return(0); } *p=s; return(1); } static #ifdef KR_headers char *i_tem(s) char *s; #else const char *i_tem(const char *s) #endif { const char *t; int n,curloc; if(*s==')') return(s); if(ne_d(s,&t)) return(t); if(e_d(s,&t)) return(t); s=gt_num(s,&n,1); if((curloc=op_gen(STACK,n,0,0))<0) return(NULL); return(f_s(s,curloc)); } static #ifdef KR_headers char *f_list(s) char *s; #else const char *f_list(const char *s) #endif { for(;*s!=0;) { skip(s); if((s=i_tem(s))==NULL) return(NULL); skip(s); if(*s==',') s++; else if(*s==')') { if(--f__parenlvl==0) { (void) op_gen(REVERT,f__revloc,0,0); return(++s); } (void) op_gen(GOTO,0,0,0); return(++s); } } return(NULL); } int #ifdef KR_headers pars_f(s) char *s; #else pars_f(const char *s) #endif { f__parenlvl=f__revloc=f__pc=0; if(f_s(s,0) == NULL) { return(-1); } return(0); } #define STKSZ 10 int f__cnt[STKSZ],f__ret[STKSZ],f__cp,f__rp; flag f__workdone, f__nonl; static int #ifdef KR_headers type_f(n) #else type_f(int n) #endif { switch(n) { default: return(n); case RET1: return(RET1); case REVERT: return(REVERT); case GOTO: return(GOTO); case STACK: return(STACK); case X: case SLASH: case APOS: case H: case T: case TL: case TR: return(NED); case F: case I: case IM: case A: case AW: case O: case OM: case L: case E: case EE: case D: case G: case GE: case Z: case ZM: return(ED); } } #ifdef KR_headers integer do_fio(number,ptr,len) ftnint *number; ftnlen len; char *ptr; #else integer do_fio(ftnint *number, char *ptr, ftnlen len) #endif { struct syl *p; int n,i; for(i=0;i<*number;i++,ptr+=len) { loop: switch(type_f((p= &f__syl[f__pc])->op)) { default: fprintf(stderr,"unknown code in do_fio: %d\n%s\n", p->op,f__fmtbuf); err(f__elist->cierr,100,"do_fio"); case NED: if((*f__doned)(p)) { f__pc++; goto loop; } f__pc++; continue; case ED: if(f__cnt[f__cp]<=0) { f__cp--; f__pc++; goto loop; } if(ptr==NULL) return((*f__doend)()); f__cnt[f__cp]--; f__workdone=1; if((n=(*f__doed)(p,ptr,len))>0) errfl(f__elist->cierr,errno,"fmt"); if(n<0) err(f__elist->ciend,(EOF),"fmt"); continue; case STACK: f__cnt[++f__cp]=p->p1; f__pc++; goto loop; case RET1: f__ret[++f__rp]=p->p1; f__pc++; goto loop; case GOTO: if(--f__cnt[f__cp]<=0) { f__cp--; f__rp--; f__pc++; goto loop; } f__pc=1+f__ret[f__rp--]; goto loop; case REVERT: f__rp=f__cp=0; f__pc = p->p1; if(ptr==NULL) return((*f__doend)()); if(!f__workdone) return(0); if((n=(*f__dorevert)()) != 0) return(n); goto loop; case COLON: if(ptr==NULL) return((*f__doend)()); f__pc++; goto loop; case NONL: f__nonl = 1; f__pc++; goto loop; case S: case SS: f__cplus=0; f__pc++; goto loop; case SP: f__cplus = 1; f__pc++; goto loop; case P: f__scale=p->p1; f__pc++; goto loop; case BN: f__cblank=0; f__pc++; goto loop; case BZ: f__cblank=1; f__pc++; goto loop; } } return(0); } int en_fio(Void) { ftnint one=1; return(do_fio(&one,(char *)NULL,(ftnint)0)); } VOID fmt_bg(Void) { f__workdone=f__cp=f__rp=f__pc=f__cursor=0; f__cnt[0]=f__ret[0]=0; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/fmt.h000066400000000000000000000037261507764646700217350ustar00rootroot00000000000000struct syl { int op; int p1; union { int i[2]; char *s;} p2; }; #define RET1 1 #define REVERT 2 #define GOTO 3 #define X 4 #define SLASH 5 #define STACK 6 #define I 7 #define ED 8 #define NED 9 #define IM 10 #define APOS 11 #define H 12 #define TL 13 #define TR 14 #define T 15 #define COLON 16 #define S 17 #define SP 18 #define SS 19 #define P 20 #define BN 21 #define BZ 22 #define F 23 #define E 24 #define EE 25 #define D 26 #define G 27 #define GE 28 #define L 29 #define A 30 #define AW 31 #define O 32 #define NONL 33 #define OM 34 #define Z 35 #define ZM 36 typedef union { real pf; doublereal pd; } ufloat; typedef union { short is; #ifndef KR_headers signed #endif char ic; integer il; #ifdef Allow_TYQUAD longint ili; #endif } Uint; #ifdef KR_headers extern int (*f__doed)(),(*f__doned)(); extern int (*f__dorevert)(); extern int rd_ed(),rd_ned(); extern int w_ed(),w_ned(); extern int signbit_f2c(); extern char *f__fmtbuf; #else #ifdef __cplusplus extern "C" { #define Cextern extern "C" #else #define Cextern extern #endif extern const char *f__fmtbuf; extern int (*f__doed)(struct syl*, char*, ftnlen),(*f__doned)(struct syl*); extern int (*f__dorevert)(void); extern void fmt_bg(void); extern int pars_f(const char*); extern int rd_ed(struct syl*, char*, ftnlen),rd_ned(struct syl*); extern int signbit_f2c(double*); extern int w_ed(struct syl*, char*, ftnlen),w_ned(struct syl*); extern int wrt_E(ufloat*, int, int, int, ftnlen); extern int wrt_F(ufloat*, int, int, ftnlen); extern int wrt_L(Uint*, int, ftnlen); #endif extern int f__pc,f__parenlvl,f__revloc; extern flag f__cblank,f__cplus,f__workdone, f__nonl; extern int f__scale; #ifdef __cplusplus } #endif #define GET(x) if((x=(*f__getn)())<0) return(x) #define VAL(x) (x!='\n'?x:' ') #define PUT(x) (*f__putn)(x) #undef TYQUAD #ifndef Allow_TYQUAD #undef longint #define longint long #else #define TYQUAD 14 #endif #ifdef KR_headers extern char *f__icvt(); #else Cextern char *f__icvt(longint, int*, int*, int); #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/fmtlib.c000066400000000000000000000015411507764646700224100ustar00rootroot00000000000000/* @(#)fmtlib.c 1.2 */ #define MAXINTLENGTH 23 #include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifndef Allow_TYQUAD #undef longint #define longint long #undef ulongint #define ulongint unsigned long #endif #ifdef KR_headers char *f__icvt(value,ndigit,sign, base) longint value; int *ndigit,*sign; register int base; #else char *f__icvt(longint value, int *ndigit, int *sign, int base) #endif { static char buf[MAXINTLENGTH+1]; register int i; ulongint uvalue; if(value > 0) { uvalue = value; *sign = 0; } else if (value < 0) { uvalue = -value; *sign = 1; } else { *sign = 0; *ndigit = 1; buf[MAXINTLENGTH-1] = '0'; return &buf[MAXINTLENGTH-1]; } i = MAXINTLENGTH; do { buf[--i] = (uvalue%base) + '0'; uvalue /= base; } while(uvalue > 0); *ndigit = MAXINTLENGTH - i; return &buf[i]; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/fp.h000066400000000000000000000012311507764646700215410ustar00rootroot00000000000000#define FMAX 40 #define EXPMAXDIGS 8 #define EXPMAX 99999999 /* FMAX = max number of nonzero digits passed to atof() */ /* EXPMAX = 10^EXPMAXDIGS - 1 = largest allowed exponent absolute value */ #ifdef V10 /* Research Tenth-Edition Unix */ #include "local.h" #endif /* MAXFRACDIGS and MAXINTDIGS are for wrt_F -- bounds (not necessarily tight) on the maximum number of digits to the right and left of * the decimal point. */ #ifdef VAX #define MAXFRACDIGS 56 #define MAXINTDIGS 38 #else #ifdef CRAY #define MAXFRACDIGS 9880 #define MAXINTDIGS 9864 #else /* values that suffice for IEEE double */ #define MAXFRACDIGS 344 #define MAXINTDIGS 308 #endif #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/ftell64_.c000066400000000000000000000016251507764646700225550ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #ifdef __cplusplus extern "C" { #endif static FILE * #ifdef KR_headers unit_chk(Unit, who) integer Unit; char *who; #else unit_chk(integer Unit, char *who) #endif { if (Unit >= MXUNIT || Unit < 0) f__fatal(101, who); return f__units[Unit].ufd; } longint #ifdef KR_headers ftell64_(Unit) integer *Unit; #else ftell64_(integer *Unit) #endif { FILE *f; return (f = unit_chk(*Unit, "ftell")) ? FTELL(f) : -1L; } int #ifdef KR_headers fseek64_(Unit, offset, whence) integer *Unit, *whence; longint *offset; #else fseek64_(integer *Unit, longint *offset, integer *whence) #endif { FILE *f; int w = (int)*whence; #ifdef SEEK_SET static int wohin[3] = { SEEK_SET, SEEK_CUR, SEEK_END }; #endif if (w < 0 || w > 2) w = 0; #ifdef SEEK_SET w = wohin[w]; #endif return !(f = unit_chk(*Unit, "fseek")) || FSEEK(f, (OFF_T)*offset, w) ? 1 : 0; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/ftell_.c000066400000000000000000000016041507764646700224000ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #ifdef __cplusplus extern "C" { #endif static FILE * #ifdef KR_headers unit_chk(Unit, who) integer Unit; char *who; #else unit_chk(integer Unit, const char *who) #endif { if (Unit >= MXUNIT || Unit < 0) f__fatal(101, who); return f__units[Unit].ufd; } integer #ifdef KR_headers ftell_(Unit) integer *Unit; #else ftell_(integer *Unit) #endif { FILE *f; return (f = unit_chk(*Unit, "ftell")) ? ftell(f) : -1L; } int #ifdef KR_headers fseek_(Unit, offset, whence) integer *Unit, *offset, *whence; #else fseek_(integer *Unit, integer *offset, integer *whence) #endif { FILE *f; int w = (int)*whence; #ifdef SEEK_SET static int wohin[3] = { SEEK_SET, SEEK_CUR, SEEK_END }; #endif if (w < 0 || w > 2) w = 0; #ifdef SEEK_SET w = wohin[w]; #endif return !(f = unit_chk(*Unit, "fseek")) || fseek(f, *offset, w) ? 1 : 0; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/getarg_.c000066400000000000000000000011201507764646700225340ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif /* * subroutine getarg(k, c) * returns the kth unix command argument in fortran character * variable argument c */ #ifdef KR_headers VOID getarg_(n, s, ls) ftnint *n; char *s; ftnlen ls; #define Const /*nothing*/ #else #define Const const void getarg_(ftnint *n, char *s, ftnlen ls) #endif { extern int xargc; extern char **xargv; Const char *t; int i; if(*n>=0 && *n #include #ifdef __cplusplus extern "C" { #endif extern char *F77_aloc(ftnlen, const char*); #endif /* * getenv - f77 subroutine to return environment variables * * called by: * call getenv (ENV_NAME, char_var) * where: * ENV_NAME is the name of an environment variable * char_var is a character variable which will receive * the current value of ENV_NAME, or all blanks * if ENV_NAME is not defined */ #ifdef KR_headers VOID getenv_(fname, value, flen, vlen) char *value, *fname; ftnlen vlen, flen; #else void getenv_(char *fname, char *value, ftnlen flen, ftnlen vlen) #endif { char buf[256], *ep, *fp; integer i; if (flen <= 0) goto add_blanks; for(i = 0; i < sizeof(buf); i++) { if (i == flen || (buf[i] = fname[i]) == ' ') { buf[i] = 0; ep = getenv(buf); goto have_ep; } } while(i < flen && fname[i] != ' ') i++; strncpy(fp = F77_aloc(i+1, "getenv_"), fname, (int)i); fp[i] = 0; ep = getenv(fp); free(fp); have_ep: if (ep) while(*ep && vlen-- > 0) *value++ = *ep++; add_blanks: while(vlen-- > 0) *value++ = ' '; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/h_abs.c000066400000000000000000000003321507764646700222040ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers shortint h_abs(x) shortint *x; #else shortint h_abs(shortint *x) #endif { if(*x >= 0) return(*x); return(- *x); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/h_dim.c000066400000000000000000000003461507764646700222150ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers shortint h_dim(a,b) shortint *a, *b; #else shortint h_dim(shortint *a, shortint *b) #endif { return( *a > *b ? *a - *b : 0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/h_dnnt.c000066400000000000000000000004461507764646700224100ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double floor(); shortint h_dnnt(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif shortint h_dnnt(doublereal *x) #endif { return (shortint)(*x >= 0. ? floor(*x + .5) : -floor(.5 - *x)); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/h_indx.c000066400000000000000000000006721507764646700224100ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers shortint h_indx(a, b, la, lb) char *a, *b; ftnlen la, lb; #else shortint h_indx(char *a, char *b, ftnlen la, ftnlen lb) #endif { ftnlen i, n; char *s, *t, *bend; n = la - lb + 1; bend = b + lb; for(i = 0 ; i < n ; ++i) { s = a + i; t = b; while(t < bend) if(*s++ != *t++) goto no; return((shortint)i+1); no: ; } return(0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/h_len.c000066400000000000000000000003151507764646700222160ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers shortint h_len(s, n) char *s; ftnlen n; #else shortint h_len(char *s, ftnlen n) #endif { return(n); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/h_mod.c000066400000000000000000000003171507764646700222210ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers shortint h_mod(a,b) short *a, *b; #else shortint h_mod(short *a, short *b) #endif { return( *a % *b); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/h_nint.c000066400000000000000000000004311507764646700224070ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double floor(); shortint h_nint(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif shortint h_nint(real *x) #endif { return (shortint)(*x >= 0 ? floor(*x + .5) : -floor(.5 - *x)); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/h_sign.c000066400000000000000000000004121507764646700223760ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers shortint h_sign(a,b) shortint *a, *b; #else shortint h_sign(shortint *a, shortint *b) #endif { shortint x; x = (*a >= 0 ? *a : - *a); return( *b >= 0 ? x : -x); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/hl_ge.c000066400000000000000000000005321507764646700222100ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern integer s_cmp(); shortlogical hl_ge(a,b,la,lb) char *a, *b; ftnlen la, lb; #else extern integer s_cmp(char *, char *, ftnlen, ftnlen); shortlogical hl_ge(char *a, char *b, ftnlen la, ftnlen lb) #endif { return(s_cmp(a,b,la,lb) >= 0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/hl_gt.c000066400000000000000000000005311507764646700222260ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern integer s_cmp(); shortlogical hl_gt(a,b,la,lb) char *a, *b; ftnlen la, lb; #else extern integer s_cmp(char *, char *, ftnlen, ftnlen); shortlogical hl_gt(char *a, char *b, ftnlen la, ftnlen lb) #endif { return(s_cmp(a,b,la,lb) > 0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/hl_le.c000066400000000000000000000005321507764646700222150ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern integer s_cmp(); shortlogical hl_le(a,b,la,lb) char *a, *b; ftnlen la, lb; #else extern integer s_cmp(char *, char *, ftnlen, ftnlen); shortlogical hl_le(char *a, char *b, ftnlen la, ftnlen lb) #endif { return(s_cmp(a,b,la,lb) <= 0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/hl_lt.c000066400000000000000000000005311507764646700222330ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern integer s_cmp(); shortlogical hl_lt(a,b,la,lb) char *a, *b; ftnlen la, lb; #else extern integer s_cmp(char *, char *, ftnlen, ftnlen); shortlogical hl_lt(char *a, char *b, ftnlen la, ftnlen lb) #endif { return(s_cmp(a,b,la,lb) < 0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/i77vers.c000066400000000000000000000433201507764646700224420ustar00rootroot00000000000000 char _libi77_version_f2c[] = "\n@(#) LIBI77 VERSION (f2c) pjw,dmg-mods 20030321\n"; /* 2.01 $ format added 2.02 Coding bug in open.c repaired 2.03 fixed bugs in lread.c (read * with negative f-format) and lio.c and lio.h (e-format conforming to spec) 2.04 changed open.c and err.c (fopen and freopen respectively) to update to new c-library (append mode) 2.05 added namelist capability 2.06 allow internal list and namelist I/O */ /* close.c: allow upper-case STATUS= values endfile.c create fort.nnn if unit nnn not open; else if (file length == 0) use creat() rather than copy; use local copy() rather than forking /bin/cp; rewind, fseek to clear buffer (for no reading past EOF) err.c use neither setbuf nor setvbuf; make stderr buffered fio.h #define _bufend inquire.c upper case responses; omit byfile test from SEQUENTIAL= answer "YES" to DIRECT= for unopened file (open to debate) lio.c flush stderr, stdout at end of each stmt space before character strings in list output only at line start lio.h adjust LEW, LED consistent with old libI77 lread.c use atof() allow "nnn*," when reading complex constants open.c try opening for writing when open for read fails, with special uwrt value (2) delaying creat() to first write; set curunit so error messages don't drop core; no file name ==> fort.nnn except for STATUS='SCRATCH' rdfmt.c use atof(); trust EOF == end-of-file (so don't read past end-of-file after endfile stmt) sfe.c flush stderr, stdout at end of each stmt wrtfmt.c: use upper case put wrt_E and wrt_F into wref.c, use sprintf() rather than ecvt() and fcvt() [more accurate on VAX] */ /* 16 Oct. 1988: uwrt = 3 after write, rewind, so close won't zap the file. */ /* 10 July 1989: change _bufend to buf_end in fio.h, wsfe.c, wrtfmt.c */ /* 28 Nov. 1989: corrections for IEEE and Cray arithmetic */ /* 29 Nov. 1989: change various int return types to long for f2c */ /* 30 Nov. 1989: various types from f2c.h */ /* 6 Dec. 1989: types corrected various places */ /* 19 Dec. 1989: make iostat= work right for internal I/O */ /* 8 Jan. 1990: add rsne, wsne -- routines for handling NAMELIST */ /* 28 Jan. 1990: have NAMELIST read treat $ as &, general white space as blank */ /* 27 Mar. 1990: change an = to == in rd_L(rdfmt.c) so formatted reads of logical values reject letters other than fFtT; have nowwriting reset cf */ /* 14 Aug. 1990: adjust lread.c to treat tabs as spaces in list input */ /* 17 Aug. 1990: adjust open.c to recognize blank='Z...' as well as blank='z...' when reopening an open file */ /* 30 Aug. 1990: prevent embedded blanks in list output of complex values; omit exponent field in list output of values of magnitude between 10 and 1e8; prevent writing stdin and reading stdout or stderr; don't close stdin, stdout, or stderr when reopening units 5, 6, 0. */ /* 18 Sep. 1990: add component udev to unit and consider old == new file iff uinode and udev values agree; use stat rather than access to check existence of file (when STATUS='OLD')*/ /* 2 Oct. 1990: adjust rewind.c so two successive rewinds after a write don't clobber the file. */ /* 9 Oct. 1990: add #include "fcntl.h" to endfile.c, err.c, open.c; adjust g_char in util.c for segmented memories. */ /* 17 Oct. 1990: replace abort() and _cleanup() with calls on sig_die(...,1) (defined in main.c). */ /* 5 Nov. 1990: changes to open.c: complain if new= is specified and the file already exists; allow file= to be omitted in open stmts and allow status='replace' (Fortran 90 extensions). */ /* 11 Dec. 1990: adjustments for POSIX. */ /* 15 Jan. 1991: tweak i_ungetc in rsli.c to allow reading from strings in read-only memory. */ /* 25 Apr. 1991: adjust namelist stuff to work with f2c -i2 */ /* 26 Apr. 1991: fix some bugs with NAMELIST read of multi-dim. arrays */ /* 16 May 1991: increase LEFBL in lio.h to bypass NeXT bug */ /* 17 Oct. 1991: change type of length field in sequential unformatted records from int to long (for systems where sizeof(int) can vary, depending on the compiler or compiler options). */ /* 14 Nov. 1991: change uint to Uint in fmt.h, rdfmt.c, wrtfmt.c. */ /* 25 Nov. 1991: change uint to Uint in lwrite.c; change sizeof(int) to sizeof(uioint) in fseeks in sue.c (missed on 17 Oct.). */ /* 1 Dec. 1991: uio.c: add test for read failure (seq. unformatted reads); adjust an error return from EOF to off end of record */ /* 12 Dec. 1991: rsli.c: fix bug with internal list input that caused the last character of each record to be ignored. iio.c: adjust error message in internal formatted input from "end-of-file" to "off end of record" if the format specifies more characters than the record contains. */ /* 17 Jan. 1992: lread.c, rsne.c: in list and namelist input, treat "r* ," and "r*," alike (where r is a positive integer constant), and fix a bug in handling null values following items with repeat counts (e.g., 2*1,,3); for namelist reading of a numeric array, allow a new name-value subsequence to terminate the current one (as though the current one ended with the right number of null values). lio.h, lwrite.c: omit insignificant zeros in list and namelist output. To get the old behavior, compile with -DOld_list_output . */ /* 18 Jan. 1992: make list output consistent with F format by printing .1 rather than 0.1 (introduced yesterday). */ /* 3 Feb. 1992: rsne.c: fix namelist read bug that caused the character following a comma to be ignored. */ /* 19 May 1992: adjust iio.c, ilnw.c, rdfmt.c and rsli.c to make err= work with internal list and formatted I/O. */ /* 18 July 1992: adjust rsne.c to allow namelist input to stop at an & (e.g. &end). */ /* 23 July 1992: switch to ANSI prototypes unless KR_headers is #defined ; recognize Z format (assuming 8-bit bytes). */ /* 14 Aug. 1992: tweak wrt_E in wref.c to avoid -NaN */ /* 23 Oct. 1992: Supply missing l_eof = 0 assignment to s_rsne() in rsne.c (so end-of-file on other files won't confuse namelist reads of external files). Prepend f__ to external names that are only of internal interest to lib[FI]77. */ /* 1 Feb. 1993: backspace.c: fix bug that bit when last char of 2nd buffer == '\n'. endfile.c: guard against tiny L_tmpnam; close and reopen files in t_runc(). lio.h: lengthen LINTW (buffer size in lwrite.c). err.c, open.c: more prepending of f__ (to [rw]_mode). */ /* 5 Feb. 1993: tweaks to NAMELIST: rsne.c: ? prints the namelist being sought; namelists of the wrong name are skipped (after an error message; xwsne.c: namelist writes have a newline before each new variable. open.c: ACCESS='APPEND' positions sequential files at EOF (nonstandard extension -- that doesn't require changing data structures). */ /* 9 Feb. 1993: Change some #ifdef MSDOS lines to #ifdef NON_UNIX_STDIO. err.c: under NON_UNIX_STDIO, avoid close(creat(name,0666)) when the unit has another file descriptor for name. */ /* 4 March 1993: err.c, open.c: take declaration of fdopen from rawio.h; open.c: always give f__w_mode[] 4 elements for use in t_runc (in endfile.c -- for change of 1 Feb. 1993). */ /* 6 March 1993: uio.c: adjust off-end-of-record test for sequential unformatted reads to respond to err= rather than end=. */ /* 12 March 1993: various tweaks for C++ */ /* 6 April 1993: adjust error returns for formatted inputs to flush the current input line when err=label is specified. To restore the old behavior (input left mid-line), either adjust the #definition of errfl in fio.h or omit the invocation of f__doend in err__fl (in err.c). */ /* 23 June 1993: iio.c: fix bug in format reversions for internal writes. */ /* 5 Aug. 1993: lread.c: fix bug in handling repetition counts for logical data (during list or namelist input). Change struct f__syl to struct syl (for buggy compilers). */ /* 7 Aug. 1993: lread.c: fix bug in namelist reading of incomplete logical arrays. */ /* 9 Aug. 1993: lread.c: fix bug in namelist reading of an incomplete array of numeric data followed by another namelist item whose name starts with 'd', 'D', 'e', or 'E'. */ /* 8 Sept. 1993: open.c: protect #include "sys/..." with #ifndef NON_UNIX_STDIO; Version date not changed. */ /* 10 Nov. 1993: backspace.c: add nonsense for #ifdef MSDOS */ /* 8 Dec. 1993: iio.c: adjust internal formatted reads to treat short records as though padded with blanks (rather than causing an "off end of record" error). */ /* 22 Feb. 1994: lread.c: check that realloc did not return NULL. */ /* 6 June 1994: Under NON_UNIX_STDIO, use binary mode for direct formatted files (avoiding any confusion regarding \n). */ /* 5 July 1994: Fix bug (introduced 6 June 1994?) in reopening files under NON_UNIX_STDIO. */ /* 6 July 1994: wref.c: protect with #ifdef GOOD_SPRINTF_EXPONENT an optimization that requires exponents to have 2 digits when 2 digits suffice. lwrite.c wsfe.c (list and formatted external output): omit ' ' carriage-control when compiled with -DOMIT_BLANK_CC . Off-by-one bug fixed in character count for list output of character strings. Omit '.' in list-directed printing of Nan, Infinity. */ /* 12 July 1994: wrtfmt.c: under G11.4, write 0. as " .0000 " rather than " .0000E+00". */ /* 3 Aug. 1994: lwrite.c: do not insert a newline when appending an oversize item to an empty line. */ /* 12 Aug. 1994: rsli.c rsne.c: fix glitch (reset nml_read) that kept ERR= (in list- or format-directed input) from working after a NAMELIST READ. */ /* 7 Sept. 1994: typesize.c: adjust to allow types LOGICAL*1, LOGICAL*2, INTEGER*1, and (under -DAllow_TYQUAD) INTEGER*8 in NAMELISTs. */ /* 6 Oct. 1994: util.c: omit f__mvgbt, as it is never used. */ /* 2 Nov. 1994: add #ifdef ALWAYS_FLUSH logic. */ /* 26 Jan. 1995: wref.c: fix glitch in printing the exponent of 0 when GOOD_SPRINTF_EXPONENT is not #defined. */ /* 24 Feb. 1995: iio.c: z_getc: insert (unsigned char *) to allow internal reading of characters with high-bit set (on machines that sign-extend characters). */ /* 14 March 1995:lread.c and rsfe.c: adjust s_rsle and s_rsfe to check for end-of-file (to prevent infinite loops with empty read statements). */ /* 26 May 1995: iio.c: z_wnew: fix bug in handling T format items in internal writes whose last item is written to an earlier position than some previous item. */ /* 29 Aug. 1995: backspace.c: adjust MSDOS logic. */ /* 6 Sept. 1995: Adjust namelist input to treat a subscripted name whose subscripts do not involve colons similarly to the name without a subscript: accept several values, stored in successive elements starting at the indicated subscript. Adjust namelist output to quote character strings (avoiding confusion with arrays of character strings). Adjust f_init calls for people who don't use libF77's main(); now open and namelist read statements invoke f_init if needed. */ /* 7 Sept. 1995: Fix some bugs with -DAllow_TYQUAD (for integer*8). Add -DNo_Namelist_Comments lines to rsne.c. */ /* 5 Oct. 1995: wrtfmt.c: fix bug with t editing (f__cursor was not always zeroed in mv_cur). */ /* 11 Oct. 1995: move defs of f__hiwater, f__svic, f__icptr from wrtfmt.c to err.c */ /* 15 Mar. 1996: lread.c, rsfe.c: honor END= in READ stmt with empty iolist */ /* 13 May 1996: add ftell_.c and fseek_.c */ /* 9 June 1996: Adjust rsli.c and lread.c so internal list input with too few items in the input string will honor end= . */ /* 12 Sept. 1995:fmtlib.c: fix glitch in printing the most negative integer. */ /* 25 Sept. 1995:fmt.h: for formatted writes of negative integer*1 values, make ic signed on ANSI systems. If formatted writes of integer*1 values trouble you when using a K&R C compiler, switch to an ANSI compiler or use a compiler flag that makes characters signed. */ /* 9 Dec. 1996: d[fu]e.c, err.c: complain about non-positive rec= in direct read and write statements. ftell_.c: change param "unit" to "Unit" for -DKR_headers. */ /* 26 Feb. 1997: ftell_.c: on systems that define SEEK_SET, etc., use SEEK_SET, SEEK_CUR, SEEK_END for *whence = 0, 1, 2. */ /* 7 Apr. 1997: fmt.c: adjust to complain at missing numbers in formats (but still treat missing ".nnn" as ".0"). */ /* 11 Apr. 1997: err.c: attempt to make stderr line buffered rather than fully buffered. (Buffering is needed for format items T and TR.) */ /* 27 May 1997: ftell_.c: fix typo (that caused the third argument to be treated as 2 on some systems). */ /* 5 Aug. 1997: lread.c: adjust to accord with a change to the Fortran 8X draft (in 1990 or 1991) that rescinded permission to elide quote marks in namelist input of character data; compile with -DF8X_NML_ELIDE_QUOTES to get the old behavior. wrtfmt.o: wrt_G: tweak to print the right number of 0's for zero under G format. */ /* 16 Aug. 1997: iio.c: fix bug in internal writes to an array of character strings that sometimes caused one more array element than required by the format to be blank-filled. Example: format(1x). */ /* 16 Sept. 1997:fmt.[ch] rdfmt.c wrtfmt.c: tweak struct syl for machines with 64-bit pointers and 32-bit ints that did not 64-bit align struct syl (e.g., Linux on the DEC Alpha). */ /* 19 Jan. 1998: backspace.c: for b->ufmt==0, change sizeof(int) to sizeof(uiolen). On machines where this would make a difference, it is best for portability to compile libI77 with -DUIOLEN_int (which will render the change invisible). */ /* 4 March 1998: open.c: fix glitch in comparing file names under -DNON_UNIX_STDIO */ /* 17 March 1998: endfile.c, open.c: acquire temporary files from tmpfile(), unless compiled with -DNON_ANSI_STDIO, which uses mktemp(). New buffering scheme independent of NON_UNIX_STDIO for handling T format items. Now -DNON_UNIX_STDIO is no longer be necessary for Linux, and libf2c no longer causes stderr to be buffered -- the former setbuf or setvbuf call for stderr was to make T format items work. open.c: use the Posix access() function to check existence or nonexistence of files, except under -DNON_POSIX_STDIO, where trial fopen calls are used. */ /* 5 April 1998: wsfe.c: make $ format item work: this was lost in the changes of 17 March 1998. */ /* 28 May 1998: backspace.c dfe.c due.c iio.c lread.c rsfe.c sue.c wsfe.c: set f__curunit sooner so various error messages will correctly identify the I/O unit involved. */ /* 17 June 1998: lread.c: unless compiled with ALLOW_FLOAT_IN_INTEGER_LIST_INPUT #defined, treat floating-point numbers (containing either a decimal point or an exponent field) as errors when they appear as list input for integer data. */ /* 7 Sept. 1998: move e_wdfe from sfe.c to dfe.c, where it was originally. Why did it ever move to sfe.c? */ /* 2 May 1999: open.c: set f__external (to get "external" versus "internal" right in the error message if we cannot open the file). err.c: cast a pointer difference to (int) for %d. rdfmt.c: omit fixed-length buffer that could be overwritten by formats Inn or Lnn with nn > 83. */ /* 3 May 1999: open.c: insert two casts for machines with 64-bit longs. */ /* 18 June 1999: backspace.c: allow for b->ufd changing in t_runc */ /* 27 June 1999: rsne.c: fix bug in namelist input: a misplaced increment */ /* could cause wrong array elements to be assigned; e.g., */ /* "&input k(5)=10*1 &end" assigned k(5) and k(15..23) */ /* 15 Nov. 1999: endfile.c: set state to writing (b->uwrt = 1) when an */ /* endfile statement requires copying the file. */ /* (Otherwise an immediately following rewind statement */ /* could make the file appear empty.) Also, supply a */ /* missing (long) cast in the sprintf call. */ /* sfe.c: add #ifdef ALWAYS_FLUSH logic, for formatted I/O: */ /* Compiling libf2c with -DALWAYS_FLUSH should prevent losing */ /* any data in buffers should the program fault. It also */ /* makes the program run more slowly. */ /* 20 April 2000: rsne.c, xwsne.c: tweaks that only matter if ftnint and */ /* ftnlen are of different fundamental types (different numbers */ /* of bits). Since these files will not compile when this */ /* change matters, the above VERSION string remains unchanged. */ /* 4 July 2000: adjustments to permit compilation by C++ compilers; */ /* VERSION string remains unchanged. */ /* 5 Dec. 2000: lread.c: under namelist input, when reading a logical array, */ /* treat Tstuff= and Fstuff= as new assignments rather than as */ /* logical constants. */ /* 22 Feb. 2001: endfile.c: adjust to use truncate() unless compiled with */ /* -DNO_TRUNCATE (or with -DMSDOS). */ /* 1 March 2001: endfile.c: switch to ftruncate (absent -DNO_TRUNCATE), */ /* thus permitting truncation of scratch files on true Unix */ /* systems, where scratch files have no name. Add an fflush() */ /* (surprisingly) needed on some Linux systems. */ /* 11 Oct. 2001: backspac.c dfe.c due.c endfile.c err.c fio.h fmt.c fmt.h */ /* inquire.c open.c rdfmt.c sue.c util.c: change fseek and */ /* ftell to FSEEK and FTELL (#defined to be fseek and ftell, */ /* respectively, in fio.h unless otherwise #defined), and use */ /* type OFF_T (#defined to be long unless otherwise #defined) */ /* to permit handling files over 2GB long where possible, */ /* with suitable -D options, provided for some systems in new */ /* header file sysdep1.h (copied from sysdep1.h0 by default). */ /* 15 Nov. 2001: endfile.c: add FSEEK after FTRUNCATE. */ /* 28 Nov. 2001: fmt.h lwrite.c wref.c and (new) signbit.c: on IEEE systems, */ /* print -0 as -0 when compiled with -DSIGNED_ZEROS. See */ /* comments in makefile or (better) libf2c/makefile.* . */ /* 6 Sept. 2002: rsne.c: fix bug with multiple repeat counts in reading */ /* namelists, e.g., &nl a(2) = 3*1.0, 2*2.0, 3*3.0 / */ /* 21 March 2003: err.c: before writing to a file after reading from it, */ /* f_seek(file, 0, SEEK_CUR) to make writing legal in ANSI C. */ starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/i_abs.c000066400000000000000000000003261507764646700222100ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers integer i_abs(x) integer *x; #else integer i_abs(integer *x) #endif { if(*x >= 0) return(*x); return(- *x); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/i_ceiling.c000066400000000000000000000010261507764646700230530ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers integer i_sceiling(x) real *x; #else #ifdef __cplusplus extern "C" { #endif integer i_sceiling(real *x) #endif { #define CEIL(x) ((int)(x) + ((x) > 0 && (x) != (int)(x))) return (integer) CEIL(*x); } #ifdef __cplusplus } #endif #ifdef KR_headers integer i_dceiling(x) doublereal *x; #else #ifdef __cplusplus extern "C" { #endif integer i_dceiling(doublereal *x) #endif { #define CEIL(x) ((int)(x) + ((x) > 0 && (x) != (int)(x))) return (integer) CEIL(*x); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/i_dim.c000066400000000000000000000003411507764646700222110ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers integer i_dim(a,b) integer *a, *b; #else integer i_dim(integer *a, integer *b) #endif { return( *a > *b ? *a - *b : 0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/i_dnnt.c000066400000000000000000000004431507764646700224060ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double floor(); integer i_dnnt(x) doublereal *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif integer i_dnnt(doublereal *x) #endif { return (integer)(*x >= 0. ? floor(*x + .5) : -floor(.5 - *x)); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/i_indx.c000066400000000000000000000006561507764646700224130ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers integer i_indx(a, b, la, lb) char *a, *b; ftnlen la, lb; #else integer i_indx(char *a, char *b, ftnlen la, ftnlen lb) #endif { ftnlen i, n; char *s, *t, *bend; n = la - lb + 1; bend = b + lb; for(i = 0 ; i < n ; ++i) { s = a + i; t = b; while(t < bend) if(*s++ != *t++) goto no; return(i+1); no: ; } return(0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/i_len.c000066400000000000000000000003131507764646700222150ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers integer i_len(s, n) char *s; ftnlen n; #else integer i_len(char *s, ftnlen n) #endif { return(n); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/i_len_trim.c000066400000000000000000000004401507764646700232510ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers integer i_len_trim(s, n) char *s; ftnlen n; #else integer i_len_trim(char *s, ftnlen n) #endif { int i; for(i=n-1;i>=0;i--) if(s[i] != ' ') return i + 1; return(0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/i_mod.c000066400000000000000000000003231507764646700222170ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers integer i_mod(a,b) integer *a, *b; #else integer i_mod(integer *a, integer *b) #endif { return( *a % *b); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/i_nint.c000066400000000000000000000004261507764646700224140ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double floor(); integer i_nint(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif integer i_nint(real *x) #endif { return (integer)(*x >= 0 ? floor(*x + .5) : -floor(.5 - *x)); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/i_sign.c000066400000000000000000000004041507764646700224000ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers integer i_sign(a,b) integer *a, *b; #else integer i_sign(integer *a, integer *b) #endif { integer x; x = (*a >= 0 ? *a : - *a); return( *b >= 0 ? x : -x); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/iargc_.c000066400000000000000000000003041507764646700223530ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers ftnint iargc_() #else ftnint iargc_(void) #endif { extern int xargc; return ( xargc - 1 ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/iio.c000066400000000000000000000051171507764646700217160ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #include "fmt.h" #ifdef __cplusplus extern "C" { #endif extern char *f__icptr; char *f__icend; extern icilist *f__svic; int f__icnum; int z_getc(Void) { if(f__recpos++ < f__svic->icirlen) { if(f__icptr >= f__icend) err(f__svic->iciend,(EOF),"endfile"); return(*(unsigned char *)f__icptr++); } return '\n'; } void #ifdef KR_headers z_putc(c) #else z_putc(int c) #endif { if (f__icptr < f__icend && f__recpos++ < f__svic->icirlen) *f__icptr++ = c; } int z_rnew(Void) { f__icptr = f__svic->iciunit + (++f__icnum)*f__svic->icirlen; f__recpos = 0; f__cursor = 0; f__hiwater = 0; return 1; } static int z_endp(Void) { (*f__donewrec)(); return 0; } int #ifdef KR_headers c_si(a) icilist *a; #else c_si(icilist *a) #endif { f__elist = (cilist *)a; f__fmtbuf=a->icifmt; f__curunit = 0; f__sequential=f__formatted=1; f__external=0; if(pars_f(f__fmtbuf)<0) err(a->icierr,100,"startint"); fmt_bg(); f__cblank=f__cplus=f__scale=0; f__svic=a; f__icnum=f__recpos=0; f__cursor = 0; f__hiwater = 0; f__icptr = a->iciunit; f__icend = f__icptr + a->icirlen*a->icirnum; f__cf = 0; return(0); } int iw_rev(Void) { if(f__workdone) z_endp(); f__hiwater = f__recpos = f__cursor = 0; return(f__workdone=0); } #ifdef KR_headers integer s_rsfi(a) icilist *a; #else integer s_rsfi(icilist *a) #endif { int n; if(n=c_si(a)) return(n); f__reading=1; f__doed=rd_ed; f__doned=rd_ned; f__getn=z_getc; f__dorevert = z_endp; f__donewrec = z_rnew; f__doend = z_endp; return(0); } int z_wnew(Void) { if (f__recpos < f__hiwater) { f__icptr += f__hiwater - f__recpos; f__recpos = f__hiwater; } while(f__recpos++ < f__svic->icirlen) *f__icptr++ = ' '; f__recpos = 0; f__cursor = 0; f__hiwater = 0; f__icnum++; return 1; } #ifdef KR_headers integer s_wsfi(a) icilist *a; #else integer s_wsfi(icilist *a) #endif { int n; if(n=c_si(a)) return(n); f__reading=0; f__doed=w_ed; f__doned=w_ned; f__putn=z_putc; f__dorevert = iw_rev; f__donewrec = z_wnew; f__doend = z_endp; return(0); } integer e_rsfi(Void) { int n = en_fio(); f__fmtbuf = NULL; return(n); } integer e_wsfi(Void) { int n; n = en_fio(); f__fmtbuf = NULL; if(f__svic->icirnum != 1 && (f__icnum > f__svic->icirnum || (f__icnum == f__svic->icirnum && (f__recpos | f__hiwater)))) err(f__svic->icierr,110,"inwrite"); if (f__recpos < f__hiwater) f__recpos = f__hiwater; if (f__recpos >= f__svic->icirlen) err(f__svic->icierr,110,"recend"); if (!f__recpos && f__icnum) return n; while(f__recpos++ < f__svic->icirlen) *f__icptr++ = ' '; return n; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/ilnw.c000066400000000000000000000021451507764646700221050ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #include "lio.h" #ifdef __cplusplus extern "C" { #endif extern char *f__icptr; extern char *f__icend; extern icilist *f__svic; extern int f__icnum; #ifdef KR_headers extern void z_putc(); #else extern void z_putc(int); #endif static int z_wSL(Void) { while(f__recpos < f__svic->icirlen) z_putc(' '); return z_rnew(); } static void #ifdef KR_headers c_liw(a) icilist *a; #else c_liw(icilist *a) #endif { f__reading = 0; f__external = 0; f__formatted = 1; f__putn = z_putc; L_len = a->icirlen; f__donewrec = z_wSL; f__svic = a; f__icnum = f__recpos = 0; f__cursor = 0; f__cf = 0; f__curunit = 0; f__icptr = a->iciunit; f__icend = f__icptr + a->icirlen*a->icirnum; f__elist = (cilist *)a; } integer #ifdef KR_headers s_wsni(a) icilist *a; #else s_wsni(icilist *a) #endif { cilist ca; c_liw(a); ca.cifmt = a->icifmt; x_wsne(&ca); z_wSL(); return 0; } integer #ifdef KR_headers s_wsli(a) icilist *a; #else s_wsli(icilist *a) #endif { f__lioproc = l_write; c_liw(a); return(0); } integer e_wsli(Void) { z_wSL(); return(0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/inquire.c000066400000000000000000000052541507764646700226140ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #include "string.h" #ifdef NON_UNIX_STDIO #ifndef MSDOS #include "unistd.h" /* for access() */ #endif #endif #ifdef KR_headers integer f_inqu(a) inlist *a; #else #ifdef __cplusplus extern "C" integer f_inqu(inlist*); #endif #ifdef MSDOS #undef abs #undef min #undef max #include "io.h" #endif integer f_inqu(inlist *a) #endif { flag byfile; int i; #ifndef NON_UNIX_STDIO int n; #endif unit *p; char buf[256]; long x; if(a->infile!=NULL) { byfile=1; g_char(a->infile,a->infilen,buf); #ifdef NON_UNIX_STDIO x = access(buf,0) ? -1 : 0; for(i=0,p=NULL;iinunitinunit>=0) { p= &f__units[a->inunit]; } else { p=NULL; } } if(a->inex!=NULL) if(byfile && x != -1 || !byfile && p!=NULL) *a->inex=1; else *a->inex=0; if(a->inopen!=NULL) if(byfile) *a->inopen=(p!=NULL); else *a->inopen=(p!=NULL && p->ufd!=NULL); if(a->innum!=NULL) *a->innum= p-f__units; if(a->innamed!=NULL) if(byfile || p!=NULL && p->ufnm!=NULL) *a->innamed=1; else *a->innamed=0; if(a->inname!=NULL) if(byfile) b_char(buf,a->inname,a->innamlen); else if(p!=NULL && p->ufnm!=NULL) b_char(p->ufnm,a->inname,a->innamlen); if(a->inacc!=NULL && p!=NULL && p->ufd!=NULL) if(p->url) b_char("DIRECT",a->inacc,a->inacclen); else b_char("SEQUENTIAL",a->inacc,a->inacclen); if(a->inseq!=NULL) if(p!=NULL && p->url) b_char("NO",a->inseq,a->inseqlen); else b_char("YES",a->inseq,a->inseqlen); if(a->indir!=NULL) if(p==NULL || p->url) b_char("YES",a->indir,a->indirlen); else b_char("NO",a->indir,a->indirlen); if(a->infmt!=NULL) if(p!=NULL && p->ufmt==0) b_char("UNFORMATTED",a->infmt,a->infmtlen); else b_char("FORMATTED",a->infmt,a->infmtlen); if(a->inform!=NULL) if(p!=NULL && p->ufmt==0) b_char("NO",a->inform,a->informlen); else b_char("YES",a->inform,a->informlen); if(a->inunf) if(p!=NULL && p->ufmt==0) b_char("YES",a->inunf,a->inunflen); else if (p!=NULL) b_char("NO",a->inunf,a->inunflen); else b_char("UNKNOWN",a->inunf,a->inunflen); if(a->inrecl!=NULL && p!=NULL) *a->inrecl=p->url; if(a->innrec!=NULL && p!=NULL && p->url>0) *a->innrec=(ftnint)(FTELL(p->ufd)/p->url+1); if(a->inblank && p!=NULL && p->ufmt) if(p->ublnk) b_char("ZERO",a->inblank,a->inblanklen); else b_char("NULL",a->inblank,a->inblanklen); return(0); } starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/l_ge.c000066400000000000000000000005161507764646700220420ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern integer s_cmp(); logical l_ge(a,b,la,lb) char *a, *b; ftnlen la, lb; #else extern integer s_cmp(char *, char *, ftnlen, ftnlen); logical l_ge(char *a, char *b, ftnlen la, ftnlen lb) #endif { return(s_cmp(a,b,la,lb) >= 0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/l_gt.c000066400000000000000000000005151507764646700220600ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern integer s_cmp(); logical l_gt(a,b,la,lb) char *a, *b; ftnlen la, lb; #else extern integer s_cmp(char *, char *, ftnlen, ftnlen); logical l_gt(char *a, char *b, ftnlen la, ftnlen lb) #endif { return(s_cmp(a,b,la,lb) > 0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/l_le.c000066400000000000000000000005161507764646700220470ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern integer s_cmp(); logical l_le(a,b,la,lb) char *a, *b; ftnlen la, lb; #else extern integer s_cmp(char *, char *, ftnlen, ftnlen); logical l_le(char *a, char *b, ftnlen la, ftnlen lb) #endif { return(s_cmp(a,b,la,lb) <= 0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/l_lt.c000066400000000000000000000005151507764646700220650ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern integer s_cmp(); logical l_lt(a,b,la,lb) char *a, *b; ftnlen la, lb; #else extern integer s_cmp(char *, char *, ftnlen, ftnlen); logical l_lt(char *a, char *b, ftnlen la, ftnlen lb) #endif { return(s_cmp(a,b,la,lb) < 0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/lbitbits.c000066400000000000000000000021111507764646700227410ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifndef LONGBITS #define LONGBITS 32 #endif integer #ifdef KR_headers lbit_bits(a, b, len) integer a, b, len; #else lbit_bits(integer a, integer b, integer len) #endif { /* Assume 2's complement arithmetic */ unsigned long x, y; x = (unsigned long) a; y = (unsigned long)-1L; x >>= b; y <<= len; return (integer)(x & ~y); } integer #ifdef KR_headers lbit_cshift(a, b, len) integer a, b, len; #else lbit_cshift(integer a, integer b, integer len) #endif { unsigned long x, y, z; x = (unsigned long)a; if (len <= 0) { if (len == 0) return 0; goto full_len; } if (len >= LONGBITS) { full_len: if (b >= 0) { b %= LONGBITS; return (integer)(x << b | x >> LONGBITS -b ); } b = -b; b %= LONGBITS; return (integer)(x << LONGBITS - b | x >> b); } y = z = (unsigned long)-1; y <<= len; z &= ~y; y &= x; x &= z; if (b >= 0) { b %= len; return (integer)(y | z & (x << b | x >> len - b)); } b = -b; b %= len; return (integer)(y | z & (x >> b | x << len - b)); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/lbitshft.c000066400000000000000000000004021507764646700227450ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif integer #ifdef KR_headers lbit_shift(a, b) integer a; integer b; #else lbit_shift(integer a, integer b) #endif { return b >= 0 ? a << b : (integer)((uinteger)a >> -b); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/libf2c.lbc000066400000000000000000000030721507764646700226130ustar00rootroot00000000000000abort_.obj backspac.obj c_abs.obj c_cos.obj c_div.obj c_exp.obj c_log.obj c_sin.obj c_sqrt.obj cabs.obj close.obj d_abs.obj d_acos.obj d_asin.obj d_atan.obj d_atn2.obj d_cnjg.obj d_cos.obj d_cosh.obj d_dim.obj d_exp.obj d_imag.obj d_int.obj d_lg10.obj d_log.obj d_mod.obj d_nint.obj d_prod.obj d_sign.obj d_sin.obj d_sinh.obj d_sqrt.obj d_tan.obj d_tanh.obj derf_.obj derfc_.obj dfe.obj dolio.obj dtime_.obj due.obj ef1asc_.obj ef1cmc_.obj endfile.obj erf_.obj erfc_.obj err.obj etime_.obj exit_.obj f77_aloc.obj f77vers.obj fmt.obj fmtlib.obj ftell_.obj getarg_.obj getenv_.obj h_abs.obj h_dim.obj h_dnnt.obj h_indx.obj h_len.obj h_mod.obj h_nint.obj h_sign.obj hl_ge.obj hl_gt.obj hl_le.obj hl_lt.obj i77vers.obj i_abs.obj i_dim.obj i_dnnt.obj i_indx.obj i_len.obj i_mod.obj i_nint.obj i_sign.obj iargc_.obj iio.obj ilnw.obj inquire.obj l_ge.obj l_gt.obj l_le.obj l_lt.obj lbitbits.obj lbitshft.obj lread.obj lwrite.obj main.obj open.obj pow_ci.obj pow_dd.obj pow_di.obj pow_hh.obj pow_ii.obj pow_ri.obj pow_zi.obj pow_zz.obj r_abs.obj r_acos.obj r_asin.obj r_atan.obj r_atn2.obj r_cnjg.obj r_cos.obj r_cosh.obj r_dim.obj r_exp.obj r_imag.obj r_int.obj r_lg10.obj r_log.obj r_mod.obj r_nint.obj r_sign.obj r_sin.obj r_sinh.obj r_sqrt.obj r_tan.obj r_tanh.obj rdfmt.obj rewind.obj rsfe.obj rsli.obj rsne.obj s_cat.obj s_cmp.obj s_copy.obj s_paus.obj s_rnge.obj s_stop.obj sfe.obj sig_die.obj signal_.obj sue.obj system_.obj typesize.obj uio.obj uninit.obj util.obj wref.obj wrtfmt.obj wsfe.obj wsle.obj wsne.obj xwsne.obj z_abs.obj z_cos.obj z_div.obj z_exp.obj z_log.obj z_sin.obj z_sqrt.obj starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/libf2c.sy000066400000000000000000000040031507764646700225010ustar00rootroot00000000000000+abort_.obj & +backspac.obj & +c_abs.obj & +c_cos.obj & +c_div.obj & +c_exp.obj & +c_log.obj & +c_sin.obj & +c_sqrt.obj & +cabs.obj & +close.obj & +d_abs.obj & +d_acos.obj & +d_asin.obj & +d_atan.obj & +d_atn2.obj & +d_cnjg.obj & +d_cos.obj & +d_cosh.obj & +d_dim.obj & +d_exp.obj & +d_imag.obj & +d_int.obj & +d_lg10.obj & +d_log.obj & +d_mod.obj & +d_nint.obj & +d_prod.obj & +d_sign.obj & +d_sin.obj & +d_sinh.obj & +d_sqrt.obj & +d_tan.obj & +d_tanh.obj & +derf_.obj & +derfc_.obj & +dfe.obj & +dolio.obj & +dtime_.obj & +due.obj & +ef1asc_.obj & +ef1cmc_.obj & +endfile.obj & +erf_.obj & +erfc_.obj & +err.obj & +etime_.obj & +exit_.obj & +f77_aloc.obj & +f77vers.obj & +fmt.obj & +fmtlib.obj & +ftell_.obj & +getarg_.obj & +getenv_.obj & +h_abs.obj & +h_dim.obj & +h_dnnt.obj & +h_indx.obj & +h_len.obj & +h_mod.obj & +h_nint.obj & +h_sign.obj & +hl_ge.obj & +hl_gt.obj & +hl_le.obj & +hl_lt.obj & +i77vers.obj & +i_abs.obj & +i_dim.obj & +i_dnnt.obj & +i_indx.obj & +i_len.obj & +i_mod.obj & +i_nint.obj & +i_sign.obj & +iargc_.obj & +iio.obj & +ilnw.obj & +inquire.obj & +l_ge.obj & +l_gt.obj & +l_le.obj & +l_lt.obj & +lbitbits.obj & +lbitshft.obj & +lread.obj & +lwrite.obj & +main.obj & +open.obj & +pow_ci.obj & +pow_dd.obj & +pow_di.obj & +pow_hh.obj & +pow_ii.obj & +pow_ri.obj & +pow_zi.obj & +pow_zz.obj & +r_abs.obj & +r_acos.obj & +r_asin.obj & +r_atan.obj & +r_atn2.obj & +r_cnjg.obj & +r_cos.obj & +r_cosh.obj & +r_dim.obj & +r_exp.obj & +r_imag.obj & +r_int.obj & +r_lg10.obj & +r_log.obj & +r_mod.obj & +r_nint.obj & +r_sign.obj & +r_sin.obj & +r_sinh.obj & +r_sqrt.obj & +r_tan.obj & +r_tanh.obj & +rdfmt.obj & +rewind.obj & +rsfe.obj & +rsli.obj & +rsne.obj & +s_cat.obj & +s_cmp.obj & +s_copy.obj & +s_paus.obj & +s_rnge.obj & +s_stop.obj & +sfe.obj & +sig_die.obj & +signal_.obj & +sue.obj & +system_.obj & +typesize.obj & +uio.obj & +uninit.obj & +util.obj & +wref.obj & +wrtfmt.obj & +wsfe.obj & +wsle.obj & +wsne.obj & +xwsne.obj & +z_abs.obj & +z_cos.obj & +z_div.obj & +z_exp.obj & +z_log.obj & +z_sin.obj & +z_sqrt.obj starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/lio.h000066400000000000000000000030341507764646700217220ustar00rootroot00000000000000/* copy of ftypes from the compiler */ /* variable types * numeric assumptions: * int < reals < complexes * TYDREAL-TYREAL = TYDCOMPLEX-TYCOMPLEX */ /* 0-10 retain their old (pre LOGICAL*1, etc.) */ /* values to allow mixing old and new objects. */ #define TYUNKNOWN 0 #define TYADDR 1 #define TYSHORT 2 #define TYLONG 3 #define TYREAL 4 #define TYDREAL 5 #define TYCOMPLEX 6 #define TYDCOMPLEX 7 #define TYLOGICAL 8 #define TYCHAR 9 #define TYSUBR 10 #define TYINT1 11 #define TYLOGICAL1 12 #define TYLOGICAL2 13 #ifdef Allow_TYQUAD #undef TYQUAD #define TYQUAD 14 #endif #define LINTW 24 #define LINE 80 #define LLOGW 2 #ifdef Old_list_output #define LLOW 1.0 #define LHIGH 1.e9 #define LEFMT " %# .8E" #define LFFMT " %# .9g" #else #define LGFMT "%.9G" #endif /* LEFBL 20 should suffice; 24 overcomes a NeXT bug. */ #define LEFBL 24 typedef union { char flchar; short flshort; ftnint flint; #ifdef Allow_TYQUAD longint fllongint; #endif real flreal; doublereal fldouble; } flex; #ifdef KR_headers extern int (*f__lioproc)(), (*l_getc)(), (*l_ungetc)(); extern int l_read(), l_write(); #else #ifdef __cplusplus extern "C" { #endif extern int (*f__lioproc)(ftnint*, char*, ftnlen, ftnint); extern int l_write(ftnint*, char*, ftnlen, ftnint); extern void x_wsne(cilist*); extern int c_le(cilist*), (*l_getc)(void), (*l_ungetc)(int,FILE*); extern int l_read(ftnint*,char*,ftnlen,ftnint); extern integer e_rsle(void), e_wsle(void), s_wsne(cilist*); extern int z_rnew(void); #endif extern ftnint L_len; extern int f__scale; #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/lread.c000066400000000000000000000346231507764646700222310ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" /* Compile with -DF8X_NML_ELIDE_QUOTES to permit eliding quotation */ /* marks in namelist input a la the Fortran 8X Draft published in */ /* the May 1989 issue of Fortran Forum. */ #ifdef Allow_TYQUAD static longint f__llx; #endif #ifdef KR_headers extern double atof(); extern char *malloc(), *realloc(); int (*f__lioproc)(), (*l_getc)(), (*l_ungetc)(); #else #undef abs #undef min #undef max #include "stdlib.h" #endif #include "fmt.h" #include "lio.h" #include "ctype.h" #include "fp.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern char *f__fmtbuf; #else extern const char *f__fmtbuf; int (*f__lioproc)(ftnint*, char*, ftnlen, ftnint), (*l_getc)(void), (*l_ungetc)(int,FILE*); #endif int l_eof; #define isblnk(x) (f__ltab[x+1]&B) #define issep(x) (f__ltab[x+1]&SX) #define isapos(x) (f__ltab[x+1]&AX) #define isexp(x) (f__ltab[x+1]&EX) #define issign(x) (f__ltab[x+1]&SG) #define iswhit(x) (f__ltab[x+1]&WH) #define SX 1 #define B 2 #define AX 4 #define EX 8 #define SG 16 #define WH 32 char f__ltab[128+1] = { /* offset one for EOF */ 0, 0,0,AX,0,0,0,0,0,0,WH|B,SX|WH,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, SX|B|WH,0,AX,0,0,0,0,AX,0,0,0,SG,SX,SG,0,SX, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,EX,EX,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, AX,0,0,0,EX,EX,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; #ifdef ungetc static int #ifdef KR_headers un_getc(x,f__cf) int x; FILE *f__cf; #else un_getc(int x, FILE *f__cf) #endif { return ungetc(x,f__cf); } #else #define un_getc ungetc #ifdef KR_headers extern int ungetc(); #else extern int ungetc(int, FILE*); /* for systems with a buggy stdio.h */ #endif #endif int t_getc(Void) { int ch; if(f__curunit->uend) return(EOF); if((ch=getc(f__cf))!=EOF) return(ch); if(feof(f__cf)) f__curunit->uend = l_eof = 1; return(EOF); } integer e_rsle(Void) { int ch; if(f__curunit->uend) return(0); while((ch=t_getc())!='\n') if (ch == EOF) { if(feof(f__cf)) f__curunit->uend = l_eof = 1; return EOF; } return(0); } flag f__lquit; int f__lcount,f__ltype,nml_read; char *f__lchar; double f__lx,f__ly; #define ERR(x) if(n=(x)) return(n) #define GETC(x) (x=(*l_getc)()) #define Ungetc(x,y) (*l_ungetc)(x,y) static int #ifdef KR_headers l_R(poststar, reqint) int poststar, reqint; #else l_R(int poststar, int reqint) #endif { char s[FMAX+EXPMAXDIGS+4]; register int ch; register char *sp, *spe, *sp1; long e, exp; int havenum, havestar, se; if (!poststar) { if (f__lcount > 0) return(0); f__lcount = 1; } #ifdef Allow_TYQUAD f__llx = 0; #endif f__ltype = 0; exp = 0; havestar = 0; retry: sp1 = sp = s; spe = sp + FMAX; havenum = 0; switch(GETC(ch)) { case '-': *sp++ = ch; sp1++; spe++; case '+': GETC(ch); } while(ch == '0') { ++havenum; GETC(ch); } while(isdigit(ch)) { if (sp < spe) *sp++ = ch; else ++exp; GETC(ch); } if (ch == '*' && !poststar) { if (sp == sp1 || exp || *s == '-') { errfl(f__elist->cierr,112,"bad repetition count"); } poststar = havestar = 1; *sp = 0; f__lcount = atoi(s); goto retry; } if (ch == '.') { #ifndef ALLOW_FLOAT_IN_INTEGER_LIST_INPUT if (reqint) errfl(f__elist->cierr,115,"invalid integer"); #endif GETC(ch); if (sp == sp1) while(ch == '0') { ++havenum; --exp; GETC(ch); } while(isdigit(ch)) { if (sp < spe) { *sp++ = ch; --exp; } GETC(ch); } } havenum += sp - sp1; se = 0; if (issign(ch)) goto signonly; if (havenum && isexp(ch)) { #ifndef ALLOW_FLOAT_IN_INTEGER_LIST_INPUT if (reqint) errfl(f__elist->cierr,115,"invalid integer"); #endif GETC(ch); if (issign(ch)) { signonly: if (ch == '-') se = 1; GETC(ch); } if (!isdigit(ch)) { bad: errfl(f__elist->cierr,112,"exponent field"); } e = ch - '0'; while(isdigit(GETC(ch))) { e = 10*e + ch - '0'; if (e > EXPMAX) goto bad; } if (se) exp -= e; else exp += e; } (void) Ungetc(ch, f__cf); if (sp > sp1) { ++havenum; while(*--sp == '0') ++exp; if (exp) sprintf(sp+1, "e%ld", exp); else sp[1] = 0; f__lx = atof(s); #ifdef Allow_TYQUAD if (reqint&2 && (se = sp - sp1 + exp) > 14 && se < 20) { /* Assuming 64-bit longint and 32-bit long. */ if (exp < 0) sp += exp; if (sp1 <= sp) { f__llx = *sp1 - '0'; while(++sp1 <= sp) f__llx = 10*f__llx + (*sp1 - '0'); } while(--exp >= 0) f__llx *= 10; if (*s == '-') f__llx = -f__llx; } #endif } else f__lx = 0.; if (havenum) f__ltype = TYLONG; else switch(ch) { case ',': case '/': break; default: if (havestar && ( ch == ' ' ||ch == '\t' ||ch == '\n')) break; if (nml_read > 1) { f__lquit = 2; return 0; } errfl(f__elist->cierr,112,"invalid number"); } return 0; } static int #ifdef KR_headers rd_count(ch) register int ch; #else rd_count(register int ch) #endif { if (ch < '0' || ch > '9') return 1; f__lcount = ch - '0'; while(GETC(ch) >= '0' && ch <= '9') f__lcount = 10*f__lcount + ch - '0'; Ungetc(ch,f__cf); return f__lcount <= 0; } static int l_C(Void) { int ch, nml_save; double lz; if(f__lcount>0) return(0); f__ltype=0; GETC(ch); if(ch!='(') { if (nml_read > 1 && (ch < '0' || ch > '9')) { Ungetc(ch,f__cf); f__lquit = 2; return 0; } if (rd_count(ch)) if(!f__cf || !feof(f__cf)) errfl(f__elist->cierr,112,"complex format"); else err(f__elist->cierr,(EOF),"lread"); if(GETC(ch)!='*') { if(!f__cf || !feof(f__cf)) errfl(f__elist->cierr,112,"no star"); else err(f__elist->cierr,(EOF),"lread"); } if(GETC(ch)!='(') { Ungetc(ch,f__cf); return(0); } } else f__lcount = 1; while(iswhit(GETC(ch))); Ungetc(ch,f__cf); nml_save = nml_read; nml_read = 0; if (ch = l_R(1,0)) return ch; if (!f__ltype) errfl(f__elist->cierr,112,"no real part"); lz = f__lx; while(iswhit(GETC(ch))); if(ch!=',') { (void) Ungetc(ch,f__cf); errfl(f__elist->cierr,112,"no comma"); } while(iswhit(GETC(ch))); (void) Ungetc(ch,f__cf); if (ch = l_R(1,0)) return ch; if (!f__ltype) errfl(f__elist->cierr,112,"no imaginary part"); while(iswhit(GETC(ch))); if(ch!=')') errfl(f__elist->cierr,112,"no )"); f__ly = f__lx; f__lx = lz; #ifdef Allow_TYQUAD f__llx = 0; #endif nml_read = nml_save; return(0); } static char nmLbuf[256], *nmL_next; static int (*nmL_getc_save)(Void); #ifdef KR_headers static int (*nmL_ungetc_save)(/* int, FILE* */); #else static int (*nmL_ungetc_save)(int, FILE*); #endif static int nmL_getc(Void) { int rv; if (rv = *nmL_next++) return rv; l_getc = nmL_getc_save; l_ungetc = nmL_ungetc_save; return (*l_getc)(); } static int #ifdef KR_headers nmL_ungetc(x, f) int x; FILE *f; #else nmL_ungetc(int x, FILE *f) #endif { f = f; /* banish non-use warning */ return *--nmL_next = x; } static int #ifdef KR_headers Lfinish(ch, dot, rvp) int ch, dot, *rvp; #else Lfinish(int ch, int dot, int *rvp) #endif { char *s, *se; static char what[] = "namelist input"; s = nmLbuf + 2; se = nmLbuf + sizeof(nmLbuf) - 1; *s++ = ch; while(!issep(GETC(ch)) && ch!=EOF) { if (s >= se) { nmLbuf_ovfl: return *rvp = err__fl(f__elist->cierr,131,what); } *s++ = ch; if (ch != '=') continue; if (dot) return *rvp = err__fl(f__elist->cierr,112,what); got_eq: *s = 0; nmL_getc_save = l_getc; l_getc = nmL_getc; nmL_ungetc_save = l_ungetc; l_ungetc = nmL_ungetc; nmLbuf[1] = *(nmL_next = nmLbuf) = ','; *rvp = f__lcount = 0; return 1; } if (dot) goto done; for(;;) { if (s >= se) goto nmLbuf_ovfl; *s++ = ch; if (!isblnk(ch)) break; if (GETC(ch) == EOF) goto done; } if (ch == '=') goto got_eq; done: Ungetc(ch, f__cf); return 0; } static int l_L(Void) { int ch, rv, sawdot; if(f__lcount>0) return(0); f__lcount = 1; f__ltype=0; GETC(ch); if(isdigit(ch)) { rd_count(ch); if(GETC(ch)!='*') if(!f__cf || !feof(f__cf)) errfl(f__elist->cierr,112,"no star"); else err(f__elist->cierr,(EOF),"lread"); GETC(ch); } sawdot = 0; if(ch == '.') { sawdot = 1; GETC(ch); } switch(ch) { case 't': case 'T': if (nml_read && Lfinish(ch, sawdot, &rv)) return rv; f__lx=1; break; case 'f': case 'F': if (nml_read && Lfinish(ch, sawdot, &rv)) return rv; f__lx=0; break; default: if(isblnk(ch) || issep(ch) || ch==EOF) { (void) Ungetc(ch,f__cf); return(0); } if (nml_read > 1) { Ungetc(ch,f__cf); f__lquit = 2; return 0; } errfl(f__elist->cierr,112,"logical"); } f__ltype=TYLONG; while(!issep(GETC(ch)) && ch!=EOF); Ungetc(ch, f__cf); return(0); } #define BUFSIZE 128 static int l_CHAR(Void) { int ch,size,i; static char rafail[] = "realloc failure"; char quote,*p; if(f__lcount>0) return(0); f__ltype=0; if(f__lchar!=NULL) free(f__lchar); size=BUFSIZE; p=f__lchar = (char *)malloc((unsigned int)size); if(f__lchar == NULL) errfl(f__elist->cierr,113,"no space"); GETC(ch); if(isdigit(ch)) { /* allow Fortran 8x-style unquoted string... */ /* either find a repetition count or the string */ f__lcount = ch - '0'; *p++ = ch; for(i = 1;;) { switch(GETC(ch)) { case '*': if (f__lcount == 0) { f__lcount = 1; #ifndef F8X_NML_ELIDE_QUOTES if (nml_read) goto no_quote; #endif goto noquote; } p = f__lchar; goto have_lcount; case ',': case ' ': case '\t': case '\n': case '/': Ungetc(ch,f__cf); /* no break */ case EOF: f__lcount = 1; f__ltype = TYCHAR; return *p = 0; } if (!isdigit(ch)) { f__lcount = 1; #ifndef F8X_NML_ELIDE_QUOTES if (nml_read) { no_quote: errfl(f__elist->cierr,112, "undelimited character string"); } #endif goto noquote; } *p++ = ch; f__lcount = 10*f__lcount + ch - '0'; if (++i == size) { f__lchar = (char *)realloc(f__lchar, (unsigned int)(size += BUFSIZE)); if(f__lchar == NULL) errfl(f__elist->cierr,113,rafail); p = f__lchar + i; } } } else (void) Ungetc(ch,f__cf); have_lcount: if(GETC(ch)=='\'' || ch=='"') quote=ch; else if(isblnk(ch) || (issep(ch) && ch != '\n') || ch==EOF) { Ungetc(ch,f__cf); return 0; } #ifndef F8X_NML_ELIDE_QUOTES else if (nml_read > 1) { Ungetc(ch,f__cf); f__lquit = 2; return 0; } #endif else { /* Fortran 8x-style unquoted string */ *p++ = ch; for(i = 1;;) { switch(GETC(ch)) { case ',': case ' ': case '\t': case '\n': case '/': Ungetc(ch,f__cf); /* no break */ case EOF: f__ltype = TYCHAR; return *p = 0; } noquote: *p++ = ch; if (++i == size) { f__lchar = (char *)realloc(f__lchar, (unsigned int)(size += BUFSIZE)); if(f__lchar == NULL) errfl(f__elist->cierr,113,rafail); p = f__lchar + i; } } } f__ltype=TYCHAR; for(i=0;;) { while(GETC(ch)!=quote && ch!='\n' && ch!=EOF && ++icierr,113,rafail); p=f__lchar+i-1; *p++ = ch; } else if(ch==EOF) return(EOF); else if(ch=='\n') { if(*(p-1) != '\\') continue; i--; p--; if(++iciunit]; if(a->ciunit>=MXUNIT || a->ciunit<0) err(a->cierr,101,"stler"); f__scale=f__recpos=0; f__elist=a; if(f__curunit->ufd==NULL && fk_open(SEQ,FMT,a->ciunit)) err(a->cierr,102,"lio"); f__cf=f__curunit->ufd; if(!f__curunit->ufmt) err(a->cierr,103,"lio") return(0); } int #ifdef KR_headers l_read(number,ptr,len,type) ftnint *number,type; char *ptr; ftnlen len; #else l_read(ftnint *number, char *ptr, ftnlen len, ftnint type) #endif { #define Ptr ((flex *)ptr) int i,n,ch; doublereal *yy; real *xx; for(i=0;i<*number;i++) { if(f__lquit) return(0); if(l_eof) err(f__elist->ciend, EOF, "list in") if(f__lcount == 0) { f__ltype = 0; for(;;) { GETC(ch); switch(ch) { case EOF: err(f__elist->ciend,(EOF),"list in") case ' ': case '\t': case '\n': continue; case '/': f__lquit = 1; goto loopend; case ',': f__lcount = 1; goto loopend; default: (void) Ungetc(ch, f__cf); goto rddata; } } } rddata: switch((int)type) { case TYINT1: case TYSHORT: case TYLONG: #ifndef ALLOW_FLOAT_IN_INTEGER_LIST_INPUT ERR(l_R(0,1)); break; #endif case TYREAL: case TYDREAL: ERR(l_R(0,0)); break; #ifdef TYQUAD case TYQUAD: n = l_R(0,2); if (n) return n; break; #endif case TYCOMPLEX: case TYDCOMPLEX: ERR(l_C()); break; case TYLOGICAL1: case TYLOGICAL2: case TYLOGICAL: ERR(l_L()); break; case TYCHAR: ERR(l_CHAR()); break; } while (GETC(ch) == ' ' || ch == '\t'); if (ch != ',' || f__lcount > 1) Ungetc(ch,f__cf); loopend: if(f__lquit) return(0); if(f__cf && ferror(f__cf)) { clearerr(f__cf); errfl(f__elist->cierr,errno,"list in"); } if(f__ltype==0) goto bump; switch((int)type) { case TYINT1: case TYLOGICAL1: Ptr->flchar = (char)f__lx; break; case TYLOGICAL2: case TYSHORT: Ptr->flshort = (short)f__lx; break; case TYLOGICAL: case TYLONG: Ptr->flint = (ftnint)f__lx; break; #ifdef Allow_TYQUAD case TYQUAD: if (!(Ptr->fllongint = f__llx)) Ptr->fllongint = f__lx; break; #endif case TYREAL: Ptr->flreal=f__lx; break; case TYDREAL: Ptr->fldouble=f__lx; break; case TYCOMPLEX: xx=(real *)ptr; *xx++ = f__lx; *xx = f__ly; break; case TYDCOMPLEX: yy=(doublereal *)ptr; *yy++ = f__lx; *yy = f__ly; break; case TYCHAR: b_char(f__lchar,ptr,len); break; } bump: if(f__lcount>0) f__lcount--; ptr += len; if (nml_read) nml_read++; } return(0); #undef Ptr } #ifdef KR_headers integer s_rsle(a) cilist *a; #else integer s_rsle(cilist *a) #endif { int n; f__reading=1; f__external=1; f__formatted=1; if(n=c_le(a)) return(n); f__lioproc = l_read; f__lquit = 0; f__lcount = 0; l_eof = 0; if(f__curunit->uwrt && f__nowreading(f__curunit)) err(a->cierr,errno,"read start"); if(f__curunit->uend) err(f__elist->ciend,(EOF),"read start"); l_getc = t_getc; l_ungetc = un_getc; f__doend = xrd_SL; return(0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/lwrite.c000066400000000000000000000110101507764646700224310ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #include "fmt.h" #include "lio.h" #ifdef __cplusplus extern "C" { #endif ftnint L_len; int f__Aquote; static VOID donewrec(Void) { if (f__recpos) (*f__donewrec)(); } static VOID #ifdef KR_headers lwrt_I(n) longint n; #else lwrt_I(longint n) #endif { char *p; int ndigit, sign; p = f__icvt(n, &ndigit, &sign, 10); if(f__recpos + ndigit >= L_len) donewrec(); PUT(' '); if (sign) PUT('-'); while(*p) PUT(*p++); } static VOID #ifdef KR_headers lwrt_L(n, len) ftnint n; ftnlen len; #else lwrt_L(ftnint n, ftnlen len) #endif { if(f__recpos+LLOGW>=L_len) donewrec(); wrt_L((Uint *)&n,LLOGW, len); } static VOID #ifdef KR_headers lwrt_A(p,len) char *p; ftnlen len; #else lwrt_A(char *p, ftnlen len) #endif { int a; char *p1, *pe; a = 0; pe = p + len; if (f__Aquote) { a = 3; if (len > 1 && p[len-1] == ' ') { while(--len > 1 && p[len-1] == ' '); pe = p + len; } p1 = p; while(p1 < pe) if (*p1++ == '\'') a++; } if(f__recpos+len+a >= L_len) donewrec(); if (a #ifndef OMIT_BLANK_CC || !f__recpos #endif ) PUT(' '); if (a) { PUT('\''); while(p < pe) { if (*p == '\'') PUT('\''); PUT(*p++); } PUT('\''); } else while(p < pe) PUT(*p++); } static int #ifdef KR_headers l_g(buf, n) char *buf; double n; #else l_g(char *buf, double n) #endif { #ifdef Old_list_output doublereal absn; char *fmt; absn = n; if (absn < 0) absn = -absn; fmt = LLOW <= absn && absn < LHIGH ? LFFMT : LEFMT; #ifdef USE_STRLEN sprintf(buf, fmt, n); return strlen(buf); #else return sprintf(buf, fmt, n); #endif #else register char *b, c, c1; b = buf; *b++ = ' '; if (n < 0) { *b++ = '-'; n = -n; } else *b++ = ' '; if (n == 0) { #ifdef SIGNED_ZEROS if (signbit_f2c(&n)) *b++ = '-'; #endif *b++ = '0'; *b++ = '.'; *b = 0; goto f__ret; } sprintf(b, LGFMT, n); switch(*b) { #ifndef WANT_LEAD_0 case '0': while(b[0] = b[1]) b++; break; #endif case 'i': case 'I': /* Infinity */ case 'n': case 'N': /* NaN */ while(*++b); break; default: /* Fortran 77 insists on having a decimal point... */ for(;; b++) switch(*b) { case 0: *b++ = '.'; *b = 0; goto f__ret; case '.': while(*++b); goto f__ret; case 'E': for(c1 = '.', c = 'E'; *b = c1; c1 = c, c = *++b); goto f__ret; } } f__ret: return b - buf; #endif } static VOID #ifdef KR_headers l_put(s) register char *s; #else l_put(register char *s) #endif { #ifdef KR_headers register void (*pn)() = f__putn; #else register void (*pn)(int) = f__putn; #endif register int c; while(c = *s++) (*pn)(c); } static VOID #ifdef KR_headers lwrt_F(n) double n; #else lwrt_F(double n) #endif { char buf[LEFBL]; if(f__recpos + l_g(buf,n) >= L_len) donewrec(); l_put(buf); } static VOID #ifdef KR_headers lwrt_C(a,b) double a,b; #else lwrt_C(double a, double b) #endif { char *ba, *bb, bufa[LEFBL], bufb[LEFBL]; int al, bl; al = l_g(bufa, a); for(ba = bufa; *ba == ' '; ba++) --al; bl = l_g(bufb, b) + 1; /* intentionally high by 1 */ for(bb = bufb; *bb == ' '; bb++) --bl; if(f__recpos + al + bl + 3 >= L_len) donewrec(); #ifdef OMIT_BLANK_CC else #endif PUT(' '); PUT('('); l_put(ba); PUT(','); if (f__recpos + bl >= L_len) { (*f__donewrec)(); #ifndef OMIT_BLANK_CC PUT(' '); #endif } l_put(bb); PUT(')'); } int #ifdef KR_headers l_write(number,ptr,len,type) ftnint *number,type; char *ptr; ftnlen len; #else l_write(ftnint *number, char *ptr, ftnlen len, ftnint type) #endif { #define Ptr ((flex *)ptr) int i; longint x; double y,z; real *xx; doublereal *yy; for(i=0;i< *number; i++) { switch((int)type) { default: f__fatal(117,"unknown type in lio"); case TYINT1: x = Ptr->flchar; goto xint; case TYSHORT: x=Ptr->flshort; goto xint; #ifdef Allow_TYQUAD case TYQUAD: x = Ptr->fllongint; goto xint; #endif case TYLONG: x=Ptr->flint; xint: lwrt_I(x); break; case TYREAL: y=Ptr->flreal; goto xfloat; case TYDREAL: y=Ptr->fldouble; xfloat: lwrt_F(y); break; case TYCOMPLEX: xx= &Ptr->flreal; y = *xx++; z = *xx; goto xcomplex; case TYDCOMPLEX: yy = &Ptr->fldouble; y= *yy++; z = *yy; xcomplex: lwrt_C(y,z); break; case TYLOGICAL1: x = Ptr->flchar; goto xlog; case TYLOGICAL2: x = Ptr->flshort; goto xlog; case TYLOGICAL: x = Ptr->flint; xlog: lwrt_L(Ptr->flint, len); break; case TYCHAR: lwrt_A(ptr,len); break; } ptr += len; } return(0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/main.c000066400000000000000000000042661507764646700220660ustar00rootroot00000000000000/* STARTUP PROCEDURE FOR UNIX FORTRAN PROGRAMS */ #include "stdio.h" #include "signal1.h" #ifndef SIGIOT #ifdef SIGABRT #define SIGIOT SIGABRT #endif #endif #ifndef KR_headers #undef VOID #include "stdlib.h" #ifdef __cplusplus extern "C" { #endif #endif #ifndef VOID #define VOID void #endif #ifdef __cplusplus extern "C" { #endif #ifdef NO__STDC #define ONEXIT onexit extern VOID f_exit(); #else #ifndef KR_headers extern void f_exit(void); #ifndef NO_ONEXIT #define ONEXIT atexit extern int atexit(void (*)(void)); #endif #else #ifndef NO_ONEXIT #define ONEXIT onexit extern VOID f_exit(); #endif #endif #endif #ifdef KR_headers extern VOID f_init(), sig_die(); extern int MAIN__(); #define Int /* int */ #else extern void f_init(void), sig_die(const char*, int); extern int MAIN__(void); #define Int int #endif static VOID sigfdie(Sigarg) { Use_Sigarg; sig_die("Floating Exception", 1); } static VOID sigidie(Sigarg) { Use_Sigarg; sig_die("IOT Trap", 1); } #ifdef SIGQUIT static VOID sigqdie(Sigarg) { Use_Sigarg; sig_die("Quit signal", 1); } #endif static VOID sigindie(Sigarg) { Use_Sigarg; sig_die("Interrupt", 0); } static VOID sigtdie(Sigarg) { Use_Sigarg; sig_die("Killed", 0); } #ifdef SIGTRAP static VOID sigtrdie(Sigarg) { Use_Sigarg; sig_die("Trace trap", 1); } #endif int xargc; char **xargv; #ifdef __cplusplus } #endif int #ifdef KR_headers main(argc, argv) int argc; char **argv; #else main(int argc, char **argv) #endif { xargc = argc; xargv = argv; signal1(SIGFPE, sigfdie); /* ignore underflow, enable overflow */ #ifdef SIGIOT signal1(SIGIOT, sigidie); #endif #ifdef SIGTRAP signal1(SIGTRAP, sigtrdie); #endif #ifdef SIGQUIT if(signal1(SIGQUIT,sigqdie) == SIG_IGN) signal1(SIGQUIT, SIG_IGN); #endif if(signal1(SIGINT, sigindie) == SIG_IGN) signal1(SIGINT, SIG_IGN); signal1(SIGTERM,sigtdie); #ifdef pdp11 ldfps(01200); /* detect overflow as an exception */ #endif f_init(); #ifndef NO_ONEXIT ONEXIT(f_exit); #endif MAIN__(); #ifdef NO_ONEXIT f_exit(); #endif exit(0); /* exit(0) rather than return(0) to bypass Cray bug */ return 0; /* For compilers that complain of missing return values; */ /* others will complain that this is unreachable code. */ } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/math.hvc000066400000000000000000000000621507764646700224170ustar00rootroot00000000000000/* for VC 4.2 */ #include #undef complex starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/mkfile.plan9000066400000000000000000000120661507764646700232070ustar00rootroot00000000000000# Plan 9 mkfile for libf2c.a$O f2c.h # For use with "f2c" and "f2c -A": f2c.h: f2c.h0 cp f2c.h0 f2c.h # You may need to adjust signal1.h suitably for your system... signal1.h: signal1.h0 cp signal1.h0 signal1.h clean: rm -f libf2c.a$O *.$O arith.h backspac.$O: fio.h close.$O: fio.h dfe.$O: fio.h dfe.$O: fmt.h due.$O: fio.h endfile.$O: fio.h rawio.h err.$O: fio.h rawio.h fmt.$O: fio.h fmt.$O: fmt.h iio.$O: fio.h iio.$O: fmt.h ilnw.$O: fio.h ilnw.$O: lio.h inquire.$O: fio.h lread.$O: fio.h lread.$O: fmt.h lread.$O: lio.h lread.$O: fp.h lwrite.$O: fio.h lwrite.$O: fmt.h lwrite.$O: lio.h open.$O: fio.h rawio.h rdfmt.$O: fio.h rdfmt.$O: fmt.h rdfmt.$O: fp.h rewind.$O: fio.h rsfe.$O: fio.h rsfe.$O: fmt.h rsli.$O: fio.h rsli.$O: lio.h rsne.$O: fio.h rsne.$O: lio.h sfe.$O: fio.h sue.$O: fio.h uio.$O: fio.h uninit.$O: arith.h util.$O: fio.h wref.$O: fio.h wref.$O: fmt.h wref.$O: fp.h wrtfmt.$O: fio.h wrtfmt.$O: fmt.h wsfe.$O: fio.h wsfe.$O: fmt.h wsle.$O: fio.h wsle.$O: fmt.h wsle.$O: lio.h wsne.$O: fio.h wsne.$O: lio.h xwsne.$O: fio.h xwsne.$O: lio.h xwsne.$O: fmt.h arith.h: arithchk.c pcc -DNO_FPINIT -o arithchk arithchk.c arithchk >$target rm arithchk xsum.out:V: check check: xsum Notice README abort_.c arithchk.c backspac.c c_abs.c c_cos.c \ c_div.c c_exp.c c_log.c c_sin.c c_sqrt.c cabs.c close.c comptry.bat \ d_abs.c d_acos.c d_asin.c d_atan.c d_atn2.c d_cnjg.c d_cos.c d_cosh.c \ d_dim.c d_exp.c d_imag.c d_int.c d_lg10.c d_log.c d_mod.c \ d_nint.c d_prod.c d_sign.c d_sin.c d_sinh.c d_sqrt.c d_tan.c \ d_tanh.c derf_.c derfc_.c dfe.c dolio.c dtime_.c due.c ef1asc_.c \ ef1cmc_.c endfile.c erf_.c erfc_.c err.c etime_.c exit_.c f2c.h0 \ f2ch.add f77_aloc.c f77vers.c fio.h fmt.c fmt.h fmtlib.c \ fp.h ftell_.c \ getarg_.c getenv_.c h_abs.c h_dim.c h_dnnt.c h_indx.c h_len.c \ h_mod.c h_nint.c h_sign.c hl_ge.c hl_gt.c hl_le.c hl_lt.c \ i77vers.c i_abs.c i_dim.c i_dnnt.c i_indx.c i_len.c i_mod.c \ i_nint.c i_sign.c iargc_.c iio.c ilnw.c inquire.c l_ge.c l_gt.c \ l_le.c l_lt.c lbitbits.c lbitshft.c libf2c.lbc libf2c.sy lio.h \ lread.c lwrite.c main.c makefile.sy makefile.u makefile.vc \ makefile.wat math.hvc mkfile.plan9 open.c pow_ci.c pow_dd.c \ pow_di.c pow_hh.c pow_ii.c pow_qq.c pow_ri.c pow_zi.c pow_zz.c \ qbitbits.c qbitshft.c r_abs.c r_acos.c r_asin.c r_atan.c r_atn2.c \ r_cnjg.c r_cos.c r_cosh.c r_dim.c r_exp.c r_imag.c r_int.c r_lg10.c \ r_log.c r_mod.c r_nint.c r_sign.c r_sin.c r_sinh.c r_sqrt.c \ r_tan.c r_tanh.c rawio.h rdfmt.c rewind.c rsfe.c rsli.c rsne.c \ s_cat.c s_cmp.c s_copy.c s_paus.c s_rnge.c s_stop.c sfe.c \ sig_die.c signal1.h0 signal_.c sue.c system_.c typesize.c uio.c \ uninit.c util.c wref.c wrtfmt.c wsfe.c wsle.c wsne.c xwsne.c \ z_abs.c z_cos.c z_div.c z_exp.c z_log.c z_sin.c z_sqrt.c >xsum1.out cmp xsum0.out xsum1.out && mv xsum1.out xsum.out || diff xsum[01].out starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/open.c000066400000000000000000000131051507764646700220730ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #include "string.h" #ifndef NON_POSIX_STDIO #ifdef MSDOS #include "io.h" #else #include "unistd.h" /* for access */ #endif #endif #ifdef KR_headers extern char *malloc(); #ifdef NON_ANSI_STDIO extern char *mktemp(); #endif extern integer f_clos(); #define Const /*nothing*/ #else #define Const const #undef abs #undef min #undef max #include "stdlib.h" #ifdef __cplusplus extern "C" { #endif extern int f__canseek(FILE*); extern integer f_clos(cllist*); #endif #ifdef NON_ANSI_RW_MODES Const char *f__r_mode[2] = {"r", "r"}; Const char *f__w_mode[4] = {"w", "w", "r+w", "r+w"}; #else Const char *f__r_mode[2] = {"rb", "r"}; Const char *f__w_mode[4] = {"wb", "w", "r+b", "r+"}; #endif static char f__buf0[400], *f__buf = f__buf0; int f__buflen = (int)sizeof(f__buf0); static void #ifdef KR_headers f__bufadj(n, c) int n, c; #else f__bufadj(int n, int c) #endif { unsigned int len; char *nbuf, *s, *t, *te; if (f__buf == f__buf0) f__buflen = 1024; while(f__buflen <= n) f__buflen <<= 1; len = (unsigned int)f__buflen; if (len != f__buflen || !(nbuf = (char*)malloc(len))) f__fatal(113, "malloc failure"); s = nbuf; t = f__buf; te = t + c; while(t < te) *s++ = *t++; if (f__buf != f__buf0) free(f__buf); f__buf = nbuf; } int #ifdef KR_headers f__putbuf(c) int c; #else f__putbuf(int c) #endif { char *s, *se; int n; if (f__hiwater > f__recpos) f__recpos = f__hiwater; n = f__recpos + 1; if (n >= f__buflen) f__bufadj(n, f__recpos); s = f__buf; se = s + f__recpos; if (c) *se++ = c; *se = 0; for(;;) { fputs(s, f__cf); s += strlen(s); if (s >= se) break; /* normally happens the first time */ putc(*s++, f__cf); } return 0; } void #ifdef KR_headers x_putc(c) #else x_putc(int c) #endif { if (f__recpos >= f__buflen) f__bufadj(f__recpos, f__buflen); f__buf[f__recpos++] = c; } #define opnerr(f,m,s) {if(f) errno= m; else opn_err(m,s,a); return(m);} static void #ifdef KR_headers opn_err(m, s, a) int m; char *s; olist *a; #else opn_err(int m, const char *s, olist *a) #endif { if (a->ofnm) { /* supply file name to error message */ if (a->ofnmlen >= f__buflen) f__bufadj((int)a->ofnmlen, 0); g_char(a->ofnm, a->ofnmlen, f__curunit->ufnm = f__buf); } f__fatal(m, s); } #ifdef KR_headers integer f_open(a) olist *a; #else integer f_open(olist *a) #endif { unit *b; integer rv; char buf[256], *s; cllist x; int ufmt; FILE *tf; #ifndef NON_UNIX_STDIO int n; #endif f__external = 1; if(a->ounit>=MXUNIT || a->ounit<0) err(a->oerr,101,"open") if (!f__init) f_init(); f__curunit = b = &f__units[a->ounit]; if(b->ufd) { if(a->ofnm==0) { same: if (a->oblnk) b->ublnk = *a->oblnk == 'z' || *a->oblnk == 'Z'; return(0); } #ifdef NON_UNIX_STDIO if (b->ufnm && strlen(b->ufnm) == a->ofnmlen && !strncmp(b->ufnm, a->ofnm, (unsigned)a->ofnmlen)) goto same; #else g_char(a->ofnm,a->ofnmlen,buf); if (f__inode(buf,&n) == b->uinode && n == b->udev) goto same; #endif x.cunit=a->ounit; x.csta=0; x.cerr=a->oerr; if ((rv = f_clos(&x)) != 0) return rv; } b->url = (int)a->orl; b->ublnk = a->oblnk && (*a->oblnk == 'z' || *a->oblnk == 'Z'); if(a->ofm==0) { if(b->url>0) b->ufmt=0; else b->ufmt=1; } else if(*a->ofm=='f' || *a->ofm == 'F') b->ufmt=1; else b->ufmt=0; ufmt = b->ufmt; #ifdef url_Adjust if (b->url && !ufmt) url_Adjust(b->url); #endif if (a->ofnm) { g_char(a->ofnm,a->ofnmlen,buf); if (!buf[0]) opnerr(a->oerr,107,"open") } else sprintf(buf, "fort.%ld", (long)a->ounit); b->uscrtch = 0; b->uend=0; b->uwrt = 0; b->ufd = 0; b->urw = 3; switch(a->osta ? *a->osta : 'u') { case 'o': case 'O': #ifdef NON_POSIX_STDIO if (!(tf = FOPEN(buf,"r"))) opnerr(a->oerr,errno,"open") fclose(tf); #else if (access(buf,0)) opnerr(a->oerr,errno,"open") #endif break; case 's': case 'S': b->uscrtch=1; #ifdef NON_ANSI_STDIO (void) strcpy(buf,"tmp.FXXXXXX"); (void) mktemp(buf); goto replace; #else if (!(b->ufd = tmpfile())) opnerr(a->oerr,errno,"open") b->ufnm = 0; #ifndef NON_UNIX_STDIO b->uinode = b->udev = -1; #endif b->useek = 1; return 0; #endif case 'n': case 'N': #ifdef NON_POSIX_STDIO if ((tf = FOPEN(buf,"r")) || (tf = FOPEN(buf,"a"))) { fclose(tf); opnerr(a->oerr,128,"open") } #else if (!access(buf,0)) opnerr(a->oerr,128,"open") #endif /* no break */ case 'r': /* Fortran 90 replace option */ case 'R': #ifdef NON_ANSI_STDIO replace: #endif if (tf = FOPEN(buf,f__w_mode[0])) fclose(tf); } b->ufnm=(char *) malloc((unsigned int)(strlen(buf)+1)); if(b->ufnm==NULL) opnerr(a->oerr,113,"no space"); (void) strcpy(b->ufnm,buf); if ((s = a->oacc) && b->url) ufmt = 0; if(!(tf = FOPEN(buf, f__w_mode[ufmt|2]))) { if (tf = FOPEN(buf, f__r_mode[ufmt])) b->urw = 1; else if (tf = FOPEN(buf, f__w_mode[ufmt])) { b->uwrt = 1; b->urw = 2; } else err(a->oerr, errno, "open"); } b->useek = f__canseek(b->ufd = tf); #ifndef NON_UNIX_STDIO if((b->uinode = f__inode(buf,&b->udev)) == -1) opnerr(a->oerr,108,"open") #endif if(b->useek) if (a->orl) rewind(b->ufd); else if ((s = a->oacc) && (*s == 'a' || *s == 'A') && FSEEK(b->ufd, 0L, SEEK_END)) opnerr(a->oerr,129,"open"); return(0); } int #ifdef KR_headers fk_open(seq,fmt,n) ftnint n; #else fk_open(int seq, int fmt, ftnint n) #endif { char nbuf[10]; olist a; (void) sprintf(nbuf,"fort.%ld",(long)n); a.oerr=1; a.ounit=n; a.ofnm=nbuf; a.ofnmlen=strlen(nbuf); a.osta=NULL; a.oacc= (char*)(seq==SEQ?"s":"d"); a.ofm = (char*)(fmt==FMT?"f":"u"); a.orl = seq==DIR?1:0; a.oblnk=NULL; return(f_open(&a)); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/pow_ci.c000066400000000000000000000006341507764646700224150ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers VOID pow_ci(p, a, b) /* p = a**b */ complex *p, *a; integer *b; #else extern void pow_zi(doublecomplex*, doublecomplex*, integer*); void pow_ci(complex *p, complex *a, integer *b) /* p = a**b */ #endif { doublecomplex p1, a1; a1.r = a->r; a1.i = a->i; pow_zi(&p1, &a1, b); p->r = p1.r; p->i = p1.i; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/pow_dd.c000066400000000000000000000004241507764646700224060ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double pow(); double pow_dd(ap, bp) doublereal *ap, *bp; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double pow_dd(doublereal *ap, doublereal *bp) #endif { return(pow(*ap, *bp) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/pow_di.c000066400000000000000000000007001507764646700224100ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers double pow_di(ap, bp) doublereal *ap; integer *bp; #else double pow_di(doublereal *ap, integer *bp) #endif { double pow, x; integer n; unsigned long u; pow = 1; x = *ap; n = *bp; if(n != 0) { if(n < 0) { n = -n; x = 1/x; } for(u = n; ; ) { if(u & 01) pow *= x; if(u >>= 1) x *= x; else break; } } return(pow); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/pow_hh.c000066400000000000000000000007511507764646700224210ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers shortint pow_hh(ap, bp) shortint *ap, *bp; #else shortint pow_hh(shortint *ap, shortint *bp) #endif { shortint pow, x, n; unsigned u; x = *ap; n = *bp; if (n <= 0) { if (n == 0 || x == 1) return 1; if (x != -1) return x == 0 ? 1/x : 0; n = -n; } u = n; for(pow = 1; ; ) { if(u & 01) pow *= x; if(u >>= 1) x *= x; else break; } return(pow); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/pow_ii.c000066400000000000000000000007501507764646700224220ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers integer pow_ii(ap, bp) integer *ap, *bp; #else integer pow_ii(integer *ap, integer *bp) #endif { integer pow, x, n; unsigned long u; x = *ap; n = *bp; if (n <= 0) { if (n == 0 || x == 1) return 1; if (x != -1) return x == 0 ? 1/x : 0; n = -n; } u = n; for(pow = 1; ; ) { if(u & 01) pow *= x; if(u >>= 1) x *= x; else break; } return(pow); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/pow_qq.c000066400000000000000000000010041507764646700224330ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers longint pow_qq(ap, bp) longint *ap, *bp; #else longint pow_qq(longint *ap, longint *bp) #endif { longint pow, x, n; unsigned long long u; /* system-dependent */ x = *ap; n = *bp; if (n <= 0) { if (n == 0 || x == 1) return 1; if (x != -1) return x == 0 ? 1/x : 0; n = -n; } u = n; for(pow = 1; ; ) { if(u & 01) pow *= x; if(u >>= 1) x *= x; else break; } return(pow); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/pow_ri.c000066400000000000000000000006641507764646700224370ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers double pow_ri(ap, bp) real *ap; integer *bp; #else double pow_ri(real *ap, integer *bp) #endif { double pow, x; integer n; unsigned long u; pow = 1; x = *ap; n = *bp; if(n != 0) { if(n < 0) { n = -n; x = 1/x; } for(u = n; ; ) { if(u & 01) pow *= x; if(u >>= 1) x *= x; else break; } } return(pow); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/pow_zi.c000066400000000000000000000015231507764646700224420ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers VOID pow_zi(p, a, b) /* p = a**b */ doublecomplex *p, *a; integer *b; #else extern void z_div(doublecomplex*, doublecomplex*, doublecomplex*); void pow_zi(doublecomplex *p, doublecomplex *a, integer *b) /* p = a**b */ #endif { integer n; unsigned long u; double t; doublecomplex q, x; static doublecomplex one = {1.0, 0.0}; n = *b; q.r = 1; q.i = 0; if(n == 0) goto done; if(n < 0) { n = -n; z_div(&x, &one, a); } else { x.r = a->r; x.i = a->i; } for(u = n; ; ) { if(u & 01) { t = q.r * x.r - q.i * x.i; q.i = q.r * x.i + q.i * x.r; q.r = t; } if(u >>= 1) { t = x.r * x.r - x.i * x.i; x.i = 2 * x.r * x.i; x.r = t; } else break; } done: p->i = q.i; p->r = q.r; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/pow_zz.c000066400000000000000000000010451507764646700224620ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double log(), exp(), cos(), sin(), atan2(), f__cabs(); VOID pow_zz(r,a,b) doublecomplex *r, *a, *b; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif extern double f__cabs(double,double); void pow_zz(doublecomplex *r, doublecomplex *a, doublecomplex *b) #endif { double logr, logi, x, y; logr = log( f__cabs(a->r, a->i) ); logi = atan2(a->i, a->r); x = exp( logr * b->r - logi * b->i ); y = logr * b->i + logi * b->r; r->r = x * cos(y); r->i = x * sin(y); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/qbitbits.c000066400000000000000000000021771507764646700227620ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifndef LONGBITS #define LONGBITS 32 #endif #ifndef LONG8BITS #define LONG8BITS (2*LONGBITS) #endif longint #ifdef KR_headers qbit_bits(a, b, len) longint a; integer b, len; #else qbit_bits(longint a, integer b, integer len) #endif { /* Assume 2's complement arithmetic */ ulongint x, y; x = (ulongint) a; y = (ulongint)-1L; x >>= b; y <<= len; return (longint)(x & ~y); } longint #ifdef KR_headers qbit_cshift(a, b, len) longint a; integer b, len; #else qbit_cshift(longint a, integer b, integer len) #endif { ulongint x, y, z; x = (ulongint)a; if (len <= 0) { if (len == 0) return 0; goto full_len; } if (len >= LONG8BITS) { full_len: if (b >= 0) { b %= LONG8BITS; return (longint)(x << b | x >> LONG8BITS - b ); } b = -b; b %= LONG8BITS; return (longint)(x << LONG8BITS - b | x >> b); } y = z = (unsigned long)-1; y <<= len; z &= ~y; y &= x; x &= z; if (b >= 0) { b %= len; return (longint)(y | z & (x << b | x >> len - b)); } b = -b; b %= len; return (longint)(y | z & (x >> b | x << len - b)); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/qbitshft.c000066400000000000000000000004021507764646700227520ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif longint #ifdef KR_headers qbit_shift(a, b) longint a; integer b; #else qbit_shift(longint a, integer b) #endif { return b >= 0 ? a << b : (longint)((ulongint)a >> -b); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_abs.c000066400000000000000000000003161507764646700222200ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers double r_abs(x) real *x; #else double r_abs(real *x) #endif { if(*x >= 0) return(*x); return(- *x); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_acos.c000066400000000000000000000003511507764646700223770ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double acos(); double r_acos(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_acos(real *x) #endif { return( acos(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_asin.c000066400000000000000000000003511507764646700224040ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double asin(); double r_asin(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_asin(real *x) #endif { return( asin(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_atan.c000066400000000000000000000003511507764646700223750ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double atan(); double r_atan(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_atan(real *x) #endif { return( atan(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_atn2.c000066400000000000000000000003751507764646700223240ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double atan2(); double r_atn2(x,y) real *x, *y; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_atn2(real *x, real *y) #endif { return( atan2(*x,*y) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_cnjg.c000066400000000000000000000003531507764646700223750ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers VOID r_cnjg(r, z) complex *r, *z; #else VOID r_cnjg(complex *r, complex *z) #endif { real zi = z->i; r->r = z->r; r->i = -zi; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_cos.c000066400000000000000000000003451507764646700222410ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double cos(); double r_cos(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_cos(real *x) #endif { return( cos(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_cosh.c000066400000000000000000000003511507764646700224060ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double cosh(); double r_cosh(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_cosh(real *x) #endif { return( cosh(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_dim.c000066400000000000000000000003261507764646700222250ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers double r_dim(a,b) real *a, *b; #else double r_dim(real *a, real *b) #endif { return( *a > *b ? *a - *b : 0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_exp.c000066400000000000000000000003451507764646700222510ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double exp(); double r_exp(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_exp(real *x) #endif { return( exp(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_imag.c000066400000000000000000000002751507764646700223740ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers double r_imag(z) complex *z; #else double r_imag(complex *z) #endif { return(z->i); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_int.c000066400000000000000000000004011507764646700222400ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double floor(); double r_int(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_int(real *x) #endif { return( (*x>0) ? floor(*x) : -floor(- *x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_lg10.c000066400000000000000000000004271507764646700222210ustar00rootroot00000000000000#include "f2c.h" #define log10e 0.43429448190325182765 #ifdef KR_headers double log(); double r_lg10(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_lg10(real *x) #endif { return( log10e * log(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_log.c000066400000000000000000000003451507764646700222360ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double log(); double r_log(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_log(real *x) #endif { return( log(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_mod.c000066400000000000000000000012461507764646700222350ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers #ifdef IEEE_drem double drem(); #else double floor(); #endif double r_mod(x,y) real *x, *y; #else #ifdef IEEE_drem double drem(double, double); #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif #endif double r_mod(real *x, real *y) #endif { #ifdef IEEE_drem double xa, ya, z; if ((ya = *y) < 0.) ya = -ya; z = drem(xa = *x, ya); if (xa > 0) { if (z < 0) z += ya; } else if (z > 0) z -= ya; return z; #else double quotient; if( (quotient = (double)*x / *y) >= 0) quotient = floor(quotient); else quotient = -floor(-quotient); return(*x - (*y) * quotient ); #endif } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_nint.c000066400000000000000000000004151507764646700224230ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double floor(); double r_nint(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_nint(real *x) #endif { return( (*x)>=0 ? floor(*x + .5) : -floor(.5 - *x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_sign.c000066400000000000000000000003701507764646700224130ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers double r_sign(a,b) real *a, *b; #else double r_sign(real *a, real *b) #endif { double x; x = (*a >= 0 ? *a : - *a); return( *b >= 0 ? x : -x); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_sin.c000066400000000000000000000003451507764646700222460ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double sin(); double r_sin(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_sin(real *x) #endif { return( sin(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_sinh.c000066400000000000000000000003511507764646700224130ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double sinh(); double r_sinh(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_sinh(real *x) #endif { return( sinh(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_sqrt.c000066400000000000000000000003511507764646700224430ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double sqrt(); double r_sqrt(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_sqrt(real *x) #endif { return( sqrt(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_tan.c000066400000000000000000000003451507764646700222370ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double tan(); double r_tan(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_tan(real *x) #endif { return( tan(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/r_tanh.c000066400000000000000000000003511507764646700224040ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double tanh(); double r_tanh(x) real *x; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif double r_tanh(real *x) #endif { return( tanh(*x) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/rawio.h000066400000000000000000000013161507764646700222610ustar00rootroot00000000000000#ifndef KR_headers #ifdef MSDOS #include "io.h" #ifndef WATCOM #define close _close #define creat _creat #define open _open #define read _read #define write _write #endif /*WATCOM*/ #endif /*MSDOS*/ #ifdef __cplusplus extern "C" { #endif #ifndef MSDOS #ifdef OPEN_DECL extern int creat(const char*,int), open(const char*,int); #endif extern int close(int); extern int read(int,void*,size_t), write(int,void*,size_t); extern int unlink(const char*); #ifndef _POSIX_SOURCE #ifndef NON_UNIX_STDIO extern FILE *fdopen(int, const char*); #endif #endif #endif /*KR_HEADERS*/ extern char *mktemp(char*); #ifdef __cplusplus } #endif #endif #include "fcntl.h" #ifndef O_WRONLY #define O_RDONLY 0 #define O_WRONLY 1 #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/rdfmt.c000066400000000000000000000213351507764646700222520ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #ifdef KR_headers extern double atof(); #define Const /*nothing*/ #else #define Const const #undef abs #undef min #undef max #include "stdlib.h" #endif #include "fmt.h" #include "fp.h" #include "ctype.h" #ifdef __cplusplus extern "C" { #endif static int #ifdef KR_headers rd_Z(n,w,len) Uint *n; ftnlen len; #else rd_Z(Uint *n, int w, ftnlen len) #endif { long x[9]; char *s, *s0, *s1, *se, *t; Const char *sc; int ch, i, w1, w2; static char hex[256]; static int one = 1; int bad = 0; if (!hex['0']) { sc = "0123456789"; while(ch = *sc++) hex[ch] = ch - '0' + 1; sc = "ABCDEF"; while(ch = *sc++) hex[ch] = hex[ch + 'a' - 'A'] = ch - 'A' + 11; } s = s0 = (char *)x; s1 = (char *)&x[4]; se = (char *)&x[8]; if (len > 4*sizeof(long)) return errno = 117; while (w) { GET(ch); if (ch==',' || ch=='\n') break; w--; if (ch > ' ') { if (!hex[ch & 0xff]) bad++; *s++ = ch; if (s == se) { /* discard excess characters */ for(t = s0, s = s1; t < s1;) *t++ = *s++; s = s1; } } } if (bad) return errno = 115; w = (int)len; w1 = s - s0; w2 = w1+1 >> 1; t = (char *)n; if (*(char *)&one) { /* little endian */ t += w - 1; i = -1; } else i = 1; for(; w > w2; t += i, --w) *t = 0; if (!w) return 0; if (w < w2) s0 = s - (w << 1); else if (w1 & 1) { *t = hex[*s0++ & 0xff] - 1; if (!--w) return 0; t += i; } do { *t = hex[*s0 & 0xff]-1 << 4 | hex[s0[1] & 0xff]-1; t += i; s0 += 2; } while(--w); return 0; } static int #ifdef KR_headers rd_I(n,w,len, base) Uint *n; int w; ftnlen len; register int base; #else rd_I(Uint *n, int w, ftnlen len, register int base) #endif { int ch, sign; longint x = 0; if (w <= 0) goto have_x; for(;;) { GET(ch); if (ch != ' ') break; if (!--w) goto have_x; } sign = 0; switch(ch) { case ',': case '\n': w = 0; goto have_x; case '-': sign = 1; case '+': break; default: if (ch >= '0' && ch <= '9') { x = ch - '0'; break; } goto have_x; } while(--w) { GET(ch); if (ch >= '0' && ch <= '9') { x = x*base + ch - '0'; continue; } if (ch != ' ') { if (ch == '\n' || ch == ',') w = 0; break; } if (f__cblank) x *= base; } if (sign) x = -x; have_x: if(len == sizeof(integer)) n->il=x; else if(len == sizeof(char)) n->ic = (char)x; #ifdef Allow_TYQUAD else if (len == sizeof(longint)) n->ili = x; #endif else n->is = (short)x; if (w) { while(--w) GET(ch); return errno = 115; } return 0; } static int #ifdef KR_headers rd_L(n,w,len) ftnint *n; ftnlen len; #else rd_L(ftnint *n, int w, ftnlen len) #endif { int ch, dot, lv; if (w <= 0) goto bad; for(;;) { GET(ch); --w; if (ch != ' ') break; if (!w) goto bad; } dot = 0; retry: switch(ch) { case '.': if (dot++ || !w) goto bad; GET(ch); --w; goto retry; case 't': case 'T': lv = 1; break; case 'f': case 'F': lv = 0; break; default: bad: for(; w > 0; --w) GET(ch); /* no break */ case ',': case '\n': return errno = 116; } switch(len) { case sizeof(char): *(char *)n = (char)lv; break; case sizeof(short): *(short *)n = (short)lv; break; default: *n = lv; } while(w-- > 0) { GET(ch); if (ch == ',' || ch == '\n') break; } return 0; } static int #ifdef KR_headers rd_F(p, w, d, len) ufloat *p; ftnlen len; #else rd_F(ufloat *p, int w, int d, ftnlen len) #endif { char s[FMAX+EXPMAXDIGS+4]; register int ch; register char *sp, *spe, *sp1; double x; int scale1, se; long e, exp; sp1 = sp = s; spe = sp + FMAX; exp = -d; x = 0.; do { GET(ch); w--; } while (ch == ' ' && w); switch(ch) { case '-': *sp++ = ch; sp1++; spe++; case '+': if (!w) goto zero; --w; GET(ch); } while(ch == ' ') { blankdrop: if (!w--) goto zero; GET(ch); } while(ch == '0') { if (!w--) goto zero; GET(ch); } if (ch == ' ' && f__cblank) goto blankdrop; scale1 = f__scale; while(isdigit(ch)) { digloop1: if (sp < spe) *sp++ = ch; else ++exp; digloop1e: if (!w--) goto done; GET(ch); } if (ch == ' ') { if (f__cblank) { ch = '0'; goto digloop1; } goto digloop1e; } if (ch == '.') { exp += d; if (!w--) goto done; GET(ch); if (sp == sp1) { /* no digits yet */ while(ch == '0') { skip01: --exp; skip0: if (!w--) goto done; GET(ch); } if (ch == ' ') { if (f__cblank) goto skip01; goto skip0; } } while(isdigit(ch)) { digloop2: if (sp < spe) { *sp++ = ch; --exp; } digloop2e: if (!w--) goto done; GET(ch); } if (ch == ' ') { if (f__cblank) { ch = '0'; goto digloop2; } goto digloop2e; } } switch(ch) { default: break; case '-': se = 1; goto signonly; case '+': se = 0; goto signonly; case 'e': case 'E': case 'd': case 'D': if (!w--) goto bad; GET(ch); while(ch == ' ') { if (!w--) goto bad; GET(ch); } se = 0; switch(ch) { case '-': se = 1; case '+': signonly: if (!w--) goto bad; GET(ch); } while(ch == ' ') { if (!w--) goto bad; GET(ch); } if (!isdigit(ch)) goto bad; e = ch - '0'; for(;;) { if (!w--) { ch = '\n'; break; } GET(ch); if (!isdigit(ch)) { if (ch == ' ') { if (f__cblank) ch = '0'; else continue; } else break; } e = 10*e + ch - '0'; if (e > EXPMAX && sp > sp1) goto bad; } if (se) exp -= e; else exp += e; scale1 = 0; } switch(ch) { case '\n': case ',': break; default: bad: return (errno = 115); } done: if (sp > sp1) { while(*--sp == '0') ++exp; if (exp -= scale1) sprintf(sp+1, "e%ld", exp); else sp[1] = 0; x = atof(s); } zero: if (len == sizeof(real)) p->pf = x; else p->pd = x; return(0); } static int #ifdef KR_headers rd_A(p,len) char *p; ftnlen len; #else rd_A(char *p, ftnlen len) #endif { int i,ch; for(i=0;i=len) { for(i=0;i0;f__cursor--) if((ch=(*f__getn)())<0) return(ch); if(f__cursor<0) { if(f__recpos+f__cursor < 0) /*err(elist->cierr,110,"fmt")*/ f__cursor = -f__recpos; /* is this in the standard? */ if(f__external == 0) { extern char *f__icptr; f__icptr += f__cursor; } else if(f__curunit && f__curunit->useek) (void) FSEEK(f__cf, f__cursor,SEEK_CUR); else err(f__elist->cierr,106,"fmt"); f__recpos += f__cursor; f__cursor=0; } switch(p->op) { default: fprintf(stderr,"rd_ed, unexpected code: %d\n", p->op); sig_die(f__fmtbuf, 1); case IM: case I: ch = rd_I((Uint *)ptr,p->p1,len, 10); break; /* O and OM don't work right for character, double, complex, */ /* or doublecomplex, and they differ from Fortran 90 in */ /* showing a minus sign for negative values. */ case OM: case O: ch = rd_I((Uint *)ptr, p->p1, len, 8); break; case L: ch = rd_L((ftnint *)ptr,p->p1,len); break; case A: ch = rd_A(ptr,len); break; case AW: ch = rd_AW(ptr,p->p1,len); break; case E: case EE: case D: case G: case GE: case F: ch = rd_F((ufloat *)ptr,p->p1,p->p2.i[0],len); break; /* Z and ZM assume 8-bit bytes. */ case ZM: case Z: ch = rd_Z((Uint *)ptr, p->p1, len); break; } if(ch == 0) return(ch); else if(ch == EOF) return(EOF); if (f__cf) clearerr(f__cf); return(errno); } int #ifdef KR_headers rd_ned(p) struct syl *p; #else rd_ned(struct syl *p) #endif { switch(p->op) { default: fprintf(stderr,"rd_ned, unexpected code: %d\n", p->op); sig_die(f__fmtbuf, 1); case APOS: return(rd_POS(p->p2.s)); case H: return(rd_H(p->p1,p->p2.s)); case SLASH: return((*f__donewrec)()); case TR: case X: f__cursor += p->p1; return(1); case T: f__cursor=p->p1-f__recpos - 1; return(1); case TL: f__cursor -= p->p1; if(f__cursor < -f__recpos) /* TL1000, 1X */ f__cursor = -f__recpos; return(1); } } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/rewind.c000066400000000000000000000007331507764646700224250ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers integer f_rew(a) alist *a; #else integer f_rew(alist *a) #endif { unit *b; if(a->aunit>=MXUNIT || a->aunit<0) err(a->aerr,101,"rewind"); b = &f__units[a->aunit]; if(b->ufd == NULL || b->uwrt == 3) return(0); if(!b->useek) err(a->aerr,106,"rewind") if(b->uwrt) { (void) t_runc(a); b->uwrt = 3; } rewind(b->ufd); b->uend=0; return(0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/rsfe.c000066400000000000000000000027241507764646700220760ustar00rootroot00000000000000/* read sequential formatted external */ #include "f2c.h" #include "fio.h" #include "fmt.h" #ifdef __cplusplus extern "C" { #endif int xrd_SL(Void) { int ch; if(!f__curunit->uend) while((ch=getc(f__cf))!='\n') if (ch == EOF) { f__curunit->uend = 1; break; } f__cursor=f__recpos=0; return(1); } int x_getc(Void) { int ch; if(f__curunit->uend) return(EOF); ch = getc(f__cf); if(ch!=EOF && ch!='\n') { f__recpos++; return(ch); } if(ch=='\n') { (void) ungetc(ch,f__cf); return(ch); } if(f__curunit->uend || feof(f__cf)) { errno=0; f__curunit->uend=1; return(-1); } return(-1); } int x_endp(Void) { xrd_SL(); return f__curunit->uend == 1 ? EOF : 0; } int x_rev(Void) { (void) xrd_SL(); return(0); } #ifdef KR_headers integer s_rsfe(a) cilist *a; /* start */ #else integer s_rsfe(cilist *a) /* start */ #endif { int n; if(!f__init) f_init(); f__reading=1; f__sequential=1; f__formatted=1; f__external=1; if(n=c_sfe(a)) return(n); f__elist=a; f__cursor=f__recpos=0; f__scale=0; f__fmtbuf=a->cifmt; f__cf=f__curunit->ufd; if(pars_f(f__fmtbuf)<0) err(a->cierr,100,"startio"); f__getn= x_getc; f__doed= rd_ed; f__doned= rd_ned; fmt_bg(); f__doend=x_endp; f__donewrec=xrd_SL; f__dorevert=x_rev; f__cblank=f__curunit->ublnk; f__cplus=0; if(f__curunit->uwrt && f__nowreading(f__curunit)) err(a->cierr,errno,"read start"); if(f__curunit->uend) err(f__elist->ciend,(EOF),"read start"); return(0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/rsli.c000066400000000000000000000033711507764646700221070ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #include "lio.h" #include "fmt.h" /* for f__doend */ #ifdef __cplusplus extern "C" { #endif extern flag f__lquit; extern int f__lcount; extern char *f__icptr; extern char *f__icend; extern icilist *f__svic; extern int f__icnum, f__recpos; static int i_getc(Void) { if(f__recpos >= f__svic->icirlen) { if (f__recpos++ == f__svic->icirlen) return '\n'; z_rnew(); } f__recpos++; if(f__icptr >= f__icend) return EOF; return(*f__icptr++); } static #ifdef KR_headers int i_ungetc(ch, f) int ch; FILE *f; #else int i_ungetc(int ch, FILE *f) #endif { if (--f__recpos == f__svic->icirlen) return '\n'; if (f__recpos < -1) err(f__svic->icierr,110,"recend"); /* *--icptr == ch, and icptr may point to read-only memory */ return *--f__icptr /* = ch */; } static void #ifdef KR_headers c_lir(a) icilist *a; #else c_lir(icilist *a) #endif { extern int l_eof; f__reading = 1; f__external = 0; f__formatted = 1; f__svic = a; L_len = a->icirlen; f__recpos = -1; f__icnum = f__recpos = 0; f__cursor = 0; l_getc = i_getc; l_ungetc = i_ungetc; l_eof = 0; f__icptr = a->iciunit; f__icend = f__icptr + a->icirlen*a->icirnum; f__cf = 0; f__curunit = 0; f__elist = (cilist *)a; } #ifdef KR_headers integer s_rsli(a) icilist *a; #else integer s_rsli(icilist *a) #endif { f__lioproc = l_read; f__lquit = 0; f__lcount = 0; c_lir(a); f__doend = 0; return(0); } integer e_rsli(Void) { return 0; } #ifdef KR_headers integer s_rsni(a) icilist *a; #else extern int x_rsne(cilist*); integer s_rsni(icilist *a) #endif { extern int nml_read; integer rv; cilist ca; ca.ciend = a->iciend; ca.cierr = a->icierr; ca.cifmt = a->icifmt; c_lir(a); rv = x_rsne(&ca); nml_read = 0; return rv; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/rsne.c000066400000000000000000000265011507764646700221050ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #include "lio.h" #define MAX_NL_CACHE 3 /* maximum number of namelist hash tables to cache */ #define MAXDIM 20 /* maximum number of subscripts */ struct dimen { ftnlen extent; ftnlen curval; ftnlen delta; ftnlen stride; }; typedef struct dimen dimen; struct hashentry { struct hashentry *next; char *name; Vardesc *vd; }; typedef struct hashentry hashentry; struct hashtab { struct hashtab *next; Namelist *nl; int htsize; hashentry *tab[1]; }; typedef struct hashtab hashtab; static hashtab *nl_cache; static int n_nlcache; static hashentry **zot; static int colonseen; extern ftnlen f__typesize[]; extern flag f__lquit; extern int f__lcount, nml_read; extern int t_getc(Void); #ifdef KR_headers extern char *malloc(), *memset(); #define Const /*nothing*/ #ifdef ungetc static int un_getc(x,f__cf) int x; FILE *f__cf; { return ungetc(x,f__cf); } #else #define un_getc ungetc extern int ungetc(); #endif #else #define Const const #undef abs #undef min #undef max #include "stdlib.h" #include "string.h" #ifdef __cplusplus extern "C" { #endif #ifdef ungetc static int un_getc(int x, FILE *f__cf) { return ungetc(x,f__cf); } #else #define un_getc ungetc extern int ungetc(int, FILE*); /* for systems with a buggy stdio.h */ #endif #endif static Vardesc * #ifdef KR_headers hash(ht, s) hashtab *ht; register char *s; #else hash(hashtab *ht, register char *s) #endif { register int c, x; register hashentry *h; char *s0 = s; for(x = 0; c = *s++; x = x & 0x4000 ? ((x << 1) & 0x7fff) + 1 : x << 1) x += c; for(h = *(zot = ht->tab + x % ht->htsize); h; h = h->next) if (!strcmp(s0, h->name)) return h->vd; return 0; } hashtab * #ifdef KR_headers mk_hashtab(nl) Namelist *nl; #else mk_hashtab(Namelist *nl) #endif { int nht, nv; hashtab *ht; Vardesc *v, **vd, **vde; hashentry *he; hashtab **x, **x0, *y; for(x = &nl_cache; y = *x; x0 = x, x = &y->next) if (nl == y->nl) return y; if (n_nlcache >= MAX_NL_CACHE) { /* discard least recently used namelist hash table */ y = *x0; free((char *)y->next); y->next = 0; } else n_nlcache++; nv = nl->nvars; if (nv >= 0x4000) nht = 0x7fff; else { for(nht = 1; nht < nv; nht <<= 1); nht += nht - 1; } ht = (hashtab *)malloc(sizeof(hashtab) + (nht-1)*sizeof(hashentry *) + nv*sizeof(hashentry)); if (!ht) return 0; he = (hashentry *)&ht->tab[nht]; ht->nl = nl; ht->htsize = nht; ht->next = nl_cache; nl_cache = ht; memset((char *)ht->tab, 0, nht*sizeof(hashentry *)); vd = nl->vars; vde = vd + nv; while(vd < vde) { v = *vd++; if (!hash(ht, v->name)) { he->next = *zot; *zot = he; he->name = v->name; he->vd = v; he++; } } return ht; } static char Alpha[256], Alphanum[256]; static VOID nl_init(Void) { Const char *s; int c; if(!f__init) f_init(); for(s = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; c = *s++; ) Alpha[c] = Alphanum[c] = Alpha[c + 'a' - 'A'] = Alphanum[c + 'a' - 'A'] = c; for(s = "0123456789_"; c = *s++; ) Alphanum[c] = c; } #define GETC(x) (x=(*l_getc)()) #define Ungetc(x,y) (*l_ungetc)(x,y) static int #ifdef KR_headers getname(s, slen) register char *s; int slen; #else getname(register char *s, int slen) #endif { register char *se = s + slen - 1; register int ch; GETC(ch); if (!(*s++ = Alpha[ch & 0xff])) { if (ch != EOF) ch = 115; errfl(f__elist->cierr, ch, "namelist read"); } while(*s = Alphanum[GETC(ch) & 0xff]) if (s < se) s++; if (ch == EOF) err(f__elist->cierr, EOF, "namelist read"); if (ch > ' ') Ungetc(ch,f__cf); return *s = 0; } static int #ifdef KR_headers getnum(chp, val) int *chp; ftnlen *val; #else getnum(int *chp, ftnlen *val) #endif { register int ch, sign; register ftnlen x; while(GETC(ch) <= ' ' && ch >= 0); if (ch == '-') { sign = 1; GETC(ch); } else { sign = 0; if (ch == '+') GETC(ch); } x = ch - '0'; if (x < 0 || x > 9) return 115; while(GETC(ch) >= '0' && ch <= '9') x = 10*x + ch - '0'; while(ch <= ' ' && ch >= 0) GETC(ch); if (ch == EOF) return EOF; *val = sign ? -x : x; *chp = ch; return 0; } static int #ifdef KR_headers getdimen(chp, d, delta, extent, x1) int *chp; dimen *d; ftnlen delta, extent, *x1; #else getdimen(int *chp, dimen *d, ftnlen delta, ftnlen extent, ftnlen *x1) #endif { register int k; ftnlen x2, x3; if (k = getnum(chp, x1)) return k; x3 = 1; if (*chp == ':') { if (k = getnum(chp, &x2)) return k; x2 -= *x1; if (*chp == ':') { if (k = getnum(chp, &x3)) return k; if (!x3) return 123; x2 /= x3; colonseen = 1; } if (x2 < 0 || x2 >= extent) return 123; d->extent = x2 + 1; } else d->extent = 1; d->curval = 0; d->delta = delta; d->stride = x3; return 0; } #ifndef No_Namelist_Questions static Void #ifdef KR_headers print_ne(a) cilist *a; #else print_ne(cilist *a) #endif { flag intext = f__external; int rpsave = f__recpos; FILE *cfsave = f__cf; unit *usave = f__curunit; cilist t; t = *a; t.ciunit = 6; s_wsne(&t); fflush(f__cf); f__external = intext; f__reading = 1; f__recpos = rpsave; f__cf = cfsave; f__curunit = usave; f__elist = a; } #endif static char where0[] = "namelist read start "; int #ifdef KR_headers x_rsne(a) cilist *a; #else x_rsne(cilist *a) #endif { int ch, got1, k, n, nd, quote, readall; Namelist *nl; static char where[] = "namelist read"; char buf[64]; hashtab *ht; Vardesc *v; dimen *dn, *dn0, *dn1; ftnlen *dims, *dims1; ftnlen b, b0, b1, ex, no, nomax, size, span; ftnint no1, no2, type; char *vaddr; long iva, ivae; dimen dimens[MAXDIM], substr; if (!Alpha['a']) nl_init(); f__reading=1; f__formatted=1; got1 = 0; top: for(;;) switch(GETC(ch)) { case EOF: eof: err(a->ciend,(EOF),where0); case '&': case '$': goto have_amp; #ifndef No_Namelist_Questions case '?': print_ne(a); continue; #endif default: if (ch <= ' ' && ch >= 0) continue; #ifndef No_Namelist_Comments while(GETC(ch) != '\n') if (ch == EOF) goto eof; #else errfl(a->cierr, 115, where0); #endif } have_amp: if (ch = getname(buf,sizeof(buf))) return ch; nl = (Namelist *)a->cifmt; if (strcmp(buf, nl->name)) #ifdef No_Bad_Namelist_Skip errfl(a->cierr, 118, where0); #else { fprintf(stderr, "Skipping namelist \"%s\": seeking namelist \"%s\".\n", buf, nl->name); fflush(stderr); for(;;) switch(GETC(ch)) { case EOF: err(a->ciend, EOF, where0); case '/': case '&': case '$': if (f__external) e_rsle(); else z_rnew(); goto top; case '"': case '\'': quote = ch; more_quoted: while(GETC(ch) != quote) if (ch == EOF) err(a->ciend, EOF, where0); if (GETC(ch) == quote) goto more_quoted; Ungetc(ch,f__cf); default: continue; } } #endif ht = mk_hashtab(nl); if (!ht) errfl(f__elist->cierr, 113, where0); for(;;) { for(;;) switch(GETC(ch)) { case EOF: if (got1) return 0; err(a->ciend, EOF, where0); case '/': case '$': case '&': return 0; default: if (ch <= ' ' && ch >= 0 || ch == ',') continue; Ungetc(ch,f__cf); if (ch = getname(buf,sizeof(buf))) return ch; goto havename; } havename: v = hash(ht,buf); if (!v) errfl(a->cierr, 119, where); while(GETC(ch) <= ' ' && ch >= 0); vaddr = v->addr; type = v->type; if (type < 0) { size = -type; type = TYCHAR; } else size = f__typesize[type]; ivae = size; iva = readall = 0; if (ch == '(' /*)*/ ) { dn = dimens; if (!(dims = v->dims)) { if (type != TYCHAR) errfl(a->cierr, 122, where); if (k = getdimen(&ch, dn, (ftnlen)size, (ftnlen)size, &b)) errfl(a->cierr, k, where); if (ch != ')') errfl(a->cierr, 115, where); b1 = dn->extent; if (--b < 0 || b + b1 > size) return 124; iva += b; size = b1; while(GETC(ch) <= ' ' && ch >= 0); goto scalar; } nd = (int)dims[0]; nomax = span = dims[1]; ivae = iva + size*nomax; colonseen = 0; if (k = getdimen(&ch, dn, size, nomax, &b)) errfl(a->cierr, k, where); no = dn->extent; b0 = dims[2]; dims1 = dims += 3; ex = 1; for(n = 1; n++ < nd; dims++) { if (ch != ',') errfl(a->cierr, 115, where); dn1 = dn + 1; span /= *dims; if (k = getdimen(&ch, dn1, dn->delta**dims, span, &b1)) errfl(a->cierr, k, where); ex *= *dims; b += b1*ex; no *= dn1->extent; dn = dn1; } if (ch != ')') errfl(a->cierr, 115, where); readall = 1 - colonseen; b -= b0; if (b < 0 || b >= nomax) errfl(a->cierr, 125, where); iva += size * b; dims = dims1; while(GETC(ch) <= ' ' && ch >= 0); no1 = 1; dn0 = dimens; if (type == TYCHAR && ch == '(' /*)*/) { if (k = getdimen(&ch, &substr, size, size, &b)) errfl(a->cierr, k, where); if (ch != ')') errfl(a->cierr, 115, where); b1 = substr.extent; if (--b < 0 || b + b1 > size) return 124; iva += b; b0 = size; size = b1; while(GETC(ch) <= ' ' && ch >= 0); if (b1 < b0) goto delta_adj; } if (readall) goto delta_adj; for(; dn0 < dn; dn0++) { if (dn0->extent != *dims++ || dn0->stride != 1) break; no1 *= dn0->extent; } if (dn0 == dimens && dimens[0].stride == 1) { no1 = dimens[0].extent; dn0++; } delta_adj: ex = 0; for(dn1 = dn0; dn1 <= dn; dn1++) ex += (dn1->extent-1) * (dn1->delta *= dn1->stride); for(dn1 = dn; dn1 > dn0; dn1--) { ex -= (dn1->extent - 1) * dn1->delta; dn1->delta -= ex; } } else if (dims = v->dims) { no = no1 = dims[1]; ivae = iva + no*size; } else scalar: no = no1 = 1; if (ch != '=') errfl(a->cierr, 115, where); got1 = nml_read = 1; f__lcount = 0; readloop: for(;;) { if (iva >= ivae || iva < 0) { f__lquit = 1; goto mustend; } else if (iva + no1*size > ivae) no1 = (ivae - iva)/size; f__lquit = 0; if (k = l_read(&no1, vaddr + iva, size, type)) return k; if (f__lquit == 1) return 0; if (readall) { iva += dn0->delta; if (f__lcount > 0) { no2 = (ivae - iva)/size; if (no2 > f__lcount) no2 = f__lcount; if (k = l_read(&no2, vaddr + iva, size, type)) return k; iva += no2 * dn0->delta; } } mustend: GETC(ch); if (readall) if (iva >= ivae) readall = 0; else for(;;) { switch(ch) { case ' ': case '\t': case '\n': GETC(ch); continue; } break; } if (ch == '/' || ch == '$' || ch == '&') { f__lquit = 1; return 0; } else if (f__lquit) { while(ch <= ' ' && ch >= 0) GETC(ch); Ungetc(ch,f__cf); if (!Alpha[ch & 0xff] && ch >= 0) errfl(a->cierr, 125, where); break; } Ungetc(ch,f__cf); if (readall && !Alpha[ch & 0xff]) goto readloop; if ((no -= no1) <= 0) break; for(dn1 = dn0; dn1 <= dn; dn1++) { if (++dn1->curval < dn1->extent) { iva += dn1->delta; goto readloop; } dn1->curval = 0; } break; } } } integer #ifdef KR_headers s_rsne(a) cilist *a; #else s_rsne(cilist *a) #endif { extern int l_eof; int n; f__external=1; l_eof = 0; if(n = c_le(a)) return n; if(f__curunit->uwrt && f__nowreading(f__curunit)) err(a->cierr,errno,where0); l_getc = t_getc; l_ungetc = un_getc; f__doend = xrd_SL; n = x_rsne(a); nml_read = 0; if (n) return n; return e_rsle(); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/s_cat.c000066400000000000000000000026621507764646700222310ustar00rootroot00000000000000/* Unless compiled with -DNO_OVERWRITE, this variant of s_cat allows the * target of a concatenation to appear on its right-hand side (contrary * to the Fortran 77 Standard, but in accordance with Fortran 90). */ #include "f2c.h" #ifndef NO_OVERWRITE #include "stdio.h" #undef abs #ifdef KR_headers extern char *F77_aloc(); extern void free(); extern void exit_(); #else #undef min #undef max #include "stdlib.h" extern #ifdef __cplusplus "C" #endif char *F77_aloc(ftnlen, const char*); #endif #include "string.h" #endif /* NO_OVERWRITE */ #ifdef __cplusplus extern "C" { #endif VOID #ifdef KR_headers s_cat(lp, rpp, rnp, np, ll) char *lp, *rpp[]; ftnint rnp[], *np; ftnlen ll; #else s_cat(char *lp, char *rpp[], ftnint rnp[], ftnint *np, ftnlen ll) #endif { ftnlen i, nc; char *rp; ftnlen n = *np; #ifndef NO_OVERWRITE ftnlen L, m; char *lp0, *lp1; lp0 = 0; lp1 = lp; L = ll; i = 0; while(i < n) { rp = rpp[i]; m = rnp[i++]; if (rp >= lp1 || rp + m <= lp) { if ((L -= m) <= 0) { n = i; break; } lp1 += m; continue; } lp0 = lp; lp = lp1 = F77_aloc(L = ll, "s_cat"); break; } lp1 = lp; #endif /* NO_OVERWRITE */ for(i = 0 ; i < n ; ++i) { nc = ll; if(rnp[i] < nc) nc = rnp[i]; ll -= nc; rp = rpp[i]; while(--nc >= 0) *lp++ = *rp++; } while(--ll >= 0) *lp++ = ' '; #ifndef NO_OVERWRITE if (lp0) { memcpy(lp0, lp1, L); free(lp1); } #endif } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/s_cmp.c000066400000000000000000000013221507764646700222310ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif /* compare two strings */ #ifdef KR_headers integer s_cmp(a0, b0, la, lb) char *a0, *b0; ftnlen la, lb; #else integer s_cmp(char *a0, char *b0, ftnlen la, ftnlen lb) #endif { register unsigned char *a, *aend, *b, *bend; a = (unsigned char *)a0; b = (unsigned char *)b0; aend = a + la; bend = b + lb; if(la <= lb) { while(a < aend) if(*a != *b) return( *a - *b ); else { ++a; ++b; } while(b < bend) if(*b != ' ') return( ' ' - *b ); else ++b; } else { while(b < bend) if(*a == *b) { ++a; ++b; } else return( *a - *b ); while(a < aend) if(*a != ' ') return(*a - ' '); else ++a; } return(0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/s_copy.c000066400000000000000000000020001507764646700224160ustar00rootroot00000000000000/* Unless compiled with -DNO_OVERWRITE, this variant of s_copy allows the * target of an assignment to appear on its right-hand side (contrary * to the Fortran 77 Standard, but in accordance with Fortran 90), * as in a(2:5) = a(4:7) . */ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif /* assign strings: a = b */ #ifdef KR_headers VOID s_copy(a, b, la, lb) register char *a, *b; ftnlen la, lb; #else void s_copy(register char *a, register char *b, ftnlen la, ftnlen lb) #endif { register char *aend, *bend; aend = a + la; if(la <= lb) #ifndef NO_OVERWRITE if (a <= b || a >= b + la) #endif while(a < aend) *a++ = *b++; #ifndef NO_OVERWRITE else for(b += la; a < aend; ) *--aend = *--b; #endif else { bend = b + lb; #ifndef NO_OVERWRITE if (a <= b || a >= bend) #endif while(b < bend) *a++ = *b++; #ifndef NO_OVERWRITE else { a += lb; while(b < bend) *--a = *--bend; a += lb; } #endif while(a < aend) *a++ = ' '; } } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/s_paus.c000066400000000000000000000031211507764646700224210ustar00rootroot00000000000000#include "stdio.h" #include "f2c.h" #define PAUSESIG 15 #include "signal1.h" #ifdef KR_headers #define Void /* void */ #define Int /* int */ #else #define Void void #define Int int #undef abs #undef min #undef max #include "stdlib.h" #ifdef __cplusplus extern "C" { #endif #ifdef __cplusplus extern "C" { #endif extern int getpid(void), isatty(int), pause(void); #endif extern VOID f_exit(Void); #ifndef MSDOS static VOID waitpause(Sigarg) { Use_Sigarg; return; } #endif static VOID #ifdef KR_headers s_1paus(fin) FILE *fin; #else s_1paus(FILE *fin) #endif { fprintf(stderr, "To resume execution, type go. Other input will terminate the job.\n"); fflush(stderr); if( getc(fin)!='g' || getc(fin)!='o' || getc(fin)!='\n' ) { fprintf(stderr, "STOP\n"); #ifdef NO_ONEXIT f_exit(); #endif exit(0); } } int #ifdef KR_headers s_paus(s, n) char *s; ftnlen n; #else s_paus(char *s, ftnlen n) #endif { fprintf(stderr, "PAUSE "); if(n > 0) fprintf(stderr, " %.*s", (int)n, s); fprintf(stderr, " statement executed\n"); if( isatty(fileno(stdin)) ) s_1paus(stdin); else { #ifdef MSDOS FILE *fin; fin = fopen("con", "r"); if (!fin) { fprintf(stderr, "s_paus: can't open con!\n"); fflush(stderr); exit(1); } s_1paus(fin); fclose(fin); #else fprintf(stderr, "To resume execution, execute a kill -%d %d command\n", PAUSESIG, getpid() ); signal1(PAUSESIG, waitpause); fflush(stderr); pause(); #endif } fprintf(stderr, "Execution resumes after PAUSE.\n"); fflush(stderr); return 0; /* NOT REACHED */ #ifdef __cplusplus } #endif } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/s_rnge.c000066400000000000000000000013671507764646700224160ustar00rootroot00000000000000#include "stdio.h" #include "f2c.h" #ifdef __cplusplus extern "C" { #endif /* called when a subscript is out of range */ #ifdef KR_headers extern VOID sig_die(); integer s_rnge(varn, offset, procn, line) char *varn, *procn; ftnint offset, line; #else extern VOID sig_die(const char*,int); integer s_rnge(char *varn, ftnint offset, char *procn, ftnint line) #endif { register int i; fprintf(stderr, "Subscript out of range on file line %ld, procedure ", (long)line); while((i = *procn) && i != '_' && i != ' ') putc(*procn++, stderr); fprintf(stderr, ".\nAttempt to access the %ld-th element of variable ", (long)offset+1); while((i = *varn) && i != ' ') putc(*varn++, stderr); sig_die(".", 1); return 0; /* not reached */ } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/s_stop.c000066400000000000000000000013721507764646700224440ustar00rootroot00000000000000#include "stdio.h" #include "f2c.h" #ifdef KR_headers extern void f_exit(); int s_stop(s, n) char *s; ftnlen n; #else #undef abs #undef min #undef max #include "stdlib.h" #ifdef __cplusplus extern "C" { #endif #ifdef __cplusplus extern "C" { #endif void f_exit(void); int s_stop(char *s, ftnlen n) #endif { int i; if(n > 0) { fprintf(stderr, "STOP "); for(i = 0; iciunit]; if(a->ciunit >= MXUNIT || a->ciunit<0) err(a->cierr,101,"startio"); if(p->ufd==NULL && fk_open(SEQ,FMT,a->ciunit)) err(a->cierr,114,"sfe") if(!p->ufmt) err(a->cierr,102,"sfe") return(0); } integer e_wsfe(Void) { int n = en_fio(); f__fmtbuf = NULL; #ifdef ALWAYS_FLUSH if (!n && fflush(f__cf)) err(f__elist->cierr, errno, "write end"); #endif return n; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/sig_die.c000066400000000000000000000012611507764646700225350ustar00rootroot00000000000000#include "stdio.h" #include "signal.h" #ifndef SIGIOT #ifdef SIGABRT #define SIGIOT SIGABRT #endif #endif #ifdef KR_headers void sig_die(s, kill) char *s; int kill; #else #include "stdlib.h" #ifdef __cplusplus extern "C" { #endif #ifdef __cplusplus extern "C" { #endif extern void f_exit(void); void sig_die(const char *s, int kill) #endif { /* print error message, then clear buffers */ fprintf(stderr, "%s\n", s); if(kill) { fflush(stderr); f_exit(); fflush(stderr); /* now get a core */ #ifdef SIGIOT signal(SIGIOT, SIG_DFL); #endif abort(); } else { #ifdef NO_ONEXIT f_exit(); #endif exit(1); } } #ifdef __cplusplus } #endif #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/signal1.h0000066400000000000000000000015121507764646700225540ustar00rootroot00000000000000/* You may need to adjust the definition of signal1 to supply a */ /* cast to the correct argument type. This detail is system- and */ /* compiler-dependent. The #define below assumes signal.h declares */ /* type SIG_PF for the signal function's second argument. */ /* For some C++ compilers, "#define Sigarg_t ..." may be appropriate. */ #include #ifndef Sigret_t #define Sigret_t void #endif #ifndef Sigarg_t #ifdef KR_headers #define Sigarg_t #else #define Sigarg_t int #endif #endif /*Sigarg_t*/ #ifdef USE_SIG_PF /* compile with -DUSE_SIG_PF under IRIX */ #define sig_pf SIG_PF #else typedef Sigret_t (*sig_pf)(Sigarg_t); #endif #define signal1(a,b) signal(a,(sig_pf)b) #ifdef __cplusplus #define Sigarg ... #define Use_Sigarg #else #define Sigarg Int n #define Use_Sigarg n = n /* shut up compiler warning */ #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/signal_.c000066400000000000000000000004531507764646700225500ustar00rootroot00000000000000#include "f2c.h" #include "signal1.h" #ifdef __cplusplus extern "C" { #endif ftnint #ifdef KR_headers signal_(sigp, proc) integer *sigp; sig_pf proc; #else signal_(integer *sigp, sig_pf proc) #endif { int sig; sig = (int)*sigp; return (ftnint)signal(sig, proc); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/signbit.c000066400000000000000000000005121507764646700225670ustar00rootroot00000000000000#include "arith.h" #ifndef Long #define Long long #endif int #ifdef KR_headers signbit_f2c(x) double *x; #else signbit_f2c(double *x) #endif { #ifdef IEEE_MC68k if (*(Long*)x & 0x80000000) return 1; #else #ifdef IEEE_8087 if (((Long*)x)[1] & 0x80000000) return 1; #endif /*IEEE_8087*/ #endif /*IEEE_MC68k*/ return 0; } starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/sue.c000066400000000000000000000035111507764646700217260ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #ifdef __cplusplus extern "C" { #endif extern uiolen f__reclen; OFF_T f__recloc; int #ifdef KR_headers c_sue(a) cilist *a; #else c_sue(cilist *a) #endif { f__external=f__sequential=1; f__formatted=0; f__curunit = &f__units[a->ciunit]; if(a->ciunit >= MXUNIT || a->ciunit < 0) err(a->cierr,101,"startio"); f__elist=a; if(f__curunit->ufd==NULL && fk_open(SEQ,UNF,a->ciunit)) err(a->cierr,114,"sue"); f__cf=f__curunit->ufd; if(f__curunit->ufmt) err(a->cierr,103,"sue") if(!f__curunit->useek) err(a->cierr,103,"sue") return(0); } #ifdef KR_headers integer s_rsue(a) cilist *a; #else integer s_rsue(cilist *a) #endif { int n; if(!f__init) f_init(); f__reading=1; if(n=c_sue(a)) return(n); f__recpos=0; if(f__curunit->uwrt && f__nowreading(f__curunit)) err(a->cierr, errno, "read start"); if(fread((char *)&f__reclen,sizeof(uiolen),1,f__cf) != 1) { if(feof(f__cf)) { f__curunit->uend = 1; err(a->ciend, EOF, "start"); } clearerr(f__cf); err(a->cierr, errno, "start"); } return(0); } #ifdef KR_headers integer s_wsue(a) cilist *a; #else integer s_wsue(cilist *a) #endif { int n; if(!f__init) f_init(); if(n=c_sue(a)) return(n); f__reading=0; f__reclen=0; if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) err(a->cierr, errno, "write start"); f__recloc=FTELL(f__cf); FSEEK(f__cf,(OFF_T)sizeof(uiolen),SEEK_CUR); return(0); } integer e_wsue(Void) { OFF_T loc; fwrite((char *)&f__reclen,sizeof(uiolen),1,f__cf); #ifdef ALWAYS_FLUSH if (fflush(f__cf)) err(f__elist->cierr, errno, "write end"); #endif loc=FTELL(f__cf); FSEEK(f__cf,f__recloc,SEEK_SET); fwrite((char *)&f__reclen,sizeof(uiolen),1,f__cf); FSEEK(f__cf,loc,SEEK_SET); return(0); } integer e_rsue(Void) { FSEEK(f__cf,(OFF_T)(f__reclen-f__recpos+sizeof(uiolen)),SEEK_CUR); return(0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/sysdep1.h0000066400000000000000000000022621507764646700226110ustar00rootroot00000000000000#ifndef SYSDEP_H_INCLUDED #define SYSDEP_H_INCLUDED #undef USE_LARGEFILE #ifndef NO_LONG_LONG #ifdef __sun__ #define USE_LARGEFILE #define OFF_T off64_t #endif #ifdef __linux__ #define USE_LARGEFILE #define OFF_T __off64_t #endif #ifdef _AIX43 #define _LARGE_FILES #define _LARGE_FILE_API #define USE_LARGEFILE #endif /*_AIX43*/ #ifdef __hpux #define _FILE64 #define _LARGEFILE64_SOURCE #define USE_LARGEFILE #endif /*__hpux*/ #ifdef __sgi #define USE_LARGEFILE #endif /*__sgi*/ #ifdef __FreeBSD__ #define OFF_T off_t #define FSEEK fseeko #define FTELL ftello #endif #ifdef USE_LARGEFILE #ifndef OFF_T #define OFF_T off64_t #endif #define _LARGEFILE_SOURCE #define _LARGEFILE64_SOURCE #include #include #define FOPEN fopen64 #define FREOPEN freopen64 #define FSEEK fseeko64 #define FSTAT fstat64 #define FTELL ftello64 #define FTRUNCATE ftruncate64 #define STAT stat64 #define STAT_ST stat64 #endif /*USE_LARGEFILE*/ #endif /*NO_LONG_LONG*/ #ifndef NON_UNIX_STDIO #ifndef USE_LARGEFILE #define _INCLUDE_POSIX_SOURCE /* for HP-UX */ #define _INCLUDE_XOPEN_SOURCE /* for HP-UX */ #include "sys/types.h" #include "sys/stat.h" #endif #endif #endif /*SYSDEP_H_INCLUDED*/ starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/system_.c000066400000000000000000000012141507764646700226130ustar00rootroot00000000000000/* f77 interface to system routine */ #include "f2c.h" #ifdef KR_headers extern char *F77_aloc(); integer system_(s, n) register char *s; ftnlen n; #else #undef abs #undef min #undef max #include "stdlib.h" #ifdef __cplusplus extern "C" { #endif extern char *F77_aloc(ftnlen, const char*); integer system_(register char *s, ftnlen n) #endif { char buff0[256], *buff; register char *bp, *blast; integer rv; buff = bp = n < sizeof(buff0) ? buff0 : F77_aloc(n+1, "system_"); blast = bp + n; while(bp < blast && *s) *bp++ = *s++; *bp = 0; rv = system(buff); if (buff != buff0) free(buff); return rv; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/typesize.c000066400000000000000000000006021507764646700230040ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif ftnlen f__typesize[] = { 0, 0, sizeof(shortint), sizeof(integer), sizeof(real), sizeof(doublereal), sizeof(complex), sizeof(doublecomplex), sizeof(logical), sizeof(char), 0, sizeof(integer1), sizeof(logical1), sizeof(shortlogical), #ifdef Allow_TYQUAD sizeof(longint), #endif 0}; #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/uio.c000066400000000000000000000031231507764646700217250ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #ifdef __cplusplus extern "C" { #endif uiolen f__reclen; int #ifdef KR_headers do_us(number,ptr,len) ftnint *number; char *ptr; ftnlen len; #else do_us(ftnint *number, char *ptr, ftnlen len) #endif { if(f__reading) { f__recpos += (int)(*number * len); if(f__recpos>f__reclen) err(f__elist->cierr, 110, "do_us"); if (fread(ptr,(int)len,(int)(*number),f__cf) != *number) err(f__elist->ciend, EOF, "do_us"); return(0); } else { f__reclen += *number * len; (void) fwrite(ptr,(int)len,(int)(*number),f__cf); return(0); } } #ifdef KR_headers integer do_ud(number,ptr,len) ftnint *number; char *ptr; ftnlen len; #else integer do_ud(ftnint *number, char *ptr, ftnlen len) #endif { f__recpos += (int)(*number * len); if(f__recpos > f__curunit->url && f__curunit->url!=1) err(f__elist->cierr,110,"do_ud"); if(f__reading) { #ifdef Pad_UDread #ifdef KR_headers int i; #else size_t i; #endif if (!(i = fread(ptr,(int)len,(int)(*number),f__cf)) && !(f__recpos - *number*len)) err(f__elist->cierr,EOF,"do_ud") if (i < *number) memset(ptr + i*len, 0, (*number - i)*len); return 0; #else if(fread(ptr,(int)len,(int)(*number),f__cf) != *number) err(f__elist->cierr,EOF,"do_ud") else return(0); #endif } (void) fwrite(ptr,(int)len,(int)(*number),f__cf); return(0); } #ifdef KR_headers integer do_uio(number,ptr,len) ftnint *number; char *ptr; ftnlen len; #else integer do_uio(ftnint *number, char *ptr, ftnlen len) #endif { if(f__sequential) return(do_us(number,ptr,len)); else return(do_ud(number,ptr,len)); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/uninit.c000066400000000000000000000166401507764646700224470ustar00rootroot00000000000000#include #include #include "arith.h" #define TYSHORT 2 #define TYLONG 3 #define TYREAL 4 #define TYDREAL 5 #define TYCOMPLEX 6 #define TYDCOMPLEX 7 #define TYINT1 11 #define TYQUAD 14 #ifndef Long #define Long long #endif #ifdef __mips #define RNAN 0xffc00000 #define DNAN0 0xfff80000 #define DNAN1 0 #endif #ifdef _PA_RISC1_1 #define RNAN 0xffc00000 #define DNAN0 0xfff80000 #define DNAN1 0 #endif #ifndef RNAN #define RNAN 0xff800001 #ifdef IEEE_MC68k #define DNAN0 0xfff00000 #define DNAN1 1 #else #define DNAN0 1 #define DNAN1 0xfff00000 #endif #endif /*RNAN*/ #ifdef KR_headers #define Void /*void*/ #define FA7UL (unsigned Long) 0xfa7a7a7aL #else #define Void void #define FA7UL 0xfa7a7a7aUL #endif #ifdef __cplusplus extern "C" { #endif static void ieee0(Void); static unsigned Long rnan = RNAN, dnan0 = DNAN0, dnan1 = DNAN1; double _0 = 0.; void #ifdef KR_headers _uninit_f2c(x, type, len) void *x; int type; long len; #else _uninit_f2c(void *x, int type, long len) #endif { static int first = 1; unsigned Long *lx, *lxe; if (first) { first = 0; ieee0(); } if (len == 1) switch(type) { case TYINT1: *(char*)x = 'Z'; return; case TYSHORT: *(short*)x = 0xfa7a; break; case TYLONG: *(unsigned Long*)x = FA7UL; return; case TYQUAD: case TYCOMPLEX: case TYDCOMPLEX: break; case TYREAL: *(unsigned Long*)x = rnan; return; case TYDREAL: lx = (unsigned Long*)x; lx[0] = dnan0; lx[1] = dnan1; return; default: printf("Surprise type %d in _uninit_f2c\n", type); } switch(type) { case TYINT1: memset(x, 'Z', len); break; case TYSHORT: *(short*)x = 0xfa7a; break; case TYQUAD: len *= 2; /* no break */ case TYLONG: lx = (unsigned Long*)x; lxe = lx + len; while(lx < lxe) *lx++ = FA7UL; break; case TYCOMPLEX: len *= 2; /* no break */ case TYREAL: lx = (unsigned Long*)x; lxe = lx + len; while(lx < lxe) *lx++ = rnan; break; case TYDCOMPLEX: len *= 2; /* no break */ case TYDREAL: lx = (unsigned Long*)x; for(lxe = lx + 2*len; lx < lxe; lx += 2) { lx[0] = dnan0; lx[1] = dnan1; } } } #ifdef __cplusplus } #endif #ifndef MSpc #ifdef MSDOS #define MSpc #else #ifdef _WIN32 #define MSpc #endif #endif #endif #ifdef MSpc #define IEEE0_done #include "float.h" #include "signal.h" static void ieee0(Void) { #ifndef __alpha #ifndef EM_DENORMAL #define EM_DENORMAL _EM_DENORMAL #endif #ifndef EM_UNDERFLOW #define EM_UNDERFLOW _EM_UNDERFLOW #endif #ifndef EM_INEXACT #define EM_INEXACT _EM_INEXACT #endif #ifndef MCW_EM #define MCW_EM _MCW_EM #endif _control87(EM_DENORMAL | EM_UNDERFLOW | EM_INEXACT, MCW_EM); #endif /* With MS VC++, compiling and linking with -Zi will permit */ /* clicking to invoke the MS C++ debugger, which will show */ /* the point of error -- provided SIGFPE is SIG_DFL. */ signal(SIGFPE, SIG_DFL); } #endif /* MSpc */ #ifdef __mips /* must link with -lfpe */ #define IEEE0_done /* code from Eric Grosse */ #include #include #include "/usr/include/sigfpe.h" /* full pathname for lcc -N */ #include "/usr/include/sys/fpu.h" static void #ifdef KR_headers ieeeuserhand(exception, val) unsigned exception[5]; int val[2]; #else ieeeuserhand(unsigned exception[5], int val[2]) #endif { fflush(stdout); fprintf(stderr,"ieee0() aborting because of "); if(exception[0]==_OVERFL) fprintf(stderr,"overflow\n"); else if(exception[0]==_UNDERFL) fprintf(stderr,"underflow\n"); else if(exception[0]==_DIVZERO) fprintf(stderr,"divide by 0\n"); else if(exception[0]==_INVALID) fprintf(stderr,"invalid operation\n"); else fprintf(stderr,"\tunknown reason\n"); fflush(stderr); abort(); } static void #ifdef KR_headers ieeeuserhand2(j) unsigned int **j; #else ieeeuserhand2(unsigned int **j) #endif { fprintf(stderr,"ieee0() aborting because of confusion\n"); abort(); } static void ieee0(Void) { int i; for(i=1; i<=4; i++){ sigfpe_[i].count = 1000; sigfpe_[i].trace = 1; sigfpe_[i].repls = _USER_DETERMINED; } sigfpe_[1].repls = _ZERO; /* underflow */ handle_sigfpes( _ON, _EN_UNDERFL|_EN_OVERFL|_EN_DIVZERO|_EN_INVALID, ieeeuserhand,_ABORT_ON_ERROR,ieeeuserhand2); } #endif /* mips */ #ifdef __linux__ #define IEEE0_done #include "fpu_control.h" #ifdef __alpha__ #ifndef USE_setfpucw #define __setfpucw(x) __fpu_control = (x) #endif #endif #ifndef _FPU_SETCW #undef Can_use__setfpucw #define Can_use__setfpucw #endif static void ieee0(Void) { #if (defined(__mc68000__) || defined(__mc68020__) || defined(mc68020) || defined (__mc68k__)) /* Reported 20010705 by Alan Bain */ /* Note that IEEE 754 IOP (illegal operation) */ /* = Signaling NAN (SNAN) + operation error (OPERR). */ #ifdef Can_use__setfpucw __setfpucw(_FPU_IEEE + _FPU_DOUBLE + _FPU_MASK_OPERR + _FPU_MASK_DZ + _FPU_MASK_SNAN+_FPU_MASK_OVFL); #else __fpu_control = _FPU_IEEE + _FPU_DOUBLE + _FPU_MASK_OPERR + _FPU_MASK_DZ + _FPU_MASK_SNAN+_FPU_MASK_OVFL; _FPU_SETCW(__fpu_control); #endif #elif (defined(__powerpc__)||defined(_ARCH_PPC)||defined(_ARCH_PWR)) /* !__mc68k__ */ /* Reported 20011109 by Alan Bain */ #ifdef Can_use__setfpucw /* The following is NOT a mistake -- the author of the fpu_control.h for the PPC has erroneously defined IEEE mode to turn on exceptions other than Inexact! Start from default then and turn on only the ones which we want*/ __setfpucw(_FPU_DEFAULT + _FPU_MASK_IM+_FPU_MASK_OM+_FPU_MASK_UM); #else /* PPC && !Can_use__setfpucw */ __fpu_control = _FPU_DEFAULT +_FPU_MASK_OM+_FPU_MASK_IM+_FPU_MASK_UM; _FPU_SETCW(__fpu_control); #endif /*Can_use__setfpucw*/ #else /* !(mc68000||powerpc) */ #ifdef _FPU_IEEE #ifndef _FPU_EXTENDED /* e.g., ARM processor under Linux */ #define _FPU_EXTENDED 0 #endif #ifndef _FPU_DOUBLE #define _FPU_DOUBLE 0 #endif #ifdef Can_use__setfpucw /* pre-1997 (?) Linux */ __setfpucw(_FPU_IEEE - _FPU_MASK_IM - _FPU_MASK_ZM - _FPU_MASK_OM); #else #ifdef UNINIT_F2C_PRECISION_53 /* 20051004 */ /* unmask invalid, etc., and change rounding precision to double */ __fpu_control = _FPU_IEEE - _FPU_EXTENDED + _FPU_DOUBLE - _FPU_MASK_IM - _FPU_MASK_ZM - _FPU_MASK_OM; _FPU_SETCW(__fpu_control); #else /* unmask invalid, etc., and keep current rounding precision */ fpu_control_t cw; _FPU_GETCW(cw); cw &= ~(_FPU_MASK_IM | _FPU_MASK_ZM | _FPU_MASK_OM); _FPU_SETCW(cw); #endif #endif #else /* !_FPU_IEEE */ fprintf(stderr, "\n%s\n%s\n%s\n%s\n", "WARNING: _uninit_f2c in libf2c does not know how", "to enable trapping on this system, so f2c's -trapuv", "option will not detect uninitialized variables unless", "you can enable trapping manually."); fflush(stderr); #endif /* _FPU_IEEE */ #endif /* __mc68k__ */ } #endif /* __linux__ */ #ifdef __alpha #ifndef IEEE0_done #define IEEE0_done #include static void ieee0(Void) { ieee_set_fp_control(IEEE_TRAP_ENABLE_INV); } #endif /*IEEE0_done*/ #endif /*__alpha*/ #ifdef __hpux #define IEEE0_done #define _INCLUDE_HPUX_SOURCE #include #ifndef FP_X_INV #include #define fpsetmask fesettrapenable #define FP_X_INV FE_INVALID #endif static void ieee0(Void) { fpsetmask(FP_X_INV); } #endif /*__hpux*/ #ifdef _AIX #define IEEE0_done #include static void ieee0(Void) { fp_enable(TRP_INVALID); fp_trap(FP_TRAP_SYNC); } #endif /*_AIX*/ #ifdef __sun #define IEEE0_done #include static void ieee0(Void) { fpsetmask(FP_X_INV); } #endif /*__sparc*/ #ifndef IEEE0_done static void ieee0(Void) {} #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/util.c000066400000000000000000000017141507764646700221120ustar00rootroot00000000000000#include "sysdep1.h" /* here to get stat64 on some badly designed Linux systems */ #include "f2c.h" #include "fio.h" #ifdef __cplusplus extern "C" { #endif VOID #ifdef KR_headers #define Const /*nothing*/ g_char(a,alen,b) char *a,*b; ftnlen alen; #else #define Const const g_char(const char *a, ftnlen alen, char *b) #endif { Const char *x = a + alen; char *y = b + alen; for(;; y--) { if (x <= a) { *b = 0; return; } if (*--x != ' ') break; } *y-- = 0; do *y-- = *x; while(x-- > a); } VOID #ifdef KR_headers b_char(a,b,blen) char *a,*b; ftnlen blen; #else b_char(const char *a, char *b, ftnlen blen) #endif { int i; for(i=0;i= d + 2 || f__scale <= -d) goto nogood; } if(f__scale <= 0) --d; if (len == sizeof(real)) dd = p->pf; else dd = p->pd; if (dd < 0.) { signspace = sign = 1; dd = -dd; } else { sign = 0; signspace = (int)f__cplus; #ifndef VAX if (!dd) { #ifdef SIGNED_ZEROS if (signbit_f2c(&dd)) signspace = sign = 1; #endif dd = 0.; /* avoid -0 */ } #endif } delta = w - (2 /* for the . and the d adjustment above */ + 2 /* for the E+ */ + signspace + d + e); #ifdef WANT_LEAD_0 if (f__scale <= 0 && delta > 0) { delta--; insert0 = 1; } else #endif if (delta < 0) { nogood: while(--w >= 0) PUT('*'); return(0); } if (f__scale < 0) d += f__scale; if (d > FMAX) { d1 = d - FMAX; d = FMAX; } else d1 = 0; sprintf(buf,"%#.*E", d, dd); #ifndef VAX /* check for NaN, Infinity */ if (!isdigit(buf[0])) { switch(buf[0]) { case 'n': case 'N': signspace = 0; /* no sign for NaNs */ } delta = w - strlen(buf) - signspace; if (delta < 0) goto nogood; while(--delta >= 0) PUT(' '); if (signspace) PUT(sign ? '-' : '+'); for(s = buf; *s; s++) PUT(*s); return 0; } #endif se = buf + d + 3; #ifdef GOOD_SPRINTF_EXPONENT /* When possible, exponent has 2 digits. */ if (f__scale != 1 && dd) sprintf(se, "%+.2d", atoi(se) + 1 - f__scale); #else if (dd) sprintf(se, "%+.2d", atoi(se) + 1 - f__scale); else strcpy(se, "+00"); #endif s = ++se; if (e < 2) { if (*s != '0') goto nogood; } #ifndef VAX /* accommodate 3 significant digits in exponent */ if (s[2]) { #ifdef Pedantic if (!e0 && !s[3]) for(s -= 2, e1 = 2; s[0] = s[1]; s++); /* Pedantic gives the behavior that Fortran 77 specifies, */ /* i.e., requires that E be specified for exponent fields */ /* of more than 3 digits. With Pedantic undefined, we get */ /* the behavior that Cray displays -- you get a bigger */ /* exponent field if it fits. */ #else if (!e0) { for(s -= 2, e1 = 2; s[0] = s[1]; s++) #ifdef CRAY delta--; if ((delta += 4) < 0) goto nogood #endif ; } #endif else if (e0 >= 0) goto shift; else e1 = e; } else shift: #endif for(s += 2, e1 = 2; *s; ++e1, ++s) if (e1 >= e) goto nogood; while(--delta >= 0) PUT(' '); if (signspace) PUT(sign ? '-' : '+'); s = buf; i = f__scale; if (f__scale <= 0) { #ifdef WANT_LEAD_0 if (insert0) PUT('0'); #endif PUT('.'); for(; i < 0; ++i) PUT('0'); PUT(*s); s += 2; } else if (f__scale > 1) { PUT(*s); s += 2; while(--i > 0) PUT(*s++); PUT('.'); } if (d1) { se -= 2; while(s < se) PUT(*s++); se += 2; do PUT('0'); while(--d1 > 0); } while(s < se) PUT(*s++); if (e < 2) PUT(s[1]); else { while(++e1 <= e) PUT('0'); while(*s) PUT(*s++); } return 0; } int #ifdef KR_headers wrt_F(p,w,d,len) ufloat *p; ftnlen len; #else wrt_F(ufloat *p, int w, int d, ftnlen len) #endif { int d1, sign, n; double x; char *b, buf[MAXINTDIGS+MAXFRACDIGS+4], *s; x= (len==sizeof(real)?p->pf:p->pd); if (d < MAXFRACDIGS) d1 = 0; else { d1 = d - MAXFRACDIGS; d = MAXFRACDIGS; } if (x < 0.) { x = -x; sign = 1; } else { sign = 0; #ifndef VAX if (!x) { #ifdef SIGNED_ZEROS if (signbit_f2c(&x)) sign = 2; #endif x = 0.; } #endif } if (n = f__scale) if (n > 0) do x *= 10.; while(--n > 0); else do x *= 0.1; while(++n < 0); #ifdef USE_STRLEN sprintf(b = buf, "%#.*f", d, x); n = strlen(b) + d1; #else n = sprintf(b = buf, "%#.*f", d, x) + d1; #endif #ifndef WANT_LEAD_0 if (buf[0] == '0' && d) { ++b; --n; } #endif if (sign == 1) { /* check for all zeros */ for(s = b;;) { while(*s == '0') s++; switch(*s) { case '.': s++; continue; case 0: sign = 0; } break; } } if (sign || f__cplus) ++n; if (n > w) { #ifdef WANT_LEAD_0 if (buf[0] == '0' && --n == w) ++b; else #endif { while(--w >= 0) PUT('*'); return 0; } } for(w -= n; --w >= 0; ) PUT(' '); if (sign) PUT('-'); else if (f__cplus) PUT('+'); while(n = *b++) PUT(n); while(--d1 >= 0) PUT('0'); return 0; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/wrtfmt.c000066400000000000000000000165221507764646700224630ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #include "fmt.h" #ifdef __cplusplus extern "C" { #endif extern icilist *f__svic; extern char *f__icptr; static int mv_cur(Void) /* shouldn't use fseek because it insists on calling fflush */ /* instead we know too much about stdio */ { int cursor = f__cursor; f__cursor = 0; if(f__external == 0) { if(cursor < 0) { if(f__hiwater < f__recpos) f__hiwater = f__recpos; f__recpos += cursor; f__icptr += cursor; if(f__recpos < 0) err(f__elist->cierr, 110, "left off"); } else if(cursor > 0) { if(f__recpos + cursor >= f__svic->icirlen) err(f__elist->cierr, 110, "recend"); if(f__hiwater <= f__recpos) for(; cursor > 0; cursor--) (*f__putn)(' '); else if(f__hiwater <= f__recpos + cursor) { cursor -= f__hiwater - f__recpos; f__icptr += f__hiwater - f__recpos; f__recpos = f__hiwater; for(; cursor > 0; cursor--) (*f__putn)(' '); } else { f__icptr += cursor; f__recpos += cursor; } } return(0); } if (cursor > 0) { if(f__hiwater <= f__recpos) for(;cursor>0;cursor--) (*f__putn)(' '); else if(f__hiwater <= f__recpos + cursor) { cursor -= f__hiwater - f__recpos; f__recpos = f__hiwater; for(; cursor > 0; cursor--) (*f__putn)(' '); } else { f__recpos += cursor; } } else if (cursor < 0) { if(cursor + f__recpos < 0) err(f__elist->cierr,110,"left off"); if(f__hiwater < f__recpos) f__hiwater = f__recpos; f__recpos += cursor; } return(0); } static int #ifdef KR_headers wrt_Z(n,w,minlen,len) Uint *n; int w, minlen; ftnlen len; #else wrt_Z(Uint *n, int w, int minlen, ftnlen len) #endif { register char *s, *se; register int i, w1; static int one = 1; static char hex[] = "0123456789ABCDEF"; s = (char *)n; --len; if (*(char *)&one) { /* little endian */ se = s; s += len; i = -1; } else { se = s + len; i = 1; } for(;; s += i) if (s == se || *s) break; w1 = (i*(se-s) << 1) + 1; if (*s & 0xf0) w1++; if (w1 > w) for(i = 0; i < w; i++) (*f__putn)('*'); else { if ((minlen -= w1) > 0) w1 += minlen; while(--w >= w1) (*f__putn)(' '); while(--minlen >= 0) (*f__putn)('0'); if (!(*s & 0xf0)) { (*f__putn)(hex[*s & 0xf]); if (s == se) return 0; s += i; } for(;; s += i) { (*f__putn)(hex[*s >> 4 & 0xf]); (*f__putn)(hex[*s & 0xf]); if (s == se) break; } } return 0; } static int #ifdef KR_headers wrt_I(n,w,len, base) Uint *n; ftnlen len; register int base; #else wrt_I(Uint *n, int w, ftnlen len, register int base) #endif { int ndigit,sign,spare,i; longint x; char *ans; if(len==sizeof(integer)) x=n->il; else if(len == sizeof(char)) x = n->ic; #ifdef Allow_TYQUAD else if (len == sizeof(longint)) x = n->ili; #endif else x=n->is; ans=f__icvt(x,&ndigit,&sign, base); spare=w-ndigit; if(sign || f__cplus) spare--; if(spare<0) for(i=0;iil; else if(len == sizeof(char)) x = n->ic; #ifdef Allow_TYQUAD else if (len == sizeof(longint)) x = n->ili; #endif else x=n->is; ans=f__icvt(x,&ndigit,&sign, base); if(sign || f__cplus) xsign=1; else xsign=0; if(ndigit+xsign>w || m+xsign>w) { for(i=0;i=m) spare=w-ndigit-xsign; else spare=w-m-xsign; for(i=0;iil; else if(sz == sizeof(char)) x = n->ic; else x=n->is; for(i=0;i 0) (*f__putn)(*p++); return(0); } static int #ifdef KR_headers wrt_AW(p,w,len) char * p; ftnlen len; #else wrt_AW(char * p, int w, ftnlen len) #endif { while(w>len) { w--; (*f__putn)(' '); } while(w-- > 0) (*f__putn)(*p++); return(0); } static int #ifdef KR_headers wrt_G(p,w,d,e,len) ufloat *p; ftnlen len; #else wrt_G(ufloat *p, int w, int d, int e, ftnlen len) #endif { double up = 1,x; int i=0,oldscale,n,j; x = len==sizeof(real)?p->pf:p->pd; if(x < 0 ) x = -x; if(x<.1) { if (x != 0.) return(wrt_E(p,w,d,e,len)); i = 1; goto have_i; } for(;i<=d;i++,up*=10) { if(x>=up) continue; have_i: oldscale = f__scale; f__scale = 0; if(e==0) n=4; else n=e+2; i=wrt_F(p,w-n,d-i,len); for(j=0;jop) { default: fprintf(stderr,"w_ed, unexpected code: %d\n", p->op); sig_die(f__fmtbuf, 1); case I: return(wrt_I((Uint *)ptr,p->p1,len, 10)); case IM: return(wrt_IM((Uint *)ptr,p->p1,p->p2.i[0],len,10)); /* O and OM don't work right for character, double, complex, */ /* or doublecomplex, and they differ from Fortran 90 in */ /* showing a minus sign for negative values. */ case O: return(wrt_I((Uint *)ptr, p->p1, len, 8)); case OM: return(wrt_IM((Uint *)ptr,p->p1,p->p2.i[0],len,8)); case L: return(wrt_L((Uint *)ptr,p->p1, len)); case A: return(wrt_A(ptr,len)); case AW: return(wrt_AW(ptr,p->p1,len)); case D: case E: case EE: return(wrt_E((ufloat *)ptr,p->p1,p->p2.i[0],p->p2.i[1],len)); case G: case GE: return(wrt_G((ufloat *)ptr,p->p1,p->p2.i[0],p->p2.i[1],len)); case F: return(wrt_F((ufloat *)ptr,p->p1,p->p2.i[0],len)); /* Z and ZM assume 8-bit bytes. */ case Z: return(wrt_Z((Uint *)ptr,p->p1,0,len)); case ZM: return(wrt_Z((Uint *)ptr,p->p1,p->p2.i[0],len)); } } int #ifdef KR_headers w_ned(p) struct syl *p; #else w_ned(struct syl *p) #endif { switch(p->op) { default: fprintf(stderr,"w_ned, unexpected code: %d\n", p->op); sig_die(f__fmtbuf, 1); case SLASH: return((*f__donewrec)()); case T: f__cursor = p->p1-f__recpos - 1; return(1); case TL: f__cursor -= p->p1; if(f__cursor < -f__recpos) /* TL1000, 1X */ f__cursor = -f__recpos; return(1); case TR: case X: f__cursor += p->p1; return(1); case APOS: return(wrt_AP(p->p2.s)); case H: return(wrt_H(p->p1,p->p2.s)); } } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/wsfe.c000066400000000000000000000024001507764646700220720ustar00rootroot00000000000000/*write sequential formatted external*/ #include "f2c.h" #include "fio.h" #include "fmt.h" #ifdef __cplusplus extern "C" { #endif int x_wSL(Void) { int n = f__putbuf('\n'); f__hiwater = f__recpos = f__cursor = 0; return(n == 0); } static int xw_end(Void) { int n; if(f__nonl) { f__putbuf(n = 0); fflush(f__cf); } else n = f__putbuf('\n'); f__hiwater = f__recpos = f__cursor = 0; return n; } static int xw_rev(Void) { int n = 0; if(f__workdone) { n = f__putbuf('\n'); f__workdone = 0; } f__hiwater = f__recpos = f__cursor = 0; return n; } #ifdef KR_headers integer s_wsfe(a) cilist *a; /*start*/ #else integer s_wsfe(cilist *a) /*start*/ #endif { int n; if(!f__init) f_init(); f__reading=0; f__sequential=1; f__formatted=1; f__external=1; if(n=c_sfe(a)) return(n); f__elist=a; f__hiwater = f__cursor=f__recpos=0; f__nonl = 0; f__scale=0; f__fmtbuf=a->cifmt; f__cf=f__curunit->ufd; if(pars_f(f__fmtbuf)<0) err(a->cierr,100,"startio"); f__putn= x_putc; f__doed= w_ed; f__doned= w_ned; f__doend=xw_end; f__dorevert=xw_rev; f__donewrec=x_wSL; fmt_bg(); f__cplus=0; f__cblank=f__curunit->ublnk; if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) err(a->cierr,errno,"write start"); return(0); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/wsle.c000066400000000000000000000012711507764646700221050ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #include "fmt.h" #include "lio.h" #include "string.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers integer s_wsle(a) cilist *a; #else integer s_wsle(cilist *a) #endif { int n; if(n=c_le(a)) return(n); f__reading=0; f__external=1; f__formatted=1; f__putn = x_putc; f__lioproc = l_write; L_len = LINE; f__donewrec = x_wSL; if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) err(a->cierr, errno, "list output start"); return(0); } integer e_wsle(Void) { int n = f__putbuf('\n'); f__recpos=0; #ifdef ALWAYS_FLUSH if (!n && fflush(f__cf)) err(f__elist->cierr, errno, "write end"); #endif return(n); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/wsne.c000066400000000000000000000007371507764646700221150ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #include "lio.h" #ifdef __cplusplus extern "C" { #endif integer #ifdef KR_headers s_wsne(a) cilist *a; #else s_wsne(cilist *a) #endif { int n; if(n=c_le(a)) return(n); f__reading=0; f__external=1; f__formatted=1; f__putn = x_putc; L_len = LINE; f__donewrec = x_wSL; if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) err(a->cierr, errno, "namelist output start"); x_wsne(a); return e_wsle(); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/xwsne.c000066400000000000000000000022261507764646700223000ustar00rootroot00000000000000#include "f2c.h" #include "fio.h" #include "lio.h" #include "fmt.h" extern int f__Aquote; static VOID nl_donewrec(Void) { (*f__donewrec)(); PUT(' '); } #ifdef KR_headers x_wsne(a) cilist *a; #else #include "string.h" #ifdef __cplusplus extern "C" { #endif VOID x_wsne(cilist *a) #endif { Namelist *nl; char *s; Vardesc *v, **vd, **vde; ftnint number, type; ftnlen *dims; ftnlen size; extern ftnlen f__typesize[]; nl = (Namelist *)a->cifmt; PUT('&'); for(s = nl->name; *s; s++) PUT(*s); PUT(' '); f__Aquote = 1; vd = nl->vars; vde = vd + nl->nvars; while(vd < vde) { v = *vd++; s = v->name; #ifdef No_Extra_Namelist_Newlines if (f__recpos+strlen(s)+2 >= L_len) #endif nl_donewrec(); while(*s) PUT(*s++); PUT(' '); PUT('='); number = (dims = v->dims) ? dims[1] : 1; type = v->type; if (type < 0) { size = -type; type = TYCHAR; } else size = f__typesize[type]; l_write(&number, v->addr, size, type); if (vd < vde) { if (f__recpos+2 >= L_len) nl_donewrec(); PUT(','); PUT(' '); } else if (f__recpos+1 >= L_len) nl_donewrec(); } f__Aquote = 0; PUT('/'); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/z_abs.c000066400000000000000000000004141507764646700222270ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers double f__cabs(); double z_abs(z) doublecomplex *z; #else double f__cabs(double, double); double z_abs(doublecomplex *z) #endif { return( f__cabs( z->r, z->i ) ); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/z_cos.c000066400000000000000000000005531507764646700222520ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double sin(), cos(), sinh(), cosh(); VOID z_cos(r, z) doublecomplex *r, *z; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif void z_cos(doublecomplex *r, doublecomplex *z) #endif { double zi = z->i, zr = z->r; r->r = cos(zr) * cosh(zi); r->i = - sin(zr) * sinh(zi); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/z_div.c000066400000000000000000000016211507764646700222450ustar00rootroot00000000000000#include "f2c.h" #ifdef __cplusplus extern "C" { #endif #ifdef KR_headers extern VOID sig_die(); VOID z_div(c, a, b) doublecomplex *a, *b, *c; #else extern void sig_die(const char*, int); void z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b) #endif { double ratio, den; double abr, abi, cr; if( (abr = b->r) < 0.) abr = - abr; if( (abi = b->i) < 0.) abi = - abi; if( abr <= abi ) { if(abi == 0) { #ifdef IEEE_COMPLEX_DIVIDE if (a->i != 0 || a->r != 0) abi = 1.; c->i = c->r = abi / abr; return; #else sig_die("complex division by zero", 1); #endif } ratio = b->r / b->i ; den = b->i * (1 + ratio*ratio); cr = (a->r*ratio + a->i) / den; c->i = (a->i*ratio - a->r) / den; } else { ratio = b->i / b->r ; den = b->r * (1 + ratio*ratio); cr = (a->r + a->i*ratio) / den; c->i = (a->i - a->r*ratio) / den; } c->r = cr; } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/z_exp.c000066400000000000000000000005451507764646700222630ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double exp(), cos(), sin(); VOID z_exp(r, z) doublecomplex *r, *z; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif void z_exp(doublecomplex *r, doublecomplex *z) #endif { double expx, zi = z->i; expx = exp(z->r); r->r = expx * cos(zi); r->i = expx * sin(zi); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/z_log.c000066400000000000000000000052511507764646700222470ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double log(), f__cabs(), atan2(); #define ANSI(x) () #else #define ANSI(x) x #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif extern double f__cabs(double, double); #endif #ifndef NO_DOUBLE_EXTENDED #ifndef GCC_COMPARE_BUG_FIXED #ifndef Pre20000310 #ifdef Comment Some versions of gcc, such as 2.95.3 and 3.0.4, are buggy under -O2 or -O3: on IA32 (Intel 80x87) systems, they may do comparisons on values computed in extended-precision registers. This can lead to the test "s > s0" that was used below being carried out incorrectly. The fix below cannot be spoiled by overzealous optimization, since the compiler cannot know whether gcc_bug_bypass_diff_F2C will be nonzero. (We expect it always to be zero. The weird name is unlikely to collide with anything.) An example (provided by Ulrich Jakobus) where the bug fix matters is double complex a, b a = (.1099557428756427618354862829619, .9857360542953131909982289471372) b = log(a) An alternative to the fix below would be to use 53-bit rounding precision, but the means of specifying this 80x87 feature are highly unportable. #endif /*Comment*/ #define BYPASS_GCC_COMPARE_BUG double (*gcc_bug_bypass_diff_F2C) ANSI((double*,double*)); static double #ifdef KR_headers diff1(a,b) double *a, *b; #else diff1(double *a, double *b) #endif { return *a - *b; } #endif /*Pre20000310*/ #endif /*GCC_COMPARE_BUG_FIXED*/ #endif /*NO_DOUBLE_EXTENDED*/ #ifdef KR_headers VOID z_log(r, z) doublecomplex *r, *z; #else void z_log(doublecomplex *r, doublecomplex *z) #endif { double s, s0, t, t2, u, v; double zi = z->i, zr = z->r; #ifdef BYPASS_GCC_COMPARE_BUG double (*diff) ANSI((double*,double*)); #endif r->i = atan2(zi, zr); #ifdef Pre20000310 r->r = log( f__cabs( zr, zi ) ); #else if (zi < 0) zi = -zi; if (zr < 0) zr = -zr; if (zr < zi) { t = zi; zi = zr; zr = t; } t = zi/zr; s = zr * sqrt(1 + t*t); /* now s = f__cabs(zi,zr), and zr = |zr| >= |zi| = zi */ if ((t = s - 1) < 0) t = -t; if (t > .01) r->r = log(s); else { #ifdef Comment log(1+x) = x - x^2/2 + x^3/3 - x^4/4 + - ... = x(1 - x/2 + x^2/3 -+...) [sqrt(y^2 + z^2) - 1] * [sqrt(y^2 + z^2) + 1] = y^2 + z^2 - 1, so sqrt(y^2 + z^2) - 1 = (y^2 + z^2 - 1) / [sqrt(y^2 + z^2) + 1] #endif /*Comment*/ #ifdef BYPASS_GCC_COMPARE_BUG if (!(diff = gcc_bug_bypass_diff_F2C)) diff = diff1; #endif t = ((zr*zr - 1.) + zi*zi) / (s + 1); t2 = t*t; s = 1. - 0.5*t; u = v = 1; do { s0 = s; u *= t2; v += 2; s += u/v - t*u/(v+1); } #ifdef BYPASS_GCC_COMPARE_BUG while(s - s0 > 1e-18 || (*diff)(&s,&s0) > 0.); #else while(s > s0); #endif r->r = s*t; } #endif } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/z_sin.c000066400000000000000000000005471507764646700222620ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double sin(), cos(), sinh(), cosh(); VOID z_sin(r, z) doublecomplex *r, *z; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif void z_sin(doublecomplex *r, doublecomplex *z) #endif { double zi = z->i, zr = z->r; r->r = sin(zr) * cosh(zi); r->i = cos(zr) * sinh(zi); } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/F2CLIBS/libf2c/z_sqrt.c000066400000000000000000000011051507764646700224510ustar00rootroot00000000000000#include "f2c.h" #ifdef KR_headers double sqrt(), f__cabs(); VOID z_sqrt(r, z) doublecomplex *r, *z; #else #undef abs #include "math.h" #ifdef __cplusplus extern "C" { #endif extern double f__cabs(double, double); void z_sqrt(doublecomplex *r, doublecomplex *z) #endif { double mag, zi = z->i, zr = z->r; if( (mag = f__cabs(zr, zi)) == 0.) r->r = r->i = 0.; else if(zr > 0) { r->r = sqrt(0.5 * (mag + zr) ); r->i = zi / r->r / 2; } else { r->i = sqrt(0.5 * (mag - zr) ); if(zi < 0) r->i = - r->i; r->r = zi / r->i / 2; } } #ifdef __cplusplus } #endif starpu-1.4.9+dfsg/min-dgels/base/INCLUDE/000077500000000000000000000000001507764646700176645ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/INCLUDE/blaswrap.h000066400000000000000000000001741507764646700216520ustar00rootroot00000000000000/* CLAPACK 3.0 BLAS wrapper macros * Feb 5, 2000 */ #ifndef __BLASWRAP_H #define __BLASWRAP_H #endif /* __BLASWRAP_H */ starpu-1.4.9+dfsg/min-dgels/base/INCLUDE/clapack.h000066400000000000000000012720571507764646700214510ustar00rootroot00000000000000/* header file for clapack 3.2.1 */ #ifndef __CLAPACK_H #define __CLAPACK_H #ifdef __cplusplus extern "C" { #endif /* Subroutine */ int _starpu_caxpy_(integer *n, complex *ca, complex *cx, integer * incx, complex *cy, integer *incy); /* Subroutine */ int _starpu_ccopy_(integer *n, complex *cx, integer *incx, complex * cy, integer *incy); /* Complex */ VOID _starpu_cdotc_(complex * ret_val, integer *n, complex *cx, integer *incx, complex *cy, integer *incy); /* Complex */ VOID _starpu_cdotu_(complex * ret_val, integer *n, complex *cx, integer *incx, complex *cy, integer *incy); /* Subroutine */ int _starpu_cgbmv_(char *trans, integer *m, integer *n, integer *kl, integer *ku, complex *alpha, complex *a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, integer *incy); /* Subroutine */ int _starpu_cgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, complex *beta, complex *c__, integer *ldc); /* Subroutine */ int _starpu_cgemv_(char *trans, integer *m, integer *n, complex * alpha, complex *a, integer *lda, complex *x, integer *incx, complex * beta, complex *y, integer *incy); /* Subroutine */ int _starpu_cgerc_(integer *m, integer *n, complex *alpha, complex * x, integer *incx, complex *y, integer *incy, complex *a, integer *lda); /* Subroutine */ int _starpu_cgeru_(integer *m, integer *n, complex *alpha, complex * x, integer *incx, complex *y, integer *incy, complex *a, integer *lda); /* Subroutine */ int _starpu_chbmv_(char *uplo, integer *n, integer *k, complex * alpha, complex *a, integer *lda, complex *x, integer *incx, complex * beta, complex *y, integer *incy); /* Subroutine */ int _starpu_chemm_(char *side, char *uplo, integer *m, integer *n, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, complex *beta, complex *c__, integer *ldc); /* Subroutine */ int _starpu_chemv_(char *uplo, integer *n, complex *alpha, complex * a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, integer *incy); /* Subroutine */ int _starpu_cher_(char *uplo, integer *n, real *alpha, complex *x, integer *incx, complex *a, integer *lda); /* Subroutine */ int _starpu_cher2_(char *uplo, integer *n, complex *alpha, complex * x, integer *incx, complex *y, integer *incy, complex *a, integer *lda); /* Subroutine */ int _starpu_cher2k_(char *uplo, char *trans, integer *n, integer *k, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, real *beta, complex *c__, integer *ldc); /* Subroutine */ int _starpu_cherk_(char *uplo, char *trans, integer *n, integer *k, real *alpha, complex *a, integer *lda, real *beta, complex *c__, integer *ldc); /* Subroutine */ int _starpu_chpmv_(char *uplo, integer *n, complex *alpha, complex * ap, complex *x, integer *incx, complex *beta, complex *y, integer * incy); /* Subroutine */ int _starpu_chpr_(char *uplo, integer *n, real *alpha, complex *x, integer *incx, complex *ap); /* Subroutine */ int _starpu_chpr2_(char *uplo, integer *n, complex *alpha, complex * x, integer *incx, complex *y, integer *incy, complex *ap); /* Subroutine */ int _starpu_crotg_(complex *ca, complex *cb, real *c__, complex *s); /* Subroutine */ int _starpu_cscal_(integer *n, complex *ca, complex *cx, integer * incx); /* Subroutine */ int _starpu__starpu_csrot_(integer *n, complex *cx, integer *incx, complex * cy, integer *incy, real *c__, real *s); /* Subroutine */ int _starpu_csscal_(integer *n, real *sa, complex *cx, integer *incx); /* Subroutine */ int _starpu_cswap_(integer *n, complex *cx, integer *incx, complex * cy, integer *incy); /* Subroutine */ int _starpu_csymm_(char *side, char *uplo, integer *m, integer *n, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, complex *beta, complex *c__, integer *ldc); /* Subroutine */ int _starpu_csyr2k_(char *uplo, char *trans, integer *n, integer *k, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, complex *beta, complex *c__, integer *ldc); /* Subroutine */ int _starpu_csyrk_(char *uplo, char *trans, integer *n, integer *k, complex *alpha, complex *a, integer *lda, complex *beta, complex *c__, integer *ldc); /* Subroutine */ int _starpu_ctbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, complex *a, integer *lda, complex *x, integer *incx); /* Subroutine */ int _starpu_ctbsv_(char *uplo, char *trans, char *diag, integer *n, integer *k, complex *a, integer *lda, complex *x, integer *incx); /* Subroutine */ int _starpu_ctpmv_(char *uplo, char *trans, char *diag, integer *n, complex *ap, complex *x, integer *incx); /* Subroutine */ int _starpu_ctpsv_(char *uplo, char *trans, char *diag, integer *n, complex *ap, complex *x, integer *incx); /* Subroutine */ int _starpu_ctrmm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb); /* Subroutine */ int _starpu_ctrmv_(char *uplo, char *trans, char *diag, integer *n, complex *a, integer *lda, complex *x, integer *incx); /* Subroutine */ int _starpu_ctrsm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb); /* Subroutine */ int _starpu_ctrsv_(char *uplo, char *trans, char *diag, integer *n, complex *a, integer *lda, complex *x, integer *incx); doublereal _starpu_dasum_(integer *n, doublereal *dx, integer *incx); /* Subroutine */ int _starpu_daxpy_(integer *n, doublereal *da, doublereal *dx, integer *incx, doublereal *dy, integer *incy); doublereal _starpu_dcabs1_(doublecomplex *z__); /* Subroutine */ int _starpu_dcopy_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy); doublereal _starpu_ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy); /* Subroutine */ int _starpu_dgbmv_(char *trans, integer *m, integer *n, integer *kl, integer *ku, doublereal *alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); /* Subroutine */ int _starpu_dgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, doublereal *alpha, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, integer *ldc); /* Subroutine */ int _starpu_dgemv_(char *trans, integer *m, integer *n, doublereal * alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); /* Subroutine */ int _starpu_dger_(integer *m, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *a, integer *lda); doublereal _starpu_dnrm2_(integer *n, doublereal *x, integer *incx); /* Subroutine */ int _starpu_drot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy, doublereal *c__, doublereal *s); /* Subroutine */ int _starpu_drotg_(doublereal *da, doublereal *db, doublereal *c__, doublereal *s); /* Subroutine */ int _starpu_drotm_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy, doublereal *dparam); /* Subroutine */ int _starpu_drotmg_(doublereal *dd1, doublereal *dd2, doublereal * dx1, doublereal *dy1, doublereal *dparam); /* Subroutine */ int _starpu_dsbmv_(char *uplo, integer *n, integer *k, doublereal * alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); /* Subroutine */ int _starpu_dscal_(integer *n, doublereal *da, doublereal *dx, integer *incx); doublereal _starpu_dsdot_(integer *n, real *sx, integer *incx, real *sy, integer * incy); /* Subroutine */ int _starpu_dspmv_(char *uplo, integer *n, doublereal *alpha, doublereal *ap, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); /* Subroutine */ int _starpu_dspr_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *ap); /* Subroutine */ int _starpu_dspr2_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *ap); /* Subroutine */ int _starpu_dswap_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy); /* Subroutine */ int _starpu_dsymm_(char *side, char *uplo, integer *m, integer *n, doublereal *alpha, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, integer *ldc); /* Subroutine */ int _starpu_dsymv_(char *uplo, integer *n, doublereal *alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); /* Subroutine */ int _starpu_dsyr_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *a, integer *lda); /* Subroutine */ int _starpu_dsyr2_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *a, integer *lda); /* Subroutine */ int _starpu_dsyr2k_(char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, integer *ldc); /* Subroutine */ int _starpu_dsyrk_(char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, doublereal *c__, integer *ldc); /* Subroutine */ int _starpu_dtbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx); /* Subroutine */ int _starpu_dtbsv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx); /* Subroutine */ int _starpu_dtpmv_(char *uplo, char *trans, char *diag, integer *n, doublereal *ap, doublereal *x, integer *incx); /* Subroutine */ int _starpu_dtpsv_(char *uplo, char *trans, char *diag, integer *n, doublereal *ap, doublereal *x, integer *incx); /* Subroutine */ int _starpu_dtrmm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, integer * lda, doublereal *b, integer *ldb); /* Subroutine */ int _starpu_dtrmv_(char *uplo, char *trans, char *diag, integer *n, doublereal *a, integer *lda, doublereal *x, integer *incx); /* Subroutine */ int _starpu_dtrsm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, integer * lda, doublereal *b, integer *ldb); /* Subroutine */ int _starpu_dtrsv_(char *uplo, char *trans, char *diag, integer *n, doublereal *a, integer *lda, doublereal *x, integer *incx); doublereal _starpu_dzasum_(integer *n, doublecomplex *zx, integer *incx); doublereal _starpu_dznrm2_(integer *n, doublecomplex *x, integer *incx); integer _starpu_icamax_(integer *n, complex *cx, integer *incx); integer _starpu_idamax_(integer *n, doublereal *dx, integer *incx); integer _starpu_isamax_(integer *n, real *sx, integer *incx); integer _starpu_izamax_(integer *n, doublecomplex *zx, integer *incx); logical _starpu_lsame_(char *ca, char *cb); doublereal _starpu_sasum_(integer *n, real *sx, integer *incx); /* Subroutine */ int _starpu_saxpy_(integer *n, real *sa, real *sx, integer *incx, real *sy, integer *incy); doublereal _starpu_scabs1_(complex *z__); doublereal _starpu_scasum_(integer *n, complex *cx, integer *incx); doublereal _starpu_scnrm2_(integer *n, complex *x, integer *incx); /* Subroutine */ int _starpu_scopy_(integer *n, real *sx, integer *incx, real *sy, integer *incy); doublereal _starpu_sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy); doublereal _starpu_sdsdot_(integer *n, real *sb, real *sx, integer *incx, real *sy, integer *incy); /* Subroutine */ int _starpu_sgbmv_(char *trans, integer *m, integer *n, integer *kl, integer *ku, real *alpha, real *a, integer *lda, real *x, integer * incx, real *beta, real *y, integer *incy); /* Subroutine */ int _starpu_sgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, real *alpha, real *a, integer *lda, real *b, integer * ldb, real *beta, real *c__, integer *ldc); /* Subroutine */ int _starpu_sgemv_(char *trans, integer *m, integer *n, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer *incy); /* Subroutine */ int _starpu_sger_(integer *m, integer *n, real *alpha, real *x, integer *incx, real *y, integer *incy, real *a, integer *lda); doublereal _starpu_snrm2_(integer *n, real *x, integer *incx); /* Subroutine */ int _starpu_srot_(integer *n, real *sx, integer *incx, real *sy, integer *incy, real *c__, real *s); /* Subroutine */ int _starpu_srotg_(real *sa, real *sb, real *c__, real *s); /* Subroutine */ int _starpu_srotm_(integer *n, real *sx, integer *incx, real *sy, integer *incy, real *sparam); /* Subroutine */ int _starpu_srotmg_(real *sd1, real *sd2, real *sx1, real *sy1, real *sparam); /* Subroutine */ int _starpu_ssbmv_(char *uplo, integer *n, integer *k, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer *incy); /* Subroutine */ int _starpu_sscal_(integer *n, real *sa, real *sx, integer *incx); /* Subroutine */ int _starpu_sspmv_(char *uplo, integer *n, real *alpha, real *ap, real *x, integer *incx, real *beta, real *y, integer *incy); /* Subroutine */ int _starpu_sspr_(char *uplo, integer *n, real *alpha, real *x, integer *incx, real *ap); /* Subroutine */ int _starpu_sspr2_(char *uplo, integer *n, real *alpha, real *x, integer *incx, real *y, integer *incy, real *ap); /* Subroutine */ int _starpu_sswap_(integer *n, real *sx, integer *incx, real *sy, integer *incy); /* Subroutine */ int _starpu_ssymm_(char *side, char *uplo, integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, real *c__, integer *ldc); /* Subroutine */ int _starpu_ssymv_(char *uplo, integer *n, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer * incy); /* Subroutine */ int _starpu_ssyr_(char *uplo, integer *n, real *alpha, real *x, integer *incx, real *a, integer *lda); /* Subroutine */ int _starpu_ssyr2_(char *uplo, integer *n, real *alpha, real *x, integer *incx, real *y, integer *incy, real *a, integer *lda); /* Subroutine */ int _starpu_ssyr2k_(char *uplo, char *trans, integer *n, integer *k, real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, real *c__, integer *ldc); /* Subroutine */ int _starpu_ssyrk_(char *uplo, char *trans, integer *n, integer *k, real *alpha, real *a, integer *lda, real *beta, real *c__, integer * ldc); /* Subroutine */ int _starpu_stbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, real *a, integer *lda, real *x, integer *incx); /* Subroutine */ int _starpu_stbsv_(char *uplo, char *trans, char *diag, integer *n, integer *k, real *a, integer *lda, real *x, integer *incx); /* Subroutine */ int _starpu_stpmv_(char *uplo, char *trans, char *diag, integer *n, real *ap, real *x, integer *incx); /* Subroutine */ int _starpu_stpsv_(char *uplo, char *trans, char *diag, integer *n, real *ap, real *x, integer *incx); /* Subroutine */ int _starpu_strmm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, integer *ldb); /* Subroutine */ int _starpu_strmv_(char *uplo, char *trans, char *diag, integer *n, real *a, integer *lda, real *x, integer *incx); /* Subroutine */ int _starpu_strsm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, integer *ldb); /* Subroutine */ int _starpu_strsv_(char *uplo, char *trans, char *diag, integer *n, real *a, integer *lda, real *x, integer *incx); /* Subroutine */ int _starpu_xerbla_(char *srname, integer *info); /* Subroutine */ int _starpu_xerbla_array__(char *srname_array__, integer * srname_len__, integer *info, ftnlen srname_array_len); /* Subroutine */ int _starpu_zaxpy_(integer *n, doublecomplex *za, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy); /* Subroutine */ int _starpu_zcopy_(integer *n, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy); /* Double Complex */ VOID _starpu_zdotc_(doublecomplex * ret_val, integer *n, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy); /* Double Complex */ VOID _starpu_zdotu_(doublecomplex * ret_val, integer *n, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy); /* Subroutine */ int _starpu_zdrot_(integer *n, doublecomplex *cx, integer *incx, doublecomplex *cy, integer *incy, doublereal *c__, doublereal *s); /* Subroutine */ int _starpu_zdscal_(integer *n, doublereal *da, doublecomplex *zx, integer *incx); /* Subroutine */ int _starpu_zgbmv_(char *trans, integer *m, integer *n, integer *kl, integer *ku, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex * y, integer *incy); /* Subroutine */ int _starpu_zgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex * c__, integer *ldc); /* Subroutine */ int _starpu_zgemv_(char *trans, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * x, integer *incx, doublecomplex *beta, doublecomplex *y, integer * incy); /* Subroutine */ int _starpu_zgerc_(integer *m, integer *n, doublecomplex *alpha, doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, doublecomplex *a, integer *lda); /* Subroutine */ int _starpu_zgeru_(integer *m, integer *n, doublecomplex *alpha, doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, doublecomplex *a, integer *lda); /* Subroutine */ int _starpu_zhbmv_(char *uplo, integer *n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer * incx, doublecomplex *beta, doublecomplex *y, integer *incy); /* Subroutine */ int _starpu_zhemm_(char *side, char *uplo, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer * ldc); /* Subroutine */ int _starpu_zhemv_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex *y, integer *incy); /* Subroutine */ int _starpu_zher_(char *uplo, integer *n, doublereal *alpha, doublecomplex *x, integer *incx, doublecomplex *a, integer *lda); /* Subroutine */ int _starpu_zher2_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, doublecomplex *a, integer *lda); /* Subroutine */ int _starpu_zher2k_(char *uplo, char *trans, integer *n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * b, integer *ldb, doublereal *beta, doublecomplex *c__, integer *ldc); /* Subroutine */ int _starpu_zherk_(char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublecomplex *a, integer *lda, doublereal *beta, doublecomplex *c__, integer *ldc); /* Subroutine */ int _starpu_zhpmv_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *ap, doublecomplex *x, integer *incx, doublecomplex * beta, doublecomplex *y, integer *incy); /* Subroutine */ int _starpu_zhpr_(char *uplo, integer *n, doublereal *alpha, doublecomplex *x, integer *incx, doublecomplex *ap); /* Subroutine */ int _starpu_zhpr2_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, doublecomplex *ap); /* Subroutine */ int _starpu_zrotg_(doublecomplex *ca, doublecomplex *cb, doublereal * c__, doublecomplex *s); /* Subroutine */ int _starpu_zscal_(integer *n, doublecomplex *za, doublecomplex *zx, integer *incx); /* Subroutine */ int _starpu_zswap_(integer *n, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy); /* Subroutine */ int _starpu_zsymm_(char *side, char *uplo, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer * ldc); /* Subroutine */ int _starpu_zsyr2k_(char *uplo, char *trans, integer *n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer * ldc); /* Subroutine */ int _starpu_zsyrk_(char *uplo, char *trans, integer *n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * beta, doublecomplex *c__, integer *ldc); /* Subroutine */ int _starpu_ztbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx); /* Subroutine */ int _starpu_ztbsv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx); /* Subroutine */ int _starpu_ztpmv_(char *uplo, char *trans, char *diag, integer *n, doublecomplex *ap, doublecomplex *x, integer *incx); /* Subroutine */ int _starpu_ztpsv_(char *uplo, char *trans, char *diag, integer *n, doublecomplex *ap, doublecomplex *x, integer *incx); /* Subroutine */ int _starpu_ztrmm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_ztrmv_(char *uplo, char *trans, char *diag, integer *n, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx); /* Subroutine */ int _starpu_ztrsm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_ztrsv_(char *uplo, char *trans, char *diag, integer *n, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx); /* Subroutine */ int _starpu_cbdsqr_(char *uplo, integer *n, integer *ncvt, integer * nru, integer *ncc, real *d__, real *e, complex *vt, integer *ldvt, complex *u, integer *ldu, complex *c__, integer *ldc, real *rwork, integer *info); /* Subroutine */ int _starpu_cgbbrd_(char *vect, integer *m, integer *n, integer *ncc, integer *kl, integer *ku, complex *ab, integer *ldab, real *d__, real *e, complex *q, integer *ldq, complex *pt, integer *ldpt, complex *c__, integer *ldc, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgbcon_(char *norm, integer *n, integer *kl, integer *ku, complex *ab, integer *ldab, integer *ipiv, real *anorm, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgbequ_(integer *m, integer *n, integer *kl, integer *ku, complex *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_cgbequb_(integer *m, integer *n, integer *kl, integer * ku, complex *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_cgbrfs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, complex *ab, integer *ldab, complex *afb, integer * ldafb, integer *ipiv, complex *b, integer *ldb, complex *x, integer * ldx, real *ferr, real *berr, complex *work, real *rwork, integer * info); /* Subroutine */ int _starpu_cgbrfsx_(char *trans, char *equed, integer *n, integer * kl, integer *ku, integer *nrhs, complex *ab, integer *ldab, complex * afb, integer *ldafb, integer *ipiv, real *r__, real *c__, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real *params, complex *work, real *rwork, integer * info); /* Subroutine */ int _starpu_cgbsv_(integer *n, integer *kl, integer *ku, integer * nrhs, complex *ab, integer *ldab, integer *ipiv, complex *b, integer * ldb, integer *info); /* Subroutine */ int _starpu_cgbsvx_(char *fact, char *trans, integer *n, integer *kl, integer *ku, integer *nrhs, complex *ab, integer *ldab, complex *afb, integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgbsvxx_(char *fact, char *trans, integer *n, integer * kl, integer *ku, integer *nrhs, complex *ab, integer *ldab, complex * afb, integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer *n_err_bnds__, real * err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * params, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgbtf2_(integer *m, integer *n, integer *kl, integer *ku, complex *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_cgbtrf_(integer *m, integer *n, integer *kl, integer *ku, complex *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_cgbtrs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, complex *ab, integer *ldab, integer *ipiv, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, real *scale, integer *m, complex *v, integer *ldv, integer *info); /* Subroutine */ int _starpu_cgebal_(char *job, integer *n, complex *a, integer *lda, integer *ilo, integer *ihi, real *scale, integer *info); /* Subroutine */ int _starpu_cgebd2_(integer *m, integer *n, complex *a, integer *lda, real *d__, real *e, complex *tauq, complex *taup, complex *work, integer *info); /* Subroutine */ int _starpu_cgebrd_(integer *m, integer *n, complex *a, integer *lda, real *d__, real *e, complex *tauq, complex *taup, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgecon_(char *norm, integer *n, complex *a, integer *lda, real *anorm, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgeequ_(integer *m, integer *n, complex *a, integer *lda, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_cgeequb_(integer *m, integer *n, complex *a, integer * lda, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_cgees_(char *jobvs, char *sort, L_fp select, integer *n, complex *a, integer *lda, integer *sdim, complex *w, complex *vs, integer *ldvs, complex *work, integer *lwork, real *rwork, logical * bwork, integer *info); /* Subroutine */ int _starpu_cgeesx_(char *jobvs, char *sort, L_fp select, char * sense, integer *n, complex *a, integer *lda, integer *sdim, complex * w, complex *vs, integer *ldvs, real *rconde, real *rcondv, complex * work, integer *lwork, real *rwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_cgeev_(char *jobvl, char *jobvr, integer *n, complex *a, integer *lda, complex *w, complex *vl, integer *ldvl, complex *vr, integer *ldvr, complex *work, integer *lwork, real *rwork, integer * info); /* Subroutine */ int _starpu_cgeevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, complex *a, integer *lda, complex *w, complex *vl, integer *ldvl, complex *vr, integer *ldvr, integer *ilo, integer *ihi, real *scale, real *abnrm, real *rconde, real *rcondv, complex *work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_cgegs_(char *jobvsl, char *jobvsr, integer *n, complex * a, integer *lda, complex *b, integer *ldb, complex *alpha, complex * beta, complex *vsl, integer *ldvsl, complex *vsr, integer *ldvsr, complex *work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_cgegv_(char *jobvl, char *jobvr, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *alpha, complex *beta, complex *vl, integer *ldvl, complex *vr, integer *ldvr, complex * work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_cgehd2_(integer *n, integer *ilo, integer *ihi, complex * a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cgehrd_(integer *n, integer *ilo, integer *ihi, complex * a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgelq2_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cgelqf_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgels_(char *trans, integer *m, integer *n, integer * nrhs, complex *a, integer *lda, complex *b, integer *ldb, complex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgelsd_(integer *m, integer *n, integer *nrhs, complex * a, integer *lda, complex *b, integer *ldb, real *s, real *rcond, integer *rank, complex *work, integer *lwork, real *rwork, integer * iwork, integer *info); /* Subroutine */ int _starpu_cgelss_(integer *m, integer *n, integer *nrhs, complex * a, integer *lda, complex *b, integer *ldb, real *s, real *rcond, integer *rank, complex *work, integer *lwork, real *rwork, integer * info); /* Subroutine */ int _starpu_cgelsx_(integer *m, integer *n, integer *nrhs, complex * a, integer *lda, complex *b, integer *ldb, integer *jpvt, real *rcond, integer *rank, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgelsy_(integer *m, integer *n, integer *nrhs, complex * a, integer *lda, complex *b, integer *ldb, integer *jpvt, real *rcond, integer *rank, complex *work, integer *lwork, real *rwork, integer * info); /* Subroutine */ int _starpu_cgeql2_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cgeqlf_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgeqp3_(integer *m, integer *n, complex *a, integer *lda, integer *jpvt, complex *tau, complex *work, integer *lwork, real * rwork, integer *info); /* Subroutine */ int _starpu_cgeqpf_(integer *m, integer *n, complex *a, integer *lda, integer *jpvt, complex *tau, complex *work, real *rwork, integer * info); /* Subroutine */ int _starpu_cgeqr2_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cgeqrf_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgerfs_(char *trans, integer *n, integer *nrhs, complex * a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex * b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgerfsx_(char *trans, char *equed, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, real *r__, real *c__, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real * err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * params, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgerq2_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cgerqf_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgesc2_(integer *n, complex *a, integer *lda, complex * rhs, integer *ipiv, integer *jpiv, real *scale); /* Subroutine */ int _starpu_cgesdd_(char *jobz, integer *m, integer *n, complex *a, integer *lda, real *s, complex *u, integer *ldu, complex *vt, integer *ldvt, complex *work, integer *lwork, real *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_cgesv_(integer *n, integer *nrhs, complex *a, integer * lda, integer *ipiv, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cgesvd_(char *jobu, char *jobvt, integer *m, integer *n, complex *a, integer *lda, real *s, complex *u, integer *ldu, complex * vt, integer *ldvt, complex *work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_cgesvx_(char *fact, char *trans, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, char *equed, real *r__, real *c__, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgesvxx_(char *fact, char *trans, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, char *equed, real *r__, real *c__, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real *params, complex *work, real *rwork, integer * info); /* Subroutine */ int _starpu_cgetc2_(integer *n, complex *a, integer *lda, integer * ipiv, integer *jpiv, integer *info); /* Subroutine */ int _starpu_cgetf2_(integer *m, integer *n, complex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_cgetrf_(integer *m, integer *n, complex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_cgetri_(integer *n, complex *a, integer *lda, integer * ipiv, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgetrs_(char *trans, integer *n, integer *nrhs, complex * a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_cggbak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, real *lscale, real *rscale, integer *m, complex *v, integer *ldv, integer *info); /* Subroutine */ int _starpu_cggbal_(char *job, integer *n, complex *a, integer *lda, complex *b, integer *ldb, integer *ilo, integer *ihi, real *lscale, real *rscale, real *work, integer *info); /* Subroutine */ int _starpu_cgges_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, integer *n, complex *a, integer *lda, complex *b, integer * ldb, integer *sdim, complex *alpha, complex *beta, complex *vsl, integer *ldvsl, complex *vsr, integer *ldvsr, complex *work, integer * lwork, real *rwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_cggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, char *sense, integer *n, complex *a, integer *lda, complex *b, integer *ldb, integer *sdim, complex *alpha, complex *beta, complex * vsl, integer *ldvsl, complex *vsr, integer *ldvsr, real *rconde, real *rcondv, complex *work, integer *lwork, real *rwork, integer *iwork, integer *liwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_cggev_(char *jobvl, char *jobvr, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *alpha, complex *beta, complex *vl, integer *ldvl, complex *vr, integer *ldvr, complex * work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_cggevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *alpha, complex *beta, complex *vl, integer *ldvl, complex * vr, integer *ldvr, integer *ilo, integer *ihi, real *lscale, real * rscale, real *abnrm, real *bbnrm, real *rconde, real *rcondv, complex *work, integer *lwork, real *rwork, integer *iwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_cggglm_(integer *n, integer *m, integer *p, complex *a, integer *lda, complex *b, integer *ldb, complex *d__, complex *x, complex *y, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cgghrd_(char *compq, char *compz, integer *n, integer * ilo, integer *ihi, complex *a, integer *lda, complex *b, integer *ldb, complex *q, integer *ldq, complex *z__, integer *ldz, integer *info); /* Subroutine */ int _starpu_cgglse_(integer *m, integer *n, integer *p, complex *a, integer *lda, complex *b, integer *ldb, complex *c__, complex *d__, complex *x, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cggqrf_(integer *n, integer *m, integer *p, complex *a, integer *lda, complex *taua, complex *b, integer *ldb, complex *taub, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cggrqf_(integer *m, integer *p, integer *n, complex *a, integer *lda, complex *taua, complex *b, integer *ldb, complex *taub, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cggsvd_(char *jobu, char *jobv, char *jobq, integer *m, integer *n, integer *p, integer *k, integer *l, complex *a, integer * lda, complex *b, integer *ldb, real *alpha, real *beta, complex *u, integer *ldu, complex *v, integer *ldv, complex *q, integer *ldq, complex *work, real *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_cggsvp_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, complex *a, integer *lda, complex *b, integer *ldb, real *tola, real *tolb, integer *k, integer *l, complex *u, integer *ldu, complex *v, integer *ldv, complex *q, integer *ldq, integer *iwork, real *rwork, complex *tau, complex *work, integer * info); /* Subroutine */ int _starpu_cgtcon_(char *norm, integer *n, complex *dl, complex * d__, complex *du, complex *du2, integer *ipiv, real *anorm, real * rcond, complex *work, integer *info); /* Subroutine */ int _starpu_cgtrfs_(char *trans, integer *n, integer *nrhs, complex * dl, complex *d__, complex *du, complex *dlf, complex *df, complex * duf, complex *du2, integer *ipiv, complex *b, integer *ldb, complex * x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgtsv_(integer *n, integer *nrhs, complex *dl, complex * d__, complex *du, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cgtsvx_(char *fact, char *trans, integer *n, integer * nrhs, complex *dl, complex *d__, complex *du, complex *dlf, complex * df, complex *duf, complex *du2, integer *ipiv, complex *b, integer * ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cgttrf_(integer *n, complex *dl, complex *d__, complex * du, complex *du2, integer *ipiv, integer *info); /* Subroutine */ int _starpu_cgttrs_(char *trans, integer *n, integer *nrhs, complex * dl, complex *d__, complex *du, complex *du2, integer *ipiv, complex * b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cgtts2_(integer *itrans, integer *n, integer *nrhs, complex *dl, complex *d__, complex *du, complex *du2, integer *ipiv, complex *b, integer *ldb); /* Subroutine */ int _starpu_chbev_(char *jobz, char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, real *w, complex *z__, integer *ldz, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chbevd_(char *jobz, char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, real *rwork, integer *lrwork, integer * iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_chbevx_(char *jobz, char *range, char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, complex *q, integer *ldq, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer * m, real *w, complex *z__, integer *ldz, complex *work, real *rwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_chbgst_(char *vect, char *uplo, integer *n, integer *ka, integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, complex *x, integer *ldx, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chbgv_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, real *w, complex *z__, integer *ldz, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chbgvd_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, real *rwork, integer *lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_chbgvx_(char *jobz, char *range, char *uplo, integer *n, integer *ka, integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, complex *q, integer *ldq, real *vl, real *vu, integer * il, integer *iu, real *abstol, integer *m, real *w, complex *z__, integer *ldz, complex *work, real *rwork, integer *iwork, integer * ifail, integer *info); /* Subroutine */ int _starpu_chbtrd_(char *vect, char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, real *d__, real *e, complex *q, integer * ldq, complex *work, integer *info); /* Subroutine */ int _starpu_checon_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, real *anorm, real *rcond, complex *work, integer * info); /* Subroutine */ int _starpu_cheequb_(char *uplo, integer *n, complex *a, integer * lda, real *s, real *scond, real *amax, complex *work, integer *info); /* Subroutine */ int _starpu_cheev_(char *jobz, char *uplo, integer *n, complex *a, integer *lda, real *w, complex *work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_cheevd_(char *jobz, char *uplo, integer *n, complex *a, integer *lda, real *w, complex *work, integer *lwork, real *rwork, integer *lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_cheevr_(char *jobz, char *range, char *uplo, integer *n, complex *a, integer *lda, real *vl, real *vu, integer *il, integer * iu, real *abstol, integer *m, real *w, complex *z__, integer *ldz, integer *isuppz, complex *work, integer *lwork, real *rwork, integer * lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_cheevx_(char *jobz, char *range, char *uplo, integer *n, complex *a, integer *lda, real *vl, real *vu, integer *il, integer * iu, real *abstol, integer *m, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, real *rwork, integer *iwork, integer * ifail, integer *info); /* Subroutine */ int _starpu_chegs2_(integer *itype, char *uplo, integer *n, complex * a, integer *lda, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_chegst_(integer *itype, char *uplo, integer *n, complex * a, integer *lda, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_chegv_(integer *itype, char *jobz, char *uplo, integer * n, complex *a, integer *lda, complex *b, integer *ldb, real *w, complex *work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_chegvd_(integer *itype, char *jobz, char *uplo, integer * n, complex *a, integer *lda, complex *b, integer *ldb, real *w, complex *work, integer *lwork, real *rwork, integer *lrwork, integer * iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_chegvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, complex *a, integer *lda, complex *b, integer *ldb, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer * m, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, real *rwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_cherfs_(char *uplo, integer *n, integer *nrhs, complex * a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex * b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cherfsx_(char *uplo, char *equed, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real *params, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chesv_(char *uplo, integer *n, integer *nrhs, complex *a, integer *lda, integer *ipiv, complex *b, integer *ldb, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_chesvx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_chesvxx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, char *equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer * n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * nparams, real *params, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chetd2_(char *uplo, integer *n, complex *a, integer *lda, real *d__, real *e, complex *tau, integer *info); /* Subroutine */ int _starpu_chetf2_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_chetrd_(char *uplo, integer *n, complex *a, integer *lda, real *d__, real *e, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_chetrf_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_chetri_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, complex *work, integer *info); /* Subroutine */ int _starpu_chetrs_(char *uplo, integer *n, integer *nrhs, complex * a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_chfrk_(char *transr, char *uplo, char *trans, integer *n, integer *k, real *alpha, complex *a, integer *lda, real *beta, complex *c__); /* Subroutine */ int _starpu_chgeqz_(char *job, char *compq, char *compz, integer *n, integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *t, integer *ldt, complex *alpha, complex *beta, complex *q, integer *ldq, complex *z__, integer *ldz, complex *work, integer *lwork, real * rwork, integer *info); /* Character */ VOID _starpu_chla_transtype__(char *ret_val, ftnlen ret_val_len, integer *trans); /* Subroutine */ int _starpu_chpcon_(char *uplo, integer *n, complex *ap, integer * ipiv, real *anorm, real *rcond, complex *work, integer *info); /* Subroutine */ int _starpu_chpev_(char *jobz, char *uplo, integer *n, complex *ap, real *w, complex *z__, integer *ldz, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chpevd_(char *jobz, char *uplo, integer *n, complex *ap, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, real *rwork, integer *lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_chpevx_(char *jobz, char *range, char *uplo, integer *n, complex *ap, real *vl, real *vu, integer *il, integer *iu, real * abstol, integer *m, real *w, complex *z__, integer *ldz, complex * work, real *rwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_chpgst_(integer *itype, char *uplo, integer *n, complex * ap, complex *bp, integer *info); /* Subroutine */ int _starpu_chpgv_(integer *itype, char *jobz, char *uplo, integer * n, complex *ap, complex *bp, real *w, complex *z__, integer *ldz, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chpgvd_(integer *itype, char *jobz, char *uplo, integer * n, complex *ap, complex *bp, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, real *rwork, integer *lrwork, integer * iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_chpgvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, complex *ap, complex *bp, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, complex * z__, integer *ldz, complex *work, real *rwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_chprfs_(char *uplo, integer *n, integer *nrhs, complex * ap, complex *afp, integer *ipiv, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chpsv_(char *uplo, integer *n, integer *nrhs, complex * ap, integer *ipiv, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_chpsvx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *ap, complex *afp, integer *ipiv, complex *b, integer * ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_chptrd_(char *uplo, integer *n, complex *ap, real *d__, real *e, complex *tau, integer *info); /* Subroutine */ int _starpu_chptrf_(char *uplo, integer *n, complex *ap, integer * ipiv, integer *info); /* Subroutine */ int _starpu_chptri_(char *uplo, integer *n, complex *ap, integer * ipiv, complex *work, integer *info); /* Subroutine */ int _starpu_chptrs_(char *uplo, integer *n, integer *nrhs, complex * ap, integer *ipiv, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_chsein_(char *side, char *eigsrc, char *initv, logical * select, integer *n, complex *h__, integer *ldh, complex *w, complex * vl, integer *ldvl, complex *vr, integer *ldvr, integer *mm, integer * m, complex *work, real *rwork, integer *ifaill, integer *ifailr, integer *info); /* Subroutine */ int _starpu_chseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, complex *z__, integer *ldz, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cla_gbamv__(integer *trans, integer *m, integer *n, integer *kl, integer *ku, real *alpha, complex *ab, integer *ldab, complex *x, integer *incx, real *beta, real *y, integer *incy); doublereal _starpu_cla_gbrcond_c__(char *trans, integer *n, integer *kl, integer *ku, complex *ab, integer *ldab, complex *afb, integer *ldafb, integer * ipiv, real *c__, logical *capply, integer *info, complex *work, real * rwork, ftnlen trans_len); doublereal _starpu_cla_gbrcond_x__(char *trans, integer *n, integer *kl, integer *ku, complex *ab, integer *ldab, complex *afb, integer *ldafb, integer * ipiv, complex *x, integer *info, complex *work, real *rwork, ftnlen trans_len); /* Subroutine */ int _starpu_cla_gbrfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, complex *ab, integer *ldab, complex *afb, integer *ldafb, integer * ipiv, logical *colequ, real *c__, complex *b, integer *ldb, complex * y, integer *ldy, real *berr_out__, integer *n_norms__, real *errs_n__, real *errs_c__, complex *res, real *ayb, complex *dy, complex * y_tail__, real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info); doublereal _starpu_cla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * ncols, complex *ab, integer *ldab, complex *afb, integer *ldafb); /* Subroutine */ int _starpu_cla_geamv__(integer *trans, integer *m, integer *n, real *alpha, complex *a, integer *lda, complex *x, integer *incx, real * beta, real *y, integer *incy); doublereal _starpu_cla_gercond_c__(char *trans, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, real *c__, logical *capply, integer *info, complex *work, real *rwork, ftnlen trans_len); doublereal _starpu_cla_gercond_x__(char *trans, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex *x, integer *info, complex *work, real *rwork, ftnlen trans_len); /* Subroutine */ int _starpu_cla_gerfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, logical *colequ, real *c__, complex *b, integer *ldb, complex *y, integer *ldy, real *berr_out__, integer *n_norms__, real *errs_n__, real *errs_c__, complex *res, real *ayb, complex *dy, complex *y_tail__, real *rcond, integer * ithresh, real *rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info); /* Subroutine */ int _starpu_cla_heamv__(integer *uplo, integer *n, real *alpha, complex *a, integer *lda, complex *x, integer *incx, real *beta, real *y, integer *incy); doublereal _starpu_cla_hercond_c__(char *uplo, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, real *c__, logical *capply, integer *info, complex *work, real *rwork, ftnlen uplo_len); doublereal _starpu_cla_hercond_x__(char *uplo, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex *x, integer *info, complex *work, real *rwork, ftnlen uplo_len); /* Subroutine */ int _starpu_cla_herfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, logical *colequ, real *c__, complex *b, integer *ldb, complex *y, integer *ldy, real *berr_out__, integer * n_norms__, real *errs_n__, real *errs_c__, complex *res, real *ayb, complex *dy, complex *y_tail__, real *rcond, integer *ithresh, real * rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_cla_herpvgrw__(char *uplo, integer *n, integer *info, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, real *work, ftnlen uplo_len); /* Subroutine */ int _starpu_cla_lin_berr__(integer *n, integer *nz, integer *nrhs, complex *res, real *ayb, real *berr); doublereal _starpu_cla_porcond_c__(char *uplo, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, real *c__, logical *capply, integer *info, complex *work, real *rwork, ftnlen uplo_len); doublereal _starpu_cla_porcond_x__(char *uplo, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, complex *x, integer *info, complex *work, real *rwork, ftnlen uplo_len); /* Subroutine */ int _starpu_cla_porfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, complex *a, integer *lda, complex *af, integer *ldaf, logical *colequ, real *c__, complex *b, integer *ldb, complex *y, integer *ldy, real *berr_out__, integer *n_norms__, real * errs_n__, real *errs_c__, complex *res, real *ayb, complex *dy, complex *y_tail__, real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_cla_porpvgrw__(char *uplo, integer *ncols, complex *a, integer * lda, complex *af, integer *ldaf, real *work, ftnlen uplo_len); doublereal _starpu_cla_rpvgrw__(integer *n, integer *ncols, complex *a, integer *lda, complex *af, integer *ldaf); /* Subroutine */ int _starpu_cla_syamv__(integer *uplo, integer *n, real *alpha, complex *a, integer *lda, complex *x, integer *incx, real *beta, real *y, integer *incy); doublereal _starpu_cla_syrcond_c__(char *uplo, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, real *c__, logical *capply, integer *info, complex *work, real *rwork, ftnlen uplo_len); doublereal _starpu_cla_syrcond_x__(char *uplo, integer *n, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex *x, integer *info, complex *work, real *rwork, ftnlen uplo_len); /* Subroutine */ int _starpu_cla_syrfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, logical *colequ, real *c__, complex *b, integer *ldb, complex *y, integer *ldy, real *berr_out__, integer * n_norms__, real *errs_n__, real *errs_c__, complex *res, real *ayb, complex *dy, complex *y_tail__, real *rcond, integer *ithresh, real * rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_cla_syrpvgrw__(char *uplo, integer *n, integer *info, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, real *work, ftnlen uplo_len); /* Subroutine */ int _starpu_cla_wwaddw__(integer *n, complex *x, complex *y, complex *w); /* Subroutine */ int _starpu_clabrd_(integer *m, integer *n, integer *nb, complex *a, integer *lda, real *d__, real *e, complex *tauq, complex *taup, complex *x, integer *ldx, complex *y, integer *ldy); /* Subroutine */ int _starpu_clacgv_(integer *n, complex *x, integer *incx); /* Subroutine */ int _starpu_clacn2_(integer *n, complex *v, complex *x, real *est, integer *kase, integer *isave); /* Subroutine */ int _starpu_clacon_(integer *n, complex *v, complex *x, real *est, integer *kase); /* Subroutine */ int _starpu_clacp2_(char *uplo, integer *m, integer *n, real *a, integer *lda, complex *b, integer *ldb); /* Subroutine */ int _starpu_clacpy_(char *uplo, integer *m, integer *n, complex *a, integer *lda, complex *b, integer *ldb); /* Subroutine */ int _starpu_clacrm_(integer *m, integer *n, complex *a, integer *lda, real *b, integer *ldb, complex *c__, integer *ldc, real *rwork); /* Subroutine */ int _starpu_clacrt_(integer *n, complex *cx, integer *incx, complex * cy, integer *incy, complex *c__, complex *s); /* Complex */ VOID _starpu_cladiv_(complex * ret_val, complex *x, complex *y); /* Subroutine */ int _starpu_claed0_(integer *qsiz, integer *n, real *d__, real *e, complex *q, integer *ldq, complex *qstore, integer *ldqs, real *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_claed7_(integer *n, integer *cutpnt, integer *qsiz, integer *tlvls, integer *curlvl, integer *curpbm, real *d__, complex * q, integer *ldq, real *rho, integer *indxq, real *qstore, integer * qptr, integer *prmptr, integer *perm, integer *givptr, integer * givcol, real *givnum, complex *work, real *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_claed8_(integer *k, integer *n, integer *qsiz, complex * q, integer *ldq, real *d__, real *rho, integer *cutpnt, real *z__, real *dlamda, complex *q2, integer *ldq2, real *w, integer *indxp, integer *indx, integer *indxq, integer *perm, integer *givptr, integer *givcol, real *givnum, integer *info); /* Subroutine */ int _starpu_claein_(logical *rightv, logical *noinit, integer *n, complex *h__, integer *ldh, complex *w, complex *v, complex *b, integer *ldb, real *rwork, real *eps3, real *smlnum, integer *info); /* Subroutine */ int _starpu_claesy_(complex *a, complex *b, complex *c__, complex * rt1, complex *rt2, complex *evscal, complex *cs1, complex *sn1); /* Subroutine */ int _starpu_claev2_(complex *a, complex *b, complex *c__, real *rt1, real *rt2, real *cs1, complex *sn1); /* Subroutine */ int _starpu_clag2z_(integer *m, integer *n, complex *sa, integer * ldsa, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_clags2_(logical *upper, real *a1, complex *a2, real *a3, real *b1, complex *b2, real *b3, real *csu, complex *snu, real *csv, complex *snv, real *csq, complex *snq); /* Subroutine */ int _starpu_clagtm_(char *trans, integer *n, integer *nrhs, real * alpha, complex *dl, complex *d__, complex *du, complex *x, integer * ldx, real *beta, complex *b, integer *ldb); /* Subroutine */ int _starpu_clahef_(char *uplo, integer *n, integer *nb, integer *kb, complex *a, integer *lda, integer *ipiv, complex *w, integer *ldw, integer *info); /* Subroutine */ int _starpu_clahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * info); /* Subroutine */ int _starpu_clahr2_(integer *n, integer *k, integer *nb, complex *a, integer *lda, complex *tau, complex *t, integer *ldt, complex *y, integer *ldy); /* Subroutine */ int _starpu_clahrd_(integer *n, integer *k, integer *nb, complex *a, integer *lda, complex *tau, complex *t, integer *ldt, complex *y, integer *ldy); /* Subroutine */ int _starpu_claic1_(integer *job, integer *j, complex *x, real *sest, complex *w, complex *gamma, real *sestpr, complex *s, complex *c__); /* Subroutine */ int _starpu_clals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, complex *b, integer *ldb, complex *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * difl, real *difr, real *z__, integer *k, real *c__, real *s, real * rwork, integer *info); /* Subroutine */ int _starpu_clalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, complex *b, integer *ldb, complex *bx, integer *ldbx, real *u, integer *ldu, real *vt, integer *k, real *difl, real *difr, real *z__, real *poles, integer *givptr, integer *givcol, integer * ldgcol, integer *perm, real *givnum, real *c__, real *s, real *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_clalsd_(char *uplo, integer *smlsiz, integer *n, integer *nrhs, real *d__, real *e, complex *b, integer *ldb, real *rcond, integer *rank, complex *work, real *rwork, integer *iwork, integer * info); doublereal _starpu_clangb_(char *norm, integer *n, integer *kl, integer *ku, complex * ab, integer *ldab, real *work); doublereal _starpu_clange_(char *norm, integer *m, integer *n, complex *a, integer * lda, real *work); doublereal _starpu_clangt_(char *norm, integer *n, complex *dl, complex *d__, complex *du); doublereal _starpu_clanhb_(char *norm, char *uplo, integer *n, integer *k, complex * ab, integer *ldab, real *work); doublereal _starpu_clanhe_(char *norm, char *uplo, integer *n, complex *a, integer * lda, real *work); doublereal _starpu_clanhf_(char *norm, char *transr, char *uplo, integer *n, complex * a, real *work); doublereal _starpu_clanhp_(char *norm, char *uplo, integer *n, complex *ap, real * work); doublereal _starpu_clanhs_(char *norm, integer *n, complex *a, integer *lda, real * work); doublereal _starpu_clanht_(char *norm, integer *n, real *d__, complex *e); doublereal _starpu_clansb_(char *norm, char *uplo, integer *n, integer *k, complex * ab, integer *ldab, real *work); doublereal _starpu_clansp_(char *norm, char *uplo, integer *n, complex *ap, real * work); doublereal _starpu_clansy_(char *norm, char *uplo, integer *n, complex *a, integer * lda, real *work); doublereal _starpu_clantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, complex *ab, integer *ldab, real *work); doublereal _starpu_clantp_(char *norm, char *uplo, char *diag, integer *n, complex * ap, real *work); doublereal _starpu_clantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, complex *a, integer *lda, real *work); /* Subroutine */ int _starpu_clapll_(integer *n, complex *x, integer *incx, complex * y, integer *incy, real *ssmin); /* Subroutine */ int _starpu_clapmt_(logical *forwrd, integer *m, integer *n, complex *x, integer *ldx, integer *k); /* Subroutine */ int _starpu_claqgb_(integer *m, integer *n, integer *kl, integer *ku, complex *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, char *equed); /* Subroutine */ int _starpu_claqge_(integer *m, integer *n, complex *a, integer *lda, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, char * equed); /* Subroutine */ int _starpu_claqhb_(char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_claqhe_(char *uplo, integer *n, complex *a, integer *lda, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_claqhp_(char *uplo, integer *n, complex *ap, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_claqp2_(integer *m, integer *n, integer *offset, complex *a, integer *lda, integer *jpvt, complex *tau, real *vn1, real *vn2, complex *work); /* Subroutine */ int _starpu_claqps_(integer *m, integer *n, integer *offset, integer *nb, integer *kb, complex *a, integer *lda, integer *jpvt, complex * tau, real *vn1, real *vn2, complex *auxv, complex *f, integer *ldf); /* Subroutine */ int _starpu_claqr0_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, integer *iloz, integer *ihiz, complex *z__, integer *ldz, complex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_claqr1_(integer *n, complex *h__, integer *ldh, complex * s1, complex *s2, complex *v); /* Subroutine */ int _starpu_claqr2_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, complex *h__, integer *ldh, integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * ns, integer *nd, complex *sh, complex *v, integer *ldv, integer *nh, complex *t, integer *ldt, integer *nv, complex *wv, integer *ldwv, complex *work, integer *lwork); /* Subroutine */ int _starpu_claqr3_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, complex *h__, integer *ldh, integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * ns, integer *nd, complex *sh, complex *v, integer *ldv, integer *nh, complex *t, integer *ldt, integer *nv, complex *wv, integer *ldwv, complex *work, integer *lwork); /* Subroutine */ int _starpu_claqr4_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, integer *iloz, integer *ihiz, complex *z__, integer *ldz, complex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_claqr5_(logical *wantt, logical *wantz, integer *kacc22, integer *n, integer *ktop, integer *kbot, integer *nshfts, complex *s, complex *h__, integer *ldh, integer *iloz, integer *ihiz, complex * z__, integer *ldz, complex *v, integer *ldv, complex *u, integer *ldu, integer *nv, complex *wv, integer *ldwv, integer *nh, complex *wh, integer *ldwh); /* Subroutine */ int _starpu_claqsb_(char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_claqsp_(char *uplo, integer *n, complex *ap, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_claqsy_(char *uplo, integer *n, complex *a, integer *lda, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_clar1v_(integer *n, integer *b1, integer *bn, real * lambda, real *d__, real *l, real *ld, real *lld, real *pivmin, real * gaptol, complex *z__, logical *wantnc, integer *negcnt, real *ztz, real *mingma, integer *r__, integer *isuppz, real *nrminv, real * resid, real *rqcorr, real *work); /* Subroutine */ int _starpu_clar2v_(integer *n, complex *x, complex *y, complex *z__, integer *incx, real *c__, complex *s, integer *incc); /* Subroutine */ int _starpu_clarcm_(integer *m, integer *n, real *a, integer *lda, complex *b, integer *ldb, complex *c__, integer *ldc, real *rwork); /* Subroutine */ int _starpu_clarf_(char *side, integer *m, integer *n, complex *v, integer *incv, complex *tau, complex *c__, integer *ldc, complex * work); /* Subroutine */ int _starpu_clarfb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, complex *v, integer *ldv, complex *t, integer *ldt, complex *c__, integer *ldc, complex *work, integer *ldwork); /* Subroutine */ int _starpu_clarfg_(integer *n, complex *alpha, complex *x, integer * incx, complex *tau); /* Subroutine */ int _starpu_clarfp_(integer *n, complex *alpha, complex *x, integer * incx, complex *tau); /* Subroutine */ int _starpu_clarft_(char *direct, char *storev, integer *n, integer * k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt); /* Subroutine */ int _starpu_clarfx_(char *side, integer *m, integer *n, complex *v, complex *tau, complex *c__, integer *ldc, complex *work); /* Subroutine */ int _starpu_clargv_(integer *n, complex *x, integer *incx, complex * y, integer *incy, real *c__, integer *incc); /* Subroutine */ int _starpu_clarnv_(integer *idist, integer *iseed, integer *n, complex *x); /* Subroutine */ int _starpu_clarrv_(integer *n, real *vl, real *vu, real *d__, real * l, real *pivmin, integer *isplit, integer *m, integer *dol, integer * dou, real *minrgp, real *rtol1, real *rtol2, real *w, real *werr, real *wgap, integer *iblock, integer *indexw, real *gers, complex * z__, integer *ldz, integer *isuppz, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_clarscl2_(integer *m, integer *n, real *d__, complex *x, integer *ldx); /* Subroutine */ int _starpu_clartg_(complex *f, complex *g, real *cs, complex *sn, complex *r__); /* Subroutine */ int _starpu_clartv_(integer *n, complex *x, integer *incx, complex * y, integer *incy, real *c__, complex *s, integer *incc); /* Subroutine */ int _starpu_clarz_(char *side, integer *m, integer *n, integer *l, complex *v, integer *incv, complex *tau, complex *c__, integer *ldc, complex *work); /* Subroutine */ int _starpu_clarzb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, integer *l, complex *v, integer *ldv, complex *t, integer *ldt, complex *c__, integer *ldc, complex *work, integer *ldwork); /* Subroutine */ int _starpu_clarzt_(char *direct, char *storev, integer *n, integer * k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt); /* Subroutine */ int _starpu_clascl_(char *type__, integer *kl, integer *ku, real * cfrom, real *cto, integer *m, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_clascl2_(integer *m, integer *n, real *d__, complex *x, integer *ldx); /* Subroutine */ int _starpu_claset_(char *uplo, integer *m, integer *n, complex * alpha, complex *beta, complex *a, integer *lda); /* Subroutine */ int _starpu_clasr_(char *side, char *pivot, char *direct, integer *m, integer *n, real *c__, real *s, complex *a, integer *lda); /* Subroutine */ int _starpu_classq_(integer *n, complex *x, integer *incx, real * scale, real *sumsq); /* Subroutine */ int _starpu_claswp_(integer *n, complex *a, integer *lda, integer * k1, integer *k2, integer *ipiv, integer *incx); /* Subroutine */ int _starpu_clasyf_(char *uplo, integer *n, integer *nb, integer *kb, complex *a, integer *lda, integer *ipiv, complex *w, integer *ldw, integer *info); /* Subroutine */ int _starpu_clatbs_(char *uplo, char *trans, char *diag, char * normin, integer *n, integer *kd, complex *ab, integer *ldab, complex * x, real *scale, real *cnorm, integer *info); /* Subroutine */ int _starpu_clatdf_(integer *ijob, integer *n, complex *z__, integer *ldz, complex *rhs, real *rdsum, real *rdscal, integer *ipiv, integer *jpiv); /* Subroutine */ int _starpu_clatps_(char *uplo, char *trans, char *diag, char * normin, integer *n, complex *ap, complex *x, real *scale, real *cnorm, integer *info); /* Subroutine */ int _starpu_clatrd_(char *uplo, integer *n, integer *nb, complex *a, integer *lda, real *e, complex *tau, complex *w, integer *ldw); /* Subroutine */ int _starpu_clatrs_(char *uplo, char *trans, char *diag, char * normin, integer *n, complex *a, integer *lda, complex *x, real *scale, real *cnorm, integer *info); /* Subroutine */ int _starpu_clatrz_(integer *m, integer *n, integer *l, complex *a, integer *lda, complex *tau, complex *work); /* Subroutine */ int _starpu_clatzm_(char *side, integer *m, integer *n, complex *v, integer *incv, complex *tau, complex *c1, complex *c2, integer *ldc, complex *work); /* Subroutine */ int _starpu_clauu2_(char *uplo, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_clauum_(char *uplo, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_cpbcon_(char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, real *anorm, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cpbequ_(char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, real *s, real *scond, real *amax, integer *info); /* Subroutine */ int _starpu_cpbrfs_(char *uplo, integer *n, integer *kd, integer * nrhs, complex *ab, integer *ldab, complex *afb, integer *ldafb, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real * berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cpbstf_(char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_cpbsv_(char *uplo, integer *n, integer *kd, integer * nrhs, complex *ab, integer *ldab, complex *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_cpbsvx_(char *fact, char *uplo, integer *n, integer *kd, integer *nrhs, complex *ab, integer *ldab, complex *afb, integer * ldafb, char *equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cpbtf2_(char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_cpbtrf_(char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_cpbtrs_(char *uplo, integer *n, integer *kd, integer * nrhs, complex *ab, integer *ldab, complex *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_cpftrf_(char *transr, char *uplo, integer *n, complex *a, integer *info); /* Subroutine */ int _starpu_cpftri_(char *transr, char *uplo, integer *n, complex *a, integer *info); /* Subroutine */ int _starpu_cpftrs_(char *transr, char *uplo, integer *n, integer * nrhs, complex *a, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cpocon_(char *uplo, integer *n, complex *a, integer *lda, real *anorm, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cpoequ_(integer *n, complex *a, integer *lda, real *s, real *scond, real *amax, integer *info); /* Subroutine */ int _starpu_cpoequb_(integer *n, complex *a, integer *lda, real *s, real *scond, real *amax, integer *info); /* Subroutine */ int _starpu_cporfs_(char *uplo, integer *n, integer *nrhs, complex * a, integer *lda, complex *af, integer *ldaf, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cporfsx_(char *uplo, char *equed, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real * err_bnds_comp__, integer *nparams, real *params, complex *work, real * rwork, integer *info); /* Subroutine */ int _starpu_cposv_(char *uplo, integer *n, integer *nrhs, complex *a, integer *lda, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cposvx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, char * equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cposvxx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, char * equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer *n_err_bnds__, real * err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * params, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cpotf2_(char *uplo, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_cpotrf_(char *uplo, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_cpotri_(char *uplo, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_cpotrs_(char *uplo, integer *n, integer *nrhs, complex * a, integer *lda, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cppcon_(char *uplo, integer *n, complex *ap, real *anorm, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cppequ_(char *uplo, integer *n, complex *ap, real *s, real *scond, real *amax, integer *info); /* Subroutine */ int _starpu_cpprfs_(char *uplo, integer *n, integer *nrhs, complex * ap, complex *afp, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cppsv_(char *uplo, integer *n, integer *nrhs, complex * ap, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cppsvx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *ap, complex *afp, char *equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cpptrf_(char *uplo, integer *n, complex *ap, integer * info); /* Subroutine */ int _starpu_cpptri_(char *uplo, integer *n, complex *ap, integer * info); /* Subroutine */ int _starpu_cpptrs_(char *uplo, integer *n, integer *nrhs, complex * ap, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cpstf2_(char *uplo, integer *n, complex *a, integer *lda, integer *piv, integer *rank, real *tol, real *work, integer *info); /* Subroutine */ int _starpu_cpstrf_(char *uplo, integer *n, complex *a, integer *lda, integer *piv, integer *rank, real *tol, real *work, integer *info); /* Subroutine */ int _starpu_cptcon_(integer *n, real *d__, complex *e, real *anorm, real *rcond, real *rwork, integer *info); /* Subroutine */ int _starpu_cpteqr_(char *compz, integer *n, real *d__, real *e, complex *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_cptrfs_(char *uplo, integer *n, integer *nrhs, real *d__, complex *e, real *df, complex *ef, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cptsv_(integer *n, integer *nrhs, real *d__, complex *e, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cptsvx_(char *fact, integer *n, integer *nrhs, real *d__, complex *e, real *df, complex *ef, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cpttrf_(integer *n, real *d__, complex *e, integer *info); /* Subroutine */ int _starpu_cpttrs_(char *uplo, integer *n, integer *nrhs, real *d__, complex *e, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cptts2_(integer *iuplo, integer *n, integer *nrhs, real * d__, complex *e, complex *b, integer *ldb); /* Subroutine */ int _starpu_crot_(integer *n, complex *cx, integer *incx, complex * cy, integer *incy, real *c__, complex *s); /* Subroutine */ int _starpu_cspcon_(char *uplo, integer *n, complex *ap, integer * ipiv, real *anorm, real *rcond, complex *work, integer *info); /* Subroutine */ int _starpu_cspmv_(char *uplo, integer *n, complex *alpha, complex * ap, complex *x, integer *incx, complex *beta, complex *y, integer * incy); /* Subroutine */ int _starpu_cspr_(char *uplo, integer *n, complex *alpha, complex *x, integer *incx, complex *ap); /* Subroutine */ int _starpu_csprfs_(char *uplo, integer *n, integer *nrhs, complex * ap, complex *afp, integer *ipiv, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_cspsv_(char *uplo, integer *n, integer *nrhs, complex * ap, integer *ipiv, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_cspsvx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *ap, complex *afp, integer *ipiv, complex *b, integer * ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_csptrf_(char *uplo, integer *n, complex *ap, integer * ipiv, integer *info); /* Subroutine */ int _starpu_csptri_(char *uplo, integer *n, complex *ap, integer * ipiv, complex *work, integer *info); /* Subroutine */ int _starpu_csptrs_(char *uplo, integer *n, integer *nrhs, complex * ap, integer *ipiv, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu__starpu_csrscl_(integer *n, real *sa, complex *sx, integer *incx); /* Subroutine */ int _starpu_cstedc_(char *compz, integer *n, real *d__, real *e, complex *z__, integer *ldz, complex *work, integer *lwork, real * rwork, integer *lrwork, integer *iwork, integer *liwork, integer * info); /* Subroutine */ int _starpu_cstegr_(char *jobz, char *range, integer *n, real *d__, real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, complex *z__, integer *ldz, integer *isuppz, real *work, integer *lwork, integer *iwork, integer *liwork, integer * info); /* Subroutine */ int _starpu_cstein_(integer *n, real *d__, real *e, integer *m, real *w, integer *iblock, integer *isplit, complex *z__, integer *ldz, real *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_cstemr_(char *jobz, char *range, integer *n, real *d__, real *e, real *vl, real *vu, integer *il, integer *iu, integer *m, real *w, complex *z__, integer *ldz, integer *nzc, integer *isuppz, logical *tryrac, real *work, integer *lwork, integer *iwork, integer * liwork, integer *info); /* Subroutine */ int _starpu_csteqr_(char *compz, integer *n, real *d__, real *e, complex *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_csycon_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, real *anorm, real *rcond, complex *work, integer * info); /* Subroutine */ int _starpu_csyequb_(char *uplo, integer *n, complex *a, integer * lda, real *s, real *scond, real *amax, complex *work, integer *info); /* Subroutine */ int _starpu_csymv_(char *uplo, integer *n, complex *alpha, complex * a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, integer *incy); /* Subroutine */ int _starpu_csyr_(char *uplo, integer *n, complex *alpha, complex *x, integer *incx, complex *a, integer *lda); /* Subroutine */ int _starpu_csyrfs_(char *uplo, integer *n, integer *nrhs, complex * a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex * b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_csyrfsx_(char *uplo, char *equed, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real *params, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_csysv_(char *uplo, integer *n, integer *nrhs, complex *a, integer *lda, integer *ipiv, complex *b, integer *ldb, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_csysvx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, complex *work, integer *lwork, real *rwork, integer *info); /* Subroutine */ int _starpu_csysvxx_(char *fact, char *uplo, integer *n, integer * nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * ipiv, char *equed, real *s, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer * n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * nparams, real *params, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_csytf2_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_csytrf_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_csytri_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, complex *work, integer *info); /* Subroutine */ int _starpu_csytrs_(char *uplo, integer *n, integer *nrhs, complex * a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_ctbcon_(char *norm, char *uplo, char *diag, integer *n, integer *kd, complex *ab, integer *ldab, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctbrfs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, complex *ab, integer *ldab, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctbtrs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, complex *ab, integer *ldab, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ctfsm_(char *transr, char *side, char *uplo, char *trans, char *diag, integer *m, integer *n, complex *alpha, complex *a, complex *b, integer *ldb); /* Subroutine */ int _starpu_ctftri_(char *transr, char *uplo, char *diag, integer *n, complex *a, integer *info); /* Subroutine */ int _starpu_ctfttp_(char *transr, char *uplo, integer *n, complex * arf, complex *ap, integer *info); /* Subroutine */ int _starpu_ctfttr_(char *transr, char *uplo, integer *n, complex * arf, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ctgevc_(char *side, char *howmny, logical *select, integer *n, complex *s, integer *lds, complex *p, integer *ldp, complex *vl, integer *ldvl, complex *vr, integer *ldvr, integer *mm, integer *m, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctgex2_(logical *wantq, logical *wantz, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *q, integer *ldq, complex *z__, integer *ldz, integer *j1, integer *info); /* Subroutine */ int _starpu_ctgexc_(logical *wantq, logical *wantz, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *q, integer *ldq, complex *z__, integer *ldz, integer *ifst, integer * ilst, integer *info); /* Subroutine */ int _starpu_ctgsen_(integer *ijob, logical *wantq, logical *wantz, logical *select, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *alpha, complex *beta, complex *q, integer *ldq, complex *z__, integer *ldz, integer *m, real *pl, real *pr, real * dif, complex *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_ctgsja_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, integer *k, integer *l, complex *a, integer * lda, complex *b, integer *ldb, real *tola, real *tolb, real *alpha, real *beta, complex *u, integer *ldu, complex *v, integer *ldv, complex *q, integer *ldq, complex *work, integer *ncycle, integer * info); /* Subroutine */ int _starpu_ctgsna_(char *job, char *howmny, logical *select, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *vl, integer *ldvl, complex *vr, integer *ldvr, real *s, real *dif, integer *mm, integer *m, complex *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_ctgsy2_(char *trans, integer *ijob, integer *m, integer * n, complex *a, integer *lda, complex *b, integer *ldb, complex *c__, integer *ldc, complex *d__, integer *ldd, complex *e, integer *lde, complex *f, integer *ldf, real *scale, real *rdsum, real *rdscal, integer *info); /* Subroutine */ int _starpu_ctgsyl_(char *trans, integer *ijob, integer *m, integer * n, complex *a, integer *lda, complex *b, integer *ldb, complex *c__, integer *ldc, complex *d__, integer *ldd, complex *e, integer *lde, complex *f, integer *ldf, real *scale, real *dif, complex *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_ctpcon_(char *norm, char *uplo, char *diag, integer *n, complex *ap, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctprfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, complex *ap, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctptri_(char *uplo, char *diag, integer *n, complex *ap, integer *info); /* Subroutine */ int _starpu_ctptrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, complex *ap, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ctpttf_(char *transr, char *uplo, integer *n, complex * ap, complex *arf, integer *info); /* Subroutine */ int _starpu_ctpttr_(char *uplo, integer *n, complex *ap, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ctrcon_(char *norm, char *uplo, char *diag, integer *n, complex *a, integer *lda, real *rcond, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctrevc_(char *side, char *howmny, logical *select, integer *n, complex *t, integer *ldt, complex *vl, integer *ldvl, complex *vr, integer *ldvr, integer *mm, integer *m, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctrexc_(char *compq, integer *n, complex *t, integer * ldt, complex *q, integer *ldq, integer *ifst, integer *ilst, integer * info); /* Subroutine */ int _starpu_ctrrfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, complex *a, integer *lda, complex *b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info); /* Subroutine */ int _starpu_ctrsen_(char *job, char *compq, logical *select, integer *n, complex *t, integer *ldt, complex *q, integer *ldq, complex *w, integer *m, real *s, real *sep, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_ctrsna_(char *job, char *howmny, logical *select, integer *n, complex *t, integer *ldt, complex *vl, integer *ldvl, complex *vr, integer *ldvr, real *s, real *sep, integer *mm, integer * m, complex *work, integer *ldwork, real *rwork, integer *info); /* Subroutine */ int _starpu_ctrsyl_(char *trana, char *tranb, integer *isgn, integer *m, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *c__, integer *ldc, real *scale, integer *info); /* Subroutine */ int _starpu_ctrti2_(char *uplo, char *diag, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ctrtri_(char *uplo, char *diag, integer *n, complex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ctrtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, complex *a, integer *lda, complex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ctrttf_(char *transr, char *uplo, integer *n, complex *a, integer *lda, complex *arf, integer *info); /* Subroutine */ int _starpu_ctrttp_(char *uplo, integer *n, complex *a, integer *lda, complex *ap, integer *info); /* Subroutine */ int _starpu_ctzrqf_(integer *m, integer *n, complex *a, integer *lda, complex *tau, integer *info); /* Subroutine */ int _starpu_ctzrzf_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cung2l_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cung2r_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cungbr_(char *vect, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cunghr_(integer *n, integer *ilo, integer *ihi, complex * a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cungl2_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cunglq_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_cungql_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_cungqr_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_cungr2_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *info); /* Subroutine */ int _starpu_cungrq_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_cungtr_(char *uplo, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cunm2l_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *info); /* Subroutine */ int _starpu_cunm2r_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *info); /* Subroutine */ int _starpu_cunmbr_(char *vect, char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_cunmhr_(char *side, char *trans, integer *m, integer *n, integer *ilo, integer *ihi, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_cunml2_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *info); /* Subroutine */ int _starpu_cunmlq_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cunmql_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cunmqr_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cunmr2_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *info); /* Subroutine */ int _starpu_cunmr3_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *info); /* Subroutine */ int _starpu_cunmrq_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cunmrz_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_cunmtr_(char *side, char *uplo, char *trans, integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_cupgtr_(char *uplo, integer *n, complex *ap, complex * tau, complex *q, integer *ldq, complex *work, integer *info); /* Subroutine */ int _starpu_cupmtr_(char *side, char *uplo, char *trans, integer *m, integer *n, complex *ap, complex *tau, complex *c__, integer *ldc, complex *work, integer *info); /* Subroutine */ int _starpu_dbdsdc_(char *uplo, char *compq, integer *n, doublereal * d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dbdsqr_(char *uplo, integer *n, integer *ncvt, integer * nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer * ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_ddisna_(char *job, integer *m, integer *n, doublereal * d__, doublereal *sep, integer *info); /* Subroutine */ int _starpu_dgbbrd_(char *vect, integer *m, integer *n, integer *ncc, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal * d__, doublereal *e, doublereal *q, integer *ldq, doublereal *pt, integer *ldpt, doublereal *c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgbcon_(char *norm, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, integer *ipiv, doublereal *anorm, doublereal *rcond, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgbequ_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * info); /* Subroutine */ int _starpu_dgbequb_(integer *m, integer *n, integer *kl, integer * ku, doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * info); /* Subroutine */ int _starpu_dgbrfs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgbrfsx_(char *trans, char *equed, integer *n, integer * kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer * ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * nparams, doublereal *params, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgbsv_(integer *n, integer *kl, integer *ku, integer * nrhs, doublereal *ab, integer *ldab, integer *ipiv, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dgbsvx_(char *fact, char *trans, integer *n, integer *kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgbsvxx_(char *fact, char *trans, integer *n, integer * kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, doublereal *params, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgbtf2_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_dgbtrf_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_dgbtrs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, doublereal *ab, integer *ldab, integer *ipiv, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, doublereal *scale, integer *m, doublereal *v, integer * ldv, integer *info); /* Subroutine */ int _starpu_dgebal_(char *job, integer *n, doublereal *a, integer * lda, integer *ilo, integer *ihi, doublereal *scale, integer *info); /* Subroutine */ int _starpu_dgebd2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * taup, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgebrd_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * taup, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgecon_(char *norm, integer *n, doublereal *a, integer * lda, doublereal *anorm, doublereal *rcond, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dgeequ_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer *info); /* Subroutine */ int _starpu_dgeequb_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer *info); /* Subroutine */ int _starpu_dgees_(char *jobvs, char *sort, L_fp select, integer *n, doublereal *a, integer *lda, integer *sdim, doublereal *wr, doublereal *wi, doublereal *vs, integer *ldvs, doublereal *work, integer *lwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_dgeesx_(char *jobvs, char *sort, L_fp select, char * sense, integer *n, doublereal *a, integer *lda, integer *sdim, doublereal *wr, doublereal *wi, doublereal *vs, integer *ldvs, doublereal *rconde, doublereal *rcondv, doublereal *work, integer * lwork, integer *iwork, integer *liwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_dgeev_(char *jobvl, char *jobvr, integer *n, doublereal * a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgeevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, doublereal *a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer *ilo, integer *ihi, doublereal *scale, doublereal *abnrm, doublereal *rconde, doublereal *rcondv, doublereal *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgegs_(char *jobvsl, char *jobvsr, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * alphar, doublereal *alphai, doublereal *beta, doublereal *vsl, integer *ldvsl, doublereal *vsr, integer *ldvsr, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgegv_(char *jobvl, char *jobvr, integer *n, doublereal * a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal *beta, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgehd2_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgehrd_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgejsv_(char *joba, char *jobu, char *jobv, char *jobr, char *jobt, char *jobp, integer *m, integer *n, doublereal *a, integer *lda, doublereal *sva, doublereal *u, integer *ldu, doublereal *v, integer *ldv, doublereal *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgelq2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgelqf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgels_(char *trans, integer *m, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgelsd_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgelss_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgelsx_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * jpvt, doublereal *rcond, integer *rank, doublereal *work, integer * info); /* Subroutine */ int _starpu_dgelsy_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * jpvt, doublereal *rcond, integer *rank, doublereal *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_dgeql2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgeqlf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgeqp3_(integer *m, integer *n, doublereal *a, integer * lda, integer *jpvt, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgeqpf_(integer *m, integer *n, doublereal *a, integer * lda, integer *jpvt, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgeqr2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgeqrf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgerfs_(char *trans, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer * ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgerfsx_(char *trans, char *equed, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, doublereal *params, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgerq2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgerqf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgesc2_(integer *n, doublereal *a, integer *lda, doublereal *rhs, integer *ipiv, integer *jpiv, doublereal *scale); /* Subroutine */ int _starpu_dgesdd_(char *jobz, integer *m, integer *n, doublereal * a, integer *lda, doublereal *s, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgesv_(integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dgesvd_(char *jobu, char *jobvt, integer *m, integer *n, doublereal *a, integer *lda, doublereal *s, doublereal *u, integer * ldu, doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgesvj_(char *joba, char *jobu, char *jobv, integer *m, integer *n, doublereal *a, integer *lda, doublereal *sva, integer *mv, doublereal *v, integer *ldv, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgesvx_(char *fact, char *trans, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dgesvxx_(char *fact, char *trans, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * nparams, doublereal *params, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dgetc2_(integer *n, doublereal *a, integer *lda, integer *ipiv, integer *jpiv, integer *info); /* Subroutine */ int _starpu_dgetf2_(integer *m, integer *n, doublereal *a, integer * lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_dgetrf_(integer *m, integer *n, doublereal *a, integer * lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_dgetri_(integer *n, doublereal *a, integer *lda, integer *ipiv, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgetrs_(char *trans, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * ldb, integer *info); /* Subroutine */ int _starpu_dggbak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, integer *m, doublereal *v, integer *ldv, integer *info); /* Subroutine */ int _starpu_dggbal_(char *job, integer *n, doublereal *a, integer * lda, doublereal *b, integer *ldb, integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, doublereal *work, integer * info); /* Subroutine */ int _starpu_dgges_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer *sdim, doublereal *alphar, doublereal *alphai, doublereal *beta, doublereal *vsl, integer *ldvsl, doublereal *vsr, integer *ldvsr, doublereal *work, integer *lwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_dggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, char *sense, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer *sdim, doublereal *alphar, doublereal *alphai, doublereal *beta, doublereal *vsl, integer *ldvsl, doublereal *vsr, integer *ldvsr, doublereal *rconde, doublereal * rcondv, doublereal *work, integer *lwork, integer *iwork, integer * liwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_dggev_(char *jobvl, char *jobvr, integer *n, doublereal * a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal *beta, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dggevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * beta, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, doublereal *abnrm, doublereal *bbnrm, doublereal *rconde, doublereal * rcondv, doublereal *work, integer *lwork, integer *iwork, logical * bwork, integer *info); /* Subroutine */ int _starpu_dggglm_(integer *n, integer *m, integer *p, doublereal * a, integer *lda, doublereal *b, integer *ldb, doublereal *d__, doublereal *x, doublereal *y, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgghrd_(char *compq, char *compz, integer *n, integer * ilo, integer *ihi, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *q, integer *ldq, doublereal *z__, integer * ldz, integer *info); /* Subroutine */ int _starpu_dgglse_(integer *m, integer *n, integer *p, doublereal * a, integer *lda, doublereal *b, integer *ldb, doublereal *c__, doublereal *d__, doublereal *x, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dggqrf_(integer *n, integer *m, integer *p, doublereal * a, integer *lda, doublereal *taua, doublereal *b, integer *ldb, doublereal *taub, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dggrqf_(integer *m, integer *p, integer *n, doublereal * a, integer *lda, doublereal *taua, doublereal *b, integer *ldb, doublereal *taub, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dggsvd_(char *jobu, char *jobv, char *jobq, integer *m, integer *n, integer *p, integer *k, integer *l, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *alpha, doublereal *beta, doublereal *u, integer *ldu, doublereal *v, integer *ldv, doublereal *q, integer *ldq, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dggsvp_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *tola, doublereal *tolb, integer *k, integer *l, doublereal *u, integer *ldu, doublereal *v, integer *ldv, doublereal *q, integer *ldq, integer *iwork, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dgsvj0_(char *jobv, integer *m, integer *n, doublereal * a, integer *lda, doublereal *d__, doublereal *sva, integer *mv, doublereal *v, integer *ldv, doublereal *eps, doublereal *sfmin, doublereal *tol, integer *nsweep, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dgsvj1_(char *jobv, integer *m, integer *n, integer *n1, doublereal *a, integer *lda, doublereal *d__, doublereal *sva, integer *mv, doublereal *v, integer *ldv, doublereal *eps, doublereal *sfmin, doublereal *tol, integer *nsweep, doublereal *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_dgtcon_(char *norm, integer *n, doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, integer *ipiv, doublereal *anorm, doublereal *rcond, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dgtrfs_(char *trans, integer *n, integer *nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal *dlf, doublereal *df, doublereal *duf, doublereal *du2, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * ferr, doublereal *berr, doublereal *work, integer *iwork, integer * info); /* Subroutine */ int _starpu_dgtsv_(integer *n, integer *nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dgtsvx_(char *fact, char *trans, integer *n, integer * nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal * dlf, doublereal *df, doublereal *duf, doublereal *du2, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dgttrf_(integer *n, doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, integer *ipiv, integer *info); /* Subroutine */ int _starpu_dgttrs_(char *trans, integer *n, integer *nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, integer *ipiv, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dgtts2_(integer *itrans, integer *n, integer *nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, integer *ipiv, doublereal *b, integer *ldb); /* Subroutine */ int _starpu_dhgeqz_(char *job, char *compq, char *compz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *t, integer *ldt, doublereal *alphar, doublereal *alphai, doublereal * beta, doublereal *q, integer *ldq, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dhsein_(char *side, char *eigsrc, char *initv, logical * select, integer *n, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, doublereal *work, integer * ifaill, integer *ifailr, integer *info); /* Subroutine */ int _starpu_dhseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info); logical _starpu_disnan_(doublereal *din); /* Subroutine */ int _starpu_dla_gbamv__(integer *trans, integer *m, integer *n, integer *kl, integer *ku, doublereal *alpha, doublereal *ab, integer * ldab, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); doublereal _starpu_dla_gbrcond__(char *trans, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, integer *cmode, doublereal *c__, integer *info, doublereal *work, integer *iwork, ftnlen trans_len); /* Subroutine */ int _starpu_dla_gbrfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, logical *colequ, doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, doublereal *res, doublereal *ayb, doublereal *dy, doublereal * y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info); doublereal _starpu_dla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * ncols, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb); /* Subroutine */ int _starpu_dla_geamv__(integer *trans, integer *m, integer *n, doublereal *alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); doublereal _starpu_dla_gercond__(char *trans, integer *n, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, integer *cmode, doublereal *c__, integer *info, doublereal *work, integer *iwork, ftnlen trans_len); /* Subroutine */ int _starpu_dla_gerfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer * ldy, doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, doublereal *res, doublereal *ayb, doublereal * dy, doublereal *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info); /* Subroutine */ int _starpu_dla_lin_berr__(integer *n, integer *nz, integer *nrhs, doublereal *res, doublereal *ayb, doublereal *berr); doublereal _starpu_dla_porcond__(char *uplo, integer *n, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *cmode, doublereal *c__, integer *info, doublereal *work, integer *iwork, ftnlen uplo_len); /* Subroutine */ int _starpu_dla_porfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal * af, integer *ldaf, logical *colequ, doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, doublereal *res, doublereal *ayb, doublereal *dy, doublereal * y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_dla_porpvgrw__(char *uplo, integer *ncols, doublereal *a, integer * lda, doublereal *af, integer *ldaf, doublereal *work, ftnlen uplo_len); doublereal _starpu_dla_rpvgrw__(integer *n, integer *ncols, doublereal *a, integer * lda, doublereal *af, integer *ldaf); /* Subroutine */ int _starpu_dla_syamv__(integer *uplo, integer *n, doublereal *alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); doublereal _starpu_dla_syrcond__(char *uplo, integer *n, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, integer *cmode, doublereal *c__, integer *info, doublereal *work, integer *iwork, ftnlen uplo_len); /* Subroutine */ int _starpu_dla_syrfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal * af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer *ldy, doublereal * berr_out__, integer *n_norms__, doublereal *errs_n__, doublereal * errs_c__, doublereal *res, doublereal *ayb, doublereal *dy, doublereal *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_dla_syrpvgrw__(char *uplo, integer *n, integer *info, doublereal * a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, doublereal *work, ftnlen uplo_len); /* Subroutine */ int _starpu_dla_wwaddw__(integer *n, doublereal *x, doublereal *y, doublereal *w); /* Subroutine */ int _starpu_dlabad_(doublereal *small, doublereal *large); /* Subroutine */ int _starpu_dlabrd_(integer *m, integer *n, integer *nb, doublereal * a, integer *lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal *taup, doublereal *x, integer *ldx, doublereal *y, integer *ldy); /* Subroutine */ int _starpu_dlacn2_(integer *n, doublereal *v, doublereal *x, integer *isgn, doublereal *est, integer *kase, integer *isave); /* Subroutine */ int _starpu_dlacon_(integer *n, doublereal *v, doublereal *x, integer *isgn, doublereal *est, integer *kase); /* Subroutine */ int _starpu_dlacpy_(char *uplo, integer *m, integer *n, doublereal * a, integer *lda, doublereal *b, integer *ldb); /* Subroutine */ int _starpu_dladiv_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *p, doublereal *q); /* Subroutine */ int _starpu_dlae2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *rt1, doublereal *rt2); /* Subroutine */ int _starpu_dlaebz_(integer *ijob, integer *nitmax, integer *n, integer *mmax, integer *minp, integer *nbmin, doublereal *abstol, doublereal *reltol, doublereal *pivmin, doublereal *d__, doublereal * e, doublereal *e2, integer *nval, doublereal *ab, doublereal *c__, integer *mout, integer *nab, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlaed0_(integer *icompq, integer *qsiz, integer *n, doublereal *d__, doublereal *e, doublereal *q, integer *ldq, doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlaed1_(integer *n, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlaed2_(integer *k, integer *n, integer *n1, doublereal * d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2, integer *indx, integer *indxc, integer *indxp, integer *coltyp, integer *info); /* Subroutine */ int _starpu_dlaed3_(integer *k, integer *n, integer *n1, doublereal * d__, doublereal *q, integer *ldq, doublereal *rho, doublereal *dlamda, doublereal *q2, integer *indx, integer *ctot, doublereal *w, doublereal *s, integer *info); /* Subroutine */ int _starpu_dlaed4_(integer *n, integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam, integer *info); /* Subroutine */ int _starpu_dlaed5_(integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam); /* Subroutine */ int _starpu_dlaed6_(integer *kniter, logical *orgati, doublereal * rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal * tau, integer *info); /* Subroutine */ int _starpu_dlaed7_(integer *icompq, integer *n, integer *qsiz, integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr, integer * perm, integer *givptr, integer *givcol, doublereal *givnum, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlaed8_(integer *icompq, integer *k, integer *n, integer *qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer *givptr, integer *givcol, doublereal *givnum, integer *indxp, integer *indx, integer *info); /* Subroutine */ int _starpu_dlaed9_(integer *k, integer *kstart, integer *kstop, integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, integer *info); /* Subroutine */ int _starpu_dlaeda_(integer *n, integer *tlvls, integer *curlvl, integer *curpbm, integer *prmptr, integer *perm, integer *givptr, integer *givcol, doublereal *givnum, doublereal *q, integer *qptr, doublereal *z__, doublereal *ztemp, integer *info); /* Subroutine */ int _starpu_dlaein_(logical *rightv, logical *noinit, integer *n, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, doublereal *vr, doublereal *vi, doublereal *b, integer *ldb, doublereal *work, doublereal *eps3, doublereal *smlnum, doublereal * bignum, integer *info); /* Subroutine */ int _starpu_dlaev2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *rt1, doublereal *rt2, doublereal *cs1, doublereal *sn1); /* Subroutine */ int _starpu_dlaexc_(logical *wantq, integer *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, integer *j1, integer *n1, integer *n2, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlag2_(doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *safmin, doublereal *scale1, doublereal * scale2, doublereal *wr1, doublereal *wr2, doublereal *wi); /* Subroutine */ int _starpu_dlag2s_(integer *m, integer *n, doublereal *a, integer * lda, real *sa, integer *ldsa, integer *info); /* Subroutine */ int _starpu_dlags2_(logical *upper, doublereal *a1, doublereal *a2, doublereal *a3, doublereal *b1, doublereal *b2, doublereal *b3, doublereal *csu, doublereal *snu, doublereal *csv, doublereal *snv, doublereal *csq, doublereal *snq); /* Subroutine */ int _starpu_dlagtf_(integer *n, doublereal *a, doublereal *lambda, doublereal *b, doublereal *c__, doublereal *tol, doublereal *d__, integer *in, integer *info); /* Subroutine */ int _starpu_dlagtm_(char *trans, integer *n, integer *nrhs, doublereal *alpha, doublereal *dl, doublereal *d__, doublereal *du, doublereal *x, integer *ldx, doublereal *beta, doublereal *b, integer *ldb); /* Subroutine */ int _starpu_dlagts_(integer *job, integer *n, doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, integer *in, doublereal *y, doublereal *tol, integer *info); /* Subroutine */ int _starpu_dlagv2_(doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * beta, doublereal *csl, doublereal *snl, doublereal *csr, doublereal * snr); /* Subroutine */ int _starpu_dlahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, integer *info); /* Subroutine */ int _starpu_dlahr2_(integer *n, integer *k, integer *nb, doublereal * a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, doublereal *y, integer *ldy); /* Subroutine */ int _starpu_dlahrd_(integer *n, integer *k, integer *nb, doublereal * a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, doublereal *y, integer *ldy); /* Subroutine */ int _starpu_dlaic1_(integer *job, integer *j, doublereal *x, doublereal *sest, doublereal *w, doublereal *gamma, doublereal * sestpr, doublereal *s, doublereal *c__); logical _starpu_dlaisnan_(doublereal *din1, doublereal *din2); /* Subroutine */ int _starpu_dlaln2_(logical *ltrans, integer *na, integer *nw, doublereal *smin, doublereal *ca, doublereal *a, integer *lda, doublereal *d1, doublereal *d2, doublereal *b, integer *ldb, doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, doublereal *scale, doublereal *xnorm, integer *info); /* Subroutine */ int _starpu_dlals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal * poles, doublereal *difl, doublereal *difr, doublereal *z__, integer * k, doublereal *c__, doublereal *s, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal * poles, integer *givptr, integer *givcol, integer *ldgcol, integer * perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlalsd_(char *uplo, integer *smlsiz, integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, doublereal *rcond, integer *rank, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlamrg_(integer *n1, integer *n2, doublereal *a, integer *dtrd1, integer *dtrd2, integer *index); integer _starpu_dlaneg_(integer *n, doublereal *d__, doublereal *lld, doublereal * sigma, doublereal *pivmin, integer *r__); doublereal _starpu_dlangb_(char *norm, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal *work); doublereal _starpu_dlange_(char *norm, integer *m, integer *n, doublereal *a, integer *lda, doublereal *work); doublereal _starpu_dlangt_(char *norm, integer *n, doublereal *dl, doublereal *d__, doublereal *du); doublereal _starpu_dlanhs_(char *norm, integer *n, doublereal *a, integer *lda, doublereal *work); doublereal _starpu_dlansb_(char *norm, char *uplo, integer *n, integer *k, doublereal *ab, integer *ldab, doublereal *work); doublereal _starpu_dlansf_(char *norm, char *transr, char *uplo, integer *n, doublereal *a, doublereal *work); doublereal _starpu_dlansp_(char *norm, char *uplo, integer *n, doublereal *ap, doublereal *work); doublereal _starpu_dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e); doublereal _starpu_dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *work); doublereal _starpu_dlantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, doublereal *ab, integer *ldab, doublereal *work); doublereal _starpu_dlantp_(char *norm, char *uplo, char *diag, integer *n, doublereal *ap, doublereal *work); doublereal _starpu_dlantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, doublereal *a, integer *lda, doublereal *work); /* Subroutine */ int _starpu_dlanv2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *rt1r, doublereal *rt1i, doublereal *rt2r, doublereal *rt2i, doublereal *cs, doublereal *sn); /* Subroutine */ int _starpu_dlapll_(integer *n, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *ssmin); /* Subroutine */ int _starpu_dlapmt_(logical *forwrd, integer *m, integer *n, doublereal *x, integer *ldx, integer *k); doublereal _starpu_dlapy2_(doublereal *x, doublereal *y); doublereal _starpu_dlapy3_(doublereal *x, doublereal *y, doublereal *z__); /* Subroutine */ int _starpu_dlaqgb_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed); /* Subroutine */ int _starpu_dlaqge_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed); /* Subroutine */ int _starpu_dlaqp2_(integer *m, integer *n, integer *offset, doublereal *a, integer *lda, integer *jpvt, doublereal *tau, doublereal *vn1, doublereal *vn2, doublereal *work); /* Subroutine */ int _starpu_dlaqps_(integer *m, integer *n, integer *offset, integer *nb, integer *kb, doublereal *a, integer *lda, integer *jpvt, doublereal *tau, doublereal *vn1, doublereal *vn2, doublereal *auxv, doublereal *f, integer *ldf); /* Subroutine */ int _starpu_dlaqr0_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dlaqr1_(integer *n, doublereal *h__, integer *ldh, doublereal *sr1, doublereal *si1, doublereal *sr2, doublereal *si2, doublereal *v); /* Subroutine */ int _starpu_dlaqr2_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * nv, doublereal *wv, integer *ldwv, doublereal *work, integer *lwork); /* Subroutine */ int _starpu_dlaqr3_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * nv, doublereal *wv, integer *ldwv, doublereal *work, integer *lwork); /* Subroutine */ int _starpu_dlaqr4_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dlaqr5_(logical *wantt, logical *wantz, integer *kacc22, integer *n, integer *ktop, integer *kbot, integer *nshfts, doublereal *sr, doublereal *si, doublereal *h__, integer *ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, doublereal *v, integer * ldv, doublereal *u, integer *ldu, integer *nv, doublereal *wv, integer *ldwv, integer *nh, doublereal *wh, integer *ldwh); /* Subroutine */ int _starpu_dlaqsb_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_dlaqsp_(char *uplo, integer *n, doublereal *ap, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_dlaqsy_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_dlaqtr_(logical *ltran, logical *lreal, integer *n, doublereal *t, integer *ldt, doublereal *b, doublereal *w, doublereal *scale, doublereal *x, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlar1v_(integer *n, integer *b1, integer *bn, doublereal *lambda, doublereal *d__, doublereal *l, doublereal *ld, doublereal * lld, doublereal *pivmin, doublereal *gaptol, doublereal *z__, logical *wantnc, integer *negcnt, doublereal *ztz, doublereal *mingma, integer *r__, integer *isuppz, doublereal *nrminv, doublereal *resid, doublereal *rqcorr, doublereal *work); /* Subroutine */ int _starpu_dlar2v_(integer *n, doublereal *x, doublereal *y, doublereal *z__, integer *incx, doublereal *c__, doublereal *s, integer *incc); /* Subroutine */ int _starpu_dlarf_(char *side, integer *m, integer *n, doublereal *v, integer *incv, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work); /* Subroutine */ int _starpu_dlarfb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, doublereal *v, integer * ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, doublereal *work, integer *ldwork); /* Subroutine */ int _starpu_dlarfg_(integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *tau); /* Subroutine */ int _starpu_dlarfp_(integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *tau); /* Subroutine */ int _starpu_dlarft_(char *direct, char *storev, integer *n, integer * k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, integer *ldt); /* Subroutine */ int _starpu_dlarfx_(char *side, integer *m, integer *n, doublereal * v, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work); /* Subroutine */ int _starpu_dlargv_(integer *n, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *c__, integer *incc); /* Subroutine */ int _starpu_dlarnv_(integer *idist, integer *iseed, integer *n, doublereal *x); /* Subroutine */ int _starpu_dlarra_(integer *n, doublereal *d__, doublereal *e, doublereal *e2, doublereal *spltol, doublereal *tnrm, integer *nsplit, integer *isplit, integer *info); /* Subroutine */ int _starpu_dlarrb_(integer *n, doublereal *d__, doublereal *lld, integer *ifirst, integer *ilast, doublereal *rtol1, doublereal *rtol2, integer *offset, doublereal *w, doublereal *wgap, doublereal *werr, doublereal *work, integer *iwork, doublereal *pivmin, doublereal * spdiam, integer *twist, integer *info); /* Subroutine */ int _starpu_dlarrc_(char *jobt, integer *n, doublereal *vl, doublereal *vu, doublereal *d__, doublereal *e, doublereal *pivmin, integer *eigcnt, integer *lcnt, integer *rcnt, integer *info); /* Subroutine */ int _starpu_dlarrd_(char *range, char *order, integer *n, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *gers, doublereal *reltol, doublereal *d__, doublereal *e, doublereal *e2, doublereal *pivmin, integer *nsplit, integer *isplit, integer *m, doublereal *w, doublereal *werr, doublereal *wl, doublereal *wu, integer *iblock, integer *indexw, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlarre_(char *range, integer *n, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *d__, doublereal *e, doublereal *e2, doublereal *rtol1, doublereal *rtol2, doublereal * spltol, integer *nsplit, integer *isplit, integer *m, doublereal *w, doublereal *werr, doublereal *wgap, integer *iblock, integer *indexw, doublereal *gers, doublereal *pivmin, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dlarrf_(integer *n, doublereal *d__, doublereal *l, doublereal *ld, integer *clstrt, integer *clend, doublereal *w, doublereal *wgap, doublereal *werr, doublereal *spdiam, doublereal * clgapl, doublereal *clgapr, doublereal *pivmin, doublereal *sigma, doublereal *dplus, doublereal *lplus, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlarrj_(integer *n, doublereal *d__, doublereal *e2, integer *ifirst, integer *ilast, doublereal *rtol, integer *offset, doublereal *w, doublereal *werr, doublereal *work, integer *iwork, doublereal *pivmin, doublereal *spdiam, integer *info); /* Subroutine */ int _starpu_dlarrk_(integer *n, integer *iw, doublereal *gl, doublereal *gu, doublereal *d__, doublereal *e2, doublereal *pivmin, doublereal *reltol, doublereal *w, doublereal *werr, integer *info); /* Subroutine */ int _starpu_dlarrr_(integer *n, doublereal *d__, doublereal *e, integer *info); /* Subroutine */ int _starpu_dlarrv_(integer *n, doublereal *vl, doublereal *vu, doublereal *d__, doublereal *l, doublereal *pivmin, integer *isplit, integer *m, integer *dol, integer *dou, doublereal *minrgp, doublereal *rtol1, doublereal *rtol2, doublereal *w, doublereal *werr, doublereal *wgap, integer *iblock, integer *indexw, doublereal *gers, doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlarscl2_(integer *m, integer *n, doublereal *d__, doublereal *x, integer *ldx); /* Subroutine */ int _starpu_dlartg_(doublereal *f, doublereal *g, doublereal *cs, doublereal *sn, doublereal *r__); /* Subroutine */ int _starpu_dlartv_(integer *n, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *c__, doublereal *s, integer *incc); /* Subroutine */ int _starpu_dlaruv_(integer *iseed, integer *n, doublereal *x); /* Subroutine */ int _starpu_dlarz_(char *side, integer *m, integer *n, integer *l, doublereal *v, integer *incv, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work); /* Subroutine */ int _starpu_dlarzb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, integer *l, doublereal *v, integer *ldv, doublereal *t, integer *ldt, doublereal *c__, integer * ldc, doublereal *work, integer *ldwork); /* Subroutine */ int _starpu_dlarzt_(char *direct, char *storev, integer *n, integer * k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, integer *ldt); /* Subroutine */ int _starpu_dlas2_(doublereal *f, doublereal *g, doublereal *h__, doublereal *ssmin, doublereal *ssmax); /* Subroutine */ int _starpu_dlascl_(char *type__, integer *kl, integer *ku, doublereal *cfrom, doublereal *cto, integer *m, integer *n, doublereal *a, integer *lda, integer *info); /* Subroutine */ int _starpu_dlascl2_(integer *m, integer *n, doublereal *d__, doublereal *x, integer *ldx); /* Subroutine */ int _starpu_dlasd0_(integer *n, integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer * ldvt, integer *smlsiz, integer *iwork, doublereal *work, integer * info); /* Subroutine */ int _starpu_dlasd1_(integer *nl, integer *nr, integer *sqre, doublereal *d__, doublereal *alpha, doublereal *beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, integer *idxq, integer * iwork, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlasd2_(integer *nl, integer *nr, integer *sqre, integer *k, doublereal *d__, doublereal *z__, doublereal *alpha, doublereal * beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer * idxq, integer *coltyp, integer *info); /* Subroutine */ int _starpu_dlasd3_(integer *nl, integer *nr, integer *sqre, integer *k, doublereal *d__, doublereal *q, integer *ldq, doublereal *dsigma, doublereal *u, integer *ldu, doublereal *u2, integer *ldu2, doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, integer *idxc, integer *ctot, doublereal *z__, integer *info); /* Subroutine */ int _starpu_dlasd4_(integer *n, integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho, doublereal * sigma, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlasd5_(integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dsigma, doublereal * work); /* Subroutine */ int _starpu_dlasd6_(integer *icompq, integer *nl, integer *nr, integer *sqre, doublereal *d__, doublereal *vf, doublereal *vl, doublereal *alpha, doublereal *beta, integer *idxq, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal *poles, doublereal *difl, doublereal * difr, doublereal *z__, integer *k, doublereal *c__, doublereal *s, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlasd7_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *k, doublereal *d__, doublereal *z__, doublereal *zw, doublereal *vf, doublereal *vfw, doublereal *vl, doublereal *vlw, doublereal *alpha, doublereal *beta, doublereal * dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal *c__, doublereal *s, integer *info); /* Subroutine */ int _starpu_dlasd8_(integer *icompq, integer *k, doublereal *d__, doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal * work, integer *info); /* Subroutine */ int _starpu_dlasda_(integer *icompq, integer *smlsiz, integer *n, integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dlasdq_(char *uplo, integer *sqre, integer *n, integer * ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlasdt_(integer *n, integer *lvl, integer *nd, integer * inode, integer *ndiml, integer *ndimr, integer *msub); /* Subroutine */ int _starpu_dlaset_(char *uplo, integer *m, integer *n, doublereal * alpha, doublereal *beta, doublereal *a, integer *lda); /* Subroutine */ int _starpu_dlasq1_(integer *n, doublereal *d__, doublereal *e, doublereal *work, integer *info); /* Subroutine */ int _starpu_dlasq2_(integer *n, doublereal *z__, integer *info); /* Subroutine */ int _starpu_dlasq3_(integer *i0, integer *n0, doublereal *z__, integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig, doublereal *qmax, integer *nfail, integer *iter, integer *ndiv, logical *ieee, integer *ttype, doublereal *dmin1, doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, doublereal *g, doublereal *tau); /* Subroutine */ int _starpu_dlasq4_(integer *i0, integer *n0, doublereal *z__, integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, doublereal *tau, integer *ttype, doublereal *g); /* Subroutine */ int _starpu_dlasq5_(integer *i0, integer *n0, doublereal *z__, integer *pp, doublereal *tau, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2, logical *ieee); /* Subroutine */ int _starpu_dlasq6_(integer *i0, integer *n0, doublereal *z__, integer *pp, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2); /* Subroutine */ int _starpu_dlasr_(char *side, char *pivot, char *direct, integer *m, integer *n, doublereal *c__, doublereal *s, doublereal *a, integer * lda); /* Subroutine */ int _starpu_dlasrt_(char *id, integer *n, doublereal *d__, integer * info); /* Subroutine */ int _starpu_dlassq_(integer *n, doublereal *x, integer *incx, doublereal *scale, doublereal *sumsq); /* Subroutine */ int _starpu_dlasv2_(doublereal *f, doublereal *g, doublereal *h__, doublereal *ssmin, doublereal *ssmax, doublereal *snr, doublereal * csr, doublereal *snl, doublereal *csl); /* Subroutine */ int _starpu_dlaswp_(integer *n, doublereal *a, integer *lda, integer *k1, integer *k2, integer *ipiv, integer *incx); /* Subroutine */ int _starpu_dlasy2_(logical *ltranl, logical *ltranr, integer *isgn, integer *n1, integer *n2, doublereal *tl, integer *ldtl, doublereal * tr, integer *ldtr, doublereal *b, integer *ldb, doublereal *scale, doublereal *x, integer *ldx, doublereal *xnorm, integer *info); /* Subroutine */ int _starpu_dlasyf_(char *uplo, integer *n, integer *nb, integer *kb, doublereal *a, integer *lda, integer *ipiv, doublereal *w, integer * ldw, integer *info); /* Subroutine */ int _starpu_dlat2s_(char *uplo, integer *n, doublereal *a, integer * lda, real *sa, integer *ldsa, integer *info); /* Subroutine */ int _starpu_dlatbs_(char *uplo, char *trans, char *diag, char * normin, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *x, doublereal *scale, doublereal *cnorm, integer *info); /* Subroutine */ int _starpu_dlatdf_(integer *ijob, integer *n, doublereal *z__, integer *ldz, doublereal *rhs, doublereal *rdsum, doublereal *rdscal, integer *ipiv, integer *jpiv); /* Subroutine */ int _starpu_dlatps_(char *uplo, char *trans, char *diag, char * normin, integer *n, doublereal *ap, doublereal *x, doublereal *scale, doublereal *cnorm, integer *info); /* Subroutine */ int _starpu_dlatrd_(char *uplo, integer *n, integer *nb, doublereal * a, integer *lda, doublereal *e, doublereal *tau, doublereal *w, integer *ldw); /* Subroutine */ int _starpu_dlatrs_(char *uplo, char *trans, char *diag, char * normin, integer *n, doublereal *a, integer *lda, doublereal *x, doublereal *scale, doublereal *cnorm, integer *info); /* Subroutine */ int _starpu_dlatrz_(integer *m, integer *n, integer *l, doublereal * a, integer *lda, doublereal *tau, doublereal *work); /* Subroutine */ int _starpu_dlatzm_(char *side, integer *m, integer *n, doublereal * v, integer *incv, doublereal *tau, doublereal *c1, doublereal *c2, integer *ldc, doublereal *work); /* Subroutine */ int _starpu_dlauu2_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info); /* Subroutine */ int _starpu_dlauum_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info); /* Subroutine */ int _starpu_dopgtr_(char *uplo, integer *n, doublereal *ap, doublereal *tau, doublereal *q, integer *ldq, doublereal *work, integer *info); /* Subroutine */ int _starpu_dopmtr_(char *side, char *uplo, char *trans, integer *m, integer *n, doublereal *ap, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dorg2l_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dorg2r_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dorgbr_(char *vect, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorghr_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorgl2_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dorglq_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorgql_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorgqr_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorgr2_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *info); /* Subroutine */ int _starpu_dorgrq_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorgtr_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorm2l_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dorm2r_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dormbr_(char *vect, char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dormhr_(char *side, char *trans, integer *m, integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal * tau, doublereal *c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dorml2_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dormlq_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dormql_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dormqr_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dormr2_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dormr3_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, doublereal *a, integer *lda, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work, integer *info); /* Subroutine */ int _starpu_dormrq_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dormrz_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, doublereal *a, integer *lda, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dormtr_(char *side, char *uplo, char *trans, integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dpbcon_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, doublereal *anorm, doublereal *rcond, doublereal * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dpbequ_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_dpbrfs_(char *uplo, integer *n, integer *kd, integer * nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * ferr, doublereal *berr, doublereal *work, integer *iwork, integer * info); /* Subroutine */ int _starpu_dpbstf_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_dpbsv_(char *uplo, integer *n, integer *kd, integer * nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dpbsvx_(char *fact, char *uplo, integer *n, integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, char *equed, doublereal *s, doublereal *b, integer * ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dpbtf2_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_dpbtrf_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_dpbtrs_(char *uplo, integer *n, integer *kd, integer * nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dpftrf_(char *transr, char *uplo, integer *n, doublereal *a, integer *info); /* Subroutine */ int _starpu_dpftri_(char *transr, char *uplo, integer *n, doublereal *a, integer *info); /* Subroutine */ int _starpu_dpftrs_(char *transr, char *uplo, integer *n, integer * nrhs, doublereal *a, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dpocon_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *anorm, doublereal *rcond, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dpoequ_(integer *n, doublereal *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_dpoequb_(integer *n, doublereal *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_dporfs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * ferr, doublereal *berr, doublereal *work, integer *iwork, integer * info); /* Subroutine */ int _starpu_dporfsx_(char *uplo, char *equed, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, doublereal *s, doublereal *b, integer *ldb, doublereal *x, integer * ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * nparams, doublereal *params, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dposv_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dposvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, char *equed, doublereal *s, doublereal *b, integer *ldb, doublereal * x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal * berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dposvxx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, char *equed, doublereal *s, doublereal *b, integer *ldb, doublereal * x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal * berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublereal * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dpotf2_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info); /* Subroutine */ int _starpu_dpotrf_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info); /* Subroutine */ int _starpu_dpotri_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info); /* Subroutine */ int _starpu_dpotrs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_dppcon_(char *uplo, integer *n, doublereal *ap, doublereal *anorm, doublereal *rcond, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dppequ_(char *uplo, integer *n, doublereal *ap, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_dpprfs_(char *uplo, integer *n, integer *nrhs, doublereal *ap, doublereal *afp, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dppsv_(char *uplo, integer *n, integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dppsvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *ap, doublereal *afp, char *equed, doublereal *s, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_dpptrf_(char *uplo, integer *n, doublereal *ap, integer * info); /* Subroutine */ int _starpu_dpptri_(char *uplo, integer *n, doublereal *ap, integer * info); /* Subroutine */ int _starpu_dpptrs_(char *uplo, integer *n, integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dpstf2_(char *uplo, integer *n, doublereal *a, integer * lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, integer *info); /* Subroutine */ int _starpu_dpstrf_(char *uplo, integer *n, doublereal *a, integer * lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, integer *info); /* Subroutine */ int _starpu_dptcon_(integer *n, doublereal *d__, doublereal *e, doublereal *anorm, doublereal *rcond, doublereal *work, integer *info); /* Subroutine */ int _starpu_dpteqr_(char *compz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_dptrfs_(integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *df, doublereal *ef, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *info); /* Subroutine */ int _starpu_dptsv_(integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dptsvx_(char *fact, integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *df, doublereal *ef, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * info); /* Subroutine */ int _starpu_dpttrf_(integer *n, doublereal *d__, doublereal *e, integer *info); /* Subroutine */ int _starpu_dpttrs_(integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dptts2_(integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb); /* Subroutine */ int _starpu_drscl_(integer *n, doublereal *sa, doublereal *sx, integer *incx); /* Subroutine */ int _starpu_dsbev_(char *jobz, char *uplo, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_dsbevd_(char *jobz, char *uplo, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dsbevx_(char *jobz, char *range, char *uplo, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *q, integer * ldq, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_dsbgst_(char *vect, char *uplo, integer *n, integer *ka, integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * ldbb, doublereal *x, integer *ldx, doublereal *work, integer *info); /* Subroutine */ int _starpu_dsbgv_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * ldbb, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_dsbgvd_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * ldbb, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dsbgvx_(char *jobz, char *range, char *uplo, integer *n, integer *ka, integer *kb, doublereal *ab, integer *ldab, doublereal * bb, integer *ldbb, doublereal *q, integer *ldq, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_dsbtrd_(char *vect, char *uplo, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *d__, doublereal *e, doublereal *q, integer *ldq, doublereal *work, integer *info); /* Subroutine */ int _starpu_dsfrk_(char *transr, char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, doublereal *c__); /* Subroutine */ int _starpu__starpu_dsgesv_(integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *ldb, doublereal * x, integer *ldx, doublereal *work, real *swork, integer *iter, integer *info); /* Subroutine */ int _starpu_dspcon_(char *uplo, integer *n, doublereal *ap, integer * ipiv, doublereal *anorm, doublereal *rcond, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dspev_(char *jobz, char *uplo, integer *n, doublereal * ap, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_dspevd_(char *jobz, char *uplo, integer *n, doublereal * ap, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dspevx_(char *jobz, char *range, char *uplo, integer *n, doublereal *ap, doublereal *vl, doublereal *vu, integer *il, integer * iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_dspgst_(integer *itype, char *uplo, integer *n, doublereal *ap, doublereal *bp, integer *info); /* Subroutine */ int _starpu_dspgv_(integer *itype, char *jobz, char *uplo, integer * n, doublereal *ap, doublereal *bp, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_dspgvd_(integer *itype, char *jobz, char *uplo, integer * n, doublereal *ap, doublereal *bp, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dspgvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, doublereal *ap, doublereal *bp, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu__starpu_dsposv_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * x, integer *ldx, doublereal *work, real *swork, integer *iter, integer *info); /* Subroutine */ int _starpu_dsprfs_(char *uplo, integer *n, integer *nrhs, doublereal *ap, doublereal *afp, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dspsv_(char *uplo, integer *n, integer *nrhs, doublereal *ap, integer *ipiv, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dspsvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *ap, doublereal *afp, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dsptrd_(char *uplo, integer *n, doublereal *ap, doublereal *d__, doublereal *e, doublereal *tau, integer *info); /* Subroutine */ int _starpu_dsptrf_(char *uplo, integer *n, doublereal *ap, integer * ipiv, integer *info); /* Subroutine */ int _starpu_dsptri_(char *uplo, integer *n, doublereal *ap, integer * ipiv, doublereal *work, integer *info); /* Subroutine */ int _starpu_dsptrs_(char *uplo, integer *n, integer *nrhs, doublereal *ap, integer *ipiv, doublereal *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_dstebz_(char *range, char *order, integer *n, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, doublereal *d__, doublereal *e, integer *m, integer *nsplit, doublereal *w, integer *iblock, integer *isplit, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dstedc_(char *compz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dstegr_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dstein_(integer *n, doublereal *d__, doublereal *e, integer *m, doublereal *w, integer *iblock, integer *isplit, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_dstemr_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, integer *m, doublereal *w, doublereal *z__, integer *ldz, integer *nzc, integer *isuppz, logical *tryrac, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dsteqr_(char *compz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_dsterf_(integer *n, doublereal *d__, doublereal *e, integer *info); /* Subroutine */ int _starpu_dstev_(char *jobz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_dstevd_(char *jobz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dstevr_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dstevx_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_dsycon_(char *uplo, integer *n, doublereal *a, integer * lda, integer *ipiv, doublereal *anorm, doublereal *rcond, doublereal * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dsyequb_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *s, doublereal *scond, doublereal *amax, doublereal * work, integer *info); /* Subroutine */ int _starpu_dsyev_(char *jobz, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *w, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dsyevd_(char *jobz, char *uplo, integer *n, doublereal * a, integer *lda, doublereal *w, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dsyevr_(char *jobz, char *range, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer * il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dsyevx_(char *jobz, char *range, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer * il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_dsygs2_(integer *itype, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_dsygst_(integer *itype, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_dsygv_(integer *itype, char *jobz, char *uplo, integer * n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *w, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dsygvd_(integer *itype, char *jobz, char *uplo, integer * n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *w, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dsygvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_dsyrfs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer * ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dsyrfsx_(char *uplo, char *equed, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, doublereal *s, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *berr, integer * n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublereal * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dsysv_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dsysvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer * ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_dsysvxx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, char *equed, doublereal *s, doublereal *b, integer * ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal * rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal * err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, doublereal *params, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dsytd2_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *d__, doublereal *e, doublereal *tau, integer *info); /* Subroutine */ int _starpu_dsytf2_(char *uplo, integer *n, doublereal *a, integer * lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_dsytrd_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *d__, doublereal *e, doublereal *tau, doublereal * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dsytrf_(char *uplo, integer *n, doublereal *a, integer * lda, integer *ipiv, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dsytri_(char *uplo, integer *n, doublereal *a, integer * lda, integer *ipiv, doublereal *work, integer *info); /* Subroutine */ int _starpu_dsytrs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * ldb, integer *info); /* Subroutine */ int _starpu_dtbcon_(char *norm, char *uplo, char *diag, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *rcond, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dtbrfs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dtbtrs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_dtfsm_(char *transr, char *side, char *uplo, char *trans, char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, doublereal *b, integer *ldb); /* Subroutine */ int _starpu_dtftri_(char *transr, char *uplo, char *diag, integer *n, doublereal *a, integer *info); /* Subroutine */ int _starpu_dtfttp_(char *transr, char *uplo, integer *n, doublereal *arf, doublereal *ap, integer *info); /* Subroutine */ int _starpu_dtfttr_(char *transr, char *uplo, integer *n, doublereal *arf, doublereal *a, integer *lda, integer *info); /* Subroutine */ int _starpu_dtgevc_(char *side, char *howmny, logical *select, integer *n, doublereal *s, integer *lds, doublereal *p, integer *ldp, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, doublereal *work, integer *info); /* Subroutine */ int _starpu_dtgex2_(logical *wantq, logical *wantz, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * q, integer *ldq, doublereal *z__, integer *ldz, integer *j1, integer * n1, integer *n2, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dtgexc_(logical *wantq, logical *wantz, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * q, integer *ldq, doublereal *z__, integer *ldz, integer *ifst, integer *ilst, doublereal *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_dtgsen_(integer *ijob, logical *wantq, logical *wantz, logical *select, integer *n, doublereal *a, integer *lda, doublereal * b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * beta, doublereal *q, integer *ldq, doublereal *z__, integer *ldz, integer *m, doublereal *pl, doublereal *pr, doublereal *dif, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_dtgsja_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, integer *k, integer *l, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *tola, doublereal *tolb, doublereal *alpha, doublereal *beta, doublereal *u, integer *ldu, doublereal *v, integer *ldv, doublereal *q, integer * ldq, doublereal *work, integer *ncycle, integer *info); /* Subroutine */ int _starpu_dtgsna_(char *job, char *howmny, logical *select, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, doublereal *s, doublereal *dif, integer *mm, integer *m, doublereal * work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_dtgsy2_(char *trans, integer *ijob, integer *m, integer * n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *c__, integer *ldc, doublereal *d__, integer *ldd, doublereal *e, integer *lde, doublereal *f, integer *ldf, doublereal * scale, doublereal *rdsum, doublereal *rdscal, integer *iwork, integer *pq, integer *info); /* Subroutine */ int _starpu_dtgsyl_(char *trans, integer *ijob, integer *m, integer * n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *c__, integer *ldc, doublereal *d__, integer *ldd, doublereal *e, integer *lde, doublereal *f, integer *ldf, doublereal * scale, doublereal *dif, doublereal *work, integer *lwork, integer * iwork, integer *info); /* Subroutine */ int _starpu_dtpcon_(char *norm, char *uplo, char *diag, integer *n, doublereal *ap, doublereal *rcond, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dtprfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dtptri_(char *uplo, char *diag, integer *n, doublereal * ap, integer *info); /* Subroutine */ int _starpu_dtptrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_dtpttf_(char *transr, char *uplo, integer *n, doublereal *ap, doublereal *arf, integer *info); /* Subroutine */ int _starpu_dtpttr_(char *uplo, integer *n, doublereal *ap, doublereal *a, integer *lda, integer *info); /* Subroutine */ int _starpu_dtrcon_(char *norm, char *uplo, char *diag, integer *n, doublereal *a, integer *lda, doublereal *rcond, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dtrevc_(char *side, char *howmny, logical *select, integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, doublereal *work, integer *info); /* Subroutine */ int _starpu_dtrexc_(char *compq, integer *n, doublereal *t, integer * ldt, doublereal *q, integer *ldq, integer *ifst, integer *ilst, doublereal *work, integer *info); /* Subroutine */ int _starpu_dtrrfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer * ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_dtrsen_(char *job, char *compq, logical *select, integer *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, doublereal *wr, doublereal *wi, integer *m, doublereal *s, doublereal *sep, doublereal *work, integer *lwork, integer *iwork, integer * liwork, integer *info); /* Subroutine */ int _starpu_dtrsna_(char *job, char *howmny, logical *select, integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * ldvl, doublereal *vr, integer *ldvr, doublereal *s, doublereal *sep, integer *mm, integer *m, doublereal *work, integer *ldwork, integer * iwork, integer *info); /* Subroutine */ int _starpu_dtrsyl_(char *trana, char *tranb, integer *isgn, integer *m, integer *n, doublereal *a, integer *lda, doublereal *b, integer * ldb, doublereal *c__, integer *ldc, doublereal *scale, integer *info); /* Subroutine */ int _starpu_dtrti2_(char *uplo, char *diag, integer *n, doublereal * a, integer *lda, integer *info); /* Subroutine */ int _starpu_dtrtri_(char *uplo, char *diag, integer *n, doublereal * a, integer *lda, integer *info); /* Subroutine */ int _starpu_dtrtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer * ldb, integer *info); /* Subroutine */ int _starpu_dtrttf_(char *transr, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *arf, integer *info); /* Subroutine */ int _starpu_dtrttp_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *ap, integer *info); /* Subroutine */ int _starpu_dtzrqf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, integer *info); /* Subroutine */ int _starpu_dtzrzf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info); doublereal _starpu_dzsum1_(integer *n, doublecomplex *cx, integer *incx); integer _starpu_icmax1_(integer *n, complex *cx, integer *incx); integer _starpu_ieeeck_(integer *ispec, real *zero, real *one); integer _starpu_ilaclc_(integer *m, integer *n, complex *a, integer *lda); integer _starpu_ilaclr_(integer *m, integer *n, complex *a, integer *lda); integer _starpu_iladiag_(char *diag); integer _starpu_iladlc_(integer *m, integer *n, doublereal *a, integer *lda); integer _starpu_iladlr_(integer *m, integer *n, doublereal *a, integer *lda); integer _starpu_ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, integer *n2, integer *n3, integer *n4); integer _starpu_ilaprec_(char *prec); integer _starpu_ilaslc_(integer *m, integer *n, real *a, integer *lda); integer _starpu_ilaslr_(integer *m, integer *n, real *a, integer *lda); integer _starpu_ilatrans_(char *trans); integer _starpu_ilauplo_(char *uplo); /* Subroutine */ int _starpu_ilaver_(integer *vers_major__, integer *vers_minor__, integer *vers_patch__); integer _starpu_ilazlc_(integer *m, integer *n, doublecomplex *a, integer *lda); integer _starpu_ilazlr_(integer *m, integer *n, doublecomplex *a, integer *lda); integer _starpu_iparmq_(integer *ispec, char *name__, char *opts, integer *n, integer *ilo, integer *ihi, integer *lwork); integer _starpu_izmax1_(integer *n, doublecomplex *cx, integer *incx); logical _starpu_lsamen_(integer *n, char *ca, char *cb); integer _starpu_smaxloc_(real *a, integer *dimm); /* Subroutine */ int _starpu_sbdsdc_(char *uplo, char *compq, integer *n, real *d__, real *e, real *u, integer *ldu, real *vt, integer *ldvt, real *q, integer *iq, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sbdsqr_(char *uplo, integer *n, integer *ncvt, integer * nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real * u, integer *ldu, real *c__, integer *ldc, real *work, integer *info); doublereal _starpu_scsum1_(integer *n, complex *cx, integer *incx); /* Subroutine */ int _starpu_sdisna_(char *job, integer *m, integer *n, real *d__, real *sep, integer *info); /* Subroutine */ int _starpu_sgbbrd_(char *vect, integer *m, integer *n, integer *ncc, integer *kl, integer *ku, real *ab, integer *ldab, real *d__, real * e, real *q, integer *ldq, real *pt, integer *ldpt, real *c__, integer *ldc, real *work, integer *info); /* Subroutine */ int _starpu_sgbcon_(char *norm, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, integer *ipiv, real *anorm, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgbequ_(integer *m, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real * colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_sgbequb_(integer *m, integer *n, integer *kl, integer * ku, real *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_sgbrfs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real * ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgbrfsx_(char *trans, char *equed, integer *n, integer * kl, integer *ku, integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *berr, integer * n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * nparams, real *params, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgbsv_(integer *n, integer *kl, integer *ku, integer * nrhs, real *ab, integer *ldab, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sgbsvx_(char *fact, char *trans, integer *n, integer *kl, integer *ku, integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgbsvxx_(char *fact, char *trans, integer *n, integer * kl, integer *ku, integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, char *equed, real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real * rpvgrw, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real *params, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgbtf2_(integer *m, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_sgbtrf_(integer *m, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_sgbtrs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, real *ab, integer *ldab, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, real *scale, integer *m, real *v, integer *ldv, integer *info); /* Subroutine */ int _starpu_sgebal_(char *job, integer *n, real *a, integer *lda, integer *ilo, integer *ihi, real *scale, integer *info); /* Subroutine */ int _starpu_sgebd2_(integer *m, integer *n, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *work, integer *info); /* Subroutine */ int _starpu_sgebrd_(integer *m, integer *n, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_sgecon_(char *norm, integer *n, real *a, integer *lda, real *anorm, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgeequ_(integer *m, integer *n, real *a, integer *lda, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_sgeequb_(integer *m, integer *n, real *a, integer *lda, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer *info); /* Subroutine */ int _starpu_sgees_(char *jobvs, char *sort, L_fp select, integer *n, real *a, integer *lda, integer *sdim, real *wr, real *wi, real *vs, integer *ldvs, real *work, integer *lwork, logical *bwork, integer * info); /* Subroutine */ int _starpu_sgeesx_(char *jobvs, char *sort, L_fp select, char * sense, integer *n, real *a, integer *lda, integer *sdim, real *wr, real *wi, real *vs, integer *ldvs, real *rconde, real *rcondv, real * work, integer *lwork, integer *iwork, integer *liwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_sgeev_(char *jobvl, char *jobvr, integer *n, real *a, integer *lda, real *wr, real *wi, real *vl, integer *ldvl, real *vr, integer *ldvr, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgeevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, real *a, integer *lda, real *wr, real *wi, real * vl, integer *ldvl, real *vr, integer *ldvr, integer *ilo, integer * ihi, real *scale, real *abnrm, real *rconde, real *rcondv, real *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgegs_(char *jobvsl, char *jobvsr, integer *n, real *a, integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real *beta, real *vsl, integer *ldvsl, real *vsr, integer *ldvsr, real * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgegv_(char *jobvl, char *jobvr, integer *n, real *a, integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real *beta, real *vl, integer *ldvl, real *vr, integer *ldvr, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgehd2_(integer *n, integer *ilo, integer *ihi, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sgehrd_(integer *n, integer *ilo, integer *ihi, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgejsv_(char *joba, char *jobu, char *jobv, char *jobr, char *jobt, char *jobp, integer *m, integer *n, real *a, integer *lda, real *sva, real *u, integer *ldu, real *v, integer *ldv, real *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgelq2_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sgelqf_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgels_(char *trans, integer *m, integer *n, integer * nrhs, real *a, integer *lda, real *b, integer *ldb, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgelsd_(integer *m, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, real *s, real *rcond, integer * rank, real *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgelss_(integer *m, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, real *s, real *rcond, integer * rank, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgelsx_(integer *m, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer *jpvt, real *rcond, integer *rank, real *work, integer *info); /* Subroutine */ int _starpu_sgelsy_(integer *m, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer *jpvt, real *rcond, integer *rank, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgeql2_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sgeqlf_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgeqp3_(integer *m, integer *n, real *a, integer *lda, integer *jpvt, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgeqpf_(integer *m, integer *n, real *a, integer *lda, integer *jpvt, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sgeqr2_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sgeqrf_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgerfs_(char *trans, integer *n, integer *nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgerfsx_(char *trans, char *equed, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real *params, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgerq2_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sgerqf_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgesc2_(integer *n, real *a, integer *lda, real *rhs, integer *ipiv, integer *jpiv, real *scale); /* Subroutine */ int _starpu_sgesdd_(char *jobz, integer *m, integer *n, real *a, integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt, real *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgesv_(integer *n, integer *nrhs, real *a, integer *lda, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sgesvd_(char *jobu, char *jobvt, integer *m, integer *n, real *a, integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgesvj_(char *joba, char *jobu, char *jobv, integer *m, integer *n, real *a, integer *lda, real *sva, integer *mv, real *v, integer *ldv, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgesvx_(char *fact, char *trans, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, char *equed, real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgesvxx_(char *fact, char *trans, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, char *equed, real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer * n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * nparams, real *params, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgetc2_(integer *n, real *a, integer *lda, integer *ipiv, integer *jpiv, integer *info); /* Subroutine */ int _starpu_sgetf2_(integer *m, integer *n, real *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_sgetrf_(integer *m, integer *n, real *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_sgetri_(integer *n, real *a, integer *lda, integer *ipiv, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgetrs_(char *trans, integer *n, integer *nrhs, real *a, integer *lda, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sggbak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, real *lscale, real *rscale, integer *m, real *v, integer *ldv, integer *info); /* Subroutine */ int _starpu_sggbal_(char *job, integer *n, real *a, integer *lda, real *b, integer *ldb, integer *ilo, integer *ihi, real *lscale, real *rscale, real *work, integer *info); /* Subroutine */ int _starpu_sgges_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, integer *n, real *a, integer *lda, real *b, integer *ldb, integer *sdim, real *alphar, real *alphai, real *beta, real *vsl, integer *ldvsl, real *vsr, integer *ldvsr, real *work, integer *lwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_sggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, char *sense, integer *n, real *a, integer *lda, real *b, integer *ldb, integer *sdim, real *alphar, real *alphai, real *beta, real *vsl, integer *ldvsl, real *vsr, integer *ldvsr, real *rconde, real *rcondv, real *work, integer *lwork, integer *iwork, integer * liwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_sggev_(char *jobvl, char *jobvr, integer *n, real *a, integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real *beta, real *vl, integer *ldvl, real *vr, integer *ldvr, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sggevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, real *a, integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real *beta, real *vl, integer *ldvl, real *vr, integer *ldvr, integer *ilo, integer *ihi, real *lscale, real *rscale, real *abnrm, real *bbnrm, real *rconde, real *rcondv, real *work, integer *lwork, integer *iwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_sggglm_(integer *n, integer *m, integer *p, real *a, integer *lda, real *b, integer *ldb, real *d__, real *x, real *y, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgghrd_(char *compq, char *compz, integer *n, integer * ilo, integer *ihi, real *a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real *z__, integer *ldz, integer *info); /* Subroutine */ int _starpu_sgglse_(integer *m, integer *n, integer *p, real *a, integer *lda, real *b, integer *ldb, real *c__, real *d__, real *x, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sggqrf_(integer *n, integer *m, integer *p, real *a, integer *lda, real *taua, real *b, integer *ldb, real *taub, real * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sggrqf_(integer *m, integer *p, integer *n, real *a, integer *lda, real *taua, real *b, integer *ldb, real *taub, real * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sggsvd_(char *jobu, char *jobv, char *jobq, integer *m, integer *n, integer *p, integer *k, integer *l, real *a, integer *lda, real *b, integer *ldb, real *alpha, real *beta, real *u, integer * ldu, real *v, integer *ldv, real *q, integer *ldq, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sggsvp_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, real *a, integer *lda, real *b, integer *ldb, real *tola, real *tolb, integer *k, integer *l, real *u, integer *ldu, real *v, integer *ldv, real *q, integer *ldq, integer *iwork, real * tau, real *work, integer *info); /* Subroutine */ int _starpu_sgsvj0_(char *jobv, integer *m, integer *n, real *a, integer *lda, real *d__, real *sva, integer *mv, real *v, integer * ldv, real *eps, real *sfmin, real *tol, integer *nsweep, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgsvj1_(char *jobv, integer *m, integer *n, integer *n1, real *a, integer *lda, real *d__, real *sva, integer *mv, real *v, integer *ldv, real *eps, real *sfmin, real *tol, integer *nsweep, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sgtcon_(char *norm, integer *n, real *dl, real *d__, real *du, real *du2, integer *ipiv, real *anorm, real *rcond, real * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgtrfs_(char *trans, integer *n, integer *nrhs, real *dl, real *d__, real *du, real *dlf, real *df, real *duf, real *du2, integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real * ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgtsv_(integer *n, integer *nrhs, real *dl, real *d__, real *du, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sgtsvx_(char *fact, char *trans, integer *n, integer * nrhs, real *dl, real *d__, real *du, real *dlf, real *df, real *duf, real *du2, integer *ipiv, real *b, integer *ldb, real *x, integer * ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sgttrf_(integer *n, real *dl, real *d__, real *du, real * du2, integer *ipiv, integer *info); /* Subroutine */ int _starpu_sgttrs_(char *trans, integer *n, integer *nrhs, real *dl, real *d__, real *du, real *du2, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sgtts2_(integer *itrans, integer *n, integer *nrhs, real *dl, real *d__, real *du, real *du2, integer *ipiv, real *b, integer * ldb); /* Subroutine */ int _starpu_shgeqz_(char *job, char *compq, char *compz, integer *n, integer *ilo, integer *ihi, real *h__, integer *ldh, real *t, integer *ldt, real *alphar, real *alphai, real *beta, real *q, integer *ldq, real *z__, integer *ldz, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_shsein_(char *side, char *eigsrc, char *initv, logical * select, integer *n, real *h__, integer *ldh, real *wr, real *wi, real *vl, integer *ldvl, real *vr, integer *ldvr, integer *mm, integer *m, real *work, integer *ifaill, integer *ifailr, integer *info); /* Subroutine */ int _starpu_shseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real *wi, real *z__, integer *ldz, real *work, integer *lwork, integer *info); logical _starpu_sisnan_(real *sin__); /* Subroutine */ int _starpu_sla_gbamv__(integer *trans, integer *m, integer *n, integer *kl, integer *ku, real *alpha, real *ab, integer *ldab, real * x, integer *incx, real *beta, real *y, integer *incy); doublereal _starpu_sla_gbrcond__(char *trans, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, integer *cmode, real *c__, integer *info, real *work, integer *iwork, ftnlen trans_len); /* Subroutine */ int _starpu_sla_gbrfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, logical *colequ, real *c__, real *b, integer *ldb, real *y, integer * ldy, real *berr_out__, integer *n_norms__, real *errs_n__, real * errs_c__, real *res, real *ayb, real *dy, real *y_tail__, real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, logical * ignore_cwise__, integer *info); doublereal _starpu_sla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * ncols, real *ab, integer *ldab, real *afb, integer *ldafb); /* Subroutine */ int _starpu_sla_geamv__(integer *trans, integer *m, integer *n, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer *incy); doublereal _starpu_sla_gercond__(char *trans, integer *n, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, integer *cmode, real *c__, integer *info, real *work, integer *iwork, ftnlen trans_len); /* Subroutine */ int _starpu_sla_gerfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *nrhs, real *a, integer *lda, real * af, integer *ldaf, integer *ipiv, logical *colequ, real *c__, real *b, integer *ldb, real *y, integer *ldy, real *berr_out__, integer * n_norms__, real *errs_n__, real *errs_c__, real *res, real *ayb, real *dy, real *y_tail__, real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info); /* Subroutine */ int _starpu_sla_lin_berr__(integer *n, integer *nz, integer *nrhs, real *res, real *ayb, real *berr); doublereal _starpu_sla_porcond__(char *uplo, integer *n, real *a, integer *lda, real * af, integer *ldaf, integer *cmode, real *c__, integer *info, real * work, integer *iwork, ftnlen uplo_len); /* Subroutine */ int _starpu_sla_porfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, real *a, integer *lda, real *af, integer * ldaf, logical *colequ, real *c__, real *b, integer *ldb, real *y, integer *ldy, real *berr_out__, integer *n_norms__, real *errs_n__, real *errs_c__, real *res, real *ayb, real *dy, real *y_tail__, real * rcond, integer *ithresh, real *rthresh, real *dz_ub__, logical * ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_sla_porpvgrw__(char *uplo, integer *ncols, real *a, integer *lda, real *af, integer *ldaf, real *work, ftnlen uplo_len); doublereal _starpu_sla_rpvgrw__(integer *n, integer *ncols, real *a, integer *lda, real *af, integer *ldaf); /* Subroutine */ int _starpu_sla_syamv__(integer *uplo, integer *n, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer *incy); doublereal _starpu_sla_syrcond__(char *uplo, integer *n, real *a, integer *lda, real * af, integer *ldaf, integer *ipiv, integer *cmode, real *c__, integer * info, real *work, integer *iwork, ftnlen uplo_len); /* Subroutine */ int _starpu_sla_syrfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, real *a, integer *lda, real *af, integer * ldaf, integer *ipiv, logical *colequ, real *c__, real *b, integer * ldb, real *y, integer *ldy, real *berr_out__, integer *n_norms__, real *errs_n__, real *errs_c__, real *res, real *ayb, real *dy, real * y_tail__, real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_sla_syrpvgrw__(char *uplo, integer *n, integer *info, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, real *work, ftnlen uplo_len); /* Subroutine */ int _starpu_sla_wwaddw__(integer *n, real *x, real *y, real *w); /* Subroutine */ int _starpu_slabad_(real *small, real *large); /* Subroutine */ int _starpu_slabrd_(integer *m, integer *n, integer *nb, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *x, integer *ldx, real *y, integer *ldy); /* Subroutine */ int _starpu_slacn2_(integer *n, real *v, real *x, integer *isgn, real *est, integer *kase, integer *isave); /* Subroutine */ int _starpu_slacon_(integer *n, real *v, real *x, integer *isgn, real *est, integer *kase); /* Subroutine */ int _starpu_slacpy_(char *uplo, integer *m, integer *n, real *a, integer *lda, real *b, integer *ldb); /* Subroutine */ int _starpu_sladiv_(real *a, real *b, real *c__, real *d__, real *p, real *q); /* Subroutine */ int _starpu_slae2_(real *a, real *b, real *c__, real *rt1, real *rt2); /* Subroutine */ int _starpu_slaebz_(integer *ijob, integer *nitmax, integer *n, integer *mmax, integer *minp, integer *nbmin, real *abstol, real * reltol, real *pivmin, real *d__, real *e, real *e2, integer *nval, real *ab, real *c__, integer *mout, integer *nab, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slaed0_(integer *icompq, integer *qsiz, integer *n, real *d__, real *e, real *q, integer *ldq, real *qstore, integer *ldqs, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slaed1_(integer *n, real *d__, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_slaed2_(integer *k, integer *n, integer *n1, real *d__, real *q, integer *ldq, integer *indxq, real *rho, real *z__, real * dlamda, real *w, real *q2, integer *indx, integer *indxc, integer * indxp, integer *coltyp, integer *info); /* Subroutine */ int _starpu_slaed3_(integer *k, integer *n, integer *n1, real *d__, real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer * indx, integer *ctot, real *w, real *s, integer *info); /* Subroutine */ int _starpu_slaed4_(integer *n, integer *i__, real *d__, real *z__, real *delta, real *rho, real *dlam, integer *info); /* Subroutine */ int _starpu_slaed5_(integer *i__, real *d__, real *z__, real *delta, real *rho, real *dlam); /* Subroutine */ int _starpu_slaed6_(integer *kniter, logical *orgati, real *rho, real *d__, real *z__, real *finit, real *tau, integer *info); /* Subroutine */ int _starpu_slaed7_(integer *icompq, integer *n, integer *qsiz, integer *tlvls, integer *curlvl, integer *curpbm, real *d__, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real * qstore, integer *qptr, integer *prmptr, integer *perm, integer * givptr, integer *givcol, real *givnum, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slaed8_(integer *icompq, integer *k, integer *n, integer *qsiz, real *d__, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *z__, real *dlamda, real *q2, integer *ldq2, real *w, integer *perm, integer *givptr, integer *givcol, real * givnum, integer *indxp, integer *indx, integer *info); /* Subroutine */ int _starpu_slaed9_(integer *k, integer *kstart, integer *kstop, integer *n, real *d__, real *q, integer *ldq, real *rho, real *dlamda, real *w, real *s, integer *lds, integer *info); /* Subroutine */ int _starpu_slaeda_(integer *n, integer *tlvls, integer *curlvl, integer *curpbm, integer *prmptr, integer *perm, integer *givptr, integer *givcol, real *givnum, real *q, integer *qptr, real *z__, real *ztemp, integer *info); /* Subroutine */ int _starpu_slaein_(logical *rightv, logical *noinit, integer *n, real *h__, integer *ldh, real *wr, real *wi, real *vr, real *vi, real *b, integer *ldb, real *work, real *eps3, real *smlnum, real *bignum, integer *info); /* Subroutine */ int _starpu_slaev2_(real *a, real *b, real *c__, real *rt1, real * rt2, real *cs1, real *sn1); /* Subroutine */ int _starpu_slaexc_(logical *wantq, integer *n, real *t, integer * ldt, real *q, integer *ldq, integer *j1, integer *n1, integer *n2, real *work, integer *info); /* Subroutine */ int _starpu_slag2_(real *a, integer *lda, real *b, integer *ldb, real *safmin, real *scale1, real *scale2, real *wr1, real *wr2, real * wi); /* Subroutine */ int _starpu_slag2d_(integer *m, integer *n, real *sa, integer *ldsa, doublereal *a, integer *lda, integer *info); /* Subroutine */ int _starpu_slags2_(logical *upper, real *a1, real *a2, real *a3, real *b1, real *b2, real *b3, real *csu, real *snu, real *csv, real * snv, real *csq, real *snq); /* Subroutine */ int _starpu_slagtf_(integer *n, real *a, real *lambda, real *b, real *c__, real *tol, real *d__, integer *in, integer *info); /* Subroutine */ int _starpu_slagtm_(char *trans, integer *n, integer *nrhs, real * alpha, real *dl, real *d__, real *du, real *x, integer *ldx, real * beta, real *b, integer *ldb); /* Subroutine */ int _starpu_slagts_(integer *job, integer *n, real *a, real *b, real *c__, real *d__, integer *in, real *y, real *tol, integer *info); /* Subroutine */ int _starpu_slagv2_(real *a, integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real *beta, real *csl, real *snl, real * csr, real *snr); /* Subroutine */ int _starpu_slahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer * info); /* Subroutine */ int _starpu_slahr2_(integer *n, integer *k, integer *nb, real *a, integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy); /* Subroutine */ int _starpu_slahrd_(integer *n, integer *k, integer *nb, real *a, integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy); /* Subroutine */ int _starpu_slaic1_(integer *job, integer *j, real *x, real *sest, real *w, real *gamma, real *sestpr, real *s, real *c__); logical _starpu_slaisnan_(real *sin1, real *sin2); /* Subroutine */ int _starpu_slaln2_(logical *ltrans, integer *na, integer *nw, real * smin, real *ca, real *a, integer *lda, real *d1, real *d2, real *b, integer *ldb, real *wr, real *wi, real *x, integer *ldx, real *scale, real *xnorm, integer *info); /* Subroutine */ int _starpu_slals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * difl, real *difr, real *z__, integer *k, real *c__, real *s, real * work, integer *info); /* Subroutine */ int _starpu_slalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, real *b, integer *ldb, real *bx, integer *ldbx, real * u, integer *ldu, real *vt, integer *k, real *difl, real *difr, real * z__, real *poles, integer *givptr, integer *givcol, integer *ldgcol, integer *perm, real *givnum, real *c__, real *s, real *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_slalsd_(char *uplo, integer *smlsiz, integer *n, integer *nrhs, real *d__, real *e, real *b, integer *ldb, real *rcond, integer *rank, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slamrg_(integer *n1, integer *n2, real *a, integer * strd1, integer *strd2, integer *index); integer _starpu_slaneg_(integer *n, real *d__, real *lld, real *sigma, real *pivmin, integer *r__); doublereal _starpu_slangb_(char *norm, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, real *work); doublereal _starpu_slange_(char *norm, integer *m, integer *n, real *a, integer *lda, real *work); doublereal _starpu_slangt_(char *norm, integer *n, real *dl, real *d__, real *du); doublereal _starpu_slanhs_(char *norm, integer *n, real *a, integer *lda, real *work); doublereal _starpu_slansb_(char *norm, char *uplo, integer *n, integer *k, real *ab, integer *ldab, real *work); doublereal _starpu_slansf_(char *norm, char *transr, char *uplo, integer *n, real *a, real *work); doublereal _starpu_slansp_(char *norm, char *uplo, integer *n, real *ap, real *work); doublereal _starpu_slanst_(char *norm, integer *n, real *d__, real *e); doublereal _starpu_slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, real *work); doublereal _starpu_slantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, real *ab, integer *ldab, real *work); doublereal _starpu_slantp_(char *norm, char *uplo, char *diag, integer *n, real *ap, real *work); doublereal _starpu_slantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, real *a, integer *lda, real *work); /* Subroutine */ int _starpu_slanv2_(real *a, real *b, real *c__, real *d__, real * rt1r, real *rt1i, real *rt2r, real *rt2i, real *cs, real *sn); /* Subroutine */ int _starpu_slapll_(integer *n, real *x, integer *incx, real *y, integer *incy, real *ssmin); /* Subroutine */ int _starpu_slapmt_(logical *forwrd, integer *m, integer *n, real *x, integer *ldx, integer *k); doublereal _starpu_slapy2_(real *x, real *y); doublereal _starpu_slapy3_(real *x, real *y, real *z__); /* Subroutine */ int _starpu_slaqgb_(integer *m, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, real *r__, real *c__, real *rowcnd, real * colcnd, real *amax, char *equed); /* Subroutine */ int _starpu_slaqge_(integer *m, integer *n, real *a, integer *lda, real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, char * equed); /* Subroutine */ int _starpu_slaqp2_(integer *m, integer *n, integer *offset, real *a, integer *lda, integer *jpvt, real *tau, real *vn1, real *vn2, real * work); /* Subroutine */ int _starpu_slaqps_(integer *m, integer *n, integer *offset, integer *nb, integer *kb, real *a, integer *lda, integer *jpvt, real *tau, real *vn1, real *vn2, real *auxv, real *f, integer *ldf); /* Subroutine */ int _starpu_slaqr0_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_slaqr1_(integer *n, real *h__, integer *ldh, real *sr1, real *si1, real *sr2, real *si2, real *v); /* Subroutine */ int _starpu_slaqr2_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, real *h__, integer *ldh, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer *ns, integer *nd, real *sr, real *si, real *v, integer *ldv, integer *nh, real *t, integer *ldt, integer *nv, real *wv, integer *ldwv, real * work, integer *lwork); /* Subroutine */ int _starpu_slaqr3_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, real *h__, integer *ldh, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer *ns, integer *nd, real *sr, real *si, real *v, integer *ldv, integer *nh, real *t, integer *ldt, integer *nv, real *wv, integer *ldwv, real * work, integer *lwork); /* Subroutine */ int _starpu_slaqr4_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_slaqr5_(logical *wantt, logical *wantz, integer *kacc22, integer *n, integer *ktop, integer *kbot, integer *nshfts, real *sr, real *si, real *h__, integer *ldh, integer *iloz, integer *ihiz, real *z__, integer *ldz, real *v, integer *ldv, real *u, integer *ldu, integer *nv, real *wv, integer *ldwv, integer *nh, real *wh, integer * ldwh); /* Subroutine */ int _starpu_slaqsb_(char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_slaqsp_(char *uplo, integer *n, real *ap, real *s, real * scond, real *amax, char *equed); /* Subroutine */ int _starpu_slaqsy_(char *uplo, integer *n, real *a, integer *lda, real *s, real *scond, real *amax, char *equed); /* Subroutine */ int _starpu_slaqtr_(logical *ltran, logical *lreal, integer *n, real *t, integer *ldt, real *b, real *w, real *scale, real *x, real *work, integer *info); /* Subroutine */ int _starpu_slar1v_(integer *n, integer *b1, integer *bn, real * lambda, real *d__, real *l, real *ld, real *lld, real *pivmin, real * gaptol, real *z__, logical *wantnc, integer *negcnt, real *ztz, real * mingma, integer *r__, integer *isuppz, real *nrminv, real *resid, real *rqcorr, real *work); /* Subroutine */ int _starpu_slar2v_(integer *n, real *x, real *y, real *z__, integer *incx, real *c__, real *s, integer *incc); /* Subroutine */ int _starpu_slarf_(char *side, integer *m, integer *n, real *v, integer *incv, real *tau, real *c__, integer *ldc, real *work); /* Subroutine */ int _starpu_slarfb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, real *v, integer *ldv, real *t, integer *ldt, real *c__, integer *ldc, real *work, integer * ldwork); /* Subroutine */ int _starpu_slarfg_(integer *n, real *alpha, real *x, integer *incx, real *tau); /* Subroutine */ int _starpu_slarfp_(integer *n, real *alpha, real *x, integer *incx, real *tau); /* Subroutine */ int _starpu_slarft_(char *direct, char *storev, integer *n, integer * k, real *v, integer *ldv, real *tau, real *t, integer *ldt); /* Subroutine */ int _starpu_slarfx_(char *side, integer *m, integer *n, real *v, real *tau, real *c__, integer *ldc, real *work); /* Subroutine */ int _starpu_slargv_(integer *n, real *x, integer *incx, real *y, integer *incy, real *c__, integer *incc); /* Subroutine */ int _starpu_slarnv_(integer *idist, integer *iseed, integer *n, real *x); /* Subroutine */ int _starpu_slarra_(integer *n, real *d__, real *e, real *e2, real * spltol, real *tnrm, integer *nsplit, integer *isplit, integer *info); /* Subroutine */ int _starpu_slarrb_(integer *n, real *d__, real *lld, integer * ifirst, integer *ilast, real *rtol1, real *rtol2, integer *offset, real *w, real *wgap, real *werr, real *work, integer *iwork, real * pivmin, real *spdiam, integer *twist, integer *info); /* Subroutine */ int _starpu_slarrc_(char *jobt, integer *n, real *vl, real *vu, real *d__, real *e, real *pivmin, integer *eigcnt, integer *lcnt, integer * rcnt, integer *info); /* Subroutine */ int _starpu_slarrd_(char *range, char *order, integer *n, real *vl, real *vu, integer *il, integer *iu, real *gers, real *reltol, real * d__, real *e, real *e2, real *pivmin, integer *nsplit, integer * isplit, integer *m, real *w, real *werr, real *wl, real *wu, integer * iblock, integer *indexw, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slarre_(char *range, integer *n, real *vl, real *vu, integer *il, integer *iu, real *d__, real *e, real *e2, real *rtol1, real *rtol2, real *spltol, integer *nsplit, integer *isplit, integer * m, real *w, real *werr, real *wgap, integer *iblock, integer *indexw, real *gers, real *pivmin, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slarrf_(integer *n, real *d__, real *l, real *ld, integer *clstrt, integer *clend, real *w, real *wgap, real *werr, real *spdiam, real *clgapl, real *clgapr, real *pivmin, real *sigma, real *dplus, real *lplus, real *work, integer *info); /* Subroutine */ int _starpu_slarrj_(integer *n, real *d__, real *e2, integer *ifirst, integer *ilast, real *rtol, integer *offset, real *w, real *werr, real *work, integer *iwork, real *pivmin, real *spdiam, integer *info); /* Subroutine */ int _starpu_slarrk_(integer *n, integer *iw, real *gl, real *gu, real *d__, real *e2, real *pivmin, real *reltol, real *w, real *werr, integer *info); /* Subroutine */ int _starpu_slarrr_(integer *n, real *d__, real *e, integer *info); /* Subroutine */ int _starpu_slarrv_(integer *n, real *vl, real *vu, real *d__, real * l, real *pivmin, integer *isplit, integer *m, integer *dol, integer * dou, real *minrgp, real *rtol1, real *rtol2, real *w, real *werr, real *wgap, integer *iblock, integer *indexw, real *gers, real *z__, integer *ldz, integer *isuppz, real *work, integer *iwork, integer * info); /* Subroutine */ int _starpu_slarscl2_(integer *m, integer *n, real *d__, real *x, integer *ldx); /* Subroutine */ int _starpu_slartg_(real *f, real *g, real *cs, real *sn, real *r__); /* Subroutine */ int _starpu_slartv_(integer *n, real *x, integer *incx, real *y, integer *incy, real *c__, real *s, integer *incc); /* Subroutine */ int _starpu_slaruv_(integer *iseed, integer *n, real *x); /* Subroutine */ int _starpu_slarz_(char *side, integer *m, integer *n, integer *l, real *v, integer *incv, real *tau, real *c__, integer *ldc, real * work); /* Subroutine */ int _starpu_slarzb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, integer *l, real *v, integer *ldv, real *t, integer *ldt, real *c__, integer *ldc, real * work, integer *ldwork); /* Subroutine */ int _starpu_slarzt_(char *direct, char *storev, integer *n, integer * k, real *v, integer *ldv, real *tau, real *t, integer *ldt); /* Subroutine */ int _starpu_slas2_(real *f, real *g, real *h__, real *ssmin, real * ssmax); /* Subroutine */ int _starpu_slascl_(char *type__, integer *kl, integer *ku, real * cfrom, real *cto, integer *m, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_slascl2_(integer *m, integer *n, real *d__, real *x, integer *ldx); /* Subroutine */ int _starpu_slasd0_(integer *n, integer *sqre, real *d__, real *e, real *u, integer *ldu, real *vt, integer *ldvt, integer *smlsiz, integer *iwork, real *work, integer *info); /* Subroutine */ int _starpu_slasd1_(integer *nl, integer *nr, integer *sqre, real * d__, real *alpha, real *beta, real *u, integer *ldu, real *vt, integer *ldvt, integer *idxq, integer *iwork, real *work, integer * info); /* Subroutine */ int _starpu_slasd2_(integer *nl, integer *nr, integer *sqre, integer *k, real *d__, real *z__, real *alpha, real *beta, real *u, integer * ldu, real *vt, integer *ldvt, real *dsigma, real *u2, integer *ldu2, real *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer *idxq, integer *coltyp, integer *info); /* Subroutine */ int _starpu_slasd3_(integer *nl, integer *nr, integer *sqre, integer *k, real *d__, real *q, integer *ldq, real *dsigma, real *u, integer * ldu, real *u2, integer *ldu2, real *vt, integer *ldvt, real *vt2, integer *ldvt2, integer *idxc, integer *ctot, real *z__, integer * info); /* Subroutine */ int _starpu_slasd4_(integer *n, integer *i__, real *d__, real *z__, real *delta, real *rho, real *sigma, real *work, integer *info); /* Subroutine */ int _starpu_slasd5_(integer *i__, real *d__, real *z__, real *delta, real *rho, real *dsigma, real *work); /* Subroutine */ int _starpu_slasd6_(integer *icompq, integer *nl, integer *nr, integer *sqre, real *d__, real *vf, real *vl, real *alpha, real *beta, integer *idxq, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * difl, real *difr, real *z__, integer *k, real *c__, real *s, real * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slasd7_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *k, real *d__, real *z__, real *zw, real *vf, real *vfw, real *vl, real *vlw, real *alpha, real *beta, real *dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, integer * givptr, integer *givcol, integer *ldgcol, real *givnum, integer * ldgnum, real *c__, real *s, integer *info); /* Subroutine */ int _starpu_slasd8_(integer *icompq, integer *k, real *d__, real * z__, real *vf, real *vl, real *difl, real *difr, integer *lddifr, real *dsigma, real *work, integer *info); /* Subroutine */ int _starpu_slasda_(integer *icompq, integer *smlsiz, integer *n, integer *sqre, real *d__, real *e, real *u, integer *ldu, real *vt, integer *k, real *difl, real *difr, real *z__, real *poles, integer * givptr, integer *givcol, integer *ldgcol, integer *perm, real *givnum, real *c__, real *s, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_slasdq_(char *uplo, integer *sqre, integer *n, integer * ncvt, integer *nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real *u, integer *ldu, real *c__, integer *ldc, real * work, integer *info); /* Subroutine */ int _starpu_slasdt_(integer *n, integer *lvl, integer *nd, integer * inode, integer *ndiml, integer *ndimr, integer *msub); /* Subroutine */ int _starpu_slaset_(char *uplo, integer *m, integer *n, real *alpha, real *beta, real *a, integer *lda); /* Subroutine */ int _starpu_slasq1_(integer *n, real *d__, real *e, real *work, integer *info); /* Subroutine */ int _starpu_slasq2_(integer *n, real *z__, integer *info); /* Subroutine */ int _starpu_slasq3_(integer *i0, integer *n0, real *z__, integer *pp, real *dmin__, real *sigma, real *desig, real *qmax, integer *nfail, integer *iter, integer *ndiv, logical *ieee, integer *ttype, real * dmin1, real *dmin2, real *dn, real *dn1, real *dn2, real *g, real * tau); /* Subroutine */ int _starpu_slasq4_(integer *i0, integer *n0, real *z__, integer *pp, integer *n0in, real *dmin__, real *dmin1, real *dmin2, real *dn, real *dn1, real *dn2, real *tau, integer *ttype, real *g); /* Subroutine */ int _starpu_slasq5_(integer *i0, integer *n0, real *z__, integer *pp, real *tau, real *dmin__, real *dmin1, real *dmin2, real *dn, real * dnm1, real *dnm2, logical *ieee); /* Subroutine */ int _starpu_slasq6_(integer *i0, integer *n0, real *z__, integer *pp, real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real * dnm2); /* Subroutine */ int _starpu_slasr_(char *side, char *pivot, char *direct, integer *m, integer *n, real *c__, real *s, real *a, integer *lda); /* Subroutine */ int _starpu_slasrt_(char *id, integer *n, real *d__, integer *info); /* Subroutine */ int _starpu_slassq_(integer *n, real *x, integer *incx, real *scale, real *sumsq); /* Subroutine */ int _starpu_slasv2_(real *f, real *g, real *h__, real *ssmin, real * ssmax, real *snr, real *csr, real *snl, real *csl); /* Subroutine */ int _starpu_slaswp_(integer *n, real *a, integer *lda, integer *k1, integer *k2, integer *ipiv, integer *incx); /* Subroutine */ int _starpu_slasy2_(logical *ltranl, logical *ltranr, integer *isgn, integer *n1, integer *n2, real *tl, integer *ldtl, real *tr, integer * ldtr, real *b, integer *ldb, real *scale, real *x, integer *ldx, real *xnorm, integer *info); /* Subroutine */ int _starpu_slasyf_(char *uplo, integer *n, integer *nb, integer *kb, real *a, integer *lda, integer *ipiv, real *w, integer *ldw, integer *info); /* Subroutine */ int _starpu_slatbs_(char *uplo, char *trans, char *diag, char * normin, integer *n, integer *kd, real *ab, integer *ldab, real *x, real *scale, real *cnorm, integer *info); /* Subroutine */ int _starpu_slatdf_(integer *ijob, integer *n, real *z__, integer * ldz, real *rhs, real *rdsum, real *rdscal, integer *ipiv, integer * jpiv); /* Subroutine */ int _starpu_slatps_(char *uplo, char *trans, char *diag, char * normin, integer *n, real *ap, real *x, real *scale, real *cnorm, integer *info); /* Subroutine */ int _starpu_slatrd_(char *uplo, integer *n, integer *nb, real *a, integer *lda, real *e, real *tau, real *w, integer *ldw); /* Subroutine */ int _starpu_slatrs_(char *uplo, char *trans, char *diag, char * normin, integer *n, real *a, integer *lda, real *x, real *scale, real *cnorm, integer *info); /* Subroutine */ int _starpu_slatrz_(integer *m, integer *n, integer *l, real *a, integer *lda, real *tau, real *work); /* Subroutine */ int _starpu_slatzm_(char *side, integer *m, integer *n, real *v, integer *incv, real *tau, real *c1, real *c2, integer *ldc, real * work); /* Subroutine */ int _starpu_slauu2_(char *uplo, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_slauum_(char *uplo, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_sopgtr_(char *uplo, integer *n, real *ap, real *tau, real *q, integer *ldq, real *work, integer *info); /* Subroutine */ int _starpu_sopmtr_(char *side, char *uplo, char *trans, integer *m, integer *n, real *ap, real *tau, real *c__, integer *ldc, real *work, integer *info); /* Subroutine */ int _starpu_sorg2l_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sorg2r_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sorgbr_(char *vect, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorghr_(integer *n, integer *ilo, integer *ihi, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorgl2_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sorglq_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorgql_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorgqr_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorgr2_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info); /* Subroutine */ int _starpu_sorgrq_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorgtr_(char *uplo, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorm2l_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info); /* Subroutine */ int _starpu_sorm2r_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info); /* Subroutine */ int _starpu_sormbr_(char *vect, char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sormhr_(char *side, char *trans, integer *m, integer *n, integer *ilo, integer *ihi, real *a, integer *lda, real *tau, real * c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sorml2_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info); /* Subroutine */ int _starpu_sormlq_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sormql_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sormqr_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sormr2_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info); /* Subroutine */ int _starpu_sormr3_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info); /* Subroutine */ int _starpu_sormrq_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sormrz_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_sormtr_(char *side, char *uplo, char *trans, integer *m, integer *n, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_spbcon_(char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *anorm, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_spbequ_(char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *s, real *scond, real *amax, integer *info); /* Subroutine */ int _starpu_spbrfs_(char *uplo, integer *n, integer *kd, integer * nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_spbstf_(char *uplo, integer *n, integer *kd, real *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_spbsv_(char *uplo, integer *n, integer *kd, integer * nrhs, real *ab, integer *ldab, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_spbsvx_(char *fact, char *uplo, integer *n, integer *kd, integer *nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, char *equed, real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_spbtf2_(char *uplo, integer *n, integer *kd, real *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_spbtrf_(char *uplo, integer *n, integer *kd, real *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_spbtrs_(char *uplo, integer *n, integer *kd, integer * nrhs, real *ab, integer *ldab, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_spftrf_(char *transr, char *uplo, integer *n, real *a, integer *info); /* Subroutine */ int _starpu_spftri_(char *transr, char *uplo, integer *n, real *a, integer *info); /* Subroutine */ int _starpu_spftrs_(char *transr, char *uplo, integer *n, integer * nrhs, real *a, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_spocon_(char *uplo, integer *n, real *a, integer *lda, real *anorm, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_spoequ_(integer *n, real *a, integer *lda, real *s, real *scond, real *amax, integer *info); /* Subroutine */ int _starpu_spoequb_(integer *n, real *a, integer *lda, real *s, real *scond, real *amax, integer *info); /* Subroutine */ int _starpu_sporfs_(char *uplo, integer *n, integer *nrhs, real *a, integer *lda, real *af, integer *ldaf, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sporfsx_(char *uplo, char *equed, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, real *s, real * b, integer *ldb, real *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real *params, real *work, integer *iwork, integer * info); /* Subroutine */ int _starpu_sposv_(char *uplo, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sposvx_(char *fact, char *uplo, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, char *equed, real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sposvxx_(char *fact, char *uplo, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, char *equed, real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer *n_err_bnds__, real * err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * params, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_spotf2_(char *uplo, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_spotrf_(char *uplo, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_spotri_(char *uplo, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_spotrs_(char *uplo, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sppcon_(char *uplo, integer *n, real *ap, real *anorm, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sppequ_(char *uplo, integer *n, real *ap, real *s, real * scond, real *amax, integer *info); /* Subroutine */ int _starpu_spprfs_(char *uplo, integer *n, integer *nrhs, real *ap, real *afp, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sppsv_(char *uplo, integer *n, integer *nrhs, real *ap, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sppsvx_(char *fact, char *uplo, integer *n, integer * nrhs, real *ap, real *afp, char *equed, real *s, real *b, integer * ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_spptrf_(char *uplo, integer *n, real *ap, integer *info); /* Subroutine */ int _starpu_spptri_(char *uplo, integer *n, real *ap, integer *info); /* Subroutine */ int _starpu_spptrs_(char *uplo, integer *n, integer *nrhs, real *ap, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_spstf2_(char *uplo, integer *n, real *a, integer *lda, integer *piv, integer *rank, real *tol, real *work, integer *info); /* Subroutine */ int _starpu_spstrf_(char *uplo, integer *n, real *a, integer *lda, integer *piv, integer *rank, real *tol, real *work, integer *info); /* Subroutine */ int _starpu_sptcon_(integer *n, real *d__, real *e, real *anorm, real *rcond, real *work, integer *info); /* Subroutine */ int _starpu_spteqr_(char *compz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_sptrfs_(integer *n, integer *nrhs, real *d__, real *e, real *df, real *ef, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *work, integer *info); /* Subroutine */ int _starpu_sptsv_(integer *n, integer *nrhs, real *d__, real *e, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sptsvx_(char *fact, integer *n, integer *nrhs, real *d__, real *e, real *df, real *ef, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *info); /* Subroutine */ int _starpu_spttrf_(integer *n, real *d__, real *e, integer *info); /* Subroutine */ int _starpu_spttrs_(integer *n, integer *nrhs, real *d__, real *e, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sptts2_(integer *n, integer *nrhs, real *d__, real *e, real *b, integer *ldb); /* Subroutine */ int _starpu_srscl_(integer *n, real *sa, real *sx, integer *incx); /* Subroutine */ int _starpu_ssbev_(char *jobz, char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *w, real *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_ssbevd_(char *jobz, char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *w, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_ssbevx_(char *jobz, char *range, char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *q, integer *ldq, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real * w, real *z__, integer *ldz, real *work, integer *iwork, integer * ifail, integer *info); /* Subroutine */ int _starpu_ssbgst_(char *vect, char *uplo, integer *n, integer *ka, integer *kb, real *ab, integer *ldab, real *bb, integer *ldbb, real * x, integer *ldx, real *work, integer *info); /* Subroutine */ int _starpu_ssbgv_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, real *ab, integer *ldab, real *bb, integer *ldbb, real * w, real *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_ssbgvd_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, real *ab, integer *ldab, real *bb, integer *ldbb, real * w, real *z__, integer *ldz, real *work, integer *lwork, integer * iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_ssbgvx_(char *jobz, char *range, char *uplo, integer *n, integer *ka, integer *kb, real *ab, integer *ldab, real *bb, integer * ldbb, real *q, integer *ldq, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, real *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_ssbtrd_(char *vect, char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *d__, real *e, real *q, integer *ldq, real *work, integer *info); /* Subroutine */ int _starpu_ssfrk_(char *transr, char *uplo, char *trans, integer *n, integer *k, real *alpha, real *a, integer *lda, real *beta, real * c__); /* Subroutine */ int _starpu_sspcon_(char *uplo, integer *n, real *ap, integer *ipiv, real *anorm, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sspev_(char *jobz, char *uplo, integer *n, real *ap, real *w, real *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_sspevd_(char *jobz, char *uplo, integer *n, real *ap, real *w, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_sspevx_(char *jobz, char *range, char *uplo, integer *n, real *ap, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, real *work, integer * iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_sspgst_(integer *itype, char *uplo, integer *n, real *ap, real *bp, integer *info); /* Subroutine */ int _starpu_sspgv_(integer *itype, char *jobz, char *uplo, integer * n, real *ap, real *bp, real *w, real *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_sspgvd_(integer *itype, char *jobz, char *uplo, integer * n, real *ap, real *bp, real *w, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_sspgvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, real *ap, real *bp, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer * ldz, real *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_ssprfs_(char *uplo, integer *n, integer *nrhs, real *ap, real *afp, integer *ipiv, real *b, integer *ldb, real *x, integer * ldx, real *ferr, real *berr, real *work, integer *iwork, integer * info); /* Subroutine */ int _starpu_sspsv_(char *uplo, integer *n, integer *nrhs, real *ap, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sspsvx_(char *fact, char *uplo, integer *n, integer * nrhs, real *ap, real *afp, integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_ssptrd_(char *uplo, integer *n, real *ap, real *d__, real *e, real *tau, integer *info); /* Subroutine */ int _starpu_ssptrf_(char *uplo, integer *n, real *ap, integer *ipiv, integer *info); /* Subroutine */ int _starpu_ssptri_(char *uplo, integer *n, real *ap, integer *ipiv, real *work, integer *info); /* Subroutine */ int _starpu_ssptrs_(char *uplo, integer *n, integer *nrhs, real *ap, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_sstebz_(char *range, char *order, integer *n, real *vl, real *vu, integer *il, integer *iu, real *abstol, real *d__, real *e, integer *m, integer *nsplit, real *w, integer *iblock, integer * isplit, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_sstedc_(char *compz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_sstegr_(char *jobz, char *range, integer *n, real *d__, real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, integer *isuppz, real * work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_sstein_(integer *n, real *d__, real *e, integer *m, real *w, integer *iblock, integer *isplit, real *z__, integer *ldz, real * work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_sstemr_(char *jobz, char *range, integer *n, real *d__, real *e, real *vl, real *vu, integer *il, integer *iu, integer *m, real *w, real *z__, integer *ldz, integer *nzc, integer *isuppz, logical *tryrac, real *work, integer *lwork, integer *iwork, integer * liwork, integer *info); /* Subroutine */ int _starpu_ssteqr_(char *compz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_ssterf_(integer *n, real *d__, real *e, integer *info); /* Subroutine */ int _starpu_sstev_(char *jobz, integer *n, real *d__, real *e, real * z__, integer *ldz, real *work, integer *info); /* Subroutine */ int _starpu_sstevd_(char *jobz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_sstevr_(char *jobz, char *range, integer *n, real *d__, real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, integer *isuppz, real * work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_sstevx_(char *jobz, char *range, integer *n, real *d__, real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, real *work, integer * iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_ssycon_(char *uplo, integer *n, real *a, integer *lda, integer *ipiv, real *anorm, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_ssyequb_(char *uplo, integer *n, real *a, integer *lda, real *s, real *scond, real *amax, real *work, integer *info); /* Subroutine */ int _starpu_ssyev_(char *jobz, char *uplo, integer *n, real *a, integer *lda, real *w, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_ssyevd_(char *jobz, char *uplo, integer *n, real *a, integer *lda, real *w, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_ssyevr_(char *jobz, char *range, char *uplo, integer *n, real *a, integer *lda, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, integer * isuppz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_ssyevx_(char *jobz, char *range, char *uplo, integer *n, real *a, integer *lda, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, real * work, integer *lwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_ssygs2_(integer *itype, char *uplo, integer *n, real *a, integer *lda, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ssygst_(integer *itype, char *uplo, integer *n, real *a, integer *lda, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ssygv_(integer *itype, char *jobz, char *uplo, integer * n, real *a, integer *lda, real *b, integer *ldb, real *w, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_ssygvd_(integer *itype, char *jobz, char *uplo, integer * n, real *a, integer *lda, real *b, integer *ldb, real *w, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_ssygvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, real *a, integer *lda, real *b, integer *ldb, real * vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_ssyrfs_(char *uplo, integer *n, integer *nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real * work, integer *iwork, integer *info); /* Subroutine */ int _starpu_ssyrfsx_(char *uplo, char *equed, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, real * err_bnds_comp__, integer *nparams, real *params, real *work, integer * iwork, integer *info); /* Subroutine */ int _starpu_ssysv_(char *uplo, integer *n, integer *nrhs, real *a, integer *lda, integer *ipiv, real *b, integer *ldb, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_ssysvx_(char *fact, char *uplo, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *lwork, integer *iwork, integer * info); /* Subroutine */ int _starpu_ssysvxx_(char *fact, char *uplo, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, char *equed, real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *rpvgrw, real *berr, integer *n_err_bnds__, real * err_bnds_norm__, real *err_bnds_comp__, integer *nparams, real * params, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_ssytd2_(char *uplo, integer *n, real *a, integer *lda, real *d__, real *e, real *tau, integer *info); /* Subroutine */ int _starpu_ssytf2_(char *uplo, integer *n, real *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_ssytrd_(char *uplo, integer *n, real *a, integer *lda, real *d__, real *e, real *tau, real *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_ssytrf_(char *uplo, integer *n, real *a, integer *lda, integer *ipiv, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_ssytri_(char *uplo, integer *n, real *a, integer *lda, integer *ipiv, real *work, integer *info); /* Subroutine */ int _starpu_ssytrs_(char *uplo, integer *n, integer *nrhs, real *a, integer *lda, integer *ipiv, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_stbcon_(char *norm, char *uplo, char *diag, integer *n, integer *kd, real *ab, integer *ldab, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_stbrfs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, real *ab, integer *ldab, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_stbtrs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, real *ab, integer *ldab, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_stfsm_(char *transr, char *side, char *uplo, char *trans, char *diag, integer *m, integer *n, real *alpha, real *a, real *b, integer *ldb); /* Subroutine */ int _starpu_stftri_(char *transr, char *uplo, char *diag, integer *n, real *a, integer *info); /* Subroutine */ int _starpu_stfttp_(char *transr, char *uplo, integer *n, real *arf, real *ap, integer *info); /* Subroutine */ int _starpu_stfttr_(char *transr, char *uplo, integer *n, real *arf, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_stgevc_(char *side, char *howmny, logical *select, integer *n, real *s, integer *lds, real *p, integer *ldp, real *vl, integer *ldvl, real *vr, integer *ldvr, integer *mm, integer *m, real *work, integer *info); /* Subroutine */ int _starpu_stgex2_(logical *wantq, logical *wantz, integer *n, real *a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real * z__, integer *ldz, integer *j1, integer *n1, integer *n2, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_stgexc_(logical *wantq, logical *wantz, integer *n, real *a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real * z__, integer *ldz, integer *ifst, integer *ilst, real *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_stgsen_(integer *ijob, logical *wantq, logical *wantz, logical *select, integer *n, real *a, integer *lda, real *b, integer * ldb, real *alphar, real *alphai, real *beta, real *q, integer *ldq, real *z__, integer *ldz, integer *m, real *pl, real *pr, real *dif, real *work, integer *lwork, integer *iwork, integer *liwork, integer * info); /* Subroutine */ int _starpu_stgsja_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, integer *k, integer *l, real *a, integer *lda, real *b, integer *ldb, real *tola, real *tolb, real *alpha, real * beta, real *u, integer *ldu, real *v, integer *ldv, real *q, integer * ldq, real *work, integer *ncycle, integer *info); /* Subroutine */ int _starpu_stgsna_(char *job, char *howmny, logical *select, integer *n, real *a, integer *lda, real *b, integer *ldb, real *vl, integer *ldvl, real *vr, integer *ldvr, real *s, real *dif, integer * mm, integer *m, real *work, integer *lwork, integer *iwork, integer * info); /* Subroutine */ int _starpu_stgsy2_(char *trans, integer *ijob, integer *m, integer * n, real *a, integer *lda, real *b, integer *ldb, real *c__, integer * ldc, real *d__, integer *ldd, real *e, integer *lde, real *f, integer *ldf, real *scale, real *rdsum, real *rdscal, integer *iwork, integer *pq, integer *info); /* Subroutine */ int _starpu_stgsyl_(char *trans, integer *ijob, integer *m, integer * n, real *a, integer *lda, real *b, integer *ldb, real *c__, integer * ldc, real *d__, integer *ldd, real *e, integer *lde, real *f, integer *ldf, real *scale, real *dif, real *work, integer *lwork, integer * iwork, integer *info); /* Subroutine */ int _starpu_stpcon_(char *norm, char *uplo, char *diag, integer *n, real *ap, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_stprfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, real *ap, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_stptri_(char *uplo, char *diag, integer *n, real *ap, integer *info); /* Subroutine */ int _starpu_stptrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, real *ap, real *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_stpttf_(char *transr, char *uplo, integer *n, real *ap, real *arf, integer *info); /* Subroutine */ int _starpu_stpttr_(char *uplo, integer *n, real *ap, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_strcon_(char *norm, char *uplo, char *diag, integer *n, real *a, integer *lda, real *rcond, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_strevc_(char *side, char *howmny, logical *select, integer *n, real *t, integer *ldt, real *vl, integer *ldvl, real *vr, integer *ldvr, integer *mm, integer *m, real *work, integer *info); /* Subroutine */ int _starpu_strexc_(char *compq, integer *n, real *t, integer *ldt, real *q, integer *ldq, integer *ifst, integer *ilst, real *work, integer *info); /* Subroutine */ int _starpu_strrfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_strsen_(char *job, char *compq, logical *select, integer *n, real *t, integer *ldt, real *q, integer *ldq, real *wr, real *wi, integer *m, real *s, real *sep, real *work, integer *lwork, integer * iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_strsna_(char *job, char *howmny, logical *select, integer *n, real *t, integer *ldt, real *vl, integer *ldvl, real *vr, integer *ldvr, real *s, real *sep, integer *mm, integer *m, real * work, integer *ldwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_strsyl_(char *trana, char *tranb, integer *isgn, integer *m, integer *n, real *a, integer *lda, real *b, integer *ldb, real * c__, integer *ldc, real *scale, integer *info); /* Subroutine */ int _starpu_strti2_(char *uplo, char *diag, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_strtri_(char *uplo, char *diag, integer *n, real *a, integer *lda, integer *info); /* Subroutine */ int _starpu_strtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_strttf_(char *transr, char *uplo, integer *n, real *a, integer *lda, real *arf, integer *info); /* Subroutine */ int _starpu_strttp_(char *uplo, integer *n, real *a, integer *lda, real *ap, integer *info); /* Subroutine */ int _starpu_stzrqf_(integer *m, integer *n, real *a, integer *lda, real *tau, integer *info); /* Subroutine */ int _starpu_stzrzf_(integer *m, integer *n, real *a, integer *lda, real *tau, real *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_xerbla_(char *srname, integer *info); /* Subroutine */ int _starpu_xerbla_array__(char *srname_array__, integer * srname_len__, integer *info, ftnlen srname_array_len); /* Subroutine */ int _starpu_zbdsqr_(char *uplo, integer *n, integer *ncvt, integer * nru, integer *ncc, doublereal *d__, doublereal *e, doublecomplex *vt, integer *ldvt, doublecomplex *u, integer *ldu, doublecomplex *c__, integer *ldc, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zcgesv_(integer *n, integer *nrhs, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublecomplex *work, complex *swork, doublereal *rwork, integer *iter, integer *info); /* Subroutine */ int _starpu_zcposv_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublecomplex *work, complex *swork, doublereal *rwork, integer *iter, integer *info); /* Subroutine */ int _starpu_zdrscl_(integer *n, doublereal *sa, doublecomplex *sx, integer *incx); /* Subroutine */ int _starpu_zgbbrd_(char *vect, integer *m, integer *n, integer *ncc, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, doublereal *d__, doublereal *e, doublecomplex *q, integer *ldq, doublecomplex *pt, integer *ldpt, doublecomplex *c__, integer *ldc, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgbcon_(char *norm, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, integer *ipiv, doublereal *anorm, doublereal *rcond, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zgbequ_(integer *m, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * info); /* Subroutine */ int _starpu_zgbequb_(integer *m, integer *n, integer *kl, integer * ku, doublecomplex *ab, integer *ldab, doublereal *r__, doublereal * c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer *info); /* Subroutine */ int _starpu_zgbrfs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex * afb, integer *ldafb, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgbrfsx_(char *trans, char *equed, integer *n, integer * kl, integer *ku, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, integer *ipiv, doublereal *r__, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *berr, integer * n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgbsv_(integer *n, integer *kl, integer *ku, integer * nrhs, doublecomplex *ab, integer *ldab, integer *ipiv, doublecomplex * b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zgbsvx_(char *fact, char *trans, integer *n, integer *kl, integer *ku, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zgbsvxx_(char *fact, char *trans, integer *n, integer * kl, integer *ku, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgbtf2_(integer *m, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zgbtrf_(integer *m, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zgbtrs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, doublecomplex *ab, integer *ldab, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, doublereal *scale, integer *m, doublecomplex *v, integer *ldv, integer *info); /* Subroutine */ int _starpu_zgebal_(char *job, integer *n, doublecomplex *a, integer *lda, integer *ilo, integer *ihi, doublereal *scale, integer *info); /* Subroutine */ int _starpu_zgebd2_(integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, doublecomplex *taup, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zgebrd_(integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, doublecomplex *taup, doublecomplex *work, integer *lwork, integer * info); /* Subroutine */ int _starpu_zgecon_(char *norm, integer *n, doublecomplex *a, integer *lda, doublereal *anorm, doublereal *rcond, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgeequ_(integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer *info); /* Subroutine */ int _starpu_zgeequb_(integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer *info); /* Subroutine */ int _starpu_zgees_(char *jobvs, char *sort, L_fp select, integer *n, doublecomplex *a, integer *lda, integer *sdim, doublecomplex *w, doublecomplex *vs, integer *ldvs, doublecomplex *work, integer *lwork, doublereal *rwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_zgeesx_(char *jobvs, char *sort, L_fp select, char * sense, integer *n, doublecomplex *a, integer *lda, integer *sdim, doublecomplex *w, doublecomplex *vs, integer *ldvs, doublereal * rconde, doublereal *rcondv, doublecomplex *work, integer *lwork, doublereal *rwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_zgeev_(char *jobvl, char *jobvr, integer *n, doublecomplex *a, integer *lda, doublecomplex *w, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgeevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, doublecomplex *a, integer *lda, doublecomplex *w, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, integer *ilo, integer *ihi, doublereal *scale, doublereal *abnrm, doublereal *rconde, doublereal *rcondv, doublecomplex *work, integer * lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgegs_(char *jobvsl, char *jobvsr, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *alpha, doublecomplex *beta, doublecomplex *vsl, integer *ldvsl, doublecomplex *vsr, integer *ldvsr, doublecomplex * work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgegv_(char *jobvl, char *jobvr, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *alpha, doublecomplex *beta, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgehd2_(integer *n, integer *ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zgehrd_(integer *n, integer *ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgelq2_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zgelqf_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgels_(char *trans, integer *m, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgelsd_(integer *m, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *s, doublereal *rcond, integer *rank, doublecomplex *work, integer *lwork, doublereal *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_zgelss_(integer *m, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *s, doublereal *rcond, integer *rank, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgelsx_(integer *m, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *jpvt, doublereal *rcond, integer *rank, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgelsy_(integer *m, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *jpvt, doublereal *rcond, integer *rank, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgeql2_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zgeqlf_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgeqp3_(integer *m, integer *n, doublecomplex *a, integer *lda, integer *jpvt, doublecomplex *tau, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgeqpf_(integer *m, integer *n, doublecomplex *a, integer *lda, integer *jpvt, doublecomplex *tau, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgeqr2_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zgeqrf_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgerfs_(char *trans, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgerfsx_(char *trans, char *equed, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, doublereal *r__, doublereal *c__, doublecomplex * b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgerq2_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zgerqf_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgesc2_(integer *n, doublecomplex *a, integer *lda, doublecomplex *rhs, integer *ipiv, integer *jpiv, doublereal *scale); /* Subroutine */ int _starpu_zgesdd_(char *jobz, integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u, integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work, integer *lwork, doublereal *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_zgesv_(integer *n, integer *nrhs, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer * info); /* Subroutine */ int _starpu_zgesvd_(char *jobu, char *jobvt, integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u, integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgesvx_(char *fact, char *trans, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgesvxx_(char *fact, char *trans, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer * n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgetc2_(integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *jpiv, integer *info); /* Subroutine */ int _starpu_zgetf2_(integer *m, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zgetrf_(integer *m, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zgetri_(integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgetrs_(char *trans, integer *n, integer *nrhs, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zggbak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, integer *m, doublecomplex *v, integer *ldv, integer *info); /* Subroutine */ int _starpu_zggbal_(char *job, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, doublereal *work, integer * info); /* Subroutine */ int _starpu_zgges_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *sdim, doublecomplex *alpha, doublecomplex * beta, doublecomplex *vsl, integer *ldvsl, doublecomplex *vsr, integer *ldvsr, doublecomplex *work, integer *lwork, doublereal *rwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_zggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, char *sense, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *sdim, doublecomplex *alpha, doublecomplex *beta, doublecomplex *vsl, integer *ldvsl, doublecomplex *vsr, integer *ldvsr, doublereal *rconde, doublereal * rcondv, doublecomplex *work, integer *lwork, doublereal *rwork, integer *iwork, integer *liwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_zggev_(char *jobvl, char *jobvr, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *alpha, doublecomplex *beta, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zggevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *alpha, doublecomplex *beta, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, doublereal *abnrm, doublereal *bbnrm, doublereal *rconde, doublereal * rcondv, doublecomplex *work, integer *lwork, doublereal *rwork, integer *iwork, logical *bwork, integer *info); /* Subroutine */ int _starpu_zggglm_(integer *n, integer *m, integer *p, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *d__, doublecomplex *x, doublecomplex *y, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zgghrd_(char *compq, char *compz, integer *n, integer * ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *q, integer *ldq, doublecomplex *z__, integer *ldz, integer *info); /* Subroutine */ int _starpu_zgglse_(integer *m, integer *n, integer *p, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *c__, doublecomplex *d__, doublecomplex *x, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zggqrf_(integer *n, integer *m, integer *p, doublecomplex *a, integer *lda, doublecomplex *taua, doublecomplex *b, integer *ldb, doublecomplex *taub, doublecomplex *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_zggrqf_(integer *m, integer *p, integer *n, doublecomplex *a, integer *lda, doublecomplex *taua, doublecomplex *b, integer *ldb, doublecomplex *taub, doublecomplex *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_zggsvd_(char *jobu, char *jobv, char *jobq, integer *m, integer *n, integer *p, integer *k, integer *l, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *alpha, doublereal *beta, doublecomplex *u, integer *ldu, doublecomplex *v, integer *ldv, doublecomplex *q, integer *ldq, doublecomplex *work, doublereal *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_zggsvp_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *tola, doublereal *tolb, integer *k, integer *l, doublecomplex *u, integer *ldu, doublecomplex *v, integer *ldv, doublecomplex *q, integer *ldq, integer *iwork, doublereal * rwork, doublecomplex *tau, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zgtcon_(char *norm, integer *n, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, doublecomplex *du2, integer * ipiv, doublereal *anorm, doublereal *rcond, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zgtrfs_(char *trans, integer *n, integer *nrhs, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, doublecomplex *dlf, doublecomplex *df, doublecomplex *duf, doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zgtsv_(integer *n, integer *nrhs, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zgtsvx_(char *fact, char *trans, integer *n, integer * nrhs, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, doublecomplex *dlf, doublecomplex *df, doublecomplex *duf, doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zgttrf_(integer *n, doublecomplex *dl, doublecomplex * d__, doublecomplex *du, doublecomplex *du2, integer *ipiv, integer * info); /* Subroutine */ int _starpu_zgttrs_(char *trans, integer *n, integer *nrhs, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zgtts2_(integer *itrans, integer *n, integer *nrhs, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_zhbev_(char *jobz, char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhbevd_(char *jobz, char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zhbevx_(char *jobz, char *range, char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublecomplex *q, integer *ldq, doublereal *vl, doublereal *vu, integer *il, integer * iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal *rwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_zhbgst_(char *vect, char *uplo, integer *n, integer *ka, integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, integer *ldbb, doublecomplex *x, integer *ldx, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhbgv_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, integer *ldbb, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhbgvd_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, integer *ldbb, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, integer * lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zhbgvx_(char *jobz, char *range, char *uplo, integer *n, integer *ka, integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, integer *ldbb, doublecomplex *q, integer *ldq, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal * abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal *rwork, integer *iwork, integer * ifail, integer *info); /* Subroutine */ int _starpu_zhbtrd_(char *vect, char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *d__, doublereal *e, doublecomplex *q, integer *ldq, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zhecon_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublereal *anorm, doublereal *rcond, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zheequb_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zheev_(char *jobz, char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zheevd_(char *jobz, char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zheevr_(char *jobz, char *range, char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal * w, doublecomplex *z__, integer *ldz, integer *isuppz, doublecomplex * work, integer *lwork, doublereal *rwork, integer *lrwork, integer * iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zheevx_(char *jobz, char *range, char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal * w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer * lwork, doublereal *rwork, integer *iwork, integer *ifail, integer * info); /* Subroutine */ int _starpu_zhegs2_(integer *itype, char *uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zhegst_(integer *itype, char *uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zhegv_(integer *itype, char *jobz, char *uplo, integer * n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhegvd_(integer *itype, char *jobz, char *uplo, integer * n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zhegvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *vl, doublereal *vu, integer *il, integer * iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_zherfs_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zherfsx_(char *uplo, char *equed, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhesv_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zhesvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhesvxx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, char *equed, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * nparams, doublereal *params, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhetd2_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, integer *info); /* Subroutine */ int _starpu_zhetf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zhetrd_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zhetrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zhetri_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zhetrs_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zhfrk_(char *transr, char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublecomplex *a, integer *lda, doublereal *beta, doublecomplex *c__); /* Subroutine */ int _starpu_zhgeqz_(char *job, char *compq, char *compz, integer *n, integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *t, integer *ldt, doublecomplex *alpha, doublecomplex * beta, doublecomplex *q, integer *ldq, doublecomplex *z__, integer * ldz, doublecomplex *work, integer *lwork, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zhpcon_(char *uplo, integer *n, doublecomplex *ap, integer *ipiv, doublereal *anorm, doublereal *rcond, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zhpev_(char *jobz, char *uplo, integer *n, doublecomplex *ap, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhpevd_(char *jobz, char *uplo, integer *n, doublecomplex *ap, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, integer * lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zhpevx_(char *jobz, char *range, char *uplo, integer *n, doublecomplex *ap, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal * rwork, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_zhpgst_(integer *itype, char *uplo, integer *n, doublecomplex *ap, doublecomplex *bp, integer *info); /* Subroutine */ int _starpu_zhpgv_(integer *itype, char *jobz, char *uplo, integer * n, doublecomplex *ap, doublecomplex *bp, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zhpgvd_(integer *itype, char *jobz, char *uplo, integer * n, doublecomplex *ap, doublecomplex *bp, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, doublereal * rwork, integer *lrwork, integer *iwork, integer *liwork, integer * info); /* Subroutine */ int _starpu_zhpgvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, doublecomplex *ap, doublecomplex *bp, doublereal * vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal *rwork, integer *iwork, integer * ifail, integer *info); /* Subroutine */ int _starpu_zhprfs_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, doublecomplex *afp, integer *ipiv, doublecomplex * b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zhpsv_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zhpsvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *ap, doublecomplex *afp, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zhptrd_(char *uplo, integer *n, doublecomplex *ap, doublereal *d__, doublereal *e, doublecomplex *tau, integer *info); /* Subroutine */ int _starpu_zhptrf_(char *uplo, integer *n, doublecomplex *ap, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zhptri_(char *uplo, integer *n, doublecomplex *ap, integer *ipiv, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zhptrs_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zhsein_(char *side, char *eigsrc, char *initv, logical * select, integer *n, doublecomplex *h__, integer *ldh, doublecomplex * w, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer *m, doublecomplex *work, doublereal *rwork, integer *ifaill, integer *ifailr, integer *info); /* Subroutine */ int _starpu_zhseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zla_gbamv__(integer *trans, integer *m, integer *n, integer *kl, integer *ku, doublereal *alpha, doublecomplex *ab, integer *ldab, doublecomplex *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); doublereal _starpu_zla_gbrcond_c__(char *trans, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, integer *ipiv, doublereal *c__, logical *capply, integer *info, doublecomplex *work, doublereal *rwork, ftnlen trans_len); doublereal _starpu_zla_gbrcond_x__(char *trans, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, integer *ipiv, doublecomplex *x, integer *info, doublecomplex *work, doublereal *rwork, ftnlen trans_len); /* Subroutine */ int _starpu_zla_gbrfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, integer *ipiv, logical *colequ, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info); doublereal _starpu_zla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * ncols, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer * ldafb); /* Subroutine */ int _starpu_zla_geamv__(integer *trans, integer *m, integer *n, doublereal *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); doublereal _starpu_zla_gercond_c__(char *trans, integer *n, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublereal * c__, logical *capply, integer *info, doublecomplex *work, doublereal * rwork, ftnlen trans_len); doublereal _starpu_zla_gercond_x__(char *trans, integer *n, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex * x, integer *info, doublecomplex *work, doublereal *rwork, ftnlen trans_len); /* Subroutine */ int _starpu_zla_gerfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *nrhs, doublecomplex *a, integer * lda, doublecomplex *af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal * errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * ignore_cwise__, integer *info); /* Subroutine */ int _starpu_zla_heamv__(integer *uplo, integer *n, doublereal *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); doublereal _starpu_zla_hercond_c__(char *uplo, integer *n, doublecomplex *a, integer * lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublereal *c__, logical *capply, integer *info, doublecomplex *work, doublereal * rwork, ftnlen uplo_len); doublereal _starpu_zla_hercond_x__(char *uplo, integer *n, doublecomplex *a, integer * lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex * x, integer *info, doublecomplex *work, doublereal *rwork, ftnlen uplo_len); /* Subroutine */ int _starpu_zla_herfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal * errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_zla_herpvgrw__(char *uplo, integer *n, integer *info, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublereal *work, ftnlen uplo_len); /* Subroutine */ int _starpu_zla_lin_berr__(integer *n, integer *nz, integer *nrhs, doublecomplex *res, doublereal *ayb, doublereal *berr); doublereal _starpu_zla_porcond_c__(char *uplo, integer *n, doublecomplex *a, integer * lda, doublecomplex *af, integer *ldaf, doublereal *c__, logical * capply, integer *info, doublecomplex *work, doublereal *rwork, ftnlen uplo_len); doublereal _starpu_zla_porcond_x__(char *uplo, integer *n, doublecomplex *a, integer * lda, doublecomplex *af, integer *ldaf, doublecomplex *x, integer * info, doublecomplex *work, doublereal *rwork, ftnlen uplo_len); /* Subroutine */ int _starpu_zla_porfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, logical *colequ, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_zla_porpvgrw__(char *uplo, integer *ncols, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, doublereal *work, ftnlen uplo_len); doublereal _starpu_zla_rpvgrw__(integer *n, integer *ncols, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf); /* Subroutine */ int _starpu_zla_syamv__(integer *uplo, integer *n, doublereal *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, doublereal *beta, doublereal *y, integer *incy); doublereal _starpu_zla_syrcond_c__(char *uplo, integer *n, doublecomplex *a, integer * lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublereal *c__, logical *capply, integer *info, doublecomplex *work, doublereal * rwork, ftnlen uplo_len); doublereal _starpu_zla_syrcond_x__(char *uplo, integer *n, doublecomplex *a, integer * lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex * x, integer *info, doublecomplex *work, doublereal *rwork, ftnlen uplo_len); /* Subroutine */ int _starpu_zla_syrfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__, doublecomplex *b, integer *ldb, doublecomplex *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal * errs_n__, doublereal *errs_c__, doublecomplex *res, doublereal *ayb, doublecomplex *dy, doublecomplex *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * ignore_cwise__, integer *info, ftnlen uplo_len); doublereal _starpu_zla_syrpvgrw__(char *uplo, integer *n, integer *info, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublereal *work, ftnlen uplo_len); /* Subroutine */ int _starpu_zla_wwaddw__(integer *n, doublecomplex *x, doublecomplex *y, doublecomplex *w); /* Subroutine */ int _starpu_zlabrd_(integer *m, integer *n, integer *nb, doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, doublecomplex *taup, doublecomplex *x, integer * ldx, doublecomplex *y, integer *ldy); /* Subroutine */ int _starpu_zlacgv_(integer *n, doublecomplex *x, integer *incx); /* Subroutine */ int _starpu_zlacn2_(integer *n, doublecomplex *v, doublecomplex *x, doublereal *est, integer *kase, integer *isave); /* Subroutine */ int _starpu_zlacon_(integer *n, doublecomplex *v, doublecomplex *x, doublereal *est, integer *kase); /* Subroutine */ int _starpu_zlacp2_(char *uplo, integer *m, integer *n, doublereal * a, integer *lda, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_zlacpy_(char *uplo, integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_zlacrm_(integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *b, integer *ldb, doublecomplex *c__, integer *ldc, doublereal *rwork); /* Subroutine */ int _starpu_zlacrt_(integer *n, doublecomplex *cx, integer *incx, doublecomplex *cy, integer *incy, doublecomplex *c__, doublecomplex * s); /* Double Complex */ VOID _starpu_zladiv_(doublecomplex * ret_val, doublecomplex *x, doublecomplex *y); /* Subroutine */ int _starpu_zlaed0_(integer *qsiz, integer *n, doublereal *d__, doublereal *e, doublecomplex *q, integer *ldq, doublecomplex *qstore, integer *ldqs, doublereal *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_zlaed7_(integer *n, integer *cutpnt, integer *qsiz, integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, doublecomplex *q, integer *ldq, doublereal *rho, integer *indxq, doublereal *qstore, integer *qptr, integer *prmptr, integer *perm, integer *givptr, integer *givcol, doublereal *givnum, doublecomplex * work, doublereal *rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_zlaed8_(integer *k, integer *n, integer *qsiz, doublecomplex *q, integer *ldq, doublereal *d__, doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, doublecomplex * q2, integer *ldq2, doublereal *w, integer *indxp, integer *indx, integer *indxq, integer *perm, integer *givptr, integer *givcol, doublereal *givnum, integer *info); /* Subroutine */ int _starpu_zlaein_(logical *rightv, logical *noinit, integer *n, doublecomplex *h__, integer *ldh, doublecomplex *w, doublecomplex *v, doublecomplex *b, integer *ldb, doublereal *rwork, doublereal *eps3, doublereal *smlnum, integer *info); /* Subroutine */ int _starpu_zlaesy_(doublecomplex *a, doublecomplex *b, doublecomplex *c__, doublecomplex *rt1, doublecomplex *rt2, doublecomplex *evscal, doublecomplex *cs1, doublecomplex *sn1); /* Subroutine */ int _starpu_zlaev2_(doublecomplex *a, doublecomplex *b, doublecomplex *c__, doublereal *rt1, doublereal *rt2, doublereal *cs1, doublecomplex *sn1); /* Subroutine */ int _starpu_zlag2c_(integer *m, integer *n, doublecomplex *a, integer *lda, complex *sa, integer *ldsa, integer *info); /* Subroutine */ int _starpu_zlags2_(logical *upper, doublereal *a1, doublecomplex * a2, doublereal *a3, doublereal *b1, doublecomplex *b2, doublereal *b3, doublereal *csu, doublecomplex *snu, doublereal *csv, doublecomplex * snv, doublereal *csq, doublecomplex *snq); /* Subroutine */ int _starpu_zlagtm_(char *trans, integer *n, integer *nrhs, doublereal *alpha, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, doublecomplex *x, integer *ldx, doublereal *beta, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_zlahef_(char *uplo, integer *n, integer *nb, integer *kb, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *w, integer *ldw, integer *info); /* Subroutine */ int _starpu_zlahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, integer *ldz, integer *info); /* Subroutine */ int _starpu_zlahr2_(integer *n, integer *k, integer *nb, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *t, integer *ldt, doublecomplex *y, integer *ldy); /* Subroutine */ int _starpu_zlahrd_(integer *n, integer *k, integer *nb, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *t, integer *ldt, doublecomplex *y, integer *ldy); /* Subroutine */ int _starpu_zlaic1_(integer *job, integer *j, doublecomplex *x, doublereal *sest, doublecomplex *w, doublecomplex *gamma, doublereal * sestpr, doublecomplex *s, doublecomplex *c__); /* Subroutine */ int _starpu_zlals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, doublecomplex *b, integer *ldb, doublecomplex *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal *poles, doublereal *difl, doublereal *difr, doublereal * z__, integer *k, doublereal *c__, doublereal *s, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zlalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, doublecomplex *b, integer *ldb, doublecomplex *bx, integer *ldbx, doublereal *u, integer *ldu, doublereal *vt, integer * k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal * poles, integer *givptr, integer *givcol, integer *ldgcol, integer * perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * rwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_zlalsd_(char *uplo, integer *smlsiz, integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublecomplex *b, integer *ldb, doublereal *rcond, integer *rank, doublecomplex *work, doublereal * rwork, integer *iwork, integer *info); doublereal _starpu_zlangb_(char *norm, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, doublereal *work); doublereal _starpu_zlange_(char *norm, integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *work); doublereal _starpu_zlangt_(char *norm, integer *n, doublecomplex *dl, doublecomplex * d__, doublecomplex *du); doublereal _starpu_zlanhb_(char *norm, char *uplo, integer *n, integer *k, doublecomplex *ab, integer *ldab, doublereal *work); doublereal _starpu_zlanhe_(char *norm, char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *work); doublereal _starpu_zlanhf_(char *norm, char *transr, char *uplo, integer *n, doublecomplex *a, doublereal *work); doublereal _starpu_zlanhp_(char *norm, char *uplo, integer *n, doublecomplex *ap, doublereal *work); doublereal _starpu_zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, doublereal *work); doublereal _starpu_zlanht_(char *norm, integer *n, doublereal *d__, doublecomplex *e); doublereal _starpu_zlansb_(char *norm, char *uplo, integer *n, integer *k, doublecomplex *ab, integer *ldab, doublereal *work); doublereal _starpu_zlansp_(char *norm, char *uplo, integer *n, doublecomplex *ap, doublereal *work); doublereal _starpu_zlansy_(char *norm, char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *work); doublereal _starpu_zlantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, doublecomplex *ab, integer *ldab, doublereal *work); doublereal _starpu_zlantp_(char *norm, char *uplo, char *diag, integer *n, doublecomplex *ap, doublereal *work); doublereal _starpu_zlantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *work); /* Subroutine */ int _starpu_zlapll_(integer *n, doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, doublereal *ssmin); /* Subroutine */ int _starpu_zlapmt_(logical *forwrd, integer *m, integer *n, doublecomplex *x, integer *ldx, integer *k); /* Subroutine */ int _starpu_zlaqgb_(integer *m, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlaqge_(integer *m, integer *n, doublecomplex *a, integer *lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlaqhb_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlaqhe_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlaqhp_(char *uplo, integer *n, doublecomplex *ap, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlaqp2_(integer *m, integer *n, integer *offset, doublecomplex *a, integer *lda, integer *jpvt, doublecomplex *tau, doublereal *vn1, doublereal *vn2, doublecomplex *work); /* Subroutine */ int _starpu_zlaqps_(integer *m, integer *n, integer *offset, integer *nb, integer *kb, doublecomplex *a, integer *lda, integer *jpvt, doublecomplex *tau, doublereal *vn1, doublereal *vn2, doublecomplex * auxv, doublecomplex *f, integer *ldf); /* Subroutine */ int _starpu_zlaqr0_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zlaqr1_(integer *n, doublecomplex *h__, integer *ldh, doublecomplex *s1, doublecomplex *s2, doublecomplex *v); /* Subroutine */ int _starpu_zlaqr2_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, doublecomplex *h__, integer *ldh, integer *iloz, integer *ihiz, doublecomplex *z__, integer *ldz, integer *ns, integer *nd, doublecomplex *sh, doublecomplex *v, integer *ldv, integer *nh, doublecomplex *t, integer *ldt, integer *nv, doublecomplex *wv, integer *ldwv, doublecomplex *work, integer *lwork); /* Subroutine */ int _starpu_zlaqr3_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, doublecomplex *h__, integer *ldh, integer *iloz, integer *ihiz, doublecomplex *z__, integer *ldz, integer *ns, integer *nd, doublecomplex *sh, doublecomplex *v, integer *ldv, integer *nh, doublecomplex *t, integer *ldt, integer *nv, doublecomplex *wv, integer *ldwv, doublecomplex *work, integer *lwork); /* Subroutine */ int _starpu_zlaqr4_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zlaqr5_(logical *wantt, logical *wantz, integer *kacc22, integer *n, integer *ktop, integer *kbot, integer *nshfts, doublecomplex *s, doublecomplex *h__, integer *ldh, integer *iloz, integer *ihiz, doublecomplex *z__, integer *ldz, doublecomplex *v, integer *ldv, doublecomplex *u, integer *ldu, integer *nv, doublecomplex *wv, integer *ldwv, integer *nh, doublecomplex *wh, integer *ldwh); /* Subroutine */ int _starpu_zlaqsb_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlaqsp_(char *uplo, integer *n, doublecomplex *ap, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlaqsy_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, char *equed); /* Subroutine */ int _starpu_zlar1v_(integer *n, integer *b1, integer *bn, doublereal *lambda, doublereal *d__, doublereal *l, doublereal *ld, doublereal * lld, doublereal *pivmin, doublereal *gaptol, doublecomplex *z__, logical *wantnc, integer *negcnt, doublereal *ztz, doublereal *mingma, integer *r__, integer *isuppz, doublereal *nrminv, doublereal *resid, doublereal *rqcorr, doublereal *work); /* Subroutine */ int _starpu_zlar2v_(integer *n, doublecomplex *x, doublecomplex *y, doublecomplex *z__, integer *incx, doublereal *c__, doublecomplex *s, integer *incc); /* Subroutine */ int _starpu_zlarcm_(integer *m, integer *n, doublereal *a, integer * lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc, doublereal *rwork); /* Subroutine */ int _starpu_zlarf_(char *side, integer *m, integer *n, doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex *c__, integer * ldc, doublecomplex *work); /* Subroutine */ int _starpu_zlarfb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, doublecomplex *v, integer *ldv, doublecomplex *t, integer *ldt, doublecomplex *c__, integer * ldc, doublecomplex *work, integer *ldwork); /* Subroutine */ int _starpu_zlarfg_(integer *n, doublecomplex *alpha, doublecomplex * x, integer *incx, doublecomplex *tau); /* Subroutine */ int _starpu_zlarfp_(integer *n, doublecomplex *alpha, doublecomplex * x, integer *incx, doublecomplex *tau); /* Subroutine */ int _starpu_zlarft_(char *direct, char *storev, integer *n, integer * k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex * t, integer *ldt); /* Subroutine */ int _starpu_zlarfx_(char *side, integer *m, integer *n, doublecomplex *v, doublecomplex *tau, doublecomplex *c__, integer * ldc, doublecomplex *work); /* Subroutine */ int _starpu_zlargv_(integer *n, doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, doublereal *c__, integer *incc); /* Subroutine */ int _starpu_zlarnv_(integer *idist, integer *iseed, integer *n, doublecomplex *x); /* Subroutine */ int _starpu_zlarrv_(integer *n, doublereal *vl, doublereal *vu, doublereal *d__, doublereal *l, doublereal *pivmin, integer *isplit, integer *m, integer *dol, integer *dou, doublereal *minrgp, doublereal *rtol1, doublereal *rtol2, doublereal *w, doublereal *werr, doublereal *wgap, integer *iblock, integer *indexw, doublereal *gers, doublecomplex *z__, integer *ldz, integer *isuppz, doublereal *work, integer *iwork, integer *info); /* Subroutine */ int _starpu_zlarscl2_(integer *m, integer *n, doublereal *d__, doublecomplex *x, integer *ldx); /* Subroutine */ int _starpu_zlartg_(doublecomplex *f, doublecomplex *g, doublereal * cs, doublecomplex *sn, doublecomplex *r__); /* Subroutine */ int _starpu_zlartv_(integer *n, doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, doublereal *c__, doublecomplex *s, integer *incc); /* Subroutine */ int _starpu_zlarz_(char *side, integer *m, integer *n, integer *l, doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex * c__, integer *ldc, doublecomplex *work); /* Subroutine */ int _starpu_zlarzb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, integer *l, doublecomplex *v, integer *ldv, doublecomplex *t, integer *ldt, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *ldwork); /* Subroutine */ int _starpu_zlarzt_(char *direct, char *storev, integer *n, integer * k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex * t, integer *ldt); /* Subroutine */ int _starpu_zlascl_(char *type__, integer *kl, integer *ku, doublereal *cfrom, doublereal *cto, integer *m, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_zlascl2_(integer *m, integer *n, doublereal *d__, doublecomplex *x, integer *ldx); /* Subroutine */ int _starpu_zlaset_(char *uplo, integer *m, integer *n, doublecomplex *alpha, doublecomplex *beta, doublecomplex *a, integer * lda); /* Subroutine */ int _starpu_zlasr_(char *side, char *pivot, char *direct, integer *m, integer *n, doublereal *c__, doublereal *s, doublecomplex *a, integer *lda); /* Subroutine */ int _starpu_zlassq_(integer *n, doublecomplex *x, integer *incx, doublereal *scale, doublereal *sumsq); /* Subroutine */ int _starpu_zlaswp_(integer *n, doublecomplex *a, integer *lda, integer *k1, integer *k2, integer *ipiv, integer *incx); /* Subroutine */ int _starpu_zlasyf_(char *uplo, integer *n, integer *nb, integer *kb, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *w, integer *ldw, integer *info); /* Subroutine */ int _starpu_zlat2c_(char *uplo, integer *n, doublecomplex *a, integer *lda, complex *sa, integer *ldsa, integer *info); /* Subroutine */ int _starpu_zlatbs_(char *uplo, char *trans, char *diag, char * normin, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublecomplex *x, doublereal *scale, doublereal *cnorm, integer *info); /* Subroutine */ int _starpu_zlatdf_(integer *ijob, integer *n, doublecomplex *z__, integer *ldz, doublecomplex *rhs, doublereal *rdsum, doublereal * rdscal, integer *ipiv, integer *jpiv); /* Subroutine */ int _starpu_zlatps_(char *uplo, char *trans, char *diag, char * normin, integer *n, doublecomplex *ap, doublecomplex *x, doublereal * scale, doublereal *cnorm, integer *info); /* Subroutine */ int _starpu_zlatrd_(char *uplo, integer *n, integer *nb, doublecomplex *a, integer *lda, doublereal *e, doublecomplex *tau, doublecomplex *w, integer *ldw); /* Subroutine */ int _starpu_zlatrs_(char *uplo, char *trans, char *diag, char * normin, integer *n, doublecomplex *a, integer *lda, doublecomplex *x, doublereal *scale, doublereal *cnorm, integer *info); /* Subroutine */ int _starpu_zlatrz_(integer *m, integer *n, integer *l, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work); /* Subroutine */ int _starpu_zlatzm_(char *side, integer *m, integer *n, doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex * c1, doublecomplex *c2, integer *ldc, doublecomplex *work); /* Subroutine */ int _starpu_zlauu2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_zlauum_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_zpbcon_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *anorm, doublereal * rcond, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zpbequ_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_zpbrfs_(char *uplo, integer *n, integer *kd, integer * nrhs, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer * ldafb, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * rwork, integer *info); /* Subroutine */ int _starpu_zpbstf_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_zpbsv_(char *uplo, integer *n, integer *kd, integer * nrhs, doublecomplex *ab, integer *ldab, doublecomplex *b, integer * ldb, integer *info); /* Subroutine */ int _starpu_zpbsvx_(char *fact, char *uplo, integer *n, integer *kd, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *afb, integer *ldafb, char *equed, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal * ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zpbtf2_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_zpbtrf_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, integer *info); /* Subroutine */ int _starpu_zpbtrs_(char *uplo, integer *n, integer *kd, integer * nrhs, doublecomplex *ab, integer *ldab, doublecomplex *b, integer * ldb, integer *info); /* Subroutine */ int _starpu_zpftrf_(char *transr, char *uplo, integer *n, doublecomplex *a, integer *info); /* Subroutine */ int _starpu_zpftri_(char *transr, char *uplo, integer *n, doublecomplex *a, integer *info); /* Subroutine */ int _starpu_zpftrs_(char *transr, char *uplo, integer *n, integer * nrhs, doublecomplex *a, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zpocon_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *anorm, doublereal *rcond, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zpoequ_(integer *n, doublecomplex *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_zpoequb_(integer *n, doublecomplex *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_zporfs_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * rwork, integer *info); /* Subroutine */ int _starpu_zporfsx_(char *uplo, char *equed, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *berr, integer * n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zposv_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zposvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, char *equed, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zposvxx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, char *equed, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zpotf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_zpotrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_zpotri_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_zpotrs_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zppcon_(char *uplo, integer *n, doublecomplex *ap, doublereal *anorm, doublereal *rcond, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zppequ_(char *uplo, integer *n, doublecomplex *ap, doublereal *s, doublereal *scond, doublereal *amax, integer *info); /* Subroutine */ int _starpu_zpprfs_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, doublecomplex *afp, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zppsv_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zppsvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *ap, doublecomplex *afp, char *equed, doublereal * s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zpptrf_(char *uplo, integer *n, doublecomplex *ap, integer *info); /* Subroutine */ int _starpu_zpptri_(char *uplo, integer *n, doublecomplex *ap, integer *info); /* Subroutine */ int _starpu_zpptrs_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zpstf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, integer *info); /* Subroutine */ int _starpu_zpstrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, integer *info); /* Subroutine */ int _starpu_zptcon_(integer *n, doublereal *d__, doublecomplex *e, doublereal *anorm, doublereal *rcond, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zpteqr_(char *compz, integer *n, doublereal *d__, doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_zptrfs_(char *uplo, integer *n, integer *nrhs, doublereal *d__, doublecomplex *e, doublereal *df, doublecomplex *ef, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * rwork, integer *info); /* Subroutine */ int _starpu_zptsv_(integer *n, integer *nrhs, doublereal *d__, doublecomplex *e, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zptsvx_(char *fact, integer *n, integer *nrhs, doublereal *d__, doublecomplex *e, doublereal *df, doublecomplex *ef, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zpttrf_(integer *n, doublereal *d__, doublecomplex *e, integer *info); /* Subroutine */ int _starpu_zpttrs_(char *uplo, integer *n, integer *nrhs, doublereal *d__, doublecomplex *e, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zptts2_(integer *iuplo, integer *n, integer *nrhs, doublereal *d__, doublecomplex *e, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_zrot_(integer *n, doublecomplex *cx, integer *incx, doublecomplex *cy, integer *incy, doublereal *c__, doublecomplex *s); /* Subroutine */ int _starpu_zspcon_(char *uplo, integer *n, doublecomplex *ap, integer *ipiv, doublereal *anorm, doublereal *rcond, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zspmv_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *ap, doublecomplex *x, integer *incx, doublecomplex * beta, doublecomplex *y, integer *incy); /* Subroutine */ int _starpu_zspr_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *x, integer *incx, doublecomplex *ap); /* Subroutine */ int _starpu_zsprfs_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, doublecomplex *afp, integer *ipiv, doublecomplex * b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_zspsv_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zspsvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *ap, doublecomplex *afp, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zsptrf_(char *uplo, integer *n, doublecomplex *ap, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zsptri_(char *uplo, integer *n, doublecomplex *ap, integer *ipiv, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zsptrs_(char *uplo, integer *n, integer *nrhs, doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_zstedc_(char *compz, integer *n, doublereal *d__, doublereal *e, doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zstegr_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, integer *isuppz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zstein_(integer *n, doublereal *d__, doublereal *e, integer *m, doublereal *w, integer *iblock, integer *isplit, doublecomplex *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info); /* Subroutine */ int _starpu_zstemr_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, integer *m, doublereal *w, doublecomplex *z__, integer * ldz, integer *nzc, integer *isuppz, logical *tryrac, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_zsteqr_(char *compz, integer *n, doublereal *d__, doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, integer *info); /* Subroutine */ int _starpu_zsycon_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublereal *anorm, doublereal *rcond, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zsyequb_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zsymv_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex *y, integer *incy); /* Subroutine */ int _starpu_zsyr_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *x, integer *incx, doublecomplex *a, integer *lda); /* Subroutine */ int _starpu_zsyrfs_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zsyrfsx_(char *uplo, char *equed, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zsysv_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zsysvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zsysvxx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, char *equed, doublereal *s, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * nparams, doublereal *params, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_zsytf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info); /* Subroutine */ int _starpu_zsytrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zsytri_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zsytrs_(char *uplo, integer *n, integer *nrhs, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ztbcon_(char *norm, char *uplo, char *diag, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal *rcond, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_ztbrfs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal * rwork, integer *info); /* Subroutine */ int _starpu_ztbtrs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, doublecomplex *ab, integer *ldab, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ztfsm_(char *transr, char *side, char *uplo, char *trans, char *diag, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, doublecomplex *b, integer *ldb); /* Subroutine */ int _starpu_ztftri_(char *transr, char *uplo, char *diag, integer *n, doublecomplex *a, integer *info); /* Subroutine */ int _starpu_ztfttp_(char *transr, char *uplo, integer *n, doublecomplex *arf, doublecomplex *ap, integer *info); /* Subroutine */ int _starpu_ztfttr_(char *transr, char *uplo, integer *n, doublecomplex *arf, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ztgevc_(char *side, char *howmny, logical *select, integer *n, doublecomplex *s, integer *lds, doublecomplex *p, integer *ldp, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer * ldvr, integer *mm, integer *m, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_ztgex2_(logical *wantq, logical *wantz, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *q, integer *ldq, doublecomplex *z__, integer *ldz, integer *j1, integer *info); /* Subroutine */ int _starpu_ztgexc_(logical *wantq, logical *wantz, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *q, integer *ldq, doublecomplex *z__, integer *ldz, integer *ifst, integer *ilst, integer *info); /* Subroutine */ int _starpu_ztgsen_(integer *ijob, logical *wantq, logical *wantz, logical *select, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *alpha, doublecomplex * beta, doublecomplex *q, integer *ldq, doublecomplex *z__, integer * ldz, integer *m, doublereal *pl, doublereal *pr, doublereal *dif, doublecomplex *work, integer *lwork, integer *iwork, integer *liwork, integer *info); /* Subroutine */ int _starpu_ztgsja_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, integer *k, integer *l, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *tola, doublereal *tolb, doublereal *alpha, doublereal *beta, doublecomplex * u, integer *ldu, doublecomplex *v, integer *ldv, doublecomplex *q, integer *ldq, doublecomplex *work, integer *ncycle, integer *info); /* Subroutine */ int _starpu_ztgsna_(char *job, char *howmny, logical *select, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer * ldvr, doublereal *s, doublereal *dif, integer *mm, integer *m, doublecomplex *work, integer *lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_ztgsy2_(char *trans, integer *ijob, integer *m, integer * n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc, doublecomplex *d__, integer *ldd, doublecomplex *e, integer *lde, doublecomplex *f, integer *ldf, doublereal *scale, doublereal *rdsum, doublereal *rdscal, integer * info); /* Subroutine */ int _starpu_ztgsyl_(char *trans, integer *ijob, integer *m, integer * n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc, doublecomplex *d__, integer *ldd, doublecomplex *e, integer *lde, doublecomplex *f, integer *ldf, doublereal *scale, doublereal *dif, doublecomplex *work, integer * lwork, integer *iwork, integer *info); /* Subroutine */ int _starpu_ztpcon_(char *norm, char *uplo, char *diag, integer *n, doublecomplex *ap, doublereal *rcond, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_ztprfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublecomplex *ap, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_ztptri_(char *uplo, char *diag, integer *n, doublecomplex *ap, integer *info); /* Subroutine */ int _starpu_ztptrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublecomplex *ap, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ztpttf_(char *transr, char *uplo, integer *n, doublecomplex *ap, doublecomplex *arf, integer *info); /* Subroutine */ int _starpu_ztpttr_(char *uplo, integer *n, doublecomplex *ap, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ztrcon_(char *norm, char *uplo, char *diag, integer *n, doublecomplex *a, integer *lda, doublereal *rcond, doublecomplex * work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_ztrevc_(char *side, char *howmny, logical *select, integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer *m, doublecomplex *work, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_ztrexc_(char *compq, integer *n, doublecomplex *t, integer *ldt, doublecomplex *q, integer *ldq, integer *ifst, integer * ilst, integer *info); /* Subroutine */ int _starpu_ztrrfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, doublereal *rwork, integer * info); /* Subroutine */ int _starpu_ztrsen_(char *job, char *compq, logical *select, integer *n, doublecomplex *t, integer *ldt, doublecomplex *q, integer *ldq, doublecomplex *w, integer *m, doublereal *s, doublereal *sep, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_ztrsna_(char *job, char *howmny, logical *select, integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, doublereal *s, doublereal *sep, integer *mm, integer *m, doublecomplex *work, integer *ldwork, doublereal *rwork, integer *info); /* Subroutine */ int _starpu_ztrsyl_(char *trana, char *tranb, integer *isgn, integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc, doublereal *scale, integer *info); /* Subroutine */ int _starpu_ztrti2_(char *uplo, char *diag, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ztrtri_(char *uplo, char *diag, integer *n, doublecomplex *a, integer *lda, integer *info); /* Subroutine */ int _starpu_ztrtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, integer *info); /* Subroutine */ int _starpu_ztrttf_(char *transr, char *uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *arf, integer *info); /* Subroutine */ int _starpu_ztrttp_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *ap, integer *info); /* Subroutine */ int _starpu_ztzrqf_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, integer *info); /* Subroutine */ int _starpu_ztzrzf_(integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zung2l_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zung2r_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zungbr_(char *vect, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zunghr_(integer *n, integer *ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zungl2_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zunglq_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zungql_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zungqr_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zungr2_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zungrq_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zungtr_(char *uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zunm2l_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zunm2r_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zunmbr_(char *vect, char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_zunmhr_(char *side, char *trans, integer *m, integer *n, integer *ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex * work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zunml2_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zunmlq_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zunmql_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zunmqr_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zunmr2_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_zunmr3_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer * info); /* Subroutine */ int _starpu_zunmrq_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zunmrz_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer * lwork, integer *info); /* Subroutine */ int _starpu_zunmtr_(char *side, char *uplo, char *trans, integer *m, integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, integer *info); /* Subroutine */ int _starpu_zupgtr_(char *uplo, integer *n, doublecomplex *ap, doublecomplex *tau, doublecomplex *q, integer *ldq, doublecomplex * work, integer *info); /* Subroutine */ int _starpu_zupmtr_(char *side, char *uplo, char *trans, integer *m, integer *n, doublecomplex *ap, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info); /* Subroutine */ int _starpu_dlamc1_(integer *beta, integer *t, logical *rnd, logical *ieee1); doublereal _starpu_dsecnd_(); /* Subroutine */ int _starpu_ilaver_(integer *vers_major__, integer *vers_minor__, integer *vers_patch__); logical _starpu_lsame_(char *ca, char *cb); doublereal _starpu_second_(); doublereal _starpu_slamch_(char *cmach); /* Subroutine */ int _starpu_slamc1_(integer *beta, integer *t, logical *rnd, logical *ieee1); /* Subroutine */ int _starpu_slamc2_(integer *beta, integer *t, logical *rnd, real * eps, integer *emin, real *rmin, integer *emax, real *rmax); doublereal _starpu_slamc3_(real *a, real *b); /* Subroutine */ int _starpu_slamc4_(integer *emin, real *start, integer *base); /* Subroutine */ int _starpu_slamc5_(integer *beta, integer *p, integer *emin, logical *ieee, integer *emax, real *rmax); doublereal _starpu_dlamch_(char *cmach); /* Subroutine */ int _starpu_dlamc1_(integer *beta, integer *t, logical *rnd, logical *ieee1); /* Subroutine */ int _starpu_dlamc2_(integer *beta, integer *t, logical *rnd, doublereal *eps, integer *emin, doublereal *rmin, integer *emax, doublereal *rmax); doublereal _starpu_dlamc3_(doublereal *a, doublereal *b); /* Subroutine */ int _starpu_dlamc4_(integer *emin, doublereal *start, integer *base); /* Subroutine */ int _starpu_dlamc5_(integer *beta, integer *p, integer *emin, logical *ieee, integer *emax, doublereal *rmax); integer _starpu_ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, integer *n2, integer *n3, integer *n4); #ifdef __cplusplus } #endif #endif /* __CLAPACK_H */ starpu-1.4.9+dfsg/min-dgels/base/INCLUDE/f2c.h000066400000000000000000000111201507764646700205020ustar00rootroot00000000000000/* f2c.h -- Standard Fortran to C header file */ /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ #ifndef F2C_INCLUDE #define F2C_INCLUDE typedef long int integer; typedef unsigned long int uinteger; typedef char *address; typedef short int shortint; typedef float real; typedef double doublereal; typedef struct { real r, i; } complex; typedef struct { doublereal r, i; } doublecomplex; typedef long int logical; typedef short int shortlogical; typedef char logical1; typedef char integer1; #ifdef INTEGER_STAR_8 /* Adjust for integer*8. */ typedef long long longint; /* system-dependent */ typedef unsigned long long ulongint; /* system-dependent */ #define qbit_clear(a,b) ((a) & ~((ulongint)1 << (b))) #define qbit_set(a,b) ((a) | ((ulongint)1 << (b))) #endif #define TRUE_ (1) #define FALSE_ (0) /* Extern is for use with -E */ #ifndef Extern #define Extern extern #endif /* I/O stuff */ #ifdef f2c_i2 /* for -i2 */ typedef short flag; typedef short ftnlen; typedef short ftnint; #else typedef long int flag; typedef long int ftnlen; typedef long int ftnint; #endif /*external read, write*/ typedef struct { flag cierr; ftnint ciunit; flag ciend; char *cifmt; ftnint cirec; } cilist; /*internal read, write*/ typedef struct { flag icierr; char *iciunit; flag iciend; char *icifmt; ftnint icirlen; ftnint icirnum; } icilist; /*open*/ typedef struct { flag oerr; ftnint ounit; char *ofnm; ftnlen ofnmlen; char *osta; char *oacc; char *ofm; ftnint orl; char *oblnk; } olist; /*close*/ typedef struct { flag cerr; ftnint cunit; char *csta; } cllist; /*rewind, backspace, endfile*/ typedef struct { flag aerr; ftnint aunit; } alist; /* inquire */ typedef struct { flag inerr; ftnint inunit; char *infile; ftnlen infilen; ftnint *inex; /*parameters in standard's order*/ ftnint *inopen; ftnint *innum; ftnint *innamed; char *inname; ftnlen innamlen; char *inacc; ftnlen inacclen; char *inseq; ftnlen inseqlen; char *indir; ftnlen indirlen; char *infmt; ftnlen infmtlen; char *inform; ftnint informlen; char *inunf; ftnlen inunflen; ftnint *inrecl; ftnint *innrec; char *inblank; ftnlen inblanklen; } inlist; #define VOID void union Multitype { /* for multiple entry points */ integer1 g; shortint h; integer i; /* longint j; */ real r; doublereal d; complex c; doublecomplex z; }; typedef union Multitype Multitype; /*typedef long int Long;*/ /* No longer used; formerly in Namelist */ struct Vardesc { /* for Namelist */ char *name; char *addr; ftnlen *dims; int type; }; typedef struct Vardesc Vardesc; struct Namelist { char *name; Vardesc **vars; int nvars; }; typedef struct Namelist Namelist; #define abs(x) ((x) >= 0 ? (x) : -(x)) #define dabs(x) (doublereal)abs(x) #define min(a,b) ((a) <= (b) ? (a) : (b)) #define max(a,b) ((a) >= (b) ? (a) : (b)) #define dmin(a,b) (doublereal)min(a,b) #define dmax(a,b) (doublereal)max(a,b) #define bit_test(a,b) ((a) >> (b) & 1) #define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) #define bit_set(a,b) ((a) | ((uinteger)1 << (b))) /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 #ifdef __cplusplus typedef int /* Unknown procedure type */ (*U_fp)(...); typedef shortint (*J_fp)(...); typedef integer (*I_fp)(...); typedef real (*R_fp)(...); typedef doublereal (*D_fp)(...), (*E_fp)(...); typedef /* Complex */ VOID (*C_fp)(...); typedef /* Double Complex */ VOID (*Z_fp)(...); typedef logical (*L_fp)(...); typedef shortlogical (*K_fp)(...); typedef /* Character */ VOID (*H_fp)(...); typedef /* Subroutine */ int (*S_fp)(...); #else typedef int /* Unknown procedure type */ (*U_fp)(); typedef shortint (*J_fp)(); typedef integer (*I_fp)(); typedef real (*R_fp)(); typedef doublereal (*D_fp)(), (*E_fp)(); typedef /* Complex */ VOID (*C_fp)(); typedef /* Double Complex */ VOID (*Z_fp)(); typedef logical (*L_fp)(); typedef shortlogical (*K_fp)(); typedef /* Character */ VOID (*H_fp)(); typedef /* Subroutine */ int (*S_fp)(); #endif /* E_fp is for real functions when -R is not specified */ typedef VOID C_f; /* complex function */ typedef VOID H_f; /* character function */ typedef VOID Z_f; /* double complex function */ typedef doublereal E_f; /* real function with -R not specified */ /* undef any lower-case symbols that your C compiler predefines, e.g.: */ #ifndef Skip_f2c_Undefs #undef cray #undef gcos #undef mc68010 #undef mc68020 #undef mips #undef pdp11 #undef sgi #undef sparc #undef sun #undef sun2 #undef sun3 #undef sun4 #undef u370 #undef u3b #undef u3b2 #undef u3b5 #undef unix #undef vax #endif #endif starpu-1.4.9+dfsg/min-dgels/base/Makefile000066400000000000000000000005461507764646700202460ustar00rootroot00000000000000# # Top Level Makefile for LAPACK # Version 3.2.1 # June 2009 # include make.inc all: lib lib: f2clib blaslib clean: cleanlib blaslib: ( cd BLAS/SRC; $(MAKE) ) f2clib: ( cd F2CLIBS/libf2c; $(MAKE) ) cleanlib: ( cd BLAS/SRC; $(MAKE) clean ) ( cd SRC; $(MAKE) clean ) ( cd SRC/VARIANTS; $(MAKE) clean ) ( cd F2CLIBS/libf2c; $(MAKE) clean ) starpu-1.4.9+dfsg/min-dgels/base/README.install000066400000000000000000000205721507764646700211340ustar00rootroot00000000000000 =================== CLAPACK README FILE =================== ============================================================================================ Version 3.2.1 (threadsafe) Release date: June 2009 / April 2010 F2C translation of LAPACK 3.2.1 To get revisions info about LAPACK 3.2.1, please see http://www.netlib.org/lapack/lapack-3.2.1.html FOR SUPPORT: LAPACK Forum: http://icl.cs.utk.edu/lapack-forum/ ============================================================================================ This README file describes how and how to install the ANSI C translation of the LAPACK library, called CLAPACK. CLAPACK must be compiled with an ANSI Standard C compiler. If the C compiler on your machine is an old-style C compiler, you will have to use gcc to compile the package. IMPORTANT NOTE: You *CANNOT* just go to www.netlib.org/clapack, download a routine like sgesv.c and have it work unless you properly install and link to the f2c and BLAS routines as described below. If your linker complains about missing functions, you have probably accidentally neglected this step. Also, you will need the file "f2c.h" (included with the f2c libraries) in order to compile these routines. The default BLAS routines included with CLAPACK in the BLAS/SRC subdirectory may also be used these will most likely be slower than a BLAS library optimized for your machine. If you do not have such an optimized BLAS library, you can get one at http://www.netlib.org/atlas ============================================================================== For a fast default installation, you will need to - Untar clapack.tar and modify the make.inc file (see step 1 below) - Make the f2c libraries (see step 2 below) - Make the BLAS library (see step 2 below) - Make the main library, test it, and time it by simply typing make If you encounter difficulties, you may find the installation manual for the FORTRAN version (INSTALL/lawn81.*) useful. Procedure for installing CLAPACK: ============================================================================== (1) 'tar xvf clapack.tar' to build the following directory structure: CLAPACK/README.install this file CLAPACK/BLAS/ C source for BLAS CLAPACK/F2CLIBS/ f2c I/O functions (libI77) and math functions (libF77) CLAPACK/INSTALL/ Testing functions and pre-tested make.inc files for various platforms. CLAPACK/INCLUDE/ header files - clapack.h is including C prototypes of all the CLAPACK routines. CLAPACK/SRC/ C source of LAPACK routines CLAPACK/TESTING/ driver routines to test correctness CLAPACK/make.inc compiler, compile flags and library definitions, included in all Makefiles. NOTE: It's better to use gcc compiler on some older Sun systems. You should be sure to modify the make.inc file for your system. (2) Build the f2c libraries by doing: make f2clib ############################################################################## WARNING: 1) If your system lacks onexit() and you are not using an ANSI C compiler, then you should change your F2CCFLAGS line in make.inc to F2CCFLAGS=$(CFLAGS) -DNO_ONEXIT On at least some Sun systems, it is better to use F2CCFLAGS=$(CFLAGS) -Donexit=on_exit 2) On at least some Sun systems, the type declaration in F2CLIBS/libI77/rawio.h: extern FILE *fdopen(int, char*) is not consistent with the one defined in stdio.h. In this case you should comment out this line. ############################################################################## (3) To run CLAPACK, you need to create a BLAS library. The performance of CLAPACK largely depends on the performance of the BLAS library. You can either use the reference BLAS library included with this distribution, which is easy to install but not optimized to be fast on any particular machine, or else find a version of the BLAS optimized for your machine. If you want to use the reference BLAS library included with this distribution, build it by doing make blaslib from the main directory. If you want to find a BLAS library optimized for your machine, see the note below for more details; see also the README in the BLAS/WRAP directory. (4) Compile and run the BLAS TESTING code by doing: cd CLAPACK/BLAS/TESTING; make -f Makeblat2 cd CLAPACK/BLAS xblat2s < sblat2.in xblat2d < dblat2.in xblat2c < cblat2.in xblat2z < zblat2.in cd CLAPACK/BLAS/TESTING; make -f Makeblat3 cd CLAPACK/BLAS xblat3s < sblat3.in xblat3d < dblat3.in xblat3c < cblat3.in xblat3z < zblat3.in Inspect the output files *.SUMM to confirm that no errors occurred. {NOTE: If a compiling error involving _atexit appears then see information within the WARNING above.} {NOTE: For the highest performance, it is best to use a version of the BLAS optimized for your particular machine. This may be done by modifying the line BLASLIB = ../../blas$(PLAT).a in CLAPACK/make.inc to point to the optimized BLAS. Link with BLAS which provides the standard CBLAS interface ========================================================== If you are using a version of the BLAS which provides the standard CBLAS interface (e.g. ATLAS), you need to add the appropriate "wrapper" library. you can make the wrapper library libcblaswr.a by typing "make cblaswrap" from the main directory. For this setup (ATLAS with the CBLAS wrapper), the BLASLIB line might look like Example: Modification to make.inc CC = gcc BLASLIB = ../../libcblaswr.a -lcblas -latlas Creation of libcblaswr.a : make cblaswrap Link with BLAS which Fortran calling interface =============================================== Two possibilities: - add -DNO_BLAS_WRAP to the CC variable to when compiling and no need of a "wrapper" library Example: CC = gcc -DNO_BLAS_WRAP BLASLIB = -lgoto -lpthread - add the sample Fortran calling interface (libfblaswr.a) for systems with Sun-style Fortran calling conventions is also provided; however, this interface will need modifications to work on other architectures which have different Fortran calling convensions. See the README in the BLAS/WRAP subdirectory for further information. Example: CC = gcc BLASLIB = ../../libfblaswr.a -lgoto -lpthread Creation of libfblaswr.a : make fblaswrap } (5) Build the archive containing lapack source code by doing: cd CLAPACK/SRC; make (6) Compile the matrix generation software, the eigenroutine TESTING code, the linear system TESTING code, and run the LAPACK tests by doing: cd CLAPACK/TESTING/MATGEN; make cd CLAPACK/TESTING; make Inspect the output files *.out to confirm that no errors occurred. I. Compile the matrix generation software, the eigenroutine TESTING code, the linear system TESTING code, and run the LAPACK tests separately by doing: cd CLAPACK/TESTING/MATGEN; make cd CLAPACK/TESTING/EIG; make cd CLAPACK/TESTING/LIN; make cd CLAPACK/TESTING; make II. After the executable files and libraries have been created for each of the compiles, the object files should be removed by doing: make clean III. Each 'make' may be accomplished just for one or a subset of the precisions desired. For example: make single make single complex make single double complex complex16 Using make without any arguments will compile all four precisions. James Demmel Xiaoye Li Chris Puscasiu Steve Timson UC Berkeley Sept 27 1993 {Revised by Susan Ostrouchov and Jude Toth} {The University of Tennessee at Knoxville} {October 15, 1993} {Revised by Xiaoye Li and James Demmel} {University of California at Berkeley} {November 22, 1994} {Revised by David Bindel and James Demmel} {University of California at Berkeley} {July 19, 2000} {Revised by Julie Langou} {University of Tennessee} {February 2008} {Revised by Julie Langou} {University of Tennessee} {October 2008, April 2010} {Revised by Peng Du} {University of Tennessee} {May 2009} starpu-1.4.9+dfsg/min-dgels/base/SRC/000077500000000000000000000000001507764646700172305ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/SRC/Makefile000066400000000000000000000161771507764646700207040ustar00rootroot00000000000000TOPDIR=.. include $(TOPDIR)/make.inc ####################################################################### # This is the makefile to create a library for LAPACK. # The files are organized as follows: # ALLAUX -- Auxiliary routines called from all precisions # ALLXAUX -- Auxiliary routines called from all precisions but # only from routines using extra precision. # SCLAUX -- Auxiliary routines called from both REAL and COMPLEX # DZLAUX -- Auxiliary routines called from both DOUBLE PRECISION # and COMPLEX*16 # SLASRC -- Single precision real LAPACK routines # SXLASRC -- Single precision real LAPACK routines using extra # precision. # CLASRC -- Single precision complex LAPACK routines # CXLASRC -- Single precision complex LAPACK routines using extra # precision. # DLASRC -- Double precision real LAPACK routines # DXLASRC -- Double precision real LAPACK routines using extra # precision. # ZLASRC -- Double precision complex LAPACK routines # ZXLASRC -- Double precision complex LAPACK routines using extra # precision. # # The library can be set up to include routines for any combination # of the four precisions. To create or add to the library, enter make # followed by one or more of the precisions desired. Some examples: # make single # make single complex # make single double complex complex16 # Alternatively, the command # make # without any arguments creates a library of all four precisions. # The library is called # lapack.a # and is created at the next higher directory level. # # To remove the object files after the library is created, enter # make clean # On some systems, you can force the source files to be recompiled by # entering (for example) # make single FRC=FRC # # ***Note*** # The functions lsame, second, dsecnd, slamch, and dlamch may have # to be installed before compiling the library. Refer to the # installation guide, LAPACK Working Note 41, for instructions. # ####################################################################### ALLAUX = maxloc.o ilaenv.o ieeeck.o lsamen.o xerbla.o xerbla_array.o iparmq.o \ ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \ ../INSTALL/ilaver.o ../INSTALL/lsame.o ALLXAUX = DZLAUX = \ dbdsdc.o \ dbdsqr.o ddisna.o dlabad.o dlacpy.o dladiv.o dlae2.o dlaebz.o \ dlaed0.o dlaed1.o dlaed2.o dlaed3.o dlaed4.o dlaed5.o dlaed6.o \ dlaed7.o dlaed8.o dlaed9.o dlaeda.o dlaev2.o dlagtf.o \ dlagts.o dlamrg.o dlanst.o \ dlapy2.o dlapy3.o dlarnv.o \ dlarra.o dlarrb.o dlarrc.o dlarrd.o dlarre.o dlarrf.o dlarrj.o \ dlarrk.o dlarrr.o dlaneg.o \ dlartg.o dlaruv.o dlas2.o dlascl.o \ dlasd0.o dlasd1.o dlasd2.o dlasd3.o dlasd4.o dlasd5.o dlasd6.o \ dlasd7.o dlasd8.o dlasda.o dlasdq.o dlasdt.o \ dlaset.o dlasq1.o dlasq2.o dlasq3.o dlasq4.o dlasq5.o dlasq6.o \ dlasr.o dlasrt.o dlassq.o dlasv2.o dpttrf.o dstebz.o dstedc.o \ dsteqr.o dsterf.o dlaisnan.o disnan.o \ ../INSTALL/dlamch.o ../INSTALL/dsecnd.o DLASRC = \ dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \ dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \ dgebrd.o dgecon.o dgeequ.o dgees.o dgeesx.o dgeev.o dgeevx.o \ dgegs.o dgegv.o dgehd2.o dgehrd.o dgelq2.o dgelqf.o \ dgels.o dgelsd.o dgelss.o dgelsx.o dgelsy.o dgeql2.o dgeqlf.o \ dgeqp3.o dgeqpf.o dgeqr2.o dgeqrf.o dgerfs.o dgerq2.o dgerqf.o \ dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvx.o dgetc2.o dgetf2.o \ dgetrf.o dgetri.o \ dgetrs.o dggbak.o dggbal.o dgges.o dggesx.o dggev.o dggevx.o \ dggglm.o dgghrd.o dgglse.o dggqrf.o \ dggrqf.o dggsvd.o dggsvp.o dgtcon.o dgtrfs.o dgtsv.o \ dgtsvx.o dgttrf.o dgttrs.o dgtts2.o dhgeqz.o \ dhsein.o dhseqr.o dlabrd.o dlacon.o dlacn2.o \ dlaein.o dlaexc.o dlag2.o dlags2.o dlagtm.o dlagv2.o dlahqr.o \ dlahrd.o dlahr2.o dlaic1.o dlaln2.o dlals0.o dlalsa.o dlalsd.o \ dlangb.o dlange.o dlangt.o dlanhs.o dlansb.o dlansp.o \ dlansy.o dlantb.o dlantp.o dlantr.o dlanv2.o \ dlapll.o dlapmt.o \ dlaqgb.o dlaqge.o dlaqp2.o dlaqps.o dlaqsb.o dlaqsp.o dlaqsy.o \ dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \ dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \ dlarf.o dlarfb.o dlarfg.o dlarft.o dlarfx.o dlargv.o \ dlarrv.o dlartv.o dlarfp.o \ dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o dlasyf.o \ dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o dlauu2.o \ dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \ dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \ dorgrq.o dorgtr.o dorm2l.o dorm2r.o \ dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \ dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \ dpbstf.o dpbsv.o dpbsvx.o \ dpbtf2.o dpbtrf.o dpbtrs.o dpocon.o dpoequ.o dporfs.o dposv.o \ dposvx.o dpotf2.o dpotrf.o dpotri.o dpotrs.o dpstrf.o dpstf2.o \ dppcon.o dppequ.o \ dpprfs.o dppsv.o dppsvx.o dpptrf.o dpptri.o dpptrs.o dptcon.o \ dpteqr.o dptrfs.o dptsv.o dptsvx.o dpttrs.o dptts2.o drscl.o \ dsbev.o dsbevd.o dsbevx.o dsbgst.o dsbgv.o dsbgvd.o dsbgvx.o \ dsbtrd.o dspcon.o dspev.o dspevd.o dspevx.o dspgst.o \ dspgv.o dspgvd.o dspgvx.o dsprfs.o dspsv.o dspsvx.o dsptrd.o \ dsptrf.o dsptri.o dsptrs.o dstegr.o dstein.o dstev.o dstevd.o dstevr.o \ dstevx.o dsycon.o dsyev.o dsyevd.o dsyevr.o \ dsyevx.o dsygs2.o dsygst.o dsygv.o dsygvd.o dsygvx.o dsyrfs.o \ dsysv.o dsysvx.o \ dsytd2.o dsytf2.o dsytrd.o dsytrf.o dsytri.o dsytrs.o dtbcon.o \ dtbrfs.o dtbtrs.o dtgevc.o dtgex2.o dtgexc.o dtgsen.o \ dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \ dtptrs.o \ dtrcon.o dtrevc.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \ dtrti2.o dtrtri.o dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \ dsgesv.o dsposv.o dlag2s.o slag2d.o dlat2s.o \ dlansf.o dpftrf.o dpftri.o dpftrs.o dsfrk.o dtfsm.o dtftri.o dtfttp.o \ dtfttr.o dtpttf.o dtpttr.o dtrttf.o dtrttp.o \ dgejsv.o dgesvj.o dgsvj0.o dgsvj1.o \ dgeequb.o dsyequb.o dpoequb.o dgbequb.o DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \ dla_gercond.o dla_rpvgrw.o dsysvxx.o dsyrfsx.o \ dla_syrfsx_extended.o dla_syamv.o dla_syrcond.o dla_syrpvgrw.o \ dposvxx.o dporfsx.o dla_porfsx_extended.o dla_porcond.o \ dla_porpvgrw.o dgbsvxx.o dgbrfsx.o dla_gbrfsx_extended.o \ dla_gbamv.o dla_gbrcond.o dla_gbrpvgrw.o dla_lin_berr.o dlarscl2.o \ dlascl2.o dla_wwaddw.o all: ../$(LAPACKLIB) ifdef USEXBLAS ALLXOBJ=$(DXLASRC) $(ALLXAUX) endif ALLOBJ=$(DLASRC) $(DZLAUX) \ $(ALLAUX) ../$(LAPACKLIB): $(ALLOBJ) $(ALLXOBJ) $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) $(ALLXOBJ) $(RANLIB) $@ double: $(DLASRC) $(ALLAUX) $(DZLAUX) $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(DLASRC) $(ALLAUX) \ $(DZLAUX) $(RANLIB) ../$(LAPACKLIB) $(ALLAUX): $(FRC) $(DZLAUX): $(FRC) $(DLASRC): $(FRC) ifdef USEXBLAS $(ALLXAUX): $(FRC) $(DXLASRC): $(FRC) endif FRC: @FRC=$(FRC) clean: rm -f *.o .c.o: $(CC) $(CFLAGS) -c $< dlaruv.o: dlaruv.c ; $(CC) $(NOOPT) -c $< -o $@ dla_wwaddw.o: dla_wwaddw.c ; $(CC) $(NOOPT) -c $< -o $@ starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/000077500000000000000000000000001507764646700204575ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/Makefile000066400000000000000000000045371507764646700221300ustar00rootroot00000000000000TOPDIR=../.. include $(TOPDIR)/make.inc ####################################################################### # This is the makefile to create a the variants libraries for LAPACK. # The files are organized as follows: # CHOLRL -- Right looking block version of the algorithm, calling Level 3 BLAS # CHOLTOP -- Top looking block version of the algorithm, calling Level 3 BLAS # LUCR -- Crout Level 3 BLAS version of LU factorization # LULL -- left-looking Level 3 BLAS version of LU factorization # QRLL -- left-looking Level 3 BLAS version of QR factorization # LUREC -- an iterative version of Sivan Toledo's recursive LU algorithm[1]. # For square matrices, this iterative versions should # be within a factor of two of the optimum number of memory transfers. # # [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with # Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), # 1065-1081. http://dx.doi.org/10.1137/S0895479896297744 ####################################################################### VARIANTSDIR=LIB CHOLRL = cholesky/RL/cpotrf.o cholesky/RL/dpotrf.o cholesky/RL/spotrf.o cholesky/RL/zpotrf.o CHOLTOP = cholesky/TOP/cpotrf.o cholesky/TOP/dpotrf.o cholesky/TOP/spotrf.o cholesky/TOP/zpotrf.o LUCR = lu/CR/cgetrf.o lu/CR/dgetrf.o lu/CR/sgetrf.o lu/CR/zgetrf.o LULL = lu/LL/cgetrf.o lu/LL/dgetrf.o lu/LL/sgetrf.o lu/LL/zgetrf.o LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.o all: cholrl choltop lucr lull lurec qrll cholrl: $(CHOLRL) $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/cholrl.a $(CHOLRL) $(RANLIB) $(VARIANTSDIR)/cholrl.a choltop: $(CHOLTOP) $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/choltop.a $(CHOLTOP) $(RANLIB) $(VARIANTSDIR)/choltop.a lucr: $(LUCR) $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lucr.a $(LUCR) $(RANLIB) $(VARIANTSDIR)/lucr.a lull: $(LULL) $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lull.a $(LULL) $(RANLIB) $(VARIANTSDIR)/lull.a lurec: $(LUREC) $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lurec.a $(LUREC) $(RANLIB) $(VARIANTSDIR)/lurec.a qrll: $(QRLL) $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/qrll.a $(QRLL) $(RANLIB) $(VARIANTSDIR)/qrll.a .c.o: $(CC) $(CFLAGS) -c $< -o $@ clean: rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) \ $(VARIANTSDIR)/*.a starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/README000066400000000000000000000057611507764646700213500ustar00rootroot00000000000000 =============== = README File = =============== This README File is for the LAPACK driver variants. It is composed of 5 sections: - Description: contents a quick description of each of the variants. For a more detailed description please refer to LAWN XXX. - Build - Testing - Linking your program - Support Author: Julie LANGOU, May 2008 =============== = DESCRIPTION = =============== This directory contains several variants of LAPACK routines in single/double/complex/double complex precision: - [sdcz]getrf with LU Crout Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/lu/CR - [sdcz]getrf with LU Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/lu/LL - [sdcz]getrf with Sivan Toledo's recursive LU algorithm [1] - Directory: SRC/VARIANTS/lu/REC - [sdcz]geqrf with QR Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/qr/LL - [sdcz]potrf with Cholesky Right Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/RL - [sdcz]potrf with Cholesky Top Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/TOP References:For a more detailed description please refer to - [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), 1065-1081. http://dx.doi.org/10.1137/S0895479896297744 - [2]LAWN XXX ========= = BUILD = ========= These variants are compiled by default in the build process but they are not tested by default. The build process creates one new library per variants in the four arithmetics (singel/double/comple/double complex). The libraries are in the SRC/VARIANTS/LIB directory. Corresponding libraries created in SRC/VARIANTS/LIB: - LU Crout : lucr.a - LU Left Looking : lull.a - LU Sivan Toledo's recursive : lurec.a - QR Left Looking : qrll.a - Cholesky Right Looking : cholrl.a - Cholesky Top : choltop.a =========== = TESTING = =========== To test these variants you can type 'make variants-testing' This will rerun the linear methods testings once per variants and append the short name of the variants to the output files. You should then see the following files in the TESTING directory: [scdz]test_cholrl.out [scdz]test_choltop.out [scdz]test_lucr.out [scdz]test_lull.out [scdz]test_lurec.out [scdz]test_qrll.out ======================== = LINKING YOUR PROGRAM = ======================== You just need to add the variants methods library in your linking sequence before your lapack libary. Here is a quick example for LU Default using LU Right Looking version: $(FORTRAN) -c myprog.f $(FORTRAN) -o myexe myprog.o $(LAPACKLIB) $(BLASLIB) Using LU Left Looking version: $(FORTRAN) -c myprog.f $(FORTRAN) -o myexe myprog.o $(PATH TO LAPACK/SRC/VARIANTS/LIB)/lull.a $(LAPACKLIB) $(BLASLIB) =========== = SUPPORT = =========== You can use either LAPACK forum or the LAPACK mailing list to get support. LAPACK forum : http://icl.cs.utk.edu/lapack-forum LAPACK mailing list : lapack@cs.utk.edu starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/cholesky/000077500000000000000000000000001507764646700223005ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/cholesky/RL/000077500000000000000000000000001507764646700226155ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/cholesky/RL/dpotrf.c000066400000000000000000000152121507764646700242600ustar00rootroot00000000000000/* dpotrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b17 = 1.; static doublereal c_b20 = -1.; /* Subroutine */ int _starpu_dpotrf_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer j, jb, nb; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dsyrk_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dpotf2_(char *, integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* March 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPOTRF computes the Cholesky factorization of a real symmetric */ /* positive definite matrix A. */ /* The factorization has the form */ /* A = U**T * U, if UPLO = 'U', or */ /* A = L * L**T, if UPLO = 'L', */ /* where U is an upper triangular matrix and L is lower triangular. */ /* This is the right looking block version of the algorithm, calling Level 3 BLAS. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* N-by-N upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if INFO = 0, the factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the leading minor of order i is not */ /* positive definite, and the factorization could not be */ /* completed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPOTRF", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine the block size for this environment. */ nb = _starpu_ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1); if (nb <= 1 || nb >= *n) { /* Use unblocked code. */ _starpu_dpotf2_(uplo, n, &a[a_offset], lda, info); } else { /* Use blocked code. */ if (upper) { /* Compute the Cholesky factorization A = U'*U. */ i__1 = *n; i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Update and factorize the current diagonal block and test */ /* for non-positive-definiteness. */ /* Computing MIN */ i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); _starpu_dpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); if (*info != 0) { goto L30; } if (j + jb <= *n) { /* Updating the trailing submatrix. */ i__3 = *n - j - jb + 1; _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & i__3, &c_b17, &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda); i__3 = *n - j - jb + 1; _starpu_dsyrk_("Upper", "Transpose", &i__3, &jb, &c_b20, &a[j + ( j + jb) * a_dim1], lda, &c_b17, &a[j + jb + (j + jb) * a_dim1], lda); } /* L10: */ } } else { /* Compute the Cholesky factorization A = L*L'. */ i__2 = *n; i__1 = nb; for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Update and factorize the current diagonal block and test */ /* for non-positive-definiteness. */ /* Computing MIN */ i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); _starpu_dpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); if (*info != 0) { goto L30; } if (j + jb <= *n) { /* Updating the trailing submatrix. */ i__3 = *n - j - jb + 1; _starpu_dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & jb, &c_b17, &a[j + j * a_dim1], lda, &a[j + jb + j * a_dim1], lda); i__3 = *n - j - jb + 1; _starpu_dsyrk_("Lower", "No Transpose", &i__3, &jb, &c_b20, &a[j + jb + j * a_dim1], lda, &c_b17, &a[j + jb + (j + jb) * a_dim1], lda); } /* L20: */ } } } goto L40; L30: *info = *info + j - 1; L40: return 0; /* End of DPOTRF */ } /* _starpu_dpotrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/cholesky/TOP/000077500000000000000000000000001507764646700227425ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/cholesky/TOP/dpotrf.c000066400000000000000000000146131507764646700244110ustar00rootroot00000000000000/* dpotrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b15 = 1.; static doublereal c_b18 = -1.; /* Subroutine */ int _starpu_dpotrf_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer j, jb, nb; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dsyrk_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dpotf2_(char *, integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* March 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPOTRF computes the Cholesky factorization of a real symmetric */ /* positive definite matrix A. */ /* The factorization has the form */ /* A = U**T * U, if UPLO = 'U', or */ /* A = L * L**T, if UPLO = 'L', */ /* where U is an upper triangular matrix and L is lower triangular. */ /* This is the top-looking block version of the algorithm, calling Level 3 BLAS. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* N-by-N upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if INFO = 0, the factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the leading minor of order i is not */ /* positive definite, and the factorization could not be */ /* completed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPOTRF", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine the block size for this environment. */ nb = _starpu_ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1); if (nb <= 1 || nb >= *n) { /* Use unblocked code. */ _starpu_dpotf2_(uplo, n, &a[a_offset], lda, info); } else { /* Use blocked code. */ if (upper) { /* Compute the Cholesky factorization A = U'*U. */ i__1 = *n; i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); /* Compute the current block. */ i__3 = j - 1; _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", &i__3, &jb, & c_b15, &a[a_dim1 + 1], lda, &a[j * a_dim1 + 1], lda); i__3 = j - 1; _starpu_dsyrk_("Upper", "Transpose", &jb, &i__3, &c_b18, &a[j * a_dim1 + 1], lda, &c_b15, &a[j + j * a_dim1], lda); /* Update and factorize the current diagonal block and test */ /* for non-positive-definiteness. */ _starpu_dpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); if (*info != 0) { goto L30; } /* L10: */ } } else { /* Compute the Cholesky factorization A = L*L'. */ i__2 = *n; i__1 = nb; for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Computing MIN */ i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); /* Compute the current block. */ i__3 = j - 1; _starpu_dtrsm_("Right", "Lower", "Transpose", "Non-unit", &jb, &i__3, &c_b15, &a[a_dim1 + 1], lda, &a[j + a_dim1], lda); i__3 = j - 1; _starpu_dsyrk_("Lower", "No Transpose", &jb, &i__3, &c_b18, &a[j + a_dim1], lda, &c_b15, &a[j + j * a_dim1], lda); /* Update and factorize the current diagonal block and test */ /* for non-positive-definiteness. */ _starpu_dpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); if (*info != 0) { goto L30; } /* L20: */ } } } goto L40; L30: *info = *info + j - 1; L40: return 0; /* End of DPOTRF */ } /* _starpu_dpotrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/lu/000077500000000000000000000000001507764646700210775ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/lu/CR/000077500000000000000000000000001507764646700214035ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/lu/CR/dgetrf.c000066400000000000000000000147211507764646700230270ustar00rootroot00000000000000/* dgetrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b11 = -1.; static doublereal c_b12 = 1.; /* Subroutine */ int _starpu_dgetrf_(integer *m, integer *n, doublereal *a, integer * lda, integer *ipiv, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; /* Local variables */ integer i__, j, jb, nb; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer iinfo; extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dgetf2_( integer *, integer *, doublereal *, integer *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* March 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGETRF computes an LU factorization of a general M-by-N matrix A */ /* using partial pivoting with row interchanges. */ /* The factorization has the form */ /* A = P * L * U */ /* where P is a permutation matrix, L is lower triangular with unit */ /* diagonal elements (lower trapezoidal if m > n), and U is upper */ /* triangular (upper trapezoidal if m < n). */ /* This is the Crout Level 3 BLAS version of the algorithm. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix to be factored. */ /* On exit, the factors L and U from the factorization */ /* A = P*L*U; the unit diagonal elements of L are not stored. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* IPIV (output) INTEGER array, dimension (min(M,N)) */ /* The pivot indices; for 1 <= i <= min(M,N), row i of the */ /* matrix was interchanged with row IPIV(i). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, and division by zero will occur if it is used */ /* to solve a system of equations. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGETRF", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Determine the block size for this environment. */ nb = _starpu_ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1); if (nb <= 1 || nb >= min(*m,*n)) { /* Use unblocked code. */ _starpu_dgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); } else { /* Use blocked code. */ i__1 = min(*m,*n); i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__3 = min(*m,*n) - j + 1; jb = min(i__3,nb); /* Update current block. */ i__3 = *m - j + 1; i__4 = j - 1; _starpu_dgemm_("No transpose", "No transpose", &i__3, &jb, &i__4, &c_b11, &a[j + a_dim1], lda, &a[j * a_dim1 + 1], lda, &c_b12, &a[ j + j * a_dim1], lda); /* Factor diagonal and subdiagonal blocks and test for exact */ /* singularity. */ i__3 = *m - j + 1; _starpu_dgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); /* Adjust INFO and the pivot indices. */ if (*info == 0 && iinfo > 0) { *info = iinfo + j - 1; } /* Computing MIN */ i__4 = *m, i__5 = j + jb - 1; i__3 = min(i__4,i__5); for (i__ = j; i__ <= i__3; ++i__) { ipiv[i__] = j - 1 + ipiv[i__]; /* L10: */ } /* Apply interchanges to column 1:J-1 */ i__3 = j - 1; i__4 = j + jb - 1; _starpu_dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); if (j + jb <= *n) { /* Apply interchanges to column J+JB:N */ i__3 = *n - j - jb + 1; i__4 = j + jb - 1; _starpu_dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & ipiv[1], &c__1); i__3 = *n - j - jb + 1; i__4 = j - 1; _starpu_dgemm_("No transpose", "No transpose", &jb, &i__3, &i__4, & c_b11, &a[j + a_dim1], lda, &a[(j + jb) * a_dim1 + 1], lda, &c_b12, &a[j + (j + jb) * a_dim1], lda); /* Compute block row of U. */ i__3 = *n - j - jb + 1; _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & c_b12, &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda); } /* L20: */ } } return 0; /* End of DGETRF */ } /* _starpu_dgetrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/lu/LL/000077500000000000000000000000001507764646700214065ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/lu/LL/dgetrf.c000066400000000000000000000164631507764646700230370ustar00rootroot00000000000000/* dgetrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b15 = 1.; static doublereal c_b18 = -1.; /* Subroutine */ int _starpu_dgetrf_(integer *m, integer *n, doublereal *a, integer * lda, integer *ipiv, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; /* Local variables */ integer i__, j, k, jb, nb; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer iinfo; extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dgetf2_( integer *, integer *, doublereal *, integer *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* March 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGETRF computes an LU factorization of a general M-by-N matrix A */ /* using partial pivoting with row interchanges. */ /* The factorization has the form */ /* A = P * L * U */ /* where P is a permutation matrix, L is lower triangular with unit */ /* diagonal elements (lower trapezoidal if m > n), and U is upper */ /* triangular (upper trapezoidal if m < n). */ /* This is the left-looking Level 3 BLAS version of the algorithm. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix to be factored. */ /* On exit, the factors L and U from the factorization */ /* A = P*L*U; the unit diagonal elements of L are not stored. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* IPIV (output) INTEGER array, dimension (min(M,N)) */ /* The pivot indices; for 1 <= i <= min(M,N), row i of the */ /* matrix was interchanged with row IPIV(i). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, and division by zero will occur if it is used */ /* to solve a system of equations. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGETRF", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Determine the block size for this environment. */ nb = _starpu_ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1); if (nb <= 1 || nb >= min(*m,*n)) { /* Use unblocked code. */ _starpu_dgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); } else { /* Use blocked code. */ i__1 = min(*m,*n); i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__3 = min(*m,*n) - j + 1; jb = min(i__3,nb); /* Update before factoring the current panel */ i__3 = j - nb; i__4 = nb; for (k = 1; i__4 < 0 ? k >= i__3 : k <= i__3; k += i__4) { /* Apply interchanges to rows K:K+NB-1. */ i__5 = k + nb - 1; _starpu_dlaswp_(&jb, &a[j * a_dim1 + 1], lda, &k, &i__5, &ipiv[1], & c__1); /* Compute block row of U. */ _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &nb, &jb, & c_b15, &a[k + k * a_dim1], lda, &a[k + j * a_dim1], lda); /* Update trailing submatrix. */ i__5 = *m - k - nb + 1; _starpu_dgemm_("No transpose", "No transpose", &i__5, &jb, &nb, & c_b18, &a[k + nb + k * a_dim1], lda, &a[k + j * a_dim1], lda, &c_b15, &a[k + nb + j * a_dim1], lda); /* L30: */ } /* Factor diagonal and subdiagonal blocks and test for exact */ /* singularity. */ i__4 = *m - j + 1; _starpu_dgetf2_(&i__4, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); /* Adjust INFO and the pivot indices. */ if (*info == 0 && iinfo > 0) { *info = iinfo + j - 1; } /* Computing MIN */ i__3 = *m, i__5 = j + jb - 1; i__4 = min(i__3,i__5); for (i__ = j; i__ <= i__4; ++i__) { ipiv[i__] = j - 1 + ipiv[i__]; /* L10: */ } /* L20: */ } /* Apply interchanges to the left-overs */ i__2 = min(*m,*n); i__1 = nb; for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { i__4 = k - 1; /* Computing MIN */ i__5 = k + nb - 1, i__6 = min(*m,*n); i__3 = min(i__5,i__6); _starpu_dlaswp_(&i__4, &a[a_dim1 + 1], lda, &k, &i__3, &ipiv[1], &c__1); /* L40: */ } /* Apply update to the M+1:N columns when N > M */ if (*n > *m) { i__1 = *n - *m; _starpu_dlaswp_(&i__1, &a[(*m + 1) * a_dim1 + 1], lda, &c__1, m, &ipiv[1], &c__1); i__1 = *m; i__2 = nb; for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { /* Computing MIN */ i__4 = *m - k + 1; jb = min(i__4,nb); i__4 = *n - *m; _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__4, & c_b15, &a[k + k * a_dim1], lda, &a[k + (*m + 1) * a_dim1], lda); if (k + nb <= *m) { i__4 = *m - k - nb + 1; i__3 = *n - *m; _starpu_dgemm_("No transpose", "No transpose", &i__4, &i__3, &nb, &c_b18, &a[k + nb + k * a_dim1], lda, &a[k + (*m + 1) * a_dim1], lda, &c_b15, &a[k + nb + (*m + 1) * a_dim1], lda); } /* L50: */ } } } return 0; /* End of DGETRF */ } /* _starpu_dgetrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/lu/REC/000077500000000000000000000000001507764646700215105ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/lu/REC/dgetrf.c000066400000000000000000000205641507764646700231360ustar00rootroot00000000000000/* dgetrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b12 = 1.; static doublereal c_b15 = -1.; /* Subroutine */ int _starpu_dgetrf_(integer *m, integer *n, doublereal *a, integer * lda, integer *ipiv, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1; /* Local variables */ integer i__, j, ipivstart, jpivstart, jp; doublereal tmp; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer * , doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer kcols; doublereal sfmin; integer nstep; extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer kahead; extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern logical _starpu_disnan_(doublereal *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); integer npived; extern /* Subroutine */ int _starpu_dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); integer kstart, ntopiv; /* -- LAPACK routine (version 3.X) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* May 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGETRF computes an LU factorization of a general M-by-N matrix A */ /* using partial pivoting with row interchanges. */ /* The factorization has the form */ /* A = P * L * U */ /* where P is a permutation matrix, L is lower triangular with unit */ /* diagonal elements (lower trapezoidal if m > n), and U is upper */ /* triangular (upper trapezoidal if m < n). */ /* This code implements an iterative version of Sivan Toledo's recursive */ /* LU algorithm[1]. For square matrices, this iterative versions should */ /* be within a factor of two of the optimum number of memory transfers. */ /* The pattern is as follows, with the large blocks of U being updated */ /* in one call to DTRSM, and the dotted lines denoting sections that */ /* have had all pending permutations applied: */ /* 1 2 3 4 5 6 7 8 */ /* +-+-+---+-------+------ */ /* | |1| | | */ /* |.+-+ 2 | | */ /* | | | | | */ /* |.|.+-+-+ 4 | */ /* | | | |1| | */ /* | | |.+-+ | */ /* | | | | | | */ /* |.|.|.|.+-+-+---+ 8 */ /* | | | | | |1| | */ /* | | | | |.+-+ 2 | */ /* | | | | | | | | */ /* | | | | |.|.+-+-+ */ /* | | | | | | | |1| */ /* | | | | | | |.+-+ */ /* | | | | | | | | | */ /* |.|.|.|.|.|.|.|.+----- */ /* | | | | | | | | | */ /* The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in */ /* the binary expansion of the current column. Each Schur update is */ /* applied as soon as the necessary portion of U is available. */ /* [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with */ /* Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), */ /* 1065-1081. http://dx.doi.org/10.1137/S0895479896297744 */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix to be factored. */ /* On exit, the factors L and U from the factorization */ /* A = P*L*U; the unit diagonal elements of L are not stored. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* IPIV (output) INTEGER array, dimension (min(M,N)) */ /* The pivot indices; for 1 <= i <= min(M,N), row i of the */ /* matrix was interchanged with row IPIV(i). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, and division by zero will occur if it is used */ /* to solve a system of equations. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGETRF", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Compute machine safe minimum */ sfmin = _starpu_dlamch_("S"); nstep = min(*m,*n); i__1 = nstep; for (j = 1; j <= i__1; ++j) { kahead = j & -j; kstart = j + 1 - kahead; /* Computing MIN */ i__2 = kahead, i__3 = *m - j; kcols = min(i__2,i__3); /* Find pivot. */ i__2 = *m - j + 1; jp = j - 1 + _starpu_idamax_(&i__2, &a[j + j * a_dim1], &c__1); ipiv[j] = jp; /* Permute just this column. */ if (jp != j) { tmp = a[j + j * a_dim1]; a[j + j * a_dim1] = a[jp + j * a_dim1]; a[jp + j * a_dim1] = tmp; } /* Apply pending permutations to L */ ntopiv = 1; ipivstart = j; jpivstart = j - ntopiv; while(ntopiv < kahead) { _starpu_dlaswp_(&ntopiv, &a[jpivstart * a_dim1 + 1], lda, &ipivstart, &j, &ipiv[1], &c__1); ipivstart -= ntopiv; ntopiv <<= 1; jpivstart -= ntopiv; } /* Permute U block to match L */ _starpu_dlaswp_(&kcols, &a[(j + 1) * a_dim1 + 1], lda, &kstart, &j, &ipiv[1], &c__1); /* Factor the current column */ if (a[j + j * a_dim1] != 0. && ! _starpu_disnan_(&a[j + j * a_dim1])) { if ((d__1 = a[j + j * a_dim1], abs(d__1)) >= sfmin) { i__2 = *m - j; d__1 = 1. / a[j + j * a_dim1]; _starpu_dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); } else { i__2 = *m - j; for (i__ = 1; i__ <= i__2; ++i__) { a[j + i__ + j * a_dim1] /= a[j + j * a_dim1]; } } } else if (a[j + j * a_dim1] == 0. && *info == 0) { *info = j; } /* Solve for U block. */ _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &kahead, &kcols, & c_b12, &a[kstart + kstart * a_dim1], lda, &a[kstart + (j + 1) * a_dim1], lda); /* Schur complement. */ i__2 = *m - j; _starpu_dgemm_("No transpose", "No transpose", &i__2, &kcols, &kahead, &c_b15, &a[j + 1 + kstart * a_dim1], lda, &a[kstart + (j + 1) * a_dim1], lda, &c_b12, &a[j + 1 + (j + 1) * a_dim1], lda); } /* Handle pivot permutations on the way out of the recursion */ npived = nstep & -nstep; j = nstep - npived; while(j > 0) { ntopiv = j & -j; i__1 = j + 1; _starpu_dlaswp_(&ntopiv, &a[(j - ntopiv + 1) * a_dim1 + 1], lda, &i__1, & nstep, &ipiv[1], &c__1); j -= ntopiv; } /* If short and wide, handle the rest of the columns. */ if (*m < *n) { i__1 = *n - *m; _starpu_dlaswp_(&i__1, &a[(*m + kcols + 1) * a_dim1 + 1], lda, &c__1, m, & ipiv[1], &c__1); i__1 = *n - *m; _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", m, &i__1, &c_b12, &a[ a_offset], lda, &a[(*m + kcols + 1) * a_dim1 + 1], lda); } return 0; /* End of DGETRF */ } /* _starpu_dgetrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/qr/000077500000000000000000000000001507764646700211015ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/qr/LL/000077500000000000000000000000001507764646700214105ustar00rootroot00000000000000starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/qr/LL/dgeqrf.c000066400000000000000000000300461507764646700230270ustar00rootroot00000000000000/* dgeqrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; /* Subroutine */ int _starpu_dgeqrf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; real r__1; /* Local variables */ integer i__, j, k, ib, nb, nt, nx, iws; extern doublereal _starpu_sceil_(real *); integer nbmin, iinfo; extern /* Subroutine */ int _starpu_dgeqr2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer lbwork, llwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* March 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEQRF computes a QR factorization of a real M-by-N matrix A: */ /* A = Q * R. */ /* This is the left-looking Level 3 BLAS version of the algorithm. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, the elements on and above the diagonal of the array */ /* contain the min(M,N)-by-N upper trapezoidal matrix R (R is */ /* upper triangular if m >= n); the elements below the diagonal, */ /* with the array TAU, represent the orthogonal matrix Q as a */ /* product of min(m,n) elementary reflectors (see Further */ /* Details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. The dimension can be divided into three parts. */ /* 1) The part for the triangular factor T. If the very last T is not bigger */ /* than any of the rest, then this part is NB x ceiling(K/NB), otherwise, */ /* NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T */ /* 2) The part for the very last T when T is bigger than any of the rest T. */ /* The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB, */ /* where K = min(M,N), NX is calculated by */ /* NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) ) */ /* 3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB) */ /* So LWORK = part1 + part2 + part3 */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ /* and tau in TAU(i). */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; nbmin = 2; nx = 0; iws = *n; k = min(*m,*n); nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); if (nb > 1 && nb < k) { /* Determine when to cross over from blocked to unblocked code. */ /* Computing MAX */ i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1); nx = max(i__1,i__2); } /* Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.: */ /* NB=3 2NB=6 K=10 */ /* | | | */ /* 1--2--3--4--5--6--7--8--9--10 */ /* | \________/ */ /* K-NX=5 NT=4 */ /* So here 4 x 4 is the last T stored in the workspace */ r__1 = (real) (k - nx) / (real) nb; nt = k - _starpu_sceil_(&r__1) * nb; /* optimal workspace = space for dlarfb + space for normal T's + space for the last T */ /* Computing MAX */ /* Computing MAX */ i__3 = (*n - *m) * k, i__4 = (*n - *m) * nb; /* Computing MAX */ i__5 = k * nb, i__6 = nb * nb; i__1 = max(i__3,i__4), i__2 = max(i__5,i__6); llwork = max(i__1,i__2); r__1 = (real) llwork / (real) nb; llwork = _starpu_sceil_(&r__1); if (nt > nb) { lbwork = k - nt; /* Optimal workspace for dlarfb = MAX(1,N)*NT */ lwkopt = (lbwork + llwork) * nb; work[1] = (doublereal) (lwkopt + nt * nt); } else { r__1 = (real) k / (real) nb; lbwork = _starpu_sceil_(&r__1) * nb; lwkopt = (lbwork + llwork - nb) * nb; work[1] = (doublereal) lwkopt; } /* Test the input arguments */ lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } else if (*lwork < max(1,*n) && ! lquery) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEQRF", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (k == 0) { work[1] = 1.; return 0; } if (nb > 1 && nb < k) { if (nx < k) { /* Determine if workspace is large enough for blocked code. */ if (nt <= nb) { iws = (lbwork + llwork - nb) * nb; } else { iws = (lbwork + llwork) * nb + nt * nt; } if (*lwork < iws) { /* Not enough workspace to use optimal NB: reduce NB and */ /* determine the minimum value of NB. */ if (nt <= nb) { nb = *lwork / (llwork + (lbwork - nb)); } else { nb = (*lwork - nt * nt) / (lbwork + llwork); } /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, & c_n1); nbmin = max(i__1,i__2); } } } if (nb >= nbmin && nb < k && nx < k) { /* Use blocked code initially */ i__1 = k - nx; i__2 = nb; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = k - i__ + 1; ib = min(i__3,nb); /* Update the current column using old T's */ i__3 = i__ - nb; i__4 = nb; for (j = 1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { /* Apply H' to A(J:M,I:I+IB-1) from the left */ i__5 = *m - j + 1; _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__5, & ib, &nb, &a[j + j * a_dim1], lda, &work[j], &lbwork, & a[j + i__ * a_dim1], lda, &work[lbwork * nb + nt * nt + 1], &ib); /* L20: */ } /* Compute the QR factorization of the current block */ /* A(I:M,I:I+IB-1) */ i__4 = *m - i__ + 1; _starpu_dgeqr2_(&i__4, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ lbwork * nb + nt * nt + 1], &iinfo); if (i__ + ib <= *n) { /* Form the triangular factor of the block reflector */ /* H = H(i) H(i+1) . . . H(i+ib-1) */ i__4 = *m - i__ + 1; _starpu_dlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[i__], &lbwork); } /* L10: */ } } else { i__ = 1; } /* Use unblocked code to factor the last or only block. */ if (i__ <= k) { if (i__ != 1) { i__2 = i__ - nb; i__1 = nb; for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Apply H' to A(J:M,I:K) from the left */ i__4 = *m - j + 1; i__3 = k - i__ + 1; i__5 = k - i__ + 1; _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__4, & i__3, &nb, &a[j + j * a_dim1], lda, &work[j], &lbwork, &a[j + i__ * a_dim1], lda, &work[lbwork * nb + nt * nt + 1], &i__5); /* L30: */ } i__1 = *m - i__ + 1; i__2 = k - i__ + 1; _starpu_dgeqr2_(&i__1, &i__2, &a[i__ + i__ * a_dim1], lda, &tau[i__], & work[lbwork * nb + nt * nt + 1], &iinfo); } else { /* Use unblocked code to factor the last or only block. */ i__1 = *m - i__ + 1; i__2 = *n - i__ + 1; _starpu_dgeqr2_(&i__1, &i__2, &a[i__ + i__ * a_dim1], lda, &tau[i__], & work[1], &iinfo); } } /* Apply update to the column M+1:N when N > M */ if (*m < *n && i__ != 1) { /* Form the last triangular factor of the block reflector */ /* H = H(i) H(i+1) . . . H(i+ib-1) */ if (nt <= nb) { i__1 = *m - i__ + 1; i__2 = k - i__ + 1; _starpu_dlarft_("Forward", "Columnwise", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[i__], &lbwork); } else { i__1 = *m - i__ + 1; i__2 = k - i__ + 1; _starpu_dlarft_("Forward", "Columnwise", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[lbwork * nb + 1], &nt); } /* Apply H' to A(1:M,M+1:N) from the left */ i__1 = k - nx; i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__4 = k - j + 1; ib = min(i__4,nb); i__4 = *m - j + 1; i__3 = *n - *m; i__5 = *n - *m; _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__4, & i__3, &ib, &a[j + j * a_dim1], lda, &work[j], &lbwork, &a[ j + (*m + 1) * a_dim1], lda, &work[lbwork * nb + nt * nt + 1], &i__5); /* L40: */ } if (nt <= nb) { i__2 = *m - j + 1; i__1 = *n - *m; i__4 = k - j + 1; i__3 = *n - *m; _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__2, & i__1, &i__4, &a[j + j * a_dim1], lda, &work[j], &lbwork, & a[j + (*m + 1) * a_dim1], lda, &work[lbwork * nb + nt * nt + 1], &i__3); } else { i__2 = *m - j + 1; i__1 = *n - *m; i__4 = k - j + 1; i__3 = *n - *m; _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__2, & i__1, &i__4, &a[j + j * a_dim1], lda, &work[lbwork * nb + 1], &nt, &a[j + (*m + 1) * a_dim1], lda, &work[lbwork * nb + nt * nt + 1], &i__3); } } work[1] = (doublereal) iws; return 0; /* End of DGEQRF */ } /* _starpu_dgeqrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/VARIANTS/qr/LL/sceil.c000066400000000000000000000022131507764646700226510ustar00rootroot00000000000000/* sceil.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_sceil_(real *a) { /* System generated locals */ real ret_val; /* -- LAPACK routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* June 2008 */ /* .. Scalar Arguments ..* */ /* .. */ /* ===================================================================== */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements ..* */ if (*a - (integer) (*a) == 0.f) { ret_val = *a; } else if (*a > 0.f) { ret_val = (real) ((integer) (*a) + 1); } else { ret_val = (real) ((integer) (*a)); } return ret_val; } /* _starpu_sceil_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/chla_transtype.c000066400000000000000000000037121507764646700224170ustar00rootroot00000000000000/* _starpu_chla_transtype.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Character */ VOID _starpu_chla_transtype__(char *ret_val, ftnlen ret_val_len, integer *trans) { /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* October 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This subroutine translates from a BLAST-specified integer constant to */ /* the character string specifying a transposition operation. */ /* CHLA_TRANSTYPE returns an CHARACTER*1. If CHLA_TRANSTYPE is 'X', */ /* then input is not an integer indicating a transposition operator. */ /* Otherwise CHLA_TRANSTYPE returns the constant value corresponding to */ /* TRANS. */ /* Arguments */ /* ========= */ /* TRANS (input) INTEGER */ /* Specifies the form of the system of equations: */ /* = BLAS_NO_TRANS = 111 : No Transpose */ /* = BLAS_TRANS = 112 : Transpose */ /* = BLAS_CONJ_TRANS = 113 : Conjugate Transpose */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Executable Statements .. */ if (*trans == 111) { *(unsigned char *)ret_val = 'N'; } else if (*trans == 112) { *(unsigned char *)ret_val = 'T'; } else if (*trans == 113) { *(unsigned char *)ret_val = 'C'; } else { *(unsigned char *)ret_val = 'X'; } return ; /* End of CHLA_TRANSTYPE */ } /* _starpu_chla_transtype__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dbdsdc.c000066400000000000000000000411011507764646700206140ustar00rootroot00000000000000/* dbdsdc.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__9 = 9; static integer c__0 = 0; static doublereal c_b15 = 1.; static integer c__1 = 1; static doublereal c_b29 = 0.; /* Subroutine */ int _starpu_dbdsdc_(char *uplo, char *compq, integer *n, doublereal * d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * iwork, integer *info) { /* System generated locals */ integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double d_sign(doublereal *, doublereal *), log(doublereal); /* Local variables */ integer i__, j, k; doublereal p, r__; integer z__, ic, ii, kk; doublereal cs; integer is, iu; doublereal sn; integer nm1; doublereal eps; integer ivt, difl, difr, ierr, perm, mlvl, sqre; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer * , doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); integer poles, iuplo, nsize, start; extern /* Subroutine */ int _starpu_dlasd0_(integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlasda_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlasdq_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); integer givcol; extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); integer icompq; doublereal orgnrm; integer givnum, givptr, qstart, smlsiz, wstart, smlszp; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DBDSDC computes the singular value decomposition (SVD) of a real */ /* N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, */ /* using a divide and conquer method, where S is a diagonal matrix */ /* with non-negative diagonal elements (the singular values of B), and */ /* U and VT are orthogonal matrices of left and right singular vectors, */ /* respectively. DBDSDC can be used to compute all singular values, */ /* and optionally, singular vectors or singular vectors in compact form. */ /* This code makes very mild assumptions about floating point */ /* arithmetic. It will work on machines with a guard digit in */ /* add/subtract, or on those binary machines without guard digits */ /* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */ /* It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. See DLASD3 for details. */ /* The code currently calls DLASDQ if singular values only are desired. */ /* However, it can be slightly modified to compute singular values */ /* using the divide and conquer method. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': B is upper bidiagonal. */ /* = 'L': B is lower bidiagonal. */ /* COMPQ (input) CHARACTER*1 */ /* Specifies whether singular vectors are to be computed */ /* as follows: */ /* = 'N': Compute singular values only; */ /* = 'P': Compute singular values and compute singular */ /* vectors in compact form; */ /* = 'I': Compute singular values and singular vectors. */ /* N (input) INTEGER */ /* The order of the matrix B. N >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the n diagonal elements of the bidiagonal matrix B. */ /* On exit, if INFO=0, the singular values of B. */ /* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, the elements of E contain the offdiagonal */ /* elements of the bidiagonal matrix whose SVD is desired. */ /* On exit, E has been destroyed. */ /* U (output) DOUBLE PRECISION array, dimension (LDU,N) */ /* If COMPQ = 'I', then: */ /* On exit, if INFO = 0, U contains the left singular vectors */ /* of the bidiagonal matrix. */ /* For other values of COMPQ, U is not referenced. */ /* LDU (input) INTEGER */ /* The leading dimension of the array U. LDU >= 1. */ /* If singular vectors are desired, then LDU >= max( 1, N ). */ /* VT (output) DOUBLE PRECISION array, dimension (LDVT,N) */ /* If COMPQ = 'I', then: */ /* On exit, if INFO = 0, VT' contains the right singular */ /* vectors of the bidiagonal matrix. */ /* For other values of COMPQ, VT is not referenced. */ /* LDVT (input) INTEGER */ /* The leading dimension of the array VT. LDVT >= 1. */ /* If singular vectors are desired, then LDVT >= max( 1, N ). */ /* Q (output) DOUBLE PRECISION array, dimension (LDQ) */ /* If COMPQ = 'P', then: */ /* On exit, if INFO = 0, Q and IQ contain the left */ /* and right singular vectors in a compact form, */ /* requiring O(N log N) space instead of 2*N**2. */ /* In particular, Q contains all the DOUBLE PRECISION data in */ /* LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) */ /* words of memory, where SMLSIZ is returned by ILAENV and */ /* is equal to the maximum size of the subproblems at the */ /* bottom of the computation tree (usually about 25). */ /* For other values of COMPQ, Q is not referenced. */ /* IQ (output) INTEGER array, dimension (LDIQ) */ /* If COMPQ = 'P', then: */ /* On exit, if INFO = 0, Q and IQ contain the left */ /* and right singular vectors in a compact form, */ /* requiring O(N log N) space instead of 2*N**2. */ /* In particular, IQ contains all INTEGER data in */ /* LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) */ /* words of memory, where SMLSIZ is returned by ILAENV and */ /* is equal to the maximum size of the subproblems at the */ /* bottom of the computation tree (usually about 25). */ /* For other values of COMPQ, IQ is not referenced. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* If COMPQ = 'N' then LWORK >= (4 * N). */ /* If COMPQ = 'P' then LWORK >= (6 * N). */ /* If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). */ /* IWORK (workspace) INTEGER array, dimension (8*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: The algorithm failed to compute an singular value. */ /* The update process of divide and conquer failed. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* Changed dimension statement in comment describing E from (N) to */ /* (N-1). Sven, 17 Feb 05. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1; vt -= vt_offset; --q; --iq; --work; --iwork; /* Function Body */ *info = 0; iuplo = 0; if (_starpu_lsame_(uplo, "U")) { iuplo = 1; } if (_starpu_lsame_(uplo, "L")) { iuplo = 2; } if (_starpu_lsame_(compq, "N")) { icompq = 0; } else if (_starpu_lsame_(compq, "P")) { icompq = 1; } else if (_starpu_lsame_(compq, "I")) { icompq = 2; } else { icompq = -1; } if (iuplo == 0) { *info = -1; } else if (icompq < 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { *info = -7; } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { *info = -9; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DBDSDC", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } smlsiz = _starpu_ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0); if (*n == 1) { if (icompq == 1) { q[1] = d_sign(&c_b15, &d__[1]); q[smlsiz * *n + 1] = 1.; } else if (icompq == 2) { u[u_dim1 + 1] = d_sign(&c_b15, &d__[1]); vt[vt_dim1 + 1] = 1.; } d__[1] = abs(d__[1]); return 0; } nm1 = *n - 1; /* If matrix lower bidiagonal, rotate to be upper bidiagonal */ /* by applying Givens rotations on the left */ wstart = 1; qstart = 3; if (icompq == 1) { _starpu_dcopy_(n, &d__[1], &c__1, &q[1], &c__1); i__1 = *n - 1; _starpu_dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); } if (iuplo == 2) { qstart = 5; wstart = (*n << 1) - 1; i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { _starpu_dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; if (icompq == 1) { q[i__ + (*n << 1)] = cs; q[i__ + *n * 3] = sn; } else if (icompq == 2) { work[i__] = cs; work[nm1 + i__] = -sn; } /* L10: */ } } /* If ICOMPQ = 0, use DLASDQ to compute the singular values. */ if (icompq == 0) { _starpu_dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ wstart], info); goto L40; } /* If N is smaller than the minimum divide size SMLSIZ, then solve */ /* the problem with another solver. */ if (*n <= smlsiz) { if (icompq == 2) { _starpu_dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); _starpu_dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); _starpu_dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ wstart], info); } else if (icompq == 1) { iu = 1; ivt = iu + *n; _starpu_dlaset_("A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n); _starpu_dlaset_("A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n); _starpu_dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ iu + (qstart - 1) * *n], n, &work[wstart], info); } goto L40; } if (icompq == 2) { _starpu_dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); _starpu_dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); } /* Scale. */ orgnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); if (orgnrm == 0.) { return 0; } _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr); _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, & ierr); eps = _starpu_dlamch_("Epsilon"); mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) / log(2.)) + 1; smlszp = smlsiz + 1; if (icompq == 1) { iu = 1; ivt = smlsiz + 1; difl = ivt + smlszp; difr = difl + mlvl; z__ = difr + (mlvl << 1); ic = z__ + mlvl; is = ic + 1; poles = is + 1; givnum = poles + (mlvl << 1); k = 1; givptr = 2; perm = 3; givcol = perm + mlvl; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if ((d__1 = d__[i__], abs(d__1)) < eps) { d__[i__] = d_sign(&eps, &d__[i__]); } /* L20: */ } start = 1; sqre = 0; i__1 = nm1; for (i__ = 1; i__ <= i__1; ++i__) { if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { /* Subproblem found. First determine its size and then */ /* apply divide and conquer on it. */ if (i__ < nm1) { /* A subproblem with E(I) small for I < NM1. */ nsize = i__ - start + 1; } else if ((d__1 = e[i__], abs(d__1)) >= eps) { /* A subproblem with E(NM1) not too small but I = NM1. */ nsize = *n - start + 1; } else { /* A subproblem with E(NM1) small. This implies an */ /* 1-by-1 subproblem at D(N). Solve this 1-by-1 problem */ /* first. */ nsize = i__ - start + 1; if (icompq == 2) { u[*n + *n * u_dim1] = d_sign(&c_b15, &d__[*n]); vt[*n + *n * vt_dim1] = 1.; } else if (icompq == 1) { q[*n + (qstart - 1) * *n] = d_sign(&c_b15, &d__[*n]); q[*n + (smlsiz + qstart - 1) * *n] = 1.; } d__[*n] = (d__1 = d__[*n], abs(d__1)); } if (icompq == 2) { _starpu_dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + start * u_dim1], ldu, &vt[start + start * vt_dim1], ldvt, &smlsiz, &iwork[1], &work[wstart], info); } else { _starpu_dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ start], &q[start + (iu + qstart - 2) * *n], n, &q[ start + (ivt + qstart - 2) * *n], &iq[start + k * *n], &q[start + (difl + qstart - 2) * *n], &q[start + ( difr + qstart - 2) * *n], &q[start + (z__ + qstart - 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ start + givptr * *n], &iq[start + givcol * *n], n, & iq[start + perm * *n], &q[start + (givnum + qstart - 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ start + (is + qstart - 2) * *n], &work[wstart], & iwork[1], info); if (*info != 0) { return 0; } } start = i__ + 1; } /* L30: */ } /* Unscale */ _starpu_dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr); L40: /* Use Selection Sort to minimize swaps of singular vectors */ i__1 = *n; for (ii = 2; ii <= i__1; ++ii) { i__ = ii - 1; kk = i__; p = d__[i__]; i__2 = *n; for (j = ii; j <= i__2; ++j) { if (d__[j] > p) { kk = j; p = d__[j]; } /* L50: */ } if (kk != i__) { d__[kk] = d__[i__]; d__[i__] = p; if (icompq == 1) { iq[i__] = kk; } else if (icompq == 2) { _starpu_dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], & c__1); _starpu_dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); } } else if (icompq == 1) { iq[i__] = i__; } /* L60: */ } /* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ if (icompq == 1) { if (iuplo == 1) { iq[*n] = 1; } else { iq[*n] = 0; } } /* If B is lower bidiagonal, update U by those Givens rotations */ /* which rotated B to be upper bidiagonal */ if (iuplo == 2 && icompq == 2) { _starpu_dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); } return 0; /* End of DBDSDC */ } /* _starpu_dbdsdc_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dbdsqr.c000066400000000000000000000621701507764646700206610ustar00rootroot00000000000000/* dbdsqr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b15 = -.125; static integer c__1 = 1; static doublereal c_b49 = 1.; static doublereal c_b72 = -1.; /* Subroutine */ int _starpu_dbdsqr_(char *uplo, integer *n, integer *ncvt, integer * nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer * ldc, doublereal *work, integer *info) { /* System generated locals */ integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ double pow_dd(doublereal *, doublereal *), sqrt(doublereal), d_sign( doublereal *, doublereal *); /* Local variables */ doublereal f, g, h__; integer i__, j, m; doublereal r__, cs; integer ll; doublereal sn, mu; integer nm1, nm12, nm13, lll; doublereal eps, sll, tol, abse; integer idir; doublereal abss; integer oldm; doublereal cosl; integer isub, iter; doublereal unfl, sinl, cosr, smin, smax, sinr; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *), _starpu_dlas2_( doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); doublereal oldcs; extern /* Subroutine */ int _starpu_dlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); integer oldll; doublereal shift, sigmn, oldsn; extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); integer maxit; doublereal sminl, sigmx; logical lower; extern /* Subroutine */ int _starpu_dlasq1_(integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dlasv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_xerbla_(char *, integer *); doublereal sminoa, thresh; logical rotate; doublereal tolmul; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* January 2007 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DBDSQR computes the singular values and, optionally, the right and/or */ /* left singular vectors from the singular value decomposition (SVD) of */ /* a real N-by-N (upper or lower) bidiagonal matrix B using the implicit */ /* zero-shift QR algorithm. The SVD of B has the form */ /* B = Q * S * P**T */ /* where S is the diagonal matrix of singular values, Q is an orthogonal */ /* matrix of left singular vectors, and P is an orthogonal matrix of */ /* right singular vectors. If left singular vectors are requested, this */ /* subroutine actually returns U*Q instead of Q, and, if right singular */ /* vectors are requested, this subroutine returns P**T*VT instead of */ /* P**T, for given real input matrices U and VT. When U and VT are the */ /* orthogonal matrices that reduce a general matrix A to bidiagonal */ /* form: A = U*B*VT, as computed by DGEBRD, then */ /* A = (U*Q) * S * (P**T*VT) */ /* is the SVD of A. Optionally, the subroutine may also compute Q**T*C */ /* for a given real input matrix C. */ /* See "Computing Small Singular Values of Bidiagonal Matrices With */ /* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, */ /* LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, */ /* no. 5, pp. 873-912, Sept 1990) and */ /* "Accurate singular values and differential qd algorithms," by */ /* B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics */ /* Department, University of California at Berkeley, July 1992 */ /* for a detailed description of the algorithm. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': B is upper bidiagonal; */ /* = 'L': B is lower bidiagonal. */ /* N (input) INTEGER */ /* The order of the matrix B. N >= 0. */ /* NCVT (input) INTEGER */ /* The number of columns of the matrix VT. NCVT >= 0. */ /* NRU (input) INTEGER */ /* The number of rows of the matrix U. NRU >= 0. */ /* NCC (input) INTEGER */ /* The number of columns of the matrix C. NCC >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the n diagonal elements of the bidiagonal matrix B. */ /* On exit, if INFO=0, the singular values of B in decreasing */ /* order. */ /* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, the N-1 offdiagonal elements of the bidiagonal */ /* matrix B. */ /* On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E */ /* will contain the diagonal and superdiagonal elements of a */ /* bidiagonal matrix orthogonally equivalent to the one given */ /* as input. */ /* VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) */ /* On entry, an N-by-NCVT matrix VT. */ /* On exit, VT is overwritten by P**T * VT. */ /* Not referenced if NCVT = 0. */ /* LDVT (input) INTEGER */ /* The leading dimension of the array VT. */ /* LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. */ /* U (input/output) DOUBLE PRECISION array, dimension (LDU, N) */ /* On entry, an NRU-by-N matrix U. */ /* On exit, U is overwritten by U * Q. */ /* Not referenced if NRU = 0. */ /* LDU (input) INTEGER */ /* The leading dimension of the array U. LDU >= max(1,NRU). */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) */ /* On entry, an N-by-NCC matrix C. */ /* On exit, C is overwritten by Q**T * C. */ /* Not referenced if NCC = 0. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. */ /* LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: If INFO = -i, the i-th argument had an illegal value */ /* > 0: */ /* if NCVT = NRU = NCC = 0, */ /* = 1, a split was marked by a positive value in E */ /* = 2, current block of Z not diagonalized after 30*N */ /* iterations (in inner while loop) */ /* = 3, termination criterion of outer while loop not met */ /* (program created more than N unreduced blocks) */ /* else NCVT = NRU = NCC = 0, */ /* the algorithm did not converge; D and E contain the */ /* elements of a bidiagonal matrix which is orthogonally */ /* similar to the input matrix B; if INFO = i, i */ /* elements of E have not converged to zero. */ /* Internal Parameters */ /* =================== */ /* TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) */ /* TOLMUL controls the convergence criterion of the QR loop. */ /* If it is positive, TOLMUL*EPS is the desired relative */ /* precision in the computed singular values. */ /* If it is negative, abs(TOLMUL*EPS*sigma_max) is the */ /* desired absolute accuracy in the computed singular */ /* values (corresponds to relative accuracy */ /* abs(TOLMUL*EPS) in the largest singular value. */ /* abs(TOLMUL) should be between 1 and 1/EPS, and preferably */ /* between 10 (for fast convergence) and .1/EPS */ /* (for there to be some accuracy in the results). */ /* Default is to lose at either one eighth or 2 of the */ /* available decimal digits in each computed singular value */ /* (whichever is smaller). */ /* MAXITR INTEGER, default = 6 */ /* MAXITR controls the maximum number of passes of the */ /* algorithm through its inner loop. The algorithms stops */ /* (and so fails to converge) if the number of passes */ /* through the inner loop exceeds MAXITR*N**2. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; lower = _starpu_lsame_(uplo, "L"); if (! _starpu_lsame_(uplo, "U") && ! lower) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ncvt < 0) { *info = -3; } else if (*nru < 0) { *info = -4; } else if (*ncc < 0) { *info = -5; } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { *info = -9; } else if (*ldu < max(1,*nru)) { *info = -11; } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { *info = -13; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DBDSQR", &i__1); return 0; } if (*n == 0) { return 0; } if (*n == 1) { goto L160; } /* ROTATE is true if any singular vectors desired, false otherwise */ rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; /* If no singular vectors desired, use qd algorithm */ if (! rotate) { _starpu_dlasq1_(n, &d__[1], &e[1], &work[1], info); return 0; } nm1 = *n - 1; nm12 = nm1 + nm1; nm13 = nm12 + nm1; idir = 0; /* Get machine constants */ eps = _starpu_dlamch_("Epsilon"); unfl = _starpu_dlamch_("Safe minimum"); /* If matrix lower bidiagonal, rotate to be upper bidiagonal */ /* by applying Givens rotations on the left */ if (lower) { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { _starpu_dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; work[i__] = cs; work[nm1 + i__] = sn; /* L10: */ } /* Update singular vectors if desired */ if (*nru > 0) { _starpu_dlasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], ldu); } if (*ncc > 0) { _starpu_dlasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], ldc); } } /* Compute singular values to relative accuracy TOL */ /* (By setting TOL to be negative, algorithm will compute */ /* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) */ /* Computing MAX */ /* Computing MIN */ d__3 = 100., d__4 = pow_dd(&eps, &c_b15); d__1 = 10., d__2 = min(d__3,d__4); tolmul = max(d__1,d__2); tol = tolmul * eps; /* Compute approximate maximum, minimum singular values */ smax = 0.; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1)); smax = max(d__2,d__3); /* L20: */ } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1)); smax = max(d__2,d__3); /* L30: */ } sminl = 0.; if (tol >= 0.) { /* Relative accuracy desired */ sminoa = abs(d__[1]); if (sminoa == 0.) { goto L50; } mu = sminoa; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1] , abs(d__1)))); sminoa = min(sminoa,mu); if (sminoa == 0.) { goto L50; } /* L40: */ } L50: sminoa /= sqrt((doublereal) (*n)); /* Computing MAX */ d__1 = tol * sminoa, d__2 = *n * 6 * *n * unfl; thresh = max(d__1,d__2); } else { /* Absolute accuracy desired */ /* Computing MAX */ d__1 = abs(tol) * smax, d__2 = *n * 6 * *n * unfl; thresh = max(d__1,d__2); } /* Prepare for main iteration loop for the singular values */ /* (MAXIT is the maximum number of passes through the inner */ /* loop permitted before nonconvergence signalled.) */ maxit = *n * 6 * *n; iter = 0; oldll = -1; oldm = -1; /* M points to last element of unconverged part of matrix */ m = *n; /* Begin main iteration loop */ L60: /* Check for convergence or exceeding iteration count */ if (m <= 1) { goto L160; } if (iter > maxit) { goto L200; } /* Find diagonal block of matrix to work on */ if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) { d__[m] = 0.; } smax = (d__1 = d__[m], abs(d__1)); smin = smax; i__1 = m - 1; for (lll = 1; lll <= i__1; ++lll) { ll = m - lll; abss = (d__1 = d__[ll], abs(d__1)); abse = (d__1 = e[ll], abs(d__1)); if (tol < 0. && abss <= thresh) { d__[ll] = 0.; } if (abse <= thresh) { goto L80; } smin = min(smin,abss); /* Computing MAX */ d__1 = max(smax,abss); smax = max(d__1,abse); /* L70: */ } ll = 0; goto L90; L80: e[ll] = 0.; /* Matrix splits since E(LL) = 0 */ if (ll == m - 1) { /* Convergence of bottom singular value, return to top of loop */ --m; goto L60; } L90: ++ll; /* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ if (ll == m - 1) { /* 2 by 2 block, handle separately */ _starpu_dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, &sinl, &cosl); d__[m - 1] = sigmx; e[m - 1] = 0.; d__[m] = sigmn; /* Compute singular vectors, if desired */ if (*ncvt > 0) { _starpu_drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & cosr, &sinr); } if (*nru > 0) { _starpu_drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & c__1, &cosl, &sinl); } if (*ncc > 0) { _starpu_drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & cosl, &sinl); } m += -2; goto L60; } /* If working on new submatrix, choose shift direction */ /* (from larger end diagonal element towards smaller) */ if (ll > oldm || m < oldll) { if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) { /* Chase bulge from top (big end) to bottom (small end) */ idir = 1; } else { /* Chase bulge from bottom (big end) to top (small end) */ idir = 2; } } /* Apply convergence tests */ if (idir == 1) { /* Run convergence test in forward direction */ /* First apply standard test to bottom of matrix */ if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs( d__1)) || tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) { e[m - 1] = 0.; goto L60; } if (tol >= 0.) { /* If relative accuracy desired, */ /* apply convergence criterion forward */ mu = (d__1 = d__[ll], abs(d__1)); sminl = mu; i__1 = m - 1; for (lll = ll; lll <= i__1; ++lll) { if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { e[lll] = 0.; goto L60; } mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[ lll], abs(d__1)))); sminl = min(sminl,mu); /* L100: */ } } } else { /* Run convergence test in backward direction */ /* First apply standard test to top of matrix */ if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1) ) || tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) { e[ll] = 0.; goto L60; } if (tol >= 0.) { /* If relative accuracy desired, */ /* apply convergence criterion backward */ mu = (d__1 = d__[m], abs(d__1)); sminl = mu; i__1 = ll; for (lll = m - 1; lll >= i__1; --lll) { if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { e[lll] = 0.; goto L60; } mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll] , abs(d__1)))); sminl = min(sminl,mu); /* L110: */ } } } oldll = ll; oldm = m; /* Compute shift. First, test if shifting would ruin relative */ /* accuracy, and if so set the shift to zero. */ /* Computing MAX */ d__1 = eps, d__2 = tol * .01; if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1,d__2)) { /* Use a zero shift to avoid loss of relative accuracy */ shift = 0.; } else { /* Compute the shift from 2-by-2 block at end of matrix */ if (idir == 1) { sll = (d__1 = d__[ll], abs(d__1)); _starpu_dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); } else { sll = (d__1 = d__[m], abs(d__1)); _starpu_dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); } /* Test if shift negligible, and if so set to zero */ if (sll > 0.) { /* Computing 2nd power */ d__1 = shift / sll; if (d__1 * d__1 < eps) { shift = 0.; } } } /* Increment iteration count */ iter = iter + m - ll; /* If SHIFT = 0, do simplified QR iteration */ if (shift == 0.) { if (idir == 1) { /* Chase bulge from top to bottom */ /* Save cosines and sines for later singular vector updates */ cs = 1.; oldcs = 1.; i__1 = m - 1; for (i__ = ll; i__ <= i__1; ++i__) { d__1 = d__[i__] * cs; _starpu_dlartg_(&d__1, &e[i__], &cs, &sn, &r__); if (i__ > ll) { e[i__ - 1] = oldsn * r__; } d__1 = oldcs * r__; d__2 = d__[i__ + 1] * sn; _starpu_dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); work[i__ - ll + 1] = cs; work[i__ - ll + 1 + nm1] = sn; work[i__ - ll + 1 + nm12] = oldcs; work[i__ - ll + 1 + nm13] = oldsn; /* L120: */ } h__ = d__[m] * cs; d__[m] = h__ * oldcs; e[m - 1] = h__ * oldsn; /* Update singular vectors */ if (*ncvt > 0) { i__1 = m - ll + 1; _starpu_dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ ll + vt_dim1], ldvt); } if (*nru > 0) { i__1 = m - ll + 1; _starpu_dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + 1], &u[ll * u_dim1 + 1], ldu); } if (*ncc > 0) { i__1 = m - ll + 1; _starpu_dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1], &c__[ll + c_dim1], ldc); } /* Test convergence */ if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { e[m - 1] = 0.; } } else { /* Chase bulge from bottom to top */ /* Save cosines and sines for later singular vector updates */ cs = 1.; oldcs = 1.; i__1 = ll + 1; for (i__ = m; i__ >= i__1; --i__) { d__1 = d__[i__] * cs; _starpu_dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__); if (i__ < m) { e[i__] = oldsn * r__; } d__1 = oldcs * r__; d__2 = d__[i__ - 1] * sn; _starpu_dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); work[i__ - ll] = cs; work[i__ - ll + nm1] = -sn; work[i__ - ll + nm12] = oldcs; work[i__ - ll + nm13] = -oldsn; /* L130: */ } h__ = d__[ll] * cs; d__[ll] = h__ * oldcs; e[ll] = h__ * oldsn; /* Update singular vectors */ if (*ncvt > 0) { i__1 = m - ll + 1; _starpu_dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ nm13 + 1], &vt[ll + vt_dim1], ldvt); } if (*nru > 0) { i__1 = m - ll + 1; _starpu_dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * u_dim1 + 1], ldu); } if (*ncc > 0) { i__1 = m - ll + 1; _starpu_dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ ll + c_dim1], ldc); } /* Test convergence */ if ((d__1 = e[ll], abs(d__1)) <= thresh) { e[ll] = 0.; } } } else { /* Use nonzero shift */ if (idir == 1) { /* Chase bulge from top to bottom */ /* Save cosines and sines for later singular vector updates */ f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[ ll]) + shift / d__[ll]); g = e[ll]; i__1 = m - 1; for (i__ = ll; i__ <= i__1; ++i__) { _starpu_dlartg_(&f, &g, &cosr, &sinr, &r__); if (i__ > ll) { e[i__ - 1] = r__; } f = cosr * d__[i__] + sinr * e[i__]; e[i__] = cosr * e[i__] - sinr * d__[i__]; g = sinr * d__[i__ + 1]; d__[i__ + 1] = cosr * d__[i__ + 1]; _starpu_dlartg_(&f, &g, &cosl, &sinl, &r__); d__[i__] = r__; f = cosl * e[i__] + sinl * d__[i__ + 1]; d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; if (i__ < m - 1) { g = sinl * e[i__ + 1]; e[i__ + 1] = cosl * e[i__ + 1]; } work[i__ - ll + 1] = cosr; work[i__ - ll + 1 + nm1] = sinr; work[i__ - ll + 1 + nm12] = cosl; work[i__ - ll + 1 + nm13] = sinl; /* L140: */ } e[m - 1] = f; /* Update singular vectors */ if (*ncvt > 0) { i__1 = m - ll + 1; _starpu_dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ ll + vt_dim1], ldvt); } if (*nru > 0) { i__1 = m - ll + 1; _starpu_dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + 1], &u[ll * u_dim1 + 1], ldu); } if (*ncc > 0) { i__1 = m - ll + 1; _starpu_dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1], &c__[ll + c_dim1], ldc); } /* Test convergence */ if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { e[m - 1] = 0.; } } else { /* Chase bulge from bottom to top */ /* Save cosines and sines for later singular vector updates */ f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[m] ) + shift / d__[m]); g = e[m - 1]; i__1 = ll + 1; for (i__ = m; i__ >= i__1; --i__) { _starpu_dlartg_(&f, &g, &cosr, &sinr, &r__); if (i__ < m) { e[i__] = r__; } f = cosr * d__[i__] + sinr * e[i__ - 1]; e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; g = sinr * d__[i__ - 1]; d__[i__ - 1] = cosr * d__[i__ - 1]; _starpu_dlartg_(&f, &g, &cosl, &sinl, &r__); d__[i__] = r__; f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; if (i__ > ll + 1) { g = sinl * e[i__ - 2]; e[i__ - 2] = cosl * e[i__ - 2]; } work[i__ - ll] = cosr; work[i__ - ll + nm1] = -sinr; work[i__ - ll + nm12] = cosl; work[i__ - ll + nm13] = -sinl; /* L150: */ } e[ll] = f; /* Test convergence */ if ((d__1 = e[ll], abs(d__1)) <= thresh) { e[ll] = 0.; } /* Update singular vectors if desired */ if (*ncvt > 0) { i__1 = m - ll + 1; _starpu_dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ nm13 + 1], &vt[ll + vt_dim1], ldvt); } if (*nru > 0) { i__1 = m - ll + 1; _starpu_dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * u_dim1 + 1], ldu); } if (*ncc > 0) { i__1 = m - ll + 1; _starpu_dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ ll + c_dim1], ldc); } } } /* QR iteration finished, go back and check convergence */ goto L60; /* All singular values converged, so make them positive */ L160: i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (d__[i__] < 0.) { d__[i__] = -d__[i__]; /* Change sign of singular vectors, if desired */ if (*ncvt > 0) { _starpu_dscal_(ncvt, &c_b72, &vt[i__ + vt_dim1], ldvt); } } /* L170: */ } /* Sort the singular values into decreasing order (insertion sort on */ /* singular values, but only one transposition per singular vector) */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Scan for smallest D(I) */ isub = 1; smin = d__[1]; i__2 = *n + 1 - i__; for (j = 2; j <= i__2; ++j) { if (d__[j] <= smin) { isub = j; smin = d__[j]; } /* L180: */ } if (isub != *n + 1 - i__) { /* Swap singular values and vectors */ d__[isub] = d__[*n + 1 - i__]; d__[*n + 1 - i__] = smin; if (*ncvt > 0) { _starpu_dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + vt_dim1], ldvt); } if (*nru > 0) { _starpu_dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * u_dim1 + 1], &c__1); } if (*ncc > 0) { _starpu_dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + c_dim1], ldc); } } /* L190: */ } goto L220; /* Maximum number of iterations exceeded, failure to converge */ L200: *info = 0; i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { if (e[i__] != 0.) { ++(*info); } /* L210: */ } L220: return 0; /* End of DBDSQR */ } /* _starpu_dbdsqr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/ddisna.c000066400000000000000000000141051507764646700206370ustar00rootroot00000000000000/* ddisna.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_ddisna_(char *job, integer *m, integer *n, doublereal * d__, doublereal *sep, integer *info) { /* System generated locals */ integer i__1; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, k; doublereal eps; logical decr, left, incr, sing, eigen; extern logical _starpu_lsame_(char *, char *); doublereal anorm; logical right; extern doublereal _starpu_dlamch_(char *); doublereal oldgap, safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal newgap, thresh; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DDISNA computes the reciprocal condition numbers for the eigenvectors */ /* of a real symmetric or complex Hermitian matrix or for the left or */ /* right singular vectors of a general m-by-n matrix. The reciprocal */ /* condition number is the 'gap' between the corresponding eigenvalue or */ /* singular value and the nearest other one. */ /* The bound on the error, measured by angle in radians, in the I-th */ /* computed vector is given by */ /* DLAMCH( 'E' ) * ( ANORM / SEP( I ) ) */ /* where ANORM = 2-norm(A) = max( abs( D(j) ) ). SEP(I) is not allowed */ /* to be smaller than DLAMCH( 'E' )*ANORM in order to limit the size of */ /* the error bound. */ /* DDISNA may also be used to compute error bounds for eigenvectors of */ /* the generalized symmetric definite eigenproblem. */ /* Arguments */ /* ========= */ /* JOB (input) CHARACTER*1 */ /* Specifies for which problem the reciprocal condition numbers */ /* should be computed: */ /* = 'E': the eigenvectors of a symmetric/Hermitian matrix; */ /* = 'L': the left singular vectors of a general matrix; */ /* = 'R': the right singular vectors of a general matrix. */ /* M (input) INTEGER */ /* The number of rows of the matrix. M >= 0. */ /* N (input) INTEGER */ /* If JOB = 'L' or 'R', the number of columns of the matrix, */ /* in which case N >= 0. Ignored if JOB = 'E'. */ /* D (input) DOUBLE PRECISION array, dimension (M) if JOB = 'E' */ /* dimension (min(M,N)) if JOB = 'L' or 'R' */ /* The eigenvalues (if JOB = 'E') or singular values (if JOB = */ /* 'L' or 'R') of the matrix, in either increasing or decreasing */ /* order. If singular values, they must be non-negative. */ /* SEP (output) DOUBLE PRECISION array, dimension (M) if JOB = 'E' */ /* dimension (min(M,N)) if JOB = 'L' or 'R' */ /* The reciprocal condition numbers of the vectors. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ --sep; --d__; /* Function Body */ *info = 0; eigen = _starpu_lsame_(job, "E"); left = _starpu_lsame_(job, "L"); right = _starpu_lsame_(job, "R"); sing = left || right; if (eigen) { k = *m; } else if (sing) { k = min(*m,*n); } if (! eigen && ! sing) { *info = -1; } else if (*m < 0) { *info = -2; } else if (k < 0) { *info = -3; } else { incr = TRUE_; decr = TRUE_; i__1 = k - 1; for (i__ = 1; i__ <= i__1; ++i__) { if (incr) { incr = incr && d__[i__] <= d__[i__ + 1]; } if (decr) { decr = decr && d__[i__] >= d__[i__ + 1]; } /* L10: */ } if (sing && k > 0) { if (incr) { incr = incr && 0. <= d__[1]; } if (decr) { decr = decr && d__[k] >= 0.; } } if (! (incr || decr)) { *info = -4; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DDISNA", &i__1); return 0; } /* Quick return if possible */ if (k == 0) { return 0; } /* Compute reciprocal condition numbers */ if (k == 1) { sep[1] = _starpu_dlamch_("O"); } else { oldgap = (d__1 = d__[2] - d__[1], abs(d__1)); sep[1] = oldgap; i__1 = k - 1; for (i__ = 2; i__ <= i__1; ++i__) { newgap = (d__1 = d__[i__ + 1] - d__[i__], abs(d__1)); sep[i__] = min(oldgap,newgap); oldgap = newgap; /* L20: */ } sep[k] = oldgap; } if (sing) { if (left && *m > *n || right && *m < *n) { if (incr) { sep[1] = min(sep[1],d__[1]); } if (decr) { /* Computing MIN */ d__1 = sep[k], d__2 = d__[k]; sep[k] = min(d__1,d__2); } } } /* Ensure that reciprocal condition numbers are not less than */ /* threshold, in order to limit the size of the error bound */ eps = _starpu_dlamch_("E"); safmin = _starpu_dlamch_("S"); /* Computing MAX */ d__2 = abs(d__[1]), d__3 = (d__1 = d__[k], abs(d__1)); anorm = max(d__2,d__3); if (anorm == 0.) { thresh = eps; } else { /* Computing MAX */ d__1 = eps * anorm; thresh = max(d__1,safmin); } i__1 = k; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = sep[i__]; sep[i__] = max(d__1,thresh); /* L30: */ } return 0; /* End of DDISNA */ } /* _starpu_ddisna_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgbbrd.c000066400000000000000000000370651507764646700206330ustar00rootroot00000000000000/* dgbbrd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b8 = 0.; static doublereal c_b9 = 1.; static integer c__1 = 1; /* Subroutine */ int _starpu_dgbbrd_(char *vect, integer *m, integer *n, integer *ncc, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal * d__, doublereal *e, doublereal *q, integer *ldq, doublereal *pt, integer *ldpt, doublereal *c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, c_dim1, c_offset, pt_dim1, pt_offset, q_dim1, q_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; /* Local variables */ integer i__, j, l, j1, j2, kb; doublereal ra, rb, rc; integer kk, ml, mn, nr, mu; doublereal rs; integer kb1, ml0, mu0, klm, kun, nrt, klu1, inca; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); extern logical _starpu_lsame_(char *, char *); logical wantb, wantc; integer minmn; logical wantq; extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_xerbla_(char *, integer *), _starpu_dlargv_( integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlartv_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical wantpt; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGBBRD reduces a real general m-by-n band matrix A to upper */ /* bidiagonal form B by an orthogonal transformation: Q' * A * P = B. */ /* The routine computes B, and optionally forms Q or P', or computes */ /* Q'*C for a given matrix C. */ /* Arguments */ /* ========= */ /* VECT (input) CHARACTER*1 */ /* Specifies whether or not the matrices Q and P' are to be */ /* formed. */ /* = 'N': do not form Q or P'; */ /* = 'Q': form Q only; */ /* = 'P': form P' only; */ /* = 'B': form both. */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* NCC (input) INTEGER */ /* The number of columns of the matrix C. NCC >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals of the matrix A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals of the matrix A. KU >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the m-by-n band matrix A, stored in rows 1 to */ /* KL+KU+1. The j-th column of A is stored in the j-th column of */ /* the array AB as follows: */ /* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). */ /* On exit, A is overwritten by values generated during the */ /* reduction. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array A. LDAB >= KL+KU+1. */ /* D (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The diagonal elements of the bidiagonal matrix B. */ /* E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) */ /* The superdiagonal elements of the bidiagonal matrix B. */ /* Q (output) DOUBLE PRECISION array, dimension (LDQ,M) */ /* If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q. */ /* If VECT = 'N' or 'P', the array Q is not referenced. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. */ /* LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. */ /* PT (output) DOUBLE PRECISION array, dimension (LDPT,N) */ /* If VECT = 'P' or 'B', the n-by-n orthogonal matrix P'. */ /* If VECT = 'N' or 'Q', the array PT is not referenced. */ /* LDPT (input) INTEGER */ /* The leading dimension of the array PT. */ /* LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,NCC) */ /* On entry, an m-by-ncc matrix C. */ /* On exit, C is overwritten by Q'*C. */ /* C is not referenced if NCC = 0. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. */ /* LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (2*max(M,N)) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --d__; --e; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; pt_dim1 = *ldpt; pt_offset = 1 + pt_dim1; pt -= pt_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ wantb = _starpu_lsame_(vect, "B"); wantq = _starpu_lsame_(vect, "Q") || wantb; wantpt = _starpu_lsame_(vect, "P") || wantb; wantc = *ncc > 0; klu1 = *kl + *ku + 1; *info = 0; if (! wantq && ! wantpt && ! _starpu_lsame_(vect, "N")) { *info = -1; } else if (*m < 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ncc < 0) { *info = -4; } else if (*kl < 0) { *info = -5; } else if (*ku < 0) { *info = -6; } else if (*ldab < klu1) { *info = -8; } else if (*ldq < 1 || wantq && *ldq < max(1,*m)) { *info = -12; } else if (*ldpt < 1 || wantpt && *ldpt < max(1,*n)) { *info = -14; } else if (*ldc < 1 || wantc && *ldc < max(1,*m)) { *info = -16; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGBBRD", &i__1); return 0; } /* Initialize Q and P' to the unit matrix, if needed */ if (wantq) { _starpu_dlaset_("Full", m, m, &c_b8, &c_b9, &q[q_offset], ldq); } if (wantpt) { _starpu_dlaset_("Full", n, n, &c_b8, &c_b9, &pt[pt_offset], ldpt); } /* Quick return if possible. */ if (*m == 0 || *n == 0) { return 0; } minmn = min(*m,*n); if (*kl + *ku > 1) { /* Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce */ /* first to lower bidiagonal form and then transform to upper */ /* bidiagonal */ if (*ku > 0) { ml0 = 1; mu0 = 2; } else { ml0 = 2; mu0 = 1; } /* Wherever possible, plane rotations are generated and applied in */ /* vector operations of length NR over the index set J1:J2:KLU1. */ /* The sines of the plane rotations are stored in WORK(1:max(m,n)) */ /* and the cosines in WORK(max(m,n)+1:2*max(m,n)). */ mn = max(*m,*n); /* Computing MIN */ i__1 = *m - 1; klm = min(i__1,*kl); /* Computing MIN */ i__1 = *n - 1; kun = min(i__1,*ku); kb = klm + kun; kb1 = kb + 1; inca = kb1 * *ldab; nr = 0; j1 = klm + 2; j2 = 1 - kun; i__1 = minmn; for (i__ = 1; i__ <= i__1; ++i__) { /* Reduce i-th column and i-th row of matrix to bidiagonal form */ ml = klm + 1; mu = kun + 1; i__2 = kb; for (kk = 1; kk <= i__2; ++kk) { j1 += kb; j2 += kb; /* generate plane rotations to annihilate nonzero elements */ /* which have been created below the band */ if (nr > 0) { _starpu_dlargv_(&nr, &ab[klu1 + (j1 - klm - 1) * ab_dim1], &inca, &work[j1], &kb1, &work[mn + j1], &kb1); } /* apply plane rotations from the left */ i__3 = kb; for (l = 1; l <= i__3; ++l) { if (j2 - klm + l - 1 > *n) { nrt = nr - 1; } else { nrt = nr; } if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[klu1 - l + (j1 - klm + l - 1) * ab_dim1], &inca, &ab[klu1 - l + 1 + (j1 - klm + l - 1) * ab_dim1], &inca, &work[mn + j1], & work[j1], &kb1); } /* L10: */ } if (ml > ml0) { if (ml <= *m - i__ + 1) { /* generate plane rotation to annihilate a(i+ml-1,i) */ /* within the band, and apply rotation from the left */ _starpu_dlartg_(&ab[*ku + ml - 1 + i__ * ab_dim1], &ab[*ku + ml + i__ * ab_dim1], &work[mn + i__ + ml - 1], &work[i__ + ml - 1], &ra); ab[*ku + ml - 1 + i__ * ab_dim1] = ra; if (i__ < *n) { /* Computing MIN */ i__4 = *ku + ml - 2, i__5 = *n - i__; i__3 = min(i__4,i__5); i__6 = *ldab - 1; i__7 = *ldab - 1; _starpu_drot_(&i__3, &ab[*ku + ml - 2 + (i__ + 1) * ab_dim1], &i__6, &ab[*ku + ml - 1 + (i__ + 1) * ab_dim1], &i__7, &work[mn + i__ + ml - 1], &work[i__ + ml - 1]); } } ++nr; j1 -= kb1; } if (wantq) { /* accumulate product of plane rotations in Q */ i__3 = j2; i__4 = kb1; for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { _starpu_drot_(m, &q[(j - 1) * q_dim1 + 1], &c__1, &q[j * q_dim1 + 1], &c__1, &work[mn + j], &work[j]); /* L20: */ } } if (wantc) { /* apply plane rotations to C */ i__4 = j2; i__3 = kb1; for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { _starpu_drot_(ncc, &c__[j - 1 + c_dim1], ldc, &c__[j + c_dim1] , ldc, &work[mn + j], &work[j]); /* L30: */ } } if (j2 + kun > *n) { /* adjust J2 to keep within the bounds of the matrix */ --nr; j2 -= kb1; } i__3 = j2; i__4 = kb1; for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { /* create nonzero element a(j-1,j+ku) above the band */ /* and store it in WORK(n+1:2*n) */ work[j + kun] = work[j] * ab[(j + kun) * ab_dim1 + 1]; ab[(j + kun) * ab_dim1 + 1] = work[mn + j] * ab[(j + kun) * ab_dim1 + 1]; /* L40: */ } /* generate plane rotations to annihilate nonzero elements */ /* which have been generated above the band */ if (nr > 0) { _starpu_dlargv_(&nr, &ab[(j1 + kun - 1) * ab_dim1 + 1], &inca, & work[j1 + kun], &kb1, &work[mn + j1 + kun], &kb1); } /* apply plane rotations from the right */ i__4 = kb; for (l = 1; l <= i__4; ++l) { if (j2 + l - 1 > *m) { nrt = nr - 1; } else { nrt = nr; } if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[l + 1 + (j1 + kun - 1) * ab_dim1], & inca, &ab[l + (j1 + kun) * ab_dim1], &inca, & work[mn + j1 + kun], &work[j1 + kun], &kb1); } /* L50: */ } if (ml == ml0 && mu > mu0) { if (mu <= *n - i__ + 1) { /* generate plane rotation to annihilate a(i,i+mu-1) */ /* within the band, and apply rotation from the right */ _starpu_dlartg_(&ab[*ku - mu + 3 + (i__ + mu - 2) * ab_dim1], &ab[*ku - mu + 2 + (i__ + mu - 1) * ab_dim1], &work[mn + i__ + mu - 1], &work[i__ + mu - 1], &ra); ab[*ku - mu + 3 + (i__ + mu - 2) * ab_dim1] = ra; /* Computing MIN */ i__3 = *kl + mu - 2, i__5 = *m - i__; i__4 = min(i__3,i__5); _starpu_drot_(&i__4, &ab[*ku - mu + 4 + (i__ + mu - 2) * ab_dim1], &c__1, &ab[*ku - mu + 3 + (i__ + mu - 1) * ab_dim1], &c__1, &work[mn + i__ + mu - 1], &work[i__ + mu - 1]); } ++nr; j1 -= kb1; } if (wantpt) { /* accumulate product of plane rotations in P' */ i__4 = j2; i__3 = kb1; for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { _starpu_drot_(n, &pt[j + kun - 1 + pt_dim1], ldpt, &pt[j + kun + pt_dim1], ldpt, &work[mn + j + kun], & work[j + kun]); /* L60: */ } } if (j2 + kb > *m) { /* adjust J2 to keep within the bounds of the matrix */ --nr; j2 -= kb1; } i__3 = j2; i__4 = kb1; for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { /* create nonzero element a(j+kl+ku,j+ku-1) below the */ /* band and store it in WORK(1:n) */ work[j + kb] = work[j + kun] * ab[klu1 + (j + kun) * ab_dim1]; ab[klu1 + (j + kun) * ab_dim1] = work[mn + j + kun] * ab[ klu1 + (j + kun) * ab_dim1]; /* L70: */ } if (ml > ml0) { --ml; } else { --mu; } /* L80: */ } /* L90: */ } } if (*ku == 0 && *kl > 0) { /* A has been reduced to lower bidiagonal form */ /* Transform lower bidiagonal form to upper bidiagonal by applying */ /* plane rotations from the left, storing diagonal elements in D */ /* and off-diagonal elements in E */ /* Computing MIN */ i__2 = *m - 1; i__1 = min(i__2,*n); for (i__ = 1; i__ <= i__1; ++i__) { _starpu_dlartg_(&ab[i__ * ab_dim1 + 1], &ab[i__ * ab_dim1 + 2], &rc, &rs, &ra); d__[i__] = ra; if (i__ < *n) { e[i__] = rs * ab[(i__ + 1) * ab_dim1 + 1]; ab[(i__ + 1) * ab_dim1 + 1] = rc * ab[(i__ + 1) * ab_dim1 + 1] ; } if (wantq) { _starpu_drot_(m, &q[i__ * q_dim1 + 1], &c__1, &q[(i__ + 1) * q_dim1 + 1], &c__1, &rc, &rs); } if (wantc) { _starpu_drot_(ncc, &c__[i__ + c_dim1], ldc, &c__[i__ + 1 + c_dim1], ldc, &rc, &rs); } /* L100: */ } if (*m <= *n) { d__[*m] = ab[*m * ab_dim1 + 1]; } } else if (*ku > 0) { /* A has been reduced to upper bidiagonal form */ if (*m < *n) { /* Annihilate a(m,m+1) by applying plane rotations from the */ /* right, storing diagonal elements in D and off-diagonal */ /* elements in E */ rb = ab[*ku + (*m + 1) * ab_dim1]; for (i__ = *m; i__ >= 1; --i__) { _starpu_dlartg_(&ab[*ku + 1 + i__ * ab_dim1], &rb, &rc, &rs, &ra); d__[i__] = ra; if (i__ > 1) { rb = -rs * ab[*ku + i__ * ab_dim1]; e[i__ - 1] = rc * ab[*ku + i__ * ab_dim1]; } if (wantpt) { _starpu_drot_(n, &pt[i__ + pt_dim1], ldpt, &pt[*m + 1 + pt_dim1], ldpt, &rc, &rs); } /* L110: */ } } else { /* Copy off-diagonal elements to E and diagonal elements to D */ i__1 = minmn - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = ab[*ku + (i__ + 1) * ab_dim1]; /* L120: */ } i__1 = minmn; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = ab[*ku + 1 + i__ * ab_dim1]; /* L130: */ } } } else { /* A is diagonal. Set elements of E to zero and copy diagonal */ /* elements to D. */ i__1 = minmn - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = 0.; /* L140: */ } i__1 = minmn; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = ab[i__ * ab_dim1 + 1]; /* L150: */ } } return 0; /* End of DGBBRD */ } /* _starpu_dgbbrd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgbcon.c000066400000000000000000000175261507764646700206430ustar00rootroot00000000000000/* dgbcon.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dgbcon_(char *norm, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, integer *ipiv, doublereal *anorm, doublereal *rcond, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3; doublereal d__1; /* Local variables */ integer j; doublereal t; integer kd, lm, jp, ix, kase; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); integer kase1; doublereal scale; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, integer *); logical lnoti; extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dlatbs_(char *, char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); doublereal ainvnm; logical onenrm; char normin[1]; doublereal smlnum; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGBCON estimates the reciprocal of the condition number of a real */ /* general band matrix A, in either the 1-norm or the infinity-norm, */ /* using the LU factorization computed by DGBTRF. */ /* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ /* condition number is computed as */ /* RCOND = 1 / ( norm(A) * norm(inv(A)) ). */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies whether the 1-norm condition number or the */ /* infinity-norm condition number is required: */ /* = '1' or 'O': 1-norm; */ /* = 'I': Infinity-norm. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* Details of the LU factorization of the band matrix A, as */ /* computed by DGBTRF. U is stored as an upper triangular band */ /* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */ /* the multipliers used during the factorization are stored in */ /* rows KL+KU+2 to 2*KL+KU+1. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices; for 1 <= i <= N, row i of the matrix was */ /* interchanged with row IPIV(i). */ /* ANORM (input) DOUBLE PRECISION */ /* If NORM = '1' or 'O', the 1-norm of the original matrix A. */ /* If NORM = 'I', the infinity-norm of the original matrix A. */ /* RCOND (output) DOUBLE PRECISION */ /* The reciprocal of the condition number of the matrix A, */ /* computed as RCOND = 1/(norm(A) * norm(inv(A))). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --ipiv; --work; --iwork; /* Function Body */ *info = 0; onenrm = *(unsigned char *)norm == '1' || _starpu_lsame_(norm, "O"); if (! onenrm && ! _starpu_lsame_(norm, "I")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*ldab < (*kl << 1) + *ku + 1) { *info = -6; } else if (*anorm < 0.) { *info = -8; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGBCON", &i__1); return 0; } /* Quick return if possible */ *rcond = 0.; if (*n == 0) { *rcond = 1.; return 0; } else if (*anorm == 0.) { return 0; } smlnum = _starpu_dlamch_("Safe minimum"); /* Estimate the norm of inv(A). */ ainvnm = 0.; *(unsigned char *)normin = 'N'; if (onenrm) { kase1 = 1; } else { kase1 = 2; } kd = *kl + *ku + 1; lnoti = *kl > 0; kase = 0; L10: _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); if (kase != 0) { if (kase == kase1) { /* Multiply by inv(L). */ if (lnoti) { i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__2 = *kl, i__3 = *n - j; lm = min(i__2,i__3); jp = ipiv[j]; t = work[jp]; if (jp != j) { work[jp] = work[j]; work[j] = t; } d__1 = -t; _starpu_daxpy_(&lm, &d__1, &ab[kd + 1 + j * ab_dim1], &c__1, & work[j + 1], &c__1); /* L20: */ } } /* Multiply by inv(U). */ i__1 = *kl + *ku; _starpu_dlatbs_("Upper", "No transpose", "Non-unit", normin, n, &i__1, & ab[ab_offset], ldab, &work[1], &scale, &work[(*n << 1) + 1], info); } else { /* Multiply by inv(U'). */ i__1 = *kl + *ku; _starpu_dlatbs_("Upper", "Transpose", "Non-unit", normin, n, &i__1, &ab[ ab_offset], ldab, &work[1], &scale, &work[(*n << 1) + 1], info); /* Multiply by inv(L'). */ if (lnoti) { for (j = *n - 1; j >= 1; --j) { /* Computing MIN */ i__1 = *kl, i__2 = *n - j; lm = min(i__1,i__2); work[j] -= _starpu_ddot_(&lm, &ab[kd + 1 + j * ab_dim1], &c__1, & work[j + 1], &c__1); jp = ipiv[j]; if (jp != j) { t = work[jp]; work[jp] = work[j]; work[j] = t; } /* L30: */ } } } /* Divide X by 1/SCALE if doing so will not cause overflow. */ *(unsigned char *)normin = 'Y'; if (scale != 1.) { ix = _starpu_idamax_(n, &work[1], &c__1); if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) { goto L40; } _starpu_drscl_(n, &scale, &work[1], &c__1); } goto L10; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { *rcond = 1. / ainvnm / *anorm; } L40: return 0; /* End of DGBCON */ } /* _starpu_dgbcon_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgbequ.c000066400000000000000000000200721507764646700206440ustar00rootroot00000000000000/* dgbequ.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgbequ_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j, kd; doublereal rcmin, rcmax; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum, smlnum; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGBEQU computes row and column scalings intended to equilibrate an */ /* M-by-N band matrix A and reduce its condition number. R returns the */ /* row scale factors and C the column scale factors, chosen to try to */ /* make the largest element in each row and column of the matrix B with */ /* elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. */ /* R(i) and C(j) are restricted to be between SMLNUM = smallest safe */ /* number and BIGNUM = largest safe number. Use of these scaling */ /* factors is not guaranteed to reduce the condition number of A but */ /* works well in practice. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* The band matrix A, stored in rows 1 to KL+KU+1. The j-th */ /* column of A is stored in the j-th column of the array AB as */ /* follows: */ /* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KL+KU+1. */ /* R (output) DOUBLE PRECISION array, dimension (M) */ /* If INFO = 0, or INFO > M, R contains the row scale factors */ /* for A. */ /* C (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, C contains the column scale factors for A. */ /* ROWCND (output) DOUBLE PRECISION */ /* If INFO = 0 or INFO > M, ROWCND contains the ratio of the */ /* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and */ /* AMAX is neither too large nor too small, it is not worth */ /* scaling by R. */ /* COLCND (output) DOUBLE PRECISION */ /* If INFO = 0, COLCND contains the ratio of the smallest */ /* C(i) to the largest C(i). If COLCND >= 0.1, it is not */ /* worth scaling by C. */ /* AMAX (output) DOUBLE PRECISION */ /* Absolute value of largest matrix element. If AMAX is very */ /* close to overflow or very close to underflow, the matrix */ /* should be scaled. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, and i is */ /* <= M: the i-th row of A is exactly zero */ /* > M: the (i-M)-th column of A is exactly zero */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --r__; --c__; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*ldab < *kl + *ku + 1) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGBEQU", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { *rowcnd = 1.; *colcnd = 1.; *amax = 0.; return 0; } /* Get machine constants. */ smlnum = _starpu_dlamch_("S"); bignum = 1. / smlnum; /* Compute row scale factors. */ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { r__[i__] = 0.; /* L10: */ } /* Find the maximum element in each row. */ kd = *ku + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = j - *ku; /* Computing MIN */ i__4 = j + *kl; i__3 = min(i__4,*m); for (i__ = max(i__2,1); i__ <= i__3; ++i__) { /* Computing MAX */ d__2 = r__[i__], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs(d__1)); r__[i__] = max(d__2,d__3); /* L20: */ } /* L30: */ } /* Find the maximum and minimum scale factors. */ rcmin = bignum; rcmax = 0.; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = rcmax, d__2 = r__[i__]; rcmax = max(d__1,d__2); /* Computing MIN */ d__1 = rcmin, d__2 = r__[i__]; rcmin = min(d__1,d__2); /* L40: */ } *amax = rcmax; if (rcmin == 0.) { /* Find the first zero scale factor and return an error code. */ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (r__[i__] == 0.) { *info = i__; return 0; } /* L50: */ } } else { /* Invert the scale factors. */ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MIN */ /* Computing MAX */ d__2 = r__[i__]; d__1 = max(d__2,smlnum); r__[i__] = 1. / min(d__1,bignum); /* L60: */ } /* Compute ROWCND = min(R(I)) / max(R(I)) */ *rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); } /* Compute column scale factors */ i__1 = *n; for (j = 1; j <= i__1; ++j) { c__[j] = 0.; /* L70: */ } /* Find the maximum element in each column, */ /* assuming the row scaling computed above. */ kd = *ku + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__3 = j - *ku; /* Computing MIN */ i__4 = j + *kl; i__2 = min(i__4,*m); for (i__ = max(i__3,1); i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = c__[j], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs( d__1)) * r__[i__]; c__[j] = max(d__2,d__3); /* L80: */ } /* L90: */ } /* Find the maximum and minimum scale factors. */ rcmin = bignum; rcmax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = rcmin, d__2 = c__[j]; rcmin = min(d__1,d__2); /* Computing MAX */ d__1 = rcmax, d__2 = c__[j]; rcmax = max(d__1,d__2); /* L100: */ } if (rcmin == 0.) { /* Find the first zero scale factor and return an error code. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { if (c__[j] == 0.) { *info = *m + j; return 0; } /* L110: */ } } else { /* Invert the scale factors. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ /* Computing MAX */ d__2 = c__[j]; d__1 = max(d__2,smlnum); c__[j] = 1. / min(d__1,bignum); /* L120: */ } /* Compute COLCND = min(C(J)) / max(C(J)) */ *colcnd = max(rcmin,smlnum) / min(rcmax,bignum); } return 0; /* End of DGBEQU */ } /* _starpu_dgbequ_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgbequb.c000066400000000000000000000223671507764646700210170ustar00rootroot00000000000000/* dgbequb.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgbequb_(integer *m, integer *n, integer *kl, integer * ku, doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; doublereal d__1, d__2, d__3; /* Builtin functions */ double log(doublereal), pow_di(doublereal *, integer *); /* Local variables */ integer i__, j, kd; doublereal radix, rcmin, rcmax; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum, logrdx, smlnum; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGBEQUB computes row and column scalings intended to equilibrate an */ /* M-by-N matrix A and reduce its condition number. R returns the row */ /* scale factors and C the column scale factors, chosen to try to make */ /* the largest element in each row and column of the matrix B with */ /* elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most */ /* the radix. */ /* R(i) and C(j) are restricted to be a power of the radix between */ /* SMLNUM = smallest safe number and BIGNUM = largest safe number. Use */ /* of these scaling factors is not guaranteed to reduce the condition */ /* number of A but works well in practice. */ /* This routine differs from DGEEQU by restricting the scaling factors */ /* to a power of the radix. Baring over- and underflow, scaling by */ /* these factors introduces no additional rounding errors. However, the */ /* scaled entries' magnitured are no longer approximately 1 but lie */ /* between sqrt(radix) and 1/sqrt(radix). */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ /* The j-th column of A is stored in the j-th column of the */ /* array AB as follows: */ /* AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) */ /* LDAB (input) INTEGER */ /* The leading dimension of the array A. LDAB >= max(1,M). */ /* R (output) DOUBLE PRECISION array, dimension (M) */ /* If INFO = 0 or INFO > M, R contains the row scale factors */ /* for A. */ /* C (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, C contains the column scale factors for A. */ /* ROWCND (output) DOUBLE PRECISION */ /* If INFO = 0 or INFO > M, ROWCND contains the ratio of the */ /* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and */ /* AMAX is neither too large nor too small, it is not worth */ /* scaling by R. */ /* COLCND (output) DOUBLE PRECISION */ /* If INFO = 0, COLCND contains the ratio of the smallest */ /* C(i) to the largest C(i). If COLCND >= 0.1, it is not */ /* worth scaling by C. */ /* AMAX (output) DOUBLE PRECISION */ /* Absolute value of largest matrix element. If AMAX is very */ /* close to overflow or very close to underflow, the matrix */ /* should be scaled. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, and i is */ /* <= M: the i-th row of A is exactly zero */ /* > M: the (i-M)-th column of A is exactly zero */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --r__; --c__; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*ldab < *kl + *ku + 1) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGBEQUB", &i__1); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0) { *rowcnd = 1.; *colcnd = 1.; *amax = 0.; return 0; } /* Get machine constants. Assume SMLNUM is a power of the radix. */ smlnum = _starpu_dlamch_("S"); bignum = 1. / smlnum; radix = _starpu_dlamch_("B"); logrdx = log(radix); /* Compute row scale factors. */ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { r__[i__] = 0.; /* L10: */ } /* Find the maximum element in each row. */ kd = *ku + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = j - *ku; /* Computing MIN */ i__4 = j + *kl; i__3 = min(i__4,*m); for (i__ = max(i__2,1); i__ <= i__3; ++i__) { /* Computing MAX */ d__2 = r__[i__], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs(d__1)); r__[i__] = max(d__2,d__3); /* L20: */ } /* L30: */ } i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (r__[i__] > 0.) { i__3 = (integer) (log(r__[i__]) / logrdx); r__[i__] = pow_di(&radix, &i__3); } } /* Find the maximum and minimum scale factors. */ rcmin = bignum; rcmax = 0.; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = rcmax, d__2 = r__[i__]; rcmax = max(d__1,d__2); /* Computing MIN */ d__1 = rcmin, d__2 = r__[i__]; rcmin = min(d__1,d__2); /* L40: */ } *amax = rcmax; if (rcmin == 0.) { /* Find the first zero scale factor and return an error code. */ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (r__[i__] == 0.) { *info = i__; return 0; } /* L50: */ } } else { /* Invert the scale factors. */ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MIN */ /* Computing MAX */ d__2 = r__[i__]; d__1 = max(d__2,smlnum); r__[i__] = 1. / min(d__1,bignum); /* L60: */ } /* Compute ROWCND = min(R(I)) / max(R(I)). */ *rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); } /* Compute column scale factors. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { c__[j] = 0.; /* L70: */ } /* Find the maximum element in each column, */ /* assuming the row scaling computed above. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__3 = j - *ku; /* Computing MIN */ i__4 = j + *kl; i__2 = min(i__4,*m); for (i__ = max(i__3,1); i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = c__[j], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs( d__1)) * r__[i__]; c__[j] = max(d__2,d__3); /* L80: */ } if (c__[j] > 0.) { i__2 = (integer) (log(c__[j]) / logrdx); c__[j] = pow_di(&radix, &i__2); } /* L90: */ } /* Find the maximum and minimum scale factors. */ rcmin = bignum; rcmax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = rcmin, d__2 = c__[j]; rcmin = min(d__1,d__2); /* Computing MAX */ d__1 = rcmax, d__2 = c__[j]; rcmax = max(d__1,d__2); /* L100: */ } if (rcmin == 0.) { /* Find the first zero scale factor and return an error code. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { if (c__[j] == 0.) { *info = *m + j; return 0; } /* L110: */ } } else { /* Invert the scale factors. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ /* Computing MAX */ d__2 = c__[j]; d__1 = max(d__2,smlnum); c__[j] = 1. / min(d__1,bignum); /* L120: */ } /* Compute COLCND = min(C(J)) / max(C(J)). */ *colcnd = max(rcmin,smlnum) / min(rcmax,bignum); } return 0; /* End of DGBEQUB */ } /* _starpu_dgbequb_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgbrfs.c000066400000000000000000000331201507764646700206420ustar00rootroot00000000000000/* dgbrfs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b15 = -1.; static doublereal c_b17 = 1.; /* Subroutine */ int _starpu_dgbrfs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j, k; doublereal s; integer kk; doublereal xk; integer nz; doublereal eps; integer kase; doublereal safe1, safe2; extern /* Subroutine */ int _starpu_dgbmv_(char *, integer *, integer *, integer * , integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer count; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dgbtrs_( char *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); logical notran; char transt[1]; doublereal lstres; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGBRFS improves the computed solution to a system of linear */ /* equations when the coefficient matrix is banded, and provides */ /* error bounds and backward error estimates for the solution. */ /* Arguments */ /* ========= */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* The original band matrix A, stored in rows 1 to KL+KU+1. */ /* The j-th column of A is stored in the j-th column of the */ /* array AB as follows: */ /* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KL+KU+1. */ /* AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) */ /* Details of the LU factorization of the band matrix A, as */ /* computed by DGBTRF. U is stored as an upper triangular band */ /* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */ /* the multipliers used during the factorization are stored in */ /* rows KL+KU+2 to 2*KL+KU+1. */ /* LDAFB (input) INTEGER */ /* The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices from DGBTRF; for 1<=i<=N, row i of the */ /* matrix was interchanged with row IPIV(i). */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* On entry, the solution matrix X, as computed by DGBTRS. */ /* On exit, the improved solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Internal Parameters */ /* =================== */ /* ITMAX is the maximum number of steps of iterative refinement. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; afb_dim1 = *ldafb; afb_offset = 1 + afb_dim1; afb -= afb_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; notran = _starpu_lsame_(trans, "N"); if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_( trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*nrhs < 0) { *info = -5; } else if (*ldab < *kl + *ku + 1) { *info = -7; } else if (*ldafb < (*kl << 1) + *ku + 1) { *info = -9; } else if (*ldb < max(1,*n)) { *info = -12; } else if (*ldx < max(1,*n)) { *info = -14; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGBRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } if (notran) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ /* Computing MIN */ i__1 = *kl + *ku + 2, i__2 = *n + 1; nz = min(i__1,i__2); eps = _starpu_dlamch_("Epsilon"); safmin = _starpu_dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - op(A) * X, */ /* where op(A) = A, A**T, or A**H, depending on TRANS. */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); _starpu_dgbmv_(trans, n, n, kl, ku, &c_b15, &ab[ab_offset], ldab, &x[j * x_dim1 + 1], &c__1, &c_b17, &work[*n + 1], &c__1); /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. If the i-th component of the denominator is less */ /* than SAFE2, then SAFE1 is added to the i-th components of the */ /* numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); /* L30: */ } /* Compute abs(op(A))*abs(X) + abs(B). */ if (notran) { i__2 = *n; for (k = 1; k <= i__2; ++k) { kk = *ku + 1 - k; xk = (d__1 = x[k + j * x_dim1], abs(d__1)); /* Computing MAX */ i__3 = 1, i__4 = k - *ku; /* Computing MIN */ i__6 = *n, i__7 = k + *kl; i__5 = min(i__6,i__7); for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) { work[i__] += (d__1 = ab[kk + i__ + k * ab_dim1], abs(d__1) ) * xk; /* L40: */ } /* L50: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; kk = *ku + 1 - k; /* Computing MAX */ i__5 = 1, i__3 = k - *ku; /* Computing MIN */ i__6 = *n, i__7 = k + *kl; i__4 = min(i__6,i__7); for (i__ = max(i__5,i__3); i__ <= i__4; ++i__) { s += (d__1 = ab[kk + i__ + k * ab_dim1], abs(d__1)) * ( d__2 = x[i__ + j * x_dim1], abs(d__2)); /* L60: */ } work[k] += s; /* L70: */ } } s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L80: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if */ /* 1) The residual BERR(J) is larger than machine epsilon, and */ /* 2) BERR(J) decreased by at least a factor of 2 during the */ /* last iteration, and */ /* 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { /* Update solution and try again. */ _starpu_dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1] , &work[*n + 1], n, info); _starpu_daxpy_(n, &c_b17, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) ; lstres = berr[j]; ++count; goto L20; } /* Bound error from formula */ /* norm(X - XTRUE) / norm(X) .le. FERR = */ /* norm( abs(inv(op(A)))* */ /* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ /* where */ /* norm(Z) is the magnitude of the largest component of Z */ /* inv(op(A)) is the inverse of op(A) */ /* abs(Z) is the componentwise absolute value of the matrix or */ /* vector Z */ /* NZ is the maximum number of nonzeros in any row of A, plus 1 */ /* EPS is machine epsilon */ /* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ /* is incremented by SAFE1 if the i-th component of */ /* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ /* Use DLACN2 to estimate the infinity-norm of the matrix */ /* inv(op(A)) * diag(W), */ /* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L90: */ } kase = 0; L100: _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase, isave); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(op(A)**T). */ _starpu_dgbtrs_(transt, n, kl, ku, &c__1, &afb[afb_offset], ldafb, & ipiv[1], &work[*n + 1], n, info); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] *= work[i__]; /* L110: */ } } else { /* Multiply by inv(op(A))*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] *= work[i__]; /* L120: */ } _starpu_dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, & ipiv[1], &work[*n + 1], n, info); } goto L100; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); lstres = max(d__2,d__3); /* L130: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L140: */ } return 0; /* End of DGBRFS */ } /* _starpu_dgbrfs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgbrfsx.c000066400000000000000000000661031507764646700210410ustar00rootroot00000000000000/* dgbrfsx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c_n1 = -1; static integer c__0 = 0; static integer c__1 = 1; /* Subroutine */ int _starpu_dgbrfsx_(char *trans, char *equed, integer *n, integer * kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer * ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * nparams, doublereal *params, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, x_dim1, x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, err_bnds_comp_dim1, err_bnds_comp_offset, i__1; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal illrcond_thresh__, unstable_thresh__, err_lbnd__; integer ref_type__; extern integer _starpu_ilatrans_(char *); integer j; doublereal rcond_tmp__; integer prec_type__, trans_type__; extern doublereal _starpu_dla_gbrcond__(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen); doublereal cwise_wrong__; extern /* Subroutine */ int _starpu_dla_gbrfsx_extended__(integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, logical *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal * , doublereal *, logical *, integer *); char norm[1]; logical ignore_cwise__; extern logical _starpu_lsame_(char *, char *); doublereal anorm; extern doublereal _starpu_dlangb_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *), _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dgbcon_(char *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); logical colequ, notran, rowequ; extern integer _starpu_ilaprec_(char *); integer ithresh, n_norms__; doublereal rthresh; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGBRFSX improves the computed solution to a system of linear */ /* equations and provides error bounds and backward error estimates */ /* for the solution. In addition to normwise error bound, the code */ /* provides maximum componentwise error bound if possible. See */ /* comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the */ /* error bounds. */ /* The original system of linear equations may have been equilibrated */ /* before calling this routine, as described by arguments EQUED, R */ /* and C below. In this case, the solution and error bounds returned */ /* are for the original unequilibrated system. */ /* Arguments */ /* ========= */ /* Some optional parameters are bundled in the PARAMS array. These */ /* settings determine how refinement is performed, but often the */ /* defaults are acceptable. If the defaults are acceptable, users */ /* can pass NPARAMS = 0 which prevents the source code from accessing */ /* the PARAMS argument. */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ /* EQUED (input) CHARACTER*1 */ /* Specifies the form of equilibration that was done to A */ /* before calling this routine. This is needed to compute */ /* the solution and error bounds correctly. */ /* = 'N': No equilibration */ /* = 'R': Row equilibration, i.e., A has been premultiplied by */ /* diag(R). */ /* = 'C': Column equilibration, i.e., A has been postmultiplied */ /* by diag(C). */ /* = 'B': Both row and column equilibration, i.e., A has been */ /* replaced by diag(R) * A * diag(C). */ /* The right hand side B has been changed accordingly. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* The original band matrix A, stored in rows 1 to KL+KU+1. */ /* The j-th column of A is stored in the j-th column of the */ /* array AB as follows: */ /* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KL+KU+1. */ /* AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) */ /* Details of the LU factorization of the band matrix A, as */ /* computed by DGBTRF. U is stored as an upper triangular band */ /* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */ /* the multipliers used during the factorization are stored in */ /* rows KL+KU+2 to 2*KL+KU+1. */ /* LDAFB (input) INTEGER */ /* The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices from DGETRF; for 1<=i<=N, row i of the */ /* matrix was interchanged with row IPIV(i). */ /* R (input or output) DOUBLE PRECISION array, dimension (N) */ /* The row scale factors for A. If EQUED = 'R' or 'B', A is */ /* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ /* is not accessed. R is an input argument if FACT = 'F'; */ /* otherwise, R is an output argument. If FACT = 'F' and */ /* EQUED = 'R' or 'B', each element of R must be positive. */ /* If R is output, each element of R is a power of the radix. */ /* If R is input, each element of R should be a power of the radix */ /* to ensure a reliable solution and error estimates. Scaling by */ /* powers of the radix does not cause rounding errors unless the */ /* result underflows or overflows. Rounding errors during scaling */ /* lead to refining with a matrix that is not equivalent to the */ /* input matrix, producing error estimates that may not be */ /* reliable. */ /* C (input or output) DOUBLE PRECISION array, dimension (N) */ /* The column scale factors for A. If EQUED = 'C' or 'B', A is */ /* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ /* is not accessed. C is an input argument if FACT = 'F'; */ /* otherwise, C is an output argument. If FACT = 'F' and */ /* EQUED = 'C' or 'B', each element of C must be positive. */ /* If C is output, each element of C is a power of the radix. */ /* If C is input, each element of C should be a power of the radix */ /* to ensure a reliable solution and error estimates. Scaling by */ /* powers of the radix does not cause rounding errors unless the */ /* result underflows or overflows. Rounding errors during scaling */ /* lead to refining with a matrix that is not equivalent to the */ /* input matrix, producing error estimates that may not be */ /* reliable. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* On entry, the solution matrix X, as computed by DGETRS. */ /* On exit, the improved solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* Reciprocal scaled condition number. This is an estimate of the */ /* reciprocal Skeel condition number of the matrix A after */ /* equilibration (if done). If this is less than the machine */ /* precision (in particular, if it is zero), the matrix is singular */ /* to working precision. Note that the error may still be small even */ /* if this number is very small and the matrix appears ill- */ /* conditioned. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* Componentwise relative backward error. This is the */ /* componentwise relative backward error of each solution vector X(j) */ /* (i.e., the smallest relative change in any element of A or B that */ /* makes X(j) an exact solution). */ /* N_ERR_BNDS (input) INTEGER */ /* Number of error bounds to return for each right hand side */ /* and each type (normwise or componentwise). See ERR_BNDS_NORM and */ /* ERR_BNDS_COMP below. */ /* ERR_BNDS_NORM (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* normwise relative error, which is defined as follows: */ /* Normwise relative error in the ith solution vector: */ /* max_j (abs(XTRUE(j,i) - X(j,i))) */ /* ------------------------------ */ /* max_j abs(X(j,i)) */ /* The array is indexed by the type of error information as described */ /* below. There currently are up to three pieces of information */ /* returned. */ /* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_NORM(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * dlamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * dlamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated normwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * dlamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*A, where S scales each row by a power of the */ /* radix so all absolute row sums of Z are approximately 1. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* ERR_BNDS_COMP (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* componentwise relative error, which is defined as follows: */ /* Componentwise relative error in the ith solution vector: */ /* abs(XTRUE(j,i) - X(j,i)) */ /* max_j ---------------------- */ /* abs(X(j,i)) */ /* The array is indexed by the right-hand side i (on which the */ /* componentwise relative error depends), and the type of error */ /* information as described below. There currently are up to three */ /* pieces of information returned for each right-hand side. If */ /* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ /* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ /* the first (:,N_ERR_BNDS) entries are returned. */ /* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_COMP(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * dlamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * dlamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated componentwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * dlamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*(A*diag(x)), where x is the solution for the */ /* current right-hand side and S scales each row of */ /* A*diag(x) by a power of the radix so all absolute row */ /* sums of Z are approximately 1. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* NPARAMS (input) INTEGER */ /* Specifies the number of parameters set in PARAMS. If .LE. 0, the */ /* PARAMS array is never referenced and default values are used. */ /* PARAMS (input / output) DOUBLE PRECISION array, dimension NPARAMS */ /* Specifies algorithm parameters. If an entry is .LT. 0.0, then */ /* that entry will be filled with default value used for that */ /* parameter. Only positions up to NPARAMS are accessed; defaults */ /* are used for higher-numbered parameters. */ /* PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ /* refinement or not. */ /* Default: 1.0D+0 */ /* = 0.0 : No refinement is performed, and no error bounds are */ /* computed. */ /* = 1.0 : Use the double-precision refinement algorithm, */ /* possibly with doubled-single computations if the */ /* compilation environment does not support DOUBLE */ /* PRECISION. */ /* (other values are reserved for future use) */ /* PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ /* computations allowed for refinement. */ /* Default: 10 */ /* Aggressive: Set to 100 to permit convergence using approximate */ /* factorizations or factorizations other than LU. If */ /* the factorization uses a technique other than */ /* Gaussian elimination, the guarantees in */ /* err_bnds_norm and err_bnds_comp may no longer be */ /* trustworthy. */ /* PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ /* will attempt to find a solution with small componentwise */ /* relative error in the double-precision algorithm. Positive */ /* is true, 0.0 is false. */ /* Default: 1.0 (attempt componentwise convergence) */ /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: Successful exit. The solution to every right-hand side is */ /* guaranteed. */ /* < 0: If INFO = -i, the i-th argument had an illegal value */ /* > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly singular, so */ /* the solution and error bounds could not be computed. RCOND = 0 */ /* is returned. */ /* = N+J: The solution corresponding to the Jth right-hand side is */ /* not guaranteed. The solutions corresponding to other right- */ /* hand sides K with K > J may not be guaranteed as well, but */ /* only the first such right-hand side is reported. If a small */ /* componentwise error is not requested (PARAMS(3) = 0.0) then */ /* the Jth right-hand side is the first with a normwise error */ /* bound that is not guaranteed (the smallest J such */ /* that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ /* the Jth right-hand side is the first with either a normwise or */ /* componentwise error bound that is not guaranteed (the smallest */ /* J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ /* ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ /* ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ /* about all of the right-hand sides check ERR_BNDS_NORM or */ /* ERR_BNDS_COMP. */ /* ================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Check the input parameters. */ /* Parameter adjustments */ err_bnds_comp_dim1 = *nrhs; err_bnds_comp_offset = 1 + err_bnds_comp_dim1; err_bnds_comp__ -= err_bnds_comp_offset; err_bnds_norm_dim1 = *nrhs; err_bnds_norm_offset = 1 + err_bnds_norm_dim1; err_bnds_norm__ -= err_bnds_norm_offset; ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; afb_dim1 = *ldafb; afb_offset = 1 + afb_dim1; afb -= afb_offset; --ipiv; --r__; --c__; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --berr; --params; --work; --iwork; /* Function Body */ *info = 0; trans_type__ = _starpu_ilatrans_(trans); ref_type__ = 1; if (*nparams >= 1) { if (params[1] < 0.) { params[1] = 1.; } else { ref_type__ = (integer) params[1]; } } /* Set default parameters. */ illrcond_thresh__ = (doublereal) (*n) * _starpu_dlamch_("Epsilon"); ithresh = 10; rthresh = .5; unstable_thresh__ = .25; ignore_cwise__ = FALSE_; if (*nparams >= 2) { if (params[2] < 0.) { params[2] = (doublereal) ithresh; } else { ithresh = (integer) params[2]; } } if (*nparams >= 3) { if (params[3] < 0.) { if (ignore_cwise__) { params[3] = 0.; } else { params[3] = 1.; } } else { ignore_cwise__ = params[3] == 0.; } } if (ref_type__ == 0 || *n_err_bnds__ == 0) { n_norms__ = 0; } else if (ignore_cwise__) { n_norms__ = 1; } else { n_norms__ = 2; } notran = _starpu_lsame_(trans, "N"); rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, "B"); colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, "B"); /* Test input parameters. */ if (trans_type__ == -1) { *info = -1; } else if (! rowequ && ! colequ && ! _starpu_lsame_(equed, "N")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*kl < 0) { *info = -4; } else if (*ku < 0) { *info = -5; } else if (*nrhs < 0) { *info = -6; } else if (*ldab < *kl + *ku + 1) { *info = -8; } else if (*ldafb < (*kl << 1) + *ku + 1) { *info = -10; } else if (*ldb < max(1,*n)) { *info = -13; } else if (*ldx < max(1,*n)) { *info = -15; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGBRFSX", &i__1); return 0; } /* Quick return if possible. */ if (*n == 0 || *nrhs == 0) { *rcond = 1.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { berr[j] = 0.; if (*n_err_bnds__ >= 1) { err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; } else if (*n_err_bnds__ >= 2) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 0.; err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 0.; } else if (*n_err_bnds__ >= 3) { err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 1.; } } return 0; } /* Default to failure. */ *rcond = 0.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { berr[j] = 1.; if (*n_err_bnds__ >= 1) { err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; } else if (*n_err_bnds__ >= 2) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; } else if (*n_err_bnds__ >= 3) { err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 0.; err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 0.; } } /* Compute the norm of A and the reciprocal of the condition */ /* number of A. */ if (notran) { *(unsigned char *)norm = 'I'; } else { *(unsigned char *)norm = '1'; } anorm = _starpu_dlangb_(norm, n, kl, ku, &ab[ab_offset], ldab, &work[1]); _starpu_dgbcon_(norm, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], &anorm, rcond, &work[1], &iwork[1], info); /* Perform refinement on each right-hand side */ if (ref_type__ != 0) { prec_type__ = _starpu_ilaprec_("E"); if (notran) { _starpu_dla_gbrfsx_extended__(&prec_type__, &trans_type__, n, kl, ku, nrhs, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, & ipiv[1], &colequ, &c__[1], &b[b_offset], ldb, &x[x_offset] , ldx, &berr[1], &n_norms__, &err_bnds_norm__[ err_bnds_norm_offset], &err_bnds_comp__[ err_bnds_comp_offset], &work[*n + 1], &work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh, &rthresh, & unstable_thresh__, &ignore_cwise__, info); } else { _starpu_dla_gbrfsx_extended__(&prec_type__, &trans_type__, n, kl, ku, nrhs, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, & ipiv[1], &rowequ, &r__[1], &b[b_offset], ldb, &x[x_offset] , ldx, &berr[1], &n_norms__, &err_bnds_norm__[ err_bnds_norm_offset], &err_bnds_comp__[ err_bnds_comp_offset], &work[*n + 1], &work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh, &rthresh, & unstable_thresh__, &ignore_cwise__, info); } } /* Computing MAX */ d__1 = 10., d__2 = sqrt((doublereal) (*n)); err_lbnd__ = max(d__1,d__2) * _starpu_dlamch_("Epsilon"); if (*n_err_bnds__ >= 1 && n_norms__ >= 1) { /* Compute scaled normwise condition number cond(A*C). */ if (colequ && notran) { rcond_tmp__ = _starpu_dla_gbrcond__(trans, n, kl, ku, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, &ipiv[1], &c_n1, &c__[1], info, &work[1], &iwork[1], (ftnlen)1); } else if (rowequ && ! notran) { rcond_tmp__ = _starpu_dla_gbrcond__(trans, n, kl, ku, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, &ipiv[1], &c_n1, &r__[1], info, &work[1], &iwork[1], (ftnlen)1); } else { rcond_tmp__ = _starpu_dla_gbrcond__(trans, n, kl, ku, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, &ipiv[1], &c__0, &r__[1], info, &work[1], &iwork[1], (ftnlen)1); } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { /* Cap the error at 1.0. */ if (*n_err_bnds__ >= 2 && err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] > 1.) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; } /* Threshold the error (see LAWN). */ if (rcond_tmp__ < illrcond_thresh__) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; err_bnds_norm__[j + err_bnds_norm_dim1] = 0.; if (*info <= *n) { *info = *n + j; } } else if (err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] < err_lbnd__) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = err_lbnd__; err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; } /* Save the condition number. */ if (*n_err_bnds__ >= 3) { err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = rcond_tmp__; } } } if (*n_err_bnds__ >= 1 && n_norms__ >= 2) { /* Compute componentwise condition number cond(A*diag(Y(:,J))) for */ /* each right-hand side using the current solution as an estimate of */ /* the true solution. If the componentwise error estimate is too */ /* large, then the solution is a lousy estimate of truth and the */ /* estimated RCOND may be too optimistic. To avoid misleading users, */ /* the inverse condition number is set to 0.0 when the estimated */ /* cwise error is at least CWISE_WRONG. */ cwise_wrong__ = sqrt(_starpu_dlamch_("Epsilon")); i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < cwise_wrong__) { rcond_tmp__ = _starpu_dla_gbrcond__(trans, n, kl, ku, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, &ipiv[1], &c__1, &x[j * x_dim1 + 1], info, &work[1], &iwork[1], (ftnlen)1); } else { rcond_tmp__ = 0.; } /* Cap the error at 1.0. */ if (*n_err_bnds__ >= 2 && err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] > 1.) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; } /* Threshold the error (see LAWN). */ if (rcond_tmp__ < illrcond_thresh__) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1] = 0.; if (params[3] == 1. && *info < *n + j) { *info = *n + j; } } else if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < err_lbnd__) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = err_lbnd__; err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; } /* Save the condition number. */ if (*n_err_bnds__ >= 3) { err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = rcond_tmp__; } } } return 0; /* End of DGBRFSX */ } /* _starpu_dgbrfsx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgbsv.c000066400000000000000000000142721507764646700205070ustar00rootroot00000000000000/* dgbsv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgbsv_(integer *n, integer *kl, integer *ku, integer * nrhs, doublereal *ab, integer *ldab, integer *ipiv, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, b_dim1, b_offset, i__1; /* Local variables */ extern /* Subroutine */ int _starpu_dgbtrf_(integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dgbtrs_(char *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGBSV computes the solution to a real system of linear equations */ /* A * X = B, where A is a band matrix of order N with KL subdiagonals */ /* and KU superdiagonals, and X and B are N-by-NRHS matrices. */ /* The LU decomposition with partial pivoting and row interchanges is */ /* used to factor A as A = L * U, where L is a product of permutation */ /* and unit lower triangular matrices with KL subdiagonals, and U is */ /* upper triangular with KL+KU superdiagonals. The factored form of A */ /* is then used to solve the system of equations A * X = B. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the matrix A in band storage, in rows KL+1 to */ /* 2*KL+KU+1; rows 1 to KL of the array need not be set. */ /* The j-th column of A is stored in the j-th column of the */ /* array AB as follows: */ /* AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL) */ /* On exit, details of the factorization: U is stored as an */ /* upper triangular band matrix with KL+KU superdiagonals in */ /* rows 1 to KL+KU+1, and the multipliers used during the */ /* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */ /* See below for further details. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ /* IPIV (output) INTEGER array, dimension (N) */ /* The pivot indices that define the permutation matrix P; */ /* row i of the matrix was interchanged with row IPIV(i). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS right hand side matrix B. */ /* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, and the solution has not been computed. */ /* Further Details */ /* =============== */ /* The band storage scheme is illustrated by the following example, when */ /* M = N = 6, KL = 2, KU = 1: */ /* On entry: On exit: */ /* * * * + + + * * * u14 u25 u36 */ /* * * + + + + * * u13 u24 u35 u46 */ /* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ /* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ /* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * */ /* a31 a42 a53 a64 * * m31 m42 m53 m64 * * */ /* Array elements marked * are not used by the routine; elements marked */ /* + need not be set on entry, but are required by the routine to store */ /* elements of U because of fill-in resulting from the row interchanges. */ /* ===================================================================== */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*kl < 0) { *info = -2; } else if (*ku < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*ldab < (*kl << 1) + *ku + 1) { *info = -6; } else if (*ldb < max(*n,1)) { *info = -9; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGBSV ", &i__1); return 0; } /* Compute the LU factorization of the band matrix A. */ _starpu_dgbtrf_(n, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info); if (*info == 0) { /* Solve the system A*X = B, overwriting B with X. */ _starpu_dgbtrs_("No transpose", n, kl, ku, nrhs, &ab[ab_offset], ldab, &ipiv[ 1], &b[b_offset], ldb, info); } return 0; /* End of DGBSV */ } /* _starpu_dgbsv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgbsvx.c000066400000000000000000000557561507764646700207130ustar00rootroot00000000000000/* dgbsvx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dgbsvx_(char *fact, char *trans, integer *n, integer *kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j, j1, j2; doublereal amax; char norm[1]; extern logical _starpu_lsame_(char *, char *); doublereal rcmin, rcmax, anorm; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); logical equil; extern doublereal _starpu_dlangb_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *), _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlaqgb_(integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, char *), _starpu_dgbcon_(char *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); doublereal colcnd; extern doublereal _starpu_dlantb_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dgbequ_(integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dgbrfs_( char *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dgbtrf_(integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *); logical nofact; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dgbtrs_(char *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); integer infequ; logical colequ; doublereal rowcnd; logical notran; doublereal smlnum; logical rowequ; doublereal rpvgrw; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGBSVX uses the LU factorization to compute the solution to a real */ /* system of linear equations A * X = B, A**T * X = B, or A**H * X = B, */ /* where A is a band matrix of order N with KL subdiagonals and KU */ /* superdiagonals, and X and B are N-by-NRHS matrices. */ /* Error bounds on the solution and a condition estimate are also */ /* provided. */ /* Description */ /* =========== */ /* The following steps are performed by this subroutine: */ /* 1. If FACT = 'E', real scaling factors are computed to equilibrate */ /* the system: */ /* TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B */ /* TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */ /* TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */ /* Whether or not the system will be equilibrated depends on the */ /* scaling of the matrix A, but if equilibration is used, A is */ /* overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */ /* or diag(C)*B (if TRANS = 'T' or 'C'). */ /* 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the */ /* matrix A (after equilibration if FACT = 'E') as */ /* A = L * U, */ /* where L is a product of permutation and unit lower triangular */ /* matrices with KL subdiagonals, and U is upper triangular with */ /* KL+KU superdiagonals. */ /* 3. If some U(i,i)=0, so that U is exactly singular, then the routine */ /* returns with INFO = i. Otherwise, the factored form of A is used */ /* to estimate the condition number of the matrix A. If the */ /* reciprocal of the condition number is less than machine precision, */ /* INFO = N+1 is returned as a warning, but the routine still goes on */ /* to solve for X and compute error bounds as described below. */ /* 4. The system of equations is solved for X using the factored form */ /* of A. */ /* 5. Iterative refinement is applied to improve the computed solution */ /* matrix and calculate error bounds and backward error estimates */ /* for it. */ /* 6. If equilibration was used, the matrix X is premultiplied by */ /* diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */ /* that it solves the original system before equilibration. */ /* Arguments */ /* ========= */ /* FACT (input) CHARACTER*1 */ /* Specifies whether or not the factored form of the matrix A is */ /* supplied on entry, and if not, whether the matrix A should be */ /* equilibrated before it is factored. */ /* = 'F': On entry, AFB and IPIV contain the factored form of */ /* A. If EQUED is not 'N', the matrix A has been */ /* equilibrated with scaling factors given by R and C. */ /* AB, AFB, and IPIV are not modified. */ /* = 'N': The matrix A will be copied to AFB and factored. */ /* = 'E': The matrix A will be equilibrated if necessary, then */ /* copied to AFB and factored. */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations. */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Transpose) */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ /* The j-th column of A is stored in the j-th column of the */ /* array AB as follows: */ /* AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) */ /* If FACT = 'F' and EQUED is not 'N', then A must have been */ /* equilibrated by the scaling factors in R and/or C. AB is not */ /* modified if FACT = 'F' or 'N', or if FACT = 'E' and */ /* EQUED = 'N' on exit. */ /* On exit, if EQUED .ne. 'N', A is scaled as follows: */ /* EQUED = 'R': A := diag(R) * A */ /* EQUED = 'C': A := A * diag(C) */ /* EQUED = 'B': A := diag(R) * A * diag(C). */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KL+KU+1. */ /* AFB (input or output) DOUBLE PRECISION array, dimension (LDAFB,N) */ /* If FACT = 'F', then AFB is an input argument and on entry */ /* contains details of the LU factorization of the band matrix */ /* A, as computed by DGBTRF. U is stored as an upper triangular */ /* band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */ /* and the multipliers used during the factorization are stored */ /* in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is */ /* the factored form of the equilibrated matrix A. */ /* If FACT = 'N', then AFB is an output argument and on exit */ /* returns details of the LU factorization of A. */ /* If FACT = 'E', then AFB is an output argument and on exit */ /* returns details of the LU factorization of the equilibrated */ /* matrix A (see the description of AB for the form of the */ /* equilibrated matrix). */ /* LDAFB (input) INTEGER */ /* The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. */ /* IPIV (input or output) INTEGER array, dimension (N) */ /* If FACT = 'F', then IPIV is an input argument and on entry */ /* contains the pivot indices from the factorization A = L*U */ /* as computed by DGBTRF; row i of the matrix was interchanged */ /* with row IPIV(i). */ /* If FACT = 'N', then IPIV is an output argument and on exit */ /* contains the pivot indices from the factorization A = L*U */ /* of the original matrix A. */ /* If FACT = 'E', then IPIV is an output argument and on exit */ /* contains the pivot indices from the factorization A = L*U */ /* of the equilibrated matrix A. */ /* EQUED (input or output) CHARACTER*1 */ /* Specifies the form of equilibration that was done. */ /* = 'N': No equilibration (always true if FACT = 'N'). */ /* = 'R': Row equilibration, i.e., A has been premultiplied by */ /* diag(R). */ /* = 'C': Column equilibration, i.e., A has been postmultiplied */ /* by diag(C). */ /* = 'B': Both row and column equilibration, i.e., A has been */ /* replaced by diag(R) * A * diag(C). */ /* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ /* output argument. */ /* R (input or output) DOUBLE PRECISION array, dimension (N) */ /* The row scale factors for A. If EQUED = 'R' or 'B', A is */ /* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ /* is not accessed. R is an input argument if FACT = 'F'; */ /* otherwise, R is an output argument. If FACT = 'F' and */ /* EQUED = 'R' or 'B', each element of R must be positive. */ /* C (input or output) DOUBLE PRECISION array, dimension (N) */ /* The column scale factors for A. If EQUED = 'C' or 'B', A is */ /* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ /* is not accessed. C is an input argument if FACT = 'F'; */ /* otherwise, C is an output argument. If FACT = 'F' and */ /* EQUED = 'C' or 'B', each element of C must be positive. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side matrix B. */ /* On exit, */ /* if EQUED = 'N', B is not modified; */ /* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ /* diag(R)*B; */ /* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ /* overwritten by diag(C)*B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X */ /* to the original system of equations. Note that A and B are */ /* modified on exit if EQUED .ne. 'N', and the solution to the */ /* equilibrated system is inv(diag(C))*X if TRANS = 'N' and */ /* EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' */ /* and EQUED = 'R' or 'B'. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* The estimate of the reciprocal condition number of the matrix */ /* A after equilibration (if done). If RCOND is less than the */ /* machine precision (in particular, if RCOND = 0), the matrix */ /* is singular to working precision. This condition is */ /* indicated by a return code of INFO > 0. */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (3*N) */ /* On exit, WORK(1) contains the reciprocal pivot growth */ /* factor norm(A)/norm(U). The "max absolute element" norm is */ /* used. If WORK(1) is much less than 1, then the stability */ /* of the LU factorization of the (equilibrated) matrix A */ /* could be poor. This also means that the solution X, condition */ /* estimator RCOND, and forward error bound FERR could be */ /* unreliable. If factorization fails with 0 0: if INFO = i, and i is */ /* <= N: U(i,i) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, so the solution and error bounds */ /* could not be computed. RCOND = 0 is returned. */ /* = N+1: U is nonsingular, but RCOND is less than machine */ /* precision, meaning that the matrix is singular */ /* to working precision. Nevertheless, the */ /* solution and error bounds are computed because */ /* there are a number of situations where the */ /* computed solution can be more accurate than the */ /* value of RCOND would suggest. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; afb_dim1 = *ldafb; afb_offset = 1 + afb_dim1; afb -= afb_offset; --ipiv; --r__; --c__; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; nofact = _starpu_lsame_(fact, "N"); equil = _starpu_lsame_(fact, "E"); notran = _starpu_lsame_(trans, "N"); if (nofact || equil) { *(unsigned char *)equed = 'N'; rowequ = FALSE_; colequ = FALSE_; } else { rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, "B"); colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, "B"); smlnum = _starpu_dlamch_("Safe minimum"); bignum = 1. / smlnum; } /* Test the input parameters. */ if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*kl < 0) { *info = -4; } else if (*ku < 0) { *info = -5; } else if (*nrhs < 0) { *info = -6; } else if (*ldab < *kl + *ku + 1) { *info = -8; } else if (*ldafb < (*kl << 1) + *ku + 1) { *info = -10; } else if (_starpu_lsame_(fact, "F") && ! (rowequ || colequ || _starpu_lsame_(equed, "N"))) { *info = -12; } else { if (rowequ) { rcmin = bignum; rcmax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = rcmin, d__2 = r__[j]; rcmin = min(d__1,d__2); /* Computing MAX */ d__1 = rcmax, d__2 = r__[j]; rcmax = max(d__1,d__2); /* L10: */ } if (rcmin <= 0.) { *info = -13; } else if (*n > 0) { rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); } else { rowcnd = 1.; } } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = rcmin, d__2 = c__[j]; rcmin = min(d__1,d__2); /* Computing MAX */ d__1 = rcmax, d__2 = c__[j]; rcmax = max(d__1,d__2); /* L20: */ } if (rcmin <= 0.) { *info = -14; } else if (*n > 0) { colcnd = max(rcmin,smlnum) / min(rcmax,bignum); } else { colcnd = 1.; } } if (*info == 0) { if (*ldb < max(1,*n)) { *info = -16; } else if (*ldx < max(1,*n)) { *info = -18; } } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGBSVX", &i__1); return 0; } if (equil) { /* Compute row and column scalings to equilibrate the matrix A. */ _starpu_dgbequ_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], &rowcnd, &colcnd, &amax, &infequ); if (infequ == 0) { /* Equilibrate the matrix. */ _starpu_dlaqgb_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], & rowcnd, &colcnd, &amax, equed); rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, "B"); colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, "B"); } } /* Scale the right hand side. */ if (notran) { if (rowequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = r__[i__] * b[i__ + j * b_dim1]; /* L30: */ } /* L40: */ } } } else if (colequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = c__[i__] * b[i__ + j * b_dim1]; /* L50: */ } /* L60: */ } } if (nofact || equil) { /* Compute the LU factorization of the band matrix A. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = j - *ku; j1 = max(i__2,1); /* Computing MIN */ i__2 = j + *kl; j2 = min(i__2,*n); i__2 = j2 - j1 + 1; _starpu_dcopy_(&i__2, &ab[*ku + 1 - j + j1 + j * ab_dim1], &c__1, &afb[* kl + *ku + 1 - j + j1 + j * afb_dim1], &c__1); /* L70: */ } _starpu_dgbtrf_(n, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], info); /* Return if INFO is non-zero. */ if (*info > 0) { /* Compute the reciprocal pivot growth factor of the */ /* leading rank-deficient INFO columns of A. */ anorm = 0.; i__1 = *info; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = *ku + 2 - j; /* Computing MIN */ i__4 = *n + *ku + 1 - j, i__5 = *kl + *ku + 1; i__3 = min(i__4,i__5); for (i__ = max(i__2,1); i__ <= i__3; ++i__) { /* Computing MAX */ d__2 = anorm, d__3 = (d__1 = ab[i__ + j * ab_dim1], abs( d__1)); anorm = max(d__2,d__3); /* L80: */ } /* L90: */ } /* Computing MIN */ i__3 = *info - 1, i__2 = *kl + *ku; i__1 = min(i__3,i__2); /* Computing MAX */ i__4 = 1, i__5 = *kl + *ku + 2 - *info; rpvgrw = _starpu_dlantb_("M", "U", "N", info, &i__1, &afb[max(i__4, i__5) + afb_dim1], ldafb, &work[1]); if (rpvgrw == 0.) { rpvgrw = 1.; } else { rpvgrw = anorm / rpvgrw; } work[1] = rpvgrw; *rcond = 0.; return 0; } } /* Compute the norm of the matrix A and the */ /* reciprocal pivot growth factor RPVGRW. */ if (notran) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = _starpu_dlangb_(norm, n, kl, ku, &ab[ab_offset], ldab, &work[1]); i__1 = *kl + *ku; rpvgrw = _starpu_dlantb_("M", "U", "N", n, &i__1, &afb[afb_offset], ldafb, &work[ 1]); if (rpvgrw == 0.) { rpvgrw = 1.; } else { rpvgrw = _starpu_dlangb_("M", n, kl, ku, &ab[ab_offset], ldab, &work[1]) / rpvgrw; } /* Compute the reciprocal of the condition number of A. */ _starpu_dgbcon_(norm, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], &anorm, rcond, &work[1], &iwork[1], info); /* Compute the solution matrix X. */ _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); _starpu_dgbtrs_(trans, n, kl, ku, nrhs, &afb[afb_offset], ldafb, &ipiv[1], &x[ x_offset], ldx, info); /* Use iterative refinement to improve the computed solution and */ /* compute error bounds and backward error estimates for it. */ _starpu_dgbrfs_(trans, n, kl, ku, nrhs, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, &ipiv[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], & berr[1], &work[1], &iwork[1], info); /* Transform the solution matrix X to a solution of the original */ /* system. */ if (notran) { if (colequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { x[i__ + j * x_dim1] = c__[i__] * x[i__ + j * x_dim1]; /* L100: */ } /* L110: */ } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] /= colcnd; /* L120: */ } } } else if (rowequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { x[i__ + j * x_dim1] = r__[i__] * x[i__ + j * x_dim1]; /* L130: */ } /* L140: */ } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] /= rowcnd; /* L150: */ } } /* Set INFO = N+1 if the matrix is singular to working precision. */ if (*rcond < _starpu_dlamch_("Epsilon")) { *info = *n + 1; } work[1] = rpvgrw; return 0; /* End of DGBSVX */ } /* _starpu_dgbsvx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgbsvxx.c000066400000000000000000000744561507764646700211010ustar00rootroot00000000000000/* dgbsvxx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgbsvxx_(char *fact, char *trans, integer *n, integer * kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, doublereal *params, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, x_dim1, x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2; doublereal d__1, d__2; /* Local variables */ integer i__, j; doublereal amax; extern doublereal _starpu_dla_gbrpvgrw__(integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); doublereal rcmin, rcmax; logical equil; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlaqgb_(integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, char *); doublereal colcnd; extern /* Subroutine */ int _starpu_dgbtrf_(integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *); logical nofact; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dgbtrs_(char *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); integer infequ; logical colequ; doublereal rowcnd; logical notran; doublereal smlnum; logical rowequ; extern /* Subroutine */ int _starpu_dlascl2_(integer *, integer *, doublereal *, doublereal *, integer *), _starpu_dgbequb_(integer *, integer *, integer * , integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dgbrfsx_( char *, char *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGBSVXX uses the LU factorization to compute the solution to a */ /* double precision system of linear equations A * X = B, where A is an */ /* N-by-N matrix and X and B are N-by-NRHS matrices. */ /* If requested, both normwise and maximum componentwise error bounds */ /* are returned. DGBSVXX will return a solution with a tiny */ /* guaranteed error (O(eps) where eps is the working machine */ /* precision) unless the matrix is very ill-conditioned, in which */ /* case a warning is returned. Relevant condition numbers also are */ /* calculated and returned. */ /* DGBSVXX accepts user-provided factorizations and equilibration */ /* factors; see the definitions of the FACT and EQUED options. */ /* Solving with refinement and using a factorization from a previous */ /* DGBSVXX call will also produce a solution with either O(eps) */ /* errors or warnings, but we cannot make that claim for general */ /* user-provided factorizations and equilibration factors if they */ /* differ from what DGBSVXX would itself produce. */ /* Description */ /* =========== */ /* The following steps are performed: */ /* 1. If FACT = 'E', double precision scaling factors are computed to equilibrate */ /* the system: */ /* TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B */ /* TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */ /* TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */ /* Whether or not the system will be equilibrated depends on the */ /* scaling of the matrix A, but if equilibration is used, A is */ /* overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */ /* or diag(C)*B (if TRANS = 'T' or 'C'). */ /* 2. If FACT = 'N' or 'E', the LU decomposition is used to factor */ /* the matrix A (after equilibration if FACT = 'E') as */ /* A = P * L * U, */ /* where P is a permutation matrix, L is a unit lower triangular */ /* matrix, and U is upper triangular. */ /* 3. If some U(i,i)=0, so that U is exactly singular, then the */ /* routine returns with INFO = i. Otherwise, the factored form of A */ /* is used to estimate the condition number of the matrix A (see */ /* argument RCOND). If the reciprocal of the condition number is less */ /* than machine precision, the routine still goes on to solve for X */ /* and compute error bounds as described below. */ /* 4. The system of equations is solved for X using the factored form */ /* of A. */ /* 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */ /* the routine will use iterative refinement to try to get a small */ /* error and error bounds. Refinement calculates the residual to at */ /* least twice the working precision. */ /* 6. If equilibration was used, the matrix X is premultiplied by */ /* diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */ /* that it solves the original system before equilibration. */ /* Arguments */ /* ========= */ /* Some optional parameters are bundled in the PARAMS array. These */ /* settings determine how refinement is performed, but often the */ /* defaults are acceptable. If the defaults are acceptable, users */ /* can pass NPARAMS = 0 which prevents the source code from accessing */ /* the PARAMS argument. */ /* FACT (input) CHARACTER*1 */ /* Specifies whether or not the factored form of the matrix A is */ /* supplied on entry, and if not, whether the matrix A should be */ /* equilibrated before it is factored. */ /* = 'F': On entry, AF and IPIV contain the factored form of A. */ /* If EQUED is not 'N', the matrix A has been */ /* equilibrated with scaling factors given by R and C. */ /* A, AF, and IPIV are not modified. */ /* = 'N': The matrix A will be copied to AF and factored. */ /* = 'E': The matrix A will be equilibrated if necessary, then */ /* copied to AF and factored. */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate Transpose = Transpose) */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ /* The j-th column of A is stored in the j-th column of the */ /* array AB as follows: */ /* AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) */ /* If FACT = 'F' and EQUED is not 'N', then AB must have been */ /* equilibrated by the scaling factors in R and/or C. AB is not */ /* modified if FACT = 'F' or 'N', or if FACT = 'E' and */ /* EQUED = 'N' on exit. */ /* On exit, if EQUED .ne. 'N', A is scaled as follows: */ /* EQUED = 'R': A := diag(R) * A */ /* EQUED = 'C': A := A * diag(C) */ /* EQUED = 'B': A := diag(R) * A * diag(C). */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KL+KU+1. */ /* AFB (input or output) DOUBLE PRECISION array, dimension (LDAFB,N) */ /* If FACT = 'F', then AFB is an input argument and on entry */ /* contains details of the LU factorization of the band matrix */ /* A, as computed by DGBTRF. U is stored as an upper triangular */ /* band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */ /* and the multipliers used during the factorization are stored */ /* in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is */ /* the factored form of the equilibrated matrix A. */ /* If FACT = 'N', then AF is an output argument and on exit */ /* returns the factors L and U from the factorization A = P*L*U */ /* of the original matrix A. */ /* If FACT = 'E', then AF is an output argument and on exit */ /* returns the factors L and U from the factorization A = P*L*U */ /* of the equilibrated matrix A (see the description of A for */ /* the form of the equilibrated matrix). */ /* LDAFB (input) INTEGER */ /* The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. */ /* IPIV (input or output) INTEGER array, dimension (N) */ /* If FACT = 'F', then IPIV is an input argument and on entry */ /* contains the pivot indices from the factorization A = P*L*U */ /* as computed by DGETRF; row i of the matrix was interchanged */ /* with row IPIV(i). */ /* If FACT = 'N', then IPIV is an output argument and on exit */ /* contains the pivot indices from the factorization A = P*L*U */ /* of the original matrix A. */ /* If FACT = 'E', then IPIV is an output argument and on exit */ /* contains the pivot indices from the factorization A = P*L*U */ /* of the equilibrated matrix A. */ /* EQUED (input or output) CHARACTER*1 */ /* Specifies the form of equilibration that was done. */ /* = 'N': No equilibration (always true if FACT = 'N'). */ /* = 'R': Row equilibration, i.e., A has been premultiplied by */ /* diag(R). */ /* = 'C': Column equilibration, i.e., A has been postmultiplied */ /* by diag(C). */ /* = 'B': Both row and column equilibration, i.e., A has been */ /* replaced by diag(R) * A * diag(C). */ /* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ /* output argument. */ /* R (input or output) DOUBLE PRECISION array, dimension (N) */ /* The row scale factors for A. If EQUED = 'R' or 'B', A is */ /* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ /* is not accessed. R is an input argument if FACT = 'F'; */ /* otherwise, R is an output argument. If FACT = 'F' and */ /* EQUED = 'R' or 'B', each element of R must be positive. */ /* If R is output, each element of R is a power of the radix. */ /* If R is input, each element of R should be a power of the radix */ /* to ensure a reliable solution and error estimates. Scaling by */ /* powers of the radix does not cause rounding errors unless the */ /* result underflows or overflows. Rounding errors during scaling */ /* lead to refining with a matrix that is not equivalent to the */ /* input matrix, producing error estimates that may not be */ /* reliable. */ /* C (input or output) DOUBLE PRECISION array, dimension (N) */ /* The column scale factors for A. If EQUED = 'C' or 'B', A is */ /* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ /* is not accessed. C is an input argument if FACT = 'F'; */ /* otherwise, C is an output argument. If FACT = 'F' and */ /* EQUED = 'C' or 'B', each element of C must be positive. */ /* If C is output, each element of C is a power of the radix. */ /* If C is input, each element of C should be a power of the radix */ /* to ensure a reliable solution and error estimates. Scaling by */ /* powers of the radix does not cause rounding errors unless the */ /* result underflows or overflows. Rounding errors during scaling */ /* lead to refining with a matrix that is not equivalent to the */ /* input matrix, producing error estimates that may not be */ /* reliable. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS right hand side matrix B. */ /* On exit, */ /* if EQUED = 'N', B is not modified; */ /* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ /* diag(R)*B; */ /* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ /* overwritten by diag(C)*B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* If INFO = 0, the N-by-NRHS solution matrix X to the original */ /* system of equations. Note that A and B are modified on exit */ /* if EQUED .ne. 'N', and the solution to the equilibrated system is */ /* inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or */ /* inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* Reciprocal scaled condition number. This is an estimate of the */ /* reciprocal Skeel condition number of the matrix A after */ /* equilibration (if done). If this is less than the machine */ /* precision (in particular, if it is zero), the matrix is singular */ /* to working precision. Note that the error may still be small even */ /* if this number is very small and the matrix appears ill- */ /* conditioned. */ /* RPVGRW (output) DOUBLE PRECISION */ /* Reciprocal pivot growth. On exit, this contains the reciprocal */ /* pivot growth factor norm(A)/norm(U). The "max absolute element" */ /* norm is used. If this is much less than 1, then the stability of */ /* the LU factorization of the (equilibrated) matrix A could be poor. */ /* This also means that the solution X, estimated condition numbers, */ /* and error bounds could be unreliable. If factorization fails with */ /* 0 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly singular, so */ /* the solution and error bounds could not be computed. RCOND = 0 */ /* is returned. */ /* = N+J: The solution corresponding to the Jth right-hand side is */ /* not guaranteed. The solutions corresponding to other right- */ /* hand sides K with K > J may not be guaranteed as well, but */ /* only the first such right-hand side is reported. If a small */ /* componentwise error is not requested (PARAMS(3) = 0.0) then */ /* the Jth right-hand side is the first with a normwise error */ /* bound that is not guaranteed (the smallest J such */ /* that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ /* the Jth right-hand side is the first with either a normwise or */ /* componentwise error bound that is not guaranteed (the smallest */ /* J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ /* ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ /* ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ /* about all of the right-hand sides check ERR_BNDS_NORM or */ /* ERR_BNDS_COMP. */ /* ================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ err_bnds_comp_dim1 = *nrhs; err_bnds_comp_offset = 1 + err_bnds_comp_dim1; err_bnds_comp__ -= err_bnds_comp_offset; err_bnds_norm_dim1 = *nrhs; err_bnds_norm_offset = 1 + err_bnds_norm_dim1; err_bnds_norm__ -= err_bnds_norm_offset; ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; afb_dim1 = *ldafb; afb_offset = 1 + afb_dim1; afb -= afb_offset; --ipiv; --r__; --c__; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --berr; --params; --work; --iwork; /* Function Body */ *info = 0; nofact = _starpu_lsame_(fact, "N"); equil = _starpu_lsame_(fact, "E"); notran = _starpu_lsame_(trans, "N"); smlnum = _starpu_dlamch_("Safe minimum"); bignum = 1. / smlnum; if (nofact || equil) { *(unsigned char *)equed = 'N'; rowequ = FALSE_; colequ = FALSE_; } else { rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, "B"); colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, "B"); } /* Default is failure. If an input parameter is wrong or */ /* factorization fails, make everything look horrible. Only the */ /* pivot growth is set here, the rest is initialized in DGBRFSX. */ *rpvgrw = 0.; /* Test the input parameters. PARAMS is not tested until DGBRFSX. */ if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*kl < 0) { *info = -4; } else if (*ku < 0) { *info = -5; } else if (*nrhs < 0) { *info = -6; } else if (*ldab < *kl + *ku + 1) { *info = -8; } else if (*ldafb < (*kl << 1) + *ku + 1) { *info = -10; } else if (_starpu_lsame_(fact, "F") && ! (rowequ || colequ || _starpu_lsame_(equed, "N"))) { *info = -12; } else { if (rowequ) { rcmin = bignum; rcmax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = rcmin, d__2 = r__[j]; rcmin = min(d__1,d__2); /* Computing MAX */ d__1 = rcmax, d__2 = r__[j]; rcmax = max(d__1,d__2); /* L10: */ } if (rcmin <= 0.) { *info = -13; } else if (*n > 0) { rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); } else { rowcnd = 1.; } } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = rcmin, d__2 = c__[j]; rcmin = min(d__1,d__2); /* Computing MAX */ d__1 = rcmax, d__2 = c__[j]; rcmax = max(d__1,d__2); /* L20: */ } if (rcmin <= 0.) { *info = -14; } else if (*n > 0) { colcnd = max(rcmin,smlnum) / min(rcmax,bignum); } else { colcnd = 1.; } } if (*info == 0) { if (*ldb < max(1,*n)) { *info = -15; } else if (*ldx < max(1,*n)) { *info = -16; } } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGBSVXX", &i__1); return 0; } if (equil) { /* Compute row and column scalings to equilibrate the matrix A. */ _starpu_dgbequb_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], & rowcnd, &colcnd, &amax, &infequ); if (infequ == 0) { /* Equilibrate the matrix. */ _starpu_dlaqgb_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], & rowcnd, &colcnd, &amax, equed); rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, "B"); colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, "B"); } /* If the scaling factors are not applied, set them to 1.0. */ if (! rowequ) { i__1 = *n; for (j = 1; j <= i__1; ++j) { r__[j] = 1.; } } if (! colequ) { i__1 = *n; for (j = 1; j <= i__1; ++j) { c__[j] = 1.; } } } /* Scale the right hand side. */ if (notran) { if (rowequ) { _starpu_dlascl2_(n, nrhs, &r__[1], &b[b_offset], ldb); } } else { if (colequ) { _starpu_dlascl2_(n, nrhs, &c__[1], &b[b_offset], ldb); } } if (nofact || equil) { /* Compute the LU factorization of A. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = (*kl << 1) + *ku + 1; for (i__ = *kl + 1; i__ <= i__2; ++i__) { afb[i__ + j * afb_dim1] = ab[i__ - *kl + j * ab_dim1]; /* L30: */ } /* L40: */ } _starpu_dgbtrf_(n, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], info); /* Return if INFO is non-zero. */ if (*info > 0) { /* Pivot in column INFO is exactly 0 */ /* Compute the reciprocal pivot growth factor of the */ /* leading rank-deficient INFO columns of A. */ *rpvgrw = _starpu_dla_gbrpvgrw__(n, kl, ku, info, &ab[ab_offset], ldab, & afb[afb_offset], ldafb); return 0; } } /* Compute the reciprocal pivot growth factor RPVGRW. */ *rpvgrw = _starpu_dla_gbrpvgrw__(n, kl, ku, n, &ab[ab_offset], ldab, &afb[ afb_offset], ldafb); /* Compute the solution matrix X. */ _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); _starpu_dgbtrs_(trans, n, kl, ku, nrhs, &afb[afb_offset], ldafb, &ipiv[1], &x[ x_offset], ldx, info); /* Use iterative refinement to improve the computed solution and */ /* compute error bounds and backward error estimates for it. */ _starpu_dgbrfsx_(trans, equed, n, kl, ku, nrhs, &ab[ab_offset], ldab, &afb[ afb_offset], ldafb, &ipiv[1], &r__[1], &c__[1], &b[b_offset], ldb, &x[x_offset], ldx, rcond, &berr[1], n_err_bnds__, & err_bnds_norm__[err_bnds_norm_offset], &err_bnds_comp__[ err_bnds_comp_offset], nparams, ¶ms[1], &work[1], &iwork[1], info); /* Scale solutions. */ if (colequ && notran) { _starpu_dlascl2_(n, nrhs, &c__[1], &x[x_offset], ldx); } else if (rowequ && ! notran) { _starpu_dlascl2_(n, nrhs, &r__[1], &x[x_offset], ldx); } return 0; /* End of DGBSVXX */ } /* _starpu_dgbsvxx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgbtf2.c000066400000000000000000000171671507764646700205600ustar00rootroot00000000000000/* dgbtf2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b9 = -1.; /* Subroutine */ int _starpu_dgbtf2_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, integer *ipiv, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; doublereal d__1; /* Local variables */ integer i__, j, km, jp, ju, kv; extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGBTF2 computes an LU factorization of a real m-by-n band matrix A */ /* using partial pivoting with row interchanges. */ /* This is the unblocked version of the algorithm, calling Level 2 BLAS. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the matrix A in band storage, in rows KL+1 to */ /* 2*KL+KU+1; rows 1 to KL of the array need not be set. */ /* The j-th column of A is stored in the j-th column of the */ /* array AB as follows: */ /* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) */ /* On exit, details of the factorization: U is stored as an */ /* upper triangular band matrix with KL+KU superdiagonals in */ /* rows 1 to KL+KU+1, and the multipliers used during the */ /* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */ /* See below for further details. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ /* IPIV (output) INTEGER array, dimension (min(M,N)) */ /* The pivot indices; for 1 <= i <= min(M,N), row i of the */ /* matrix was interchanged with row IPIV(i). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, and division by zero will occur if it is used */ /* to solve a system of equations. */ /* Further Details */ /* =============== */ /* The band storage scheme is illustrated by the following example, when */ /* M = N = 6, KL = 2, KU = 1: */ /* On entry: On exit: */ /* * * * + + + * * * u14 u25 u36 */ /* * * + + + + * * u13 u24 u35 u46 */ /* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ /* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ /* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * */ /* a31 a42 a53 a64 * * m31 m42 m53 m64 * * */ /* Array elements marked * are not used by the routine; elements marked */ /* + need not be set on entry, but are required by the routine to store */ /* elements of U, because of fill-in resulting from the row */ /* interchanges. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* KV is the number of superdiagonals in the factor U, allowing for */ /* fill-in. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --ipiv; /* Function Body */ kv = *ku + *kl; /* Test the input parameters. */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*ldab < *kl + kv + 1) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGBTF2", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Gaussian elimination with partial pivoting */ /* Set fill-in elements in columns KU+2 to KV to zero. */ i__1 = min(kv,*n); for (j = *ku + 2; j <= i__1; ++j) { i__2 = *kl; for (i__ = kv - j + 2; i__ <= i__2; ++i__) { ab[i__ + j * ab_dim1] = 0.; /* L10: */ } /* L20: */ } /* JU is the index of the last column affected by the current stage */ /* of the factorization. */ ju = 1; i__1 = min(*m,*n); for (j = 1; j <= i__1; ++j) { /* Set fill-in elements in column J+KV to zero. */ if (j + kv <= *n) { i__2 = *kl; for (i__ = 1; i__ <= i__2; ++i__) { ab[i__ + (j + kv) * ab_dim1] = 0.; /* L30: */ } } /* Find pivot and test for singularity. KM is the number of */ /* subdiagonal elements in the current column. */ /* Computing MIN */ i__2 = *kl, i__3 = *m - j; km = min(i__2,i__3); i__2 = km + 1; jp = _starpu_idamax_(&i__2, &ab[kv + 1 + j * ab_dim1], &c__1); ipiv[j] = jp + j - 1; if (ab[kv + jp + j * ab_dim1] != 0.) { /* Computing MAX */ /* Computing MIN */ i__4 = j + *ku + jp - 1; i__2 = ju, i__3 = min(i__4,*n); ju = max(i__2,i__3); /* Apply interchange to columns J to JU. */ if (jp != 1) { i__2 = ju - j + 1; i__3 = *ldab - 1; i__4 = *ldab - 1; _starpu_dswap_(&i__2, &ab[kv + jp + j * ab_dim1], &i__3, &ab[kv + 1 + j * ab_dim1], &i__4); } if (km > 0) { /* Compute multipliers. */ d__1 = 1. / ab[kv + 1 + j * ab_dim1]; _starpu_dscal_(&km, &d__1, &ab[kv + 2 + j * ab_dim1], &c__1); /* Update trailing submatrix within the band. */ if (ju > j) { i__2 = ju - j; i__3 = *ldab - 1; i__4 = *ldab - 1; _starpu_dger_(&km, &i__2, &c_b9, &ab[kv + 2 + j * ab_dim1], &c__1, &ab[kv + (j + 1) * ab_dim1], &i__3, &ab[kv + 1 + (j + 1) * ab_dim1], &i__4); } } } else { /* If pivot is zero, set INFO to the index of the pivot */ /* unless a zero pivot has already been found. */ if (*info == 0) { *info = j; } } /* L40: */ } return 0; /* End of DGBTF2 */ } /* _starpu_dgbtf2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgbtrf.c000066400000000000000000000412371507764646700206530ustar00rootroot00000000000000/* dgbtrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__65 = 65; static doublereal c_b18 = -1.; static doublereal c_b31 = 1.; /* Subroutine */ int _starpu_dgbtrf_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, integer *ipiv, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublereal d__1; /* Local variables */ integer i__, j, i2, i3, j2, j3, k2, jb, nb, ii, jj, jm, ip, jp, km, ju, kv, nw; extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal temp; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer * , doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_( integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer * ); doublereal work13[4160] /* was [65][64] */, work31[4160] /* was [65][64] */; extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dgbtf2_( integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGBTRF computes an LU factorization of a real m-by-n band matrix A */ /* using partial pivoting with row interchanges. */ /* This is the blocked version of the algorithm, calling Level 3 BLAS. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the matrix A in band storage, in rows KL+1 to */ /* 2*KL+KU+1; rows 1 to KL of the array need not be set. */ /* The j-th column of A is stored in the j-th column of the */ /* array AB as follows: */ /* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) */ /* On exit, details of the factorization: U is stored as an */ /* upper triangular band matrix with KL+KU superdiagonals in */ /* rows 1 to KL+KU+1, and the multipliers used during the */ /* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */ /* See below for further details. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ /* IPIV (output) INTEGER array, dimension (min(M,N)) */ /* The pivot indices; for 1 <= i <= min(M,N), row i of the */ /* matrix was interchanged with row IPIV(i). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, and division by zero will occur if it is used */ /* to solve a system of equations. */ /* Further Details */ /* =============== */ /* The band storage scheme is illustrated by the following example, when */ /* M = N = 6, KL = 2, KU = 1: */ /* On entry: On exit: */ /* * * * + + + * * * u14 u25 u36 */ /* * * + + + + * * u13 u24 u35 u46 */ /* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ /* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ /* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * */ /* a31 a42 a53 a64 * * m31 m42 m53 m64 * * */ /* Array elements marked * are not used by the routine; elements marked */ /* + need not be set on entry, but are required by the routine to store */ /* elements of U because of fill-in resulting from the row interchanges. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* KV is the number of superdiagonals in the factor U, allowing for */ /* fill-in */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --ipiv; /* Function Body */ kv = *ku + *kl; /* Test the input parameters. */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*ldab < *kl + kv + 1) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGBTRF", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Determine the block size for this environment */ nb = _starpu_ilaenv_(&c__1, "DGBTRF", " ", m, n, kl, ku); /* The block size must not exceed the limit set by the size of the */ /* local arrays WORK13 and WORK31. */ nb = min(nb,64); if (nb <= 1 || nb > *kl) { /* Use unblocked code */ _starpu_dgbtf2_(m, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info); } else { /* Use blocked code */ /* Zero the superdiagonal elements of the work array WORK13 */ i__1 = nb; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { work13[i__ + j * 65 - 66] = 0.; /* L10: */ } /* L20: */ } /* Zero the subdiagonal elements of the work array WORK31 */ i__1 = nb; for (j = 1; j <= i__1; ++j) { i__2 = nb; for (i__ = j + 1; i__ <= i__2; ++i__) { work31[i__ + j * 65 - 66] = 0.; /* L30: */ } /* L40: */ } /* Gaussian elimination with partial pivoting */ /* Set fill-in elements in columns KU+2 to KV to zero */ i__1 = min(kv,*n); for (j = *ku + 2; j <= i__1; ++j) { i__2 = *kl; for (i__ = kv - j + 2; i__ <= i__2; ++i__) { ab[i__ + j * ab_dim1] = 0.; /* L50: */ } /* L60: */ } /* JU is the index of the last column affected by the current */ /* stage of the factorization */ ju = 1; i__1 = min(*m,*n); i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__3 = nb, i__4 = min(*m,*n) - j + 1; jb = min(i__3,i__4); /* The active part of the matrix is partitioned */ /* A11 A12 A13 */ /* A21 A22 A23 */ /* A31 A32 A33 */ /* Here A11, A21 and A31 denote the current block of JB columns */ /* which is about to be factorized. The number of rows in the */ /* partitioning are JB, I2, I3 respectively, and the numbers */ /* of columns are JB, J2, J3. The superdiagonal elements of A13 */ /* and the subdiagonal elements of A31 lie outside the band. */ /* Computing MIN */ i__3 = *kl - jb, i__4 = *m - j - jb + 1; i2 = min(i__3,i__4); /* Computing MIN */ i__3 = jb, i__4 = *m - j - *kl + 1; i3 = min(i__3,i__4); /* J2 and J3 are computed after JU has been updated. */ /* Factorize the current block of JB columns */ i__3 = j + jb - 1; for (jj = j; jj <= i__3; ++jj) { /* Set fill-in elements in column JJ+KV to zero */ if (jj + kv <= *n) { i__4 = *kl; for (i__ = 1; i__ <= i__4; ++i__) { ab[i__ + (jj + kv) * ab_dim1] = 0.; /* L70: */ } } /* Find pivot and test for singularity. KM is the number of */ /* subdiagonal elements in the current column. */ /* Computing MIN */ i__4 = *kl, i__5 = *m - jj; km = min(i__4,i__5); i__4 = km + 1; jp = _starpu_idamax_(&i__4, &ab[kv + 1 + jj * ab_dim1], &c__1); ipiv[jj] = jp + jj - j; if (ab[kv + jp + jj * ab_dim1] != 0.) { /* Computing MAX */ /* Computing MIN */ i__6 = jj + *ku + jp - 1; i__4 = ju, i__5 = min(i__6,*n); ju = max(i__4,i__5); if (jp != 1) { /* Apply interchange to columns J to J+JB-1 */ if (jp + jj - 1 < j + *kl) { i__4 = *ldab - 1; i__5 = *ldab - 1; _starpu_dswap_(&jb, &ab[kv + 1 + jj - j + j * ab_dim1], & i__4, &ab[kv + jp + jj - j + j * ab_dim1], &i__5); } else { /* The interchange affects columns J to JJ-1 of A31 */ /* which are stored in the work array WORK31 */ i__4 = jj - j; i__5 = *ldab - 1; _starpu_dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &i__5, &work31[jp + jj - j - *kl - 1], & c__65); i__4 = j + jb - jj; i__5 = *ldab - 1; i__6 = *ldab - 1; _starpu_dswap_(&i__4, &ab[kv + 1 + jj * ab_dim1], &i__5, & ab[kv + jp + jj * ab_dim1], &i__6); } } /* Compute multipliers */ d__1 = 1. / ab[kv + 1 + jj * ab_dim1]; _starpu_dscal_(&km, &d__1, &ab[kv + 2 + jj * ab_dim1], &c__1); /* Update trailing submatrix within the band and within */ /* the current block. JM is the index of the last column */ /* which needs to be updated. */ /* Computing MIN */ i__4 = ju, i__5 = j + jb - 1; jm = min(i__4,i__5); if (jm > jj) { i__4 = jm - jj; i__5 = *ldab - 1; i__6 = *ldab - 1; _starpu_dger_(&km, &i__4, &c_b18, &ab[kv + 2 + jj * ab_dim1], &c__1, &ab[kv + (jj + 1) * ab_dim1], &i__5, & ab[kv + 1 + (jj + 1) * ab_dim1], &i__6); } } else { /* If pivot is zero, set INFO to the index of the pivot */ /* unless a zero pivot has already been found. */ if (*info == 0) { *info = jj; } } /* Copy current column of A31 into the work array WORK31 */ /* Computing MIN */ i__4 = jj - j + 1; nw = min(i__4,i3); if (nw > 0) { _starpu_dcopy_(&nw, &ab[kv + *kl + 1 - jj + j + jj * ab_dim1], & c__1, &work31[(jj - j + 1) * 65 - 65], &c__1); } /* L80: */ } if (j + jb <= *n) { /* Apply the row interchanges to the other blocks. */ /* Computing MIN */ i__3 = ju - j + 1; j2 = min(i__3,kv) - jb; /* Computing MAX */ i__3 = 0, i__4 = ju - j - kv + 1; j3 = max(i__3,i__4); /* Use DLASWP to apply the row interchanges to A12, A22, and */ /* A32. */ i__3 = *ldab - 1; _starpu_dlaswp_(&j2, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, & c__1, &jb, &ipiv[j], &c__1); /* Adjust the pivot indices. */ i__3 = j + jb - 1; for (i__ = j; i__ <= i__3; ++i__) { ipiv[i__] = ipiv[i__] + j - 1; /* L90: */ } /* Apply the row interchanges to A13, A23, and A33 */ /* columnwise. */ k2 = j - 1 + jb + j2; i__3 = j3; for (i__ = 1; i__ <= i__3; ++i__) { jj = k2 + i__; i__4 = j + jb - 1; for (ii = j + i__ - 1; ii <= i__4; ++ii) { ip = ipiv[ii]; if (ip != ii) { temp = ab[kv + 1 + ii - jj + jj * ab_dim1]; ab[kv + 1 + ii - jj + jj * ab_dim1] = ab[kv + 1 + ip - jj + jj * ab_dim1]; ab[kv + 1 + ip - jj + jj * ab_dim1] = temp; } /* L100: */ } /* L110: */ } /* Update the relevant part of the trailing submatrix */ if (j2 > 0) { /* Update A12 */ i__3 = *ldab - 1; i__4 = *ldab - 1; _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2, &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4); if (i2 > 0) { /* Update A22 */ i__3 = *ldab - 1; i__4 = *ldab - 1; i__5 = *ldab - 1; _starpu_dgemm_("No transpose", "No transpose", &i2, &j2, &jb, &c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4, &c_b31, &ab[kv + 1 + (j + jb) * ab_dim1], & i__5); } if (i3 > 0) { /* Update A32 */ i__3 = *ldab - 1; i__4 = *ldab - 1; _starpu_dgemm_("No transpose", "No transpose", &i3, &j2, &jb, &c_b18, work31, &c__65, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, &c_b31, &ab[kv + *kl + 1 - jb + (j + jb) * ab_dim1], &i__4); } } if (j3 > 0) { /* Copy the lower triangle of A13 into the work array */ /* WORK13 */ i__3 = j3; for (jj = 1; jj <= i__3; ++jj) { i__4 = jb; for (ii = jj; ii <= i__4; ++ii) { work13[ii + jj * 65 - 66] = ab[ii - jj + 1 + (jj + j + kv - 1) * ab_dim1]; /* L120: */ } /* L130: */ } /* Update A13 in the work array */ i__3 = *ldab - 1; _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3, &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, work13, &c__65); if (i2 > 0) { /* Update A23 */ i__3 = *ldab - 1; i__4 = *ldab - 1; _starpu_dgemm_("No transpose", "No transpose", &i2, &j3, &jb, &c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3, work13, &c__65, &c_b31, &ab[jb + 1 + (j + kv) * ab_dim1], &i__4); } if (i3 > 0) { /* Update A33 */ i__3 = *ldab - 1; _starpu_dgemm_("No transpose", "No transpose", &i3, &j3, &jb, &c_b18, work31, &c__65, work13, &c__65, & c_b31, &ab[*kl + 1 + (j + kv) * ab_dim1], & i__3); } /* Copy the lower triangle of A13 back into place */ i__3 = j3; for (jj = 1; jj <= i__3; ++jj) { i__4 = jb; for (ii = jj; ii <= i__4; ++ii) { ab[ii - jj + 1 + (jj + j + kv - 1) * ab_dim1] = work13[ii + jj * 65 - 66]; /* L140: */ } /* L150: */ } } } else { /* Adjust the pivot indices. */ i__3 = j + jb - 1; for (i__ = j; i__ <= i__3; ++i__) { ipiv[i__] = ipiv[i__] + j - 1; /* L160: */ } } /* Partially undo the interchanges in the current block to */ /* restore the upper triangular form of A31 and copy the upper */ /* triangle of A31 back into place */ i__3 = j; for (jj = j + jb - 1; jj >= i__3; --jj) { jp = ipiv[jj] - jj + 1; if (jp != 1) { /* Apply interchange to columns J to JJ-1 */ if (jp + jj - 1 < j + *kl) { /* The interchange does not affect A31 */ i__4 = jj - j; i__5 = *ldab - 1; i__6 = *ldab - 1; _starpu_dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], & i__5, &ab[kv + jp + jj - j + j * ab_dim1], & i__6); } else { /* The interchange does affect A31 */ i__4 = jj - j; i__5 = *ldab - 1; _starpu_dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], & i__5, &work31[jp + jj - j - *kl - 1], &c__65); } } /* Copy the current column of A31 back into place */ /* Computing MIN */ i__4 = i3, i__5 = jj - j + 1; nw = min(i__4,i__5); if (nw > 0) { _starpu_dcopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &ab[ kv + *kl + 1 - jj + j + jj * ab_dim1], &c__1); } /* L170: */ } /* L180: */ } } return 0; /* End of DGBTRF */ } /* _starpu_dgbtrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgbtrs.c000066400000000000000000000156711507764646700206730ustar00rootroot00000000000000/* dgbtrs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b7 = -1.; static integer c__1 = 1; static doublereal c_b23 = 1.; /* Subroutine */ int _starpu_dgbtrs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, doublereal *ab, integer *ldab, integer *ipiv, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, l, kd, lm; extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dtbsv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); logical lnoti; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical notran; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGBTRS solves a system of linear equations */ /* A * X = B or A' * X = B */ /* with a general band matrix A using the LU factorization computed */ /* by DGBTRF. */ /* Arguments */ /* ========= */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations. */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A'* X = B (Transpose) */ /* = 'C': A'* X = B (Conjugate transpose = Transpose) */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* Details of the LU factorization of the band matrix A, as */ /* computed by DGBTRF. U is stored as an upper triangular band */ /* matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */ /* the multipliers used during the factorization are stored in */ /* rows KL+KU+2 to 2*KL+KU+1. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices; for 1 <= i <= N, row i of the matrix was */ /* interchanged with row IPIV(i). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side matrix B. */ /* On exit, the solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; notran = _starpu_lsame_(trans, "N"); if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_( trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*nrhs < 0) { *info = -5; } else if (*ldab < (*kl << 1) + *ku + 1) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -10; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGBTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } kd = *ku + *kl + 1; lnoti = *kl > 0; if (notran) { /* Solve A*X = B. */ /* Solve L*X = B, overwriting B with X. */ /* L is represented as a product of permutations and unit lower */ /* triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), */ /* where each transformation L(i) is a rank-one modification of */ /* the identity matrix. */ if (lnoti) { i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__2 = *kl, i__3 = *n - j; lm = min(i__2,i__3); l = ipiv[j]; if (l != j) { _starpu_dswap_(nrhs, &b[l + b_dim1], ldb, &b[j + b_dim1], ldb); } _starpu_dger_(&lm, nrhs, &c_b7, &ab[kd + 1 + j * ab_dim1], &c__1, &b[ j + b_dim1], ldb, &b[j + 1 + b_dim1], ldb); /* L10: */ } } i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { /* Solve U*X = B, overwriting B with X. */ i__2 = *kl + *ku; _starpu_dtbsv_("Upper", "No transpose", "Non-unit", n, &i__2, &ab[ ab_offset], ldab, &b[i__ * b_dim1 + 1], &c__1); /* L20: */ } } else { /* Solve A'*X = B. */ i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { /* Solve U'*X = B, overwriting B with X. */ i__2 = *kl + *ku; _starpu_dtbsv_("Upper", "Transpose", "Non-unit", n, &i__2, &ab[ab_offset], ldab, &b[i__ * b_dim1 + 1], &c__1); /* L30: */ } /* Solve L'*X = B, overwriting B with X. */ if (lnoti) { for (j = *n - 1; j >= 1; --j) { /* Computing MIN */ i__1 = *kl, i__2 = *n - j; lm = min(i__1,i__2); _starpu_dgemv_("Transpose", &lm, nrhs, &c_b7, &b[j + 1 + b_dim1], ldb, &ab[kd + 1 + j * ab_dim1], &c__1, &c_b23, &b[j + b_dim1], ldb); l = ipiv[j]; if (l != j) { _starpu_dswap_(nrhs, &b[l + b_dim1], ldb, &b[j + b_dim1], ldb); } /* L40: */ } } } return 0; /* End of DGBTRS */ } /* _starpu_dgbtrs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgebak.c000066400000000000000000000137671507764646700206270ustar00rootroot00000000000000/* dgebak.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, doublereal *scale, integer *m, doublereal *v, integer * ldv, integer *info) { /* System generated locals */ integer v_dim1, v_offset, i__1; /* Local variables */ integer i__, k; doublereal s; integer ii; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); logical leftv; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical rightv; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEBAK forms the right or left eigenvectors of a real general matrix */ /* by backward transformation on the computed eigenvectors of the */ /* balanced matrix output by DGEBAL. */ /* Arguments */ /* ========= */ /* JOB (input) CHARACTER*1 */ /* Specifies the type of backward transformation required: */ /* = 'N', do nothing, return immediately; */ /* = 'P', do backward transformation for permutation only; */ /* = 'S', do backward transformation for scaling only; */ /* = 'B', do backward transformations for both permutation and */ /* scaling. */ /* JOB must be the same as the argument JOB supplied to DGEBAL. */ /* SIDE (input) CHARACTER*1 */ /* = 'R': V contains right eigenvectors; */ /* = 'L': V contains left eigenvectors. */ /* N (input) INTEGER */ /* The number of rows of the matrix V. N >= 0. */ /* ILO (input) INTEGER */ /* IHI (input) INTEGER */ /* The integers ILO and IHI determined by DGEBAL. */ /* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ /* SCALE (input) DOUBLE PRECISION array, dimension (N) */ /* Details of the permutation and scaling factors, as returned */ /* by DGEBAL. */ /* M (input) INTEGER */ /* The number of columns of the matrix V. M >= 0. */ /* V (input/output) DOUBLE PRECISION array, dimension (LDV,M) */ /* On entry, the matrix of right or left eigenvectors to be */ /* transformed, as returned by DHSEIN or DTREVC. */ /* On exit, V is overwritten by the transformed eigenvectors. */ /* LDV (input) INTEGER */ /* The leading dimension of the array V. LDV >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode and Test the input parameters */ /* Parameter adjustments */ --scale; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; /* Function Body */ rightv = _starpu_lsame_(side, "R"); leftv = _starpu_lsame_(side, "L"); *info = 0; if (! _starpu_lsame_(job, "N") && ! _starpu_lsame_(job, "P") && ! _starpu_lsame_(job, "S") && ! _starpu_lsame_(job, "B")) { *info = -1; } else if (! rightv && ! leftv) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -4; } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -5; } else if (*m < 0) { *info = -7; } else if (*ldv < max(1,*n)) { *info = -9; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEBAK", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*m == 0) { return 0; } if (_starpu_lsame_(job, "N")) { return 0; } if (*ilo == *ihi) { goto L30; } /* Backward balance */ if (_starpu_lsame_(job, "S") || _starpu_lsame_(job, "B")) { if (rightv) { i__1 = *ihi; for (i__ = *ilo; i__ <= i__1; ++i__) { s = scale[i__]; _starpu_dscal_(m, &s, &v[i__ + v_dim1], ldv); /* L10: */ } } if (leftv) { i__1 = *ihi; for (i__ = *ilo; i__ <= i__1; ++i__) { s = 1. / scale[i__]; _starpu_dscal_(m, &s, &v[i__ + v_dim1], ldv); /* L20: */ } } } /* Backward permutation */ /* For I = ILO-1 step -1 until 1, */ /* IHI+1 step 1 until N do -- */ L30: if (_starpu_lsame_(job, "P") || _starpu_lsame_(job, "B")) { if (rightv) { i__1 = *n; for (ii = 1; ii <= i__1; ++ii) { i__ = ii; if (i__ >= *ilo && i__ <= *ihi) { goto L40; } if (i__ < *ilo) { i__ = *ilo - ii; } k = (integer) scale[i__]; if (k == i__) { goto L40; } _starpu_dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); L40: ; } } if (leftv) { i__1 = *n; for (ii = 1; ii <= i__1; ++ii) { i__ = ii; if (i__ >= *ilo && i__ <= *ihi) { goto L50; } if (i__ < *ilo) { i__ = *ilo - ii; } k = (integer) scale[i__]; if (k == i__) { goto L50; } _starpu_dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); L50: ; } } } return 0; /* End of DGEBAK */ } /* _starpu_dgebak_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgebal.c000066400000000000000000000227071507764646700206220ustar00rootroot00000000000000/* dgebal.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dgebal_(char *job, integer *n, doublereal *a, integer * lda, integer *ilo, integer *ihi, doublereal *scale, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1, d__2; /* Local variables */ doublereal c__, f, g; integer i__, j, k, l, m; doublereal r__, s, ca, ra; integer ica, ira, iexc; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal sfmin1, sfmin2, sfmax1, sfmax2; extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical noconv; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEBAL balances a general real matrix A. This involves, first, */ /* permuting A by a similarity transformation to isolate eigenvalues */ /* in the first 1 to ILO-1 and last IHI+1 to N elements on the */ /* diagonal; and second, applying a diagonal similarity transformation */ /* to rows and columns ILO to IHI to make the rows and columns as */ /* close in norm as possible. Both steps are optional. */ /* Balancing may reduce the 1-norm of the matrix, and improve the */ /* accuracy of the computed eigenvalues and/or eigenvectors. */ /* Arguments */ /* ========= */ /* JOB (input) CHARACTER*1 */ /* Specifies the operations to be performed on A: */ /* = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 */ /* for i = 1,...,N; */ /* = 'P': permute only; */ /* = 'S': scale only; */ /* = 'B': both permute and scale. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the input matrix A. */ /* On exit, A is overwritten by the balanced matrix. */ /* If JOB = 'N', A is not referenced. */ /* See Further Details. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* ILO (output) INTEGER */ /* IHI (output) INTEGER */ /* ILO and IHI are set to integers such that on exit */ /* A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. */ /* If JOB = 'N' or 'S', ILO = 1 and IHI = N. */ /* SCALE (output) DOUBLE PRECISION array, dimension (N) */ /* Details of the permutations and scaling factors applied to */ /* A. If P(j) is the index of the row and column interchanged */ /* with row and column j and D(j) is the scaling factor */ /* applied to row and column j, then */ /* SCALE(j) = P(j) for j = 1,...,ILO-1 */ /* = D(j) for j = ILO,...,IHI */ /* = P(j) for j = IHI+1,...,N. */ /* The order in which the interchanges are made is N to IHI+1, */ /* then 1 to ILO-1. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* The permutations consist of row and column interchanges which put */ /* the matrix in the form */ /* ( T1 X Y ) */ /* P A P = ( 0 B Z ) */ /* ( 0 0 T2 ) */ /* where T1 and T2 are upper triangular matrices whose eigenvalues lie */ /* along the diagonal. The column indices ILO and IHI mark the starting */ /* and ending columns of the submatrix B. Balancing consists of applying */ /* a diagonal similarity transformation inv(D) * B * D to make the */ /* 1-norms of each row of B and its corresponding column nearly equal. */ /* The output matrix is */ /* ( T1 X*D Y ) */ /* ( 0 inv(D)*B*D inv(D)*Z ). */ /* ( 0 0 T2 ) */ /* Information about the permutations P and the diagonal matrix D is */ /* returned in the vector SCALE. */ /* This subroutine is based on the EISPACK routine BALANC. */ /* Modified by Tzu-Yi Chen, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --scale; /* Function Body */ *info = 0; if (! _starpu_lsame_(job, "N") && ! _starpu_lsame_(job, "P") && ! _starpu_lsame_(job, "S") && ! _starpu_lsame_(job, "B")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEBAL", &i__1); return 0; } k = 1; l = *n; if (*n == 0) { goto L210; } if (_starpu_lsame_(job, "N")) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { scale[i__] = 1.; /* L10: */ } goto L210; } if (_starpu_lsame_(job, "S")) { goto L120; } /* Permutation to isolate eigenvalues if possible */ goto L50; /* Row and column exchange. */ L20: scale[m] = (doublereal) j; if (j == m) { goto L30; } _starpu_dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); i__1 = *n - k + 1; _starpu_dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); L30: switch (iexc) { case 1: goto L40; case 2: goto L80; } /* Search for rows isolating an eigenvalue and push them down. */ L40: if (l == 1) { goto L210; } --l; L50: for (j = l; j >= 1; --j) { i__1 = l; for (i__ = 1; i__ <= i__1; ++i__) { if (i__ == j) { goto L60; } if (a[j + i__ * a_dim1] != 0.) { goto L70; } L60: ; } m = l; iexc = 1; goto L20; L70: ; } goto L90; /* Search for columns isolating an eigenvalue and push them left. */ L80: ++k; L90: i__1 = l; for (j = k; j <= i__1; ++j) { i__2 = l; for (i__ = k; i__ <= i__2; ++i__) { if (i__ == j) { goto L100; } if (a[i__ + j * a_dim1] != 0.) { goto L110; } L100: ; } m = k; iexc = 2; goto L20; L110: ; } L120: i__1 = l; for (i__ = k; i__ <= i__1; ++i__) { scale[i__] = 1.; /* L130: */ } if (_starpu_lsame_(job, "P")) { goto L210; } /* Balance the submatrix in rows K to L. */ /* Iterative loop for norm reduction */ sfmin1 = _starpu_dlamch_("S") / _starpu_dlamch_("P"); sfmax1 = 1. / sfmin1; sfmin2 = sfmin1 * 2.; sfmax2 = 1. / sfmin2; L140: noconv = FALSE_; i__1 = l; for (i__ = k; i__ <= i__1; ++i__) { c__ = 0.; r__ = 0.; i__2 = l; for (j = k; j <= i__2; ++j) { if (j == i__) { goto L150; } c__ += (d__1 = a[j + i__ * a_dim1], abs(d__1)); r__ += (d__1 = a[i__ + j * a_dim1], abs(d__1)); L150: ; } ica = _starpu_idamax_(&l, &a[i__ * a_dim1 + 1], &c__1); ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1)); i__2 = *n - k + 1; ira = _starpu_idamax_(&i__2, &a[i__ + k * a_dim1], lda); ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1)); /* Guard against zero C or R due to underflow. */ if (c__ == 0. || r__ == 0.) { goto L200; } g = r__ / 2.; f = 1.; s = c__ + r__; L160: /* Computing MAX */ d__1 = max(f,c__); /* Computing MIN */ d__2 = min(r__,g); if (c__ >= g || max(d__1,ca) >= sfmax2 || min(d__2,ra) <= sfmin2) { goto L170; } f *= 2.; c__ *= 2.; ca *= 2.; r__ /= 2.; g /= 2.; ra /= 2.; goto L160; L170: g = c__ / 2.; L180: /* Computing MIN */ d__1 = min(f,c__), d__1 = min(d__1,g); if (g < r__ || max(r__,ra) >= sfmax2 || min(d__1,ca) <= sfmin2) { goto L190; } f /= 2.; c__ /= 2.; g /= 2.; ca /= 2.; r__ *= 2.; ra *= 2.; goto L180; /* Now balance. */ L190: if (c__ + r__ >= s * .95) { goto L200; } if (f < 1. && scale[i__] < 1.) { if (f * scale[i__] <= sfmin1) { goto L200; } } if (f > 1. && scale[i__] > 1.) { if (scale[i__] >= sfmax1 / f) { goto L200; } } g = 1. / f; scale[i__] *= f; noconv = TRUE_; i__2 = *n - k + 1; _starpu_dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); _starpu_dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); L200: ; } if (noconv) { goto L140; } L210: *ilo = k; *ihi = l; return 0; /* End of DGEBAL */ } /* _starpu_dgebal_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgebd2.c000066400000000000000000000230061507764646700205240ustar00rootroot00000000000000/* dgebd2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dgebd2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * taup, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEBD2 reduces a real general m by n matrix A to upper or lower */ /* bidiagonal form B by an orthogonal transformation: Q' * A * P = B. */ /* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows in the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns in the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the m by n general matrix to be reduced. */ /* On exit, */ /* if m >= n, the diagonal and the first superdiagonal are */ /* overwritten with the upper bidiagonal matrix B; the */ /* elements below the diagonal, with the array TAUQ, represent */ /* the orthogonal matrix Q as a product of elementary */ /* reflectors, and the elements above the first superdiagonal, */ /* with the array TAUP, represent the orthogonal matrix P as */ /* a product of elementary reflectors; */ /* if m < n, the diagonal and the first subdiagonal are */ /* overwritten with the lower bidiagonal matrix B; the */ /* elements below the first subdiagonal, with the array TAUQ, */ /* represent the orthogonal matrix Q as a product of */ /* elementary reflectors, and the elements above the diagonal, */ /* with the array TAUP, represent the orthogonal matrix P as */ /* a product of elementary reflectors. */ /* See Further Details. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* D (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The diagonal elements of the bidiagonal matrix B: */ /* D(i) = A(i,i). */ /* E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) */ /* The off-diagonal elements of the bidiagonal matrix B: */ /* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */ /* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */ /* TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors which */ /* represent the orthogonal matrix Q. See Further Details. */ /* TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors which */ /* represent the orthogonal matrix P. See Further Details. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (max(M,N)) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* The matrices Q and P are represented as products of elementary */ /* reflectors: */ /* If m >= n, */ /* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) */ /* Each H(i) and G(i) has the form: */ /* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' */ /* where tauq and taup are real scalars, and v and u are real vectors; */ /* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */ /* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */ /* tauq is stored in TAUQ(i) and taup in TAUP(i). */ /* If m < n, */ /* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) */ /* Each H(i) and G(i) has the form: */ /* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' */ /* where tauq and taup are real scalars, and v and u are real vectors; */ /* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */ /* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */ /* tauq is stored in TAUQ(i) and taup in TAUP(i). */ /* The contents of A on exit are illustrated by the following examples: */ /* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): */ /* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) */ /* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) */ /* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) */ /* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) */ /* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) */ /* ( v1 v2 v3 v4 v5 ) */ /* where d and e denote diagonal and off-diagonal elements of B, vi */ /* denotes an element of the vector defining H(i), and ui an element of */ /* the vector defining G(i). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --d__; --e; --tauq; --taup; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info < 0) { i__1 = -(*info); _starpu_xerbla_("DGEBD2", &i__1); return 0; } if (*m >= *n) { /* Reduce to upper bidiagonal form */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ i__2 = *m - i__ + 1; /* Computing MIN */ i__3 = i__ + 1; _starpu_dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m)+ i__ * a_dim1], &c__1, &tauq[i__]); d__[i__] = a[i__ + i__ * a_dim1]; a[i__ + i__ * a_dim1] = 1.; /* Apply H(i) to A(i:m,i+1:n) from the left */ if (i__ < *n) { i__2 = *m - i__ + 1; i__3 = *n - i__; _starpu_dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, & tauq[i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1] ); } a[i__ + i__ * a_dim1] = d__[i__]; if (i__ < *n) { /* Generate elementary reflector G(i) to annihilate */ /* A(i,i+2:n) */ i__2 = *n - i__; /* Computing MIN */ i__3 = i__ + 2; _starpu_dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( i__3, *n)* a_dim1], lda, &taup[i__]); e[i__] = a[i__ + (i__ + 1) * a_dim1]; a[i__ + (i__ + 1) * a_dim1] = 1.; /* Apply G(i) to A(i+1:m,i+1:n) from the right */ i__2 = *m - i__; i__3 = *n - i__; _starpu_dlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); a[i__ + (i__ + 1) * a_dim1] = e[i__]; } else { taup[i__] = 0.; } /* L10: */ } } else { /* Reduce to lower bidiagonal form */ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { /* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ i__2 = *n - i__ + 1; /* Computing MIN */ i__3 = i__ + 1; _starpu_dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n)* a_dim1], lda, &taup[i__]); d__[i__] = a[i__ + i__ * a_dim1]; a[i__ + i__ * a_dim1] = 1.; /* Apply G(i) to A(i+1:m,i:n) from the right */ if (i__ < *m) { i__2 = *m - i__; i__3 = *n - i__ + 1; _starpu_dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, & taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); } a[i__ + i__ * a_dim1] = d__[i__]; if (i__ < *m) { /* Generate elementary reflector H(i) to annihilate */ /* A(i+2:m,i) */ i__2 = *m - i__; /* Computing MIN */ i__3 = i__ + 2; _starpu_dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *m)+ i__ * a_dim1], &c__1, &tauq[i__]); e[i__] = a[i__ + 1 + i__ * a_dim1]; a[i__ + 1 + i__ * a_dim1] = 1.; /* Apply H(i) to A(i+1:m,i+1:n) from the left */ i__2 = *m - i__; i__3 = *n - i__; _starpu_dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); a[i__ + 1 + i__ * a_dim1] = e[i__]; } else { tauq[i__] = 0.; } /* L20: */ } } return 0; /* End of DGEBD2 */ } /* _starpu_dgebd2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgebrd.c000066400000000000000000000260561507764646700206340ustar00rootroot00000000000000/* dgebrd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; static doublereal c_b21 = -1.; static doublereal c_b22 = 1.; /* Subroutine */ int _starpu_dgebrd_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * taup, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, j, nb, nx; doublereal ws; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer nbmin, iinfo, minmn; extern /* Subroutine */ int _starpu_dgebd2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dlabrd_(integer *, integer *, integer * , doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *) , _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ldwrkx, ldwrky, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEBRD reduces a general real M-by-N matrix A to upper or lower */ /* bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */ /* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows in the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns in the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N general matrix to be reduced. */ /* On exit, */ /* if m >= n, the diagonal and the first superdiagonal are */ /* overwritten with the upper bidiagonal matrix B; the */ /* elements below the diagonal, with the array TAUQ, represent */ /* the orthogonal matrix Q as a product of elementary */ /* reflectors, and the elements above the first superdiagonal, */ /* with the array TAUP, represent the orthogonal matrix P as */ /* a product of elementary reflectors; */ /* if m < n, the diagonal and the first subdiagonal are */ /* overwritten with the lower bidiagonal matrix B; the */ /* elements below the first subdiagonal, with the array TAUQ, */ /* represent the orthogonal matrix Q as a product of */ /* elementary reflectors, and the elements above the diagonal, */ /* with the array TAUP, represent the orthogonal matrix P as */ /* a product of elementary reflectors. */ /* See Further Details. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* D (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The diagonal elements of the bidiagonal matrix B: */ /* D(i) = A(i,i). */ /* E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) */ /* The off-diagonal elements of the bidiagonal matrix B: */ /* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */ /* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */ /* TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors which */ /* represent the orthogonal matrix Q. See Further Details. */ /* TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors which */ /* represent the orthogonal matrix P. See Further Details. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The length of the array WORK. LWORK >= max(1,M,N). */ /* For optimum performance LWORK >= (M+N)*NB, where NB */ /* is the optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* The matrices Q and P are represented as products of elementary */ /* reflectors: */ /* If m >= n, */ /* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) */ /* Each H(i) and G(i) has the form: */ /* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' */ /* where tauq and taup are real scalars, and v and u are real vectors; */ /* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */ /* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */ /* tauq is stored in TAUQ(i) and taup in TAUP(i). */ /* If m < n, */ /* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) */ /* Each H(i) and G(i) has the form: */ /* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' */ /* where tauq and taup are real scalars, and v and u are real vectors; */ /* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */ /* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */ /* tauq is stored in TAUQ(i) and taup in TAUP(i). */ /* The contents of A on exit are illustrated by the following examples: */ /* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): */ /* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) */ /* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) */ /* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) */ /* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) */ /* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) */ /* ( v1 v2 v3 v4 v5 ) */ /* where d and e denote diagonal and off-diagonal elements of B, vi */ /* denotes an element of the vector defining H(i), and ui an element of */ /* the vector defining G(i). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --d__; --e; --tauq; --taup; --work; /* Function Body */ *info = 0; /* Computing MAX */ i__1 = 1, i__2 = _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1); nb = max(i__1,i__2); lwkopt = (*m + *n) * nb; work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = max(1,*m); if (*lwork < max(i__1,*n) && ! lquery) { *info = -10; } } if (*info < 0) { i__1 = -(*info); _starpu_xerbla_("DGEBRD", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ minmn = min(*m,*n); if (minmn == 0) { work[1] = 1.; return 0; } ws = (doublereal) max(*m,*n); ldwrkx = *m; ldwrky = *n; if (nb > 1 && nb < minmn) { /* Set the crossover point NX. */ /* Computing MAX */ i__1 = nb, i__2 = _starpu_ilaenv_(&c__3, "DGEBRD", " ", m, n, &c_n1, &c_n1); nx = max(i__1,i__2); /* Determine when to switch from blocked to unblocked code. */ if (nx < minmn) { ws = (doublereal) ((*m + *n) * nb); if ((doublereal) (*lwork) < ws) { /* Not enough work space for the optimal NB, consider using */ /* a smaller block size. */ nbmin = _starpu_ilaenv_(&c__2, "DGEBRD", " ", m, n, &c_n1, &c_n1); if (*lwork >= (*m + *n) * nbmin) { nb = *lwork / (*m + *n); } else { nb = 1; nx = minmn; } } } } else { nx = minmn; } i__1 = minmn - nx; i__2 = nb; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Reduce rows and columns i:i+nb-1 to bidiagonal form and return */ /* the matrices X and Y which are needed to update the unreduced */ /* part of the matrix */ i__3 = *m - i__ + 1; i__4 = *n - i__ + 1; _starpu_dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx * nb + 1], &ldwrky); /* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update */ /* of the form A := A - V*Y' - X*U' */ i__3 = *m - i__ - nb + 1; i__4 = *n - i__ - nb + 1; _starpu_dgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b21, &a[i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & ldwrky, &c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda); i__3 = *m - i__ - nb + 1; i__4 = *n - i__ - nb + 1; _starpu_dgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b21, & work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda); /* Copy diagonal and off-diagonal elements of B back into A */ if (*m >= *n) { i__3 = i__ + nb - 1; for (j = i__; j <= i__3; ++j) { a[j + j * a_dim1] = d__[j]; a[j + (j + 1) * a_dim1] = e[j]; /* L10: */ } } else { i__3 = i__ + nb - 1; for (j = i__; j <= i__3; ++j) { a[j + j * a_dim1] = d__[j]; a[j + 1 + j * a_dim1] = e[j]; /* L20: */ } } /* L30: */ } /* Use unblocked code to reduce the remainder of the matrix */ i__2 = *m - i__ + 1; i__1 = *n - i__ + 1; _starpu_dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & tauq[i__], &taup[i__], &work[1], &iinfo); work[1] = ws; return 0; /* End of DGEBRD */ } /* _starpu_dgebrd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgecon.c000066400000000000000000000143601507764646700206370ustar00rootroot00000000000000/* dgecon.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dgecon_(char *norm, integer *n, doublereal *a, integer * lda, doublereal *anorm, doublereal *rcond, doublereal *work, integer * iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1; doublereal d__1; /* Local variables */ doublereal sl; integer ix; doublereal su; integer kase, kase1; doublereal scale; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, integer *), _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal ainvnm; extern /* Subroutine */ int _starpu_dlatrs_(char *, char *, char *, char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *); logical onenrm; char normin[1]; doublereal smlnum; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGECON estimates the reciprocal of the condition number of a general */ /* real matrix A, in either the 1-norm or the infinity-norm, using */ /* the LU factorization computed by DGETRF. */ /* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ /* condition number is computed as */ /* RCOND = 1 / ( norm(A) * norm(inv(A)) ). */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies whether the 1-norm condition number or the */ /* infinity-norm condition number is required: */ /* = '1' or 'O': 1-norm; */ /* = 'I': Infinity-norm. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The factors L and U from the factorization A = P*L*U */ /* as computed by DGETRF. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* ANORM (input) DOUBLE PRECISION */ /* If NORM = '1' or 'O', the 1-norm of the original matrix A. */ /* If NORM = 'I', the infinity-norm of the original matrix A. */ /* RCOND (output) DOUBLE PRECISION */ /* The reciprocal of the condition number of the matrix A, */ /* computed as RCOND = 1/(norm(A) * norm(inv(A))). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --work; --iwork; /* Function Body */ *info = 0; onenrm = *(unsigned char *)norm == '1' || _starpu_lsame_(norm, "O"); if (! onenrm && ! _starpu_lsame_(norm, "I")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } else if (*anorm < 0.) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGECON", &i__1); return 0; } /* Quick return if possible */ *rcond = 0.; if (*n == 0) { *rcond = 1.; return 0; } else if (*anorm == 0.) { return 0; } smlnum = _starpu_dlamch_("Safe minimum"); /* Estimate the norm of inv(A). */ ainvnm = 0.; *(unsigned char *)normin = 'N'; if (onenrm) { kase1 = 1; } else { kase1 = 2; } kase = 0; L10: _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); if (kase != 0) { if (kase == kase1) { /* Multiply by inv(L). */ _starpu_dlatrs_("Lower", "No transpose", "Unit", normin, n, &a[a_offset], lda, &work[1], &sl, &work[(*n << 1) + 1], info); /* Multiply by inv(U). */ _starpu_dlatrs_("Upper", "No transpose", "Non-unit", normin, n, &a[ a_offset], lda, &work[1], &su, &work[*n * 3 + 1], info); } else { /* Multiply by inv(U'). */ _starpu_dlatrs_("Upper", "Transpose", "Non-unit", normin, n, &a[a_offset], lda, &work[1], &su, &work[*n * 3 + 1], info); /* Multiply by inv(L'). */ _starpu_dlatrs_("Lower", "Transpose", "Unit", normin, n, &a[a_offset], lda, &work[1], &sl, &work[(*n << 1) + 1], info); } /* Divide X by 1/(SL*SU) if doing so will not cause overflow. */ scale = sl * su; *(unsigned char *)normin = 'Y'; if (scale != 1.) { ix = _starpu_idamax_(n, &work[1], &c__1); if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) { goto L20; } _starpu_drscl_(n, &scale, &work[1], &c__1); } goto L10; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { *rcond = 1. / ainvnm / *anorm; } L20: return 0; /* End of DGECON */ } /* _starpu_dgecon_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgeequ.c000066400000000000000000000165041507764646700206540ustar00rootroot00000000000000/* dgeequ.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgeequ_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j; doublereal rcmin, rcmax; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum, smlnum; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEEQU computes row and column scalings intended to equilibrate an */ /* M-by-N matrix A and reduce its condition number. R returns the row */ /* scale factors and C the column scale factors, chosen to try to make */ /* the largest element in each row and column of the matrix B with */ /* elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. */ /* R(i) and C(j) are restricted to be between SMLNUM = smallest safe */ /* number and BIGNUM = largest safe number. Use of these scaling */ /* factors is not guaranteed to reduce the condition number of A but */ /* works well in practice. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The M-by-N matrix whose equilibration factors are */ /* to be computed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* R (output) DOUBLE PRECISION array, dimension (M) */ /* If INFO = 0 or INFO > M, R contains the row scale factors */ /* for A. */ /* C (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, C contains the column scale factors for A. */ /* ROWCND (output) DOUBLE PRECISION */ /* If INFO = 0 or INFO > M, ROWCND contains the ratio of the */ /* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and */ /* AMAX is neither too large nor too small, it is not worth */ /* scaling by R. */ /* COLCND (output) DOUBLE PRECISION */ /* If INFO = 0, COLCND contains the ratio of the smallest */ /* C(i) to the largest C(i). If COLCND >= 0.1, it is not */ /* worth scaling by C. */ /* AMAX (output) DOUBLE PRECISION */ /* Absolute value of largest matrix element. If AMAX is very */ /* close to overflow or very close to underflow, the matrix */ /* should be scaled. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, and i is */ /* <= M: the i-th row of A is exactly zero */ /* > M: the (i-M)-th column of A is exactly zero */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --r__; --c__; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEEQU", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { *rowcnd = 1.; *colcnd = 1.; *amax = 0.; return 0; } /* Get machine constants. */ smlnum = _starpu_dlamch_("S"); bignum = 1. / smlnum; /* Compute row scale factors. */ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { r__[i__] = 0.; /* L10: */ } /* Find the maximum element in each row. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = r__[i__], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); r__[i__] = max(d__2,d__3); /* L20: */ } /* L30: */ } /* Find the maximum and minimum scale factors. */ rcmin = bignum; rcmax = 0.; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = rcmax, d__2 = r__[i__]; rcmax = max(d__1,d__2); /* Computing MIN */ d__1 = rcmin, d__2 = r__[i__]; rcmin = min(d__1,d__2); /* L40: */ } *amax = rcmax; if (rcmin == 0.) { /* Find the first zero scale factor and return an error code. */ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (r__[i__] == 0.) { *info = i__; return 0; } /* L50: */ } } else { /* Invert the scale factors. */ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MIN */ /* Computing MAX */ d__2 = r__[i__]; d__1 = max(d__2,smlnum); r__[i__] = 1. / min(d__1,bignum); /* L60: */ } /* Compute ROWCND = min(R(I)) / max(R(I)) */ *rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); } /* Compute column scale factors */ i__1 = *n; for (j = 1; j <= i__1; ++j) { c__[j] = 0.; /* L70: */ } /* Find the maximum element in each column, */ /* assuming the row scaling computed above. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = c__[j], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)) * r__[i__]; c__[j] = max(d__2,d__3); /* L80: */ } /* L90: */ } /* Find the maximum and minimum scale factors. */ rcmin = bignum; rcmax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = rcmin, d__2 = c__[j]; rcmin = min(d__1,d__2); /* Computing MAX */ d__1 = rcmax, d__2 = c__[j]; rcmax = max(d__1,d__2); /* L100: */ } if (rcmin == 0.) { /* Find the first zero scale factor and return an error code. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { if (c__[j] == 0.) { *info = *m + j; return 0; } /* L110: */ } } else { /* Invert the scale factors. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ /* Computing MAX */ d__2 = c__[j]; d__1 = max(d__2,smlnum); c__[j] = 1. / min(d__1,bignum); /* L120: */ } /* Compute COLCND = min(C(J)) / max(C(J)) */ *colcnd = max(rcmin,smlnum) / min(rcmax,bignum); } return 0; /* End of DGEEQU */ } /* _starpu_dgeequ_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgeequb.c000066400000000000000000000210121507764646700210040ustar00rootroot00000000000000/* dgeequb.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgeequb_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1, d__2, d__3; /* Builtin functions */ double log(doublereal), pow_di(doublereal *, integer *); /* Local variables */ integer i__, j; doublereal radix, rcmin, rcmax; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum, logrdx, smlnum; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEEQUB computes row and column scalings intended to equilibrate an */ /* M-by-N matrix A and reduce its condition number. R returns the row */ /* scale factors and C the column scale factors, chosen to try to make */ /* the largest element in each row and column of the matrix B with */ /* elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most */ /* the radix. */ /* R(i) and C(j) are restricted to be a power of the radix between */ /* SMLNUM = smallest safe number and BIGNUM = largest safe number. Use */ /* of these scaling factors is not guaranteed to reduce the condition */ /* number of A but works well in practice. */ /* This routine differs from DGEEQU by restricting the scaling factors */ /* to a power of the radix. Baring over- and underflow, scaling by */ /* these factors introduces no additional rounding errors. However, the */ /* scaled entries' magnitured are no longer approximately 1 but lie */ /* between sqrt(radix) and 1/sqrt(radix). */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The M-by-N matrix whose equilibration factors are */ /* to be computed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* R (output) DOUBLE PRECISION array, dimension (M) */ /* If INFO = 0 or INFO > M, R contains the row scale factors */ /* for A. */ /* C (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, C contains the column scale factors for A. */ /* ROWCND (output) DOUBLE PRECISION */ /* If INFO = 0 or INFO > M, ROWCND contains the ratio of the */ /* smallest R(i) to the largest R(i). If ROWCND >= 0.1 and */ /* AMAX is neither too large nor too small, it is not worth */ /* scaling by R. */ /* COLCND (output) DOUBLE PRECISION */ /* If INFO = 0, COLCND contains the ratio of the smallest */ /* C(i) to the largest C(i). If COLCND >= 0.1, it is not */ /* worth scaling by C. */ /* AMAX (output) DOUBLE PRECISION */ /* Absolute value of largest matrix element. If AMAX is very */ /* close to overflow or very close to underflow, the matrix */ /* should be scaled. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, and i is */ /* <= M: the i-th row of A is exactly zero */ /* > M: the (i-M)-th column of A is exactly zero */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --r__; --c__; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEEQUB", &i__1); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0) { *rowcnd = 1.; *colcnd = 1.; *amax = 0.; return 0; } /* Get machine constants. Assume SMLNUM is a power of the radix. */ smlnum = _starpu_dlamch_("S"); bignum = 1. / smlnum; radix = _starpu_dlamch_("B"); logrdx = log(radix); /* Compute row scale factors. */ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { r__[i__] = 0.; /* L10: */ } /* Find the maximum element in each row. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = r__[i__], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); r__[i__] = max(d__2,d__3); /* L20: */ } /* L30: */ } i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (r__[i__] > 0.) { i__2 = (integer) (log(r__[i__]) / logrdx); r__[i__] = pow_di(&radix, &i__2); } } /* Find the maximum and minimum scale factors. */ rcmin = bignum; rcmax = 0.; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = rcmax, d__2 = r__[i__]; rcmax = max(d__1,d__2); /* Computing MIN */ d__1 = rcmin, d__2 = r__[i__]; rcmin = min(d__1,d__2); /* L40: */ } *amax = rcmax; if (rcmin == 0.) { /* Find the first zero scale factor and return an error code. */ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (r__[i__] == 0.) { *info = i__; return 0; } /* L50: */ } } else { /* Invert the scale factors. */ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MIN */ /* Computing MAX */ d__2 = r__[i__]; d__1 = max(d__2,smlnum); r__[i__] = 1. / min(d__1,bignum); /* L60: */ } /* Compute ROWCND = min(R(I)) / max(R(I)). */ *rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); } /* Compute column scale factors */ i__1 = *n; for (j = 1; j <= i__1; ++j) { c__[j] = 0.; /* L70: */ } /* Find the maximum element in each column, */ /* assuming the row scaling computed above. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = c__[j], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)) * r__[i__]; c__[j] = max(d__2,d__3); /* L80: */ } if (c__[j] > 0.) { i__2 = (integer) (log(c__[j]) / logrdx); c__[j] = pow_di(&radix, &i__2); } /* L90: */ } /* Find the maximum and minimum scale factors. */ rcmin = bignum; rcmax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = rcmin, d__2 = c__[j]; rcmin = min(d__1,d__2); /* Computing MAX */ d__1 = rcmax, d__2 = c__[j]; rcmax = max(d__1,d__2); /* L100: */ } if (rcmin == 0.) { /* Find the first zero scale factor and return an error code. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { if (c__[j] == 0.) { *info = *m + j; return 0; } /* L110: */ } } else { /* Invert the scale factors. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ /* Computing MAX */ d__2 = c__[j]; d__1 = max(d__2,smlnum); c__[j] = 1. / min(d__1,bignum); /* L120: */ } /* Compute COLCND = min(C(J)) / max(C(J)). */ *colcnd = max(rcmin,smlnum) / min(rcmax,bignum); } return 0; /* End of DGEEQUB */ } /* _starpu_dgeequb_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgees.c000066400000000000000000000422431507764646700204700ustar00rootroot00000000000000/* dgees.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__0 = 0; static integer c_n1 = -1; /* Subroutine */ int _starpu_dgees_(char *jobvs, char *sort, L_fp select, integer *n, doublereal *a, integer *lda, integer *sdim, doublereal *wr, doublereal *wi, doublereal *vs, integer *ldvs, doublereal *work, integer *lwork, logical *bwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, vs_dim1, vs_offset, i__1, i__2, i__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__; doublereal s; integer i1, i2, ip, ihi, ilo; doublereal dum[1], eps, sep; integer ibal; doublereal anrm; integer idum[1], ierr, itau, iwrk, inxt, icond, ieval; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); logical cursl; extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dgebak_( char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dgebal_(char *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *); logical lst2sl, scalea; extern doublereal _starpu_dlamch_(char *); doublereal cscale; extern doublereal _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dorghr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dhseqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dtrsen_(char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *); logical lastsl; integer minwrk, maxwrk; doublereal smlnum; integer hswork; logical wantst, lquery, wantvs; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* .. Function Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEES computes for an N-by-N real nonsymmetric matrix A, the */ /* eigenvalues, the real Schur form T, and, optionally, the matrix of */ /* Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). */ /* Optionally, it also orders the eigenvalues on the diagonal of the */ /* real Schur form so that selected eigenvalues are at the top left. */ /* The leading columns of Z then form an orthonormal basis for the */ /* invariant subspace corresponding to the selected eigenvalues. */ /* A matrix is in real Schur form if it is upper quasi-triangular with */ /* 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the */ /* form */ /* [ a b ] */ /* [ c a ] */ /* where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). */ /* Arguments */ /* ========= */ /* JOBVS (input) CHARACTER*1 */ /* = 'N': Schur vectors are not computed; */ /* = 'V': Schur vectors are computed. */ /* SORT (input) CHARACTER*1 */ /* Specifies whether or not to order the eigenvalues on the */ /* diagonal of the Schur form. */ /* = 'N': Eigenvalues are not ordered; */ /* = 'S': Eigenvalues are ordered (see SELECT). */ /* SELECT (external procedure) LOGICAL FUNCTION of two DOUBLE PRECISION arguments */ /* SELECT must be declared EXTERNAL in the calling subroutine. */ /* If SORT = 'S', SELECT is used to select eigenvalues to sort */ /* to the top left of the Schur form. */ /* If SORT = 'N', SELECT is not referenced. */ /* An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if */ /* SELECT(WR(j),WI(j)) is true; i.e., if either one of a complex */ /* conjugate pair of eigenvalues is selected, then both complex */ /* eigenvalues are selected. */ /* Note that a selected complex eigenvalue may no longer */ /* satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since */ /* ordering may change the value of complex eigenvalues */ /* (especially if the eigenvalue is ill-conditioned); in this */ /* case INFO is set to N+2 (see INFO below). */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. */ /* On exit, A has been overwritten by its real Schur form T. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* SDIM (output) INTEGER */ /* If SORT = 'N', SDIM = 0. */ /* If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ /* for which SELECT is true. (Complex conjugate */ /* pairs for which SELECT is true for either */ /* eigenvalue count as 2.) */ /* WR (output) DOUBLE PRECISION array, dimension (N) */ /* WI (output) DOUBLE PRECISION array, dimension (N) */ /* WR and WI contain the real and imaginary parts, */ /* respectively, of the computed eigenvalues in the same order */ /* that they appear on the diagonal of the output Schur form T. */ /* Complex conjugate pairs of eigenvalues will appear */ /* consecutively with the eigenvalue having the positive */ /* imaginary part first. */ /* VS (output) DOUBLE PRECISION array, dimension (LDVS,N) */ /* If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur */ /* vectors. */ /* If JOBVS = 'N', VS is not referenced. */ /* LDVS (input) INTEGER */ /* The leading dimension of the array VS. LDVS >= 1; if */ /* JOBVS = 'V', LDVS >= N. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) contains the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,3*N). */ /* For good performance, LWORK must generally be larger. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* BWORK (workspace) LOGICAL array, dimension (N) */ /* Not referenced if SORT = 'N'. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = i, and i is */ /* <= N: the QR algorithm failed to compute all the */ /* eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI */ /* contain those eigenvalues which have converged; if */ /* JOBVS = 'V', VS contains the matrix which reduces A */ /* to its partially converged Schur form. */ /* = N+1: the eigenvalues could not be reordered because some */ /* eigenvalues were too close to separate (the problem */ /* is very ill-conditioned); */ /* = N+2: after reordering, roundoff changed values of some */ /* complex eigenvalues so that leading eigenvalues in */ /* the Schur form no longer satisfy SELECT=.TRUE. This */ /* could also be caused by underflow due to scaling. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --wr; --wi; vs_dim1 = *ldvs; vs_offset = 1 + vs_dim1; vs -= vs_offset; --work; --bwork; /* Function Body */ *info = 0; lquery = *lwork == -1; wantvs = _starpu_lsame_(jobvs, "V"); wantst = _starpu_lsame_(sort, "S"); if (! wantvs && ! _starpu_lsame_(jobvs, "N")) { *info = -1; } else if (! wantst && ! _starpu_lsame_(sort, "N")) { *info = -2; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldvs < 1 || wantvs && *ldvs < *n) { *info = -11; } /* Compute workspace */ /* (Note: Comments in the code beginning "Workspace:" describe the */ /* minimal amount of workspace needed at that point in the code, */ /* as well as the preferred amount for good performance. */ /* NB refers to the optimal block size for the immediately */ /* following subroutine, as returned by ILAENV. */ /* HSWORK refers to the workspace preferred by DHSEQR, as */ /* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ /* the worst case.) */ if (*info == 0) { if (*n == 0) { minwrk = 1; maxwrk = 1; } else { maxwrk = (*n << 1) + *n * _starpu_ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, n, &c__0); minwrk = *n * 3; _starpu_dhseqr_("S", jobvs, n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1] , &vs[vs_offset], ldvs, &work[1], &c_n1, &ieval); hswork = (integer) work[1]; if (! wantvs) { /* Computing MAX */ i__1 = maxwrk, i__2 = *n + hswork; maxwrk = max(i__1,i__2); } else { /* Computing MAX */ i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * _starpu_ilaenv_(&c__1, "DORGHR", " ", n, &c__1, n, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n + hswork; maxwrk = max(i__1,i__2); } } work[1] = (doublereal) maxwrk; if (*lwork < minwrk && ! lquery) { *info = -13; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEES ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { *sdim = 0; return 0; } /* Get machine constants */ eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S"); bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); smlnum = sqrt(smlnum) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, dum); scalea = FALSE_; if (anrm > 0. && anrm < smlnum) { scalea = TRUE_; cscale = smlnum; } else if (anrm > bignum) { scalea = TRUE_; cscale = bignum; } if (scalea) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & ierr); } /* Permute the matrix to make it more nearly triangular */ /* (Workspace: need N) */ ibal = 1; _starpu_dgebal_("P", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); /* Reduce to upper Hessenberg form */ /* (Workspace: need 3*N, prefer 2*N+N*NB) */ itau = *n + ibal; iwrk = *n + itau; i__1 = *lwork - iwrk + 1; _starpu_dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, &ierr); if (wantvs) { /* Copy Householder vectors to VS */ _starpu_dlacpy_("L", n, n, &a[a_offset], lda, &vs[vs_offset], ldvs) ; /* Generate orthogonal matrix in VS */ /* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ i__1 = *lwork - iwrk + 1; _starpu_dorghr_(n, &ilo, &ihi, &vs[vs_offset], ldvs, &work[itau], &work[iwrk], &i__1, &ierr); } *sdim = 0; /* Perform QR iteration, accumulating Schur vectors in VS if desired */ /* (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ iwrk = itau; i__1 = *lwork - iwrk + 1; _starpu_dhseqr_("S", jobvs, n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vs[ vs_offset], ldvs, &work[iwrk], &i__1, &ieval); if (ieval > 0) { *info = ieval; } /* Sort eigenvalues if desired */ if (wantst && *info == 0) { if (scalea) { _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wr[1], n, & ierr); _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wi[1], n, & ierr); } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { bwork[i__] = (*select)(&wr[i__], &wi[i__]); /* L10: */ } /* Reorder eigenvalues and transform Schur vectors */ /* (Workspace: none needed) */ i__1 = *lwork - iwrk + 1; _starpu_dtrsen_("N", jobvs, &bwork[1], n, &a[a_offset], lda, &vs[vs_offset], ldvs, &wr[1], &wi[1], sdim, &s, &sep, &work[iwrk], &i__1, idum, &c__1, &icond); if (icond > 0) { *info = *n + icond; } } if (wantvs) { /* Undo balancing */ /* (Workspace: need N) */ _starpu_dgebak_("P", "R", n, &ilo, &ihi, &work[ibal], n, &vs[vs_offset], ldvs, &ierr); } if (scalea) { /* Undo scaling for the Schur form of A */ _starpu_dlascl_("H", &c__0, &c__0, &cscale, &anrm, n, n, &a[a_offset], lda, & ierr); i__1 = *lda + 1; _starpu_dcopy_(n, &a[a_offset], &i__1, &wr[1], &c__1); if (cscale == smlnum) { /* If scaling back towards underflow, adjust WI if an */ /* offdiagonal element of a 2-by-2 block in the Schur form */ /* underflows. */ if (ieval > 0) { i1 = ieval + 1; i2 = ihi - 1; i__1 = ilo - 1; /* Computing MAX */ i__3 = ilo - 1; i__2 = max(i__3,1); _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ 1], &i__2, &ierr); } else if (wantst) { i1 = 1; i2 = *n - 1; } else { i1 = ilo; i2 = ihi - 1; } inxt = i1 - 1; i__1 = i2; for (i__ = i1; i__ <= i__1; ++i__) { if (i__ < inxt) { goto L20; } if (wi[i__] == 0.) { inxt = i__ + 1; } else { if (a[i__ + 1 + i__ * a_dim1] == 0.) { wi[i__] = 0.; wi[i__ + 1] = 0.; } else if (a[i__ + 1 + i__ * a_dim1] != 0. && a[i__ + ( i__ + 1) * a_dim1] == 0.) { wi[i__] = 0.; wi[i__ + 1] = 0.; if (i__ > 1) { i__2 = i__ - 1; _starpu_dswap_(&i__2, &a[i__ * a_dim1 + 1], &c__1, &a[( i__ + 1) * a_dim1 + 1], &c__1); } if (*n > i__ + 1) { i__2 = *n - i__ - 1; _starpu_dswap_(&i__2, &a[i__ + (i__ + 2) * a_dim1], lda, & a[i__ + 1 + (i__ + 2) * a_dim1], lda); } if (wantvs) { _starpu_dswap_(n, &vs[i__ * vs_dim1 + 1], &c__1, &vs[(i__ + 1) * vs_dim1 + 1], &c__1); } a[i__ + (i__ + 1) * a_dim1] = a[i__ + 1 + i__ * a_dim1]; a[i__ + 1 + i__ * a_dim1] = 0.; } inxt = i__ + 2; } L20: ; } } /* Undo scaling for the imaginary part of the eigenvalues */ i__1 = *n - ieval; /* Computing MAX */ i__3 = *n - ieval; i__2 = max(i__3,1); _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ieval + 1], &i__2, &ierr); } if (wantst && *info == 0) { /* Check if reordering successful */ lastsl = TRUE_; lst2sl = TRUE_; *sdim = 0; ip = 0; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { cursl = (*select)(&wr[i__], &wi[i__]); if (wi[i__] == 0.) { if (cursl) { ++(*sdim); } ip = 0; if (cursl && ! lastsl) { *info = *n + 2; } } else { if (ip == 1) { /* Last eigenvalue of conjugate pair */ cursl = cursl || lastsl; lastsl = cursl; if (cursl) { *sdim += 2; } ip = -1; if (cursl && ! lst2sl) { *info = *n + 2; } } else { /* First eigenvalue of conjugate pair */ ip = 1; } } lst2sl = lastsl; lastsl = cursl; /* L30: */ } } work[1] = (doublereal) maxwrk; return 0; /* End of DGEES */ } /* _starpu_dgees_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgeesx.c000066400000000000000000000526211507764646700206610ustar00rootroot00000000000000/* dgeesx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__0 = 0; static integer c_n1 = -1; /* Subroutine */ int _starpu_dgeesx_(char *jobvs, char *sort, L_fp select, char * sense, integer *n, doublereal *a, integer *lda, integer *sdim, doublereal *wr, doublereal *wi, doublereal *vs, integer *ldvs, doublereal *rconde, doublereal *rcondv, doublereal *work, integer * lwork, integer *iwork, integer *liwork, logical *bwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, vs_dim1, vs_offset, i__1, i__2, i__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, i1, i2, ip, ihi, ilo; doublereal dum[1], eps; integer ibal; doublereal anrm; integer ierr, itau, iwrk, lwrk, inxt, icond, ieval; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); logical cursl; integer liwrk; extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dgebak_( char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dgebal_(char *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *); logical lst2sl, scalea; extern doublereal _starpu_dlamch_(char *); doublereal cscale; extern doublereal _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dorghr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dhseqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); logical wantsb; extern /* Subroutine */ int _starpu_dtrsen_(char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *); logical wantse, lastsl; integer minwrk, maxwrk; logical wantsn; doublereal smlnum; integer hswork; logical wantst, lquery, wantsv, wantvs; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* .. Function Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEESX computes for an N-by-N real nonsymmetric matrix A, the */ /* eigenvalues, the real Schur form T, and, optionally, the matrix of */ /* Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). */ /* Optionally, it also orders the eigenvalues on the diagonal of the */ /* real Schur form so that selected eigenvalues are at the top left; */ /* computes a reciprocal condition number for the average of the */ /* selected eigenvalues (RCONDE); and computes a reciprocal condition */ /* number for the right invariant subspace corresponding to the */ /* selected eigenvalues (RCONDV). The leading columns of Z form an */ /* orthonormal basis for this invariant subspace. */ /* For further explanation of the reciprocal condition numbers RCONDE */ /* and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where */ /* these quantities are called s and sep respectively). */ /* A real matrix is in real Schur form if it is upper quasi-triangular */ /* with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in */ /* the form */ /* [ a b ] */ /* [ c a ] */ /* where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). */ /* Arguments */ /* ========= */ /* JOBVS (input) CHARACTER*1 */ /* = 'N': Schur vectors are not computed; */ /* = 'V': Schur vectors are computed. */ /* SORT (input) CHARACTER*1 */ /* Specifies whether or not to order the eigenvalues on the */ /* diagonal of the Schur form. */ /* = 'N': Eigenvalues are not ordered; */ /* = 'S': Eigenvalues are ordered (see SELECT). */ /* SELECT (external procedure) LOGICAL FUNCTION of two DOUBLE PRECISION arguments */ /* SELECT must be declared EXTERNAL in the calling subroutine. */ /* If SORT = 'S', SELECT is used to select eigenvalues to sort */ /* to the top left of the Schur form. */ /* If SORT = 'N', SELECT is not referenced. */ /* An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if */ /* SELECT(WR(j),WI(j)) is true; i.e., if either one of a */ /* complex conjugate pair of eigenvalues is selected, then both */ /* are. Note that a selected complex eigenvalue may no longer */ /* satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since */ /* ordering may change the value of complex eigenvalues */ /* (especially if the eigenvalue is ill-conditioned); in this */ /* case INFO may be set to N+3 (see INFO below). */ /* SENSE (input) CHARACTER*1 */ /* Determines which reciprocal condition numbers are computed. */ /* = 'N': None are computed; */ /* = 'E': Computed for average of selected eigenvalues only; */ /* = 'V': Computed for selected right invariant subspace only; */ /* = 'B': Computed for both. */ /* If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the N-by-N matrix A. */ /* On exit, A is overwritten by its real Schur form T. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* SDIM (output) INTEGER */ /* If SORT = 'N', SDIM = 0. */ /* If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ /* for which SELECT is true. (Complex conjugate */ /* pairs for which SELECT is true for either */ /* eigenvalue count as 2.) */ /* WR (output) DOUBLE PRECISION array, dimension (N) */ /* WI (output) DOUBLE PRECISION array, dimension (N) */ /* WR and WI contain the real and imaginary parts, respectively, */ /* of the computed eigenvalues, in the same order that they */ /* appear on the diagonal of the output Schur form T. Complex */ /* conjugate pairs of eigenvalues appear consecutively with the */ /* eigenvalue having the positive imaginary part first. */ /* VS (output) DOUBLE PRECISION array, dimension (LDVS,N) */ /* If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur */ /* vectors. */ /* If JOBVS = 'N', VS is not referenced. */ /* LDVS (input) INTEGER */ /* The leading dimension of the array VS. LDVS >= 1, and if */ /* JOBVS = 'V', LDVS >= N. */ /* RCONDE (output) DOUBLE PRECISION */ /* If SENSE = 'E' or 'B', RCONDE contains the reciprocal */ /* condition number for the average of the selected eigenvalues. */ /* Not referenced if SENSE = 'N' or 'V'. */ /* RCONDV (output) DOUBLE PRECISION */ /* If SENSE = 'V' or 'B', RCONDV contains the reciprocal */ /* condition number for the selected right invariant subspace. */ /* Not referenced if SENSE = 'N' or 'E'. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,3*N). */ /* Also, if SENSE = 'E' or 'V' or 'B', */ /* LWORK >= N+2*SDIM*(N-SDIM), where SDIM is the number of */ /* selected eigenvalues computed by this routine. Note that */ /* N+2*SDIM*(N-SDIM) <= N+N*N/2. Note also that an error is only */ /* returned if LWORK < max(1,3*N), but if SENSE = 'E' or 'V' or */ /* 'B' this may not be large enough. */ /* For good performance, LWORK must generally be larger. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates upper bounds on the optimal sizes of the */ /* arrays WORK and IWORK, returns these values as the first */ /* entries of the WORK and IWORK arrays, and no error messages */ /* related to LWORK or LIWORK are issued by XERBLA. */ /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ /* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. */ /* LIWORK >= 1; if SENSE = 'V' or 'B', LIWORK >= SDIM*(N-SDIM). */ /* Note that SDIM*(N-SDIM) <= N*N/4. Note also that an error is */ /* only returned if LIWORK < 1, but if SENSE = 'V' or 'B' this */ /* may not be large enough. */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates upper bounds on the optimal sizes of */ /* the arrays WORK and IWORK, returns these values as the first */ /* entries of the WORK and IWORK arrays, and no error messages */ /* related to LWORK or LIWORK are issued by XERBLA. */ /* BWORK (workspace) LOGICAL array, dimension (N) */ /* Not referenced if SORT = 'N'. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = i, and i is */ /* <= N: the QR algorithm failed to compute all the */ /* eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI */ /* contain those eigenvalues which have converged; if */ /* JOBVS = 'V', VS contains the transformation which */ /* reduces A to its partially converged Schur form. */ /* = N+1: the eigenvalues could not be reordered because some */ /* eigenvalues were too close to separate (the problem */ /* is very ill-conditioned); */ /* = N+2: after reordering, roundoff changed values of some */ /* complex eigenvalues so that leading eigenvalues in */ /* the Schur form no longer satisfy SELECT=.TRUE. This */ /* could also be caused by underflow due to scaling. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --wr; --wi; vs_dim1 = *ldvs; vs_offset = 1 + vs_dim1; vs -= vs_offset; --work; --iwork; --bwork; /* Function Body */ *info = 0; wantvs = _starpu_lsame_(jobvs, "V"); wantst = _starpu_lsame_(sort, "S"); wantsn = _starpu_lsame_(sense, "N"); wantse = _starpu_lsame_(sense, "E"); wantsv = _starpu_lsame_(sense, "V"); wantsb = _starpu_lsame_(sense, "B"); lquery = *lwork == -1 || *liwork == -1; if (! wantvs && ! _starpu_lsame_(jobvs, "N")) { *info = -1; } else if (! wantst && ! _starpu_lsame_(sort, "N")) { *info = -2; } else if (! (wantsn || wantse || wantsv || wantsb) || ! wantst && ! wantsn) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldvs < 1 || wantvs && *ldvs < *n) { *info = -12; } /* Compute workspace */ /* (Note: Comments in the code beginning "RWorkspace:" describe the */ /* minimal amount of real workspace needed at that point in the */ /* code, as well as the preferred amount for good performance. */ /* IWorkspace refers to integer workspace. */ /* NB refers to the optimal block size for the immediately */ /* following subroutine, as returned by ILAENV. */ /* HSWORK refers to the workspace preferred by DHSEQR, as */ /* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ /* the worst case. */ /* If SENSE = 'E', 'V' or 'B', then the amount of workspace needed */ /* depends on SDIM, which is computed by the routine DTRSEN later */ /* in the code.) */ if (*info == 0) { liwrk = 1; if (*n == 0) { minwrk = 1; lwrk = 1; } else { maxwrk = (*n << 1) + *n * _starpu_ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, n, &c__0); minwrk = *n * 3; _starpu_dhseqr_("S", jobvs, n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1] , &vs[vs_offset], ldvs, &work[1], &c_n1, &ieval); hswork = (integer) work[1]; if (! wantvs) { /* Computing MAX */ i__1 = maxwrk, i__2 = *n + hswork; maxwrk = max(i__1,i__2); } else { /* Computing MAX */ i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * _starpu_ilaenv_(&c__1, "DORGHR", " ", n, &c__1, n, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n + hswork; maxwrk = max(i__1,i__2); } lwrk = maxwrk; if (! wantsn) { /* Computing MAX */ i__1 = lwrk, i__2 = *n + *n * *n / 2; lwrk = max(i__1,i__2); } if (wantsv || wantsb) { liwrk = *n * *n / 4; } } iwork[1] = liwrk; work[1] = (doublereal) lwrk; if (*lwork < minwrk && ! lquery) { *info = -16; } else if (*liwork < 1 && ! lquery) { *info = -18; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEESX", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { *sdim = 0; return 0; } /* Get machine constants */ eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S"); bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); smlnum = sqrt(smlnum) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, dum); scalea = FALSE_; if (anrm > 0. && anrm < smlnum) { scalea = TRUE_; cscale = smlnum; } else if (anrm > bignum) { scalea = TRUE_; cscale = bignum; } if (scalea) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & ierr); } /* Permute the matrix to make it more nearly triangular */ /* (RWorkspace: need N) */ ibal = 1; _starpu_dgebal_("P", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); /* Reduce to upper Hessenberg form */ /* (RWorkspace: need 3*N, prefer 2*N+N*NB) */ itau = *n + ibal; iwrk = *n + itau; i__1 = *lwork - iwrk + 1; _starpu_dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, &ierr); if (wantvs) { /* Copy Householder vectors to VS */ _starpu_dlacpy_("L", n, n, &a[a_offset], lda, &vs[vs_offset], ldvs) ; /* Generate orthogonal matrix in VS */ /* (RWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ i__1 = *lwork - iwrk + 1; _starpu_dorghr_(n, &ilo, &ihi, &vs[vs_offset], ldvs, &work[itau], &work[iwrk], &i__1, &ierr); } *sdim = 0; /* Perform QR iteration, accumulating Schur vectors in VS if desired */ /* (RWorkspace: need N+1, prefer N+HSWORK (see comments) ) */ iwrk = itau; i__1 = *lwork - iwrk + 1; _starpu_dhseqr_("S", jobvs, n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vs[ vs_offset], ldvs, &work[iwrk], &i__1, &ieval); if (ieval > 0) { *info = ieval; } /* Sort eigenvalues if desired */ if (wantst && *info == 0) { if (scalea) { _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wr[1], n, & ierr); _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wi[1], n, & ierr); } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { bwork[i__] = (*select)(&wr[i__], &wi[i__]); /* L10: */ } /* Reorder eigenvalues, transform Schur vectors, and compute */ /* reciprocal condition numbers */ /* (RWorkspace: if SENSE is not 'N', need N+2*SDIM*(N-SDIM) */ /* otherwise, need N ) */ /* (IWorkspace: if SENSE is 'V' or 'B', need SDIM*(N-SDIM) */ /* otherwise, need 0 ) */ i__1 = *lwork - iwrk + 1; _starpu_dtrsen_(sense, jobvs, &bwork[1], n, &a[a_offset], lda, &vs[vs_offset], ldvs, &wr[1], &wi[1], sdim, rconde, rcondv, &work[iwrk], & i__1, &iwork[1], liwork, &icond); if (! wantsn) { /* Computing MAX */ i__1 = maxwrk, i__2 = *n + (*sdim << 1) * (*n - *sdim); maxwrk = max(i__1,i__2); } if (icond == -15) { /* Not enough real workspace */ *info = -16; } else if (icond == -17) { /* Not enough integer workspace */ *info = -18; } else if (icond > 0) { /* DTRSEN failed to reorder or to restore standard Schur form */ *info = icond + *n; } } if (wantvs) { /* Undo balancing */ /* (RWorkspace: need N) */ _starpu_dgebak_("P", "R", n, &ilo, &ihi, &work[ibal], n, &vs[vs_offset], ldvs, &ierr); } if (scalea) { /* Undo scaling for the Schur form of A */ _starpu_dlascl_("H", &c__0, &c__0, &cscale, &anrm, n, n, &a[a_offset], lda, & ierr); i__1 = *lda + 1; _starpu_dcopy_(n, &a[a_offset], &i__1, &wr[1], &c__1); if ((wantsv || wantsb) && *info == 0) { dum[0] = *rcondv; _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, & c__1, &ierr); *rcondv = dum[0]; } if (cscale == smlnum) { /* If scaling back towards underflow, adjust WI if an */ /* offdiagonal element of a 2-by-2 block in the Schur form */ /* underflows. */ if (ieval > 0) { i1 = ieval + 1; i2 = ihi - 1; i__1 = ilo - 1; _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ 1], n, &ierr); } else if (wantst) { i1 = 1; i2 = *n - 1; } else { i1 = ilo; i2 = ihi - 1; } inxt = i1 - 1; i__1 = i2; for (i__ = i1; i__ <= i__1; ++i__) { if (i__ < inxt) { goto L20; } if (wi[i__] == 0.) { inxt = i__ + 1; } else { if (a[i__ + 1 + i__ * a_dim1] == 0.) { wi[i__] = 0.; wi[i__ + 1] = 0.; } else if (a[i__ + 1 + i__ * a_dim1] != 0. && a[i__ + ( i__ + 1) * a_dim1] == 0.) { wi[i__] = 0.; wi[i__ + 1] = 0.; if (i__ > 1) { i__2 = i__ - 1; _starpu_dswap_(&i__2, &a[i__ * a_dim1 + 1], &c__1, &a[( i__ + 1) * a_dim1 + 1], &c__1); } if (*n > i__ + 1) { i__2 = *n - i__ - 1; _starpu_dswap_(&i__2, &a[i__ + (i__ + 2) * a_dim1], lda, & a[i__ + 1 + (i__ + 2) * a_dim1], lda); } _starpu_dswap_(n, &vs[i__ * vs_dim1 + 1], &c__1, &vs[(i__ + 1) * vs_dim1 + 1], &c__1); a[i__ + (i__ + 1) * a_dim1] = a[i__ + 1 + i__ * a_dim1]; a[i__ + 1 + i__ * a_dim1] = 0.; } inxt = i__ + 2; } L20: ; } } i__1 = *n - ieval; /* Computing MAX */ i__3 = *n - ieval; i__2 = max(i__3,1); _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ieval + 1], &i__2, &ierr); } if (wantst && *info == 0) { /* Check if reordering successful */ lastsl = TRUE_; lst2sl = TRUE_; *sdim = 0; ip = 0; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { cursl = (*select)(&wr[i__], &wi[i__]); if (wi[i__] == 0.) { if (cursl) { ++(*sdim); } ip = 0; if (cursl && ! lastsl) { *info = *n + 2; } } else { if (ip == 1) { /* Last eigenvalue of conjugate pair */ cursl = cursl || lastsl; lastsl = cursl; if (cursl) { *sdim += 2; } ip = -1; if (cursl && ! lst2sl) { *info = *n + 2; } } else { /* First eigenvalue of conjugate pair */ ip = 1; } } lst2sl = lastsl; lastsl = cursl; /* L30: */ } } work[1] = (doublereal) maxwrk; if (wantsv || wantsb) { /* Computing MAX */ i__1 = 1, i__2 = *sdim * (*n - *sdim); iwork[1] = max(i__1,i__2); } else { iwork[1] = 1; } return 0; /* End of DGEESX */ } /* _starpu_dgeesx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgeev.c000066400000000000000000000447151507764646700205010ustar00rootroot00000000000000/* dgeev.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__0 = 0; static integer c_n1 = -1; /* Subroutine */ int _starpu_dgeev_(char *jobvl, char *jobvr, integer *n, doublereal * a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, k; doublereal r__, cs, sn; integer ihi; doublereal scl; integer ilo; doublereal dum[1], eps; integer ibal; char side[1]; doublereal anrm; integer ierr, itau; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); integer iwrk, nout; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern doublereal _starpu_dlapy2_(doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dgebak_( char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dgebal_(char *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *); logical scalea; extern doublereal _starpu_dlamch_(char *); doublereal cscale; extern doublereal _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_xerbla_(char *, integer *); logical select[1]; extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dorghr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dhseqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dtrevc_(char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *); integer minwrk, maxwrk; logical wantvl; doublereal smlnum; integer hswork; logical lquery, wantvr; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEEV computes for an N-by-N real nonsymmetric matrix A, the */ /* eigenvalues and, optionally, the left and/or right eigenvectors. */ /* The right eigenvector v(j) of A satisfies */ /* A * v(j) = lambda(j) * v(j) */ /* where lambda(j) is its eigenvalue. */ /* The left eigenvector u(j) of A satisfies */ /* u(j)**H * A = lambda(j) * u(j)**H */ /* where u(j)**H denotes the conjugate transpose of u(j). */ /* The computed eigenvectors are normalized to have Euclidean norm */ /* equal to 1 and largest component real. */ /* Arguments */ /* ========= */ /* JOBVL (input) CHARACTER*1 */ /* = 'N': left eigenvectors of A are not computed; */ /* = 'V': left eigenvectors of A are computed. */ /* JOBVR (input) CHARACTER*1 */ /* = 'N': right eigenvectors of A are not computed; */ /* = 'V': right eigenvectors of A are computed. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. */ /* On exit, A has been overwritten. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* WR (output) DOUBLE PRECISION array, dimension (N) */ /* WI (output) DOUBLE PRECISION array, dimension (N) */ /* WR and WI contain the real and imaginary parts, */ /* respectively, of the computed eigenvalues. Complex */ /* conjugate pairs of eigenvalues appear consecutively */ /* with the eigenvalue having the positive imaginary part */ /* first. */ /* VL (output) DOUBLE PRECISION array, dimension (LDVL,N) */ /* If JOBVL = 'V', the left eigenvectors u(j) are stored one */ /* after another in the columns of VL, in the same order */ /* as their eigenvalues. */ /* If JOBVL = 'N', VL is not referenced. */ /* If the j-th eigenvalue is real, then u(j) = VL(:,j), */ /* the j-th column of VL. */ /* If the j-th and (j+1)-st eigenvalues form a complex */ /* conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and */ /* u(j+1) = VL(:,j) - i*VL(:,j+1). */ /* LDVL (input) INTEGER */ /* The leading dimension of the array VL. LDVL >= 1; if */ /* JOBVL = 'V', LDVL >= N. */ /* VR (output) DOUBLE PRECISION array, dimension (LDVR,N) */ /* If JOBVR = 'V', the right eigenvectors v(j) are stored one */ /* after another in the columns of VR, in the same order */ /* as their eigenvalues. */ /* If JOBVR = 'N', VR is not referenced. */ /* If the j-th eigenvalue is real, then v(j) = VR(:,j), */ /* the j-th column of VR. */ /* If the j-th and (j+1)-st eigenvalues form a complex */ /* conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and */ /* v(j+1) = VR(:,j) - i*VR(:,j+1). */ /* LDVR (input) INTEGER */ /* The leading dimension of the array VR. LDVR >= 1; if */ /* JOBVR = 'V', LDVR >= N. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,3*N), and */ /* if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good */ /* performance, LWORK must generally be larger. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = i, the QR algorithm failed to compute all the */ /* eigenvalues, and no eigenvectors have been computed; */ /* elements i+1:N of WR and WI contain eigenvalues which */ /* have converged. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --wr; --wi; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1; vr -= vr_offset; --work; /* Function Body */ *info = 0; lquery = *lwork == -1; wantvl = _starpu_lsame_(jobvl, "V"); wantvr = _starpu_lsame_(jobvr, "V"); if (! wantvl && ! _starpu_lsame_(jobvl, "N")) { *info = -1; } else if (! wantvr && ! _starpu_lsame_(jobvr, "N")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldvl < 1 || wantvl && *ldvl < *n) { *info = -9; } else if (*ldvr < 1 || wantvr && *ldvr < *n) { *info = -11; } /* Compute workspace */ /* (Note: Comments in the code beginning "Workspace:" describe the */ /* minimal amount of workspace needed at that point in the code, */ /* as well as the preferred amount for good performance. */ /* NB refers to the optimal block size for the immediately */ /* following subroutine, as returned by ILAENV. */ /* HSWORK refers to the workspace preferred by DHSEQR, as */ /* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ /* the worst case.) */ if (*info == 0) { if (*n == 0) { minwrk = 1; maxwrk = 1; } else { maxwrk = (*n << 1) + *n * _starpu_ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, n, &c__0); if (wantvl) { minwrk = *n << 2; /* Computing MAX */ i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * _starpu_ilaenv_(&c__1, "DORGHR", " ", n, &c__1, n, &c_n1); maxwrk = max(i__1,i__2); _starpu_dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ 1], &vl[vl_offset], ldvl, &work[1], &c_n1, info); hswork = (integer) work[1]; /* Computing MAX */ i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * n + hswork; maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n << 2; maxwrk = max(i__1,i__2); } else if (wantvr) { minwrk = *n << 2; /* Computing MAX */ i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * _starpu_ilaenv_(&c__1, "DORGHR", " ", n, &c__1, n, &c_n1); maxwrk = max(i__1,i__2); _starpu_dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); hswork = (integer) work[1]; /* Computing MAX */ i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * n + hswork; maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n << 2; maxwrk = max(i__1,i__2); } else { minwrk = *n * 3; _starpu_dhseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); hswork = (integer) work[1]; /* Computing MAX */ i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * n + hswork; maxwrk = max(i__1,i__2); } maxwrk = max(maxwrk,minwrk); } work[1] = (doublereal) maxwrk; if (*lwork < minwrk && ! lquery) { *info = -13; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEEV ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Get machine constants */ eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S"); bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); smlnum = sqrt(smlnum) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, dum); scalea = FALSE_; if (anrm > 0. && anrm < smlnum) { scalea = TRUE_; cscale = smlnum; } else if (anrm > bignum) { scalea = TRUE_; cscale = bignum; } if (scalea) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & ierr); } /* Balance the matrix */ /* (Workspace: need N) */ ibal = 1; _starpu_dgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); /* Reduce to upper Hessenberg form */ /* (Workspace: need 3*N, prefer 2*N+N*NB) */ itau = ibal + *n; iwrk = itau + *n; i__1 = *lwork - iwrk + 1; _starpu_dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, &ierr); if (wantvl) { /* Want left eigenvectors */ /* Copy Householder vectors to VL */ *(unsigned char *)side = 'L'; _starpu_dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) ; /* Generate orthogonal matrix in VL */ /* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ i__1 = *lwork - iwrk + 1; _starpu_dorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], &i__1, &ierr); /* Perform QR iteration, accumulating Schur vectors in VL */ /* (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ iwrk = itau; i__1 = *lwork - iwrk + 1; _starpu_dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & vl[vl_offset], ldvl, &work[iwrk], &i__1, info); if (wantvr) { /* Want left and right eigenvectors */ /* Copy Schur vectors to VR */ *(unsigned char *)side = 'B'; _starpu_dlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); } } else if (wantvr) { /* Want right eigenvectors */ /* Copy Householder vectors to VR */ *(unsigned char *)side = 'R'; _starpu_dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) ; /* Generate orthogonal matrix in VR */ /* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ i__1 = *lwork - iwrk + 1; _starpu_dorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], &i__1, &ierr); /* Perform QR iteration, accumulating Schur vectors in VR */ /* (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ iwrk = itau; i__1 = *lwork - iwrk + 1; _starpu_dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & vr[vr_offset], ldvr, &work[iwrk], &i__1, info); } else { /* Compute eigenvalues only */ /* (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ iwrk = itau; i__1 = *lwork - iwrk + 1; _starpu_dhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & vr[vr_offset], ldvr, &work[iwrk], &i__1, info); } /* If INFO > 0 from DHSEQR, then quit */ if (*info > 0) { goto L50; } if (wantvl || wantvr) { /* Compute left and/or right eigenvectors */ /* (Workspace: need 4*N) */ _starpu_dtrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); } if (wantvl) { /* Undo balancing of left eigenvectors */ /* (Workspace: need N) */ _starpu_dgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, &ierr); /* Normalize left eigenvectors and make largest component real */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (wi[i__] == 0.) { scl = 1. / _starpu_dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); _starpu_dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); } else if (wi[i__] > 0.) { d__1 = _starpu_dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); d__2 = _starpu_dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); scl = 1. / _starpu_dlapy2_(&d__1, &d__2); _starpu_dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); _starpu_dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); i__2 = *n; for (k = 1; k <= i__2; ++k) { /* Computing 2nd power */ d__1 = vl[k + i__ * vl_dim1]; /* Computing 2nd power */ d__2 = vl[k + (i__ + 1) * vl_dim1]; work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; /* L10: */ } k = _starpu_idamax_(n, &work[iwrk], &c__1); _starpu_dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], &cs, &sn, &r__); _starpu_drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * vl_dim1 + 1], &c__1, &cs, &sn); vl[k + (i__ + 1) * vl_dim1] = 0.; } /* L20: */ } } if (wantvr) { /* Undo balancing of right eigenvectors */ /* (Workspace: need N) */ _starpu_dgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, &ierr); /* Normalize right eigenvectors and make largest component real */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (wi[i__] == 0.) { scl = 1. / _starpu_dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); _starpu_dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); } else if (wi[i__] > 0.) { d__1 = _starpu_dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); d__2 = _starpu_dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); scl = 1. / _starpu_dlapy2_(&d__1, &d__2); _starpu_dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); _starpu_dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); i__2 = *n; for (k = 1; k <= i__2; ++k) { /* Computing 2nd power */ d__1 = vr[k + i__ * vr_dim1]; /* Computing 2nd power */ d__2 = vr[k + (i__ + 1) * vr_dim1]; work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; /* L30: */ } k = _starpu_idamax_(n, &work[iwrk], &c__1); _starpu_dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], &cs, &sn, &r__); _starpu_drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * vr_dim1 + 1], &c__1, &cs, &sn); vr[k + (i__ + 1) * vr_dim1] = 0.; } /* L40: */ } } /* Undo scaling if necessary */ L50: if (scalea) { i__1 = *n - *info; /* Computing MAX */ i__3 = *n - *info; i__2 = max(i__3,1); _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + 1], &i__2, &ierr); i__1 = *n - *info; /* Computing MAX */ i__3 = *n - *info; i__2 = max(i__3,1); _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + 1], &i__2, &ierr); if (*info > 0) { i__1 = ilo - 1; _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], n, &ierr); i__1 = ilo - 1; _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], n, &ierr); } } work[1] = (doublereal) maxwrk; return 0; /* End of DGEEV */ } /* _starpu_dgeev_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgeevx.c000066400000000000000000000604261507764646700206660ustar00rootroot00000000000000/* dgeevx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__0 = 0; static integer c_n1 = -1; /* Subroutine */ int _starpu_dgeevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, doublereal *a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer *ilo, integer *ihi, doublereal *scale, doublereal *abnrm, doublereal *rconde, doublereal *rcondv, doublereal *work, integer *lwork, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, k; doublereal r__, cs, sn; char job[1]; doublereal scl, dum[1], eps; char side[1]; doublereal anrm; integer ierr, itau; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); integer iwrk, nout; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); integer icond; extern logical _starpu_lsame_(char *, char *); extern doublereal _starpu_dlapy2_(doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dgebak_( char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dgebal_(char *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *); logical scalea; extern doublereal _starpu_dlamch_(char *); doublereal cscale; extern doublereal _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_xerbla_(char *, integer *); logical select[1]; extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dorghr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dhseqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dtrevc_(char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *), _starpu_dtrsna_(char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, integer *); integer minwrk, maxwrk; logical wantvl, wntsnb; integer hswork; logical wntsne; doublereal smlnum; logical lquery, wantvr, wntsnn, wntsnv; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEEVX computes for an N-by-N real nonsymmetric matrix A, the */ /* eigenvalues and, optionally, the left and/or right eigenvectors. */ /* Optionally also, it computes a balancing transformation to improve */ /* the conditioning of the eigenvalues and eigenvectors (ILO, IHI, */ /* SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues */ /* (RCONDE), and reciprocal condition numbers for the right */ /* eigenvectors (RCONDV). */ /* The right eigenvector v(j) of A satisfies */ /* A * v(j) = lambda(j) * v(j) */ /* where lambda(j) is its eigenvalue. */ /* The left eigenvector u(j) of A satisfies */ /* u(j)**H * A = lambda(j) * u(j)**H */ /* where u(j)**H denotes the conjugate transpose of u(j). */ /* The computed eigenvectors are normalized to have Euclidean norm */ /* equal to 1 and largest component real. */ /* Balancing a matrix means permuting the rows and columns to make it */ /* more nearly upper triangular, and applying a diagonal similarity */ /* transformation D * A * D**(-1), where D is a diagonal matrix, to */ /* make its rows and columns closer in norm and the condition numbers */ /* of its eigenvalues and eigenvectors smaller. The computed */ /* reciprocal condition numbers correspond to the balanced matrix. */ /* Permuting rows and columns will not change the condition numbers */ /* (in exact arithmetic) but diagonal scaling will. For further */ /* explanation of balancing, see section 4.10.2 of the LAPACK */ /* Users' Guide. */ /* Arguments */ /* ========= */ /* BALANC (input) CHARACTER*1 */ /* Indicates how the input matrix should be diagonally scaled */ /* and/or permuted to improve the conditioning of its */ /* eigenvalues. */ /* = 'N': Do not diagonally scale or permute; */ /* = 'P': Perform permutations to make the matrix more nearly */ /* upper triangular. Do not diagonally scale; */ /* = 'S': Diagonally scale the matrix, i.e. replace A by */ /* D*A*D**(-1), where D is a diagonal matrix chosen */ /* to make the rows and columns of A more equal in */ /* norm. Do not permute; */ /* = 'B': Both diagonally scale and permute A. */ /* Computed reciprocal condition numbers will be for the matrix */ /* after balancing and/or permuting. Permuting does not change */ /* condition numbers (in exact arithmetic), but balancing does. */ /* JOBVL (input) CHARACTER*1 */ /* = 'N': left eigenvectors of A are not computed; */ /* = 'V': left eigenvectors of A are computed. */ /* If SENSE = 'E' or 'B', JOBVL must = 'V'. */ /* JOBVR (input) CHARACTER*1 */ /* = 'N': right eigenvectors of A are not computed; */ /* = 'V': right eigenvectors of A are computed. */ /* If SENSE = 'E' or 'B', JOBVR must = 'V'. */ /* SENSE (input) CHARACTER*1 */ /* Determines which reciprocal condition numbers are computed. */ /* = 'N': None are computed; */ /* = 'E': Computed for eigenvalues only; */ /* = 'V': Computed for right eigenvectors only; */ /* = 'B': Computed for eigenvalues and right eigenvectors. */ /* If SENSE = 'E' or 'B', both left and right eigenvectors */ /* must also be computed (JOBVL = 'V' and JOBVR = 'V'). */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. */ /* On exit, A has been overwritten. If JOBVL = 'V' or */ /* JOBVR = 'V', A contains the real Schur form of the balanced */ /* version of the input matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* WR (output) DOUBLE PRECISION array, dimension (N) */ /* WI (output) DOUBLE PRECISION array, dimension (N) */ /* WR and WI contain the real and imaginary parts, */ /* respectively, of the computed eigenvalues. Complex */ /* conjugate pairs of eigenvalues will appear consecutively */ /* with the eigenvalue having the positive imaginary part */ /* first. */ /* VL (output) DOUBLE PRECISION array, dimension (LDVL,N) */ /* If JOBVL = 'V', the left eigenvectors u(j) are stored one */ /* after another in the columns of VL, in the same order */ /* as their eigenvalues. */ /* If JOBVL = 'N', VL is not referenced. */ /* If the j-th eigenvalue is real, then u(j) = VL(:,j), */ /* the j-th column of VL. */ /* If the j-th and (j+1)-st eigenvalues form a complex */ /* conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and */ /* u(j+1) = VL(:,j) - i*VL(:,j+1). */ /* LDVL (input) INTEGER */ /* The leading dimension of the array VL. LDVL >= 1; if */ /* JOBVL = 'V', LDVL >= N. */ /* VR (output) DOUBLE PRECISION array, dimension (LDVR,N) */ /* If JOBVR = 'V', the right eigenvectors v(j) are stored one */ /* after another in the columns of VR, in the same order */ /* as their eigenvalues. */ /* If JOBVR = 'N', VR is not referenced. */ /* If the j-th eigenvalue is real, then v(j) = VR(:,j), */ /* the j-th column of VR. */ /* If the j-th and (j+1)-st eigenvalues form a complex */ /* conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and */ /* v(j+1) = VR(:,j) - i*VR(:,j+1). */ /* LDVR (input) INTEGER */ /* The leading dimension of the array VR. LDVR >= 1, and if */ /* JOBVR = 'V', LDVR >= N. */ /* ILO (output) INTEGER */ /* IHI (output) INTEGER */ /* ILO and IHI are integer values determined when A was */ /* balanced. The balanced A(i,j) = 0 if I > J and */ /* J = 1,...,ILO-1 or I = IHI+1,...,N. */ /* SCALE (output) DOUBLE PRECISION array, dimension (N) */ /* Details of the permutations and scaling factors applied */ /* when balancing A. If P(j) is the index of the row and column */ /* interchanged with row and column j, and D(j) is the scaling */ /* factor applied to row and column j, then */ /* SCALE(J) = P(J), for J = 1,...,ILO-1 */ /* = D(J), for J = ILO,...,IHI */ /* = P(J) for J = IHI+1,...,N. */ /* The order in which the interchanges are made is N to IHI+1, */ /* then 1 to ILO-1. */ /* ABNRM (output) DOUBLE PRECISION */ /* The one-norm of the balanced matrix (the maximum */ /* of the sum of absolute values of elements of any column). */ /* RCONDE (output) DOUBLE PRECISION array, dimension (N) */ /* RCONDE(j) is the reciprocal condition number of the j-th */ /* eigenvalue. */ /* RCONDV (output) DOUBLE PRECISION array, dimension (N) */ /* RCONDV(j) is the reciprocal condition number of the j-th */ /* right eigenvector. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. If SENSE = 'N' or 'E', */ /* LWORK >= max(1,2*N), and if JOBVL = 'V' or JOBVR = 'V', */ /* LWORK >= 3*N. If SENSE = 'V' or 'B', LWORK >= N*(N+6). */ /* For good performance, LWORK must generally be larger. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace) INTEGER array, dimension (2*N-2) */ /* If SENSE = 'N' or 'E', not referenced. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = i, the QR algorithm failed to compute all the */ /* eigenvalues, and no eigenvectors or condition numbers */ /* have been computed; elements 1:ILO-1 and i+1:N of WR */ /* and WI contain eigenvalues which have converged. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --wr; --wi; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1; vr -= vr_offset; --scale; --rconde; --rcondv; --work; --iwork; /* Function Body */ *info = 0; lquery = *lwork == -1; wantvl = _starpu_lsame_(jobvl, "V"); wantvr = _starpu_lsame_(jobvr, "V"); wntsnn = _starpu_lsame_(sense, "N"); wntsne = _starpu_lsame_(sense, "E"); wntsnv = _starpu_lsame_(sense, "V"); wntsnb = _starpu_lsame_(sense, "B"); if (! (_starpu_lsame_(balanc, "N") || _starpu_lsame_(balanc, "S") || _starpu_lsame_(balanc, "P") || _starpu_lsame_(balanc, "B"))) { *info = -1; } else if (! wantvl && ! _starpu_lsame_(jobvl, "N")) { *info = -2; } else if (! wantvr && ! _starpu_lsame_(jobvr, "N")) { *info = -3; } else if (! (wntsnn || wntsne || wntsnb || wntsnv) || (wntsne || wntsnb) && ! (wantvl && wantvr)) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldvl < 1 || wantvl && *ldvl < *n) { *info = -11; } else if (*ldvr < 1 || wantvr && *ldvr < *n) { *info = -13; } /* Compute workspace */ /* (Note: Comments in the code beginning "Workspace:" describe the */ /* minimal amount of workspace needed at that point in the code, */ /* as well as the preferred amount for good performance. */ /* NB refers to the optimal block size for the immediately */ /* following subroutine, as returned by ILAENV. */ /* HSWORK refers to the workspace preferred by DHSEQR, as */ /* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ /* the worst case.) */ if (*info == 0) { if (*n == 0) { minwrk = 1; maxwrk = 1; } else { maxwrk = *n + *n * _starpu_ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, n, & c__0); if (wantvl) { _starpu_dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ 1], &vl[vl_offset], ldvl, &work[1], &c_n1, info); } else if (wantvr) { _starpu_dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); } else { if (wntsnn) { _starpu_dhseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); } else { _starpu_dhseqr_("S", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); } } hswork = (integer) work[1]; if (! wantvl && ! wantvr) { minwrk = *n << 1; if (! wntsnn) { /* Computing MAX */ i__1 = minwrk, i__2 = *n * *n + *n * 6; minwrk = max(i__1,i__2); } maxwrk = max(maxwrk,hswork); if (! wntsnn) { /* Computing MAX */ i__1 = maxwrk, i__2 = *n * *n + *n * 6; maxwrk = max(i__1,i__2); } } else { minwrk = *n * 3; if (! wntsnn && ! wntsne) { /* Computing MAX */ i__1 = minwrk, i__2 = *n * *n + *n * 6; minwrk = max(i__1,i__2); } maxwrk = max(maxwrk,hswork); /* Computing MAX */ i__1 = maxwrk, i__2 = *n + (*n - 1) * _starpu_ilaenv_(&c__1, "DORGHR", " ", n, &c__1, n, &c_n1); maxwrk = max(i__1,i__2); if (! wntsnn && ! wntsne) { /* Computing MAX */ i__1 = maxwrk, i__2 = *n * *n + *n * 6; maxwrk = max(i__1,i__2); } /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3; maxwrk = max(i__1,i__2); } maxwrk = max(maxwrk,minwrk); } work[1] = (doublereal) maxwrk; if (*lwork < minwrk && ! lquery) { *info = -21; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEEVX", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Get machine constants */ eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S"); bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); smlnum = sqrt(smlnum) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ icond = 0; anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, dum); scalea = FALSE_; if (anrm > 0. && anrm < smlnum) { scalea = TRUE_; cscale = smlnum; } else if (anrm > bignum) { scalea = TRUE_; cscale = bignum; } if (scalea) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & ierr); } /* Balance the matrix and compute ABNRM */ _starpu_dgebal_(balanc, n, &a[a_offset], lda, ilo, ihi, &scale[1], &ierr); *abnrm = _starpu_dlange_("1", n, n, &a[a_offset], lda, dum); if (scalea) { dum[0] = *abnrm; _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, &c__1, & ierr); *abnrm = dum[0]; } /* Reduce to upper Hessenberg form */ /* (Workspace: need 2*N, prefer N+N*NB) */ itau = 1; iwrk = itau + *n; i__1 = *lwork - iwrk + 1; _starpu_dgehrd_(n, ilo, ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, & ierr); if (wantvl) { /* Want left eigenvectors */ /* Copy Householder vectors to VL */ *(unsigned char *)side = 'L'; _starpu_dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) ; /* Generate orthogonal matrix in VL */ /* (Workspace: need 2*N-1, prefer N+(N-1)*NB) */ i__1 = *lwork - iwrk + 1; _starpu_dorghr_(n, ilo, ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], & i__1, &ierr); /* Perform QR iteration, accumulating Schur vectors in VL */ /* (Workspace: need 1, prefer HSWORK (see comments) ) */ iwrk = itau; i__1 = *lwork - iwrk + 1; _starpu_dhseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vl[ vl_offset], ldvl, &work[iwrk], &i__1, info); if (wantvr) { /* Want left and right eigenvectors */ /* Copy Schur vectors to VR */ *(unsigned char *)side = 'B'; _starpu_dlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); } } else if (wantvr) { /* Want right eigenvectors */ /* Copy Householder vectors to VR */ *(unsigned char *)side = 'R'; _starpu_dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) ; /* Generate orthogonal matrix in VR */ /* (Workspace: need 2*N-1, prefer N+(N-1)*NB) */ i__1 = *lwork - iwrk + 1; _starpu_dorghr_(n, ilo, ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], & i__1, &ierr); /* Perform QR iteration, accumulating Schur vectors in VR */ /* (Workspace: need 1, prefer HSWORK (see comments) ) */ iwrk = itau; i__1 = *lwork - iwrk + 1; _starpu_dhseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[ vr_offset], ldvr, &work[iwrk], &i__1, info); } else { /* Compute eigenvalues only */ /* If condition numbers desired, compute Schur form */ if (wntsnn) { *(unsigned char *)job = 'E'; } else { *(unsigned char *)job = 'S'; } /* (Workspace: need 1, prefer HSWORK (see comments) ) */ iwrk = itau; i__1 = *lwork - iwrk + 1; _starpu_dhseqr_(job, "N", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[ vr_offset], ldvr, &work[iwrk], &i__1, info); } /* If INFO > 0 from DHSEQR, then quit */ if (*info > 0) { goto L50; } if (wantvl || wantvr) { /* Compute left and/or right eigenvectors */ /* (Workspace: need 3*N) */ _starpu_dtrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); } /* Compute condition numbers if desired */ /* (Workspace: need N*N+6*N unless SENSE = 'E') */ if (! wntsnn) { _starpu_dtrsna_(sense, "A", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &rconde[1], &rcondv[1], n, &nout, &work[iwrk], n, &iwork[1], &icond); } if (wantvl) { /* Undo balancing of left eigenvectors */ _starpu_dgebak_(balanc, "L", n, ilo, ihi, &scale[1], n, &vl[vl_offset], ldvl, &ierr); /* Normalize left eigenvectors and make largest component real */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (wi[i__] == 0.) { scl = 1. / _starpu_dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); _starpu_dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); } else if (wi[i__] > 0.) { d__1 = _starpu_dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); d__2 = _starpu_dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); scl = 1. / _starpu_dlapy2_(&d__1, &d__2); _starpu_dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); _starpu_dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); i__2 = *n; for (k = 1; k <= i__2; ++k) { /* Computing 2nd power */ d__1 = vl[k + i__ * vl_dim1]; /* Computing 2nd power */ d__2 = vl[k + (i__ + 1) * vl_dim1]; work[k] = d__1 * d__1 + d__2 * d__2; /* L10: */ } k = _starpu_idamax_(n, &work[1], &c__1); _starpu_dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], &cs, &sn, &r__); _starpu_drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * vl_dim1 + 1], &c__1, &cs, &sn); vl[k + (i__ + 1) * vl_dim1] = 0.; } /* L20: */ } } if (wantvr) { /* Undo balancing of right eigenvectors */ _starpu_dgebak_(balanc, "R", n, ilo, ihi, &scale[1], n, &vr[vr_offset], ldvr, &ierr); /* Normalize right eigenvectors and make largest component real */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (wi[i__] == 0.) { scl = 1. / _starpu_dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); _starpu_dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); } else if (wi[i__] > 0.) { d__1 = _starpu_dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); d__2 = _starpu_dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); scl = 1. / _starpu_dlapy2_(&d__1, &d__2); _starpu_dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); _starpu_dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); i__2 = *n; for (k = 1; k <= i__2; ++k) { /* Computing 2nd power */ d__1 = vr[k + i__ * vr_dim1]; /* Computing 2nd power */ d__2 = vr[k + (i__ + 1) * vr_dim1]; work[k] = d__1 * d__1 + d__2 * d__2; /* L30: */ } k = _starpu_idamax_(n, &work[1], &c__1); _starpu_dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], &cs, &sn, &r__); _starpu_drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * vr_dim1 + 1], &c__1, &cs, &sn); vr[k + (i__ + 1) * vr_dim1] = 0.; } /* L40: */ } } /* Undo scaling if necessary */ L50: if (scalea) { i__1 = *n - *info; /* Computing MAX */ i__3 = *n - *info; i__2 = max(i__3,1); _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + 1], &i__2, &ierr); i__1 = *n - *info; /* Computing MAX */ i__3 = *n - *info; i__2 = max(i__3,1); _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + 1], &i__2, &ierr); if (*info == 0) { if ((wntsnv || wntsnb) && icond == 0) { _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &rcondv[ 1], n, &ierr); } } else { i__1 = *ilo - 1; _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], n, &ierr); i__1 = *ilo - 1; _starpu_dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], n, &ierr); } } work[1] = (doublereal) maxwrk; return 0; /* End of DGEEVX */ } /* _starpu_dgeevx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgegs.c000066400000000000000000000424351507764646700204750ustar00rootroot00000000000000/* dgegs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b36 = 0.; static doublereal c_b37 = 1.; /* Subroutine */ int _starpu_dgegs_(char *jobvsl, char *jobvsr, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * alphar, doublereal *alphai, doublereal *beta, doublereal *vsl, integer *ldvsl, doublereal *vsr, integer *ldvsr, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, vsr_dim1, vsr_offset, i__1, i__2; /* Local variables */ integer nb, nb1, nb2, nb3, ihi, ilo; doublereal eps, anrm, bnrm; integer itau, lopt; extern logical _starpu_lsame_(char *, char *); integer ileft, iinfo, icols; logical ilvsl; integer iwork; logical ilvsr; integer irows; extern /* Subroutine */ int _starpu_dggbak_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dggbal_(char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dgghrd_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); logical ilascl, ilbscl; extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal safmin; extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dhgeqz_(char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); integer ijobvl, iright, ijobvr; extern /* Subroutine */ int _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); doublereal anrmto; integer lwkmin; doublereal bnrmto; extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); doublereal smlnum; integer lwkopt; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This routine is deprecated and has been replaced by routine DGGES. */ /* DGEGS computes the eigenvalues, real Schur form, and, optionally, */ /* left and or/right Schur vectors of a real matrix pair (A,B). */ /* Given two square matrices A and B, the generalized real Schur */ /* factorization has the form */ /* A = Q*S*Z**T, B = Q*T*Z**T */ /* where Q and Z are orthogonal matrices, T is upper triangular, and S */ /* is an upper quasi-triangular matrix with 1-by-1 and 2-by-2 diagonal */ /* blocks, the 2-by-2 blocks corresponding to complex conjugate pairs */ /* of eigenvalues of (A,B). The columns of Q are the left Schur vectors */ /* and the columns of Z are the right Schur vectors. */ /* If only the eigenvalues of (A,B) are needed, the driver routine */ /* DGEGV should be used instead. See DGEGV for a description of the */ /* eigenvalues of the generalized nonsymmetric eigenvalue problem */ /* (GNEP). */ /* Arguments */ /* ========= */ /* JOBVSL (input) CHARACTER*1 */ /* = 'N': do not compute the left Schur vectors; */ /* = 'V': compute the left Schur vectors (returned in VSL). */ /* JOBVSR (input) CHARACTER*1 */ /* = 'N': do not compute the right Schur vectors; */ /* = 'V': compute the right Schur vectors (returned in VSR). */ /* N (input) INTEGER */ /* The order of the matrices A, B, VSL, and VSR. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the matrix A. */ /* On exit, the upper quasi-triangular matrix S from the */ /* generalized real Schur factorization. */ /* LDA (input) INTEGER */ /* The leading dimension of A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ /* On entry, the matrix B. */ /* On exit, the upper triangular matrix T from the generalized */ /* real Schur factorization. */ /* LDB (input) INTEGER */ /* The leading dimension of B. LDB >= max(1,N). */ /* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ /* The real parts of each scalar alpha defining an eigenvalue */ /* of GNEP. */ /* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ /* The imaginary parts of each scalar alpha defining an */ /* eigenvalue of GNEP. If ALPHAI(j) is zero, then the j-th */ /* eigenvalue is real; if positive, then the j-th and (j+1)-st */ /* eigenvalues are a complex conjugate pair, with */ /* ALPHAI(j+1) = -ALPHAI(j). */ /* BETA (output) DOUBLE PRECISION array, dimension (N) */ /* The scalars beta that define the eigenvalues of GNEP. */ /* Together, the quantities alpha = (ALPHAR(j),ALPHAI(j)) and */ /* beta = BETA(j) represent the j-th eigenvalue of the matrix */ /* pair (A,B), in one of the forms lambda = alpha/beta or */ /* mu = beta/alpha. Since either lambda or mu may overflow, */ /* they should not, in general, be computed. */ /* VSL (output) DOUBLE PRECISION array, dimension (LDVSL,N) */ /* If JOBVSL = 'V', the matrix of left Schur vectors Q. */ /* Not referenced if JOBVSL = 'N'. */ /* LDVSL (input) INTEGER */ /* The leading dimension of the matrix VSL. LDVSL >=1, and */ /* if JOBVSL = 'V', LDVSL >= N. */ /* VSR (output) DOUBLE PRECISION array, dimension (LDVSR,N) */ /* If JOBVSR = 'V', the matrix of right Schur vectors Z. */ /* Not referenced if JOBVSR = 'N'. */ /* LDVSR (input) INTEGER */ /* The leading dimension of the matrix VSR. LDVSR >= 1, and */ /* if JOBVSR = 'V', LDVSR >= N. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,4*N). */ /* For good performance, LWORK must generally be larger. */ /* To compute the optimal value of LWORK, call ILAENV to get */ /* blocksizes (for DGEQRF, DORMQR, and DORGQR.) Then compute: */ /* NB -- MAX of the blocksizes for DGEQRF, DORMQR, and DORGQR */ /* The optimal LWORK is 2*N + N*(NB+1). */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* = 1,...,N: */ /* The QZ iteration failed. (A,B) are not in Schur */ /* form, but ALPHAR(j), ALPHAI(j), and BETA(j) should */ /* be correct for j=INFO+1,...,N. */ /* > N: errors that usually indicate LAPACK problems: */ /* =N+1: error return from DGGBAL */ /* =N+2: error return from DGEQRF */ /* =N+3: error return from DORMQR */ /* =N+4: error return from DORGQR */ /* =N+5: error return from DGGHRD */ /* =N+6: error return from DHGEQZ (other than failed */ /* iteration) */ /* =N+7: error return from DGGBAK (computing VSL) */ /* =N+8: error return from DGGBAK (computing VSR) */ /* =N+9: error return from DLASCL (various places) */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --alphar; --alphai; --beta; vsl_dim1 = *ldvsl; vsl_offset = 1 + vsl_dim1; vsl -= vsl_offset; vsr_dim1 = *ldvsr; vsr_offset = 1 + vsr_dim1; vsr -= vsr_offset; --work; /* Function Body */ if (_starpu_lsame_(jobvsl, "N")) { ijobvl = 1; ilvsl = FALSE_; } else if (_starpu_lsame_(jobvsl, "V")) { ijobvl = 2; ilvsl = TRUE_; } else { ijobvl = -1; ilvsl = FALSE_; } if (_starpu_lsame_(jobvsr, "N")) { ijobvr = 1; ilvsr = FALSE_; } else if (_starpu_lsame_(jobvsr, "V")) { ijobvr = 2; ilvsr = TRUE_; } else { ijobvr = -1; ilvsr = FALSE_; } /* Test the input arguments */ /* Computing MAX */ i__1 = *n << 2; lwkmin = max(i__1,1); lwkopt = lwkmin; work[1] = (doublereal) lwkopt; lquery = *lwork == -1; *info = 0; if (ijobvl <= 0) { *info = -1; } else if (ijobvr <= 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { *info = -12; } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { *info = -14; } else if (*lwork < lwkmin && ! lquery) { *info = -16; } if (*info == 0) { nb1 = _starpu_ilaenv_(&c__1, "DGEQRF", " ", n, n, &c_n1, &c_n1); nb2 = _starpu_ilaenv_(&c__1, "DORMQR", " ", n, n, n, &c_n1); nb3 = _starpu_ilaenv_(&c__1, "DORGQR", " ", n, n, n, &c_n1); /* Computing MAX */ i__1 = max(nb1,nb2); nb = max(i__1,nb3); lopt = (*n << 1) + *n * (nb + 1); work[1] = (doublereal) lopt; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEGS ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Get machine constants */ eps = _starpu_dlamch_("E") * _starpu_dlamch_("B"); safmin = _starpu_dlamch_("S"); smlnum = *n * safmin / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, &work[1]); ilascl = FALSE_; if (anrm > 0. && anrm < smlnum) { anrmto = smlnum; ilascl = TRUE_; } else if (anrm > bignum) { anrmto = bignum; ilascl = TRUE_; } if (ilascl) { _starpu_dlascl_("G", &c_n1, &c_n1, &anrm, &anrmto, n, n, &a[a_offset], lda, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } } /* Scale B if max element outside range [SMLNUM,BIGNUM] */ bnrm = _starpu_dlange_("M", n, n, &b[b_offset], ldb, &work[1]); ilbscl = FALSE_; if (bnrm > 0. && bnrm < smlnum) { bnrmto = smlnum; ilbscl = TRUE_; } else if (bnrm > bignum) { bnrmto = bignum; ilbscl = TRUE_; } if (ilbscl) { _starpu_dlascl_("G", &c_n1, &c_n1, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } } /* Permute the matrix to make it more nearly triangular */ /* Workspace layout: (2*N words -- "work..." not actually used) */ /* left_permutation, right_permutation, work... */ ileft = 1; iright = *n + 1; iwork = iright + *n; _starpu_dggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ ileft], &work[iright], &work[iwork], &iinfo); if (iinfo != 0) { *info = *n + 1; goto L10; } /* Reduce B to triangular form, and initialize VSL and/or VSR */ /* Workspace layout: ("work..." must have at least N words) */ /* left_permutation, right_permutation, tau, work... */ irows = ihi + 1 - ilo; icols = *n + 1 - ilo; itau = iwork; iwork = itau + irows; i__1 = *lwork + 1 - iwork; _starpu_dgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ iwork], &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { *info = *n + 2; goto L10; } i__1 = *lwork + 1 - iwork; _starpu_dormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwork], &i__1, & iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { *info = *n + 3; goto L10; } if (ilvsl) { _starpu_dlaset_("Full", n, n, &c_b36, &c_b37, &vsl[vsl_offset], ldvsl); i__1 = irows - 1; i__2 = irows - 1; _starpu_dlacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[ilo + 1 + ilo * vsl_dim1], ldvsl); i__1 = *lwork + 1 - iwork; _starpu_dorgqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, & work[itau], &work[iwork], &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { *info = *n + 4; goto L10; } } if (ilvsr) { _starpu_dlaset_("Full", n, n, &c_b36, &c_b37, &vsr[vsr_offset], ldvsr); } /* Reduce to generalized Hessenberg form */ _starpu_dgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &iinfo); if (iinfo != 0) { *info = *n + 5; goto L10; } /* Perform QZ algorithm, computing Schur vectors if desired */ /* Workspace layout: ("work..." must have at least 1 word) */ /* left_permutation, right_permutation, work... */ iwork = itau; i__1 = *lwork + 1 - iwork; _starpu_dhgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[vsl_offset] , ldvsl, &vsr[vsr_offset], ldvsr, &work[iwork], &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { if (iinfo > 0 && iinfo <= *n) { *info = iinfo; } else if (iinfo > *n && iinfo <= *n << 1) { *info = iinfo - *n; } else { *info = *n + 6; } goto L10; } /* Apply permutation to VSL and VSR */ if (ilvsl) { _starpu_dggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsl[ vsl_offset], ldvsl, &iinfo); if (iinfo != 0) { *info = *n + 7; goto L10; } } if (ilvsr) { _starpu_dggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsr[ vsr_offset], ldvsr, &iinfo); if (iinfo != 0) { *info = *n + 8; goto L10; } } /* Undo scaling */ if (ilascl) { _starpu_dlascl_("H", &c_n1, &c_n1, &anrmto, &anrm, n, n, &a[a_offset], lda, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } _starpu_dlascl_("G", &c_n1, &c_n1, &anrmto, &anrm, n, &c__1, &alphar[1], n, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } _starpu_dlascl_("G", &c_n1, &c_n1, &anrmto, &anrm, n, &c__1, &alphai[1], n, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } } if (ilbscl) { _starpu_dlascl_("U", &c_n1, &c_n1, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } _starpu_dlascl_("G", &c_n1, &c_n1, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } } L10: work[1] = (doublereal) lwkopt; return 0; /* End of DGEGS */ } /* _starpu_dgegs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgegv.c000066400000000000000000000633421507764646700205000ustar00rootroot00000000000000/* dgegv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b27 = 1.; static doublereal c_b38 = 0.; /* Subroutine */ int _starpu_dgegv_(char *jobvl, char *jobvr, integer *n, doublereal * a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal *beta, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2; doublereal d__1, d__2, d__3, d__4; /* Local variables */ integer jc, nb, in, jr, nb1, nb2, nb3, ihi, ilo; doublereal eps; logical ilv; doublereal absb, anrm, bnrm; integer itau; doublereal temp; logical ilvl, ilvr; integer lopt; doublereal anrm1, anrm2, bnrm1, bnrm2, absai, scale, absar, sbeta; extern logical _starpu_lsame_(char *, char *); integer ileft, iinfo, icols, iwork, irows; extern /* Subroutine */ int _starpu_dggbak_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dggbal_(char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); doublereal salfai; extern /* Subroutine */ int _starpu_dgghrd_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); doublereal salfar; extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal safmin; extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); doublereal safmax; char chtemp[1]; logical ldumma[1]; extern /* Subroutine */ int _starpu_dhgeqz_(char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dtgevc_(char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); integer ijobvl, iright; logical ilimit; extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ijobvr; extern /* Subroutine */ int _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); doublereal onepls; integer lwkmin; extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This routine is deprecated and has been replaced by routine DGGEV. */ /* DGEGV computes the eigenvalues and, optionally, the left and/or right */ /* eigenvectors of a real matrix pair (A,B). */ /* Given two square matrices A and B, */ /* the generalized nonsymmetric eigenvalue problem (GNEP) is to find the */ /* eigenvalues lambda and corresponding (non-zero) eigenvectors x such */ /* that */ /* A*x = lambda*B*x. */ /* An alternate form is to find the eigenvalues mu and corresponding */ /* eigenvectors y such that */ /* mu*A*y = B*y. */ /* These two forms are equivalent with mu = 1/lambda and x = y if */ /* neither lambda nor mu is zero. In order to deal with the case that */ /* lambda or mu is zero or small, two values alpha and beta are returned */ /* for each eigenvalue, such that lambda = alpha/beta and */ /* mu = beta/alpha. */ /* The vectors x and y in the above equations are right eigenvectors of */ /* the matrix pair (A,B). Vectors u and v satisfying */ /* u**H*A = lambda*u**H*B or mu*v**H*A = v**H*B */ /* are left eigenvectors of (A,B). */ /* Note: this routine performs "full balancing" on A and B -- see */ /* "Further Details", below. */ /* Arguments */ /* ========= */ /* JOBVL (input) CHARACTER*1 */ /* = 'N': do not compute the left generalized eigenvectors; */ /* = 'V': compute the left generalized eigenvectors (returned */ /* in VL). */ /* JOBVR (input) CHARACTER*1 */ /* = 'N': do not compute the right generalized eigenvectors; */ /* = 'V': compute the right generalized eigenvectors (returned */ /* in VR). */ /* N (input) INTEGER */ /* The order of the matrices A, B, VL, and VR. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the matrix A. */ /* If JOBVL = 'V' or JOBVR = 'V', then on exit A */ /* contains the real Schur form of A from the generalized Schur */ /* factorization of the pair (A,B) after balancing. */ /* If no eigenvectors were computed, then only the diagonal */ /* blocks from the Schur form will be correct. See DGGHRD and */ /* DHGEQZ for details. */ /* LDA (input) INTEGER */ /* The leading dimension of A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ /* On entry, the matrix B. */ /* If JOBVL = 'V' or JOBVR = 'V', then on exit B contains the */ /* upper triangular matrix obtained from B in the generalized */ /* Schur factorization of the pair (A,B) after balancing. */ /* If no eigenvectors were computed, then only those elements of */ /* B corresponding to the diagonal blocks from the Schur form of */ /* A will be correct. See DGGHRD and DHGEQZ for details. */ /* LDB (input) INTEGER */ /* The leading dimension of B. LDB >= max(1,N). */ /* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ /* The real parts of each scalar alpha defining an eigenvalue of */ /* GNEP. */ /* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ /* The imaginary parts of each scalar alpha defining an */ /* eigenvalue of GNEP. If ALPHAI(j) is zero, then the j-th */ /* eigenvalue is real; if positive, then the j-th and */ /* (j+1)-st eigenvalues are a complex conjugate pair, with */ /* ALPHAI(j+1) = -ALPHAI(j). */ /* BETA (output) DOUBLE PRECISION array, dimension (N) */ /* The scalars beta that define the eigenvalues of GNEP. */ /* Together, the quantities alpha = (ALPHAR(j),ALPHAI(j)) and */ /* beta = BETA(j) represent the j-th eigenvalue of the matrix */ /* pair (A,B), in one of the forms lambda = alpha/beta or */ /* mu = beta/alpha. Since either lambda or mu may overflow, */ /* they should not, in general, be computed. */ /* VL (output) DOUBLE PRECISION array, dimension (LDVL,N) */ /* If JOBVL = 'V', the left eigenvectors u(j) are stored */ /* in the columns of VL, in the same order as their eigenvalues. */ /* If the j-th eigenvalue is real, then u(j) = VL(:,j). */ /* If the j-th and (j+1)-st eigenvalues form a complex conjugate */ /* pair, then */ /* u(j) = VL(:,j) + i*VL(:,j+1) */ /* and */ /* u(j+1) = VL(:,j) - i*VL(:,j+1). */ /* Each eigenvector is scaled so that its largest component has */ /* abs(real part) + abs(imag. part) = 1, except for eigenvectors */ /* corresponding to an eigenvalue with alpha = beta = 0, which */ /* are set to zero. */ /* Not referenced if JOBVL = 'N'. */ /* LDVL (input) INTEGER */ /* The leading dimension of the matrix VL. LDVL >= 1, and */ /* if JOBVL = 'V', LDVL >= N. */ /* VR (output) DOUBLE PRECISION array, dimension (LDVR,N) */ /* If JOBVR = 'V', the right eigenvectors x(j) are stored */ /* in the columns of VR, in the same order as their eigenvalues. */ /* If the j-th eigenvalue is real, then x(j) = VR(:,j). */ /* If the j-th and (j+1)-st eigenvalues form a complex conjugate */ /* pair, then */ /* x(j) = VR(:,j) + i*VR(:,j+1) */ /* and */ /* x(j+1) = VR(:,j) - i*VR(:,j+1). */ /* Each eigenvector is scaled so that its largest component has */ /* abs(real part) + abs(imag. part) = 1, except for eigenvalues */ /* corresponding to an eigenvalue with alpha = beta = 0, which */ /* are set to zero. */ /* Not referenced if JOBVR = 'N'. */ /* LDVR (input) INTEGER */ /* The leading dimension of the matrix VR. LDVR >= 1, and */ /* if JOBVR = 'V', LDVR >= N. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,8*N). */ /* For good performance, LWORK must generally be larger. */ /* To compute the optimal value of LWORK, call ILAENV to get */ /* blocksizes (for DGEQRF, DORMQR, and DORGQR.) Then compute: */ /* NB -- MAX of the blocksizes for DGEQRF, DORMQR, and DORGQR; */ /* The optimal LWORK is: */ /* 2*N + MAX( 6*N, N*(NB+1) ). */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* = 1,...,N: */ /* The QZ iteration failed. No eigenvectors have been */ /* calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) */ /* should be correct for j=INFO+1,...,N. */ /* > N: errors that usually indicate LAPACK problems: */ /* =N+1: error return from DGGBAL */ /* =N+2: error return from DGEQRF */ /* =N+3: error return from DORMQR */ /* =N+4: error return from DORGQR */ /* =N+5: error return from DGGHRD */ /* =N+6: error return from DHGEQZ (other than failed */ /* iteration) */ /* =N+7: error return from DTGEVC */ /* =N+8: error return from DGGBAK (computing VL) */ /* =N+9: error return from DGGBAK (computing VR) */ /* =N+10: error return from DLASCL (various calls) */ /* Further Details */ /* =============== */ /* Balancing */ /* --------- */ /* This driver calls DGGBAL to both permute and scale rows and columns */ /* of A and B. The permutations PL and PR are chosen so that PL*A*PR */ /* and PL*B*R will be upper triangular except for the diagonal blocks */ /* A(i:j,i:j) and B(i:j,i:j), with i and j as close together as */ /* possible. The diagonal scaling matrices DL and DR are chosen so */ /* that the pair DL*PL*A*PR*DR, DL*PL*B*PR*DR have elements close to */ /* one (except for the elements that start out zero.) */ /* After the eigenvalues and eigenvectors of the balanced matrices */ /* have been computed, DGGBAK transforms the eigenvectors back to what */ /* they would have been (in perfect arithmetic) if they had not been */ /* balanced. */ /* Contents of A and B on Exit */ /* -------- -- - --- - -- ---- */ /* If any eigenvectors are computed (either JOBVL='V' or JOBVR='V' or */ /* both), then on exit the arrays A and B will contain the real Schur */ /* form[*] of the "balanced" versions of A and B. If no eigenvectors */ /* are computed, then only the diagonal blocks will be correct. */ /* [*] See DHGEQZ, DGEGS, or read the book "Matrix Computations", */ /* by Golub & van Loan, pub. by Johns Hopkins U. Press. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --alphar; --alphai; --beta; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1; vr -= vr_offset; --work; /* Function Body */ if (_starpu_lsame_(jobvl, "N")) { ijobvl = 1; ilvl = FALSE_; } else if (_starpu_lsame_(jobvl, "V")) { ijobvl = 2; ilvl = TRUE_; } else { ijobvl = -1; ilvl = FALSE_; } if (_starpu_lsame_(jobvr, "N")) { ijobvr = 1; ilvr = FALSE_; } else if (_starpu_lsame_(jobvr, "V")) { ijobvr = 2; ilvr = TRUE_; } else { ijobvr = -1; ilvr = FALSE_; } ilv = ilvl || ilvr; /* Test the input arguments */ /* Computing MAX */ i__1 = *n << 3; lwkmin = max(i__1,1); lwkopt = lwkmin; work[1] = (doublereal) lwkopt; lquery = *lwork == -1; *info = 0; if (ijobvl <= 0) { *info = -1; } else if (ijobvr <= 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } else if (*ldvl < 1 || ilvl && *ldvl < *n) { *info = -12; } else if (*ldvr < 1 || ilvr && *ldvr < *n) { *info = -14; } else if (*lwork < lwkmin && ! lquery) { *info = -16; } if (*info == 0) { nb1 = _starpu_ilaenv_(&c__1, "DGEQRF", " ", n, n, &c_n1, &c_n1); nb2 = _starpu_ilaenv_(&c__1, "DORMQR", " ", n, n, n, &c_n1); nb3 = _starpu_ilaenv_(&c__1, "DORGQR", " ", n, n, n, &c_n1); /* Computing MAX */ i__1 = max(nb1,nb2); nb = max(i__1,nb3); /* Computing MAX */ i__1 = *n * 6, i__2 = *n * (nb + 1); lopt = (*n << 1) + max(i__1,i__2); work[1] = (doublereal) lopt; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEGV ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Get machine constants */ eps = _starpu_dlamch_("E") * _starpu_dlamch_("B"); safmin = _starpu_dlamch_("S"); safmin += safmin; safmax = 1. / safmin; onepls = eps * 4 + 1.; /* Scale A */ anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, &work[1]); anrm1 = anrm; anrm2 = 1.; if (anrm < 1.) { if (safmax * anrm < 1.) { anrm1 = safmin; anrm2 = safmax * anrm; } } if (anrm > 0.) { _starpu_dlascl_("G", &c_n1, &c_n1, &anrm, &c_b27, n, n, &a[a_offset], lda, & iinfo); if (iinfo != 0) { *info = *n + 10; return 0; } } /* Scale B */ bnrm = _starpu_dlange_("M", n, n, &b[b_offset], ldb, &work[1]); bnrm1 = bnrm; bnrm2 = 1.; if (bnrm < 1.) { if (safmax * bnrm < 1.) { bnrm1 = safmin; bnrm2 = safmax * bnrm; } } if (bnrm > 0.) { _starpu_dlascl_("G", &c_n1, &c_n1, &bnrm, &c_b27, n, n, &b[b_offset], ldb, & iinfo); if (iinfo != 0) { *info = *n + 10; return 0; } } /* Permute the matrix to make it more nearly triangular */ /* Workspace layout: (8*N words -- "work" requires 6*N words) */ /* left_permutation, right_permutation, work... */ ileft = 1; iright = *n + 1; iwork = iright + *n; _starpu_dggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ ileft], &work[iright], &work[iwork], &iinfo); if (iinfo != 0) { *info = *n + 1; goto L120; } /* Reduce B to triangular form, and initialize VL and/or VR */ /* Workspace layout: ("work..." must have at least N words) */ /* left_permutation, right_permutation, tau, work... */ irows = ihi + 1 - ilo; if (ilv) { icols = *n + 1 - ilo; } else { icols = irows; } itau = iwork; iwork = itau + irows; i__1 = *lwork + 1 - iwork; _starpu_dgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ iwork], &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { *info = *n + 2; goto L120; } i__1 = *lwork + 1 - iwork; _starpu_dormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwork], &i__1, & iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { *info = *n + 3; goto L120; } if (ilvl) { _starpu_dlaset_("Full", n, n, &c_b38, &c_b27, &vl[vl_offset], ldvl) ; i__1 = irows - 1; i__2 = irows - 1; _starpu_dlacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vl[ilo + 1 + ilo * vl_dim1], ldvl); i__1 = *lwork + 1 - iwork; _starpu_dorgqr_(&irows, &irows, &irows, &vl[ilo + ilo * vl_dim1], ldvl, &work[ itau], &work[iwork], &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { *info = *n + 4; goto L120; } } if (ilvr) { _starpu_dlaset_("Full", n, n, &c_b38, &c_b27, &vr[vr_offset], ldvr) ; } /* Reduce to generalized Hessenberg form */ if (ilv) { /* Eigenvectors requested -- work on whole matrix. */ _starpu_dgghrd_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &iinfo); } else { _starpu_dgghrd_("N", "N", &irows, &c__1, &irows, &a[ilo + ilo * a_dim1], lda, &b[ilo + ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[ vr_offset], ldvr, &iinfo); } if (iinfo != 0) { *info = *n + 5; goto L120; } /* Perform QZ algorithm */ /* Workspace layout: ("work..." must have at least 1 word) */ /* left_permutation, right_permutation, work... */ iwork = itau; if (ilv) { *(unsigned char *)chtemp = 'S'; } else { *(unsigned char *)chtemp = 'E'; } i__1 = *lwork + 1 - iwork; _starpu_dhgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[ b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &work[iwork], &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { if (iinfo > 0 && iinfo <= *n) { *info = iinfo; } else if (iinfo > *n && iinfo <= *n << 1) { *info = iinfo - *n; } else { *info = *n + 6; } goto L120; } if (ilv) { /* Compute Eigenvectors (DTGEVC requires 6*N words of workspace) */ if (ilvl) { if (ilvr) { *(unsigned char *)chtemp = 'B'; } else { *(unsigned char *)chtemp = 'L'; } } else { *(unsigned char *)chtemp = 'R'; } _starpu_dtgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[ iwork], &iinfo); if (iinfo != 0) { *info = *n + 7; goto L120; } /* Undo balancing on VL and VR, rescale */ if (ilvl) { _starpu_dggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, & vl[vl_offset], ldvl, &iinfo); if (iinfo != 0) { *info = *n + 8; goto L120; } i__1 = *n; for (jc = 1; jc <= i__1; ++jc) { if (alphai[jc] < 0.) { goto L50; } temp = 0.; if (alphai[jc] == 0.) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ d__2 = temp, d__3 = (d__1 = vl[jr + jc * vl_dim1], abs(d__1)); temp = max(d__2,d__3); /* L10: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ d__3 = temp, d__4 = (d__1 = vl[jr + jc * vl_dim1], abs(d__1)) + (d__2 = vl[jr + (jc + 1) * vl_dim1], abs(d__2)); temp = max(d__3,d__4); /* L20: */ } } if (temp < safmin) { goto L50; } temp = 1. / temp; if (alphai[jc] == 0.) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vl[jr + jc * vl_dim1] *= temp; /* L30: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vl[jr + jc * vl_dim1] *= temp; vl[jr + (jc + 1) * vl_dim1] *= temp; /* L40: */ } } L50: ; } } if (ilvr) { _starpu_dggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, & vr[vr_offset], ldvr, &iinfo); if (iinfo != 0) { *info = *n + 9; goto L120; } i__1 = *n; for (jc = 1; jc <= i__1; ++jc) { if (alphai[jc] < 0.) { goto L100; } temp = 0.; if (alphai[jc] == 0.) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ d__2 = temp, d__3 = (d__1 = vr[jr + jc * vr_dim1], abs(d__1)); temp = max(d__2,d__3); /* L60: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ d__3 = temp, d__4 = (d__1 = vr[jr + jc * vr_dim1], abs(d__1)) + (d__2 = vr[jr + (jc + 1) * vr_dim1], abs(d__2)); temp = max(d__3,d__4); /* L70: */ } } if (temp < safmin) { goto L100; } temp = 1. / temp; if (alphai[jc] == 0.) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vr[jr + jc * vr_dim1] *= temp; /* L80: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vr[jr + jc * vr_dim1] *= temp; vr[jr + (jc + 1) * vr_dim1] *= temp; /* L90: */ } } L100: ; } } /* End of eigenvector calculation */ } /* Undo scaling in alpha, beta */ /* Note: this does not give the alpha and beta for the unscaled */ /* problem. */ /* Un-scaling is limited to avoid underflow in alpha and beta */ /* if they are significant. */ i__1 = *n; for (jc = 1; jc <= i__1; ++jc) { absar = (d__1 = alphar[jc], abs(d__1)); absai = (d__1 = alphai[jc], abs(d__1)); absb = (d__1 = beta[jc], abs(d__1)); salfar = anrm * alphar[jc]; salfai = anrm * alphai[jc]; sbeta = bnrm * beta[jc]; ilimit = FALSE_; scale = 1.; /* Check for significant underflow in ALPHAI */ /* Computing MAX */ d__1 = safmin, d__2 = eps * absar, d__1 = max(d__1,d__2), d__2 = eps * absb; if (abs(salfai) < safmin && absai >= max(d__1,d__2)) { ilimit = TRUE_; /* Computing MAX */ d__1 = onepls * safmin, d__2 = anrm2 * absai; scale = onepls * safmin / anrm1 / max(d__1,d__2); } else if (salfai == 0.) { /* If insignificant underflow in ALPHAI, then make the */ /* conjugate eigenvalue real. */ if (alphai[jc] < 0. && jc > 1) { alphai[jc - 1] = 0.; } else if (alphai[jc] > 0. && jc < *n) { alphai[jc + 1] = 0.; } } /* Check for significant underflow in ALPHAR */ /* Computing MAX */ d__1 = safmin, d__2 = eps * absai, d__1 = max(d__1,d__2), d__2 = eps * absb; if (abs(salfar) < safmin && absar >= max(d__1,d__2)) { ilimit = TRUE_; /* Computing MAX */ /* Computing MAX */ d__3 = onepls * safmin, d__4 = anrm2 * absar; d__1 = scale, d__2 = onepls * safmin / anrm1 / max(d__3,d__4); scale = max(d__1,d__2); } /* Check for significant underflow in BETA */ /* Computing MAX */ d__1 = safmin, d__2 = eps * absar, d__1 = max(d__1,d__2), d__2 = eps * absai; if (abs(sbeta) < safmin && absb >= max(d__1,d__2)) { ilimit = TRUE_; /* Computing MAX */ /* Computing MAX */ d__3 = onepls * safmin, d__4 = bnrm2 * absb; d__1 = scale, d__2 = onepls * safmin / bnrm1 / max(d__3,d__4); scale = max(d__1,d__2); } /* Check for possible overflow when limiting scaling */ if (ilimit) { /* Computing MAX */ d__1 = abs(salfar), d__2 = abs(salfai), d__1 = max(d__1,d__2), d__2 = abs(sbeta); temp = scale * safmin * max(d__1,d__2); if (temp > 1.) { scale /= temp; } if (scale < 1.) { ilimit = FALSE_; } } /* Recompute un-scaled ALPHAR, ALPHAI, BETA if necessary. */ if (ilimit) { salfar = scale * alphar[jc] * anrm; salfai = scale * alphai[jc] * anrm; sbeta = scale * beta[jc] * bnrm; } alphar[jc] = salfar; alphai[jc] = salfai; beta[jc] = sbeta; /* L110: */ } L120: work[1] = (doublereal) lwkopt; return 0; /* End of DGEGV */ } /* _starpu_dgegv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgehd2.c000066400000000000000000000136551507764646700205430ustar00rootroot00000000000000/* dgehd2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dgehd2_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__; doublereal aii; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEHD2 reduces a real general matrix A to upper Hessenberg form H by */ /* an orthogonal similarity transformation: Q' * A * Q = H . */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* ILO (input) INTEGER */ /* IHI (input) INTEGER */ /* It is assumed that A is already upper triangular in rows */ /* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */ /* set by a previous call to DGEBAL; otherwise they should be */ /* set to 1 and N respectively. See Further Details. */ /* 1 <= ILO <= IHI <= max(1,N). */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the n by n general matrix to be reduced. */ /* On exit, the upper triangle and the first subdiagonal of A */ /* are overwritten with the upper Hessenberg matrix H, and the */ /* elements below the first subdiagonal, with the array TAU, */ /* represent the orthogonal matrix Q as a product of elementary */ /* reflectors. See Further Details. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* TAU (output) DOUBLE PRECISION array, dimension (N-1) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of (ihi-ilo) elementary */ /* reflectors */ /* Q = H(ilo) H(ilo+1) . . . H(ihi-1). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */ /* exit in A(i+2:ihi,i), and tau in TAU(i). */ /* The contents of A are illustrated by the following example, with */ /* n = 7, ilo = 2 and ihi = 6: */ /* on entry, on exit, */ /* ( a a a a a a a ) ( a a h h h h a ) */ /* ( a a a a a a ) ( a h h h h a ) */ /* ( a a a a a a ) ( h h h h h h ) */ /* ( a a a a a a ) ( v2 h h h h h ) */ /* ( a a a a a a ) ( v2 v3 h h h h ) */ /* ( a a a a a a ) ( v2 v3 v4 h h h ) */ /* ( a ) ( a ) */ /* where a denotes an element of the original matrix A, h denotes a */ /* modified element of the upper Hessenberg matrix H, and vi denotes an */ /* element of the vector defining H(i). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -2; } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEHD2", &i__1); return 0; } i__1 = *ihi - 1; for (i__ = *ilo; i__ <= i__1; ++i__) { /* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ i__2 = *ihi - i__; /* Computing MIN */ i__3 = i__ + 2; _starpu_dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *n)+ i__ * a_dim1], &c__1, &tau[i__]); aii = a[i__ + 1 + i__ * a_dim1]; a[i__ + 1 + i__ * a_dim1] = 1.; /* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ i__2 = *ihi - i__; _starpu_dlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); /* Apply H(i) to A(i+1:ihi,i+1:n) from the left */ i__2 = *ihi - i__; i__3 = *n - i__; _starpu_dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); a[i__ + 1 + i__ * a_dim1] = aii; /* L10: */ } return 0; /* End of DGEHD2 */ } /* _starpu_dgehd2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgehrd.c000066400000000000000000000256671507764646700206510ustar00rootroot00000000000000/* dgehrd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; static integer c__65 = 65; static doublereal c_b25 = -1.; static doublereal c_b26 = 1.; /* Subroutine */ int _starpu_dgehrd_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, j; doublereal t[4160] /* was [65][64] */; integer ib; doublereal ei; integer nb, nh, nx, iws; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer nbmin, iinfo; extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_( integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dgehd2_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlahr2_( integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEHRD reduces a real general matrix A to upper Hessenberg form H by */ /* an orthogonal similarity transformation: Q' * A * Q = H . */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* ILO (input) INTEGER */ /* IHI (input) INTEGER */ /* It is assumed that A is already upper triangular in rows */ /* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */ /* set by a previous call to DGEBAL; otherwise they should be */ /* set to 1 and N respectively. See Further Details. */ /* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N general matrix to be reduced. */ /* On exit, the upper triangle and the first subdiagonal of A */ /* are overwritten with the upper Hessenberg matrix H, and the */ /* elements below the first subdiagonal, with the array TAU, */ /* represent the orthogonal matrix Q as a product of elementary */ /* reflectors. See Further Details. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* TAU (output) DOUBLE PRECISION array, dimension (N-1) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to */ /* zero. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The length of the array WORK. LWORK >= max(1,N). */ /* For optimum performance LWORK >= N*NB, where NB is the */ /* optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of (ihi-ilo) elementary */ /* reflectors */ /* Q = H(ilo) H(ilo+1) . . . H(ihi-1). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */ /* exit in A(i+2:ihi,i), and tau in TAU(i). */ /* The contents of A are illustrated by the following example, with */ /* n = 7, ilo = 2 and ihi = 6: */ /* on entry, on exit, */ /* ( a a a a a a a ) ( a a h h h h a ) */ /* ( a a a a a a ) ( a h h h h a ) */ /* ( a a a a a a ) ( h h h h h h ) */ /* ( a a a a a a ) ( v2 h h h h h ) */ /* ( a a a a a a ) ( v2 v3 h h h h ) */ /* ( a a a a a a ) ( v2 v3 v4 h h h ) */ /* ( a ) ( a ) */ /* where a denotes an element of the original matrix A, h denotes a */ /* modified element of the upper Hessenberg matrix H, and vi denotes an */ /* element of the vector defining H(i). */ /* This file is a slight modification of LAPACK-3.0's DGEHRD */ /* subroutine incorporating improvements proposed by Quintana-Orti and */ /* Van de Geijn (2005). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; /* Computing MIN */ i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1); nb = min(i__1,i__2); lwkopt = *n * nb; work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*n < 0) { *info = -1; } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -2; } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*lwork < max(1,*n) && ! lquery) { *info = -8; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEHRD", &i__1); return 0; } else if (lquery) { return 0; } /* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ i__1 = *ilo - 1; for (i__ = 1; i__ <= i__1; ++i__) { tau[i__] = 0.; /* L10: */ } i__1 = *n - 1; for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { tau[i__] = 0.; /* L20: */ } /* Quick return if possible */ nh = *ihi - *ilo + 1; if (nh <= 1) { work[1] = 1.; return 0; } /* Determine the block size */ /* Computing MIN */ i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1); nb = min(i__1,i__2); nbmin = 2; iws = 1; if (nb > 1 && nb < nh) { /* Determine when to cross over from blocked to unblocked code */ /* (last block is always handled by unblocked code) */ /* Computing MAX */ i__1 = nb, i__2 = _starpu_ilaenv_(&c__3, "DGEHRD", " ", n, ilo, ihi, &c_n1); nx = max(i__1,i__2); if (nx < nh) { /* Determine if workspace is large enough for blocked code */ iws = *n * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: determine the */ /* minimum value of NB, and reduce NB or force use of */ /* unblocked code */ /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGEHRD", " ", n, ilo, ihi, & c_n1); nbmin = max(i__1,i__2); if (*lwork >= *n * nbmin) { nb = *lwork / *n; } else { nb = 1; } } } } ldwork = *n; if (nb < nbmin || nb >= nh) { /* Use unblocked code below */ i__ = *ilo; } else { /* Use blocked code */ i__1 = *ihi - 1 - nx; i__2 = nb; for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = nb, i__4 = *ihi - i__; ib = min(i__3,i__4); /* Reduce columns i:i+ib-1 to Hessenberg form, returning the */ /* matrices V and T of the block reflector H = I - V*T*V' */ /* which performs the reduction, and also the matrix Y = A*V*T */ _starpu_dlahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & c__65, &work[1], &ldwork); /* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the */ /* right, computing A := A - Y * V'. V(i+ib,ib-1) must be set */ /* to 1 */ ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.; i__3 = *ihi - i__ - ib + 1; _starpu_dgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b25, & work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, & c_b26, &a[(i__ + ib) * a_dim1 + 1], lda); a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; /* Apply the block reflector H to A(1:i,i+1:i+ib-1) from the */ /* right */ i__3 = ib - 1; _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &i__, &i__3, &c_b26, &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork); i__3 = ib - 2; for (j = 0; j <= i__3; ++j) { _starpu_daxpy_(&i__, &c_b25, &work[ldwork * j + 1], &c__1, &a[(i__ + j + 1) * a_dim1 + 1], &c__1); /* L30: */ } /* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the */ /* left */ i__3 = *ihi - i__; i__4 = *n - i__ - ib + 1; _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[ i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork); /* L40: */ } } /* Use unblocked code to reduce the rest of the matrix */ _starpu_dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); work[1] = (doublereal) iws; return 0; /* End of DGEHRD */ } /* _starpu_dgehrd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgejsv.c000066400000000000000000002336171507764646700206720ustar00rootroot00000000000000/* dgejsv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b34 = 0.; static doublereal c_b35 = 1.; static integer c__0 = 0; static integer c_n1 = -1; /* Subroutine */ int _starpu_dgejsv_(char *joba, char *jobu, char *jobv, char *jobr, char *jobt, char *jobp, integer *m, integer *n, doublereal *a, integer *lda, doublereal *sva, doublereal *u, integer *ldu, doublereal *v, integer *ldv, doublereal *work, integer *lwork, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9, i__10; doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ double sqrt(doublereal), log(doublereal), d_sign(doublereal *, doublereal *); integer i_dnnt(doublereal *); /* Local variables */ integer p, q, n1, nr; doublereal big, xsc, big1; logical defr; doublereal aapp, aaqq; logical kill; integer ierr; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); doublereal temp1; logical jracc; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); doublereal small, entra, sfmin; logical lsvec; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal epsln; logical rsvec; extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical l2aber; extern /* Subroutine */ int _starpu_dgeqp3_(integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, integer *, integer *); doublereal condr1, condr2, uscal1, uscal2; logical l2kill, l2rank, l2tran, l2pert; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); doublereal scalem; extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); doublereal sconda; logical goscal; doublereal aatmin; extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); doublereal aatmax; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); logical noscal; extern /* Subroutine */ int _starpu_dpocon_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dgesvj_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *), _starpu_dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); doublereal entrat; logical almort; extern /* Subroutine */ int _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dormlq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); doublereal maxprj; logical errest; extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); logical transp, rowpiv; doublereal cond_ok__; integer warning, numrank; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Zlatko Drmac of the University of Zagreb and -- */ /* -- Kresimir Veselic of the Fernuniversitaet Hagen -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* This routine is also part of SIGMA (version 1.23, October 23. 2008.) */ /* SIGMA is a library of algorithms for highly accurate algorithms for */ /* computation of SVD, PSVD, QSVD, (H,K)-SVD, and for solution of the */ /* eigenvalue problems Hx = lambda M x, H M x = lambda x with H, M > 0. */ /* -#- Scalar Arguments -#- */ /* -#- Array Arguments -#- */ /* .. */ /* Purpose */ /* ~~~~~~~ */ /* DGEJSV computes the singular value decomposition (SVD) of a real M-by-N */ /* matrix [A], where M >= N. The SVD of [A] is written as */ /* [A] = [U] * [SIGMA] * [V]^t, */ /* where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N */ /* diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and */ /* [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are */ /* the singular values of [A]. The columns of [U] and [V] are the left and */ /* the right singular vectors of [A], respectively. The matrices [U] and [V] */ /* are computed and stored in the arrays U and V, respectively. The diagonal */ /* of [SIGMA] is computed and stored in the array SVA. */ /* Further details */ /* ~~~~~~~~~~~~~~~ */ /* DGEJSV implements a preconditioned Jacobi SVD algorithm. It uses SGEQP3, */ /* SGEQRF, and SGELQF as preprocessors and preconditioners. Optionally, an */ /* additional row pivoting can be used as a preprocessor, which in some */ /* cases results in much higher accuracy. An example is matrix A with the */ /* structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned */ /* diagonal matrices and C is well-conditioned matrix. In that case, complete */ /* pivoting in the first QR factorizations provides accuracy dependent on the */ /* condition number of C, and independent of D1, D2. Such higher accuracy is */ /* not completely understood theoretically, but it works well in practice. */ /* Further, if A can be written as A = B*D, with well-conditioned B and some */ /* diagonal D, then the high accuracy is guaranteed, both theoretically and */ /* in software, independent of D. For more details see [1], [2]. */ /* The computational range for the singular values can be the full range */ /* ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS */ /* & LAPACK routines called by DGEJSV are implemented to work in that range. */ /* If that is not the case, then the restriction for safe computation with */ /* the singular values in the range of normalized IEEE numbers is that the */ /* spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not */ /* overflow. This code (DGEJSV) is best used in this restricted range, */ /* meaning that singular values of magnitude below ||A||_2 / SLAMCH('O') are */ /* returned as zeros. See JOBR for details on this. */ /* Further, this implementation is somewhat slower than the one described */ /* in [1,2] due to replacement of some non-LAPACK components, and because */ /* the choice of some tuning parameters in the iterative part (DGESVJ) is */ /* left to the implementer on a particular machine. */ /* The rank revealing QR factorization (in this code: SGEQP3) should be */ /* implemented as in [3]. We have a new version of SGEQP3 under development */ /* that is more robust than the current one in LAPACK, with a cleaner cut in */ /* rank defficient cases. It will be available in the SIGMA library [4]. */ /* If M is much larger than N, it is obvious that the inital QRF with */ /* column pivoting can be preprocessed by the QRF without pivoting. That */ /* well known trick is not used in DGEJSV because in some cases heavy row */ /* weighting can be treated with complete pivoting. The overhead in cases */ /* M much larger than N is then only due to pivoting, but the benefits in */ /* terms of accuracy have prevailed. The implementer/user can incorporate */ /* this extra QRF step easily. The implementer can also improve data movement */ /* (matrix transpose, matrix copy, matrix transposed copy) - this */ /* implementation of DGEJSV uses only the simplest, naive data movement. */ /* Contributors */ /* ~~~~~~~~~~~~ */ /* Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */ /* References */ /* ~~~~~~~~~~ */ /* [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. */ /* SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. */ /* LAPACK Working note 169. */ /* [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. */ /* SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. */ /* LAPACK Working note 170. */ /* [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR */ /* factorization software - a case study. */ /* ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28. */ /* LAPACK Working note 176. */ /* [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, */ /* QSVD, (H,K)-SVD computations. */ /* Department of Mathematics, University of Zagreb, 2008. */ /* Bugs, examples and comments */ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ /* Please report all bugs and send interesting examples and/or comments to */ /* drmac@math.hr. Thank you. */ /* Arguments */ /* ~~~~~~~~~ */ /* ............................................................................ */ /* . JOBA (input) CHARACTER*1 */ /* . Specifies the level of accuracy: */ /* . = 'C': This option works well (high relative accuracy) if A = B * D, */ /* . with well-conditioned B and arbitrary diagonal matrix D. */ /* . The accuracy cannot be spoiled by COLUMN scaling. The */ /* . accuracy of the computed output depends on the condition of */ /* . B, and the procedure aims at the best theoretical accuracy. */ /* . The relative error max_{i=1:N}|d sigma_i| / sigma_i is */ /* . bounded by f(M,N)*epsilon* cond(B), independent of D. */ /* . The input matrix is preprocessed with the QRF with column */ /* . pivoting. This initial preprocessing and preconditioning by */ /* . a rank revealing QR factorization is common for all values of */ /* . JOBA. Additional actions are specified as follows: */ /* . = 'E': Computation as with 'C' with an additional estimate of the */ /* . condition number of B. It provides a realistic error bound. */ /* . = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings */ /* . D1, D2, and well-conditioned matrix C, this option gives */ /* . higher accuracy than the 'C' option. If the structure of the */ /* . input matrix is not known, and relative accuracy is */ /* . desirable, then this option is advisable. The input matrix A */ /* . is preprocessed with QR factorization with FULL (row and */ /* . column) pivoting. */ /* . = 'G' Computation as with 'F' with an additional estimate of the */ /* . condition number of B, where A=D*B. If A has heavily weighted */ /* . rows, then using this condition number gives too pessimistic */ /* . error bound. */ /* . = 'A': Small singular values are the noise and the matrix is treated */ /* . as numerically rank defficient. The error in the computed */ /* . singular values is bounded by f(m,n)*epsilon*||A||. */ /* . The computed SVD A = U * S * V^t restores A up to */ /* . f(m,n)*epsilon*||A||. */ /* . This gives the procedure the licence to discard (set to zero) */ /* . all singular values below N*epsilon*||A||. */ /* . = 'R': Similar as in 'A'. Rank revealing property of the initial */ /* . QR factorization is used do reveal (using triangular factor) */ /* . a gap sigma_{r+1} < epsilon * sigma_r in which case the */ /* . numerical RANK is declared to be r. The SVD is computed with */ /* . absolute error bounds, but more accurately than with 'A'. */ /* . */ /* . JOBU (input) CHARACTER*1 */ /* . Specifies whether to compute the columns of U: */ /* . = 'U': N columns of U are returned in the array U. */ /* . = 'F': full set of M left sing. vectors is returned in the array U. */ /* . = 'W': U may be used as workspace of length M*N. See the description */ /* . of U. */ /* . = 'N': U is not computed. */ /* . */ /* . JOBV (input) CHARACTER*1 */ /* . Specifies whether to compute the matrix V: */ /* . = 'V': N columns of V are returned in the array V; Jacobi rotations */ /* . are not explicitly accumulated. */ /* . = 'J': N columns of V are returned in the array V, but they are */ /* . computed as the product of Jacobi rotations. This option is */ /* . allowed only if JOBU .NE. 'N', i.e. in computing the full SVD. */ /* . = 'W': V may be used as workspace of length N*N. See the description */ /* . of V. */ /* . = 'N': V is not computed. */ /* . */ /* . JOBR (input) CHARACTER*1 */ /* . Specifies the RANGE for the singular values. Issues the licence to */ /* . set to zero small positive singular values if they are outside */ /* . specified range. If A .NE. 0 is scaled so that the largest singular */ /* . value of c*A is around DSQRT(BIG), BIG=SLAMCH('O'), then JOBR issues */ /* . the licence to kill columns of A whose norm in c*A is less than */ /* . DSQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN, */ /* . where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E'). */ /* . = 'N': Do not kill small columns of c*A. This option assumes that */ /* . BLAS and QR factorizations and triangular solvers are */ /* . implemented to work in that range. If the condition of A */ /* . is greater than BIG, use DGESVJ. */ /* . = 'R': RESTRICTED range for sigma(c*A) is [DSQRT(SFMIN), DSQRT(BIG)] */ /* . (roughly, as described above). This option is recommended. */ /* . ~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ /* . For computing the singular values in the FULL range [SFMIN,BIG] */ /* . use DGESVJ. */ /* . */ /* . JOBT (input) CHARACTER*1 */ /* . If the matrix is square then the procedure may determine to use */ /* . transposed A if A^t seems to be better with respect to convergence. */ /* . If the matrix is not square, JOBT is ignored. This is subject to */ /* . changes in the future. */ /* . The decision is based on two values of entropy over the adjoint */ /* . orbit of A^t * A. See the descriptions of WORK(6) and WORK(7). */ /* . = 'T': transpose if entropy test indicates possibly faster */ /* . convergence of Jacobi process if A^t is taken as input. If A is */ /* . replaced with A^t, then the row pivoting is included automatically. */ /* . = 'N': do not speculate. */ /* . This option can be used to compute only the singular values, or the */ /* . full SVD (U, SIGMA and V). For only one set of singular vectors */ /* . (U or V), the caller should provide both U and V, as one of the */ /* . matrices is used as workspace if the matrix A is transposed. */ /* . The implementer can easily remove this constraint and make the */ /* . code more complicated. See the descriptions of U and V. */ /* . */ /* . JOBP (input) CHARACTER*1 */ /* . Issues the licence to introduce structured perturbations to drown */ /* . denormalized numbers. This licence should be active if the */ /* . denormals are poorly implemented, causing slow computation, */ /* . especially in cases of fast convergence (!). For details see [1,2]. */ /* . For the sake of simplicity, this perturbations are included only */ /* . when the full SVD or only the singular values are requested. The */ /* . implementer/user can easily add the perturbation for the cases of */ /* . computing one set of singular vectors. */ /* . = 'P': introduce perturbation */ /* . = 'N': do not perturb */ /* ............................................................................ */ /* M (input) INTEGER */ /* The number of rows of the input matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the input matrix A. M >= N >= 0. */ /* A (input/workspace) REAL array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* SVA (workspace/output) REAL array, dimension (N) */ /* On exit, */ /* - For WORK(1)/WORK(2) = ONE: The singular values of A. During the */ /* computation SVA contains Euclidean column norms of the */ /* iterated matrices in the array A. */ /* - For WORK(1) .NE. WORK(2): The singular values of A are */ /* (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if */ /* sigma_max(A) overflows or if small singular values have been */ /* saved from underflow by scaling the input matrix A. */ /* - If JOBR='R' then some of the singular values may be returned */ /* as exact zeros obtained by "set to zero" because they are */ /* below the numerical rank threshold or are denormalized numbers. */ /* U (workspace/output) REAL array, dimension ( LDU, N ) */ /* If JOBU = 'U', then U contains on exit the M-by-N matrix of */ /* the left singular vectors. */ /* If JOBU = 'F', then U contains on exit the M-by-M matrix of */ /* the left singular vectors, including an ONB */ /* of the orthogonal complement of the Range(A). */ /* If JOBU = 'W' .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N), */ /* then U is used as workspace if the procedure */ /* replaces A with A^t. In that case, [V] is computed */ /* in U as left singular vectors of A^t and then */ /* copied back to the V array. This 'W' option is just */ /* a reminder to the caller that in this case U is */ /* reserved as workspace of length N*N. */ /* If JOBU = 'N' U is not referenced. */ /* LDU (input) INTEGER */ /* The leading dimension of the array U, LDU >= 1. */ /* IF JOBU = 'U' or 'F' or 'W', then LDU >= M. */ /* V (workspace/output) REAL array, dimension ( LDV, N ) */ /* If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of */ /* the right singular vectors; */ /* If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N), */ /* then V is used as workspace if the pprocedure */ /* replaces A with A^t. In that case, [U] is computed */ /* in V as right singular vectors of A^t and then */ /* copied back to the U array. This 'W' option is just */ /* a reminder to the caller that in this case V is */ /* reserved as workspace of length N*N. */ /* If JOBV = 'N' V is not referenced. */ /* LDV (input) INTEGER */ /* The leading dimension of the array V, LDV >= 1. */ /* If JOBV = 'V' or 'J' or 'W', then LDV >= N. */ /* WORK (workspace/output) REAL array, dimension at least LWORK. */ /* On exit, */ /* WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such */ /* that SCALE*SVA(1:N) are the computed singular values */ /* of A. (See the description of SVA().) */ /* WORK(2) = See the description of WORK(1). */ /* WORK(3) = SCONDA is an estimate for the condition number of */ /* column equilibrated A. (If JOBA .EQ. 'E' or 'G') */ /* SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1). */ /* It is computed using DPOCON. It holds */ /* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */ /* where R is the triangular factor from the QRF of A. */ /* However, if R is truncated and the numerical rank is */ /* determined to be strictly smaller than N, SCONDA is */ /* returned as -1, thus indicating that the smallest */ /* singular values might be lost. */ /* If full SVD is needed, the following two condition numbers are */ /* useful for the analysis of the algorithm. They are provied for */ /* a developer/implementer who is familiar with the details of */ /* the method. */ /* WORK(4) = an estimate of the scaled condition number of the */ /* triangular factor in the first QR factorization. */ /* WORK(5) = an estimate of the scaled condition number of the */ /* triangular factor in the second QR factorization. */ /* The following two parameters are computed if JOBT .EQ. 'T'. */ /* They are provided for a developer/implementer who is familiar */ /* with the details of the method. */ /* WORK(6) = the entropy of A^t*A :: this is the Shannon entropy */ /* of diag(A^t*A) / Trace(A^t*A) taken as point in the */ /* probability simplex. */ /* WORK(7) = the entropy of A*A^t. */ /* LWORK (input) INTEGER */ /* Length of WORK to confirm proper allocation of work space. */ /* LWORK depends on the job: */ /* If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and */ /* -> .. no scaled condition estimate required ( JOBE.EQ.'N'): */ /* LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement. */ /* For optimal performance (blocked code) the optimal value */ /* is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal */ /* block size for xGEQP3/xGEQRF. */ /* -> .. an estimate of the scaled condition number of A is */ /* required (JOBA='E', 'G'). In this case, LWORK is the maximum */ /* of the above and N*N+4*N, i.e. LWORK >= max(2*M+N,N*N+4N,7). */ /* If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'), */ /* -> the minimal requirement is LWORK >= max(2*N+M,7). */ /* -> For optimal performance, LWORK >= max(2*N+M,2*N+N*NB,7), */ /* where NB is the optimal block size. */ /* If SIGMA and the left singular vectors are needed */ /* -> the minimal requirement is LWORK >= max(2*N+M,7). */ /* -> For optimal performance, LWORK >= max(2*N+M,2*N+N*NB,7), */ /* where NB is the optimal block size. */ /* If full SVD is needed ( JOBU.EQ.'U' or 'F', JOBV.EQ.'V' ) and */ /* -> .. the singular vectors are computed without explicit */ /* accumulation of the Jacobi rotations, LWORK >= 6*N+2*N*N */ /* -> .. in the iterative part, the Jacobi rotations are */ /* explicitly accumulated (option, see the description of JOBV), */ /* then the minimal requirement is LWORK >= max(M+3*N+N*N,7). */ /* For better performance, if NB is the optimal block size, */ /* LWORK >= max(3*N+N*N+M,3*N+N*N+N*NB,7). */ /* IWORK (workspace/output) INTEGER array, dimension M+3*N. */ /* On exit, */ /* IWORK(1) = the numerical rank determined after the initial */ /* QR factorization with pivoting. See the descriptions */ /* of JOBA and JOBR. */ /* IWORK(2) = the number of the computed nonzero singular values */ /* IWORK(3) = if nonzero, a warning message: */ /* If IWORK(3).EQ.1 then some of the column norms of A */ /* were denormalized floats. The requested high accuracy */ /* is not warranted by the data. */ /* INFO (output) INTEGER */ /* < 0 : if INFO = -i, then the i-th argument had an illegal value. */ /* = 0 : successfull exit; */ /* > 0 : DGEJSV did not converge in the maximal allowed number */ /* of sweeps. The computed values may be inaccurate. */ /* ............................................................................ */ /* Local Parameters: */ /* Local Scalars: */ /* Intrinsic Functions: */ /* External Functions: */ /* External Subroutines ( BLAS, LAPACK ): */ /* ............................................................................ */ /* Test the input arguments */ /* Parameter adjustments */ --sva; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; --work; --iwork; /* Function Body */ lsvec = _starpu_lsame_(jobu, "U") || _starpu_lsame_(jobu, "F"); jracc = _starpu_lsame_(jobv, "J"); rsvec = _starpu_lsame_(jobv, "V") || jracc; rowpiv = _starpu_lsame_(joba, "F") || _starpu_lsame_(joba, "G"); l2rank = _starpu_lsame_(joba, "R"); l2aber = _starpu_lsame_(joba, "A"); errest = _starpu_lsame_(joba, "E") || _starpu_lsame_(joba, "G"); l2tran = _starpu_lsame_(jobt, "T"); l2kill = _starpu_lsame_(jobr, "R"); defr = _starpu_lsame_(jobr, "N"); l2pert = _starpu_lsame_(jobp, "P"); if (! (rowpiv || l2rank || l2aber || errest || _starpu_lsame_(joba, "C"))) { *info = -1; } else if (! (lsvec || _starpu_lsame_(jobu, "N") || _starpu_lsame_( jobu, "W"))) { *info = -2; } else if (! (rsvec || _starpu_lsame_(jobv, "N") || _starpu_lsame_( jobv, "W")) || jracc && ! lsvec) { *info = -3; } else if (! (l2kill || defr)) { *info = -4; } else if (! (l2tran || _starpu_lsame_(jobt, "N"))) { *info = -5; } else if (! (l2pert || _starpu_lsame_(jobp, "N"))) { *info = -6; } else if (*m < 0) { *info = -7; } else if (*n < 0 || *n > *m) { *info = -8; } else if (*lda < *m) { *info = -10; } else if (lsvec && *ldu < *m) { *info = -13; } else if (rsvec && *ldv < *n) { *info = -14; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 7, i__2 = (*n << 2) + 1, i__1 = max(i__1,i__2), i__2 = (*m << 1) + *n; /* Computing MAX */ i__3 = 7, i__4 = (*n << 2) + *n * *n, i__3 = max(i__3,i__4), i__4 = (* m << 1) + *n; /* Computing MAX */ i__5 = 7, i__6 = (*n << 1) + *m; /* Computing MAX */ i__7 = 7, i__8 = (*n << 1) + *m; /* Computing MAX */ i__9 = 7, i__10 = *m + *n * 3 + *n * *n; if (! (lsvec || rsvec || errest) && *lwork < max(i__1,i__2) || ! ( lsvec || lsvec) && errest && *lwork < max(i__3,i__4) || lsvec && ! rsvec && *lwork < max(i__5,i__6) || rsvec && ! lsvec && * lwork < max(i__7,i__8) || lsvec && rsvec && ! jracc && *lwork < *n * 6 + (*n << 1) * *n || lsvec && rsvec && jracc && * lwork < max(i__9,i__10)) { *info = -17; } else { /* #:) */ *info = 0; } } if (*info != 0) { /* #:( */ i__1 = -(*info); _starpu_xerbla_("DGEJSV", &i__1); } /* Quick return for void matrix (Y3K safe) */ /* #:) */ if (*m == 0 || *n == 0) { return 0; } /* Determine whether the matrix U should be M x N or M x M */ if (lsvec) { n1 = *n; if (_starpu_lsame_(jobu, "F")) { n1 = *m; } } /* Set numerical parameters */ /* ! NOTE: Make sure DLAMCH() does not fail on the target architecture. */ epsln = _starpu_dlamch_("Epsilon"); sfmin = _starpu_dlamch_("SafeMinimum"); small = sfmin / epsln; big = _starpu_dlamch_("O"); /* BIG = ONE / SFMIN */ /* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N */ /* (!) If necessary, scale SVA() to protect the largest norm from */ /* overflow. It is possible that this scaling pushes the smallest */ /* column norm left from the underflow threshold (extreme case). */ scalem = 1. / sqrt((doublereal) (*m) * (doublereal) (*n)); noscal = TRUE_; goscal = TRUE_; i__1 = *n; for (p = 1; p <= i__1; ++p) { aapp = 0.; aaqq = 0.; _starpu_dlassq_(m, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq); if (aapp > big) { *info = -9; i__2 = -(*info); _starpu_xerbla_("DGEJSV", &i__2); return 0; } aaqq = sqrt(aaqq); if (aapp < big / aaqq && noscal) { sva[p] = aapp * aaqq; } else { noscal = FALSE_; sva[p] = aapp * (aaqq * scalem); if (goscal) { goscal = FALSE_; i__2 = p - 1; _starpu_dscal_(&i__2, &scalem, &sva[1], &c__1); } } /* L1874: */ } if (noscal) { scalem = 1.; } aapp = 0.; aaqq = big; i__1 = *n; for (p = 1; p <= i__1; ++p) { /* Computing MAX */ d__1 = aapp, d__2 = sva[p]; aapp = max(d__1,d__2); if (sva[p] != 0.) { /* Computing MIN */ d__1 = aaqq, d__2 = sva[p]; aaqq = min(d__1,d__2); } /* L4781: */ } /* Quick return for zero M x N matrix */ /* #:) */ if (aapp == 0.) { if (lsvec) { _starpu_dlaset_("G", m, &n1, &c_b34, &c_b35, &u[u_offset], ldu) ; } if (rsvec) { _starpu_dlaset_("G", n, n, &c_b34, &c_b35, &v[v_offset], ldv); } work[1] = 1.; work[2] = 1.; if (errest) { work[3] = 1.; } if (lsvec && rsvec) { work[4] = 1.; work[5] = 1.; } if (l2tran) { work[6] = 0.; work[7] = 0.; } iwork[1] = 0; iwork[2] = 0; return 0; } /* Issue warning if denormalized column norms detected. Override the */ /* high relative accuracy request. Issue licence to kill columns */ /* (set them to zero) whose norm is less than sigma_max / BIG (roughly). */ /* #:( */ warning = 0; if (aaqq <= sfmin) { l2rank = TRUE_; l2kill = TRUE_; warning = 1; } /* Quick return for one-column matrix */ /* #:) */ if (*n == 1) { if (lsvec) { _starpu_dlascl_("G", &c__0, &c__0, &sva[1], &scalem, m, &c__1, &a[a_dim1 + 1], lda, &ierr); _starpu_dlacpy_("A", m, &c__1, &a[a_offset], lda, &u[u_offset], ldu); /* computing all M left singular vectors of the M x 1 matrix */ if (n1 != *n) { i__1 = *lwork - *n; _starpu_dgeqrf_(m, n, &u[u_offset], ldu, &work[1], &work[*n + 1], & i__1, &ierr); i__1 = *lwork - *n; _starpu_dorgqr_(m, &n1, &c__1, &u[u_offset], ldu, &work[1], &work[*n + 1], &i__1, &ierr); _starpu_dcopy_(m, &a[a_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); } } if (rsvec) { v[v_dim1 + 1] = 1.; } if (sva[1] < big * scalem) { sva[1] /= scalem; scalem = 1.; } work[1] = 1. / scalem; work[2] = 1.; if (sva[1] != 0.) { iwork[1] = 1; if (sva[1] / scalem >= sfmin) { iwork[2] = 1; } else { iwork[2] = 0; } } else { iwork[1] = 0; iwork[2] = 0; } if (errest) { work[3] = 1.; } if (lsvec && rsvec) { work[4] = 1.; work[5] = 1.; } if (l2tran) { work[6] = 0.; work[7] = 0.; } return 0; } transp = FALSE_; l2tran = l2tran && *m == *n; aatmax = -1.; aatmin = big; if (rowpiv || l2tran) { /* Compute the row norms, needed to determine row pivoting sequence */ /* (in the case of heavily row weighted A, row pivoting is strongly */ /* advised) and to collect information needed to compare the */ /* structures of A * A^t and A^t * A (in the case L2TRAN.EQ..TRUE.). */ if (l2tran) { i__1 = *m; for (p = 1; p <= i__1; ++p) { xsc = 0.; temp1 = 0.; _starpu_dlassq_(n, &a[p + a_dim1], lda, &xsc, &temp1); /* DLASSQ gets both the ell_2 and the ell_infinity norm */ /* in one pass through the vector */ work[*m + *n + p] = xsc * scalem; work[*n + p] = xsc * (scalem * sqrt(temp1)); /* Computing MAX */ d__1 = aatmax, d__2 = work[*n + p]; aatmax = max(d__1,d__2); if (work[*n + p] != 0.) { /* Computing MIN */ d__1 = aatmin, d__2 = work[*n + p]; aatmin = min(d__1,d__2); } /* L1950: */ } } else { i__1 = *m; for (p = 1; p <= i__1; ++p) { work[*m + *n + p] = scalem * (d__1 = a[p + _starpu_idamax_(n, &a[p + a_dim1], lda) * a_dim1], abs(d__1)); /* Computing MAX */ d__1 = aatmax, d__2 = work[*m + *n + p]; aatmax = max(d__1,d__2); /* Computing MIN */ d__1 = aatmin, d__2 = work[*m + *n + p]; aatmin = min(d__1,d__2); /* L1904: */ } } } /* For square matrix A try to determine whether A^t would be better */ /* input for the preconditioned Jacobi SVD, with faster convergence. */ /* The decision is based on an O(N) function of the vector of column */ /* and row norms of A, based on the Shannon entropy. This should give */ /* the right choice in most cases when the difference actually matters. */ /* It may fail and pick the slower converging side. */ entra = 0.; entrat = 0.; if (l2tran) { xsc = 0.; temp1 = 0.; _starpu_dlassq_(n, &sva[1], &c__1, &xsc, &temp1); temp1 = 1. / temp1; entra = 0.; i__1 = *n; for (p = 1; p <= i__1; ++p) { /* Computing 2nd power */ d__1 = sva[p] / xsc; big1 = d__1 * d__1 * temp1; if (big1 != 0.) { entra += big1 * log(big1); } /* L1113: */ } entra = -entra / log((doublereal) (*n)); /* Now, SVA().^2/Trace(A^t * A) is a point in the probability simplex. */ /* It is derived from the diagonal of A^t * A. Do the same with the */ /* diagonal of A * A^t, compute the entropy of the corresponding */ /* probability distribution. Note that A * A^t and A^t * A have the */ /* same trace. */ entrat = 0.; i__1 = *n + *m; for (p = *n + 1; p <= i__1; ++p) { /* Computing 2nd power */ d__1 = work[p] / xsc; big1 = d__1 * d__1 * temp1; if (big1 != 0.) { entrat += big1 * log(big1); } /* L1114: */ } entrat = -entrat / log((doublereal) (*m)); /* Analyze the entropies and decide A or A^t. Smaller entropy */ /* usually means better input for the algorithm. */ transp = entrat < entra; /* If A^t is better than A, transpose A. */ if (transp) { /* In an optimal implementation, this trivial transpose */ /* should be replaced with faster transpose. */ i__1 = *n - 1; for (p = 1; p <= i__1; ++p) { i__2 = *n; for (q = p + 1; q <= i__2; ++q) { temp1 = a[q + p * a_dim1]; a[q + p * a_dim1] = a[p + q * a_dim1]; a[p + q * a_dim1] = temp1; /* L1116: */ } /* L1115: */ } i__1 = *n; for (p = 1; p <= i__1; ++p) { work[*m + *n + p] = sva[p]; sva[p] = work[*n + p]; /* L1117: */ } temp1 = aapp; aapp = aatmax; aatmax = temp1; temp1 = aaqq; aaqq = aatmin; aatmin = temp1; kill = lsvec; lsvec = rsvec; rsvec = kill; rowpiv = TRUE_; } } /* END IF L2TRAN */ /* Scale the matrix so that its maximal singular value remains less */ /* than DSQRT(BIG) -- the matrix is scaled so that its maximal column */ /* has Euclidean norm equal to DSQRT(BIG/N). The only reason to keep */ /* DSQRT(BIG) instead of BIG is the fact that DGEJSV uses LAPACK and */ /* BLAS routines that, in some implementations, are not capable of */ /* working in the full interval [SFMIN,BIG] and that they may provoke */ /* overflows in the intermediate results. If the singular values spread */ /* from SFMIN to BIG, then DGESVJ will compute them. So, in that case, */ /* one should use DGESVJ instead of DGEJSV. */ big1 = sqrt(big); temp1 = sqrt(big / (doublereal) (*n)); _starpu_dlascl_("G", &c__0, &c__0, &aapp, &temp1, n, &c__1, &sva[1], n, &ierr); if (aaqq > aapp * sfmin) { aaqq = aaqq / aapp * temp1; } else { aaqq = aaqq * temp1 / aapp; } temp1 *= scalem; _starpu_dlascl_("G", &c__0, &c__0, &aapp, &temp1, m, n, &a[a_offset], lda, &ierr); /* To undo scaling at the end of this procedure, multiply the */ /* computed singular values with USCAL2 / USCAL1. */ uscal1 = temp1; uscal2 = aapp; if (l2kill) { /* L2KILL enforces computation of nonzero singular values in */ /* the restricted range of condition number of the initial A, */ /* sigma_max(A) / sigma_min(A) approx. DSQRT(BIG)/DSQRT(SFMIN). */ xsc = sqrt(sfmin); } else { xsc = small; /* Now, if the condition number of A is too big, */ /* sigma_max(A) / sigma_min(A) .GT. DSQRT(BIG/N) * EPSLN / SFMIN, */ /* as a precaution measure, the full SVD is computed using DGESVJ */ /* with accumulated Jacobi rotations. This provides numerically */ /* more robust computation, at the cost of slightly increased run */ /* time. Depending on the concrete implementation of BLAS and LAPACK */ /* (i.e. how they behave in presence of extreme ill-conditioning) the */ /* implementor may decide to remove this switch. */ if (aaqq < sqrt(sfmin) && lsvec && rsvec) { jracc = TRUE_; } } if (aaqq < xsc) { i__1 = *n; for (p = 1; p <= i__1; ++p) { if (sva[p] < xsc) { _starpu_dlaset_("A", m, &c__1, &c_b34, &c_b34, &a[p * a_dim1 + 1], lda); sva[p] = 0.; } /* L700: */ } } /* Preconditioning using QR factorization with pivoting */ if (rowpiv) { /* Optional row permutation (Bjoerck row pivoting): */ /* A result by Cox and Higham shows that the Bjoerck's */ /* row pivoting combined with standard column pivoting */ /* has similar effect as Powell-Reid complete pivoting. */ /* The ell-infinity norms of A are made nonincreasing. */ i__1 = *m - 1; for (p = 1; p <= i__1; ++p) { i__2 = *m - p + 1; q = _starpu_idamax_(&i__2, &work[*m + *n + p], &c__1) + p - 1; iwork[(*n << 1) + p] = q; if (p != q) { temp1 = work[*m + *n + p]; work[*m + *n + p] = work[*m + *n + q]; work[*m + *n + q] = temp1; } /* L1952: */ } i__1 = *m - 1; _starpu_dlaswp_(n, &a[a_offset], lda, &c__1, &i__1, &iwork[(*n << 1) + 1], & c__1); } /* End of the preparation phase (scaling, optional sorting and */ /* transposing, optional flushing of small columns). */ /* Preconditioning */ /* If the full SVD is needed, the right singular vectors are computed */ /* from a matrix equation, and for that we need theoretical analysis */ /* of the Businger-Golub pivoting. So we use DGEQP3 as the first RR QRF. */ /* In all other cases the first RR QRF can be chosen by other criteria */ /* (eg speed by replacing global with restricted window pivoting, such */ /* as in SGEQPX from TOMS # 782). Good results will be obtained using */ /* SGEQPX with properly (!) chosen numerical parameters. */ /* Any improvement of DGEQP3 improves overal performance of DGEJSV. */ /* A * P1 = Q1 * [ R1^t 0]^t: */ i__1 = *n; for (p = 1; p <= i__1; ++p) { /* .. all columns are free columns */ iwork[p] = 0; /* L1963: */ } i__1 = *lwork - *n; _starpu_dgeqp3_(m, n, &a[a_offset], lda, &iwork[1], &work[1], &work[*n + 1], & i__1, &ierr); /* The upper triangular matrix R1 from the first QRF is inspected for */ /* rank deficiency and possibilities for deflation, or possible */ /* ill-conditioning. Depending on the user specified flag L2RANK, */ /* the procedure explores possibilities to reduce the numerical */ /* rank by inspecting the computed upper triangular factor. If */ /* L2RANK or L2ABER are up, then DGEJSV will compute the SVD of */ /* A + dA, where ||dA|| <= f(M,N)*EPSLN. */ nr = 1; if (l2aber) { /* Standard absolute error bound suffices. All sigma_i with */ /* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an */ /* agressive enforcement of lower numerical rank by introducing a */ /* backward error of the order of N*EPSLN*||A||. */ temp1 = sqrt((doublereal) (*n)) * epsln; i__1 = *n; for (p = 2; p <= i__1; ++p) { if ((d__2 = a[p + p * a_dim1], abs(d__2)) >= temp1 * (d__1 = a[ a_dim1 + 1], abs(d__1))) { ++nr; } else { goto L3002; } /* L3001: */ } L3002: ; } else if (l2rank) { /* .. similarly as above, only slightly more gentle (less agressive). */ /* Sudden drop on the diagonal of R1 is used as the criterion for */ /* close-to-rank-defficient. */ temp1 = sqrt(sfmin); i__1 = *n; for (p = 2; p <= i__1; ++p) { if ((d__2 = a[p + p * a_dim1], abs(d__2)) < epsln * (d__1 = a[p - 1 + (p - 1) * a_dim1], abs(d__1)) || (d__3 = a[p + p * a_dim1], abs(d__3)) < small || l2kill && (d__4 = a[p + p * a_dim1], abs(d__4)) < temp1) { goto L3402; } ++nr; /* L3401: */ } L3402: ; } else { /* The goal is high relative accuracy. However, if the matrix */ /* has high scaled condition number the relative accuracy is in */ /* general not feasible. Later on, a condition number estimator */ /* will be deployed to estimate the scaled condition number. */ /* Here we just remove the underflowed part of the triangular */ /* factor. This prevents the situation in which the code is */ /* working hard to get the accuracy not warranted by the data. */ temp1 = sqrt(sfmin); i__1 = *n; for (p = 2; p <= i__1; ++p) { if ((d__1 = a[p + p * a_dim1], abs(d__1)) < small || l2kill && ( d__2 = a[p + p * a_dim1], abs(d__2)) < temp1) { goto L3302; } ++nr; /* L3301: */ } L3302: ; } almort = FALSE_; if (nr == *n) { maxprj = 1.; i__1 = *n; for (p = 2; p <= i__1; ++p) { temp1 = (d__1 = a[p + p * a_dim1], abs(d__1)) / sva[iwork[p]]; maxprj = min(maxprj,temp1); /* L3051: */ } /* Computing 2nd power */ d__1 = maxprj; if (d__1 * d__1 >= 1. - (doublereal) (*n) * epsln) { almort = TRUE_; } } sconda = -1.; condr1 = -1.; condr2 = -1.; if (errest) { if (*n == nr) { if (rsvec) { /* .. V is available as workspace */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &v[v_offset], ldv); i__1 = *n; for (p = 1; p <= i__1; ++p) { temp1 = sva[iwork[p]]; d__1 = 1. / temp1; _starpu_dscal_(&p, &d__1, &v[p * v_dim1 + 1], &c__1); /* L3053: */ } _starpu_dpocon_("U", n, &v[v_offset], ldv, &c_b35, &temp1, &work[*n + 1], &iwork[(*n << 1) + *m + 1], &ierr); } else if (lsvec) { /* .. U is available as workspace */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &u[u_offset], ldu); i__1 = *n; for (p = 1; p <= i__1; ++p) { temp1 = sva[iwork[p]]; d__1 = 1. / temp1; _starpu_dscal_(&p, &d__1, &u[p * u_dim1 + 1], &c__1); /* L3054: */ } _starpu_dpocon_("U", n, &u[u_offset], ldu, &c_b35, &temp1, &work[*n + 1], &iwork[(*n << 1) + *m + 1], &ierr); } else { _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[*n + 1], n); i__1 = *n; for (p = 1; p <= i__1; ++p) { temp1 = sva[iwork[p]]; d__1 = 1. / temp1; _starpu_dscal_(&p, &d__1, &work[*n + (p - 1) * *n + 1], &c__1); /* L3052: */ } /* .. the columns of R are scaled to have unit Euclidean lengths. */ _starpu_dpocon_("U", n, &work[*n + 1], n, &c_b35, &temp1, &work[*n + * n * *n + 1], &iwork[(*n << 1) + *m + 1], &ierr); } sconda = 1. / sqrt(temp1); /* SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1). */ /* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */ } else { sconda = -1.; } } l2pert = l2pert && (d__1 = a[a_dim1 + 1] / a[nr + nr * a_dim1], abs(d__1)) > sqrt(big1); /* If there is no violent scaling, artificial perturbation is not needed. */ /* Phase 3: */ if (! (rsvec || lsvec)) { /* Singular Values only */ /* .. transpose A(1:NR,1:N) */ /* Computing MIN */ i__2 = *n - 1; i__1 = min(i__2,nr); for (p = 1; p <= i__1; ++p) { i__2 = *n - p; _starpu_dcopy_(&i__2, &a[p + (p + 1) * a_dim1], lda, &a[p + 1 + p * a_dim1], &c__1); /* L1946: */ } /* The following two DO-loops introduce small relative perturbation */ /* into the strict upper triangle of the lower triangular matrix. */ /* Small entries below the main diagonal are also changed. */ /* This modification is useful if the computing environment does not */ /* provide/allow FLUSH TO ZERO underflow, for it prevents many */ /* annoying denormalized numbers in case of strongly scaled matrices. */ /* The perturbation is structured so that it does not introduce any */ /* new perturbation of the singular values, and it does not destroy */ /* the job done by the preconditioner. */ /* The licence for this perturbation is in the variable L2PERT, which */ /* should be .FALSE. if FLUSH TO ZERO underflow is active. */ if (! almort) { if (l2pert) { /* XSC = DSQRT(SMALL) */ xsc = epsln / (doublereal) (*n); i__1 = nr; for (q = 1; q <= i__1; ++q) { temp1 = xsc * (d__1 = a[q + q * a_dim1], abs(d__1)); i__2 = *n; for (p = 1; p <= i__2; ++p) { if (p > q && (d__1 = a[p + q * a_dim1], abs(d__1)) <= temp1 || p < q) { a[p + q * a_dim1] = d_sign(&temp1, &a[p + q * a_dim1]); } /* L4949: */ } /* L4947: */ } } else { i__1 = nr - 1; i__2 = nr - 1; _starpu_dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &a[(a_dim1 << 1) + 1], lda); } /* .. second preconditioning using the QR factorization */ i__1 = *lwork - *n; _starpu_dgeqrf_(n, &nr, &a[a_offset], lda, &work[1], &work[*n + 1], &i__1, &ierr); /* .. and transpose upper to lower triangular */ i__1 = nr - 1; for (p = 1; p <= i__1; ++p) { i__2 = nr - p; _starpu_dcopy_(&i__2, &a[p + (p + 1) * a_dim1], lda, &a[p + 1 + p * a_dim1], &c__1); /* L1948: */ } } /* Row-cyclic Jacobi SVD algorithm with column pivoting */ /* .. again some perturbation (a "background noise") is added */ /* to drown denormals */ if (l2pert) { /* XSC = DSQRT(SMALL) */ xsc = epsln / (doublereal) (*n); i__1 = nr; for (q = 1; q <= i__1; ++q) { temp1 = xsc * (d__1 = a[q + q * a_dim1], abs(d__1)); i__2 = nr; for (p = 1; p <= i__2; ++p) { if (p > q && (d__1 = a[p + q * a_dim1], abs(d__1)) <= temp1 || p < q) { a[p + q * a_dim1] = d_sign(&temp1, &a[p + q * a_dim1]) ; } /* L1949: */ } /* L1947: */ } } else { i__1 = nr - 1; i__2 = nr - 1; _starpu_dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &a[(a_dim1 << 1) + 1], lda); } /* .. and one-sided Jacobi rotations are started on a lower */ /* triangular matrix (plus perturbation which is ignored in */ /* the part which destroys triangular form (confusing?!)) */ _starpu_dgesvj_("L", "NoU", "NoV", &nr, &nr, &a[a_offset], lda, &sva[1], n, & v[v_offset], ldv, &work[1], lwork, info); scalem = work[1]; numrank = i_dnnt(&work[2]); } else if (rsvec && ! lsvec) { /* -> Singular Values and Right Singular Vectors <- */ if (almort) { /* .. in this case NR equals N */ i__1 = nr; for (p = 1; p <= i__1; ++p) { i__2 = *n - p + 1; _starpu_dcopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], & c__1); /* L1998: */ } i__1 = nr - 1; i__2 = nr - 1; _starpu_dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + 1], ldv); _starpu_dgesvj_("L", "U", "N", n, &nr, &v[v_offset], ldv, &sva[1], &nr, & a[a_offset], lda, &work[1], lwork, info); scalem = work[1]; numrank = i_dnnt(&work[2]); } else { /* .. two more QR factorizations ( one QRF is not enough, two require */ /* accumulated product of Jacobi rotations, three are perfect ) */ i__1 = nr - 1; i__2 = nr - 1; _starpu_dlaset_("Lower", &i__1, &i__2, &c_b34, &c_b34, &a[a_dim1 + 2], lda); i__1 = *lwork - *n; _starpu_dgelqf_(&nr, n, &a[a_offset], lda, &work[1], &work[*n + 1], &i__1, &ierr); _starpu_dlacpy_("Lower", &nr, &nr, &a[a_offset], lda, &v[v_offset], ldv); i__1 = nr - 1; i__2 = nr - 1; _starpu_dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + 1], ldv); i__1 = *lwork - (*n << 1); _starpu_dgeqrf_(&nr, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(*n << 1) + 1], &i__1, &ierr); i__1 = nr; for (p = 1; p <= i__1; ++p) { i__2 = nr - p + 1; _starpu_dcopy_(&i__2, &v[p + p * v_dim1], ldv, &v[p + p * v_dim1], & c__1); /* L8998: */ } i__1 = nr - 1; i__2 = nr - 1; _starpu_dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + 1], ldv); _starpu_dgesvj_("Lower", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[1], & nr, &u[u_offset], ldu, &work[*n + 1], lwork, info); scalem = work[*n + 1]; numrank = i_dnnt(&work[*n + 2]); if (nr < *n) { i__1 = *n - nr; _starpu_dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + v_dim1], ldv); i__1 = *n - nr; _starpu_dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * v_dim1 + 1], ldv); i__1 = *n - nr; i__2 = *n - nr; _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + (nr + 1) * v_dim1], ldv); } i__1 = *lwork - *n; _starpu_dormlq_("Left", "Transpose", n, n, &nr, &a[a_offset], lda, &work[ 1], &v[v_offset], ldv, &work[*n + 1], &i__1, &ierr); } i__1 = *n; for (p = 1; p <= i__1; ++p) { _starpu_dcopy_(n, &v[p + v_dim1], ldv, &a[iwork[p] + a_dim1], lda); /* L8991: */ } _starpu_dlacpy_("All", n, n, &a[a_offset], lda, &v[v_offset], ldv); if (transp) { _starpu_dlacpy_("All", n, n, &v[v_offset], ldv, &u[u_offset], ldu); } } else if (lsvec && ! rsvec) { /* -#- Singular Values and Left Singular Vectors -#- */ /* .. second preconditioning step to avoid need to accumulate */ /* Jacobi rotations in the Jacobi iterations. */ i__1 = nr; for (p = 1; p <= i__1; ++p) { i__2 = *n - p + 1; _starpu_dcopy_(&i__2, &a[p + p * a_dim1], lda, &u[p + p * u_dim1], &c__1); /* L1965: */ } i__1 = nr - 1; i__2 = nr - 1; _starpu_dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + 1], ldu); i__1 = *lwork - (*n << 1); _starpu_dgeqrf_(n, &nr, &u[u_offset], ldu, &work[*n + 1], &work[(*n << 1) + 1] , &i__1, &ierr); i__1 = nr - 1; for (p = 1; p <= i__1; ++p) { i__2 = nr - p; _starpu_dcopy_(&i__2, &u[p + (p + 1) * u_dim1], ldu, &u[p + 1 + p * u_dim1], &c__1); /* L1967: */ } i__1 = nr - 1; i__2 = nr - 1; _starpu_dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + 1], ldu); i__1 = *lwork - *n; _starpu_dgesvj_("Lower", "U", "N", &nr, &nr, &u[u_offset], ldu, &sva[1], &nr, &a[a_offset], lda, &work[*n + 1], &i__1, info); scalem = work[*n + 1]; numrank = i_dnnt(&work[*n + 2]); if (nr < *m) { i__1 = *m - nr; _starpu_dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + u_dim1], ldu); if (nr < n1) { i__1 = n1 - nr; _starpu_dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) * u_dim1 + 1], ldu); i__1 = *m - nr; i__2 = n1 - nr; _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + (nr + 1) * u_dim1], ldu); } } i__1 = *lwork - *n; _starpu_dormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[1], &u[ u_offset], ldu, &work[*n + 1], &i__1, &ierr); if (rowpiv) { i__1 = *m - 1; _starpu_dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n << 1) + 1], &c_n1); } i__1 = n1; for (p = 1; p <= i__1; ++p) { xsc = 1. / _starpu_dnrm2_(m, &u[p * u_dim1 + 1], &c__1); _starpu_dscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1); /* L1974: */ } if (transp) { _starpu_dlacpy_("All", n, n, &u[u_offset], ldu, &v[v_offset], ldv); } } else { /* -#- Full SVD -#- */ if (! jracc) { if (! almort) { /* Second Preconditioning Step (QRF [with pivoting]) */ /* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is */ /* equivalent to an LQF CALL. Since in many libraries the QRF */ /* seems to be better optimized than the LQF, we do explicit */ /* transpose and use the QRF. This is subject to changes in an */ /* optimized implementation of DGEJSV. */ i__1 = nr; for (p = 1; p <= i__1; ++p) { i__2 = *n - p + 1; _starpu_dcopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], &c__1); /* L1968: */ } /* .. the following two loops perturb small entries to avoid */ /* denormals in the second QR factorization, where they are */ /* as good as zeros. This is done to avoid painfully slow */ /* computation with denormals. The relative size of the perturbation */ /* is a parameter that can be changed by the implementer. */ /* This perturbation device will be obsolete on machines with */ /* properly implemented arithmetic. */ /* To switch it off, set L2PERT=.FALSE. To remove it from the */ /* code, remove the action under L2PERT=.TRUE., leave the ELSE part. */ /* The following two loops should be blocked and fused with the */ /* transposed copy above. */ if (l2pert) { xsc = sqrt(small); i__1 = nr; for (q = 1; q <= i__1; ++q) { temp1 = xsc * (d__1 = v[q + q * v_dim1], abs(d__1)); i__2 = *n; for (p = 1; p <= i__2; ++p) { if (p > q && (d__1 = v[p + q * v_dim1], abs(d__1)) <= temp1 || p < q) { v[p + q * v_dim1] = d_sign(&temp1, &v[p + q * v_dim1]); } if (p < q) { v[p + q * v_dim1] = -v[p + q * v_dim1]; } /* L2968: */ } /* L2969: */ } } else { i__1 = nr - 1; i__2 = nr - 1; _starpu_dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + 1], ldv); } /* Estimate the row scaled condition number of R1 */ /* (If R1 is rectangular, N > NR, then the condition number */ /* of the leading NR x NR submatrix is estimated.) */ _starpu_dlacpy_("L", &nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + 1] , &nr); i__1 = nr; for (p = 1; p <= i__1; ++p) { i__2 = nr - p + 1; temp1 = _starpu_dnrm2_(&i__2, &work[(*n << 1) + (p - 1) * nr + p], &c__1); i__2 = nr - p + 1; d__1 = 1. / temp1; _starpu_dscal_(&i__2, &d__1, &work[(*n << 1) + (p - 1) * nr + p], &c__1); /* L3950: */ } _starpu_dpocon_("Lower", &nr, &work[(*n << 1) + 1], &nr, &c_b35, & temp1, &work[(*n << 1) + nr * nr + 1], &iwork[*m + (* n << 1) + 1], &ierr); condr1 = 1. / sqrt(temp1); /* .. here need a second oppinion on the condition number */ /* .. then assume worst case scenario */ /* R1 is OK for inverse <=> CONDR1 .LT. DBLE(N) */ /* more conservative <=> CONDR1 .LT. DSQRT(DBLE(N)) */ cond_ok__ = sqrt((doublereal) nr); /* [TP] COND_OK is a tuning parameter. */ if (condr1 < cond_ok__) { /* .. the second QRF without pivoting. Note: in an optimized */ /* implementation, this QRF should be implemented as the QRF */ /* of a lower triangular matrix. */ /* R1^t = Q2 * R2 */ i__1 = *lwork - (*n << 1); _starpu_dgeqrf_(n, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(* n << 1) + 1], &i__1, &ierr); if (l2pert) { xsc = sqrt(small) / epsln; i__1 = nr; for (p = 2; p <= i__1; ++p) { i__2 = p - 1; for (q = 1; q <= i__2; ++q) { /* Computing MIN */ d__3 = (d__1 = v[p + p * v_dim1], abs(d__1)), d__4 = (d__2 = v[q + q * v_dim1], abs( d__2)); temp1 = xsc * min(d__3,d__4); if ((d__1 = v[q + p * v_dim1], abs(d__1)) <= temp1) { v[q + p * v_dim1] = d_sign(&temp1, &v[q + p * v_dim1]); } /* L3958: */ } /* L3959: */ } } if (nr != *n) { _starpu_dlacpy_("A", n, &nr, &v[v_offset], ldv, &work[(*n << 1) + 1], n); } /* .. save ... */ /* .. this transposed copy should be better than naive */ i__1 = nr - 1; for (p = 1; p <= i__1; ++p) { i__2 = nr - p; _starpu_dcopy_(&i__2, &v[p + (p + 1) * v_dim1], ldv, &v[p + 1 + p * v_dim1], &c__1); /* L1969: */ } condr2 = condr1; } else { /* .. ill-conditioned case: second QRF with pivoting */ /* Note that windowed pivoting would be equaly good */ /* numerically, and more run-time efficient. So, in */ /* an optimal implementation, the next call to DGEQP3 */ /* should be replaced with eg. CALL SGEQPX (ACM TOMS #782) */ /* with properly (carefully) chosen parameters. */ /* R1^t * P2 = Q2 * R2 */ i__1 = nr; for (p = 1; p <= i__1; ++p) { iwork[*n + p] = 0; /* L3003: */ } i__1 = *lwork - (*n << 1); _starpu_dgeqp3_(n, &nr, &v[v_offset], ldv, &iwork[*n + 1], &work[* n + 1], &work[(*n << 1) + 1], &i__1, &ierr); /* * CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1), */ /* * & LWORK-2*N, IERR ) */ if (l2pert) { xsc = sqrt(small); i__1 = nr; for (p = 2; p <= i__1; ++p) { i__2 = p - 1; for (q = 1; q <= i__2; ++q) { /* Computing MIN */ d__3 = (d__1 = v[p + p * v_dim1], abs(d__1)), d__4 = (d__2 = v[q + q * v_dim1], abs( d__2)); temp1 = xsc * min(d__3,d__4); if ((d__1 = v[q + p * v_dim1], abs(d__1)) <= temp1) { v[q + p * v_dim1] = d_sign(&temp1, &v[q + p * v_dim1]); } /* L3968: */ } /* L3969: */ } } _starpu_dlacpy_("A", n, &nr, &v[v_offset], ldv, &work[(*n << 1) + 1], n); if (l2pert) { xsc = sqrt(small); i__1 = nr; for (p = 2; p <= i__1; ++p) { i__2 = p - 1; for (q = 1; q <= i__2; ++q) { /* Computing MIN */ d__3 = (d__1 = v[p + p * v_dim1], abs(d__1)), d__4 = (d__2 = v[q + q * v_dim1], abs( d__2)); temp1 = xsc * min(d__3,d__4); v[p + q * v_dim1] = -d_sign(&temp1, &v[q + p * v_dim1]); /* L8971: */ } /* L8970: */ } } else { i__1 = nr - 1; i__2 = nr - 1; _starpu_dlaset_("L", &i__1, &i__2, &c_b34, &c_b34, &v[v_dim1 + 2], ldv); } /* Now, compute R2 = L3 * Q3, the LQ factorization. */ i__1 = *lwork - (*n << 1) - *n * nr - nr; _starpu_dgelqf_(&nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + *n * nr + 1], &work[(*n << 1) + *n * nr + nr + 1], & i__1, &ierr); /* .. and estimate the condition number */ _starpu_dlacpy_("L", &nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + *n * nr + nr + 1], &nr); i__1 = nr; for (p = 1; p <= i__1; ++p) { temp1 = _starpu_dnrm2_(&p, &work[(*n << 1) + *n * nr + nr + p] , &nr); d__1 = 1. / temp1; _starpu_dscal_(&p, &d__1, &work[(*n << 1) + *n * nr + nr + p], &nr); /* L4950: */ } _starpu_dpocon_("L", &nr, &work[(*n << 1) + *n * nr + nr + 1], & nr, &c_b35, &temp1, &work[(*n << 1) + *n * nr + nr + nr * nr + 1], &iwork[*m + (*n << 1) + 1], & ierr); condr2 = 1. / sqrt(temp1); if (condr2 >= cond_ok__) { /* .. save the Householder vectors used for Q3 */ /* (this overwrittes the copy of R2, as it will not be */ /* needed in this branch, but it does not overwritte the */ /* Huseholder vectors of Q2.). */ _starpu_dlacpy_("U", &nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + 1], n); /* .. and the rest of the information on Q3 is in */ /* WORK(2*N+N*NR+1:2*N+N*NR+N) */ } } if (l2pert) { xsc = sqrt(small); i__1 = nr; for (q = 2; q <= i__1; ++q) { temp1 = xsc * v[q + q * v_dim1]; i__2 = q - 1; for (p = 1; p <= i__2; ++p) { /* V(p,q) = - DSIGN( TEMP1, V(q,p) ) */ v[p + q * v_dim1] = -d_sign(&temp1, &v[p + q * v_dim1]); /* L4969: */ } /* L4968: */ } } else { i__1 = nr - 1; i__2 = nr - 1; _starpu_dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + 1], ldv); } /* Second preconditioning finished; continue with Jacobi SVD */ /* The input matrix is lower trinagular. */ /* Recover the right singular vectors as solution of a well */ /* conditioned triangular matrix equation. */ if (condr1 < cond_ok__) { i__1 = *lwork - (*n << 1) - *n * nr - nr; _starpu_dgesvj_("L", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[ 1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n * nr + nr + 1], &i__1, info); scalem = work[(*n << 1) + *n * nr + nr + 1]; numrank = i_dnnt(&work[(*n << 1) + *n * nr + nr + 2]); i__1 = nr; for (p = 1; p <= i__1; ++p) { _starpu_dcopy_(&nr, &v[p * v_dim1 + 1], &c__1, &u[p * u_dim1 + 1], &c__1); _starpu_dscal_(&nr, &sva[p], &v[p * v_dim1 + 1], &c__1); /* L3970: */ } /* .. pick the right matrix equation and solve it */ if (nr == *n) { /* :)) .. best case, R1 is inverted. The solution of this matrix */ /* equation is Q2*V2 = the product of the Jacobi rotations */ /* used in DGESVJ, premultiplied with the orthogonal matrix */ /* from the second QR factorization. */ _starpu_dtrsm_("L", "U", "N", "N", &nr, &nr, &c_b35, &a[ a_offset], lda, &v[v_offset], ldv); } else { /* .. R1 is well conditioned, but non-square. Transpose(R2) */ /* is inverted to get the product of the Jacobi rotations */ /* used in DGESVJ. The Q-factor from the second QR */ /* factorization is then built in explicitly. */ _starpu_dtrsm_("L", "U", "T", "N", &nr, &nr, &c_b35, &work[(* n << 1) + 1], n, &v[v_offset], ldv); if (nr < *n) { i__1 = *n - nr; _starpu_dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + v_dim1], ldv); i__1 = *n - nr; _starpu_dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * v_dim1 + 1], ldv); i__1 = *n - nr; i__2 = *n - nr; _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + (nr + 1) * v_dim1], ldv); } i__1 = *lwork - (*n << 1) - *n * nr - nr; _starpu_dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, &work[*n + 1], &v[v_offset], ldv, &work[(*n << 1) + *n * nr + nr + 1], &i__1, &ierr); } } else if (condr2 < cond_ok__) { /* :) .. the input matrix A is very likely a relative of */ /* the Kahan matrix :) */ /* The matrix R2 is inverted. The solution of the matrix equation */ /* is Q3^T*V3 = the product of the Jacobi rotations (appplied to */ /* the lower triangular L3 from the LQ factorization of */ /* R2=L3*Q3), pre-multiplied with the transposed Q3. */ i__1 = *lwork - (*n << 1) - *n * nr - nr; _starpu_dgesvj_("L", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[ 1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n * nr + nr + 1], &i__1, info); scalem = work[(*n << 1) + *n * nr + nr + 1]; numrank = i_dnnt(&work[(*n << 1) + *n * nr + nr + 2]); i__1 = nr; for (p = 1; p <= i__1; ++p) { _starpu_dcopy_(&nr, &v[p * v_dim1 + 1], &c__1, &u[p * u_dim1 + 1], &c__1); _starpu_dscal_(&nr, &sva[p], &u[p * u_dim1 + 1], &c__1); /* L3870: */ } _starpu_dtrsm_("L", "U", "N", "N", &nr, &nr, &c_b35, &work[(*n << 1) + 1], n, &u[u_offset], ldu); /* .. apply the permutation from the second QR factorization */ i__1 = nr; for (q = 1; q <= i__1; ++q) { i__2 = nr; for (p = 1; p <= i__2; ++p) { work[(*n << 1) + *n * nr + nr + iwork[*n + p]] = u[p + q * u_dim1]; /* L872: */ } i__2 = nr; for (p = 1; p <= i__2; ++p) { u[p + q * u_dim1] = work[(*n << 1) + *n * nr + nr + p]; /* L874: */ } /* L873: */ } if (nr < *n) { i__1 = *n - nr; _starpu_dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + v_dim1], ldv); i__1 = *n - nr; _starpu_dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * v_dim1 + 1], ldv); i__1 = *n - nr; i__2 = *n - nr; _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + (nr + 1) * v_dim1], ldv); } i__1 = *lwork - (*n << 1) - *n * nr - nr; _starpu_dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, & work[*n + 1], &v[v_offset], ldv, &work[(*n << 1) + *n * nr + nr + 1], &i__1, &ierr); } else { /* Last line of defense. */ /* #:( This is a rather pathological case: no scaled condition */ /* improvement after two pivoted QR factorizations. Other */ /* possibility is that the rank revealing QR factorization */ /* or the condition estimator has failed, or the COND_OK */ /* is set very close to ONE (which is unnecessary). Normally, */ /* this branch should never be executed, but in rare cases of */ /* failure of the RRQR or condition estimator, the last line of */ /* defense ensures that DGEJSV completes the task. */ /* Compute the full SVD of L3 using DGESVJ with explicit */ /* accumulation of Jacobi rotations. */ i__1 = *lwork - (*n << 1) - *n * nr - nr; _starpu_dgesvj_("L", "U", "V", &nr, &nr, &v[v_offset], ldv, &sva[ 1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n * nr + nr + 1], &i__1, info); scalem = work[(*n << 1) + *n * nr + nr + 1]; numrank = i_dnnt(&work[(*n << 1) + *n * nr + nr + 2]); if (nr < *n) { i__1 = *n - nr; _starpu_dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + v_dim1], ldv); i__1 = *n - nr; _starpu_dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * v_dim1 + 1], ldv); i__1 = *n - nr; i__2 = *n - nr; _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + (nr + 1) * v_dim1], ldv); } i__1 = *lwork - (*n << 1) - *n * nr - nr; _starpu_dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, & work[*n + 1], &v[v_offset], ldv, &work[(*n << 1) + *n * nr + nr + 1], &i__1, &ierr); i__1 = *lwork - (*n << 1) - *n * nr - nr; _starpu_dormlq_("L", "T", &nr, &nr, &nr, &work[(*n << 1) + 1], n, &work[(*n << 1) + *n * nr + 1], &u[u_offset], ldu, &work[(*n << 1) + *n * nr + nr + 1], &i__1, & ierr); i__1 = nr; for (q = 1; q <= i__1; ++q) { i__2 = nr; for (p = 1; p <= i__2; ++p) { work[(*n << 1) + *n * nr + nr + iwork[*n + p]] = u[p + q * u_dim1]; /* L772: */ } i__2 = nr; for (p = 1; p <= i__2; ++p) { u[p + q * u_dim1] = work[(*n << 1) + *n * nr + nr + p]; /* L774: */ } /* L773: */ } } /* Permute the rows of V using the (column) permutation from the */ /* first QRF. Also, scale the columns to make them unit in */ /* Euclidean norm. This applies to all cases. */ temp1 = sqrt((doublereal) (*n)) * epsln; i__1 = *n; for (q = 1; q <= i__1; ++q) { i__2 = *n; for (p = 1; p <= i__2; ++p) { work[(*n << 1) + *n * nr + nr + iwork[p]] = v[p + q * v_dim1]; /* L972: */ } i__2 = *n; for (p = 1; p <= i__2; ++p) { v[p + q * v_dim1] = work[(*n << 1) + *n * nr + nr + p] ; /* L973: */ } xsc = 1. / _starpu_dnrm2_(n, &v[q * v_dim1 + 1], &c__1); if (xsc < 1. - temp1 || xsc > temp1 + 1.) { _starpu_dscal_(n, &xsc, &v[q * v_dim1 + 1], &c__1); } /* L1972: */ } /* At this moment, V contains the right singular vectors of A. */ /* Next, assemble the left singular vector matrix U (M x N). */ if (nr < *m) { i__1 = *m - nr; _starpu_dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + u_dim1], ldu); if (nr < n1) { i__1 = n1 - nr; _starpu_dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) * u_dim1 + 1], ldu); i__1 = *m - nr; i__2 = n1 - nr; _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + (nr + 1) * u_dim1], ldu); } } /* The Q matrix from the first QRF is built into the left singular */ /* matrix U. This applies to all cases. */ i__1 = *lwork - *n; _starpu_dormqr_("Left", "No_Tr", m, &n1, n, &a[a_offset], lda, &work[ 1], &u[u_offset], ldu, &work[*n + 1], &i__1, &ierr); /* The columns of U are normalized. The cost is O(M*N) flops. */ temp1 = sqrt((doublereal) (*m)) * epsln; i__1 = nr; for (p = 1; p <= i__1; ++p) { xsc = 1. / _starpu_dnrm2_(m, &u[p * u_dim1 + 1], &c__1); if (xsc < 1. - temp1 || xsc > temp1 + 1.) { _starpu_dscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1); } /* L1973: */ } /* If the initial QRF is computed with row pivoting, the left */ /* singular vectors must be adjusted. */ if (rowpiv) { i__1 = *m - 1; _starpu_dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n << 1) + 1], &c_n1); } } else { /* .. the initial matrix A has almost orthogonal columns and */ /* the second QRF is not needed */ _starpu_dlacpy_("Upper", n, n, &a[a_offset], lda, &work[*n + 1], n); if (l2pert) { xsc = sqrt(small); i__1 = *n; for (p = 2; p <= i__1; ++p) { temp1 = xsc * work[*n + (p - 1) * *n + p]; i__2 = p - 1; for (q = 1; q <= i__2; ++q) { work[*n + (q - 1) * *n + p] = -d_sign(&temp1, & work[*n + (p - 1) * *n + q]); /* L5971: */ } /* L5970: */ } } else { i__1 = *n - 1; i__2 = *n - 1; _starpu_dlaset_("Lower", &i__1, &i__2, &c_b34, &c_b34, &work[*n + 2], n); } i__1 = *lwork - *n - *n * *n; _starpu_dgesvj_("Upper", "U", "N", n, n, &work[*n + 1], n, &sva[1], n, &u[u_offset], ldu, &work[*n + *n * *n + 1], &i__1, info); scalem = work[*n + *n * *n + 1]; numrank = i_dnnt(&work[*n + *n * *n + 2]); i__1 = *n; for (p = 1; p <= i__1; ++p) { _starpu_dcopy_(n, &work[*n + (p - 1) * *n + 1], &c__1, &u[p * u_dim1 + 1], &c__1); _starpu_dscal_(n, &sva[p], &work[*n + (p - 1) * *n + 1], &c__1); /* L6970: */ } _starpu_dtrsm_("Left", "Upper", "NoTrans", "No UD", n, n, &c_b35, &a[ a_offset], lda, &work[*n + 1], n); i__1 = *n; for (p = 1; p <= i__1; ++p) { _starpu_dcopy_(n, &work[*n + p], n, &v[iwork[p] + v_dim1], ldv); /* L6972: */ } temp1 = sqrt((doublereal) (*n)) * epsln; i__1 = *n; for (p = 1; p <= i__1; ++p) { xsc = 1. / _starpu_dnrm2_(n, &v[p * v_dim1 + 1], &c__1); if (xsc < 1. - temp1 || xsc > temp1 + 1.) { _starpu_dscal_(n, &xsc, &v[p * v_dim1 + 1], &c__1); } /* L6971: */ } /* Assemble the left singular vector matrix U (M x N). */ if (*n < *m) { i__1 = *m - *n; _starpu_dlaset_("A", &i__1, n, &c_b34, &c_b34, &u[nr + 1 + u_dim1] , ldu); if (*n < n1) { i__1 = n1 - *n; _starpu_dlaset_("A", n, &i__1, &c_b34, &c_b34, &u[(*n + 1) * u_dim1 + 1], ldu); i__1 = *m - *n; i__2 = n1 - *n; _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + (*n + 1) * u_dim1], ldu); } } i__1 = *lwork - *n; _starpu_dormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[ 1], &u[u_offset], ldu, &work[*n + 1], &i__1, &ierr); temp1 = sqrt((doublereal) (*m)) * epsln; i__1 = n1; for (p = 1; p <= i__1; ++p) { xsc = 1. / _starpu_dnrm2_(m, &u[p * u_dim1 + 1], &c__1); if (xsc < 1. - temp1 || xsc > temp1 + 1.) { _starpu_dscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1); } /* L6973: */ } if (rowpiv) { i__1 = *m - 1; _starpu_dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n << 1) + 1], &c_n1); } } /* end of the >> almost orthogonal case << in the full SVD */ } else { /* This branch deploys a preconditioned Jacobi SVD with explicitly */ /* accumulated rotations. It is included as optional, mainly for */ /* experimental purposes. It does perfom well, and can also be used. */ /* In this implementation, this branch will be automatically activated */ /* if the condition number sigma_max(A) / sigma_min(A) is predicted */ /* to be greater than the overflow threshold. This is because the */ /* a posteriori computation of the singular vectors assumes robust */ /* implementation of BLAS and some LAPACK procedures, capable of working */ /* in presence of extreme values. Since that is not always the case, ... */ i__1 = nr; for (p = 1; p <= i__1; ++p) { i__2 = *n - p + 1; _starpu_dcopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], & c__1); /* L7968: */ } if (l2pert) { xsc = sqrt(small / epsln); i__1 = nr; for (q = 1; q <= i__1; ++q) { temp1 = xsc * (d__1 = v[q + q * v_dim1], abs(d__1)); i__2 = *n; for (p = 1; p <= i__2; ++p) { if (p > q && (d__1 = v[p + q * v_dim1], abs(d__1)) <= temp1 || p < q) { v[p + q * v_dim1] = d_sign(&temp1, &v[p + q * v_dim1]); } if (p < q) { v[p + q * v_dim1] = -v[p + q * v_dim1]; } /* L5968: */ } /* L5969: */ } } else { i__1 = nr - 1; i__2 = nr - 1; _starpu_dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + 1], ldv); } i__1 = *lwork - (*n << 1); _starpu_dgeqrf_(n, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(*n << 1) + 1], &i__1, &ierr); _starpu_dlacpy_("L", n, &nr, &v[v_offset], ldv, &work[(*n << 1) + 1], n); i__1 = nr; for (p = 1; p <= i__1; ++p) { i__2 = nr - p + 1; _starpu_dcopy_(&i__2, &v[p + p * v_dim1], ldv, &u[p + p * u_dim1], & c__1); /* L7969: */ } if (l2pert) { xsc = sqrt(small / epsln); i__1 = nr; for (q = 2; q <= i__1; ++q) { i__2 = q - 1; for (p = 1; p <= i__2; ++p) { /* Computing MIN */ d__3 = (d__1 = u[p + p * u_dim1], abs(d__1)), d__4 = ( d__2 = u[q + q * u_dim1], abs(d__2)); temp1 = xsc * min(d__3,d__4); u[p + q * u_dim1] = -d_sign(&temp1, &u[q + p * u_dim1] ); /* L9971: */ } /* L9970: */ } } else { i__1 = nr - 1; i__2 = nr - 1; _starpu_dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + 1], ldu); } i__1 = *lwork - (*n << 1) - *n * nr; _starpu_dgesvj_("G", "U", "V", &nr, &nr, &u[u_offset], ldu, &sva[1], n, & v[v_offset], ldv, &work[(*n << 1) + *n * nr + 1], &i__1, info); scalem = work[(*n << 1) + *n * nr + 1]; numrank = i_dnnt(&work[(*n << 1) + *n * nr + 2]); if (nr < *n) { i__1 = *n - nr; _starpu_dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + v_dim1], ldv); i__1 = *n - nr; _starpu_dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * v_dim1 + 1], ldv); i__1 = *n - nr; i__2 = *n - nr; _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + (nr + 1) * v_dim1], ldv); } i__1 = *lwork - (*n << 1) - *n * nr - nr; _starpu_dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, &work[*n + 1], &v[v_offset], ldv, &work[(*n << 1) + *n * nr + nr + 1] , &i__1, &ierr); /* Permute the rows of V using the (column) permutation from the */ /* first QRF. Also, scale the columns to make them unit in */ /* Euclidean norm. This applies to all cases. */ temp1 = sqrt((doublereal) (*n)) * epsln; i__1 = *n; for (q = 1; q <= i__1; ++q) { i__2 = *n; for (p = 1; p <= i__2; ++p) { work[(*n << 1) + *n * nr + nr + iwork[p]] = v[p + q * v_dim1]; /* L8972: */ } i__2 = *n; for (p = 1; p <= i__2; ++p) { v[p + q * v_dim1] = work[(*n << 1) + *n * nr + nr + p]; /* L8973: */ } xsc = 1. / _starpu_dnrm2_(n, &v[q * v_dim1 + 1], &c__1); if (xsc < 1. - temp1 || xsc > temp1 + 1.) { _starpu_dscal_(n, &xsc, &v[q * v_dim1 + 1], &c__1); } /* L7972: */ } /* At this moment, V contains the right singular vectors of A. */ /* Next, assemble the left singular vector matrix U (M x N). */ if (*n < *m) { i__1 = *m - *n; _starpu_dlaset_("A", &i__1, n, &c_b34, &c_b34, &u[nr + 1 + u_dim1], ldu); if (*n < n1) { i__1 = n1 - *n; _starpu_dlaset_("A", n, &i__1, &c_b34, &c_b34, &u[(*n + 1) * u_dim1 + 1], ldu); i__1 = *m - *n; i__2 = n1 - *n; _starpu_dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + (* n + 1) * u_dim1], ldu); } } i__1 = *lwork - *n; _starpu_dormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[1], & u[u_offset], ldu, &work[*n + 1], &i__1, &ierr); if (rowpiv) { i__1 = *m - 1; _starpu_dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n << 1) + 1], &c_n1); } } if (transp) { /* .. swap U and V because the procedure worked on A^t */ i__1 = *n; for (p = 1; p <= i__1; ++p) { _starpu_dswap_(n, &u[p * u_dim1 + 1], &c__1, &v[p * v_dim1 + 1], & c__1); /* L6974: */ } } } /* end of the full SVD */ /* Undo scaling, if necessary (and possible) */ if (uscal2 <= big / sva[1] * uscal1) { _starpu_dlascl_("G", &c__0, &c__0, &uscal1, &uscal2, &nr, &c__1, &sva[1], n, & ierr); uscal1 = 1.; uscal2 = 1.; } if (nr < *n) { i__1 = *n; for (p = nr + 1; p <= i__1; ++p) { sva[p] = 0.; /* L3004: */ } } work[1] = uscal2 * scalem; work[2] = uscal1; if (errest) { work[3] = sconda; } if (lsvec && rsvec) { work[4] = condr1; work[5] = condr2; } if (l2tran) { work[6] = entra; work[7] = entrat; } iwork[1] = nr; iwork[2] = numrank; iwork[3] = warning; return 0; /* .. */ /* .. END OF DGEJSV */ /* .. */ } /* _starpu_dgejsv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgelq2.c000066400000000000000000000106351507764646700205570ustar00rootroot00000000000000/* dgelq2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgelq2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__, k; doublereal aii; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dlarfp_(integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGELQ2 computes an LQ factorization of a real m by n matrix A: */ /* A = L * Q. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the m by n matrix A. */ /* On exit, the elements on and below the diagonal of the array */ /* contain the m by min(m,n) lower trapezoidal matrix L (L is */ /* lower triangular if m <= n); the elements above the diagonal, */ /* with the array TAU, represent the orthogonal matrix Q as a */ /* product of elementary reflectors (see Further Details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (M) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(k) . . . H(2) H(1), where k = min(m,n). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */ /* and tau in TAU(i). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGELQ2", &i__1); return 0; } k = min(*m,*n); i__1 = k; for (i__ = 1; i__ <= i__1; ++i__) { /* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ i__2 = *n - i__ + 1; /* Computing MIN */ i__3 = i__ + 1; _starpu_dlarfp_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n)* a_dim1] , lda, &tau[i__]); if (i__ < *m) { /* Apply H(i) to A(i+1:m,i:n) from the right */ aii = a[i__ + i__ * a_dim1]; a[i__ + i__ * a_dim1] = 1.; i__2 = *m - i__; i__3 = *n - i__ + 1; _starpu_dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); a[i__ + i__ * a_dim1] = aii; } /* L10: */ } return 0; /* End of DGELQ2 */ } /* _starpu_dgelq2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgelqf.c000066400000000000000000000163741507764646700206510ustar00rootroot00000000000000/* dgelqf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; /* Subroutine */ int _starpu_dgelqf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, k, ib, nb, nx, iws, nbmin, iinfo; extern /* Subroutine */ int _starpu_dgelq2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGELQF computes an LQ factorization of a real M-by-N matrix A: */ /* A = L * Q. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, the elements on and below the diagonal of the array */ /* contain the m-by-min(m,n) lower trapezoidal matrix L (L is */ /* lower triangular if m <= n); the elements above the diagonal, */ /* with the array TAU, represent the orthogonal matrix Q as a */ /* product of elementary reflectors (see Further Details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,M). */ /* For optimum performance LWORK >= M*NB, where NB is the */ /* optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(k) . . . H(2) H(1), where k = min(m,n). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */ /* and tau in TAU(i). */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; nb = _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1); lwkopt = *m * nb; work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } else if (*lwork < max(1,*m) && ! lquery) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGELQF", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ k = min(*m,*n); if (k == 0) { work[1] = 1.; return 0; } nbmin = 2; nx = 0; iws = *m; if (nb > 1 && nb < k) { /* Determine when to cross over from blocked to unblocked code. */ /* Computing MAX */ i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1); nx = max(i__1,i__2); if (nx < k) { /* Determine if workspace is large enough for blocked code. */ ldwork = *m; iws = ldwork * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: reduce NB and */ /* determine the minimum value of NB. */ nb = *lwork / ldwork; /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, & c_n1); nbmin = max(i__1,i__2); } } } if (nb >= nbmin && nb < k && nx < k) { /* Use blocked code initially */ i__1 = k - nx; i__2 = nb; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = k - i__ + 1; ib = min(i__3,nb); /* Compute the LQ factorization of the current block */ /* A(i:i+ib-1,i:n) */ i__3 = *n - i__ + 1; _starpu_dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ 1], &iinfo); if (i__ + ib <= *m) { /* Form the triangular factor of the block reflector */ /* H = H(i) H(i+1) . . . H(i+ib-1) */ i__3 = *n - i__ + 1; _starpu_dlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &ldwork); /* Apply H to A(i+ib:m,i:n) from the right */ i__3 = *m - i__ - ib + 1; i__4 = *n - i__ + 1; _starpu_dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + 1], &ldwork); } /* L10: */ } } else { i__ = 1; } /* Use unblocked code to factor the last or only block. */ if (i__ <= k) { i__2 = *m - i__ + 1; i__1 = *n - i__ + 1; _starpu_dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] , &iinfo); } work[1] = (doublereal) iws; return 0; /* End of DGELQF */ } /* _starpu_dgelqf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgels.c000066400000000000000000000360611507764646700205000ustar00rootroot00000000000000/* dgels.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b33 = 0.; static integer c__0 = 0; /* Subroutine */ int _starpu_dgels_(char *trans, integer *m, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; /* Local variables */ integer i__, j, nb, mn; doublereal anrm, bnrm; integer brow; logical tpsd; integer iascl, ibscl; extern logical _starpu_lsame_(char *, char *); integer wsize; doublereal rwork[1]; extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer scllen; doublereal bignum; extern /* Subroutine */ int _starpu_dormlq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); doublereal smlnum; logical lquery; extern /* Subroutine */ int _starpu_dtrtrs_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGELS solves overdetermined or underdetermined real linear systems */ /* involving an M-by-N matrix A, or its transpose, using a QR or LQ */ /* factorization of A. It is assumed that A has full rank. */ /* The following options are provided: */ /* 1. If TRANS = 'N' and m >= n: find the least squares solution of */ /* an overdetermined system, i.e., solve the least squares problem */ /* minimize || B - A*X ||. */ /* 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ /* an underdetermined system A * X = B. */ /* 3. If TRANS = 'T' and m >= n: find the minimum norm solution of */ /* an undetermined system A**T * X = B. */ /* 4. If TRANS = 'T' and m < n: find the least squares solution of */ /* an overdetermined system, i.e., solve the least squares problem */ /* minimize || B - A**T * X ||. */ /* Several right hand side vectors b and solution vectors x can be */ /* handled in a single call; they are stored as the columns of the */ /* M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ /* matrix X. */ /* Arguments */ /* ========= */ /* TRANS (input) CHARACTER*1 */ /* = 'N': the linear system involves A; */ /* = 'T': the linear system involves A**T. */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of */ /* columns of the matrices B and X. NRHS >=0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, */ /* if M >= N, A is overwritten by details of its QR */ /* factorization as returned by DGEQRF; */ /* if M < N, A is overwritten by details of its LQ */ /* factorization as returned by DGELQF. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the matrix B of right hand side vectors, stored */ /* columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ /* if TRANS = 'T'. */ /* On exit, if INFO = 0, B is overwritten by the solution */ /* vectors, stored columnwise: */ /* if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ /* squares solution vectors; the residual sum of squares for the */ /* solution in each column is given by the sum of squares of */ /* elements N+1 to M in that column; */ /* if TRANS = 'N' and m < n, rows 1 to N of B contain the */ /* minimum norm solution vectors; */ /* if TRANS = 'T' and m >= n, rows 1 to M of B contain the */ /* minimum norm solution vectors; */ /* if TRANS = 'T' and m < n, rows 1 to M of B contain the */ /* least squares solution vectors; the residual sum of squares */ /* for the solution in each column is given by the sum of */ /* squares of elements M+1 to N in that column. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= MAX(1,M,N). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* LWORK >= max( 1, MN + max( MN, NRHS ) ). */ /* For optimal performance, */ /* LWORK >= max( 1, MN + max( MN, NRHS )*NB ). */ /* where MN = min(M,N) and NB is the optimum block size. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the i-th diagonal element of the */ /* triangular factor of A is zero, so that A does not have */ /* full rank; the least squares solution could not be */ /* computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --work; /* Function Body */ *info = 0; mn = min(*m,*n); lquery = *lwork == -1; if (! (_starpu_lsame_(trans, "N") || _starpu_lsame_(trans, "T"))) { *info = -1; } else if (*m < 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*lda < max(1,*m)) { *info = -6; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = max(1,*m); if (*ldb < max(i__1,*n)) { *info = -8; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = mn + max(mn,*nrhs); if (*lwork < max(i__1,i__2) && ! lquery) { *info = -10; } } } /* Figure out optimal block size */ if (*info == 0 || *info == -10) { tpsd = TRUE_; if (_starpu_lsame_(trans, "N")) { tpsd = FALSE_; } if (*m >= *n) { nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); if (tpsd) { /* Computing MAX */ i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMQR", "LN", m, nrhs, n, & c_n1); nb = max(i__1,i__2); } else { /* Computing MAX */ i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMQR", "LT", m, nrhs, n, & c_n1); nb = max(i__1,i__2); } } else { nb = _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1); if (tpsd) { /* Computing MAX */ i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMLQ", "LT", n, nrhs, m, & c_n1); nb = max(i__1,i__2); } else { /* Computing MAX */ i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMLQ", "LN", n, nrhs, m, & c_n1); nb = max(i__1,i__2); } } /* Computing MAX */ i__1 = 1, i__2 = mn + max(mn,*nrhs) * nb; wsize = max(i__1,i__2); work[1] = (doublereal) wsize; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGELS ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ /* Computing MIN */ i__1 = min(*m,*n); if (min(i__1,*nrhs) == 0) { i__1 = max(*m,*n); _starpu_dlaset_("Full", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb); return 0; } /* Get machine parameters */ smlnum = _starpu_dlamch_("S") / _starpu_dlamch_("P"); bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); /* Scale A, B if max element outside range [SMLNUM,BIGNUM] */ anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, rwork); iascl = 0; if (anrm > 0. && anrm < smlnum) { /* Scale matrix norm up to SMLNUM */ _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info); iascl = 1; } else if (anrm > bignum) { /* Scale matrix norm down to BIGNUM */ _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info); iascl = 2; } else if (anrm == 0.) { /* Matrix all zero. Return zero solution. */ i__1 = max(*m,*n); _starpu_dlaset_("F", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb); goto L50; } brow = *m; if (tpsd) { brow = *n; } bnrm = _starpu_dlange_("M", &brow, nrhs, &b[b_offset], ldb, rwork); ibscl = 0; if (bnrm > 0. && bnrm < smlnum) { /* Scale matrix norm up to SMLNUM */ _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], ldb, info); ibscl = 1; } else if (bnrm > bignum) { /* Scale matrix norm down to BIGNUM */ _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], ldb, info); ibscl = 2; } if (*m >= *n) { /* compute QR factorization of A */ i__1 = *lwork - mn; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info) ; /* workspace at least N, optimally N*NB */ if (! tpsd) { /* Least-Squares Problem min || A * X - B || */ /* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) */ i__1 = *lwork - mn; _starpu_dormqr_("Left", "Transpose", m, nrhs, n, &a[a_offset], lda, &work[ 1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); /* workspace at least NRHS, optimally NRHS*NB */ /* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ _starpu_dtrtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset] , lda, &b[b_offset], ldb, info); if (*info > 0) { return 0; } scllen = *n; } else { /* Overdetermined system of equations A' * X = B */ /* B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS) */ _starpu_dtrtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset], lda, &b[b_offset], ldb, info); if (*info > 0) { return 0; } /* B(N+1:M,1:NRHS) = ZERO */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = *n + 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = 0.; /* L10: */ } /* L20: */ } /* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */ i__1 = *lwork - mn; _starpu_dormqr_("Left", "No transpose", m, nrhs, n, &a[a_offset], lda, & work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); /* workspace at least NRHS, optimally NRHS*NB */ scllen = *m; } } else { /* Compute LQ factorization of A */ i__1 = *lwork - mn; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info) ; /* workspace at least M, optimally M*NB. */ if (! tpsd) { /* underdetermined system of equations A * X = B */ /* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ _starpu_dtrtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset] , lda, &b[b_offset], ldb, info); if (*info > 0) { return 0; } /* B(M+1:N,1:NRHS) = 0 */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = *m + 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = 0.; /* L30: */ } /* L40: */ } /* B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS) */ i__1 = *lwork - mn; _starpu_dormlq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &work[ 1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); /* workspace at least NRHS, optimally NRHS*NB */ scllen = *n; } else { /* overdetermined system min || A' * X - B || */ /* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */ i__1 = *lwork - mn; _starpu_dormlq_("Left", "No transpose", n, nrhs, m, &a[a_offset], lda, & work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); /* workspace at least NRHS, optimally NRHS*NB */ /* B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS) */ _starpu_dtrtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], lda, &b[b_offset], ldb, info); if (*info > 0) { return 0; } scllen = *m; } } /* Undo scaling */ if (iascl == 1) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] , ldb, info); } else if (iascl == 2) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] , ldb, info); } if (ibscl == 1) { _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] , ldb, info); } else if (ibscl == 2) { _starpu_dlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] , ldb, info); } L50: work[1] = (doublereal) wsize; return 0; /* End of DGELS */ } /* _starpu_dgels_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgelsd.c000066400000000000000000000556171507764646700206540ustar00rootroot00000000000000/* dgelsd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__6 = 6; static integer c_n1 = -1; static integer c__9 = 9; static integer c__0 = 0; static integer c__1 = 1; static doublereal c_b82 = 0.; /* Subroutine */ int _starpu_dgelsd_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; /* Builtin functions */ double log(doublereal); /* Local variables */ integer ie, il, mm; doublereal eps, anrm, bnrm; integer itau, nlvl, iascl, ibscl; doublereal sfmin; integer minmn, maxmn, itaup, itauq, mnthr, nwork; extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dgebrd_( integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlalsd_(char *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dgeqrf_( integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dormbr_(char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer wlalsd; extern /* Subroutine */ int _starpu_dormlq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer ldwork; extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer minwrk, maxwrk; doublereal smlnum; logical lquery; integer smlsiz; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGELSD computes the minimum-norm solution to a real linear least */ /* squares problem: */ /* minimize 2-norm(| b - A*x |) */ /* using the singular value decomposition (SVD) of A. A is an M-by-N */ /* matrix which may be rank-deficient. */ /* Several right hand side vectors b and solution vectors x can be */ /* handled in a single call; they are stored as the columns of the */ /* M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ /* matrix X. */ /* The problem is solved in three steps: */ /* (1) Reduce the coefficient matrix A to bidiagonal form with */ /* Householder transformations, reducing the original problem */ /* into a "bidiagonal least squares problem" (BLS) */ /* (2) Solve the BLS using a divide and conquer approach. */ /* (3) Apply back all the Householder tranformations to solve */ /* the original least squares problem. */ /* The effective rank of A is determined by treating as zero those */ /* singular values which are less than RCOND times the largest singular */ /* value. */ /* The divide and conquer algorithm makes very mild assumptions about */ /* floating point arithmetic. It will work on machines with a guard */ /* digit in add/subtract, or on those binary machines without guard */ /* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ /* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, A has been destroyed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the M-by-NRHS right hand side matrix B. */ /* On exit, B is overwritten by the N-by-NRHS solution */ /* matrix X. If m >= n and RANK = n, the residual */ /* sum-of-squares for the solution in the i-th column is given */ /* by the sum of squares of elements n+1:m in that column. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,max(M,N)). */ /* S (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The singular values of A in decreasing order. */ /* The condition number of A in the 2-norm = S(1)/S(min(m,n)). */ /* RCOND (input) DOUBLE PRECISION */ /* RCOND is used to determine the effective rank of A. */ /* Singular values S(i) <= RCOND*S(1) are treated as zero. */ /* If RCOND < 0, machine precision is used instead. */ /* RANK (output) INTEGER */ /* The effective rank of A, i.e., the number of singular values */ /* which are greater than RCOND*S(1). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK must be at least 1. */ /* The exact minimum amount of workspace needed depends on M, */ /* N and NRHS. As long as LWORK is at least */ /* 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, */ /* if M is greater than or equal to N or */ /* 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, */ /* if M is less than N, the code will execute correctly. */ /* SMLSIZ is returned by ILAENV and is equal to the maximum */ /* size of the subproblems at the bottom of the computation */ /* tree (usually about 25), and */ /* NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) */ /* For good performance, LWORK should generally be larger. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) */ /* LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, */ /* where MINMN = MIN( M,N ). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: the algorithm for computing the SVD failed to converge; */ /* if INFO = i, i off-diagonal elements of an intermediate */ /* bidiagonal form did not converge to zero. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Ren-Cang Li, Computer Science Division, University of */ /* California at Berkeley, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --s; --work; --iwork; /* Function Body */ *info = 0; minmn = min(*m,*n); maxmn = max(*m,*n); mnthr = _starpu_ilaenv_(&c__6, "DGELSD", " ", m, n, nrhs, &c_n1); lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else if (*ldb < max(1,maxmn)) { *info = -7; } smlsiz = _starpu_ilaenv_(&c__9, "DGELSD", " ", &c__0, &c__0, &c__0, &c__0); /* Compute workspace. */ /* (Note: Comments in the code beginning "Workspace:" describe the */ /* minimal amount of workspace needed at that point in the code, */ /* as well as the preferred amount for good performance. */ /* NB refers to the optimal block size for the immediately */ /* following subroutine, as returned by ILAENV.) */ minwrk = 1; minmn = max(1,minmn); /* Computing MAX */ i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) / log(2.)) + 1; nlvl = max(i__1,0); if (*info == 0) { maxwrk = 0; mm = *m; if (*m >= *n && *m >= mnthr) { /* Path 1a - overdetermined, with many more rows than columns. */ mm = *n; /* Computing MAX */ i__1 = maxwrk, i__2 = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n + *nrhs * _starpu_ilaenv_(&c__1, "DORMQR", "LT", m, nrhs, n, &c_n1); maxwrk = max(i__1,i__2); } if (*m >= *n) { /* Path 1 - overdetermined or exactly determined. */ /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * _starpu_ilaenv_(&c__1, "DGEBRD" , " ", &mm, n, &c_n1, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3 + *nrhs * _starpu_ilaenv_(&c__1, "DORMBR", "QLT", &mm, nrhs, n, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, "DORMBR", "PLN", n, nrhs, n, &c_n1); maxwrk = max(i__1,i__2); /* Computing 2nd power */ i__1 = smlsiz + 1; wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * * nrhs + i__1 * i__1; /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3 + wlalsd; maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2), i__2 = *n * 3 + wlalsd; minwrk = max(i__1,i__2); } if (*n > *m) { /* Computing 2nd power */ i__1 = smlsiz + 1; wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * * nrhs + i__1 * i__1; if (*n >= mnthr) { /* Path 2a - underdetermined, with many more columns */ /* than rows. */ maxwrk = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * _starpu_ilaenv_(& c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * _starpu_ilaenv_(&c__1, "DORMBR", "PLN", m, nrhs, m, &c_n1); maxwrk = max(i__1,i__2); if (*nrhs > 1) { /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; maxwrk = max(i__1,i__2); } else { /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 1); maxwrk = max(i__1,i__2); } /* Computing MAX */ i__1 = maxwrk, i__2 = *m + *nrhs * _starpu_ilaenv_(&c__1, "DORMLQ", "LT", n, nrhs, m, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd; maxwrk = max(i__1,i__2); /* XXX: Ensure the Path 2a case below is triggered. The workspace */ /* calculation should use queries for all routines eventually. */ /* Computing MAX */ /* Computing MAX */ i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = max(i__3,*nrhs), i__4 = *n - *m * 3; i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + max(i__3,i__4); maxwrk = max(i__1,i__2); } else { /* Path 2 - remaining underdetermined cases. */ maxwrk = *m * 3 + (*n + *m) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * 3 + *nrhs * _starpu_ilaenv_(&c__1, "DORMBR" , "QLT", m, nrhs, n, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR", "PLN", n, nrhs, m, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * 3 + wlalsd; maxwrk = max(i__1,i__2); } /* Computing MAX */ i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1,i__2), i__2 = *m * 3 + wlalsd; minwrk = max(i__1,i__2); } minwrk = min(minwrk,maxwrk); work[1] = (doublereal) maxwrk; if (*lwork < minwrk && ! lquery) { *info = -12; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGELSD", &i__1); return 0; } else if (lquery) { goto L10; } /* Quick return if possible. */ if (*m == 0 || *n == 0) { *rank = 0; return 0; } /* Get machine parameters. */ eps = _starpu_dlamch_("P"); sfmin = _starpu_dlamch_("S"); smlnum = sfmin / eps; bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); /* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, &work[1]); iascl = 0; if (anrm > 0. && anrm < smlnum) { /* Scale matrix norm up to SMLNUM. */ _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info); iascl = 1; } else if (anrm > bignum) { /* Scale matrix norm down to BIGNUM. */ _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info); iascl = 2; } else if (anrm == 0.) { /* Matrix all zero. Return zero solution. */ i__1 = max(*m,*n); _starpu_dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[b_offset], ldb); _starpu_dlaset_("F", &minmn, &c__1, &c_b82, &c_b82, &s[1], &c__1); *rank = 0; goto L10; } /* Scale B if max entry outside range [SMLNUM,BIGNUM]. */ bnrm = _starpu_dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); ibscl = 0; if (bnrm > 0. && bnrm < smlnum) { /* Scale matrix norm up to SMLNUM. */ _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info); ibscl = 1; } else if (bnrm > bignum) { /* Scale matrix norm down to BIGNUM. */ _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info); ibscl = 2; } /* If M < N make sure certain entries of B are zero. */ if (*m < *n) { i__1 = *n - *m; _starpu_dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb); } /* Overdetermined case. */ if (*m >= *n) { /* Path 1 - overdetermined or exactly determined. */ mm = *m; if (*m >= mnthr) { /* Path 1a - overdetermined, with many more rows than columns. */ mm = *n; itau = 1; nwork = itau + *n; /* Compute A=Q*R. */ /* (Workspace: need 2*N, prefer N+N*NB) */ i__1 = *lwork - nwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info); /* Multiply B by transpose(Q). */ /* (Workspace: need N+NRHS, prefer N+NRHS*NB) */ i__1 = *lwork - nwork + 1; _starpu_dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ b_offset], ldb, &work[nwork], &i__1, info); /* Zero out below R. */ if (*n > 1) { i__1 = *n - 1; i__2 = *n - 1; _starpu_dlaset_("L", &i__1, &i__2, &c_b82, &c_b82, &a[a_dim1 + 2], lda); } } ie = 1; itauq = ie + *n; itaup = itauq + *n; nwork = itaup + *n; /* Bidiagonalize R in A. */ /* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */ i__1 = *lwork - nwork + 1; _starpu_dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[nwork], &i__1, info); /* Multiply B by transpose of left bidiagonalizing vectors of R. */ /* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */ i__1 = *lwork - nwork + 1; _starpu_dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); /* Solve the bidiagonal least squares problem. */ _starpu_dlalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank, &work[nwork], &iwork[1], info); if (*info != 0) { goto L10; } /* Multiply B by right bidiagonalizing vectors of R. */ i__1 = *lwork - nwork + 1; _starpu_dormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & b[b_offset], ldb, &work[nwork], &i__1, info); } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( i__1,*nrhs), i__2 = *n - *m * 3, i__1 = max(i__1,i__2); if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,wlalsd)) { /* Path 2a - underdetermined, with many more columns than rows */ /* and sufficient workspace for an efficient algorithm. */ ldwork = *m; /* Computing MAX */ /* Computing MAX */ i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = max(i__3,*nrhs), i__4 = *n - *m * 3; i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + *m + *m * *nrhs, i__1 = max(i__1,i__2), i__2 = (*m << 2) + *m * *lda + wlalsd; if (*lwork >= max(i__1,i__2)) { ldwork = *lda; } itau = 1; nwork = *m + 1; /* Compute A=L*Q. */ /* (Workspace: need 2*M, prefer M+M*NB) */ i__1 = *lwork - nwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info); il = nwork; /* Copy L to WORK(IL), zeroing out above its diagonal. */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); i__1 = *m - 1; i__2 = *m - 1; _starpu_dlaset_("U", &i__1, &i__2, &c_b82, &c_b82, &work[il + ldwork], & ldwork); ie = il + ldwork * *m; itauq = ie + *m; itaup = itauq + *m; nwork = itaup + *m; /* Bidiagonalize L in WORK(IL). */ /* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */ i__1 = *lwork - nwork + 1; _starpu_dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], &work[itaup], &work[nwork], &i__1, info); /* Multiply B by transpose of left bidiagonalizing vectors of L. */ /* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */ i__1 = *lwork - nwork + 1; _starpu_dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); /* Solve the bidiagonal least squares problem. */ _starpu_dlalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank, &work[nwork], &iwork[1], info); if (*info != 0) { goto L10; } /* Multiply B by right bidiagonalizing vectors of L. */ i__1 = *lwork - nwork + 1; _starpu_dormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); /* Zero out below first M rows of B. */ i__1 = *n - *m; _starpu_dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb); nwork = itau + *m; /* Multiply transpose(Q) by B. */ /* (Workspace: need M+NRHS, prefer M+NRHS*NB) */ i__1 = *lwork - nwork + 1; _starpu_dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ b_offset], ldb, &work[nwork], &i__1, info); } else { /* Path 2 - remaining underdetermined cases. */ ie = 1; itauq = ie + *m; itaup = itauq + *m; nwork = itaup + *m; /* Bidiagonalize A. */ /* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ i__1 = *lwork - nwork + 1; _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[nwork], &i__1, info); /* Multiply B by transpose of left bidiagonalizing vectors. */ /* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */ i__1 = *lwork - nwork + 1; _starpu_dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] , &b[b_offset], ldb, &work[nwork], &i__1, info); /* Solve the bidiagonal least squares problem. */ _starpu_dlalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank, &work[nwork], &iwork[1], info); if (*info != 0) { goto L10; } /* Multiply B by right bidiagonalizing vectors of A. */ i__1 = *lwork - nwork + 1; _starpu_dormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] , &b[b_offset], ldb, &work[nwork], &i__1, info); } } /* Undo scaling. */ if (iascl == 1) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info); _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & minmn, info); } else if (iascl == 2) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info); _starpu_dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & minmn, info); } if (ibscl == 1) { _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } else if (ibscl == 2) { _starpu_dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } L10: work[1] = (doublereal) maxwrk; return 0; /* End of DGELSD */ } /* _starpu_dgelsd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgelss.c000066400000000000000000000643531507764646700206700ustar00rootroot00000000000000/* dgelss.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__6 = 6; static integer c_n1 = -1; static integer c__1 = 1; static integer c__0 = 0; static doublereal c_b74 = 0.; static doublereal c_b108 = 1.; /* Subroutine */ int _starpu_dgelss_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; doublereal d__1; /* Local variables */ integer i__, bl, ie, il, mm; doublereal eps, thr, anrm, bnrm; integer itau; doublereal vdum[1]; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer iascl, ibscl; extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_drscl_(integer *, doublereal *, doublereal *, integer *); integer chunk; doublereal sfmin; integer minmn; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); integer maxmn, itaup, itauq, mnthr, iwork; extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dgebrd_( integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); integer bdspac; extern /* Subroutine */ int _starpu_dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dbdsqr_(char *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dorgbr_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); doublereal bignum; extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dormbr_(char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dormlq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer ldwork; extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer minwrk, maxwrk; doublereal smlnum; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGELSS computes the minimum norm solution to a real linear least */ /* squares problem: */ /* Minimize 2-norm(| b - A*x |). */ /* using the singular value decomposition (SVD) of A. A is an M-by-N */ /* matrix which may be rank-deficient. */ /* Several right hand side vectors b and solution vectors x can be */ /* handled in a single call; they are stored as the columns of the */ /* M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix */ /* X. */ /* The effective rank of A is determined by treating as zero those */ /* singular values which are less than RCOND times the largest singular */ /* value. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, the first min(m,n) rows of A are overwritten with */ /* its right singular vectors, stored rowwise. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the M-by-NRHS right hand side matrix B. */ /* On exit, B is overwritten by the N-by-NRHS solution */ /* matrix X. If m >= n and RANK = n, the residual */ /* sum-of-squares for the solution in the i-th column is given */ /* by the sum of squares of elements n+1:m in that column. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,max(M,N)). */ /* S (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The singular values of A in decreasing order. */ /* The condition number of A in the 2-norm = S(1)/S(min(m,n)). */ /* RCOND (input) DOUBLE PRECISION */ /* RCOND is used to determine the effective rank of A. */ /* Singular values S(i) <= RCOND*S(1) are treated as zero. */ /* If RCOND < 0, machine precision is used instead. */ /* RANK (output) INTEGER */ /* The effective rank of A, i.e., the number of singular values */ /* which are greater than RCOND*S(1). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= 1, and also: */ /* LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS ) */ /* For good performance, LWORK should generally be larger. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: the algorithm for computing the SVD failed to converge; */ /* if INFO = i, i off-diagonal elements of an intermediate */ /* bidiagonal form did not converge to zero. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --s; --work; /* Function Body */ *info = 0; minmn = min(*m,*n); maxmn = max(*m,*n); lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else if (*ldb < max(1,maxmn)) { *info = -7; } /* Compute workspace */ /* (Note: Comments in the code beginning "Workspace:" describe the */ /* minimal amount of workspace needed at that point in the code, */ /* as well as the preferred amount for good performance. */ /* NB refers to the optimal block size for the immediately */ /* following subroutine, as returned by ILAENV.) */ if (*info == 0) { minwrk = 1; maxwrk = 1; if (minmn > 0) { mm = *m; mnthr = _starpu_ilaenv_(&c__6, "DGELSS", " ", m, n, nrhs, &c_n1); if (*m >= *n && *m >= mnthr) { /* Path 1a - overdetermined, with many more rows than */ /* columns */ mm = *n; /* Computing MAX */ i__1 = maxwrk, i__2 = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n + *nrhs * _starpu_ilaenv_(&c__1, "DORMQR", "LT", m, nrhs, n, &c_n1); maxwrk = max(i__1,i__2); } if (*m >= *n) { /* Path 1 - overdetermined or exactly determined */ /* Compute workspace needed for DBDSQR */ /* Computing MAX */ i__1 = 1, i__2 = *n * 5; bdspac = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", &mm, n, &c_n1, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3 + *nrhs * _starpu_ilaenv_(&c__1, "DORMBR" , "QLT", &mm, nrhs, n, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", n, n, n, &c_n1); maxwrk = max(i__1,i__2); maxwrk = max(maxwrk,bdspac); /* Computing MAX */ i__1 = maxwrk, i__2 = *n * *nrhs; maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1, i__2); minwrk = max(i__1,bdspac); maxwrk = max(minwrk,maxwrk); } if (*n > *m) { /* Compute workspace needed for DBDSQR */ /* Computing MAX */ i__1 = 1, i__2 = *m * 5; bdspac = max(i__1,i__2); /* Computing MAX */ i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *n, i__1 = max(i__1, i__2); minwrk = max(i__1,bdspac); if (*n >= mnthr) { /* Path 2a - underdetermined, with many more columns */ /* than rows */ maxwrk = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * _starpu_ilaenv_(&c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", m, m, m, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + *m + bdspac; maxwrk = max(i__1,i__2); if (*nrhs > 1) { /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; maxwrk = max(i__1,i__2); } else { /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 1); maxwrk = max(i__1,i__2); } /* Computing MAX */ i__1 = maxwrk, i__2 = *m + *nrhs * _starpu_ilaenv_(&c__1, "DORMLQ" , "LT", n, nrhs, m, &c_n1); maxwrk = max(i__1,i__2); } else { /* Path 2 - underdetermined */ maxwrk = *m * 3 + (*n + *m) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * 3 + *nrhs * _starpu_ilaenv_(&c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORG" "BR", "P", m, n, m, &c_n1); maxwrk = max(i__1,i__2); maxwrk = max(maxwrk,bdspac); /* Computing MAX */ i__1 = maxwrk, i__2 = *n * *nrhs; maxwrk = max(i__1,i__2); } } maxwrk = max(minwrk,maxwrk); } work[1] = (doublereal) maxwrk; if (*lwork < minwrk && ! lquery) { *info = -12; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGELSS", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { *rank = 0; return 0; } /* Get machine parameters */ eps = _starpu_dlamch_("P"); sfmin = _starpu_dlamch_("S"); smlnum = sfmin / eps; bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, &work[1]); iascl = 0; if (anrm > 0. && anrm < smlnum) { /* Scale matrix norm up to SMLNUM */ _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info); iascl = 1; } else if (anrm > bignum) { /* Scale matrix norm down to BIGNUM */ _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info); iascl = 2; } else if (anrm == 0.) { /* Matrix all zero. Return zero solution. */ i__1 = max(*m,*n); _starpu_dlaset_("F", &i__1, nrhs, &c_b74, &c_b74, &b[b_offset], ldb); _starpu_dlaset_("F", &minmn, &c__1, &c_b74, &c_b74, &s[1], &c__1); *rank = 0; goto L70; } /* Scale B if max element outside range [SMLNUM,BIGNUM] */ bnrm = _starpu_dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); ibscl = 0; if (bnrm > 0. && bnrm < smlnum) { /* Scale matrix norm up to SMLNUM */ _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info); ibscl = 1; } else if (bnrm > bignum) { /* Scale matrix norm down to BIGNUM */ _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info); ibscl = 2; } /* Overdetermined case */ if (*m >= *n) { /* Path 1 - overdetermined or exactly determined */ mm = *m; if (*m >= mnthr) { /* Path 1a - overdetermined, with many more rows than columns */ mm = *n; itau = 1; iwork = itau + *n; /* Compute A=Q*R */ /* (Workspace: need 2*N, prefer N+N*NB) */ i__1 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__1, info); /* Multiply B by transpose(Q) */ /* (Workspace: need N+NRHS, prefer N+NRHS*NB) */ i__1 = *lwork - iwork + 1; _starpu_dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ b_offset], ldb, &work[iwork], &i__1, info); /* Zero out below R */ if (*n > 1) { i__1 = *n - 1; i__2 = *n - 1; _starpu_dlaset_("L", &i__1, &i__2, &c_b74, &c_b74, &a[a_dim1 + 2], lda); } } ie = 1; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize R in A */ /* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */ i__1 = *lwork - iwork + 1; _starpu_dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[iwork], &i__1, info); /* Multiply B by transpose of left bidiagonalizing vectors of R */ /* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */ i__1 = *lwork - iwork + 1; _starpu_dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb, &work[iwork], &i__1, info); /* Generate right bidiagonalizing vectors of R in A */ /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ i__1 = *lwork - iwork + 1; _starpu_dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], & i__1, info); iwork = ie + *n; /* Perform bidiagonal QR iteration */ /* multiply B by transpose of left singular vectors */ /* compute right singular vectors in A */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", n, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, vdum, &c__1, &b[b_offset], ldb, &work[iwork], info) ; if (*info != 0) { goto L70; } /* Multiply B by reciprocals of singular values */ /* Computing MAX */ d__1 = *rcond * s[1]; thr = max(d__1,sfmin); if (*rcond < 0.) { /* Computing MAX */ d__1 = eps * s[1]; thr = max(d__1,sfmin); } *rank = 0; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (s[i__] > thr) { _starpu_drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb); ++(*rank); } else { _starpu_dlaset_("F", &c__1, nrhs, &c_b74, &c_b74, &b[i__ + b_dim1], ldb); } /* L10: */ } /* Multiply B by right singular vectors */ /* (Workspace: need N, prefer N*NRHS) */ if (*lwork >= *ldb * *nrhs && *nrhs > 1) { _starpu_dgemm_("T", "N", n, nrhs, n, &c_b108, &a[a_offset], lda, &b[ b_offset], ldb, &c_b74, &work[1], ldb); _starpu_dlacpy_("G", n, nrhs, &work[1], ldb, &b[b_offset], ldb) ; } else if (*nrhs > 1) { chunk = *lwork / *n; i__1 = *nrhs; i__2 = chunk; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = *nrhs - i__ + 1; bl = min(i__3,chunk); _starpu_dgemm_("T", "N", n, &bl, n, &c_b108, &a[a_offset], lda, &b[ i__ * b_dim1 + 1], ldb, &c_b74, &work[1], n); _starpu_dlacpy_("G", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb); /* L20: */ } } else { _starpu_dgemv_("T", n, n, &c_b108, &a[a_offset], lda, &b[b_offset], &c__1, &c_b74, &work[1], &c__1); _starpu_dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1); } } else /* if(complicated condition) */ { /* Computing MAX */ i__2 = *m, i__1 = (*m << 1) - 4, i__2 = max(i__2,i__1), i__2 = max( i__2,*nrhs), i__1 = *n - *m * 3; if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__2,i__1)) { /* Path 2a - underdetermined, with many more columns than rows */ /* and sufficient workspace for an efficient algorithm */ ldwork = *m; /* Computing MAX */ /* Computing MAX */ i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = max(i__3,*nrhs), i__4 = *n - *m * 3; i__2 = (*m << 2) + *m * *lda + max(i__3,i__4), i__1 = *m * *lda + *m + *m * *nrhs; if (*lwork >= max(i__2,i__1)) { ldwork = *lda; } itau = 1; iwork = *m + 1; /* Compute A=L*Q */ /* (Workspace: need 2*M, prefer M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__2, info); il = iwork; /* Copy L to WORK(IL), zeroing out above it */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); i__2 = *m - 1; i__1 = *m - 1; _starpu_dlaset_("U", &i__2, &i__1, &c_b74, &c_b74, &work[il + ldwork], & ldwork); ie = il + ldwork * *m; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize L in WORK(IL) */ /* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], &work[itaup], &work[iwork], &i__2, info); /* Multiply B by transpose of left bidiagonalizing vectors of L */ /* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */ i__2 = *lwork - iwork + 1; _starpu_dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ itauq], &b[b_offset], ldb, &work[iwork], &i__2, info); /* Generate right bidiagonalizing vectors of R in WORK(IL) */ /* (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", m, m, m, &work[il], &ldwork, &work[itaup], &work[ iwork], &i__2, info); iwork = ie + *m; /* Perform bidiagonal QR iteration, */ /* computing right singular vectors of L in WORK(IL) and */ /* multiplying B by transpose of left singular vectors */ /* (Workspace: need M*M+M+BDSPAC) */ _starpu_dbdsqr_("U", m, m, &c__0, nrhs, &s[1], &work[ie], &work[il], & ldwork, &a[a_offset], lda, &b[b_offset], ldb, &work[iwork] , info); if (*info != 0) { goto L70; } /* Multiply B by reciprocals of singular values */ /* Computing MAX */ d__1 = *rcond * s[1]; thr = max(d__1,sfmin); if (*rcond < 0.) { /* Computing MAX */ d__1 = eps * s[1]; thr = max(d__1,sfmin); } *rank = 0; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (s[i__] > thr) { _starpu_drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb); ++(*rank); } else { _starpu_dlaset_("F", &c__1, nrhs, &c_b74, &c_b74, &b[i__ + b_dim1] , ldb); } /* L30: */ } iwork = ie; /* Multiply B by right singular vectors of L in WORK(IL) */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) */ if (*lwork >= *ldb * *nrhs + iwork - 1 && *nrhs > 1) { _starpu_dgemm_("T", "N", m, nrhs, m, &c_b108, &work[il], &ldwork, &b[ b_offset], ldb, &c_b74, &work[iwork], ldb); _starpu_dlacpy_("G", m, nrhs, &work[iwork], ldb, &b[b_offset], ldb); } else if (*nrhs > 1) { chunk = (*lwork - iwork + 1) / *m; i__2 = *nrhs; i__1 = chunk; for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { /* Computing MIN */ i__3 = *nrhs - i__ + 1; bl = min(i__3,chunk); _starpu_dgemm_("T", "N", m, &bl, m, &c_b108, &work[il], &ldwork, & b[i__ * b_dim1 + 1], ldb, &c_b74, &work[iwork], m); _starpu_dlacpy_("G", m, &bl, &work[iwork], m, &b[i__ * b_dim1 + 1] , ldb); /* L40: */ } } else { _starpu_dgemv_("T", m, m, &c_b108, &work[il], &ldwork, &b[b_dim1 + 1], &c__1, &c_b74, &work[iwork], &c__1); _starpu_dcopy_(m, &work[iwork], &c__1, &b[b_dim1 + 1], &c__1); } /* Zero out below first M rows of B */ i__1 = *n - *m; _starpu_dlaset_("F", &i__1, nrhs, &c_b74, &c_b74, &b[*m + 1 + b_dim1], ldb); iwork = itau + *m; /* Multiply transpose(Q) by B */ /* (Workspace: need M+NRHS, prefer M+NRHS*NB) */ i__1 = *lwork - iwork + 1; _starpu_dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ b_offset], ldb, &work[iwork], &i__1, info); } else { /* Path 2 - remaining underdetermined cases */ ie = 1; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize A */ /* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ i__1 = *lwork - iwork + 1; _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[iwork], &i__1, info); /* Multiply B by transpose of left bidiagonalizing vectors */ /* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */ i__1 = *lwork - iwork + 1; _starpu_dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] , &b[b_offset], ldb, &work[iwork], &i__1, info); /* Generate right bidiagonalizing vectors in A */ /* (Workspace: need 4*M, prefer 3*M+M*NB) */ i__1 = *lwork - iwork + 1; _starpu_dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ iwork], &i__1, info); iwork = ie + *m; /* Perform bidiagonal QR iteration, */ /* computing right singular vectors of A in A and */ /* multiplying B by transpose of left singular vectors */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("L", m, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, vdum, &c__1, &b[b_offset], ldb, &work[iwork], info); if (*info != 0) { goto L70; } /* Multiply B by reciprocals of singular values */ /* Computing MAX */ d__1 = *rcond * s[1]; thr = max(d__1,sfmin); if (*rcond < 0.) { /* Computing MAX */ d__1 = eps * s[1]; thr = max(d__1,sfmin); } *rank = 0; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (s[i__] > thr) { _starpu_drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb); ++(*rank); } else { _starpu_dlaset_("F", &c__1, nrhs, &c_b74, &c_b74, &b[i__ + b_dim1] , ldb); } /* L50: */ } /* Multiply B by right singular vectors of A */ /* (Workspace: need N, prefer N*NRHS) */ if (*lwork >= *ldb * *nrhs && *nrhs > 1) { _starpu_dgemm_("T", "N", n, nrhs, m, &c_b108, &a[a_offset], lda, &b[ b_offset], ldb, &c_b74, &work[1], ldb); _starpu_dlacpy_("F", n, nrhs, &work[1], ldb, &b[b_offset], ldb); } else if (*nrhs > 1) { chunk = *lwork / *n; i__1 = *nrhs; i__2 = chunk; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = *nrhs - i__ + 1; bl = min(i__3,chunk); _starpu_dgemm_("T", "N", n, &bl, m, &c_b108, &a[a_offset], lda, & b[i__ * b_dim1 + 1], ldb, &c_b74, &work[1], n); _starpu_dlacpy_("F", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb); /* L60: */ } } else { _starpu_dgemv_("T", m, n, &c_b108, &a[a_offset], lda, &b[b_offset], & c__1, &c_b74, &work[1], &c__1); _starpu_dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1); } } } /* Undo scaling */ if (iascl == 1) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info); _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & minmn, info); } else if (iascl == 2) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info); _starpu_dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & minmn, info); } if (ibscl == 1) { _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } else if (ibscl == 2) { _starpu_dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } L70: work[1] = (doublereal) maxwrk; return 0; /* End of DGELSS */ } /* _starpu_dgelss_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgelsx.c000066400000000000000000000322151507764646700206650ustar00rootroot00000000000000/* dgelsx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__0 = 0; static doublereal c_b13 = 0.; static integer c__2 = 2; static integer c__1 = 1; static doublereal c_b36 = 1.; /* Subroutine */ int _starpu_dgelsx_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * jpvt, doublereal *rcond, integer *rank, doublereal *work, integer * info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; doublereal d__1; /* Local variables */ integer i__, j, k; doublereal c1, c2, s1, s2, t1, t2; integer mn; doublereal anrm, bnrm, smin, smax; integer iascl, ibscl, ismin, ismax; extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaic1_( integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dorm2r_( char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dlabad_(doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dgeqpf_(integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dlatzm_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *); doublereal sminpr, smaxpr, smlnum; extern /* Subroutine */ int _starpu_dtzrqf_(integer *, integer *, doublereal *, integer *, doublereal *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This routine is deprecated and has been replaced by routine DGELSY. */ /* DGELSX computes the minimum-norm solution to a real linear least */ /* squares problem: */ /* minimize || A * X - B || */ /* using a complete orthogonal factorization of A. A is an M-by-N */ /* matrix which may be rank-deficient. */ /* Several right hand side vectors b and solution vectors x can be */ /* handled in a single call; they are stored as the columns of the */ /* M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ /* matrix X. */ /* The routine first computes a QR factorization with column pivoting: */ /* A * P = Q * [ R11 R12 ] */ /* [ 0 R22 ] */ /* with R11 defined as the largest leading submatrix whose estimated */ /* condition number is less than 1/RCOND. The order of R11, RANK, */ /* is the effective rank of A. */ /* Then, R22 is considered to be negligible, and R12 is annihilated */ /* by orthogonal transformations from the right, arriving at the */ /* complete orthogonal factorization: */ /* A * P = Q * [ T11 0 ] * Z */ /* [ 0 0 ] */ /* The minimum-norm solution is then */ /* X = P * Z' [ inv(T11)*Q1'*B ] */ /* [ 0 ] */ /* where Q1 consists of the first RANK columns of Q. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of */ /* columns of matrices B and X. NRHS >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, A has been overwritten by details of its */ /* complete orthogonal factorization. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the M-by-NRHS right hand side matrix B. */ /* On exit, the N-by-NRHS solution matrix X. */ /* If m >= n and RANK = n, the residual sum-of-squares for */ /* the solution in the i-th column is given by the sum of */ /* squares of elements N+1:M in that column. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,M,N). */ /* JPVT (input/output) INTEGER array, dimension (N) */ /* On entry, if JPVT(i) .ne. 0, the i-th column of A is an */ /* initial column, otherwise it is a free column. Before */ /* the QR factorization of A, all initial columns are */ /* permuted to the leading positions; only the remaining */ /* free columns are moved as a result of column pivoting */ /* during the factorization. */ /* On exit, if JPVT(i) = k, then the i-th column of A*P */ /* was the k-th column of A. */ /* RCOND (input) DOUBLE PRECISION */ /* RCOND is used to determine the effective rank of A, which */ /* is defined as the order of the largest leading triangular */ /* submatrix R11 in the QR factorization with pivoting of A, */ /* whose estimated condition number < 1/RCOND. */ /* RANK (output) INTEGER */ /* The effective rank of A, i.e., the order of the submatrix */ /* R11. This is the same as the order of the submatrix T11 */ /* in the complete orthogonal factorization of A. */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (max( min(M,N)+3*N, 2*min(M,N)+NRHS )), */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --jpvt; --work; /* Function Body */ mn = min(*m,*n); ismin = mn + 1; ismax = (mn << 1) + 1; /* Test the input arguments. */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = max(1,*m); if (*ldb < max(i__1,*n)) { *info = -7; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGELSX", &i__1); return 0; } /* Quick return if possible */ /* Computing MIN */ i__1 = min(*m,*n); if (min(i__1,*nrhs) == 0) { *rank = 0; return 0; } /* Get machine parameters */ smlnum = _starpu_dlamch_("S") / _starpu_dlamch_("P"); bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); /* Scale A, B if max elements outside range [SMLNUM,BIGNUM] */ anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, &work[1]); iascl = 0; if (anrm > 0. && anrm < smlnum) { /* Scale matrix norm up to SMLNUM */ _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info); iascl = 1; } else if (anrm > bignum) { /* Scale matrix norm down to BIGNUM */ _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info); iascl = 2; } else if (anrm == 0.) { /* Matrix all zero. Return zero solution. */ i__1 = max(*m,*n); _starpu_dlaset_("F", &i__1, nrhs, &c_b13, &c_b13, &b[b_offset], ldb); *rank = 0; goto L100; } bnrm = _starpu_dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); ibscl = 0; if (bnrm > 0. && bnrm < smlnum) { /* Scale matrix norm up to SMLNUM */ _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info); ibscl = 1; } else if (bnrm > bignum) { /* Scale matrix norm down to BIGNUM */ _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info); ibscl = 2; } /* Compute QR factorization with column pivoting of A: */ /* A * P = Q * R */ _starpu_dgeqpf_(m, n, &a[a_offset], lda, &jpvt[1], &work[1], &work[mn + 1], info); /* workspace 3*N. Details of Householder rotations stored */ /* in WORK(1:MN). */ /* Determine RANK using incremental condition estimation */ work[ismin] = 1.; work[ismax] = 1.; smax = (d__1 = a[a_dim1 + 1], abs(d__1)); smin = smax; if ((d__1 = a[a_dim1 + 1], abs(d__1)) == 0.) { *rank = 0; i__1 = max(*m,*n); _starpu_dlaset_("F", &i__1, nrhs, &c_b13, &c_b13, &b[b_offset], ldb); goto L100; } else { *rank = 1; } L10: if (*rank < mn) { i__ = *rank + 1; _starpu_dlaic1_(&c__2, rank, &work[ismin], &smin, &a[i__ * a_dim1 + 1], &a[ i__ + i__ * a_dim1], &sminpr, &s1, &c1); _starpu_dlaic1_(&c__1, rank, &work[ismax], &smax, &a[i__ * a_dim1 + 1], &a[ i__ + i__ * a_dim1], &smaxpr, &s2, &c2); if (smaxpr * *rcond <= sminpr) { i__1 = *rank; for (i__ = 1; i__ <= i__1; ++i__) { work[ismin + i__ - 1] = s1 * work[ismin + i__ - 1]; work[ismax + i__ - 1] = s2 * work[ismax + i__ - 1]; /* L20: */ } work[ismin + *rank] = c1; work[ismax + *rank] = c2; smin = sminpr; smax = smaxpr; ++(*rank); goto L10; } } /* Logically partition R = [ R11 R12 ] */ /* [ 0 R22 ] */ /* where R11 = R(1:RANK,1:RANK) */ /* [R11,R12] = [ T11, 0 ] * Y */ if (*rank < *n) { _starpu_dtzrqf_(rank, n, &a[a_offset], lda, &work[mn + 1], info); } /* Details of Householder rotations stored in WORK(MN+1:2*MN) */ /* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) */ _starpu_dorm2r_("Left", "Transpose", m, nrhs, &mn, &a[a_offset], lda, &work[1], & b[b_offset], ldb, &work[(mn << 1) + 1], info); /* workspace NRHS */ /* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) */ _starpu_dtrsm_("Left", "Upper", "No transpose", "Non-unit", rank, nrhs, &c_b36, & a[a_offset], lda, &b[b_offset], ldb); i__1 = *n; for (i__ = *rank + 1; i__ <= i__1; ++i__) { i__2 = *nrhs; for (j = 1; j <= i__2; ++j) { b[i__ + j * b_dim1] = 0.; /* L30: */ } /* L40: */ } /* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) */ if (*rank < *n) { i__1 = *rank; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *n - *rank + 1; _starpu_dlatzm_("Left", &i__2, nrhs, &a[i__ + (*rank + 1) * a_dim1], lda, &work[mn + i__], &b[i__ + b_dim1], &b[*rank + 1 + b_dim1], ldb, &work[(mn << 1) + 1]); /* L50: */ } } /* workspace NRHS */ /* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[(mn << 1) + i__] = 1.; /* L60: */ } i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[(mn << 1) + i__] == 1.) { if (jpvt[i__] != i__) { k = i__; t1 = b[k + j * b_dim1]; t2 = b[jpvt[k] + j * b_dim1]; L70: b[jpvt[k] + j * b_dim1] = t1; work[(mn << 1) + k] = 0.; t1 = t2; k = jpvt[k]; t2 = b[jpvt[k] + j * b_dim1]; if (jpvt[k] != i__) { goto L70; } b[i__ + j * b_dim1] = t1; work[(mn << 1) + k] = 0.; } } /* L80: */ } /* L90: */ } /* Undo scaling */ if (iascl == 1) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info); _starpu_dlascl_("U", &c__0, &c__0, &smlnum, &anrm, rank, rank, &a[a_offset], lda, info); } else if (iascl == 2) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info); _starpu_dlascl_("U", &c__0, &c__0, &bignum, &anrm, rank, rank, &a[a_offset], lda, info); } if (ibscl == 1) { _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } else if (ibscl == 2) { _starpu_dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } L100: return 0; /* End of DGELSX */ } /* _starpu_dgelsx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgelsy.c000066400000000000000000000371321507764646700206710ustar00rootroot00000000000000/* dgelsy.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__0 = 0; static doublereal c_b31 = 0.; static integer c__2 = 2; static doublereal c_b54 = 1.; /* Subroutine */ int _starpu_dgelsy_(integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * jpvt, doublereal *rcond, integer *rank, doublereal *work, integer * lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; doublereal d__1, d__2; /* Local variables */ integer i__, j; doublereal c1, c2, s1, s2; integer nb, mn, nb1, nb2, nb3, nb4; doublereal anrm, bnrm, smin, smax; integer iascl, ibscl; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); integer ismin, ismax; extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaic1_( integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); doublereal wsize; extern /* Subroutine */ int _starpu_dgeqp3_(integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlabad_(doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); doublereal bignum; integer lwkmin; extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); doublereal sminpr, smaxpr, smlnum; extern /* Subroutine */ int _starpu_dormrz_(char *, char *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; extern /* Subroutine */ int _starpu_dtzrzf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGELSY computes the minimum-norm solution to a real linear least */ /* squares problem: */ /* minimize || A * X - B || */ /* using a complete orthogonal factorization of A. A is an M-by-N */ /* matrix which may be rank-deficient. */ /* Several right hand side vectors b and solution vectors x can be */ /* handled in a single call; they are stored as the columns of the */ /* M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ /* matrix X. */ /* The routine first computes a QR factorization with column pivoting: */ /* A * P = Q * [ R11 R12 ] */ /* [ 0 R22 ] */ /* with R11 defined as the largest leading submatrix whose estimated */ /* condition number is less than 1/RCOND. The order of R11, RANK, */ /* is the effective rank of A. */ /* Then, R22 is considered to be negligible, and R12 is annihilated */ /* by orthogonal transformations from the right, arriving at the */ /* complete orthogonal factorization: */ /* A * P = Q * [ T11 0 ] * Z */ /* [ 0 0 ] */ /* The minimum-norm solution is then */ /* X = P * Z' [ inv(T11)*Q1'*B ] */ /* [ 0 ] */ /* where Q1 consists of the first RANK columns of Q. */ /* This routine is basically identical to the original xGELSX except */ /* three differences: */ /* o The call to the subroutine xGEQPF has been substituted by the */ /* the call to the subroutine xGEQP3. This subroutine is a Blas-3 */ /* version of the QR factorization with column pivoting. */ /* o Matrix B (the right hand side) is updated with Blas-3. */ /* o The permutation of matrix B (the right hand side) is faster and */ /* more simple. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of */ /* columns of matrices B and X. NRHS >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, A has been overwritten by details of its */ /* complete orthogonal factorization. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the M-by-NRHS right hand side matrix B. */ /* On exit, the N-by-NRHS solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,M,N). */ /* JPVT (input/output) INTEGER array, dimension (N) */ /* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted */ /* to the front of AP, otherwise column i is a free column. */ /* On exit, if JPVT(i) = k, then the i-th column of AP */ /* was the k-th column of A. */ /* RCOND (input) DOUBLE PRECISION */ /* RCOND is used to determine the effective rank of A, which */ /* is defined as the order of the largest leading triangular */ /* submatrix R11 in the QR factorization with pivoting of A, */ /* whose estimated condition number < 1/RCOND. */ /* RANK (output) INTEGER */ /* The effective rank of A, i.e., the order of the submatrix */ /* R11. This is the same as the order of the submatrix T11 */ /* in the complete orthogonal factorization of A. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* The unblocked strategy requires that: */ /* LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), */ /* where MN = min( M, N ). */ /* The block algorithm requires that: */ /* LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), */ /* where NB is an upper bound on the blocksize returned */ /* by ILAENV for the routines DGEQP3, DTZRZF, STZRQF, DORMQR, */ /* and DORMRZ. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: If INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ /* E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */ /* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --jpvt; --work; /* Function Body */ mn = min(*m,*n); ismin = mn + 1; ismax = (mn << 1) + 1; /* Test the input arguments. */ *info = 0; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = max(1,*m); if (*ldb < max(i__1,*n)) { *info = -7; } } /* Figure out optimal block size */ if (*info == 0) { if (mn == 0 || *nrhs == 0) { lwkmin = 1; lwkopt = 1; } else { nb1 = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); nb2 = _starpu_ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1); nb3 = _starpu_ilaenv_(&c__1, "DORMQR", " ", m, n, nrhs, &c_n1); nb4 = _starpu_ilaenv_(&c__1, "DORMRQ", " ", m, n, nrhs, &c_n1); /* Computing MAX */ i__1 = max(nb1,nb2), i__1 = max(i__1,nb3); nb = max(i__1,nb4); /* Computing MAX */ i__1 = mn << 1, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = mn + *nrhs; lwkmin = mn + max(i__1,i__2); /* Computing MAX */ i__1 = lwkmin, i__2 = mn + (*n << 1) + nb * (*n + 1), i__1 = max( i__1,i__2), i__2 = (mn << 1) + nb * *nrhs; lwkopt = max(i__1,i__2); } work[1] = (doublereal) lwkopt; if (*lwork < lwkmin && ! lquery) { *info = -12; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGELSY", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (mn == 0 || *nrhs == 0) { *rank = 0; return 0; } /* Get machine parameters */ smlnum = _starpu_dlamch_("S") / _starpu_dlamch_("P"); bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); /* Scale A, B if max entries outside range [SMLNUM,BIGNUM] */ anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, &work[1]); iascl = 0; if (anrm > 0. && anrm < smlnum) { /* Scale matrix norm up to SMLNUM */ _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info); iascl = 1; } else if (anrm > bignum) { /* Scale matrix norm down to BIGNUM */ _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info); iascl = 2; } else if (anrm == 0.) { /* Matrix all zero. Return zero solution. */ i__1 = max(*m,*n); _starpu_dlaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb); *rank = 0; goto L70; } bnrm = _starpu_dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); ibscl = 0; if (bnrm > 0. && bnrm < smlnum) { /* Scale matrix norm up to SMLNUM */ _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info); ibscl = 1; } else if (bnrm > bignum) { /* Scale matrix norm down to BIGNUM */ _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info); ibscl = 2; } /* Compute QR factorization with column pivoting of A: */ /* A * P = Q * R */ i__1 = *lwork - mn; _starpu_dgeqp3_(m, n, &a[a_offset], lda, &jpvt[1], &work[1], &work[mn + 1], &i__1, info); wsize = mn + work[mn + 1]; /* workspace: MN+2*N+NB*(N+1). */ /* Details of Householder rotations stored in WORK(1:MN). */ /* Determine RANK using incremental condition estimation */ work[ismin] = 1.; work[ismax] = 1.; smax = (d__1 = a[a_dim1 + 1], abs(d__1)); smin = smax; if ((d__1 = a[a_dim1 + 1], abs(d__1)) == 0.) { *rank = 0; i__1 = max(*m,*n); _starpu_dlaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb); goto L70; } else { *rank = 1; } L10: if (*rank < mn) { i__ = *rank + 1; _starpu_dlaic1_(&c__2, rank, &work[ismin], &smin, &a[i__ * a_dim1 + 1], &a[ i__ + i__ * a_dim1], &sminpr, &s1, &c1); _starpu_dlaic1_(&c__1, rank, &work[ismax], &smax, &a[i__ * a_dim1 + 1], &a[ i__ + i__ * a_dim1], &smaxpr, &s2, &c2); if (smaxpr * *rcond <= sminpr) { i__1 = *rank; for (i__ = 1; i__ <= i__1; ++i__) { work[ismin + i__ - 1] = s1 * work[ismin + i__ - 1]; work[ismax + i__ - 1] = s2 * work[ismax + i__ - 1]; /* L20: */ } work[ismin + *rank] = c1; work[ismax + *rank] = c2; smin = sminpr; smax = smaxpr; ++(*rank); goto L10; } } /* workspace: 3*MN. */ /* Logically partition R = [ R11 R12 ] */ /* [ 0 R22 ] */ /* where R11 = R(1:RANK,1:RANK) */ /* [R11,R12] = [ T11, 0 ] * Y */ if (*rank < *n) { i__1 = *lwork - (mn << 1); _starpu_dtzrzf_(rank, n, &a[a_offset], lda, &work[mn + 1], &work[(mn << 1) + 1], &i__1, info); } /* workspace: 2*MN. */ /* Details of Householder rotations stored in WORK(MN+1:2*MN) */ /* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) */ i__1 = *lwork - (mn << 1); _starpu_dormqr_("Left", "Transpose", m, nrhs, &mn, &a[a_offset], lda, &work[1], & b[b_offset], ldb, &work[(mn << 1) + 1], &i__1, info); /* Computing MAX */ d__1 = wsize, d__2 = (mn << 1) + work[(mn << 1) + 1]; wsize = max(d__1,d__2); /* workspace: 2*MN+NB*NRHS. */ /* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) */ _starpu_dtrsm_("Left", "Upper", "No transpose", "Non-unit", rank, nrhs, &c_b54, & a[a_offset], lda, &b[b_offset], ldb); i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = *rank + 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = 0.; /* L30: */ } /* L40: */ } /* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) */ if (*rank < *n) { i__1 = *n - *rank; i__2 = *lwork - (mn << 1); _starpu_dormrz_("Left", "Transpose", n, nrhs, rank, &i__1, &a[a_offset], lda, &work[mn + 1], &b[b_offset], ldb, &work[(mn << 1) + 1], &i__2, info); } /* workspace: 2*MN+NRHS. */ /* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[jpvt[i__]] = b[i__ + j * b_dim1]; /* L50: */ } _starpu_dcopy_(n, &work[1], &c__1, &b[j * b_dim1 + 1], &c__1); /* L60: */ } /* workspace: N. */ /* Undo scaling */ if (iascl == 1) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info); _starpu_dlascl_("U", &c__0, &c__0, &smlnum, &anrm, rank, rank, &a[a_offset], lda, info); } else if (iascl == 2) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info); _starpu_dlascl_("U", &c__0, &c__0, &bignum, &anrm, rank, rank, &a[a_offset], lda, info); } if (ibscl == 1) { _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } else if (ibscl == 2) { _starpu_dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } L70: work[1] = (doublereal) lwkopt; return 0; /* End of DGELSY */ } /* _starpu_dgelsy_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgeql2.c000066400000000000000000000111231507764646700205500ustar00rootroot00000000000000/* dgeql2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dgeql2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, k; doublereal aii; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dlarfp_(integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEQL2 computes a QL factorization of a real m by n matrix A: */ /* A = Q * L. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the m by n matrix A. */ /* On exit, if m >= n, the lower triangle of the subarray */ /* A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; */ /* if m <= n, the elements on and below the (n-m)-th */ /* superdiagonal contain the m by n lower trapezoidal matrix L; */ /* the remaining elements, with the array TAU, represent the */ /* orthogonal matrix Q as a product of elementary reflectors */ /* (see Further Details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(k) . . . H(2) H(1), where k = min(m,n). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */ /* A(1:m-k+i-1,n-k+i), and tau in TAU(i). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEQL2", &i__1); return 0; } k = min(*m,*n); for (i__ = k; i__ >= 1; --i__) { /* Generate elementary reflector H(i) to annihilate */ /* A(1:m-k+i-1,n-k+i) */ i__1 = *m - k + i__; _starpu_dlarfp_(&i__1, &a[*m - k + i__ + (*n - k + i__) * a_dim1], &a[(*n - k + i__) * a_dim1 + 1], &c__1, &tau[i__]); /* Apply H(i) to A(1:m-k+i,1:n-k+i-1) from the left */ aii = a[*m - k + i__ + (*n - k + i__) * a_dim1]; a[*m - k + i__ + (*n - k + i__) * a_dim1] = 1.; i__1 = *m - k + i__; i__2 = *n - k + i__ - 1; _starpu_dlarf_("Left", &i__1, &i__2, &a[(*n - k + i__) * a_dim1 + 1], &c__1, & tau[i__], &a[a_offset], lda, &work[1]); a[*m - k + i__ + (*n - k + i__) * a_dim1] = aii; /* L10: */ } return 0; /* End of DGEQL2 */ } /* _starpu_dgeql2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgeqlf.c000066400000000000000000000173341507764646700206460ustar00rootroot00000000000000/* dgeqlf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; /* Subroutine */ int _starpu_dgeqlf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, k, ib, nb, ki, kk, mu, nu, nx, iws, nbmin, iinfo; extern /* Subroutine */ int _starpu_dgeql2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEQLF computes a QL factorization of a real M-by-N matrix A: */ /* A = Q * L. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, */ /* if m >= n, the lower triangle of the subarray */ /* A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; */ /* if m <= n, the elements on and below the (n-m)-th */ /* superdiagonal contain the M-by-N lower trapezoidal matrix L; */ /* the remaining elements, with the array TAU, represent the */ /* orthogonal matrix Q as a product of elementary reflectors */ /* (see Further Details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,N). */ /* For optimum performance LWORK >= N*NB, where NB is the */ /* optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(k) . . . H(2) H(1), where k = min(m,n). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */ /* A(1:m-k+i-1,n-k+i), and tau in TAU(i). */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info == 0) { k = min(*m,*n); if (k == 0) { lwkopt = 1; } else { nb = _starpu_ilaenv_(&c__1, "DGEQLF", " ", m, n, &c_n1, &c_n1); lwkopt = *n * nb; } work[1] = (doublereal) lwkopt; if (*lwork < max(1,*n) && ! lquery) { *info = -7; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEQLF", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (k == 0) { return 0; } nbmin = 2; nx = 1; iws = *n; if (nb > 1 && nb < k) { /* Determine when to cross over from blocked to unblocked code. */ /* Computing MAX */ i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGEQLF", " ", m, n, &c_n1, &c_n1); nx = max(i__1,i__2); if (nx < k) { /* Determine if workspace is large enough for blocked code. */ ldwork = *n; iws = ldwork * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: reduce NB and */ /* determine the minimum value of NB. */ nb = *lwork / ldwork; /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGEQLF", " ", m, n, &c_n1, & c_n1); nbmin = max(i__1,i__2); } } } if (nb >= nbmin && nb < k && nx < k) { /* Use blocked code initially. */ /* The last kk columns are handled by the block method. */ ki = (k - nx - 1) / nb * nb; /* Computing MIN */ i__1 = k, i__2 = ki + nb; kk = min(i__1,i__2); i__1 = k - kk + 1; i__2 = -nb; for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = k - i__ + 1; ib = min(i__3,nb); /* Compute the QL factorization of the current block */ /* A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) */ i__3 = *m - k + i__ + ib - 1; _starpu_dgeql2_(&i__3, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, &tau[ i__], &work[1], &iinfo); if (*n - k + i__ > 1) { /* Form the triangular factor of the block reflector */ /* H = H(i+ib-1) . . . H(i+1) H(i) */ i__3 = *m - k + i__ + ib - 1; _starpu_dlarft_("Backward", "Columnwise", &i__3, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, &tau[i__], &work[1], &ldwork); /* Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */ i__3 = *m - k + i__ + ib - 1; i__4 = *n - k + i__ - 1; _starpu_dlarfb_("Left", "Transpose", "Backward", "Columnwise", &i__3, &i__4, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, & work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], & ldwork); } /* L10: */ } mu = *m - k + i__ + nb - 1; nu = *n - k + i__ + nb - 1; } else { mu = *m; nu = *n; } /* Use unblocked code to factor the last or only block */ if (mu > 0 && nu > 0) { _starpu_dgeql2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo); } work[1] = (doublereal) iws; return 0; /* End of DGEQLF */ } /* _starpu_dgeqlf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgeqp3.c000066400000000000000000000236131507764646700205640ustar00rootroot00000000000000/* dgeqp3.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; /* Subroutine */ int _starpu_dgeqp3_(integer *m, integer *n, doublereal *a, integer * lda, integer *jpvt, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer j, jb, na, nb, sm, sn, nx, fjb, iws, nfxd; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); integer nbmin, minmn; extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); integer minws; extern /* Subroutine */ int _starpu_dlaqp2_(integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dlaqps_(integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *); integer topbmn, sminmn; extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEQP3 computes a QR factorization with column pivoting of a */ /* matrix A: A*P = Q*R using Level 3 BLAS. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, the upper triangle of the array contains the */ /* min(M,N)-by-N upper trapezoidal matrix R; the elements below */ /* the diagonal, together with the array TAU, represent the */ /* orthogonal matrix Q as a product of min(M,N) elementary */ /* reflectors. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* JPVT (input/output) INTEGER array, dimension (N) */ /* On entry, if JPVT(J).ne.0, the J-th column of A is permuted */ /* to the front of A*P (a leading column); if JPVT(J)=0, */ /* the J-th column of A is a free column. */ /* On exit, if JPVT(J)=K, then the J-th column of A*P was the */ /* the K-th column of A. */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO=0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= 3*N+1. */ /* For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB */ /* is the optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real/complex scalar, and v is a real/complex vector */ /* with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in */ /* A(i+1:m,i), and tau in TAU(i). */ /* Based on contributions by */ /* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */ /* X. Sun, Computer Science Dept., Duke University, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test input arguments */ /* ==================== */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --jpvt; --tau; --work; /* Function Body */ *info = 0; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info == 0) { minmn = min(*m,*n); if (minmn == 0) { iws = 1; lwkopt = 1; } else { iws = *n * 3 + 1; nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); lwkopt = (*n << 1) + (*n + 1) * nb; } work[1] = (doublereal) lwkopt; if (*lwork < iws && ! lquery) { *info = -8; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEQP3", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible. */ if (minmn == 0) { return 0; } /* Move initial columns up front. */ nfxd = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (jpvt[j] != 0) { if (j != nfxd) { _starpu_dswap_(m, &a[j * a_dim1 + 1], &c__1, &a[nfxd * a_dim1 + 1], & c__1); jpvt[j] = jpvt[nfxd]; jpvt[nfxd] = j; } else { jpvt[j] = j; } ++nfxd; } else { jpvt[j] = j; } /* L10: */ } --nfxd; /* Factorize fixed columns */ /* ======================= */ /* Compute the QR factorization of fixed columns and update */ /* remaining columns. */ if (nfxd > 0) { na = min(*m,nfxd); /* CC CALL DGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) */ _starpu_dgeqrf_(m, &na, &a[a_offset], lda, &tau[1], &work[1], lwork, info); /* Computing MAX */ i__1 = iws, i__2 = (integer) work[1]; iws = max(i__1,i__2); if (na < *n) { /* CC CALL DORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA, */ /* CC $ TAU, A( 1, NA+1 ), LDA, WORK, INFO ) */ i__1 = *n - na; _starpu_dormqr_("Left", "Transpose", m, &i__1, &na, &a[a_offset], lda, & tau[1], &a[(na + 1) * a_dim1 + 1], lda, &work[1], lwork, info); /* Computing MAX */ i__1 = iws, i__2 = (integer) work[1]; iws = max(i__1,i__2); } } /* Factorize free columns */ /* ====================== */ if (nfxd < minmn) { sm = *m - nfxd; sn = *n - nfxd; sminmn = minmn - nfxd; /* Determine the block size. */ nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", &sm, &sn, &c_n1, &c_n1); nbmin = 2; nx = 0; if (nb > 1 && nb < sminmn) { /* Determine when to cross over from blocked to unblocked code. */ /* Computing MAX */ i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGEQRF", " ", &sm, &sn, &c_n1, & c_n1); nx = max(i__1,i__2); if (nx < sminmn) { /* Determine if workspace is large enough for blocked code. */ minws = (sn << 1) + (sn + 1) * nb; iws = max(iws,minws); if (*lwork < minws) { /* Not enough workspace to use optimal NB: Reduce NB and */ /* determine the minimum value of NB. */ nb = (*lwork - (sn << 1)) / (sn + 1); /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGEQRF", " ", &sm, &sn, & c_n1, &c_n1); nbmin = max(i__1,i__2); } } } /* Initialize partial column norms. The first N elements of work */ /* store the exact column norms. */ i__1 = *n; for (j = nfxd + 1; j <= i__1; ++j) { work[j] = _starpu_dnrm2_(&sm, &a[nfxd + 1 + j * a_dim1], &c__1); work[*n + j] = work[j]; /* L20: */ } if (nb >= nbmin && nb < sminmn && nx < sminmn) { /* Use blocked code initially. */ j = nfxd + 1; /* Compute factorization: while loop. */ topbmn = minmn - nx; L30: if (j <= topbmn) { /* Computing MIN */ i__1 = nb, i__2 = topbmn - j + 1; jb = min(i__1,i__2); /* Factorize JB columns among columns J:N. */ i__1 = *n - j + 1; i__2 = j - 1; i__3 = *n - j + 1; _starpu_dlaqps_(m, &i__1, &i__2, &jb, &fjb, &a[j * a_dim1 + 1], lda, & jpvt[j], &tau[j], &work[j], &work[*n + j], &work[(*n << 1) + 1], &work[(*n << 1) + jb + 1], &i__3); j += fjb; goto L30; } } else { j = nfxd + 1; } /* Use unblocked code to factor the last or only block. */ if (j <= minmn) { i__1 = *n - j + 1; i__2 = j - 1; _starpu_dlaqp2_(m, &i__1, &i__2, &a[j * a_dim1 + 1], lda, &jpvt[j], &tau[ j], &work[j], &work[*n + j], &work[(*n << 1) + 1]); } } work[1] = (doublereal) iws; return 0; /* End of DGEQP3 */ } /* _starpu_dgeqp3_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgeqpf.c000066400000000000000000000207761507764646700206560ustar00rootroot00000000000000/* dgeqpf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dgeqpf_(integer *m, integer *n, doublereal *a, integer * lda, integer *jpvt, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, ma, mn; doublereal aii; integer pvt; doublereal temp; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); doublereal temp2, tol3z; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *); integer itemp; extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dgeqr2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dorm2r_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dlarfp_(integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); /* -- LAPACK deprecated driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This routine is deprecated and has been replaced by routine DGEQP3. */ /* DGEQPF computes a QR factorization with column pivoting of a */ /* real M-by-N matrix A: A*P = Q*R. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0 */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, the upper triangle of the array contains the */ /* min(M,N)-by-N upper triangular matrix R; the elements */ /* below the diagonal, together with the array TAU, */ /* represent the orthogonal matrix Q as a product of */ /* min(m,n) elementary reflectors. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* JPVT (input/output) INTEGER array, dimension (N) */ /* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted */ /* to the front of A*P (a leading column); if JPVT(i) = 0, */ /* the i-th column of A is a free column. */ /* On exit, if JPVT(i) = k, then the i-th column of A*P */ /* was the k-th column of A. */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(1) H(2) . . . H(n) */ /* Each H(i) has the form */ /* H = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i). */ /* The matrix P is represented in jpvt as follows: If */ /* jpvt(j) = i */ /* then the jth column of P is the ith canonical unit vector. */ /* Partial column norm updating strategy modified by */ /* Z. Drmac and Z. Bujanovic, Dept. of Mathematics, */ /* University of Zagreb, Croatia. */ /* June 2006. */ /* For more details see LAPACK Working Note 176. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --jpvt; --tau; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEQPF", &i__1); return 0; } mn = min(*m,*n); tol3z = sqrt(_starpu_dlamch_("Epsilon")); /* Move initial columns up front */ itemp = 1; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (jpvt[i__] != 0) { if (i__ != itemp) { _starpu_dswap_(m, &a[i__ * a_dim1 + 1], &c__1, &a[itemp * a_dim1 + 1], &c__1); jpvt[i__] = jpvt[itemp]; jpvt[itemp] = i__; } else { jpvt[i__] = i__; } ++itemp; } else { jpvt[i__] = i__; } /* L10: */ } --itemp; /* Compute the QR factorization and update remaining columns */ if (itemp > 0) { ma = min(itemp,*m); _starpu_dgeqr2_(m, &ma, &a[a_offset], lda, &tau[1], &work[1], info); if (ma < *n) { i__1 = *n - ma; _starpu_dorm2r_("Left", "Transpose", m, &i__1, &ma, &a[a_offset], lda, & tau[1], &a[(ma + 1) * a_dim1 + 1], lda, &work[1], info); } } if (itemp < mn) { /* Initialize partial column norms. The first n elements of */ /* work store the exact column norms. */ i__1 = *n; for (i__ = itemp + 1; i__ <= i__1; ++i__) { i__2 = *m - itemp; work[i__] = _starpu_dnrm2_(&i__2, &a[itemp + 1 + i__ * a_dim1], &c__1); work[*n + i__] = work[i__]; /* L20: */ } /* Compute factorization */ i__1 = mn; for (i__ = itemp + 1; i__ <= i__1; ++i__) { /* Determine ith pivot column and swap if necessary */ i__2 = *n - i__ + 1; pvt = i__ - 1 + _starpu_idamax_(&i__2, &work[i__], &c__1); if (pvt != i__) { _starpu_dswap_(m, &a[pvt * a_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], & c__1); itemp = jpvt[pvt]; jpvt[pvt] = jpvt[i__]; jpvt[i__] = itemp; work[pvt] = work[i__]; work[*n + pvt] = work[*n + i__]; } /* Generate elementary reflector H(i) */ if (i__ < *m) { i__2 = *m - i__ + 1; _starpu_dlarfp_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[i__]); } else { _starpu_dlarfp_(&c__1, &a[*m + *m * a_dim1], &a[*m + *m * a_dim1], & c__1, &tau[*m]); } if (i__ < *n) { /* Apply H(i) to A(i:m,i+1:n) from the left */ aii = a[i__ + i__ * a_dim1]; a[i__ + i__ * a_dim1] = 1.; i__2 = *m - i__ + 1; i__3 = *n - i__; _starpu_dlarf_("LEFT", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, & tau[i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[(* n << 1) + 1]); a[i__ + i__ * a_dim1] = aii; } /* Update partial column norms */ i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { if (work[j] != 0.) { /* NOTE: The following 4 lines follow from the analysis in */ /* Lapack Working Note 176. */ temp = (d__1 = a[i__ + j * a_dim1], abs(d__1)) / work[j]; /* Computing MAX */ d__1 = 0., d__2 = (temp + 1.) * (1. - temp); temp = max(d__1,d__2); /* Computing 2nd power */ d__1 = work[j] / work[*n + j]; temp2 = temp * (d__1 * d__1); if (temp2 <= tol3z) { if (*m - i__ > 0) { i__3 = *m - i__; work[j] = _starpu_dnrm2_(&i__3, &a[i__ + 1 + j * a_dim1], &c__1); work[*n + j] = work[j]; } else { work[j] = 0.; work[*n + j] = 0.; } } else { work[j] *= sqrt(temp); } } /* L30: */ } /* L40: */ } } return 0; /* End of DGEQPF */ } /* _starpu_dgeqpf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgeqr2.c000066400000000000000000000107321507764646700205630ustar00rootroot00000000000000/* dgeqr2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dgeqr2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__, k; doublereal aii; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dlarfp_(integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEQR2 computes a QR factorization of a real m by n matrix A: */ /* A = Q * R. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the m by n matrix A. */ /* On exit, the elements on and above the diagonal of the array */ /* contain the min(m,n) by n upper trapezoidal matrix R (R is */ /* upper triangular if m >= n); the elements below the diagonal, */ /* with the array TAU, represent the orthogonal matrix Q as a */ /* product of elementary reflectors (see Further Details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ /* and tau in TAU(i). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEQR2", &i__1); return 0; } k = min(*m,*n); i__1 = k; for (i__ = 1; i__ <= i__1; ++i__) { /* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ i__2 = *m - i__ + 1; /* Computing MIN */ i__3 = i__ + 1; _starpu_dlarfp_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m)+ i__ * a_dim1] , &c__1, &tau[i__]); if (i__ < *n) { /* Apply H(i) to A(i:m,i+1:n) from the left */ aii = a[i__ + i__ * a_dim1]; a[i__ + i__ * a_dim1] = 1.; i__2 = *m - i__ + 1; i__3 = *n - i__; _starpu_dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); a[i__ + i__ * a_dim1] = aii; } /* L10: */ } return 0; /* End of DGEQR2 */ } /* _starpu_dgeqr2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgeqrf.c000066400000000000000000000164271507764646700206560ustar00rootroot00000000000000/* dgeqrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; /* Subroutine */ int _starpu_dgeqrf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, k, ib, nb, nx, iws, nbmin, iinfo; extern /* Subroutine */ int _starpu_dgeqr2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGEQRF computes a QR factorization of a real M-by-N matrix A: */ /* A = Q * R. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, the elements on and above the diagonal of the array */ /* contain the min(M,N)-by-N upper trapezoidal matrix R (R is */ /* upper triangular if m >= n); the elements below the diagonal, */ /* with the array TAU, represent the orthogonal matrix Q as a */ /* product of min(m,n) elementary reflectors (see Further */ /* Details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,N). */ /* For optimum performance LWORK >= N*NB, where NB is */ /* the optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ /* and tau in TAU(i). */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; nb = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); lwkopt = *n * nb; work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } else if (*lwork < max(1,*n) && ! lquery) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGEQRF", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ k = min(*m,*n); if (k == 0) { work[1] = 1.; return 0; } nbmin = 2; nx = 0; iws = *n; if (nb > 1 && nb < k) { /* Determine when to cross over from blocked to unblocked code. */ /* Computing MAX */ i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1); nx = max(i__1,i__2); if (nx < k) { /* Determine if workspace is large enough for blocked code. */ ldwork = *n; iws = ldwork * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: reduce NB and */ /* determine the minimum value of NB. */ nb = *lwork / ldwork; /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, & c_n1); nbmin = max(i__1,i__2); } } } if (nb >= nbmin && nb < k && nx < k) { /* Use blocked code initially */ i__1 = k - nx; i__2 = nb; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = k - i__ + 1; ib = min(i__3,nb); /* Compute the QR factorization of the current block */ /* A(i:m,i:i+ib-1) */ i__3 = *m - i__ + 1; _starpu_dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ 1], &iinfo); if (i__ + ib <= *n) { /* Form the triangular factor of the block reflector */ /* H = H(i) H(i+1) . . . H(i+ib-1) */ i__3 = *m - i__ + 1; _starpu_dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &ldwork); /* Apply H' to A(i:m,i+ib:n) from the left */ i__3 = *m - i__ + 1; i__4 = *n - i__ - ib + 1; _starpu_dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + 1], &ldwork); } /* L10: */ } } else { i__ = 1; } /* Use unblocked code to factor the last or only block. */ if (i__ <= k) { i__2 = *m - i__ + 1; i__1 = *n - i__ + 1; _starpu_dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] , &iinfo); } work[1] = (doublereal) iws; return 0; /* End of DGEQRF */ } /* _starpu_dgeqrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgerfs.c000066400000000000000000000304551507764646700206550ustar00rootroot00000000000000/* dgerfs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b15 = -1.; static doublereal c_b17 = 1.; /* Subroutine */ int _starpu_dgerfs_(char *trans, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer * ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j, k; doublereal s, xk; integer nz; doublereal eps; integer kase; doublereal safe1, safe2; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer isave[3]; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer count; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dgetrs_( char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); logical notran; char transt[1]; doublereal lstres; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGERFS improves the computed solution to a system of linear */ /* equations and provides error bounds and backward error estimates for */ /* the solution. */ /* Arguments */ /* ========= */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The original N-by-N matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The factors L and U from the factorization A = P*L*U */ /* as computed by DGETRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices from DGETRF; for 1<=i<=N, row i of the */ /* matrix was interchanged with row IPIV(i). */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* On entry, the solution matrix X, as computed by DGETRS. */ /* On exit, the improved solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Internal Parameters */ /* =================== */ /* ITMAX is the maximum number of steps of iterative refinement. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; notran = _starpu_lsame_(trans, "N"); if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_( trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldaf < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -10; } else if (*ldx < max(1,*n)) { *info = -12; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGERFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } if (notran) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = *n + 1; eps = _starpu_dlamch_("Epsilon"); safmin = _starpu_dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - op(A) * X, */ /* where op(A) = A, A**T, or A**H, depending on TRANS. */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); _starpu_dgemv_(trans, n, n, &c_b15, &a[a_offset], lda, &x[j * x_dim1 + 1], & c__1, &c_b17, &work[*n + 1], &c__1); /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. If the i-th component of the denominator is less */ /* than SAFE2, then SAFE1 is added to the i-th components of the */ /* numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); /* L30: */ } /* Compute abs(op(A))*abs(X) + abs(B). */ if (notran) { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x[k + j * x_dim1], abs(d__1)); i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * xk; /* L40: */ } /* L50: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = x[ i__ + j * x_dim1], abs(d__2)); /* L60: */ } work[k] += s; /* L70: */ } } s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L80: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if */ /* 1) The residual BERR(J) is larger than machine epsilon, and */ /* 2) BERR(J) decreased by at least a factor of 2 during the */ /* last iteration, and */ /* 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { /* Update solution and try again. */ _starpu_dgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[*n + 1], n, info); _starpu_daxpy_(n, &c_b17, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) ; lstres = berr[j]; ++count; goto L20; } /* Bound error from formula */ /* norm(X - XTRUE) / norm(X) .le. FERR = */ /* norm( abs(inv(op(A)))* */ /* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ /* where */ /* norm(Z) is the magnitude of the largest component of Z */ /* inv(op(A)) is the inverse of op(A) */ /* abs(Z) is the componentwise absolute value of the matrix or */ /* vector Z */ /* NZ is the maximum number of nonzeros in any row of A, plus 1 */ /* EPS is machine epsilon */ /* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ /* is incremented by SAFE1 if the i-th component of */ /* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ /* Use DLACN2 to estimate the infinity-norm of the matrix */ /* inv(op(A)) * diag(W), */ /* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L90: */ } kase = 0; L100: _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase, isave); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(op(A)**T). */ _starpu_dgetrs_(transt, n, &c__1, &af[af_offset], ldaf, &ipiv[1], & work[*n + 1], n, info); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L110: */ } } else { /* Multiply by inv(op(A))*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L120: */ } _starpu_dgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], & work[*n + 1], n, info); } goto L100; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); lstres = max(d__2,d__3); /* L130: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L140: */ } return 0; /* End of DGERFS */ } /* _starpu_dgerfs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgerfsx.c000066400000000000000000000641351507764646700210470ustar00rootroot00000000000000/* dgerfsx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c_n1 = -1; static integer c__0 = 0; static integer c__1 = 1; /* Subroutine */ int _starpu_dgerfsx_(char *trans, char *equed, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, doublereal *params, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, err_bnds_comp_dim1, err_bnds_comp_offset, i__1; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal illrcond_thresh__, unstable_thresh__, err_lbnd__; integer ref_type__; extern integer _starpu_ilatrans_(char *); integer j; doublereal rcond_tmp__; integer prec_type__, trans_type__; extern doublereal _starpu_dla_gercond__(char *, integer *, doublereal *, integer * , doublereal *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen); doublereal cwise_wrong__; extern /* Subroutine */ int _starpu_dla_gerfsx_extended__(integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, logical *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *); char norm[1]; logical ignore_cwise__; extern logical _starpu_lsame_(char *, char *); doublereal anorm; extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dgecon_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); logical colequ, notran, rowequ; extern integer _starpu_ilaprec_(char *); integer ithresh, n_norms__; doublereal rthresh; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGERFSX improves the computed solution to a system of linear */ /* equations and provides error bounds and backward error estimates */ /* for the solution. In addition to normwise error bound, the code */ /* provides maximum componentwise error bound if possible. See */ /* comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the */ /* error bounds. */ /* The original system of linear equations may have been equilibrated */ /* before calling this routine, as described by arguments EQUED, R */ /* and C below. In this case, the solution and error bounds returned */ /* are for the original unequilibrated system. */ /* Arguments */ /* ========= */ /* Some optional parameters are bundled in the PARAMS array. These */ /* settings determine how refinement is performed, but often the */ /* defaults are acceptable. If the defaults are acceptable, users */ /* can pass NPARAMS = 0 which prevents the source code from accessing */ /* the PARAMS argument. */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ /* EQUED (input) CHARACTER*1 */ /* Specifies the form of equilibration that was done to A */ /* before calling this routine. This is needed to compute */ /* the solution and error bounds correctly. */ /* = 'N': No equilibration */ /* = 'R': Row equilibration, i.e., A has been premultiplied by */ /* diag(R). */ /* = 'C': Column equilibration, i.e., A has been postmultiplied */ /* by diag(C). */ /* = 'B': Both row and column equilibration, i.e., A has been */ /* replaced by diag(R) * A * diag(C). */ /* The right hand side B has been changed accordingly. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The original N-by-N matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The factors L and U from the factorization A = P*L*U */ /* as computed by DGETRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices from DGETRF; for 1<=i<=N, row i of the */ /* matrix was interchanged with row IPIV(i). */ /* R (input or output) DOUBLE PRECISION array, dimension (N) */ /* The row scale factors for A. If EQUED = 'R' or 'B', A is */ /* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ /* is not accessed. R is an input argument if FACT = 'F'; */ /* otherwise, R is an output argument. If FACT = 'F' and */ /* EQUED = 'R' or 'B', each element of R must be positive. */ /* If R is output, each element of R is a power of the radix. */ /* If R is input, each element of R should be a power of the radix */ /* to ensure a reliable solution and error estimates. Scaling by */ /* powers of the radix does not cause rounding errors unless the */ /* result underflows or overflows. Rounding errors during scaling */ /* lead to refining with a matrix that is not equivalent to the */ /* input matrix, producing error estimates that may not be */ /* reliable. */ /* C (input or output) DOUBLE PRECISION array, dimension (N) */ /* The column scale factors for A. If EQUED = 'C' or 'B', A is */ /* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ /* is not accessed. C is an input argument if FACT = 'F'; */ /* otherwise, C is an output argument. If FACT = 'F' and */ /* EQUED = 'C' or 'B', each element of C must be positive. */ /* If C is output, each element of C is a power of the radix. */ /* If C is input, each element of C should be a power of the radix */ /* to ensure a reliable solution and error estimates. Scaling by */ /* powers of the radix does not cause rounding errors unless the */ /* result underflows or overflows. Rounding errors during scaling */ /* lead to refining with a matrix that is not equivalent to the */ /* input matrix, producing error estimates that may not be */ /* reliable. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* On entry, the solution matrix X, as computed by DGETRS. */ /* On exit, the improved solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* Reciprocal scaled condition number. This is an estimate of the */ /* reciprocal Skeel condition number of the matrix A after */ /* equilibration (if done). If this is less than the machine */ /* precision (in particular, if it is zero), the matrix is singular */ /* to working precision. Note that the error may still be small even */ /* if this number is very small and the matrix appears ill- */ /* conditioned. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* Componentwise relative backward error. This is the */ /* componentwise relative backward error of each solution vector X(j) */ /* (i.e., the smallest relative change in any element of A or B that */ /* makes X(j) an exact solution). */ /* N_ERR_BNDS (input) INTEGER */ /* Number of error bounds to return for each right hand side */ /* and each type (normwise or componentwise). See ERR_BNDS_NORM and */ /* ERR_BNDS_COMP below. */ /* ERR_BNDS_NORM (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* normwise relative error, which is defined as follows: */ /* Normwise relative error in the ith solution vector: */ /* max_j (abs(XTRUE(j,i) - X(j,i))) */ /* ------------------------------ */ /* max_j abs(X(j,i)) */ /* The array is indexed by the type of error information as described */ /* below. There currently are up to three pieces of information */ /* returned. */ /* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_NORM(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * dlamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * dlamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated normwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * dlamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*A, where S scales each row by a power of the */ /* radix so all absolute row sums of Z are approximately 1. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* ERR_BNDS_COMP (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* componentwise relative error, which is defined as follows: */ /* Componentwise relative error in the ith solution vector: */ /* abs(XTRUE(j,i) - X(j,i)) */ /* max_j ---------------------- */ /* abs(X(j,i)) */ /* The array is indexed by the right-hand side i (on which the */ /* componentwise relative error depends), and the type of error */ /* information as described below. There currently are up to three */ /* pieces of information returned for each right-hand side. If */ /* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ /* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ /* the first (:,N_ERR_BNDS) entries are returned. */ /* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_COMP(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * dlamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * dlamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated componentwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * dlamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*(A*diag(x)), where x is the solution for the */ /* current right-hand side and S scales each row of */ /* A*diag(x) by a power of the radix so all absolute row */ /* sums of Z are approximately 1. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* NPARAMS (input) INTEGER */ /* Specifies the number of parameters set in PARAMS. If .LE. 0, the */ /* PARAMS array is never referenced and default values are used. */ /* PARAMS (input / output) DOUBLE PRECISION array, dimension NPARAMS */ /* Specifies algorithm parameters. If an entry is .LT. 0.0, then */ /* that entry will be filled with default value used for that */ /* parameter. Only positions up to NPARAMS are accessed; defaults */ /* are used for higher-numbered parameters. */ /* PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ /* refinement or not. */ /* Default: 1.0D+0 */ /* = 0.0 : No refinement is performed, and no error bounds are */ /* computed. */ /* = 1.0 : Use the double-precision refinement algorithm, */ /* possibly with doubled-single computations if the */ /* compilation environment does not support DOUBLE */ /* PRECISION. */ /* (other values are reserved for future use) */ /* PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ /* computations allowed for refinement. */ /* Default: 10 */ /* Aggressive: Set to 100 to permit convergence using approximate */ /* factorizations or factorizations other than LU. If */ /* the factorization uses a technique other than */ /* Gaussian elimination, the guarantees in */ /* err_bnds_norm and err_bnds_comp may no longer be */ /* trustworthy. */ /* PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ /* will attempt to find a solution with small componentwise */ /* relative error in the double-precision algorithm. Positive */ /* is true, 0.0 is false. */ /* Default: 1.0 (attempt componentwise convergence) */ /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: Successful exit. The solution to every right-hand side is */ /* guaranteed. */ /* < 0: If INFO = -i, the i-th argument had an illegal value */ /* > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly singular, so */ /* the solution and error bounds could not be computed. RCOND = 0 */ /* is returned. */ /* = N+J: The solution corresponding to the Jth right-hand side is */ /* not guaranteed. The solutions corresponding to other right- */ /* hand sides K with K > J may not be guaranteed as well, but */ /* only the first such right-hand side is reported. If a small */ /* componentwise error is not requested (PARAMS(3) = 0.0) then */ /* the Jth right-hand side is the first with a normwise error */ /* bound that is not guaranteed (the smallest J such */ /* that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ /* the Jth right-hand side is the first with either a normwise or */ /* componentwise error bound that is not guaranteed (the smallest */ /* J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ /* ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ /* ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ /* about all of the right-hand sides check ERR_BNDS_NORM or */ /* ERR_BNDS_COMP. */ /* ================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Check the input parameters. */ /* Parameter adjustments */ err_bnds_comp_dim1 = *nrhs; err_bnds_comp_offset = 1 + err_bnds_comp_dim1; err_bnds_comp__ -= err_bnds_comp_offset; err_bnds_norm_dim1 = *nrhs; err_bnds_norm_offset = 1 + err_bnds_norm_dim1; err_bnds_norm__ -= err_bnds_norm_offset; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --ipiv; --r__; --c__; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --berr; --params; --work; --iwork; /* Function Body */ *info = 0; trans_type__ = _starpu_ilatrans_(trans); ref_type__ = 1; if (*nparams >= 1) { if (params[1] < 0.) { params[1] = 1.; } else { ref_type__ = (integer) params[1]; } } /* Set default parameters. */ illrcond_thresh__ = (doublereal) (*n) * _starpu_dlamch_("Epsilon"); ithresh = 10; rthresh = .5; unstable_thresh__ = .25; ignore_cwise__ = FALSE_; if (*nparams >= 2) { if (params[2] < 0.) { params[2] = (doublereal) ithresh; } else { ithresh = (integer) params[2]; } } if (*nparams >= 3) { if (params[3] < 0.) { if (ignore_cwise__) { params[3] = 0.; } else { params[3] = 1.; } } else { ignore_cwise__ = params[3] == 0.; } } if (ref_type__ == 0 || *n_err_bnds__ == 0) { n_norms__ = 0; } else if (ignore_cwise__) { n_norms__ = 1; } else { n_norms__ = 2; } notran = _starpu_lsame_(trans, "N"); rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, "B"); colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, "B"); /* Test input parameters. */ if (trans_type__ == -1) { *info = -1; } else if (! rowequ && ! colequ && ! _starpu_lsame_(equed, "N")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldaf < max(1,*n)) { *info = -8; } else if (*ldb < max(1,*n)) { *info = -13; } else if (*ldx < max(1,*n)) { *info = -15; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGERFSX", &i__1); return 0; } /* Quick return if possible. */ if (*n == 0 || *nrhs == 0) { *rcond = 1.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { berr[j] = 0.; if (*n_err_bnds__ >= 1) { err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; } else if (*n_err_bnds__ >= 2) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 0.; err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 0.; } else if (*n_err_bnds__ >= 3) { err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 1.; } } return 0; } /* Default to failure. */ *rcond = 0.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { berr[j] = 1.; if (*n_err_bnds__ >= 1) { err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; } else if (*n_err_bnds__ >= 2) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; } else if (*n_err_bnds__ >= 3) { err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 0.; err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 0.; } } /* Compute the norm of A and the reciprocal of the condition */ /* number of A. */ if (notran) { *(unsigned char *)norm = 'I'; } else { *(unsigned char *)norm = '1'; } anorm = _starpu_dlange_(norm, n, n, &a[a_offset], lda, &work[1]); _starpu_dgecon_(norm, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1], info); /* Perform refinement on each right-hand side */ if (ref_type__ != 0) { prec_type__ = _starpu_ilaprec_("E"); if (notran) { _starpu_dla_gerfsx_extended__(&prec_type__, &trans_type__, n, nrhs, &a[ a_offset], lda, &af[af_offset], ldaf, &ipiv[1], &colequ, & c__[1], &b[b_offset], ldb, &x[x_offset], ldx, &berr[1], & n_norms__, &err_bnds_norm__[err_bnds_norm_offset], & err_bnds_comp__[err_bnds_comp_offset], &work[*n + 1], & work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh, &rthresh, &unstable_thresh__, &ignore_cwise__, info); } else { _starpu_dla_gerfsx_extended__(&prec_type__, &trans_type__, n, nrhs, &a[ a_offset], lda, &af[af_offset], ldaf, &ipiv[1], &rowequ, & r__[1], &b[b_offset], ldb, &x[x_offset], ldx, &berr[1], & n_norms__, &err_bnds_norm__[err_bnds_norm_offset], & err_bnds_comp__[err_bnds_comp_offset], &work[*n + 1], & work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh, &rthresh, &unstable_thresh__, &ignore_cwise__, info); } } /* Computing MAX */ d__1 = 10., d__2 = sqrt((doublereal) (*n)); err_lbnd__ = max(d__1,d__2) * _starpu_dlamch_("Epsilon"); if (*n_err_bnds__ >= 1 && n_norms__ >= 1) { /* Compute scaled normwise condition number cond(A*C). */ if (colequ && notran) { rcond_tmp__ = _starpu_dla_gercond__(trans, n, &a[a_offset], lda, &af[ af_offset], ldaf, &ipiv[1], &c_n1, &c__[1], info, &work[1] , &iwork[1], (ftnlen)1); } else if (rowequ && ! notran) { rcond_tmp__ = _starpu_dla_gercond__(trans, n, &a[a_offset], lda, &af[ af_offset], ldaf, &ipiv[1], &c_n1, &r__[1], info, &work[1] , &iwork[1], (ftnlen)1); } else { rcond_tmp__ = _starpu_dla_gercond__(trans, n, &a[a_offset], lda, &af[ af_offset], ldaf, &ipiv[1], &c__0, &r__[1], info, &work[1] , &iwork[1], (ftnlen)1); } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { /* Cap the error at 1.0. */ if (*n_err_bnds__ >= 2 && err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] > 1.) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; } /* Threshold the error (see LAWN). */ if (rcond_tmp__ < illrcond_thresh__) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; err_bnds_norm__[j + err_bnds_norm_dim1] = 0.; if (*info <= *n) { *info = *n + j; } } else if (err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] < err_lbnd__) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = err_lbnd__; err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; } /* Save the condition number. */ if (*n_err_bnds__ >= 3) { err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = rcond_tmp__; } } } if (*n_err_bnds__ >= 1 && n_norms__ >= 2) { /* Compute componentwise condition number cond(A*diag(Y(:,J))) for */ /* each right-hand side using the current solution as an estimate of */ /* the true solution. If the componentwise error estimate is too */ /* large, then the solution is a lousy estimate of truth and the */ /* estimated RCOND may be too optimistic. To avoid misleading users, */ /* the inverse condition number is set to 0.0 when the estimated */ /* cwise error is at least CWISE_WRONG. */ cwise_wrong__ = sqrt(_starpu_dlamch_("Epsilon")); i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < cwise_wrong__) { rcond_tmp__ = _starpu_dla_gercond__(trans, n, &a[a_offset], lda, &af[ af_offset], ldaf, &ipiv[1], &c__1, &x[j * x_dim1 + 1], info, &work[1], &iwork[1], (ftnlen)1); } else { rcond_tmp__ = 0.; } /* Cap the error at 1.0. */ if (*n_err_bnds__ >= 2 && err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] > 1.) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; } /* Threshold the error (see LAWN). */ if (rcond_tmp__ < illrcond_thresh__) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1] = 0.; if (params[3] == 1. && *info < *n + j) { *info = *n + j; } } else if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < err_lbnd__) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = err_lbnd__; err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; } /* Save the condition number. */ if (*n_err_bnds__ >= 3) { err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = rcond_tmp__; } } } return 0; /* End of DGERFSX */ } /* _starpu_dgerfsx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgerq2.c000066400000000000000000000110121507764646700205530ustar00rootroot00000000000000/* dgerq2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgerq2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, k; doublereal aii; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dlarfp_(integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGERQ2 computes an RQ factorization of a real m by n matrix A: */ /* A = R * Q. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the m by n matrix A. */ /* On exit, if m <= n, the upper triangle of the subarray */ /* A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; */ /* if m >= n, the elements on and above the (m-n)-th subdiagonal */ /* contain the m by n upper trapezoidal matrix R; the remaining */ /* elements, with the array TAU, represent the orthogonal matrix */ /* Q as a product of elementary reflectors (see Further */ /* Details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (M) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */ /* A(m-k+i,1:n-k+i-1), and tau in TAU(i). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGERQ2", &i__1); return 0; } k = min(*m,*n); for (i__ = k; i__ >= 1; --i__) { /* Generate elementary reflector H(i) to annihilate */ /* A(m-k+i,1:n-k+i-1) */ i__1 = *n - k + i__; _starpu_dlarfp_(&i__1, &a[*m - k + i__ + (*n - k + i__) * a_dim1], &a[*m - k + i__ + a_dim1], lda, &tau[i__]); /* Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right */ aii = a[*m - k + i__ + (*n - k + i__) * a_dim1]; a[*m - k + i__ + (*n - k + i__) * a_dim1] = 1.; i__1 = *m - k + i__ - 1; i__2 = *n - k + i__; _starpu_dlarf_("Right", &i__1, &i__2, &a[*m - k + i__ + a_dim1], lda, &tau[ i__], &a[a_offset], lda, &work[1]); a[*m - k + i__ + (*n - k + i__) * a_dim1] = aii; /* L10: */ } return 0; /* End of DGERQ2 */ } /* _starpu_dgerq2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgerqf.c000066400000000000000000000173131507764646700206510ustar00rootroot00000000000000/* dgerqf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; /* Subroutine */ int _starpu_dgerqf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, k, ib, nb, ki, kk, mu, nu, nx, iws, nbmin, iinfo; extern /* Subroutine */ int _starpu_dgerq2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGERQF computes an RQ factorization of a real M-by-N matrix A: */ /* A = R * Q. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, */ /* if m <= n, the upper triangle of the subarray */ /* A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; */ /* if m >= n, the elements on and above the (m-n)-th subdiagonal */ /* contain the M-by-N upper trapezoidal matrix R; */ /* the remaining elements, with the array TAU, represent the */ /* orthogonal matrix Q as a product of min(m,n) elementary */ /* reflectors (see Further Details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,M). */ /* For optimum performance LWORK >= M*NB, where NB is */ /* the optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */ /* A(m-k+i,1:n-k+i-1), and tau in TAU(i). */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info == 0) { k = min(*m,*n); if (k == 0) { lwkopt = 1; } else { nb = _starpu_ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1); lwkopt = *m * nb; } work[1] = (doublereal) lwkopt; if (*lwork < max(1,*m) && ! lquery) { *info = -7; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGERQF", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (k == 0) { return 0; } nbmin = 2; nx = 1; iws = *m; if (nb > 1 && nb < k) { /* Determine when to cross over from blocked to unblocked code. */ /* Computing MAX */ i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGERQF", " ", m, n, &c_n1, &c_n1); nx = max(i__1,i__2); if (nx < k) { /* Determine if workspace is large enough for blocked code. */ ldwork = *m; iws = ldwork * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: reduce NB and */ /* determine the minimum value of NB. */ nb = *lwork / ldwork; /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGERQF", " ", m, n, &c_n1, & c_n1); nbmin = max(i__1,i__2); } } } if (nb >= nbmin && nb < k && nx < k) { /* Use blocked code initially. */ /* The last kk rows are handled by the block method. */ ki = (k - nx - 1) / nb * nb; /* Computing MIN */ i__1 = k, i__2 = ki + nb; kk = min(i__1,i__2); i__1 = k - kk + 1; i__2 = -nb; for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = k - i__ + 1; ib = min(i__3,nb); /* Compute the RQ factorization of the current block */ /* A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) */ i__3 = *n - k + i__ + ib - 1; _starpu_dgerq2_(&ib, &i__3, &a[*m - k + i__ + a_dim1], lda, &tau[i__], & work[1], &iinfo); if (*m - k + i__ > 1) { /* Form the triangular factor of the block reflector */ /* H = H(i+ib-1) . . . H(i+1) H(i) */ i__3 = *n - k + i__ + ib - 1; _starpu_dlarft_("Backward", "Rowwise", &i__3, &ib, &a[*m - k + i__ + a_dim1], lda, &tau[i__], &work[1], &ldwork); /* Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right */ i__3 = *m - k + i__ - 1; i__4 = *n - k + i__ + ib - 1; _starpu_dlarfb_("Right", "No transpose", "Backward", "Rowwise", &i__3, &i__4, &ib, &a[*m - k + i__ + a_dim1], lda, &work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], &ldwork); } /* L10: */ } mu = *m - k + i__ + nb - 1; nu = *n - k + i__ + nb - 1; } else { mu = *m; nu = *n; } /* Use unblocked code to factor the last or only block */ if (mu > 0 && nu > 0) { _starpu_dgerq2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo); } work[1] = (doublereal) iws; return 0; /* End of DGERQF */ } /* _starpu_dgerqf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgesc2.c000066400000000000000000000116051507764646700205460ustar00rootroot00000000000000/* dgesc2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; /* Subroutine */ int _starpu_dgesc2_(integer *n, doublereal *a, integer *lda, doublereal *rhs, integer *ipiv, integer *jpiv, doublereal *scale) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1, d__2; /* Local variables */ integer i__, j; doublereal eps, temp; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dlabad_(doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); doublereal smlnum; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGESC2 solves a system of linear equations */ /* A * X = scale* RHS */ /* with a general N-by-N matrix A using the LU factorization with */ /* complete pivoting computed by DGETC2. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the LU part of the factorization of the n-by-n */ /* matrix A computed by DGETC2: A = P * L * U * Q */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1, N). */ /* RHS (input/output) DOUBLE PRECISION array, dimension (N). */ /* On entry, the right hand side vector b. */ /* On exit, the solution vector X. */ /* IPIV (input) INTEGER array, dimension (N). */ /* The pivot indices; for 1 <= i <= N, row i of the */ /* matrix has been interchanged with row IPIV(i). */ /* JPIV (input) INTEGER array, dimension (N). */ /* The pivot indices; for 1 <= j <= N, column j of the */ /* matrix has been interchanged with column JPIV(j). */ /* SCALE (output) DOUBLE PRECISION */ /* On exit, SCALE contains the scale factor. SCALE is chosen */ /* 0 <= SCALE <= 1 to prevent owerflow in the solution. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ /* Umea University, S-901 87 Umea, Sweden. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Set constant to control owerflow */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --rhs; --ipiv; --jpiv; /* Function Body */ eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S") / eps; bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); /* Apply permutations IPIV to RHS */ i__1 = *n - 1; _starpu_dlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &ipiv[1], &c__1); /* Solve for L part */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { rhs[j] -= a[j + i__ * a_dim1] * rhs[i__]; /* L10: */ } /* L20: */ } /* Solve for U part */ *scale = 1.; /* Check for scaling */ i__ = _starpu_idamax_(n, &rhs[1], &c__1); if (smlnum * 2. * (d__1 = rhs[i__], abs(d__1)) > (d__2 = a[*n + *n * a_dim1], abs(d__2))) { temp = .5 / (d__1 = rhs[i__], abs(d__1)); _starpu_dscal_(n, &temp, &rhs[1], &c__1); *scale *= temp; } for (i__ = *n; i__ >= 1; --i__) { temp = 1. / a[i__ + i__ * a_dim1]; rhs[i__] *= temp; i__1 = *n; for (j = i__ + 1; j <= i__1; ++j) { rhs[i__] -= rhs[j] * (a[i__ + j * a_dim1] * temp); /* L30: */ } /* L40: */ } /* Apply permutations JPIV to the solution (RHS) */ i__1 = *n - 1; _starpu_dlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &jpiv[1], &c_n1); return 0; /* End of DGESC2 */ } /* _starpu_dgesc2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgesdd.c000066400000000000000000001527111507764646700206350ustar00rootroot00000000000000/* dgesdd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__0 = 0; static doublereal c_b227 = 0.; static doublereal c_b248 = 1.; /* Subroutine */ int _starpu_dgesdd_(char *jobz, integer *m, integer *n, doublereal * a, integer *lda, doublereal *s, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2, i__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, ie, il, ir, iu, blk; doublereal dum[1], eps; integer ivt, iscl; doublereal anrm; integer idum[1], ierr, itau; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); integer chunk, minmn, wrkbl, itaup, itauq, mnthr; logical wntqa; integer nwork; logical wntqn, wntqo, wntqs; extern /* Subroutine */ int _starpu_dbdsdc_(char *, char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dgebrd_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); integer bdspac; extern /* Subroutine */ int _starpu_dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dorgbr_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dormbr_(char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dorglq_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); integer ldwrkl, ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; doublereal smlnum; logical wntqas, lquery; /* -- LAPACK driver routine (version 3.2.1) -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* March 2009 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGESDD computes the singular value decomposition (SVD) of a real */ /* M-by-N matrix A, optionally computing the left and right singular */ /* vectors. If singular vectors are desired, it uses a */ /* divide-and-conquer algorithm. */ /* The SVD is written */ /* A = U * SIGMA * transpose(V) */ /* where SIGMA is an M-by-N matrix which is zero except for its */ /* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */ /* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA */ /* are the singular values of A; they are real and non-negative, and */ /* are returned in descending order. The first min(m,n) columns of */ /* U and V are the left and right singular vectors of A. */ /* Note that the routine returns VT = V**T, not V. */ /* The divide and conquer algorithm makes very mild assumptions about */ /* floating point arithmetic. It will work on machines with a guard */ /* digit in add/subtract, or on those binary machines without guard */ /* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ /* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* Specifies options for computing all or part of the matrix U: */ /* = 'A': all M columns of U and all N rows of V**T are */ /* returned in the arrays U and VT; */ /* = 'S': the first min(M,N) columns of U and the first */ /* min(M,N) rows of V**T are returned in the arrays U */ /* and VT; */ /* = 'O': If M >= N, the first N columns of U are overwritten */ /* on the array A and all rows of V**T are returned in */ /* the array VT; */ /* otherwise, all columns of U are returned in the */ /* array U and the first M rows of V**T are overwritten */ /* in the array A; */ /* = 'N': no columns of U or rows of V**T are computed. */ /* M (input) INTEGER */ /* The number of rows of the input matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the input matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, */ /* if JOBZ = 'O', A is overwritten with the first N columns */ /* of U (the left singular vectors, stored */ /* columnwise) if M >= N; */ /* A is overwritten with the first M rows */ /* of V**T (the right singular vectors, stored */ /* rowwise) otherwise. */ /* if JOBZ .ne. 'O', the contents of A are destroyed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* S (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The singular values of A, sorted so that S(i) >= S(i+1). */ /* U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) */ /* UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; */ /* UCOL = min(M,N) if JOBZ = 'S'. */ /* If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M */ /* orthogonal matrix U; */ /* if JOBZ = 'S', U contains the first min(M,N) columns of U */ /* (the left singular vectors, stored columnwise); */ /* if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. */ /* LDU (input) INTEGER */ /* The leading dimension of the array U. LDU >= 1; if */ /* JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. */ /* VT (output) DOUBLE PRECISION array, dimension (LDVT,N) */ /* If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the */ /* N-by-N orthogonal matrix V**T; */ /* if JOBZ = 'S', VT contains the first min(M,N) rows of */ /* V**T (the right singular vectors, stored rowwise); */ /* if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. */ /* LDVT (input) INTEGER */ /* The leading dimension of the array VT. LDVT >= 1; if */ /* JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; */ /* if JOBZ = 'S', LDVT >= min(M,N). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= 1. */ /* If JOBZ = 'N', */ /* LWORK >= 3*min(M,N) + max(max(M,N),7*min(M,N)). */ /* If JOBZ = 'O', */ /* LWORK >= 3*min(M,N) + */ /* max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). */ /* If JOBZ = 'S' or 'A' */ /* LWORK >= 3*min(M,N) + */ /* max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). */ /* For good performance, LWORK should generally be larger. */ /* If LWORK = -1 but other input arguments are legal, WORK(1) */ /* returns the optimal LWORK. */ /* IWORK (workspace) INTEGER array, dimension (8*min(M,N)) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: DBDSDC did not converge, updating process failed. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --s; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1; vt -= vt_offset; --work; --iwork; /* Function Body */ *info = 0; minmn = min(*m,*n); wntqa = _starpu_lsame_(jobz, "A"); wntqs = _starpu_lsame_(jobz, "S"); wntqas = wntqa || wntqs; wntqo = _starpu_lsame_(jobz, "O"); wntqn = _starpu_lsame_(jobz, "N"); lquery = *lwork == -1; if (! (wntqa || wntqs || wntqo || wntqn)) { *info = -1; } else if (*m < 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * m) { *info = -8; } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || wntqo && *m >= *n && *ldvt < *n) { *info = -10; } /* Compute workspace */ /* (Note: Comments in the code beginning "Workspace:" describe the */ /* minimal amount of workspace needed at that point in the code, */ /* as well as the preferred amount for good performance. */ /* NB refers to the optimal block size for the immediately */ /* following subroutine, as returned by ILAENV.) */ if (*info == 0) { minwrk = 1; maxwrk = 1; if (*m >= *n && minmn > 0) { /* Compute space needed for DBDSDC */ mnthr = (integer) (minmn * 11. / 6.); if (wntqn) { bdspac = *n * 7; } else { bdspac = *n * 3 * *n + (*n << 2); } if (*m >= mnthr) { if (wntqn) { /* Path 1 (M much larger than N, JOBZ='N') */ wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *n; maxwrk = max(i__1,i__2); minwrk = bdspac + *n; } else if (wntqo) { /* Path 2 (M much larger than N, JOBZ='O') */ wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__1 = wrkbl, i__2 = *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", " ", m, n, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" , "QLN", n, n, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" , "PRT", n, n, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *n * 3; wrkbl = max(i__1,i__2); maxwrk = wrkbl + (*n << 1) * *n; minwrk = bdspac + (*n << 1) * *n + *n * 3; } else if (wntqs) { /* Path 3 (M much larger than N, JOBZ='S') */ wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__1 = wrkbl, i__2 = *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", " ", m, n, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" , "QLN", n, n, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" , "PRT", n, n, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *n * 3; wrkbl = max(i__1,i__2); maxwrk = wrkbl + *n * *n; minwrk = bdspac + *n * *n + *n * 3; } else if (wntqa) { /* Path 4 (M much larger than N, JOBZ='A') */ wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__1 = wrkbl, i__2 = *n + *m * _starpu_ilaenv_(&c__1, "DORGQR", " ", m, m, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" , "QLN", n, n, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" , "PRT", n, n, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *n * 3; wrkbl = max(i__1,i__2); maxwrk = wrkbl + *n * *n; minwrk = bdspac + *n * *n + *n * 3; } } else { /* Path 5 (M at least N, but not much larger) */ wrkbl = *n * 3 + (*m + *n) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1); if (wntqn) { /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *n * 3; maxwrk = max(i__1,i__2); minwrk = *n * 3 + max(*m,bdspac); } else if (wntqo) { /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" , "QLN", m, n, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" , "PRT", n, n, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *n * 3; wrkbl = max(i__1,i__2); maxwrk = wrkbl + *m * *n; /* Computing MAX */ i__1 = *m, i__2 = *n * *n + bdspac; minwrk = *n * 3 + max(i__1,i__2); } else if (wntqs) { /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" , "QLN", m, n, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" , "PRT", n, n, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *n * 3; maxwrk = max(i__1,i__2); minwrk = *n * 3 + max(*m,bdspac); } else if (wntqa) { /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" , "QLN", m, m, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORMBR" , "PRT", n, n, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = bdspac + *n * 3; maxwrk = max(i__1,i__2); minwrk = *n * 3 + max(*m,bdspac); } } } else if (minmn > 0) { /* Compute space needed for DBDSDC */ mnthr = (integer) (minmn * 11. / 6.); if (wntqn) { bdspac = *m * 7; } else { bdspac = *m * 3 * *m + (*m << 2); } if (*n >= mnthr) { if (wntqn) { /* Path 1t (N much larger than M, JOBZ='N') */ wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *m; maxwrk = max(i__1,i__2); minwrk = bdspac + *m; } else if (wntqo) { /* Path 2t (N much larger than M, JOBZ='O') */ wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__1 = wrkbl, i__2 = *m + *m * _starpu_ilaenv_(&c__1, "DORGLQ", " ", m, n, m, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" , "QLN", m, m, m, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" , "PRT", m, m, m, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *m * 3; wrkbl = max(i__1,i__2); maxwrk = wrkbl + (*m << 1) * *m; minwrk = bdspac + (*m << 1) * *m + *m * 3; } else if (wntqs) { /* Path 3t (N much larger than M, JOBZ='S') */ wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__1 = wrkbl, i__2 = *m + *m * _starpu_ilaenv_(&c__1, "DORGLQ", " ", m, n, m, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" , "QLN", m, m, m, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" , "PRT", m, m, m, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *m * 3; wrkbl = max(i__1,i__2); maxwrk = wrkbl + *m * *m; minwrk = bdspac + *m * *m + *m * 3; } else if (wntqa) { /* Path 4t (N much larger than M, JOBZ='A') */ wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__1 = wrkbl, i__2 = *m + *n * _starpu_ilaenv_(&c__1, "DORGLQ", " ", n, n, m, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" , "QLN", m, m, m, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" , "PRT", m, m, m, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *m * 3; wrkbl = max(i__1,i__2); maxwrk = wrkbl + *m * *m; minwrk = bdspac + *m * *m + *m * 3; } } else { /* Path 5t (N greater than M, but not much larger) */ wrkbl = *m * 3 + (*m + *n) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1); if (wntqn) { /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *m * 3; maxwrk = max(i__1,i__2); minwrk = *m * 3 + max(*n,bdspac); } else if (wntqo) { /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" , "QLN", m, m, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" , "PRT", m, n, m, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *m * 3; wrkbl = max(i__1,i__2); maxwrk = wrkbl + *m * *n; /* Computing MAX */ i__1 = *n, i__2 = *m * *m + bdspac; minwrk = *m * 3 + max(i__1,i__2); } else if (wntqs) { /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" , "QLN", m, m, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" , "PRT", m, n, m, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *m * 3; maxwrk = max(i__1,i__2); minwrk = *m * 3 + max(*n,bdspac); } else if (wntqa) { /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" , "QLN", m, m, n, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR" , "PRT", n, n, m, &c_n1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *m * 3; maxwrk = max(i__1,i__2); minwrk = *m * 3 + max(*n,bdspac); } } } maxwrk = max(maxwrk,minwrk); work[1] = (doublereal) maxwrk; if (*lwork < minwrk && ! lquery) { *info = -12; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGESDD", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Get machine constants */ eps = _starpu_dlamch_("P"); smlnum = sqrt(_starpu_dlamch_("S")) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, dum); iscl = 0; if (anrm > 0. && anrm < smlnum) { iscl = 1; _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & ierr); } else if (anrm > bignum) { iscl = 1; _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & ierr); } if (*m >= *n) { /* A has at least as many rows as columns. If A has sufficiently */ /* more rows than columns, first reduce using the QR */ /* decomposition (if sufficient workspace available) */ if (*m >= mnthr) { if (wntqn) { /* Path 1 (M much larger than N, JOBZ='N') */ /* No singular vectors to be computed */ itau = 1; nwork = itau + *n; /* Compute A=Q*R */ /* (Workspace: need 2*N, prefer N+N*NB) */ i__1 = *lwork - nwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__1, &ierr); /* Zero out below R */ i__1 = *n - 1; i__2 = *n - 1; _starpu_dlaset_("L", &i__1, &i__2, &c_b227, &c_b227, &a[a_dim1 + 2], lda); ie = 1; itauq = ie + *n; itaup = itauq + *n; nwork = itaup + *n; /* Bidiagonalize R in A */ /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ i__1 = *lwork - nwork + 1; _starpu_dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__1, &ierr); nwork = ie + *n; /* Perform bidiagonal SVD, computing singular values only */ /* (Workspace: need N+BDSPAC) */ _starpu_dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, &work[nwork], &iwork[1], info); } else if (wntqo) { /* Path 2 (M much larger than N, JOBZ = 'O') */ /* N left singular vectors to be overwritten on A and */ /* N right singular vectors to be computed in VT */ ir = 1; /* WORK(IR) is LDWRKR by N */ if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { ldwrkr = *lda; } else { ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; } itau = ir + ldwrkr * *n; nwork = itau + *n; /* Compute A=Q*R */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__1 = *lwork - nwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__1, &ierr); /* Copy R to WORK(IR), zeroing out below it */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); i__1 = *n - 1; i__2 = *n - 1; _starpu_dlaset_("L", &i__1, &i__2, &c_b227, &c_b227, &work[ir + 1], & ldwrkr); /* Generate Q in A */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__1 = *lwork - nwork + 1; _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); ie = itau; itauq = ie + *n; itaup = itauq + *n; nwork = itaup + *n; /* Bidiagonalize R in VT, copying result to WORK(IR) */ /* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ i__1 = *lwork - nwork + 1; _starpu_dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__1, &ierr); /* WORK(IU) is N by N */ iu = nwork; nwork = iu + *n * *n; /* Perform bidiagonal SVD, computing left singular vectors */ /* of bidiagonal matrix in WORK(IU) and computing right */ /* singular vectors of bidiagonal matrix in VT */ /* (Workspace: need N+N*N+BDSPAC) */ _starpu_dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], info); /* Overwrite WORK(IU) by left singular vectors of R */ /* and VT by right singular vectors of R */ /* (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) */ i__1 = *lwork - nwork + 1; _starpu_dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ itauq], &work[iu], n, &work[nwork], &i__1, &ierr); i__1 = *lwork - nwork + 1; _starpu_dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & ierr); /* Multiply Q in A by left singular vectors of R in */ /* WORK(IU), storing result in WORK(IR) and copying to A */ /* (Workspace: need 2*N*N, prefer N*N+M*N) */ i__1 = *m; i__2 = ldwrkr; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = *m - i__ + 1; chunk = min(i__3,ldwrkr); _starpu_dgemm_("N", "N", &chunk, n, n, &c_b248, &a[i__ + a_dim1], lda, &work[iu], n, &c_b227, &work[ir], &ldwrkr); _starpu_dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + a_dim1], lda); /* L10: */ } } else if (wntqs) { /* Path 3 (M much larger than N, JOBZ='S') */ /* N left singular vectors to be computed in U and */ /* N right singular vectors to be computed in VT */ ir = 1; /* WORK(IR) is N by N */ ldwrkr = *n; itau = ir + ldwrkr * *n; nwork = itau + *n; /* Compute A=Q*R */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - nwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__2, &ierr); /* Copy R to WORK(IR), zeroing out below it */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); i__2 = *n - 1; i__1 = *n - 1; _starpu_dlaset_("L", &i__2, &i__1, &c_b227, &c_b227, &work[ir + 1], & ldwrkr); /* Generate Q in A */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - nwork + 1; _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); ie = itau; itauq = ie + *n; itaup = itauq + *n; nwork = itaup + *n; /* Bidiagonalize R in WORK(IR) */ /* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ i__2 = *lwork - nwork + 1; _starpu_dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__2, &ierr); /* Perform bidiagonal SVD, computing left singular vectors */ /* of bidiagoal matrix in U and computing right singular */ /* vectors of bidiagonal matrix in VT */ /* (Workspace: need N+BDSPAC) */ _starpu_dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], info); /* Overwrite U by left singular vectors of R and VT */ /* by right singular vectors of R */ /* (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) */ i__2 = *lwork - nwork + 1; _starpu_dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); i__2 = *lwork - nwork + 1; _starpu_dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & ierr); /* Multiply Q in A by left singular vectors of R in */ /* WORK(IR), storing result in U */ /* (Workspace: need N*N) */ _starpu_dlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); _starpu_dgemm_("N", "N", m, n, n, &c_b248, &a[a_offset], lda, &work[ ir], &ldwrkr, &c_b227, &u[u_offset], ldu); } else if (wntqa) { /* Path 4 (M much larger than N, JOBZ='A') */ /* M left singular vectors to be computed in U and */ /* N right singular vectors to be computed in VT */ iu = 1; /* WORK(IU) is N by N */ ldwrku = *n; itau = iu + ldwrku * *n; nwork = itau + *n; /* Compute A=Q*R, copying result to U */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - nwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__2, &ierr); _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); /* Generate Q in U */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - nwork + 1; _starpu_dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], &i__2, &ierr); /* Produce R in A, zeroing out other entries */ i__2 = *n - 1; i__1 = *n - 1; _starpu_dlaset_("L", &i__2, &i__1, &c_b227, &c_b227, &a[a_dim1 + 2], lda); ie = itau; itauq = ie + *n; itaup = itauq + *n; nwork = itaup + *n; /* Bidiagonalize R in A */ /* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ i__2 = *lwork - nwork + 1; _starpu_dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__2, &ierr); /* Perform bidiagonal SVD, computing left singular vectors */ /* of bidiagonal matrix in WORK(IU) and computing right */ /* singular vectors of bidiagonal matrix in VT */ /* (Workspace: need N+N*N+BDSPAC) */ _starpu_dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], info); /* Overwrite WORK(IU) by left singular vectors of R and VT */ /* by right singular vectors of R */ /* (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) */ i__2 = *lwork - nwork + 1; _starpu_dormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & ierr); i__2 = *lwork - nwork + 1; _starpu_dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & ierr); /* Multiply Q in U by left singular vectors of R in */ /* WORK(IU), storing result in A */ /* (Workspace: need N*N) */ _starpu_dgemm_("N", "N", m, n, n, &c_b248, &u[u_offset], ldu, &work[ iu], &ldwrku, &c_b227, &a[a_offset], lda); /* Copy left singular vectors of A from A to U */ _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); } } else { /* M .LT. MNTHR */ /* Path 5 (M at least N, but not much larger) */ /* Reduce to bidiagonal form without QR decomposition */ ie = 1; itauq = ie + *n; itaup = itauq + *n; nwork = itaup + *n; /* Bidiagonalize A */ /* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */ i__2 = *lwork - nwork + 1; _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[nwork], &i__2, &ierr); if (wntqn) { /* Perform bidiagonal SVD, only computing singular values */ /* (Workspace: need N+BDSPAC) */ _starpu_dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, &work[nwork], &iwork[1], info); } else if (wntqo) { iu = nwork; if (*lwork >= *m * *n + *n * 3 + bdspac) { /* WORK( IU ) is M by N */ ldwrku = *m; nwork = iu + ldwrku * *n; _starpu_dlaset_("F", m, n, &c_b227, &c_b227, &work[iu], &ldwrku); } else { /* WORK( IU ) is N by N */ ldwrku = *n; nwork = iu + ldwrku * *n; /* WORK(IR) is LDWRKR by N */ ir = nwork; ldwrkr = (*lwork - *n * *n - *n * 3) / *n; } nwork = iu + ldwrku * *n; /* Perform bidiagonal SVD, computing left singular vectors */ /* of bidiagonal matrix in WORK(IU) and computing right */ /* singular vectors of bidiagonal matrix in VT */ /* (Workspace: need N+N*N+BDSPAC) */ _starpu_dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[ 1], info); /* Overwrite VT by right singular vectors of A */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - nwork + 1; _starpu_dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & ierr); if (*lwork >= *m * *n + *n * 3 + bdspac) { /* Overwrite WORK(IU) by left singular vectors of A */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - nwork + 1; _starpu_dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & ierr); /* Copy left singular vectors of A from WORK(IU) to A */ _starpu_dlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); } else { /* Generate Q in A */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - nwork + 1; _starpu_dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & work[nwork], &i__2, &ierr); /* Multiply Q in A by left singular vectors of */ /* bidiagonal matrix in WORK(IU), storing result in */ /* WORK(IR) and copying to A */ /* (Workspace: need 2*N*N, prefer N*N+M*N) */ i__2 = *m; i__1 = ldwrkr; for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { /* Computing MIN */ i__3 = *m - i__ + 1; chunk = min(i__3,ldwrkr); _starpu_dgemm_("N", "N", &chunk, n, n, &c_b248, &a[i__ + a_dim1], lda, &work[iu], &ldwrku, &c_b227, & work[ir], &ldwrkr); _starpu_dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + a_dim1], lda); /* L20: */ } } } else if (wntqs) { /* Perform bidiagonal SVD, computing left singular vectors */ /* of bidiagonal matrix in U and computing right singular */ /* vectors of bidiagonal matrix in VT */ /* (Workspace: need N+BDSPAC) */ _starpu_dlaset_("F", m, n, &c_b227, &c_b227, &u[u_offset], ldu); _starpu_dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], info); /* Overwrite U by left singular vectors of A and VT */ /* by right singular vectors of A */ /* (Workspace: need 3*N, prefer 2*N+N*NB) */ i__1 = *lwork - nwork + 1; _starpu_dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); i__1 = *lwork - nwork + 1; _starpu_dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & ierr); } else if (wntqa) { /* Perform bidiagonal SVD, computing left singular vectors */ /* of bidiagonal matrix in U and computing right singular */ /* vectors of bidiagonal matrix in VT */ /* (Workspace: need N+BDSPAC) */ _starpu_dlaset_("F", m, m, &c_b227, &c_b227, &u[u_offset], ldu); _starpu_dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], info); /* Set the right corner of U to identity matrix */ if (*m > *n) { i__1 = *m - *n; i__2 = *m - *n; _starpu_dlaset_("F", &i__1, &i__2, &c_b227, &c_b248, &u[*n + 1 + ( *n + 1) * u_dim1], ldu); } /* Overwrite U by left singular vectors of A and VT */ /* by right singular vectors of A */ /* (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB) */ i__1 = *lwork - nwork + 1; _starpu_dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); i__1 = *lwork - nwork + 1; _starpu_dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & ierr); } } } else { /* A has more columns than rows. If A has sufficiently more */ /* columns than rows, first reduce using the LQ decomposition (if */ /* sufficient workspace available) */ if (*n >= mnthr) { if (wntqn) { /* Path 1t (N much larger than M, JOBZ='N') */ /* No singular vectors to be computed */ itau = 1; nwork = itau + *m; /* Compute A=L*Q */ /* (Workspace: need 2*M, prefer M+M*NB) */ i__1 = *lwork - nwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__1, &ierr); /* Zero out above L */ i__1 = *m - 1; i__2 = *m - 1; _starpu_dlaset_("U", &i__1, &i__2, &c_b227, &c_b227, &a[(a_dim1 << 1) + 1], lda); ie = 1; itauq = ie + *m; itaup = itauq + *m; nwork = itaup + *m; /* Bidiagonalize L in A */ /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ i__1 = *lwork - nwork + 1; _starpu_dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__1, &ierr); nwork = ie + *m; /* Perform bidiagonal SVD, computing singular values only */ /* (Workspace: need M+BDSPAC) */ _starpu_dbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, &work[nwork], &iwork[1], info); } else if (wntqo) { /* Path 2t (N much larger than M, JOBZ='O') */ /* M right singular vectors to be overwritten on A and */ /* M left singular vectors to be computed in U */ ivt = 1; /* IVT is M by M */ il = ivt + *m * *m; if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { /* WORK(IL) is M by N */ ldwrkl = *m; chunk = *n; } else { ldwrkl = *m; chunk = (*lwork - *m * *m) / *m; } itau = il + ldwrkl * *m; nwork = itau + *m; /* Compute A=L*Q */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__1 = *lwork - nwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__1, &ierr); /* Copy L to WORK(IL), zeroing about above it */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); i__1 = *m - 1; i__2 = *m - 1; _starpu_dlaset_("U", &i__1, &i__2, &c_b227, &c_b227, &work[il + ldwrkl], &ldwrkl); /* Generate Q in A */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__1 = *lwork - nwork + 1; _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); ie = itau; itauq = ie + *m; itaup = itauq + *m; nwork = itaup + *m; /* Bidiagonalize L in WORK(IL) */ /* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ i__1 = *lwork - nwork + 1; _starpu_dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__1, &ierr); /* Perform bidiagonal SVD, computing left singular vectors */ /* of bidiagonal matrix in U, and computing right singular */ /* vectors of bidiagonal matrix in WORK(IVT) */ /* (Workspace: need M+M*M+BDSPAC) */ _starpu_dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & work[ivt], m, dum, idum, &work[nwork], &iwork[1], info); /* Overwrite U by left singular vectors of L and WORK(IVT) */ /* by right singular vectors of L */ /* (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) */ i__1 = *lwork - nwork + 1; _starpu_dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); i__1 = *lwork - nwork + 1; _starpu_dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ itaup], &work[ivt], m, &work[nwork], &i__1, &ierr); /* Multiply right singular vectors of L in WORK(IVT) by Q */ /* in A, storing result in WORK(IL) and copying to A */ /* (Workspace: need 2*M*M, prefer M*M+M*N) */ i__1 = *n; i__2 = chunk; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = *n - i__ + 1; blk = min(i__3,chunk); _starpu_dgemm_("N", "N", m, &blk, m, &c_b248, &work[ivt], m, &a[ i__ * a_dim1 + 1], lda, &c_b227, &work[il], & ldwrkl); _starpu_dlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + 1], lda); /* L30: */ } } else if (wntqs) { /* Path 3t (N much larger than M, JOBZ='S') */ /* M right singular vectors to be computed in VT and */ /* M left singular vectors to be computed in U */ il = 1; /* WORK(IL) is M by M */ ldwrkl = *m; itau = il + ldwrkl * *m; nwork = itau + *m; /* Compute A=L*Q */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__2 = *lwork - nwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__2, &ierr); /* Copy L to WORK(IL), zeroing out above it */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); i__2 = *m - 1; i__1 = *m - 1; _starpu_dlaset_("U", &i__2, &i__1, &c_b227, &c_b227, &work[il + ldwrkl], &ldwrkl); /* Generate Q in A */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__2 = *lwork - nwork + 1; _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); ie = itau; itauq = ie + *m; itaup = itauq + *m; nwork = itaup + *m; /* Bidiagonalize L in WORK(IU), copying result to U */ /* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ i__2 = *lwork - nwork + 1; _starpu_dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__2, &ierr); /* Perform bidiagonal SVD, computing left singular vectors */ /* of bidiagonal matrix in U and computing right singular */ /* vectors of bidiagonal matrix in VT */ /* (Workspace: need M+BDSPAC) */ _starpu_dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], info); /* Overwrite U by left singular vectors of L and VT */ /* by right singular vectors of L */ /* (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) */ i__2 = *lwork - nwork + 1; _starpu_dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); i__2 = *lwork - nwork + 1; _starpu_dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & ierr); /* Multiply right singular vectors of L in WORK(IL) by */ /* Q in A, storing result in VT */ /* (Workspace: need M*M) */ _starpu_dlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); _starpu_dgemm_("N", "N", m, n, m, &c_b248, &work[il], &ldwrkl, &a[ a_offset], lda, &c_b227, &vt[vt_offset], ldvt); } else if (wntqa) { /* Path 4t (N much larger than M, JOBZ='A') */ /* N right singular vectors to be computed in VT and */ /* M left singular vectors to be computed in U */ ivt = 1; /* WORK(IVT) is M by M */ ldwkvt = *m; itau = ivt + ldwkvt * *m; nwork = itau + *m; /* Compute A=L*Q, copying result to VT */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__2 = *lwork - nwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__2, &ierr); _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); /* Generate Q in VT */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__2 = *lwork - nwork + 1; _starpu_dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ nwork], &i__2, &ierr); /* Produce L in A, zeroing out other entries */ i__2 = *m - 1; i__1 = *m - 1; _starpu_dlaset_("U", &i__2, &i__1, &c_b227, &c_b227, &a[(a_dim1 << 1) + 1], lda); ie = itau; itauq = ie + *m; itaup = itauq + *m; nwork = itaup + *m; /* Bidiagonalize L in A */ /* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ i__2 = *lwork - nwork + 1; _starpu_dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__2, &ierr); /* Perform bidiagonal SVD, computing left singular vectors */ /* of bidiagonal matrix in U and computing right singular */ /* vectors of bidiagonal matrix in WORK(IVT) */ /* (Workspace: need M+M*M+BDSPAC) */ _starpu_dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] , info); /* Overwrite U by left singular vectors of L and WORK(IVT) */ /* by right singular vectors of L */ /* (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) */ i__2 = *lwork - nwork + 1; _starpu_dormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); i__2 = *lwork - nwork + 1; _starpu_dormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & ierr); /* Multiply right singular vectors of L in WORK(IVT) by */ /* Q in VT, storing result in A */ /* (Workspace: need M*M) */ _starpu_dgemm_("N", "N", m, n, m, &c_b248, &work[ivt], &ldwkvt, &vt[ vt_offset], ldvt, &c_b227, &a[a_offset], lda); /* Copy right singular vectors of A from A to VT */ _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); } } else { /* N .LT. MNTHR */ /* Path 5t (N greater than M, but not much larger) */ /* Reduce to bidiagonal form without LQ decomposition */ ie = 1; itauq = ie + *m; itaup = itauq + *m; nwork = itaup + *m; /* Bidiagonalize A */ /* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ i__2 = *lwork - nwork + 1; _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[nwork], &i__2, &ierr); if (wntqn) { /* Perform bidiagonal SVD, only computing singular values */ /* (Workspace: need M+BDSPAC) */ _starpu_dbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, &work[nwork], &iwork[1], info); } else if (wntqo) { ldwkvt = *m; ivt = nwork; if (*lwork >= *m * *n + *m * 3 + bdspac) { /* WORK( IVT ) is M by N */ _starpu_dlaset_("F", m, n, &c_b227, &c_b227, &work[ivt], &ldwkvt); nwork = ivt + ldwkvt * *n; } else { /* WORK( IVT ) is M by M */ nwork = ivt + ldwkvt * *m; il = nwork; /* WORK(IL) is M by CHUNK */ chunk = (*lwork - *m * *m - *m * 3) / *m; } /* Perform bidiagonal SVD, computing left singular vectors */ /* of bidiagonal matrix in U and computing right singular */ /* vectors of bidiagonal matrix in WORK(IVT) */ /* (Workspace: need M*M+BDSPAC) */ _starpu_dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] , info); /* Overwrite U by left singular vectors of A */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__2 = *lwork - nwork + 1; _starpu_dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); if (*lwork >= *m * *n + *m * 3 + bdspac) { /* Overwrite WORK(IVT) by left singular vectors of A */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__2 = *lwork - nwork + 1; _starpu_dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, &ierr); /* Copy right singular vectors of A from WORK(IVT) to A */ _starpu_dlacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); } else { /* Generate P**T in A */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__2 = *lwork - nwork + 1; _starpu_dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & work[nwork], &i__2, &ierr); /* Multiply Q in A by right singular vectors of */ /* bidiagonal matrix in WORK(IVT), storing result in */ /* WORK(IL) and copying to A */ /* (Workspace: need 2*M*M, prefer M*M+M*N) */ i__2 = *n; i__1 = chunk; for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { /* Computing MIN */ i__3 = *n - i__ + 1; blk = min(i__3,chunk); _starpu_dgemm_("N", "N", m, &blk, m, &c_b248, &work[ivt], & ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b227, & work[il], m); _starpu_dlacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + 1], lda); /* L40: */ } } } else if (wntqs) { /* Perform bidiagonal SVD, computing left singular vectors */ /* of bidiagonal matrix in U and computing right singular */ /* vectors of bidiagonal matrix in VT */ /* (Workspace: need M+BDSPAC) */ _starpu_dlaset_("F", m, n, &c_b227, &c_b227, &vt[vt_offset], ldvt); _starpu_dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], info); /* Overwrite U by left singular vectors of A and VT */ /* by right singular vectors of A */ /* (Workspace: need 3*M, prefer 2*M+M*NB) */ i__1 = *lwork - nwork + 1; _starpu_dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); i__1 = *lwork - nwork + 1; _starpu_dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & ierr); } else if (wntqa) { /* Perform bidiagonal SVD, computing left singular vectors */ /* of bidiagonal matrix in U and computing right singular */ /* vectors of bidiagonal matrix in VT */ /* (Workspace: need M+BDSPAC) */ _starpu_dlaset_("F", n, n, &c_b227, &c_b227, &vt[vt_offset], ldvt); _starpu_dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], info); /* Set the right corner of VT to identity matrix */ if (*n > *m) { i__1 = *n - *m; i__2 = *n - *m; _starpu_dlaset_("F", &i__1, &i__2, &c_b227, &c_b248, &vt[*m + 1 + (*m + 1) * vt_dim1], ldvt); } /* Overwrite U by left singular vectors of A and VT */ /* by right singular vectors of A */ /* (Workspace: need 2*M+N, prefer 2*M+N*NB) */ i__1 = *lwork - nwork + 1; _starpu_dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); i__1 = *lwork - nwork + 1; _starpu_dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & ierr); } } } /* Undo scaling if necessary */ if (iscl == 1) { if (anrm > bignum) { _starpu_dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & minmn, &ierr); } if (anrm < smlnum) { _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & minmn, &ierr); } } /* Return optimal workspace in WORK(1) */ work[1] = (doublereal) maxwrk; return 0; /* End of DGESDD */ } /* _starpu_dgesdd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgesv.c000066400000000000000000000106031507764646700205040ustar00rootroot00000000000000/* dgesv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgesv_(integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern /* Subroutine */ int _starpu_dgetrf_(integer *, integer *, doublereal *, integer *, integer *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dgetrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGESV computes the solution to a real system of linear equations */ /* A * X = B, */ /* where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */ /* The LU decomposition with partial pivoting and row interchanges is */ /* used to factor A as */ /* A = P * L * U, */ /* where P is a permutation matrix, L is unit lower triangular, and U is */ /* upper triangular. The factored form of A is then used to solve the */ /* system of equations A * X = B. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N coefficient matrix A. */ /* On exit, the factors L and U from the factorization */ /* A = P*L*U; the unit diagonal elements of L are not stored. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* IPIV (output) INTEGER array, dimension (N) */ /* The pivot indices that define the permutation matrix P; */ /* row i of the matrix was interchanged with row IPIV(i). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS matrix of right hand side matrix B. */ /* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, so the solution could not be computed. */ /* ===================================================================== */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*nrhs < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGESV ", &i__1); return 0; } /* Compute the LU factorization of A. */ _starpu_dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); if (*info == 0) { /* Solve the system A*X = B, overwriting B with X. */ _starpu_dgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ b_offset], ldb, info); } return 0; /* End of DGESV */ } /* _starpu_dgesv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgesvd.c000066400000000000000000004010131507764646700206470ustar00rootroot00000000000000/* dgesvd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__6 = 6; static integer c__0 = 0; static integer c__2 = 2; static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b421 = 0.; static doublereal c_b443 = 1.; /* Subroutine */ int _starpu_dgesvd_(char *jobu, char *jobvt, integer *m, integer *n, doublereal *a, integer *lda, doublereal *s, doublereal *u, integer * ldu, doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], i__2, i__3, i__4; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); double sqrt(doublereal); /* Local variables */ integer i__, ie, ir, iu, blk, ncu; doublereal dum[1], eps; integer nru, iscl; doublereal anrm; integer ierr, itau, ncvt, nrvt; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); integer chunk, minmn, wrkbl, itaup, itauq, mnthr, iwork; logical wntua, wntva, wntun, wntuo, wntvn, wntvo, wntus, wntvs; extern /* Subroutine */ int _starpu_dgebrd_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); integer bdspac; extern /* Subroutine */ int _starpu_dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dbdsqr_(char *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dorgbr_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dormbr_(char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dorglq_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); integer ldwrkr, minwrk, ldwrku, maxwrk; doublereal smlnum; logical lquery, wntuas, wntvas; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGESVD computes the singular value decomposition (SVD) of a real */ /* M-by-N matrix A, optionally computing the left and/or right singular */ /* vectors. The SVD is written */ /* A = U * SIGMA * transpose(V) */ /* where SIGMA is an M-by-N matrix which is zero except for its */ /* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */ /* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA */ /* are the singular values of A; they are real and non-negative, and */ /* are returned in descending order. The first min(m,n) columns of */ /* U and V are the left and right singular vectors of A. */ /* Note that the routine returns V**T, not V. */ /* Arguments */ /* ========= */ /* JOBU (input) CHARACTER*1 */ /* Specifies options for computing all or part of the matrix U: */ /* = 'A': all M columns of U are returned in array U: */ /* = 'S': the first min(m,n) columns of U (the left singular */ /* vectors) are returned in the array U; */ /* = 'O': the first min(m,n) columns of U (the left singular */ /* vectors) are overwritten on the array A; */ /* = 'N': no columns of U (no left singular vectors) are */ /* computed. */ /* JOBVT (input) CHARACTER*1 */ /* Specifies options for computing all or part of the matrix */ /* V**T: */ /* = 'A': all N rows of V**T are returned in the array VT; */ /* = 'S': the first min(m,n) rows of V**T (the right singular */ /* vectors) are returned in the array VT; */ /* = 'O': the first min(m,n) rows of V**T (the right singular */ /* vectors) are overwritten on the array A; */ /* = 'N': no rows of V**T (no right singular vectors) are */ /* computed. */ /* JOBVT and JOBU cannot both be 'O'. */ /* M (input) INTEGER */ /* The number of rows of the input matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the input matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, */ /* if JOBU = 'O', A is overwritten with the first min(m,n) */ /* columns of U (the left singular vectors, */ /* stored columnwise); */ /* if JOBVT = 'O', A is overwritten with the first min(m,n) */ /* rows of V**T (the right singular vectors, */ /* stored rowwise); */ /* if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A */ /* are destroyed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* S (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The singular values of A, sorted so that S(i) >= S(i+1). */ /* U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) */ /* (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. */ /* If JOBU = 'A', U contains the M-by-M orthogonal matrix U; */ /* if JOBU = 'S', U contains the first min(m,n) columns of U */ /* (the left singular vectors, stored columnwise); */ /* if JOBU = 'N' or 'O', U is not referenced. */ /* LDU (input) INTEGER */ /* The leading dimension of the array U. LDU >= 1; if */ /* JOBU = 'S' or 'A', LDU >= M. */ /* VT (output) DOUBLE PRECISION array, dimension (LDVT,N) */ /* If JOBVT = 'A', VT contains the N-by-N orthogonal matrix */ /* V**T; */ /* if JOBVT = 'S', VT contains the first min(m,n) rows of */ /* V**T (the right singular vectors, stored rowwise); */ /* if JOBVT = 'N' or 'O', VT is not referenced. */ /* LDVT (input) INTEGER */ /* The leading dimension of the array VT. LDVT >= 1; if */ /* JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */ /* if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged */ /* superdiagonal elements of an upper bidiagonal matrix B */ /* whose diagonal is in S (not necessarily sorted). B */ /* satisfies A = U * B * VT, so it has the same singular values */ /* as A, and singular vectors related by U and VT. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)). */ /* For good performance, LWORK should generally be larger. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if DBDSQR did not converge, INFO specifies how many */ /* superdiagonals of an intermediate bidiagonal form B */ /* did not converge to zero. See the description of WORK */ /* above for details. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --s; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1; vt -= vt_offset; --work; /* Function Body */ *info = 0; minmn = min(*m,*n); wntua = _starpu_lsame_(jobu, "A"); wntus = _starpu_lsame_(jobu, "S"); wntuas = wntua || wntus; wntuo = _starpu_lsame_(jobu, "O"); wntun = _starpu_lsame_(jobu, "N"); wntva = _starpu_lsame_(jobvt, "A"); wntvs = _starpu_lsame_(jobvt, "S"); wntvas = wntva || wntvs; wntvo = _starpu_lsame_(jobvt, "O"); wntvn = _starpu_lsame_(jobvt, "N"); lquery = *lwork == -1; if (! (wntua || wntus || wntuo || wntun)) { *info = -1; } else if (! (wntva || wntvs || wntvo || wntvn) || wntvo && wntuo) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*m)) { *info = -6; } else if (*ldu < 1 || wntuas && *ldu < *m) { *info = -9; } else if (*ldvt < 1 || wntva && *ldvt < *n || wntvs && *ldvt < minmn) { *info = -11; } /* Compute workspace */ /* (Note: Comments in the code beginning "Workspace:" describe the */ /* minimal amount of workspace needed at that point in the code, */ /* as well as the preferred amount for good performance. */ /* NB refers to the optimal block size for the immediately */ /* following subroutine, as returned by ILAENV.) */ if (*info == 0) { minwrk = 1; maxwrk = 1; if (*m >= *n && minmn > 0) { /* Compute space needed for DBDSQR */ /* Writing concatenation */ i__1[0] = 1, a__1[0] = jobu; i__1[1] = 1, a__1[1] = jobvt; s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); mnthr = _starpu_ilaenv_(&c__6, "DGESVD", ch__1, m, n, &c__0, &c__0); bdspac = *n * 5; if (*m >= mnthr) { if (wntun) { /* Path 1 (M much larger than N, JOBU='N') */ maxwrk = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = maxwrk, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1); maxwrk = max(i__2,i__3); if (wntvo || wntvas) { /* Computing MAX */ i__2 = maxwrk, i__3 = *n * 3 + (*n - 1) * _starpu_ilaenv_(& c__1, "DORGBR", "P", n, n, n, &c_n1); maxwrk = max(i__2,i__3); } maxwrk = max(maxwrk,bdspac); /* Computing MAX */ i__2 = *n << 2; minwrk = max(i__2,bdspac); } else if (wntuo && wntvn) { /* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */ wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", " ", m, n, n, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORGBR" , "Q", n, n, n, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); /* Computing MAX */ i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n; maxwrk = max(i__2,i__3); /* Computing MAX */ i__2 = *n * 3 + *m; minwrk = max(i__2,bdspac); } else if (wntuo && wntvas) { /* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or */ /* 'A') */ wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", " ", m, n, n, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORGBR" , "Q", n, n, n, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", n, n, n, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); /* Computing MAX */ i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n; maxwrk = max(i__2,i__3); /* Computing MAX */ i__2 = *n * 3 + *m; minwrk = max(i__2,bdspac); } else if (wntus && wntvn) { /* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */ wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", " ", m, n, n, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORGBR" , "Q", n, n, n, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); maxwrk = *n * *n + wrkbl; /* Computing MAX */ i__2 = *n * 3 + *m; minwrk = max(i__2,bdspac); } else if (wntus && wntvo) { /* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */ wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", " ", m, n, n, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORGBR" , "Q", n, n, n, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", n, n, n, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); maxwrk = (*n << 1) * *n + wrkbl; /* Computing MAX */ i__2 = *n * 3 + *m; minwrk = max(i__2,bdspac); } else if (wntus && wntvas) { /* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or */ /* 'A') */ wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", " ", m, n, n, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORGBR" , "Q", n, n, n, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", n, n, n, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); maxwrk = *n * *n + wrkbl; /* Computing MAX */ i__2 = *n * 3 + *m; minwrk = max(i__2,bdspac); } else if (wntua && wntvn) { /* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */ wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *n + *m * _starpu_ilaenv_(&c__1, "DORGQR", " ", m, m, n, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORGBR" , "Q", n, n, n, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); maxwrk = *n * *n + wrkbl; /* Computing MAX */ i__2 = *n * 3 + *m; minwrk = max(i__2,bdspac); } else if (wntua && wntvo) { /* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */ wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *n + *m * _starpu_ilaenv_(&c__1, "DORGQR", " ", m, m, n, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORGBR" , "Q", n, n, n, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", n, n, n, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); maxwrk = (*n << 1) * *n + wrkbl; /* Computing MAX */ i__2 = *n * 3 + *m; minwrk = max(i__2,bdspac); } else if (wntua && wntvas) { /* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or */ /* 'A') */ wrkbl = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *n + *m * _starpu_ilaenv_(&c__1, "DORGQR", " ", m, m, n, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + (*n << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORGBR" , "Q", n, n, n, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", n, n, n, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); maxwrk = *n * *n + wrkbl; /* Computing MAX */ i__2 = *n * 3 + *m; minwrk = max(i__2,bdspac); } } else { /* Path 10 (M at least N, but not much larger) */ maxwrk = *n * 3 + (*m + *n) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1); if (wntus || wntuo) { /* Computing MAX */ i__2 = maxwrk, i__3 = *n * 3 + *n * _starpu_ilaenv_(&c__1, "DORG" "BR", "Q", m, n, n, &c_n1); maxwrk = max(i__2,i__3); } if (wntua) { /* Computing MAX */ i__2 = maxwrk, i__3 = *n * 3 + *m * _starpu_ilaenv_(&c__1, "DORG" "BR", "Q", m, m, n, &c_n1); maxwrk = max(i__2,i__3); } if (! wntvn) { /* Computing MAX */ i__2 = maxwrk, i__3 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", n, n, n, &c_n1); maxwrk = max(i__2,i__3); } maxwrk = max(maxwrk,bdspac); /* Computing MAX */ i__2 = *n * 3 + *m; minwrk = max(i__2,bdspac); } } else if (minmn > 0) { /* Compute space needed for DBDSQR */ /* Writing concatenation */ i__1[0] = 1, a__1[0] = jobu; i__1[1] = 1, a__1[1] = jobvt; s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); mnthr = _starpu_ilaenv_(&c__6, "DGESVD", ch__1, m, n, &c__0, &c__0); bdspac = *m * 5; if (*n >= mnthr) { if (wntvn) { /* Path 1t(N much larger than M, JOBVT='N') */ maxwrk = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = maxwrk, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); maxwrk = max(i__2,i__3); if (wntuo || wntuas) { /* Computing MAX */ i__2 = maxwrk, i__3 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORGBR", "Q", m, m, m, &c_n1); maxwrk = max(i__2,i__3); } maxwrk = max(maxwrk,bdspac); /* Computing MAX */ i__2 = *m << 2; minwrk = max(i__2,bdspac); } else if (wntvo && wntun) { /* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */ wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *m + *m * _starpu_ilaenv_(&c__1, "DORGLQ", " ", m, n, m, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", m, m, m, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); /* Computing MAX */ i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m; maxwrk = max(i__2,i__3); /* Computing MAX */ i__2 = *m * 3 + *n; minwrk = max(i__2,bdspac); } else if (wntvo && wntuas) { /* Path 3t(N much larger than M, JOBU='S' or 'A', */ /* JOBVT='O') */ wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *m + *m * _starpu_ilaenv_(&c__1, "DORGLQ", " ", m, n, m, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", m, m, m, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORGBR" , "Q", m, m, m, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); /* Computing MAX */ i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m; maxwrk = max(i__2,i__3); /* Computing MAX */ i__2 = *m * 3 + *n; minwrk = max(i__2,bdspac); } else if (wntvs && wntun) { /* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */ wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *m + *m * _starpu_ilaenv_(&c__1, "DORGLQ", " ", m, n, m, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", m, m, m, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); maxwrk = *m * *m + wrkbl; /* Computing MAX */ i__2 = *m * 3 + *n; minwrk = max(i__2,bdspac); } else if (wntvs && wntuo) { /* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */ wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *m + *m * _starpu_ilaenv_(&c__1, "DORGLQ", " ", m, n, m, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", m, m, m, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORGBR" , "Q", m, m, m, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); maxwrk = (*m << 1) * *m + wrkbl; /* Computing MAX */ i__2 = *m * 3 + *n; minwrk = max(i__2,bdspac); } else if (wntvs && wntuas) { /* Path 6t(N much larger than M, JOBU='S' or 'A', */ /* JOBVT='S') */ wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *m + *m * _starpu_ilaenv_(&c__1, "DORGLQ", " ", m, n, m, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", m, m, m, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORGBR" , "Q", m, m, m, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); maxwrk = *m * *m + wrkbl; /* Computing MAX */ i__2 = *m * 3 + *n; minwrk = max(i__2,bdspac); } else if (wntva && wntun) { /* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */ wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *m + *n * _starpu_ilaenv_(&c__1, "DORGLQ", " ", n, n, m, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", m, m, m, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); maxwrk = *m * *m + wrkbl; /* Computing MAX */ i__2 = *m * 3 + *n; minwrk = max(i__2,bdspac); } else if (wntva && wntuo) { /* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */ wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *m + *n * _starpu_ilaenv_(&c__1, "DORGLQ", " ", n, n, m, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", m, m, m, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORGBR" , "Q", m, m, m, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); maxwrk = (*m << 1) * *m + wrkbl; /* Computing MAX */ i__2 = *m * 3 + *n; minwrk = max(i__2,bdspac); } else if (wntva && wntuas) { /* Path 9t(N much larger than M, JOBU='S' or 'A', */ /* JOBVT='A') */ wrkbl = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1); /* Computing MAX */ i__2 = wrkbl, i__3 = *m + *n * _starpu_ilaenv_(&c__1, "DORGLQ", " ", n, n, m, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m << 1) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "P", m, m, m, &c_n1); wrkbl = max(i__2,i__3); /* Computing MAX */ i__2 = wrkbl, i__3 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORGBR" , "Q", m, m, m, &c_n1); wrkbl = max(i__2,i__3); wrkbl = max(wrkbl,bdspac); maxwrk = *m * *m + wrkbl; /* Computing MAX */ i__2 = *m * 3 + *n; minwrk = max(i__2,bdspac); } } else { /* Path 10t(N greater than M, but not much larger) */ maxwrk = *m * 3 + (*m + *n) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1); if (wntvs || wntvo) { /* Computing MAX */ i__2 = maxwrk, i__3 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORG" "BR", "P", m, n, m, &c_n1); maxwrk = max(i__2,i__3); } if (wntva) { /* Computing MAX */ i__2 = maxwrk, i__3 = *m * 3 + *n * _starpu_ilaenv_(&c__1, "DORG" "BR", "P", n, n, m, &c_n1); maxwrk = max(i__2,i__3); } if (! wntun) { /* Computing MAX */ i__2 = maxwrk, i__3 = *m * 3 + (*m - 1) * _starpu_ilaenv_(&c__1, "DORGBR", "Q", m, m, m, &c_n1); maxwrk = max(i__2,i__3); } maxwrk = max(maxwrk,bdspac); /* Computing MAX */ i__2 = *m * 3 + *n; minwrk = max(i__2,bdspac); } } maxwrk = max(maxwrk,minwrk); work[1] = (doublereal) maxwrk; if (*lwork < minwrk && ! lquery) { *info = -13; } } if (*info != 0) { i__2 = -(*info); _starpu_xerbla_("DGESVD", &i__2); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Get machine constants */ eps = _starpu_dlamch_("P"); smlnum = sqrt(_starpu_dlamch_("S")) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, dum); iscl = 0; if (anrm > 0. && anrm < smlnum) { iscl = 1; _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & ierr); } else if (anrm > bignum) { iscl = 1; _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & ierr); } if (*m >= *n) { /* A has at least as many rows as columns. If A has sufficiently */ /* more rows than columns, first reduce using the QR */ /* decomposition (if sufficient workspace available) */ if (*m >= mnthr) { if (wntun) { /* Path 1 (M much larger than N, JOBU='N') */ /* No left singular vectors to be computed */ itau = 1; iwork = itau + *n; /* Compute A=Q*R */ /* (Workspace: need 2*N, prefer N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], & i__2, &ierr); /* Zero out below R */ i__2 = *n - 1; i__3 = *n - 1; _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &a[a_dim1 + 2], lda); ie = 1; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize R in A */ /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[iwork], &i__2, &ierr); ncvt = 0; if (wntvo || wntvas) { /* If right singular vectors desired, generate P'. */ /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], & work[iwork], &i__2, &ierr); ncvt = *n; } iwork = ie + *n; /* Perform bidiagonal QR iteration, computing right */ /* singular vectors of A in A if desired */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", n, &ncvt, &c__0, &c__0, &s[1], &work[ie], &a[ a_offset], lda, dum, &c__1, dum, &c__1, &work[iwork], info); /* If right singular vectors desired in VT, copy them there */ if (wntvas) { _starpu_dlacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); } } else if (wntuo && wntvn) { /* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */ /* N left singular vectors to be overwritten on A and */ /* no right singular vectors to be computed */ /* Computing MAX */ i__2 = *n << 2; if (*lwork >= *n * *n + max(i__2,bdspac)) { /* Sufficient workspace for a fast algorithm */ ir = 1; /* Computing MAX */ i__2 = wrkbl, i__3 = *lda * *n + *n; if (*lwork >= max(i__2,i__3) + *lda * *n) { /* WORK(IU) is LDA by N, WORK(IR) is LDA by N */ ldwrku = *lda; ldwrkr = *lda; } else /* if(complicated condition) */ { /* Computing MAX */ i__2 = wrkbl, i__3 = *lda * *n + *n; if (*lwork >= max(i__2,i__3) + *n * *n) { /* WORK(IU) is LDA by N, WORK(IR) is N by N */ ldwrku = *lda; ldwrkr = *n; } else { /* WORK(IU) is LDWRKU by N, WORK(IR) is N by N */ ldwrku = (*lwork - *n * *n - *n) / *n; ldwrkr = *n; } } itau = ir + ldwrkr * *n; iwork = itau + *n; /* Compute A=Q*R */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] , &i__2, &ierr); /* Copy R to WORK(IR) and zero out below it */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); i__2 = *n - 1; i__3 = *n - 1; _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &work[ir + 1] , &ldwrkr); /* Generate Q in A */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); ie = itau; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize R in WORK(IR) */ /* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[iwork], &i__2, &ierr); /* Generate left vectors bidiagonalizing R */ /* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], & work[iwork], &i__2, &ierr); iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of R in WORK(IR) */ /* (Workspace: need N*N+BDSPAC) */ _starpu_dbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], dum, & c__1, &work[ir], &ldwrkr, dum, &c__1, &work[iwork] , info); iu = ie + *n; /* Multiply Q in A by left singular vectors of R in */ /* WORK(IR), storing result in WORK(IU) and copying to A */ /* (Workspace: need N*N+2*N, prefer N*N+M*N+N) */ i__2 = *m; i__3 = ldwrku; for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) { /* Computing MIN */ i__4 = *m - i__ + 1; chunk = min(i__4,ldwrku); _starpu_dgemm_("N", "N", &chunk, n, n, &c_b443, &a[i__ + a_dim1], lda, &work[ir], &ldwrkr, &c_b421, & work[iu], &ldwrku); _starpu_dlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + a_dim1], lda); /* L10: */ } } else { /* Insufficient workspace for a fast algorithm */ ie = 1; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize A */ /* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */ i__3 = *lwork - iwork + 1; _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[iwork], &i__3, &ierr); /* Generate left vectors bidiagonalizing A */ /* (Workspace: need 4*N, prefer 3*N+N*NB) */ i__3 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & work[iwork], &i__3, &ierr); iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of A in A */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], dum, & c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], info); } } else if (wntuo && wntvas) { /* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') */ /* N left singular vectors to be overwritten on A and */ /* N right singular vectors to be computed in VT */ /* Computing MAX */ i__3 = *n << 2; if (*lwork >= *n * *n + max(i__3,bdspac)) { /* Sufficient workspace for a fast algorithm */ ir = 1; /* Computing MAX */ i__3 = wrkbl, i__2 = *lda * *n + *n; if (*lwork >= max(i__3,i__2) + *lda * *n) { /* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ ldwrku = *lda; ldwrkr = *lda; } else /* if(complicated condition) */ { /* Computing MAX */ i__3 = wrkbl, i__2 = *lda * *n + *n; if (*lwork >= max(i__3,i__2) + *n * *n) { /* WORK(IU) is LDA by N and WORK(IR) is N by N */ ldwrku = *lda; ldwrkr = *n; } else { /* WORK(IU) is LDWRKU by N and WORK(IR) is N by N */ ldwrku = (*lwork - *n * *n - *n) / *n; ldwrkr = *n; } } itau = ir + ldwrkr * *n; iwork = itau + *n; /* Compute A=Q*R */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__3 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] , &i__3, &ierr); /* Copy R to VT, zeroing out below it */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); if (*n > 1) { i__3 = *n - 1; i__2 = *n - 1; _starpu_dlaset_("L", &i__3, &i__2, &c_b421, &c_b421, &vt[ vt_dim1 + 2], ldvt); } /* Generate Q in A */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__3 = *lwork - iwork + 1; _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__3, &ierr); ie = itau; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize R in VT, copying result to WORK(IR) */ /* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ i__3 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], &i__3, & ierr); _starpu_dlacpy_("L", n, n, &vt[vt_offset], ldvt, &work[ir], & ldwrkr); /* Generate left vectors bidiagonalizing R in WORK(IR) */ /* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ i__3 = *lwork - iwork + 1; _starpu_dorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], & work[iwork], &i__3, &ierr); /* Generate right vectors bidiagonalizing R in VT */ /* (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) */ i__3 = *lwork - iwork + 1; _starpu_dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], &work[iwork], &i__3, &ierr); iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of R in WORK(IR) and computing right */ /* singular vectors of R in VT */ /* (Workspace: need N*N+BDSPAC) */ _starpu_dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[ vt_offset], ldvt, &work[ir], &ldwrkr, dum, &c__1, &work[iwork], info); iu = ie + *n; /* Multiply Q in A by left singular vectors of R in */ /* WORK(IR), storing result in WORK(IU) and copying to A */ /* (Workspace: need N*N+2*N, prefer N*N+M*N+N) */ i__3 = *m; i__2 = ldwrku; for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ += i__2) { /* Computing MIN */ i__4 = *m - i__ + 1; chunk = min(i__4,ldwrku); _starpu_dgemm_("N", "N", &chunk, n, n, &c_b443, &a[i__ + a_dim1], lda, &work[ir], &ldwrkr, &c_b421, & work[iu], &ldwrku); _starpu_dlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + a_dim1], lda); /* L20: */ } } else { /* Insufficient workspace for a fast algorithm */ itau = 1; iwork = itau + *n; /* Compute A=Q*R */ /* (Workspace: need 2*N, prefer N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] , &i__2, &ierr); /* Copy R to VT, zeroing out below it */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); if (*n > 1) { i__2 = *n - 1; i__3 = *n - 1; _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &vt[ vt_dim1 + 2], ldvt); } /* Generate Q in A */ /* (Workspace: need 2*N, prefer N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); ie = itau; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize R in VT */ /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], &i__2, & ierr); /* Multiply Q in A by left vectors bidiagonalizing R */ /* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, & work[itauq], &a[a_offset], lda, &work[iwork], & i__2, &ierr); /* Generate right vectors bidiagonalizing R in VT */ /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], &work[iwork], &i__2, &ierr); iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of A in A and computing right */ /* singular vectors of A in VT */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[ vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & work[iwork], info); } } else if (wntus) { if (wntvn) { /* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */ /* N left singular vectors to be computed in U and */ /* no right singular vectors to be computed */ /* Computing MAX */ i__2 = *n << 2; if (*lwork >= *n * *n + max(i__2,bdspac)) { /* Sufficient workspace for a fast algorithm */ ir = 1; if (*lwork >= wrkbl + *lda * *n) { /* WORK(IR) is LDA by N */ ldwrkr = *lda; } else { /* WORK(IR) is N by N */ ldwrkr = *n; } itau = ir + ldwrkr * *n; iwork = itau + *n; /* Compute A=Q*R */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); /* Copy R to WORK(IR), zeroing out below it */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], & ldwrkr); i__2 = *n - 1; i__3 = *n - 1; _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &work[ir + 1], &ldwrkr); /* Generate Q in A */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize R in WORK(IR) */ /* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Generate left vectors bidiagonalizing R in WORK(IR) */ /* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq] , &work[iwork], &i__2, &ierr); iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of R in WORK(IR) */ /* (Workspace: need N*N+BDSPAC) */ _starpu_dbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, & work[iwork], info); /* Multiply Q in A by left singular vectors of R in */ /* WORK(IR), storing result in U */ /* (Workspace: need N*N) */ _starpu_dgemm_("N", "N", m, n, n, &c_b443, &a[a_offset], lda, &work[ir], &ldwrkr, &c_b421, &u[u_offset], ldu); } else { /* Insufficient workspace for a fast algorithm */ itau = 1; iwork = itau + *n; /* Compute A=Q*R, copying result to U */ /* (Workspace: need 2*N, prefer N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); /* Generate Q in U */ /* (Workspace: need 2*N, prefer N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Zero out below R in A */ i__2 = *n - 1; i__3 = *n - 1; _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &a[ a_dim1 + 2], lda); /* Bidiagonalize R in A */ /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Multiply Q in U by left vectors bidiagonalizing R */ /* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & work[itauq], &u[u_offset], ldu, &work[iwork], &i__2, &ierr) ; iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of A in U */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], dum, &c__1, &u[u_offset], ldu, dum, &c__1, & work[iwork], info); } } else if (wntvo) { /* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */ /* N left singular vectors to be computed in U and */ /* N right singular vectors to be overwritten on A */ /* Computing MAX */ i__2 = *n << 2; if (*lwork >= (*n << 1) * *n + max(i__2,bdspac)) { /* Sufficient workspace for a fast algorithm */ iu = 1; if (*lwork >= wrkbl + (*lda << 1) * *n) { /* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ ldwrku = *lda; ir = iu + ldwrku * *n; ldwrkr = *lda; } else if (*lwork >= wrkbl + (*lda + *n) * *n) { /* WORK(IU) is LDA by N and WORK(IR) is N by N */ ldwrku = *lda; ir = iu + ldwrku * *n; ldwrkr = *n; } else { /* WORK(IU) is N by N and WORK(IR) is N by N */ ldwrku = *n; ir = iu + ldwrku * *n; ldwrkr = *n; } itau = ir + ldwrkr * *n; iwork = itau + *n; /* Compute A=Q*R */ /* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); /* Copy R to WORK(IU), zeroing out below it */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & ldwrku); i__2 = *n - 1; i__3 = *n - 1; _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &work[iu + 1], &ldwrku); /* Generate Q in A */ /* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize R in WORK(IU), copying result to */ /* WORK(IR) */ /* (Workspace: need 2*N*N+4*N, */ /* prefer 2*N*N+3*N+2*N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); _starpu_dlacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], & ldwrkr); /* Generate left bidiagonalizing vectors in WORK(IU) */ /* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] , &work[iwork], &i__2, &ierr); /* Generate right bidiagonalizing vectors in WORK(IR) */ /* (Workspace: need 2*N*N+4*N-1, */ /* prefer 2*N*N+3*N+(N-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup] , &work[iwork], &i__2, &ierr); iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of R in WORK(IU) and computing */ /* right singular vectors of R in WORK(IR) */ /* (Workspace: need 2*N*N+BDSPAC) */ _starpu_dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[ ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, &work[iwork], info); /* Multiply Q in A by left singular vectors of R in */ /* WORK(IU), storing result in U */ /* (Workspace: need N*N) */ _starpu_dgemm_("N", "N", m, n, n, &c_b443, &a[a_offset], lda, &work[iu], &ldwrku, &c_b421, &u[u_offset], ldu); /* Copy right singular vectors of R to A */ /* (Workspace: need N*N) */ _starpu_dlacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], lda); } else { /* Insufficient workspace for a fast algorithm */ itau = 1; iwork = itau + *n; /* Compute A=Q*R, copying result to U */ /* (Workspace: need 2*N, prefer N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); /* Generate Q in U */ /* (Workspace: need 2*N, prefer N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Zero out below R in A */ i__2 = *n - 1; i__3 = *n - 1; _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &a[ a_dim1 + 2], lda); /* Bidiagonalize R in A */ /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Multiply Q in U by left vectors bidiagonalizing R */ /* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & work[itauq], &u[u_offset], ldu, &work[iwork], &i__2, &ierr) ; /* Generate right vectors bidiagonalizing R in A */ /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], &i__2, &ierr); iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of A in U and computing right */ /* singular vectors of A in A */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[ a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[iwork], info); } } else if (wntvas) { /* Path 6 (M much larger than N, JOBU='S', JOBVT='S' */ /* or 'A') */ /* N left singular vectors to be computed in U and */ /* N right singular vectors to be computed in VT */ /* Computing MAX */ i__2 = *n << 2; if (*lwork >= *n * *n + max(i__2,bdspac)) { /* Sufficient workspace for a fast algorithm */ iu = 1; if (*lwork >= wrkbl + *lda * *n) { /* WORK(IU) is LDA by N */ ldwrku = *lda; } else { /* WORK(IU) is N by N */ ldwrku = *n; } itau = iu + ldwrku * *n; iwork = itau + *n; /* Compute A=Q*R */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); /* Copy R to WORK(IU), zeroing out below it */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & ldwrku); i__2 = *n - 1; i__3 = *n - 1; _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &work[iu + 1], &ldwrku); /* Generate Q in A */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize R in WORK(IU), copying result to VT */ /* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); _starpu_dlacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset], ldvt); /* Generate left bidiagonalizing vectors in WORK(IU) */ /* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] , &work[iwork], &i__2, &ierr); /* Generate right bidiagonalizing vectors in VT */ /* (Workspace: need N*N+4*N-1, */ /* prefer N*N+3*N+(N-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ itaup], &work[iwork], &i__2, &ierr) ; iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of R in WORK(IU) and computing */ /* right singular vectors of R in VT */ /* (Workspace: need N*N+BDSPAC) */ _starpu_dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[ vt_offset], ldvt, &work[iu], &ldwrku, dum, & c__1, &work[iwork], info); /* Multiply Q in A by left singular vectors of R in */ /* WORK(IU), storing result in U */ /* (Workspace: need N*N) */ _starpu_dgemm_("N", "N", m, n, n, &c_b443, &a[a_offset], lda, &work[iu], &ldwrku, &c_b421, &u[u_offset], ldu); } else { /* Insufficient workspace for a fast algorithm */ itau = 1; iwork = itau + *n; /* Compute A=Q*R, copying result to U */ /* (Workspace: need 2*N, prefer N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); /* Generate Q in U */ /* (Workspace: need 2*N, prefer N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & work[iwork], &i__2, &ierr); /* Copy R to VT, zeroing out below it */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); if (*n > 1) { i__2 = *n - 1; i__3 = *n - 1; _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &vt[ vt_dim1 + 2], ldvt); } ie = itau; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize R in VT */ /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], &work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Multiply Q in U by left bidiagonalizing vectors */ /* in VT */ /* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, &work[itauq], &u[u_offset], ldu, &work[iwork], &i__2, &ierr); /* Generate right bidiagonalizing vectors in VT */ /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ itaup], &work[iwork], &i__2, &ierr) ; iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of A in U and computing right */ /* singular vectors of A in VT */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[ vt_offset], ldvt, &u[u_offset], ldu, dum, & c__1, &work[iwork], info); } } } else if (wntua) { if (wntvn) { /* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */ /* M left singular vectors to be computed in U and */ /* no right singular vectors to be computed */ /* Computing MAX */ i__2 = *n + *m, i__3 = *n << 2, i__2 = max(i__2,i__3); if (*lwork >= *n * *n + max(i__2,bdspac)) { /* Sufficient workspace for a fast algorithm */ ir = 1; if (*lwork >= wrkbl + *lda * *n) { /* WORK(IR) is LDA by N */ ldwrkr = *lda; } else { /* WORK(IR) is N by N */ ldwrkr = *n; } itau = ir + ldwrkr * *n; iwork = itau + *n; /* Compute A=Q*R, copying result to U */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); /* Copy R to WORK(IR), zeroing out below it */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], & ldwrkr); i__2 = *n - 1; i__3 = *n - 1; _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &work[ir + 1], &ldwrkr); /* Generate Q in U */ /* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize R in WORK(IR) */ /* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Generate left bidiagonalizing vectors in WORK(IR) */ /* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq] , &work[iwork], &i__2, &ierr); iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of R in WORK(IR) */ /* (Workspace: need N*N+BDSPAC) */ _starpu_dbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, & work[iwork], info); /* Multiply Q in U by left singular vectors of R in */ /* WORK(IR), storing result in A */ /* (Workspace: need N*N) */ _starpu_dgemm_("N", "N", m, n, n, &c_b443, &u[u_offset], ldu, &work[ir], &ldwrkr, &c_b421, &a[a_offset], lda); /* Copy left singular vectors of A from A to U */ _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); } else { /* Insufficient workspace for a fast algorithm */ itau = 1; iwork = itau + *n; /* Compute A=Q*R, copying result to U */ /* (Workspace: need 2*N, prefer N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); /* Generate Q in U */ /* (Workspace: need N+M, prefer N+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Zero out below R in A */ i__2 = *n - 1; i__3 = *n - 1; _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &a[ a_dim1 + 2], lda); /* Bidiagonalize R in A */ /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Multiply Q in U by left bidiagonalizing vectors */ /* in A */ /* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & work[itauq], &u[u_offset], ldu, &work[iwork], &i__2, &ierr) ; iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of A in U */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], dum, &c__1, &u[u_offset], ldu, dum, &c__1, & work[iwork], info); } } else if (wntvo) { /* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */ /* M left singular vectors to be computed in U and */ /* N right singular vectors to be overwritten on A */ /* Computing MAX */ i__2 = *n + *m, i__3 = *n << 2, i__2 = max(i__2,i__3); if (*lwork >= (*n << 1) * *n + max(i__2,bdspac)) { /* Sufficient workspace for a fast algorithm */ iu = 1; if (*lwork >= wrkbl + (*lda << 1) * *n) { /* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ ldwrku = *lda; ir = iu + ldwrku * *n; ldwrkr = *lda; } else if (*lwork >= wrkbl + (*lda + *n) * *n) { /* WORK(IU) is LDA by N and WORK(IR) is N by N */ ldwrku = *lda; ir = iu + ldwrku * *n; ldwrkr = *n; } else { /* WORK(IU) is N by N and WORK(IR) is N by N */ ldwrku = *n; ir = iu + ldwrku * *n; ldwrkr = *n; } itau = ir + ldwrkr * *n; iwork = itau + *n; /* Compute A=Q*R, copying result to U */ /* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); /* Generate Q in U */ /* (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & work[iwork], &i__2, &ierr); /* Copy R to WORK(IU), zeroing out below it */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & ldwrku); i__2 = *n - 1; i__3 = *n - 1; _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &work[iu + 1], &ldwrku); ie = itau; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize R in WORK(IU), copying result to */ /* WORK(IR) */ /* (Workspace: need 2*N*N+4*N, */ /* prefer 2*N*N+3*N+2*N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); _starpu_dlacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], & ldwrkr); /* Generate left bidiagonalizing vectors in WORK(IU) */ /* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] , &work[iwork], &i__2, &ierr); /* Generate right bidiagonalizing vectors in WORK(IR) */ /* (Workspace: need 2*N*N+4*N-1, */ /* prefer 2*N*N+3*N+(N-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup] , &work[iwork], &i__2, &ierr); iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of R in WORK(IU) and computing */ /* right singular vectors of R in WORK(IR) */ /* (Workspace: need 2*N*N+BDSPAC) */ _starpu_dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[ ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, &work[iwork], info); /* Multiply Q in U by left singular vectors of R in */ /* WORK(IU), storing result in A */ /* (Workspace: need N*N) */ _starpu_dgemm_("N", "N", m, n, n, &c_b443, &u[u_offset], ldu, &work[iu], &ldwrku, &c_b421, &a[a_offset], lda); /* Copy left singular vectors of A from A to U */ _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); /* Copy right singular vectors of R from WORK(IR) to A */ _starpu_dlacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], lda); } else { /* Insufficient workspace for a fast algorithm */ itau = 1; iwork = itau + *n; /* Compute A=Q*R, copying result to U */ /* (Workspace: need 2*N, prefer N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); /* Generate Q in U */ /* (Workspace: need N+M, prefer N+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Zero out below R in A */ i__2 = *n - 1; i__3 = *n - 1; _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &a[ a_dim1 + 2], lda); /* Bidiagonalize R in A */ /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Multiply Q in U by left bidiagonalizing vectors */ /* in A */ /* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & work[itauq], &u[u_offset], ldu, &work[iwork], &i__2, &ierr) ; /* Generate right bidiagonalizing vectors in A */ /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], &i__2, &ierr); iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of A in U and computing right */ /* singular vectors of A in A */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[ a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[iwork], info); } } else if (wntvas) { /* Path 9 (M much larger than N, JOBU='A', JOBVT='S' */ /* or 'A') */ /* M left singular vectors to be computed in U and */ /* N right singular vectors to be computed in VT */ /* Computing MAX */ i__2 = *n + *m, i__3 = *n << 2, i__2 = max(i__2,i__3); if (*lwork >= *n * *n + max(i__2,bdspac)) { /* Sufficient workspace for a fast algorithm */ iu = 1; if (*lwork >= wrkbl + *lda * *n) { /* WORK(IU) is LDA by N */ ldwrku = *lda; } else { /* WORK(IU) is N by N */ ldwrku = *n; } itau = iu + ldwrku * *n; iwork = itau + *n; /* Compute A=Q*R, copying result to U */ /* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); /* Generate Q in U */ /* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & work[iwork], &i__2, &ierr); /* Copy R to WORK(IU), zeroing out below it */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & ldwrku); i__2 = *n - 1; i__3 = *n - 1; _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &work[iu + 1], &ldwrku); ie = itau; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize R in WORK(IU), copying result to VT */ /* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); _starpu_dlacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset], ldvt); /* Generate left bidiagonalizing vectors in WORK(IU) */ /* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] , &work[iwork], &i__2, &ierr); /* Generate right bidiagonalizing vectors in VT */ /* (Workspace: need N*N+4*N-1, */ /* prefer N*N+3*N+(N-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ itaup], &work[iwork], &i__2, &ierr) ; iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of R in WORK(IU) and computing */ /* right singular vectors of R in VT */ /* (Workspace: need N*N+BDSPAC) */ _starpu_dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[ vt_offset], ldvt, &work[iu], &ldwrku, dum, & c__1, &work[iwork], info); /* Multiply Q in U by left singular vectors of R in */ /* WORK(IU), storing result in A */ /* (Workspace: need N*N) */ _starpu_dgemm_("N", "N", m, n, n, &c_b443, &u[u_offset], ldu, &work[iu], &ldwrku, &c_b421, &a[a_offset], lda); /* Copy left singular vectors of A from A to U */ _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); } else { /* Insufficient workspace for a fast algorithm */ itau = 1; iwork = itau + *n; /* Compute A=Q*R, copying result to U */ /* (Workspace: need 2*N, prefer N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); /* Generate Q in U */ /* (Workspace: need N+M, prefer N+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & work[iwork], &i__2, &ierr); /* Copy R from A to VT, zeroing out below it */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); if (*n > 1) { i__2 = *n - 1; i__3 = *n - 1; _starpu_dlaset_("L", &i__2, &i__3, &c_b421, &c_b421, &vt[ vt_dim1 + 2], ldvt); } ie = itau; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize R in VT */ /* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], &work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Multiply Q in U by left bidiagonalizing vectors */ /* in VT */ /* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, &work[itauq], &u[u_offset], ldu, &work[iwork], &i__2, &ierr); /* Generate right bidiagonalizing vectors in VT */ /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ itaup], &work[iwork], &i__2, &ierr) ; iwork = ie + *n; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of A in U and computing right */ /* singular vectors of A in VT */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[ vt_offset], ldvt, &u[u_offset], ldu, dum, & c__1, &work[iwork], info); } } } } else { /* M .LT. MNTHR */ /* Path 10 (M at least N, but not much larger) */ /* Reduce to bidiagonal form without QR decomposition */ ie = 1; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize A */ /* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[iwork], &i__2, &ierr); if (wntuas) { /* If left singular vectors desired in U, copy result to U */ /* and generate left bidiagonalizing vectors in U */ /* (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) */ _starpu_dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); if (wntus) { ncu = *n; } if (wntua) { ncu = *m; } i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, &ncu, n, &u[u_offset], ldu, &work[itauq], & work[iwork], &i__2, &ierr); } if (wntvas) { /* If right singular vectors desired in VT, copy result to */ /* VT and generate right bidiagonalizing vectors in VT */ /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ _starpu_dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & work[iwork], &i__2, &ierr); } if (wntuo) { /* If left singular vectors desired in A, generate left */ /* bidiagonalizing vectors in A */ /* (Workspace: need 4*N, prefer 3*N+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ iwork], &i__2, &ierr); } if (wntvo) { /* If right singular vectors desired in A, generate right */ /* bidiagonalizing vectors in A */ /* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[ iwork], &i__2, &ierr); } iwork = ie + *n; if (wntuas || wntuo) { nru = *m; } if (wntun) { nru = 0; } if (wntvas || wntvo) { ncvt = *n; } if (wntvn) { ncvt = 0; } if (! wntuo && ! wntvo) { /* Perform bidiagonal QR iteration, if desired, computing */ /* left singular vectors in U and computing right singular */ /* vectors in VT */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, & work[iwork], info); } else if (! wntuo && wntvo) { /* Perform bidiagonal QR iteration, if desired, computing */ /* left singular vectors in U and computing right singular */ /* vectors in A */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[ a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[ iwork], info); } else { /* Perform bidiagonal QR iteration, if desired, computing */ /* left singular vectors in A and computing right singular */ /* vectors in VT */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & work[iwork], info); } } } else { /* A has more columns than rows. If A has sufficiently more */ /* columns than rows, first reduce using the LQ decomposition (if */ /* sufficient workspace available) */ if (*n >= mnthr) { if (wntvn) { /* Path 1t(N much larger than M, JOBVT='N') */ /* No right singular vectors to be computed */ itau = 1; iwork = itau + *m; /* Compute A=L*Q */ /* (Workspace: need 2*M, prefer M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], & i__2, &ierr); /* Zero out above L */ i__2 = *m - 1; i__3 = *m - 1; _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &a[(a_dim1 << 1) + 1], lda); ie = 1; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize L in A */ /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[iwork], &i__2, &ierr); if (wntuo || wntuas) { /* If left singular vectors desired, generate Q */ /* (Workspace: need 4*M, prefer 3*M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], & work[iwork], &i__2, &ierr); } iwork = ie + *m; nru = 0; if (wntuo || wntuas) { nru = *m; } /* Perform bidiagonal QR iteration, computing left singular */ /* vectors of A in A if desired */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", m, &c__0, &nru, &c__0, &s[1], &work[ie], dum, & c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], info); /* If left singular vectors desired in U, copy them there */ if (wntuas) { _starpu_dlacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); } } else if (wntvo && wntun) { /* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */ /* M right singular vectors to be overwritten on A and */ /* no left singular vectors to be computed */ /* Computing MAX */ i__2 = *m << 2; if (*lwork >= *m * *m + max(i__2,bdspac)) { /* Sufficient workspace for a fast algorithm */ ir = 1; /* Computing MAX */ i__2 = wrkbl, i__3 = *lda * *n + *m; if (*lwork >= max(i__2,i__3) + *lda * *m) { /* WORK(IU) is LDA by N and WORK(IR) is LDA by M */ ldwrku = *lda; chunk = *n; ldwrkr = *lda; } else /* if(complicated condition) */ { /* Computing MAX */ i__2 = wrkbl, i__3 = *lda * *n + *m; if (*lwork >= max(i__2,i__3) + *m * *m) { /* WORK(IU) is LDA by N and WORK(IR) is M by M */ ldwrku = *lda; chunk = *n; ldwrkr = *m; } else { /* WORK(IU) is M by CHUNK and WORK(IR) is M by M */ ldwrku = *m; chunk = (*lwork - *m * *m - *m) / *m; ldwrkr = *m; } } itau = ir + ldwrkr * *m; iwork = itau + *m; /* Compute A=L*Q */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] , &i__2, &ierr); /* Copy L to WORK(IR) and zero out above it */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[ir], &ldwrkr); i__2 = *m - 1; i__3 = *m - 1; _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &work[ir + ldwrkr], &ldwrkr); /* Generate Q in A */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); ie = itau; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize L in WORK(IR) */ /* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[iwork], &i__2, &ierr); /* Generate right vectors bidiagonalizing L */ /* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], & work[iwork], &i__2, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing right */ /* singular vectors of L in WORK(IR) */ /* (Workspace: need M*M+BDSPAC) */ _starpu_dbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &work[ ir], &ldwrkr, dum, &c__1, dum, &c__1, &work[iwork] , info); iu = ie + *m; /* Multiply right singular vectors of L in WORK(IR) by Q */ /* in A, storing result in WORK(IU) and copying to A */ /* (Workspace: need M*M+2*M, prefer M*M+M*N+M) */ i__2 = *n; i__3 = chunk; for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) { /* Computing MIN */ i__4 = *n - i__ + 1; blk = min(i__4,chunk); _starpu_dgemm_("N", "N", m, &blk, m, &c_b443, &work[ir], & ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b421, & work[iu], &ldwrku); _starpu_dlacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * a_dim1 + 1], lda); /* L30: */ } } else { /* Insufficient workspace for a fast algorithm */ ie = 1; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize A */ /* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ i__3 = *lwork - iwork + 1; _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[iwork], &i__3, &ierr); /* Generate right vectors bidiagonalizing A */ /* (Workspace: need 4*M, prefer 3*M+M*NB) */ i__3 = *lwork - iwork + 1; _starpu_dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & work[iwork], &i__3, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing right */ /* singular vectors of A in A */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("L", m, n, &c__0, &c__0, &s[1], &work[ie], &a[ a_offset], lda, dum, &c__1, dum, &c__1, &work[ iwork], info); } } else if (wntvo && wntuas) { /* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') */ /* M right singular vectors to be overwritten on A and */ /* M left singular vectors to be computed in U */ /* Computing MAX */ i__3 = *m << 2; if (*lwork >= *m * *m + max(i__3,bdspac)) { /* Sufficient workspace for a fast algorithm */ ir = 1; /* Computing MAX */ i__3 = wrkbl, i__2 = *lda * *n + *m; if (*lwork >= max(i__3,i__2) + *lda * *m) { /* WORK(IU) is LDA by N and WORK(IR) is LDA by M */ ldwrku = *lda; chunk = *n; ldwrkr = *lda; } else /* if(complicated condition) */ { /* Computing MAX */ i__3 = wrkbl, i__2 = *lda * *n + *m; if (*lwork >= max(i__3,i__2) + *m * *m) { /* WORK(IU) is LDA by N and WORK(IR) is M by M */ ldwrku = *lda; chunk = *n; ldwrkr = *m; } else { /* WORK(IU) is M by CHUNK and WORK(IR) is M by M */ ldwrku = *m; chunk = (*lwork - *m * *m - *m) / *m; ldwrkr = *m; } } itau = ir + ldwrkr * *m; iwork = itau + *m; /* Compute A=L*Q */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__3 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] , &i__3, &ierr); /* Copy L to U, zeroing about above it */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); i__3 = *m - 1; i__2 = *m - 1; _starpu_dlaset_("U", &i__3, &i__2, &c_b421, &c_b421, &u[(u_dim1 << 1) + 1], ldu); /* Generate Q in A */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__3 = *lwork - iwork + 1; _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ iwork], &i__3, &ierr); ie = itau; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize L in U, copying result to WORK(IR) */ /* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ i__3 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[iwork], &i__3, &ierr); _starpu_dlacpy_("U", m, m, &u[u_offset], ldu, &work[ir], &ldwrkr); /* Generate right vectors bidiagonalizing L in WORK(IR) */ /* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */ i__3 = *lwork - iwork + 1; _starpu_dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], & work[iwork], &i__3, &ierr); /* Generate left vectors bidiagonalizing L in U */ /* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */ i__3 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], & work[iwork], &i__3, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of L in U, and computing right */ /* singular vectors of L in WORK(IR) */ /* (Workspace: need M*M+BDSPAC) */ _starpu_dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ir], &ldwrkr, &u[u_offset], ldu, dum, &c__1, &work[ iwork], info); iu = ie + *m; /* Multiply right singular vectors of L in WORK(IR) by Q */ /* in A, storing result in WORK(IU) and copying to A */ /* (Workspace: need M*M+2*M, prefer M*M+M*N+M)) */ i__3 = *n; i__2 = chunk; for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ += i__2) { /* Computing MIN */ i__4 = *n - i__ + 1; blk = min(i__4,chunk); _starpu_dgemm_("N", "N", m, &blk, m, &c_b443, &work[ir], & ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b421, & work[iu], &ldwrku); _starpu_dlacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * a_dim1 + 1], lda); /* L40: */ } } else { /* Insufficient workspace for a fast algorithm */ itau = 1; iwork = itau + *m; /* Compute A=L*Q */ /* (Workspace: need 2*M, prefer M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] , &i__2, &ierr); /* Copy L to U, zeroing out above it */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); i__2 = *m - 1; i__3 = *m - 1; _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &u[(u_dim1 << 1) + 1], ldu); /* Generate Q in A */ /* (Workspace: need 2*M, prefer M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); ie = itau; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize L in U */ /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[iwork], &i__2, &ierr); /* Multiply right vectors bidiagonalizing L by Q in A */ /* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &work[ itaup], &a[a_offset], lda, &work[iwork], &i__2, & ierr); /* Generate left vectors bidiagonalizing L in U */ /* (Workspace: need 4*M, prefer 3*M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], & work[iwork], &i__2, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of A in U and computing right */ /* singular vectors of A in A */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &a[ a_offset], lda, &u[u_offset], ldu, dum, &c__1, & work[iwork], info); } } else if (wntvs) { if (wntun) { /* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */ /* M right singular vectors to be computed in VT and */ /* no left singular vectors to be computed */ /* Computing MAX */ i__2 = *m << 2; if (*lwork >= *m * *m + max(i__2,bdspac)) { /* Sufficient workspace for a fast algorithm */ ir = 1; if (*lwork >= wrkbl + *lda * *m) { /* WORK(IR) is LDA by M */ ldwrkr = *lda; } else { /* WORK(IR) is M by M */ ldwrkr = *m; } itau = ir + ldwrkr * *m; iwork = itau + *m; /* Compute A=L*Q */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); /* Copy L to WORK(IR), zeroing out above it */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[ir], & ldwrkr); i__2 = *m - 1; i__3 = *m - 1; _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &work[ir + ldwrkr], &ldwrkr); /* Generate Q in A */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize L in WORK(IR) */ /* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Generate right vectors bidiagonalizing L in */ /* WORK(IR) */ /* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup] , &work[iwork], &i__2, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing right */ /* singular vectors of L in WORK(IR) */ /* (Workspace: need M*M+BDSPAC) */ _starpu_dbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], & work[ir], &ldwrkr, dum, &c__1, dum, &c__1, & work[iwork], info); /* Multiply right singular vectors of L in WORK(IR) by */ /* Q in A, storing result in VT */ /* (Workspace: need M*M) */ _starpu_dgemm_("N", "N", m, n, m, &c_b443, &work[ir], &ldwrkr, &a[a_offset], lda, &c_b421, &vt[vt_offset], ldvt); } else { /* Insufficient workspace for a fast algorithm */ itau = 1; iwork = itau + *m; /* Compute A=L*Q */ /* (Workspace: need 2*M, prefer M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); /* Copy result to VT */ _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); /* Generate Q in VT */ /* (Workspace: need 2*M, prefer M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Zero out above L in A */ i__2 = *m - 1; i__3 = *m - 1; _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &a[( a_dim1 << 1) + 1], lda); /* Bidiagonalize L in A */ /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Multiply right vectors bidiagonalizing L by Q in VT */ /* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & work[itaup], &vt[vt_offset], ldvt, &work[ iwork], &i__2, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing right */ /* singular vectors of A in VT */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], & vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, & work[iwork], info); } } else if (wntuo) { /* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */ /* M right singular vectors to be computed in VT and */ /* M left singular vectors to be overwritten on A */ /* Computing MAX */ i__2 = *m << 2; if (*lwork >= (*m << 1) * *m + max(i__2,bdspac)) { /* Sufficient workspace for a fast algorithm */ iu = 1; if (*lwork >= wrkbl + (*lda << 1) * *m) { /* WORK(IU) is LDA by M and WORK(IR) is LDA by M */ ldwrku = *lda; ir = iu + ldwrku * *m; ldwrkr = *lda; } else if (*lwork >= wrkbl + (*lda + *m) * *m) { /* WORK(IU) is LDA by M and WORK(IR) is M by M */ ldwrku = *lda; ir = iu + ldwrku * *m; ldwrkr = *m; } else { /* WORK(IU) is M by M and WORK(IR) is M by M */ ldwrku = *m; ir = iu + ldwrku * *m; ldwrkr = *m; } itau = ir + ldwrkr * *m; iwork = itau + *m; /* Compute A=L*Q */ /* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); /* Copy L to WORK(IU), zeroing out below it */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & ldwrku); i__2 = *m - 1; i__3 = *m - 1; _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &work[iu + ldwrku], &ldwrku); /* Generate Q in A */ /* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize L in WORK(IU), copying result to */ /* WORK(IR) */ /* (Workspace: need 2*M*M+4*M, */ /* prefer 2*M*M+3*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); _starpu_dlacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], & ldwrkr); /* Generate right bidiagonalizing vectors in WORK(IU) */ /* (Workspace: need 2*M*M+4*M-1, */ /* prefer 2*M*M+3*M+(M-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] , &work[iwork], &i__2, &ierr); /* Generate left bidiagonalizing vectors in WORK(IR) */ /* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq] , &work[iwork], &i__2, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of L in WORK(IR) and computing */ /* right singular vectors of L in WORK(IU) */ /* (Workspace: need 2*M*M+BDSPAC) */ _starpu_dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, &work[iwork], info); /* Multiply right singular vectors of L in WORK(IU) by */ /* Q in A, storing result in VT */ /* (Workspace: need M*M) */ _starpu_dgemm_("N", "N", m, n, m, &c_b443, &work[iu], &ldwrku, &a[a_offset], lda, &c_b421, &vt[vt_offset], ldvt); /* Copy left singular vectors of L to A */ /* (Workspace: need M*M) */ _starpu_dlacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], lda); } else { /* Insufficient workspace for a fast algorithm */ itau = 1; iwork = itau + *m; /* Compute A=L*Q, copying result to VT */ /* (Workspace: need 2*M, prefer M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); /* Generate Q in VT */ /* (Workspace: need 2*M, prefer M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Zero out above L in A */ i__2 = *m - 1; i__3 = *m - 1; _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &a[( a_dim1 << 1) + 1], lda); /* Bidiagonalize L in A */ /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Multiply right vectors bidiagonalizing L by Q in VT */ /* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & work[itaup], &vt[vt_offset], ldvt, &work[ iwork], &i__2, &ierr); /* Generate left bidiagonalizing vectors of L in A */ /* (Workspace: need 4*M, prefer 3*M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], &work[iwork], &i__2, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, compute left */ /* singular vectors of A in A and compute right */ /* singular vectors of A in VT */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ vt_offset], ldvt, &a[a_offset], lda, dum, & c__1, &work[iwork], info); } } else if (wntuas) { /* Path 6t(N much larger than M, JOBU='S' or 'A', */ /* JOBVT='S') */ /* M right singular vectors to be computed in VT and */ /* M left singular vectors to be computed in U */ /* Computing MAX */ i__2 = *m << 2; if (*lwork >= *m * *m + max(i__2,bdspac)) { /* Sufficient workspace for a fast algorithm */ iu = 1; if (*lwork >= wrkbl + *lda * *m) { /* WORK(IU) is LDA by N */ ldwrku = *lda; } else { /* WORK(IU) is LDA by M */ ldwrku = *m; } itau = iu + ldwrku * *m; iwork = itau + *m; /* Compute A=L*Q */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); /* Copy L to WORK(IU), zeroing out above it */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & ldwrku); i__2 = *m - 1; i__3 = *m - 1; _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &work[iu + ldwrku], &ldwrku); /* Generate Q in A */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorglq_(m, n, m, &a[a_offset], lda, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize L in WORK(IU), copying result to U */ /* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); _starpu_dlacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], ldu); /* Generate right bidiagonalizing vectors in WORK(IU) */ /* (Workspace: need M*M+4*M-1, */ /* prefer M*M+3*M+(M-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] , &work[iwork], &i__2, &ierr); /* Generate left bidiagonalizing vectors in U */ /* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &work[iwork], &i__2, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of L in U and computing right */ /* singular vectors of L in WORK(IU) */ /* (Workspace: need M*M+BDSPAC) */ _starpu_dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, & work[iwork], info); /* Multiply right singular vectors of L in WORK(IU) by */ /* Q in A, storing result in VT */ /* (Workspace: need M*M) */ _starpu_dgemm_("N", "N", m, n, m, &c_b443, &work[iu], &ldwrku, &a[a_offset], lda, &c_b421, &vt[vt_offset], ldvt); } else { /* Insufficient workspace for a fast algorithm */ itau = 1; iwork = itau + *m; /* Compute A=L*Q, copying result to VT */ /* (Workspace: need 2*M, prefer M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); /* Generate Q in VT */ /* (Workspace: need 2*M, prefer M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & work[iwork], &i__2, &ierr); /* Copy L to U, zeroing out above it */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); i__2 = *m - 1; i__3 = *m - 1; _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &u[( u_dim1 << 1) + 1], ldu); ie = itau; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize L in U */ /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Multiply right bidiagonalizing vectors in U by Q */ /* in VT */ /* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, & work[itaup], &vt[vt_offset], ldvt, &work[ iwork], &i__2, &ierr); /* Generate left bidiagonalizing vectors in U */ /* (Workspace: need 4*M, prefer 3*M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &work[iwork], &i__2, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of A in U and computing right */ /* singular vectors of A in VT */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ vt_offset], ldvt, &u[u_offset], ldu, dum, & c__1, &work[iwork], info); } } } else if (wntva) { if (wntun) { /* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */ /* N right singular vectors to be computed in VT and */ /* no left singular vectors to be computed */ /* Computing MAX */ i__2 = *n + *m, i__3 = *m << 2, i__2 = max(i__2,i__3); if (*lwork >= *m * *m + max(i__2,bdspac)) { /* Sufficient workspace for a fast algorithm */ ir = 1; if (*lwork >= wrkbl + *lda * *m) { /* WORK(IR) is LDA by M */ ldwrkr = *lda; } else { /* WORK(IR) is M by M */ ldwrkr = *m; } itau = ir + ldwrkr * *m; iwork = itau + *m; /* Compute A=L*Q, copying result to VT */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); /* Copy L to WORK(IR), zeroing out above it */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[ir], & ldwrkr); i__2 = *m - 1; i__3 = *m - 1; _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &work[ir + ldwrkr], &ldwrkr); /* Generate Q in VT */ /* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize L in WORK(IR) */ /* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Generate right bidiagonalizing vectors in WORK(IR) */ /* (Workspace: need M*M+4*M-1, */ /* prefer M*M+3*M+(M-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup] , &work[iwork], &i__2, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing right */ /* singular vectors of L in WORK(IR) */ /* (Workspace: need M*M+BDSPAC) */ _starpu_dbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], & work[ir], &ldwrkr, dum, &c__1, dum, &c__1, & work[iwork], info); /* Multiply right singular vectors of L in WORK(IR) by */ /* Q in VT, storing result in A */ /* (Workspace: need M*M) */ _starpu_dgemm_("N", "N", m, n, m, &c_b443, &work[ir], &ldwrkr, &vt[vt_offset], ldvt, &c_b421, &a[a_offset], lda); /* Copy right singular vectors of A from A to VT */ _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); } else { /* Insufficient workspace for a fast algorithm */ itau = 1; iwork = itau + *m; /* Compute A=L*Q, copying result to VT */ /* (Workspace: need 2*M, prefer M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); /* Generate Q in VT */ /* (Workspace: need M+N, prefer M+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Zero out above L in A */ i__2 = *m - 1; i__3 = *m - 1; _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &a[( a_dim1 << 1) + 1], lda); /* Bidiagonalize L in A */ /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Multiply right bidiagonalizing vectors in A by Q */ /* in VT */ /* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & work[itaup], &vt[vt_offset], ldvt, &work[ iwork], &i__2, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing right */ /* singular vectors of A in VT */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], & vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, & work[iwork], info); } } else if (wntuo) { /* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */ /* N right singular vectors to be computed in VT and */ /* M left singular vectors to be overwritten on A */ /* Computing MAX */ i__2 = *n + *m, i__3 = *m << 2, i__2 = max(i__2,i__3); if (*lwork >= (*m << 1) * *m + max(i__2,bdspac)) { /* Sufficient workspace for a fast algorithm */ iu = 1; if (*lwork >= wrkbl + (*lda << 1) * *m) { /* WORK(IU) is LDA by M and WORK(IR) is LDA by M */ ldwrku = *lda; ir = iu + ldwrku * *m; ldwrkr = *lda; } else if (*lwork >= wrkbl + (*lda + *m) * *m) { /* WORK(IU) is LDA by M and WORK(IR) is M by M */ ldwrku = *lda; ir = iu + ldwrku * *m; ldwrkr = *m; } else { /* WORK(IU) is M by M and WORK(IR) is M by M */ ldwrku = *m; ir = iu + ldwrku * *m; ldwrkr = *m; } itau = ir + ldwrkr * *m; iwork = itau + *m; /* Compute A=L*Q, copying result to VT */ /* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); /* Generate Q in VT */ /* (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & work[iwork], &i__2, &ierr); /* Copy L to WORK(IU), zeroing out above it */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & ldwrku); i__2 = *m - 1; i__3 = *m - 1; _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &work[iu + ldwrku], &ldwrku); ie = itau; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize L in WORK(IU), copying result to */ /* WORK(IR) */ /* (Workspace: need 2*M*M+4*M, */ /* prefer 2*M*M+3*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); _starpu_dlacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], & ldwrkr); /* Generate right bidiagonalizing vectors in WORK(IU) */ /* (Workspace: need 2*M*M+4*M-1, */ /* prefer 2*M*M+3*M+(M-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] , &work[iwork], &i__2, &ierr); /* Generate left bidiagonalizing vectors in WORK(IR) */ /* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq] , &work[iwork], &i__2, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of L in WORK(IR) and computing */ /* right singular vectors of L in WORK(IU) */ /* (Workspace: need 2*M*M+BDSPAC) */ _starpu_dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, &work[iwork], info); /* Multiply right singular vectors of L in WORK(IU) by */ /* Q in VT, storing result in A */ /* (Workspace: need M*M) */ _starpu_dgemm_("N", "N", m, n, m, &c_b443, &work[iu], &ldwrku, &vt[vt_offset], ldvt, &c_b421, &a[a_offset], lda); /* Copy right singular vectors of A from A to VT */ _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); /* Copy left singular vectors of A from WORK(IR) to A */ _starpu_dlacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], lda); } else { /* Insufficient workspace for a fast algorithm */ itau = 1; iwork = itau + *m; /* Compute A=L*Q, copying result to VT */ /* (Workspace: need 2*M, prefer M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); /* Generate Q in VT */ /* (Workspace: need M+N, prefer M+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & work[iwork], &i__2, &ierr); ie = itau; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Zero out above L in A */ i__2 = *m - 1; i__3 = *m - 1; _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &a[( a_dim1 << 1) + 1], lda); /* Bidiagonalize L in A */ /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Multiply right bidiagonalizing vectors in A by Q */ /* in VT */ /* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & work[itaup], &vt[vt_offset], ldvt, &work[ iwork], &i__2, &ierr); /* Generate left bidiagonalizing vectors in A */ /* (Workspace: need 4*M, prefer 3*M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], &work[iwork], &i__2, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of A in A and computing right */ /* singular vectors of A in VT */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ vt_offset], ldvt, &a[a_offset], lda, dum, & c__1, &work[iwork], info); } } else if (wntuas) { /* Path 9t(N much larger than M, JOBU='S' or 'A', */ /* JOBVT='A') */ /* N right singular vectors to be computed in VT and */ /* M left singular vectors to be computed in U */ /* Computing MAX */ i__2 = *n + *m, i__3 = *m << 2, i__2 = max(i__2,i__3); if (*lwork >= *m * *m + max(i__2,bdspac)) { /* Sufficient workspace for a fast algorithm */ iu = 1; if (*lwork >= wrkbl + *lda * *m) { /* WORK(IU) is LDA by M */ ldwrku = *lda; } else { /* WORK(IU) is M by M */ ldwrku = *m; } itau = iu + ldwrku * *m; iwork = itau + *m; /* Compute A=L*Q, copying result to VT */ /* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); /* Generate Q in VT */ /* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & work[iwork], &i__2, &ierr); /* Copy L to WORK(IU), zeroing out above it */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & ldwrku); i__2 = *m - 1; i__3 = *m - 1; _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &work[iu + ldwrku], &ldwrku); ie = itau; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize L in WORK(IU), copying result to U */ /* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); _starpu_dlacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], ldu); /* Generate right bidiagonalizing vectors in WORK(IU) */ /* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] , &work[iwork], &i__2, &ierr); /* Generate left bidiagonalizing vectors in U */ /* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &work[iwork], &i__2, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of L in U and computing right */ /* singular vectors of L in WORK(IU) */ /* (Workspace: need M*M+BDSPAC) */ _starpu_dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, & work[iwork], info); /* Multiply right singular vectors of L in WORK(IU) by */ /* Q in VT, storing result in A */ /* (Workspace: need M*M) */ _starpu_dgemm_("N", "N", m, n, m, &c_b443, &work[iu], &ldwrku, &vt[vt_offset], ldvt, &c_b421, &a[a_offset], lda); /* Copy right singular vectors of A from A to VT */ _starpu_dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); } else { /* Insufficient workspace for a fast algorithm */ itau = 1; iwork = itau + *m; /* Compute A=L*Q, copying result to VT */ /* (Workspace: need 2*M, prefer M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ iwork], &i__2, &ierr); _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); /* Generate Q in VT */ /* (Workspace: need M+N, prefer M+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & work[iwork], &i__2, &ierr); /* Copy L to U, zeroing out above it */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); i__2 = *m - 1; i__3 = *m - 1; _starpu_dlaset_("U", &i__2, &i__3, &c_b421, &c_b421, &u[( u_dim1 << 1) + 1], ldu); ie = itau; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize L in U */ /* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], & work[itauq], &work[itaup], &work[iwork], & i__2, &ierr); /* Multiply right bidiagonalizing vectors in U by Q */ /* in VT */ /* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ i__2 = *lwork - iwork + 1; _starpu_dormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, & work[itaup], &vt[vt_offset], ldvt, &work[ iwork], &i__2, &ierr); /* Generate left bidiagonalizing vectors in U */ /* (Workspace: need 4*M, prefer 3*M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &work[iwork], &i__2, &ierr); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing left */ /* singular vectors of A in U and computing right */ /* singular vectors of A in VT */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ vt_offset], ldvt, &u[u_offset], ldu, dum, & c__1, &work[iwork], info); } } } } else { /* N .LT. MNTHR */ /* Path 10t(N greater than M, but not much larger) */ /* Reduce to bidiagonal form without LQ decomposition */ ie = 1; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize A */ /* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[iwork], &i__2, &ierr); if (wntuas) { /* If left singular vectors desired in U, copy result to U */ /* and generate left bidiagonalizing vectors in U */ /* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */ _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ iwork], &i__2, &ierr); } if (wntvas) { /* If right singular vectors desired in VT, copy result to */ /* VT and generate right bidiagonalizing vectors in VT */ /* (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) */ _starpu_dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); if (wntva) { nrvt = *n; } if (wntvs) { nrvt = *m; } i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", &nrvt, n, m, &vt[vt_offset], ldvt, &work[itaup], &work[iwork], &i__2, &ierr); } if (wntuo) { /* If left singular vectors desired in A, generate left */ /* bidiagonalizing vectors in A */ /* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("Q", m, m, n, &a[a_offset], lda, &work[itauq], &work[ iwork], &i__2, &ierr); } if (wntvo) { /* If right singular vectors desired in A, generate right */ /* bidiagonalizing vectors in A */ /* (Workspace: need 4*M, prefer 3*M+M*NB) */ i__2 = *lwork - iwork + 1; _starpu_dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ iwork], &i__2, &ierr); } iwork = ie + *m; if (wntuas || wntuo) { nru = *m; } if (wntun) { nru = 0; } if (wntvas || wntvo) { ncvt = *n; } if (wntvn) { ncvt = 0; } if (! wntuo && ! wntvo) { /* Perform bidiagonal QR iteration, if desired, computing */ /* left singular vectors in U and computing right singular */ /* vectors in VT */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, & work[iwork], info); } else if (! wntuo && wntvo) { /* Perform bidiagonal QR iteration, if desired, computing */ /* left singular vectors in U and computing right singular */ /* vectors in A */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[ a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[ iwork], info); } else { /* Perform bidiagonal QR iteration, if desired, computing */ /* left singular vectors in A and computing right singular */ /* vectors in VT */ /* (Workspace: need BDSPAC) */ _starpu_dbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & work[iwork], info); } } } /* If DBDSQR failed to converge, copy unconverged superdiagonals */ /* to WORK( 2:MINMN ) */ if (*info != 0) { if (ie > 2) { i__2 = minmn - 1; for (i__ = 1; i__ <= i__2; ++i__) { work[i__ + 1] = work[i__ + ie - 1]; /* L50: */ } } if (ie < 2) { for (i__ = minmn - 1; i__ >= 1; --i__) { work[i__ + 1] = work[i__ + ie - 1]; /* L60: */ } } } /* Undo scaling if necessary */ if (iscl == 1) { if (anrm > bignum) { _starpu_dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & minmn, &ierr); } if (*info != 0 && anrm > bignum) { i__2 = minmn - 1; _starpu_dlascl_("G", &c__0, &c__0, &bignum, &anrm, &i__2, &c__1, &work[2], &minmn, &ierr); } if (anrm < smlnum) { _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & minmn, &ierr); } if (*info != 0 && anrm < smlnum) { i__2 = minmn - 1; _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__2, &c__1, &work[2], &minmn, &ierr); } } /* Return optimal workspace in WORK(1) */ work[1] = (doublereal) maxwrk; return 0; /* End of DGESVD */ } /* _starpu_dgesvd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgesvj.c000066400000000000000000001557441507764646700206760ustar00rootroot00000000000000/* dgesvj.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b17 = 0.; static doublereal c_b18 = 1.; static integer c__1 = 1; static integer c__0 = 0; static integer c__2 = 2; /* Subroutine */ int _starpu_dgesvj_(char *joba, char *jobu, char *jobv, integer *m, integer *n, doublereal *a, integer *lda, doublereal *sva, integer *mv, doublereal *v, integer *ldv, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ doublereal bigtheta; integer pskipped, i__, p, q; doublereal t; integer n2, n4; doublereal rootsfmin; integer n34; doublereal cs, sn; integer ir1, jbc; doublereal big; integer kbl, igl, ibr, jgl, nbl; doublereal tol; integer mvl; doublereal aapp, aapq, aaqq; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal ctol; integer ierr; doublereal aapp0; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); doublereal temp1; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal scale, large, apoaq, aqoap; extern logical _starpu_lsame_(char *, char *); doublereal theta, small, sfmin; logical lsvec; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal fastr[5]; extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); logical applv, rsvec; extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical uctol; extern /* Subroutine */ int _starpu_drotm_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *); logical lower, upper, rotok; extern /* Subroutine */ int _starpu_dgsvj0_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dgsvj1_( char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); integer ijblsk, swband, blskip; doublereal mxaapq; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); doublereal thsign, mxsinj; integer emptsw, notrot, iswrot, lkahead; logical goscale, noscale; doublereal rootbig, epsilon, rooteps; integer rowskip; doublereal roottol; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Zlatko Drmac of the University of Zagreb and -- */ /* -- Kresimir Veselic of the Fernuniversitaet Hagen -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* This routine is also part of SIGMA (version 1.23, October 23. 2008.) */ /* SIGMA is a library of algorithms for highly accurate algorithms for */ /* computation of SVD, PSVD, QSVD, (H,K)-SVD, and for solution of the */ /* eigenvalue problems Hx = lambda M x, H M x = lambda x with H, M > 0. */ /* -#- Scalar Arguments -#- */ /* -#- Array Arguments -#- */ /* .. */ /* Purpose */ /* ~~~~~~~ */ /* DGESVJ computes the singular value decomposition (SVD) of a real */ /* M-by-N matrix A, where M >= N. The SVD of A is written as */ /* [++] [xx] [x0] [xx] */ /* A = U * SIGMA * V^t, [++] = [xx] * [ox] * [xx] */ /* [++] [xx] */ /* where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal */ /* matrix, and V is an N-by-N orthogonal matrix. The diagonal elements */ /* of SIGMA are the singular values of A. The columns of U and V are the */ /* left and the right singular vectors of A, respectively. */ /* Further Details */ /* ~~~~~~~~~~~~~~~ */ /* The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane */ /* rotations. The rotations are implemented as fast scaled rotations of */ /* Anda and Park [1]. In the case of underflow of the Jacobi angle, a */ /* modified Jacobi transformation of Drmac [4] is used. Pivot strategy uses */ /* column interchanges of de Rijk [2]. The relative accuracy of the computed */ /* singular values and the accuracy of the computed singular vectors (in */ /* angle metric) is as guaranteed by the theory of Demmel and Veselic [3]. */ /* The condition number that determines the accuracy in the full rank case */ /* is essentially min_{D=diag} kappa(A*D), where kappa(.) is the */ /* spectral condition number. The best performance of this Jacobi SVD */ /* procedure is achieved if used in an accelerated version of Drmac and */ /* Veselic [5,6], and it is the kernel routine in the SIGMA library [7]. */ /* Some tunning parameters (marked with [TP]) are available for the */ /* implementer. */ /* The computational range for the nonzero singular values is the machine */ /* number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even */ /* denormalized singular values can be computed with the corresponding */ /* gradual loss of accurate digits. */ /* Contributors */ /* ~~~~~~~~~~~~ */ /* Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */ /* References */ /* ~~~~~~~~~~ */ /* [1] A. A. Anda and H. Park: Fast plane rotations with dynamic scaling. */ /* SIAM J. matrix Anal. Appl., Vol. 15 (1994), pp. 162-174. */ /* [2] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the */ /* singular value decomposition on a vector computer. */ /* SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371. */ /* [3] J. Demmel and K. Veselic: Jacobi method is more accurate than QR. */ /* [4] Z. Drmac: Implementation of Jacobi rotations for accurate singular */ /* value computation in floating point arithmetic. */ /* SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222. */ /* [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. */ /* SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. */ /* LAPACK Working note 169. */ /* [6] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. */ /* SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. */ /* LAPACK Working note 170. */ /* [7] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, */ /* QSVD, (H,K)-SVD computations. */ /* Department of Mathematics, University of Zagreb, 2008. */ /* Bugs, Examples and Comments */ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ /* Please report all bugs and send interesting test examples and comments to */ /* drmac@math.hr. Thank you. */ /* Arguments */ /* ~~~~~~~~~ */ /* JOBA (input) CHARACTER* 1 */ /* Specifies the structure of A. */ /* = 'L': The input matrix A is lower triangular; */ /* = 'U': The input matrix A is upper triangular; */ /* = 'G': The input matrix A is general M-by-N matrix, M >= N. */ /* JOBU (input) CHARACTER*1 */ /* Specifies whether to compute the left singular vectors */ /* (columns of U): */ /* = 'U': The left singular vectors corresponding to the nonzero */ /* singular values are computed and returned in the leading */ /* columns of A. See more details in the description of A. */ /* The default numerical orthogonality threshold is set to */ /* approximately TOL=CTOL*EPS, CTOL=DSQRT(M), EPS=DLAMCH('E'). */ /* = 'C': Analogous to JOBU='U', except that user can control the */ /* level of numerical orthogonality of the computed left */ /* singular vectors. TOL can be set to TOL = CTOL*EPS, where */ /* CTOL is given on input in the array WORK. */ /* No CTOL smaller than ONE is allowed. CTOL greater */ /* than 1 / EPS is meaningless. The option 'C' */ /* can be used if M*EPS is satisfactory orthogonality */ /* of the computed left singular vectors, so CTOL=M could */ /* save few sweeps of Jacobi rotations. */ /* See the descriptions of A and WORK(1). */ /* = 'N': The matrix U is not computed. However, see the */ /* description of A. */ /* JOBV (input) CHARACTER*1 */ /* Specifies whether to compute the right singular vectors, that */ /* is, the matrix V: */ /* = 'V' : the matrix V is computed and returned in the array V */ /* = 'A' : the Jacobi rotations are applied to the MV-by-N */ /* array V. In other words, the right singular vector */ /* matrix V is not computed explicitly, instead it is */ /* applied to an MV-by-N matrix initially stored in the */ /* first MV rows of V. */ /* = 'N' : the matrix V is not computed and the array V is not */ /* referenced */ /* M (input) INTEGER */ /* The number of rows of the input matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the input matrix A. */ /* M >= N >= 0. */ /* A (input/output) REAL array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, */ /* If JOBU .EQ. 'U' .OR. JOBU .EQ. 'C': */ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ /* If INFO .EQ. 0, */ /* ~~~~~~~~~~~~~~~ */ /* RANKA orthonormal columns of U are returned in the */ /* leading RANKA columns of the array A. Here RANKA <= N */ /* is the number of computed singular values of A that are */ /* above the underflow threshold DLAMCH('S'). The singular */ /* vectors corresponding to underflowed or zero singular */ /* values are not computed. The value of RANKA is returned */ /* in the array WORK as RANKA=NINT(WORK(2)). Also see the */ /* descriptions of SVA and WORK. The computed columns of U */ /* are mutually numerically orthogonal up to approximately */ /* TOL=DSQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU.EQ.'C'), */ /* see the description of JOBU. */ /* If INFO .GT. 0, */ /* ~~~~~~~~~~~~~~~ */ /* the procedure DGESVJ did not converge in the given number */ /* of iterations (sweeps). In that case, the computed */ /* columns of U may not be orthogonal up to TOL. The output */ /* U (stored in A), SIGMA (given by the computed singular */ /* values in SVA(1:N)) and V is still a decomposition of the */ /* input matrix A in the sense that the residual */ /* ||A-SCALE*U*SIGMA*V^T||_2 / ||A||_2 is small. */ /* If JOBU .EQ. 'N': */ /* ~~~~~~~~~~~~~~~~~ */ /* If INFO .EQ. 0 */ /* ~~~~~~~~~~~~~~ */ /* Note that the left singular vectors are 'for free' in the */ /* one-sided Jacobi SVD algorithm. However, if only the */ /* singular values are needed, the level of numerical */ /* orthogonality of U is not an issue and iterations are */ /* stopped when the columns of the iterated matrix are */ /* numerically orthogonal up to approximately M*EPS. Thus, */ /* on exit, A contains the columns of U scaled with the */ /* corresponding singular values. */ /* If INFO .GT. 0, */ /* ~~~~~~~~~~~~~~~ */ /* the procedure DGESVJ did not converge in the given number */ /* of iterations (sweeps). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* SVA (workspace/output) REAL array, dimension (N) */ /* On exit, */ /* If INFO .EQ. 0, */ /* ~~~~~~~~~~~~~~~ */ /* depending on the value SCALE = WORK(1), we have: */ /* If SCALE .EQ. ONE: */ /* ~~~~~~~~~~~~~~~~~~ */ /* SVA(1:N) contains the computed singular values of A. */ /* During the computation SVA contains the Euclidean column */ /* norms of the iterated matrices in the array A. */ /* If SCALE .NE. ONE: */ /* ~~~~~~~~~~~~~~~~~~ */ /* The singular values of A are SCALE*SVA(1:N), and this */ /* factored representation is due to the fact that some of the */ /* singular values of A might underflow or overflow. */ /* If INFO .GT. 0, */ /* ~~~~~~~~~~~~~~~ */ /* the procedure DGESVJ did not converge in the given number of */ /* iterations (sweeps) and SCALE*SVA(1:N) may not be accurate. */ /* MV (input) INTEGER */ /* If JOBV .EQ. 'A', then the product of Jacobi rotations in DGESVJ */ /* is applied to the first MV rows of V. See the description of JOBV. */ /* V (input/output) REAL array, dimension (LDV,N) */ /* If JOBV = 'V', then V contains on exit the N-by-N matrix of */ /* the right singular vectors; */ /* If JOBV = 'A', then V contains the product of the computed right */ /* singular vector matrix and the initial matrix in */ /* the array V. */ /* If JOBV = 'N', then V is not referenced. */ /* LDV (input) INTEGER */ /* The leading dimension of the array V, LDV .GE. 1. */ /* If JOBV .EQ. 'V', then LDV .GE. max(1,N). */ /* If JOBV .EQ. 'A', then LDV .GE. max(1,MV) . */ /* WORK (input/workspace/output) REAL array, dimension max(4,M+N). */ /* On entry, */ /* If JOBU .EQ. 'C', */ /* ~~~~~~~~~~~~~~~~~ */ /* WORK(1) = CTOL, where CTOL defines the threshold for convergence. */ /* The process stops if all columns of A are mutually */ /* orthogonal up to CTOL*EPS, EPS=DLAMCH('E'). */ /* It is required that CTOL >= ONE, i.e. it is not */ /* allowed to force the routine to obtain orthogonality */ /* below EPSILON. */ /* On exit, */ /* WORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N) */ /* are the computed singular vcalues of A. */ /* (See description of SVA().) */ /* WORK(2) = NINT(WORK(2)) is the number of the computed nonzero */ /* singular values. */ /* WORK(3) = NINT(WORK(3)) is the number of the computed singular */ /* values that are larger than the underflow threshold. */ /* WORK(4) = NINT(WORK(4)) is the number of sweeps of Jacobi */ /* rotations needed for numerical convergence. */ /* WORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep. */ /* This is useful information in cases when DGESVJ did */ /* not converge, as it can be used to estimate whether */ /* the output is stil useful and for post festum analysis. */ /* WORK(6) = the largest absolute value over all sines of the */ /* Jacobi rotation angles in the last sweep. It can be */ /* useful for a post festum analysis. */ /* LWORK length of WORK, WORK >= MAX(6,M+N) */ /* INFO (output) INTEGER */ /* = 0 : successful exit. */ /* < 0 : if INFO = -i, then the i-th argument had an illegal value */ /* > 0 : DGESVJ did not converge in the maximal allowed number (30) */ /* of sweeps. The output may still be useful. See the */ /* description of WORK. */ /* Local Parameters */ /* Local Scalars */ /* Local Arrays */ /* Intrinsic Functions */ /* External Functions */ /* .. from BLAS */ /* .. from LAPACK */ /* External Subroutines */ /* .. from BLAS */ /* .. from LAPACK */ /* Test the input arguments */ /* Parameter adjustments */ --sva; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; --work; /* Function Body */ lsvec = _starpu_lsame_(jobu, "U"); uctol = _starpu_lsame_(jobu, "C"); rsvec = _starpu_lsame_(jobv, "V"); applv = _starpu_lsame_(jobv, "A"); upper = _starpu_lsame_(joba, "U"); lower = _starpu_lsame_(joba, "L"); if (! (upper || lower || _starpu_lsame_(joba, "G"))) { *info = -1; } else if (! (lsvec || uctol || _starpu_lsame_(jobu, "N"))) { *info = -2; } else if (! (rsvec || applv || _starpu_lsame_(jobv, "N"))) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*n < 0 || *n > *m) { *info = -5; } else if (*lda < *m) { *info = -7; } else if (*mv < 0) { *info = -9; } else if (rsvec && *ldv < *n || applv && *ldv < *mv) { *info = -11; } else if (uctol && work[1] <= 1.) { *info = -12; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = *m + *n; if (*lwork < max(i__1,6)) { *info = -13; } else { *info = 0; } } /* #:( */ if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGESVJ", &i__1); return 0; } /* #:) Quick return for void matrix */ if (*m == 0 || *n == 0) { return 0; } /* Set numerical parameters */ /* The stopping criterion for Jacobi rotations is */ /* max_{i<>j}|A(:,i)^T * A(:,j)|/(||A(:,i)||*||A(:,j)||) < CTOL*EPS */ /* where EPS is the round-off and CTOL is defined as follows: */ if (uctol) { /* ... user controlled */ ctol = work[1]; } else { /* ... default */ if (lsvec || rsvec || applv) { ctol = sqrt((doublereal) (*m)); } else { ctol = (doublereal) (*m); } } /* ... and the machine dependent parameters are */ /* [!] (Make sure that DLAMCH() works properly on the target machine.) */ epsilon = _starpu_dlamch_("Epsilon"); rooteps = sqrt(epsilon); sfmin = _starpu_dlamch_("SafeMinimum"); rootsfmin = sqrt(sfmin); small = sfmin / epsilon; big = _starpu_dlamch_("Overflow"); /* BIG = ONE / SFMIN */ rootbig = 1. / rootsfmin; large = big / sqrt((doublereal) (*m * *n)); bigtheta = 1. / rooteps; tol = ctol * epsilon; roottol = sqrt(tol); if ((doublereal) (*m) * epsilon >= 1.) { *info = -5; i__1 = -(*info); _starpu_xerbla_("DGESVJ", &i__1); return 0; } /* Initialize the right singular vector matrix. */ if (rsvec) { mvl = *n; _starpu_dlaset_("A", &mvl, n, &c_b17, &c_b18, &v[v_offset], ldv); } else if (applv) { mvl = *mv; } rsvec = rsvec || applv; /* Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N ) */ /* (!) If necessary, scale A to protect the largest singular value */ /* from overflow. It is possible that saving the largest singular */ /* value destroys the information about the small ones. */ /* This initial scaling is almost minimal in the sense that the */ /* goal is to make sure that no column norm overflows, and that */ /* DSQRT(N)*max_i SVA(i) does not overflow. If INFinite entries */ /* in A are detected, the procedure returns with INFO=-6. */ scale = 1. / sqrt((doublereal) (*m) * (doublereal) (*n)); noscale = TRUE_; goscale = TRUE_; if (lower) { /* the input matrix is M-by-N lower triangular (trapezoidal) */ i__1 = *n; for (p = 1; p <= i__1; ++p) { aapp = 0.; aaqq = 0.; i__2 = *m - p + 1; _starpu_dlassq_(&i__2, &a[p + p * a_dim1], &c__1, &aapp, &aaqq); if (aapp > big) { *info = -6; i__2 = -(*info); _starpu_xerbla_("DGESVJ", &i__2); return 0; } aaqq = sqrt(aaqq); if (aapp < big / aaqq && noscale) { sva[p] = aapp * aaqq; } else { noscale = FALSE_; sva[p] = aapp * (aaqq * scale); if (goscale) { goscale = FALSE_; i__2 = p - 1; for (q = 1; q <= i__2; ++q) { sva[q] *= scale; /* L1873: */ } } } /* L1874: */ } } else if (upper) { /* the input matrix is M-by-N upper triangular (trapezoidal) */ i__1 = *n; for (p = 1; p <= i__1; ++p) { aapp = 0.; aaqq = 0.; _starpu_dlassq_(&p, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq); if (aapp > big) { *info = -6; i__2 = -(*info); _starpu_xerbla_("DGESVJ", &i__2); return 0; } aaqq = sqrt(aaqq); if (aapp < big / aaqq && noscale) { sva[p] = aapp * aaqq; } else { noscale = FALSE_; sva[p] = aapp * (aaqq * scale); if (goscale) { goscale = FALSE_; i__2 = p - 1; for (q = 1; q <= i__2; ++q) { sva[q] *= scale; /* L2873: */ } } } /* L2874: */ } } else { /* the input matrix is M-by-N general dense */ i__1 = *n; for (p = 1; p <= i__1; ++p) { aapp = 0.; aaqq = 0.; _starpu_dlassq_(m, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq); if (aapp > big) { *info = -6; i__2 = -(*info); _starpu_xerbla_("DGESVJ", &i__2); return 0; } aaqq = sqrt(aaqq); if (aapp < big / aaqq && noscale) { sva[p] = aapp * aaqq; } else { noscale = FALSE_; sva[p] = aapp * (aaqq * scale); if (goscale) { goscale = FALSE_; i__2 = p - 1; for (q = 1; q <= i__2; ++q) { sva[q] *= scale; /* L3873: */ } } } /* L3874: */ } } if (noscale) { scale = 1.; } /* Move the smaller part of the spectrum from the underflow threshold */ /* (!) Start by determining the position of the nonzero entries of the */ /* array SVA() relative to ( SFMIN, BIG ). */ aapp = 0.; aaqq = big; i__1 = *n; for (p = 1; p <= i__1; ++p) { if (sva[p] != 0.) { /* Computing MIN */ d__1 = aaqq, d__2 = sva[p]; aaqq = min(d__1,d__2); } /* Computing MAX */ d__1 = aapp, d__2 = sva[p]; aapp = max(d__1,d__2); /* L4781: */ } /* #:) Quick return for zero matrix */ if (aapp == 0.) { if (lsvec) { _starpu_dlaset_("G", m, n, &c_b17, &c_b18, &a[a_offset], lda); } work[1] = 1.; work[2] = 0.; work[3] = 0.; work[4] = 0.; work[5] = 0.; work[6] = 0.; return 0; } /* #:) Quick return for one-column matrix */ if (*n == 1) { if (lsvec) { _starpu_dlascl_("G", &c__0, &c__0, &sva[1], &scale, m, &c__1, &a[a_dim1 + 1], lda, &ierr); } work[1] = 1. / scale; if (sva[1] >= sfmin) { work[2] = 1.; } else { work[2] = 0.; } work[3] = 0.; work[4] = 0.; work[5] = 0.; work[6] = 0.; return 0; } /* Protect small singular values from underflow, and try to */ /* avoid underflows/overflows in computing Jacobi rotations. */ sn = sqrt(sfmin / epsilon); temp1 = sqrt(big / (doublereal) (*n)); if (aapp <= sn || aaqq >= temp1 || sn <= aaqq && aapp <= temp1) { /* Computing MIN */ d__1 = big, d__2 = temp1 / aapp; temp1 = min(d__1,d__2); /* AAQQ = AAQQ*TEMP1 */ /* AAPP = AAPP*TEMP1 */ } else if (aaqq <= sn && aapp <= temp1) { /* Computing MIN */ d__1 = sn / aaqq, d__2 = big / (aapp * sqrt((doublereal) (*n))); temp1 = min(d__1,d__2); /* AAQQ = AAQQ*TEMP1 */ /* AAPP = AAPP*TEMP1 */ } else if (aaqq >= sn && aapp >= temp1) { /* Computing MAX */ d__1 = sn / aaqq, d__2 = temp1 / aapp; temp1 = max(d__1,d__2); /* AAQQ = AAQQ*TEMP1 */ /* AAPP = AAPP*TEMP1 */ } else if (aaqq <= sn && aapp >= temp1) { /* Computing MIN */ d__1 = sn / aaqq, d__2 = big / (sqrt((doublereal) (*n)) * aapp); temp1 = min(d__1,d__2); /* AAQQ = AAQQ*TEMP1 */ /* AAPP = AAPP*TEMP1 */ } else { temp1 = 1.; } /* Scale, if necessary */ if (temp1 != 1.) { _starpu_dlascl_("G", &c__0, &c__0, &c_b18, &temp1, n, &c__1, &sva[1], n, & ierr); } scale = temp1 * scale; if (scale != 1.) { _starpu_dlascl_(joba, &c__0, &c__0, &c_b18, &scale, m, n, &a[a_offset], lda, & ierr); scale = 1. / scale; } /* Row-cyclic Jacobi SVD algorithm with column pivoting */ emptsw = *n * (*n - 1) / 2; notrot = 0; fastr[0] = 0.; /* A is represented in factored form A = A * diag(WORK), where diag(WORK) */ /* is initialized to identity. WORK is updated during fast scaled */ /* rotations. */ i__1 = *n; for (q = 1; q <= i__1; ++q) { work[q] = 1.; /* L1868: */ } swband = 3; /* [TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective */ /* if DGESVJ is used as a computational routine in the preconditioned */ /* Jacobi SVD algorithm DGESVJ. For sweeps i=1:SWBAND the procedure */ /* works on pivots inside a band-like region around the diagonal. */ /* The boundaries are determined dynamically, based on the number of */ /* pivots above a threshold. */ kbl = min(8,*n); /* [TP] KBL is a tuning parameter that defines the tile size in the */ /* tiling of the p-q loops of pivot pairs. In general, an optimal */ /* value of KBL depends on the matrix dimensions and on the */ /* parameters of the computer's memory. */ nbl = *n / kbl; if (nbl * kbl != *n) { ++nbl; } /* Computing 2nd power */ i__1 = kbl; blskip = i__1 * i__1; /* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */ rowskip = min(5,kbl); /* [TP] ROWSKIP is a tuning parameter. */ lkahead = 1; /* [TP] LKAHEAD is a tuning parameter. */ /* Quasi block transformations, using the lower (upper) triangular */ /* structure of the input matrix. The quasi-block-cycling usually */ /* invokes cubic convergence. Big part of this cycle is done inside */ /* canonical subspaces of dimensions less than M. */ /* Computing MAX */ i__1 = 64, i__2 = kbl << 2; if ((lower || upper) && *n > max(i__1,i__2)) { /* [TP] The number of partition levels and the actual partition are */ /* tuning parameters. */ n4 = *n / 4; n2 = *n / 2; n34 = n4 * 3; if (applv) { q = 0; } else { q = 1; } if (lower) { /* This works very well on lower triangular matrices, in particular */ /* in the framework of the preconditioned Jacobi SVD (xGEJSV). */ /* The idea is simple: */ /* [+ 0 0 0] Note that Jacobi transformations of [0 0] */ /* [+ + 0 0] [0 0] */ /* [+ + x 0] actually work on [x 0] [x 0] */ /* [+ + x x] [x x]. [x x] */ i__1 = *m - n34; i__2 = *n - n34; i__3 = *lwork - *n; _starpu_dgsvj0_(jobv, &i__1, &i__2, &a[n34 + 1 + (n34 + 1) * a_dim1], lda, &work[n34 + 1], &sva[n34 + 1], &mvl, &v[n34 * q + 1 + ( n34 + 1) * v_dim1], ldv, &epsilon, &sfmin, &tol, &c__2, & work[*n + 1], &i__3, &ierr); i__1 = *m - n2; i__2 = n34 - n2; i__3 = *lwork - *n; _starpu_dgsvj0_(jobv, &i__1, &i__2, &a[n2 + 1 + (n2 + 1) * a_dim1], lda, & work[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1) * v_dim1], ldv, &epsilon, &sfmin, &tol, &c__2, &work[*n + 1], &i__3, &ierr); i__1 = *m - n2; i__2 = *n - n2; i__3 = *lwork - *n; _starpu_dgsvj1_(jobv, &i__1, &i__2, &n4, &a[n2 + 1 + (n2 + 1) * a_dim1], lda, &work[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + ( n2 + 1) * v_dim1], ldv, &epsilon, &sfmin, &tol, &c__1, & work[*n + 1], &i__3, &ierr); i__1 = *m - n4; i__2 = n2 - n4; i__3 = *lwork - *n; _starpu_dgsvj0_(jobv, &i__1, &i__2, &a[n4 + 1 + (n4 + 1) * a_dim1], lda, & work[n4 + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1) * v_dim1], ldv, &epsilon, &sfmin, &tol, &c__1, &work[*n + 1], &i__3, &ierr); i__1 = *lwork - *n; _starpu_dgsvj0_(jobv, m, &n4, &a[a_offset], lda, &work[1], &sva[1], &mvl, &v[v_offset], ldv, &epsilon, &sfmin, &tol, &c__1, &work[* n + 1], &i__1, &ierr); i__1 = *lwork - *n; _starpu_dgsvj1_(jobv, m, &n2, &n4, &a[a_offset], lda, &work[1], &sva[1], & mvl, &v[v_offset], ldv, &epsilon, &sfmin, &tol, &c__1, & work[*n + 1], &i__1, &ierr); } else if (upper) { i__1 = *lwork - *n; _starpu_dgsvj0_(jobv, &n4, &n4, &a[a_offset], lda, &work[1], &sva[1], & mvl, &v[v_offset], ldv, &epsilon, &sfmin, &tol, &c__2, & work[*n + 1], &i__1, &ierr); i__1 = *lwork - *n; _starpu_dgsvj0_(jobv, &n2, &n4, &a[(n4 + 1) * a_dim1 + 1], lda, &work[n4 + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1) * v_dim1], ldv, &epsilon, &sfmin, &tol, &c__1, &work[*n + 1] , &i__1, &ierr); i__1 = *lwork - *n; _starpu_dgsvj1_(jobv, &n2, &n2, &n4, &a[a_offset], lda, &work[1], &sva[1], &mvl, &v[v_offset], ldv, &epsilon, &sfmin, &tol, &c__1, & work[*n + 1], &i__1, &ierr); i__1 = n2 + n4; i__2 = *lwork - *n; _starpu_dgsvj0_(jobv, &i__1, &n4, &a[(n2 + 1) * a_dim1 + 1], lda, &work[ n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1) * v_dim1], ldv, &epsilon, &sfmin, &tol, &c__1, &work[*n + 1] , &i__2, &ierr); } } /* -#- Row-cyclic pivot strategy with de Rijk's pivoting -#- */ for (i__ = 1; i__ <= 30; ++i__) { /* .. go go go ... */ mxaapq = 0.; mxsinj = 0.; iswrot = 0; notrot = 0; pskipped = 0; /* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs */ /* 1 <= p < q <= N. This is the first step toward a blocked implementation */ /* of the rotations. New implementation, based on block transformations, */ /* is under development. */ i__1 = nbl; for (ibr = 1; ibr <= i__1; ++ibr) { igl = (ibr - 1) * kbl + 1; /* Computing MIN */ i__3 = lkahead, i__4 = nbl - ibr; i__2 = min(i__3,i__4); for (ir1 = 0; ir1 <= i__2; ++ir1) { igl += ir1 * kbl; /* Computing MIN */ i__4 = igl + kbl - 1, i__5 = *n - 1; i__3 = min(i__4,i__5); for (p = igl; p <= i__3; ++p) { /* .. de Rijk's pivoting */ i__4 = *n - p + 1; q = _starpu_idamax_(&i__4, &sva[p], &c__1) + p - 1; if (p != q) { _starpu_dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); if (rsvec) { _starpu_dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], &c__1); } temp1 = sva[p]; sva[p] = sva[q]; sva[q] = temp1; temp1 = work[p]; work[p] = work[q]; work[q] = temp1; } if (ir1 == 0) { /* Column norms are periodically updated by explicit */ /* norm computation. */ /* Caveat: */ /* Unfortunately, some BLAS implementations compute DNRM2(M,A(1,p),1) */ /* as DSQRT(DDOT(M,A(1,p),1,A(1,p),1)), which may cause the result to */ /* overflow for ||A(:,p)||_2 > DSQRT(overflow_threshold), and to */ /* underflow for ||A(:,p)||_2 < DSQRT(underflow_threshold). */ /* Hence, DNRM2 cannot be trusted, not even in the case when */ /* the true norm is far from the under(over)flow boundaries. */ /* If properly implemented DNRM2 is available, the IF-THEN-ELSE */ /* below should read "AAPP = DNRM2( M, A(1,p), 1 ) * WORK(p)". */ if (sva[p] < rootbig && sva[p] > rootsfmin) { sva[p] = _starpu_dnrm2_(m, &a[p * a_dim1 + 1], &c__1) * work[p]; } else { temp1 = 0.; aapp = 0.; _starpu_dlassq_(m, &a[p * a_dim1 + 1], &c__1, &temp1, & aapp); sva[p] = temp1 * sqrt(aapp) * work[p]; } aapp = sva[p]; } else { aapp = sva[p]; } if (aapp > 0.) { pskipped = 0; /* Computing MIN */ i__5 = igl + kbl - 1; i__4 = min(i__5,*n); for (q = p + 1; q <= i__4; ++q) { aaqq = sva[q]; if (aaqq > 0.) { aapp0 = aapp; if (aaqq >= 1.) { rotok = small * aapp <= aaqq; if (aapp < big / aaqq) { aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & c__1, &a[q * a_dim1 + 1], & c__1) * work[p] * work[q] / aaqq / aapp; } else { _starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, & work[*n + 1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aapp, & work[p], m, &c__1, &work[*n + 1], lda, &ierr); aapq = _starpu_ddot_(m, &work[*n + 1], &c__1, &a[q * a_dim1 + 1], &c__1) * work[q] / aaqq; } } else { rotok = aapp <= aaqq / small; if (aapp > small / aaqq) { aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & c__1, &a[q * a_dim1 + 1], & c__1) * work[p] * work[q] / aaqq / aapp; } else { _starpu_dcopy_(m, &a[q * a_dim1 + 1], &c__1, & work[*n + 1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aaqq, & work[q], m, &c__1, &work[*n + 1], lda, &ierr); aapq = _starpu_ddot_(m, &work[*n + 1], &c__1, &a[p * a_dim1 + 1], &c__1) * work[p] / aapp; } } /* Computing MAX */ d__1 = mxaapq, d__2 = abs(aapq); mxaapq = max(d__1,d__2); /* TO rotate or NOT to rotate, THAT is the question ... */ if (abs(aapq) > tol) { /* .. rotate */ /* [RTD] ROTATED = ROTATED + ONE */ if (ir1 == 0) { notrot = 0; pskipped = 0; ++iswrot; } if (rotok) { aqoap = aaqq / aapp; apoaq = aapp / aaqq; theta = (d__1 = aqoap - apoaq, abs( d__1)) * -.5 / aapq; if (abs(theta) > bigtheta) { t = .5 / theta; fastr[2] = t * work[p] / work[q]; fastr[3] = -t * work[q] / work[p]; _starpu_drotm_(m, &a[p * a_dim1 + 1], & c__1, &a[q * a_dim1 + 1], &c__1, fastr); if (rsvec) { _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], &c__1, fastr); } /* Computing MAX */ d__1 = 0., d__2 = t * apoaq * aapq + 1.; sva[q] = aaqq * sqrt((max(d__1, d__2))); aapp *= sqrt(1. - t * aqoap * aapq); /* Computing MAX */ d__1 = mxsinj, d__2 = abs(t); mxsinj = max(d__1,d__2); } else { /* .. choose correct signum for THETA and rotate */ thsign = -d_sign(&c_b18, &aapq); t = 1. / (theta + thsign * sqrt( theta * theta + 1.)); cs = sqrt(1. / (t * t + 1.)); sn = t * cs; /* Computing MAX */ d__1 = mxsinj, d__2 = abs(sn); mxsinj = max(d__1,d__2); /* Computing MAX */ d__1 = 0., d__2 = t * apoaq * aapq + 1.; sva[q] = aaqq * sqrt((max(d__1, d__2))); /* Computing MAX */ d__1 = 0., d__2 = 1. - t * aqoap * aapq; aapp *= sqrt((max(d__1,d__2))); apoaq = work[p] / work[q]; aqoap = work[q] / work[p]; if (work[p] >= 1.) { if (work[q] >= 1.) { fastr[2] = t * apoaq; fastr[3] = -t * aqoap; work[p] *= cs; work[q] *= cs; _starpu_drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1, fastr); if (rsvec) { _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ q * v_dim1 + 1], &c__1, fastr); } } else { d__1 = -t * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ p * a_dim1 + 1], &c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ q * a_dim1 + 1], &c__1); work[p] *= cs; work[q] /= cs; if (rsvec) { d__1 = -t * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & c__1, &v[p * v_dim1 + 1], &c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & c__1, &v[q * v_dim1 + 1], &c__1); } } } else { if (work[q] >= 1.) { d__1 = t * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ q * a_dim1 + 1], &c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ p * a_dim1 + 1], &c__1); work[p] /= cs; work[q] *= cs; if (rsvec) { d__1 = t * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & c__1, &v[q * v_dim1 + 1], &c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & c__1, &v[p * v_dim1 + 1], &c__1); } } else { if (work[p] >= work[q]) { d__1 = -t * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[p * a_dim1 + 1], &c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); work[p] *= cs; work[q] /= cs; if (rsvec) { d__1 = -t * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &c__1, &v[p * v_dim1 + 1], & c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & c__1); } } else { d__1 = t * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[p * a_dim1 + 1], &c__1); work[p] /= cs; work[q] *= cs; if (rsvec) { d__1 = t * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &c__1, &v[p * v_dim1 + 1], & c__1); } } } } } } else { /* .. have to use modified Gram-Schmidt like transformation */ _starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, & work[*n + 1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aapp, & c_b18, m, &c__1, &work[*n + 1] , lda, &ierr); _starpu_dlascl_("G", &c__0, &c__0, &aaqq, & c_b18, m, &c__1, &a[q * a_dim1 + 1], lda, &ierr); temp1 = -aapq * work[p] / work[q]; _starpu_daxpy_(m, &temp1, &work[*n + 1], & c__1, &a[q * a_dim1 + 1], & c__1); _starpu_dlascl_("G", &c__0, &c__0, &c_b18, & aaqq, m, &c__1, &a[q * a_dim1 + 1], lda, &ierr); /* Computing MAX */ d__1 = 0., d__2 = 1. - aapq * aapq; sva[q] = aaqq * sqrt((max(d__1,d__2))) ; mxsinj = max(mxsinj,sfmin); } /* END IF ROTOK THEN ... ELSE */ /* In the case of cancellation in updating SVA(q), SVA(p) */ /* recompute SVA(q), SVA(p). */ /* Computing 2nd power */ d__1 = sva[q] / aaqq; if (d__1 * d__1 <= rooteps) { if (aaqq < rootbig && aaqq > rootsfmin) { sva[q] = _starpu_dnrm2_(m, &a[q * a_dim1 + 1], &c__1) * work[q]; } else { t = 0.; aaqq = 0.; _starpu_dlassq_(m, &a[q * a_dim1 + 1], & c__1, &t, &aaqq); sva[q] = t * sqrt(aaqq) * work[q]; } } if (aapp / aapp0 <= rooteps) { if (aapp < rootbig && aapp > rootsfmin) { aapp = _starpu_dnrm2_(m, &a[p * a_dim1 + 1], &c__1) * work[p]; } else { t = 0.; aapp = 0.; _starpu_dlassq_(m, &a[p * a_dim1 + 1], & c__1, &t, &aapp); aapp = t * sqrt(aapp) * work[p]; } sva[p] = aapp; } } else { /* A(:,p) and A(:,q) already numerically orthogonal */ if (ir1 == 0) { ++notrot; } /* [RTD] SKIPPED = SKIPPED + 1 */ ++pskipped; } } else { /* A(:,q) is zero column */ if (ir1 == 0) { ++notrot; } ++pskipped; } if (i__ <= swband && pskipped > rowskip) { if (ir1 == 0) { aapp = -aapp; } notrot = 0; goto L2103; } /* L2002: */ } /* END q-LOOP */ L2103: /* bailed out of q-loop */ sva[p] = aapp; } else { sva[p] = aapp; if (ir1 == 0 && aapp == 0.) { /* Computing MIN */ i__4 = igl + kbl - 1; notrot = notrot + min(i__4,*n) - p; } } /* L2001: */ } /* end of the p-loop */ /* end of doing the block ( ibr, ibr ) */ /* L1002: */ } /* end of ir1-loop */ /* ... go to the off diagonal blocks */ igl = (ibr - 1) * kbl + 1; i__2 = nbl; for (jbc = ibr + 1; jbc <= i__2; ++jbc) { jgl = (jbc - 1) * kbl + 1; /* doing the block at ( ibr, jbc ) */ ijblsk = 0; /* Computing MIN */ i__4 = igl + kbl - 1; i__3 = min(i__4,*n); for (p = igl; p <= i__3; ++p) { aapp = sva[p]; if (aapp > 0.) { pskipped = 0; /* Computing MIN */ i__5 = jgl + kbl - 1; i__4 = min(i__5,*n); for (q = jgl; q <= i__4; ++q) { aaqq = sva[q]; if (aaqq > 0.) { aapp0 = aapp; /* -#- M x 2 Jacobi SVD -#- */ /* Safe Gram matrix computation */ if (aaqq >= 1.) { if (aapp >= aaqq) { rotok = small * aapp <= aaqq; } else { rotok = small * aaqq <= aapp; } if (aapp < big / aaqq) { aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & c__1, &a[q * a_dim1 + 1], & c__1) * work[p] * work[q] / aaqq / aapp; } else { _starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, & work[*n + 1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aapp, & work[p], m, &c__1, &work[*n + 1], lda, &ierr); aapq = _starpu_ddot_(m, &work[*n + 1], &c__1, &a[q * a_dim1 + 1], &c__1) * work[q] / aaqq; } } else { if (aapp >= aaqq) { rotok = aapp <= aaqq / small; } else { rotok = aaqq <= aapp / small; } if (aapp > small / aaqq) { aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & c__1, &a[q * a_dim1 + 1], & c__1) * work[p] * work[q] / aaqq / aapp; } else { _starpu_dcopy_(m, &a[q * a_dim1 + 1], &c__1, & work[*n + 1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aaqq, & work[q], m, &c__1, &work[*n + 1], lda, &ierr); aapq = _starpu_ddot_(m, &work[*n + 1], &c__1, &a[p * a_dim1 + 1], &c__1) * work[p] / aapp; } } /* Computing MAX */ d__1 = mxaapq, d__2 = abs(aapq); mxaapq = max(d__1,d__2); /* TO rotate or NOT to rotate, THAT is the question ... */ if (abs(aapq) > tol) { notrot = 0; /* [RTD] ROTATED = ROTATED + 1 */ pskipped = 0; ++iswrot; if (rotok) { aqoap = aaqq / aapp; apoaq = aapp / aaqq; theta = (d__1 = aqoap - apoaq, abs( d__1)) * -.5 / aapq; if (aaqq > aapp0) { theta = -theta; } if (abs(theta) > bigtheta) { t = .5 / theta; fastr[2] = t * work[p] / work[q]; fastr[3] = -t * work[q] / work[p]; _starpu_drotm_(m, &a[p * a_dim1 + 1], & c__1, &a[q * a_dim1 + 1], &c__1, fastr); if (rsvec) { _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], &c__1, fastr); } /* Computing MAX */ d__1 = 0., d__2 = t * apoaq * aapq + 1.; sva[q] = aaqq * sqrt((max(d__1, d__2))); /* Computing MAX */ d__1 = 0., d__2 = 1. - t * aqoap * aapq; aapp *= sqrt((max(d__1,d__2))); /* Computing MAX */ d__1 = mxsinj, d__2 = abs(t); mxsinj = max(d__1,d__2); } else { /* .. choose correct signum for THETA and rotate */ thsign = -d_sign(&c_b18, &aapq); if (aaqq > aapp0) { thsign = -thsign; } t = 1. / (theta + thsign * sqrt( theta * theta + 1.)); cs = sqrt(1. / (t * t + 1.)); sn = t * cs; /* Computing MAX */ d__1 = mxsinj, d__2 = abs(sn); mxsinj = max(d__1,d__2); /* Computing MAX */ d__1 = 0., d__2 = t * apoaq * aapq + 1.; sva[q] = aaqq * sqrt((max(d__1, d__2))); aapp *= sqrt(1. - t * aqoap * aapq); apoaq = work[p] / work[q]; aqoap = work[q] / work[p]; if (work[p] >= 1.) { if (work[q] >= 1.) { fastr[2] = t * apoaq; fastr[3] = -t * aqoap; work[p] *= cs; work[q] *= cs; _starpu_drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1, fastr); if (rsvec) { _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ q * v_dim1 + 1], &c__1, fastr); } } else { d__1 = -t * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ p * a_dim1 + 1], &c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ q * a_dim1 + 1], &c__1); if (rsvec) { d__1 = -t * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & c__1, &v[p * v_dim1 + 1], &c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & c__1, &v[q * v_dim1 + 1], &c__1); } work[p] *= cs; work[q] /= cs; } } else { if (work[q] >= 1.) { d__1 = t * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ q * a_dim1 + 1], &c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ p * a_dim1 + 1], &c__1); if (rsvec) { d__1 = t * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & c__1, &v[q * v_dim1 + 1], &c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & c__1, &v[p * v_dim1 + 1], &c__1); } work[p] /= cs; work[q] *= cs; } else { if (work[p] >= work[q]) { d__1 = -t * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[p * a_dim1 + 1], &c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); work[p] *= cs; work[q] /= cs; if (rsvec) { d__1 = -t * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &c__1, &v[p * v_dim1 + 1], & c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & c__1); } } else { d__1 = t * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[p * a_dim1 + 1], &c__1); work[p] /= cs; work[q] *= cs; if (rsvec) { d__1 = t * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &c__1, &v[p * v_dim1 + 1], & c__1); } } } } } } else { if (aapp > aaqq) { _starpu_dcopy_(m, &a[p * a_dim1 + 1], & c__1, &work[*n + 1], & c__1); _starpu_dlascl_("G", &c__0, &c__0, &aapp, &c_b18, m, &c__1, &work[* n + 1], lda, &ierr); _starpu_dlascl_("G", &c__0, &c__0, &aaqq, &c_b18, m, &c__1, &a[q * a_dim1 + 1], lda, &ierr); temp1 = -aapq * work[p] / work[q]; _starpu_daxpy_(m, &temp1, &work[*n + 1], & c__1, &a[q * a_dim1 + 1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &c_b18, &aaqq, m, &c__1, &a[q * a_dim1 + 1], lda, &ierr); /* Computing MAX */ d__1 = 0., d__2 = 1. - aapq * aapq; sva[q] = aaqq * sqrt((max(d__1, d__2))); mxsinj = max(mxsinj,sfmin); } else { _starpu_dcopy_(m, &a[q * a_dim1 + 1], & c__1, &work[*n + 1], & c__1); _starpu_dlascl_("G", &c__0, &c__0, &aaqq, &c_b18, m, &c__1, &work[* n + 1], lda, &ierr); _starpu_dlascl_("G", &c__0, &c__0, &aapp, &c_b18, m, &c__1, &a[p * a_dim1 + 1], lda, &ierr); temp1 = -aapq * work[q] / work[p]; _starpu_daxpy_(m, &temp1, &work[*n + 1], & c__1, &a[p * a_dim1 + 1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &c_b18, &aapp, m, &c__1, &a[p * a_dim1 + 1], lda, &ierr); /* Computing MAX */ d__1 = 0., d__2 = 1. - aapq * aapq; sva[p] = aapp * sqrt((max(d__1, d__2))); mxsinj = max(mxsinj,sfmin); } } /* END IF ROTOK THEN ... ELSE */ /* In the case of cancellation in updating SVA(q) */ /* .. recompute SVA(q) */ /* Computing 2nd power */ d__1 = sva[q] / aaqq; if (d__1 * d__1 <= rooteps) { if (aaqq < rootbig && aaqq > rootsfmin) { sva[q] = _starpu_dnrm2_(m, &a[q * a_dim1 + 1], &c__1) * work[q]; } else { t = 0.; aaqq = 0.; _starpu_dlassq_(m, &a[q * a_dim1 + 1], & c__1, &t, &aaqq); sva[q] = t * sqrt(aaqq) * work[q]; } } /* Computing 2nd power */ d__1 = aapp / aapp0; if (d__1 * d__1 <= rooteps) { if (aapp < rootbig && aapp > rootsfmin) { aapp = _starpu_dnrm2_(m, &a[p * a_dim1 + 1], &c__1) * work[p]; } else { t = 0.; aapp = 0.; _starpu_dlassq_(m, &a[p * a_dim1 + 1], & c__1, &t, &aapp); aapp = t * sqrt(aapp) * work[p]; } sva[p] = aapp; } /* end of OK rotation */ } else { ++notrot; /* [RTD] SKIPPED = SKIPPED + 1 */ ++pskipped; ++ijblsk; } } else { ++notrot; ++pskipped; ++ijblsk; } if (i__ <= swband && ijblsk >= blskip) { sva[p] = aapp; notrot = 0; goto L2011; } if (i__ <= swband && pskipped > rowskip) { aapp = -aapp; notrot = 0; goto L2203; } /* L2200: */ } /* end of the q-loop */ L2203: sva[p] = aapp; } else { if (aapp == 0.) { /* Computing MIN */ i__4 = jgl + kbl - 1; notrot = notrot + min(i__4,*n) - jgl + 1; } if (aapp < 0.) { notrot = 0; } } /* L2100: */ } /* end of the p-loop */ /* L2010: */ } /* end of the jbc-loop */ L2011: /* 2011 bailed out of the jbc-loop */ /* Computing MIN */ i__3 = igl + kbl - 1; i__2 = min(i__3,*n); for (p = igl; p <= i__2; ++p) { sva[p] = (d__1 = sva[p], abs(d__1)); /* L2012: */ } /* ** */ /* L2000: */ } /* 2000 :: end of the ibr-loop */ /* .. update SVA(N) */ if (sva[*n] < rootbig && sva[*n] > rootsfmin) { sva[*n] = _starpu_dnrm2_(m, &a[*n * a_dim1 + 1], &c__1) * work[*n]; } else { t = 0.; aapp = 0.; _starpu_dlassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp); sva[*n] = t * sqrt(aapp) * work[*n]; } /* Additional steering devices */ if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) { swband = i__; } if (i__ > swband + 1 && mxaapq < sqrt((doublereal) (*n)) * tol && ( doublereal) (*n) * mxaapq * mxsinj < tol) { goto L1994; } if (notrot >= emptsw) { goto L1994; } /* L1993: */ } /* end i=1:NSWEEP loop */ /* #:( Reaching this point means that the procedure has not converged. */ *info = 29; goto L1995; L1994: /* #:) Reaching this point means numerical convergence after the i-th */ /* sweep. */ *info = 0; /* #:) INFO = 0 confirms successful iterations. */ L1995: /* Sort the singular values and find how many are above */ /* the underflow threshold. */ n2 = 0; n4 = 0; i__1 = *n - 1; for (p = 1; p <= i__1; ++p) { i__2 = *n - p + 1; q = _starpu_idamax_(&i__2, &sva[p], &c__1) + p - 1; if (p != q) { temp1 = sva[p]; sva[p] = sva[q]; sva[q] = temp1; temp1 = work[p]; work[p] = work[q]; work[q] = temp1; _starpu_dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); if (rsvec) { _starpu_dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & c__1); } } if (sva[p] != 0.) { ++n4; if (sva[p] * scale > sfmin) { ++n2; } } /* L5991: */ } if (sva[*n] != 0.) { ++n4; if (sva[*n] * scale > sfmin) { ++n2; } } /* Normalize the left singular vectors. */ if (lsvec || uctol) { i__1 = n2; for (p = 1; p <= i__1; ++p) { d__1 = work[p] / sva[p]; _starpu_dscal_(m, &d__1, &a[p * a_dim1 + 1], &c__1); /* L1998: */ } } /* Scale the product of Jacobi rotations (assemble the fast rotations). */ if (rsvec) { if (applv) { i__1 = *n; for (p = 1; p <= i__1; ++p) { _starpu_dscal_(&mvl, &work[p], &v[p * v_dim1 + 1], &c__1); /* L2398: */ } } else { i__1 = *n; for (p = 1; p <= i__1; ++p) { temp1 = 1. / _starpu_dnrm2_(&mvl, &v[p * v_dim1 + 1], &c__1); _starpu_dscal_(&mvl, &temp1, &v[p * v_dim1 + 1], &c__1); /* L2399: */ } } } /* Undo scaling, if necessary (and possible). */ if (scale > 1. && sva[1] < big / scale || scale < 1. && sva[n2] > sfmin / scale) { i__1 = *n; for (p = 1; p <= i__1; ++p) { sva[p] = scale * sva[p]; /* L2400: */ } scale = 1.; } work[1] = scale; /* The singular values of A are SCALE*SVA(1:N). If SCALE.NE.ONE */ /* then some of the singular values may overflow or underflow and */ /* the spectrum is given in this factored representation. */ work[2] = (doublereal) n4; /* N4 is the number of computed nonzero singular values of A. */ work[3] = (doublereal) n2; /* N2 is the number of singular values of A greater than SFMIN. */ /* If N2= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is */ /* not 'N', then A must have been equilibrated by the scaling */ /* factors in R and/or C. A is not modified if FACT = 'F' or */ /* 'N', or if FACT = 'E' and EQUED = 'N' on exit. */ /* On exit, if EQUED .ne. 'N', A is scaled as follows: */ /* EQUED = 'R': A := diag(R) * A */ /* EQUED = 'C': A := A * diag(C) */ /* EQUED = 'B': A := diag(R) * A * diag(C). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) */ /* If FACT = 'F', then AF is an input argument and on entry */ /* contains the factors L and U from the factorization */ /* A = P*L*U as computed by DGETRF. If EQUED .ne. 'N', then */ /* AF is the factored form of the equilibrated matrix A. */ /* If FACT = 'N', then AF is an output argument and on exit */ /* returns the factors L and U from the factorization A = P*L*U */ /* of the original matrix A. */ /* If FACT = 'E', then AF is an output argument and on exit */ /* returns the factors L and U from the factorization A = P*L*U */ /* of the equilibrated matrix A (see the description of A for */ /* the form of the equilibrated matrix). */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* IPIV (input or output) INTEGER array, dimension (N) */ /* If FACT = 'F', then IPIV is an input argument and on entry */ /* contains the pivot indices from the factorization A = P*L*U */ /* as computed by DGETRF; row i of the matrix was interchanged */ /* with row IPIV(i). */ /* If FACT = 'N', then IPIV is an output argument and on exit */ /* contains the pivot indices from the factorization A = P*L*U */ /* of the original matrix A. */ /* If FACT = 'E', then IPIV is an output argument and on exit */ /* contains the pivot indices from the factorization A = P*L*U */ /* of the equilibrated matrix A. */ /* EQUED (input or output) CHARACTER*1 */ /* Specifies the form of equilibration that was done. */ /* = 'N': No equilibration (always true if FACT = 'N'). */ /* = 'R': Row equilibration, i.e., A has been premultiplied by */ /* diag(R). */ /* = 'C': Column equilibration, i.e., A has been postmultiplied */ /* by diag(C). */ /* = 'B': Both row and column equilibration, i.e., A has been */ /* replaced by diag(R) * A * diag(C). */ /* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ /* output argument. */ /* R (input or output) DOUBLE PRECISION array, dimension (N) */ /* The row scale factors for A. If EQUED = 'R' or 'B', A is */ /* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ /* is not accessed. R is an input argument if FACT = 'F'; */ /* otherwise, R is an output argument. If FACT = 'F' and */ /* EQUED = 'R' or 'B', each element of R must be positive. */ /* C (input or output) DOUBLE PRECISION array, dimension (N) */ /* The column scale factors for A. If EQUED = 'C' or 'B', A is */ /* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ /* is not accessed. C is an input argument if FACT = 'F'; */ /* otherwise, C is an output argument. If FACT = 'F' and */ /* EQUED = 'C' or 'B', each element of C must be positive. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS right hand side matrix B. */ /* On exit, */ /* if EQUED = 'N', B is not modified; */ /* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ /* diag(R)*B; */ /* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ /* overwritten by diag(C)*B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X */ /* to the original system of equations. Note that A and B are */ /* modified on exit if EQUED .ne. 'N', and the solution to the */ /* equilibrated system is inv(diag(C))*X if TRANS = 'N' and */ /* EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' */ /* and EQUED = 'R' or 'B'. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* The estimate of the reciprocal condition number of the matrix */ /* A after equilibration (if done). If RCOND is less than the */ /* machine precision (in particular, if RCOND = 0), the matrix */ /* is singular to working precision. This condition is */ /* indicated by a return code of INFO > 0. */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (4*N) */ /* On exit, WORK(1) contains the reciprocal pivot growth */ /* factor norm(A)/norm(U). The "max absolute element" norm is */ /* used. If WORK(1) is much less than 1, then the stability */ /* of the LU factorization of the (equilibrated) matrix A */ /* could be poor. This also means that the solution X, condition */ /* estimator RCOND, and forward error bound FERR could be */ /* unreliable. If factorization fails with 0 0: if INFO = i, and i is */ /* <= N: U(i,i) is exactly zero. The factorization has */ /* been completed, but the factor U is exactly */ /* singular, so the solution and error bounds */ /* could not be computed. RCOND = 0 is returned. */ /* = N+1: U is nonsingular, but RCOND is less than machine */ /* precision, meaning that the matrix is singular */ /* to working precision. Nevertheless, the */ /* solution and error bounds are computed because */ /* there are a number of situations where the */ /* computed solution can be more accurate than the */ /* value of RCOND would suggest. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --ipiv; --r__; --c__; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; nofact = _starpu_lsame_(fact, "N"); equil = _starpu_lsame_(fact, "E"); notran = _starpu_lsame_(trans, "N"); if (nofact || equil) { *(unsigned char *)equed = 'N'; rowequ = FALSE_; colequ = FALSE_; } else { rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, "B"); colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, "B"); smlnum = _starpu_dlamch_("Safe minimum"); bignum = 1. / smlnum; } /* Test the input parameters. */ if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldaf < max(1,*n)) { *info = -8; } else if (_starpu_lsame_(fact, "F") && ! (rowequ || colequ || _starpu_lsame_(equed, "N"))) { *info = -10; } else { if (rowequ) { rcmin = bignum; rcmax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = rcmin, d__2 = r__[j]; rcmin = min(d__1,d__2); /* Computing MAX */ d__1 = rcmax, d__2 = r__[j]; rcmax = max(d__1,d__2); /* L10: */ } if (rcmin <= 0.) { *info = -11; } else if (*n > 0) { rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); } else { rowcnd = 1.; } } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = rcmin, d__2 = c__[j]; rcmin = min(d__1,d__2); /* Computing MAX */ d__1 = rcmax, d__2 = c__[j]; rcmax = max(d__1,d__2); /* L20: */ } if (rcmin <= 0.) { *info = -12; } else if (*n > 0) { colcnd = max(rcmin,smlnum) / min(rcmax,bignum); } else { colcnd = 1.; } } if (*info == 0) { if (*ldb < max(1,*n)) { *info = -14; } else if (*ldx < max(1,*n)) { *info = -16; } } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGESVX", &i__1); return 0; } if (equil) { /* Compute row and column scalings to equilibrate the matrix A. */ _starpu_dgeequ_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &colcnd, & amax, &infequ); if (infequ == 0) { /* Equilibrate the matrix. */ _starpu_dlaqge_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, & colcnd, &amax, equed); rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, "B"); colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, "B"); } } /* Scale the right hand side. */ if (notran) { if (rowequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = r__[i__] * b[i__ + j * b_dim1]; /* L30: */ } /* L40: */ } } } else if (colequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = c__[i__] * b[i__ + j * b_dim1]; /* L50: */ } /* L60: */ } } if (nofact || equil) { /* Compute the LU factorization of A. */ _starpu_dlacpy_("Full", n, n, &a[a_offset], lda, &af[af_offset], ldaf); _starpu_dgetrf_(n, n, &af[af_offset], ldaf, &ipiv[1], info); /* Return if INFO is non-zero. */ if (*info > 0) { /* Compute the reciprocal pivot growth factor of the */ /* leading rank-deficient INFO columns of A. */ rpvgrw = _starpu_dlantr_("M", "U", "N", info, info, &af[af_offset], ldaf, &work[1]); if (rpvgrw == 0.) { rpvgrw = 1.; } else { rpvgrw = _starpu_dlange_("M", n, info, &a[a_offset], lda, &work[1]) / rpvgrw; } work[1] = rpvgrw; *rcond = 0.; return 0; } } /* Compute the norm of the matrix A and the */ /* reciprocal pivot growth factor RPVGRW. */ if (notran) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = _starpu_dlange_(norm, n, n, &a[a_offset], lda, &work[1]); rpvgrw = _starpu_dlantr_("M", "U", "N", n, n, &af[af_offset], ldaf, &work[1]); if (rpvgrw == 0.) { rpvgrw = 1.; } else { rpvgrw = _starpu_dlange_("M", n, n, &a[a_offset], lda, &work[1]) / rpvgrw; } /* Compute the reciprocal of the condition number of A. */ _starpu_dgecon_(norm, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1], info); /* Compute the solution matrix X. */ _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); _starpu_dgetrs_(trans, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, info); /* Use iterative refinement to improve the computed solution and */ /* compute error bounds and backward error estimates for it. */ _starpu_dgerfs_(trans, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &ipiv[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[ 1], &iwork[1], info); /* Transform the solution matrix X to a solution of the original */ /* system. */ if (notran) { if (colequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { x[i__ + j * x_dim1] = c__[i__] * x[i__ + j * x_dim1]; /* L70: */ } /* L80: */ } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] /= colcnd; /* L90: */ } } } else if (rowequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { x[i__ + j * x_dim1] = r__[i__] * x[i__ + j * x_dim1]; /* L100: */ } /* L110: */ } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] /= rowcnd; /* L120: */ } } work[1] = rpvgrw; /* Set INFO = N+1 if the matrix is singular to working precision. */ if (*rcond < _starpu_dlamch_("Epsilon")) { *info = *n + 1; } return 0; /* End of DGESVX */ } /* _starpu_dgesvx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgesvxx.c000066400000000000000000000721631507764646700210750ustar00rootroot00000000000000/* dgesvxx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgesvxx_(char *fact, char *trans, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * nparams, doublereal *params, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, err_bnds_comp_dim1, err_bnds_comp_offset, i__1; doublereal d__1, d__2; /* Local variables */ integer j; extern doublereal _starpu_dla_rpvgrw__(integer *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal amax; extern logical _starpu_lsame_(char *, char *); doublereal rcmin, rcmax; logical equil; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlaqge_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, char *); doublereal colcnd; logical nofact; extern /* Subroutine */ int _starpu_dgetrf_(integer *, integer *, doublereal *, integer *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); doublereal bignum; integer infequ; logical colequ; extern /* Subroutine */ int _starpu_dgetrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); doublereal rowcnd; logical notran; doublereal smlnum; logical rowequ; extern /* Subroutine */ int _starpu_dlascl2_(integer *, integer *, doublereal *, doublereal *, integer *), _starpu_dgeequb_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dgerfsx_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGESVXX uses the LU factorization to compute the solution to a */ /* double precision system of linear equations A * X = B, where A is an */ /* N-by-N matrix and X and B are N-by-NRHS matrices. */ /* If requested, both normwise and maximum componentwise error bounds */ /* are returned. DGESVXX will return a solution with a tiny */ /* guaranteed error (O(eps) where eps is the working machine */ /* precision) unless the matrix is very ill-conditioned, in which */ /* case a warning is returned. Relevant condition numbers also are */ /* calculated and returned. */ /* DGESVXX accepts user-provided factorizations and equilibration */ /* factors; see the definitions of the FACT and EQUED options. */ /* Solving with refinement and using a factorization from a previous */ /* DGESVXX call will also produce a solution with either O(eps) */ /* errors or warnings, but we cannot make that claim for general */ /* user-provided factorizations and equilibration factors if they */ /* differ from what DGESVXX would itself produce. */ /* Description */ /* =========== */ /* The following steps are performed: */ /* 1. If FACT = 'E', double precision scaling factors are computed to equilibrate */ /* the system: */ /* TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B */ /* TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */ /* TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */ /* Whether or not the system will be equilibrated depends on the */ /* scaling of the matrix A, but if equilibration is used, A is */ /* overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */ /* or diag(C)*B (if TRANS = 'T' or 'C'). */ /* 2. If FACT = 'N' or 'E', the LU decomposition is used to factor */ /* the matrix A (after equilibration if FACT = 'E') as */ /* A = P * L * U, */ /* where P is a permutation matrix, L is a unit lower triangular */ /* matrix, and U is upper triangular. */ /* 3. If some U(i,i)=0, so that U is exactly singular, then the */ /* routine returns with INFO = i. Otherwise, the factored form of A */ /* is used to estimate the condition number of the matrix A (see */ /* argument RCOND). If the reciprocal of the condition number is less */ /* than machine precision, the routine still goes on to solve for X */ /* and compute error bounds as described below. */ /* 4. The system of equations is solved for X using the factored form */ /* of A. */ /* 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */ /* the routine will use iterative refinement to try to get a small */ /* error and error bounds. Refinement calculates the residual to at */ /* least twice the working precision. */ /* 6. If equilibration was used, the matrix X is premultiplied by */ /* diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */ /* that it solves the original system before equilibration. */ /* Arguments */ /* ========= */ /* Some optional parameters are bundled in the PARAMS array. These */ /* settings determine how refinement is performed, but often the */ /* defaults are acceptable. If the defaults are acceptable, users */ /* can pass NPARAMS = 0 which prevents the source code from accessing */ /* the PARAMS argument. */ /* FACT (input) CHARACTER*1 */ /* Specifies whether or not the factored form of the matrix A is */ /* supplied on entry, and if not, whether the matrix A should be */ /* equilibrated before it is factored. */ /* = 'F': On entry, AF and IPIV contain the factored form of A. */ /* If EQUED is not 'N', the matrix A has been */ /* equilibrated with scaling factors given by R and C. */ /* A, AF, and IPIV are not modified. */ /* = 'N': The matrix A will be copied to AF and factored. */ /* = 'E': The matrix A will be equilibrated if necessary, then */ /* copied to AF and factored. */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate Transpose = Transpose) */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is */ /* not 'N', then A must have been equilibrated by the scaling */ /* factors in R and/or C. A is not modified if FACT = 'F' or */ /* 'N', or if FACT = 'E' and EQUED = 'N' on exit. */ /* On exit, if EQUED .ne. 'N', A is scaled as follows: */ /* EQUED = 'R': A := diag(R) * A */ /* EQUED = 'C': A := A * diag(C) */ /* EQUED = 'B': A := diag(R) * A * diag(C). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) */ /* If FACT = 'F', then AF is an input argument and on entry */ /* contains the factors L and U from the factorization */ /* A = P*L*U as computed by DGETRF. If EQUED .ne. 'N', then */ /* AF is the factored form of the equilibrated matrix A. */ /* If FACT = 'N', then AF is an output argument and on exit */ /* returns the factors L and U from the factorization A = P*L*U */ /* of the original matrix A. */ /* If FACT = 'E', then AF is an output argument and on exit */ /* returns the factors L and U from the factorization A = P*L*U */ /* of the equilibrated matrix A (see the description of A for */ /* the form of the equilibrated matrix). */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* IPIV (input or output) INTEGER array, dimension (N) */ /* If FACT = 'F', then IPIV is an input argument and on entry */ /* contains the pivot indices from the factorization A = P*L*U */ /* as computed by DGETRF; row i of the matrix was interchanged */ /* with row IPIV(i). */ /* If FACT = 'N', then IPIV is an output argument and on exit */ /* contains the pivot indices from the factorization A = P*L*U */ /* of the original matrix A. */ /* If FACT = 'E', then IPIV is an output argument and on exit */ /* contains the pivot indices from the factorization A = P*L*U */ /* of the equilibrated matrix A. */ /* EQUED (input or output) CHARACTER*1 */ /* Specifies the form of equilibration that was done. */ /* = 'N': No equilibration (always true if FACT = 'N'). */ /* = 'R': Row equilibration, i.e., A has been premultiplied by */ /* diag(R). */ /* = 'C': Column equilibration, i.e., A has been postmultiplied */ /* by diag(C). */ /* = 'B': Both row and column equilibration, i.e., A has been */ /* replaced by diag(R) * A * diag(C). */ /* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ /* output argument. */ /* R (input or output) DOUBLE PRECISION array, dimension (N) */ /* The row scale factors for A. If EQUED = 'R' or 'B', A is */ /* multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ /* is not accessed. R is an input argument if FACT = 'F'; */ /* otherwise, R is an output argument. If FACT = 'F' and */ /* EQUED = 'R' or 'B', each element of R must be positive. */ /* If R is output, each element of R is a power of the radix. */ /* If R is input, each element of R should be a power of the radix */ /* to ensure a reliable solution and error estimates. Scaling by */ /* powers of the radix does not cause rounding errors unless the */ /* result underflows or overflows. Rounding errors during scaling */ /* lead to refining with a matrix that is not equivalent to the */ /* input matrix, producing error estimates that may not be */ /* reliable. */ /* C (input or output) DOUBLE PRECISION array, dimension (N) */ /* The column scale factors for A. If EQUED = 'C' or 'B', A is */ /* multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ /* is not accessed. C is an input argument if FACT = 'F'; */ /* otherwise, C is an output argument. If FACT = 'F' and */ /* EQUED = 'C' or 'B', each element of C must be positive. */ /* If C is output, each element of C is a power of the radix. */ /* If C is input, each element of C should be a power of the radix */ /* to ensure a reliable solution and error estimates. Scaling by */ /* powers of the radix does not cause rounding errors unless the */ /* result underflows or overflows. Rounding errors during scaling */ /* lead to refining with a matrix that is not equivalent to the */ /* input matrix, producing error estimates that may not be */ /* reliable. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS right hand side matrix B. */ /* On exit, */ /* if EQUED = 'N', B is not modified; */ /* if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ /* diag(R)*B; */ /* if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ /* overwritten by diag(C)*B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* If INFO = 0, the N-by-NRHS solution matrix X to the original */ /* system of equations. Note that A and B are modified on exit */ /* if EQUED .ne. 'N', and the solution to the equilibrated system is */ /* inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or */ /* inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* Reciprocal scaled condition number. This is an estimate of the */ /* reciprocal Skeel condition number of the matrix A after */ /* equilibration (if done). If this is less than the machine */ /* precision (in particular, if it is zero), the matrix is singular */ /* to working precision. Note that the error may still be small even */ /* if this number is very small and the matrix appears ill- */ /* conditioned. */ /* RPVGRW (output) DOUBLE PRECISION */ /* Reciprocal pivot growth. On exit, this contains the reciprocal */ /* pivot growth factor norm(A)/norm(U). The "max absolute element" */ /* norm is used. If this is much less than 1, then the stability of */ /* the LU factorization of the (equilibrated) matrix A could be poor. */ /* This also means that the solution X, estimated condition numbers, */ /* and error bounds could be unreliable. If factorization fails with */ /* 0 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly singular, so */ /* the solution and error bounds could not be computed. RCOND = 0 */ /* is returned. */ /* = N+J: The solution corresponding to the Jth right-hand side is */ /* not guaranteed. The solutions corresponding to other right- */ /* hand sides K with K > J may not be guaranteed as well, but */ /* only the first such right-hand side is reported. If a small */ /* componentwise error is not requested (PARAMS(3) = 0.0) then */ /* the Jth right-hand side is the first with a normwise error */ /* bound that is not guaranteed (the smallest J such */ /* that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ /* the Jth right-hand side is the first with either a normwise or */ /* componentwise error bound that is not guaranteed (the smallest */ /* J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ /* ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ /* ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ /* about all of the right-hand sides check ERR_BNDS_NORM or */ /* ERR_BNDS_COMP. */ /* ================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ err_bnds_comp_dim1 = *nrhs; err_bnds_comp_offset = 1 + err_bnds_comp_dim1; err_bnds_comp__ -= err_bnds_comp_offset; err_bnds_norm_dim1 = *nrhs; err_bnds_norm_offset = 1 + err_bnds_norm_dim1; err_bnds_norm__ -= err_bnds_norm_offset; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --ipiv; --r__; --c__; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --berr; --params; --work; --iwork; /* Function Body */ *info = 0; nofact = _starpu_lsame_(fact, "N"); equil = _starpu_lsame_(fact, "E"); notran = _starpu_lsame_(trans, "N"); smlnum = _starpu_dlamch_("Safe minimum"); bignum = 1. / smlnum; if (nofact || equil) { *(unsigned char *)equed = 'N'; rowequ = FALSE_; colequ = FALSE_; } else { rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, "B"); colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, "B"); } /* Default is failure. If an input parameter is wrong or */ /* factorization fails, make everything look horrible. Only the */ /* pivot growth is set here, the rest is initialized in DGERFSX. */ *rpvgrw = 0.; /* Test the input parameters. PARAMS is not tested until DGERFSX. */ if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldaf < max(1,*n)) { *info = -8; } else if (_starpu_lsame_(fact, "F") && ! (rowequ || colequ || _starpu_lsame_(equed, "N"))) { *info = -10; } else { if (rowequ) { rcmin = bignum; rcmax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = rcmin, d__2 = r__[j]; rcmin = min(d__1,d__2); /* Computing MAX */ d__1 = rcmax, d__2 = r__[j]; rcmax = max(d__1,d__2); /* L10: */ } if (rcmin <= 0.) { *info = -11; } else if (*n > 0) { rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); } else { rowcnd = 1.; } } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = rcmin, d__2 = c__[j]; rcmin = min(d__1,d__2); /* Computing MAX */ d__1 = rcmax, d__2 = c__[j]; rcmax = max(d__1,d__2); /* L20: */ } if (rcmin <= 0.) { *info = -12; } else if (*n > 0) { colcnd = max(rcmin,smlnum) / min(rcmax,bignum); } else { colcnd = 1.; } } if (*info == 0) { if (*ldb < max(1,*n)) { *info = -14; } else if (*ldx < max(1,*n)) { *info = -16; } } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGESVXX", &i__1); return 0; } if (equil) { /* Compute row and column scalings to equilibrate the matrix A. */ _starpu_dgeequb_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &colcnd, &amax, &infequ); if (infequ == 0) { /* Equilibrate the matrix. */ _starpu_dlaqge_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, & colcnd, &amax, equed); rowequ = _starpu_lsame_(equed, "R") || _starpu_lsame_(equed, "B"); colequ = _starpu_lsame_(equed, "C") || _starpu_lsame_(equed, "B"); } /* If the scaling factors are not applied, set them to 1.0. */ if (! rowequ) { i__1 = *n; for (j = 1; j <= i__1; ++j) { r__[j] = 1.; } } if (! colequ) { i__1 = *n; for (j = 1; j <= i__1; ++j) { c__[j] = 1.; } } } /* Scale the right-hand side. */ if (notran) { if (rowequ) { _starpu_dlascl2_(n, nrhs, &r__[1], &b[b_offset], ldb); } } else { if (colequ) { _starpu_dlascl2_(n, nrhs, &c__[1], &b[b_offset], ldb); } } if (nofact || equil) { /* Compute the LU factorization of A. */ _starpu_dlacpy_("Full", n, n, &a[a_offset], lda, &af[af_offset], ldaf); _starpu_dgetrf_(n, n, &af[af_offset], ldaf, &ipiv[1], info); /* Return if INFO is non-zero. */ if (*info > 0) { /* Pivot in column INFO is exactly 0 */ /* Compute the reciprocal pivot growth factor of the */ /* leading rank-deficient INFO columns of A. */ *rpvgrw = _starpu_dla_rpvgrw__(n, info, &a[a_offset], lda, &af[af_offset], ldaf); return 0; } } /* Compute the reciprocal pivot growth factor RPVGRW. */ *rpvgrw = _starpu_dla_rpvgrw__(n, n, &a[a_offset], lda, &af[af_offset], ldaf); /* Compute the solution matrix X. */ _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); _starpu_dgetrs_(trans, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, info); /* Use iterative refinement to improve the computed solution and */ /* compute error bounds and backward error estimates for it. */ _starpu_dgerfsx_(trans, equed, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, & ipiv[1], &r__[1], &c__[1], &b[b_offset], ldb, &x[x_offset], ldx, rcond, &berr[1], n_err_bnds__, &err_bnds_norm__[ err_bnds_norm_offset], &err_bnds_comp__[err_bnds_comp_offset], nparams, ¶ms[1], &work[1], &iwork[1], info); /* Scale solutions. */ if (colequ && notran) { _starpu_dlascl2_(n, nrhs, &c__[1], &x[x_offset], ldx); } else if (rowequ && ! notran) { _starpu_dlascl2_(n, nrhs, &r__[1], &x[x_offset], ldx); } return 0; /* End of DGESVXX */ } /* _starpu_dgesvxx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgetc2.c000066400000000000000000000130151507764646700205440ustar00rootroot00000000000000/* dgetc2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b10 = -1.; /* Subroutine */ int _starpu_dgetc2_(integer *n, doublereal *a, integer *lda, integer *ipiv, integer *jpiv, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1; /* Local variables */ integer i__, j, ip, jp; doublereal eps; integer ipv, jpv; extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal smin, xmax; extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlabad_(doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *); doublereal bignum, smlnum; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGETC2 computes an LU factorization with complete pivoting of the */ /* n-by-n matrix A. The factorization has the form A = P * L * U * Q, */ /* where P and Q are permutation matrices, L is lower triangular with */ /* unit diagonal elements and U is upper triangular. */ /* This is the Level 2 BLAS algorithm. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the n-by-n matrix A to be factored. */ /* On exit, the factors L and U from the factorization */ /* A = P*L*U*Q; the unit diagonal elements of L are not stored. */ /* If U(k, k) appears to be less than SMIN, U(k, k) is given the */ /* value of SMIN, i.e., giving a nonsingular perturbed system. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* IPIV (output) INTEGER array, dimension(N). */ /* The pivot indices; for 1 <= i <= N, row i of the */ /* matrix has been interchanged with row IPIV(i). */ /* JPIV (output) INTEGER array, dimension(N). */ /* The pivot indices; for 1 <= j <= N, column j of the */ /* matrix has been interchanged with column JPIV(j). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* > 0: if INFO = k, U(k, k) is likely to produce owerflow if */ /* we try to solve for x in Ax = b. So U is perturbed to */ /* avoid the overflow. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ /* Umea University, S-901 87 Umea, Sweden. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Set constants to control overflow */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; --jpiv; /* Function Body */ *info = 0; eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S") / eps; bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); /* Factorize A using complete pivoting. */ /* Set pivots less than SMIN to SMIN. */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Find max element in matrix A */ xmax = 0.; i__2 = *n; for (ip = i__; ip <= i__2; ++ip) { i__3 = *n; for (jp = i__; jp <= i__3; ++jp) { if ((d__1 = a[ip + jp * a_dim1], abs(d__1)) >= xmax) { xmax = (d__1 = a[ip + jp * a_dim1], abs(d__1)); ipv = ip; jpv = jp; } /* L10: */ } /* L20: */ } if (i__ == 1) { /* Computing MAX */ d__1 = eps * xmax; smin = max(d__1,smlnum); } /* Swap rows */ if (ipv != i__) { _starpu_dswap_(n, &a[ipv + a_dim1], lda, &a[i__ + a_dim1], lda); } ipiv[i__] = ipv; /* Swap columns */ if (jpv != i__) { _starpu_dswap_(n, &a[jpv * a_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], & c__1); } jpiv[i__] = jpv; /* Check for singularity */ if ((d__1 = a[i__ + i__ * a_dim1], abs(d__1)) < smin) { *info = i__; a[i__ + i__ * a_dim1] = smin; } i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { a[j + i__ * a_dim1] /= a[i__ + i__ * a_dim1]; /* L30: */ } i__2 = *n - i__; i__3 = *n - i__; _starpu_dger_(&i__2, &i__3, &c_b10, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[i__ + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + (i__ + 1) * a_dim1], lda); /* L40: */ } if ((d__1 = a[*n + *n * a_dim1], abs(d__1)) < smin) { *info = *n; a[*n + *n * a_dim1] = smin; } return 0; /* End of DGETC2 */ } /* _starpu_dgetc2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgetf2.c000066400000000000000000000125051507764646700205520ustar00rootroot00000000000000/* dgetf2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b8 = -1.; /* Subroutine */ int _starpu_dgetf2_(integer *m, integer *n, doublereal *a, integer * lda, integer *ipiv, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1; /* Local variables */ integer i__, j, jp; extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal sfmin; extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGETF2 computes an LU factorization of a general m-by-n matrix A */ /* using partial pivoting with row interchanges. */ /* The factorization has the form */ /* A = P * L * U */ /* where P is a permutation matrix, L is lower triangular with unit */ /* diagonal elements (lower trapezoidal if m > n), and U is upper */ /* triangular (upper trapezoidal if m < n). */ /* This is the right-looking Level 2 BLAS version of the algorithm. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the m by n matrix to be factored. */ /* On exit, the factors L and U from the factorization */ /* A = P*L*U; the unit diagonal elements of L are not stored. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* IPIV (output) INTEGER array, dimension (min(M,N)) */ /* The pivot indices; for 1 <= i <= min(M,N), row i of the */ /* matrix was interchanged with row IPIV(i). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -k, the k-th argument had an illegal value */ /* > 0: if INFO = k, U(k,k) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, and division by zero will occur if it is used */ /* to solve a system of equations. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGETF2", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Compute machine safe minimum */ sfmin = _starpu_dlamch_("S"); i__1 = min(*m,*n); for (j = 1; j <= i__1; ++j) { /* Find pivot and test for singularity. */ i__2 = *m - j + 1; jp = j - 1 + _starpu_idamax_(&i__2, &a[j + j * a_dim1], &c__1); ipiv[j] = jp; if (a[jp + j * a_dim1] != 0.) { /* Apply the interchange to columns 1:N. */ if (jp != j) { _starpu_dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); } /* Compute elements J+1:M of J-th column. */ if (j < *m) { if ((d__1 = a[j + j * a_dim1], abs(d__1)) >= sfmin) { i__2 = *m - j; d__1 = 1. / a[j + j * a_dim1]; _starpu_dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); } else { i__2 = *m - j; for (i__ = 1; i__ <= i__2; ++i__) { a[j + i__ + j * a_dim1] /= a[j + j * a_dim1]; /* L20: */ } } } } else if (*info == 0) { *info = j; } if (j < min(*m,*n)) { /* Update trailing submatrix. */ i__2 = *m - j; i__3 = *n - j; _starpu_dger_(&i__2, &i__3, &c_b8, &a[j + 1 + j * a_dim1], &c__1, &a[j + ( j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda); } /* L10: */ } return 0; /* End of DGETF2 */ } /* _starpu_dgetf2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgetrf.c000066400000000000000000000145371507764646700206610ustar00rootroot00000000000000/* dgetrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b16 = 1.; static doublereal c_b19 = -1.; /* Subroutine */ int _starpu_dgetrf_(integer *m, integer *n, doublereal *a, integer * lda, integer *ipiv, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; /* Local variables */ integer i__, j, jb, nb; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer iinfo; extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dgetf2_( integer *, integer *, doublereal *, integer *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGETRF computes an LU factorization of a general M-by-N matrix A */ /* using partial pivoting with row interchanges. */ /* The factorization has the form */ /* A = P * L * U */ /* where P is a permutation matrix, L is lower triangular with unit */ /* diagonal elements (lower trapezoidal if m > n), and U is upper */ /* triangular (upper trapezoidal if m < n). */ /* This is the right-looking Level 3 BLAS version of the algorithm. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix to be factored. */ /* On exit, the factors L and U from the factorization */ /* A = P*L*U; the unit diagonal elements of L are not stored. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* IPIV (output) INTEGER array, dimension (min(M,N)) */ /* The pivot indices; for 1 <= i <= min(M,N), row i of the */ /* matrix was interchanged with row IPIV(i). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, and division by zero will occur if it is used */ /* to solve a system of equations. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGETRF", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Determine the block size for this environment. */ nb = _starpu_ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1); if (nb <= 1 || nb >= min(*m,*n)) { /* Use unblocked code. */ _starpu_dgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); } else { /* Use blocked code. */ i__1 = min(*m,*n); i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__3 = min(*m,*n) - j + 1; jb = min(i__3,nb); /* Factor diagonal and subdiagonal blocks and test for exact */ /* singularity. */ i__3 = *m - j + 1; _starpu_dgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); /* Adjust INFO and the pivot indices. */ if (*info == 0 && iinfo > 0) { *info = iinfo + j - 1; } /* Computing MIN */ i__4 = *m, i__5 = j + jb - 1; i__3 = min(i__4,i__5); for (i__ = j; i__ <= i__3; ++i__) { ipiv[i__] = j - 1 + ipiv[i__]; /* L10: */ } /* Apply interchanges to columns 1:J-1. */ i__3 = j - 1; i__4 = j + jb - 1; _starpu_dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); if (j + jb <= *n) { /* Apply interchanges to columns J+JB:N. */ i__3 = *n - j - jb + 1; i__4 = j + jb - 1; _starpu_dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & ipiv[1], &c__1); /* Compute block row of U. */ i__3 = *n - j - jb + 1; _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & c_b16, &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda); if (j + jb <= *m) { /* Update trailing submatrix. */ i__3 = *m - j - jb + 1; i__4 = *n - j - jb + 1; _starpu_dgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, &c_b19, &a[j + jb + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, &c_b16, &a[j + jb + (j + jb) * a_dim1], lda); } } /* L20: */ } } return 0; /* End of DGETRF */ } /* _starpu_dgetrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgetri.c000066400000000000000000000167101507764646700206570ustar00rootroot00000000000000/* dgetri.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; static doublereal c_b20 = -1.; static doublereal c_b22 = 1.; /* Subroutine */ int _starpu_dgetri_(integer *n, doublereal *a, integer *lda, integer *ipiv, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, jb, nb, jj, jp, nn, iws; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer nbmin; extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_( char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ldwork; extern /* Subroutine */ int _starpu_dtrtri_(char *, char *, integer *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGETRI computes the inverse of a matrix using the LU factorization */ /* computed by DGETRF. */ /* This method inverts U and then computes inv(A) by solving the system */ /* inv(A)*L = inv(U) for inv(A). */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the factors L and U from the factorization */ /* A = P*L*U as computed by DGETRF. */ /* On exit, if INFO = 0, the inverse of the original matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices from DGETRF; for 1<=i<=N, row i of the */ /* matrix was interchanged with row IPIV(i). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,N). */ /* For optimal performance LWORK >= N*NB, where NB is */ /* the optimal blocksize returned by ILAENV. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is */ /* singular and its inverse could not be computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; --work; /* Function Body */ *info = 0; nb = _starpu_ilaenv_(&c__1, "DGETRI", " ", n, &c_n1, &c_n1, &c_n1); lwkopt = *n * nb; work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*n < 0) { *info = -1; } else if (*lda < max(1,*n)) { *info = -3; } else if (*lwork < max(1,*n) && ! lquery) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGETRI", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Form inv(U). If INFO > 0 from DTRTRI, then U is singular, */ /* and the inverse is not computed. */ _starpu_dtrtri_("Upper", "Non-unit", n, &a[a_offset], lda, info); if (*info > 0) { return 0; } nbmin = 2; ldwork = *n; if (nb > 1 && nb < *n) { /* Computing MAX */ i__1 = ldwork * nb; iws = max(i__1,1); if (*lwork < iws) { nb = *lwork / ldwork; /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGETRI", " ", n, &c_n1, &c_n1, & c_n1); nbmin = max(i__1,i__2); } } else { iws = *n; } /* Solve the equation inv(A)*L = inv(U) for inv(A). */ if (nb < nbmin || nb >= *n) { /* Use unblocked code. */ for (j = *n; j >= 1; --j) { /* Copy current column of L to WORK and replace with zeros. */ i__1 = *n; for (i__ = j + 1; i__ <= i__1; ++i__) { work[i__] = a[i__ + j * a_dim1]; a[i__ + j * a_dim1] = 0.; /* L10: */ } /* Compute current column of inv(A). */ if (j < *n) { i__1 = *n - j; _starpu_dgemv_("No transpose", n, &i__1, &c_b20, &a[(j + 1) * a_dim1 + 1], lda, &work[j + 1], &c__1, &c_b22, &a[j * a_dim1 + 1], &c__1); } /* L20: */ } } else { /* Use blocked code. */ nn = (*n - 1) / nb * nb + 1; i__1 = -nb; for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { /* Computing MIN */ i__2 = nb, i__3 = *n - j + 1; jb = min(i__2,i__3); /* Copy current block column of L to WORK and replace with */ /* zeros. */ i__2 = j + jb - 1; for (jj = j; jj <= i__2; ++jj) { i__3 = *n; for (i__ = jj + 1; i__ <= i__3; ++i__) { work[i__ + (jj - j) * ldwork] = a[i__ + jj * a_dim1]; a[i__ + jj * a_dim1] = 0.; /* L30: */ } /* L40: */ } /* Compute current block column of inv(A). */ if (j + jb <= *n) { i__2 = *n - j - jb + 1; _starpu_dgemm_("No transpose", "No transpose", n, &jb, &i__2, &c_b20, &a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], & ldwork, &c_b22, &a[j * a_dim1 + 1], lda); } _starpu_dtrsm_("Right", "Lower", "No transpose", "Unit", n, &jb, &c_b22, & work[j], &ldwork, &a[j * a_dim1 + 1], lda); /* L50: */ } } /* Apply column interchanges. */ for (j = *n - 1; j >= 1; --j) { jp = ipiv[j]; if (jp != j) { _starpu_dswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1); } /* L60: */ } work[1] = (doublereal) iws; return 0; /* End of DGETRI */ } /* _starpu_dgetri_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgetrs.c000066400000000000000000000123641507764646700206720ustar00rootroot00000000000000/* dgetrs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b12 = 1.; static integer c_n1 = -1; /* Subroutine */ int _starpu_dgetrs_(char *trans, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * ldb, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_( char *, integer *), _starpu_dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); logical notran; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGETRS solves a system of linear equations */ /* A * X = B or A' * X = B */ /* with a general N-by-N matrix A using the LU factorization computed */ /* by DGETRF. */ /* Arguments */ /* ========= */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A'* X = B (Transpose) */ /* = 'C': A'* X = B (Conjugate transpose = Transpose) */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The factors L and U from the factorization A = P*L*U */ /* as computed by DGETRF. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices from DGETRF; for 1<=i<=N, row i of the */ /* matrix was interchanged with row IPIV(i). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side matrix B. */ /* On exit, the solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; notran = _starpu_lsame_(trans, "N"); if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_( trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -8; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGETRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } if (notran) { /* Solve A * X = B. */ /* Apply row interchanges to the right hand sides. */ _starpu_dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); /* Solve L*X = B, overwriting B with X. */ _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b12, &a[ a_offset], lda, &b[b_offset], ldb); /* Solve U*X = B, overwriting B with X. */ _starpu_dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b12, & a[a_offset], lda, &b[b_offset], ldb); } else { /* Solve A' * X = B. */ /* Solve U'*X = B, overwriting B with X. */ _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b12, &a[ a_offset], lda, &b[b_offset], ldb); /* Solve L'*X = B, overwriting B with X. */ _starpu_dtrsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b12, &a[ a_offset], lda, &b[b_offset], ldb); /* Apply row interchanges to the solution vectors. */ _starpu_dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); } return 0; /* End of DGETRS */ } /* _starpu_dgetrs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dggbak.c000066400000000000000000000160101507764646700206110ustar00rootroot00000000000000/* dggbak.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dggbak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, integer *m, doublereal *v, integer *ldv, integer *info) { /* System generated locals */ integer v_dim1, v_offset, i__1; /* Local variables */ integer i__, k; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); logical leftv; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical rightv; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGGBAK forms the right or left eigenvectors of a real generalized */ /* eigenvalue problem A*x = lambda*B*x, by backward transformation on */ /* the computed eigenvectors of the balanced pair of matrices output by */ /* DGGBAL. */ /* Arguments */ /* ========= */ /* JOB (input) CHARACTER*1 */ /* Specifies the type of backward transformation required: */ /* = 'N': do nothing, return immediately; */ /* = 'P': do backward transformation for permutation only; */ /* = 'S': do backward transformation for scaling only; */ /* = 'B': do backward transformations for both permutation and */ /* scaling. */ /* JOB must be the same as the argument JOB supplied to DGGBAL. */ /* SIDE (input) CHARACTER*1 */ /* = 'R': V contains right eigenvectors; */ /* = 'L': V contains left eigenvectors. */ /* N (input) INTEGER */ /* The number of rows of the matrix V. N >= 0. */ /* ILO (input) INTEGER */ /* IHI (input) INTEGER */ /* The integers ILO and IHI determined by DGGBAL. */ /* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ /* LSCALE (input) DOUBLE PRECISION array, dimension (N) */ /* Details of the permutations and/or scaling factors applied */ /* to the left side of A and B, as returned by DGGBAL. */ /* RSCALE (input) DOUBLE PRECISION array, dimension (N) */ /* Details of the permutations and/or scaling factors applied */ /* to the right side of A and B, as returned by DGGBAL. */ /* M (input) INTEGER */ /* The number of columns of the matrix V. M >= 0. */ /* V (input/output) DOUBLE PRECISION array, dimension (LDV,M) */ /* On entry, the matrix of right or left eigenvectors to be */ /* transformed, as returned by DTGEVC. */ /* On exit, V is overwritten by the transformed eigenvectors. */ /* LDV (input) INTEGER */ /* The leading dimension of the matrix V. LDV >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* See R.C. Ward, Balancing the generalized eigenvalue problem, */ /* SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ --lscale; --rscale; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; /* Function Body */ rightv = _starpu_lsame_(side, "R"); leftv = _starpu_lsame_(side, "L"); *info = 0; if (! _starpu_lsame_(job, "N") && ! _starpu_lsame_(job, "P") && ! _starpu_lsame_(job, "S") && ! _starpu_lsame_(job, "B")) { *info = -1; } else if (! rightv && ! leftv) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ilo < 1) { *info = -4; } else if (*n == 0 && *ihi == 0 && *ilo != 1) { *info = -4; } else if (*n > 0 && (*ihi < *ilo || *ihi > max(1,*n))) { *info = -5; } else if (*n == 0 && *ilo == 1 && *ihi != 0) { *info = -5; } else if (*m < 0) { *info = -8; } else if (*ldv < max(1,*n)) { *info = -10; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGGBAK", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*m == 0) { return 0; } if (_starpu_lsame_(job, "N")) { return 0; } if (*ilo == *ihi) { goto L30; } /* Backward balance */ if (_starpu_lsame_(job, "S") || _starpu_lsame_(job, "B")) { /* Backward transformation on right eigenvectors */ if (rightv) { i__1 = *ihi; for (i__ = *ilo; i__ <= i__1; ++i__) { _starpu_dscal_(m, &rscale[i__], &v[i__ + v_dim1], ldv); /* L10: */ } } /* Backward transformation on left eigenvectors */ if (leftv) { i__1 = *ihi; for (i__ = *ilo; i__ <= i__1; ++i__) { _starpu_dscal_(m, &lscale[i__], &v[i__ + v_dim1], ldv); /* L20: */ } } } /* Backward permutation */ L30: if (_starpu_lsame_(job, "P") || _starpu_lsame_(job, "B")) { /* Backward permutation on right eigenvectors */ if (rightv) { if (*ilo == 1) { goto L50; } for (i__ = *ilo - 1; i__ >= 1; --i__) { k = (integer) rscale[i__]; if (k == i__) { goto L40; } _starpu_dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); L40: ; } L50: if (*ihi == *n) { goto L70; } i__1 = *n; for (i__ = *ihi + 1; i__ <= i__1; ++i__) { k = (integer) rscale[i__]; if (k == i__) { goto L60; } _starpu_dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); L60: ; } } /* Backward permutation on left eigenvectors */ L70: if (leftv) { if (*ilo == 1) { goto L90; } for (i__ = *ilo - 1; i__ >= 1; --i__) { k = (integer) lscale[i__]; if (k == i__) { goto L80; } _starpu_dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); L80: ; } L90: if (*ihi == *n) { goto L110; } i__1 = *n; for (i__ = *ihi + 1; i__ <= i__1; ++i__) { k = (integer) lscale[i__]; if (k == i__) { goto L100; } _starpu_dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); L100: ; } } } L110: return 0; /* End of DGGBAK */ } /* _starpu_dggbak_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dggbal.c000066400000000000000000000374661507764646700206340ustar00rootroot00000000000000/* dggbal.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b35 = 10.; static doublereal c_b71 = .5; /* Subroutine */ int _starpu_dggbal_(char *job, integer *n, doublereal *a, integer * lda, doublereal *b, integer *ldb, integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, doublereal *work, integer * info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3; /* Builtin functions */ double d_lg10(doublereal *), d_sign(doublereal *, doublereal *), pow_di( doublereal *, integer *); /* Local variables */ integer i__, j, k, l, m; doublereal t; integer jc; doublereal ta, tb, tc; integer ir; doublereal ew; integer it, nr, ip1, jp1, lm1; doublereal cab, rab, ewc, cor, sum; integer nrp2, icab, lcab; doublereal beta, coef; integer irab, lrab; doublereal basl, cmax; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal coef2, coef5, gamma, alpha; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); doublereal sfmin, sfmax; extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); integer iflow; extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer kount; extern doublereal _starpu_dlamch_(char *); doublereal pgamma; extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); integer lsfmin, lsfmax; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGGBAL balances a pair of general real matrices (A,B). This */ /* involves, first, permuting A and B by similarity transformations to */ /* isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N */ /* elements on the diagonal; and second, applying a diagonal similarity */ /* transformation to rows and columns ILO to IHI to make the rows */ /* and columns as close in norm as possible. Both steps are optional. */ /* Balancing may reduce the 1-norm of the matrices, and improve the */ /* accuracy of the computed eigenvalues and/or eigenvectors in the */ /* generalized eigenvalue problem A*x = lambda*B*x. */ /* Arguments */ /* ========= */ /* JOB (input) CHARACTER*1 */ /* Specifies the operations to be performed on A and B: */ /* = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 */ /* and RSCALE(I) = 1.0 for i = 1,...,N. */ /* = 'P': permute only; */ /* = 'S': scale only; */ /* = 'B': both permute and scale. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the input matrix A. */ /* On exit, A is overwritten by the balanced matrix. */ /* If JOB = 'N', A is not referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) */ /* On entry, the input matrix B. */ /* On exit, B is overwritten by the balanced matrix. */ /* If JOB = 'N', B is not referenced. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* ILO (output) INTEGER */ /* IHI (output) INTEGER */ /* ILO and IHI are set to integers such that on exit */ /* A(i,j) = 0 and B(i,j) = 0 if i > j and */ /* j = 1,...,ILO-1 or i = IHI+1,...,N. */ /* If JOB = 'N' or 'S', ILO = 1 and IHI = N. */ /* LSCALE (output) DOUBLE PRECISION array, dimension (N) */ /* Details of the permutations and scaling factors applied */ /* to the left side of A and B. If P(j) is the index of the */ /* row interchanged with row j, and D(j) */ /* is the scaling factor applied to row j, then */ /* LSCALE(j) = P(j) for J = 1,...,ILO-1 */ /* = D(j) for J = ILO,...,IHI */ /* = P(j) for J = IHI+1,...,N. */ /* The order in which the interchanges are made is N to IHI+1, */ /* then 1 to ILO-1. */ /* RSCALE (output) DOUBLE PRECISION array, dimension (N) */ /* Details of the permutations and scaling factors applied */ /* to the right side of A and B. If P(j) is the index of the */ /* column interchanged with column j, and D(j) */ /* is the scaling factor applied to column j, then */ /* LSCALE(j) = P(j) for J = 1,...,ILO-1 */ /* = D(j) for J = ILO,...,IHI */ /* = P(j) for J = IHI+1,...,N. */ /* The order in which the interchanges are made is N to IHI+1, */ /* then 1 to ILO-1. */ /* WORK (workspace) REAL array, dimension (lwork) */ /* lwork must be at least max(1,6*N) when JOB = 'S' or 'B', and */ /* at least 1 when JOB = 'N' or 'P'. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* See R.C. WARD, Balancing the generalized eigenvalue problem, */ /* SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --lscale; --rscale; --work; /* Function Body */ *info = 0; if (! _starpu_lsame_(job, "N") && ! _starpu_lsame_(job, "P") && ! _starpu_lsame_(job, "S") && ! _starpu_lsame_(job, "B")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } else if (*ldb < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGGBAL", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { *ilo = 1; *ihi = *n; return 0; } if (*n == 1) { *ilo = 1; *ihi = *n; lscale[1] = 1.; rscale[1] = 1.; return 0; } if (_starpu_lsame_(job, "N")) { *ilo = 1; *ihi = *n; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { lscale[i__] = 1.; rscale[i__] = 1.; /* L10: */ } return 0; } k = 1; l = *n; if (_starpu_lsame_(job, "S")) { goto L190; } goto L30; /* Permute the matrices A and B to isolate the eigenvalues. */ /* Find row with one nonzero in columns 1 through L */ L20: l = lm1; if (l != 1) { goto L30; } rscale[1] = 1.; lscale[1] = 1.; goto L190; L30: lm1 = l - 1; for (i__ = l; i__ >= 1; --i__) { i__1 = lm1; for (j = 1; j <= i__1; ++j) { jp1 = j + 1; if (a[i__ + j * a_dim1] != 0. || b[i__ + j * b_dim1] != 0.) { goto L50; } /* L40: */ } j = l; goto L70; L50: i__1 = l; for (j = jp1; j <= i__1; ++j) { if (a[i__ + j * a_dim1] != 0. || b[i__ + j * b_dim1] != 0.) { goto L80; } /* L60: */ } j = jp1 - 1; L70: m = l; iflow = 1; goto L160; L80: ; } goto L100; /* Find column with one nonzero in rows K through N */ L90: ++k; L100: i__1 = l; for (j = k; j <= i__1; ++j) { i__2 = lm1; for (i__ = k; i__ <= i__2; ++i__) { ip1 = i__ + 1; if (a[i__ + j * a_dim1] != 0. || b[i__ + j * b_dim1] != 0.) { goto L120; } /* L110: */ } i__ = l; goto L140; L120: i__2 = l; for (i__ = ip1; i__ <= i__2; ++i__) { if (a[i__ + j * a_dim1] != 0. || b[i__ + j * b_dim1] != 0.) { goto L150; } /* L130: */ } i__ = ip1 - 1; L140: m = k; iflow = 2; goto L160; L150: ; } goto L190; /* Permute rows M and I */ L160: lscale[m] = (doublereal) i__; if (i__ == m) { goto L170; } i__1 = *n - k + 1; _starpu_dswap_(&i__1, &a[i__ + k * a_dim1], lda, &a[m + k * a_dim1], lda); i__1 = *n - k + 1; _starpu_dswap_(&i__1, &b[i__ + k * b_dim1], ldb, &b[m + k * b_dim1], ldb); /* Permute columns M and J */ L170: rscale[m] = (doublereal) j; if (j == m) { goto L180; } _starpu_dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); _starpu_dswap_(&l, &b[j * b_dim1 + 1], &c__1, &b[m * b_dim1 + 1], &c__1); L180: switch (iflow) { case 1: goto L20; case 2: goto L90; } L190: *ilo = k; *ihi = l; if (_starpu_lsame_(job, "P")) { i__1 = *ihi; for (i__ = *ilo; i__ <= i__1; ++i__) { lscale[i__] = 1.; rscale[i__] = 1.; /* L195: */ } return 0; } if (*ilo == *ihi) { return 0; } /* Balance the submatrix in rows ILO to IHI. */ nr = *ihi - *ilo + 1; i__1 = *ihi; for (i__ = *ilo; i__ <= i__1; ++i__) { rscale[i__] = 0.; lscale[i__] = 0.; work[i__] = 0.; work[i__ + *n] = 0.; work[i__ + (*n << 1)] = 0.; work[i__ + *n * 3] = 0.; work[i__ + (*n << 2)] = 0.; work[i__ + *n * 5] = 0.; /* L200: */ } /* Compute right side vector in resulting linear equations */ basl = d_lg10(&c_b35); i__1 = *ihi; for (i__ = *ilo; i__ <= i__1; ++i__) { i__2 = *ihi; for (j = *ilo; j <= i__2; ++j) { tb = b[i__ + j * b_dim1]; ta = a[i__ + j * a_dim1]; if (ta == 0.) { goto L210; } d__1 = abs(ta); ta = d_lg10(&d__1) / basl; L210: if (tb == 0.) { goto L220; } d__1 = abs(tb); tb = d_lg10(&d__1) / basl; L220: work[i__ + (*n << 2)] = work[i__ + (*n << 2)] - ta - tb; work[j + *n * 5] = work[j + *n * 5] - ta - tb; /* L230: */ } /* L240: */ } coef = 1. / (doublereal) (nr << 1); coef2 = coef * coef; coef5 = coef2 * .5; nrp2 = nr + 2; beta = 0.; it = 1; /* Start generalized conjugate gradient iteration */ L250: gamma = _starpu_ddot_(&nr, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + (*n << 2)] , &c__1) + _starpu_ddot_(&nr, &work[*ilo + *n * 5], &c__1, &work[*ilo + * n * 5], &c__1); ew = 0.; ewc = 0.; i__1 = *ihi; for (i__ = *ilo; i__ <= i__1; ++i__) { ew += work[i__ + (*n << 2)]; ewc += work[i__ + *n * 5]; /* L260: */ } /* Computing 2nd power */ d__1 = ew; /* Computing 2nd power */ d__2 = ewc; /* Computing 2nd power */ d__3 = ew - ewc; gamma = coef * gamma - coef2 * (d__1 * d__1 + d__2 * d__2) - coef5 * ( d__3 * d__3); if (gamma == 0.) { goto L350; } if (it != 1) { beta = gamma / pgamma; } t = coef5 * (ewc - ew * 3.); tc = coef5 * (ew - ewc * 3.); _starpu_dscal_(&nr, &beta, &work[*ilo], &c__1); _starpu_dscal_(&nr, &beta, &work[*ilo + *n], &c__1); _starpu_daxpy_(&nr, &coef, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + *n], & c__1); _starpu_daxpy_(&nr, &coef, &work[*ilo + *n * 5], &c__1, &work[*ilo], &c__1); i__1 = *ihi; for (i__ = *ilo; i__ <= i__1; ++i__) { work[i__] += tc; work[i__ + *n] += t; /* L270: */ } /* Apply matrix to vector */ i__1 = *ihi; for (i__ = *ilo; i__ <= i__1; ++i__) { kount = 0; sum = 0.; i__2 = *ihi; for (j = *ilo; j <= i__2; ++j) { if (a[i__ + j * a_dim1] == 0.) { goto L280; } ++kount; sum += work[j]; L280: if (b[i__ + j * b_dim1] == 0.) { goto L290; } ++kount; sum += work[j]; L290: ; } work[i__ + (*n << 1)] = (doublereal) kount * work[i__ + *n] + sum; /* L300: */ } i__1 = *ihi; for (j = *ilo; j <= i__1; ++j) { kount = 0; sum = 0.; i__2 = *ihi; for (i__ = *ilo; i__ <= i__2; ++i__) { if (a[i__ + j * a_dim1] == 0.) { goto L310; } ++kount; sum += work[i__ + *n]; L310: if (b[i__ + j * b_dim1] == 0.) { goto L320; } ++kount; sum += work[i__ + *n]; L320: ; } work[j + *n * 3] = (doublereal) kount * work[j] + sum; /* L330: */ } sum = _starpu_ddot_(&nr, &work[*ilo + *n], &c__1, &work[*ilo + (*n << 1)], &c__1) + _starpu_ddot_(&nr, &work[*ilo], &c__1, &work[*ilo + *n * 3], &c__1); alpha = gamma / sum; /* Determine correction to current iteration */ cmax = 0.; i__1 = *ihi; for (i__ = *ilo; i__ <= i__1; ++i__) { cor = alpha * work[i__ + *n]; if (abs(cor) > cmax) { cmax = abs(cor); } lscale[i__] += cor; cor = alpha * work[i__]; if (abs(cor) > cmax) { cmax = abs(cor); } rscale[i__] += cor; /* L340: */ } if (cmax < .5) { goto L350; } d__1 = -alpha; _starpu_daxpy_(&nr, &d__1, &work[*ilo + (*n << 1)], &c__1, &work[*ilo + (*n << 2)] , &c__1); d__1 = -alpha; _starpu_daxpy_(&nr, &d__1, &work[*ilo + *n * 3], &c__1, &work[*ilo + *n * 5], & c__1); pgamma = gamma; ++it; if (it <= nrp2) { goto L250; } /* End generalized conjugate gradient iteration */ L350: sfmin = _starpu_dlamch_("S"); sfmax = 1. / sfmin; lsfmin = (integer) (d_lg10(&sfmin) / basl + 1.); lsfmax = (integer) (d_lg10(&sfmax) / basl); i__1 = *ihi; for (i__ = *ilo; i__ <= i__1; ++i__) { i__2 = *n - *ilo + 1; irab = _starpu_idamax_(&i__2, &a[i__ + *ilo * a_dim1], lda); rab = (d__1 = a[i__ + (irab + *ilo - 1) * a_dim1], abs(d__1)); i__2 = *n - *ilo + 1; irab = _starpu_idamax_(&i__2, &b[i__ + *ilo * b_dim1], ldb); /* Computing MAX */ d__2 = rab, d__3 = (d__1 = b[i__ + (irab + *ilo - 1) * b_dim1], abs( d__1)); rab = max(d__2,d__3); d__1 = rab + sfmin; lrab = (integer) (d_lg10(&d__1) / basl + 1.); ir = (integer) (lscale[i__] + d_sign(&c_b71, &lscale[i__])); /* Computing MIN */ i__2 = max(ir,lsfmin), i__2 = min(i__2,lsfmax), i__3 = lsfmax - lrab; ir = min(i__2,i__3); lscale[i__] = pow_di(&c_b35, &ir); icab = _starpu_idamax_(ihi, &a[i__ * a_dim1 + 1], &c__1); cab = (d__1 = a[icab + i__ * a_dim1], abs(d__1)); icab = _starpu_idamax_(ihi, &b[i__ * b_dim1 + 1], &c__1); /* Computing MAX */ d__2 = cab, d__3 = (d__1 = b[icab + i__ * b_dim1], abs(d__1)); cab = max(d__2,d__3); d__1 = cab + sfmin; lcab = (integer) (d_lg10(&d__1) / basl + 1.); jc = (integer) (rscale[i__] + d_sign(&c_b71, &rscale[i__])); /* Computing MIN */ i__2 = max(jc,lsfmin), i__2 = min(i__2,lsfmax), i__3 = lsfmax - lcab; jc = min(i__2,i__3); rscale[i__] = pow_di(&c_b35, &jc); /* L360: */ } /* Row scaling of matrices A and B */ i__1 = *ihi; for (i__ = *ilo; i__ <= i__1; ++i__) { i__2 = *n - *ilo + 1; _starpu_dscal_(&i__2, &lscale[i__], &a[i__ + *ilo * a_dim1], lda); i__2 = *n - *ilo + 1; _starpu_dscal_(&i__2, &lscale[i__], &b[i__ + *ilo * b_dim1], ldb); /* L370: */ } /* Column scaling of matrices A and B */ i__1 = *ihi; for (j = *ilo; j <= i__1; ++j) { _starpu_dscal_(ihi, &rscale[j], &a[j * a_dim1 + 1], &c__1); _starpu_dscal_(ihi, &rscale[j], &b[j * b_dim1 + 1], &c__1); /* L380: */ } return 0; /* End of DGGBAL */ } /* _starpu_dggbal_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgges.c000066400000000000000000000547211507764646700204760ustar00rootroot00000000000000/* dgges.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__0 = 0; static integer c_n1 = -1; static doublereal c_b38 = 0.; static doublereal c_b39 = 1.; /* Subroutine */ int _starpu_dgges_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer *sdim, doublereal *alphar, doublereal *alphai, doublereal *beta, doublereal *vsl, integer *ldvsl, doublereal *vsr, integer *ldvsr, doublereal *work, integer *lwork, logical *bwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, vsr_dim1, vsr_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, ip; doublereal dif[2]; integer ihi, ilo; doublereal eps, anrm, bnrm; integer idum[1], ierr, itau, iwrk; doublereal pvsl, pvsr; extern logical _starpu_lsame_(char *, char *); integer ileft, icols; logical cursl, ilvsl, ilvsr; integer irows; extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dggbak_( char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dggbal_(char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); logical lst2sl; extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dgghrd_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); logical ilascl, ilbscl; extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal safmin; extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); doublereal safmax; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dhgeqz_(char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dtgsen_(integer *, logical *, logical *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *); integer ijobvl, iright; extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ijobvr; extern /* Subroutine */ int _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); doublereal anrmto, bnrmto; logical lastsl; extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer minwrk, maxwrk; doublereal smlnum; logical wantst, lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* .. Function Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGGES computes for a pair of N-by-N real nonsymmetric matrices (A,B), */ /* the generalized eigenvalues, the generalized real Schur form (S,T), */ /* optionally, the left and/or right matrices of Schur vectors (VSL and */ /* VSR). This gives the generalized Schur factorization */ /* (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) */ /* Optionally, it also orders the eigenvalues so that a selected cluster */ /* of eigenvalues appears in the leading diagonal blocks of the upper */ /* quasi-triangular matrix S and the upper triangular matrix T.The */ /* leading columns of VSL and VSR then form an orthonormal basis for the */ /* corresponding left and right eigenspaces (deflating subspaces). */ /* (If only the generalized eigenvalues are needed, use the driver */ /* DGGEV instead, which is faster.) */ /* A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */ /* or a ratio alpha/beta = w, such that A - w*B is singular. It is */ /* usually represented as the pair (alpha,beta), as there is a */ /* reasonable interpretation for beta=0 or both being zero. */ /* A pair of matrices (S,T) is in generalized real Schur form if T is */ /* upper triangular with non-negative diagonal and S is block upper */ /* triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond */ /* to real generalized eigenvalues, while 2-by-2 blocks of S will be */ /* "standardized" by making the corresponding elements of T have the */ /* form: */ /* [ a 0 ] */ /* [ 0 b ] */ /* and the pair of corresponding 2-by-2 blocks in S and T will have a */ /* complex conjugate pair of generalized eigenvalues. */ /* Arguments */ /* ========= */ /* JOBVSL (input) CHARACTER*1 */ /* = 'N': do not compute the left Schur vectors; */ /* = 'V': compute the left Schur vectors. */ /* JOBVSR (input) CHARACTER*1 */ /* = 'N': do not compute the right Schur vectors; */ /* = 'V': compute the right Schur vectors. */ /* SORT (input) CHARACTER*1 */ /* Specifies whether or not to order the eigenvalues on the */ /* diagonal of the generalized Schur form. */ /* = 'N': Eigenvalues are not ordered; */ /* = 'S': Eigenvalues are ordered (see SELCTG); */ /* SELCTG (external procedure) LOGICAL FUNCTION of three DOUBLE PRECISION arguments */ /* SELCTG must be declared EXTERNAL in the calling subroutine. */ /* If SORT = 'N', SELCTG is not referenced. */ /* If SORT = 'S', SELCTG is used to select eigenvalues to sort */ /* to the top left of the Schur form. */ /* An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if */ /* SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either */ /* one of a complex conjugate pair of eigenvalues is selected, */ /* then both complex eigenvalues are selected. */ /* Note that in the ill-conditioned case, a selected complex */ /* eigenvalue may no longer satisfy SELCTG(ALPHAR(j),ALPHAI(j), */ /* BETA(j)) = .TRUE. after ordering. INFO is to be set to N+2 */ /* in this case. */ /* N (input) INTEGER */ /* The order of the matrices A, B, VSL, and VSR. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the first of the pair of matrices. */ /* On exit, A has been overwritten by its generalized Schur */ /* form S. */ /* LDA (input) INTEGER */ /* The leading dimension of A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ /* On entry, the second of the pair of matrices. */ /* On exit, B has been overwritten by its generalized Schur */ /* form T. */ /* LDB (input) INTEGER */ /* The leading dimension of B. LDB >= max(1,N). */ /* SDIM (output) INTEGER */ /* If SORT = 'N', SDIM = 0. */ /* If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ /* for which SELCTG is true. (Complex conjugate pairs for which */ /* SELCTG is true for either eigenvalue count as 2.) */ /* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ /* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ /* BETA (output) DOUBLE PRECISION array, dimension (N) */ /* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ /* be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i, */ /* and BETA(j),j=1,...,N are the diagonals of the complex Schur */ /* form (S,T) that would result if the 2-by-2 diagonal blocks of */ /* the real Schur form of (A,B) were further reduced to */ /* triangular form using 2-by-2 complex unitary transformations. */ /* If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */ /* positive, then the j-th and (j+1)-st eigenvalues are a */ /* complex conjugate pair, with ALPHAI(j+1) negative. */ /* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */ /* may easily over- or underflow, and BETA(j) may even be zero. */ /* Thus, the user should avoid naively computing the ratio. */ /* However, ALPHAR and ALPHAI will be always less than and */ /* usually comparable with norm(A) in magnitude, and BETA always */ /* less than and usually comparable with norm(B). */ /* VSL (output) DOUBLE PRECISION array, dimension (LDVSL,N) */ /* If JOBVSL = 'V', VSL will contain the left Schur vectors. */ /* Not referenced if JOBVSL = 'N'. */ /* LDVSL (input) INTEGER */ /* The leading dimension of the matrix VSL. LDVSL >=1, and */ /* if JOBVSL = 'V', LDVSL >= N. */ /* VSR (output) DOUBLE PRECISION array, dimension (LDVSR,N) */ /* If JOBVSR = 'V', VSR will contain the right Schur vectors. */ /* Not referenced if JOBVSR = 'N'. */ /* LDVSR (input) INTEGER */ /* The leading dimension of the matrix VSR. LDVSR >= 1, and */ /* if JOBVSR = 'V', LDVSR >= N. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If N = 0, LWORK >= 1, else LWORK >= 8*N+16. */ /* For good performance , LWORK must generally be larger. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* BWORK (workspace) LOGICAL array, dimension (N) */ /* Not referenced if SORT = 'N'. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* = 1,...,N: */ /* The QZ iteration failed. (A,B) are not in Schur */ /* form, but ALPHAR(j), ALPHAI(j), and BETA(j) should */ /* be correct for j=INFO+1,...,N. */ /* > N: =N+1: other than QZ iteration failed in DHGEQZ. */ /* =N+2: after reordering, roundoff changed values of */ /* some complex eigenvalues so that leading */ /* eigenvalues in the Generalized Schur form no */ /* longer satisfy SELCTG=.TRUE. This could also */ /* be caused due to scaling. */ /* =N+3: reordering failed in DTGSEN. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --alphar; --alphai; --beta; vsl_dim1 = *ldvsl; vsl_offset = 1 + vsl_dim1; vsl -= vsl_offset; vsr_dim1 = *ldvsr; vsr_offset = 1 + vsr_dim1; vsr -= vsr_offset; --work; --bwork; /* Function Body */ if (_starpu_lsame_(jobvsl, "N")) { ijobvl = 1; ilvsl = FALSE_; } else if (_starpu_lsame_(jobvsl, "V")) { ijobvl = 2; ilvsl = TRUE_; } else { ijobvl = -1; ilvsl = FALSE_; } if (_starpu_lsame_(jobvsr, "N")) { ijobvr = 1; ilvsr = FALSE_; } else if (_starpu_lsame_(jobvsr, "V")) { ijobvr = 2; ilvsr = TRUE_; } else { ijobvr = -1; ilvsr = FALSE_; } wantst = _starpu_lsame_(sort, "S"); /* Test the input arguments */ *info = 0; lquery = *lwork == -1; if (ijobvl <= 0) { *info = -1; } else if (ijobvr <= 0) { *info = -2; } else if (! wantst && ! _starpu_lsame_(sort, "N")) { *info = -3; } else if (*n < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { *info = -15; } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { *info = -17; } /* Compute workspace */ /* (Note: Comments in the code beginning "Workspace:" describe the */ /* minimal amount of workspace needed at that point in the code, */ /* as well as the preferred amount for good performance. */ /* NB refers to the optimal block size for the immediately */ /* following subroutine, as returned by ILAENV.) */ if (*info == 0) { if (*n > 0) { /* Computing MAX */ i__1 = *n << 3, i__2 = *n * 6 + 16; minwrk = max(i__1,i__2); maxwrk = minwrk - *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", n, & c__1, n, &c__0); /* Computing MAX */ i__1 = maxwrk, i__2 = minwrk - *n + *n * _starpu_ilaenv_(&c__1, "DORMQR", " ", n, &c__1, n, &c_n1); maxwrk = max(i__1,i__2); if (ilvsl) { /* Computing MAX */ i__1 = maxwrk, i__2 = minwrk - *n + *n * _starpu_ilaenv_(&c__1, "DOR" "GQR", " ", n, &c__1, n, &c_n1); maxwrk = max(i__1,i__2); } } else { minwrk = 1; maxwrk = 1; } work[1] = (doublereal) maxwrk; if (*lwork < minwrk && ! lquery) { *info = -19; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGGES ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { *sdim = 0; return 0; } /* Get machine constants */ eps = _starpu_dlamch_("P"); safmin = _starpu_dlamch_("S"); safmax = 1. / safmin; _starpu_dlabad_(&safmin, &safmax); smlnum = sqrt(safmin) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, &work[1]); ilascl = FALSE_; if (anrm > 0. && anrm < smlnum) { anrmto = smlnum; ilascl = TRUE_; } else if (anrm > bignum) { anrmto = bignum; ilascl = TRUE_; } if (ilascl) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & ierr); } /* Scale B if max element outside range [SMLNUM,BIGNUM] */ bnrm = _starpu_dlange_("M", n, n, &b[b_offset], ldb, &work[1]); ilbscl = FALSE_; if (bnrm > 0. && bnrm < smlnum) { bnrmto = smlnum; ilbscl = TRUE_; } else if (bnrm > bignum) { bnrmto = bignum; ilbscl = TRUE_; } if (ilbscl) { _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & ierr); } /* Permute the matrix to make it more nearly triangular */ /* (Workspace: need 6*N + 2*N space for storing balancing factors) */ ileft = 1; iright = *n + 1; iwrk = iright + *n; _starpu_dggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ ileft], &work[iright], &work[iwrk], &ierr); /* Reduce B to triangular form (QR decomposition of B) */ /* (Workspace: need N, prefer N*NB) */ irows = ihi + 1 - ilo; icols = *n + 1 - ilo; itau = iwrk; iwrk = itau + irows; i__1 = *lwork + 1 - iwrk; _starpu_dgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ iwrk], &i__1, &ierr); /* Apply the orthogonal transformation to matrix A */ /* (Workspace: need N, prefer N*NB) */ i__1 = *lwork + 1 - iwrk; _starpu_dormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & ierr); /* Initialize VSL */ /* (Workspace: need N, prefer N*NB) */ if (ilvsl) { _starpu_dlaset_("Full", n, n, &c_b38, &c_b39, &vsl[vsl_offset], ldvsl); if (irows > 1) { i__1 = irows - 1; i__2 = irows - 1; _starpu_dlacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[ ilo + 1 + ilo * vsl_dim1], ldvsl); } i__1 = *lwork + 1 - iwrk; _starpu_dorgqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, & work[itau], &work[iwrk], &i__1, &ierr); } /* Initialize VSR */ if (ilvsr) { _starpu_dlaset_("Full", n, n, &c_b38, &c_b39, &vsr[vsr_offset], ldvsr); } /* Reduce to generalized Hessenberg form */ /* (Workspace: none needed) */ _starpu_dgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &ierr); /* Perform QZ algorithm, computing Schur vectors if desired */ /* (Workspace: need N) */ iwrk = itau; i__1 = *lwork + 1 - iwrk; _starpu_dhgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[vsl_offset] , ldvsl, &vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &ierr); if (ierr != 0) { if (ierr > 0 && ierr <= *n) { *info = ierr; } else if (ierr > *n && ierr <= *n << 1) { *info = ierr - *n; } else { *info = *n + 1; } goto L50; } /* Sort eigenvalues ALPHA/BETA if desired */ /* (Workspace: need 4*N+16 ) */ *sdim = 0; if (wantst) { /* Undo scaling on eigenvalues before SELCTGing */ if (ilascl) { _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, &ierr); _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, &ierr); } if (ilbscl) { _starpu_dlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, &ierr); } /* Select eigenvalues */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { bwork[i__] = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); /* L10: */ } i__1 = *lwork - iwrk + 1; _starpu_dtgsen_(&c__0, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[ b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[ vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, sdim, &pvsl, & pvsr, dif, &work[iwrk], &i__1, idum, &c__1, &ierr); if (ierr == 1) { *info = *n + 3; } } /* Apply back-permutation to VSL and VSR */ /* (Workspace: none needed) */ if (ilvsl) { _starpu_dggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsl[ vsl_offset], ldvsl, &ierr); } if (ilvsr) { _starpu_dggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsr[ vsr_offset], ldvsr, &ierr); } /* Check if unscaling would cause over/underflow, if so, rescale */ /* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of */ /* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) */ if (ilascl) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (alphai[i__] != 0.) { if (alphar[i__] / safmax > anrmto / anrm || safmin / alphar[ i__] > anrm / anrmto) { work[1] = (d__1 = a[i__ + i__ * a_dim1] / alphar[i__], abs(d__1)); beta[i__] *= work[1]; alphar[i__] *= work[1]; alphai[i__] *= work[1]; } else if (alphai[i__] / safmax > anrmto / anrm || safmin / alphai[i__] > anrm / anrmto) { work[1] = (d__1 = a[i__ + (i__ + 1) * a_dim1] / alphai[ i__], abs(d__1)); beta[i__] *= work[1]; alphar[i__] *= work[1]; alphai[i__] *= work[1]; } } /* L20: */ } } if (ilbscl) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (alphai[i__] != 0.) { if (beta[i__] / safmax > bnrmto / bnrm || safmin / beta[i__] > bnrm / bnrmto) { work[1] = (d__1 = b[i__ + i__ * b_dim1] / beta[i__], abs( d__1)); beta[i__] *= work[1]; alphar[i__] *= work[1]; alphai[i__] *= work[1]; } } /* L30: */ } } /* Undo scaling */ if (ilascl) { _starpu_dlascl_("H", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, & ierr); _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, & ierr); _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, & ierr); } if (ilbscl) { _starpu_dlascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & ierr); _starpu_dlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & ierr); } if (wantst) { /* Check if reordering is correct */ lastsl = TRUE_; lst2sl = TRUE_; *sdim = 0; ip = 0; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { cursl = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); if (alphai[i__] == 0.) { if (cursl) { ++(*sdim); } ip = 0; if (cursl && ! lastsl) { *info = *n + 2; } } else { if (ip == 1) { /* Last eigenvalue of conjugate pair */ cursl = cursl || lastsl; lastsl = cursl; if (cursl) { *sdim += 2; } ip = -1; if (cursl && ! lst2sl) { *info = *n + 2; } } else { /* First eigenvalue of conjugate pair */ ip = 1; } } lst2sl = lastsl; lastsl = cursl; /* L40: */ } } L50: work[1] = (doublereal) maxwrk; return 0; /* End of DGGES */ } /* _starpu_dgges_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dggesx.c000066400000000000000000000654111507764646700206640ustar00rootroot00000000000000/* dggesx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__0 = 0; static integer c_n1 = -1; static doublereal c_b42 = 0.; static doublereal c_b43 = 1.; /* Subroutine */ int _starpu_dggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, char *sense, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer *sdim, doublereal *alphar, doublereal *alphai, doublereal *beta, doublereal *vsl, integer *ldvsl, doublereal *vsr, integer *ldvsr, doublereal *rconde, doublereal * rcondv, doublereal *work, integer *lwork, integer *iwork, integer * liwork, logical *bwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, vsr_dim1, vsr_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, ip; doublereal pl, pr, dif[2]; integer ihi, ilo; doublereal eps; integer ijob; doublereal anrm, bnrm; integer ierr, itau, iwrk, lwrk; extern logical _starpu_lsame_(char *, char *); integer ileft, icols; logical cursl, ilvsl, ilvsr; integer irows; extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dggbak_( char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dggbal_(char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); logical lst2sl; extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dgghrd_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); logical ilascl, ilbscl; extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal safmin; extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); doublereal safmax; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dhgeqz_(char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); integer ijobvl, iright; extern /* Subroutine */ int _starpu_dtgsen_(integer *, logical *, logical *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ijobvr; logical wantsb; integer liwmin; logical wantse, lastsl; doublereal anrmto, bnrmto; extern /* Subroutine */ int _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); integer minwrk, maxwrk; logical wantsn; doublereal smlnum; extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); logical wantst, lquery, wantsv; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* .. Function Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGGESX computes for a pair of N-by-N real nonsymmetric matrices */ /* (A,B), the generalized eigenvalues, the real Schur form (S,T), and, */ /* optionally, the left and/or right matrices of Schur vectors (VSL and */ /* VSR). This gives the generalized Schur factorization */ /* (A,B) = ( (VSL) S (VSR)**T, (VSL) T (VSR)**T ) */ /* Optionally, it also orders the eigenvalues so that a selected cluster */ /* of eigenvalues appears in the leading diagonal blocks of the upper */ /* quasi-triangular matrix S and the upper triangular matrix T; computes */ /* a reciprocal condition number for the average of the selected */ /* eigenvalues (RCONDE); and computes a reciprocal condition number for */ /* the right and left deflating subspaces corresponding to the selected */ /* eigenvalues (RCONDV). The leading columns of VSL and VSR then form */ /* an orthonormal basis for the corresponding left and right eigenspaces */ /* (deflating subspaces). */ /* A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */ /* or a ratio alpha/beta = w, such that A - w*B is singular. It is */ /* usually represented as the pair (alpha,beta), as there is a */ /* reasonable interpretation for beta=0 or for both being zero. */ /* A pair of matrices (S,T) is in generalized real Schur form if T is */ /* upper triangular with non-negative diagonal and S is block upper */ /* triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond */ /* to real generalized eigenvalues, while 2-by-2 blocks of S will be */ /* "standardized" by making the corresponding elements of T have the */ /* form: */ /* [ a 0 ] */ /* [ 0 b ] */ /* and the pair of corresponding 2-by-2 blocks in S and T will have a */ /* complex conjugate pair of generalized eigenvalues. */ /* Arguments */ /* ========= */ /* JOBVSL (input) CHARACTER*1 */ /* = 'N': do not compute the left Schur vectors; */ /* = 'V': compute the left Schur vectors. */ /* JOBVSR (input) CHARACTER*1 */ /* = 'N': do not compute the right Schur vectors; */ /* = 'V': compute the right Schur vectors. */ /* SORT (input) CHARACTER*1 */ /* Specifies whether or not to order the eigenvalues on the */ /* diagonal of the generalized Schur form. */ /* = 'N': Eigenvalues are not ordered; */ /* = 'S': Eigenvalues are ordered (see SELCTG). */ /* SELCTG (external procedure) LOGICAL FUNCTION of three DOUBLE PRECISION arguments */ /* SELCTG must be declared EXTERNAL in the calling subroutine. */ /* If SORT = 'N', SELCTG is not referenced. */ /* If SORT = 'S', SELCTG is used to select eigenvalues to sort */ /* to the top left of the Schur form. */ /* An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if */ /* SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either */ /* one of a complex conjugate pair of eigenvalues is selected, */ /* then both complex eigenvalues are selected. */ /* Note that a selected complex eigenvalue may no longer satisfy */ /* SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) = .TRUE. after ordering, */ /* since ordering may change the value of complex eigenvalues */ /* (especially if the eigenvalue is ill-conditioned), in this */ /* case INFO is set to N+3. */ /* SENSE (input) CHARACTER*1 */ /* Determines which reciprocal condition numbers are computed. */ /* = 'N' : None are computed; */ /* = 'E' : Computed for average of selected eigenvalues only; */ /* = 'V' : Computed for selected deflating subspaces only; */ /* = 'B' : Computed for both. */ /* If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. */ /* N (input) INTEGER */ /* The order of the matrices A, B, VSL, and VSR. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the first of the pair of matrices. */ /* On exit, A has been overwritten by its generalized Schur */ /* form S. */ /* LDA (input) INTEGER */ /* The leading dimension of A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ /* On entry, the second of the pair of matrices. */ /* On exit, B has been overwritten by its generalized Schur */ /* form T. */ /* LDB (input) INTEGER */ /* The leading dimension of B. LDB >= max(1,N). */ /* SDIM (output) INTEGER */ /* If SORT = 'N', SDIM = 0. */ /* If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ /* for which SELCTG is true. (Complex conjugate pairs for which */ /* SELCTG is true for either eigenvalue count as 2.) */ /* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ /* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ /* BETA (output) DOUBLE PRECISION array, dimension (N) */ /* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ /* be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i */ /* and BETA(j),j=1,...,N are the diagonals of the complex Schur */ /* form (S,T) that would result if the 2-by-2 diagonal blocks of */ /* the real Schur form of (A,B) were further reduced to */ /* triangular form using 2-by-2 complex unitary transformations. */ /* If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */ /* positive, then the j-th and (j+1)-st eigenvalues are a */ /* complex conjugate pair, with ALPHAI(j+1) negative. */ /* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */ /* may easily over- or underflow, and BETA(j) may even be zero. */ /* Thus, the user should avoid naively computing the ratio. */ /* However, ALPHAR and ALPHAI will be always less than and */ /* usually comparable with norm(A) in magnitude, and BETA always */ /* less than and usually comparable with norm(B). */ /* VSL (output) DOUBLE PRECISION array, dimension (LDVSL,N) */ /* If JOBVSL = 'V', VSL will contain the left Schur vectors. */ /* Not referenced if JOBVSL = 'N'. */ /* LDVSL (input) INTEGER */ /* The leading dimension of the matrix VSL. LDVSL >=1, and */ /* if JOBVSL = 'V', LDVSL >= N. */ /* VSR (output) DOUBLE PRECISION array, dimension (LDVSR,N) */ /* If JOBVSR = 'V', VSR will contain the right Schur vectors. */ /* Not referenced if JOBVSR = 'N'. */ /* LDVSR (input) INTEGER */ /* The leading dimension of the matrix VSR. LDVSR >= 1, and */ /* if JOBVSR = 'V', LDVSR >= N. */ /* RCONDE (output) DOUBLE PRECISION array, dimension ( 2 ) */ /* If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the */ /* reciprocal condition numbers for the average of the selected */ /* eigenvalues. */ /* Not referenced if SENSE = 'N' or 'V'. */ /* RCONDV (output) DOUBLE PRECISION array, dimension ( 2 ) */ /* If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the */ /* reciprocal condition numbers for the selected deflating */ /* subspaces. */ /* Not referenced if SENSE = 'N' or 'E'. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If N = 0, LWORK >= 1, else if SENSE = 'E', 'V', or 'B', */ /* LWORK >= max( 8*N, 6*N+16, 2*SDIM*(N-SDIM) ), else */ /* LWORK >= max( 8*N, 6*N+16 ). */ /* Note that 2*SDIM*(N-SDIM) <= N*N/2. */ /* Note also that an error is only returned if */ /* LWORK < max( 8*N, 6*N+16), but if SENSE = 'E' or 'V' or 'B' */ /* this may not be large enough. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the bound on the optimal size of the WORK */ /* array and the minimum size of the IWORK array, returns these */ /* values as the first entries of the WORK and IWORK arrays, and */ /* no error message related to LWORK or LIWORK is issued by */ /* XERBLA. */ /* IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) */ /* On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. */ /* If SENSE = 'N' or N = 0, LIWORK >= 1, otherwise */ /* LIWORK >= N+6. */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the bound on the optimal size of the */ /* WORK array and the minimum size of the IWORK array, returns */ /* these values as the first entries of the WORK and IWORK */ /* arrays, and no error message related to LWORK or LIWORK is */ /* issued by XERBLA. */ /* BWORK (workspace) LOGICAL array, dimension (N) */ /* Not referenced if SORT = 'N'. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* = 1,...,N: */ /* The QZ iteration failed. (A,B) are not in Schur */ /* form, but ALPHAR(j), ALPHAI(j), and BETA(j) should */ /* be correct for j=INFO+1,...,N. */ /* > N: =N+1: other than QZ iteration failed in DHGEQZ */ /* =N+2: after reordering, roundoff changed values of */ /* some complex eigenvalues so that leading */ /* eigenvalues in the Generalized Schur form no */ /* longer satisfy SELCTG=.TRUE. This could also */ /* be caused due to scaling. */ /* =N+3: reordering failed in DTGSEN. */ /* Further details */ /* =============== */ /* An approximate (asymptotic) bound on the average absolute error of */ /* the selected eigenvalues is */ /* EPS * norm((A, B)) / RCONDE( 1 ). */ /* An approximate (asymptotic) bound on the maximum angular error in */ /* the computed deflating subspaces is */ /* EPS * norm((A, B)) / RCONDV( 2 ). */ /* See LAPACK User's Guide, section 4.11 for more information. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --alphar; --alphai; --beta; vsl_dim1 = *ldvsl; vsl_offset = 1 + vsl_dim1; vsl -= vsl_offset; vsr_dim1 = *ldvsr; vsr_offset = 1 + vsr_dim1; vsr -= vsr_offset; --rconde; --rcondv; --work; --iwork; --bwork; /* Function Body */ if (_starpu_lsame_(jobvsl, "N")) { ijobvl = 1; ilvsl = FALSE_; } else if (_starpu_lsame_(jobvsl, "V")) { ijobvl = 2; ilvsl = TRUE_; } else { ijobvl = -1; ilvsl = FALSE_; } if (_starpu_lsame_(jobvsr, "N")) { ijobvr = 1; ilvsr = FALSE_; } else if (_starpu_lsame_(jobvsr, "V")) { ijobvr = 2; ilvsr = TRUE_; } else { ijobvr = -1; ilvsr = FALSE_; } wantst = _starpu_lsame_(sort, "S"); wantsn = _starpu_lsame_(sense, "N"); wantse = _starpu_lsame_(sense, "E"); wantsv = _starpu_lsame_(sense, "V"); wantsb = _starpu_lsame_(sense, "B"); lquery = *lwork == -1 || *liwork == -1; if (wantsn) { ijob = 0; } else if (wantse) { ijob = 1; } else if (wantsv) { ijob = 2; } else if (wantsb) { ijob = 4; } /* Test the input arguments */ *info = 0; if (ijobvl <= 0) { *info = -1; } else if (ijobvr <= 0) { *info = -2; } else if (! wantst && ! _starpu_lsame_(sort, "N")) { *info = -3; } else if (! (wantsn || wantse || wantsv || wantsb) || ! wantst && ! wantsn) { *info = -5; } else if (*n < 0) { *info = -6; } else if (*lda < max(1,*n)) { *info = -8; } else if (*ldb < max(1,*n)) { *info = -10; } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { *info = -16; } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { *info = -18; } /* Compute workspace */ /* (Note: Comments in the code beginning "Workspace:" describe the */ /* minimal amount of workspace needed at that point in the code, */ /* as well as the preferred amount for good performance. */ /* NB refers to the optimal block size for the immediately */ /* following subroutine, as returned by ILAENV.) */ if (*info == 0) { if (*n > 0) { /* Computing MAX */ i__1 = *n << 3, i__2 = *n * 6 + 16; minwrk = max(i__1,i__2); maxwrk = minwrk - *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", n, & c__1, n, &c__0); /* Computing MAX */ i__1 = maxwrk, i__2 = minwrk - *n + *n * _starpu_ilaenv_(&c__1, "DORMQR", " ", n, &c__1, n, &c_n1); maxwrk = max(i__1,i__2); if (ilvsl) { /* Computing MAX */ i__1 = maxwrk, i__2 = minwrk - *n + *n * _starpu_ilaenv_(&c__1, "DOR" "GQR", " ", n, &c__1, n, &c_n1); maxwrk = max(i__1,i__2); } lwrk = maxwrk; if (ijob >= 1) { /* Computing MAX */ i__1 = lwrk, i__2 = *n * *n / 2; lwrk = max(i__1,i__2); } } else { minwrk = 1; maxwrk = 1; lwrk = 1; } work[1] = (doublereal) lwrk; if (wantsn || *n == 0) { liwmin = 1; } else { liwmin = *n + 6; } iwork[1] = liwmin; if (*lwork < minwrk && ! lquery) { *info = -22; } else if (*liwork < liwmin && ! lquery) { *info = -24; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGGESX", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { *sdim = 0; return 0; } /* Get machine constants */ eps = _starpu_dlamch_("P"); safmin = _starpu_dlamch_("S"); safmax = 1. / safmin; _starpu_dlabad_(&safmin, &safmax); smlnum = sqrt(safmin) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, &work[1]); ilascl = FALSE_; if (anrm > 0. && anrm < smlnum) { anrmto = smlnum; ilascl = TRUE_; } else if (anrm > bignum) { anrmto = bignum; ilascl = TRUE_; } if (ilascl) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & ierr); } /* Scale B if max element outside range [SMLNUM,BIGNUM] */ bnrm = _starpu_dlange_("M", n, n, &b[b_offset], ldb, &work[1]); ilbscl = FALSE_; if (bnrm > 0. && bnrm < smlnum) { bnrmto = smlnum; ilbscl = TRUE_; } else if (bnrm > bignum) { bnrmto = bignum; ilbscl = TRUE_; } if (ilbscl) { _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & ierr); } /* Permute the matrix to make it more nearly triangular */ /* (Workspace: need 6*N + 2*N for permutation parameters) */ ileft = 1; iright = *n + 1; iwrk = iright + *n; _starpu_dggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ ileft], &work[iright], &work[iwrk], &ierr); /* Reduce B to triangular form (QR decomposition of B) */ /* (Workspace: need N, prefer N*NB) */ irows = ihi + 1 - ilo; icols = *n + 1 - ilo; itau = iwrk; iwrk = itau + irows; i__1 = *lwork + 1 - iwrk; _starpu_dgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ iwrk], &i__1, &ierr); /* Apply the orthogonal transformation to matrix A */ /* (Workspace: need N, prefer N*NB) */ i__1 = *lwork + 1 - iwrk; _starpu_dormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & ierr); /* Initialize VSL */ /* (Workspace: need N, prefer N*NB) */ if (ilvsl) { _starpu_dlaset_("Full", n, n, &c_b42, &c_b43, &vsl[vsl_offset], ldvsl); if (irows > 1) { i__1 = irows - 1; i__2 = irows - 1; _starpu_dlacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[ ilo + 1 + ilo * vsl_dim1], ldvsl); } i__1 = *lwork + 1 - iwrk; _starpu_dorgqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, & work[itau], &work[iwrk], &i__1, &ierr); } /* Initialize VSR */ if (ilvsr) { _starpu_dlaset_("Full", n, n, &c_b42, &c_b43, &vsr[vsr_offset], ldvsr); } /* Reduce to generalized Hessenberg form */ /* (Workspace: none needed) */ _starpu_dgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &ierr); *sdim = 0; /* Perform QZ algorithm, computing Schur vectors if desired */ /* (Workspace: need N) */ iwrk = itau; i__1 = *lwork + 1 - iwrk; _starpu_dhgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[vsl_offset] , ldvsl, &vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &ierr); if (ierr != 0) { if (ierr > 0 && ierr <= *n) { *info = ierr; } else if (ierr > *n && ierr <= *n << 1) { *info = ierr - *n; } else { *info = *n + 1; } goto L60; } /* Sort eigenvalues ALPHA/BETA and compute the reciprocal of */ /* condition number(s) */ /* (Workspace: If IJOB >= 1, need MAX( 8*(N+1), 2*SDIM*(N-SDIM) ) */ /* otherwise, need 8*(N+1) ) */ if (wantst) { /* Undo scaling on eigenvalues before SELCTGing */ if (ilascl) { _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, &ierr); _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, &ierr); } if (ilbscl) { _starpu_dlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, &ierr); } /* Select eigenvalues */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { bwork[i__] = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); /* L10: */ } /* Reorder eigenvalues, transform Generalized Schur vectors, and */ /* compute reciprocal condition numbers */ i__1 = *lwork - iwrk + 1; _starpu_dtgsen_(&ijob, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[ b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[ vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, sdim, &pl, &pr, dif, &work[iwrk], &i__1, &iwork[1], liwork, &ierr); if (ijob >= 1) { /* Computing MAX */ i__1 = maxwrk, i__2 = (*sdim << 1) * (*n - *sdim); maxwrk = max(i__1,i__2); } if (ierr == -22) { /* not enough real workspace */ *info = -22; } else { if (ijob == 1 || ijob == 4) { rconde[1] = pl; rconde[2] = pr; } if (ijob == 2 || ijob == 4) { rcondv[1] = dif[0]; rcondv[2] = dif[1]; } if (ierr == 1) { *info = *n + 3; } } } /* Apply permutation to VSL and VSR */ /* (Workspace: none needed) */ if (ilvsl) { _starpu_dggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsl[ vsl_offset], ldvsl, &ierr); } if (ilvsr) { _starpu_dggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsr[ vsr_offset], ldvsr, &ierr); } /* Check if unscaling would cause over/underflow, if so, rescale */ /* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of */ /* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) */ if (ilascl) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (alphai[i__] != 0.) { if (alphar[i__] / safmax > anrmto / anrm || safmin / alphar[ i__] > anrm / anrmto) { work[1] = (d__1 = a[i__ + i__ * a_dim1] / alphar[i__], abs(d__1)); beta[i__] *= work[1]; alphar[i__] *= work[1]; alphai[i__] *= work[1]; } else if (alphai[i__] / safmax > anrmto / anrm || safmin / alphai[i__] > anrm / anrmto) { work[1] = (d__1 = a[i__ + (i__ + 1) * a_dim1] / alphai[ i__], abs(d__1)); beta[i__] *= work[1]; alphar[i__] *= work[1]; alphai[i__] *= work[1]; } } /* L20: */ } } if (ilbscl) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (alphai[i__] != 0.) { if (beta[i__] / safmax > bnrmto / bnrm || safmin / beta[i__] > bnrm / bnrmto) { work[1] = (d__1 = b[i__ + i__ * b_dim1] / beta[i__], abs( d__1)); beta[i__] *= work[1]; alphar[i__] *= work[1]; alphai[i__] *= work[1]; } } /* L30: */ } } /* Undo scaling */ if (ilascl) { _starpu_dlascl_("H", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, & ierr); _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, & ierr); _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, & ierr); } if (ilbscl) { _starpu_dlascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & ierr); _starpu_dlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & ierr); } if (wantst) { /* Check if reordering is correct */ lastsl = TRUE_; lst2sl = TRUE_; *sdim = 0; ip = 0; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { cursl = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); if (alphai[i__] == 0.) { if (cursl) { ++(*sdim); } ip = 0; if (cursl && ! lastsl) { *info = *n + 2; } } else { if (ip == 1) { /* Last eigenvalue of conjugate pair */ cursl = cursl || lastsl; lastsl = cursl; if (cursl) { *sdim += 2; } ip = -1; if (cursl && ! lst2sl) { *info = *n + 2; } } else { /* First eigenvalue of conjugate pair */ ip = 1; } } lst2sl = lastsl; lastsl = cursl; /* L50: */ } } L60: work[1] = (doublereal) maxwrk; iwork[1] = liwmin; return 0; /* End of DGGESX */ } /* _starpu_dggesx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dggev.c000066400000000000000000000470571507764646700205050ustar00rootroot00000000000000/* dggev.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__0 = 0; static integer c_n1 = -1; static doublereal c_b36 = 0.; static doublereal c_b37 = 1.; /* Subroutine */ int _starpu_dggev_(char *jobvl, char *jobvr, integer *n, doublereal * a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal *beta, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2; doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer jc, in, jr, ihi, ilo; doublereal eps; logical ilv; doublereal anrm, bnrm; integer ierr, itau; doublereal temp; logical ilvl, ilvr; integer iwrk; extern logical _starpu_lsame_(char *, char *); integer ileft, icols, irows; extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dggbak_( char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dggbal_(char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dgghrd_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); logical ilascl, ilbscl; extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dtgevc_(char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *); logical ldumma[1]; char chtemp[1]; doublereal bignum; extern /* Subroutine */ int _starpu_dhgeqz_(char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ijobvl, iright, ijobvr; extern /* Subroutine */ int _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); doublereal anrmto, bnrmto; extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer minwrk, maxwrk; doublereal smlnum; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGGEV computes for a pair of N-by-N real nonsymmetric matrices (A,B) */ /* the generalized eigenvalues, and optionally, the left and/or right */ /* generalized eigenvectors. */ /* A generalized eigenvalue for a pair of matrices (A,B) is a scalar */ /* lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */ /* singular. It is usually represented as the pair (alpha,beta), as */ /* there is a reasonable interpretation for beta=0, and even for both */ /* being zero. */ /* The right eigenvector v(j) corresponding to the eigenvalue lambda(j) */ /* of (A,B) satisfies */ /* A * v(j) = lambda(j) * B * v(j). */ /* The left eigenvector u(j) corresponding to the eigenvalue lambda(j) */ /* of (A,B) satisfies */ /* u(j)**H * A = lambda(j) * u(j)**H * B . */ /* where u(j)**H is the conjugate-transpose of u(j). */ /* Arguments */ /* ========= */ /* JOBVL (input) CHARACTER*1 */ /* = 'N': do not compute the left generalized eigenvectors; */ /* = 'V': compute the left generalized eigenvectors. */ /* JOBVR (input) CHARACTER*1 */ /* = 'N': do not compute the right generalized eigenvectors; */ /* = 'V': compute the right generalized eigenvectors. */ /* N (input) INTEGER */ /* The order of the matrices A, B, VL, and VR. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the matrix A in the pair (A,B). */ /* On exit, A has been overwritten. */ /* LDA (input) INTEGER */ /* The leading dimension of A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ /* On entry, the matrix B in the pair (A,B). */ /* On exit, B has been overwritten. */ /* LDB (input) INTEGER */ /* The leading dimension of B. LDB >= max(1,N). */ /* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ /* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ /* BETA (output) DOUBLE PRECISION array, dimension (N) */ /* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ /* be the generalized eigenvalues. If ALPHAI(j) is zero, then */ /* the j-th eigenvalue is real; if positive, then the j-th and */ /* (j+1)-st eigenvalues are a complex conjugate pair, with */ /* ALPHAI(j+1) negative. */ /* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */ /* may easily over- or underflow, and BETA(j) may even be zero. */ /* Thus, the user should avoid naively computing the ratio */ /* alpha/beta. However, ALPHAR and ALPHAI will be always less */ /* than and usually comparable with norm(A) in magnitude, and */ /* BETA always less than and usually comparable with norm(B). */ /* VL (output) DOUBLE PRECISION array, dimension (LDVL,N) */ /* If JOBVL = 'V', the left eigenvectors u(j) are stored one */ /* after another in the columns of VL, in the same order as */ /* their eigenvalues. If the j-th eigenvalue is real, then */ /* u(j) = VL(:,j), the j-th column of VL. If the j-th and */ /* (j+1)-th eigenvalues form a complex conjugate pair, then */ /* u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). */ /* Each eigenvector is scaled so the largest component has */ /* abs(real part)+abs(imag. part)=1. */ /* Not referenced if JOBVL = 'N'. */ /* LDVL (input) INTEGER */ /* The leading dimension of the matrix VL. LDVL >= 1, and */ /* if JOBVL = 'V', LDVL >= N. */ /* VR (output) DOUBLE PRECISION array, dimension (LDVR,N) */ /* If JOBVR = 'V', the right eigenvectors v(j) are stored one */ /* after another in the columns of VR, in the same order as */ /* their eigenvalues. If the j-th eigenvalue is real, then */ /* v(j) = VR(:,j), the j-th column of VR. If the j-th and */ /* (j+1)-th eigenvalues form a complex conjugate pair, then */ /* v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). */ /* Each eigenvector is scaled so the largest component has */ /* abs(real part)+abs(imag. part)=1. */ /* Not referenced if JOBVR = 'N'. */ /* LDVR (input) INTEGER */ /* The leading dimension of the matrix VR. LDVR >= 1, and */ /* if JOBVR = 'V', LDVR >= N. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,8*N). */ /* For good performance, LWORK must generally be larger. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* = 1,...,N: */ /* The QZ iteration failed. No eigenvectors have been */ /* calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) */ /* should be correct for j=INFO+1,...,N. */ /* > N: =N+1: other than QZ iteration failed in DHGEQZ. */ /* =N+2: error return from DTGEVC. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --alphar; --alphai; --beta; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1; vr -= vr_offset; --work; /* Function Body */ if (_starpu_lsame_(jobvl, "N")) { ijobvl = 1; ilvl = FALSE_; } else if (_starpu_lsame_(jobvl, "V")) { ijobvl = 2; ilvl = TRUE_; } else { ijobvl = -1; ilvl = FALSE_; } if (_starpu_lsame_(jobvr, "N")) { ijobvr = 1; ilvr = FALSE_; } else if (_starpu_lsame_(jobvr, "V")) { ijobvr = 2; ilvr = TRUE_; } else { ijobvr = -1; ilvr = FALSE_; } ilv = ilvl || ilvr; /* Test the input arguments */ *info = 0; lquery = *lwork == -1; if (ijobvl <= 0) { *info = -1; } else if (ijobvr <= 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } else if (*ldvl < 1 || ilvl && *ldvl < *n) { *info = -12; } else if (*ldvr < 1 || ilvr && *ldvr < *n) { *info = -14; } /* Compute workspace */ /* (Note: Comments in the code beginning "Workspace:" describe the */ /* minimal amount of workspace needed at that point in the code, */ /* as well as the preferred amount for good performance. */ /* NB refers to the optimal block size for the immediately */ /* following subroutine, as returned by ILAENV. The workspace is */ /* computed assuming ILO = 1 and IHI = N, the worst case.) */ if (*info == 0) { /* Computing MAX */ i__1 = 1, i__2 = *n << 3; minwrk = max(i__1,i__2); /* Computing MAX */ i__1 = 1, i__2 = *n * (_starpu_ilaenv_(&c__1, "DGEQRF", " ", n, &c__1, n, & c__0) + 7); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n * (_starpu_ilaenv_(&c__1, "DORMQR", " ", n, &c__1, n, &c__0) + 7); maxwrk = max(i__1,i__2); if (ilvl) { /* Computing MAX */ i__1 = maxwrk, i__2 = *n * (_starpu_ilaenv_(&c__1, "DORGQR", " ", n, & c__1, n, &c_n1) + 7); maxwrk = max(i__1,i__2); } work[1] = (doublereal) maxwrk; if (*lwork < minwrk && ! lquery) { *info = -16; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGGEV ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Get machine constants */ eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S"); bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); smlnum = sqrt(smlnum) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, &work[1]); ilascl = FALSE_; if (anrm > 0. && anrm < smlnum) { anrmto = smlnum; ilascl = TRUE_; } else if (anrm > bignum) { anrmto = bignum; ilascl = TRUE_; } if (ilascl) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & ierr); } /* Scale B if max element outside range [SMLNUM,BIGNUM] */ bnrm = _starpu_dlange_("M", n, n, &b[b_offset], ldb, &work[1]); ilbscl = FALSE_; if (bnrm > 0. && bnrm < smlnum) { bnrmto = smlnum; ilbscl = TRUE_; } else if (bnrm > bignum) { bnrmto = bignum; ilbscl = TRUE_; } if (ilbscl) { _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & ierr); } /* Permute the matrices A, B to isolate eigenvalues if possible */ /* (Workspace: need 6*N) */ ileft = 1; iright = *n + 1; iwrk = iright + *n; _starpu_dggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ ileft], &work[iright], &work[iwrk], &ierr); /* Reduce B to triangular form (QR decomposition of B) */ /* (Workspace: need N, prefer N*NB) */ irows = ihi + 1 - ilo; if (ilv) { icols = *n + 1 - ilo; } else { icols = irows; } itau = iwrk; iwrk = itau + irows; i__1 = *lwork + 1 - iwrk; _starpu_dgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ iwrk], &i__1, &ierr); /* Apply the orthogonal transformation to matrix A */ /* (Workspace: need N, prefer N*NB) */ i__1 = *lwork + 1 - iwrk; _starpu_dormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & ierr); /* Initialize VL */ /* (Workspace: need N, prefer N*NB) */ if (ilvl) { _starpu_dlaset_("Full", n, n, &c_b36, &c_b37, &vl[vl_offset], ldvl) ; if (irows > 1) { i__1 = irows - 1; i__2 = irows - 1; _starpu_dlacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vl[ ilo + 1 + ilo * vl_dim1], ldvl); } i__1 = *lwork + 1 - iwrk; _starpu_dorgqr_(&irows, &irows, &irows, &vl[ilo + ilo * vl_dim1], ldvl, &work[ itau], &work[iwrk], &i__1, &ierr); } /* Initialize VR */ if (ilvr) { _starpu_dlaset_("Full", n, n, &c_b36, &c_b37, &vr[vr_offset], ldvr) ; } /* Reduce to generalized Hessenberg form */ /* (Workspace: none needed) */ if (ilv) { /* Eigenvectors requested -- work on whole matrix. */ _starpu_dgghrd_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr); } else { _starpu_dgghrd_("N", "N", &irows, &c__1, &irows, &a[ilo + ilo * a_dim1], lda, &b[ilo + ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[ vr_offset], ldvr, &ierr); } /* Perform QZ algorithm (Compute eigenvalues, and optionally, the */ /* Schur forms and Schur vectors) */ /* (Workspace: need N) */ iwrk = itau; if (ilv) { *(unsigned char *)chtemp = 'S'; } else { *(unsigned char *)chtemp = 'E'; } i__1 = *lwork + 1 - iwrk; _starpu_dhgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[ b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &work[iwrk], &i__1, &ierr); if (ierr != 0) { if (ierr > 0 && ierr <= *n) { *info = ierr; } else if (ierr > *n && ierr <= *n << 1) { *info = ierr - *n; } else { *info = *n + 1; } goto L110; } /* Compute Eigenvectors */ /* (Workspace: need 6*N) */ if (ilv) { if (ilvl) { if (ilvr) { *(unsigned char *)chtemp = 'B'; } else { *(unsigned char *)chtemp = 'L'; } } else { *(unsigned char *)chtemp = 'R'; } _starpu_dtgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[ iwrk], &ierr); if (ierr != 0) { *info = *n + 2; goto L110; } /* Undo balancing on VL and VR and normalization */ /* (Workspace: none needed) */ if (ilvl) { _starpu_dggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, & vl[vl_offset], ldvl, &ierr); i__1 = *n; for (jc = 1; jc <= i__1; ++jc) { if (alphai[jc] < 0.) { goto L50; } temp = 0.; if (alphai[jc] == 0.) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ d__2 = temp, d__3 = (d__1 = vl[jr + jc * vl_dim1], abs(d__1)); temp = max(d__2,d__3); /* L10: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ d__3 = temp, d__4 = (d__1 = vl[jr + jc * vl_dim1], abs(d__1)) + (d__2 = vl[jr + (jc + 1) * vl_dim1], abs(d__2)); temp = max(d__3,d__4); /* L20: */ } } if (temp < smlnum) { goto L50; } temp = 1. / temp; if (alphai[jc] == 0.) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vl[jr + jc * vl_dim1] *= temp; /* L30: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vl[jr + jc * vl_dim1] *= temp; vl[jr + (jc + 1) * vl_dim1] *= temp; /* L40: */ } } L50: ; } } if (ilvr) { _starpu_dggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, & vr[vr_offset], ldvr, &ierr); i__1 = *n; for (jc = 1; jc <= i__1; ++jc) { if (alphai[jc] < 0.) { goto L100; } temp = 0.; if (alphai[jc] == 0.) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ d__2 = temp, d__3 = (d__1 = vr[jr + jc * vr_dim1], abs(d__1)); temp = max(d__2,d__3); /* L60: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ d__3 = temp, d__4 = (d__1 = vr[jr + jc * vr_dim1], abs(d__1)) + (d__2 = vr[jr + (jc + 1) * vr_dim1], abs(d__2)); temp = max(d__3,d__4); /* L70: */ } } if (temp < smlnum) { goto L100; } temp = 1. / temp; if (alphai[jc] == 0.) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vr[jr + jc * vr_dim1] *= temp; /* L80: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vr[jr + jc * vr_dim1] *= temp; vr[jr + (jc + 1) * vr_dim1] *= temp; /* L90: */ } } L100: ; } } /* End of eigenvector calculation */ } /* Undo scaling if necessary */ if (ilascl) { _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, & ierr); _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, & ierr); } if (ilbscl) { _starpu_dlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & ierr); } L110: work[1] = (doublereal) maxwrk; return 0; /* End of DGGEV */ } /* _starpu_dggev_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dggevx.c000066400000000000000000000717501507764646700206720ustar00rootroot00000000000000/* dggevx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__0 = 0; static doublereal c_b59 = 0.; static doublereal c_b60 = 1.; /* Subroutine */ int _starpu_dggevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * beta, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, doublereal *abnrm, doublereal *bbnrm, doublereal *rconde, doublereal * rcondv, doublereal *work, integer *lwork, integer *iwork, logical * bwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2; doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, m, jc, in, mm, jr; doublereal eps; logical ilv, pair; doublereal anrm, bnrm; integer ierr, itau; doublereal temp; logical ilvl, ilvr; integer iwrk, iwrk1; extern logical _starpu_lsame_(char *, char *); integer icols; logical noscl; integer irows; extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dggbak_( char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dggbal_(char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dgghrd_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); logical ilascl, ilbscl; extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); logical ldumma[1]; char chtemp[1]; doublereal bignum; extern /* Subroutine */ int _starpu_dhgeqz_(char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dtgevc_(char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *); integer ijobvl; extern /* Subroutine */ int _starpu_dtgsna_(char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ijobvr; logical wantsb; extern /* Subroutine */ int _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); doublereal anrmto; logical wantse; doublereal bnrmto; extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer minwrk, maxwrk; logical wantsn; doublereal smlnum; logical lquery, wantsv; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGGEVX computes for a pair of N-by-N real nonsymmetric matrices (A,B) */ /* the generalized eigenvalues, and optionally, the left and/or right */ /* generalized eigenvectors. */ /* Optionally also, it computes a balancing transformation to improve */ /* the conditioning of the eigenvalues and eigenvectors (ILO, IHI, */ /* LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for */ /* the eigenvalues (RCONDE), and reciprocal condition numbers for the */ /* right eigenvectors (RCONDV). */ /* A generalized eigenvalue for a pair of matrices (A,B) is a scalar */ /* lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */ /* singular. It is usually represented as the pair (alpha,beta), as */ /* there is a reasonable interpretation for beta=0, and even for both */ /* being zero. */ /* The right eigenvector v(j) corresponding to the eigenvalue lambda(j) */ /* of (A,B) satisfies */ /* A * v(j) = lambda(j) * B * v(j) . */ /* The left eigenvector u(j) corresponding to the eigenvalue lambda(j) */ /* of (A,B) satisfies */ /* u(j)**H * A = lambda(j) * u(j)**H * B. */ /* where u(j)**H is the conjugate-transpose of u(j). */ /* Arguments */ /* ========= */ /* BALANC (input) CHARACTER*1 */ /* Specifies the balance option to be performed. */ /* = 'N': do not diagonally scale or permute; */ /* = 'P': permute only; */ /* = 'S': scale only; */ /* = 'B': both permute and scale. */ /* Computed reciprocal condition numbers will be for the */ /* matrices after permuting and/or balancing. Permuting does */ /* not change condition numbers (in exact arithmetic), but */ /* balancing does. */ /* JOBVL (input) CHARACTER*1 */ /* = 'N': do not compute the left generalized eigenvectors; */ /* = 'V': compute the left generalized eigenvectors. */ /* JOBVR (input) CHARACTER*1 */ /* = 'N': do not compute the right generalized eigenvectors; */ /* = 'V': compute the right generalized eigenvectors. */ /* SENSE (input) CHARACTER*1 */ /* Determines which reciprocal condition numbers are computed. */ /* = 'N': none are computed; */ /* = 'E': computed for eigenvalues only; */ /* = 'V': computed for eigenvectors only; */ /* = 'B': computed for eigenvalues and eigenvectors. */ /* N (input) INTEGER */ /* The order of the matrices A, B, VL, and VR. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the matrix A in the pair (A,B). */ /* On exit, A has been overwritten. If JOBVL='V' or JOBVR='V' */ /* or both, then A contains the first part of the real Schur */ /* form of the "balanced" versions of the input A and B. */ /* LDA (input) INTEGER */ /* The leading dimension of A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ /* On entry, the matrix B in the pair (A,B). */ /* On exit, B has been overwritten. If JOBVL='V' or JOBVR='V' */ /* or both, then B contains the second part of the real Schur */ /* form of the "balanced" versions of the input A and B. */ /* LDB (input) INTEGER */ /* The leading dimension of B. LDB >= max(1,N). */ /* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ /* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ /* BETA (output) DOUBLE PRECISION array, dimension (N) */ /* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ /* be the generalized eigenvalues. If ALPHAI(j) is zero, then */ /* the j-th eigenvalue is real; if positive, then the j-th and */ /* (j+1)-st eigenvalues are a complex conjugate pair, with */ /* ALPHAI(j+1) negative. */ /* Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */ /* may easily over- or underflow, and BETA(j) may even be zero. */ /* Thus, the user should avoid naively computing the ratio */ /* ALPHA/BETA. However, ALPHAR and ALPHAI will be always less */ /* than and usually comparable with norm(A) in magnitude, and */ /* BETA always less than and usually comparable with norm(B). */ /* VL (output) DOUBLE PRECISION array, dimension (LDVL,N) */ /* If JOBVL = 'V', the left eigenvectors u(j) are stored one */ /* after another in the columns of VL, in the same order as */ /* their eigenvalues. If the j-th eigenvalue is real, then */ /* u(j) = VL(:,j), the j-th column of VL. If the j-th and */ /* (j+1)-th eigenvalues form a complex conjugate pair, then */ /* u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). */ /* Each eigenvector will be scaled so the largest component have */ /* abs(real part) + abs(imag. part) = 1. */ /* Not referenced if JOBVL = 'N'. */ /* LDVL (input) INTEGER */ /* The leading dimension of the matrix VL. LDVL >= 1, and */ /* if JOBVL = 'V', LDVL >= N. */ /* VR (output) DOUBLE PRECISION array, dimension (LDVR,N) */ /* If JOBVR = 'V', the right eigenvectors v(j) are stored one */ /* after another in the columns of VR, in the same order as */ /* their eigenvalues. If the j-th eigenvalue is real, then */ /* v(j) = VR(:,j), the j-th column of VR. If the j-th and */ /* (j+1)-th eigenvalues form a complex conjugate pair, then */ /* v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). */ /* Each eigenvector will be scaled so the largest component have */ /* abs(real part) + abs(imag. part) = 1. */ /* Not referenced if JOBVR = 'N'. */ /* LDVR (input) INTEGER */ /* The leading dimension of the matrix VR. LDVR >= 1, and */ /* if JOBVR = 'V', LDVR >= N. */ /* ILO (output) INTEGER */ /* IHI (output) INTEGER */ /* ILO and IHI are integer values such that on exit */ /* A(i,j) = 0 and B(i,j) = 0 if i > j and */ /* j = 1,...,ILO-1 or i = IHI+1,...,N. */ /* If BALANC = 'N' or 'S', ILO = 1 and IHI = N. */ /* LSCALE (output) DOUBLE PRECISION array, dimension (N) */ /* Details of the permutations and scaling factors applied */ /* to the left side of A and B. If PL(j) is the index of the */ /* row interchanged with row j, and DL(j) is the scaling */ /* factor applied to row j, then */ /* LSCALE(j) = PL(j) for j = 1,...,ILO-1 */ /* = DL(j) for j = ILO,...,IHI */ /* = PL(j) for j = IHI+1,...,N. */ /* The order in which the interchanges are made is N to IHI+1, */ /* then 1 to ILO-1. */ /* RSCALE (output) DOUBLE PRECISION array, dimension (N) */ /* Details of the permutations and scaling factors applied */ /* to the right side of A and B. If PR(j) is the index of the */ /* column interchanged with column j, and DR(j) is the scaling */ /* factor applied to column j, then */ /* RSCALE(j) = PR(j) for j = 1,...,ILO-1 */ /* = DR(j) for j = ILO,...,IHI */ /* = PR(j) for j = IHI+1,...,N */ /* The order in which the interchanges are made is N to IHI+1, */ /* then 1 to ILO-1. */ /* ABNRM (output) DOUBLE PRECISION */ /* The one-norm of the balanced matrix A. */ /* BBNRM (output) DOUBLE PRECISION */ /* The one-norm of the balanced matrix B. */ /* RCONDE (output) DOUBLE PRECISION array, dimension (N) */ /* If SENSE = 'E' or 'B', the reciprocal condition numbers of */ /* the eigenvalues, stored in consecutive elements of the array. */ /* For a complex conjugate pair of eigenvalues two consecutive */ /* elements of RCONDE are set to the same value. Thus RCONDE(j), */ /* RCONDV(j), and the j-th columns of VL and VR all correspond */ /* to the j-th eigenpair. */ /* If SENSE = 'N or 'V', RCONDE is not referenced. */ /* RCONDV (output) DOUBLE PRECISION array, dimension (N) */ /* If SENSE = 'V' or 'B', the estimated reciprocal condition */ /* numbers of the eigenvectors, stored in consecutive elements */ /* of the array. For a complex eigenvector two consecutive */ /* elements of RCONDV are set to the same value. If the */ /* eigenvalues cannot be reordered to compute RCONDV(j), */ /* RCONDV(j) is set to 0; this can only occur when the true */ /* value would be very small anyway. */ /* If SENSE = 'N' or 'E', RCONDV is not referenced. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,2*N). */ /* If BALANC = 'S' or 'B', or JOBVL = 'V', or JOBVR = 'V', */ /* LWORK >= max(1,6*N). */ /* If SENSE = 'E' or 'B', LWORK >= max(1,10*N). */ /* If SENSE = 'V' or 'B', LWORK >= 2*N*N+8*N+16. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace) INTEGER array, dimension (N+6) */ /* If SENSE = 'E', IWORK is not referenced. */ /* BWORK (workspace) LOGICAL array, dimension (N) */ /* If SENSE = 'N', BWORK is not referenced. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* = 1,...,N: */ /* The QZ iteration failed. No eigenvectors have been */ /* calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) */ /* should be correct for j=INFO+1,...,N. */ /* > N: =N+1: other than QZ iteration failed in DHGEQZ. */ /* =N+2: error return from DTGEVC. */ /* Further Details */ /* =============== */ /* Balancing a matrix pair (A,B) includes, first, permuting rows and */ /* columns to isolate eigenvalues, second, applying diagonal similarity */ /* transformation to the rows and columns to make the rows and columns */ /* as close in norm as possible. The computed reciprocal condition */ /* numbers correspond to the balanced matrix. Permuting rows and columns */ /* will not change the condition numbers (in exact arithmetic) but */ /* diagonal scaling will. For further explanation of balancing, see */ /* section 4.11.1.2 of LAPACK Users' Guide. */ /* An approximate error bound on the chordal distance between the i-th */ /* computed generalized eigenvalue w and the corresponding exact */ /* eigenvalue lambda is */ /* chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I) */ /* An approximate error bound for the angle between the i-th computed */ /* eigenvector VL(i) or VR(i) is given by */ /* EPS * norm(ABNRM, BBNRM) / DIF(i). */ /* For further explanation of the reciprocal condition numbers RCONDE */ /* and RCONDV, see section 4.11 of LAPACK User's Guide. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --alphar; --alphai; --beta; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1; vr -= vr_offset; --lscale; --rscale; --rconde; --rcondv; --work; --iwork; --bwork; /* Function Body */ if (_starpu_lsame_(jobvl, "N")) { ijobvl = 1; ilvl = FALSE_; } else if (_starpu_lsame_(jobvl, "V")) { ijobvl = 2; ilvl = TRUE_; } else { ijobvl = -1; ilvl = FALSE_; } if (_starpu_lsame_(jobvr, "N")) { ijobvr = 1; ilvr = FALSE_; } else if (_starpu_lsame_(jobvr, "V")) { ijobvr = 2; ilvr = TRUE_; } else { ijobvr = -1; ilvr = FALSE_; } ilv = ilvl || ilvr; noscl = _starpu_lsame_(balanc, "N") || _starpu_lsame_(balanc, "P"); wantsn = _starpu_lsame_(sense, "N"); wantse = _starpu_lsame_(sense, "E"); wantsv = _starpu_lsame_(sense, "V"); wantsb = _starpu_lsame_(sense, "B"); /* Test the input arguments */ *info = 0; lquery = *lwork == -1; if (! (_starpu_lsame_(balanc, "N") || _starpu_lsame_(balanc, "S") || _starpu_lsame_(balanc, "P") || _starpu_lsame_(balanc, "B"))) { *info = -1; } else if (ijobvl <= 0) { *info = -2; } else if (ijobvr <= 0) { *info = -3; } else if (! (wantsn || wantse || wantsb || wantsv)) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } else if (*ldvl < 1 || ilvl && *ldvl < *n) { *info = -14; } else if (*ldvr < 1 || ilvr && *ldvr < *n) { *info = -16; } /* Compute workspace */ /* (Note: Comments in the code beginning "Workspace:" describe the */ /* minimal amount of workspace needed at that point in the code, */ /* as well as the preferred amount for good performance. */ /* NB refers to the optimal block size for the immediately */ /* following subroutine, as returned by ILAENV. The workspace is */ /* computed assuming ILO = 1 and IHI = N, the worst case.) */ if (*info == 0) { if (*n == 0) { minwrk = 1; maxwrk = 1; } else { if (noscl && ! ilv) { minwrk = *n << 1; } else { minwrk = *n * 6; } if (wantse || wantsb) { minwrk = *n * 10; } if (wantsv || wantsb) { /* Computing MAX */ i__1 = minwrk, i__2 = (*n << 1) * (*n + 4) + 16; minwrk = max(i__1,i__2); } maxwrk = minwrk; /* Computing MAX */ i__1 = maxwrk, i__2 = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", n, & c__1, n, &c__0); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n + *n * _starpu_ilaenv_(&c__1, "DORMQR", " ", n, & c__1, n, &c__0); maxwrk = max(i__1,i__2); if (ilvl) { /* Computing MAX */ i__1 = maxwrk, i__2 = *n + *n * _starpu_ilaenv_(&c__1, "DORGQR", " ", n, &c__1, n, &c__0); maxwrk = max(i__1,i__2); } } work[1] = (doublereal) maxwrk; if (*lwork < minwrk && ! lquery) { *info = -26; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGGEVX", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Get machine constants */ eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S"); bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); smlnum = sqrt(smlnum) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = _starpu_dlange_("M", n, n, &a[a_offset], lda, &work[1]); ilascl = FALSE_; if (anrm > 0. && anrm < smlnum) { anrmto = smlnum; ilascl = TRUE_; } else if (anrm > bignum) { anrmto = bignum; ilascl = TRUE_; } if (ilascl) { _starpu_dlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & ierr); } /* Scale B if max element outside range [SMLNUM,BIGNUM] */ bnrm = _starpu_dlange_("M", n, n, &b[b_offset], ldb, &work[1]); ilbscl = FALSE_; if (bnrm > 0. && bnrm < smlnum) { bnrmto = smlnum; ilbscl = TRUE_; } else if (bnrm > bignum) { bnrmto = bignum; ilbscl = TRUE_; } if (ilbscl) { _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & ierr); } /* Permute and/or balance the matrix pair (A,B) */ /* (Workspace: need 6*N if BALANC = 'S' or 'B', 1 otherwise) */ _starpu_dggbal_(balanc, n, &a[a_offset], lda, &b[b_offset], ldb, ilo, ihi, & lscale[1], &rscale[1], &work[1], &ierr); /* Compute ABNRM and BBNRM */ *abnrm = _starpu_dlange_("1", n, n, &a[a_offset], lda, &work[1]); if (ilascl) { work[1] = *abnrm; _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, &c__1, &c__1, &work[1], & c__1, &ierr); *abnrm = work[1]; } *bbnrm = _starpu_dlange_("1", n, n, &b[b_offset], ldb, &work[1]); if (ilbscl) { work[1] = *bbnrm; _starpu_dlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, &c__1, &c__1, &work[1], & c__1, &ierr); *bbnrm = work[1]; } /* Reduce B to triangular form (QR decomposition of B) */ /* (Workspace: need N, prefer N*NB ) */ irows = *ihi + 1 - *ilo; if (ilv || ! wantsn) { icols = *n + 1 - *ilo; } else { icols = irows; } itau = 1; iwrk = itau + irows; i__1 = *lwork + 1 - iwrk; _starpu_dgeqrf_(&irows, &icols, &b[*ilo + *ilo * b_dim1], ldb, &work[itau], &work[ iwrk], &i__1, &ierr); /* Apply the orthogonal transformation to A */ /* (Workspace: need N, prefer N*NB) */ i__1 = *lwork + 1 - iwrk; _starpu_dormqr_("L", "T", &irows, &icols, &irows, &b[*ilo + *ilo * b_dim1], ldb, & work[itau], &a[*ilo + *ilo * a_dim1], lda, &work[iwrk], &i__1, & ierr); /* Initialize VL and/or VR */ /* (Workspace: need N, prefer N*NB) */ if (ilvl) { _starpu_dlaset_("Full", n, n, &c_b59, &c_b60, &vl[vl_offset], ldvl) ; if (irows > 1) { i__1 = irows - 1; i__2 = irows - 1; _starpu_dlacpy_("L", &i__1, &i__2, &b[*ilo + 1 + *ilo * b_dim1], ldb, &vl[ *ilo + 1 + *ilo * vl_dim1], ldvl); } i__1 = *lwork + 1 - iwrk; _starpu_dorgqr_(&irows, &irows, &irows, &vl[*ilo + *ilo * vl_dim1], ldvl, & work[itau], &work[iwrk], &i__1, &ierr); } if (ilvr) { _starpu_dlaset_("Full", n, n, &c_b59, &c_b60, &vr[vr_offset], ldvr) ; } /* Reduce to generalized Hessenberg form */ /* (Workspace: none needed) */ if (ilv || ! wantsn) { /* Eigenvectors requested -- work on whole matrix. */ _starpu_dgghrd_(jobvl, jobvr, n, ilo, ihi, &a[a_offset], lda, &b[b_offset], ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr); } else { _starpu_dgghrd_("N", "N", &irows, &c__1, &irows, &a[*ilo + *ilo * a_dim1], lda, &b[*ilo + *ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[ vr_offset], ldvr, &ierr); } /* Perform QZ algorithm (Compute eigenvalues, and optionally, the */ /* Schur forms and Schur vectors) */ /* (Workspace: need N) */ if (ilv || ! wantsn) { *(unsigned char *)chtemp = 'S'; } else { *(unsigned char *)chtemp = 'E'; } _starpu_dhgeqz_(chtemp, jobvl, jobvr, n, ilo, ihi, &a[a_offset], lda, &b[b_offset] , ldb, &alphar[1], &alphai[1], &beta[1], &vl[vl_offset], ldvl, & vr[vr_offset], ldvr, &work[1], lwork, &ierr); if (ierr != 0) { if (ierr > 0 && ierr <= *n) { *info = ierr; } else if (ierr > *n && ierr <= *n << 1) { *info = ierr - *n; } else { *info = *n + 1; } goto L130; } /* Compute Eigenvectors and estimate condition numbers if desired */ /* (Workspace: DTGEVC: need 6*N */ /* DTGSNA: need 2*N*(N+2)+16 if SENSE = 'V' or 'B', */ /* need N otherwise ) */ if (ilv || ! wantsn) { if (ilv) { if (ilvl) { if (ilvr) { *(unsigned char *)chtemp = 'B'; } else { *(unsigned char *)chtemp = 'L'; } } else { *(unsigned char *)chtemp = 'R'; } _starpu_dtgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, & work[1], &ierr); if (ierr != 0) { *info = *n + 2; goto L130; } } if (! wantsn) { /* compute eigenvectors (DTGEVC) and estimate condition */ /* numbers (DTGSNA). Note that the definition of the condition */ /* number is not invariant under transformation (u,v) to */ /* (Q*u, Z*v), where (u,v) are eigenvectors of the generalized */ /* Schur form (S,T), Q and Z are orthogonal matrices. In order */ /* to avoid using extra 2*N*N workspace, we have to recalculate */ /* eigenvectors and estimate one condition numbers at a time. */ pair = FALSE_; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (pair) { pair = FALSE_; goto L20; } mm = 1; if (i__ < *n) { if (a[i__ + 1 + i__ * a_dim1] != 0.) { pair = TRUE_; mm = 2; } } i__2 = *n; for (j = 1; j <= i__2; ++j) { bwork[j] = FALSE_; /* L10: */ } if (mm == 1) { bwork[i__] = TRUE_; } else if (mm == 2) { bwork[i__] = TRUE_; bwork[i__ + 1] = TRUE_; } iwrk = mm * *n + 1; iwrk1 = iwrk + mm * *n; /* Compute a pair of left and right eigenvectors. */ /* (compute workspace: need up to 4*N + 6*N) */ if (wantse || wantsb) { _starpu_dtgevc_("B", "S", &bwork[1], n, &a[a_offset], lda, &b[ b_offset], ldb, &work[1], n, &work[iwrk], n, &mm, &m, &work[iwrk1], &ierr); if (ierr != 0) { *info = *n + 2; goto L130; } } i__2 = *lwork - iwrk1 + 1; _starpu_dtgsna_(sense, "S", &bwork[1], n, &a[a_offset], lda, &b[ b_offset], ldb, &work[1], n, &work[iwrk], n, &rconde[ i__], &rcondv[i__], &mm, &m, &work[iwrk1], &i__2, & iwork[1], &ierr); L20: ; } } } /* Undo balancing on VL and VR and normalization */ /* (Workspace: none needed) */ if (ilvl) { _starpu_dggbak_(balanc, "L", n, ilo, ihi, &lscale[1], &rscale[1], n, &vl[ vl_offset], ldvl, &ierr); i__1 = *n; for (jc = 1; jc <= i__1; ++jc) { if (alphai[jc] < 0.) { goto L70; } temp = 0.; if (alphai[jc] == 0.) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ d__2 = temp, d__3 = (d__1 = vl[jr + jc * vl_dim1], abs( d__1)); temp = max(d__2,d__3); /* L30: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ d__3 = temp, d__4 = (d__1 = vl[jr + jc * vl_dim1], abs( d__1)) + (d__2 = vl[jr + (jc + 1) * vl_dim1], abs( d__2)); temp = max(d__3,d__4); /* L40: */ } } if (temp < smlnum) { goto L70; } temp = 1. / temp; if (alphai[jc] == 0.) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vl[jr + jc * vl_dim1] *= temp; /* L50: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vl[jr + jc * vl_dim1] *= temp; vl[jr + (jc + 1) * vl_dim1] *= temp; /* L60: */ } } L70: ; } } if (ilvr) { _starpu_dggbak_(balanc, "R", n, ilo, ihi, &lscale[1], &rscale[1], n, &vr[ vr_offset], ldvr, &ierr); i__1 = *n; for (jc = 1; jc <= i__1; ++jc) { if (alphai[jc] < 0.) { goto L120; } temp = 0.; if (alphai[jc] == 0.) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ d__2 = temp, d__3 = (d__1 = vr[jr + jc * vr_dim1], abs( d__1)); temp = max(d__2,d__3); /* L80: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ d__3 = temp, d__4 = (d__1 = vr[jr + jc * vr_dim1], abs( d__1)) + (d__2 = vr[jr + (jc + 1) * vr_dim1], abs( d__2)); temp = max(d__3,d__4); /* L90: */ } } if (temp < smlnum) { goto L120; } temp = 1. / temp; if (alphai[jc] == 0.) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vr[jr + jc * vr_dim1] *= temp; /* L100: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vr[jr + jc * vr_dim1] *= temp; vr[jr + (jc + 1) * vr_dim1] *= temp; /* L110: */ } } L120: ; } } /* Undo scaling if necessary */ if (ilascl) { _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, & ierr); _starpu_dlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, & ierr); } if (ilbscl) { _starpu_dlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & ierr); } L130: work[1] = (doublereal) maxwrk; return 0; /* End of DGGEVX */ } /* _starpu_dggevx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dggglm.c000066400000000000000000000250721507764646700206430ustar00rootroot00000000000000/* dggglm.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b32 = -1.; static doublereal c_b34 = 1.; /* Subroutine */ int _starpu_dggglm_(integer *n, integer *m, integer *p, doublereal * a, integer *lda, doublereal *b, integer *ldb, doublereal *d__, doublereal *x, doublereal *y, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, nb, np, nb1, nb2, nb3, nb4, lopt; extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dggqrf_( integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer lwkmin; extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dormrq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; extern /* Subroutine */ int _starpu_dtrtrs_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGGGLM solves a general Gauss-Markov linear model (GLM) problem: */ /* minimize || y ||_2 subject to d = A*x + B*y */ /* x */ /* where A is an N-by-M matrix, B is an N-by-P matrix, and d is a */ /* given N-vector. It is assumed that M <= N <= M+P, and */ /* rank(A) = M and rank( A B ) = N. */ /* Under these assumptions, the constrained equation is always */ /* consistent, and there is a unique solution x and a minimal 2-norm */ /* solution y, which is obtained using a generalized QR factorization */ /* of the matrices (A, B) given by */ /* A = Q*(R), B = Q*T*Z. */ /* (0) */ /* In particular, if matrix B is square nonsingular, then the problem */ /* GLM is equivalent to the following weighted linear least squares */ /* problem */ /* minimize || inv(B)*(d-A*x) ||_2 */ /* x */ /* where inv(B) denotes the inverse of B. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of rows of the matrices A and B. N >= 0. */ /* M (input) INTEGER */ /* The number of columns of the matrix A. 0 <= M <= N. */ /* P (input) INTEGER */ /* The number of columns of the matrix B. P >= N-M. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,M) */ /* On entry, the N-by-M matrix A. */ /* On exit, the upper triangular part of the array A contains */ /* the M-by-M upper triangular matrix R. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,P) */ /* On entry, the N-by-P matrix B. */ /* On exit, if N <= P, the upper triangle of the subarray */ /* B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */ /* if N > P, the elements on and above the (N-P)th subdiagonal */ /* contain the N-by-P upper trapezoidal matrix T. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, D is the left hand side of the GLM equation. */ /* On exit, D is destroyed. */ /* X (output) DOUBLE PRECISION array, dimension (M) */ /* Y (output) DOUBLE PRECISION array, dimension (P) */ /* On exit, X and Y are the solutions of the GLM problem. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,N+M+P). */ /* For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB, */ /* where NB is an upper bound for the optimal blocksizes for */ /* DGEQRF, SGERQF, DORMQR and SORMRQ. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* = 1: the upper triangular factor R associated with A in the */ /* generalized QR factorization of the pair (A, B) is */ /* singular, so that rank(A) < M; the least squares */ /* solution could not be computed. */ /* = 2: the bottom (N-M) by (N-M) part of the upper trapezoidal */ /* factor T associated with B in the generalized QR */ /* factorization of the pair (A, B) is singular, so that */ /* rank( A B ) < N; the least squares solution could not */ /* be computed. */ /* =================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --d__; --x; --y; --work; /* Function Body */ *info = 0; np = min(*n,*p); lquery = *lwork == -1; if (*n < 0) { *info = -1; } else if (*m < 0 || *m > *n) { *info = -2; } else if (*p < 0 || *p < *n - *m) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } /* Calculate workspace */ if (*info == 0) { if (*n == 0) { lwkmin = 1; lwkopt = 1; } else { nb1 = _starpu_ilaenv_(&c__1, "DGEQRF", " ", n, m, &c_n1, &c_n1); nb2 = _starpu_ilaenv_(&c__1, "DGERQF", " ", n, m, &c_n1, &c_n1); nb3 = _starpu_ilaenv_(&c__1, "DORMQR", " ", n, m, p, &c_n1); nb4 = _starpu_ilaenv_(&c__1, "DORMRQ", " ", n, m, p, &c_n1); /* Computing MAX */ i__1 = max(nb1,nb2), i__1 = max(i__1,nb3); nb = max(i__1,nb4); lwkmin = *m + *n + *p; lwkopt = *m + np + max(*n,*p) * nb; } work[1] = (doublereal) lwkopt; if (*lwork < lwkmin && ! lquery) { *info = -12; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGGGLM", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Compute the GQR factorization of matrices A and B: */ /* Q'*A = ( R11 ) M, Q'*B*Z' = ( T11 T12 ) M */ /* ( 0 ) N-M ( 0 T22 ) N-M */ /* M M+P-N N-M */ /* where R11 and T22 are upper triangular, and Q and Z are */ /* orthogonal. */ i__1 = *lwork - *m - np; _starpu_dggqrf_(n, m, p, &a[a_offset], lda, &work[1], &b[b_offset], ldb, &work[*m + 1], &work[*m + np + 1], &i__1, info); lopt = (integer) work[*m + np + 1]; /* Update left-hand-side vector d = Q'*d = ( d1 ) M */ /* ( d2 ) N-M */ i__1 = max(1,*n); i__2 = *lwork - *m - np; _starpu_dormqr_("Left", "Transpose", n, &c__1, m, &a[a_offset], lda, &work[1], & d__[1], &i__1, &work[*m + np + 1], &i__2, info); /* Computing MAX */ i__1 = lopt, i__2 = (integer) work[*m + np + 1]; lopt = max(i__1,i__2); /* Solve T22*y2 = d2 for y2 */ if (*n > *m) { i__1 = *n - *m; i__2 = *n - *m; _starpu_dtrtrs_("Upper", "No transpose", "Non unit", &i__1, &c__1, &b[*m + 1 + (*m + *p - *n + 1) * b_dim1], ldb, &d__[*m + 1], &i__2, info); if (*info > 0) { *info = 1; return 0; } i__1 = *n - *m; _starpu_dcopy_(&i__1, &d__[*m + 1], &c__1, &y[*m + *p - *n + 1], &c__1); } /* Set y1 = 0 */ i__1 = *m + *p - *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = 0.; /* L10: */ } /* Update d1 = d1 - T12*y2 */ i__1 = *n - *m; _starpu_dgemv_("No transpose", m, &i__1, &c_b32, &b[(*m + *p - *n + 1) * b_dim1 + 1], ldb, &y[*m + *p - *n + 1], &c__1, &c_b34, &d__[1], &c__1); /* Solve triangular system: R11*x = d1 */ if (*m > 0) { _starpu_dtrtrs_("Upper", "No Transpose", "Non unit", m, &c__1, &a[a_offset], lda, &d__[1], m, info); if (*info > 0) { *info = 2; return 0; } /* Copy D to X */ _starpu_dcopy_(m, &d__[1], &c__1, &x[1], &c__1); } /* Backward transformation y = Z'*y */ /* Computing MAX */ i__1 = 1, i__2 = *n - *p + 1; i__3 = max(1,*p); i__4 = *lwork - *m - np; _starpu_dormrq_("Left", "Transpose", p, &c__1, &np, &b[max(i__1, i__2)+ b_dim1], ldb, &work[*m + 1], &y[1], &i__3, &work[*m + np + 1], &i__4, info); /* Computing MAX */ i__1 = lopt, i__2 = (integer) work[*m + np + 1]; work[1] = (doublereal) (*m + np + max(i__1,i__2)); return 0; /* End of DGGGLM */ } /* _starpu_dggglm_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgghrd.c000066400000000000000000000242671507764646700206460ustar00rootroot00000000000000/* dgghrd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b10 = 0.; static doublereal c_b11 = 1.; static integer c__1 = 1; /* Subroutine */ int _starpu_dgghrd_(char *compq, char *compz, integer *n, integer * ilo, integer *ihi, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *q, integer *ldq, doublereal *z__, integer * ldz, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, i__2, i__3; /* Local variables */ doublereal c__, s; logical ilq, ilz; integer jcol; doublereal temp; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); integer jrow; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_xerbla_(char *, integer *); integer icompq, icompz; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGGHRD reduces a pair of real matrices (A,B) to generalized upper */ /* Hessenberg form using orthogonal transformations, where A is a */ /* general matrix and B is upper triangular. The form of the */ /* generalized eigenvalue problem is */ /* A*x = lambda*B*x, */ /* and B is typically made upper triangular by computing its QR */ /* factorization and moving the orthogonal matrix Q to the left side */ /* of the equation. */ /* This subroutine simultaneously reduces A to a Hessenberg matrix H: */ /* Q**T*A*Z = H */ /* and transforms B to another upper triangular matrix T: */ /* Q**T*B*Z = T */ /* in order to reduce the problem to its standard form */ /* H*y = lambda*T*y */ /* where y = Z**T*x. */ /* The orthogonal matrices Q and Z are determined as products of Givens */ /* rotations. They may either be formed explicitly, or they may be */ /* postmultiplied into input matrices Q1 and Z1, so that */ /* Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T */ /* Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T */ /* If Q1 is the orthogonal matrix from the QR factorization of B in the */ /* original equation A*x = lambda*B*x, then DGGHRD reduces the original */ /* problem to generalized Hessenberg form. */ /* Arguments */ /* ========= */ /* COMPQ (input) CHARACTER*1 */ /* = 'N': do not compute Q; */ /* = 'I': Q is initialized to the unit matrix, and the */ /* orthogonal matrix Q is returned; */ /* = 'V': Q must contain an orthogonal matrix Q1 on entry, */ /* and the product Q1*Q is returned. */ /* COMPZ (input) CHARACTER*1 */ /* = 'N': do not compute Z; */ /* = 'I': Z is initialized to the unit matrix, and the */ /* orthogonal matrix Z is returned; */ /* = 'V': Z must contain an orthogonal matrix Z1 on entry, */ /* and the product Z1*Z is returned. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* ILO (input) INTEGER */ /* IHI (input) INTEGER */ /* ILO and IHI mark the rows and columns of A which are to be */ /* reduced. It is assumed that A is already upper triangular */ /* in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are */ /* normally set by a previous call to SGGBAL; otherwise they */ /* should be set to 1 and N respectively. */ /* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the N-by-N general matrix to be reduced. */ /* On exit, the upper triangle and the first subdiagonal of A */ /* are overwritten with the upper Hessenberg matrix H, and the */ /* rest is set to zero. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ /* On entry, the N-by-N upper triangular matrix B. */ /* On exit, the upper triangular matrix T = Q**T B Z. The */ /* elements below the diagonal are set to zero. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */ /* On entry, if COMPQ = 'V', the orthogonal matrix Q1, */ /* typically from the QR factorization of B. */ /* On exit, if COMPQ='I', the orthogonal matrix Q, and if */ /* COMPQ = 'V', the product Q1*Q. */ /* Not referenced if COMPQ='N'. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. */ /* LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. */ /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) */ /* On entry, if COMPZ = 'V', the orthogonal matrix Z1. */ /* On exit, if COMPZ='I', the orthogonal matrix Z, and if */ /* COMPZ = 'V', the product Z1*Z. */ /* Not referenced if COMPZ='N'. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. */ /* LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* This routine reduces A to Hessenberg and B to triangular form by */ /* an unblocked reduction, as described in _Matrix_Computations_, */ /* by Golub and Van Loan (Johns Hopkins Press.) */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode COMPQ */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; /* Function Body */ if (_starpu_lsame_(compq, "N")) { ilq = FALSE_; icompq = 1; } else if (_starpu_lsame_(compq, "V")) { ilq = TRUE_; icompq = 2; } else if (_starpu_lsame_(compq, "I")) { ilq = TRUE_; icompq = 3; } else { icompq = 0; } /* Decode COMPZ */ if (_starpu_lsame_(compz, "N")) { ilz = FALSE_; icompz = 1; } else if (_starpu_lsame_(compz, "V")) { ilz = TRUE_; icompz = 2; } else if (_starpu_lsame_(compz, "I")) { ilz = TRUE_; icompz = 3; } else { icompz = 0; } /* Test the input parameters. */ *info = 0; if (icompq <= 0) { *info = -1; } else if (icompz <= 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ilo < 1) { *info = -4; } else if (*ihi > *n || *ihi < *ilo - 1) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } else if (ilq && *ldq < *n || *ldq < 1) { *info = -11; } else if (ilz && *ldz < *n || *ldz < 1) { *info = -13; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGGHRD", &i__1); return 0; } /* Initialize Q and Z if desired. */ if (icompq == 3) { _starpu_dlaset_("Full", n, n, &c_b10, &c_b11, &q[q_offset], ldq); } if (icompz == 3) { _starpu_dlaset_("Full", n, n, &c_b10, &c_b11, &z__[z_offset], ldz); } /* Quick return if possible */ if (*n <= 1) { return 0; } /* Zero out lower triangle of B */ i__1 = *n - 1; for (jcol = 1; jcol <= i__1; ++jcol) { i__2 = *n; for (jrow = jcol + 1; jrow <= i__2; ++jrow) { b[jrow + jcol * b_dim1] = 0.; /* L10: */ } /* L20: */ } /* Reduce A and B */ i__1 = *ihi - 2; for (jcol = *ilo; jcol <= i__1; ++jcol) { i__2 = jcol + 2; for (jrow = *ihi; jrow >= i__2; --jrow) { /* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) */ temp = a[jrow - 1 + jcol * a_dim1]; _starpu_dlartg_(&temp, &a[jrow + jcol * a_dim1], &c__, &s, &a[jrow - 1 + jcol * a_dim1]); a[jrow + jcol * a_dim1] = 0.; i__3 = *n - jcol; _starpu_drot_(&i__3, &a[jrow - 1 + (jcol + 1) * a_dim1], lda, &a[jrow + ( jcol + 1) * a_dim1], lda, &c__, &s); i__3 = *n + 2 - jrow; _starpu_drot_(&i__3, &b[jrow - 1 + (jrow - 1) * b_dim1], ldb, &b[jrow + ( jrow - 1) * b_dim1], ldb, &c__, &s); if (ilq) { _starpu_drot_(n, &q[(jrow - 1) * q_dim1 + 1], &c__1, &q[jrow * q_dim1 + 1], &c__1, &c__, &s); } /* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) */ temp = b[jrow + jrow * b_dim1]; _starpu_dlartg_(&temp, &b[jrow + (jrow - 1) * b_dim1], &c__, &s, &b[jrow + jrow * b_dim1]); b[jrow + (jrow - 1) * b_dim1] = 0.; _starpu_drot_(ihi, &a[jrow * a_dim1 + 1], &c__1, &a[(jrow - 1) * a_dim1 + 1], &c__1, &c__, &s); i__3 = jrow - 1; _starpu_drot_(&i__3, &b[jrow * b_dim1 + 1], &c__1, &b[(jrow - 1) * b_dim1 + 1], &c__1, &c__, &s); if (ilz) { _starpu_drot_(n, &z__[jrow * z_dim1 + 1], &c__1, &z__[(jrow - 1) * z_dim1 + 1], &c__1, &c__, &s); } /* L30: */ } /* L40: */ } return 0; /* End of DGGHRD */ } /* _starpu_dgghrd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgglse.c000066400000000000000000000256431507764646700206530ustar00rootroot00000000000000/* dgglse.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b31 = -1.; static doublereal c_b33 = 1.; /* Subroutine */ int _starpu_dgglse_(integer *m, integer *n, integer *p, doublereal * a, integer *lda, doublereal *b, integer *ldb, doublereal *c__, doublereal *d__, doublereal *x, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; /* Local variables */ integer nb, mn, nr, nb1, nb2, nb3, nb4, lopt; extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *) , _starpu_dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dggrqf_( integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer lwkmin; extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dormrq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; extern /* Subroutine */ int _starpu_dtrtrs_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGGLSE solves the linear equality-constrained least squares (LSE) */ /* problem: */ /* minimize || c - A*x ||_2 subject to B*x = d */ /* where A is an M-by-N matrix, B is a P-by-N matrix, c is a given */ /* M-vector, and d is a given P-vector. It is assumed that */ /* P <= N <= M+P, and */ /* rank(B) = P and rank( (A) ) = N. */ /* ( (B) ) */ /* These conditions ensure that the LSE problem has a unique solution, */ /* which is obtained using a generalized RQ factorization of the */ /* matrices (B, A) given by */ /* B = (0 R)*Q, A = Z*T*Q. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrices A and B. N >= 0. */ /* P (input) INTEGER */ /* The number of rows of the matrix B. 0 <= P <= N <= M+P. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, the elements on and above the diagonal of the array */ /* contain the min(M,N)-by-N upper trapezoidal matrix T. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) */ /* On entry, the P-by-N matrix B. */ /* On exit, the upper triangle of the subarray B(1:P,N-P+1:N) */ /* contains the P-by-P upper triangular matrix R. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,P). */ /* C (input/output) DOUBLE PRECISION array, dimension (M) */ /* On entry, C contains the right hand side vector for the */ /* least squares part of the LSE problem. */ /* On exit, the residual sum of squares for the solution */ /* is given by the sum of squares of elements N-P+1 to M of */ /* vector C. */ /* D (input/output) DOUBLE PRECISION array, dimension (P) */ /* On entry, D contains the right hand side vector for the */ /* constrained equation. */ /* On exit, D is destroyed. */ /* X (output) DOUBLE PRECISION array, dimension (N) */ /* On exit, X is the solution of the LSE problem. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,M+N+P). */ /* For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB, */ /* where NB is an upper bound for the optimal blocksizes for */ /* DGEQRF, SGERQF, DORMQR and SORMRQ. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* = 1: the upper triangular factor R associated with B in the */ /* generalized RQ factorization of the pair (B, A) is */ /* singular, so that rank(B) < P; the least squares */ /* solution could not be computed. */ /* = 2: the (N-P) by (N-P) part of the upper trapezoidal factor */ /* T associated with A in the generalized RQ factorization */ /* of the pair (B, A) is singular, so that */ /* rank( (A) ) < N; the least squares solution could not */ /* ( (B) ) */ /* be computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --c__; --d__; --x; --work; /* Function Body */ *info = 0; mn = min(*m,*n); lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*p < 0 || *p > *n || *p < *n - *m) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else if (*ldb < max(1,*p)) { *info = -7; } /* Calculate workspace */ if (*info == 0) { if (*n == 0) { lwkmin = 1; lwkopt = 1; } else { nb1 = _starpu_ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1); nb2 = _starpu_ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1); nb3 = _starpu_ilaenv_(&c__1, "DORMQR", " ", m, n, p, &c_n1); nb4 = _starpu_ilaenv_(&c__1, "DORMRQ", " ", m, n, p, &c_n1); /* Computing MAX */ i__1 = max(nb1,nb2), i__1 = max(i__1,nb3); nb = max(i__1,nb4); lwkmin = *m + *n + *p; lwkopt = *p + mn + max(*m,*n) * nb; } work[1] = (doublereal) lwkopt; if (*lwork < lwkmin && ! lquery) { *info = -12; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGGLSE", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Compute the GRQ factorization of matrices B and A: */ /* B*Q' = ( 0 T12 ) P Z'*A*Q' = ( R11 R12 ) N-P */ /* N-P P ( 0 R22 ) M+P-N */ /* N-P P */ /* where T12 and R11 are upper triangular, and Q and Z are */ /* orthogonal. */ i__1 = *lwork - *p - mn; _starpu_dggrqf_(p, m, n, &b[b_offset], ldb, &work[1], &a[a_offset], lda, &work[*p + 1], &work[*p + mn + 1], &i__1, info); lopt = (integer) work[*p + mn + 1]; /* Update c = Z'*c = ( c1 ) N-P */ /* ( c2 ) M+P-N */ i__1 = max(1,*m); i__2 = *lwork - *p - mn; _starpu_dormqr_("Left", "Transpose", m, &c__1, &mn, &a[a_offset], lda, &work[*p + 1], &c__[1], &i__1, &work[*p + mn + 1], &i__2, info); /* Computing MAX */ i__1 = lopt, i__2 = (integer) work[*p + mn + 1]; lopt = max(i__1,i__2); /* Solve T12*x2 = d for x2 */ if (*p > 0) { _starpu_dtrtrs_("Upper", "No transpose", "Non-unit", p, &c__1, &b[(*n - *p + 1) * b_dim1 + 1], ldb, &d__[1], p, info); if (*info > 0) { *info = 1; return 0; } /* Put the solution in X */ _starpu_dcopy_(p, &d__[1], &c__1, &x[*n - *p + 1], &c__1); /* Update c1 */ i__1 = *n - *p; _starpu_dgemv_("No transpose", &i__1, p, &c_b31, &a[(*n - *p + 1) * a_dim1 + 1], lda, &d__[1], &c__1, &c_b33, &c__[1], &c__1); } /* Solve R11*x1 = c1 for x1 */ if (*n > *p) { i__1 = *n - *p; i__2 = *n - *p; _starpu_dtrtrs_("Upper", "No transpose", "Non-unit", &i__1, &c__1, &a[ a_offset], lda, &c__[1], &i__2, info); if (*info > 0) { *info = 2; return 0; } /* Put the solutions in X */ i__1 = *n - *p; _starpu_dcopy_(&i__1, &c__[1], &c__1, &x[1], &c__1); } /* Compute the residual vector: */ if (*m < *n) { nr = *m + *p - *n; if (nr > 0) { i__1 = *n - *m; _starpu_dgemv_("No transpose", &nr, &i__1, &c_b31, &a[*n - *p + 1 + (*m + 1) * a_dim1], lda, &d__[nr + 1], &c__1, &c_b33, &c__[*n - *p + 1], &c__1); } } else { nr = *p; } if (nr > 0) { _starpu_dtrmv_("Upper", "No transpose", "Non unit", &nr, &a[*n - *p + 1 + (*n - *p + 1) * a_dim1], lda, &d__[1], &c__1); _starpu_daxpy_(&nr, &c_b31, &d__[1], &c__1, &c__[*n - *p + 1], &c__1); } /* Backward transformation x = Q'*x */ i__1 = *lwork - *p - mn; _starpu_dormrq_("Left", "Transpose", n, &c__1, p, &b[b_offset], ldb, &work[1], &x[ 1], n, &work[*p + mn + 1], &i__1, info); /* Computing MAX */ i__1 = lopt, i__2 = (integer) work[*p + mn + 1]; work[1] = (doublereal) (*p + mn + max(i__1,i__2)); return 0; /* End of DGGLSE */ } /* _starpu_dgglse_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dggqrf.c000066400000000000000000000222131507764646700206460ustar00rootroot00000000000000/* dggqrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; /* Subroutine */ int _starpu_dggqrf_(integer *n, integer *m, integer *p, doublereal * a, integer *lda, doublereal *taua, doublereal *b, integer *ldb, doublereal *taub, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; /* Local variables */ integer nb, nb1, nb2, nb3, lopt; extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dgerqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGGQRF computes a generalized QR factorization of an N-by-M matrix A */ /* and an N-by-P matrix B: */ /* A = Q*R, B = Q*T*Z, */ /* where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal */ /* matrix, and R and T assume one of the forms: */ /* if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, */ /* ( 0 ) N-M N M-N */ /* M */ /* where R11 is upper triangular, and */ /* if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, */ /* P-N N ( T21 ) P */ /* P */ /* where T12 or T21 is upper triangular. */ /* In particular, if B is square and nonsingular, the GQR factorization */ /* of A and B implicitly gives the QR factorization of inv(B)*A: */ /* inv(B)*A = Z'*(inv(T)*R) */ /* where inv(B) denotes the inverse of the matrix B, and Z' denotes the */ /* transpose of the matrix Z. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of rows of the matrices A and B. N >= 0. */ /* M (input) INTEGER */ /* The number of columns of the matrix A. M >= 0. */ /* P (input) INTEGER */ /* The number of columns of the matrix B. P >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,M) */ /* On entry, the N-by-M matrix A. */ /* On exit, the elements on and above the diagonal of the array */ /* contain the min(N,M)-by-M upper trapezoidal matrix R (R is */ /* upper triangular if N >= M); the elements below the diagonal, */ /* with the array TAUA, represent the orthogonal matrix Q as a */ /* product of min(N,M) elementary reflectors (see Further */ /* Details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* TAUA (output) DOUBLE PRECISION array, dimension (min(N,M)) */ /* The scalar factors of the elementary reflectors which */ /* represent the orthogonal matrix Q (see Further Details). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,P) */ /* On entry, the N-by-P matrix B. */ /* On exit, if N <= P, the upper triangle of the subarray */ /* B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */ /* if N > P, the elements on and above the (N-P)-th subdiagonal */ /* contain the N-by-P upper trapezoidal matrix T; the remaining */ /* elements, with the array TAUB, represent the orthogonal */ /* matrix Z as a product of elementary reflectors (see Further */ /* Details). */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* TAUB (output) DOUBLE PRECISION array, dimension (min(N,P)) */ /* The scalar factors of the elementary reflectors which */ /* represent the orthogonal matrix Z (see Further Details). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,N,M,P). */ /* For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), */ /* where NB1 is the optimal blocksize for the QR factorization */ /* of an N-by-M matrix, NB2 is the optimal blocksize for the */ /* RQ factorization of an N-by-P matrix, and NB3 is the optimal */ /* blocksize for a call of DORMQR. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(1) H(2) . . . H(k), where k = min(n,m). */ /* Each H(i) has the form */ /* H(i) = I - taua * v * v' */ /* where taua is a real scalar, and v is a real vector with */ /* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), */ /* and taua in TAUA(i). */ /* To form Q explicitly, use LAPACK subroutine DORGQR. */ /* To use Q to update another matrix, use LAPACK subroutine DORMQR. */ /* The matrix Z is represented as a product of elementary reflectors */ /* Z = H(1) H(2) . . . H(k), where k = min(n,p). */ /* Each H(i) has the form */ /* H(i) = I - taub * v * v' */ /* where taub is a real scalar, and v is a real vector with */ /* v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in */ /* B(n-k+i,1:p-k+i-1), and taub in TAUB(i). */ /* To form Z explicitly, use LAPACK subroutine DORGRQ. */ /* To use Z to update another matrix, use LAPACK subroutine DORMRQ. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --taua; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --taub; --work; /* Function Body */ *info = 0; nb1 = _starpu_ilaenv_(&c__1, "DGEQRF", " ", n, m, &c_n1, &c_n1); nb2 = _starpu_ilaenv_(&c__1, "DGERQF", " ", n, p, &c_n1, &c_n1); nb3 = _starpu_ilaenv_(&c__1, "DORMQR", " ", n, m, p, &c_n1); /* Computing MAX */ i__1 = max(nb1,nb2); nb = max(i__1,nb3); /* Computing MAX */ i__1 = max(*n,*m); lwkopt = max(i__1,*p) * nb; work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*n < 0) { *info = -1; } else if (*m < 0) { *info = -2; } else if (*p < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -8; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = max(1,*n), i__1 = max(i__1,*m); if (*lwork < max(i__1,*p) && ! lquery) { *info = -11; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGGQRF", &i__1); return 0; } else if (lquery) { return 0; } /* QR factorization of N-by-M matrix A: A = Q*R */ _starpu_dgeqrf_(n, m, &a[a_offset], lda, &taua[1], &work[1], lwork, info); lopt = (integer) work[1]; /* Update B := Q'*B. */ i__1 = min(*n,*m); _starpu_dormqr_("Left", "Transpose", n, p, &i__1, &a[a_offset], lda, &taua[1], &b[ b_offset], ldb, &work[1], lwork, info); /* Computing MAX */ i__1 = lopt, i__2 = (integer) work[1]; lopt = max(i__1,i__2); /* RQ factorization of N-by-P matrix B: B = T*Z. */ _starpu_dgerqf_(n, p, &b[b_offset], ldb, &taub[1], &work[1], lwork, info); /* Computing MAX */ i__1 = lopt, i__2 = (integer) work[1]; work[1] = (doublereal) max(i__1,i__2); return 0; /* End of DGGQRF */ } /* _starpu_dggqrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dggrqf.c000066400000000000000000000222671507764646700206570ustar00rootroot00000000000000/* dggrqf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; /* Subroutine */ int _starpu_dggrqf_(integer *m, integer *p, integer *n, doublereal * a, integer *lda, doublereal *taua, doublereal *b, integer *ldb, doublereal *taub, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ integer nb, nb1, nb2, nb3, lopt; extern /* Subroutine */ int _starpu_dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dgerqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dormrq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGGRQF computes a generalized RQ factorization of an M-by-N matrix A */ /* and a P-by-N matrix B: */ /* A = R*Q, B = Z*T*Q, */ /* where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal */ /* matrix, and R and T assume one of the forms: */ /* if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, */ /* N-M M ( R21 ) N */ /* N */ /* where R12 or R21 is upper triangular, and */ /* if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, */ /* ( 0 ) P-N P N-P */ /* N */ /* where T11 is upper triangular. */ /* In particular, if B is square and nonsingular, the GRQ factorization */ /* of A and B implicitly gives the RQ factorization of A*inv(B): */ /* A*inv(B) = (R*inv(T))*Z' */ /* where inv(B) denotes the inverse of the matrix B, and Z' denotes the */ /* transpose of the matrix Z. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* P (input) INTEGER */ /* The number of rows of the matrix B. P >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrices A and B. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, if M <= N, the upper triangle of the subarray */ /* A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; */ /* if M > N, the elements on and above the (M-N)-th subdiagonal */ /* contain the M-by-N upper trapezoidal matrix R; the remaining */ /* elements, with the array TAUA, represent the orthogonal */ /* matrix Q as a product of elementary reflectors (see Further */ /* Details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAUA (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors which */ /* represent the orthogonal matrix Q (see Further Details). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) */ /* On entry, the P-by-N matrix B. */ /* On exit, the elements on and above the diagonal of the array */ /* contain the min(P,N)-by-N upper trapezoidal matrix T (T is */ /* upper triangular if P >= N); the elements below the diagonal, */ /* with the array TAUB, represent the orthogonal matrix Z as a */ /* product of elementary reflectors (see Further Details). */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,P). */ /* TAUB (output) DOUBLE PRECISION array, dimension (min(P,N)) */ /* The scalar factors of the elementary reflectors which */ /* represent the orthogonal matrix Z (see Further Details). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,N,M,P). */ /* For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), */ /* where NB1 is the optimal blocksize for the RQ factorization */ /* of an M-by-N matrix, NB2 is the optimal blocksize for the */ /* QR factorization of a P-by-N matrix, and NB3 is the optimal */ /* blocksize for a call of DORMRQ. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INF0= -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of elementary reflectors */ /* Q = H(1) H(2) . . . H(k), where k = min(m,n). */ /* Each H(i) has the form */ /* H(i) = I - taua * v * v' */ /* where taua is a real scalar, and v is a real vector with */ /* v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */ /* A(m-k+i,1:n-k+i-1), and taua in TAUA(i). */ /* To form Q explicitly, use LAPACK subroutine DORGRQ. */ /* To use Q to update another matrix, use LAPACK subroutine DORMRQ. */ /* The matrix Z is represented as a product of elementary reflectors */ /* Z = H(1) H(2) . . . H(k), where k = min(p,n). */ /* Each H(i) has the form */ /* H(i) = I - taub * v * v' */ /* where taub is a real scalar, and v is a real vector with */ /* v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), */ /* and taub in TAUB(i). */ /* To form Z explicitly, use LAPACK subroutine DORGQR. */ /* To use Z to update another matrix, use LAPACK subroutine DORMQR. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --taua; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --taub; --work; /* Function Body */ *info = 0; nb1 = _starpu_ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1); nb2 = _starpu_ilaenv_(&c__1, "DGEQRF", " ", p, n, &c_n1, &c_n1); nb3 = _starpu_ilaenv_(&c__1, "DORMRQ", " ", m, n, p, &c_n1); /* Computing MAX */ i__1 = max(nb1,nb2); nb = max(i__1,nb3); /* Computing MAX */ i__1 = max(*n,*m); lwkopt = max(i__1,*p) * nb; work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*p < 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else if (*ldb < max(1,*p)) { *info = -8; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = max(1,*m), i__1 = max(i__1,*p); if (*lwork < max(i__1,*n) && ! lquery) { *info = -11; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGGRQF", &i__1); return 0; } else if (lquery) { return 0; } /* RQ factorization of M-by-N matrix A: A = R*Q */ _starpu_dgerqf_(m, n, &a[a_offset], lda, &taua[1], &work[1], lwork, info); lopt = (integer) work[1]; /* Update B := B*Q' */ i__1 = min(*m,*n); /* Computing MAX */ i__2 = 1, i__3 = *m - *n + 1; _starpu_dormrq_("Right", "Transpose", p, n, &i__1, &a[max(i__2, i__3)+ a_dim1], lda, &taua[1], &b[b_offset], ldb, &work[1], lwork, info); /* Computing MAX */ i__1 = lopt, i__2 = (integer) work[1]; lopt = max(i__1,i__2); /* QR factorization of P-by-N matrix B: B = Z*T */ _starpu_dgeqrf_(p, n, &b[b_offset], ldb, &taub[1], &work[1], lwork, info); /* Computing MAX */ i__1 = lopt, i__2 = (integer) work[1]; work[1] = (doublereal) max(i__1,i__2); return 0; /* End of DGGRQF */ } /* _starpu_dggrqf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dggsvd.c000066400000000000000000000327011507764646700206550ustar00rootroot00000000000000/* dggsvd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dggsvd_(char *jobu, char *jobv, char *jobq, integer *m, integer *n, integer *p, integer *k, integer *l, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *alpha, doublereal *beta, doublereal *u, integer *ldu, doublereal *v, integer *ldv, doublereal *q, integer *ldq, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2; /* Local variables */ integer i__, j; doublereal ulp; integer ibnd; doublereal tola; integer isub; doublereal tolb, unfl, temp, smax; extern logical _starpu_lsame_(char *, char *); doublereal anorm, bnorm; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); logical wantq, wantu, wantv; extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dtgsja_(char *, char *, char *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); integer ncycle; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dggsvp_( char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGGSVD computes the generalized singular value decomposition (GSVD) */ /* of an M-by-N real matrix A and P-by-N real matrix B: */ /* U'*A*Q = D1*( 0 R ), V'*B*Q = D2*( 0 R ) */ /* where U, V and Q are orthogonal matrices, and Z' is the transpose */ /* of Z. Let K+L = the effective numerical rank of the matrix (A',B')', */ /* then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and */ /* D2 are M-by-(K+L) and P-by-(K+L) "diagonal" matrices and of the */ /* following structures, respectively: */ /* If M-K-L >= 0, */ /* K L */ /* D1 = K ( I 0 ) */ /* L ( 0 C ) */ /* M-K-L ( 0 0 ) */ /* K L */ /* D2 = L ( 0 S ) */ /* P-L ( 0 0 ) */ /* N-K-L K L */ /* ( 0 R ) = K ( 0 R11 R12 ) */ /* L ( 0 0 R22 ) */ /* where */ /* C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), */ /* S = diag( BETA(K+1), ... , BETA(K+L) ), */ /* C**2 + S**2 = I. */ /* R is stored in A(1:K+L,N-K-L+1:N) on exit. */ /* If M-K-L < 0, */ /* K M-K K+L-M */ /* D1 = K ( I 0 0 ) */ /* M-K ( 0 C 0 ) */ /* K M-K K+L-M */ /* D2 = M-K ( 0 S 0 ) */ /* K+L-M ( 0 0 I ) */ /* P-L ( 0 0 0 ) */ /* N-K-L K M-K K+L-M */ /* ( 0 R ) = K ( 0 R11 R12 R13 ) */ /* M-K ( 0 0 R22 R23 ) */ /* K+L-M ( 0 0 0 R33 ) */ /* where */ /* C = diag( ALPHA(K+1), ... , ALPHA(M) ), */ /* S = diag( BETA(K+1), ... , BETA(M) ), */ /* C**2 + S**2 = I. */ /* (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored */ /* ( 0 R22 R23 ) */ /* in B(M-K+1:L,N+M-K-L+1:N) on exit. */ /* The routine computes C, S, R, and optionally the orthogonal */ /* transformation matrices U, V and Q. */ /* In particular, if B is an N-by-N nonsingular matrix, then the GSVD of */ /* A and B implicitly gives the SVD of A*inv(B): */ /* A*inv(B) = U*(D1*inv(D2))*V'. */ /* If ( A',B')' has orthonormal columns, then the GSVD of A and B is */ /* also equal to the CS decomposition of A and B. Furthermore, the GSVD */ /* can be used to derive the solution of the eigenvalue problem: */ /* A'*A x = lambda* B'*B x. */ /* In some literature, the GSVD of A and B is presented in the form */ /* U'*A*X = ( 0 D1 ), V'*B*X = ( 0 D2 ) */ /* where U and V are orthogonal and X is nonsingular, D1 and D2 are */ /* ``diagonal''. The former GSVD form can be converted to the latter */ /* form by taking the nonsingular matrix X as */ /* X = Q*( I 0 ) */ /* ( 0 inv(R) ). */ /* Arguments */ /* ========= */ /* JOBU (input) CHARACTER*1 */ /* = 'U': Orthogonal matrix U is computed; */ /* = 'N': U is not computed. */ /* JOBV (input) CHARACTER*1 */ /* = 'V': Orthogonal matrix V is computed; */ /* = 'N': V is not computed. */ /* JOBQ (input) CHARACTER*1 */ /* = 'Q': Orthogonal matrix Q is computed; */ /* = 'N': Q is not computed. */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrices A and B. N >= 0. */ /* P (input) INTEGER */ /* The number of rows of the matrix B. P >= 0. */ /* K (output) INTEGER */ /* L (output) INTEGER */ /* On exit, K and L specify the dimension of the subblocks */ /* described in the Purpose section. */ /* K + L = effective numerical rank of (A',B')'. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, A contains the triangular matrix R, or part of R. */ /* See Purpose for details. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) */ /* On entry, the P-by-N matrix B. */ /* On exit, B contains the triangular matrix R if M-K-L < 0. */ /* See Purpose for details. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,P). */ /* ALPHA (output) DOUBLE PRECISION array, dimension (N) */ /* BETA (output) DOUBLE PRECISION array, dimension (N) */ /* On exit, ALPHA and BETA contain the generalized singular */ /* value pairs of A and B; */ /* ALPHA(1:K) = 1, */ /* BETA(1:K) = 0, */ /* and if M-K-L >= 0, */ /* ALPHA(K+1:K+L) = C, */ /* BETA(K+1:K+L) = S, */ /* or if M-K-L < 0, */ /* ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 */ /* BETA(K+1:M) =S, BETA(M+1:K+L) =1 */ /* and */ /* ALPHA(K+L+1:N) = 0 */ /* BETA(K+L+1:N) = 0 */ /* U (output) DOUBLE PRECISION array, dimension (LDU,M) */ /* If JOBU = 'U', U contains the M-by-M orthogonal matrix U. */ /* If JOBU = 'N', U is not referenced. */ /* LDU (input) INTEGER */ /* The leading dimension of the array U. LDU >= max(1,M) if */ /* JOBU = 'U'; LDU >= 1 otherwise. */ /* V (output) DOUBLE PRECISION array, dimension (LDV,P) */ /* If JOBV = 'V', V contains the P-by-P orthogonal matrix V. */ /* If JOBV = 'N', V is not referenced. */ /* LDV (input) INTEGER */ /* The leading dimension of the array V. LDV >= max(1,P) if */ /* JOBV = 'V'; LDV >= 1 otherwise. */ /* Q (output) DOUBLE PRECISION array, dimension (LDQ,N) */ /* If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q. */ /* If JOBQ = 'N', Q is not referenced. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max(1,N) if */ /* JOBQ = 'Q'; LDQ >= 1 otherwise. */ /* WORK (workspace) DOUBLE PRECISION array, */ /* dimension (max(3*N,M,P)+N) */ /* IWORK (workspace/output) INTEGER array, dimension (N) */ /* On exit, IWORK stores the sorting information. More */ /* precisely, the following loop will sort ALPHA */ /* for I = K+1, min(M,K+L) */ /* swap ALPHA(I) and ALPHA(IWORK(I)) */ /* endfor */ /* such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, the Jacobi-type procedure failed to */ /* converge. For further details, see subroutine DTGSJA. */ /* Internal Parameters */ /* =================== */ /* TOLA DOUBLE PRECISION */ /* TOLB DOUBLE PRECISION */ /* TOLA and TOLB are the thresholds to determine the effective */ /* rank of (A',B')'. Generally, they are set to */ /* TOLA = MAX(M,N)*norm(A)*MAZHEPS, */ /* TOLB = MAX(P,N)*norm(B)*MAZHEPS. */ /* The size of TOLA and TOLB may affect the size of backward */ /* errors of the decomposition. */ /* Further Details */ /* =============== */ /* 2-96 Based on modifications by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --alpha; --beta; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --work; --iwork; /* Function Body */ wantu = _starpu_lsame_(jobu, "U"); wantv = _starpu_lsame_(jobv, "V"); wantq = _starpu_lsame_(jobq, "Q"); *info = 0; if (! (wantu || _starpu_lsame_(jobu, "N"))) { *info = -1; } else if (! (wantv || _starpu_lsame_(jobv, "N"))) { *info = -2; } else if (! (wantq || _starpu_lsame_(jobq, "N"))) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*p < 0) { *info = -6; } else if (*lda < max(1,*m)) { *info = -10; } else if (*ldb < max(1,*p)) { *info = -12; } else if (*ldu < 1 || wantu && *ldu < *m) { *info = -16; } else if (*ldv < 1 || wantv && *ldv < *p) { *info = -18; } else if (*ldq < 1 || wantq && *ldq < *n) { *info = -20; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGGSVD", &i__1); return 0; } /* Compute the Frobenius norm of matrices A and B */ anorm = _starpu_dlange_("1", m, n, &a[a_offset], lda, &work[1]); bnorm = _starpu_dlange_("1", p, n, &b[b_offset], ldb, &work[1]); /* Get machine precision and set up threshold for determining */ /* the effective numerical rank of the matrices A and B. */ ulp = _starpu_dlamch_("Precision"); unfl = _starpu_dlamch_("Safe Minimum"); tola = max(*m,*n) * max(anorm,unfl) * ulp; tolb = max(*p,*n) * max(bnorm,unfl) * ulp; /* Preprocessing */ _starpu_dggsvp_(jobu, jobv, jobq, m, p, n, &a[a_offset], lda, &b[b_offset], ldb, & tola, &tolb, k, l, &u[u_offset], ldu, &v[v_offset], ldv, &q[ q_offset], ldq, &iwork[1], &work[1], &work[*n + 1], info); /* Compute the GSVD of two upper "triangular" matrices */ _starpu_dtgsja_(jobu, jobv, jobq, m, p, n, k, l, &a[a_offset], lda, &b[b_offset], ldb, &tola, &tolb, &alpha[1], &beta[1], &u[u_offset], ldu, &v[ v_offset], ldv, &q[q_offset], ldq, &work[1], &ncycle, info); /* Sort the singular values and store the pivot indices in IWORK */ /* Copy ALPHA to WORK, then sort ALPHA in WORK */ _starpu_dcopy_(n, &alpha[1], &c__1, &work[1], &c__1); /* Computing MIN */ i__1 = *l, i__2 = *m - *k; ibnd = min(i__1,i__2); i__1 = ibnd; for (i__ = 1; i__ <= i__1; ++i__) { /* Scan for largest ALPHA(K+I) */ isub = i__; smax = work[*k + i__]; i__2 = ibnd; for (j = i__ + 1; j <= i__2; ++j) { temp = work[*k + j]; if (temp > smax) { isub = j; smax = temp; } /* L10: */ } if (isub != i__) { work[*k + isub] = work[*k + i__]; work[*k + i__] = smax; iwork[*k + i__] = *k + isub; } else { iwork[*k + i__] = *k + i__; } /* L20: */ } return 0; /* End of DGGSVD */ } /* _starpu_dggsvd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dggsvp.c000066400000000000000000000351771507764646700207030ustar00rootroot00000000000000/* dggsvp.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b12 = 0.; static doublereal c_b22 = 1.; /* Subroutine */ int _starpu_dggsvp_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *tola, doublereal *tolb, integer *k, integer *l, doublereal *u, integer *ldu, doublereal *v, integer *ldv, doublereal *q, integer *ldq, integer *iwork, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, i__3; doublereal d__1; /* Local variables */ integer i__, j; extern logical _starpu_lsame_(char *, char *); logical wantq, wantu, wantv; extern /* Subroutine */ int _starpu_dgeqr2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dgerq2_( integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dorg2r_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dorm2r_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dormr2_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dgeqpf_(integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dlapmt_(logical *, integer *, integer *, doublereal *, integer *, integer *); logical forwrd; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGGSVP computes orthogonal matrices U, V and Q such that */ /* N-K-L K L */ /* U'*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; */ /* L ( 0 0 A23 ) */ /* M-K-L ( 0 0 0 ) */ /* N-K-L K L */ /* = K ( 0 A12 A13 ) if M-K-L < 0; */ /* M-K ( 0 0 A23 ) */ /* N-K-L K L */ /* V'*B*Q = L ( 0 0 B13 ) */ /* P-L ( 0 0 0 ) */ /* where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular */ /* upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, */ /* otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective */ /* numerical rank of the (M+P)-by-N matrix (A',B')'. Z' denotes the */ /* transpose of Z. */ /* This decomposition is the preprocessing step for computing the */ /* Generalized Singular Value Decomposition (GSVD), see subroutine */ /* DGGSVD. */ /* Arguments */ /* ========= */ /* JOBU (input) CHARACTER*1 */ /* = 'U': Orthogonal matrix U is computed; */ /* = 'N': U is not computed. */ /* JOBV (input) CHARACTER*1 */ /* = 'V': Orthogonal matrix V is computed; */ /* = 'N': V is not computed. */ /* JOBQ (input) CHARACTER*1 */ /* = 'Q': Orthogonal matrix Q is computed; */ /* = 'N': Q is not computed. */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* P (input) INTEGER */ /* The number of rows of the matrix B. P >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrices A and B. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, A contains the triangular (or trapezoidal) matrix */ /* described in the Purpose section. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) */ /* On entry, the P-by-N matrix B. */ /* On exit, B contains the triangular matrix described in */ /* the Purpose section. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,P). */ /* TOLA (input) DOUBLE PRECISION */ /* TOLB (input) DOUBLE PRECISION */ /* TOLA and TOLB are the thresholds to determine the effective */ /* numerical rank of matrix B and a subblock of A. Generally, */ /* they are set to */ /* TOLA = MAX(M,N)*norm(A)*MAZHEPS, */ /* TOLB = MAX(P,N)*norm(B)*MAZHEPS. */ /* The size of TOLA and TOLB may affect the size of backward */ /* errors of the decomposition. */ /* K (output) INTEGER */ /* L (output) INTEGER */ /* On exit, K and L specify the dimension of the subblocks */ /* described in Purpose. */ /* K + L = effective numerical rank of (A',B')'. */ /* U (output) DOUBLE PRECISION array, dimension (LDU,M) */ /* If JOBU = 'U', U contains the orthogonal matrix U. */ /* If JOBU = 'N', U is not referenced. */ /* LDU (input) INTEGER */ /* The leading dimension of the array U. LDU >= max(1,M) if */ /* JOBU = 'U'; LDU >= 1 otherwise. */ /* V (output) DOUBLE PRECISION array, dimension (LDV,P) */ /* If JOBV = 'V', V contains the orthogonal matrix V. */ /* If JOBV = 'N', V is not referenced. */ /* LDV (input) INTEGER */ /* The leading dimension of the array V. LDV >= max(1,P) if */ /* JOBV = 'V'; LDV >= 1 otherwise. */ /* Q (output) DOUBLE PRECISION array, dimension (LDQ,N) */ /* If JOBQ = 'Q', Q contains the orthogonal matrix Q. */ /* If JOBQ = 'N', Q is not referenced. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max(1,N) if */ /* JOBQ = 'Q'; LDQ >= 1 otherwise. */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* TAU (workspace) DOUBLE PRECISION array, dimension (N) */ /* WORK (workspace) DOUBLE PRECISION array, dimension (max(3*N,M,P)) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* The subroutine uses LAPACK subroutine DGEQPF for the QR factorization */ /* with column pivoting to detect the effective numerical rank of the */ /* a matrix. It may be replaced by a better rank determination strategy. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --iwork; --tau; --work; /* Function Body */ wantu = _starpu_lsame_(jobu, "U"); wantv = _starpu_lsame_(jobv, "V"); wantq = _starpu_lsame_(jobq, "Q"); forwrd = TRUE_; *info = 0; if (! (wantu || _starpu_lsame_(jobu, "N"))) { *info = -1; } else if (! (wantv || _starpu_lsame_(jobv, "N"))) { *info = -2; } else if (! (wantq || _starpu_lsame_(jobq, "N"))) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*p < 0) { *info = -5; } else if (*n < 0) { *info = -6; } else if (*lda < max(1,*m)) { *info = -8; } else if (*ldb < max(1,*p)) { *info = -10; } else if (*ldu < 1 || wantu && *ldu < *m) { *info = -16; } else if (*ldv < 1 || wantv && *ldv < *p) { *info = -18; } else if (*ldq < 1 || wantq && *ldq < *n) { *info = -20; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGGSVP", &i__1); return 0; } /* QR with column pivoting of B: B*P = V*( S11 S12 ) */ /* ( 0 0 ) */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { iwork[i__] = 0; /* L10: */ } _starpu_dgeqpf_(p, n, &b[b_offset], ldb, &iwork[1], &tau[1], &work[1], info); /* Update A := A*P */ _starpu_dlapmt_(&forwrd, m, n, &a[a_offset], lda, &iwork[1]); /* Determine the effective rank of matrix B. */ *l = 0; i__1 = min(*p,*n); for (i__ = 1; i__ <= i__1; ++i__) { if ((d__1 = b[i__ + i__ * b_dim1], abs(d__1)) > *tolb) { ++(*l); } /* L20: */ } if (wantv) { /* Copy the details of V, and form V. */ _starpu_dlaset_("Full", p, p, &c_b12, &c_b12, &v[v_offset], ldv); if (*p > 1) { i__1 = *p - 1; _starpu_dlacpy_("Lower", &i__1, n, &b[b_dim1 + 2], ldb, &v[v_dim1 + 2], ldv); } i__1 = min(*p,*n); _starpu_dorg2r_(p, p, &i__1, &v[v_offset], ldv, &tau[1], &work[1], info); } /* Clean up B */ i__1 = *l - 1; for (j = 1; j <= i__1; ++j) { i__2 = *l; for (i__ = j + 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = 0.; /* L30: */ } /* L40: */ } if (*p > *l) { i__1 = *p - *l; _starpu_dlaset_("Full", &i__1, n, &c_b12, &c_b12, &b[*l + 1 + b_dim1], ldb); } if (wantq) { /* Set Q = I and Update Q := Q*P */ _starpu_dlaset_("Full", n, n, &c_b12, &c_b22, &q[q_offset], ldq); _starpu_dlapmt_(&forwrd, n, n, &q[q_offset], ldq, &iwork[1]); } if (*p >= *l && *n != *l) { /* RQ factorization of (S11 S12): ( S11 S12 ) = ( 0 S12 )*Z */ _starpu_dgerq2_(l, n, &b[b_offset], ldb, &tau[1], &work[1], info); /* Update A := A*Z' */ _starpu_dormr2_("Right", "Transpose", m, n, l, &b[b_offset], ldb, &tau[1], &a[ a_offset], lda, &work[1], info); if (wantq) { /* Update Q := Q*Z' */ _starpu_dormr2_("Right", "Transpose", n, n, l, &b[b_offset], ldb, &tau[1], &q[q_offset], ldq, &work[1], info); } /* Clean up B */ i__1 = *n - *l; _starpu_dlaset_("Full", l, &i__1, &c_b12, &c_b12, &b[b_offset], ldb); i__1 = *n; for (j = *n - *l + 1; j <= i__1; ++j) { i__2 = *l; for (i__ = j - *n + *l + 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = 0.; /* L50: */ } /* L60: */ } } /* Let N-L L */ /* A = ( A11 A12 ) M, */ /* then the following does the complete QR decomposition of A11: */ /* A11 = U*( 0 T12 )*P1' */ /* ( 0 0 ) */ i__1 = *n - *l; for (i__ = 1; i__ <= i__1; ++i__) { iwork[i__] = 0; /* L70: */ } i__1 = *n - *l; _starpu_dgeqpf_(m, &i__1, &a[a_offset], lda, &iwork[1], &tau[1], &work[1], info); /* Determine the effective rank of A11 */ *k = 0; /* Computing MIN */ i__2 = *m, i__3 = *n - *l; i__1 = min(i__2,i__3); for (i__ = 1; i__ <= i__1; ++i__) { if ((d__1 = a[i__ + i__ * a_dim1], abs(d__1)) > *tola) { ++(*k); } /* L80: */ } /* Update A12 := U'*A12, where A12 = A( 1:M, N-L+1:N ) */ /* Computing MIN */ i__2 = *m, i__3 = *n - *l; i__1 = min(i__2,i__3); _starpu_dorm2r_("Left", "Transpose", m, l, &i__1, &a[a_offset], lda, &tau[1], &a[( *n - *l + 1) * a_dim1 + 1], lda, &work[1], info); if (wantu) { /* Copy the details of U, and form U */ _starpu_dlaset_("Full", m, m, &c_b12, &c_b12, &u[u_offset], ldu); if (*m > 1) { i__1 = *m - 1; i__2 = *n - *l; _starpu_dlacpy_("Lower", &i__1, &i__2, &a[a_dim1 + 2], lda, &u[u_dim1 + 2] , ldu); } /* Computing MIN */ i__2 = *m, i__3 = *n - *l; i__1 = min(i__2,i__3); _starpu_dorg2r_(m, m, &i__1, &u[u_offset], ldu, &tau[1], &work[1], info); } if (wantq) { /* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 */ i__1 = *n - *l; _starpu_dlapmt_(&forwrd, n, &i__1, &q[q_offset], ldq, &iwork[1]); } /* Clean up A: set the strictly lower triangular part of */ /* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. */ i__1 = *k - 1; for (j = 1; j <= i__1; ++j) { i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = 0.; /* L90: */ } /* L100: */ } if (*m > *k) { i__1 = *m - *k; i__2 = *n - *l; _starpu_dlaset_("Full", &i__1, &i__2, &c_b12, &c_b12, &a[*k + 1 + a_dim1], lda); } if (*n - *l > *k) { /* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 */ i__1 = *n - *l; _starpu_dgerq2_(k, &i__1, &a[a_offset], lda, &tau[1], &work[1], info); if (wantq) { /* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1' */ i__1 = *n - *l; _starpu_dormr2_("Right", "Transpose", n, &i__1, k, &a[a_offset], lda, & tau[1], &q[q_offset], ldq, &work[1], info); } /* Clean up A */ i__1 = *n - *l - *k; _starpu_dlaset_("Full", k, &i__1, &c_b12, &c_b12, &a[a_offset], lda); i__1 = *n - *l; for (j = *n - *l - *k + 1; j <= i__1; ++j) { i__2 = *k; for (i__ = j - *n + *l + *k + 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = 0.; /* L110: */ } /* L120: */ } } if (*m > *k) { /* QR factorization of A( K+1:M,N-L+1:N ) */ i__1 = *m - *k; _starpu_dgeqr2_(&i__1, l, &a[*k + 1 + (*n - *l + 1) * a_dim1], lda, &tau[1], & work[1], info); if (wantu) { /* Update U(:,K+1:M) := U(:,K+1:M)*U1 */ i__1 = *m - *k; /* Computing MIN */ i__3 = *m - *k; i__2 = min(i__3,*l); _starpu_dorm2r_("Right", "No transpose", m, &i__1, &i__2, &a[*k + 1 + (*n - *l + 1) * a_dim1], lda, &tau[1], &u[(*k + 1) * u_dim1 + 1], ldu, &work[1], info); } /* Clean up */ i__1 = *n; for (j = *n - *l + 1; j <= i__1; ++j) { i__2 = *m; for (i__ = j - *n + *k + *l + 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = 0.; /* L130: */ } /* L140: */ } } return 0; /* End of DGGSVP */ } /* _starpu_dggsvp_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgsvj0.c000066400000000000000000001017311507764646700205740ustar00rootroot00000000000000/* dgsvj0.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__0 = 0; static doublereal c_b42 = 1.; /* Subroutine */ int _starpu_dgsvj0_(char *jobv, integer *m, integer *n, doublereal * a, integer *lda, doublereal *d__, doublereal *sva, integer *mv, doublereal *v, integer *ldv, doublereal *eps, doublereal *sfmin, doublereal *tol, integer *nsweep, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ doublereal bigtheta; integer pskipped, i__, p, q; doublereal t, rootsfmin, cs, sn; integer ir1, jbc; doublereal big; integer kbl, igl, ibr, jgl, nbl, mvl; doublereal aapp, aapq, aaqq; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); integer ierr; doublereal aapp0; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); doublereal temp1, apoaq, aqoap; extern logical _starpu_lsame_(char *, char *); doublereal theta, small; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal fastr[5]; extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); logical applv, rsvec; extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_drotm_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *); logical rotok; extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); integer ijblsk, swband, blskip; doublereal mxaapq; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); doublereal thsign, mxsinj; integer emptsw, notrot, iswrot, lkahead; doublereal rootbig, rooteps; integer rowskip; doublereal roottol; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Zlatko Drmac of the University of Zagreb and -- */ /* -- Kresimir Veselic of the Fernuniversitaet Hagen -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* This routine is also part of SIGMA (version 1.23, October 23. 2008.) */ /* SIGMA is a library of algorithms for highly accurate algorithms for */ /* computation of SVD, PSVD, QSVD, (H,K)-SVD, and for solution of the */ /* eigenvalue problems Hx = lambda M x, H M x = lambda x with H, M > 0. */ /* Scalar Arguments */ /* Array Arguments */ /* .. */ /* Purpose */ /* ~~~~~~~ */ /* DGSVJ0 is called from DGESVJ as a pre-processor and that is its main */ /* purpose. It applies Jacobi rotations in the same way as DGESVJ does, but */ /* it does not check convergence (stopping criterion). Few tuning */ /* parameters (marked by [TP]) are available for the implementer. */ /* Further details */ /* ~~~~~~~~~~~~~~~ */ /* DGSVJ0 is used just to enable SGESVJ to call a simplified version of */ /* itself to work on a submatrix of the original matrix. */ /* Contributors */ /* ~~~~~~~~~~~~ */ /* Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */ /* Bugs, Examples and Comments */ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ /* Please report all bugs and send interesting test examples and comments to */ /* drmac@math.hr. Thank you. */ /* Arguments */ /* ~~~~~~~~~ */ /* JOBV (input) CHARACTER*1 */ /* Specifies whether the output from this procedure is used */ /* to compute the matrix V: */ /* = 'V': the product of the Jacobi rotations is accumulated */ /* by postmulyiplying the N-by-N array V. */ /* (See the description of V.) */ /* = 'A': the product of the Jacobi rotations is accumulated */ /* by postmulyiplying the MV-by-N array V. */ /* (See the descriptions of MV and V.) */ /* = 'N': the Jacobi rotations are not accumulated. */ /* M (input) INTEGER */ /* The number of rows of the input matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the input matrix A. */ /* M >= N >= 0. */ /* A (input/output) REAL array, dimension (LDA,N) */ /* On entry, M-by-N matrix A, such that A*diag(D) represents */ /* the input matrix. */ /* On exit, */ /* A_onexit * D_onexit represents the input matrix A*diag(D) */ /* post-multiplied by a sequence of Jacobi rotations, where the */ /* rotation threshold and the total number of sweeps are given in */ /* TOL and NSWEEP, respectively. */ /* (See the descriptions of D, TOL and NSWEEP.) */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* D (input/workspace/output) REAL array, dimension (N) */ /* The array D accumulates the scaling factors from the fast scaled */ /* Jacobi rotations. */ /* On entry, A*diag(D) represents the input matrix. */ /* On exit, A_onexit*diag(D_onexit) represents the input matrix */ /* post-multiplied by a sequence of Jacobi rotations, where the */ /* rotation threshold and the total number of sweeps are given in */ /* TOL and NSWEEP, respectively. */ /* (See the descriptions of A, TOL and NSWEEP.) */ /* SVA (input/workspace/output) REAL array, dimension (N) */ /* On entry, SVA contains the Euclidean norms of the columns of */ /* the matrix A*diag(D). */ /* On exit, SVA contains the Euclidean norms of the columns of */ /* the matrix onexit*diag(D_onexit). */ /* MV (input) INTEGER */ /* If JOBV .EQ. 'A', then MV rows of V are post-multipled by a */ /* sequence of Jacobi rotations. */ /* If JOBV = 'N', then MV is not referenced. */ /* V (input/output) REAL array, dimension (LDV,N) */ /* If JOBV .EQ. 'V' then N rows of V are post-multipled by a */ /* sequence of Jacobi rotations. */ /* If JOBV .EQ. 'A' then MV rows of V are post-multipled by a */ /* sequence of Jacobi rotations. */ /* If JOBV = 'N', then V is not referenced. */ /* LDV (input) INTEGER */ /* The leading dimension of the array V, LDV >= 1. */ /* If JOBV = 'V', LDV .GE. N. */ /* If JOBV = 'A', LDV .GE. MV. */ /* EPS (input) INTEGER */ /* EPS = SLAMCH('Epsilon') */ /* SFMIN (input) INTEGER */ /* SFMIN = SLAMCH('Safe Minimum') */ /* TOL (input) REAL */ /* TOL is the threshold for Jacobi rotations. For a pair */ /* A(:,p), A(:,q) of pivot columns, the Jacobi rotation is */ /* applied only if DABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL. */ /* NSWEEP (input) INTEGER */ /* NSWEEP is the number of sweeps of Jacobi rotations to be */ /* performed. */ /* WORK (workspace) REAL array, dimension LWORK. */ /* LWORK (input) INTEGER */ /* LWORK is the dimension of WORK. LWORK .GE. M. */ /* INFO (output) INTEGER */ /* = 0 : successful exit. */ /* < 0 : if INFO = -i, then the i-th argument had an illegal value */ /* Local Parameters */ /* Local Scalars */ /* Local Arrays */ /* Intrinsic Functions */ /* External Functions */ /* External Subroutines */ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~| */ /* Parameter adjustments */ --sva; --d__; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; --work; /* Function Body */ applv = _starpu_lsame_(jobv, "A"); rsvec = _starpu_lsame_(jobv, "V"); if (! (rsvec || applv || _starpu_lsame_(jobv, "N"))) { *info = -1; } else if (*m < 0) { *info = -2; } else if (*n < 0 || *n > *m) { *info = -3; } else if (*lda < *m) { *info = -5; } else if (*mv < 0) { *info = -8; } else if (*ldv < *m) { *info = -10; } else if (*tol <= *eps) { *info = -13; } else if (*nsweep < 0) { *info = -14; } else if (*lwork < *m) { *info = -16; } else { *info = 0; } /* #:( */ if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGSVJ0", &i__1); return 0; } if (rsvec) { mvl = *n; } else if (applv) { mvl = *mv; } rsvec = rsvec || applv; rooteps = sqrt(*eps); rootsfmin = sqrt(*sfmin); small = *sfmin / *eps; big = 1. / *sfmin; rootbig = 1. / rootsfmin; bigtheta = 1. / rooteps; roottol = sqrt(*tol); /* -#- Row-cyclic Jacobi SVD algorithm with column pivoting -#- */ emptsw = *n * (*n - 1) / 2; notrot = 0; fastr[0] = 0.; /* -#- Row-cyclic pivot strategy with de Rijk's pivoting -#- */ swband = 0; /* [TP] SWBAND is a tuning parameter. It is meaningful and effective */ /* if SGESVJ is used as a computational routine in the preconditioned */ /* Jacobi SVD algorithm SGESVJ. For sweeps i=1:SWBAND the procedure */ /* ...... */ kbl = min(8,*n); /* [TP] KBL is a tuning parameter that defines the tile size in the */ /* tiling of the p-q loops of pivot pairs. In general, an optimal */ /* value of KBL depends on the matrix dimensions and on the */ /* parameters of the computer's memory. */ nbl = *n / kbl; if (nbl * kbl != *n) { ++nbl; } /* Computing 2nd power */ i__1 = kbl; blskip = i__1 * i__1 + 1; /* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */ rowskip = min(5,kbl); /* [TP] ROWSKIP is a tuning parameter. */ lkahead = 1; /* [TP] LKAHEAD is a tuning parameter. */ swband = 0; pskipped = 0; i__1 = *nsweep; for (i__ = 1; i__ <= i__1; ++i__) { /* .. go go go ... */ mxaapq = 0.; mxsinj = 0.; iswrot = 0; notrot = 0; pskipped = 0; i__2 = nbl; for (ibr = 1; ibr <= i__2; ++ibr) { igl = (ibr - 1) * kbl + 1; /* Computing MIN */ i__4 = lkahead, i__5 = nbl - ibr; i__3 = min(i__4,i__5); for (ir1 = 0; ir1 <= i__3; ++ir1) { igl += ir1 * kbl; /* Computing MIN */ i__5 = igl + kbl - 1, i__6 = *n - 1; i__4 = min(i__5,i__6); for (p = igl; p <= i__4; ++p) { /* .. de Rijk's pivoting */ i__5 = *n - p + 1; q = _starpu_idamax_(&i__5, &sva[p], &c__1) + p - 1; if (p != q) { _starpu_dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); if (rsvec) { _starpu_dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], &c__1); } temp1 = sva[p]; sva[p] = sva[q]; sva[q] = temp1; temp1 = d__[p]; d__[p] = d__[q]; d__[q] = temp1; } if (ir1 == 0) { /* Column norms are periodically updated by explicit */ /* norm computation. */ /* Caveat: */ /* Some BLAS implementations compute DNRM2(M,A(1,p),1) */ /* as DSQRT(DDOT(M,A(1,p),1,A(1,p),1)), which may result in */ /* overflow for ||A(:,p)||_2 > DSQRT(overflow_threshold), and */ /* undeflow for ||A(:,p)||_2 < DSQRT(underflow_threshold). */ /* Hence, DNRM2 cannot be trusted, not even in the case when */ /* the true norm is far from the under(over)flow boundaries. */ /* If properly implemented DNRM2 is available, the IF-THEN-ELSE */ /* below should read "AAPP = DNRM2( M, A(1,p), 1 ) * D(p)". */ if (sva[p] < rootbig && sva[p] > rootsfmin) { sva[p] = _starpu_dnrm2_(m, &a[p * a_dim1 + 1], &c__1) * d__[p]; } else { temp1 = 0.; aapp = 0.; _starpu_dlassq_(m, &a[p * a_dim1 + 1], &c__1, &temp1, & aapp); sva[p] = temp1 * sqrt(aapp) * d__[p]; } aapp = sva[p]; } else { aapp = sva[p]; } if (aapp > 0.) { pskipped = 0; /* Computing MIN */ i__6 = igl + kbl - 1; i__5 = min(i__6,*n); for (q = p + 1; q <= i__5; ++q) { aaqq = sva[q]; if (aaqq > 0.) { aapp0 = aapp; if (aaqq >= 1.) { rotok = small * aapp <= aaqq; if (aapp < big / aaqq) { aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & c__1, &a[q * a_dim1 + 1], & c__1) * d__[p] * d__[q] / aaqq / aapp; } else { _starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, & work[1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aapp, & d__[p], m, &c__1, &work[1], lda, &ierr); aapq = _starpu_ddot_(m, &work[1], &c__1, &a[q * a_dim1 + 1], &c__1) * d__[q] / aaqq; } } else { rotok = aapp <= aaqq / small; if (aapp > small / aaqq) { aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & c__1, &a[q * a_dim1 + 1], & c__1) * d__[p] * d__[q] / aaqq / aapp; } else { _starpu_dcopy_(m, &a[q * a_dim1 + 1], &c__1, & work[1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aaqq, & d__[q], m, &c__1, &work[1], lda, &ierr); aapq = _starpu_ddot_(m, &work[1], &c__1, &a[p * a_dim1 + 1], &c__1) * d__[p] / aapp; } } /* Computing MAX */ d__1 = mxaapq, d__2 = abs(aapq); mxaapq = max(d__1,d__2); /* TO rotate or NOT to rotate, THAT is the question ... */ if (abs(aapq) > *tol) { /* .. rotate */ /* ROTATED = ROTATED + ONE */ if (ir1 == 0) { notrot = 0; pskipped = 0; ++iswrot; } if (rotok) { aqoap = aaqq / aapp; apoaq = aapp / aaqq; theta = (d__1 = aqoap - apoaq, abs( d__1)) * -.5 / aapq; if (abs(theta) > bigtheta) { t = .5 / theta; fastr[2] = t * d__[p] / d__[q]; fastr[3] = -t * d__[q] / d__[p]; _starpu_drotm_(m, &a[p * a_dim1 + 1], & c__1, &a[q * a_dim1 + 1], &c__1, fastr); if (rsvec) { _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], &c__1, fastr); } /* Computing MAX */ d__1 = 0., d__2 = t * apoaq * aapq + 1.; sva[q] = aaqq * sqrt((max(d__1, d__2))); aapp *= sqrt(1. - t * aqoap * aapq); /* Computing MAX */ d__1 = mxsinj, d__2 = abs(t); mxsinj = max(d__1,d__2); } else { /* .. choose correct signum for THETA and rotate */ thsign = -d_sign(&c_b42, &aapq); t = 1. / (theta + thsign * sqrt( theta * theta + 1.)); cs = sqrt(1. / (t * t + 1.)); sn = t * cs; /* Computing MAX */ d__1 = mxsinj, d__2 = abs(sn); mxsinj = max(d__1,d__2); /* Computing MAX */ d__1 = 0., d__2 = t * apoaq * aapq + 1.; sva[q] = aaqq * sqrt((max(d__1, d__2))); /* Computing MAX */ d__1 = 0., d__2 = 1. - t * aqoap * aapq; aapp *= sqrt((max(d__1,d__2))); apoaq = d__[p] / d__[q]; aqoap = d__[q] / d__[p]; if (d__[p] >= 1.) { if (d__[q] >= 1.) { fastr[2] = t * apoaq; fastr[3] = -t * aqoap; d__[p] *= cs; d__[q] *= cs; _starpu_drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1, fastr); if (rsvec) { _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ q * v_dim1 + 1], &c__1, fastr); } } else { d__1 = -t * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ p * a_dim1 + 1], &c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ q * a_dim1 + 1], &c__1); d__[p] *= cs; d__[q] /= cs; if (rsvec) { d__1 = -t * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & c__1, &v[p * v_dim1 + 1], &c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & c__1, &v[q * v_dim1 + 1], &c__1); } } } else { if (d__[q] >= 1.) { d__1 = t * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ q * a_dim1 + 1], &c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ p * a_dim1 + 1], &c__1); d__[p] /= cs; d__[q] *= cs; if (rsvec) { d__1 = t * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & c__1, &v[q * v_dim1 + 1], &c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & c__1, &v[p * v_dim1 + 1], &c__1); } } else { if (d__[p] >= d__[q]) { d__1 = -t * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[p * a_dim1 + 1], &c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); d__[p] *= cs; d__[q] /= cs; if (rsvec) { d__1 = -t * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &c__1, &v[p * v_dim1 + 1], & c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & c__1); } } else { d__1 = t * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[p * a_dim1 + 1], &c__1); d__[p] /= cs; d__[q] *= cs; if (rsvec) { d__1 = t * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &c__1, &v[p * v_dim1 + 1], & c__1); } } } } } } else { /* .. have to use modified Gram-Schmidt like transformation */ _starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, & work[1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aapp, & c_b42, m, &c__1, &work[1], lda, &ierr); _starpu_dlascl_("G", &c__0, &c__0, &aaqq, & c_b42, m, &c__1, &a[q * a_dim1 + 1], lda, &ierr); temp1 = -aapq * d__[p] / d__[q]; _starpu_daxpy_(m, &temp1, &work[1], &c__1, &a[ q * a_dim1 + 1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &c_b42, & aaqq, m, &c__1, &a[q * a_dim1 + 1], lda, &ierr); /* Computing MAX */ d__1 = 0., d__2 = 1. - aapq * aapq; sva[q] = aaqq * sqrt((max(d__1,d__2))) ; mxsinj = max(mxsinj,*sfmin); } /* END IF ROTOK THEN ... ELSE */ /* In the case of cancellation in updating SVA(q), SVA(p) */ /* recompute SVA(q), SVA(p). */ /* Computing 2nd power */ d__1 = sva[q] / aaqq; if (d__1 * d__1 <= rooteps) { if (aaqq < rootbig && aaqq > rootsfmin) { sva[q] = _starpu_dnrm2_(m, &a[q * a_dim1 + 1], &c__1) * d__[q]; } else { t = 0.; aaqq = 0.; _starpu_dlassq_(m, &a[q * a_dim1 + 1], & c__1, &t, &aaqq); sva[q] = t * sqrt(aaqq) * d__[q]; } } if (aapp / aapp0 <= rooteps) { if (aapp < rootbig && aapp > rootsfmin) { aapp = _starpu_dnrm2_(m, &a[p * a_dim1 + 1], &c__1) * d__[p]; } else { t = 0.; aapp = 0.; _starpu_dlassq_(m, &a[p * a_dim1 + 1], & c__1, &t, &aapp); aapp = t * sqrt(aapp) * d__[p]; } sva[p] = aapp; } } else { /* A(:,p) and A(:,q) already numerically orthogonal */ if (ir1 == 0) { ++notrot; } ++pskipped; } } else { /* A(:,q) is zero column */ if (ir1 == 0) { ++notrot; } ++pskipped; } if (i__ <= swband && pskipped > rowskip) { if (ir1 == 0) { aapp = -aapp; } notrot = 0; goto L2103; } /* L2002: */ } /* END q-LOOP */ L2103: /* bailed out of q-loop */ sva[p] = aapp; } else { sva[p] = aapp; if (ir1 == 0 && aapp == 0.) { /* Computing MIN */ i__5 = igl + kbl - 1; notrot = notrot + min(i__5,*n) - p; } } /* L2001: */ } /* end of the p-loop */ /* end of doing the block ( ibr, ibr ) */ /* L1002: */ } /* end of ir1-loop */ /* ........................................................ */ /* ... go to the off diagonal blocks */ igl = (ibr - 1) * kbl + 1; i__3 = nbl; for (jbc = ibr + 1; jbc <= i__3; ++jbc) { jgl = (jbc - 1) * kbl + 1; /* doing the block at ( ibr, jbc ) */ ijblsk = 0; /* Computing MIN */ i__5 = igl + kbl - 1; i__4 = min(i__5,*n); for (p = igl; p <= i__4; ++p) { aapp = sva[p]; if (aapp > 0.) { pskipped = 0; /* Computing MIN */ i__6 = jgl + kbl - 1; i__5 = min(i__6,*n); for (q = jgl; q <= i__5; ++q) { aaqq = sva[q]; if (aaqq > 0.) { aapp0 = aapp; /* -#- M x 2 Jacobi SVD -#- */ /* -#- Safe Gram matrix computation -#- */ if (aaqq >= 1.) { if (aapp >= aaqq) { rotok = small * aapp <= aaqq; } else { rotok = small * aaqq <= aapp; } if (aapp < big / aaqq) { aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & c__1, &a[q * a_dim1 + 1], & c__1) * d__[p] * d__[q] / aaqq / aapp; } else { _starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, & work[1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aapp, & d__[p], m, &c__1, &work[1], lda, &ierr); aapq = _starpu_ddot_(m, &work[1], &c__1, &a[q * a_dim1 + 1], &c__1) * d__[q] / aaqq; } } else { if (aapp >= aaqq) { rotok = aapp <= aaqq / small; } else { rotok = aaqq <= aapp / small; } if (aapp > small / aaqq) { aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & c__1, &a[q * a_dim1 + 1], & c__1) * d__[p] * d__[q] / aaqq / aapp; } else { _starpu_dcopy_(m, &a[q * a_dim1 + 1], &c__1, & work[1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aaqq, & d__[q], m, &c__1, &work[1], lda, &ierr); aapq = _starpu_ddot_(m, &work[1], &c__1, &a[p * a_dim1 + 1], &c__1) * d__[p] / aapp; } } /* Computing MAX */ d__1 = mxaapq, d__2 = abs(aapq); mxaapq = max(d__1,d__2); /* TO rotate or NOT to rotate, THAT is the question ... */ if (abs(aapq) > *tol) { notrot = 0; /* ROTATED = ROTATED + 1 */ pskipped = 0; ++iswrot; if (rotok) { aqoap = aaqq / aapp; apoaq = aapp / aaqq; theta = (d__1 = aqoap - apoaq, abs( d__1)) * -.5 / aapq; if (aaqq > aapp0) { theta = -theta; } if (abs(theta) > bigtheta) { t = .5 / theta; fastr[2] = t * d__[p] / d__[q]; fastr[3] = -t * d__[q] / d__[p]; _starpu_drotm_(m, &a[p * a_dim1 + 1], & c__1, &a[q * a_dim1 + 1], &c__1, fastr); if (rsvec) { _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], &c__1, fastr); } /* Computing MAX */ d__1 = 0., d__2 = t * apoaq * aapq + 1.; sva[q] = aaqq * sqrt((max(d__1, d__2))); /* Computing MAX */ d__1 = 0., d__2 = 1. - t * aqoap * aapq; aapp *= sqrt((max(d__1,d__2))); /* Computing MAX */ d__1 = mxsinj, d__2 = abs(t); mxsinj = max(d__1,d__2); } else { /* .. choose correct signum for THETA and rotate */ thsign = -d_sign(&c_b42, &aapq); if (aaqq > aapp0) { thsign = -thsign; } t = 1. / (theta + thsign * sqrt( theta * theta + 1.)); cs = sqrt(1. / (t * t + 1.)); sn = t * cs; /* Computing MAX */ d__1 = mxsinj, d__2 = abs(sn); mxsinj = max(d__1,d__2); /* Computing MAX */ d__1 = 0., d__2 = t * apoaq * aapq + 1.; sva[q] = aaqq * sqrt((max(d__1, d__2))); aapp *= sqrt(1. - t * aqoap * aapq); apoaq = d__[p] / d__[q]; aqoap = d__[q] / d__[p]; if (d__[p] >= 1.) { if (d__[q] >= 1.) { fastr[2] = t * apoaq; fastr[3] = -t * aqoap; d__[p] *= cs; d__[q] *= cs; _starpu_drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1, fastr); if (rsvec) { _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ q * v_dim1 + 1], &c__1, fastr); } } else { d__1 = -t * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ p * a_dim1 + 1], &c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ q * a_dim1 + 1], &c__1); if (rsvec) { d__1 = -t * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & c__1, &v[p * v_dim1 + 1], &c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & c__1, &v[q * v_dim1 + 1], &c__1); } d__[p] *= cs; d__[q] /= cs; } } else { if (d__[q] >= 1.) { d__1 = t * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ q * a_dim1 + 1], &c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ p * a_dim1 + 1], &c__1); if (rsvec) { d__1 = t * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & c__1, &v[q * v_dim1 + 1], &c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & c__1, &v[p * v_dim1 + 1], &c__1); } d__[p] /= cs; d__[q] *= cs; } else { if (d__[p] >= d__[q]) { d__1 = -t * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[p * a_dim1 + 1], &c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); d__[p] *= cs; d__[q] /= cs; if (rsvec) { d__1 = -t * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &c__1, &v[p * v_dim1 + 1], & c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & c__1); } } else { d__1 = t * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[p * a_dim1 + 1], &c__1); d__[p] /= cs; d__[q] *= cs; if (rsvec) { d__1 = t * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &c__1, &v[p * v_dim1 + 1], & c__1); } } } } } } else { if (aapp > aaqq) { _starpu_dcopy_(m, &a[p * a_dim1 + 1], & c__1, &work[1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aapp, &c_b42, m, &c__1, &work[1] , lda, &ierr); _starpu_dlascl_("G", &c__0, &c__0, &aaqq, &c_b42, m, &c__1, &a[q * a_dim1 + 1], lda, &ierr); temp1 = -aapq * d__[p] / d__[q]; _starpu_daxpy_(m, &temp1, &work[1], &c__1, &a[q * a_dim1 + 1], & c__1); _starpu_dlascl_("G", &c__0, &c__0, &c_b42, &aaqq, m, &c__1, &a[q * a_dim1 + 1], lda, &ierr); /* Computing MAX */ d__1 = 0., d__2 = 1. - aapq * aapq; sva[q] = aaqq * sqrt((max(d__1, d__2))); mxsinj = max(mxsinj,*sfmin); } else { _starpu_dcopy_(m, &a[q * a_dim1 + 1], & c__1, &work[1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aaqq, &c_b42, m, &c__1, &work[1] , lda, &ierr); _starpu_dlascl_("G", &c__0, &c__0, &aapp, &c_b42, m, &c__1, &a[p * a_dim1 + 1], lda, &ierr); temp1 = -aapq * d__[q] / d__[p]; _starpu_daxpy_(m, &temp1, &work[1], &c__1, &a[p * a_dim1 + 1], & c__1); _starpu_dlascl_("G", &c__0, &c__0, &c_b42, &aapp, m, &c__1, &a[p * a_dim1 + 1], lda, &ierr); /* Computing MAX */ d__1 = 0., d__2 = 1. - aapq * aapq; sva[p] = aapp * sqrt((max(d__1, d__2))); mxsinj = max(mxsinj,*sfmin); } } /* END IF ROTOK THEN ... ELSE */ /* In the case of cancellation in updating SVA(q) */ /* .. recompute SVA(q) */ /* Computing 2nd power */ d__1 = sva[q] / aaqq; if (d__1 * d__1 <= rooteps) { if (aaqq < rootbig && aaqq > rootsfmin) { sva[q] = _starpu_dnrm2_(m, &a[q * a_dim1 + 1], &c__1) * d__[q]; } else { t = 0.; aaqq = 0.; _starpu_dlassq_(m, &a[q * a_dim1 + 1], & c__1, &t, &aaqq); sva[q] = t * sqrt(aaqq) * d__[q]; } } /* Computing 2nd power */ d__1 = aapp / aapp0; if (d__1 * d__1 <= rooteps) { if (aapp < rootbig && aapp > rootsfmin) { aapp = _starpu_dnrm2_(m, &a[p * a_dim1 + 1], &c__1) * d__[p]; } else { t = 0.; aapp = 0.; _starpu_dlassq_(m, &a[p * a_dim1 + 1], & c__1, &t, &aapp); aapp = t * sqrt(aapp) * d__[p]; } sva[p] = aapp; } /* end of OK rotation */ } else { ++notrot; ++pskipped; ++ijblsk; } } else { ++notrot; ++pskipped; ++ijblsk; } if (i__ <= swband && ijblsk >= blskip) { sva[p] = aapp; notrot = 0; goto L2011; } if (i__ <= swband && pskipped > rowskip) { aapp = -aapp; notrot = 0; goto L2203; } /* L2200: */ } /* end of the q-loop */ L2203: sva[p] = aapp; } else { if (aapp == 0.) { /* Computing MIN */ i__5 = jgl + kbl - 1; notrot = notrot + min(i__5,*n) - jgl + 1; } if (aapp < 0.) { notrot = 0; } } /* L2100: */ } /* end of the p-loop */ /* L2010: */ } /* end of the jbc-loop */ L2011: /* 2011 bailed out of the jbc-loop */ /* Computing MIN */ i__4 = igl + kbl - 1; i__3 = min(i__4,*n); for (p = igl; p <= i__3; ++p) { sva[p] = (d__1 = sva[p], abs(d__1)); /* L2012: */ } /* L2000: */ } /* 2000 :: end of the ibr-loop */ /* .. update SVA(N) */ if (sva[*n] < rootbig && sva[*n] > rootsfmin) { sva[*n] = _starpu_dnrm2_(m, &a[*n * a_dim1 + 1], &c__1) * d__[*n]; } else { t = 0.; aapp = 0.; _starpu_dlassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp); sva[*n] = t * sqrt(aapp) * d__[*n]; } /* Additional steering devices */ if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) { swband = i__; } if (i__ > swband + 1 && mxaapq < (doublereal) (*n) * *tol && ( doublereal) (*n) * mxaapq * mxsinj < *tol) { goto L1994; } if (notrot >= emptsw) { goto L1994; } /* L1993: */ } /* end i=1:NSWEEP loop */ /* #:) Reaching this point means that the procedure has comleted the given */ /* number of iterations. */ *info = *nsweep - 1; goto L1995; L1994: /* #:) Reaching this point means that during the i-th sweep all pivots were */ /* below the given tolerance, causing early exit. */ *info = 0; /* #:) INFO = 0 confirms successful iterations. */ L1995: /* Sort the vector D. */ i__1 = *n - 1; for (p = 1; p <= i__1; ++p) { i__2 = *n - p + 1; q = _starpu_idamax_(&i__2, &sva[p], &c__1) + p - 1; if (p != q) { temp1 = sva[p]; sva[p] = sva[q]; sva[q] = temp1; temp1 = d__[p]; d__[p] = d__[q]; d__[q] = temp1; _starpu_dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); if (rsvec) { _starpu_dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & c__1); } } /* L5991: */ } return 0; /* .. */ /* .. END OF DGSVJ0 */ /* .. */ } /* _starpu_dgsvj0_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgsvj1.c000066400000000000000000000600271507764646700205770ustar00rootroot00000000000000/* dgsvj1.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__0 = 0; static doublereal c_b35 = 1.; /* Subroutine */ int _starpu_dgsvj1_(char *jobv, integer *m, integer *n, integer *n1, doublereal *a, integer *lda, doublereal *d__, doublereal *sva, integer *mv, doublereal *v, integer *ldv, doublereal *eps, doublereal *sfmin, doublereal *tol, integer *nsweep, doublereal *work, integer * lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ doublereal bigtheta; integer pskipped, i__, p, q; doublereal t, rootsfmin, cs, sn; integer jbc; doublereal big; integer kbl, igl, ibr, jgl, mvl, nblc; doublereal aapp, aapq, aaqq; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); integer nblr, ierr; doublereal aapp0; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); doublereal temp1, large, apoaq, aqoap; extern logical _starpu_lsame_(char *, char *); doublereal theta, small; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal fastr[5]; extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); logical applv, rsvec; extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_drotm_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *); logical rotok; extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); integer ijblsk, swband, blskip; doublereal mxaapq; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); doublereal thsign, mxsinj; integer emptsw, notrot, iswrot; doublereal rootbig, rooteps; integer rowskip; doublereal roottol; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Zlatko Drmac of the University of Zagreb and -- */ /* -- Kresimir Veselic of the Fernuniversitaet Hagen -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* This routine is also part of SIGMA (version 1.23, October 23. 2008.) */ /* SIGMA is a library of algorithms for highly accurate algorithms for */ /* computation of SVD, PSVD, QSVD, (H,K)-SVD, and for solution of the */ /* eigenvalue problems Hx = lambda M x, H M x = lambda x with H, M > 0. */ /* -#- Scalar Arguments -#- */ /* -#- Array Arguments -#- */ /* .. */ /* Purpose */ /* ~~~~~~~ */ /* DGSVJ1 is called from SGESVJ as a pre-processor and that is its main */ /* purpose. It applies Jacobi rotations in the same way as SGESVJ does, but */ /* it targets only particular pivots and it does not check convergence */ /* (stopping criterion). Few tunning parameters (marked by [TP]) are */ /* available for the implementer. */ /* Further details */ /* ~~~~~~~~~~~~~~~ */ /* DGSVJ1 applies few sweeps of Jacobi rotations in the column space of */ /* the input M-by-N matrix A. The pivot pairs are taken from the (1,2) */ /* off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The */ /* block-entries (tiles) of the (1,2) off-diagonal block are marked by the */ /* [x]'s in the following scheme: */ /* | * * * [x] [x] [x]| */ /* | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. */ /* | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. */ /* |[x] [x] [x] * * * | */ /* |[x] [x] [x] * * * | */ /* |[x] [x] [x] * * * | */ /* In terms of the columns of A, the first N1 columns are rotated 'against' */ /* the remaining N-N1 columns, trying to increase the angle between the */ /* corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is */ /* tiled using quadratic tiles of side KBL. Here, KBL is a tunning parmeter. */ /* The number of sweeps is given in NSWEEP and the orthogonality threshold */ /* is given in TOL. */ /* Contributors */ /* ~~~~~~~~~~~~ */ /* Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */ /* Arguments */ /* ~~~~~~~~~ */ /* JOBV (input) CHARACTER*1 */ /* Specifies whether the output from this procedure is used */ /* to compute the matrix V: */ /* = 'V': the product of the Jacobi rotations is accumulated */ /* by postmulyiplying the N-by-N array V. */ /* (See the description of V.) */ /* = 'A': the product of the Jacobi rotations is accumulated */ /* by postmulyiplying the MV-by-N array V. */ /* (See the descriptions of MV and V.) */ /* = 'N': the Jacobi rotations are not accumulated. */ /* M (input) INTEGER */ /* The number of rows of the input matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the input matrix A. */ /* M >= N >= 0. */ /* N1 (input) INTEGER */ /* N1 specifies the 2 x 2 block partition, the first N1 columns are */ /* rotated 'against' the remaining N-N1 columns of A. */ /* A (input/output) REAL array, dimension (LDA,N) */ /* On entry, M-by-N matrix A, such that A*diag(D) represents */ /* the input matrix. */ /* On exit, */ /* A_onexit * D_onexit represents the input matrix A*diag(D) */ /* post-multiplied by a sequence of Jacobi rotations, where the */ /* rotation threshold and the total number of sweeps are given in */ /* TOL and NSWEEP, respectively. */ /* (See the descriptions of N1, D, TOL and NSWEEP.) */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* D (input/workspace/output) REAL array, dimension (N) */ /* The array D accumulates the scaling factors from the fast scaled */ /* Jacobi rotations. */ /* On entry, A*diag(D) represents the input matrix. */ /* On exit, A_onexit*diag(D_onexit) represents the input matrix */ /* post-multiplied by a sequence of Jacobi rotations, where the */ /* rotation threshold and the total number of sweeps are given in */ /* TOL and NSWEEP, respectively. */ /* (See the descriptions of N1, A, TOL and NSWEEP.) */ /* SVA (input/workspace/output) REAL array, dimension (N) */ /* On entry, SVA contains the Euclidean norms of the columns of */ /* the matrix A*diag(D). */ /* On exit, SVA contains the Euclidean norms of the columns of */ /* the matrix onexit*diag(D_onexit). */ /* MV (input) INTEGER */ /* If JOBV .EQ. 'A', then MV rows of V are post-multipled by a */ /* sequence of Jacobi rotations. */ /* If JOBV = 'N', then MV is not referenced. */ /* V (input/output) REAL array, dimension (LDV,N) */ /* If JOBV .EQ. 'V' then N rows of V are post-multipled by a */ /* sequence of Jacobi rotations. */ /* If JOBV .EQ. 'A' then MV rows of V are post-multipled by a */ /* sequence of Jacobi rotations. */ /* If JOBV = 'N', then V is not referenced. */ /* LDV (input) INTEGER */ /* The leading dimension of the array V, LDV >= 1. */ /* If JOBV = 'V', LDV .GE. N. */ /* If JOBV = 'A', LDV .GE. MV. */ /* EPS (input) INTEGER */ /* EPS = SLAMCH('Epsilon') */ /* SFMIN (input) INTEGER */ /* SFMIN = SLAMCH('Safe Minimum') */ /* TOL (input) REAL */ /* TOL is the threshold for Jacobi rotations. For a pair */ /* A(:,p), A(:,q) of pivot columns, the Jacobi rotation is */ /* applied only if DABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL. */ /* NSWEEP (input) INTEGER */ /* NSWEEP is the number of sweeps of Jacobi rotations to be */ /* performed. */ /* WORK (workspace) REAL array, dimension LWORK. */ /* LWORK (input) INTEGER */ /* LWORK is the dimension of WORK. LWORK .GE. M. */ /* INFO (output) INTEGER */ /* = 0 : successful exit. */ /* < 0 : if INFO = -i, then the i-th argument had an illegal value */ /* -#- Local Parameters -#- */ /* -#- Local Scalars -#- */ /* Local Arrays */ /* Intrinsic Functions */ /* External Functions */ /* External Subroutines */ /* Parameter adjustments */ --sva; --d__; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; --work; /* Function Body */ applv = _starpu_lsame_(jobv, "A"); rsvec = _starpu_lsame_(jobv, "V"); if (! (rsvec || applv || _starpu_lsame_(jobv, "N"))) { *info = -1; } else if (*m < 0) { *info = -2; } else if (*n < 0 || *n > *m) { *info = -3; } else if (*n1 < 0) { *info = -4; } else if (*lda < *m) { *info = -6; } else if (*mv < 0) { *info = -9; } else if (*ldv < *m) { *info = -11; } else if (*tol <= *eps) { *info = -14; } else if (*nsweep < 0) { *info = -15; } else if (*lwork < *m) { *info = -17; } else { *info = 0; } /* #:( */ if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGSVJ1", &i__1); return 0; } if (rsvec) { mvl = *n; } else if (applv) { mvl = *mv; } rsvec = rsvec || applv; rooteps = sqrt(*eps); rootsfmin = sqrt(*sfmin); small = *sfmin / *eps; big = 1. / *sfmin; rootbig = 1. / rootsfmin; large = big / sqrt((doublereal) (*m * *n)); bigtheta = 1. / rooteps; roottol = sqrt(*tol); /* -#- Initialize the right singular vector matrix -#- */ /* RSVEC = LSAME( JOBV, 'Y' ) */ emptsw = *n1 * (*n - *n1); notrot = 0; fastr[0] = 0.; /* -#- Row-cyclic pivot strategy with de Rijk's pivoting -#- */ kbl = min(8,*n); nblr = *n1 / kbl; if (nblr * kbl != *n1) { ++nblr; } /* .. the tiling is nblr-by-nblc [tiles] */ nblc = (*n - *n1) / kbl; if (nblc * kbl != *n - *n1) { ++nblc; } /* Computing 2nd power */ i__1 = kbl; blskip = i__1 * i__1 + 1; /* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */ rowskip = min(5,kbl); /* [TP] ROWSKIP is a tuning parameter. */ swband = 0; /* [TP] SWBAND is a tuning parameter. It is meaningful and effective */ /* if SGESVJ is used as a computational routine in the preconditioned */ /* Jacobi SVD algorithm SGESVJ. */ /* | * * * [x] [x] [x]| */ /* | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. */ /* | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. */ /* |[x] [x] [x] * * * | */ /* |[x] [x] [x] * * * | */ /* |[x] [x] [x] * * * | */ i__1 = *nsweep; for (i__ = 1; i__ <= i__1; ++i__) { /* .. go go go ... */ mxaapq = 0.; mxsinj = 0.; iswrot = 0; notrot = 0; pskipped = 0; i__2 = nblr; for (ibr = 1; ibr <= i__2; ++ibr) { igl = (ibr - 1) * kbl + 1; /* ........................................................ */ /* ... go to the off diagonal blocks */ igl = (ibr - 1) * kbl + 1; i__3 = nblc; for (jbc = 1; jbc <= i__3; ++jbc) { jgl = *n1 + (jbc - 1) * kbl + 1; /* doing the block at ( ibr, jbc ) */ ijblsk = 0; /* Computing MIN */ i__5 = igl + kbl - 1; i__4 = min(i__5,*n1); for (p = igl; p <= i__4; ++p) { aapp = sva[p]; if (aapp > 0.) { pskipped = 0; /* Computing MIN */ i__6 = jgl + kbl - 1; i__5 = min(i__6,*n); for (q = jgl; q <= i__5; ++q) { aaqq = sva[q]; if (aaqq > 0.) { aapp0 = aapp; /* -#- M x 2 Jacobi SVD -#- */ /* -#- Safe Gram matrix computation -#- */ if (aaqq >= 1.) { if (aapp >= aaqq) { rotok = small * aapp <= aaqq; } else { rotok = small * aaqq <= aapp; } if (aapp < big / aaqq) { aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & c__1, &a[q * a_dim1 + 1], & c__1) * d__[p] * d__[q] / aaqq / aapp; } else { _starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, & work[1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aapp, & d__[p], m, &c__1, &work[1], lda, &ierr); aapq = _starpu_ddot_(m, &work[1], &c__1, &a[q * a_dim1 + 1], &c__1) * d__[q] / aaqq; } } else { if (aapp >= aaqq) { rotok = aapp <= aaqq / small; } else { rotok = aaqq <= aapp / small; } if (aapp > small / aaqq) { aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], & c__1, &a[q * a_dim1 + 1], & c__1) * d__[p] * d__[q] / aaqq / aapp; } else { _starpu_dcopy_(m, &a[q * a_dim1 + 1], &c__1, & work[1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aaqq, & d__[q], m, &c__1, &work[1], lda, &ierr); aapq = _starpu_ddot_(m, &work[1], &c__1, &a[p * a_dim1 + 1], &c__1) * d__[p] / aapp; } } /* Computing MAX */ d__1 = mxaapq, d__2 = abs(aapq); mxaapq = max(d__1,d__2); /* TO rotate or NOT to rotate, THAT is the question ... */ if (abs(aapq) > *tol) { notrot = 0; /* ROTATED = ROTATED + 1 */ pskipped = 0; ++iswrot; if (rotok) { aqoap = aaqq / aapp; apoaq = aapp / aaqq; theta = (d__1 = aqoap - apoaq, abs( d__1)) * -.5 / aapq; if (aaqq > aapp0) { theta = -theta; } if (abs(theta) > bigtheta) { t = .5 / theta; fastr[2] = t * d__[p] / d__[q]; fastr[3] = -t * d__[q] / d__[p]; _starpu_drotm_(m, &a[p * a_dim1 + 1], & c__1, &a[q * a_dim1 + 1], &c__1, fastr); if (rsvec) { _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], &c__1, fastr); } /* Computing MAX */ d__1 = 0., d__2 = t * apoaq * aapq + 1.; sva[q] = aaqq * sqrt((max(d__1, d__2))); /* Computing MAX */ d__1 = 0., d__2 = 1. - t * aqoap * aapq; aapp *= sqrt((max(d__1,d__2))); /* Computing MAX */ d__1 = mxsinj, d__2 = abs(t); mxsinj = max(d__1,d__2); } else { /* .. choose correct signum for THETA and rotate */ thsign = -d_sign(&c_b35, &aapq); if (aaqq > aapp0) { thsign = -thsign; } t = 1. / (theta + thsign * sqrt( theta * theta + 1.)); cs = sqrt(1. / (t * t + 1.)); sn = t * cs; /* Computing MAX */ d__1 = mxsinj, d__2 = abs(sn); mxsinj = max(d__1,d__2); /* Computing MAX */ d__1 = 0., d__2 = t * apoaq * aapq + 1.; sva[q] = aaqq * sqrt((max(d__1, d__2))); aapp *= sqrt(1. - t * aqoap * aapq); apoaq = d__[p] / d__[q]; aqoap = d__[q] / d__[p]; if (d__[p] >= 1.) { if (d__[q] >= 1.) { fastr[2] = t * apoaq; fastr[3] = -t * aqoap; d__[p] *= cs; d__[q] *= cs; _starpu_drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1, fastr); if (rsvec) { _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ q * v_dim1 + 1], &c__1, fastr); } } else { d__1 = -t * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ p * a_dim1 + 1], &c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ q * a_dim1 + 1], &c__1); if (rsvec) { d__1 = -t * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & c__1, &v[p * v_dim1 + 1], &c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & c__1, &v[q * v_dim1 + 1], &c__1); } d__[p] *= cs; d__[q] /= cs; } } else { if (d__[q] >= 1.) { d__1 = t * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ q * a_dim1 + 1], &c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ p * a_dim1 + 1], &c__1); if (rsvec) { d__1 = t * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & c__1, &v[q * v_dim1 + 1], &c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & c__1, &v[p * v_dim1 + 1], &c__1); } d__[p] /= cs; d__[q] *= cs; } else { if (d__[p] >= d__[q]) { d__1 = -t * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[p * a_dim1 + 1], &c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); d__[p] *= cs; d__[q] /= cs; if (rsvec) { d__1 = -t * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &c__1, &v[p * v_dim1 + 1], & c__1); d__1 = cs * sn * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & c__1); } } else { d__1 = t * apoaq; _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[p * a_dim1 + 1], &c__1); d__[p] /= cs; d__[q] *= cs; if (rsvec) { d__1 = t * apoaq; _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & c__1); d__1 = -cs * sn * aqoap; _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &c__1, &v[p * v_dim1 + 1], & c__1); } } } } } } else { if (aapp > aaqq) { _starpu_dcopy_(m, &a[p * a_dim1 + 1], & c__1, &work[1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aapp, &c_b35, m, &c__1, &work[1] , lda, &ierr); _starpu_dlascl_("G", &c__0, &c__0, &aaqq, &c_b35, m, &c__1, &a[q * a_dim1 + 1], lda, &ierr); temp1 = -aapq * d__[p] / d__[q]; _starpu_daxpy_(m, &temp1, &work[1], &c__1, &a[q * a_dim1 + 1], & c__1); _starpu_dlascl_("G", &c__0, &c__0, &c_b35, &aaqq, m, &c__1, &a[q * a_dim1 + 1], lda, &ierr); /* Computing MAX */ d__1 = 0., d__2 = 1. - aapq * aapq; sva[q] = aaqq * sqrt((max(d__1, d__2))); mxsinj = max(mxsinj,*sfmin); } else { _starpu_dcopy_(m, &a[q * a_dim1 + 1], & c__1, &work[1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &aaqq, &c_b35, m, &c__1, &work[1] , lda, &ierr); _starpu_dlascl_("G", &c__0, &c__0, &aapp, &c_b35, m, &c__1, &a[p * a_dim1 + 1], lda, &ierr); temp1 = -aapq * d__[q] / d__[p]; _starpu_daxpy_(m, &temp1, &work[1], &c__1, &a[p * a_dim1 + 1], & c__1); _starpu_dlascl_("G", &c__0, &c__0, &c_b35, &aapp, m, &c__1, &a[p * a_dim1 + 1], lda, &ierr); /* Computing MAX */ d__1 = 0., d__2 = 1. - aapq * aapq; sva[p] = aapp * sqrt((max(d__1, d__2))); mxsinj = max(mxsinj,*sfmin); } } /* END IF ROTOK THEN ... ELSE */ /* In the case of cancellation in updating SVA(q) */ /* .. recompute SVA(q) */ /* Computing 2nd power */ d__1 = sva[q] / aaqq; if (d__1 * d__1 <= rooteps) { if (aaqq < rootbig && aaqq > rootsfmin) { sva[q] = _starpu_dnrm2_(m, &a[q * a_dim1 + 1], &c__1) * d__[q]; } else { t = 0.; aaqq = 0.; _starpu_dlassq_(m, &a[q * a_dim1 + 1], & c__1, &t, &aaqq); sva[q] = t * sqrt(aaqq) * d__[q]; } } /* Computing 2nd power */ d__1 = aapp / aapp0; if (d__1 * d__1 <= rooteps) { if (aapp < rootbig && aapp > rootsfmin) { aapp = _starpu_dnrm2_(m, &a[p * a_dim1 + 1], &c__1) * d__[p]; } else { t = 0.; aapp = 0.; _starpu_dlassq_(m, &a[p * a_dim1 + 1], & c__1, &t, &aapp); aapp = t * sqrt(aapp) * d__[p]; } sva[p] = aapp; } /* end of OK rotation */ } else { ++notrot; /* SKIPPED = SKIPPED + 1 */ ++pskipped; ++ijblsk; } } else { ++notrot; ++pskipped; ++ijblsk; } /* IF ( NOTROT .GE. EMPTSW ) GO TO 2011 */ if (i__ <= swband && ijblsk >= blskip) { sva[p] = aapp; notrot = 0; goto L2011; } if (i__ <= swband && pskipped > rowskip) { aapp = -aapp; notrot = 0; goto L2203; } /* L2200: */ } /* end of the q-loop */ L2203: sva[p] = aapp; } else { if (aapp == 0.) { /* Computing MIN */ i__5 = jgl + kbl - 1; notrot = notrot + min(i__5,*n) - jgl + 1; } if (aapp < 0.) { notrot = 0; } /* ** IF ( NOTROT .GE. EMPTSW ) GO TO 2011 */ } /* L2100: */ } /* end of the p-loop */ /* L2010: */ } /* end of the jbc-loop */ L2011: /* 2011 bailed out of the jbc-loop */ /* Computing MIN */ i__4 = igl + kbl - 1; i__3 = min(i__4,*n); for (p = igl; p <= i__3; ++p) { sva[p] = (d__1 = sva[p], abs(d__1)); /* L2012: */ } /* ** IF ( NOTROT .GE. EMPTSW ) GO TO 1994 */ /* L2000: */ } /* 2000 :: end of the ibr-loop */ /* .. update SVA(N) */ if (sva[*n] < rootbig && sva[*n] > rootsfmin) { sva[*n] = _starpu_dnrm2_(m, &a[*n * a_dim1 + 1], &c__1) * d__[*n]; } else { t = 0.; aapp = 0.; _starpu_dlassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp); sva[*n] = t * sqrt(aapp) * d__[*n]; } /* Additional steering devices */ if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) { swband = i__; } if (i__ > swband + 1 && mxaapq < (doublereal) (*n) * *tol && ( doublereal) (*n) * mxaapq * mxsinj < *tol) { goto L1994; } if (notrot >= emptsw) { goto L1994; } /* L1993: */ } /* end i=1:NSWEEP loop */ /* #:) Reaching this point means that the procedure has completed the given */ /* number of sweeps. */ *info = *nsweep - 1; goto L1995; L1994: /* #:) Reaching this point means that during the i-th sweep all pivots were */ /* below the given threshold, causing early exit. */ *info = 0; /* #:) INFO = 0 confirms successful iterations. */ L1995: /* Sort the vector D */ i__1 = *n - 1; for (p = 1; p <= i__1; ++p) { i__2 = *n - p + 1; q = _starpu_idamax_(&i__2, &sva[p], &c__1) + p - 1; if (p != q) { temp1 = sva[p]; sva[p] = sva[q]; sva[q] = temp1; temp1 = d__[p]; d__[p] = d__[q]; d__[q] = temp1; _starpu_dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); if (rsvec) { _starpu_dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & c__1); } } /* L5991: */ } return 0; /* .. */ /* .. END OF DGSVJ1 */ /* .. */ } /* _starpu_dgsvj1_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgtcon.c000066400000000000000000000135431507764646700206600ustar00rootroot00000000000000/* dgtcon.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dgtcon_(char *norm, integer *n, doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, integer *ipiv, doublereal *anorm, doublereal *rcond, doublereal *work, integer * iwork, integer *info) { /* System generated locals */ integer i__1; /* Local variables */ integer i__, kase, kase1; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); doublereal ainvnm; logical onenrm; extern /* Subroutine */ int _starpu_dgttrs_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGTCON estimates the reciprocal of the condition number of a real */ /* tridiagonal matrix A using the LU factorization as computed by */ /* DGTTRF. */ /* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ /* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies whether the 1-norm condition number or the */ /* infinity-norm condition number is required: */ /* = '1' or 'O': 1-norm; */ /* = 'I': Infinity-norm. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* DL (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) multipliers that define the matrix L from the */ /* LU factorization of A as computed by DGTTRF. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of the upper triangular matrix U from */ /* the LU factorization of A. */ /* DU (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) elements of the first superdiagonal of U. */ /* DU2 (input) DOUBLE PRECISION array, dimension (N-2) */ /* The (n-2) elements of the second superdiagonal of U. */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices; for 1 <= i <= n, row i of the matrix was */ /* interchanged with row IPIV(i). IPIV(i) will always be either */ /* i or i+1; IPIV(i) = i indicates a row interchange was not */ /* required. */ /* ANORM (input) DOUBLE PRECISION */ /* If NORM = '1' or 'O', the 1-norm of the original matrix A. */ /* If NORM = 'I', the infinity-norm of the original matrix A. */ /* RCOND (output) DOUBLE PRECISION */ /* The reciprocal of the condition number of the matrix A, */ /* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ /* estimate of the 1-norm of inv(A) computed in this routine. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments. */ /* Parameter adjustments */ --iwork; --work; --ipiv; --du2; --du; --d__; --dl; /* Function Body */ *info = 0; onenrm = *(unsigned char *)norm == '1' || _starpu_lsame_(norm, "O"); if (! onenrm && ! _starpu_lsame_(norm, "I")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*anorm < 0.) { *info = -8; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGTCON", &i__1); return 0; } /* Quick return if possible */ *rcond = 0.; if (*n == 0) { *rcond = 1.; return 0; } else if (*anorm == 0.) { return 0; } /* Check that D(1:N) is non-zero. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (d__[i__] == 0.) { return 0; } /* L10: */ } ainvnm = 0.; if (onenrm) { kase1 = 1; } else { kase1 = 2; } kase = 0; L20: _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); if (kase != 0) { if (kase == kase1) { /* Multiply by inv(U)*inv(L). */ _starpu_dgttrs_("No transpose", n, &c__1, &dl[1], &d__[1], &du[1], &du2[1] , &ipiv[1], &work[1], n, info); } else { /* Multiply by inv(L')*inv(U'). */ _starpu_dgttrs_("Transpose", n, &c__1, &dl[1], &d__[1], &du[1], &du2[1], & ipiv[1], &work[1], n, info); } goto L20; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { *rcond = 1. / ainvnm / *anorm; } return 0; /* End of DGTCON */ } /* _starpu_dgtcon_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgtrfs.c000066400000000000000000000337411507764646700206750ustar00rootroot00000000000000/* dgtrfs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b18 = -1.; static doublereal c_b19 = 1.; /* Subroutine */ int _starpu_dgtrfs_(char *trans, integer *n, integer *nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal *dlf, doublereal *df, doublereal *duf, doublereal *du2, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * ferr, doublereal *berr, doublereal *work, integer *iwork, integer * info) { /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; doublereal d__1, d__2, d__3, d__4; /* Local variables */ integer i__, j; doublereal s; integer nz; doublereal eps; integer kase; doublereal safe1, safe2; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer count; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlagtm_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical notran; char transn[1]; extern /* Subroutine */ int _starpu_dgttrs_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); char transt[1]; doublereal lstres; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGTRFS improves the computed solution to a system of linear */ /* equations when the coefficient matrix is tridiagonal, and provides */ /* error bounds and backward error estimates for the solution. */ /* Arguments */ /* ========= */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* DL (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) subdiagonal elements of A. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The diagonal elements of A. */ /* DU (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) superdiagonal elements of A. */ /* DLF (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) multipliers that define the matrix L from the */ /* LU factorization of A as computed by DGTTRF. */ /* DF (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of the upper triangular matrix U from */ /* the LU factorization of A. */ /* DUF (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) elements of the first superdiagonal of U. */ /* DU2 (input) DOUBLE PRECISION array, dimension (N-2) */ /* The (n-2) elements of the second superdiagonal of U. */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices; for 1 <= i <= n, row i of the matrix was */ /* interchanged with row IPIV(i). IPIV(i) will always be either */ /* i or i+1; IPIV(i) = i indicates a row interchange was not */ /* required. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* On entry, the solution matrix X, as computed by DGTTRS. */ /* On exit, the improved solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Internal Parameters */ /* =================== */ /* ITMAX is the maximum number of steps of iterative refinement. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --dl; --d__; --du; --dlf; --df; --duf; --du2; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; notran = _starpu_lsame_(trans, "N"); if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_( trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -13; } else if (*ldx < max(1,*n)) { *info = -15; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGTRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } if (notran) { *(unsigned char *)transn = 'N'; *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transn = 'T'; *(unsigned char *)transt = 'N'; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = 4; eps = _starpu_dlamch_("Epsilon"); safmin = _starpu_dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - op(A) * X, */ /* where op(A) = A, A**T, or A**H, depending on TRANS. */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); _starpu_dlagtm_(trans, n, &c__1, &c_b18, &dl[1], &d__[1], &du[1], &x[j * x_dim1 + 1], ldx, &c_b19, &work[*n + 1], n); /* Compute abs(op(A))*abs(x) + abs(b) for use in the backward */ /* error bound. */ if (notran) { if (*n == 1) { work[1] = (d__1 = b[j * b_dim1 + 1], abs(d__1)) + (d__2 = d__[ 1] * x[j * x_dim1 + 1], abs(d__2)); } else { work[1] = (d__1 = b[j * b_dim1 + 1], abs(d__1)) + (d__2 = d__[ 1] * x[j * x_dim1 + 1], abs(d__2)) + (d__3 = du[1] * x[j * x_dim1 + 2], abs(d__3)); i__2 = *n - 1; for (i__ = 2; i__ <= i__2; ++i__) { work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)) + ( d__2 = dl[i__ - 1] * x[i__ - 1 + j * x_dim1], abs( d__2)) + (d__3 = d__[i__] * x[i__ + j * x_dim1], abs(d__3)) + (d__4 = du[i__] * x[i__ + 1 + j * x_dim1], abs(d__4)); /* L30: */ } work[*n] = (d__1 = b[*n + j * b_dim1], abs(d__1)) + (d__2 = dl[*n - 1] * x[*n - 1 + j * x_dim1], abs(d__2)) + ( d__3 = d__[*n] * x[*n + j * x_dim1], abs(d__3)); } } else { if (*n == 1) { work[1] = (d__1 = b[j * b_dim1 + 1], abs(d__1)) + (d__2 = d__[ 1] * x[j * x_dim1 + 1], abs(d__2)); } else { work[1] = (d__1 = b[j * b_dim1 + 1], abs(d__1)) + (d__2 = d__[ 1] * x[j * x_dim1 + 1], abs(d__2)) + (d__3 = dl[1] * x[j * x_dim1 + 2], abs(d__3)); i__2 = *n - 1; for (i__ = 2; i__ <= i__2; ++i__) { work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)) + ( d__2 = du[i__ - 1] * x[i__ - 1 + j * x_dim1], abs( d__2)) + (d__3 = d__[i__] * x[i__ + j * x_dim1], abs(d__3)) + (d__4 = dl[i__] * x[i__ + 1 + j * x_dim1], abs(d__4)); /* L40: */ } work[*n] = (d__1 = b[*n + j * b_dim1], abs(d__1)) + (d__2 = du[*n - 1] * x[*n - 1 + j * x_dim1], abs(d__2)) + ( d__3 = d__[*n] * x[*n + j * x_dim1], abs(d__3)); } } /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. If the i-th component of the denominator is less */ /* than SAFE2, then SAFE1 is added to the i-th components of the */ /* numerator and denominator before dividing. */ s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L50: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if */ /* 1) The residual BERR(J) is larger than machine epsilon, and */ /* 2) BERR(J) decreased by at least a factor of 2 during the */ /* last iteration, and */ /* 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { /* Update solution and try again. */ _starpu_dgttrs_(trans, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[ 1], &work[*n + 1], n, info); _starpu_daxpy_(n, &c_b19, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) ; lstres = berr[j]; ++count; goto L20; } /* Bound error from formula */ /* norm(X - XTRUE) / norm(X) .le. FERR = */ /* norm( abs(inv(op(A)))* */ /* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ /* where */ /* norm(Z) is the magnitude of the largest component of Z */ /* inv(op(A)) is the inverse of op(A) */ /* abs(Z) is the componentwise absolute value of the matrix or */ /* vector Z */ /* NZ is the maximum number of nonzeros in any row of A, plus 1 */ /* EPS is machine epsilon */ /* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ /* is incremented by SAFE1 if the i-th component of */ /* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ /* Use DLACN2 to estimate the infinity-norm of the matrix */ /* inv(op(A)) * diag(W), */ /* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L60: */ } kase = 0; L70: _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase, isave); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(op(A)**T). */ _starpu_dgttrs_(transt, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], & ipiv[1], &work[*n + 1], n, info); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L80: */ } } else { /* Multiply by inv(op(A))*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L90: */ } _starpu_dgttrs_(transn, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], & ipiv[1], &work[*n + 1], n, info); } goto L70; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); lstres = max(d__2,d__3); /* L100: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L110: */ } return 0; /* End of DGTRFS */ } /* _starpu_dgtrfs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgtsv.c000066400000000000000000000200771507764646700205310ustar00rootroot00000000000000/* dgtsv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgtsv_(integer *n, integer *nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer b_dim1, b_offset, i__1, i__2; doublereal d__1, d__2; /* Local variables */ integer i__, j; doublereal fact, temp; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGTSV solves the equation */ /* A*X = B, */ /* where A is an n by n tridiagonal matrix, by Gaussian elimination with */ /* partial pivoting. */ /* Note that the equation A'*X = B may be solved by interchanging the */ /* order of the arguments DU and DL. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* DL (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, DL must contain the (n-1) sub-diagonal elements of */ /* A. */ /* On exit, DL is overwritten by the (n-2) elements of the */ /* second super-diagonal of the upper triangular matrix U from */ /* the LU factorization of A, in DL(1), ..., DL(n-2). */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, D must contain the diagonal elements of A. */ /* On exit, D is overwritten by the n diagonal elements of U. */ /* DU (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, DU must contain the (n-1) super-diagonal elements */ /* of A. */ /* On exit, DU is overwritten by the (n-1) elements of the first */ /* super-diagonal of U. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N by NRHS matrix of right hand side matrix B. */ /* On exit, if INFO = 0, the N by NRHS solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, U(i,i) is exactly zero, and the solution */ /* has not been computed. The factorization has not been */ /* completed unless i = N. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --dl; --d__; --du; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*nrhs < 0) { *info = -2; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGTSV ", &i__1); return 0; } if (*n == 0) { return 0; } if (*nrhs == 1) { i__1 = *n - 2; for (i__ = 1; i__ <= i__1; ++i__) { if ((d__1 = d__[i__], abs(d__1)) >= (d__2 = dl[i__], abs(d__2))) { /* No row interchange required */ if (d__[i__] != 0.) { fact = dl[i__] / d__[i__]; d__[i__ + 1] -= fact * du[i__]; b[i__ + 1 + b_dim1] -= fact * b[i__ + b_dim1]; } else { *info = i__; return 0; } dl[i__] = 0.; } else { /* Interchange rows I and I+1 */ fact = d__[i__] / dl[i__]; d__[i__] = dl[i__]; temp = d__[i__ + 1]; d__[i__ + 1] = du[i__] - fact * temp; dl[i__] = du[i__ + 1]; du[i__ + 1] = -fact * dl[i__]; du[i__] = temp; temp = b[i__ + b_dim1]; b[i__ + b_dim1] = b[i__ + 1 + b_dim1]; b[i__ + 1 + b_dim1] = temp - fact * b[i__ + 1 + b_dim1]; } /* L10: */ } if (*n > 1) { i__ = *n - 1; if ((d__1 = d__[i__], abs(d__1)) >= (d__2 = dl[i__], abs(d__2))) { if (d__[i__] != 0.) { fact = dl[i__] / d__[i__]; d__[i__ + 1] -= fact * du[i__]; b[i__ + 1 + b_dim1] -= fact * b[i__ + b_dim1]; } else { *info = i__; return 0; } } else { fact = d__[i__] / dl[i__]; d__[i__] = dl[i__]; temp = d__[i__ + 1]; d__[i__ + 1] = du[i__] - fact * temp; du[i__] = temp; temp = b[i__ + b_dim1]; b[i__ + b_dim1] = b[i__ + 1 + b_dim1]; b[i__ + 1 + b_dim1] = temp - fact * b[i__ + 1 + b_dim1]; } } if (d__[*n] == 0.) { *info = *n; return 0; } } else { i__1 = *n - 2; for (i__ = 1; i__ <= i__1; ++i__) { if ((d__1 = d__[i__], abs(d__1)) >= (d__2 = dl[i__], abs(d__2))) { /* No row interchange required */ if (d__[i__] != 0.) { fact = dl[i__] / d__[i__]; d__[i__ + 1] -= fact * du[i__]; i__2 = *nrhs; for (j = 1; j <= i__2; ++j) { b[i__ + 1 + j * b_dim1] -= fact * b[i__ + j * b_dim1]; /* L20: */ } } else { *info = i__; return 0; } dl[i__] = 0.; } else { /* Interchange rows I and I+1 */ fact = d__[i__] / dl[i__]; d__[i__] = dl[i__]; temp = d__[i__ + 1]; d__[i__ + 1] = du[i__] - fact * temp; dl[i__] = du[i__ + 1]; du[i__ + 1] = -fact * dl[i__]; du[i__] = temp; i__2 = *nrhs; for (j = 1; j <= i__2; ++j) { temp = b[i__ + j * b_dim1]; b[i__ + j * b_dim1] = b[i__ + 1 + j * b_dim1]; b[i__ + 1 + j * b_dim1] = temp - fact * b[i__ + 1 + j * b_dim1]; /* L30: */ } } /* L40: */ } if (*n > 1) { i__ = *n - 1; if ((d__1 = d__[i__], abs(d__1)) >= (d__2 = dl[i__], abs(d__2))) { if (d__[i__] != 0.) { fact = dl[i__] / d__[i__]; d__[i__ + 1] -= fact * du[i__]; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { b[i__ + 1 + j * b_dim1] -= fact * b[i__ + j * b_dim1]; /* L50: */ } } else { *info = i__; return 0; } } else { fact = d__[i__] / dl[i__]; d__[i__] = dl[i__]; temp = d__[i__ + 1]; d__[i__ + 1] = du[i__] - fact * temp; du[i__] = temp; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { temp = b[i__ + j * b_dim1]; b[i__ + j * b_dim1] = b[i__ + 1 + j * b_dim1]; b[i__ + 1 + j * b_dim1] = temp - fact * b[i__ + 1 + j * b_dim1]; /* L60: */ } } } if (d__[*n] == 0.) { *info = *n; return 0; } } /* Back solve with the matrix U from the factorization. */ if (*nrhs <= 2) { j = 1; L70: b[*n + j * b_dim1] /= d__[*n]; if (*n > 1) { b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n - 1] * b[ *n + j * b_dim1]) / d__[*n - 1]; } for (i__ = *n - 2; i__ >= 1; --i__) { b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[i__ + 1 + j * b_dim1] - dl[i__] * b[i__ + 2 + j * b_dim1]) / d__[ i__]; /* L80: */ } if (j < *nrhs) { ++j; goto L70; } } else { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { b[*n + j * b_dim1] /= d__[*n]; if (*n > 1) { b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n - 1] * b[*n + j * b_dim1]) / d__[*n - 1]; } for (i__ = *n - 2; i__ >= 1; --i__) { b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[i__ + 1 + j * b_dim1] - dl[i__] * b[i__ + 2 + j * b_dim1]) / d__[i__]; /* L90: */ } /* L100: */ } } return 0; /* End of DGTSV */ } /* _starpu_dgtsv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgtsvx.c000066400000000000000000000322171507764646700207200ustar00rootroot00000000000000/* dgtsvx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dgtsvx_(char *fact, char *trans, integer *n, integer * nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal * dlf, doublereal *df, doublereal *duf, doublereal *du2, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * iwork, integer *info) { /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1; /* Local variables */ char norm[1]; extern logical _starpu_lsame_(char *, char *); doublereal anorm; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *), _starpu_dlangt_(char *, integer *, doublereal *, doublereal *, doublereal *); logical nofact; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dgtcon_(char *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dgtrfs_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dgttrf_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); logical notran; extern /* Subroutine */ int _starpu_dgttrs_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGTSVX uses the LU factorization to compute the solution to a real */ /* system of linear equations A * X = B or A**T * X = B, */ /* where A is a tridiagonal matrix of order N and X and B are N-by-NRHS */ /* matrices. */ /* Error bounds on the solution and a condition estimate are also */ /* provided. */ /* Description */ /* =========== */ /* The following steps are performed: */ /* 1. If FACT = 'N', the LU decomposition is used to factor the matrix A */ /* as A = L * U, where L is a product of permutation and unit lower */ /* bidiagonal matrices and U is upper triangular with nonzeros in */ /* only the main diagonal and first two superdiagonals. */ /* 2. If some U(i,i)=0, so that U is exactly singular, then the routine */ /* returns with INFO = i. Otherwise, the factored form of A is used */ /* to estimate the condition number of the matrix A. If the */ /* reciprocal of the condition number is less than machine precision, */ /* INFO = N+1 is returned as a warning, but the routine still goes on */ /* to solve for X and compute error bounds as described below. */ /* 3. The system of equations is solved for X using the factored form */ /* of A. */ /* 4. Iterative refinement is applied to improve the computed solution */ /* matrix and calculate error bounds and backward error estimates */ /* for it. */ /* Arguments */ /* ========= */ /* FACT (input) CHARACTER*1 */ /* Specifies whether or not the factored form of A has been */ /* supplied on entry. */ /* = 'F': DLF, DF, DUF, DU2, and IPIV contain the factored */ /* form of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV */ /* will not be modified. */ /* = 'N': The matrix will be copied to DLF, DF, and DUF */ /* and factored. */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* DL (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) subdiagonal elements of A. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of A. */ /* DU (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) superdiagonal elements of A. */ /* DLF (input or output) DOUBLE PRECISION array, dimension (N-1) */ /* If FACT = 'F', then DLF is an input argument and on entry */ /* contains the (n-1) multipliers that define the matrix L from */ /* the LU factorization of A as computed by DGTTRF. */ /* If FACT = 'N', then DLF is an output argument and on exit */ /* contains the (n-1) multipliers that define the matrix L from */ /* the LU factorization of A. */ /* DF (input or output) DOUBLE PRECISION array, dimension (N) */ /* If FACT = 'F', then DF is an input argument and on entry */ /* contains the n diagonal elements of the upper triangular */ /* matrix U from the LU factorization of A. */ /* If FACT = 'N', then DF is an output argument and on exit */ /* contains the n diagonal elements of the upper triangular */ /* matrix U from the LU factorization of A. */ /* DUF (input or output) DOUBLE PRECISION array, dimension (N-1) */ /* If FACT = 'F', then DUF is an input argument and on entry */ /* contains the (n-1) elements of the first superdiagonal of U. */ /* If FACT = 'N', then DUF is an output argument and on exit */ /* contains the (n-1) elements of the first superdiagonal of U. */ /* DU2 (input or output) DOUBLE PRECISION array, dimension (N-2) */ /* If FACT = 'F', then DU2 is an input argument and on entry */ /* contains the (n-2) elements of the second superdiagonal of */ /* U. */ /* If FACT = 'N', then DU2 is an output argument and on exit */ /* contains the (n-2) elements of the second superdiagonal of */ /* U. */ /* IPIV (input or output) INTEGER array, dimension (N) */ /* If FACT = 'F', then IPIV is an input argument and on entry */ /* contains the pivot indices from the LU factorization of A as */ /* computed by DGTTRF. */ /* If FACT = 'N', then IPIV is an output argument and on exit */ /* contains the pivot indices from the LU factorization of A; */ /* row i of the matrix was interchanged with row IPIV(i). */ /* IPIV(i) will always be either i or i+1; IPIV(i) = i indicates */ /* a row interchange was not required. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The N-by-NRHS right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* The estimate of the reciprocal condition number of the matrix */ /* A. If RCOND is less than the machine precision (in */ /* particular, if RCOND = 0), the matrix is singular to working */ /* precision. This condition is indicated by a return code of */ /* INFO > 0. */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, and i is */ /* <= N: U(i,i) is exactly zero. The factorization */ /* has not been completed unless i = N, but the */ /* factor U is exactly singular, so the solution */ /* and error bounds could not be computed. */ /* RCOND = 0 is returned. */ /* = N+1: U is nonsingular, but RCOND is less than machine */ /* precision, meaning that the matrix is singular */ /* to working precision. Nevertheless, the */ /* solution and error bounds are computed because */ /* there are a number of situations where the */ /* computed solution can be more accurate than the */ /* value of RCOND would suggest. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --dl; --d__; --du; --dlf; --df; --duf; --du2; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; nofact = _starpu_lsame_(fact, "N"); notran = _starpu_lsame_(trans, "N"); if (! nofact && ! _starpu_lsame_(fact, "F")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*ldb < max(1,*n)) { *info = -14; } else if (*ldx < max(1,*n)) { *info = -16; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGTSVX", &i__1); return 0; } if (nofact) { /* Compute the LU factorization of A. */ _starpu_dcopy_(n, &d__[1], &c__1, &df[1], &c__1); if (*n > 1) { i__1 = *n - 1; _starpu_dcopy_(&i__1, &dl[1], &c__1, &dlf[1], &c__1); i__1 = *n - 1; _starpu_dcopy_(&i__1, &du[1], &c__1, &duf[1], &c__1); } _starpu_dgttrf_(n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], info); /* Return if INFO is non-zero. */ if (*info > 0) { *rcond = 0.; return 0; } } /* Compute the norm of the matrix A. */ if (notran) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = _starpu_dlangt_(norm, n, &dl[1], &d__[1], &du[1]); /* Compute the reciprocal of the condition number of A. */ _starpu_dgtcon_(norm, n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &anorm, rcond, &work[1], &iwork[1], info); /* Compute the solution vectors X. */ _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); _starpu_dgttrs_(trans, n, nrhs, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &x[ x_offset], ldx, info); /* Use iterative refinement to improve the computed solutions and */ /* compute error bounds and backward error estimates for them. */ _starpu_dgtrfs_(trans, n, nrhs, &dl[1], &d__[1], &du[1], &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1] , &berr[1], &work[1], &iwork[1], info); /* Set INFO = N+1 if the matrix is singular to working precision. */ if (*rcond < _starpu_dlamch_("Epsilon")) { *info = *n + 1; } return 0; /* End of DGTSVX */ } /* _starpu_dgtsvx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgttrf.c000066400000000000000000000127371507764646700207000ustar00rootroot00000000000000/* dgttrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgttrf_(integer *n, doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, integer *ipiv, integer *info) { /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Local variables */ integer i__; doublereal fact, temp; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGTTRF computes an LU factorization of a real tridiagonal matrix A */ /* using elimination with partial pivoting and row interchanges. */ /* The factorization has the form */ /* A = L * U */ /* where L is a product of permutation and unit lower bidiagonal */ /* matrices and U is upper triangular with nonzeros in only the main */ /* diagonal and first two superdiagonals. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. */ /* DL (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, DL must contain the (n-1) sub-diagonal elements of */ /* A. */ /* On exit, DL is overwritten by the (n-1) multipliers that */ /* define the matrix L from the LU factorization of A. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, D must contain the diagonal elements of A. */ /* On exit, D is overwritten by the n diagonal elements of the */ /* upper triangular matrix U from the LU factorization of A. */ /* DU (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, DU must contain the (n-1) super-diagonal elements */ /* of A. */ /* On exit, DU is overwritten by the (n-1) elements of the first */ /* super-diagonal of U. */ /* DU2 (output) DOUBLE PRECISION array, dimension (N-2) */ /* On exit, DU2 is overwritten by the (n-2) elements of the */ /* second super-diagonal of U. */ /* IPIV (output) INTEGER array, dimension (N) */ /* The pivot indices; for 1 <= i <= n, row i of the matrix was */ /* interchanged with row IPIV(i). IPIV(i) will always be either */ /* i or i+1; IPIV(i) = i indicates a row interchange was not */ /* required. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -k, the k-th argument had an illegal value */ /* > 0: if INFO = k, U(k,k) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, and division by zero will occur if it is used */ /* to solve a system of equations. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --ipiv; --du2; --du; --d__; --dl; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; i__1 = -(*info); _starpu_xerbla_("DGTTRF", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Initialize IPIV(i) = i and DU2(I) = 0 */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { ipiv[i__] = i__; /* L10: */ } i__1 = *n - 2; for (i__ = 1; i__ <= i__1; ++i__) { du2[i__] = 0.; /* L20: */ } i__1 = *n - 2; for (i__ = 1; i__ <= i__1; ++i__) { if ((d__1 = d__[i__], abs(d__1)) >= (d__2 = dl[i__], abs(d__2))) { /* No row interchange required, eliminate DL(I) */ if (d__[i__] != 0.) { fact = dl[i__] / d__[i__]; dl[i__] = fact; d__[i__ + 1] -= fact * du[i__]; } } else { /* Interchange rows I and I+1, eliminate DL(I) */ fact = d__[i__] / dl[i__]; d__[i__] = dl[i__]; dl[i__] = fact; temp = du[i__]; du[i__] = d__[i__ + 1]; d__[i__ + 1] = temp - fact * d__[i__ + 1]; du2[i__] = du[i__ + 1]; du[i__ + 1] = -fact * du[i__ + 1]; ipiv[i__] = i__ + 1; } /* L30: */ } if (*n > 1) { i__ = *n - 1; if ((d__1 = d__[i__], abs(d__1)) >= (d__2 = dl[i__], abs(d__2))) { if (d__[i__] != 0.) { fact = dl[i__] / d__[i__]; dl[i__] = fact; d__[i__ + 1] -= fact * du[i__]; } } else { fact = d__[i__] / dl[i__]; d__[i__] = dl[i__]; dl[i__] = fact; temp = du[i__]; du[i__] = d__[i__ + 1]; d__[i__ + 1] = temp - fact * d__[i__ + 1]; ipiv[i__] = i__ + 1; } } /* Check for a zero on the diagonal of U. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (d__[i__] == 0.) { *info = i__; goto L50; } /* L40: */ } L50: return 0; /* End of DGTTRF */ } /* _starpu_dgttrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgttrs.c000066400000000000000000000126651507764646700207150ustar00rootroot00000000000000/* dgttrs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; /* Subroutine */ int _starpu_dgttrs_(char *trans, integer *n, integer *nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, integer *ipiv, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ integer j, jb, nb; extern /* Subroutine */ int _starpu_dgtts2_(integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer itrans; logical notran; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGTTRS solves one of the systems of equations */ /* A*X = B or A'*X = B, */ /* with a tridiagonal matrix A using the LU factorization computed */ /* by DGTTRF. */ /* Arguments */ /* ========= */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations. */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A'* X = B (Transpose) */ /* = 'C': A'* X = B (Conjugate transpose = Transpose) */ /* N (input) INTEGER */ /* The order of the matrix A. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* DL (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) multipliers that define the matrix L from the */ /* LU factorization of A. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of the upper triangular matrix U from */ /* the LU factorization of A. */ /* DU (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) elements of the first super-diagonal of U. */ /* DU2 (input) DOUBLE PRECISION array, dimension (N-2) */ /* The (n-2) elements of the second super-diagonal of U. */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices; for 1 <= i <= n, row i of the matrix was */ /* interchanged with row IPIV(i). IPIV(i) will always be either */ /* i or i+1; IPIV(i) = i indicates a row interchange was not */ /* required. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the matrix of right hand side vectors B. */ /* On exit, B is overwritten by the solution vectors X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --dl; --d__; --du; --du2; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; notran = *(unsigned char *)trans == 'N' || *(unsigned char *)trans == 'n'; if (! notran && ! (*(unsigned char *)trans == 'T' || *(unsigned char *) trans == 't') && ! (*(unsigned char *)trans == 'C' || *(unsigned char *)trans == 'c')) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(*n,1)) { *info = -10; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DGTTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } /* Decode TRANS */ if (notran) { itrans = 0; } else { itrans = 1; } /* Determine the number of right-hand sides to solve at a time. */ if (*nrhs == 1) { nb = 1; } else { /* Computing MAX */ i__1 = 1, i__2 = _starpu_ilaenv_(&c__1, "DGTTRS", trans, n, nrhs, &c_n1, & c_n1); nb = max(i__1,i__2); } if (nb >= *nrhs) { _starpu_dgtts2_(&itrans, n, nrhs, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[1], &b[b_offset], ldb); } else { i__1 = *nrhs; i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__3 = *nrhs - j + 1; jb = min(i__3,nb); _starpu_dgtts2_(&itrans, n, &jb, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[ 1], &b[j * b_dim1 + 1], ldb); /* L10: */ } } /* End of DGTTRS */ return 0; } /* _starpu_dgttrs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dgtts2.c000066400000000000000000000154231507764646700206100ustar00rootroot00000000000000/* dgtts2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dgtts2_(integer *itrans, integer *n, integer *nrhs, doublereal *dl, doublereal *d__, doublereal *du, doublereal *du2, integer *ipiv, doublereal *b, integer *ldb) { /* System generated locals */ integer b_dim1, b_offset, i__1, i__2; /* Local variables */ integer i__, j, ip; doublereal temp; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGTTS2 solves one of the systems of equations */ /* A*X = B or A'*X = B, */ /* with a tridiagonal matrix A using the LU factorization computed */ /* by DGTTRF. */ /* Arguments */ /* ========= */ /* ITRANS (input) INTEGER */ /* Specifies the form of the system of equations. */ /* = 0: A * X = B (No transpose) */ /* = 1: A'* X = B (Transpose) */ /* = 2: A'* X = B (Conjugate transpose = Transpose) */ /* N (input) INTEGER */ /* The order of the matrix A. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* DL (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) multipliers that define the matrix L from the */ /* LU factorization of A. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of the upper triangular matrix U from */ /* the LU factorization of A. */ /* DU (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) elements of the first super-diagonal of U. */ /* DU2 (input) DOUBLE PRECISION array, dimension (N-2) */ /* The (n-2) elements of the second super-diagonal of U. */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices; for 1 <= i <= n, row i of the matrix was */ /* interchanged with row IPIV(i). IPIV(i) will always be either */ /* i or i+1; IPIV(i) = i indicates a row interchange was not */ /* required. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the matrix of right hand side vectors B. */ /* On exit, B is overwritten by the solution vectors X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ --dl; --d__; --du; --du2; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ if (*n == 0 || *nrhs == 0) { return 0; } if (*itrans == 0) { /* Solve A*X = B using the LU factorization of A, */ /* overwriting each right hand side vector with its solution. */ if (*nrhs <= 1) { j = 1; L10: /* Solve L*x = b. */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { ip = ipiv[i__]; temp = b[i__ + 1 - ip + i__ + j * b_dim1] - dl[i__] * b[ip + j * b_dim1]; b[i__ + j * b_dim1] = b[ip + j * b_dim1]; b[i__ + 1 + j * b_dim1] = temp; /* L20: */ } /* Solve U*x = b. */ b[*n + j * b_dim1] /= d__[*n]; if (*n > 1) { b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n - 1] * b[*n + j * b_dim1]) / d__[*n - 1]; } for (i__ = *n - 2; i__ >= 1; --i__) { b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[i__ + 1 + j * b_dim1] - du2[i__] * b[i__ + 2 + j * b_dim1] ) / d__[i__]; /* L30: */ } if (j < *nrhs) { ++j; goto L10; } } else { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { /* Solve L*x = b. */ i__2 = *n - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (ipiv[i__] == i__) { b[i__ + 1 + j * b_dim1] -= dl[i__] * b[i__ + j * b_dim1]; } else { temp = b[i__ + j * b_dim1]; b[i__ + j * b_dim1] = b[i__ + 1 + j * b_dim1]; b[i__ + 1 + j * b_dim1] = temp - dl[i__] * b[i__ + j * b_dim1]; } /* L40: */ } /* Solve U*x = b. */ b[*n + j * b_dim1] /= d__[*n]; if (*n > 1) { b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n - 1] * b[*n + j * b_dim1]) / d__[*n - 1]; } for (i__ = *n - 2; i__ >= 1; --i__) { b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[ i__ + 1 + j * b_dim1] - du2[i__] * b[i__ + 2 + j * b_dim1]) / d__[i__]; /* L50: */ } /* L60: */ } } } else { /* Solve A' * X = B. */ if (*nrhs <= 1) { /* Solve U'*x = b. */ j = 1; L70: b[j * b_dim1 + 1] /= d__[1]; if (*n > 1) { b[j * b_dim1 + 2] = (b[j * b_dim1 + 2] - du[1] * b[j * b_dim1 + 1]) / d__[2]; } i__1 = *n; for (i__ = 3; i__ <= i__1; ++i__) { b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__ - 1] * b[ i__ - 1 + j * b_dim1] - du2[i__ - 2] * b[i__ - 2 + j * b_dim1]) / d__[i__]; /* L80: */ } /* Solve L'*x = b. */ for (i__ = *n - 1; i__ >= 1; --i__) { ip = ipiv[i__]; temp = b[i__ + j * b_dim1] - dl[i__] * b[i__ + 1 + j * b_dim1] ; b[i__ + j * b_dim1] = b[ip + j * b_dim1]; b[ip + j * b_dim1] = temp; /* L90: */ } if (j < *nrhs) { ++j; goto L70; } } else { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { /* Solve U'*x = b. */ b[j * b_dim1 + 1] /= d__[1]; if (*n > 1) { b[j * b_dim1 + 2] = (b[j * b_dim1 + 2] - du[1] * b[j * b_dim1 + 1]) / d__[2]; } i__2 = *n; for (i__ = 3; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__ - 1] * b[i__ - 1 + j * b_dim1] - du2[i__ - 2] * b[i__ - 2 + j * b_dim1]) / d__[i__]; /* L100: */ } for (i__ = *n - 1; i__ >= 1; --i__) { if (ipiv[i__] == i__) { b[i__ + j * b_dim1] -= dl[i__] * b[i__ + 1 + j * b_dim1]; } else { temp = b[i__ + 1 + j * b_dim1]; b[i__ + 1 + j * b_dim1] = b[i__ + j * b_dim1] - dl[ i__] * temp; b[i__ + j * b_dim1] = temp; } /* L110: */ } /* L120: */ } } } /* End of DGTTS2 */ return 0; } /* _starpu_dgtts2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dhgeqz.c000066400000000000000000001273351507764646700206710ustar00rootroot00000000000000/* dhgeqz.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b12 = 0.; static doublereal c_b13 = 1.; static integer c__1 = 1; static integer c__3 = 3; /* Subroutine */ int _starpu_dhgeqz_(char *job, char *compq, char *compz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *t, integer *ldt, doublereal *alphar, doublereal *alphai, doublereal * beta, doublereal *q, integer *ldq, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer h_dim1, h_offset, q_dim1, q_offset, t_dim1, t_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal c__; integer j; doublereal s, v[3], s1, s2, t1, u1, u2, a11, a12, a21, a22, b11, b22, c12, c21; integer jc; doublereal an, bn, cl, cq, cr; integer in; doublereal u12, w11, w12, w21; integer jr; doublereal cz, w22, sl, wi, sr, vs, wr, b1a, b2a, a1i, a2i, b1i, b2i, a1r, a2r, b1r, b2r, wr2, ad11, ad12, ad21, ad22, c11i, c22i; integer jch; doublereal c11r, c22r; logical ilq; doublereal u12l, tau, sqi; logical ilz; doublereal ulp, sqr, szi, szr, ad11l, ad12l, ad21l, ad22l, ad32l, wabs, atol, btol, temp; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *), _starpu_dlag2_( doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); doublereal temp2, s1inv, scale; extern logical _starpu_lsame_(char *, char *); integer iiter, ilast, jiter; doublereal anorm, bnorm; integer maxit; doublereal tempi, tempr; extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlapy3_(doublereal *, doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dlasv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); logical ilazr2; doublereal ascale, bscale; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *); extern doublereal _starpu_dlanhs_(char *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); doublereal safmin; extern /* Subroutine */ int _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); doublereal safmax; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal eshift; logical ilschr; integer icompq, ilastm, ischur; logical ilazro; integer icompz, ifirst, ifrstm, istart; logical ilpivt, lquery; /* -- LAPACK routine (version 3.2.1) -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* -- April 2009 -- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DHGEQZ computes the eigenvalues of a real matrix pair (H,T), */ /* where H is an upper Hessenberg matrix and T is upper triangular, */ /* using the double-shift QZ method. */ /* Matrix pairs of this type are produced by the reduction to */ /* generalized upper Hessenberg form of a real matrix pair (A,B): */ /* A = Q1*H*Z1**T, B = Q1*T*Z1**T, */ /* as computed by DGGHRD. */ /* If JOB='S', then the Hessenberg-triangular pair (H,T) is */ /* also reduced to generalized Schur form, */ /* H = Q*S*Z**T, T = Q*P*Z**T, */ /* where Q and Z are orthogonal matrices, P is an upper triangular */ /* matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 */ /* diagonal blocks. */ /* The 1-by-1 blocks correspond to real eigenvalues of the matrix pair */ /* (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of */ /* eigenvalues. */ /* Additionally, the 2-by-2 upper triangular diagonal blocks of P */ /* corresponding to 2-by-2 blocks of S are reduced to positive diagonal */ /* form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, */ /* P(j,j) > 0, and P(j+1,j+1) > 0. */ /* Optionally, the orthogonal matrix Q from the generalized Schur */ /* factorization may be postmultiplied into an input matrix Q1, and the */ /* orthogonal matrix Z may be postmultiplied into an input matrix Z1. */ /* If Q1 and Z1 are the orthogonal matrices from DGGHRD that reduced */ /* the matrix pair (A,B) to generalized upper Hessenberg form, then the */ /* output matrices Q1*Q and Z1*Z are the orthogonal factors from the */ /* generalized Schur factorization of (A,B): */ /* A = (Q1*Q)*S*(Z1*Z)**T, B = (Q1*Q)*P*(Z1*Z)**T. */ /* To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, */ /* of (A,B)) are computed as a pair of values (alpha,beta), where alpha is */ /* complex and beta real. */ /* If beta is nonzero, lambda = alpha / beta is an eigenvalue of the */ /* generalized nonsymmetric eigenvalue problem (GNEP) */ /* A*x = lambda*B*x */ /* and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the */ /* alternate form of the GNEP */ /* mu*A*y = B*y. */ /* Real eigenvalues can be read directly from the generalized Schur */ /* form: */ /* alpha = S(i,i), beta = P(i,i). */ /* Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix */ /* Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), */ /* pp. 241--256. */ /* Arguments */ /* ========= */ /* JOB (input) CHARACTER*1 */ /* = 'E': Compute eigenvalues only; */ /* = 'S': Compute eigenvalues and the Schur form. */ /* COMPQ (input) CHARACTER*1 */ /* = 'N': Left Schur vectors (Q) are not computed; */ /* = 'I': Q is initialized to the unit matrix and the matrix Q */ /* of left Schur vectors of (H,T) is returned; */ /* = 'V': Q must contain an orthogonal matrix Q1 on entry and */ /* the product Q1*Q is returned. */ /* COMPZ (input) CHARACTER*1 */ /* = 'N': Right Schur vectors (Z) are not computed; */ /* = 'I': Z is initialized to the unit matrix and the matrix Z */ /* of right Schur vectors of (H,T) is returned; */ /* = 'V': Z must contain an orthogonal matrix Z1 on entry and */ /* the product Z1*Z is returned. */ /* N (input) INTEGER */ /* The order of the matrices H, T, Q, and Z. N >= 0. */ /* ILO (input) INTEGER */ /* IHI (input) INTEGER */ /* ILO and IHI mark the rows and columns of H which are in */ /* Hessenberg form. It is assumed that A is already upper */ /* triangular in rows and columns 1:ILO-1 and IHI+1:N. */ /* If N > 0, 1 <= ILO <= IHI <= N; if N = 0, ILO=1 and IHI=0. */ /* H (input/output) DOUBLE PRECISION array, dimension (LDH, N) */ /* On entry, the N-by-N upper Hessenberg matrix H. */ /* On exit, if JOB = 'S', H contains the upper quasi-triangular */ /* matrix S from the generalized Schur factorization; */ /* 2-by-2 diagonal blocks (corresponding to complex conjugate */ /* pairs of eigenvalues) are returned in standard form, with */ /* H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. */ /* If JOB = 'E', the diagonal blocks of H match those of S, but */ /* the rest of H is unspecified. */ /* LDH (input) INTEGER */ /* The leading dimension of the array H. LDH >= max( 1, N ). */ /* T (input/output) DOUBLE PRECISION array, dimension (LDT, N) */ /* On entry, the N-by-N upper triangular matrix T. */ /* On exit, if JOB = 'S', T contains the upper triangular */ /* matrix P from the generalized Schur factorization; */ /* 2-by-2 diagonal blocks of P corresponding to 2-by-2 blocks of S */ /* are reduced to positive diagonal form, i.e., if H(j+1,j) is */ /* non-zero, then T(j+1,j) = T(j,j+1) = 0, T(j,j) > 0, and */ /* T(j+1,j+1) > 0. */ /* If JOB = 'E', the diagonal blocks of T match those of P, but */ /* the rest of T is unspecified. */ /* LDT (input) INTEGER */ /* The leading dimension of the array T. LDT >= max( 1, N ). */ /* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ /* The real parts of each scalar alpha defining an eigenvalue */ /* of GNEP. */ /* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ /* The imaginary parts of each scalar alpha defining an */ /* eigenvalue of GNEP. */ /* If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */ /* positive, then the j-th and (j+1)-st eigenvalues are a */ /* complex conjugate pair, with ALPHAI(j+1) = -ALPHAI(j). */ /* BETA (output) DOUBLE PRECISION array, dimension (N) */ /* The scalars beta that define the eigenvalues of GNEP. */ /* Together, the quantities alpha = (ALPHAR(j),ALPHAI(j)) and */ /* beta = BETA(j) represent the j-th eigenvalue of the matrix */ /* pair (A,B), in one of the forms lambda = alpha/beta or */ /* mu = beta/alpha. Since either lambda or mu may overflow, */ /* they should not, in general, be computed. */ /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */ /* On entry, if COMPZ = 'V', the orthogonal matrix Q1 used in */ /* the reduction of (A,B) to generalized Hessenberg form. */ /* On exit, if COMPZ = 'I', the orthogonal matrix of left Schur */ /* vectors of (H,T), and if COMPZ = 'V', the orthogonal matrix */ /* of left Schur vectors of (A,B). */ /* Not referenced if COMPZ = 'N'. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= 1. */ /* If COMPQ='V' or 'I', then LDQ >= N. */ /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) */ /* On entry, if COMPZ = 'V', the orthogonal matrix Z1 used in */ /* the reduction of (A,B) to generalized Hessenberg form. */ /* On exit, if COMPZ = 'I', the orthogonal matrix of */ /* right Schur vectors of (H,T), and if COMPZ = 'V', the */ /* orthogonal matrix of right Schur vectors of (A,B). */ /* Not referenced if COMPZ = 'N'. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1. */ /* If COMPZ='V' or 'I', then LDZ >= N. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO >= 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,N). */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* = 1,...,N: the QZ iteration did not converge. (H,T) is not */ /* in Schur form, but ALPHAR(i), ALPHAI(i), and */ /* BETA(i), i=INFO+1,...,N should be correct. */ /* = N+1,...,2*N: the shift calculation failed. (H,T) is not */ /* in Schur form, but ALPHAR(i), ALPHAI(i), and */ /* BETA(i), i=INFO-N+1,...,N should be correct. */ /* Further Details */ /* =============== */ /* Iteration counters: */ /* JITER -- counts iterations. */ /* IITER -- counts iterations run since ILAST was last */ /* changed. This is therefore reset only when a 1-by-1 or */ /* 2-by-2 block deflates off the bottom. */ /* ===================================================================== */ /* .. Parameters .. */ /* $ SAFETY = 1.0E+0 ) */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode JOB, COMPQ, COMPZ */ /* Parameter adjustments */ h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; --alphar; --alphai; --beta; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; /* Function Body */ if (_starpu_lsame_(job, "E")) { ilschr = FALSE_; ischur = 1; } else if (_starpu_lsame_(job, "S")) { ilschr = TRUE_; ischur = 2; } else { ischur = 0; } if (_starpu_lsame_(compq, "N")) { ilq = FALSE_; icompq = 1; } else if (_starpu_lsame_(compq, "V")) { ilq = TRUE_; icompq = 2; } else if (_starpu_lsame_(compq, "I")) { ilq = TRUE_; icompq = 3; } else { icompq = 0; } if (_starpu_lsame_(compz, "N")) { ilz = FALSE_; icompz = 1; } else if (_starpu_lsame_(compz, "V")) { ilz = TRUE_; icompz = 2; } else if (_starpu_lsame_(compz, "I")) { ilz = TRUE_; icompz = 3; } else { icompz = 0; } /* Check Argument Values */ *info = 0; work[1] = (doublereal) max(1,*n); lquery = *lwork == -1; if (ischur == 0) { *info = -1; } else if (icompq == 0) { *info = -2; } else if (icompz == 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*ilo < 1) { *info = -5; } else if (*ihi > *n || *ihi < *ilo - 1) { *info = -6; } else if (*ldh < *n) { *info = -8; } else if (*ldt < *n) { *info = -10; } else if (*ldq < 1 || ilq && *ldq < *n) { *info = -15; } else if (*ldz < 1 || ilz && *ldz < *n) { *info = -17; } else if (*lwork < max(1,*n) && ! lquery) { *info = -19; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DHGEQZ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n <= 0) { work[1] = 1.; return 0; } /* Initialize Q and Z */ if (icompq == 3) { _starpu_dlaset_("Full", n, n, &c_b12, &c_b13, &q[q_offset], ldq); } if (icompz == 3) { _starpu_dlaset_("Full", n, n, &c_b12, &c_b13, &z__[z_offset], ldz); } /* Machine Constants */ in = *ihi + 1 - *ilo; safmin = _starpu_dlamch_("S"); safmax = 1. / safmin; ulp = _starpu_dlamch_("E") * _starpu_dlamch_("B"); anorm = _starpu_dlanhs_("F", &in, &h__[*ilo + *ilo * h_dim1], ldh, &work[1]); bnorm = _starpu_dlanhs_("F", &in, &t[*ilo + *ilo * t_dim1], ldt, &work[1]); /* Computing MAX */ d__1 = safmin, d__2 = ulp * anorm; atol = max(d__1,d__2); /* Computing MAX */ d__1 = safmin, d__2 = ulp * bnorm; btol = max(d__1,d__2); ascale = 1. / max(safmin,anorm); bscale = 1. / max(safmin,bnorm); /* Set Eigenvalues IHI+1:N */ i__1 = *n; for (j = *ihi + 1; j <= i__1; ++j) { if (t[j + j * t_dim1] < 0.) { if (ilschr) { i__2 = j; for (jr = 1; jr <= i__2; ++jr) { h__[jr + j * h_dim1] = -h__[jr + j * h_dim1]; t[jr + j * t_dim1] = -t[jr + j * t_dim1]; /* L10: */ } } else { h__[j + j * h_dim1] = -h__[j + j * h_dim1]; t[j + j * t_dim1] = -t[j + j * t_dim1]; } if (ilz) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { z__[jr + j * z_dim1] = -z__[jr + j * z_dim1]; /* L20: */ } } } alphar[j] = h__[j + j * h_dim1]; alphai[j] = 0.; beta[j] = t[j + j * t_dim1]; /* L30: */ } /* If IHI < ILO, skip QZ steps */ if (*ihi < *ilo) { goto L380; } /* MAIN QZ ITERATION LOOP */ /* Initialize dynamic indices */ /* Eigenvalues ILAST+1:N have been found. */ /* Column operations modify rows IFRSTM:whatever. */ /* Row operations modify columns whatever:ILASTM. */ /* If only eigenvalues are being computed, then */ /* IFRSTM is the row of the last splitting row above row ILAST; */ /* this is always at least ILO. */ /* IITER counts iterations since the last eigenvalue was found, */ /* to tell when to use an extraordinary shift. */ /* MAXIT is the maximum number of QZ sweeps allowed. */ ilast = *ihi; if (ilschr) { ifrstm = 1; ilastm = *n; } else { ifrstm = *ilo; ilastm = *ihi; } iiter = 0; eshift = 0.; maxit = (*ihi - *ilo + 1) * 30; i__1 = maxit; for (jiter = 1; jiter <= i__1; ++jiter) { /* Split the matrix if possible. */ /* Two tests: */ /* 1: H(j,j-1)=0 or j=ILO */ /* 2: T(j,j)=0 */ if (ilast == *ilo) { /* Special case: j=ILAST */ goto L80; } else { if ((d__1 = h__[ilast + (ilast - 1) * h_dim1], abs(d__1)) <= atol) { h__[ilast + (ilast - 1) * h_dim1] = 0.; goto L80; } } if ((d__1 = t[ilast + ilast * t_dim1], abs(d__1)) <= btol) { t[ilast + ilast * t_dim1] = 0.; goto L70; } /* General case: j= i__2; --j) { /* Test 1: for H(j,j-1)=0 or j=ILO */ if (j == *ilo) { ilazro = TRUE_; } else { if ((d__1 = h__[j + (j - 1) * h_dim1], abs(d__1)) <= atol) { h__[j + (j - 1) * h_dim1] = 0.; ilazro = TRUE_; } else { ilazro = FALSE_; } } /* Test 2: for T(j,j)=0 */ if ((d__1 = t[j + j * t_dim1], abs(d__1)) < btol) { t[j + j * t_dim1] = 0.; /* Test 1a: Check for 2 consecutive small subdiagonals in A */ ilazr2 = FALSE_; if (! ilazro) { temp = (d__1 = h__[j + (j - 1) * h_dim1], abs(d__1)); temp2 = (d__1 = h__[j + j * h_dim1], abs(d__1)); tempr = max(temp,temp2); if (tempr < 1. && tempr != 0.) { temp /= tempr; temp2 /= tempr; } if (temp * (ascale * (d__1 = h__[j + 1 + j * h_dim1], abs( d__1))) <= temp2 * (ascale * atol)) { ilazr2 = TRUE_; } } /* If both tests pass (1 & 2), i.e., the leading diagonal */ /* element of B in the block is zero, split a 1x1 block off */ /* at the top. (I.e., at the J-th row/column) The leading */ /* diagonal element of the remainder can also be zero, so */ /* this may have to be done repeatedly. */ if (ilazro || ilazr2) { i__3 = ilast - 1; for (jch = j; jch <= i__3; ++jch) { temp = h__[jch + jch * h_dim1]; _starpu_dlartg_(&temp, &h__[jch + 1 + jch * h_dim1], &c__, &s, &h__[jch + jch * h_dim1]); h__[jch + 1 + jch * h_dim1] = 0.; i__4 = ilastm - jch; _starpu_drot_(&i__4, &h__[jch + (jch + 1) * h_dim1], ldh, & h__[jch + 1 + (jch + 1) * h_dim1], ldh, &c__, &s); i__4 = ilastm - jch; _starpu_drot_(&i__4, &t[jch + (jch + 1) * t_dim1], ldt, &t[ jch + 1 + (jch + 1) * t_dim1], ldt, &c__, &s); if (ilq) { _starpu_drot_(n, &q[jch * q_dim1 + 1], &c__1, &q[(jch + 1) * q_dim1 + 1], &c__1, &c__, &s); } if (ilazr2) { h__[jch + (jch - 1) * h_dim1] *= c__; } ilazr2 = FALSE_; if ((d__1 = t[jch + 1 + (jch + 1) * t_dim1], abs(d__1) ) >= btol) { if (jch + 1 >= ilast) { goto L80; } else { ifirst = jch + 1; goto L110; } } t[jch + 1 + (jch + 1) * t_dim1] = 0.; /* L40: */ } goto L70; } else { /* Only test 2 passed -- chase the zero to T(ILAST,ILAST) */ /* Then process as in the case T(ILAST,ILAST)=0 */ i__3 = ilast - 1; for (jch = j; jch <= i__3; ++jch) { temp = t[jch + (jch + 1) * t_dim1]; _starpu_dlartg_(&temp, &t[jch + 1 + (jch + 1) * t_dim1], &c__, &s, &t[jch + (jch + 1) * t_dim1]); t[jch + 1 + (jch + 1) * t_dim1] = 0.; if (jch < ilastm - 1) { i__4 = ilastm - jch - 1; _starpu_drot_(&i__4, &t[jch + (jch + 2) * t_dim1], ldt, & t[jch + 1 + (jch + 2) * t_dim1], ldt, & c__, &s); } i__4 = ilastm - jch + 2; _starpu_drot_(&i__4, &h__[jch + (jch - 1) * h_dim1], ldh, & h__[jch + 1 + (jch - 1) * h_dim1], ldh, &c__, &s); if (ilq) { _starpu_drot_(n, &q[jch * q_dim1 + 1], &c__1, &q[(jch + 1) * q_dim1 + 1], &c__1, &c__, &s); } temp = h__[jch + 1 + jch * h_dim1]; _starpu_dlartg_(&temp, &h__[jch + 1 + (jch - 1) * h_dim1], & c__, &s, &h__[jch + 1 + jch * h_dim1]); h__[jch + 1 + (jch - 1) * h_dim1] = 0.; i__4 = jch + 1 - ifrstm; _starpu_drot_(&i__4, &h__[ifrstm + jch * h_dim1], &c__1, &h__[ ifrstm + (jch - 1) * h_dim1], &c__1, &c__, &s) ; i__4 = jch - ifrstm; _starpu_drot_(&i__4, &t[ifrstm + jch * t_dim1], &c__1, &t[ ifrstm + (jch - 1) * t_dim1], &c__1, &c__, &s) ; if (ilz) { _starpu_drot_(n, &z__[jch * z_dim1 + 1], &c__1, &z__[(jch - 1) * z_dim1 + 1], &c__1, &c__, &s); } /* L50: */ } goto L70; } } else if (ilazro) { /* Only test 1 passed -- work on J:ILAST */ ifirst = j; goto L110; } /* Neither test passed -- try next J */ /* L60: */ } /* (Drop-through is "impossible") */ *info = *n + 1; goto L420; /* T(ILAST,ILAST)=0 -- clear H(ILAST,ILAST-1) to split off a */ /* 1x1 block. */ L70: temp = h__[ilast + ilast * h_dim1]; _starpu_dlartg_(&temp, &h__[ilast + (ilast - 1) * h_dim1], &c__, &s, &h__[ ilast + ilast * h_dim1]); h__[ilast + (ilast - 1) * h_dim1] = 0.; i__2 = ilast - ifrstm; _starpu_drot_(&i__2, &h__[ifrstm + ilast * h_dim1], &c__1, &h__[ifrstm + ( ilast - 1) * h_dim1], &c__1, &c__, &s); i__2 = ilast - ifrstm; _starpu_drot_(&i__2, &t[ifrstm + ilast * t_dim1], &c__1, &t[ifrstm + (ilast - 1) * t_dim1], &c__1, &c__, &s); if (ilz) { _starpu_drot_(n, &z__[ilast * z_dim1 + 1], &c__1, &z__[(ilast - 1) * z_dim1 + 1], &c__1, &c__, &s); } /* H(ILAST,ILAST-1)=0 -- Standardize B, set ALPHAR, ALPHAI, */ /* and BETA */ L80: if (t[ilast + ilast * t_dim1] < 0.) { if (ilschr) { i__2 = ilast; for (j = ifrstm; j <= i__2; ++j) { h__[j + ilast * h_dim1] = -h__[j + ilast * h_dim1]; t[j + ilast * t_dim1] = -t[j + ilast * t_dim1]; /* L90: */ } } else { h__[ilast + ilast * h_dim1] = -h__[ilast + ilast * h_dim1]; t[ilast + ilast * t_dim1] = -t[ilast + ilast * t_dim1]; } if (ilz) { i__2 = *n; for (j = 1; j <= i__2; ++j) { z__[j + ilast * z_dim1] = -z__[j + ilast * z_dim1]; /* L100: */ } } } alphar[ilast] = h__[ilast + ilast * h_dim1]; alphai[ilast] = 0.; beta[ilast] = t[ilast + ilast * t_dim1]; /* Go to next block -- exit if finished. */ --ilast; if (ilast < *ilo) { goto L380; } /* Reset counters */ iiter = 0; eshift = 0.; if (! ilschr) { ilastm = ilast; if (ifrstm > ilast) { ifrstm = *ilo; } } goto L350; /* QZ step */ /* This iteration only involves rows/columns IFIRST:ILAST. We */ /* assume IFIRST < ILAST, and that the diagonal of B is non-zero. */ L110: ++iiter; if (! ilschr) { ifrstm = ifirst; } /* Compute single shifts. */ /* At this point, IFIRST < ILAST, and the diagonal elements of */ /* T(IFIRST:ILAST,IFIRST,ILAST) are larger than BTOL (in */ /* magnitude) */ if (iiter / 10 * 10 == iiter) { /* Exceptional shift. Chosen for no particularly good reason. */ /* (Single shift only.) */ if ((doublereal) maxit * safmin * (d__1 = h__[ilast - 1 + ilast * h_dim1], abs(d__1)) < (d__2 = t[ilast - 1 + (ilast - 1) * t_dim1], abs(d__2))) { eshift += h__[ilast - 1 + ilast * h_dim1] / t[ilast - 1 + ( ilast - 1) * t_dim1]; } else { eshift += 1. / (safmin * (doublereal) maxit); } s1 = 1.; wr = eshift; } else { /* Shifts based on the generalized eigenvalues of the */ /* bottom-right 2x2 block of A and B. The first eigenvalue */ /* returned by DLAG2 is the Wilkinson shift (AEP p.512), */ d__1 = safmin * 100.; _starpu_dlag2_(&h__[ilast - 1 + (ilast - 1) * h_dim1], ldh, &t[ilast - 1 + (ilast - 1) * t_dim1], ldt, &d__1, &s1, &s2, &wr, &wr2, &wi); /* Computing MAX */ /* Computing MAX */ d__3 = 1., d__4 = abs(wr), d__3 = max(d__3,d__4), d__4 = abs(wi); d__1 = s1, d__2 = safmin * max(d__3,d__4); temp = max(d__1,d__2); if (wi != 0.) { goto L200; } } /* Fiddle with shift to avoid overflow */ temp = min(ascale,1.) * (safmax * .5); if (s1 > temp) { scale = temp / s1; } else { scale = 1.; } temp = min(bscale,1.) * (safmax * .5); if (abs(wr) > temp) { /* Computing MIN */ d__1 = scale, d__2 = temp / abs(wr); scale = min(d__1,d__2); } s1 = scale * s1; wr = scale * wr; /* Now check for two consecutive small subdiagonals. */ i__2 = ifirst + 1; for (j = ilast - 1; j >= i__2; --j) { istart = j; temp = (d__1 = s1 * h__[j + (j - 1) * h_dim1], abs(d__1)); temp2 = (d__1 = s1 * h__[j + j * h_dim1] - wr * t[j + j * t_dim1], abs(d__1)); tempr = max(temp,temp2); if (tempr < 1. && tempr != 0.) { temp /= tempr; temp2 /= tempr; } if ((d__1 = ascale * h__[j + 1 + j * h_dim1] * temp, abs(d__1)) <= ascale * atol * temp2) { goto L130; } /* L120: */ } istart = ifirst; L130: /* Do an implicit single-shift QZ sweep. */ /* Initial Q */ temp = s1 * h__[istart + istart * h_dim1] - wr * t[istart + istart * t_dim1]; temp2 = s1 * h__[istart + 1 + istart * h_dim1]; _starpu_dlartg_(&temp, &temp2, &c__, &s, &tempr); /* Sweep */ i__2 = ilast - 1; for (j = istart; j <= i__2; ++j) { if (j > istart) { temp = h__[j + (j - 1) * h_dim1]; _starpu_dlartg_(&temp, &h__[j + 1 + (j - 1) * h_dim1], &c__, &s, &h__[ j + (j - 1) * h_dim1]); h__[j + 1 + (j - 1) * h_dim1] = 0.; } i__3 = ilastm; for (jc = j; jc <= i__3; ++jc) { temp = c__ * h__[j + jc * h_dim1] + s * h__[j + 1 + jc * h_dim1]; h__[j + 1 + jc * h_dim1] = -s * h__[j + jc * h_dim1] + c__ * h__[j + 1 + jc * h_dim1]; h__[j + jc * h_dim1] = temp; temp2 = c__ * t[j + jc * t_dim1] + s * t[j + 1 + jc * t_dim1]; t[j + 1 + jc * t_dim1] = -s * t[j + jc * t_dim1] + c__ * t[j + 1 + jc * t_dim1]; t[j + jc * t_dim1] = temp2; /* L140: */ } if (ilq) { i__3 = *n; for (jr = 1; jr <= i__3; ++jr) { temp = c__ * q[jr + j * q_dim1] + s * q[jr + (j + 1) * q_dim1]; q[jr + (j + 1) * q_dim1] = -s * q[jr + j * q_dim1] + c__ * q[jr + (j + 1) * q_dim1]; q[jr + j * q_dim1] = temp; /* L150: */ } } temp = t[j + 1 + (j + 1) * t_dim1]; _starpu_dlartg_(&temp, &t[j + 1 + j * t_dim1], &c__, &s, &t[j + 1 + (j + 1) * t_dim1]); t[j + 1 + j * t_dim1] = 0.; /* Computing MIN */ i__4 = j + 2; i__3 = min(i__4,ilast); for (jr = ifrstm; jr <= i__3; ++jr) { temp = c__ * h__[jr + (j + 1) * h_dim1] + s * h__[jr + j * h_dim1]; h__[jr + j * h_dim1] = -s * h__[jr + (j + 1) * h_dim1] + c__ * h__[jr + j * h_dim1]; h__[jr + (j + 1) * h_dim1] = temp; /* L160: */ } i__3 = j; for (jr = ifrstm; jr <= i__3; ++jr) { temp = c__ * t[jr + (j + 1) * t_dim1] + s * t[jr + j * t_dim1] ; t[jr + j * t_dim1] = -s * t[jr + (j + 1) * t_dim1] + c__ * t[ jr + j * t_dim1]; t[jr + (j + 1) * t_dim1] = temp; /* L170: */ } if (ilz) { i__3 = *n; for (jr = 1; jr <= i__3; ++jr) { temp = c__ * z__[jr + (j + 1) * z_dim1] + s * z__[jr + j * z_dim1]; z__[jr + j * z_dim1] = -s * z__[jr + (j + 1) * z_dim1] + c__ * z__[jr + j * z_dim1]; z__[jr + (j + 1) * z_dim1] = temp; /* L180: */ } } /* L190: */ } goto L350; /* Use Francis double-shift */ /* Note: the Francis double-shift should work with real shifts, */ /* but only if the block is at least 3x3. */ /* This code may break if this point is reached with */ /* a 2x2 block with real eigenvalues. */ L200: if (ifirst + 1 == ilast) { /* Special case -- 2x2 block with complex eigenvectors */ /* Step 1: Standardize, that is, rotate so that */ /* ( B11 0 ) */ /* B = ( ) with B11 non-negative. */ /* ( 0 B22 ) */ _starpu_dlasv2_(&t[ilast - 1 + (ilast - 1) * t_dim1], &t[ilast - 1 + ilast * t_dim1], &t[ilast + ilast * t_dim1], &b22, &b11, & sr, &cr, &sl, &cl); if (b11 < 0.) { cr = -cr; sr = -sr; b11 = -b11; b22 = -b22; } i__2 = ilastm + 1 - ifirst; _starpu_drot_(&i__2, &h__[ilast - 1 + (ilast - 1) * h_dim1], ldh, &h__[ ilast + (ilast - 1) * h_dim1], ldh, &cl, &sl); i__2 = ilast + 1 - ifrstm; _starpu_drot_(&i__2, &h__[ifrstm + (ilast - 1) * h_dim1], &c__1, &h__[ ifrstm + ilast * h_dim1], &c__1, &cr, &sr); if (ilast < ilastm) { i__2 = ilastm - ilast; _starpu_drot_(&i__2, &t[ilast - 1 + (ilast + 1) * t_dim1], ldt, &t[ ilast + (ilast + 1) * t_dim1], ldt, &cl, &sl); } if (ifrstm < ilast - 1) { i__2 = ifirst - ifrstm; _starpu_drot_(&i__2, &t[ifrstm + (ilast - 1) * t_dim1], &c__1, &t[ ifrstm + ilast * t_dim1], &c__1, &cr, &sr); } if (ilq) { _starpu_drot_(n, &q[(ilast - 1) * q_dim1 + 1], &c__1, &q[ilast * q_dim1 + 1], &c__1, &cl, &sl); } if (ilz) { _starpu_drot_(n, &z__[(ilast - 1) * z_dim1 + 1], &c__1, &z__[ilast * z_dim1 + 1], &c__1, &cr, &sr); } t[ilast - 1 + (ilast - 1) * t_dim1] = b11; t[ilast - 1 + ilast * t_dim1] = 0.; t[ilast + (ilast - 1) * t_dim1] = 0.; t[ilast + ilast * t_dim1] = b22; /* If B22 is negative, negate column ILAST */ if (b22 < 0.) { i__2 = ilast; for (j = ifrstm; j <= i__2; ++j) { h__[j + ilast * h_dim1] = -h__[j + ilast * h_dim1]; t[j + ilast * t_dim1] = -t[j + ilast * t_dim1]; /* L210: */ } if (ilz) { i__2 = *n; for (j = 1; j <= i__2; ++j) { z__[j + ilast * z_dim1] = -z__[j + ilast * z_dim1]; /* L220: */ } } } /* Step 2: Compute ALPHAR, ALPHAI, and BETA (see refs.) */ /* Recompute shift */ d__1 = safmin * 100.; _starpu_dlag2_(&h__[ilast - 1 + (ilast - 1) * h_dim1], ldh, &t[ilast - 1 + (ilast - 1) * t_dim1], ldt, &d__1, &s1, &temp, &wr, & temp2, &wi); /* If standardization has perturbed the shift onto real line, */ /* do another (real single-shift) QR step. */ if (wi == 0.) { goto L350; } s1inv = 1. / s1; /* Do EISPACK (QZVAL) computation of alpha and beta */ a11 = h__[ilast - 1 + (ilast - 1) * h_dim1]; a21 = h__[ilast + (ilast - 1) * h_dim1]; a12 = h__[ilast - 1 + ilast * h_dim1]; a22 = h__[ilast + ilast * h_dim1]; /* Compute complex Givens rotation on right */ /* (Assume some element of C = (sA - wB) > unfl ) */ /* __ */ /* (sA - wB) ( CZ -SZ ) */ /* ( SZ CZ ) */ c11r = s1 * a11 - wr * b11; c11i = -wi * b11; c12 = s1 * a12; c21 = s1 * a21; c22r = s1 * a22 - wr * b22; c22i = -wi * b22; if (abs(c11r) + abs(c11i) + abs(c12) > abs(c21) + abs(c22r) + abs( c22i)) { t1 = _starpu_dlapy3_(&c12, &c11r, &c11i); cz = c12 / t1; szr = -c11r / t1; szi = -c11i / t1; } else { cz = _starpu_dlapy2_(&c22r, &c22i); if (cz <= safmin) { cz = 0.; szr = 1.; szi = 0.; } else { tempr = c22r / cz; tempi = c22i / cz; t1 = _starpu_dlapy2_(&cz, &c21); cz /= t1; szr = -c21 * tempr / t1; szi = c21 * tempi / t1; } } /* Compute Givens rotation on left */ /* ( CQ SQ ) */ /* ( __ ) A or B */ /* ( -SQ CQ ) */ an = abs(a11) + abs(a12) + abs(a21) + abs(a22); bn = abs(b11) + abs(b22); wabs = abs(wr) + abs(wi); if (s1 * an > wabs * bn) { cq = cz * b11; sqr = szr * b22; sqi = -szi * b22; } else { a1r = cz * a11 + szr * a12; a1i = szi * a12; a2r = cz * a21 + szr * a22; a2i = szi * a22; cq = _starpu_dlapy2_(&a1r, &a1i); if (cq <= safmin) { cq = 0.; sqr = 1.; sqi = 0.; } else { tempr = a1r / cq; tempi = a1i / cq; sqr = tempr * a2r + tempi * a2i; sqi = tempi * a2r - tempr * a2i; } } t1 = _starpu_dlapy3_(&cq, &sqr, &sqi); cq /= t1; sqr /= t1; sqi /= t1; /* Compute diagonal elements of QBZ */ tempr = sqr * szr - sqi * szi; tempi = sqr * szi + sqi * szr; b1r = cq * cz * b11 + tempr * b22; b1i = tempi * b22; b1a = _starpu_dlapy2_(&b1r, &b1i); b2r = cq * cz * b22 + tempr * b11; b2i = -tempi * b11; b2a = _starpu_dlapy2_(&b2r, &b2i); /* Normalize so beta > 0, and Im( alpha1 ) > 0 */ beta[ilast - 1] = b1a; beta[ilast] = b2a; alphar[ilast - 1] = wr * b1a * s1inv; alphai[ilast - 1] = wi * b1a * s1inv; alphar[ilast] = wr * b2a * s1inv; alphai[ilast] = -(wi * b2a) * s1inv; /* Step 3: Go to next block -- exit if finished. */ ilast = ifirst - 1; if (ilast < *ilo) { goto L380; } /* Reset counters */ iiter = 0; eshift = 0.; if (! ilschr) { ilastm = ilast; if (ifrstm > ilast) { ifrstm = *ilo; } } goto L350; } else { /* Usual case: 3x3 or larger block, using Francis implicit */ /* double-shift */ /* 2 */ /* Eigenvalue equation is w - c w + d = 0, */ /* -1 2 -1 */ /* so compute 1st column of (A B ) - c A B + d */ /* using the formula in QZIT (from EISPACK) */ /* We assume that the block is at least 3x3 */ ad11 = ascale * h__[ilast - 1 + (ilast - 1) * h_dim1] / (bscale * t[ilast - 1 + (ilast - 1) * t_dim1]); ad21 = ascale * h__[ilast + (ilast - 1) * h_dim1] / (bscale * t[ ilast - 1 + (ilast - 1) * t_dim1]); ad12 = ascale * h__[ilast - 1 + ilast * h_dim1] / (bscale * t[ ilast + ilast * t_dim1]); ad22 = ascale * h__[ilast + ilast * h_dim1] / (bscale * t[ilast + ilast * t_dim1]); u12 = t[ilast - 1 + ilast * t_dim1] / t[ilast + ilast * t_dim1]; ad11l = ascale * h__[ifirst + ifirst * h_dim1] / (bscale * t[ ifirst + ifirst * t_dim1]); ad21l = ascale * h__[ifirst + 1 + ifirst * h_dim1] / (bscale * t[ ifirst + ifirst * t_dim1]); ad12l = ascale * h__[ifirst + (ifirst + 1) * h_dim1] / (bscale * t[ifirst + 1 + (ifirst + 1) * t_dim1]); ad22l = ascale * h__[ifirst + 1 + (ifirst + 1) * h_dim1] / ( bscale * t[ifirst + 1 + (ifirst + 1) * t_dim1]); ad32l = ascale * h__[ifirst + 2 + (ifirst + 1) * h_dim1] / ( bscale * t[ifirst + 1 + (ifirst + 1) * t_dim1]); u12l = t[ifirst + (ifirst + 1) * t_dim1] / t[ifirst + 1 + (ifirst + 1) * t_dim1]; v[0] = (ad11 - ad11l) * (ad22 - ad11l) - ad12 * ad21 + ad21 * u12 * ad11l + (ad12l - ad11l * u12l) * ad21l; v[1] = (ad22l - ad11l - ad21l * u12l - (ad11 - ad11l) - (ad22 - ad11l) + ad21 * u12) * ad21l; v[2] = ad32l * ad21l; istart = ifirst; _starpu_dlarfg_(&c__3, v, &v[1], &c__1, &tau); v[0] = 1.; /* Sweep */ i__2 = ilast - 2; for (j = istart; j <= i__2; ++j) { /* All but last elements: use 3x3 Householder transforms. */ /* Zero (j-1)st column of A */ if (j > istart) { v[0] = h__[j + (j - 1) * h_dim1]; v[1] = h__[j + 1 + (j - 1) * h_dim1]; v[2] = h__[j + 2 + (j - 1) * h_dim1]; _starpu_dlarfg_(&c__3, &h__[j + (j - 1) * h_dim1], &v[1], &c__1, & tau); v[0] = 1.; h__[j + 1 + (j - 1) * h_dim1] = 0.; h__[j + 2 + (j - 1) * h_dim1] = 0.; } i__3 = ilastm; for (jc = j; jc <= i__3; ++jc) { temp = tau * (h__[j + jc * h_dim1] + v[1] * h__[j + 1 + jc * h_dim1] + v[2] * h__[j + 2 + jc * h_dim1]); h__[j + jc * h_dim1] -= temp; h__[j + 1 + jc * h_dim1] -= temp * v[1]; h__[j + 2 + jc * h_dim1] -= temp * v[2]; temp2 = tau * (t[j + jc * t_dim1] + v[1] * t[j + 1 + jc * t_dim1] + v[2] * t[j + 2 + jc * t_dim1]); t[j + jc * t_dim1] -= temp2; t[j + 1 + jc * t_dim1] -= temp2 * v[1]; t[j + 2 + jc * t_dim1] -= temp2 * v[2]; /* L230: */ } if (ilq) { i__3 = *n; for (jr = 1; jr <= i__3; ++jr) { temp = tau * (q[jr + j * q_dim1] + v[1] * q[jr + (j + 1) * q_dim1] + v[2] * q[jr + (j + 2) * q_dim1] ); q[jr + j * q_dim1] -= temp; q[jr + (j + 1) * q_dim1] -= temp * v[1]; q[jr + (j + 2) * q_dim1] -= temp * v[2]; /* L240: */ } } /* Zero j-th column of B (see DLAGBC for details) */ /* Swap rows to pivot */ ilpivt = FALSE_; /* Computing MAX */ d__3 = (d__1 = t[j + 1 + (j + 1) * t_dim1], abs(d__1)), d__4 = (d__2 = t[j + 1 + (j + 2) * t_dim1], abs(d__2)); temp = max(d__3,d__4); /* Computing MAX */ d__3 = (d__1 = t[j + 2 + (j + 1) * t_dim1], abs(d__1)), d__4 = (d__2 = t[j + 2 + (j + 2) * t_dim1], abs(d__2)); temp2 = max(d__3,d__4); if (max(temp,temp2) < safmin) { scale = 0.; u1 = 1.; u2 = 0.; goto L250; } else if (temp >= temp2) { w11 = t[j + 1 + (j + 1) * t_dim1]; w21 = t[j + 2 + (j + 1) * t_dim1]; w12 = t[j + 1 + (j + 2) * t_dim1]; w22 = t[j + 2 + (j + 2) * t_dim1]; u1 = t[j + 1 + j * t_dim1]; u2 = t[j + 2 + j * t_dim1]; } else { w21 = t[j + 1 + (j + 1) * t_dim1]; w11 = t[j + 2 + (j + 1) * t_dim1]; w22 = t[j + 1 + (j + 2) * t_dim1]; w12 = t[j + 2 + (j + 2) * t_dim1]; u2 = t[j + 1 + j * t_dim1]; u1 = t[j + 2 + j * t_dim1]; } /* Swap columns if nec. */ if (abs(w12) > abs(w11)) { ilpivt = TRUE_; temp = w12; temp2 = w22; w12 = w11; w22 = w21; w11 = temp; w21 = temp2; } /* LU-factor */ temp = w21 / w11; u2 -= temp * u1; w22 -= temp * w12; w21 = 0.; /* Compute SCALE */ scale = 1.; if (abs(w22) < safmin) { scale = 0.; u2 = 1.; u1 = -w12 / w11; goto L250; } if (abs(w22) < abs(u2)) { scale = (d__1 = w22 / u2, abs(d__1)); } if (abs(w11) < abs(u1)) { /* Computing MIN */ d__2 = scale, d__3 = (d__1 = w11 / u1, abs(d__1)); scale = min(d__2,d__3); } /* Solve */ u2 = scale * u2 / w22; u1 = (scale * u1 - w12 * u2) / w11; L250: if (ilpivt) { temp = u2; u2 = u1; u1 = temp; } /* Compute Householder Vector */ /* Computing 2nd power */ d__1 = scale; /* Computing 2nd power */ d__2 = u1; /* Computing 2nd power */ d__3 = u2; t1 = sqrt(d__1 * d__1 + d__2 * d__2 + d__3 * d__3); tau = scale / t1 + 1.; vs = -1. / (scale + t1); v[0] = 1.; v[1] = vs * u1; v[2] = vs * u2; /* Apply transformations from the right. */ /* Computing MIN */ i__4 = j + 3; i__3 = min(i__4,ilast); for (jr = ifrstm; jr <= i__3; ++jr) { temp = tau * (h__[jr + j * h_dim1] + v[1] * h__[jr + (j + 1) * h_dim1] + v[2] * h__[jr + (j + 2) * h_dim1]); h__[jr + j * h_dim1] -= temp; h__[jr + (j + 1) * h_dim1] -= temp * v[1]; h__[jr + (j + 2) * h_dim1] -= temp * v[2]; /* L260: */ } i__3 = j + 2; for (jr = ifrstm; jr <= i__3; ++jr) { temp = tau * (t[jr + j * t_dim1] + v[1] * t[jr + (j + 1) * t_dim1] + v[2] * t[jr + (j + 2) * t_dim1]); t[jr + j * t_dim1] -= temp; t[jr + (j + 1) * t_dim1] -= temp * v[1]; t[jr + (j + 2) * t_dim1] -= temp * v[2]; /* L270: */ } if (ilz) { i__3 = *n; for (jr = 1; jr <= i__3; ++jr) { temp = tau * (z__[jr + j * z_dim1] + v[1] * z__[jr + ( j + 1) * z_dim1] + v[2] * z__[jr + (j + 2) * z_dim1]); z__[jr + j * z_dim1] -= temp; z__[jr + (j + 1) * z_dim1] -= temp * v[1]; z__[jr + (j + 2) * z_dim1] -= temp * v[2]; /* L280: */ } } t[j + 1 + j * t_dim1] = 0.; t[j + 2 + j * t_dim1] = 0.; /* L290: */ } /* Last elements: Use Givens rotations */ /* Rotations from the left */ j = ilast - 1; temp = h__[j + (j - 1) * h_dim1]; _starpu_dlartg_(&temp, &h__[j + 1 + (j - 1) * h_dim1], &c__, &s, &h__[j + (j - 1) * h_dim1]); h__[j + 1 + (j - 1) * h_dim1] = 0.; i__2 = ilastm; for (jc = j; jc <= i__2; ++jc) { temp = c__ * h__[j + jc * h_dim1] + s * h__[j + 1 + jc * h_dim1]; h__[j + 1 + jc * h_dim1] = -s * h__[j + jc * h_dim1] + c__ * h__[j + 1 + jc * h_dim1]; h__[j + jc * h_dim1] = temp; temp2 = c__ * t[j + jc * t_dim1] + s * t[j + 1 + jc * t_dim1]; t[j + 1 + jc * t_dim1] = -s * t[j + jc * t_dim1] + c__ * t[j + 1 + jc * t_dim1]; t[j + jc * t_dim1] = temp2; /* L300: */ } if (ilq) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { temp = c__ * q[jr + j * q_dim1] + s * q[jr + (j + 1) * q_dim1]; q[jr + (j + 1) * q_dim1] = -s * q[jr + j * q_dim1] + c__ * q[jr + (j + 1) * q_dim1]; q[jr + j * q_dim1] = temp; /* L310: */ } } /* Rotations from the right. */ temp = t[j + 1 + (j + 1) * t_dim1]; _starpu_dlartg_(&temp, &t[j + 1 + j * t_dim1], &c__, &s, &t[j + 1 + (j + 1) * t_dim1]); t[j + 1 + j * t_dim1] = 0.; i__2 = ilast; for (jr = ifrstm; jr <= i__2; ++jr) { temp = c__ * h__[jr + (j + 1) * h_dim1] + s * h__[jr + j * h_dim1]; h__[jr + j * h_dim1] = -s * h__[jr + (j + 1) * h_dim1] + c__ * h__[jr + j * h_dim1]; h__[jr + (j + 1) * h_dim1] = temp; /* L320: */ } i__2 = ilast - 1; for (jr = ifrstm; jr <= i__2; ++jr) { temp = c__ * t[jr + (j + 1) * t_dim1] + s * t[jr + j * t_dim1] ; t[jr + j * t_dim1] = -s * t[jr + (j + 1) * t_dim1] + c__ * t[ jr + j * t_dim1]; t[jr + (j + 1) * t_dim1] = temp; /* L330: */ } if (ilz) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { temp = c__ * z__[jr + (j + 1) * z_dim1] + s * z__[jr + j * z_dim1]; z__[jr + j * z_dim1] = -s * z__[jr + (j + 1) * z_dim1] + c__ * z__[jr + j * z_dim1]; z__[jr + (j + 1) * z_dim1] = temp; /* L340: */ } } /* End of Double-Shift code */ } goto L350; /* End of iteration loop */ L350: /* L360: */ ; } /* Drop-through = non-convergence */ *info = ilast; goto L420; /* Successful completion of all QZ steps */ L380: /* Set Eigenvalues 1:ILO-1 */ i__1 = *ilo - 1; for (j = 1; j <= i__1; ++j) { if (t[j + j * t_dim1] < 0.) { if (ilschr) { i__2 = j; for (jr = 1; jr <= i__2; ++jr) { h__[jr + j * h_dim1] = -h__[jr + j * h_dim1]; t[jr + j * t_dim1] = -t[jr + j * t_dim1]; /* L390: */ } } else { h__[j + j * h_dim1] = -h__[j + j * h_dim1]; t[j + j * t_dim1] = -t[j + j * t_dim1]; } if (ilz) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { z__[jr + j * z_dim1] = -z__[jr + j * z_dim1]; /* L400: */ } } } alphar[j] = h__[j + j * h_dim1]; alphai[j] = 0.; beta[j] = t[j + j * t_dim1]; /* L410: */ } /* Normal Termination */ *info = 0; /* Exit (other than argument error) -- return optimal workspace size */ L420: work[1] = (doublereal) (*n); return 0; /* End of DHGEQZ */ } /* _starpu_dhgeqz_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dhsein.c000066400000000000000000000357511507764646700206610ustar00rootroot00000000000000/* dhsein.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static logical c_false = FALSE_; static logical c_true = TRUE_; /* Subroutine */ int _starpu_dhsein_(char *side, char *eigsrc, char *initv, logical * select, integer *n, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, doublereal *work, integer * ifaill, integer *ifailr, integer *info) { /* System generated locals */ integer h_dim1, h_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2; doublereal d__1, d__2; /* Local variables */ integer i__, k, kl, kr, kln, ksi; doublereal wki; integer ksr; doublereal ulp, wkr, eps3; logical pair; doublereal unfl; extern logical _starpu_lsame_(char *, char *); integer iinfo; logical leftv, bothv; doublereal hnorm; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlaein_(logical *, logical *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal * , doublereal *, doublereal *, integer *); extern doublereal _starpu_dlanhs_(char *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; logical noinit; integer ldwork; logical rightv, fromqr; doublereal smlnum; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DHSEIN uses inverse iteration to find specified right and/or left */ /* eigenvectors of a real upper Hessenberg matrix H. */ /* The right eigenvector x and the left eigenvector y of the matrix H */ /* corresponding to an eigenvalue w are defined by: */ /* H * x = w * x, y**h * H = w * y**h */ /* where y**h denotes the conjugate transpose of the vector y. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'R': compute right eigenvectors only; */ /* = 'L': compute left eigenvectors only; */ /* = 'B': compute both right and left eigenvectors. */ /* EIGSRC (input) CHARACTER*1 */ /* Specifies the source of eigenvalues supplied in (WR,WI): */ /* = 'Q': the eigenvalues were found using DHSEQR; thus, if */ /* H has zero subdiagonal elements, and so is */ /* block-triangular, then the j-th eigenvalue can be */ /* assumed to be an eigenvalue of the block containing */ /* the j-th row/column. This property allows DHSEIN to */ /* perform inverse iteration on just one diagonal block. */ /* = 'N': no assumptions are made on the correspondence */ /* between eigenvalues and diagonal blocks. In this */ /* case, DHSEIN must always perform inverse iteration */ /* using the whole matrix H. */ /* INITV (input) CHARACTER*1 */ /* = 'N': no initial vectors are supplied; */ /* = 'U': user-supplied initial vectors are stored in the arrays */ /* VL and/or VR. */ /* SELECT (input/output) LOGICAL array, dimension (N) */ /* Specifies the eigenvectors to be computed. To select the */ /* real eigenvector corresponding to a real eigenvalue WR(j), */ /* SELECT(j) must be set to .TRUE.. To select the complex */ /* eigenvector corresponding to a complex eigenvalue */ /* (WR(j),WI(j)), with complex conjugate (WR(j+1),WI(j+1)), */ /* either SELECT(j) or SELECT(j+1) or both must be set to */ /* .TRUE.; then on exit SELECT(j) is .TRUE. and SELECT(j+1) is */ /* .FALSE.. */ /* N (input) INTEGER */ /* The order of the matrix H. N >= 0. */ /* H (input) DOUBLE PRECISION array, dimension (LDH,N) */ /* The upper Hessenberg matrix H. */ /* LDH (input) INTEGER */ /* The leading dimension of the array H. LDH >= max(1,N). */ /* WR (input/output) DOUBLE PRECISION array, dimension (N) */ /* WI (input) DOUBLE PRECISION array, dimension (N) */ /* On entry, the real and imaginary parts of the eigenvalues of */ /* H; a complex conjugate pair of eigenvalues must be stored in */ /* consecutive elements of WR and WI. */ /* On exit, WR may have been altered since close eigenvalues */ /* are perturbed slightly in searching for independent */ /* eigenvectors. */ /* VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) */ /* On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must */ /* contain starting vectors for the inverse iteration for the */ /* left eigenvectors; the starting vector for each eigenvector */ /* must be in the same column(s) in which the eigenvector will */ /* be stored. */ /* On exit, if SIDE = 'L' or 'B', the left eigenvectors */ /* specified by SELECT will be stored consecutively in the */ /* columns of VL, in the same order as their eigenvalues. A */ /* complex eigenvector corresponding to a complex eigenvalue is */ /* stored in two consecutive columns, the first holding the real */ /* part and the second the imaginary part. */ /* If SIDE = 'R', VL is not referenced. */ /* LDVL (input) INTEGER */ /* The leading dimension of the array VL. */ /* LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. */ /* VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) */ /* On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must */ /* contain starting vectors for the inverse iteration for the */ /* right eigenvectors; the starting vector for each eigenvector */ /* must be in the same column(s) in which the eigenvector will */ /* be stored. */ /* On exit, if SIDE = 'R' or 'B', the right eigenvectors */ /* specified by SELECT will be stored consecutively in the */ /* columns of VR, in the same order as their eigenvalues. A */ /* complex eigenvector corresponding to a complex eigenvalue is */ /* stored in two consecutive columns, the first holding the real */ /* part and the second the imaginary part. */ /* If SIDE = 'L', VR is not referenced. */ /* LDVR (input) INTEGER */ /* The leading dimension of the array VR. */ /* LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. */ /* MM (input) INTEGER */ /* The number of columns in the arrays VL and/or VR. MM >= M. */ /* M (output) INTEGER */ /* The number of columns in the arrays VL and/or VR required to */ /* store the eigenvectors; each selected real eigenvector */ /* occupies one column and each selected complex eigenvector */ /* occupies two columns. */ /* WORK (workspace) DOUBLE PRECISION array, dimension ((N+2)*N) */ /* IFAILL (output) INTEGER array, dimension (MM) */ /* If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left */ /* eigenvector in the i-th column of VL (corresponding to the */ /* eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the */ /* eigenvector converged satisfactorily. If the i-th and (i+1)th */ /* columns of VL hold a complex eigenvector, then IFAILL(i) and */ /* IFAILL(i+1) are set to the same value. */ /* If SIDE = 'R', IFAILL is not referenced. */ /* IFAILR (output) INTEGER array, dimension (MM) */ /* If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right */ /* eigenvector in the i-th column of VR (corresponding to the */ /* eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the */ /* eigenvector converged satisfactorily. If the i-th and (i+1)th */ /* columns of VR hold a complex eigenvector, then IFAILR(i) and */ /* IFAILR(i+1) are set to the same value. */ /* If SIDE = 'L', IFAILR is not referenced. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, i is the number of eigenvectors which */ /* failed to converge; see IFAILL and IFAILR for further */ /* details. */ /* Further Details */ /* =============== */ /* Each eigenvector is normalized so that the element of largest */ /* magnitude has magnitude 1; here the magnitude of a complex number */ /* (x,y) is taken to be |x|+|y|. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode and test the input parameters. */ /* Parameter adjustments */ --select; h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; --wr; --wi; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1; vr -= vr_offset; --work; --ifaill; --ifailr; /* Function Body */ bothv = _starpu_lsame_(side, "B"); rightv = _starpu_lsame_(side, "R") || bothv; leftv = _starpu_lsame_(side, "L") || bothv; fromqr = _starpu_lsame_(eigsrc, "Q"); noinit = _starpu_lsame_(initv, "N"); /* Set M to the number of columns required to store the selected */ /* eigenvectors, and standardize the array SELECT. */ *m = 0; pair = FALSE_; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (pair) { pair = FALSE_; select[k] = FALSE_; } else { if (wi[k] == 0.) { if (select[k]) { ++(*m); } } else { pair = TRUE_; if (select[k] || select[k + 1]) { select[k] = TRUE_; *m += 2; } } } /* L10: */ } *info = 0; if (! rightv && ! leftv) { *info = -1; } else if (! fromqr && ! _starpu_lsame_(eigsrc, "N")) { *info = -2; } else if (! noinit && ! _starpu_lsame_(initv, "U")) { *info = -3; } else if (*n < 0) { *info = -5; } else if (*ldh < max(1,*n)) { *info = -7; } else if (*ldvl < 1 || leftv && *ldvl < *n) { *info = -11; } else if (*ldvr < 1 || rightv && *ldvr < *n) { *info = -13; } else if (*mm < *m) { *info = -14; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DHSEIN", &i__1); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } /* Set machine-dependent constants. */ unfl = _starpu_dlamch_("Safe minimum"); ulp = _starpu_dlamch_("Precision"); smlnum = unfl * (*n / ulp); bignum = (1. - ulp) / smlnum; ldwork = *n + 1; kl = 1; kln = 0; if (fromqr) { kr = 0; } else { kr = *n; } ksr = 1; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (select[k]) { /* Compute eigenvector(s) corresponding to W(K). */ if (fromqr) { /* If affiliation of eigenvalues is known, check whether */ /* the matrix splits. */ /* Determine KL and KR such that 1 <= KL <= K <= KR <= N */ /* and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or */ /* KR = N). */ /* Then inverse iteration can be performed with the */ /* submatrix H(KL:N,KL:N) for a left eigenvector, and with */ /* the submatrix H(1:KR,1:KR) for a right eigenvector. */ i__2 = kl + 1; for (i__ = k; i__ >= i__2; --i__) { if (h__[i__ + (i__ - 1) * h_dim1] == 0.) { goto L30; } /* L20: */ } L30: kl = i__; if (k > kr) { i__2 = *n - 1; for (i__ = k; i__ <= i__2; ++i__) { if (h__[i__ + 1 + i__ * h_dim1] == 0.) { goto L50; } /* L40: */ } L50: kr = i__; } } if (kl != kln) { kln = kl; /* Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it */ /* has not ben computed before. */ i__2 = kr - kl + 1; hnorm = _starpu_dlanhs_("I", &i__2, &h__[kl + kl * h_dim1], ldh, & work[1]); if (hnorm > 0.) { eps3 = hnorm * ulp; } else { eps3 = smlnum; } } /* Perturb eigenvalue if it is close to any previous */ /* selected eigenvalues affiliated to the submatrix */ /* H(KL:KR,KL:KR). Close roots are modified by EPS3. */ wkr = wr[k]; wki = wi[k]; L60: i__2 = kl; for (i__ = k - 1; i__ >= i__2; --i__) { if (select[i__] && (d__1 = wr[i__] - wkr, abs(d__1)) + (d__2 = wi[i__] - wki, abs(d__2)) < eps3) { wkr += eps3; goto L60; } /* L70: */ } wr[k] = wkr; pair = wki != 0.; if (pair) { ksi = ksr + 1; } else { ksi = ksr; } if (leftv) { /* Compute left eigenvector. */ i__2 = *n - kl + 1; _starpu_dlaein_(&c_false, &noinit, &i__2, &h__[kl + kl * h_dim1], ldh, &wkr, &wki, &vl[kl + ksr * vl_dim1], &vl[kl + ksi * vl_dim1], &work[1], &ldwork, &work[*n * *n + *n + 1], &eps3, &smlnum, &bignum, &iinfo); if (iinfo > 0) { if (pair) { *info += 2; } else { ++(*info); } ifaill[ksr] = k; ifaill[ksi] = k; } else { ifaill[ksr] = 0; ifaill[ksi] = 0; } i__2 = kl - 1; for (i__ = 1; i__ <= i__2; ++i__) { vl[i__ + ksr * vl_dim1] = 0.; /* L80: */ } if (pair) { i__2 = kl - 1; for (i__ = 1; i__ <= i__2; ++i__) { vl[i__ + ksi * vl_dim1] = 0.; /* L90: */ } } } if (rightv) { /* Compute right eigenvector. */ _starpu_dlaein_(&c_true, &noinit, &kr, &h__[h_offset], ldh, &wkr, & wki, &vr[ksr * vr_dim1 + 1], &vr[ksi * vr_dim1 + 1], & work[1], &ldwork, &work[*n * *n + *n + 1], &eps3, & smlnum, &bignum, &iinfo); if (iinfo > 0) { if (pair) { *info += 2; } else { ++(*info); } ifailr[ksr] = k; ifailr[ksi] = k; } else { ifailr[ksr] = 0; ifailr[ksi] = 0; } i__2 = *n; for (i__ = kr + 1; i__ <= i__2; ++i__) { vr[i__ + ksr * vr_dim1] = 0.; /* L100: */ } if (pair) { i__2 = *n; for (i__ = kr + 1; i__ <= i__2; ++i__) { vr[i__ + ksi * vr_dim1] = 0.; /* L110: */ } } } if (pair) { ksr += 2; } else { ++ksr; } } /* L120: */ } return 0; /* End of DHSEIN */ } /* _starpu_dhsein_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dhseqr.c000066400000000000000000000452131507764646700206670ustar00rootroot00000000000000/* dhseqr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b11 = 0.; static doublereal c_b12 = 1.; static integer c__12 = 12; static integer c__2 = 2; static integer c__49 = 49; /* Subroutine */ int _starpu_dhseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2[2], i__3; doublereal d__1; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ integer i__; doublereal hl[2401] /* was [49][49] */; integer kbot, nmin; extern logical _starpu_lsame_(char *, char *); logical initz; doublereal workl[49]; logical wantt, wantz; extern /* Subroutine */ int _starpu_dlaqr0_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DHSEQR computes the eigenvalues of a Hessenberg matrix H */ /* and, optionally, the matrices T and Z from the Schur decomposition */ /* H = Z T Z**T, where T is an upper quasi-triangular matrix (the */ /* Schur form), and Z is the orthogonal matrix of Schur vectors. */ /* Optionally Z may be postmultiplied into an input orthogonal */ /* matrix Q so that this routine can give the Schur factorization */ /* of a matrix A which has been reduced to the Hessenberg form H */ /* by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. */ /* Arguments */ /* ========= */ /* JOB (input) CHARACTER*1 */ /* = 'E': compute eigenvalues only; */ /* = 'S': compute eigenvalues and the Schur form T. */ /* COMPZ (input) CHARACTER*1 */ /* = 'N': no Schur vectors are computed; */ /* = 'I': Z is initialized to the unit matrix and the matrix Z */ /* of Schur vectors of H is returned; */ /* = 'V': Z must contain an orthogonal matrix Q on entry, and */ /* the product Q*Z is returned. */ /* N (input) INTEGER */ /* The order of the matrix H. N .GE. 0. */ /* ILO (input) INTEGER */ /* IHI (input) INTEGER */ /* It is assumed that H is already upper triangular in rows */ /* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */ /* set by a previous call to DGEBAL, and then passed to DGEHRD */ /* when the matrix output by DGEBAL is reduced to Hessenberg */ /* form. Otherwise ILO and IHI should be set to 1 and N */ /* respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. */ /* If N = 0, then ILO = 1 and IHI = 0. */ /* H (input/output) DOUBLE PRECISION array, dimension (LDH,N) */ /* On entry, the upper Hessenberg matrix H. */ /* On exit, if INFO = 0 and JOB = 'S', then H contains the */ /* upper quasi-triangular matrix T from the Schur decomposition */ /* (the Schur form); 2-by-2 diagonal blocks (corresponding to */ /* complex conjugate pairs of eigenvalues) are returned in */ /* standard form, with H(i,i) = H(i+1,i+1) and */ /* H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and JOB = 'E', the */ /* contents of H are unspecified on exit. (The output value of */ /* H when INFO.GT.0 is given under the description of INFO */ /* below.) */ /* Unlike earlier versions of DHSEQR, this subroutine may */ /* explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 */ /* or j = IHI+1, IHI+2, ... N. */ /* LDH (input) INTEGER */ /* The leading dimension of the array H. LDH .GE. max(1,N). */ /* WR (output) DOUBLE PRECISION array, dimension (N) */ /* WI (output) DOUBLE PRECISION array, dimension (N) */ /* The real and imaginary parts, respectively, of the computed */ /* eigenvalues. If two eigenvalues are computed as a complex */ /* conjugate pair, they are stored in consecutive elements of */ /* WR and WI, say the i-th and (i+1)th, with WI(i) .GT. 0 and */ /* WI(i+1) .LT. 0. If JOB = 'S', the eigenvalues are stored in */ /* the same order as on the diagonal of the Schur form returned */ /* in H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 */ /* diagonal block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and */ /* WI(i+1) = -WI(i). */ /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ /* If COMPZ = 'N', Z is not referenced. */ /* If COMPZ = 'I', on entry Z need not be set and on exit, */ /* if INFO = 0, Z contains the orthogonal matrix Z of the Schur */ /* vectors of H. If COMPZ = 'V', on entry Z must contain an */ /* N-by-N matrix Q, which is assumed to be equal to the unit */ /* matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, */ /* if INFO = 0, Z contains Q*Z. */ /* Normally Q is the orthogonal matrix generated by DORGHR */ /* after the call to DGEHRD which formed the Hessenberg matrix */ /* H. (The output value of Z when INFO.GT.0 is given under */ /* the description of INFO below.) */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. if COMPZ = 'I' or */ /* COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwize, LDZ.GE.1. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */ /* On exit, if INFO = 0, WORK(1) returns an estimate of */ /* the optimal value for LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK .GE. max(1,N) */ /* is sufficient and delivers very good and sometimes */ /* optimal performance. However, LWORK as large as 11*N */ /* may be required for optimal performance. A workspace */ /* query is recommended to determine the optimal workspace */ /* size. */ /* If LWORK = -1, then DHSEQR does a workspace query. */ /* In this case, DHSEQR checks the input parameters and */ /* estimates the optimal workspace size for the given */ /* values of N, ILO and IHI. The estimate is returned */ /* in WORK(1). No error message related to LWORK is */ /* issued by XERBLA. Neither H nor Z are accessed. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* .LT. 0: if INFO = -i, the i-th argument had an illegal */ /* value */ /* .GT. 0: if INFO = i, DHSEQR failed to compute all of */ /* the eigenvalues. Elements 1:ilo-1 and i+1:n of WR */ /* and WI contain those eigenvalues which have been */ /* successfully computed. (Failures are rare.) */ /* If INFO .GT. 0 and JOB = 'E', then on exit, the */ /* remaining unconverged eigenvalues are the eigen- */ /* values of the upper Hessenberg matrix rows and */ /* columns ILO through INFO of the final, output */ /* value of H. */ /* If INFO .GT. 0 and JOB = 'S', then on exit */ /* (*) (initial value of H)*U = U*(final value of H) */ /* where U is an orthogonal matrix. The final */ /* value of H is upper Hessenberg and quasi-triangular */ /* in rows and columns INFO+1 through IHI. */ /* If INFO .GT. 0 and COMPZ = 'V', then on exit */ /* (final value of Z) = (initial value of Z)*U */ /* where U is the orthogonal matrix in (*) (regard- */ /* less of the value of JOB.) */ /* If INFO .GT. 0 and COMPZ = 'I', then on exit */ /* (final value of Z) = U */ /* where U is the orthogonal matrix in (*) (regard- */ /* less of the value of JOB.) */ /* If INFO .GT. 0 and COMPZ = 'N', then Z is not */ /* accessed. */ /* ================================================================ */ /* Default values supplied by */ /* ILAENV(ISPEC,'DHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). */ /* It is suggested that these defaults be adjusted in order */ /* to attain best performance in each particular */ /* computational environment. */ /* ISPEC=12: The DLAHQR vs DLAQR0 crossover point. */ /* Default: 75. (Must be at least 11.) */ /* ISPEC=13: Recommended deflation window size. */ /* This depends on ILO, IHI and NS. NS is the */ /* number of simultaneous shifts returned */ /* by ILAENV(ISPEC=15). (See ISPEC=15 below.) */ /* The default for (IHI-ILO+1).LE.500 is NS. */ /* The default for (IHI-ILO+1).GT.500 is 3*NS/2. */ /* ISPEC=14: Nibble crossover point. (See IPARMQ for */ /* details.) Default: 14% of deflation window */ /* size. */ /* ISPEC=15: Number of simultaneous shifts in a multishift */ /* QR iteration. */ /* If IHI-ILO+1 is ... */ /* greater than ...but less ... the */ /* or equal to ... than default is */ /* 1 30 NS = 2(+) */ /* 30 60 NS = 4(+) */ /* 60 150 NS = 10(+) */ /* 150 590 NS = ** */ /* 590 3000 NS = 64 */ /* 3000 6000 NS = 128 */ /* 6000 infinity NS = 256 */ /* (+) By default some or all matrices of this order */ /* are passed to the implicit double shift routine */ /* DLAHQR and this parameter is ignored. See */ /* ISPEC=12 above and comments in IPARMQ for */ /* details. */ /* (**) The asterisks (**) indicate an ad-hoc */ /* function of N increasing from 10 to 64. */ /* ISPEC=16: Select structured matrix multiply. */ /* If the number of simultaneous shifts (specified */ /* by ISPEC=15) is less than 14, then the default */ /* for ISPEC=16 is 0. Otherwise the default for */ /* ISPEC=16 is 2. */ /* ================================================================ */ /* Based on contributions by */ /* Karen Braman and Ralph Byers, Department of Mathematics, */ /* University of Kansas, USA */ /* ================================================================ */ /* References: */ /* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */ /* Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 */ /* Performance, SIAM Journal of Matrix Analysis, volume 23, pages */ /* 929--947, 2002. */ /* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */ /* Algorithm Part II: Aggressive Early Deflation, SIAM Journal */ /* of Matrix Analysis, volume 23, pages 948--973, 2002. */ /* ================================================================ */ /* .. Parameters .. */ /* ==== Matrices of order NTINY or smaller must be processed by */ /* . DLAHQR because of insufficient subdiagonal scratch space. */ /* . (This is a hard limit.) ==== */ /* ==== NL allocates some local workspace to help small matrices */ /* . through a rare DLAHQR failure. NL .GT. NTINY = 11 is */ /* . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom- */ /* . mended. (The default value of NMIN is 75.) Using NL = 49 */ /* . allows up to six simultaneous shifts and a 16-by-16 */ /* . deflation window. ==== */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* ==== Decode and check the input parameters. ==== */ /* Parameter adjustments */ h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; --wr; --wi; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; /* Function Body */ wantt = _starpu_lsame_(job, "S"); initz = _starpu_lsame_(compz, "I"); wantz = initz || _starpu_lsame_(compz, "V"); work[1] = (doublereal) max(1,*n); lquery = *lwork == -1; *info = 0; if (! _starpu_lsame_(job, "E") && ! wantt) { *info = -1; } else if (! _starpu_lsame_(compz, "N") && ! wantz) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -4; } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -5; } else if (*ldh < max(1,*n)) { *info = -7; } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { *info = -11; } else if (*lwork < max(1,*n) && ! lquery) { *info = -13; } if (*info != 0) { /* ==== Quick return in case of invalid argument. ==== */ i__1 = -(*info); _starpu_xerbla_("DHSEQR", &i__1); return 0; } else if (*n == 0) { /* ==== Quick return in case N = 0; nothing to do. ==== */ return 0; } else if (lquery) { /* ==== Quick return in case of a workspace query ==== */ _starpu_dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ 1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, info); /* ==== Ensure reported workspace size is backward-compatible with */ /* . previous LAPACK versions. ==== */ /* Computing MAX */ d__1 = (doublereal) max(1,*n); work[1] = max(d__1,work[1]); return 0; } else { /* ==== copy eigenvalues isolated by DGEBAL ==== */ i__1 = *ilo - 1; for (i__ = 1; i__ <= i__1; ++i__) { wr[i__] = h__[i__ + i__ * h_dim1]; wi[i__] = 0.; /* L10: */ } i__1 = *n; for (i__ = *ihi + 1; i__ <= i__1; ++i__) { wr[i__] = h__[i__ + i__ * h_dim1]; wi[i__] = 0.; /* L20: */ } /* ==== Initialize Z, if requested ==== */ if (initz) { _starpu_dlaset_("A", n, n, &c_b11, &c_b12, &z__[z_offset], ldz) ; } /* ==== Quick return if possible ==== */ if (*ilo == *ihi) { wr[*ilo] = h__[*ilo + *ilo * h_dim1]; wi[*ilo] = 0.; return 0; } /* ==== DLAHQR/DLAQR0 crossover point ==== */ /* Writing concatenation */ i__2[0] = 1, a__1[0] = job; i__2[1] = 1, a__1[1] = compz; s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); nmin = _starpu_ilaenv_(&c__12, "DHSEQR", ch__1, n, ilo, ihi, lwork); nmin = max(11,nmin); /* ==== DLAQR0 for big matrices; DLAHQR for small ones ==== */ if (*n > nmin) { _starpu_dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, info); } else { /* ==== Small matrix ==== */ _starpu_dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, ihi, &z__[z_offset], ldz, info); if (*info > 0) { /* ==== A rare DLAHQR failure! DLAQR0 sometimes succeeds */ /* . when DLAHQR fails. ==== */ kbot = *info; if (*n >= 49) { /* ==== Larger matrices have enough subdiagonal scratch */ /* . space to call DLAQR0 directly. ==== */ _starpu_dlaqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, info); } else { /* ==== Tiny matrices don't have enough subdiagonal */ /* . scratch space to benefit from DLAQR0. Hence, */ /* . tiny matrices must be copied into a larger */ /* . array before calling DLAQR0. ==== */ _starpu_dlacpy_("A", n, n, &h__[h_offset], ldh, hl, &c__49); hl[*n + 1 + *n * 49 - 50] = 0.; i__1 = 49 - *n; _starpu_dlaset_("A", &c__49, &i__1, &c_b11, &c_b11, &hl[(*n + 1) * 49 - 49], &c__49); _starpu_dlaqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, & wr[1], &wi[1], ilo, ihi, &z__[z_offset], ldz, workl, &c__49, info); if (wantt || *info != 0) { _starpu_dlacpy_("A", n, n, hl, &c__49, &h__[h_offset], ldh); } } } } /* ==== Clear out the trash, if necessary. ==== */ if ((wantt || *info != 0) && *n > 2) { i__1 = *n - 2; i__3 = *n - 2; _starpu_dlaset_("L", &i__1, &i__3, &c_b11, &c_b11, &h__[h_dim1 + 3], ldh); } /* ==== Ensure reported workspace size is backward-compatible with */ /* . previous LAPACK versions. ==== */ /* Computing MAX */ d__1 = (doublereal) max(1,*n); work[1] = max(d__1,work[1]); } /* ==== End of DHSEQR ==== */ return 0; } /* _starpu_dhseqr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/disnan.c000066400000000000000000000026311507764646700206520ustar00rootroot00000000000000/* disnan.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" logical _starpu_disnan_(doublereal *din) { /* System generated locals */ logical ret_val; /* Local variables */ extern logical _starpu_dlaisnan_(doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DISNAN returns .TRUE. if its argument is NaN, and .FALSE. */ /* otherwise. To be replaced by the Fortran 2003 intrinsic in the */ /* future. */ /* Arguments */ /* ========= */ /* DIN (input) DOUBLE PRECISION */ /* Input to test for NaN. */ /* ===================================================================== */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ ret_val = _starpu_dlaisnan_(din, din); return ret_val; } /* _starpu_disnan_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_gbamv.c000066400000000000000000000222721507764646700213150ustar00rootroot00000000000000/* _starpu_dla_gbamv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dla_gbamv__(integer *trans, integer *m, integer *n, integer *kl, integer *ku, doublereal *alpha, doublereal *ab, integer * ldab, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; doublereal d__1; /* Builtin functions */ double d_sign(doublereal *, doublereal *); /* Local variables */ extern integer _starpu_ilatrans_(char *); integer i__, j; logical symb_zero__; integer kd, iy, jx, kx, ky, info; doublereal temp; integer lenx, leny; doublereal safe1; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_GEAMV performs one of the matrix-vector operations */ /* y := alpha*abs(A)*abs(x) + beta*abs(y), */ /* or y := alpha*abs(A)'*abs(x) + beta*abs(y), */ /* where alpha and beta are scalars, x and y are vectors and A is an */ /* m by n matrix. */ /* This function is primarily used in calculating error bounds. */ /* To protect against underflow during evaluation, components in */ /* the resulting vector are perturbed away from zero by (N+1) */ /* times the underflow threshold. To prevent unnecessarily large */ /* errors for block-structure embedded in general matrices, */ /* "symbolically" zero components are not perturbed. A zero */ /* entry is considered "symbolic" if all multiplications involved */ /* in computing that entry have at least one zero multiplicand. */ /* Parameters */ /* ========== */ /* TRANS - INTEGER */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* BLAS_NO_TRANS y := alpha*abs(A)*abs(x) + beta*abs(y) */ /* BLAS_TRANS y := alpha*abs(A')*abs(x) + beta*abs(y) */ /* BLAS_CONJ_TRANS y := alpha*abs(A')*abs(x) + beta*abs(y) */ /* Unchanged on exit. */ /* M - INTEGER */ /* On entry, M specifies the number of rows of the matrix A. */ /* M must be at least zero. */ /* Unchanged on exit. */ /* N - INTEGER */ /* On entry, N specifies the number of columns of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* KL - INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU - INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* ALPHA - DOUBLE PRECISION */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ) */ /* Before entry, the leading m by n part of the array A must */ /* contain the matrix of coefficients. */ /* Unchanged on exit. */ /* LDA - INTEGER */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ /* Before entry, the incremented array X must contain the */ /* vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then Y need not be set on input. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ /* Before entry with BETA non-zero, the incremented array Y */ /* must contain the vector y. On exit, Y is overwritten by the */ /* updated vector y. */ /* INCY - INTEGER */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* .. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --x; --y; /* Function Body */ info = 0; if (! (*trans == _starpu_ilatrans_("N") || *trans == _starpu_ilatrans_("T") || *trans == _starpu_ilatrans_("C"))) { info = 1; } else if (*m < 0) { info = 2; } else if (*n < 0) { info = 3; } else if (*kl < 0) { info = 4; } else if (*ku < 0) { info = 5; } else if (*ldab < *kl + *ku + 1) { info = 6; } else if (*incx == 0) { info = 8; } else if (*incy == 0) { info = 11; } if (info != 0) { _starpu_xerbla_("DLA_GBAMV ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { return 0; } /* Set LENX and LENY, the lengths of the vectors x and y, and set */ /* up the start points in X and Y. */ if (*trans == _starpu_ilatrans_("N")) { lenx = *n; leny = *m; } else { lenx = *m; leny = *n; } if (*incx > 0) { kx = 1; } else { kx = 1 - (lenx - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (leny - 1) * *incy; } /* Set SAFE1 essentially to be the underflow threshold times the */ /* number of additions in each row. */ safe1 = _starpu_dlamch_("Safe minimum"); safe1 = (*n + 1) * safe1; /* Form y := alpha*abs(A)*abs(x) + beta*abs(y). */ /* The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to */ /* the inexact flag. Still doesn't help change the iteration order */ /* to per-column. */ kd = *ku + 1; iy = ky; if (*incx == 1) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { if (*beta == 0.) { symb_zero__ = TRUE_; y[iy] = 0.; } else if (y[iy] == 0.) { symb_zero__ = TRUE_; } else { symb_zero__ = FALSE_; y[iy] = *beta * (d__1 = y[iy], abs(d__1)); } if (*alpha != 0.) { /* Computing MAX */ i__2 = i__ - *ku; /* Computing MIN */ i__4 = i__ + *kl; i__3 = min(i__4,lenx); for (j = max(i__2,1); j <= i__3; ++j) { if (*trans == _starpu_ilatrans_("N")) { temp = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs( d__1)); } else { temp = (d__1 = ab[j + (kd + i__ - j) * ab_dim1], abs( d__1)); } symb_zero__ = symb_zero__ && (x[j] == 0. || temp == 0.); y[iy] += *alpha * (d__1 = x[j], abs(d__1)) * temp; } } if (! symb_zero__) { y[iy] += d_sign(&safe1, &y[iy]); } iy += *incy; } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { if (*beta == 0.) { symb_zero__ = TRUE_; y[iy] = 0.; } else if (y[iy] == 0.) { symb_zero__ = TRUE_; } else { symb_zero__ = FALSE_; y[iy] = *beta * (d__1 = y[iy], abs(d__1)); } if (*alpha != 0.) { jx = kx; /* Computing MAX */ i__3 = i__ - *ku; /* Computing MIN */ i__4 = i__ + *kl; i__2 = min(i__4,lenx); for (j = max(i__3,1); j <= i__2; ++j) { if (*trans == _starpu_ilatrans_("N")) { temp = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs( d__1)); } else { temp = (d__1 = ab[j + (kd + i__ - j) * ab_dim1], abs( d__1)); } symb_zero__ = symb_zero__ && (x[jx] == 0. || temp == 0.); y[iy] += *alpha * (d__1 = x[jx], abs(d__1)) * temp; jx += *incx; } } if (! symb_zero__) { y[iy] += d_sign(&safe1, &y[iy]); } iy += *incy; } } return 0; /* End of DLA_GBAMV */ } /* _starpu_dla_gbamv__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_gbrcond.c000066400000000000000000000231471507764646700216410ustar00rootroot00000000000000/* _starpu_dla_gbrcond.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dla_gbrcond__(char *trans, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, integer *cmode, doublereal *c__, integer *info, doublereal *work, integer *iwork, ftnlen trans_len) { /* System generated locals */ integer ab_dim1, ab_offset, afb_dim1, afb_offset, i__1, i__2, i__3, i__4; doublereal ret_val, d__1; /* Local variables */ integer i__, j, kd, ke; doublereal tmp; integer kase; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dgbtrs_(char *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); doublereal ainvnm; logical notrans; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_GERCOND Estimates the Skeel condition number of op(A) * op2(C) */ /* where op2 is determined by CMODE as follows */ /* CMODE = 1 op2(C) = C */ /* CMODE = 0 op2(C) = I */ /* CMODE = -1 op2(C) = inv(C) */ /* The Skeel condition number cond(A) = norminf( |inv(A)||A| ) */ /* is computed by computing scaling factors R such that */ /* diag(R)*A*op2(C) is row equilibrated and computing the standard */ /* infinity-norm condition number. */ /* Arguments */ /* ========= */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate Transpose = Transpose) */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ /* The j-th column of A is stored in the j-th column of the */ /* array AB as follows: */ /* AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KL+KU+1. */ /* AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) */ /* Details of the LU factorization of the band matrix A, as */ /* computed by DGBTRF. U is stored as an upper triangular */ /* band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */ /* and the multipliers used during the factorization are stored */ /* in rows KL+KU+2 to 2*KL+KU+1. */ /* LDAFB (input) INTEGER */ /* The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices from the factorization A = P*L*U */ /* as computed by DGBTRF; row i of the matrix was interchanged */ /* with row IPIV(i). */ /* CMODE (input) INTEGER */ /* Determines op2(C) in the formula op(A) * op2(C) as follows: */ /* CMODE = 1 op2(C) = C */ /* CMODE = 0 op2(C) = I */ /* CMODE = -1 op2(C) = inv(C) */ /* C (input) DOUBLE PRECISION array, dimension (N) */ /* The vector C in the formula op(A) * op2(C). */ /* INFO (output) INTEGER */ /* = 0: Successful exit. */ /* i > 0: The ith argument is invalid. */ /* WORK (input) DOUBLE PRECISION array, dimension (5*N). */ /* Workspace. */ /* IWORK (input) INTEGER array, dimension (N). */ /* Workspace. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; afb_dim1 = *ldafb; afb_offset = 1 + afb_dim1; afb -= afb_offset; --ipiv; --c__; --work; --iwork; /* Function Body */ ret_val = 0.; *info = 0; notrans = _starpu_lsame_(trans, "N"); if (! notrans && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_( trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0 || *kl > *n - 1) { *info = -3; } else if (*ku < 0 || *ku > *n - 1) { *info = -4; } else if (*ldab < *kl + *ku + 1) { *info = -6; } else if (*ldafb < (*kl << 1) + *ku + 1) { *info = -8; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLA_GBRCOND", &i__1); return ret_val; } if (*n == 0) { ret_val = 1.; return ret_val; } /* Compute the equilibration matrix R such that */ /* inv(R)*A*C has unit 1-norm. */ kd = *ku + 1; ke = *kl + 1; if (notrans) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { tmp = 0.; if (*cmode == 1) { /* Computing MAX */ i__2 = i__ - *kl; /* Computing MIN */ i__4 = i__ + *ku; i__3 = min(i__4,*n); for (j = max(i__2,1); j <= i__3; ++j) { tmp += (d__1 = ab[kd + i__ - j + j * ab_dim1] * c__[j], abs(d__1)); } } else if (*cmode == 0) { /* Computing MAX */ i__3 = i__ - *kl; /* Computing MIN */ i__4 = i__ + *ku; i__2 = min(i__4,*n); for (j = max(i__3,1); j <= i__2; ++j) { tmp += (d__1 = ab[kd + i__ - j + j * ab_dim1], abs(d__1)); } } else { /* Computing MAX */ i__2 = i__ - *kl; /* Computing MIN */ i__4 = i__ + *ku; i__3 = min(i__4,*n); for (j = max(i__2,1); j <= i__3; ++j) { tmp += (d__1 = ab[kd + i__ - j + j * ab_dim1] / c__[j], abs(d__1)); } } work[(*n << 1) + i__] = tmp; } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { tmp = 0.; if (*cmode == 1) { /* Computing MAX */ i__3 = i__ - *kl; /* Computing MIN */ i__4 = i__ + *ku; i__2 = min(i__4,*n); for (j = max(i__3,1); j <= i__2; ++j) { tmp += (d__1 = ab[ke - i__ + j + i__ * ab_dim1] * c__[j], abs(d__1)); } } else if (*cmode == 0) { /* Computing MAX */ i__2 = i__ - *kl; /* Computing MIN */ i__4 = i__ + *ku; i__3 = min(i__4,*n); for (j = max(i__2,1); j <= i__3; ++j) { tmp += (d__1 = ab[ke - i__ + j + i__ * ab_dim1], abs(d__1) ); } } else { /* Computing MAX */ i__3 = i__ - *kl; /* Computing MIN */ i__4 = i__ + *ku; i__2 = min(i__4,*n); for (j = max(i__3,1); j <= i__2; ++j) { tmp += (d__1 = ab[ke - i__ + j + i__ * ab_dim1] / c__[j], abs(d__1)); } } work[(*n << 1) + i__] = tmp; } } /* Estimate the norm of inv(op(A)). */ ainvnm = 0.; kase = 0; L10: _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); if (kase != 0) { if (kase == 2) { /* Multiply by R. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= work[(*n << 1) + i__]; } if (notrans) { _starpu_dgbtrs_("No transpose", n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1], &work[1], n, info); } else { _starpu_dgbtrs_("Transpose", n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1], &work[1], n, info); } /* Multiply by inv(C). */ if (*cmode == 1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] /= c__[i__]; } } else if (*cmode == -1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= c__[i__]; } } } else { /* Multiply by inv(C'). */ if (*cmode == 1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] /= c__[i__]; } } else if (*cmode == -1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= c__[i__]; } } if (notrans) { _starpu_dgbtrs_("Transpose", n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1], &work[1], n, info); } else { _starpu_dgbtrs_("No transpose", n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1], &work[1], n, info); } /* Multiply by R. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= work[(*n << 1) + i__]; } } goto L10; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { ret_val = 1. / ainvnm; } return ret_val; } /* _starpu_dla_gbrcond__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_gbrfsx_extended.c000066400000000000000000000564541507764646700234050ustar00rootroot00000000000000/* _starpu_dla_gbrfsx_extended.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b6 = -1.; static doublereal c_b8 = 1.; /* Subroutine */ int _starpu_dla_gbrfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, logical *colequ, doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, doublereal *res, doublereal *ayb, doublereal *dy, doublereal *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, y_dim1, y_offset, err_bnds_norm_dim1, err_bnds_norm_offset, err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2, i__3; doublereal d__1, d__2; char ch__1[1]; /* Local variables */ doublereal dxratmax, dzratmax; integer i__, j, m; extern /* Subroutine */ int _starpu_dla_gbamv__(integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical incr_prec__; doublereal prev_dz_z__, yk, final_dx_x__; extern /* Subroutine */ int _starpu_dla_wwaddw__(integer *, doublereal *, doublereal *, doublereal *); doublereal final_dz_z__, prevnormdx; integer cnt; doublereal dyk, eps, incr_thresh__, dx_x__, dz_z__; extern /* Subroutine */ int _starpu_dla_lin_berr__(integer *, integer *, integer * , doublereal *, doublereal *, doublereal *); doublereal ymin; extern /* Subroutine */ int _starpu_blas_dgbmv_x__(integer *, integer *, integer * , integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); integer y_prec_state__; extern /* Subroutine */ int blas_dgbmv2_x__(integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dgbmv_(char *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal dxrat, dzrat; extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); char trans[1]; doublereal normx, normy; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dgbtrs_(char *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); doublereal normdx; extern /* Character */ VOID _starpu_chla_transtype__(char *, ftnlen, integer *); doublereal hugeval; integer x_state__, z_state__; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_GBRFSX_EXTENDED improves the computed solution to a system of */ /* linear equations by performing extra-precise iterative refinement */ /* and provides error bounds and backward error estimates for the solution. */ /* This subroutine is called by DGBRFSX to perform iterative refinement. */ /* In addition to normwise error bound, the code provides maximum */ /* componentwise error bound if possible. See comments for ERR_BNDS_NORM */ /* and ERR_BNDS_COMP for details of the error bounds. Note that this */ /* subroutine is only resonsible for setting the second fields of */ /* ERR_BNDS_NORM and ERR_BNDS_COMP. */ /* Arguments */ /* ========= */ /* PREC_TYPE (input) INTEGER */ /* Specifies the intermediate precision to be used in refinement. */ /* The value is defined by ILAPREC(P) where P is a CHARACTER and */ /* P = 'S': Single */ /* = 'D': Double */ /* = 'I': Indigenous */ /* = 'X', 'E': Extra */ /* TRANS_TYPE (input) INTEGER */ /* Specifies the transposition operation on A. */ /* The value is defined by ILATRANS(T) where T is a CHARACTER and */ /* T = 'N': No transpose */ /* = 'T': Transpose */ /* = 'C': Conjugate transpose */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0 */ /* NRHS (input) INTEGER */ /* The number of right-hand-sides, i.e., the number of columns of the */ /* matrix B. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The factors L and U from the factorization */ /* A = P*L*U as computed by DGBTRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices from the factorization A = P*L*U */ /* as computed by DGBTRF; row i of the matrix was interchanged */ /* with row IPIV(i). */ /* COLEQU (input) LOGICAL */ /* If .TRUE. then column equilibration was done to A before calling */ /* this routine. This is needed to compute the solution and error */ /* bounds correctly. */ /* C (input) DOUBLE PRECISION array, dimension (N) */ /* The column scale factors for A. If COLEQU = .FALSE., C */ /* is not accessed. If C is input, each element of C should be a power */ /* of the radix to ensure a reliable solution and error estimates. */ /* Scaling by powers of the radix does not cause rounding errors unless */ /* the result underflows or overflows. Rounding errors during scaling */ /* lead to refining with a matrix that is not equivalent to the */ /* input matrix, producing error estimates that may not be */ /* reliable. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right-hand-side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* Y (input/output) DOUBLE PRECISION array, dimension */ /* (LDY,NRHS) */ /* On entry, the solution matrix X, as computed by DGBTRS. */ /* On exit, the improved solution matrix Y. */ /* LDY (input) INTEGER */ /* The leading dimension of the array Y. LDY >= max(1,N). */ /* BERR_OUT (output) DOUBLE PRECISION array, dimension (NRHS) */ /* On exit, BERR_OUT(j) contains the componentwise relative backward */ /* error for right-hand-side j from the formula */ /* max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. This is computed by DLA_LIN_BERR. */ /* N_NORMS (input) INTEGER */ /* Determines which error bounds to return (see ERR_BNDS_NORM */ /* and ERR_BNDS_COMP). */ /* If N_NORMS >= 1 return normwise error bounds. */ /* If N_NORMS >= 2 return componentwise error bounds. */ /* ERR_BNDS_NORM (input/output) DOUBLE PRECISION array, dimension */ /* (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* normwise relative error, which is defined as follows: */ /* Normwise relative error in the ith solution vector: */ /* max_j (abs(XTRUE(j,i) - X(j,i))) */ /* ------------------------------ */ /* max_j abs(X(j,i)) */ /* The array is indexed by the type of error information as described */ /* below. There currently are up to three pieces of information */ /* returned. */ /* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_NORM(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * slamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * slamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated normwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * slamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*A, where S scales each row by a power of the */ /* radix so all absolute row sums of Z are approximately 1. */ /* This subroutine is only responsible for setting the second field */ /* above. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* ERR_BNDS_COMP (input/output) DOUBLE PRECISION array, dimension */ /* (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* componentwise relative error, which is defined as follows: */ /* Componentwise relative error in the ith solution vector: */ /* abs(XTRUE(j,i) - X(j,i)) */ /* max_j ---------------------- */ /* abs(X(j,i)) */ /* The array is indexed by the right-hand side i (on which the */ /* componentwise relative error depends), and the type of error */ /* information as described below. There currently are up to three */ /* pieces of information returned for each right-hand side. If */ /* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ /* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ /* the first (:,N_ERR_BNDS) entries are returned. */ /* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_COMP(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * slamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * slamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated componentwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * slamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*(A*diag(x)), where x is the solution for the */ /* current right-hand side and S scales each row of */ /* A*diag(x) by a power of the radix so all absolute row */ /* sums of Z are approximately 1. */ /* This subroutine is only responsible for setting the second field */ /* above. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* RES (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace to hold the intermediate residual. */ /* AYB (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace. This can be the same workspace passed for Y_TAIL. */ /* DY (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace to hold the intermediate solution. */ /* Y_TAIL (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace to hold the trailing bits of the intermediate solution. */ /* RCOND (input) DOUBLE PRECISION */ /* Reciprocal scaled condition number. This is an estimate of the */ /* reciprocal Skeel condition number of the matrix A after */ /* equilibration (if done). If this is less than the machine */ /* precision (in particular, if it is zero), the matrix is singular */ /* to working precision. Note that the error may still be small even */ /* if this number is very small and the matrix appears ill- */ /* conditioned. */ /* ITHRESH (input) INTEGER */ /* The maximum number of residual computations allowed for */ /* refinement. The default is 10. For 'aggressive' set to 100 to */ /* permit convergence using approximate factorizations or */ /* factorizations other than LU. If the factorization uses a */ /* technique other than Gaussian elimination, the guarantees in */ /* ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */ /* RTHRESH (input) DOUBLE PRECISION */ /* Determines when to stop refinement if the error estimate stops */ /* decreasing. Refinement will stop when the next solution no longer */ /* satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */ /* the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */ /* default value is 0.5. For 'aggressive' set to 0.9 to permit */ /* convergence on extremely ill-conditioned matrices. See LAWN 165 */ /* for more details. */ /* DZ_UB (input) DOUBLE PRECISION */ /* Determines when to start considering componentwise convergence. */ /* Componentwise convergence is only considered after each component */ /* of the solution Y is stable, which we definte as the relative */ /* change in each component being less than DZ_UB. The default value */ /* is 0.25, requiring the first bit to be stable. See LAWN 165 for */ /* more details. */ /* IGNORE_CWISE (input) LOGICAL */ /* If .TRUE. then ignore componentwise convergence. Default value */ /* is .FALSE.. */ /* INFO (output) INTEGER */ /* = 0: Successful exit. */ /* < 0: if INFO = -i, the ith argument to DGBTRS had an illegal */ /* value */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Parameters .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ err_bnds_comp_dim1 = *nrhs; err_bnds_comp_offset = 1 + err_bnds_comp_dim1; err_bnds_comp__ -= err_bnds_comp_offset; err_bnds_norm_dim1 = *nrhs; err_bnds_norm_offset = 1 + err_bnds_norm_dim1; err_bnds_norm__ -= err_bnds_norm_offset; ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; afb_dim1 = *ldafb; afb_offset = 1 + afb_dim1; afb -= afb_offset; --ipiv; --c__; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; y_dim1 = *ldy; y_offset = 1 + y_dim1; y -= y_offset; --berr_out__; --res; --ayb; --dy; --y_tail__; /* Function Body */ if (*info != 0) { return 0; } _starpu_chla_transtype__(ch__1, (ftnlen)1, trans_type__); *(unsigned char *)trans = *(unsigned char *)&ch__1[0]; eps = _starpu_dlamch_("Epsilon"); hugeval = _starpu_dlamch_("Overflow"); /* Force HUGEVAL to Inf */ hugeval *= hugeval; /* Using HUGEVAL may lead to spurious underflows. */ incr_thresh__ = (doublereal) (*n) * eps; m = *kl + *ku + 1; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { y_prec_state__ = 1; if (y_prec_state__ == 2) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { y_tail__[i__] = 0.; } } dxrat = 0.; dxratmax = 0.; dzrat = 0.; dzratmax = 0.; final_dx_x__ = hugeval; final_dz_z__ = hugeval; prevnormdx = hugeval; prev_dz_z__ = hugeval; dz_z__ = hugeval; dx_x__ = hugeval; x_state__ = 1; z_state__ = 0; incr_prec__ = FALSE_; i__2 = *ithresh; for (cnt = 1; cnt <= i__2; ++cnt) { /* Compute residual RES = B_s - op(A_s) * Y, */ /* op(A) = A, A**T, or A**H depending on TRANS (and type). */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); if (y_prec_state__ == 0) { _starpu_dgbmv_(trans, &m, n, kl, ku, &c_b6, &ab[ab_offset], ldab, &y[ j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1); } else if (y_prec_state__ == 1) { _starpu_blas_dgbmv_x__(trans_type__, n, n, kl, ku, &c_b6, &ab[ ab_offset], ldab, &y[j * y_dim1 + 1], &c__1, &c_b8, & res[1], &c__1, prec_type__); } else { blas_dgbmv2_x__(trans_type__, n, n, kl, ku, &c_b6, &ab[ ab_offset], ldab, &y[j * y_dim1 + 1], &y_tail__[1], & c__1, &c_b8, &res[1], &c__1, prec_type__); } /* XXX: RES is no longer needed. */ _starpu_dcopy_(n, &res[1], &c__1, &dy[1], &c__1); _starpu_dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1] , &dy[1], n, info); /* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */ normx = 0.; normy = 0.; normdx = 0.; dz_z__ = 0.; ymin = hugeval; i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { yk = (d__1 = y[i__ + j * y_dim1], abs(d__1)); dyk = (d__1 = dy[i__], abs(d__1)); if (yk != 0.) { /* Computing MAX */ d__1 = dz_z__, d__2 = dyk / yk; dz_z__ = max(d__1,d__2); } else if (dyk != 0.) { dz_z__ = hugeval; } ymin = min(ymin,yk); normy = max(normy,yk); if (*colequ) { /* Computing MAX */ d__1 = normx, d__2 = yk * c__[i__]; normx = max(d__1,d__2); /* Computing MAX */ d__1 = normdx, d__2 = dyk * c__[i__]; normdx = max(d__1,d__2); } else { normx = normy; normdx = max(normdx,dyk); } } if (normx != 0.) { dx_x__ = normdx / normx; } else if (normdx == 0.) { dx_x__ = 0.; } else { dx_x__ = hugeval; } dxrat = normdx / prevnormdx; dzrat = dz_z__ / prev_dz_z__; /* Check termination criteria. */ if (! (*ignore_cwise__) && ymin * *rcond < incr_thresh__ * normy && y_prec_state__ < 2) { incr_prec__ = TRUE_; } if (x_state__ == 3 && dxrat <= *rthresh) { x_state__ = 1; } if (x_state__ == 1) { if (dx_x__ <= eps) { x_state__ = 2; } else if (dxrat > *rthresh) { if (y_prec_state__ != 2) { incr_prec__ = TRUE_; } else { x_state__ = 3; } } else { if (dxrat > dxratmax) { dxratmax = dxrat; } } if (x_state__ > 1) { final_dx_x__ = dx_x__; } } if (z_state__ == 0 && dz_z__ <= *dz_ub__) { z_state__ = 1; } if (z_state__ == 3 && dzrat <= *rthresh) { z_state__ = 1; } if (z_state__ == 1) { if (dz_z__ <= eps) { z_state__ = 2; } else if (dz_z__ > *dz_ub__) { z_state__ = 0; dzratmax = 0.; final_dz_z__ = hugeval; } else if (dzrat > *rthresh) { if (y_prec_state__ != 2) { incr_prec__ = TRUE_; } else { z_state__ = 3; } } else { if (dzrat > dzratmax) { dzratmax = dzrat; } } if (z_state__ > 1) { final_dz_z__ = dz_z__; } } /* Exit if both normwise and componentwise stopped working, */ /* but if componentwise is unstable, let it go at least two */ /* iterations. */ if (x_state__ != 1) { if (*ignore_cwise__) { goto L666; } if (z_state__ == 3 || z_state__ == 2) { goto L666; } if (z_state__ == 0 && cnt > 1) { goto L666; } } if (incr_prec__) { incr_prec__ = FALSE_; ++y_prec_state__; i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { y_tail__[i__] = 0.; } } prevnormdx = normdx; prev_dz_z__ = dz_z__; /* Update soluton. */ if (y_prec_state__ < 2) { _starpu_daxpy_(n, &c_b8, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1); } else { _starpu_dla_wwaddw__(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]); } } /* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. */ L666: /* Set final_* when cnt hits ithresh. */ if (x_state__ == 1) { final_dx_x__ = dx_x__; } if (z_state__ == 1) { final_dz_z__ = dz_z__; } /* Compute error bounds. */ if (*n_norms__ >= 1) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = final_dx_x__ / ( 1 - dxratmax); } if (*n_norms__ >= 2) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = final_dz_z__ / ( 1 - dzratmax); } /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. */ /* Compute residual RES = B_s - op(A_s) * Y, */ /* op(A) = A, A**T, or A**H depending on TRANS (and type). */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); _starpu_dgbmv_(trans, n, n, kl, ku, &c_b6, &ab[ab_offset], ldab, &y[j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { ayb[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); } /* Compute abs(op(A_s))*abs(Y) + abs(B_s). */ _starpu_dla_gbamv__(trans_type__, n, n, kl, ku, &c_b8, &ab[ab_offset], ldab, & y[j * y_dim1 + 1], &c__1, &c_b8, &ayb[1], &c__1); _starpu_dla_lin_berr__(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]); /* End of loop for each RHS */ } return 0; } /* _starpu_dla_gbrfsx_extended__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_gbrpvgrw.c000066400000000000000000000105161507764646700220570ustar00rootroot00000000000000/* _starpu_dla_gbrpvgrw.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_dla_gbrpvgrw__(integer *n, integer *kl, integer *ku, integer * ncols, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb) { /* System generated locals */ integer ab_dim1, ab_offset, afb_dim1, afb_offset, i__1, i__2, i__3, i__4; doublereal ret_val, d__1, d__2; /* Local variables */ integer i__, j, kd; doublereal amax, umax, rpvgrw; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_GBRPVGRW computes the reciprocal pivot growth factor */ /* norm(A)/norm(U). The "max absolute element" norm is used. If this is */ /* much less than 1, the stability of the LU factorization of the */ /* (equilibrated) matrix A could be poor. This also means that the */ /* solution X, estimated condition numbers, and error bounds could be */ /* unreliable. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* NCOLS (input) INTEGER */ /* The number of columns of the matrix A. NCOLS >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ /* The j-th column of A is stored in the j-th column of the */ /* array AB as follows: */ /* AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KL+KU+1. */ /* AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) */ /* Details of the LU factorization of the band matrix A, as */ /* computed by DGBTRF. U is stored as an upper triangular */ /* band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */ /* and the multipliers used during the factorization are stored */ /* in rows KL+KU+2 to 2*KL+KU+1. */ /* LDAFB (input) INTEGER */ /* The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; afb_dim1 = *ldafb; afb_offset = 1 + afb_dim1; afb -= afb_offset; /* Function Body */ rpvgrw = 1.; kd = *ku + 1; i__1 = *ncols; for (j = 1; j <= i__1; ++j) { amax = 0.; umax = 0.; /* Computing MAX */ i__2 = j - *ku; /* Computing MIN */ i__4 = j + *kl; i__3 = min(i__4,*n); for (i__ = max(i__2,1); i__ <= i__3; ++i__) { /* Computing MAX */ d__2 = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs(d__1)); amax = max(d__2,amax); } /* Computing MAX */ i__3 = j - *ku; i__2 = j; for (i__ = max(i__3,1); i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = (d__1 = afb[kd + i__ - j + j * afb_dim1], abs(d__1)); umax = max(d__2,umax); } if (umax != 0.) { /* Computing MIN */ d__1 = amax / umax; rpvgrw = min(d__1,rpvgrw); } } ret_val = rpvgrw; return ret_val; } /* _starpu_dla_gbrpvgrw__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_geamv.c000066400000000000000000000211331507764646700213130ustar00rootroot00000000000000/* _starpu_dla_geamv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dla_geamv__(integer *trans, integer *m, integer *n, doublereal *alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double d_sign(doublereal *, doublereal *); /* Local variables */ extern integer _starpu_ilatrans_(char *); integer i__, j; logical symb_zero__; integer iy, jx, kx, ky, info; doublereal temp; integer lenx, leny; doublereal safe1; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_GEAMV performs one of the matrix-vector operations */ /* y := alpha*abs(A)*abs(x) + beta*abs(y), */ /* or y := alpha*abs(A)'*abs(x) + beta*abs(y), */ /* where alpha and beta are scalars, x and y are vectors and A is an */ /* m by n matrix. */ /* This function is primarily used in calculating error bounds. */ /* To protect against underflow during evaluation, components in */ /* the resulting vector are perturbed away from zero by (N+1) */ /* times the underflow threshold. To prevent unnecessarily large */ /* errors for block-structure embedded in general matrices, */ /* "symbolically" zero components are not perturbed. A zero */ /* entry is considered "symbolic" if all multiplications involved */ /* in computing that entry have at least one zero multiplicand. */ /* Parameters */ /* ========== */ /* TRANS - INTEGER */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* BLAS_NO_TRANS y := alpha*abs(A)*abs(x) + beta*abs(y) */ /* BLAS_TRANS y := alpha*abs(A')*abs(x) + beta*abs(y) */ /* BLAS_CONJ_TRANS y := alpha*abs(A')*abs(x) + beta*abs(y) */ /* Unchanged on exit. */ /* M - INTEGER */ /* On entry, M specifies the number of rows of the matrix A. */ /* M must be at least zero. */ /* Unchanged on exit. */ /* N - INTEGER */ /* On entry, N specifies the number of columns of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ) */ /* Before entry, the leading m by n part of the array A must */ /* contain the matrix of coefficients. */ /* Unchanged on exit. */ /* LDA - INTEGER */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ /* Before entry, the incremented array X must contain the */ /* vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then Y need not be set on input. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION */ /* Array of DIMENSION at least */ /* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ /* Before entry with BETA non-zero, the incremented array Y */ /* must contain the vector y. On exit, Y is overwritten by the */ /* updated vector y. */ /* INCY - INTEGER */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* .. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --x; --y; /* Function Body */ info = 0; if (! (*trans == _starpu_ilatrans_("N") || *trans == _starpu_ilatrans_("T") || *trans == _starpu_ilatrans_("C"))) { info = 1; } else if (*m < 0) { info = 2; } else if (*n < 0) { info = 3; } else if (*lda < max(1,*m)) { info = 6; } else if (*incx == 0) { info = 8; } else if (*incy == 0) { info = 11; } if (info != 0) { _starpu_xerbla_("DLA_GEAMV ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { return 0; } /* Set LENX and LENY, the lengths of the vectors x and y, and set */ /* up the start points in X and Y. */ if (*trans == _starpu_ilatrans_("N")) { lenx = *n; leny = *m; } else { lenx = *m; leny = *n; } if (*incx > 0) { kx = 1; } else { kx = 1 - (lenx - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (leny - 1) * *incy; } /* Set SAFE1 essentially to be the underflow threshold times the */ /* number of additions in each row. */ safe1 = _starpu_dlamch_("Safe minimum"); safe1 = (*n + 1) * safe1; /* Form y := alpha*abs(A)*abs(x) + beta*abs(y). */ /* The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to */ /* the inexact flag. Still doesn't help change the iteration order */ /* to per-column. */ iy = ky; if (*incx == 1) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { if (*beta == 0.) { symb_zero__ = TRUE_; y[iy] = 0.; } else if (y[iy] == 0.) { symb_zero__ = TRUE_; } else { symb_zero__ = FALSE_; y[iy] = *beta * (d__1 = y[iy], abs(d__1)); } if (*alpha != 0.) { i__2 = lenx; for (j = 1; j <= i__2; ++j) { if (*trans == _starpu_ilatrans_("N")) { temp = (d__1 = a[i__ + j * a_dim1], abs(d__1)); } else { temp = (d__1 = a[j + i__ * a_dim1], abs(d__1)); } symb_zero__ = symb_zero__ && (x[j] == 0. || temp == 0.); y[iy] += *alpha * (d__1 = x[j], abs(d__1)) * temp; } } if (! symb_zero__) { y[iy] += d_sign(&safe1, &y[iy]); } iy += *incy; } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { if (*beta == 0.) { symb_zero__ = TRUE_; y[iy] = 0.; } else if (y[iy] == 0.) { symb_zero__ = TRUE_; } else { symb_zero__ = FALSE_; y[iy] = *beta * (d__1 = y[iy], abs(d__1)); } if (*alpha != 0.) { jx = kx; i__2 = lenx; for (j = 1; j <= i__2; ++j) { if (*trans == _starpu_ilatrans_("N")) { temp = (d__1 = a[i__ + j * a_dim1], abs(d__1)); } else { temp = (d__1 = a[j + i__ * a_dim1], abs(d__1)); } symb_zero__ = symb_zero__ && (x[jx] == 0. || temp == 0.); y[iy] += *alpha * (d__1 = x[jx], abs(d__1)) * temp; jx += *incx; } } if (! symb_zero__) { y[iy] += d_sign(&safe1, &y[iy]); } iy += *incy; } } return 0; /* End of DLA_GEAMV */ } /* _starpu_dla_geamv__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_gercond.c000066400000000000000000000200461507764646700216370ustar00rootroot00000000000000/* _starpu_dla_gercond.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dla_gercond__(char *trans, integer *n, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, integer *cmode, doublereal *c__, integer *info, doublereal *work, integer *iwork, ftnlen trans_len) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2; doublereal ret_val, d__1; /* Local variables */ integer i__, j; doublereal tmp; integer kase; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); doublereal ainvnm; extern /* Subroutine */ int _starpu_dgetrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); logical notrans; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_GERCOND estimates the Skeel condition number of op(A) * op2(C) */ /* where op2 is determined by CMODE as follows */ /* CMODE = 1 op2(C) = C */ /* CMODE = 0 op2(C) = I */ /* CMODE = -1 op2(C) = inv(C) */ /* The Skeel condition number cond(A) = norminf( |inv(A)||A| ) */ /* is computed by computing scaling factors R such that */ /* diag(R)*A*op2(C) is row equilibrated and computing the standard */ /* infinity-norm condition number. */ /* Arguments */ /* ========== */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate Transpose = Transpose) */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The factors L and U from the factorization */ /* A = P*L*U as computed by DGETRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices from the factorization A = P*L*U */ /* as computed by DGETRF; row i of the matrix was interchanged */ /* with row IPIV(i). */ /* CMODE (input) INTEGER */ /* Determines op2(C) in the formula op(A) * op2(C) as follows: */ /* CMODE = 1 op2(C) = C */ /* CMODE = 0 op2(C) = I */ /* CMODE = -1 op2(C) = inv(C) */ /* C (input) DOUBLE PRECISION array, dimension (N) */ /* The vector C in the formula op(A) * op2(C). */ /* INFO (output) INTEGER */ /* = 0: Successful exit. */ /* i > 0: The ith argument is invalid. */ /* WORK (input) DOUBLE PRECISION array, dimension (3*N). */ /* Workspace. */ /* IWORK (input) INTEGER array, dimension (N). */ /* Workspace. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --ipiv; --c__; --work; --iwork; /* Function Body */ ret_val = 0.; *info = 0; notrans = _starpu_lsame_(trans, "N"); if (! notrans && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_( trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } else if (*ldaf < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLA_GERCOND", &i__1); return ret_val; } if (*n == 0) { ret_val = 1.; return ret_val; } /* Compute the equilibration matrix R such that */ /* inv(R)*A*C has unit 1-norm. */ if (notrans) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { tmp = 0.; if (*cmode == 1) { i__2 = *n; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[i__ + j * a_dim1] * c__[j], abs(d__1)); } } else if (*cmode == 0) { i__2 = *n; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[i__ + j * a_dim1], abs(d__1)); } } else { i__2 = *n; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[i__ + j * a_dim1] / c__[j], abs(d__1)); } } work[(*n << 1) + i__] = tmp; } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { tmp = 0.; if (*cmode == 1) { i__2 = *n; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[j + i__ * a_dim1] * c__[j], abs(d__1)); } } else if (*cmode == 0) { i__2 = *n; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[j + i__ * a_dim1], abs(d__1)); } } else { i__2 = *n; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[j + i__ * a_dim1] / c__[j], abs(d__1)); } } work[(*n << 1) + i__] = tmp; } } /* Estimate the norm of inv(op(A)). */ ainvnm = 0.; kase = 0; L10: _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); if (kase != 0) { if (kase == 2) { /* Multiply by R. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= work[(*n << 1) + i__]; } if (notrans) { _starpu_dgetrs_("No transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[ 1], &work[1], n, info); } else { _starpu_dgetrs_("Transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[1], n, info); } /* Multiply by inv(C). */ if (*cmode == 1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] /= c__[i__]; } } else if (*cmode == -1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= c__[i__]; } } } else { /* Multiply by inv(C'). */ if (*cmode == 1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] /= c__[i__]; } } else if (*cmode == -1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= c__[i__]; } } if (notrans) { _starpu_dgetrs_("Transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[1], n, info); } else { _starpu_dgetrs_("No transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[ 1], &work[1], n, info); } /* Multiply by R. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= work[(*n << 1) + i__]; } } goto L10; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { ret_val = 1. / ainvnm; } return ret_val; } /* _starpu_dla_gercond__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_gerfsx_extended.c000066400000000000000000000553001507764646700233750ustar00rootroot00000000000000/* _starpu_dla_gerfsx_extended.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b6 = -1.; static doublereal c_b8 = 1.; /* Subroutine */ int _starpu_dla_gerfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer * ldy, doublereal *berr_out__, integer *n_norms__, doublereal *errs_n__, doublereal *errs_c__, doublereal *res, doublereal *ayb, doublereal * dy, doublereal *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, y_dim1, y_offset, errs_n_dim1, errs_n_offset, errs_c_dim1, errs_c_offset, i__1, i__2, i__3; doublereal d__1, d__2; char ch__1[1]; /* Local variables */ doublereal dxratmax, dzratmax; integer i__, j; extern /* Subroutine */ int _starpu_dla_geamv__(integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical incr_prec__; doublereal prev_dz_z__, yk, final_dx_x__; extern /* Subroutine */ int _starpu_dla_wwaddw__(integer *, doublereal *, doublereal *, doublereal *); doublereal final_dz_z__, prevnormdx; integer cnt; doublereal dyk, eps, incr_thresh__, dx_x__, dz_z__; extern /* Subroutine */ int _starpu_dla_lin_berr__(integer *, integer *, integer * , doublereal *, doublereal *, doublereal *); doublereal ymin; extern /* Subroutine */ int _starpu_blas_dgemv_x__(integer *, integer *, integer * , doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); integer y_prec_state__; extern /* Subroutine */ int blas_dgemv2_x__(integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal dxrat, dzrat; extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); char trans[1]; doublereal normx, normy; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dgetrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); doublereal normdx; extern /* Character */ VOID _starpu_chla_transtype__(char *, ftnlen, integer *); doublereal hugeval; integer x_state__, z_state__; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_GERFSX_EXTENDED improves the computed solution to a system of */ /* linear equations by performing extra-precise iterative refinement */ /* and provides error bounds and backward error estimates for the solution. */ /* This subroutine is called by DGERFSX to perform iterative refinement. */ /* In addition to normwise error bound, the code provides maximum */ /* componentwise error bound if possible. See comments for ERR_BNDS_NORM */ /* and ERR_BNDS_COMP for details of the error bounds. Note that this */ /* subroutine is only resonsible for setting the second fields of */ /* ERR_BNDS_NORM and ERR_BNDS_COMP. */ /* Arguments */ /* ========= */ /* PREC_TYPE (input) INTEGER */ /* Specifies the intermediate precision to be used in refinement. */ /* The value is defined by ILAPREC(P) where P is a CHARACTER and */ /* P = 'S': Single */ /* = 'D': Double */ /* = 'I': Indigenous */ /* = 'X', 'E': Extra */ /* TRANS_TYPE (input) INTEGER */ /* Specifies the transposition operation on A. */ /* The value is defined by ILATRANS(T) where T is a CHARACTER and */ /* T = 'N': No transpose */ /* = 'T': Transpose */ /* = 'C': Conjugate transpose */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right-hand-sides, i.e., the number of columns of the */ /* matrix B. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The factors L and U from the factorization */ /* A = P*L*U as computed by DGETRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices from the factorization A = P*L*U */ /* as computed by DGETRF; row i of the matrix was interchanged */ /* with row IPIV(i). */ /* COLEQU (input) LOGICAL */ /* If .TRUE. then column equilibration was done to A before calling */ /* this routine. This is needed to compute the solution and error */ /* bounds correctly. */ /* C (input) DOUBLE PRECISION array, dimension (N) */ /* The column scale factors for A. If COLEQU = .FALSE., C */ /* is not accessed. If C is input, each element of C should be a power */ /* of the radix to ensure a reliable solution and error estimates. */ /* Scaling by powers of the radix does not cause rounding errors unless */ /* the result underflows or overflows. Rounding errors during scaling */ /* lead to refining with a matrix that is not equivalent to the */ /* input matrix, producing error estimates that may not be */ /* reliable. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right-hand-side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* Y (input/output) DOUBLE PRECISION array, dimension */ /* (LDY,NRHS) */ /* On entry, the solution matrix X, as computed by DGETRS. */ /* On exit, the improved solution matrix Y. */ /* LDY (input) INTEGER */ /* The leading dimension of the array Y. LDY >= max(1,N). */ /* BERR_OUT (output) DOUBLE PRECISION array, dimension (NRHS) */ /* On exit, BERR_OUT(j) contains the componentwise relative backward */ /* error for right-hand-side j from the formula */ /* max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. This is computed by DLA_LIN_BERR. */ /* N_NORMS (input) INTEGER */ /* Determines which error bounds to return (see ERR_BNDS_NORM */ /* and ERR_BNDS_COMP). */ /* If N_NORMS >= 1 return normwise error bounds. */ /* If N_NORMS >= 2 return componentwise error bounds. */ /* ERR_BNDS_NORM (input/output) DOUBLE PRECISION array, dimension */ /* (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* normwise relative error, which is defined as follows: */ /* Normwise relative error in the ith solution vector: */ /* max_j (abs(XTRUE(j,i) - X(j,i))) */ /* ------------------------------ */ /* max_j abs(X(j,i)) */ /* The array is indexed by the type of error information as described */ /* below. There currently are up to three pieces of information */ /* returned. */ /* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_NORM(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * slamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * slamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated normwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * slamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*A, where S scales each row by a power of the */ /* radix so all absolute row sums of Z are approximately 1. */ /* This subroutine is only responsible for setting the second field */ /* above. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* ERR_BNDS_COMP (input/output) DOUBLE PRECISION array, dimension */ /* (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* componentwise relative error, which is defined as follows: */ /* Componentwise relative error in the ith solution vector: */ /* abs(XTRUE(j,i) - X(j,i)) */ /* max_j ---------------------- */ /* abs(X(j,i)) */ /* The array is indexed by the right-hand side i (on which the */ /* componentwise relative error depends), and the type of error */ /* information as described below. There currently are up to three */ /* pieces of information returned for each right-hand side. If */ /* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ /* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ /* the first (:,N_ERR_BNDS) entries are returned. */ /* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_COMP(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * slamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * slamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated componentwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * slamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*(A*diag(x)), where x is the solution for the */ /* current right-hand side and S scales each row of */ /* A*diag(x) by a power of the radix so all absolute row */ /* sums of Z are approximately 1. */ /* This subroutine is only responsible for setting the second field */ /* above. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* RES (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace to hold the intermediate residual. */ /* AYB (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace. This can be the same workspace passed for Y_TAIL. */ /* DY (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace to hold the intermediate solution. */ /* Y_TAIL (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace to hold the trailing bits of the intermediate solution. */ /* RCOND (input) DOUBLE PRECISION */ /* Reciprocal scaled condition number. This is an estimate of the */ /* reciprocal Skeel condition number of the matrix A after */ /* equilibration (if done). If this is less than the machine */ /* precision (in particular, if it is zero), the matrix is singular */ /* to working precision. Note that the error may still be small even */ /* if this number is very small and the matrix appears ill- */ /* conditioned. */ /* ITHRESH (input) INTEGER */ /* The maximum number of residual computations allowed for */ /* refinement. The default is 10. For 'aggressive' set to 100 to */ /* permit convergence using approximate factorizations or */ /* factorizations other than LU. If the factorization uses a */ /* technique other than Gaussian elimination, the guarantees in */ /* ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */ /* RTHRESH (input) DOUBLE PRECISION */ /* Determines when to stop refinement if the error estimate stops */ /* decreasing. Refinement will stop when the next solution no longer */ /* satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */ /* the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */ /* default value is 0.5. For 'aggressive' set to 0.9 to permit */ /* convergence on extremely ill-conditioned matrices. See LAWN 165 */ /* for more details. */ /* DZ_UB (input) DOUBLE PRECISION */ /* Determines when to start considering componentwise convergence. */ /* Componentwise convergence is only considered after each component */ /* of the solution Y is stable, which we definte as the relative */ /* change in each component being less than DZ_UB. The default value */ /* is 0.25, requiring the first bit to be stable. See LAWN 165 for */ /* more details. */ /* IGNORE_CWISE (input) LOGICAL */ /* If .TRUE. then ignore componentwise convergence. Default value */ /* is .FALSE.. */ /* INFO (output) INTEGER */ /* = 0: Successful exit. */ /* < 0: if INFO = -i, the ith argument to DGETRS had an illegal */ /* value */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Parameters .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ errs_c_dim1 = *nrhs; errs_c_offset = 1 + errs_c_dim1; errs_c__ -= errs_c_offset; errs_n_dim1 = *nrhs; errs_n_offset = 1 + errs_n_dim1; errs_n__ -= errs_n_offset; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --ipiv; --c__; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; y_dim1 = *ldy; y_offset = 1 + y_dim1; y -= y_offset; --berr_out__; --res; --ayb; --dy; --y_tail__; /* Function Body */ if (*info != 0) { return 0; } _starpu_chla_transtype__(ch__1, (ftnlen)1, trans_type__); *(unsigned char *)trans = *(unsigned char *)&ch__1[0]; eps = _starpu_dlamch_("Epsilon"); hugeval = _starpu_dlamch_("Overflow"); /* Force HUGEVAL to Inf */ hugeval *= hugeval; /* Using HUGEVAL may lead to spurious underflows. */ incr_thresh__ = (doublereal) (*n) * eps; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { y_prec_state__ = 1; if (y_prec_state__ == 2) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { y_tail__[i__] = 0.; } } dxrat = 0.; dxratmax = 0.; dzrat = 0.; dzratmax = 0.; final_dx_x__ = hugeval; final_dz_z__ = hugeval; prevnormdx = hugeval; prev_dz_z__ = hugeval; dz_z__ = hugeval; dx_x__ = hugeval; x_state__ = 1; z_state__ = 0; incr_prec__ = FALSE_; i__2 = *ithresh; for (cnt = 1; cnt <= i__2; ++cnt) { /* Compute residual RES = B_s - op(A_s) * Y, */ /* op(A) = A, A**T, or A**H depending on TRANS (and type). */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); if (y_prec_state__ == 0) { _starpu_dgemv_(trans, n, n, &c_b6, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1); } else if (y_prec_state__ == 1) { _starpu_blas_dgemv_x__(trans_type__, n, n, &c_b6, &a[a_offset], lda, & y[j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1, prec_type__); } else { blas_dgemv2_x__(trans_type__, n, n, &c_b6, &a[a_offset], lda, &y[j * y_dim1 + 1], &y_tail__[1], &c__1, &c_b8, &res[ 1], &c__1, prec_type__); } /* XXX: RES is no longer needed. */ _starpu_dcopy_(n, &res[1], &c__1, &dy[1], &c__1); _starpu_dgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &dy[1], n, info); /* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */ normx = 0.; normy = 0.; normdx = 0.; dz_z__ = 0.; ymin = hugeval; i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { yk = (d__1 = y[i__ + j * y_dim1], abs(d__1)); dyk = (d__1 = dy[i__], abs(d__1)); if (yk != 0.) { /* Computing MAX */ d__1 = dz_z__, d__2 = dyk / yk; dz_z__ = max(d__1,d__2); } else if (dyk != 0.) { dz_z__ = hugeval; } ymin = min(ymin,yk); normy = max(normy,yk); if (*colequ) { /* Computing MAX */ d__1 = normx, d__2 = yk * c__[i__]; normx = max(d__1,d__2); /* Computing MAX */ d__1 = normdx, d__2 = dyk * c__[i__]; normdx = max(d__1,d__2); } else { normx = normy; normdx = max(normdx,dyk); } } if (normx != 0.) { dx_x__ = normdx / normx; } else if (normdx == 0.) { dx_x__ = 0.; } else { dx_x__ = hugeval; } dxrat = normdx / prevnormdx; dzrat = dz_z__ / prev_dz_z__; /* Check termination criteria */ if (! (*ignore_cwise__) && ymin * *rcond < incr_thresh__ * normy && y_prec_state__ < 2) { incr_prec__ = TRUE_; } if (x_state__ == 3 && dxrat <= *rthresh) { x_state__ = 1; } if (x_state__ == 1) { if (dx_x__ <= eps) { x_state__ = 2; } else if (dxrat > *rthresh) { if (y_prec_state__ != 2) { incr_prec__ = TRUE_; } else { x_state__ = 3; } } else { if (dxrat > dxratmax) { dxratmax = dxrat; } } if (x_state__ > 1) { final_dx_x__ = dx_x__; } } if (z_state__ == 0 && dz_z__ <= *dz_ub__) { z_state__ = 1; } if (z_state__ == 3 && dzrat <= *rthresh) { z_state__ = 1; } if (z_state__ == 1) { if (dz_z__ <= eps) { z_state__ = 2; } else if (dz_z__ > *dz_ub__) { z_state__ = 0; dzratmax = 0.; final_dz_z__ = hugeval; } else if (dzrat > *rthresh) { if (y_prec_state__ != 2) { incr_prec__ = TRUE_; } else { z_state__ = 3; } } else { if (dzrat > dzratmax) { dzratmax = dzrat; } } if (z_state__ > 1) { final_dz_z__ = dz_z__; } } /* Exit if both normwise and componentwise stopped working, */ /* but if componentwise is unstable, let it go at least two */ /* iterations. */ if (x_state__ != 1) { if (*ignore_cwise__) { goto L666; } if (z_state__ == 3 || z_state__ == 2) { goto L666; } if (z_state__ == 0 && cnt > 1) { goto L666; } } if (incr_prec__) { incr_prec__ = FALSE_; ++y_prec_state__; i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { y_tail__[i__] = 0.; } } prevnormdx = normdx; prev_dz_z__ = dz_z__; /* Update soluton. */ if (y_prec_state__ < 2) { _starpu_daxpy_(n, &c_b8, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1); } else { _starpu_dla_wwaddw__(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]); } } /* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. */ L666: /* Set final_* when cnt hits ithresh. */ if (x_state__ == 1) { final_dx_x__ = dx_x__; } if (z_state__ == 1) { final_dz_z__ = dz_z__; } /* Compute error bounds */ if (*n_norms__ >= 1) { errs_n__[j + (errs_n_dim1 << 1)] = final_dx_x__ / (1 - dxratmax); } if (*n_norms__ >= 2) { errs_c__[j + (errs_c_dim1 << 1)] = final_dz_z__ / (1 - dzratmax); } /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. */ /* Compute residual RES = B_s - op(A_s) * Y, */ /* op(A) = A, A**T, or A**H depending on TRANS (and type). */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); _starpu_dgemv_(trans, n, n, &c_b6, &a[a_offset], lda, &y[j * y_dim1 + 1], & c__1, &c_b8, &res[1], &c__1); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { ayb[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); } /* Compute abs(op(A_s))*abs(Y) + abs(B_s). */ _starpu_dla_geamv__(trans_type__, n, n, &c_b8, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, &c_b8, &ayb[1], &c__1); _starpu_dla_lin_berr__(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]); /* End of loop for each RHS. */ } return 0; } /* _starpu_dla_gerfsx_extended__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_lin_berr.c000066400000000000000000000100521507764646700220060ustar00rootroot00000000000000/* _starpu_dla_lin_berr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dla_lin_berr__(integer *n, integer *nz, integer *nrhs, doublereal *res, doublereal *ayb, doublereal *berr) { /* System generated locals */ integer ayb_dim1, ayb_offset, res_dim1, res_offset, i__1, i__2; doublereal d__1; /* Local variables */ integer i__, j; doublereal tmp, safe1; extern doublereal _starpu_dlamch_(char *); /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_LIN_BERR computes componentwise relative backward error from */ /* the formula */ /* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. */ /* Arguments */ /* ========== */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NZ (input) INTEGER */ /* We add (NZ+1)*SLAMCH( 'Safe minimum' ) to R(i) in the numerator to */ /* guard against spuriously zero residuals. Default value is N. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices AYB, RES, and BERR. NRHS >= 0. */ /* RES (input) DOUBLE PRECISION array, dimension (N,NRHS) */ /* The residual matrix, i.e., the matrix R in the relative backward */ /* error formula above. */ /* AYB (input) DOUBLE PRECISION array, dimension (N, NRHS) */ /* The denominator in the relative backward error formula above, i.e., */ /* the matrix abs(op(A_s))*abs(Y) + abs(B_s). The matrices A, Y, and B */ /* are from iterative refinement (see _starpu_dla_gerfsx_extended.f). */ /* RES (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error from the formula above. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Adding SAFE1 to the numerator guards against spuriously zero */ /* residuals. A similar safeguard is in the SLA_yyAMV routine used */ /* to compute AYB. */ /* Parameter adjustments */ --berr; ayb_dim1 = *n; ayb_offset = 1 + ayb_dim1; ayb -= ayb_offset; res_dim1 = *n; res_offset = 1 + res_dim1; res -= res_offset; /* Function Body */ safe1 = _starpu_dlamch_("Safe minimum"); safe1 = (*nz + 1) * safe1; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { berr[j] = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (ayb[i__ + j * ayb_dim1] != 0.) { tmp = (safe1 + (d__1 = res[i__ + j * res_dim1], abs(d__1))) / ayb[i__ + j * ayb_dim1]; /* Computing MAX */ d__1 = berr[j]; berr[j] = max(d__1,tmp); } /* If AYB is exactly 0.0 (and if computed by SLA_yyAMV), then we know */ /* the true residual also must be exactly 0.0. */ } } return 0; } /* _starpu_dla_lin_berr__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_porcond.c000066400000000000000000000201671507764646700216660ustar00rootroot00000000000000/* _starpu_dla_porcond.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dla_porcond__(char *uplo, integer *n, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *cmode, doublereal *c__, integer *info, doublereal *work, integer *iwork, ftnlen uplo_len) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2; doublereal ret_val, d__1; /* Local variables */ integer i__, j; logical up; doublereal tmp; integer kase; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); doublereal ainvnm; extern /* Subroutine */ int _starpu_dpotrs_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_PORCOND Estimates the Skeel condition number of op(A) * op2(C) */ /* where op2 is determined by CMODE as follows */ /* CMODE = 1 op2(C) = C */ /* CMODE = 0 op2(C) = I */ /* CMODE = -1 op2(C) = inv(C) */ /* The Skeel condition number cond(A) = norminf( |inv(A)||A| ) */ /* is computed by computing scaling factors R such that */ /* diag(R)*A*op2(C) is row equilibrated and computing the standard */ /* infinity-norm condition number. */ /* Arguments */ /* ========== */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* A (input) REAL array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The triangular factor U or L from the Cholesky factorization */ /* A = U**T*U or A = L*L**T, as computed by DPOTRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* CMODE (input) INTEGER */ /* Determines op2(C) in the formula op(A) * op2(C) as follows: */ /* CMODE = 1 op2(C) = C */ /* CMODE = 0 op2(C) = I */ /* CMODE = -1 op2(C) = inv(C) */ /* C (input) DOUBLE PRECISION array, dimension (N) */ /* The vector C in the formula op(A) * op2(C). */ /* INFO (output) INTEGER */ /* = 0: Successful exit. */ /* i > 0: The ith argument is invalid. */ /* WORK (input) DOUBLE PRECISION array, dimension (3*N). */ /* Workspace. */ /* IWORK (input) INTEGER array, dimension (N). */ /* Workspace. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --c__; --work; --iwork; /* Function Body */ ret_val = 0.; *info = 0; if (*n < 0) { *info = -2; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLA_PORCOND", &i__1); return ret_val; } if (*n == 0) { ret_val = 1.; return ret_val; } up = FALSE_; if (_starpu_lsame_(uplo, "U")) { up = TRUE_; } /* Compute the equilibration matrix R such that */ /* inv(R)*A*C has unit 1-norm. */ if (up) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { tmp = 0.; if (*cmode == 1) { i__2 = i__; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[j + i__ * a_dim1] * c__[j], abs(d__1)); } i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { tmp += (d__1 = a[i__ + j * a_dim1] * c__[j], abs(d__1)); } } else if (*cmode == 0) { i__2 = i__; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[j + i__ * a_dim1], abs(d__1)); } i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { tmp += (d__1 = a[i__ + j * a_dim1], abs(d__1)); } } else { i__2 = i__; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[j + i__ * a_dim1] / c__[j], abs(d__1)); } i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { tmp += (d__1 = a[i__ + j * a_dim1] / c__[j], abs(d__1)); } } work[(*n << 1) + i__] = tmp; } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { tmp = 0.; if (*cmode == 1) { i__2 = i__; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[i__ + j * a_dim1] * c__[j], abs(d__1)); } i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { tmp += (d__1 = a[j + i__ * a_dim1] * c__[j], abs(d__1)); } } else if (*cmode == 0) { i__2 = i__; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[i__ + j * a_dim1], abs(d__1)); } i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { tmp += (d__1 = a[j + i__ * a_dim1], abs(d__1)); } } else { i__2 = i__; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[i__ + j * a_dim1] / c__[j], abs(d__1)); } i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { tmp += (d__1 = a[j + i__ * a_dim1] / c__[j], abs(d__1)); } } work[(*n << 1) + i__] = tmp; } } /* Estimate the norm of inv(op(A)). */ ainvnm = 0.; kase = 0; L10: _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); if (kase != 0) { if (kase == 2) { /* Multiply by R. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= work[(*n << 1) + i__]; } if (up) { _starpu_dpotrs_("Upper", n, &c__1, &af[af_offset], ldaf, &work[1], n, info); } else { _starpu_dpotrs_("Lower", n, &c__1, &af[af_offset], ldaf, &work[1], n, info); } /* Multiply by inv(C). */ if (*cmode == 1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] /= c__[i__]; } } else if (*cmode == -1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= c__[i__]; } } } else { /* Multiply by inv(C'). */ if (*cmode == 1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] /= c__[i__]; } } else if (*cmode == -1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= c__[i__]; } } if (up) { _starpu_dpotrs_("Upper", n, &c__1, &af[af_offset], ldaf, &work[1], n, info); } else { _starpu_dpotrs_("Lower", n, &c__1, &af[af_offset], ldaf, &work[1], n, info); } /* Multiply by R. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= work[(*n << 1) + i__]; } } goto L10; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { ret_val = 1. / ainvnm; } return ret_val; } /* _starpu_dla_porcond__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_porfsx_extended.c000066400000000000000000000541631507764646700234260ustar00rootroot00000000000000/* _starpu_dla_porfsx_extended.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b9 = -1.; static doublereal c_b11 = 1.; /* Subroutine */ int _starpu_dla_porfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal * af, integer *ldaf, logical *colequ, doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, doublereal *res, doublereal *ayb, doublereal *dy, doublereal *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info, ftnlen uplo_len) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, y_dim1, y_offset, err_bnds_norm_dim1, err_bnds_norm_offset, err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2, i__3; doublereal d__1, d__2; /* Local variables */ doublereal dxratmax, dzratmax; integer i__, j; logical incr_prec__; extern /* Subroutine */ int _starpu_dla_syamv__(integer *, integer *, doublereal * , doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); doublereal prev_dz_z__, yk, final_dx_x__; extern /* Subroutine */ int _starpu_dla_wwaddw__(integer *, doublereal *, doublereal *, doublereal *); doublereal final_dz_z__, prevnormdx; integer cnt; doublereal dyk, eps, incr_thresh__, dx_x__, dz_z__; extern /* Subroutine */ int _starpu_dla_lin_berr__(integer *, integer *, integer * , doublereal *, doublereal *, doublereal *); doublereal ymin; integer y_prec_state__; extern /* Subroutine */ int _starpu_blas_dsymv_x__(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); integer uplo2; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int blas_dsymv2_x__(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer * ); doublereal dxrat, dzrat; extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsymv_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); doublereal normx, normy; extern doublereal _starpu_dlamch_(char *); doublereal normdx; extern /* Subroutine */ int _starpu_dpotrs_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); doublereal hugeval; extern integer _starpu_ilauplo_(char *); integer x_state__, z_state__; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_PORFSX_EXTENDED improves the computed solution to a system of */ /* linear equations by performing extra-precise iterative refinement */ /* and provides error bounds and backward error estimates for the solution. */ /* This subroutine is called by DPORFSX to perform iterative refinement. */ /* In addition to normwise error bound, the code provides maximum */ /* componentwise error bound if possible. See comments for ERR_BNDS_NORM */ /* and ERR_BNDS_COMP for details of the error bounds. Note that this */ /* subroutine is only resonsible for setting the second fields of */ /* ERR_BNDS_NORM and ERR_BNDS_COMP. */ /* Arguments */ /* ========= */ /* PREC_TYPE (input) INTEGER */ /* Specifies the intermediate precision to be used in refinement. */ /* The value is defined by ILAPREC(P) where P is a CHARACTER and */ /* P = 'S': Single */ /* = 'D': Double */ /* = 'I': Indigenous */ /* = 'X', 'E': Extra */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right-hand-sides, i.e., the number of columns of the */ /* matrix B. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The triangular factor U or L from the Cholesky factorization */ /* A = U**T*U or A = L*L**T, as computed by DPOTRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* COLEQU (input) LOGICAL */ /* If .TRUE. then column equilibration was done to A before calling */ /* this routine. This is needed to compute the solution and error */ /* bounds correctly. */ /* C (input) DOUBLE PRECISION array, dimension (N) */ /* The column scale factors for A. If COLEQU = .FALSE., C */ /* is not accessed. If C is input, each element of C should be a power */ /* of the radix to ensure a reliable solution and error estimates. */ /* Scaling by powers of the radix does not cause rounding errors unless */ /* the result underflows or overflows. Rounding errors during scaling */ /* lead to refining with a matrix that is not equivalent to the */ /* input matrix, producing error estimates that may not be */ /* reliable. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right-hand-side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* Y (input/output) DOUBLE PRECISION array, dimension */ /* (LDY,NRHS) */ /* On entry, the solution matrix X, as computed by DPOTRS. */ /* On exit, the improved solution matrix Y. */ /* LDY (input) INTEGER */ /* The leading dimension of the array Y. LDY >= max(1,N). */ /* BERR_OUT (output) DOUBLE PRECISION array, dimension (NRHS) */ /* On exit, BERR_OUT(j) contains the componentwise relative backward */ /* error for right-hand-side j from the formula */ /* max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. This is computed by DLA_LIN_BERR. */ /* N_NORMS (input) INTEGER */ /* Determines which error bounds to return (see ERR_BNDS_NORM */ /* and ERR_BNDS_COMP). */ /* If N_NORMS >= 1 return normwise error bounds. */ /* If N_NORMS >= 2 return componentwise error bounds. */ /* ERR_BNDS_NORM (input/output) DOUBLE PRECISION array, dimension */ /* (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* normwise relative error, which is defined as follows: */ /* Normwise relative error in the ith solution vector: */ /* max_j (abs(XTRUE(j,i) - X(j,i))) */ /* ------------------------------ */ /* max_j abs(X(j,i)) */ /* The array is indexed by the type of error information as described */ /* below. There currently are up to three pieces of information */ /* returned. */ /* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_NORM(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * slamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * slamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated normwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * slamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*A, where S scales each row by a power of the */ /* radix so all absolute row sums of Z are approximately 1. */ /* This subroutine is only responsible for setting the second field */ /* above. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* ERR_BNDS_COMP (input/output) DOUBLE PRECISION array, dimension */ /* (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* componentwise relative error, which is defined as follows: */ /* Componentwise relative error in the ith solution vector: */ /* abs(XTRUE(j,i) - X(j,i)) */ /* max_j ---------------------- */ /* abs(X(j,i)) */ /* The array is indexed by the right-hand side i (on which the */ /* componentwise relative error depends), and the type of error */ /* information as described below. There currently are up to three */ /* pieces of information returned for each right-hand side. If */ /* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ /* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ /* the first (:,N_ERR_BNDS) entries are returned. */ /* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_COMP(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * slamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * slamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated componentwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * slamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*(A*diag(x)), where x is the solution for the */ /* current right-hand side and S scales each row of */ /* A*diag(x) by a power of the radix so all absolute row */ /* sums of Z are approximately 1. */ /* This subroutine is only responsible for setting the second field */ /* above. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* RES (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace to hold the intermediate residual. */ /* AYB (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace. This can be the same workspace passed for Y_TAIL. */ /* DY (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace to hold the intermediate solution. */ /* Y_TAIL (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace to hold the trailing bits of the intermediate solution. */ /* RCOND (input) DOUBLE PRECISION */ /* Reciprocal scaled condition number. This is an estimate of the */ /* reciprocal Skeel condition number of the matrix A after */ /* equilibration (if done). If this is less than the machine */ /* precision (in particular, if it is zero), the matrix is singular */ /* to working precision. Note that the error may still be small even */ /* if this number is very small and the matrix appears ill- */ /* conditioned. */ /* ITHRESH (input) INTEGER */ /* The maximum number of residual computations allowed for */ /* refinement. The default is 10. For 'aggressive' set to 100 to */ /* permit convergence using approximate factorizations or */ /* factorizations other than LU. If the factorization uses a */ /* technique other than Gaussian elimination, the guarantees in */ /* ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */ /* RTHRESH (input) DOUBLE PRECISION */ /* Determines when to stop refinement if the error estimate stops */ /* decreasing. Refinement will stop when the next solution no longer */ /* satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */ /* the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */ /* default value is 0.5. For 'aggressive' set to 0.9 to permit */ /* convergence on extremely ill-conditioned matrices. See LAWN 165 */ /* for more details. */ /* DZ_UB (input) DOUBLE PRECISION */ /* Determines when to start considering componentwise convergence. */ /* Componentwise convergence is only considered after each component */ /* of the solution Y is stable, which we definte as the relative */ /* change in each component being less than DZ_UB. The default value */ /* is 0.25, requiring the first bit to be stable. See LAWN 165 for */ /* more details. */ /* IGNORE_CWISE (input) LOGICAL */ /* If .TRUE. then ignore componentwise convergence. Default value */ /* is .FALSE.. */ /* INFO (output) INTEGER */ /* = 0: Successful exit. */ /* < 0: if INFO = -i, the ith argument to DPOTRS had an illegal */ /* value */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Parameters .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ err_bnds_comp_dim1 = *nrhs; err_bnds_comp_offset = 1 + err_bnds_comp_dim1; err_bnds_comp__ -= err_bnds_comp_offset; err_bnds_norm_dim1 = *nrhs; err_bnds_norm_offset = 1 + err_bnds_norm_dim1; err_bnds_norm__ -= err_bnds_norm_offset; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --c__; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; y_dim1 = *ldy; y_offset = 1 + y_dim1; y -= y_offset; --berr_out__; --res; --ayb; --dy; --y_tail__; /* Function Body */ if (*info != 0) { return 0; } eps = _starpu_dlamch_("Epsilon"); hugeval = _starpu_dlamch_("Overflow"); /* Force HUGEVAL to Inf */ hugeval *= hugeval; /* Using HUGEVAL may lead to spurious underflows. */ incr_thresh__ = (doublereal) (*n) * eps; if (_starpu_lsame_(uplo, "L")) { uplo2 = _starpu_ilauplo_("L"); } else { uplo2 = _starpu_ilauplo_("U"); } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { y_prec_state__ = 1; if (y_prec_state__ == 2) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { y_tail__[i__] = 0.; } } dxrat = 0.; dxratmax = 0.; dzrat = 0.; dzratmax = 0.; final_dx_x__ = hugeval; final_dz_z__ = hugeval; prevnormdx = hugeval; prev_dz_z__ = hugeval; dz_z__ = hugeval; dx_x__ = hugeval; x_state__ = 1; z_state__ = 0; incr_prec__ = FALSE_; i__2 = *ithresh; for (cnt = 1; cnt <= i__2; ++cnt) { /* Compute residual RES = B_s - op(A_s) * Y, */ /* op(A) = A, A**T, or A**H depending on TRANS (and type). */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); if (y_prec_state__ == 0) { _starpu_dsymv_(uplo, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, &c_b11, &res[1], &c__1); } else if (y_prec_state__ == 1) { _starpu_blas_dsymv_x__(&uplo2, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, &c_b11, &res[1], &c__1, prec_type__); } else { blas_dsymv2_x__(&uplo2, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1], &y_tail__[1], &c__1, &c_b11, &res[1], & c__1, prec_type__); } /* XXX: RES is no longer needed. */ _starpu_dcopy_(n, &res[1], &c__1, &dy[1], &c__1); _starpu_dpotrs_(uplo, n, nrhs, &af[af_offset], ldaf, &dy[1], n, info); /* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */ normx = 0.; normy = 0.; normdx = 0.; dz_z__ = 0.; ymin = hugeval; i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { yk = (d__1 = y[i__ + j * y_dim1], abs(d__1)); dyk = (d__1 = dy[i__], abs(d__1)); if (yk != 0.) { /* Computing MAX */ d__1 = dz_z__, d__2 = dyk / yk; dz_z__ = max(d__1,d__2); } else if (dyk != 0.) { dz_z__ = hugeval; } ymin = min(ymin,yk); normy = max(normy,yk); if (*colequ) { /* Computing MAX */ d__1 = normx, d__2 = yk * c__[i__]; normx = max(d__1,d__2); /* Computing MAX */ d__1 = normdx, d__2 = dyk * c__[i__]; normdx = max(d__1,d__2); } else { normx = normy; normdx = max(normdx,dyk); } } if (normx != 0.) { dx_x__ = normdx / normx; } else if (normdx == 0.) { dx_x__ = 0.; } else { dx_x__ = hugeval; } dxrat = normdx / prevnormdx; dzrat = dz_z__ / prev_dz_z__; /* Check termination criteria. */ if (ymin * *rcond < incr_thresh__ * normy && y_prec_state__ < 2) { incr_prec__ = TRUE_; } if (x_state__ == 3 && dxrat <= *rthresh) { x_state__ = 1; } if (x_state__ == 1) { if (dx_x__ <= eps) { x_state__ = 2; } else if (dxrat > *rthresh) { if (y_prec_state__ != 2) { incr_prec__ = TRUE_; } else { x_state__ = 3; } } else { if (dxrat > dxratmax) { dxratmax = dxrat; } } if (x_state__ > 1) { final_dx_x__ = dx_x__; } } if (z_state__ == 0 && dz_z__ <= *dz_ub__) { z_state__ = 1; } if (z_state__ == 3 && dzrat <= *rthresh) { z_state__ = 1; } if (z_state__ == 1) { if (dz_z__ <= eps) { z_state__ = 2; } else if (dz_z__ > *dz_ub__) { z_state__ = 0; dzratmax = 0.; final_dz_z__ = hugeval; } else if (dzrat > *rthresh) { if (y_prec_state__ != 2) { incr_prec__ = TRUE_; } else { z_state__ = 3; } } else { if (dzrat > dzratmax) { dzratmax = dzrat; } } if (z_state__ > 1) { final_dz_z__ = dz_z__; } } if (x_state__ != 1 && (*ignore_cwise__ || z_state__ != 1)) { goto L666; } if (incr_prec__) { incr_prec__ = FALSE_; ++y_prec_state__; i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { y_tail__[i__] = 0.; } } prevnormdx = normdx; prev_dz_z__ = dz_z__; /* Update soluton. */ if (y_prec_state__ < 2) { _starpu_daxpy_(n, &c_b11, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1); } else { _starpu_dla_wwaddw__(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]); } } /* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. */ L666: /* Set final_* when cnt hits ithresh. */ if (x_state__ == 1) { final_dx_x__ = dx_x__; } if (z_state__ == 1) { final_dz_z__ = dz_z__; } /* Compute error bounds. */ if (*n_norms__ >= 1) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = final_dx_x__ / ( 1 - dxratmax); } if (*n_norms__ >= 2) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = final_dz_z__ / ( 1 - dzratmax); } /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. */ /* Compute residual RES = B_s - op(A_s) * Y, */ /* op(A) = A, A**T, or A**H depending on TRANS (and type). */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); _starpu_dsymv_(uplo, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, & c_b11, &res[1], &c__1); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { ayb[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); } /* Compute abs(op(A_s))*abs(Y) + abs(B_s). */ _starpu_dla_syamv__(&uplo2, n, &c_b11, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, &c_b11, &ayb[1], &c__1); _starpu_dla_lin_berr__(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]); /* End of loop for each RHS. */ } return 0; } /* _starpu_dla_porfsx_extended__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_porpvgrw.c000066400000000000000000000132641507764646700221100ustar00rootroot00000000000000/* _starpu_dla_porpvgrw.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_dla_porpvgrw__(char *uplo, integer *ncols, doublereal *a, integer * lda, doublereal *af, integer *ldaf, doublereal *work, ftnlen uplo_len) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2; doublereal ret_val, d__1, d__2, d__3; /* Local variables */ integer i__, j; doublereal amax, umax; extern logical _starpu_lsame_(char *, char *); logical upper; doublereal rpvgrw; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_PORPVGRW computes the reciprocal pivot growth factor */ /* norm(A)/norm(U). The "max absolute element" norm is used. If this is */ /* much less than 1, the stability of the LU factorization of the */ /* (equilibrated) matrix A could be poor. This also means that the */ /* solution X, estimated condition numbers, and error bounds could be */ /* unreliable. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* NCOLS (input) INTEGER */ /* The number of columns of the matrix A. NCOLS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The triangular factor U or L from the Cholesky factorization */ /* A = U**T*U or A = L*L**T, as computed by DPOTRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* WORK (input) DOUBLE PRECISION array, dimension (2*N) */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --work; /* Function Body */ upper = _starpu_lsame_("Upper", uplo); /* DPOTRF will have factored only the NCOLSxNCOLS leading minor, so */ /* we restrict the growth search to that minor and use only the first */ /* 2*NCOLS workspace entries. */ rpvgrw = 1.; i__1 = *ncols << 1; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; } /* Find the max magnitude entry of each column. */ if (upper) { i__1 = *ncols; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[* ncols + j]; work[*ncols + j] = max(d__2,d__3); } } } else { i__1 = *ncols; for (j = 1; j <= i__1; ++j) { i__2 = *ncols; for (i__ = j; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[* ncols + j]; work[*ncols + j] = max(d__2,d__3); } } } /* Now find the max magnitude entry of each column of the factor in */ /* AF. No pivoting, so no permutations. */ if (_starpu_lsame_("Upper", uplo)) { i__1 = *ncols; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = (d__1 = af[i__ + j * af_dim1], abs(d__1)), d__3 = work[ j]; work[j] = max(d__2,d__3); } } } else { i__1 = *ncols; for (j = 1; j <= i__1; ++j) { i__2 = *ncols; for (i__ = j; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = (d__1 = af[i__ + j * af_dim1], abs(d__1)), d__3 = work[ j]; work[j] = max(d__2,d__3); } } } /* Compute the *inverse* of the max element growth factor. Dividing */ /* by zero would imply the largest entry of the factor's column is */ /* zero. Than can happen when either the column of A is zero or */ /* massive pivots made the factor underflow to zero. Neither counts */ /* as growth in itself, so simply ignore terms with zero */ /* denominators. */ if (_starpu_lsame_("Upper", uplo)) { i__1 = *ncols; for (i__ = 1; i__ <= i__1; ++i__) { umax = work[i__]; amax = work[*ncols + i__]; if (umax != 0.) { /* Computing MIN */ d__1 = amax / umax; rpvgrw = min(d__1,rpvgrw); } } } else { i__1 = *ncols; for (i__ = 1; i__ <= i__1; ++i__) { umax = work[i__]; amax = work[*ncols + i__]; if (umax != 0.) { /* Computing MIN */ d__1 = amax / umax; rpvgrw = min(d__1,rpvgrw); } } } ret_val = rpvgrw; return ret_val; } /* _starpu_dla_porpvgrw__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_rpvgrw.c000066400000000000000000000067351507764646700215560ustar00rootroot00000000000000/* _starpu_dla_rpvgrw.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_dla_rpvgrw__(integer *n, integer *ncols, doublereal *a, integer * lda, doublereal *af, integer *ldaf) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2; doublereal ret_val, d__1, d__2; /* Local variables */ integer i__, j; doublereal amax, umax, rpvgrw; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_RPVGRW computes the reciprocal pivot growth factor */ /* norm(A)/norm(U). The "max absolute element" norm is used. If this is */ /* much less than 1, the stability of the LU factorization of the */ /* (equilibrated) matrix A could be poor. This also means that the */ /* solution X, estimated condition numbers, and error bounds could be */ /* unreliable. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NCOLS (input) INTEGER */ /* The number of columns of the matrix A. NCOLS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The factors L and U from the factorization */ /* A = P*L*U as computed by DGETRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; /* Function Body */ rpvgrw = 1.; i__1 = *ncols; for (j = 1; j <= i__1; ++j) { amax = 0.; umax = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); amax = max(d__2,amax); } i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = (d__1 = af[i__ + j * af_dim1], abs(d__1)); umax = max(d__2,umax); } if (umax != 0.) { /* Computing MIN */ d__1 = amax / umax; rpvgrw = min(d__1,rpvgrw); } } ret_val = rpvgrw; return ret_val; } /* _starpu_dla_rpvgrw__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_syamv.c000066400000000000000000000211571507764646700213610ustar00rootroot00000000000000/* _starpu_dla_syamv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dla_syamv__(integer *uplo, integer *n, doublereal *alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double d_sign(doublereal *, doublereal *); /* Local variables */ integer i__, j; logical symb_zero__; integer iy, jx, kx, ky, info; doublereal temp, safe1; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilauplo_(char *); /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_SYAMV performs the matrix-vector operation */ /* y := alpha*abs(A)*abs(x) + beta*abs(y), */ /* where alpha and beta are scalars, x and y are vectors and A is an */ /* n by n symmetric matrix. */ /* This function is primarily used in calculating error bounds. */ /* To protect against underflow during evaluation, components in */ /* the resulting vector are perturbed away from zero by (N+1) */ /* times the underflow threshold. To prevent unnecessarily large */ /* errors for block-structure embedded in general matrices, */ /* "symbolically" zero components are not perturbed. A zero */ /* entry is considered "symbolic" if all multiplications involved */ /* in computing that entry have at least one zero multiplicand. */ /* Parameters */ /* ========== */ /* UPLO - INTEGER */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the array A is to be referenced as */ /* follows: */ /* UPLO = BLAS_UPPER Only the upper triangular part of A */ /* is to be referenced. */ /* UPLO = BLAS_LOWER Only the lower triangular part of A */ /* is to be referenced. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION . */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry, the leading m by n part of the array A must */ /* contain the matrix of coefficients. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* max( 1, n ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ) */ /* Before entry, the incremented array X must contain the */ /* vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION . */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then Y need not be set on input. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ) */ /* Before entry with BETA non-zero, the incremented array Y */ /* must contain the vector y. On exit, Y is overwritten by the */ /* updated vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* -- Modified for the absolute-value product, April 2006 */ /* Jason Riedy, UC Berkeley */ /* .. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --x; --y; /* Function Body */ info = 0; if (*uplo != _starpu_ilauplo_("U") && *uplo != _starpu_ilauplo_("L") ) { info = 1; } else if (*n < 0) { info = 2; } else if (*lda < max(1,*n)) { info = 5; } else if (*incx == 0) { info = 7; } else if (*incy == 0) { info = 10; } if (info != 0) { _starpu_xerbla_("DSYMV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0. && *beta == 1.) { return 0; } /* Set up the start points in X and Y. */ if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } /* Set SAFE1 essentially to be the underflow threshold times the */ /* number of additions in each row. */ safe1 = _starpu_dlamch_("Safe minimum"); safe1 = (*n + 1) * safe1; /* Form y := alpha*abs(A)*abs(x) + beta*abs(y). */ /* The O(N^2) SYMB_ZERO tests could be replaced by O(N) queries to */ /* the inexact flag. Still doesn't help change the iteration order */ /* to per-column. */ iy = ky; if (*incx == 1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (*beta == 0.) { symb_zero__ = TRUE_; y[iy] = 0.; } else if (y[iy] == 0.) { symb_zero__ = TRUE_; } else { symb_zero__ = FALSE_; y[iy] = *beta * (d__1 = y[iy], abs(d__1)); } if (*alpha != 0.) { i__2 = *n; for (j = 1; j <= i__2; ++j) { if (*uplo == _starpu_ilauplo_("U")) { if (i__ <= j) { temp = (d__1 = a[i__ + j * a_dim1], abs(d__1)); } else { temp = (d__1 = a[j + i__ * a_dim1], abs(d__1)); } } else { if (i__ >= j) { temp = (d__1 = a[i__ + j * a_dim1], abs(d__1)); } else { temp = (d__1 = a[j + i__ * a_dim1], abs(d__1)); } } symb_zero__ = symb_zero__ && (x[j] == 0. || temp == 0.); y[iy] += *alpha * (d__1 = x[j], abs(d__1)) * temp; } } if (! symb_zero__) { y[iy] += d_sign(&safe1, &y[iy]); } iy += *incy; } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (*beta == 0.) { symb_zero__ = TRUE_; y[iy] = 0.; } else if (y[iy] == 0.) { symb_zero__ = TRUE_; } else { symb_zero__ = FALSE_; y[iy] = *beta * (d__1 = y[iy], abs(d__1)); } jx = kx; if (*alpha != 0.) { i__2 = *n; for (j = 1; j <= i__2; ++j) { if (*uplo == _starpu_ilauplo_("U")) { if (i__ <= j) { temp = (d__1 = a[i__ + j * a_dim1], abs(d__1)); } else { temp = (d__1 = a[j + i__ * a_dim1], abs(d__1)); } } else { if (i__ >= j) { temp = (d__1 = a[i__ + j * a_dim1], abs(d__1)); } else { temp = (d__1 = a[j + i__ * a_dim1], abs(d__1)); } } symb_zero__ = symb_zero__ && (x[j] == 0. || temp == 0.); y[iy] += *alpha * (d__1 = x[jx], abs(d__1)) * temp; jx += *incx; } } if (! symb_zero__) { y[iy] += d_sign(&safe1, &y[iy]); } iy += *incy; } } return 0; /* End of DLA_SYAMV */ } /* _starpu_dla_syamv__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_syrcond.c000066400000000000000000000210411507764646700216730ustar00rootroot00000000000000/* _starpu_dla_syrcond.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dla_syrcond__(char *uplo, integer *n, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, integer *cmode, doublereal *c__, integer *info, doublereal *work, integer *iwork, ftnlen uplo_len) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2; doublereal ret_val, d__1; /* Local variables */ integer i__, j; logical up; doublereal tmp; integer kase; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal ainvnm; char normin[1]; doublereal smlnum; extern /* Subroutine */ int _starpu_dsytrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments */ /* .. */ /* Purpose */ /* ======= */ /* DLA_SYRCOND estimates the Skeel condition number of op(A) * op2(C) */ /* where op2 is determined by CMODE as follows */ /* CMODE = 1 op2(C) = C */ /* CMODE = 0 op2(C) = I */ /* CMODE = -1 op2(C) = inv(C) */ /* The Skeel condition number cond(A) = norminf( |inv(A)||A| ) */ /* is computed by computing scaling factors R such that */ /* diag(R)*A*op2(C) is row equilibrated and computing the standard */ /* infinity-norm condition number. */ /* Arguments */ /* ========== */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The block diagonal matrix D and the multipliers used to */ /* obtain the factor U or L as computed by DSYTRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D */ /* as determined by DSYTRF. */ /* CMODE (input) INTEGER */ /* Determines op2(C) in the formula op(A) * op2(C) as follows: */ /* CMODE = 1 op2(C) = C */ /* CMODE = 0 op2(C) = I */ /* CMODE = -1 op2(C) = inv(C) */ /* C (input) DOUBLE PRECISION array, dimension (N) */ /* The vector C in the formula op(A) * op2(C). */ /* INFO (output) INTEGER */ /* = 0: Successful exit. */ /* i > 0: The ith argument is invalid. */ /* WORK (input) DOUBLE PRECISION array, dimension (3*N). */ /* Workspace. */ /* IWORK (input) INTEGER array, dimension (N). */ /* Workspace. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --ipiv; --c__; --work; --iwork; /* Function Body */ ret_val = 0.; *info = 0; if (*n < 0) { *info = -2; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLA_SYRCOND", &i__1); return ret_val; } if (*n == 0) { ret_val = 1.; return ret_val; } up = FALSE_; if (_starpu_lsame_(uplo, "U")) { up = TRUE_; } /* Compute the equilibration matrix R such that */ /* inv(R)*A*C has unit 1-norm. */ if (up) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { tmp = 0.; if (*cmode == 1) { i__2 = i__; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[j + i__ * a_dim1] * c__[j], abs(d__1)); } i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { tmp += (d__1 = a[i__ + j * a_dim1] * c__[j], abs(d__1)); } } else if (*cmode == 0) { i__2 = i__; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[j + i__ * a_dim1], abs(d__1)); } i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { tmp += (d__1 = a[i__ + j * a_dim1], abs(d__1)); } } else { i__2 = i__; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[j + i__ * a_dim1] / c__[j], abs(d__1)); } i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { tmp += (d__1 = a[i__ + j * a_dim1] / c__[j], abs(d__1)); } } work[(*n << 1) + i__] = tmp; } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { tmp = 0.; if (*cmode == 1) { i__2 = i__; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[i__ + j * a_dim1] * c__[j], abs(d__1)); } i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { tmp += (d__1 = a[j + i__ * a_dim1] * c__[j], abs(d__1)); } } else if (*cmode == 0) { i__2 = i__; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[i__ + j * a_dim1], abs(d__1)); } i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { tmp += (d__1 = a[j + i__ * a_dim1], abs(d__1)); } } else { i__2 = i__; for (j = 1; j <= i__2; ++j) { tmp += (d__1 = a[i__ + j * a_dim1] / c__[j], abs(d__1)); } i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { tmp += (d__1 = a[j + i__ * a_dim1] / c__[j], abs(d__1)); } } work[(*n << 1) + i__] = tmp; } } /* Estimate the norm of inv(op(A)). */ smlnum = _starpu_dlamch_("Safe minimum"); ainvnm = 0.; *(unsigned char *)normin = 'N'; kase = 0; L10: _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); if (kase != 0) { if (kase == 2) { /* Multiply by R. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= work[(*n << 1) + i__]; } if (up) { _starpu_dsytrs_("U", n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ 1], n, info); } else { _starpu_dsytrs_("L", n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ 1], n, info); } /* Multiply by inv(C). */ if (*cmode == 1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] /= c__[i__]; } } else if (*cmode == -1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= c__[i__]; } } } else { /* Multiply by inv(C'). */ if (*cmode == 1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] /= c__[i__]; } } else if (*cmode == -1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= c__[i__]; } } if (up) { _starpu_dsytrs_("U", n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ 1], n, info); } else { _starpu_dsytrs_("L", n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ 1], n, info); } /* Multiply by R. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] *= work[(*n << 1) + i__]; } } goto L10; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { ret_val = 1. / ainvnm; } return ret_val; } /* _starpu_dla_syrcond__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_syrfsx_extended.c000066400000000000000000000545141507764646700234430ustar00rootroot00000000000000/* _starpu_dla_syrfsx_extended.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b9 = -1.; static doublereal c_b11 = 1.; /* Subroutine */ int _starpu_dla_syrfsx_extended__(integer *prec_type__, char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal * af, integer *ldaf, integer *ipiv, logical *colequ, doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer *ldy, doublereal * berr_out__, integer *n_norms__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, doublereal *res, doublereal *ayb, doublereal *dy, doublereal *y_tail__, doublereal *rcond, integer * ithresh, doublereal *rthresh, doublereal *dz_ub__, logical * ignore_cwise__, integer *info, ftnlen uplo_len) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, y_dim1, y_offset, err_bnds_norm_dim1, err_bnds_norm_offset, err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2, i__3; doublereal d__1, d__2; /* Local variables */ doublereal dxratmax, dzratmax; integer i__, j; logical incr_prec__; extern /* Subroutine */ int _starpu_dla_syamv__(integer *, integer *, doublereal * , doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); doublereal prev_dz_z__, yk, final_dx_x__; extern /* Subroutine */ int _starpu_dla_wwaddw__(integer *, doublereal *, doublereal *, doublereal *); doublereal final_dz_z__, prevnormdx; integer cnt; doublereal dyk, eps, incr_thresh__, dx_x__, dz_z__; extern /* Subroutine */ int _starpu_dla_lin_berr__(integer *, integer *, integer * , doublereal *, doublereal *, doublereal *); doublereal ymin; integer y_prec_state__; extern /* Subroutine */ int _starpu_blas_dsymv_x__(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); integer uplo2; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int blas_dsymv2_x__(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer * ); doublereal dxrat, dzrat; extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsymv_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); doublereal normx, normy; extern doublereal _starpu_dlamch_(char *); doublereal normdx; extern /* Subroutine */ int _starpu_dsytrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); doublereal hugeval; extern integer _starpu_ilauplo_(char *); integer x_state__, z_state__; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_SYRFSX_EXTENDED improves the computed solution to a system of */ /* linear equations by performing extra-precise iterative refinement */ /* and provides error bounds and backward error estimates for the solution. */ /* This subroutine is called by DSYRFSX to perform iterative refinement. */ /* In addition to normwise error bound, the code provides maximum */ /* componentwise error bound if possible. See comments for ERR_BNDS_NORM */ /* and ERR_BNDS_COMP for details of the error bounds. Note that this */ /* subroutine is only resonsible for setting the second fields of */ /* ERR_BNDS_NORM and ERR_BNDS_COMP. */ /* Arguments */ /* ========= */ /* PREC_TYPE (input) INTEGER */ /* Specifies the intermediate precision to be used in refinement. */ /* The value is defined by ILAPREC(P) where P is a CHARACTER and */ /* P = 'S': Single */ /* = 'D': Double */ /* = 'I': Indigenous */ /* = 'X', 'E': Extra */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right-hand-sides, i.e., the number of columns of the */ /* matrix B. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The block diagonal matrix D and the multipliers used to */ /* obtain the factor U or L as computed by DSYTRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D */ /* as determined by DSYTRF. */ /* COLEQU (input) LOGICAL */ /* If .TRUE. then column equilibration was done to A before calling */ /* this routine. This is needed to compute the solution and error */ /* bounds correctly. */ /* C (input) DOUBLE PRECISION array, dimension (N) */ /* The column scale factors for A. If COLEQU = .FALSE., C */ /* is not accessed. If C is input, each element of C should be a power */ /* of the radix to ensure a reliable solution and error estimates. */ /* Scaling by powers of the radix does not cause rounding errors unless */ /* the result underflows or overflows. Rounding errors during scaling */ /* lead to refining with a matrix that is not equivalent to the */ /* input matrix, producing error estimates that may not be */ /* reliable. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right-hand-side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* Y (input/output) DOUBLE PRECISION array, dimension */ /* (LDY,NRHS) */ /* On entry, the solution matrix X, as computed by DSYTRS. */ /* On exit, the improved solution matrix Y. */ /* LDY (input) INTEGER */ /* The leading dimension of the array Y. LDY >= max(1,N). */ /* BERR_OUT (output) DOUBLE PRECISION array, dimension (NRHS) */ /* On exit, BERR_OUT(j) contains the componentwise relative backward */ /* error for right-hand-side j from the formula */ /* max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. This is computed by DLA_LIN_BERR. */ /* N_NORMS (input) INTEGER */ /* Determines which error bounds to return (see ERR_BNDS_NORM */ /* and ERR_BNDS_COMP). */ /* If N_NORMS >= 1 return normwise error bounds. */ /* If N_NORMS >= 2 return componentwise error bounds. */ /* ERR_BNDS_NORM (input/output) DOUBLE PRECISION array, dimension */ /* (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* normwise relative error, which is defined as follows: */ /* Normwise relative error in the ith solution vector: */ /* max_j (abs(XTRUE(j,i) - X(j,i))) */ /* ------------------------------ */ /* max_j abs(X(j,i)) */ /* The array is indexed by the type of error information as described */ /* below. There currently are up to three pieces of information */ /* returned. */ /* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_NORM(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * slamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * slamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated normwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * slamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*A, where S scales each row by a power of the */ /* radix so all absolute row sums of Z are approximately 1. */ /* This subroutine is only responsible for setting the second field */ /* above. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* ERR_BNDS_COMP (input/output) DOUBLE PRECISION array, dimension */ /* (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* componentwise relative error, which is defined as follows: */ /* Componentwise relative error in the ith solution vector: */ /* abs(XTRUE(j,i) - X(j,i)) */ /* max_j ---------------------- */ /* abs(X(j,i)) */ /* The array is indexed by the right-hand side i (on which the */ /* componentwise relative error depends), and the type of error */ /* information as described below. There currently are up to three */ /* pieces of information returned for each right-hand side. If */ /* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ /* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ /* the first (:,N_ERR_BNDS) entries are returned. */ /* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_COMP(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * slamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * slamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated componentwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * slamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*(A*diag(x)), where x is the solution for the */ /* current right-hand side and S scales each row of */ /* A*diag(x) by a power of the radix so all absolute row */ /* sums of Z are approximately 1. */ /* This subroutine is only responsible for setting the second field */ /* above. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* RES (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace to hold the intermediate residual. */ /* AYB (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace. This can be the same workspace passed for Y_TAIL. */ /* DY (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace to hold the intermediate solution. */ /* Y_TAIL (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace to hold the trailing bits of the intermediate solution. */ /* RCOND (input) DOUBLE PRECISION */ /* Reciprocal scaled condition number. This is an estimate of the */ /* reciprocal Skeel condition number of the matrix A after */ /* equilibration (if done). If this is less than the machine */ /* precision (in particular, if it is zero), the matrix is singular */ /* to working precision. Note that the error may still be small even */ /* if this number is very small and the matrix appears ill- */ /* conditioned. */ /* ITHRESH (input) INTEGER */ /* The maximum number of residual computations allowed for */ /* refinement. The default is 10. For 'aggressive' set to 100 to */ /* permit convergence using approximate factorizations or */ /* factorizations other than LU. If the factorization uses a */ /* technique other than Gaussian elimination, the guarantees in */ /* ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */ /* RTHRESH (input) DOUBLE PRECISION */ /* Determines when to stop refinement if the error estimate stops */ /* decreasing. Refinement will stop when the next solution no longer */ /* satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */ /* the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */ /* default value is 0.5. For 'aggressive' set to 0.9 to permit */ /* convergence on extremely ill-conditioned matrices. See LAWN 165 */ /* for more details. */ /* DZ_UB (input) DOUBLE PRECISION */ /* Determines when to start considering componentwise convergence. */ /* Componentwise convergence is only considered after each component */ /* of the solution Y is stable, which we definte as the relative */ /* change in each component being less than DZ_UB. The default value */ /* is 0.25, requiring the first bit to be stable. See LAWN 165 for */ /* more details. */ /* IGNORE_CWISE (input) LOGICAL */ /* If .TRUE. then ignore componentwise convergence. Default value */ /* is .FALSE.. */ /* INFO (output) INTEGER */ /* = 0: Successful exit. */ /* < 0: if INFO = -i, the ith argument to DSYTRS had an illegal */ /* value */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Parameters .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ err_bnds_comp_dim1 = *nrhs; err_bnds_comp_offset = 1 + err_bnds_comp_dim1; err_bnds_comp__ -= err_bnds_comp_offset; err_bnds_norm_dim1 = *nrhs; err_bnds_norm_offset = 1 + err_bnds_norm_dim1; err_bnds_norm__ -= err_bnds_norm_offset; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --ipiv; --c__; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; y_dim1 = *ldy; y_offset = 1 + y_dim1; y -= y_offset; --berr_out__; --res; --ayb; --dy; --y_tail__; /* Function Body */ if (*info != 0) { return 0; } eps = _starpu_dlamch_("Epsilon"); hugeval = _starpu_dlamch_("Overflow"); /* Force HUGEVAL to Inf */ hugeval *= hugeval; /* Using HUGEVAL may lead to spurious underflows. */ incr_thresh__ = (doublereal) (*n) * eps; if (_starpu_lsame_(uplo, "L")) { uplo2 = _starpu_ilauplo_("L"); } else { uplo2 = _starpu_ilauplo_("U"); } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { y_prec_state__ = 1; if (y_prec_state__ == 2) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { y_tail__[i__] = 0.; } } dxrat = 0.; dxratmax = 0.; dzrat = 0.; dzratmax = 0.; final_dx_x__ = hugeval; final_dz_z__ = hugeval; prevnormdx = hugeval; prev_dz_z__ = hugeval; dz_z__ = hugeval; dx_x__ = hugeval; x_state__ = 1; z_state__ = 0; incr_prec__ = FALSE_; i__2 = *ithresh; for (cnt = 1; cnt <= i__2; ++cnt) { /* Compute residual RES = B_s - op(A_s) * Y, */ /* op(A) = A, A**T, or A**H depending on TRANS (and type). */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); if (y_prec_state__ == 0) { _starpu_dsymv_(uplo, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, &c_b11, &res[1], &c__1); } else if (y_prec_state__ == 1) { _starpu_blas_dsymv_x__(&uplo2, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, &c_b11, &res[1], &c__1, prec_type__); } else { blas_dsymv2_x__(&uplo2, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1], &y_tail__[1], &c__1, &c_b11, &res[1], & c__1, prec_type__); } /* XXX: RES is no longer needed. */ _starpu_dcopy_(n, &res[1], &c__1, &dy[1], &c__1); _starpu_dsytrs_(uplo, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &dy[1], n, info); /* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */ normx = 0.; normy = 0.; normdx = 0.; dz_z__ = 0.; ymin = hugeval; i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { yk = (d__1 = y[i__ + j * y_dim1], abs(d__1)); dyk = (d__1 = dy[i__], abs(d__1)); if (yk != 0.) { /* Computing MAX */ d__1 = dz_z__, d__2 = dyk / yk; dz_z__ = max(d__1,d__2); } else if (dyk != 0.) { dz_z__ = hugeval; } ymin = min(ymin,yk); normy = max(normy,yk); if (*colequ) { /* Computing MAX */ d__1 = normx, d__2 = yk * c__[i__]; normx = max(d__1,d__2); /* Computing MAX */ d__1 = normdx, d__2 = dyk * c__[i__]; normdx = max(d__1,d__2); } else { normx = normy; normdx = max(normdx,dyk); } } if (normx != 0.) { dx_x__ = normdx / normx; } else if (normdx == 0.) { dx_x__ = 0.; } else { dx_x__ = hugeval; } dxrat = normdx / prevnormdx; dzrat = dz_z__ / prev_dz_z__; /* Check termination criteria. */ if (ymin * *rcond < incr_thresh__ * normy && y_prec_state__ < 2) { incr_prec__ = TRUE_; } if (x_state__ == 3 && dxrat <= *rthresh) { x_state__ = 1; } if (x_state__ == 1) { if (dx_x__ <= eps) { x_state__ = 2; } else if (dxrat > *rthresh) { if (y_prec_state__ != 2) { incr_prec__ = TRUE_; } else { x_state__ = 3; } } else { if (dxrat > dxratmax) { dxratmax = dxrat; } } if (x_state__ > 1) { final_dx_x__ = dx_x__; } } if (z_state__ == 0 && dz_z__ <= *dz_ub__) { z_state__ = 1; } if (z_state__ == 3 && dzrat <= *rthresh) { z_state__ = 1; } if (z_state__ == 1) { if (dz_z__ <= eps) { z_state__ = 2; } else if (dz_z__ > *dz_ub__) { z_state__ = 0; dzratmax = 0.; final_dz_z__ = hugeval; } else if (dzrat > *rthresh) { if (y_prec_state__ != 2) { incr_prec__ = TRUE_; } else { z_state__ = 3; } } else { if (dzrat > dzratmax) { dzratmax = dzrat; } } if (z_state__ > 1) { final_dz_z__ = dz_z__; } } if (x_state__ != 1 && (*ignore_cwise__ || z_state__ != 1)) { goto L666; } if (incr_prec__) { incr_prec__ = FALSE_; ++y_prec_state__; i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { y_tail__[i__] = 0.; } } prevnormdx = normdx; prev_dz_z__ = dz_z__; /* Update soluton. */ if (y_prec_state__ < 2) { _starpu_daxpy_(n, &c_b11, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1); } else { _starpu_dla_wwaddw__(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]); } } /* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. */ L666: /* Set final_* when cnt hits ithresh. */ if (x_state__ == 1) { final_dx_x__ = dx_x__; } if (z_state__ == 1) { final_dz_z__ = dz_z__; } /* Compute error bounds. */ if (*n_norms__ >= 1) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = final_dx_x__ / ( 1 - dxratmax); } if (*n_norms__ >= 2) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = final_dz_z__ / ( 1 - dzratmax); } /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. */ /* Compute residual RES = B_s - op(A_s) * Y, */ /* op(A) = A, A**T, or A**H depending on TRANS (and type). */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); _starpu_dsymv_(uplo, n, &c_b9, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, & c_b11, &res[1], &c__1); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { ayb[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); } /* Compute abs(op(A_s))*abs(Y) + abs(B_s). */ _starpu_dla_syamv__(&uplo2, n, &c_b11, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, &c_b11, &ayb[1], &c__1); _starpu_dla_lin_berr__(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]); /* End of loop for each RHS. */ } return 0; } /* _starpu_dla_syrfsx_extended__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_syrpvgrw.c000066400000000000000000000214001507764646700221140ustar00rootroot00000000000000/* _starpu_dla_syrpvgrw.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_dla_syrpvgrw__(char *uplo, integer *n, integer *info, doublereal * a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, doublereal *work, ftnlen uplo_len) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2; doublereal ret_val, d__1, d__2, d__3; /* Local variables */ integer i__, j, k, kp; doublereal tmp, amax, umax; extern logical _starpu_lsame_(char *, char *); integer ncols; logical upper; doublereal rpvgrw; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_SYRPVGRW computes the reciprocal pivot growth factor */ /* norm(A)/norm(U). The "max absolute element" norm is used. If this is */ /* much less than 1, the stability of the LU factorization of the */ /* (equilibrated) matrix A could be poor. This also means that the */ /* solution X, estimated condition numbers, and error bounds could be */ /* unreliable. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* INFO (input) INTEGER */ /* The value of INFO returned from DSYTRF, .i.e., the pivot in */ /* column INFO is exactly 0. */ /* NCOLS (input) INTEGER */ /* The number of columns of the matrix A. NCOLS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The block diagonal matrix D and the multipliers used to */ /* obtain the factor U or L as computed by DSYTRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D */ /* as determined by DSYTRF. */ /* WORK (input) DOUBLE PRECISION array, dimension (2*N) */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --ipiv; --work; /* Function Body */ upper = _starpu_lsame_("Upper", uplo); if (*info == 0) { if (upper) { ncols = 1; } else { ncols = *n; } } else { ncols = *info; } rpvgrw = 1.; i__1 = *n << 1; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; } /* Find the max magnitude entry of each column of A. Compute the max */ /* for all N columns so we can apply the pivot permutation while */ /* looping below. Assume a full factorization is the common case. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[* n + i__]; work[*n + i__] = max(d__2,d__3); /* Computing MAX */ d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[* n + j]; work[*n + j] = max(d__2,d__3); } } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[* n + i__]; work[*n + i__] = max(d__2,d__3); /* Computing MAX */ d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[* n + j]; work[*n + j] = max(d__2,d__3); } } } /* Now find the max magnitude entry of each column of U or L. Also */ /* permute the magnitudes of A above so they're in the same order as */ /* the factor. */ /* The iteration orders and permutations were copied from dsytrs. */ /* Calls to SSWAP would be severe overkill. */ if (upper) { k = *n; while(k < ncols && k > 0) { if (ipiv[k] > 0) { /* 1x1 pivot */ kp = ipiv[k]; if (kp != k) { tmp = work[*n + k]; work[*n + k] = work[*n + kp]; work[*n + kp] = tmp; } i__1 = k; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__2 = (d__1 = af[i__ + k * af_dim1], abs(d__1)), d__3 = work[k]; work[k] = max(d__2,d__3); } --k; } else { /* 2x2 pivot */ kp = -ipiv[k]; tmp = work[*n + k - 1]; work[*n + k - 1] = work[*n + kp]; work[*n + kp] = tmp; i__1 = k - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__2 = (d__1 = af[i__ + k * af_dim1], abs(d__1)), d__3 = work[k]; work[k] = max(d__2,d__3); /* Computing MAX */ d__2 = (d__1 = af[i__ + (k - 1) * af_dim1], abs(d__1)), d__3 = work[k - 1]; work[k - 1] = max(d__2,d__3); } /* Computing MAX */ d__2 = (d__1 = af[k + k * af_dim1], abs(d__1)), d__3 = work[k] ; work[k] = max(d__2,d__3); k += -2; } } k = ncols; while(k <= *n) { if (ipiv[k] > 0) { kp = ipiv[k]; if (kp != k) { tmp = work[*n + k]; work[*n + k] = work[*n + kp]; work[*n + kp] = tmp; } ++k; } else { kp = -ipiv[k]; tmp = work[*n + k]; work[*n + k] = work[*n + kp]; work[*n + kp] = tmp; k += 2; } } } else { k = 1; while(k <= ncols) { if (ipiv[k] > 0) { /* 1x1 pivot */ kp = ipiv[k]; if (kp != k) { tmp = work[*n + k]; work[*n + k] = work[*n + kp]; work[*n + kp] = tmp; } i__1 = *n; for (i__ = k; i__ <= i__1; ++i__) { /* Computing MAX */ d__2 = (d__1 = af[i__ + k * af_dim1], abs(d__1)), d__3 = work[k]; work[k] = max(d__2,d__3); } ++k; } else { /* 2x2 pivot */ kp = -ipiv[k]; tmp = work[*n + k + 1]; work[*n + k + 1] = work[*n + kp]; work[*n + kp] = tmp; i__1 = *n; for (i__ = k + 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__2 = (d__1 = af[i__ + k * af_dim1], abs(d__1)), d__3 = work[k]; work[k] = max(d__2,d__3); /* Computing MAX */ d__2 = (d__1 = af[i__ + (k + 1) * af_dim1], abs(d__1)), d__3 = work[k + 1]; work[k + 1] = max(d__2,d__3); } /* Computing MAX */ d__2 = (d__1 = af[k + k * af_dim1], abs(d__1)), d__3 = work[k] ; work[k] = max(d__2,d__3); k += 2; } } k = ncols; while(k >= 1) { if (ipiv[k] > 0) { kp = ipiv[k]; if (kp != k) { tmp = work[*n + k]; work[*n + k] = work[*n + kp]; work[*n + kp] = tmp; } --k; } else { kp = -ipiv[k]; tmp = work[*n + k]; work[*n + k] = work[*n + kp]; work[*n + kp] = tmp; k += -2; } } } /* Compute the *inverse* of the max element growth factor. Dividing */ /* by zero would imply the largest entry of the factor's column is */ /* zero. Than can happen when either the column of A is zero or */ /* massive pivots made the factor underflow to zero. Neither counts */ /* as growth in itself, so simply ignore terms with zero */ /* denominators. */ if (upper) { i__1 = *n; for (i__ = ncols; i__ <= i__1; ++i__) { umax = work[i__]; amax = work[*n + i__]; if (umax != 0.) { /* Computing MIN */ d__1 = amax / umax; rpvgrw = min(d__1,rpvgrw); } } } else { i__1 = ncols; for (i__ = 1; i__ <= i__1; ++i__) { umax = work[i__]; amax = work[*n + i__]; if (umax != 0.) { /* Computing MIN */ d__1 = amax / umax; rpvgrw = min(d__1,rpvgrw); } } } ret_val = rpvgrw; return ret_val; } /* _starpu_dla_syrpvgrw__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dla_wwaddw.c000066400000000000000000000043321507764646700215130ustar00rootroot00000000000000/* _starpu_dla_wwaddw.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dla_wwaddw__(integer *n, doublereal *x, doublereal *y, doublereal *w) { /* System generated locals */ integer i__1; /* Local variables */ integer i__; doublereal s; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_WWADDW adds a vector W into a doubled-single vector (X, Y). */ /* This works for all extant IBM's hex and binary floating point */ /* arithmetics, but not for decimal. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The length of vectors X, Y, and W. */ /* X, Y (input/output) DOUBLE PRECISION array, length N */ /* The doubled-single accumulation vector. */ /* W (input) DOUBLE PRECISION array, length N */ /* The vector to be added. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --w; --y; --x; /* Function Body */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { s = x[i__] + w[i__]; s = s + s - s; y[i__] = x[i__] - s + w[i__] + y[i__]; x[i__] = s; /* L10: */ } return 0; } /* _starpu_dla_wwaddw__ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlabad.c000066400000000000000000000046161507764646700206120ustar00rootroot00000000000000/* dlabad.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlabad_(doublereal *small, doublereal *large) { /* Builtin functions */ double d_lg10(doublereal *), sqrt(doublereal); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLABAD takes as input the values computed by DLAMCH for underflow and */ /* overflow, and returns the square root of each of these values if the */ /* log of LARGE is sufficiently large. This subroutine is intended to */ /* identify machines with a large exponent range, such as the Crays, and */ /* redefine the underflow and overflow limits to be the square roots of */ /* the values computed by DLAMCH. This subroutine is needed because */ /* DLAMCH does not compensate for poor arithmetic in the upper half of */ /* the exponent range, as is found on a Cray. */ /* Arguments */ /* ========= */ /* SMALL (input/output) DOUBLE PRECISION */ /* On entry, the underflow threshold as computed by DLAMCH. */ /* On exit, if LOG10(LARGE) is sufficiently large, the square */ /* root of SMALL, otherwise unchanged. */ /* LARGE (input/output) DOUBLE PRECISION */ /* On entry, the overflow threshold as computed by DLAMCH. */ /* On exit, if LOG10(LARGE) is sufficiently large, the square */ /* root of LARGE, otherwise unchanged. */ /* ===================================================================== */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* If it looks like we're on a Cray, take the square root of */ /* SMALL and LARGE to avoid overflow and underflow problems. */ if (d_lg10(large) > 2e3) { *small = sqrt(*small); *large = sqrt(*large); } return 0; /* End of DLABAD */ } /* _starpu_dlabad_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlabrd.c000066400000000000000000000356651507764646700206430ustar00rootroot00000000000000/* dlabrd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b4 = -1.; static doublereal c_b5 = 1.; static integer c__1 = 1; static doublereal c_b16 = 0.; /* Subroutine */ int _starpu_dlabrd_(integer *m, integer *n, integer *nb, doublereal * a, integer *lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal *taup, doublereal *x, integer *ldx, doublereal *y, integer *ldy) { /* System generated locals */ integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, i__3; /* Local variables */ integer i__; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLABRD reduces the first NB rows and columns of a real general */ /* m by n matrix A to upper or lower bidiagonal form by an orthogonal */ /* transformation Q' * A * P, and returns the matrices X and Y which */ /* are needed to apply the transformation to the unreduced part of A. */ /* If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower */ /* bidiagonal form. */ /* This is an auxiliary routine called by DGEBRD */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows in the matrix A. */ /* N (input) INTEGER */ /* The number of columns in the matrix A. */ /* NB (input) INTEGER */ /* The number of leading rows and columns of A to be reduced. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the m by n general matrix to be reduced. */ /* On exit, the first NB rows and columns of the matrix are */ /* overwritten; the rest of the array is unchanged. */ /* If m >= n, elements on and below the diagonal in the first NB */ /* columns, with the array TAUQ, represent the orthogonal */ /* matrix Q as a product of elementary reflectors; and */ /* elements above the diagonal in the first NB rows, with the */ /* array TAUP, represent the orthogonal matrix P as a product */ /* of elementary reflectors. */ /* If m < n, elements below the diagonal in the first NB */ /* columns, with the array TAUQ, represent the orthogonal */ /* matrix Q as a product of elementary reflectors, and */ /* elements on and above the diagonal in the first NB rows, */ /* with the array TAUP, represent the orthogonal matrix P as */ /* a product of elementary reflectors. */ /* See Further Details. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* D (output) DOUBLE PRECISION array, dimension (NB) */ /* The diagonal elements of the first NB rows and columns of */ /* the reduced matrix. D(i) = A(i,i). */ /* E (output) DOUBLE PRECISION array, dimension (NB) */ /* The off-diagonal elements of the first NB rows and columns of */ /* the reduced matrix. */ /* TAUQ (output) DOUBLE PRECISION array dimension (NB) */ /* The scalar factors of the elementary reflectors which */ /* represent the orthogonal matrix Q. See Further Details. */ /* TAUP (output) DOUBLE PRECISION array, dimension (NB) */ /* The scalar factors of the elementary reflectors which */ /* represent the orthogonal matrix P. See Further Details. */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NB) */ /* The m-by-nb matrix X required to update the unreduced part */ /* of A. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= M. */ /* Y (output) DOUBLE PRECISION array, dimension (LDY,NB) */ /* The n-by-nb matrix Y required to update the unreduced part */ /* of A. */ /* LDY (input) INTEGER */ /* The leading dimension of the array Y. LDY >= N. */ /* Further Details */ /* =============== */ /* The matrices Q and P are represented as products of elementary */ /* reflectors: */ /* Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) */ /* Each H(i) and G(i) has the form: */ /* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' */ /* where tauq and taup are real scalars, and v and u are real vectors. */ /* If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in */ /* A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in */ /* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). */ /* If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in */ /* A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in */ /* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). */ /* The elements of the vectors v and u together form the m-by-nb matrix */ /* V and the nb-by-n matrix U' which are needed, with X and Y, to apply */ /* the transformation to the unreduced part of the matrix, using a block */ /* update of the form: A := A - V*Y' - X*U'. */ /* The contents of A on exit are illustrated by the following examples */ /* with nb = 2: */ /* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): */ /* ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) */ /* ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) */ /* ( v1 v2 a a a ) ( v1 1 a a a a ) */ /* ( v1 v2 a a a ) ( v1 v2 a a a a ) */ /* ( v1 v2 a a a ) ( v1 v2 a a a a ) */ /* ( v1 v2 a a a ) */ /* where a denotes an element of the original matrix which is unchanged, */ /* vi denotes an element of the vector defining H(i), and ui an element */ /* of the vector defining G(i). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --d__; --e; --tauq; --taup; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; y_dim1 = *ldy; y_offset = 1 + y_dim1; y -= y_offset; /* Function Body */ if (*m <= 0 || *n <= 0) { return 0; } if (*m >= *n) { /* Reduce to upper bidiagonal form */ i__1 = *nb; for (i__ = 1; i__ <= i__1; ++i__) { /* Update A(i:m,i) */ i__2 = *m - i__ + 1; i__3 = i__ - 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &a[i__ + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b5, &a[i__ + i__ * a_dim1], & c__1); i__2 = *m - i__ + 1; i__3 = i__ - 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &x[i__ + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b5, &a[i__ + i__ * a_dim1], &c__1); /* Generate reflection Q(i) to annihilate A(i+1:m,i) */ i__2 = *m - i__ + 1; /* Computing MIN */ i__3 = i__ + 1; _starpu_dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3, *m)+ i__ * a_dim1], &c__1, &tauq[i__]); d__[i__] = a[i__ + i__ * a_dim1]; if (i__ < *n) { a[i__ + i__ * a_dim1] = 1.; /* Compute Y(i+1:n,i) */ i__2 = *m - i__ + 1; i__3 = *n - i__; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[i__ + (i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b16, & y[i__ + 1 + i__ * y_dim1], &c__1); i__2 = *m - i__ + 1; i__3 = i__ - 1; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[i__ + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[ i__ + 1 + i__ * y_dim1], &c__1); i__2 = *m - i__ + 1; i__3 = i__ - 1; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &x[i__ + x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1); i__2 = *n - i__; _starpu_dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); /* Update A(i,i+1:n) */ i__2 = *n - i__; _starpu_dgemv_("No transpose", &i__2, &i__, &c_b4, &y[i__ + 1 + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b5, &a[i__ + ( i__ + 1) * a_dim1], lda); i__2 = i__ - 1; i__3 = *n - i__; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b5, &a[ i__ + (i__ + 1) * a_dim1], lda); /* Generate reflection P(i) to annihilate A(i,i+2:n) */ i__2 = *n - i__; /* Computing MIN */ i__3 = i__ + 2; _starpu_dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( i__3, *n)* a_dim1], lda, &taup[i__]); e[i__] = a[i__ + (i__ + 1) * a_dim1]; a[i__ + (i__ + 1) * a_dim1] = 1.; /* Compute X(i+1:m,i) */ i__2 = *m - i__; i__3 = *n - i__; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[i__ + 1 + i__ * x_dim1], &c__1); i__2 = *n - i__; _starpu_dgemv_("Transpose", &i__2, &i__, &c_b5, &y[i__ + 1 + y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[ i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; _starpu_dgemv_("No transpose", &i__2, &i__, &c_b4, &a[i__ + 1 + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ i__ + 1 + i__ * x_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[(i__ + 1) * a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & c_b16, &x[i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; i__3 = i__ - 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ i__ + 1 + i__ * x_dim1], &c__1); i__2 = *m - i__; _starpu_dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); } /* L10: */ } } else { /* Reduce to lower bidiagonal form */ i__1 = *nb; for (i__ = 1; i__ <= i__1; ++i__) { /* Update A(i,i:n) */ i__2 = *n - i__ + 1; i__3 = i__ - 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &y[i__ + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b5, &a[i__ + i__ * a_dim1], lda); i__2 = i__ - 1; i__3 = *n - i__ + 1; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b4, &a[i__ * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b5, &a[i__ + i__ * a_dim1], lda); /* Generate reflection P(i) to annihilate A(i,i+1:n) */ i__2 = *n - i__ + 1; /* Computing MIN */ i__3 = i__ + 1; _starpu_dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3, *n)* a_dim1], lda, &taup[i__]); d__[i__] = a[i__ + i__ * a_dim1]; if (i__ < *m) { a[i__ + i__ * a_dim1] = 1.; /* Compute X(i+1:m,i) */ i__2 = *m - i__; i__3 = *n - i__ + 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + i__ * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b16, & x[i__ + 1 + i__ * x_dim1], &c__1); i__2 = *n - i__ + 1; i__3 = i__ - 1; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &y[i__ + y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; i__3 = i__ - 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ i__ + 1 + i__ * x_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__ + 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ * a_dim1 + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; i__3 = i__ - 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ i__ + 1 + i__ * x_dim1], &c__1); i__2 = *m - i__; _starpu_dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); /* Update A(i+1:m,i) */ i__2 = *m - i__; i__3 = i__ - 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b5, &a[i__ + 1 + i__ * a_dim1], &c__1); i__2 = *m - i__; _starpu_dgemv_("No transpose", &i__2, &i__, &c_b4, &x[i__ + 1 + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b5, &a[ i__ + 1 + i__ * a_dim1], &c__1); /* Generate reflection Q(i) to annihilate A(i+2:m,i) */ i__2 = *m - i__; /* Computing MIN */ i__3 = i__ + 2; _starpu_dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *m)+ i__ * a_dim1], &c__1, &tauq[i__]); e[i__] = a[i__ + 1 + i__ * a_dim1]; a[i__ + 1 + i__ * a_dim1] = 1.; /* Compute Y(i+1:n,i) */ i__2 = *m - i__; i__3 = *n - i__; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[i__ + 1 + i__ * y_dim1], &c__1); i__2 = *m - i__; i__3 = i__ - 1; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[ i__ * y_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[ i__ + 1 + i__ * y_dim1], &c__1); i__2 = *m - i__; _starpu_dgemv_("Transpose", &i__2, &i__, &c_b5, &x[i__ + 1 + x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[ i__ * y_dim1 + 1], &c__1); i__2 = *n - i__; _starpu_dgemv_("Transpose", &i__, &i__2, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1); i__2 = *n - i__; _starpu_dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); } /* L20: */ } } return 0; /* End of DLABRD */ } /* _starpu_dlabrd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlacn2.c000066400000000000000000000154171507764646700205470ustar00rootroot00000000000000/* dlacn2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b11 = 1.; /* Subroutine */ int _starpu_dlacn2_(integer *n, doublereal *v, doublereal *x, integer *isgn, doublereal *est, integer *kase, integer *isave) { /* System generated locals */ integer i__1; doublereal d__1; /* Builtin functions */ double d_sign(doublereal *, doublereal *); integer i_dnnt(doublereal *); /* Local variables */ integer i__; doublereal temp; extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); integer jlast; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); doublereal altsgn, estold; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLACN2 estimates the 1-norm of a square, real matrix A. */ /* Reverse communication is used for evaluating matrix-vector products. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix. N >= 1. */ /* V (workspace) DOUBLE PRECISION array, dimension (N) */ /* On the final return, V = A*W, where EST = norm(V)/norm(W) */ /* (W is not returned). */ /* X (input/output) DOUBLE PRECISION array, dimension (N) */ /* On an intermediate return, X should be overwritten by */ /* A * X, if KASE=1, */ /* A' * X, if KASE=2, */ /* and DLACN2 must be re-called with all the other parameters */ /* unchanged. */ /* ISGN (workspace) INTEGER array, dimension (N) */ /* EST (input/output) DOUBLE PRECISION */ /* On entry with KASE = 1 or 2 and ISAVE(1) = 3, EST should be */ /* unchanged from the previous call to DLACN2. */ /* On exit, EST is an estimate (a lower bound) for norm(A). */ /* KASE (input/output) INTEGER */ /* On the initial call to DLACN2, KASE should be 0. */ /* On an intermediate return, KASE will be 1 or 2, indicating */ /* whether X should be overwritten by A * X or A' * X. */ /* On the final return from DLACN2, KASE will again be 0. */ /* ISAVE (input/output) INTEGER array, dimension (3) */ /* ISAVE is used to save variables between calls to DLACN2 */ /* Further Details */ /* ======= ======= */ /* Contributed by Nick Higham, University of Manchester. */ /* Originally named SONEST, dated March 16, 1988. */ /* Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of */ /* a real or complex matrix, with applications to condition estimation", */ /* ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. */ /* This is a thread safe version of DLACON, which uses the array ISAVE */ /* in place of a SAVE statement, as follows: */ /* DLACON DLACN2 */ /* JUMP ISAVE(1) */ /* J ISAVE(2) */ /* ITER ISAVE(3) */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --isave; --isgn; --x; --v; /* Function Body */ if (*kase == 0) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { x[i__] = 1. / (doublereal) (*n); /* L10: */ } *kase = 1; isave[1] = 1; return 0; } switch (isave[1]) { case 1: goto L20; case 2: goto L40; case 3: goto L70; case 4: goto L110; case 5: goto L140; } /* ................ ENTRY (ISAVE( 1 ) = 1) */ /* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. */ L20: if (*n == 1) { v[1] = x[1]; *est = abs(v[1]); /* ... QUIT */ goto L150; } *est = _starpu_dasum_(n, &x[1], &c__1); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { x[i__] = d_sign(&c_b11, &x[i__]); isgn[i__] = i_dnnt(&x[i__]); /* L30: */ } *kase = 2; isave[1] = 2; return 0; /* ................ ENTRY (ISAVE( 1 ) = 2) */ /* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */ L40: isave[2] = _starpu_idamax_(n, &x[1], &c__1); isave[3] = 2; /* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */ L50: i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { x[i__] = 0.; /* L60: */ } x[isave[2]] = 1.; *kase = 1; isave[1] = 3; return 0; /* ................ ENTRY (ISAVE( 1 ) = 3) */ /* X HAS BEEN OVERWRITTEN BY A*X. */ L70: _starpu_dcopy_(n, &x[1], &c__1, &v[1], &c__1); estold = *est; *est = _starpu_dasum_(n, &v[1], &c__1); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { d__1 = d_sign(&c_b11, &x[i__]); if (i_dnnt(&d__1) != isgn[i__]) { goto L90; } /* L80: */ } /* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. */ goto L120; L90: /* TEST FOR CYCLING. */ if (*est <= estold) { goto L120; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { x[i__] = d_sign(&c_b11, &x[i__]); isgn[i__] = i_dnnt(&x[i__]); /* L100: */ } *kase = 2; isave[1] = 4; return 0; /* ................ ENTRY (ISAVE( 1 ) = 4) */ /* X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */ L110: jlast = isave[2]; isave[2] = _starpu_idamax_(n, &x[1], &c__1); if (x[jlast] != (d__1 = x[isave[2]], abs(d__1)) && isave[3] < 5) { ++isave[3]; goto L50; } /* ITERATION COMPLETE. FINAL STAGE. */ L120: altsgn = 1.; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { x[i__] = altsgn * ((doublereal) (i__ - 1) / (doublereal) (*n - 1) + 1.); altsgn = -altsgn; /* L130: */ } *kase = 1; isave[1] = 5; return 0; /* ................ ENTRY (ISAVE( 1 ) = 5) */ /* X HAS BEEN OVERWRITTEN BY A*X. */ L140: temp = _starpu_dasum_(n, &x[1], &c__1) / (doublereal) (*n * 3) * 2.; if (temp > *est) { _starpu_dcopy_(n, &x[1], &c__1, &v[1], &c__1); *est = temp; } L150: *kase = 0; return 0; /* End of DLACN2 */ } /* _starpu_dlacn2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlacon.c000066400000000000000000000146011507764646700206360ustar00rootroot00000000000000/* dlacon.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b11 = 1.; /* Subroutine */ int _starpu_dlacon_(integer *n, doublereal *v, doublereal *x, integer *isgn, doublereal *est, integer *kase) { /* System generated locals */ integer i__1; doublereal d__1; /* Builtin functions */ double d_sign(doublereal *, doublereal *); integer i_dnnt(doublereal *); /* Local variables */ static integer i__, j, iter; static doublereal temp; static integer jump; extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); static integer jlast; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); static doublereal altsgn, estold; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLACON estimates the 1-norm of a square, real matrix A. */ /* Reverse communication is used for evaluating matrix-vector products. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix. N >= 1. */ /* V (workspace) DOUBLE PRECISION array, dimension (N) */ /* On the final return, V = A*W, where EST = norm(V)/norm(W) */ /* (W is not returned). */ /* X (input/output) DOUBLE PRECISION array, dimension (N) */ /* On an intermediate return, X should be overwritten by */ /* A * X, if KASE=1, */ /* A' * X, if KASE=2, */ /* and DLACON must be re-called with all the other parameters */ /* unchanged. */ /* ISGN (workspace) INTEGER array, dimension (N) */ /* EST (input/output) DOUBLE PRECISION */ /* On entry with KASE = 1 or 2 and JUMP = 3, EST should be */ /* unchanged from the previous call to DLACON. */ /* On exit, EST is an estimate (a lower bound) for norm(A). */ /* KASE (input/output) INTEGER */ /* On the initial call to DLACON, KASE should be 0. */ /* On an intermediate return, KASE will be 1 or 2, indicating */ /* whether X should be overwritten by A * X or A' * X. */ /* On the final return from DLACON, KASE will again be 0. */ /* Further Details */ /* ======= ======= */ /* Contributed by Nick Higham, University of Manchester. */ /* Originally named SONEST, dated March 16, 1988. */ /* Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of */ /* a real or complex matrix, with applications to condition estimation", */ /* ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Save statement .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --isgn; --x; --v; /* Function Body */ if (*kase == 0) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { x[i__] = 1. / (doublereal) (*n); /* L10: */ } *kase = 1; jump = 1; return 0; } switch (jump) { case 1: goto L20; case 2: goto L40; case 3: goto L70; case 4: goto L110; case 5: goto L140; } /* ................ ENTRY (JUMP = 1) */ /* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. */ L20: if (*n == 1) { v[1] = x[1]; *est = abs(v[1]); /* ... QUIT */ goto L150; } *est = _starpu_dasum_(n, &x[1], &c__1); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { x[i__] = d_sign(&c_b11, &x[i__]); isgn[i__] = i_dnnt(&x[i__]); /* L30: */ } *kase = 2; jump = 2; return 0; /* ................ ENTRY (JUMP = 2) */ /* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */ L40: j = _starpu_idamax_(n, &x[1], &c__1); iter = 2; /* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */ L50: i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { x[i__] = 0.; /* L60: */ } x[j] = 1.; *kase = 1; jump = 3; return 0; /* ................ ENTRY (JUMP = 3) */ /* X HAS BEEN OVERWRITTEN BY A*X. */ L70: _starpu_dcopy_(n, &x[1], &c__1, &v[1], &c__1); estold = *est; *est = _starpu_dasum_(n, &v[1], &c__1); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { d__1 = d_sign(&c_b11, &x[i__]); if (i_dnnt(&d__1) != isgn[i__]) { goto L90; } /* L80: */ } /* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. */ goto L120; L90: /* TEST FOR CYCLING. */ if (*est <= estold) { goto L120; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { x[i__] = d_sign(&c_b11, &x[i__]); isgn[i__] = i_dnnt(&x[i__]); /* L100: */ } *kase = 2; jump = 4; return 0; /* ................ ENTRY (JUMP = 4) */ /* X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */ L110: jlast = j; j = _starpu_idamax_(n, &x[1], &c__1); if (x[jlast] != (d__1 = x[j], abs(d__1)) && iter < 5) { ++iter; goto L50; } /* ITERATION COMPLETE. FINAL STAGE. */ L120: altsgn = 1.; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { x[i__] = altsgn * ((doublereal) (i__ - 1) / (doublereal) (*n - 1) + 1.); altsgn = -altsgn; /* L130: */ } *kase = 1; jump = 5; return 0; /* ................ ENTRY (JUMP = 5) */ /* X HAS BEEN OVERWRITTEN BY A*X. */ L140: temp = _starpu_dasum_(n, &x[1], &c__1) / (doublereal) (*n * 3) * 2.; if (temp > *est) { _starpu_dcopy_(n, &x[1], &c__1, &v[1], &c__1); *est = temp; } L150: *kase = 0; return 0; /* End of DLACON */ } /* _starpu_dlacon_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlacpy.c000066400000000000000000000065311507764646700206550ustar00rootroot00000000000000/* dlacpy.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlacpy_(char *uplo, integer *m, integer *n, doublereal * a, integer *lda, doublereal *b, integer *ldb) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; /* Local variables */ integer i__, j; extern logical _starpu_lsame_(char *, char *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLACPY copies all or part of a two-dimensional matrix A to another */ /* matrix B. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies the part of the matrix A to be copied to B. */ /* = 'U': Upper triangular part */ /* = 'L': Lower triangular part */ /* Otherwise: All of the matrix A */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The m by n matrix A. If UPLO = 'U', only the upper triangle */ /* or trapezoid is accessed; if UPLO = 'L', only the lower */ /* triangle or trapezoid is accessed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* B (output) DOUBLE PRECISION array, dimension (LDB,N) */ /* On exit, B = A in the locations specified by UPLO. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,M). */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = min(j,*m); for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; /* L10: */ } /* L20: */ } } else if (_starpu_lsame_(uplo, "L")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = j; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; /* L30: */ } /* L40: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; /* L50: */ } /* L60: */ } } return 0; /* End of DLACPY */ } /* _starpu_dlacpy_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dladiv.c000066400000000000000000000040751507764646700206450ustar00rootroot00000000000000/* dladiv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dladiv_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *p, doublereal *q) { doublereal e, f; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLADIV performs complex division in real arithmetic */ /* a + i*b */ /* p + i*q = --------- */ /* c + i*d */ /* The algorithm is due to Robert L. Smith and can be found */ /* in D. Knuth, The art of Computer Programming, Vol.2, p.195 */ /* Arguments */ /* ========= */ /* A (input) DOUBLE PRECISION */ /* B (input) DOUBLE PRECISION */ /* C (input) DOUBLE PRECISION */ /* D (input) DOUBLE PRECISION */ /* The scalars a, b, c, and d in the above expression. */ /* P (output) DOUBLE PRECISION */ /* Q (output) DOUBLE PRECISION */ /* The scalars p and q in the above expression. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ if (abs(*d__) < abs(*c__)) { e = *d__ / *c__; f = *c__ + *d__ * e; *p = (*a + *b * e) / f; *q = (*b - *a * e) / f; } else { e = *c__ / *d__; f = *d__ + *c__ * e; *p = (*b + *a * e) / f; *q = (-(*a) + *b * e) / f; } return 0; /* End of DLADIV */ } /* _starpu_dladiv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlae2.c000066400000000000000000000073071507764646700203720ustar00rootroot00000000000000/* dlae2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlae2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *rt1, doublereal *rt2) { /* System generated locals */ doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal ab, df, tb, sm, rt, adf, acmn, acmx; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix */ /* [ A B ] */ /* [ B C ]. */ /* On return, RT1 is the eigenvalue of larger absolute value, and RT2 */ /* is the eigenvalue of smaller absolute value. */ /* Arguments */ /* ========= */ /* A (input) DOUBLE PRECISION */ /* The (1,1) element of the 2-by-2 matrix. */ /* B (input) DOUBLE PRECISION */ /* The (1,2) and (2,1) elements of the 2-by-2 matrix. */ /* C (input) DOUBLE PRECISION */ /* The (2,2) element of the 2-by-2 matrix. */ /* RT1 (output) DOUBLE PRECISION */ /* The eigenvalue of larger absolute value. */ /* RT2 (output) DOUBLE PRECISION */ /* The eigenvalue of smaller absolute value. */ /* Further Details */ /* =============== */ /* RT1 is accurate to a few ulps barring over/underflow. */ /* RT2 may be inaccurate if there is massive cancellation in the */ /* determinant A*C-B*B; higher precision or correctly rounded or */ /* correctly truncated arithmetic would be needed to compute RT2 */ /* accurately in all cases. */ /* Overflow is possible only if RT1 is within a factor of 5 of overflow. */ /* Underflow is harmless if the input data is 0 or exceeds */ /* underflow_threshold / macheps. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Compute the eigenvalues */ sm = *a + *c__; df = *a - *c__; adf = abs(df); tb = *b + *b; ab = abs(tb); if (abs(*a) > abs(*c__)) { acmx = *a; acmn = *c__; } else { acmx = *c__; acmn = *a; } if (adf > ab) { /* Computing 2nd power */ d__1 = ab / adf; rt = adf * sqrt(d__1 * d__1 + 1.); } else if (adf < ab) { /* Computing 2nd power */ d__1 = adf / ab; rt = ab * sqrt(d__1 * d__1 + 1.); } else { /* Includes case AB=ADF=0 */ rt = ab * sqrt(2.); } if (sm < 0.) { *rt1 = (sm - rt) * .5; /* Order of execution important. */ /* To get fully accurate smaller eigenvalue, */ /* next line needs to be executed in higher precision. */ *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; } else if (sm > 0.) { *rt1 = (sm + rt) * .5; /* Order of execution important. */ /* To get fully accurate smaller eigenvalue, */ /* next line needs to be executed in higher precision. */ *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; } else { /* Includes case RT1 = RT2 = 0 */ *rt1 = rt * .5; *rt2 = rt * -.5; } return 0; /* End of DLAE2 */ } /* _starpu_dlae2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaebz.c000066400000000000000000000510111507764646700206330ustar00rootroot00000000000000/* dlaebz.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaebz_(integer *ijob, integer *nitmax, integer *n, integer *mmax, integer *minp, integer *nbmin, doublereal *abstol, doublereal *reltol, doublereal *pivmin, doublereal *d__, doublereal * e, doublereal *e2, integer *nval, doublereal *ab, doublereal *c__, integer *mout, integer *nab, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer nab_dim1, nab_offset, ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublereal d__1, d__2, d__3, d__4; /* Local variables */ integer j, kf, ji, kl, jp, jit; doublereal tmp1, tmp2; integer itmp1, itmp2, kfnew, klnew; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAEBZ contains the iteration loops which compute and use the */ /* function N(w), which is the count of eigenvalues of a symmetric */ /* tridiagonal matrix T less than or equal to its argument w. It */ /* performs a choice of two types of loops: */ /* IJOB=1, followed by */ /* IJOB=2: It takes as input a list of intervals and returns a list of */ /* sufficiently small intervals whose union contains the same */ /* eigenvalues as the union of the original intervals. */ /* The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. */ /* The output interval (AB(j,1),AB(j,2)] will contain */ /* eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. */ /* IJOB=3: It performs a binary search in each input interval */ /* (AB(j,1),AB(j,2)] for a point w(j) such that */ /* N(w(j))=NVAL(j), and uses C(j) as the starting point of */ /* the search. If such a w(j) is found, then on output */ /* AB(j,1)=AB(j,2)=w. If no such w(j) is found, then on output */ /* (AB(j,1),AB(j,2)] will be a small interval containing the */ /* point where N(w) jumps through NVAL(j), unless that point */ /* lies outside the initial interval. */ /* Note that the intervals are in all cases half-open intervals, */ /* i.e., of the form (a,b] , which includes b but not a . */ /* To avoid underflow, the matrix should be scaled so that its largest */ /* element is no greater than overflow**(1/2) * underflow**(1/4) */ /* in absolute value. To assure the most accurate computation */ /* of small eigenvalues, the matrix should be scaled to be */ /* not much smaller than that, either. */ /* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */ /* Matrix", Report CS41, Computer Science Dept., Stanford */ /* University, July 21, 1966 */ /* Note: the arguments are, in general, *not* checked for unreasonable */ /* values. */ /* Arguments */ /* ========= */ /* IJOB (input) INTEGER */ /* Specifies what is to be done: */ /* = 1: Compute NAB for the initial intervals. */ /* = 2: Perform bisection iteration to find eigenvalues of T. */ /* = 3: Perform bisection iteration to invert N(w), i.e., */ /* to find a point which has a specified number of */ /* eigenvalues of T to its left. */ /* Other values will cause DLAEBZ to return with INFO=-1. */ /* NITMAX (input) INTEGER */ /* The maximum number of "levels" of bisection to be */ /* performed, i.e., an interval of width W will not be made */ /* smaller than 2^(-NITMAX) * W. If not all intervals */ /* have converged after NITMAX iterations, then INFO is set */ /* to the number of non-converged intervals. */ /* N (input) INTEGER */ /* The dimension n of the tridiagonal matrix T. It must be at */ /* least 1. */ /* MMAX (input) INTEGER */ /* The maximum number of intervals. If more than MMAX intervals */ /* are generated, then DLAEBZ will quit with INFO=MMAX+1. */ /* MINP (input) INTEGER */ /* The initial number of intervals. It may not be greater than */ /* MMAX. */ /* NBMIN (input) INTEGER */ /* The smallest number of intervals that should be processed */ /* using a vector loop. If zero, then only the scalar loop */ /* will be used. */ /* ABSTOL (input) DOUBLE PRECISION */ /* The minimum (absolute) width of an interval. When an */ /* interval is narrower than ABSTOL, or than RELTOL times the */ /* larger (in magnitude) endpoint, then it is considered to be */ /* sufficiently small, i.e., converged. This must be at least */ /* zero. */ /* RELTOL (input) DOUBLE PRECISION */ /* The minimum relative width of an interval. When an interval */ /* is narrower than ABSTOL, or than RELTOL times the larger (in */ /* magnitude) endpoint, then it is considered to be */ /* sufficiently small, i.e., converged. Note: this should */ /* always be at least radix*machine epsilon. */ /* PIVMIN (input) DOUBLE PRECISION */ /* The minimum absolute value of a "pivot" in the Sturm */ /* sequence loop. This *must* be at least max |e(j)**2| * */ /* safe_min and at least safe_min, where safe_min is at least */ /* the smallest number that can divide one without overflow. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The diagonal elements of the tridiagonal matrix T. */ /* E (input) DOUBLE PRECISION array, dimension (N) */ /* The offdiagonal elements of the tridiagonal matrix T in */ /* positions 1 through N-1. E(N) is arbitrary. */ /* E2 (input) DOUBLE PRECISION array, dimension (N) */ /* The squares of the offdiagonal elements of the tridiagonal */ /* matrix T. E2(N) is ignored. */ /* NVAL (input/output) INTEGER array, dimension (MINP) */ /* If IJOB=1 or 2, not referenced. */ /* If IJOB=3, the desired values of N(w). The elements of NVAL */ /* will be reordered to correspond with the intervals in AB. */ /* Thus, NVAL(j) on output will not, in general be the same as */ /* NVAL(j) on input, but it will correspond with the interval */ /* (AB(j,1),AB(j,2)] on output. */ /* AB (input/output) DOUBLE PRECISION array, dimension (MMAX,2) */ /* The endpoints of the intervals. AB(j,1) is a(j), the left */ /* endpoint of the j-th interval, and AB(j,2) is b(j), the */ /* right endpoint of the j-th interval. The input intervals */ /* will, in general, be modified, split, and reordered by the */ /* calculation. */ /* C (input/output) DOUBLE PRECISION array, dimension (MMAX) */ /* If IJOB=1, ignored. */ /* If IJOB=2, workspace. */ /* If IJOB=3, then on input C(j) should be initialized to the */ /* first search point in the binary search. */ /* MOUT (output) INTEGER */ /* If IJOB=1, the number of eigenvalues in the intervals. */ /* If IJOB=2 or 3, the number of intervals output. */ /* If IJOB=3, MOUT will equal MINP. */ /* NAB (input/output) INTEGER array, dimension (MMAX,2) */ /* If IJOB=1, then on output NAB(i,j) will be set to N(AB(i,j)). */ /* If IJOB=2, then on input, NAB(i,j) should be set. It must */ /* satisfy the condition: */ /* N(AB(i,1)) <= NAB(i,1) <= NAB(i,2) <= N(AB(i,2)), */ /* which means that in interval i only eigenvalues */ /* NAB(i,1)+1,...,NAB(i,2) will be considered. Usually, */ /* NAB(i,j)=N(AB(i,j)), from a previous call to DLAEBZ with */ /* IJOB=1. */ /* On output, NAB(i,j) will contain */ /* max(na(k),min(nb(k),N(AB(i,j)))), where k is the index of */ /* the input interval that the output interval */ /* (AB(j,1),AB(j,2)] came from, and na(k) and nb(k) are the */ /* the input values of NAB(k,1) and NAB(k,2). */ /* If IJOB=3, then on output, NAB(i,j) contains N(AB(i,j)), */ /* unless N(w) > NVAL(i) for all search points w , in which */ /* case NAB(i,1) will not be modified, i.e., the output */ /* value will be the same as the input value (modulo */ /* reorderings -- see NVAL and AB), or unless N(w) < NVAL(i) */ /* for all search points w , in which case NAB(i,2) will */ /* not be modified. Normally, NAB should be set to some */ /* distinctive value(s) before DLAEBZ is called. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (MMAX) */ /* Workspace. */ /* IWORK (workspace) INTEGER array, dimension (MMAX) */ /* Workspace. */ /* INFO (output) INTEGER */ /* = 0: All intervals converged. */ /* = 1--MMAX: The last INFO intervals did not converge. */ /* = MMAX+1: More than MMAX intervals were generated. */ /* Further Details */ /* =============== */ /* This routine is intended to be called only by other LAPACK */ /* routines, thus the interface is less user-friendly. It is intended */ /* for two purposes: */ /* (a) finding eigenvalues. In this case, DLAEBZ should have one or */ /* more initial intervals set up in AB, and DLAEBZ should be called */ /* with IJOB=1. This sets up NAB, and also counts the eigenvalues. */ /* Intervals with no eigenvalues would usually be thrown out at */ /* this point. Also, if not all the eigenvalues in an interval i */ /* are desired, NAB(i,1) can be increased or NAB(i,2) decreased. */ /* For example, set NAB(i,1)=NAB(i,2)-1 to get the largest */ /* eigenvalue. DLAEBZ is then called with IJOB=2 and MMAX */ /* no smaller than the value of MOUT returned by the call with */ /* IJOB=1. After this (IJOB=2) call, eigenvalues NAB(i,1)+1 */ /* through NAB(i,2) are approximately AB(i,1) (or AB(i,2)) to the */ /* tolerance specified by ABSTOL and RELTOL. */ /* (b) finding an interval (a',b'] containing eigenvalues w(f),...,w(l). */ /* In this case, start with a Gershgorin interval (a,b). Set up */ /* AB to contain 2 search intervals, both initially (a,b). One */ /* NVAL element should contain f-1 and the other should contain l */ /* , while C should contain a and b, resp. NAB(i,1) should be -1 */ /* and NAB(i,2) should be N+1, to flag an error if the desired */ /* interval does not lie in (a,b). DLAEBZ is then called with */ /* IJOB=3. On exit, if w(f-1) < w(f), then one of the intervals -- */ /* j -- will have AB(j,1)=AB(j,2) and NAB(j,1)=NAB(j,2)=f-1, while */ /* if, to the specified tolerance, w(f-k)=...=w(f+r), k > 0 and r */ /* >= 0, then the interval will have N(AB(j,1))=NAB(j,1)=f-k and */ /* N(AB(j,2))=NAB(j,2)=f+r. The cases w(l) < w(l+1) and */ /* w(l-r)=...=w(l+k) are handled similarly. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Check for Errors */ /* Parameter adjustments */ nab_dim1 = *mmax; nab_offset = 1 + nab_dim1; nab -= nab_offset; ab_dim1 = *mmax; ab_offset = 1 + ab_dim1; ab -= ab_offset; --d__; --e; --e2; --nval; --c__; --work; --iwork; /* Function Body */ *info = 0; if (*ijob < 1 || *ijob > 3) { *info = -1; return 0; } /* Initialize NAB */ if (*ijob == 1) { /* Compute the number of eigenvalues in the initial intervals. */ *mout = 0; /* DIR$ NOVECTOR */ i__1 = *minp; for (ji = 1; ji <= i__1; ++ji) { for (jp = 1; jp <= 2; ++jp) { tmp1 = d__[1] - ab[ji + jp * ab_dim1]; if (abs(tmp1) < *pivmin) { tmp1 = -(*pivmin); } nab[ji + jp * nab_dim1] = 0; if (tmp1 <= 0.) { nab[ji + jp * nab_dim1] = 1; } i__2 = *n; for (j = 2; j <= i__2; ++j) { tmp1 = d__[j] - e2[j - 1] / tmp1 - ab[ji + jp * ab_dim1]; if (abs(tmp1) < *pivmin) { tmp1 = -(*pivmin); } if (tmp1 <= 0.) { ++nab[ji + jp * nab_dim1]; } /* L10: */ } /* L20: */ } *mout = *mout + nab[ji + (nab_dim1 << 1)] - nab[ji + nab_dim1]; /* L30: */ } return 0; } /* Initialize for loop */ /* KF and KL have the following meaning: */ /* Intervals 1,...,KF-1 have converged. */ /* Intervals KF,...,KL still need to be refined. */ kf = 1; kl = *minp; /* If IJOB=2, initialize C. */ /* If IJOB=3, use the user-supplied starting point. */ if (*ijob == 2) { i__1 = *minp; for (ji = 1; ji <= i__1; ++ji) { c__[ji] = (ab[ji + ab_dim1] + ab[ji + (ab_dim1 << 1)]) * .5; /* L40: */ } } /* Iteration loop */ i__1 = *nitmax; for (jit = 1; jit <= i__1; ++jit) { /* Loop over intervals */ if (kl - kf + 1 >= *nbmin && *nbmin > 0) { /* Begin of Parallel Version of the loop */ i__2 = kl; for (ji = kf; ji <= i__2; ++ji) { /* Compute N(c), the number of eigenvalues less than c */ work[ji] = d__[1] - c__[ji]; iwork[ji] = 0; if (work[ji] <= *pivmin) { iwork[ji] = 1; /* Computing MIN */ d__1 = work[ji], d__2 = -(*pivmin); work[ji] = min(d__1,d__2); } i__3 = *n; for (j = 2; j <= i__3; ++j) { work[ji] = d__[j] - e2[j - 1] / work[ji] - c__[ji]; if (work[ji] <= *pivmin) { ++iwork[ji]; /* Computing MIN */ d__1 = work[ji], d__2 = -(*pivmin); work[ji] = min(d__1,d__2); } /* L50: */ } /* L60: */ } if (*ijob <= 2) { /* IJOB=2: Choose all intervals containing eigenvalues. */ klnew = kl; i__2 = kl; for (ji = kf; ji <= i__2; ++ji) { /* Insure that N(w) is monotone */ /* Computing MIN */ /* Computing MAX */ i__5 = nab[ji + nab_dim1], i__6 = iwork[ji]; i__3 = nab[ji + (nab_dim1 << 1)], i__4 = max(i__5,i__6); iwork[ji] = min(i__3,i__4); /* Update the Queue -- add intervals if both halves */ /* contain eigenvalues. */ if (iwork[ji] == nab[ji + (nab_dim1 << 1)]) { /* No eigenvalue in the upper interval: */ /* just use the lower interval. */ ab[ji + (ab_dim1 << 1)] = c__[ji]; } else if (iwork[ji] == nab[ji + nab_dim1]) { /* No eigenvalue in the lower interval: */ /* just use the upper interval. */ ab[ji + ab_dim1] = c__[ji]; } else { ++klnew; if (klnew <= *mmax) { /* Eigenvalue in both intervals -- add upper to */ /* queue. */ ab[klnew + (ab_dim1 << 1)] = ab[ji + (ab_dim1 << 1)]; nab[klnew + (nab_dim1 << 1)] = nab[ji + (nab_dim1 << 1)]; ab[klnew + ab_dim1] = c__[ji]; nab[klnew + nab_dim1] = iwork[ji]; ab[ji + (ab_dim1 << 1)] = c__[ji]; nab[ji + (nab_dim1 << 1)] = iwork[ji]; } else { *info = *mmax + 1; } } /* L70: */ } if (*info != 0) { return 0; } kl = klnew; } else { /* IJOB=3: Binary search. Keep only the interval containing */ /* w s.t. N(w) = NVAL */ i__2 = kl; for (ji = kf; ji <= i__2; ++ji) { if (iwork[ji] <= nval[ji]) { ab[ji + ab_dim1] = c__[ji]; nab[ji + nab_dim1] = iwork[ji]; } if (iwork[ji] >= nval[ji]) { ab[ji + (ab_dim1 << 1)] = c__[ji]; nab[ji + (nab_dim1 << 1)] = iwork[ji]; } /* L80: */ } } } else { /* End of Parallel Version of the loop */ /* Begin of Serial Version of the loop */ klnew = kl; i__2 = kl; for (ji = kf; ji <= i__2; ++ji) { /* Compute N(w), the number of eigenvalues less than w */ tmp1 = c__[ji]; tmp2 = d__[1] - tmp1; itmp1 = 0; if (tmp2 <= *pivmin) { itmp1 = 1; /* Computing MIN */ d__1 = tmp2, d__2 = -(*pivmin); tmp2 = min(d__1,d__2); } /* A series of compiler directives to defeat vectorization */ /* for the next loop */ /* $PL$ CMCHAR=' ' */ /* DIR$ NEXTSCALAR */ /* $DIR SCALAR */ /* DIR$ NEXT SCALAR */ /* VD$L NOVECTOR */ /* DEC$ NOVECTOR */ /* VD$ NOVECTOR */ /* VDIR NOVECTOR */ /* VOCL LOOP,SCALAR */ /* IBM PREFER SCALAR */ /* $PL$ CMCHAR='*' */ i__3 = *n; for (j = 2; j <= i__3; ++j) { tmp2 = d__[j] - e2[j - 1] / tmp2 - tmp1; if (tmp2 <= *pivmin) { ++itmp1; /* Computing MIN */ d__1 = tmp2, d__2 = -(*pivmin); tmp2 = min(d__1,d__2); } /* L90: */ } if (*ijob <= 2) { /* IJOB=2: Choose all intervals containing eigenvalues. */ /* Insure that N(w) is monotone */ /* Computing MIN */ /* Computing MAX */ i__5 = nab[ji + nab_dim1]; i__3 = nab[ji + (nab_dim1 << 1)], i__4 = max(i__5,itmp1); itmp1 = min(i__3,i__4); /* Update the Queue -- add intervals if both halves */ /* contain eigenvalues. */ if (itmp1 == nab[ji + (nab_dim1 << 1)]) { /* No eigenvalue in the upper interval: */ /* just use the lower interval. */ ab[ji + (ab_dim1 << 1)] = tmp1; } else if (itmp1 == nab[ji + nab_dim1]) { /* No eigenvalue in the lower interval: */ /* just use the upper interval. */ ab[ji + ab_dim1] = tmp1; } else if (klnew < *mmax) { /* Eigenvalue in both intervals -- add upper to queue. */ ++klnew; ab[klnew + (ab_dim1 << 1)] = ab[ji + (ab_dim1 << 1)]; nab[klnew + (nab_dim1 << 1)] = nab[ji + (nab_dim1 << 1)]; ab[klnew + ab_dim1] = tmp1; nab[klnew + nab_dim1] = itmp1; ab[ji + (ab_dim1 << 1)] = tmp1; nab[ji + (nab_dim1 << 1)] = itmp1; } else { *info = *mmax + 1; return 0; } } else { /* IJOB=3: Binary search. Keep only the interval */ /* containing w s.t. N(w) = NVAL */ if (itmp1 <= nval[ji]) { ab[ji + ab_dim1] = tmp1; nab[ji + nab_dim1] = itmp1; } if (itmp1 >= nval[ji]) { ab[ji + (ab_dim1 << 1)] = tmp1; nab[ji + (nab_dim1 << 1)] = itmp1; } } /* L100: */ } kl = klnew; /* End of Serial Version of the loop */ } /* Check for convergence */ kfnew = kf; i__2 = kl; for (ji = kf; ji <= i__2; ++ji) { tmp1 = (d__1 = ab[ji + (ab_dim1 << 1)] - ab[ji + ab_dim1], abs( d__1)); /* Computing MAX */ d__3 = (d__1 = ab[ji + (ab_dim1 << 1)], abs(d__1)), d__4 = (d__2 = ab[ji + ab_dim1], abs(d__2)); tmp2 = max(d__3,d__4); /* Computing MAX */ d__1 = max(*abstol,*pivmin), d__2 = *reltol * tmp2; if (tmp1 < max(d__1,d__2) || nab[ji + nab_dim1] >= nab[ji + ( nab_dim1 << 1)]) { /* Converged -- Swap with position KFNEW, */ /* then increment KFNEW */ if (ji > kfnew) { tmp1 = ab[ji + ab_dim1]; tmp2 = ab[ji + (ab_dim1 << 1)]; itmp1 = nab[ji + nab_dim1]; itmp2 = nab[ji + (nab_dim1 << 1)]; ab[ji + ab_dim1] = ab[kfnew + ab_dim1]; ab[ji + (ab_dim1 << 1)] = ab[kfnew + (ab_dim1 << 1)]; nab[ji + nab_dim1] = nab[kfnew + nab_dim1]; nab[ji + (nab_dim1 << 1)] = nab[kfnew + (nab_dim1 << 1)]; ab[kfnew + ab_dim1] = tmp1; ab[kfnew + (ab_dim1 << 1)] = tmp2; nab[kfnew + nab_dim1] = itmp1; nab[kfnew + (nab_dim1 << 1)] = itmp2; if (*ijob == 3) { itmp1 = nval[ji]; nval[ji] = nval[kfnew]; nval[kfnew] = itmp1; } } ++kfnew; } /* L110: */ } kf = kfnew; /* Choose Midpoints */ i__2 = kl; for (ji = kf; ji <= i__2; ++ji) { c__[ji] = (ab[ji + ab_dim1] + ab[ji + (ab_dim1 << 1)]) * .5; /* L120: */ } /* If no more intervals to refine, quit. */ if (kf > kl) { goto L140; } /* L130: */ } /* Converged */ L140: /* Computing MAX */ i__1 = kl + 1 - kf; *info = max(i__1,0); *mout = kl; return 0; /* End of DLAEBZ */ } /* _starpu_dlaebz_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaed0.c000066400000000000000000000323471507764646700205360ustar00rootroot00000000000000/* dlaed0.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__9 = 9; static integer c__0 = 0; static integer c__2 = 2; static doublereal c_b23 = 1.; static doublereal c_b24 = 0.; static integer c__1 = 1; /* Subroutine */ int _starpu_dlaed0_(integer *icompq, integer *qsiz, integer *n, doublereal *d__, doublereal *e, doublereal *q, integer *ldq, doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double log(doublereal); integer pow_ii(integer *, integer *); /* Local variables */ integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2; doublereal temp; integer curr; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer iperm; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); integer indxq, iwrem; extern /* Subroutine */ int _starpu_dlaed1_(integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); integer iqptr; extern /* Subroutine */ int _starpu_dlaed7_(integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, integer *); integer tlvls; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); integer igivcl; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer igivnm, submat, curprb, subpbs, igivpt; extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer curlvl, matsiz, iprmpt, smlsiz; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAED0 computes all eigenvalues and corresponding eigenvectors of a */ /* symmetric tridiagonal matrix using the divide and conquer method. */ /* Arguments */ /* ========= */ /* ICOMPQ (input) INTEGER */ /* = 0: Compute eigenvalues only. */ /* = 1: Compute eigenvectors of original dense symmetric matrix */ /* also. On entry, Q contains the orthogonal matrix used */ /* to reduce the original matrix to tridiagonal form. */ /* = 2: Compute eigenvalues and eigenvectors of tridiagonal */ /* matrix. */ /* QSIZ (input) INTEGER */ /* The dimension of the orthogonal matrix used to reduce */ /* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. */ /* N (input) INTEGER */ /* The dimension of the symmetric tridiagonal matrix. N >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the main diagonal of the tridiagonal matrix. */ /* On exit, its eigenvalues. */ /* E (input) DOUBLE PRECISION array, dimension (N-1) */ /* The off-diagonal elements of the tridiagonal matrix. */ /* On exit, E has been destroyed. */ /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */ /* On entry, Q must contain an N-by-N orthogonal matrix. */ /* If ICOMPQ = 0 Q is not referenced. */ /* If ICOMPQ = 1 On entry, Q is a subset of the columns of the */ /* orthogonal matrix used to reduce the full */ /* matrix to tridiagonal form corresponding to */ /* the subset of the full matrix which is being */ /* decomposed at this time. */ /* If ICOMPQ = 2 On entry, Q will be the identity matrix. */ /* On exit, Q contains the eigenvectors of the */ /* tridiagonal matrix. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. If eigenvectors are */ /* desired, then LDQ >= max(1,N). In any case, LDQ >= 1. */ /* QSTORE (workspace) DOUBLE PRECISION array, dimension (LDQS, N) */ /* Referenced only when ICOMPQ = 1. Used to store parts of */ /* the eigenvector matrix when the updating matrix multiplies */ /* take place. */ /* LDQS (input) INTEGER */ /* The leading dimension of the array QSTORE. If ICOMPQ = 1, */ /* then LDQS >= max(1,N). In any case, LDQS >= 1. */ /* WORK (workspace) DOUBLE PRECISION array, */ /* If ICOMPQ = 0 or 1, the dimension of WORK must be at least */ /* 1 + 3*N + 2*N*lg N + 2*N**2 */ /* ( lg( N ) = smallest integer k */ /* such that 2^k >= N ) */ /* If ICOMPQ = 2, the dimension of WORK must be at least */ /* 4*N + N**2. */ /* IWORK (workspace) INTEGER array, */ /* If ICOMPQ = 0 or 1, the dimension of IWORK must be at least */ /* 6 + 6*N + 5*N*lg N. */ /* ( lg( N ) = smallest integer k */ /* such that 2^k >= N ) */ /* If ICOMPQ = 2, the dimension of IWORK must be at least */ /* 3 + 5*N. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: The algorithm failed to compute an eigenvalue while */ /* working on the submatrix lying in rows and columns */ /* INFO/(N+1) through mod(INFO,N+1). */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; qstore_dim1 = *ldqs; qstore_offset = 1 + qstore_dim1; qstore -= qstore_offset; --work; --iwork; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 2) { *info = -1; } else if (*icompq == 1 && *qsiz < max(0,*n)) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ldq < max(1,*n)) { *info = -7; } else if (*ldqs < max(1,*n)) { *info = -9; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLAED0", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } smlsiz = _starpu_ilaenv_(&c__9, "DLAED0", " ", &c__0, &c__0, &c__0, &c__0); /* Determine the size and placement of the submatrices, and save in */ /* the leading elements of IWORK. */ iwork[1] = *n; subpbs = 1; tlvls = 0; L10: if (iwork[subpbs] > smlsiz) { for (j = subpbs; j >= 1; --j) { iwork[j * 2] = (iwork[j] + 1) / 2; iwork[(j << 1) - 1] = iwork[j] / 2; /* L20: */ } ++tlvls; subpbs <<= 1; goto L10; } i__1 = subpbs; for (j = 2; j <= i__1; ++j) { iwork[j] += iwork[j - 1]; /* L30: */ } /* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 */ /* using rank-1 modifications (cuts). */ spm1 = subpbs - 1; i__1 = spm1; for (i__ = 1; i__ <= i__1; ++i__) { submat = iwork[i__] + 1; smm1 = submat - 1; d__[smm1] -= (d__1 = e[smm1], abs(d__1)); d__[submat] -= (d__1 = e[smm1], abs(d__1)); /* L40: */ } indxq = (*n << 2) + 3; if (*icompq != 2) { /* Set up workspaces for eigenvalues only/accumulate new vectors */ /* routine */ temp = log((doublereal) (*n)) / log(2.); lgn = (integer) temp; if (pow_ii(&c__2, &lgn) < *n) { ++lgn; } if (pow_ii(&c__2, &lgn) < *n) { ++lgn; } iprmpt = indxq + *n + 1; iperm = iprmpt + *n * lgn; iqptr = iperm + *n * lgn; igivpt = iqptr + *n + 2; igivcl = igivpt + *n * lgn; igivnm = 1; iq = igivnm + (*n << 1) * lgn; /* Computing 2nd power */ i__1 = *n; iwrem = iq + i__1 * i__1 + 1; /* Initialize pointers */ i__1 = subpbs; for (i__ = 0; i__ <= i__1; ++i__) { iwork[iprmpt + i__] = 1; iwork[igivpt + i__] = 1; /* L50: */ } iwork[iqptr] = 1; } /* Solve each submatrix eigenproblem at the bottom of the divide and */ /* conquer tree. */ curr = 0; i__1 = spm1; for (i__ = 0; i__ <= i__1; ++i__) { if (i__ == 0) { submat = 1; matsiz = iwork[1]; } else { submat = iwork[i__] + 1; matsiz = iwork[i__ + 1] - iwork[i__]; } if (*icompq == 2) { _starpu_dsteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat + submat * q_dim1], ldq, &work[1], info); if (*info != 0) { goto L130; } } else { _starpu_dsteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + iwork[iqptr + curr]], &matsiz, &work[1], info); if (*info != 0) { goto L130; } if (*icompq == 1) { _starpu_dgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b23, &q[submat * q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]], &matsiz, &c_b24, &qstore[submat * qstore_dim1 + 1], ldqs); } /* Computing 2nd power */ i__2 = matsiz; iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; ++curr; } k = 1; i__2 = iwork[i__ + 1]; for (j = submat; j <= i__2; ++j) { iwork[indxq + j] = k; ++k; /* L60: */ } /* L70: */ } /* Successively merge eigensystems of adjacent submatrices */ /* into eigensystem for the corresponding larger matrix. */ /* while ( SUBPBS > 1 ) */ curlvl = 1; L80: if (subpbs > 1) { spm2 = subpbs - 2; i__1 = spm2; for (i__ = 0; i__ <= i__1; i__ += 2) { if (i__ == 0) { submat = 1; matsiz = iwork[2]; msd2 = iwork[1]; curprb = 0; } else { submat = iwork[i__] + 1; matsiz = iwork[i__ + 2] - iwork[i__]; msd2 = matsiz / 2; ++curprb; } /* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) */ /* into an eigensystem of size MATSIZ. */ /* DLAED1 is used only for the full eigensystem of a tridiagonal */ /* matrix. */ /* DLAED7 handles the cases in which eigenvalues only or eigenvalues */ /* and eigenvectors of a full symmetric matrix (which was reduced to */ /* tridiagonal form) are desired. */ if (*icompq == 2) { _starpu_dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], & msd2, &work[1], &iwork[subpbs + 1], info); } else { _starpu_dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ submat], &qstore[submat * qstore_dim1 + 1], ldqs, & iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, & work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm] , &iwork[igivpt], &iwork[igivcl], &work[igivnm], & work[iwrem], &iwork[subpbs + 1], info); } if (*info != 0) { goto L130; } iwork[i__ / 2 + 1] = iwork[i__ + 2]; /* L90: */ } subpbs /= 2; ++curlvl; goto L80; } /* end while */ /* Re-merge the eigenvalues/vectors which were deflated at the final */ /* merge step. */ if (*icompq == 1) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { j = iwork[indxq + i__]; work[i__] = d__[j]; _starpu_dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1], &c__1); /* L100: */ } _starpu_dcopy_(n, &work[1], &c__1, &d__[1], &c__1); } else if (*icompq == 2) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { j = iwork[indxq + i__]; work[i__] = d__[j]; _starpu_dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1); /* L110: */ } _starpu_dcopy_(n, &work[1], &c__1, &d__[1], &c__1); _starpu_dlacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq); } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { j = iwork[indxq + i__]; work[i__] = d__[j]; /* L120: */ } _starpu_dcopy_(n, &work[1], &c__1, &d__[1], &c__1); } goto L140; L130: *info = submat * (*n + 1) + submat + matsiz - 1; L140: return 0; /* End of DLAED0 */ } /* _starpu_dlaed0_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaed1.c000066400000000000000000000177201507764646700205350ustar00rootroot00000000000000/* dlaed1.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; /* Subroutine */ int _starpu_dlaed1_(integer *n, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer q_dim1, q_offset, i__1, i__2; /* Local variables */ integer i__, k, n1, n2, is, iw, iz, iq2, zpp1, indx, indxc; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); integer indxp; extern /* Subroutine */ int _starpu_dlaed2_(integer *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, integer *), _starpu_dlaed3_(integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, integer *); integer idlmda; extern /* Subroutine */ int _starpu_dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *), _starpu_xerbla_(char *, integer *); integer coltyp; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAED1 computes the updated eigensystem of a diagonal */ /* matrix after modification by a rank-one symmetric matrix. This */ /* routine is used only for the eigenproblem which requires all */ /* eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles */ /* the case in which eigenvalues only or eigenvalues and eigenvectors */ /* of a full symmetric matrix (which was reduced to tridiagonal form) */ /* are desired. */ /* T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) */ /* where Z = Q'u, u is a vector of length N with ones in the */ /* CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. */ /* The eigenvectors of the original matrix are stored in Q, and the */ /* eigenvalues are in D. The algorithm consists of three stages: */ /* The first stage consists of deflating the size of the problem */ /* when there are multiple eigenvalues or if there is a zero in */ /* the Z vector. For each such occurence the dimension of the */ /* secular equation problem is reduced by one. This stage is */ /* performed by the routine DLAED2. */ /* The second stage consists of calculating the updated */ /* eigenvalues. This is done by finding the roots of the secular */ /* equation via the routine DLAED4 (as called by DLAED3). */ /* This routine also calculates the eigenvectors of the current */ /* problem. */ /* The final stage consists of computing the updated eigenvectors */ /* directly using the updated eigenvalues. The eigenvectors for */ /* the current problem are multiplied with the eigenvectors from */ /* the overall problem. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The dimension of the symmetric tridiagonal matrix. N >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the eigenvalues of the rank-1-perturbed matrix. */ /* On exit, the eigenvalues of the repaired matrix. */ /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ /* On entry, the eigenvectors of the rank-1-perturbed matrix. */ /* On exit, the eigenvectors of the repaired tridiagonal matrix. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max(1,N). */ /* INDXQ (input/output) INTEGER array, dimension (N) */ /* On entry, the permutation which separately sorts the two */ /* subproblems in D into ascending order. */ /* On exit, the permutation which will reintegrate the */ /* subproblems back into sorted order, */ /* i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. */ /* RHO (input) DOUBLE PRECISION */ /* The subdiagonal entry used to create the rank-1 modification. */ /* CUTPNT (input) INTEGER */ /* The location of the last eigenvalue in the leading sub-matrix. */ /* min(1,N) <= CUTPNT <= N/2. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N + N**2) */ /* IWORK (workspace) INTEGER array, dimension (4*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, an eigenvalue did not converge */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* Modified by Francoise Tisseur, University of Tennessee. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --indxq; --work; --iwork; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*ldq < max(1,*n)) { *info = -4; } else /* if(complicated condition) */ { /* Computing MIN */ i__1 = 1, i__2 = *n / 2; if (min(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) { *info = -7; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLAED1", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* The following values are integer pointers which indicate */ /* the portion of the workspace */ /* used by a particular array in DLAED2 and DLAED3. */ iz = 1; idlmda = iz + *n; iw = idlmda + *n; iq2 = iw + *n; indx = 1; indxc = indx + *n; coltyp = indxc + *n; indxp = coltyp + *n; /* Form the z-vector which consists of the last row of Q_1 and the */ /* first row of Q_2. */ _starpu_dcopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1); zpp1 = *cutpnt + 1; i__1 = *n - *cutpnt; _starpu_dcopy_(&i__1, &q[zpp1 + zpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1); /* Deflate eigenvalues. */ _starpu_dlaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[ indxc], &iwork[indxp], &iwork[coltyp], info); if (*info != 0) { goto L20; } /* Solve Secular Equation. */ if (k != 0) { is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2; _starpu_dlaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[ is], info); if (*info != 0) { goto L20; } /* Prepare the INDXQ sorting permutation. */ n1 = k; n2 = *n - k; _starpu_dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { indxq[i__] = i__; /* L10: */ } } L20: return 0; /* End of DLAED1 */ } /* _starpu_dlaed1_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaed2.c000066400000000000000000000360461507764646700205400ustar00rootroot00000000000000/* dlaed2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b3 = -1.; static integer c__1 = 1; /* Subroutine */ int _starpu_dlaed2_(integer *k, integer *n, integer *n1, doublereal * d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2, integer *indx, integer *indxc, integer *indxp, integer *coltyp, integer *info) { /* System generated locals */ integer q_dim1, q_offset, i__1, i__2; doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal c__; integer i__, j; doublereal s, t; integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1; doublereal eps, tau, tol; integer psm[4], imax, jmax; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); integer ctot[4]; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAED2 merges the two sets of eigenvalues together into a single */ /* sorted set. Then it tries to deflate the size of the problem. */ /* There are two ways in which deflation can occur: when two or more */ /* eigenvalues are close together or if there is a tiny entry in the */ /* Z vector. For each such occurrence the order of the related secular */ /* equation problem is reduced by one. */ /* Arguments */ /* ========= */ /* K (output) INTEGER */ /* The number of non-deflated eigenvalues, and the order of the */ /* related secular equation. 0 <= K <=N. */ /* N (input) INTEGER */ /* The dimension of the symmetric tridiagonal matrix. N >= 0. */ /* N1 (input) INTEGER */ /* The location of the last eigenvalue in the leading sub-matrix. */ /* min(1,N) <= N1 <= N/2. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, D contains the eigenvalues of the two submatrices to */ /* be combined. */ /* On exit, D contains the trailing (N-K) updated eigenvalues */ /* (those which were deflated) sorted into increasing order. */ /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */ /* On entry, Q contains the eigenvectors of two submatrices in */ /* the two square blocks with corners at (1,1), (N1,N1) */ /* and (N1+1, N1+1), (N,N). */ /* On exit, Q contains the trailing (N-K) updated eigenvectors */ /* (those which were deflated) in its last N-K columns. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max(1,N). */ /* INDXQ (input/output) INTEGER array, dimension (N) */ /* The permutation which separately sorts the two sub-problems */ /* in D into ascending order. Note that elements in the second */ /* half of this permutation must first have N1 added to their */ /* values. Destroyed on exit. */ /* RHO (input/output) DOUBLE PRECISION */ /* On entry, the off-diagonal element associated with the rank-1 */ /* cut which originally split the two submatrices which are now */ /* being recombined. */ /* On exit, RHO has been modified to the value required by */ /* DLAED3. */ /* Z (input) DOUBLE PRECISION array, dimension (N) */ /* On entry, Z contains the updating vector (the last */ /* row of the first sub-eigenvector matrix and the first row of */ /* the second sub-eigenvector matrix). */ /* On exit, the contents of Z have been destroyed by the updating */ /* process. */ /* DLAMDA (output) DOUBLE PRECISION array, dimension (N) */ /* A copy of the first K eigenvalues which will be used by */ /* DLAED3 to form the secular equation. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* The first k values of the final deflation-altered z-vector */ /* which will be passed to DLAED3. */ /* Q2 (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) */ /* A copy of the first K eigenvectors which will be used by */ /* DLAED3 in a matrix multiply (DGEMM) to solve for the new */ /* eigenvectors. */ /* INDX (workspace) INTEGER array, dimension (N) */ /* The permutation used to sort the contents of DLAMDA into */ /* ascending order. */ /* INDXC (output) INTEGER array, dimension (N) */ /* The permutation used to arrange the columns of the deflated */ /* Q matrix into three groups: the first group contains non-zero */ /* elements only at and above N1, the second contains */ /* non-zero elements only below N1, and the third is dense. */ /* INDXP (workspace) INTEGER array, dimension (N) */ /* The permutation used to place deflated values of D at the end */ /* of the array. INDXP(1:K) points to the nondeflated D-values */ /* and INDXP(K+1:N) points to the deflated eigenvalues. */ /* COLTYP (workspace/output) INTEGER array, dimension (N) */ /* During execution, a label which will indicate which of the */ /* following types a column in the Q2 matrix is: */ /* 1 : non-zero in the upper half only; */ /* 2 : dense; */ /* 3 : non-zero in the lower half only; */ /* 4 : deflated. */ /* On exit, COLTYP(i) is the number of columns of type i, */ /* for i=1 to 4 only. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* Modified by Francoise Tisseur, University of Tennessee. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --indxq; --z__; --dlamda; --w; --q2; --indx; --indxc; --indxp; --coltyp; /* Function Body */ *info = 0; if (*n < 0) { *info = -2; } else if (*ldq < max(1,*n)) { *info = -6; } else /* if(complicated condition) */ { /* Computing MIN */ i__1 = 1, i__2 = *n / 2; if (min(i__1,i__2) > *n1 || *n / 2 < *n1) { *info = -3; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLAED2", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } n2 = *n - *n1; n1p1 = *n1 + 1; if (*rho < 0.) { _starpu_dscal_(&n2, &c_b3, &z__[n1p1], &c__1); } /* Normalize z so that norm(z) = 1. Since z is the concatenation of */ /* two normalized vectors, norm2(z) = sqrt(2). */ t = 1. / sqrt(2.); _starpu_dscal_(n, &t, &z__[1], &c__1); /* RHO = ABS( norm(z)**2 * RHO ) */ *rho = (d__1 = *rho * 2., abs(d__1)); /* Sort the eigenvalues into increasing order */ i__1 = *n; for (i__ = n1p1; i__ <= i__1; ++i__) { indxq[i__] += *n1; /* L10: */ } /* re-integrate the deflated parts from the last pass */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dlamda[i__] = d__[indxq[i__]]; /* L20: */ } _starpu_dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { indx[i__] = indxq[indxc[i__]]; /* L30: */ } /* Calculate the allowable deflation tolerance */ imax = _starpu_idamax_(n, &z__[1], &c__1); jmax = _starpu_idamax_(n, &d__[1], &c__1); eps = _starpu_dlamch_("Epsilon"); /* Computing MAX */ d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2)) ; tol = eps * 8. * max(d__3,d__4); /* If the rank-1 modifier is small enough, no more needs to be done */ /* except to reorganize Q so that its columns correspond with the */ /* elements in D. */ if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { *k = 0; iq2 = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__ = indx[j]; _starpu_dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); dlamda[j] = d__[i__]; iq2 += *n; /* L40: */ } _starpu_dlacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); _starpu_dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); goto L190; } /* If there are multiple eigenvalues then the problem deflates. Here */ /* the number of equal eigenvalues are found. As each equal */ /* eigenvalue is found, an elementary reflector is computed to rotate */ /* the corresponding eigensubspace so that the corresponding */ /* components of Z are zero in this new basis. */ i__1 = *n1; for (i__ = 1; i__ <= i__1; ++i__) { coltyp[i__] = 1; /* L50: */ } i__1 = *n; for (i__ = n1p1; i__ <= i__1; ++i__) { coltyp[i__] = 3; /* L60: */ } *k = 0; k2 = *n + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { nj = indx[j]; if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { /* Deflate due to small z component. */ --k2; coltyp[nj] = 4; indxp[k2] = nj; if (j == *n) { goto L100; } } else { pj = nj; goto L80; } /* L70: */ } L80: ++j; nj = indx[j]; if (j > *n) { goto L100; } if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { /* Deflate due to small z component. */ --k2; coltyp[nj] = 4; indxp[k2] = nj; } else { /* Check if eigenvalues are close enough to allow deflation. */ s = z__[pj]; c__ = z__[nj]; /* Find sqrt(a**2+b**2) without overflow or */ /* destructive underflow. */ tau = _starpu_dlapy2_(&c__, &s); t = d__[nj] - d__[pj]; c__ /= tau; s = -s / tau; if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { /* Deflation is possible. */ z__[nj] = tau; z__[pj] = 0.; if (coltyp[nj] != coltyp[pj]) { coltyp[nj] = 2; } coltyp[pj] = 4; _starpu_drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & c__, &s); /* Computing 2nd power */ d__1 = c__; /* Computing 2nd power */ d__2 = s; t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); /* Computing 2nd power */ d__1 = s; /* Computing 2nd power */ d__2 = c__; d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); d__[pj] = t; --k2; i__ = 1; L90: if (k2 + i__ <= *n) { if (d__[pj] < d__[indxp[k2 + i__]]) { indxp[k2 + i__ - 1] = indxp[k2 + i__]; indxp[k2 + i__] = pj; ++i__; goto L90; } else { indxp[k2 + i__ - 1] = pj; } } else { indxp[k2 + i__ - 1] = pj; } pj = nj; } else { ++(*k); dlamda[*k] = d__[pj]; w[*k] = z__[pj]; indxp[*k] = pj; pj = nj; } } goto L80; L100: /* Record the last eigenvalue. */ ++(*k); dlamda[*k] = d__[pj]; w[*k] = z__[pj]; indxp[*k] = pj; /* Count up the total number of the various types of columns, then */ /* form a permutation which positions the four column types into */ /* four uniform groups (although one or more of these groups may be */ /* empty). */ for (j = 1; j <= 4; ++j) { ctot[j - 1] = 0; /* L110: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { ct = coltyp[j]; ++ctot[ct - 1]; /* L120: */ } /* PSM(*) = Position in SubMatrix (of types 1 through 4) */ psm[0] = 1; psm[1] = ctot[0] + 1; psm[2] = psm[1] + ctot[1]; psm[3] = psm[2] + ctot[2]; *k = *n - ctot[3]; /* Fill out the INDXC array so that the permutation which it induces */ /* will place all type-1 columns first, all type-2 columns next, */ /* then all type-3's, and finally all type-4's. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { js = indxp[j]; ct = coltyp[js]; indx[psm[ct - 1]] = js; indxc[psm[ct - 1]] = j; ++psm[ct - 1]; /* L130: */ } /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */ /* and Q2 respectively. The eigenvalues/vectors which were not */ /* deflated go into the first K slots of DLAMDA and Q2 respectively, */ /* while those which were deflated go into the last N - K slots. */ i__ = 1; iq1 = 1; iq2 = (ctot[0] + ctot[1]) * *n1 + 1; i__1 = ctot[0]; for (j = 1; j <= i__1; ++j) { js = indx[i__]; _starpu_dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); z__[i__] = d__[js]; ++i__; iq1 += *n1; /* L140: */ } i__1 = ctot[1]; for (j = 1; j <= i__1; ++j) { js = indx[i__]; _starpu_dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); _starpu_dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); z__[i__] = d__[js]; ++i__; iq1 += *n1; iq2 += n2; /* L150: */ } i__1 = ctot[2]; for (j = 1; j <= i__1; ++j) { js = indx[i__]; _starpu_dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); z__[i__] = d__[js]; ++i__; iq2 += n2; /* L160: */ } iq1 = iq2; i__1 = ctot[3]; for (j = 1; j <= i__1; ++j) { js = indx[i__]; _starpu_dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); iq2 += *n; z__[i__] = d__[js]; ++i__; /* L170: */ } /* The deflated eigenvalues and their corresponding vectors go back */ /* into the last N - K slots of D and Q respectively. */ _starpu_dlacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq); i__1 = *n - *k; _starpu_dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); /* Copy CTOT into COLTYP for referencing in DLAED3. */ for (j = 1; j <= 4; ++j) { coltyp[j] = ctot[j - 1]; /* L180: */ } L190: return 0; /* End of DLAED2 */ } /* _starpu_dlaed2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaed3.c000066400000000000000000000252531507764646700205370ustar00rootroot00000000000000/* dlaed3.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b22 = 1.; static doublereal c_b23 = 0.; /* Subroutine */ int _starpu_dlaed3_(integer *k, integer *n, integer *n1, doublereal * d__, doublereal *q, integer *ldq, doublereal *rho, doublereal *dlamda, doublereal *q2, integer *indx, integer *ctot, doublereal *w, doublereal *s, integer *info) { /* System generated locals */ integer q_dim1, q_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ integer i__, j, n2, n12, ii, n23, iq2; doublereal temp; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaed4_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *); extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAED3 finds the roots of the secular equation, as defined by the */ /* values in D, W, and RHO, between 1 and K. It makes the */ /* appropriate calls to DLAED4 and then updates the eigenvectors by */ /* multiplying the matrix of eigenvectors of the pair of eigensystems */ /* being combined by the matrix of eigenvectors of the K-by-K system */ /* which is solved here. */ /* This code makes very mild assumptions about floating point */ /* arithmetic. It will work on machines with a guard digit in */ /* add/subtract, or on those binary machines without guard digits */ /* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */ /* It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. */ /* Arguments */ /* ========= */ /* K (input) INTEGER */ /* The number of terms in the rational function to be solved by */ /* DLAED4. K >= 0. */ /* N (input) INTEGER */ /* The number of rows and columns in the Q matrix. */ /* N >= K (deflation may result in N>K). */ /* N1 (input) INTEGER */ /* The location of the last eigenvalue in the leading submatrix. */ /* min(1,N) <= N1 <= N/2. */ /* D (output) DOUBLE PRECISION array, dimension (N) */ /* D(I) contains the updated eigenvalues for */ /* 1 <= I <= K. */ /* Q (output) DOUBLE PRECISION array, dimension (LDQ,N) */ /* Initially the first K columns are used as workspace. */ /* On output the columns 1 to K contain */ /* the updated eigenvectors. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max(1,N). */ /* RHO (input) DOUBLE PRECISION */ /* The value of the parameter in the rank one update equation. */ /* RHO >= 0 required. */ /* DLAMDA (input/output) DOUBLE PRECISION array, dimension (K) */ /* The first K elements of this array contain the old roots */ /* of the deflated updating problem. These are the poles */ /* of the secular equation. May be changed on output by */ /* having lowest order bit set to zero on Cray X-MP, Cray Y-MP, */ /* Cray-2, or Cray C-90, as described above. */ /* Q2 (input) DOUBLE PRECISION array, dimension (LDQ2, N) */ /* The first K columns of this matrix contain the non-deflated */ /* eigenvectors for the split problem. */ /* INDX (input) INTEGER array, dimension (N) */ /* The permutation used to arrange the columns of the deflated */ /* Q matrix into three groups (see DLAED2). */ /* The rows of the eigenvectors found by DLAED4 must be likewise */ /* permuted before the matrix multiply can take place. */ /* CTOT (input) INTEGER array, dimension (4) */ /* A count of the total number of the various types of columns */ /* in Q, as described in INDX. The fourth column type is any */ /* column which has been deflated. */ /* W (input/output) DOUBLE PRECISION array, dimension (K) */ /* The first K elements of this array contain the components */ /* of the deflation-adjusted updating vector. Destroyed on */ /* output. */ /* S (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K */ /* Will contain the eigenvectors of the repaired matrix which */ /* will be multiplied by the previously accumulated eigenvectors */ /* to update the system. */ /* LDS (input) INTEGER */ /* The leading dimension of S. LDS >= max(1,K). */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, an eigenvalue did not converge */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* Modified by Francoise Tisseur, University of Tennessee. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --dlamda; --q2; --indx; --ctot; --w; --s; /* Function Body */ *info = 0; if (*k < 0) { *info = -1; } else if (*n < *k) { *info = -2; } else if (*ldq < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLAED3", &i__1); return 0; } /* Quick return if possible */ if (*k == 0) { return 0; } /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can */ /* be computed with high relative accuracy (barring over/underflow). */ /* This is a problem on machines without a guard digit in */ /* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ /* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), */ /* which on any of these machines zeros out the bottommost */ /* bit of DLAMDA(I) if it is 1; this makes the subsequent */ /* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation */ /* occurs. On binary machines with a guard digit (almost all */ /* machines) it does not change DLAMDA(I) at all. On hexadecimal */ /* and decimal machines with a guard digit, it slightly */ /* changes the bottommost bits of DLAMDA(I). It does not account */ /* for hexadecimal or decimal machines without guard digits */ /* (we know of none). We use a subroutine call to compute */ /* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating */ /* this code. */ i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { dlamda[i__] = _starpu_dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; /* L10: */ } i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], info); /* If the zero finder fails, the computation is terminated. */ if (*info != 0) { goto L120; } /* L20: */ } if (*k == 1) { goto L110; } if (*k == 2) { i__1 = *k; for (j = 1; j <= i__1; ++j) { w[1] = q[j * q_dim1 + 1]; w[2] = q[j * q_dim1 + 2]; ii = indx[1]; q[j * q_dim1 + 1] = w[ii]; ii = indx[2]; q[j * q_dim1 + 2] = w[ii]; /* L30: */ } goto L110; } /* Compute updated W. */ _starpu_dcopy_(k, &w[1], &c__1, &s[1], &c__1); /* Initialize W(I) = Q(I,I) */ i__1 = *ldq + 1; _starpu_dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); /* L40: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); /* L50: */ } /* L60: */ } i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { d__1 = sqrt(-w[i__]); w[i__] = d_sign(&d__1, &s[i__]); /* L70: */ } /* Compute eigenvectors of the modified rank-1 modification. */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { s[i__] = w[i__] / q[i__ + j * q_dim1]; /* L80: */ } temp = _starpu_dnrm2_(k, &s[1], &c__1); i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { ii = indx[i__]; q[i__ + j * q_dim1] = s[ii] / temp; /* L90: */ } /* L100: */ } /* Compute the updated eigenvectors. */ L110: n2 = *n - *n1; n12 = ctot[1] + ctot[2]; n23 = ctot[2] + ctot[3]; _starpu_dlacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23); iq2 = *n1 * n12 + 1; if (n23 != 0) { _starpu_dgemm_("N", "N", &n2, k, &n23, &c_b22, &q2[iq2], &n2, &s[1], &n23, & c_b23, &q[*n1 + 1 + q_dim1], ldq); } else { _starpu_dlaset_("A", &n2, k, &c_b23, &c_b23, &q[*n1 + 1 + q_dim1], ldq); } _starpu_dlacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); if (n12 != 0) { _starpu_dgemm_("N", "N", n1, k, &n12, &c_b22, &q2[1], n1, &s[1], &n12, &c_b23, &q[q_offset], ldq); } else { _starpu_dlaset_("A", n1, k, &c_b23, &c_b23, &q[q_dim1 + 1], ldq); } L120: return 0; /* End of DLAED3 */ } /* _starpu_dlaed3_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaed4.c000066400000000000000000000534431507764646700205420ustar00rootroot00000000000000/* dlaed4.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaed4_(integer *n, integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam, integer *info) { /* System generated locals */ integer i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal a, b, c__; integer j; doublereal w; integer ii; doublereal dw, zz[3]; integer ip1; doublereal del, eta, phi, eps, tau, psi; integer iim1, iip1; doublereal dphi, dpsi; integer iter; doublereal temp, prew, temp1, dltlb, dltub, midpt; integer niter; logical swtch; extern /* Subroutine */ int _starpu_dlaed5_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlaed6_(integer *, logical *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *); logical swtch3; extern doublereal _starpu_dlamch_(char *); logical orgati; doublereal erretm, rhoinv; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This subroutine computes the I-th updated eigenvalue of a symmetric */ /* rank-one modification to a diagonal matrix whose elements are */ /* given in the array d, and that */ /* D(i) < D(j) for i < j */ /* and that RHO > 0. This is arranged by the calling routine, and is */ /* no loss in generality. The rank-one modified system is thus */ /* diag( D ) + RHO * Z * Z_transpose. */ /* where we assume the Euclidean norm of Z is 1. */ /* The method consists of approximating the rational functions in the */ /* secular equation by simpler interpolating rational functions. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The length of all arrays. */ /* I (input) INTEGER */ /* The index of the eigenvalue to be computed. 1 <= I <= N. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The original eigenvalues. It is assumed that they are in */ /* order, D(I) < D(J) for I < J. */ /* Z (input) DOUBLE PRECISION array, dimension (N) */ /* The components of the updating vector. */ /* DELTA (output) DOUBLE PRECISION array, dimension (N) */ /* If N .GT. 2, DELTA contains (D(j) - lambda_I) in its j-th */ /* component. If N = 1, then DELTA(1) = 1. If N = 2, see DLAED5 */ /* for detail. The vector DELTA contains the information necessary */ /* to construct the eigenvectors by DLAED3 and DLAED9. */ /* RHO (input) DOUBLE PRECISION */ /* The scalar in the symmetric updating formula. */ /* DLAM (output) DOUBLE PRECISION */ /* The computed lambda_I, the I-th updated eigenvalue. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* > 0: if INFO = 1, the updating process failed. */ /* Internal Parameters */ /* =================== */ /* Logical variable ORGATI (origin-at-i?) is used for distinguishing */ /* whether D(i) or D(i+1) is treated as the origin. */ /* ORGATI = .true. origin at i */ /* ORGATI = .false. origin at i+1 */ /* Logical variable SWTCH3 (switch-for-3-poles?) is for noting */ /* if we are working with THREE poles! */ /* MAXIT is the maximum number of iterations allowed for each */ /* eigenvalue. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ren-Cang Li, Computer Science Division, University of California */ /* at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Since this routine is called in an inner loop, we do no argument */ /* checking. */ /* Quick return for N=1 and 2. */ /* Parameter adjustments */ --delta; --z__; --d__; /* Function Body */ *info = 0; if (*n == 1) { /* Presumably, I=1 upon entry */ *dlam = d__[1] + *rho * z__[1] * z__[1]; delta[1] = 1.; return 0; } if (*n == 2) { _starpu_dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); return 0; } /* Compute machine epsilon */ eps = _starpu_dlamch_("Epsilon"); rhoinv = 1. / *rho; /* The case I = N */ if (*i__ == *n) { /* Initialize some basic variables */ ii = *n - 1; niter = 1; /* Calculate initial guess */ midpt = *rho / 2.; /* If ||Z||_2 is not one, then TEMP should be set to */ /* RHO * ||Z||_2^2 / TWO */ i__1 = *n; for (j = 1; j <= i__1; ++j) { delta[j] = d__[j] - d__[*i__] - midpt; /* L10: */ } psi = 0.; i__1 = *n - 2; for (j = 1; j <= i__1; ++j) { psi += z__[j] * z__[j] / delta[j]; /* L20: */ } c__ = rhoinv + psi; w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[* n]; if (w <= 0.) { temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) + z__[*n] * z__[*n] / *rho; if (c__ <= temp) { tau = *rho; } else { del = d__[*n] - d__[*n - 1]; a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n] ; b = z__[*n] * z__[*n] * del; if (a < 0.) { tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); } else { tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); } } /* It can be proved that */ /* D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO */ dltlb = midpt; dltub = *rho; } else { del = d__[*n] - d__[*n - 1]; a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; b = z__[*n] * z__[*n] * del; if (a < 0.) { tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); } else { tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); } /* It can be proved that */ /* D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 */ dltlb = 0.; dltub = midpt; } i__1 = *n; for (j = 1; j <= i__1; ++j) { delta[j] = d__[j] - d__[*i__] - tau; /* L30: */ } /* Evaluate PSI and the derivative DPSI */ dpsi = 0.; psi = 0.; erretm = 0.; i__1 = ii; for (j = 1; j <= i__1; ++j) { temp = z__[j] / delta[j]; psi += z__[j] * temp; dpsi += temp * temp; erretm += psi; /* L40: */ } erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ temp = z__[*n] / delta[*n]; phi = z__[*n] * temp; dphi = temp * temp; erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi); w = rhoinv + phi + psi; /* Test for convergence */ if (abs(w) <= eps * erretm) { *dlam = d__[*i__] + tau; goto L250; } if (w <= 0.) { dltlb = max(dltlb,tau); } else { dltub = min(dltub,tau); } /* Calculate the new step */ ++niter; c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * ( dpsi + dphi); b = delta[*n - 1] * delta[*n] * w; if (c__ < 0.) { c__ = abs(c__); } if (c__ == 0.) { /* ETA = B/A */ /* ETA = RHO - TAU */ eta = dltub - tau; } else if (a >= 0.) { eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.); } else { eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) ); } /* Note, eta should be positive if w is negative, and */ /* eta should be negative otherwise. However, */ /* if for some reason caused by roundoff, eta*w > 0, */ /* we simply use one Newton step instead. This way */ /* will guarantee eta*w < 0. */ if (w * eta > 0.) { eta = -w / (dpsi + dphi); } temp = tau + eta; if (temp > dltub || temp < dltlb) { if (w < 0.) { eta = (dltub - tau) / 2.; } else { eta = (dltlb - tau) / 2.; } } i__1 = *n; for (j = 1; j <= i__1; ++j) { delta[j] -= eta; /* L50: */ } tau += eta; /* Evaluate PSI and the derivative DPSI */ dpsi = 0.; psi = 0.; erretm = 0.; i__1 = ii; for (j = 1; j <= i__1; ++j) { temp = z__[j] / delta[j]; psi += z__[j] * temp; dpsi += temp * temp; erretm += psi; /* L60: */ } erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ temp = z__[*n] / delta[*n]; phi = z__[*n] * temp; dphi = temp * temp; erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi); w = rhoinv + phi + psi; /* Main loop to update the values of the array DELTA */ iter = niter + 1; for (niter = iter; niter <= 30; ++niter) { /* Test for convergence */ if (abs(w) <= eps * erretm) { *dlam = d__[*i__] + tau; goto L250; } if (w <= 0.) { dltlb = max(dltlb,tau); } else { dltub = min(dltub,tau); } /* Calculate the new step */ c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * (dpsi + dphi); b = delta[*n - 1] * delta[*n] * w; if (a >= 0.) { eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( c__ * 2.); } else { eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( d__1)))); } /* Note, eta should be positive if w is negative, and */ /* eta should be negative otherwise. However, */ /* if for some reason caused by roundoff, eta*w > 0, */ /* we simply use one Newton step instead. This way */ /* will guarantee eta*w < 0. */ if (w * eta > 0.) { eta = -w / (dpsi + dphi); } temp = tau + eta; if (temp > dltub || temp < dltlb) { if (w < 0.) { eta = (dltub - tau) / 2.; } else { eta = (dltlb - tau) / 2.; } } i__1 = *n; for (j = 1; j <= i__1; ++j) { delta[j] -= eta; /* L70: */ } tau += eta; /* Evaluate PSI and the derivative DPSI */ dpsi = 0.; psi = 0.; erretm = 0.; i__1 = ii; for (j = 1; j <= i__1; ++j) { temp = z__[j] / delta[j]; psi += z__[j] * temp; dpsi += temp * temp; erretm += psi; /* L80: */ } erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ temp = z__[*n] / delta[*n]; phi = z__[*n] * temp; dphi = temp * temp; erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( dpsi + dphi); w = rhoinv + phi + psi; /* L90: */ } /* Return with INFO = 1, NITER = MAXIT and not converged */ *info = 1; *dlam = d__[*i__] + tau; goto L250; /* End for the case I = N */ } else { /* The case for I < N */ niter = 1; ip1 = *i__ + 1; /* Calculate initial guess */ del = d__[ip1] - d__[*i__]; midpt = del / 2.; i__1 = *n; for (j = 1; j <= i__1; ++j) { delta[j] = d__[j] - d__[*i__] - midpt; /* L100: */ } psi = 0.; i__1 = *i__ - 1; for (j = 1; j <= i__1; ++j) { psi += z__[j] * z__[j] / delta[j]; /* L110: */ } phi = 0.; i__1 = *i__ + 2; for (j = *n; j >= i__1; --j) { phi += z__[j] * z__[j] / delta[j]; /* L120: */ } c__ = rhoinv + psi + phi; w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / delta[ip1]; if (w > 0.) { /* d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 */ /* We choose d(i) as origin. */ orgati = TRUE_; a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; b = z__[*i__] * z__[*i__] * del; if (a > 0.) { tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( d__1)))); } else { tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( c__ * 2.); } dltlb = 0.; dltub = midpt; } else { /* (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) */ /* We choose d(i+1) as origin. */ orgati = FALSE_; a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; b = z__[ip1] * z__[ip1] * del; if (a < 0.) { tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( d__1)))); } else { tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / (c__ * 2.); } dltlb = -midpt; dltub = 0.; } if (orgati) { i__1 = *n; for (j = 1; j <= i__1; ++j) { delta[j] = d__[j] - d__[*i__] - tau; /* L130: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { delta[j] = d__[j] - d__[ip1] - tau; /* L140: */ } } if (orgati) { ii = *i__; } else { ii = *i__ + 1; } iim1 = ii - 1; iip1 = ii + 1; /* Evaluate PSI and the derivative DPSI */ dpsi = 0.; psi = 0.; erretm = 0.; i__1 = iim1; for (j = 1; j <= i__1; ++j) { temp = z__[j] / delta[j]; psi += z__[j] * temp; dpsi += temp * temp; erretm += psi; /* L150: */ } erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ dphi = 0.; phi = 0.; i__1 = iip1; for (j = *n; j >= i__1; --j) { temp = z__[j] / delta[j]; phi += z__[j] * temp; dphi += temp * temp; erretm += phi; /* L160: */ } w = rhoinv + phi + psi; /* W is the value of the secular function with */ /* its ii-th element removed. */ swtch3 = FALSE_; if (orgati) { if (w < 0.) { swtch3 = TRUE_; } } else { if (w > 0.) { swtch3 = TRUE_; } } if (ii == 1 || ii == *n) { swtch3 = FALSE_; } temp = z__[ii] / delta[ii]; dw = dpsi + dphi + temp * temp; temp = z__[ii] * temp; w += temp; erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + abs(tau) * dw; /* Test for convergence */ if (abs(w) <= eps * erretm) { if (orgati) { *dlam = d__[*i__] + tau; } else { *dlam = d__[ip1] + tau; } goto L250; } if (w <= 0.) { dltlb = max(dltlb,tau); } else { dltub = min(dltub,tau); } /* Calculate the new step */ ++niter; if (! swtch3) { if (orgati) { /* Computing 2nd power */ d__1 = z__[*i__] / delta[*i__]; c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * d__1); } else { /* Computing 2nd power */ d__1 = z__[ip1] / delta[ip1]; c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * d__1); } a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * dw; b = delta[*i__] * delta[ip1] * w; if (c__ == 0.) { if (a == 0.) { if (orgati) { a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * (dpsi + dphi); } else { a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * (dpsi + dphi); } } eta = b / a; } else if (a <= 0.) { eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( c__ * 2.); } else { eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( d__1)))); } } else { /* Interpolation using THREE most relevant poles */ temp = rhoinv + psi + phi; if (orgati) { temp1 = z__[iim1] / delta[iim1]; temp1 *= temp1; c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[ iip1]) * temp1; zz[0] = z__[iim1] * z__[iim1]; zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi); } else { temp1 = z__[iip1] / delta[iip1]; temp1 *= temp1; c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[ iim1]) * temp1; zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1)); zz[2] = z__[iip1] * z__[iip1]; } zz[1] = z__[ii] * z__[ii]; _starpu_dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); if (*info != 0) { goto L250; } } /* Note, eta should be positive if w is negative, and */ /* eta should be negative otherwise. However, */ /* if for some reason caused by roundoff, eta*w > 0, */ /* we simply use one Newton step instead. This way */ /* will guarantee eta*w < 0. */ if (w * eta >= 0.) { eta = -w / dw; } temp = tau + eta; if (temp > dltub || temp < dltlb) { if (w < 0.) { eta = (dltub - tau) / 2.; } else { eta = (dltlb - tau) / 2.; } } prew = w; i__1 = *n; for (j = 1; j <= i__1; ++j) { delta[j] -= eta; /* L180: */ } /* Evaluate PSI and the derivative DPSI */ dpsi = 0.; psi = 0.; erretm = 0.; i__1 = iim1; for (j = 1; j <= i__1; ++j) { temp = z__[j] / delta[j]; psi += z__[j] * temp; dpsi += temp * temp; erretm += psi; /* L190: */ } erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ dphi = 0.; phi = 0.; i__1 = iip1; for (j = *n; j >= i__1; --j) { temp = z__[j] / delta[j]; phi += z__[j] * temp; dphi += temp * temp; erretm += phi; /* L200: */ } temp = z__[ii] / delta[ii]; dw = dpsi + dphi + temp * temp; temp = z__[ii] * temp; w = rhoinv + phi + psi + temp; erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + ( d__1 = tau + eta, abs(d__1)) * dw; swtch = FALSE_; if (orgati) { if (-w > abs(prew) / 10.) { swtch = TRUE_; } } else { if (w > abs(prew) / 10.) { swtch = TRUE_; } } tau += eta; /* Main loop to update the values of the array DELTA */ iter = niter + 1; for (niter = iter; niter <= 30; ++niter) { /* Test for convergence */ if (abs(w) <= eps * erretm) { if (orgati) { *dlam = d__[*i__] + tau; } else { *dlam = d__[ip1] + tau; } goto L250; } if (w <= 0.) { dltlb = max(dltlb,tau); } else { dltub = min(dltub,tau); } /* Calculate the new step */ if (! swtch3) { if (! swtch) { if (orgati) { /* Computing 2nd power */ d__1 = z__[*i__] / delta[*i__]; c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * ( d__1 * d__1); } else { /* Computing 2nd power */ d__1 = z__[ip1] / delta[ip1]; c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * d__1); } } else { temp = z__[ii] / delta[ii]; if (orgati) { dpsi += temp * temp; } else { dphi += temp * temp; } c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi; } a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * dw; b = delta[*i__] * delta[ip1] * w; if (c__ == 0.) { if (a == 0.) { if (! swtch) { if (orgati) { a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * (dpsi + dphi); } else { a = z__[ip1] * z__[ip1] + delta[*i__] * delta[ *i__] * (dpsi + dphi); } } else { a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] * delta[ip1] * dphi; } } eta = b / a; } else if (a <= 0.) { eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.); } else { eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))); } } else { /* Interpolation using THREE most relevant poles */ temp = rhoinv + psi + phi; if (swtch) { c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; zz[0] = delta[iim1] * delta[iim1] * dpsi; zz[2] = delta[iip1] * delta[iip1] * dphi; } else { if (orgati) { temp1 = z__[iim1] / delta[iim1]; temp1 *= temp1; c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * temp1; zz[0] = z__[iim1] * z__[iim1]; zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi); } else { temp1 = z__[iip1] / delta[iip1]; temp1 *= temp1; c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * temp1; zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1)); zz[2] = z__[iip1] * z__[iip1]; } } _starpu_dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); if (*info != 0) { goto L250; } } /* Note, eta should be positive if w is negative, and */ /* eta should be negative otherwise. However, */ /* if for some reason caused by roundoff, eta*w > 0, */ /* we simply use one Newton step instead. This way */ /* will guarantee eta*w < 0. */ if (w * eta >= 0.) { eta = -w / dw; } temp = tau + eta; if (temp > dltub || temp < dltlb) { if (w < 0.) { eta = (dltub - tau) / 2.; } else { eta = (dltlb - tau) / 2.; } } i__1 = *n; for (j = 1; j <= i__1; ++j) { delta[j] -= eta; /* L210: */ } tau += eta; prew = w; /* Evaluate PSI and the derivative DPSI */ dpsi = 0.; psi = 0.; erretm = 0.; i__1 = iim1; for (j = 1; j <= i__1; ++j) { temp = z__[j] / delta[j]; psi += z__[j] * temp; dpsi += temp * temp; erretm += psi; /* L220: */ } erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ dphi = 0.; phi = 0.; i__1 = iip1; for (j = *n; j >= i__1; --j) { temp = z__[j] / delta[j]; phi += z__[j] * temp; dphi += temp * temp; erretm += phi; /* L230: */ } temp = z__[ii] / delta[ii]; dw = dpsi + dphi + temp * temp; temp = z__[ii] * temp; w = rhoinv + phi + psi + temp; erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + abs(tau) * dw; if (w * prew > 0. && abs(w) > abs(prew) / 10.) { swtch = ! swtch; } /* L240: */ } /* Return with INFO = 1, NITER = MAXIT and not converged */ *info = 1; if (orgati) { *dlam = d__[*i__] + tau; } else { *dlam = d__[ip1] + tau; } } L250: return 0; /* End of DLAED4 */ } /* _starpu_dlaed4_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaed5.c000066400000000000000000000100511507764646700205270ustar00rootroot00000000000000/* dlaed5.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaed5_(integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam) { /* System generated locals */ doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal b, c__, w, del, tau, temp; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This subroutine computes the I-th eigenvalue of a symmetric rank-one */ /* modification of a 2-by-2 diagonal matrix */ /* diag( D ) + RHO * Z * transpose(Z) . */ /* The diagonal elements in the array D are assumed to satisfy */ /* D(i) < D(j) for i < j . */ /* We also assume RHO > 0 and that the Euclidean norm of the vector */ /* Z is one. */ /* Arguments */ /* ========= */ /* I (input) INTEGER */ /* The index of the eigenvalue to be computed. I = 1 or I = 2. */ /* D (input) DOUBLE PRECISION array, dimension (2) */ /* The original eigenvalues. We assume D(1) < D(2). */ /* Z (input) DOUBLE PRECISION array, dimension (2) */ /* The components of the updating vector. */ /* DELTA (output) DOUBLE PRECISION array, dimension (2) */ /* The vector DELTA contains the information necessary */ /* to construct the eigenvectors. */ /* RHO (input) DOUBLE PRECISION */ /* The scalar in the symmetric updating formula. */ /* DLAM (output) DOUBLE PRECISION */ /* The computed lambda_I, the I-th updated eigenvalue. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ren-Cang Li, Computer Science Division, University of California */ /* at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --delta; --z__; --d__; /* Function Body */ del = d__[2] - d__[1]; if (*i__ == 1) { w = *rho * 2. * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.; if (w > 0.) { b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); c__ = *rho * z__[1] * z__[1] * del; /* B > ZERO, always */ tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); *dlam = d__[1] + tau; delta[1] = -z__[1] / tau; delta[2] = z__[2] / (del - tau); } else { b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); c__ = *rho * z__[2] * z__[2] * del; if (b > 0.) { tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); } else { tau = (b - sqrt(b * b + c__ * 4.)) / 2.; } *dlam = d__[2] + tau; delta[1] = -z__[1] / (del + tau); delta[2] = -z__[2] / tau; } temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); delta[1] /= temp; delta[2] /= temp; } else { /* Now I=2 */ b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); c__ = *rho * z__[2] * z__[2] * del; if (b > 0.) { tau = (b + sqrt(b * b + c__ * 4.)) / 2.; } else { tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); } *dlam = d__[2] + tau; delta[1] = -z__[1] / (del + tau); delta[2] = -z__[2] / tau; temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); delta[1] /= temp; delta[2] /= temp; } return 0; /* End OF DLAED5 */ } /* _starpu_dlaed5_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaed6.c000066400000000000000000000230101507764646700205270ustar00rootroot00000000000000/* dlaed6.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaed6_(integer *kniter, logical *orgati, doublereal * rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal * tau, integer *info) { /* System generated locals */ integer i__1; doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ double sqrt(doublereal), log(doublereal), pow_di(doublereal *, integer *); /* Local variables */ doublereal a, b, c__, f; integer i__; doublereal fc, df, ddf, lbd, eta, ubd, eps, base; integer iter; doublereal temp, temp1, temp2, temp3, temp4; logical scale; integer niter; doublereal small1, small2, sminv1, sminv2; extern doublereal _starpu_dlamch_(char *); doublereal dscale[3], sclfac, zscale[3], erretm, sclinv; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* February 2007 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAED6 computes the positive or negative root (closest to the origin) */ /* of */ /* z(1) z(2) z(3) */ /* f(x) = rho + --------- + ---------- + --------- */ /* d(1)-x d(2)-x d(3)-x */ /* It is assumed that */ /* if ORGATI = .true. the root is between d(2) and d(3); */ /* otherwise it is between d(1) and d(2) */ /* This routine will be called by DLAED4 when necessary. In most cases, */ /* the root sought is the smallest in magnitude, though it might not be */ /* in some extremely rare situations. */ /* Arguments */ /* ========= */ /* KNITER (input) INTEGER */ /* Refer to DLAED4 for its significance. */ /* ORGATI (input) LOGICAL */ /* If ORGATI is true, the needed root is between d(2) and */ /* d(3); otherwise it is between d(1) and d(2). See */ /* DLAED4 for further details. */ /* RHO (input) DOUBLE PRECISION */ /* Refer to the equation f(x) above. */ /* D (input) DOUBLE PRECISION array, dimension (3) */ /* D satisfies d(1) < d(2) < d(3). */ /* Z (input) DOUBLE PRECISION array, dimension (3) */ /* Each of the elements in z must be positive. */ /* FINIT (input) DOUBLE PRECISION */ /* The value of f at 0. It is more accurate than the one */ /* evaluated inside this routine (if someone wants to do */ /* so). */ /* TAU (output) DOUBLE PRECISION */ /* The root of the equation f(x). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* > 0: if INFO = 1, failure to converge */ /* Further Details */ /* =============== */ /* 30/06/99: Based on contributions by */ /* Ren-Cang Li, Computer Science Division, University of California */ /* at Berkeley, USA */ /* 10/02/03: This version has a few statements commented out for thread */ /* safety (machine parameters are computed on each entry). SJH. */ /* 05/10/06: Modified from a new version of Ren-Cang Li, use */ /* Gragg-Thornton-Warner cubic convergent scheme for better stability. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --z__; --d__; /* Function Body */ *info = 0; if (*orgati) { lbd = d__[2]; ubd = d__[3]; } else { lbd = d__[1]; ubd = d__[2]; } if (*finit < 0.) { lbd = 0.; } else { ubd = 0.; } niter = 1; *tau = 0.; if (*kniter == 2) { if (*orgati) { temp = (d__[3] - d__[2]) / 2.; c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; } else { temp = (d__[1] - d__[2]) / 2.; c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; } /* Computing MAX */ d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); temp = max(d__1,d__2); a /= temp; b /= temp; c__ /= temp; if (c__ == 0.) { *tau = b / a; } else if (a <= 0.) { *tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( c__ * 2.); } else { *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)) )); } if (*tau < lbd || *tau > ubd) { *tau = (lbd + ubd) / 2.; } if (d__[1] == *tau || d__[2] == *tau || d__[3] == *tau) { *tau = 0.; } else { temp = *finit + *tau * z__[1] / (d__[1] * (d__[1] - *tau)) + *tau * z__[2] / (d__[2] * (d__[2] - *tau)) + *tau * z__[3] / ( d__[3] * (d__[3] - *tau)); if (temp <= 0.) { lbd = *tau; } else { ubd = *tau; } if (abs(*finit) <= abs(temp)) { *tau = 0.; } } } /* get machine parameters for possible scaling to avoid overflow */ /* modified by Sven: parameters SMALL1, SMINV1, SMALL2, */ /* SMINV2, EPS are not SAVEd anymore between one call to the */ /* others but recomputed at each call */ eps = _starpu_dlamch_("Epsilon"); base = _starpu_dlamch_("Base"); i__1 = (integer) (log(_starpu_dlamch_("SafMin")) / log(base) / 3.); small1 = pow_di(&base, &i__1); sminv1 = 1. / small1; small2 = small1 * small1; sminv2 = sminv1 * sminv1; /* Determine if scaling of inputs necessary to avoid overflow */ /* when computing 1/TEMP**3 */ if (*orgati) { /* Computing MIN */ d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - * tau, abs(d__2)); temp = min(d__3,d__4); } else { /* Computing MIN */ d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - * tau, abs(d__2)); temp = min(d__3,d__4); } scale = FALSE_; if (temp <= small1) { scale = TRUE_; if (temp <= small2) { /* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ sclfac = sminv2; sclinv = small2; } else { /* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ sclfac = sminv1; sclinv = small1; } /* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ for (i__ = 1; i__ <= 3; ++i__) { dscale[i__ - 1] = d__[i__] * sclfac; zscale[i__ - 1] = z__[i__] * sclfac; /* L10: */ } *tau *= sclfac; lbd *= sclfac; ubd *= sclfac; } else { /* Copy D and Z to DSCALE and ZSCALE */ for (i__ = 1; i__ <= 3; ++i__) { dscale[i__ - 1] = d__[i__]; zscale[i__ - 1] = z__[i__]; /* L20: */ } } fc = 0.; df = 0.; ddf = 0.; for (i__ = 1; i__ <= 3; ++i__) { temp = 1. / (dscale[i__ - 1] - *tau); temp1 = zscale[i__ - 1] * temp; temp2 = temp1 * temp; temp3 = temp2 * temp; fc += temp1 / dscale[i__ - 1]; df += temp2; ddf += temp3; /* L30: */ } f = *finit + *tau * fc; if (abs(f) <= 0.) { goto L60; } if (f <= 0.) { lbd = *tau; } else { ubd = *tau; } /* Iteration begins -- Use Gragg-Thornton-Warner cubic convergent */ /* scheme */ /* It is not hard to see that */ /* 1) Iterations will go up monotonically */ /* if FINIT < 0; */ /* 2) Iterations will go down monotonically */ /* if FINIT > 0. */ iter = niter + 1; for (niter = iter; niter <= 40; ++niter) { if (*orgati) { temp1 = dscale[1] - *tau; temp2 = dscale[2] - *tau; } else { temp1 = dscale[0] - *tau; temp2 = dscale[1] - *tau; } a = (temp1 + temp2) * f - temp1 * temp2 * df; b = temp1 * temp2 * f; c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; /* Computing MAX */ d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); temp = max(d__1,d__2); a /= temp; b /= temp; c__ /= temp; if (c__ == 0.) { eta = b / a; } else if (a <= 0.) { eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.); } else { eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) ); } if (f * eta >= 0.) { eta = -f / df; } *tau += eta; if (*tau < lbd || *tau > ubd) { *tau = (lbd + ubd) / 2.; } fc = 0.; erretm = 0.; df = 0.; ddf = 0.; for (i__ = 1; i__ <= 3; ++i__) { temp = 1. / (dscale[i__ - 1] - *tau); temp1 = zscale[i__ - 1] * temp; temp2 = temp1 * temp; temp3 = temp2 * temp; temp4 = temp1 / dscale[i__ - 1]; fc += temp4; erretm += abs(temp4); df += temp2; ddf += temp3; /* L40: */ } f = *finit + *tau * fc; erretm = (abs(*finit) + abs(*tau) * erretm) * 8. + abs(*tau) * df; if (abs(f) <= eps * erretm) { goto L60; } if (f <= 0.) { lbd = *tau; } else { ubd = *tau; } /* L50: */ } *info = 1; L60: /* Undo scaling */ if (scale) { *tau *= sclinv; } return 0; /* End of DLAED6 */ } /* _starpu_dlaed6_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaed7.c000066400000000000000000000300201507764646700205270ustar00rootroot00000000000000/* dlaed7.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__2 = 2; static integer c__1 = 1; static doublereal c_b10 = 1.; static doublereal c_b11 = 0.; static integer c_n1 = -1; /* Subroutine */ int _starpu_dlaed7_(integer *icompq, integer *n, integer *qsiz, integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr, integer * perm, integer *givptr, integer *givcol, doublereal *givnum, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer q_dim1, q_offset, i__1, i__2; /* Builtin functions */ integer pow_ii(integer *, integer *); /* Local variables */ integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer indxc, indxp; extern /* Subroutine */ int _starpu_dlaed8_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *), _starpu_dlaed9_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dlaeda_(integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *) ; integer idlmda; extern /* Subroutine */ int _starpu_dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *), _starpu_xerbla_(char *, integer *); integer coltyp; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAED7 computes the updated eigensystem of a diagonal */ /* matrix after modification by a rank-one symmetric matrix. This */ /* routine is used only for the eigenproblem which requires all */ /* eigenvalues and optionally eigenvectors of a dense symmetric matrix */ /* that has been reduced to tridiagonal form. DLAED1 handles */ /* the case in which all eigenvalues and eigenvectors of a symmetric */ /* tridiagonal matrix are desired. */ /* T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) */ /* where Z = Q'u, u is a vector of length N with ones in the */ /* CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. */ /* The eigenvectors of the original matrix are stored in Q, and the */ /* eigenvalues are in D. The algorithm consists of three stages: */ /* The first stage consists of deflating the size of the problem */ /* when there are multiple eigenvalues or if there is a zero in */ /* the Z vector. For each such occurence the dimension of the */ /* secular equation problem is reduced by one. This stage is */ /* performed by the routine DLAED8. */ /* The second stage consists of calculating the updated */ /* eigenvalues. This is done by finding the roots of the secular */ /* equation via the routine DLAED4 (as called by DLAED9). */ /* This routine also calculates the eigenvectors of the current */ /* problem. */ /* The final stage consists of computing the updated eigenvectors */ /* directly using the updated eigenvalues. The eigenvectors for */ /* the current problem are multiplied with the eigenvectors from */ /* the overall problem. */ /* Arguments */ /* ========= */ /* ICOMPQ (input) INTEGER */ /* = 0: Compute eigenvalues only. */ /* = 1: Compute eigenvectors of original dense symmetric matrix */ /* also. On entry, Q contains the orthogonal matrix used */ /* to reduce the original matrix to tridiagonal form. */ /* N (input) INTEGER */ /* The dimension of the symmetric tridiagonal matrix. N >= 0. */ /* QSIZ (input) INTEGER */ /* The dimension of the orthogonal matrix used to reduce */ /* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. */ /* TLVLS (input) INTEGER */ /* The total number of merging levels in the overall divide and */ /* conquer tree. */ /* CURLVL (input) INTEGER */ /* The current level in the overall merge routine, */ /* 0 <= CURLVL <= TLVLS. */ /* CURPBM (input) INTEGER */ /* The current problem in the current level in the overall */ /* merge routine (counting from upper left to lower right). */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the eigenvalues of the rank-1-perturbed matrix. */ /* On exit, the eigenvalues of the repaired matrix. */ /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */ /* On entry, the eigenvectors of the rank-1-perturbed matrix. */ /* On exit, the eigenvectors of the repaired tridiagonal matrix. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max(1,N). */ /* INDXQ (output) INTEGER array, dimension (N) */ /* The permutation which will reintegrate the subproblem just */ /* solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) */ /* will be in ascending order. */ /* RHO (input) DOUBLE PRECISION */ /* The subdiagonal element used to create the rank-1 */ /* modification. */ /* CUTPNT (input) INTEGER */ /* Contains the location of the last eigenvalue in the leading */ /* sub-matrix. min(1,N) <= CUTPNT <= N. */ /* QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1) */ /* Stores eigenvectors of submatrices encountered during */ /* divide and conquer, packed together. QPTR points to */ /* beginning of the submatrices. */ /* QPTR (input/output) INTEGER array, dimension (N+2) */ /* List of indices pointing to beginning of submatrices stored */ /* in QSTORE. The submatrices are numbered starting at the */ /* bottom left of the divide and conquer tree, from left to */ /* right and bottom to top. */ /* PRMPTR (input) INTEGER array, dimension (N lg N) */ /* Contains a list of pointers which indicate where in PERM a */ /* level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) */ /* indicates the size of the permutation and also the size of */ /* the full, non-deflated problem. */ /* PERM (input) INTEGER array, dimension (N lg N) */ /* Contains the permutations (from deflation and sorting) to be */ /* applied to each eigenblock. */ /* GIVPTR (input) INTEGER array, dimension (N lg N) */ /* Contains a list of pointers which indicate where in GIVCOL a */ /* level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) */ /* indicates the number of Givens rotations. */ /* GIVCOL (input) INTEGER array, dimension (2, N lg N) */ /* Each pair of numbers indicates a pair of columns to take place */ /* in a Givens rotation. */ /* GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) */ /* Each number indicates the S value to be used in the */ /* corresponding Givens rotation. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N+QSIZ*N) */ /* IWORK (workspace) INTEGER array, dimension (4*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, an eigenvalue did not converge */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --indxq; --qstore; --qptr; --prmptr; --perm; --givptr; givcol -= 3; givnum -= 3; --work; --iwork; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*icompq == 1 && *qsiz < *n) { *info = -4; } else if (*ldq < max(1,*n)) { *info = -9; } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { *info = -12; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLAED7", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* The following values are for bookkeeping purposes only. They are */ /* integer pointers which indicate the portion of the workspace */ /* used by a particular array in DLAED8 and DLAED9. */ if (*icompq == 1) { ldq2 = *qsiz; } else { ldq2 = *n; } iz = 1; idlmda = iz + *n; iw = idlmda + *n; iq2 = iw + *n; is = iq2 + *n * ldq2; indx = 1; indxc = indx + *n; coltyp = indxc + *n; indxp = coltyp + *n; /* Form the z-vector which consists of the last row of Q_1 and the */ /* first row of Q_2. */ ptr = pow_ii(&c__2, tlvls) + 1; i__1 = *curlvl - 1; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *tlvls - i__; ptr += pow_ii(&c__2, &i__2); /* L10: */ } curr = ptr + *curpbm; _starpu_dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz + *n], info); /* When solving the final problem, we no longer need the stored data, */ /* so we will overwrite the data from this level onto the previously */ /* used storage space. */ if (*curlvl == *tlvls) { qptr[curr] = 1; prmptr[curr] = 1; givptr[curr] = 1; } /* Sort and Deflate eigenvalues. */ _starpu_dlaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & perm[prmptr[curr]], &givptr[curr + 1], &givcol[(givptr[curr] << 1) + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp], &iwork[ indx], info); prmptr[curr + 1] = prmptr[curr] + *n; givptr[curr + 1] += givptr[curr]; /* Solve Secular Equation. */ if (k != 0) { _starpu_dlaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], &work[iw], &qstore[qptr[curr]], &k, info); if (*info != 0) { goto L30; } if (*icompq == 1) { _starpu_dgemm_("N", "N", qsiz, &k, &k, &c_b10, &work[iq2], &ldq2, &qstore[ qptr[curr]], &k, &c_b11, &q[q_offset], ldq); } /* Computing 2nd power */ i__1 = k; qptr[curr + 1] = qptr[curr] + i__1 * i__1; /* Prepare the INDXQ sorting permutation. */ n1 = k; n2 = *n - k; _starpu_dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); } else { qptr[curr + 1] = qptr[curr]; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { indxq[i__] = i__; /* L20: */ } } L30: return 0; /* End of DLAED7 */ } /* _starpu_dlaed7_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaed8.c000066400000000000000000000337741507764646700205530ustar00rootroot00000000000000/* dlaed8.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b3 = -1.; static integer c__1 = 1; /* Subroutine */ int _starpu_dlaed8_(integer *icompq, integer *k, integer *n, integer *qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer *givptr, integer *givcol, doublereal *givnum, integer *indxp, integer *indx, integer *info) { /* System generated locals */ integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal c__; integer i__, j; doublereal s, t; integer k2, n1, n2, jp, n1p1; doublereal eps, tau, tol; integer jlam, imax, jmax; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *), _starpu_dscal_( integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAED8 merges the two sets of eigenvalues together into a single */ /* sorted set. Then it tries to deflate the size of the problem. */ /* There are two ways in which deflation can occur: when two or more */ /* eigenvalues are close together or if there is a tiny element in the */ /* Z vector. For each such occurrence the order of the related secular */ /* equation problem is reduced by one. */ /* Arguments */ /* ========= */ /* ICOMPQ (input) INTEGER */ /* = 0: Compute eigenvalues only. */ /* = 1: Compute eigenvectors of original dense symmetric matrix */ /* also. On entry, Q contains the orthogonal matrix used */ /* to reduce the original matrix to tridiagonal form. */ /* K (output) INTEGER */ /* The number of non-deflated eigenvalues, and the order of the */ /* related secular equation. */ /* N (input) INTEGER */ /* The dimension of the symmetric tridiagonal matrix. N >= 0. */ /* QSIZ (input) INTEGER */ /* The dimension of the orthogonal matrix used to reduce */ /* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the eigenvalues of the two submatrices to be */ /* combined. On exit, the trailing (N-K) updated eigenvalues */ /* (those which were deflated) sorted into increasing order. */ /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ /* If ICOMPQ = 0, Q is not referenced. Otherwise, */ /* on entry, Q contains the eigenvectors of the partially solved */ /* system which has been previously updated in matrix */ /* multiplies with other partially solved eigensystems. */ /* On exit, Q contains the trailing (N-K) updated eigenvectors */ /* (those which were deflated) in its last N-K columns. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max(1,N). */ /* INDXQ (input) INTEGER array, dimension (N) */ /* The permutation which separately sorts the two sub-problems */ /* in D into ascending order. Note that elements in the second */ /* half of this permutation must first have CUTPNT added to */ /* their values in order to be accurate. */ /* RHO (input/output) DOUBLE PRECISION */ /* On entry, the off-diagonal element associated with the rank-1 */ /* cut which originally split the two submatrices which are now */ /* being recombined. */ /* On exit, RHO has been modified to the value required by */ /* DLAED3. */ /* CUTPNT (input) INTEGER */ /* The location of the last eigenvalue in the leading */ /* sub-matrix. min(1,N) <= CUTPNT <= N. */ /* Z (input) DOUBLE PRECISION array, dimension (N) */ /* On entry, Z contains the updating vector (the last row of */ /* the first sub-eigenvector matrix and the first row of the */ /* second sub-eigenvector matrix). */ /* On exit, the contents of Z are destroyed by the updating */ /* process. */ /* DLAMDA (output) DOUBLE PRECISION array, dimension (N) */ /* A copy of the first K eigenvalues which will be used by */ /* DLAED3 to form the secular equation. */ /* Q2 (output) DOUBLE PRECISION array, dimension (LDQ2,N) */ /* If ICOMPQ = 0, Q2 is not referenced. Otherwise, */ /* a copy of the first K eigenvectors which will be used by */ /* DLAED7 in a matrix multiply (DGEMM) to update the new */ /* eigenvectors. */ /* LDQ2 (input) INTEGER */ /* The leading dimension of the array Q2. LDQ2 >= max(1,N). */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* The first k values of the final deflation-altered z-vector and */ /* will be passed to DLAED3. */ /* PERM (output) INTEGER array, dimension (N) */ /* The permutations (from deflation and sorting) to be applied */ /* to each eigenblock. */ /* GIVPTR (output) INTEGER */ /* The number of Givens rotations which took place in this */ /* subproblem. */ /* GIVCOL (output) INTEGER array, dimension (2, N) */ /* Each pair of numbers indicates a pair of columns to take place */ /* in a Givens rotation. */ /* GIVNUM (output) DOUBLE PRECISION array, dimension (2, N) */ /* Each number indicates the S value to be used in the */ /* corresponding Givens rotation. */ /* INDXP (workspace) INTEGER array, dimension (N) */ /* The permutation used to place deflated values of D at the end */ /* of the array. INDXP(1:K) points to the nondeflated D-values */ /* and INDXP(K+1:N) points to the deflated eigenvalues. */ /* INDX (workspace) INTEGER array, dimension (N) */ /* The permutation used to sort the contents of D into ascending */ /* order. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --indxq; --z__; --dlamda; q2_dim1 = *ldq2; q2_offset = 1 + q2_dim1; q2 -= q2_offset; --w; --perm; givcol -= 3; givnum -= 3; --indxp; --indx; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*n < 0) { *info = -3; } else if (*icompq == 1 && *qsiz < *n) { *info = -4; } else if (*ldq < max(1,*n)) { *info = -7; } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { *info = -10; } else if (*ldq2 < max(1,*n)) { *info = -14; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLAED8", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } n1 = *cutpnt; n2 = *n - n1; n1p1 = n1 + 1; if (*rho < 0.) { _starpu_dscal_(&n2, &c_b3, &z__[n1p1], &c__1); } /* Normalize z so that norm(z) = 1 */ t = 1. / sqrt(2.); i__1 = *n; for (j = 1; j <= i__1; ++j) { indx[j] = j; /* L10: */ } _starpu_dscal_(n, &t, &z__[1], &c__1); *rho = (d__1 = *rho * 2., abs(d__1)); /* Sort the eigenvalues into increasing order */ i__1 = *n; for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { indxq[i__] += *cutpnt; /* L20: */ } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dlamda[i__] = d__[indxq[i__]]; w[i__] = z__[indxq[i__]]; /* L30: */ } i__ = 1; j = *cutpnt + 1; _starpu_dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = dlamda[indx[i__]]; z__[i__] = w[indx[i__]]; /* L40: */ } /* Calculate the allowable deflation tolerence */ imax = _starpu_idamax_(n, &z__[1], &c__1); jmax = _starpu_idamax_(n, &d__[1], &c__1); eps = _starpu_dlamch_("Epsilon"); tol = eps * 8. * (d__1 = d__[jmax], abs(d__1)); /* If the rank-1 modifier is small enough, no more needs to be done */ /* except to reorganize Q so that its columns correspond with the */ /* elements in D. */ if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { *k = 0; if (*icompq == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { perm[j] = indxq[indx[j]]; /* L50: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { perm[j] = indxq[indx[j]]; _starpu_dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], &c__1); /* L60: */ } _starpu_dlacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); } return 0; } /* If there are multiple eigenvalues then the problem deflates. Here */ /* the number of equal eigenvalues are found. As each equal */ /* eigenvalue is found, an elementary reflector is computed to rotate */ /* the corresponding eigensubspace so that the corresponding */ /* components of Z are zero in this new basis. */ *k = 0; *givptr = 0; k2 = *n + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { /* Deflate due to small z component. */ --k2; indxp[k2] = j; if (j == *n) { goto L110; } } else { jlam = j; goto L80; } /* L70: */ } L80: ++j; if (j > *n) { goto L100; } if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { /* Deflate due to small z component. */ --k2; indxp[k2] = j; } else { /* Check if eigenvalues are close enough to allow deflation. */ s = z__[jlam]; c__ = z__[j]; /* Find sqrt(a**2+b**2) without overflow or */ /* destructive underflow. */ tau = _starpu_dlapy2_(&c__, &s); t = d__[j] - d__[jlam]; c__ /= tau; s = -s / tau; if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { /* Deflation is possible. */ z__[j] = tau; z__[jlam] = 0.; /* Record the appropriate Givens rotation */ ++(*givptr); givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; givcol[(*givptr << 1) + 2] = indxq[indx[j]]; givnum[(*givptr << 1) + 1] = c__; givnum[(*givptr << 1) + 2] = s; if (*icompq == 1) { _starpu_drot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); } t = d__[jlam] * c__ * c__ + d__[j] * s * s; d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; d__[jlam] = t; --k2; i__ = 1; L90: if (k2 + i__ <= *n) { if (d__[jlam] < d__[indxp[k2 + i__]]) { indxp[k2 + i__ - 1] = indxp[k2 + i__]; indxp[k2 + i__] = jlam; ++i__; goto L90; } else { indxp[k2 + i__ - 1] = jlam; } } else { indxp[k2 + i__ - 1] = jlam; } jlam = j; } else { ++(*k); w[*k] = z__[jlam]; dlamda[*k] = d__[jlam]; indxp[*k] = jlam; jlam = j; } } goto L80; L100: /* Record the last eigenvalue. */ ++(*k); w[*k] = z__[jlam]; dlamda[*k] = d__[jlam]; indxp[*k] = jlam; L110: /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */ /* and Q2 respectively. The eigenvalues/vectors which were not */ /* deflated go into the first K slots of DLAMDA and Q2 respectively, */ /* while those which were deflated go into the last N - K slots. */ if (*icompq == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { jp = indxp[j]; dlamda[j] = d__[jp]; perm[j] = indxq[indx[jp]]; /* L120: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { jp = indxp[j]; dlamda[j] = d__[jp]; perm[j] = indxq[indx[jp]]; _starpu_dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] , &c__1); /* L130: */ } } /* The deflated eigenvalues and their corresponding vectors go back */ /* into the last N - K slots of D and Q respectively. */ if (*k < *n) { if (*icompq == 0) { i__1 = *n - *k; _starpu_dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); } else { i__1 = *n - *k; _starpu_dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); i__1 = *n - *k; _starpu_dlacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* k + 1) * q_dim1 + 1], ldq); } } return 0; /* End of DLAED8 */ } /* _starpu_dlaed8_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaed9.c000066400000000000000000000201341507764646700205360ustar00rootroot00000000000000/* dlaed9.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dlaed9_(integer *k, integer *kstart, integer *kstop, integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, integer *info) { /* System generated locals */ integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ integer i__, j; doublereal temp; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaed4_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *); extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAED9 finds the roots of the secular equation, as defined by the */ /* values in D, Z, and RHO, between KSTART and KSTOP. It makes the */ /* appropriate calls to DLAED4 and then stores the new matrix of */ /* eigenvectors for use in calculating the next level of Z vectors. */ /* Arguments */ /* ========= */ /* K (input) INTEGER */ /* The number of terms in the rational function to be solved by */ /* DLAED4. K >= 0. */ /* KSTART (input) INTEGER */ /* KSTOP (input) INTEGER */ /* The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP */ /* are to be computed. 1 <= KSTART <= KSTOP <= K. */ /* N (input) INTEGER */ /* The number of rows and columns in the Q matrix. */ /* N >= K (delation may result in N > K). */ /* D (output) DOUBLE PRECISION array, dimension (N) */ /* D(I) contains the updated eigenvalues */ /* for KSTART <= I <= KSTOP. */ /* Q (workspace) DOUBLE PRECISION array, dimension (LDQ,N) */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max( 1, N ). */ /* RHO (input) DOUBLE PRECISION */ /* The value of the parameter in the rank one update equation. */ /* RHO >= 0 required. */ /* DLAMDA (input) DOUBLE PRECISION array, dimension (K) */ /* The first K elements of this array contain the old roots */ /* of the deflated updating problem. These are the poles */ /* of the secular equation. */ /* W (input) DOUBLE PRECISION array, dimension (K) */ /* The first K elements of this array contain the components */ /* of the deflation-adjusted updating vector. */ /* S (output) DOUBLE PRECISION array, dimension (LDS, K) */ /* Will contain the eigenvectors of the repaired matrix which */ /* will be stored for subsequent Z vector calculation and */ /* multiplied by the previously accumulated eigenvectors */ /* to update the system. */ /* LDS (input) INTEGER */ /* The leading dimension of S. LDS >= max( 1, K ). */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, an eigenvalue did not converge */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --dlamda; --w; s_dim1 = *lds; s_offset = 1 + s_dim1; s -= s_offset; /* Function Body */ *info = 0; if (*k < 0) { *info = -1; } else if (*kstart < 1 || *kstart > max(1,*k)) { *info = -2; } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { *info = -3; } else if (*n < *k) { *info = -4; } else if (*ldq < max(1,*k)) { *info = -7; } else if (*lds < max(1,*k)) { *info = -12; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLAED9", &i__1); return 0; } /* Quick return if possible */ if (*k == 0) { return 0; } /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can */ /* be computed with high relative accuracy (barring over/underflow). */ /* This is a problem on machines without a guard digit in */ /* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ /* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), */ /* which on any of these machines zeros out the bottommost */ /* bit of DLAMDA(I) if it is 1; this makes the subsequent */ /* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation */ /* occurs. On binary machines with a guard digit (almost all */ /* machines) it does not change DLAMDA(I) at all. On hexadecimal */ /* and decimal machines with a guard digit, it slightly */ /* changes the bottommost bits of DLAMDA(I). It does not account */ /* for hexadecimal or decimal machines without guard digits */ /* (we know of none). We use a subroutine call to compute */ /* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating */ /* this code. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dlamda[i__] = _starpu_dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; /* L10: */ } i__1 = *kstop; for (j = *kstart; j <= i__1; ++j) { _starpu_dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], info); /* If the zero finder fails, the computation is terminated. */ if (*info != 0) { goto L120; } /* L20: */ } if (*k == 1 || *k == 2) { i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *k; for (j = 1; j <= i__2; ++j) { s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; /* L30: */ } /* L40: */ } goto L120; } /* Compute updated W. */ _starpu_dcopy_(k, &w[1], &c__1, &s[s_offset], &c__1); /* Initialize W(I) = Q(I,I) */ i__1 = *ldq + 1; _starpu_dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); /* L50: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); /* L60: */ } /* L70: */ } i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { d__1 = sqrt(-w[i__]); w[i__] = d_sign(&d__1, &s[i__ + s_dim1]); /* L80: */ } /* Compute eigenvectors of the modified rank-1 modification. */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; /* L90: */ } temp = _starpu_dnrm2_(k, &q[j * q_dim1 + 1], &c__1); i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; /* L100: */ } /* L110: */ } L120: return 0; /* End of DLAED9 */ } /* _starpu_dlaed9_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaeda.c000066400000000000000000000217451507764646700206170ustar00rootroot00000000000000/* dlaeda.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__2 = 2; static integer c__1 = 1; static doublereal c_b24 = 1.; static doublereal c_b26 = 0.; /* Subroutine */ int _starpu_dlaeda_(integer *n, integer *tlvls, integer *curlvl, integer *curpbm, integer *prmptr, integer *perm, integer *givptr, integer *givcol, doublereal *givnum, doublereal *q, integer *qptr, doublereal *z__, doublereal *ztemp, integer *info) { /* System generated locals */ integer i__1, i__2, i__3; /* Builtin functions */ integer pow_ii(integer *, integer *); double sqrt(doublereal); /* Local variables */ integer i__, k, mid, ptr; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); integer curr, bsiz1, bsiz2, psiz1, psiz2, zptr1; extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAEDA computes the Z vector corresponding to the merge step in the */ /* CURLVLth step of the merge process with TLVLS steps for the CURPBMth */ /* problem. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The dimension of the symmetric tridiagonal matrix. N >= 0. */ /* TLVLS (input) INTEGER */ /* The total number of merging levels in the overall divide and */ /* conquer tree. */ /* CURLVL (input) INTEGER */ /* The current level in the overall merge routine, */ /* 0 <= curlvl <= tlvls. */ /* CURPBM (input) INTEGER */ /* The current problem in the current level in the overall */ /* merge routine (counting from upper left to lower right). */ /* PRMPTR (input) INTEGER array, dimension (N lg N) */ /* Contains a list of pointers which indicate where in PERM a */ /* level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) */ /* indicates the size of the permutation and incidentally the */ /* size of the full, non-deflated problem. */ /* PERM (input) INTEGER array, dimension (N lg N) */ /* Contains the permutations (from deflation and sorting) to be */ /* applied to each eigenblock. */ /* GIVPTR (input) INTEGER array, dimension (N lg N) */ /* Contains a list of pointers which indicate where in GIVCOL a */ /* level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) */ /* indicates the number of Givens rotations. */ /* GIVCOL (input) INTEGER array, dimension (2, N lg N) */ /* Each pair of numbers indicates a pair of columns to take place */ /* in a Givens rotation. */ /* GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) */ /* Each number indicates the S value to be used in the */ /* corresponding Givens rotation. */ /* Q (input) DOUBLE PRECISION array, dimension (N**2) */ /* Contains the square eigenblocks from previous levels, the */ /* starting positions for blocks are given by QPTR. */ /* QPTR (input) INTEGER array, dimension (N+2) */ /* Contains a list of pointers which indicate where in Q an */ /* eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates */ /* the size of the block. */ /* Z (output) DOUBLE PRECISION array, dimension (N) */ /* On output this vector contains the updating vector (the last */ /* row of the first sub-eigenvector matrix and the first row of */ /* the second sub-eigenvector matrix). */ /* ZTEMP (workspace) DOUBLE PRECISION array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ztemp; --z__; --qptr; --q; givnum -= 3; givcol -= 3; --givptr; --perm; --prmptr; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLAEDA", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine location of first number in second half. */ mid = *n / 2 + 1; /* Gather last/first rows of appropriate eigenblocks into center of Z */ ptr = 1; /* Determine location of lowest level subproblem in the full storage */ /* scheme */ i__1 = *curlvl - 1; curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; /* Determine size of these matrices. We add HALF to the value of */ /* the SQRT in case the machine underestimates one of these square */ /* roots. */ bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + .5); bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1])) + .5); i__1 = mid - bsiz1 - 1; for (k = 1; k <= i__1; ++k) { z__[k] = 0.; /* L10: */ } _starpu_dcopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & c__1); _starpu_dcopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); i__1 = *n; for (k = mid + bsiz2; k <= i__1; ++k) { z__[k] = 0.; /* L20: */ } /* Loop thru remaining levels 1 -> CURLVL applying the Givens */ /* rotations and permutation and then multiplying the center matrices */ /* against the current Z. */ ptr = pow_ii(&c__2, tlvls) + 1; i__1 = *curlvl - 1; for (k = 1; k <= i__1; ++k) { i__2 = *curlvl - k; i__3 = *curlvl - k - 1; curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - 1; psiz1 = prmptr[curr + 1] - prmptr[curr]; psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; zptr1 = mid - psiz1; /* Apply Givens at CURR and CURR+1 */ i__2 = givptr[curr + 1] - 1; for (i__ = givptr[curr]; i__ <= i__2; ++i__) { _starpu_drot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, & z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[( i__ << 1) + 1], &givnum[(i__ << 1) + 2]); /* L30: */ } i__2 = givptr[curr + 2] - 1; for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { _starpu_drot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[ mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << 1) + 1], &givnum[(i__ << 1) + 2]); /* L40: */ } psiz1 = prmptr[curr + 1] - prmptr[curr]; psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; i__2 = psiz1 - 1; for (i__ = 0; i__ <= i__2; ++i__) { ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; /* L50: */ } i__2 = psiz2 - 1; for (i__ = 0; i__ <= i__2; ++i__) { ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - 1]; /* L60: */ } /* Multiply Blocks at CURR and CURR+1 */ /* Determine size of these matrices. We add HALF to the value of */ /* the SQRT in case the machine underestimates one of these */ /* square roots. */ bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + .5); bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1]) ) + .5); if (bsiz1 > 0) { _starpu_dgemv_("T", &bsiz1, &bsiz1, &c_b24, &q[qptr[curr]], &bsiz1, & ztemp[1], &c__1, &c_b26, &z__[zptr1], &c__1); } i__2 = psiz1 - bsiz1; _starpu_dcopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); if (bsiz2 > 0) { _starpu_dgemv_("T", &bsiz2, &bsiz2, &c_b24, &q[qptr[curr + 1]], &bsiz2, & ztemp[psiz1 + 1], &c__1, &c_b26, &z__[mid], &c__1); } i__2 = psiz2 - bsiz2; _starpu_dcopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & c__1); i__2 = *tlvls - k; ptr += pow_ii(&c__2, &i__2); /* L70: */ } return 0; /* End of DLAEDA */ } /* _starpu_dlaeda_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaein.c000066400000000000000000000440461507764646700206400ustar00rootroot00000000000000/* dlaein.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dlaein_(logical *rightv, logical *noinit, integer *n, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, doublereal *vr, doublereal *vi, doublereal *b, integer *ldb, doublereal *work, doublereal *eps3, doublereal *smlnum, doublereal * bignum, integer *info) { /* System generated locals */ integer b_dim1, b_offset, h_dim1, h_offset, i__1, i__2, i__3, i__4; doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j; doublereal w, x, y; integer i1, i2, i3; doublereal w1, ei, ej, xi, xr, rec; integer its, ierr; doublereal temp, norm, vmax; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal scale; extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); char trans[1]; doublereal vcrit, rootn, vnorm; extern doublereal _starpu_dlapy2_(doublereal *, doublereal *); doublereal absbii, absbjj; extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dladiv_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlatrs_( char *, char *, char *, char *, integer *, doublereal *, integer * , doublereal *, doublereal *, doublereal *, integer *); char normin[1]; doublereal nrmsml, growto; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAEIN uses inverse iteration to find a right or left eigenvector */ /* corresponding to the eigenvalue (WR,WI) of a real upper Hessenberg */ /* matrix H. */ /* Arguments */ /* ========= */ /* RIGHTV (input) LOGICAL */ /* = .TRUE. : compute right eigenvector; */ /* = .FALSE.: compute left eigenvector. */ /* NOINIT (input) LOGICAL */ /* = .TRUE. : no initial vector supplied in (VR,VI). */ /* = .FALSE.: initial vector supplied in (VR,VI). */ /* N (input) INTEGER */ /* The order of the matrix H. N >= 0. */ /* H (input) DOUBLE PRECISION array, dimension (LDH,N) */ /* The upper Hessenberg matrix H. */ /* LDH (input) INTEGER */ /* The leading dimension of the array H. LDH >= max(1,N). */ /* WR (input) DOUBLE PRECISION */ /* WI (input) DOUBLE PRECISION */ /* The real and imaginary parts of the eigenvalue of H whose */ /* corresponding right or left eigenvector is to be computed. */ /* VR (input/output) DOUBLE PRECISION array, dimension (N) */ /* VI (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, if NOINIT = .FALSE. and WI = 0.0, VR must contain */ /* a real starting vector for inverse iteration using the real */ /* eigenvalue WR; if NOINIT = .FALSE. and WI.ne.0.0, VR and VI */ /* must contain the real and imaginary parts of a complex */ /* starting vector for inverse iteration using the complex */ /* eigenvalue (WR,WI); otherwise VR and VI need not be set. */ /* On exit, if WI = 0.0 (real eigenvalue), VR contains the */ /* computed real eigenvector; if WI.ne.0.0 (complex eigenvalue), */ /* VR and VI contain the real and imaginary parts of the */ /* computed complex eigenvector. The eigenvector is normalized */ /* so that the component of largest magnitude has magnitude 1; */ /* here the magnitude of a complex number (x,y) is taken to be */ /* |x| + |y|. */ /* VI is not referenced if WI = 0.0. */ /* B (workspace) DOUBLE PRECISION array, dimension (LDB,N) */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= N+1. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* EPS3 (input) DOUBLE PRECISION */ /* A small machine-dependent value which is used to perturb */ /* close eigenvalues, and to replace zero pivots. */ /* SMLNUM (input) DOUBLE PRECISION */ /* A machine-dependent value close to the underflow threshold. */ /* BIGNUM (input) DOUBLE PRECISION */ /* A machine-dependent value close to the overflow threshold. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* = 1: inverse iteration did not converge; VR is set to the */ /* last iterate, and so is VI if WI.ne.0.0. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; --vr; --vi; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --work; /* Function Body */ *info = 0; /* GROWTO is the threshold used in the acceptance test for an */ /* eigenvector. */ rootn = sqrt((doublereal) (*n)); growto = .1 / rootn; /* Computing MAX */ d__1 = 1., d__2 = *eps3 * rootn; nrmsml = max(d__1,d__2) * *smlnum; /* Form B = H - (WR,WI)*I (except that the subdiagonal elements and */ /* the imaginary parts of the diagonal elements are not stored). */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = h__[i__ + j * h_dim1]; /* L10: */ } b[j + j * b_dim1] = h__[j + j * h_dim1] - *wr; /* L20: */ } if (*wi == 0.) { /* Real eigenvalue. */ if (*noinit) { /* Set initial vector. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { vr[i__] = *eps3; /* L30: */ } } else { /* Scale supplied initial vector. */ vnorm = _starpu_dnrm2_(n, &vr[1], &c__1); d__1 = *eps3 * rootn / max(vnorm,nrmsml); _starpu_dscal_(n, &d__1, &vr[1], &c__1); } if (*rightv) { /* LU decomposition with partial pivoting of B, replacing zero */ /* pivots by EPS3. */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { ei = h__[i__ + 1 + i__ * h_dim1]; if ((d__1 = b[i__ + i__ * b_dim1], abs(d__1)) < abs(ei)) { /* Interchange rows and eliminate. */ x = b[i__ + i__ * b_dim1] / ei; b[i__ + i__ * b_dim1] = ei; i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { temp = b[i__ + 1 + j * b_dim1]; b[i__ + 1 + j * b_dim1] = b[i__ + j * b_dim1] - x * temp; b[i__ + j * b_dim1] = temp; /* L40: */ } } else { /* Eliminate without interchange. */ if (b[i__ + i__ * b_dim1] == 0.) { b[i__ + i__ * b_dim1] = *eps3; } x = ei / b[i__ + i__ * b_dim1]; if (x != 0.) { i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { b[i__ + 1 + j * b_dim1] -= x * b[i__ + j * b_dim1] ; /* L50: */ } } } /* L60: */ } if (b[*n + *n * b_dim1] == 0.) { b[*n + *n * b_dim1] = *eps3; } *(unsigned char *)trans = 'N'; } else { /* UL decomposition with partial pivoting of B, replacing zero */ /* pivots by EPS3. */ for (j = *n; j >= 2; --j) { ej = h__[j + (j - 1) * h_dim1]; if ((d__1 = b[j + j * b_dim1], abs(d__1)) < abs(ej)) { /* Interchange columns and eliminate. */ x = b[j + j * b_dim1] / ej; b[j + j * b_dim1] = ej; i__1 = j - 1; for (i__ = 1; i__ <= i__1; ++i__) { temp = b[i__ + (j - 1) * b_dim1]; b[i__ + (j - 1) * b_dim1] = b[i__ + j * b_dim1] - x * temp; b[i__ + j * b_dim1] = temp; /* L70: */ } } else { /* Eliminate without interchange. */ if (b[j + j * b_dim1] == 0.) { b[j + j * b_dim1] = *eps3; } x = ej / b[j + j * b_dim1]; if (x != 0.) { i__1 = j - 1; for (i__ = 1; i__ <= i__1; ++i__) { b[i__ + (j - 1) * b_dim1] -= x * b[i__ + j * b_dim1]; /* L80: */ } } } /* L90: */ } if (b[b_dim1 + 1] == 0.) { b[b_dim1 + 1] = *eps3; } *(unsigned char *)trans = 'T'; } *(unsigned char *)normin = 'N'; i__1 = *n; for (its = 1; its <= i__1; ++its) { /* Solve U*x = scale*v for a right eigenvector */ /* or U'*x = scale*v for a left eigenvector, */ /* overwriting x on v. */ _starpu_dlatrs_("Upper", trans, "Nonunit", normin, n, &b[b_offset], ldb, & vr[1], &scale, &work[1], &ierr); *(unsigned char *)normin = 'Y'; /* Test for sufficient growth in the norm of v. */ vnorm = _starpu_dasum_(n, &vr[1], &c__1); if (vnorm >= growto * scale) { goto L120; } /* Choose new orthogonal starting vector and try again. */ temp = *eps3 / (rootn + 1.); vr[1] = *eps3; i__2 = *n; for (i__ = 2; i__ <= i__2; ++i__) { vr[i__] = temp; /* L100: */ } vr[*n - its + 1] -= *eps3 * rootn; /* L110: */ } /* Failure to find eigenvector in N iterations. */ *info = 1; L120: /* Normalize eigenvector. */ i__ = _starpu_idamax_(n, &vr[1], &c__1); d__2 = 1. / (d__1 = vr[i__], abs(d__1)); _starpu_dscal_(n, &d__2, &vr[1], &c__1); } else { /* Complex eigenvalue. */ if (*noinit) { /* Set initial vector. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { vr[i__] = *eps3; vi[i__] = 0.; /* L130: */ } } else { /* Scale supplied initial vector. */ d__1 = _starpu_dnrm2_(n, &vr[1], &c__1); d__2 = _starpu_dnrm2_(n, &vi[1], &c__1); norm = _starpu_dlapy2_(&d__1, &d__2); rec = *eps3 * rootn / max(norm,nrmsml); _starpu_dscal_(n, &rec, &vr[1], &c__1); _starpu_dscal_(n, &rec, &vi[1], &c__1); } if (*rightv) { /* LU decomposition with partial pivoting of B, replacing zero */ /* pivots by EPS3. */ /* The imaginary part of the (i,j)-th element of U is stored in */ /* B(j+1,i). */ b[b_dim1 + 2] = -(*wi); i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { b[i__ + 1 + b_dim1] = 0.; /* L140: */ } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { absbii = _starpu_dlapy2_(&b[i__ + i__ * b_dim1], &b[i__ + 1 + i__ * b_dim1]); ei = h__[i__ + 1 + i__ * h_dim1]; if (absbii < abs(ei)) { /* Interchange rows and eliminate. */ xr = b[i__ + i__ * b_dim1] / ei; xi = b[i__ + 1 + i__ * b_dim1] / ei; b[i__ + i__ * b_dim1] = ei; b[i__ + 1 + i__ * b_dim1] = 0.; i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { temp = b[i__ + 1 + j * b_dim1]; b[i__ + 1 + j * b_dim1] = b[i__ + j * b_dim1] - xr * temp; b[j + 1 + (i__ + 1) * b_dim1] = b[j + 1 + i__ * b_dim1] - xi * temp; b[i__ + j * b_dim1] = temp; b[j + 1 + i__ * b_dim1] = 0.; /* L150: */ } b[i__ + 2 + i__ * b_dim1] = -(*wi); b[i__ + 1 + (i__ + 1) * b_dim1] -= xi * *wi; b[i__ + 2 + (i__ + 1) * b_dim1] += xr * *wi; } else { /* Eliminate without interchanging rows. */ if (absbii == 0.) { b[i__ + i__ * b_dim1] = *eps3; b[i__ + 1 + i__ * b_dim1] = 0.; absbii = *eps3; } ei = ei / absbii / absbii; xr = b[i__ + i__ * b_dim1] * ei; xi = -b[i__ + 1 + i__ * b_dim1] * ei; i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { b[i__ + 1 + j * b_dim1] = b[i__ + 1 + j * b_dim1] - xr * b[i__ + j * b_dim1] + xi * b[j + 1 + i__ * b_dim1]; b[j + 1 + (i__ + 1) * b_dim1] = -xr * b[j + 1 + i__ * b_dim1] - xi * b[i__ + j * b_dim1]; /* L160: */ } b[i__ + 2 + (i__ + 1) * b_dim1] -= *wi; } /* Compute 1-norm of offdiagonal elements of i-th row. */ i__2 = *n - i__; i__3 = *n - i__; work[i__] = _starpu_dasum_(&i__2, &b[i__ + (i__ + 1) * b_dim1], ldb) + _starpu_dasum_(&i__3, &b[i__ + 2 + i__ * b_dim1], &c__1); /* L170: */ } if (b[*n + *n * b_dim1] == 0. && b[*n + 1 + *n * b_dim1] == 0.) { b[*n + *n * b_dim1] = *eps3; } work[*n] = 0.; i1 = *n; i2 = 1; i3 = -1; } else { /* UL decomposition with partial pivoting of conjg(B), */ /* replacing zero pivots by EPS3. */ /* The imaginary part of the (i,j)-th element of U is stored in */ /* B(j+1,i). */ b[*n + 1 + *n * b_dim1] = *wi; i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { b[*n + 1 + j * b_dim1] = 0.; /* L180: */ } for (j = *n; j >= 2; --j) { ej = h__[j + (j - 1) * h_dim1]; absbjj = _starpu_dlapy2_(&b[j + j * b_dim1], &b[j + 1 + j * b_dim1]); if (absbjj < abs(ej)) { /* Interchange columns and eliminate */ xr = b[j + j * b_dim1] / ej; xi = b[j + 1 + j * b_dim1] / ej; b[j + j * b_dim1] = ej; b[j + 1 + j * b_dim1] = 0.; i__1 = j - 1; for (i__ = 1; i__ <= i__1; ++i__) { temp = b[i__ + (j - 1) * b_dim1]; b[i__ + (j - 1) * b_dim1] = b[i__ + j * b_dim1] - xr * temp; b[j + i__ * b_dim1] = b[j + 1 + i__ * b_dim1] - xi * temp; b[i__ + j * b_dim1] = temp; b[j + 1 + i__ * b_dim1] = 0.; /* L190: */ } b[j + 1 + (j - 1) * b_dim1] = *wi; b[j - 1 + (j - 1) * b_dim1] += xi * *wi; b[j + (j - 1) * b_dim1] -= xr * *wi; } else { /* Eliminate without interchange. */ if (absbjj == 0.) { b[j + j * b_dim1] = *eps3; b[j + 1 + j * b_dim1] = 0.; absbjj = *eps3; } ej = ej / absbjj / absbjj; xr = b[j + j * b_dim1] * ej; xi = -b[j + 1 + j * b_dim1] * ej; i__1 = j - 1; for (i__ = 1; i__ <= i__1; ++i__) { b[i__ + (j - 1) * b_dim1] = b[i__ + (j - 1) * b_dim1] - xr * b[i__ + j * b_dim1] + xi * b[j + 1 + i__ * b_dim1]; b[j + i__ * b_dim1] = -xr * b[j + 1 + i__ * b_dim1] - xi * b[i__ + j * b_dim1]; /* L200: */ } b[j + (j - 1) * b_dim1] += *wi; } /* Compute 1-norm of offdiagonal elements of j-th column. */ i__1 = j - 1; i__2 = j - 1; work[j] = _starpu_dasum_(&i__1, &b[j * b_dim1 + 1], &c__1) + _starpu_dasum_(& i__2, &b[j + 1 + b_dim1], ldb); /* L210: */ } if (b[b_dim1 + 1] == 0. && b[b_dim1 + 2] == 0.) { b[b_dim1 + 1] = *eps3; } work[1] = 0.; i1 = 1; i2 = *n; i3 = 1; } i__1 = *n; for (its = 1; its <= i__1; ++its) { scale = 1.; vmax = 1.; vcrit = *bignum; /* Solve U*(xr,xi) = scale*(vr,vi) for a right eigenvector, */ /* or U'*(xr,xi) = scale*(vr,vi) for a left eigenvector, */ /* overwriting (xr,xi) on (vr,vi). */ i__2 = i2; i__3 = i3; for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) { if (work[i__] > vcrit) { rec = 1. / vmax; _starpu_dscal_(n, &rec, &vr[1], &c__1); _starpu_dscal_(n, &rec, &vi[1], &c__1); scale *= rec; vmax = 1.; vcrit = *bignum; } xr = vr[i__]; xi = vi[i__]; if (*rightv) { i__4 = *n; for (j = i__ + 1; j <= i__4; ++j) { xr = xr - b[i__ + j * b_dim1] * vr[j] + b[j + 1 + i__ * b_dim1] * vi[j]; xi = xi - b[i__ + j * b_dim1] * vi[j] - b[j + 1 + i__ * b_dim1] * vr[j]; /* L220: */ } } else { i__4 = i__ - 1; for (j = 1; j <= i__4; ++j) { xr = xr - b[j + i__ * b_dim1] * vr[j] + b[i__ + 1 + j * b_dim1] * vi[j]; xi = xi - b[j + i__ * b_dim1] * vi[j] - b[i__ + 1 + j * b_dim1] * vr[j]; /* L230: */ } } w = (d__1 = b[i__ + i__ * b_dim1], abs(d__1)) + (d__2 = b[i__ + 1 + i__ * b_dim1], abs(d__2)); if (w > *smlnum) { if (w < 1.) { w1 = abs(xr) + abs(xi); if (w1 > w * *bignum) { rec = 1. / w1; _starpu_dscal_(n, &rec, &vr[1], &c__1); _starpu_dscal_(n, &rec, &vi[1], &c__1); xr = vr[i__]; xi = vi[i__]; scale *= rec; vmax *= rec; } } /* Divide by diagonal element of B. */ _starpu_dladiv_(&xr, &xi, &b[i__ + i__ * b_dim1], &b[i__ + 1 + i__ * b_dim1], &vr[i__], &vi[i__]); /* Computing MAX */ d__3 = (d__1 = vr[i__], abs(d__1)) + (d__2 = vi[i__], abs( d__2)); vmax = max(d__3,vmax); vcrit = *bignum / vmax; } else { i__4 = *n; for (j = 1; j <= i__4; ++j) { vr[j] = 0.; vi[j] = 0.; /* L240: */ } vr[i__] = 1.; vi[i__] = 1.; scale = 0.; vmax = 1.; vcrit = *bignum; } /* L250: */ } /* Test for sufficient growth in the norm of (VR,VI). */ vnorm = _starpu_dasum_(n, &vr[1], &c__1) + _starpu_dasum_(n, &vi[1], &c__1); if (vnorm >= growto * scale) { goto L280; } /* Choose a new orthogonal starting vector and try again. */ y = *eps3 / (rootn + 1.); vr[1] = *eps3; vi[1] = 0.; i__3 = *n; for (i__ = 2; i__ <= i__3; ++i__) { vr[i__] = y; vi[i__] = 0.; /* L260: */ } vr[*n - its + 1] -= *eps3 * rootn; /* L270: */ } /* Failure to find eigenvector in N iterations */ *info = 1; L280: /* Normalize eigenvector. */ vnorm = 0.; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__3 = vnorm, d__4 = (d__1 = vr[i__], abs(d__1)) + (d__2 = vi[i__] , abs(d__2)); vnorm = max(d__3,d__4); /* L290: */ } d__1 = 1. / vnorm; _starpu_dscal_(n, &d__1, &vr[1], &c__1); d__1 = 1. / vnorm; _starpu_dscal_(n, &d__1, &vi[1], &c__1); } return 0; /* End of DLAEIN */ } /* _starpu_dlaein_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaev2.c000066400000000000000000000114031507764646700205500ustar00rootroot00000000000000/* dlaev2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaev2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *rt1, doublereal *rt2, doublereal *cs1, doublereal *sn1) { /* System generated locals */ doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal ab, df, cs, ct, tb, sm, tn, rt, adf, acs; integer sgn1, sgn2; doublereal acmn, acmx; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix */ /* [ A B ] */ /* [ B C ]. */ /* On return, RT1 is the eigenvalue of larger absolute value, RT2 is the */ /* eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right */ /* eigenvector for RT1, giving the decomposition */ /* [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] */ /* [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. */ /* Arguments */ /* ========= */ /* A (input) DOUBLE PRECISION */ /* The (1,1) element of the 2-by-2 matrix. */ /* B (input) DOUBLE PRECISION */ /* The (1,2) element and the conjugate of the (2,1) element of */ /* the 2-by-2 matrix. */ /* C (input) DOUBLE PRECISION */ /* The (2,2) element of the 2-by-2 matrix. */ /* RT1 (output) DOUBLE PRECISION */ /* The eigenvalue of larger absolute value. */ /* RT2 (output) DOUBLE PRECISION */ /* The eigenvalue of smaller absolute value. */ /* CS1 (output) DOUBLE PRECISION */ /* SN1 (output) DOUBLE PRECISION */ /* The vector (CS1, SN1) is a unit right eigenvector for RT1. */ /* Further Details */ /* =============== */ /* RT1 is accurate to a few ulps barring over/underflow. */ /* RT2 may be inaccurate if there is massive cancellation in the */ /* determinant A*C-B*B; higher precision or correctly rounded or */ /* correctly truncated arithmetic would be needed to compute RT2 */ /* accurately in all cases. */ /* CS1 and SN1 are accurate to a few ulps barring over/underflow. */ /* Overflow is possible only if RT1 is within a factor of 5 of overflow. */ /* Underflow is harmless if the input data is 0 or exceeds */ /* underflow_threshold / macheps. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Compute the eigenvalues */ sm = *a + *c__; df = *a - *c__; adf = abs(df); tb = *b + *b; ab = abs(tb); if (abs(*a) > abs(*c__)) { acmx = *a; acmn = *c__; } else { acmx = *c__; acmn = *a; } if (adf > ab) { /* Computing 2nd power */ d__1 = ab / adf; rt = adf * sqrt(d__1 * d__1 + 1.); } else if (adf < ab) { /* Computing 2nd power */ d__1 = adf / ab; rt = ab * sqrt(d__1 * d__1 + 1.); } else { /* Includes case AB=ADF=0 */ rt = ab * sqrt(2.); } if (sm < 0.) { *rt1 = (sm - rt) * .5; sgn1 = -1; /* Order of execution important. */ /* To get fully accurate smaller eigenvalue, */ /* next line needs to be executed in higher precision. */ *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; } else if (sm > 0.) { *rt1 = (sm + rt) * .5; sgn1 = 1; /* Order of execution important. */ /* To get fully accurate smaller eigenvalue, */ /* next line needs to be executed in higher precision. */ *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; } else { /* Includes case RT1 = RT2 = 0 */ *rt1 = rt * .5; *rt2 = rt * -.5; sgn1 = 1; } /* Compute the eigenvector */ if (df >= 0.) { cs = df + rt; sgn2 = 1; } else { cs = df - rt; sgn2 = -1; } acs = abs(cs); if (acs > ab) { ct = -tb / cs; *sn1 = 1. / sqrt(ct * ct + 1.); *cs1 = ct * *sn1; } else { if (ab == 0.) { *cs1 = 1.; *sn1 = 0.; } else { tn = -cs / tb; *cs1 = 1. / sqrt(tn * tn + 1.); *sn1 = tn * *cs1; } } if (sgn1 == sgn2) { tn = *cs1; *cs1 = -(*sn1); *sn1 = tn; } return 0; /* End of DLAEV2 */ } /* _starpu_dlaev2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaexc.c000066400000000000000000000322211507764646700206340ustar00rootroot00000000000000/* dlaexc.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__4 = 4; static logical c_false = FALSE_; static integer c_n1 = -1; static integer c__2 = 2; static integer c__3 = 3; /* Subroutine */ int _starpu_dlaexc_(logical *wantq, integer *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, integer *j1, integer *n1, integer *n2, doublereal *work, integer *info) { /* System generated locals */ integer q_dim1, q_offset, t_dim1, t_offset, i__1; doublereal d__1, d__2, d__3; /* Local variables */ doublereal d__[16] /* was [4][4] */; integer k; doublereal u[3], x[4] /* was [2][2] */; integer j2, j3, j4; doublereal u1[3], u2[3]; integer nd; doublereal cs, t11, t22, t33, sn, wi1, wi2, wr1, wr2, eps, tau, tau1, tau2; integer ierr; doublereal temp; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); doublereal scale, dnorm, xnorm; extern /* Subroutine */ int _starpu_dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlasy2_( logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlarfx_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *); doublereal thresh, smlnum; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in */ /* an upper quasi-triangular matrix T by an orthogonal similarity */ /* transformation. */ /* T must be in Schur canonical form, that is, block upper triangular */ /* with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block */ /* has its diagonal elemnts equal and its off-diagonal elements of */ /* opposite sign. */ /* Arguments */ /* ========= */ /* WANTQ (input) LOGICAL */ /* = .TRUE. : accumulate the transformation in the matrix Q; */ /* = .FALSE.: do not accumulate the transformation. */ /* N (input) INTEGER */ /* The order of the matrix T. N >= 0. */ /* T (input/output) DOUBLE PRECISION array, dimension (LDT,N) */ /* On entry, the upper quasi-triangular matrix T, in Schur */ /* canonical form. */ /* On exit, the updated matrix T, again in Schur canonical form. */ /* LDT (input) INTEGER */ /* The leading dimension of the array T. LDT >= max(1,N). */ /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ /* On entry, if WANTQ is .TRUE., the orthogonal matrix Q. */ /* On exit, if WANTQ is .TRUE., the updated matrix Q. */ /* If WANTQ is .FALSE., Q is not referenced. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. */ /* LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N. */ /* J1 (input) INTEGER */ /* The index of the first row of the first block T11. */ /* N1 (input) INTEGER */ /* The order of the first block T11. N1 = 0, 1 or 2. */ /* N2 (input) INTEGER */ /* The order of the second block T22. N2 = 0, 1 or 2. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* = 1: the transformed matrix T would be too far from Schur */ /* form; the blocks are not swapped and T and Q are */ /* unchanged. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --work; /* Function Body */ *info = 0; /* Quick return if possible */ if (*n == 0 || *n1 == 0 || *n2 == 0) { return 0; } if (*j1 + *n1 > *n) { return 0; } j2 = *j1 + 1; j3 = *j1 + 2; j4 = *j1 + 3; if (*n1 == 1 && *n2 == 1) { /* Swap two 1-by-1 blocks. */ t11 = t[*j1 + *j1 * t_dim1]; t22 = t[j2 + j2 * t_dim1]; /* Determine the transformation to perform the interchange. */ d__1 = t22 - t11; _starpu_dlartg_(&t[*j1 + j2 * t_dim1], &d__1, &cs, &sn, &temp); /* Apply transformation to the matrix T. */ if (j3 <= *n) { i__1 = *n - *j1 - 1; _starpu_drot_(&i__1, &t[*j1 + j3 * t_dim1], ldt, &t[j2 + j3 * t_dim1], ldt, &cs, &sn); } i__1 = *j1 - 1; _starpu_drot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], &c__1, &cs, &sn); t[*j1 + *j1 * t_dim1] = t22; t[j2 + j2 * t_dim1] = t11; if (*wantq) { /* Accumulate transformation in the matrix Q. */ _starpu_drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], &c__1, &cs, &sn); } } else { /* Swapping involves at least one 2-by-2 block. */ /* Copy the diagonal block of order N1+N2 to the local array D */ /* and compute its norm. */ nd = *n1 + *n2; _starpu_dlacpy_("Full", &nd, &nd, &t[*j1 + *j1 * t_dim1], ldt, d__, &c__4); dnorm = _starpu_dlange_("Max", &nd, &nd, d__, &c__4, &work[1]); /* Compute machine-dependent threshold for test for accepting */ /* swap. */ eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S") / eps; /* Computing MAX */ d__1 = eps * 10. * dnorm; thresh = max(d__1,smlnum); /* Solve T11*X - X*T22 = scale*T12 for X. */ _starpu_dlasy2_(&c_false, &c_false, &c_n1, n1, n2, d__, &c__4, &d__[*n1 + 1 + (*n1 + 1 << 2) - 5], &c__4, &d__[(*n1 + 1 << 2) - 4], &c__4, & scale, x, &c__2, &xnorm, &ierr); /* Swap the adjacent diagonal blocks. */ k = *n1 + *n1 + *n2 - 3; switch (k) { case 1: goto L10; case 2: goto L20; case 3: goto L30; } L10: /* N1 = 1, N2 = 2: generate elementary reflector H so that: */ /* ( scale, X11, X12 ) H = ( 0, 0, * ) */ u[0] = scale; u[1] = x[0]; u[2] = x[2]; _starpu_dlarfg_(&c__3, &u[2], u, &c__1, &tau); u[2] = 1.; t11 = t[*j1 + *j1 * t_dim1]; /* Perform swap provisionally on diagonal block in D. */ _starpu_dlarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); _starpu_dlarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); /* Test whether to reject swap. */ /* Computing MAX */ d__2 = abs(d__[2]), d__3 = abs(d__[6]), d__2 = max(d__2,d__3), d__3 = (d__1 = d__[10] - t11, abs(d__1)); if (max(d__2,d__3) > thresh) { goto L50; } /* Accept swap: apply transformation to the entire matrix T. */ i__1 = *n - *j1 + 1; _starpu_dlarfx_("L", &c__3, &i__1, u, &tau, &t[*j1 + *j1 * t_dim1], ldt, & work[1]); _starpu_dlarfx_("R", &j2, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1]); t[j3 + *j1 * t_dim1] = 0.; t[j3 + j2 * t_dim1] = 0.; t[j3 + j3 * t_dim1] = t11; if (*wantq) { /* Accumulate transformation in the matrix Q. */ _starpu_dlarfx_("R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[ 1]); } goto L40; L20: /* N1 = 2, N2 = 1: generate elementary reflector H so that: */ /* H ( -X11 ) = ( * ) */ /* ( -X21 ) = ( 0 ) */ /* ( scale ) = ( 0 ) */ u[0] = -x[0]; u[1] = -x[1]; u[2] = scale; _starpu_dlarfg_(&c__3, u, &u[1], &c__1, &tau); u[0] = 1.; t33 = t[j3 + j3 * t_dim1]; /* Perform swap provisionally on diagonal block in D. */ _starpu_dlarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); _starpu_dlarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); /* Test whether to reject swap. */ /* Computing MAX */ d__2 = abs(d__[1]), d__3 = abs(d__[2]), d__2 = max(d__2,d__3), d__3 = (d__1 = d__[0] - t33, abs(d__1)); if (max(d__2,d__3) > thresh) { goto L50; } /* Accept swap: apply transformation to the entire matrix T. */ _starpu_dlarfx_("R", &j3, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1]); i__1 = *n - *j1; _starpu_dlarfx_("L", &c__3, &i__1, u, &tau, &t[*j1 + j2 * t_dim1], ldt, &work[ 1]); t[*j1 + *j1 * t_dim1] = t33; t[j2 + *j1 * t_dim1] = 0.; t[j3 + *j1 * t_dim1] = 0.; if (*wantq) { /* Accumulate transformation in the matrix Q. */ _starpu_dlarfx_("R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[ 1]); } goto L40; L30: /* N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so */ /* that: */ /* H(2) H(1) ( -X11 -X12 ) = ( * * ) */ /* ( -X21 -X22 ) ( 0 * ) */ /* ( scale 0 ) ( 0 0 ) */ /* ( 0 scale ) ( 0 0 ) */ u1[0] = -x[0]; u1[1] = -x[1]; u1[2] = scale; _starpu_dlarfg_(&c__3, u1, &u1[1], &c__1, &tau1); u1[0] = 1.; temp = -tau1 * (x[2] + u1[1] * x[3]); u2[0] = -temp * u1[1] - x[3]; u2[1] = -temp * u1[2]; u2[2] = scale; _starpu_dlarfg_(&c__3, u2, &u2[1], &c__1, &tau2); u2[0] = 1.; /* Perform swap provisionally on diagonal block in D. */ _starpu_dlarfx_("L", &c__3, &c__4, u1, &tau1, d__, &c__4, &work[1]) ; _starpu_dlarfx_("R", &c__4, &c__3, u1, &tau1, d__, &c__4, &work[1]) ; _starpu_dlarfx_("L", &c__3, &c__4, u2, &tau2, &d__[1], &c__4, &work[1]); _starpu_dlarfx_("R", &c__4, &c__3, u2, &tau2, &d__[4], &c__4, &work[1]); /* Test whether to reject swap. */ /* Computing MAX */ d__1 = abs(d__[2]), d__2 = abs(d__[6]), d__1 = max(d__1,d__2), d__2 = abs(d__[3]), d__1 = max(d__1,d__2), d__2 = abs(d__[7]); if (max(d__1,d__2) > thresh) { goto L50; } /* Accept swap: apply transformation to the entire matrix T. */ i__1 = *n - *j1 + 1; _starpu_dlarfx_("L", &c__3, &i__1, u1, &tau1, &t[*j1 + *j1 * t_dim1], ldt, & work[1]); _starpu_dlarfx_("R", &j4, &c__3, u1, &tau1, &t[*j1 * t_dim1 + 1], ldt, &work[ 1]); i__1 = *n - *j1 + 1; _starpu_dlarfx_("L", &c__3, &i__1, u2, &tau2, &t[j2 + *j1 * t_dim1], ldt, & work[1]); _starpu_dlarfx_("R", &j4, &c__3, u2, &tau2, &t[j2 * t_dim1 + 1], ldt, &work[1] ); t[j3 + *j1 * t_dim1] = 0.; t[j3 + j2 * t_dim1] = 0.; t[j4 + *j1 * t_dim1] = 0.; t[j4 + j2 * t_dim1] = 0.; if (*wantq) { /* Accumulate transformation in the matrix Q. */ _starpu_dlarfx_("R", n, &c__3, u1, &tau1, &q[*j1 * q_dim1 + 1], ldq, & work[1]); _starpu_dlarfx_("R", n, &c__3, u2, &tau2, &q[j2 * q_dim1 + 1], ldq, &work[ 1]); } L40: if (*n2 == 2) { /* Standardize new 2-by-2 block T11 */ _starpu_dlanv2_(&t[*j1 + *j1 * t_dim1], &t[*j1 + j2 * t_dim1], &t[j2 + * j1 * t_dim1], &t[j2 + j2 * t_dim1], &wr1, &wi1, &wr2, & wi2, &cs, &sn); i__1 = *n - *j1 - 1; _starpu_drot_(&i__1, &t[*j1 + (*j1 + 2) * t_dim1], ldt, &t[j2 + (*j1 + 2) * t_dim1], ldt, &cs, &sn); i__1 = *j1 - 1; _starpu_drot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], & c__1, &cs, &sn); if (*wantq) { _starpu_drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], & c__1, &cs, &sn); } } if (*n1 == 2) { /* Standardize new 2-by-2 block T22 */ j3 = *j1 + *n2; j4 = j3 + 1; _starpu_dlanv2_(&t[j3 + j3 * t_dim1], &t[j3 + j4 * t_dim1], &t[j4 + j3 * t_dim1], &t[j4 + j4 * t_dim1], &wr1, &wi1, &wr2, &wi2, & cs, &sn); if (j3 + 2 <= *n) { i__1 = *n - j3 - 1; _starpu_drot_(&i__1, &t[j3 + (j3 + 2) * t_dim1], ldt, &t[j4 + (j3 + 2) * t_dim1], ldt, &cs, &sn); } i__1 = j3 - 1; _starpu_drot_(&i__1, &t[j3 * t_dim1 + 1], &c__1, &t[j4 * t_dim1 + 1], & c__1, &cs, &sn); if (*wantq) { _starpu_drot_(n, &q[j3 * q_dim1 + 1], &c__1, &q[j4 * q_dim1 + 1], & c__1, &cs, &sn); } } } return 0; /* Exit with INFO = 1 if swap was rejected. */ L50: *info = 1; return 0; /* End of DLAEXC */ } /* _starpu_dlaexc_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlag2.c000066400000000000000000000263531507764646700203760ustar00rootroot00000000000000/* dlag2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlag2_(doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *safmin, doublereal *scale1, doublereal * scale2, doublereal *wr1, doublereal *wr2, doublereal *wi) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset; doublereal d__1, d__2, d__3, d__4, d__5, d__6; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ doublereal r__, c1, c2, c3, c4, c5, s1, s2, a11, a12, a21, a22, b11, b12, b22, pp, qq, ss, as11, as12, as22, sum, abi22, diff, bmin, wbig, wabs, wdet, binv11, binv22, discr, anorm, bnorm, bsize, shift, rtmin, rtmax, wsize, ascale, bscale, wscale, safmax, wsmall; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue */ /* problem A - w B, with scaling as necessary to avoid over-/underflow. */ /* The scaling factor "s" results in a modified eigenvalue equation */ /* s A - w B */ /* where s is a non-negative scaling factor chosen so that w, w B, */ /* and s A do not overflow and, if possible, do not underflow, either. */ /* Arguments */ /* ========= */ /* A (input) DOUBLE PRECISION array, dimension (LDA, 2) */ /* On entry, the 2 x 2 matrix A. It is assumed that its 1-norm */ /* is less than 1/SAFMIN. Entries less than */ /* sqrt(SAFMIN)*norm(A) are subject to being treated as zero. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= 2. */ /* B (input) DOUBLE PRECISION array, dimension (LDB, 2) */ /* On entry, the 2 x 2 upper triangular matrix B. It is */ /* assumed that the one-norm of B is less than 1/SAFMIN. The */ /* diagonals should be at least sqrt(SAFMIN) times the largest */ /* element of B (in absolute value); if a diagonal is smaller */ /* than that, then +/- sqrt(SAFMIN) will be used instead of */ /* that diagonal. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= 2. */ /* SAFMIN (input) DOUBLE PRECISION */ /* The smallest positive number s.t. 1/SAFMIN does not */ /* overflow. (This should always be DLAMCH('S') -- it is an */ /* argument in order to avoid having to call DLAMCH frequently.) */ /* SCALE1 (output) DOUBLE PRECISION */ /* A scaling factor used to avoid over-/underflow in the */ /* eigenvalue equation which defines the first eigenvalue. If */ /* the eigenvalues are complex, then the eigenvalues are */ /* ( WR1 +/- WI i ) / SCALE1 (which may lie outside the */ /* exponent range of the machine), SCALE1=SCALE2, and SCALE1 */ /* will always be positive. If the eigenvalues are real, then */ /* the first (real) eigenvalue is WR1 / SCALE1 , but this may */ /* overflow or underflow, and in fact, SCALE1 may be zero or */ /* less than the underflow threshhold if the exact eigenvalue */ /* is sufficiently large. */ /* SCALE2 (output) DOUBLE PRECISION */ /* A scaling factor used to avoid over-/underflow in the */ /* eigenvalue equation which defines the second eigenvalue. If */ /* the eigenvalues are complex, then SCALE2=SCALE1. If the */ /* eigenvalues are real, then the second (real) eigenvalue is */ /* WR2 / SCALE2 , but this may overflow or underflow, and in */ /* fact, SCALE2 may be zero or less than the underflow */ /* threshhold if the exact eigenvalue is sufficiently large. */ /* WR1 (output) DOUBLE PRECISION */ /* If the eigenvalue is real, then WR1 is SCALE1 times the */ /* eigenvalue closest to the (2,2) element of A B**(-1). If the */ /* eigenvalue is complex, then WR1=WR2 is SCALE1 times the real */ /* part of the eigenvalues. */ /* WR2 (output) DOUBLE PRECISION */ /* If the eigenvalue is real, then WR2 is SCALE2 times the */ /* other eigenvalue. If the eigenvalue is complex, then */ /* WR1=WR2 is SCALE1 times the real part of the eigenvalues. */ /* WI (output) DOUBLE PRECISION */ /* If the eigenvalue is real, then WI is zero. If the */ /* eigenvalue is complex, then WI is SCALE1 times the imaginary */ /* part of the eigenvalues. WI will always be non-negative. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ rtmin = sqrt(*safmin); rtmax = 1. / rtmin; safmax = 1. / *safmin; /* Scale A */ /* Computing MAX */ d__5 = (d__1 = a[a_dim1 + 1], abs(d__1)) + (d__2 = a[a_dim1 + 2], abs( d__2)), d__6 = (d__3 = a[(a_dim1 << 1) + 1], abs(d__3)) + (d__4 = a[(a_dim1 << 1) + 2], abs(d__4)), d__5 = max(d__5,d__6); anorm = max(d__5,*safmin); ascale = 1. / anorm; a11 = ascale * a[a_dim1 + 1]; a21 = ascale * a[a_dim1 + 2]; a12 = ascale * a[(a_dim1 << 1) + 1]; a22 = ascale * a[(a_dim1 << 1) + 2]; /* Perturb B if necessary to insure non-singularity */ b11 = b[b_dim1 + 1]; b12 = b[(b_dim1 << 1) + 1]; b22 = b[(b_dim1 << 1) + 2]; /* Computing MAX */ d__1 = abs(b11), d__2 = abs(b12), d__1 = max(d__1,d__2), d__2 = abs(b22), d__1 = max(d__1,d__2); bmin = rtmin * max(d__1,rtmin); if (abs(b11) < bmin) { b11 = d_sign(&bmin, &b11); } if (abs(b22) < bmin) { b22 = d_sign(&bmin, &b22); } /* Scale B */ /* Computing MAX */ d__1 = abs(b11), d__2 = abs(b12) + abs(b22), d__1 = max(d__1,d__2); bnorm = max(d__1,*safmin); /* Computing MAX */ d__1 = abs(b11), d__2 = abs(b22); bsize = max(d__1,d__2); bscale = 1. / bsize; b11 *= bscale; b12 *= bscale; b22 *= bscale; /* Compute larger eigenvalue by method described by C. van Loan */ /* ( AS is A shifted by -SHIFT*B ) */ binv11 = 1. / b11; binv22 = 1. / b22; s1 = a11 * binv11; s2 = a22 * binv22; if (abs(s1) <= abs(s2)) { as12 = a12 - s1 * b12; as22 = a22 - s1 * b22; ss = a21 * (binv11 * binv22); abi22 = as22 * binv22 - ss * b12; pp = abi22 * .5; shift = s1; } else { as12 = a12 - s2 * b12; as11 = a11 - s2 * b11; ss = a21 * (binv11 * binv22); abi22 = -ss * b12; pp = (as11 * binv11 + abi22) * .5; shift = s2; } qq = ss * as12; if ((d__1 = pp * rtmin, abs(d__1)) >= 1.) { /* Computing 2nd power */ d__1 = rtmin * pp; discr = d__1 * d__1 + qq * *safmin; r__ = sqrt((abs(discr))) * rtmax; } else { /* Computing 2nd power */ d__1 = pp; if (d__1 * d__1 + abs(qq) <= *safmin) { /* Computing 2nd power */ d__1 = rtmax * pp; discr = d__1 * d__1 + qq * safmax; r__ = sqrt((abs(discr))) * rtmin; } else { /* Computing 2nd power */ d__1 = pp; discr = d__1 * d__1 + qq; r__ = sqrt((abs(discr))); } } /* Note: the test of R in the following IF is to cover the case when */ /* DISCR is small and negative and is flushed to zero during */ /* the calculation of R. On machines which have a consistent */ /* flush-to-zero threshhold and handle numbers above that */ /* threshhold correctly, it would not be necessary. */ if (discr >= 0. || r__ == 0.) { sum = pp + d_sign(&r__, &pp); diff = pp - d_sign(&r__, &pp); wbig = shift + sum; /* Compute smaller eigenvalue */ wsmall = shift + diff; /* Computing MAX */ d__1 = abs(wsmall); if (abs(wbig) * .5 > max(d__1,*safmin)) { wdet = (a11 * a22 - a12 * a21) * (binv11 * binv22); wsmall = wdet / wbig; } /* Choose (real) eigenvalue closest to 2,2 element of A*B**(-1) */ /* for WR1. */ if (pp > abi22) { *wr1 = min(wbig,wsmall); *wr2 = max(wbig,wsmall); } else { *wr1 = max(wbig,wsmall); *wr2 = min(wbig,wsmall); } *wi = 0.; } else { /* Complex eigenvalues */ *wr1 = shift + pp; *wr2 = *wr1; *wi = r__; } /* Further scaling to avoid underflow and overflow in computing */ /* SCALE1 and overflow in computing w*B. */ /* This scale factor (WSCALE) is bounded from above using C1 and C2, */ /* and from below using C3 and C4. */ /* C1 implements the condition s A must never overflow. */ /* C2 implements the condition w B must never overflow. */ /* C3, with C2, */ /* implement the condition that s A - w B must never overflow. */ /* C4 implements the condition s should not underflow. */ /* C5 implements the condition max(s,|w|) should be at least 2. */ c1 = bsize * (*safmin * max(1.,ascale)); c2 = *safmin * max(1.,bnorm); c3 = bsize * *safmin; if (ascale <= 1. && bsize <= 1.) { /* Computing MIN */ d__1 = 1., d__2 = ascale / *safmin * bsize; c4 = min(d__1,d__2); } else { c4 = 1.; } if (ascale <= 1. || bsize <= 1.) { /* Computing MIN */ d__1 = 1., d__2 = ascale * bsize; c5 = min(d__1,d__2); } else { c5 = 1.; } /* Scale first eigenvalue */ wabs = abs(*wr1) + abs(*wi); /* Computing MAX */ /* Computing MIN */ d__3 = c4, d__4 = max(wabs,c5) * .5; d__1 = max(*safmin,c1), d__2 = (wabs * c2 + c3) * 1.0000100000000001, d__1 = max(d__1,d__2), d__2 = min(d__3,d__4); wsize = max(d__1,d__2); if (wsize != 1.) { wscale = 1. / wsize; if (wsize > 1.) { *scale1 = max(ascale,bsize) * wscale * min(ascale,bsize); } else { *scale1 = min(ascale,bsize) * wscale * max(ascale,bsize); } *wr1 *= wscale; if (*wi != 0.) { *wi *= wscale; *wr2 = *wr1; *scale2 = *scale1; } } else { *scale1 = ascale * bsize; *scale2 = *scale1; } /* Scale second eigenvalue (if real) */ if (*wi == 0.) { /* Computing MAX */ /* Computing MIN */ /* Computing MAX */ d__5 = abs(*wr2); d__3 = c4, d__4 = max(d__5,c5) * .5; d__1 = max(*safmin,c1), d__2 = (abs(*wr2) * c2 + c3) * 1.0000100000000001, d__1 = max(d__1,d__2), d__2 = min(d__3, d__4); wsize = max(d__1,d__2); if (wsize != 1.) { wscale = 1. / wsize; if (wsize > 1.) { *scale2 = max(ascale,bsize) * wscale * min(ascale,bsize); } else { *scale2 = min(ascale,bsize) * wscale * max(ascale,bsize); } *wr2 *= wscale; } else { *scale2 = ascale * bsize; } } /* End of DLAG2 */ return 0; } /* _starpu_dlag2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlag2s.c000066400000000000000000000063011507764646700205500ustar00rootroot00000000000000/* dlag2s.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlag2s_(integer *m, integer *n, doublereal *a, integer * lda, real *sa, integer *ldsa, integer *info) { /* System generated locals */ integer sa_dim1, sa_offset, a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j; doublereal rmax; extern doublereal _starpu_slamch_(char *); /* -- LAPACK PROTOTYPE auxiliary routine (version 3.1.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* August 2007 */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAG2S converts a DOUBLE PRECISION matrix, SA, to a SINGLE */ /* PRECISION matrix, A. */ /* RMAX is the overflow for the SINGLE PRECISION arithmetic */ /* DLAG2S checks that all the entries of A are between -RMAX and */ /* RMAX. If not the convertion is aborted and a flag is raised. */ /* This is an auxiliary routine so there is no argument checking. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of lines of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N coefficient matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* SA (output) REAL array, dimension (LDSA,N) */ /* On exit, if INFO=0, the M-by-N coefficient matrix SA; if */ /* INFO>0, the content of SA is unspecified. */ /* LDSA (input) INTEGER */ /* The leading dimension of the array SA. LDSA >= max(1,M). */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* = 1: an entry of the matrix A is greater than the SINGLE */ /* PRECISION overflow threshold, in this case, the content */ /* of SA in exit is unspecified. */ /* ========= */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; sa_dim1 = *ldsa; sa_offset = 1 + sa_dim1; sa -= sa_offset; /* Function Body */ rmax = _starpu_slamch_("O"); i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (a[i__ + j * a_dim1] < -rmax || a[i__ + j * a_dim1] > rmax) { *info = 1; goto L30; } sa[i__ + j * sa_dim1] = a[i__ + j * a_dim1]; /* L10: */ } /* L20: */ } *info = 0; L30: return 0; /* End of DLAG2S */ } /* _starpu_dlag2s_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlags2.c000066400000000000000000000201621507764646700205510ustar00rootroot00000000000000/* dlags2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlags2_(logical *upper, doublereal *a1, doublereal *a2, doublereal *a3, doublereal *b1, doublereal *b2, doublereal *b3, doublereal *csu, doublereal *snu, doublereal *csv, doublereal *snv, doublereal *csq, doublereal *snq) { /* System generated locals */ doublereal d__1; /* Local variables */ doublereal a, b, c__, d__, r__, s1, s2, ua11, ua12, ua21, ua22, vb11, vb12, vb21, vb22, csl, csr, snl, snr, aua11, aua12, aua21, aua22, avb11, avb12, avb21, avb22, ua11r, ua22r, vb11r, vb22r; extern /* Subroutine */ int _starpu_dlasv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAGS2 computes 2-by-2 orthogonal matrices U, V and Q, such */ /* that if ( UPPER ) then */ /* U'*A*Q = U'*( A1 A2 )*Q = ( x 0 ) */ /* ( 0 A3 ) ( x x ) */ /* and */ /* V'*B*Q = V'*( B1 B2 )*Q = ( x 0 ) */ /* ( 0 B3 ) ( x x ) */ /* or if ( .NOT.UPPER ) then */ /* U'*A*Q = U'*( A1 0 )*Q = ( x x ) */ /* ( A2 A3 ) ( 0 x ) */ /* and */ /* V'*B*Q = V'*( B1 0 )*Q = ( x x ) */ /* ( B2 B3 ) ( 0 x ) */ /* The rows of the transformed A and B are parallel, where */ /* U = ( CSU SNU ), V = ( CSV SNV ), Q = ( CSQ SNQ ) */ /* ( -SNU CSU ) ( -SNV CSV ) ( -SNQ CSQ ) */ /* Z' denotes the transpose of Z. */ /* Arguments */ /* ========= */ /* UPPER (input) LOGICAL */ /* = .TRUE.: the input matrices A and B are upper triangular. */ /* = .FALSE.: the input matrices A and B are lower triangular. */ /* A1 (input) DOUBLE PRECISION */ /* A2 (input) DOUBLE PRECISION */ /* A3 (input) DOUBLE PRECISION */ /* On entry, A1, A2 and A3 are elements of the input 2-by-2 */ /* upper (lower) triangular matrix A. */ /* B1 (input) DOUBLE PRECISION */ /* B2 (input) DOUBLE PRECISION */ /* B3 (input) DOUBLE PRECISION */ /* On entry, B1, B2 and B3 are elements of the input 2-by-2 */ /* upper (lower) triangular matrix B. */ /* CSU (output) DOUBLE PRECISION */ /* SNU (output) DOUBLE PRECISION */ /* The desired orthogonal matrix U. */ /* CSV (output) DOUBLE PRECISION */ /* SNV (output) DOUBLE PRECISION */ /* The desired orthogonal matrix V. */ /* CSQ (output) DOUBLE PRECISION */ /* SNQ (output) DOUBLE PRECISION */ /* The desired orthogonal matrix Q. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ if (*upper) { /* Input matrices A and B are upper triangular matrices */ /* Form matrix C = A*adj(B) = ( a b ) */ /* ( 0 d ) */ a = *a1 * *b3; d__ = *a3 * *b1; b = *a2 * *b1 - *a1 * *b2; /* The SVD of real 2-by-2 triangular C */ /* ( CSL -SNL )*( A B )*( CSR SNR ) = ( R 0 ) */ /* ( SNL CSL ) ( 0 D ) ( -SNR CSR ) ( 0 T ) */ _starpu_dlasv2_(&a, &b, &d__, &s1, &s2, &snr, &csr, &snl, &csl); if (abs(csl) >= abs(snl) || abs(csr) >= abs(snr)) { /* Compute the (1,1) and (1,2) elements of U'*A and V'*B, */ /* and (1,2) element of |U|'*|A| and |V|'*|B|. */ ua11r = csl * *a1; ua12 = csl * *a2 + snl * *a3; vb11r = csr * *b1; vb12 = csr * *b2 + snr * *b3; aua12 = abs(csl) * abs(*a2) + abs(snl) * abs(*a3); avb12 = abs(csr) * abs(*b2) + abs(snr) * abs(*b3); /* zero (1,2) elements of U'*A and V'*B */ if (abs(ua11r) + abs(ua12) != 0.) { if (aua12 / (abs(ua11r) + abs(ua12)) <= avb12 / (abs(vb11r) + abs(vb12))) { d__1 = -ua11r; _starpu_dlartg_(&d__1, &ua12, csq, snq, &r__); } else { d__1 = -vb11r; _starpu_dlartg_(&d__1, &vb12, csq, snq, &r__); } } else { d__1 = -vb11r; _starpu_dlartg_(&d__1, &vb12, csq, snq, &r__); } *csu = csl; *snu = -snl; *csv = csr; *snv = -snr; } else { /* Compute the (2,1) and (2,2) elements of U'*A and V'*B, */ /* and (2,2) element of |U|'*|A| and |V|'*|B|. */ ua21 = -snl * *a1; ua22 = -snl * *a2 + csl * *a3; vb21 = -snr * *b1; vb22 = -snr * *b2 + csr * *b3; aua22 = abs(snl) * abs(*a2) + abs(csl) * abs(*a3); avb22 = abs(snr) * abs(*b2) + abs(csr) * abs(*b3); /* zero (2,2) elements of U'*A and V'*B, and then swap. */ if (abs(ua21) + abs(ua22) != 0.) { if (aua22 / (abs(ua21) + abs(ua22)) <= avb22 / (abs(vb21) + abs(vb22))) { d__1 = -ua21; _starpu_dlartg_(&d__1, &ua22, csq, snq, &r__); } else { d__1 = -vb21; _starpu_dlartg_(&d__1, &vb22, csq, snq, &r__); } } else { d__1 = -vb21; _starpu_dlartg_(&d__1, &vb22, csq, snq, &r__); } *csu = snl; *snu = csl; *csv = snr; *snv = csr; } } else { /* Input matrices A and B are lower triangular matrices */ /* Form matrix C = A*adj(B) = ( a 0 ) */ /* ( c d ) */ a = *a1 * *b3; d__ = *a3 * *b1; c__ = *a2 * *b3 - *a3 * *b2; /* The SVD of real 2-by-2 triangular C */ /* ( CSL -SNL )*( A 0 )*( CSR SNR ) = ( R 0 ) */ /* ( SNL CSL ) ( C D ) ( -SNR CSR ) ( 0 T ) */ _starpu_dlasv2_(&a, &c__, &d__, &s1, &s2, &snr, &csr, &snl, &csl); if (abs(csr) >= abs(snr) || abs(csl) >= abs(snl)) { /* Compute the (2,1) and (2,2) elements of U'*A and V'*B, */ /* and (2,1) element of |U|'*|A| and |V|'*|B|. */ ua21 = -snr * *a1 + csr * *a2; ua22r = csr * *a3; vb21 = -snl * *b1 + csl * *b2; vb22r = csl * *b3; aua21 = abs(snr) * abs(*a1) + abs(csr) * abs(*a2); avb21 = abs(snl) * abs(*b1) + abs(csl) * abs(*b2); /* zero (2,1) elements of U'*A and V'*B. */ if (abs(ua21) + abs(ua22r) != 0.) { if (aua21 / (abs(ua21) + abs(ua22r)) <= avb21 / (abs(vb21) + abs(vb22r))) { _starpu_dlartg_(&ua22r, &ua21, csq, snq, &r__); } else { _starpu_dlartg_(&vb22r, &vb21, csq, snq, &r__); } } else { _starpu_dlartg_(&vb22r, &vb21, csq, snq, &r__); } *csu = csr; *snu = -snr; *csv = csl; *snv = -snl; } else { /* Compute the (1,1) and (1,2) elements of U'*A and V'*B, */ /* and (1,1) element of |U|'*|A| and |V|'*|B|. */ ua11 = csr * *a1 + snr * *a2; ua12 = snr * *a3; vb11 = csl * *b1 + snl * *b2; vb12 = snl * *b3; aua11 = abs(csr) * abs(*a1) + abs(snr) * abs(*a2); avb11 = abs(csl) * abs(*b1) + abs(snl) * abs(*b2); /* zero (1,1) elements of U'*A and V'*B, and then swap. */ if (abs(ua11) + abs(ua12) != 0.) { if (aua11 / (abs(ua11) + abs(ua12)) <= avb11 / (abs(vb11) + abs(vb12))) { _starpu_dlartg_(&ua12, &ua11, csq, snq, &r__); } else { _starpu_dlartg_(&vb12, &vb11, csq, snq, &r__); } } else { _starpu_dlartg_(&vb12, &vb11, csq, snq, &r__); } *csu = snr; *snu = csr; *csv = snl; *snv = csl; } } return 0; /* End of DLAGS2 */ } /* _starpu_dlags2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlagtf.c000066400000000000000000000150451507764646700206420ustar00rootroot00000000000000/* dlagtf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlagtf_(integer *n, doublereal *a, doublereal *lambda, doublereal *b, doublereal *c__, doublereal *tol, doublereal *d__, integer *in, integer *info) { /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Local variables */ integer k; doublereal tl, eps, piv1, piv2, temp, mult, scale1, scale2; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAGTF factorizes the matrix (T - lambda*I), where T is an n by n */ /* tridiagonal matrix and lambda is a scalar, as */ /* T - lambda*I = PLU, */ /* where P is a permutation matrix, L is a unit lower tridiagonal matrix */ /* with at most one non-zero sub-diagonal elements per column and U is */ /* an upper triangular matrix with at most two non-zero super-diagonal */ /* elements per column. */ /* The factorization is obtained by Gaussian elimination with partial */ /* pivoting and implicit row scaling. */ /* The parameter LAMBDA is included in the routine so that DLAGTF may */ /* be used, in conjunction with DLAGTS, to obtain eigenvectors of T by */ /* inverse iteration. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix T. */ /* A (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, A must contain the diagonal elements of T. */ /* On exit, A is overwritten by the n diagonal elements of the */ /* upper triangular matrix U of the factorization of T. */ /* LAMBDA (input) DOUBLE PRECISION */ /* On entry, the scalar lambda. */ /* B (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, B must contain the (n-1) super-diagonal elements of */ /* T. */ /* On exit, B is overwritten by the (n-1) super-diagonal */ /* elements of the matrix U of the factorization of T. */ /* C (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, C must contain the (n-1) sub-diagonal elements of */ /* T. */ /* On exit, C is overwritten by the (n-1) sub-diagonal elements */ /* of the matrix L of the factorization of T. */ /* TOL (input) DOUBLE PRECISION */ /* On entry, a relative tolerance used to indicate whether or */ /* not the matrix (T - lambda*I) is nearly singular. TOL should */ /* normally be chose as approximately the largest relative error */ /* in the elements of T. For example, if the elements of T are */ /* correct to about 4 significant figures, then TOL should be */ /* set to about 5*10**(-4). If TOL is supplied as less than eps, */ /* where eps is the relative machine precision, then the value */ /* eps is used in place of TOL. */ /* D (output) DOUBLE PRECISION array, dimension (N-2) */ /* On exit, D is overwritten by the (n-2) second super-diagonal */ /* elements of the matrix U of the factorization of T. */ /* IN (output) INTEGER array, dimension (N) */ /* On exit, IN contains details of the permutation matrix P. If */ /* an interchange occurred at the kth step of the elimination, */ /* then IN(k) = 1, otherwise IN(k) = 0. The element IN(n) */ /* returns the smallest positive integer j such that */ /* abs( u(j,j) ).le. norm( (T - lambda*I)(j) )*TOL, */ /* where norm( A(j) ) denotes the sum of the absolute values of */ /* the jth row of the matrix A. If no such j exists then IN(n) */ /* is returned as zero. If IN(n) is returned as positive, then a */ /* diagonal element of U is small, indicating that */ /* (T - lambda*I) is singular or nearly singular, */ /* INFO (output) INTEGER */ /* = 0 : successful exit */ /* .lt. 0: if INFO = -k, the kth argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --in; --d__; --c__; --b; --a; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; i__1 = -(*info); _starpu_xerbla_("DLAGTF", &i__1); return 0; } if (*n == 0) { return 0; } a[1] -= *lambda; in[*n] = 0; if (*n == 1) { if (a[1] == 0.) { in[1] = 1; } return 0; } eps = _starpu_dlamch_("Epsilon"); tl = max(*tol,eps); scale1 = abs(a[1]) + abs(b[1]); i__1 = *n - 1; for (k = 1; k <= i__1; ++k) { a[k + 1] -= *lambda; scale2 = (d__1 = c__[k], abs(d__1)) + (d__2 = a[k + 1], abs(d__2)); if (k < *n - 1) { scale2 += (d__1 = b[k + 1], abs(d__1)); } if (a[k] == 0.) { piv1 = 0.; } else { piv1 = (d__1 = a[k], abs(d__1)) / scale1; } if (c__[k] == 0.) { in[k] = 0; piv2 = 0.; scale1 = scale2; if (k < *n - 1) { d__[k] = 0.; } } else { piv2 = (d__1 = c__[k], abs(d__1)) / scale2; if (piv2 <= piv1) { in[k] = 0; scale1 = scale2; c__[k] /= a[k]; a[k + 1] -= c__[k] * b[k]; if (k < *n - 1) { d__[k] = 0.; } } else { in[k] = 1; mult = a[k] / c__[k]; a[k] = c__[k]; temp = a[k + 1]; a[k + 1] = b[k] - mult * temp; if (k < *n - 1) { d__[k] = b[k + 1]; b[k + 1] = -mult * d__[k]; } b[k] = temp; c__[k] = mult; } } if (max(piv1,piv2) <= tl && in[*n] == 0) { in[*n] = k; } /* L10: */ } if ((d__1 = a[*n], abs(d__1)) <= scale1 * tl && in[*n] == 0) { in[*n] = *n; } return 0; /* End of DLAGTF */ } /* _starpu_dlagtf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlagtm.c000066400000000000000000000157661507764646700206630ustar00rootroot00000000000000/* dlagtm.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlagtm_(char *trans, integer *n, integer *nrhs, doublereal *alpha, doublereal *dl, doublereal *d__, doublereal *du, doublereal *x, integer *ldx, doublereal *beta, doublereal *b, integer *ldb) { /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; /* Local variables */ integer i__, j; extern logical _starpu_lsame_(char *, char *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAGTM performs a matrix-vector product of the form */ /* B := alpha * A * X + beta * B */ /* where A is a tridiagonal matrix of order N, B and X are N by NRHS */ /* matrices, and alpha and beta are real scalars, each of which may be */ /* 0., 1., or -1. */ /* Arguments */ /* ========= */ /* TRANS (input) CHARACTER*1 */ /* Specifies the operation applied to A. */ /* = 'N': No transpose, B := alpha * A * X + beta * B */ /* = 'T': Transpose, B := alpha * A'* X + beta * B */ /* = 'C': Conjugate transpose = Transpose */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices X and B. */ /* ALPHA (input) DOUBLE PRECISION */ /* The scalar alpha. ALPHA must be 0., 1., or -1.; otherwise, */ /* it is assumed to be 0. */ /* DL (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) sub-diagonal elements of T. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The diagonal elements of T. */ /* DU (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) super-diagonal elements of T. */ /* X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* The N by NRHS matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(N,1). */ /* BETA (input) DOUBLE PRECISION */ /* The scalar beta. BETA must be 0., 1., or -1.; otherwise, */ /* it is assumed to be 1. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N by NRHS matrix B. */ /* On exit, B is overwritten by the matrix expression */ /* B := alpha * A * X + beta * B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(N,1). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --dl; --d__; --du; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ if (*n == 0) { return 0; } /* Multiply B by BETA if BETA.NE.1. */ if (*beta == 0.) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = 0.; /* L10: */ } /* L20: */ } } else if (*beta == -1.) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = -b[i__ + j * b_dim1]; /* L30: */ } /* L40: */ } } if (*alpha == 1.) { if (_starpu_lsame_(trans, "N")) { /* Compute B := B + A*X */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { if (*n == 1) { b[j * b_dim1 + 1] += d__[1] * x[j * x_dim1 + 1]; } else { b[j * b_dim1 + 1] = b[j * b_dim1 + 1] + d__[1] * x[j * x_dim1 + 1] + du[1] * x[j * x_dim1 + 2]; b[*n + j * b_dim1] = b[*n + j * b_dim1] + dl[*n - 1] * x[* n - 1 + j * x_dim1] + d__[*n] * x[*n + j * x_dim1] ; i__2 = *n - 1; for (i__ = 2; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = b[i__ + j * b_dim1] + dl[i__ - 1] * x[i__ - 1 + j * x_dim1] + d__[i__] * x[ i__ + j * x_dim1] + du[i__] * x[i__ + 1 + j * x_dim1]; /* L50: */ } } /* L60: */ } } else { /* Compute B := B + A'*X */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { if (*n == 1) { b[j * b_dim1 + 1] += d__[1] * x[j * x_dim1 + 1]; } else { b[j * b_dim1 + 1] = b[j * b_dim1 + 1] + d__[1] * x[j * x_dim1 + 1] + dl[1] * x[j * x_dim1 + 2]; b[*n + j * b_dim1] = b[*n + j * b_dim1] + du[*n - 1] * x[* n - 1 + j * x_dim1] + d__[*n] * x[*n + j * x_dim1] ; i__2 = *n - 1; for (i__ = 2; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = b[i__ + j * b_dim1] + du[i__ - 1] * x[i__ - 1 + j * x_dim1] + d__[i__] * x[ i__ + j * x_dim1] + dl[i__] * x[i__ + 1 + j * x_dim1]; /* L70: */ } } /* L80: */ } } } else if (*alpha == -1.) { if (_starpu_lsame_(trans, "N")) { /* Compute B := B - A*X */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { if (*n == 1) { b[j * b_dim1 + 1] -= d__[1] * x[j * x_dim1 + 1]; } else { b[j * b_dim1 + 1] = b[j * b_dim1 + 1] - d__[1] * x[j * x_dim1 + 1] - du[1] * x[j * x_dim1 + 2]; b[*n + j * b_dim1] = b[*n + j * b_dim1] - dl[*n - 1] * x[* n - 1 + j * x_dim1] - d__[*n] * x[*n + j * x_dim1] ; i__2 = *n - 1; for (i__ = 2; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = b[i__ + j * b_dim1] - dl[i__ - 1] * x[i__ - 1 + j * x_dim1] - d__[i__] * x[ i__ + j * x_dim1] - du[i__] * x[i__ + 1 + j * x_dim1]; /* L90: */ } } /* L100: */ } } else { /* Compute B := B - A'*X */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { if (*n == 1) { b[j * b_dim1 + 1] -= d__[1] * x[j * x_dim1 + 1]; } else { b[j * b_dim1 + 1] = b[j * b_dim1 + 1] - d__[1] * x[j * x_dim1 + 1] - dl[1] * x[j * x_dim1 + 2]; b[*n + j * b_dim1] = b[*n + j * b_dim1] - du[*n - 1] * x[* n - 1 + j * x_dim1] - d__[*n] * x[*n + j * x_dim1] ; i__2 = *n - 1; for (i__ = 2; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = b[i__ + j * b_dim1] - du[i__ - 1] * x[i__ - 1 + j * x_dim1] - d__[i__] * x[ i__ + j * x_dim1] - dl[i__] * x[i__ + 1 + j * x_dim1]; /* L110: */ } } /* L120: */ } } } return 0; /* End of DLAGTM */ } /* _starpu_dlagtm_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlagts.c000066400000000000000000000226061507764646700206600ustar00rootroot00000000000000/* dlagts.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlagts_(integer *job, integer *n, doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, integer *in, doublereal *y, doublereal *tol, integer *info) { /* System generated locals */ integer i__1; doublereal d__1, d__2, d__3, d__4, d__5; /* Builtin functions */ double d_sign(doublereal *, doublereal *); /* Local variables */ integer k; doublereal ak, eps, temp, pert, absak, sfmin; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAGTS may be used to solve one of the systems of equations */ /* (T - lambda*I)*x = y or (T - lambda*I)'*x = y, */ /* where T is an n by n tridiagonal matrix, for x, following the */ /* factorization of (T - lambda*I) as */ /* (T - lambda*I) = P*L*U , */ /* by routine DLAGTF. The choice of equation to be solved is */ /* controlled by the argument JOB, and in each case there is an option */ /* to perturb zero or very small diagonal elements of U, this option */ /* being intended for use in applications such as inverse iteration. */ /* Arguments */ /* ========= */ /* JOB (input) INTEGER */ /* Specifies the job to be performed by DLAGTS as follows: */ /* = 1: The equations (T - lambda*I)x = y are to be solved, */ /* but diagonal elements of U are not to be perturbed. */ /* = -1: The equations (T - lambda*I)x = y are to be solved */ /* and, if overflow would otherwise occur, the diagonal */ /* elements of U are to be perturbed. See argument TOL */ /* below. */ /* = 2: The equations (T - lambda*I)'x = y are to be solved, */ /* but diagonal elements of U are not to be perturbed. */ /* = -2: The equations (T - lambda*I)'x = y are to be solved */ /* and, if overflow would otherwise occur, the diagonal */ /* elements of U are to be perturbed. See argument TOL */ /* below. */ /* N (input) INTEGER */ /* The order of the matrix T. */ /* A (input) DOUBLE PRECISION array, dimension (N) */ /* On entry, A must contain the diagonal elements of U as */ /* returned from DLAGTF. */ /* B (input) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, B must contain the first super-diagonal elements of */ /* U as returned from DLAGTF. */ /* C (input) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, C must contain the sub-diagonal elements of L as */ /* returned from DLAGTF. */ /* D (input) DOUBLE PRECISION array, dimension (N-2) */ /* On entry, D must contain the second super-diagonal elements */ /* of U as returned from DLAGTF. */ /* IN (input) INTEGER array, dimension (N) */ /* On entry, IN must contain details of the matrix P as returned */ /* from DLAGTF. */ /* Y (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the right hand side vector y. */ /* On exit, Y is overwritten by the solution vector x. */ /* TOL (input/output) DOUBLE PRECISION */ /* On entry, with JOB .lt. 0, TOL should be the minimum */ /* perturbation to be made to very small diagonal elements of U. */ /* TOL should normally be chosen as about eps*norm(U), where eps */ /* is the relative machine precision, but if TOL is supplied as */ /* non-positive, then it is reset to eps*max( abs( u(i,j) ) ). */ /* If JOB .gt. 0 then TOL is not referenced. */ /* On exit, TOL is changed as described above, only if TOL is */ /* non-positive on entry. Otherwise TOL is unchanged. */ /* INFO (output) INTEGER */ /* = 0 : successful exit */ /* .lt. 0: if INFO = -i, the i-th argument had an illegal value */ /* .gt. 0: overflow would occur when computing the INFO(th) */ /* element of the solution vector x. This can only occur */ /* when JOB is supplied as positive and either means */ /* that a diagonal element of U is very small, or that */ /* the elements of the right-hand side vector y are very */ /* large. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --y; --in; --d__; --c__; --b; --a; /* Function Body */ *info = 0; if (abs(*job) > 2 || *job == 0) { *info = -1; } else if (*n < 0) { *info = -2; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLAGTS", &i__1); return 0; } if (*n == 0) { return 0; } eps = _starpu_dlamch_("Epsilon"); sfmin = _starpu_dlamch_("Safe minimum"); bignum = 1. / sfmin; if (*job < 0) { if (*tol <= 0.) { *tol = abs(a[1]); if (*n > 1) { /* Computing MAX */ d__1 = *tol, d__2 = abs(a[2]), d__1 = max(d__1,d__2), d__2 = abs(b[1]); *tol = max(d__1,d__2); } i__1 = *n; for (k = 3; k <= i__1; ++k) { /* Computing MAX */ d__4 = *tol, d__5 = (d__1 = a[k], abs(d__1)), d__4 = max(d__4, d__5), d__5 = (d__2 = b[k - 1], abs(d__2)), d__4 = max(d__4,d__5), d__5 = (d__3 = d__[k - 2], abs(d__3)); *tol = max(d__4,d__5); /* L10: */ } *tol *= eps; if (*tol == 0.) { *tol = eps; } } } if (abs(*job) == 1) { i__1 = *n; for (k = 2; k <= i__1; ++k) { if (in[k - 1] == 0) { y[k] -= c__[k - 1] * y[k - 1]; } else { temp = y[k - 1]; y[k - 1] = y[k]; y[k] = temp - c__[k - 1] * y[k]; } /* L20: */ } if (*job == 1) { for (k = *n; k >= 1; --k) { if (k <= *n - 2) { temp = y[k] - b[k] * y[k + 1] - d__[k] * y[k + 2]; } else if (k == *n - 1) { temp = y[k] - b[k] * y[k + 1]; } else { temp = y[k]; } ak = a[k]; absak = abs(ak); if (absak < 1.) { if (absak < sfmin) { if (absak == 0. || abs(temp) * sfmin > absak) { *info = k; return 0; } else { temp *= bignum; ak *= bignum; } } else if (abs(temp) > absak * bignum) { *info = k; return 0; } } y[k] = temp / ak; /* L30: */ } } else { for (k = *n; k >= 1; --k) { if (k <= *n - 2) { temp = y[k] - b[k] * y[k + 1] - d__[k] * y[k + 2]; } else if (k == *n - 1) { temp = y[k] - b[k] * y[k + 1]; } else { temp = y[k]; } ak = a[k]; pert = d_sign(tol, &ak); L40: absak = abs(ak); if (absak < 1.) { if (absak < sfmin) { if (absak == 0. || abs(temp) * sfmin > absak) { ak += pert; pert *= 2; goto L40; } else { temp *= bignum; ak *= bignum; } } else if (abs(temp) > absak * bignum) { ak += pert; pert *= 2; goto L40; } } y[k] = temp / ak; /* L50: */ } } } else { /* Come to here if JOB = 2 or -2 */ if (*job == 2) { i__1 = *n; for (k = 1; k <= i__1; ++k) { if (k >= 3) { temp = y[k] - b[k - 1] * y[k - 1] - d__[k - 2] * y[k - 2]; } else if (k == 2) { temp = y[k] - b[k - 1] * y[k - 1]; } else { temp = y[k]; } ak = a[k]; absak = abs(ak); if (absak < 1.) { if (absak < sfmin) { if (absak == 0. || abs(temp) * sfmin > absak) { *info = k; return 0; } else { temp *= bignum; ak *= bignum; } } else if (abs(temp) > absak * bignum) { *info = k; return 0; } } y[k] = temp / ak; /* L60: */ } } else { i__1 = *n; for (k = 1; k <= i__1; ++k) { if (k >= 3) { temp = y[k] - b[k - 1] * y[k - 1] - d__[k - 2] * y[k - 2]; } else if (k == 2) { temp = y[k] - b[k - 1] * y[k - 1]; } else { temp = y[k]; } ak = a[k]; pert = d_sign(tol, &ak); L70: absak = abs(ak); if (absak < 1.) { if (absak < sfmin) { if (absak == 0. || abs(temp) * sfmin > absak) { ak += pert; pert *= 2; goto L70; } else { temp *= bignum; ak *= bignum; } } else if (abs(temp) > absak * bignum) { ak += pert; pert *= 2; goto L70; } } y[k] = temp / ak; /* L80: */ } } for (k = *n; k >= 2; --k) { if (in[k - 1] == 0) { y[k - 1] -= c__[k - 1] * y[k]; } else { temp = y[k - 1]; y[k - 1] = y[k]; y[k] = temp - c__[k - 1] * y[k]; } /* L90: */ } } /* End of DLAGTS */ return 0; } /* _starpu_dlagts_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlagv2.c000066400000000000000000000256511507764646700205640ustar00rootroot00000000000000/* dlagv2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__2 = 2; static integer c__1 = 1; /* Subroutine */ int _starpu_dlagv2_(doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * beta, doublereal *csl, doublereal *snl, doublereal *csr, doublereal * snr) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset; doublereal d__1, d__2, d__3, d__4, d__5, d__6; /* Local variables */ doublereal r__, t, h1, h2, h3, wi, qq, rr, wr1, wr2, ulp; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *), _starpu_dlag2_( doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); doublereal anorm, bnorm, scale1, scale2; extern /* Subroutine */ int _starpu_dlasv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); extern doublereal _starpu_dlapy2_(doublereal *, doublereal *); doublereal ascale, bscale; extern doublereal _starpu_dlamch_(char *); doublereal safmin; extern /* Subroutine */ int _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAGV2 computes the Generalized Schur factorization of a real 2-by-2 */ /* matrix pencil (A,B) where B is upper triangular. This routine */ /* computes orthogonal (rotation) matrices given by CSL, SNL and CSR, */ /* SNR such that */ /* 1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0 */ /* types), then */ /* [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] */ /* [ 0 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] */ /* [ b11 b12 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] */ /* [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ], */ /* 2) if the pencil (A,B) has a pair of complex conjugate eigenvalues, */ /* then */ /* [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] */ /* [ a21 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] */ /* [ b11 0 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] */ /* [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ] */ /* where b11 >= b22 > 0. */ /* Arguments */ /* ========= */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, 2) */ /* On entry, the 2 x 2 matrix A. */ /* On exit, A is overwritten by the ``A-part'' of the */ /* generalized Schur form. */ /* LDA (input) INTEGER */ /* THe leading dimension of the array A. LDA >= 2. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB, 2) */ /* On entry, the upper triangular 2 x 2 matrix B. */ /* On exit, B is overwritten by the ``B-part'' of the */ /* generalized Schur form. */ /* LDB (input) INTEGER */ /* THe leading dimension of the array B. LDB >= 2. */ /* ALPHAR (output) DOUBLE PRECISION array, dimension (2) */ /* ALPHAI (output) DOUBLE PRECISION array, dimension (2) */ /* BETA (output) DOUBLE PRECISION array, dimension (2) */ /* (ALPHAR(k)+i*ALPHAI(k))/BETA(k) are the eigenvalues of the */ /* pencil (A,B), k=1,2, i = sqrt(-1). Note that BETA(k) may */ /* be zero. */ /* CSL (output) DOUBLE PRECISION */ /* The cosine of the left rotation matrix. */ /* SNL (output) DOUBLE PRECISION */ /* The sine of the left rotation matrix. */ /* CSR (output) DOUBLE PRECISION */ /* The cosine of the right rotation matrix. */ /* SNR (output) DOUBLE PRECISION */ /* The sine of the right rotation matrix. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --alphar; --alphai; --beta; /* Function Body */ safmin = _starpu_dlamch_("S"); ulp = _starpu_dlamch_("P"); /* Scale A */ /* Computing MAX */ d__5 = (d__1 = a[a_dim1 + 1], abs(d__1)) + (d__2 = a[a_dim1 + 2], abs( d__2)), d__6 = (d__3 = a[(a_dim1 << 1) + 1], abs(d__3)) + (d__4 = a[(a_dim1 << 1) + 2], abs(d__4)), d__5 = max(d__5,d__6); anorm = max(d__5,safmin); ascale = 1. / anorm; a[a_dim1 + 1] = ascale * a[a_dim1 + 1]; a[(a_dim1 << 1) + 1] = ascale * a[(a_dim1 << 1) + 1]; a[a_dim1 + 2] = ascale * a[a_dim1 + 2]; a[(a_dim1 << 1) + 2] = ascale * a[(a_dim1 << 1) + 2]; /* Scale B */ /* Computing MAX */ d__4 = (d__3 = b[b_dim1 + 1], abs(d__3)), d__5 = (d__1 = b[(b_dim1 << 1) + 1], abs(d__1)) + (d__2 = b[(b_dim1 << 1) + 2], abs(d__2)), d__4 = max(d__4,d__5); bnorm = max(d__4,safmin); bscale = 1. / bnorm; b[b_dim1 + 1] = bscale * b[b_dim1 + 1]; b[(b_dim1 << 1) + 1] = bscale * b[(b_dim1 << 1) + 1]; b[(b_dim1 << 1) + 2] = bscale * b[(b_dim1 << 1) + 2]; /* Check if A can be deflated */ if ((d__1 = a[a_dim1 + 2], abs(d__1)) <= ulp) { *csl = 1.; *snl = 0.; *csr = 1.; *snr = 0.; a[a_dim1 + 2] = 0.; b[b_dim1 + 2] = 0.; /* Check if B is singular */ } else if ((d__1 = b[b_dim1 + 1], abs(d__1)) <= ulp) { _starpu_dlartg_(&a[a_dim1 + 1], &a[a_dim1 + 2], csl, snl, &r__); *csr = 1.; *snr = 0.; _starpu_drot_(&c__2, &a[a_dim1 + 1], lda, &a[a_dim1 + 2], lda, csl, snl); _starpu_drot_(&c__2, &b[b_dim1 + 1], ldb, &b[b_dim1 + 2], ldb, csl, snl); a[a_dim1 + 2] = 0.; b[b_dim1 + 1] = 0.; b[b_dim1 + 2] = 0.; } else if ((d__1 = b[(b_dim1 << 1) + 2], abs(d__1)) <= ulp) { _starpu_dlartg_(&a[(a_dim1 << 1) + 2], &a[a_dim1 + 2], csr, snr, &t); *snr = -(*snr); _starpu_drot_(&c__2, &a[a_dim1 + 1], &c__1, &a[(a_dim1 << 1) + 1], &c__1, csr, snr); _starpu_drot_(&c__2, &b[b_dim1 + 1], &c__1, &b[(b_dim1 << 1) + 1], &c__1, csr, snr); *csl = 1.; *snl = 0.; a[a_dim1 + 2] = 0.; b[b_dim1 + 2] = 0.; b[(b_dim1 << 1) + 2] = 0.; } else { /* B is nonsingular, first compute the eigenvalues of (A,B) */ _starpu_dlag2_(&a[a_offset], lda, &b[b_offset], ldb, &safmin, &scale1, & scale2, &wr1, &wr2, &wi); if (wi == 0.) { /* two real eigenvalues, compute s*A-w*B */ h1 = scale1 * a[a_dim1 + 1] - wr1 * b[b_dim1 + 1]; h2 = scale1 * a[(a_dim1 << 1) + 1] - wr1 * b[(b_dim1 << 1) + 1]; h3 = scale1 * a[(a_dim1 << 1) + 2] - wr1 * b[(b_dim1 << 1) + 2]; rr = _starpu_dlapy2_(&h1, &h2); d__1 = scale1 * a[a_dim1 + 2]; qq = _starpu_dlapy2_(&d__1, &h3); if (rr > qq) { /* find right rotation matrix to zero 1,1 element of */ /* (sA - wB) */ _starpu_dlartg_(&h2, &h1, csr, snr, &t); } else { /* find right rotation matrix to zero 2,1 element of */ /* (sA - wB) */ d__1 = scale1 * a[a_dim1 + 2]; _starpu_dlartg_(&h3, &d__1, csr, snr, &t); } *snr = -(*snr); _starpu_drot_(&c__2, &a[a_dim1 + 1], &c__1, &a[(a_dim1 << 1) + 1], &c__1, csr, snr); _starpu_drot_(&c__2, &b[b_dim1 + 1], &c__1, &b[(b_dim1 << 1) + 1], &c__1, csr, snr); /* compute inf norms of A and B */ /* Computing MAX */ d__5 = (d__1 = a[a_dim1 + 1], abs(d__1)) + (d__2 = a[(a_dim1 << 1) + 1], abs(d__2)), d__6 = (d__3 = a[a_dim1 + 2], abs(d__3) ) + (d__4 = a[(a_dim1 << 1) + 2], abs(d__4)); h1 = max(d__5,d__6); /* Computing MAX */ d__5 = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << 1) + 1], abs(d__2)), d__6 = (d__3 = b[b_dim1 + 2], abs(d__3) ) + (d__4 = b[(b_dim1 << 1) + 2], abs(d__4)); h2 = max(d__5,d__6); if (scale1 * h1 >= abs(wr1) * h2) { /* find left rotation matrix Q to zero out B(2,1) */ _starpu_dlartg_(&b[b_dim1 + 1], &b[b_dim1 + 2], csl, snl, &r__); } else { /* find left rotation matrix Q to zero out A(2,1) */ _starpu_dlartg_(&a[a_dim1 + 1], &a[a_dim1 + 2], csl, snl, &r__); } _starpu_drot_(&c__2, &a[a_dim1 + 1], lda, &a[a_dim1 + 2], lda, csl, snl); _starpu_drot_(&c__2, &b[b_dim1 + 1], ldb, &b[b_dim1 + 2], ldb, csl, snl); a[a_dim1 + 2] = 0.; b[b_dim1 + 2] = 0.; } else { /* a pair of complex conjugate eigenvalues */ /* first compute the SVD of the matrix B */ _starpu_dlasv2_(&b[b_dim1 + 1], &b[(b_dim1 << 1) + 1], &b[(b_dim1 << 1) + 2], &r__, &t, snr, csr, snl, csl); /* Form (A,B) := Q(A,B)Z' where Q is left rotation matrix and */ /* Z is right rotation matrix computed from DLASV2 */ _starpu_drot_(&c__2, &a[a_dim1 + 1], lda, &a[a_dim1 + 2], lda, csl, snl); _starpu_drot_(&c__2, &b[b_dim1 + 1], ldb, &b[b_dim1 + 2], ldb, csl, snl); _starpu_drot_(&c__2, &a[a_dim1 + 1], &c__1, &a[(a_dim1 << 1) + 1], &c__1, csr, snr); _starpu_drot_(&c__2, &b[b_dim1 + 1], &c__1, &b[(b_dim1 << 1) + 1], &c__1, csr, snr); b[b_dim1 + 2] = 0.; b[(b_dim1 << 1) + 1] = 0.; } } /* Unscaling */ a[a_dim1 + 1] = anorm * a[a_dim1 + 1]; a[a_dim1 + 2] = anorm * a[a_dim1 + 2]; a[(a_dim1 << 1) + 1] = anorm * a[(a_dim1 << 1) + 1]; a[(a_dim1 << 1) + 2] = anorm * a[(a_dim1 << 1) + 2]; b[b_dim1 + 1] = bnorm * b[b_dim1 + 1]; b[b_dim1 + 2] = bnorm * b[b_dim1 + 2]; b[(b_dim1 << 1) + 1] = bnorm * b[(b_dim1 << 1) + 1]; b[(b_dim1 << 1) + 2] = bnorm * b[(b_dim1 << 1) + 2]; if (wi == 0.) { alphar[1] = a[a_dim1 + 1]; alphar[2] = a[(a_dim1 << 1) + 2]; alphai[1] = 0.; alphai[2] = 0.; beta[1] = b[b_dim1 + 1]; beta[2] = b[(b_dim1 << 1) + 2]; } else { alphar[1] = anorm * wr1 / scale1 / bnorm; alphai[1] = anorm * wi / scale1 / bnorm; alphar[2] = alphar[1]; alphai[2] = -alphai[1]; beta[1] = 1.; beta[2] = 1.; } return 0; /* End of DLAGV2 */ } /* _starpu_dlagv2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlahqr.c000066400000000000000000000467321507764646700206630ustar00rootroot00000000000000/* dlahqr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dlahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, integer *info) { /* System generated locals */ integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, k, l, m; doublereal s, v[3]; integer i1, i2; doublereal t1, t2, t3, v2, v3, aa, ab, ba, bb, h11, h12, h21, h22, cs; integer nh; doublereal sn; integer nr; doublereal tr; integer nz; doublereal det, h21s; integer its; doublereal ulp, sum, tst, rt1i, rt2i, rt1r, rt2r; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *), _starpu_dcopy_( integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlabad_(doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *); doublereal safmin, safmax, rtdisc, smlnum; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAHQR is an auxiliary routine called by DHSEQR to update the */ /* eigenvalues and Schur decomposition already computed by DHSEQR, by */ /* dealing with the Hessenberg submatrix in rows and columns ILO to */ /* IHI. */ /* Arguments */ /* ========= */ /* WANTT (input) LOGICAL */ /* = .TRUE. : the full Schur form T is required; */ /* = .FALSE.: only eigenvalues are required. */ /* WANTZ (input) LOGICAL */ /* = .TRUE. : the matrix of Schur vectors Z is required; */ /* = .FALSE.: Schur vectors are not required. */ /* N (input) INTEGER */ /* The order of the matrix H. N >= 0. */ /* ILO (input) INTEGER */ /* IHI (input) INTEGER */ /* It is assumed that H is already upper quasi-triangular in */ /* rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless */ /* ILO = 1). DLAHQR works primarily with the Hessenberg */ /* submatrix in rows and columns ILO to IHI, but applies */ /* transformations to all of H if WANTT is .TRUE.. */ /* 1 <= ILO <= max(1,IHI); IHI <= N. */ /* H (input/output) DOUBLE PRECISION array, dimension (LDH,N) */ /* On entry, the upper Hessenberg matrix H. */ /* On exit, if INFO is zero and if WANTT is .TRUE., H is upper */ /* quasi-triangular in rows and columns ILO:IHI, with any */ /* 2-by-2 diagonal blocks in standard form. If INFO is zero */ /* and WANTT is .FALSE., the contents of H are unspecified on */ /* exit. The output state of H if INFO is nonzero is given */ /* below under the description of INFO. */ /* LDH (input) INTEGER */ /* The leading dimension of the array H. LDH >= max(1,N). */ /* WR (output) DOUBLE PRECISION array, dimension (N) */ /* WI (output) DOUBLE PRECISION array, dimension (N) */ /* The real and imaginary parts, respectively, of the computed */ /* eigenvalues ILO to IHI are stored in the corresponding */ /* elements of WR and WI. If two eigenvalues are computed as a */ /* complex conjugate pair, they are stored in consecutive */ /* elements of WR and WI, say the i-th and (i+1)th, with */ /* WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the */ /* eigenvalues are stored in the same order as on the diagonal */ /* of the Schur form returned in H, with WR(i) = H(i,i), and, if */ /* H(i:i+1,i:i+1) is a 2-by-2 diagonal block, */ /* WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). */ /* ILOZ (input) INTEGER */ /* IHIZ (input) INTEGER */ /* Specify the rows of Z to which transformations must be */ /* applied if WANTZ is .TRUE.. */ /* 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. */ /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ /* If WANTZ is .TRUE., on entry Z must contain the current */ /* matrix Z of transformations accumulated by DHSEQR, and on */ /* exit Z has been updated; transformations are applied only to */ /* the submatrix Z(ILOZ:IHIZ,ILO:IHI). */ /* If WANTZ is .FALSE., Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* .GT. 0: If INFO = i, DLAHQR failed to compute all the */ /* eigenvalues ILO to IHI in a total of 30 iterations */ /* per eigenvalue; elements i+1:ihi of WR and WI */ /* contain those eigenvalues which have been */ /* successfully computed. */ /* If INFO .GT. 0 and WANTT is .FALSE., then on exit, */ /* the remaining unconverged eigenvalues are the */ /* eigenvalues of the upper Hessenberg matrix rows */ /* and columns ILO thorugh INFO of the final, output */ /* value of H. */ /* If INFO .GT. 0 and WANTT is .TRUE., then on exit */ /* (*) (initial value of H)*U = U*(final value of H) */ /* where U is an orthognal matrix. The final */ /* value of H is upper Hessenberg and triangular in */ /* rows and columns INFO+1 through IHI. */ /* If INFO .GT. 0 and WANTZ is .TRUE., then on exit */ /* (final value of Z) = (initial value of Z)*U */ /* where U is the orthogonal matrix in (*) */ /* (regardless of the value of WANTT.) */ /* Further Details */ /* =============== */ /* 02-96 Based on modifications by */ /* David Day, Sandia National Laboratory, USA */ /* 12-04 Further modifications by */ /* Ralph Byers, University of Kansas, USA */ /* This is a modified version of DLAHQR from LAPACK version 3.0. */ /* It is (1) more robust against overflow and underflow and */ /* (2) adopts the more conservative Ahues & Tisseur stopping */ /* criterion (LAWN 122, 1997). */ /* ========================================================= */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; --wr; --wi; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; /* Function Body */ *info = 0; /* Quick return if possible */ if (*n == 0) { return 0; } if (*ilo == *ihi) { wr[*ilo] = h__[*ilo + *ilo * h_dim1]; wi[*ilo] = 0.; return 0; } /* ==== clear out the trash ==== */ i__1 = *ihi - 3; for (j = *ilo; j <= i__1; ++j) { h__[j + 2 + j * h_dim1] = 0.; h__[j + 3 + j * h_dim1] = 0.; /* L10: */ } if (*ilo <= *ihi - 2) { h__[*ihi + (*ihi - 2) * h_dim1] = 0.; } nh = *ihi - *ilo + 1; nz = *ihiz - *iloz + 1; /* Set machine-dependent constants for the stopping criterion. */ safmin = _starpu_dlamch_("SAFE MINIMUM"); safmax = 1. / safmin; _starpu_dlabad_(&safmin, &safmax); ulp = _starpu_dlamch_("PRECISION"); smlnum = safmin * ((doublereal) nh / ulp); /* I1 and I2 are the indices of the first row and last column of H */ /* to which transformations must be applied. If eigenvalues only are */ /* being computed, I1 and I2 are set inside the main loop. */ if (*wantt) { i1 = 1; i2 = *n; } /* The main loop begins here. I is the loop index and decreases from */ /* IHI to ILO in steps of 1 or 2. Each iteration of the loop works */ /* with the active submatrix in rows and columns L to I. */ /* Eigenvalues I+1 to IHI have already converged. Either L = ILO or */ /* H(L,L-1) is negligible so that the matrix splits. */ i__ = *ihi; L20: l = *ilo; if (i__ < *ilo) { goto L160; } /* Perform QR iterations on rows and columns ILO to I until a */ /* submatrix of order 1 or 2 splits off at the bottom because a */ /* subdiagonal element has become negligible. */ for (its = 0; its <= 30; ++its) { /* Look for a single small subdiagonal element. */ i__1 = l + 1; for (k = i__; k >= i__1; --k) { if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= smlnum) { goto L40; } tst = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = h__[k + k * h_dim1], abs(d__2)); if (tst == 0.) { if (k - 2 >= *ilo) { tst += (d__1 = h__[k - 1 + (k - 2) * h_dim1], abs(d__1)); } if (k + 1 <= *ihi) { tst += (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)); } } /* ==== The following is a conservative small subdiagonal */ /* . deflation criterion due to Ahues & Tisseur (LAWN 122, */ /* . 1997). It has better mathematical foundation and */ /* . improves accuracy in some cases. ==== */ if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= ulp * tst) { /* Computing MAX */ d__3 = (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)), d__4 = ( d__2 = h__[k - 1 + k * h_dim1], abs(d__2)); ab = max(d__3,d__4); /* Computing MIN */ d__3 = (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)), d__4 = ( d__2 = h__[k - 1 + k * h_dim1], abs(d__2)); ba = min(d__3,d__4); /* Computing MAX */ d__3 = (d__1 = h__[k + k * h_dim1], abs(d__1)), d__4 = (d__2 = h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], abs(d__2)); aa = max(d__3,d__4); /* Computing MIN */ d__3 = (d__1 = h__[k + k * h_dim1], abs(d__1)), d__4 = (d__2 = h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], abs(d__2)); bb = min(d__3,d__4); s = aa + ab; /* Computing MAX */ d__1 = smlnum, d__2 = ulp * (bb * (aa / s)); if (ba * (ab / s) <= max(d__1,d__2)) { goto L40; } } /* L30: */ } L40: l = k; if (l > *ilo) { /* H(L,L-1) is negligible */ h__[l + (l - 1) * h_dim1] = 0.; } /* Exit from loop if a submatrix of order 1 or 2 has split off. */ if (l >= i__ - 1) { goto L150; } /* Now the active submatrix is in rows and columns L to I. If */ /* eigenvalues only are being computed, only the active submatrix */ /* need be transformed. */ if (! (*wantt)) { i1 = l; i2 = i__; } if (its == 10) { /* Exceptional shift. */ s = (d__1 = h__[l + 1 + l * h_dim1], abs(d__1)) + (d__2 = h__[l + 2 + (l + 1) * h_dim1], abs(d__2)); h11 = s * .75 + h__[l + l * h_dim1]; h12 = s * -.4375; h21 = s; h22 = h11; } else if (its == 20) { /* Exceptional shift. */ s = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); h11 = s * .75 + h__[i__ + i__ * h_dim1]; h12 = s * -.4375; h21 = s; h22 = h11; } else { /* Prepare to use Francis' double shift */ /* (i.e. 2nd degree generalized Rayleigh quotient) */ h11 = h__[i__ - 1 + (i__ - 1) * h_dim1]; h21 = h__[i__ + (i__ - 1) * h_dim1]; h12 = h__[i__ - 1 + i__ * h_dim1]; h22 = h__[i__ + i__ * h_dim1]; } s = abs(h11) + abs(h12) + abs(h21) + abs(h22); if (s == 0.) { rt1r = 0.; rt1i = 0.; rt2r = 0.; rt2i = 0.; } else { h11 /= s; h21 /= s; h12 /= s; h22 /= s; tr = (h11 + h22) / 2.; det = (h11 - tr) * (h22 - tr) - h12 * h21; rtdisc = sqrt((abs(det))); if (det >= 0.) { /* ==== complex conjugate shifts ==== */ rt1r = tr * s; rt2r = rt1r; rt1i = rtdisc * s; rt2i = -rt1i; } else { /* ==== real shifts (use only one of them) ==== */ rt1r = tr + rtdisc; rt2r = tr - rtdisc; if ((d__1 = rt1r - h22, abs(d__1)) <= (d__2 = rt2r - h22, abs( d__2))) { rt1r *= s; rt2r = rt1r; } else { rt2r *= s; rt1r = rt2r; } rt1i = 0.; rt2i = 0.; } } /* Look for two consecutive small subdiagonal elements. */ i__1 = l; for (m = i__ - 2; m >= i__1; --m) { /* Determine the effect of starting the double-shift QR */ /* iteration at row M, and see if this would make H(M,M-1) */ /* negligible. (The following uses scaling to avoid */ /* overflows and most underflows.) */ h21s = h__[m + 1 + m * h_dim1]; s = (d__1 = h__[m + m * h_dim1] - rt2r, abs(d__1)) + abs(rt2i) + abs(h21s); h21s = h__[m + 1 + m * h_dim1] / s; v[0] = h21s * h__[m + (m + 1) * h_dim1] + (h__[m + m * h_dim1] - rt1r) * ((h__[m + m * h_dim1] - rt2r) / s) - rt1i * (rt2i / s); v[1] = h21s * (h__[m + m * h_dim1] + h__[m + 1 + (m + 1) * h_dim1] - rt1r - rt2r); v[2] = h21s * h__[m + 2 + (m + 1) * h_dim1]; s = abs(v[0]) + abs(v[1]) + abs(v[2]); v[0] /= s; v[1] /= s; v[2] /= s; if (m == l) { goto L60; } if ((d__1 = h__[m + (m - 1) * h_dim1], abs(d__1)) * (abs(v[1]) + abs(v[2])) <= ulp * abs(v[0]) * ((d__2 = h__[m - 1 + (m - 1) * h_dim1], abs(d__2)) + (d__3 = h__[m + m * h_dim1], abs(d__3)) + (d__4 = h__[m + 1 + (m + 1) * h_dim1], abs( d__4)))) { goto L60; } /* L50: */ } L60: /* Double-shift QR step */ i__1 = i__ - 1; for (k = m; k <= i__1; ++k) { /* The first iteration of this loop determines a reflection G */ /* from the vector V and applies it from left and right to H, */ /* thus creating a nonzero bulge below the subdiagonal. */ /* Each subsequent iteration determines a reflection G to */ /* restore the Hessenberg form in the (K-1)th column, and thus */ /* chases the bulge one step toward the bottom of the active */ /* submatrix. NR is the order of G. */ /* Computing MIN */ i__2 = 3, i__3 = i__ - k + 1; nr = min(i__2,i__3); if (k > m) { _starpu_dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); } _starpu_dlarfg_(&nr, v, &v[1], &c__1, &t1); if (k > m) { h__[k + (k - 1) * h_dim1] = v[0]; h__[k + 1 + (k - 1) * h_dim1] = 0.; if (k < i__ - 1) { h__[k + 2 + (k - 1) * h_dim1] = 0.; } } else if (m > l) { /* ==== Use the following instead of */ /* . H( K, K-1 ) = -H( K, K-1 ) to */ /* . avoid a bug when v(2) and v(3) */ /* . underflow. ==== */ h__[k + (k - 1) * h_dim1] *= 1. - t1; } v2 = v[1]; t2 = t1 * v2; if (nr == 3) { v3 = v[2]; t3 = t1 * v3; /* Apply G from the left to transform the rows of the matrix */ /* in columns K to I2. */ i__2 = i2; for (j = k; j <= i__2; ++j) { sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + v3 * h__[k + 2 + j * h_dim1]; h__[k + j * h_dim1] -= sum * t1; h__[k + 1 + j * h_dim1] -= sum * t2; h__[k + 2 + j * h_dim1] -= sum * t3; /* L70: */ } /* Apply G from the right to transform the columns of the */ /* matrix in rows I1 to min(K+3,I). */ /* Computing MIN */ i__3 = k + 3; i__2 = min(i__3,i__); for (j = i1; j <= i__2; ++j) { sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + v3 * h__[j + (k + 2) * h_dim1]; h__[j + k * h_dim1] -= sum * t1; h__[j + (k + 1) * h_dim1] -= sum * t2; h__[j + (k + 2) * h_dim1] -= sum * t3; /* L80: */ } if (*wantz) { /* Accumulate transformations in the matrix Z */ i__2 = *ihiz; for (j = *iloz; j <= i__2; ++j) { sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; z__[j + k * z_dim1] -= sum * t1; z__[j + (k + 1) * z_dim1] -= sum * t2; z__[j + (k + 2) * z_dim1] -= sum * t3; /* L90: */ } } } else if (nr == 2) { /* Apply G from the left to transform the rows of the matrix */ /* in columns K to I2. */ i__2 = i2; for (j = k; j <= i__2; ++j) { sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; h__[k + j * h_dim1] -= sum * t1; h__[k + 1 + j * h_dim1] -= sum * t2; /* L100: */ } /* Apply G from the right to transform the columns of the */ /* matrix in rows I1 to min(K+3,I). */ i__2 = i__; for (j = i1; j <= i__2; ++j) { sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] ; h__[j + k * h_dim1] -= sum * t1; h__[j + (k + 1) * h_dim1] -= sum * t2; /* L110: */ } if (*wantz) { /* Accumulate transformations in the matrix Z */ i__2 = *ihiz; for (j = *iloz; j <= i__2; ++j) { sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * z_dim1]; z__[j + k * z_dim1] -= sum * t1; z__[j + (k + 1) * z_dim1] -= sum * t2; /* L120: */ } } } /* L130: */ } /* L140: */ } /* Failure to converge in remaining number of iterations */ *info = i__; return 0; L150: if (l == i__) { /* H(I,I-1) is negligible: one eigenvalue has converged. */ wr[i__] = h__[i__ + i__ * h_dim1]; wi[i__] = 0.; } else if (l == i__ - 1) { /* H(I-1,I-2) is negligible: a pair of eigenvalues have converged. */ /* Transform the 2-by-2 submatrix to standard Schur form, */ /* and compute and store the eigenvalues. */ _starpu_dlanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, &sn); if (*wantt) { /* Apply the transformation to the rest of H. */ if (i2 > i__) { i__1 = i2 - i__; _starpu_drot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); } i__1 = i__ - i1 - 1; _starpu_drot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * h_dim1], &c__1, &cs, &sn); } if (*wantz) { /* Apply the transformation to Z. */ _starpu_drot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + i__ * z_dim1], &c__1, &cs, &sn); } } /* return to start of the main loop with new value of I. */ i__ = l - 1; goto L20; L160: return 0; /* End of DLAHQR */ } /* _starpu_dlahqr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlahr2.c000066400000000000000000000246531507764646700205620ustar00rootroot00000000000000/* dlahr2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b4 = -1.; static doublereal c_b5 = 1.; static integer c__1 = 1; static doublereal c_b38 = 0.; /* Subroutine */ int _starpu_dlahr2_(integer *n, integer *k, integer *nb, doublereal * a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, doublereal *y, integer *ldy) { /* System generated locals */ integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, i__3; doublereal d__1; /* Local variables */ integer i__; doublereal ei; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer * , doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dgemv_( char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarfg_( integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAHR2 reduces the first NB columns of A real general n-BY-(n-k+1) */ /* matrix A so that elements below the k-th subdiagonal are zero. The */ /* reduction is performed by an orthogonal similarity transformation */ /* Q' * A * Q. The routine returns the matrices V and T which determine */ /* Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. */ /* This is an auxiliary routine called by DGEHRD. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. */ /* K (input) INTEGER */ /* The offset for the reduction. Elements below the k-th */ /* subdiagonal in the first NB columns are reduced to zero. */ /* K < N. */ /* NB (input) INTEGER */ /* The number of columns to be reduced. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N-K+1) */ /* On entry, the n-by-(n-k+1) general matrix A. */ /* On exit, the elements on and above the k-th subdiagonal in */ /* the first NB columns are overwritten with the corresponding */ /* elements of the reduced matrix; the elements below the k-th */ /* subdiagonal, with the array TAU, represent the matrix Q as a */ /* product of elementary reflectors. The other columns of A are */ /* unchanged. See Further Details. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* TAU (output) DOUBLE PRECISION array, dimension (NB) */ /* The scalar factors of the elementary reflectors. See Further */ /* Details. */ /* T (output) DOUBLE PRECISION array, dimension (LDT,NB) */ /* The upper triangular matrix T. */ /* LDT (input) INTEGER */ /* The leading dimension of the array T. LDT >= NB. */ /* Y (output) DOUBLE PRECISION array, dimension (LDY,NB) */ /* The n-by-nb matrix Y. */ /* LDY (input) INTEGER */ /* The leading dimension of the array Y. LDY >= N. */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of nb elementary reflectors */ /* Q = H(1) H(2) . . . H(nb). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in */ /* A(i+k+1:n,i), and tau in TAU(i). */ /* The elements of the vectors v together form the (n-k+1)-by-nb matrix */ /* V which is needed, with T and Y, to apply the transformation to the */ /* unreduced part of the matrix, using an update of the form: */ /* A := (I - V*T*V') * (A - Y*V'). */ /* The contents of A on exit are illustrated by the following example */ /* with n = 7, k = 3 and nb = 2: */ /* ( a a a a a ) */ /* ( a a a a a ) */ /* ( a a a a a ) */ /* ( h h a a a ) */ /* ( v1 h a a a ) */ /* ( v1 v2 a a a ) */ /* ( v1 v2 a a a ) */ /* where a denotes an element of the original matrix A, h denotes a */ /* modified element of the upper Hessenberg matrix H, and vi denotes an */ /* element of the vector defining H(i). */ /* This file is a slight modification of LAPACK-3.0's DLAHRD */ /* incorporating improvements proposed by Quintana-Orti and Van de */ /* Gejin. Note that the entries of A(1:K,2:NB) differ from those */ /* returned by the original LAPACK routine. This function is */ /* not backward compatible with LAPACK3.0. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ --tau; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; y_dim1 = *ldy; y_offset = 1 + y_dim1; y -= y_offset; /* Function Body */ if (*n <= 1) { return 0; } i__1 = *nb; for (i__ = 1; i__ <= i__1; ++i__) { if (i__ > 1) { /* Update A(K+1:N,I) */ /* Update I-th column of A - Y * V' */ i__2 = *n - *k; i__3 = i__ - 1; _starpu_dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1], ldy, &a[*k + i__ - 1 + a_dim1], lda, &c_b5, &a[*k + 1 + i__ * a_dim1], &c__1); /* Apply I - V * T' * V' to this column (call it b) from the */ /* left, using the last column of T as workspace */ /* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) */ /* ( V2 ) ( b2 ) */ /* where V1 is unit lower triangular */ /* w := V1' * b1 */ i__2 = i__ - 1; _starpu_dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + 1], &c__1); i__2 = i__ - 1; _starpu_dtrmv_("Lower", "Transpose", "UNIT", &i__2, &a[*k + 1 + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1); /* w := w + V2'*b2 */ i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b5, &t[*nb * t_dim1 + 1], &c__1); /* w := T'*w */ i__2 = i__ - 1; _starpu_dtrmv_("Upper", "Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1); /* b2 := b2 - V2*w */ i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; _starpu_dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b4, &a[*k + i__ + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b5, &a[*k + i__ + i__ * a_dim1], &c__1); /* b1 := b1 - V1*w */ i__2 = i__ - 1; _starpu_dtrmv_("Lower", "NO TRANSPOSE", "UNIT", &i__2, &a[*k + 1 + a_dim1] , lda, &t[*nb * t_dim1 + 1], &c__1); i__2 = i__ - 1; _starpu_daxpy_(&i__2, &c_b4, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ * a_dim1], &c__1); a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; } /* Generate the elementary reflector H(I) to annihilate */ /* A(K+I+1:N,I) */ i__2 = *n - *k - i__ + 1; /* Computing MIN */ i__3 = *k + i__ + 1; _starpu_dlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3, *n)+ i__ * a_dim1], &c__1, &tau[i__]); ei = a[*k + i__ + i__ * a_dim1]; a[*k + i__ + i__ * a_dim1] = 1.; /* Compute Y(K+1:N,I) */ i__2 = *n - *k; i__3 = *n - *k - i__ + 1; _starpu_dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b5, &a[*k + 1 + (i__ + 1) * a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &y[* k + 1 + i__ * y_dim1], &c__1); i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], lda, & a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &t[i__ * t_dim1 + 1], &c__1); i__2 = *n - *k; i__3 = i__ - 1; _starpu_dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1], ldy, &t[i__ * t_dim1 + 1], &c__1, &c_b5, &y[*k + 1 + i__ * y_dim1], &c__1); i__2 = *n - *k; _starpu_dscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1); /* Compute T(1:I,I) */ i__2 = i__ - 1; d__1 = -tau[i__]; _starpu_dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1); i__2 = i__ - 1; _starpu_dtrmv_("Upper", "No Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1) ; t[i__ + i__ * t_dim1] = tau[i__]; /* L10: */ } a[*k + *nb + *nb * a_dim1] = ei; /* Compute Y(1:K,1:NB) */ _starpu_dlacpy_("ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy); _starpu_dtrmm_("RIGHT", "Lower", "NO TRANSPOSE", "UNIT", k, nb, &c_b5, &a[*k + 1 + a_dim1], lda, &y[y_offset], ldy); if (*n > *k + *nb) { i__1 = *n - *k - *nb; _starpu_dgemm_("NO TRANSPOSE", "NO TRANSPOSE", k, nb, &i__1, &c_b5, &a[(*nb + 2) * a_dim1 + 1], lda, &a[*k + 1 + *nb + a_dim1], lda, &c_b5, &y[y_offset], ldy); } _starpu_dtrmm_("RIGHT", "Upper", "NO TRANSPOSE", "NON-UNIT", k, nb, &c_b5, &t[ t_offset], ldt, &y[y_offset], ldy); return 0; /* End of DLAHR2 */ } /* _starpu_dlahr2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlahrd.c000066400000000000000000000221001507764646700206250ustar00rootroot00000000000000/* dlahrd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b4 = -1.; static doublereal c_b5 = 1.; static integer c__1 = 1; static doublereal c_b38 = 0.; /* Subroutine */ int _starpu_dlahrd_(integer *n, integer *k, integer *nb, doublereal * a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, doublereal *y, integer *ldy) { /* System generated locals */ integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, i__3; doublereal d__1; /* Local variables */ integer i__; doublereal ei; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAHRD reduces the first NB columns of a real general n-by-(n-k+1) */ /* matrix A so that elements below the k-th subdiagonal are zero. The */ /* reduction is performed by an orthogonal similarity transformation */ /* Q' * A * Q. The routine returns the matrices V and T which determine */ /* Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. */ /* This is an OBSOLETE auxiliary routine. */ /* This routine will be 'deprecated' in a future release. */ /* Please use the new routine DLAHR2 instead. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. */ /* K (input) INTEGER */ /* The offset for the reduction. Elements below the k-th */ /* subdiagonal in the first NB columns are reduced to zero. */ /* NB (input) INTEGER */ /* The number of columns to be reduced. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N-K+1) */ /* On entry, the n-by-(n-k+1) general matrix A. */ /* On exit, the elements on and above the k-th subdiagonal in */ /* the first NB columns are overwritten with the corresponding */ /* elements of the reduced matrix; the elements below the k-th */ /* subdiagonal, with the array TAU, represent the matrix Q as a */ /* product of elementary reflectors. The other columns of A are */ /* unchanged. See Further Details. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* TAU (output) DOUBLE PRECISION array, dimension (NB) */ /* The scalar factors of the elementary reflectors. See Further */ /* Details. */ /* T (output) DOUBLE PRECISION array, dimension (LDT,NB) */ /* The upper triangular matrix T. */ /* LDT (input) INTEGER */ /* The leading dimension of the array T. LDT >= NB. */ /* Y (output) DOUBLE PRECISION array, dimension (LDY,NB) */ /* The n-by-nb matrix Y. */ /* LDY (input) INTEGER */ /* The leading dimension of the array Y. LDY >= N. */ /* Further Details */ /* =============== */ /* The matrix Q is represented as a product of nb elementary reflectors */ /* Q = H(1) H(2) . . . H(nb). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in */ /* A(i+k+1:n,i), and tau in TAU(i). */ /* The elements of the vectors v together form the (n-k+1)-by-nb matrix */ /* V which is needed, with T and Y, to apply the transformation to the */ /* unreduced part of the matrix, using an update of the form: */ /* A := (I - V*T*V') * (A - Y*V'). */ /* The contents of A on exit are illustrated by the following example */ /* with n = 7, k = 3 and nb = 2: */ /* ( a h a a a ) */ /* ( a h a a a ) */ /* ( a h a a a ) */ /* ( h h a a a ) */ /* ( v1 h a a a ) */ /* ( v1 v2 a a a ) */ /* ( v1 v2 a a a ) */ /* where a denotes an element of the original matrix A, h denotes a */ /* modified element of the upper Hessenberg matrix H, and vi denotes an */ /* element of the vector defining H(i). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ --tau; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; y_dim1 = *ldy; y_offset = 1 + y_dim1; y -= y_offset; /* Function Body */ if (*n <= 1) { return 0; } i__1 = *nb; for (i__ = 1; i__ <= i__1; ++i__) { if (i__ > 1) { /* Update A(1:n,i) */ /* Compute i-th column of A - Y * V' */ i__2 = i__ - 1; _starpu_dgemv_("No transpose", n, &i__2, &c_b4, &y[y_offset], ldy, &a[*k + i__ - 1 + a_dim1], lda, &c_b5, &a[i__ * a_dim1 + 1], & c__1); /* Apply I - V * T' * V' to this column (call it b) from the */ /* left, using the last column of T as workspace */ /* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) */ /* ( V2 ) ( b2 ) */ /* where V1 is unit lower triangular */ /* w := V1' * b1 */ i__2 = i__ - 1; _starpu_dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + 1], &c__1); i__2 = i__ - 1; _starpu_dtrmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1); /* w := w + V2'*b2 */ i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b5, &t[*nb * t_dim1 + 1], &c__1); /* w := T'*w */ i__2 = i__ - 1; _starpu_dtrmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1); /* b2 := b2 - V2*w */ i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b4, &a[*k + i__ + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b5, &a[*k + i__ + i__ * a_dim1], &c__1); /* b1 := b1 - V1*w */ i__2 = i__ - 1; _starpu_dtrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] , lda, &t[*nb * t_dim1 + 1], &c__1); i__2 = i__ - 1; _starpu_daxpy_(&i__2, &c_b4, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ * a_dim1], &c__1); a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; } /* Generate the elementary reflector H(i) to annihilate */ /* A(k+i+1:n,i) */ i__2 = *n - *k - i__ + 1; /* Computing MIN */ i__3 = *k + i__ + 1; _starpu_dlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3, *n)+ i__ * a_dim1], &c__1, &tau[i__]); ei = a[*k + i__ + i__ * a_dim1]; a[*k + i__ + i__ * a_dim1] = 1.; /* Compute Y(1:n,i) */ i__2 = *n - *k - i__ + 1; _starpu_dgemv_("No transpose", n, &i__2, &c_b5, &a[(i__ + 1) * a_dim1 + 1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &y[i__ * y_dim1 + 1], &c__1); i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], lda, & a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &t[i__ * t_dim1 + 1], &c__1); i__2 = i__ - 1; _starpu_dgemv_("No transpose", n, &i__2, &c_b4, &y[y_offset], ldy, &t[i__ * t_dim1 + 1], &c__1, &c_b5, &y[i__ * y_dim1 + 1], &c__1); _starpu_dscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); /* Compute T(1:i,i) */ i__2 = i__ - 1; d__1 = -tau[i__]; _starpu_dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1); i__2 = i__ - 1; _starpu_dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1) ; t[i__ + i__ * t_dim1] = tau[i__]; /* L10: */ } a[*k + *nb + *nb * a_dim1] = ei; return 0; /* End of DLAHRD */ } /* _starpu_dlahrd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaic1.c000066400000000000000000000177511507764646700205440ustar00rootroot00000000000000/* dlaic1.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b5 = 1.; /* Subroutine */ int _starpu_dlaic1_(integer *job, integer *j, doublereal *x, doublereal *sest, doublereal *w, doublereal *gamma, doublereal * sestpr, doublereal *s, doublereal *c__) { /* System generated locals */ doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ doublereal b, t, s1, s2, eps, tmp; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal sine, test, zeta1, zeta2, alpha, norma; extern doublereal _starpu_dlamch_(char *); doublereal absgam, absalp, cosine, absest; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAIC1 applies one step of incremental condition estimation in */ /* its simplest version: */ /* Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j */ /* lower triangular matrix L, such that */ /* twonorm(L*x) = sest */ /* Then DLAIC1 computes sestpr, s, c such that */ /* the vector */ /* [ s*x ] */ /* xhat = [ c ] */ /* is an approximate singular vector of */ /* [ L 0 ] */ /* Lhat = [ w' gamma ] */ /* in the sense that */ /* twonorm(Lhat*xhat) = sestpr. */ /* Depending on JOB, an estimate for the largest or smallest singular */ /* value is computed. */ /* Note that [s c]' and sestpr**2 is an eigenpair of the system */ /* diag(sest*sest, 0) + [alpha gamma] * [ alpha ] */ /* [ gamma ] */ /* where alpha = x'*w. */ /* Arguments */ /* ========= */ /* JOB (input) INTEGER */ /* = 1: an estimate for the largest singular value is computed. */ /* = 2: an estimate for the smallest singular value is computed. */ /* J (input) INTEGER */ /* Length of X and W */ /* X (input) DOUBLE PRECISION array, dimension (J) */ /* The j-vector x. */ /* SEST (input) DOUBLE PRECISION */ /* Estimated singular value of j by j matrix L */ /* W (input) DOUBLE PRECISION array, dimension (J) */ /* The j-vector w. */ /* GAMMA (input) DOUBLE PRECISION */ /* The diagonal element gamma. */ /* SESTPR (output) DOUBLE PRECISION */ /* Estimated singular value of (j+1) by (j+1) matrix Lhat. */ /* S (output) DOUBLE PRECISION */ /* Sine needed in forming xhat. */ /* C (output) DOUBLE PRECISION */ /* Cosine needed in forming xhat. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --w; --x; /* Function Body */ eps = _starpu_dlamch_("Epsilon"); alpha = _starpu_ddot_(j, &x[1], &c__1, &w[1], &c__1); absalp = abs(alpha); absgam = abs(*gamma); absest = abs(*sest); if (*job == 1) { /* Estimating largest singular value */ /* special cases */ if (*sest == 0.) { s1 = max(absgam,absalp); if (s1 == 0.) { *s = 0.; *c__ = 1.; *sestpr = 0.; } else { *s = alpha / s1; *c__ = *gamma / s1; tmp = sqrt(*s * *s + *c__ * *c__); *s /= tmp; *c__ /= tmp; *sestpr = s1 * tmp; } return 0; } else if (absgam <= eps * absest) { *s = 1.; *c__ = 0.; tmp = max(absest,absalp); s1 = absest / tmp; s2 = absalp / tmp; *sestpr = tmp * sqrt(s1 * s1 + s2 * s2); return 0; } else if (absalp <= eps * absest) { s1 = absgam; s2 = absest; if (s1 <= s2) { *s = 1.; *c__ = 0.; *sestpr = s2; } else { *s = 0.; *c__ = 1.; *sestpr = s1; } return 0; } else if (absest <= eps * absalp || absest <= eps * absgam) { s1 = absgam; s2 = absalp; if (s1 <= s2) { tmp = s1 / s2; *s = sqrt(tmp * tmp + 1.); *sestpr = s2 * *s; *c__ = *gamma / s2 / *s; *s = d_sign(&c_b5, &alpha) / *s; } else { tmp = s2 / s1; *c__ = sqrt(tmp * tmp + 1.); *sestpr = s1 * *c__; *s = alpha / s1 / *c__; *c__ = d_sign(&c_b5, gamma) / *c__; } return 0; } else { /* normal case */ zeta1 = alpha / absest; zeta2 = *gamma / absest; b = (1. - zeta1 * zeta1 - zeta2 * zeta2) * .5; *c__ = zeta1 * zeta1; if (b > 0.) { t = *c__ / (b + sqrt(b * b + *c__)); } else { t = sqrt(b * b + *c__) - b; } sine = -zeta1 / t; cosine = -zeta2 / (t + 1.); tmp = sqrt(sine * sine + cosine * cosine); *s = sine / tmp; *c__ = cosine / tmp; *sestpr = sqrt(t + 1.) * absest; return 0; } } else if (*job == 2) { /* Estimating smallest singular value */ /* special cases */ if (*sest == 0.) { *sestpr = 0.; if (max(absgam,absalp) == 0.) { sine = 1.; cosine = 0.; } else { sine = -(*gamma); cosine = alpha; } /* Computing MAX */ d__1 = abs(sine), d__2 = abs(cosine); s1 = max(d__1,d__2); *s = sine / s1; *c__ = cosine / s1; tmp = sqrt(*s * *s + *c__ * *c__); *s /= tmp; *c__ /= tmp; return 0; } else if (absgam <= eps * absest) { *s = 0.; *c__ = 1.; *sestpr = absgam; return 0; } else if (absalp <= eps * absest) { s1 = absgam; s2 = absest; if (s1 <= s2) { *s = 0.; *c__ = 1.; *sestpr = s1; } else { *s = 1.; *c__ = 0.; *sestpr = s2; } return 0; } else if (absest <= eps * absalp || absest <= eps * absgam) { s1 = absgam; s2 = absalp; if (s1 <= s2) { tmp = s1 / s2; *c__ = sqrt(tmp * tmp + 1.); *sestpr = absest * (tmp / *c__); *s = -(*gamma / s2) / *c__; *c__ = d_sign(&c_b5, &alpha) / *c__; } else { tmp = s2 / s1; *s = sqrt(tmp * tmp + 1.); *sestpr = absest / *s; *c__ = alpha / s1 / *s; *s = -d_sign(&c_b5, gamma) / *s; } return 0; } else { /* normal case */ zeta1 = alpha / absest; zeta2 = *gamma / absest; /* Computing MAX */ d__3 = zeta1 * zeta1 + 1. + (d__1 = zeta1 * zeta2, abs(d__1)), d__4 = (d__2 = zeta1 * zeta2, abs(d__2)) + zeta2 * zeta2; norma = max(d__3,d__4); /* See if root is closer to zero or to ONE */ test = (zeta1 - zeta2) * 2. * (zeta1 + zeta2) + 1.; if (test >= 0.) { /* root is close to zero, compute directly */ b = (zeta1 * zeta1 + zeta2 * zeta2 + 1.) * .5; *c__ = zeta2 * zeta2; t = *c__ / (b + sqrt((d__1 = b * b - *c__, abs(d__1)))); sine = zeta1 / (1. - t); cosine = -zeta2 / t; *sestpr = sqrt(t + eps * 4. * eps * norma) * absest; } else { /* root is closer to ONE, shift by that amount */ b = (zeta2 * zeta2 + zeta1 * zeta1 - 1.) * .5; *c__ = zeta1 * zeta1; if (b >= 0.) { t = -(*c__) / (b + sqrt(b * b + *c__)); } else { t = b - sqrt(b * b + *c__); } sine = -zeta1 / t; cosine = -zeta2 / (t + 1.); *sestpr = sqrt(t + 1. + eps * 4. * eps * norma) * absest; } tmp = sqrt(sine * sine + cosine * cosine); *s = sine / tmp; *c__ = cosine / tmp; return 0; } } return 0; /* End of DLAIC1 */ } /* _starpu_dlaic1_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaisnan.c000066400000000000000000000035401507764646700211670ustar00rootroot00000000000000/* dlaisnan.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" logical _starpu_dlaisnan_(doublereal *din1, doublereal *din2) { /* System generated locals */ logical ret_val; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This routine is not for general use. It exists solely to avoid */ /* over-optimization in DISNAN. */ /* DLAISNAN checks for NaNs by comparing its two arguments for */ /* inequality. NaN is the only floating-point value where NaN != NaN */ /* returns .TRUE. To check for NaNs, pass the same variable as both */ /* arguments. */ /* A compiler must assume that the two arguments are */ /* not the same variable, and the test will not be optimized away. */ /* Interprocedural or whole-program optimization may delete this */ /* test. The ISNAN functions will be replaced by the correct */ /* Fortran 03 intrinsic once the intrinsic is widely available. */ /* Arguments */ /* ========= */ /* DIN1 (input) DOUBLE PRECISION */ /* DIN2 (input) DOUBLE PRECISION */ /* Two numbers to compare for inequality. */ /* ===================================================================== */ /* .. Executable Statements .. */ ret_val = *din1 != *din2; return ret_val; } /* _starpu_dlaisnan_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaln2.c000066400000000000000000000405151507764646700205550ustar00rootroot00000000000000/* dlaln2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaln2_(logical *ltrans, integer *na, integer *nw, doublereal *smin, doublereal *ca, doublereal *a, integer *lda, doublereal *d1, doublereal *d2, doublereal *b, integer *ldb, doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, doublereal *scale, doublereal *xnorm, integer *info) { /* Initialized data */ static logical zswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; static integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2, 4,3,2,1 }; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; doublereal d__1, d__2, d__3, d__4, d__5, d__6; static doublereal equiv_0[4], equiv_1[4]; /* Local variables */ integer j; #define ci (equiv_0) #define cr (equiv_1) doublereal bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, cr21, cr22, li21, csi, ui11, lr21, ui12, ui22; #define civ (equiv_0) doublereal csr, ur11, ur12, ur22; #define crv (equiv_1) doublereal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; integer icmax; doublereal bnorm, cnorm, smini; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dladiv_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); doublereal bignum, smlnum; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLALN2 solves a system of the form (ca A - w D ) X = s B */ /* or (ca A' - w D) X = s B with possible scaling ("s") and */ /* perturbation of A. (A' means A-transpose.) */ /* A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA */ /* real diagonal matrix, w is a real or complex value, and X and B are */ /* NA x 1 matrices -- real if w is real, complex if w is complex. NA */ /* may be 1 or 2. */ /* If w is complex, X and B are represented as NA x 2 matrices, */ /* the first column of each being the real part and the second */ /* being the imaginary part. */ /* "s" is a scaling factor (.LE. 1), computed by DLALN2, which is */ /* so chosen that X can be computed without overflow. X is further */ /* scaled if necessary to assure that norm(ca A - w D)*norm(X) is less */ /* than overflow. */ /* If both singular values of (ca A - w D) are less than SMIN, */ /* SMIN*identity will be used instead of (ca A - w D). If only one */ /* singular value is less than SMIN, one element of (ca A - w D) will be */ /* perturbed enough to make the smallest singular value roughly SMIN. */ /* If both singular values are at least SMIN, (ca A - w D) will not be */ /* perturbed. In any case, the perturbation will be at most some small */ /* multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values */ /* are computed by infinity-norm approximations, and thus will only be */ /* correct to a factor of 2 or so. */ /* Note: all input quantities are assumed to be smaller than overflow */ /* by a reasonable factor. (See BIGNUM.) */ /* Arguments */ /* ========== */ /* LTRANS (input) LOGICAL */ /* =.TRUE.: A-transpose will be used. */ /* =.FALSE.: A will be used (not transposed.) */ /* NA (input) INTEGER */ /* The size of the matrix A. It may (only) be 1 or 2. */ /* NW (input) INTEGER */ /* 1 if "w" is real, 2 if "w" is complex. It may only be 1 */ /* or 2. */ /* SMIN (input) DOUBLE PRECISION */ /* The desired lower bound on the singular values of A. This */ /* should be a safe distance away from underflow or overflow, */ /* say, between (underflow/machine precision) and (machine */ /* precision * overflow ). (See BIGNUM and ULP.) */ /* CA (input) DOUBLE PRECISION */ /* The coefficient c, which A is multiplied by. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,NA) */ /* The NA x NA matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of A. It must be at least NA. */ /* D1 (input) DOUBLE PRECISION */ /* The 1,1 element in the diagonal matrix D. */ /* D2 (input) DOUBLE PRECISION */ /* The 2,2 element in the diagonal matrix D. Not used if NW=1. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NW) */ /* The NA x NW matrix B (right-hand side). If NW=2 ("w" is */ /* complex), column 1 contains the real part of B and column 2 */ /* contains the imaginary part. */ /* LDB (input) INTEGER */ /* The leading dimension of B. It must be at least NA. */ /* WR (input) DOUBLE PRECISION */ /* The real part of the scalar "w". */ /* WI (input) DOUBLE PRECISION */ /* The imaginary part of the scalar "w". Not used if NW=1. */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NW) */ /* The NA x NW matrix X (unknowns), as computed by DLALN2. */ /* If NW=2 ("w" is complex), on exit, column 1 will contain */ /* the real part of X and column 2 will contain the imaginary */ /* part. */ /* LDX (input) INTEGER */ /* The leading dimension of X. It must be at least NA. */ /* SCALE (output) DOUBLE PRECISION */ /* The scale factor that B must be multiplied by to insure */ /* that overflow does not occur when computing X. Thus, */ /* (ca A - w D) X will be SCALE*B, not B (ignoring */ /* perturbations of A.) It will be at most 1. */ /* XNORM (output) DOUBLE PRECISION */ /* The infinity-norm of X, when X is regarded as an NA x NW */ /* real matrix. */ /* INFO (output) INTEGER */ /* An error flag. It will be set to zero if no error occurs, */ /* a negative number if an argument is in error, or a positive */ /* number if ca A - w D had to be perturbed. */ /* The possible values are: */ /* = 0: No error occurred, and (ca A - w D) did not have to be */ /* perturbed. */ /* = 1: (ca A - w D) had to be perturbed to make its smallest */ /* (or only) singular value greater than SMIN. */ /* NOTE: In the interests of speed, this routine does not */ /* check the inputs for errors. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Equivalences .. */ /* .. */ /* .. Data statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; /* Function Body */ /* .. */ /* .. Executable Statements .. */ /* Compute BIGNUM */ smlnum = 2. * _starpu_dlamch_("Safe minimum"); bignum = 1. / smlnum; smini = max(*smin,smlnum); /* Don't check for input errors */ *info = 0; /* Standard Initializations */ *scale = 1.; if (*na == 1) { /* 1 x 1 (i.e., scalar) system C X = B */ if (*nw == 1) { /* Real 1x1 system. */ /* C = ca A - w D */ csr = *ca * a[a_dim1 + 1] - *wr * *d1; cnorm = abs(csr); /* If | C | < SMINI, use C = SMINI */ if (cnorm < smini) { csr = smini; cnorm = smini; *info = 1; } /* Check scaling for X = B / C */ bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)); if (cnorm < 1. && bnorm > 1.) { if (bnorm > bignum * cnorm) { *scale = 1. / bnorm; } } /* Compute X */ x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); } else { /* Complex 1x1 system (w is complex) */ /* C = ca A - w D */ csr = *ca * a[a_dim1 + 1] - *wr * *d1; csi = -(*wi) * *d1; cnorm = abs(csr) + abs(csi); /* If | C | < SMINI, use C = SMINI */ if (cnorm < smini) { csr = smini; csi = 0.; cnorm = smini; *info = 1; } /* Check scaling for X = B / C */ bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << 1) + 1], abs(d__2)); if (cnorm < 1. && bnorm > 1.) { if (bnorm > bignum * cnorm) { *scale = 1. / bnorm; } } /* Compute X */ d__1 = *scale * b[b_dim1 + 1]; d__2 = *scale * b[(b_dim1 << 1) + 1]; _starpu_dladiv_(&d__1, &d__2, &csr, &csi, &x[x_dim1 + 1], &x[(x_dim1 << 1) + 1]); *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << 1) + 1], abs(d__2)); } } else { /* 2x2 System */ /* Compute the real part of C = ca A - w D (or ca A' - w D ) */ cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; cr[3] = *ca * a[(a_dim1 << 1) + 2] - *wr * *d2; if (*ltrans) { cr[2] = *ca * a[a_dim1 + 2]; cr[1] = *ca * a[(a_dim1 << 1) + 1]; } else { cr[1] = *ca * a[a_dim1 + 2]; cr[2] = *ca * a[(a_dim1 << 1) + 1]; } if (*nw == 1) { /* Real 2x2 system (w is real) */ /* Find the largest element in C */ cmax = 0.; icmax = 0; for (j = 1; j <= 4; ++j) { if ((d__1 = crv[j - 1], abs(d__1)) > cmax) { cmax = (d__1 = crv[j - 1], abs(d__1)); icmax = j; } /* L10: */ } /* If norm(C) < SMINI, use SMINI*identity. */ if (cmax < smini) { /* Computing MAX */ d__3 = (d__1 = b[b_dim1 + 1], abs(d__1)), d__4 = (d__2 = b[ b_dim1 + 2], abs(d__2)); bnorm = max(d__3,d__4); if (smini < 1. && bnorm > 1.) { if (bnorm > bignum * smini) { *scale = 1. / bnorm; } } temp = *scale / smini; x[x_dim1 + 1] = temp * b[b_dim1 + 1]; x[x_dim1 + 2] = temp * b[b_dim1 + 2]; *xnorm = temp * bnorm; *info = 1; return 0; } /* Gaussian elimination with complete pivoting. */ ur11 = crv[icmax - 1]; cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; ur11r = 1. / ur11; lr21 = ur11r * cr21; ur22 = cr22 - ur12 * lr21; /* If smaller pivot < SMINI, use SMINI */ if (abs(ur22) < smini) { ur22 = smini; *info = 1; } if (rswap[icmax - 1]) { br1 = b[b_dim1 + 2]; br2 = b[b_dim1 + 1]; } else { br1 = b[b_dim1 + 1]; br2 = b[b_dim1 + 2]; } br2 -= lr21 * br1; /* Computing MAX */ d__2 = (d__1 = br1 * (ur22 * ur11r), abs(d__1)), d__3 = abs(br2); bbnd = max(d__2,d__3); if (bbnd > 1. && abs(ur22) < 1.) { if (bbnd >= bignum * abs(ur22)) { *scale = 1. / bbnd; } } xr2 = br2 * *scale / ur22; xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); if (zswap[icmax - 1]) { x[x_dim1 + 1] = xr2; x[x_dim1 + 2] = xr1; } else { x[x_dim1 + 1] = xr1; x[x_dim1 + 2] = xr2; } /* Computing MAX */ d__1 = abs(xr1), d__2 = abs(xr2); *xnorm = max(d__1,d__2); /* Further scaling if norm(A) norm(X) > overflow */ if (*xnorm > 1. && cmax > 1.) { if (*xnorm > bignum / cmax) { temp = cmax / bignum; x[x_dim1 + 1] = temp * x[x_dim1 + 1]; x[x_dim1 + 2] = temp * x[x_dim1 + 2]; *xnorm = temp * *xnorm; *scale = temp * *scale; } } } else { /* Complex 2x2 system (w is complex) */ /* Find the largest element in C */ ci[0] = -(*wi) * *d1; ci[1] = 0.; ci[2] = 0.; ci[3] = -(*wi) * *d2; cmax = 0.; icmax = 0; for (j = 1; j <= 4; ++j) { if ((d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs( d__2)) > cmax) { cmax = (d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1] , abs(d__2)); icmax = j; } /* L20: */ } /* If norm(C) < SMINI, use SMINI*identity. */ if (cmax < smini) { /* Computing MAX */ d__5 = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << 1) + 1], abs(d__2)), d__6 = (d__3 = b[b_dim1 + 2], abs(d__3)) + (d__4 = b[(b_dim1 << 1) + 2], abs(d__4)); bnorm = max(d__5,d__6); if (smini < 1. && bnorm > 1.) { if (bnorm > bignum * smini) { *scale = 1. / bnorm; } } temp = *scale / smini; x[x_dim1 + 1] = temp * b[b_dim1 + 1]; x[x_dim1 + 2] = temp * b[b_dim1 + 2]; x[(x_dim1 << 1) + 1] = temp * b[(b_dim1 << 1) + 1]; x[(x_dim1 << 1) + 2] = temp * b[(b_dim1 << 1) + 2]; *xnorm = temp * bnorm; *info = 1; return 0; } /* Gaussian elimination with complete pivoting. */ ur11 = crv[icmax - 1]; ui11 = civ[icmax - 1]; cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; ci21 = civ[ipivot[(icmax << 2) - 3] - 1]; ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; ui12 = civ[ipivot[(icmax << 2) - 2] - 1]; cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; ci22 = civ[ipivot[(icmax << 2) - 1] - 1]; if (icmax == 1 || icmax == 4) { /* Code when off-diagonals of pivoted C are real */ if (abs(ur11) > abs(ui11)) { temp = ui11 / ur11; /* Computing 2nd power */ d__1 = temp; ur11r = 1. / (ur11 * (d__1 * d__1 + 1.)); ui11r = -temp * ur11r; } else { temp = ur11 / ui11; /* Computing 2nd power */ d__1 = temp; ui11r = -1. / (ui11 * (d__1 * d__1 + 1.)); ur11r = -temp * ui11r; } lr21 = cr21 * ur11r; li21 = cr21 * ui11r; ur12s = ur12 * ur11r; ui12s = ur12 * ui11r; ur22 = cr22 - ur12 * lr21; ui22 = ci22 - ur12 * li21; } else { /* Code when diagonals of pivoted C are real */ ur11r = 1. / ur11; ui11r = 0.; lr21 = cr21 * ur11r; li21 = ci21 * ur11r; ur12s = ur12 * ur11r; ui12s = ui12 * ur11r; ur22 = cr22 - ur12 * lr21 + ui12 * li21; ui22 = -ur12 * li21 - ui12 * lr21; } u22abs = abs(ur22) + abs(ui22); /* If smaller pivot < SMINI, use SMINI */ if (u22abs < smini) { ur22 = smini; ui22 = 0.; *info = 1; } if (rswap[icmax - 1]) { br2 = b[b_dim1 + 1]; br1 = b[b_dim1 + 2]; bi2 = b[(b_dim1 << 1) + 1]; bi1 = b[(b_dim1 << 1) + 2]; } else { br1 = b[b_dim1 + 1]; br2 = b[b_dim1 + 2]; bi1 = b[(b_dim1 << 1) + 1]; bi2 = b[(b_dim1 << 1) + 2]; } br2 = br2 - lr21 * br1 + li21 * bi1; bi2 = bi2 - li21 * br1 - lr21 * bi1; /* Computing MAX */ d__1 = (abs(br1) + abs(bi1)) * (u22abs * (abs(ur11r) + abs(ui11r)) ), d__2 = abs(br2) + abs(bi2); bbnd = max(d__1,d__2); if (bbnd > 1. && u22abs < 1.) { if (bbnd >= bignum * u22abs) { *scale = 1. / bbnd; br1 = *scale * br1; bi1 = *scale * bi1; br2 = *scale * br2; bi2 = *scale * bi2; } } _starpu_dladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; if (zswap[icmax - 1]) { x[x_dim1 + 1] = xr2; x[x_dim1 + 2] = xr1; x[(x_dim1 << 1) + 1] = xi2; x[(x_dim1 << 1) + 2] = xi1; } else { x[x_dim1 + 1] = xr1; x[x_dim1 + 2] = xr2; x[(x_dim1 << 1) + 1] = xi1; x[(x_dim1 << 1) + 2] = xi2; } /* Computing MAX */ d__1 = abs(xr1) + abs(xi1), d__2 = abs(xr2) + abs(xi2); *xnorm = max(d__1,d__2); /* Further scaling if norm(A) norm(X) > overflow */ if (*xnorm > 1. && cmax > 1.) { if (*xnorm > bignum / cmax) { temp = cmax / bignum; x[x_dim1 + 1] = temp * x[x_dim1 + 1]; x[x_dim1 + 2] = temp * x[x_dim1 + 2]; x[(x_dim1 << 1) + 1] = temp * x[(x_dim1 << 1) + 1]; x[(x_dim1 << 1) + 2] = temp * x[(x_dim1 << 1) + 2]; *xnorm = temp * *xnorm; *scale = temp * *scale; } } } } return 0; /* End of DLALN2 */ } /* _starpu_dlaln2_ */ #undef crv #undef civ #undef cr #undef ci starpu-1.4.9+dfsg/min-dgels/base/SRC/dlals0.c000066400000000000000000000354311507764646700205610ustar00rootroot00000000000000/* dlals0.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b5 = -1.; static integer c__1 = 1; static doublereal c_b11 = 1.; static doublereal c_b13 = 0.; static integer c__0 = 0; /* Subroutine */ int _starpu_dlals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal * poles, doublereal *difl, doublereal *difr, doublereal *z__, integer * k, doublereal *c__, doublereal *s, doublereal *work, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, i__1, i__2; doublereal d__1; /* Local variables */ integer i__, j, m, n; doublereal dj; integer nlp1; doublereal temp; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal diflj, difrj, dsigj; extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); doublereal dsigjp; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLALS0 applies back the multiplying factors of either the left or the */ /* right singular vector matrix of a diagonal matrix appended by a row */ /* to the right hand side matrix B in solving the least squares problem */ /* using the divide-and-conquer SVD approach. */ /* For the left singular vector matrix, three types of orthogonal */ /* matrices are involved: */ /* (1L) Givens rotations: the number of such rotations is GIVPTR; the */ /* pairs of columns/rows they were applied to are stored in GIVCOL; */ /* and the C- and S-values of these rotations are stored in GIVNUM. */ /* (2L) Permutation. The (NL+1)-st row of B is to be moved to the first */ /* row, and for J=2:N, PERM(J)-th row of B is to be moved to the */ /* J-th row. */ /* (3L) The left singular vector matrix of the remaining matrix. */ /* For the right singular vector matrix, four types of orthogonal */ /* matrices are involved: */ /* (1R) The right singular vector matrix of the remaining matrix. */ /* (2R) If SQRE = 1, one extra Givens rotation to generate the right */ /* null space. */ /* (3R) The inverse transformation of (2L). */ /* (4R) The inverse transformation of (1L). */ /* Arguments */ /* ========= */ /* ICOMPQ (input) INTEGER */ /* Specifies whether singular vectors are to be computed in */ /* factored form: */ /* = 0: Left singular vector matrix. */ /* = 1: Right singular vector matrix. */ /* NL (input) INTEGER */ /* The row dimension of the upper block. NL >= 1. */ /* NR (input) INTEGER */ /* The row dimension of the lower block. NR >= 1. */ /* SQRE (input) INTEGER */ /* = 0: the lower block is an NR-by-NR square matrix. */ /* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ /* The bidiagonal matrix has row dimension N = NL + NR + 1, */ /* and column dimension M = N + SQRE. */ /* NRHS (input) INTEGER */ /* The number of columns of B and BX. NRHS must be at least 1. */ /* B (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS ) */ /* On input, B contains the right hand sides of the least */ /* squares problem in rows 1 through M. On output, B contains */ /* the solution X in rows 1 through N. */ /* LDB (input) INTEGER */ /* The leading dimension of B. LDB must be at least */ /* max(1,MAX( M, N ) ). */ /* BX (workspace) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) */ /* LDBX (input) INTEGER */ /* The leading dimension of BX. */ /* PERM (input) INTEGER array, dimension ( N ) */ /* The permutations (from deflation and sorting) applied */ /* to the two blocks. */ /* GIVPTR (input) INTEGER */ /* The number of Givens rotations which took place in this */ /* subproblem. */ /* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) */ /* Each pair of numbers indicates a pair of rows/columns */ /* involved in a Givens rotation. */ /* LDGCOL (input) INTEGER */ /* The leading dimension of GIVCOL, must be at least N. */ /* GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */ /* Each number indicates the C or S value used in the */ /* corresponding Givens rotation. */ /* LDGNUM (input) INTEGER */ /* The leading dimension of arrays DIFR, POLES and */ /* GIVNUM, must be at least K. */ /* POLES (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */ /* On entry, POLES(1:K, 1) contains the new singular */ /* values obtained from solving the secular equation, and */ /* POLES(1:K, 2) is an array containing the poles in the secular */ /* equation. */ /* DIFL (input) DOUBLE PRECISION array, dimension ( K ). */ /* On entry, DIFL(I) is the distance between I-th updated */ /* (undeflated) singular value and the I-th (undeflated) old */ /* singular value. */ /* DIFR (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). */ /* On entry, DIFR(I, 1) contains the distances between I-th */ /* updated (undeflated) singular value and the I+1-th */ /* (undeflated) old singular value. And DIFR(I, 2) is the */ /* normalizing factor for the I-th right singular vector. */ /* Z (input) DOUBLE PRECISION array, dimension ( K ) */ /* Contain the components of the deflation-adjusted updating row */ /* vector. */ /* K (input) INTEGER */ /* Contains the dimension of the non-deflated matrix, */ /* This is the order of the related secular equation. 1 <= K <=N. */ /* C (input) DOUBLE PRECISION */ /* C contains garbage if SQRE =0 and the C-value of a Givens */ /* rotation related to the right null space if SQRE = 1. */ /* S (input) DOUBLE PRECISION */ /* S contains garbage if SQRE =0 and the S-value of a Givens */ /* rotation related to the right null space if SQRE = 1. */ /* WORK (workspace) DOUBLE PRECISION array, dimension ( K ) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Ren-Cang Li, Computer Science Division, University of */ /* California at Berkeley, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; bx_dim1 = *ldbx; bx_offset = 1 + bx_dim1; bx -= bx_offset; --perm; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1; givcol -= givcol_offset; difr_dim1 = *ldgnum; difr_offset = 1 + difr_dim1; difr -= difr_offset; poles_dim1 = *ldgnum; poles_offset = 1 + poles_dim1; poles -= poles_offset; givnum_dim1 = *ldgnum; givnum_offset = 1 + givnum_dim1; givnum -= givnum_offset; --difl; --z__; --work; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*nl < 1) { *info = -2; } else if (*nr < 1) { *info = -3; } else if (*sqre < 0 || *sqre > 1) { *info = -4; } n = *nl + *nr + 1; if (*nrhs < 1) { *info = -5; } else if (*ldb < n) { *info = -7; } else if (*ldbx < n) { *info = -9; } else if (*givptr < 0) { *info = -11; } else if (*ldgcol < n) { *info = -13; } else if (*ldgnum < n) { *info = -15; } else if (*k < 1) { *info = -20; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLALS0", &i__1); return 0; } m = n + *sqre; nlp1 = *nl + 1; if (*icompq == 0) { /* Apply back orthogonal transformations from the left. */ /* Step (1L): apply back the Givens rotations performed. */ i__1 = *givptr; for (i__ = 1; i__ <= i__1; ++i__) { _starpu_drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); /* L10: */ } /* Step (2L): permute rows of B. */ _starpu_dcopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { _starpu_dcopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], ldbx); /* L20: */ } /* Step (3L): apply the inverse of the left singular vector */ /* matrix to BX. */ if (*k == 1) { _starpu_dcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); if (z__[1] < 0.) { _starpu_dscal_(nrhs, &c_b5, &b[b_offset], ldb); } } else { i__1 = *k; for (j = 1; j <= i__1; ++j) { diflj = difl[j]; dj = poles[j + poles_dim1]; dsigj = -poles[j + (poles_dim1 << 1)]; if (j < *k) { difrj = -difr[j + difr_dim1]; dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; } if (z__[j] == 0. || poles[j + (poles_dim1 << 1)] == 0.) { work[j] = 0.; } else { work[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj / (poles[j + (poles_dim1 << 1)] + dj); } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == 0.) { work[i__] = 0.; } else { work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] / (_starpu_dlamc3_(&poles[i__ + (poles_dim1 << 1)], & dsigj) - diflj) / (poles[i__ + (poles_dim1 << 1)] + dj); } /* L30: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == 0.) { work[i__] = 0.; } else { work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] / (_starpu_dlamc3_(&poles[i__ + (poles_dim1 << 1)], & dsigjp) + difrj) / (poles[i__ + (poles_dim1 << 1)] + dj); } /* L40: */ } work[1] = -1.; temp = _starpu_dnrm2_(k, &work[1], &c__1); _starpu_dgemv_("T", k, nrhs, &c_b11, &bx[bx_offset], ldbx, &work[1], & c__1, &c_b13, &b[j + b_dim1], ldb); _starpu_dlascl_("G", &c__0, &c__0, &temp, &c_b11, &c__1, nrhs, &b[j + b_dim1], ldb, info); /* L50: */ } } /* Move the deflated rows of BX to B also. */ if (*k < max(m,n)) { i__1 = n - *k; _starpu_dlacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 + b_dim1], ldb); } } else { /* Apply back the right orthogonal transformations. */ /* Step (1R): apply back the new right singular vector matrix */ /* to B. */ if (*k == 1) { _starpu_dcopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); } else { i__1 = *k; for (j = 1; j <= i__1; ++j) { dsigj = poles[j + (poles_dim1 << 1)]; if (z__[j] == 0.) { work[j] = 0.; } else { work[j] = -z__[j] / difl[j] / (dsigj + poles[j + poles_dim1]) / difr[j + (difr_dim1 << 1)]; } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (z__[j] == 0.) { work[i__] = 0.; } else { d__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; work[i__] = z__[j] / (_starpu_dlamc3_(&dsigj, &d__1) - difr[ i__ + difr_dim1]) / (dsigj + poles[i__ + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; } /* L60: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { if (z__[j] == 0.) { work[i__] = 0.; } else { d__1 = -poles[i__ + (poles_dim1 << 1)]; work[i__] = z__[j] / (_starpu_dlamc3_(&dsigj, &d__1) - difl[ i__]) / (dsigj + poles[i__ + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; } /* L70: */ } _starpu_dgemv_("T", k, nrhs, &c_b11, &b[b_offset], ldb, &work[1], & c__1, &c_b13, &bx[j + bx_dim1], ldbx); /* L80: */ } } /* Step (2R): if SQRE = 1, apply back the rotation that is */ /* related to the right null space of the subproblem. */ if (*sqre == 1) { _starpu_dcopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); _starpu_drot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, s); } if (*k < max(m,n)) { i__1 = n - *k; _starpu_dlacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + bx_dim1], ldbx); } /* Step (3R): permute rows of B. */ _starpu_dcopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); if (*sqre == 1) { _starpu_dcopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); } i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { _starpu_dcopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], ldb); /* L90: */ } /* Step (4R): apply back the Givens rotations performed. */ for (i__ = *givptr; i__ >= 1; --i__) { d__1 = -givnum[i__ + givnum_dim1]; _starpu_drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + (givnum_dim1 << 1)], &d__1); /* L100: */ } } return 0; /* End of DLALS0 */ } /* _starpu_dlals0_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlalsa.c000066400000000000000000000351571507764646700206470ustar00rootroot00000000000000/* dlalsa.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b7 = 1.; static doublereal c_b8 = 0.; static integer c__2 = 2; /* Subroutine */ int _starpu_dlalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal * poles, integer *givptr, integer *givcol, integer *ldgcol, integer * perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * work, integer *iwork, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, i__2; /* Builtin functions */ integer pow_ii(integer *, integer *); /* Local variables */ integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, ndb1, nlp1, lvl2, nrp1, nlvl, sqre; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer inode, ndiml, ndimr; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlals0_(integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dlasdt_(integer *, integer *, integer *, integer *, integer *, integer *, integer *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLALSA is an itermediate step in solving the least squares problem */ /* by computing the SVD of the coefficient matrix in compact form (The */ /* singular vectors are computed as products of simple orthorgonal */ /* matrices.). */ /* If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector */ /* matrix of an upper bidiagonal matrix to the right hand side; and if */ /* ICOMPQ = 1, DLALSA applies the right singular vector matrix to the */ /* right hand side. The singular vector matrices were generated in */ /* compact form by DLALSA. */ /* Arguments */ /* ========= */ /* ICOMPQ (input) INTEGER */ /* Specifies whether the left or the right singular vector */ /* matrix is involved. */ /* = 0: Left singular vector matrix */ /* = 1: Right singular vector matrix */ /* SMLSIZ (input) INTEGER */ /* The maximum size of the subproblems at the bottom of the */ /* computation tree. */ /* N (input) INTEGER */ /* The row and column dimensions of the upper bidiagonal matrix. */ /* NRHS (input) INTEGER */ /* The number of columns of B and BX. NRHS must be at least 1. */ /* B (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS ) */ /* On input, B contains the right hand sides of the least */ /* squares problem in rows 1 through M. */ /* On output, B contains the solution X in rows 1 through N. */ /* LDB (input) INTEGER */ /* The leading dimension of B in the calling subprogram. */ /* LDB must be at least max(1,MAX( M, N ) ). */ /* BX (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) */ /* On exit, the result of applying the left or right singular */ /* vector matrix to B. */ /* LDBX (input) INTEGER */ /* The leading dimension of BX. */ /* U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). */ /* On entry, U contains the left singular vector matrices of all */ /* subproblems at the bottom level. */ /* LDU (input) INTEGER, LDU = > N. */ /* The leading dimension of arrays U, VT, DIFL, DIFR, */ /* POLES, GIVNUM, and Z. */ /* VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). */ /* On entry, VT' contains the right singular vector matrices of */ /* all subproblems at the bottom level. */ /* K (input) INTEGER array, dimension ( N ). */ /* DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). */ /* where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. */ /* DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). */ /* On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record */ /* distances between singular values on the I-th level and */ /* singular values on the (I -1)-th level, and DIFR(*, 2 * I) */ /* record the normalizing factors of the right singular vectors */ /* matrices of subproblems on I-th level. */ /* Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). */ /* On entry, Z(1, I) contains the components of the deflation- */ /* adjusted updating row vector for subproblems on the I-th */ /* level. */ /* POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). */ /* On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old */ /* singular values involved in the secular equations on the I-th */ /* level. */ /* GIVPTR (input) INTEGER array, dimension ( N ). */ /* On entry, GIVPTR( I ) records the number of Givens */ /* rotations performed on the I-th problem on the computation */ /* tree. */ /* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). */ /* On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the */ /* locations of Givens rotations performed on the I-th level on */ /* the computation tree. */ /* LDGCOL (input) INTEGER, LDGCOL = > N. */ /* The leading dimension of arrays GIVCOL and PERM. */ /* PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). */ /* On entry, PERM(*, I) records permutations done on the I-th */ /* level of the computation tree. */ /* GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). */ /* On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- */ /* values of Givens rotations performed on the I-th level on the */ /* computation tree. */ /* C (input) DOUBLE PRECISION array, dimension ( N ). */ /* On entry, if the I-th subproblem is not square, */ /* C( I ) contains the C-value of a Givens rotation related to */ /* the right null space of the I-th subproblem. */ /* S (input) DOUBLE PRECISION array, dimension ( N ). */ /* On entry, if the I-th subproblem is not square, */ /* S( I ) contains the S-value of a Givens rotation related to */ /* the right null space of the I-th subproblem. */ /* WORK (workspace) DOUBLE PRECISION array. */ /* The dimension must be at least N. */ /* IWORK (workspace) INTEGER array. */ /* The dimension must be at least 3 * N */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Ren-Cang Li, Computer Science Division, University of */ /* California at Berkeley, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; bx_dim1 = *ldbx; bx_offset = 1 + bx_dim1; bx -= bx_offset; givnum_dim1 = *ldu; givnum_offset = 1 + givnum_dim1; givnum -= givnum_offset; poles_dim1 = *ldu; poles_offset = 1 + poles_dim1; poles -= poles_offset; z_dim1 = *ldu; z_offset = 1 + z_dim1; z__ -= z_offset; difr_dim1 = *ldu; difr_offset = 1 + difr_dim1; difr -= difr_offset; difl_dim1 = *ldu; difl_offset = 1 + difl_dim1; difl -= difl_offset; vt_dim1 = *ldu; vt_offset = 1 + vt_dim1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; --k; --givptr; perm_dim1 = *ldgcol; perm_offset = 1 + perm_dim1; perm -= perm_offset; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1; givcol -= givcol_offset; --c__; --s; --work; --iwork; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*smlsiz < 3) { *info = -2; } else if (*n < *smlsiz) { *info = -3; } else if (*nrhs < 1) { *info = -4; } else if (*ldb < *n) { *info = -6; } else if (*ldbx < *n) { *info = -8; } else if (*ldu < *n) { *info = -10; } else if (*ldgcol < *n) { *info = -19; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLALSA", &i__1); return 0; } /* Book-keeping and setting up the computation tree. */ inode = 1; ndiml = inode + *n; ndimr = ndiml + *n; _starpu_dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* The following code applies back the left singular vector factors. */ /* For applying back the right singular vector factors, go to 50. */ if (*icompq == 1) { goto L50; } /* The nodes on the bottom level of the tree were solved */ /* by DLASDQ. The corresponding left and right singular vector */ /* matrices are in explicit form. First apply back the left */ /* singular vector matrices. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { /* IC : center row of each node */ /* NL : number of rows of left subproblem */ /* NR : number of rows of right subproblem */ /* NLF: starting row of the left subproblem */ /* NRF: starting row of the right subproblem */ i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlf = ic - nl; nrf = ic + 1; _starpu_dgemm_("T", "N", &nl, nrhs, &nl, &c_b7, &u[nlf + u_dim1], ldu, &b[nlf + b_dim1], ldb, &c_b8, &bx[nlf + bx_dim1], ldbx); _starpu_dgemm_("T", "N", &nr, nrhs, &nr, &c_b7, &u[nrf + u_dim1], ldu, &b[nrf + b_dim1], ldb, &c_b8, &bx[nrf + bx_dim1], ldbx); /* L10: */ } /* Next copy the rows of B that correspond to unchanged rows */ /* in the bidiagonal matrix to BX. */ i__1 = nd; for (i__ = 1; i__ <= i__1; ++i__) { ic = iwork[inode + i__ - 1]; _starpu_dcopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); /* L20: */ } /* Finally go through the left singular vector matrices of all */ /* the other subproblems bottom-up on the tree. */ j = pow_ii(&c__2, &nlvl); sqre = 0; for (lvl = nlvl; lvl >= 1; --lvl) { lvl2 = (lvl << 1) - 1; /* find the first node LF and last node LL on */ /* the current level LVL */ if (lvl == 1) { lf = 1; ll = 1; } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; --j; _starpu_dlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ j], &s[j], &work[1], info); /* L30: */ } /* L40: */ } goto L90; /* ICOMPQ = 1: applying back the right singular vector factors. */ L50: /* First now go through the right singular vector matrices of all */ /* the tree nodes top-down. */ j = 0; i__1 = nlvl; for (lvl = 1; lvl <= i__1; ++lvl) { lvl2 = (lvl << 1) - 1; /* Find the first node LF and last node LL on */ /* the current level LVL. */ if (lvl == 1) { lf = 1; ll = 1; } else { i__2 = lvl - 1; lf = pow_ii(&c__2, &i__2); ll = (lf << 1) - 1; } i__2 = lf; for (i__ = ll; i__ >= i__2; --i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; if (i__ == ll) { sqre = 0; } else { sqre = 1; } ++j; _starpu_dlals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ j], &s[j], &work[1], info); /* L60: */ } /* L70: */ } /* The nodes on the bottom level of the tree were solved */ /* by DLASDQ. The corresponding right singular vector */ /* matrices are in explicit form. Apply them back. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlp1 = nl + 1; if (i__ == nd) { nrp1 = nr; } else { nrp1 = nr + 1; } nlf = ic - nl; nrf = ic + 1; _starpu_dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b7, &vt[nlf + vt_dim1], ldu, & b[nlf + b_dim1], ldb, &c_b8, &bx[nlf + bx_dim1], ldbx); _starpu_dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b7, &vt[nrf + vt_dim1], ldu, & b[nrf + b_dim1], ldb, &c_b8, &bx[nrf + bx_dim1], ldbx); /* L80: */ } L90: return 0; /* End of DLALSA */ } /* _starpu_dlalsa_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlalsd.c000066400000000000000000000414131507764646700206420ustar00rootroot00000000000000/* dlalsd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b6 = 0.; static integer c__0 = 0; static doublereal c_b11 = 1.; /* Subroutine */ int _starpu_dlalsd_(char *uplo, integer *smlsiz, integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, doublereal *rcond, integer *rank, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer b_dim1, b_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double log(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ integer c__, i__, j, k; doublereal r__; integer s, u, z__; doublereal cs; integer bx; doublereal sn; integer st, vt, nm1, st1; doublereal eps; integer iwk; doublereal tol; integer difl, difr; doublereal rcnd; integer perm, nsub; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); integer nlvl, sqre, bxst; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); integer poles, sizei, nsize, nwork, icmpq1, icmpq2; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlasda_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dlalsa_(integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dlasdq_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); integer givcol; extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dlasrt_(char *, integer *, doublereal *, integer *); doublereal orgnrm; integer givnum, givptr, smlszp; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLALSD uses the singular value decomposition of A to solve the least */ /* squares problem of finding X to minimize the Euclidean norm of each */ /* column of A*X-B, where A is N-by-N upper bidiagonal, and X and B */ /* are N-by-NRHS. The solution X overwrites B. */ /* The singular values of A smaller than RCOND times the largest */ /* singular value are treated as zero in solving the least squares */ /* problem; in this case a minimum norm solution is returned. */ /* The actual singular values are returned in D in ascending order. */ /* This code makes very mild assumptions about floating point */ /* arithmetic. It will work on machines with a guard digit in */ /* add/subtract, or on those binary machines without guard digits */ /* which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. */ /* It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': D and E define an upper bidiagonal matrix. */ /* = 'L': D and E define a lower bidiagonal matrix. */ /* SMLSIZ (input) INTEGER */ /* The maximum size of the subproblems at the bottom of the */ /* computation tree. */ /* N (input) INTEGER */ /* The dimension of the bidiagonal matrix. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of columns of B. NRHS must be at least 1. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry D contains the main diagonal of the bidiagonal */ /* matrix. On exit, if INFO = 0, D contains its singular values. */ /* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* Contains the super-diagonal entries of the bidiagonal matrix. */ /* On exit, E has been destroyed. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On input, B contains the right hand sides of the least */ /* squares problem. On output, B contains the solution X. */ /* LDB (input) INTEGER */ /* The leading dimension of B in the calling subprogram. */ /* LDB must be at least max(1,N). */ /* RCOND (input) DOUBLE PRECISION */ /* The singular values of A less than or equal to RCOND times */ /* the largest singular value are treated as zero in solving */ /* the least squares problem. If RCOND is negative, */ /* machine precision is used instead. */ /* For example, if diag(S)*X=B were the least squares problem, */ /* where diag(S) is a diagonal matrix of singular values, the */ /* solution would be X(i) = B(i) / S(i) if S(i) is greater than */ /* RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to */ /* RCOND*max(S). */ /* RANK (output) INTEGER */ /* The number of singular values of A greater than RCOND times */ /* the largest singular value. */ /* WORK (workspace) DOUBLE PRECISION array, dimension at least */ /* (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), */ /* where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). */ /* IWORK (workspace) INTEGER array, dimension at least */ /* (3*N*NLVL + 11*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: The algorithm failed to compute an singular value while */ /* working on the submatrix lying in rows and columns */ /* INFO/(N+1) through MOD(INFO,N+1). */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Ren-Cang Li, Computer Science Division, University of */ /* California at Berkeley, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --work; --iwork; /* Function Body */ *info = 0; if (*n < 0) { *info = -3; } else if (*nrhs < 1) { *info = -4; } else if (*ldb < 1 || *ldb < *n) { *info = -8; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLALSD", &i__1); return 0; } eps = _starpu_dlamch_("Epsilon"); /* Set up the tolerance. */ if (*rcond <= 0. || *rcond >= 1.) { rcnd = eps; } else { rcnd = *rcond; } *rank = 0; /* Quick return if possible. */ if (*n == 0) { return 0; } else if (*n == 1) { if (d__[1] == 0.) { _starpu_dlaset_("A", &c__1, nrhs, &c_b6, &c_b6, &b[b_offset], ldb); } else { *rank = 1; _starpu_dlascl_("G", &c__0, &c__0, &d__[1], &c_b11, &c__1, nrhs, &b[ b_offset], ldb, info); d__[1] = abs(d__[1]); } return 0; } /* Rotate the matrix if it is lower bidiagonal. */ if (*(unsigned char *)uplo == 'L') { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { _starpu_dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; if (*nrhs == 1) { _starpu_drot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & c__1, &cs, &sn); } else { work[(i__ << 1) - 1] = cs; work[i__ * 2] = sn; } /* L10: */ } if (*nrhs > 1) { i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *n - 1; for (j = 1; j <= i__2; ++j) { cs = work[(j << 1) - 1]; sn = work[j * 2]; _starpu_drot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ * b_dim1], &c__1, &cs, &sn); /* L20: */ } /* L30: */ } } } /* Scale. */ nm1 = *n - 1; orgnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); if (orgnrm == 0.) { _starpu_dlaset_("A", n, nrhs, &c_b6, &c_b6, &b[b_offset], ldb); return 0; } _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b11, n, &c__1, &d__[1], n, info); _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b11, &nm1, &c__1, &e[1], &nm1, info); /* If N is smaller than the minimum divide size SMLSIZ, then solve */ /* the problem with another solver. */ if (*n <= *smlsiz) { nwork = *n * *n + 1; _starpu_dlaset_("A", n, n, &c_b6, &c_b11, &work[1], n); _starpu_dlasdq_("U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, & work[1], n, &b[b_offset], ldb, &work[nwork], info); if (*info != 0) { return 0; } tol = rcnd * (d__1 = d__[_starpu_idamax_(n, &d__[1], &c__1)], abs(d__1)); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (d__[i__] <= tol) { _starpu_dlaset_("A", &c__1, nrhs, &c_b6, &c_b6, &b[i__ + b_dim1], ldb); } else { _starpu_dlascl_("G", &c__0, &c__0, &d__[i__], &c_b11, &c__1, nrhs, &b[ i__ + b_dim1], ldb, info); ++(*rank); } /* L40: */ } _starpu_dgemm_("T", "N", n, nrhs, n, &c_b11, &work[1], n, &b[b_offset], ldb, & c_b6, &work[nwork], n); _starpu_dlacpy_("A", n, nrhs, &work[nwork], n, &b[b_offset], ldb); /* Unscale. */ _starpu_dlascl_("G", &c__0, &c__0, &c_b11, &orgnrm, n, &c__1, &d__[1], n, info); _starpu_dlasrt_("D", n, &d__[1], info); _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b11, n, nrhs, &b[b_offset], ldb, info); return 0; } /* Book-keeping and setting up some constants. */ nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) / log(2.)) + 1; smlszp = *smlsiz + 1; u = 1; vt = *smlsiz * *n + 1; difl = vt + smlszp * *n; difr = difl + nlvl * *n; z__ = difr + (nlvl * *n << 1); c__ = z__ + nlvl * *n; s = c__ + *n; poles = s + *n; givnum = poles + (nlvl << 1) * *n; bx = givnum + (nlvl << 1) * *n; nwork = bx + *n * *nrhs; sizei = *n + 1; k = sizei + *n; givptr = k + *n; perm = givptr + *n; givcol = perm + nlvl * *n; iwk = givcol + (nlvl * *n << 1); st = 1; sqre = 0; icmpq1 = 1; icmpq2 = 0; nsub = 0; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if ((d__1 = d__[i__], abs(d__1)) < eps) { d__[i__] = d_sign(&eps, &d__[i__]); } /* L50: */ } i__1 = nm1; for (i__ = 1; i__ <= i__1; ++i__) { if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { ++nsub; iwork[nsub] = st; /* Subproblem found. First determine its size and then */ /* apply divide and conquer on it. */ if (i__ < nm1) { /* A subproblem with E(I) small for I < NM1. */ nsize = i__ - st + 1; iwork[sizei + nsub - 1] = nsize; } else if ((d__1 = e[i__], abs(d__1)) >= eps) { /* A subproblem with E(NM1) not too small but I = NM1. */ nsize = *n - st + 1; iwork[sizei + nsub - 1] = nsize; } else { /* A subproblem with E(NM1) small. This implies an */ /* 1-by-1 subproblem at D(N), which is not solved */ /* explicitly. */ nsize = i__ - st + 1; iwork[sizei + nsub - 1] = nsize; ++nsub; iwork[nsub] = *n; iwork[sizei + nsub - 1] = 1; _starpu_dcopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); } st1 = st - 1; if (nsize == 1) { /* This is a 1-by-1 subproblem and is not solved */ /* explicitly. */ _starpu_dcopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); } else if (nsize <= *smlsiz) { /* This is a small subproblem and is solved by DLASDQ. */ _starpu_dlaset_("A", &nsize, &nsize, &c_b6, &c_b11, &work[vt + st1], n); _starpu_dlasdq_("U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[ st], &work[vt + st1], n, &work[nwork], n, &b[st + b_dim1], ldb, &work[nwork], info); if (*info != 0) { return 0; } _starpu_dlacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); } else { /* A large problem. Solve it using divide and conquer. */ _starpu_dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & work[u + st1], n, &work[vt + st1], &iwork[k + st1], & work[difl + st1], &work[difr + st1], &work[z__ + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[ givcol + st1], n, &iwork[perm + st1], &work[givnum + st1], &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[iwk], info); if (*info != 0) { return 0; } bxst = bx + st1; _starpu_dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & work[bxst], n, &work[u + st1], n, &work[vt + st1], & iwork[k + st1], &work[difl + st1], &work[difr + st1], &work[z__ + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[givcol + st1], n, &iwork[perm + st1], & work[givnum + st1], &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[iwk], info); if (*info != 0) { return 0; } } st = i__ + 1; } /* L60: */ } /* Apply the singular values and treat the tiny ones as zero. */ tol = rcnd * (d__1 = d__[_starpu_idamax_(n, &d__[1], &c__1)], abs(d__1)); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Some of the elements in D can be negative because 1-by-1 */ /* subproblems were not solved explicitly. */ if ((d__1 = d__[i__], abs(d__1)) <= tol) { _starpu_dlaset_("A", &c__1, nrhs, &c_b6, &c_b6, &work[bx + i__ - 1], n); } else { ++(*rank); _starpu_dlascl_("G", &c__0, &c__0, &d__[i__], &c_b11, &c__1, nrhs, &work[ bx + i__ - 1], n, info); } d__[i__] = (d__1 = d__[i__], abs(d__1)); /* L70: */ } /* Now apply back the right singular vectors. */ icmpq2 = 1; i__1 = nsub; for (i__ = 1; i__ <= i__1; ++i__) { st = iwork[i__]; st1 = st - 1; nsize = iwork[sizei + i__ - 1]; bxst = bx + st1; if (nsize == 1) { _starpu_dcopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); } else if (nsize <= *smlsiz) { _starpu_dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b11, &work[vt + st1], n, &work[bxst], n, &c_b6, &b[st + b_dim1], ldb); } else { _starpu_dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + b_dim1], ldb, &work[u + st1], n, &work[vt + st1], &iwork[ k + st1], &work[difl + st1], &work[difr + st1], &work[z__ + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[ givcol + st1], n, &iwork[perm + st1], &work[givnum + st1], &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[ iwk], info); if (*info != 0) { return 0; } } /* L80: */ } /* Unscale and sort the singular values. */ _starpu_dlascl_("G", &c__0, &c__0, &c_b11, &orgnrm, n, &c__1, &d__[1], n, info); _starpu_dlasrt_("D", n, &d__[1], info); _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b11, n, nrhs, &b[b_offset], ldb, info); return 0; /* End of DLALSD */ } /* _starpu_dlalsd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlamrg.c000066400000000000000000000064101507764646700206430ustar00rootroot00000000000000/* dlamrg.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlamrg_(integer *n1, integer *n2, doublereal *a, integer *dtrd1, integer *dtrd2, integer *index) { /* System generated locals */ integer i__1; /* Local variables */ integer i__, ind1, ind2, n1sv, n2sv; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAMRG will create a permutation list which will merge the elements */ /* of A (which is composed of two independently sorted sets) into a */ /* single set which is sorted in ascending order. */ /* Arguments */ /* ========= */ /* N1 (input) INTEGER */ /* N2 (input) INTEGER */ /* These arguements contain the respective lengths of the two */ /* sorted lists to be merged. */ /* A (input) DOUBLE PRECISION array, dimension (N1+N2) */ /* The first N1 elements of A contain a list of numbers which */ /* are sorted in either ascending or descending order. Likewise */ /* for the final N2 elements. */ /* DTRD1 (input) INTEGER */ /* DTRD2 (input) INTEGER */ /* These are the strides to be taken through the array A. */ /* Allowable strides are 1 and -1. They indicate whether a */ /* subset of A is sorted in ascending (DTRDx = 1) or descending */ /* (DTRDx = -1) order. */ /* INDEX (output) INTEGER array, dimension (N1+N2) */ /* On exit this array will contain a permutation such that */ /* if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be */ /* sorted in ascending order. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --index; --a; /* Function Body */ n1sv = *n1; n2sv = *n2; if (*dtrd1 > 0) { ind1 = 1; } else { ind1 = *n1; } if (*dtrd2 > 0) { ind2 = *n1 + 1; } else { ind2 = *n1 + *n2; } i__ = 1; /* while ( (N1SV > 0) & (N2SV > 0) ) */ L10: if (n1sv > 0 && n2sv > 0) { if (a[ind1] <= a[ind2]) { index[i__] = ind1; ++i__; ind1 += *dtrd1; --n1sv; } else { index[i__] = ind2; ++i__; ind2 += *dtrd2; --n2sv; } goto L10; } /* end while */ if (n1sv == 0) { i__1 = n2sv; for (n1sv = 1; n1sv <= i__1; ++n1sv) { index[i__] = ind2; ++i__; ind2 += *dtrd2; /* L20: */ } } else { /* N2SV .EQ. 0 */ i__1 = n1sv; for (n2sv = 1; n2sv <= i__1; ++n2sv) { index[i__] = ind1; ++i__; ind1 += *dtrd1; /* L30: */ } } return 0; /* End of DLAMRG */ } /* _starpu_dlamrg_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaneg.c000066400000000000000000000140231507764646700206260ustar00rootroot00000000000000/* dlaneg.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_dlaneg_(integer *n, doublereal *d__, doublereal *lld, doublereal * sigma, doublereal *pivmin, integer *r__) { /* System generated locals */ integer ret_val, i__1, i__2, i__3, i__4; /* Local variables */ integer j; doublereal p, t; integer bj; doublereal tmp; integer neg1, neg2; doublereal bsav, gamma, dplus; extern logical _starpu_disnan_(doublereal *); integer negcnt; logical sawnan; doublereal dminus; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLANEG computes the Sturm count, the number of negative pivots */ /* encountered while factoring tridiagonal T - sigma I = L D L^T. */ /* This implementation works directly on the factors without forming */ /* the tridiagonal matrix T. The Sturm count is also the number of */ /* eigenvalues of T less than sigma. */ /* This routine is called from DLARRB. */ /* The current routine does not use the PIVMIN parameter but rather */ /* requires IEEE-754 propagation of Infinities and NaNs. This */ /* routine also has no input range restrictions but does require */ /* default exception handling such that x/0 produces Inf when x is */ /* non-zero, and Inf/Inf produces NaN. For more information, see: */ /* Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in */ /* Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on */ /* Scientific Computing, v28, n5, 2006. DOI 10.1137/050641624 */ /* (Tech report version in LAWN 172 with the same title.) */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The N diagonal elements of the diagonal matrix D. */ /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (N-1) elements L(i)*L(i)*D(i). */ /* SIGMA (input) DOUBLE PRECISION */ /* Shift amount in T - sigma I = L D L^T. */ /* PIVMIN (input) DOUBLE PRECISION */ /* The minimum pivot in the Sturm sequence. May be used */ /* when zero pivots are encountered on non-IEEE-754 */ /* architectures. */ /* R (input) INTEGER */ /* The twist index for the twisted factorization that is used */ /* for the negcount. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Osni Marques, LBNL/NERSC, USA */ /* Christof Voemel, University of California, Berkeley, USA */ /* Jason Riedy, University of California, Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* Some architectures propagate Infinities and NaNs very slowly, so */ /* the code computes counts in BLKLEN chunks. Then a NaN can */ /* propagate at most BLKLEN columns before being detected. This is */ /* not a general tuning parameter; it needs only to be just large */ /* enough that the overhead is tiny in common cases. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --lld; --d__; /* Function Body */ negcnt = 0; /* I) upper part: L D L^T - SIGMA I = L+ D+ L+^T */ t = -(*sigma); i__1 = *r__ - 1; for (bj = 1; bj <= i__1; bj += 128) { neg1 = 0; bsav = t; /* Computing MIN */ i__3 = bj + 127, i__4 = *r__ - 1; i__2 = min(i__3,i__4); for (j = bj; j <= i__2; ++j) { dplus = d__[j] + t; if (dplus < 0.) { ++neg1; } tmp = t / dplus; t = tmp * lld[j] - *sigma; /* L21: */ } sawnan = _starpu_disnan_(&t); /* Run a slower version of the above loop if a NaN is detected. */ /* A NaN should occur only with a zero pivot after an infinite */ /* pivot. In that case, substituting 1 for T/DPLUS is the */ /* correct limit. */ if (sawnan) { neg1 = 0; t = bsav; /* Computing MIN */ i__3 = bj + 127, i__4 = *r__ - 1; i__2 = min(i__3,i__4); for (j = bj; j <= i__2; ++j) { dplus = d__[j] + t; if (dplus < 0.) { ++neg1; } tmp = t / dplus; if (_starpu_disnan_(&tmp)) { tmp = 1.; } t = tmp * lld[j] - *sigma; /* L22: */ } } negcnt += neg1; /* L210: */ } /* II) lower part: L D L^T - SIGMA I = U- D- U-^T */ p = d__[*n] - *sigma; i__1 = *r__; for (bj = *n - 1; bj >= i__1; bj += -128) { neg2 = 0; bsav = p; /* Computing MAX */ i__3 = bj - 127; i__2 = max(i__3,*r__); for (j = bj; j >= i__2; --j) { dminus = lld[j] + p; if (dminus < 0.) { ++neg2; } tmp = p / dminus; p = tmp * d__[j] - *sigma; /* L23: */ } sawnan = _starpu_disnan_(&p); /* As above, run a slower version that substitutes 1 for Inf/Inf. */ if (sawnan) { neg2 = 0; p = bsav; /* Computing MAX */ i__3 = bj - 127; i__2 = max(i__3,*r__); for (j = bj; j >= i__2; --j) { dminus = lld[j] + p; if (dminus < 0.) { ++neg2; } tmp = p / dminus; if (_starpu_disnan_(&tmp)) { tmp = 1.; } p = tmp * d__[j] - *sigma; /* L24: */ } } negcnt += neg2; /* L230: */ } /* III) Twist index */ /* T was shifted by SIGMA initially. */ gamma = t + *sigma + p; if (gamma < 0.) { ++negcnt; } ret_val = negcnt; return ret_val; } /* _starpu_dlaneg_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlangb.c000066400000000000000000000140571507764646700206320ustar00rootroot00000000000000/* dlangb.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dlangb_(char *norm, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal *work) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, k, l; doublereal sum, scale; extern logical _starpu_lsame_(char *, char *); doublereal value; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLANGB returns the value of the one norm, or the Frobenius norm, or */ /* the infinity norm, or the element of largest absolute value of an */ /* n by n band matrix A, with kl sub-diagonals and ku super-diagonals. */ /* Description */ /* =========== */ /* DLANGB returns the value */ /* DLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ /* ( */ /* ( norm1(A), NORM = '1', 'O' or 'o' */ /* ( */ /* ( normI(A), NORM = 'I' or 'i' */ /* ( */ /* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ /* where norm1 denotes the one norm of a matrix (maximum column sum), */ /* normI denotes the infinity norm of a matrix (maximum row sum) and */ /* normF denotes the Frobenius norm of a matrix (square root of sum of */ /* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies the value to be returned in DLANGB as described */ /* above. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. When N = 0, DLANGB is */ /* set to zero. */ /* KL (input) INTEGER */ /* The number of sub-diagonals of the matrix A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of super-diagonals of the matrix A. KU >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* The band matrix A, stored in rows 1 to KL+KU+1. The j-th */ /* column of A is stored in the j-th column of the array AB as */ /* follows: */ /* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KL+KU+1. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ /* where LWORK >= N when NORM = 'I'; otherwise, WORK is not */ /* referenced. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --work; /* Function Body */ if (*n == 0) { value = 0.; } else if (_starpu_lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ value = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = *ku + 2 - j; /* Computing MIN */ i__4 = *n + *ku + 1 - j, i__5 = *kl + *ku + 1; i__3 = min(i__4,i__5); for (i__ = max(i__2,1); i__ <= i__3; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = ab[i__ + j * ab_dim1], abs(d__1)) ; value = max(d__2,d__3); /* L10: */ } /* L20: */ } } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) norm == '1') { /* Find norm1(A). */ value = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = 0.; /* Computing MAX */ i__3 = *ku + 2 - j; /* Computing MIN */ i__4 = *n + *ku + 1 - j, i__5 = *kl + *ku + 1; i__2 = min(i__4,i__5); for (i__ = max(i__3,1); i__ <= i__2; ++i__) { sum += (d__1 = ab[i__ + j * ab_dim1], abs(d__1)); /* L30: */ } value = max(value,sum); /* L40: */ } } else if (_starpu_lsame_(norm, "I")) { /* Find normI(A). */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L50: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { k = *ku + 1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *ku; /* Computing MIN */ i__5 = *n, i__6 = j + *kl; i__4 = min(i__5,i__6); for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { work[i__] += (d__1 = ab[k + i__ + j * ab_dim1], abs(d__1)); /* L60: */ } /* L70: */ } value = 0.; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = value, d__2 = work[i__]; value = max(d__1,d__2); /* L80: */ } } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { /* Find normF(A). */ scale = 0.; sum = 1.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__4 = 1, i__2 = j - *ku; l = max(i__4,i__2); k = *ku + 1 - j + l; /* Computing MIN */ i__2 = *n, i__3 = j + *kl; i__4 = min(i__2,i__3) - l + 1; _starpu_dlassq_(&i__4, &ab[k + j * ab_dim1], &c__1, &scale, &sum); /* L90: */ } value = scale * sqrt(sum); } ret_val = value; return ret_val; /* End of DLANGB */ } /* _starpu_dlangb_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlange.c000066400000000000000000000121061507764646700206260ustar00rootroot00000000000000/* dlange.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dlange_(char *norm, integer *m, integer *n, doublereal *a, integer *lda, doublereal *work) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j; doublereal sum, scale; extern logical _starpu_lsame_(char *, char *); doublereal value; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLANGE returns the value of the one norm, or the Frobenius norm, or */ /* the infinity norm, or the element of largest absolute value of a */ /* real matrix A. */ /* Description */ /* =========== */ /* DLANGE returns the value */ /* DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ /* ( */ /* ( norm1(A), NORM = '1', 'O' or 'o' */ /* ( */ /* ( normI(A), NORM = 'I' or 'i' */ /* ( */ /* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ /* where norm1 denotes the one norm of a matrix (maximum column sum), */ /* normI denotes the infinity norm of a matrix (maximum row sum) and */ /* normF denotes the Frobenius norm of a matrix (square root of sum of */ /* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies the value to be returned in DLANGE as described */ /* above. */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. When M = 0, */ /* DLANGE is set to zero. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. When N = 0, */ /* DLANGE is set to zero. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The m by n matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(M,1). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ /* where LWORK >= M when NORM = 'I'; otherwise, WORK is not */ /* referenced. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --work; /* Function Body */ if (min(*m,*n) == 0) { value = 0.; } else if (_starpu_lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ value = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); value = max(d__2,d__3); /* L10: */ } /* L20: */ } } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) norm == '1') { /* Find norm1(A). */ value = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = 0.; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L30: */ } value = max(value,sum); /* L40: */ } } else if (_starpu_lsame_(norm, "I")) { /* Find normI(A). */ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L50: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L60: */ } /* L70: */ } value = 0.; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = value, d__2 = work[i__]; value = max(d__1,d__2); /* L80: */ } } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { /* Find normF(A). */ scale = 0.; sum = 1.; i__1 = *n; for (j = 1; j <= i__1; ++j) { _starpu_dlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); /* L90: */ } value = scale * sqrt(sum); } ret_val = value; return ret_val; /* End of DLANGE */ } /* _starpu_dlange_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlangt.c000066400000000000000000000125101507764646700206440ustar00rootroot00000000000000/* dlangt.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dlangt_(char *norm, integer *n, doublereal *dl, doublereal *d__, doublereal *du) { /* System generated locals */ integer i__1; doublereal ret_val, d__1, d__2, d__3, d__4, d__5; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__; doublereal sum, scale; extern logical _starpu_lsame_(char *, char *); doublereal anorm; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLANGT returns the value of the one norm, or the Frobenius norm, or */ /* the infinity norm, or the element of largest absolute value of a */ /* real tridiagonal matrix A. */ /* Description */ /* =========== */ /* DLANGT returns the value */ /* DLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ /* ( */ /* ( norm1(A), NORM = '1', 'O' or 'o' */ /* ( */ /* ( normI(A), NORM = 'I' or 'i' */ /* ( */ /* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ /* where norm1 denotes the one norm of a matrix (maximum column sum), */ /* normI denotes the infinity norm of a matrix (maximum row sum) and */ /* normF denotes the Frobenius norm of a matrix (square root of sum of */ /* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies the value to be returned in DLANGT as described */ /* above. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. When N = 0, DLANGT is */ /* set to zero. */ /* DL (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) sub-diagonal elements of A. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The diagonal elements of A. */ /* DU (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) super-diagonal elements of A. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --du; --d__; --dl; /* Function Body */ if (*n <= 0) { anorm = 0.; } else if (_starpu_lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ anorm = (d__1 = d__[*n], abs(d__1)); i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__2 = anorm, d__3 = (d__1 = dl[i__], abs(d__1)); anorm = max(d__2,d__3); /* Computing MAX */ d__2 = anorm, d__3 = (d__1 = d__[i__], abs(d__1)); anorm = max(d__2,d__3); /* Computing MAX */ d__2 = anorm, d__3 = (d__1 = du[i__], abs(d__1)); anorm = max(d__2,d__3); /* L10: */ } } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) norm == '1') { /* Find norm1(A). */ if (*n == 1) { anorm = abs(d__[1]); } else { /* Computing MAX */ d__3 = abs(d__[1]) + abs(dl[1]), d__4 = (d__1 = d__[*n], abs(d__1) ) + (d__2 = du[*n - 1], abs(d__2)); anorm = max(d__3,d__4); i__1 = *n - 1; for (i__ = 2; i__ <= i__1; ++i__) { /* Computing MAX */ d__4 = anorm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = dl[i__], abs(d__2)) + (d__3 = du[i__ - 1], abs(d__3)); anorm = max(d__4,d__5); /* L20: */ } } } else if (_starpu_lsame_(norm, "I")) { /* Find normI(A). */ if (*n == 1) { anorm = abs(d__[1]); } else { /* Computing MAX */ d__3 = abs(d__[1]) + abs(du[1]), d__4 = (d__1 = d__[*n], abs(d__1) ) + (d__2 = dl[*n - 1], abs(d__2)); anorm = max(d__3,d__4); i__1 = *n - 1; for (i__ = 2; i__ <= i__1; ++i__) { /* Computing MAX */ d__4 = anorm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = du[i__], abs(d__2)) + (d__3 = dl[i__ - 1], abs(d__3)); anorm = max(d__4,d__5); /* L30: */ } } } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { /* Find normF(A). */ scale = 0.; sum = 1.; _starpu_dlassq_(n, &d__[1], &c__1, &scale, &sum); if (*n > 1) { i__1 = *n - 1; _starpu_dlassq_(&i__1, &dl[1], &c__1, &scale, &sum); i__1 = *n - 1; _starpu_dlassq_(&i__1, &du[1], &c__1, &scale, &sum); } anorm = scale * sqrt(sum); } ret_val = anorm; return ret_val; /* End of DLANGT */ } /* _starpu_dlangt_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlanhs.c000066400000000000000000000124231507764646700206470ustar00rootroot00000000000000/* dlanhs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dlanhs_(char *norm, integer *n, doublereal *a, integer *lda, doublereal *work) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j; doublereal sum, scale; extern logical _starpu_lsame_(char *, char *); doublereal value; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLANHS returns the value of the one norm, or the Frobenius norm, or */ /* the infinity norm, or the element of largest absolute value of a */ /* Hessenberg matrix A. */ /* Description */ /* =========== */ /* DLANHS returns the value */ /* DLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ /* ( */ /* ( norm1(A), NORM = '1', 'O' or 'o' */ /* ( */ /* ( normI(A), NORM = 'I' or 'i' */ /* ( */ /* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ /* where norm1 denotes the one norm of a matrix (maximum column sum), */ /* normI denotes the infinity norm of a matrix (maximum row sum) and */ /* normF denotes the Frobenius norm of a matrix (square root of sum of */ /* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies the value to be returned in DLANHS as described */ /* above. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. When N = 0, DLANHS is */ /* set to zero. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The n by n upper Hessenberg matrix A; the part of A below the */ /* first sub-diagonal is not referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(N,1). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ /* where LWORK >= N when NORM = 'I'; otherwise, WORK is not */ /* referenced. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --work; /* Function Body */ if (*n == 0) { value = 0.; } else if (_starpu_lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ value = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = *n, i__4 = j + 1; i__2 = min(i__3,i__4); for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); value = max(d__2,d__3); /* L10: */ } /* L20: */ } } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) norm == '1') { /* Find norm1(A). */ value = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = 0.; /* Computing MIN */ i__3 = *n, i__4 = j + 1; i__2 = min(i__3,i__4); for (i__ = 1; i__ <= i__2; ++i__) { sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L30: */ } value = max(value,sum); /* L40: */ } } else if (_starpu_lsame_(norm, "I")) { /* Find normI(A). */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L50: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = *n, i__4 = j + 1; i__2 = min(i__3,i__4); for (i__ = 1; i__ <= i__2; ++i__) { work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L60: */ } /* L70: */ } value = 0.; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = value, d__2 = work[i__]; value = max(d__1,d__2); /* L80: */ } } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { /* Find normF(A). */ scale = 0.; sum = 1.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = *n, i__4 = j + 1; i__2 = min(i__3,i__4); _starpu_dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); /* L90: */ } value = scale * sqrt(sum); } ret_val = value; return ret_val; /* End of DLANHS */ } /* _starpu_dlanhs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlansb.c000066400000000000000000000161601507764646700206430ustar00rootroot00000000000000/* dlansb.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dlansb_(char *norm, char *uplo, integer *n, integer *k, doublereal *ab, integer *ldab, doublereal *work) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, l; doublereal sum, absa, scale; extern logical _starpu_lsame_(char *, char *); doublereal value; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLANSB returns the value of the one norm, or the Frobenius norm, or */ /* the infinity norm, or the element of largest absolute value of an */ /* n by n symmetric band matrix A, with k super-diagonals. */ /* Description */ /* =========== */ /* DLANSB returns the value */ /* DLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ /* ( */ /* ( norm1(A), NORM = '1', 'O' or 'o' */ /* ( */ /* ( normI(A), NORM = 'I' or 'i' */ /* ( */ /* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ /* where norm1 denotes the one norm of a matrix (maximum column sum), */ /* normI denotes the infinity norm of a matrix (maximum row sum) and */ /* normF denotes the Frobenius norm of a matrix (square root of sum of */ /* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies the value to be returned in DLANSB as described */ /* above. */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* band matrix A is supplied. */ /* = 'U': Upper triangular part is supplied */ /* = 'L': Lower triangular part is supplied */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. When N = 0, DLANSB is */ /* set to zero. */ /* K (input) INTEGER */ /* The number of super-diagonals or sub-diagonals of the */ /* band matrix A. K >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* The upper or lower triangle of the symmetric band matrix A, */ /* stored in the first K+1 rows of AB. The j-th column of A is */ /* stored in the j-th column of the array AB as follows: */ /* if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= K+1. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ /* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, */ /* WORK is not referenced. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --work; /* Function Body */ if (*n == 0) { value = 0.; } else if (_starpu_lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ value = 0.; if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = *k + 2 - j; i__3 = *k + 1; for (i__ = max(i__2,1); i__ <= i__3; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = ab[i__ + j * ab_dim1], abs( d__1)); value = max(d__2,d__3); /* L10: */ } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__2 = *n + 1 - j, i__4 = *k + 1; i__3 = min(i__2,i__4); for (i__ = 1; i__ <= i__3; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = ab[i__ + j * ab_dim1], abs( d__1)); value = max(d__2,d__3); /* L30: */ } /* L40: */ } } } else if (_starpu_lsame_(norm, "I") || _starpu_lsame_(norm, "O") || *(unsigned char *)norm == '1') { /* Find normI(A) ( = norm1(A), since A is symmetric). */ value = 0.; if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = 0.; l = *k + 1 - j; /* Computing MAX */ i__3 = 1, i__2 = j - *k; i__4 = j - 1; for (i__ = max(i__3,i__2); i__ <= i__4; ++i__) { absa = (d__1 = ab[l + i__ + j * ab_dim1], abs(d__1)); sum += absa; work[i__] += absa; /* L50: */ } work[j] = sum + (d__1 = ab[*k + 1 + j * ab_dim1], abs(d__1)); /* L60: */ } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = value, d__2 = work[i__]; value = max(d__1,d__2); /* L70: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L80: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = work[j] + (d__1 = ab[j * ab_dim1 + 1], abs(d__1)); l = 1 - j; /* Computing MIN */ i__3 = *n, i__2 = j + *k; i__4 = min(i__3,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { absa = (d__1 = ab[l + i__ + j * ab_dim1], abs(d__1)); sum += absa; work[i__] += absa; /* L90: */ } value = max(value,sum); /* L100: */ } } } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { /* Find normF(A). */ scale = 0.; sum = 1.; if (*k > 0) { if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 2; j <= i__1; ++j) { /* Computing MIN */ i__3 = j - 1; i__4 = min(i__3,*k); /* Computing MAX */ i__2 = *k + 2 - j; _starpu_dlassq_(&i__4, &ab[max(i__2, 1)+ j * ab_dim1], &c__1, & scale, &sum); /* L110: */ } l = *k + 1; } else { i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = *n - j; i__4 = min(i__3,*k); _starpu_dlassq_(&i__4, &ab[j * ab_dim1 + 2], &c__1, &scale, &sum); /* L120: */ } l = 1; } sum *= 2; } else { l = 1; } _starpu_dlassq_(n, &ab[l + ab_dim1], ldab, &scale, &sum); value = scale * sqrt(sum); } ret_val = value; return ret_val; /* End of DLANSB */ } /* _starpu_dlansb_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlansf.c000066400000000000000000000676041507764646700206600ustar00rootroot00000000000000/* dlansf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dlansf_(char *norm, char *transr, char *uplo, integer *n, doublereal *a, doublereal *work) { /* System generated locals */ integer i__1, i__2; doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, k, l; doublereal s; integer n1; doublereal aa; integer lda, ifm, noe, ilu; doublereal scale; extern logical _starpu_lsame_(char *, char *); doublereal value; extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLANSF returns the value of the one norm, or the Frobenius norm, or */ /* the infinity norm, or the element of largest absolute value of a */ /* real symmetric matrix A in RFP format. */ /* Description */ /* =========== */ /* DLANSF returns the value */ /* DLANSF = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ /* ( */ /* ( norm1(A), NORM = '1', 'O' or 'o' */ /* ( */ /* ( normI(A), NORM = 'I' or 'i' */ /* ( */ /* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ /* where norm1 denotes the one norm of a matrix (maximum column sum), */ /* normI denotes the infinity norm of a matrix (maximum row sum) and */ /* normF denotes the Frobenius norm of a matrix (square root of sum of */ /* squares). Note that max(abs(A(i,j))) is not a matrix norm. */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER */ /* Specifies the value to be returned in DLANSF as described */ /* above. */ /* TRANSR (input) CHARACTER */ /* Specifies whether the RFP format of A is normal or */ /* transposed format. */ /* = 'N': RFP format is Normal; */ /* = 'T': RFP format is Transpose. */ /* UPLO (input) CHARACTER */ /* On entry, UPLO specifies whether the RFP matrix A came from */ /* an upper or lower triangular matrix as follows: */ /* = 'U': RFP A came from an upper triangular matrix; */ /* = 'L': RFP A came from a lower triangular matrix. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. When N = 0, DLANSF is */ /* set to zero. */ /* A (input) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ); */ /* On entry, the upper (if UPLO = 'U') or lower (if UPLO = 'L') */ /* part of the symmetric matrix A stored in RFP format. See the */ /* "Notes" below for more details. */ /* Unchanged on exit. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ /* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, */ /* WORK is not referenced. */ /* Notes */ /* ===== */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* even. We give an example where N = 6. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 05 00 */ /* 11 12 13 14 15 10 11 */ /* 22 23 24 25 20 21 22 */ /* 33 34 35 30 31 32 33 */ /* 44 45 40 41 42 43 44 */ /* 55 50 51 52 53 54 55 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ /* the transpose of the first three columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ /* the transpose of the last three columns of AP lower. */ /* This covers the case N even and TRANSR = 'N'. */ /* RFP A RFP A */ /* 03 04 05 33 43 53 */ /* 13 14 15 00 44 54 */ /* 23 24 25 10 11 55 */ /* 33 34 35 20 21 22 */ /* 00 44 45 30 31 32 */ /* 01 11 55 40 41 42 */ /* 02 12 22 50 51 52 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ /* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ /* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* odd. We give an example where N = 5. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 00 */ /* 11 12 13 14 10 11 */ /* 22 23 24 20 21 22 */ /* 33 34 30 31 32 33 */ /* 44 40 41 42 43 44 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ /* the transpose of the first two columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ /* the transpose of the last two columns of AP lower. */ /* This covers the case N odd and TRANSR = 'N'. */ /* RFP A RFP A */ /* 02 03 04 00 33 43 */ /* 12 13 14 10 11 44 */ /* 22 23 24 20 21 22 */ /* 00 33 34 30 31 32 */ /* 01 11 44 40 41 42 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 02 12 22 00 01 00 10 20 30 40 50 */ /* 03 13 23 33 11 33 11 21 31 41 51 */ /* 04 14 24 34 44 43 44 22 32 42 52 */ /* Reference */ /* ========= */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ if (*n == 0) { ret_val = 0.; return ret_val; } /* set noe = 1 if n is odd. if n is even set noe=0 */ noe = 1; if (*n % 2 == 0) { noe = 0; } /* set ifm = 0 when form='T or 't' and 1 otherwise */ ifm = 1; if (_starpu_lsame_(transr, "T")) { ifm = 0; } /* set ilu = 0 when uplo='U or 'u' and 1 otherwise */ ilu = 1; if (_starpu_lsame_(uplo, "U")) { ilu = 0; } /* set lda = (n+1)/2 when ifm = 0 */ /* set lda = n when ifm = 1 and noe = 1 */ /* set lda = n+1 when ifm = 1 and noe = 0 */ if (ifm == 1) { if (noe == 1) { lda = *n; } else { /* noe=0 */ lda = *n + 1; } } else { /* ifm=0 */ lda = (*n + 1) / 2; } if (_starpu_lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ k = (*n + 1) / 2; value = 0.; if (noe == 1) { /* n is odd */ if (ifm == 1) { /* A is n by k */ i__1 = k - 1; for (j = 0; j <= i__1; ++j) { i__2 = *n - 1; for (i__ = 0; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = a[i__ + j * lda], abs( d__1)); value = max(d__2,d__3); } } } else { /* xpose case; A is k by n */ i__1 = *n - 1; for (j = 0; j <= i__1; ++j) { i__2 = k - 1; for (i__ = 0; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = a[i__ + j * lda], abs( d__1)); value = max(d__2,d__3); } } } } else { /* n is even */ if (ifm == 1) { /* A is n+1 by k */ i__1 = k - 1; for (j = 0; j <= i__1; ++j) { i__2 = *n; for (i__ = 0; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = a[i__ + j * lda], abs( d__1)); value = max(d__2,d__3); } } } else { /* xpose case; A is k by n+1 */ i__1 = *n; for (j = 0; j <= i__1; ++j) { i__2 = k - 1; for (i__ = 0; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = a[i__ + j * lda], abs( d__1)); value = max(d__2,d__3); } } } } } else if (_starpu_lsame_(norm, "I") || _starpu_lsame_(norm, "O") || *(unsigned char *)norm == '1') { /* Find normI(A) ( = norm1(A), since A is symmetric). */ if (ifm == 1) { k = *n / 2; if (noe == 1) { /* n is odd */ if (ilu == 0) { i__1 = k - 1; for (i__ = 0; i__ <= i__1; ++i__) { work[i__] = 0.; } i__1 = k; for (j = 0; j <= i__1; ++j) { s = 0.; i__2 = k + j - 1; for (i__ = 0; i__ <= i__2; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(i,j+k) */ s += aa; work[i__] += aa; } aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(j+k,j+k) */ work[j + k] = s + aa; if (i__ == k + k) { goto L10; } ++i__; aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(j,j) */ work[j] += aa; s = 0.; i__2 = k - 1; for (l = j + 1; l <= i__2; ++l) { ++i__; aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(l,j) */ s += aa; work[l] += aa; } work[j] += s; } L10: i__ = _starpu_idamax_(n, work, &c__1); value = work[i__ - 1]; } else { /* ilu = 1 */ ++k; /* k=(n+1)/2 for n odd and ilu=1 */ i__1 = *n - 1; for (i__ = k; i__ <= i__1; ++i__) { work[i__] = 0.; } for (j = k - 1; j >= 0; --j) { s = 0.; i__1 = j - 2; for (i__ = 0; i__ <= i__1; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(j+k,i+k) */ s += aa; work[i__ + k] += aa; } if (j > 0) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(j+k,j+k) */ s += aa; work[i__ + k] += s; /* i=j */ ++i__; } aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(j,j) */ work[j] = aa; s = 0.; i__1 = *n - 1; for (l = j + 1; l <= i__1; ++l) { ++i__; aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(l,j) */ s += aa; work[l] += aa; } work[j] += s; } i__ = _starpu_idamax_(n, work, &c__1); value = work[i__ - 1]; } } else { /* n is even */ if (ilu == 0) { i__1 = k - 1; for (i__ = 0; i__ <= i__1; ++i__) { work[i__] = 0.; } i__1 = k - 1; for (j = 0; j <= i__1; ++j) { s = 0.; i__2 = k + j - 1; for (i__ = 0; i__ <= i__2; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(i,j+k) */ s += aa; work[i__] += aa; } aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(j+k,j+k) */ work[j + k] = s + aa; ++i__; aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(j,j) */ work[j] += aa; s = 0.; i__2 = k - 1; for (l = j + 1; l <= i__2; ++l) { ++i__; aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(l,j) */ s += aa; work[l] += aa; } work[j] += s; } i__ = _starpu_idamax_(n, work, &c__1); value = work[i__ - 1]; } else { /* ilu = 1 */ i__1 = *n - 1; for (i__ = k; i__ <= i__1; ++i__) { work[i__] = 0.; } for (j = k - 1; j >= 0; --j) { s = 0.; i__1 = j - 1; for (i__ = 0; i__ <= i__1; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(j+k,i+k) */ s += aa; work[i__ + k] += aa; } aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(j+k,j+k) */ s += aa; work[i__ + k] += s; /* i=j */ ++i__; aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(j,j) */ work[j] = aa; s = 0.; i__1 = *n - 1; for (l = j + 1; l <= i__1; ++l) { ++i__; aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* -> A(l,j) */ s += aa; work[l] += aa; } work[j] += s; } i__ = _starpu_idamax_(n, work, &c__1); value = work[i__ - 1]; } } } else { /* ifm=0 */ k = *n / 2; if (noe == 1) { /* n is odd */ if (ilu == 0) { n1 = k; /* n/2 */ ++k; /* k is the row size and lda */ i__1 = *n - 1; for (i__ = n1; i__ <= i__1; ++i__) { work[i__] = 0.; } i__1 = n1 - 1; for (j = 0; j <= i__1; ++j) { s = 0.; i__2 = k - 1; for (i__ = 0; i__ <= i__2; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(j,n1+i) */ work[i__ + n1] += aa; s += aa; } work[j] = s; } /* j=n1=k-1 is special */ s = (d__1 = a[j * lda], abs(d__1)); /* A(k-1,k-1) */ i__1 = k - 1; for (i__ = 1; i__ <= i__1; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(k-1,i+n1) */ work[i__ + n1] += aa; s += aa; } work[j] += s; i__1 = *n - 1; for (j = k; j <= i__1; ++j) { s = 0.; i__2 = j - k - 1; for (i__ = 0; i__ <= i__2; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(i,j-k) */ work[i__] += aa; s += aa; } /* i=j-k */ aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(j-k,j-k) */ s += aa; work[j - k] += s; ++i__; s = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(j,j) */ i__2 = *n - 1; for (l = j + 1; l <= i__2; ++l) { ++i__; aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(j,l) */ work[l] += aa; s += aa; } work[j] += s; } i__ = _starpu_idamax_(n, work, &c__1); value = work[i__ - 1]; } else { /* ilu=1 */ ++k; /* k=(n+1)/2 for n odd and ilu=1 */ i__1 = *n - 1; for (i__ = k; i__ <= i__1; ++i__) { work[i__] = 0.; } i__1 = k - 2; for (j = 0; j <= i__1; ++j) { /* process */ s = 0.; i__2 = j - 1; for (i__ = 0; i__ <= i__2; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(j,i) */ work[i__] += aa; s += aa; } aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* i=j so process of A(j,j) */ s += aa; work[j] = s; /* is initialised here */ ++i__; /* i=j process A(j+k,j+k) */ aa = (d__1 = a[i__ + j * lda], abs(d__1)); s = aa; i__2 = *n - 1; for (l = k + j + 1; l <= i__2; ++l) { ++i__; aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(l,k+j) */ s += aa; work[l] += aa; } work[k + j] += s; } /* j=k-1 is special :process col A(k-1,0:k-1) */ s = 0.; i__1 = k - 2; for (i__ = 0; i__ <= i__1; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(k,i) */ work[i__] += aa; s += aa; } /* i=k-1 */ aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(k-1,k-1) */ s += aa; work[i__] = s; /* done with col j=k+1 */ i__1 = *n - 1; for (j = k; j <= i__1; ++j) { /* process col j of A = A(j,0:k-1) */ s = 0.; i__2 = k - 1; for (i__ = 0; i__ <= i__2; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(j,i) */ work[i__] += aa; s += aa; } work[j] += s; } i__ = _starpu_idamax_(n, work, &c__1); value = work[i__ - 1]; } } else { /* n is even */ if (ilu == 0) { i__1 = *n - 1; for (i__ = k; i__ <= i__1; ++i__) { work[i__] = 0.; } i__1 = k - 1; for (j = 0; j <= i__1; ++j) { s = 0.; i__2 = k - 1; for (i__ = 0; i__ <= i__2; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(j,i+k) */ work[i__ + k] += aa; s += aa; } work[j] = s; } /* j=k */ aa = (d__1 = a[j * lda], abs(d__1)); /* A(k,k) */ s = aa; i__1 = k - 1; for (i__ = 1; i__ <= i__1; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(k,k+i) */ work[i__ + k] += aa; s += aa; } work[j] += s; i__1 = *n - 1; for (j = k + 1; j <= i__1; ++j) { s = 0.; i__2 = j - 2 - k; for (i__ = 0; i__ <= i__2; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(i,j-k-1) */ work[i__] += aa; s += aa; } /* i=j-1-k */ aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(j-k-1,j-k-1) */ s += aa; work[j - k - 1] += s; ++i__; aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(j,j) */ s = aa; i__2 = *n - 1; for (l = j + 1; l <= i__2; ++l) { ++i__; aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(j,l) */ work[l] += aa; s += aa; } work[j] += s; } /* j=n */ s = 0.; i__1 = k - 2; for (i__ = 0; i__ <= i__1; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(i,k-1) */ work[i__] += aa; s += aa; } /* i=k-1 */ aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(k-1,k-1) */ s += aa; work[i__] += s; i__ = _starpu_idamax_(n, work, &c__1); value = work[i__ - 1]; } else { /* ilu=1 */ i__1 = *n - 1; for (i__ = k; i__ <= i__1; ++i__) { work[i__] = 0.; } /* j=0 is special :process col A(k:n-1,k) */ s = abs(a[0]); /* A(k,k) */ i__1 = k - 1; for (i__ = 1; i__ <= i__1; ++i__) { aa = (d__1 = a[i__], abs(d__1)); /* A(k+i,k) */ work[i__ + k] += aa; s += aa; } work[k] += s; i__1 = k - 1; for (j = 1; j <= i__1; ++j) { /* process */ s = 0.; i__2 = j - 2; for (i__ = 0; i__ <= i__2; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(j-1,i) */ work[i__] += aa; s += aa; } aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* i=j-1 so process of A(j-1,j-1) */ s += aa; work[j - 1] = s; /* is initialised here */ ++i__; /* i=j process A(j+k,j+k) */ aa = (d__1 = a[i__ + j * lda], abs(d__1)); s = aa; i__2 = *n - 1; for (l = k + j + 1; l <= i__2; ++l) { ++i__; aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(l,k+j) */ s += aa; work[l] += aa; } work[k + j] += s; } /* j=k is special :process col A(k,0:k-1) */ s = 0.; i__1 = k - 2; for (i__ = 0; i__ <= i__1; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(k,i) */ work[i__] += aa; s += aa; } /* i=k-1 */ aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(k-1,k-1) */ s += aa; work[i__] = s; /* done with col j=k+1 */ i__1 = *n; for (j = k + 1; j <= i__1; ++j) { /* process col j-1 of A = A(j-1,0:k-1) */ s = 0.; i__2 = k - 1; for (i__ = 0; i__ <= i__2; ++i__) { aa = (d__1 = a[i__ + j * lda], abs(d__1)); /* A(j-1,i) */ work[i__] += aa; s += aa; } work[j - 1] += s; } i__ = _starpu_idamax_(n, work, &c__1); value = work[i__ - 1]; } } } } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { /* Find normF(A). */ k = (*n + 1) / 2; scale = 0.; s = 1.; if (noe == 1) { /* n is odd */ if (ifm == 1) { /* A is normal */ if (ilu == 0) { /* A is upper */ i__1 = k - 3; for (j = 0; j <= i__1; ++j) { i__2 = k - j - 2; _starpu_dlassq_(&i__2, &a[k + j + 1 + j * lda], &c__1, &scale, &s); /* L at A(k,0) */ } i__1 = k - 1; for (j = 0; j <= i__1; ++j) { i__2 = k + j - 1; _starpu_dlassq_(&i__2, &a[j * lda], &c__1, &scale, &s); /* trap U at A(0,0) */ } s += s; /* double s for the off diagonal elements */ i__1 = k - 1; i__2 = lda + 1; _starpu_dlassq_(&i__1, &a[k], &i__2, &scale, &s); /* tri L at A(k,0) */ i__1 = lda + 1; _starpu_dlassq_(&k, &a[k - 1], &i__1, &scale, &s); /* tri U at A(k-1,0) */ } else { /* ilu=1 & A is lower */ i__1 = k - 1; for (j = 0; j <= i__1; ++j) { i__2 = *n - j - 1; _starpu_dlassq_(&i__2, &a[j + 1 + j * lda], &c__1, &scale, &s) ; /* trap L at A(0,0) */ } i__1 = k - 2; for (j = 0; j <= i__1; ++j) { _starpu_dlassq_(&j, &a[(j + 1) * lda], &c__1, &scale, &s); /* U at A(0,1) */ } s += s; /* double s for the off diagonal elements */ i__1 = lda + 1; _starpu_dlassq_(&k, a, &i__1, &scale, &s); /* tri L at A(0,0) */ i__1 = k - 1; i__2 = lda + 1; _starpu_dlassq_(&i__1, &a[lda], &i__2, &scale, &s); /* tri U at A(0,1) */ } } else { /* A is xpose */ if (ilu == 0) { /* A' is upper */ i__1 = k - 2; for (j = 1; j <= i__1; ++j) { _starpu_dlassq_(&j, &a[(k + j) * lda], &c__1, &scale, &s); /* U at A(0,k) */ } i__1 = k - 2; for (j = 0; j <= i__1; ++j) { _starpu_dlassq_(&k, &a[j * lda], &c__1, &scale, &s); /* k by k-1 rect. at A(0,0) */ } i__1 = k - 2; for (j = 0; j <= i__1; ++j) { i__2 = k - j - 1; _starpu_dlassq_(&i__2, &a[j + 1 + (j + k - 1) * lda], &c__1, & scale, &s); /* L at A(0,k-1) */ } s += s; /* double s for the off diagonal elements */ i__1 = k - 1; i__2 = lda + 1; _starpu_dlassq_(&i__1, &a[k * lda], &i__2, &scale, &s); /* tri U at A(0,k) */ i__1 = lda + 1; _starpu_dlassq_(&k, &a[(k - 1) * lda], &i__1, &scale, &s); /* tri L at A(0,k-1) */ } else { /* A' is lower */ i__1 = k - 1; for (j = 1; j <= i__1; ++j) { _starpu_dlassq_(&j, &a[j * lda], &c__1, &scale, &s); /* U at A(0,0) */ } i__1 = *n - 1; for (j = k; j <= i__1; ++j) { _starpu_dlassq_(&k, &a[j * lda], &c__1, &scale, &s); /* k by k-1 rect. at A(0,k) */ } i__1 = k - 3; for (j = 0; j <= i__1; ++j) { i__2 = k - j - 2; _starpu_dlassq_(&i__2, &a[j + 2 + j * lda], &c__1, &scale, &s) ; /* L at A(1,0) */ } s += s; /* double s for the off diagonal elements */ i__1 = lda + 1; _starpu_dlassq_(&k, a, &i__1, &scale, &s); /* tri U at A(0,0) */ i__1 = k - 1; i__2 = lda + 1; _starpu_dlassq_(&i__1, &a[1], &i__2, &scale, &s); /* tri L at A(1,0) */ } } } else { /* n is even */ if (ifm == 1) { /* A is normal */ if (ilu == 0) { /* A is upper */ i__1 = k - 2; for (j = 0; j <= i__1; ++j) { i__2 = k - j - 1; _starpu_dlassq_(&i__2, &a[k + j + 2 + j * lda], &c__1, &scale, &s); /* L at A(k+1,0) */ } i__1 = k - 1; for (j = 0; j <= i__1; ++j) { i__2 = k + j; _starpu_dlassq_(&i__2, &a[j * lda], &c__1, &scale, &s); /* trap U at A(0,0) */ } s += s; /* double s for the off diagonal elements */ i__1 = lda + 1; _starpu_dlassq_(&k, &a[k + 1], &i__1, &scale, &s); /* tri L at A(k+1,0) */ i__1 = lda + 1; _starpu_dlassq_(&k, &a[k], &i__1, &scale, &s); /* tri U at A(k,0) */ } else { /* ilu=1 & A is lower */ i__1 = k - 1; for (j = 0; j <= i__1; ++j) { i__2 = *n - j - 1; _starpu_dlassq_(&i__2, &a[j + 2 + j * lda], &c__1, &scale, &s) ; /* trap L at A(1,0) */ } i__1 = k - 1; for (j = 1; j <= i__1; ++j) { _starpu_dlassq_(&j, &a[j * lda], &c__1, &scale, &s); /* U at A(0,0) */ } s += s; /* double s for the off diagonal elements */ i__1 = lda + 1; _starpu_dlassq_(&k, &a[1], &i__1, &scale, &s); /* tri L at A(1,0) */ i__1 = lda + 1; _starpu_dlassq_(&k, a, &i__1, &scale, &s); /* tri U at A(0,0) */ } } else { /* A is xpose */ if (ilu == 0) { /* A' is upper */ i__1 = k - 1; for (j = 1; j <= i__1; ++j) { _starpu_dlassq_(&j, &a[(k + 1 + j) * lda], &c__1, &scale, &s); /* U at A(0,k+1) */ } i__1 = k - 1; for (j = 0; j <= i__1; ++j) { _starpu_dlassq_(&k, &a[j * lda], &c__1, &scale, &s); /* k by k rect. at A(0,0) */ } i__1 = k - 2; for (j = 0; j <= i__1; ++j) { i__2 = k - j - 1; _starpu_dlassq_(&i__2, &a[j + 1 + (j + k) * lda], &c__1, & scale, &s); /* L at A(0,k) */ } s += s; /* double s for the off diagonal elements */ i__1 = lda + 1; _starpu_dlassq_(&k, &a[(k + 1) * lda], &i__1, &scale, &s); /* tri U at A(0,k+1) */ i__1 = lda + 1; _starpu_dlassq_(&k, &a[k * lda], &i__1, &scale, &s); /* tri L at A(0,k) */ } else { /* A' is lower */ i__1 = k - 1; for (j = 1; j <= i__1; ++j) { _starpu_dlassq_(&j, &a[(j + 1) * lda], &c__1, &scale, &s); /* U at A(0,1) */ } i__1 = *n; for (j = k + 1; j <= i__1; ++j) { _starpu_dlassq_(&k, &a[j * lda], &c__1, &scale, &s); /* k by k rect. at A(0,k+1) */ } i__1 = k - 2; for (j = 0; j <= i__1; ++j) { i__2 = k - j - 1; _starpu_dlassq_(&i__2, &a[j + 1 + j * lda], &c__1, &scale, &s) ; /* L at A(0,0) */ } s += s; /* double s for the off diagonal elements */ i__1 = lda + 1; _starpu_dlassq_(&k, &a[lda], &i__1, &scale, &s); /* tri L at A(0,1) */ i__1 = lda + 1; _starpu_dlassq_(&k, a, &i__1, &scale, &s); /* tri U at A(0,0) */ } } } value = scale * sqrt(s); } ret_val = value; return ret_val; /* End of DLANSF */ } /* _starpu_dlansf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlansp.c000066400000000000000000000152371507764646700206650ustar00rootroot00000000000000/* dlansp.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dlansp_(char *norm, char *uplo, integer *n, doublereal *ap, doublereal *work) { /* System generated locals */ integer i__1, i__2; doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, k; doublereal sum, absa, scale; extern logical _starpu_lsame_(char *, char *); doublereal value; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLANSP returns the value of the one norm, or the Frobenius norm, or */ /* the infinity norm, or the element of largest absolute value of a */ /* real symmetric matrix A, supplied in packed form. */ /* Description */ /* =========== */ /* DLANSP returns the value */ /* DLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ /* ( */ /* ( norm1(A), NORM = '1', 'O' or 'o' */ /* ( */ /* ( normI(A), NORM = 'I' or 'i' */ /* ( */ /* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ /* where norm1 denotes the one norm of a matrix (maximum column sum), */ /* normI denotes the infinity norm of a matrix (maximum row sum) and */ /* normF denotes the Frobenius norm of a matrix (square root of sum of */ /* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies the value to be returned in DLANSP as described */ /* above. */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is supplied. */ /* = 'U': Upper triangular part of A is supplied */ /* = 'L': Lower triangular part of A is supplied */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. When N = 0, DLANSP is */ /* set to zero. */ /* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The upper or lower triangle of the symmetric matrix A, packed */ /* columnwise in a linear array. The j-th column of A is stored */ /* in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ /* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, */ /* WORK is not referenced. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --work; --ap; /* Function Body */ if (*n == 0) { value = 0.; } else if (_starpu_lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ value = 0.; if (_starpu_lsame_(uplo, "U")) { k = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = k + j - 1; for (i__ = k; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = ap[i__], abs(d__1)); value = max(d__2,d__3); /* L10: */ } k += j; /* L20: */ } } else { k = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = k + *n - j; for (i__ = k; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = ap[i__], abs(d__1)); value = max(d__2,d__3); /* L30: */ } k = k + *n - j + 1; /* L40: */ } } } else if (_starpu_lsame_(norm, "I") || _starpu_lsame_(norm, "O") || *(unsigned char *)norm == '1') { /* Find normI(A) ( = norm1(A), since A is symmetric). */ value = 0.; k = 1; if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = 0.; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { absa = (d__1 = ap[k], abs(d__1)); sum += absa; work[i__] += absa; ++k; /* L50: */ } work[j] = sum + (d__1 = ap[k], abs(d__1)); ++k; /* L60: */ } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = value, d__2 = work[i__]; value = max(d__1,d__2); /* L70: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L80: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = work[j] + (d__1 = ap[k], abs(d__1)); ++k; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { absa = (d__1 = ap[k], abs(d__1)); sum += absa; work[i__] += absa; ++k; /* L90: */ } value = max(value,sum); /* L100: */ } } } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { /* Find normF(A). */ scale = 0.; sum = 1.; k = 2; if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 2; j <= i__1; ++j) { i__2 = j - 1; _starpu_dlassq_(&i__2, &ap[k], &c__1, &scale, &sum); k += j; /* L110: */ } } else { i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { i__2 = *n - j; _starpu_dlassq_(&i__2, &ap[k], &c__1, &scale, &sum); k = k + *n - j + 1; /* L120: */ } } sum *= 2; k = 1; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (ap[k] != 0.) { absa = (d__1 = ap[k], abs(d__1)); if (scale < absa) { /* Computing 2nd power */ d__1 = scale / absa; sum = sum * (d__1 * d__1) + 1.; scale = absa; } else { /* Computing 2nd power */ d__1 = absa / scale; sum += d__1 * d__1; } } if (_starpu_lsame_(uplo, "U")) { k = k + i__ + 1; } else { k = k + *n - i__ + 1; } /* L130: */ } value = scale * sqrt(sum); } ret_val = value; return ret_val; /* End of DLANSP */ } /* _starpu_dlansp_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlanst.c000066400000000000000000000110451507764646700206620ustar00rootroot00000000000000/* dlanst.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e) { /* System generated locals */ integer i__1; doublereal ret_val, d__1, d__2, d__3, d__4, d__5; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__; doublereal sum, scale; extern logical _starpu_lsame_(char *, char *); doublereal anorm; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLANST returns the value of the one norm, or the Frobenius norm, or */ /* the infinity norm, or the element of largest absolute value of a */ /* real symmetric tridiagonal matrix A. */ /* Description */ /* =========== */ /* DLANST returns the value */ /* DLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ /* ( */ /* ( norm1(A), NORM = '1', 'O' or 'o' */ /* ( */ /* ( normI(A), NORM = 'I' or 'i' */ /* ( */ /* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ /* where norm1 denotes the one norm of a matrix (maximum column sum), */ /* normI denotes the infinity norm of a matrix (maximum row sum) and */ /* normF denotes the Frobenius norm of a matrix (square root of sum of */ /* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies the value to be returned in DLANST as described */ /* above. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. When N = 0, DLANST is */ /* set to zero. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The diagonal elements of A. */ /* E (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) sub-diagonal or super-diagonal elements of A. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --e; --d__; /* Function Body */ if (*n <= 0) { anorm = 0.; } else if (_starpu_lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ anorm = (d__1 = d__[*n], abs(d__1)); i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__2 = anorm, d__3 = (d__1 = d__[i__], abs(d__1)); anorm = max(d__2,d__3); /* Computing MAX */ d__2 = anorm, d__3 = (d__1 = e[i__], abs(d__1)); anorm = max(d__2,d__3); /* L10: */ } } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) norm == '1' || _starpu_lsame_(norm, "I")) { /* Find norm1(A). */ if (*n == 1) { anorm = abs(d__[1]); } else { /* Computing MAX */ d__3 = abs(d__[1]) + abs(e[1]), d__4 = (d__1 = e[*n - 1], abs( d__1)) + (d__2 = d__[*n], abs(d__2)); anorm = max(d__3,d__4); i__1 = *n - 1; for (i__ = 2; i__ <= i__1; ++i__) { /* Computing MAX */ d__4 = anorm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = e[ i__], abs(d__2)) + (d__3 = e[i__ - 1], abs(d__3)); anorm = max(d__4,d__5); /* L20: */ } } } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { /* Find normF(A). */ scale = 0.; sum = 1.; if (*n > 1) { i__1 = *n - 1; _starpu_dlassq_(&i__1, &e[1], &c__1, &scale, &sum); sum *= 2; } _starpu_dlassq_(n, &d__[1], &c__1, &scale, &sum); anorm = scale * sqrt(sum); } ret_val = anorm; return ret_val; /* End of DLANST */ } /* _starpu_dlanst_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlansy.c000066400000000000000000000150461507764646700206740ustar00rootroot00000000000000/* dlansy.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *work) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j; doublereal sum, absa, scale; extern logical _starpu_lsame_(char *, char *); doublereal value; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLANSY returns the value of the one norm, or the Frobenius norm, or */ /* the infinity norm, or the element of largest absolute value of a */ /* real symmetric matrix A. */ /* Description */ /* =========== */ /* DLANSY returns the value */ /* DLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ /* ( */ /* ( norm1(A), NORM = '1', 'O' or 'o' */ /* ( */ /* ( normI(A), NORM = 'I' or 'i' */ /* ( */ /* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ /* where norm1 denotes the one norm of a matrix (maximum column sum), */ /* normI denotes the infinity norm of a matrix (maximum row sum) and */ /* normF denotes the Frobenius norm of a matrix (square root of sum of */ /* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies the value to be returned in DLANSY as described */ /* above. */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is to be referenced. */ /* = 'U': Upper triangular part of A is referenced */ /* = 'L': Lower triangular part of A is referenced */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. When N = 0, DLANSY is */ /* set to zero. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The symmetric matrix A. If UPLO = 'U', the leading n by n */ /* upper triangular part of A contains the upper triangular part */ /* of the matrix A, and the strictly lower triangular part of A */ /* is not referenced. If UPLO = 'L', the leading n by n lower */ /* triangular part of A contains the lower triangular part of */ /* the matrix A, and the strictly upper triangular part of A is */ /* not referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(N,1). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ /* where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, */ /* WORK is not referenced. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --work; /* Function Body */ if (*n == 0) { value = 0.; } else if (_starpu_lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ value = 0.; if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( d__1)); value = max(d__2,d__3); /* L10: */ } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( d__1)); value = max(d__2,d__3); /* L30: */ } /* L40: */ } } } else if (_starpu_lsame_(norm, "I") || _starpu_lsame_(norm, "O") || *(unsigned char *)norm == '1') { /* Find normI(A) ( = norm1(A), since A is symmetric). */ value = 0.; if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = 0.; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); sum += absa; work[i__] += absa; /* L50: */ } work[j] = sum + (d__1 = a[j + j * a_dim1], abs(d__1)); /* L60: */ } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = value, d__2 = work[i__]; value = max(d__1,d__2); /* L70: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L80: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = work[j] + (d__1 = a[j + j * a_dim1], abs(d__1)); i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); sum += absa; work[i__] += absa; /* L90: */ } value = max(value,sum); /* L100: */ } } } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { /* Find normF(A). */ scale = 0.; sum = 1.; if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 2; j <= i__1; ++j) { i__2 = j - 1; _starpu_dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); /* L110: */ } } else { i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { i__2 = *n - j; _starpu_dlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); /* L120: */ } } sum *= 2; i__1 = *lda + 1; _starpu_dlassq_(n, &a[a_offset], &i__1, &scale, &sum); value = scale * sqrt(sum); } ret_val = value; return ret_val; /* End of DLANSY */ } /* _starpu_dlansy_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlantb.c000066400000000000000000000254331507764646700206470ustar00rootroot00000000000000/* dlantb.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dlantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, doublereal *ab, integer *ldab, doublereal *work) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5; doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, l; doublereal sum, scale; logical udiag; extern logical _starpu_lsame_(char *, char *); doublereal value; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLANTB returns the value of the one norm, or the Frobenius norm, or */ /* the infinity norm, or the element of largest absolute value of an */ /* n by n triangular band matrix A, with ( k + 1 ) diagonals. */ /* Description */ /* =========== */ /* DLANTB returns the value */ /* DLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ /* ( */ /* ( norm1(A), NORM = '1', 'O' or 'o' */ /* ( */ /* ( normI(A), NORM = 'I' or 'i' */ /* ( */ /* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ /* where norm1 denotes the one norm of a matrix (maximum column sum), */ /* normI denotes the infinity norm of a matrix (maximum row sum) and */ /* normF denotes the Frobenius norm of a matrix (square root of sum of */ /* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies the value to be returned in DLANTB as described */ /* above. */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the matrix A is upper or lower triangular. */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* DIAG (input) CHARACTER*1 */ /* Specifies whether or not the matrix A is unit triangular. */ /* = 'N': Non-unit triangular */ /* = 'U': Unit triangular */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. When N = 0, DLANTB is */ /* set to zero. */ /* K (input) INTEGER */ /* The number of super-diagonals of the matrix A if UPLO = 'U', */ /* or the number of sub-diagonals of the matrix A if UPLO = 'L'. */ /* K >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* The upper or lower triangular band matrix A, stored in the */ /* first k+1 rows of AB. The j-th column of A is stored */ /* in the j-th column of the array AB as follows: */ /* if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). */ /* Note that when DIAG = 'U', the elements of the array AB */ /* corresponding to the diagonal elements of the matrix A are */ /* not referenced, but are assumed to be one. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= K+1. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ /* where LWORK >= N when NORM = 'I'; otherwise, WORK is not */ /* referenced. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --work; /* Function Body */ if (*n == 0) { value = 0.; } else if (_starpu_lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ if (_starpu_lsame_(diag, "U")) { value = 1.; if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = *k + 2 - j; i__3 = *k; for (i__ = max(i__2,1); i__ <= i__3; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = ab[i__ + j * ab_dim1], abs(d__1)); value = max(d__2,d__3); /* L10: */ } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__2 = *n + 1 - j, i__4 = *k + 1; i__3 = min(i__2,i__4); for (i__ = 2; i__ <= i__3; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = ab[i__ + j * ab_dim1], abs(d__1)); value = max(d__2,d__3); /* L30: */ } /* L40: */ } } } else { value = 0.; if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__3 = *k + 2 - j; i__2 = *k + 1; for (i__ = max(i__3,1); i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = ab[i__ + j * ab_dim1], abs(d__1)); value = max(d__2,d__3); /* L50: */ } /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = *n + 1 - j, i__4 = *k + 1; i__2 = min(i__3,i__4); for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = ab[i__ + j * ab_dim1], abs(d__1)); value = max(d__2,d__3); /* L70: */ } /* L80: */ } } } } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) norm == '1') { /* Find norm1(A). */ value = 0.; udiag = _starpu_lsame_(diag, "U"); if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (udiag) { sum = 1.; /* Computing MAX */ i__2 = *k + 2 - j; i__3 = *k; for (i__ = max(i__2,1); i__ <= i__3; ++i__) { sum += (d__1 = ab[i__ + j * ab_dim1], abs(d__1)); /* L90: */ } } else { sum = 0.; /* Computing MAX */ i__3 = *k + 2 - j; i__2 = *k + 1; for (i__ = max(i__3,1); i__ <= i__2; ++i__) { sum += (d__1 = ab[i__ + j * ab_dim1], abs(d__1)); /* L100: */ } } value = max(value,sum); /* L110: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (udiag) { sum = 1.; /* Computing MIN */ i__3 = *n + 1 - j, i__4 = *k + 1; i__2 = min(i__3,i__4); for (i__ = 2; i__ <= i__2; ++i__) { sum += (d__1 = ab[i__ + j * ab_dim1], abs(d__1)); /* L120: */ } } else { sum = 0.; /* Computing MIN */ i__3 = *n + 1 - j, i__4 = *k + 1; i__2 = min(i__3,i__4); for (i__ = 1; i__ <= i__2; ++i__) { sum += (d__1 = ab[i__ + j * ab_dim1], abs(d__1)); /* L130: */ } } value = max(value,sum); /* L140: */ } } } else if (_starpu_lsame_(norm, "I")) { /* Find normI(A). */ value = 0.; if (_starpu_lsame_(uplo, "U")) { if (_starpu_lsame_(diag, "U")) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 1.; /* L150: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { l = *k + 1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { work[i__] += (d__1 = ab[l + i__ + j * ab_dim1], abs( d__1)); /* L160: */ } /* L170: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L180: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { l = *k + 1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j; for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { work[i__] += (d__1 = ab[l + i__ + j * ab_dim1], abs( d__1)); /* L190: */ } /* L200: */ } } } else { if (_starpu_lsame_(diag, "U")) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 1.; /* L210: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { l = 1 - j; /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__3 = min(i__4,i__2); for (i__ = j + 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = ab[l + i__ + j * ab_dim1], abs( d__1)); /* L220: */ } /* L230: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L240: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { l = 1 - j; /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__3 = min(i__4,i__2); for (i__ = j; i__ <= i__3; ++i__) { work[i__] += (d__1 = ab[l + i__ + j * ab_dim1], abs( d__1)); /* L250: */ } /* L260: */ } } } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = value, d__2 = work[i__]; value = max(d__1,d__2); /* L270: */ } } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { /* Find normF(A). */ if (_starpu_lsame_(uplo, "U")) { if (_starpu_lsame_(diag, "U")) { scale = 1.; sum = (doublereal) (*n); if (*k > 0) { i__1 = *n; for (j = 2; j <= i__1; ++j) { /* Computing MIN */ i__4 = j - 1; i__3 = min(i__4,*k); /* Computing MAX */ i__2 = *k + 2 - j; _starpu_dlassq_(&i__3, &ab[max(i__2, 1)+ j * ab_dim1], &c__1, &scale, &sum); /* L280: */ } } } else { scale = 0.; sum = 1.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__4 = j, i__2 = *k + 1; i__3 = min(i__4,i__2); /* Computing MAX */ i__5 = *k + 2 - j; _starpu_dlassq_(&i__3, &ab[max(i__5, 1)+ j * ab_dim1], &c__1, & scale, &sum); /* L290: */ } } } else { if (_starpu_lsame_(diag, "U")) { scale = 1.; sum = (doublereal) (*n); if (*k > 0) { i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__4 = *n - j; i__3 = min(i__4,*k); _starpu_dlassq_(&i__3, &ab[j * ab_dim1 + 2], &c__1, &scale, & sum); /* L300: */ } } } else { scale = 0.; sum = 1.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__4 = *n - j + 1, i__2 = *k + 1; i__3 = min(i__4,i__2); _starpu_dlassq_(&i__3, &ab[j * ab_dim1 + 1], &c__1, &scale, &sum); /* L310: */ } } } value = scale * sqrt(sum); } ret_val = value; return ret_val; /* End of DLANTB */ } /* _starpu_dlantb_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlantp.c000066400000000000000000000222511507764646700206600ustar00rootroot00000000000000/* dlantp.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dlantp_(char *norm, char *uplo, char *diag, integer *n, doublereal *ap, doublereal *work) { /* System generated locals */ integer i__1, i__2; doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, k; doublereal sum, scale; logical udiag; extern logical _starpu_lsame_(char *, char *); doublereal value; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLANTP returns the value of the one norm, or the Frobenius norm, or */ /* the infinity norm, or the element of largest absolute value of a */ /* triangular matrix A, supplied in packed form. */ /* Description */ /* =========== */ /* DLANTP returns the value */ /* DLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ /* ( */ /* ( norm1(A), NORM = '1', 'O' or 'o' */ /* ( */ /* ( normI(A), NORM = 'I' or 'i' */ /* ( */ /* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ /* where norm1 denotes the one norm of a matrix (maximum column sum), */ /* normI denotes the infinity norm of a matrix (maximum row sum) and */ /* normF denotes the Frobenius norm of a matrix (square root of sum of */ /* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies the value to be returned in DLANTP as described */ /* above. */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the matrix A is upper or lower triangular. */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* DIAG (input) CHARACTER*1 */ /* Specifies whether or not the matrix A is unit triangular. */ /* = 'N': Non-unit triangular */ /* = 'U': Unit triangular */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. When N = 0, DLANTP is */ /* set to zero. */ /* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The upper or lower triangular matrix A, packed columnwise in */ /* a linear array. The j-th column of A is stored in the array */ /* AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* Note that when DIAG = 'U', the elements of the array AP */ /* corresponding to the diagonal elements of the matrix A are */ /* not referenced, but are assumed to be one. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ /* where LWORK >= N when NORM = 'I'; otherwise, WORK is not */ /* referenced. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --work; --ap; /* Function Body */ if (*n == 0) { value = 0.; } else if (_starpu_lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ k = 1; if (_starpu_lsame_(diag, "U")) { value = 1.; if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = k + j - 2; for (i__ = k; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = ap[i__], abs(d__1)); value = max(d__2,d__3); /* L10: */ } k += j; /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = k + *n - j; for (i__ = k + 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = ap[i__], abs(d__1)); value = max(d__2,d__3); /* L30: */ } k = k + *n - j + 1; /* L40: */ } } } else { value = 0.; if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = k + j - 1; for (i__ = k; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = ap[i__], abs(d__1)); value = max(d__2,d__3); /* L50: */ } k += j; /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = k + *n - j; for (i__ = k; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = ap[i__], abs(d__1)); value = max(d__2,d__3); /* L70: */ } k = k + *n - j + 1; /* L80: */ } } } } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) norm == '1') { /* Find norm1(A). */ value = 0.; k = 1; udiag = _starpu_lsame_(diag, "U"); if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (udiag) { sum = 1.; i__2 = k + j - 2; for (i__ = k; i__ <= i__2; ++i__) { sum += (d__1 = ap[i__], abs(d__1)); /* L90: */ } } else { sum = 0.; i__2 = k + j - 1; for (i__ = k; i__ <= i__2; ++i__) { sum += (d__1 = ap[i__], abs(d__1)); /* L100: */ } } k += j; value = max(value,sum); /* L110: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (udiag) { sum = 1.; i__2 = k + *n - j; for (i__ = k + 1; i__ <= i__2; ++i__) { sum += (d__1 = ap[i__], abs(d__1)); /* L120: */ } } else { sum = 0.; i__2 = k + *n - j; for (i__ = k; i__ <= i__2; ++i__) { sum += (d__1 = ap[i__], abs(d__1)); /* L130: */ } } k = k + *n - j + 1; value = max(value,sum); /* L140: */ } } } else if (_starpu_lsame_(norm, "I")) { /* Find normI(A). */ k = 1; if (_starpu_lsame_(uplo, "U")) { if (_starpu_lsame_(diag, "U")) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 1.; /* L150: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] += (d__1 = ap[k], abs(d__1)); ++k; /* L160: */ } ++k; /* L170: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L180: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] += (d__1 = ap[k], abs(d__1)); ++k; /* L190: */ } /* L200: */ } } } else { if (_starpu_lsame_(diag, "U")) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 1.; /* L210: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { ++k; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { work[i__] += (d__1 = ap[k], abs(d__1)); ++k; /* L220: */ } /* L230: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L240: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { work[i__] += (d__1 = ap[k], abs(d__1)); ++k; /* L250: */ } /* L260: */ } } } value = 0.; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = value, d__2 = work[i__]; value = max(d__1,d__2); /* L270: */ } } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { /* Find normF(A). */ if (_starpu_lsame_(uplo, "U")) { if (_starpu_lsame_(diag, "U")) { scale = 1.; sum = (doublereal) (*n); k = 2; i__1 = *n; for (j = 2; j <= i__1; ++j) { i__2 = j - 1; _starpu_dlassq_(&i__2, &ap[k], &c__1, &scale, &sum); k += j; /* L280: */ } } else { scale = 0.; sum = 1.; k = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { _starpu_dlassq_(&j, &ap[k], &c__1, &scale, &sum); k += j; /* L290: */ } } } else { if (_starpu_lsame_(diag, "U")) { scale = 1.; sum = (doublereal) (*n); k = 2; i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { i__2 = *n - j; _starpu_dlassq_(&i__2, &ap[k], &c__1, &scale, &sum); k = k + *n - j + 1; /* L300: */ } } else { scale = 0.; sum = 1.; k = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n - j + 1; _starpu_dlassq_(&i__2, &ap[k], &c__1, &scale, &sum); k = k + *n - j + 1; /* L310: */ } } } value = scale * sqrt(sum); } ret_val = value; return ret_val; /* End of DLANTP */ } /* _starpu_dlantp_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlantr.c000066400000000000000000000240141507764646700206610ustar00rootroot00000000000000/* dlantr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; doublereal _starpu_dlantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, doublereal *a, integer *lda, doublereal *work) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j; doublereal sum, scale; logical udiag; extern logical _starpu_lsame_(char *, char *); doublereal value; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLANTR returns the value of the one norm, or the Frobenius norm, or */ /* the infinity norm, or the element of largest absolute value of a */ /* trapezoidal or triangular matrix A. */ /* Description */ /* =========== */ /* DLANTR returns the value */ /* DLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ /* ( */ /* ( norm1(A), NORM = '1', 'O' or 'o' */ /* ( */ /* ( normI(A), NORM = 'I' or 'i' */ /* ( */ /* ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ /* where norm1 denotes the one norm of a matrix (maximum column sum), */ /* normI denotes the infinity norm of a matrix (maximum row sum) and */ /* normF denotes the Frobenius norm of a matrix (square root of sum of */ /* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies the value to be returned in DLANTR as described */ /* above. */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the matrix A is upper or lower trapezoidal. */ /* = 'U': Upper trapezoidal */ /* = 'L': Lower trapezoidal */ /* Note that A is triangular instead of trapezoidal if M = N. */ /* DIAG (input) CHARACTER*1 */ /* Specifies whether or not the matrix A has unit diagonal. */ /* = 'N': Non-unit diagonal */ /* = 'U': Unit diagonal */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0, and if */ /* UPLO = 'U', M <= N. When M = 0, DLANTR is set to zero. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0, and if */ /* UPLO = 'L', N <= M. When N = 0, DLANTR is set to zero. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The trapezoidal matrix A (A is triangular if M = N). */ /* If UPLO = 'U', the leading m by n upper trapezoidal part of */ /* the array A contains the upper trapezoidal matrix, and the */ /* strictly lower triangular part of A is not referenced. */ /* If UPLO = 'L', the leading m by n lower trapezoidal part of */ /* the array A contains the lower trapezoidal matrix, and the */ /* strictly upper triangular part of A is not referenced. Note */ /* that when DIAG = 'U', the diagonal elements of A are not */ /* referenced and are assumed to be one. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(M,1). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ /* where LWORK >= M when NORM = 'I'; otherwise, WORK is not */ /* referenced. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --work; /* Function Body */ if (min(*m,*n) == 0) { value = 0.; } else if (_starpu_lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ if (_starpu_lsame_(diag, "U")) { value = 1.; if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = *m, i__4 = j - 1; i__2 = min(i__3,i__4); for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( d__1)); value = max(d__2,d__3); /* L10: */ } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = j + 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( d__1)); value = max(d__2,d__3); /* L30: */ } /* L40: */ } } } else { value = 0.; if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = min(*m,j); for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( d__1)); value = max(d__2,d__3); /* L50: */ } /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = j; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( d__1)); value = max(d__2,d__3); /* L70: */ } /* L80: */ } } } } else if (_starpu_lsame_(norm, "O") || *(unsigned char *) norm == '1') { /* Find norm1(A). */ value = 0.; udiag = _starpu_lsame_(diag, "U"); if (_starpu_lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (udiag && j <= *m) { sum = 1.; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L90: */ } } else { sum = 0.; i__2 = min(*m,j); for (i__ = 1; i__ <= i__2; ++i__) { sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L100: */ } } value = max(value,sum); /* L110: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (udiag) { sum = 1.; i__2 = *m; for (i__ = j + 1; i__ <= i__2; ++i__) { sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L120: */ } } else { sum = 0.; i__2 = *m; for (i__ = j; i__ <= i__2; ++i__) { sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L130: */ } } value = max(value,sum); /* L140: */ } } } else if (_starpu_lsame_(norm, "I")) { /* Find normI(A). */ if (_starpu_lsame_(uplo, "U")) { if (_starpu_lsame_(diag, "U")) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 1.; /* L150: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = *m, i__4 = j - 1; i__2 = min(i__3,i__4); for (i__ = 1; i__ <= i__2; ++i__) { work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L160: */ } /* L170: */ } } else { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L180: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = min(*m,j); for (i__ = 1; i__ <= i__2; ++i__) { work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L190: */ } /* L200: */ } } } else { if (_starpu_lsame_(diag, "U")) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 1.; /* L210: */ } i__1 = *m; for (i__ = *n + 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L220: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = j + 1; i__ <= i__2; ++i__) { work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L230: */ } /* L240: */ } } else { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L250: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = j; i__ <= i__2; ++i__) { work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L260: */ } /* L270: */ } } } value = 0.; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = value, d__2 = work[i__]; value = max(d__1,d__2); /* L280: */ } } else if (_starpu_lsame_(norm, "F") || _starpu_lsame_(norm, "E")) { /* Find normF(A). */ if (_starpu_lsame_(uplo, "U")) { if (_starpu_lsame_(diag, "U")) { scale = 1.; sum = (doublereal) min(*m,*n); i__1 = *n; for (j = 2; j <= i__1; ++j) { /* Computing MIN */ i__3 = *m, i__4 = j - 1; i__2 = min(i__3,i__4); _starpu_dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); /* L290: */ } } else { scale = 0.; sum = 1.; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = min(*m,j); _starpu_dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); /* L300: */ } } } else { if (_starpu_lsame_(diag, "U")) { scale = 1.; sum = (doublereal) min(*m,*n); i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m - j; /* Computing MIN */ i__3 = *m, i__4 = j + 1; _starpu_dlassq_(&i__2, &a[min(i__3, i__4)+ j * a_dim1], &c__1, & scale, &sum); /* L310: */ } } else { scale = 0.; sum = 1.; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m - j + 1; _starpu_dlassq_(&i__2, &a[j + j * a_dim1], &c__1, &scale, &sum); /* L320: */ } } } value = scale * sqrt(sum); } ret_val = value; return ret_val; /* End of DLANTR */ } /* _starpu_dlantr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlanv2.c000066400000000000000000000141411507764646700205630ustar00rootroot00000000000000/* dlanv2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b4 = 1.; /* Subroutine */ int _starpu_dlanv2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *rt1r, doublereal *rt1i, doublereal *rt2r, doublereal *rt2i, doublereal *cs, doublereal *sn) { /* System generated locals */ doublereal d__1, d__2; /* Builtin functions */ double d_sign(doublereal *, doublereal *), sqrt(doublereal); /* Local variables */ doublereal p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, temp, scale, bcmax, bcmis, sigma; extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric */ /* matrix in standard form: */ /* [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] */ /* [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] */ /* where either */ /* 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or */ /* 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex */ /* conjugate eigenvalues. */ /* Arguments */ /* ========= */ /* A (input/output) DOUBLE PRECISION */ /* B (input/output) DOUBLE PRECISION */ /* C (input/output) DOUBLE PRECISION */ /* D (input/output) DOUBLE PRECISION */ /* On entry, the elements of the input matrix. */ /* On exit, they are overwritten by the elements of the */ /* standardised Schur form. */ /* RT1R (output) DOUBLE PRECISION */ /* RT1I (output) DOUBLE PRECISION */ /* RT2R (output) DOUBLE PRECISION */ /* RT2I (output) DOUBLE PRECISION */ /* The real and imaginary parts of the eigenvalues. If the */ /* eigenvalues are a complex conjugate pair, RT1I > 0. */ /* CS (output) DOUBLE PRECISION */ /* SN (output) DOUBLE PRECISION */ /* Parameters of the rotation matrix. */ /* Further Details */ /* =============== */ /* Modified by V. Sima, Research Institute for Informatics, Bucharest, */ /* Romania, to reduce the risk of cancellation errors, */ /* when computing real eigenvalues, and to ensure, if possible, that */ /* abs(RT1R) >= abs(RT2R). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ eps = _starpu_dlamch_("P"); if (*c__ == 0.) { *cs = 1.; *sn = 0.; goto L10; } else if (*b == 0.) { /* Swap rows and columns */ *cs = 0.; *sn = 1.; temp = *d__; *d__ = *a; *a = temp; *b = -(*c__); *c__ = 0.; goto L10; } else if (*a - *d__ == 0. && d_sign(&c_b4, b) != d_sign(&c_b4, c__)) { *cs = 1.; *sn = 0.; goto L10; } else { temp = *a - *d__; p = temp * .5; /* Computing MAX */ d__1 = abs(*b), d__2 = abs(*c__); bcmax = max(d__1,d__2); /* Computing MIN */ d__1 = abs(*b), d__2 = abs(*c__); bcmis = min(d__1,d__2) * d_sign(&c_b4, b) * d_sign(&c_b4, c__); /* Computing MAX */ d__1 = abs(p); scale = max(d__1,bcmax); z__ = p / scale * p + bcmax / scale * bcmis; /* If Z is of the order of the machine accuracy, postpone the */ /* decision on the nature of eigenvalues */ if (z__ >= eps * 4.) { /* Real eigenvalues. Compute A and D. */ d__1 = sqrt(scale) * sqrt(z__); z__ = p + d_sign(&d__1, &p); *a = *d__ + z__; *d__ -= bcmax / z__ * bcmis; /* Compute B and the rotation matrix */ tau = _starpu_dlapy2_(c__, &z__); *cs = z__ / tau; *sn = *c__ / tau; *b -= *c__; *c__ = 0.; } else { /* Complex eigenvalues, or real (almost) equal eigenvalues. */ /* Make diagonal elements equal. */ sigma = *b + *c__; tau = _starpu_dlapy2_(&sigma, &temp); *cs = sqrt((abs(sigma) / tau + 1.) * .5); *sn = -(p / (tau * *cs)) * d_sign(&c_b4, &sigma); /* Compute [ AA BB ] = [ A B ] [ CS -SN ] */ /* [ CC DD ] [ C D ] [ SN CS ] */ aa = *a * *cs + *b * *sn; bb = -(*a) * *sn + *b * *cs; cc = *c__ * *cs + *d__ * *sn; dd = -(*c__) * *sn + *d__ * *cs; /* Compute [ A B ] = [ CS SN ] [ AA BB ] */ /* [ C D ] [-SN CS ] [ CC DD ] */ *a = aa * *cs + cc * *sn; *b = bb * *cs + dd * *sn; *c__ = -aa * *sn + cc * *cs; *d__ = -bb * *sn + dd * *cs; temp = (*a + *d__) * .5; *a = temp; *d__ = temp; if (*c__ != 0.) { if (*b != 0.) { if (d_sign(&c_b4, b) == d_sign(&c_b4, c__)) { /* Real eigenvalues: reduce to upper triangular form */ sab = sqrt((abs(*b))); sac = sqrt((abs(*c__))); d__1 = sab * sac; p = d_sign(&d__1, c__); tau = 1. / sqrt((d__1 = *b + *c__, abs(d__1))); *a = temp + p; *d__ = temp - p; *b -= *c__; *c__ = 0.; cs1 = sab * tau; sn1 = sac * tau; temp = *cs * cs1 - *sn * sn1; *sn = *cs * sn1 + *sn * cs1; *cs = temp; } } else { *b = -(*c__); *c__ = 0.; temp = *cs; *cs = -(*sn); *sn = temp; } } } } L10: /* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). */ *rt1r = *a; *rt2r = *d__; if (*c__ == 0.) { *rt1i = 0.; *rt2i = 0.; } else { *rt1i = sqrt((abs(*b))) * sqrt((abs(*c__))); *rt2i = -(*rt1i); } return 0; /* End of DLANV2 */ } /* _starpu_dlanv2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlapll.c000066400000000000000000000073131507764646700206500ustar00rootroot00000000000000/* dlapll.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlapll_(integer *n, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *ssmin) { /* System generated locals */ integer i__1; /* Local variables */ doublereal c__, a11, a12, a22, tau; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dlas2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); doublereal ssmax; extern /* Subroutine */ int _starpu_dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* Given two column vectors X and Y, let */ /* A = ( X Y ). */ /* The subroutine first computes the QR factorization of A = Q*R, */ /* and then computes the SVD of the 2-by-2 upper triangular matrix R. */ /* The smaller singular value of R is returned in SSMIN, which is used */ /* as the measurement of the linear dependency of the vectors X and Y. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The length of the vectors X and Y. */ /* X (input/output) DOUBLE PRECISION array, */ /* dimension (1+(N-1)*INCX) */ /* On entry, X contains the N-vector X. */ /* On exit, X is overwritten. */ /* INCX (input) INTEGER */ /* The increment between successive elements of X. INCX > 0. */ /* Y (input/output) DOUBLE PRECISION array, */ /* dimension (1+(N-1)*INCY) */ /* On entry, Y contains the N-vector Y. */ /* On exit, Y is overwritten. */ /* INCY (input) INTEGER */ /* The increment between successive elements of Y. INCY > 0. */ /* SSMIN (output) DOUBLE PRECISION */ /* The smallest singular value of the N-by-2 matrix A = ( X Y ). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ --y; --x; /* Function Body */ if (*n <= 1) { *ssmin = 0.; return 0; } /* Compute the QR factorization of the N-by-2 matrix ( X Y ) */ _starpu_dlarfg_(n, &x[1], &x[*incx + 1], incx, &tau); a11 = x[1]; x[1] = 1.; c__ = -tau * _starpu_ddot_(n, &x[1], incx, &y[1], incy); _starpu_daxpy_(n, &c__, &x[1], incx, &y[1], incy); i__1 = *n - 1; _starpu_dlarfg_(&i__1, &y[*incy + 1], &y[(*incy << 1) + 1], incy, &tau); a12 = y[1]; a22 = y[*incy + 1]; /* Compute the SVD of 2-by-2 Upper triangular matrix. */ _starpu_dlas2_(&a11, &a12, &a22, ssmin, &ssmax); return 0; /* End of DLAPLL */ } /* _starpu_dlapll_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlapmt.c000066400000000000000000000072531507764646700206640ustar00rootroot00000000000000/* dlapmt.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlapmt_(logical *forwrd, integer *m, integer *n, doublereal *x, integer *ldx, integer *k) { /* System generated locals */ integer x_dim1, x_offset, i__1, i__2; /* Local variables */ integer i__, j, ii, in; doublereal temp; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAPMT rearranges the columns of the M by N matrix X as specified */ /* by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. */ /* If FORWRD = .TRUE., forward permutation: */ /* X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. */ /* If FORWRD = .FALSE., backward permutation: */ /* X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. */ /* Arguments */ /* ========= */ /* FORWRD (input) LOGICAL */ /* = .TRUE., forward permutation */ /* = .FALSE., backward permutation */ /* M (input) INTEGER */ /* The number of rows of the matrix X. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix X. N >= 0. */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,N) */ /* On entry, the M by N matrix X. */ /* On exit, X contains the permuted matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X, LDX >= MAX(1,M). */ /* K (input/output) INTEGER array, dimension (N) */ /* On entry, K contains the permutation vector. K is used as */ /* internal workspace, but reset to its original value on */ /* output. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --k; /* Function Body */ if (*n <= 1) { return 0; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { k[i__] = -k[i__]; /* L10: */ } if (*forwrd) { /* Forward permutation */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (k[i__] > 0) { goto L40; } j = i__; k[j] = -k[j]; in = k[j]; L20: if (k[in] > 0) { goto L40; } i__2 = *m; for (ii = 1; ii <= i__2; ++ii) { temp = x[ii + j * x_dim1]; x[ii + j * x_dim1] = x[ii + in * x_dim1]; x[ii + in * x_dim1] = temp; /* L30: */ } k[in] = -k[in]; j = in; in = k[in]; goto L20; L40: /* L50: */ ; } } else { /* Backward permutation */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (k[i__] > 0) { goto L80; } k[i__] = -k[i__]; j = k[i__]; L60: if (j == i__) { goto L80; } i__2 = *m; for (ii = 1; ii <= i__2; ++ii) { temp = x[ii + i__ * x_dim1]; x[ii + i__ * x_dim1] = x[ii + j * x_dim1]; x[ii + j * x_dim1] = temp; /* L70: */ } k[j] = -k[j]; j = k[j]; goto L60; L80: /* L90: */ ; } } return 0; /* End of DLAPMT */ } /* _starpu_dlapmt_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlapy2.c000066400000000000000000000033661507764646700205770ustar00rootroot00000000000000/* dlapy2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_dlapy2_(doublereal *x, doublereal *y) { /* System generated locals */ doublereal ret_val, d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal w, z__, xabs, yabs; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary */ /* overflow. */ /* Arguments */ /* ========= */ /* X (input) DOUBLE PRECISION */ /* Y (input) DOUBLE PRECISION */ /* X and Y specify the values x and y. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ xabs = abs(*x); yabs = abs(*y); w = max(xabs,yabs); z__ = min(xabs,yabs); if (z__ == 0.) { ret_val = w; } else { /* Computing 2nd power */ d__1 = z__ / w; ret_val = w * sqrt(d__1 * d__1 + 1.); } return ret_val; /* End of DLAPY2 */ } /* _starpu_dlapy2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlapy3.c000066400000000000000000000041671507764646700206000ustar00rootroot00000000000000/* dlapy3.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_dlapy3_(doublereal *x, doublereal *y, doublereal *z__) { /* System generated locals */ doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal w, xabs, yabs, zabs; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause */ /* unnecessary overflow. */ /* Arguments */ /* ========= */ /* X (input) DOUBLE PRECISION */ /* Y (input) DOUBLE PRECISION */ /* Z (input) DOUBLE PRECISION */ /* X, Y and Z specify the values x, y and z. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ xabs = abs(*x); yabs = abs(*y); zabs = abs(*z__); /* Computing MAX */ d__1 = max(xabs,yabs); w = max(d__1,zabs); if (w == 0.) { /* W can be zero for max(0,nan,0) */ /* adding all three entries together will make sure */ /* NaN will not disappear. */ ret_val = xabs + yabs + zabs; } else { /* Computing 2nd power */ d__1 = xabs / w; /* Computing 2nd power */ d__2 = yabs / w; /* Computing 2nd power */ d__3 = zabs / w; ret_val = w * sqrt(d__1 * d__1 + d__2 * d__2 + d__3 * d__3); } return ret_val; /* End of DLAPY3 */ } /* _starpu_dlapy3_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaqgb.c000066400000000000000000000142371507764646700206350ustar00rootroot00000000000000/* dlaqgb.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaqgb_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; /* Local variables */ integer i__, j; doublereal cj, large, small; extern doublereal _starpu_dlamch_(char *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAQGB equilibrates a general M by N band matrix A with KL */ /* subdiagonals and KU superdiagonals using the row and scaling factors */ /* in the vectors R and C. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ /* The j-th column of A is stored in the j-th column of the */ /* array AB as follows: */ /* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) */ /* On exit, the equilibrated matrix, in the same storage format */ /* as A. See EQUED for the form of the equilibrated matrix. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDA >= KL+KU+1. */ /* R (input) DOUBLE PRECISION array, dimension (M) */ /* The row scale factors for A. */ /* C (input) DOUBLE PRECISION array, dimension (N) */ /* The column scale factors for A. */ /* ROWCND (input) DOUBLE PRECISION */ /* Ratio of the smallest R(i) to the largest R(i). */ /* COLCND (input) DOUBLE PRECISION */ /* Ratio of the smallest C(i) to the largest C(i). */ /* AMAX (input) DOUBLE PRECISION */ /* Absolute value of largest matrix entry. */ /* EQUED (output) CHARACTER*1 */ /* Specifies the form of equilibration that was done. */ /* = 'N': No equilibration */ /* = 'R': Row equilibration, i.e., A has been premultiplied by */ /* diag(R). */ /* = 'C': Column equilibration, i.e., A has been postmultiplied */ /* by diag(C). */ /* = 'B': Both row and column equilibration, i.e., A has been */ /* replaced by diag(R) * A * diag(C). */ /* Internal Parameters */ /* =================== */ /* THRESH is a threshold value used to decide if row or column scaling */ /* should be done based on the ratio of the row or column scaling */ /* factors. If ROWCND < THRESH, row scaling is done, and if */ /* COLCND < THRESH, column scaling is done. */ /* LARGE and SMALL are threshold values used to decide if row scaling */ /* should be done based on the absolute size of the largest matrix */ /* element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --r__; --c__; /* Function Body */ if (*m <= 0 || *n <= 0) { *(unsigned char *)equed = 'N'; return 0; } /* Initialize LARGE and SMALL. */ small = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision"); large = 1. / small; if (*rowcnd >= .1 && *amax >= small && *amax <= large) { /* No row scaling */ if (*colcnd >= .1) { /* No column scaling */ *(unsigned char *)equed = 'N'; } else { /* Column scaling */ i__1 = *n; for (j = 1; j <= i__1; ++j) { cj = c__[j]; /* Computing MAX */ i__2 = 1, i__3 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__4 = min(i__5,i__6); for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { ab[*ku + 1 + i__ - j + j * ab_dim1] = cj * ab[*ku + 1 + i__ - j + j * ab_dim1]; /* L10: */ } /* L20: */ } *(unsigned char *)equed = 'C'; } } else if (*colcnd >= .1) { /* Row scaling, no column scaling */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__4 = 1, i__2 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__3 = min(i__5,i__6); for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { ab[*ku + 1 + i__ - j + j * ab_dim1] = r__[i__] * ab[*ku + 1 + i__ - j + j * ab_dim1]; /* L30: */ } /* L40: */ } *(unsigned char *)equed = 'R'; } else { /* Row and column scaling */ i__1 = *n; for (j = 1; j <= i__1; ++j) { cj = c__[j]; /* Computing MAX */ i__3 = 1, i__4 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__2 = min(i__5,i__6); for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) { ab[*ku + 1 + i__ - j + j * ab_dim1] = cj * r__[i__] * ab[*ku + 1 + i__ - j + j * ab_dim1]; /* L50: */ } /* L60: */ } *(unsigned char *)equed = 'B'; } return 0; /* End of DLAQGB */ } /* _starpu_dlaqgb_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaqge.c000066400000000000000000000121141507764646700206300ustar00rootroot00000000000000/* dlaqge.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaqge_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j; doublereal cj, large, small; extern doublereal _starpu_dlamch_(char *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAQGE equilibrates a general M by N matrix A using the row and */ /* column scaling factors in the vectors R and C. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M by N matrix A. */ /* On exit, the equilibrated matrix. See EQUED for the form of */ /* the equilibrated matrix. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(M,1). */ /* R (input) DOUBLE PRECISION array, dimension (M) */ /* The row scale factors for A. */ /* C (input) DOUBLE PRECISION array, dimension (N) */ /* The column scale factors for A. */ /* ROWCND (input) DOUBLE PRECISION */ /* Ratio of the smallest R(i) to the largest R(i). */ /* COLCND (input) DOUBLE PRECISION */ /* Ratio of the smallest C(i) to the largest C(i). */ /* AMAX (input) DOUBLE PRECISION */ /* Absolute value of largest matrix entry. */ /* EQUED (output) CHARACTER*1 */ /* Specifies the form of equilibration that was done. */ /* = 'N': No equilibration */ /* = 'R': Row equilibration, i.e., A has been premultiplied by */ /* diag(R). */ /* = 'C': Column equilibration, i.e., A has been postmultiplied */ /* by diag(C). */ /* = 'B': Both row and column equilibration, i.e., A has been */ /* replaced by diag(R) * A * diag(C). */ /* Internal Parameters */ /* =================== */ /* THRESH is a threshold value used to decide if row or column scaling */ /* should be done based on the ratio of the row or column scaling */ /* factors. If ROWCND < THRESH, row scaling is done, and if */ /* COLCND < THRESH, column scaling is done. */ /* LARGE and SMALL are threshold values used to decide if row scaling */ /* should be done based on the absolute size of the largest matrix */ /* element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --r__; --c__; /* Function Body */ if (*m <= 0 || *n <= 0) { *(unsigned char *)equed = 'N'; return 0; } /* Initialize LARGE and SMALL. */ small = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision"); large = 1. / small; if (*rowcnd >= .1 && *amax >= small && *amax <= large) { /* No row scaling */ if (*colcnd >= .1) { /* No column scaling */ *(unsigned char *)equed = 'N'; } else { /* Column scaling */ i__1 = *n; for (j = 1; j <= i__1; ++j) { cj = c__[j]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = cj * a[i__ + j * a_dim1]; /* L10: */ } /* L20: */ } *(unsigned char *)equed = 'C'; } } else if (*colcnd >= .1) { /* Row scaling, no column scaling */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = r__[i__] * a[i__ + j * a_dim1]; /* L30: */ } /* L40: */ } *(unsigned char *)equed = 'R'; } else { /* Row and column scaling */ i__1 = *n; for (j = 1; j <= i__1; ++j) { cj = c__[j]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = cj * r__[i__] * a[i__ + j * a_dim1]; /* L50: */ } /* L60: */ } *(unsigned char *)equed = 'B'; } return 0; /* End of DLAQGE */ } /* _starpu_dlaqge_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaqp2.c000066400000000000000000000156621507764646700205710ustar00rootroot00000000000000/* dlaqp2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dlaqp2_(integer *m, integer *n, integer *offset, doublereal *a, integer *lda, integer *jpvt, doublereal *tau, doublereal *vn1, doublereal *vn2, doublereal *work) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, mn; doublereal aii; integer pvt; doublereal temp; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); doublereal temp2, tol3z; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *); integer offpi, itemp; extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dlarfp_(integer *, doublereal *, doublereal *, integer *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAQP2 computes a QR factorization with column pivoting of */ /* the block A(OFFSET+1:M,1:N). */ /* The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* OFFSET (input) INTEGER */ /* The number of rows of the matrix A that must be pivoted */ /* but no factorized. OFFSET >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, the upper triangle of block A(OFFSET+1:M,1:N) is */ /* the triangular factor obtained; the elements in block */ /* A(OFFSET+1:M,1:N) below the diagonal, together with the */ /* array TAU, represent the orthogonal matrix Q as a product of */ /* elementary reflectors. Block A(1:OFFSET,1:N) has been */ /* accordingly pivoted, but no factorized. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* JPVT (input/output) INTEGER array, dimension (N) */ /* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted */ /* to the front of A*P (a leading column); if JPVT(i) = 0, */ /* the i-th column of A is a free column. */ /* On exit, if JPVT(i) = k, then the i-th column of A*P */ /* was the k-th column of A. */ /* TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) */ /* The scalar factors of the elementary reflectors. */ /* VN1 (input/output) DOUBLE PRECISION array, dimension (N) */ /* The vector with the partial column norms. */ /* VN2 (input/output) DOUBLE PRECISION array, dimension (N) */ /* The vector with the exact column norms. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */ /* X. Sun, Computer Science Dept., Duke University, USA */ /* Partial column norm updating strategy modified by */ /* Z. Drmac and Z. Bujanovic, Dept. of Mathematics, */ /* University of Zagreb, Croatia. */ /* June 2006. */ /* For more details see LAPACK Working Note 176. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --jpvt; --tau; --vn1; --vn2; --work; /* Function Body */ /* Computing MIN */ i__1 = *m - *offset; mn = min(i__1,*n); tol3z = sqrt(_starpu_dlamch_("Epsilon")); /* Compute factorization. */ i__1 = mn; for (i__ = 1; i__ <= i__1; ++i__) { offpi = *offset + i__; /* Determine ith pivot column and swap if necessary. */ i__2 = *n - i__ + 1; pvt = i__ - 1 + _starpu_idamax_(&i__2, &vn1[i__], &c__1); if (pvt != i__) { _starpu_dswap_(m, &a[pvt * a_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], & c__1); itemp = jpvt[pvt]; jpvt[pvt] = jpvt[i__]; jpvt[i__] = itemp; vn1[pvt] = vn1[i__]; vn2[pvt] = vn2[i__]; } /* Generate elementary reflector H(i). */ if (offpi < *m) { i__2 = *m - offpi + 1; _starpu_dlarfp_(&i__2, &a[offpi + i__ * a_dim1], &a[offpi + 1 + i__ * a_dim1], &c__1, &tau[i__]); } else { _starpu_dlarfp_(&c__1, &a[*m + i__ * a_dim1], &a[*m + i__ * a_dim1], & c__1, &tau[i__]); } if (i__ <= *n) { /* Apply H(i)' to A(offset+i:m,i+1:n) from the left. */ aii = a[offpi + i__ * a_dim1]; a[offpi + i__ * a_dim1] = 1.; i__2 = *m - offpi + 1; i__3 = *n - i__; _starpu_dlarf_("Left", &i__2, &i__3, &a[offpi + i__ * a_dim1], &c__1, & tau[i__], &a[offpi + (i__ + 1) * a_dim1], lda, &work[1]); a[offpi + i__ * a_dim1] = aii; } /* Update partial column norms. */ i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { if (vn1[j] != 0.) { /* NOTE: The following 4 lines follow from the analysis in */ /* Lapack Working Note 176. */ /* Computing 2nd power */ d__2 = (d__1 = a[offpi + j * a_dim1], abs(d__1)) / vn1[j]; temp = 1. - d__2 * d__2; temp = max(temp,0.); /* Computing 2nd power */ d__1 = vn1[j] / vn2[j]; temp2 = temp * (d__1 * d__1); if (temp2 <= tol3z) { if (offpi < *m) { i__3 = *m - offpi; vn1[j] = _starpu_dnrm2_(&i__3, &a[offpi + 1 + j * a_dim1], & c__1); vn2[j] = vn1[j]; } else { vn1[j] = 0.; vn2[j] = 0.; } } else { vn1[j] *= sqrt(temp); } } /* L10: */ } /* L20: */ } return 0; /* End of DLAQP2 */ } /* _starpu_dlaqp2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaqps.c000066400000000000000000000241111507764646700206570ustar00rootroot00000000000000/* dlaqps.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b8 = -1.; static doublereal c_b9 = 1.; static doublereal c_b16 = 0.; /* Subroutine */ int _starpu_dlaqps_(integer *m, integer *n, integer *offset, integer *nb, integer *kb, doublereal *a, integer *lda, integer *jpvt, doublereal *tau, doublereal *vn1, doublereal *vn2, doublereal *auxv, doublereal *f, integer *ldf) { /* System generated locals */ integer a_dim1, a_offset, f_dim1, f_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); integer i_dnnt(doublereal *); /* Local variables */ integer j, k, rk; doublereal akk; integer pvt; doublereal temp; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); doublereal temp2, tol3z; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer itemp; extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dlarfp_(integer *, doublereal *, doublereal *, integer *, doublereal *); integer lsticc, lastrk; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAQPS computes a step of QR factorization with column pivoting */ /* of a real M-by-N matrix A by using Blas-3. It tries to factorize */ /* NB columns from A starting from the row OFFSET+1, and updates all */ /* of the matrix with Blas-3 xGEMM. */ /* In some cases, due to catastrophic cancellations, it cannot */ /* factorize NB columns. Hence, the actual number of factorized */ /* columns is returned in KB. */ /* Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0 */ /* OFFSET (input) INTEGER */ /* The number of rows of A that have been factorized in */ /* previous steps. */ /* NB (input) INTEGER */ /* The number of columns to factorize. */ /* KB (output) INTEGER */ /* The number of columns actually factorized. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, block A(OFFSET+1:M,1:KB) is the triangular */ /* factor obtained and block A(1:OFFSET,1:N) has been */ /* accordingly pivoted, but no factorized. */ /* The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has */ /* been updated. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* JPVT (input/output) INTEGER array, dimension (N) */ /* JPVT(I) = K <==> Column K of the full matrix A has been */ /* permuted into position I in AP. */ /* TAU (output) DOUBLE PRECISION array, dimension (KB) */ /* The scalar factors of the elementary reflectors. */ /* VN1 (input/output) DOUBLE PRECISION array, dimension (N) */ /* The vector with the partial column norms. */ /* VN2 (input/output) DOUBLE PRECISION array, dimension (N) */ /* The vector with the exact column norms. */ /* AUXV (input/output) DOUBLE PRECISION array, dimension (NB) */ /* Auxiliar vector. */ /* F (input/output) DOUBLE PRECISION array, dimension (LDF,NB) */ /* Matrix F' = L*Y'*A. */ /* LDF (input) INTEGER */ /* The leading dimension of the array F. LDF >= max(1,N). */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */ /* X. Sun, Computer Science Dept., Duke University, USA */ /* Partial column norm updating strategy modified by */ /* Z. Drmac and Z. Bujanovic, Dept. of Mathematics, */ /* University of Zagreb, Croatia. */ /* June 2006. */ /* For more details see LAPACK Working Note 176. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --jpvt; --tau; --vn1; --vn2; --auxv; f_dim1 = *ldf; f_offset = 1 + f_dim1; f -= f_offset; /* Function Body */ /* Computing MIN */ i__1 = *m, i__2 = *n + *offset; lastrk = min(i__1,i__2); lsticc = 0; k = 0; tol3z = sqrt(_starpu_dlamch_("Epsilon")); /* Beginning of while loop. */ L10: if (k < *nb && lsticc == 0) { ++k; rk = *offset + k; /* Determine ith pivot column and swap if necessary */ i__1 = *n - k + 1; pvt = k - 1 + _starpu_idamax_(&i__1, &vn1[k], &c__1); if (pvt != k) { _starpu_dswap_(m, &a[pvt * a_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); i__1 = k - 1; _starpu_dswap_(&i__1, &f[pvt + f_dim1], ldf, &f[k + f_dim1], ldf); itemp = jpvt[pvt]; jpvt[pvt] = jpvt[k]; jpvt[k] = itemp; vn1[pvt] = vn1[k]; vn2[pvt] = vn2[k]; } /* Apply previous Householder reflectors to column K: */ /* A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. */ if (k > 1) { i__1 = *m - rk + 1; i__2 = k - 1; _starpu_dgemv_("No transpose", &i__1, &i__2, &c_b8, &a[rk + a_dim1], lda, &f[k + f_dim1], ldf, &c_b9, &a[rk + k * a_dim1], &c__1); } /* Generate elementary reflector H(k). */ if (rk < *m) { i__1 = *m - rk + 1; _starpu_dlarfp_(&i__1, &a[rk + k * a_dim1], &a[rk + 1 + k * a_dim1], & c__1, &tau[k]); } else { _starpu_dlarfp_(&c__1, &a[rk + k * a_dim1], &a[rk + k * a_dim1], &c__1, & tau[k]); } akk = a[rk + k * a_dim1]; a[rk + k * a_dim1] = 1.; /* Compute Kth column of F: */ /* Compute F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). */ if (k < *n) { i__1 = *m - rk + 1; i__2 = *n - k; _starpu_dgemv_("Transpose", &i__1, &i__2, &tau[k], &a[rk + (k + 1) * a_dim1], lda, &a[rk + k * a_dim1], &c__1, &c_b16, &f[k + 1 + k * f_dim1], &c__1); } /* Padding F(1:K,K) with zeros. */ i__1 = k; for (j = 1; j <= i__1; ++j) { f[j + k * f_dim1] = 0.; /* L20: */ } /* Incremental updating of F: */ /* F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)' */ /* *A(RK:M,K). */ if (k > 1) { i__1 = *m - rk + 1; i__2 = k - 1; d__1 = -tau[k]; _starpu_dgemv_("Transpose", &i__1, &i__2, &d__1, &a[rk + a_dim1], lda, &a[ rk + k * a_dim1], &c__1, &c_b16, &auxv[1], &c__1); i__1 = k - 1; _starpu_dgemv_("No transpose", n, &i__1, &c_b9, &f[f_dim1 + 1], ldf, & auxv[1], &c__1, &c_b9, &f[k * f_dim1 + 1], &c__1); } /* Update the current row of A: */ /* A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. */ if (k < *n) { i__1 = *n - k; _starpu_dgemv_("No transpose", &i__1, &k, &c_b8, &f[k + 1 + f_dim1], ldf, &a[rk + a_dim1], lda, &c_b9, &a[rk + (k + 1) * a_dim1], lda); } /* Update partial column norms. */ if (rk < lastrk) { i__1 = *n; for (j = k + 1; j <= i__1; ++j) { if (vn1[j] != 0.) { /* NOTE: The following 4 lines follow from the analysis in */ /* Lapack Working Note 176. */ temp = (d__1 = a[rk + j * a_dim1], abs(d__1)) / vn1[j]; /* Computing MAX */ d__1 = 0., d__2 = (temp + 1.) * (1. - temp); temp = max(d__1,d__2); /* Computing 2nd power */ d__1 = vn1[j] / vn2[j]; temp2 = temp * (d__1 * d__1); if (temp2 <= tol3z) { vn2[j] = (doublereal) lsticc; lsticc = j; } else { vn1[j] *= sqrt(temp); } } /* L30: */ } } a[rk + k * a_dim1] = akk; /* End of while loop. */ goto L10; } *kb = k; rk = *offset + *kb; /* Apply the block reflector to the rest of the matrix: */ /* A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - */ /* A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'. */ /* Computing MIN */ i__1 = *n, i__2 = *m - *offset; if (*kb < min(i__1,i__2)) { i__1 = *m - rk; i__2 = *n - *kb; _starpu_dgemm_("No transpose", "Transpose", &i__1, &i__2, kb, &c_b8, &a[rk + 1 + a_dim1], lda, &f[*kb + 1 + f_dim1], ldf, &c_b9, &a[rk + 1 + (*kb + 1) * a_dim1], lda); } /* Recomputation of difficult columns. */ L40: if (lsticc > 0) { itemp = i_dnnt(&vn2[lsticc]); i__1 = *m - rk; vn1[lsticc] = _starpu_dnrm2_(&i__1, &a[rk + 1 + lsticc * a_dim1], &c__1); /* NOTE: The computation of VN1( LSTICC ) relies on the fact that */ /* SNRM2 does not fail on vectors with norm below the value of */ /* SQRT(DLAMCH('S')) */ vn2[lsticc] = vn1[lsticc]; lsticc = itemp; goto L40; } return 0; /* End of DLAQPS */ } /* _starpu_dlaqps_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaqr0.c000066400000000000000000000635351507764646700205730ustar00rootroot00000000000000/* dlaqr0.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__13 = 13; static integer c__15 = 15; static integer c_n1 = -1; static integer c__12 = 12; static integer c__14 = 14; static integer c__16 = 16; static logical c_false = FALSE_; static integer c__1 = 1; static integer c__3 = 3; /* Subroutine */ int _starpu_dlaqr0_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2, d__3, d__4; /* Local variables */ integer i__, k; doublereal aa, bb, cc, dd; integer ld; doublereal cs; integer nh, it, ks, kt; doublereal sn; integer ku, kv, ls, ns; doublereal ss; integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, nmin; doublereal swap; integer ktop; doublereal zdum[1] /* was [1][1] */; integer kacc22, itmax, nsmax, nwmax, kwtop; extern /* Subroutine */ int _starpu_dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlaqr3_( logical *, logical *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaqr4_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlaqr5_(logical *, logical *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *); integer nibble; extern /* Subroutine */ int _starpu_dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); char jbcmpz[1]; integer nwupbd; logical sorted; integer lwkopt; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAQR0 computes the eigenvalues of a Hessenberg matrix H */ /* and, optionally, the matrices T and Z from the Schur decomposition */ /* H = Z T Z**T, where T is an upper quasi-triangular matrix (the */ /* Schur form), and Z is the orthogonal matrix of Schur vectors. */ /* Optionally Z may be postmultiplied into an input orthogonal */ /* matrix Q so that this routine can give the Schur factorization */ /* of a matrix A which has been reduced to the Hessenberg form H */ /* by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. */ /* Arguments */ /* ========= */ /* WANTT (input) LOGICAL */ /* = .TRUE. : the full Schur form T is required; */ /* = .FALSE.: only eigenvalues are required. */ /* WANTZ (input) LOGICAL */ /* = .TRUE. : the matrix of Schur vectors Z is required; */ /* = .FALSE.: Schur vectors are not required. */ /* N (input) INTEGER */ /* The order of the matrix H. N .GE. 0. */ /* ILO (input) INTEGER */ /* IHI (input) INTEGER */ /* It is assumed that H is already upper triangular in rows */ /* and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, */ /* H(ILO,ILO-1) is zero. ILO and IHI are normally set by a */ /* previous call to DGEBAL, and then passed to DGEHRD when the */ /* matrix output by DGEBAL is reduced to Hessenberg form. */ /* Otherwise, ILO and IHI should be set to 1 and N, */ /* respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. */ /* If N = 0, then ILO = 1 and IHI = 0. */ /* H (input/output) DOUBLE PRECISION array, dimension (LDH,N) */ /* On entry, the upper Hessenberg matrix H. */ /* On exit, if INFO = 0 and WANTT is .TRUE., then H contains */ /* the upper quasi-triangular matrix T from the Schur */ /* decomposition (the Schur form); 2-by-2 diagonal blocks */ /* (corresponding to complex conjugate pairs of eigenvalues) */ /* are returned in standard form, with H(i,i) = H(i+1,i+1) */ /* and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is */ /* .FALSE., then the contents of H are unspecified on exit. */ /* (The output value of H when INFO.GT.0 is given under the */ /* description of INFO below.) */ /* This subroutine may explicitly set H(i,j) = 0 for i.GT.j and */ /* j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. */ /* LDH (input) INTEGER */ /* The leading dimension of the array H. LDH .GE. max(1,N). */ /* WR (output) DOUBLE PRECISION array, dimension (IHI) */ /* WI (output) DOUBLE PRECISION array, dimension (IHI) */ /* The real and imaginary parts, respectively, of the computed */ /* eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) */ /* and WI(ILO:IHI). If two eigenvalues are computed as a */ /* complex conjugate pair, they are stored in consecutive */ /* elements of WR and WI, say the i-th and (i+1)th, with */ /* WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then */ /* the eigenvalues are stored in the same order as on the */ /* diagonal of the Schur form returned in H, with */ /* WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal */ /* block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and */ /* WI(i+1) = -WI(i). */ /* ILOZ (input) INTEGER */ /* IHIZ (input) INTEGER */ /* Specify the rows of Z to which transformations must be */ /* applied if WANTZ is .TRUE.. */ /* 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. */ /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,IHI) */ /* If WANTZ is .FALSE., then Z is not referenced. */ /* If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is */ /* replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the */ /* orthogonal Schur factor of H(ILO:IHI,ILO:IHI). */ /* (The output value of Z when INFO.GT.0 is given under */ /* the description of INFO below.) */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. if WANTZ is .TRUE. */ /* then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension LWORK */ /* On exit, if LWORK = -1, WORK(1) returns an estimate of */ /* the optimal value for LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK .GE. max(1,N) */ /* is sufficient, but LWORK typically as large as 6*N may */ /* be required for optimal performance. A workspace query */ /* to determine the optimal workspace size is recommended. */ /* If LWORK = -1, then DLAQR0 does a workspace query. */ /* In this case, DLAQR0 checks the input parameters and */ /* estimates the optimal workspace size for the given */ /* values of N, ILO and IHI. The estimate is returned */ /* in WORK(1). No error message related to LWORK is */ /* issued by XERBLA. Neither H nor Z are accessed. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* .GT. 0: if INFO = i, DLAQR0 failed to compute all of */ /* the eigenvalues. Elements 1:ilo-1 and i+1:n of WR */ /* and WI contain those eigenvalues which have been */ /* successfully computed. (Failures are rare.) */ /* If INFO .GT. 0 and WANT is .FALSE., then on exit, */ /* the remaining unconverged eigenvalues are the eigen- */ /* values of the upper Hessenberg matrix rows and */ /* columns ILO through INFO of the final, output */ /* value of H. */ /* If INFO .GT. 0 and WANTT is .TRUE., then on exit */ /* (*) (initial value of H)*U = U*(final value of H) */ /* where U is an orthogonal matrix. The final */ /* value of H is upper Hessenberg and quasi-triangular */ /* in rows and columns INFO+1 through IHI. */ /* If INFO .GT. 0 and WANTZ is .TRUE., then on exit */ /* (final value of Z(ILO:IHI,ILOZ:IHIZ) */ /* = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U */ /* where U is the orthogonal matrix in (*) (regard- */ /* less of the value of WANTT.) */ /* If INFO .GT. 0 and WANTZ is .FALSE., then Z is not */ /* accessed. */ /* ================================================================ */ /* Based on contributions by */ /* Karen Braman and Ralph Byers, Department of Mathematics, */ /* University of Kansas, USA */ /* ================================================================ */ /* References: */ /* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */ /* Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 */ /* Performance, SIAM Journal of Matrix Analysis, volume 23, pages */ /* 929--947, 2002. */ /* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */ /* Algorithm Part II: Aggressive Early Deflation, SIAM Journal */ /* of Matrix Analysis, volume 23, pages 948--973, 2002. */ /* ================================================================ */ /* .. Parameters .. */ /* ==== Matrices of order NTINY or smaller must be processed by */ /* . DLAHQR because of insufficient subdiagonal scratch space. */ /* . (This is a hard limit.) ==== */ /* ==== Exceptional deflation windows: try to cure rare */ /* . slow convergence by varying the size of the */ /* . deflation window after KEXNW iterations. ==== */ /* ==== Exceptional shifts: try to cure rare slow convergence */ /* . with ad-hoc exceptional shifts every KEXSH iterations. */ /* . ==== */ /* ==== The constants WILK1 and WILK2 are used to form the */ /* . exceptional shifts. ==== */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; --wr; --wi; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; /* Function Body */ *info = 0; /* ==== Quick return for N = 0: nothing to do. ==== */ if (*n == 0) { work[1] = 1.; return 0; } if (*n <= 11) { /* ==== Tiny matrices must use DLAHQR. ==== */ lwkopt = 1; if (*lwork != -1) { _starpu_dlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], & wi[1], iloz, ihiz, &z__[z_offset], ldz, info); } } else { /* ==== Use small bulge multi-shift QR with aggressive early */ /* . deflation on larger-than-tiny matrices. ==== */ /* ==== Hope for the best. ==== */ *info = 0; /* ==== Set up job flags for ILAENV. ==== */ if (*wantt) { *(unsigned char *)jbcmpz = 'S'; } else { *(unsigned char *)jbcmpz = 'E'; } if (*wantz) { *(unsigned char *)&jbcmpz[1] = 'V'; } else { *(unsigned char *)&jbcmpz[1] = 'N'; } /* ==== NWR = recommended deflation window size. At this */ /* . point, N .GT. NTINY = 11, so there is enough */ /* . subdiagonal workspace for NWR.GE.2 as required. */ /* . (In fact, there is enough subdiagonal space for */ /* . NWR.GE.3.) ==== */ nwr = _starpu_ilaenv_(&c__13, "DLAQR0", jbcmpz, n, ilo, ihi, lwork); nwr = max(2,nwr); /* Computing MIN */ i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); nwr = min(i__1,nwr); /* ==== NSR = recommended number of simultaneous shifts. */ /* . At this point N .GT. NTINY = 11, so there is at */ /* . enough subdiagonal workspace for NSR to be even */ /* . and greater than or equal to two as required. ==== */ nsr = _starpu_ilaenv_(&c__15, "DLAQR0", jbcmpz, n, ilo, ihi, lwork); /* Computing MIN */ i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - *ilo; nsr = min(i__1,i__2); /* Computing MAX */ i__1 = 2, i__2 = nsr - nsr % 2; nsr = max(i__1,i__2); /* ==== Estimate optimal workspace ==== */ /* ==== Workspace query call to DLAQR3 ==== */ i__1 = nwr + 1; _starpu_dlaqr3_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[ h_offset], ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], ldh, &work[1], &c_n1); /* ==== Optimal workspace = MAX(DLAQR5, DLAQR3) ==== */ /* Computing MAX */ i__1 = nsr * 3 / 2, i__2 = (integer) work[1]; lwkopt = max(i__1,i__2); /* ==== Quick return in case of workspace query. ==== */ if (*lwork == -1) { work[1] = (doublereal) lwkopt; return 0; } /* ==== DLAHQR/DLAQR0 crossover point ==== */ nmin = _starpu_ilaenv_(&c__12, "DLAQR0", jbcmpz, n, ilo, ihi, lwork); nmin = max(11,nmin); /* ==== Nibble crossover point ==== */ nibble = _starpu_ilaenv_(&c__14, "DLAQR0", jbcmpz, n, ilo, ihi, lwork); nibble = max(0,nibble); /* ==== Accumulate reflections during ttswp? Use block */ /* . 2-by-2 structure during matrix-matrix multiply? ==== */ kacc22 = _starpu_ilaenv_(&c__16, "DLAQR0", jbcmpz, n, ilo, ihi, lwork); kacc22 = max(0,kacc22); kacc22 = min(2,kacc22); /* ==== NWMAX = the largest possible deflation window for */ /* . which there is sufficient workspace. ==== */ /* Computing MIN */ i__1 = (*n - 1) / 3, i__2 = *lwork / 2; nwmax = min(i__1,i__2); nw = nwmax; /* ==== NSMAX = the Largest number of simultaneous shifts */ /* . for which there is sufficient workspace. ==== */ /* Computing MIN */ i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; nsmax = min(i__1,i__2); nsmax -= nsmax % 2; /* ==== NDFL: an iteration count restarted at deflation. ==== */ ndfl = 1; /* ==== ITMAX = iteration limit ==== */ /* Computing MAX */ i__1 = 10, i__2 = *ihi - *ilo + 1; itmax = max(i__1,i__2) * 30; /* ==== Last row and column in the active block ==== */ kbot = *ihi; /* ==== Main Loop ==== */ i__1 = itmax; for (it = 1; it <= i__1; ++it) { /* ==== Done when KBOT falls below ILO ==== */ if (kbot < *ilo) { goto L90; } /* ==== Locate active block ==== */ i__2 = *ilo + 1; for (k = kbot; k >= i__2; --k) { if (h__[k + (k - 1) * h_dim1] == 0.) { goto L20; } /* L10: */ } k = *ilo; L20: ktop = k; /* ==== Select deflation window size: */ /* . Typical Case: */ /* . If possible and advisable, nibble the entire */ /* . active block. If not, use size MIN(NWR,NWMAX) */ /* . or MIN(NWR+1,NWMAX) depending upon which has */ /* . the smaller corresponding subdiagonal entry */ /* . (a heuristic). */ /* . */ /* . Exceptional Case: */ /* . If there have been no deflations in KEXNW or */ /* . more iterations, then vary the deflation window */ /* . size. At first, because, larger windows are, */ /* . in general, more powerful than smaller ones, */ /* . rapidly increase the window to the maximum possible. */ /* . Then, gradually reduce the window size. ==== */ nh = kbot - ktop + 1; nwupbd = min(nh,nwmax); if (ndfl < 5) { nw = min(nwupbd,nwr); } else { /* Computing MIN */ i__2 = nwupbd, i__3 = nw << 1; nw = min(i__2,i__3); } if (nw < nwmax) { if (nw >= nh - 1) { nw = nh; } else { kwtop = kbot - nw + 1; if ((d__1 = h__[kwtop + (kwtop - 1) * h_dim1], abs(d__1)) > (d__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], abs(d__2))) { ++nw; } } } if (ndfl < 5) { ndec = -1; } else if (ndec >= 0 || nw >= nwupbd) { ++ndec; if (nw - ndec < 2) { ndec = 0; } nw -= ndec; } /* ==== Aggressive early deflation: */ /* . split workspace under the subdiagonal into */ /* . - an nw-by-nw work array V in the lower */ /* . left-hand-corner, */ /* . - an NW-by-at-least-NW-but-more-is-better */ /* . (NW-by-NHO) horizontal work array along */ /* . the bottom edge, */ /* . - an at-least-NW-but-more-is-better (NHV-by-NW) */ /* . vertical work array along the left-hand-edge. */ /* . ==== */ kv = *n - nw + 1; kt = nw + 1; nho = *n - nw - 1 - kt + 1; kwv = nw + 2; nve = *n - nw - kwv + 1; /* ==== Aggressive early deflation ==== */ _starpu_dlaqr3_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[kv + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); /* ==== Adjust KBOT accounting for new deflations. ==== */ kbot -= ld; /* ==== KS points to the shifts. ==== */ ks = kbot - ls + 1; /* ==== Skip an expensive QR sweep if there is a (partly */ /* . heuristic) reason to expect that many eigenvalues */ /* . will deflate without it. Here, the QR sweep is */ /* . skipped if many eigenvalues have just been deflated */ /* . or if the remaining active block is small. */ if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( nmin,nwmax)) { /* ==== NS = nominal number of simultaneous shifts. */ /* . This may be lowered (slightly) if DLAQR3 */ /* . did not provide that many shifts. ==== */ /* Computing MIN */ /* Computing MAX */ i__4 = 2, i__5 = kbot - ktop; i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); ns = min(i__2,i__3); ns -= ns % 2; /* ==== If there have been no deflations */ /* . in a multiple of KEXSH iterations, */ /* . then try exceptional shifts. */ /* . Otherwise use shifts provided by */ /* . DLAQR3 above or from the eigenvalues */ /* . of a trailing principal submatrix. ==== */ if (ndfl % 6 == 0) { ks = kbot - ns + 1; /* Computing MAX */ i__3 = ks + 1, i__4 = ktop + 2; i__2 = max(i__3,i__4); for (i__ = kbot; i__ >= i__2; i__ += -2) { ss = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); aa = ss * .75 + h__[i__ + i__ * h_dim1]; bb = ss; cc = ss * -.4375; dd = aa; _starpu_dlanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1] , &wr[i__], &wi[i__], &cs, &sn); /* L30: */ } if (ks == ktop) { wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; wi[ks + 1] = 0.; wr[ks] = wr[ks + 1]; wi[ks] = wi[ks + 1]; } } else { /* ==== Got NS/2 or fewer shifts? Use DLAQR4 or */ /* . DLAHQR on a trailing principal submatrix to */ /* . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, */ /* . there is enough space below the subdiagonal */ /* . to fit an NS-by-NS scratch array.) ==== */ if (kbot - ks + 1 <= ns / 2) { ks = kbot - ns + 1; kt = *n - ns + 1; _starpu_dlacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & h__[kt + h_dim1], ldh); if (ns > nmin) { _starpu_dlaqr4_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ kt + h_dim1], ldh, &wr[ks], &wi[ks], & c__1, &c__1, zdum, &c__1, &work[1], lwork, &inf); } else { _starpu_dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ kt + h_dim1], ldh, &wr[ks], &wi[ks], & c__1, &c__1, zdum, &c__1, &inf); } ks += inf; /* ==== In case of a rare QR failure use */ /* . eigenvalues of the trailing 2-by-2 */ /* . principal submatrix. ==== */ if (ks >= kbot) { aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; cc = h__[kbot + (kbot - 1) * h_dim1]; bb = h__[kbot - 1 + kbot * h_dim1]; dd = h__[kbot + kbot * h_dim1]; _starpu_dlanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[ kbot - 1], &wr[kbot], &wi[kbot], &cs, &sn) ; ks = kbot - 1; } } if (kbot - ks + 1 > ns) { /* ==== Sort the shifts (Helps a little) */ /* . Bubble sort keeps complex conjugate */ /* . pairs together. ==== */ sorted = FALSE_; i__2 = ks + 1; for (k = kbot; k >= i__2; --k) { if (sorted) { goto L60; } sorted = TRUE_; i__3 = k - 1; for (i__ = ks; i__ <= i__3; ++i__) { if ((d__1 = wr[i__], abs(d__1)) + (d__2 = wi[ i__], abs(d__2)) < (d__3 = wr[i__ + 1] , abs(d__3)) + (d__4 = wi[i__ + 1], abs(d__4))) { sorted = FALSE_; swap = wr[i__]; wr[i__] = wr[i__ + 1]; wr[i__ + 1] = swap; swap = wi[i__]; wi[i__] = wi[i__ + 1]; wi[i__ + 1] = swap; } /* L40: */ } /* L50: */ } L60: ; } /* ==== Shuffle shifts into pairs of real shifts */ /* . and pairs of complex conjugate shifts */ /* . assuming complex conjugate shifts are */ /* . already adjacent to one another. (Yes, */ /* . they are.) ==== */ i__2 = ks + 2; for (i__ = kbot; i__ >= i__2; i__ += -2) { if (wi[i__] != -wi[i__ - 1]) { swap = wr[i__]; wr[i__] = wr[i__ - 1]; wr[i__ - 1] = wr[i__ - 2]; wr[i__ - 2] = swap; swap = wi[i__]; wi[i__] = wi[i__ - 1]; wi[i__ - 1] = wi[i__ - 2]; wi[i__ - 2] = swap; } /* L70: */ } } /* ==== If there are only two shifts and both are */ /* . real, then use only one. ==== */ if (kbot - ks + 1 == 2) { if (wi[kbot] == 0.) { if ((d__1 = wr[kbot] - h__[kbot + kbot * h_dim1], abs( d__1)) < (d__2 = wr[kbot - 1] - h__[kbot + kbot * h_dim1], abs(d__2))) { wr[kbot - 1] = wr[kbot]; } else { wr[kbot] = wr[kbot - 1]; } } } /* ==== Use up to NS of the the smallest magnatiude */ /* . shifts. If there aren't NS shifts available, */ /* . then use them all, possibly dropping one to */ /* . make the number of shifts even. ==== */ /* Computing MIN */ i__2 = ns, i__3 = kbot - ks + 1; ns = min(i__2,i__3); ns -= ns % 2; ks = kbot - ns + 1; /* ==== Small-bulge multi-shift QR sweep: */ /* . split workspace under the subdiagonal into */ /* . - a KDU-by-KDU work array U in the lower */ /* . left-hand-corner, */ /* . - a KDU-by-at-least-KDU-but-more-is-better */ /* . (KDU-by-NHo) horizontal work array WH along */ /* . the bottom edge, */ /* . - and an at-least-KDU-but-more-is-better-by-KDU */ /* . (NVE-by-KDU) vertical work WV arrow along */ /* . the left-hand-edge. ==== */ kdu = ns * 3 - 3; ku = *n - kdu + 1; kwh = kdu + 1; nho = *n - kdu - 3 - (kdu + 1) + 1; kwv = kdu + 4; nve = *n - kdu - kwv + 1; /* ==== Small-bulge multi-shift QR sweep ==== */ _starpu_dlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], &wi[ks], &h__[h_offset], ldh, iloz, ihiz, &z__[ z_offset], ldz, &work[1], &c__3, &h__[ku + h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, &h__[ku + kwh * h_dim1], ldh); } /* ==== Note progress (or the lack of it). ==== */ if (ld > 0) { ndfl = 1; } else { ++ndfl; } /* ==== End of main loop ==== */ /* L80: */ } /* ==== Iteration limit exceeded. Set INFO to show where */ /* . the problem occurred and exit. ==== */ *info = kbot; L90: ; } /* ==== Return the optimal value of LWORK. ==== */ work[1] = (doublereal) lwkopt; /* ==== End of DLAQR0 ==== */ return 0; } /* _starpu_dlaqr0_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaqr1.c000066400000000000000000000076741507764646700205760ustar00rootroot00000000000000/* dlaqr1.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaqr1_(integer *n, doublereal *h__, integer *ldh, doublereal *sr1, doublereal *si1, doublereal *sr2, doublereal *si2, doublereal *v) { /* System generated locals */ integer h_dim1, h_offset; doublereal d__1, d__2, d__3; /* Local variables */ doublereal s, h21s, h31s; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Given a 2-by-2 or 3-by-3 matrix H, DLAQR1 sets v to a */ /* scalar multiple of the first column of the product */ /* (*) K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) */ /* scaling to avoid overflows and most underflows. It */ /* is assumed that either */ /* 1) sr1 = sr2 and si1 = -si2 */ /* or */ /* 2) si1 = si2 = 0. */ /* This is useful for starting double implicit shift bulges */ /* in the QR algorithm. */ /* N (input) integer */ /* Order of the matrix H. N must be either 2 or 3. */ /* H (input) DOUBLE PRECISION array of dimension (LDH,N) */ /* The 2-by-2 or 3-by-3 matrix H in (*). */ /* LDH (input) integer */ /* The leading dimension of H as declared in */ /* the calling procedure. LDH.GE.N */ /* SR1 (input) DOUBLE PRECISION */ /* SI1 The shifts in (*). */ /* SR2 */ /* SI2 */ /* V (output) DOUBLE PRECISION array of dimension N */ /* A scalar multiple of the first column of the */ /* matrix K in (*). */ /* ================================================================ */ /* Based on contributions by */ /* Karen Braman and Ralph Byers, Department of Mathematics, */ /* University of Kansas, USA */ /* ================================================================ */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; --v; /* Function Body */ if (*n == 2) { s = (d__1 = h__[h_dim1 + 1] - *sr2, abs(d__1)) + abs(*si2) + (d__2 = h__[h_dim1 + 2], abs(d__2)); if (s == 0.) { v[1] = 0.; v[2] = 0.; } else { h21s = h__[h_dim1 + 2] / s; v[1] = h21s * h__[(h_dim1 << 1) + 1] + (h__[h_dim1 + 1] - *sr1) * ((h__[h_dim1 + 1] - *sr2) / s) - *si1 * (*si2 / s); v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - * sr2); } } else { s = (d__1 = h__[h_dim1 + 1] - *sr2, abs(d__1)) + abs(*si2) + (d__2 = h__[h_dim1 + 2], abs(d__2)) + (d__3 = h__[h_dim1 + 3], abs( d__3)); if (s == 0.) { v[1] = 0.; v[2] = 0.; v[3] = 0.; } else { h21s = h__[h_dim1 + 2] / s; h31s = h__[h_dim1 + 3] / s; v[1] = (h__[h_dim1 + 1] - *sr1) * ((h__[h_dim1 + 1] - *sr2) / s) - *si1 * (*si2 / s) + h__[(h_dim1 << 1) + 1] * h21s + h__[ h_dim1 * 3 + 1] * h31s; v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - * sr2) + h__[h_dim1 * 3 + 2] * h31s; v[3] = h31s * (h__[h_dim1 + 1] + h__[h_dim1 * 3 + 3] - *sr1 - * sr2) + h21s * h__[(h_dim1 << 1) + 3]; } } return 0; } /* _starpu_dlaqr1_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaqr2.c000066400000000000000000000531401507764646700205640ustar00rootroot00000000000000/* dlaqr2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b12 = 0.; static doublereal c_b13 = 1.; static logical c_true = TRUE_; /* Subroutine */ int _starpu_dlaqr2_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * nv, doublereal *wv, integer *ldwv, doublereal *work, integer *lwork) { /* System generated locals */ integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; doublereal d__1, d__2, d__3, d__4, d__5, d__6; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, k; doublereal s, aa, bb, cc, dd, cs, sn; integer jw; doublereal evi, evk, foo; integer kln; doublereal tau, ulp; integer lwk1, lwk2; doublereal beta; integer kend, kcol, info, ifst, ilst, ltop, krow; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dgemm_(char *, char *, integer *, integer * , integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical bulge; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); integer infqr, kwtop; extern /* Subroutine */ int _starpu_dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlabad_( doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal safmin; extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); doublereal safmax; extern /* Subroutine */ int _starpu_dtrexc_(char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *), _starpu_dormhr_(char *, char *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); logical sorted; doublereal smlnum; integer lwkopt; /* -- LAPACK auxiliary routine (version 3.2.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ /* -- April 2009 -- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* This subroutine is identical to DLAQR3 except that it avoids */ /* recursion by calling DLAHQR instead of DLAQR4. */ /* ****************************************************************** */ /* Aggressive early deflation: */ /* This subroutine accepts as input an upper Hessenberg matrix */ /* H and performs an orthogonal similarity transformation */ /* designed to detect and deflate fully converged eigenvalues from */ /* a trailing principal submatrix. On output H has been over- */ /* written by a new Hessenberg matrix that is a perturbation of */ /* an orthogonal similarity transformation of H. It is to be */ /* hoped that the final version of H has many zero subdiagonal */ /* entries. */ /* ****************************************************************** */ /* WANTT (input) LOGICAL */ /* If .TRUE., then the Hessenberg matrix H is fully updated */ /* so that the quasi-triangular Schur factor may be */ /* computed (in cooperation with the calling subroutine). */ /* If .FALSE., then only enough of H is updated to preserve */ /* the eigenvalues. */ /* WANTZ (input) LOGICAL */ /* If .TRUE., then the orthogonal matrix Z is updated so */ /* so that the orthogonal Schur factor may be computed */ /* (in cooperation with the calling subroutine). */ /* If .FALSE., then Z is not referenced. */ /* N (input) INTEGER */ /* The order of the matrix H and (if WANTZ is .TRUE.) the */ /* order of the orthogonal matrix Z. */ /* KTOP (input) INTEGER */ /* It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. */ /* KBOT and KTOP together determine an isolated block */ /* along the diagonal of the Hessenberg matrix. */ /* KBOT (input) INTEGER */ /* It is assumed without a check that either */ /* KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together */ /* determine an isolated block along the diagonal of the */ /* Hessenberg matrix. */ /* NW (input) INTEGER */ /* Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). */ /* H (input/output) DOUBLE PRECISION array, dimension (LDH,N) */ /* On input the initial N-by-N section of H stores the */ /* Hessenberg matrix undergoing aggressive early deflation. */ /* On output H has been transformed by an orthogonal */ /* similarity transformation, perturbed, and the returned */ /* to Hessenberg form that (it is to be hoped) has some */ /* zero subdiagonal entries. */ /* LDH (input) integer */ /* Leading dimension of H just as declared in the calling */ /* subroutine. N .LE. LDH */ /* ILOZ (input) INTEGER */ /* IHIZ (input) INTEGER */ /* Specify the rows of Z to which transformations must be */ /* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. */ /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ /* IF WANTZ is .TRUE., then on output, the orthogonal */ /* similarity transformation mentioned above has been */ /* accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. */ /* If WANTZ is .FALSE., then Z is unreferenced. */ /* LDZ (input) integer */ /* The leading dimension of Z just as declared in the */ /* calling subroutine. 1 .LE. LDZ. */ /* NS (output) integer */ /* The number of unconverged (ie approximate) eigenvalues */ /* returned in SR and SI that may be used as shifts by the */ /* calling subroutine. */ /* ND (output) integer */ /* The number of converged eigenvalues uncovered by this */ /* subroutine. */ /* SR (output) DOUBLE PRECISION array, dimension KBOT */ /* SI (output) DOUBLE PRECISION array, dimension KBOT */ /* On output, the real and imaginary parts of approximate */ /* eigenvalues that may be used for shifts are stored in */ /* SR(KBOT-ND-NS+1) through SR(KBOT-ND) and */ /* SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. */ /* The real and imaginary parts of converged eigenvalues */ /* are stored in SR(KBOT-ND+1) through SR(KBOT) and */ /* SI(KBOT-ND+1) through SI(KBOT), respectively. */ /* V (workspace) DOUBLE PRECISION array, dimension (LDV,NW) */ /* An NW-by-NW work array. */ /* LDV (input) integer scalar */ /* The leading dimension of V just as declared in the */ /* calling subroutine. NW .LE. LDV */ /* NH (input) integer scalar */ /* The number of columns of T. NH.GE.NW. */ /* T (workspace) DOUBLE PRECISION array, dimension (LDT,NW) */ /* LDT (input) integer */ /* The leading dimension of T just as declared in the */ /* calling subroutine. NW .LE. LDT */ /* NV (input) integer */ /* The number of rows of work array WV available for */ /* workspace. NV.GE.NW. */ /* WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW) */ /* LDWV (input) integer */ /* The leading dimension of W just as declared in the */ /* calling subroutine. NW .LE. LDV */ /* WORK (workspace) DOUBLE PRECISION array, dimension LWORK. */ /* On exit, WORK(1) is set to an estimate of the optimal value */ /* of LWORK for the given values of N, NW, KTOP and KBOT. */ /* LWORK (input) integer */ /* The dimension of the work array WORK. LWORK = 2*NW */ /* suffices, but greater efficiency may result from larger */ /* values of LWORK. */ /* If LWORK = -1, then a workspace query is assumed; DLAQR2 */ /* only estimates the optimal workspace size for the given */ /* values of N, NW, KTOP and KBOT. The estimate is returned */ /* in WORK(1). No error message related to LWORK is issued */ /* by XERBLA. Neither H nor Z are accessed. */ /* ================================================================ */ /* Based on contributions by */ /* Karen Braman and Ralph Byers, Department of Mathematics, */ /* University of Kansas, USA */ /* ================================================================ */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* ==== Estimate optimal workspace. ==== */ /* Parameter adjustments */ h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --sr; --si; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; wv_dim1 = *ldwv; wv_offset = 1 + wv_dim1; wv -= wv_offset; --work; /* Function Body */ /* Computing MIN */ i__1 = *nw, i__2 = *kbot - *ktop + 1; jw = min(i__1,i__2); if (jw <= 2) { lwkopt = 1; } else { /* ==== Workspace query call to DGEHRD ==== */ i__1 = jw - 1; _starpu_dgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & c_n1, &info); lwk1 = (integer) work[1]; /* ==== Workspace query call to DORMHR ==== */ i__1 = jw - 1; _starpu_dormhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, &work[1], &c_n1, &info); lwk2 = (integer) work[1]; /* ==== Optimal workspace ==== */ lwkopt = jw + max(lwk1,lwk2); } /* ==== Quick return in case of workspace query. ==== */ if (*lwork == -1) { work[1] = (doublereal) lwkopt; return 0; } /* ==== Nothing to do ... */ /* ... for an empty active block ... ==== */ *ns = 0; *nd = 0; work[1] = 1.; if (*ktop > *kbot) { return 0; } /* ... nor for an empty deflation window. ==== */ if (*nw < 1) { return 0; } /* ==== Machine constants ==== */ safmin = _starpu_dlamch_("SAFE MINIMUM"); safmax = 1. / safmin; _starpu_dlabad_(&safmin, &safmax); ulp = _starpu_dlamch_("PRECISION"); smlnum = safmin * ((doublereal) (*n) / ulp); /* ==== Setup deflation window ==== */ /* Computing MIN */ i__1 = *nw, i__2 = *kbot - *ktop + 1; jw = min(i__1,i__2); kwtop = *kbot - jw + 1; if (kwtop == *ktop) { s = 0.; } else { s = h__[kwtop + (kwtop - 1) * h_dim1]; } if (*kbot == kwtop) { /* ==== 1-by-1 deflation window: not much to do ==== */ sr[kwtop] = h__[kwtop + kwtop * h_dim1]; si[kwtop] = 0.; *ns = 1; *nd = 0; /* Computing MAX */ d__2 = smlnum, d__3 = ulp * (d__1 = h__[kwtop + kwtop * h_dim1], abs( d__1)); if (abs(s) <= max(d__2,d__3)) { *ns = 0; *nd = 1; if (kwtop > *ktop) { h__[kwtop + (kwtop - 1) * h_dim1] = 0.; } } work[1] = 1.; return 0; } /* ==== Convert to spike-triangular form. (In case of a */ /* . rare QR failure, this routine continues to do */ /* . aggressive early deflation using that part of */ /* . the deflation window that converged using INFQR */ /* . here and there to keep track.) ==== */ _starpu_dlacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], ldt); i__1 = jw - 1; i__2 = *ldh + 1; i__3 = *ldt + 1; _starpu_dcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & i__3); _starpu_dlaset_("A", &jw, &jw, &c_b12, &c_b13, &v[v_offset], ldv); _starpu_dlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[kwtop], &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); /* ==== DTREXC needs a clean margin near the diagonal ==== */ i__1 = jw - 3; for (j = 1; j <= i__1; ++j) { t[j + 2 + j * t_dim1] = 0.; t[j + 3 + j * t_dim1] = 0.; /* L10: */ } if (jw > 2) { t[jw + (jw - 2) * t_dim1] = 0.; } /* ==== Deflation detection loop ==== */ *ns = jw; ilst = infqr + 1; L20: if (ilst <= *ns) { if (*ns == 1) { bulge = FALSE_; } else { bulge = t[*ns + (*ns - 1) * t_dim1] != 0.; } /* ==== Small spike tip test for deflation ==== */ if (! bulge) { /* ==== Real eigenvalue ==== */ foo = (d__1 = t[*ns + *ns * t_dim1], abs(d__1)); if (foo == 0.) { foo = abs(s); } /* Computing MAX */ d__2 = smlnum, d__3 = ulp * foo; if ((d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)) <= max(d__2,d__3)) { /* ==== Deflatable ==== */ --(*ns); } else { /* ==== Undeflatable. Move it up out of the way. */ /* . (DTREXC can not fail in this case.) ==== */ ifst = *ns; _starpu_dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], &info); ++ilst; } } else { /* ==== Complex conjugate pair ==== */ foo = (d__3 = t[*ns + *ns * t_dim1], abs(d__3)) + sqrt((d__1 = t[* ns + (*ns - 1) * t_dim1], abs(d__1))) * sqrt((d__2 = t[* ns - 1 + *ns * t_dim1], abs(d__2))); if (foo == 0.) { foo = abs(s); } /* Computing MAX */ d__3 = (d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)), d__4 = (d__2 = s * v[(*ns - 1) * v_dim1 + 1], abs(d__2)); /* Computing MAX */ d__5 = smlnum, d__6 = ulp * foo; if (max(d__3,d__4) <= max(d__5,d__6)) { /* ==== Deflatable ==== */ *ns += -2; } else { /* ==== Undeflatable. Move them up out of the way. */ /* . Fortunately, DTREXC does the right thing with */ /* . ILST in case of a rare exchange failure. ==== */ ifst = *ns; _starpu_dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], &info); ilst += 2; } } /* ==== End deflation detection loop ==== */ goto L20; } /* ==== Return to Hessenberg form ==== */ if (*ns == 0) { s = 0.; } if (*ns < jw) { /* ==== sorting diagonal blocks of T improves accuracy for */ /* . graded matrices. Bubble sort deals well with */ /* . exchange failures. ==== */ sorted = FALSE_; i__ = *ns + 1; L30: if (sorted) { goto L50; } sorted = TRUE_; kend = i__ - 1; i__ = infqr + 1; if (i__ == *ns) { k = i__ + 1; } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { k = i__ + 1; } else { k = i__ + 2; } L40: if (k <= kend) { if (k == i__ + 1) { evi = (d__1 = t[i__ + i__ * t_dim1], abs(d__1)); } else { evi = (d__3 = t[i__ + i__ * t_dim1], abs(d__3)) + sqrt((d__1 = t[i__ + 1 + i__ * t_dim1], abs(d__1))) * sqrt((d__2 = t[i__ + (i__ + 1) * t_dim1], abs(d__2))); } if (k == kend) { evk = (d__1 = t[k + k * t_dim1], abs(d__1)); } else if (t[k + 1 + k * t_dim1] == 0.) { evk = (d__1 = t[k + k * t_dim1], abs(d__1)); } else { evk = (d__3 = t[k + k * t_dim1], abs(d__3)) + sqrt((d__1 = t[ k + 1 + k * t_dim1], abs(d__1))) * sqrt((d__2 = t[k + (k + 1) * t_dim1], abs(d__2))); } if (evi >= evk) { i__ = k; } else { sorted = FALSE_; ifst = i__; ilst = k; _starpu_dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], &info); if (info == 0) { i__ = ilst; } else { i__ = k; } } if (i__ == kend) { k = i__ + 1; } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { k = i__ + 1; } else { k = i__ + 2; } goto L40; } goto L30; L50: ; } /* ==== Restore shift/eigenvalue array from T ==== */ i__ = jw; L60: if (i__ >= infqr + 1) { if (i__ == infqr + 1) { sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; si[kwtop + i__ - 1] = 0.; --i__; } else if (t[i__ + (i__ - 1) * t_dim1] == 0.) { sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; si[kwtop + i__ - 1] = 0.; --i__; } else { aa = t[i__ - 1 + (i__ - 1) * t_dim1]; cc = t[i__ + (i__ - 1) * t_dim1]; bb = t[i__ - 1 + i__ * t_dim1]; dd = t[i__ + i__ * t_dim1]; _starpu_dlanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ - 2], &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, & sn); i__ += -2; } goto L60; } if (*ns < jw || s == 0.) { if (*ns > 1 && s != 0.) { /* ==== Reflect spike back into lower triangle ==== */ _starpu_dcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); beta = work[1]; _starpu_dlarfg_(ns, &beta, &work[2], &c__1, &tau); work[1] = 1.; i__1 = jw - 2; i__2 = jw - 2; _starpu_dlaset_("L", &i__1, &i__2, &c_b12, &c_b12, &t[t_dim1 + 3], ldt); _starpu_dlarf_("L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, & work[jw + 1]); _starpu_dlarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & work[jw + 1]); _starpu_dlarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & work[jw + 1]); i__1 = *lwork - jw; _starpu_dgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] , &i__1, &info); } /* ==== Copy updated reduced window into place ==== */ if (kwtop > 1) { h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; } _starpu_dlacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] , ldh); i__1 = jw - 1; i__2 = *ldt + 1; i__3 = *ldh + 1; _starpu_dcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], &i__3); /* ==== Accumulate orthogonal matrix in order update */ /* . H and Z, if requested. ==== */ if (*ns > 1 && s != 0.) { i__1 = *lwork - jw; _starpu_dormhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, &work[jw + 1], &i__1, &info); } /* ==== Update vertical slab in H ==== */ if (*wantt) { ltop = 1; } else { ltop = *ktop; } i__1 = kwtop - 1; i__2 = *nv; for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += i__2) { /* Computing MIN */ i__3 = *nv, i__4 = kwtop - krow; kln = min(i__3,i__4); _starpu_dgemm_("N", "N", &kln, &jw, &jw, &c_b13, &h__[krow + kwtop * h_dim1], ldh, &v[v_offset], ldv, &c_b12, &wv[wv_offset], ldwv); _starpu_dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * h_dim1], ldh); /* L70: */ } /* ==== Update horizontal slab in H ==== */ if (*wantt) { i__2 = *n; i__1 = *nh; for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; kcol += i__1) { /* Computing MIN */ i__3 = *nh, i__4 = *n - kcol + 1; kln = min(i__3,i__4); _starpu_dgemm_("C", "N", &jw, &kln, &jw, &c_b13, &v[v_offset], ldv, & h__[kwtop + kcol * h_dim1], ldh, &c_b12, &t[t_offset], ldt); _starpu_dlacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * h_dim1], ldh); /* L80: */ } } /* ==== Update vertical slab in Z ==== */ if (*wantz) { i__1 = *ihiz; i__2 = *nv; for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += i__2) { /* Computing MIN */ i__3 = *nv, i__4 = *ihiz - krow + 1; kln = min(i__3,i__4); _starpu_dgemm_("N", "N", &kln, &jw, &jw, &c_b13, &z__[krow + kwtop * z_dim1], ldz, &v[v_offset], ldv, &c_b12, &wv[ wv_offset], ldwv); _starpu_dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + kwtop * z_dim1], ldz); /* L90: */ } } } /* ==== Return the number of deflations ... ==== */ *nd = jw - *ns; /* ==== ... and the number of shifts. (Subtracting */ /* . INFQR from the spike length takes care */ /* . of the case of a rare QR failure while */ /* . calculating eigenvalues of the deflation */ /* . window.) ==== */ *ns -= infqr; /* ==== Return optimal workspace. ==== */ work[1] = (doublereal) lwkopt; /* ==== End of DLAQR2 ==== */ return 0; } /* _starpu_dlaqr2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaqr3.c000066400000000000000000000545321507764646700205730ustar00rootroot00000000000000/* dlaqr3.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static logical c_true = TRUE_; static doublereal c_b17 = 0.; static doublereal c_b18 = 1.; static integer c__12 = 12; /* Subroutine */ int _starpu_dlaqr3_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * nv, doublereal *wv, integer *ldwv, doublereal *work, integer *lwork) { /* System generated locals */ integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; doublereal d__1, d__2, d__3, d__4, d__5, d__6; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, k; doublereal s, aa, bb, cc, dd, cs, sn; integer jw; doublereal evi, evk, foo; integer kln; doublereal tau, ulp; integer lwk1, lwk2, lwk3; doublereal beta; integer kend, kcol, info, nmin, ifst, ilst, ltop, krow; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dgemm_(char *, char *, integer *, integer * , integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical bulge; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); integer infqr, kwtop; extern /* Subroutine */ int _starpu_dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlaqr4_( logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlabad_(doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal safmin; extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); doublereal safmax; extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dtrexc_(char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *), _starpu_dormhr_(char *, char *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); logical sorted; doublereal smlnum; integer lwkopt; /* -- LAPACK auxiliary routine (version 3.2.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ /* -- April 2009 -- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* ****************************************************************** */ /* Aggressive early deflation: */ /* This subroutine accepts as input an upper Hessenberg matrix */ /* H and performs an orthogonal similarity transformation */ /* designed to detect and deflate fully converged eigenvalues from */ /* a trailing principal submatrix. On output H has been over- */ /* written by a new Hessenberg matrix that is a perturbation of */ /* an orthogonal similarity transformation of H. It is to be */ /* hoped that the final version of H has many zero subdiagonal */ /* entries. */ /* ****************************************************************** */ /* WANTT (input) LOGICAL */ /* If .TRUE., then the Hessenberg matrix H is fully updated */ /* so that the quasi-triangular Schur factor may be */ /* computed (in cooperation with the calling subroutine). */ /* If .FALSE., then only enough of H is updated to preserve */ /* the eigenvalues. */ /* WANTZ (input) LOGICAL */ /* If .TRUE., then the orthogonal matrix Z is updated so */ /* so that the orthogonal Schur factor may be computed */ /* (in cooperation with the calling subroutine). */ /* If .FALSE., then Z is not referenced. */ /* N (input) INTEGER */ /* The order of the matrix H and (if WANTZ is .TRUE.) the */ /* order of the orthogonal matrix Z. */ /* KTOP (input) INTEGER */ /* It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. */ /* KBOT and KTOP together determine an isolated block */ /* along the diagonal of the Hessenberg matrix. */ /* KBOT (input) INTEGER */ /* It is assumed without a check that either */ /* KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together */ /* determine an isolated block along the diagonal of the */ /* Hessenberg matrix. */ /* NW (input) INTEGER */ /* Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). */ /* H (input/output) DOUBLE PRECISION array, dimension (LDH,N) */ /* On input the initial N-by-N section of H stores the */ /* Hessenberg matrix undergoing aggressive early deflation. */ /* On output H has been transformed by an orthogonal */ /* similarity transformation, perturbed, and the returned */ /* to Hessenberg form that (it is to be hoped) has some */ /* zero subdiagonal entries. */ /* LDH (input) integer */ /* Leading dimension of H just as declared in the calling */ /* subroutine. N .LE. LDH */ /* ILOZ (input) INTEGER */ /* IHIZ (input) INTEGER */ /* Specify the rows of Z to which transformations must be */ /* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. */ /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ /* IF WANTZ is .TRUE., then on output, the orthogonal */ /* similarity transformation mentioned above has been */ /* accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. */ /* If WANTZ is .FALSE., then Z is unreferenced. */ /* LDZ (input) integer */ /* The leading dimension of Z just as declared in the */ /* calling subroutine. 1 .LE. LDZ. */ /* NS (output) integer */ /* The number of unconverged (ie approximate) eigenvalues */ /* returned in SR and SI that may be used as shifts by the */ /* calling subroutine. */ /* ND (output) integer */ /* The number of converged eigenvalues uncovered by this */ /* subroutine. */ /* SR (output) DOUBLE PRECISION array, dimension KBOT */ /* SI (output) DOUBLE PRECISION array, dimension KBOT */ /* On output, the real and imaginary parts of approximate */ /* eigenvalues that may be used for shifts are stored in */ /* SR(KBOT-ND-NS+1) through SR(KBOT-ND) and */ /* SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. */ /* The real and imaginary parts of converged eigenvalues */ /* are stored in SR(KBOT-ND+1) through SR(KBOT) and */ /* SI(KBOT-ND+1) through SI(KBOT), respectively. */ /* V (workspace) DOUBLE PRECISION array, dimension (LDV,NW) */ /* An NW-by-NW work array. */ /* LDV (input) integer scalar */ /* The leading dimension of V just as declared in the */ /* calling subroutine. NW .LE. LDV */ /* NH (input) integer scalar */ /* The number of columns of T. NH.GE.NW. */ /* T (workspace) DOUBLE PRECISION array, dimension (LDT,NW) */ /* LDT (input) integer */ /* The leading dimension of T just as declared in the */ /* calling subroutine. NW .LE. LDT */ /* NV (input) integer */ /* The number of rows of work array WV available for */ /* workspace. NV.GE.NW. */ /* WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW) */ /* LDWV (input) integer */ /* The leading dimension of W just as declared in the */ /* calling subroutine. NW .LE. LDV */ /* WORK (workspace) DOUBLE PRECISION array, dimension LWORK. */ /* On exit, WORK(1) is set to an estimate of the optimal value */ /* of LWORK for the given values of N, NW, KTOP and KBOT. */ /* LWORK (input) integer */ /* The dimension of the work array WORK. LWORK = 2*NW */ /* suffices, but greater efficiency may result from larger */ /* values of LWORK. */ /* If LWORK = -1, then a workspace query is assumed; DLAQR3 */ /* only estimates the optimal workspace size for the given */ /* values of N, NW, KTOP and KBOT. The estimate is returned */ /* in WORK(1). No error message related to LWORK is issued */ /* by XERBLA. Neither H nor Z are accessed. */ /* ================================================================ */ /* Based on contributions by */ /* Karen Braman and Ralph Byers, Department of Mathematics, */ /* University of Kansas, USA */ /* ================================================================ */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* ==== Estimate optimal workspace. ==== */ /* Parameter adjustments */ h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --sr; --si; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; wv_dim1 = *ldwv; wv_offset = 1 + wv_dim1; wv -= wv_offset; --work; /* Function Body */ /* Computing MIN */ i__1 = *nw, i__2 = *kbot - *ktop + 1; jw = min(i__1,i__2); if (jw <= 2) { lwkopt = 1; } else { /* ==== Workspace query call to DGEHRD ==== */ i__1 = jw - 1; _starpu_dgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & c_n1, &info); lwk1 = (integer) work[1]; /* ==== Workspace query call to DORMHR ==== */ i__1 = jw - 1; _starpu_dormhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, &work[1], &c_n1, &info); lwk2 = (integer) work[1]; /* ==== Workspace query call to DLAQR4 ==== */ _starpu_dlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[1], &si[1], &c__1, &jw, &v[v_offset], ldv, &work[1], &c_n1, & infqr); lwk3 = (integer) work[1]; /* ==== Optimal workspace ==== */ /* Computing MAX */ i__1 = jw + max(lwk1,lwk2); lwkopt = max(i__1,lwk3); } /* ==== Quick return in case of workspace query. ==== */ if (*lwork == -1) { work[1] = (doublereal) lwkopt; return 0; } /* ==== Nothing to do ... */ /* ... for an empty active block ... ==== */ *ns = 0; *nd = 0; work[1] = 1.; if (*ktop > *kbot) { return 0; } /* ... nor for an empty deflation window. ==== */ if (*nw < 1) { return 0; } /* ==== Machine constants ==== */ safmin = _starpu_dlamch_("SAFE MINIMUM"); safmax = 1. / safmin; _starpu_dlabad_(&safmin, &safmax); ulp = _starpu_dlamch_("PRECISION"); smlnum = safmin * ((doublereal) (*n) / ulp); /* ==== Setup deflation window ==== */ /* Computing MIN */ i__1 = *nw, i__2 = *kbot - *ktop + 1; jw = min(i__1,i__2); kwtop = *kbot - jw + 1; if (kwtop == *ktop) { s = 0.; } else { s = h__[kwtop + (kwtop - 1) * h_dim1]; } if (*kbot == kwtop) { /* ==== 1-by-1 deflation window: not much to do ==== */ sr[kwtop] = h__[kwtop + kwtop * h_dim1]; si[kwtop] = 0.; *ns = 1; *nd = 0; /* Computing MAX */ d__2 = smlnum, d__3 = ulp * (d__1 = h__[kwtop + kwtop * h_dim1], abs( d__1)); if (abs(s) <= max(d__2,d__3)) { *ns = 0; *nd = 1; if (kwtop > *ktop) { h__[kwtop + (kwtop - 1) * h_dim1] = 0.; } } work[1] = 1.; return 0; } /* ==== Convert to spike-triangular form. (In case of a */ /* . rare QR failure, this routine continues to do */ /* . aggressive early deflation using that part of */ /* . the deflation window that converged using INFQR */ /* . here and there to keep track.) ==== */ _starpu_dlacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], ldt); i__1 = jw - 1; i__2 = *ldh + 1; i__3 = *ldt + 1; _starpu_dcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & i__3); _starpu_dlaset_("A", &jw, &jw, &c_b17, &c_b18, &v[v_offset], ldv); nmin = _starpu_ilaenv_(&c__12, "DLAQR3", "SV", &jw, &c__1, &jw, lwork); if (jw > nmin) { _starpu_dlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[ kwtop], &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &work[1], lwork, &infqr); } else { _starpu_dlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[ kwtop], &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); } /* ==== DTREXC needs a clean margin near the diagonal ==== */ i__1 = jw - 3; for (j = 1; j <= i__1; ++j) { t[j + 2 + j * t_dim1] = 0.; t[j + 3 + j * t_dim1] = 0.; /* L10: */ } if (jw > 2) { t[jw + (jw - 2) * t_dim1] = 0.; } /* ==== Deflation detection loop ==== */ *ns = jw; ilst = infqr + 1; L20: if (ilst <= *ns) { if (*ns == 1) { bulge = FALSE_; } else { bulge = t[*ns + (*ns - 1) * t_dim1] != 0.; } /* ==== Small spike tip test for deflation ==== */ if (! bulge) { /* ==== Real eigenvalue ==== */ foo = (d__1 = t[*ns + *ns * t_dim1], abs(d__1)); if (foo == 0.) { foo = abs(s); } /* Computing MAX */ d__2 = smlnum, d__3 = ulp * foo; if ((d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)) <= max(d__2,d__3)) { /* ==== Deflatable ==== */ --(*ns); } else { /* ==== Undeflatable. Move it up out of the way. */ /* . (DTREXC can not fail in this case.) ==== */ ifst = *ns; _starpu_dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], &info); ++ilst; } } else { /* ==== Complex conjugate pair ==== */ foo = (d__3 = t[*ns + *ns * t_dim1], abs(d__3)) + sqrt((d__1 = t[* ns + (*ns - 1) * t_dim1], abs(d__1))) * sqrt((d__2 = t[* ns - 1 + *ns * t_dim1], abs(d__2))); if (foo == 0.) { foo = abs(s); } /* Computing MAX */ d__3 = (d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)), d__4 = (d__2 = s * v[(*ns - 1) * v_dim1 + 1], abs(d__2)); /* Computing MAX */ d__5 = smlnum, d__6 = ulp * foo; if (max(d__3,d__4) <= max(d__5,d__6)) { /* ==== Deflatable ==== */ *ns += -2; } else { /* ==== Undeflatable. Move them up out of the way. */ /* . Fortunately, DTREXC does the right thing with */ /* . ILST in case of a rare exchange failure. ==== */ ifst = *ns; _starpu_dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], &info); ilst += 2; } } /* ==== End deflation detection loop ==== */ goto L20; } /* ==== Return to Hessenberg form ==== */ if (*ns == 0) { s = 0.; } if (*ns < jw) { /* ==== sorting diagonal blocks of T improves accuracy for */ /* . graded matrices. Bubble sort deals well with */ /* . exchange failures. ==== */ sorted = FALSE_; i__ = *ns + 1; L30: if (sorted) { goto L50; } sorted = TRUE_; kend = i__ - 1; i__ = infqr + 1; if (i__ == *ns) { k = i__ + 1; } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { k = i__ + 1; } else { k = i__ + 2; } L40: if (k <= kend) { if (k == i__ + 1) { evi = (d__1 = t[i__ + i__ * t_dim1], abs(d__1)); } else { evi = (d__3 = t[i__ + i__ * t_dim1], abs(d__3)) + sqrt((d__1 = t[i__ + 1 + i__ * t_dim1], abs(d__1))) * sqrt((d__2 = t[i__ + (i__ + 1) * t_dim1], abs(d__2))); } if (k == kend) { evk = (d__1 = t[k + k * t_dim1], abs(d__1)); } else if (t[k + 1 + k * t_dim1] == 0.) { evk = (d__1 = t[k + k * t_dim1], abs(d__1)); } else { evk = (d__3 = t[k + k * t_dim1], abs(d__3)) + sqrt((d__1 = t[ k + 1 + k * t_dim1], abs(d__1))) * sqrt((d__2 = t[k + (k + 1) * t_dim1], abs(d__2))); } if (evi >= evk) { i__ = k; } else { sorted = FALSE_; ifst = i__; ilst = k; _starpu_dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], &info); if (info == 0) { i__ = ilst; } else { i__ = k; } } if (i__ == kend) { k = i__ + 1; } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { k = i__ + 1; } else { k = i__ + 2; } goto L40; } goto L30; L50: ; } /* ==== Restore shift/eigenvalue array from T ==== */ i__ = jw; L60: if (i__ >= infqr + 1) { if (i__ == infqr + 1) { sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; si[kwtop + i__ - 1] = 0.; --i__; } else if (t[i__ + (i__ - 1) * t_dim1] == 0.) { sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; si[kwtop + i__ - 1] = 0.; --i__; } else { aa = t[i__ - 1 + (i__ - 1) * t_dim1]; cc = t[i__ + (i__ - 1) * t_dim1]; bb = t[i__ - 1 + i__ * t_dim1]; dd = t[i__ + i__ * t_dim1]; _starpu_dlanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ - 2], &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, & sn); i__ += -2; } goto L60; } if (*ns < jw || s == 0.) { if (*ns > 1 && s != 0.) { /* ==== Reflect spike back into lower triangle ==== */ _starpu_dcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); beta = work[1]; _starpu_dlarfg_(ns, &beta, &work[2], &c__1, &tau); work[1] = 1.; i__1 = jw - 2; i__2 = jw - 2; _starpu_dlaset_("L", &i__1, &i__2, &c_b17, &c_b17, &t[t_dim1 + 3], ldt); _starpu_dlarf_("L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, & work[jw + 1]); _starpu_dlarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & work[jw + 1]); _starpu_dlarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & work[jw + 1]); i__1 = *lwork - jw; _starpu_dgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] , &i__1, &info); } /* ==== Copy updated reduced window into place ==== */ if (kwtop > 1) { h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; } _starpu_dlacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] , ldh); i__1 = jw - 1; i__2 = *ldt + 1; i__3 = *ldh + 1; _starpu_dcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], &i__3); /* ==== Accumulate orthogonal matrix in order update */ /* . H and Z, if requested. ==== */ if (*ns > 1 && s != 0.) { i__1 = *lwork - jw; _starpu_dormhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, &work[jw + 1], &i__1, &info); } /* ==== Update vertical slab in H ==== */ if (*wantt) { ltop = 1; } else { ltop = *ktop; } i__1 = kwtop - 1; i__2 = *nv; for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += i__2) { /* Computing MIN */ i__3 = *nv, i__4 = kwtop - krow; kln = min(i__3,i__4); _starpu_dgemm_("N", "N", &kln, &jw, &jw, &c_b18, &h__[krow + kwtop * h_dim1], ldh, &v[v_offset], ldv, &c_b17, &wv[wv_offset], ldwv); _starpu_dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * h_dim1], ldh); /* L70: */ } /* ==== Update horizontal slab in H ==== */ if (*wantt) { i__2 = *n; i__1 = *nh; for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; kcol += i__1) { /* Computing MIN */ i__3 = *nh, i__4 = *n - kcol + 1; kln = min(i__3,i__4); _starpu_dgemm_("C", "N", &jw, &kln, &jw, &c_b18, &v[v_offset], ldv, & h__[kwtop + kcol * h_dim1], ldh, &c_b17, &t[t_offset], ldt); _starpu_dlacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * h_dim1], ldh); /* L80: */ } } /* ==== Update vertical slab in Z ==== */ if (*wantz) { i__1 = *ihiz; i__2 = *nv; for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += i__2) { /* Computing MIN */ i__3 = *nv, i__4 = *ihiz - krow + 1; kln = min(i__3,i__4); _starpu_dgemm_("N", "N", &kln, &jw, &jw, &c_b18, &z__[krow + kwtop * z_dim1], ldz, &v[v_offset], ldv, &c_b17, &wv[ wv_offset], ldwv); _starpu_dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + kwtop * z_dim1], ldz); /* L90: */ } } } /* ==== Return the number of deflations ... ==== */ *nd = jw - *ns; /* ==== ... and the number of shifts. (Subtracting */ /* . INFQR from the spike length takes care */ /* . of the case of a rare QR failure while */ /* . calculating eigenvalues of the deflation */ /* . window.) ==== */ *ns -= infqr; /* ==== Return optimal workspace. ==== */ work[1] = (doublereal) lwkopt; /* ==== End of DLAQR3 ==== */ return 0; } /* _starpu_dlaqr3_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaqr4.c000066400000000000000000000634111507764646700205700ustar00rootroot00000000000000/* dlaqr4.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__13 = 13; static integer c__15 = 15; static integer c_n1 = -1; static integer c__12 = 12; static integer c__14 = 14; static integer c__16 = 16; static logical c_false = FALSE_; static integer c__1 = 1; static integer c__3 = 3; /* Subroutine */ int _starpu_dlaqr4_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2, d__3, d__4; /* Local variables */ integer i__, k; doublereal aa, bb, cc, dd; integer ld; doublereal cs; integer nh, it, ks, kt; doublereal sn; integer ku, kv, ls, ns; doublereal ss; integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, nmin; doublereal swap; integer ktop; doublereal zdum[1] /* was [1][1] */; integer kacc22, itmax, nsmax, nwmax, kwtop; extern /* Subroutine */ int _starpu_dlaqr2_(logical *, logical *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlaqr5_( logical *, logical *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *); integer nibble; extern /* Subroutine */ int _starpu_dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); char jbcmpz[1]; integer nwupbd; logical sorted; integer lwkopt; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* This subroutine implements one level of recursion for DLAQR0. */ /* It is a complete implementation of the small bulge multi-shift */ /* QR algorithm. It may be called by DLAQR0 and, for large enough */ /* deflation window size, it may be called by DLAQR3. This */ /* subroutine is identical to DLAQR0 except that it calls DLAQR2 */ /* instead of DLAQR3. */ /* Purpose */ /* ======= */ /* DLAQR4 computes the eigenvalues of a Hessenberg matrix H */ /* and, optionally, the matrices T and Z from the Schur decomposition */ /* H = Z T Z**T, where T is an upper quasi-triangular matrix (the */ /* Schur form), and Z is the orthogonal matrix of Schur vectors. */ /* Optionally Z may be postmultiplied into an input orthogonal */ /* matrix Q so that this routine can give the Schur factorization */ /* of a matrix A which has been reduced to the Hessenberg form H */ /* by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. */ /* Arguments */ /* ========= */ /* WANTT (input) LOGICAL */ /* = .TRUE. : the full Schur form T is required; */ /* = .FALSE.: only eigenvalues are required. */ /* WANTZ (input) LOGICAL */ /* = .TRUE. : the matrix of Schur vectors Z is required; */ /* = .FALSE.: Schur vectors are not required. */ /* N (input) INTEGER */ /* The order of the matrix H. N .GE. 0. */ /* ILO (input) INTEGER */ /* IHI (input) INTEGER */ /* It is assumed that H is already upper triangular in rows */ /* and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, */ /* H(ILO,ILO-1) is zero. ILO and IHI are normally set by a */ /* previous call to DGEBAL, and then passed to DGEHRD when the */ /* matrix output by DGEBAL is reduced to Hessenberg form. */ /* Otherwise, ILO and IHI should be set to 1 and N, */ /* respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. */ /* If N = 0, then ILO = 1 and IHI = 0. */ /* H (input/output) DOUBLE PRECISION array, dimension (LDH,N) */ /* On entry, the upper Hessenberg matrix H. */ /* On exit, if INFO = 0 and WANTT is .TRUE., then H contains */ /* the upper quasi-triangular matrix T from the Schur */ /* decomposition (the Schur form); 2-by-2 diagonal blocks */ /* (corresponding to complex conjugate pairs of eigenvalues) */ /* are returned in standard form, with H(i,i) = H(i+1,i+1) */ /* and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is */ /* .FALSE., then the contents of H are unspecified on exit. */ /* (The output value of H when INFO.GT.0 is given under the */ /* description of INFO below.) */ /* This subroutine may explicitly set H(i,j) = 0 for i.GT.j and */ /* j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. */ /* LDH (input) INTEGER */ /* The leading dimension of the array H. LDH .GE. max(1,N). */ /* WR (output) DOUBLE PRECISION array, dimension (IHI) */ /* WI (output) DOUBLE PRECISION array, dimension (IHI) */ /* The real and imaginary parts, respectively, of the computed */ /* eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) */ /* and WI(ILO:IHI). If two eigenvalues are computed as a */ /* complex conjugate pair, they are stored in consecutive */ /* elements of WR and WI, say the i-th and (i+1)th, with */ /* WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then */ /* the eigenvalues are stored in the same order as on the */ /* diagonal of the Schur form returned in H, with */ /* WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal */ /* block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and */ /* WI(i+1) = -WI(i). */ /* ILOZ (input) INTEGER */ /* IHIZ (input) INTEGER */ /* Specify the rows of Z to which transformations must be */ /* applied if WANTZ is .TRUE.. */ /* 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. */ /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,IHI) */ /* If WANTZ is .FALSE., then Z is not referenced. */ /* If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is */ /* replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the */ /* orthogonal Schur factor of H(ILO:IHI,ILO:IHI). */ /* (The output value of Z when INFO.GT.0 is given under */ /* the description of INFO below.) */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. if WANTZ is .TRUE. */ /* then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension LWORK */ /* On exit, if LWORK = -1, WORK(1) returns an estimate of */ /* the optimal value for LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK .GE. max(1,N) */ /* is sufficient, but LWORK typically as large as 6*N may */ /* be required for optimal performance. A workspace query */ /* to determine the optimal workspace size is recommended. */ /* If LWORK = -1, then DLAQR4 does a workspace query. */ /* In this case, DLAQR4 checks the input parameters and */ /* estimates the optimal workspace size for the given */ /* values of N, ILO and IHI. The estimate is returned */ /* in WORK(1). No error message related to LWORK is */ /* issued by XERBLA. Neither H nor Z are accessed. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* .GT. 0: if INFO = i, DLAQR4 failed to compute all of */ /* the eigenvalues. Elements 1:ilo-1 and i+1:n of WR */ /* and WI contain those eigenvalues which have been */ /* successfully computed. (Failures are rare.) */ /* If INFO .GT. 0 and WANT is .FALSE., then on exit, */ /* the remaining unconverged eigenvalues are the eigen- */ /* values of the upper Hessenberg matrix rows and */ /* columns ILO through INFO of the final, output */ /* value of H. */ /* If INFO .GT. 0 and WANTT is .TRUE., then on exit */ /* (*) (initial value of H)*U = U*(final value of H) */ /* where U is an orthogonal matrix. The final */ /* value of H is upper Hessenberg and quasi-triangular */ /* in rows and columns INFO+1 through IHI. */ /* If INFO .GT. 0 and WANTZ is .TRUE., then on exit */ /* (final value of Z(ILO:IHI,ILOZ:IHIZ) */ /* = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U */ /* where U is the orthogonal matrix in (*) (regard- */ /* less of the value of WANTT.) */ /* If INFO .GT. 0 and WANTZ is .FALSE., then Z is not */ /* accessed. */ /* ================================================================ */ /* Based on contributions by */ /* Karen Braman and Ralph Byers, Department of Mathematics, */ /* University of Kansas, USA */ /* ================================================================ */ /* References: */ /* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */ /* Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 */ /* Performance, SIAM Journal of Matrix Analysis, volume 23, pages */ /* 929--947, 2002. */ /* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */ /* Algorithm Part II: Aggressive Early Deflation, SIAM Journal */ /* of Matrix Analysis, volume 23, pages 948--973, 2002. */ /* ================================================================ */ /* .. Parameters .. */ /* ==== Matrices of order NTINY or smaller must be processed by */ /* . DLAHQR because of insufficient subdiagonal scratch space. */ /* . (This is a hard limit.) ==== */ /* ==== Exceptional deflation windows: try to cure rare */ /* . slow convergence by varying the size of the */ /* . deflation window after KEXNW iterations. ==== */ /* ==== Exceptional shifts: try to cure rare slow convergence */ /* . with ad-hoc exceptional shifts every KEXSH iterations. */ /* . ==== */ /* ==== The constants WILK1 and WILK2 are used to form the */ /* . exceptional shifts. ==== */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; --wr; --wi; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; /* Function Body */ *info = 0; /* ==== Quick return for N = 0: nothing to do. ==== */ if (*n == 0) { work[1] = 1.; return 0; } if (*n <= 11) { /* ==== Tiny matrices must use DLAHQR. ==== */ lwkopt = 1; if (*lwork != -1) { _starpu_dlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], & wi[1], iloz, ihiz, &z__[z_offset], ldz, info); } } else { /* ==== Use small bulge multi-shift QR with aggressive early */ /* . deflation on larger-than-tiny matrices. ==== */ /* ==== Hope for the best. ==== */ *info = 0; /* ==== Set up job flags for ILAENV. ==== */ if (*wantt) { *(unsigned char *)jbcmpz = 'S'; } else { *(unsigned char *)jbcmpz = 'E'; } if (*wantz) { *(unsigned char *)&jbcmpz[1] = 'V'; } else { *(unsigned char *)&jbcmpz[1] = 'N'; } /* ==== NWR = recommended deflation window size. At this */ /* . point, N .GT. NTINY = 11, so there is enough */ /* . subdiagonal workspace for NWR.GE.2 as required. */ /* . (In fact, there is enough subdiagonal space for */ /* . NWR.GE.3.) ==== */ nwr = _starpu_ilaenv_(&c__13, "DLAQR4", jbcmpz, n, ilo, ihi, lwork); nwr = max(2,nwr); /* Computing MIN */ i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); nwr = min(i__1,nwr); /* ==== NSR = recommended number of simultaneous shifts. */ /* . At this point N .GT. NTINY = 11, so there is at */ /* . enough subdiagonal workspace for NSR to be even */ /* . and greater than or equal to two as required. ==== */ nsr = _starpu_ilaenv_(&c__15, "DLAQR4", jbcmpz, n, ilo, ihi, lwork); /* Computing MIN */ i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - *ilo; nsr = min(i__1,i__2); /* Computing MAX */ i__1 = 2, i__2 = nsr - nsr % 2; nsr = max(i__1,i__2); /* ==== Estimate optimal workspace ==== */ /* ==== Workspace query call to DLAQR2 ==== */ i__1 = nwr + 1; _starpu_dlaqr2_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[ h_offset], ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], ldh, &work[1], &c_n1); /* ==== Optimal workspace = MAX(DLAQR5, DLAQR2) ==== */ /* Computing MAX */ i__1 = nsr * 3 / 2, i__2 = (integer) work[1]; lwkopt = max(i__1,i__2); /* ==== Quick return in case of workspace query. ==== */ if (*lwork == -1) { work[1] = (doublereal) lwkopt; return 0; } /* ==== DLAHQR/DLAQR0 crossover point ==== */ nmin = _starpu_ilaenv_(&c__12, "DLAQR4", jbcmpz, n, ilo, ihi, lwork); nmin = max(11,nmin); /* ==== Nibble crossover point ==== */ nibble = _starpu_ilaenv_(&c__14, "DLAQR4", jbcmpz, n, ilo, ihi, lwork); nibble = max(0,nibble); /* ==== Accumulate reflections during ttswp? Use block */ /* . 2-by-2 structure during matrix-matrix multiply? ==== */ kacc22 = _starpu_ilaenv_(&c__16, "DLAQR4", jbcmpz, n, ilo, ihi, lwork); kacc22 = max(0,kacc22); kacc22 = min(2,kacc22); /* ==== NWMAX = the largest possible deflation window for */ /* . which there is sufficient workspace. ==== */ /* Computing MIN */ i__1 = (*n - 1) / 3, i__2 = *lwork / 2; nwmax = min(i__1,i__2); nw = nwmax; /* ==== NSMAX = the Largest number of simultaneous shifts */ /* . for which there is sufficient workspace. ==== */ /* Computing MIN */ i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; nsmax = min(i__1,i__2); nsmax -= nsmax % 2; /* ==== NDFL: an iteration count restarted at deflation. ==== */ ndfl = 1; /* ==== ITMAX = iteration limit ==== */ /* Computing MAX */ i__1 = 10, i__2 = *ihi - *ilo + 1; itmax = max(i__1,i__2) * 30; /* ==== Last row and column in the active block ==== */ kbot = *ihi; /* ==== Main Loop ==== */ i__1 = itmax; for (it = 1; it <= i__1; ++it) { /* ==== Done when KBOT falls below ILO ==== */ if (kbot < *ilo) { goto L90; } /* ==== Locate active block ==== */ i__2 = *ilo + 1; for (k = kbot; k >= i__2; --k) { if (h__[k + (k - 1) * h_dim1] == 0.) { goto L20; } /* L10: */ } k = *ilo; L20: ktop = k; /* ==== Select deflation window size: */ /* . Typical Case: */ /* . If possible and advisable, nibble the entire */ /* . active block. If not, use size MIN(NWR,NWMAX) */ /* . or MIN(NWR+1,NWMAX) depending upon which has */ /* . the smaller corresponding subdiagonal entry */ /* . (a heuristic). */ /* . */ /* . Exceptional Case: */ /* . If there have been no deflations in KEXNW or */ /* . more iterations, then vary the deflation window */ /* . size. At first, because, larger windows are, */ /* . in general, more powerful than smaller ones, */ /* . rapidly increase the window to the maximum possible. */ /* . Then, gradually reduce the window size. ==== */ nh = kbot - ktop + 1; nwupbd = min(nh,nwmax); if (ndfl < 5) { nw = min(nwupbd,nwr); } else { /* Computing MIN */ i__2 = nwupbd, i__3 = nw << 1; nw = min(i__2,i__3); } if (nw < nwmax) { if (nw >= nh - 1) { nw = nh; } else { kwtop = kbot - nw + 1; if ((d__1 = h__[kwtop + (kwtop - 1) * h_dim1], abs(d__1)) > (d__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], abs(d__2))) { ++nw; } } } if (ndfl < 5) { ndec = -1; } else if (ndec >= 0 || nw >= nwupbd) { ++ndec; if (nw - ndec < 2) { ndec = 0; } nw -= ndec; } /* ==== Aggressive early deflation: */ /* . split workspace under the subdiagonal into */ /* . - an nw-by-nw work array V in the lower */ /* . left-hand-corner, */ /* . - an NW-by-at-least-NW-but-more-is-better */ /* . (NW-by-NHO) horizontal work array along */ /* . the bottom edge, */ /* . - an at-least-NW-but-more-is-better (NHV-by-NW) */ /* . vertical work array along the left-hand-edge. */ /* . ==== */ kv = *n - nw + 1; kt = nw + 1; nho = *n - nw - 1 - kt + 1; kwv = nw + 2; nve = *n - nw - kwv + 1; /* ==== Aggressive early deflation ==== */ _starpu_dlaqr2_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[kv + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); /* ==== Adjust KBOT accounting for new deflations. ==== */ kbot -= ld; /* ==== KS points to the shifts. ==== */ ks = kbot - ls + 1; /* ==== Skip an expensive QR sweep if there is a (partly */ /* . heuristic) reason to expect that many eigenvalues */ /* . will deflate without it. Here, the QR sweep is */ /* . skipped if many eigenvalues have just been deflated */ /* . or if the remaining active block is small. */ if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( nmin,nwmax)) { /* ==== NS = nominal number of simultaneous shifts. */ /* . This may be lowered (slightly) if DLAQR2 */ /* . did not provide that many shifts. ==== */ /* Computing MIN */ /* Computing MAX */ i__4 = 2, i__5 = kbot - ktop; i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); ns = min(i__2,i__3); ns -= ns % 2; /* ==== If there have been no deflations */ /* . in a multiple of KEXSH iterations, */ /* . then try exceptional shifts. */ /* . Otherwise use shifts provided by */ /* . DLAQR2 above or from the eigenvalues */ /* . of a trailing principal submatrix. ==== */ if (ndfl % 6 == 0) { ks = kbot - ns + 1; /* Computing MAX */ i__3 = ks + 1, i__4 = ktop + 2; i__2 = max(i__3,i__4); for (i__ = kbot; i__ >= i__2; i__ += -2) { ss = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); aa = ss * .75 + h__[i__ + i__ * h_dim1]; bb = ss; cc = ss * -.4375; dd = aa; _starpu_dlanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1] , &wr[i__], &wi[i__], &cs, &sn); /* L30: */ } if (ks == ktop) { wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; wi[ks + 1] = 0.; wr[ks] = wr[ks + 1]; wi[ks] = wi[ks + 1]; } } else { /* ==== Got NS/2 or fewer shifts? Use DLAHQR */ /* . on a trailing principal submatrix to */ /* . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, */ /* . there is enough space below the subdiagonal */ /* . to fit an NS-by-NS scratch array.) ==== */ if (kbot - ks + 1 <= ns / 2) { ks = kbot - ns + 1; kt = *n - ns + 1; _starpu_dlacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & h__[kt + h_dim1], ldh); _starpu_dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + h_dim1], ldh, &wr[ks], &wi[ks], &c__1, & c__1, zdum, &c__1, &inf); ks += inf; /* ==== In case of a rare QR failure use */ /* . eigenvalues of the trailing 2-by-2 */ /* . principal submatrix. ==== */ if (ks >= kbot) { aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; cc = h__[kbot + (kbot - 1) * h_dim1]; bb = h__[kbot - 1 + kbot * h_dim1]; dd = h__[kbot + kbot * h_dim1]; _starpu_dlanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[ kbot - 1], &wr[kbot], &wi[kbot], &cs, &sn) ; ks = kbot - 1; } } if (kbot - ks + 1 > ns) { /* ==== Sort the shifts (Helps a little) */ /* . Bubble sort keeps complex conjugate */ /* . pairs together. ==== */ sorted = FALSE_; i__2 = ks + 1; for (k = kbot; k >= i__2; --k) { if (sorted) { goto L60; } sorted = TRUE_; i__3 = k - 1; for (i__ = ks; i__ <= i__3; ++i__) { if ((d__1 = wr[i__], abs(d__1)) + (d__2 = wi[ i__], abs(d__2)) < (d__3 = wr[i__ + 1] , abs(d__3)) + (d__4 = wi[i__ + 1], abs(d__4))) { sorted = FALSE_; swap = wr[i__]; wr[i__] = wr[i__ + 1]; wr[i__ + 1] = swap; swap = wi[i__]; wi[i__] = wi[i__ + 1]; wi[i__ + 1] = swap; } /* L40: */ } /* L50: */ } L60: ; } /* ==== Shuffle shifts into pairs of real shifts */ /* . and pairs of complex conjugate shifts */ /* . assuming complex conjugate shifts are */ /* . already adjacent to one another. (Yes, */ /* . they are.) ==== */ i__2 = ks + 2; for (i__ = kbot; i__ >= i__2; i__ += -2) { if (wi[i__] != -wi[i__ - 1]) { swap = wr[i__]; wr[i__] = wr[i__ - 1]; wr[i__ - 1] = wr[i__ - 2]; wr[i__ - 2] = swap; swap = wi[i__]; wi[i__] = wi[i__ - 1]; wi[i__ - 1] = wi[i__ - 2]; wi[i__ - 2] = swap; } /* L70: */ } } /* ==== If there are only two shifts and both are */ /* . real, then use only one. ==== */ if (kbot - ks + 1 == 2) { if (wi[kbot] == 0.) { if ((d__1 = wr[kbot] - h__[kbot + kbot * h_dim1], abs( d__1)) < (d__2 = wr[kbot - 1] - h__[kbot + kbot * h_dim1], abs(d__2))) { wr[kbot - 1] = wr[kbot]; } else { wr[kbot] = wr[kbot - 1]; } } } /* ==== Use up to NS of the the smallest magnatiude */ /* . shifts. If there aren't NS shifts available, */ /* . then use them all, possibly dropping one to */ /* . make the number of shifts even. ==== */ /* Computing MIN */ i__2 = ns, i__3 = kbot - ks + 1; ns = min(i__2,i__3); ns -= ns % 2; ks = kbot - ns + 1; /* ==== Small-bulge multi-shift QR sweep: */ /* . split workspace under the subdiagonal into */ /* . - a KDU-by-KDU work array U in the lower */ /* . left-hand-corner, */ /* . - a KDU-by-at-least-KDU-but-more-is-better */ /* . (KDU-by-NHo) horizontal work array WH along */ /* . the bottom edge, */ /* . - and an at-least-KDU-but-more-is-better-by-KDU */ /* . (NVE-by-KDU) vertical work WV arrow along */ /* . the left-hand-edge. ==== */ kdu = ns * 3 - 3; ku = *n - kdu + 1; kwh = kdu + 1; nho = *n - kdu - 3 - (kdu + 1) + 1; kwv = kdu + 4; nve = *n - kdu - kwv + 1; /* ==== Small-bulge multi-shift QR sweep ==== */ _starpu_dlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], &wi[ks], &h__[h_offset], ldh, iloz, ihiz, &z__[ z_offset], ldz, &work[1], &c__3, &h__[ku + h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, &h__[ku + kwh * h_dim1], ldh); } /* ==== Note progress (or the lack of it). ==== */ if (ld > 0) { ndfl = 1; } else { ++ndfl; } /* ==== End of main loop ==== */ /* L80: */ } /* ==== Iteration limit exceeded. Set INFO to show where */ /* . the problem occurred and exit. ==== */ *info = kbot; L90: ; } /* ==== Return the optimal value of LWORK. ==== */ work[1] = (doublereal) lwkopt; /* ==== End of DLAQR4 ==== */ return 0; } /* _starpu_dlaqr4_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaqr5.c000066400000000000000000001031771507764646700205750ustar00rootroot00000000000000/* dlaqr5.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b7 = 0.; static doublereal c_b8 = 1.; static integer c__3 = 3; static integer c__1 = 1; static integer c__2 = 2; /* Subroutine */ int _starpu_dlaqr5_(logical *wantt, logical *wantz, integer *kacc22, integer *n, integer *ktop, integer *kbot, integer *nshfts, doublereal *sr, doublereal *si, doublereal *h__, integer *ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, doublereal *v, integer * ldv, doublereal *u, integer *ldu, integer *nv, doublereal *wv, integer *ldwv, integer *nh, doublereal *wh, integer *ldwh) { /* System generated locals */ integer h_dim1, h_offset, u_dim1, u_offset, v_dim1, v_offset, wh_dim1, wh_offset, wv_dim1, wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublereal d__1, d__2, d__3, d__4, d__5; /* Local variables */ integer i__, j, k, m, i2, j2, i4, j4, k1; doublereal h11, h12, h21, h22; integer m22, ns, nu; doublereal vt[3], scl; integer kdu, kms; doublereal ulp; integer knz, kzs; doublereal tst1, tst2, beta; logical blk22, bmp22; integer mend, jcol, jlen, jbot, mbot; doublereal swap; integer jtop, jrow, mtop; doublereal alpha; logical accum; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer ndcol, incol, krcol, nbmps; extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaqr1_( integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlabad_(doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal safmin; extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); doublereal safmax, refsum; integer mstart; doublereal smlnum; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* This auxiliary subroutine called by DLAQR0 performs a */ /* single small-bulge multi-shift QR sweep. */ /* WANTT (input) logical scalar */ /* WANTT = .true. if the quasi-triangular Schur factor */ /* is being computed. WANTT is set to .false. otherwise. */ /* WANTZ (input) logical scalar */ /* WANTZ = .true. if the orthogonal Schur factor is being */ /* computed. WANTZ is set to .false. otherwise. */ /* KACC22 (input) integer with value 0, 1, or 2. */ /* Specifies the computation mode of far-from-diagonal */ /* orthogonal updates. */ /* = 0: DLAQR5 does not accumulate reflections and does not */ /* use matrix-matrix multiply to update far-from-diagonal */ /* matrix entries. */ /* = 1: DLAQR5 accumulates reflections and uses matrix-matrix */ /* multiply to update the far-from-diagonal matrix entries. */ /* = 2: DLAQR5 accumulates reflections, uses matrix-matrix */ /* multiply to update the far-from-diagonal matrix entries, */ /* and takes advantage of 2-by-2 block structure during */ /* matrix multiplies. */ /* N (input) integer scalar */ /* N is the order of the Hessenberg matrix H upon which this */ /* subroutine operates. */ /* KTOP (input) integer scalar */ /* KBOT (input) integer scalar */ /* These are the first and last rows and columns of an */ /* isolated diagonal block upon which the QR sweep is to be */ /* applied. It is assumed without a check that */ /* either KTOP = 1 or H(KTOP,KTOP-1) = 0 */ /* and */ /* either KBOT = N or H(KBOT+1,KBOT) = 0. */ /* NSHFTS (input) integer scalar */ /* NSHFTS gives the number of simultaneous shifts. NSHFTS */ /* must be positive and even. */ /* SR (input/output) DOUBLE PRECISION array of size (NSHFTS) */ /* SI (input/output) DOUBLE PRECISION array of size (NSHFTS) */ /* SR contains the real parts and SI contains the imaginary */ /* parts of the NSHFTS shifts of origin that define the */ /* multi-shift QR sweep. On output SR and SI may be */ /* reordered. */ /* H (input/output) DOUBLE PRECISION array of size (LDH,N) */ /* On input H contains a Hessenberg matrix. On output a */ /* multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied */ /* to the isolated diagonal block in rows and columns KTOP */ /* through KBOT. */ /* LDH (input) integer scalar */ /* LDH is the leading dimension of H just as declared in the */ /* calling procedure. LDH.GE.MAX(1,N). */ /* ILOZ (input) INTEGER */ /* IHIZ (input) INTEGER */ /* Specify the rows of Z to which transformations must be */ /* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N */ /* Z (input/output) DOUBLE PRECISION array of size (LDZ,IHI) */ /* If WANTZ = .TRUE., then the QR Sweep orthogonal */ /* similarity transformation is accumulated into */ /* Z(ILOZ:IHIZ,ILO:IHI) from the right. */ /* If WANTZ = .FALSE., then Z is unreferenced. */ /* LDZ (input) integer scalar */ /* LDA is the leading dimension of Z just as declared in */ /* the calling procedure. LDZ.GE.N. */ /* V (workspace) DOUBLE PRECISION array of size (LDV,NSHFTS/2) */ /* LDV (input) integer scalar */ /* LDV is the leading dimension of V as declared in the */ /* calling procedure. LDV.GE.3. */ /* U (workspace) DOUBLE PRECISION array of size */ /* (LDU,3*NSHFTS-3) */ /* LDU (input) integer scalar */ /* LDU is the leading dimension of U just as declared in the */ /* in the calling subroutine. LDU.GE.3*NSHFTS-3. */ /* NH (input) integer scalar */ /* NH is the number of columns in array WH available for */ /* workspace. NH.GE.1. */ /* WH (workspace) DOUBLE PRECISION array of size (LDWH,NH) */ /* LDWH (input) integer scalar */ /* Leading dimension of WH just as declared in the */ /* calling procedure. LDWH.GE.3*NSHFTS-3. */ /* NV (input) integer scalar */ /* NV is the number of rows in WV agailable for workspace. */ /* NV.GE.1. */ /* WV (workspace) DOUBLE PRECISION array of size */ /* (LDWV,3*NSHFTS-3) */ /* LDWV (input) integer scalar */ /* LDWV is the leading dimension of WV as declared in the */ /* in the calling subroutine. LDWV.GE.NV. */ /* ================================================================ */ /* Based on contributions by */ /* Karen Braman and Ralph Byers, Department of Mathematics, */ /* University of Kansas, USA */ /* ================================================================ */ /* Reference: */ /* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */ /* Algorithm Part I: Maintaining Well Focused Shifts, and */ /* Level 3 Performance, SIAM Journal of Matrix Analysis, */ /* volume 23, pages 929--947, 2002. */ /* ================================================================ */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* ==== If there are no shifts, then there is nothing to do. ==== */ /* Parameter adjustments */ --sr; --si; h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; wv_dim1 = *ldwv; wv_offset = 1 + wv_dim1; wv -= wv_offset; wh_dim1 = *ldwh; wh_offset = 1 + wh_dim1; wh -= wh_offset; /* Function Body */ if (*nshfts < 2) { return 0; } /* ==== If the active block is empty or 1-by-1, then there */ /* . is nothing to do. ==== */ if (*ktop >= *kbot) { return 0; } /* ==== Shuffle shifts into pairs of real shifts and pairs */ /* . of complex conjugate shifts assuming complex */ /* . conjugate shifts are already adjacent to one */ /* . another. ==== */ i__1 = *nshfts - 2; for (i__ = 1; i__ <= i__1; i__ += 2) { if (si[i__] != -si[i__ + 1]) { swap = sr[i__]; sr[i__] = sr[i__ + 1]; sr[i__ + 1] = sr[i__ + 2]; sr[i__ + 2] = swap; swap = si[i__]; si[i__] = si[i__ + 1]; si[i__ + 1] = si[i__ + 2]; si[i__ + 2] = swap; } /* L10: */ } /* ==== NSHFTS is supposed to be even, but if it is odd, */ /* . then simply reduce it by one. The shuffle above */ /* . ensures that the dropped shift is real and that */ /* . the remaining shifts are paired. ==== */ ns = *nshfts - *nshfts % 2; /* ==== Machine constants for deflation ==== */ safmin = _starpu_dlamch_("SAFE MINIMUM"); safmax = 1. / safmin; _starpu_dlabad_(&safmin, &safmax); ulp = _starpu_dlamch_("PRECISION"); smlnum = safmin * ((doublereal) (*n) / ulp); /* ==== Use accumulated reflections to update far-from-diagonal */ /* . entries ? ==== */ accum = *kacc22 == 1 || *kacc22 == 2; /* ==== If so, exploit the 2-by-2 block structure? ==== */ blk22 = ns > 2 && *kacc22 == 2; /* ==== clear trash ==== */ if (*ktop + 2 <= *kbot) { h__[*ktop + 2 + *ktop * h_dim1] = 0.; } /* ==== NBMPS = number of 2-shift bulges in the chain ==== */ nbmps = ns / 2; /* ==== KDU = width of slab ==== */ kdu = nbmps * 6 - 3; /* ==== Create and chase chains of NBMPS bulges ==== */ i__1 = *kbot - 2; i__2 = nbmps * 3 - 2; for (incol = (1 - nbmps) * 3 + *ktop - 1; i__2 < 0 ? incol >= i__1 : incol <= i__1; incol += i__2) { ndcol = incol + kdu; if (accum) { _starpu_dlaset_("ALL", &kdu, &kdu, &c_b7, &c_b8, &u[u_offset], ldu); } /* ==== Near-the-diagonal bulge chase. The following loop */ /* . performs the near-the-diagonal part of a small bulge */ /* . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal */ /* . chunk extends from column INCOL to column NDCOL */ /* . (including both column INCOL and column NDCOL). The */ /* . following loop chases a 3*NBMPS column long chain of */ /* . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL */ /* . may be less than KTOP and and NDCOL may be greater than */ /* . KBOT indicating phantom columns from which to chase */ /* . bulges before they are actually introduced or to which */ /* . to chase bulges beyond column KBOT.) ==== */ /* Computing MIN */ i__4 = incol + nbmps * 3 - 3, i__5 = *kbot - 2; i__3 = min(i__4,i__5); for (krcol = incol; krcol <= i__3; ++krcol) { /* ==== Bulges number MTOP to MBOT are active double implicit */ /* . shift bulges. There may or may not also be small */ /* . 2-by-2 bulge, if there is room. The inactive bulges */ /* . (if any) must wait until the active bulges have moved */ /* . down the diagonal to make room. The phantom matrix */ /* . paradigm described above helps keep track. ==== */ /* Computing MAX */ i__4 = 1, i__5 = (*ktop - 1 - krcol + 2) / 3 + 1; mtop = max(i__4,i__5); /* Computing MIN */ i__4 = nbmps, i__5 = (*kbot - krcol) / 3; mbot = min(i__4,i__5); m22 = mbot + 1; bmp22 = mbot < nbmps && krcol + (m22 - 1) * 3 == *kbot - 2; /* ==== Generate reflections to chase the chain right */ /* . one column. (The minimum value of K is KTOP-1.) ==== */ i__4 = mbot; for (m = mtop; m <= i__4; ++m) { k = krcol + (m - 1) * 3; if (k == *ktop - 1) { _starpu_dlaqr1_(&c__3, &h__[*ktop + *ktop * h_dim1], ldh, &sr[(m << 1) - 1], &si[(m << 1) - 1], &sr[m * 2], &si[m * 2], &v[m * v_dim1 + 1]); alpha = v[m * v_dim1 + 1]; _starpu_dlarfg_(&c__3, &alpha, &v[m * v_dim1 + 2], &c__1, &v[m * v_dim1 + 1]); } else { beta = h__[k + 1 + k * h_dim1]; v[m * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; v[m * v_dim1 + 3] = h__[k + 3 + k * h_dim1]; _starpu_dlarfg_(&c__3, &beta, &v[m * v_dim1 + 2], &c__1, &v[m * v_dim1 + 1]); /* ==== A Bulge may collapse because of vigilant */ /* . deflation or destructive underflow. In the */ /* . underflow case, try the two-small-subdiagonals */ /* . trick to try to reinflate the bulge. ==== */ if (h__[k + 3 + k * h_dim1] != 0. || h__[k + 3 + (k + 1) * h_dim1] != 0. || h__[k + 3 + (k + 2) * h_dim1] == 0.) { /* ==== Typical case: not collapsed (yet). ==== */ h__[k + 1 + k * h_dim1] = beta; h__[k + 2 + k * h_dim1] = 0.; h__[k + 3 + k * h_dim1] = 0.; } else { /* ==== Atypical case: collapsed. Attempt to */ /* . reintroduce ignoring H(K+1,K) and H(K+2,K). */ /* . If the fill resulting from the new */ /* . reflector is too large, then abandon it. */ /* . Otherwise, use the new one. ==== */ _starpu_dlaqr1_(&c__3, &h__[k + 1 + (k + 1) * h_dim1], ldh, & sr[(m << 1) - 1], &si[(m << 1) - 1], &sr[m * 2], &si[m * 2], vt); alpha = vt[0]; _starpu_dlarfg_(&c__3, &alpha, &vt[1], &c__1, vt); refsum = vt[0] * (h__[k + 1 + k * h_dim1] + vt[1] * h__[k + 2 + k * h_dim1]); if ((d__1 = h__[k + 2 + k * h_dim1] - refsum * vt[1], abs(d__1)) + (d__2 = refsum * vt[2], abs(d__2) ) > ulp * ((d__3 = h__[k + k * h_dim1], abs( d__3)) + (d__4 = h__[k + 1 + (k + 1) * h_dim1] , abs(d__4)) + (d__5 = h__[k + 2 + (k + 2) * h_dim1], abs(d__5)))) { /* ==== Starting a new bulge here would */ /* . create non-negligible fill. Use */ /* . the old one with trepidation. ==== */ h__[k + 1 + k * h_dim1] = beta; h__[k + 2 + k * h_dim1] = 0.; h__[k + 3 + k * h_dim1] = 0.; } else { /* ==== Stating a new bulge here would */ /* . create only negligible fill. */ /* . Replace the old reflector with */ /* . the new one. ==== */ h__[k + 1 + k * h_dim1] -= refsum; h__[k + 2 + k * h_dim1] = 0.; h__[k + 3 + k * h_dim1] = 0.; v[m * v_dim1 + 1] = vt[0]; v[m * v_dim1 + 2] = vt[1]; v[m * v_dim1 + 3] = vt[2]; } } } /* L20: */ } /* ==== Generate a 2-by-2 reflection, if needed. ==== */ k = krcol + (m22 - 1) * 3; if (bmp22) { if (k == *ktop - 1) { _starpu_dlaqr1_(&c__2, &h__[k + 1 + (k + 1) * h_dim1], ldh, &sr[( m22 << 1) - 1], &si[(m22 << 1) - 1], &sr[m22 * 2], &si[m22 * 2], &v[m22 * v_dim1 + 1]); beta = v[m22 * v_dim1 + 1]; _starpu_dlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 * v_dim1 + 1]); } else { beta = h__[k + 1 + k * h_dim1]; v[m22 * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; _starpu_dlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 * v_dim1 + 1]); h__[k + 1 + k * h_dim1] = beta; h__[k + 2 + k * h_dim1] = 0.; } } /* ==== Multiply H by reflections from the left ==== */ if (accum) { jbot = min(ndcol,*kbot); } else if (*wantt) { jbot = *n; } else { jbot = *kbot; } i__4 = jbot; for (j = max(*ktop,krcol); j <= i__4; ++j) { /* Computing MIN */ i__5 = mbot, i__6 = (j - krcol + 2) / 3; mend = min(i__5,i__6); i__5 = mend; for (m = mtop; m <= i__5; ++m) { k = krcol + (m - 1) * 3; refsum = v[m * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + v[ m * v_dim1 + 2] * h__[k + 2 + j * h_dim1] + v[m * v_dim1 + 3] * h__[k + 3 + j * h_dim1]); h__[k + 1 + j * h_dim1] -= refsum; h__[k + 2 + j * h_dim1] -= refsum * v[m * v_dim1 + 2]; h__[k + 3 + j * h_dim1] -= refsum * v[m * v_dim1 + 3]; /* L30: */ } /* L40: */ } if (bmp22) { k = krcol + (m22 - 1) * 3; /* Computing MAX */ i__4 = k + 1; i__5 = jbot; for (j = max(i__4,*ktop); j <= i__5; ++j) { refsum = v[m22 * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + v[m22 * v_dim1 + 2] * h__[k + 2 + j * h_dim1]); h__[k + 1 + j * h_dim1] -= refsum; h__[k + 2 + j * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; /* L50: */ } } /* ==== Multiply H by reflections from the right. */ /* . Delay filling in the last row until the */ /* . vigilant deflation check is complete. ==== */ if (accum) { jtop = max(*ktop,incol); } else if (*wantt) { jtop = 1; } else { jtop = *ktop; } i__5 = mbot; for (m = mtop; m <= i__5; ++m) { if (v[m * v_dim1 + 1] != 0.) { k = krcol + (m - 1) * 3; /* Computing MIN */ i__6 = *kbot, i__7 = k + 3; i__4 = min(i__6,i__7); for (j = jtop; j <= i__4; ++j) { refsum = v[m * v_dim1 + 1] * (h__[j + (k + 1) * h_dim1] + v[m * v_dim1 + 2] * h__[j + (k + 2) * h_dim1] + v[m * v_dim1 + 3] * h__[j + (k + 3) * h_dim1]); h__[j + (k + 1) * h_dim1] -= refsum; h__[j + (k + 2) * h_dim1] -= refsum * v[m * v_dim1 + 2]; h__[j + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + 3]; /* L60: */ } if (accum) { /* ==== Accumulate U. (If necessary, update Z later */ /* . with with an efficient matrix-matrix */ /* . multiply.) ==== */ kms = k - incol; /* Computing MAX */ i__4 = 1, i__6 = *ktop - incol; i__7 = kdu; for (j = max(i__4,i__6); j <= i__7; ++j) { refsum = v[m * v_dim1 + 1] * (u[j + (kms + 1) * u_dim1] + v[m * v_dim1 + 2] * u[j + (kms + 2) * u_dim1] + v[m * v_dim1 + 3] * u[j + (kms + 3) * u_dim1]); u[j + (kms + 1) * u_dim1] -= refsum; u[j + (kms + 2) * u_dim1] -= refsum * v[m * v_dim1 + 2]; u[j + (kms + 3) * u_dim1] -= refsum * v[m * v_dim1 + 3]; /* L70: */ } } else if (*wantz) { /* ==== U is not accumulated, so update Z */ /* . now by multiplying by reflections */ /* . from the right. ==== */ i__7 = *ihiz; for (j = *iloz; j <= i__7; ++j) { refsum = v[m * v_dim1 + 1] * (z__[j + (k + 1) * z_dim1] + v[m * v_dim1 + 2] * z__[j + (k + 2) * z_dim1] + v[m * v_dim1 + 3] * z__[ j + (k + 3) * z_dim1]); z__[j + (k + 1) * z_dim1] -= refsum; z__[j + (k + 2) * z_dim1] -= refsum * v[m * v_dim1 + 2]; z__[j + (k + 3) * z_dim1] -= refsum * v[m * v_dim1 + 3]; /* L80: */ } } } /* L90: */ } /* ==== Special case: 2-by-2 reflection (if needed) ==== */ k = krcol + (m22 - 1) * 3; if (bmp22 && v[m22 * v_dim1 + 1] != 0.) { /* Computing MIN */ i__7 = *kbot, i__4 = k + 3; i__5 = min(i__7,i__4); for (j = jtop; j <= i__5; ++j) { refsum = v[m22 * v_dim1 + 1] * (h__[j + (k + 1) * h_dim1] + v[m22 * v_dim1 + 2] * h__[j + (k + 2) * h_dim1]) ; h__[j + (k + 1) * h_dim1] -= refsum; h__[j + (k + 2) * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; /* L100: */ } if (accum) { kms = k - incol; /* Computing MAX */ i__5 = 1, i__7 = *ktop - incol; i__4 = kdu; for (j = max(i__5,i__7); j <= i__4; ++j) { refsum = v[m22 * v_dim1 + 1] * (u[j + (kms + 1) * u_dim1] + v[m22 * v_dim1 + 2] * u[j + (kms + 2) * u_dim1]); u[j + (kms + 1) * u_dim1] -= refsum; u[j + (kms + 2) * u_dim1] -= refsum * v[m22 * v_dim1 + 2]; /* L110: */ } } else if (*wantz) { i__4 = *ihiz; for (j = *iloz; j <= i__4; ++j) { refsum = v[m22 * v_dim1 + 1] * (z__[j + (k + 1) * z_dim1] + v[m22 * v_dim1 + 2] * z__[j + (k + 2) * z_dim1]); z__[j + (k + 1) * z_dim1] -= refsum; z__[j + (k + 2) * z_dim1] -= refsum * v[m22 * v_dim1 + 2]; /* L120: */ } } } /* ==== Vigilant deflation check ==== */ mstart = mtop; if (krcol + (mstart - 1) * 3 < *ktop) { ++mstart; } mend = mbot; if (bmp22) { ++mend; } if (krcol == *kbot - 2) { ++mend; } i__4 = mend; for (m = mstart; m <= i__4; ++m) { /* Computing MIN */ i__5 = *kbot - 1, i__7 = krcol + (m - 1) * 3; k = min(i__5,i__7); /* ==== The following convergence test requires that */ /* . the tradition small-compared-to-nearby-diagonals */ /* . criterion and the Ahues & Tisseur (LAWN 122, 1997) */ /* . criteria both be satisfied. The latter improves */ /* . accuracy in some examples. Falling back on an */ /* . alternate convergence criterion when TST1 or TST2 */ /* . is zero (as done here) is traditional but probably */ /* . unnecessary. ==== */ if (h__[k + 1 + k * h_dim1] != 0.) { tst1 = (d__1 = h__[k + k * h_dim1], abs(d__1)) + (d__2 = h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); if (tst1 == 0.) { if (k >= *ktop + 1) { tst1 += (d__1 = h__[k + (k - 1) * h_dim1], abs( d__1)); } if (k >= *ktop + 2) { tst1 += (d__1 = h__[k + (k - 2) * h_dim1], abs( d__1)); } if (k >= *ktop + 3) { tst1 += (d__1 = h__[k + (k - 3) * h_dim1], abs( d__1)); } if (k <= *kbot - 2) { tst1 += (d__1 = h__[k + 2 + (k + 1) * h_dim1], abs(d__1)); } if (k <= *kbot - 3) { tst1 += (d__1 = h__[k + 3 + (k + 1) * h_dim1], abs(d__1)); } if (k <= *kbot - 4) { tst1 += (d__1 = h__[k + 4 + (k + 1) * h_dim1], abs(d__1)); } } /* Computing MAX */ d__2 = smlnum, d__3 = ulp * tst1; if ((d__1 = h__[k + 1 + k * h_dim1], abs(d__1)) <= max( d__2,d__3)) { /* Computing MAX */ d__3 = (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)), d__4 = (d__2 = h__[k + (k + 1) * h_dim1], abs( d__2)); h12 = max(d__3,d__4); /* Computing MIN */ d__3 = (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)), d__4 = (d__2 = h__[k + (k + 1) * h_dim1], abs( d__2)); h21 = min(d__3,d__4); /* Computing MAX */ d__3 = (d__1 = h__[k + 1 + (k + 1) * h_dim1], abs( d__1)), d__4 = (d__2 = h__[k + k * h_dim1] - h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); h11 = max(d__3,d__4); /* Computing MIN */ d__3 = (d__1 = h__[k + 1 + (k + 1) * h_dim1], abs( d__1)), d__4 = (d__2 = h__[k + k * h_dim1] - h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); h22 = min(d__3,d__4); scl = h11 + h12; tst2 = h22 * (h11 / scl); /* Computing MAX */ d__1 = smlnum, d__2 = ulp * tst2; if (tst2 == 0. || h21 * (h12 / scl) <= max(d__1,d__2)) { h__[k + 1 + k * h_dim1] = 0.; } } } /* L130: */ } /* ==== Fill in the last row of each bulge. ==== */ /* Computing MIN */ i__4 = nbmps, i__5 = (*kbot - krcol - 1) / 3; mend = min(i__4,i__5); i__4 = mend; for (m = mtop; m <= i__4; ++m) { k = krcol + (m - 1) * 3; refsum = v[m * v_dim1 + 1] * v[m * v_dim1 + 3] * h__[k + 4 + ( k + 3) * h_dim1]; h__[k + 4 + (k + 1) * h_dim1] = -refsum; h__[k + 4 + (k + 2) * h_dim1] = -refsum * v[m * v_dim1 + 2]; h__[k + 4 + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + 3]; /* L140: */ } /* ==== End of near-the-diagonal bulge chase. ==== */ /* L150: */ } /* ==== Use U (if accumulated) to update far-from-diagonal */ /* . entries in H. If required, use U to update Z as */ /* . well. ==== */ if (accum) { if (*wantt) { jtop = 1; jbot = *n; } else { jtop = *ktop; jbot = *kbot; } if (! blk22 || incol < *ktop || ndcol > *kbot || ns <= 2) { /* ==== Updates not exploiting the 2-by-2 block */ /* . structure of U. K1 and NU keep track of */ /* . the location and size of U in the special */ /* . cases of introducing bulges and chasing */ /* . bulges off the bottom. In these special */ /* . cases and in case the number of shifts */ /* . is NS = 2, there is no 2-by-2 block */ /* . structure to exploit. ==== */ /* Computing MAX */ i__3 = 1, i__4 = *ktop - incol; k1 = max(i__3,i__4); /* Computing MAX */ i__3 = 0, i__4 = ndcol - *kbot; nu = kdu - max(i__3,i__4) - k1 + 1; /* ==== Horizontal Multiply ==== */ i__3 = jbot; i__4 = *nh; for (jcol = min(ndcol,*kbot) + 1; i__4 < 0 ? jcol >= i__3 : jcol <= i__3; jcol += i__4) { /* Computing MIN */ i__5 = *nh, i__7 = jbot - jcol + 1; jlen = min(i__5,i__7); _starpu_dgemm_("C", "N", &nu, &jlen, &nu, &c_b8, &u[k1 + k1 * u_dim1], ldu, &h__[incol + k1 + jcol * h_dim1], ldh, &c_b7, &wh[wh_offset], ldwh); _starpu_dlacpy_("ALL", &nu, &jlen, &wh[wh_offset], ldwh, &h__[ incol + k1 + jcol * h_dim1], ldh); /* L160: */ } /* ==== Vertical multiply ==== */ i__4 = max(*ktop,incol) - 1; i__3 = *nv; for (jrow = jtop; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; jrow += i__3) { /* Computing MIN */ i__5 = *nv, i__7 = max(*ktop,incol) - jrow; jlen = min(i__5,i__7); _starpu_dgemm_("N", "N", &jlen, &nu, &nu, &c_b8, &h__[jrow + ( incol + k1) * h_dim1], ldh, &u[k1 + k1 * u_dim1], ldu, &c_b7, &wv[wv_offset], ldwv); _starpu_dlacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &h__[ jrow + (incol + k1) * h_dim1], ldh); /* L170: */ } /* ==== Z multiply (also vertical) ==== */ if (*wantz) { i__3 = *ihiz; i__4 = *nv; for (jrow = *iloz; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; jrow += i__4) { /* Computing MIN */ i__5 = *nv, i__7 = *ihiz - jrow + 1; jlen = min(i__5,i__7); _starpu_dgemm_("N", "N", &jlen, &nu, &nu, &c_b8, &z__[jrow + ( incol + k1) * z_dim1], ldz, &u[k1 + k1 * u_dim1], ldu, &c_b7, &wv[wv_offset], ldwv); _starpu_dlacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &z__[ jrow + (incol + k1) * z_dim1], ldz) ; /* L180: */ } } } else { /* ==== Updates exploiting U's 2-by-2 block structure. */ /* . (I2, I4, J2, J4 are the last rows and columns */ /* . of the blocks.) ==== */ i2 = (kdu + 1) / 2; i4 = kdu; j2 = i4 - i2; j4 = kdu; /* ==== KZS and KNZ deal with the band of zeros */ /* . along the diagonal of one of the triangular */ /* . blocks. ==== */ kzs = j4 - j2 - (ns + 1); knz = ns + 1; /* ==== Horizontal multiply ==== */ i__4 = jbot; i__3 = *nh; for (jcol = min(ndcol,*kbot) + 1; i__3 < 0 ? jcol >= i__4 : jcol <= i__4; jcol += i__3) { /* Computing MIN */ i__5 = *nh, i__7 = jbot - jcol + 1; jlen = min(i__5,i__7); /* ==== Copy bottom of H to top+KZS of scratch ==== */ /* (The first KZS rows get multiplied by zero.) ==== */ _starpu_dlacpy_("ALL", &knz, &jlen, &h__[incol + 1 + j2 + jcol * h_dim1], ldh, &wh[kzs + 1 + wh_dim1], ldwh); /* ==== Multiply by U21' ==== */ _starpu_dlaset_("ALL", &kzs, &jlen, &c_b7, &c_b7, &wh[wh_offset], ldwh); _starpu_dtrmm_("L", "U", "C", "N", &knz, &jlen, &c_b8, &u[j2 + 1 + (kzs + 1) * u_dim1], ldu, &wh[kzs + 1 + wh_dim1] , ldwh); /* ==== Multiply top of H by U11' ==== */ _starpu_dgemm_("C", "N", &i2, &jlen, &j2, &c_b8, &u[u_offset], ldu, &h__[incol + 1 + jcol * h_dim1], ldh, &c_b8, &wh[wh_offset], ldwh); /* ==== Copy top of H to bottom of WH ==== */ _starpu_dlacpy_("ALL", &j2, &jlen, &h__[incol + 1 + jcol * h_dim1] , ldh, &wh[i2 + 1 + wh_dim1], ldwh); /* ==== Multiply by U21' ==== */ _starpu_dtrmm_("L", "L", "C", "N", &j2, &jlen, &c_b8, &u[(i2 + 1) * u_dim1 + 1], ldu, &wh[i2 + 1 + wh_dim1], ldwh); /* ==== Multiply by U22 ==== */ i__5 = i4 - i2; i__7 = j4 - j2; _starpu_dgemm_("C", "N", &i__5, &jlen, &i__7, &c_b8, &u[j2 + 1 + ( i2 + 1) * u_dim1], ldu, &h__[incol + 1 + j2 + jcol * h_dim1], ldh, &c_b8, &wh[i2 + 1 + wh_dim1], ldwh); /* ==== Copy it back ==== */ _starpu_dlacpy_("ALL", &kdu, &jlen, &wh[wh_offset], ldwh, &h__[ incol + 1 + jcol * h_dim1], ldh); /* L190: */ } /* ==== Vertical multiply ==== */ i__3 = max(incol,*ktop) - 1; i__4 = *nv; for (jrow = jtop; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; jrow += i__4) { /* Computing MIN */ i__5 = *nv, i__7 = max(incol,*ktop) - jrow; jlen = min(i__5,i__7); /* ==== Copy right of H to scratch (the first KZS */ /* . columns get multiplied by zero) ==== */ _starpu_dlacpy_("ALL", &jlen, &knz, &h__[jrow + (incol + 1 + j2) * h_dim1], ldh, &wv[(kzs + 1) * wv_dim1 + 1], ldwv); /* ==== Multiply by U21 ==== */ _starpu_dlaset_("ALL", &jlen, &kzs, &c_b7, &c_b7, &wv[wv_offset], ldwv); _starpu_dtrmm_("R", "U", "N", "N", &jlen, &knz, &c_b8, &u[j2 + 1 + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) * wv_dim1 + 1], ldwv); /* ==== Multiply by U11 ==== */ _starpu_dgemm_("N", "N", &jlen, &i2, &j2, &c_b8, &h__[jrow + ( incol + 1) * h_dim1], ldh, &u[u_offset], ldu, & c_b8, &wv[wv_offset], ldwv); /* ==== Copy left of H to right of scratch ==== */ _starpu_dlacpy_("ALL", &jlen, &j2, &h__[jrow + (incol + 1) * h_dim1], ldh, &wv[(i2 + 1) * wv_dim1 + 1], ldwv); /* ==== Multiply by U21 ==== */ i__5 = i4 - i2; _starpu_dtrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b8, &u[(i2 + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * wv_dim1 + 1] , ldwv); /* ==== Multiply by U22 ==== */ i__5 = i4 - i2; i__7 = j4 - j2; _starpu_dgemm_("N", "N", &jlen, &i__5, &i__7, &c_b8, &h__[jrow + ( incol + 1 + j2) * h_dim1], ldh, &u[j2 + 1 + (i2 + 1) * u_dim1], ldu, &c_b8, &wv[(i2 + 1) * wv_dim1 + 1], ldwv); /* ==== Copy it back ==== */ _starpu_dlacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, &h__[ jrow + (incol + 1) * h_dim1], ldh); /* L200: */ } /* ==== Multiply Z (also vertical) ==== */ if (*wantz) { i__4 = *ihiz; i__3 = *nv; for (jrow = *iloz; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; jrow += i__3) { /* Computing MIN */ i__5 = *nv, i__7 = *ihiz - jrow + 1; jlen = min(i__5,i__7); /* ==== Copy right of Z to left of scratch (first */ /* . KZS columns get multiplied by zero) ==== */ _starpu_dlacpy_("ALL", &jlen, &knz, &z__[jrow + (incol + 1 + j2) * z_dim1], ldz, &wv[(kzs + 1) * wv_dim1 + 1], ldwv); /* ==== Multiply by U12 ==== */ _starpu_dlaset_("ALL", &jlen, &kzs, &c_b7, &c_b7, &wv[ wv_offset], ldwv); _starpu_dtrmm_("R", "U", "N", "N", &jlen, &knz, &c_b8, &u[j2 + 1 + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) * wv_dim1 + 1], ldwv); /* ==== Multiply by U11 ==== */ _starpu_dgemm_("N", "N", &jlen, &i2, &j2, &c_b8, &z__[jrow + ( incol + 1) * z_dim1], ldz, &u[u_offset], ldu, &c_b8, &wv[wv_offset], ldwv); /* ==== Copy left of Z to right of scratch ==== */ _starpu_dlacpy_("ALL", &jlen, &j2, &z__[jrow + (incol + 1) * z_dim1], ldz, &wv[(i2 + 1) * wv_dim1 + 1], ldwv); /* ==== Multiply by U21 ==== */ i__5 = i4 - i2; _starpu_dtrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b8, &u[( i2 + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * wv_dim1 + 1], ldwv); /* ==== Multiply by U22 ==== */ i__5 = i4 - i2; i__7 = j4 - j2; _starpu_dgemm_("N", "N", &jlen, &i__5, &i__7, &c_b8, &z__[ jrow + (incol + 1 + j2) * z_dim1], ldz, &u[j2 + 1 + (i2 + 1) * u_dim1], ldu, &c_b8, &wv[(i2 + 1) * wv_dim1 + 1], ldwv); /* ==== Copy the result back to Z ==== */ _starpu_dlacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, & z__[jrow + (incol + 1) * z_dim1], ldz); /* L210: */ } } } } /* L220: */ } /* ==== End of DLAQR5 ==== */ return 0; } /* _starpu_dlaqr5_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaqsb.c000066400000000000000000000124331507764646700206450ustar00rootroot00000000000000/* dlaqsb.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaqsb_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, char *equed) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, j; doublereal cj, large; extern logical _starpu_lsame_(char *, char *); doublereal small; extern doublereal _starpu_dlamch_(char *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAQSB equilibrates a symmetric band matrix A using the scaling */ /* factors in the vector S. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is stored. */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of super-diagonals of the matrix A if UPLO = 'U', */ /* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix A, stored in the first KD+1 rows of the array. The */ /* j-th column of A is stored in the j-th column of the array AB */ /* as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* On exit, if INFO = 0, the triangular factor U or L from the */ /* Cholesky factorization A = U'*U or A = L*L' of the band */ /* matrix A, in the same storage format as A. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD+1. */ /* S (input) DOUBLE PRECISION array, dimension (N) */ /* The scale factors for A. */ /* SCOND (input) DOUBLE PRECISION */ /* Ratio of the smallest S(i) to the largest S(i). */ /* AMAX (input) DOUBLE PRECISION */ /* Absolute value of largest matrix entry. */ /* EQUED (output) CHARACTER*1 */ /* Specifies whether or not equilibration was done. */ /* = 'N': No equilibration. */ /* = 'Y': Equilibration was done, i.e., A has been replaced by */ /* diag(S) * A * diag(S). */ /* Internal Parameters */ /* =================== */ /* THRESH is a threshold value used to decide if scaling should be done */ /* based on the ratio of the scaling factors. If SCOND < THRESH, */ /* scaling is done. */ /* LARGE and SMALL are threshold values used to decide if scaling should */ /* be done based on the absolute size of the largest matrix element. */ /* If AMAX > LARGE or AMAX < SMALL, scaling is done. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --s; /* Function Body */ if (*n <= 0) { *(unsigned char *)equed = 'N'; return 0; } /* Initialize LARGE and SMALL. */ small = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision"); large = 1. / small; if (*scond >= .1 && *amax >= small && *amax <= large) { /* No equilibration */ *(unsigned char *)equed = 'N'; } else { /* Replace A by diag(S) * A * diag(S). */ if (_starpu_lsame_(uplo, "U")) { /* Upper triangle of A is stored in band format. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { cj = s[j]; /* Computing MAX */ i__2 = 1, i__3 = j - *kd; i__4 = j; for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { ab[*kd + 1 + i__ - j + j * ab_dim1] = cj * s[i__] * ab[* kd + 1 + i__ - j + j * ab_dim1]; /* L10: */ } /* L20: */ } } else { /* Lower triangle of A is stored. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { cj = s[j]; /* Computing MIN */ i__2 = *n, i__3 = j + *kd; i__4 = min(i__2,i__3); for (i__ = j; i__ <= i__4; ++i__) { ab[i__ + 1 - j + j * ab_dim1] = cj * s[i__] * ab[i__ + 1 - j + j * ab_dim1]; /* L30: */ } /* L40: */ } } *(unsigned char *)equed = 'Y'; } return 0; /* End of DLAQSB */ } /* _starpu_dlaqsb_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaqsp.c000066400000000000000000000110621507764646700206600ustar00rootroot00000000000000/* dlaqsp.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaqsp_(char *uplo, integer *n, doublereal *ap, doublereal *s, doublereal *scond, doublereal *amax, char *equed) { /* System generated locals */ integer i__1, i__2; /* Local variables */ integer i__, j, jc; doublereal cj, large; extern logical _starpu_lsame_(char *, char *); doublereal small; extern doublereal _starpu_dlamch_(char *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAQSP equilibrates a symmetric matrix A using the scaling factors */ /* in the vector S. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is stored. */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* A, packed columnwise in a linear array. The j-th column of A */ /* is stored in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* On exit, the equilibrated matrix: diag(S) * A * diag(S), in */ /* the same storage format as A. */ /* S (input) DOUBLE PRECISION array, dimension (N) */ /* The scale factors for A. */ /* SCOND (input) DOUBLE PRECISION */ /* Ratio of the smallest S(i) to the largest S(i). */ /* AMAX (input) DOUBLE PRECISION */ /* Absolute value of largest matrix entry. */ /* EQUED (output) CHARACTER*1 */ /* Specifies whether or not equilibration was done. */ /* = 'N': No equilibration. */ /* = 'Y': Equilibration was done, i.e., A has been replaced by */ /* diag(S) * A * diag(S). */ /* Internal Parameters */ /* =================== */ /* THRESH is a threshold value used to decide if scaling should be done */ /* based on the ratio of the scaling factors. If SCOND < THRESH, */ /* scaling is done. */ /* LARGE and SMALL are threshold values used to decide if scaling should */ /* be done based on the absolute size of the largest matrix element. */ /* If AMAX > LARGE or AMAX < SMALL, scaling is done. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ --s; --ap; /* Function Body */ if (*n <= 0) { *(unsigned char *)equed = 'N'; return 0; } /* Initialize LARGE and SMALL. */ small = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision"); large = 1. / small; if (*scond >= .1 && *amax >= small && *amax <= large) { /* No equilibration */ *(unsigned char *)equed = 'N'; } else { /* Replace A by diag(S) * A * diag(S). */ if (_starpu_lsame_(uplo, "U")) { /* Upper triangle of A is stored. */ jc = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { cj = s[j]; i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { ap[jc + i__ - 1] = cj * s[i__] * ap[jc + i__ - 1]; /* L10: */ } jc += j; /* L20: */ } } else { /* Lower triangle of A is stored. */ jc = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { cj = s[j]; i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { ap[jc + i__ - j] = cj * s[i__] * ap[jc + i__ - j]; /* L30: */ } jc = jc + *n - j + 1; /* L40: */ } } *(unsigned char *)equed = 'Y'; } return 0; /* End of DLAQSP */ } /* _starpu_dlaqsp_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaqsy.c000066400000000000000000000114611507764646700206740ustar00rootroot00000000000000/* dlaqsy.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaqsy_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *s, doublereal *scond, doublereal *amax, char *equed) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j; doublereal cj, large; extern logical _starpu_lsame_(char *, char *); doublereal small; extern doublereal _starpu_dlamch_(char *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAQSY equilibrates a symmetric matrix A using the scaling factors */ /* in the vector S. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is stored. */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* n by n upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading n by n lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if EQUED = 'Y', the equilibrated matrix: */ /* diag(S) * A * diag(S). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(N,1). */ /* S (input) DOUBLE PRECISION array, dimension (N) */ /* The scale factors for A. */ /* SCOND (input) DOUBLE PRECISION */ /* Ratio of the smallest S(i) to the largest S(i). */ /* AMAX (input) DOUBLE PRECISION */ /* Absolute value of largest matrix entry. */ /* EQUED (output) CHARACTER*1 */ /* Specifies whether or not equilibration was done. */ /* = 'N': No equilibration. */ /* = 'Y': Equilibration was done, i.e., A has been replaced by */ /* diag(S) * A * diag(S). */ /* Internal Parameters */ /* =================== */ /* THRESH is a threshold value used to decide if scaling should be done */ /* based on the ratio of the scaling factors. If SCOND < THRESH, */ /* scaling is done. */ /* LARGE and SMALL are threshold values used to decide if scaling should */ /* be done based on the absolute size of the largest matrix element. */ /* If AMAX > LARGE or AMAX < SMALL, scaling is done. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --s; /* Function Body */ if (*n <= 0) { *(unsigned char *)equed = 'N'; return 0; } /* Initialize LARGE and SMALL. */ small = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision"); large = 1. / small; if (*scond >= .1 && *amax >= small && *amax <= large) { /* No equilibration */ *(unsigned char *)equed = 'N'; } else { /* Replace A by diag(S) * A * diag(S). */ if (_starpu_lsame_(uplo, "U")) { /* Upper triangle of A is stored. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { cj = s[j]; i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = cj * s[i__] * a[i__ + j * a_dim1]; /* L10: */ } /* L20: */ } } else { /* Lower triangle of A is stored. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { cj = s[j]; i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = cj * s[i__] * a[i__ + j * a_dim1]; /* L30: */ } /* L40: */ } } *(unsigned char *)equed = 'Y'; } return 0; /* End of DLAQSY */ } /* _starpu_dlaqsy_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaqtr.c000066400000000000000000000517511507764646700206740ustar00rootroot00000000000000/* dlaqtr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static logical c_false = FALSE_; static integer c__2 = 2; static doublereal c_b21 = 1.; static doublereal c_b25 = 0.; static logical c_true = TRUE_; /* Subroutine */ int _starpu_dlaqtr_(logical *ltran, logical *lreal, integer *n, doublereal *t, integer *ldt, doublereal *b, doublereal *w, doublereal *scale, doublereal *x, doublereal *work, integer *info) { /* System generated locals */ integer t_dim1, t_offset, i__1, i__2; doublereal d__1, d__2, d__3, d__4, d__5, d__6; /* Local variables */ doublereal d__[4] /* was [2][2] */; integer i__, j, k; doublereal v[4] /* was [2][2] */, z__; integer j1, j2, n1, n2; doublereal si, xj, sr, rec, eps, tjj, tmp; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); integer ierr; doublereal smin, xmax; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer jnext; doublereal sminw, xnorm; extern /* Subroutine */ int _starpu_dlaln2_(logical *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal * , doublereal *, integer *, doublereal *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); doublereal scaloc; extern /* Subroutine */ int _starpu_dladiv_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); doublereal bignum; logical notran; doublereal smlnum; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAQTR solves the real quasi-triangular system */ /* op(T)*p = scale*c, if LREAL = .TRUE. */ /* or the complex quasi-triangular systems */ /* op(T + iB)*(p+iq) = scale*(c+id), if LREAL = .FALSE. */ /* in real arithmetic, where T is upper quasi-triangular. */ /* If LREAL = .FALSE., then the first diagonal block of T must be */ /* 1 by 1, B is the specially structured matrix */ /* B = [ b(1) b(2) ... b(n) ] */ /* [ w ] */ /* [ w ] */ /* [ . ] */ /* [ w ] */ /* op(A) = A or A', A' denotes the conjugate transpose of */ /* matrix A. */ /* On input, X = [ c ]. On output, X = [ p ]. */ /* [ d ] [ q ] */ /* This subroutine is designed for the condition number estimation */ /* in routine DTRSNA. */ /* Arguments */ /* ========= */ /* LTRAN (input) LOGICAL */ /* On entry, LTRAN specifies the option of conjugate transpose: */ /* = .FALSE., op(T+i*B) = T+i*B, */ /* = .TRUE., op(T+i*B) = (T+i*B)'. */ /* LREAL (input) LOGICAL */ /* On entry, LREAL specifies the input matrix structure: */ /* = .FALSE., the input is complex */ /* = .TRUE., the input is real */ /* N (input) INTEGER */ /* On entry, N specifies the order of T+i*B. N >= 0. */ /* T (input) DOUBLE PRECISION array, dimension (LDT,N) */ /* On entry, T contains a matrix in Schur canonical form. */ /* If LREAL = .FALSE., then the first diagonal block of T mu */ /* be 1 by 1. */ /* LDT (input) INTEGER */ /* The leading dimension of the matrix T. LDT >= max(1,N). */ /* B (input) DOUBLE PRECISION array, dimension (N) */ /* On entry, B contains the elements to form the matrix */ /* B as described above. */ /* If LREAL = .TRUE., B is not referenced. */ /* W (input) DOUBLE PRECISION */ /* On entry, W is the diagonal element of the matrix B. */ /* If LREAL = .TRUE., W is not referenced. */ /* SCALE (output) DOUBLE PRECISION */ /* On exit, SCALE is the scale factor. */ /* X (input/output) DOUBLE PRECISION array, dimension (2*N) */ /* On entry, X contains the right hand side of the system. */ /* On exit, X is overwritten by the solution. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* INFO (output) INTEGER */ /* On exit, INFO is set to */ /* 0: successful exit. */ /* 1: the some diagonal 1 by 1 block has been perturbed by */ /* a small number SMIN to keep nonsingularity. */ /* 2: the some diagonal 2 by 2 block has been perturbed by */ /* a small number in DLALN2 to keep nonsingularity. */ /* NOTE: In the interests of speed, this routine does not */ /* check the inputs for errors. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Do not test the input parameters for errors */ /* Parameter adjustments */ t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; --b; --x; --work; /* Function Body */ notran = ! (*ltran); *info = 0; /* Quick return if possible */ if (*n == 0) { return 0; } /* Set constants to control overflow */ eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S") / eps; bignum = 1. / smlnum; xnorm = _starpu_dlange_("M", n, n, &t[t_offset], ldt, d__); if (! (*lreal)) { /* Computing MAX */ d__1 = xnorm, d__2 = abs(*w), d__1 = max(d__1,d__2), d__2 = _starpu_dlange_( "M", n, &c__1, &b[1], n, d__); xnorm = max(d__1,d__2); } /* Computing MAX */ d__1 = smlnum, d__2 = eps * xnorm; smin = max(d__1,d__2); /* Compute 1-norm of each column of strictly upper triangular */ /* part of T to control overflow in triangular solver. */ work[1] = 0.; i__1 = *n; for (j = 2; j <= i__1; ++j) { i__2 = j - 1; work[j] = _starpu_dasum_(&i__2, &t[j * t_dim1 + 1], &c__1); /* L10: */ } if (! (*lreal)) { i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { work[i__] += (d__1 = b[i__], abs(d__1)); /* L20: */ } } n2 = *n << 1; n1 = *n; if (! (*lreal)) { n1 = n2; } k = _starpu_idamax_(&n1, &x[1], &c__1); xmax = (d__1 = x[k], abs(d__1)); *scale = 1.; if (xmax > bignum) { *scale = bignum / xmax; _starpu_dscal_(&n1, scale, &x[1], &c__1); xmax = bignum; } if (*lreal) { if (notran) { /* Solve T*p = scale*c */ jnext = *n; for (j = *n; j >= 1; --j) { if (j > jnext) { goto L30; } j1 = j; j2 = j; jnext = j - 1; if (j > 1) { if (t[j + (j - 1) * t_dim1] != 0.) { j1 = j - 1; jnext = j - 2; } } if (j1 == j2) { /* Meet 1 by 1 diagonal block */ /* Scale to avoid overflow when computing */ /* x(j) = b(j)/T(j,j) */ xj = (d__1 = x[j1], abs(d__1)); tjj = (d__1 = t[j1 + j1 * t_dim1], abs(d__1)); tmp = t[j1 + j1 * t_dim1]; if (tjj < smin) { tmp = smin; tjj = smin; *info = 1; } if (xj == 0.) { goto L30; } if (tjj < 1.) { if (xj > bignum * tjj) { rec = 1. / xj; _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j1] /= tmp; xj = (d__1 = x[j1], abs(d__1)); /* Scale x if necessary to avoid overflow when adding a */ /* multiple of column j1 of T. */ if (xj > 1.) { rec = 1. / xj; if (work[j1] > (bignum - xmax) * rec) { _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; } } if (j1 > 1) { i__1 = j1 - 1; d__1 = -x[j1]; _starpu_daxpy_(&i__1, &d__1, &t[j1 * t_dim1 + 1], &c__1, &x[1] , &c__1); i__1 = j1 - 1; k = _starpu_idamax_(&i__1, &x[1], &c__1); xmax = (d__1 = x[k], abs(d__1)); } } else { /* Meet 2 by 2 diagonal block */ /* Call 2 by 2 linear system solve, to take */ /* care of possible overflow by scaling factor. */ d__[0] = x[j1]; d__[1] = x[j2]; _starpu_dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b21, &t[j1 + j1 * t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, & c_b25, &c_b25, v, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 2; } if (scaloc != 1.) { _starpu_dscal_(n, &scaloc, &x[1], &c__1); *scale *= scaloc; } x[j1] = v[0]; x[j2] = v[1]; /* Scale V(1,1) (= X(J1)) and/or V(2,1) (=X(J2)) */ /* to avoid overflow in updating right-hand side. */ /* Computing MAX */ d__1 = abs(v[0]), d__2 = abs(v[1]); xj = max(d__1,d__2); if (xj > 1.) { rec = 1. / xj; /* Computing MAX */ d__1 = work[j1], d__2 = work[j2]; if (max(d__1,d__2) > (bignum - xmax) * rec) { _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; } } /* Update right-hand side */ if (j1 > 1) { i__1 = j1 - 1; d__1 = -x[j1]; _starpu_daxpy_(&i__1, &d__1, &t[j1 * t_dim1 + 1], &c__1, &x[1] , &c__1); i__1 = j1 - 1; d__1 = -x[j2]; _starpu_daxpy_(&i__1, &d__1, &t[j2 * t_dim1 + 1], &c__1, &x[1] , &c__1); i__1 = j1 - 1; k = _starpu_idamax_(&i__1, &x[1], &c__1); xmax = (d__1 = x[k], abs(d__1)); } } L30: ; } } else { /* Solve T'*p = scale*c */ jnext = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (j < jnext) { goto L40; } j1 = j; j2 = j; jnext = j + 1; if (j < *n) { if (t[j + 1 + j * t_dim1] != 0.) { j2 = j + 1; jnext = j + 2; } } if (j1 == j2) { /* 1 by 1 diagonal block */ /* Scale if necessary to avoid overflow in forming the */ /* right-hand side element by inner product. */ xj = (d__1 = x[j1], abs(d__1)); if (xmax > 1.) { rec = 1. / xmax; if (work[j1] > (bignum - xj) * rec) { _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } i__2 = j1 - 1; x[j1] -= _starpu_ddot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[1], & c__1); xj = (d__1 = x[j1], abs(d__1)); tjj = (d__1 = t[j1 + j1 * t_dim1], abs(d__1)); tmp = t[j1 + j1 * t_dim1]; if (tjj < smin) { tmp = smin; tjj = smin; *info = 1; } if (tjj < 1.) { if (xj > bignum * tjj) { rec = 1. / xj; _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j1] /= tmp; /* Computing MAX */ d__2 = xmax, d__3 = (d__1 = x[j1], abs(d__1)); xmax = max(d__2,d__3); } else { /* 2 by 2 diagonal block */ /* Scale if necessary to avoid overflow in forming the */ /* right-hand side elements by inner product. */ /* Computing MAX */ d__3 = (d__1 = x[j1], abs(d__1)), d__4 = (d__2 = x[j2], abs(d__2)); xj = max(d__3,d__4); if (xmax > 1.) { rec = 1. / xmax; /* Computing MAX */ d__1 = work[j2], d__2 = work[j1]; if (max(d__1,d__2) > (bignum - xj) * rec) { _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } i__2 = j1 - 1; d__[0] = x[j1] - _starpu_ddot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[1], &c__1); i__2 = j1 - 1; d__[1] = x[j2] - _starpu_ddot_(&i__2, &t[j2 * t_dim1 + 1], &c__1, &x[1], &c__1); _starpu_dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b21, &t[j1 + j1 * t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, &c_b25, &c_b25, v, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 2; } if (scaloc != 1.) { _starpu_dscal_(n, &scaloc, &x[1], &c__1); *scale *= scaloc; } x[j1] = v[0]; x[j2] = v[1]; /* Computing MAX */ d__3 = (d__1 = x[j1], abs(d__1)), d__4 = (d__2 = x[j2], abs(d__2)), d__3 = max(d__3,d__4); xmax = max(d__3,xmax); } L40: ; } } } else { /* Computing MAX */ d__1 = eps * abs(*w); sminw = max(d__1,smin); if (notran) { /* Solve (T + iB)*(p+iq) = c+id */ jnext = *n; for (j = *n; j >= 1; --j) { if (j > jnext) { goto L70; } j1 = j; j2 = j; jnext = j - 1; if (j > 1) { if (t[j + (j - 1) * t_dim1] != 0.) { j1 = j - 1; jnext = j - 2; } } if (j1 == j2) { /* 1 by 1 diagonal block */ /* Scale if necessary to avoid overflow in division */ z__ = *w; if (j1 == 1) { z__ = b[1]; } xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], abs( d__2)); tjj = (d__1 = t[j1 + j1 * t_dim1], abs(d__1)) + abs(z__); tmp = t[j1 + j1 * t_dim1]; if (tjj < sminw) { tmp = sminw; tjj = sminw; *info = 1; } if (xj == 0.) { goto L70; } if (tjj < 1.) { if (xj > bignum * tjj) { rec = 1. / xj; _starpu_dscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } _starpu_dladiv_(&x[j1], &x[*n + j1], &tmp, &z__, &sr, &si); x[j1] = sr; x[*n + j1] = si; xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], abs( d__2)); /* Scale x if necessary to avoid overflow when adding a */ /* multiple of column j1 of T. */ if (xj > 1.) { rec = 1. / xj; if (work[j1] > (bignum - xmax) * rec) { _starpu_dscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; } } if (j1 > 1) { i__1 = j1 - 1; d__1 = -x[j1]; _starpu_daxpy_(&i__1, &d__1, &t[j1 * t_dim1 + 1], &c__1, &x[1] , &c__1); i__1 = j1 - 1; d__1 = -x[*n + j1]; _starpu_daxpy_(&i__1, &d__1, &t[j1 * t_dim1 + 1], &c__1, &x[* n + 1], &c__1); x[1] += b[j1] * x[*n + j1]; x[*n + 1] -= b[j1] * x[j1]; xmax = 0.; i__1 = j1 - 1; for (k = 1; k <= i__1; ++k) { /* Computing MAX */ d__3 = xmax, d__4 = (d__1 = x[k], abs(d__1)) + ( d__2 = x[k + *n], abs(d__2)); xmax = max(d__3,d__4); /* L50: */ } } } else { /* Meet 2 by 2 diagonal block */ d__[0] = x[j1]; d__[1] = x[j2]; d__[2] = x[*n + j1]; d__[3] = x[*n + j2]; d__1 = -(*w); _starpu_dlaln2_(&c_false, &c__2, &c__2, &sminw, &c_b21, &t[j1 + j1 * t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, & c_b25, &d__1, v, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 2; } if (scaloc != 1.) { i__1 = *n << 1; _starpu_dscal_(&i__1, &scaloc, &x[1], &c__1); *scale = scaloc * *scale; } x[j1] = v[0]; x[j2] = v[1]; x[*n + j1] = v[2]; x[*n + j2] = v[3]; /* Scale X(J1), .... to avoid overflow in */ /* updating right hand side. */ /* Computing MAX */ d__1 = abs(v[0]) + abs(v[2]), d__2 = abs(v[1]) + abs(v[3]) ; xj = max(d__1,d__2); if (xj > 1.) { rec = 1. / xj; /* Computing MAX */ d__1 = work[j1], d__2 = work[j2]; if (max(d__1,d__2) > (bignum - xmax) * rec) { _starpu_dscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; } } /* Update the right-hand side. */ if (j1 > 1) { i__1 = j1 - 1; d__1 = -x[j1]; _starpu_daxpy_(&i__1, &d__1, &t[j1 * t_dim1 + 1], &c__1, &x[1] , &c__1); i__1 = j1 - 1; d__1 = -x[j2]; _starpu_daxpy_(&i__1, &d__1, &t[j2 * t_dim1 + 1], &c__1, &x[1] , &c__1); i__1 = j1 - 1; d__1 = -x[*n + j1]; _starpu_daxpy_(&i__1, &d__1, &t[j1 * t_dim1 + 1], &c__1, &x[* n + 1], &c__1); i__1 = j1 - 1; d__1 = -x[*n + j2]; _starpu_daxpy_(&i__1, &d__1, &t[j2 * t_dim1 + 1], &c__1, &x[* n + 1], &c__1); x[1] = x[1] + b[j1] * x[*n + j1] + b[j2] * x[*n + j2]; x[*n + 1] = x[*n + 1] - b[j1] * x[j1] - b[j2] * x[j2]; xmax = 0.; i__1 = j1 - 1; for (k = 1; k <= i__1; ++k) { /* Computing MAX */ d__3 = (d__1 = x[k], abs(d__1)) + (d__2 = x[k + * n], abs(d__2)); xmax = max(d__3,xmax); /* L60: */ } } } L70: ; } } else { /* Solve (T + iB)'*(p+iq) = c+id */ jnext = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (j < jnext) { goto L80; } j1 = j; j2 = j; jnext = j + 1; if (j < *n) { if (t[j + 1 + j * t_dim1] != 0.) { j2 = j + 1; jnext = j + 2; } } if (j1 == j2) { /* 1 by 1 diagonal block */ /* Scale if necessary to avoid overflow in forming the */ /* right-hand side element by inner product. */ xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[j1 + *n], abs( d__2)); if (xmax > 1.) { rec = 1. / xmax; if (work[j1] > (bignum - xj) * rec) { _starpu_dscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } i__2 = j1 - 1; x[j1] -= _starpu_ddot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[1], & c__1); i__2 = j1 - 1; x[*n + j1] -= _starpu_ddot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[ *n + 1], &c__1); if (j1 > 1) { x[j1] -= b[j1] * x[*n + 1]; x[*n + j1] += b[j1] * x[1]; } xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[j1 + *n], abs( d__2)); z__ = *w; if (j1 == 1) { z__ = b[1]; } /* Scale if necessary to avoid overflow in */ /* complex division */ tjj = (d__1 = t[j1 + j1 * t_dim1], abs(d__1)) + abs(z__); tmp = t[j1 + j1 * t_dim1]; if (tjj < sminw) { tmp = sminw; tjj = sminw; *info = 1; } if (tjj < 1.) { if (xj > bignum * tjj) { rec = 1. / xj; _starpu_dscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } d__1 = -z__; _starpu_dladiv_(&x[j1], &x[*n + j1], &tmp, &d__1, &sr, &si); x[j1] = sr; x[j1 + *n] = si; /* Computing MAX */ d__3 = (d__1 = x[j1], abs(d__1)) + (d__2 = x[j1 + *n], abs(d__2)); xmax = max(d__3,xmax); } else { /* 2 by 2 diagonal block */ /* Scale if necessary to avoid overflow in forming the */ /* right-hand side element by inner product. */ /* Computing MAX */ d__5 = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], abs(d__2)), d__6 = (d__3 = x[j2], abs(d__3)) + ( d__4 = x[*n + j2], abs(d__4)); xj = max(d__5,d__6); if (xmax > 1.) { rec = 1. / xmax; /* Computing MAX */ d__1 = work[j1], d__2 = work[j2]; if (max(d__1,d__2) > (bignum - xj) / xmax) { _starpu_dscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } i__2 = j1 - 1; d__[0] = x[j1] - _starpu_ddot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[1], &c__1); i__2 = j1 - 1; d__[1] = x[j2] - _starpu_ddot_(&i__2, &t[j2 * t_dim1 + 1], &c__1, &x[1], &c__1); i__2 = j1 - 1; d__[2] = x[*n + j1] - _starpu_ddot_(&i__2, &t[j1 * t_dim1 + 1], & c__1, &x[*n + 1], &c__1); i__2 = j1 - 1; d__[3] = x[*n + j2] - _starpu_ddot_(&i__2, &t[j2 * t_dim1 + 1], & c__1, &x[*n + 1], &c__1); d__[0] -= b[j1] * x[*n + 1]; d__[1] -= b[j2] * x[*n + 1]; d__[2] += b[j1] * x[1]; d__[3] += b[j2] * x[1]; _starpu_dlaln2_(&c_true, &c__2, &c__2, &sminw, &c_b21, &t[j1 + j1 * t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, & c_b25, w, v, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 2; } if (scaloc != 1.) { _starpu_dscal_(&n2, &scaloc, &x[1], &c__1); *scale = scaloc * *scale; } x[j1] = v[0]; x[j2] = v[1]; x[*n + j1] = v[2]; x[*n + j2] = v[3]; /* Computing MAX */ d__5 = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], abs(d__2)), d__6 = (d__3 = x[j2], abs(d__3)) + ( d__4 = x[*n + j2], abs(d__4)), d__5 = max(d__5, d__6); xmax = max(d__5,xmax); } L80: ; } } } return 0; /* End of DLAQTR */ } /* _starpu_dlaqtr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlar1v.c000066400000000000000000000311261507764646700205700ustar00rootroot00000000000000/* dlar1v.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlar1v_(integer *n, integer *b1, integer *bn, doublereal *lambda, doublereal *d__, doublereal *l, doublereal *ld, doublereal * lld, doublereal *pivmin, doublereal *gaptol, doublereal *z__, logical *wantnc, integer *negcnt, doublereal *ztz, doublereal *mingma, integer *r__, integer *isuppz, doublereal *nrminv, doublereal *resid, doublereal *rqcorr, doublereal *work) { /* System generated locals */ integer i__1; doublereal d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__; doublereal s; integer r1, r2; doublereal eps, tmp; integer neg1, neg2, indp, inds; doublereal dplus; extern doublereal _starpu_dlamch_(char *); extern logical _starpu_disnan_(doublereal *); integer indlpl, indumn; doublereal dminus; logical sawnan1, sawnan2; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAR1V computes the (scaled) r-th column of the inverse of */ /* the sumbmatrix in rows B1 through BN of the tridiagonal matrix */ /* L D L^T - sigma I. When sigma is close to an eigenvalue, the */ /* computed vector is an accurate eigenvector. Usually, r corresponds */ /* to the index where the eigenvector is largest in magnitude. */ /* The following steps accomplish this computation : */ /* (a) Stationary qd transform, L D L^T - sigma I = L(+) D(+) L(+)^T, */ /* (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, */ /* (c) Computation of the diagonal elements of the inverse of */ /* L D L^T - sigma I by combining the above transforms, and choosing */ /* r as the index where the diagonal of the inverse is (one of the) */ /* largest in magnitude. */ /* (d) Computation of the (scaled) r-th column of the inverse using the */ /* twisted factorization obtained by combining the top part of the */ /* the stationary and the bottom part of the progressive transform. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix L D L^T. */ /* B1 (input) INTEGER */ /* First index of the submatrix of L D L^T. */ /* BN (input) INTEGER */ /* Last index of the submatrix of L D L^T. */ /* LAMBDA (input) DOUBLE PRECISION */ /* The shift. In order to compute an accurate eigenvector, */ /* LAMBDA should be a good approximation to an eigenvalue */ /* of L D L^T. */ /* L (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) subdiagonal elements of the unit bidiagonal matrix */ /* L, in elements 1 to N-1. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of the diagonal matrix D. */ /* LD (input) DOUBLE PRECISION array, dimension (N-1) */ /* The n-1 elements L(i)*D(i). */ /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */ /* The n-1 elements L(i)*L(i)*D(i). */ /* PIVMIN (input) DOUBLE PRECISION */ /* The minimum pivot in the Sturm sequence. */ /* GAPTOL (input) DOUBLE PRECISION */ /* Tolerance that indicates when eigenvector entries are negligible */ /* w.r.t. their contribution to the residual. */ /* Z (input/output) DOUBLE PRECISION array, dimension (N) */ /* On input, all entries of Z must be set to 0. */ /* On output, Z contains the (scaled) r-th column of the */ /* inverse. The scaling is such that Z(R) equals 1. */ /* WANTNC (input) LOGICAL */ /* Specifies whether NEGCNT has to be computed. */ /* NEGCNT (output) INTEGER */ /* If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin */ /* in the matrix factorization L D L^T, and NEGCNT = -1 otherwise. */ /* ZTZ (output) DOUBLE PRECISION */ /* The square of the 2-norm of Z. */ /* MINGMA (output) DOUBLE PRECISION */ /* The reciprocal of the largest (in magnitude) diagonal */ /* element of the inverse of L D L^T - sigma I. */ /* R (input/output) INTEGER */ /* The twist index for the twisted factorization used to */ /* compute Z. */ /* On input, 0 <= R <= N. If R is input as 0, R is set to */ /* the index where (L D L^T - sigma I)^{-1} is largest */ /* in magnitude. If 1 <= R <= N, R is unchanged. */ /* On output, R contains the twist index used to compute Z. */ /* Ideally, R designates the position of the maximum entry in the */ /* eigenvector. */ /* ISUPPZ (output) INTEGER array, dimension (2) */ /* The support of the vector in Z, i.e., the vector Z is */ /* nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). */ /* NRMINV (output) DOUBLE PRECISION */ /* NRMINV = 1/SQRT( ZTZ ) */ /* RESID (output) DOUBLE PRECISION */ /* The residual of the FP vector. */ /* RESID = ABS( MINGMA )/SQRT( ZTZ ) */ /* RQCORR (output) DOUBLE PRECISION */ /* The Rayleigh Quotient correction to LAMBDA. */ /* RQCORR = MINGMA*TMP */ /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Beresford Parlett, University of California, Berkeley, USA */ /* Jim Demmel, University of California, Berkeley, USA */ /* Inderjit Dhillon, University of Texas, Austin, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* Christof Voemel, University of California, Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --work; --isuppz; --z__; --lld; --ld; --l; --d__; /* Function Body */ eps = _starpu_dlamch_("Precision"); if (*r__ == 0) { r1 = *b1; r2 = *bn; } else { r1 = *r__; r2 = *r__; } /* Storage for LPLUS */ indlpl = 0; /* Storage for UMINUS */ indumn = *n; inds = (*n << 1) + 1; indp = *n * 3 + 1; if (*b1 == 1) { work[inds] = 0.; } else { work[inds + *b1 - 1] = lld[*b1 - 1]; } /* Compute the stationary transform (using the differential form) */ /* until the index R2. */ sawnan1 = FALSE_; neg1 = 0; s = work[inds + *b1 - 1] - *lambda; i__1 = r1 - 1; for (i__ = *b1; i__ <= i__1; ++i__) { dplus = d__[i__] + s; work[indlpl + i__] = ld[i__] / dplus; if (dplus < 0.) { ++neg1; } work[inds + i__] = s * work[indlpl + i__] * l[i__]; s = work[inds + i__] - *lambda; /* L50: */ } sawnan1 = _starpu_disnan_(&s); if (sawnan1) { goto L60; } i__1 = r2 - 1; for (i__ = r1; i__ <= i__1; ++i__) { dplus = d__[i__] + s; work[indlpl + i__] = ld[i__] / dplus; work[inds + i__] = s * work[indlpl + i__] * l[i__]; s = work[inds + i__] - *lambda; /* L51: */ } sawnan1 = _starpu_disnan_(&s); L60: if (sawnan1) { /* Runs a slower version of the above loop if a NaN is detected */ neg1 = 0; s = work[inds + *b1 - 1] - *lambda; i__1 = r1 - 1; for (i__ = *b1; i__ <= i__1; ++i__) { dplus = d__[i__] + s; if (abs(dplus) < *pivmin) { dplus = -(*pivmin); } work[indlpl + i__] = ld[i__] / dplus; if (dplus < 0.) { ++neg1; } work[inds + i__] = s * work[indlpl + i__] * l[i__]; if (work[indlpl + i__] == 0.) { work[inds + i__] = lld[i__]; } s = work[inds + i__] - *lambda; /* L70: */ } i__1 = r2 - 1; for (i__ = r1; i__ <= i__1; ++i__) { dplus = d__[i__] + s; if (abs(dplus) < *pivmin) { dplus = -(*pivmin); } work[indlpl + i__] = ld[i__] / dplus; work[inds + i__] = s * work[indlpl + i__] * l[i__]; if (work[indlpl + i__] == 0.) { work[inds + i__] = lld[i__]; } s = work[inds + i__] - *lambda; /* L71: */ } } /* Compute the progressive transform (using the differential form) */ /* until the index R1 */ sawnan2 = FALSE_; neg2 = 0; work[indp + *bn - 1] = d__[*bn] - *lambda; i__1 = r1; for (i__ = *bn - 1; i__ >= i__1; --i__) { dminus = lld[i__] + work[indp + i__]; tmp = d__[i__] / dminus; if (dminus < 0.) { ++neg2; } work[indumn + i__] = l[i__] * tmp; work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda; /* L80: */ } tmp = work[indp + r1 - 1]; sawnan2 = _starpu_disnan_(&tmp); if (sawnan2) { /* Runs a slower version of the above loop if a NaN is detected */ neg2 = 0; i__1 = r1; for (i__ = *bn - 1; i__ >= i__1; --i__) { dminus = lld[i__] + work[indp + i__]; if (abs(dminus) < *pivmin) { dminus = -(*pivmin); } tmp = d__[i__] / dminus; if (dminus < 0.) { ++neg2; } work[indumn + i__] = l[i__] * tmp; work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda; if (tmp == 0.) { work[indp + i__ - 1] = d__[i__] - *lambda; } /* L100: */ } } /* Find the index (from R1 to R2) of the largest (in magnitude) */ /* diagonal element of the inverse */ *mingma = work[inds + r1 - 1] + work[indp + r1 - 1]; if (*mingma < 0.) { ++neg1; } if (*wantnc) { *negcnt = neg1 + neg2; } else { *negcnt = -1; } if (abs(*mingma) == 0.) { *mingma = eps * work[inds + r1 - 1]; } *r__ = r1; i__1 = r2 - 1; for (i__ = r1; i__ <= i__1; ++i__) { tmp = work[inds + i__] + work[indp + i__]; if (tmp == 0.) { tmp = eps * work[inds + i__]; } if (abs(tmp) <= abs(*mingma)) { *mingma = tmp; *r__ = i__ + 1; } /* L110: */ } /* Compute the FP vector: solve N^T v = e_r */ isuppz[1] = *b1; isuppz[2] = *bn; z__[*r__] = 1.; *ztz = 1.; /* Compute the FP vector upwards from R */ if (! sawnan1 && ! sawnan2) { i__1 = *b1; for (i__ = *r__ - 1; i__ >= i__1; --i__) { z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]); if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs( d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) { z__[i__] = 0.; isuppz[1] = i__ + 1; goto L220; } *ztz += z__[i__] * z__[i__]; /* L210: */ } L220: ; } else { /* Run slower loop if NaN occurred. */ i__1 = *b1; for (i__ = *r__ - 1; i__ >= i__1; --i__) { if (z__[i__ + 1] == 0.) { z__[i__] = -(ld[i__ + 1] / ld[i__]) * z__[i__ + 2]; } else { z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]); } if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs( d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) { z__[i__] = 0.; isuppz[1] = i__ + 1; goto L240; } *ztz += z__[i__] * z__[i__]; /* L230: */ } L240: ; } /* Compute the FP vector downwards from R in blocks of size BLKSIZ */ if (! sawnan1 && ! sawnan2) { i__1 = *bn - 1; for (i__ = *r__; i__ <= i__1; ++i__) { z__[i__ + 1] = -(work[indumn + i__] * z__[i__]); if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs( d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) { z__[i__ + 1] = 0.; isuppz[2] = i__; goto L260; } *ztz += z__[i__ + 1] * z__[i__ + 1]; /* L250: */ } L260: ; } else { /* Run slower loop if NaN occurred. */ i__1 = *bn - 1; for (i__ = *r__; i__ <= i__1; ++i__) { if (z__[i__] == 0.) { z__[i__ + 1] = -(ld[i__ - 1] / ld[i__]) * z__[i__ - 1]; } else { z__[i__ + 1] = -(work[indumn + i__] * z__[i__]); } if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs( d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) { z__[i__ + 1] = 0.; isuppz[2] = i__; goto L280; } *ztz += z__[i__ + 1] * z__[i__ + 1]; /* L270: */ } L280: ; } /* Compute quantities for convergence test */ tmp = 1. / *ztz; *nrminv = sqrt(tmp); *resid = abs(*mingma) * *nrminv; *rqcorr = *mingma * tmp; return 0; /* End of DLAR1V */ } /* _starpu_dlar1v_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlar2v.c000066400000000000000000000062301507764646700205670ustar00rootroot00000000000000/* dlar2v.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlar2v_(integer *n, doublereal *x, doublereal *y, doublereal *z__, integer *incx, doublereal *c__, doublereal *s, integer *incc) { /* System generated locals */ integer i__1; /* Local variables */ integer i__; doublereal t1, t2, t3, t4, t5, t6; integer ic; doublereal ci, si; integer ix; doublereal xi, yi, zi; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAR2V applies a vector of real plane rotations from both sides to */ /* a sequence of 2-by-2 real symmetric matrices, defined by the elements */ /* of the vectors x, y and z. For i = 1,2,...,n */ /* ( x(i) z(i) ) := ( c(i) s(i) ) ( x(i) z(i) ) ( c(i) -s(i) ) */ /* ( z(i) y(i) ) ( -s(i) c(i) ) ( z(i) y(i) ) ( s(i) c(i) ) */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of plane rotations to be applied. */ /* X (input/output) DOUBLE PRECISION array, */ /* dimension (1+(N-1)*INCX) */ /* The vector x. */ /* Y (input/output) DOUBLE PRECISION array, */ /* dimension (1+(N-1)*INCX) */ /* The vector y. */ /* Z (input/output) DOUBLE PRECISION array, */ /* dimension (1+(N-1)*INCX) */ /* The vector z. */ /* INCX (input) INTEGER */ /* The increment between elements of X, Y and Z. INCX > 0. */ /* C (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) */ /* The cosines of the plane rotations. */ /* S (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) */ /* The sines of the plane rotations. */ /* INCC (input) INTEGER */ /* The increment between elements of C and S. INCC > 0. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --s; --c__; --z__; --y; --x; /* Function Body */ ix = 1; ic = 1; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { xi = x[ix]; yi = y[ix]; zi = z__[ix]; ci = c__[ic]; si = s[ic]; t1 = si * zi; t2 = ci * zi; t3 = t2 - si * xi; t4 = t2 + si * yi; t5 = ci * xi + t1; t6 = ci * yi - t1; x[ix] = ci * t5 + si * t4; y[ix] = ci * t6 - si * t3; z__[ix] = ci * t4 - si * t5; ix += *incx; ic += *incc; /* L10: */ } /* End of DLAR2V */ return 0; } /* _starpu_dlar2v_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarf.c000066400000000000000000000130411507764646700204630ustar00rootroot00000000000000/* dlarf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b4 = 1.; static doublereal c_b5 = 0.; static integer c__1 = 1; /* Subroutine */ int _starpu_dlarf_(char *side, integer *m, integer *n, doublereal *v, integer *incv, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work) { /* System generated locals */ integer c_dim1, c_offset; doublereal d__1; /* Local variables */ integer i__; logical applyleft; extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer lastc, lastv; extern integer _starpu_iladlc_(integer *, integer *, doublereal *, integer *), _starpu_iladlr_(integer *, integer *, doublereal *, integer *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARF applies a real elementary reflector H to a real m by n matrix */ /* C, from either the left or the right. H is represented in the form */ /* H = I - tau * v * v' */ /* where tau is a real scalar and v is a real vector. */ /* If tau = 0, then H is taken to be the unit matrix. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': form H * C */ /* = 'R': form C * H */ /* M (input) INTEGER */ /* The number of rows of the matrix C. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. */ /* V (input) DOUBLE PRECISION array, dimension */ /* (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ /* or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ /* The vector v in the representation of H. V is not used if */ /* TAU = 0. */ /* INCV (input) INTEGER */ /* The increment between elements of v. INCV <> 0. */ /* TAU (input) DOUBLE PRECISION */ /* The value tau in the representation of H. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the m by n matrix C. */ /* On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ /* or C * H if SIDE = 'R'. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (N) if SIDE = 'L' */ /* or (M) if SIDE = 'R' */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --v; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ applyleft = _starpu_lsame_(side, "L"); lastv = 0; lastc = 0; if (*tau != 0.) { /* Set up variables for scanning V. LASTV begins pointing to the end */ /* of V. */ if (applyleft) { lastv = *m; } else { lastv = *n; } if (*incv > 0) { i__ = (lastv - 1) * *incv + 1; } else { i__ = 1; } /* Look for the last non-zero row in V. */ while(lastv > 0 && v[i__] == 0.) { --lastv; i__ -= *incv; } if (applyleft) { /* Scan for the last non-zero column in C(1:lastv,:). */ lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); } else { /* Scan for the last non-zero row in C(:,1:lastv). */ lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); } } /* Note that lastc.eq.0 renders the BLAS operations null; no special */ /* case is needed at this level. */ if (applyleft) { /* Form H * C */ if (lastv > 0) { /* w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */ _starpu_dgemv_("Transpose", &lastv, &lastc, &c_b4, &c__[c_offset], ldc, & v[1], incv, &c_b5, &work[1], &c__1); /* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)' */ d__1 = -(*tau); _starpu_dger_(&lastv, &lastc, &d__1, &v[1], incv, &work[1], &c__1, &c__[ c_offset], ldc); } } else { /* Form C * H */ if (lastv > 0) { /* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ _starpu_dgemv_("No transpose", &lastc, &lastv, &c_b4, &c__[c_offset], ldc, &v[1], incv, &c_b5, &work[1], &c__1); /* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)' */ d__1 = -(*tau); _starpu_dger_(&lastc, &lastv, &d__1, &work[1], &c__1, &v[1], incv, &c__[ c_offset], ldc); } } return 0; /* End of DLARF */ } /* _starpu_dlarf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarfb.c000066400000000000000000000522171507764646700206350ustar00rootroot00000000000000/* dlarfb.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b14 = 1.; static doublereal c_b25 = -1.; /* Subroutine */ int _starpu_dlarfb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, doublereal *v, integer * ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, doublereal *work, integer *ldwork) { /* System generated locals */ integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, work_offset, i__1, i__2; /* Local variables */ integer i__, j; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); integer lastc; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer lastv; extern integer _starpu_iladlc_(integer *, integer *, doublereal *, integer *), _starpu_iladlr_(integer *, integer *, doublereal *, integer *); char transt[1]; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARFB applies a real block reflector H or its transpose H' to a */ /* real m by n matrix C, from either the left or the right. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply H or H' from the Left */ /* = 'R': apply H or H' from the Right */ /* TRANS (input) CHARACTER*1 */ /* = 'N': apply H (No transpose) */ /* = 'T': apply H' (Transpose) */ /* DIRECT (input) CHARACTER*1 */ /* Indicates how H is formed from a product of elementary */ /* reflectors */ /* = 'F': H = H(1) H(2) . . . H(k) (Forward) */ /* = 'B': H = H(k) . . . H(2) H(1) (Backward) */ /* STOREV (input) CHARACTER*1 */ /* Indicates how the vectors which define the elementary */ /* reflectors are stored: */ /* = 'C': Columnwise */ /* = 'R': Rowwise */ /* M (input) INTEGER */ /* The number of rows of the matrix C. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. */ /* K (input) INTEGER */ /* The order of the matrix T (= the number of elementary */ /* reflectors whose product defines the block reflector). */ /* V (input) DOUBLE PRECISION array, dimension */ /* (LDV,K) if STOREV = 'C' */ /* (LDV,M) if STOREV = 'R' and SIDE = 'L' */ /* (LDV,N) if STOREV = 'R' and SIDE = 'R' */ /* The matrix V. See further details. */ /* LDV (input) INTEGER */ /* The leading dimension of the array V. */ /* If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); */ /* if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); */ /* if STOREV = 'R', LDV >= K. */ /* T (input) DOUBLE PRECISION array, dimension (LDT,K) */ /* The triangular k by k matrix T in the representation of the */ /* block reflector. */ /* LDT (input) INTEGER */ /* The leading dimension of the array T. LDT >= K. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the m by n matrix C. */ /* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDA >= max(1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) */ /* LDWORK (input) INTEGER */ /* The leading dimension of the array WORK. */ /* If SIDE = 'L', LDWORK >= max(1,N); */ /* if SIDE = 'R', LDWORK >= max(1,M). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; work_dim1 = *ldwork; work_offset = 1 + work_dim1; work -= work_offset; /* Function Body */ if (*m <= 0 || *n <= 0) { return 0; } if (_starpu_lsame_(trans, "N")) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } if (_starpu_lsame_(storev, "C")) { if (_starpu_lsame_(direct, "F")) { /* Let V = ( V1 ) (first K rows) */ /* ( V2 ) */ /* where V1 is unit lower triangular. */ if (_starpu_lsame_(side, "L")) { /* Form H * C or H' * C where C = ( C1 ) */ /* ( C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlr_(m, k, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); /* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) */ /* W := C1' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], &c__1); /* L10: */ } /* W := W * V1 */ _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); if (lastv > *k) { /* W := W + C2'*V2 */ i__1 = lastv - *k; _starpu_dgemm_("Transpose", "No transpose", &lastc, k, &i__1, & c_b14, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1], ldv, &c_b14, &work[work_offset], ldwork); } /* W := W * T' or W * T */ _starpu_dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V * W' */ if (lastv > *k) { /* C2 := C2 - V2 * W' */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "Transpose", &i__1, &lastc, k, & c_b25, &v[*k + 1 + v_dim1], ldv, &work[ work_offset], ldwork, &c_b14, &c__[*k + 1 + c_dim1], ldc); } /* W := W * V1' */ _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L20: */ } /* L30: */ } } else if (_starpu_lsame_(side, "R")) { /* Form C * H or C * H' where C = ( C1 C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlr_(n, k, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); /* W := C * V = (C1*V1 + C2*V2) (stored in WORK) */ /* W := C1 */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L40: */ } /* W := W * V1 */ _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); if (lastv > *k) { /* W := W + C2 * V2 */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "No transpose", &lastc, k, &i__1, & c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + 1 + v_dim1], ldv, &c_b14, &work[work_offset], ldwork); } /* W := W * T or W * T' */ _starpu_dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V' */ if (lastv > *k) { /* C2 := C2 - W * V2' */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "Transpose", &lastc, &i__1, k, & c_b25, &work[work_offset], ldwork, &v[*k + 1 + v_dim1], ldv, &c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc); } /* W := W * V1' */ _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; /* L50: */ } /* L60: */ } } } else { /* Let V = ( V1 ) */ /* ( V2 ) (last K rows) */ /* where V2 is unit upper triangular. */ if (_starpu_lsame_(side, "L")) { /* Form H * C or H' * C where C = ( C1 ) */ /* ( C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlr_(m, k, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); /* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) */ /* W := C2' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ j * work_dim1 + 1], &c__1); /* L70: */ } /* W := W * V2 */ _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ work_offset], ldwork); if (lastv > *k) { /* W := W + C1'*V1 */ i__1 = lastv - *k; _starpu_dgemm_("Transpose", "No transpose", &lastc, k, &i__1, & c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, & c_b14, &work[work_offset], ldwork); } /* W := W * T' or W * T */ _starpu_dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V * W' */ if (lastv > *k) { /* C1 := C1 - V1 * W' */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "Transpose", &i__1, &lastc, k, & c_b25, &v[v_offset], ldv, &work[work_offset], ldwork, &c_b14, &c__[c_offset], ldc); } /* W := W * V2' */ _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ work_offset], ldwork); /* C2 := C2 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L80: */ } /* L90: */ } } else if (_starpu_lsame_(side, "R")) { /* Form C * H or C * H' where C = ( C1 C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlr_(n, k, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); /* W := C * V = (C1*V1 + C2*V2) (stored in WORK) */ /* W := C2 */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, & work[j * work_dim1 + 1], &c__1); /* L100: */ } /* W := W * V2 */ _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ work_offset], ldwork); if (lastv > *k) { /* W := W + C1 * V1 */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "No transpose", &lastc, k, &i__1, & c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, & c_b14, &work[work_offset], ldwork); } /* W := W * T or W * T' */ _starpu_dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V' */ if (lastv > *k) { /* C1 := C1 - W * V1' */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "Transpose", &lastc, &i__1, k, & c_b25, &work[work_offset], ldwork, &v[v_offset], ldv, &c_b14, &c__[c_offset], ldc); } /* W := W * V2' */ _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[ work_offset], ldwork); /* C2 := C2 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * work_dim1]; /* L110: */ } /* L120: */ } } } } else if (_starpu_lsame_(storev, "R")) { if (_starpu_lsame_(direct, "F")) { /* Let V = ( V1 V2 ) (V1: first K columns) */ /* where V1 is unit upper triangular. */ if (_starpu_lsame_(side, "L")) { /* Form H * C or H' * C where C = ( C1 ) */ /* ( C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlc_(k, m, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); /* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) */ /* W := C1' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], &c__1); /* L130: */ } /* W := W * V1' */ _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); if (lastv > *k) { /* W := W + C2'*V2' */ i__1 = lastv - *k; _starpu_dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b14, &c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + 1], ldv, &c_b14, &work[work_offset], ldwork); } /* W := W * T' or W * T */ _starpu_dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V' * W' */ if (lastv > *k) { /* C2 := C2 - V2' * W' */ i__1 = lastv - *k; _starpu_dgemm_("Transpose", "Transpose", &i__1, &lastc, k, &c_b25, &v[(*k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork, &c_b14, &c__[*k + 1 + c_dim1], ldc); } /* W := W * V1 */ _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L140: */ } /* L150: */ } } else if (_starpu_lsame_(side, "R")) { /* Form C * H or C * H' where C = ( C1 C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlc_(k, n, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); /* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) */ /* W := C1 */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L160: */ } /* W := W * V1' */ _starpu_dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); if (lastv > *k) { /* W := W + C2 * V2' */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "Transpose", &lastc, k, &i__1, & c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 1) * v_dim1 + 1], ldv, &c_b14, &work[work_offset], ldwork); } /* W := W * T or W * T' */ _starpu_dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V */ if (lastv > *k) { /* C2 := C2 - W * V2 */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "No transpose", &lastc, &i__1, k, & c_b25, &work[work_offset], ldwork, &v[(*k + 1) * v_dim1 + 1], ldv, &c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc); } /* W := W * V1 */ _starpu_dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & c_b14, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; /* L170: */ } /* L180: */ } } } else { /* Let V = ( V1 V2 ) (V2: last K columns) */ /* where V2 is unit lower triangular. */ if (_starpu_lsame_(side, "L")) { /* Form H * C or H' * C where C = ( C1 ) */ /* ( C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlc_(k, m, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlc_(&lastv, n, &c__[c_offset], ldc); /* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) */ /* W := C2' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ j * work_dim1 + 1], &c__1); /* L190: */ } /* W := W * V2' */ _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); if (lastv > *k) { /* W := W + C1'*V1' */ i__1 = lastv - *k; _starpu_dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, & work[work_offset], ldwork); } /* W := W * T' or W * T */ _starpu_dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V' * W' */ if (lastv > *k) { /* C1 := C1 - V1' * W' */ i__1 = lastv - *k; _starpu_dgemm_("Transpose", "Transpose", &i__1, &lastc, k, &c_b25, &v[v_offset], ldv, &work[work_offset], ldwork, & c_b14, &c__[c_offset], ldc); } /* W := W * V2 */ _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); /* C2 := C2 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L200: */ } /* L210: */ } } else if (_starpu_lsame_(side, "R")) { /* Form C * H or C * H' where C = ( C1 C2 ) */ /* Computing MAX */ i__1 = *k, i__2 = _starpu_iladlc_(k, n, &v[v_offset], ldv); lastv = max(i__1,i__2); lastc = _starpu_iladlr_(m, &lastv, &c__[c_offset], ldc); /* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) */ /* W := C2 */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L220: */ } /* W := W * V2' */ _starpu_dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); if (lastv > *k) { /* W := W + C1 * V1' */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "Transpose", &lastc, k, &i__1, & c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, & c_b14, &work[work_offset], ldwork); } /* W := W * T or W * T' */ _starpu_dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b14, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V */ if (lastv > *k) { /* C1 := C1 - W * V1 */ i__1 = lastv - *k; _starpu_dgemm_("No transpose", "No transpose", &lastc, &i__1, k, & c_b25, &work[work_offset], ldwork, &v[v_offset], ldv, &c_b14, &c__[c_offset], ldc); } /* W := W * V2 */ _starpu_dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * work_dim1]; /* L230: */ } /* L240: */ } } } } return 0; /* End of DLARFB */ } /* _starpu_dlarfb_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarfg.c000066400000000000000000000103401507764646700206310ustar00rootroot00000000000000/* dlarfg.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlarfg_(integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *tau) { /* System generated locals */ integer i__1; doublereal d__1; /* Builtin functions */ double d_sign(doublereal *, doublereal *); /* Local variables */ integer j, knt; doublereal beta; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal xnorm; extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); doublereal safmin, rsafmn; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARFG generates a real elementary reflector H of order n, such */ /* that */ /* H * ( alpha ) = ( beta ), H' * H = I. */ /* ( x ) ( 0 ) */ /* where alpha and beta are scalars, and x is an (n-1)-element real */ /* vector. H is represented in the form */ /* H = I - tau * ( 1 ) * ( 1 v' ) , */ /* ( v ) */ /* where tau is a real scalar and v is a real (n-1)-element */ /* vector. */ /* If the elements of x are all zero, then tau = 0 and H is taken to be */ /* the unit matrix. */ /* Otherwise 1 <= tau <= 2. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the elementary reflector. */ /* ALPHA (input/output) DOUBLE PRECISION */ /* On entry, the value alpha. */ /* On exit, it is overwritten with the value beta. */ /* X (input/output) DOUBLE PRECISION array, dimension */ /* (1+(N-2)*abs(INCX)) */ /* On entry, the vector x. */ /* On exit, it is overwritten with the vector v. */ /* INCX (input) INTEGER */ /* The increment between elements of X. INCX > 0. */ /* TAU (output) DOUBLE PRECISION */ /* The value tau. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --x; /* Function Body */ if (*n <= 1) { *tau = 0.; return 0; } i__1 = *n - 1; xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); if (xnorm == 0.) { /* H = I */ *tau = 0.; } else { /* general case */ d__1 = _starpu_dlapy2_(alpha, &xnorm); beta = -d_sign(&d__1, alpha); safmin = _starpu_dlamch_("S") / _starpu_dlamch_("E"); knt = 0; if (abs(beta) < safmin) { /* XNORM, BETA may be inaccurate; scale X and recompute them */ rsafmn = 1. / safmin; L10: ++knt; i__1 = *n - 1; _starpu_dscal_(&i__1, &rsafmn, &x[1], incx); beta *= rsafmn; *alpha *= rsafmn; if (abs(beta) < safmin) { goto L10; } /* New BETA is at most 1, at least SAFMIN */ i__1 = *n - 1; xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); d__1 = _starpu_dlapy2_(alpha, &xnorm); beta = -d_sign(&d__1, alpha); } *tau = (beta - *alpha) / beta; i__1 = *n - 1; d__1 = 1. / (*alpha - beta); _starpu_dscal_(&i__1, &d__1, &x[1], incx); /* If ALPHA is subnormal, it may lose relative accuracy */ i__1 = knt; for (j = 1; j <= i__1; ++j) { beta *= safmin; /* L20: */ } *alpha = beta; } return 0; /* End of DLARFG */ } /* _starpu_dlarfg_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarfp.c000066400000000000000000000116101507764646700206430ustar00rootroot00000000000000/* dlarfp.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlarfp_(integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *tau) { /* System generated locals */ integer i__1; doublereal d__1; /* Builtin functions */ double d_sign(doublereal *, doublereal *); /* Local variables */ integer j, knt; doublereal beta; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal xnorm; extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); doublereal safmin, rsafmn; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARFP generates a real elementary reflector H of order n, such */ /* that */ /* H * ( alpha ) = ( beta ), H' * H = I. */ /* ( x ) ( 0 ) */ /* where alpha and beta are scalars, beta is non-negative, and x is */ /* an (n-1)-element real vector. H is represented in the form */ /* H = I - tau * ( 1 ) * ( 1 v' ) , */ /* ( v ) */ /* where tau is a real scalar and v is a real (n-1)-element */ /* vector. */ /* If the elements of x are all zero, then tau = 0 and H is taken to be */ /* the unit matrix. */ /* Otherwise 1 <= tau <= 2. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the elementary reflector. */ /* ALPHA (input/output) DOUBLE PRECISION */ /* On entry, the value alpha. */ /* On exit, it is overwritten with the value beta. */ /* X (input/output) DOUBLE PRECISION array, dimension */ /* (1+(N-2)*abs(INCX)) */ /* On entry, the vector x. */ /* On exit, it is overwritten with the vector v. */ /* INCX (input) INTEGER */ /* The increment between elements of X. INCX > 0. */ /* TAU (output) DOUBLE PRECISION */ /* The value tau. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --x; /* Function Body */ if (*n <= 0) { *tau = 0.; return 0; } i__1 = *n - 1; xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); if (xnorm == 0.) { /* H = [+/-1, 0; I], sign chosen so ALPHA >= 0 */ if (*alpha >= 0.) { /* When TAU.eq.ZERO, the vector is special-cased to be */ /* all zeros in the application routines. We do not need */ /* to clear it. */ *tau = 0.; } else { /* However, the application routines rely on explicit */ /* zero checks when TAU.ne.ZERO, and we must clear X. */ *tau = 2.; i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { x[(j - 1) * *incx + 1] = 0.; } *alpha = -(*alpha); } } else { /* general case */ d__1 = _starpu_dlapy2_(alpha, &xnorm); beta = d_sign(&d__1, alpha); safmin = _starpu_dlamch_("S") / _starpu_dlamch_("E"); knt = 0; if (abs(beta) < safmin) { /* XNORM, BETA may be inaccurate; scale X and recompute them */ rsafmn = 1. / safmin; L10: ++knt; i__1 = *n - 1; _starpu_dscal_(&i__1, &rsafmn, &x[1], incx); beta *= rsafmn; *alpha *= rsafmn; if (abs(beta) < safmin) { goto L10; } /* New BETA is at most 1, at least SAFMIN */ i__1 = *n - 1; xnorm = _starpu_dnrm2_(&i__1, &x[1], incx); d__1 = _starpu_dlapy2_(alpha, &xnorm); beta = d_sign(&d__1, alpha); } *alpha += beta; if (beta < 0.) { beta = -beta; *tau = -(*alpha) / beta; } else { *alpha = xnorm * (xnorm / *alpha); *tau = *alpha / beta; *alpha = -(*alpha); } i__1 = *n - 1; d__1 = 1. / *alpha; _starpu_dscal_(&i__1, &d__1, &x[1], incx); /* If BETA is subnormal, it may lose relative accuracy */ i__1 = knt; for (j = 1; j <= i__1; ++j) { beta *= safmin; /* L20: */ } *alpha = beta; } return 0; /* End of DLARFP */ } /* _starpu_dlarfp_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarft.c000066400000000000000000000231411507764646700206510ustar00rootroot00000000000000/* dlarft.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b8 = 0.; /* Subroutine */ int _starpu_dlarft_(char *direct, char *storev, integer *n, integer * k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, integer *ldt) { /* System generated locals */ integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; doublereal d__1; /* Local variables */ integer i__, j, prevlastv; doublereal vii; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer lastv; extern /* Subroutine */ int _starpu_dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARFT forms the triangular factor T of a real block reflector H */ /* of order n, which is defined as a product of k elementary reflectors. */ /* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */ /* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */ /* If STOREV = 'C', the vector which defines the elementary reflector */ /* H(i) is stored in the i-th column of the array V, and */ /* H = I - V * T * V' */ /* If STOREV = 'R', the vector which defines the elementary reflector */ /* H(i) is stored in the i-th row of the array V, and */ /* H = I - V' * T * V */ /* Arguments */ /* ========= */ /* DIRECT (input) CHARACTER*1 */ /* Specifies the order in which the elementary reflectors are */ /* multiplied to form the block reflector: */ /* = 'F': H = H(1) H(2) . . . H(k) (Forward) */ /* = 'B': H = H(k) . . . H(2) H(1) (Backward) */ /* STOREV (input) CHARACTER*1 */ /* Specifies how the vectors which define the elementary */ /* reflectors are stored (see also Further Details): */ /* = 'C': columnwise */ /* = 'R': rowwise */ /* N (input) INTEGER */ /* The order of the block reflector H. N >= 0. */ /* K (input) INTEGER */ /* The order of the triangular factor T (= the number of */ /* elementary reflectors). K >= 1. */ /* V (input/output) DOUBLE PRECISION array, dimension */ /* (LDV,K) if STOREV = 'C' */ /* (LDV,N) if STOREV = 'R' */ /* The matrix V. See further details. */ /* LDV (input) INTEGER */ /* The leading dimension of the array V. */ /* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i). */ /* T (output) DOUBLE PRECISION array, dimension (LDT,K) */ /* The k by k triangular factor T of the block reflector. */ /* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */ /* lower triangular. The rest of the array is not used. */ /* LDT (input) INTEGER */ /* The leading dimension of the array T. LDT >= K. */ /* Further Details */ /* =============== */ /* The shape of the matrix V and the storage of the vectors which define */ /* the H(i) is best illustrated by the following example with n = 5 and */ /* k = 3. The elements equal to 1 are not stored; the corresponding */ /* array elements are modified but restored on exit. The rest of the */ /* array is not used. */ /* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */ /* V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */ /* ( v1 1 ) ( 1 v2 v2 v2 ) */ /* ( v1 v2 1 ) ( 1 v3 v3 ) */ /* ( v1 v2 v3 ) */ /* ( v1 v2 v3 ) */ /* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */ /* V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */ /* ( v1 v2 v3 ) ( v2 v2 v2 1 ) */ /* ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */ /* ( 1 v3 ) */ /* ( 1 ) */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; --tau; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; /* Function Body */ if (*n == 0) { return 0; } if (_starpu_lsame_(direct, "F")) { prevlastv = *n; i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { prevlastv = max(i__,prevlastv); if (tau[i__] == 0.) { /* H(i) = I */ i__2 = i__; for (j = 1; j <= i__2; ++j) { t[j + i__ * t_dim1] = 0.; /* L10: */ } } else { /* general case */ vii = v[i__ + i__ * v_dim1]; v[i__ + i__ * v_dim1] = 1.; if (_starpu_lsame_(storev, "C")) { /* Skip any trailing zeros. */ i__2 = i__ + 1; for (lastv = *n; lastv >= i__2; --lastv) { if (v[lastv + i__ * v_dim1] != 0.) { break; } } j = min(lastv,prevlastv); /* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)' * V(i:j,i) */ i__2 = j - i__ + 1; i__3 = i__ - 1; d__1 = -tau[i__]; _starpu_dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b8, &t[ i__ * t_dim1 + 1], &c__1); } else { /* Skip any trailing zeros. */ i__2 = i__ + 1; for (lastv = *n; lastv >= i__2; --lastv) { if (v[i__ + lastv * v_dim1] != 0.) { break; } } j = min(lastv,prevlastv); /* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)' */ i__2 = i__ - 1; i__3 = j - i__ + 1; d__1 = -tau[i__]; _starpu_dgemv_("No transpose", &i__2, &i__3, &d__1, &v[i__ * v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & c_b8, &t[i__ * t_dim1 + 1], &c__1); } v[i__ + i__ * v_dim1] = vii; /* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ i__2 = i__ - 1; _starpu_dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); t[i__ + i__ * t_dim1] = tau[i__]; if (i__ > 1) { prevlastv = max(prevlastv,lastv); } else { prevlastv = lastv; } } /* L20: */ } } else { prevlastv = 1; for (i__ = *k; i__ >= 1; --i__) { if (tau[i__] == 0.) { /* H(i) = I */ i__1 = *k; for (j = i__; j <= i__1; ++j) { t[j + i__ * t_dim1] = 0.; /* L30: */ } } else { /* general case */ if (i__ < *k) { if (_starpu_lsame_(storev, "C")) { vii = v[*n - *k + i__ + i__ * v_dim1]; v[*n - *k + i__ + i__ * v_dim1] = 1.; /* Skip any leading zeros. */ i__1 = i__ - 1; for (lastv = 1; lastv <= i__1; ++lastv) { if (v[lastv + i__ * v_dim1] != 0.) { break; } } j = max(lastv,prevlastv); /* T(i+1:k,i) := */ /* - tau(i) * V(j:n-k+i,i+1:k)' * V(j:n-k+i,i) */ i__1 = *n - *k + i__ - j + 1; i__2 = *k - i__; d__1 = -tau[i__]; _starpu_dgemv_("Transpose", &i__1, &i__2, &d__1, &v[j + (i__ + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], & c__1, &c_b8, &t[i__ + 1 + i__ * t_dim1], & c__1); v[*n - *k + i__ + i__ * v_dim1] = vii; } else { vii = v[i__ + (*n - *k + i__) * v_dim1]; v[i__ + (*n - *k + i__) * v_dim1] = 1.; /* Skip any leading zeros. */ i__1 = i__ - 1; for (lastv = 1; lastv <= i__1; ++lastv) { if (v[i__ + lastv * v_dim1] != 0.) { break; } } j = max(lastv,prevlastv); /* T(i+1:k,i) := */ /* - tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)' */ i__1 = *k - i__; i__2 = *n - *k + i__ - j + 1; d__1 = -tau[i__]; _starpu_dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], ldv, &c_b8, &t[i__ + 1 + i__ * t_dim1], &c__1); v[i__ + (*n - *k + i__) * v_dim1] = vii; } /* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ i__1 = *k - i__; _starpu_dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * t_dim1], &c__1) ; if (i__ > 1) { prevlastv = min(prevlastv,lastv); } else { prevlastv = lastv; } } t[i__ + i__ * t_dim1] = tau[i__]; } /* L40: */ } } return 0; /* End of DLARFT */ } /* _starpu_dlarft_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarfx.c000066400000000000000000000425701507764646700206640ustar00rootroot00000000000000/* dlarfx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dlarfx_(char *side, integer *m, integer *n, doublereal * v, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work) { /* System generated locals */ integer c_dim1, c_offset, i__1; /* Local variables */ integer j; doublereal t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, v7, v8, v9, t10, v10, sum; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *); extern logical _starpu_lsame_(char *, char *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARFX applies a real elementary reflector H to a real m by n */ /* matrix C, from either the left or the right. H is represented in the */ /* form */ /* H = I - tau * v * v' */ /* where tau is a real scalar and v is a real vector. */ /* If tau = 0, then H is taken to be the unit matrix */ /* This version uses inline code if H has order < 11. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': form H * C */ /* = 'R': form C * H */ /* M (input) INTEGER */ /* The number of rows of the matrix C. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. */ /* V (input) DOUBLE PRECISION array, dimension (M) if SIDE = 'L' */ /* or (N) if SIDE = 'R' */ /* The vector v in the representation of H. */ /* TAU (input) DOUBLE PRECISION */ /* The value tau in the representation of H. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the m by n matrix C. */ /* On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ /* or C * H if SIDE = 'R'. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDA >= (1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (N) if SIDE = 'L' */ /* or (M) if SIDE = 'R' */ /* WORK is not referenced if H has order < 11. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --v; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ if (*tau == 0.) { return 0; } if (_starpu_lsame_(side, "L")) { /* Form H * C, where H has order m. */ switch (*m) { case 1: goto L10; case 2: goto L30; case 3: goto L50; case 4: goto L70; case 5: goto L90; case 6: goto L110; case 7: goto L130; case 8: goto L150; case 9: goto L170; case 10: goto L190; } /* Code for general M */ _starpu_dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1]); goto L410; L10: /* Special code for 1 x 1 Householder */ t1 = 1. - *tau * v[1] * v[1]; i__1 = *n; for (j = 1; j <= i__1; ++j) { c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; /* L20: */ } goto L410; L30: /* Special code for 2 x 2 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2]; c__[j * c_dim1 + 1] -= sum * t1; c__[j * c_dim1 + 2] -= sum * t2; /* L40: */ } goto L410; L50: /* Special code for 3 x 3 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3]; c__[j * c_dim1 + 1] -= sum * t1; c__[j * c_dim1 + 2] -= sum * t2; c__[j * c_dim1 + 3] -= sum * t3; /* L60: */ } goto L410; L70: /* Special code for 4 x 4 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; v4 = v[4]; t4 = *tau * v4; i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4]; c__[j * c_dim1 + 1] -= sum * t1; c__[j * c_dim1 + 2] -= sum * t2; c__[j * c_dim1 + 3] -= sum * t3; c__[j * c_dim1 + 4] -= sum * t4; /* L80: */ } goto L410; L90: /* Special code for 5 x 5 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; v4 = v[4]; t4 = *tau * v4; v5 = v[5]; t5 = *tau * v5; i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ j * c_dim1 + 5]; c__[j * c_dim1 + 1] -= sum * t1; c__[j * c_dim1 + 2] -= sum * t2; c__[j * c_dim1 + 3] -= sum * t3; c__[j * c_dim1 + 4] -= sum * t4; c__[j * c_dim1 + 5] -= sum * t5; /* L100: */ } goto L410; L110: /* Special code for 6 x 6 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; v4 = v[4]; t4 = *tau * v4; v5 = v[5]; t5 = *tau * v5; v6 = v[6]; t6 = *tau * v6; i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6]; c__[j * c_dim1 + 1] -= sum * t1; c__[j * c_dim1 + 2] -= sum * t2; c__[j * c_dim1 + 3] -= sum * t3; c__[j * c_dim1 + 4] -= sum * t4; c__[j * c_dim1 + 5] -= sum * t5; c__[j * c_dim1 + 6] -= sum * t6; /* L120: */ } goto L410; L130: /* Special code for 7 x 7 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; v4 = v[4]; t4 = *tau * v4; v5 = v[5]; t5 = *tau * v5; v6 = v[6]; t6 = *tau * v6; v7 = v[7]; t7 = *tau * v7; i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * c_dim1 + 7]; c__[j * c_dim1 + 1] -= sum * t1; c__[j * c_dim1 + 2] -= sum * t2; c__[j * c_dim1 + 3] -= sum * t3; c__[j * c_dim1 + 4] -= sum * t4; c__[j * c_dim1 + 5] -= sum * t5; c__[j * c_dim1 + 6] -= sum * t6; c__[j * c_dim1 + 7] -= sum * t7; /* L140: */ } goto L410; L150: /* Special code for 8 x 8 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; v4 = v[4]; t4 = *tau * v4; v5 = v[5]; t5 = *tau * v5; v6 = v[6]; t6 = *tau * v6; v7 = v[7]; t7 = *tau * v7; v8 = v[8]; t8 = *tau * v8; i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * c_dim1 + 7] + v8 * c__[j * c_dim1 + 8]; c__[j * c_dim1 + 1] -= sum * t1; c__[j * c_dim1 + 2] -= sum * t2; c__[j * c_dim1 + 3] -= sum * t3; c__[j * c_dim1 + 4] -= sum * t4; c__[j * c_dim1 + 5] -= sum * t5; c__[j * c_dim1 + 6] -= sum * t6; c__[j * c_dim1 + 7] -= sum * t7; c__[j * c_dim1 + 8] -= sum * t8; /* L160: */ } goto L410; L170: /* Special code for 9 x 9 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; v4 = v[4]; t4 = *tau * v4; v5 = v[5]; t5 = *tau * v5; v6 = v[6]; t6 = *tau * v6; v7 = v[7]; t7 = *tau * v7; v8 = v[8]; t8 = *tau * v8; v9 = v[9]; t9 = *tau * v9; i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * c_dim1 + 9]; c__[j * c_dim1 + 1] -= sum * t1; c__[j * c_dim1 + 2] -= sum * t2; c__[j * c_dim1 + 3] -= sum * t3; c__[j * c_dim1 + 4] -= sum * t4; c__[j * c_dim1 + 5] -= sum * t5; c__[j * c_dim1 + 6] -= sum * t6; c__[j * c_dim1 + 7] -= sum * t7; c__[j * c_dim1 + 8] -= sum * t8; c__[j * c_dim1 + 9] -= sum * t9; /* L180: */ } goto L410; L190: /* Special code for 10 x 10 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; v4 = v[4]; t4 = *tau * v4; v5 = v[5]; t5 = *tau * v5; v6 = v[6]; t6 = *tau * v6; v7 = v[7]; t7 = *tau * v7; v8 = v[8]; t8 = *tau * v8; v9 = v[9]; t9 = *tau * v9; v10 = v[10]; t10 = *tau * v10; i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * c_dim1 + 9] + v10 * c__[j * c_dim1 + 10]; c__[j * c_dim1 + 1] -= sum * t1; c__[j * c_dim1 + 2] -= sum * t2; c__[j * c_dim1 + 3] -= sum * t3; c__[j * c_dim1 + 4] -= sum * t4; c__[j * c_dim1 + 5] -= sum * t5; c__[j * c_dim1 + 6] -= sum * t6; c__[j * c_dim1 + 7] -= sum * t7; c__[j * c_dim1 + 8] -= sum * t8; c__[j * c_dim1 + 9] -= sum * t9; c__[j * c_dim1 + 10] -= sum * t10; /* L200: */ } goto L410; } else { /* Form C * H, where H has order n. */ switch (*n) { case 1: goto L210; case 2: goto L230; case 3: goto L250; case 4: goto L270; case 5: goto L290; case 6: goto L310; case 7: goto L330; case 8: goto L350; case 9: goto L370; case 10: goto L390; } /* Code for general N */ _starpu_dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1]); goto L410; L210: /* Special code for 1 x 1 Householder */ t1 = 1. - *tau * v[1] * v[1]; i__1 = *m; for (j = 1; j <= i__1; ++j) { c__[j + c_dim1] = t1 * c__[j + c_dim1]; /* L220: */ } goto L410; L230: /* Special code for 2 x 2 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; i__1 = *m; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)]; c__[j + c_dim1] -= sum * t1; c__[j + (c_dim1 << 1)] -= sum * t2; /* L240: */ } goto L410; L250: /* Special code for 3 x 3 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; i__1 = *m; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3]; c__[j + c_dim1] -= sum * t1; c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; /* L260: */ } goto L410; L270: /* Special code for 4 x 4 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; v4 = v[4]; t4 = *tau * v4; i__1 = *m; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)]; c__[j + c_dim1] -= sum * t1; c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; c__[j + (c_dim1 << 2)] -= sum * t4; /* L280: */ } goto L410; L290: /* Special code for 5 x 5 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; v4 = v[4]; t4 = *tau * v4; v5 = v[5]; t5 = *tau * v5; i__1 = *m; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5]; c__[j + c_dim1] -= sum * t1; c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; c__[j + (c_dim1 << 2)] -= sum * t4; c__[j + c_dim1 * 5] -= sum * t5; /* L300: */ } goto L410; L310: /* Special code for 6 x 6 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; v4 = v[4]; t4 = *tau * v4; v5 = v[5]; t5 = *tau * v5; v6 = v[6]; t6 = *tau * v6; i__1 = *m; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6]; c__[j + c_dim1] -= sum * t1; c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; c__[j + (c_dim1 << 2)] -= sum * t4; c__[j + c_dim1 * 5] -= sum * t5; c__[j + c_dim1 * 6] -= sum * t6; /* L320: */ } goto L410; L330: /* Special code for 7 x 7 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; v4 = v[4]; t4 = *tau * v4; v5 = v[5]; t5 = *tau * v5; v6 = v[6]; t6 = *tau * v6; v7 = v[7]; t7 = *tau * v7; i__1 = *m; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ j + c_dim1 * 7]; c__[j + c_dim1] -= sum * t1; c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; c__[j + (c_dim1 << 2)] -= sum * t4; c__[j + c_dim1 * 5] -= sum * t5; c__[j + c_dim1 * 6] -= sum * t6; c__[j + c_dim1 * 7] -= sum * t7; /* L340: */ } goto L410; L350: /* Special code for 8 x 8 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; v4 = v[4]; t4 = *tau * v4; v5 = v[5]; t5 = *tau * v5; v6 = v[6]; t6 = *tau * v6; v7 = v[7]; t7 = *tau * v7; v8 = v[8]; t8 = *tau * v8; i__1 = *m; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)]; c__[j + c_dim1] -= sum * t1; c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; c__[j + (c_dim1 << 2)] -= sum * t4; c__[j + c_dim1 * 5] -= sum * t5; c__[j + c_dim1 * 6] -= sum * t6; c__[j + c_dim1 * 7] -= sum * t7; c__[j + (c_dim1 << 3)] -= sum * t8; /* L360: */ } goto L410; L370: /* Special code for 9 x 9 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; v4 = v[4]; t4 = *tau * v4; v5 = v[5]; t5 = *tau * v5; v6 = v[6]; t6 = *tau * v6; v7 = v[7]; t7 = *tau * v7; v8 = v[8]; t8 = *tau * v8; v9 = v[9]; t9 = *tau * v9; i__1 = *m; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ j + c_dim1 * 9]; c__[j + c_dim1] -= sum * t1; c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; c__[j + (c_dim1 << 2)] -= sum * t4; c__[j + c_dim1 * 5] -= sum * t5; c__[j + c_dim1 * 6] -= sum * t6; c__[j + c_dim1 * 7] -= sum * t7; c__[j + (c_dim1 << 3)] -= sum * t8; c__[j + c_dim1 * 9] -= sum * t9; /* L380: */ } goto L410; L390: /* Special code for 10 x 10 Householder */ v1 = v[1]; t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; t3 = *tau * v3; v4 = v[4]; t4 = *tau * v4; v5 = v[5]; t5 = *tau * v5; v6 = v[6]; t6 = *tau * v6; v7 = v[7]; t7 = *tau * v7; v8 = v[8]; t8 = *tau * v8; v9 = v[9]; t9 = *tau * v9; v10 = v[10]; t10 = *tau * v10; i__1 = *m; for (j = 1; j <= i__1; ++j) { sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ j + c_dim1 * 9] + v10 * c__[j + c_dim1 * 10]; c__[j + c_dim1] -= sum * t1; c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; c__[j + (c_dim1 << 2)] -= sum * t4; c__[j + c_dim1 * 5] -= sum * t5; c__[j + c_dim1 * 6] -= sum * t6; c__[j + c_dim1 * 7] -= sum * t7; c__[j + (c_dim1 << 3)] -= sum * t8; c__[j + c_dim1 * 9] -= sum * t9; c__[j + c_dim1 * 10] -= sum * t10; /* L400: */ } goto L410; } L410: return 0; /* End of DLARFX */ } /* _starpu_dlarfx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlargv.c000066400000000000000000000063511507764646700206600ustar00rootroot00000000000000/* dlargv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlargv_(integer *n, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *c__, integer *incc) { /* System generated locals */ integer i__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal f, g; integer i__; doublereal t; integer ic, ix, iy; doublereal tt; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARGV generates a vector of real plane rotations, determined by */ /* elements of the real vectors x and y. For i = 1,2,...,n */ /* ( c(i) s(i) ) ( x(i) ) = ( a(i) ) */ /* ( -s(i) c(i) ) ( y(i) ) = ( 0 ) */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of plane rotations to be generated. */ /* X (input/output) DOUBLE PRECISION array, */ /* dimension (1+(N-1)*INCX) */ /* On entry, the vector x. */ /* On exit, x(i) is overwritten by a(i), for i = 1,...,n. */ /* INCX (input) INTEGER */ /* The increment between elements of X. INCX > 0. */ /* Y (input/output) DOUBLE PRECISION array, */ /* dimension (1+(N-1)*INCY) */ /* On entry, the vector y. */ /* On exit, the sines of the plane rotations. */ /* INCY (input) INTEGER */ /* The increment between elements of Y. INCY > 0. */ /* C (output) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) */ /* The cosines of the plane rotations. */ /* INCC (input) INTEGER */ /* The increment between elements of C. INCC > 0. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --c__; --y; --x; /* Function Body */ ix = 1; iy = 1; ic = 1; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { f = x[ix]; g = y[iy]; if (g == 0.) { c__[ic] = 1.; } else if (f == 0.) { c__[ic] = 0.; y[iy] = 1.; x[ix] = g; } else if (abs(f) > abs(g)) { t = g / f; tt = sqrt(t * t + 1.); c__[ic] = 1. / tt; y[iy] = t * c__[ic]; x[ix] = f * tt; } else { t = f / g; tt = sqrt(t * t + 1.); y[iy] = 1. / tt; c__[ic] = t * y[iy]; x[ix] = g * tt; } ic += *incc; iy += *incy; ix += *incx; /* L10: */ } return 0; /* End of DLARGV */ } /* _starpu_dlargv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarnv.c000066400000000000000000000074611507764646700206720ustar00rootroot00000000000000/* dlarnv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlarnv_(integer *idist, integer *iseed, integer *n, doublereal *x) { /* System generated locals */ integer i__1, i__2, i__3; /* Builtin functions */ double log(doublereal), sqrt(doublereal), cos(doublereal); /* Local variables */ integer i__; doublereal u[128]; integer il, iv, il2; extern /* Subroutine */ int _starpu_dlaruv_(integer *, integer *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARNV returns a vector of n random real numbers from a uniform or */ /* normal distribution. */ /* Arguments */ /* ========= */ /* IDIST (input) INTEGER */ /* Specifies the distribution of the random numbers: */ /* = 1: uniform (0,1) */ /* = 2: uniform (-1,1) */ /* = 3: normal (0,1) */ /* ISEED (input/output) INTEGER array, dimension (4) */ /* On entry, the seed of the random number generator; the array */ /* elements must be between 0 and 4095, and ISEED(4) must be */ /* odd. */ /* On exit, the seed is updated. */ /* N (input) INTEGER */ /* The number of random numbers to be generated. */ /* X (output) DOUBLE PRECISION array, dimension (N) */ /* The generated random numbers. */ /* Further Details */ /* =============== */ /* This routine calls the auxiliary routine DLARUV to generate random */ /* real numbers from a uniform (0,1) distribution, in batches of up to */ /* 128 using vectorisable code. The Box-Muller method is used to */ /* transform numbers from a uniform to a normal distribution. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --x; --iseed; /* Function Body */ i__1 = *n; for (iv = 1; iv <= i__1; iv += 64) { /* Computing MIN */ i__2 = 64, i__3 = *n - iv + 1; il = min(i__2,i__3); if (*idist == 3) { il2 = il << 1; } else { il2 = il; } /* Call DLARUV to generate IL2 numbers from a uniform (0,1) */ /* distribution (IL2 <= LV) */ _starpu_dlaruv_(&iseed[1], &il2, u); if (*idist == 1) { /* Copy generated numbers */ i__2 = il; for (i__ = 1; i__ <= i__2; ++i__) { x[iv + i__ - 1] = u[i__ - 1]; /* L10: */ } } else if (*idist == 2) { /* Convert generated numbers to uniform (-1,1) distribution */ i__2 = il; for (i__ = 1; i__ <= i__2; ++i__) { x[iv + i__ - 1] = u[i__ - 1] * 2. - 1.; /* L20: */ } } else if (*idist == 3) { /* Convert generated numbers to normal (0,1) distribution */ i__2 = il; for (i__ = 1; i__ <= i__2; ++i__) { x[iv + i__ - 1] = sqrt(log(u[(i__ << 1) - 2]) * -2.) * cos(u[( i__ << 1) - 1] * 6.2831853071795864769252867663); /* L30: */ } } /* L40: */ } return 0; /* End of DLARNV */ } /* _starpu_dlarnv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarra.c000066400000000000000000000112351507764646700206430ustar00rootroot00000000000000/* dlarra.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlarra_(integer *n, doublereal *d__, doublereal *e, doublereal *e2, doublereal *spltol, doublereal *tnrm, integer *nsplit, integer *isplit, integer *info) { /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__; doublereal tmp1, eabs; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* Compute the splitting points with threshold SPLTOL. */ /* DLARRA sets any "small" off-diagonal elements to zero. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix. N > 0. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* On entry, the N diagonal elements of the tridiagonal */ /* matrix T. */ /* E (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the first (N-1) entries contain the subdiagonal */ /* elements of the tridiagonal matrix T; E(N) need not be set. */ /* On exit, the entries E( ISPLIT( I ) ), 1 <= I <= NSPLIT, */ /* are set to zero, the other entries of E are untouched. */ /* E2 (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the first (N-1) entries contain the SQUARES of the */ /* subdiagonal elements of the tridiagonal matrix T; */ /* E2(N) need not be set. */ /* On exit, the entries E2( ISPLIT( I ) ), */ /* 1 <= I <= NSPLIT, have been set to zero */ /* SPLTOL (input) DOUBLE PRECISION */ /* The threshold for splitting. Two criteria can be used: */ /* SPLTOL<0 : criterion based on absolute off-diagonal value */ /* SPLTOL>0 : criterion that preserves relative accuracy */ /* TNRM (input) DOUBLE PRECISION */ /* The norm of the matrix. */ /* NSPLIT (output) INTEGER */ /* The number of blocks T splits into. 1 <= NSPLIT <= N. */ /* ISPLIT (output) INTEGER array, dimension (N) */ /* The splitting points, at which T breaks up into blocks. */ /* The first block consists of rows/columns 1 to ISPLIT(1), */ /* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ /* etc., and the NSPLIT-th consists of rows/columns */ /* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Beresford Parlett, University of California, Berkeley, USA */ /* Jim Demmel, University of California, Berkeley, USA */ /* Inderjit Dhillon, University of Texas, Austin, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* Christof Voemel, University of California, Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --isplit; --e2; --e; --d__; /* Function Body */ *info = 0; /* Compute splitting points */ *nsplit = 1; if (*spltol < 0.) { /* Criterion based on absolute off-diagonal value */ tmp1 = abs(*spltol) * *tnrm; i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { eabs = (d__1 = e[i__], abs(d__1)); if (eabs <= tmp1) { e[i__] = 0.; e2[i__] = 0.; isplit[*nsplit] = i__; ++(*nsplit); } /* L9: */ } } else { /* Criterion that guarantees relative accuracy */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { eabs = (d__1 = e[i__], abs(d__1)); if (eabs <= *spltol * sqrt((d__1 = d__[i__], abs(d__1))) * sqrt(( d__2 = d__[i__ + 1], abs(d__2)))) { e[i__] = 0.; e2[i__] = 0.; isplit[*nsplit] = i__; ++(*nsplit); } /* L10: */ } } isplit[*nsplit] = *n; return 0; /* End of DLARRA */ } /* _starpu_dlarra_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarrb.c000066400000000000000000000250221507764646700206430ustar00rootroot00000000000000/* dlarrb.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlarrb_(integer *n, doublereal *d__, doublereal *lld, integer *ifirst, integer *ilast, doublereal *rtol1, doublereal *rtol2, integer *offset, doublereal *w, doublereal *wgap, doublereal *werr, doublereal *work, integer *iwork, doublereal *pivmin, doublereal * spdiam, integer *twist, integer *info) { /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Builtin functions */ double log(doublereal); /* Local variables */ integer i__, k, r__, i1, ii, ip; doublereal gap, mid, tmp, back, lgap, rgap, left; integer iter, nint, prev, next; doublereal cvrgd, right, width; extern integer _starpu_dlaneg_(integer *, doublereal *, doublereal *, doublereal * , doublereal *, integer *); integer negcnt; doublereal mnwdth; integer olnint, maxitr; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* Given the relatively robust representation(RRR) L D L^T, DLARRB */ /* does "limited" bisection to refine the eigenvalues of L D L^T, */ /* W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */ /* guesses for these eigenvalues are input in W, the corresponding estimate */ /* of the error in these guesses and their gaps are input in WERR */ /* and WGAP, respectively. During bisection, intervals */ /* [left, right] are maintained by storing their mid-points and */ /* semi-widths in the arrays W and WERR respectively. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The N diagonal elements of the diagonal matrix D. */ /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (N-1) elements L(i)*L(i)*D(i). */ /* IFIRST (input) INTEGER */ /* The index of the first eigenvalue to be computed. */ /* ILAST (input) INTEGER */ /* The index of the last eigenvalue to be computed. */ /* RTOL1 (input) DOUBLE PRECISION */ /* RTOL2 (input) DOUBLE PRECISION */ /* Tolerance for the convergence of the bisection intervals. */ /* An interval [LEFT,RIGHT] has converged if */ /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ /* where GAP is the (estimated) distance to the nearest */ /* eigenvalue. */ /* OFFSET (input) INTEGER */ /* Offset for the arrays W, WGAP and WERR, i.e., the IFIRST-OFFSET */ /* through ILAST-OFFSET elements of these arrays are to be used. */ /* W (input/output) DOUBLE PRECISION array, dimension (N) */ /* On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */ /* estimates of the eigenvalues of L D L^T indexed IFIRST throug */ /* ILAST. */ /* On output, these estimates are refined. */ /* WGAP (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On input, the (estimated) gaps between consecutive */ /* eigenvalues of L D L^T, i.e., WGAP(I-OFFSET) is the gap between */ /* eigenvalues I and I+1. Note that if IFIRST.EQ.ILAST */ /* then WGAP(IFIRST-OFFSET) must be set to ZERO. */ /* On output, these gaps are refined. */ /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ /* On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */ /* the errors in the estimates of the corresponding elements in W. */ /* On output, these errors are refined. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ /* Workspace. */ /* IWORK (workspace) INTEGER array, dimension (2*N) */ /* Workspace. */ /* PIVMIN (input) DOUBLE PRECISION */ /* The minimum pivot in the Sturm sequence. */ /* SPDIAM (input) DOUBLE PRECISION */ /* The spectral diameter of the matrix. */ /* TWIST (input) INTEGER */ /* The twist index for the twisted factorization that is used */ /* for the negcount. */ /* TWIST = N: Compute negcount from L D L^T - LAMBDA I = L+ D+ L+^T */ /* TWIST = 1: Compute negcount from L D L^T - LAMBDA I = U- D- U-^T */ /* TWIST = R: Compute negcount from L D L^T - LAMBDA I = N(r) D(r) N(r) */ /* INFO (output) INTEGER */ /* Error flag. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Beresford Parlett, University of California, Berkeley, USA */ /* Jim Demmel, University of California, Berkeley, USA */ /* Inderjit Dhillon, University of Texas, Austin, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* Christof Voemel, University of California, Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --iwork; --work; --werr; --wgap; --w; --lld; --d__; /* Function Body */ *info = 0; maxitr = (integer) ((log(*spdiam + *pivmin) - log(*pivmin)) / log(2.)) + 2; mnwdth = *pivmin * 2.; r__ = *twist; if (r__ < 1 || r__ > *n) { r__ = *n; } /* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */ /* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */ /* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 ) */ /* for an unconverged interval is set to the index of the next unconverged */ /* interval, and is -1 or 0 for a converged interval. Thus a linked */ /* list of unconverged intervals is set up. */ i1 = *ifirst; /* The number of unconverged intervals */ nint = 0; /* The last unconverged interval found */ prev = 0; rgap = wgap[i1 - *offset]; i__1 = *ilast; for (i__ = i1; i__ <= i__1; ++i__) { k = i__ << 1; ii = i__ - *offset; left = w[ii] - werr[ii]; right = w[ii] + werr[ii]; lgap = rgap; rgap = wgap[ii]; gap = min(lgap,rgap); /* Make sure that [LEFT,RIGHT] contains the desired eigenvalue */ /* Compute negcount from dstqds facto L+D+L+^T = L D L^T - LEFT */ /* Do while( NEGCNT(LEFT).GT.I-1 ) */ back = werr[ii]; L20: negcnt = _starpu_dlaneg_(n, &d__[1], &lld[1], &left, pivmin, &r__); if (negcnt > i__ - 1) { left -= back; back *= 2.; goto L20; } /* Do while( NEGCNT(RIGHT).LT.I ) */ /* Compute negcount from dstqds facto L+D+L+^T = L D L^T - RIGHT */ back = werr[ii]; L50: negcnt = _starpu_dlaneg_(n, &d__[1], &lld[1], &right, pivmin, &r__); if (negcnt < i__) { right += back; back *= 2.; goto L50; } width = (d__1 = left - right, abs(d__1)) * .5; /* Computing MAX */ d__1 = abs(left), d__2 = abs(right); tmp = max(d__1,d__2); /* Computing MAX */ d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp; cvrgd = max(d__1,d__2); if (width <= cvrgd || width <= mnwdth) { /* This interval has already converged and does not need refinement. */ /* (Note that the gaps might change through refining the */ /* eigenvalues, however, they can only get bigger.) */ /* Remove it from the list. */ iwork[k - 1] = -1; /* Make sure that I1 always points to the first unconverged interval */ if (i__ == i1 && i__ < *ilast) { i1 = i__ + 1; } if (prev >= i1 && i__ <= *ilast) { iwork[(prev << 1) - 1] = i__ + 1; } } else { /* unconverged interval found */ prev = i__; ++nint; iwork[k - 1] = i__ + 1; iwork[k] = negcnt; } work[k - 1] = left; work[k] = right; /* L75: */ } /* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */ /* and while (ITER.LT.MAXITR) */ iter = 0; L80: prev = i1 - 1; i__ = i1; olnint = nint; i__1 = olnint; for (ip = 1; ip <= i__1; ++ip) { k = i__ << 1; ii = i__ - *offset; rgap = wgap[ii]; lgap = rgap; if (ii > 1) { lgap = wgap[ii - 1]; } gap = min(lgap,rgap); next = iwork[k - 1]; left = work[k - 1]; right = work[k]; mid = (left + right) * .5; /* semiwidth of interval */ width = right - mid; /* Computing MAX */ d__1 = abs(left), d__2 = abs(right); tmp = max(d__1,d__2); /* Computing MAX */ d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp; cvrgd = max(d__1,d__2); if (width <= cvrgd || width <= mnwdth || iter == maxitr) { /* reduce number of unconverged intervals */ --nint; /* Mark interval as converged. */ iwork[k - 1] = 0; if (i1 == i__) { i1 = next; } else { /* Prev holds the last unconverged interval previously examined */ if (prev >= i1) { iwork[(prev << 1) - 1] = next; } } i__ = next; goto L100; } prev = i__; /* Perform one bisection step */ negcnt = _starpu_dlaneg_(n, &d__[1], &lld[1], &mid, pivmin, &r__); if (negcnt <= i__ - 1) { work[k - 1] = mid; } else { work[k] = mid; } i__ = next; L100: ; } ++iter; /* do another loop if there are still unconverged intervals */ /* However, in the last iteration, all intervals are accepted */ /* since this is the best we can do. */ if (nint > 0 && iter <= maxitr) { goto L80; } /* At this point, all the intervals have converged */ i__1 = *ilast; for (i__ = *ifirst; i__ <= i__1; ++i__) { k = i__ << 1; ii = i__ - *offset; /* All intervals marked by '0' have been refined. */ if (iwork[k - 1] == 0) { w[ii] = (work[k - 1] + work[k]) * .5; werr[ii] = work[k] - w[ii]; } /* L110: */ } i__1 = *ilast; for (i__ = *ifirst + 1; i__ <= i__1; ++i__) { k = i__ << 1; ii = i__ - *offset; /* Computing MAX */ d__1 = 0., d__2 = w[ii] - werr[ii] - w[ii - 1] - werr[ii - 1]; wgap[ii - 1] = max(d__1,d__2); /* L111: */ } return 0; /* End of DLARRB */ } /* _starpu_dlarrb_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarrc.c000066400000000000000000000112541507764646700206460ustar00rootroot00000000000000/* dlarrc.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlarrc_(char *jobt, integer *n, doublereal *vl, doublereal *vu, doublereal *d__, doublereal *e, doublereal *pivmin, integer *eigcnt, integer *lcnt, integer *rcnt, integer *info) { /* System generated locals */ integer i__1; doublereal d__1; /* Local variables */ integer i__; doublereal sl, su, tmp, tmp2; logical matt; extern logical _starpu_lsame_(char *, char *); doublereal lpivot, rpivot; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* Find the number of eigenvalues of the symmetric tridiagonal matrix T */ /* that are in the interval (VL,VU] if JOBT = 'T', and of L D L^T */ /* if JOBT = 'L'. */ /* Arguments */ /* ========= */ /* JOBT (input) CHARACTER*1 */ /* = 'T': Compute Sturm count for matrix T. */ /* = 'L': Compute Sturm count for matrix L D L^T. */ /* N (input) INTEGER */ /* The order of the matrix. N > 0. */ /* VL (input) DOUBLE PRECISION */ /* VU (input) DOUBLE PRECISION */ /* The lower and upper bounds for the eigenvalues. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* JOBT = 'T': The N diagonal elements of the tridiagonal matrix T. */ /* JOBT = 'L': The N diagonal elements of the diagonal matrix D. */ /* E (input) DOUBLE PRECISION array, dimension (N) */ /* JOBT = 'T': The N-1 offdiagonal elements of the matrix T. */ /* JOBT = 'L': The N-1 offdiagonal elements of the matrix L. */ /* PIVMIN (input) DOUBLE PRECISION */ /* The minimum pivot in the Sturm sequence for T. */ /* EIGCNT (output) INTEGER */ /* The number of eigenvalues of the symmetric tridiagonal matrix T */ /* that are in the interval (VL,VU] */ /* LCNT (output) INTEGER */ /* RCNT (output) INTEGER */ /* The left and right negcounts of the interval. */ /* INFO (output) INTEGER */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Beresford Parlett, University of California, Berkeley, USA */ /* Jim Demmel, University of California, Berkeley, USA */ /* Inderjit Dhillon, University of Texas, Austin, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* Christof Voemel, University of California, Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --e; --d__; /* Function Body */ *info = 0; *lcnt = 0; *rcnt = 0; *eigcnt = 0; matt = _starpu_lsame_(jobt, "T"); if (matt) { /* Sturm sequence count on T */ lpivot = d__[1] - *vl; rpivot = d__[1] - *vu; if (lpivot <= 0.) { ++(*lcnt); } if (rpivot <= 0.) { ++(*rcnt); } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing 2nd power */ d__1 = e[i__]; tmp = d__1 * d__1; lpivot = d__[i__ + 1] - *vl - tmp / lpivot; rpivot = d__[i__ + 1] - *vu - tmp / rpivot; if (lpivot <= 0.) { ++(*lcnt); } if (rpivot <= 0.) { ++(*rcnt); } /* L10: */ } } else { /* Sturm sequence count on L D L^T */ sl = -(*vl); su = -(*vu); i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { lpivot = d__[i__] + sl; rpivot = d__[i__] + su; if (lpivot <= 0.) { ++(*lcnt); } if (rpivot <= 0.) { ++(*rcnt); } tmp = e[i__] * d__[i__] * e[i__]; tmp2 = tmp / lpivot; if (tmp2 == 0.) { sl = tmp - *vl; } else { sl = sl * tmp2 - *vl; } tmp2 = tmp / rpivot; if (tmp2 == 0.) { su = tmp - *vu; } else { su = su * tmp2 - *vu; } /* L20: */ } lpivot = d__[*n] + sl; rpivot = d__[*n] + su; if (lpivot <= 0.) { ++(*lcnt); } if (rpivot <= 0.) { ++(*rcnt); } } *eigcnt = *rcnt - *lcnt; return 0; /* end of DLARRC */ } /* _starpu_dlarrc_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarrd.c000066400000000000000000000635231507764646700206550ustar00rootroot00000000000000/* dlarrd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; static integer c__0 = 0; /* Subroutine */ int _starpu_dlarrd_(char *range, char *order, integer *n, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *gers, doublereal *reltol, doublereal *d__, doublereal *e, doublereal *e2, doublereal *pivmin, integer *nsplit, integer *isplit, integer *m, doublereal *w, doublereal *werr, doublereal *wl, doublereal *wu, integer *iblock, integer *indexw, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer i__1, i__2, i__3; doublereal d__1, d__2; /* Builtin functions */ double log(doublereal); /* Local variables */ integer i__, j, ib, ie, je, nb; doublereal gl; integer im, in; doublereal gu; integer iw, jee; doublereal eps; integer nwl; doublereal wlu, wul; integer nwu; doublereal tmp1, tmp2; integer iend, jblk, ioff, iout, itmp1, itmp2, jdisc; extern logical _starpu_lsame_(char *, char *); integer iinfo; doublereal atoli; integer iwoff, itmax; doublereal wkill, rtoli, uflow, tnorm; extern doublereal _starpu_dlamch_(char *); integer ibegin; extern /* Subroutine */ int _starpu_dlaebz_(integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); integer irange, idiscl, idumma[1]; extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer idiscu; logical ncnvrg, toofew; /* -- LAPACK auxiliary routine (version 3.2.1) -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* -- April 2009 -- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARRD computes the eigenvalues of a symmetric tridiagonal */ /* matrix T to suitable accuracy. This is an auxiliary code to be */ /* called from DSTEMR. */ /* The user may ask for all eigenvalues, all eigenvalues */ /* in the half-open interval (VL, VU], or the IL-th through IU-th */ /* eigenvalues. */ /* To avoid overflow, the matrix must be scaled so that its */ /* largest element is no greater than overflow**(1/2) * */ /* underflow**(1/4) in absolute value, and for greatest */ /* accuracy, it should not be much smaller than that. */ /* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */ /* Matrix", Report CS41, Computer Science Dept., Stanford */ /* University, July 21, 1966. */ /* Arguments */ /* ========= */ /* RANGE (input) CHARACTER */ /* = 'A': ("All") all eigenvalues will be found. */ /* = 'V': ("Value") all eigenvalues in the half-open interval */ /* (VL, VU] will be found. */ /* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */ /* entire matrix) will be found. */ /* ORDER (input) CHARACTER */ /* = 'B': ("By Block") the eigenvalues will be grouped by */ /* split-off block (see IBLOCK, ISPLIT) and */ /* ordered from smallest to largest within */ /* the block. */ /* = 'E': ("Entire matrix") */ /* the eigenvalues for the entire matrix */ /* will be ordered from smallest to */ /* largest. */ /* N (input) INTEGER */ /* The order of the tridiagonal matrix T. N >= 0. */ /* VL (input) DOUBLE PRECISION */ /* VU (input) DOUBLE PRECISION */ /* If RANGE='V', the lower and upper bounds of the interval to */ /* be searched for eigenvalues. Eigenvalues less than or equal */ /* to VL, or greater than VU, will not be returned. VL < VU. */ /* Not referenced if RANGE = 'A' or 'I'. */ /* IL (input) INTEGER */ /* IU (input) INTEGER */ /* If RANGE='I', the indices (in ascending order) of the */ /* smallest and largest eigenvalues to be returned. */ /* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ /* Not referenced if RANGE = 'A' or 'V'. */ /* GERS (input) DOUBLE PRECISION array, dimension (2*N) */ /* The N Gerschgorin intervals (the i-th Gerschgorin interval */ /* is (GERS(2*i-1), GERS(2*i)). */ /* RELTOL (input) DOUBLE PRECISION */ /* The minimum relative width of an interval. When an interval */ /* is narrower than RELTOL times the larger (in */ /* magnitude) endpoint, then it is considered to be */ /* sufficiently small, i.e., converged. Note: this should */ /* always be at least radix*machine epsilon. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of the tridiagonal matrix T. */ /* E (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) off-diagonal elements of the tridiagonal matrix T. */ /* E2 (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) squared off-diagonal elements of the tridiagonal matrix T. */ /* PIVMIN (input) DOUBLE PRECISION */ /* The minimum pivot allowed in the Sturm sequence for T. */ /* NSPLIT (input) INTEGER */ /* The number of diagonal blocks in the matrix T. */ /* 1 <= NSPLIT <= N. */ /* ISPLIT (input) INTEGER array, dimension (N) */ /* The splitting points, at which T breaks up into submatrices. */ /* The first submatrix consists of rows/columns 1 to ISPLIT(1), */ /* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ /* etc., and the NSPLIT-th consists of rows/columns */ /* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */ /* (Only the first NSPLIT elements will actually be used, but */ /* since the user cannot know a priori what value NSPLIT will */ /* have, N words must be reserved for ISPLIT.) */ /* M (output) INTEGER */ /* The actual number of eigenvalues found. 0 <= M <= N. */ /* (See also the description of INFO=2,3.) */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* On exit, the first M elements of W will contain the */ /* eigenvalue approximations. DLARRD computes an interval */ /* I_j = (a_j, b_j] that includes eigenvalue j. The eigenvalue */ /* approximation is given as the interval midpoint */ /* W(j)= ( a_j + b_j)/2. The corresponding error is bounded by */ /* WERR(j) = abs( a_j - b_j)/2 */ /* WERR (output) DOUBLE PRECISION array, dimension (N) */ /* The error bound on the corresponding eigenvalue approximation */ /* in W. */ /* WL (output) DOUBLE PRECISION */ /* WU (output) DOUBLE PRECISION */ /* The interval (WL, WU] contains all the wanted eigenvalues. */ /* If RANGE='V', then WL=VL and WU=VU. */ /* If RANGE='A', then WL and WU are the global Gerschgorin bounds */ /* on the spectrum. */ /* If RANGE='I', then WL and WU are computed by DLAEBZ from the */ /* index range specified. */ /* IBLOCK (output) INTEGER array, dimension (N) */ /* At each row/column j where E(j) is zero or small, the */ /* matrix T is considered to split into a block diagonal */ /* matrix. On exit, if INFO = 0, IBLOCK(i) specifies to which */ /* block (from 1 to the number of blocks) the eigenvalue W(i) */ /* belongs. (DLARRD may use the remaining N-M elements as */ /* workspace.) */ /* INDEXW (output) INTEGER array, dimension (N) */ /* The indices of the eigenvalues within each block (submatrix); */ /* for example, INDEXW(i)= j and IBLOCK(i)=k imply that the */ /* i-th eigenvalue W(i) is the j-th eigenvalue in block k. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ /* IWORK (workspace) INTEGER array, dimension (3*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: some or all of the eigenvalues failed to converge or */ /* were not computed: */ /* =1 or 3: Bisection failed to converge for some */ /* eigenvalues; these eigenvalues are flagged by a */ /* negative block number. The effect is that the */ /* eigenvalues may not be as accurate as the */ /* absolute and relative tolerances. This is */ /* generally caused by unexpectedly inaccurate */ /* arithmetic. */ /* =2 or 3: RANGE='I' only: Not all of the eigenvalues */ /* IL:IU were found. */ /* Effect: M < IU+1-IL */ /* Cause: non-monotonic arithmetic, causing the */ /* Sturm sequence to be non-monotonic. */ /* Cure: recalculate, using RANGE='A', and pick */ /* out eigenvalues IL:IU. In some cases, */ /* increasing the PARAMETER "FUDGE" may */ /* make things work. */ /* = 4: RANGE='I', and the Gershgorin interval */ /* initially used was too small. No eigenvalues */ /* were computed. */ /* Probable cause: your machine has sloppy */ /* floating-point arithmetic. */ /* Cure: Increase the PARAMETER "FUDGE", */ /* recompile, and try again. */ /* Internal Parameters */ /* =================== */ /* FUDGE DOUBLE PRECISION, default = 2 */ /* A "fudge factor" to widen the Gershgorin intervals. Ideally, */ /* a value of 1 should work, but on machines with sloppy */ /* arithmetic, this needs to be larger. The default for */ /* publicly released versions should be large enough to handle */ /* the worst machine around. Note that this has no effect */ /* on accuracy of the solution. */ /* Based on contributions by */ /* W. Kahan, University of California, Berkeley, USA */ /* Beresford Parlett, University of California, Berkeley, USA */ /* Jim Demmel, University of California, Berkeley, USA */ /* Inderjit Dhillon, University of Texas, Austin, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* Christof Voemel, University of California, Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --iwork; --work; --indexw; --iblock; --werr; --w; --isplit; --e2; --e; --d__; --gers; /* Function Body */ *info = 0; /* Decode RANGE */ if (_starpu_lsame_(range, "A")) { irange = 1; } else if (_starpu_lsame_(range, "V")) { irange = 2; } else if (_starpu_lsame_(range, "I")) { irange = 3; } else { irange = 0; } /* Check for Errors */ if (irange <= 0) { *info = -1; } else if (! (_starpu_lsame_(order, "B") || _starpu_lsame_(order, "E"))) { *info = -2; } else if (*n < 0) { *info = -3; } else if (irange == 2) { if (*vl >= *vu) { *info = -5; } } else if (irange == 3 && (*il < 1 || *il > max(1,*n))) { *info = -6; } else if (irange == 3 && (*iu < min(*n,*il) || *iu > *n)) { *info = -7; } if (*info != 0) { return 0; } /* Initialize error flags */ *info = 0; ncnvrg = FALSE_; toofew = FALSE_; /* Quick return if possible */ *m = 0; if (*n == 0) { return 0; } /* Simplification: */ if (irange == 3 && *il == 1 && *iu == *n) { irange = 1; } /* Get machine constants */ eps = _starpu_dlamch_("P"); uflow = _starpu_dlamch_("U"); /* Special Case when N=1 */ /* Treat case of 1x1 matrix for quick return */ if (*n == 1) { if (irange == 1 || irange == 2 && d__[1] > *vl && d__[1] <= *vu || irange == 3 && *il == 1 && *iu == 1) { *m = 1; w[1] = d__[1]; /* The computation error of the eigenvalue is zero */ werr[1] = 0.; iblock[1] = 1; indexw[1] = 1; } return 0; } /* NB is the minimum vector length for vector bisection, or 0 */ /* if only scalar is to be done. */ nb = _starpu_ilaenv_(&c__1, "DSTEBZ", " ", n, &c_n1, &c_n1, &c_n1); if (nb <= 1) { nb = 0; } /* Find global spectral radius */ gl = d__[1]; gu = d__[1]; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MIN */ d__1 = gl, d__2 = gers[(i__ << 1) - 1]; gl = min(d__1,d__2); /* Computing MAX */ d__1 = gu, d__2 = gers[i__ * 2]; gu = max(d__1,d__2); /* L5: */ } /* Compute global Gerschgorin bounds and spectral diameter */ /* Computing MAX */ d__1 = abs(gl), d__2 = abs(gu); tnorm = max(d__1,d__2); gl = gl - tnorm * 2. * eps * *n - *pivmin * 4.; gu = gu + tnorm * 2. * eps * *n + *pivmin * 4.; /* [JAN/28/2009] remove the line below since SPDIAM variable not use */ /* SPDIAM = GU - GL */ /* Input arguments for DLAEBZ: */ /* The relative tolerance. An interval (a,b] lies within */ /* "relative tolerance" if b-a < RELTOL*max(|a|,|b|), */ rtoli = *reltol; /* Set the absolute tolerance for interval convergence to zero to force */ /* interval convergence based on relative size of the interval. */ /* This is dangerous because intervals might not converge when RELTOL is */ /* small. But at least a very small number should be selected so that for */ /* strongly graded matrices, the code can get relatively accurate */ /* eigenvalues. */ atoli = uflow * 4. + *pivmin * 4.; if (irange == 3) { /* RANGE='I': Compute an interval containing eigenvalues */ /* IL through IU. The initial interval [GL,GU] from the global */ /* Gerschgorin bounds GL and GU is refined by DLAEBZ. */ itmax = (integer) ((log(tnorm + *pivmin) - log(*pivmin)) / log(2.)) + 2; work[*n + 1] = gl; work[*n + 2] = gl; work[*n + 3] = gu; work[*n + 4] = gu; work[*n + 5] = gl; work[*n + 6] = gu; iwork[1] = -1; iwork[2] = -1; iwork[3] = *n + 1; iwork[4] = *n + 1; iwork[5] = *il - 1; iwork[6] = *iu; _starpu_dlaebz_(&c__3, &itmax, n, &c__2, &c__2, &nb, &atoli, &rtoli, pivmin, & d__[1], &e[1], &e2[1], &iwork[5], &work[*n + 1], &work[*n + 5] , &iout, &iwork[1], &w[1], &iblock[1], &iinfo); if (iinfo != 0) { *info = iinfo; return 0; } /* On exit, output intervals may not be ordered by ascending negcount */ if (iwork[6] == *iu) { *wl = work[*n + 1]; wlu = work[*n + 3]; nwl = iwork[1]; *wu = work[*n + 4]; wul = work[*n + 2]; nwu = iwork[4]; } else { *wl = work[*n + 2]; wlu = work[*n + 4]; nwl = iwork[2]; *wu = work[*n + 3]; wul = work[*n + 1]; nwu = iwork[3]; } /* On exit, the interval [WL, WLU] contains a value with negcount NWL, */ /* and [WUL, WU] contains a value with negcount NWU. */ if (nwl < 0 || nwl >= *n || nwu < 1 || nwu > *n) { *info = 4; return 0; } } else if (irange == 2) { *wl = *vl; *wu = *vu; } else if (irange == 1) { *wl = gl; *wu = gu; } /* Find Eigenvalues -- Loop Over blocks and recompute NWL and NWU. */ /* NWL accumulates the number of eigenvalues .le. WL, */ /* NWU accumulates the number of eigenvalues .le. WU */ *m = 0; iend = 0; *info = 0; nwl = 0; nwu = 0; i__1 = *nsplit; for (jblk = 1; jblk <= i__1; ++jblk) { ioff = iend; ibegin = ioff + 1; iend = isplit[jblk]; in = iend - ioff; if (in == 1) { /* 1x1 block */ if (*wl >= d__[ibegin] - *pivmin) { ++nwl; } if (*wu >= d__[ibegin] - *pivmin) { ++nwu; } if (irange == 1 || *wl < d__[ibegin] - *pivmin && *wu >= d__[ ibegin] - *pivmin) { ++(*m); w[*m] = d__[ibegin]; werr[*m] = 0.; /* The gap for a single block doesn't matter for the later */ /* algorithm and is assigned an arbitrary large value */ iblock[*m] = jblk; indexw[*m] = 1; } /* Disabled 2x2 case because of a failure on the following matrix */ /* RANGE = 'I', IL = IU = 4 */ /* Original Tridiagonal, d = [ */ /* -0.150102010615740E+00 */ /* -0.849897989384260E+00 */ /* -0.128208148052635E-15 */ /* 0.128257718286320E-15 */ /* ]; */ /* e = [ */ /* -0.357171383266986E+00 */ /* -0.180411241501588E-15 */ /* -0.175152352710251E-15 */ /* ]; */ /* ELSE IF( IN.EQ.2 ) THEN */ /* * 2x2 block */ /* DISC = SQRT( (HALF*(D(IBEGIN)-D(IEND)))**2 + E(IBEGIN)**2 ) */ /* TMP1 = HALF*(D(IBEGIN)+D(IEND)) */ /* L1 = TMP1 - DISC */ /* IF( WL.GE. L1-PIVMIN ) */ /* $ NWL = NWL + 1 */ /* IF( WU.GE. L1-PIVMIN ) */ /* $ NWU = NWU + 1 */ /* IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L1-PIVMIN .AND. WU.GE. */ /* $ L1-PIVMIN ) ) THEN */ /* M = M + 1 */ /* W( M ) = L1 */ /* * The uncertainty of eigenvalues of a 2x2 matrix is very small */ /* WERR( M ) = EPS * ABS( W( M ) ) * TWO */ /* IBLOCK( M ) = JBLK */ /* INDEXW( M ) = 1 */ /* ENDIF */ /* L2 = TMP1 + DISC */ /* IF( WL.GE. L2-PIVMIN ) */ /* $ NWL = NWL + 1 */ /* IF( WU.GE. L2-PIVMIN ) */ /* $ NWU = NWU + 1 */ /* IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L2-PIVMIN .AND. WU.GE. */ /* $ L2-PIVMIN ) ) THEN */ /* M = M + 1 */ /* W( M ) = L2 */ /* * The uncertainty of eigenvalues of a 2x2 matrix is very small */ /* WERR( M ) = EPS * ABS( W( M ) ) * TWO */ /* IBLOCK( M ) = JBLK */ /* INDEXW( M ) = 2 */ /* ENDIF */ } else { /* General Case - block of size IN >= 2 */ /* Compute local Gerschgorin interval and use it as the initial */ /* interval for DLAEBZ */ gu = d__[ibegin]; gl = d__[ibegin]; tmp1 = 0.; i__2 = iend; for (j = ibegin; j <= i__2; ++j) { /* Computing MIN */ d__1 = gl, d__2 = gers[(j << 1) - 1]; gl = min(d__1,d__2); /* Computing MAX */ d__1 = gu, d__2 = gers[j * 2]; gu = max(d__1,d__2); /* L40: */ } /* [JAN/28/2009] */ /* change SPDIAM by TNORM in lines 2 and 3 thereafter */ /* line 1: remove computation of SPDIAM (not useful anymore) */ /* SPDIAM = GU - GL */ /* GL = GL - FUDGE*SPDIAM*EPS*IN - FUDGE*PIVMIN */ /* GU = GU + FUDGE*SPDIAM*EPS*IN + FUDGE*PIVMIN */ gl = gl - tnorm * 2. * eps * in - *pivmin * 2.; gu = gu + tnorm * 2. * eps * in + *pivmin * 2.; if (irange > 1) { if (gu < *wl) { /* the local block contains none of the wanted eigenvalues */ nwl += in; nwu += in; goto L70; } /* refine search interval if possible, only range (WL,WU] matters */ gl = max(gl,*wl); gu = min(gu,*wu); if (gl >= gu) { goto L70; } } /* Find negcount of initial interval boundaries GL and GU */ work[*n + 1] = gl; work[*n + in + 1] = gu; _starpu_dlaebz_(&c__1, &c__0, &in, &in, &c__1, &nb, &atoli, &rtoli, pivmin, &d__[ibegin], &e[ibegin], &e2[ibegin], idumma, & work[*n + 1], &work[*n + (in << 1) + 1], &im, &iwork[1], & w[*m + 1], &iblock[*m + 1], &iinfo); if (iinfo != 0) { *info = iinfo; return 0; } nwl += iwork[1]; nwu += iwork[in + 1]; iwoff = *m - iwork[1]; /* Compute Eigenvalues */ itmax = (integer) ((log(gu - gl + *pivmin) - log(*pivmin)) / log( 2.)) + 2; _starpu_dlaebz_(&c__2, &itmax, &in, &in, &c__1, &nb, &atoli, &rtoli, pivmin, &d__[ibegin], &e[ibegin], &e2[ibegin], idumma, & work[*n + 1], &work[*n + (in << 1) + 1], &iout, &iwork[1], &w[*m + 1], &iblock[*m + 1], &iinfo); if (iinfo != 0) { *info = iinfo; return 0; } /* Copy eigenvalues into W and IBLOCK */ /* Use -JBLK for block number for unconverged eigenvalues. */ /* Loop over the number of output intervals from DLAEBZ */ i__2 = iout; for (j = 1; j <= i__2; ++j) { /* eigenvalue approximation is middle point of interval */ tmp1 = (work[j + *n] + work[j + in + *n]) * .5; /* semi length of error interval */ tmp2 = (d__1 = work[j + *n] - work[j + in + *n], abs(d__1)) * .5; if (j > iout - iinfo) { /* Flag non-convergence. */ ncnvrg = TRUE_; ib = -jblk; } else { ib = jblk; } i__3 = iwork[j + in] + iwoff; for (je = iwork[j] + 1 + iwoff; je <= i__3; ++je) { w[je] = tmp1; werr[je] = tmp2; indexw[je] = je - iwoff; iblock[je] = ib; /* L50: */ } /* L60: */ } *m += im; } L70: ; } /* If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU */ /* If NWL+1 < IL or NWU > IU, discard extra eigenvalues. */ if (irange == 3) { idiscl = *il - 1 - nwl; idiscu = nwu - *iu; if (idiscl > 0) { im = 0; i__1 = *m; for (je = 1; je <= i__1; ++je) { /* Remove some of the smallest eigenvalues from the left so that */ /* at the end IDISCL =0. Move all eigenvalues up to the left. */ if (w[je] <= wlu && idiscl > 0) { --idiscl; } else { ++im; w[im] = w[je]; werr[im] = werr[je]; indexw[im] = indexw[je]; iblock[im] = iblock[je]; } /* L80: */ } *m = im; } if (idiscu > 0) { /* Remove some of the largest eigenvalues from the right so that */ /* at the end IDISCU =0. Move all eigenvalues up to the left. */ im = *m + 1; for (je = *m; je >= 1; --je) { if (w[je] >= wul && idiscu > 0) { --idiscu; } else { --im; w[im] = w[je]; werr[im] = werr[je]; indexw[im] = indexw[je]; iblock[im] = iblock[je]; } /* L81: */ } jee = 0; i__1 = *m; for (je = im; je <= i__1; ++je) { ++jee; w[jee] = w[je]; werr[jee] = werr[je]; indexw[jee] = indexw[je]; iblock[jee] = iblock[je]; /* L82: */ } *m = *m - im + 1; } if (idiscl > 0 || idiscu > 0) { /* Code to deal with effects of bad arithmetic. (If N(w) is */ /* monotone non-decreasing, this should never happen.) */ /* Some low eigenvalues to be discarded are not in (WL,WLU], */ /* or high eigenvalues to be discarded are not in (WUL,WU] */ /* so just kill off the smallest IDISCL/largest IDISCU */ /* eigenvalues, by marking the corresponding IBLOCK = 0 */ if (idiscl > 0) { wkill = *wu; i__1 = idiscl; for (jdisc = 1; jdisc <= i__1; ++jdisc) { iw = 0; i__2 = *m; for (je = 1; je <= i__2; ++je) { if (iblock[je] != 0 && (w[je] < wkill || iw == 0)) { iw = je; wkill = w[je]; } /* L90: */ } iblock[iw] = 0; /* L100: */ } } if (idiscu > 0) { wkill = *wl; i__1 = idiscu; for (jdisc = 1; jdisc <= i__1; ++jdisc) { iw = 0; i__2 = *m; for (je = 1; je <= i__2; ++je) { if (iblock[je] != 0 && (w[je] >= wkill || iw == 0)) { iw = je; wkill = w[je]; } /* L110: */ } iblock[iw] = 0; /* L120: */ } } /* Now erase all eigenvalues with IBLOCK set to zero */ im = 0; i__1 = *m; for (je = 1; je <= i__1; ++je) { if (iblock[je] != 0) { ++im; w[im] = w[je]; werr[im] = werr[je]; indexw[im] = indexw[je]; iblock[im] = iblock[je]; } /* L130: */ } *m = im; } if (idiscl < 0 || idiscu < 0) { toofew = TRUE_; } } if (irange == 1 && *m != *n || irange == 3 && *m != *iu - *il + 1) { toofew = TRUE_; } /* If ORDER='B', do nothing the eigenvalues are already sorted by */ /* block. */ /* If ORDER='E', sort the eigenvalues from smallest to largest */ if (_starpu_lsame_(order, "E") && *nsplit > 1) { i__1 = *m - 1; for (je = 1; je <= i__1; ++je) { ie = 0; tmp1 = w[je]; i__2 = *m; for (j = je + 1; j <= i__2; ++j) { if (w[j] < tmp1) { ie = j; tmp1 = w[j]; } /* L140: */ } if (ie != 0) { tmp2 = werr[ie]; itmp1 = iblock[ie]; itmp2 = indexw[ie]; w[ie] = w[je]; werr[ie] = werr[je]; iblock[ie] = iblock[je]; indexw[ie] = indexw[je]; w[je] = tmp1; werr[je] = tmp2; iblock[je] = itmp1; indexw[je] = itmp2; } /* L150: */ } } *info = 0; if (ncnvrg) { ++(*info); } if (toofew) { *info += 2; } return 0; /* End of DLARRD */ } /* _starpu_dlarrd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarre.c000066400000000000000000000722231507764646700206530ustar00rootroot00000000000000/* dlarre.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__2 = 2; /* Subroutine */ int _starpu_dlarre_(char *range, integer *n, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *d__, doublereal *e, doublereal *e2, doublereal *rtol1, doublereal *rtol2, doublereal * spltol, integer *nsplit, integer *isplit, integer *m, doublereal *w, doublereal *werr, doublereal *wgap, integer *iblock, integer *indexw, doublereal *gers, doublereal *pivmin, doublereal *work, integer * iwork, integer *info) { /* System generated locals */ integer i__1, i__2; doublereal d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal), log(doublereal); /* Local variables */ integer i__, j; doublereal s1, s2; integer mb; doublereal gl; integer in, mm; doublereal gu; integer cnt; doublereal eps, tau, tmp, rtl; integer cnt1, cnt2; doublereal tmp1, eabs; integer iend, jblk; doublereal eold; integer indl; doublereal dmax__, emax; integer wend, idum, indu; doublereal rtol; integer iseed[4]; doublereal avgap, sigma; extern logical _starpu_lsame_(char *, char *); integer iinfo; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); logical norep; extern /* Subroutine */ int _starpu_dlasq2_(integer *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *); integer ibegin; logical forceb; integer irange; doublereal sgndef; extern /* Subroutine */ int _starpu_dlarra_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *), _starpu_dlarrb_(integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dlarrc_(char * , integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *); integer wbegin; extern /* Subroutine */ int _starpu_dlarrd_(char *, char *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer * , integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); doublereal safmin, spdiam; extern /* Subroutine */ int _starpu_dlarrk_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *); logical usedqd; doublereal clwdth, isleft; extern /* Subroutine */ int _starpu_dlarnv_(integer *, integer *, integer *, doublereal *); doublereal isrght, bsrtol, dpivot; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* To find the desired eigenvalues of a given real symmetric */ /* tridiagonal matrix T, DLARRE sets any "small" off-diagonal */ /* elements to zero, and for each unreduced block T_i, it finds */ /* (a) a suitable shift at one end of the block's spectrum, */ /* (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and */ /* (c) eigenvalues of each L_i D_i L_i^T. */ /* The representations and eigenvalues found are then used by */ /* DSTEMR to compute the eigenvectors of T. */ /* The accuracy varies depending on whether bisection is used to */ /* find a few eigenvalues or the dqds algorithm (subroutine DLASQ2) to */ /* conpute all and then discard any unwanted one. */ /* As an added benefit, DLARRE also outputs the n */ /* Gerschgorin intervals for the matrices L_i D_i L_i^T. */ /* Arguments */ /* ========= */ /* RANGE (input) CHARACTER */ /* = 'A': ("All") all eigenvalues will be found. */ /* = 'V': ("Value") all eigenvalues in the half-open interval */ /* (VL, VU] will be found. */ /* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */ /* entire matrix) will be found. */ /* N (input) INTEGER */ /* The order of the matrix. N > 0. */ /* VL (input/output) DOUBLE PRECISION */ /* VU (input/output) DOUBLE PRECISION */ /* If RANGE='V', the lower and upper bounds for the eigenvalues. */ /* Eigenvalues less than or equal to VL, or greater than VU, */ /* will not be returned. VL < VU. */ /* If RANGE='I' or ='A', DLARRE computes bounds on the desired */ /* part of the spectrum. */ /* IL (input) INTEGER */ /* IU (input) INTEGER */ /* If RANGE='I', the indices (in ascending order) of the */ /* smallest and largest eigenvalues to be returned. */ /* 1 <= IL <= IU <= N. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the N diagonal elements of the tridiagonal */ /* matrix T. */ /* On exit, the N diagonal elements of the diagonal */ /* matrices D_i. */ /* E (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the first (N-1) entries contain the subdiagonal */ /* elements of the tridiagonal matrix T; E(N) need not be set. */ /* On exit, E contains the subdiagonal elements of the unit */ /* bidiagonal matrices L_i. The entries E( ISPLIT( I ) ), */ /* 1 <= I <= NSPLIT, contain the base points sigma_i on output. */ /* E2 (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the first (N-1) entries contain the SQUARES of the */ /* subdiagonal elements of the tridiagonal matrix T; */ /* E2(N) need not be set. */ /* On exit, the entries E2( ISPLIT( I ) ), */ /* 1 <= I <= NSPLIT, have been set to zero */ /* RTOL1 (input) DOUBLE PRECISION */ /* RTOL2 (input) DOUBLE PRECISION */ /* Parameters for bisection. */ /* An interval [LEFT,RIGHT] has converged if */ /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ /* SPLTOL (input) DOUBLE PRECISION */ /* The threshold for splitting. */ /* NSPLIT (output) INTEGER */ /* The number of blocks T splits into. 1 <= NSPLIT <= N. */ /* ISPLIT (output) INTEGER array, dimension (N) */ /* The splitting points, at which T breaks up into blocks. */ /* The first block consists of rows/columns 1 to ISPLIT(1), */ /* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ /* etc., and the NSPLIT-th consists of rows/columns */ /* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */ /* M (output) INTEGER */ /* The total number of eigenvalues (of all L_i D_i L_i^T) */ /* found. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* The first M elements contain the eigenvalues. The */ /* eigenvalues of each of the blocks, L_i D_i L_i^T, are */ /* sorted in ascending order ( DLARRE may use the */ /* remaining N-M elements as workspace). */ /* WERR (output) DOUBLE PRECISION array, dimension (N) */ /* The error bound on the corresponding eigenvalue in W. */ /* WGAP (output) DOUBLE PRECISION array, dimension (N) */ /* The separation from the right neighbor eigenvalue in W. */ /* The gap is only with respect to the eigenvalues of the same block */ /* as each block has its own representation tree. */ /* Exception: at the right end of a block we store the left gap */ /* IBLOCK (output) INTEGER array, dimension (N) */ /* The indices of the blocks (submatrices) associated with the */ /* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */ /* W(i) belongs to the first block from the top, =2 if W(i) */ /* belongs to the second block, etc. */ /* INDEXW (output) INTEGER array, dimension (N) */ /* The indices of the eigenvalues within each block (submatrix); */ /* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */ /* i-th eigenvalue W(i) is the 10-th eigenvalue in block 2 */ /* GERS (output) DOUBLE PRECISION array, dimension (2*N) */ /* The N Gerschgorin intervals (the i-th Gerschgorin interval */ /* is (GERS(2*i-1), GERS(2*i)). */ /* PIVMIN (output) DOUBLE PRECISION */ /* The minimum pivot in the Sturm sequence for T. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (6*N) */ /* Workspace. */ /* IWORK (workspace) INTEGER array, dimension (5*N) */ /* Workspace. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* > 0: A problem occured in DLARRE. */ /* < 0: One of the called subroutines signaled an internal problem. */ /* Needs inspection of the corresponding parameter IINFO */ /* for further information. */ /* =-1: Problem in DLARRD. */ /* = 2: No base representation could be found in MAXTRY iterations. */ /* Increasing MAXTRY and recompilation might be a remedy. */ /* =-3: Problem in DLARRB when computing the refined root */ /* representation for DLASQ2. */ /* =-4: Problem in DLARRB when preforming bisection on the */ /* desired part of the spectrum. */ /* =-5: Problem in DLASQ2. */ /* =-6: Problem in DLASQ2. */ /* Further Details */ /* The base representations are required to suffer very little */ /* element growth and consequently define all their eigenvalues to */ /* high relative accuracy. */ /* =============== */ /* Based on contributions by */ /* Beresford Parlett, University of California, Berkeley, USA */ /* Jim Demmel, University of California, Berkeley, USA */ /* Inderjit Dhillon, University of Texas, Austin, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* Christof Voemel, University of California, Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --iwork; --work; --gers; --indexw; --iblock; --wgap; --werr; --w; --isplit; --e2; --e; --d__; /* Function Body */ *info = 0; /* Decode RANGE */ if (_starpu_lsame_(range, "A")) { irange = 1; } else if (_starpu_lsame_(range, "V")) { irange = 3; } else if (_starpu_lsame_(range, "I")) { irange = 2; } *m = 0; /* Get machine constants */ safmin = _starpu_dlamch_("S"); eps = _starpu_dlamch_("P"); /* Set parameters */ rtl = sqrt(eps); bsrtol = sqrt(eps); /* Treat case of 1x1 matrix for quick return */ if (*n == 1) { if (irange == 1 || irange == 3 && d__[1] > *vl && d__[1] <= *vu || irange == 2 && *il == 1 && *iu == 1) { *m = 1; w[1] = d__[1]; /* The computation error of the eigenvalue is zero */ werr[1] = 0.; wgap[1] = 0.; iblock[1] = 1; indexw[1] = 1; gers[1] = d__[1]; gers[2] = d__[1]; } /* store the shift for the initial RRR, which is zero in this case */ e[1] = 0.; return 0; } /* General case: tridiagonal matrix of order > 1 */ /* Init WERR, WGAP. Compute Gerschgorin intervals and spectral diameter. */ /* Compute maximum off-diagonal entry and pivmin. */ gl = d__[1]; gu = d__[1]; eold = 0.; emax = 0.; e[*n] = 0.; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { werr[i__] = 0.; wgap[i__] = 0.; eabs = (d__1 = e[i__], abs(d__1)); if (eabs >= emax) { emax = eabs; } tmp1 = eabs + eold; gers[(i__ << 1) - 1] = d__[i__] - tmp1; /* Computing MIN */ d__1 = gl, d__2 = gers[(i__ << 1) - 1]; gl = min(d__1,d__2); gers[i__ * 2] = d__[i__] + tmp1; /* Computing MAX */ d__1 = gu, d__2 = gers[i__ * 2]; gu = max(d__1,d__2); eold = eabs; /* L5: */ } /* The minimum pivot allowed in the Sturm sequence for T */ /* Computing MAX */ /* Computing 2nd power */ d__3 = emax; d__1 = 1., d__2 = d__3 * d__3; *pivmin = safmin * max(d__1,d__2); /* Compute spectral diameter. The Gerschgorin bounds give an */ /* estimate that is wrong by at most a factor of SQRT(2) */ spdiam = gu - gl; /* Compute splitting points */ _starpu_dlarra_(n, &d__[1], &e[1], &e2[1], spltol, &spdiam, nsplit, &isplit[1], & iinfo); /* Can force use of bisection instead of faster DQDS. */ /* Option left in the code for future multisection work. */ forceb = FALSE_; /* Initialize USEDQD, DQDS should be used for ALLRNG unless someone */ /* explicitly wants bisection. */ usedqd = irange == 1 && ! forceb; if (irange == 1 && ! forceb) { /* Set interval [VL,VU] that contains all eigenvalues */ *vl = gl; *vu = gu; } else { /* We call DLARRD to find crude approximations to the eigenvalues */ /* in the desired range. In case IRANGE = INDRNG, we also obtain the */ /* interval (VL,VU] that contains all the wanted eigenvalues. */ /* An interval [LEFT,RIGHT] has converged if */ /* RIGHT-LEFT.LT.RTOL*MAX(ABS(LEFT),ABS(RIGHT)) */ /* DLARRD needs a WORK of size 4*N, IWORK of size 3*N */ _starpu_dlarrd_(range, "B", n, vl, vu, il, iu, &gers[1], &bsrtol, &d__[1], &e[ 1], &e2[1], pivmin, nsplit, &isplit[1], &mm, &w[1], &werr[1], vl, vu, &iblock[1], &indexw[1], &work[1], &iwork[1], &iinfo); if (iinfo != 0) { *info = -1; return 0; } /* Make sure that the entries M+1 to N in W, WERR, IBLOCK, INDEXW are 0 */ i__1 = *n; for (i__ = mm + 1; i__ <= i__1; ++i__) { w[i__] = 0.; werr[i__] = 0.; iblock[i__] = 0; indexw[i__] = 0; /* L14: */ } } /* ** */ /* Loop over unreduced blocks */ ibegin = 1; wbegin = 1; i__1 = *nsplit; for (jblk = 1; jblk <= i__1; ++jblk) { iend = isplit[jblk]; in = iend - ibegin + 1; /* 1 X 1 block */ if (in == 1) { if (irange == 1 || irange == 3 && d__[ibegin] > *vl && d__[ibegin] <= *vu || irange == 2 && iblock[wbegin] == jblk) { ++(*m); w[*m] = d__[ibegin]; werr[*m] = 0.; /* The gap for a single block doesn't matter for the later */ /* algorithm and is assigned an arbitrary large value */ wgap[*m] = 0.; iblock[*m] = jblk; indexw[*m] = 1; ++wbegin; } /* E( IEND ) holds the shift for the initial RRR */ e[iend] = 0.; ibegin = iend + 1; goto L170; } /* Blocks of size larger than 1x1 */ /* E( IEND ) will hold the shift for the initial RRR, for now set it =0 */ e[iend] = 0.; /* Find local outer bounds GL,GU for the block */ gl = d__[ibegin]; gu = d__[ibegin]; i__2 = iend; for (i__ = ibegin; i__ <= i__2; ++i__) { /* Computing MIN */ d__1 = gers[(i__ << 1) - 1]; gl = min(d__1,gl); /* Computing MAX */ d__1 = gers[i__ * 2]; gu = max(d__1,gu); /* L15: */ } spdiam = gu - gl; if (! (irange == 1 && ! forceb)) { /* Count the number of eigenvalues in the current block. */ mb = 0; i__2 = mm; for (i__ = wbegin; i__ <= i__2; ++i__) { if (iblock[i__] == jblk) { ++mb; } else { goto L21; } /* L20: */ } L21: if (mb == 0) { /* No eigenvalue in the current block lies in the desired range */ /* E( IEND ) holds the shift for the initial RRR */ e[iend] = 0.; ibegin = iend + 1; goto L170; } else { /* Decide whether dqds or bisection is more efficient */ usedqd = (doublereal) mb > in * .5 && ! forceb; wend = wbegin + mb - 1; /* Calculate gaps for the current block */ /* In later stages, when representations for individual */ /* eigenvalues are different, we use SIGMA = E( IEND ). */ sigma = 0.; i__2 = wend - 1; for (i__ = wbegin; i__ <= i__2; ++i__) { /* Computing MAX */ d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + werr[i__]); wgap[i__] = max(d__1,d__2); /* L30: */ } /* Computing MAX */ d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]); wgap[wend] = max(d__1,d__2); /* Find local index of the first and last desired evalue. */ indl = indexw[wbegin]; indu = indexw[wend]; } } if (irange == 1 && ! forceb || usedqd) { /* Case of DQDS */ /* Find approximations to the extremal eigenvalues of the block */ _starpu_dlarrk_(&in, &c__1, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, & rtl, &tmp, &tmp1, &iinfo); if (iinfo != 0) { *info = -1; return 0; } /* Computing MAX */ d__2 = gl, d__3 = tmp - tmp1 - eps * 100. * (d__1 = tmp - tmp1, abs(d__1)); isleft = max(d__2,d__3); _starpu_dlarrk_(&in, &in, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, & rtl, &tmp, &tmp1, &iinfo); if (iinfo != 0) { *info = -1; return 0; } /* Computing MIN */ d__2 = gu, d__3 = tmp + tmp1 + eps * 100. * (d__1 = tmp + tmp1, abs(d__1)); isrght = min(d__2,d__3); /* Improve the estimate of the spectral diameter */ spdiam = isrght - isleft; } else { /* Case of bisection */ /* Find approximations to the wanted extremal eigenvalues */ /* Computing MAX */ d__2 = gl, d__3 = w[wbegin] - werr[wbegin] - eps * 100. * (d__1 = w[wbegin] - werr[wbegin], abs(d__1)); isleft = max(d__2,d__3); /* Computing MIN */ d__2 = gu, d__3 = w[wend] + werr[wend] + eps * 100. * (d__1 = w[ wend] + werr[wend], abs(d__1)); isrght = min(d__2,d__3); } /* Decide whether the base representation for the current block */ /* L_JBLK D_JBLK L_JBLK^T = T_JBLK - sigma_JBLK I */ /* should be on the left or the right end of the current block. */ /* The strategy is to shift to the end which is "more populated" */ /* Furthermore, decide whether to use DQDS for the computation of */ /* the eigenvalue approximations at the end of DLARRE or bisection. */ /* dqds is chosen if all eigenvalues are desired or the number of */ /* eigenvalues to be computed is large compared to the blocksize. */ if (irange == 1 && ! forceb) { /* If all the eigenvalues have to be computed, we use dqd */ usedqd = TRUE_; /* INDL is the local index of the first eigenvalue to compute */ indl = 1; indu = in; /* MB = number of eigenvalues to compute */ mb = in; wend = wbegin + mb - 1; /* Define 1/4 and 3/4 points of the spectrum */ s1 = isleft + spdiam * .25; s2 = isrght - spdiam * .25; } else { /* DLARRD has computed IBLOCK and INDEXW for each eigenvalue */ /* approximation. */ /* choose sigma */ if (usedqd) { s1 = isleft + spdiam * .25; s2 = isrght - spdiam * .25; } else { tmp = min(isrght,*vu) - max(isleft,*vl); s1 = max(isleft,*vl) + tmp * .25; s2 = min(isrght,*vu) - tmp * .25; } } /* Compute the negcount at the 1/4 and 3/4 points */ if (mb > 1) { _starpu_dlarrc_("T", &in, &s1, &s2, &d__[ibegin], &e[ibegin], pivmin, & cnt, &cnt1, &cnt2, &iinfo); } if (mb == 1) { sigma = gl; sgndef = 1.; } else if (cnt1 - indl >= indu - cnt2) { if (irange == 1 && ! forceb) { sigma = max(isleft,gl); } else if (usedqd) { /* use Gerschgorin bound as shift to get pos def matrix */ /* for dqds */ sigma = isleft; } else { /* use approximation of the first desired eigenvalue of the */ /* block as shift */ sigma = max(isleft,*vl); } sgndef = 1.; } else { if (irange == 1 && ! forceb) { sigma = min(isrght,gu); } else if (usedqd) { /* use Gerschgorin bound as shift to get neg def matrix */ /* for dqds */ sigma = isrght; } else { /* use approximation of the first desired eigenvalue of the */ /* block as shift */ sigma = min(isrght,*vu); } sgndef = -1.; } /* An initial SIGMA has been chosen that will be used for computing */ /* T - SIGMA I = L D L^T */ /* Define the increment TAU of the shift in case the initial shift */ /* needs to be refined to obtain a factorization with not too much */ /* element growth. */ if (usedqd) { /* The initial SIGMA was to the outer end of the spectrum */ /* the matrix is definite and we need not retreat. */ tau = spdiam * eps * *n + *pivmin * 2.; } else { if (mb > 1) { clwdth = w[wend] + werr[wend] - w[wbegin] - werr[wbegin]; avgap = (d__1 = clwdth / (doublereal) (wend - wbegin), abs( d__1)); if (sgndef == 1.) { /* Computing MAX */ d__1 = wgap[wbegin]; tau = max(d__1,avgap) * .5; /* Computing MAX */ d__1 = tau, d__2 = werr[wbegin]; tau = max(d__1,d__2); } else { /* Computing MAX */ d__1 = wgap[wend - 1]; tau = max(d__1,avgap) * .5; /* Computing MAX */ d__1 = tau, d__2 = werr[wend]; tau = max(d__1,d__2); } } else { tau = werr[wbegin]; } } for (idum = 1; idum <= 6; ++idum) { /* Compute L D L^T factorization of tridiagonal matrix T - sigma I. */ /* Store D in WORK(1:IN), L in WORK(IN+1:2*IN), and reciprocals of */ /* pivots in WORK(2*IN+1:3*IN) */ dpivot = d__[ibegin] - sigma; work[1] = dpivot; dmax__ = abs(work[1]); j = ibegin; i__2 = in - 1; for (i__ = 1; i__ <= i__2; ++i__) { work[(in << 1) + i__] = 1. / work[i__]; tmp = e[j] * work[(in << 1) + i__]; work[in + i__] = tmp; dpivot = d__[j + 1] - sigma - tmp * e[j]; work[i__ + 1] = dpivot; /* Computing MAX */ d__1 = dmax__, d__2 = abs(dpivot); dmax__ = max(d__1,d__2); ++j; /* L70: */ } /* check for element growth */ if (dmax__ > spdiam * 64.) { norep = TRUE_; } else { norep = FALSE_; } if (usedqd && ! norep) { /* Ensure the definiteness of the representation */ /* All entries of D (of L D L^T) must have the same sign */ i__2 = in; for (i__ = 1; i__ <= i__2; ++i__) { tmp = sgndef * work[i__]; if (tmp < 0.) { norep = TRUE_; } /* L71: */ } } if (norep) { /* Note that in the case of IRANGE=ALLRNG, we use the Gerschgorin */ /* shift which makes the matrix definite. So we should end up */ /* here really only in the case of IRANGE = VALRNG or INDRNG. */ if (idum == 5) { if (sgndef == 1.) { /* The fudged Gerschgorin shift should succeed */ sigma = gl - spdiam * 2. * eps * *n - *pivmin * 4.; } else { sigma = gu + spdiam * 2. * eps * *n + *pivmin * 4.; } } else { sigma -= sgndef * tau; tau *= 2.; } } else { /* an initial RRR is found */ goto L83; } /* L80: */ } /* if the program reaches this point, no base representation could be */ /* found in MAXTRY iterations. */ *info = 2; return 0; L83: /* At this point, we have found an initial base representation */ /* T - SIGMA I = L D L^T with not too much element growth. */ /* Store the shift. */ e[iend] = sigma; /* Store D and L. */ _starpu_dcopy_(&in, &work[1], &c__1, &d__[ibegin], &c__1); i__2 = in - 1; _starpu_dcopy_(&i__2, &work[in + 1], &c__1, &e[ibegin], &c__1); if (mb > 1) { /* Perturb each entry of the base representation by a small */ /* (but random) relative amount to overcome difficulties with */ /* glued matrices. */ for (i__ = 1; i__ <= 4; ++i__) { iseed[i__ - 1] = 1; /* L122: */ } i__2 = (in << 1) - 1; _starpu_dlarnv_(&c__2, iseed, &i__2, &work[1]); i__2 = in - 1; for (i__ = 1; i__ <= i__2; ++i__) { d__[ibegin + i__ - 1] *= eps * 8. * work[i__] + 1.; e[ibegin + i__ - 1] *= eps * 8. * work[in + i__] + 1.; /* L125: */ } d__[iend] *= eps * 4. * work[in] + 1.; } /* Don't update the Gerschgorin intervals because keeping track */ /* of the updates would be too much work in DLARRV. */ /* We update W instead and use it to locate the proper Gerschgorin */ /* intervals. */ /* Compute the required eigenvalues of L D L' by bisection or dqds */ if (! usedqd) { /* If DLARRD has been used, shift the eigenvalue approximations */ /* according to their representation. This is necessary for */ /* a uniform DLARRV since dqds computes eigenvalues of the */ /* shifted representation. In DLARRV, W will always hold the */ /* UNshifted eigenvalue approximation. */ i__2 = wend; for (j = wbegin; j <= i__2; ++j) { w[j] -= sigma; werr[j] += (d__1 = w[j], abs(d__1)) * eps; /* L134: */ } /* call DLARRB to reduce eigenvalue error of the approximations */ /* from DLARRD */ i__2 = iend - 1; for (i__ = ibegin; i__ <= i__2; ++i__) { /* Computing 2nd power */ d__1 = e[i__]; work[i__] = d__[i__] * (d__1 * d__1); /* L135: */ } /* use bisection to find EV from INDL to INDU */ i__2 = indl - 1; _starpu_dlarrb_(&in, &d__[ibegin], &work[ibegin], &indl, &indu, rtol1, rtol2, &i__2, &w[wbegin], &wgap[wbegin], &werr[wbegin], & work[(*n << 1) + 1], &iwork[1], pivmin, &spdiam, &in, & iinfo); if (iinfo != 0) { *info = -4; return 0; } /* DLARRB computes all gaps correctly except for the last one */ /* Record distance to VU/GU */ /* Computing MAX */ d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]); wgap[wend] = max(d__1,d__2); i__2 = indu; for (i__ = indl; i__ <= i__2; ++i__) { ++(*m); iblock[*m] = jblk; indexw[*m] = i__; /* L138: */ } } else { /* Call dqds to get all eigs (and then possibly delete unwanted */ /* eigenvalues). */ /* Note that dqds finds the eigenvalues of the L D L^T representation */ /* of T to high relative accuracy. High relative accuracy */ /* might be lost when the shift of the RRR is subtracted to obtain */ /* the eigenvalues of T. However, T is not guaranteed to define its */ /* eigenvalues to high relative accuracy anyway. */ /* Set RTOL to the order of the tolerance used in DLASQ2 */ /* This is an ESTIMATED error, the worst case bound is 4*N*EPS */ /* which is usually too large and requires unnecessary work to be */ /* done by bisection when computing the eigenvectors */ rtol = log((doublereal) in) * 4. * eps; j = ibegin; i__2 = in - 1; for (i__ = 1; i__ <= i__2; ++i__) { work[(i__ << 1) - 1] = (d__1 = d__[j], abs(d__1)); work[i__ * 2] = e[j] * e[j] * work[(i__ << 1) - 1]; ++j; /* L140: */ } work[(in << 1) - 1] = (d__1 = d__[iend], abs(d__1)); work[in * 2] = 0.; _starpu_dlasq2_(&in, &work[1], &iinfo); if (iinfo != 0) { /* If IINFO = -5 then an index is part of a tight cluster */ /* and should be changed. The index is in IWORK(1) and the */ /* gap is in WORK(N+1) */ *info = -5; return 0; } else { /* Test that all eigenvalues are positive as expected */ i__2 = in; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] < 0.) { *info = -6; return 0; } /* L149: */ } } if (sgndef > 0.) { i__2 = indu; for (i__ = indl; i__ <= i__2; ++i__) { ++(*m); w[*m] = work[in - i__ + 1]; iblock[*m] = jblk; indexw[*m] = i__; /* L150: */ } } else { i__2 = indu; for (i__ = indl; i__ <= i__2; ++i__) { ++(*m); w[*m] = -work[i__]; iblock[*m] = jblk; indexw[*m] = i__; /* L160: */ } } i__2 = *m; for (i__ = *m - mb + 1; i__ <= i__2; ++i__) { /* the value of RTOL below should be the tolerance in DLASQ2 */ werr[i__] = rtol * (d__1 = w[i__], abs(d__1)); /* L165: */ } i__2 = *m - 1; for (i__ = *m - mb + 1; i__ <= i__2; ++i__) { /* compute the right gap between the intervals */ /* Computing MAX */ d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + werr[ i__]); wgap[i__] = max(d__1,d__2); /* L166: */ } /* Computing MAX */ d__1 = 0., d__2 = *vu - sigma - (w[*m] + werr[*m]); wgap[*m] = max(d__1,d__2); } /* proceed with next block */ ibegin = iend + 1; wbegin = wend + 1; L170: ; } return 0; /* end of DLARRE */ } /* _starpu_dlarre_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarrf.c000066400000000000000000000321431507764646700206510ustar00rootroot00000000000000/* dlarrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dlarrf_(integer *n, doublereal *d__, doublereal *l, doublereal *ld, integer *clstrt, integer *clend, doublereal *w, doublereal *wgap, doublereal *werr, doublereal *spdiam, doublereal * clgapl, doublereal *clgapr, doublereal *pivmin, doublereal *sigma, doublereal *dplus, doublereal *lplus, doublereal *work, integer *info) { /* System generated locals */ integer i__1; doublereal d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__; doublereal s, bestshift, smlgrowth, eps, tmp, max1, max2, rrr1, rrr2, znm2, growthbound, fail, fact, oldp; integer indx; doublereal prod; integer ktry; doublereal fail2, avgap, ldmax, rdmax; integer shift; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); logical dorrr1; extern doublereal _starpu_dlamch_(char *); doublereal ldelta; logical nofail; doublereal mingap, lsigma, rdelta; extern logical _starpu_disnan_(doublereal *); logical forcer; doublereal rsigma, clwdth; logical sawnan1, sawnan2, tryrrr1; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* * */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* Given the initial representation L D L^T and its cluster of close */ /* eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... */ /* W( CLEND ), DLARRF finds a new relatively robust representation */ /* L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the */ /* eigenvalues of L(+) D(+) L(+)^T is relatively isolated. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix (subblock, if the matrix splitted). */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The N diagonal elements of the diagonal matrix D. */ /* L (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (N-1) subdiagonal elements of the unit bidiagonal */ /* matrix L. */ /* LD (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (N-1) elements L(i)*D(i). */ /* CLSTRT (input) INTEGER */ /* The index of the first eigenvalue in the cluster. */ /* CLEND (input) INTEGER */ /* The index of the last eigenvalue in the cluster. */ /* W (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */ /* The eigenvalue APPROXIMATIONS of L D L^T in ascending order. */ /* W( CLSTRT ) through W( CLEND ) form the cluster of relatively */ /* close eigenalues. */ /* WGAP (input/output) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */ /* The separation from the right neighbor eigenvalue in W. */ /* WERR (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */ /* WERR contain the semiwidth of the uncertainty */ /* interval of the corresponding eigenvalue APPROXIMATION in W */ /* SPDIAM (input) estimate of the spectral diameter obtained from the */ /* Gerschgorin intervals */ /* CLGAPL, CLGAPR (input) absolute gap on each end of the cluster. */ /* Set by the calling routine to protect against shifts too close */ /* to eigenvalues outside the cluster. */ /* PIVMIN (input) DOUBLE PRECISION */ /* The minimum pivot allowed in the Sturm sequence. */ /* SIGMA (output) DOUBLE PRECISION */ /* The shift used to form L(+) D(+) L(+)^T. */ /* DPLUS (output) DOUBLE PRECISION array, dimension (N) */ /* The N diagonal elements of the diagonal matrix D(+). */ /* LPLUS (output) DOUBLE PRECISION array, dimension (N-1) */ /* The first (N-1) elements of LPLUS contain the subdiagonal */ /* elements of the unit bidiagonal matrix L(+). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ /* Workspace. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Beresford Parlett, University of California, Berkeley, USA */ /* Jim Demmel, University of California, Berkeley, USA */ /* Inderjit Dhillon, University of Texas, Austin, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* Christof Voemel, University of California, Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --work; --lplus; --dplus; --werr; --wgap; --w; --ld; --l; --d__; /* Function Body */ *info = 0; fact = 2.; eps = _starpu_dlamch_("Precision"); shift = 0; forcer = FALSE_; /* Note that we cannot guarantee that for any of the shifts tried, */ /* the factorization has a small or even moderate element growth. */ /* There could be Ritz values at both ends of the cluster and despite */ /* backing off, there are examples where all factorizations tried */ /* (in IEEE mode, allowing zero pivots & infinities) have INFINITE */ /* element growth. */ /* For this reason, we should use PIVMIN in this subroutine so that at */ /* least the L D L^T factorization exists. It can be checked afterwards */ /* whether the element growth caused bad residuals/orthogonality. */ /* Decide whether the code should accept the best among all */ /* representations despite large element growth or signal INFO=1 */ nofail = TRUE_; /* Compute the average gap length of the cluster */ clwdth = (d__1 = w[*clend] - w[*clstrt], abs(d__1)) + werr[*clend] + werr[ *clstrt]; avgap = clwdth / (doublereal) (*clend - *clstrt); mingap = min(*clgapl,*clgapr); /* Initial values for shifts to both ends of cluster */ /* Computing MIN */ d__1 = w[*clstrt], d__2 = w[*clend]; lsigma = min(d__1,d__2) - werr[*clstrt]; /* Computing MAX */ d__1 = w[*clstrt], d__2 = w[*clend]; rsigma = max(d__1,d__2) + werr[*clend]; /* Use a small fudge to make sure that we really shift to the outside */ lsigma -= abs(lsigma) * 4. * eps; rsigma += abs(rsigma) * 4. * eps; /* Compute upper bounds for how much to back off the initial shifts */ ldmax = mingap * .25 + *pivmin * 2.; rdmax = mingap * .25 + *pivmin * 2.; /* Computing MAX */ d__1 = avgap, d__2 = wgap[*clstrt]; ldelta = max(d__1,d__2) / fact; /* Computing MAX */ d__1 = avgap, d__2 = wgap[*clend - 1]; rdelta = max(d__1,d__2) / fact; /* Initialize the record of the best representation found */ s = _starpu_dlamch_("S"); smlgrowth = 1. / s; fail = (doublereal) (*n - 1) * mingap / (*spdiam * eps); fail2 = (doublereal) (*n - 1) * mingap / (*spdiam * sqrt(eps)); bestshift = lsigma; /* while (KTRY <= KTRYMAX) */ ktry = 0; growthbound = *spdiam * 8.; L5: sawnan1 = FALSE_; sawnan2 = FALSE_; /* Ensure that we do not back off too much of the initial shifts */ ldelta = min(ldmax,ldelta); rdelta = min(rdmax,rdelta); /* Compute the element growth when shifting to both ends of the cluster */ /* accept the shift if there is no element growth at one of the two ends */ /* Left end */ s = -lsigma; dplus[1] = d__[1] + s; if (abs(dplus[1]) < *pivmin) { dplus[1] = -(*pivmin); /* Need to set SAWNAN1 because refined RRR test should not be used */ /* in this case */ sawnan1 = TRUE_; } max1 = abs(dplus[1]); i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { lplus[i__] = ld[i__] / dplus[i__]; s = s * lplus[i__] * l[i__] - lsigma; dplus[i__ + 1] = d__[i__ + 1] + s; if ((d__1 = dplus[i__ + 1], abs(d__1)) < *pivmin) { dplus[i__ + 1] = -(*pivmin); /* Need to set SAWNAN1 because refined RRR test should not be used */ /* in this case */ sawnan1 = TRUE_; } /* Computing MAX */ d__2 = max1, d__3 = (d__1 = dplus[i__ + 1], abs(d__1)); max1 = max(d__2,d__3); /* L6: */ } sawnan1 = sawnan1 || _starpu_disnan_(&max1); if (forcer || max1 <= growthbound && ! sawnan1) { *sigma = lsigma; shift = 1; goto L100; } /* Right end */ s = -rsigma; work[1] = d__[1] + s; if (abs(work[1]) < *pivmin) { work[1] = -(*pivmin); /* Need to set SAWNAN2 because refined RRR test should not be used */ /* in this case */ sawnan2 = TRUE_; } max2 = abs(work[1]); i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { work[*n + i__] = ld[i__] / work[i__]; s = s * work[*n + i__] * l[i__] - rsigma; work[i__ + 1] = d__[i__ + 1] + s; if ((d__1 = work[i__ + 1], abs(d__1)) < *pivmin) { work[i__ + 1] = -(*pivmin); /* Need to set SAWNAN2 because refined RRR test should not be used */ /* in this case */ sawnan2 = TRUE_; } /* Computing MAX */ d__2 = max2, d__3 = (d__1 = work[i__ + 1], abs(d__1)); max2 = max(d__2,d__3); /* L7: */ } sawnan2 = sawnan2 || _starpu_disnan_(&max2); if (forcer || max2 <= growthbound && ! sawnan2) { *sigma = rsigma; shift = 2; goto L100; } /* If we are at this point, both shifts led to too much element growth */ /* Record the better of the two shifts (provided it didn't lead to NaN) */ if (sawnan1 && sawnan2) { /* both MAX1 and MAX2 are NaN */ goto L50; } else { if (! sawnan1) { indx = 1; if (max1 <= smlgrowth) { smlgrowth = max1; bestshift = lsigma; } } if (! sawnan2) { if (sawnan1 || max2 <= max1) { indx = 2; } if (max2 <= smlgrowth) { smlgrowth = max2; bestshift = rsigma; } } } /* If we are here, both the left and the right shift led to */ /* element growth. If the element growth is moderate, then */ /* we may still accept the representation, if it passes a */ /* refined test for RRR. This test supposes that no NaN occurred. */ /* Moreover, we use the refined RRR test only for isolated clusters. */ if (clwdth < mingap / 128. && min(max1,max2) < fail2 && ! sawnan1 && ! sawnan2) { dorrr1 = TRUE_; } else { dorrr1 = FALSE_; } tryrrr1 = TRUE_; if (tryrrr1 && dorrr1) { if (indx == 1) { tmp = (d__1 = dplus[*n], abs(d__1)); znm2 = 1.; prod = 1.; oldp = 1.; for (i__ = *n - 1; i__ >= 1; --i__) { if (prod <= eps) { prod = dplus[i__ + 1] * work[*n + i__ + 1] / (dplus[i__] * work[*n + i__]) * oldp; } else { prod *= (d__1 = work[*n + i__], abs(d__1)); } oldp = prod; /* Computing 2nd power */ d__1 = prod; znm2 += d__1 * d__1; /* Computing MAX */ d__2 = tmp, d__3 = (d__1 = dplus[i__] * prod, abs(d__1)); tmp = max(d__2,d__3); /* L15: */ } rrr1 = tmp / (*spdiam * sqrt(znm2)); if (rrr1 <= 8.) { *sigma = lsigma; shift = 1; goto L100; } } else if (indx == 2) { tmp = (d__1 = work[*n], abs(d__1)); znm2 = 1.; prod = 1.; oldp = 1.; for (i__ = *n - 1; i__ >= 1; --i__) { if (prod <= eps) { prod = work[i__ + 1] * lplus[i__ + 1] / (work[i__] * lplus[i__]) * oldp; } else { prod *= (d__1 = lplus[i__], abs(d__1)); } oldp = prod; /* Computing 2nd power */ d__1 = prod; znm2 += d__1 * d__1; /* Computing MAX */ d__2 = tmp, d__3 = (d__1 = work[i__] * prod, abs(d__1)); tmp = max(d__2,d__3); /* L16: */ } rrr2 = tmp / (*spdiam * sqrt(znm2)); if (rrr2 <= 8.) { *sigma = rsigma; shift = 2; goto L100; } } } L50: if (ktry < 1) { /* If we are here, both shifts failed also the RRR test. */ /* Back off to the outside */ /* Computing MAX */ d__1 = lsigma - ldelta, d__2 = lsigma - ldmax; lsigma = max(d__1,d__2); /* Computing MIN */ d__1 = rsigma + rdelta, d__2 = rsigma + rdmax; rsigma = min(d__1,d__2); ldelta *= 2.; rdelta *= 2.; ++ktry; goto L5; } else { /* None of the representations investigated satisfied our */ /* criteria. Take the best one we found. */ if (smlgrowth < fail || nofail) { lsigma = bestshift; rsigma = bestshift; forcer = TRUE_; goto L5; } else { *info = 1; return 0; } } L100: if (shift == 1) { } else if (shift == 2) { /* store new L and D back into DPLUS, LPLUS */ _starpu_dcopy_(n, &work[1], &c__1, &dplus[1], &c__1); i__1 = *n - 1; _starpu_dcopy_(&i__1, &work[*n + 1], &c__1, &lplus[1], &c__1); } return 0; /* End of DLARRF */ } /* _starpu_dlarrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarrj.c000066400000000000000000000220171507764646700206540ustar00rootroot00000000000000/* dlarrj.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlarrj_(integer *n, doublereal *d__, doublereal *e2, integer *ifirst, integer *ilast, doublereal *rtol, integer *offset, doublereal *w, doublereal *werr, doublereal *work, integer *iwork, doublereal *pivmin, doublereal *spdiam, integer *info) { /* System generated locals */ integer i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double log(doublereal); /* Local variables */ integer i__, j, k, p; doublereal s; integer i1, i2, ii; doublereal fac, mid; integer cnt; doublereal tmp, left; integer iter, nint, prev, next, savi1; doublereal right, width, dplus; integer olnint, maxitr; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* Given the initial eigenvalue approximations of T, DLARRJ */ /* does bisection to refine the eigenvalues of T, */ /* W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */ /* guesses for these eigenvalues are input in W, the corresponding estimate */ /* of the error in these guesses in WERR. During bisection, intervals */ /* [left, right] are maintained by storing their mid-points and */ /* semi-widths in the arrays W and WERR respectively. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The N diagonal elements of T. */ /* E2 (input) DOUBLE PRECISION array, dimension (N-1) */ /* The Squares of the (N-1) subdiagonal elements of T. */ /* IFIRST (input) INTEGER */ /* The index of the first eigenvalue to be computed. */ /* ILAST (input) INTEGER */ /* The index of the last eigenvalue to be computed. */ /* RTOL (input) DOUBLE PRECISION */ /* Tolerance for the convergence of the bisection intervals. */ /* An interval [LEFT,RIGHT] has converged if */ /* RIGHT-LEFT.LT.RTOL*MAX(|LEFT|,|RIGHT|). */ /* OFFSET (input) INTEGER */ /* Offset for the arrays W and WERR, i.e., the IFIRST-OFFSET */ /* through ILAST-OFFSET elements of these arrays are to be used. */ /* W (input/output) DOUBLE PRECISION array, dimension (N) */ /* On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */ /* estimates of the eigenvalues of L D L^T indexed IFIRST through */ /* ILAST. */ /* On output, these estimates are refined. */ /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ /* On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */ /* the errors in the estimates of the corresponding elements in W. */ /* On output, these errors are refined. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ /* Workspace. */ /* IWORK (workspace) INTEGER array, dimension (2*N) */ /* Workspace. */ /* PIVMIN (input) DOUBLE PRECISION */ /* The minimum pivot in the Sturm sequence for T. */ /* SPDIAM (input) DOUBLE PRECISION */ /* The spectral diameter of T. */ /* INFO (output) INTEGER */ /* Error flag. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Beresford Parlett, University of California, Berkeley, USA */ /* Jim Demmel, University of California, Berkeley, USA */ /* Inderjit Dhillon, University of Texas, Austin, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* Christof Voemel, University of California, Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --iwork; --work; --werr; --w; --e2; --d__; /* Function Body */ *info = 0; maxitr = (integer) ((log(*spdiam + *pivmin) - log(*pivmin)) / log(2.)) + 2; /* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */ /* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */ /* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 ) */ /* for an unconverged interval is set to the index of the next unconverged */ /* interval, and is -1 or 0 for a converged interval. Thus a linked */ /* list of unconverged intervals is set up. */ i1 = *ifirst; i2 = *ilast; /* The number of unconverged intervals */ nint = 0; /* The last unconverged interval found */ prev = 0; i__1 = i2; for (i__ = i1; i__ <= i__1; ++i__) { k = i__ << 1; ii = i__ - *offset; left = w[ii] - werr[ii]; mid = w[ii]; right = w[ii] + werr[ii]; width = right - mid; /* Computing MAX */ d__1 = abs(left), d__2 = abs(right); tmp = max(d__1,d__2); /* The following test prevents the test of converged intervals */ if (width < *rtol * tmp) { /* This interval has already converged and does not need refinement. */ /* (Note that the gaps might change through refining the */ /* eigenvalues, however, they can only get bigger.) */ /* Remove it from the list. */ iwork[k - 1] = -1; /* Make sure that I1 always points to the first unconverged interval */ if (i__ == i1 && i__ < i2) { i1 = i__ + 1; } if (prev >= i1 && i__ <= i2) { iwork[(prev << 1) - 1] = i__ + 1; } } else { /* unconverged interval found */ prev = i__; /* Make sure that [LEFT,RIGHT] contains the desired eigenvalue */ /* Do while( CNT(LEFT).GT.I-1 ) */ fac = 1.; L20: cnt = 0; s = left; dplus = d__[1] - s; if (dplus < 0.) { ++cnt; } i__2 = *n; for (j = 2; j <= i__2; ++j) { dplus = d__[j] - s - e2[j - 1] / dplus; if (dplus < 0.) { ++cnt; } /* L30: */ } if (cnt > i__ - 1) { left -= werr[ii] * fac; fac *= 2.; goto L20; } /* Do while( CNT(RIGHT).LT.I ) */ fac = 1.; L50: cnt = 0; s = right; dplus = d__[1] - s; if (dplus < 0.) { ++cnt; } i__2 = *n; for (j = 2; j <= i__2; ++j) { dplus = d__[j] - s - e2[j - 1] / dplus; if (dplus < 0.) { ++cnt; } /* L60: */ } if (cnt < i__) { right += werr[ii] * fac; fac *= 2.; goto L50; } ++nint; iwork[k - 1] = i__ + 1; iwork[k] = cnt; } work[k - 1] = left; work[k] = right; /* L75: */ } savi1 = i1; /* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */ /* and while (ITER.LT.MAXITR) */ iter = 0; L80: prev = i1 - 1; i__ = i1; olnint = nint; i__1 = olnint; for (p = 1; p <= i__1; ++p) { k = i__ << 1; ii = i__ - *offset; next = iwork[k - 1]; left = work[k - 1]; right = work[k]; mid = (left + right) * .5; /* semiwidth of interval */ width = right - mid; /* Computing MAX */ d__1 = abs(left), d__2 = abs(right); tmp = max(d__1,d__2); if (width < *rtol * tmp || iter == maxitr) { /* reduce number of unconverged intervals */ --nint; /* Mark interval as converged. */ iwork[k - 1] = 0; if (i1 == i__) { i1 = next; } else { /* Prev holds the last unconverged interval previously examined */ if (prev >= i1) { iwork[(prev << 1) - 1] = next; } } i__ = next; goto L100; } prev = i__; /* Perform one bisection step */ cnt = 0; s = mid; dplus = d__[1] - s; if (dplus < 0.) { ++cnt; } i__2 = *n; for (j = 2; j <= i__2; ++j) { dplus = d__[j] - s - e2[j - 1] / dplus; if (dplus < 0.) { ++cnt; } /* L90: */ } if (cnt <= i__ - 1) { work[k - 1] = mid; } else { work[k] = mid; } i__ = next; L100: ; } ++iter; /* do another loop if there are still unconverged intervals */ /* However, in the last iteration, all intervals are accepted */ /* since this is the best we can do. */ if (nint > 0 && iter <= maxitr) { goto L80; } /* At this point, all the intervals have converged */ i__1 = *ilast; for (i__ = savi1; i__ <= i__1; ++i__) { k = i__ << 1; ii = i__ - *offset; /* All intervals marked by '0' have been refined. */ if (iwork[k - 1] == 0) { w[ii] = (work[k - 1] + work[k]) * .5; werr[ii] = work[k] - w[ii]; } /* L110: */ } return 0; /* End of DLARRJ */ } /* _starpu_dlarrj_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarrk.c000066400000000000000000000125171507764646700206610ustar00rootroot00000000000000/* dlarrk.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlarrk_(integer *n, integer *iw, doublereal *gl, doublereal *gu, doublereal *d__, doublereal *e2, doublereal *pivmin, doublereal *reltol, doublereal *w, doublereal *werr, integer *info) { /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Builtin functions */ double log(doublereal); /* Local variables */ integer i__, it; doublereal mid, eps, tmp1, tmp2, left, atoli, right; integer itmax; doublereal rtoli, tnorm; extern doublereal _starpu_dlamch_(char *); integer negcnt; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARRK computes one eigenvalue of a symmetric tridiagonal */ /* matrix T to suitable accuracy. This is an auxiliary code to be */ /* called from DSTEMR. */ /* To avoid overflow, the matrix must be scaled so that its */ /* largest element is no greater than overflow**(1/2) * */ /* underflow**(1/4) in absolute value, and for greatest */ /* accuracy, it should not be much smaller than that. */ /* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */ /* Matrix", Report CS41, Computer Science Dept., Stanford */ /* University, July 21, 1966. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the tridiagonal matrix T. N >= 0. */ /* IW (input) INTEGER */ /* The index of the eigenvalues to be returned. */ /* GL (input) DOUBLE PRECISION */ /* GU (input) DOUBLE PRECISION */ /* An upper and a lower bound on the eigenvalue. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of the tridiagonal matrix T. */ /* E2 (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) squared off-diagonal elements of the tridiagonal matrix T. */ /* PIVMIN (input) DOUBLE PRECISION */ /* The minimum pivot allowed in the Sturm sequence for T. */ /* RELTOL (input) DOUBLE PRECISION */ /* The minimum relative width of an interval. When an interval */ /* is narrower than RELTOL times the larger (in */ /* magnitude) endpoint, then it is considered to be */ /* sufficiently small, i.e., converged. Note: this should */ /* always be at least radix*machine epsilon. */ /* W (output) DOUBLE PRECISION */ /* WERR (output) DOUBLE PRECISION */ /* The error bound on the corresponding eigenvalue approximation */ /* in W. */ /* INFO (output) INTEGER */ /* = 0: Eigenvalue converged */ /* = -1: Eigenvalue did NOT converge */ /* Internal Parameters */ /* =================== */ /* FUDGE DOUBLE PRECISION, default = 2 */ /* A "fudge factor" to widen the Gershgorin intervals. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Get machine constants */ /* Parameter adjustments */ --e2; --d__; /* Function Body */ eps = _starpu_dlamch_("P"); /* Computing MAX */ d__1 = abs(*gl), d__2 = abs(*gu); tnorm = max(d__1,d__2); rtoli = *reltol; atoli = *pivmin * 4.; itmax = (integer) ((log(tnorm + *pivmin) - log(*pivmin)) / log(2.)) + 2; *info = -1; left = *gl - tnorm * 2. * eps * *n - *pivmin * 4.; right = *gu + tnorm * 2. * eps * *n + *pivmin * 4.; it = 0; L10: /* Check if interval converged or maximum number of iterations reached */ tmp1 = (d__1 = right - left, abs(d__1)); /* Computing MAX */ d__1 = abs(right), d__2 = abs(left); tmp2 = max(d__1,d__2); /* Computing MAX */ d__1 = max(atoli,*pivmin), d__2 = rtoli * tmp2; if (tmp1 < max(d__1,d__2)) { *info = 0; goto L30; } if (it > itmax) { goto L30; } /* Count number of negative pivots for mid-point */ ++it; mid = (left + right) * .5; negcnt = 0; tmp1 = d__[1] - mid; if (abs(tmp1) < *pivmin) { tmp1 = -(*pivmin); } if (tmp1 <= 0.) { ++negcnt; } i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { tmp1 = d__[i__] - e2[i__ - 1] / tmp1 - mid; if (abs(tmp1) < *pivmin) { tmp1 = -(*pivmin); } if (tmp1 <= 0.) { ++negcnt; } /* L20: */ } if (negcnt >= *iw) { right = mid; } else { left = mid; } goto L10; L30: /* Converged or maximum number of iterations reached */ *w = (left + right) * .5; *werr = (d__1 = right - left, abs(d__1)) * .5; return 0; /* End of DLARRK */ } /* _starpu_dlarrk_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarrr.c000066400000000000000000000121341507764646700206630ustar00rootroot00000000000000/* dlarrr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlarrr_(integer *n, doublereal *d__, doublereal *e, integer *info) { /* System generated locals */ integer i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__; doublereal eps, tmp, tmp2, rmin; extern doublereal _starpu_dlamch_(char *); doublereal offdig, safmin; logical yesrel; doublereal smlnum, offdig2; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* Perform tests to decide whether the symmetric tridiagonal matrix T */ /* warrants expensive computations which guarantee high relative accuracy */ /* in the eigenvalues. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix. N > 0. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The N diagonal elements of the tridiagonal matrix T. */ /* E (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the first (N-1) entries contain the subdiagonal */ /* elements of the tridiagonal matrix T; E(N) is set to ZERO. */ /* INFO (output) INTEGER */ /* INFO = 0(default) : the matrix warrants computations preserving */ /* relative accuracy. */ /* INFO = 1 : the matrix warrants computations guaranteeing */ /* only absolute accuracy. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Beresford Parlett, University of California, Berkeley, USA */ /* Jim Demmel, University of California, Berkeley, USA */ /* Inderjit Dhillon, University of Texas, Austin, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* Christof Voemel, University of California, Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* As a default, do NOT go for relative-accuracy preserving computations. */ /* Parameter adjustments */ --e; --d__; /* Function Body */ *info = 1; safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; rmin = sqrt(smlnum); /* Tests for relative accuracy */ /* Test for scaled diagonal dominance */ /* Scale the diagonal entries to one and check whether the sum of the */ /* off-diagonals is less than one */ /* The sdd relative error bounds have a 1/(1- 2*x) factor in them, */ /* x = max(OFFDIG + OFFDIG2), so when x is close to 1/2, no relative */ /* accuracy is promised. In the notation of the code fragment below, */ /* 1/(1 - (OFFDIG + OFFDIG2)) is the condition number. */ /* We don't think it is worth going into "sdd mode" unless the relative */ /* condition number is reasonable, not 1/macheps. */ /* The threshold should be compatible with other thresholds used in the */ /* code. We set OFFDIG + OFFDIG2 <= .999 =: RELCOND, it corresponds */ /* to losing at most 3 decimal digits: 1 / (1 - (OFFDIG + OFFDIG2)) <= 1000 */ /* instead of the current OFFDIG + OFFDIG2 < 1 */ yesrel = TRUE_; offdig = 0.; tmp = sqrt((abs(d__[1]))); if (tmp < rmin) { yesrel = FALSE_; } if (! yesrel) { goto L11; } i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { tmp2 = sqrt((d__1 = d__[i__], abs(d__1))); if (tmp2 < rmin) { yesrel = FALSE_; } if (! yesrel) { goto L11; } offdig2 = (d__1 = e[i__ - 1], abs(d__1)) / (tmp * tmp2); if (offdig + offdig2 >= .999) { yesrel = FALSE_; } if (! yesrel) { goto L11; } tmp = tmp2; offdig = offdig2; /* L10: */ } L11: if (yesrel) { *info = 0; return 0; } else { } /* *** MORE TO BE IMPLEMENTED *** */ /* Test if the lower bidiagonal matrix L from T = L D L^T */ /* (zero shift facto) is well conditioned */ /* Test if the upper bidiagonal matrix U from T = U D U^T */ /* (zero shift facto) is well conditioned. */ /* In this case, the matrix needs to be flipped and, at the end */ /* of the eigenvector computation, the flip needs to be applied */ /* to the computed eigenvectors (and the support) */ return 0; /* END OF DLARRR */ } /* _starpu_dlarrr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarrv.c000066400000000000000000001114401507764646700206670ustar00rootroot00000000000000/* dlarrv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b5 = 0.; static integer c__1 = 1; static integer c__2 = 2; /* Subroutine */ int _starpu_dlarrv_(integer *n, doublereal *vl, doublereal *vu, doublereal *d__, doublereal *l, doublereal *pivmin, integer *isplit, integer *m, integer *dol, integer *dou, doublereal *minrgp, doublereal *rtol1, doublereal *rtol2, doublereal *w, doublereal *werr, doublereal *wgap, integer *iblock, integer *indexw, doublereal *gers, doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2; logical L__1; /* Builtin functions */ double log(doublereal); /* Local variables */ integer minwsize, i__, j, k, p, q, miniwsize, ii; doublereal gl; integer im, in; doublereal gu, gap, eps, tau, tol, tmp; integer zto; doublereal ztz; integer iend, jblk; doublereal lgap; integer done; doublereal rgap, left; integer wend, iter; doublereal bstw; integer itmp1; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); integer indld; doublereal fudge; integer idone; doublereal sigma; integer iinfo, iindr; doublereal resid; logical eskip; doublereal right; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); integer nclus, zfrom; doublereal rqtol; integer iindc1, iindc2; extern /* Subroutine */ int _starpu_dlar1v_(integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, logical *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *); logical stp2ii; doublereal lambda; extern doublereal _starpu_dlamch_(char *); integer ibegin, indeig; logical needbs; integer indlld; doublereal sgndef, mingma; extern /* Subroutine */ int _starpu_dlarrb_(integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); integer oldien, oldncl, wbegin; doublereal spdiam; integer negcnt; extern /* Subroutine */ int _starpu_dlarrf_(integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *); integer oldcls; doublereal savgap; integer ndepth; doublereal ssigma; extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); logical usedbs; integer iindwk, offset; doublereal gaptol; integer newcls, oldfst, indwrk, windex, oldlst; logical usedrq; integer newfst, newftt, parity, windmn, windpl, isupmn, newlst, zusedl; doublereal bstres; integer newsiz, zusedu, zusedw; doublereal nrminv, rqcorr; logical tryrqc; integer isupmx; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARRV computes the eigenvectors of the tridiagonal matrix */ /* T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T. */ /* The input eigenvalues should have been computed by DLARRE. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix. N >= 0. */ /* VL (input) DOUBLE PRECISION */ /* VU (input) DOUBLE PRECISION */ /* Lower and upper bounds of the interval that contains the desired */ /* eigenvalues. VL < VU. Needed to compute gaps on the left or right */ /* end of the extremal eigenvalues in the desired RANGE. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the N diagonal elements of the diagonal matrix D. */ /* On exit, D may be overwritten. */ /* L (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the (N-1) subdiagonal elements of the unit */ /* bidiagonal matrix L are in elements 1 to N-1 of L */ /* (if the matrix is not splitted.) At the end of each block */ /* is stored the corresponding shift as given by DLARRE. */ /* On exit, L is overwritten. */ /* PIVMIN (in) DOUBLE PRECISION */ /* The minimum pivot allowed in the Sturm sequence. */ /* ISPLIT (input) INTEGER array, dimension (N) */ /* The splitting points, at which T breaks up into blocks. */ /* The first block consists of rows/columns 1 to */ /* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */ /* through ISPLIT( 2 ), etc. */ /* M (input) INTEGER */ /* The total number of input eigenvalues. 0 <= M <= N. */ /* DOL (input) INTEGER */ /* DOU (input) INTEGER */ /* If the user wants to compute only selected eigenvectors from all */ /* the eigenvalues supplied, he can specify an index range DOL:DOU. */ /* Or else the setting DOL=1, DOU=M should be applied. */ /* Note that DOL and DOU refer to the order in which the eigenvalues */ /* are stored in W. */ /* If the user wants to compute only selected eigenpairs, then */ /* the columns DOL-1 to DOU+1 of the eigenvector space Z contain the */ /* computed eigenvectors. All other columns of Z are set to zero. */ /* MINRGP (input) DOUBLE PRECISION */ /* RTOL1 (input) DOUBLE PRECISION */ /* RTOL2 (input) DOUBLE PRECISION */ /* Parameters for bisection. */ /* An interval [LEFT,RIGHT] has converged if */ /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ /* W (input/output) DOUBLE PRECISION array, dimension (N) */ /* The first M elements of W contain the APPROXIMATE eigenvalues for */ /* which eigenvectors are to be computed. The eigenvalues */ /* should be grouped by split-off block and ordered from */ /* smallest to largest within the block ( The output array */ /* W from DLARRE is expected here ). Furthermore, they are with */ /* respect to the shift of the corresponding root representation */ /* for their block. On exit, W holds the eigenvalues of the */ /* UNshifted matrix. */ /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ /* The first M elements contain the semiwidth of the uncertainty */ /* interval of the corresponding eigenvalue in W */ /* WGAP (input/output) DOUBLE PRECISION array, dimension (N) */ /* The separation from the right neighbor eigenvalue in W. */ /* IBLOCK (input) INTEGER array, dimension (N) */ /* The indices of the blocks (submatrices) associated with the */ /* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */ /* W(i) belongs to the first block from the top, =2 if W(i) */ /* belongs to the second block, etc. */ /* INDEXW (input) INTEGER array, dimension (N) */ /* The indices of the eigenvalues within each block (submatrix); */ /* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */ /* i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. */ /* GERS (input) DOUBLE PRECISION array, dimension (2*N) */ /* The N Gerschgorin intervals (the i-th Gerschgorin interval */ /* is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should */ /* be computed from the original UNshifted matrix. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ /* If INFO = 0, the first M columns of Z contain the */ /* orthonormal eigenvectors of the matrix T */ /* corresponding to the input eigenvalues, with the i-th */ /* column of Z holding the eigenvector associated with W(i). */ /* Note: the user must ensure that at least max(1,M) columns are */ /* supplied in the array Z. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) */ /* The support of the eigenvectors in Z, i.e., the indices */ /* indicating the nonzero elements in Z. The I-th eigenvector */ /* is nonzero only in elements ISUPPZ( 2*I-1 ) through */ /* ISUPPZ( 2*I ). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (12*N) */ /* IWORK (workspace) INTEGER array, dimension (7*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* > 0: A problem occured in DLARRV. */ /* < 0: One of the called subroutines signaled an internal problem. */ /* Needs inspection of the corresponding parameter IINFO */ /* for further information. */ /* =-1: Problem in DLARRB when refining a child's eigenvalues. */ /* =-2: Problem in DLARRF when computing the RRR of a child. */ /* When a child is inside a tight cluster, it can be difficult */ /* to find an RRR. A partial remedy from the user's point of */ /* view is to make the parameter MINRGP smaller and recompile. */ /* However, as the orthogonality of the computed vectors is */ /* proportional to 1/MINRGP, the user should be aware that */ /* he might be trading in precision when he decreases MINRGP. */ /* =-3: Problem in DLARRB when refining a single eigenvalue */ /* after the Rayleigh correction was rejected. */ /* = 5: The Rayleigh Quotient Iteration failed to converge to */ /* full accuracy in MAXITR steps. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Beresford Parlett, University of California, Berkeley, USA */ /* Jim Demmel, University of California, Berkeley, USA */ /* Inderjit Dhillon, University of Texas, Austin, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* Christof Voemel, University of California, Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* .. */ /* The first N entries of WORK are reserved for the eigenvalues */ /* Parameter adjustments */ --d__; --l; --isplit; --w; --werr; --wgap; --iblock; --indexw; --gers; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --isuppz; --work; --iwork; /* Function Body */ indld = *n + 1; indlld = (*n << 1) + 1; indwrk = *n * 3 + 1; minwsize = *n * 12; i__1 = minwsize; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L5: */ } /* IWORK(IINDR+1:IINDR+N) hold the twist indices R for the */ /* factorization used to compute the FP vector */ iindr = 0; /* IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current */ /* layer and the one above. */ iindc1 = *n; iindc2 = *n << 1; iindwk = *n * 3 + 1; miniwsize = *n * 7; i__1 = miniwsize; for (i__ = 1; i__ <= i__1; ++i__) { iwork[i__] = 0; /* L10: */ } zusedl = 1; if (*dol > 1) { /* Set lower bound for use of Z */ zusedl = *dol - 1; } zusedu = *m; if (*dou < *m) { /* Set lower bound for use of Z */ zusedu = *dou + 1; } /* The width of the part of Z that is used */ zusedw = zusedu - zusedl + 1; _starpu_dlaset_("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz); eps = _starpu_dlamch_("Precision"); rqtol = eps * 2.; /* Set expert flags for standard code. */ tryrqc = TRUE_; if (*dol == 1 && *dou == *m) { } else { /* Only selected eigenpairs are computed. Since the other evalues */ /* are not refined by RQ iteration, bisection has to compute to full */ /* accuracy. */ *rtol1 = eps * 4.; *rtol2 = eps * 4.; } /* The entries WBEGIN:WEND in W, WERR, WGAP correspond to the */ /* desired eigenvalues. The support of the nonzero eigenvector */ /* entries is contained in the interval IBEGIN:IEND. */ /* Remark that if k eigenpairs are desired, then the eigenvectors */ /* are stored in k contiguous columns of Z. */ /* DONE is the number of eigenvectors already computed */ done = 0; ibegin = 1; wbegin = 1; i__1 = iblock[*m]; for (jblk = 1; jblk <= i__1; ++jblk) { iend = isplit[jblk]; sigma = l[iend]; /* Find the eigenvectors of the submatrix indexed IBEGIN */ /* through IEND. */ wend = wbegin - 1; L15: if (wend < *m) { if (iblock[wend + 1] == jblk) { ++wend; goto L15; } } if (wend < wbegin) { ibegin = iend + 1; goto L170; } else if (wend < *dol || wbegin > *dou) { ibegin = iend + 1; wbegin = wend + 1; goto L170; } /* Find local spectral diameter of the block */ gl = gers[(ibegin << 1) - 1]; gu = gers[ibegin * 2]; i__2 = iend; for (i__ = ibegin + 1; i__ <= i__2; ++i__) { /* Computing MIN */ d__1 = gers[(i__ << 1) - 1]; gl = min(d__1,gl); /* Computing MAX */ d__1 = gers[i__ * 2]; gu = max(d__1,gu); /* L20: */ } spdiam = gu - gl; /* OLDIEN is the last index of the previous block */ oldien = ibegin - 1; /* Calculate the size of the current block */ in = iend - ibegin + 1; /* The number of eigenvalues in the current block */ im = wend - wbegin + 1; /* This is for a 1x1 block */ if (ibegin == iend) { ++done; z__[ibegin + wbegin * z_dim1] = 1.; isuppz[(wbegin << 1) - 1] = ibegin; isuppz[wbegin * 2] = ibegin; w[wbegin] += sigma; work[wbegin] = w[wbegin]; ibegin = iend + 1; ++wbegin; goto L170; } /* The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) */ /* Note that these can be approximations, in this case, the corresp. */ /* entries of WERR give the size of the uncertainty interval. */ /* The eigenvalue approximations will be refined when necessary as */ /* high relative accuracy is required for the computation of the */ /* corresponding eigenvectors. */ _starpu_dcopy_(&im, &w[wbegin], &c__1, &work[wbegin], &c__1); /* We store in W the eigenvalue approximations w.r.t. the original */ /* matrix T. */ i__2 = im; for (i__ = 1; i__ <= i__2; ++i__) { w[wbegin + i__ - 1] += sigma; /* L30: */ } /* NDEPTH is the current depth of the representation tree */ ndepth = 0; /* PARITY is either 1 or 0 */ parity = 1; /* NCLUS is the number of clusters for the next level of the */ /* representation tree, we start with NCLUS = 1 for the root */ nclus = 1; iwork[iindc1 + 1] = 1; iwork[iindc1 + 2] = im; /* IDONE is the number of eigenvectors already computed in the current */ /* block */ idone = 0; /* loop while( IDONE.LT.IM ) */ /* generate the representation tree for the current block and */ /* compute the eigenvectors */ L40: if (idone < im) { /* This is a crude protection against infinitely deep trees */ if (ndepth > *m) { *info = -2; return 0; } /* breadth first processing of the current level of the representation */ /* tree: OLDNCL = number of clusters on current level */ oldncl = nclus; /* reset NCLUS to count the number of child clusters */ nclus = 0; parity = 1 - parity; if (parity == 0) { oldcls = iindc1; newcls = iindc2; } else { oldcls = iindc2; newcls = iindc1; } /* Process the clusters on the current level */ i__2 = oldncl; for (i__ = 1; i__ <= i__2; ++i__) { j = oldcls + (i__ << 1); /* OLDFST, OLDLST = first, last index of current cluster. */ /* cluster indices start with 1 and are relative */ /* to WBEGIN when accessing W, WGAP, WERR, Z */ oldfst = iwork[j - 1]; oldlst = iwork[j]; if (ndepth > 0) { /* Retrieve relatively robust representation (RRR) of cluster */ /* that has been computed at the previous level */ /* The RRR is stored in Z and overwritten once the eigenvectors */ /* have been computed or when the cluster is refined */ if (*dol == 1 && *dou == *m) { /* Get representation from location of the leftmost evalue */ /* of the cluster */ j = wbegin + oldfst - 1; } else { if (wbegin + oldfst - 1 < *dol) { /* Get representation from the left end of Z array */ j = *dol - 1; } else if (wbegin + oldfst - 1 > *dou) { /* Get representation from the right end of Z array */ j = *dou; } else { j = wbegin + oldfst - 1; } } _starpu_dcopy_(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin] , &c__1); i__3 = in - 1; _starpu_dcopy_(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[ ibegin], &c__1); sigma = z__[iend + (j + 1) * z_dim1]; /* Set the corresponding entries in Z to zero */ _starpu_dlaset_("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j * z_dim1], ldz); } /* Compute DL and DLL of current RRR */ i__3 = iend - 1; for (j = ibegin; j <= i__3; ++j) { tmp = d__[j] * l[j]; work[indld - 1 + j] = tmp; work[indlld - 1 + j] = tmp * l[j]; /* L50: */ } if (ndepth > 0) { /* P and Q are index of the first and last eigenvalue to compute */ /* within the current block */ p = indexw[wbegin - 1 + oldfst]; q = indexw[wbegin - 1 + oldlst]; /* Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET */ /* thru' Q-OFFSET elements of these arrays are to be used. */ /* OFFSET = P-OLDFST */ offset = indexw[wbegin] - 1; /* perform limited bisection (if necessary) to get approximate */ /* eigenvalues to the precision needed. */ _starpu_dlarrb_(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p, &q, rtol1, rtol2, &offset, &work[wbegin], &wgap[ wbegin], &werr[wbegin], &work[indwrk], &iwork[ iindwk], pivmin, &spdiam, &in, &iinfo); if (iinfo != 0) { *info = -1; return 0; } /* We also recompute the extremal gaps. W holds all eigenvalues */ /* of the unshifted matrix and must be used for computation */ /* of WGAP, the entries of WORK might stem from RRRs with */ /* different shifts. The gaps from WBEGIN-1+OLDFST to */ /* WBEGIN-1+OLDLST are correctly computed in DLARRB. */ /* However, we only allow the gaps to become greater since */ /* this is what should happen when we decrease WERR */ if (oldfst > 1) { /* Computing MAX */ d__1 = wgap[wbegin + oldfst - 2], d__2 = w[wbegin + oldfst - 1] - werr[wbegin + oldfst - 1] - w[ wbegin + oldfst - 2] - werr[wbegin + oldfst - 2]; wgap[wbegin + oldfst - 2] = max(d__1,d__2); } if (wbegin + oldlst - 1 < wend) { /* Computing MAX */ d__1 = wgap[wbegin + oldlst - 1], d__2 = w[wbegin + oldlst] - werr[wbegin + oldlst] - w[wbegin + oldlst - 1] - werr[wbegin + oldlst - 1]; wgap[wbegin + oldlst - 1] = max(d__1,d__2); } /* Each time the eigenvalues in WORK get refined, we store */ /* the newly found approximation with all shifts applied in W */ i__3 = oldlst; for (j = oldfst; j <= i__3; ++j) { w[wbegin + j - 1] = work[wbegin + j - 1] + sigma; /* L53: */ } } /* Process the current node. */ newfst = oldfst; i__3 = oldlst; for (j = oldfst; j <= i__3; ++j) { if (j == oldlst) { /* we are at the right end of the cluster, this is also the */ /* boundary of the child cluster */ newlst = j; } else if (wgap[wbegin + j - 1] >= *minrgp * (d__1 = work[ wbegin + j - 1], abs(d__1))) { /* the right relative gap is big enough, the child cluster */ /* (NEWFST,..,NEWLST) is well separated from the following */ newlst = j; } else { /* inside a child cluster, the relative gap is not */ /* big enough. */ goto L140; } /* Compute size of child cluster found */ newsiz = newlst - newfst + 1; /* NEWFTT is the place in Z where the new RRR or the computed */ /* eigenvector is to be stored */ if (*dol == 1 && *dou == *m) { /* Store representation at location of the leftmost evalue */ /* of the cluster */ newftt = wbegin + newfst - 1; } else { if (wbegin + newfst - 1 < *dol) { /* Store representation at the left end of Z array */ newftt = *dol - 1; } else if (wbegin + newfst - 1 > *dou) { /* Store representation at the right end of Z array */ newftt = *dou; } else { newftt = wbegin + newfst - 1; } } if (newsiz > 1) { /* Current child is not a singleton but a cluster. */ /* Compute and store new representation of child. */ /* Compute left and right cluster gap. */ /* LGAP and RGAP are not computed from WORK because */ /* the eigenvalue approximations may stem from RRRs */ /* different shifts. However, W hold all eigenvalues */ /* of the unshifted matrix. Still, the entries in WGAP */ /* have to be computed from WORK since the entries */ /* in W might be of the same order so that gaps are not */ /* exhibited correctly for very close eigenvalues. */ if (newfst == 1) { /* Computing MAX */ d__1 = 0., d__2 = w[wbegin] - werr[wbegin] - *vl; lgap = max(d__1,d__2); } else { lgap = wgap[wbegin + newfst - 2]; } rgap = wgap[wbegin + newlst - 1]; /* Compute left- and rightmost eigenvalue of child */ /* to high precision in order to shift as close */ /* as possible and obtain as large relative gaps */ /* as possible */ for (k = 1; k <= 2; ++k) { if (k == 1) { p = indexw[wbegin - 1 + newfst]; } else { p = indexw[wbegin - 1 + newlst]; } offset = indexw[wbegin] - 1; _starpu_dlarrb_(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p, &p, &rqtol, &rqtol, &offset, & work[wbegin], &wgap[wbegin], &werr[wbegin] , &work[indwrk], &iwork[iindwk], pivmin, & spdiam, &in, &iinfo); /* L55: */ } if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1 > *dou) { /* if the cluster contains no desired eigenvalues */ /* skip the computation of that branch of the rep. tree */ /* We could skip before the refinement of the extremal */ /* eigenvalues of the child, but then the representation */ /* tree could be different from the one when nothing is */ /* skipped. For this reason we skip at this place. */ idone = idone + newlst - newfst + 1; goto L139; } /* Compute RRR of child cluster. */ /* Note that the new RRR is stored in Z */ /* DLARRF needs LWORK = 2*N */ _starpu_dlarrf_(&in, &d__[ibegin], &l[ibegin], &work[indld + ibegin - 1], &newfst, &newlst, &work[wbegin], &wgap[wbegin], &werr[wbegin], &spdiam, &lgap, &rgap, pivmin, &tau, &z__[ibegin + newftt * z_dim1], &z__[ibegin + (newftt + 1) * z_dim1], &work[indwrk], &iinfo); if (iinfo == 0) { /* a new RRR for the cluster was found by DLARRF */ /* update shift and store it */ ssigma = sigma + tau; z__[iend + (newftt + 1) * z_dim1] = ssigma; /* WORK() are the midpoints and WERR() the semi-width */ /* Note that the entries in W are unchanged. */ i__4 = newlst; for (k = newfst; k <= i__4; ++k) { fudge = eps * 3. * (d__1 = work[wbegin + k - 1], abs(d__1)); work[wbegin + k - 1] -= tau; fudge += eps * 4. * (d__1 = work[wbegin + k - 1], abs(d__1)); /* Fudge errors */ werr[wbegin + k - 1] += fudge; /* Gaps are not fudged. Provided that WERR is small */ /* when eigenvalues are close, a zero gap indicates */ /* that a new representation is needed for resolving */ /* the cluster. A fudge could lead to a wrong decision */ /* of judging eigenvalues 'separated' which in */ /* reality are not. This could have a negative impact */ /* on the orthogonality of the computed eigenvectors. */ /* L116: */ } ++nclus; k = newcls + (nclus << 1); iwork[k - 1] = newfst; iwork[k] = newlst; } else { *info = -2; return 0; } } else { /* Compute eigenvector of singleton */ iter = 0; tol = log((doublereal) in) * 4. * eps; k = newfst; windex = wbegin + k - 1; /* Computing MAX */ i__4 = windex - 1; windmn = max(i__4,1); /* Computing MIN */ i__4 = windex + 1; windpl = min(i__4,*m); lambda = work[windex]; ++done; /* Check if eigenvector computation is to be skipped */ if (windex < *dol || windex > *dou) { eskip = TRUE_; goto L125; } else { eskip = FALSE_; } left = work[windex] - werr[windex]; right = work[windex] + werr[windex]; indeig = indexw[windex]; /* Note that since we compute the eigenpairs for a child, */ /* all eigenvalue approximations are w.r.t the same shift. */ /* In this case, the entries in WORK should be used for */ /* computing the gaps since they exhibit even very small */ /* differences in the eigenvalues, as opposed to the */ /* entries in W which might "look" the same. */ if (k == 1) { /* In the case RANGE='I' and with not much initial */ /* accuracy in LAMBDA and VL, the formula */ /* LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) */ /* can lead to an overestimation of the left gap and */ /* thus to inadequately early RQI 'convergence'. */ /* Prevent this by forcing a small left gap. */ /* Computing MAX */ d__1 = abs(left), d__2 = abs(right); lgap = eps * max(d__1,d__2); } else { lgap = wgap[windmn]; } if (k == im) { /* In the case RANGE='I' and with not much initial */ /* accuracy in LAMBDA and VU, the formula */ /* can lead to an overestimation of the right gap and */ /* thus to inadequately early RQI 'convergence'. */ /* Prevent this by forcing a small right gap. */ /* Computing MAX */ d__1 = abs(left), d__2 = abs(right); rgap = eps * max(d__1,d__2); } else { rgap = wgap[windex]; } gap = min(lgap,rgap); if (k == 1 || k == im) { /* The eigenvector support can become wrong */ /* because significant entries could be cut off due to a */ /* large GAPTOL parameter in LAR1V. Prevent this. */ gaptol = 0.; } else { gaptol = gap * eps; } isupmn = in; isupmx = 1; /* Update WGAP so that it holds the minimum gap */ /* to the left or the right. This is crucial in the */ /* case where bisection is used to ensure that the */ /* eigenvalue is refined up to the required precision. */ /* The correct value is restored afterwards. */ savgap = wgap[windex]; wgap[windex] = gap; /* We want to use the Rayleigh Quotient Correction */ /* as often as possible since it converges quadratically */ /* when we are close enough to the desired eigenvalue. */ /* However, the Rayleigh Quotient can have the wrong sign */ /* and lead us away from the desired eigenvalue. In this */ /* case, the best we can do is to use bisection. */ usedbs = FALSE_; usedrq = FALSE_; /* Bisection is initially turned off unless it is forced */ needbs = ! tryrqc; L120: /* Check if bisection should be used to refine eigenvalue */ if (needbs) { /* Take the bisection as new iterate */ usedbs = TRUE_; itmp1 = iwork[iindr + windex]; offset = indexw[wbegin] - 1; d__1 = eps * 2.; _starpu_dlarrb_(&in, &d__[ibegin], &work[indlld + ibegin - 1], &indeig, &indeig, &c_b5, &d__1, & offset, &work[wbegin], &wgap[wbegin], & werr[wbegin], &work[indwrk], &iwork[ iindwk], pivmin, &spdiam, &itmp1, &iinfo); if (iinfo != 0) { *info = -3; return 0; } lambda = work[windex]; /* Reset twist index from inaccurate LAMBDA to */ /* force computation of true MINGMA */ iwork[iindr + windex] = 0; } /* Given LAMBDA, compute the eigenvector. */ L__1 = ! usedbs; _starpu_dlar1v_(&in, &c__1, &in, &lambda, &d__[ibegin], &l[ ibegin], &work[indld + ibegin - 1], &work[ indlld + ibegin - 1], pivmin, &gaptol, &z__[ ibegin + windex * z_dim1], &L__1, &negcnt, & ztz, &mingma, &iwork[iindr + windex], &isuppz[ (windex << 1) - 1], &nrminv, &resid, &rqcorr, &work[indwrk]); if (iter == 0) { bstres = resid; bstw = lambda; } else if (resid < bstres) { bstres = resid; bstw = lambda; } /* Computing MIN */ i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1]; isupmn = min(i__4,i__5); /* Computing MAX */ i__4 = isupmx, i__5 = isuppz[windex * 2]; isupmx = max(i__4,i__5); ++iter; /* sin alpha <= |resid|/gap */ /* Note that both the residual and the gap are */ /* proportional to the matrix, so ||T|| doesn't play */ /* a role in the quotient */ /* Convergence test for Rayleigh-Quotient iteration */ /* (omitted when Bisection has been used) */ if (resid > tol * gap && abs(rqcorr) > rqtol * abs( lambda) && ! usedbs) { /* We need to check that the RQCORR update doesn't */ /* move the eigenvalue away from the desired one and */ /* towards a neighbor. -> protection with bisection */ if (indeig <= negcnt) { /* The wanted eigenvalue lies to the left */ sgndef = -1.; } else { /* The wanted eigenvalue lies to the right */ sgndef = 1.; } /* We only use the RQCORR if it improves the */ /* the iterate reasonably. */ if (rqcorr * sgndef >= 0. && lambda + rqcorr <= right && lambda + rqcorr >= left) { usedrq = TRUE_; /* Store new midpoint of bisection interval in WORK */ if (sgndef == 1.) { /* The current LAMBDA is on the left of the true */ /* eigenvalue */ left = lambda; /* We prefer to assume that the error estimate */ /* is correct. We could make the interval not */ /* as a bracket but to be modified if the RQCORR */ /* chooses to. In this case, the RIGHT side should */ /* be modified as follows: */ /* RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */ } else { /* The current LAMBDA is on the right of the true */ /* eigenvalue */ right = lambda; /* See comment about assuming the error estimate is */ /* correct above. */ /* LEFT = MIN(LEFT, LAMBDA + RQCORR) */ } work[windex] = (right + left) * .5; /* Take RQCORR since it has the correct sign and */ /* improves the iterate reasonably */ lambda += rqcorr; /* Update width of error interval */ werr[windex] = (right - left) * .5; } else { needbs = TRUE_; } if (right - left < rqtol * abs(lambda)) { /* The eigenvalue is computed to bisection accuracy */ /* compute eigenvector and stop */ usedbs = TRUE_; goto L120; } else if (iter < 10) { goto L120; } else if (iter == 10) { needbs = TRUE_; goto L120; } else { *info = 5; return 0; } } else { stp2ii = FALSE_; if (usedrq && usedbs && bstres <= resid) { lambda = bstw; stp2ii = TRUE_; } if (stp2ii) { /* improve error angle by second step */ L__1 = ! usedbs; _starpu_dlar1v_(&in, &c__1, &in, &lambda, &d__[ibegin] , &l[ibegin], &work[indld + ibegin - 1], &work[indlld + ibegin - 1], pivmin, &gaptol, &z__[ibegin + windex * z_dim1], &L__1, &negcnt, &ztz, & mingma, &iwork[iindr + windex], & isuppz[(windex << 1) - 1], &nrminv, & resid, &rqcorr, &work[indwrk]); } work[windex] = lambda; } /* Compute FP-vector support w.r.t. whole matrix */ isuppz[(windex << 1) - 1] += oldien; isuppz[windex * 2] += oldien; zfrom = isuppz[(windex << 1) - 1]; zto = isuppz[windex * 2]; isupmn += oldien; isupmx += oldien; /* Ensure vector is ok if support in the RQI has changed */ if (isupmn < zfrom) { i__4 = zfrom - 1; for (ii = isupmn; ii <= i__4; ++ii) { z__[ii + windex * z_dim1] = 0.; /* L122: */ } } if (isupmx > zto) { i__4 = isupmx; for (ii = zto + 1; ii <= i__4; ++ii) { z__[ii + windex * z_dim1] = 0.; /* L123: */ } } i__4 = zto - zfrom + 1; _starpu_dscal_(&i__4, &nrminv, &z__[zfrom + windex * z_dim1], &c__1); L125: /* Update W */ w[windex] = lambda + sigma; /* Recompute the gaps on the left and right */ /* But only allow them to become larger and not */ /* smaller (which can only happen through "bad" */ /* cancellation and doesn't reflect the theory */ /* where the initial gaps are underestimated due */ /* to WERR being too crude.) */ if (! eskip) { if (k > 1) { /* Computing MAX */ d__1 = wgap[windmn], d__2 = w[windex] - werr[ windex] - w[windmn] - werr[windmn]; wgap[windmn] = max(d__1,d__2); } if (windex < wend) { /* Computing MAX */ d__1 = savgap, d__2 = w[windpl] - werr[windpl] - w[windex] - werr[windex]; wgap[windex] = max(d__1,d__2); } } ++idone; } /* here ends the code for the current child */ L139: /* Proceed to any remaining child nodes */ newfst = j + 1; L140: ; } /* L150: */ } ++ndepth; goto L40; } ibegin = iend + 1; wbegin = wend + 1; L170: ; } return 0; /* End of DLARRV */ } /* _starpu_dlarrv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarscl2.c000066400000000000000000000051341507764646700211050ustar00rootroot00000000000000/* dlarscl2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlarscl2_(integer *m, integer *n, doublereal *d__, doublereal *x, integer *ldx) { /* System generated locals */ integer x_dim1, x_offset, i__1, i__2; /* Local variables */ integer i__, j; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARSCL2 performs a reciprocal diagonal scaling on an vector: */ /* x <-- inv(D) * x */ /* where the diagonal matrix D is stored as a vector. */ /* Eventually to be replaced by BLAS_dge_diag_scale in the new BLAS */ /* standard. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of D and X. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of D and X. N >= 0. */ /* D (input) DOUBLE PRECISION array, length M */ /* Diagonal matrix D, stored as a vector of length M. */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,N) */ /* On entry, the vector X to be scaled by D. */ /* On exit, the scaled vector. */ /* LDX (input) INTEGER */ /* The leading dimension of the vector X. LDX >= 0. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --d__; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; /* Function Body */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { x[i__ + j * x_dim1] /= d__[i__]; } } return 0; } /* _starpu_dlarscl2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlartg.c000066400000000000000000000114711507764646700206550ustar00rootroot00000000000000/* dlartg.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlartg_(doublereal *f, doublereal *g, doublereal *cs, doublereal *sn, doublereal *r__) { /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Builtin functions */ double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal); /* Local variables */ integer i__; doublereal f1, g1, eps, scale; integer count; doublereal safmn2, safmx2; extern doublereal _starpu_dlamch_(char *); doublereal safmin; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARTG generate a plane rotation so that */ /* [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. */ /* [ -SN CS ] [ G ] [ 0 ] */ /* This is a slower, more accurate version of the BLAS1 routine DROTG, */ /* with the following other differences: */ /* F and G are unchanged on return. */ /* If G=0, then CS=1 and SN=0. */ /* If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any */ /* floating point operations (saves work in DBDSQR when */ /* there are zeros on the diagonal). */ /* If F exceeds G in magnitude, CS will be positive. */ /* Arguments */ /* ========= */ /* F (input) DOUBLE PRECISION */ /* The first component of vector to be rotated. */ /* G (input) DOUBLE PRECISION */ /* The second component of vector to be rotated. */ /* CS (output) DOUBLE PRECISION */ /* The cosine of the rotation. */ /* SN (output) DOUBLE PRECISION */ /* The sine of the rotation. */ /* R (output) DOUBLE PRECISION */ /* The nonzero component of the rotated vector. */ /* This version has a few statements commented out for thread safety */ /* (machine parameters are computed on each entry). 10 feb 03, SJH. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* LOGICAL FIRST */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Save statement .. */ /* SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 */ /* .. */ /* .. Data statements .. */ /* DATA FIRST / .TRUE. / */ /* .. */ /* .. Executable Statements .. */ /* IF( FIRST ) THEN */ safmin = _starpu_dlamch_("S"); eps = _starpu_dlamch_("E"); d__1 = _starpu_dlamch_("B"); i__1 = (integer) (log(safmin / eps) / log(_starpu_dlamch_("B")) / 2.); safmn2 = pow_di(&d__1, &i__1); safmx2 = 1. / safmn2; /* FIRST = .FALSE. */ /* END IF */ if (*g == 0.) { *cs = 1.; *sn = 0.; *r__ = *f; } else if (*f == 0.) { *cs = 0.; *sn = 1.; *r__ = *g; } else { f1 = *f; g1 = *g; /* Computing MAX */ d__1 = abs(f1), d__2 = abs(g1); scale = max(d__1,d__2); if (scale >= safmx2) { count = 0; L10: ++count; f1 *= safmn2; g1 *= safmn2; /* Computing MAX */ d__1 = abs(f1), d__2 = abs(g1); scale = max(d__1,d__2); if (scale >= safmx2) { goto L10; } /* Computing 2nd power */ d__1 = f1; /* Computing 2nd power */ d__2 = g1; *r__ = sqrt(d__1 * d__1 + d__2 * d__2); *cs = f1 / *r__; *sn = g1 / *r__; i__1 = count; for (i__ = 1; i__ <= i__1; ++i__) { *r__ *= safmx2; /* L20: */ } } else if (scale <= safmn2) { count = 0; L30: ++count; f1 *= safmx2; g1 *= safmx2; /* Computing MAX */ d__1 = abs(f1), d__2 = abs(g1); scale = max(d__1,d__2); if (scale <= safmn2) { goto L30; } /* Computing 2nd power */ d__1 = f1; /* Computing 2nd power */ d__2 = g1; *r__ = sqrt(d__1 * d__1 + d__2 * d__2); *cs = f1 / *r__; *sn = g1 / *r__; i__1 = count; for (i__ = 1; i__ <= i__1; ++i__) { *r__ *= safmn2; /* L40: */ } } else { /* Computing 2nd power */ d__1 = f1; /* Computing 2nd power */ d__2 = g1; *r__ = sqrt(d__1 * d__1 + d__2 * d__2); *cs = f1 / *r__; *sn = g1 / *r__; } if (abs(*f) > abs(*g) && *cs < 0.) { *cs = -(*cs); *sn = -(*sn); *r__ = -(*r__); } } return 0; /* End of DLARTG */ } /* _starpu_dlartg_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlartv.c000066400000000000000000000053521507764646700206750ustar00rootroot00000000000000/* dlartv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlartv_(integer *n, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *c__, doublereal *s, integer *incc) { /* System generated locals */ integer i__1; /* Local variables */ integer i__, ic, ix, iy; doublereal xi, yi; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARTV applies a vector of real plane rotations to elements of the */ /* real vectors x and y. For i = 1,2,...,n */ /* ( x(i) ) := ( c(i) s(i) ) ( x(i) ) */ /* ( y(i) ) ( -s(i) c(i) ) ( y(i) ) */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of plane rotations to be applied. */ /* X (input/output) DOUBLE PRECISION array, */ /* dimension (1+(N-1)*INCX) */ /* The vector x. */ /* INCX (input) INTEGER */ /* The increment between elements of X. INCX > 0. */ /* Y (input/output) DOUBLE PRECISION array, */ /* dimension (1+(N-1)*INCY) */ /* The vector y. */ /* INCY (input) INTEGER */ /* The increment between elements of Y. INCY > 0. */ /* C (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) */ /* The cosines of the plane rotations. */ /* S (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) */ /* The sines of the plane rotations. */ /* INCC (input) INTEGER */ /* The increment between elements of C and S. INCC > 0. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --s; --c__; --y; --x; /* Function Body */ ix = 1; iy = 1; ic = 1; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { xi = x[ix]; yi = y[iy]; x[ix] = c__[ic] * xi + s[ic] * yi; y[iy] = c__[ic] * yi - s[ic] * xi; ix += *incx; iy += *incy; ic += *incc; /* L10: */ } return 0; /* End of DLARTV */ } /* _starpu_dlartv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaruv.c000066400000000000000000000156331507764646700207010ustar00rootroot00000000000000/* dlaruv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaruv_(integer *iseed, integer *n, doublereal *x) { /* Initialized data */ static integer mm[512] /* was [128][4] */ = { 494,2637,255,2008,1253, 3344,4084,1739,3143,3468,688,1657,1238,3166,1292,3422,1270,2016, 154,2862,697,1706,491,931,1444,444,3577,3944,2184,1661,3482,657, 3023,3618,1267,1828,164,3798,3087,2400,2870,3876,1905,1593,1797, 1234,3460,328,2861,1950,617,2070,3331,769,1558,2412,2800,189,287, 2045,1227,2838,209,2770,3654,3993,192,2253,3491,2889,2857,2094, 1818,688,1407,634,3231,815,3524,1914,516,164,303,2144,3480,119, 3357,837,2826,2332,2089,3780,1700,3712,150,2000,3375,1621,3090, 3765,1149,3146,33,3082,2741,359,3316,1749,185,2784,2202,2199,1364, 1244,2020,3160,2785,2772,1217,1822,1245,2252,3904,2774,997,2573, 1148,545,322,789,1440,752,2859,123,1848,643,2405,2638,2344,46, 3814,913,3649,339,3808,822,2832,3078,3633,2970,637,2249,2081,4019, 1478,242,481,2075,4058,622,3376,812,234,641,4005,1122,3135,2640, 2302,40,1832,2247,2034,2637,1287,1691,496,1597,2394,2584,1843,336, 1472,2407,433,2096,1761,2810,566,442,41,1238,1086,603,840,3168, 1499,1084,3438,2408,1589,2391,288,26,512,1456,171,1677,2657,2270, 2587,2961,1970,1817,676,1410,3723,2803,3185,184,663,499,3784,1631, 1925,3912,1398,1349,1441,2224,2411,1907,3192,2786,382,37,759,2948, 1862,3802,2423,2051,2295,1332,1832,2405,3638,3661,327,3660,716, 1842,3987,1368,1848,2366,2508,3754,1766,3572,2893,307,1297,3966, 758,2598,3406,2922,1038,2934,2091,2451,1580,1958,2055,1507,1078, 3273,17,854,2916,3971,2889,3831,2621,1541,893,736,3992,787,2125, 2364,2460,257,1574,3912,1216,3248,3401,2124,2762,149,2245,166,466, 4018,1399,190,2879,153,2320,18,712,2159,2318,2091,3443,1510,449, 1956,2201,3137,3399,1321,2271,3667,2703,629,2365,2431,1113,3922, 2554,184,2099,3228,4012,1921,3452,3901,572,3309,3171,817,3039, 1696,1256,3715,2077,3019,1497,1101,717,51,981,1978,1813,3881,76, 3846,3694,1682,124,1660,3997,479,1141,886,3514,1301,3604,1888, 1836,1990,2058,692,1194,20,3285,2046,2107,3508,3525,3801,2549, 1145,2253,305,3301,1065,3133,2913,3285,1241,1197,3729,2501,1673, 541,2753,949,2361,1165,4081,2725,3305,3069,3617,3733,409,2157, 1361,3973,1865,2525,1409,3445,3577,77,3761,2149,1449,3005,225,85, 3673,3117,3089,1349,2057,413,65,1845,697,3085,3441,1573,3689,2941, 929,533,2841,4077,721,2821,2249,2397,2817,245,1913,1997,3121,997, 1833,2877,1633,981,2009,941,2449,197,2441,285,1473,2741,3129,909, 2801,421,4073,2813,2337,1429,1177,1901,81,1669,2633,2269,129,1141, 249,3917,2481,3941,2217,2749,3041,1877,345,2861,1809,3141,2825, 157,2881,3637,1465,2829,2161,3365,361,2685,3745,2325,3609,3821, 3537,517,3017,2141,1537 }; /* System generated locals */ integer i__1; /* Local variables */ integer i__, i1, i2, i3, i4, it1, it2, it3, it4; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARUV returns a vector of n random real numbers from a uniform (0,1) */ /* distribution (n <= 128). */ /* This is an auxiliary routine called by DLARNV and ZLARNV. */ /* Arguments */ /* ========= */ /* ISEED (input/output) INTEGER array, dimension (4) */ /* On entry, the seed of the random number generator; the array */ /* elements must be between 0 and 4095, and ISEED(4) must be */ /* odd. */ /* On exit, the seed is updated. */ /* N (input) INTEGER */ /* The number of random numbers to be generated. N <= 128. */ /* X (output) DOUBLE PRECISION array, dimension (N) */ /* The generated random numbers. */ /* Further Details */ /* =============== */ /* This routine uses a multiplicative congruential method with modulus */ /* 2**48 and multiplier 33952834046453 (see G.S.Fishman, */ /* 'Multiplicative congruential random number generators with modulus */ /* 2**b: an exhaustive analysis for b = 32 and a partial analysis for */ /* b = 48', Math. Comp. 189, pp 331-344, 1990). */ /* 48-bit integers are stored in 4 integer array elements with 12 bits */ /* per element. Hence the routine is portable across machines with */ /* integers of 32 bits or more. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Data statements .. */ /* Parameter adjustments */ --iseed; --x; /* Function Body */ /* .. */ /* .. Executable Statements .. */ i1 = iseed[1]; i2 = iseed[2]; i3 = iseed[3]; i4 = iseed[4]; i__1 = min(*n,128); for (i__ = 1; i__ <= i__1; ++i__) { L20: /* Multiply the seed by i-th power of the multiplier modulo 2**48 */ it4 = i4 * mm[i__ + 383]; it3 = it4 / 4096; it4 -= it3 << 12; it3 = it3 + i3 * mm[i__ + 383] + i4 * mm[i__ + 255]; it2 = it3 / 4096; it3 -= it2 << 12; it2 = it2 + i2 * mm[i__ + 383] + i3 * mm[i__ + 255] + i4 * mm[i__ + 127]; it1 = it2 / 4096; it2 -= it1 << 12; it1 = it1 + i1 * mm[i__ + 383] + i2 * mm[i__ + 255] + i3 * mm[i__ + 127] + i4 * mm[i__ - 1]; it1 %= 4096; /* Convert 48-bit integer to a real number in the interval (0,1) */ x[i__] = ((doublereal) it1 + ((doublereal) it2 + ((doublereal) it3 + ( doublereal) it4 * 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4; if (x[i__] == 1.) { /* If a real number has n bits of precision, and the first */ /* n bits of the 48-bit integer above happen to be all 1 (which */ /* will occur about once every 2**n calls), then X( I ) will */ /* be rounded to exactly 1.0. */ /* Since X( I ) is not supposed to return exactly 0.0 or 1.0, */ /* the statistically correct thing to do in this situation is */ /* simply to iterate again. */ /* N.B. the case X( I ) = 0.0 should not be possible. */ i1 += 2; i2 += 2; i3 += 2; i4 += 2; goto L20; } /* L10: */ } /* Return final value of seed */ iseed[1] = it1; iseed[2] = it2; iseed[3] = it3; iseed[4] = it4; return 0; /* End of DLARUV */ } /* _starpu_dlaruv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarz.c000066400000000000000000000131701507764646700205120ustar00rootroot00000000000000/* dlarz.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b5 = 1.; /* Subroutine */ int _starpu_dlarz_(char *side, integer *m, integer *n, integer *l, doublereal *v, integer *incv, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work) { /* System generated locals */ integer c_dim1, c_offset; doublereal d__1; /* Local variables */ extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *) ; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARZ applies a real elementary reflector H to a real M-by-N */ /* matrix C, from either the left or the right. H is represented in the */ /* form */ /* H = I - tau * v * v' */ /* where tau is a real scalar and v is a real vector. */ /* If tau = 0, then H is taken to be the unit matrix. */ /* H is a product of k elementary reflectors as returned by DTZRZF. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': form H * C */ /* = 'R': form C * H */ /* M (input) INTEGER */ /* The number of rows of the matrix C. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. */ /* L (input) INTEGER */ /* The number of entries of the vector V containing */ /* the meaningful part of the Householder vectors. */ /* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */ /* V (input) DOUBLE PRECISION array, dimension (1+(L-1)*abs(INCV)) */ /* The vector v in the representation of H as returned by */ /* DTZRZF. V is not used if TAU = 0. */ /* INCV (input) INTEGER */ /* The increment between elements of v. INCV <> 0. */ /* TAU (input) DOUBLE PRECISION */ /* The value tau in the representation of H. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the M-by-N matrix C. */ /* On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ /* or C * H if SIDE = 'R'. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (N) if SIDE = 'L' */ /* or (M) if SIDE = 'R' */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --v; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ if (_starpu_lsame_(side, "L")) { /* Form H * C */ if (*tau != 0.) { /* w( 1:n ) = C( 1, 1:n ) */ _starpu_dcopy_(n, &c__[c_offset], ldc, &work[1], &c__1); /* w( 1:n ) = w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l ) */ _starpu_dgemv_("Transpose", l, n, &c_b5, &c__[*m - *l + 1 + c_dim1], ldc, &v[1], incv, &c_b5, &work[1], &c__1); /* C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) */ d__1 = -(*tau); _starpu_daxpy_(n, &d__1, &work[1], &c__1, &c__[c_offset], ldc); /* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... */ /* tau * v( 1:l ) * w( 1:n )' */ d__1 = -(*tau); _starpu_dger_(l, n, &d__1, &v[1], incv, &work[1], &c__1, &c__[*m - *l + 1 + c_dim1], ldc); } } else { /* Form C * H */ if (*tau != 0.) { /* w( 1:m ) = C( 1:m, 1 ) */ _starpu_dcopy_(m, &c__[c_offset], &c__1, &work[1], &c__1); /* w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l ) */ _starpu_dgemv_("No transpose", m, l, &c_b5, &c__[(*n - *l + 1) * c_dim1 + 1], ldc, &v[1], incv, &c_b5, &work[1], &c__1); /* C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) */ d__1 = -(*tau); _starpu_daxpy_(m, &d__1, &work[1], &c__1, &c__[c_offset], &c__1); /* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... */ /* tau * w( 1:m ) * v( 1:l )' */ d__1 = -(*tau); _starpu_dger_(m, l, &d__1, &work[1], &c__1, &v[1], incv, &c__[(*n - *l + 1) * c_dim1 + 1], ldc); } } return 0; /* End of DLARZ */ } /* _starpu_dlarz_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarzb.c000066400000000000000000000206631507764646700206610ustar00rootroot00000000000000/* dlarzb.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b13 = 1.; static doublereal c_b23 = -1.; /* Subroutine */ int _starpu_dlarzb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, integer *l, doublereal *v, integer *ldv, doublereal *t, integer *ldt, doublereal *c__, integer * ldc, doublereal *work, integer *ldwork) { /* System generated locals */ integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, work_offset, i__1, i__2; /* Local variables */ integer i__, j, info; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_( char *, integer *); char transt[1]; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARZB applies a real block reflector H or its transpose H**T to */ /* a real distributed M-by-N C from the left or the right. */ /* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply H or H' from the Left */ /* = 'R': apply H or H' from the Right */ /* TRANS (input) CHARACTER*1 */ /* = 'N': apply H (No transpose) */ /* = 'C': apply H' (Transpose) */ /* DIRECT (input) CHARACTER*1 */ /* Indicates how H is formed from a product of elementary */ /* reflectors */ /* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) */ /* = 'B': H = H(k) . . . H(2) H(1) (Backward) */ /* STOREV (input) CHARACTER*1 */ /* Indicates how the vectors which define the elementary */ /* reflectors are stored: */ /* = 'C': Columnwise (not supported yet) */ /* = 'R': Rowwise */ /* M (input) INTEGER */ /* The number of rows of the matrix C. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. */ /* K (input) INTEGER */ /* The order of the matrix T (= the number of elementary */ /* reflectors whose product defines the block reflector). */ /* L (input) INTEGER */ /* The number of columns of the matrix V containing the */ /* meaningful part of the Householder reflectors. */ /* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */ /* V (input) DOUBLE PRECISION array, dimension (LDV,NV). */ /* If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. */ /* LDV (input) INTEGER */ /* The leading dimension of the array V. */ /* If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. */ /* T (input) DOUBLE PRECISION array, dimension (LDT,K) */ /* The triangular K-by-K matrix T in the representation of the */ /* block reflector. */ /* LDT (input) INTEGER */ /* The leading dimension of the array T. LDT >= K. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the M-by-N matrix C. */ /* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) */ /* LDWORK (input) INTEGER */ /* The leading dimension of the array WORK. */ /* If SIDE = 'L', LDWORK >= max(1,N); */ /* if SIDE = 'R', LDWORK >= max(1,M). */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; work_dim1 = *ldwork; work_offset = 1 + work_dim1; work -= work_offset; /* Function Body */ if (*m <= 0 || *n <= 0) { return 0; } /* Check for currently supported options */ info = 0; if (! _starpu_lsame_(direct, "B")) { info = -3; } else if (! _starpu_lsame_(storev, "R")) { info = -4; } if (info != 0) { i__1 = -info; _starpu_xerbla_("DLARZB", &i__1); return 0; } if (_starpu_lsame_(trans, "N")) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } if (_starpu_lsame_(side, "L")) { /* Form H * C or H' * C */ /* W( 1:n, 1:k ) = C( 1:k, 1:n )' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], &c__1); /* L10: */ } /* W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... */ /* C( m-l+1:m, 1:n )' * V( 1:k, 1:l )' */ if (*l > 0) { _starpu_dgemm_("Transpose", "Transpose", n, k, l, &c_b13, &c__[*m - *l + 1 + c_dim1], ldc, &v[v_offset], ldv, &c_b13, &work[ work_offset], ldwork); } /* W( 1:n, 1:k ) = W( 1:n, 1:k ) * T' or W( 1:m, 1:k ) * T */ _starpu_dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b13, &t[ t_offset], ldt, &work[work_offset], ldwork); /* C( 1:k, 1:n ) = C( 1:k, 1:n ) - W( 1:n, 1:k )' */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] -= work[j + i__ * work_dim1]; /* L20: */ } /* L30: */ } /* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... */ /* V( 1:k, 1:l )' * W( 1:n, 1:k )' */ if (*l > 0) { _starpu_dgemm_("Transpose", "Transpose", l, n, k, &c_b23, &v[v_offset], ldv, &work[work_offset], ldwork, &c_b13, &c__[*m - *l + 1 + c_dim1], ldc); } } else if (_starpu_lsame_(side, "R")) { /* Form C * H or C * H' */ /* W( 1:m, 1:k ) = C( 1:m, 1:k ) */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], & c__1); /* L40: */ } /* W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... */ /* C( 1:m, n-l+1:n ) * V( 1:k, 1:l )' */ if (*l > 0) { _starpu_dgemm_("No transpose", "Transpose", m, k, l, &c_b13, &c__[(*n - * l + 1) * c_dim1 + 1], ldc, &v[v_offset], ldv, &c_b13, & work[work_offset], ldwork); } /* W( 1:m, 1:k ) = W( 1:m, 1:k ) * T or W( 1:m, 1:k ) * T' */ _starpu_dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b13, &t[t_offset] , ldt, &work[work_offset], ldwork); /* C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; /* L50: */ } /* L60: */ } /* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... */ /* W( 1:m, 1:k ) * V( 1:k, 1:l ) */ if (*l > 0) { _starpu_dgemm_("No transpose", "No transpose", m, l, k, &c_b23, &work[ work_offset], ldwork, &v[v_offset], ldv, &c_b13, &c__[(*n - *l + 1) * c_dim1 + 1], ldc); } } return 0; /* End of DLARZB */ } /* _starpu_dlarzb_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlarzt.c000066400000000000000000000163531507764646700207040ustar00rootroot00000000000000/* dlarzt.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b8 = 0.; static integer c__1 = 1; /* Subroutine */ int _starpu_dlarzt_(char *direct, char *storev, integer *n, integer * k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, integer *ldt) { /* System generated locals */ integer t_dim1, t_offset, v_dim1, v_offset, i__1; doublereal d__1; /* Local variables */ integer i__, j, info; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLARZT forms the triangular factor T of a real block reflector */ /* H of order > n, which is defined as a product of k elementary */ /* reflectors. */ /* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */ /* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */ /* If STOREV = 'C', the vector which defines the elementary reflector */ /* H(i) is stored in the i-th column of the array V, and */ /* H = I - V * T * V' */ /* If STOREV = 'R', the vector which defines the elementary reflector */ /* H(i) is stored in the i-th row of the array V, and */ /* H = I - V' * T * V */ /* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. */ /* Arguments */ /* ========= */ /* DIRECT (input) CHARACTER*1 */ /* Specifies the order in which the elementary reflectors are */ /* multiplied to form the block reflector: */ /* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) */ /* = 'B': H = H(k) . . . H(2) H(1) (Backward) */ /* STOREV (input) CHARACTER*1 */ /* Specifies how the vectors which define the elementary */ /* reflectors are stored (see also Further Details): */ /* = 'C': columnwise (not supported yet) */ /* = 'R': rowwise */ /* N (input) INTEGER */ /* The order of the block reflector H. N >= 0. */ /* K (input) INTEGER */ /* The order of the triangular factor T (= the number of */ /* elementary reflectors). K >= 1. */ /* V (input/output) DOUBLE PRECISION array, dimension */ /* (LDV,K) if STOREV = 'C' */ /* (LDV,N) if STOREV = 'R' */ /* The matrix V. See further details. */ /* LDV (input) INTEGER */ /* The leading dimension of the array V. */ /* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i). */ /* T (output) DOUBLE PRECISION array, dimension (LDT,K) */ /* The k by k triangular factor T of the block reflector. */ /* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */ /* lower triangular. The rest of the array is not used. */ /* LDT (input) INTEGER */ /* The leading dimension of the array T. LDT >= K. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ /* The shape of the matrix V and the storage of the vectors which define */ /* the H(i) is best illustrated by the following example with n = 5 and */ /* k = 3. The elements equal to 1 are not stored; the corresponding */ /* array elements are modified but restored on exit. The rest of the */ /* array is not used. */ /* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */ /* ______V_____ */ /* ( v1 v2 v3 ) / \ */ /* ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) */ /* V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) */ /* ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) */ /* ( v1 v2 v3 ) */ /* . . . */ /* . . . */ /* 1 . . */ /* 1 . */ /* 1 */ /* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */ /* ______V_____ */ /* 1 / \ */ /* . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) */ /* . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) */ /* . . . ( . . 1 . . v3 v3 v3 v3 v3 ) */ /* . . . */ /* ( v1 v2 v3 ) */ /* ( v1 v2 v3 ) */ /* V = ( v1 v2 v3 ) */ /* ( v1 v2 v3 ) */ /* ( v1 v2 v3 ) */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Check for currently supported options */ /* Parameter adjustments */ v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; --tau; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; /* Function Body */ info = 0; if (! _starpu_lsame_(direct, "B")) { info = -1; } else if (! _starpu_lsame_(storev, "R")) { info = -2; } if (info != 0) { i__1 = -info; _starpu_xerbla_("DLARZT", &i__1); return 0; } for (i__ = *k; i__ >= 1; --i__) { if (tau[i__] == 0.) { /* H(i) = I */ i__1 = *k; for (j = i__; j <= i__1; ++j) { t[j + i__ * t_dim1] = 0.; /* L10: */ } } else { /* general case */ if (i__ < *k) { /* T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)' */ i__1 = *k - i__; d__1 = -tau[i__]; _starpu_dgemv_("No transpose", &i__1, n, &d__1, &v[i__ + 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, &c_b8, &t[i__ + 1 + i__ * t_dim1], &c__1); /* T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) */ i__1 = *k - i__; _starpu_dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * t_dim1] , &c__1); } t[i__ + i__ * t_dim1] = tau[i__]; } /* L20: */ } return 0; /* End of DLARZT */ } /* _starpu_dlarzt_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlas2.c000066400000000000000000000076741507764646700204170ustar00rootroot00000000000000/* dlas2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlas2_(doublereal *f, doublereal *g, doublereal *h__, doublereal *ssmin, doublereal *ssmax) { /* System generated locals */ doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal c__, fa, ga, ha, as, at, au, fhmn, fhmx; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAS2 computes the singular values of the 2-by-2 matrix */ /* [ F G ] */ /* [ 0 H ]. */ /* On return, SSMIN is the smaller singular value and SSMAX is the */ /* larger singular value. */ /* Arguments */ /* ========= */ /* F (input) DOUBLE PRECISION */ /* The (1,1) element of the 2-by-2 matrix. */ /* G (input) DOUBLE PRECISION */ /* The (1,2) element of the 2-by-2 matrix. */ /* H (input) DOUBLE PRECISION */ /* The (2,2) element of the 2-by-2 matrix. */ /* SSMIN (output) DOUBLE PRECISION */ /* The smaller singular value. */ /* SSMAX (output) DOUBLE PRECISION */ /* The larger singular value. */ /* Further Details */ /* =============== */ /* Barring over/underflow, all output quantities are correct to within */ /* a few units in the last place (ulps), even in the absence of a guard */ /* digit in addition/subtraction. */ /* In IEEE arithmetic, the code works correctly if one matrix element is */ /* infinite. */ /* Overflow will not occur unless the largest singular value itself */ /* overflows, or is within a few ulps of overflow. (On machines with */ /* partial overflow, like the Cray, overflow may occur if the largest */ /* singular value is within a factor of 2 of overflow.) */ /* Underflow is harmless if underflow is gradual. Otherwise, results */ /* may correspond to a matrix modified by perturbations of size near */ /* the underflow threshold. */ /* ==================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ fa = abs(*f); ga = abs(*g); ha = abs(*h__); fhmn = min(fa,ha); fhmx = max(fa,ha); if (fhmn == 0.) { *ssmin = 0.; if (fhmx == 0.) { *ssmax = ga; } else { /* Computing 2nd power */ d__1 = min(fhmx,ga) / max(fhmx,ga); *ssmax = max(fhmx,ga) * sqrt(d__1 * d__1 + 1.); } } else { if (ga < fhmx) { as = fhmn / fhmx + 1.; at = (fhmx - fhmn) / fhmx; /* Computing 2nd power */ d__1 = ga / fhmx; au = d__1 * d__1; c__ = 2. / (sqrt(as * as + au) + sqrt(at * at + au)); *ssmin = fhmn * c__; *ssmax = fhmx / c__; } else { au = fhmx / ga; if (au == 0.) { /* Avoid possible harmful underflow if exponent range */ /* asymmetric (true SSMIN may not underflow even if */ /* AU underflows) */ *ssmin = fhmn * fhmx / ga; *ssmax = ga; } else { as = fhmn / fhmx + 1.; at = (fhmx - fhmn) / fhmx; /* Computing 2nd power */ d__1 = as * au; /* Computing 2nd power */ d__2 = at * au; c__ = 1. / (sqrt(d__1 * d__1 + 1.) + sqrt(d__2 * d__2 + 1.)); *ssmin = fhmn * c__ * au; *ssmin += *ssmin; *ssmax = ga / (c__ + c__); } } } return 0; /* End of DLAS2 */ } /* _starpu_dlas2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlascl.c000066400000000000000000000212241507764646700206370ustar00rootroot00000000000000/* dlascl.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlascl_(char *type__, integer *kl, integer *ku, doublereal *cfrom, doublereal *cto, integer *m, integer *n, doublereal *a, integer *lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; /* Local variables */ integer i__, j, k1, k2, k3, k4; doublereal mul, cto1; logical done; doublereal ctoc; extern logical _starpu_lsame_(char *, char *); integer itype; doublereal cfrom1; extern doublereal _starpu_dlamch_(char *); doublereal cfromc; extern logical _starpu_disnan_(doublereal *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum, smlnum; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASCL multiplies the M by N real matrix A by the real scalar */ /* CTO/CFROM. This is done without over/underflow as long as the final */ /* result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that */ /* A may be full, upper triangular, lower triangular, upper Hessenberg, */ /* or banded. */ /* Arguments */ /* ========= */ /* TYPE (input) CHARACTER*1 */ /* TYPE indices the storage type of the input matrix. */ /* = 'G': A is a full matrix. */ /* = 'L': A is a lower triangular matrix. */ /* = 'U': A is an upper triangular matrix. */ /* = 'H': A is an upper Hessenberg matrix. */ /* = 'B': A is a symmetric band matrix with lower bandwidth KL */ /* and upper bandwidth KU and with the only the lower */ /* half stored. */ /* = 'Q': A is a symmetric band matrix with lower bandwidth KL */ /* and upper bandwidth KU and with the only the upper */ /* half stored. */ /* = 'Z': A is a band matrix with lower bandwidth KL and upper */ /* bandwidth KU. */ /* KL (input) INTEGER */ /* The lower bandwidth of A. Referenced only if TYPE = 'B', */ /* 'Q' or 'Z'. */ /* KU (input) INTEGER */ /* The upper bandwidth of A. Referenced only if TYPE = 'B', */ /* 'Q' or 'Z'. */ /* CFROM (input) DOUBLE PRECISION */ /* CTO (input) DOUBLE PRECISION */ /* The matrix A is multiplied by CTO/CFROM. A(I,J) is computed */ /* without over/underflow if the final result CTO*A(I,J)/CFROM */ /* can be represented without over/underflow. CFROM must be */ /* nonzero. */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* The matrix to be multiplied by CTO/CFROM. See TYPE for the */ /* storage type. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* INFO (output) INTEGER */ /* 0 - successful exit */ /* <0 - if INFO = -i, the i-th argument had an illegal value. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; if (_starpu_lsame_(type__, "G")) { itype = 0; } else if (_starpu_lsame_(type__, "L")) { itype = 1; } else if (_starpu_lsame_(type__, "U")) { itype = 2; } else if (_starpu_lsame_(type__, "H")) { itype = 3; } else if (_starpu_lsame_(type__, "B")) { itype = 4; } else if (_starpu_lsame_(type__, "Q")) { itype = 5; } else if (_starpu_lsame_(type__, "Z")) { itype = 6; } else { itype = -1; } if (itype == -1) { *info = -1; } else if (*cfrom == 0. || _starpu_disnan_(cfrom)) { *info = -4; } else if (_starpu_disnan_(cto)) { *info = -5; } else if (*m < 0) { *info = -6; } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { *info = -7; } else if (itype <= 3 && *lda < max(1,*m)) { *info = -9; } else if (itype >= 4) { /* Computing MAX */ i__1 = *m - 1; if (*kl < 0 || *kl > max(i__1,0)) { *info = -2; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = *n - 1; if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && *kl != *ku) { *info = -3; } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { *info = -9; } } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLASCL", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *m == 0) { return 0; } /* Get machine parameters */ smlnum = _starpu_dlamch_("S"); bignum = 1. / smlnum; cfromc = *cfrom; ctoc = *cto; L10: cfrom1 = cfromc * smlnum; if (cfrom1 == cfromc) { /* CFROMC is an inf. Multiply by a correctly signed zero for */ /* finite CTOC, or a NaN if CTOC is infinite. */ mul = ctoc / cfromc; done = TRUE_; cto1 = ctoc; } else { cto1 = ctoc / bignum; if (cto1 == ctoc) { /* CTOC is either 0 or an inf. In both cases, CTOC itself */ /* serves as the correct multiplication factor. */ mul = ctoc; done = TRUE_; cfromc = 1.; } else if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { mul = smlnum; done = FALSE_; cfromc = cfrom1; } else if (abs(cto1) > abs(cfromc)) { mul = bignum; done = FALSE_; ctoc = cto1; } else { mul = ctoc / cfromc; done = TRUE_; } } if (itype == 0) { /* Full matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] *= mul; /* L20: */ } /* L30: */ } } else if (itype == 1) { /* Lower triangular matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = j; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] *= mul; /* L40: */ } /* L50: */ } } else if (itype == 2) { /* Upper triangular matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = min(j,*m); for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] *= mul; /* L60: */ } /* L70: */ } } else if (itype == 3) { /* Upper Hessenberg matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = j + 1; i__2 = min(i__3,*m); for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] *= mul; /* L80: */ } /* L90: */ } } else if (itype == 4) { /* Lower half of a symmetric band matrix */ k3 = *kl + 1; k4 = *n + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = k3, i__4 = k4 - j; i__2 = min(i__3,i__4); for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] *= mul; /* L100: */ } /* L110: */ } } else if (itype == 5) { /* Upper half of a symmetric band matrix */ k1 = *ku + 2; k3 = *ku + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = k1 - j; i__3 = k3; for (i__ = max(i__2,1); i__ <= i__3; ++i__) { a[i__ + j * a_dim1] *= mul; /* L120: */ } /* L130: */ } } else if (itype == 6) { /* Band matrix */ k1 = *kl + *ku + 2; k2 = *kl + 1; k3 = (*kl << 1) + *ku + 1; k4 = *kl + *ku + 1 + *m; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__3 = k1 - j; /* Computing MIN */ i__4 = k3, i__5 = k4 - j; i__2 = min(i__4,i__5); for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { a[i__ + j * a_dim1] *= mul; /* L140: */ } /* L150: */ } } if (! done) { goto L10; } return 0; /* End of DLASCL */ } /* _starpu_dlascl_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlascl2.c000066400000000000000000000051071507764646700207230ustar00rootroot00000000000000/* dlascl2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlascl2_(integer *m, integer *n, doublereal *d__, doublereal *x, integer *ldx) { /* System generated locals */ integer x_dim1, x_offset, i__1, i__2; /* Local variables */ integer i__, j; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASCL2 performs a diagonal scaling on a vector: */ /* x <-- D * x */ /* where the diagonal matrix D is stored as a vector. */ /* Eventually to be replaced by BLAS_dge_diag_scale in the new BLAS */ /* standard. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of D and X. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of D and X. N >= 0. */ /* D (input) DOUBLE PRECISION array, length M */ /* Diagonal matrix D, stored as a vector of length M. */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,N) */ /* On entry, the vector X to be scaled by D. */ /* On exit, the scaled vector. */ /* LDX (input) INTEGER */ /* The leading dimension of the vector X. LDX >= 0. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --d__; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; /* Function Body */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { x[i__ + j * x_dim1] *= d__[i__]; } } return 0; } /* _starpu_dlascl2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasd0.c000066400000000000000000000202301507764646700205400ustar00rootroot00000000000000/* dlasd0.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__0 = 0; static integer c__2 = 2; /* Subroutine */ int _starpu_dlasd0_(integer *n, integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer * ldvt, integer *smlsiz, integer *iwork, doublereal *work, integer * info) { /* System generated locals */ integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; /* Builtin functions */ integer pow_ii(integer *, integer *); /* Local variables */ integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, iwk, lvl, ndb1, nlp1, nrp1; doublereal beta; integer idxq, nlvl; doublereal alpha; integer inode, ndiml, idxqc, ndimr, itemp, sqrei; extern /* Subroutine */ int _starpu_dlasd1_(integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *), _starpu_dlasdq_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlasdt_(integer *, integer *, integer *, integer *, integer *, integer *, integer *), _starpu_xerbla_( char *, integer *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* Using a divide and conquer approach, DLASD0 computes the singular */ /* value decomposition (SVD) of a real upper bidiagonal N-by-M */ /* matrix B with diagonal D and offdiagonal E, where M = N + SQRE. */ /* The algorithm computes orthogonal matrices U and VT such that */ /* B = U * S * VT. The singular values S are overwritten on D. */ /* A related subroutine, DLASDA, computes only the singular values, */ /* and optionally, the singular vectors in compact form. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* On entry, the row dimension of the upper bidiagonal matrix. */ /* This is also the dimension of the main diagonal array D. */ /* SQRE (input) INTEGER */ /* Specifies the column dimension of the bidiagonal matrix. */ /* = 0: The bidiagonal matrix has column dimension M = N; */ /* = 1: The bidiagonal matrix has column dimension M = N+1; */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry D contains the main diagonal of the bidiagonal */ /* matrix. */ /* On exit D, if INFO = 0, contains its singular values. */ /* E (input) DOUBLE PRECISION array, dimension (M-1) */ /* Contains the subdiagonal entries of the bidiagonal matrix. */ /* On exit, E has been destroyed. */ /* U (output) DOUBLE PRECISION array, dimension at least (LDQ, N) */ /* On exit, U contains the left singular vectors. */ /* LDU (input) INTEGER */ /* On entry, leading dimension of U. */ /* VT (output) DOUBLE PRECISION array, dimension at least (LDVT, M) */ /* On exit, VT' contains the right singular vectors. */ /* LDVT (input) INTEGER */ /* On entry, leading dimension of VT. */ /* SMLSIZ (input) INTEGER */ /* On entry, maximum size of the subproblems at the */ /* bottom of the computation tree. */ /* IWORK (workspace) INTEGER work array. */ /* Dimension must be at least (8 * N) */ /* WORK (workspace) DOUBLE PRECISION work array. */ /* Dimension must be at least (3 * M**2 + 2 * M) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, an singular value did not converge */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1; vt -= vt_offset; --iwork; --work; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*sqre < 0 || *sqre > 1) { *info = -2; } m = *n + *sqre; if (*ldu < *n) { *info = -6; } else if (*ldvt < m) { *info = -8; } else if (*smlsiz < 3) { *info = -9; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLASD0", &i__1); return 0; } /* If the input matrix is too small, call DLASDQ to find the SVD. */ if (*n <= *smlsiz) { _starpu_dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); return 0; } /* Set up the computation tree. */ inode = 1; ndiml = inode + *n; ndimr = ndiml + *n; idxq = ndimr + *n; iwk = idxq + *n; _starpu_dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* For the nodes on bottom level of the tree, solve */ /* their subproblems by DLASDQ. */ ndb1 = (nd + 1) / 2; ncc = 0; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { /* IC : center row of each node */ /* NL : number of rows of left subproblem */ /* NR : number of rows of right subproblem */ /* NLF: starting row of the left subproblem */ /* NRF: starting row of the right subproblem */ i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nlp1 = nl + 1; nr = iwork[ndimr + i1]; nrp1 = nr + 1; nlf = ic - nl; nrf = ic + 1; sqrei = 1; _starpu_dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[ nlf + nlf * vt_dim1], ldvt, &u[nlf + nlf * u_dim1], ldu, &u[ nlf + nlf * u_dim1], ldu, &work[1], info); if (*info != 0) { return 0; } itemp = idxq + nlf - 2; i__2 = nl; for (j = 1; j <= i__2; ++j) { iwork[itemp + j] = j; /* L10: */ } if (i__ == nd) { sqrei = *sqre; } else { sqrei = 1; } nrp1 = nr + sqrei; _starpu_dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[ nrf + nrf * vt_dim1], ldvt, &u[nrf + nrf * u_dim1], ldu, &u[ nrf + nrf * u_dim1], ldu, &work[1], info); if (*info != 0) { return 0; } itemp = idxq + ic; i__2 = nr; for (j = 1; j <= i__2; ++j) { iwork[itemp + j - 1] = j; /* L20: */ } /* L30: */ } /* Now conquer each subproblem bottom-up. */ for (lvl = nlvl; lvl >= 1; --lvl) { /* Find the first node LF and last node LL on the */ /* current level LVL. */ if (lvl == 1) { lf = 1; ll = 1; } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; if (*sqre == 0 && i__ == ll) { sqrei = *sqre; } else { sqrei = 1; } idxqc = idxq + nlf - 1; alpha = d__[ic]; beta = e[ic]; _starpu_dlasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * u_dim1], ldu, &vt[nlf + nlf * vt_dim1], ldvt, &iwork[ idxqc], &iwork[iwk], &work[1], info); if (*info != 0) { return 0; } /* L40: */ } /* L50: */ } return 0; /* End of DLASD0 */ } /* _starpu_dlasd0_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasd1.c000066400000000000000000000232251507764646700205500ustar00rootroot00000000000000/* dlasd1.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__0 = 0; static doublereal c_b7 = 1.; static integer c__1 = 1; static integer c_n1 = -1; /* Subroutine */ int _starpu_dlasd1_(integer *nl, integer *nr, integer *sqre, doublereal *d__, doublereal *alpha, doublereal *beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, integer *idxq, integer * iwork, doublereal *work, integer *info) { /* System generated locals */ integer u_dim1, u_offset, vt_dim1, vt_offset, i__1; doublereal d__1, d__2; /* Local variables */ integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2, idxc, idxp, ldvt2; extern /* Subroutine */ int _starpu_dlasd2_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *, integer *, integer *, integer *), _starpu_dlasd3_( integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *); integer isigma; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal orgnrm; integer coltyp; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, */ /* where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. */ /* A related subroutine DLASD7 handles the case in which the singular */ /* values (and the singular vectors in factored form) are desired. */ /* DLASD1 computes the SVD as follows: */ /* ( D1(in) 0 0 0 ) */ /* B = U(in) * ( Z1' a Z2' b ) * VT(in) */ /* ( 0 0 D2(in) 0 ) */ /* = U(out) * ( D(out) 0) * VT(out) */ /* where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M */ /* with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros */ /* elsewhere; and the entry b is empty if SQRE = 0. */ /* The left singular vectors of the original matrix are stored in U, and */ /* the transpose of the right singular vectors are stored in VT, and the */ /* singular values are in D. The algorithm consists of three stages: */ /* The first stage consists of deflating the size of the problem */ /* when there are multiple singular values or when there are zeros in */ /* the Z vector. For each such occurence the dimension of the */ /* secular equation problem is reduced by one. This stage is */ /* performed by the routine DLASD2. */ /* The second stage consists of calculating the updated */ /* singular values. This is done by finding the square roots of the */ /* roots of the secular equation via the routine DLASD4 (as called */ /* by DLASD3). This routine also calculates the singular vectors of */ /* the current problem. */ /* The final stage consists of computing the updated singular vectors */ /* directly using the updated singular values. The singular vectors */ /* for the current problem are multiplied with the singular vectors */ /* from the overall problem. */ /* Arguments */ /* ========= */ /* NL (input) INTEGER */ /* The row dimension of the upper block. NL >= 1. */ /* NR (input) INTEGER */ /* The row dimension of the lower block. NR >= 1. */ /* SQRE (input) INTEGER */ /* = 0: the lower block is an NR-by-NR square matrix. */ /* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ /* The bidiagonal matrix has row dimension N = NL + NR + 1, */ /* and column dimension M = N + SQRE. */ /* D (input/output) DOUBLE PRECISION array, */ /* dimension (N = NL+NR+1). */ /* On entry D(1:NL,1:NL) contains the singular values of the */ /* upper block; and D(NL+2:N) contains the singular values of */ /* the lower block. On exit D(1:N) contains the singular values */ /* of the modified matrix. */ /* ALPHA (input/output) DOUBLE PRECISION */ /* Contains the diagonal element associated with the added row. */ /* BETA (input/output) DOUBLE PRECISION */ /* Contains the off-diagonal element associated with the added */ /* row. */ /* U (input/output) DOUBLE PRECISION array, dimension(LDU,N) */ /* On entry U(1:NL, 1:NL) contains the left singular vectors of */ /* the upper block; U(NL+2:N, NL+2:N) contains the left singular */ /* vectors of the lower block. On exit U contains the left */ /* singular vectors of the bidiagonal matrix. */ /* LDU (input) INTEGER */ /* The leading dimension of the array U. LDU >= max( 1, N ). */ /* VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) */ /* where M = N + SQRE. */ /* On entry VT(1:NL+1, 1:NL+1)' contains the right singular */ /* vectors of the upper block; VT(NL+2:M, NL+2:M)' contains */ /* the right singular vectors of the lower block. On exit */ /* VT' contains the right singular vectors of the */ /* bidiagonal matrix. */ /* LDVT (input) INTEGER */ /* The leading dimension of the array VT. LDVT >= max( 1, M ). */ /* IDXQ (output) INTEGER array, dimension(N) */ /* This contains the permutation which will reintegrate the */ /* subproblem just solved back into sorted order, i.e. */ /* D( IDXQ( I = 1, N ) ) will be in ascending order. */ /* IWORK (workspace) INTEGER array, dimension( 4 * N ) */ /* WORK (workspace) DOUBLE PRECISION array, dimension( 3*M**2 + 2*M ) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, an singular value did not converge */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1; vt -= vt_offset; --idxq; --iwork; --work; /* Function Body */ *info = 0; if (*nl < 1) { *info = -1; } else if (*nr < 1) { *info = -2; } else if (*sqre < 0 || *sqre > 1) { *info = -3; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLASD1", &i__1); return 0; } n = *nl + *nr + 1; m = n + *sqre; /* The following values are for bookkeeping purposes only. They are */ /* integer pointers which indicate the portion of the workspace */ /* used by a particular array in DLASD2 and DLASD3. */ ldu2 = n; ldvt2 = m; iz = 1; isigma = iz + m; iu2 = isigma + n; ivt2 = iu2 + ldu2 * n; iq = ivt2 + ldvt2 * m; idx = 1; idxc = idx + n; coltyp = idxc + n; idxp = coltyp + n; /* Scale. */ /* Computing MAX */ d__1 = abs(*alpha), d__2 = abs(*beta); orgnrm = max(d__1,d__2); d__[*nl + 1] = 0.; i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { orgnrm = (d__1 = d__[i__], abs(d__1)); } /* L10: */ } _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b7, &n, &c__1, &d__[1], &n, info); *alpha /= orgnrm; *beta /= orgnrm; /* Deflate singular values. */ _starpu_dlasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, & work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], & idxq[1], &iwork[coltyp], info); /* Solve Secular Equation and update singular vectors. */ ldq = k; _starpu_dlasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[ u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[ ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info); if (*info != 0) { return 0; } /* Unscale. */ _starpu_dlascl_("G", &c__0, &c__0, &c_b7, &orgnrm, &n, &c__1, &d__[1], &n, info); /* Prepare the IDXQ sorting permutation. */ n1 = k; n2 = n - k; _starpu_dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); return 0; /* End of DLASD1 */ } /* _starpu_dlasd1_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasd2.c000066400000000000000000000445341507764646700205570ustar00rootroot00000000000000/* dlasd2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b30 = 0.; /* Subroutine */ int _starpu_dlasd2_(integer *nl, integer *nr, integer *sqre, integer *k, doublereal *d__, doublereal *z__, doublereal *alpha, doublereal * beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer * idxq, integer *coltyp, integer *info) { /* System generated locals */ integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, vt2_offset, i__1; doublereal d__1, d__2; /* Local variables */ doublereal c__; integer i__, j, m, n; doublereal s; integer k2; doublereal z1; integer ct, jp; doublereal eps, tau, tol; integer psm[4], nlp1, nlp2, idxi, idxj; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); integer ctot[4], idxjp; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); integer jprev; extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); doublereal hlftol; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASD2 merges the two sets of singular values together into a single */ /* sorted set. Then it tries to deflate the size of the problem. */ /* There are two ways in which deflation can occur: when two or more */ /* singular values are close together or if there is a tiny entry in the */ /* Z vector. For each such occurrence the order of the related secular */ /* equation problem is reduced by one. */ /* DLASD2 is called from DLASD1. */ /* Arguments */ /* ========= */ /* NL (input) INTEGER */ /* The row dimension of the upper block. NL >= 1. */ /* NR (input) INTEGER */ /* The row dimension of the lower block. NR >= 1. */ /* SQRE (input) INTEGER */ /* = 0: the lower block is an NR-by-NR square matrix. */ /* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ /* The bidiagonal matrix has N = NL + NR + 1 rows and */ /* M = N + SQRE >= N columns. */ /* K (output) INTEGER */ /* Contains the dimension of the non-deflated matrix, */ /* This is the order of the related secular equation. 1 <= K <=N. */ /* D (input/output) DOUBLE PRECISION array, dimension(N) */ /* On entry D contains the singular values of the two submatrices */ /* to be combined. On exit D contains the trailing (N-K) updated */ /* singular values (those which were deflated) sorted into */ /* increasing order. */ /* Z (output) DOUBLE PRECISION array, dimension(N) */ /* On exit Z contains the updating row vector in the secular */ /* equation. */ /* ALPHA (input) DOUBLE PRECISION */ /* Contains the diagonal element associated with the added row. */ /* BETA (input) DOUBLE PRECISION */ /* Contains the off-diagonal element associated with the added */ /* row. */ /* U (input/output) DOUBLE PRECISION array, dimension(LDU,N) */ /* On entry U contains the left singular vectors of two */ /* submatrices in the two square blocks with corners at (1,1), */ /* (NL, NL), and (NL+2, NL+2), (N,N). */ /* On exit U contains the trailing (N-K) updated left singular */ /* vectors (those which were deflated) in its last N-K columns. */ /* LDU (input) INTEGER */ /* The leading dimension of the array U. LDU >= N. */ /* VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) */ /* On entry VT' contains the right singular vectors of two */ /* submatrices in the two square blocks with corners at (1,1), */ /* (NL+1, NL+1), and (NL+2, NL+2), (M,M). */ /* On exit VT' contains the trailing (N-K) updated right singular */ /* vectors (those which were deflated) in its last N-K columns. */ /* In case SQRE =1, the last row of VT spans the right null */ /* space. */ /* LDVT (input) INTEGER */ /* The leading dimension of the array VT. LDVT >= M. */ /* DSIGMA (output) DOUBLE PRECISION array, dimension (N) */ /* Contains a copy of the diagonal elements (K-1 singular values */ /* and one zero) in the secular equation. */ /* U2 (output) DOUBLE PRECISION array, dimension(LDU2,N) */ /* Contains a copy of the first K-1 left singular vectors which */ /* will be used by DLASD3 in a matrix multiply (DGEMM) to solve */ /* for the new left singular vectors. U2 is arranged into four */ /* blocks. The first block contains a column with 1 at NL+1 and */ /* zero everywhere else; the second block contains non-zero */ /* entries only at and above NL; the third contains non-zero */ /* entries only below NL+1; and the fourth is dense. */ /* LDU2 (input) INTEGER */ /* The leading dimension of the array U2. LDU2 >= N. */ /* VT2 (output) DOUBLE PRECISION array, dimension(LDVT2,N) */ /* VT2' contains a copy of the first K right singular vectors */ /* which will be used by DLASD3 in a matrix multiply (DGEMM) to */ /* solve for the new right singular vectors. VT2 is arranged into */ /* three blocks. The first block contains a row that corresponds */ /* to the special 0 diagonal element in SIGMA; the second block */ /* contains non-zeros only at and before NL +1; the third block */ /* contains non-zeros only at and after NL +2. */ /* LDVT2 (input) INTEGER */ /* The leading dimension of the array VT2. LDVT2 >= M. */ /* IDXP (workspace) INTEGER array dimension(N) */ /* This will contain the permutation used to place deflated */ /* values of D at the end of the array. On output IDXP(2:K) */ /* points to the nondeflated D-values and IDXP(K+1:N) */ /* points to the deflated singular values. */ /* IDX (workspace) INTEGER array dimension(N) */ /* This will contain the permutation used to sort the contents of */ /* D into ascending order. */ /* IDXC (output) INTEGER array dimension(N) */ /* This will contain the permutation used to arrange the columns */ /* of the deflated U matrix into three groups: the first group */ /* contains non-zero entries only at and above NL, the second */ /* contains non-zero entries only below NL+2, and the third is */ /* dense. */ /* IDXQ (input/output) INTEGER array dimension(N) */ /* This contains the permutation which separately sorts the two */ /* sub-problems in D into ascending order. Note that entries in */ /* the first hlaf of this permutation must first be moved one */ /* position backward; and entries in the second half */ /* must first have NL+1 added to their values. */ /* COLTYP (workspace/output) INTEGER array dimension(N) */ /* As workspace, this will contain a label which will indicate */ /* which of the following types a column in the U2 matrix or a */ /* row in the VT2 matrix is: */ /* 1 : non-zero in the upper half only */ /* 2 : non-zero in the lower half only */ /* 3 : dense */ /* 4 : deflated */ /* On exit, it is an array of dimension 4, with COLTYP(I) being */ /* the dimension of the I-th type columns. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --z__; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1; vt -= vt_offset; --dsigma; u2_dim1 = *ldu2; u2_offset = 1 + u2_dim1; u2 -= u2_offset; vt2_dim1 = *ldvt2; vt2_offset = 1 + vt2_dim1; vt2 -= vt2_offset; --idxp; --idx; --idxc; --idxq; --coltyp; /* Function Body */ *info = 0; if (*nl < 1) { *info = -1; } else if (*nr < 1) { *info = -2; } else if (*sqre != 1 && *sqre != 0) { *info = -3; } n = *nl + *nr + 1; m = n + *sqre; if (*ldu < n) { *info = -10; } else if (*ldvt < m) { *info = -12; } else if (*ldu2 < n) { *info = -15; } else if (*ldvt2 < m) { *info = -17; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLASD2", &i__1); return 0; } nlp1 = *nl + 1; nlp2 = *nl + 2; /* Generate the first part of the vector Z; and move the singular */ /* values in the first part of D one position backward. */ z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; z__[1] = z1; for (i__ = *nl; i__ >= 1; --i__) { z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; d__[i__ + 1] = d__[i__]; idxq[i__ + 1] = idxq[i__] + 1; /* L10: */ } /* Generate the second part of the vector Z. */ i__1 = m; for (i__ = nlp2; i__ <= i__1; ++i__) { z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; /* L20: */ } /* Initialize some reference arrays. */ i__1 = nlp1; for (i__ = 2; i__ <= i__1; ++i__) { coltyp[i__] = 1; /* L30: */ } i__1 = n; for (i__ = nlp2; i__ <= i__1; ++i__) { coltyp[i__] = 2; /* L40: */ } /* Sort the singular values into increasing order */ i__1 = n; for (i__ = nlp2; i__ <= i__1; ++i__) { idxq[i__] += nlp1; /* L50: */ } /* DSIGMA, IDXC, IDXC, and the first column of U2 */ /* are used as storage space. */ i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { dsigma[i__] = d__[idxq[i__]]; u2[i__ + u2_dim1] = z__[idxq[i__]]; idxc[i__] = coltyp[idxq[i__]]; /* L60: */ } _starpu_dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { idxi = idx[i__] + 1; d__[i__] = dsigma[idxi]; z__[i__] = u2[idxi + u2_dim1]; coltyp[i__] = idxc[idxi]; /* L70: */ } /* Calculate the allowable deflation tolerance */ eps = _starpu_dlamch_("Epsilon"); /* Computing MAX */ d__1 = abs(*alpha), d__2 = abs(*beta); tol = max(d__1,d__2); /* Computing MAX */ d__2 = (d__1 = d__[n], abs(d__1)); tol = eps * 8. * max(d__2,tol); /* There are 2 kinds of deflation -- first a value in the z-vector */ /* is small, second two (or more) singular values are very close */ /* together (their difference is small). */ /* If the value in the z-vector is small, we simply permute the */ /* array so that the corresponding singular value is moved to the */ /* end. */ /* If two values in the D-vector are close, we perform a two-sided */ /* rotation designed to make one of the corresponding z-vector */ /* entries zero, and then permute the array so that the deflated */ /* singular value is moved to the end. */ /* If there are multiple singular values then the problem deflates. */ /* Here the number of equal singular values are found. As each equal */ /* singular value is found, an elementary reflector is computed to */ /* rotate the corresponding singular subspace so that the */ /* corresponding components of Z are zero in this new basis. */ *k = 1; k2 = n + 1; i__1 = n; for (j = 2; j <= i__1; ++j) { if ((d__1 = z__[j], abs(d__1)) <= tol) { /* Deflate due to small z component. */ --k2; idxp[k2] = j; coltyp[j] = 4; if (j == n) { goto L120; } } else { jprev = j; goto L90; } /* L80: */ } L90: j = jprev; L100: ++j; if (j > n) { goto L110; } if ((d__1 = z__[j], abs(d__1)) <= tol) { /* Deflate due to small z component. */ --k2; idxp[k2] = j; coltyp[j] = 4; } else { /* Check if singular values are close enough to allow deflation. */ if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { /* Deflation is possible. */ s = z__[jprev]; c__ = z__[j]; /* Find sqrt(a**2+b**2) without overflow or */ /* destructive underflow. */ tau = _starpu_dlapy2_(&c__, &s); c__ /= tau; s = -s / tau; z__[j] = tau; z__[jprev] = 0.; /* Apply back the Givens rotation to the left and right */ /* singular vector matrices. */ idxjp = idxq[idx[jprev] + 1]; idxj = idxq[idx[j] + 1]; if (idxjp <= nlp1) { --idxjp; } if (idxj <= nlp1) { --idxj; } _starpu_drot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & c__1, &c__, &s); _starpu_drot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & c__, &s); if (coltyp[j] != coltyp[jprev]) { coltyp[j] = 3; } coltyp[jprev] = 4; --k2; idxp[k2] = jprev; jprev = j; } else { ++(*k); u2[*k + u2_dim1] = z__[jprev]; dsigma[*k] = d__[jprev]; idxp[*k] = jprev; jprev = j; } } goto L100; L110: /* Record the last singular value. */ ++(*k); u2[*k + u2_dim1] = z__[jprev]; dsigma[*k] = d__[jprev]; idxp[*k] = jprev; L120: /* Count up the total number of the various types of columns, then */ /* form a permutation which positions the four column types into */ /* four groups of uniform structure (although one or more of these */ /* groups may be empty). */ for (j = 1; j <= 4; ++j) { ctot[j - 1] = 0; /* L130: */ } i__1 = n; for (j = 2; j <= i__1; ++j) { ct = coltyp[j]; ++ctot[ct - 1]; /* L140: */ } /* PSM(*) = Position in SubMatrix (of types 1 through 4) */ psm[0] = 2; psm[1] = ctot[0] + 2; psm[2] = psm[1] + ctot[1]; psm[3] = psm[2] + ctot[2]; /* Fill out the IDXC array so that the permutation which it induces */ /* will place all type-1 columns first, all type-2 columns next, */ /* then all type-3's, and finally all type-4's, starting from the */ /* second column. This applies similarly to the rows of VT. */ i__1 = n; for (j = 2; j <= i__1; ++j) { jp = idxp[j]; ct = coltyp[jp]; idxc[psm[ct - 1]] = j; ++psm[ct - 1]; /* L150: */ } /* Sort the singular values and corresponding singular vectors into */ /* DSIGMA, U2, and VT2 respectively. The singular values/vectors */ /* which were not deflated go into the first K slots of DSIGMA, U2, */ /* and VT2 respectively, while those which were deflated go into the */ /* last N - K slots, except that the first column/row will be treated */ /* separately. */ i__1 = n; for (j = 2; j <= i__1; ++j) { jp = idxp[j]; dsigma[j] = d__[jp]; idxj = idxq[idx[idxp[idxc[j]]] + 1]; if (idxj <= nlp1) { --idxj; } _starpu_dcopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); _starpu_dcopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); /* L160: */ } /* Determine DSIGMA(1), DSIGMA(2) and Z(1) */ dsigma[1] = 0.; hlftol = tol / 2.; if (abs(dsigma[2]) <= hlftol) { dsigma[2] = hlftol; } if (m > n) { z__[1] = _starpu_dlapy2_(&z1, &z__[m]); if (z__[1] <= tol) { c__ = 1.; s = 0.; z__[1] = tol; } else { c__ = z1 / z__[1]; s = z__[m] / z__[1]; } } else { if (abs(z1) <= tol) { z__[1] = tol; } else { z__[1] = z1; } } /* Move the rest of the updating row to Z. */ i__1 = *k - 1; _starpu_dcopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); /* Determine the first column of U2, the first row of VT2 and the */ /* last row of VT. */ _starpu_dlaset_("A", &n, &c__1, &c_b30, &c_b30, &u2[u2_offset], ldu2); u2[nlp1 + u2_dim1] = 1.; if (m > n) { i__1 = nlp1; for (i__ = 1; i__ <= i__1; ++i__) { vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; /* L170: */ } i__1 = m; for (i__ = nlp2; i__ <= i__1; ++i__) { vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; /* L180: */ } } else { _starpu_dcopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); } if (m > n) { _starpu_dcopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); } /* The deflated singular values and their corresponding vectors go */ /* into the back of D, U, and V respectively. */ if (n > *k) { i__1 = n - *k; _starpu_dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); i__1 = n - *k; _starpu_dlacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) * u_dim1 + 1], ldu); i__1 = n - *k; _starpu_dlacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + vt_dim1], ldvt); } /* Copy CTOT into COLTYP for referencing in DLASD3. */ for (j = 1; j <= 4; ++j) { coltyp[j] = ctot[j - 1]; /* L190: */ } return 0; /* End of DLASD2 */ } /* _starpu_dlasd2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasd3.c000066400000000000000000000350621507764646700205540ustar00rootroot00000000000000/* dlasd3.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__0 = 0; static doublereal c_b13 = 1.; static doublereal c_b26 = 0.; /* Subroutine */ int _starpu_dlasd3_(integer *nl, integer *nr, integer *sqre, integer *k, doublereal *d__, doublereal *q, integer *ldq, doublereal *dsigma, doublereal *u, integer *ldu, doublereal *u2, integer *ldu2, doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, integer *idxc, integer *ctot, doublereal *z__, integer *info) { /* System generated locals */ integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, vt2_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ integer i__, j, m, n, jc; doublereal rho; integer nlp1, nlp2, nrp1; doublereal temp; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer ctemp; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); integer ktemp; extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dlasd4_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASD3 finds all the square roots of the roots of the secular */ /* equation, as defined by the values in D and Z. It makes the */ /* appropriate calls to DLASD4 and then updates the singular */ /* vectors by matrix multiplication. */ /* This code makes very mild assumptions about floating point */ /* arithmetic. It will work on machines with a guard digit in */ /* add/subtract, or on those binary machines without guard digits */ /* which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. */ /* It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. */ /* DLASD3 is called from DLASD1. */ /* Arguments */ /* ========= */ /* NL (input) INTEGER */ /* The row dimension of the upper block. NL >= 1. */ /* NR (input) INTEGER */ /* The row dimension of the lower block. NR >= 1. */ /* SQRE (input) INTEGER */ /* = 0: the lower block is an NR-by-NR square matrix. */ /* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ /* The bidiagonal matrix has N = NL + NR + 1 rows and */ /* M = N + SQRE >= N columns. */ /* K (input) INTEGER */ /* The size of the secular equation, 1 =< K = < N. */ /* D (output) DOUBLE PRECISION array, dimension(K) */ /* On exit the square roots of the roots of the secular equation, */ /* in ascending order. */ /* Q (workspace) DOUBLE PRECISION array, */ /* dimension at least (LDQ,K). */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= K. */ /* DSIGMA (input) DOUBLE PRECISION array, dimension(K) */ /* The first K elements of this array contain the old roots */ /* of the deflated updating problem. These are the poles */ /* of the secular equation. */ /* U (output) DOUBLE PRECISION array, dimension (LDU, N) */ /* The last N - K columns of this matrix contain the deflated */ /* left singular vectors. */ /* LDU (input) INTEGER */ /* The leading dimension of the array U. LDU >= N. */ /* U2 (input/output) DOUBLE PRECISION array, dimension (LDU2, N) */ /* The first K columns of this matrix contain the non-deflated */ /* left singular vectors for the split problem. */ /* LDU2 (input) INTEGER */ /* The leading dimension of the array U2. LDU2 >= N. */ /* VT (output) DOUBLE PRECISION array, dimension (LDVT, M) */ /* The last M - K columns of VT' contain the deflated */ /* right singular vectors. */ /* LDVT (input) INTEGER */ /* The leading dimension of the array VT. LDVT >= N. */ /* VT2 (input/output) DOUBLE PRECISION array, dimension (LDVT2, N) */ /* The first K columns of VT2' contain the non-deflated */ /* right singular vectors for the split problem. */ /* LDVT2 (input) INTEGER */ /* The leading dimension of the array VT2. LDVT2 >= N. */ /* IDXC (input) INTEGER array, dimension ( N ) */ /* The permutation used to arrange the columns of U (and rows of */ /* VT) into three groups: the first group contains non-zero */ /* entries only at and above (or before) NL +1; the second */ /* contains non-zero entries only at and below (or after) NL+2; */ /* and the third is dense. The first column of U and the row of */ /* VT are treated separately, however. */ /* The rows of the singular vectors found by DLASD4 */ /* must be likewise permuted before the matrix multiplies can */ /* take place. */ /* CTOT (input) INTEGER array, dimension ( 4 ) */ /* A count of the total number of the various types of columns */ /* in U (or rows in VT), as described in IDXC. The fourth column */ /* type is any column which has been deflated. */ /* Z (input) DOUBLE PRECISION array, dimension (K) */ /* The first K elements of this array contain the components */ /* of the deflation-adjusted updating row vector. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, an singular value did not converge */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --dsigma; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; u2_dim1 = *ldu2; u2_offset = 1 + u2_dim1; u2 -= u2_offset; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1; vt -= vt_offset; vt2_dim1 = *ldvt2; vt2_offset = 1 + vt2_dim1; vt2 -= vt2_offset; --idxc; --ctot; --z__; /* Function Body */ *info = 0; if (*nl < 1) { *info = -1; } else if (*nr < 1) { *info = -2; } else if (*sqre != 1 && *sqre != 0) { *info = -3; } n = *nl + *nr + 1; m = n + *sqre; nlp1 = *nl + 1; nlp2 = *nl + 2; if (*k < 1 || *k > n) { *info = -4; } else if (*ldq < *k) { *info = -7; } else if (*ldu < n) { *info = -10; } else if (*ldu2 < n) { *info = -12; } else if (*ldvt < m) { *info = -14; } else if (*ldvt2 < m) { *info = -16; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLASD3", &i__1); return 0; } /* Quick return if possible */ if (*k == 1) { d__[1] = abs(z__[1]); _starpu_dcopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); if (z__[1] > 0.) { _starpu_dcopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); } else { i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { u[i__ + u_dim1] = -u2[i__ + u2_dim1]; /* L10: */ } } return 0; } /* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can */ /* be computed with high relative accuracy (barring over/underflow). */ /* This is a problem on machines without a guard digit in */ /* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ /* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), */ /* which on any of these machines zeros out the bottommost */ /* bit of DSIGMA(I) if it is 1; this makes the subsequent */ /* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation */ /* occurs. On binary machines with a guard digit (almost all */ /* machines) it does not change DSIGMA(I) at all. On hexadecimal */ /* and decimal machines with a guard digit, it slightly */ /* changes the bottommost bits of DSIGMA(I). It does not account */ /* for hexadecimal or decimal machines without guard digits */ /* (we know of none). We use a subroutine call to compute */ /* 2*DSIGMA(I) to prevent optimizing compilers from eliminating */ /* this code. */ i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { dsigma[i__] = _starpu_dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; /* L20: */ } /* Keep a copy of Z. */ _starpu_dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); /* Normalize Z. */ rho = _starpu_dnrm2_(k, &z__[1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &rho, &c_b13, k, &c__1, &z__[1], k, info); rho *= rho; /* Find the new singular values. */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dlasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], &vt[j * vt_dim1 + 1], info); /* If the zero finder fails, the computation is terminated. */ if (*info != 0) { return 0; } /* L30: */ } /* Compute updated Z. */ i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1]; i__2 = i__ - 1; for (j = 1; j <= i__2; ++j) { z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]); /* L40: */ } i__2 = *k - 1; for (j = i__; j <= i__2; ++j) { z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); /* L50: */ } d__2 = sqrt((d__1 = z__[i__], abs(d__1))); z__[i__] = d_sign(&d__2, &q[i__ + q_dim1]); /* L60: */ } /* Compute left singular vectors of the modified diagonal matrix, */ /* and store related information for the right singular vectors. */ i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * vt_dim1 + 1]; u[i__ * u_dim1 + 1] = -1.; i__2 = *k; for (j = 2; j <= i__2; ++j) { vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ * vt_dim1]; u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; /* L70: */ } temp = _starpu_dnrm2_(k, &u[i__ * u_dim1 + 1], &c__1); q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; i__2 = *k; for (j = 2; j <= i__2; ++j) { jc = idxc[j]; q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp; /* L80: */ } /* L90: */ } /* Update the left singular vector matrix. */ if (*k == 2) { _starpu_dgemm_("N", "N", &n, k, k, &c_b13, &u2[u2_offset], ldu2, &q[q_offset], ldq, &c_b26, &u[u_offset], ldu); goto L100; } if (ctot[1] > 0) { _starpu_dgemm_("N", "N", nl, k, &ctot[1], &c_b13, &u2[(u2_dim1 << 1) + 1], ldu2, &q[q_dim1 + 2], ldq, &c_b26, &u[u_dim1 + 1], ldu); if (ctot[3] > 0) { ktemp = ctot[1] + 2 + ctot[2]; _starpu_dgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1] , ldu2, &q[ktemp + q_dim1], ldq, &c_b13, &u[u_dim1 + 1], ldu); } } else if (ctot[3] > 0) { ktemp = ctot[1] + 2 + ctot[2]; _starpu_dgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1], ldu2, &q[ktemp + q_dim1], ldq, &c_b26, &u[u_dim1 + 1], ldu); } else { _starpu_dlacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu); } _starpu_dcopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); ktemp = ctot[1] + 2; ctemp = ctot[2] + ctot[3]; _starpu_dgemm_("N", "N", nr, k, &ctemp, &c_b13, &u2[nlp2 + ktemp * u2_dim1], ldu2, &q[ktemp + q_dim1], ldq, &c_b26, &u[nlp2 + u_dim1], ldu); /* Generate the right singular vectors. */ L100: i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { temp = _starpu_dnrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; i__2 = *k; for (j = 2; j <= i__2; ++j) { jc = idxc[j]; q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp; /* L110: */ } /* L120: */ } /* Update the right singular vector matrix. */ if (*k == 2) { _starpu_dgemm_("N", "N", k, &m, k, &c_b13, &q[q_offset], ldq, &vt2[vt2_offset] , ldvt2, &c_b26, &vt[vt_offset], ldvt); return 0; } ktemp = ctot[1] + 1; _starpu_dgemm_("N", "N", k, &nlp1, &ktemp, &c_b13, &q[q_dim1 + 1], ldq, &vt2[ vt2_dim1 + 1], ldvt2, &c_b26, &vt[vt_dim1 + 1], ldvt); ktemp = ctot[1] + 2 + ctot[2]; if (ktemp <= *ldvt2) { _starpu_dgemm_("N", "N", k, &nlp1, &ctot[3], &c_b13, &q[ktemp * q_dim1 + 1], ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b13, &vt[vt_dim1 + 1], ldvt); } ktemp = ctot[1] + 1; nrp1 = *nr + *sqre; if (ktemp > 1) { i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { q[i__ + ktemp * q_dim1] = q[i__ + q_dim1]; /* L130: */ } i__1 = m; for (i__ = nlp2; i__ <= i__1; ++i__) { vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1]; /* L140: */ } } ctemp = ctot[2] + 1 + ctot[3]; _starpu_dgemm_("N", "N", k, &nrp1, &ctemp, &c_b13, &q[ktemp * q_dim1 + 1], ldq, & vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b26, &vt[nlp2 * vt_dim1 + 1], ldvt); return 0; /* End of DLASD3 */ } /* _starpu_dlasd3_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasd4.c000066400000000000000000000603711507764646700205560ustar00rootroot00000000000000/* dlasd4.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlasd4_(integer *n, integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho, doublereal * sigma, doublereal *work, integer *info) { /* System generated locals */ integer i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal a, b, c__; integer j; doublereal w, dd[3]; integer ii; doublereal dw, zz[3]; integer ip1; doublereal eta, phi, eps, tau, psi; integer iim1, iip1; doublereal dphi, dpsi; integer iter; doublereal temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, dtiip; integer niter; doublereal dtisq; logical swtch; doublereal dtnsq; extern /* Subroutine */ int _starpu_dlaed6_(integer *, logical *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *) , _starpu_dlasd5_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); doublereal delsq2, dtnsq1; logical swtch3; extern doublereal _starpu_dlamch_(char *); logical orgati; doublereal erretm, dtipsq, rhoinv; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This subroutine computes the square root of the I-th updated */ /* eigenvalue of a positive symmetric rank-one modification to */ /* a positive diagonal matrix whose entries are given as the squares */ /* of the corresponding entries in the array d, and that */ /* 0 <= D(i) < D(j) for i < j */ /* and that RHO > 0. This is arranged by the calling routine, and is */ /* no loss in generality. The rank-one modified system is thus */ /* diag( D ) * diag( D ) + RHO * Z * Z_transpose. */ /* where we assume the Euclidean norm of Z is 1. */ /* The method consists of approximating the rational functions in the */ /* secular equation by simpler interpolating rational functions. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The length of all arrays. */ /* I (input) INTEGER */ /* The index of the eigenvalue to be computed. 1 <= I <= N. */ /* D (input) DOUBLE PRECISION array, dimension ( N ) */ /* The original eigenvalues. It is assumed that they are in */ /* order, 0 <= D(I) < D(J) for I < J. */ /* Z (input) DOUBLE PRECISION array, dimension ( N ) */ /* The components of the updating vector. */ /* DELTA (output) DOUBLE PRECISION array, dimension ( N ) */ /* If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th */ /* component. If N = 1, then DELTA(1) = 1. The vector DELTA */ /* contains the information necessary to construct the */ /* (singular) eigenvectors. */ /* RHO (input) DOUBLE PRECISION */ /* The scalar in the symmetric updating formula. */ /* SIGMA (output) DOUBLE PRECISION */ /* The computed sigma_I, the I-th updated eigenvalue. */ /* WORK (workspace) DOUBLE PRECISION array, dimension ( N ) */ /* If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th */ /* component. If N = 1, then WORK( 1 ) = 1. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* > 0: if INFO = 1, the updating process failed. */ /* Internal Parameters */ /* =================== */ /* Logical variable ORGATI (origin-at-i?) is used for distinguishing */ /* whether D(i) or D(i+1) is treated as the origin. */ /* ORGATI = .true. origin at i */ /* ORGATI = .false. origin at i+1 */ /* Logical variable SWTCH3 (switch-for-3-poles?) is for noting */ /* if we are working with THREE poles! */ /* MAXIT is the maximum number of iterations allowed for each */ /* eigenvalue. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ren-Cang Li, Computer Science Division, University of California */ /* at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Since this routine is called in an inner loop, we do no argument */ /* checking. */ /* Quick return for N=1 and 2. */ /* Parameter adjustments */ --work; --delta; --z__; --d__; /* Function Body */ *info = 0; if (*n == 1) { /* Presumably, I=1 upon entry */ *sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]); delta[1] = 1.; work[1] = 1.; return 0; } if (*n == 2) { _starpu_dlasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]); return 0; } /* Compute machine epsilon */ eps = _starpu_dlamch_("Epsilon"); rhoinv = 1. / *rho; /* The case I = N */ if (*i__ == *n) { /* Initialize some basic variables */ ii = *n - 1; niter = 1; /* Calculate initial guess */ temp = *rho / 2.; /* If ||Z||_2 is not one, then TEMP should be set to */ /* RHO * ||Z||_2^2 / TWO */ temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp)); i__1 = *n; for (j = 1; j <= i__1; ++j) { work[j] = d__[j] + d__[*n] + temp1; delta[j] = d__[j] - d__[*n] - temp1; /* L10: */ } psi = 0.; i__1 = *n - 2; for (j = 1; j <= i__1; ++j) { psi += z__[j] * z__[j] / (delta[j] * work[j]); /* L20: */ } c__ = rhoinv + psi; w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[* n] / (delta[*n] * work[*n]); if (w <= 0.) { temp1 = sqrt(d__[*n] * d__[*n] + *rho); temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[* n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] * z__[*n] / *rho; /* The following TAU is to approximate */ /* SIGMA_n^2 - D( N )*D( N ) */ if (c__ <= temp) { tau = *rho; } else { delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[* n]; b = z__[*n] * z__[*n] * delsq; if (a < 0.) { tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); } else { tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); } } /* It can be proved that */ /* D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO */ } else { delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; b = z__[*n] * z__[*n] * delsq; /* The following TAU is to approximate */ /* SIGMA_n^2 - D( N )*D( N ) */ if (a < 0.) { tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); } else { tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); } /* It can be proved that */ /* D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 */ } /* The following ETA is to approximate SIGMA_n - D( N ) */ eta = tau / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau)); *sigma = d__[*n] + eta; i__1 = *n; for (j = 1; j <= i__1; ++j) { delta[j] = d__[j] - d__[*i__] - eta; work[j] = d__[j] + d__[*i__] + eta; /* L30: */ } /* Evaluate PSI and the derivative DPSI */ dpsi = 0.; psi = 0.; erretm = 0.; i__1 = ii; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (delta[j] * work[j]); psi += z__[j] * temp; dpsi += temp * temp; erretm += psi; /* L40: */ } erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ temp = z__[*n] / (delta[*n] * work[*n]); phi = z__[*n] * temp; dphi = temp * temp; erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi); w = rhoinv + phi + psi; /* Test for convergence */ if (abs(w) <= eps * erretm) { goto L240; } /* Calculate the new step */ ++niter; dtnsq1 = work[*n - 1] * delta[*n - 1]; dtnsq = work[*n] * delta[*n]; c__ = w - dtnsq1 * dpsi - dtnsq * dphi; a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi); b = dtnsq * dtnsq1 * w; if (c__ < 0.) { c__ = abs(c__); } if (c__ == 0.) { eta = *rho - *sigma * *sigma; } else if (a >= 0.) { eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.); } else { eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) ); } /* Note, eta should be positive if w is negative, and */ /* eta should be negative otherwise. However, */ /* if for some reason caused by roundoff, eta*w > 0, */ /* we simply use one Newton step instead. This way */ /* will guarantee eta*w < 0. */ if (w * eta > 0.) { eta = -w / (dpsi + dphi); } temp = eta - dtnsq; if (temp > *rho) { eta = *rho + dtnsq; } tau += eta; eta /= *sigma + sqrt(eta + *sigma * *sigma); i__1 = *n; for (j = 1; j <= i__1; ++j) { delta[j] -= eta; work[j] += eta; /* L50: */ } *sigma += eta; /* Evaluate PSI and the derivative DPSI */ dpsi = 0.; psi = 0.; erretm = 0.; i__1 = ii; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (work[j] * delta[j]); psi += z__[j] * temp; dpsi += temp * temp; erretm += psi; /* L60: */ } erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ temp = z__[*n] / (work[*n] * delta[*n]); phi = z__[*n] * temp; dphi = temp * temp; erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + dphi); w = rhoinv + phi + psi; /* Main loop to update the values of the array DELTA */ iter = niter + 1; for (niter = iter; niter <= 20; ++niter) { /* Test for convergence */ if (abs(w) <= eps * erretm) { goto L240; } /* Calculate the new step */ dtnsq1 = work[*n - 1] * delta[*n - 1]; dtnsq = work[*n] * delta[*n]; c__ = w - dtnsq1 * dpsi - dtnsq * dphi; a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi); b = dtnsq1 * dtnsq * w; if (a >= 0.) { eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( c__ * 2.); } else { eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( d__1)))); } /* Note, eta should be positive if w is negative, and */ /* eta should be negative otherwise. However, */ /* if for some reason caused by roundoff, eta*w > 0, */ /* we simply use one Newton step instead. This way */ /* will guarantee eta*w < 0. */ if (w * eta > 0.) { eta = -w / (dpsi + dphi); } temp = eta - dtnsq; if (temp <= 0.) { eta /= 2.; } tau += eta; eta /= *sigma + sqrt(eta + *sigma * *sigma); i__1 = *n; for (j = 1; j <= i__1; ++j) { delta[j] -= eta; work[j] += eta; /* L70: */ } *sigma += eta; /* Evaluate PSI and the derivative DPSI */ dpsi = 0.; psi = 0.; erretm = 0.; i__1 = ii; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (work[j] * delta[j]); psi += z__[j] * temp; dpsi += temp * temp; erretm += psi; /* L80: */ } erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ temp = z__[*n] / (work[*n] * delta[*n]); phi = z__[*n] * temp; dphi = temp * temp; erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( dpsi + dphi); w = rhoinv + phi + psi; /* L90: */ } /* Return with INFO = 1, NITER = MAXIT and not converged */ *info = 1; goto L240; /* End for the case I = N */ } else { /* The case for I < N */ niter = 1; ip1 = *i__ + 1; /* Calculate initial guess */ delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]); delsq2 = delsq / 2.; temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2)); i__1 = *n; for (j = 1; j <= i__1; ++j) { work[j] = d__[j] + d__[*i__] + temp; delta[j] = d__[j] - d__[*i__] - temp; /* L100: */ } psi = 0.; i__1 = *i__ - 1; for (j = 1; j <= i__1; ++j) { psi += z__[j] * z__[j] / (work[j] * delta[j]); /* L110: */ } phi = 0.; i__1 = *i__ + 2; for (j = *n; j >= i__1; --j) { phi += z__[j] * z__[j] / (work[j] * delta[j]); /* L120: */ } c__ = rhoinv + psi + phi; w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[ ip1] * z__[ip1] / (work[ip1] * delta[ip1]); if (w > 0.) { /* d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 */ /* We choose d(i) as origin. */ orgati = TRUE_; sg2lb = 0.; sg2ub = delsq2; a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; b = z__[*i__] * z__[*i__] * delsq; if (a > 0.) { tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( d__1)))); } else { tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( c__ * 2.); } /* TAU now is an estimation of SIGMA^2 - D( I )^2. The */ /* following, however, is the corresponding estimation of */ /* SIGMA - D( I ). */ eta = tau / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + tau)); } else { /* (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 */ /* We choose d(i+1) as origin. */ orgati = FALSE_; sg2lb = -delsq2; sg2ub = 0.; a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; b = z__[ip1] * z__[ip1] * delsq; if (a < 0.) { tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( d__1)))); } else { tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / (c__ * 2.); } /* TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The */ /* following, however, is the corresponding estimation of */ /* SIGMA - D( IP1 ). */ eta = tau / (d__[ip1] + sqrt((d__1 = d__[ip1] * d__[ip1] + tau, abs(d__1)))); } if (orgati) { ii = *i__; *sigma = d__[*i__] + eta; i__1 = *n; for (j = 1; j <= i__1; ++j) { work[j] = d__[j] + d__[*i__] + eta; delta[j] = d__[j] - d__[*i__] - eta; /* L130: */ } } else { ii = *i__ + 1; *sigma = d__[ip1] + eta; i__1 = *n; for (j = 1; j <= i__1; ++j) { work[j] = d__[j] + d__[ip1] + eta; delta[j] = d__[j] - d__[ip1] - eta; /* L140: */ } } iim1 = ii - 1; iip1 = ii + 1; /* Evaluate PSI and the derivative DPSI */ dpsi = 0.; psi = 0.; erretm = 0.; i__1 = iim1; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (work[j] * delta[j]); psi += z__[j] * temp; dpsi += temp * temp; erretm += psi; /* L150: */ } erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ dphi = 0.; phi = 0.; i__1 = iip1; for (j = *n; j >= i__1; --j) { temp = z__[j] / (work[j] * delta[j]); phi += z__[j] * temp; dphi += temp * temp; erretm += phi; /* L160: */ } w = rhoinv + phi + psi; /* W is the value of the secular function with */ /* its ii-th element removed. */ swtch3 = FALSE_; if (orgati) { if (w < 0.) { swtch3 = TRUE_; } } else { if (w > 0.) { swtch3 = TRUE_; } } if (ii == 1 || ii == *n) { swtch3 = FALSE_; } temp = z__[ii] / (work[ii] * delta[ii]); dw = dpsi + dphi + temp * temp; temp = z__[ii] * temp; w += temp; erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + abs(tau) * dw; /* Test for convergence */ if (abs(w) <= eps * erretm) { goto L240; } if (w <= 0.) { sg2lb = max(sg2lb,tau); } else { sg2ub = min(sg2ub,tau); } /* Calculate the new step */ ++niter; if (! swtch3) { dtipsq = work[ip1] * delta[ip1]; dtisq = work[*i__] * delta[*i__]; if (orgati) { /* Computing 2nd power */ d__1 = z__[*i__] / dtisq; c__ = w - dtipsq * dw + delsq * (d__1 * d__1); } else { /* Computing 2nd power */ d__1 = z__[ip1] / dtipsq; c__ = w - dtisq * dw - delsq * (d__1 * d__1); } a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; b = dtipsq * dtisq * w; if (c__ == 0.) { if (a == 0.) { if (orgati) { a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi + dphi); } else { a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi + dphi); } } eta = b / a; } else if (a <= 0.) { eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( c__ * 2.); } else { eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( d__1)))); } } else { /* Interpolation using THREE most relevant poles */ dtiim = work[iim1] * delta[iim1]; dtiip = work[iip1] * delta[iip1]; temp = rhoinv + psi + phi; if (orgati) { temp1 = z__[iim1] / dtiim; temp1 *= temp1; c__ = temp - dtiip * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[iip1]) * temp1; zz[0] = z__[iim1] * z__[iim1]; if (dpsi < temp1) { zz[2] = dtiip * dtiip * dphi; } else { zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); } } else { temp1 = z__[iip1] / dtiip; temp1 *= temp1; c__ = temp - dtiim * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[iip1]) * temp1; if (dphi < temp1) { zz[0] = dtiim * dtiim * dpsi; } else { zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); } zz[2] = z__[iip1] * z__[iip1]; } zz[1] = z__[ii] * z__[ii]; dd[0] = dtiim; dd[1] = delta[ii] * work[ii]; dd[2] = dtiip; _starpu_dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); if (*info != 0) { goto L240; } } /* Note, eta should be positive if w is negative, and */ /* eta should be negative otherwise. However, */ /* if for some reason caused by roundoff, eta*w > 0, */ /* we simply use one Newton step instead. This way */ /* will guarantee eta*w < 0. */ if (w * eta >= 0.) { eta = -w / dw; } if (orgati) { temp1 = work[*i__] * delta[*i__]; temp = eta - temp1; } else { temp1 = work[ip1] * delta[ip1]; temp = eta - temp1; } if (temp > sg2ub || temp < sg2lb) { if (w < 0.) { eta = (sg2ub - tau) / 2.; } else { eta = (sg2lb - tau) / 2.; } } tau += eta; eta /= *sigma + sqrt(*sigma * *sigma + eta); prew = w; *sigma += eta; i__1 = *n; for (j = 1; j <= i__1; ++j) { work[j] += eta; delta[j] -= eta; /* L170: */ } /* Evaluate PSI and the derivative DPSI */ dpsi = 0.; psi = 0.; erretm = 0.; i__1 = iim1; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (work[j] * delta[j]); psi += z__[j] * temp; dpsi += temp * temp; erretm += psi; /* L180: */ } erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ dphi = 0.; phi = 0.; i__1 = iip1; for (j = *n; j >= i__1; --j) { temp = z__[j] / (work[j] * delta[j]); phi += z__[j] * temp; dphi += temp * temp; erretm += phi; /* L190: */ } temp = z__[ii] / (work[ii] * delta[ii]); dw = dpsi + dphi + temp * temp; temp = z__[ii] * temp; w = rhoinv + phi + psi + temp; erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + abs(tau) * dw; if (w <= 0.) { sg2lb = max(sg2lb,tau); } else { sg2ub = min(sg2ub,tau); } swtch = FALSE_; if (orgati) { if (-w > abs(prew) / 10.) { swtch = TRUE_; } } else { if (w > abs(prew) / 10.) { swtch = TRUE_; } } /* Main loop to update the values of the array DELTA and WORK */ iter = niter + 1; for (niter = iter; niter <= 20; ++niter) { /* Test for convergence */ if (abs(w) <= eps * erretm) { goto L240; } /* Calculate the new step */ if (! swtch3) { dtipsq = work[ip1] * delta[ip1]; dtisq = work[*i__] * delta[*i__]; if (! swtch) { if (orgati) { /* Computing 2nd power */ d__1 = z__[*i__] / dtisq; c__ = w - dtipsq * dw + delsq * (d__1 * d__1); } else { /* Computing 2nd power */ d__1 = z__[ip1] / dtipsq; c__ = w - dtisq * dw - delsq * (d__1 * d__1); } } else { temp = z__[ii] / (work[ii] * delta[ii]); if (orgati) { dpsi += temp * temp; } else { dphi += temp * temp; } c__ = w - dtisq * dpsi - dtipsq * dphi; } a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; b = dtipsq * dtisq * w; if (c__ == 0.) { if (a == 0.) { if (! swtch) { if (orgati) { a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi + dphi); } else { a = z__[ip1] * z__[ip1] + dtisq * dtisq * ( dpsi + dphi); } } else { a = dtisq * dtisq * dpsi + dtipsq * dtipsq * dphi; } } eta = b / a; } else if (a <= 0.) { eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ * 2.); } else { eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))); } } else { /* Interpolation using THREE most relevant poles */ dtiim = work[iim1] * delta[iim1]; dtiip = work[iip1] * delta[iip1]; temp = rhoinv + psi + phi; if (swtch) { c__ = temp - dtiim * dpsi - dtiip * dphi; zz[0] = dtiim * dtiim * dpsi; zz[2] = dtiip * dtiip * dphi; } else { if (orgati) { temp1 = z__[iim1] / dtiim; temp1 *= temp1; temp2 = (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[ iip1]) * temp1; c__ = temp - dtiip * (dpsi + dphi) - temp2; zz[0] = z__[iim1] * z__[iim1]; if (dpsi < temp1) { zz[2] = dtiip * dtiip * dphi; } else { zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); } } else { temp1 = z__[iip1] / dtiip; temp1 *= temp1; temp2 = (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[ iip1]) * temp1; c__ = temp - dtiim * (dpsi + dphi) - temp2; if (dphi < temp1) { zz[0] = dtiim * dtiim * dpsi; } else { zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); } zz[2] = z__[iip1] * z__[iip1]; } } dd[0] = dtiim; dd[1] = delta[ii] * work[ii]; dd[2] = dtiip; _starpu_dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); if (*info != 0) { goto L240; } } /* Note, eta should be positive if w is negative, and */ /* eta should be negative otherwise. However, */ /* if for some reason caused by roundoff, eta*w > 0, */ /* we simply use one Newton step instead. This way */ /* will guarantee eta*w < 0. */ if (w * eta >= 0.) { eta = -w / dw; } if (orgati) { temp1 = work[*i__] * delta[*i__]; temp = eta - temp1; } else { temp1 = work[ip1] * delta[ip1]; temp = eta - temp1; } if (temp > sg2ub || temp < sg2lb) { if (w < 0.) { eta = (sg2ub - tau) / 2.; } else { eta = (sg2lb - tau) / 2.; } } tau += eta; eta /= *sigma + sqrt(*sigma * *sigma + eta); *sigma += eta; i__1 = *n; for (j = 1; j <= i__1; ++j) { work[j] += eta; delta[j] -= eta; /* L200: */ } prew = w; /* Evaluate PSI and the derivative DPSI */ dpsi = 0.; psi = 0.; erretm = 0.; i__1 = iim1; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (work[j] * delta[j]); psi += z__[j] * temp; dpsi += temp * temp; erretm += psi; /* L210: */ } erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ dphi = 0.; phi = 0.; i__1 = iip1; for (j = *n; j >= i__1; --j) { temp = z__[j] / (work[j] * delta[j]); phi += z__[j] * temp; dphi += temp * temp; erretm += phi; /* L220: */ } temp = z__[ii] / (work[ii] * delta[ii]); dw = dpsi + dphi + temp * temp; temp = z__[ii] * temp; w = rhoinv + phi + psi + temp; erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + abs(tau) * dw; if (w * prew > 0. && abs(w) > abs(prew) / 10.) { swtch = ! swtch; } if (w <= 0.) { sg2lb = max(sg2lb,tau); } else { sg2ub = min(sg2ub,tau); } /* L230: */ } /* Return with INFO = 1, NITER = MAXIT and not converged */ *info = 1; } L240: return 0; /* End of DLASD4 */ } /* _starpu_dlasd4_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasd5.c000066400000000000000000000130251507764646700205510ustar00rootroot00000000000000/* dlasd5.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlasd5_(integer *i__, doublereal *d__, doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dsigma, doublereal * work) { /* System generated locals */ doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal b, c__, w, del, tau, delsq; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This subroutine computes the square root of the I-th eigenvalue */ /* of a positive symmetric rank-one modification of a 2-by-2 diagonal */ /* matrix */ /* diag( D ) * diag( D ) + RHO * Z * transpose(Z) . */ /* The diagonal entries in the array D are assumed to satisfy */ /* 0 <= D(i) < D(j) for i < j . */ /* We also assume RHO > 0 and that the Euclidean norm of the vector */ /* Z is one. */ /* Arguments */ /* ========= */ /* I (input) INTEGER */ /* The index of the eigenvalue to be computed. I = 1 or I = 2. */ /* D (input) DOUBLE PRECISION array, dimension ( 2 ) */ /* The original eigenvalues. We assume 0 <= D(1) < D(2). */ /* Z (input) DOUBLE PRECISION array, dimension ( 2 ) */ /* The components of the updating vector. */ /* DELTA (output) DOUBLE PRECISION array, dimension ( 2 ) */ /* Contains (D(j) - sigma_I) in its j-th component. */ /* The vector DELTA contains the information necessary */ /* to construct the eigenvectors. */ /* RHO (input) DOUBLE PRECISION */ /* The scalar in the symmetric updating formula. */ /* DSIGMA (output) DOUBLE PRECISION */ /* The computed sigma_I, the I-th updated eigenvalue. */ /* WORK (workspace) DOUBLE PRECISION array, dimension ( 2 ) */ /* WORK contains (D(j) + sigma_I) in its j-th component. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ren-Cang Li, Computer Science Division, University of California */ /* at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --work; --delta; --z__; --d__; /* Function Body */ del = d__[2] - d__[1]; delsq = del * (d__[2] + d__[1]); if (*i__ == 1) { w = *rho * 4. * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.) - z__[1] * z__[1] / (d__[1] * 3. + d__[2])) / del + 1.; if (w > 0.) { b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); c__ = *rho * z__[1] * z__[1] * delsq; /* B > ZERO, always */ /* The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) */ tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); /* The following TAU is DSIGMA - D( 1 ) */ tau /= d__[1] + sqrt(d__[1] * d__[1] + tau); *dsigma = d__[1] + tau; delta[1] = -tau; delta[2] = del - tau; work[1] = d__[1] * 2. + tau; work[2] = d__[1] + tau + d__[2]; /* DELTA( 1 ) = -Z( 1 ) / TAU */ /* DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) */ } else { b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); c__ = *rho * z__[2] * z__[2] * delsq; /* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ if (b > 0.) { tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); } else { tau = (b - sqrt(b * b + c__ * 4.)) / 2.; } /* The following TAU is DSIGMA - D( 2 ) */ tau /= d__[2] + sqrt((d__1 = d__[2] * d__[2] + tau, abs(d__1))); *dsigma = d__[2] + tau; delta[1] = -(del + tau); delta[2] = -tau; work[1] = d__[1] + tau + d__[2]; work[2] = d__[2] * 2. + tau; /* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) */ /* DELTA( 2 ) = -Z( 2 ) / TAU */ } /* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) */ /* DELTA( 1 ) = DELTA( 1 ) / TEMP */ /* DELTA( 2 ) = DELTA( 2 ) / TEMP */ } else { /* Now I=2 */ b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); c__ = *rho * z__[2] * z__[2] * delsq; /* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ if (b > 0.) { tau = (b + sqrt(b * b + c__ * 4.)) / 2.; } else { tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); } /* The following TAU is DSIGMA - D( 2 ) */ tau /= d__[2] + sqrt(d__[2] * d__[2] + tau); *dsigma = d__[2] + tau; delta[1] = -(del + tau); delta[2] = -tau; work[1] = d__[1] + tau + d__[2]; work[2] = d__[2] * 2. + tau; /* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) */ /* DELTA( 2 ) = -Z( 2 ) / TAU */ /* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) */ /* DELTA( 1 ) = DELTA( 1 ) / TEMP */ /* DELTA( 2 ) = DELTA( 2 ) / TEMP */ } return 0; /* End of DLASD5 */ } /* _starpu_dlasd5_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasd6.c000066400000000000000000000327051507764646700205600ustar00rootroot00000000000000/* dlasd6.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__0 = 0; static doublereal c_b7 = 1.; static integer c__1 = 1; static integer c_n1 = -1; /* Subroutine */ int _starpu_dlasd6_(integer *icompq, integer *nl, integer *nr, integer *sqre, doublereal *d__, doublereal *vf, doublereal *vl, doublereal *alpha, doublereal *beta, integer *idxq, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal *poles, doublereal *difl, doublereal * difr, doublereal *z__, integer *k, doublereal *c__, doublereal *s, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, i__1; doublereal d__1, d__2; /* Local variables */ integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlasd7_(integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlasd8_( integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *); integer isigma; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal orgnrm; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASD6 computes the SVD of an updated upper bidiagonal matrix B */ /* obtained by merging two smaller ones by appending a row. This */ /* routine is used only for the problem which requires all singular */ /* values and optionally singular vector matrices in factored form. */ /* B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. */ /* A related subroutine, DLASD1, handles the case in which all singular */ /* values and singular vectors of the bidiagonal matrix are desired. */ /* DLASD6 computes the SVD as follows: */ /* ( D1(in) 0 0 0 ) */ /* B = U(in) * ( Z1' a Z2' b ) * VT(in) */ /* ( 0 0 D2(in) 0 ) */ /* = U(out) * ( D(out) 0) * VT(out) */ /* where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M */ /* with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros */ /* elsewhere; and the entry b is empty if SQRE = 0. */ /* The singular values of B can be computed using D1, D2, the first */ /* components of all the right singular vectors of the lower block, and */ /* the last components of all the right singular vectors of the upper */ /* block. These components are stored and updated in VF and VL, */ /* respectively, in DLASD6. Hence U and VT are not explicitly */ /* referenced. */ /* The singular values are stored in D. The algorithm consists of two */ /* stages: */ /* The first stage consists of deflating the size of the problem */ /* when there are multiple singular values or if there is a zero */ /* in the Z vector. For each such occurence the dimension of the */ /* secular equation problem is reduced by one. This stage is */ /* performed by the routine DLASD7. */ /* The second stage consists of calculating the updated */ /* singular values. This is done by finding the roots of the */ /* secular equation via the routine DLASD4 (as called by DLASD8). */ /* This routine also updates VF and VL and computes the distances */ /* between the updated singular values and the old singular */ /* values. */ /* DLASD6 is called from DLASDA. */ /* Arguments */ /* ========= */ /* ICOMPQ (input) INTEGER */ /* Specifies whether singular vectors are to be computed in */ /* factored form: */ /* = 0: Compute singular values only. */ /* = 1: Compute singular vectors in factored form as well. */ /* NL (input) INTEGER */ /* The row dimension of the upper block. NL >= 1. */ /* NR (input) INTEGER */ /* The row dimension of the lower block. NR >= 1. */ /* SQRE (input) INTEGER */ /* = 0: the lower block is an NR-by-NR square matrix. */ /* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ /* The bidiagonal matrix has row dimension N = NL + NR + 1, */ /* and column dimension M = N + SQRE. */ /* D (input/output) DOUBLE PRECISION array, dimension ( NL+NR+1 ). */ /* On entry D(1:NL,1:NL) contains the singular values of the */ /* upper block, and D(NL+2:N) contains the singular values */ /* of the lower block. On exit D(1:N) contains the singular */ /* values of the modified matrix. */ /* VF (input/output) DOUBLE PRECISION array, dimension ( M ) */ /* On entry, VF(1:NL+1) contains the first components of all */ /* right singular vectors of the upper block; and VF(NL+2:M) */ /* contains the first components of all right singular vectors */ /* of the lower block. On exit, VF contains the first components */ /* of all right singular vectors of the bidiagonal matrix. */ /* VL (input/output) DOUBLE PRECISION array, dimension ( M ) */ /* On entry, VL(1:NL+1) contains the last components of all */ /* right singular vectors of the upper block; and VL(NL+2:M) */ /* contains the last components of all right singular vectors of */ /* the lower block. On exit, VL contains the last components of */ /* all right singular vectors of the bidiagonal matrix. */ /* ALPHA (input/output) DOUBLE PRECISION */ /* Contains the diagonal element associated with the added row. */ /* BETA (input/output) DOUBLE PRECISION */ /* Contains the off-diagonal element associated with the added */ /* row. */ /* IDXQ (output) INTEGER array, dimension ( N ) */ /* This contains the permutation which will reintegrate the */ /* subproblem just solved back into sorted order, i.e. */ /* D( IDXQ( I = 1, N ) ) will be in ascending order. */ /* PERM (output) INTEGER array, dimension ( N ) */ /* The permutations (from deflation and sorting) to be applied */ /* to each block. Not referenced if ICOMPQ = 0. */ /* GIVPTR (output) INTEGER */ /* The number of Givens rotations which took place in this */ /* subproblem. Not referenced if ICOMPQ = 0. */ /* GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) */ /* Each pair of numbers indicates a pair of columns to take place */ /* in a Givens rotation. Not referenced if ICOMPQ = 0. */ /* LDGCOL (input) INTEGER */ /* leading dimension of GIVCOL, must be at least N. */ /* GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */ /* Each number indicates the C or S value to be used in the */ /* corresponding Givens rotation. Not referenced if ICOMPQ = 0. */ /* LDGNUM (input) INTEGER */ /* The leading dimension of GIVNUM and POLES, must be at least N. */ /* POLES (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */ /* On exit, POLES(1,*) is an array containing the new singular */ /* values obtained from solving the secular equation, and */ /* POLES(2,*) is an array containing the poles in the secular */ /* equation. Not referenced if ICOMPQ = 0. */ /* DIFL (output) DOUBLE PRECISION array, dimension ( N ) */ /* On exit, DIFL(I) is the distance between I-th updated */ /* (undeflated) singular value and the I-th (undeflated) old */ /* singular value. */ /* DIFR (output) DOUBLE PRECISION array, */ /* dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and */ /* dimension ( N ) if ICOMPQ = 0. */ /* On exit, DIFR(I, 1) is the distance between I-th updated */ /* (undeflated) singular value and the I+1-th (undeflated) old */ /* singular value. */ /* If ICOMPQ = 1, DIFR(1:K,2) is an array containing the */ /* normalizing factors for the right singular vector matrix. */ /* See DLASD8 for details on DIFL and DIFR. */ /* Z (output) DOUBLE PRECISION array, dimension ( M ) */ /* The first elements of this array contain the components */ /* of the deflation-adjusted updating row vector. */ /* K (output) INTEGER */ /* Contains the dimension of the non-deflated matrix, */ /* This is the order of the related secular equation. 1 <= K <=N. */ /* C (output) DOUBLE PRECISION */ /* C contains garbage if SQRE =0 and the C-value of a Givens */ /* rotation related to the right null space if SQRE = 1. */ /* S (output) DOUBLE PRECISION */ /* S contains garbage if SQRE =0 and the S-value of a Givens */ /* rotation related to the right null space if SQRE = 1. */ /* WORK (workspace) DOUBLE PRECISION array, dimension ( 4 * M ) */ /* IWORK (workspace) INTEGER array, dimension ( 3 * N ) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, an singular value did not converge */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --vf; --vl; --idxq; --perm; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1; givcol -= givcol_offset; poles_dim1 = *ldgnum; poles_offset = 1 + poles_dim1; poles -= poles_offset; givnum_dim1 = *ldgnum; givnum_offset = 1 + givnum_dim1; givnum -= givnum_offset; --difl; --difr; --z__; --work; --iwork; /* Function Body */ *info = 0; n = *nl + *nr + 1; m = n + *sqre; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*nl < 1) { *info = -2; } else if (*nr < 1) { *info = -3; } else if (*sqre < 0 || *sqre > 1) { *info = -4; } else if (*ldgcol < n) { *info = -14; } else if (*ldgnum < n) { *info = -16; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLASD6", &i__1); return 0; } /* The following values are for bookkeeping purposes only. They are */ /* integer pointers which indicate the portion of the workspace */ /* used by a particular array in DLASD7 and DLASD8. */ isigma = 1; iw = isigma + n; ivfw = iw + m; ivlw = ivfw + m; idx = 1; idxc = idx + n; idxp = idxc + n; /* Scale. */ /* Computing MAX */ d__1 = abs(*alpha), d__2 = abs(*beta); orgnrm = max(d__1,d__2); d__[*nl + 1] = 0.; i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { orgnrm = (d__1 = d__[i__], abs(d__1)); } /* L10: */ } _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b7, &n, &c__1, &d__[1], &n, info); *alpha /= orgnrm; *beta /= orgnrm; /* Sort and Deflate singular values. */ _starpu_dlasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], & work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], & iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[ givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s, info); /* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. */ _starpu_dlasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], ldgnum, &work[isigma], &work[iw], info); /* Save the poles if ICOMPQ = 1. */ if (*icompq == 1) { _starpu_dcopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1); _starpu_dcopy_(k, &work[isigma], &c__1, &poles[(poles_dim1 << 1) + 1], &c__1); } /* Unscale. */ _starpu_dlascl_("G", &c__0, &c__0, &c_b7, &orgnrm, &n, &c__1, &d__[1], &n, info); /* Prepare the IDXQ sorting permutation. */ n1 = *k; n2 = n - *k; _starpu_dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); return 0; /* End of DLASD6 */ } /* _starpu_dlasd6_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasd7.c000066400000000000000000000362241507764646700205610ustar00rootroot00000000000000/* dlasd7.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dlasd7_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *k, doublereal *d__, doublereal *z__, doublereal *zw, doublereal *vf, doublereal *vfw, doublereal *vl, doublereal *vlw, doublereal *alpha, doublereal *beta, doublereal * dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal *c__, doublereal *s, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1; doublereal d__1, d__2; /* Local variables */ integer i__, j, m, n, k2; doublereal z1; integer jp; doublereal eps, tau, tol; integer nlp1, nlp2, idxi, idxj; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); integer idxjp; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); integer jprev; extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *), _starpu_xerbla_(char *, integer *); doublereal hlftol; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASD7 merges the two sets of singular values together into a single */ /* sorted set. Then it tries to deflate the size of the problem. There */ /* are two ways in which deflation can occur: when two or more singular */ /* values are close together or if there is a tiny entry in the Z */ /* vector. For each such occurrence the order of the related */ /* secular equation problem is reduced by one. */ /* DLASD7 is called from DLASD6. */ /* Arguments */ /* ========= */ /* ICOMPQ (input) INTEGER */ /* Specifies whether singular vectors are to be computed */ /* in compact form, as follows: */ /* = 0: Compute singular values only. */ /* = 1: Compute singular vectors of upper */ /* bidiagonal matrix in compact form. */ /* NL (input) INTEGER */ /* The row dimension of the upper block. NL >= 1. */ /* NR (input) INTEGER */ /* The row dimension of the lower block. NR >= 1. */ /* SQRE (input) INTEGER */ /* = 0: the lower block is an NR-by-NR square matrix. */ /* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ /* The bidiagonal matrix has */ /* N = NL + NR + 1 rows and */ /* M = N + SQRE >= N columns. */ /* K (output) INTEGER */ /* Contains the dimension of the non-deflated matrix, this is */ /* the order of the related secular equation. 1 <= K <=N. */ /* D (input/output) DOUBLE PRECISION array, dimension ( N ) */ /* On entry D contains the singular values of the two submatrices */ /* to be combined. On exit D contains the trailing (N-K) updated */ /* singular values (those which were deflated) sorted into */ /* increasing order. */ /* Z (output) DOUBLE PRECISION array, dimension ( M ) */ /* On exit Z contains the updating row vector in the secular */ /* equation. */ /* ZW (workspace) DOUBLE PRECISION array, dimension ( M ) */ /* Workspace for Z. */ /* VF (input/output) DOUBLE PRECISION array, dimension ( M ) */ /* On entry, VF(1:NL+1) contains the first components of all */ /* right singular vectors of the upper block; and VF(NL+2:M) */ /* contains the first components of all right singular vectors */ /* of the lower block. On exit, VF contains the first components */ /* of all right singular vectors of the bidiagonal matrix. */ /* VFW (workspace) DOUBLE PRECISION array, dimension ( M ) */ /* Workspace for VF. */ /* VL (input/output) DOUBLE PRECISION array, dimension ( M ) */ /* On entry, VL(1:NL+1) contains the last components of all */ /* right singular vectors of the upper block; and VL(NL+2:M) */ /* contains the last components of all right singular vectors */ /* of the lower block. On exit, VL contains the last components */ /* of all right singular vectors of the bidiagonal matrix. */ /* VLW (workspace) DOUBLE PRECISION array, dimension ( M ) */ /* Workspace for VL. */ /* ALPHA (input) DOUBLE PRECISION */ /* Contains the diagonal element associated with the added row. */ /* BETA (input) DOUBLE PRECISION */ /* Contains the off-diagonal element associated with the added */ /* row. */ /* DSIGMA (output) DOUBLE PRECISION array, dimension ( N ) */ /* Contains a copy of the diagonal elements (K-1 singular values */ /* and one zero) in the secular equation. */ /* IDX (workspace) INTEGER array, dimension ( N ) */ /* This will contain the permutation used to sort the contents of */ /* D into ascending order. */ /* IDXP (workspace) INTEGER array, dimension ( N ) */ /* This will contain the permutation used to place deflated */ /* values of D at the end of the array. On output IDXP(2:K) */ /* points to the nondeflated D-values and IDXP(K+1:N) */ /* points to the deflated singular values. */ /* IDXQ (input) INTEGER array, dimension ( N ) */ /* This contains the permutation which separately sorts the two */ /* sub-problems in D into ascending order. Note that entries in */ /* the first half of this permutation must first be moved one */ /* position backward; and entries in the second half */ /* must first have NL+1 added to their values. */ /* PERM (output) INTEGER array, dimension ( N ) */ /* The permutations (from deflation and sorting) to be applied */ /* to each singular block. Not referenced if ICOMPQ = 0. */ /* GIVPTR (output) INTEGER */ /* The number of Givens rotations which took place in this */ /* subproblem. Not referenced if ICOMPQ = 0. */ /* GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) */ /* Each pair of numbers indicates a pair of columns to take place */ /* in a Givens rotation. Not referenced if ICOMPQ = 0. */ /* LDGCOL (input) INTEGER */ /* The leading dimension of GIVCOL, must be at least N. */ /* GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */ /* Each number indicates the C or S value to be used in the */ /* corresponding Givens rotation. Not referenced if ICOMPQ = 0. */ /* LDGNUM (input) INTEGER */ /* The leading dimension of GIVNUM, must be at least N. */ /* C (output) DOUBLE PRECISION */ /* C contains garbage if SQRE =0 and the C-value of a Givens */ /* rotation related to the right null space if SQRE = 1. */ /* S (output) DOUBLE PRECISION */ /* S contains garbage if SQRE =0 and the S-value of a Givens */ /* rotation related to the right null space if SQRE = 1. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --z__; --zw; --vf; --vfw; --vl; --vlw; --dsigma; --idx; --idxp; --idxq; --perm; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1; givcol -= givcol_offset; givnum_dim1 = *ldgnum; givnum_offset = 1 + givnum_dim1; givnum -= givnum_offset; /* Function Body */ *info = 0; n = *nl + *nr + 1; m = n + *sqre; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*nl < 1) { *info = -2; } else if (*nr < 1) { *info = -3; } else if (*sqre < 0 || *sqre > 1) { *info = -4; } else if (*ldgcol < n) { *info = -22; } else if (*ldgnum < n) { *info = -24; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLASD7", &i__1); return 0; } nlp1 = *nl + 1; nlp2 = *nl + 2; if (*icompq == 1) { *givptr = 0; } /* Generate the first part of the vector Z and move the singular */ /* values in the first part of D one position backward. */ z1 = *alpha * vl[nlp1]; vl[nlp1] = 0.; tau = vf[nlp1]; for (i__ = *nl; i__ >= 1; --i__) { z__[i__ + 1] = *alpha * vl[i__]; vl[i__] = 0.; vf[i__ + 1] = vf[i__]; d__[i__ + 1] = d__[i__]; idxq[i__ + 1] = idxq[i__] + 1; /* L10: */ } vf[1] = tau; /* Generate the second part of the vector Z. */ i__1 = m; for (i__ = nlp2; i__ <= i__1; ++i__) { z__[i__] = *beta * vf[i__]; vf[i__] = 0.; /* L20: */ } /* Sort the singular values into increasing order */ i__1 = n; for (i__ = nlp2; i__ <= i__1; ++i__) { idxq[i__] += nlp1; /* L30: */ } /* DSIGMA, IDXC, IDXC, and ZW are used as storage space. */ i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { dsigma[i__] = d__[idxq[i__]]; zw[i__] = z__[idxq[i__]]; vfw[i__] = vf[idxq[i__]]; vlw[i__] = vl[idxq[i__]]; /* L40: */ } _starpu_dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { idxi = idx[i__] + 1; d__[i__] = dsigma[idxi]; z__[i__] = zw[idxi]; vf[i__] = vfw[idxi]; vl[i__] = vlw[idxi]; /* L50: */ } /* Calculate the allowable deflation tolerence */ eps = _starpu_dlamch_("Epsilon"); /* Computing MAX */ d__1 = abs(*alpha), d__2 = abs(*beta); tol = max(d__1,d__2); /* Computing MAX */ d__2 = (d__1 = d__[n], abs(d__1)); tol = eps * 64. * max(d__2,tol); /* There are 2 kinds of deflation -- first a value in the z-vector */ /* is small, second two (or more) singular values are very close */ /* together (their difference is small). */ /* If the value in the z-vector is small, we simply permute the */ /* array so that the corresponding singular value is moved to the */ /* end. */ /* If two values in the D-vector are close, we perform a two-sided */ /* rotation designed to make one of the corresponding z-vector */ /* entries zero, and then permute the array so that the deflated */ /* singular value is moved to the end. */ /* If there are multiple singular values then the problem deflates. */ /* Here the number of equal singular values are found. As each equal */ /* singular value is found, an elementary reflector is computed to */ /* rotate the corresponding singular subspace so that the */ /* corresponding components of Z are zero in this new basis. */ *k = 1; k2 = n + 1; i__1 = n; for (j = 2; j <= i__1; ++j) { if ((d__1 = z__[j], abs(d__1)) <= tol) { /* Deflate due to small z component. */ --k2; idxp[k2] = j; if (j == n) { goto L100; } } else { jprev = j; goto L70; } /* L60: */ } L70: j = jprev; L80: ++j; if (j > n) { goto L90; } if ((d__1 = z__[j], abs(d__1)) <= tol) { /* Deflate due to small z component. */ --k2; idxp[k2] = j; } else { /* Check if singular values are close enough to allow deflation. */ if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { /* Deflation is possible. */ *s = z__[jprev]; *c__ = z__[j]; /* Find sqrt(a**2+b**2) without overflow or */ /* destructive underflow. */ tau = _starpu_dlapy2_(c__, s); z__[j] = tau; z__[jprev] = 0.; *c__ /= tau; *s = -(*s) / tau; /* Record the appropriate Givens rotation */ if (*icompq == 1) { ++(*givptr); idxjp = idxq[idx[jprev] + 1]; idxj = idxq[idx[j] + 1]; if (idxjp <= nlp1) { --idxjp; } if (idxj <= nlp1) { --idxj; } givcol[*givptr + (givcol_dim1 << 1)] = idxjp; givcol[*givptr + givcol_dim1] = idxj; givnum[*givptr + (givnum_dim1 << 1)] = *c__; givnum[*givptr + givnum_dim1] = *s; } _starpu_drot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s); _starpu_drot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s); --k2; idxp[k2] = jprev; jprev = j; } else { ++(*k); zw[*k] = z__[jprev]; dsigma[*k] = d__[jprev]; idxp[*k] = jprev; jprev = j; } } goto L80; L90: /* Record the last singular value. */ ++(*k); zw[*k] = z__[jprev]; dsigma[*k] = d__[jprev]; idxp[*k] = jprev; L100: /* Sort the singular values into DSIGMA. The singular values which */ /* were not deflated go into the first K slots of DSIGMA, except */ /* that DSIGMA(1) is treated separately. */ i__1 = n; for (j = 2; j <= i__1; ++j) { jp = idxp[j]; dsigma[j] = d__[jp]; vfw[j] = vf[jp]; vlw[j] = vl[jp]; /* L110: */ } if (*icompq == 1) { i__1 = n; for (j = 2; j <= i__1; ++j) { jp = idxp[j]; perm[j] = idxq[idx[jp] + 1]; if (perm[j] <= nlp1) { --perm[j]; } /* L120: */ } } /* The deflated singular values go back into the last N - K slots of */ /* D. */ i__1 = n - *k; _starpu_dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); /* Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and */ /* VL(M). */ dsigma[1] = 0.; hlftol = tol / 2.; if (abs(dsigma[2]) <= hlftol) { dsigma[2] = hlftol; } if (m > n) { z__[1] = _starpu_dlapy2_(&z1, &z__[m]); if (z__[1] <= tol) { *c__ = 1.; *s = 0.; z__[1] = tol; } else { *c__ = z1 / z__[1]; *s = -z__[m] / z__[1]; } _starpu_drot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s); _starpu_drot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s); } else { if (abs(z1) <= tol) { z__[1] = tol; } else { z__[1] = z1; } } /* Restore Z, VF, and VL. */ i__1 = *k - 1; _starpu_dcopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1); i__1 = n - 1; _starpu_dcopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1); i__1 = n - 1; _starpu_dcopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1); return 0; /* End of DLASD7 */ } /* _starpu_dlasd7_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasd8.c000066400000000000000000000246161507764646700205640ustar00rootroot00000000000000/* dlasd8.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__0 = 0; static doublereal c_b8 = 1.; /* Subroutine */ int _starpu_dlasd8_(integer *icompq, integer *k, doublereal *d__, doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal * work, integer *info) { /* System generated locals */ integer difr_dim1, difr_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ integer i__, j; doublereal dj, rho; integer iwk1, iwk2, iwk3; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal temp; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); integer iwk2i, iwk3i; doublereal diflj, difrj, dsigj; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal _starpu_dlamc3_(doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dlasd4_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); doublereal dsigjp; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* October 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASD8 finds the square roots of the roots of the secular equation, */ /* as defined by the values in DSIGMA and Z. It makes the appropriate */ /* calls to DLASD4, and stores, for each element in D, the distance */ /* to its two nearest poles (elements in DSIGMA). It also updates */ /* the arrays VF and VL, the first and last components of all the */ /* right singular vectors of the original bidiagonal matrix. */ /* DLASD8 is called from DLASD6. */ /* Arguments */ /* ========= */ /* ICOMPQ (input) INTEGER */ /* Specifies whether singular vectors are to be computed in */ /* factored form in the calling routine: */ /* = 0: Compute singular values only. */ /* = 1: Compute singular vectors in factored form as well. */ /* K (input) INTEGER */ /* The number of terms in the rational function to be solved */ /* by DLASD4. K >= 1. */ /* D (output) DOUBLE PRECISION array, dimension ( K ) */ /* On output, D contains the updated singular values. */ /* Z (input/output) DOUBLE PRECISION array, dimension ( K ) */ /* On entry, the first K elements of this array contain the */ /* components of the deflation-adjusted updating row vector. */ /* On exit, Z is updated. */ /* VF (input/output) DOUBLE PRECISION array, dimension ( K ) */ /* On entry, VF contains information passed through DBEDE8. */ /* On exit, VF contains the first K components of the first */ /* components of all right singular vectors of the bidiagonal */ /* matrix. */ /* VL (input/output) DOUBLE PRECISION array, dimension ( K ) */ /* On entry, VL contains information passed through DBEDE8. */ /* On exit, VL contains the first K components of the last */ /* components of all right singular vectors of the bidiagonal */ /* matrix. */ /* DIFL (output) DOUBLE PRECISION array, dimension ( K ) */ /* On exit, DIFL(I) = D(I) - DSIGMA(I). */ /* DIFR (output) DOUBLE PRECISION array, */ /* dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and */ /* dimension ( K ) if ICOMPQ = 0. */ /* On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not */ /* defined and will not be referenced. */ /* If ICOMPQ = 1, DIFR(1:K,2) is an array containing the */ /* normalizing factors for the right singular vector matrix. */ /* LDDIFR (input) INTEGER */ /* The leading dimension of DIFR, must be at least K. */ /* DSIGMA (input/output) DOUBLE PRECISION array, dimension ( K ) */ /* On entry, the first K elements of this array contain the old */ /* roots of the deflated updating problem. These are the poles */ /* of the secular equation. */ /* On exit, the elements of DSIGMA may be very slightly altered */ /* in value. */ /* WORK (workspace) DOUBLE PRECISION array, dimension at least 3 * K */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, an singular value did not converge */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --z__; --vf; --vl; --difl; difr_dim1 = *lddifr; difr_offset = 1 + difr_dim1; difr -= difr_offset; --dsigma; --work; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*k < 1) { *info = -2; } else if (*lddifr < *k) { *info = -9; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLASD8", &i__1); return 0; } /* Quick return if possible */ if (*k == 1) { d__[1] = abs(z__[1]); difl[1] = d__[1]; if (*icompq == 1) { difl[2] = 1.; difr[(difr_dim1 << 1) + 1] = 1.; } return 0; } /* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can */ /* be computed with high relative accuracy (barring over/underflow). */ /* This is a problem on machines without a guard digit in */ /* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ /* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), */ /* which on any of these machines zeros out the bottommost */ /* bit of DSIGMA(I) if it is 1; this makes the subsequent */ /* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation */ /* occurs. On binary machines with a guard digit (almost all */ /* machines) it does not change DSIGMA(I) at all. On hexadecimal */ /* and decimal machines with a guard digit, it slightly */ /* changes the bottommost bits of DSIGMA(I). It does not account */ /* for hexadecimal or decimal machines without guard digits */ /* (we know of none). We use a subroutine call to compute */ /* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating */ /* this code. */ i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { dsigma[i__] = _starpu_dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; /* L10: */ } /* Book keeping. */ iwk1 = 1; iwk2 = iwk1 + *k; iwk3 = iwk2 + *k; iwk2i = iwk2 - 1; iwk3i = iwk3 - 1; /* Normalize Z. */ rho = _starpu_dnrm2_(k, &z__[1], &c__1); _starpu_dlascl_("G", &c__0, &c__0, &rho, &c_b8, k, &c__1, &z__[1], k, info); rho *= rho; /* Initialize WORK(IWK3). */ _starpu_dlaset_("A", k, &c__1, &c_b8, &c_b8, &work[iwk3], k); /* Compute the updated singular values, the arrays DIFL, DIFR, */ /* and the updated Z. */ i__1 = *k; for (j = 1; j <= i__1; ++j) { _starpu_dlasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ iwk2], info); /* If the root finder fails, the computation is terminated. */ if (*info != 0) { return 0; } work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j]; difl[j] = -work[j]; difr[j + difr_dim1] = -work[j + 1]; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ j]); /* L20: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ j]); /* L30: */ } /* L40: */ } /* Compute updated Z. */ i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { d__2 = sqrt((d__1 = work[iwk3i + i__], abs(d__1))); z__[i__] = d_sign(&d__2, &z__[i__]); /* L50: */ } /* Update VF and VL. */ i__1 = *k; for (j = 1; j <= i__1; ++j) { diflj = difl[j]; dj = d__[j]; dsigj = -dsigma[j]; if (j < *k) { difrj = -difr[j + difr_dim1]; dsigjp = -dsigma[j + 1]; } work[j] = -z__[j] / diflj / (dsigma[j] + dj); i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = z__[i__] / (_starpu_dlamc3_(&dsigma[i__], &dsigj) - diflj) / ( dsigma[i__] + dj); /* L60: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { work[i__] = z__[i__] / (_starpu_dlamc3_(&dsigma[i__], &dsigjp) + difrj) / (dsigma[i__] + dj); /* L70: */ } temp = _starpu_dnrm2_(k, &work[1], &c__1); work[iwk2i + j] = _starpu_ddot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; work[iwk3i + j] = _starpu_ddot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; if (*icompq == 1) { difr[j + (difr_dim1 << 1)] = temp; } /* L80: */ } _starpu_dcopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); _starpu_dcopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); return 0; /* End of DLASD8 */ } /* _starpu_dlasd8_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasda.c000066400000000000000000000412351507764646700206310ustar00rootroot00000000000000/* dlasda.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__0 = 0; static doublereal c_b11 = 0.; static doublereal c_b12 = 1.; static integer c__1 = 1; static integer c__2 = 2; /* Subroutine */ int _starpu_dlasda_(integer *icompq, integer *smlsiz, integer *n, integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, i__2; /* Builtin functions */ integer pow_ii(integer *, integer *); /* Local variables */ integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc, nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; doublereal beta; integer idxq, nlvl; doublereal alpha; integer inode, ndiml, ndimr, idxqi, itemp; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); integer sqrei; extern /* Subroutine */ int _starpu_dlasd6_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); integer nwork1, nwork2; extern /* Subroutine */ int _starpu_dlasdq_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlasdt_(integer *, integer *, integer *, integer *, integer *, integer *, integer *), _starpu_dlaset_( char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); integer smlszp; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* Using a divide and conquer approach, DLASDA computes the singular */ /* value decomposition (SVD) of a real upper bidiagonal N-by-M matrix */ /* B with diagonal D and offdiagonal E, where M = N + SQRE. The */ /* algorithm computes the singular values in the SVD B = U * S * VT. */ /* The orthogonal matrices U and VT are optionally computed in */ /* compact form. */ /* A related subroutine, DLASD0, computes the singular values and */ /* the singular vectors in explicit form. */ /* Arguments */ /* ========= */ /* ICOMPQ (input) INTEGER */ /* Specifies whether singular vectors are to be computed */ /* in compact form, as follows */ /* = 0: Compute singular values only. */ /* = 1: Compute singular vectors of upper bidiagonal */ /* matrix in compact form. */ /* SMLSIZ (input) INTEGER */ /* The maximum size of the subproblems at the bottom of the */ /* computation tree. */ /* N (input) INTEGER */ /* The row dimension of the upper bidiagonal matrix. This is */ /* also the dimension of the main diagonal array D. */ /* SQRE (input) INTEGER */ /* Specifies the column dimension of the bidiagonal matrix. */ /* = 0: The bidiagonal matrix has column dimension M = N; */ /* = 1: The bidiagonal matrix has column dimension M = N + 1. */ /* D (input/output) DOUBLE PRECISION array, dimension ( N ) */ /* On entry D contains the main diagonal of the bidiagonal */ /* matrix. On exit D, if INFO = 0, contains its singular values. */ /* E (input) DOUBLE PRECISION array, dimension ( M-1 ) */ /* Contains the subdiagonal entries of the bidiagonal matrix. */ /* On exit, E has been destroyed. */ /* U (output) DOUBLE PRECISION array, */ /* dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced */ /* if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left */ /* singular vector matrices of all subproblems at the bottom */ /* level. */ /* LDU (input) INTEGER, LDU = > N. */ /* The leading dimension of arrays U, VT, DIFL, DIFR, POLES, */ /* GIVNUM, and Z. */ /* VT (output) DOUBLE PRECISION array, */ /* dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced */ /* if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right */ /* singular vector matrices of all subproblems at the bottom */ /* level. */ /* K (output) INTEGER array, */ /* dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. */ /* If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th */ /* secular equation on the computation tree. */ /* DIFL (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ), */ /* where NLVL = floor(log_2 (N/SMLSIZ))). */ /* DIFR (output) DOUBLE PRECISION array, */ /* dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and */ /* dimension ( N ) if ICOMPQ = 0. */ /* If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) */ /* record distances between singular values on the I-th */ /* level and singular values on the (I -1)-th level, and */ /* DIFR(1:N, 2 * I ) contains the normalizing factors for */ /* the right singular vector matrix. See DLASD8 for details. */ /* Z (output) DOUBLE PRECISION array, */ /* dimension ( LDU, NLVL ) if ICOMPQ = 1 and */ /* dimension ( N ) if ICOMPQ = 0. */ /* The first K elements of Z(1, I) contain the components of */ /* the deflation-adjusted updating row vector for subproblems */ /* on the I-th level. */ /* POLES (output) DOUBLE PRECISION array, */ /* dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced */ /* if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and */ /* POLES(1, 2*I) contain the new and old singular values */ /* involved in the secular equations on the I-th level. */ /* GIVPTR (output) INTEGER array, */ /* dimension ( N ) if ICOMPQ = 1, and not referenced if */ /* ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records */ /* the number of Givens rotations performed on the I-th */ /* problem on the computation tree. */ /* GIVCOL (output) INTEGER array, */ /* dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not */ /* referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, */ /* GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations */ /* of Givens rotations performed on the I-th level on the */ /* computation tree. */ /* LDGCOL (input) INTEGER, LDGCOL = > N. */ /* The leading dimension of arrays GIVCOL and PERM. */ /* PERM (output) INTEGER array, */ /* dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced */ /* if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records */ /* permutations done on the I-th level of the computation tree. */ /* GIVNUM (output) DOUBLE PRECISION array, */ /* dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not */ /* referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, */ /* GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- */ /* values of Givens rotations performed on the I-th level on */ /* the computation tree. */ /* C (output) DOUBLE PRECISION array, */ /* dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. */ /* If ICOMPQ = 1 and the I-th subproblem is not square, on exit, */ /* C( I ) contains the C-value of a Givens rotation related to */ /* the right null space of the I-th subproblem. */ /* S (output) DOUBLE PRECISION array, dimension ( N ) if */ /* ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 */ /* and the I-th subproblem is not square, on exit, S( I ) */ /* contains the S-value of a Givens rotation related to */ /* the right null space of the I-th subproblem. */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). */ /* IWORK (workspace) INTEGER array. */ /* Dimension must be at least (7 * N). */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, an singular value did not converge */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; givnum_dim1 = *ldu; givnum_offset = 1 + givnum_dim1; givnum -= givnum_offset; poles_dim1 = *ldu; poles_offset = 1 + poles_dim1; poles -= poles_offset; z_dim1 = *ldu; z_offset = 1 + z_dim1; z__ -= z_offset; difr_dim1 = *ldu; difr_offset = 1 + difr_dim1; difr -= difr_offset; difl_dim1 = *ldu; difl_offset = 1 + difl_dim1; difl -= difl_offset; vt_dim1 = *ldu; vt_offset = 1 + vt_dim1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; --k; --givptr; perm_dim1 = *ldgcol; perm_offset = 1 + perm_dim1; perm -= perm_offset; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1; givcol -= givcol_offset; --c__; --s; --work; --iwork; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*smlsiz < 3) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*sqre < 0 || *sqre > 1) { *info = -4; } else if (*ldu < *n + *sqre) { *info = -8; } else if (*ldgcol < *n) { *info = -17; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLASDA", &i__1); return 0; } m = *n + *sqre; /* If the input matrix is too small, call DLASDQ to find the SVD. */ if (*n <= *smlsiz) { if (*icompq == 0) { _starpu_dlasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & work[1], info); } else { _starpu_dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); } return 0; } /* Book-keeping and set up the computation tree. */ inode = 1; ndiml = inode + *n; ndimr = ndiml + *n; idxq = ndimr + *n; iwk = idxq + *n; ncc = 0; nru = 0; smlszp = *smlsiz + 1; vf = 1; vl = vf + m; nwork1 = vl + m; nwork2 = nwork1 + smlszp * smlszp; _starpu_dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* for the nodes on bottom level of the tree, solve */ /* their subproblems by DLASDQ. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { /* IC : center row of each node */ /* NL : number of rows of left subproblem */ /* NR : number of rows of right subproblem */ /* NLF: starting row of the left subproblem */ /* NRF: starting row of the right subproblem */ i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nlp1 = nl + 1; nr = iwork[ndimr + i1]; nlf = ic - nl; nrf = ic + 1; idxqi = idxq + nlf - 2; vfi = vf + nlf - 1; vli = vl + nlf - 1; sqrei = 1; if (*icompq == 0) { _starpu_dlaset_("A", &nlp1, &nlp1, &c_b11, &c_b12, &work[nwork1], &smlszp); _starpu_dlasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], &nl, &work[nwork2], info); itemp = nwork1 + nl * smlszp; _starpu_dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); _starpu_dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); } else { _starpu_dlaset_("A", &nl, &nl, &c_b11, &c_b12, &u[nlf + u_dim1], ldu); _starpu_dlaset_("A", &nlp1, &nlp1, &c_b11, &c_b12, &vt[nlf + vt_dim1], ldu); _starpu_dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & vt[nlf + vt_dim1], ldu, &u[nlf + u_dim1], ldu, &u[nlf + u_dim1], ldu, &work[nwork1], info); _starpu_dcopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1); _starpu_dcopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1) ; } if (*info != 0) { return 0; } i__2 = nl; for (j = 1; j <= i__2; ++j) { iwork[idxqi + j] = j; /* L10: */ } if (i__ == nd && *sqre == 0) { sqrei = 0; } else { sqrei = 1; } idxqi += nlp1; vfi += nlp1; vli += nlp1; nrp1 = nr + sqrei; if (*icompq == 0) { _starpu_dlaset_("A", &nrp1, &nrp1, &c_b11, &c_b12, &work[nwork1], &smlszp); _starpu_dlasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], &nr, &work[nwork2], info); itemp = nwork1 + (nrp1 - 1) * smlszp; _starpu_dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); _starpu_dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); } else { _starpu_dlaset_("A", &nr, &nr, &c_b11, &c_b12, &u[nrf + u_dim1], ldu); _starpu_dlaset_("A", &nrp1, &nrp1, &c_b11, &c_b12, &vt[nrf + vt_dim1], ldu); _starpu_dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf + u_dim1], ldu, &work[nwork1], info); _starpu_dcopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1); _starpu_dcopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1) ; } if (*info != 0) { return 0; } i__2 = nr; for (j = 1; j <= i__2; ++j) { iwork[idxqi + j] = j; /* L20: */ } /* L30: */ } /* Now conquer each subproblem bottom-up. */ j = pow_ii(&c__2, &nlvl); for (lvl = nlvl; lvl >= 1; --lvl) { lvl2 = (lvl << 1) - 1; /* Find the first node LF and last node LL on */ /* the current level LVL. */ if (lvl == 1) { lf = 1; ll = 1; } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; if (i__ == ll) { sqrei = *sqre; } else { sqrei = 1; } vfi = vf + nlf - 1; vli = vl + nlf - 1; idxqi = idxq + nlf - 1; alpha = d__[ic]; beta = e[ic]; if (*icompq == 0) { _starpu_dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & work[vli], &alpha, &beta, &iwork[idxqi], &perm[ perm_offset], &givptr[1], &givcol[givcol_offset], ldgcol, &givnum[givnum_offset], ldu, &poles[ poles_offset], &difl[difl_offset], &difr[difr_offset], &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1], &iwork[iwk], info); } else { --j; _starpu_dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf + lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, &givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], & difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[j], &s[j], &work[nwork1], &iwork[iwk], info); } if (*info != 0) { return 0; } /* L40: */ } /* L50: */ } return 0; /* End of DLASDA */ } /* _starpu_dlasda_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasdq.c000066400000000000000000000271421507764646700206520ustar00rootroot00000000000000/* dlasdq.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dlasdq_(char *uplo, integer *sqre, integer *n, integer * ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; /* Local variables */ integer i__, j; doublereal r__, cs, sn; integer np1, isub; doublereal smin; integer sqre1; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer * , doublereal *, integer *); integer iuplo; extern /* Subroutine */ int _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_xerbla_(char *, integer *), _starpu_dbdsqr_(char *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); logical rotate; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASDQ computes the singular value decomposition (SVD) of a real */ /* (upper or lower) bidiagonal matrix with diagonal D and offdiagonal */ /* E, accumulating the transformations if desired. Letting B denote */ /* the input bidiagonal matrix, the algorithm computes orthogonal */ /* matrices Q and P such that B = Q * S * P' (P' denotes the transpose */ /* of P). The singular values S are overwritten on D. */ /* The input matrix U is changed to U * Q if desired. */ /* The input matrix VT is changed to P' * VT if desired. */ /* The input matrix C is changed to Q' * C if desired. */ /* See "Computing Small Singular Values of Bidiagonal Matrices With */ /* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, */ /* LAPACK Working Note #3, for a detailed description of the algorithm. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* On entry, UPLO specifies whether the input bidiagonal matrix */ /* is upper or lower bidiagonal, and wether it is square are */ /* not. */ /* UPLO = 'U' or 'u' B is upper bidiagonal. */ /* UPLO = 'L' or 'l' B is lower bidiagonal. */ /* SQRE (input) INTEGER */ /* = 0: then the input matrix is N-by-N. */ /* = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and */ /* (N+1)-by-N if UPLU = 'L'. */ /* The bidiagonal matrix has */ /* N = NL + NR + 1 rows and */ /* M = N + SQRE >= N columns. */ /* N (input) INTEGER */ /* On entry, N specifies the number of rows and columns */ /* in the matrix. N must be at least 0. */ /* NCVT (input) INTEGER */ /* On entry, NCVT specifies the number of columns of */ /* the matrix VT. NCVT must be at least 0. */ /* NRU (input) INTEGER */ /* On entry, NRU specifies the number of rows of */ /* the matrix U. NRU must be at least 0. */ /* NCC (input) INTEGER */ /* On entry, NCC specifies the number of columns of */ /* the matrix C. NCC must be at least 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, D contains the diagonal entries of the */ /* bidiagonal matrix whose SVD is desired. On normal exit, */ /* D contains the singular values in ascending order. */ /* E (input/output) DOUBLE PRECISION array. */ /* dimension is (N-1) if SQRE = 0 and N if SQRE = 1. */ /* On entry, the entries of E contain the offdiagonal entries */ /* of the bidiagonal matrix whose SVD is desired. On normal */ /* exit, E will contain 0. If the algorithm does not converge, */ /* D and E will contain the diagonal and superdiagonal entries */ /* of a bidiagonal matrix orthogonally equivalent to the one */ /* given as input. */ /* VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) */ /* On entry, contains a matrix which on exit has been */ /* premultiplied by P', dimension N-by-NCVT if SQRE = 0 */ /* and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). */ /* LDVT (input) INTEGER */ /* On entry, LDVT specifies the leading dimension of VT as */ /* declared in the calling (sub) program. LDVT must be at */ /* least 1. If NCVT is nonzero LDVT must also be at least N. */ /* U (input/output) DOUBLE PRECISION array, dimension (LDU, N) */ /* On entry, contains a matrix which on exit has been */ /* postmultiplied by Q, dimension NRU-by-N if SQRE = 0 */ /* and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). */ /* LDU (input) INTEGER */ /* On entry, LDU specifies the leading dimension of U as */ /* declared in the calling (sub) program. LDU must be at */ /* least max( 1, NRU ) . */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) */ /* On entry, contains an N-by-NCC matrix which on exit */ /* has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 */ /* and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). */ /* LDC (input) INTEGER */ /* On entry, LDC specifies the leading dimension of C as */ /* declared in the calling (sub) program. LDC must be at */ /* least 1. If NCC is nonzero, LDC must also be at least N. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ /* Workspace. Only referenced if one of NCVT, NRU, or NCC is */ /* nonzero, and if N is at least 2. */ /* INFO (output) INTEGER */ /* On exit, a value of 0 indicates a successful exit. */ /* If INFO < 0, argument number -INFO is illegal. */ /* If INFO > 0, the algorithm did not converge, and INFO */ /* specifies how many superdiagonals did not converge. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; iuplo = 0; if (_starpu_lsame_(uplo, "U")) { iuplo = 1; } if (_starpu_lsame_(uplo, "L")) { iuplo = 2; } if (iuplo == 0) { *info = -1; } else if (*sqre < 0 || *sqre > 1) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ncvt < 0) { *info = -4; } else if (*nru < 0) { *info = -5; } else if (*ncc < 0) { *info = -6; } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { *info = -10; } else if (*ldu < max(1,*nru)) { *info = -12; } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { *info = -14; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLASDQ", &i__1); return 0; } if (*n == 0) { return 0; } /* ROTATE is true if any singular vectors desired, false otherwise */ rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; np1 = *n + 1; sqre1 = *sqre; /* If matrix non-square upper bidiagonal, rotate to be lower */ /* bidiagonal. The rotations are on the right. */ if (iuplo == 1 && sqre1 == 1) { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { _starpu_dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; if (rotate) { work[i__] = cs; work[*n + i__] = sn; } /* L10: */ } _starpu_dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); d__[*n] = r__; e[*n] = 0.; if (rotate) { work[*n] = cs; work[*n + *n] = sn; } iuplo = 2; sqre1 = 0; /* Update singular vectors if desired. */ if (*ncvt > 0) { _starpu_dlasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ vt_offset], ldvt); } } /* If matrix lower bidiagonal, rotate to be upper bidiagonal */ /* by applying Givens rotations on the left. */ if (iuplo == 2) { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { _starpu_dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; if (rotate) { work[i__] = cs; work[*n + i__] = sn; } /* L20: */ } /* If matrix (N+1)-by-N lower bidiagonal, one additional */ /* rotation is needed. */ if (sqre1 == 1) { _starpu_dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); d__[*n] = r__; if (rotate) { work[*n] = cs; work[*n + *n] = sn; } } /* Update singular vectors if desired. */ if (*nru > 0) { if (sqre1 == 0) { _starpu_dlasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ u_offset], ldu); } else { _starpu_dlasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ u_offset], ldu); } } if (*ncc > 0) { if (sqre1 == 0) { _starpu_dlasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ c_offset], ldc); } else { _starpu_dlasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[ c_offset], ldc); } } } /* Call DBDSQR to compute the SVD of the reduced real */ /* N-by-N upper bidiagonal matrix. */ _starpu_dbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ u_offset], ldu, &c__[c_offset], ldc, &work[1], info); /* Sort the singular values into ascending order (insertion sort on */ /* singular values, but only one transposition per singular vector) */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Scan for smallest D(I). */ isub = i__; smin = d__[i__]; i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { if (d__[j] < smin) { isub = j; smin = d__[j]; } /* L30: */ } if (isub != i__) { /* Swap singular values and vectors. */ d__[isub] = d__[i__]; d__[i__] = smin; if (*ncvt > 0) { _starpu_dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1], ldvt); } if (*nru > 0) { _starpu_dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1] , &c__1); } if (*ncc > 0) { _starpu_dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc) ; } } /* L40: */ } return 0; /* End of DLASDQ */ } /* _starpu_dlasdq_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasdt.c000066400000000000000000000070311507764646700206500ustar00rootroot00000000000000/* dlasdt.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlasdt_(integer *n, integer *lvl, integer *nd, integer * inode, integer *ndiml, integer *ndimr, integer *msub) { /* System generated locals */ integer i__1, i__2; /* Builtin functions */ double log(doublereal); /* Local variables */ integer i__, il, ir, maxn; doublereal temp; integer nlvl, llst, ncrnt; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASDT creates a tree of subproblems for bidiagonal divide and */ /* conquer. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* On entry, the number of diagonal elements of the */ /* bidiagonal matrix. */ /* LVL (output) INTEGER */ /* On exit, the number of levels on the computation tree. */ /* ND (output) INTEGER */ /* On exit, the number of nodes on the tree. */ /* INODE (output) INTEGER array, dimension ( N ) */ /* On exit, centers of subproblems. */ /* NDIML (output) INTEGER array, dimension ( N ) */ /* On exit, row dimensions of left children. */ /* NDIMR (output) INTEGER array, dimension ( N ) */ /* On exit, row dimensions of right children. */ /* MSUB (input) INTEGER. */ /* On entry, the maximum row dimension each subproblem at the */ /* bottom of the tree can be of. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Find the number of levels on the tree. */ /* Parameter adjustments */ --ndimr; --ndiml; --inode; /* Function Body */ maxn = max(1,*n); temp = log((doublereal) maxn / (doublereal) (*msub + 1)) / log(2.); *lvl = (integer) temp + 1; i__ = *n / 2; inode[1] = i__ + 1; ndiml[1] = i__; ndimr[1] = *n - i__ - 1; il = 0; ir = 1; llst = 1; i__1 = *lvl - 1; for (nlvl = 1; nlvl <= i__1; ++nlvl) { /* Constructing the tree at (NLVL+1)-st level. The number of */ /* nodes created on this level is LLST * 2. */ i__2 = llst - 1; for (i__ = 0; i__ <= i__2; ++i__) { il += 2; ir += 2; ncrnt = llst + i__; ndiml[il] = ndiml[ncrnt] / 2; ndimr[il] = ndiml[ncrnt] - ndiml[il] - 1; inode[il] = inode[ncrnt] - ndimr[il] - 1; ndiml[ir] = ndimr[ncrnt] / 2; ndimr[ir] = ndimr[ncrnt] - ndiml[ir] - 1; inode[ir] = inode[ncrnt] + ndiml[ir] + 1; /* L10: */ } llst <<= 1; /* L20: */ } *nd = (llst << 1) - 1; return 0; /* End of DLASDT */ } /* _starpu_dlasdt_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaset.c000066400000000000000000000101001507764646700206400ustar00rootroot00000000000000/* dlaset.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaset_(char *uplo, integer *m, integer *n, doublereal * alpha, doublereal *beta, doublereal *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j; extern logical _starpu_lsame_(char *, char *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASET initializes an m-by-n matrix A to BETA on the diagonal and */ /* ALPHA on the offdiagonals. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies the part of the matrix A to be set. */ /* = 'U': Upper triangular part is set; the strictly lower */ /* triangular part of A is not changed. */ /* = 'L': Lower triangular part is set; the strictly upper */ /* triangular part of A is not changed. */ /* Otherwise: All of the matrix A is set. */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* ALPHA (input) DOUBLE PRECISION */ /* The constant to which the offdiagonal elements are to be set. */ /* BETA (input) DOUBLE PRECISION */ /* The constant to which the diagonal elements are to be set. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On exit, the leading m-by-n submatrix of A is set as follows: */ /* if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, */ /* if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, */ /* otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, */ /* and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ if (_starpu_lsame_(uplo, "U")) { /* Set the strictly upper triangular or trapezoidal part of the */ /* array to ALPHA. */ i__1 = *n; for (j = 2; j <= i__1; ++j) { /* Computing MIN */ i__3 = j - 1; i__2 = min(i__3,*m); for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = *alpha; /* L10: */ } /* L20: */ } } else if (_starpu_lsame_(uplo, "L")) { /* Set the strictly lower triangular or trapezoidal part of the */ /* array to ALPHA. */ i__1 = min(*m,*n); for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = j + 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = *alpha; /* L30: */ } /* L40: */ } } else { /* Set the leading m-by-n submatrix to ALPHA. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = *alpha; /* L50: */ } /* L60: */ } } /* Set the first min(M,N) diagonal elements to BETA. */ i__1 = min(*m,*n); for (i__ = 1; i__ <= i__1; ++i__) { a[i__ + i__ * a_dim1] = *beta; /* L70: */ } return 0; /* End of DLASET */ } /* _starpu_dlaset_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasq1.c000066400000000000000000000150701507764646700205640ustar00rootroot00000000000000/* dlasq1.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__2 = 2; static integer c__0 = 0; /* Subroutine */ int _starpu_dlasq1_(integer *n, doublereal *d__, doublereal *e, doublereal *work, integer *info) { /* System generated locals */ integer i__1, i__2; doublereal d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__; doublereal eps; extern /* Subroutine */ int _starpu_dlas2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); doublereal scale; integer iinfo; doublereal sigmn; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal sigmx; extern /* Subroutine */ int _starpu_dlasq2_(integer *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dlasrt_( char *, integer *, doublereal *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ /* -- Berkeley -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASQ1 computes the singular values of a real N-by-N bidiagonal */ /* matrix with diagonal D and off-diagonal E. The singular values */ /* are computed to high relative accuracy, in the absence of */ /* denormalization, underflow and overflow. The algorithm was first */ /* presented in */ /* "Accurate singular values and differential qd algorithms" by K. V. */ /* Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, */ /* 1994, */ /* and the present implementation is described in "An implementation of */ /* the dqds Algorithm (Positive Case)", LAPACK Working Note. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of rows and columns in the matrix. N >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, D contains the diagonal elements of the */ /* bidiagonal matrix whose SVD is desired. On normal exit, */ /* D contains the singular values in decreasing order. */ /* E (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, elements E(1:N-1) contain the off-diagonal elements */ /* of the bidiagonal matrix whose SVD is desired. */ /* On exit, E is overwritten. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: the algorithm failed */ /* = 1, a split was marked by a positive value in E */ /* = 2, current block of Z not diagonalized after 30*N */ /* iterations (in inner while loop) */ /* = 3, termination criterion of outer while loop not met */ /* (program created more than N unreduced blocks) */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --work; --e; --d__; /* Function Body */ *info = 0; if (*n < 0) { *info = -2; i__1 = -(*info); _starpu_xerbla_("DLASQ1", &i__1); return 0; } else if (*n == 0) { return 0; } else if (*n == 1) { d__[1] = abs(d__[1]); return 0; } else if (*n == 2) { _starpu_dlas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); d__[1] = sigmx; d__[2] = sigmn; return 0; } /* Estimate the largest singular value. */ sigmx = 0.; i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = (d__1 = d__[i__], abs(d__1)); /* Computing MAX */ d__2 = sigmx, d__3 = (d__1 = e[i__], abs(d__1)); sigmx = max(d__2,d__3); /* L10: */ } d__[*n] = (d__1 = d__[*n], abs(d__1)); /* Early return if SIGMX is zero (matrix is already diagonal). */ if (sigmx == 0.) { _starpu_dlasrt_("D", n, &d__[1], &iinfo); return 0; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = sigmx, d__2 = d__[i__]; sigmx = max(d__1,d__2); /* L20: */ } /* Copy D and E into WORK (in the Z format) and scale (squaring the */ /* input data makes scaling by a power of the radix pointless). */ eps = _starpu_dlamch_("Precision"); safmin = _starpu_dlamch_("Safe minimum"); scale = sqrt(eps / safmin); _starpu_dcopy_(n, &d__[1], &c__1, &work[1], &c__2); i__1 = *n - 1; _starpu_dcopy_(&i__1, &e[1], &c__1, &work[2], &c__2); i__1 = (*n << 1) - 1; i__2 = (*n << 1) - 1; _starpu_dlascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, &iinfo); /* Compute the q's and e's. */ i__1 = (*n << 1) - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing 2nd power */ d__1 = work[i__]; work[i__] = d__1 * d__1; /* L30: */ } work[*n * 2] = 0.; _starpu_dlasq2_(n, &work[1], info); if (*info == 0) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = sqrt(work[i__]); /* L40: */ } _starpu_dlascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & iinfo); } return 0; /* End of DLASQ1 */ } /* _starpu_dlasq1_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasq2.c000066400000000000000000000366231507764646700205740ustar00rootroot00000000000000/* dlasq2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__2 = 2; static integer c__10 = 10; static integer c__3 = 3; static integer c__4 = 4; static integer c__11 = 11; /* Subroutine */ int _starpu_dlasq2_(integer *n, doublereal *z__, integer *info) { /* System generated locals */ integer i__1, i__2, i__3; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal d__, e, g; integer k; doublereal s, t; integer i0, i4, n0; doublereal dn; integer pp; doublereal dn1, dn2, dee, eps, tau, tol; integer ipn4; doublereal tol2; logical ieee; integer nbig; doublereal dmin__, emin, emax; integer kmin, ndiv, iter; doublereal qmin, temp, qmax, zmax; integer splt; doublereal dmin1, dmin2; integer nfail; doublereal desig, trace, sigma; integer iinfo, ttype; extern /* Subroutine */ int _starpu_dlasq3_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, logical *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *); doublereal deemin; integer iwhila, iwhilb; doublereal oldemn, safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dlasrt_(char *, integer *, doublereal *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ /* -- Berkeley -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASQ2 computes all the eigenvalues of the symmetric positive */ /* definite tridiagonal matrix associated with the qd array Z to high */ /* relative accuracy are computed to high relative accuracy, in the */ /* absence of denormalization, underflow and overflow. */ /* To see the relation of Z to the tridiagonal matrix, let L be a */ /* unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and */ /* let U be an upper bidiagonal matrix with 1's above and diagonal */ /* Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the */ /* symmetric tridiagonal to which it is similar. */ /* Note : DLASQ2 defines a logical variable, IEEE, which is true */ /* on machines which follow ieee-754 floating-point standard in their */ /* handling of infinities and NaNs, and false otherwise. This variable */ /* is passed to DLASQ3. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of rows and columns in the matrix. N >= 0. */ /* Z (input/output) DOUBLE PRECISION array, dimension ( 4*N ) */ /* On entry Z holds the qd array. On exit, entries 1 to N hold */ /* the eigenvalues in decreasing order, Z( 2*N+1 ) holds the */ /* trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If */ /* N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) */ /* holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of */ /* shifts that failed. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if the i-th argument is a scalar and had an illegal */ /* value, then INFO = -i, if the i-th argument is an */ /* array and the j-entry had an illegal value, then */ /* INFO = -(i*100+j) */ /* > 0: the algorithm failed */ /* = 1, a split was marked by a positive value in E */ /* = 2, current block of Z not diagonalized after 30*N */ /* iterations (in inner while loop) */ /* = 3, termination criterion of outer while loop not met */ /* (program created more than N unreduced blocks) */ /* Further Details */ /* =============== */ /* Local Variables: I0:N0 defines a current unreduced segment of Z. */ /* The shifts are accumulated in SIGMA. Iteration count is in ITER. */ /* Ping-pong is controlled by PP (alternates between 0 and 1). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments. */ /* (in case DLASQ2 is not called by DLASQ1) */ /* Parameter adjustments */ --z__; /* Function Body */ *info = 0; eps = _starpu_dlamch_("Precision"); safmin = _starpu_dlamch_("Safe minimum"); tol = eps * 100.; /* Computing 2nd power */ d__1 = tol; tol2 = d__1 * d__1; if (*n < 0) { *info = -1; _starpu_xerbla_("DLASQ2", &c__1); return 0; } else if (*n == 0) { return 0; } else if (*n == 1) { /* 1-by-1 case. */ if (z__[1] < 0.) { *info = -201; _starpu_xerbla_("DLASQ2", &c__2); } return 0; } else if (*n == 2) { /* 2-by-2 case. */ if (z__[2] < 0. || z__[3] < 0.) { *info = -2; _starpu_xerbla_("DLASQ2", &c__2); return 0; } else if (z__[3] > z__[1]) { d__ = z__[3]; z__[3] = z__[1]; z__[1] = d__; } z__[5] = z__[1] + z__[2] + z__[3]; if (z__[2] > z__[3] * tol2) { t = (z__[1] - z__[3] + z__[2]) * .5; s = z__[3] * (z__[2] / t); if (s <= t) { s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.))); } else { s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); } t = z__[1] + (s + z__[2]); z__[3] *= z__[1] / t; z__[1] = t; } z__[2] = z__[3]; z__[6] = z__[2] + z__[1]; return 0; } /* Check for negative data and compute sums of q's and e's. */ z__[*n * 2] = 0.; emin = z__[2]; qmax = 0.; zmax = 0.; d__ = 0.; e = 0.; i__1 = *n - 1 << 1; for (k = 1; k <= i__1; k += 2) { if (z__[k] < 0.) { *info = -(k + 200); _starpu_xerbla_("DLASQ2", &c__2); return 0; } else if (z__[k + 1] < 0.) { *info = -(k + 201); _starpu_xerbla_("DLASQ2", &c__2); return 0; } d__ += z__[k]; e += z__[k + 1]; /* Computing MAX */ d__1 = qmax, d__2 = z__[k]; qmax = max(d__1,d__2); /* Computing MIN */ d__1 = emin, d__2 = z__[k + 1]; emin = min(d__1,d__2); /* Computing MAX */ d__1 = max(qmax,zmax), d__2 = z__[k + 1]; zmax = max(d__1,d__2); /* L10: */ } if (z__[(*n << 1) - 1] < 0.) { *info = -((*n << 1) + 199); _starpu_xerbla_("DLASQ2", &c__2); return 0; } d__ += z__[(*n << 1) - 1]; /* Computing MAX */ d__1 = qmax, d__2 = z__[(*n << 1) - 1]; qmax = max(d__1,d__2); zmax = max(qmax,zmax); /* Check for diagonality. */ if (e == 0.) { i__1 = *n; for (k = 2; k <= i__1; ++k) { z__[k] = z__[(k << 1) - 1]; /* L20: */ } _starpu_dlasrt_("D", n, &z__[1], &iinfo); z__[(*n << 1) - 1] = d__; return 0; } trace = d__ + e; /* Check for zero data. */ if (trace == 0.) { z__[(*n << 1) - 1] = 0.; return 0; } /* Check whether the machine is IEEE conformable. */ ieee = _starpu_ilaenv_(&c__10, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4) == 1 && _starpu_ilaenv_(&c__11, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4) == 1; /* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */ for (k = *n << 1; k >= 2; k += -2) { z__[k * 2] = 0.; z__[(k << 1) - 1] = z__[k]; z__[(k << 1) - 2] = 0.; z__[(k << 1) - 3] = z__[k - 1]; /* L30: */ } i0 = 1; n0 = *n; /* Reverse the qd-array, if warranted. */ if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) { ipn4 = i0 + n0 << 2; i__1 = i0 + n0 - 1 << 1; for (i4 = i0 << 2; i4 <= i__1; i4 += 4) { temp = z__[i4 - 3]; z__[i4 - 3] = z__[ipn4 - i4 - 3]; z__[ipn4 - i4 - 3] = temp; temp = z__[i4 - 1]; z__[i4 - 1] = z__[ipn4 - i4 - 5]; z__[ipn4 - i4 - 5] = temp; /* L40: */ } } /* Initial split checking via dqd and Li's test. */ pp = 0; for (k = 1; k <= 2; ++k) { d__ = z__[(n0 << 2) + pp - 3]; i__1 = (i0 << 2) + pp; for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) { if (z__[i4 - 1] <= tol2 * d__) { z__[i4 - 1] = -0.; d__ = z__[i4 - 3]; } else { d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); } /* L50: */ } /* dqd maps Z to ZZ plus Li's test. */ emin = z__[(i0 << 2) + pp + 1]; d__ = z__[(i0 << 2) + pp - 3]; i__1 = (n0 - 1 << 2) + pp; for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) { z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1]; if (z__[i4 - 1] <= tol2 * d__) { z__[i4 - 1] = -0.; z__[i4 - (pp << 1) - 2] = d__; z__[i4 - (pp << 1)] = 0.; d__ = z__[i4 + 1]; } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) { temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2]; z__[i4 - (pp << 1)] = z__[i4 - 1] * temp; d__ *= temp; } else { z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - ( pp << 1) - 2]); d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]); } /* Computing MIN */ d__1 = emin, d__2 = z__[i4 - (pp << 1)]; emin = min(d__1,d__2); /* L60: */ } z__[(n0 << 2) - pp - 2] = d__; /* Now find qmax. */ qmax = z__[(i0 << 2) - pp - 2]; i__1 = (n0 << 2) - pp - 2; for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) { /* Computing MAX */ d__1 = qmax, d__2 = z__[i4]; qmax = max(d__1,d__2); /* L70: */ } /* Prepare for the next iteration on K. */ pp = 1 - pp; /* L80: */ } /* Initialise variables to pass to DLASQ3. */ ttype = 0; dmin1 = 0.; dmin2 = 0.; dn = 0.; dn1 = 0.; dn2 = 0.; g = 0.; tau = 0.; iter = 2; nfail = 0; ndiv = n0 - i0 << 1; i__1 = *n + 1; for (iwhila = 1; iwhila <= i__1; ++iwhila) { if (n0 < 1) { goto L170; } /* While array unfinished do */ /* E(N0) holds the value of SIGMA when submatrix in I0:N0 */ /* splits from the rest of the array, but is negated. */ desig = 0.; if (n0 == *n) { sigma = 0.; } else { sigma = -z__[(n0 << 2) - 1]; } if (sigma < 0.) { *info = 1; return 0; } /* Find last unreduced submatrix's top index I0, find QMAX and */ /* EMIN. Find Gershgorin-type bound if Q's much greater than E's. */ emax = 0.; if (n0 > i0) { emin = (d__1 = z__[(n0 << 2) - 5], abs(d__1)); } else { emin = 0.; } qmin = z__[(n0 << 2) - 3]; qmax = qmin; for (i4 = n0 << 2; i4 >= 8; i4 += -4) { if (z__[i4 - 5] <= 0.) { goto L100; } if (qmin >= emax * 4.) { /* Computing MIN */ d__1 = qmin, d__2 = z__[i4 - 3]; qmin = min(d__1,d__2); /* Computing MAX */ d__1 = emax, d__2 = z__[i4 - 5]; emax = max(d__1,d__2); } /* Computing MAX */ d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5]; qmax = max(d__1,d__2); /* Computing MIN */ d__1 = emin, d__2 = z__[i4 - 5]; emin = min(d__1,d__2); /* L90: */ } i4 = 4; L100: i0 = i4 / 4; pp = 0; if (n0 - i0 > 1) { dee = z__[(i0 << 2) - 3]; deemin = dee; kmin = i0; i__2 = (n0 << 2) - 3; for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) { dee = z__[i4] * (dee / (dee + z__[i4 - 2])); if (dee <= deemin) { deemin = dee; kmin = (i4 + 3) / 4; } /* L110: */ } if (kmin - i0 << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] * .5) { ipn4 = i0 + n0 << 2; pp = 2; i__2 = i0 + n0 - 1 << 1; for (i4 = i0 << 2; i4 <= i__2; i4 += 4) { temp = z__[i4 - 3]; z__[i4 - 3] = z__[ipn4 - i4 - 3]; z__[ipn4 - i4 - 3] = temp; temp = z__[i4 - 2]; z__[i4 - 2] = z__[ipn4 - i4 - 2]; z__[ipn4 - i4 - 2] = temp; temp = z__[i4 - 1]; z__[i4 - 1] = z__[ipn4 - i4 - 5]; z__[ipn4 - i4 - 5] = temp; temp = z__[i4]; z__[i4] = z__[ipn4 - i4 - 4]; z__[ipn4 - i4 - 4] = temp; /* L120: */ } } } /* Put -(initial shift) into DMIN. */ /* Computing MAX */ d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax); dmin__ = -max(d__1,d__2); /* Now I0:N0 is unreduced. */ /* PP = 0 for ping, PP = 1 for pong. */ /* PP = 2 indicates that flipping was applied to the Z array and */ /* and that the tests for deflation upon entry in DLASQ3 */ /* should not be performed. */ nbig = (n0 - i0 + 1) * 30; i__2 = nbig; for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { if (i0 > n0) { goto L150; } /* While submatrix unfinished take a good dqds step. */ _starpu_dlasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, & dn1, &dn2, &g, &tau); pp = 1 - pp; /* When EMIN is very small check for splits. */ if (pp == 0 && n0 - i0 >= 3) { if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 * sigma) { splt = i0 - 1; qmax = z__[(i0 << 2) - 3]; emin = z__[(i0 << 2) - 1]; oldemn = z__[i0 * 4]; i__3 = n0 - 3 << 2; for (i4 = i0 << 2; i4 <= i__3; i4 += 4) { if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= tol2 * sigma) { z__[i4 - 1] = -sigma; splt = i4 / 4; qmax = 0.; emin = z__[i4 + 3]; oldemn = z__[i4 + 4]; } else { /* Computing MAX */ d__1 = qmax, d__2 = z__[i4 + 1]; qmax = max(d__1,d__2); /* Computing MIN */ d__1 = emin, d__2 = z__[i4 - 1]; emin = min(d__1,d__2); /* Computing MIN */ d__1 = oldemn, d__2 = z__[i4]; oldemn = min(d__1,d__2); } /* L130: */ } z__[(n0 << 2) - 1] = emin; z__[n0 * 4] = oldemn; i0 = splt + 1; } } /* L140: */ } *info = 2; return 0; /* end IWHILB */ L150: /* L160: */ ; } *info = 3; return 0; /* end IWHILA */ L170: /* Move q's to the front. */ i__1 = *n; for (k = 2; k <= i__1; ++k) { z__[k] = z__[(k << 2) - 3]; /* L180: */ } /* Sort and compute sum of eigenvalues. */ _starpu_dlasrt_("D", n, &z__[1], &iinfo); e = 0.; for (k = *n; k >= 1; --k) { e += z__[k]; /* L190: */ } /* Store trace, sum(eigenvalues) and information on performance. */ z__[(*n << 1) + 1] = trace; z__[(*n << 1) + 2] = e; z__[(*n << 1) + 3] = (doublereal) iter; /* Computing 2nd power */ i__1 = *n; z__[(*n << 1) + 4] = (doublereal) ndiv / (doublereal) (i__1 * i__1); z__[(*n << 1) + 5] = nfail * 100. / (doublereal) iter; return 0; /* End of DLASQ2 */ } /* _starpu_dlasq2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasq3.c000066400000000000000000000217601507764646700205710ustar00rootroot00000000000000/* dlasq3.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlasq3_(integer *i0, integer *n0, doublereal *z__, integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig, doublereal *qmax, integer *nfail, integer *iter, integer *ndiv, logical *ieee, integer *ttype, doublereal *dmin1, doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, doublereal *g, doublereal *tau) { /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal s, t; integer j4, nn; doublereal eps, tol; integer n0in, ipn4; doublereal tol2, temp; extern /* Subroutine */ int _starpu_dlasq4_(integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dlasq5_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, logical *), _starpu_dlasq6_( integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *); extern logical _starpu_disnan_(doublereal *); /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ /* -- Berkeley -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. */ /* In case of failure it changes shifts, and tries again until output */ /* is positive. */ /* Arguments */ /* ========= */ /* I0 (input) INTEGER */ /* First index. */ /* N0 (input) INTEGER */ /* Last index. */ /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ /* Z holds the qd array. */ /* PP (input/output) INTEGER */ /* PP=0 for ping, PP=1 for pong. */ /* PP=2 indicates that flipping was applied to the Z array */ /* and that the initial tests for deflation should not be */ /* performed. */ /* DMIN (output) DOUBLE PRECISION */ /* Minimum value of d. */ /* SIGMA (output) DOUBLE PRECISION */ /* Sum of shifts used in current segment. */ /* DESIG (input/output) DOUBLE PRECISION */ /* Lower order part of SIGMA */ /* QMAX (input) DOUBLE PRECISION */ /* Maximum value of q. */ /* NFAIL (output) INTEGER */ /* Number of times shift was too big. */ /* ITER (output) INTEGER */ /* Number of iterations. */ /* NDIV (output) INTEGER */ /* Number of divisions. */ /* IEEE (input) LOGICAL */ /* Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). */ /* TTYPE (input/output) INTEGER */ /* Shift type. */ /* DMIN1, DMIN2, DN, DN1, DN2, G, TAU (input/output) DOUBLE PRECISION */ /* These are passed as arguments in order to save their values */ /* between calls to DLASQ3. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Function .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --z__; /* Function Body */ n0in = *n0; eps = _starpu_dlamch_("Precision"); tol = eps * 100.; /* Computing 2nd power */ d__1 = tol; tol2 = d__1 * d__1; /* Check for deflation. */ L10: if (*n0 < *i0) { return 0; } if (*n0 == *i0) { goto L20; } nn = (*n0 << 2) + *pp; if (*n0 == *i0 + 1) { goto L40; } /* Check whether E(N0-1) is negligible, 1 eigenvalue. */ if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - 4] > tol2 * z__[nn - 7]) { goto L30; } L20: z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma; --(*n0); goto L10; /* Check whether E(N0-2) is negligible, 2 eigenvalues. */ L30: if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[ nn - 11]) { goto L50; } L40: if (z__[nn - 3] > z__[nn - 7]) { s = z__[nn - 3]; z__[nn - 3] = z__[nn - 7]; z__[nn - 7] = s; } if (z__[nn - 5] > z__[nn - 3] * tol2) { t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5; s = z__[nn - 3] * (z__[nn - 5] / t); if (s <= t) { s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) + 1.))); } else { s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); } t = z__[nn - 7] + (s + z__[nn - 5]); z__[nn - 3] *= z__[nn - 7] / t; z__[nn - 7] = t; } z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma; z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma; *n0 += -2; goto L10; L50: if (*pp == 2) { *pp = 0; } /* Reverse the qd-array, if warranted. */ if (*dmin__ <= 0. || *n0 < n0in) { if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) { ipn4 = *i0 + *n0 << 2; i__1 = *i0 + *n0 - 1 << 1; for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { temp = z__[j4 - 3]; z__[j4 - 3] = z__[ipn4 - j4 - 3]; z__[ipn4 - j4 - 3] = temp; temp = z__[j4 - 2]; z__[j4 - 2] = z__[ipn4 - j4 - 2]; z__[ipn4 - j4 - 2] = temp; temp = z__[j4 - 1]; z__[j4 - 1] = z__[ipn4 - j4 - 5]; z__[ipn4 - j4 - 5] = temp; temp = z__[j4]; z__[j4] = z__[ipn4 - j4 - 4]; z__[ipn4 - j4 - 4] = temp; /* L60: */ } if (*n0 - *i0 <= 4) { z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1]; z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; } /* Computing MIN */ d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1]; *dmin2 = min(d__1,d__2); /* Computing MIN */ d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1] , d__1 = min(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3]; z__[(*n0 << 2) + *pp - 1] = min(d__1,d__2); /* Computing MIN */ d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 = min(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4]; z__[(*n0 << 2) - *pp] = min(d__1,d__2); /* Computing MAX */ d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = max(d__1, d__2), d__2 = z__[(*i0 << 2) + *pp + 1]; *qmax = max(d__1,d__2); *dmin__ = -0.; } } /* Choose a shift. */ _starpu_dlasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2, tau, ttype, g); /* Call dqds until DMIN > 0. */ L70: _starpu_dlasq5_(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2, ieee); *ndiv += *n0 - *i0 + 2; ++(*iter); /* Check status. */ if (*dmin__ >= 0. && *dmin1 > 0.) { /* Success. */ goto L90; } else if (*dmin__ < 0. && *dmin1 > 0. && z__[(*n0 - 1 << 2) - *pp] < tol * (*sigma + *dn1) && abs(*dn) < tol * *sigma) { /* Convergence hidden by negative DN. */ z__[(*n0 - 1 << 2) - *pp + 2] = 0.; *dmin__ = 0.; goto L90; } else if (*dmin__ < 0.) { /* TAU too big. Select new TAU and try again. */ ++(*nfail); if (*ttype < -22) { /* Failed twice. Play it safe. */ *tau = 0.; } else if (*dmin1 > 0.) { /* Late failure. Gives excellent shift. */ *tau = (*tau + *dmin__) * (1. - eps * 2.); *ttype += -11; } else { /* Early failure. Divide by 4. */ *tau *= .25; *ttype += -12; } goto L70; } else if (_starpu_disnan_(dmin__)) { /* NaN. */ if (*tau == 0.) { goto L80; } else { *tau = 0.; goto L70; } } else { /* Possible underflow. Play it safe. */ goto L80; } /* Risk of underflow. */ L80: _starpu_dlasq6_(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2); *ndiv += *n0 - *i0 + 2; ++(*iter); *tau = 0.; L90: if (*tau < *sigma) { *desig += *tau; t = *sigma + *desig; *desig -= t - *sigma; } else { t = *sigma + *tau; *desig = *sigma - (t - *tau) + *desig; } *sigma = t; return 0; /* End of DLASQ3 */ } /* _starpu_dlasq3_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasq4.c000066400000000000000000000221161507764646700205660ustar00rootroot00000000000000/* dlasq4.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlasq4_(integer *i0, integer *n0, doublereal *z__, integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, doublereal *tau, integer *ttype, doublereal *g) { /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal s, a2, b1, b2; integer i4, nn, np; doublereal gam, gap1, gap2; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ /* -- Berkeley -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASQ4 computes an approximation TAU to the smallest eigenvalue */ /* using values of d from the previous transform. */ /* I0 (input) INTEGER */ /* First index. */ /* N0 (input) INTEGER */ /* Last index. */ /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ /* Z holds the qd array. */ /* PP (input) INTEGER */ /* PP=0 for ping, PP=1 for pong. */ /* NOIN (input) INTEGER */ /* The value of N0 at start of EIGTEST. */ /* DMIN (input) DOUBLE PRECISION */ /* Minimum value of d. */ /* DMIN1 (input) DOUBLE PRECISION */ /* Minimum value of d, excluding D( N0 ). */ /* DMIN2 (input) DOUBLE PRECISION */ /* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ /* DN (input) DOUBLE PRECISION */ /* d(N) */ /* DN1 (input) DOUBLE PRECISION */ /* d(N-1) */ /* DN2 (input) DOUBLE PRECISION */ /* d(N-2) */ /* TAU (output) DOUBLE PRECISION */ /* This is the shift. */ /* TTYPE (output) INTEGER */ /* Shift type. */ /* G (input/output) REAL */ /* G is passed as an argument in order to save its value between */ /* calls to DLASQ4. */ /* Further Details */ /* =============== */ /* CNST1 = 9/16 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* A negative DMIN forces the shift to take that absolute value */ /* TTYPE records the type of shift. */ /* Parameter adjustments */ --z__; /* Function Body */ if (*dmin__ <= 0.) { *tau = -(*dmin__); *ttype = -1; return 0; } nn = (*n0 << 2) + *pp; if (*n0in == *n0) { /* No eigenvalues deflated. */ if (*dmin__ == *dn || *dmin__ == *dn1) { b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]); b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]); a2 = z__[nn - 7] + z__[nn - 5]; /* Cases 2 and 3. */ if (*dmin__ == *dn && *dmin1 == *dn1) { gap2 = *dmin2 - a2 - *dmin2 * .25; if (gap2 > 0. && gap2 > b2) { gap1 = a2 - *dn - b2 / gap2 * b2; } else { gap1 = a2 - *dn - (b1 + b2); } if (gap1 > 0. && gap1 > b1) { /* Computing MAX */ d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5; s = max(d__1,d__2); *ttype = -2; } else { s = 0.; if (*dn > b1) { s = *dn - b1; } if (a2 > b1 + b2) { /* Computing MIN */ d__1 = s, d__2 = a2 - (b1 + b2); s = min(d__1,d__2); } /* Computing MAX */ d__1 = s, d__2 = *dmin__ * .333; s = max(d__1,d__2); *ttype = -3; } } else { /* Case 4. */ *ttype = -4; s = *dmin__ * .25; if (*dmin__ == *dn) { gam = *dn; a2 = 0.; if (z__[nn - 5] > z__[nn - 7]) { return 0; } b2 = z__[nn - 5] / z__[nn - 7]; np = nn - 9; } else { np = nn - (*pp << 1); b2 = z__[np - 2]; gam = *dn1; if (z__[np - 4] > z__[np - 2]) { return 0; } a2 = z__[np - 4] / z__[np - 2]; if (z__[nn - 9] > z__[nn - 11]) { return 0; } b2 = z__[nn - 9] / z__[nn - 11]; np = nn - 13; } /* Approximate contribution to norm squared from I < NN-1. */ a2 += b2; i__1 = (*i0 << 2) - 1 + *pp; for (i4 = np; i4 >= i__1; i4 += -4) { if (b2 == 0.) { goto L20; } b1 = b2; if (z__[i4] > z__[i4 - 2]) { return 0; } b2 *= z__[i4] / z__[i4 - 2]; a2 += b2; if (max(b2,b1) * 100. < a2 || .563 < a2) { goto L20; } /* L10: */ } L20: a2 *= 1.05; /* Rayleigh quotient residual bound. */ if (a2 < .563) { s = gam * (1. - sqrt(a2)) / (a2 + 1.); } } } else if (*dmin__ == *dn2) { /* Case 5. */ *ttype = -5; s = *dmin__ * .25; /* Compute contribution to norm squared from I > NN-2. */ np = nn - (*pp << 1); b1 = z__[np - 2]; b2 = z__[np - 6]; gam = *dn2; if (z__[np - 8] > b2 || z__[np - 4] > b1) { return 0; } a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.); /* Approximate contribution to norm squared from I < NN-2. */ if (*n0 - *i0 > 2) { b2 = z__[nn - 13] / z__[nn - 15]; a2 += b2; i__1 = (*i0 << 2) - 1 + *pp; for (i4 = nn - 17; i4 >= i__1; i4 += -4) { if (b2 == 0.) { goto L40; } b1 = b2; if (z__[i4] > z__[i4 - 2]) { return 0; } b2 *= z__[i4] / z__[i4 - 2]; a2 += b2; if (max(b2,b1) * 100. < a2 || .563 < a2) { goto L40; } /* L30: */ } L40: a2 *= 1.05; } if (a2 < .563) { s = gam * (1. - sqrt(a2)) / (a2 + 1.); } } else { /* Case 6, no information to guide us. */ if (*ttype == -6) { *g += (1. - *g) * .333; } else if (*ttype == -18) { *g = .083250000000000005; } else { *g = .25; } s = *g * *dmin__; *ttype = -6; } } else if (*n0in == *n0 + 1) { /* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */ if (*dmin1 == *dn1 && *dmin2 == *dn2) { /* Cases 7 and 8. */ *ttype = -7; s = *dmin1 * .333; if (z__[nn - 5] > z__[nn - 7]) { return 0; } b1 = z__[nn - 5] / z__[nn - 7]; b2 = b1; if (b2 == 0.) { goto L60; } i__1 = (*i0 << 2) - 1 + *pp; for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { a2 = b1; if (z__[i4] > z__[i4 - 2]) { return 0; } b1 *= z__[i4] / z__[i4 - 2]; b2 += b1; if (max(b1,a2) * 100. < b2) { goto L60; } /* L50: */ } L60: b2 = sqrt(b2 * 1.05); /* Computing 2nd power */ d__1 = b2; a2 = *dmin1 / (d__1 * d__1 + 1.); gap2 = *dmin2 * .5 - a2; if (gap2 > 0. && gap2 > b2 * a2) { /* Computing MAX */ d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); s = max(d__1,d__2); } else { /* Computing MAX */ d__1 = s, d__2 = a2 * (1. - b2 * 1.01); s = max(d__1,d__2); *ttype = -8; } } else { /* Case 9. */ s = *dmin1 * .25; if (*dmin1 == *dn1) { s = *dmin1 * .5; } *ttype = -9; } } else if (*n0in == *n0 + 2) { /* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. */ /* Cases 10 and 11. */ if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) { *ttype = -10; s = *dmin2 * .333; if (z__[nn - 5] > z__[nn - 7]) { return 0; } b1 = z__[nn - 5] / z__[nn - 7]; b2 = b1; if (b2 == 0.) { goto L80; } i__1 = (*i0 << 2) - 1 + *pp; for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { if (z__[i4] > z__[i4 - 2]) { return 0; } b1 *= z__[i4] / z__[i4 - 2]; b2 += b1; if (b1 * 100. < b2) { goto L80; } /* L70: */ } L80: b2 = sqrt(b2 * 1.05); /* Computing 2nd power */ d__1 = b2; a2 = *dmin2 / (d__1 * d__1 + 1.); gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ nn - 9]) - a2; if (gap2 > 0. && gap2 > b2 * a2) { /* Computing MAX */ d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); s = max(d__1,d__2); } else { /* Computing MAX */ d__1 = s, d__2 = a2 * (1. - b2 * 1.01); s = max(d__1,d__2); } } else { s = *dmin2 * .25; *ttype = -11; } } else if (*n0in > *n0 + 2) { /* Case 12, more than two eigenvalues deflated. No information. */ s = 0.; *ttype = -12; } *tau = s; return 0; /* End of DLASQ4 */ } /* _starpu_dlasq4_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasq5.c000066400000000000000000000137521507764646700205750ustar00rootroot00000000000000/* dlasq5.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlasq5_(integer *i0, integer *n0, doublereal *z__, integer *pp, doublereal *tau, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2, logical *ieee) { /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Local variables */ doublereal d__; integer j4, j4p2; doublereal emin, temp; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ /* -- Berkeley -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASQ5 computes one dqds transform in ping-pong form, one */ /* version for IEEE machines another for non IEEE machines. */ /* Arguments */ /* ========= */ /* I0 (input) INTEGER */ /* First index. */ /* N0 (input) INTEGER */ /* Last index. */ /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ /* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */ /* an extra argument. */ /* PP (input) INTEGER */ /* PP=0 for ping, PP=1 for pong. */ /* TAU (input) DOUBLE PRECISION */ /* This is the shift. */ /* DMIN (output) DOUBLE PRECISION */ /* Minimum value of d. */ /* DMIN1 (output) DOUBLE PRECISION */ /* Minimum value of d, excluding D( N0 ). */ /* DMIN2 (output) DOUBLE PRECISION */ /* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ /* DN (output) DOUBLE PRECISION */ /* d(N0), the last value of d. */ /* DNM1 (output) DOUBLE PRECISION */ /* d(N0-1). */ /* DNM2 (output) DOUBLE PRECISION */ /* d(N0-2). */ /* IEEE (input) LOGICAL */ /* Flag for IEEE or non IEEE arithmetic. */ /* ===================================================================== */ /* .. Parameter .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --z__; /* Function Body */ if (*n0 - *i0 - 1 <= 0) { return 0; } j4 = (*i0 << 2) + *pp - 3; emin = z__[j4 + 4]; d__ = z__[j4] - *tau; *dmin__ = d__; *dmin1 = -z__[j4]; if (*ieee) { /* Code for IEEE arithmetic. */ if (*pp == 0) { i__1 = *n0 - 3 << 2; for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 2] = d__ + z__[j4 - 1]; temp = z__[j4 + 1] / z__[j4 - 2]; d__ = d__ * temp - *tau; *dmin__ = min(*dmin__,d__); z__[j4] = z__[j4 - 1] * temp; /* Computing MIN */ d__1 = z__[j4]; emin = min(d__1,emin); /* L10: */ } } else { i__1 = *n0 - 3 << 2; for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 3] = d__ + z__[j4]; temp = z__[j4 + 2] / z__[j4 - 3]; d__ = d__ * temp - *tau; *dmin__ = min(*dmin__,d__); z__[j4 - 1] = z__[j4] * temp; /* Computing MIN */ d__1 = z__[j4 - 1]; emin = min(d__1,emin); /* L20: */ } } /* Unroll last two steps. */ *dnm2 = d__; *dmin2 = *dmin__; j4 = (*n0 - 2 << 2) - *pp; j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm2 + z__[j4p2]; z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; *dmin__ = min(*dmin__,*dnm1); *dmin1 = *dmin__; j4 += 4; j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm1 + z__[j4p2]; z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; *dmin__ = min(*dmin__,*dn); } else { /* Code for non IEEE arithmetic. */ if (*pp == 0) { i__1 = *n0 - 3 << 2; for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 2] = d__ + z__[j4 - 1]; if (d__ < 0.) { return 0; } else { z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; } *dmin__ = min(*dmin__,d__); /* Computing MIN */ d__1 = emin, d__2 = z__[j4]; emin = min(d__1,d__2); /* L30: */ } } else { i__1 = *n0 - 3 << 2; for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 3] = d__ + z__[j4]; if (d__ < 0.) { return 0; } else { z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; } *dmin__ = min(*dmin__,d__); /* Computing MIN */ d__1 = emin, d__2 = z__[j4 - 1]; emin = min(d__1,d__2); /* L40: */ } } /* Unroll last two steps. */ *dnm2 = d__; *dmin2 = *dmin__; j4 = (*n0 - 2 << 2) - *pp; j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm2 + z__[j4p2]; if (*dnm2 < 0.) { return 0; } else { z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; } *dmin__ = min(*dmin__,*dnm1); *dmin1 = *dmin__; j4 += 4; j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm1 + z__[j4p2]; if (*dnm1 < 0.) { return 0; } else { z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; } *dmin__ = min(*dmin__,*dn); } z__[j4 + 2] = *dn; z__[(*n0 << 2) - *pp] = emin; return 0; /* End of DLASQ5 */ } /* _starpu_dlasq5_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasq6.c000066400000000000000000000132011507764646700205630ustar00rootroot00000000000000/* dlasq6.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlasq6_(integer *i0, integer *n0, doublereal *z__, integer *pp, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2) { /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Local variables */ doublereal d__; integer j4, j4p2; doublereal emin, temp; extern doublereal _starpu_dlamch_(char *); doublereal safmin; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ /* -- Berkeley -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASQ6 computes one dqd (shift equal to zero) transform in */ /* ping-pong form, with protection against underflow and overflow. */ /* Arguments */ /* ========= */ /* I0 (input) INTEGER */ /* First index. */ /* N0 (input) INTEGER */ /* Last index. */ /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ /* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */ /* an extra argument. */ /* PP (input) INTEGER */ /* PP=0 for ping, PP=1 for pong. */ /* DMIN (output) DOUBLE PRECISION */ /* Minimum value of d. */ /* DMIN1 (output) DOUBLE PRECISION */ /* Minimum value of d, excluding D( N0 ). */ /* DMIN2 (output) DOUBLE PRECISION */ /* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ /* DN (output) DOUBLE PRECISION */ /* d(N0), the last value of d. */ /* DNM1 (output) DOUBLE PRECISION */ /* d(N0-1). */ /* DNM2 (output) DOUBLE PRECISION */ /* d(N0-2). */ /* ===================================================================== */ /* .. Parameter .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Function .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --z__; /* Function Body */ if (*n0 - *i0 - 1 <= 0) { return 0; } safmin = _starpu_dlamch_("Safe minimum"); j4 = (*i0 << 2) + *pp - 3; emin = z__[j4 + 4]; d__ = z__[j4]; *dmin__ = d__; if (*pp == 0) { i__1 = *n0 - 3 << 2; for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 2] = d__ + z__[j4 - 1]; if (z__[j4 - 2] == 0.) { z__[j4] = 0.; d__ = z__[j4 + 1]; *dmin__ = d__; emin = 0.; } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 - 2] < z__[j4 + 1]) { temp = z__[j4 + 1] / z__[j4 - 2]; z__[j4] = z__[j4 - 1] * temp; d__ *= temp; } else { z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); } *dmin__ = min(*dmin__,d__); /* Computing MIN */ d__1 = emin, d__2 = z__[j4]; emin = min(d__1,d__2); /* L10: */ } } else { i__1 = *n0 - 3 << 2; for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 3] = d__ + z__[j4]; if (z__[j4 - 3] == 0.) { z__[j4 - 1] = 0.; d__ = z__[j4 + 2]; *dmin__ = d__; emin = 0.; } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 - 3] < z__[j4 + 2]) { temp = z__[j4 + 2] / z__[j4 - 3]; z__[j4 - 1] = z__[j4] * temp; d__ *= temp; } else { z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); } *dmin__ = min(*dmin__,d__); /* Computing MIN */ d__1 = emin, d__2 = z__[j4 - 1]; emin = min(d__1,d__2); /* L20: */ } } /* Unroll last two steps. */ *dnm2 = d__; *dmin2 = *dmin__; j4 = (*n0 - 2 << 2) - *pp; j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm2 + z__[j4p2]; if (z__[j4 - 2] == 0.) { z__[j4] = 0.; *dnm1 = z__[j4p2 + 2]; *dmin__ = *dnm1; emin = 0.; } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < z__[j4p2 + 2]) { temp = z__[j4p2 + 2] / z__[j4 - 2]; z__[j4] = z__[j4p2] * temp; *dnm1 = *dnm2 * temp; } else { z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); } *dmin__ = min(*dmin__,*dnm1); *dmin1 = *dmin__; j4 += 4; j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm1 + z__[j4p2]; if (z__[j4 - 2] == 0.) { z__[j4] = 0.; *dn = z__[j4p2 + 2]; *dmin__ = *dn; emin = 0.; } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < z__[j4p2 + 2]) { temp = z__[j4p2 + 2] / z__[j4 - 2]; z__[j4] = z__[j4p2] * temp; *dn = *dnm1 * temp; } else { z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); } *dmin__ = min(*dmin__,*dn); z__[j4 + 2] = *dn; z__[(*n0 << 2) - *pp] = emin; return 0; /* End of DLASQ6 */ } /* _starpu_dlasq6_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasr.c000066400000000000000000000322431507764646700205050ustar00rootroot00000000000000/* dlasr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlasr_(char *side, char *pivot, char *direct, integer *m, integer *n, doublereal *c__, doublereal *s, doublereal *a, integer * lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, info; doublereal temp; extern logical _starpu_lsame_(char *, char *); doublereal ctemp, stemp; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASR applies a sequence of plane rotations to a real matrix A, */ /* from either the left or the right. */ /* When SIDE = 'L', the transformation takes the form */ /* A := P*A */ /* and when SIDE = 'R', the transformation takes the form */ /* A := A*P**T */ /* where P is an orthogonal matrix consisting of a sequence of z plane */ /* rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', */ /* and P**T is the transpose of P. */ /* When DIRECT = 'F' (Forward sequence), then */ /* P = P(z-1) * ... * P(2) * P(1) */ /* and when DIRECT = 'B' (Backward sequence), then */ /* P = P(1) * P(2) * ... * P(z-1) */ /* where P(k) is a plane rotation matrix defined by the 2-by-2 rotation */ /* R(k) = ( c(k) s(k) ) */ /* = ( -s(k) c(k) ). */ /* When PIVOT = 'V' (Variable pivot), the rotation is performed */ /* for the plane (k,k+1), i.e., P(k) has the form */ /* P(k) = ( 1 ) */ /* ( ... ) */ /* ( 1 ) */ /* ( c(k) s(k) ) */ /* ( -s(k) c(k) ) */ /* ( 1 ) */ /* ( ... ) */ /* ( 1 ) */ /* where R(k) appears as a rank-2 modification to the identity matrix in */ /* rows and columns k and k+1. */ /* When PIVOT = 'T' (Top pivot), the rotation is performed for the */ /* plane (1,k+1), so P(k) has the form */ /* P(k) = ( c(k) s(k) ) */ /* ( 1 ) */ /* ( ... ) */ /* ( 1 ) */ /* ( -s(k) c(k) ) */ /* ( 1 ) */ /* ( ... ) */ /* ( 1 ) */ /* where R(k) appears in rows and columns 1 and k+1. */ /* Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is */ /* performed for the plane (k,z), giving P(k) the form */ /* P(k) = ( 1 ) */ /* ( ... ) */ /* ( 1 ) */ /* ( c(k) s(k) ) */ /* ( 1 ) */ /* ( ... ) */ /* ( 1 ) */ /* ( -s(k) c(k) ) */ /* where R(k) appears in rows and columns k and z. The rotations are */ /* performed without ever forming P(k) explicitly. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* Specifies whether the plane rotation matrix P is applied to */ /* A on the left or the right. */ /* = 'L': Left, compute A := P*A */ /* = 'R': Right, compute A:= A*P**T */ /* PIVOT (input) CHARACTER*1 */ /* Specifies the plane for which P(k) is a plane rotation */ /* matrix. */ /* = 'V': Variable pivot, the plane (k,k+1) */ /* = 'T': Top pivot, the plane (1,k+1) */ /* = 'B': Bottom pivot, the plane (k,z) */ /* DIRECT (input) CHARACTER*1 */ /* Specifies whether P is a forward or backward sequence of */ /* plane rotations. */ /* = 'F': Forward, P = P(z-1)*...*P(2)*P(1) */ /* = 'B': Backward, P = P(1)*P(2)*...*P(z-1) */ /* M (input) INTEGER */ /* The number of rows of the matrix A. If m <= 1, an immediate */ /* return is effected. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. If n <= 1, an */ /* immediate return is effected. */ /* C (input) DOUBLE PRECISION array, dimension */ /* (M-1) if SIDE = 'L' */ /* (N-1) if SIDE = 'R' */ /* The cosines c(k) of the plane rotations. */ /* S (input) DOUBLE PRECISION array, dimension */ /* (M-1) if SIDE = 'L' */ /* (N-1) if SIDE = 'R' */ /* The sines s(k) of the plane rotations. The 2-by-2 plane */ /* rotation part of the matrix P(k), R(k), has the form */ /* R(k) = ( c(k) s(k) ) */ /* ( -s(k) c(k) ). */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* The M-by-N matrix A. On exit, A is overwritten by P*A if */ /* SIDE = 'R' or by A*P**T if SIDE = 'L'. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ --c__; --s; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ info = 0; if (! (_starpu_lsame_(side, "L") || _starpu_lsame_(side, "R"))) { info = 1; } else if (! (_starpu_lsame_(pivot, "V") || _starpu_lsame_(pivot, "T") || _starpu_lsame_(pivot, "B"))) { info = 2; } else if (! (_starpu_lsame_(direct, "F") || _starpu_lsame_(direct, "B"))) { info = 3; } else if (*m < 0) { info = 4; } else if (*n < 0) { info = 5; } else if (*lda < max(1,*m)) { info = 9; } if (info != 0) { _starpu_xerbla_("DLASR ", &info); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } if (_starpu_lsame_(side, "L")) { /* Form P * A */ if (_starpu_lsame_(pivot, "V")) { if (_starpu_lsame_(direct, "F")) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; if (ctemp != 1. || stemp != 0.) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[j + 1 + i__ * a_dim1]; a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * a[j + i__ * a_dim1]; a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + i__ * a_dim1]; /* L10: */ } } /* L20: */ } } else if (_starpu_lsame_(direct, "B")) { for (j = *m - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; if (ctemp != 1. || stemp != 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[j + 1 + i__ * a_dim1]; a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * a[j + i__ * a_dim1]; a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + i__ * a_dim1]; /* L30: */ } } /* L40: */ } } } else if (_starpu_lsame_(pivot, "T")) { if (_starpu_lsame_(direct, "F")) { i__1 = *m; for (j = 2; j <= i__1; ++j) { ctemp = c__[j - 1]; stemp = s[j - 1]; if (ctemp != 1. || stemp != 0.) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[j + i__ * a_dim1]; a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ i__ * a_dim1 + 1]; a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ i__ * a_dim1 + 1]; /* L50: */ } } /* L60: */ } } else if (_starpu_lsame_(direct, "B")) { for (j = *m; j >= 2; --j) { ctemp = c__[j - 1]; stemp = s[j - 1]; if (ctemp != 1. || stemp != 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[j + i__ * a_dim1]; a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ i__ * a_dim1 + 1]; a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ i__ * a_dim1 + 1]; /* L70: */ } } /* L80: */ } } } else if (_starpu_lsame_(pivot, "B")) { if (_starpu_lsame_(direct, "F")) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; if (ctemp != 1. || stemp != 0.) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[j + i__ * a_dim1]; a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + ctemp * temp; a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * a_dim1] - stemp * temp; /* L90: */ } } /* L100: */ } } else if (_starpu_lsame_(direct, "B")) { for (j = *m - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; if (ctemp != 1. || stemp != 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[j + i__ * a_dim1]; a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + ctemp * temp; a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * a_dim1] - stemp * temp; /* L110: */ } } /* L120: */ } } } } else if (_starpu_lsame_(side, "R")) { /* Form A * P' */ if (_starpu_lsame_(pivot, "V")) { if (_starpu_lsame_(direct, "F")) { i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; if (ctemp != 1. || stemp != 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[i__ + (j + 1) * a_dim1]; a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * a[i__ + j * a_dim1]; a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ i__ + j * a_dim1]; /* L130: */ } } /* L140: */ } } else if (_starpu_lsame_(direct, "B")) { for (j = *n - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; if (ctemp != 1. || stemp != 0.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[i__ + (j + 1) * a_dim1]; a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * a[i__ + j * a_dim1]; a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ i__ + j * a_dim1]; /* L150: */ } } /* L160: */ } } } else if (_starpu_lsame_(pivot, "T")) { if (_starpu_lsame_(direct, "F")) { i__1 = *n; for (j = 2; j <= i__1; ++j) { ctemp = c__[j - 1]; stemp = s[j - 1]; if (ctemp != 1. || stemp != 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[i__ + j * a_dim1]; a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ i__ + a_dim1]; a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + a_dim1]; /* L170: */ } } /* L180: */ } } else if (_starpu_lsame_(direct, "B")) { for (j = *n; j >= 2; --j) { ctemp = c__[j - 1]; stemp = s[j - 1]; if (ctemp != 1. || stemp != 0.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[i__ + j * a_dim1]; a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ i__ + a_dim1]; a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + a_dim1]; /* L190: */ } } /* L200: */ } } } else if (_starpu_lsame_(pivot, "B")) { if (_starpu_lsame_(direct, "F")) { i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; if (ctemp != 1. || stemp != 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[i__ + j * a_dim1]; a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + ctemp * temp; a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * a_dim1] - stemp * temp; /* L210: */ } } /* L220: */ } } else if (_starpu_lsame_(direct, "B")) { for (j = *n - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; if (ctemp != 1. || stemp != 0.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[i__ + j * a_dim1]; a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + ctemp * temp; a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * a_dim1] - stemp * temp; /* L230: */ } } /* L240: */ } } } } return 0; /* End of DLASR */ } /* _starpu_dlasr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasrt.c000066400000000000000000000135471507764646700206770ustar00rootroot00000000000000/* dlasrt.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlasrt_(char *id, integer *n, doublereal *d__, integer * info) { /* System generated locals */ integer i__1, i__2; /* Local variables */ integer i__, j; doublereal d1, d2, d3; integer dir; doublereal tmp; integer endd; extern logical _starpu_lsame_(char *, char *); integer stack[64] /* was [2][32] */; doublereal dmnmx; integer start; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); integer stkpnt; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* Sort the numbers in D in increasing order (if ID = 'I') or */ /* in decreasing order (if ID = 'D' ). */ /* Use Quick Sort, reverting to Insertion sort on arrays of */ /* size <= 20. Dimension of STACK limits N to about 2**32. */ /* Arguments */ /* ========= */ /* ID (input) CHARACTER*1 */ /* = 'I': sort D in increasing order; */ /* = 'D': sort D in decreasing order. */ /* N (input) INTEGER */ /* The length of the array D. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the array to be sorted. */ /* On exit, D has been sorted into increasing order */ /* (D(1) <= ... <= D(N) ) or into decreasing order */ /* (D(1) >= ... >= D(N) ), depending on ID. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input paramters. */ /* Parameter adjustments */ --d__; /* Function Body */ *info = 0; dir = -1; if (_starpu_lsame_(id, "D")) { dir = 0; } else if (_starpu_lsame_(id, "I")) { dir = 1; } if (dir == -1) { *info = -1; } else if (*n < 0) { *info = -2; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLASRT", &i__1); return 0; } /* Quick return if possible */ if (*n <= 1) { return 0; } stkpnt = 1; stack[0] = 1; stack[1] = *n; L10: start = stack[(stkpnt << 1) - 2]; endd = stack[(stkpnt << 1) - 1]; --stkpnt; if (endd - start <= 20 && endd - start > 0) { /* Do Insertion sort on D( START:ENDD ) */ if (dir == 0) { /* Sort into decreasing order */ i__1 = endd; for (i__ = start + 1; i__ <= i__1; ++i__) { i__2 = start + 1; for (j = i__; j >= i__2; --j) { if (d__[j] > d__[j - 1]) { dmnmx = d__[j]; d__[j] = d__[j - 1]; d__[j - 1] = dmnmx; } else { goto L30; } /* L20: */ } L30: ; } } else { /* Sort into increasing order */ i__1 = endd; for (i__ = start + 1; i__ <= i__1; ++i__) { i__2 = start + 1; for (j = i__; j >= i__2; --j) { if (d__[j] < d__[j - 1]) { dmnmx = d__[j]; d__[j] = d__[j - 1]; d__[j - 1] = dmnmx; } else { goto L50; } /* L40: */ } L50: ; } } } else if (endd - start > 20) { /* Partition D( START:ENDD ) and stack parts, largest one first */ /* Choose partition entry as median of 3 */ d1 = d__[start]; d2 = d__[endd]; i__ = (start + endd) / 2; d3 = d__[i__]; if (d1 < d2) { if (d3 < d1) { dmnmx = d1; } else if (d3 < d2) { dmnmx = d3; } else { dmnmx = d2; } } else { if (d3 < d2) { dmnmx = d2; } else if (d3 < d1) { dmnmx = d3; } else { dmnmx = d1; } } if (dir == 0) { /* Sort into decreasing order */ i__ = start - 1; j = endd + 1; L60: L70: --j; if (d__[j] < dmnmx) { goto L70; } L80: ++i__; if (d__[i__] > dmnmx) { goto L80; } if (i__ < j) { tmp = d__[i__]; d__[i__] = d__[j]; d__[j] = tmp; goto L60; } if (j - start > endd - j - 1) { ++stkpnt; stack[(stkpnt << 1) - 2] = start; stack[(stkpnt << 1) - 1] = j; ++stkpnt; stack[(stkpnt << 1) - 2] = j + 1; stack[(stkpnt << 1) - 1] = endd; } else { ++stkpnt; stack[(stkpnt << 1) - 2] = j + 1; stack[(stkpnt << 1) - 1] = endd; ++stkpnt; stack[(stkpnt << 1) - 2] = start; stack[(stkpnt << 1) - 1] = j; } } else { /* Sort into increasing order */ i__ = start - 1; j = endd + 1; L90: L100: --j; if (d__[j] > dmnmx) { goto L100; } L110: ++i__; if (d__[i__] < dmnmx) { goto L110; } if (i__ < j) { tmp = d__[i__]; d__[i__] = d__[j]; d__[j] = tmp; goto L90; } if (j - start > endd - j - 1) { ++stkpnt; stack[(stkpnt << 1) - 2] = start; stack[(stkpnt << 1) - 1] = j; ++stkpnt; stack[(stkpnt << 1) - 2] = j + 1; stack[(stkpnt << 1) - 1] = endd; } else { ++stkpnt; stack[(stkpnt << 1) - 2] = j + 1; stack[(stkpnt << 1) - 1] = endd; ++stkpnt; stack[(stkpnt << 1) - 2] = start; stack[(stkpnt << 1) - 1] = j; } } } if (stkpnt > 0) { goto L10; } return 0; /* End of DLASRT */ } /* _starpu_dlasrt_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlassq.c000066400000000000000000000064141507764646700206700ustar00rootroot00000000000000/* dlassq.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlassq_(integer *n, doublereal *x, integer *incx, doublereal *scale, doublereal *sumsq) { /* System generated locals */ integer i__1, i__2; doublereal d__1; /* Local variables */ integer ix; doublereal absxi; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASSQ returns the values scl and smsq such that */ /* ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, */ /* where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is */ /* assumed to be non-negative and scl returns the value */ /* scl = max( scale, abs( x( i ) ) ). */ /* scale and sumsq must be supplied in SCALE and SUMSQ and */ /* scl and smsq are overwritten on SCALE and SUMSQ respectively. */ /* The routine makes only one pass through the vector x. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of elements to be used from the vector X. */ /* X (input) DOUBLE PRECISION array, dimension (N) */ /* The vector for which a scaled sum of squares is computed. */ /* x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. */ /* INCX (input) INTEGER */ /* The increment between successive values of the vector X. */ /* INCX > 0. */ /* SCALE (input/output) DOUBLE PRECISION */ /* On entry, the value scale in the equation above. */ /* On exit, SCALE is overwritten with scl , the scaling factor */ /* for the sum of squares. */ /* SUMSQ (input/output) DOUBLE PRECISION */ /* On entry, the value sumsq in the equation above. */ /* On exit, SUMSQ is overwritten with smsq , the basic sum of */ /* squares from which scl has been factored out. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --x; /* Function Body */ if (*n > 0) { i__1 = (*n - 1) * *incx + 1; i__2 = *incx; for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { if (x[ix] != 0.) { absxi = (d__1 = x[ix], abs(d__1)); if (*scale < absxi) { /* Computing 2nd power */ d__1 = *scale / absxi; *sumsq = *sumsq * (d__1 * d__1) + 1; *scale = absxi; } else { /* Computing 2nd power */ d__1 = absxi / *scale; *sumsq += d__1 * d__1; } } /* L10: */ } } return 0; /* End of DLASSQ */ } /* _starpu_dlassq_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasv2.c000066400000000000000000000151231507764646700205710ustar00rootroot00000000000000/* dlasv2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b3 = 2.; static doublereal c_b4 = 1.; /* Subroutine */ int _starpu_dlasv2_(doublereal *f, doublereal *g, doublereal *h__, doublereal *ssmin, doublereal *ssmax, doublereal *snr, doublereal * csr, doublereal *snl, doublereal *csl) { /* System generated locals */ doublereal d__1; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ doublereal a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, clt, crt, slt, srt; integer pmax; doublereal temp; logical swap; doublereal tsign; extern doublereal _starpu_dlamch_(char *); logical gasmal; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASV2 computes the singular value decomposition of a 2-by-2 */ /* triangular matrix */ /* [ F G ] */ /* [ 0 H ]. */ /* On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the */ /* smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and */ /* right singular vectors for abs(SSMAX), giving the decomposition */ /* [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] */ /* [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. */ /* Arguments */ /* ========= */ /* F (input) DOUBLE PRECISION */ /* The (1,1) element of the 2-by-2 matrix. */ /* G (input) DOUBLE PRECISION */ /* The (1,2) element of the 2-by-2 matrix. */ /* H (input) DOUBLE PRECISION */ /* The (2,2) element of the 2-by-2 matrix. */ /* SSMIN (output) DOUBLE PRECISION */ /* abs(SSMIN) is the smaller singular value. */ /* SSMAX (output) DOUBLE PRECISION */ /* abs(SSMAX) is the larger singular value. */ /* SNL (output) DOUBLE PRECISION */ /* CSL (output) DOUBLE PRECISION */ /* The vector (CSL, SNL) is a unit left singular vector for the */ /* singular value abs(SSMAX). */ /* SNR (output) DOUBLE PRECISION */ /* CSR (output) DOUBLE PRECISION */ /* The vector (CSR, SNR) is a unit right singular vector for the */ /* singular value abs(SSMAX). */ /* Further Details */ /* =============== */ /* Any input parameter may be aliased with any output parameter. */ /* Barring over/underflow and assuming a guard digit in subtraction, all */ /* output quantities are correct to within a few units in the last */ /* place (ulps). */ /* In IEEE arithmetic, the code works correctly if one matrix element is */ /* infinite. */ /* Overflow will not occur unless the largest singular value itself */ /* overflows or is within a few ulps of overflow. (On machines with */ /* partial overflow, like the Cray, overflow may occur if the largest */ /* singular value is within a factor of 2 of overflow.) */ /* Underflow is harmless if underflow is gradual. Otherwise, results */ /* may correspond to a matrix modified by perturbations of size near */ /* the underflow threshold. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ ft = *f; fa = abs(ft); ht = *h__; ha = abs(*h__); /* PMAX points to the maximum absolute element of matrix */ /* PMAX = 1 if F largest in absolute values */ /* PMAX = 2 if G largest in absolute values */ /* PMAX = 3 if H largest in absolute values */ pmax = 1; swap = ha > fa; if (swap) { pmax = 3; temp = ft; ft = ht; ht = temp; temp = fa; fa = ha; ha = temp; /* Now FA .ge. HA */ } gt = *g; ga = abs(gt); if (ga == 0.) { /* Diagonal matrix */ *ssmin = ha; *ssmax = fa; clt = 1.; crt = 1.; slt = 0.; srt = 0.; } else { gasmal = TRUE_; if (ga > fa) { pmax = 2; if (fa / ga < _starpu_dlamch_("EPS")) { /* Case of very large GA */ gasmal = FALSE_; *ssmax = ga; if (ha > 1.) { *ssmin = fa / (ga / ha); } else { *ssmin = fa / ga * ha; } clt = 1.; slt = ht / gt; srt = 1.; crt = ft / gt; } } if (gasmal) { /* Normal case */ d__ = fa - ha; if (d__ == fa) { /* Copes with infinite F or H */ l = 1.; } else { l = d__ / fa; } /* Note that 0 .le. L .le. 1 */ m = gt / ft; /* Note that abs(M) .le. 1/macheps */ t = 2. - l; /* Note that T .ge. 1 */ mm = m * m; tt = t * t; s = sqrt(tt + mm); /* Note that 1 .le. S .le. 1 + 1/macheps */ if (l == 0.) { r__ = abs(m); } else { r__ = sqrt(l * l + mm); } /* Note that 0 .le. R .le. 1 + 1/macheps */ a = (s + r__) * .5; /* Note that 1 .le. A .le. 1 + abs(M) */ *ssmin = ha / a; *ssmax = fa * a; if (mm == 0.) { /* Note that M is very tiny */ if (l == 0.) { t = d_sign(&c_b3, &ft) * d_sign(&c_b4, >); } else { t = gt / d_sign(&d__, &ft) + m / t; } } else { t = (m / (s + t) + m / (r__ + l)) * (a + 1.); } l = sqrt(t * t + 4.); crt = 2. / l; srt = t / l; clt = (crt + srt * m) / a; slt = ht / ft * srt / a; } } if (swap) { *csl = srt; *snl = crt; *csr = slt; *snr = clt; } else { *csl = clt; *snl = slt; *csr = crt; *snr = srt; } /* Correct signs of SSMAX and SSMIN */ if (pmax == 1) { tsign = d_sign(&c_b4, csr) * d_sign(&c_b4, csl) * d_sign(&c_b4, f); } if (pmax == 2) { tsign = d_sign(&c_b4, snr) * d_sign(&c_b4, csl) * d_sign(&c_b4, g); } if (pmax == 3) { tsign = d_sign(&c_b4, snr) * d_sign(&c_b4, snl) * d_sign(&c_b4, h__); } *ssmax = d_sign(ssmax, &tsign); d__1 = tsign * d_sign(&c_b4, f) * d_sign(&c_b4, h__); *ssmin = d_sign(ssmin, &d__1); return 0; /* End of DLASV2 */ } /* _starpu_dlasv2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlaswp.c000066400000000000000000000077251507764646700207010ustar00rootroot00000000000000/* dlaswp.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlaswp_(integer *n, doublereal *a, integer *lda, integer *k1, integer *k2, integer *ipiv, integer *incx) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; doublereal temp; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASWP performs a series of row interchanges on the matrix A. */ /* One row interchange is initiated for each of rows K1 through K2 of A. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of columns of the matrix A. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the matrix of column dimension N to which the row */ /* interchanges will be applied. */ /* On exit, the permuted matrix. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. */ /* K1 (input) INTEGER */ /* The first element of IPIV for which a row interchange will */ /* be done. */ /* K2 (input) INTEGER */ /* The last element of IPIV for which a row interchange will */ /* be done. */ /* IPIV (input) INTEGER array, dimension (K2*abs(INCX)) */ /* The vector of pivot indices. Only the elements in positions */ /* K1 through K2 of IPIV are accessed. */ /* IPIV(K) = L implies rows K and L are to be interchanged. */ /* INCX (input) INTEGER */ /* The increment between successive values of IPIV. If IPIV */ /* is negative, the pivots are applied in reverse order. */ /* Further Details */ /* =============== */ /* Modified by */ /* R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Interchange row I with row IPIV(I) for each of rows K1 through K2. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; /* Function Body */ if (*incx > 0) { ix0 = *k1; i1 = *k1; i2 = *k2; inc = 1; } else if (*incx < 0) { ix0 = (1 - *k2) * *incx + 1; i1 = *k2; i2 = *k1; inc = -1; } else { return 0; } n32 = *n / 32 << 5; if (n32 != 0) { i__1 = n32; for (j = 1; j <= i__1; j += 32) { ix = ix0; i__2 = i2; i__3 = inc; for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) { ip = ipiv[ix]; if (ip != i__) { i__4 = j + 31; for (k = j; k <= i__4; ++k) { temp = a[i__ + k * a_dim1]; a[i__ + k * a_dim1] = a[ip + k * a_dim1]; a[ip + k * a_dim1] = temp; /* L10: */ } } ix += *incx; /* L20: */ } /* L30: */ } } if (n32 != *n) { ++n32; ix = ix0; i__1 = i2; i__3 = inc; for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { ip = ipiv[ix]; if (ip != i__) { i__2 = *n; for (k = n32; k <= i__2; ++k) { temp = a[i__ + k * a_dim1]; a[i__ + k * a_dim1] = a[ip + k * a_dim1]; a[ip + k * a_dim1] = temp; /* L40: */ } } ix += *incx; /* L50: */ } } return 0; /* End of DLASWP */ } /* _starpu_dlaswp_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasy2.c000066400000000000000000000342021507764646700205730ustar00rootroot00000000000000/* dlasy2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__4 = 4; static integer c__1 = 1; static integer c__16 = 16; static integer c__0 = 0; /* Subroutine */ int _starpu_dlasy2_(logical *ltranl, logical *ltranr, integer *isgn, integer *n1, integer *n2, doublereal *tl, integer *ldtl, doublereal * tr, integer *ldtr, doublereal *b, integer *ldb, doublereal *scale, doublereal *x, integer *ldx, doublereal *xnorm, integer *info) { /* Initialized data */ static integer locu12[4] = { 3,4,1,2 }; static integer locl21[4] = { 2,1,4,3 }; static integer locu22[4] = { 4,3,2,1 }; static logical xswpiv[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; static logical bswpiv[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; /* System generated locals */ integer b_dim1, b_offset, tl_dim1, tl_offset, tr_dim1, tr_offset, x_dim1, x_offset; doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8; /* Local variables */ integer i__, j, k; doublereal x2[2], l21, u11, u12; integer ip, jp; doublereal u22, t16[16] /* was [4][4] */, gam, bet, eps, sgn, tmp[4], tau1, btmp[4], smin; integer ipiv; doublereal temp; integer jpiv[4]; doublereal xmax; integer ipsv, jpsv; logical bswap; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); logical xswap; extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); doublereal smlnum; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASY2 solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in */ /* op(TL)*X + ISGN*X*op(TR) = SCALE*B, */ /* where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or */ /* -1. op(T) = T or T', where T' denotes the transpose of T. */ /* Arguments */ /* ========= */ /* LTRANL (input) LOGICAL */ /* On entry, LTRANL specifies the op(TL): */ /* = .FALSE., op(TL) = TL, */ /* = .TRUE., op(TL) = TL'. */ /* LTRANR (input) LOGICAL */ /* On entry, LTRANR specifies the op(TR): */ /* = .FALSE., op(TR) = TR, */ /* = .TRUE., op(TR) = TR'. */ /* ISGN (input) INTEGER */ /* On entry, ISGN specifies the sign of the equation */ /* as described before. ISGN may only be 1 or -1. */ /* N1 (input) INTEGER */ /* On entry, N1 specifies the order of matrix TL. */ /* N1 may only be 0, 1 or 2. */ /* N2 (input) INTEGER */ /* On entry, N2 specifies the order of matrix TR. */ /* N2 may only be 0, 1 or 2. */ /* TL (input) DOUBLE PRECISION array, dimension (LDTL,2) */ /* On entry, TL contains an N1 by N1 matrix. */ /* LDTL (input) INTEGER */ /* The leading dimension of the matrix TL. LDTL >= max(1,N1). */ /* TR (input) DOUBLE PRECISION array, dimension (LDTR,2) */ /* On entry, TR contains an N2 by N2 matrix. */ /* LDTR (input) INTEGER */ /* The leading dimension of the matrix TR. LDTR >= max(1,N2). */ /* B (input) DOUBLE PRECISION array, dimension (LDB,2) */ /* On entry, the N1 by N2 matrix B contains the right-hand */ /* side of the equation. */ /* LDB (input) INTEGER */ /* The leading dimension of the matrix B. LDB >= max(1,N1). */ /* SCALE (output) DOUBLE PRECISION */ /* On exit, SCALE contains the scale factor. SCALE is chosen */ /* less than or equal to 1 to prevent the solution overflowing. */ /* X (output) DOUBLE PRECISION array, dimension (LDX,2) */ /* On exit, X contains the N1 by N2 solution. */ /* LDX (input) INTEGER */ /* The leading dimension of the matrix X. LDX >= max(1,N1). */ /* XNORM (output) DOUBLE PRECISION */ /* On exit, XNORM is the infinity-norm of the solution. */ /* INFO (output) INTEGER */ /* On exit, INFO is set to */ /* 0: successful exit. */ /* 1: TL and TR have too close eigenvalues, so TL or */ /* TR is perturbed to get a nonsingular equation. */ /* NOTE: In the interests of speed, this routine does not */ /* check the inputs for errors. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Data statements .. */ /* Parameter adjustments */ tl_dim1 = *ldtl; tl_offset = 1 + tl_dim1; tl -= tl_offset; tr_dim1 = *ldtr; tr_offset = 1 + tr_dim1; tr -= tr_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; /* Function Body */ /* .. */ /* .. Executable Statements .. */ /* Do not check the input parameters for errors */ *info = 0; /* Quick return if possible */ if (*n1 == 0 || *n2 == 0) { return 0; } /* Set constants to control overflow */ eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S") / eps; sgn = (doublereal) (*isgn); k = *n1 + *n1 + *n2 - 2; switch (k) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L50; } /* 1 by 1: TL11*X + SGN*X*TR11 = B11 */ L10: tau1 = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; bet = abs(tau1); if (bet <= smlnum) { tau1 = smlnum; bet = smlnum; *info = 1; } *scale = 1.; gam = (d__1 = b[b_dim1 + 1], abs(d__1)); if (smlnum * gam > bet) { *scale = 1. / gam; } x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / tau1; *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); return 0; /* 1 by 2: */ /* TL11*[X11 X12] + ISGN*[X11 X12]*op[TR11 TR12] = [B11 B12] */ /* [TR21 TR22] */ L20: /* Computing MAX */ /* Computing MAX */ d__7 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__8 = (d__2 = tr[tr_dim1 + 1] , abs(d__2)), d__7 = max(d__7,d__8), d__8 = (d__3 = tr[(tr_dim1 << 1) + 1], abs(d__3)), d__7 = max(d__7,d__8), d__8 = (d__4 = tr[ tr_dim1 + 2], abs(d__4)), d__7 = max(d__7,d__8), d__8 = (d__5 = tr[(tr_dim1 << 1) + 2], abs(d__5)); d__6 = eps * max(d__7,d__8); smin = max(d__6,smlnum); tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; tmp[3] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; if (*ltranr) { tmp[1] = sgn * tr[tr_dim1 + 2]; tmp[2] = sgn * tr[(tr_dim1 << 1) + 1]; } else { tmp[1] = sgn * tr[(tr_dim1 << 1) + 1]; tmp[2] = sgn * tr[tr_dim1 + 2]; } btmp[0] = b[b_dim1 + 1]; btmp[1] = b[(b_dim1 << 1) + 1]; goto L40; /* 2 by 1: */ /* op[TL11 TL12]*[X11] + ISGN* [X11]*TR11 = [B11] */ /* [TL21 TL22] [X21] [X21] [B21] */ L30: /* Computing MAX */ /* Computing MAX */ d__7 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__8 = (d__2 = tl[tl_dim1 + 1] , abs(d__2)), d__7 = max(d__7,d__8), d__8 = (d__3 = tl[(tl_dim1 << 1) + 1], abs(d__3)), d__7 = max(d__7,d__8), d__8 = (d__4 = tl[ tl_dim1 + 2], abs(d__4)), d__7 = max(d__7,d__8), d__8 = (d__5 = tl[(tl_dim1 << 1) + 2], abs(d__5)); d__6 = eps * max(d__7,d__8); smin = max(d__6,smlnum); tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; tmp[3] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; if (*ltranl) { tmp[1] = tl[(tl_dim1 << 1) + 1]; tmp[2] = tl[tl_dim1 + 2]; } else { tmp[1] = tl[tl_dim1 + 2]; tmp[2] = tl[(tl_dim1 << 1) + 1]; } btmp[0] = b[b_dim1 + 1]; btmp[1] = b[b_dim1 + 2]; L40: /* Solve 2 by 2 system using complete pivoting. */ /* Set pivots less than SMIN to SMIN. */ ipiv = _starpu_idamax_(&c__4, tmp, &c__1); u11 = tmp[ipiv - 1]; if (abs(u11) <= smin) { *info = 1; u11 = smin; } u12 = tmp[locu12[ipiv - 1] - 1]; l21 = tmp[locl21[ipiv - 1] - 1] / u11; u22 = tmp[locu22[ipiv - 1] - 1] - u12 * l21; xswap = xswpiv[ipiv - 1]; bswap = bswpiv[ipiv - 1]; if (abs(u22) <= smin) { *info = 1; u22 = smin; } if (bswap) { temp = btmp[1]; btmp[1] = btmp[0] - l21 * temp; btmp[0] = temp; } else { btmp[1] -= l21 * btmp[0]; } *scale = 1.; if (smlnum * 2. * abs(btmp[1]) > abs(u22) || smlnum * 2. * abs(btmp[0]) > abs(u11)) { /* Computing MAX */ d__1 = abs(btmp[0]), d__2 = abs(btmp[1]); *scale = .5 / max(d__1,d__2); btmp[0] *= *scale; btmp[1] *= *scale; } x2[1] = btmp[1] / u22; x2[0] = btmp[0] / u11 - u12 / u11 * x2[1]; if (xswap) { temp = x2[1]; x2[1] = x2[0]; x2[0] = temp; } x[x_dim1 + 1] = x2[0]; if (*n1 == 1) { x[(x_dim1 << 1) + 1] = x2[1]; *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << 1) + 1], abs(d__2)); } else { x[x_dim1 + 2] = x2[1]; /* Computing MAX */ d__3 = (d__1 = x[x_dim1 + 1], abs(d__1)), d__4 = (d__2 = x[x_dim1 + 2] , abs(d__2)); *xnorm = max(d__3,d__4); } return 0; /* 2 by 2: */ /* op[TL11 TL12]*[X11 X12] +ISGN* [X11 X12]*op[TR11 TR12] = [B11 B12] */ /* [TL21 TL22] [X21 X22] [X21 X22] [TR21 TR22] [B21 B22] */ /* Solve equivalent 4 by 4 system using complete pivoting. */ /* Set pivots less than SMIN to SMIN. */ L50: /* Computing MAX */ d__5 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__6 = (d__2 = tr[(tr_dim1 << 1) + 1], abs(d__2)), d__5 = max(d__5,d__6), d__6 = (d__3 = tr[ tr_dim1 + 2], abs(d__3)), d__5 = max(d__5,d__6), d__6 = (d__4 = tr[(tr_dim1 << 1) + 2], abs(d__4)); smin = max(d__5,d__6); /* Computing MAX */ d__5 = smin, d__6 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__5 = max(d__5, d__6), d__6 = (d__2 = tl[(tl_dim1 << 1) + 1], abs(d__2)), d__5 = max(d__5,d__6), d__6 = (d__3 = tl[tl_dim1 + 2], abs(d__3)), d__5 = max(d__5,d__6), d__6 = (d__4 = tl[(tl_dim1 << 1) + 2], abs(d__4)) ; smin = max(d__5,d__6); /* Computing MAX */ d__1 = eps * smin; smin = max(d__1,smlnum); btmp[0] = 0.; _starpu_dcopy_(&c__16, btmp, &c__0, t16, &c__1); t16[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; t16[5] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; t16[10] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; t16[15] = tl[(tl_dim1 << 1) + 2] + sgn * tr[(tr_dim1 << 1) + 2]; if (*ltranl) { t16[4] = tl[tl_dim1 + 2]; t16[1] = tl[(tl_dim1 << 1) + 1]; t16[14] = tl[tl_dim1 + 2]; t16[11] = tl[(tl_dim1 << 1) + 1]; } else { t16[4] = tl[(tl_dim1 << 1) + 1]; t16[1] = tl[tl_dim1 + 2]; t16[14] = tl[(tl_dim1 << 1) + 1]; t16[11] = tl[tl_dim1 + 2]; } if (*ltranr) { t16[8] = sgn * tr[(tr_dim1 << 1) + 1]; t16[13] = sgn * tr[(tr_dim1 << 1) + 1]; t16[2] = sgn * tr[tr_dim1 + 2]; t16[7] = sgn * tr[tr_dim1 + 2]; } else { t16[8] = sgn * tr[tr_dim1 + 2]; t16[13] = sgn * tr[tr_dim1 + 2]; t16[2] = sgn * tr[(tr_dim1 << 1) + 1]; t16[7] = sgn * tr[(tr_dim1 << 1) + 1]; } btmp[0] = b[b_dim1 + 1]; btmp[1] = b[b_dim1 + 2]; btmp[2] = b[(b_dim1 << 1) + 1]; btmp[3] = b[(b_dim1 << 1) + 2]; /* Perform elimination */ for (i__ = 1; i__ <= 3; ++i__) { xmax = 0.; for (ip = i__; ip <= 4; ++ip) { for (jp = i__; jp <= 4; ++jp) { if ((d__1 = t16[ip + (jp << 2) - 5], abs(d__1)) >= xmax) { xmax = (d__1 = t16[ip + (jp << 2) - 5], abs(d__1)); ipsv = ip; jpsv = jp; } /* L60: */ } /* L70: */ } if (ipsv != i__) { _starpu_dswap_(&c__4, &t16[ipsv - 1], &c__4, &t16[i__ - 1], &c__4); temp = btmp[i__ - 1]; btmp[i__ - 1] = btmp[ipsv - 1]; btmp[ipsv - 1] = temp; } if (jpsv != i__) { _starpu_dswap_(&c__4, &t16[(jpsv << 2) - 4], &c__1, &t16[(i__ << 2) - 4], &c__1); } jpiv[i__ - 1] = jpsv; if ((d__1 = t16[i__ + (i__ << 2) - 5], abs(d__1)) < smin) { *info = 1; t16[i__ + (i__ << 2) - 5] = smin; } for (j = i__ + 1; j <= 4; ++j) { t16[j + (i__ << 2) - 5] /= t16[i__ + (i__ << 2) - 5]; btmp[j - 1] -= t16[j + (i__ << 2) - 5] * btmp[i__ - 1]; for (k = i__ + 1; k <= 4; ++k) { t16[j + (k << 2) - 5] -= t16[j + (i__ << 2) - 5] * t16[i__ + ( k << 2) - 5]; /* L80: */ } /* L90: */ } /* L100: */ } if (abs(t16[15]) < smin) { t16[15] = smin; } *scale = 1.; if (smlnum * 8. * abs(btmp[0]) > abs(t16[0]) || smlnum * 8. * abs(btmp[1]) > abs(t16[5]) || smlnum * 8. * abs(btmp[2]) > abs(t16[10]) || smlnum * 8. * abs(btmp[3]) > abs(t16[15])) { /* Computing MAX */ d__1 = abs(btmp[0]), d__2 = abs(btmp[1]), d__1 = max(d__1,d__2), d__2 = abs(btmp[2]), d__1 = max(d__1,d__2), d__2 = abs(btmp[3]); *scale = .125 / max(d__1,d__2); btmp[0] *= *scale; btmp[1] *= *scale; btmp[2] *= *scale; btmp[3] *= *scale; } for (i__ = 1; i__ <= 4; ++i__) { k = 5 - i__; temp = 1. / t16[k + (k << 2) - 5]; tmp[k - 1] = btmp[k - 1] * temp; for (j = k + 1; j <= 4; ++j) { tmp[k - 1] -= temp * t16[k + (j << 2) - 5] * tmp[j - 1]; /* L110: */ } /* L120: */ } for (i__ = 1; i__ <= 3; ++i__) { if (jpiv[4 - i__ - 1] != 4 - i__) { temp = tmp[4 - i__ - 1]; tmp[4 - i__ - 1] = tmp[jpiv[4 - i__ - 1] - 1]; tmp[jpiv[4 - i__ - 1] - 1] = temp; } /* L130: */ } x[x_dim1 + 1] = tmp[0]; x[x_dim1 + 2] = tmp[1]; x[(x_dim1 << 1) + 1] = tmp[2]; x[(x_dim1 << 1) + 2] = tmp[3]; /* Computing MAX */ d__1 = abs(tmp[0]) + abs(tmp[2]), d__2 = abs(tmp[1]) + abs(tmp[3]); *xnorm = max(d__1,d__2); return 0; /* End of DLASY2 */ } /* _starpu_dlasy2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlasyf.c000066400000000000000000000502531507764646700206630ustar00rootroot00000000000000/* dlasyf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b8 = -1.; static doublereal c_b9 = 1.; /* Subroutine */ int _starpu_dlasyf_(char *uplo, integer *n, integer *nb, integer *kb, doublereal *a, integer *lda, integer *ipiv, doublereal *w, integer * ldw, integer *info) { /* System generated locals */ integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer j, k; doublereal t, r1, d11, d21, d22; integer jb, jj, kk, jp, kp, kw, kkw, imax, jmax; doublereal alpha; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer * , doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); integer kstep; doublereal absakk; extern integer _starpu_idamax_(integer *, doublereal *, integer *); doublereal colmax, rowmax; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLASYF computes a partial factorization of a real symmetric matrix A */ /* using the Bunch-Kaufman diagonal pivoting method. The partial */ /* factorization has the form: */ /* A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: */ /* ( 0 U22 ) ( 0 D ) ( U12' U22' ) */ /* A = ( L11 0 ) ( D 0 ) ( L11' L21' ) if UPLO = 'L' */ /* ( L21 I ) ( 0 A22 ) ( 0 I ) */ /* where the order of D is at most NB. The actual order is returned in */ /* the argument KB, and is either NB or NB-1, or N if N <= NB. */ /* DLASYF is an auxiliary routine called by DSYTRF. It uses blocked code */ /* (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or */ /* A22 (if UPLO = 'L'). */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is stored: */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NB (input) INTEGER */ /* The maximum number of columns of the matrix A that should be */ /* factored. NB should be at least 2 to allow for 2-by-2 pivot */ /* blocks. */ /* KB (output) INTEGER */ /* The number of columns of A that were actually factored. */ /* KB is either NB-1 or NB, or N if N <= NB. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* n-by-n upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading n-by-n lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, A contains details of the partial factorization. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* IPIV (output) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D. */ /* If UPLO = 'U', only the last KB elements of IPIV are set; */ /* if UPLO = 'L', only the first KB elements are set. */ /* If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ /* interchanged and D(k,k) is a 1-by-1 diagonal block. */ /* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */ /* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ /* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = */ /* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */ /* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ /* W (workspace) DOUBLE PRECISION array, dimension (LDW,NB) */ /* LDW (input) INTEGER */ /* The leading dimension of the array W. LDW >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* > 0: if INFO = k, D(k,k) is exactly zero. The factorization */ /* has been completed, but the block diagonal matrix D is */ /* exactly singular. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; w_dim1 = *ldw; w_offset = 1 + w_dim1; w -= w_offset; /* Function Body */ *info = 0; /* Initialize ALPHA for use in choosing pivot block size. */ alpha = (sqrt(17.) + 1.) / 8.; if (_starpu_lsame_(uplo, "U")) { /* Factorize the trailing columns of A using the upper triangle */ /* of A and working backwards, and compute the matrix W = U12*D */ /* for use in updating A11 */ /* K is the main loop index, decreasing from N in steps of 1 or 2 */ /* KW is the column of W which corresponds to column K of A */ k = *n; L10: kw = *nb + k - *n; /* Exit from loop */ if (k <= *n - *nb + 1 && *nb < *n || k < 1) { goto L30; } /* Copy column K of A to column KW of W and update it */ _starpu_dcopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); if (k < *n) { i__1 = *n - k; _starpu_dgemv_("No transpose", &k, &i__1, &c_b8, &a[(k + 1) * a_dim1 + 1], lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b9, &w[kw * w_dim1 + 1], &c__1); } kstep = 1; /* Determine rows and columns to be interchanged and whether */ /* a 1-by-1 or 2-by-2 pivot block will be used */ absakk = (d__1 = w[k + kw * w_dim1], abs(d__1)); /* IMAX is the row-index of the largest off-diagonal element in */ /* column K, and COLMAX is its absolute value */ if (k > 1) { i__1 = k - 1; imax = _starpu_idamax_(&i__1, &w[kw * w_dim1 + 1], &c__1); colmax = (d__1 = w[imax + kw * w_dim1], abs(d__1)); } else { colmax = 0.; } if (max(absakk,colmax) == 0.) { /* Column K is zero: set INFO and continue */ if (*info == 0) { *info = k; } kp = k; } else { if (absakk >= alpha * colmax) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else { /* Copy column IMAX to column KW-1 of W and update it */ _starpu_dcopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) * w_dim1 + 1], &c__1); i__1 = k - imax; _starpu_dcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax + 1 + (kw - 1) * w_dim1], &c__1); if (k < *n) { i__1 = *n - k; _starpu_dgemv_("No transpose", &k, &i__1, &c_b8, &a[(k + 1) * a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1], ldw, &c_b9, &w[(kw - 1) * w_dim1 + 1], &c__1); } /* JMAX is the column-index of the largest off-diagonal */ /* element in row IMAX, and ROWMAX is its absolute value */ i__1 = k - imax; jmax = imax + _starpu_idamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1); rowmax = (d__1 = w[jmax + (kw - 1) * w_dim1], abs(d__1)); if (imax > 1) { i__1 = imax - 1; jmax = _starpu_idamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1); /* Computing MAX */ d__2 = rowmax, d__3 = (d__1 = w[jmax + (kw - 1) * w_dim1], abs(d__1)); rowmax = max(d__2,d__3); } if (absakk >= alpha * colmax * (colmax / rowmax)) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else if ((d__1 = w[imax + (kw - 1) * w_dim1], abs(d__1)) >= alpha * rowmax) { /* interchange rows and columns K and IMAX, use 1-by-1 */ /* pivot block */ kp = imax; /* copy column KW-1 of W to column KW */ _starpu_dcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); } else { /* interchange rows and columns K-1 and IMAX, use 2-by-2 */ /* pivot block */ kp = imax; kstep = 2; } } kk = k - kstep + 1; kkw = *nb + kk - *n; /* Updated column KP is already stored in column KKW of W */ if (kp != kk) { /* Copy non-updated column KK to column KP */ a[kp + k * a_dim1] = a[kk + k * a_dim1]; i__1 = k - 1 - kp; _starpu_dcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); _starpu_dcopy_(&kp, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], & c__1); /* Interchange rows KK and KP in last KK columns of A and W */ i__1 = *n - kk + 1; _starpu_dswap_(&i__1, &a[kk + kk * a_dim1], lda, &a[kp + kk * a_dim1], lda); i__1 = *n - kk + 1; _starpu_dswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw * w_dim1], ldw); } if (kstep == 1) { /* 1-by-1 pivot block D(k): column KW of W now holds */ /* W(k) = U(k)*D(k) */ /* where U(k) is the k-th column of U */ /* Store U(k) in column k of A */ _starpu_dcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], & c__1); r1 = 1. / a[k + k * a_dim1]; i__1 = k - 1; _starpu_dscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); } else { /* 2-by-2 pivot block D(k): columns KW and KW-1 of W now */ /* hold */ /* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) */ /* where U(k) and U(k-1) are the k-th and (k-1)-th columns */ /* of U */ if (k > 2) { /* Store U(k) and U(k-1) in columns k and k-1 of A */ d21 = w[k - 1 + kw * w_dim1]; d11 = w[k + kw * w_dim1] / d21; d22 = w[k - 1 + (kw - 1) * w_dim1] / d21; t = 1. / (d11 * d22 - 1.); d21 = t / d21; i__1 = k - 2; for (j = 1; j <= i__1; ++j) { a[j + (k - 1) * a_dim1] = d21 * (d11 * w[j + (kw - 1) * w_dim1] - w[j + kw * w_dim1]); a[j + k * a_dim1] = d21 * (d22 * w[j + kw * w_dim1] - w[j + (kw - 1) * w_dim1]); /* L20: */ } } /* Copy D(k) to A */ a[k - 1 + (k - 1) * a_dim1] = w[k - 1 + (kw - 1) * w_dim1]; a[k - 1 + k * a_dim1] = w[k - 1 + kw * w_dim1]; a[k + k * a_dim1] = w[k + kw * w_dim1]; } } /* Store details of the interchanges in IPIV */ if (kstep == 1) { ipiv[k] = kp; } else { ipiv[k] = -kp; ipiv[k - 1] = -kp; } /* Decrease K and return to the start of the main loop */ k -= kstep; goto L10; L30: /* Update the upper triangle of A11 (= A(1:k,1:k)) as */ /* A11 := A11 - U12*D*U12' = A11 - U12*W' */ /* computing blocks of NB columns at a time */ i__1 = -(*nb); for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { /* Computing MIN */ i__2 = *nb, i__3 = k - j + 1; jb = min(i__2,i__3); /* Update the upper triangle of the diagonal block */ i__2 = j + jb - 1; for (jj = j; jj <= i__2; ++jj) { i__3 = jj - j + 1; i__4 = *n - k; _starpu_dgemv_("No transpose", &i__3, &i__4, &c_b8, &a[j + (k + 1) * a_dim1], lda, &w[jj + (kw + 1) * w_dim1], ldw, &c_b9, &a[j + jj * a_dim1], &c__1); /* L40: */ } /* Update the rectangular superdiagonal block */ i__2 = j - 1; i__3 = *n - k; _starpu_dgemm_("No transpose", "Transpose", &i__2, &jb, &i__3, &c_b8, &a[( k + 1) * a_dim1 + 1], lda, &w[j + (kw + 1) * w_dim1], ldw, &c_b9, &a[j * a_dim1 + 1], lda); /* L50: */ } /* Put U12 in standard form by partially undoing the interchanges */ /* in columns k+1:n */ j = k + 1; L60: jj = j; jp = ipiv[j]; if (jp < 0) { jp = -jp; ++j; } ++j; if (jp != jj && j <= *n) { i__1 = *n - j + 1; _starpu_dswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda); } if (j <= *n) { goto L60; } /* Set KB to the number of columns factorized */ *kb = *n - k; } else { /* Factorize the leading columns of A using the lower triangle */ /* of A and working forwards, and compute the matrix W = L21*D */ /* for use in updating A22 */ /* K is the main loop index, increasing from 1 in steps of 1 or 2 */ k = 1; L70: /* Exit from loop */ if (k >= *nb && *nb < *n || k > *n) { goto L90; } /* Copy column K of A to column K of W and update it */ i__1 = *n - k + 1; _starpu_dcopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1); i__1 = *n - k + 1; i__2 = k - 1; _starpu_dgemv_("No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1], lda, &w[k + w_dim1], ldw, &c_b9, &w[k + k * w_dim1], &c__1); kstep = 1; /* Determine rows and columns to be interchanged and whether */ /* a 1-by-1 or 2-by-2 pivot block will be used */ absakk = (d__1 = w[k + k * w_dim1], abs(d__1)); /* IMAX is the row-index of the largest off-diagonal element in */ /* column K, and COLMAX is its absolute value */ if (k < *n) { i__1 = *n - k; imax = k + _starpu_idamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1); colmax = (d__1 = w[imax + k * w_dim1], abs(d__1)); } else { colmax = 0.; } if (max(absakk,colmax) == 0.) { /* Column K is zero: set INFO and continue */ if (*info == 0) { *info = k; } kp = k; } else { if (absakk >= alpha * colmax) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else { /* Copy column IMAX to column K+1 of W and update it */ i__1 = imax - k; _starpu_dcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) * w_dim1], &c__1); i__1 = *n - imax + 1; _starpu_dcopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k + 1) * w_dim1], &c__1); i__1 = *n - k + 1; i__2 = k - 1; _starpu_dgemv_("No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1], lda, &w[imax + w_dim1], ldw, &c_b9, &w[k + (k + 1) * w_dim1], &c__1); /* JMAX is the column-index of the largest off-diagonal */ /* element in row IMAX, and ROWMAX is its absolute value */ i__1 = imax - k; jmax = k - 1 + _starpu_idamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1) ; rowmax = (d__1 = w[jmax + (k + 1) * w_dim1], abs(d__1)); if (imax < *n) { i__1 = *n - imax; jmax = imax + _starpu_idamax_(&i__1, &w[imax + 1 + (k + 1) * w_dim1], &c__1); /* Computing MAX */ d__2 = rowmax, d__3 = (d__1 = w[jmax + (k + 1) * w_dim1], abs(d__1)); rowmax = max(d__2,d__3); } if (absakk >= alpha * colmax * (colmax / rowmax)) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else if ((d__1 = w[imax + (k + 1) * w_dim1], abs(d__1)) >= alpha * rowmax) { /* interchange rows and columns K and IMAX, use 1-by-1 */ /* pivot block */ kp = imax; /* copy column K+1 of W to column K */ i__1 = *n - k + 1; _starpu_dcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k * w_dim1], &c__1); } else { /* interchange rows and columns K+1 and IMAX, use 2-by-2 */ /* pivot block */ kp = imax; kstep = 2; } } kk = k + kstep - 1; /* Updated column KP is already stored in column KK of W */ if (kp != kk) { /* Copy non-updated column KK to column KP */ a[kp + k * a_dim1] = a[kk + k * a_dim1]; i__1 = kp - k - 1; _starpu_dcopy_(&i__1, &a[k + 1 + kk * a_dim1], &c__1, &a[kp + (k + 1) * a_dim1], lda); i__1 = *n - kp + 1; _starpu_dcopy_(&i__1, &a[kp + kk * a_dim1], &c__1, &a[kp + kp * a_dim1], &c__1); /* Interchange rows KK and KP in first KK columns of A and W */ _starpu_dswap_(&kk, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda); _starpu_dswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw); } if (kstep == 1) { /* 1-by-1 pivot block D(k): column k of W now holds */ /* W(k) = L(k)*D(k) */ /* where L(k) is the k-th column of L */ /* Store L(k) in column k of A */ i__1 = *n - k + 1; _starpu_dcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], & c__1); if (k < *n) { r1 = 1. / a[k + k * a_dim1]; i__1 = *n - k; _starpu_dscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); } } else { /* 2-by-2 pivot block D(k): columns k and k+1 of W now hold */ /* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) */ /* where L(k) and L(k+1) are the k-th and (k+1)-th columns */ /* of L */ if (k < *n - 1) { /* Store L(k) and L(k+1) in columns k and k+1 of A */ d21 = w[k + 1 + k * w_dim1]; d11 = w[k + 1 + (k + 1) * w_dim1] / d21; d22 = w[k + k * w_dim1] / d21; t = 1. / (d11 * d22 - 1.); d21 = t / d21; i__1 = *n; for (j = k + 2; j <= i__1; ++j) { a[j + k * a_dim1] = d21 * (d11 * w[j + k * w_dim1] - w[j + (k + 1) * w_dim1]); a[j + (k + 1) * a_dim1] = d21 * (d22 * w[j + (k + 1) * w_dim1] - w[j + k * w_dim1]); /* L80: */ } } /* Copy D(k) to A */ a[k + k * a_dim1] = w[k + k * w_dim1]; a[k + 1 + k * a_dim1] = w[k + 1 + k * w_dim1]; a[k + 1 + (k + 1) * a_dim1] = w[k + 1 + (k + 1) * w_dim1]; } } /* Store details of the interchanges in IPIV */ if (kstep == 1) { ipiv[k] = kp; } else { ipiv[k] = -kp; ipiv[k + 1] = -kp; } /* Increase K and return to the start of the main loop */ k += kstep; goto L70; L90: /* Update the lower triangle of A22 (= A(k:n,k:n)) as */ /* A22 := A22 - L21*D*L21' = A22 - L21*W' */ /* computing blocks of NB columns at a time */ i__1 = *n; i__2 = *nb; for (j = k; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__3 = *nb, i__4 = *n - j + 1; jb = min(i__3,i__4); /* Update the lower triangle of the diagonal block */ i__3 = j + jb - 1; for (jj = j; jj <= i__3; ++jj) { i__4 = j + jb - jj; i__5 = k - 1; _starpu_dgemv_("No transpose", &i__4, &i__5, &c_b8, &a[jj + a_dim1], lda, &w[jj + w_dim1], ldw, &c_b9, &a[jj + jj * a_dim1] , &c__1); /* L100: */ } /* Update the rectangular subdiagonal block */ if (j + jb <= *n) { i__3 = *n - j - jb + 1; i__4 = k - 1; _starpu_dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &c_b8, &a[j + jb + a_dim1], lda, &w[j + w_dim1], ldw, &c_b9, &a[j + jb + j * a_dim1], lda); } /* L110: */ } /* Put L21 in standard form by partially undoing the interchanges */ /* in columns 1:k-1 */ j = k - 1; L120: jj = j; jp = ipiv[j]; if (jp < 0) { jp = -jp; --j; } --j; if (jp != jj && j >= 1) { _starpu_dswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda); } if (j >= 1) { goto L120; } /* Set KB to the number of columns factorized */ *kb = k - 1; } return 0; /* End of DLASYF */ } /* _starpu_dlasyf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlat2s.c000066400000000000000000000073771507764646700206030ustar00rootroot00000000000000/* dlat2s.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlat2s_(char *uplo, integer *n, doublereal *a, integer * lda, real *sa, integer *ldsa, integer *info) { /* System generated locals */ integer sa_dim1, sa_offset, a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j; doublereal rmax; extern logical _starpu_lsame_(char *, char *); logical upper; extern doublereal _starpu_slamch_(char *); /* -- LAPACK PROTOTYPE auxiliary routine (version 3.1.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* May 2007 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAT2S converts a DOUBLE PRECISION triangular matrix, SA, to a SINGLE */ /* PRECISION triangular matrix, A. */ /* RMAX is the overflow for the SINGLE PRECISION arithmetic */ /* DLAS2S checks that all the entries of A are between -RMAX and */ /* RMAX. If not the convertion is aborted and a flag is raised. */ /* This is an auxiliary routine so there is no argument checking. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* N (input) INTEGER */ /* The number of rows and columns of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the N-by-N triangular coefficient matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* SA (output) REAL array, dimension (LDSA,N) */ /* Only the UPLO part of SA is referenced. On exit, if INFO=0, */ /* the N-by-N coefficient matrix SA; if INFO>0, the content of */ /* the UPLO part of SA is unspecified. */ /* LDSA (input) INTEGER */ /* The leading dimension of the array SA. LDSA >= max(1,M). */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* = 1: an entry of the matrix A is greater than the SINGLE */ /* PRECISION overflow threshold, in this case, the content */ /* of the UPLO part of SA in exit is unspecified. */ /* ========= */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; sa_dim1 = *ldsa; sa_offset = 1 + sa_dim1; sa -= sa_offset; /* Function Body */ rmax = _starpu_slamch_("O"); upper = _starpu_lsame_(uplo, "U"); if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { if (a[i__ + j * a_dim1] < -rmax || a[i__ + j * a_dim1] > rmax) { *info = 1; goto L50; } sa[i__ + j * sa_dim1] = a[i__ + j * a_dim1]; /* L10: */ } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { if (a[i__ + j * a_dim1] < -rmax || a[i__ + j * a_dim1] > rmax) { *info = 1; goto L50; } sa[i__ + j * sa_dim1] = a[i__ + j * a_dim1]; /* L30: */ } /* L40: */ } } L50: return 0; /* End of DLAT2S */ } /* _starpu_dlat2s_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlatbs.c000066400000000000000000000544501507764646700206550ustar00rootroot00000000000000/* dlatbs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b36 = .5; /* Subroutine */ int _starpu_dlatbs_(char *uplo, char *trans, char *diag, char * normin, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *x, doublereal *scale, doublereal *cnorm, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j; doublereal xj, rec, tjj; integer jinc, jlen; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal xbnd; integer imax; doublereal tmax, tjjs, xmax, grow, sumj; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); integer maind; extern logical _starpu_lsame_(char *, char *); doublereal tscal, uscal; extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); integer jlast; extern /* Subroutine */ int _starpu_dtbsv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical upper; extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; logical notran; integer jfirst; doublereal smlnum; logical nounit; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLATBS solves one of the triangular systems */ /* A *x = s*b or A'*x = s*b */ /* with scaling to prevent overflow, where A is an upper or lower */ /* triangular band matrix. Here A' denotes the transpose of A, x and b */ /* are n-element vectors, and s is a scaling factor, usually less than */ /* or equal to 1, chosen so that the components of x will be less than */ /* the overflow threshold. If the unscaled problem will not cause */ /* overflow, the Level 2 BLAS routine DTBSV is called. If the matrix A */ /* is singular (A(j,j) = 0 for some j), then s is set to 0 and a */ /* non-trivial solution to A*x = 0 is returned. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the matrix A is upper or lower triangular. */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* TRANS (input) CHARACTER*1 */ /* Specifies the operation applied to A. */ /* = 'N': Solve A * x = s*b (No transpose) */ /* = 'T': Solve A'* x = s*b (Transpose) */ /* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) */ /* DIAG (input) CHARACTER*1 */ /* Specifies whether or not the matrix A is unit triangular. */ /* = 'N': Non-unit triangular */ /* = 'U': Unit triangular */ /* NORMIN (input) CHARACTER*1 */ /* Specifies whether CNORM has been set or not. */ /* = 'Y': CNORM contains the column norms on entry */ /* = 'N': CNORM is not set on entry. On exit, the norms will */ /* be computed and stored in CNORM. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of subdiagonals or superdiagonals in the */ /* triangular matrix A. KD >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* The upper or lower triangular band matrix A, stored in the */ /* first KD+1 rows of the array. The j-th column of A is stored */ /* in the j-th column of the array AB as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD+1. */ /* X (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the right hand side b of the triangular system. */ /* On exit, X is overwritten by the solution vector x. */ /* SCALE (output) DOUBLE PRECISION */ /* The scaling factor s for the triangular system */ /* A * x = s*b or A'* x = s*b. */ /* If SCALE = 0, the matrix A is singular or badly scaled, and */ /* the vector x is an exact or approximate solution to A*x = 0. */ /* CNORM (input or output) DOUBLE PRECISION array, dimension (N) */ /* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */ /* contains the norm of the off-diagonal part of the j-th column */ /* of A. If TRANS = 'N', CNORM(j) must be greater than or equal */ /* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */ /* must be greater than or equal to the 1-norm. */ /* If NORMIN = 'N', CNORM is an output argument and CNORM(j) */ /* returns the 1-norm of the offdiagonal part of the j-th column */ /* of A. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -k, the k-th argument had an illegal value */ /* Further Details */ /* ======= ======= */ /* A rough bound on x is computed; if that is less than overflow, DTBSV */ /* is called, otherwise, specific code is used which checks for possible */ /* overflow or divide-by-zero at every operation. */ /* A columnwise scheme is used for solving A*x = b. The basic algorithm */ /* if A is lower triangular is */ /* x[1:n] := b[1:n] */ /* for j = 1, ..., n */ /* x(j) := x(j) / A(j,j) */ /* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] */ /* end */ /* Define bounds on the components of x after j iterations of the loop: */ /* M(j) = bound on x[1:j] */ /* G(j) = bound on x[j+1:n] */ /* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. */ /* Then for iteration j+1 we have */ /* M(j+1) <= G(j) / | A(j+1,j+1) | */ /* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | */ /* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) */ /* where CNORM(j+1) is greater than or equal to the infinity-norm of */ /* column j+1 of A, not counting the diagonal. Hence */ /* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) */ /* 1<=i<=j */ /* and */ /* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) */ /* 1<=i< j */ /* Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTBSV if the */ /* reciprocal of the largest M(j), j=1,..,n, is larger than */ /* max(underflow, 1/overflow). */ /* The bound on x(j) is also used to determine when a step in the */ /* columnwise method can be performed without fear of overflow. If */ /* the computed bound is greater than a large constant, x is scaled to */ /* prevent overflow, but if the bound overflows, x is set to 0, x(j) to */ /* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. */ /* Similarly, a row-wise scheme is used to solve A'*x = b. The basic */ /* algorithm for A upper triangular is */ /* for j = 1, ..., n */ /* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) */ /* end */ /* We simultaneously compute two bounds */ /* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j */ /* M(j) = bound on x(i), 1<=i<=j */ /* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we */ /* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. */ /* Then the bound on x(j) is */ /* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | */ /* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) */ /* 1<=i<=j */ /* and we can safely call DTBSV if 1/M(n) and 1/G(n) are both greater */ /* than max(underflow, 1/overflow). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --x; --cnorm; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); notran = _starpu_lsame_(trans, "N"); nounit = _starpu_lsame_(diag, "N"); /* Test the input parameters. */ if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -3; } else if (! _starpu_lsame_(normin, "Y") && ! _starpu_lsame_(normin, "N")) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*kd < 0) { *info = -6; } else if (*ldab < *kd + 1) { *info = -8; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLATBS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine machine dependent parameters to control overflow. */ smlnum = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision"); bignum = 1. / smlnum; *scale = 1.; if (_starpu_lsame_(normin, "N")) { /* Compute the 1-norm of each column, not including the diagonal. */ if (upper) { /* A is upper triangular. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__2 = *kd, i__3 = j - 1; jlen = min(i__2,i__3); cnorm[j] = _starpu_dasum_(&jlen, &ab[*kd + 1 - jlen + j * ab_dim1], & c__1); /* L10: */ } } else { /* A is lower triangular. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__2 = *kd, i__3 = *n - j; jlen = min(i__2,i__3); if (jlen > 0) { cnorm[j] = _starpu_dasum_(&jlen, &ab[j * ab_dim1 + 2], &c__1); } else { cnorm[j] = 0.; } /* L20: */ } } } /* Scale the column norms by TSCAL if the maximum element in CNORM is */ /* greater than BIGNUM. */ imax = _starpu_idamax_(n, &cnorm[1], &c__1); tmax = cnorm[imax]; if (tmax <= bignum) { tscal = 1.; } else { tscal = 1. / (smlnum * tmax); _starpu_dscal_(n, &tscal, &cnorm[1], &c__1); } /* Compute a bound on the computed solution vector to see if the */ /* Level 2 BLAS routine DTBSV can be used. */ j = _starpu_idamax_(n, &x[1], &c__1); xmax = (d__1 = x[j], abs(d__1)); xbnd = xmax; if (notran) { /* Compute the growth in A * x = b. */ if (upper) { jfirst = *n; jlast = 1; jinc = -1; maind = *kd + 1; } else { jfirst = 1; jlast = *n; jinc = 1; maind = 1; } if (tscal != 1.) { grow = 0.; goto L50; } if (nounit) { /* A is non-unit triangular. */ /* Compute GROW = 1/G(j) and XBND = 1/M(j). */ /* Initially, G(0) = max{x(i), i=1,...,n}. */ grow = 1. / max(xbnd,smlnum); xbnd = grow; i__1 = jlast; i__2 = jinc; for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L50; } /* M(j) = G(j-1) / abs(A(j,j)) */ tjj = (d__1 = ab[maind + j * ab_dim1], abs(d__1)); /* Computing MIN */ d__1 = xbnd, d__2 = min(1.,tjj) * grow; xbnd = min(d__1,d__2); if (tjj + cnorm[j] >= smlnum) { /* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ grow *= tjj / (tjj + cnorm[j]); } else { /* G(j) could overflow, set GROW to 0. */ grow = 0.; } /* L30: */ } grow = xbnd; } else { /* A is unit triangular. */ /* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ /* Computing MIN */ d__1 = 1., d__2 = 1. / max(xbnd,smlnum); grow = min(d__1,d__2); i__2 = jlast; i__1 = jinc; for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L50; } /* G(j) = G(j-1)*( 1 + CNORM(j) ) */ grow *= 1. / (cnorm[j] + 1.); /* L40: */ } } L50: ; } else { /* Compute the growth in A' * x = b. */ if (upper) { jfirst = 1; jlast = *n; jinc = 1; maind = *kd + 1; } else { jfirst = *n; jlast = 1; jinc = -1; maind = 1; } if (tscal != 1.) { grow = 0.; goto L80; } if (nounit) { /* A is non-unit triangular. */ /* Compute GROW = 1/G(j) and XBND = 1/M(j). */ /* Initially, M(0) = max{x(i), i=1,...,n}. */ grow = 1. / max(xbnd,smlnum); xbnd = grow; i__1 = jlast; i__2 = jinc; for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L80; } /* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ xj = cnorm[j] + 1.; /* Computing MIN */ d__1 = grow, d__2 = xbnd / xj; grow = min(d__1,d__2); /* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ tjj = (d__1 = ab[maind + j * ab_dim1], abs(d__1)); if (xj > tjj) { xbnd *= tjj / xj; } /* L60: */ } grow = min(grow,xbnd); } else { /* A is unit triangular. */ /* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ /* Computing MIN */ d__1 = 1., d__2 = 1. / max(xbnd,smlnum); grow = min(d__1,d__2); i__2 = jlast; i__1 = jinc; for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L80; } /* G(j) = ( 1 + CNORM(j) )*G(j-1) */ xj = cnorm[j] + 1.; grow /= xj; /* L70: */ } } L80: ; } if (grow * tscal > smlnum) { /* Use the Level 2 BLAS solve if the reciprocal of the bound on */ /* elements of X is not too small. */ _starpu_dtbsv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &x[1], &c__1); } else { /* Use a Level 1 BLAS solve, scaling intermediate results. */ if (xmax > bignum) { /* Scale X so that its components are less than or equal to */ /* BIGNUM in absolute value. */ *scale = bignum / xmax; _starpu_dscal_(n, scale, &x[1], &c__1); xmax = bignum; } if (notran) { /* Solve A * x = b */ i__1 = jlast; i__2 = jinc; for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ xj = (d__1 = x[j], abs(d__1)); if (nounit) { tjjs = ab[maind + j * ab_dim1] * tscal; } else { tjjs = tscal; if (tscal == 1.) { goto L100; } } tjj = abs(tjjs); if (tjj > smlnum) { /* abs(A(j,j)) > SMLNUM: */ if (tjj < 1.) { if (xj > tjj * bignum) { /* Scale x by 1/b(j). */ rec = 1. / xj; _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j] /= tjjs; xj = (d__1 = x[j], abs(d__1)); } else if (tjj > 0.) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if (xj > tjj * bignum) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */ /* to avoid overflow when dividing by A(j,j). */ rec = tjj * bignum / xj; if (cnorm[j] > 1.) { /* Scale by 1/CNORM(j) to avoid overflow when */ /* multiplying x(j) times column j. */ rec /= cnorm[j]; } _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } x[j] /= tjjs; xj = (d__1 = x[j], abs(d__1)); } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ /* scale = 0, and compute a solution to A*x = 0. */ i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { x[i__] = 0.; /* L90: */ } x[j] = 1.; xj = 1.; *scale = 0.; xmax = 0.; } L100: /* Scale x if necessary to avoid overflow when adding a */ /* multiple of column j of A. */ if (xj > 1.) { rec = 1. / xj; if (cnorm[j] > (bignum - xmax) * rec) { /* Scale x by 1/(2*abs(x(j))). */ rec *= .5; _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; } } else if (xj * cnorm[j] > bignum - xmax) { /* Scale x by 1/2. */ _starpu_dscal_(n, &c_b36, &x[1], &c__1); *scale *= .5; } if (upper) { if (j > 1) { /* Compute the update */ /* x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - */ /* x(j)* A(max(1,j-kd):j-1,j) */ /* Computing MIN */ i__3 = *kd, i__4 = j - 1; jlen = min(i__3,i__4); d__1 = -x[j] * tscal; _starpu_daxpy_(&jlen, &d__1, &ab[*kd + 1 - jlen + j * ab_dim1] , &c__1, &x[j - jlen], &c__1); i__3 = j - 1; i__ = _starpu_idamax_(&i__3, &x[1], &c__1); xmax = (d__1 = x[i__], abs(d__1)); } } else if (j < *n) { /* Compute the update */ /* x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - */ /* x(j) * A(j+1:min(j+kd,n),j) */ /* Computing MIN */ i__3 = *kd, i__4 = *n - j; jlen = min(i__3,i__4); if (jlen > 0) { d__1 = -x[j] * tscal; _starpu_daxpy_(&jlen, &d__1, &ab[j * ab_dim1 + 2], &c__1, &x[ j + 1], &c__1); } i__3 = *n - j; i__ = j + _starpu_idamax_(&i__3, &x[j + 1], &c__1); xmax = (d__1 = x[i__], abs(d__1)); } /* L110: */ } } else { /* Solve A' * x = b */ i__2 = jlast; i__1 = jinc; for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Compute x(j) = b(j) - sum A(k,j)*x(k). */ /* k<>j */ xj = (d__1 = x[j], abs(d__1)); uscal = tscal; rec = 1. / max(xmax,1.); if (cnorm[j] > (bignum - xj) * rec) { /* If x(j) could overflow, scale x by 1/(2*XMAX). */ rec *= .5; if (nounit) { tjjs = ab[maind + j * ab_dim1] * tscal; } else { tjjs = tscal; } tjj = abs(tjjs); if (tjj > 1.) { /* Divide by A(j,j) when scaling x if A(j,j) > 1. */ /* Computing MIN */ d__1 = 1., d__2 = rec * tjj; rec = min(d__1,d__2); uscal /= tjjs; } if (rec < 1.) { _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } sumj = 0.; if (uscal == 1.) { /* If the scaling needed for A in the dot product is 1, */ /* call DDOT to perform the dot product. */ if (upper) { /* Computing MIN */ i__3 = *kd, i__4 = j - 1; jlen = min(i__3,i__4); sumj = _starpu_ddot_(&jlen, &ab[*kd + 1 - jlen + j * ab_dim1], &c__1, &x[j - jlen], &c__1); } else { /* Computing MIN */ i__3 = *kd, i__4 = *n - j; jlen = min(i__3,i__4); if (jlen > 0) { sumj = _starpu_ddot_(&jlen, &ab[j * ab_dim1 + 2], &c__1, & x[j + 1], &c__1); } } } else { /* Otherwise, use in-line code for the dot product. */ if (upper) { /* Computing MIN */ i__3 = *kd, i__4 = j - 1; jlen = min(i__3,i__4); i__3 = jlen; for (i__ = 1; i__ <= i__3; ++i__) { sumj += ab[*kd + i__ - jlen + j * ab_dim1] * uscal * x[j - jlen - 1 + i__]; /* L120: */ } } else { /* Computing MIN */ i__3 = *kd, i__4 = *n - j; jlen = min(i__3,i__4); i__3 = jlen; for (i__ = 1; i__ <= i__3; ++i__) { sumj += ab[i__ + 1 + j * ab_dim1] * uscal * x[j + i__]; /* L130: */ } } } if (uscal == tscal) { /* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) */ /* was not used to scale the dotproduct. */ x[j] -= sumj; xj = (d__1 = x[j], abs(d__1)); if (nounit) { /* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ tjjs = ab[maind + j * ab_dim1] * tscal; } else { tjjs = tscal; if (tscal == 1.) { goto L150; } } tjj = abs(tjjs); if (tjj > smlnum) { /* abs(A(j,j)) > SMLNUM: */ if (tjj < 1.) { if (xj > tjj * bignum) { /* Scale X by 1/abs(x(j)). */ rec = 1. / xj; _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j] /= tjjs; } else if (tjj > 0.) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if (xj > tjj * bignum) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ rec = tjj * bignum / xj; _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } x[j] /= tjjs; } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ /* scale = 0, and compute a solution to A'*x = 0. */ i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { x[i__] = 0.; /* L140: */ } x[j] = 1.; *scale = 0.; xmax = 0.; } L150: ; } else { /* Compute x(j) := x(j) / A(j,j) - sumj if the dot */ /* product has already been divided by 1/A(j,j). */ x[j] = x[j] / tjjs - sumj; } /* Computing MAX */ d__2 = xmax, d__3 = (d__1 = x[j], abs(d__1)); xmax = max(d__2,d__3); /* L160: */ } } *scale /= tscal; } /* Scale the column norms by 1/TSCAL for return. */ if (tscal != 1.) { d__1 = 1. / tscal; _starpu_dscal_(n, &d__1, &cnorm[1], &c__1); } return 0; /* End of DLATBS */ } /* _starpu_dlatbs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlatdf.c000066400000000000000000000245071507764646700206420ustar00rootroot00000000000000/* dlatdf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b23 = 1.; static doublereal c_b37 = -1.; /* Subroutine */ int _starpu_dlatdf_(integer *ijob, integer *n, doublereal *z__, integer *ldz, doublereal *rhs, doublereal *rdsum, doublereal *rdscal, integer *ipiv, integer *jpiv) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, k; doublereal bm, bp, xm[8], xp[8]; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); integer info; doublereal temp, work[32]; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); doublereal pmone; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); doublereal sminu; integer iwork[8]; doublereal splus; extern /* Subroutine */ int _starpu_dgesc2_(integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *), _starpu_dgecon_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *), _starpu_dlaswp_( integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLATDF uses the LU factorization of the n-by-n matrix Z computed by */ /* DGETC2 and computes a contribution to the reciprocal Dif-estimate */ /* by solving Z * x = b for x, and choosing the r.h.s. b such that */ /* the norm of x is as large as possible. On entry RHS = b holds the */ /* contribution from earlier solved sub-systems, and on return RHS = x. */ /* The factorization of Z returned by DGETC2 has the form Z = P*L*U*Q, */ /* where P and Q are permutation matrices. L is lower triangular with */ /* unit diagonal elements and U is upper triangular. */ /* Arguments */ /* ========= */ /* IJOB (input) INTEGER */ /* IJOB = 2: First compute an approximative null-vector e */ /* of Z using DGECON, e is normalized and solve for */ /* Zx = +-e - f with the sign giving the greater value */ /* of 2-norm(x). About 5 times as expensive as Default. */ /* IJOB .ne. 2: Local look ahead strategy where all entries of */ /* the r.h.s. b is choosen as either +1 or -1 (Default). */ /* N (input) INTEGER */ /* The number of columns of the matrix Z. */ /* Z (input) DOUBLE PRECISION array, dimension (LDZ, N) */ /* On entry, the LU part of the factorization of the n-by-n */ /* matrix Z computed by DGETC2: Z = P * L * U * Q */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDA >= max(1, N). */ /* RHS (input/output) DOUBLE PRECISION array, dimension N. */ /* On entry, RHS contains contributions from other subsystems. */ /* On exit, RHS contains the solution of the subsystem with */ /* entries acoording to the value of IJOB (see above). */ /* RDSUM (input/output) DOUBLE PRECISION */ /* On entry, the sum of squares of computed contributions to */ /* the Dif-estimate under computation by DTGSYL, where the */ /* scaling factor RDSCAL (see below) has been factored out. */ /* On exit, the corresponding sum of squares updated with the */ /* contributions from the current sub-system. */ /* If TRANS = 'T' RDSUM is not touched. */ /* NOTE: RDSUM only makes sense when DTGSY2 is called by STGSYL. */ /* RDSCAL (input/output) DOUBLE PRECISION */ /* On entry, scaling factor used to prevent overflow in RDSUM. */ /* On exit, RDSCAL is updated w.r.t. the current contributions */ /* in RDSUM. */ /* If TRANS = 'T', RDSCAL is not touched. */ /* NOTE: RDSCAL only makes sense when DTGSY2 is called by */ /* DTGSYL. */ /* IPIV (input) INTEGER array, dimension (N). */ /* The pivot indices; for 1 <= i <= N, row i of the */ /* matrix has been interchanged with row IPIV(i). */ /* JPIV (input) INTEGER array, dimension (N). */ /* The pivot indices; for 1 <= j <= N, column j of the */ /* matrix has been interchanged with column JPIV(j). */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ /* Umea University, S-901 87 Umea, Sweden. */ /* This routine is a further developed implementation of algorithm */ /* BSOLVE in [1] using complete pivoting in the LU factorization. */ /* [1] Bo Kagstrom and Lars Westin, */ /* Generalized Schur Methods with Condition Estimators for */ /* Solving the Generalized Sylvester Equation, IEEE Transactions */ /* on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751. */ /* [2] Peter Poromaa, */ /* On Efficient and Robust Estimators for the Separation */ /* between two Regular Matrix Pairs with Applications in */ /* Condition Estimation. Report IMINF-95.05, Departement of */ /* Computing Science, Umea University, S-901 87 Umea, Sweden, 1995. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --rhs; --ipiv; --jpiv; /* Function Body */ if (*ijob != 2) { /* Apply permutations IPIV to RHS */ i__1 = *n - 1; _starpu_dlaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &ipiv[1], &c__1); /* Solve for L-part choosing RHS either to +1 or -1. */ pmone = -1.; i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { bp = rhs[j] + 1.; bm = rhs[j] - 1.; splus = 1.; /* Look-ahead for L-part RHS(1:N-1) = + or -1, SPLUS and */ /* SMIN computed more efficiently than in BSOLVE [1]. */ i__2 = *n - j; splus += _starpu_ddot_(&i__2, &z__[j + 1 + j * z_dim1], &c__1, &z__[j + 1 + j * z_dim1], &c__1); i__2 = *n - j; sminu = _starpu_ddot_(&i__2, &z__[j + 1 + j * z_dim1], &c__1, &rhs[j + 1], &c__1); splus *= rhs[j]; if (splus > sminu) { rhs[j] = bp; } else if (sminu > splus) { rhs[j] = bm; } else { /* In this case the updating sums are equal and we can */ /* choose RHS(J) +1 or -1. The first time this happens */ /* we choose -1, thereafter +1. This is a simple way to */ /* get good estimates of matrices like Byers well-known */ /* example (see [1]). (Not done in BSOLVE.) */ rhs[j] += pmone; pmone = 1.; } /* Compute the remaining r.h.s. */ temp = -rhs[j]; i__2 = *n - j; _starpu_daxpy_(&i__2, &temp, &z__[j + 1 + j * z_dim1], &c__1, &rhs[j + 1], &c__1); /* L10: */ } /* Solve for U-part, look-ahead for RHS(N) = +-1. This is not done */ /* in BSOLVE and will hopefully give us a better estimate because */ /* any ill-conditioning of the original matrix is transfered to U */ /* and not to L. U(N, N) is an approximation to sigma_min(LU). */ i__1 = *n - 1; _starpu_dcopy_(&i__1, &rhs[1], &c__1, xp, &c__1); xp[*n - 1] = rhs[*n] + 1.; rhs[*n] += -1.; splus = 0.; sminu = 0.; for (i__ = *n; i__ >= 1; --i__) { temp = 1. / z__[i__ + i__ * z_dim1]; xp[i__ - 1] *= temp; rhs[i__] *= temp; i__1 = *n; for (k = i__ + 1; k <= i__1; ++k) { xp[i__ - 1] -= xp[k - 1] * (z__[i__ + k * z_dim1] * temp); rhs[i__] -= rhs[k] * (z__[i__ + k * z_dim1] * temp); /* L20: */ } splus += (d__1 = xp[i__ - 1], abs(d__1)); sminu += (d__1 = rhs[i__], abs(d__1)); /* L30: */ } if (splus > sminu) { _starpu_dcopy_(n, xp, &c__1, &rhs[1], &c__1); } /* Apply the permutations JPIV to the computed solution (RHS) */ i__1 = *n - 1; _starpu_dlaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &jpiv[1], &c_n1); /* Compute the sum of squares */ _starpu_dlassq_(n, &rhs[1], &c__1, rdscal, rdsum); } else { /* IJOB = 2, Compute approximate nullvector XM of Z */ _starpu_dgecon_("I", n, &z__[z_offset], ldz, &c_b23, &temp, work, iwork, & info); _starpu_dcopy_(n, &work[*n], &c__1, xm, &c__1); /* Compute RHS */ i__1 = *n - 1; _starpu_dlaswp_(&c__1, xm, ldz, &c__1, &i__1, &ipiv[1], &c_n1); temp = 1. / sqrt(_starpu_ddot_(n, xm, &c__1, xm, &c__1)); _starpu_dscal_(n, &temp, xm, &c__1); _starpu_dcopy_(n, xm, &c__1, xp, &c__1); _starpu_daxpy_(n, &c_b23, &rhs[1], &c__1, xp, &c__1); _starpu_daxpy_(n, &c_b37, xm, &c__1, &rhs[1], &c__1); _starpu_dgesc2_(n, &z__[z_offset], ldz, &rhs[1], &ipiv[1], &jpiv[1], &temp); _starpu_dgesc2_(n, &z__[z_offset], ldz, xp, &ipiv[1], &jpiv[1], &temp); if (_starpu_dasum_(n, xp, &c__1) > _starpu_dasum_(n, &rhs[1], &c__1)) { _starpu_dcopy_(n, xp, &c__1, &rhs[1], &c__1); } /* Compute the sum of squares */ _starpu_dlassq_(n, &rhs[1], &c__1, rdscal, rdsum); } return 0; /* End of DLATDF */ } /* _starpu_dlatdf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlatps.c000066400000000000000000000522631507764646700206730ustar00rootroot00000000000000/* dlatps.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b36 = .5; /* Subroutine */ int _starpu_dlatps_(char *uplo, char *trans, char *diag, char * normin, integer *n, doublereal *ap, doublereal *x, doublereal *scale, doublereal *cnorm, integer *info) { /* System generated locals */ integer i__1, i__2, i__3; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j, ip; doublereal xj, rec, tjj; integer jinc, jlen; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal xbnd; integer imax; doublereal tmax, tjjs, xmax, grow, sumj; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); doublereal tscal, uscal; extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); integer jlast; extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dtpsv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; logical notran; integer jfirst; doublereal smlnum; logical nounit; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLATPS solves one of the triangular systems */ /* A *x = s*b or A'*x = s*b */ /* with scaling to prevent overflow, where A is an upper or lower */ /* triangular matrix stored in packed form. Here A' denotes the */ /* transpose of A, x and b are n-element vectors, and s is a scaling */ /* factor, usually less than or equal to 1, chosen so that the */ /* components of x will be less than the overflow threshold. If the */ /* unscaled problem will not cause overflow, the Level 2 BLAS routine */ /* DTPSV is called. If the matrix A is singular (A(j,j) = 0 for some j), */ /* then s is set to 0 and a non-trivial solution to A*x = 0 is returned. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the matrix A is upper or lower triangular. */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* TRANS (input) CHARACTER*1 */ /* Specifies the operation applied to A. */ /* = 'N': Solve A * x = s*b (No transpose) */ /* = 'T': Solve A'* x = s*b (Transpose) */ /* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) */ /* DIAG (input) CHARACTER*1 */ /* Specifies whether or not the matrix A is unit triangular. */ /* = 'N': Non-unit triangular */ /* = 'U': Unit triangular */ /* NORMIN (input) CHARACTER*1 */ /* Specifies whether CNORM has been set or not. */ /* = 'Y': CNORM contains the column norms on entry */ /* = 'N': CNORM is not set on entry. On exit, the norms will */ /* be computed and stored in CNORM. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The upper or lower triangular matrix A, packed columnwise in */ /* a linear array. The j-th column of A is stored in the array */ /* AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* X (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the right hand side b of the triangular system. */ /* On exit, X is overwritten by the solution vector x. */ /* SCALE (output) DOUBLE PRECISION */ /* The scaling factor s for the triangular system */ /* A * x = s*b or A'* x = s*b. */ /* If SCALE = 0, the matrix A is singular or badly scaled, and */ /* the vector x is an exact or approximate solution to A*x = 0. */ /* CNORM (input or output) DOUBLE PRECISION array, dimension (N) */ /* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */ /* contains the norm of the off-diagonal part of the j-th column */ /* of A. If TRANS = 'N', CNORM(j) must be greater than or equal */ /* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */ /* must be greater than or equal to the 1-norm. */ /* If NORMIN = 'N', CNORM is an output argument and CNORM(j) */ /* returns the 1-norm of the offdiagonal part of the j-th column */ /* of A. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -k, the k-th argument had an illegal value */ /* Further Details */ /* ======= ======= */ /* A rough bound on x is computed; if that is less than overflow, DTPSV */ /* is called, otherwise, specific code is used which checks for possible */ /* overflow or divide-by-zero at every operation. */ /* A columnwise scheme is used for solving A*x = b. The basic algorithm */ /* if A is lower triangular is */ /* x[1:n] := b[1:n] */ /* for j = 1, ..., n */ /* x(j) := x(j) / A(j,j) */ /* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] */ /* end */ /* Define bounds on the components of x after j iterations of the loop: */ /* M(j) = bound on x[1:j] */ /* G(j) = bound on x[j+1:n] */ /* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. */ /* Then for iteration j+1 we have */ /* M(j+1) <= G(j) / | A(j+1,j+1) | */ /* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | */ /* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) */ /* where CNORM(j+1) is greater than or equal to the infinity-norm of */ /* column j+1 of A, not counting the diagonal. Hence */ /* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) */ /* 1<=i<=j */ /* and */ /* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) */ /* 1<=i< j */ /* Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTPSV if the */ /* reciprocal of the largest M(j), j=1,..,n, is larger than */ /* max(underflow, 1/overflow). */ /* The bound on x(j) is also used to determine when a step in the */ /* columnwise method can be performed without fear of overflow. If */ /* the computed bound is greater than a large constant, x is scaled to */ /* prevent overflow, but if the bound overflows, x is set to 0, x(j) to */ /* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. */ /* Similarly, a row-wise scheme is used to solve A'*x = b. The basic */ /* algorithm for A upper triangular is */ /* for j = 1, ..., n */ /* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) */ /* end */ /* We simultaneously compute two bounds */ /* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j */ /* M(j) = bound on x(i), 1<=i<=j */ /* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we */ /* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. */ /* Then the bound on x(j) is */ /* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | */ /* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) */ /* 1<=i<=j */ /* and we can safely call DTPSV if 1/M(n) and 1/G(n) are both greater */ /* than max(underflow, 1/overflow). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --cnorm; --x; --ap; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); notran = _starpu_lsame_(trans, "N"); nounit = _starpu_lsame_(diag, "N"); /* Test the input parameters. */ if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -3; } else if (! _starpu_lsame_(normin, "Y") && ! _starpu_lsame_(normin, "N")) { *info = -4; } else if (*n < 0) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLATPS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine machine dependent parameters to control overflow. */ smlnum = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision"); bignum = 1. / smlnum; *scale = 1.; if (_starpu_lsame_(normin, "N")) { /* Compute the 1-norm of each column, not including the diagonal. */ if (upper) { /* A is upper triangular. */ ip = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; cnorm[j] = _starpu_dasum_(&i__2, &ap[ip], &c__1); ip += j; /* L10: */ } } else { /* A is lower triangular. */ ip = 1; i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { i__2 = *n - j; cnorm[j] = _starpu_dasum_(&i__2, &ap[ip + 1], &c__1); ip = ip + *n - j + 1; /* L20: */ } cnorm[*n] = 0.; } } /* Scale the column norms by TSCAL if the maximum element in CNORM is */ /* greater than BIGNUM. */ imax = _starpu_idamax_(n, &cnorm[1], &c__1); tmax = cnorm[imax]; if (tmax <= bignum) { tscal = 1.; } else { tscal = 1. / (smlnum * tmax); _starpu_dscal_(n, &tscal, &cnorm[1], &c__1); } /* Compute a bound on the computed solution vector to see if the */ /* Level 2 BLAS routine DTPSV can be used. */ j = _starpu_idamax_(n, &x[1], &c__1); xmax = (d__1 = x[j], abs(d__1)); xbnd = xmax; if (notran) { /* Compute the growth in A * x = b. */ if (upper) { jfirst = *n; jlast = 1; jinc = -1; } else { jfirst = 1; jlast = *n; jinc = 1; } if (tscal != 1.) { grow = 0.; goto L50; } if (nounit) { /* A is non-unit triangular. */ /* Compute GROW = 1/G(j) and XBND = 1/M(j). */ /* Initially, G(0) = max{x(i), i=1,...,n}. */ grow = 1. / max(xbnd,smlnum); xbnd = grow; ip = jfirst * (jfirst + 1) / 2; jlen = *n; i__1 = jlast; i__2 = jinc; for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L50; } /* M(j) = G(j-1) / abs(A(j,j)) */ tjj = (d__1 = ap[ip], abs(d__1)); /* Computing MIN */ d__1 = xbnd, d__2 = min(1.,tjj) * grow; xbnd = min(d__1,d__2); if (tjj + cnorm[j] >= smlnum) { /* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ grow *= tjj / (tjj + cnorm[j]); } else { /* G(j) could overflow, set GROW to 0. */ grow = 0.; } ip += jinc * jlen; --jlen; /* L30: */ } grow = xbnd; } else { /* A is unit triangular. */ /* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ /* Computing MIN */ d__1 = 1., d__2 = 1. / max(xbnd,smlnum); grow = min(d__1,d__2); i__2 = jlast; i__1 = jinc; for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L50; } /* G(j) = G(j-1)*( 1 + CNORM(j) ) */ grow *= 1. / (cnorm[j] + 1.); /* L40: */ } } L50: ; } else { /* Compute the growth in A' * x = b. */ if (upper) { jfirst = 1; jlast = *n; jinc = 1; } else { jfirst = *n; jlast = 1; jinc = -1; } if (tscal != 1.) { grow = 0.; goto L80; } if (nounit) { /* A is non-unit triangular. */ /* Compute GROW = 1/G(j) and XBND = 1/M(j). */ /* Initially, M(0) = max{x(i), i=1,...,n}. */ grow = 1. / max(xbnd,smlnum); xbnd = grow; ip = jfirst * (jfirst + 1) / 2; jlen = 1; i__1 = jlast; i__2 = jinc; for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L80; } /* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ xj = cnorm[j] + 1.; /* Computing MIN */ d__1 = grow, d__2 = xbnd / xj; grow = min(d__1,d__2); /* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ tjj = (d__1 = ap[ip], abs(d__1)); if (xj > tjj) { xbnd *= tjj / xj; } ++jlen; ip += jinc * jlen; /* L60: */ } grow = min(grow,xbnd); } else { /* A is unit triangular. */ /* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ /* Computing MIN */ d__1 = 1., d__2 = 1. / max(xbnd,smlnum); grow = min(d__1,d__2); i__2 = jlast; i__1 = jinc; for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L80; } /* G(j) = ( 1 + CNORM(j) )*G(j-1) */ xj = cnorm[j] + 1.; grow /= xj; /* L70: */ } } L80: ; } if (grow * tscal > smlnum) { /* Use the Level 2 BLAS solve if the reciprocal of the bound on */ /* elements of X is not too small. */ _starpu_dtpsv_(uplo, trans, diag, n, &ap[1], &x[1], &c__1); } else { /* Use a Level 1 BLAS solve, scaling intermediate results. */ if (xmax > bignum) { /* Scale X so that its components are less than or equal to */ /* BIGNUM in absolute value. */ *scale = bignum / xmax; _starpu_dscal_(n, scale, &x[1], &c__1); xmax = bignum; } if (notran) { /* Solve A * x = b */ ip = jfirst * (jfirst + 1) / 2; i__1 = jlast; i__2 = jinc; for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ xj = (d__1 = x[j], abs(d__1)); if (nounit) { tjjs = ap[ip] * tscal; } else { tjjs = tscal; if (tscal == 1.) { goto L100; } } tjj = abs(tjjs); if (tjj > smlnum) { /* abs(A(j,j)) > SMLNUM: */ if (tjj < 1.) { if (xj > tjj * bignum) { /* Scale x by 1/b(j). */ rec = 1. / xj; _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j] /= tjjs; xj = (d__1 = x[j], abs(d__1)); } else if (tjj > 0.) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if (xj > tjj * bignum) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */ /* to avoid overflow when dividing by A(j,j). */ rec = tjj * bignum / xj; if (cnorm[j] > 1.) { /* Scale by 1/CNORM(j) to avoid overflow when */ /* multiplying x(j) times column j. */ rec /= cnorm[j]; } _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } x[j] /= tjjs; xj = (d__1 = x[j], abs(d__1)); } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ /* scale = 0, and compute a solution to A*x = 0. */ i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { x[i__] = 0.; /* L90: */ } x[j] = 1.; xj = 1.; *scale = 0.; xmax = 0.; } L100: /* Scale x if necessary to avoid overflow when adding a */ /* multiple of column j of A. */ if (xj > 1.) { rec = 1. / xj; if (cnorm[j] > (bignum - xmax) * rec) { /* Scale x by 1/(2*abs(x(j))). */ rec *= .5; _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; } } else if (xj * cnorm[j] > bignum - xmax) { /* Scale x by 1/2. */ _starpu_dscal_(n, &c_b36, &x[1], &c__1); *scale *= .5; } if (upper) { if (j > 1) { /* Compute the update */ /* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */ i__3 = j - 1; d__1 = -x[j] * tscal; _starpu_daxpy_(&i__3, &d__1, &ap[ip - j + 1], &c__1, &x[1], & c__1); i__3 = j - 1; i__ = _starpu_idamax_(&i__3, &x[1], &c__1); xmax = (d__1 = x[i__], abs(d__1)); } ip -= j; } else { if (j < *n) { /* Compute the update */ /* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */ i__3 = *n - j; d__1 = -x[j] * tscal; _starpu_daxpy_(&i__3, &d__1, &ap[ip + 1], &c__1, &x[j + 1], & c__1); i__3 = *n - j; i__ = j + _starpu_idamax_(&i__3, &x[j + 1], &c__1); xmax = (d__1 = x[i__], abs(d__1)); } ip = ip + *n - j + 1; } /* L110: */ } } else { /* Solve A' * x = b */ ip = jfirst * (jfirst + 1) / 2; jlen = 1; i__2 = jlast; i__1 = jinc; for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Compute x(j) = b(j) - sum A(k,j)*x(k). */ /* k<>j */ xj = (d__1 = x[j], abs(d__1)); uscal = tscal; rec = 1. / max(xmax,1.); if (cnorm[j] > (bignum - xj) * rec) { /* If x(j) could overflow, scale x by 1/(2*XMAX). */ rec *= .5; if (nounit) { tjjs = ap[ip] * tscal; } else { tjjs = tscal; } tjj = abs(tjjs); if (tjj > 1.) { /* Divide by A(j,j) when scaling x if A(j,j) > 1. */ /* Computing MIN */ d__1 = 1., d__2 = rec * tjj; rec = min(d__1,d__2); uscal /= tjjs; } if (rec < 1.) { _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } sumj = 0.; if (uscal == 1.) { /* If the scaling needed for A in the dot product is 1, */ /* call DDOT to perform the dot product. */ if (upper) { i__3 = j - 1; sumj = _starpu_ddot_(&i__3, &ap[ip - j + 1], &c__1, &x[1], & c__1); } else if (j < *n) { i__3 = *n - j; sumj = _starpu_ddot_(&i__3, &ap[ip + 1], &c__1, &x[j + 1], & c__1); } } else { /* Otherwise, use in-line code for the dot product. */ if (upper) { i__3 = j - 1; for (i__ = 1; i__ <= i__3; ++i__) { sumj += ap[ip - j + i__] * uscal * x[i__]; /* L120: */ } } else if (j < *n) { i__3 = *n - j; for (i__ = 1; i__ <= i__3; ++i__) { sumj += ap[ip + i__] * uscal * x[j + i__]; /* L130: */ } } } if (uscal == tscal) { /* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) */ /* was not used to scale the dotproduct. */ x[j] -= sumj; xj = (d__1 = x[j], abs(d__1)); if (nounit) { /* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ tjjs = ap[ip] * tscal; } else { tjjs = tscal; if (tscal == 1.) { goto L150; } } tjj = abs(tjjs); if (tjj > smlnum) { /* abs(A(j,j)) > SMLNUM: */ if (tjj < 1.) { if (xj > tjj * bignum) { /* Scale X by 1/abs(x(j)). */ rec = 1. / xj; _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j] /= tjjs; } else if (tjj > 0.) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if (xj > tjj * bignum) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ rec = tjj * bignum / xj; _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } x[j] /= tjjs; } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ /* scale = 0, and compute a solution to A'*x = 0. */ i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { x[i__] = 0.; /* L140: */ } x[j] = 1.; *scale = 0.; xmax = 0.; } L150: ; } else { /* Compute x(j) := x(j) / A(j,j) - sumj if the dot */ /* product has already been divided by 1/A(j,j). */ x[j] = x[j] / tjjs - sumj; } /* Computing MAX */ d__2 = xmax, d__3 = (d__1 = x[j], abs(d__1)); xmax = max(d__2,d__3); ++jlen; ip += jinc * jlen; /* L160: */ } } *scale /= tscal; } /* Scale the column norms by 1/TSCAL for return. */ if (tscal != 1.) { d__1 = 1. / tscal; _starpu_dscal_(n, &d__1, &cnorm[1], &c__1); } return 0; /* End of DLATPS */ } /* _starpu_dlatps_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlatrd.c000066400000000000000000000303321507764646700206470ustar00rootroot00000000000000/* dlatrd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b5 = -1.; static doublereal c_b6 = 1.; static integer c__1 = 1; static doublereal c_b16 = 0.; /* Subroutine */ int _starpu_dlatrd_(char *uplo, integer *n, integer *nb, doublereal * a, integer *lda, doublereal *e, doublereal *tau, doublereal *w, integer *ldw) { /* System generated locals */ integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; /* Local variables */ integer i__, iw; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal alpha; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsymv_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLATRD reduces NB rows and columns of a real symmetric matrix A to */ /* symmetric tridiagonal form by an orthogonal similarity */ /* transformation Q' * A * Q, and returns the matrices V and W which are */ /* needed to apply the transformation to the unreduced part of A. */ /* If UPLO = 'U', DLATRD reduces the last NB rows and columns of a */ /* matrix, of which the upper triangle is supplied; */ /* if UPLO = 'L', DLATRD reduces the first NB rows and columns of a */ /* matrix, of which the lower triangle is supplied. */ /* This is an auxiliary routine called by DSYTRD. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is stored: */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The order of the matrix A. */ /* NB (input) INTEGER */ /* The number of rows and columns to be reduced. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* n-by-n upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading n-by-n lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit: */ /* if UPLO = 'U', the last NB columns have been reduced to */ /* tridiagonal form, with the diagonal elements overwriting */ /* the diagonal elements of A; the elements above the diagonal */ /* with the array TAU, represent the orthogonal matrix Q as a */ /* product of elementary reflectors; */ /* if UPLO = 'L', the first NB columns have been reduced to */ /* tridiagonal form, with the diagonal elements overwriting */ /* the diagonal elements of A; the elements below the diagonal */ /* with the array TAU, represent the orthogonal matrix Q as a */ /* product of elementary reflectors. */ /* See Further Details. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= (1,N). */ /* E (output) DOUBLE PRECISION array, dimension (N-1) */ /* If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal */ /* elements of the last NB columns of the reduced matrix; */ /* if UPLO = 'L', E(1:nb) contains the subdiagonal elements of */ /* the first NB columns of the reduced matrix. */ /* TAU (output) DOUBLE PRECISION array, dimension (N-1) */ /* The scalar factors of the elementary reflectors, stored in */ /* TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. */ /* See Further Details. */ /* W (output) DOUBLE PRECISION array, dimension (LDW,NB) */ /* The n-by-nb matrix W required to update the unreduced part */ /* of A. */ /* LDW (input) INTEGER */ /* The leading dimension of the array W. LDW >= max(1,N). */ /* Further Details */ /* =============== */ /* If UPLO = 'U', the matrix Q is represented as a product of elementary */ /* reflectors */ /* Q = H(n) H(n-1) . . . H(n-nb+1). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), */ /* and tau in TAU(i-1). */ /* If UPLO = 'L', the matrix Q is represented as a product of elementary */ /* reflectors */ /* Q = H(1) H(2) . . . H(nb). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), */ /* and tau in TAU(i). */ /* The elements of the vectors v together form the n-by-nb matrix V */ /* which is needed, with W, to apply the transformation to the unreduced */ /* part of the matrix, using a symmetric rank-2k update of the form: */ /* A := A - V*W' - W*V'. */ /* The contents of A on exit are illustrated by the following examples */ /* with n = 5 and nb = 2: */ /* if UPLO = 'U': if UPLO = 'L': */ /* ( a a a v4 v5 ) ( d ) */ /* ( a a v4 v5 ) ( 1 d ) */ /* ( a 1 v5 ) ( v1 1 a ) */ /* ( d 1 ) ( v1 v2 a a ) */ /* ( d ) ( v1 v2 a a a ) */ /* where d denotes a diagonal element of the reduced matrix, a denotes */ /* an element of the original matrix that is unchanged, and vi denotes */ /* an element of the vector defining H(i). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --e; --tau; w_dim1 = *ldw; w_offset = 1 + w_dim1; w -= w_offset; /* Function Body */ if (*n <= 0) { return 0; } if (_starpu_lsame_(uplo, "U")) { /* Reduce last NB columns of upper triangle */ i__1 = *n - *nb + 1; for (i__ = *n; i__ >= i__1; --i__) { iw = i__ - *n + *nb; if (i__ < *n) { /* Update A(1:i,i) */ i__2 = *n - i__; _starpu_dgemv_("No transpose", &i__, &i__2, &c_b5, &a[(i__ + 1) * a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & c_b6, &a[i__ * a_dim1 + 1], &c__1); i__2 = *n - i__; _starpu_dgemv_("No transpose", &i__, &i__2, &c_b5, &w[(iw + 1) * w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & c_b6, &a[i__ * a_dim1 + 1], &c__1); } if (i__ > 1) { /* Generate elementary reflector H(i) to annihilate */ /* A(1:i-2,i) */ i__2 = i__ - 1; _starpu_dlarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 + 1], &c__1, &tau[i__ - 1]); e[i__ - 1] = a[i__ - 1 + i__ * a_dim1]; a[i__ - 1 + i__ * a_dim1] = 1.; /* Compute W(1:i-1,i) */ i__2 = i__ - 1; _starpu_dsymv_("Upper", &i__2, &c_b6, &a[a_offset], lda, &a[i__ * a_dim1 + 1], &c__1, &c_b16, &w[iw * w_dim1 + 1], & c__1); if (i__ < *n) { i__2 = i__ - 1; i__3 = *n - i__; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b6, &w[(iw + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, & c_b16, &w[i__ + 1 + iw * w_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[(i__ + 1) * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & c__1, &c_b6, &w[iw * w_dim1 + 1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b6, &a[(i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, & c_b16, &w[i__ + 1 + iw * w_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &w[(iw + 1) * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & c__1, &c_b6, &w[iw * w_dim1 + 1], &c__1); } i__2 = i__ - 1; _starpu_dscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); i__2 = i__ - 1; alpha = tau[i__ - 1] * -.5 * _starpu_ddot_(&i__2, &w[iw * w_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], &c__1); i__2 = i__ - 1; _starpu_daxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * w_dim1 + 1], &c__1); } /* L10: */ } } else { /* Reduce first NB columns of lower triangle */ i__1 = *nb; for (i__ = 1; i__ <= i__1; ++i__) { /* Update A(i:n,i) */ i__2 = *n - i__ + 1; i__3 = i__ - 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ + a_dim1], lda, &w[i__ + w_dim1], ldw, &c_b6, &a[i__ + i__ * a_dim1], & c__1); i__2 = *n - i__ + 1; i__3 = i__ - 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &w[i__ + w_dim1], ldw, &a[i__ + a_dim1], lda, &c_b6, &a[i__ + i__ * a_dim1], & c__1); if (i__ < *n) { /* Generate elementary reflector H(i) to annihilate */ /* A(i+2:n,i) */ i__2 = *n - i__; /* Computing MIN */ i__3 = i__ + 2; _starpu_dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *n)+ i__ * a_dim1], &c__1, &tau[i__]); e[i__] = a[i__ + 1 + i__ * a_dim1]; a[i__ + 1 + i__ * a_dim1] = 1.; /* Compute W(i+1:n,i) */ i__2 = *n - i__; _starpu_dsymv_("Lower", &i__2, &c_b6, &a[i__ + 1 + (i__ + 1) * a_dim1] , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &w[ i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b6, &w[i__ + 1 + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &w[ i__ * w_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b6, &w[ i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b6, &a[i__ + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &w[ i__ * w_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b5, &w[i__ + 1 + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b6, &w[ i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; _starpu_dscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; alpha = tau[i__] * -.5 * _starpu_ddot_(&i__2, &w[i__ + 1 + i__ * w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); i__2 = *n - i__; _starpu_daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ i__ + 1 + i__ * w_dim1], &c__1); } /* L20: */ } } return 0; /* End of DLATRD */ } /* _starpu_dlatrd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlatrs.c000066400000000000000000000525671507764646700207040ustar00rootroot00000000000000/* dlatrs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b36 = .5; /* Subroutine */ int _starpu_dlatrs_(char *uplo, char *trans, char *diag, char * normin, integer *n, doublereal *a, integer *lda, doublereal *x, doublereal *scale, doublereal *cnorm, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j; doublereal xj, rec, tjj; integer jinc; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal xbnd; integer imax; doublereal tmax, tjjs, xmax, grow, sumj; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); doublereal tscal, uscal; extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); integer jlast; extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dtrsv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; logical notran; integer jfirst; doublereal smlnum; logical nounit; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLATRS solves one of the triangular systems */ /* A *x = s*b or A'*x = s*b */ /* with scaling to prevent overflow. Here A is an upper or lower */ /* triangular matrix, A' denotes the transpose of A, x and b are */ /* n-element vectors, and s is a scaling factor, usually less than */ /* or equal to 1, chosen so that the components of x will be less than */ /* the overflow threshold. If the unscaled problem will not cause */ /* overflow, the Level 2 BLAS routine DTRSV is called. If the matrix A */ /* is singular (A(j,j) = 0 for some j), then s is set to 0 and a */ /* non-trivial solution to A*x = 0 is returned. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the matrix A is upper or lower triangular. */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* TRANS (input) CHARACTER*1 */ /* Specifies the operation applied to A. */ /* = 'N': Solve A * x = s*b (No transpose) */ /* = 'T': Solve A'* x = s*b (Transpose) */ /* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) */ /* DIAG (input) CHARACTER*1 */ /* Specifies whether or not the matrix A is unit triangular. */ /* = 'N': Non-unit triangular */ /* = 'U': Unit triangular */ /* NORMIN (input) CHARACTER*1 */ /* Specifies whether CNORM has been set or not. */ /* = 'Y': CNORM contains the column norms on entry */ /* = 'N': CNORM is not set on entry. On exit, the norms will */ /* be computed and stored in CNORM. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The triangular matrix A. If UPLO = 'U', the leading n by n */ /* upper triangular part of the array A contains the upper */ /* triangular matrix, and the strictly lower triangular part of */ /* A is not referenced. If UPLO = 'L', the leading n by n lower */ /* triangular part of the array A contains the lower triangular */ /* matrix, and the strictly upper triangular part of A is not */ /* referenced. If DIAG = 'U', the diagonal elements of A are */ /* also not referenced and are assumed to be 1. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max (1,N). */ /* X (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the right hand side b of the triangular system. */ /* On exit, X is overwritten by the solution vector x. */ /* SCALE (output) DOUBLE PRECISION */ /* The scaling factor s for the triangular system */ /* A * x = s*b or A'* x = s*b. */ /* If SCALE = 0, the matrix A is singular or badly scaled, and */ /* the vector x is an exact or approximate solution to A*x = 0. */ /* CNORM (input or output) DOUBLE PRECISION array, dimension (N) */ /* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */ /* contains the norm of the off-diagonal part of the j-th column */ /* of A. If TRANS = 'N', CNORM(j) must be greater than or equal */ /* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */ /* must be greater than or equal to the 1-norm. */ /* If NORMIN = 'N', CNORM is an output argument and CNORM(j) */ /* returns the 1-norm of the offdiagonal part of the j-th column */ /* of A. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -k, the k-th argument had an illegal value */ /* Further Details */ /* ======= ======= */ /* A rough bound on x is computed; if that is less than overflow, DTRSV */ /* is called, otherwise, specific code is used which checks for possible */ /* overflow or divide-by-zero at every operation. */ /* A columnwise scheme is used for solving A*x = b. The basic algorithm */ /* if A is lower triangular is */ /* x[1:n] := b[1:n] */ /* for j = 1, ..., n */ /* x(j) := x(j) / A(j,j) */ /* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] */ /* end */ /* Define bounds on the components of x after j iterations of the loop: */ /* M(j) = bound on x[1:j] */ /* G(j) = bound on x[j+1:n] */ /* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. */ /* Then for iteration j+1 we have */ /* M(j+1) <= G(j) / | A(j+1,j+1) | */ /* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | */ /* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) */ /* where CNORM(j+1) is greater than or equal to the infinity-norm of */ /* column j+1 of A, not counting the diagonal. Hence */ /* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) */ /* 1<=i<=j */ /* and */ /* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) */ /* 1<=i< j */ /* Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTRSV if the */ /* reciprocal of the largest M(j), j=1,..,n, is larger than */ /* max(underflow, 1/overflow). */ /* The bound on x(j) is also used to determine when a step in the */ /* columnwise method can be performed without fear of overflow. If */ /* the computed bound is greater than a large constant, x is scaled to */ /* prevent overflow, but if the bound overflows, x is set to 0, x(j) to */ /* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. */ /* Similarly, a row-wise scheme is used to solve A'*x = b. The basic */ /* algorithm for A upper triangular is */ /* for j = 1, ..., n */ /* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) */ /* end */ /* We simultaneously compute two bounds */ /* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j */ /* M(j) = bound on x(i), 1<=i<=j */ /* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we */ /* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. */ /* Then the bound on x(j) is */ /* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | */ /* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) */ /* 1<=i<=j */ /* and we can safely call DTRSV if 1/M(n) and 1/G(n) are both greater */ /* than max(underflow, 1/overflow). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --x; --cnorm; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); notran = _starpu_lsame_(trans, "N"); nounit = _starpu_lsame_(diag, "N"); /* Test the input parameters. */ if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -3; } else if (! _starpu_lsame_(normin, "Y") && ! _starpu_lsame_(normin, "N")) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLATRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine machine dependent parameters to control overflow. */ smlnum = _starpu_dlamch_("Safe minimum") / _starpu_dlamch_("Precision"); bignum = 1. / smlnum; *scale = 1.; if (_starpu_lsame_(normin, "N")) { /* Compute the 1-norm of each column, not including the diagonal. */ if (upper) { /* A is upper triangular. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; cnorm[j] = _starpu_dasum_(&i__2, &a[j * a_dim1 + 1], &c__1); /* L10: */ } } else { /* A is lower triangular. */ i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { i__2 = *n - j; cnorm[j] = _starpu_dasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1); /* L20: */ } cnorm[*n] = 0.; } } /* Scale the column norms by TSCAL if the maximum element in CNORM is */ /* greater than BIGNUM. */ imax = _starpu_idamax_(n, &cnorm[1], &c__1); tmax = cnorm[imax]; if (tmax <= bignum) { tscal = 1.; } else { tscal = 1. / (smlnum * tmax); _starpu_dscal_(n, &tscal, &cnorm[1], &c__1); } /* Compute a bound on the computed solution vector to see if the */ /* Level 2 BLAS routine DTRSV can be used. */ j = _starpu_idamax_(n, &x[1], &c__1); xmax = (d__1 = x[j], abs(d__1)); xbnd = xmax; if (notran) { /* Compute the growth in A * x = b. */ if (upper) { jfirst = *n; jlast = 1; jinc = -1; } else { jfirst = 1; jlast = *n; jinc = 1; } if (tscal != 1.) { grow = 0.; goto L50; } if (nounit) { /* A is non-unit triangular. */ /* Compute GROW = 1/G(j) and XBND = 1/M(j). */ /* Initially, G(0) = max{x(i), i=1,...,n}. */ grow = 1. / max(xbnd,smlnum); xbnd = grow; i__1 = jlast; i__2 = jinc; for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L50; } /* M(j) = G(j-1) / abs(A(j,j)) */ tjj = (d__1 = a[j + j * a_dim1], abs(d__1)); /* Computing MIN */ d__1 = xbnd, d__2 = min(1.,tjj) * grow; xbnd = min(d__1,d__2); if (tjj + cnorm[j] >= smlnum) { /* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ grow *= tjj / (tjj + cnorm[j]); } else { /* G(j) could overflow, set GROW to 0. */ grow = 0.; } /* L30: */ } grow = xbnd; } else { /* A is unit triangular. */ /* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ /* Computing MIN */ d__1 = 1., d__2 = 1. / max(xbnd,smlnum); grow = min(d__1,d__2); i__2 = jlast; i__1 = jinc; for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L50; } /* G(j) = G(j-1)*( 1 + CNORM(j) ) */ grow *= 1. / (cnorm[j] + 1.); /* L40: */ } } L50: ; } else { /* Compute the growth in A' * x = b. */ if (upper) { jfirst = 1; jlast = *n; jinc = 1; } else { jfirst = *n; jlast = 1; jinc = -1; } if (tscal != 1.) { grow = 0.; goto L80; } if (nounit) { /* A is non-unit triangular. */ /* Compute GROW = 1/G(j) and XBND = 1/M(j). */ /* Initially, M(0) = max{x(i), i=1,...,n}. */ grow = 1. / max(xbnd,smlnum); xbnd = grow; i__1 = jlast; i__2 = jinc; for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L80; } /* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ xj = cnorm[j] + 1.; /* Computing MIN */ d__1 = grow, d__2 = xbnd / xj; grow = min(d__1,d__2); /* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ tjj = (d__1 = a[j + j * a_dim1], abs(d__1)); if (xj > tjj) { xbnd *= tjj / xj; } /* L60: */ } grow = min(grow,xbnd); } else { /* A is unit triangular. */ /* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ /* Computing MIN */ d__1 = 1., d__2 = 1. / max(xbnd,smlnum); grow = min(d__1,d__2); i__2 = jlast; i__1 = jinc; for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L80; } /* G(j) = ( 1 + CNORM(j) )*G(j-1) */ xj = cnorm[j] + 1.; grow /= xj; /* L70: */ } } L80: ; } if (grow * tscal > smlnum) { /* Use the Level 2 BLAS solve if the reciprocal of the bound on */ /* elements of X is not too small. */ _starpu_dtrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1); } else { /* Use a Level 1 BLAS solve, scaling intermediate results. */ if (xmax > bignum) { /* Scale X so that its components are less than or equal to */ /* BIGNUM in absolute value. */ *scale = bignum / xmax; _starpu_dscal_(n, scale, &x[1], &c__1); xmax = bignum; } if (notran) { /* Solve A * x = b */ i__1 = jlast; i__2 = jinc; for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ xj = (d__1 = x[j], abs(d__1)); if (nounit) { tjjs = a[j + j * a_dim1] * tscal; } else { tjjs = tscal; if (tscal == 1.) { goto L100; } } tjj = abs(tjjs); if (tjj > smlnum) { /* abs(A(j,j)) > SMLNUM: */ if (tjj < 1.) { if (xj > tjj * bignum) { /* Scale x by 1/b(j). */ rec = 1. / xj; _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j] /= tjjs; xj = (d__1 = x[j], abs(d__1)); } else if (tjj > 0.) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if (xj > tjj * bignum) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */ /* to avoid overflow when dividing by A(j,j). */ rec = tjj * bignum / xj; if (cnorm[j] > 1.) { /* Scale by 1/CNORM(j) to avoid overflow when */ /* multiplying x(j) times column j. */ rec /= cnorm[j]; } _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } x[j] /= tjjs; xj = (d__1 = x[j], abs(d__1)); } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ /* scale = 0, and compute a solution to A*x = 0. */ i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { x[i__] = 0.; /* L90: */ } x[j] = 1.; xj = 1.; *scale = 0.; xmax = 0.; } L100: /* Scale x if necessary to avoid overflow when adding a */ /* multiple of column j of A. */ if (xj > 1.) { rec = 1. / xj; if (cnorm[j] > (bignum - xmax) * rec) { /* Scale x by 1/(2*abs(x(j))). */ rec *= .5; _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; } } else if (xj * cnorm[j] > bignum - xmax) { /* Scale x by 1/2. */ _starpu_dscal_(n, &c_b36, &x[1], &c__1); *scale *= .5; } if (upper) { if (j > 1) { /* Compute the update */ /* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */ i__3 = j - 1; d__1 = -x[j] * tscal; _starpu_daxpy_(&i__3, &d__1, &a[j * a_dim1 + 1], &c__1, &x[1], &c__1); i__3 = j - 1; i__ = _starpu_idamax_(&i__3, &x[1], &c__1); xmax = (d__1 = x[i__], abs(d__1)); } } else { if (j < *n) { /* Compute the update */ /* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */ i__3 = *n - j; d__1 = -x[j] * tscal; _starpu_daxpy_(&i__3, &d__1, &a[j + 1 + j * a_dim1], &c__1, & x[j + 1], &c__1); i__3 = *n - j; i__ = j + _starpu_idamax_(&i__3, &x[j + 1], &c__1); xmax = (d__1 = x[i__], abs(d__1)); } } /* L110: */ } } else { /* Solve A' * x = b */ i__2 = jlast; i__1 = jinc; for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Compute x(j) = b(j) - sum A(k,j)*x(k). */ /* k<>j */ xj = (d__1 = x[j], abs(d__1)); uscal = tscal; rec = 1. / max(xmax,1.); if (cnorm[j] > (bignum - xj) * rec) { /* If x(j) could overflow, scale x by 1/(2*XMAX). */ rec *= .5; if (nounit) { tjjs = a[j + j * a_dim1] * tscal; } else { tjjs = tscal; } tjj = abs(tjjs); if (tjj > 1.) { /* Divide by A(j,j) when scaling x if A(j,j) > 1. */ /* Computing MIN */ d__1 = 1., d__2 = rec * tjj; rec = min(d__1,d__2); uscal /= tjjs; } if (rec < 1.) { _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } sumj = 0.; if (uscal == 1.) { /* If the scaling needed for A in the dot product is 1, */ /* call DDOT to perform the dot product. */ if (upper) { i__3 = j - 1; sumj = _starpu_ddot_(&i__3, &a[j * a_dim1 + 1], &c__1, &x[1], &c__1); } else if (j < *n) { i__3 = *n - j; sumj = _starpu_ddot_(&i__3, &a[j + 1 + j * a_dim1], &c__1, &x[ j + 1], &c__1); } } else { /* Otherwise, use in-line code for the dot product. */ if (upper) { i__3 = j - 1; for (i__ = 1; i__ <= i__3; ++i__) { sumj += a[i__ + j * a_dim1] * uscal * x[i__]; /* L120: */ } } else if (j < *n) { i__3 = *n; for (i__ = j + 1; i__ <= i__3; ++i__) { sumj += a[i__ + j * a_dim1] * uscal * x[i__]; /* L130: */ } } } if (uscal == tscal) { /* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) */ /* was not used to scale the dotproduct. */ x[j] -= sumj; xj = (d__1 = x[j], abs(d__1)); if (nounit) { tjjs = a[j + j * a_dim1] * tscal; } else { tjjs = tscal; if (tscal == 1.) { goto L150; } } /* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ tjj = abs(tjjs); if (tjj > smlnum) { /* abs(A(j,j)) > SMLNUM: */ if (tjj < 1.) { if (xj > tjj * bignum) { /* Scale X by 1/abs(x(j)). */ rec = 1. / xj; _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j] /= tjjs; } else if (tjj > 0.) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if (xj > tjj * bignum) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ rec = tjj * bignum / xj; _starpu_dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } x[j] /= tjjs; } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ /* scale = 0, and compute a solution to A'*x = 0. */ i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { x[i__] = 0.; /* L140: */ } x[j] = 1.; *scale = 0.; xmax = 0.; } L150: ; } else { /* Compute x(j) := x(j) / A(j,j) - sumj if the dot */ /* product has already been divided by 1/A(j,j). */ x[j] = x[j] / tjjs - sumj; } /* Computing MAX */ d__2 = xmax, d__3 = (d__1 = x[j], abs(d__1)); xmax = max(d__2,d__3); /* L160: */ } } *scale /= tscal; } /* Scale the column norms by 1/TSCAL for return. */ if (tscal != 1.) { d__1 = 1. / tscal; _starpu_dscal_(n, &d__1, &cnorm[1], &c__1); } return 0; /* End of DLATRS */ } /* _starpu_dlatrs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlatrz.c000066400000000000000000000117401507764646700206770ustar00rootroot00000000000000/* dlatrz.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dlatrz_(integer *m, integer *n, integer *l, doublereal * a, integer *lda, doublereal *tau, doublereal *work) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__; extern /* Subroutine */ int _starpu_dlarz_(char *, integer *, integer *, integer * , doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dlarfp_(integer *, doublereal *, doublereal *, integer *, doublereal *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLATRZ factors the M-by-(M+L) real upper trapezoidal matrix */ /* [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z, by means */ /* of orthogonal transformations. Z is an (M+L)-by-(M+L) orthogonal */ /* matrix and, R and A1 are M-by-M upper triangular matrices. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* L (input) INTEGER */ /* The number of columns of the matrix A containing the */ /* meaningful part of the Householder vectors. N-M >= L >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the leading M-by-N upper trapezoidal part of the */ /* array A must contain the matrix to be factorized. */ /* On exit, the leading M-by-M upper triangular part of A */ /* contains the upper triangular matrix R, and elements N-L+1 to */ /* N of the first M rows of A, with the array TAU, represent the */ /* orthogonal matrix Z as a product of M elementary reflectors. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (M) */ /* The scalar factors of the elementary reflectors. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (M) */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ /* The factorization is obtained by Householder's method. The kth */ /* transformation matrix, Z( k ), which is used to introduce zeros into */ /* the ( m - k + 1 )th row of A, is given in the form */ /* Z( k ) = ( I 0 ), */ /* ( 0 T( k ) ) */ /* where */ /* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), */ /* ( 0 ) */ /* ( z( k ) ) */ /* tau is a scalar and z( k ) is an l element vector. tau and z( k ) */ /* are chosen to annihilate the elements of the kth row of A2. */ /* The scalar tau is returned in the kth element of TAU and the vector */ /* u( k ) in the kth row of A2, such that the elements of z( k ) are */ /* in a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in */ /* the upper triangular part of A1. */ /* Z is given by */ /* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Quick return if possible */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ if (*m == 0) { return 0; } else if (*m == *n) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { tau[i__] = 0.; /* L10: */ } return 0; } for (i__ = *m; i__ >= 1; --i__) { /* Generate elementary reflector H(i) to annihilate */ /* [ A(i,i) A(i,n-l+1:n) ] */ i__1 = *l + 1; _starpu_dlarfp_(&i__1, &a[i__ + i__ * a_dim1], &a[i__ + (*n - *l + 1) * a_dim1], lda, &tau[i__]); /* Apply H(i) to A(1:i-1,i:n) from the right */ i__1 = i__ - 1; i__2 = *n - i__ + 1; _starpu_dlarz_("Right", &i__1, &i__2, l, &a[i__ + (*n - *l + 1) * a_dim1], lda, &tau[i__], &a[i__ * a_dim1 + 1], lda, &work[1]); /* L20: */ } return 0; /* End of DLATRZ */ } /* _starpu_dlatrz_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlatzm.c000066400000000000000000000134721507764646700206760ustar00rootroot00000000000000/* dlatzm.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b5 = 1.; /* Subroutine */ int _starpu_dlatzm_(char *side, integer *m, integer *n, doublereal * v, integer *incv, doublereal *tau, doublereal *c1, doublereal *c2, integer *ldc, doublereal *work) { /* System generated locals */ integer c1_dim1, c1_offset, c2_dim1, c2_offset, i__1; doublereal d__1; /* Local variables */ extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *) ; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This routine is deprecated and has been replaced by routine DORMRZ. */ /* DLATZM applies a Householder matrix generated by DTZRQF to a matrix. */ /* Let P = I - tau*u*u', u = ( 1 ), */ /* ( v ) */ /* where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if */ /* SIDE = 'R'. */ /* If SIDE equals 'L', let */ /* C = [ C1 ] 1 */ /* [ C2 ] m-1 */ /* n */ /* Then C is overwritten by P*C. */ /* If SIDE equals 'R', let */ /* C = [ C1, C2 ] m */ /* 1 n-1 */ /* Then C is overwritten by C*P. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': form P * C */ /* = 'R': form C * P */ /* M (input) INTEGER */ /* The number of rows of the matrix C. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. */ /* V (input) DOUBLE PRECISION array, dimension */ /* (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ /* (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ /* The vector v in the representation of P. V is not used */ /* if TAU = 0. */ /* INCV (input) INTEGER */ /* The increment between elements of v. INCV <> 0 */ /* TAU (input) DOUBLE PRECISION */ /* The value tau in the representation of P. */ /* C1 (input/output) DOUBLE PRECISION array, dimension */ /* (LDC,N) if SIDE = 'L' */ /* (M,1) if SIDE = 'R' */ /* On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1 */ /* if SIDE = 'R'. */ /* On exit, the first row of P*C if SIDE = 'L', or the first */ /* column of C*P if SIDE = 'R'. */ /* C2 (input/output) DOUBLE PRECISION array, dimension */ /* (LDC, N) if SIDE = 'L' */ /* (LDC, N-1) if SIDE = 'R' */ /* On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the */ /* m x (n - 1) matrix C2 if SIDE = 'R'. */ /* On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P */ /* if SIDE = 'R'. */ /* LDC (input) INTEGER */ /* The leading dimension of the arrays C1 and C2. LDC >= (1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (N) if SIDE = 'L' */ /* (M) if SIDE = 'R' */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --v; c2_dim1 = *ldc; c2_offset = 1 + c2_dim1; c2 -= c2_offset; c1_dim1 = *ldc; c1_offset = 1 + c1_dim1; c1 -= c1_offset; --work; /* Function Body */ if (min(*m,*n) == 0 || *tau == 0.) { return 0; } if (_starpu_lsame_(side, "L")) { /* w := C1 + v' * C2 */ _starpu_dcopy_(n, &c1[c1_offset], ldc, &work[1], &c__1); i__1 = *m - 1; _starpu_dgemv_("Transpose", &i__1, n, &c_b5, &c2[c2_offset], ldc, &v[1], incv, &c_b5, &work[1], &c__1); /* [ C1 ] := [ C1 ] - tau* [ 1 ] * w' */ /* [ C2 ] [ C2 ] [ v ] */ d__1 = -(*tau); _starpu_daxpy_(n, &d__1, &work[1], &c__1, &c1[c1_offset], ldc); i__1 = *m - 1; d__1 = -(*tau); _starpu_dger_(&i__1, n, &d__1, &v[1], incv, &work[1], &c__1, &c2[c2_offset], ldc); } else if (_starpu_lsame_(side, "R")) { /* w := C1 + C2 * v */ _starpu_dcopy_(m, &c1[c1_offset], &c__1, &work[1], &c__1); i__1 = *n - 1; _starpu_dgemv_("No transpose", m, &i__1, &c_b5, &c2[c2_offset], ldc, &v[1], incv, &c_b5, &work[1], &c__1); /* [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v'] */ d__1 = -(*tau); _starpu_daxpy_(m, &d__1, &work[1], &c__1, &c1[c1_offset], &c__1); i__1 = *n - 1; d__1 = -(*tau); _starpu_dger_(m, &i__1, &d__1, &work[1], &c__1, &v[1], incv, &c2[c2_offset], ldc); } return 0; /* End of DLATZM */ } /* _starpu_dlatzm_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlauu2.c000066400000000000000000000122261507764646700205730ustar00rootroot00000000000000/* dlauu2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b7 = 1.; static integer c__1 = 1; /* Subroutine */ int _starpu_dlauu2_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__; doublereal aii; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAUU2 computes the product U * U' or L' * L, where the triangular */ /* factor U or L is stored in the upper or lower triangular part of */ /* the array A. */ /* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, */ /* overwriting the factor U in A. */ /* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, */ /* overwriting the factor L in A. */ /* This is the unblocked form of the algorithm, calling Level 2 BLAS. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the triangular factor stored in the array A */ /* is upper or lower triangular: */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The order of the triangular factor U or L. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the triangular factor U or L. */ /* On exit, if UPLO = 'U', the upper triangle of A is */ /* overwritten with the upper triangle of the product U * U'; */ /* if UPLO = 'L', the lower triangle of A is overwritten with */ /* the lower triangle of the product L' * L. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -k, the k-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLAUU2", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (upper) { /* Compute the product U * U'. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { aii = a[i__ + i__ * a_dim1]; if (i__ < *n) { i__2 = *n - i__ + 1; a[i__ + i__ * a_dim1] = _starpu_ddot_(&i__2, &a[i__ + i__ * a_dim1], lda, &a[i__ + i__ * a_dim1], lda); i__2 = i__ - 1; i__3 = *n - i__; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b7, &a[(i__ + 1) * a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & aii, &a[i__ * a_dim1 + 1], &c__1); } else { _starpu_dscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); } /* L10: */ } } else { /* Compute the product L' * L. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { aii = a[i__ + i__ * a_dim1]; if (i__ < *n) { i__2 = *n - i__ + 1; a[i__ + i__ * a_dim1] = _starpu_ddot_(&i__2, &a[i__ + i__ * a_dim1], & c__1, &a[i__ + i__ * a_dim1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b7, &a[i__ + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii, &a[i__ + a_dim1], lda); } else { _starpu_dscal_(&i__, &aii, &a[i__ + a_dim1], lda); } /* L20: */ } } return 0; /* End of DLAUU2 */ } /* _starpu_dlauu2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dlauum.c000066400000000000000000000147741507764646700207000ustar00rootroot00000000000000/* dlauum.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b15 = 1.; /* Subroutine */ int _starpu_dlauum_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, ib, nb; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dsyrk_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlauu2_(char *, integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAUUM computes the product U * U' or L' * L, where the triangular */ /* factor U or L is stored in the upper or lower triangular part of */ /* the array A. */ /* If UPLO = 'U' or 'u' then the upper triangle of the result is stored, */ /* overwriting the factor U in A. */ /* If UPLO = 'L' or 'l' then the lower triangle of the result is stored, */ /* overwriting the factor L in A. */ /* This is the blocked form of the algorithm, calling Level 3 BLAS. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the triangular factor stored in the array A */ /* is upper or lower triangular: */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The order of the triangular factor U or L. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the triangular factor U or L. */ /* On exit, if UPLO = 'U', the upper triangle of A is */ /* overwritten with the upper triangle of the product U * U'; */ /* if UPLO = 'L', the lower triangle of A is overwritten with */ /* the lower triangle of the product L' * L. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -k, the k-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DLAUUM", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine the block size for this environment. */ nb = _starpu_ilaenv_(&c__1, "DLAUUM", uplo, n, &c_n1, &c_n1, &c_n1); if (nb <= 1 || nb >= *n) { /* Use unblocked code */ _starpu_dlauu2_(uplo, n, &a[a_offset], lda, info); } else { /* Use blocked code */ if (upper) { /* Compute the product U * U'. */ i__1 = *n; i__2 = nb; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = nb, i__4 = *n - i__ + 1; ib = min(i__3,i__4); i__3 = i__ - 1; _starpu_dtrmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, &c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 + 1], lda) ; _starpu_dlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); if (i__ + ib <= *n) { i__3 = i__ - 1; i__4 = *n - i__ - ib + 1; _starpu_dgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, & c_b15, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ * a_dim1 + 1], lda); i__3 = *n - i__ - ib + 1; _starpu_dsyrk_("Upper", "No transpose", &ib, &i__3, &c_b15, &a[ i__ + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1], lda); } /* L10: */ } } else { /* Compute the product L' * L. */ i__2 = *n; i__1 = nb; for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { /* Computing MIN */ i__3 = nb, i__4 = *n - i__ + 1; ib = min(i__3,i__4); i__3 = i__ - 1; _starpu_dtrmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, & c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], lda); _starpu_dlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); if (i__ + ib <= *n) { i__3 = i__ - 1; i__4 = *n - i__ - ib + 1; _starpu_dgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, & c_b15, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ + ib + a_dim1], lda, &c_b15, &a[i__ + a_dim1], lda); i__3 = *n - i__ - ib + 1; _starpu_dsyrk_("Lower", "Transpose", &ib, &i__3, &c_b15, &a[i__ + ib + i__ * a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1], lda); } /* L20: */ } } } return 0; /* End of DLAUUM */ } /* _starpu_dlauum_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dopgtr.c000066400000000000000000000125761507764646700207060ustar00rootroot00000000000000/* dopgtr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dopgtr_(char *uplo, integer *n, doublereal *ap, doublereal *tau, doublereal *q, integer *ldq, doublereal *work, integer *info) { /* System generated locals */ integer q_dim1, q_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, ij; extern logical _starpu_lsame_(char *, char *); integer iinfo; logical upper; extern /* Subroutine */ int _starpu_dorg2l_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dorg2r_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DOPGTR generates a real orthogonal matrix Q which is defined as the */ /* product of n-1 elementary reflectors H(i) of order n, as returned by */ /* DSPTRD using packed storage: */ /* if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), */ /* if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangular packed storage used in previous */ /* call to DSPTRD; */ /* = 'L': Lower triangular packed storage used in previous */ /* call to DSPTRD. */ /* N (input) INTEGER */ /* The order of the matrix Q. N >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The vectors which define the elementary reflectors, as */ /* returned by DSPTRD. */ /* TAU (input) DOUBLE PRECISION array, dimension (N-1) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DSPTRD. */ /* Q (output) DOUBLE PRECISION array, dimension (LDQ,N) */ /* The N-by-N orthogonal matrix Q. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max(1,N). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N-1) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ --ap; --tau; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --work; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldq < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DOPGTR", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (upper) { /* Q was determined by a call to DSPTRD with UPLO = 'U' */ /* Unpack the vectors which define the elementary reflectors and */ /* set the last row and column of Q equal to those of the unit */ /* matrix */ ij = 2; i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { q[i__ + j * q_dim1] = ap[ij]; ++ij; /* L10: */ } ij += 2; q[*n + j * q_dim1] = 0.; /* L20: */ } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { q[i__ + *n * q_dim1] = 0.; /* L30: */ } q[*n + *n * q_dim1] = 1.; /* Generate Q(1:n-1,1:n-1) */ i__1 = *n - 1; i__2 = *n - 1; i__3 = *n - 1; _starpu_dorg2l_(&i__1, &i__2, &i__3, &q[q_offset], ldq, &tau[1], &work[1], & iinfo); } else { /* Q was determined by a call to DSPTRD with UPLO = 'L'. */ /* Unpack the vectors which define the elementary reflectors and */ /* set the first row and column of Q equal to those of the unit */ /* matrix */ q[q_dim1 + 1] = 1.; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { q[i__ + q_dim1] = 0.; /* L40: */ } ij = 3; i__1 = *n; for (j = 2; j <= i__1; ++j) { q[j * q_dim1 + 1] = 0.; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { q[i__ + j * q_dim1] = ap[ij]; ++ij; /* L50: */ } ij += 2; /* L60: */ } if (*n > 1) { /* Generate Q(2:n,2:n) */ i__1 = *n - 1; i__2 = *n - 1; i__3 = *n - 1; _starpu_dorg2r_(&i__1, &i__2, &i__3, &q[(q_dim1 << 1) + 2], ldq, &tau[1], &work[1], &iinfo); } } return 0; /* End of DOPGTR */ } /* _starpu_dopgtr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dopmtr.c000066400000000000000000000162231507764646700207050ustar00rootroot00000000000000/* dopmtr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dopmtr_(char *side, char *uplo, char *trans, integer *m, integer *n, doublereal *ap, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer c_dim1, c_offset, i__1, i__2; /* Local variables */ integer i__, i1, i2, i3, ic, jc, ii, mi, ni, nq; doublereal aii; logical left; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *); extern logical _starpu_lsame_(char *, char *); logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical notran, forwrd; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DOPMTR overwrites the general real M-by-N matrix C with */ /* SIDE = 'L' SIDE = 'R' */ /* TRANS = 'N': Q * C C * Q */ /* TRANS = 'T': Q**T * C C * Q**T */ /* where Q is a real orthogonal matrix of order nq, with nq = m if */ /* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of */ /* nq-1 elementary reflectors, as returned by DSPTRD using packed */ /* storage: */ /* if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); */ /* if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q**T from the Left; */ /* = 'R': apply Q or Q**T from the Right. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangular packed storage used in previous */ /* call to DSPTRD; */ /* = 'L': Lower triangular packed storage used in previous */ /* call to DSPTRD. */ /* TRANS (input) CHARACTER*1 */ /* = 'N': No transpose, apply Q; */ /* = 'T': Transpose, apply Q**T. */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension */ /* (M*(M+1)/2) if SIDE = 'L' */ /* (N*(N+1)/2) if SIDE = 'R' */ /* The vectors which define the elementary reflectors, as */ /* returned by DSPTRD. AP is modified by the routine but */ /* restored on exit. */ /* TAU (input) DOUBLE PRECISION array, dimension (M-1) if SIDE = 'L' */ /* or (N-1) if SIDE = 'R' */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DSPTRD. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the M-by-N matrix C. */ /* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (N) if SIDE = 'L' */ /* (M) if SIDE = 'R' */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ --ap; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); upper = _starpu_lsame_(uplo, "U"); /* NQ is the order of Q */ if (left) { nq = *m; } else { nq = *n; } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*ldc < max(1,*m)) { *info = -9; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DOPMTR", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } if (upper) { /* Q was determined by a call to DSPTRD with UPLO = 'U' */ forwrd = left && notran || ! left && ! notran; if (forwrd) { i1 = 1; i2 = nq - 1; i3 = 1; ii = 2; } else { i1 = nq - 1; i2 = 1; i3 = -1; ii = nq * (nq + 1) / 2 - 1; } if (left) { ni = *n; } else { mi = *m; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { if (left) { /* H(i) is applied to C(1:i,1:n) */ mi = i__; } else { /* H(i) is applied to C(1:m,1:i) */ ni = i__; } /* Apply H(i) */ aii = ap[ii]; ap[ii] = 1.; _starpu_dlarf_(side, &mi, &ni, &ap[ii - i__ + 1], &c__1, &tau[i__], &c__[ c_offset], ldc, &work[1]); ap[ii] = aii; if (forwrd) { ii = ii + i__ + 2; } else { ii = ii - i__ - 1; } /* L10: */ } } else { /* Q was determined by a call to DSPTRD with UPLO = 'L'. */ forwrd = left && ! notran || ! left && notran; if (forwrd) { i1 = 1; i2 = nq - 1; i3 = 1; ii = 2; } else { i1 = nq - 1; i2 = 1; i3 = -1; ii = nq * (nq + 1) / 2 - 1; } if (left) { ni = *n; jc = 1; } else { mi = *m; ic = 1; } i__2 = i2; i__1 = i3; for (i__ = i1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { aii = ap[ii]; ap[ii] = 1.; if (left) { /* H(i) is applied to C(i+1:m,1:n) */ mi = *m - i__; ic = i__ + 1; } else { /* H(i) is applied to C(1:m,i+1:n) */ ni = *n - i__; jc = i__ + 1; } /* Apply H(i) */ _starpu_dlarf_(side, &mi, &ni, &ap[ii], &c__1, &tau[i__], &c__[ic + jc * c_dim1], ldc, &work[1]); ap[ii] = aii; if (forwrd) { ii = ii + nq - i__ + 1; } else { ii = ii - nq + i__ - 2; } /* L20: */ } } return 0; /* End of DOPMTR */ } /* _starpu_dopmtr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dorg2l.c000066400000000000000000000111351507764646700205660ustar00rootroot00000000000000/* dorg2l.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dorg2l_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1; /* Local variables */ integer i__, j, l, ii; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORG2L generates an m by n real matrix Q with orthonormal columns, */ /* which is defined as the last n columns of a product of k elementary */ /* reflectors of order m */ /* Q = H(k) . . . H(2) H(1) */ /* as returned by DGEQLF. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix Q. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix Q. M >= N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines the */ /* matrix Q. N >= K >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the (n-k+i)-th column must contain the vector which */ /* defines the elementary reflector H(i), for i = 1,2,...,k, as */ /* returned by DGEQLF in the last k columns of its array */ /* argument A. */ /* On exit, the m by n matrix Q. */ /* LDA (input) INTEGER */ /* The first dimension of the array A. LDA >= max(1,M). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGEQLF. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument has an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0 || *n > *m) { *info = -2; } else if (*k < 0 || *k > *n) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORG2L", &i__1); return 0; } /* Quick return if possible */ if (*n <= 0) { return 0; } /* Initialise columns 1:n-k to columns of the unit matrix */ i__1 = *n - *k; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (l = 1; l <= i__2; ++l) { a[l + j * a_dim1] = 0.; /* L10: */ } a[*m - *n + j + j * a_dim1] = 1.; /* L20: */ } i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { ii = *n - *k + i__; /* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left */ a[*m - *n + ii + ii * a_dim1] = 1.; i__2 = *m - *n + ii; i__3 = ii - 1; _starpu_dlarf_("Left", &i__2, &i__3, &a[ii * a_dim1 + 1], &c__1, &tau[i__], & a[a_offset], lda, &work[1]); i__2 = *m - *n + ii - 1; d__1 = -tau[i__]; _starpu_dscal_(&i__2, &d__1, &a[ii * a_dim1 + 1], &c__1); a[*m - *n + ii + ii * a_dim1] = 1. - tau[i__]; /* Set A(m-k+i+1:m,n-k+i) to zero */ i__2 = *m; for (l = *m - *n + ii + 1; l <= i__2; ++l) { a[l + ii * a_dim1] = 0.; /* L30: */ } /* L40: */ } return 0; /* End of DORG2L */ } /* _starpu_dorg2l_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dorg2r.c000066400000000000000000000111221507764646700205700ustar00rootroot00000000000000/* dorg2r.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dorg2r_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1; /* Local variables */ integer i__, j, l; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORG2R generates an m by n real matrix Q with orthonormal columns, */ /* which is defined as the first n columns of a product of k elementary */ /* reflectors of order m */ /* Q = H(1) H(2) . . . H(k) */ /* as returned by DGEQRF. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix Q. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix Q. M >= N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines the */ /* matrix Q. N >= K >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the i-th column must contain the vector which */ /* defines the elementary reflector H(i), for i = 1,2,...,k, as */ /* returned by DGEQRF in the first k columns of its array */ /* argument A. */ /* On exit, the m-by-n matrix Q. */ /* LDA (input) INTEGER */ /* The first dimension of the array A. LDA >= max(1,M). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGEQRF. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument has an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0 || *n > *m) { *info = -2; } else if (*k < 0 || *k > *n) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORG2R", &i__1); return 0; } /* Quick return if possible */ if (*n <= 0) { return 0; } /* Initialise columns k+1:n to columns of the unit matrix */ i__1 = *n; for (j = *k + 1; j <= i__1; ++j) { i__2 = *m; for (l = 1; l <= i__2; ++l) { a[l + j * a_dim1] = 0.; /* L10: */ } a[j + j * a_dim1] = 1.; /* L20: */ } for (i__ = *k; i__ >= 1; --i__) { /* Apply H(i) to A(i:m,i:n) from the left */ if (i__ < *n) { a[i__ + i__ * a_dim1] = 1.; i__1 = *m - i__ + 1; i__2 = *n - i__; _starpu_dlarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); } if (i__ < *m) { i__1 = *m - i__; d__1 = -tau[i__]; _starpu_dscal_(&i__1, &d__1, &a[i__ + 1 + i__ * a_dim1], &c__1); } a[i__ + i__ * a_dim1] = 1. - tau[i__]; /* Set A(1:i-1,i) to zero */ i__1 = i__ - 1; for (l = 1; l <= i__1; ++l) { a[l + i__ * a_dim1] = 0.; /* L30: */ } /* L40: */ } return 0; /* End of DORG2R */ } /* _starpu_dorg2r_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dorgbr.c000066400000000000000000000212331507764646700206540ustar00rootroot00000000000000/* dorgbr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; /* Subroutine */ int _starpu_dorgbr_(char *vect, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, nb, mn; extern logical _starpu_lsame_(char *, char *); integer iinfo; logical wantq; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dorglq_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORGBR generates one of the real orthogonal matrices Q or P**T */ /* determined by DGEBRD when reducing a real matrix A to bidiagonal */ /* form: A = Q * B * P**T. Q and P**T are defined as products of */ /* elementary reflectors H(i) or G(i) respectively. */ /* If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q */ /* is of order M: */ /* if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n */ /* columns of Q, where m >= n >= k; */ /* if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an */ /* M-by-M matrix. */ /* If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T */ /* is of order N: */ /* if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m */ /* rows of P**T, where n >= m >= k; */ /* if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as */ /* an N-by-N matrix. */ /* Arguments */ /* ========= */ /* VECT (input) CHARACTER*1 */ /* Specifies whether the matrix Q or the matrix P**T is */ /* required, as defined in the transformation applied by DGEBRD: */ /* = 'Q': generate Q; */ /* = 'P': generate P**T. */ /* M (input) INTEGER */ /* The number of rows of the matrix Q or P**T to be returned. */ /* M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix Q or P**T to be returned. */ /* N >= 0. */ /* If VECT = 'Q', M >= N >= min(M,K); */ /* if VECT = 'P', N >= M >= min(N,K). */ /* K (input) INTEGER */ /* If VECT = 'Q', the number of columns in the original M-by-K */ /* matrix reduced by DGEBRD. */ /* If VECT = 'P', the number of rows in the original K-by-N */ /* matrix reduced by DGEBRD. */ /* K >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the vectors which define the elementary reflectors, */ /* as returned by DGEBRD. */ /* On exit, the M-by-N matrix Q or P**T. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (input) DOUBLE PRECISION array, dimension */ /* (min(M,K)) if VECT = 'Q' */ /* (min(N,K)) if VECT = 'P' */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i) or G(i), which determines Q or P**T, as */ /* returned by DGEBRD in its array argument TAUQ or TAUP. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,min(M,N)). */ /* For optimum performance LWORK >= min(M,N)*NB, where NB */ /* is the optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; wantq = _starpu_lsame_(vect, "Q"); mn = min(*m,*n); lquery = *lwork == -1; if (! wantq && ! _starpu_lsame_(vect, "P")) { *info = -1; } else if (*m < 0) { *info = -2; } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( *m > *n || *m < min(*n,*k))) { *info = -3; } else if (*k < 0) { *info = -4; } else if (*lda < max(1,*m)) { *info = -6; } else if (*lwork < max(1,mn) && ! lquery) { *info = -9; } if (*info == 0) { if (wantq) { nb = _starpu_ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1); } else { nb = _starpu_ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1); } lwkopt = max(1,mn) * nb; work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORGBR", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { work[1] = 1.; return 0; } if (wantq) { /* Form Q, determined by a call to DGEBRD to reduce an m-by-k */ /* matrix */ if (*m >= *k) { /* If m >= k, assume m >= n >= k */ _starpu_dorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & iinfo); } else { /* If m < k, assume m = n */ /* Shift the vectors which define the elementary reflectors one */ /* column to the right, and set the first row and column of Q */ /* to those of the unit matrix */ for (j = *m; j >= 2; --j) { a[j * a_dim1 + 1] = 0.; i__1 = *m; for (i__ = j + 1; i__ <= i__1; ++i__) { a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; /* L10: */ } /* L20: */ } a[a_dim1 + 1] = 1.; i__1 = *m; for (i__ = 2; i__ <= i__1; ++i__) { a[i__ + a_dim1] = 0.; /* L30: */ } if (*m > 1) { /* Form Q(2:m,2:m) */ i__1 = *m - 1; i__2 = *m - 1; i__3 = *m - 1; _starpu_dorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ 1], &work[1], lwork, &iinfo); } } } else { /* Form P', determined by a call to DGEBRD to reduce a k-by-n */ /* matrix */ if (*k < *n) { /* If k < n, assume k <= m <= n */ _starpu_dorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & iinfo); } else { /* If k >= n, assume m = n */ /* Shift the vectors which define the elementary reflectors one */ /* row downward, and set the first row and column of P' to */ /* those of the unit matrix */ a[a_dim1 + 1] = 1.; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { a[i__ + a_dim1] = 0.; /* L40: */ } i__1 = *n; for (j = 2; j <= i__1; ++j) { for (i__ = j - 1; i__ >= 2; --i__) { a[i__ + j * a_dim1] = a[i__ - 1 + j * a_dim1]; /* L50: */ } a[j * a_dim1 + 1] = 0.; /* L60: */ } if (*n > 1) { /* Form P'(2:n,2:n) */ i__1 = *n - 1; i__2 = *n - 1; i__3 = *n - 1; _starpu_dorglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ 1], &work[1], lwork, &iinfo); } } } work[1] = (doublereal) lwkopt; return 0; /* End of DORGBR */ } /* _starpu_dorgbr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dorghr.c000066400000000000000000000136361507764646700206720ustar00rootroot00000000000000/* dorghr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; /* Subroutine */ int _starpu_dorghr_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, nb, nh, iinfo; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORGHR generates a real orthogonal matrix Q which is defined as the */ /* product of IHI-ILO elementary reflectors of order N, as returned by */ /* DGEHRD: */ /* Q = H(ilo) H(ilo+1) . . . H(ihi-1). */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix Q. N >= 0. */ /* ILO (input) INTEGER */ /* IHI (input) INTEGER */ /* ILO and IHI must have the same values as in the previous call */ /* of DGEHRD. Q is equal to the unit matrix except in the */ /* submatrix Q(ilo+1:ihi,ilo+1:ihi). */ /* 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the vectors which define the elementary reflectors, */ /* as returned by DGEHRD. */ /* On exit, the N-by-N orthogonal matrix Q. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* TAU (input) DOUBLE PRECISION array, dimension (N-1) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGEHRD. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= IHI-ILO. */ /* For optimum performance LWORK >= (IHI-ILO)*NB, where NB is */ /* the optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; nh = *ihi - *ilo; lquery = *lwork == -1; if (*n < 0) { *info = -1; } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -2; } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*lwork < max(1,nh) && ! lquery) { *info = -8; } if (*info == 0) { nb = _starpu_ilaenv_(&c__1, "DORGQR", " ", &nh, &nh, &nh, &c_n1); lwkopt = max(1,nh) * nb; work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORGHR", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { work[1] = 1.; return 0; } /* Shift the vectors which define the elementary reflectors one */ /* column to the right, and set the first ilo and the last n-ihi */ /* rows and columns to those of the unit matrix */ i__1 = *ilo + 1; for (j = *ihi; j >= i__1; --j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = 0.; /* L10: */ } i__2 = *ihi; for (i__ = j + 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; /* L20: */ } i__2 = *n; for (i__ = *ihi + 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = 0.; /* L30: */ } /* L40: */ } i__1 = *ilo; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = 0.; /* L50: */ } a[j + j * a_dim1] = 1.; /* L60: */ } i__1 = *n; for (j = *ihi + 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = 0.; /* L70: */ } a[j + j * a_dim1] = 1.; /* L80: */ } if (nh > 0) { /* Generate Q(ilo+1:ihi,ilo+1:ihi) */ _starpu_dorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* ilo], &work[1], lwork, &iinfo); } work[1] = (doublereal) lwkopt; return 0; /* End of DORGHR */ } /* _starpu_dorghr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dorgl2.c000066400000000000000000000110421507764646700205630ustar00rootroot00000000000000/* dorgl2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dorgl2_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1; /* Local variables */ integer i__, j, l; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORGL2 generates an m by n real matrix Q with orthonormal rows, */ /* which is defined as the first m rows of a product of k elementary */ /* reflectors of order n */ /* Q = H(k) . . . H(2) H(1) */ /* as returned by DGELQF. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix Q. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix Q. N >= M. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines the */ /* matrix Q. M >= K >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the i-th row must contain the vector which defines */ /* the elementary reflector H(i), for i = 1,2,...,k, as returned */ /* by DGELQF in the first k rows of its array argument A. */ /* On exit, the m-by-n matrix Q. */ /* LDA (input) INTEGER */ /* The first dimension of the array A. LDA >= max(1,M). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGELQF. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (M) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument has an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < *m) { *info = -2; } else if (*k < 0 || *k > *m) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORGL2", &i__1); return 0; } /* Quick return if possible */ if (*m <= 0) { return 0; } if (*k < *m) { /* Initialise rows k+1:m to rows of the unit matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (l = *k + 1; l <= i__2; ++l) { a[l + j * a_dim1] = 0.; /* L10: */ } if (j > *k && j <= *m) { a[j + j * a_dim1] = 1.; } /* L20: */ } } for (i__ = *k; i__ >= 1; --i__) { /* Apply H(i) to A(i:m,i:n) from the right */ if (i__ < *n) { if (i__ < *m) { a[i__ + i__ * a_dim1] = 1.; i__1 = *m - i__; i__2 = *n - i__ + 1; _starpu_dlarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & tau[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); } i__1 = *n - i__; d__1 = -tau[i__]; _starpu_dscal_(&i__1, &d__1, &a[i__ + (i__ + 1) * a_dim1], lda); } a[i__ + i__ * a_dim1] = 1. - tau[i__]; /* Set A(i,1:i-1) to zero */ i__1 = i__ - 1; for (l = 1; l <= i__1; ++l) { a[i__ + l * a_dim1] = 0.; /* L30: */ } /* L40: */ } return 0; /* End of DORGL2 */ } /* _starpu_dorgl2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dorglq.c000066400000000000000000000173671507764646700207020ustar00rootroot00000000000000/* dorglq.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; /* Subroutine */ int _starpu_dorglq_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; extern /* Subroutine */ int _starpu_dorgl2_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORGLQ generates an M-by-N real matrix Q with orthonormal rows, */ /* which is defined as the first M rows of a product of K elementary */ /* reflectors of order N */ /* Q = H(k) . . . H(2) H(1) */ /* as returned by DGELQF. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix Q. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix Q. N >= M. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines the */ /* matrix Q. M >= K >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the i-th row must contain the vector which defines */ /* the elementary reflector H(i), for i = 1,2,...,k, as returned */ /* by DGELQF in the first k rows of its array argument A. */ /* On exit, the M-by-N matrix Q. */ /* LDA (input) INTEGER */ /* The first dimension of the array A. LDA >= max(1,M). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGELQF. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,M). */ /* For optimum performance LWORK >= M*NB, where NB is */ /* the optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument has an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; nb = _starpu_ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1); lwkopt = max(1,*m) * nb; work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < *m) { *info = -2; } else if (*k < 0 || *k > *m) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else if (*lwork < max(1,*m) && ! lquery) { *info = -8; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORGLQ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m <= 0) { work[1] = 1.; return 0; } nbmin = 2; nx = 0; iws = *m; if (nb > 1 && nb < *k) { /* Determine when to cross over from blocked to unblocked code. */ /* Computing MAX */ i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DORGLQ", " ", m, n, k, &c_n1); nx = max(i__1,i__2); if (nx < *k) { /* Determine if workspace is large enough for blocked code. */ ldwork = *m; iws = ldwork * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: reduce NB and */ /* determine the minimum value of NB. */ nb = *lwork / ldwork; /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORGLQ", " ", m, n, k, &c_n1); nbmin = max(i__1,i__2); } } } if (nb >= nbmin && nb < *k && nx < *k) { /* Use blocked code after the last block. */ /* The first kk rows are handled by the block method. */ ki = (*k - nx - 1) / nb * nb; /* Computing MIN */ i__1 = *k, i__2 = ki + nb; kk = min(i__1,i__2); /* Set A(kk+1:m,1:kk) to zero. */ i__1 = kk; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = kk + 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = 0.; /* L10: */ } /* L20: */ } } else { kk = 0; } /* Use unblocked code for the last or only block. */ if (kk < *m) { i__1 = *m - kk; i__2 = *n - kk; i__3 = *k - kk; _starpu_dorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & tau[kk + 1], &work[1], &iinfo); } if (kk > 0) { /* Use blocked code */ i__1 = -nb; for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { /* Computing MIN */ i__2 = nb, i__3 = *k - i__ + 1; ib = min(i__2,i__3); if (i__ + ib <= *m) { /* Form the triangular factor of the block reflector */ /* H = H(i) H(i+1) . . . H(i+ib-1) */ i__2 = *n - i__ + 1; _starpu_dlarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &ldwork); /* Apply H' to A(i+ib:m,i:n) from the right */ i__2 = *m - i__ - ib + 1; i__3 = *n - i__ + 1; _starpu_dlarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, & i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + 1], &ldwork); } /* Apply H' to columns i:n of current block */ i__2 = *n - i__ + 1; _starpu_dorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & work[1], &iinfo); /* Set columns 1:i-1 of current block to zero */ i__2 = i__ - 1; for (j = 1; j <= i__2; ++j) { i__3 = i__ + ib - 1; for (l = i__; l <= i__3; ++l) { a[l + j * a_dim1] = 0.; /* L30: */ } /* L40: */ } /* L50: */ } } work[1] = (doublereal) iws; return 0; /* End of DORGLQ */ } /* _starpu_dorglq_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dorgql.c000066400000000000000000000177051507764646700206760ustar00rootroot00000000000000/* dorgql.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; /* Subroutine */ int _starpu_dorgql_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, j, l, ib, nb, kk, nx, iws, nbmin, iinfo; extern /* Subroutine */ int _starpu_dorg2l_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORGQL generates an M-by-N real matrix Q with orthonormal columns, */ /* which is defined as the last N columns of a product of K elementary */ /* reflectors of order M */ /* Q = H(k) . . . H(2) H(1) */ /* as returned by DGEQLF. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix Q. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix Q. M >= N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines the */ /* matrix Q. N >= K >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the (n-k+i)-th column must contain the vector which */ /* defines the elementary reflector H(i), for i = 1,2,...,k, as */ /* returned by DGEQLF in the last k columns of its array */ /* argument A. */ /* On exit, the M-by-N matrix Q. */ /* LDA (input) INTEGER */ /* The first dimension of the array A. LDA >= max(1,M). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGEQLF. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,N). */ /* For optimum performance LWORK >= N*NB, where NB is the */ /* optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument has an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0 || *n > *m) { *info = -2; } else if (*k < 0 || *k > *n) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } if (*info == 0) { if (*n == 0) { lwkopt = 1; } else { nb = _starpu_ilaenv_(&c__1, "DORGQL", " ", m, n, k, &c_n1); lwkopt = *n * nb; } work[1] = (doublereal) lwkopt; if (*lwork < max(1,*n) && ! lquery) { *info = -8; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORGQL", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n <= 0) { return 0; } nbmin = 2; nx = 0; iws = *n; if (nb > 1 && nb < *k) { /* Determine when to cross over from blocked to unblocked code. */ /* Computing MAX */ i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DORGQL", " ", m, n, k, &c_n1); nx = max(i__1,i__2); if (nx < *k) { /* Determine if workspace is large enough for blocked code. */ ldwork = *n; iws = ldwork * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: reduce NB and */ /* determine the minimum value of NB. */ nb = *lwork / ldwork; /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORGQL", " ", m, n, k, &c_n1); nbmin = max(i__1,i__2); } } } if (nb >= nbmin && nb < *k && nx < *k) { /* Use blocked code after the first block. */ /* The last kk columns are handled by the block method. */ /* Computing MIN */ i__1 = *k, i__2 = (*k - nx + nb - 1) / nb * nb; kk = min(i__1,i__2); /* Set A(m-kk+1:m,1:n-kk) to zero. */ i__1 = *n - kk; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = *m - kk + 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = 0.; /* L10: */ } /* L20: */ } } else { kk = 0; } /* Use unblocked code for the first or only block. */ i__1 = *m - kk; i__2 = *n - kk; i__3 = *k - kk; _starpu_dorg2l_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], &iinfo) ; if (kk > 0) { /* Use blocked code */ i__1 = *k; i__2 = nb; for (i__ = *k - kk + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = nb, i__4 = *k - i__ + 1; ib = min(i__3,i__4); if (*n - *k + i__ > 1) { /* Form the triangular factor of the block reflector */ /* H = H(i+ib-1) . . . H(i+1) H(i) */ i__3 = *m - *k + i__ + ib - 1; _starpu_dlarft_("Backward", "Columnwise", &i__3, &ib, &a[(*n - *k + i__) * a_dim1 + 1], lda, &tau[i__], &work[1], &ldwork); /* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */ i__3 = *m - *k + i__ + ib - 1; i__4 = *n - *k + i__ - 1; _starpu_dlarfb_("Left", "No transpose", "Backward", "Columnwise", & i__3, &i__4, &ib, &a[(*n - *k + i__) * a_dim1 + 1], lda, &work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], &ldwork); } /* Apply H to rows 1:m-k+i+ib-1 of current block */ i__3 = *m - *k + i__ + ib - 1; _starpu_dorg2l_(&i__3, &ib, &ib, &a[(*n - *k + i__) * a_dim1 + 1], lda, & tau[i__], &work[1], &iinfo); /* Set rows m-k+i+ib:m of current block to zero */ i__3 = *n - *k + i__ + ib - 1; for (j = *n - *k + i__; j <= i__3; ++j) { i__4 = *m; for (l = *m - *k + i__ + ib; l <= i__4; ++l) { a[l + j * a_dim1] = 0.; /* L30: */ } /* L40: */ } /* L50: */ } } work[1] = (doublereal) iws; return 0; /* End of DORGQL */ } /* _starpu_dorgql_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dorgqr.c000066400000000000000000000174451507764646700207050ustar00rootroot00000000000000/* dorgqr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; /* Subroutine */ int _starpu_dorgqr_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; extern /* Subroutine */ int _starpu_dorg2r_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORGQR generates an M-by-N real matrix Q with orthonormal columns, */ /* which is defined as the first N columns of a product of K elementary */ /* reflectors of order M */ /* Q = H(1) H(2) . . . H(k) */ /* as returned by DGEQRF. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix Q. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix Q. M >= N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines the */ /* matrix Q. N >= K >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the i-th column must contain the vector which */ /* defines the elementary reflector H(i), for i = 1,2,...,k, as */ /* returned by DGEQRF in the first k columns of its array */ /* argument A. */ /* On exit, the M-by-N matrix Q. */ /* LDA (input) INTEGER */ /* The first dimension of the array A. LDA >= max(1,M). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGEQRF. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,N). */ /* For optimum performance LWORK >= N*NB, where NB is the */ /* optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument has an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; nb = _starpu_ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1); lwkopt = max(1,*n) * nb; work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0 || *n > *m) { *info = -2; } else if (*k < 0 || *k > *n) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else if (*lwork < max(1,*n) && ! lquery) { *info = -8; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORGQR", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n <= 0) { work[1] = 1.; return 0; } nbmin = 2; nx = 0; iws = *n; if (nb > 1 && nb < *k) { /* Determine when to cross over from blocked to unblocked code. */ /* Computing MAX */ i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DORGQR", " ", m, n, k, &c_n1); nx = max(i__1,i__2); if (nx < *k) { /* Determine if workspace is large enough for blocked code. */ ldwork = *n; iws = ldwork * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: reduce NB and */ /* determine the minimum value of NB. */ nb = *lwork / ldwork; /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORGQR", " ", m, n, k, &c_n1); nbmin = max(i__1,i__2); } } } if (nb >= nbmin && nb < *k && nx < *k) { /* Use blocked code after the last block. */ /* The first kk columns are handled by the block method. */ ki = (*k - nx - 1) / nb * nb; /* Computing MIN */ i__1 = *k, i__2 = ki + nb; kk = min(i__1,i__2); /* Set A(1:kk,kk+1:n) to zero. */ i__1 = *n; for (j = kk + 1; j <= i__1; ++j) { i__2 = kk; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = 0.; /* L10: */ } /* L20: */ } } else { kk = 0; } /* Use unblocked code for the last or only block. */ if (kk < *n) { i__1 = *m - kk; i__2 = *n - kk; i__3 = *k - kk; _starpu_dorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & tau[kk + 1], &work[1], &iinfo); } if (kk > 0) { /* Use blocked code */ i__1 = -nb; for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { /* Computing MIN */ i__2 = nb, i__3 = *k - i__ + 1; ib = min(i__2,i__3); if (i__ + ib <= *n) { /* Form the triangular factor of the block reflector */ /* H = H(i) H(i+1) . . . H(i+ib-1) */ i__2 = *m - i__ + 1; _starpu_dlarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &ldwork); /* Apply H to A(i:m,i+ib:n) from the left */ i__2 = *m - i__ + 1; i__3 = *n - i__ - ib + 1; _starpu_dlarfb_("Left", "No transpose", "Forward", "Columnwise", & i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & work[ib + 1], &ldwork); } /* Apply H to rows i:m of current block */ i__2 = *m - i__ + 1; _starpu_dorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & work[1], &iinfo); /* Set rows 1:i-1 of current block to zero */ i__2 = i__ + ib - 1; for (j = i__; j <= i__2; ++j) { i__3 = i__ - 1; for (l = 1; l <= i__3; ++l) { a[l + j * a_dim1] = 0.; /* L30: */ } /* L40: */ } /* L50: */ } } work[1] = (doublereal) iws; return 0; /* End of DORGQR */ } /* _starpu_dorgqr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dorgr2.c000066400000000000000000000111111507764646700205660ustar00rootroot00000000000000/* dorgr2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dorgr2_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1; /* Local variables */ integer i__, j, l, ii; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORGR2 generates an m by n real matrix Q with orthonormal rows, */ /* which is defined as the last m rows of a product of k elementary */ /* reflectors of order n */ /* Q = H(1) H(2) . . . H(k) */ /* as returned by DGERQF. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix Q. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix Q. N >= M. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines the */ /* matrix Q. M >= K >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the (m-k+i)-th row must contain the vector which */ /* defines the elementary reflector H(i), for i = 1,2,...,k, as */ /* returned by DGERQF in the last k rows of its array argument */ /* A. */ /* On exit, the m by n matrix Q. */ /* LDA (input) INTEGER */ /* The first dimension of the array A. LDA >= max(1,M). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGERQF. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (M) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument has an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < *m) { *info = -2; } else if (*k < 0 || *k > *m) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORGR2", &i__1); return 0; } /* Quick return if possible */ if (*m <= 0) { return 0; } if (*k < *m) { /* Initialise rows 1:m-k to rows of the unit matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m - *k; for (l = 1; l <= i__2; ++l) { a[l + j * a_dim1] = 0.; /* L10: */ } if (j > *n - *m && j <= *n - *k) { a[*m - *n + j + j * a_dim1] = 1.; } /* L20: */ } } i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { ii = *m - *k + i__; /* Apply H(i) to A(1:m-k+i,1:n-k+i) from the right */ a[ii + (*n - *m + ii) * a_dim1] = 1.; i__2 = ii - 1; i__3 = *n - *m + ii; _starpu_dlarf_("Right", &i__2, &i__3, &a[ii + a_dim1], lda, &tau[i__], &a[ a_offset], lda, &work[1]); i__2 = *n - *m + ii - 1; d__1 = -tau[i__]; _starpu_dscal_(&i__2, &d__1, &a[ii + a_dim1], lda); a[ii + (*n - *m + ii) * a_dim1] = 1. - tau[i__]; /* Set A(m-k+i,n-k+i+1:n) to zero */ i__2 = *n; for (l = *n - *m + ii + 1; l <= i__2; ++l) { a[ii + l * a_dim1] = 0.; /* L30: */ } /* L40: */ } return 0; /* End of DORGR2 */ } /* _starpu_dorgr2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dorgrq.c000066400000000000000000000175341507764646700207040ustar00rootroot00000000000000/* dorgrq.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; /* Subroutine */ int _starpu_dorgrq_(integer *m, integer *n, integer *k, doublereal * a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, j, l, ib, nb, ii, kk, nx, iws, nbmin, iinfo; extern /* Subroutine */ int _starpu_dorgr2_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORGRQ generates an M-by-N real matrix Q with orthonormal rows, */ /* which is defined as the last M rows of a product of K elementary */ /* reflectors of order N */ /* Q = H(1) H(2) . . . H(k) */ /* as returned by DGERQF. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix Q. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix Q. N >= M. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines the */ /* matrix Q. M >= K >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the (m-k+i)-th row must contain the vector which */ /* defines the elementary reflector H(i), for i = 1,2,...,k, as */ /* returned by DGERQF in the last k rows of its array argument */ /* A. */ /* On exit, the M-by-N matrix Q. */ /* LDA (input) INTEGER */ /* The first dimension of the array A. LDA >= max(1,M). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGERQF. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,M). */ /* For optimum performance LWORK >= M*NB, where NB is the */ /* optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument has an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < *m) { *info = -2; } else if (*k < 0 || *k > *m) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } if (*info == 0) { if (*m <= 0) { lwkopt = 1; } else { nb = _starpu_ilaenv_(&c__1, "DORGRQ", " ", m, n, k, &c_n1); lwkopt = *m * nb; } work[1] = (doublereal) lwkopt; if (*lwork < max(1,*m) && ! lquery) { *info = -8; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORGRQ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m <= 0) { return 0; } nbmin = 2; nx = 0; iws = *m; if (nb > 1 && nb < *k) { /* Determine when to cross over from blocked to unblocked code. */ /* Computing MAX */ i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DORGRQ", " ", m, n, k, &c_n1); nx = max(i__1,i__2); if (nx < *k) { /* Determine if workspace is large enough for blocked code. */ ldwork = *m; iws = ldwork * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: reduce NB and */ /* determine the minimum value of NB. */ nb = *lwork / ldwork; /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORGRQ", " ", m, n, k, &c_n1); nbmin = max(i__1,i__2); } } } if (nb >= nbmin && nb < *k && nx < *k) { /* Use blocked code after the first block. */ /* The last kk rows are handled by the block method. */ /* Computing MIN */ i__1 = *k, i__2 = (*k - nx + nb - 1) / nb * nb; kk = min(i__1,i__2); /* Set A(1:m-kk,n-kk+1:n) to zero. */ i__1 = *n; for (j = *n - kk + 1; j <= i__1; ++j) { i__2 = *m - kk; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = 0.; /* L10: */ } /* L20: */ } } else { kk = 0; } /* Use unblocked code for the first or only block. */ i__1 = *m - kk; i__2 = *n - kk; i__3 = *k - kk; _starpu_dorgr2_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], &iinfo) ; if (kk > 0) { /* Use blocked code */ i__1 = *k; i__2 = nb; for (i__ = *k - kk + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = nb, i__4 = *k - i__ + 1; ib = min(i__3,i__4); ii = *m - *k + i__; if (ii > 1) { /* Form the triangular factor of the block reflector */ /* H = H(i+ib-1) . . . H(i+1) H(i) */ i__3 = *n - *k + i__ + ib - 1; _starpu_dlarft_("Backward", "Rowwise", &i__3, &ib, &a[ii + a_dim1], lda, &tau[i__], &work[1], &ldwork); /* Apply H' to A(1:m-k+i-1,1:n-k+i+ib-1) from the right */ i__3 = ii - 1; i__4 = *n - *k + i__ + ib - 1; _starpu_dlarfb_("Right", "Transpose", "Backward", "Rowwise", &i__3, & i__4, &ib, &a[ii + a_dim1], lda, &work[1], &ldwork, & a[a_offset], lda, &work[ib + 1], &ldwork); } /* Apply H' to columns 1:n-k+i+ib-1 of current block */ i__3 = *n - *k + i__ + ib - 1; _starpu_dorgr2_(&ib, &i__3, &ib, &a[ii + a_dim1], lda, &tau[i__], &work[1] , &iinfo); /* Set columns n-k+i+ib:n of current block to zero */ i__3 = *n; for (l = *n - *k + i__ + ib; l <= i__3; ++l) { i__4 = ii + ib - 1; for (j = ii; j <= i__4; ++j) { a[j + l * a_dim1] = 0.; /* L30: */ } /* L40: */ } /* L50: */ } } work[1] = (doublereal) iws; return 0; /* End of DORGRQ */ } /* _starpu_dorgrq_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dorgtr.c000066400000000000000000000155501507764646700207030ustar00rootroot00000000000000/* dorgtr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; /* Subroutine */ int _starpu_dorgtr_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, nb; extern logical _starpu_lsame_(char *, char *); integer iinfo; logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dorgql_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORGTR generates a real orthogonal matrix Q which is defined as the */ /* product of n-1 elementary reflectors of order N, as returned by */ /* DSYTRD: */ /* if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), */ /* if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A contains elementary reflectors */ /* from DSYTRD; */ /* = 'L': Lower triangle of A contains elementary reflectors */ /* from DSYTRD. */ /* N (input) INTEGER */ /* The order of the matrix Q. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the vectors which define the elementary reflectors, */ /* as returned by DSYTRD. */ /* On exit, the N-by-N orthogonal matrix Q. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* TAU (input) DOUBLE PRECISION array, dimension (N-1) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DSYTRD. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,N-1). */ /* For optimum performance LWORK >= (N-1)*NB, where NB is */ /* the optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; lquery = *lwork == -1; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = *n - 1; if (*lwork < max(i__1,i__2) && ! lquery) { *info = -7; } } if (*info == 0) { if (upper) { i__1 = *n - 1; i__2 = *n - 1; i__3 = *n - 1; nb = _starpu_ilaenv_(&c__1, "DORGQL", " ", &i__1, &i__2, &i__3, &c_n1); } else { i__1 = *n - 1; i__2 = *n - 1; i__3 = *n - 1; nb = _starpu_ilaenv_(&c__1, "DORGQR", " ", &i__1, &i__2, &i__3, &c_n1); } /* Computing MAX */ i__1 = 1, i__2 = *n - 1; lwkopt = max(i__1,i__2) * nb; work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORGTR", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { work[1] = 1.; return 0; } if (upper) { /* Q was determined by a call to DSYTRD with UPLO = 'U' */ /* Shift the vectors which define the elementary reflectors one */ /* column to the left, and set the last row and column of Q to */ /* those of the unit matrix */ i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = a[i__ + (j + 1) * a_dim1]; /* L10: */ } a[*n + j * a_dim1] = 0.; /* L20: */ } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { a[i__ + *n * a_dim1] = 0.; /* L30: */ } a[*n + *n * a_dim1] = 1.; /* Generate Q(1:n-1,1:n-1) */ i__1 = *n - 1; i__2 = *n - 1; i__3 = *n - 1; _starpu_dorgql_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], lwork, &iinfo); } else { /* Q was determined by a call to DSYTRD with UPLO = 'L'. */ /* Shift the vectors which define the elementary reflectors one */ /* column to the right, and set the first row and column of Q to */ /* those of the unit matrix */ for (j = *n; j >= 2; --j) { a[j * a_dim1 + 1] = 0.; i__1 = *n; for (i__ = j + 1; i__ <= i__1; ++i__) { a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; /* L40: */ } /* L50: */ } a[a_dim1 + 1] = 1.; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { a[i__ + a_dim1] = 0.; /* L60: */ } if (*n > 1) { /* Generate Q(2:n,2:n) */ i__1 = *n - 1; i__2 = *n - 1; i__3 = *n - 1; _starpu_dorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[1], &work[1], lwork, &iinfo); } } work[1] = (doublereal) lwkopt; return 0; /* End of DORGTR */ } /* _starpu_dorgtr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dorm2l.c000066400000000000000000000141151507764646700205750ustar00rootroot00000000000000/* dorm2l.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dorm2l_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; /* Local variables */ integer i__, i1, i2, i3, mi, ni, nq; doublereal aii; logical left; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical notran; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORM2L overwrites the general real m by n matrix C with */ /* Q * C if SIDE = 'L' and TRANS = 'N', or */ /* Q'* C if SIDE = 'L' and TRANS = 'T', or */ /* C * Q if SIDE = 'R' and TRANS = 'N', or */ /* C * Q' if SIDE = 'R' and TRANS = 'T', */ /* where Q is a real orthogonal matrix defined as the product of k */ /* elementary reflectors */ /* Q = H(k) . . . H(2) H(1) */ /* as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n */ /* if SIDE = 'R'. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q' from the Left */ /* = 'R': apply Q or Q' from the Right */ /* TRANS (input) CHARACTER*1 */ /* = 'N': apply Q (No transpose) */ /* = 'T': apply Q' (Transpose) */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines */ /* the matrix Q. */ /* If SIDE = 'L', M >= K >= 0; */ /* if SIDE = 'R', N >= K >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,K) */ /* The i-th column must contain the vector which defines the */ /* elementary reflector H(i), for i = 1,2,...,k, as returned by */ /* DGEQLF in the last k columns of its array argument A. */ /* A is modified by the routine but restored on exit. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. */ /* If SIDE = 'L', LDA >= max(1,M); */ /* if SIDE = 'R', LDA >= max(1,N). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGEQLF. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the m by n matrix C. */ /* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (N) if SIDE = 'L', */ /* (M) if SIDE = 'R' */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); /* NQ is the order of Q */ if (left) { nq = *m; } else { nq = *n; } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORM2L", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { return 0; } if (left && notran || ! left && ! notran) { i1 = 1; i2 = *k; i3 = 1; } else { i1 = *k; i2 = 1; i3 = -1; } if (left) { ni = *n; } else { mi = *m; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { if (left) { /* H(i) is applied to C(1:m-k+i,1:n) */ mi = *m - *k + i__; } else { /* H(i) is applied to C(1:m,1:n-k+i) */ ni = *n - *k + i__; } /* Apply H(i) */ aii = a[nq - *k + i__ + i__ * a_dim1]; a[nq - *k + i__ + i__ * a_dim1] = 1.; _starpu_dlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[ c_offset], ldc, &work[1]); a[nq - *k + i__ + i__ * a_dim1] = aii; /* L10: */ } return 0; /* End of DORM2L */ } /* _starpu_dorm2l_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dorm2r.c000066400000000000000000000141501507764646700206020ustar00rootroot00000000000000/* dorm2r.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dorm2r_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; /* Local variables */ integer i__, i1, i2, i3, ic, jc, mi, ni, nq; doublereal aii; logical left; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical notran; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORM2R overwrites the general real m by n matrix C with */ /* Q * C if SIDE = 'L' and TRANS = 'N', or */ /* Q'* C if SIDE = 'L' and TRANS = 'T', or */ /* C * Q if SIDE = 'R' and TRANS = 'N', or */ /* C * Q' if SIDE = 'R' and TRANS = 'T', */ /* where Q is a real orthogonal matrix defined as the product of k */ /* elementary reflectors */ /* Q = H(1) H(2) . . . H(k) */ /* as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n */ /* if SIDE = 'R'. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q' from the Left */ /* = 'R': apply Q or Q' from the Right */ /* TRANS (input) CHARACTER*1 */ /* = 'N': apply Q (No transpose) */ /* = 'T': apply Q' (Transpose) */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines */ /* the matrix Q. */ /* If SIDE = 'L', M >= K >= 0; */ /* if SIDE = 'R', N >= K >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,K) */ /* The i-th column must contain the vector which defines the */ /* elementary reflector H(i), for i = 1,2,...,k, as returned by */ /* DGEQRF in the first k columns of its array argument A. */ /* A is modified by the routine but restored on exit. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. */ /* If SIDE = 'L', LDA >= max(1,M); */ /* if SIDE = 'R', LDA >= max(1,N). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGEQRF. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the m by n matrix C. */ /* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (N) if SIDE = 'L', */ /* (M) if SIDE = 'R' */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); /* NQ is the order of Q */ if (left) { nq = *m; } else { nq = *n; } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORM2R", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { return 0; } if (left && ! notran || ! left && notran) { i1 = 1; i2 = *k; i3 = 1; } else { i1 = *k; i2 = 1; i3 = -1; } if (left) { ni = *n; jc = 1; } else { mi = *m; ic = 1; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { if (left) { /* H(i) is applied to C(i:m,1:n) */ mi = *m - i__ + 1; ic = i__; } else { /* H(i) is applied to C(1:m,i:n) */ ni = *n - i__ + 1; jc = i__; } /* Apply H(i) */ aii = a[i__ + i__ * a_dim1]; a[i__ + i__ * a_dim1] = 1.; _starpu_dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ ic + jc * c_dim1], ldc, &work[1]); a[i__ + i__ * a_dim1] = aii; /* L10: */ } return 0; /* End of DORM2R */ } /* _starpu_dorm2r_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dormbr.c000066400000000000000000000250541507764646700206670ustar00rootroot00000000000000/* dormbr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; /* Subroutine */ int _starpu_dormbr_(char *vect, char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ integer i1, i2, nb, mi, ni, nq, nw; logical left; extern logical _starpu_lsame_(char *, char *); integer iinfo; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dormlq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); logical notran; extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); logical applyq; char transt[1]; integer lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C */ /* with */ /* SIDE = 'L' SIDE = 'R' */ /* TRANS = 'N': Q * C C * Q */ /* TRANS = 'T': Q**T * C C * Q**T */ /* If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C */ /* with */ /* SIDE = 'L' SIDE = 'R' */ /* TRANS = 'N': P * C C * P */ /* TRANS = 'T': P**T * C C * P**T */ /* Here Q and P**T are the orthogonal matrices determined by DGEBRD when */ /* reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and */ /* P**T are defined as products of elementary reflectors H(i) and G(i) */ /* respectively. */ /* Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the */ /* order of the orthogonal matrix Q or P**T that is applied. */ /* If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: */ /* if nq >= k, Q = H(1) H(2) . . . H(k); */ /* if nq < k, Q = H(1) H(2) . . . H(nq-1). */ /* If VECT = 'P', A is assumed to have been a K-by-NQ matrix: */ /* if k < nq, P = G(1) G(2) . . . G(k); */ /* if k >= nq, P = G(1) G(2) . . . G(nq-1). */ /* Arguments */ /* ========= */ /* VECT (input) CHARACTER*1 */ /* = 'Q': apply Q or Q**T; */ /* = 'P': apply P or P**T. */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q, Q**T, P or P**T from the Left; */ /* = 'R': apply Q, Q**T, P or P**T from the Right. */ /* TRANS (input) CHARACTER*1 */ /* = 'N': No transpose, apply Q or P; */ /* = 'T': Transpose, apply Q**T or P**T. */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* K (input) INTEGER */ /* If VECT = 'Q', the number of columns in the original */ /* matrix reduced by DGEBRD. */ /* If VECT = 'P', the number of rows in the original */ /* matrix reduced by DGEBRD. */ /* K >= 0. */ /* A (input) DOUBLE PRECISION array, dimension */ /* (LDA,min(nq,K)) if VECT = 'Q' */ /* (LDA,nq) if VECT = 'P' */ /* The vectors which define the elementary reflectors H(i) and */ /* G(i), whose products determine the matrices Q and P, as */ /* returned by DGEBRD. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. */ /* If VECT = 'Q', LDA >= max(1,nq); */ /* if VECT = 'P', LDA >= max(1,min(nq,K)). */ /* TAU (input) DOUBLE PRECISION array, dimension (min(nq,K)) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i) or G(i) which determines Q or P, as returned */ /* by DGEBRD in the array argument TAUQ or TAUP. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the M-by-N matrix C. */ /* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q */ /* or P*C or P**T*C or C*P or C*P**T. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If SIDE = 'L', LWORK >= max(1,N); */ /* if SIDE = 'R', LWORK >= max(1,M). */ /* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ /* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ /* blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; applyq = _starpu_lsame_(vect, "Q"); left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); lquery = *lwork == -1; /* NQ is the order of Q or P and NW is the minimum dimension of WORK */ if (left) { nq = *m; nw = *n; } else { nq = *n; nw = *m; } if (! applyq && ! _starpu_lsame_(vect, "P")) { *info = -1; } else if (! left && ! _starpu_lsame_(side, "R")) { *info = -2; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*k < 0) { *info = -6; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = min(nq,*k); if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { *info = -8; } else if (*ldc < max(1,*m)) { *info = -11; } else if (*lwork < max(1,nw) && ! lquery) { *info = -13; } } if (*info == 0) { if (applyq) { if (left) { /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *m - 1; i__2 = *m - 1; nb = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, &i__1, n, &i__2, &c_n1); } else { /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *n - 1; i__2 = *n - 1; nb = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, m, &i__1, &i__2, &c_n1); } } else { if (left) { /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *m - 1; i__2 = *m - 1; nb = _starpu_ilaenv_(&c__1, "DORMLQ", ch__1, &i__1, n, &i__2, &c_n1); } else { /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *n - 1; i__2 = *n - 1; nb = _starpu_ilaenv_(&c__1, "DORMLQ", ch__1, m, &i__1, &i__2, &c_n1); } } lwkopt = max(1,nw) * nb; work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORMBR", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ work[1] = 1.; if (*m == 0 || *n == 0) { return 0; } if (applyq) { /* Apply Q */ if (nq >= *k) { /* Q was determined by a call to DGEBRD with nq >= k */ _starpu_dormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], lwork, &iinfo); } else if (nq > 1) { /* Q was determined by a call to DGEBRD with nq < k */ if (left) { mi = *m - 1; ni = *n; i1 = 2; i2 = 1; } else { mi = *m; ni = *n - 1; i1 = 1; i2 = 2; } i__1 = nq - 1; _starpu_dormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); } } else { /* Apply P */ if (notran) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } if (nq > *k) { /* P was determined by a call to DGEBRD with nq > k */ _starpu_dormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], lwork, &iinfo); } else if (nq > 1) { /* P was determined by a call to DGEBRD with nq <= k */ if (left) { mi = *m - 1; ni = *n; i1 = 2; i2 = 1; } else { mi = *m; ni = *n - 1; i1 = 1; i2 = 2; } i__1 = nq - 1; _starpu_dormlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & iinfo); } } work[1] = (doublereal) lwkopt; return 0; /* End of DORMBR */ } /* _starpu_dormbr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dormhr.c000066400000000000000000000173011507764646700206710ustar00rootroot00000000000000/* dormhr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; /* Subroutine */ int _starpu_dormhr_(char *side, char *trans, integer *m, integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal * tau, doublereal *c__, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ integer i1, i2, nb, mi, nh, ni, nq, nw; logical left; extern logical _starpu_lsame_(char *, char *); integer iinfo; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORMHR overwrites the general real M-by-N matrix C with */ /* SIDE = 'L' SIDE = 'R' */ /* TRANS = 'N': Q * C C * Q */ /* TRANS = 'T': Q**T * C C * Q**T */ /* where Q is a real orthogonal matrix of order nq, with nq = m if */ /* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of */ /* IHI-ILO elementary reflectors, as returned by DGEHRD: */ /* Q = H(ilo) H(ilo+1) . . . H(ihi-1). */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q**T from the Left; */ /* = 'R': apply Q or Q**T from the Right. */ /* TRANS (input) CHARACTER*1 */ /* = 'N': No transpose, apply Q; */ /* = 'T': Transpose, apply Q**T. */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* ILO (input) INTEGER */ /* IHI (input) INTEGER */ /* ILO and IHI must have the same values as in the previous call */ /* of DGEHRD. Q is equal to the unit matrix except in the */ /* submatrix Q(ilo+1:ihi,ilo+1:ihi). */ /* If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and */ /* ILO = 1 and IHI = 0, if M = 0; */ /* if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and */ /* ILO = 1 and IHI = 0, if N = 0. */ /* A (input) DOUBLE PRECISION array, dimension */ /* (LDA,M) if SIDE = 'L' */ /* (LDA,N) if SIDE = 'R' */ /* The vectors which define the elementary reflectors, as */ /* returned by DGEHRD. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. */ /* LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. */ /* TAU (input) DOUBLE PRECISION array, dimension */ /* (M-1) if SIDE = 'L' */ /* (N-1) if SIDE = 'R' */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGEHRD. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the M-by-N matrix C. */ /* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If SIDE = 'L', LWORK >= max(1,N); */ /* if SIDE = 'R', LWORK >= max(1,M). */ /* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ /* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ /* blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; nh = *ihi - *ilo; left = _starpu_lsame_(side, "L"); lquery = *lwork == -1; /* NQ is the order of Q and NW is the minimum dimension of WORK */ if (left) { nq = *m; nw = *n; } else { nq = *n; nw = *m; } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*ilo < 1 || *ilo > max(1,nq)) { *info = -5; } else if (*ihi < min(*ilo,nq) || *ihi > nq) { *info = -6; } else if (*lda < max(1,nq)) { *info = -8; } else if (*ldc < max(1,*m)) { *info = -11; } else if (*lwork < max(1,nw) && ! lquery) { *info = -13; } if (*info == 0) { if (left) { /* Writing concatenation */ i__1[0] = 1, a__1[0] = side; i__1[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); nb = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, &nh, n, &nh, &c_n1); } else { /* Writing concatenation */ i__1[0] = 1, a__1[0] = side; i__1[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); nb = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, m, &nh, &nh, &c_n1); } lwkopt = max(1,nw) * nb; work[1] = (doublereal) lwkopt; } if (*info != 0) { i__2 = -(*info); _starpu_xerbla_("DORMHR", &i__2); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0 || nh == 0) { work[1] = 1.; return 0; } if (left) { mi = nh; ni = *n; i1 = *ilo + 1; i2 = 1; } else { mi = *m; ni = nh; i1 = 1; i2 = *ilo + 1; } _starpu_dormqr_(side, trans, &mi, &ni, &nh, &a[*ilo + 1 + *ilo * a_dim1], lda, & tau[*ilo], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); work[1] = (doublereal) lwkopt; return 0; /* End of DORMHR */ } /* _starpu_dormhr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dorml2.c000066400000000000000000000141061507764646700205750ustar00rootroot00000000000000/* dorml2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dorml2_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; /* Local variables */ integer i__, i1, i2, i3, ic, jc, mi, ni, nq; doublereal aii; logical left; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical notran; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORML2 overwrites the general real m by n matrix C with */ /* Q * C if SIDE = 'L' and TRANS = 'N', or */ /* Q'* C if SIDE = 'L' and TRANS = 'T', or */ /* C * Q if SIDE = 'R' and TRANS = 'N', or */ /* C * Q' if SIDE = 'R' and TRANS = 'T', */ /* where Q is a real orthogonal matrix defined as the product of k */ /* elementary reflectors */ /* Q = H(k) . . . H(2) H(1) */ /* as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n */ /* if SIDE = 'R'. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q' from the Left */ /* = 'R': apply Q or Q' from the Right */ /* TRANS (input) CHARACTER*1 */ /* = 'N': apply Q (No transpose) */ /* = 'T': apply Q' (Transpose) */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines */ /* the matrix Q. */ /* If SIDE = 'L', M >= K >= 0; */ /* if SIDE = 'R', N >= K >= 0. */ /* A (input) DOUBLE PRECISION array, dimension */ /* (LDA,M) if SIDE = 'L', */ /* (LDA,N) if SIDE = 'R' */ /* The i-th row must contain the vector which defines the */ /* elementary reflector H(i), for i = 1,2,...,k, as returned by */ /* DGELQF in the first k rows of its array argument A. */ /* A is modified by the routine but restored on exit. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,K). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGELQF. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the m by n matrix C. */ /* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (N) if SIDE = 'L', */ /* (M) if SIDE = 'R' */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); /* NQ is the order of Q */ if (left) { nq = *m; } else { nq = *n; } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,*k)) { *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORML2", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { return 0; } if (left && notran || ! left && ! notran) { i1 = 1; i2 = *k; i3 = 1; } else { i1 = *k; i2 = 1; i3 = -1; } if (left) { ni = *n; jc = 1; } else { mi = *m; ic = 1; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { if (left) { /* H(i) is applied to C(i:m,1:n) */ mi = *m - i__ + 1; ic = i__; } else { /* H(i) is applied to C(1:m,i:n) */ ni = *n - i__ + 1; jc = i__; } /* Apply H(i) */ aii = a[i__ + i__ * a_dim1]; a[i__ + i__ * a_dim1] = 1.; _starpu_dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ ic + jc * c_dim1], ldc, &work[1]); a[i__ + i__ * a_dim1] = aii; /* L10: */ } return 0; /* End of DORML2 */ } /* _starpu_dorml2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dormlq.c000066400000000000000000000225331507764646700206770ustar00rootroot00000000000000/* dormlq.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; static integer c__65 = 65; /* Subroutine */ int _starpu_dormlq_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, i__5; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ integer i__; doublereal t[4160] /* was [65][64] */; integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; logical left; extern logical _starpu_lsame_(char *, char *); integer nbmin, iinfo; extern /* Subroutine */ int _starpu_dorml2_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); logical notran; integer ldwork; char transt[1]; integer lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORMLQ overwrites the general real M-by-N matrix C with */ /* SIDE = 'L' SIDE = 'R' */ /* TRANS = 'N': Q * C C * Q */ /* TRANS = 'T': Q**T * C C * Q**T */ /* where Q is a real orthogonal matrix defined as the product of k */ /* elementary reflectors */ /* Q = H(k) . . . H(2) H(1) */ /* as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N */ /* if SIDE = 'R'. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q**T from the Left; */ /* = 'R': apply Q or Q**T from the Right. */ /* TRANS (input) CHARACTER*1 */ /* = 'N': No transpose, apply Q; */ /* = 'T': Transpose, apply Q**T. */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines */ /* the matrix Q. */ /* If SIDE = 'L', M >= K >= 0; */ /* if SIDE = 'R', N >= K >= 0. */ /* A (input) DOUBLE PRECISION array, dimension */ /* (LDA,M) if SIDE = 'L', */ /* (LDA,N) if SIDE = 'R' */ /* The i-th row must contain the vector which defines the */ /* elementary reflector H(i), for i = 1,2,...,k, as returned by */ /* DGELQF in the first k rows of its array argument A. */ /* A is modified by the routine but restored on exit. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,K). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGELQF. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the M-by-N matrix C. */ /* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If SIDE = 'L', LWORK >= max(1,N); */ /* if SIDE = 'R', LWORK >= max(1,M). */ /* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ /* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ /* blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); lquery = *lwork == -1; /* NQ is the order of Q and NW is the minimum dimension of WORK */ if (left) { nq = *m; nw = *n; } else { nq = *n; nw = *m; } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,*k)) { *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; } else if (*lwork < max(1,nw) && ! lquery) { *info = -12; } if (*info == 0) { /* Determine the block size. NB may be at most NBMAX, where NBMAX */ /* is used to define the local array T. */ /* Computing MIN */ /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DORMLQ", ch__1, m, n, k, &c_n1); nb = min(i__1,i__2); lwkopt = max(1,nw) * nb; work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORMLQ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { work[1] = 1.; return 0; } nbmin = 2; ldwork = nw; if (nb > 1 && nb < *k) { iws = nw * nb; if (*lwork < iws) { nb = *lwork / ldwork; /* Computing MAX */ /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORMLQ", ch__1, m, n, k, &c_n1); nbmin = max(i__1,i__2); } } else { iws = nw; } if (nb < nbmin || nb >= *k) { /* Use unblocked code */ _starpu_dorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], &iinfo); } else { /* Use blocked code */ if (left && notran || ! left && ! notran) { i1 = 1; i2 = *k; i3 = nb; } else { i1 = (*k - 1) / nb * nb + 1; i2 = 1; i3 = -nb; } if (left) { ni = *n; jc = 1; } else { mi = *m; ic = 1; } if (notran) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__4 = nb, i__5 = *k - i__ + 1; ib = min(i__4,i__5); /* Form the triangular factor of the block reflector */ /* H = H(i) H(i+1) . . . H(i+ib-1) */ i__4 = nq - i__ + 1; _starpu_dlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], t, &c__65); if (left) { /* H or H' is applied to C(i:m,1:n) */ mi = *m - i__ + 1; ic = i__; } else { /* H or H' is applied to C(1:m,i:n) */ ni = *n - i__ + 1; jc = i__; } /* Apply H or H' */ _starpu_dlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], ldc, &work[1], &ldwork); /* L10: */ } } work[1] = (doublereal) lwkopt; return 0; /* End of DORMLQ */ } /* _starpu_dormlq_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dormql.c000066400000000000000000000223371507764646700207010ustar00rootroot00000000000000/* dormql.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; static integer c__65 = 65; /* Subroutine */ int _starpu_dormql_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, i__5; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ integer i__; doublereal t[4160] /* was [65][64] */; integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; logical left; extern logical _starpu_lsame_(char *, char *); integer nbmin, iinfo; extern /* Subroutine */ int _starpu_dorm2l_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); logical notran; integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORMQL overwrites the general real M-by-N matrix C with */ /* SIDE = 'L' SIDE = 'R' */ /* TRANS = 'N': Q * C C * Q */ /* TRANS = 'T': Q**T * C C * Q**T */ /* where Q is a real orthogonal matrix defined as the product of k */ /* elementary reflectors */ /* Q = H(k) . . . H(2) H(1) */ /* as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N */ /* if SIDE = 'R'. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q**T from the Left; */ /* = 'R': apply Q or Q**T from the Right. */ /* TRANS (input) CHARACTER*1 */ /* = 'N': No transpose, apply Q; */ /* = 'T': Transpose, apply Q**T. */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines */ /* the matrix Q. */ /* If SIDE = 'L', M >= K >= 0; */ /* if SIDE = 'R', N >= K >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,K) */ /* The i-th column must contain the vector which defines the */ /* elementary reflector H(i), for i = 1,2,...,k, as returned by */ /* DGEQLF in the last k columns of its array argument A. */ /* A is modified by the routine but restored on exit. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. */ /* If SIDE = 'L', LDA >= max(1,M); */ /* if SIDE = 'R', LDA >= max(1,N). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGEQLF. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the M-by-N matrix C. */ /* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If SIDE = 'L', LWORK >= max(1,N); */ /* if SIDE = 'R', LWORK >= max(1,M). */ /* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ /* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ /* blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); lquery = *lwork == -1; /* NQ is the order of Q and NW is the minimum dimension of WORK */ if (left) { nq = *m; nw = max(1,*n); } else { nq = *n; nw = max(1,*m); } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; } if (*info == 0) { if (*m == 0 || *n == 0) { lwkopt = 1; } else { /* Determine the block size. NB may be at most NBMAX, where */ /* NBMAX is used to define the local array T. */ /* Computing MIN */ /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DORMQL", ch__1, m, n, k, &c_n1); nb = min(i__1,i__2); lwkopt = nw * nb; } work[1] = (doublereal) lwkopt; if (*lwork < nw && ! lquery) { *info = -12; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORMQL", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } nbmin = 2; ldwork = nw; if (nb > 1 && nb < *k) { iws = nw * nb; if (*lwork < iws) { nb = *lwork / ldwork; /* Computing MAX */ /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORMQL", ch__1, m, n, k, &c_n1); nbmin = max(i__1,i__2); } } else { iws = nw; } if (nb < nbmin || nb >= *k) { /* Use unblocked code */ _starpu_dorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], &iinfo); } else { /* Use blocked code */ if (left && notran || ! left && ! notran) { i1 = 1; i2 = *k; i3 = nb; } else { i1 = (*k - 1) / nb * nb + 1; i2 = 1; i3 = -nb; } if (left) { ni = *n; } else { mi = *m; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__4 = nb, i__5 = *k - i__ + 1; ib = min(i__4,i__5); /* Form the triangular factor of the block reflector */ /* H = H(i+ib-1) . . . H(i+1) H(i) */ i__4 = nq - *k + i__ + ib - 1; _starpu_dlarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] , lda, &tau[i__], t, &c__65); if (left) { /* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ mi = *m - *k + i__ + ib - 1; } else { /* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ ni = *n - *k + i__ + ib - 1; } /* Apply H or H' */ _starpu_dlarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & work[1], &ldwork); /* L10: */ } } work[1] = (doublereal) lwkopt; return 0; /* End of DORMQL */ } /* _starpu_dormql_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dormqr.c000066400000000000000000000223061507764646700207030ustar00rootroot00000000000000/* dormqr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; static integer c__65 = 65; /* Subroutine */ int _starpu_dormqr_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, i__5; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ integer i__; doublereal t[4160] /* was [65][64] */; integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; logical left; extern logical _starpu_lsame_(char *, char *); integer nbmin, iinfo; extern /* Subroutine */ int _starpu_dorm2r_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); logical notran; integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORMQR overwrites the general real M-by-N matrix C with */ /* SIDE = 'L' SIDE = 'R' */ /* TRANS = 'N': Q * C C * Q */ /* TRANS = 'T': Q**T * C C * Q**T */ /* where Q is a real orthogonal matrix defined as the product of k */ /* elementary reflectors */ /* Q = H(1) H(2) . . . H(k) */ /* as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N */ /* if SIDE = 'R'. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q**T from the Left; */ /* = 'R': apply Q or Q**T from the Right. */ /* TRANS (input) CHARACTER*1 */ /* = 'N': No transpose, apply Q; */ /* = 'T': Transpose, apply Q**T. */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines */ /* the matrix Q. */ /* If SIDE = 'L', M >= K >= 0; */ /* if SIDE = 'R', N >= K >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,K) */ /* The i-th column must contain the vector which defines the */ /* elementary reflector H(i), for i = 1,2,...,k, as returned by */ /* DGEQRF in the first k columns of its array argument A. */ /* A is modified by the routine but restored on exit. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. */ /* If SIDE = 'L', LDA >= max(1,M); */ /* if SIDE = 'R', LDA >= max(1,N). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGEQRF. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the M-by-N matrix C. */ /* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If SIDE = 'L', LWORK >= max(1,N); */ /* if SIDE = 'R', LWORK >= max(1,M). */ /* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ /* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ /* blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); lquery = *lwork == -1; /* NQ is the order of Q and NW is the minimum dimension of WORK */ if (left) { nq = *m; nw = *n; } else { nq = *n; nw = *m; } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; } else if (*lwork < max(1,nw) && ! lquery) { *info = -12; } if (*info == 0) { /* Determine the block size. NB may be at most NBMAX, where NBMAX */ /* is used to define the local array T. */ /* Computing MIN */ /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, m, n, k, &c_n1); nb = min(i__1,i__2); lwkopt = max(1,nw) * nb; work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORMQR", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { work[1] = 1.; return 0; } nbmin = 2; ldwork = nw; if (nb > 1 && nb < *k) { iws = nw * nb; if (*lwork < iws) { nb = *lwork / ldwork; /* Computing MAX */ /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORMQR", ch__1, m, n, k, &c_n1); nbmin = max(i__1,i__2); } } else { iws = nw; } if (nb < nbmin || nb >= *k) { /* Use unblocked code */ _starpu_dorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], &iinfo); } else { /* Use blocked code */ if (left && ! notran || ! left && notran) { i1 = 1; i2 = *k; i3 = nb; } else { i1 = (*k - 1) / nb * nb + 1; i2 = 1; i3 = -nb; } if (left) { ni = *n; jc = 1; } else { mi = *m; ic = 1; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__4 = nb, i__5 = *k - i__ + 1; ib = min(i__4,i__5); /* Form the triangular factor of the block reflector */ /* H = H(i) H(i+1) . . . H(i+ib-1) */ i__4 = nq - i__ + 1; _starpu_dlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], t, &c__65) ; if (left) { /* H or H' is applied to C(i:m,1:n) */ mi = *m - i__ + 1; ic = i__; } else { /* H or H' is applied to C(1:m,i:n) */ ni = *n - i__ + 1; jc = i__; } /* Apply H or H' */ _starpu_dlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], ldc, &work[1], &ldwork); /* L10: */ } } work[1] = (doublereal) lwkopt; return 0; /* End of DORMQR */ } /* _starpu_dormqr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dormr2.c000066400000000000000000000140551507764646700206060ustar00rootroot00000000000000/* dormr2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dormr2_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; /* Local variables */ integer i__, i1, i2, i3, mi, ni, nq; doublereal aii; logical left; extern /* Subroutine */ int _starpu_dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical notran; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORMR2 overwrites the general real m by n matrix C with */ /* Q * C if SIDE = 'L' and TRANS = 'N', or */ /* Q'* C if SIDE = 'L' and TRANS = 'T', or */ /* C * Q if SIDE = 'R' and TRANS = 'N', or */ /* C * Q' if SIDE = 'R' and TRANS = 'T', */ /* where Q is a real orthogonal matrix defined as the product of k */ /* elementary reflectors */ /* Q = H(1) H(2) . . . H(k) */ /* as returned by DGERQF. Q is of order m if SIDE = 'L' and of order n */ /* if SIDE = 'R'. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q' from the Left */ /* = 'R': apply Q or Q' from the Right */ /* TRANS (input) CHARACTER*1 */ /* = 'N': apply Q (No transpose) */ /* = 'T': apply Q' (Transpose) */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines */ /* the matrix Q. */ /* If SIDE = 'L', M >= K >= 0; */ /* if SIDE = 'R', N >= K >= 0. */ /* A (input) DOUBLE PRECISION array, dimension */ /* (LDA,M) if SIDE = 'L', */ /* (LDA,N) if SIDE = 'R' */ /* The i-th row must contain the vector which defines the */ /* elementary reflector H(i), for i = 1,2,...,k, as returned by */ /* DGERQF in the last k rows of its array argument A. */ /* A is modified by the routine but restored on exit. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,K). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGERQF. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the m by n matrix C. */ /* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (N) if SIDE = 'L', */ /* (M) if SIDE = 'R' */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); /* NQ is the order of Q */ if (left) { nq = *m; } else { nq = *n; } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,*k)) { *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORMR2", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { return 0; } if (left && ! notran || ! left && notran) { i1 = 1; i2 = *k; i3 = 1; } else { i1 = *k; i2 = 1; i3 = -1; } if (left) { ni = *n; } else { mi = *m; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { if (left) { /* H(i) is applied to C(1:m-k+i,1:n) */ mi = *m - *k + i__; } else { /* H(i) is applied to C(1:m,1:n-k+i) */ ni = *n - *k + i__; } /* Apply H(i) */ aii = a[i__ + (nq - *k + i__) * a_dim1]; a[i__ + (nq - *k + i__) * a_dim1] = 1.; _starpu_dlarf_(side, &mi, &ni, &a[i__ + a_dim1], lda, &tau[i__], &c__[ c_offset], ldc, &work[1]); a[i__ + (nq - *k + i__) * a_dim1] = aii; /* L10: */ } return 0; /* End of DORMR2 */ } /* _starpu_dormr2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dormr3.c000066400000000000000000000146771507764646700206210ustar00rootroot00000000000000/* dormr3.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dormr3_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, doublereal *a, integer *lda, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; /* Local variables */ integer i__, i1, i2, i3, ja, ic, jc, mi, ni, nq; logical left; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dlarz_(char *, integer *, integer *, integer * , doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); logical notran; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORMR3 overwrites the general real m by n matrix C with */ /* Q * C if SIDE = 'L' and TRANS = 'N', or */ /* Q'* C if SIDE = 'L' and TRANS = 'T', or */ /* C * Q if SIDE = 'R' and TRANS = 'N', or */ /* C * Q' if SIDE = 'R' and TRANS = 'T', */ /* where Q is a real orthogonal matrix defined as the product of k */ /* elementary reflectors */ /* Q = H(1) H(2) . . . H(k) */ /* as returned by DTZRZF. Q is of order m if SIDE = 'L' and of order n */ /* if SIDE = 'R'. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q' from the Left */ /* = 'R': apply Q or Q' from the Right */ /* TRANS (input) CHARACTER*1 */ /* = 'N': apply Q (No transpose) */ /* = 'T': apply Q' (Transpose) */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines */ /* the matrix Q. */ /* If SIDE = 'L', M >= K >= 0; */ /* if SIDE = 'R', N >= K >= 0. */ /* L (input) INTEGER */ /* The number of columns of the matrix A containing */ /* the meaningful part of the Householder reflectors. */ /* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */ /* A (input) DOUBLE PRECISION array, dimension */ /* (LDA,M) if SIDE = 'L', */ /* (LDA,N) if SIDE = 'R' */ /* The i-th row must contain the vector which defines the */ /* elementary reflector H(i), for i = 1,2,...,k, as returned by */ /* DTZRZF in the last k rows of its array argument A. */ /* A is modified by the routine but restored on exit. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,K). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DTZRZF. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the m-by-n matrix C. */ /* On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (N) if SIDE = 'L', */ /* (M) if SIDE = 'R' */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); /* NQ is the order of Q */ if (left) { nq = *m; } else { nq = *n; } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*l < 0 || left && *l > *m || ! left && *l > *n) { *info = -6; } else if (*lda < max(1,*k)) { *info = -8; } else if (*ldc < max(1,*m)) { *info = -11; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORMR3", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { return 0; } if (left && ! notran || ! left && notran) { i1 = 1; i2 = *k; i3 = 1; } else { i1 = *k; i2 = 1; i3 = -1; } if (left) { ni = *n; ja = *m - *l + 1; jc = 1; } else { mi = *m; ja = *n - *l + 1; ic = 1; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { if (left) { /* H(i) or H(i)' is applied to C(i:m,1:n) */ mi = *m - i__ + 1; ic = i__; } else { /* H(i) or H(i)' is applied to C(1:m,i:n) */ ni = *n - i__ + 1; jc = i__; } /* Apply H(i) or H(i)' */ _starpu_dlarz_(side, &mi, &ni, l, &a[i__ + ja * a_dim1], lda, &tau[i__], &c__[ ic + jc * c_dim1], ldc, &work[1]); /* L10: */ } return 0; /* End of DORMR3 */ } /* _starpu_dormr3_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dormrq.c000066400000000000000000000225711507764646700207070ustar00rootroot00000000000000/* dormrq.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; static integer c__65 = 65; /* Subroutine */ int _starpu_dormrq_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, i__5; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ integer i__; doublereal t[4160] /* was [65][64] */; integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; logical left; extern logical _starpu_lsame_(char *, char *); integer nbmin, iinfo; extern /* Subroutine */ int _starpu_dormr2_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); logical notran; integer ldwork; char transt[1]; integer lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORMRQ overwrites the general real M-by-N matrix C with */ /* SIDE = 'L' SIDE = 'R' */ /* TRANS = 'N': Q * C C * Q */ /* TRANS = 'T': Q**T * C C * Q**T */ /* where Q is a real orthogonal matrix defined as the product of k */ /* elementary reflectors */ /* Q = H(1) H(2) . . . H(k) */ /* as returned by DGERQF. Q is of order M if SIDE = 'L' and of order N */ /* if SIDE = 'R'. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q**T from the Left; */ /* = 'R': apply Q or Q**T from the Right. */ /* TRANS (input) CHARACTER*1 */ /* = 'N': No transpose, apply Q; */ /* = 'T': Transpose, apply Q**T. */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines */ /* the matrix Q. */ /* If SIDE = 'L', M >= K >= 0; */ /* if SIDE = 'R', N >= K >= 0. */ /* A (input) DOUBLE PRECISION array, dimension */ /* (LDA,M) if SIDE = 'L', */ /* (LDA,N) if SIDE = 'R' */ /* The i-th row must contain the vector which defines the */ /* elementary reflector H(i), for i = 1,2,...,k, as returned by */ /* DGERQF in the last k rows of its array argument A. */ /* A is modified by the routine but restored on exit. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,K). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DGERQF. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the M-by-N matrix C. */ /* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If SIDE = 'L', LWORK >= max(1,N); */ /* if SIDE = 'R', LWORK >= max(1,M). */ /* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ /* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ /* blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); lquery = *lwork == -1; /* NQ is the order of Q and NW is the minimum dimension of WORK */ if (left) { nq = *m; nw = max(1,*n); } else { nq = *n; nw = max(1,*m); } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,*k)) { *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; } if (*info == 0) { if (*m == 0 || *n == 0) { lwkopt = 1; } else { /* Determine the block size. NB may be at most NBMAX, where */ /* NBMAX is used to define the local array T. */ /* Computing MIN */ /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DORMRQ", ch__1, m, n, k, &c_n1); nb = min(i__1,i__2); lwkopt = nw * nb; } work[1] = (doublereal) lwkopt; if (*lwork < nw && ! lquery) { *info = -12; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORMRQ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } nbmin = 2; ldwork = nw; if (nb > 1 && nb < *k) { iws = nw * nb; if (*lwork < iws) { nb = *lwork / ldwork; /* Computing MAX */ /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORMRQ", ch__1, m, n, k, &c_n1); nbmin = max(i__1,i__2); } } else { iws = nw; } if (nb < nbmin || nb >= *k) { /* Use unblocked code */ _starpu_dormr2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], &iinfo); } else { /* Use blocked code */ if (left && ! notran || ! left && notran) { i1 = 1; i2 = *k; i3 = nb; } else { i1 = (*k - 1) / nb * nb + 1; i2 = 1; i3 = -nb; } if (left) { ni = *n; } else { mi = *m; } if (notran) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__4 = nb, i__5 = *k - i__ + 1; ib = min(i__4,i__5); /* Form the triangular factor of the block reflector */ /* H = H(i+ib-1) . . . H(i+1) H(i) */ i__4 = nq - *k + i__ + ib - 1; _starpu_dlarft_("Backward", "Rowwise", &i__4, &ib, &a[i__ + a_dim1], lda, &tau[i__], t, &c__65); if (left) { /* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ mi = *m - *k + i__ + ib - 1; } else { /* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ ni = *n - *k + i__ + ib - 1; } /* Apply H or H' */ _starpu_dlarfb_(side, transt, "Backward", "Rowwise", &mi, &ni, &ib, &a[ i__ + a_dim1], lda, t, &c__65, &c__[c_offset], ldc, &work[ 1], &ldwork); /* L10: */ } } work[1] = (doublereal) lwkopt; return 0; /* End of DORMRQ */ } /* _starpu_dormrq_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dormrz.c000066400000000000000000000237501507764646700207200ustar00rootroot00000000000000/* dormrz.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; static integer c__65 = 65; /* Subroutine */ int _starpu_dormrz_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, doublereal *a, integer *lda, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, i__5; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ integer i__; doublereal t[4160] /* was [65][64] */; integer i1, i2, i3, ib, ic, ja, jc, nb, mi, ni, nq, nw, iws; logical left; extern logical _starpu_lsame_(char *, char *); integer nbmin, iinfo; extern /* Subroutine */ int _starpu_dormr3_(char *, char *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dlarzb_(char *, char *, char *, char *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarzt_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical notran; integer ldwork; char transt[1]; integer lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* January 2007 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORMRZ overwrites the general real M-by-N matrix C with */ /* SIDE = 'L' SIDE = 'R' */ /* TRANS = 'N': Q * C C * Q */ /* TRANS = 'T': Q**T * C C * Q**T */ /* where Q is a real orthogonal matrix defined as the product of k */ /* elementary reflectors */ /* Q = H(1) H(2) . . . H(k) */ /* as returned by DTZRZF. Q is of order M if SIDE = 'L' and of order N */ /* if SIDE = 'R'. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q**T from the Left; */ /* = 'R': apply Q or Q**T from the Right. */ /* TRANS (input) CHARACTER*1 */ /* = 'N': No transpose, apply Q; */ /* = 'T': Transpose, apply Q**T. */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* K (input) INTEGER */ /* The number of elementary reflectors whose product defines */ /* the matrix Q. */ /* If SIDE = 'L', M >= K >= 0; */ /* if SIDE = 'R', N >= K >= 0. */ /* L (input) INTEGER */ /* The number of columns of the matrix A containing */ /* the meaningful part of the Householder reflectors. */ /* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */ /* A (input) DOUBLE PRECISION array, dimension */ /* (LDA,M) if SIDE = 'L', */ /* (LDA,N) if SIDE = 'R' */ /* The i-th row must contain the vector which defines the */ /* elementary reflector H(i), for i = 1,2,...,k, as returned by */ /* DTZRZF in the last k rows of its array argument A. */ /* A is modified by the routine but restored on exit. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,K). */ /* TAU (input) DOUBLE PRECISION array, dimension (K) */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DTZRZF. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the M-by-N matrix C. */ /* On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If SIDE = 'L', LWORK >= max(1,N); */ /* if SIDE = 'R', LWORK >= max(1,M). */ /* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ /* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ /* blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); notran = _starpu_lsame_(trans, "N"); lquery = *lwork == -1; /* NQ is the order of Q and NW is the minimum dimension of WORK */ if (left) { nq = *m; nw = max(1,*n); } else { nq = *n; nw = max(1,*m); } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*l < 0 || left && *l > *m || ! left && *l > *n) { *info = -6; } else if (*lda < max(1,*k)) { *info = -8; } else if (*ldc < max(1,*m)) { *info = -11; } if (*info == 0) { if (*m == 0 || *n == 0) { lwkopt = 1; } else { /* Determine the block size. NB may be at most NBMAX, where */ /* NBMAX is used to define the local array T. */ /* Computing MIN */ /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 64, i__2 = _starpu_ilaenv_(&c__1, "DORMRQ", ch__1, m, n, k, &c_n1); nb = min(i__1,i__2); lwkopt = nw * nb; } work[1] = (doublereal) lwkopt; if (*lwork < max(1,nw) && ! lquery) { *info = -13; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DORMRZ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { work[1] = 1.; return 0; } nbmin = 2; ldwork = nw; if (nb > 1 && nb < *k) { iws = nw * nb; if (*lwork < iws) { nb = *lwork / ldwork; /* Computing MAX */ /* Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DORMRQ", ch__1, m, n, k, &c_n1); nbmin = max(i__1,i__2); } } else { iws = nw; } if (nb < nbmin || nb >= *k) { /* Use unblocked code */ _starpu_dormr3_(side, trans, m, n, k, l, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], &iinfo); } else { /* Use blocked code */ if (left && ! notran || ! left && notran) { i1 = 1; i2 = *k; i3 = nb; } else { i1 = (*k - 1) / nb * nb + 1; i2 = 1; i3 = -nb; } if (left) { ni = *n; jc = 1; ja = *m - *l + 1; } else { mi = *m; ic = 1; ja = *n - *l + 1; } if (notran) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__4 = nb, i__5 = *k - i__ + 1; ib = min(i__4,i__5); /* Form the triangular factor of the block reflector */ /* H = H(i+ib-1) . . . H(i+1) H(i) */ _starpu_dlarzt_("Backward", "Rowwise", l, &ib, &a[i__ + ja * a_dim1], lda, &tau[i__], t, &c__65); if (left) { /* H or H' is applied to C(i:m,1:n) */ mi = *m - i__ + 1; ic = i__; } else { /* H or H' is applied to C(1:m,i:n) */ ni = *n - i__ + 1; jc = i__; } /* Apply H or H' */ _starpu_dlarzb_(side, transt, "Backward", "Rowwise", &mi, &ni, &ib, l, &a[ i__ + ja * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1] , ldc, &work[1], &ldwork); /* L10: */ } } work[1] = (doublereal) lwkopt; return 0; /* End of DORMRZ */ } /* _starpu_dormrz_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dormtr.c000066400000000000000000000211461507764646700207070ustar00rootroot00000000000000/* dormtr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; /* Subroutine */ int _starpu_dormtr_(char *side, char *uplo, char *trans, integer *m, integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ integer i1, i2, nb, mi, ni, nq, nw; logical left; extern logical _starpu_lsame_(char *, char *); integer iinfo; logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dormql_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DORMTR overwrites the general real M-by-N matrix C with */ /* SIDE = 'L' SIDE = 'R' */ /* TRANS = 'N': Q * C C * Q */ /* TRANS = 'T': Q**T * C C * Q**T */ /* where Q is a real orthogonal matrix of order nq, with nq = m if */ /* SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of */ /* nq-1 elementary reflectors, as returned by DSYTRD: */ /* if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); */ /* if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'L': apply Q or Q**T from the Left; */ /* = 'R': apply Q or Q**T from the Right. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A contains elementary reflectors */ /* from DSYTRD; */ /* = 'L': Lower triangle of A contains elementary reflectors */ /* from DSYTRD. */ /* TRANS (input) CHARACTER*1 */ /* = 'N': No transpose, apply Q; */ /* = 'T': Transpose, apply Q**T. */ /* M (input) INTEGER */ /* The number of rows of the matrix C. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix C. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension */ /* (LDA,M) if SIDE = 'L' */ /* (LDA,N) if SIDE = 'R' */ /* The vectors which define the elementary reflectors, as */ /* returned by DSYTRD. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. */ /* LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. */ /* TAU (input) DOUBLE PRECISION array, dimension */ /* (M-1) if SIDE = 'L' */ /* (N-1) if SIDE = 'R' */ /* TAU(i) must contain the scalar factor of the elementary */ /* reflector H(i), as returned by DSYTRD. */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the M-by-N matrix C. */ /* On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If SIDE = 'L', LWORK >= max(1,N); */ /* if SIDE = 'R', LWORK >= max(1,M). */ /* For optimum performance LWORK >= N*NB if SIDE = 'L', and */ /* LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ /* blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = _starpu_lsame_(side, "L"); upper = _starpu_lsame_(uplo, "U"); lquery = *lwork == -1; /* NQ is the order of Q and NW is the minimum dimension of WORK */ if (left) { nq = *m; nw = *n; } else { nq = *n; nw = *m; } if (! left && ! _starpu_lsame_(side, "R")) { *info = -1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T")) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; } else if (*lwork < max(1,nw) && ! lquery) { *info = -12; } if (*info == 0) { if (upper) { if (left) { /* Writing concatenation */ i__1[0] = 1, a__1[0] = side; i__1[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); i__2 = *m - 1; i__3 = *m - 1; nb = _starpu_ilaenv_(&c__1, "DORMQL", ch__1, &i__2, n, &i__3, &c_n1); } else { /* Writing concatenation */ i__1[0] = 1, a__1[0] = side; i__1[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); i__2 = *n - 1; i__3 = *n - 1; nb = _starpu_ilaenv_(&c__1, "DORMQL", ch__1, m, &i__2, &i__3, &c_n1); } } else { if (left) { /* Writing concatenation */ i__1[0] = 1, a__1[0] = side; i__1[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); i__2 = *m - 1; i__3 = *m - 1; nb = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, &i__2, n, &i__3, &c_n1); } else { /* Writing concatenation */ i__1[0] = 1, a__1[0] = side; i__1[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); i__2 = *n - 1; i__3 = *n - 1; nb = _starpu_ilaenv_(&c__1, "DORMQR", ch__1, m, &i__2, &i__3, &c_n1); } } lwkopt = max(1,nw) * nb; work[1] = (doublereal) lwkopt; } if (*info != 0) { i__2 = -(*info); _starpu_xerbla_("DORMTR", &i__2); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0 || nq == 1) { work[1] = 1.; return 0; } if (left) { mi = *m - 1; ni = *n; } else { mi = *m; ni = *n - 1; } if (upper) { /* Q was determined by a call to DSYTRD with UPLO = 'U' */ i__2 = nq - 1; _starpu_dormql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); } else { /* Q was determined by a call to DSYTRD with UPLO = 'L' */ if (left) { i1 = 2; i2 = 1; } else { i1 = 1; i2 = 2; } i__2 = nq - 1; _starpu_dormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); } work[1] = (doublereal) lwkopt; return 0; /* End of DORMTR */ } /* _starpu_dormtr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpbcon.c000066400000000000000000000154541507764646700206520ustar00rootroot00000000000000/* dpbcon.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dpbcon_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, doublereal *anorm, doublereal *rcond, doublereal * work, integer *iwork, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1; doublereal d__1; /* Local variables */ integer ix, kase; doublereal scale; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); doublereal scalel; extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dlatbs_(char *, char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *); doublereal scaleu; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal ainvnm; char normin[1]; doublereal smlnum; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPBCON estimates the reciprocal of the condition number (in the */ /* 1-norm) of a real symmetric positive definite band matrix using the */ /* Cholesky factorization A = U**T*U or A = L*L**T computed by DPBTRF. */ /* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ /* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangular factor stored in AB; */ /* = 'L': Lower triangular factor stored in AB. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* The triangular factor U or L from the Cholesky factorization */ /* A = U**T*U or A = L*L**T of the band matrix A, stored in the */ /* first KD+1 rows of the array. The j-th column of U or L is */ /* stored in the j-th column of the array AB as follows: */ /* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD+1. */ /* ANORM (input) DOUBLE PRECISION */ /* The 1-norm (or infinity-norm) of the symmetric band matrix A. */ /* RCOND (output) DOUBLE PRECISION */ /* The reciprocal of the condition number of the matrix A, */ /* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ /* estimate of the 1-norm of inv(A) computed in this routine. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --work; --iwork; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kd < 0) { *info = -3; } else if (*ldab < *kd + 1) { *info = -5; } else if (*anorm < 0.) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPBCON", &i__1); return 0; } /* Quick return if possible */ *rcond = 0.; if (*n == 0) { *rcond = 1.; return 0; } else if (*anorm == 0.) { return 0; } smlnum = _starpu_dlamch_("Safe minimum"); /* Estimate the 1-norm of the inverse. */ kase = 0; *(unsigned char *)normin = 'N'; L10: _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); if (kase != 0) { if (upper) { /* Multiply by inv(U'). */ _starpu_dlatbs_("Upper", "Transpose", "Non-unit", normin, n, kd, &ab[ ab_offset], ldab, &work[1], &scalel, &work[(*n << 1) + 1], info); *(unsigned char *)normin = 'Y'; /* Multiply by inv(U). */ _starpu_dlatbs_("Upper", "No transpose", "Non-unit", normin, n, kd, &ab[ ab_offset], ldab, &work[1], &scaleu, &work[(*n << 1) + 1], info); } else { /* Multiply by inv(L). */ _starpu_dlatbs_("Lower", "No transpose", "Non-unit", normin, n, kd, &ab[ ab_offset], ldab, &work[1], &scalel, &work[(*n << 1) + 1], info); *(unsigned char *)normin = 'Y'; /* Multiply by inv(L'). */ _starpu_dlatbs_("Lower", "Transpose", "Non-unit", normin, n, kd, &ab[ ab_offset], ldab, &work[1], &scaleu, &work[(*n << 1) + 1], info); } /* Multiply by 1/SCALE if doing so will not cause overflow. */ scale = scalel * scaleu; if (scale != 1.) { ix = _starpu_idamax_(n, &work[1], &c__1); if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) { goto L20; } _starpu_drscl_(n, &scale, &work[1], &c__1); } goto L10; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { *rcond = 1. / ainvnm / *anorm; } L20: return 0; /* End of DPBCON */ } /* _starpu_dpbcon_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpbequ.c000066400000000000000000000130471507764646700206610ustar00rootroot00000000000000/* dpbequ.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dpbequ_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j; doublereal smin; extern logical _starpu_lsame_(char *, char *); logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPBEQU computes row and column scalings intended to equilibrate a */ /* symmetric positive definite band matrix A and reduce its condition */ /* number (with respect to the two-norm). S contains the scale factors, */ /* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with */ /* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This */ /* choice of S puts the condition number of B within a factor N of the */ /* smallest possible condition number over all possible diagonal */ /* scalings. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangular of A is stored; */ /* = 'L': Lower triangular of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* The upper or lower triangle of the symmetric band matrix A, */ /* stored in the first KD+1 rows of the array. The j-th column */ /* of A is stored in the j-th column of the array AB as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* LDAB (input) INTEGER */ /* The leading dimension of the array A. LDAB >= KD+1. */ /* S (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, S contains the scale factors for A. */ /* SCOND (output) DOUBLE PRECISION */ /* If INFO = 0, S contains the ratio of the smallest S(i) to */ /* the largest S(i). If SCOND >= 0.1 and AMAX is neither too */ /* large nor too small, it is not worth scaling by S. */ /* AMAX (output) DOUBLE PRECISION */ /* Absolute value of largest matrix element. If AMAX is very */ /* close to overflow or very close to underflow, the matrix */ /* should be scaled. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = i, the i-th diagonal element is nonpositive. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --s; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kd < 0) { *info = -3; } else if (*ldab < *kd + 1) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPBEQU", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { *scond = 1.; *amax = 0.; return 0; } if (upper) { j = *kd + 1; } else { j = 1; } /* Initialize SMIN and AMAX. */ s[1] = ab[j + ab_dim1]; smin = s[1]; *amax = s[1]; /* Find the minimum and maximum diagonal elements. */ i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { s[i__] = ab[j + i__ * ab_dim1]; /* Computing MIN */ d__1 = smin, d__2 = s[i__]; smin = min(d__1,d__2); /* Computing MAX */ d__1 = *amax, d__2 = s[i__]; *amax = max(d__1,d__2); /* L10: */ } if (smin <= 0.) { /* Find the first non-positive diagonal element and return. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (s[i__] <= 0.) { *info = i__; return 0; } /* L20: */ } } else { /* Set the scale factors to the reciprocals */ /* of the diagonal elements. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { s[i__] = 1. / sqrt(s[i__]); /* L30: */ } /* Compute SCOND = min(S(I)) / max(S(I)) */ *scond = sqrt(smin) / sqrt(*amax); } return 0; /* End of DPBEQU */ } /* _starpu_dpbequ_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpbrfs.c000066400000000000000000000320171507764646700206570ustar00rootroot00000000000000/* dpbrfs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b12 = -1.; static doublereal c_b14 = 1.; /* Subroutine */ int _starpu_dpbrfs_(char *uplo, integer *n, integer *kd, integer * nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * ferr, doublereal *berr, doublereal *work, integer *iwork, integer * info) { /* System generated locals */ integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j, k, l; doublereal s, xk; integer nz; doublereal eps; integer kase; doublereal safe1, safe2; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_dsbmv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *) ; integer count; logical upper; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpbtrs_( char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); doublereal lstres; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPBRFS improves the computed solution to a system of linear */ /* equations when the coefficient matrix is symmetric positive definite */ /* and banded, and provides error bounds and backward error estimates */ /* for the solution. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* The upper or lower triangle of the symmetric band matrix A, */ /* stored in the first KD+1 rows of the array. The j-th column */ /* of A is stored in the j-th column of the array AB as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD+1. */ /* AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) */ /* The triangular factor U or L from the Cholesky factorization */ /* A = U**T*U or A = L*L**T of the band matrix A as computed by */ /* DPBTRF, in the same storage format as A (see AB). */ /* LDAFB (input) INTEGER */ /* The leading dimension of the array AFB. LDAFB >= KD+1. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* On entry, the solution matrix X, as computed by DPBTRS. */ /* On exit, the improved solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Internal Parameters */ /* =================== */ /* ITMAX is the maximum number of steps of iterative refinement. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; afb_dim1 = *ldafb; afb_offset = 1 + afb_dim1; afb -= afb_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kd < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*ldab < *kd + 1) { *info = -6; } else if (*ldafb < *kd + 1) { *info = -8; } else if (*ldb < max(1,*n)) { *info = -10; } else if (*ldx < max(1,*n)) { *info = -12; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPBRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ /* Computing MIN */ i__1 = *n + 1, i__2 = (*kd << 1) + 2; nz = min(i__1,i__2); eps = _starpu_dlamch_("Epsilon"); safmin = _starpu_dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - A * X */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); _starpu_dsbmv_(uplo, n, kd, &c_b12, &ab[ab_offset], ldab, &x[j * x_dim1 + 1], &c__1, &c_b14, &work[*n + 1], &c__1); /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. If the i-th component of the denominator is less */ /* than SAFE2, then SAFE1 is added to the i-th components of the */ /* numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); /* L30: */ } /* Compute abs(A)*abs(X) + abs(B). */ if (upper) { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; xk = (d__1 = x[k + j * x_dim1], abs(d__1)); l = *kd + 1 - k; /* Computing MAX */ i__3 = 1, i__4 = k - *kd; i__5 = k - 1; for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) { work[i__] += (d__1 = ab[l + i__ + k * ab_dim1], abs(d__1)) * xk; s += (d__1 = ab[l + i__ + k * ab_dim1], abs(d__1)) * ( d__2 = x[i__ + j * x_dim1], abs(d__2)); /* L40: */ } work[k] = work[k] + (d__1 = ab[*kd + 1 + k * ab_dim1], abs( d__1)) * xk + s; /* L50: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; xk = (d__1 = x[k + j * x_dim1], abs(d__1)); work[k] += (d__1 = ab[k * ab_dim1 + 1], abs(d__1)) * xk; l = 1 - k; /* Computing MIN */ i__3 = *n, i__4 = k + *kd; i__5 = min(i__3,i__4); for (i__ = k + 1; i__ <= i__5; ++i__) { work[i__] += (d__1 = ab[l + i__ + k * ab_dim1], abs(d__1)) * xk; s += (d__1 = ab[l + i__ + k * ab_dim1], abs(d__1)) * ( d__2 = x[i__ + j * x_dim1], abs(d__2)); /* L60: */ } work[k] += s; /* L70: */ } } s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L80: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if */ /* 1) The residual BERR(J) is larger than machine epsilon, and */ /* 2) BERR(J) decreased by at least a factor of 2 during the */ /* last iteration, and */ /* 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { /* Update solution and try again. */ _starpu_dpbtrs_(uplo, n, kd, &c__1, &afb[afb_offset], ldafb, &work[*n + 1] , n, info); _starpu_daxpy_(n, &c_b14, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) ; lstres = berr[j]; ++count; goto L20; } /* Bound error from formula */ /* norm(X - XTRUE) / norm(X) .le. FERR = */ /* norm( abs(inv(A))* */ /* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */ /* where */ /* norm(Z) is the magnitude of the largest component of Z */ /* inv(A) is the inverse of A */ /* abs(Z) is the componentwise absolute value of the matrix or */ /* vector Z */ /* NZ is the maximum number of nonzeros in any row of A, plus 1 */ /* EPS is machine epsilon */ /* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */ /* is incremented by SAFE1 if the i-th component of */ /* abs(A)*abs(X) + abs(B) is less than SAFE2. */ /* Use DLACN2 to estimate the infinity-norm of the matrix */ /* inv(A) * diag(W), */ /* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L90: */ } kase = 0; L100: _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase, isave); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(A'). */ _starpu_dpbtrs_(uplo, n, kd, &c__1, &afb[afb_offset], ldafb, &work[*n + 1], n, info); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] *= work[i__]; /* L110: */ } } else if (kase == 2) { /* Multiply by inv(A)*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] *= work[i__]; /* L120: */ } _starpu_dpbtrs_(uplo, n, kd, &c__1, &afb[afb_offset], ldafb, &work[*n + 1], n, info); } goto L100; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); lstres = max(d__2,d__3); /* L130: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L140: */ } return 0; /* End of DPBRFS */ } /* _starpu_dpbrfs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpbstf.c000066400000000000000000000214271507764646700206640ustar00rootroot00000000000000/* dpbstf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b9 = -1.; /* Subroutine */ int _starpu_dpbstf_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer j, m, km; doublereal ajj; integer kld; extern /* Subroutine */ int _starpu_dsyr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dscal_( integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPBSTF computes a split Cholesky factorization of a real */ /* symmetric positive definite band matrix A. */ /* This routine is designed to be used in conjunction with DSBGST. */ /* The factorization has the form A = S**T*S where S is a band matrix */ /* of the same bandwidth as A and the following structure: */ /* S = ( U ) */ /* ( M L ) */ /* where U is upper triangular of order m = (n+kd)/2, and L is lower */ /* triangular of order n-m. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix A, stored in the first kd+1 rows of the array. The */ /* j-th column of A is stored in the j-th column of the array AB */ /* as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* On exit, if INFO = 0, the factor S from the split Cholesky */ /* factorization A = S**T*S. See Further Details. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD+1. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the factorization could not be completed, */ /* because the updated element a(i,i) was negative; the */ /* matrix A is not positive definite. */ /* Further Details */ /* =============== */ /* The band storage scheme is illustrated by the following example, when */ /* N = 7, KD = 2: */ /* S = ( s11 s12 s13 ) */ /* ( s22 s23 s24 ) */ /* ( s33 s34 ) */ /* ( s44 ) */ /* ( s53 s54 s55 ) */ /* ( s64 s65 s66 ) */ /* ( s75 s76 s77 ) */ /* If UPLO = 'U', the array AB holds: */ /* on entry: on exit: */ /* * * a13 a24 a35 a46 a57 * * s13 s24 s53 s64 s75 */ /* * a12 a23 a34 a45 a56 a67 * s12 s23 s34 s54 s65 s76 */ /* a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 */ /* If UPLO = 'L', the array AB holds: */ /* on entry: on exit: */ /* a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 */ /* a21 a32 a43 a54 a65 a76 * s12 s23 s34 s54 s65 s76 * */ /* a31 a42 a53 a64 a64 * * s13 s24 s53 s64 s75 * * */ /* Array elements marked * are not used by the routine. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kd < 0) { *info = -3; } else if (*ldab < *kd + 1) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPBSTF", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Computing MAX */ i__1 = 1, i__2 = *ldab - 1; kld = max(i__1,i__2); /* Set the splitting point m. */ m = (*n + *kd) / 2; if (upper) { /* Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m). */ i__1 = m + 1; for (j = *n; j >= i__1; --j) { /* Compute s(j,j) and test for non-positive-definiteness. */ ajj = ab[*kd + 1 + j * ab_dim1]; if (ajj <= 0.) { goto L50; } ajj = sqrt(ajj); ab[*kd + 1 + j * ab_dim1] = ajj; /* Computing MIN */ i__2 = j - 1; km = min(i__2,*kd); /* Compute elements j-km:j-1 of the j-th column and update the */ /* the leading submatrix within the band. */ d__1 = 1. / ajj; _starpu_dscal_(&km, &d__1, &ab[*kd + 1 - km + j * ab_dim1], &c__1); _starpu_dsyr_("Upper", &km, &c_b9, &ab[*kd + 1 - km + j * ab_dim1], &c__1, &ab[*kd + 1 + (j - km) * ab_dim1], &kld); /* L10: */ } /* Factorize the updated submatrix A(1:m,1:m) as U**T*U. */ i__1 = m; for (j = 1; j <= i__1; ++j) { /* Compute s(j,j) and test for non-positive-definiteness. */ ajj = ab[*kd + 1 + j * ab_dim1]; if (ajj <= 0.) { goto L50; } ajj = sqrt(ajj); ab[*kd + 1 + j * ab_dim1] = ajj; /* Computing MIN */ i__2 = *kd, i__3 = m - j; km = min(i__2,i__3); /* Compute elements j+1:j+km of the j-th row and update the */ /* trailing submatrix within the band. */ if (km > 0) { d__1 = 1. / ajj; _starpu_dscal_(&km, &d__1, &ab[*kd + (j + 1) * ab_dim1], &kld); _starpu_dsyr_("Upper", &km, &c_b9, &ab[*kd + (j + 1) * ab_dim1], &kld, &ab[*kd + 1 + (j + 1) * ab_dim1], &kld); } /* L20: */ } } else { /* Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m). */ i__1 = m + 1; for (j = *n; j >= i__1; --j) { /* Compute s(j,j) and test for non-positive-definiteness. */ ajj = ab[j * ab_dim1 + 1]; if (ajj <= 0.) { goto L50; } ajj = sqrt(ajj); ab[j * ab_dim1 + 1] = ajj; /* Computing MIN */ i__2 = j - 1; km = min(i__2,*kd); /* Compute elements j-km:j-1 of the j-th row and update the */ /* trailing submatrix within the band. */ d__1 = 1. / ajj; _starpu_dscal_(&km, &d__1, &ab[km + 1 + (j - km) * ab_dim1], &kld); _starpu_dsyr_("Lower", &km, &c_b9, &ab[km + 1 + (j - km) * ab_dim1], &kld, &ab[(j - km) * ab_dim1 + 1], &kld); /* L30: */ } /* Factorize the updated submatrix A(1:m,1:m) as U**T*U. */ i__1 = m; for (j = 1; j <= i__1; ++j) { /* Compute s(j,j) and test for non-positive-definiteness. */ ajj = ab[j * ab_dim1 + 1]; if (ajj <= 0.) { goto L50; } ajj = sqrt(ajj); ab[j * ab_dim1 + 1] = ajj; /* Computing MIN */ i__2 = *kd, i__3 = m - j; km = min(i__2,i__3); /* Compute elements j+1:j+km of the j-th column and update the */ /* trailing submatrix within the band. */ if (km > 0) { d__1 = 1. / ajj; _starpu_dscal_(&km, &d__1, &ab[j * ab_dim1 + 2], &c__1); _starpu_dsyr_("Lower", &km, &c_b9, &ab[j * ab_dim1 + 2], &c__1, &ab[( j + 1) * ab_dim1 + 1], &kld); } /* L40: */ } } return 0; L50: *info = j; return 0; /* End of DPBSTF */ } /* _starpu_dpbstf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpbsv.c000066400000000000000000000141471507764646700205210ustar00rootroot00000000000000/* dpbsv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dpbsv_(char *uplo, integer *n, integer *kd, integer * nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, b_dim1, b_offset, i__1; /* Local variables */ extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpbtrf_( char *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dpbtrs_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPBSV computes the solution to a real system of linear equations */ /* A * X = B, */ /* where A is an N-by-N symmetric positive definite band matrix and X */ /* and B are N-by-NRHS matrices. */ /* The Cholesky decomposition is used to factor A as */ /* A = U**T * U, if UPLO = 'U', or */ /* A = L * L**T, if UPLO = 'L', */ /* where U is an upper triangular band matrix, and L is a lower */ /* triangular band matrix, with the same number of superdiagonals or */ /* subdiagonals as A. The factored form of A is then used to solve the */ /* system of equations A * X = B. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix A, stored in the first KD+1 rows of the array. The */ /* j-th column of A is stored in the j-th column of the array AB */ /* as follows: */ /* if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). */ /* See below for further details. */ /* On exit, if INFO = 0, the triangular factor U or L from the */ /* Cholesky factorization A = U**T*U or A = L*L**T of the band */ /* matrix A, in the same storage format as A. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD+1. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS right hand side matrix B. */ /* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the leading minor of order i of A is not */ /* positive definite, so the factorization could not be */ /* completed, and the solution has not been computed. */ /* Further Details */ /* =============== */ /* The band storage scheme is illustrated by the following example, when */ /* N = 6, KD = 2, and UPLO = 'U': */ /* On entry: On exit: */ /* * * a13 a24 a35 a46 * * u13 u24 u35 u46 */ /* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ /* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ /* Similarly, if UPLO = 'L' the format of A is as follows: */ /* On entry: On exit: */ /* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 */ /* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * */ /* a31 a42 a53 a64 * * l31 l42 l53 l64 * * */ /* Array elements marked * are not used by the routine. */ /* ===================================================================== */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kd < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*ldab < *kd + 1) { *info = -6; } else if (*ldb < max(1,*n)) { *info = -8; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPBSV ", &i__1); return 0; } /* Compute the Cholesky factorization A = U'*U or A = L*L'. */ _starpu_dpbtrf_(uplo, n, kd, &ab[ab_offset], ldab, info); if (*info == 0) { /* Solve the system A*X = B, overwriting B with X. */ _starpu_dpbtrs_(uplo, n, kd, nrhs, &ab[ab_offset], ldab, &b[b_offset], ldb, info); } return 0; /* End of DPBSV */ } /* _starpu_dpbsv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpbsvx.c000066400000000000000000000434731507764646700207150ustar00rootroot00000000000000/* dpbsvx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dpbsvx_(char *fact, char *uplo, integer *n, integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, char *equed, doublereal *s, doublereal *b, integer * ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; doublereal d__1, d__2; /* Local variables */ integer i__, j, j1, j2; doublereal amax, smin, smax; extern logical _starpu_lsame_(char *, char *); doublereal scond, anorm; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); logical equil, rcequ, upper; extern doublereal _starpu_dlamch_(char *), _starpu_dlansb_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dpbcon_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dlaqsb_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, char *); logical nofact; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dpbequ_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dpbrfs_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dpbtrf_(char *, integer *, integer *, doublereal *, integer *, integer *); integer infequ; extern /* Subroutine */ int _starpu_dpbtrs_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); doublereal smlnum; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPBSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to */ /* compute the solution to a real system of linear equations */ /* A * X = B, */ /* where A is an N-by-N symmetric positive definite band matrix and X */ /* and B are N-by-NRHS matrices. */ /* Error bounds on the solution and a condition estimate are also */ /* provided. */ /* Description */ /* =========== */ /* The following steps are performed: */ /* 1. If FACT = 'E', real scaling factors are computed to equilibrate */ /* the system: */ /* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B */ /* Whether or not the system will be equilibrated depends on the */ /* scaling of the matrix A, but if equilibration is used, A is */ /* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. */ /* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to */ /* factor the matrix A (after equilibration if FACT = 'E') as */ /* A = U**T * U, if UPLO = 'U', or */ /* A = L * L**T, if UPLO = 'L', */ /* where U is an upper triangular band matrix, and L is a lower */ /* triangular band matrix. */ /* 3. If the leading i-by-i principal minor is not positive definite, */ /* then the routine returns with INFO = i. Otherwise, the factored */ /* form of A is used to estimate the condition number of the matrix */ /* A. If the reciprocal of the condition number is less than machine */ /* precision, INFO = N+1 is returned as a warning, but the routine */ /* still goes on to solve for X and compute error bounds as */ /* described below. */ /* 4. The system of equations is solved for X using the factored form */ /* of A. */ /* 5. Iterative refinement is applied to improve the computed solution */ /* matrix and calculate error bounds and backward error estimates */ /* for it. */ /* 6. If equilibration was used, the matrix X is premultiplied by */ /* diag(S) so that it solves the original system before */ /* equilibration. */ /* Arguments */ /* ========= */ /* FACT (input) CHARACTER*1 */ /* Specifies whether or not the factored form of the matrix A is */ /* supplied on entry, and if not, whether the matrix A should be */ /* equilibrated before it is factored. */ /* = 'F': On entry, AFB contains the factored form of A. */ /* If EQUED = 'Y', the matrix A has been equilibrated */ /* with scaling factors given by S. AB and AFB will not */ /* be modified. */ /* = 'N': The matrix A will be copied to AFB and factored. */ /* = 'E': The matrix A will be equilibrated if necessary, then */ /* copied to AFB and factored. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ /* NRHS (input) INTEGER */ /* The number of right-hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix A, stored in the first KD+1 rows of the array, except */ /* if FACT = 'F' and EQUED = 'Y', then A must contain the */ /* equilibrated matrix diag(S)*A*diag(S). The j-th column of A */ /* is stored in the j-th column of the array AB as follows: */ /* if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). */ /* See below for further details. */ /* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by */ /* diag(S)*A*diag(S). */ /* LDAB (input) INTEGER */ /* The leading dimension of the array A. LDAB >= KD+1. */ /* AFB (input or output) DOUBLE PRECISION array, dimension (LDAFB,N) */ /* If FACT = 'F', then AFB is an input argument and on entry */ /* contains the triangular factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T of the band matrix */ /* A, in the same storage format as A (see AB). If EQUED = 'Y', */ /* then AFB is the factored form of the equilibrated matrix A. */ /* If FACT = 'N', then AFB is an output argument and on exit */ /* returns the triangular factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T. */ /* If FACT = 'E', then AFB is an output argument and on exit */ /* returns the triangular factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T of the equilibrated */ /* matrix A (see the description of A for the form of the */ /* equilibrated matrix). */ /* LDAFB (input) INTEGER */ /* The leading dimension of the array AFB. LDAFB >= KD+1. */ /* EQUED (input or output) CHARACTER*1 */ /* Specifies the form of equilibration that was done. */ /* = 'N': No equilibration (always true if FACT = 'N'). */ /* = 'Y': Equilibration was done, i.e., A has been replaced by */ /* diag(S) * A * diag(S). */ /* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ /* output argument. */ /* S (input or output) DOUBLE PRECISION array, dimension (N) */ /* The scale factors for A; not accessed if EQUED = 'N'. S is */ /* an input argument if FACT = 'F'; otherwise, S is an output */ /* argument. If FACT = 'F' and EQUED = 'Y', each element of S */ /* must be positive. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS right hand side matrix B. */ /* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', */ /* B is overwritten by diag(S) * B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to */ /* the original system of equations. Note that if EQUED = 'Y', */ /* A and B are modified on exit, and the solution to the */ /* equilibrated system is inv(diag(S))*X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* The estimate of the reciprocal condition number of the matrix */ /* A after equilibration (if done). If RCOND is less than the */ /* machine precision (in particular, if RCOND = 0), the matrix */ /* is singular to working precision. This condition is */ /* indicated by a return code of INFO > 0. */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, and i is */ /* <= N: the leading minor of order i of A is */ /* not positive definite, so the factorization */ /* could not be completed, and the solution has not */ /* been computed. RCOND = 0 is returned. */ /* = N+1: U is nonsingular, but RCOND is less than machine */ /* precision, meaning that the matrix is singular */ /* to working precision. Nevertheless, the */ /* solution and error bounds are computed because */ /* there are a number of situations where the */ /* computed solution can be more accurate than the */ /* value of RCOND would suggest. */ /* Further Details */ /* =============== */ /* The band storage scheme is illustrated by the following example, when */ /* N = 6, KD = 2, and UPLO = 'U': */ /* Two-dimensional storage of the symmetric matrix A: */ /* a11 a12 a13 */ /* a22 a23 a24 */ /* a33 a34 a35 */ /* a44 a45 a46 */ /* a55 a56 */ /* (aij=conjg(aji)) a66 */ /* Band storage of the upper triangle of A: */ /* * * a13 a24 a35 a46 */ /* * a12 a23 a34 a45 a56 */ /* a11 a22 a33 a44 a55 a66 */ /* Similarly, if UPLO = 'L' the format of A is as follows: */ /* a11 a22 a33 a44 a55 a66 */ /* a21 a32 a43 a54 a65 * */ /* a31 a42 a53 a64 * * */ /* Array elements marked * are not used by the routine. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; afb_dim1 = *ldafb; afb_offset = 1 + afb_dim1; afb -= afb_offset; --s; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; nofact = _starpu_lsame_(fact, "N"); equil = _starpu_lsame_(fact, "E"); upper = _starpu_lsame_(uplo, "U"); if (nofact || equil) { *(unsigned char *)equed = 'N'; rcequ = FALSE_; } else { rcequ = _starpu_lsame_(equed, "Y"); smlnum = _starpu_dlamch_("Safe minimum"); bignum = 1. / smlnum; } /* Test the input parameters. */ if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { *info = -1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*kd < 0) { *info = -4; } else if (*nrhs < 0) { *info = -5; } else if (*ldab < *kd + 1) { *info = -7; } else if (*ldafb < *kd + 1) { *info = -9; } else if (_starpu_lsame_(fact, "F") && ! (rcequ || _starpu_lsame_( equed, "N"))) { *info = -10; } else { if (rcequ) { smin = bignum; smax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = smin, d__2 = s[j]; smin = min(d__1,d__2); /* Computing MAX */ d__1 = smax, d__2 = s[j]; smax = max(d__1,d__2); /* L10: */ } if (smin <= 0.) { *info = -11; } else if (*n > 0) { scond = max(smin,smlnum) / min(smax,bignum); } else { scond = 1.; } } if (*info == 0) { if (*ldb < max(1,*n)) { *info = -13; } else if (*ldx < max(1,*n)) { *info = -15; } } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPBSVX", &i__1); return 0; } if (equil) { /* Compute row and column scalings to equilibrate the matrix A. */ _starpu_dpbequ_(uplo, n, kd, &ab[ab_offset], ldab, &s[1], &scond, &amax, & infequ); if (infequ == 0) { /* Equilibrate the matrix. */ _starpu_dlaqsb_(uplo, n, kd, &ab[ab_offset], ldab, &s[1], &scond, &amax, equed); rcequ = _starpu_lsame_(equed, "Y"); } } /* Scale the right-hand side. */ if (rcequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = s[i__] * b[i__ + j * b_dim1]; /* L20: */ } /* L30: */ } } if (nofact || equil) { /* Compute the Cholesky factorization A = U'*U or A = L*L'. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = j - *kd; j1 = max(i__2,1); i__2 = j - j1 + 1; _starpu_dcopy_(&i__2, &ab[*kd + 1 - j + j1 + j * ab_dim1], &c__1, & afb[*kd + 1 - j + j1 + j * afb_dim1], &c__1); /* L40: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__2 = j + *kd; j2 = min(i__2,*n); i__2 = j2 - j + 1; _starpu_dcopy_(&i__2, &ab[j * ab_dim1 + 1], &c__1, &afb[j * afb_dim1 + 1], &c__1); /* L50: */ } } _starpu_dpbtrf_(uplo, n, kd, &afb[afb_offset], ldafb, info); /* Return if INFO is non-zero. */ if (*info > 0) { *rcond = 0.; return 0; } } /* Compute the norm of the matrix A. */ anorm = _starpu_dlansb_("1", uplo, n, kd, &ab[ab_offset], ldab, &work[1]); /* Compute the reciprocal of the condition number of A. */ _starpu_dpbcon_(uplo, n, kd, &afb[afb_offset], ldafb, &anorm, rcond, &work[1], & iwork[1], info); /* Compute the solution matrix X. */ _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); _starpu_dpbtrs_(uplo, n, kd, nrhs, &afb[afb_offset], ldafb, &x[x_offset], ldx, info); /* Use iterative refinement to improve the computed solution and */ /* compute error bounds and backward error estimates for it. */ _starpu_dpbrfs_(uplo, n, kd, nrhs, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[1] , &iwork[1], info); /* Transform the solution matrix X to a solution of the original */ /* system. */ if (rcequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { x[i__ + j * x_dim1] = s[i__] * x[i__ + j * x_dim1]; /* L60: */ } /* L70: */ } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] /= scond; /* L80: */ } } /* Set INFO = N+1 if the matrix is singular to working precision. */ if (*rcond < _starpu_dlamch_("Epsilon")) { *info = *n + 1; } return 0; /* End of DPBSVX */ } /* _starpu_dpbsvx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpbtf2.c000066400000000000000000000156311507764646700205630ustar00rootroot00000000000000/* dpbtf2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b8 = -1.; static integer c__1 = 1; /* Subroutine */ int _starpu_dpbtf2_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer j, kn; doublereal ajj; integer kld; extern /* Subroutine */ int _starpu_dsyr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dscal_( integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPBTF2 computes the Cholesky factorization of a real symmetric */ /* positive definite band matrix A. */ /* The factorization has the form */ /* A = U' * U , if UPLO = 'U', or */ /* A = L * L', if UPLO = 'L', */ /* where U is an upper triangular matrix, U' is the transpose of U, and */ /* L is lower triangular. */ /* This is the unblocked version of the algorithm, calling Level 2 BLAS. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is stored: */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of super-diagonals of the matrix A if UPLO = 'U', */ /* or the number of sub-diagonals if UPLO = 'L'. KD >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix A, stored in the first KD+1 rows of the array. The */ /* j-th column of A is stored in the j-th column of the array AB */ /* as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* On exit, if INFO = 0, the triangular factor U or L from the */ /* Cholesky factorization A = U'*U or A = L*L' of the band */ /* matrix A, in the same storage format as A. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD+1. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -k, the k-th argument had an illegal value */ /* > 0: if INFO = k, the leading minor of order k is not */ /* positive definite, and the factorization could not be */ /* completed. */ /* Further Details */ /* =============== */ /* The band storage scheme is illustrated by the following example, when */ /* N = 6, KD = 2, and UPLO = 'U': */ /* On entry: On exit: */ /* * * a13 a24 a35 a46 * * u13 u24 u35 u46 */ /* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ /* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ /* Similarly, if UPLO = 'L' the format of A is as follows: */ /* On entry: On exit: */ /* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 */ /* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * */ /* a31 a42 a53 a64 * * l31 l42 l53 l64 * * */ /* Array elements marked * are not used by the routine. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kd < 0) { *info = -3; } else if (*ldab < *kd + 1) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPBTF2", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Computing MAX */ i__1 = 1, i__2 = *ldab - 1; kld = max(i__1,i__2); if (upper) { /* Compute the Cholesky factorization A = U'*U. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Compute U(J,J) and test for non-positive-definiteness. */ ajj = ab[*kd + 1 + j * ab_dim1]; if (ajj <= 0.) { goto L30; } ajj = sqrt(ajj); ab[*kd + 1 + j * ab_dim1] = ajj; /* Compute elements J+1:J+KN of row J and update the */ /* trailing submatrix within the band. */ /* Computing MIN */ i__2 = *kd, i__3 = *n - j; kn = min(i__2,i__3); if (kn > 0) { d__1 = 1. / ajj; _starpu_dscal_(&kn, &d__1, &ab[*kd + (j + 1) * ab_dim1], &kld); _starpu_dsyr_("Upper", &kn, &c_b8, &ab[*kd + (j + 1) * ab_dim1], &kld, &ab[*kd + 1 + (j + 1) * ab_dim1], &kld); } /* L10: */ } } else { /* Compute the Cholesky factorization A = L*L'. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Compute L(J,J) and test for non-positive-definiteness. */ ajj = ab[j * ab_dim1 + 1]; if (ajj <= 0.) { goto L30; } ajj = sqrt(ajj); ab[j * ab_dim1 + 1] = ajj; /* Compute elements J+1:J+KN of column J and update the */ /* trailing submatrix within the band. */ /* Computing MIN */ i__2 = *kd, i__3 = *n - j; kn = min(i__2,i__3); if (kn > 0) { d__1 = 1. / ajj; _starpu_dscal_(&kn, &d__1, &ab[j * ab_dim1 + 2], &c__1); _starpu_dsyr_("Lower", &kn, &c_b8, &ab[j * ab_dim1 + 2], &c__1, &ab[( j + 1) * ab_dim1 + 1], &kld); } /* L20: */ } } return 0; L30: *info = j; return 0; /* End of DPBTF2 */ } /* _starpu_dpbtf2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpbtrf.c000066400000000000000000000327701507764646700206660ustar00rootroot00000000000000/* dpbtrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b18 = 1.; static doublereal c_b21 = -1.; static integer c__33 = 33; /* Subroutine */ int _starpu_dpbtrf_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer i__, j, i2, i3, ib, nb, ii, jj; doublereal work[1056] /* was [33][32] */; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsyrk_( char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dpbtf2_(char *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dpotf2_(char *, integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPBTRF computes the Cholesky factorization of a real symmetric */ /* positive definite band matrix A. */ /* The factorization has the form */ /* A = U**T * U, if UPLO = 'U', or */ /* A = L * L**T, if UPLO = 'L', */ /* where U is an upper triangular matrix and L is lower triangular. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix A, stored in the first KD+1 rows of the array. The */ /* j-th column of A is stored in the j-th column of the array AB */ /* as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* On exit, if INFO = 0, the triangular factor U or L from the */ /* Cholesky factorization A = U**T*U or A = L*L**T of the band */ /* matrix A, in the same storage format as A. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD+1. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the leading minor of order i is not */ /* positive definite, and the factorization could not be */ /* completed. */ /* Further Details */ /* =============== */ /* The band storage scheme is illustrated by the following example, when */ /* N = 6, KD = 2, and UPLO = 'U': */ /* On entry: On exit: */ /* * * a13 a24 a35 a46 * * u13 u24 u35 u46 */ /* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ /* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ /* Similarly, if UPLO = 'L' the format of A is as follows: */ /* On entry: On exit: */ /* a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 */ /* a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * */ /* a31 a42 a53 a64 * * l31 l42 l53 l64 * * */ /* Array elements marked * are not used by the routine. */ /* Contributed by */ /* Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; /* Function Body */ *info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kd < 0) { *info = -3; } else if (*ldab < *kd + 1) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPBTRF", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine the block size for this environment */ nb = _starpu_ilaenv_(&c__1, "DPBTRF", uplo, n, kd, &c_n1, &c_n1); /* The block size must not exceed the semi-bandwidth KD, and must not */ /* exceed the limit set by the size of the local array WORK. */ nb = min(nb,32); if (nb <= 1 || nb > *kd) { /* Use unblocked code */ _starpu_dpbtf2_(uplo, n, kd, &ab[ab_offset], ldab, info); } else { /* Use blocked code */ if (_starpu_lsame_(uplo, "U")) { /* Compute the Cholesky factorization of a symmetric band */ /* matrix, given the upper triangle of the matrix in band */ /* storage. */ /* Zero the upper triangle of the work array. */ i__1 = nb; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { work[i__ + j * 33 - 34] = 0.; /* L10: */ } /* L20: */ } /* Process the band matrix one diagonal block at a time. */ i__1 = *n; i__2 = nb; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = nb, i__4 = *n - i__ + 1; ib = min(i__3,i__4); /* Factorize the diagonal block */ i__3 = *ldab - 1; _starpu_dpotf2_(uplo, &ib, &ab[*kd + 1 + i__ * ab_dim1], &i__3, &ii); if (ii != 0) { *info = i__ + ii - 1; goto L150; } if (i__ + ib <= *n) { /* Update the relevant part of the trailing submatrix. */ /* If A11 denotes the diagonal block which has just been */ /* factorized, then we need to update the remaining */ /* blocks in the diagram: */ /* A11 A12 A13 */ /* A22 A23 */ /* A33 */ /* The numbers of rows and columns in the partitioning */ /* are IB, I2, I3 respectively. The blocks A12, A22 and */ /* A23 are empty if IB = KD. The upper triangle of A13 */ /* lies outside the band. */ /* Computing MIN */ i__3 = *kd - ib, i__4 = *n - i__ - ib + 1; i2 = min(i__3,i__4); /* Computing MIN */ i__3 = ib, i__4 = *n - i__ - *kd + 1; i3 = min(i__3,i__4); if (i2 > 0) { /* Update A12 */ i__3 = *ldab - 1; i__4 = *ldab - 1; _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", &ib, &i2, &c_b18, &ab[*kd + 1 + i__ * ab_dim1], & i__3, &ab[*kd + 1 - ib + (i__ + ib) * ab_dim1] , &i__4); /* Update A22 */ i__3 = *ldab - 1; i__4 = *ldab - 1; _starpu_dsyrk_("Upper", "Transpose", &i2, &ib, &c_b21, &ab[* kd + 1 - ib + (i__ + ib) * ab_dim1], &i__3, & c_b18, &ab[*kd + 1 + (i__ + ib) * ab_dim1], & i__4); } if (i3 > 0) { /* Copy the lower triangle of A13 into the work array. */ i__3 = i3; for (jj = 1; jj <= i__3; ++jj) { i__4 = ib; for (ii = jj; ii <= i__4; ++ii) { work[ii + jj * 33 - 34] = ab[ii - jj + 1 + ( jj + i__ + *kd - 1) * ab_dim1]; /* L30: */ } /* L40: */ } /* Update A13 (in the work array). */ i__3 = *ldab - 1; _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", &ib, &i3, &c_b18, &ab[*kd + 1 + i__ * ab_dim1], & i__3, work, &c__33); /* Update A23 */ if (i2 > 0) { i__3 = *ldab - 1; i__4 = *ldab - 1; _starpu_dgemm_("Transpose", "No Transpose", &i2, &i3, &ib, &c_b21, &ab[*kd + 1 - ib + (i__ + ib) * ab_dim1], &i__3, work, &c__33, &c_b18, & ab[ib + 1 + (i__ + *kd) * ab_dim1], &i__4); } /* Update A33 */ i__3 = *ldab - 1; _starpu_dsyrk_("Upper", "Transpose", &i3, &ib, &c_b21, work, & c__33, &c_b18, &ab[*kd + 1 + (i__ + *kd) * ab_dim1], &i__3); /* Copy the lower triangle of A13 back into place. */ i__3 = i3; for (jj = 1; jj <= i__3; ++jj) { i__4 = ib; for (ii = jj; ii <= i__4; ++ii) { ab[ii - jj + 1 + (jj + i__ + *kd - 1) * ab_dim1] = work[ii + jj * 33 - 34]; /* L50: */ } /* L60: */ } } } /* L70: */ } } else { /* Compute the Cholesky factorization of a symmetric band */ /* matrix, given the lower triangle of the matrix in band */ /* storage. */ /* Zero the lower triangle of the work array. */ i__2 = nb; for (j = 1; j <= i__2; ++j) { i__1 = nb; for (i__ = j + 1; i__ <= i__1; ++i__) { work[i__ + j * 33 - 34] = 0.; /* L80: */ } /* L90: */ } /* Process the band matrix one diagonal block at a time. */ i__2 = *n; i__1 = nb; for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { /* Computing MIN */ i__3 = nb, i__4 = *n - i__ + 1; ib = min(i__3,i__4); /* Factorize the diagonal block */ i__3 = *ldab - 1; _starpu_dpotf2_(uplo, &ib, &ab[i__ * ab_dim1 + 1], &i__3, &ii); if (ii != 0) { *info = i__ + ii - 1; goto L150; } if (i__ + ib <= *n) { /* Update the relevant part of the trailing submatrix. */ /* If A11 denotes the diagonal block which has just been */ /* factorized, then we need to update the remaining */ /* blocks in the diagram: */ /* A11 */ /* A21 A22 */ /* A31 A32 A33 */ /* The numbers of rows and columns in the partitioning */ /* are IB, I2, I3 respectively. The blocks A21, A22 and */ /* A32 are empty if IB = KD. The lower triangle of A31 */ /* lies outside the band. */ /* Computing MIN */ i__3 = *kd - ib, i__4 = *n - i__ - ib + 1; i2 = min(i__3,i__4); /* Computing MIN */ i__3 = ib, i__4 = *n - i__ - *kd + 1; i3 = min(i__3,i__4); if (i2 > 0) { /* Update A21 */ i__3 = *ldab - 1; i__4 = *ldab - 1; _starpu_dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i2, &ib, &c_b18, &ab[i__ * ab_dim1 + 1], &i__3, & ab[ib + 1 + i__ * ab_dim1], &i__4); /* Update A22 */ i__3 = *ldab - 1; i__4 = *ldab - 1; _starpu_dsyrk_("Lower", "No Transpose", &i2, &ib, &c_b21, &ab[ ib + 1 + i__ * ab_dim1], &i__3, &c_b18, &ab[( i__ + ib) * ab_dim1 + 1], &i__4); } if (i3 > 0) { /* Copy the upper triangle of A31 into the work array. */ i__3 = ib; for (jj = 1; jj <= i__3; ++jj) { i__4 = min(jj,i3); for (ii = 1; ii <= i__4; ++ii) { work[ii + jj * 33 - 34] = ab[*kd + 1 - jj + ii + (jj + i__ - 1) * ab_dim1]; /* L100: */ } /* L110: */ } /* Update A31 (in the work array). */ i__3 = *ldab - 1; _starpu_dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i3, &ib, &c_b18, &ab[i__ * ab_dim1 + 1], &i__3, work, &c__33); /* Update A32 */ if (i2 > 0) { i__3 = *ldab - 1; i__4 = *ldab - 1; _starpu_dgemm_("No transpose", "Transpose", &i3, &i2, &ib, &c_b21, work, &c__33, &ab[ib + 1 + i__ * ab_dim1], &i__3, &c_b18, &ab[*kd + 1 - ib + (i__ + ib) * ab_dim1], &i__4); } /* Update A33 */ i__3 = *ldab - 1; _starpu_dsyrk_("Lower", "No Transpose", &i3, &ib, &c_b21, work, &c__33, &c_b18, &ab[(i__ + *kd) * ab_dim1 + 1], &i__3); /* Copy the upper triangle of A31 back into place. */ i__3 = ib; for (jj = 1; jj <= i__3; ++jj) { i__4 = min(jj,i3); for (ii = 1; ii <= i__4; ++ii) { ab[*kd + 1 - jj + ii + (jj + i__ - 1) * ab_dim1] = work[ii + jj * 33 - 34]; /* L120: */ } /* L130: */ } } } /* L140: */ } } } return 0; L150: return 0; /* End of DPBTRF */ } /* _starpu_dpbtrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpbtrs.c000066400000000000000000000123201507764646700206700ustar00rootroot00000000000000/* dpbtrs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dpbtrs_(char *uplo, integer *n, integer *kd, integer * nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, b_dim1, b_offset, i__1; /* Local variables */ integer j; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtbsv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPBTRS solves a system of linear equations A*X = B with a symmetric */ /* positive definite band matrix A using the Cholesky factorization */ /* A = U**T*U or A = L*L**T computed by DPBTRF. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangular factor stored in AB; */ /* = 'L': Lower triangular factor stored in AB. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* The triangular factor U or L from the Cholesky factorization */ /* A = U**T*U or A = L*L**T of the band matrix A, stored in the */ /* first KD+1 rows of the array. The j-th column of U or L is */ /* stored in the j-th column of the array AB as follows: */ /* if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD+1. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side matrix B. */ /* On exit, the solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kd < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*ldab < *kd + 1) { *info = -6; } else if (*ldb < max(1,*n)) { *info = -8; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPBTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } if (upper) { /* Solve A*X = B where A = U'*U. */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { /* Solve U'*X = B, overwriting B with X. */ _starpu_dtbsv_("Upper", "Transpose", "Non-unit", n, kd, &ab[ab_offset], ldab, &b[j * b_dim1 + 1], &c__1); /* Solve U*X = B, overwriting B with X. */ _starpu_dtbsv_("Upper", "No transpose", "Non-unit", n, kd, &ab[ab_offset], ldab, &b[j * b_dim1 + 1], &c__1); /* L10: */ } } else { /* Solve A*X = B where A = L*L'. */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { /* Solve L*X = B, overwriting B with X. */ _starpu_dtbsv_("Lower", "No transpose", "Non-unit", n, kd, &ab[ab_offset], ldab, &b[j * b_dim1 + 1], &c__1); /* Solve L'*X = B, overwriting B with X. */ _starpu_dtbsv_("Lower", "Transpose", "Non-unit", n, kd, &ab[ab_offset], ldab, &b[j * b_dim1 + 1], &c__1); /* L20: */ } } return 0; /* End of DPBTRS */ } /* _starpu_dpbtrs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpftrf.c000066400000000000000000000323501507764646700206640ustar00rootroot00000000000000/* dpftrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b12 = 1.; static doublereal c_b15 = -1.; /* Subroutine */ int _starpu_dpftrf_(char *transr, char *uplo, integer *n, doublereal *a, integer *info) { /* System generated locals */ integer i__1, i__2; /* Local variables */ integer k, n1, n2; logical normaltransr; extern logical _starpu_lsame_(char *, char *); logical lower; extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsyrk_( char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); logical nisodd; extern /* Subroutine */ int _starpu_dpotrf_(char *, integer *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* Purpose */ /* ======= */ /* DPFTRF computes the Cholesky factorization of a real symmetric */ /* positive definite matrix A. */ /* The factorization has the form */ /* A = U**T * U, if UPLO = 'U', or */ /* A = L * L**T, if UPLO = 'L', */ /* where U is an upper triangular matrix and L is lower triangular. */ /* This is the block version of the algorithm, calling Level 3 BLAS. */ /* Arguments */ /* ========= */ /* TRANSR (input) CHARACTER */ /* = 'N': The Normal TRANSR of RFP A is stored; */ /* = 'T': The Transpose TRANSR of RFP A is stored. */ /* UPLO (input) CHARACTER */ /* = 'U': Upper triangle of RFP A is stored; */ /* = 'L': Lower triangle of RFP A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ); */ /* On entry, the symmetric matrix A in RFP format. RFP format is */ /* described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' */ /* then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is */ /* (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is */ /* the transpose of RFP A as defined when */ /* TRANSR = 'N'. The contents of RFP A are defined by UPLO as */ /* follows: If UPLO = 'U' the RFP A contains the NT elements of */ /* upper packed A. If UPLO = 'L' the RFP A contains the elements */ /* of lower packed A. The LDA of RFP A is (N+1)/2 when TRANSR = */ /* 'T'. When TRANSR is 'N' the LDA is N+1 when N is even and N */ /* is odd. See the Note below for more details. */ /* On exit, if INFO = 0, the factor U or L from the Cholesky */ /* factorization RFP A = U**T*U or RFP A = L*L**T. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the leading minor of order i is not */ /* positive definite, and the factorization could not be */ /* completed. */ /* Notes */ /* ===== */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* even. We give an example where N = 6. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 05 00 */ /* 11 12 13 14 15 10 11 */ /* 22 23 24 25 20 21 22 */ /* 33 34 35 30 31 32 33 */ /* 44 45 40 41 42 43 44 */ /* 55 50 51 52 53 54 55 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ /* the transpose of the first three columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ /* the transpose of the last three columns of AP lower. */ /* This covers the case N even and TRANSR = 'N'. */ /* RFP A RFP A */ /* 03 04 05 33 43 53 */ /* 13 14 15 00 44 54 */ /* 23 24 25 10 11 55 */ /* 33 34 35 20 21 22 */ /* 00 44 45 30 31 32 */ /* 01 11 55 40 41 42 */ /* 02 12 22 50 51 52 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ /* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ /* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* odd. We give an example where N = 5. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 00 */ /* 11 12 13 14 10 11 */ /* 22 23 24 20 21 22 */ /* 33 34 30 31 32 33 */ /* 44 40 41 42 43 44 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ /* the transpose of the first two columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ /* the transpose of the last two columns of AP lower. */ /* This covers the case N odd and TRANSR = 'N'. */ /* RFP A RFP A */ /* 02 03 04 00 33 43 */ /* 12 13 14 10 11 44 */ /* 22 23 24 20 21 22 */ /* 00 33 34 30 31 32 */ /* 01 11 44 40 41 42 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 02 12 22 00 01 00 10 20 30 40 50 */ /* 03 13 23 33 11 33 11 21 31 41 51 */ /* 04 14 24 34 44 43 44 22 32 42 52 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ *info = 0; normaltransr = _starpu_lsame_(transr, "N"); lower = _starpu_lsame_(uplo, "L"); if (! normaltransr && ! _starpu_lsame_(transr, "T")) { *info = -1; } else if (! lower && ! _starpu_lsame_(uplo, "U")) { *info = -2; } else if (*n < 0) { *info = -3; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPFTRF", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* If N is odd, set NISODD = .TRUE. */ /* If N is even, set K = N/2 and NISODD = .FALSE. */ if (*n % 2 == 0) { k = *n / 2; nisodd = FALSE_; } else { nisodd = TRUE_; } /* Set N1 and N2 depending on LOWER */ if (lower) { n2 = *n / 2; n1 = *n - n2; } else { n1 = *n / 2; n2 = *n - n1; } /* start execution: there are eight cases */ if (nisodd) { /* N is odd */ if (normaltransr) { /* N is odd and TRANSR = 'N' */ if (lower) { /* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) */ /* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) */ /* T1 -> a(0), T2 -> a(n), S -> a(n1) */ _starpu_dpotrf_("L", &n1, a, n, info); if (*info > 0) { return 0; } _starpu_dtrsm_("R", "L", "T", "N", &n2, &n1, &c_b12, a, n, &a[n1], n); _starpu_dsyrk_("U", "N", &n2, &n1, &c_b15, &a[n1], n, &c_b12, &a[*n], n); _starpu_dpotrf_("U", &n2, &a[*n], n, info); if (*info > 0) { *info += n1; } } else { /* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) */ /* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) */ /* T1 -> a(n2), T2 -> a(n1), S -> a(0) */ _starpu_dpotrf_("L", &n1, &a[n2], n, info); if (*info > 0) { return 0; } _starpu_dtrsm_("L", "L", "N", "N", &n1, &n2, &c_b12, &a[n2], n, a, n); _starpu_dsyrk_("U", "T", &n2, &n1, &c_b15, a, n, &c_b12, &a[n1], n); _starpu_dpotrf_("U", &n2, &a[n1], n, info); if (*info > 0) { *info += n1; } } } else { /* N is odd and TRANSR = 'T' */ if (lower) { /* SRPA for LOWER, TRANSPOSE and N is odd */ /* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) */ /* T1 -> a(0+0) , T2 -> a(1+0) , S -> a(0+n1*n1); lda=n1 */ _starpu_dpotrf_("U", &n1, a, &n1, info); if (*info > 0) { return 0; } _starpu_dtrsm_("L", "U", "T", "N", &n1, &n2, &c_b12, a, &n1, &a[n1 * n1], &n1); _starpu_dsyrk_("L", "T", &n2, &n1, &c_b15, &a[n1 * n1], &n1, &c_b12, & a[1], &n1); _starpu_dpotrf_("L", &n2, &a[1], &n1, info); if (*info > 0) { *info += n1; } } else { /* SRPA for UPPER, TRANSPOSE and N is odd */ /* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) */ /* T1 -> a(n2*n2), T2 -> a(n1*n2), S -> a(0); lda = n2 */ _starpu_dpotrf_("U", &n1, &a[n2 * n2], &n2, info); if (*info > 0) { return 0; } _starpu_dtrsm_("R", "U", "N", "N", &n2, &n1, &c_b12, &a[n2 * n2], &n2, a, &n2); _starpu_dsyrk_("L", "N", &n2, &n1, &c_b15, a, &n2, &c_b12, &a[n1 * n2] , &n2); _starpu_dpotrf_("L", &n2, &a[n1 * n2], &n2, info); if (*info > 0) { *info += n1; } } } } else { /* N is even */ if (normaltransr) { /* N is even and TRANSR = 'N' */ if (lower) { /* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ /* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) */ /* T1 -> a(1), T2 -> a(0), S -> a(k+1) */ i__1 = *n + 1; _starpu_dpotrf_("L", &k, &a[1], &i__1, info); if (*info > 0) { return 0; } i__1 = *n + 1; i__2 = *n + 1; _starpu_dtrsm_("R", "L", "T", "N", &k, &k, &c_b12, &a[1], &i__1, &a[k + 1], &i__2); i__1 = *n + 1; i__2 = *n + 1; _starpu_dsyrk_("U", "N", &k, &k, &c_b15, &a[k + 1], &i__1, &c_b12, a, &i__2); i__1 = *n + 1; _starpu_dpotrf_("U", &k, a, &i__1, info); if (*info > 0) { *info += k; } } else { /* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ /* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) */ /* T1 -> a(k+1), T2 -> a(k), S -> a(0) */ i__1 = *n + 1; _starpu_dpotrf_("L", &k, &a[k + 1], &i__1, info); if (*info > 0) { return 0; } i__1 = *n + 1; i__2 = *n + 1; _starpu_dtrsm_("L", "L", "N", "N", &k, &k, &c_b12, &a[k + 1], &i__1, a, &i__2); i__1 = *n + 1; i__2 = *n + 1; _starpu_dsyrk_("U", "T", &k, &k, &c_b15, a, &i__1, &c_b12, &a[k], & i__2); i__1 = *n + 1; _starpu_dpotrf_("U", &k, &a[k], &i__1, info); if (*info > 0) { *info += k; } } } else { /* N is even and TRANSR = 'T' */ if (lower) { /* SRPA for LOWER, TRANSPOSE and N is even (see paper) */ /* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) */ /* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k */ _starpu_dpotrf_("U", &k, &a[k], &k, info); if (*info > 0) { return 0; } _starpu_dtrsm_("L", "U", "T", "N", &k, &k, &c_b12, &a[k], &n1, &a[k * (k + 1)], &k); _starpu_dsyrk_("L", "T", &k, &k, &c_b15, &a[k * (k + 1)], &k, &c_b12, a, &k); _starpu_dpotrf_("L", &k, a, &k, info); if (*info > 0) { *info += k; } } else { /* SRPA for UPPER, TRANSPOSE and N is even (see paper) */ /* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) */ /* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k */ _starpu_dpotrf_("U", &k, &a[k * (k + 1)], &k, info); if (*info > 0) { return 0; } _starpu_dtrsm_("R", "U", "N", "N", &k, &k, &c_b12, &a[k * (k + 1)], & k, a, &k); _starpu_dsyrk_("L", "N", &k, &k, &c_b15, a, &k, &c_b12, &a[k * k], &k); _starpu_dpotrf_("L", &k, &a[k * k], &k, info); if (*info > 0) { *info += k; } } } } return 0; /* End of DPFTRF */ } /* _starpu_dpftrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpftri.c000066400000000000000000000307661507764646700207000ustar00rootroot00000000000000/* dpftri.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b11 = 1.; /* Subroutine */ int _starpu_dpftri_(char *transr, char *uplo, integer *n, doublereal *a, integer *info) { /* System generated locals */ integer i__1, i__2; /* Local variables */ integer k, n1, n2; logical normaltransr; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical lower; extern /* Subroutine */ int _starpu_dsyrk_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); logical nisodd; extern /* Subroutine */ int _starpu_dlauum_(char *, integer *, doublereal *, integer *, integer *), _starpu_dtftri_(char *, char *, char *, integer *, doublereal *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPFTRI computes the inverse of a (real) symmetric positive definite */ /* matrix A using the Cholesky factorization A = U**T*U or A = L*L**T */ /* computed by DPFTRF. */ /* Arguments */ /* ========= */ /* TRANSR (input) CHARACTER */ /* = 'N': The Normal TRANSR of RFP A is stored; */ /* = 'T': The Transpose TRANSR of RFP A is stored. */ /* UPLO (input) CHARACTER */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ) */ /* On entry, the symmetric matrix A in RFP format. RFP format is */ /* described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' */ /* then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is */ /* (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is */ /* the transpose of RFP A as defined when */ /* TRANSR = 'N'. The contents of RFP A are defined by UPLO as */ /* follows: If UPLO = 'U' the RFP A contains the nt elements of */ /* upper packed A. If UPLO = 'L' the RFP A contains the elements */ /* of lower packed A. The LDA of RFP A is (N+1)/2 when TRANSR = */ /* 'T'. When TRANSR is 'N' the LDA is N+1 when N is even and N */ /* is odd. See the Note below for more details. */ /* On exit, the symmetric inverse of the original matrix, in the */ /* same storage format. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the (i,i) element of the factor U or L is */ /* zero, and the inverse could not be computed. */ /* Notes */ /* ===== */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* even. We give an example where N = 6. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 05 00 */ /* 11 12 13 14 15 10 11 */ /* 22 23 24 25 20 21 22 */ /* 33 34 35 30 31 32 33 */ /* 44 45 40 41 42 43 44 */ /* 55 50 51 52 53 54 55 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ /* the transpose of the first three columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ /* the transpose of the last three columns of AP lower. */ /* This covers the case N even and TRANSR = 'N'. */ /* RFP A RFP A */ /* 03 04 05 33 43 53 */ /* 13 14 15 00 44 54 */ /* 23 24 25 10 11 55 */ /* 33 34 35 20 21 22 */ /* 00 44 45 30 31 32 */ /* 01 11 55 40 41 42 */ /* 02 12 22 50 51 52 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ /* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ /* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* odd. We give an example where N = 5. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 00 */ /* 11 12 13 14 10 11 */ /* 22 23 24 20 21 22 */ /* 33 34 30 31 32 33 */ /* 44 40 41 42 43 44 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ /* the transpose of the first two columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ /* the transpose of the last two columns of AP lower. */ /* This covers the case N odd and TRANSR = 'N'. */ /* RFP A RFP A */ /* 02 03 04 00 33 43 */ /* 12 13 14 10 11 44 */ /* 22 23 24 20 21 22 */ /* 00 33 34 30 31 32 */ /* 01 11 44 40 41 42 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 02 12 22 00 01 00 10 20 30 40 50 */ /* 03 13 23 33 11 33 11 21 31 41 51 */ /* 04 14 24 34 44 43 44 22 32 42 52 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ *info = 0; normaltransr = _starpu_lsame_(transr, "N"); lower = _starpu_lsame_(uplo, "L"); if (! normaltransr && ! _starpu_lsame_(transr, "T")) { *info = -1; } else if (! lower && ! _starpu_lsame_(uplo, "U")) { *info = -2; } else if (*n < 0) { *info = -3; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPFTRI", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Invert the triangular Cholesky factor U or L. */ _starpu_dtftri_(transr, uplo, "N", n, a, info); if (*info > 0) { return 0; } /* If N is odd, set NISODD = .TRUE. */ /* If N is even, set K = N/2 and NISODD = .FALSE. */ if (*n % 2 == 0) { k = *n / 2; nisodd = FALSE_; } else { nisodd = TRUE_; } /* Set N1 and N2 depending on LOWER */ if (lower) { n2 = *n / 2; n1 = *n - n2; } else { n1 = *n / 2; n2 = *n - n1; } /* Start execution of triangular matrix multiply: inv(U)*inv(U)^C or */ /* inv(L)^C*inv(L). There are eight cases. */ if (nisodd) { /* N is odd */ if (normaltransr) { /* N is odd and TRANSR = 'N' */ if (lower) { /* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:N1-1) ) */ /* T1 -> a(0,0), T2 -> a(0,1), S -> a(N1,0) */ /* T1 -> a(0), T2 -> a(n), S -> a(N1) */ _starpu_dlauum_("L", &n1, a, n, info); _starpu_dsyrk_("L", "T", &n1, &n2, &c_b11, &a[n1], n, &c_b11, a, n); _starpu_dtrmm_("L", "U", "N", "N", &n2, &n1, &c_b11, &a[*n], n, &a[n1] , n); _starpu_dlauum_("U", &n2, &a[*n], n, info); } else { /* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:N2-1) */ /* T1 -> a(N1+1,0), T2 -> a(N1,0), S -> a(0,0) */ /* T1 -> a(N2), T2 -> a(N1), S -> a(0) */ _starpu_dlauum_("L", &n1, &a[n2], n, info); _starpu_dsyrk_("L", "N", &n1, &n2, &c_b11, a, n, &c_b11, &a[n2], n); _starpu_dtrmm_("R", "U", "T", "N", &n1, &n2, &c_b11, &a[n1], n, a, n); _starpu_dlauum_("U", &n2, &a[n1], n, info); } } else { /* N is odd and TRANSR = 'T' */ if (lower) { /* SRPA for LOWER, TRANSPOSE, and N is odd */ /* T1 -> a(0), T2 -> a(1), S -> a(0+N1*N1) */ _starpu_dlauum_("U", &n1, a, &n1, info); _starpu_dsyrk_("U", "N", &n1, &n2, &c_b11, &a[n1 * n1], &n1, &c_b11, a, &n1); _starpu_dtrmm_("R", "L", "N", "N", &n1, &n2, &c_b11, &a[1], &n1, &a[ n1 * n1], &n1); _starpu_dlauum_("L", &n2, &a[1], &n1, info); } else { /* SRPA for UPPER, TRANSPOSE, and N is odd */ /* T1 -> a(0+N2*N2), T2 -> a(0+N1*N2), S -> a(0) */ _starpu_dlauum_("U", &n1, &a[n2 * n2], &n2, info); _starpu_dsyrk_("U", "T", &n1, &n2, &c_b11, a, &n2, &c_b11, &a[n2 * n2] , &n2); _starpu_dtrmm_("L", "L", "T", "N", &n2, &n1, &c_b11, &a[n1 * n2], &n2, a, &n2); _starpu_dlauum_("L", &n2, &a[n1 * n2], &n2, info); } } } else { /* N is even */ if (normaltransr) { /* N is even and TRANSR = 'N' */ if (lower) { /* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ /* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) */ /* T1 -> a(1), T2 -> a(0), S -> a(k+1) */ i__1 = *n + 1; _starpu_dlauum_("L", &k, &a[1], &i__1, info); i__1 = *n + 1; i__2 = *n + 1; _starpu_dsyrk_("L", "T", &k, &k, &c_b11, &a[k + 1], &i__1, &c_b11, &a[ 1], &i__2); i__1 = *n + 1; i__2 = *n + 1; _starpu_dtrmm_("L", "U", "N", "N", &k, &k, &c_b11, a, &i__1, &a[k + 1] , &i__2); i__1 = *n + 1; _starpu_dlauum_("U", &k, a, &i__1, info); } else { /* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ /* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) */ /* T1 -> a(k+1), T2 -> a(k), S -> a(0) */ i__1 = *n + 1; _starpu_dlauum_("L", &k, &a[k + 1], &i__1, info); i__1 = *n + 1; i__2 = *n + 1; _starpu_dsyrk_("L", "N", &k, &k, &c_b11, a, &i__1, &c_b11, &a[k + 1], &i__2); i__1 = *n + 1; i__2 = *n + 1; _starpu_dtrmm_("R", "U", "T", "N", &k, &k, &c_b11, &a[k], &i__1, a, & i__2); i__1 = *n + 1; _starpu_dlauum_("U", &k, &a[k], &i__1, info); } } else { /* N is even and TRANSR = 'T' */ if (lower) { /* SRPA for LOWER, TRANSPOSE, and N is even (see paper) */ /* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1), */ /* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k */ _starpu_dlauum_("U", &k, &a[k], &k, info); _starpu_dsyrk_("U", "N", &k, &k, &c_b11, &a[k * (k + 1)], &k, &c_b11, &a[k], &k); _starpu_dtrmm_("R", "L", "N", "N", &k, &k, &c_b11, a, &k, &a[k * (k + 1)], &k); _starpu_dlauum_("L", &k, a, &k, info); } else { /* SRPA for UPPER, TRANSPOSE, and N is even (see paper) */ /* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0), */ /* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k */ _starpu_dlauum_("U", &k, &a[k * (k + 1)], &k, info); _starpu_dsyrk_("U", "T", &k, &k, &c_b11, a, &k, &c_b11, &a[k * (k + 1) ], &k); _starpu_dtrmm_("L", "L", "T", "N", &k, &k, &c_b11, &a[k * k], &k, a, & k); _starpu_dlauum_("L", &k, &a[k * k], &k, info); } } } return 0; /* End of DPFTRI */ } /* _starpu_dpftri_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpftrs.c000066400000000000000000000174021507764646700207020ustar00rootroot00000000000000/* dpftrs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b10 = 1.; /* Subroutine */ int _starpu_dpftrs_(char *transr, char *uplo, integer *n, integer * nrhs, doublereal *a, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer b_dim1, b_offset, i__1; /* Local variables */ logical normaltransr; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtfsm_(char *, char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); logical lower; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPFTRS solves a system of linear equations A*X = B with a symmetric */ /* positive definite matrix A using the Cholesky factorization */ /* A = U**T*U or A = L*L**T computed by DPFTRF. */ /* Arguments */ /* ========= */ /* TRANSR (input) CHARACTER */ /* = 'N': The Normal TRANSR of RFP A is stored; */ /* = 'T': The Transpose TRANSR of RFP A is stored. */ /* UPLO (input) CHARACTER */ /* = 'U': Upper triangle of RFP A is stored; */ /* = 'L': Lower triangle of RFP A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ). */ /* The triangular factor U or L from the Cholesky factorization */ /* of RFP A = U**T*U or RFP A = L*L**T, as computed by DPFTRF. */ /* See note below for more details about RFP A. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side matrix B. */ /* On exit, the solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Notes */ /* ===== */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* even. We give an example where N = 6. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 05 00 */ /* 11 12 13 14 15 10 11 */ /* 22 23 24 25 20 21 22 */ /* 33 34 35 30 31 32 33 */ /* 44 45 40 41 42 43 44 */ /* 55 50 51 52 53 54 55 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ /* the transpose of the first three columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ /* the transpose of the last three columns of AP lower. */ /* This covers the case N even and TRANSR = 'N'. */ /* RFP A RFP A */ /* 03 04 05 33 43 53 */ /* 13 14 15 00 44 54 */ /* 23 24 25 10 11 55 */ /* 33 34 35 20 21 22 */ /* 00 44 45 30 31 32 */ /* 01 11 55 40 41 42 */ /* 02 12 22 50 51 52 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ /* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ /* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* odd. We give an example where N = 5. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 00 */ /* 11 12 13 14 10 11 */ /* 22 23 24 20 21 22 */ /* 33 34 30 31 32 33 */ /* 44 40 41 42 43 44 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ /* the transpose of the first two columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ /* the transpose of the last two columns of AP lower. */ /* This covers the case N odd and TRANSR = 'N'. */ /* RFP A RFP A */ /* 02 03 04 00 33 43 */ /* 12 13 14 10 11 44 */ /* 22 23 24 20 21 22 */ /* 00 33 34 30 31 32 */ /* 01 11 44 40 41 42 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 02 12 22 00 01 00 10 20 30 40 50 */ /* 03 13 23 33 11 33 11 21 31 41 51 */ /* 04 14 24 34 44 43 44 22 32 42 52 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; normaltransr = _starpu_lsame_(transr, "N"); lower = _starpu_lsame_(uplo, "L"); if (! normaltransr && ! _starpu_lsame_(transr, "T")) { *info = -1; } else if (! lower && ! _starpu_lsame_(uplo, "U")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPFTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } /* start execution: there are two triangular solves */ if (lower) { _starpu_dtfsm_(transr, "L", uplo, "N", "N", n, nrhs, &c_b10, a, &b[b_offset], ldb); _starpu_dtfsm_(transr, "L", uplo, "T", "N", n, nrhs, &c_b10, a, &b[b_offset], ldb); } else { _starpu_dtfsm_(transr, "L", uplo, "T", "N", n, nrhs, &c_b10, a, &b[b_offset], ldb); _starpu_dtfsm_(transr, "L", uplo, "N", "N", n, nrhs, &c_b10, a, &b[b_offset], ldb); } return 0; /* End of DPFTRS */ } /* _starpu_dpftrs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpocon.c000066400000000000000000000142661507764646700206670ustar00rootroot00000000000000/* dpocon.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dpocon_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *anorm, doublereal *rcond, doublereal *work, integer * iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1; doublereal d__1; /* Local variables */ integer ix, kase; doublereal scale; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); doublereal scalel; extern integer _starpu_idamax_(integer *, doublereal *, integer *); doublereal scaleu; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal ainvnm; extern /* Subroutine */ int _starpu_dlatrs_(char *, char *, char *, char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *); char normin[1]; doublereal smlnum; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPOCON estimates the reciprocal of the condition number (in the */ /* 1-norm) of a real symmetric positive definite matrix using the */ /* Cholesky factorization A = U**T*U or A = L*L**T computed by DPOTRF. */ /* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ /* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The triangular factor U or L from the Cholesky factorization */ /* A = U**T*U or A = L*L**T, as computed by DPOTRF. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* ANORM (input) DOUBLE PRECISION */ /* The 1-norm (or infinity-norm) of the symmetric matrix A. */ /* RCOND (output) DOUBLE PRECISION */ /* The reciprocal of the condition number of the matrix A, */ /* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ /* estimate of the 1-norm of inv(A) computed in this routine. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --work; --iwork; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } else if (*anorm < 0.) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPOCON", &i__1); return 0; } /* Quick return if possible */ *rcond = 0.; if (*n == 0) { *rcond = 1.; return 0; } else if (*anorm == 0.) { return 0; } smlnum = _starpu_dlamch_("Safe minimum"); /* Estimate the 1-norm of inv(A). */ kase = 0; *(unsigned char *)normin = 'N'; L10: _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); if (kase != 0) { if (upper) { /* Multiply by inv(U'). */ _starpu_dlatrs_("Upper", "Transpose", "Non-unit", normin, n, &a[a_offset], lda, &work[1], &scalel, &work[(*n << 1) + 1], info); *(unsigned char *)normin = 'Y'; /* Multiply by inv(U). */ _starpu_dlatrs_("Upper", "No transpose", "Non-unit", normin, n, &a[ a_offset], lda, &work[1], &scaleu, &work[(*n << 1) + 1], info); } else { /* Multiply by inv(L). */ _starpu_dlatrs_("Lower", "No transpose", "Non-unit", normin, n, &a[ a_offset], lda, &work[1], &scalel, &work[(*n << 1) + 1], info); *(unsigned char *)normin = 'Y'; /* Multiply by inv(L'). */ _starpu_dlatrs_("Lower", "Transpose", "Non-unit", normin, n, &a[a_offset], lda, &work[1], &scaleu, &work[(*n << 1) + 1], info); } /* Multiply by 1/SCALE if doing so will not cause overflow. */ scale = scalel * scaleu; if (scale != 1.) { ix = _starpu_idamax_(n, &work[1], &c__1); if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) { goto L20; } _starpu_drscl_(n, &scale, &work[1], &c__1); } goto L10; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { *rcond = 1. / ainvnm / *anorm; } L20: return 0; /* End of DPOCON */ } /* _starpu_dpocon_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpoequ.c000066400000000000000000000112001507764646700206630ustar00rootroot00000000000000/* dpoequ.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dpoequ_(integer *n, doublereal *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__; doublereal smin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPOEQU computes row and column scalings intended to equilibrate a */ /* symmetric positive definite matrix A and reduce its condition number */ /* (with respect to the two-norm). S contains the scale factors, */ /* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with */ /* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This */ /* choice of S puts the condition number of B within a factor N of the */ /* smallest possible condition number over all possible diagonal */ /* scalings. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The N-by-N symmetric positive definite matrix whose scaling */ /* factors are to be computed. Only the diagonal elements of A */ /* are referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* S (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, S contains the scale factors for A. */ /* SCOND (output) DOUBLE PRECISION */ /* If INFO = 0, S contains the ratio of the smallest S(i) to */ /* the largest S(i). If SCOND >= 0.1 and AMAX is neither too */ /* large nor too small, it is not worth scaling by S. */ /* AMAX (output) DOUBLE PRECISION */ /* Absolute value of largest matrix element. If AMAX is very */ /* close to overflow or very close to underflow, the matrix */ /* should be scaled. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the i-th diagonal element is nonpositive. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --s; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*lda < max(1,*n)) { *info = -3; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPOEQU", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { *scond = 1.; *amax = 0.; return 0; } /* Find the minimum and maximum diagonal elements. */ s[1] = a[a_dim1 + 1]; smin = s[1]; *amax = s[1]; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { s[i__] = a[i__ + i__ * a_dim1]; /* Computing MIN */ d__1 = smin, d__2 = s[i__]; smin = min(d__1,d__2); /* Computing MAX */ d__1 = *amax, d__2 = s[i__]; *amax = max(d__1,d__2); /* L10: */ } if (smin <= 0.) { /* Find the first non-positive diagonal element and return. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (s[i__] <= 0.) { *info = i__; return 0; } /* L20: */ } } else { /* Set the scale factors to the reciprocals */ /* of the diagonal elements. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { s[i__] = 1. / sqrt(s[i__]); /* L30: */ } /* Compute SCOND = min(S(I)) / max(S(I)) */ *scond = sqrt(smin) / sqrt(*amax); } return 0; /* End of DPOEQU */ } /* _starpu_dpoequ_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpoequb.c000066400000000000000000000124511507764646700210360ustar00rootroot00000000000000/* dpoequb.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dpoequb_(integer *n, doublereal *a, integer *lda, doublereal *s, doublereal *scond, doublereal *amax, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal); /* Local variables */ integer i__; doublereal tmp, base, smin; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPOEQU computes row and column scalings intended to equilibrate a */ /* symmetric positive definite matrix A and reduce its condition number */ /* (with respect to the two-norm). S contains the scale factors, */ /* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with */ /* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This */ /* choice of S puts the condition number of B within a factor N of the */ /* smallest possible condition number over all possible diagonal */ /* scalings. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The N-by-N symmetric positive definite matrix whose scaling */ /* factors are to be computed. Only the diagonal elements of A */ /* are referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* S (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, S contains the scale factors for A. */ /* SCOND (output) DOUBLE PRECISION */ /* If INFO = 0, S contains the ratio of the smallest S(i) to */ /* the largest S(i). If SCOND >= 0.1 and AMAX is neither too */ /* large nor too small, it is not worth scaling by S. */ /* AMAX (output) DOUBLE PRECISION */ /* Absolute value of largest matrix element. If AMAX is very */ /* close to overflow or very close to underflow, the matrix */ /* should be scaled. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the i-th diagonal element is nonpositive. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Positive definite only performs 1 pass of equilibration. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --s; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*lda < max(1,*n)) { *info = -3; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPOEQUB", &i__1); return 0; } /* Quick return if possible. */ if (*n == 0) { *scond = 1.; *amax = 0.; return 0; } base = _starpu_dlamch_("B"); tmp = -.5 / log(base); /* Find the minimum and maximum diagonal elements. */ s[1] = a[a_dim1 + 1]; smin = s[1]; *amax = s[1]; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { s[i__] = a[i__ + i__ * a_dim1]; /* Computing MIN */ d__1 = smin, d__2 = s[i__]; smin = min(d__1,d__2); /* Computing MAX */ d__1 = *amax, d__2 = s[i__]; *amax = max(d__1,d__2); /* L10: */ } if (smin <= 0.) { /* Find the first non-positive diagonal element and return. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (s[i__] <= 0.) { *info = i__; return 0; } /* L20: */ } } else { /* Set the scale factors to the reciprocals */ /* of the diagonal elements. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = (integer) (tmp * log(s[i__])); s[i__] = pow_di(&base, &i__2); /* L30: */ } /* Compute SCOND = min(S(I)) / max(S(I)). */ *scond = sqrt(smin) / sqrt(*amax); } return 0; /* End of DPOEQUB */ } /* _starpu_dpoequb_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dporfs.c000066400000000000000000000307631507764646700207020ustar00rootroot00000000000000/* dporfs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b12 = -1.; static doublereal c_b14 = 1.; /* Subroutine */ int _starpu_dporfs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * ferr, doublereal *berr, doublereal *work, integer *iwork, integer * info) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j, k; doublereal s, xk; integer nz; doublereal eps; integer kase; doublereal safe1, safe2; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer count; logical upper; extern /* Subroutine */ int _starpu_dsymv_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpotrs_( char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); doublereal lstres; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPORFS improves the computed solution to a system of linear */ /* equations when the coefficient matrix is symmetric positive definite, */ /* and provides error bounds and backward error estimates for the */ /* solution. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The symmetric matrix A. If UPLO = 'U', the leading N-by-N */ /* upper triangular part of A contains the upper triangular part */ /* of the matrix A, and the strictly lower triangular part of A */ /* is not referenced. If UPLO = 'L', the leading N-by-N lower */ /* triangular part of A contains the lower triangular part of */ /* the matrix A, and the strictly upper triangular part of A is */ /* not referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The triangular factor U or L from the Cholesky factorization */ /* A = U**T*U or A = L*L**T, as computed by DPOTRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* On entry, the solution matrix X, as computed by DPOTRS. */ /* On exit, the improved solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Internal Parameters */ /* =================== */ /* ITMAX is the maximum number of steps of iterative refinement. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldaf < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } else if (*ldx < max(1,*n)) { *info = -11; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPORFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = *n + 1; eps = _starpu_dlamch_("Epsilon"); safmin = _starpu_dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - A * X */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); _starpu_dsymv_(uplo, n, &c_b12, &a[a_offset], lda, &x[j * x_dim1 + 1], &c__1, &c_b14, &work[*n + 1], &c__1); /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. If the i-th component of the denominator is less */ /* than SAFE2, then SAFE1 is added to the i-th components of the */ /* numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); /* L30: */ } /* Compute abs(A)*abs(X) + abs(B). */ if (upper) { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; xk = (d__1 = x[k + j * x_dim1], abs(d__1)); i__3 = k - 1; for (i__ = 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * xk; s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = x[ i__ + j * x_dim1], abs(d__2)); /* L40: */ } work[k] = work[k] + (d__1 = a[k + k * a_dim1], abs(d__1)) * xk + s; /* L50: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; xk = (d__1 = x[k + j * x_dim1], abs(d__1)); work[k] += (d__1 = a[k + k * a_dim1], abs(d__1)) * xk; i__3 = *n; for (i__ = k + 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * xk; s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = x[ i__ + j * x_dim1], abs(d__2)); /* L60: */ } work[k] += s; /* L70: */ } } s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L80: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if */ /* 1) The residual BERR(J) is larger than machine epsilon, and */ /* 2) BERR(J) decreased by at least a factor of 2 during the */ /* last iteration, and */ /* 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { /* Update solution and try again. */ _starpu_dpotrs_(uplo, n, &c__1, &af[af_offset], ldaf, &work[*n + 1], n, info); _starpu_daxpy_(n, &c_b14, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) ; lstres = berr[j]; ++count; goto L20; } /* Bound error from formula */ /* norm(X - XTRUE) / norm(X) .le. FERR = */ /* norm( abs(inv(A))* */ /* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */ /* where */ /* norm(Z) is the magnitude of the largest component of Z */ /* inv(A) is the inverse of A */ /* abs(Z) is the componentwise absolute value of the matrix or */ /* vector Z */ /* NZ is the maximum number of nonzeros in any row of A, plus 1 */ /* EPS is machine epsilon */ /* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */ /* is incremented by SAFE1 if the i-th component of */ /* abs(A)*abs(X) + abs(B) is less than SAFE2. */ /* Use DLACN2 to estimate the infinity-norm of the matrix */ /* inv(A) * diag(W), */ /* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L90: */ } kase = 0; L100: _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase, isave); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(A'). */ _starpu_dpotrs_(uplo, n, &c__1, &af[af_offset], ldaf, &work[*n + 1], n, info); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L110: */ } } else if (kase == 2) { /* Multiply by inv(A)*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L120: */ } _starpu_dpotrs_(uplo, n, &c__1, &af[af_offset], ldaf, &work[*n + 1], n, info); } goto L100; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); lstres = max(d__2,d__3); /* L130: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L140: */ } return 0; /* End of DPORFS */ } /* _starpu_dporfs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dporfsx.c000066400000000000000000000600131507764646700210610ustar00rootroot00000000000000/* dporfsx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c_n1 = -1; static integer c__0 = 0; static integer c__1 = 1; /* Subroutine */ int _starpu_dporfsx_(char *uplo, char *equed, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, doublereal *s, doublereal *b, integer *ldb, doublereal *x, integer * ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * nparams, doublereal *params, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, err_bnds_comp_dim1, err_bnds_comp_offset, i__1; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal illrcond_thresh__, unstable_thresh__, err_lbnd__; integer ref_type__, j; doublereal rcond_tmp__; integer prec_type__; extern doublereal _starpu_dla_porcond__(char *, integer *, doublereal *, integer * , doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen); doublereal cwise_wrong__; extern /* Subroutine */ int _starpu_dla_porfsx_extended__(integer *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, logical *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *, ftnlen); char norm[1]; logical ignore_cwise__; extern logical _starpu_lsame_(char *, char *); doublereal anorm; logical rcequ; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpocon_( char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); extern integer _starpu_ilaprec_(char *); integer ithresh, n_norms__; doublereal rthresh; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPORFSX improves the computed solution to a system of linear */ /* equations when the coefficient matrix is symmetric positive */ /* definite, and provides error bounds and backward error estimates */ /* for the solution. In addition to normwise error bound, the code */ /* provides maximum componentwise error bound if possible. See */ /* comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the */ /* error bounds. */ /* The original system of linear equations may have been equilibrated */ /* before calling this routine, as described by arguments EQUED and S */ /* below. In this case, the solution and error bounds returned are */ /* for the original unequilibrated system. */ /* Arguments */ /* ========= */ /* Some optional parameters are bundled in the PARAMS array. These */ /* settings determine how refinement is performed, but often the */ /* defaults are acceptable. If the defaults are acceptable, users */ /* can pass NPARAMS = 0 which prevents the source code from accessing */ /* the PARAMS argument. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* EQUED (input) CHARACTER*1 */ /* Specifies the form of equilibration that was done to A */ /* before calling this routine. This is needed to compute */ /* the solution and error bounds correctly. */ /* = 'N': No equilibration */ /* = 'Y': Both row and column equilibration, i.e., A has been */ /* replaced by diag(S) * A * diag(S). */ /* The right hand side B has been changed accordingly. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The symmetric matrix A. If UPLO = 'U', the leading N-by-N */ /* upper triangular part of A contains the upper triangular part */ /* of the matrix A, and the strictly lower triangular part of A */ /* is not referenced. If UPLO = 'L', the leading N-by-N lower */ /* triangular part of A contains the lower triangular part of */ /* the matrix A, and the strictly upper triangular part of A is */ /* not referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The triangular factor U or L from the Cholesky factorization */ /* A = U**T*U or A = L*L**T, as computed by DPOTRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* S (input or output) DOUBLE PRECISION array, dimension (N) */ /* The row scale factors for A. If EQUED = 'Y', A is multiplied on */ /* the left and right by diag(S). S is an input argument if FACT = */ /* 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED */ /* = 'Y', each element of S must be positive. If S is output, each */ /* element of S is a power of the radix. If S is input, each element */ /* of S should be a power of the radix to ensure a reliable solution */ /* and error estimates. Scaling by powers of the radix does not cause */ /* rounding errors unless the result underflows or overflows. */ /* Rounding errors during scaling lead to refining with a matrix that */ /* is not equivalent to the input matrix, producing error estimates */ /* that may not be reliable. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* On entry, the solution matrix X, as computed by DGETRS. */ /* On exit, the improved solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* Reciprocal scaled condition number. This is an estimate of the */ /* reciprocal Skeel condition number of the matrix A after */ /* equilibration (if done). If this is less than the machine */ /* precision (in particular, if it is zero), the matrix is singular */ /* to working precision. Note that the error may still be small even */ /* if this number is very small and the matrix appears ill- */ /* conditioned. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* Componentwise relative backward error. This is the */ /* componentwise relative backward error of each solution vector X(j) */ /* (i.e., the smallest relative change in any element of A or B that */ /* makes X(j) an exact solution). */ /* N_ERR_BNDS (input) INTEGER */ /* Number of error bounds to return for each right hand side */ /* and each type (normwise or componentwise). See ERR_BNDS_NORM and */ /* ERR_BNDS_COMP below. */ /* ERR_BNDS_NORM (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* normwise relative error, which is defined as follows: */ /* Normwise relative error in the ith solution vector: */ /* max_j (abs(XTRUE(j,i) - X(j,i))) */ /* ------------------------------ */ /* max_j abs(X(j,i)) */ /* The array is indexed by the type of error information as described */ /* below. There currently are up to three pieces of information */ /* returned. */ /* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_NORM(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * dlamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * dlamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated normwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * dlamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*A, where S scales each row by a power of the */ /* radix so all absolute row sums of Z are approximately 1. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* ERR_BNDS_COMP (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* componentwise relative error, which is defined as follows: */ /* Componentwise relative error in the ith solution vector: */ /* abs(XTRUE(j,i) - X(j,i)) */ /* max_j ---------------------- */ /* abs(X(j,i)) */ /* The array is indexed by the right-hand side i (on which the */ /* componentwise relative error depends), and the type of error */ /* information as described below. There currently are up to three */ /* pieces of information returned for each right-hand side. If */ /* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ /* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ /* the first (:,N_ERR_BNDS) entries are returned. */ /* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_COMP(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * dlamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * dlamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated componentwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * dlamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*(A*diag(x)), where x is the solution for the */ /* current right-hand side and S scales each row of */ /* A*diag(x) by a power of the radix so all absolute row */ /* sums of Z are approximately 1. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* NPARAMS (input) INTEGER */ /* Specifies the number of parameters set in PARAMS. If .LE. 0, the */ /* PARAMS array is never referenced and default values are used. */ /* PARAMS (input / output) DOUBLE PRECISION array, dimension NPARAMS */ /* Specifies algorithm parameters. If an entry is .LT. 0.0, then */ /* that entry will be filled with default value used for that */ /* parameter. Only positions up to NPARAMS are accessed; defaults */ /* are used for higher-numbered parameters. */ /* PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ /* refinement or not. */ /* Default: 1.0D+0 */ /* = 0.0 : No refinement is performed, and no error bounds are */ /* computed. */ /* = 1.0 : Use the double-precision refinement algorithm, */ /* possibly with doubled-single computations if the */ /* compilation environment does not support DOUBLE */ /* PRECISION. */ /* (other values are reserved for future use) */ /* PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ /* computations allowed for refinement. */ /* Default: 10 */ /* Aggressive: Set to 100 to permit convergence using approximate */ /* factorizations or factorizations other than LU. If */ /* the factorization uses a technique other than */ /* Gaussian elimination, the guarantees in */ /* err_bnds_norm and err_bnds_comp may no longer be */ /* trustworthy. */ /* PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ /* will attempt to find a solution with small componentwise */ /* relative error in the double-precision algorithm. Positive */ /* is true, 0.0 is false. */ /* Default: 1.0 (attempt componentwise convergence) */ /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: Successful exit. The solution to every right-hand side is */ /* guaranteed. */ /* < 0: If INFO = -i, the i-th argument had an illegal value */ /* > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly singular, so */ /* the solution and error bounds could not be computed. RCOND = 0 */ /* is returned. */ /* = N+J: The solution corresponding to the Jth right-hand side is */ /* not guaranteed. The solutions corresponding to other right- */ /* hand sides K with K > J may not be guaranteed as well, but */ /* only the first such right-hand side is reported. If a small */ /* componentwise error is not requested (PARAMS(3) = 0.0) then */ /* the Jth right-hand side is the first with a normwise error */ /* bound that is not guaranteed (the smallest J such */ /* that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ /* the Jth right-hand side is the first with either a normwise or */ /* componentwise error bound that is not guaranteed (the smallest */ /* J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ /* ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ /* ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ /* about all of the right-hand sides check ERR_BNDS_NORM or */ /* ERR_BNDS_COMP. */ /* ================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Check the input parameters. */ /* Parameter adjustments */ err_bnds_comp_dim1 = *nrhs; err_bnds_comp_offset = 1 + err_bnds_comp_dim1; err_bnds_comp__ -= err_bnds_comp_offset; err_bnds_norm_dim1 = *nrhs; err_bnds_norm_offset = 1 + err_bnds_norm_dim1; err_bnds_norm__ -= err_bnds_norm_offset; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --s; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --berr; --params; --work; --iwork; /* Function Body */ *info = 0; ref_type__ = 1; if (*nparams >= 1) { if (params[1] < 0.) { params[1] = 1.; } else { ref_type__ = (integer) params[1]; } } /* Set default parameters. */ illrcond_thresh__ = (doublereal) (*n) * _starpu_dlamch_("Epsilon"); ithresh = 10; rthresh = .5; unstable_thresh__ = .25; ignore_cwise__ = FALSE_; if (*nparams >= 2) { if (params[2] < 0.) { params[2] = (doublereal) ithresh; } else { ithresh = (integer) params[2]; } } if (*nparams >= 3) { if (params[3] < 0.) { if (ignore_cwise__) { params[3] = 0.; } else { params[3] = 1.; } } else { ignore_cwise__ = params[3] == 0.; } } if (ref_type__ == 0 || *n_err_bnds__ == 0) { n_norms__ = 0; } else if (ignore_cwise__) { n_norms__ = 1; } else { n_norms__ = 2; } rcequ = _starpu_lsame_(equed, "Y"); /* Test input parameters. */ if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (! rcequ && ! _starpu_lsame_(equed, "N")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldaf < max(1,*n)) { *info = -8; } else if (*ldb < max(1,*n)) { *info = -11; } else if (*ldx < max(1,*n)) { *info = -13; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPORFSX", &i__1); return 0; } /* Quick return if possible. */ if (*n == 0 || *nrhs == 0) { *rcond = 1.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { berr[j] = 0.; if (*n_err_bnds__ >= 1) { err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; } else if (*n_err_bnds__ >= 2) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 0.; err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 0.; } else if (*n_err_bnds__ >= 3) { err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 1.; } } return 0; } /* Default to failure. */ *rcond = 0.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { berr[j] = 1.; if (*n_err_bnds__ >= 1) { err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; } else if (*n_err_bnds__ >= 2) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; } else if (*n_err_bnds__ >= 3) { err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 0.; err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 0.; } } /* Compute the norm of A and the reciprocal of the condition */ /* number of A. */ *(unsigned char *)norm = 'I'; anorm = _starpu_dlansy_(norm, uplo, n, &a[a_offset], lda, &work[1]); _starpu_dpocon_(uplo, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1], info); /* Perform refinement on each right-hand side */ if (ref_type__ != 0) { prec_type__ = _starpu_ilaprec_("E"); _starpu_dla_porfsx_extended__(&prec_type__, uplo, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &rcequ, &s[1], &b[b_offset], ldb, &x[ x_offset], ldx, &berr[1], &n_norms__, &err_bnds_norm__[ err_bnds_norm_offset], &err_bnds_comp__[err_bnds_comp_offset], &work[*n + 1], &work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh, &rthresh, &unstable_thresh__, & ignore_cwise__, info, (ftnlen)1); } /* Computing MAX */ d__1 = 10., d__2 = sqrt((doublereal) (*n)); err_lbnd__ = max(d__1,d__2) * _starpu_dlamch_("Epsilon"); if (*n_err_bnds__ >= 1 && n_norms__ >= 1) { /* Compute scaled normwise condition number cond(A*C). */ if (rcequ) { rcond_tmp__ = _starpu_dla_porcond__(uplo, n, &a[a_offset], lda, &af[ af_offset], ldaf, &c_n1, &s[1], info, &work[1], &iwork[1], (ftnlen)1); } else { rcond_tmp__ = _starpu_dla_porcond__(uplo, n, &a[a_offset], lda, &af[ af_offset], ldaf, &c__0, &s[1], info, &work[1], &iwork[1], (ftnlen)1); } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { /* Cap the error at 1.0. */ if (*n_err_bnds__ >= 2 && err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] > 1.) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; } /* Threshold the error (see LAWN). */ if (rcond_tmp__ < illrcond_thresh__) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; err_bnds_norm__[j + err_bnds_norm_dim1] = 0.; if (*info <= *n) { *info = *n + j; } } else if (err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] < err_lbnd__) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = err_lbnd__; err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; } /* Save the condition number. */ if (*n_err_bnds__ >= 3) { err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = rcond_tmp__; } } } if (*n_err_bnds__ >= 1 && n_norms__ >= 2) { /* Compute componentwise condition number cond(A*diag(Y(:,J))) for */ /* each right-hand side using the current solution as an estimate of */ /* the true solution. If the componentwise error estimate is too */ /* large, then the solution is a lousy estimate of truth and the */ /* estimated RCOND may be too optimistic. To avoid misleading users, */ /* the inverse condition number is set to 0.0 when the estimated */ /* cwise error is at least CWISE_WRONG. */ cwise_wrong__ = sqrt(_starpu_dlamch_("Epsilon")); i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < cwise_wrong__) { rcond_tmp__ = _starpu_dla_porcond__(uplo, n, &a[a_offset], lda, &af[ af_offset], ldaf, &c__1, &x[j * x_dim1 + 1], info, & work[1], &iwork[1], (ftnlen)1); } else { rcond_tmp__ = 0.; } /* Cap the error at 1.0. */ if (*n_err_bnds__ >= 2 && err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] > 1.) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; } /* Threshold the error (see LAWN). */ if (rcond_tmp__ < illrcond_thresh__) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1] = 0.; if (params[3] == 1. && *info < *n + j) { *info = *n + j; } } else if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < err_lbnd__) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = err_lbnd__; err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; } /* Save the condition number. */ if (*n_err_bnds__ >= 3) { err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = rcond_tmp__; } } } return 0; /* End of DPORFSX */ } /* _starpu_dporfsx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dposv.c000066400000000000000000000116501507764646700205320ustar00rootroot00000000000000/* dposv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dposv_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpotrf_( char *, integer *, doublereal *, integer *, integer *), _starpu_dpotrs_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPOSV computes the solution to a real system of linear equations */ /* A * X = B, */ /* where A is an N-by-N symmetric positive definite matrix and X and B */ /* are N-by-NRHS matrices. */ /* The Cholesky decomposition is used to factor A as */ /* A = U**T* U, if UPLO = 'U', or */ /* A = L * L**T, if UPLO = 'L', */ /* where U is an upper triangular matrix and L is a lower triangular */ /* matrix. The factored form of A is then used to solve the system of */ /* equations A * X = B. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* N-by-N upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if INFO = 0, the factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS right hand side matrix B. */ /* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the leading minor of order i of A is not */ /* positive definite, so the factorization could not be */ /* completed, and the solution has not been computed. */ /* ===================================================================== */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPOSV ", &i__1); return 0; } /* Compute the Cholesky factorization A = U'*U or A = L*L'. */ _starpu_dpotrf_(uplo, n, &a[a_offset], lda, info); if (*info == 0) { /* Solve the system A*X = B, overwriting B with X. */ _starpu_dpotrs_(uplo, n, nrhs, &a[a_offset], lda, &b[b_offset], ldb, info); } return 0; /* End of DPOSV */ } /* _starpu_dposv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dposvx.c000066400000000000000000000401631507764646700207230ustar00rootroot00000000000000/* dposvx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dposvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, char *equed, doublereal *s, doublereal *b, integer *ldb, doublereal * x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal * berr, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; doublereal d__1, d__2; /* Local variables */ integer i__, j; doublereal amax, smin, smax; extern logical _starpu_lsame_(char *, char *); doublereal scond, anorm; logical equil, rcequ; extern doublereal _starpu_dlamch_(char *); logical nofact; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dpocon_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); integer infequ; extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dlaqsy_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, char *), _starpu_dpoequ_(integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dporfs_( char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dpotrf_(char *, integer *, doublereal *, integer *, integer *); doublereal smlnum; extern /* Subroutine */ int _starpu_dpotrs_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to */ /* compute the solution to a real system of linear equations */ /* A * X = B, */ /* where A is an N-by-N symmetric positive definite matrix and X and B */ /* are N-by-NRHS matrices. */ /* Error bounds on the solution and a condition estimate are also */ /* provided. */ /* Description */ /* =========== */ /* The following steps are performed: */ /* 1. If FACT = 'E', real scaling factors are computed to equilibrate */ /* the system: */ /* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B */ /* Whether or not the system will be equilibrated depends on the */ /* scaling of the matrix A, but if equilibration is used, A is */ /* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. */ /* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to */ /* factor the matrix A (after equilibration if FACT = 'E') as */ /* A = U**T* U, if UPLO = 'U', or */ /* A = L * L**T, if UPLO = 'L', */ /* where U is an upper triangular matrix and L is a lower triangular */ /* matrix. */ /* 3. If the leading i-by-i principal minor is not positive definite, */ /* then the routine returns with INFO = i. Otherwise, the factored */ /* form of A is used to estimate the condition number of the matrix */ /* A. If the reciprocal of the condition number is less than machine */ /* precision, INFO = N+1 is returned as a warning, but the routine */ /* still goes on to solve for X and compute error bounds as */ /* described below. */ /* 4. The system of equations is solved for X using the factored form */ /* of A. */ /* 5. Iterative refinement is applied to improve the computed solution */ /* matrix and calculate error bounds and backward error estimates */ /* for it. */ /* 6. If equilibration was used, the matrix X is premultiplied by */ /* diag(S) so that it solves the original system before */ /* equilibration. */ /* Arguments */ /* ========= */ /* FACT (input) CHARACTER*1 */ /* Specifies whether or not the factored form of the matrix A is */ /* supplied on entry, and if not, whether the matrix A should be */ /* equilibrated before it is factored. */ /* = 'F': On entry, AF contains the factored form of A. */ /* If EQUED = 'Y', the matrix A has been equilibrated */ /* with scaling factors given by S. A and AF will not */ /* be modified. */ /* = 'N': The matrix A will be copied to AF and factored. */ /* = 'E': The matrix A will be equilibrated if necessary, then */ /* copied to AF and factored. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A, except if FACT = 'F' and */ /* EQUED = 'Y', then A must contain the equilibrated matrix */ /* diag(S)*A*diag(S). If UPLO = 'U', the leading */ /* N-by-N upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. A is not modified if */ /* FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. */ /* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by */ /* diag(S)*A*diag(S). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) */ /* If FACT = 'F', then AF is an input argument and on entry */ /* contains the triangular factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T, in the same storage */ /* format as A. If EQUED .ne. 'N', then AF is the factored form */ /* of the equilibrated matrix diag(S)*A*diag(S). */ /* If FACT = 'N', then AF is an output argument and on exit */ /* returns the triangular factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T of the original */ /* matrix A. */ /* If FACT = 'E', then AF is an output argument and on exit */ /* returns the triangular factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T of the equilibrated */ /* matrix A (see the description of A for the form of the */ /* equilibrated matrix). */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* EQUED (input or output) CHARACTER*1 */ /* Specifies the form of equilibration that was done. */ /* = 'N': No equilibration (always true if FACT = 'N'). */ /* = 'Y': Equilibration was done, i.e., A has been replaced by */ /* diag(S) * A * diag(S). */ /* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ /* output argument. */ /* S (input or output) DOUBLE PRECISION array, dimension (N) */ /* The scale factors for A; not accessed if EQUED = 'N'. S is */ /* an input argument if FACT = 'F'; otherwise, S is an output */ /* argument. If FACT = 'F' and EQUED = 'Y', each element of S */ /* must be positive. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS right hand side matrix B. */ /* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', */ /* B is overwritten by diag(S) * B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to */ /* the original system of equations. Note that if EQUED = 'Y', */ /* A and B are modified on exit, and the solution to the */ /* equilibrated system is inv(diag(S))*X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* The estimate of the reciprocal condition number of the matrix */ /* A after equilibration (if done). If RCOND is less than the */ /* machine precision (in particular, if RCOND = 0), the matrix */ /* is singular to working precision. This condition is */ /* indicated by a return code of INFO > 0. */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, and i is */ /* <= N: the leading minor of order i of A is */ /* not positive definite, so the factorization */ /* could not be completed, and the solution has not */ /* been computed. RCOND = 0 is returned. */ /* = N+1: U is nonsingular, but RCOND is less than machine */ /* precision, meaning that the matrix is singular */ /* to working precision. Nevertheless, the */ /* solution and error bounds are computed because */ /* there are a number of situations where the */ /* computed solution can be more accurate than the */ /* value of RCOND would suggest. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --s; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; nofact = _starpu_lsame_(fact, "N"); equil = _starpu_lsame_(fact, "E"); if (nofact || equil) { *(unsigned char *)equed = 'N'; rcequ = FALSE_; } else { rcequ = _starpu_lsame_(equed, "Y"); smlnum = _starpu_dlamch_("Safe minimum"); bignum = 1. / smlnum; } /* Test the input parameters. */ if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { *info = -1; } else if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldaf < max(1,*n)) { *info = -8; } else if (_starpu_lsame_(fact, "F") && ! (rcequ || _starpu_lsame_( equed, "N"))) { *info = -9; } else { if (rcequ) { smin = bignum; smax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = smin, d__2 = s[j]; smin = min(d__1,d__2); /* Computing MAX */ d__1 = smax, d__2 = s[j]; smax = max(d__1,d__2); /* L10: */ } if (smin <= 0.) { *info = -10; } else if (*n > 0) { scond = max(smin,smlnum) / min(smax,bignum); } else { scond = 1.; } } if (*info == 0) { if (*ldb < max(1,*n)) { *info = -12; } else if (*ldx < max(1,*n)) { *info = -14; } } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPOSVX", &i__1); return 0; } if (equil) { /* Compute row and column scalings to equilibrate the matrix A. */ _starpu_dpoequ_(n, &a[a_offset], lda, &s[1], &scond, &amax, &infequ); if (infequ == 0) { /* Equilibrate the matrix. */ _starpu_dlaqsy_(uplo, n, &a[a_offset], lda, &s[1], &scond, &amax, equed); rcequ = _starpu_lsame_(equed, "Y"); } } /* Scale the right hand side. */ if (rcequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = s[i__] * b[i__ + j * b_dim1]; /* L20: */ } /* L30: */ } } if (nofact || equil) { /* Compute the Cholesky factorization A = U'*U or A = L*L'. */ _starpu_dlacpy_(uplo, n, n, &a[a_offset], lda, &af[af_offset], ldaf); _starpu_dpotrf_(uplo, n, &af[af_offset], ldaf, info); /* Return if INFO is non-zero. */ if (*info > 0) { *rcond = 0.; return 0; } } /* Compute the norm of the matrix A. */ anorm = _starpu_dlansy_("1", uplo, n, &a[a_offset], lda, &work[1]); /* Compute the reciprocal of the condition number of A. */ _starpu_dpocon_(uplo, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1], info); /* Compute the solution matrix X. */ _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); _starpu_dpotrs_(uplo, n, nrhs, &af[af_offset], ldaf, &x[x_offset], ldx, info); /* Use iterative refinement to improve the computed solution and */ /* compute error bounds and backward error estimates for it. */ _starpu_dporfs_(uplo, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &b[ b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[1], & iwork[1], info); /* Transform the solution matrix X to a solution of the original */ /* system. */ if (rcequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { x[i__ + j * x_dim1] = s[i__] * x[i__ + j * x_dim1]; /* L40: */ } /* L50: */ } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] /= scond; /* L60: */ } } /* Set INFO = N+1 if the matrix is singular to working precision. */ if (*rcond < _starpu_dlamch_("Epsilon")) { *info = *n + 1; } return 0; /* End of DPOSVX */ } /* _starpu_dposvx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dposvxx.c000066400000000000000000000631461507764646700211210ustar00rootroot00000000000000/* dposvxx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dposvxx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, char *equed, doublereal *s, doublereal *b, integer *ldb, doublereal * x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, doublereal * berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublereal * work, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, err_bnds_comp_dim1, err_bnds_comp_offset, i__1; doublereal d__1, d__2; /* Local variables */ integer j; doublereal amax, smin, smax; extern doublereal _starpu_dla_porpvgrw__(char *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, ftnlen); extern logical _starpu_lsame_(char *, char *); doublereal scond; logical equil, rcequ; extern doublereal _starpu_dlamch_(char *); logical nofact; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); doublereal bignum; integer infequ; extern /* Subroutine */ int _starpu_dlaqsy_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, char *), _starpu_dpotrf_(char *, integer *, doublereal *, integer *, integer *); doublereal smlnum; extern /* Subroutine */ int _starpu_dpotrs_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dlascl2_(integer *, integer *, doublereal *, doublereal * , integer *), _starpu_dpoequb_(integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dporfsx_( char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPOSVXX uses the Cholesky factorization A = U**T*U or A = L*L**T */ /* to compute the solution to a double precision system of linear equations */ /* A * X = B, where A is an N-by-N symmetric positive definite matrix */ /* and X and B are N-by-NRHS matrices. */ /* If requested, both normwise and maximum componentwise error bounds */ /* are returned. DPOSVXX will return a solution with a tiny */ /* guaranteed error (O(eps) where eps is the working machine */ /* precision) unless the matrix is very ill-conditioned, in which */ /* case a warning is returned. Relevant condition numbers also are */ /* calculated and returned. */ /* DPOSVXX accepts user-provided factorizations and equilibration */ /* factors; see the definitions of the FACT and EQUED options. */ /* Solving with refinement and using a factorization from a previous */ /* DPOSVXX call will also produce a solution with either O(eps) */ /* errors or warnings, but we cannot make that claim for general */ /* user-provided factorizations and equilibration factors if they */ /* differ from what DPOSVXX would itself produce. */ /* Description */ /* =========== */ /* The following steps are performed: */ /* 1. If FACT = 'E', double precision scaling factors are computed to equilibrate */ /* the system: */ /* diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B */ /* Whether or not the system will be equilibrated depends on the */ /* scaling of the matrix A, but if equilibration is used, A is */ /* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. */ /* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to */ /* factor the matrix A (after equilibration if FACT = 'E') as */ /* A = U**T* U, if UPLO = 'U', or */ /* A = L * L**T, if UPLO = 'L', */ /* where U is an upper triangular matrix and L is a lower triangular */ /* matrix. */ /* 3. If the leading i-by-i principal minor is not positive definite, */ /* then the routine returns with INFO = i. Otherwise, the factored */ /* form of A is used to estimate the condition number of the matrix */ /* A (see argument RCOND). If the reciprocal of the condition number */ /* is less than machine precision, the routine still goes on to solve */ /* for X and compute error bounds as described below. */ /* 4. The system of equations is solved for X using the factored form */ /* of A. */ /* 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */ /* the routine will use iterative refinement to try to get a small */ /* error and error bounds. Refinement calculates the residual to at */ /* least twice the working precision. */ /* 6. If equilibration was used, the matrix X is premultiplied by */ /* diag(S) so that it solves the original system before */ /* equilibration. */ /* Arguments */ /* ========= */ /* Some optional parameters are bundled in the PARAMS array. These */ /* settings determine how refinement is performed, but often the */ /* defaults are acceptable. If the defaults are acceptable, users */ /* can pass NPARAMS = 0 which prevents the source code from accessing */ /* the PARAMS argument. */ /* FACT (input) CHARACTER*1 */ /* Specifies whether or not the factored form of the matrix A is */ /* supplied on entry, and if not, whether the matrix A should be */ /* equilibrated before it is factored. */ /* = 'F': On entry, AF contains the factored form of A. */ /* If EQUED is not 'N', the matrix A has been */ /* equilibrated with scaling factors given by S. */ /* A and AF are not modified. */ /* = 'N': The matrix A will be copied to AF and factored. */ /* = 'E': The matrix A will be equilibrated if necessary, then */ /* copied to AF and factored. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A, except if FACT = 'F' and EQUED = */ /* 'Y', then A must contain the equilibrated matrix */ /* diag(S)*A*diag(S). If UPLO = 'U', the leading N-by-N upper */ /* triangular part of A contains the upper triangular part of the */ /* matrix A, and the strictly lower triangular part of A is not */ /* referenced. If UPLO = 'L', the leading N-by-N lower triangular */ /* part of A contains the lower triangular part of the matrix A, and */ /* the strictly upper triangular part of A is not referenced. A is */ /* not modified if FACT = 'F' or 'N', or if FACT = 'E' and EQUED = */ /* 'N' on exit. */ /* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by */ /* diag(S)*A*diag(S). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) */ /* If FACT = 'F', then AF is an input argument and on entry */ /* contains the triangular factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T, in the same storage */ /* format as A. If EQUED .ne. 'N', then AF is the factored */ /* form of the equilibrated matrix diag(S)*A*diag(S). */ /* If FACT = 'N', then AF is an output argument and on exit */ /* returns the triangular factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T of the original */ /* matrix A. */ /* If FACT = 'E', then AF is an output argument and on exit */ /* returns the triangular factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T of the equilibrated */ /* matrix A (see the description of A for the form of the */ /* equilibrated matrix). */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* EQUED (input or output) CHARACTER*1 */ /* Specifies the form of equilibration that was done. */ /* = 'N': No equilibration (always true if FACT = 'N'). */ /* = 'Y': Both row and column equilibration, i.e., A has been */ /* replaced by diag(S) * A * diag(S). */ /* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ /* output argument. */ /* S (input or output) DOUBLE PRECISION array, dimension (N) */ /* The row scale factors for A. If EQUED = 'Y', A is multiplied on */ /* the left and right by diag(S). S is an input argument if FACT = */ /* 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED */ /* = 'Y', each element of S must be positive. If S is output, each */ /* element of S is a power of the radix. If S is input, each element */ /* of S should be a power of the radix to ensure a reliable solution */ /* and error estimates. Scaling by powers of the radix does not cause */ /* rounding errors unless the result underflows or overflows. */ /* Rounding errors during scaling lead to refining with a matrix that */ /* is not equivalent to the input matrix, producing error estimates */ /* that may not be reliable. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS right hand side matrix B. */ /* On exit, */ /* if EQUED = 'N', B is not modified; */ /* if EQUED = 'Y', B is overwritten by diag(S)*B; */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* If INFO = 0, the N-by-NRHS solution matrix X to the original */ /* system of equations. Note that A and B are modified on exit if */ /* EQUED .ne. 'N', and the solution to the equilibrated system is */ /* inv(diag(S))*X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* Reciprocal scaled condition number. This is an estimate of the */ /* reciprocal Skeel condition number of the matrix A after */ /* equilibration (if done). If this is less than the machine */ /* precision (in particular, if it is zero), the matrix is singular */ /* to working precision. Note that the error may still be small even */ /* if this number is very small and the matrix appears ill- */ /* conditioned. */ /* RPVGRW (output) DOUBLE PRECISION */ /* Reciprocal pivot growth. On exit, this contains the reciprocal */ /* pivot growth factor norm(A)/norm(U). The "max absolute element" */ /* norm is used. If this is much less than 1, then the stability of */ /* the LU factorization of the (equilibrated) matrix A could be poor. */ /* This also means that the solution X, estimated condition numbers, */ /* and error bounds could be unreliable. If factorization fails with */ /* 0 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly singular, so */ /* the solution and error bounds could not be computed. RCOND = 0 */ /* is returned. */ /* = N+J: The solution corresponding to the Jth right-hand side is */ /* not guaranteed. The solutions corresponding to other right- */ /* hand sides K with K > J may not be guaranteed as well, but */ /* only the first such right-hand side is reported. If a small */ /* componentwise error is not requested (PARAMS(3) = 0.0) then */ /* the Jth right-hand side is the first with a normwise error */ /* bound that is not guaranteed (the smallest J such */ /* that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ /* the Jth right-hand side is the first with either a normwise or */ /* componentwise error bound that is not guaranteed (the smallest */ /* J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ /* ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ /* ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ /* about all of the right-hand sides check ERR_BNDS_NORM or */ /* ERR_BNDS_COMP. */ /* ================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ err_bnds_comp_dim1 = *nrhs; err_bnds_comp_offset = 1 + err_bnds_comp_dim1; err_bnds_comp__ -= err_bnds_comp_offset; err_bnds_norm_dim1 = *nrhs; err_bnds_norm_offset = 1 + err_bnds_norm_dim1; err_bnds_norm__ -= err_bnds_norm_offset; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --s; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --berr; --params; --work; --iwork; /* Function Body */ *info = 0; nofact = _starpu_lsame_(fact, "N"); equil = _starpu_lsame_(fact, "E"); smlnum = _starpu_dlamch_("Safe minimum"); bignum = 1. / smlnum; if (nofact || equil) { *(unsigned char *)equed = 'N'; rcequ = FALSE_; } else { rcequ = _starpu_lsame_(equed, "Y"); } /* Default is failure. If an input parameter is wrong or */ /* factorization fails, make everything look horrible. Only the */ /* pivot growth is set here, the rest is initialized in DPORFSX. */ *rpvgrw = 0.; /* Test the input parameters. PARAMS is not tested until DPORFSX. */ if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { *info = -1; } else if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldaf < max(1,*n)) { *info = -8; } else if (_starpu_lsame_(fact, "F") && ! (rcequ || _starpu_lsame_( equed, "N"))) { *info = -9; } else { if (rcequ) { smin = bignum; smax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = smin, d__2 = s[j]; smin = min(d__1,d__2); /* Computing MAX */ d__1 = smax, d__2 = s[j]; smax = max(d__1,d__2); /* L10: */ } if (smin <= 0.) { *info = -10; } else if (*n > 0) { scond = max(smin,smlnum) / min(smax,bignum); } else { scond = 1.; } } if (*info == 0) { if (*ldb < max(1,*n)) { *info = -12; } else if (*ldx < max(1,*n)) { *info = -14; } } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPOSVXX", &i__1); return 0; } if (equil) { /* Compute row and column scalings to equilibrate the matrix A. */ _starpu_dpoequb_(n, &a[a_offset], lda, &s[1], &scond, &amax, &infequ); if (infequ == 0) { /* Equilibrate the matrix. */ _starpu_dlaqsy_(uplo, n, &a[a_offset], lda, &s[1], &scond, &amax, equed); rcequ = _starpu_lsame_(equed, "Y"); } } /* Scale the right-hand side. */ if (rcequ) { _starpu_dlascl2_(n, nrhs, &s[1], &b[b_offset], ldb); } if (nofact || equil) { /* Compute the LU factorization of A. */ _starpu_dlacpy_(uplo, n, n, &a[a_offset], lda, &af[af_offset], ldaf); _starpu_dpotrf_(uplo, n, &af[af_offset], ldaf, info); /* Return if INFO is non-zero. */ if (*info != 0) { /* Pivot in column INFO is exactly 0 */ /* Compute the reciprocal pivot growth factor of the */ /* leading rank-deficient INFO columns of A. */ *rpvgrw = _starpu_dla_porpvgrw__(uplo, info, &a[a_offset], lda, &af[ af_offset], ldaf, &work[1], (ftnlen)1); return 0; } } /* Compute the reciprocal growth factor RPVGRW. */ *rpvgrw = _starpu_dla_porpvgrw__(uplo, n, &a[a_offset], lda, &af[af_offset], ldaf, &work[1], (ftnlen)1); /* Compute the solution matrix X. */ _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); _starpu_dpotrs_(uplo, n, nrhs, &af[af_offset], ldaf, &x[x_offset], ldx, info); /* Use iterative refinement to improve the computed solution and */ /* compute error bounds and backward error estimates for it. */ _starpu_dporfsx_(uplo, equed, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, & s[1], &b[b_offset], ldb, &x[x_offset], ldx, rcond, &berr[1], n_err_bnds__, &err_bnds_norm__[err_bnds_norm_offset], & err_bnds_comp__[err_bnds_comp_offset], nparams, ¶ms[1], &work[ 1], &iwork[1], info); /* Scale solutions. */ if (rcequ) { _starpu_dlascl2_(n, nrhs, &s[1], &x[x_offset], ldx); } return 0; /* End of DPOSVXX */ } /* _starpu_dposvxx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpotf2.c000066400000000000000000000142641507764646700206010ustar00rootroot00000000000000/* dpotf2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b10 = -1.; static doublereal c_b12 = 1.; /* Subroutine */ int _starpu_dpotf2_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer j; doublereal ajj; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical upper; extern logical _starpu_disnan_(doublereal *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPOTF2 computes the Cholesky factorization of a real symmetric */ /* positive definite matrix A. */ /* The factorization has the form */ /* A = U' * U , if UPLO = 'U', or */ /* A = L * L', if UPLO = 'L', */ /* where U is an upper triangular matrix and L is lower triangular. */ /* This is the unblocked version of the algorithm, calling Level 2 BLAS. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is stored. */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* n by n upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading n by n lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if INFO = 0, the factor U or L from the Cholesky */ /* factorization A = U'*U or A = L*L'. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -k, the k-th argument had an illegal value */ /* > 0: if INFO = k, the leading minor of order k is not */ /* positive definite, and the factorization could not be */ /* completed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPOTF2", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (upper) { /* Compute the Cholesky factorization A = U'*U. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Compute U(J,J) and test for non-positive-definiteness. */ i__2 = j - 1; ajj = a[j + j * a_dim1] - _starpu_ddot_(&i__2, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1], &c__1); if (ajj <= 0. || _starpu_disnan_(&ajj)) { a[j + j * a_dim1] = ajj; goto L30; } ajj = sqrt(ajj); a[j + j * a_dim1] = ajj; /* Compute elements J+1:N of row J. */ if (j < *n) { i__2 = j - 1; i__3 = *n - j; _starpu_dgemv_("Transpose", &i__2, &i__3, &c_b10, &a[(j + 1) * a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b12, &a[j + ( j + 1) * a_dim1], lda); i__2 = *n - j; d__1 = 1. / ajj; _starpu_dscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); } /* L10: */ } } else { /* Compute the Cholesky factorization A = L*L'. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Compute L(J,J) and test for non-positive-definiteness. */ i__2 = j - 1; ajj = a[j + j * a_dim1] - _starpu_ddot_(&i__2, &a[j + a_dim1], lda, &a[j + a_dim1], lda); if (ajj <= 0. || _starpu_disnan_(&ajj)) { a[j + j * a_dim1] = ajj; goto L30; } ajj = sqrt(ajj); a[j + j * a_dim1] = ajj; /* Compute elements J+1:N of column J. */ if (j < *n) { i__2 = *n - j; i__3 = j - 1; _starpu_dgemv_("No transpose", &i__2, &i__3, &c_b10, &a[j + 1 + a_dim1], lda, &a[j + a_dim1], lda, &c_b12, &a[j + 1 + j * a_dim1], &c__1); i__2 = *n - j; d__1 = 1. / ajj; _starpu_dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); } /* L20: */ } } goto L40; L30: *info = j; L40: return 0; /* End of DPOTF2 */ } /* _starpu_dpotf2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpotrf.c000066400000000000000000000163151507764646700207000ustar00rootroot00000000000000/* dpotrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b13 = -1.; static doublereal c_b14 = 1.; /* Subroutine */ int _starpu_dpotrf_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ integer j, jb, nb; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dsyrk_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dpotf2_(char *, integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPOTRF computes the Cholesky factorization of a real symmetric */ /* positive definite matrix A. */ /* The factorization has the form */ /* A = U**T * U, if UPLO = 'U', or */ /* A = L * L**T, if UPLO = 'L', */ /* where U is an upper triangular matrix and L is lower triangular. */ /* This is the block version of the algorithm, calling Level 3 BLAS. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* N-by-N upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if INFO = 0, the factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the leading minor of order i is not */ /* positive definite, and the factorization could not be */ /* completed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPOTRF", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine the block size for this environment. */ nb = _starpu_ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1); if (nb <= 1 || nb >= *n) { /* Use unblocked code. */ _starpu_dpotf2_(uplo, n, &a[a_offset], lda, info); } else { /* Use blocked code. */ if (upper) { /* Compute the Cholesky factorization A = U'*U. */ i__1 = *n; i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Update and factorize the current diagonal block and test */ /* for non-positive-definiteness. */ /* Computing MIN */ i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); i__3 = j - 1; _starpu_dsyrk_("Upper", "Transpose", &jb, &i__3, &c_b13, &a[j * a_dim1 + 1], lda, &c_b14, &a[j + j * a_dim1], lda); _starpu_dpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); if (*info != 0) { goto L30; } if (j + jb <= *n) { /* Compute the current block row. */ i__3 = *n - j - jb + 1; i__4 = j - 1; _starpu_dgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & c_b13, &a[j * a_dim1 + 1], lda, &a[(j + jb) * a_dim1 + 1], lda, &c_b14, &a[j + (j + jb) * a_dim1], lda); i__3 = *n - j - jb + 1; _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & i__3, &c_b14, &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda); } /* L10: */ } } else { /* Compute the Cholesky factorization A = L*L'. */ i__2 = *n; i__1 = nb; for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Update and factorize the current diagonal block and test */ /* for non-positive-definiteness. */ /* Computing MIN */ i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); i__3 = j - 1; _starpu_dsyrk_("Lower", "No transpose", &jb, &i__3, &c_b13, &a[j + a_dim1], lda, &c_b14, &a[j + j * a_dim1], lda); _starpu_dpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); if (*info != 0) { goto L30; } if (j + jb <= *n) { /* Compute the current block column. */ i__3 = *n - j - jb + 1; i__4 = j - 1; _starpu_dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & c_b13, &a[j + jb + a_dim1], lda, &a[j + a_dim1], lda, &c_b14, &a[j + jb + j * a_dim1], lda); i__3 = *n - j - jb + 1; _starpu_dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & jb, &c_b14, &a[j + j * a_dim1], lda, &a[j + jb + j * a_dim1], lda); } /* L20: */ } } } goto L40; L30: *info = *info + j - 1; L40: return 0; /* End of DPOTRF */ } /* _starpu_dpotrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpotri.c000066400000000000000000000067711507764646700207100ustar00rootroot00000000000000/* dpotri.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dpotri_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1; /* Local variables */ extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dlauum_( char *, integer *, doublereal *, integer *, integer *), _starpu_dtrtri_(char *, char *, integer *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPOTRI computes the inverse of a real symmetric positive definite */ /* matrix A using the Cholesky factorization A = U**T*U or A = L*L**T */ /* computed by DPOTRF. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the triangular factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T, as computed by */ /* DPOTRF. */ /* On exit, the upper or lower triangle of the (symmetric) */ /* inverse of A, overwriting the input factor U or L. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the (i,i) element of the factor U or L is */ /* zero, and the inverse could not be computed. */ /* ===================================================================== */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPOTRI", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Invert the triangular Cholesky factor U or L. */ _starpu_dtrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); if (*info > 0) { return 0; } /* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ _starpu_dlauum_(uplo, n, &a[a_offset], lda, info); return 0; /* End of DPOTRI */ } /* _starpu_dpotri_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpotrs.c000066400000000000000000000111051507764646700207050ustar00rootroot00000000000000/* dpotrs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b9 = 1.; /* Subroutine */ int _starpu_dpotrs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPOTRS solves a system of linear equations A*X = B with a symmetric */ /* positive definite matrix A using the Cholesky factorization */ /* A = U**T*U or A = L*L**T computed by DPOTRF. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The triangular factor U or L from the Cholesky factorization */ /* A = U**T*U or A = L*L**T, as computed by DPOTRF. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side matrix B. */ /* On exit, the solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPOTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } if (upper) { /* Solve A*X = B where A = U'*U. */ /* Solve U'*X = B, overwriting B with X. */ _starpu_dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b9, &a[ a_offset], lda, &b[b_offset], ldb); /* Solve U*X = B, overwriting B with X. */ _starpu_dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b9, & a[a_offset], lda, &b[b_offset], ldb); } else { /* Solve A*X = B where A = L*L'. */ /* Solve L*X = B, overwriting B with X. */ _starpu_dtrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b9, & a[a_offset], lda, &b[b_offset], ldb); /* Solve L'*X = B, overwriting B with X. */ _starpu_dtrsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b9, &a[ a_offset], lda, &b[b_offset], ldb); } return 0; /* End of DPOTRS */ } /* _starpu_dpotrs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dppcon.c000066400000000000000000000141671507764646700206700ustar00rootroot00000000000000/* dppcon.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dppcon_(char *uplo, integer *n, doublereal *ap, doublereal *anorm, doublereal *rcond, doublereal *work, integer * iwork, integer *info) { /* System generated locals */ integer i__1; doublereal d__1; /* Local variables */ integer ix, kase; doublereal scale; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); doublereal scalel; extern integer _starpu_idamax_(integer *, doublereal *, integer *); doublereal scaleu; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dlatps_( char *, char *, char *, char *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *); doublereal ainvnm; char normin[1]; doublereal smlnum; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPPCON estimates the reciprocal of the condition number (in the */ /* 1-norm) of a real symmetric positive definite packed matrix using */ /* the Cholesky factorization A = U**T*U or A = L*L**T computed by */ /* DPPTRF. */ /* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ /* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The triangular factor U or L from the Cholesky factorization */ /* A = U**T*U or A = L*L**T, packed columnwise in a linear */ /* array. The j-th column of U or L is stored in the array AP */ /* as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. */ /* ANORM (input) DOUBLE PRECISION */ /* The 1-norm (or infinity-norm) of the symmetric matrix A. */ /* RCOND (output) DOUBLE PRECISION */ /* The reciprocal of the condition number of the matrix A, */ /* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ /* estimate of the 1-norm of inv(A) computed in this routine. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --iwork; --work; --ap; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*anorm < 0.) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPPCON", &i__1); return 0; } /* Quick return if possible */ *rcond = 0.; if (*n == 0) { *rcond = 1.; return 0; } else if (*anorm == 0.) { return 0; } smlnum = _starpu_dlamch_("Safe minimum"); /* Estimate the 1-norm of the inverse. */ kase = 0; *(unsigned char *)normin = 'N'; L10: _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); if (kase != 0) { if (upper) { /* Multiply by inv(U'). */ _starpu_dlatps_("Upper", "Transpose", "Non-unit", normin, n, &ap[1], & work[1], &scalel, &work[(*n << 1) + 1], info); *(unsigned char *)normin = 'Y'; /* Multiply by inv(U). */ _starpu_dlatps_("Upper", "No transpose", "Non-unit", normin, n, &ap[1], & work[1], &scaleu, &work[(*n << 1) + 1], info); } else { /* Multiply by inv(L). */ _starpu_dlatps_("Lower", "No transpose", "Non-unit", normin, n, &ap[1], & work[1], &scalel, &work[(*n << 1) + 1], info); *(unsigned char *)normin = 'Y'; /* Multiply by inv(L'). */ _starpu_dlatps_("Lower", "Transpose", "Non-unit", normin, n, &ap[1], & work[1], &scaleu, &work[(*n << 1) + 1], info); } /* Multiply by 1/SCALE if doing so will not cause overflow. */ scale = scalel * scaleu; if (scale != 1.) { ix = _starpu_idamax_(n, &work[1], &c__1); if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) { goto L20; } _starpu_drscl_(n, &scale, &work[1], &c__1); } goto L10; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { *rcond = 1. / ainvnm / *anorm; } L20: return 0; /* End of DPPCON */ } /* _starpu_dppcon_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dppequ.c000066400000000000000000000127631507764646700207030ustar00rootroot00000000000000/* dppequ.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dppequ_(char *uplo, integer *n, doublereal *ap, doublereal *s, doublereal *scond, doublereal *amax, integer *info) { /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, jj; doublereal smin; extern logical _starpu_lsame_(char *, char *); logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPPEQU computes row and column scalings intended to equilibrate a */ /* symmetric positive definite matrix A in packed storage and reduce */ /* its condition number (with respect to the two-norm). S contains the */ /* scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix */ /* B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. */ /* This choice of S puts the condition number of B within a factor N of */ /* the smallest possible condition number over all possible diagonal */ /* scalings. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The upper or lower triangle of the symmetric matrix A, packed */ /* columnwise in a linear array. The j-th column of A is stored */ /* in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* S (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, S contains the scale factors for A. */ /* SCOND (output) DOUBLE PRECISION */ /* If INFO = 0, S contains the ratio of the smallest S(i) to */ /* the largest S(i). If SCOND >= 0.1 and AMAX is neither too */ /* large nor too small, it is not worth scaling by S. */ /* AMAX (output) DOUBLE PRECISION */ /* Absolute value of largest matrix element. If AMAX is very */ /* close to overflow or very close to underflow, the matrix */ /* should be scaled. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the i-th diagonal element is nonpositive. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --s; --ap; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPPEQU", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { *scond = 1.; *amax = 0.; return 0; } /* Initialize SMIN and AMAX. */ s[1] = ap[1]; smin = s[1]; *amax = s[1]; if (upper) { /* UPLO = 'U': Upper triangle of A is stored. */ /* Find the minimum and maximum diagonal elements. */ jj = 1; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { jj += i__; s[i__] = ap[jj]; /* Computing MIN */ d__1 = smin, d__2 = s[i__]; smin = min(d__1,d__2); /* Computing MAX */ d__1 = *amax, d__2 = s[i__]; *amax = max(d__1,d__2); /* L10: */ } } else { /* UPLO = 'L': Lower triangle of A is stored. */ /* Find the minimum and maximum diagonal elements. */ jj = 1; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { jj = jj + *n - i__ + 2; s[i__] = ap[jj]; /* Computing MIN */ d__1 = smin, d__2 = s[i__]; smin = min(d__1,d__2); /* Computing MAX */ d__1 = *amax, d__2 = s[i__]; *amax = max(d__1,d__2); /* L20: */ } } if (smin <= 0.) { /* Find the first non-positive diagonal element and return. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (s[i__] <= 0.) { *info = i__; return 0; } /* L30: */ } } else { /* Set the scale factors to the reciprocals */ /* of the diagonal elements. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { s[i__] = 1. / sqrt(s[i__]); /* L40: */ } /* Compute SCOND = min(S(I)) / max(S(I)) */ *scond = sqrt(smin) / sqrt(*amax); } return 0; /* End of DPPEQU */ } /* _starpu_dppequ_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpprfs.c000066400000000000000000000300241507764646700206710ustar00rootroot00000000000000/* dpprfs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b12 = -1.; static doublereal c_b14 = 1.; /* Subroutine */ int _starpu_dpprfs_(char *uplo, integer *n, integer *nrhs, doublereal *ap, doublereal *afp, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j, k; doublereal s; integer ik, kk; doublereal xk; integer nz; doublereal eps; integer kase; doublereal safe1, safe2; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer count; extern /* Subroutine */ int _starpu_dspmv_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal lstres; extern /* Subroutine */ int _starpu_dpptrs_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPPRFS improves the computed solution to a system of linear */ /* equations when the coefficient matrix is symmetric positive definite */ /* and packed, and provides error bounds and backward error estimates */ /* for the solution. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The upper or lower triangle of the symmetric matrix A, packed */ /* columnwise in a linear array. The j-th column of A is stored */ /* in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* AFP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The triangular factor U or L from the Cholesky factorization */ /* A = U**T*U or A = L*L**T, as computed by DPPTRF/ZPPTRF, */ /* packed columnwise in a linear array in the same format as A */ /* (see AP). */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* On entry, the solution matrix X, as computed by DPPTRS. */ /* On exit, the improved solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Internal Parameters */ /* =================== */ /* ITMAX is the maximum number of steps of iterative refinement. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --afp; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -7; } else if (*ldx < max(1,*n)) { *info = -9; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPPRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = *n + 1; eps = _starpu_dlamch_("Epsilon"); safmin = _starpu_dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - A * X */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); _starpu_dspmv_(uplo, n, &c_b12, &ap[1], &x[j * x_dim1 + 1], &c__1, &c_b14, & work[*n + 1], &c__1); /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. If the i-th component of the denominator is less */ /* than SAFE2, then SAFE1 is added to the i-th components of the */ /* numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); /* L30: */ } /* Compute abs(A)*abs(X) + abs(B). */ kk = 1; if (upper) { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; xk = (d__1 = x[k + j * x_dim1], abs(d__1)); ik = kk; i__3 = k - 1; for (i__ = 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = ap[ik], abs(d__1)) * xk; s += (d__1 = ap[ik], abs(d__1)) * (d__2 = x[i__ + j * x_dim1], abs(d__2)); ++ik; /* L40: */ } work[k] = work[k] + (d__1 = ap[kk + k - 1], abs(d__1)) * xk + s; kk += k; /* L50: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; xk = (d__1 = x[k + j * x_dim1], abs(d__1)); work[k] += (d__1 = ap[kk], abs(d__1)) * xk; ik = kk + 1; i__3 = *n; for (i__ = k + 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = ap[ik], abs(d__1)) * xk; s += (d__1 = ap[ik], abs(d__1)) * (d__2 = x[i__ + j * x_dim1], abs(d__2)); ++ik; /* L60: */ } work[k] += s; kk += *n - k + 1; /* L70: */ } } s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L80: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if */ /* 1) The residual BERR(J) is larger than machine epsilon, and */ /* 2) BERR(J) decreased by at least a factor of 2 during the */ /* last iteration, and */ /* 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { /* Update solution and try again. */ _starpu_dpptrs_(uplo, n, &c__1, &afp[1], &work[*n + 1], n, info); _starpu_daxpy_(n, &c_b14, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) ; lstres = berr[j]; ++count; goto L20; } /* Bound error from formula */ /* norm(X - XTRUE) / norm(X) .le. FERR = */ /* norm( abs(inv(A))* */ /* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */ /* where */ /* norm(Z) is the magnitude of the largest component of Z */ /* inv(A) is the inverse of A */ /* abs(Z) is the componentwise absolute value of the matrix or */ /* vector Z */ /* NZ is the maximum number of nonzeros in any row of A, plus 1 */ /* EPS is machine epsilon */ /* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */ /* is incremented by SAFE1 if the i-th component of */ /* abs(A)*abs(X) + abs(B) is less than SAFE2. */ /* Use DLACN2 to estimate the infinity-norm of the matrix */ /* inv(A) * diag(W), */ /* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L90: */ } kase = 0; L100: _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase, isave); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(A'). */ _starpu_dpptrs_(uplo, n, &c__1, &afp[1], &work[*n + 1], n, info); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L110: */ } } else if (kase == 2) { /* Multiply by inv(A)*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L120: */ } _starpu_dpptrs_(uplo, n, &c__1, &afp[1], &work[*n + 1], n, info); } goto L100; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); lstres = max(d__2,d__3); /* L130: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L140: */ } return 0; /* End of DPPRFS */ } /* _starpu_dpprfs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dppsv.c000066400000000000000000000121041507764646700205260ustar00rootroot00000000000000/* dppsv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dppsv_(char *uplo, integer *n, integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer b_dim1, b_offset, i__1; /* Local variables */ extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpptrf_( char *, integer *, doublereal *, integer *), _starpu_dpptrs_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPPSV computes the solution to a real system of linear equations */ /* A * X = B, */ /* where A is an N-by-N symmetric positive definite matrix stored in */ /* packed format and X and B are N-by-NRHS matrices. */ /* The Cholesky decomposition is used to factor A as */ /* A = U**T* U, if UPLO = 'U', or */ /* A = L * L**T, if UPLO = 'L', */ /* where U is an upper triangular matrix and L is a lower triangular */ /* matrix. The factored form of A is then used to solve the system of */ /* equations A * X = B. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* A, packed columnwise in a linear array. The j-th column of A */ /* is stored in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* See below for further details. */ /* On exit, if INFO = 0, the factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T, in the same storage */ /* format as A. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS right hand side matrix B. */ /* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the leading minor of order i of A is not */ /* positive definite, so the factorization could not be */ /* completed, and the solution has not been computed. */ /* Further Details */ /* =============== */ /* The packed storage scheme is illustrated by the following example */ /* when N = 4, UPLO = 'U': */ /* Two-dimensional storage of the symmetric matrix A: */ /* a11 a12 a13 a14 */ /* a22 a23 a24 */ /* a33 a34 (aij = conjg(aji)) */ /* a44 */ /* Packed storage of the upper triangle of A: */ /* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] */ /* ===================================================================== */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPPSV ", &i__1); return 0; } /* Compute the Cholesky factorization A = U'*U or A = L*L'. */ _starpu_dpptrf_(uplo, n, &ap[1], info); if (*info == 0) { /* Solve the system A*X = B, overwriting B with X. */ _starpu_dpptrs_(uplo, n, nrhs, &ap[1], &b[b_offset], ldb, info); } return 0; /* End of DPPSV */ } /* _starpu_dppsv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dppsvx.c000066400000000000000000000400151507764646700207200ustar00rootroot00000000000000/* dppsvx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dppsvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *ap, doublereal *afp, char *equed, doublereal *s, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * iwork, integer *info) { /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; doublereal d__1, d__2; /* Local variables */ integer i__, j; doublereal amax, smin, smax; extern logical _starpu_lsame_(char *, char *); doublereal scond, anorm; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); logical equil, rcequ; extern doublereal _starpu_dlamch_(char *); logical nofact; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); doublereal bignum; extern doublereal _starpu_dlansp_(char *, char *, integer *, doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dppcon_(char *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dlaqsp_(char *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, char *); integer infequ; extern /* Subroutine */ int _starpu_dppequ_(char *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dpprfs_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dpptrf_(char *, integer *, doublereal *, integer *); doublereal smlnum; extern /* Subroutine */ int _starpu_dpptrs_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPPSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to */ /* compute the solution to a real system of linear equations */ /* A * X = B, */ /* where A is an N-by-N symmetric positive definite matrix stored in */ /* packed format and X and B are N-by-NRHS matrices. */ /* Error bounds on the solution and a condition estimate are also */ /* provided. */ /* Description */ /* =========== */ /* The following steps are performed: */ /* 1. If FACT = 'E', real scaling factors are computed to equilibrate */ /* the system: */ /* diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B */ /* Whether or not the system will be equilibrated depends on the */ /* scaling of the matrix A, but if equilibration is used, A is */ /* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. */ /* 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to */ /* factor the matrix A (after equilibration if FACT = 'E') as */ /* A = U**T* U, if UPLO = 'U', or */ /* A = L * L**T, if UPLO = 'L', */ /* where U is an upper triangular matrix and L is a lower triangular */ /* matrix. */ /* 3. If the leading i-by-i principal minor is not positive definite, */ /* then the routine returns with INFO = i. Otherwise, the factored */ /* form of A is used to estimate the condition number of the matrix */ /* A. If the reciprocal of the condition number is less than machine */ /* precision, INFO = N+1 is returned as a warning, but the routine */ /* still goes on to solve for X and compute error bounds as */ /* described below. */ /* 4. The system of equations is solved for X using the factored form */ /* of A. */ /* 5. Iterative refinement is applied to improve the computed solution */ /* matrix and calculate error bounds and backward error estimates */ /* for it. */ /* 6. If equilibration was used, the matrix X is premultiplied by */ /* diag(S) so that it solves the original system before */ /* equilibration. */ /* Arguments */ /* ========= */ /* FACT (input) CHARACTER*1 */ /* Specifies whether or not the factored form of the matrix A is */ /* supplied on entry, and if not, whether the matrix A should be */ /* equilibrated before it is factored. */ /* = 'F': On entry, AFP contains the factored form of A. */ /* If EQUED = 'Y', the matrix A has been equilibrated */ /* with scaling factors given by S. AP and AFP will not */ /* be modified. */ /* = 'N': The matrix A will be copied to AFP and factored. */ /* = 'E': The matrix A will be equilibrated if necessary, then */ /* copied to AFP and factored. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* A, packed columnwise in a linear array, except if FACT = 'F' */ /* and EQUED = 'Y', then A must contain the equilibrated matrix */ /* diag(S)*A*diag(S). The j-th column of A is stored in the */ /* array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* See below for further details. A is not modified if */ /* FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. */ /* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by */ /* diag(S)*A*diag(S). */ /* AFP (input or output) DOUBLE PRECISION array, dimension */ /* (N*(N+1)/2) */ /* If FACT = 'F', then AFP is an input argument and on entry */ /* contains the triangular factor U or L from the Cholesky */ /* factorization A = U'*U or A = L*L', in the same storage */ /* format as A. If EQUED .ne. 'N', then AFP is the factored */ /* form of the equilibrated matrix A. */ /* If FACT = 'N', then AFP is an output argument and on exit */ /* returns the triangular factor U or L from the Cholesky */ /* factorization A = U'*U or A = L*L' of the original matrix A. */ /* If FACT = 'E', then AFP is an output argument and on exit */ /* returns the triangular factor U or L from the Cholesky */ /* factorization A = U'*U or A = L*L' of the equilibrated */ /* matrix A (see the description of AP for the form of the */ /* equilibrated matrix). */ /* EQUED (input or output) CHARACTER*1 */ /* Specifies the form of equilibration that was done. */ /* = 'N': No equilibration (always true if FACT = 'N'). */ /* = 'Y': Equilibration was done, i.e., A has been replaced by */ /* diag(S) * A * diag(S). */ /* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ /* output argument. */ /* S (input or output) DOUBLE PRECISION array, dimension (N) */ /* The scale factors for A; not accessed if EQUED = 'N'. S is */ /* an input argument if FACT = 'F'; otherwise, S is an output */ /* argument. If FACT = 'F' and EQUED = 'Y', each element of S */ /* must be positive. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS right hand side matrix B. */ /* On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', */ /* B is overwritten by diag(S) * B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to */ /* the original system of equations. Note that if EQUED = 'Y', */ /* A and B are modified on exit, and the solution to the */ /* equilibrated system is inv(diag(S))*X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* The estimate of the reciprocal condition number of the matrix */ /* A after equilibration (if done). If RCOND is less than the */ /* machine precision (in particular, if RCOND = 0), the matrix */ /* is singular to working precision. This condition is */ /* indicated by a return code of INFO > 0. */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, and i is */ /* <= N: the leading minor of order i of A is */ /* not positive definite, so the factorization */ /* could not be completed, and the solution has not */ /* been computed. RCOND = 0 is returned. */ /* = N+1: U is nonsingular, but RCOND is less than machine */ /* precision, meaning that the matrix is singular */ /* to working precision. Nevertheless, the */ /* solution and error bounds are computed because */ /* there are a number of situations where the */ /* computed solution can be more accurate than the */ /* value of RCOND would suggest. */ /* Further Details */ /* =============== */ /* The packed storage scheme is illustrated by the following example */ /* when N = 4, UPLO = 'U': */ /* Two-dimensional storage of the symmetric matrix A: */ /* a11 a12 a13 a14 */ /* a22 a23 a24 */ /* a33 a34 (aij = conjg(aji)) */ /* a44 */ /* Packed storage of the upper triangle of A: */ /* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --ap; --afp; --s; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; nofact = _starpu_lsame_(fact, "N"); equil = _starpu_lsame_(fact, "E"); if (nofact || equil) { *(unsigned char *)equed = 'N'; rcequ = FALSE_; } else { rcequ = _starpu_lsame_(equed, "Y"); smlnum = _starpu_dlamch_("Safe minimum"); bignum = 1. / smlnum; } /* Test the input parameters. */ if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { *info = -1; } else if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (_starpu_lsame_(fact, "F") && ! (rcequ || _starpu_lsame_( equed, "N"))) { *info = -7; } else { if (rcequ) { smin = bignum; smax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = smin, d__2 = s[j]; smin = min(d__1,d__2); /* Computing MAX */ d__1 = smax, d__2 = s[j]; smax = max(d__1,d__2); /* L10: */ } if (smin <= 0.) { *info = -8; } else if (*n > 0) { scond = max(smin,smlnum) / min(smax,bignum); } else { scond = 1.; } } if (*info == 0) { if (*ldb < max(1,*n)) { *info = -10; } else if (*ldx < max(1,*n)) { *info = -12; } } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPPSVX", &i__1); return 0; } if (equil) { /* Compute row and column scalings to equilibrate the matrix A. */ _starpu_dppequ_(uplo, n, &ap[1], &s[1], &scond, &amax, &infequ); if (infequ == 0) { /* Equilibrate the matrix. */ _starpu_dlaqsp_(uplo, n, &ap[1], &s[1], &scond, &amax, equed); rcequ = _starpu_lsame_(equed, "Y"); } } /* Scale the right-hand side. */ if (rcequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = s[i__] * b[i__ + j * b_dim1]; /* L20: */ } /* L30: */ } } if (nofact || equil) { /* Compute the Cholesky factorization A = U'*U or A = L*L'. */ i__1 = *n * (*n + 1) / 2; _starpu_dcopy_(&i__1, &ap[1], &c__1, &afp[1], &c__1); _starpu_dpptrf_(uplo, n, &afp[1], info); /* Return if INFO is non-zero. */ if (*info > 0) { *rcond = 0.; return 0; } } /* Compute the norm of the matrix A. */ anorm = _starpu_dlansp_("I", uplo, n, &ap[1], &work[1]); /* Compute the reciprocal of the condition number of A. */ _starpu_dppcon_(uplo, n, &afp[1], &anorm, rcond, &work[1], &iwork[1], info); /* Compute the solution matrix X. */ _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); _starpu_dpptrs_(uplo, n, nrhs, &afp[1], &x[x_offset], ldx, info); /* Use iterative refinement to improve the computed solution and */ /* compute error bounds and backward error estimates for it. */ _starpu_dpprfs_(uplo, n, nrhs, &ap[1], &afp[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[1], &iwork[1], info); /* Transform the solution matrix X to a solution of the original */ /* system. */ if (rcequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { x[i__ + j * x_dim1] = s[i__] * x[i__ + j * x_dim1]; /* L40: */ } /* L50: */ } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] /= scond; /* L60: */ } } /* Set INFO = N+1 if the matrix is singular to working precision. */ if (*rcond < _starpu_dlamch_("Epsilon")) { *info = *n + 1; } return 0; /* End of DPPSVX */ } /* _starpu_dppsvx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpptrf.c000066400000000000000000000134031507764646700206740ustar00rootroot00000000000000/* dpptrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b16 = -1.; /* Subroutine */ int _starpu_dpptrf_(char *uplo, integer *n, doublereal *ap, integer * info) { /* System generated locals */ integer i__1, i__2; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer j, jc, jj; doublereal ajj; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dspr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); logical upper; extern /* Subroutine */ int _starpu_dtpsv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPPTRF computes the Cholesky factorization of a real symmetric */ /* positive definite matrix A stored in packed format. */ /* The factorization has the form */ /* A = U**T * U, if UPLO = 'U', or */ /* A = L * L**T, if UPLO = 'L', */ /* where U is an upper triangular matrix and L is lower triangular. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* A, packed columnwise in a linear array. The j-th column of A */ /* is stored in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* See below for further details. */ /* On exit, if INFO = 0, the triangular factor U or L from the */ /* Cholesky factorization A = U**T*U or A = L*L**T, in the same */ /* storage format as A. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the leading minor of order i is not */ /* positive definite, and the factorization could not be */ /* completed. */ /* Further Details */ /* ======= ======= */ /* The packed storage scheme is illustrated by the following example */ /* when N = 4, UPLO = 'U': */ /* Two-dimensional storage of the symmetric matrix A: */ /* a11 a12 a13 a14 */ /* a22 a23 a24 */ /* a33 a34 (aij = aji) */ /* a44 */ /* Packed storage of the upper triangle of A: */ /* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPPTRF", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (upper) { /* Compute the Cholesky factorization A = U'*U. */ jj = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { jc = jj + 1; jj += j; /* Compute elements 1:J-1 of column J. */ if (j > 1) { i__2 = j - 1; _starpu_dtpsv_("Upper", "Transpose", "Non-unit", &i__2, &ap[1], &ap[ jc], &c__1); } /* Compute U(J,J) and test for non-positive-definiteness. */ i__2 = j - 1; ajj = ap[jj] - _starpu_ddot_(&i__2, &ap[jc], &c__1, &ap[jc], &c__1); if (ajj <= 0.) { ap[jj] = ajj; goto L30; } ap[jj] = sqrt(ajj); /* L10: */ } } else { /* Compute the Cholesky factorization A = L*L'. */ jj = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Compute L(J,J) and test for non-positive-definiteness. */ ajj = ap[jj]; if (ajj <= 0.) { ap[jj] = ajj; goto L30; } ajj = sqrt(ajj); ap[jj] = ajj; /* Compute elements J+1:N of column J and update the trailing */ /* submatrix. */ if (j < *n) { i__2 = *n - j; d__1 = 1. / ajj; _starpu_dscal_(&i__2, &d__1, &ap[jj + 1], &c__1); i__2 = *n - j; _starpu_dspr_("Lower", &i__2, &c_b16, &ap[jj + 1], &c__1, &ap[jj + *n - j + 1]); jj = jj + *n - j + 1; } /* L20: */ } } goto L40; L30: *info = j; L40: return 0; /* End of DPPTRF */ } /* _starpu_dpptrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpptri.c000066400000000000000000000112741507764646700207030ustar00rootroot00000000000000/* dpptri.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b8 = 1.; static integer c__1 = 1; /* Subroutine */ int _starpu_dpptri_(char *uplo, integer *n, doublereal *ap, integer * info) { /* System generated locals */ integer i__1, i__2; /* Local variables */ integer j, jc, jj; doublereal ajj; integer jjn; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dspr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtpmv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dtptri_( char *, char *, integer *, doublereal *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPPTRI computes the inverse of a real symmetric positive definite */ /* matrix A using the Cholesky factorization A = U**T*U or A = L*L**T */ /* computed by DPPTRF. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangular factor is stored in AP; */ /* = 'L': Lower triangular factor is stored in AP. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the triangular factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T, packed columnwise as */ /* a linear array. The j-th column of U or L is stored in the */ /* array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. */ /* On exit, the upper or lower triangle of the (symmetric) */ /* inverse of A, overwriting the input factor U or L. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the (i,i) element of the factor U or L is */ /* zero, and the inverse could not be computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPPTRI", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Invert the triangular Cholesky factor U or L. */ _starpu_dtptri_(uplo, "Non-unit", n, &ap[1], info); if (*info > 0) { return 0; } if (upper) { /* Compute the product inv(U) * inv(U)'. */ jj = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { jc = jj + 1; jj += j; if (j > 1) { i__2 = j - 1; _starpu_dspr_("Upper", &i__2, &c_b8, &ap[jc], &c__1, &ap[1]); } ajj = ap[jj]; _starpu_dscal_(&j, &ajj, &ap[jc], &c__1); /* L10: */ } } else { /* Compute the product inv(L)' * inv(L). */ jj = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { jjn = jj + *n - j + 1; i__2 = *n - j + 1; ap[jj] = _starpu_ddot_(&i__2, &ap[jj], &c__1, &ap[jj], &c__1); if (j < *n) { i__2 = *n - j; _starpu_dtpmv_("Lower", "Transpose", "Non-unit", &i__2, &ap[jjn], &ap[ jj + 1], &c__1); } jj = jjn; /* L20: */ } } return 0; /* End of DPPTRI */ } /* _starpu_dpptri_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpptrs.c000066400000000000000000000111351507764646700207110ustar00rootroot00000000000000/* dpptrs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dpptrs_(char *uplo, integer *n, integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer b_dim1, b_offset, i__1; /* Local variables */ integer i__; extern logical _starpu_lsame_(char *, char *); logical upper; extern /* Subroutine */ int _starpu_dtpsv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPPTRS solves a system of linear equations A*X = B with a symmetric */ /* positive definite matrix A in packed storage using the Cholesky */ /* factorization A = U**T*U or A = L*L**T computed by DPPTRF. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The triangular factor U or L from the Cholesky factorization */ /* A = U**T*U or A = L*L**T, packed columnwise in a linear */ /* array. The j-th column of U or L is stored in the array AP */ /* as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side matrix B. */ /* On exit, the solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPPTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } if (upper) { /* Solve A*X = B where A = U'*U. */ i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { /* Solve U'*X = B, overwriting B with X. */ _starpu_dtpsv_("Upper", "Transpose", "Non-unit", n, &ap[1], &b[i__ * b_dim1 + 1], &c__1); /* Solve U*X = B, overwriting B with X. */ _starpu_dtpsv_("Upper", "No transpose", "Non-unit", n, &ap[1], &b[i__ * b_dim1 + 1], &c__1); /* L10: */ } } else { /* Solve A*X = B where A = L*L'. */ i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { /* Solve L*Y = B, overwriting B with X. */ _starpu_dtpsv_("Lower", "No transpose", "Non-unit", n, &ap[1], &b[i__ * b_dim1 + 1], &c__1); /* Solve L'*X = Y, overwriting B with X. */ _starpu_dtpsv_("Lower", "Transpose", "Non-unit", n, &ap[1], &b[i__ * b_dim1 + 1], &c__1); /* L20: */ } } return 0; /* End of DPPTRS */ } /* _starpu_dpptrs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpstf2.c000066400000000000000000000244441507764646700206060ustar00rootroot00000000000000/* dpstf2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b16 = -1.; static doublereal c_b18 = 1.; /* Subroutine */ int _starpu_dpstf2_(char *uplo, integer *n, doublereal *a, integer * lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, maxlocval; doublereal ajj; integer pvt; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); doublereal dtemp; integer itemp; extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal dstop; logical upper; extern doublereal _starpu_dlamch_(char *); extern logical _starpu_disnan_(doublereal *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_dmaxloc_(doublereal *, integer *); /* -- LAPACK PROTOTYPE routine (version 3.2) -- */ /* Craig Lucas, University of Manchester / NAG Ltd. */ /* October, 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPSTF2 computes the Cholesky factorization with complete */ /* pivoting of a real symmetric positive semidefinite matrix A. */ /* The factorization has the form */ /* P' * A * P = U' * U , if UPLO = 'U', */ /* P' * A * P = L * L', if UPLO = 'L', */ /* where U is an upper triangular matrix and L is lower triangular, and */ /* P is stored as vector PIV. */ /* This algorithm does not attempt to check that A is positive */ /* semidefinite. This version of the algorithm calls level 2 BLAS. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is stored. */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* n by n upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading n by n lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if INFO = 0, the factor U or L from the Cholesky */ /* factorization as above. */ /* PIV (output) INTEGER array, dimension (N) */ /* PIV is such that the nonzero entries are P( PIV(K), K ) = 1. */ /* RANK (output) INTEGER */ /* The rank of A given by the number of steps the algorithm */ /* completed. */ /* TOL (input) DOUBLE PRECISION */ /* User defined tolerance. If TOL < 0, then N*U*MAX( A( K,K ) ) */ /* will be used. The algorithm terminates at the (K-1)st step */ /* if the pivot <= TOL. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* WORK DOUBLE PRECISION array, dimension (2*N) */ /* Work space. */ /* INFO (output) INTEGER */ /* < 0: If INFO = -K, the K-th argument had an illegal value, */ /* = 0: algorithm completed successfully, and */ /* > 0: the matrix A is either rank deficient with computed rank */ /* as returned in RANK, or is indefinite. See Section 7 of */ /* LAPACK Working Note #161 for further information. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ --work; --piv; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPSTF2", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Initialize PIV */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { piv[i__] = i__; /* L100: */ } /* Compute stopping value */ pvt = 1; ajj = a[pvt + pvt * a_dim1]; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { if (a[i__ + i__ * a_dim1] > ajj) { pvt = i__; ajj = a[pvt + pvt * a_dim1]; } } if (ajj == 0. || _starpu_disnan_(&ajj)) { *rank = 0; *info = 1; goto L170; } /* Compute stopping value if not supplied */ if (*tol < 0.) { dstop = *n * _starpu_dlamch_("Epsilon") * ajj; } else { dstop = *tol; } /* Set first half of WORK to zero, holds dot products */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L110: */ } if (upper) { /* Compute the Cholesky factorization P' * A * P = U' * U */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Find pivot, test for exit, else swap rows and columns */ /* Update dot products, compute possible pivots which are */ /* stored in the second half of WORK */ i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { if (j > 1) { /* Computing 2nd power */ d__1 = a[j - 1 + i__ * a_dim1]; work[i__] += d__1 * d__1; } work[*n + i__] = a[i__ + i__ * a_dim1] - work[i__]; /* L120: */ } if (j > 1) { maxlocval = (*n << 1) - (*n + j) + 1; itemp = _starpu_dmaxloc_(&work[*n + j], &maxlocval); pvt = itemp + j - 1; ajj = work[*n + pvt]; if (ajj <= dstop || _starpu_disnan_(&ajj)) { a[j + j * a_dim1] = ajj; goto L160; } } if (j != pvt) { /* Pivot OK, so can now swap pivot rows and columns */ a[pvt + pvt * a_dim1] = a[j + j * a_dim1]; i__2 = j - 1; _starpu_dswap_(&i__2, &a[j * a_dim1 + 1], &c__1, &a[pvt * a_dim1 + 1], &c__1); if (pvt < *n) { i__2 = *n - pvt; _starpu_dswap_(&i__2, &a[j + (pvt + 1) * a_dim1], lda, &a[pvt + ( pvt + 1) * a_dim1], lda); } i__2 = pvt - j - 1; _starpu_dswap_(&i__2, &a[j + (j + 1) * a_dim1], lda, &a[j + 1 + pvt * a_dim1], &c__1); /* Swap dot products and PIV */ dtemp = work[j]; work[j] = work[pvt]; work[pvt] = dtemp; itemp = piv[pvt]; piv[pvt] = piv[j]; piv[j] = itemp; } ajj = sqrt(ajj); a[j + j * a_dim1] = ajj; /* Compute elements J+1:N of row J */ if (j < *n) { i__2 = j - 1; i__3 = *n - j; _starpu_dgemv_("Trans", &i__2, &i__3, &c_b16, &a[(j + 1) * a_dim1 + 1] , lda, &a[j * a_dim1 + 1], &c__1, &c_b18, &a[j + (j + 1) * a_dim1], lda); i__2 = *n - j; d__1 = 1. / ajj; _starpu_dscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); } /* L130: */ } } else { /* Compute the Cholesky factorization P' * A * P = L * L' */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Find pivot, test for exit, else swap rows and columns */ /* Update dot products, compute possible pivots which are */ /* stored in the second half of WORK */ i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { if (j > 1) { /* Computing 2nd power */ d__1 = a[i__ + (j - 1) * a_dim1]; work[i__] += d__1 * d__1; } work[*n + i__] = a[i__ + i__ * a_dim1] - work[i__]; /* L140: */ } if (j > 1) { maxlocval = (*n << 1) - (*n + j) + 1; itemp = _starpu_dmaxloc_(&work[*n + j], &maxlocval); pvt = itemp + j - 1; ajj = work[*n + pvt]; if (ajj <= dstop || _starpu_disnan_(&ajj)) { a[j + j * a_dim1] = ajj; goto L160; } } if (j != pvt) { /* Pivot OK, so can now swap pivot rows and columns */ a[pvt + pvt * a_dim1] = a[j + j * a_dim1]; i__2 = j - 1; _starpu_dswap_(&i__2, &a[j + a_dim1], lda, &a[pvt + a_dim1], lda); if (pvt < *n) { i__2 = *n - pvt; _starpu_dswap_(&i__2, &a[pvt + 1 + j * a_dim1], &c__1, &a[pvt + 1 + pvt * a_dim1], &c__1); } i__2 = pvt - j - 1; _starpu_dswap_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &a[pvt + (j + 1) * a_dim1], lda); /* Swap dot products and PIV */ dtemp = work[j]; work[j] = work[pvt]; work[pvt] = dtemp; itemp = piv[pvt]; piv[pvt] = piv[j]; piv[j] = itemp; } ajj = sqrt(ajj); a[j + j * a_dim1] = ajj; /* Compute elements J+1:N of column J */ if (j < *n) { i__2 = *n - j; i__3 = j - 1; _starpu_dgemv_("No Trans", &i__2, &i__3, &c_b16, &a[j + 1 + a_dim1], lda, &a[j + a_dim1], lda, &c_b18, &a[j + 1 + j * a_dim1], &c__1); i__2 = *n - j; d__1 = 1. / ajj; _starpu_dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); } /* L150: */ } } /* Ran to completion, A has full rank */ *rank = *n; goto L170; L160: /* Rank is number of steps completed. Set INFO = 1 to signal */ /* that the factorization cannot be used to solve a system. */ *rank = j - 1; *info = 1; L170: return 0; /* End of DPSTF2 */ } /* _starpu_dpstf2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpstrf.c000066400000000000000000000306521507764646700207040ustar00rootroot00000000000000/* dpstrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b22 = -1.; static doublereal c_b24 = 1.; /* Subroutine */ int _starpu_dpstrf_(char *uplo, integer *n, doublereal *a, integer * lda, integer *piv, integer *rank, doublereal *tol, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, k, maxlocvar, jb, nb; doublereal ajj; integer pvt; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); doublereal dtemp; integer itemp; extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal dstop; logical upper; extern /* Subroutine */ int _starpu_dsyrk_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dpstf2_(char *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *); extern logical _starpu_disnan_(doublereal *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern integer _starpu_dmaxloc_(doublereal *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Craig Lucas, University of Manchester / NAG Ltd. */ /* October, 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPSTRF computes the Cholesky factorization with complete */ /* pivoting of a real symmetric positive semidefinite matrix A. */ /* The factorization has the form */ /* P' * A * P = U' * U , if UPLO = 'U', */ /* P' * A * P = L * L', if UPLO = 'L', */ /* where U is an upper triangular matrix and L is lower triangular, and */ /* P is stored as vector PIV. */ /* This algorithm does not attempt to check that A is positive */ /* semidefinite. This version of the algorithm calls level 3 BLAS. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is stored. */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* n by n upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading n by n lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if INFO = 0, the factor U or L from the Cholesky */ /* factorization as above. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* PIV (output) INTEGER array, dimension (N) */ /* PIV is such that the nonzero entries are P( PIV(K), K ) = 1. */ /* RANK (output) INTEGER */ /* The rank of A given by the number of steps the algorithm */ /* completed. */ /* TOL (input) DOUBLE PRECISION */ /* User defined tolerance. If TOL < 0, then N*U*MAX( A(K,K) ) */ /* will be used. The algorithm terminates at the (K-1)st step */ /* if the pivot <= TOL. */ /* WORK DOUBLE PRECISION array, dimension (2*N) */ /* Work space. */ /* INFO (output) INTEGER */ /* < 0: If INFO = -K, the K-th argument had an illegal value, */ /* = 0: algorithm completed successfully, and */ /* > 0: the matrix A is either rank deficient with computed rank */ /* as returned in RANK, or is indefinite. See Section 7 of */ /* LAPACK Working Note #161 for further information. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --work; --piv; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPSTRF", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Get block size */ nb = _starpu_ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1); if (nb <= 1 || nb >= *n) { /* Use unblocked code */ _starpu_dpstf2_(uplo, n, &a[a_dim1 + 1], lda, &piv[1], rank, tol, &work[1], info); goto L200; } else { /* Initialize PIV */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { piv[i__] = i__; /* L100: */ } /* Compute stopping value */ pvt = 1; ajj = a[pvt + pvt * a_dim1]; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { if (a[i__ + i__ * a_dim1] > ajj) { pvt = i__; ajj = a[pvt + pvt * a_dim1]; } } if (ajj == 0. || _starpu_disnan_(&ajj)) { *rank = 0; *info = 1; goto L200; } /* Compute stopping value if not supplied */ if (*tol < 0.) { dstop = *n * _starpu_dlamch_("Epsilon") * ajj; } else { dstop = *tol; } if (upper) { /* Compute the Cholesky factorization P' * A * P = U' * U */ i__1 = *n; i__2 = nb; for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { /* Account for last block not being NB wide */ /* Computing MIN */ i__3 = nb, i__4 = *n - k + 1; jb = min(i__3,i__4); /* Set relevant part of first half of WORK to zero, */ /* holds dot products */ i__3 = *n; for (i__ = k; i__ <= i__3; ++i__) { work[i__] = 0.; /* L110: */ } i__3 = k + jb - 1; for (j = k; j <= i__3; ++j) { /* Find pivot, test for exit, else swap rows and columns */ /* Update dot products, compute possible pivots which are */ /* stored in the second half of WORK */ i__4 = *n; for (i__ = j; i__ <= i__4; ++i__) { if (j > k) { /* Computing 2nd power */ d__1 = a[j - 1 + i__ * a_dim1]; work[i__] += d__1 * d__1; } work[*n + i__] = a[i__ + i__ * a_dim1] - work[i__]; /* L120: */ } if (j > 1) { maxlocvar = (*n << 1) - (*n + j) + 1; itemp = _starpu_dmaxloc_(&work[*n + j], &maxlocvar); pvt = itemp + j - 1; ajj = work[*n + pvt]; if (ajj <= dstop || _starpu_disnan_(&ajj)) { a[j + j * a_dim1] = ajj; goto L190; } } if (j != pvt) { /* Pivot OK, so can now swap pivot rows and columns */ a[pvt + pvt * a_dim1] = a[j + j * a_dim1]; i__4 = j - 1; _starpu_dswap_(&i__4, &a[j * a_dim1 + 1], &c__1, &a[pvt * a_dim1 + 1], &c__1); if (pvt < *n) { i__4 = *n - pvt; _starpu_dswap_(&i__4, &a[j + (pvt + 1) * a_dim1], lda, &a[ pvt + (pvt + 1) * a_dim1], lda); } i__4 = pvt - j - 1; _starpu_dswap_(&i__4, &a[j + (j + 1) * a_dim1], lda, &a[j + 1 + pvt * a_dim1], &c__1); /* Swap dot products and PIV */ dtemp = work[j]; work[j] = work[pvt]; work[pvt] = dtemp; itemp = piv[pvt]; piv[pvt] = piv[j]; piv[j] = itemp; } ajj = sqrt(ajj); a[j + j * a_dim1] = ajj; /* Compute elements J+1:N of row J. */ if (j < *n) { i__4 = j - k; i__5 = *n - j; _starpu_dgemv_("Trans", &i__4, &i__5, &c_b22, &a[k + (j + 1) * a_dim1], lda, &a[k + j * a_dim1], &c__1, & c_b24, &a[j + (j + 1) * a_dim1], lda); i__4 = *n - j; d__1 = 1. / ajj; _starpu_dscal_(&i__4, &d__1, &a[j + (j + 1) * a_dim1], lda); } /* L130: */ } /* Update trailing matrix, J already incremented */ if (k + jb <= *n) { i__3 = *n - j + 1; _starpu_dsyrk_("Upper", "Trans", &i__3, &jb, &c_b22, &a[k + j * a_dim1], lda, &c_b24, &a[j + j * a_dim1], lda); } /* L140: */ } } else { /* Compute the Cholesky factorization P' * A * P = L * L' */ i__2 = *n; i__1 = nb; for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { /* Account for last block not being NB wide */ /* Computing MIN */ i__3 = nb, i__4 = *n - k + 1; jb = min(i__3,i__4); /* Set relevant part of first half of WORK to zero, */ /* holds dot products */ i__3 = *n; for (i__ = k; i__ <= i__3; ++i__) { work[i__] = 0.; /* L150: */ } i__3 = k + jb - 1; for (j = k; j <= i__3; ++j) { /* Find pivot, test for exit, else swap rows and columns */ /* Update dot products, compute possible pivots which are */ /* stored in the second half of WORK */ i__4 = *n; for (i__ = j; i__ <= i__4; ++i__) { if (j > k) { /* Computing 2nd power */ d__1 = a[i__ + (j - 1) * a_dim1]; work[i__] += d__1 * d__1; } work[*n + i__] = a[i__ + i__ * a_dim1] - work[i__]; /* L160: */ } if (j > 1) { maxlocvar = (*n << 1) - (*n + j) + 1; itemp = _starpu_dmaxloc_(&work[*n + j], &maxlocvar); pvt = itemp + j - 1; ajj = work[*n + pvt]; if (ajj <= dstop || _starpu_disnan_(&ajj)) { a[j + j * a_dim1] = ajj; goto L190; } } if (j != pvt) { /* Pivot OK, so can now swap pivot rows and columns */ a[pvt + pvt * a_dim1] = a[j + j * a_dim1]; i__4 = j - 1; _starpu_dswap_(&i__4, &a[j + a_dim1], lda, &a[pvt + a_dim1], lda); if (pvt < *n) { i__4 = *n - pvt; _starpu_dswap_(&i__4, &a[pvt + 1 + j * a_dim1], &c__1, &a[ pvt + 1 + pvt * a_dim1], &c__1); } i__4 = pvt - j - 1; _starpu_dswap_(&i__4, &a[j + 1 + j * a_dim1], &c__1, &a[pvt + (j + 1) * a_dim1], lda); /* Swap dot products and PIV */ dtemp = work[j]; work[j] = work[pvt]; work[pvt] = dtemp; itemp = piv[pvt]; piv[pvt] = piv[j]; piv[j] = itemp; } ajj = sqrt(ajj); a[j + j * a_dim1] = ajj; /* Compute elements J+1:N of column J. */ if (j < *n) { i__4 = *n - j; i__5 = j - k; _starpu_dgemv_("No Trans", &i__4, &i__5, &c_b22, &a[j + 1 + k * a_dim1], lda, &a[j + k * a_dim1], lda, & c_b24, &a[j + 1 + j * a_dim1], &c__1); i__4 = *n - j; d__1 = 1. / ajj; _starpu_dscal_(&i__4, &d__1, &a[j + 1 + j * a_dim1], &c__1); } /* L170: */ } /* Update trailing matrix, J already incremented */ if (k + jb <= *n) { i__3 = *n - j + 1; _starpu_dsyrk_("Lower", "No Trans", &i__3, &jb, &c_b22, &a[j + k * a_dim1], lda, &c_b24, &a[j + j * a_dim1], lda); } /* L180: */ } } } /* Ran to completion, A has full rank */ *rank = *n; goto L200; L190: /* Rank is the number of steps completed. Set INFO = 1 to signal */ /* that the factorization cannot be used to solve a system. */ *rank = j - 1; *info = 1; L200: return 0; /* End of DPSTRF */ } /* _starpu_dpstrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dptcon.c000066400000000000000000000114431507764646700206660ustar00rootroot00000000000000/* dptcon.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dptcon_(integer *n, doublereal *d__, doublereal *e, doublereal *anorm, doublereal *rcond, doublereal *work, integer *info) { /* System generated locals */ integer i__1; doublereal d__1; /* Local variables */ integer i__, ix; extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal ainvnm; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPTCON computes the reciprocal of the condition number (in the */ /* 1-norm) of a real symmetric positive definite tridiagonal matrix */ /* using the factorization A = L*D*L**T or A = U**T*D*U computed by */ /* DPTTRF. */ /* Norm(inv(A)) is computed by a direct method, and the reciprocal of */ /* the condition number is computed as */ /* RCOND = 1 / (ANORM * norm(inv(A))). */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of the diagonal matrix D from the */ /* factorization of A, as computed by DPTTRF. */ /* E (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) off-diagonal elements of the unit bidiagonal factor */ /* U or L from the factorization of A, as computed by DPTTRF. */ /* ANORM (input) DOUBLE PRECISION */ /* The 1-norm of the original matrix A. */ /* RCOND (output) DOUBLE PRECISION */ /* The reciprocal of the condition number of the matrix A, */ /* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is the */ /* 1-norm of inv(A) computed in this routine. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The method used is described in Nicholas J. Higham, "Efficient */ /* Algorithms for Computing the Condition Number of a Tridiagonal */ /* Matrix", SIAM J. Sci. Stat. Comput., Vol. 7, No. 1, January 1986. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments. */ /* Parameter adjustments */ --work; --e; --d__; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*anorm < 0.) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPTCON", &i__1); return 0; } /* Quick return if possible */ *rcond = 0.; if (*n == 0) { *rcond = 1.; return 0; } else if (*anorm == 0.) { return 0; } /* Check that D(1:N) is positive. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (d__[i__] <= 0.) { return 0; } /* L10: */ } /* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by */ /* m(i,j) = abs(A(i,j)), i = j, */ /* m(i,j) = -abs(A(i,j)), i .ne. j, */ /* and e = [ 1, 1, ..., 1 ]'. Note M(A) = M(L)*D*M(L)'. */ /* Solve M(L) * x = e. */ work[1] = 1.; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { work[i__] = work[i__ - 1] * (d__1 = e[i__ - 1], abs(d__1)) + 1.; /* L20: */ } /* Solve D * M(L)' * x = b. */ work[*n] /= d__[*n]; for (i__ = *n - 1; i__ >= 1; --i__) { work[i__] = work[i__] / d__[i__] + work[i__ + 1] * (d__1 = e[i__], abs(d__1)); /* L30: */ } /* Compute AINVNM = max(x(i)), 1<=i<=n. */ ix = _starpu_idamax_(n, &work[1], &c__1); ainvnm = (d__1 = work[ix], abs(d__1)); /* Compute the reciprocal condition number. */ if (ainvnm != 0.) { *rcond = 1. / ainvnm / *anorm; } return 0; /* End of DPTCON */ } /* _starpu_dptcon_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpteqr.c000066400000000000000000000166241507764646700207040ustar00rootroot00000000000000/* dpteqr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b7 = 0.; static doublereal c_b8 = 1.; static integer c__0 = 0; static integer c__1 = 1; /* Subroutine */ int _starpu_dpteqr_(char *compz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal c__[1] /* was [1][1] */; integer i__; doublereal vt[1] /* was [1][1] */; integer nru; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dbdsqr_(char *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); integer icompz; extern /* Subroutine */ int _starpu_dpttrf_(integer *, doublereal *, doublereal *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPTEQR computes all eigenvalues and, optionally, eigenvectors of a */ /* symmetric positive definite tridiagonal matrix by first factoring the */ /* matrix using DPTTRF, and then calling DBDSQR to compute the singular */ /* values of the bidiagonal factor. */ /* This routine computes the eigenvalues of the positive definite */ /* tridiagonal matrix to high relative accuracy. This means that if the */ /* eigenvalues range over many orders of magnitude in size, then the */ /* small eigenvalues and corresponding eigenvectors will be computed */ /* more accurately than, for example, with the standard QR method. */ /* The eigenvectors of a full or band symmetric positive definite matrix */ /* can also be found if DSYTRD, DSPTRD, or DSBTRD has been used to */ /* reduce this matrix to tridiagonal form. (The reduction to tridiagonal */ /* form, however, may preclude the possibility of obtaining high */ /* relative accuracy in the small eigenvalues of the original matrix, if */ /* these eigenvalues range over many orders of magnitude.) */ /* Arguments */ /* ========= */ /* COMPZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only. */ /* = 'V': Compute eigenvectors of original symmetric */ /* matrix also. Array Z contains the orthogonal */ /* matrix used to reduce the original matrix to */ /* tridiagonal form. */ /* = 'I': Compute eigenvectors of tridiagonal matrix also. */ /* N (input) INTEGER */ /* The order of the matrix. N >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the n diagonal elements of the tridiagonal */ /* matrix. */ /* On normal exit, D contains the eigenvalues, in descending */ /* order. */ /* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, the (n-1) subdiagonal elements of the tridiagonal */ /* matrix. */ /* On exit, E has been destroyed. */ /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) */ /* On entry, if COMPZ = 'V', the orthogonal matrix used in the */ /* reduction to tridiagonal form. */ /* On exit, if COMPZ = 'V', the orthonormal eigenvectors of the */ /* original symmetric matrix; */ /* if COMPZ = 'I', the orthonormal eigenvectors of the */ /* tridiagonal matrix. */ /* If INFO > 0 on exit, Z contains the eigenvectors associated */ /* with only the stored eigenvalues. */ /* If COMPZ = 'N', then Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* COMPZ = 'V' or 'I', LDZ >= max(1,N). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = i, and i is: */ /* <= N the Cholesky factorization of the matrix could */ /* not be performed because the i-th principal minor */ /* was not positive definite. */ /* > N the SVD algorithm failed to converge; */ /* if INFO = N+i, i off-diagonal elements of the */ /* bidiagonal factor did not converge to zero. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; /* Function Body */ *info = 0; if (_starpu_lsame_(compz, "N")) { icompz = 0; } else if (_starpu_lsame_(compz, "V")) { icompz = 1; } else if (_starpu_lsame_(compz, "I")) { icompz = 2; } else { icompz = -1; } if (icompz < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPTEQR", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { if (icompz > 0) { z__[z_dim1 + 1] = 1.; } return 0; } if (icompz == 2) { _starpu_dlaset_("Full", n, n, &c_b7, &c_b8, &z__[z_offset], ldz); } /* Call DPTTRF to factor the matrix. */ _starpu_dpttrf_(n, &d__[1], &e[1], info); if (*info != 0) { return 0; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = sqrt(d__[i__]); /* L10: */ } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] *= d__[i__]; /* L20: */ } /* Call DBDSQR to compute the singular values/vectors of the */ /* bidiagonal factor. */ if (icompz > 0) { nru = *n; } else { nru = 0; } _starpu_dbdsqr_("Lower", n, &c__0, &nru, &c__0, &d__[1], &e[1], vt, &c__1, &z__[ z_offset], ldz, c__, &c__1, &work[1], info); /* Square the singular values. */ if (*info == 0) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] *= d__[i__]; /* L30: */ } } else { *info = *n + *info; } return 0; /* End of DPTEQR */ } /* _starpu_dpteqr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dptrfs.c000066400000000000000000000251541507764646700207050ustar00rootroot00000000000000/* dptrfs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b11 = 1.; /* Subroutine */ int _starpu_dptrfs_(integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *df, doublereal *ef, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *info) { /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j; doublereal s, bi, cx, dx, ex; integer ix, nz; doublereal eps, safe1, safe2; extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer count; extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal lstres; extern /* Subroutine */ int _starpu_dpttrs_(integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPTRFS improves the computed solution to a system of linear */ /* equations when the coefficient matrix is symmetric positive definite */ /* and tridiagonal, and provides error bounds and backward error */ /* estimates for the solution. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of the tridiagonal matrix A. */ /* E (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) subdiagonal elements of the tridiagonal matrix A. */ /* DF (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of the diagonal matrix D from the */ /* factorization computed by DPTTRF. */ /* EF (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) subdiagonal elements of the unit bidiagonal factor */ /* L from the factorization computed by DPTTRF. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* On entry, the solution matrix X, as computed by DPTTRS. */ /* On exit, the improved solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Internal Parameters */ /* =================== */ /* ITMAX is the maximum number of steps of iterative refinement. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; --df; --ef; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*nrhs < 0) { *info = -2; } else if (*ldb < max(1,*n)) { *info = -8; } else if (*ldx < max(1,*n)) { *info = -10; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPTRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = 4; eps = _starpu_dlamch_("Epsilon"); safmin = _starpu_dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - A * X. Also compute */ /* abs(A)*abs(x) + abs(b) for use in the backward error bound. */ if (*n == 1) { bi = b[j * b_dim1 + 1]; dx = d__[1] * x[j * x_dim1 + 1]; work[*n + 1] = bi - dx; work[1] = abs(bi) + abs(dx); } else { bi = b[j * b_dim1 + 1]; dx = d__[1] * x[j * x_dim1 + 1]; ex = e[1] * x[j * x_dim1 + 2]; work[*n + 1] = bi - dx - ex; work[1] = abs(bi) + abs(dx) + abs(ex); i__2 = *n - 1; for (i__ = 2; i__ <= i__2; ++i__) { bi = b[i__ + j * b_dim1]; cx = e[i__ - 1] * x[i__ - 1 + j * x_dim1]; dx = d__[i__] * x[i__ + j * x_dim1]; ex = e[i__] * x[i__ + 1 + j * x_dim1]; work[*n + i__] = bi - cx - dx - ex; work[i__] = abs(bi) + abs(cx) + abs(dx) + abs(ex); /* L30: */ } bi = b[*n + j * b_dim1]; cx = e[*n - 1] * x[*n - 1 + j * x_dim1]; dx = d__[*n] * x[*n + j * x_dim1]; work[*n + *n] = bi - cx - dx; work[*n] = abs(bi) + abs(cx) + abs(dx); } /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. If the i-th component of the denominator is less */ /* than SAFE2, then SAFE1 is added to the i-th components of the */ /* numerator and denominator before dividing. */ s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L40: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if */ /* 1) The residual BERR(J) is larger than machine epsilon, and */ /* 2) BERR(J) decreased by at least a factor of 2 during the */ /* last iteration, and */ /* 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { /* Update solution and try again. */ _starpu_dpttrs_(n, &c__1, &df[1], &ef[1], &work[*n + 1], n, info); _starpu_daxpy_(n, &c_b11, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) ; lstres = berr[j]; ++count; goto L20; } /* Bound error from formula */ /* norm(X - XTRUE) / norm(X) .le. FERR = */ /* norm( abs(inv(A))* */ /* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */ /* where */ /* norm(Z) is the magnitude of the largest component of Z */ /* inv(A) is the inverse of A */ /* abs(Z) is the componentwise absolute value of the matrix or */ /* vector Z */ /* NZ is the maximum number of nonzeros in any row of A, plus 1 */ /* EPS is machine epsilon */ /* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */ /* is incremented by SAFE1 if the i-th component of */ /* abs(A)*abs(X) + abs(B) is less than SAFE2. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L50: */ } ix = _starpu_idamax_(n, &work[1], &c__1); ferr[j] = work[ix]; /* Estimate the norm of inv(A). */ /* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by */ /* m(i,j) = abs(A(i,j)), i = j, */ /* m(i,j) = -abs(A(i,j)), i .ne. j, */ /* and e = [ 1, 1, ..., 1 ]'. Note M(A) = M(L)*D*M(L)'. */ /* Solve M(L) * x = e. */ work[1] = 1.; i__2 = *n; for (i__ = 2; i__ <= i__2; ++i__) { work[i__] = work[i__ - 1] * (d__1 = ef[i__ - 1], abs(d__1)) + 1.; /* L60: */ } /* Solve D * M(L)' * x = b. */ work[*n] /= df[*n]; for (i__ = *n - 1; i__ >= 1; --i__) { work[i__] = work[i__] / df[i__] + work[i__ + 1] * (d__1 = ef[i__], abs(d__1)); /* L70: */ } /* Compute norm(inv(A)) = max(x(i)), 1<=i<=n. */ ix = _starpu_idamax_(n, &work[1], &c__1); ferr[j] *= (d__1 = work[ix], abs(d__1)); /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); lstres = max(d__2,d__3); /* L80: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L90: */ } return 0; /* End of DPTRFS */ } /* _starpu_dptrfs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dptsv.c000066400000000000000000000102131507764646700205310ustar00rootroot00000000000000/* dptsv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dptsv_(integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer b_dim1, b_offset, i__1; /* Local variables */ extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpttrf_( integer *, doublereal *, doublereal *, integer *), _starpu_dpttrs_( integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPTSV computes the solution to a real system of linear equations */ /* A*X = B, where A is an N-by-N symmetric positive definite tridiagonal */ /* matrix, and X and B are N-by-NRHS matrices. */ /* A is factored as A = L*D*L**T, and the factored form of A is then */ /* used to solve the system of equations. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the n diagonal elements of the tridiagonal matrix */ /* A. On exit, the n diagonal elements of the diagonal matrix */ /* D from the factorization A = L*D*L**T. */ /* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, the (n-1) subdiagonal elements of the tridiagonal */ /* matrix A. On exit, the (n-1) subdiagonal elements of the */ /* unit bidiagonal factor L from the L*D*L**T factorization of */ /* A. (E can also be regarded as the superdiagonal of the unit */ /* bidiagonal factor U from the U**T*D*U factorization of A.) */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS right hand side matrix B. */ /* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the leading minor of order i is not */ /* positive definite, and the solution has not been */ /* computed. The factorization has not been completed */ /* unless i = N. */ /* ===================================================================== */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*nrhs < 0) { *info = -2; } else if (*ldb < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPTSV ", &i__1); return 0; } /* Compute the L*D*L' (or U'*D*U) factorization of A. */ _starpu_dpttrf_(n, &d__[1], &e[1], info); if (*info == 0) { /* Solve the system A*X = B, overwriting B with X. */ _starpu_dpttrs_(n, nrhs, &d__[1], &e[1], &b[b_offset], ldb, info); } return 0; /* End of DPTSV */ } /* _starpu_dptsv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dptsvx.c000066400000000000000000000243761507764646700207400ustar00rootroot00000000000000/* dptsvx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dptsvx_(char *fact, integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *df, doublereal *ef, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * info) { /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1; /* Local variables */ extern logical _starpu_lsame_(char *, char *); doublereal anorm; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *); logical nofact; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dptcon_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dptrfs_( integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dpttrf_( integer *, doublereal *, doublereal *, integer *), _starpu_dpttrs_( integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPTSVX uses the factorization A = L*D*L**T to compute the solution */ /* to a real system of linear equations A*X = B, where A is an N-by-N */ /* symmetric positive definite tridiagonal matrix and X and B are */ /* N-by-NRHS matrices. */ /* Error bounds on the solution and a condition estimate are also */ /* provided. */ /* Description */ /* =========== */ /* The following steps are performed: */ /* 1. If FACT = 'N', the matrix A is factored as A = L*D*L**T, where L */ /* is a unit lower bidiagonal matrix and D is diagonal. The */ /* factorization can also be regarded as having the form */ /* A = U**T*D*U. */ /* 2. If the leading i-by-i principal minor is not positive definite, */ /* then the routine returns with INFO = i. Otherwise, the factored */ /* form of A is used to estimate the condition number of the matrix */ /* A. If the reciprocal of the condition number is less than machine */ /* precision, INFO = N+1 is returned as a warning, but the routine */ /* still goes on to solve for X and compute error bounds as */ /* described below. */ /* 3. The system of equations is solved for X using the factored form */ /* of A. */ /* 4. Iterative refinement is applied to improve the computed solution */ /* matrix and calculate error bounds and backward error estimates */ /* for it. */ /* Arguments */ /* ========= */ /* FACT (input) CHARACTER*1 */ /* Specifies whether or not the factored form of A has been */ /* supplied on entry. */ /* = 'F': On entry, DF and EF contain the factored form of A. */ /* D, E, DF, and EF will not be modified. */ /* = 'N': The matrix A will be copied to DF and EF and */ /* factored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of the tridiagonal matrix A. */ /* E (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) subdiagonal elements of the tridiagonal matrix A. */ /* DF (input or output) DOUBLE PRECISION array, dimension (N) */ /* If FACT = 'F', then DF is an input argument and on entry */ /* contains the n diagonal elements of the diagonal matrix D */ /* from the L*D*L**T factorization of A. */ /* If FACT = 'N', then DF is an output argument and on exit */ /* contains the n diagonal elements of the diagonal matrix D */ /* from the L*D*L**T factorization of A. */ /* EF (input or output) DOUBLE PRECISION array, dimension (N-1) */ /* If FACT = 'F', then EF is an input argument and on entry */ /* contains the (n-1) subdiagonal elements of the unit */ /* bidiagonal factor L from the L*D*L**T factorization of A. */ /* If FACT = 'N', then EF is an output argument and on exit */ /* contains the (n-1) subdiagonal elements of the unit */ /* bidiagonal factor L from the L*D*L**T factorization of A. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The N-by-NRHS right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* If INFO = 0 of INFO = N+1, the N-by-NRHS solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* The reciprocal condition number of the matrix A. If RCOND */ /* is less than the machine precision (in particular, if */ /* RCOND = 0), the matrix is singular to working precision. */ /* This condition is indicated by a return code of INFO > 0. */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in any */ /* element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, and i is */ /* <= N: the leading minor of order i of A is */ /* not positive definite, so the factorization */ /* could not be completed, and the solution has not */ /* been computed. RCOND = 0 is returned. */ /* = N+1: U is nonsingular, but RCOND is less than machine */ /* precision, meaning that the matrix is singular */ /* to working precision. Nevertheless, the */ /* solution and error bounds are computed because */ /* there are a number of situations where the */ /* computed solution can be more accurate than the */ /* value of RCOND would suggest. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; --df; --ef; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; /* Function Body */ *info = 0; nofact = _starpu_lsame_(fact, "N"); if (! nofact && ! _starpu_lsame_(fact, "F")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -9; } else if (*ldx < max(1,*n)) { *info = -11; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPTSVX", &i__1); return 0; } if (nofact) { /* Compute the L*D*L' (or U'*D*U) factorization of A. */ _starpu_dcopy_(n, &d__[1], &c__1, &df[1], &c__1); if (*n > 1) { i__1 = *n - 1; _starpu_dcopy_(&i__1, &e[1], &c__1, &ef[1], &c__1); } _starpu_dpttrf_(n, &df[1], &ef[1], info); /* Return if INFO is non-zero. */ if (*info > 0) { *rcond = 0.; return 0; } } /* Compute the norm of the matrix A. */ anorm = _starpu_dlanst_("1", n, &d__[1], &e[1]); /* Compute the reciprocal of the condition number of A. */ _starpu_dptcon_(n, &df[1], &ef[1], &anorm, rcond, &work[1], info); /* Compute the solution vectors X. */ _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); _starpu_dpttrs_(n, nrhs, &df[1], &ef[1], &x[x_offset], ldx, info); /* Use iterative refinement to improve the computed solutions and */ /* compute error bounds and backward error estimates for them. */ _starpu_dptrfs_(n, nrhs, &d__[1], &e[1], &df[1], &ef[1], &b[b_offset], ldb, &x[ x_offset], ldx, &ferr[1], &berr[1], &work[1], info); /* Set INFO = N+1 if the matrix is singular to working precision. */ if (*rcond < _starpu_dlamch_("Epsilon")) { *info = *n + 1; } return 0; /* End of DPTSVX */ } /* _starpu_dptsvx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpttrf.c000066400000000000000000000106011507764646700206750ustar00rootroot00000000000000/* dpttrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dpttrf_(integer *n, doublereal *d__, doublereal *e, integer *info) { /* System generated locals */ integer i__1; /* Local variables */ integer i__, i4; doublereal ei; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPTTRF computes the L*D*L' factorization of a real symmetric */ /* positive definite tridiagonal matrix A. The factorization may also */ /* be regarded as having the form A = U'*D*U. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the n diagonal elements of the tridiagonal matrix */ /* A. On exit, the n diagonal elements of the diagonal matrix */ /* D from the L*D*L' factorization of A. */ /* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, the (n-1) subdiagonal elements of the tridiagonal */ /* matrix A. On exit, the (n-1) subdiagonal elements of the */ /* unit bidiagonal factor L from the L*D*L' factorization of A. */ /* E can also be regarded as the superdiagonal of the unit */ /* bidiagonal factor U from the U'*D*U factorization of A. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -k, the k-th argument had an illegal value */ /* > 0: if INFO = k, the leading minor of order k is not */ /* positive definite; if k < N, the factorization could not */ /* be completed, while if k = N, the factorization was */ /* completed, but D(N) <= 0. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --e; --d__; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; i__1 = -(*info); _starpu_xerbla_("DPTTRF", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Compute the L*D*L' (or U'*D*U) factorization of A. */ i4 = (*n - 1) % 4; i__1 = i4; for (i__ = 1; i__ <= i__1; ++i__) { if (d__[i__] <= 0.) { *info = i__; goto L30; } ei = e[i__]; e[i__] = ei / d__[i__]; d__[i__ + 1] -= e[i__] * ei; /* L10: */ } i__1 = *n - 4; for (i__ = i4 + 1; i__ <= i__1; i__ += 4) { /* Drop out of the loop if d(i) <= 0: the matrix is not positive */ /* definite. */ if (d__[i__] <= 0.) { *info = i__; goto L30; } /* Solve for e(i) and d(i+1). */ ei = e[i__]; e[i__] = ei / d__[i__]; d__[i__ + 1] -= e[i__] * ei; if (d__[i__ + 1] <= 0.) { *info = i__ + 1; goto L30; } /* Solve for e(i+1) and d(i+2). */ ei = e[i__ + 1]; e[i__ + 1] = ei / d__[i__ + 1]; d__[i__ + 2] -= e[i__ + 1] * ei; if (d__[i__ + 2] <= 0.) { *info = i__ + 2; goto L30; } /* Solve for e(i+2) and d(i+3). */ ei = e[i__ + 2]; e[i__ + 2] = ei / d__[i__ + 2]; d__[i__ + 3] -= e[i__ + 2] * ei; if (d__[i__ + 3] <= 0.) { *info = i__ + 3; goto L30; } /* Solve for e(i+3) and d(i+4). */ ei = e[i__ + 3]; e[i__ + 3] = ei / d__[i__ + 3]; d__[i__ + 4] -= e[i__ + 3] * ei; /* L20: */ } /* Check d(n) for positive definiteness. */ if (d__[*n] <= 0.) { *info = *n; } L30: return 0; /* End of DPTTRF */ } /* _starpu_dpttrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dpttrs.c000066400000000000000000000105601507764646700207160ustar00rootroot00000000000000/* dpttrs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; /* Subroutine */ int _starpu_dpttrs_(integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ integer j, jb, nb; extern /* Subroutine */ int _starpu_dptts2_(integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPTTRS solves a tridiagonal system of the form */ /* A * X = B */ /* using the L*D*L' factorization of A computed by DPTTRF. D is a */ /* diagonal matrix specified in the vector D, L is a unit bidiagonal */ /* matrix whose subdiagonal is specified in the vector E, and X and B */ /* are N by NRHS matrices. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the tridiagonal matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of the diagonal matrix D from the */ /* L*D*L' factorization of A. */ /* E (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) subdiagonal elements of the unit bidiagonal factor */ /* L from the L*D*L' factorization of A. E can also be regarded */ /* as the superdiagonal of the unit bidiagonal factor U from the */ /* factorization A = U'*D*U. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side vectors B for the system of */ /* linear equations. */ /* On exit, the solution vectors, X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -k, the k-th argument had an illegal value */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments. */ /* Parameter adjustments */ --d__; --e; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*nrhs < 0) { *info = -2; } else if (*ldb < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DPTTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } /* Determine the number of right-hand sides to solve at a time. */ if (*nrhs == 1) { nb = 1; } else { /* Computing MAX */ i__1 = 1, i__2 = _starpu_ilaenv_(&c__1, "DPTTRS", " ", n, nrhs, &c_n1, &c_n1); nb = max(i__1,i__2); } if (nb >= *nrhs) { _starpu_dptts2_(n, nrhs, &d__[1], &e[1], &b[b_offset], ldb); } else { i__1 = *nrhs; i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__3 = *nrhs - j + 1; jb = min(i__3,nb); _starpu_dptts2_(n, &jb, &d__[1], &e[1], &b[j * b_dim1 + 1], ldb); /* L10: */ } } return 0; /* End of DPTTRS */ } /* _starpu_dpttrs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dptts2.c000066400000000000000000000072761507764646700206300ustar00rootroot00000000000000/* dptts2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dptts2_(integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb) { /* System generated locals */ integer b_dim1, b_offset, i__1, i__2; doublereal d__1; /* Local variables */ integer i__, j; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPTTS2 solves a tridiagonal system of the form */ /* A * X = B */ /* using the L*D*L' factorization of A computed by DPTTRF. D is a */ /* diagonal matrix specified in the vector D, L is a unit bidiagonal */ /* matrix whose subdiagonal is specified in the vector E, and X and B */ /* are N by NRHS matrices. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the tridiagonal matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of the diagonal matrix D from the */ /* L*D*L' factorization of A. */ /* E (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) subdiagonal elements of the unit bidiagonal factor */ /* L from the L*D*L' factorization of A. E can also be regarded */ /* as the superdiagonal of the unit bidiagonal factor U from the */ /* factorization A = U'*D*U. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side vectors B for the system of */ /* linear equations. */ /* On exit, the solution vectors, X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ --d__; --e; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ if (*n <= 1) { if (*n == 1) { d__1 = 1. / d__[1]; _starpu_dscal_(nrhs, &d__1, &b[b_offset], ldb); } return 0; } /* Solve A * X = B using the factorization A = L*D*L', */ /* overwriting each right hand side vector with its solution. */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { /* Solve L * x = b. */ i__2 = *n; for (i__ = 2; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] -= b[i__ - 1 + j * b_dim1] * e[i__ - 1]; /* L10: */ } /* Solve D * L' * x = b. */ b[*n + j * b_dim1] /= d__[*n]; for (i__ = *n - 1; i__ >= 1; --i__) { b[i__ + j * b_dim1] = b[i__ + j * b_dim1] / d__[i__] - b[i__ + 1 + j * b_dim1] * e[i__]; /* L20: */ } /* L30: */ } return 0; /* End of DPTTS2 */ } /* _starpu_dptts2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/drscl.c000066400000000000000000000065351507764646700205140ustar00rootroot00000000000000/* drscl.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_drscl_(integer *n, doublereal *sa, doublereal *sx, integer *incx) { doublereal mul, cden; logical done; doublereal cnum, cden1, cnum1; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dlabad_(doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *); doublereal bignum, smlnum; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DRSCL multiplies an n-element real vector x by the real scalar 1/a. */ /* This is done without overflow or underflow as long as */ /* the final result x/a does not overflow or underflow. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of components of the vector x. */ /* SA (input) DOUBLE PRECISION */ /* The scalar a which is used to divide each component of x. */ /* SA must be >= 0, or the subroutine will divide by zero. */ /* SX (input/output) DOUBLE PRECISION array, dimension */ /* (1+(N-1)*abs(INCX)) */ /* The n-element vector x. */ /* INCX (input) INTEGER */ /* The increment between successive values of the vector SX. */ /* > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Quick return if possible */ /* Parameter adjustments */ --sx; /* Function Body */ if (*n <= 0) { return 0; } /* Get machine parameters */ smlnum = _starpu_dlamch_("S"); bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); /* Initialize the denominator to SA and the numerator to 1. */ cden = *sa; cnum = 1.; L10: cden1 = cden * smlnum; cnum1 = cnum / bignum; if (abs(cden1) > abs(cnum) && cnum != 0.) { /* Pre-multiply X by SMLNUM if CDEN is large compared to CNUM. */ mul = smlnum; done = FALSE_; cden = cden1; } else if (abs(cnum1) > abs(cden)) { /* Pre-multiply X by BIGNUM if CDEN is small compared to CNUM. */ mul = bignum; done = FALSE_; cnum = cnum1; } else { /* Multiply X by CNUM / CDEN and return. */ mul = cnum / cden; done = TRUE_; } /* Scale the vector X by MUL */ _starpu_dscal_(n, &mul, &sx[1], incx); if (! done) { goto L10; } return 0; /* End of DRSCL */ } /* _starpu_drscl_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsbev.c000066400000000000000000000177531507764646700205140ustar00rootroot00000000000000/* dsbev.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b11 = 1.; static integer c__1 = 1; /* Subroutine */ int _starpu_dsbev_(char *jobz, char *uplo, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, z_dim1, z_offset, i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal eps; integer inde; doublereal anrm; integer imax; doublereal rmin, rmax; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal sigma; extern logical _starpu_lsame_(char *, char *); integer iinfo; logical lower, wantz; extern doublereal _starpu_dlamch_(char *); integer iscale; extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlansb_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dsbtrd_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsterf_( integer *, doublereal *, doublereal *, integer *); integer indwrk; extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *); doublereal smlnum; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSBEV computes all the eigenvalues and, optionally, eigenvectors of */ /* a real symmetric band matrix A. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix A, stored in the first KD+1 rows of the array. The */ /* j-th column of A is stored in the j-th column of the array AB */ /* as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* On exit, AB is overwritten by values generated during the */ /* reduction to tridiagonal form. If UPLO = 'U', the first */ /* superdiagonal and the diagonal of the tridiagonal matrix T */ /* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */ /* the diagonal and first subdiagonal of T are returned in the */ /* first two rows of AB. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD + 1. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, the eigenvalues in ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ /* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ /* eigenvectors of the matrix A, with the i-th column of Z */ /* holding the eigenvector associated with W(i). */ /* If JOBZ = 'N', then Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (max(1,3*N-2)) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the algorithm failed to converge; i */ /* off-diagonal elements of an intermediate tridiagonal */ /* form did not converge to zero. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); lower = _starpu_lsame_(uplo, "L"); *info = 0; if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (lower || _starpu_lsame_(uplo, "U"))) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*kd < 0) { *info = -4; } else if (*ldab < *kd + 1) { *info = -6; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -9; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSBEV ", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { if (lower) { w[1] = ab[ab_dim1 + 1]; } else { w[1] = ab[*kd + 1 + ab_dim1]; } if (wantz) { z__[z_dim1 + 1] = 1.; } return 0; } /* Get machine constants. */ safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); rmax = sqrt(bignum); /* Scale matrix to allowable range, if necessary. */ anrm = _starpu_dlansb_("M", uplo, n, kd, &ab[ab_offset], ldab, &work[1]); iscale = 0; if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { if (lower) { _starpu_dlascl_("B", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, info); } else { _starpu_dlascl_("Q", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, info); } } /* Call DSBTRD to reduce symmetric band matrix to tridiagonal form. */ inde = 1; indwrk = inde + *n; _starpu_dsbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &work[inde], &z__[ z_offset], ldz, &work[indwrk], &iinfo); /* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEQR. */ if (! wantz) { _starpu_dsterf_(n, &w[1], &work[inde], info); } else { _starpu_dsteqr_(jobz, n, &w[1], &work[inde], &z__[z_offset], ldz, &work[ indwrk], info); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { if (*info == 0) { imax = *n; } else { imax = *info - 1; } d__1 = 1. / sigma; _starpu_dscal_(&imax, &d__1, &w[1], &c__1); } return 0; /* End of DSBEV */ } /* _starpu_dsbev_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsbevd.c000066400000000000000000000261021507764646700206440ustar00rootroot00000000000000/* dsbevd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b11 = 1.; static doublereal c_b18 = 0.; static integer c__1 = 1; /* Subroutine */ int _starpu_dsbevd_(char *jobz, char *uplo, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, z_dim1, z_offset, i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal eps; integer inde; doublereal anrm, rmin, rmax; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer * , doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); doublereal sigma; extern logical _starpu_lsame_(char *, char *); integer iinfo, lwmin; logical lower, wantz; integer indwk2, llwrk2; extern doublereal _starpu_dlamch_(char *); integer iscale; extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlansb_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dstedc_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dsbtrd_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsterf_( integer *, doublereal *, doublereal *, integer *); integer indwrk, liwmin; doublereal smlnum; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSBEVD computes all the eigenvalues and, optionally, eigenvectors of */ /* a real symmetric band matrix A. If eigenvectors are desired, it uses */ /* a divide and conquer algorithm. */ /* The divide and conquer algorithm makes very mild assumptions about */ /* floating point arithmetic. It will work on machines with a guard */ /* digit in add/subtract, or on those binary machines without guard */ /* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ /* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix A, stored in the first KD+1 rows of the array. The */ /* j-th column of A is stored in the j-th column of the array AB */ /* as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* On exit, AB is overwritten by values generated during the */ /* reduction to tridiagonal form. If UPLO = 'U', the first */ /* superdiagonal and the diagonal of the tridiagonal matrix T */ /* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */ /* the diagonal and first subdiagonal of T are returned in the */ /* first two rows of AB. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD + 1. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, the eigenvalues in ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ /* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ /* eigenvectors of the matrix A, with the i-th column of Z */ /* holding the eigenvector associated with W(i). */ /* If JOBZ = 'N', then Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace/output) DOUBLE PRECISION array, */ /* dimension (LWORK) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* IF N <= 1, LWORK must be at least 1. */ /* If JOBZ = 'N' and N > 2, LWORK must be at least 2*N. */ /* If JOBZ = 'V' and N > 2, LWORK must be at least */ /* ( 1 + 5*N + 2*N**2 ). */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal sizes of the WORK and IWORK */ /* arrays, returns these values as the first entries of the WORK */ /* and IWORK arrays, and no error message related to LWORK or */ /* LIWORK is issued by XERBLA. */ /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ /* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array LIWORK. */ /* If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. */ /* If JOBZ = 'V' and N > 2, LIWORK must be at least 3 + 5*N. */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the optimal sizes of the WORK and */ /* IWORK arrays, returns these values as the first entries of */ /* the WORK and IWORK arrays, and no error message related to */ /* LWORK or LIWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the algorithm failed to converge; i */ /* off-diagonal elements of an intermediate tridiagonal */ /* form did not converge to zero. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; --iwork; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); lower = _starpu_lsame_(uplo, "L"); lquery = *lwork == -1 || *liwork == -1; *info = 0; if (*n <= 1) { liwmin = 1; lwmin = 1; } else { if (wantz) { liwmin = *n * 5 + 3; /* Computing 2nd power */ i__1 = *n; lwmin = *n * 5 + 1 + (i__1 * i__1 << 1); } else { liwmin = 1; lwmin = *n << 1; } } if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (lower || _starpu_lsame_(uplo, "U"))) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*kd < 0) { *info = -4; } else if (*ldab < *kd + 1) { *info = -6; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -9; } if (*info == 0) { work[1] = (doublereal) lwmin; iwork[1] = liwmin; if (*lwork < lwmin && ! lquery) { *info = -11; } else if (*liwork < liwmin && ! lquery) { *info = -13; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSBEVD", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { w[1] = ab[ab_dim1 + 1]; if (wantz) { z__[z_dim1 + 1] = 1.; } return 0; } /* Get machine constants. */ safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); rmax = sqrt(bignum); /* Scale matrix to allowable range, if necessary. */ anrm = _starpu_dlansb_("M", uplo, n, kd, &ab[ab_offset], ldab, &work[1]); iscale = 0; if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { if (lower) { _starpu_dlascl_("B", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, info); } else { _starpu_dlascl_("Q", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, info); } } /* Call DSBTRD to reduce symmetric band matrix to tridiagonal form. */ inde = 1; indwrk = inde + *n; indwk2 = indwrk + *n * *n; llwrk2 = *lwork - indwk2 + 1; _starpu_dsbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &work[inde], &z__[ z_offset], ldz, &work[indwrk], &iinfo); /* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEDC. */ if (! wantz) { _starpu_dsterf_(n, &w[1], &work[inde], info); } else { _starpu_dstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & llwrk2, &iwork[1], liwork, info); _starpu_dgemm_("N", "N", n, n, n, &c_b11, &z__[z_offset], ldz, &work[indwrk], n, &c_b18, &work[indwk2], n); _starpu_dlacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { d__1 = 1. / sigma; _starpu_dscal_(n, &d__1, &w[1], &c__1); } work[1] = (doublereal) lwmin; iwork[1] = liwmin; return 0; /* End of DSBEVD */ } /* _starpu_dsbevd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsbevx.c000066400000000000000000000406661507764646700207030ustar00rootroot00000000000000/* dsbevx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b14 = 1.; static integer c__1 = 1; static doublereal c_b34 = 0.; /* Subroutine */ int _starpu_dsbevx_(char *jobz, char *range, char *uplo, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *q, integer * ldq, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, jj; doublereal eps, vll, vuu, tmp1; integer indd, inde; doublereal anrm; integer imax; doublereal rmin, rmax; logical test; integer itmp1, indee; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal sigma; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer iinfo; char order[1]; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); logical lower, wantz; extern doublereal _starpu_dlamch_(char *); logical alleig, indeig; integer iscale, indibl; extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlansb_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *); logical valeig; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal abstll, bignum; extern /* Subroutine */ int _starpu_dsbtrd_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer indisp; extern /* Subroutine */ int _starpu_dstein_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *), _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); integer indiwo; extern /* Subroutine */ int _starpu_dstebz_(char *, char *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); integer indwrk; extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer nsplit; doublereal smlnum; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSBEVX computes selected eigenvalues and, optionally, eigenvectors */ /* of a real symmetric band matrix A. Eigenvalues and eigenvectors can */ /* be selected by specifying either a range of values or a range of */ /* indices for the desired eigenvalues. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* RANGE (input) CHARACTER*1 */ /* = 'A': all eigenvalues will be found; */ /* = 'V': all eigenvalues in the half-open interval (VL,VU] */ /* will be found; */ /* = 'I': the IL-th through IU-th eigenvalues will be found. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix A, stored in the first KD+1 rows of the array. The */ /* j-th column of A is stored in the j-th column of the array AB */ /* as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* On exit, AB is overwritten by values generated during the */ /* reduction to tridiagonal form. If UPLO = 'U', the first */ /* superdiagonal and the diagonal of the tridiagonal matrix T */ /* are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */ /* the diagonal and first subdiagonal of T are returned in the */ /* first two rows of AB. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD + 1. */ /* Q (output) DOUBLE PRECISION array, dimension (LDQ, N) */ /* If JOBZ = 'V', the N-by-N orthogonal matrix used in the */ /* reduction to tridiagonal form. */ /* If JOBZ = 'N', the array Q is not referenced. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. If JOBZ = 'V', then */ /* LDQ >= max(1,N). */ /* VL (input) DOUBLE PRECISION */ /* VU (input) DOUBLE PRECISION */ /* If RANGE='V', the lower and upper bounds of the interval to */ /* be searched for eigenvalues. VL < VU. */ /* Not referenced if RANGE = 'A' or 'I'. */ /* IL (input) INTEGER */ /* IU (input) INTEGER */ /* If RANGE='I', the indices (in ascending order) of the */ /* smallest and largest eigenvalues to be returned. */ /* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ /* Not referenced if RANGE = 'A' or 'V'. */ /* ABSTOL (input) DOUBLE PRECISION */ /* The absolute error tolerance for the eigenvalues. */ /* An approximate eigenvalue is accepted as converged */ /* when it is determined to lie in an interval [a,b] */ /* of width less than or equal to */ /* ABSTOL + EPS * max( |a|,|b| ) , */ /* where EPS is the machine precision. If ABSTOL is less than */ /* or equal to zero, then EPS*|T| will be used in its place, */ /* where |T| is the 1-norm of the tridiagonal matrix obtained */ /* by reducing AB to tridiagonal form. */ /* Eigenvalues will be computed most accurately when ABSTOL is */ /* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ /* If this routine returns with INFO>0, indicating that some */ /* eigenvectors did not converge, try setting ABSTOL to */ /* 2*DLAMCH('S'). */ /* See "Computing Small Singular Values of Bidiagonal Matrices */ /* with Guaranteed High Relative Accuracy," by Demmel and */ /* Kahan, LAPACK Working Note #3. */ /* M (output) INTEGER */ /* The total number of eigenvalues found. 0 <= M <= N. */ /* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* The first M elements contain the selected eigenvalues in */ /* ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) */ /* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ /* contain the orthonormal eigenvectors of the matrix A */ /* corresponding to the selected eigenvalues, with the i-th */ /* column of Z holding the eigenvector associated with W(i). */ /* If an eigenvector fails to converge, then that column of Z */ /* contains the latest approximation to the eigenvector, and the */ /* index of the eigenvector is returned in IFAIL. */ /* If JOBZ = 'N', then Z is not referenced. */ /* Note: the user must ensure that at least max(1,M) columns are */ /* supplied in the array Z; if RANGE = 'V', the exact value of M */ /* is not known in advance and an upper bound must be used. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (7*N) */ /* IWORK (workspace) INTEGER array, dimension (5*N) */ /* IFAIL (output) INTEGER array, dimension (N) */ /* If JOBZ = 'V', then if INFO = 0, the first M elements of */ /* IFAIL are zero. If INFO > 0, then IFAIL contains the */ /* indices of the eigenvectors that failed to converge. */ /* If JOBZ = 'N', then IFAIL is not referenced. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = i, then i eigenvectors failed to converge. */ /* Their indices are stored in array IFAIL. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; --iwork; --ifail; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); alleig = _starpu_lsame_(range, "A"); valeig = _starpu_lsame_(range, "V"); indeig = _starpu_lsame_(range, "I"); lower = _starpu_lsame_(uplo, "L"); *info = 0; if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; } else if (! (lower || _starpu_lsame_(uplo, "U"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*kd < 0) { *info = -5; } else if (*ldab < *kd + 1) { *info = -7; } else if (wantz && *ldq < max(1,*n)) { *info = -9; } else { if (valeig) { if (*n > 0 && *vu <= *vl) { *info = -11; } } else if (indeig) { if (*il < 1 || *il > max(1,*n)) { *info = -12; } else if (*iu < min(*n,*il) || *iu > *n) { *info = -13; } } } if (*info == 0) { if (*ldz < 1 || wantz && *ldz < *n) { *info = -18; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSBEVX", &i__1); return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { return 0; } if (*n == 1) { *m = 1; if (lower) { tmp1 = ab[ab_dim1 + 1]; } else { tmp1 = ab[*kd + 1 + ab_dim1]; } if (valeig) { if (! (*vl < tmp1 && *vu >= tmp1)) { *m = 0; } } if (*m == 1) { w[1] = tmp1; if (wantz) { z__[z_dim1 + 1] = 1.; } } return 0; } /* Get machine constants. */ safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); /* Computing MIN */ d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); rmax = min(d__1,d__2); /* Scale matrix to allowable range, if necessary. */ iscale = 0; abstll = *abstol; if (valeig) { vll = *vl; vuu = *vu; } else { vll = 0.; vuu = 0.; } anrm = _starpu_dlansb_("M", uplo, n, kd, &ab[ab_offset], ldab, &work[1]); if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { if (lower) { _starpu_dlascl_("B", kd, kd, &c_b14, &sigma, n, n, &ab[ab_offset], ldab, info); } else { _starpu_dlascl_("Q", kd, kd, &c_b14, &sigma, n, n, &ab[ab_offset], ldab, info); } if (*abstol > 0.) { abstll = *abstol * sigma; } if (valeig) { vll = *vl * sigma; vuu = *vu * sigma; } } /* Call DSBTRD to reduce symmetric band matrix to tridiagonal form. */ indd = 1; inde = indd + *n; indwrk = inde + *n; _starpu_dsbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &work[indd], &work[inde], &q[q_offset], ldq, &work[indwrk], &iinfo); /* If all eigenvalues are desired and ABSTOL is less than or equal */ /* to zero, then call DSTERF or SSTEQR. If this fails for some */ /* eigenvalue, then try DSTEBZ. */ test = FALSE_; if (indeig) { if (*il == 1 && *iu == *n) { test = TRUE_; } } if ((alleig || test) && *abstol <= 0.) { _starpu_dcopy_(n, &work[indd], &c__1, &w[1], &c__1); indee = indwrk + (*n << 1); if (! wantz) { i__1 = *n - 1; _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); _starpu_dsterf_(n, &w[1], &work[indee], info); } else { _starpu_dlacpy_("A", n, n, &q[q_offset], ldq, &z__[z_offset], ldz); i__1 = *n - 1; _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); _starpu_dsteqr_(jobz, n, &w[1], &work[indee], &z__[z_offset], ldz, &work[ indwrk], info); if (*info == 0) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { ifail[i__] = 0; /* L10: */ } } } if (*info == 0) { *m = *n; goto L30; } *info = 0; } /* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. */ if (wantz) { *(unsigned char *)order = 'B'; } else { *(unsigned char *)order = 'E'; } indibl = 1; indisp = indibl + *n; indiwo = indisp + *n; _starpu_dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &work[indd], &work[ inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[ indwrk], &iwork[indiwo], info); if (wantz) { _starpu_dstein_(n, &work[indd], &work[inde], m, &w[1], &iwork[indibl], &iwork[ indisp], &z__[z_offset], ldz, &work[indwrk], &iwork[indiwo], & ifail[1], info); /* Apply orthogonal matrix used in reduction to tridiagonal */ /* form to eigenvectors returned by DSTEIN. */ i__1 = *m; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(n, &z__[j * z_dim1 + 1], &c__1, &work[1], &c__1); _starpu_dgemv_("N", n, n, &c_b14, &q[q_offset], ldq, &work[1], &c__1, & c_b34, &z__[j * z_dim1 + 1], &c__1); /* L20: */ } } /* If matrix was scaled, then rescale eigenvalues appropriately. */ L30: if (iscale == 1) { if (*info == 0) { imax = *m; } else { imax = *info - 1; } d__1 = 1. / sigma; _starpu_dscal_(&imax, &d__1, &w[1], &c__1); } /* If eigenvalues are not in order, then sort them, along with */ /* eigenvectors. */ if (wantz) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { i__ = 0; tmp1 = w[j]; i__2 = *m; for (jj = j + 1; jj <= i__2; ++jj) { if (w[jj] < tmp1) { i__ = jj; tmp1 = w[jj]; } /* L40: */ } if (i__ != 0) { itmp1 = iwork[indibl + i__ - 1]; w[i__] = w[j]; iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; w[j] = tmp1; iwork[indibl + j - 1] = itmp1; _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], &c__1); if (*info != 0) { itmp1 = ifail[i__]; ifail[i__] = ifail[j]; ifail[j] = itmp1; } } /* L50: */ } } return 0; /* End of DSBEVX */ } /* _starpu_dsbevx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsbgst.c000066400000000000000000001413151507764646700206670ustar00rootroot00000000000000/* dsbgst.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b8 = 0.; static doublereal c_b9 = 1.; static integer c__1 = 1; static doublereal c_b20 = -1.; /* Subroutine */ int _starpu_dsbgst_(char *vect, char *uplo, integer *n, integer *ka, integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * ldbb, doublereal *x, integer *ldx, doublereal *work, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, bb_dim1, bb_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4; doublereal d__1; /* Local variables */ integer i__, j, k, l, m; doublereal t; integer i0, i1, i2, j1, j2; doublereal ra; integer nr, nx, ka1, kb1; doublereal ra1; integer j1t, j2t; doublereal bii; integer kbt, nrt, inca; extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_drot_(integer *, doublereal *, integer *, doublereal * , integer *, doublereal *, doublereal *), _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); logical upper, wantx; extern /* Subroutine */ int _starpu_dlar2v_(integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_xerbla_( char *, integer *), _starpu_dlargv_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); logical update; extern /* Subroutine */ int _starpu_dlartv_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSBGST reduces a real symmetric-definite banded generalized */ /* eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, */ /* such that C has the same bandwidth as A. */ /* B must have been previously factorized as S**T*S by DPBSTF, using a */ /* split Cholesky factorization. A is overwritten by C = X**T*A*X, where */ /* X = S**(-1)*Q and Q is an orthogonal matrix chosen to preserve the */ /* bandwidth of A. */ /* Arguments */ /* ========= */ /* VECT (input) CHARACTER*1 */ /* = 'N': do not form the transformation matrix X; */ /* = 'V': form X. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* KA (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ /* KB (input) INTEGER */ /* The number of superdiagonals of the matrix B if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KA >= KB >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix A, stored in the first ka+1 rows of the array. The */ /* j-th column of A is stored in the j-th column of the array AB */ /* as follows: */ /* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). */ /* On exit, the transformed matrix X**T*A*X, stored in the same */ /* format as A. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KA+1. */ /* BB (input) DOUBLE PRECISION array, dimension (LDBB,N) */ /* The banded factor S from the split Cholesky factorization of */ /* B, as returned by DPBSTF, stored in the first KB+1 rows of */ /* the array. */ /* LDBB (input) INTEGER */ /* The leading dimension of the array BB. LDBB >= KB+1. */ /* X (output) DOUBLE PRECISION array, dimension (LDX,N) */ /* If VECT = 'V', the n-by-n matrix X. */ /* If VECT = 'N', the array X is not referenced. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. */ /* LDX >= max(1,N) if VECT = 'V'; LDX >= 1 otherwise. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; bb_dim1 = *ldbb; bb_offset = 1 + bb_dim1; bb -= bb_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --work; /* Function Body */ wantx = _starpu_lsame_(vect, "V"); upper = _starpu_lsame_(uplo, "U"); ka1 = *ka + 1; kb1 = *kb + 1; *info = 0; if (! wantx && ! _starpu_lsame_(vect, "N")) { *info = -1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ka < 0) { *info = -4; } else if (*kb < 0 || *kb > *ka) { *info = -5; } else if (*ldab < *ka + 1) { *info = -7; } else if (*ldbb < *kb + 1) { *info = -9; } else if (*ldx < 1 || wantx && *ldx < max(1,*n)) { *info = -11; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSBGST", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } inca = *ldab * ka1; /* Initialize X to the unit matrix, if needed */ if (wantx) { _starpu_dlaset_("Full", n, n, &c_b8, &c_b9, &x[x_offset], ldx); } /* Set M to the splitting point m. It must be the same value as is */ /* used in DPBSTF. The chosen value allows the arrays WORK and RWORK */ /* to be of dimension (N). */ m = (*n + *kb) / 2; /* The routine works in two phases, corresponding to the two halves */ /* of the split Cholesky factorization of B as S**T*S where */ /* S = ( U ) */ /* ( M L ) */ /* with U upper triangular of order m, and L lower triangular of */ /* order n-m. S has the same bandwidth as B. */ /* S is treated as a product of elementary matrices: */ /* S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n) */ /* where S(i) is determined by the i-th row of S. */ /* In phase 1, the index i takes the values n, n-1, ... , m+1; */ /* in phase 2, it takes the values 1, 2, ... , m. */ /* For each value of i, the current matrix A is updated by forming */ /* inv(S(i))**T*A*inv(S(i)). This creates a triangular bulge outside */ /* the band of A. The bulge is then pushed down toward the bottom of */ /* A in phase 1, and up toward the top of A in phase 2, by applying */ /* plane rotations. */ /* There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1 */ /* of them are linearly independent, so annihilating a bulge requires */ /* only 2*kb-1 plane rotations. The rotations are divided into a 1st */ /* set of kb-1 rotations, and a 2nd set of kb rotations. */ /* Wherever possible, rotations are generated and applied in vector */ /* operations of length NR between the indices J1 and J2 (sometimes */ /* replaced by modified values NRT, J1T or J2T). */ /* The cosines and sines of the rotations are stored in the array */ /* WORK. The cosines of the 1st set of rotations are stored in */ /* elements n+2:n+m-kb-1 and the sines of the 1st set in elements */ /* 2:m-kb-1; the cosines of the 2nd set are stored in elements */ /* n+m-kb+1:2*n and the sines of the second set in elements m-kb+1:n. */ /* The bulges are not formed explicitly; nonzero elements outside the */ /* band are created only when they are required for generating new */ /* rotations; they are stored in the array WORK, in positions where */ /* they are later overwritten by the sines of the rotations which */ /* annihilate them. */ /* **************************** Phase 1 ***************************** */ /* The logical structure of this phase is: */ /* UPDATE = .TRUE. */ /* DO I = N, M + 1, -1 */ /* use S(i) to update A and create a new bulge */ /* apply rotations to push all bulges KA positions downward */ /* END DO */ /* UPDATE = .FALSE. */ /* DO I = M + KA + 1, N - 1 */ /* apply rotations to push all bulges KA positions downward */ /* END DO */ /* To avoid duplicating code, the two loops are merged. */ update = TRUE_; i__ = *n + 1; L10: if (update) { --i__; /* Computing MIN */ i__1 = *kb, i__2 = i__ - 1; kbt = min(i__1,i__2); i0 = i__ - 1; /* Computing MIN */ i__1 = *n, i__2 = i__ + *ka; i1 = min(i__1,i__2); i2 = i__ - kbt + ka1; if (i__ < m + 1) { update = FALSE_; ++i__; i0 = m; if (*ka == 0) { goto L480; } goto L10; } } else { i__ += *ka; if (i__ > *n - 1) { goto L480; } } if (upper) { /* Transform A, working with the upper triangle */ if (update) { /* Form inv(S(i))**T * A * inv(S(i)) */ bii = bb[kb1 + i__ * bb_dim1]; i__1 = i1; for (j = i__; j <= i__1; ++j) { ab[i__ - j + ka1 + j * ab_dim1] /= bii; /* L20: */ } /* Computing MAX */ i__1 = 1, i__2 = i__ - *ka; i__3 = i__; for (j = max(i__1,i__2); j <= i__3; ++j) { ab[j - i__ + ka1 + i__ * ab_dim1] /= bii; /* L30: */ } i__3 = i__ - 1; for (k = i__ - kbt; k <= i__3; ++k) { i__1 = k; for (j = i__ - kbt; j <= i__1; ++j) { ab[j - k + ka1 + k * ab_dim1] = ab[j - k + ka1 + k * ab_dim1] - bb[j - i__ + kb1 + i__ * bb_dim1] * ab[ k - i__ + ka1 + i__ * ab_dim1] - bb[k - i__ + kb1 + i__ * bb_dim1] * ab[j - i__ + ka1 + i__ * ab_dim1] + ab[ka1 + i__ * ab_dim1] * bb[j - i__ + kb1 + i__ * bb_dim1] * bb[k - i__ + kb1 + i__ * bb_dim1]; /* L40: */ } /* Computing MAX */ i__1 = 1, i__2 = i__ - *ka; i__4 = i__ - kbt - 1; for (j = max(i__1,i__2); j <= i__4; ++j) { ab[j - k + ka1 + k * ab_dim1] -= bb[k - i__ + kb1 + i__ * bb_dim1] * ab[j - i__ + ka1 + i__ * ab_dim1]; /* L50: */ } /* L60: */ } i__3 = i1; for (j = i__; j <= i__3; ++j) { /* Computing MAX */ i__4 = j - *ka, i__1 = i__ - kbt; i__2 = i__ - 1; for (k = max(i__4,i__1); k <= i__2; ++k) { ab[k - j + ka1 + j * ab_dim1] -= bb[k - i__ + kb1 + i__ * bb_dim1] * ab[i__ - j + ka1 + j * ab_dim1]; /* L70: */ } /* L80: */ } if (wantx) { /* post-multiply X by inv(S(i)) */ i__3 = *n - m; d__1 = 1. / bii; _starpu_dscal_(&i__3, &d__1, &x[m + 1 + i__ * x_dim1], &c__1); if (kbt > 0) { i__3 = *n - m; _starpu_dger_(&i__3, &kbt, &c_b20, &x[m + 1 + i__ * x_dim1], & c__1, &bb[kb1 - kbt + i__ * bb_dim1], &c__1, &x[m + 1 + (i__ - kbt) * x_dim1], ldx); } } /* store a(i,i1) in RA1 for use in next loop over K */ ra1 = ab[i__ - i1 + ka1 + i1 * ab_dim1]; } /* Generate and apply vectors of rotations to chase all the */ /* existing bulges KA positions down toward the bottom of the */ /* band */ i__3 = *kb - 1; for (k = 1; k <= i__3; ++k) { if (update) { /* Determine the rotations which would annihilate the bulge */ /* which has in theory just been created */ if (i__ - k + *ka < *n && i__ - k > 1) { /* generate rotation to annihilate a(i,i-k+ka+1) */ _starpu_dlartg_(&ab[k + 1 + (i__ - k + *ka) * ab_dim1], &ra1, & work[*n + i__ - k + *ka - m], &work[i__ - k + *ka - m], &ra); /* create nonzero element a(i-k,i-k+ka+1) outside the */ /* band and store it in WORK(i-k) */ t = -bb[kb1 - k + i__ * bb_dim1] * ra1; work[i__ - k] = work[*n + i__ - k + *ka - m] * t - work[ i__ - k + *ka - m] * ab[(i__ - k + *ka) * ab_dim1 + 1]; ab[(i__ - k + *ka) * ab_dim1 + 1] = work[i__ - k + *ka - m] * t + work[*n + i__ - k + *ka - m] * ab[(i__ - k + *ka) * ab_dim1 + 1]; ra1 = ra; } } /* Computing MAX */ i__2 = 1, i__4 = k - i0 + 2; j2 = i__ - k - 1 + max(i__2,i__4) * ka1; nr = (*n - j2 + *ka) / ka1; j1 = j2 + (nr - 1) * ka1; if (update) { /* Computing MAX */ i__2 = j2, i__4 = i__ + (*ka << 1) - k + 1; j2t = max(i__2,i__4); } else { j2t = j2; } nrt = (*n - j2t + *ka) / ka1; i__2 = j1; i__4 = ka1; for (j = j2t; i__4 < 0 ? j >= i__2 : j <= i__2; j += i__4) { /* create nonzero element a(j-ka,j+1) outside the band */ /* and store it in WORK(j-m) */ work[j - m] *= ab[(j + 1) * ab_dim1 + 1]; ab[(j + 1) * ab_dim1 + 1] = work[*n + j - m] * ab[(j + 1) * ab_dim1 + 1]; /* L90: */ } /* generate rotations in 1st set to annihilate elements which */ /* have been created outside the band */ if (nrt > 0) { _starpu_dlargv_(&nrt, &ab[j2t * ab_dim1 + 1], &inca, &work[j2t - m], & ka1, &work[*n + j2t - m], &ka1); } if (nr > 0) { /* apply rotations in 1st set from the right */ i__4 = *ka - 1; for (l = 1; l <= i__4; ++l) { _starpu_dlartv_(&nr, &ab[ka1 - l + j2 * ab_dim1], &inca, &ab[*ka - l + (j2 + 1) * ab_dim1], &inca, &work[*n + j2 - m], &work[j2 - m], &ka1); /* L100: */ } /* apply rotations in 1st set from both sides to diagonal */ /* blocks */ _starpu_dlar2v_(&nr, &ab[ka1 + j2 * ab_dim1], &ab[ka1 + (j2 + 1) * ab_dim1], &ab[*ka + (j2 + 1) * ab_dim1], &inca, &work[ *n + j2 - m], &work[j2 - m], &ka1); } /* start applying rotations in 1st set from the left */ i__4 = *kb - k + 1; for (l = *ka - 1; l >= i__4; --l) { nrt = (*n - j2 + l) / ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[l + (j2 + ka1 - l) * ab_dim1], &inca, & ab[l + 1 + (j2 + ka1 - l) * ab_dim1], &inca, & work[*n + j2 - m], &work[j2 - m], &ka1); } /* L110: */ } if (wantx) { /* post-multiply X by product of rotations in 1st set */ i__4 = j1; i__2 = ka1; for (j = j2; i__2 < 0 ? j >= i__4 : j <= i__4; j += i__2) { i__1 = *n - m; _starpu_drot_(&i__1, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + 1) * x_dim1], &c__1, &work[*n + j - m], &work[j - m]); /* L120: */ } } /* L130: */ } if (update) { if (i2 <= *n && kbt > 0) { /* create nonzero element a(i-kbt,i-kbt+ka+1) outside the */ /* band and store it in WORK(i-kbt) */ work[i__ - kbt] = -bb[kb1 - kbt + i__ * bb_dim1] * ra1; } } for (k = *kb; k >= 1; --k) { if (update) { /* Computing MAX */ i__3 = 2, i__2 = k - i0 + 1; j2 = i__ - k - 1 + max(i__3,i__2) * ka1; } else { /* Computing MAX */ i__3 = 1, i__2 = k - i0 + 1; j2 = i__ - k - 1 + max(i__3,i__2) * ka1; } /* finish applying rotations in 2nd set from the left */ for (l = *kb - k; l >= 1; --l) { nrt = (*n - j2 + *ka + l) / ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[l + (j2 - l + 1) * ab_dim1], &inca, &ab[ l + 1 + (j2 - l + 1) * ab_dim1], &inca, &work[*n + j2 - *ka], &work[j2 - *ka], &ka1); } /* L140: */ } nr = (*n - j2 + *ka) / ka1; j1 = j2 + (nr - 1) * ka1; i__3 = j2; i__2 = -ka1; for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) { work[j] = work[j - *ka]; work[*n + j] = work[*n + j - *ka]; /* L150: */ } i__2 = j1; i__3 = ka1; for (j = j2; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) { /* create nonzero element a(j-ka,j+1) outside the band */ /* and store it in WORK(j) */ work[j] *= ab[(j + 1) * ab_dim1 + 1]; ab[(j + 1) * ab_dim1 + 1] = work[*n + j] * ab[(j + 1) * ab_dim1 + 1]; /* L160: */ } if (update) { if (i__ - k < *n - *ka && k <= kbt) { work[i__ - k + *ka] = work[i__ - k]; } } /* L170: */ } for (k = *kb; k >= 1; --k) { /* Computing MAX */ i__3 = 1, i__2 = k - i0 + 1; j2 = i__ - k - 1 + max(i__3,i__2) * ka1; nr = (*n - j2 + *ka) / ka1; j1 = j2 + (nr - 1) * ka1; if (nr > 0) { /* generate rotations in 2nd set to annihilate elements */ /* which have been created outside the band */ _starpu_dlargv_(&nr, &ab[j2 * ab_dim1 + 1], &inca, &work[j2], &ka1, & work[*n + j2], &ka1); /* apply rotations in 2nd set from the right */ i__3 = *ka - 1; for (l = 1; l <= i__3; ++l) { _starpu_dlartv_(&nr, &ab[ka1 - l + j2 * ab_dim1], &inca, &ab[*ka - l + (j2 + 1) * ab_dim1], &inca, &work[*n + j2], &work[j2], &ka1); /* L180: */ } /* apply rotations in 2nd set from both sides to diagonal */ /* blocks */ _starpu_dlar2v_(&nr, &ab[ka1 + j2 * ab_dim1], &ab[ka1 + (j2 + 1) * ab_dim1], &ab[*ka + (j2 + 1) * ab_dim1], &inca, &work[ *n + j2], &work[j2], &ka1); } /* start applying rotations in 2nd set from the left */ i__3 = *kb - k + 1; for (l = *ka - 1; l >= i__3; --l) { nrt = (*n - j2 + l) / ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[l + (j2 + ka1 - l) * ab_dim1], &inca, & ab[l + 1 + (j2 + ka1 - l) * ab_dim1], &inca, & work[*n + j2], &work[j2], &ka1); } /* L190: */ } if (wantx) { /* post-multiply X by product of rotations in 2nd set */ i__3 = j1; i__2 = ka1; for (j = j2; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) { i__4 = *n - m; _starpu_drot_(&i__4, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + 1) * x_dim1], &c__1, &work[*n + j], &work[j]); /* L200: */ } } /* L210: */ } i__2 = *kb - 1; for (k = 1; k <= i__2; ++k) { /* Computing MAX */ i__3 = 1, i__4 = k - i0 + 2; j2 = i__ - k - 1 + max(i__3,i__4) * ka1; /* finish applying rotations in 1st set from the left */ for (l = *kb - k; l >= 1; --l) { nrt = (*n - j2 + l) / ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[l + (j2 + ka1 - l) * ab_dim1], &inca, & ab[l + 1 + (j2 + ka1 - l) * ab_dim1], &inca, & work[*n + j2 - m], &work[j2 - m], &ka1); } /* L220: */ } /* L230: */ } if (*kb > 1) { i__2 = i__ - *kb + (*ka << 1) + 1; for (j = *n - 1; j >= i__2; --j) { work[*n + j - m] = work[*n + j - *ka - m]; work[j - m] = work[j - *ka - m]; /* L240: */ } } } else { /* Transform A, working with the lower triangle */ if (update) { /* Form inv(S(i))**T * A * inv(S(i)) */ bii = bb[i__ * bb_dim1 + 1]; i__2 = i1; for (j = i__; j <= i__2; ++j) { ab[j - i__ + 1 + i__ * ab_dim1] /= bii; /* L250: */ } /* Computing MAX */ i__2 = 1, i__3 = i__ - *ka; i__4 = i__; for (j = max(i__2,i__3); j <= i__4; ++j) { ab[i__ - j + 1 + j * ab_dim1] /= bii; /* L260: */ } i__4 = i__ - 1; for (k = i__ - kbt; k <= i__4; ++k) { i__2 = k; for (j = i__ - kbt; j <= i__2; ++j) { ab[k - j + 1 + j * ab_dim1] = ab[k - j + 1 + j * ab_dim1] - bb[i__ - j + 1 + j * bb_dim1] * ab[i__ - k + 1 + k * ab_dim1] - bb[i__ - k + 1 + k * bb_dim1] * ab[i__ - j + 1 + j * ab_dim1] + ab[i__ * ab_dim1 + 1] * bb[i__ - j + 1 + j * bb_dim1] * bb[i__ - k + 1 + k * bb_dim1]; /* L270: */ } /* Computing MAX */ i__2 = 1, i__3 = i__ - *ka; i__1 = i__ - kbt - 1; for (j = max(i__2,i__3); j <= i__1; ++j) { ab[k - j + 1 + j * ab_dim1] -= bb[i__ - k + 1 + k * bb_dim1] * ab[i__ - j + 1 + j * ab_dim1]; /* L280: */ } /* L290: */ } i__4 = i1; for (j = i__; j <= i__4; ++j) { /* Computing MAX */ i__1 = j - *ka, i__2 = i__ - kbt; i__3 = i__ - 1; for (k = max(i__1,i__2); k <= i__3; ++k) { ab[j - k + 1 + k * ab_dim1] -= bb[i__ - k + 1 + k * bb_dim1] * ab[j - i__ + 1 + i__ * ab_dim1]; /* L300: */ } /* L310: */ } if (wantx) { /* post-multiply X by inv(S(i)) */ i__4 = *n - m; d__1 = 1. / bii; _starpu_dscal_(&i__4, &d__1, &x[m + 1 + i__ * x_dim1], &c__1); if (kbt > 0) { i__4 = *n - m; i__3 = *ldbb - 1; _starpu_dger_(&i__4, &kbt, &c_b20, &x[m + 1 + i__ * x_dim1], & c__1, &bb[kbt + 1 + (i__ - kbt) * bb_dim1], &i__3, &x[m + 1 + (i__ - kbt) * x_dim1], ldx); } } /* store a(i1,i) in RA1 for use in next loop over K */ ra1 = ab[i1 - i__ + 1 + i__ * ab_dim1]; } /* Generate and apply vectors of rotations to chase all the */ /* existing bulges KA positions down toward the bottom of the */ /* band */ i__4 = *kb - 1; for (k = 1; k <= i__4; ++k) { if (update) { /* Determine the rotations which would annihilate the bulge */ /* which has in theory just been created */ if (i__ - k + *ka < *n && i__ - k > 1) { /* generate rotation to annihilate a(i-k+ka+1,i) */ _starpu_dlartg_(&ab[ka1 - k + i__ * ab_dim1], &ra1, &work[*n + i__ - k + *ka - m], &work[i__ - k + *ka - m], &ra) ; /* create nonzero element a(i-k+ka+1,i-k) outside the */ /* band and store it in WORK(i-k) */ t = -bb[k + 1 + (i__ - k) * bb_dim1] * ra1; work[i__ - k] = work[*n + i__ - k + *ka - m] * t - work[ i__ - k + *ka - m] * ab[ka1 + (i__ - k) * ab_dim1] ; ab[ka1 + (i__ - k) * ab_dim1] = work[i__ - k + *ka - m] * t + work[*n + i__ - k + *ka - m] * ab[ka1 + (i__ - k) * ab_dim1]; ra1 = ra; } } /* Computing MAX */ i__3 = 1, i__1 = k - i0 + 2; j2 = i__ - k - 1 + max(i__3,i__1) * ka1; nr = (*n - j2 + *ka) / ka1; j1 = j2 + (nr - 1) * ka1; if (update) { /* Computing MAX */ i__3 = j2, i__1 = i__ + (*ka << 1) - k + 1; j2t = max(i__3,i__1); } else { j2t = j2; } nrt = (*n - j2t + *ka) / ka1; i__3 = j1; i__1 = ka1; for (j = j2t; i__1 < 0 ? j >= i__3 : j <= i__3; j += i__1) { /* create nonzero element a(j+1,j-ka) outside the band */ /* and store it in WORK(j-m) */ work[j - m] *= ab[ka1 + (j - *ka + 1) * ab_dim1]; ab[ka1 + (j - *ka + 1) * ab_dim1] = work[*n + j - m] * ab[ka1 + (j - *ka + 1) * ab_dim1]; /* L320: */ } /* generate rotations in 1st set to annihilate elements which */ /* have been created outside the band */ if (nrt > 0) { _starpu_dlargv_(&nrt, &ab[ka1 + (j2t - *ka) * ab_dim1], &inca, &work[ j2t - m], &ka1, &work[*n + j2t - m], &ka1); } if (nr > 0) { /* apply rotations in 1st set from the left */ i__1 = *ka - 1; for (l = 1; l <= i__1; ++l) { _starpu_dlartv_(&nr, &ab[l + 1 + (j2 - l) * ab_dim1], &inca, &ab[ l + 2 + (j2 - l) * ab_dim1], &inca, &work[*n + j2 - m], &work[j2 - m], &ka1); /* L330: */ } /* apply rotations in 1st set from both sides to diagonal */ /* blocks */ _starpu_dlar2v_(&nr, &ab[j2 * ab_dim1 + 1], &ab[(j2 + 1) * ab_dim1 + 1], &ab[j2 * ab_dim1 + 2], &inca, &work[*n + j2 - m], &work[j2 - m], &ka1); } /* start applying rotations in 1st set from the right */ i__1 = *kb - k + 1; for (l = *ka - 1; l >= i__1; --l) { nrt = (*n - j2 + l) / ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + j2 * ab_dim1], &inca, &ab[ ka1 - l + (j2 + 1) * ab_dim1], &inca, &work[*n + j2 - m], &work[j2 - m], &ka1); } /* L340: */ } if (wantx) { /* post-multiply X by product of rotations in 1st set */ i__1 = j1; i__3 = ka1; for (j = j2; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { i__2 = *n - m; _starpu_drot_(&i__2, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + 1) * x_dim1], &c__1, &work[*n + j - m], &work[j - m]); /* L350: */ } } /* L360: */ } if (update) { if (i2 <= *n && kbt > 0) { /* create nonzero element a(i-kbt+ka+1,i-kbt) outside the */ /* band and store it in WORK(i-kbt) */ work[i__ - kbt] = -bb[kbt + 1 + (i__ - kbt) * bb_dim1] * ra1; } } for (k = *kb; k >= 1; --k) { if (update) { /* Computing MAX */ i__4 = 2, i__3 = k - i0 + 1; j2 = i__ - k - 1 + max(i__4,i__3) * ka1; } else { /* Computing MAX */ i__4 = 1, i__3 = k - i0 + 1; j2 = i__ - k - 1 + max(i__4,i__3) * ka1; } /* finish applying rotations in 2nd set from the right */ for (l = *kb - k; l >= 1; --l) { nrt = (*n - j2 + *ka + l) / ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + (j2 - *ka) * ab_dim1], & inca, &ab[ka1 - l + (j2 - *ka + 1) * ab_dim1], & inca, &work[*n + j2 - *ka], &work[j2 - *ka], &ka1) ; } /* L370: */ } nr = (*n - j2 + *ka) / ka1; j1 = j2 + (nr - 1) * ka1; i__4 = j2; i__3 = -ka1; for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { work[j] = work[j - *ka]; work[*n + j] = work[*n + j - *ka]; /* L380: */ } i__3 = j1; i__4 = ka1; for (j = j2; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { /* create nonzero element a(j+1,j-ka) outside the band */ /* and store it in WORK(j) */ work[j] *= ab[ka1 + (j - *ka + 1) * ab_dim1]; ab[ka1 + (j - *ka + 1) * ab_dim1] = work[*n + j] * ab[ka1 + ( j - *ka + 1) * ab_dim1]; /* L390: */ } if (update) { if (i__ - k < *n - *ka && k <= kbt) { work[i__ - k + *ka] = work[i__ - k]; } } /* L400: */ } for (k = *kb; k >= 1; --k) { /* Computing MAX */ i__4 = 1, i__3 = k - i0 + 1; j2 = i__ - k - 1 + max(i__4,i__3) * ka1; nr = (*n - j2 + *ka) / ka1; j1 = j2 + (nr - 1) * ka1; if (nr > 0) { /* generate rotations in 2nd set to annihilate elements */ /* which have been created outside the band */ _starpu_dlargv_(&nr, &ab[ka1 + (j2 - *ka) * ab_dim1], &inca, &work[j2] , &ka1, &work[*n + j2], &ka1); /* apply rotations in 2nd set from the left */ i__4 = *ka - 1; for (l = 1; l <= i__4; ++l) { _starpu_dlartv_(&nr, &ab[l + 1 + (j2 - l) * ab_dim1], &inca, &ab[ l + 2 + (j2 - l) * ab_dim1], &inca, &work[*n + j2] , &work[j2], &ka1); /* L410: */ } /* apply rotations in 2nd set from both sides to diagonal */ /* blocks */ _starpu_dlar2v_(&nr, &ab[j2 * ab_dim1 + 1], &ab[(j2 + 1) * ab_dim1 + 1], &ab[j2 * ab_dim1 + 2], &inca, &work[*n + j2], & work[j2], &ka1); } /* start applying rotations in 2nd set from the right */ i__4 = *kb - k + 1; for (l = *ka - 1; l >= i__4; --l) { nrt = (*n - j2 + l) / ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + j2 * ab_dim1], &inca, &ab[ ka1 - l + (j2 + 1) * ab_dim1], &inca, &work[*n + j2], &work[j2], &ka1); } /* L420: */ } if (wantx) { /* post-multiply X by product of rotations in 2nd set */ i__4 = j1; i__3 = ka1; for (j = j2; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { i__1 = *n - m; _starpu_drot_(&i__1, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + 1) * x_dim1], &c__1, &work[*n + j], &work[j]); /* L430: */ } } /* L440: */ } i__3 = *kb - 1; for (k = 1; k <= i__3; ++k) { /* Computing MAX */ i__4 = 1, i__1 = k - i0 + 2; j2 = i__ - k - 1 + max(i__4,i__1) * ka1; /* finish applying rotations in 1st set from the right */ for (l = *kb - k; l >= 1; --l) { nrt = (*n - j2 + l) / ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + j2 * ab_dim1], &inca, &ab[ ka1 - l + (j2 + 1) * ab_dim1], &inca, &work[*n + j2 - m], &work[j2 - m], &ka1); } /* L450: */ } /* L460: */ } if (*kb > 1) { i__3 = i__ - *kb + (*ka << 1) + 1; for (j = *n - 1; j >= i__3; --j) { work[*n + j - m] = work[*n + j - *ka - m]; work[j - m] = work[j - *ka - m]; /* L470: */ } } } goto L10; L480: /* **************************** Phase 2 ***************************** */ /* The logical structure of this phase is: */ /* UPDATE = .TRUE. */ /* DO I = 1, M */ /* use S(i) to update A and create a new bulge */ /* apply rotations to push all bulges KA positions upward */ /* END DO */ /* UPDATE = .FALSE. */ /* DO I = M - KA - 1, 2, -1 */ /* apply rotations to push all bulges KA positions upward */ /* END DO */ /* To avoid duplicating code, the two loops are merged. */ update = TRUE_; i__ = 0; L490: if (update) { ++i__; /* Computing MIN */ i__3 = *kb, i__4 = m - i__; kbt = min(i__3,i__4); i0 = i__ + 1; /* Computing MAX */ i__3 = 1, i__4 = i__ - *ka; i1 = max(i__3,i__4); i2 = i__ + kbt - ka1; if (i__ > m) { update = FALSE_; --i__; i0 = m + 1; if (*ka == 0) { return 0; } goto L490; } } else { i__ -= *ka; if (i__ < 2) { return 0; } } if (i__ < m - kbt) { nx = m; } else { nx = *n; } if (upper) { /* Transform A, working with the upper triangle */ if (update) { /* Form inv(S(i))**T * A * inv(S(i)) */ bii = bb[kb1 + i__ * bb_dim1]; i__3 = i__; for (j = i1; j <= i__3; ++j) { ab[j - i__ + ka1 + i__ * ab_dim1] /= bii; /* L500: */ } /* Computing MIN */ i__4 = *n, i__1 = i__ + *ka; i__3 = min(i__4,i__1); for (j = i__; j <= i__3; ++j) { ab[i__ - j + ka1 + j * ab_dim1] /= bii; /* L510: */ } i__3 = i__ + kbt; for (k = i__ + 1; k <= i__3; ++k) { i__4 = i__ + kbt; for (j = k; j <= i__4; ++j) { ab[k - j + ka1 + j * ab_dim1] = ab[k - j + ka1 + j * ab_dim1] - bb[i__ - j + kb1 + j * bb_dim1] * ab[ i__ - k + ka1 + k * ab_dim1] - bb[i__ - k + kb1 + k * bb_dim1] * ab[i__ - j + ka1 + j * ab_dim1] + ab[ka1 + i__ * ab_dim1] * bb[i__ - j + kb1 + j * bb_dim1] * bb[i__ - k + kb1 + k * bb_dim1]; /* L520: */ } /* Computing MIN */ i__1 = *n, i__2 = i__ + *ka; i__4 = min(i__1,i__2); for (j = i__ + kbt + 1; j <= i__4; ++j) { ab[k - j + ka1 + j * ab_dim1] -= bb[i__ - k + kb1 + k * bb_dim1] * ab[i__ - j + ka1 + j * ab_dim1]; /* L530: */ } /* L540: */ } i__3 = i__; for (j = i1; j <= i__3; ++j) { /* Computing MIN */ i__1 = j + *ka, i__2 = i__ + kbt; i__4 = min(i__1,i__2); for (k = i__ + 1; k <= i__4; ++k) { ab[j - k + ka1 + k * ab_dim1] -= bb[i__ - k + kb1 + k * bb_dim1] * ab[j - i__ + ka1 + i__ * ab_dim1]; /* L550: */ } /* L560: */ } if (wantx) { /* post-multiply X by inv(S(i)) */ d__1 = 1. / bii; _starpu_dscal_(&nx, &d__1, &x[i__ * x_dim1 + 1], &c__1); if (kbt > 0) { i__3 = *ldbb - 1; _starpu_dger_(&nx, &kbt, &c_b20, &x[i__ * x_dim1 + 1], &c__1, &bb[ *kb + (i__ + 1) * bb_dim1], &i__3, &x[(i__ + 1) * x_dim1 + 1], ldx); } } /* store a(i1,i) in RA1 for use in next loop over K */ ra1 = ab[i1 - i__ + ka1 + i__ * ab_dim1]; } /* Generate and apply vectors of rotations to chase all the */ /* existing bulges KA positions up toward the top of the band */ i__3 = *kb - 1; for (k = 1; k <= i__3; ++k) { if (update) { /* Determine the rotations which would annihilate the bulge */ /* which has in theory just been created */ if (i__ + k - ka1 > 0 && i__ + k < m) { /* generate rotation to annihilate a(i+k-ka-1,i) */ _starpu_dlartg_(&ab[k + 1 + i__ * ab_dim1], &ra1, &work[*n + i__ + k - *ka], &work[i__ + k - *ka], &ra); /* create nonzero element a(i+k-ka-1,i+k) outside the */ /* band and store it in WORK(m-kb+i+k) */ t = -bb[kb1 - k + (i__ + k) * bb_dim1] * ra1; work[m - *kb + i__ + k] = work[*n + i__ + k - *ka] * t - work[i__ + k - *ka] * ab[(i__ + k) * ab_dim1 + 1]; ab[(i__ + k) * ab_dim1 + 1] = work[i__ + k - *ka] * t + work[*n + i__ + k - *ka] * ab[(i__ + k) * ab_dim1 + 1]; ra1 = ra; } } /* Computing MAX */ i__4 = 1, i__1 = k + i0 - m + 1; j2 = i__ + k + 1 - max(i__4,i__1) * ka1; nr = (j2 + *ka - 1) / ka1; j1 = j2 - (nr - 1) * ka1; if (update) { /* Computing MIN */ i__4 = j2, i__1 = i__ - (*ka << 1) + k - 1; j2t = min(i__4,i__1); } else { j2t = j2; } nrt = (j2t + *ka - 1) / ka1; i__4 = j2t; i__1 = ka1; for (j = j1; i__1 < 0 ? j >= i__4 : j <= i__4; j += i__1) { /* create nonzero element a(j-1,j+ka) outside the band */ /* and store it in WORK(j) */ work[j] *= ab[(j + *ka - 1) * ab_dim1 + 1]; ab[(j + *ka - 1) * ab_dim1 + 1] = work[*n + j] * ab[(j + *ka - 1) * ab_dim1 + 1]; /* L570: */ } /* generate rotations in 1st set to annihilate elements which */ /* have been created outside the band */ if (nrt > 0) { _starpu_dlargv_(&nrt, &ab[(j1 + *ka) * ab_dim1 + 1], &inca, &work[j1], &ka1, &work[*n + j1], &ka1); } if (nr > 0) { /* apply rotations in 1st set from the left */ i__1 = *ka - 1; for (l = 1; l <= i__1; ++l) { _starpu_dlartv_(&nr, &ab[ka1 - l + (j1 + l) * ab_dim1], &inca, & ab[*ka - l + (j1 + l) * ab_dim1], &inca, &work[*n + j1], &work[j1], &ka1); /* L580: */ } /* apply rotations in 1st set from both sides to diagonal */ /* blocks */ _starpu_dlar2v_(&nr, &ab[ka1 + j1 * ab_dim1], &ab[ka1 + (j1 - 1) * ab_dim1], &ab[*ka + j1 * ab_dim1], &inca, &work[*n + j1], &work[j1], &ka1); } /* start applying rotations in 1st set from the right */ i__1 = *kb - k + 1; for (l = *ka - 1; l >= i__1; --l) { nrt = (j2 + l - 1) / ka1; j1t = j2 - (nrt - 1) * ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[l + j1t * ab_dim1], &inca, &ab[l + 1 + ( j1t - 1) * ab_dim1], &inca, &work[*n + j1t], & work[j1t], &ka1); } /* L590: */ } if (wantx) { /* post-multiply X by product of rotations in 1st set */ i__1 = j2; i__4 = ka1; for (j = j1; i__4 < 0 ? j >= i__1 : j <= i__1; j += i__4) { _starpu_drot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + 1], &c__1, &work[*n + j], &work[j]); /* L600: */ } } /* L610: */ } if (update) { if (i2 > 0 && kbt > 0) { /* create nonzero element a(i+kbt-ka-1,i+kbt) outside the */ /* band and store it in WORK(m-kb+i+kbt) */ work[m - *kb + i__ + kbt] = -bb[kb1 - kbt + (i__ + kbt) * bb_dim1] * ra1; } } for (k = *kb; k >= 1; --k) { if (update) { /* Computing MAX */ i__3 = 2, i__4 = k + i0 - m; j2 = i__ + k + 1 - max(i__3,i__4) * ka1; } else { /* Computing MAX */ i__3 = 1, i__4 = k + i0 - m; j2 = i__ + k + 1 - max(i__3,i__4) * ka1; } /* finish applying rotations in 2nd set from the right */ for (l = *kb - k; l >= 1; --l) { nrt = (j2 + *ka + l - 1) / ka1; j1t = j2 - (nrt - 1) * ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[l + (j1t + *ka) * ab_dim1], &inca, &ab[ l + 1 + (j1t + *ka - 1) * ab_dim1], &inca, &work[* n + m - *kb + j1t + *ka], &work[m - *kb + j1t + * ka], &ka1); } /* L620: */ } nr = (j2 + *ka - 1) / ka1; j1 = j2 - (nr - 1) * ka1; i__3 = j2; i__4 = ka1; for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { work[m - *kb + j] = work[m - *kb + j + *ka]; work[*n + m - *kb + j] = work[*n + m - *kb + j + *ka]; /* L630: */ } i__4 = j2; i__3 = ka1; for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { /* create nonzero element a(j-1,j+ka) outside the band */ /* and store it in WORK(m-kb+j) */ work[m - *kb + j] *= ab[(j + *ka - 1) * ab_dim1 + 1]; ab[(j + *ka - 1) * ab_dim1 + 1] = work[*n + m - *kb + j] * ab[ (j + *ka - 1) * ab_dim1 + 1]; /* L640: */ } if (update) { if (i__ + k > ka1 && k <= kbt) { work[m - *kb + i__ + k - *ka] = work[m - *kb + i__ + k]; } } /* L650: */ } for (k = *kb; k >= 1; --k) { /* Computing MAX */ i__3 = 1, i__4 = k + i0 - m; j2 = i__ + k + 1 - max(i__3,i__4) * ka1; nr = (j2 + *ka - 1) / ka1; j1 = j2 - (nr - 1) * ka1; if (nr > 0) { /* generate rotations in 2nd set to annihilate elements */ /* which have been created outside the band */ _starpu_dlargv_(&nr, &ab[(j1 + *ka) * ab_dim1 + 1], &inca, &work[m - * kb + j1], &ka1, &work[*n + m - *kb + j1], &ka1); /* apply rotations in 2nd set from the left */ i__3 = *ka - 1; for (l = 1; l <= i__3; ++l) { _starpu_dlartv_(&nr, &ab[ka1 - l + (j1 + l) * ab_dim1], &inca, & ab[*ka - l + (j1 + l) * ab_dim1], &inca, &work[*n + m - *kb + j1], &work[m - *kb + j1], &ka1); /* L660: */ } /* apply rotations in 2nd set from both sides to diagonal */ /* blocks */ _starpu_dlar2v_(&nr, &ab[ka1 + j1 * ab_dim1], &ab[ka1 + (j1 - 1) * ab_dim1], &ab[*ka + j1 * ab_dim1], &inca, &work[*n + m - *kb + j1], &work[m - *kb + j1], &ka1); } /* start applying rotations in 2nd set from the right */ i__3 = *kb - k + 1; for (l = *ka - 1; l >= i__3; --l) { nrt = (j2 + l - 1) / ka1; j1t = j2 - (nrt - 1) * ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[l + j1t * ab_dim1], &inca, &ab[l + 1 + ( j1t - 1) * ab_dim1], &inca, &work[*n + m - *kb + j1t], &work[m - *kb + j1t], &ka1); } /* L670: */ } if (wantx) { /* post-multiply X by product of rotations in 2nd set */ i__3 = j2; i__4 = ka1; for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { _starpu_drot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + 1], &c__1, &work[*n + m - *kb + j], &work[m - * kb + j]); /* L680: */ } } /* L690: */ } i__4 = *kb - 1; for (k = 1; k <= i__4; ++k) { /* Computing MAX */ i__3 = 1, i__1 = k + i0 - m + 1; j2 = i__ + k + 1 - max(i__3,i__1) * ka1; /* finish applying rotations in 1st set from the right */ for (l = *kb - k; l >= 1; --l) { nrt = (j2 + l - 1) / ka1; j1t = j2 - (nrt - 1) * ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[l + j1t * ab_dim1], &inca, &ab[l + 1 + ( j1t - 1) * ab_dim1], &inca, &work[*n + j1t], & work[j1t], &ka1); } /* L700: */ } /* L710: */ } if (*kb > 1) { /* Computing MIN */ i__3 = i__ + *kb; i__4 = min(i__3,m) - (*ka << 1) - 1; for (j = 2; j <= i__4; ++j) { work[*n + j] = work[*n + j + *ka]; work[j] = work[j + *ka]; /* L720: */ } } } else { /* Transform A, working with the lower triangle */ if (update) { /* Form inv(S(i))**T * A * inv(S(i)) */ bii = bb[i__ * bb_dim1 + 1]; i__4 = i__; for (j = i1; j <= i__4; ++j) { ab[i__ - j + 1 + j * ab_dim1] /= bii; /* L730: */ } /* Computing MIN */ i__3 = *n, i__1 = i__ + *ka; i__4 = min(i__3,i__1); for (j = i__; j <= i__4; ++j) { ab[j - i__ + 1 + i__ * ab_dim1] /= bii; /* L740: */ } i__4 = i__ + kbt; for (k = i__ + 1; k <= i__4; ++k) { i__3 = i__ + kbt; for (j = k; j <= i__3; ++j) { ab[j - k + 1 + k * ab_dim1] = ab[j - k + 1 + k * ab_dim1] - bb[j - i__ + 1 + i__ * bb_dim1] * ab[k - i__ + 1 + i__ * ab_dim1] - bb[k - i__ + 1 + i__ * bb_dim1] * ab[j - i__ + 1 + i__ * ab_dim1] + ab[ i__ * ab_dim1 + 1] * bb[j - i__ + 1 + i__ * bb_dim1] * bb[k - i__ + 1 + i__ * bb_dim1]; /* L750: */ } /* Computing MIN */ i__1 = *n, i__2 = i__ + *ka; i__3 = min(i__1,i__2); for (j = i__ + kbt + 1; j <= i__3; ++j) { ab[j - k + 1 + k * ab_dim1] -= bb[k - i__ + 1 + i__ * bb_dim1] * ab[j - i__ + 1 + i__ * ab_dim1]; /* L760: */ } /* L770: */ } i__4 = i__; for (j = i1; j <= i__4; ++j) { /* Computing MIN */ i__1 = j + *ka, i__2 = i__ + kbt; i__3 = min(i__1,i__2); for (k = i__ + 1; k <= i__3; ++k) { ab[k - j + 1 + j * ab_dim1] -= bb[k - i__ + 1 + i__ * bb_dim1] * ab[i__ - j + 1 + j * ab_dim1]; /* L780: */ } /* L790: */ } if (wantx) { /* post-multiply X by inv(S(i)) */ d__1 = 1. / bii; _starpu_dscal_(&nx, &d__1, &x[i__ * x_dim1 + 1], &c__1); if (kbt > 0) { _starpu_dger_(&nx, &kbt, &c_b20, &x[i__ * x_dim1 + 1], &c__1, &bb[ i__ * bb_dim1 + 2], &c__1, &x[(i__ + 1) * x_dim1 + 1], ldx); } } /* store a(i,i1) in RA1 for use in next loop over K */ ra1 = ab[i__ - i1 + 1 + i1 * ab_dim1]; } /* Generate and apply vectors of rotations to chase all the */ /* existing bulges KA positions up toward the top of the band */ i__4 = *kb - 1; for (k = 1; k <= i__4; ++k) { if (update) { /* Determine the rotations which would annihilate the bulge */ /* which has in theory just been created */ if (i__ + k - ka1 > 0 && i__ + k < m) { /* generate rotation to annihilate a(i,i+k-ka-1) */ _starpu_dlartg_(&ab[ka1 - k + (i__ + k - *ka) * ab_dim1], &ra1, & work[*n + i__ + k - *ka], &work[i__ + k - *ka], & ra); /* create nonzero element a(i+k,i+k-ka-1) outside the */ /* band and store it in WORK(m-kb+i+k) */ t = -bb[k + 1 + i__ * bb_dim1] * ra1; work[m - *kb + i__ + k] = work[*n + i__ + k - *ka] * t - work[i__ + k - *ka] * ab[ka1 + (i__ + k - *ka) * ab_dim1]; ab[ka1 + (i__ + k - *ka) * ab_dim1] = work[i__ + k - *ka] * t + work[*n + i__ + k - *ka] * ab[ka1 + (i__ + k - *ka) * ab_dim1]; ra1 = ra; } } /* Computing MAX */ i__3 = 1, i__1 = k + i0 - m + 1; j2 = i__ + k + 1 - max(i__3,i__1) * ka1; nr = (j2 + *ka - 1) / ka1; j1 = j2 - (nr - 1) * ka1; if (update) { /* Computing MIN */ i__3 = j2, i__1 = i__ - (*ka << 1) + k - 1; j2t = min(i__3,i__1); } else { j2t = j2; } nrt = (j2t + *ka - 1) / ka1; i__3 = j2t; i__1 = ka1; for (j = j1; i__1 < 0 ? j >= i__3 : j <= i__3; j += i__1) { /* create nonzero element a(j+ka,j-1) outside the band */ /* and store it in WORK(j) */ work[j] *= ab[ka1 + (j - 1) * ab_dim1]; ab[ka1 + (j - 1) * ab_dim1] = work[*n + j] * ab[ka1 + (j - 1) * ab_dim1]; /* L800: */ } /* generate rotations in 1st set to annihilate elements which */ /* have been created outside the band */ if (nrt > 0) { _starpu_dlargv_(&nrt, &ab[ka1 + j1 * ab_dim1], &inca, &work[j1], &ka1, &work[*n + j1], &ka1); } if (nr > 0) { /* apply rotations in 1st set from the right */ i__1 = *ka - 1; for (l = 1; l <= i__1; ++l) { _starpu_dlartv_(&nr, &ab[l + 1 + j1 * ab_dim1], &inca, &ab[l + 2 + (j1 - 1) * ab_dim1], &inca, &work[*n + j1], & work[j1], &ka1); /* L810: */ } /* apply rotations in 1st set from both sides to diagonal */ /* blocks */ _starpu_dlar2v_(&nr, &ab[j1 * ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + 2], &inca, &work[*n + j1] , &work[j1], &ka1); } /* start applying rotations in 1st set from the left */ i__1 = *kb - k + 1; for (l = *ka - 1; l >= i__1; --l) { nrt = (j2 + l - 1) / ka1; j1t = j2 - (nrt - 1) * ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + (j1t - ka1 + l) * ab_dim1] , &inca, &ab[ka1 - l + (j1t - ka1 + l) * ab_dim1], &inca, &work[*n + j1t], &work[j1t], &ka1); } /* L820: */ } if (wantx) { /* post-multiply X by product of rotations in 1st set */ i__1 = j2; i__3 = ka1; for (j = j1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { _starpu_drot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + 1], &c__1, &work[*n + j], &work[j]); /* L830: */ } } /* L840: */ } if (update) { if (i2 > 0 && kbt > 0) { /* create nonzero element a(i+kbt,i+kbt-ka-1) outside the */ /* band and store it in WORK(m-kb+i+kbt) */ work[m - *kb + i__ + kbt] = -bb[kbt + 1 + i__ * bb_dim1] * ra1; } } for (k = *kb; k >= 1; --k) { if (update) { /* Computing MAX */ i__4 = 2, i__3 = k + i0 - m; j2 = i__ + k + 1 - max(i__4,i__3) * ka1; } else { /* Computing MAX */ i__4 = 1, i__3 = k + i0 - m; j2 = i__ + k + 1 - max(i__4,i__3) * ka1; } /* finish applying rotations in 2nd set from the left */ for (l = *kb - k; l >= 1; --l) { nrt = (j2 + *ka + l - 1) / ka1; j1t = j2 - (nrt - 1) * ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + (j1t + l - 1) * ab_dim1], &inca, &ab[ka1 - l + (j1t + l - 1) * ab_dim1], & inca, &work[*n + m - *kb + j1t + *ka], &work[m - * kb + j1t + *ka], &ka1); } /* L850: */ } nr = (j2 + *ka - 1) / ka1; j1 = j2 - (nr - 1) * ka1; i__4 = j2; i__3 = ka1; for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { work[m - *kb + j] = work[m - *kb + j + *ka]; work[*n + m - *kb + j] = work[*n + m - *kb + j + *ka]; /* L860: */ } i__3 = j2; i__4 = ka1; for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { /* create nonzero element a(j+ka,j-1) outside the band */ /* and store it in WORK(m-kb+j) */ work[m - *kb + j] *= ab[ka1 + (j - 1) * ab_dim1]; ab[ka1 + (j - 1) * ab_dim1] = work[*n + m - *kb + j] * ab[ka1 + (j - 1) * ab_dim1]; /* L870: */ } if (update) { if (i__ + k > ka1 && k <= kbt) { work[m - *kb + i__ + k - *ka] = work[m - *kb + i__ + k]; } } /* L880: */ } for (k = *kb; k >= 1; --k) { /* Computing MAX */ i__4 = 1, i__3 = k + i0 - m; j2 = i__ + k + 1 - max(i__4,i__3) * ka1; nr = (j2 + *ka - 1) / ka1; j1 = j2 - (nr - 1) * ka1; if (nr > 0) { /* generate rotations in 2nd set to annihilate elements */ /* which have been created outside the band */ _starpu_dlargv_(&nr, &ab[ka1 + j1 * ab_dim1], &inca, &work[m - *kb + j1], &ka1, &work[*n + m - *kb + j1], &ka1); /* apply rotations in 2nd set from the right */ i__4 = *ka - 1; for (l = 1; l <= i__4; ++l) { _starpu_dlartv_(&nr, &ab[l + 1 + j1 * ab_dim1], &inca, &ab[l + 2 + (j1 - 1) * ab_dim1], &inca, &work[*n + m - *kb + j1], &work[m - *kb + j1], &ka1); /* L890: */ } /* apply rotations in 2nd set from both sides to diagonal */ /* blocks */ _starpu_dlar2v_(&nr, &ab[j1 * ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + 2], &inca, &work[*n + m - *kb + j1], &work[m - *kb + j1], &ka1); } /* start applying rotations in 2nd set from the left */ i__4 = *kb - k + 1; for (l = *ka - 1; l >= i__4; --l) { nrt = (j2 + l - 1) / ka1; j1t = j2 - (nrt - 1) * ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + (j1t - ka1 + l) * ab_dim1] , &inca, &ab[ka1 - l + (j1t - ka1 + l) * ab_dim1], &inca, &work[*n + m - *kb + j1t], &work[m - *kb + j1t], &ka1); } /* L900: */ } if (wantx) { /* post-multiply X by product of rotations in 2nd set */ i__4 = j2; i__3 = ka1; for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { _starpu_drot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + 1], &c__1, &work[*n + m - *kb + j], &work[m - * kb + j]); /* L910: */ } } /* L920: */ } i__3 = *kb - 1; for (k = 1; k <= i__3; ++k) { /* Computing MAX */ i__4 = 1, i__1 = k + i0 - m + 1; j2 = i__ + k + 1 - max(i__4,i__1) * ka1; /* finish applying rotations in 1st set from the left */ for (l = *kb - k; l >= 1; --l) { nrt = (j2 + l - 1) / ka1; j1t = j2 - (nrt - 1) * ka1; if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[ka1 - l + 1 + (j1t - ka1 + l) * ab_dim1] , &inca, &ab[ka1 - l + (j1t - ka1 + l) * ab_dim1], &inca, &work[*n + j1t], &work[j1t], &ka1); } /* L930: */ } /* L940: */ } if (*kb > 1) { /* Computing MIN */ i__4 = i__ + *kb; i__3 = min(i__4,m) - (*ka << 1) - 1; for (j = 2; j <= i__3; ++j) { work[*n + j] = work[*n + j + *ka]; work[j] = work[j + *ka]; /* L950: */ } } } goto L490; /* End of DSBGST */ } /* _starpu_dsbgst_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsbgv.c000066400000000000000000000176761507764646700205220ustar00rootroot00000000000000/* dsbgv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dsbgv_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * ldbb, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, bb_dim1, bb_offset, z_dim1, z_offset, i__1; /* Local variables */ integer inde; char vect[1]; extern logical _starpu_lsame_(char *, char *); integer iinfo; logical upper, wantz; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpbstf_( char *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dsbtrd_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsbgst_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); integer indwrk; extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSBGV computes all the eigenvalues, and optionally, the eigenvectors */ /* of a real generalized symmetric-definite banded eigenproblem, of */ /* the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric */ /* and banded, and B is also positive definite. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangles of A and B are stored; */ /* = 'L': Lower triangles of A and B are stored. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* KA (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ /* KB (input) INTEGER */ /* The number of superdiagonals of the matrix B if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KB >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix A, stored in the first ka+1 rows of the array. The */ /* j-th column of A is stored in the j-th column of the array AB */ /* as follows: */ /* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). */ /* On exit, the contents of AB are destroyed. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KA+1. */ /* BB (input/output) DOUBLE PRECISION array, dimension (LDBB, N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix B, stored in the first kb+1 rows of the array. The */ /* j-th column of B is stored in the j-th column of the array BB */ /* as follows: */ /* if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; */ /* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). */ /* On exit, the factor S from the split Cholesky factorization */ /* B = S**T*S, as returned by DPBSTF. */ /* LDBB (input) INTEGER */ /* The leading dimension of the array BB. LDBB >= KB+1. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, the eigenvalues in ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ /* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ /* eigenvectors, with the i-th column of Z holding the */ /* eigenvector associated with W(i). The eigenvectors are */ /* normalized so that Z**T*B*Z = I. */ /* If JOBZ = 'N', then Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= N. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, and i is: */ /* <= N: the algorithm failed to converge: */ /* i off-diagonal elements of an intermediate */ /* tridiagonal form did not converge to zero; */ /* > N: if INFO = N + i, for 1 <= i <= N, then DPBSTF */ /* returned INFO = i: B is not positive definite. */ /* The factorization of B could not be completed and */ /* no eigenvalues or eigenvectors were computed. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; bb_dim1 = *ldbb; bb_offset = 1 + bb_dim1; bb -= bb_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); upper = _starpu_lsame_(uplo, "U"); *info = 0; if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (upper || _starpu_lsame_(uplo, "L"))) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ka < 0) { *info = -4; } else if (*kb < 0 || *kb > *ka) { *info = -5; } else if (*ldab < *ka + 1) { *info = -7; } else if (*ldbb < *kb + 1) { *info = -9; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -12; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSBGV ", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Form a split Cholesky factorization of B. */ _starpu_dpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem. */ inde = 1; indwrk = inde + *n; _starpu_dsbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb, &z__[z_offset], ldz, &work[indwrk], &iinfo) ; /* Reduce to tridiagonal form. */ if (wantz) { *(unsigned char *)vect = 'U'; } else { *(unsigned char *)vect = 'N'; } _starpu_dsbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &w[1], &work[inde], &z__[ z_offset], ldz, &work[indwrk], &iinfo); /* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEQR. */ if (! wantz) { _starpu_dsterf_(n, &w[1], &work[inde], info); } else { _starpu_dsteqr_(jobz, n, &w[1], &work[inde], &z__[z_offset], ldz, &work[ indwrk], info); } return 0; /* End of DSBGV */ } /* _starpu_dsbgv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsbgvd.c000066400000000000000000000264651507764646700206620ustar00rootroot00000000000000/* dsbgvd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b12 = 1.; static doublereal c_b13 = 0.; /* Subroutine */ int _starpu_dsbgvd_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, doublereal *ab, integer *ldab, doublereal *bb, integer * ldbb, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, bb_dim1, bb_offset, z_dim1, z_offset, i__1; /* Local variables */ integer inde; char vect[1]; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); integer iinfo, lwmin; logical upper, wantz; integer indwk2, llwrk2; extern /* Subroutine */ int _starpu_dstedc_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dpbstf_(char *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dsbtrd_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsbgst_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); integer indwrk, liwmin; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSBGVD computes all the eigenvalues, and optionally, the eigenvectors */ /* of a real generalized symmetric-definite banded eigenproblem, of the */ /* form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric and */ /* banded, and B is also positive definite. If eigenvectors are */ /* desired, it uses a divide and conquer algorithm. */ /* The divide and conquer algorithm makes very mild assumptions about */ /* floating point arithmetic. It will work on machines with a guard */ /* digit in add/subtract, or on those binary machines without guard */ /* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ /* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangles of A and B are stored; */ /* = 'L': Lower triangles of A and B are stored. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* KA (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ /* KB (input) INTEGER */ /* The number of superdiagonals of the matrix B if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KB >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix A, stored in the first ka+1 rows of the array. The */ /* j-th column of A is stored in the j-th column of the array AB */ /* as follows: */ /* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). */ /* On exit, the contents of AB are destroyed. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KA+1. */ /* BB (input/output) DOUBLE PRECISION array, dimension (LDBB, N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix B, stored in the first kb+1 rows of the array. The */ /* j-th column of B is stored in the j-th column of the array BB */ /* as follows: */ /* if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; */ /* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). */ /* On exit, the factor S from the split Cholesky factorization */ /* B = S**T*S, as returned by DPBSTF. */ /* LDBB (input) INTEGER */ /* The leading dimension of the array BB. LDBB >= KB+1. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, the eigenvalues in ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ /* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ /* eigenvectors, with the i-th column of Z holding the */ /* eigenvector associated with W(i). The eigenvectors are */ /* normalized so Z**T*B*Z = I. */ /* If JOBZ = 'N', then Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If N <= 1, LWORK >= 1. */ /* If JOBZ = 'N' and N > 1, LWORK >= 3*N. */ /* If JOBZ = 'V' and N > 1, LWORK >= 1 + 5*N + 2*N**2. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal sizes of the WORK and IWORK */ /* arrays, returns these values as the first entries of the WORK */ /* and IWORK arrays, and no error message related to LWORK or */ /* LIWORK is issued by XERBLA. */ /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ /* On exit, if LIWORK > 0, IWORK(1) returns the optimal LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. */ /* If JOBZ = 'N' or N <= 1, LIWORK >= 1. */ /* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the optimal sizes of the WORK and */ /* IWORK arrays, returns these values as the first entries of */ /* the WORK and IWORK arrays, and no error message related to */ /* LWORK or LIWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, and i is: */ /* <= N: the algorithm failed to converge: */ /* i off-diagonal elements of an intermediate */ /* tridiagonal form did not converge to zero; */ /* > N: if INFO = N + i, for 1 <= i <= N, then DPBSTF */ /* returned INFO = i: B is not positive definite. */ /* The factorization of B could not be completed and */ /* no eigenvalues or eigenvectors were computed. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; bb_dim1 = *ldbb; bb_offset = 1 + bb_dim1; bb -= bb_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; --iwork; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); upper = _starpu_lsame_(uplo, "U"); lquery = *lwork == -1 || *liwork == -1; *info = 0; if (*n <= 1) { liwmin = 1; lwmin = 1; } else if (wantz) { liwmin = *n * 5 + 3; /* Computing 2nd power */ i__1 = *n; lwmin = *n * 5 + 1 + (i__1 * i__1 << 1); } else { liwmin = 1; lwmin = *n << 1; } if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (upper || _starpu_lsame_(uplo, "L"))) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ka < 0) { *info = -4; } else if (*kb < 0 || *kb > *ka) { *info = -5; } else if (*ldab < *ka + 1) { *info = -7; } else if (*ldbb < *kb + 1) { *info = -9; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -12; } if (*info == 0) { work[1] = (doublereal) lwmin; iwork[1] = liwmin; if (*lwork < lwmin && ! lquery) { *info = -14; } else if (*liwork < liwmin && ! lquery) { *info = -16; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSBGVD", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Form a split Cholesky factorization of B. */ _starpu_dpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem. */ inde = 1; indwrk = inde + *n; indwk2 = indwrk + *n * *n; llwrk2 = *lwork - indwk2 + 1; _starpu_dsbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb, &z__[z_offset], ldz, &work[indwrk], &iinfo) ; /* Reduce to tridiagonal form. */ if (wantz) { *(unsigned char *)vect = 'U'; } else { *(unsigned char *)vect = 'N'; } _starpu_dsbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &w[1], &work[inde], &z__[ z_offset], ldz, &work[indwrk], &iinfo); /* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEDC. */ if (! wantz) { _starpu_dsterf_(n, &w[1], &work[inde], info); } else { _starpu_dstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & llwrk2, &iwork[1], liwork, info); _starpu_dgemm_("N", "N", n, n, n, &c_b12, &z__[z_offset], ldz, &work[indwrk], n, &c_b13, &work[indwk2], n); _starpu_dlacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz); } work[1] = (doublereal) lwmin; iwork[1] = liwmin; return 0; /* End of DSBGVD */ } /* _starpu_dsbgvd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsbgvx.c000066400000000000000000000371171507764646700207020ustar00rootroot00000000000000/* dsbgvx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b25 = 1.; static doublereal c_b27 = 0.; /* Subroutine */ int _starpu_dsbgvx_(char *jobz, char *range, char *uplo, integer *n, integer *ka, integer *kb, doublereal *ab, integer *ldab, doublereal * bb, integer *ldbb, doublereal *q, integer *ldq, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, bb_dim1, bb_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, i__2; /* Local variables */ integer i__, j, jj; doublereal tmp1; integer indd, inde; char vect[1]; logical test; integer itmp1, indee; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer iinfo; char order[1]; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); logical upper, wantz, alleig, indeig; integer indibl; logical valeig; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dpbstf_(char *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dsbtrd_( char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer indisp; extern /* Subroutine */ int _starpu_dsbgst_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dstein_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *); integer indiwo; extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *), _starpu_dstebz_(char *, char *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); integer indwrk; extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer nsplit; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSBGVX computes selected eigenvalues, and optionally, eigenvectors */ /* of a real generalized symmetric-definite banded eigenproblem, of */ /* the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric */ /* and banded, and B is also positive definite. Eigenvalues and */ /* eigenvectors can be selected by specifying either all eigenvalues, */ /* a range of values or a range of indices for the desired eigenvalues. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* RANGE (input) CHARACTER*1 */ /* = 'A': all eigenvalues will be found. */ /* = 'V': all eigenvalues in the half-open interval (VL,VU] */ /* will be found. */ /* = 'I': the IL-th through IU-th eigenvalues will be found. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangles of A and B are stored; */ /* = 'L': Lower triangles of A and B are stored. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* KA (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ /* KB (input) INTEGER */ /* The number of superdiagonals of the matrix B if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KB >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix A, stored in the first ka+1 rows of the array. The */ /* j-th column of A is stored in the j-th column of the array AB */ /* as follows: */ /* if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). */ /* On exit, the contents of AB are destroyed. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KA+1. */ /* BB (input/output) DOUBLE PRECISION array, dimension (LDBB, N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix B, stored in the first kb+1 rows of the array. The */ /* j-th column of B is stored in the j-th column of the array BB */ /* as follows: */ /* if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; */ /* if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). */ /* On exit, the factor S from the split Cholesky factorization */ /* B = S**T*S, as returned by DPBSTF. */ /* LDBB (input) INTEGER */ /* The leading dimension of the array BB. LDBB >= KB+1. */ /* Q (output) DOUBLE PRECISION array, dimension (LDQ, N) */ /* If JOBZ = 'V', the n-by-n matrix used in the reduction of */ /* A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x, */ /* and consequently C to tridiagonal form. */ /* If JOBZ = 'N', the array Q is not referenced. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. If JOBZ = 'N', */ /* LDQ >= 1. If JOBZ = 'V', LDQ >= max(1,N). */ /* VL (input) DOUBLE PRECISION */ /* VU (input) DOUBLE PRECISION */ /* If RANGE='V', the lower and upper bounds of the interval to */ /* be searched for eigenvalues. VL < VU. */ /* Not referenced if RANGE = 'A' or 'I'. */ /* IL (input) INTEGER */ /* IU (input) INTEGER */ /* If RANGE='I', the indices (in ascending order) of the */ /* smallest and largest eigenvalues to be returned. */ /* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ /* Not referenced if RANGE = 'A' or 'V'. */ /* ABSTOL (input) DOUBLE PRECISION */ /* The absolute error tolerance for the eigenvalues. */ /* An approximate eigenvalue is accepted as converged */ /* when it is determined to lie in an interval [a,b] */ /* of width less than or equal to */ /* ABSTOL + EPS * max( |a|,|b| ) , */ /* where EPS is the machine precision. If ABSTOL is less than */ /* or equal to zero, then EPS*|T| will be used in its place, */ /* where |T| is the 1-norm of the tridiagonal matrix obtained */ /* by reducing A to tridiagonal form. */ /* Eigenvalues will be computed most accurately when ABSTOL is */ /* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ /* If this routine returns with INFO>0, indicating that some */ /* eigenvectors did not converge, try setting ABSTOL to */ /* 2*DLAMCH('S'). */ /* M (output) INTEGER */ /* The total number of eigenvalues found. 0 <= M <= N. */ /* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, the eigenvalues in ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ /* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ /* eigenvectors, with the i-th column of Z holding the */ /* eigenvector associated with W(i). The eigenvectors are */ /* normalized so Z**T*B*Z = I. */ /* If JOBZ = 'N', then Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (7*N) */ /* IWORK (workspace/output) INTEGER array, dimension (5*N) */ /* IFAIL (output) INTEGER array, dimension (M) */ /* If JOBZ = 'V', then if INFO = 0, the first M elements of */ /* IFAIL are zero. If INFO > 0, then IFAIL contains the */ /* indices of the eigenvalues that failed to converge. */ /* If JOBZ = 'N', then IFAIL is not referenced. */ /* INFO (output) INTEGER */ /* = 0 : successful exit */ /* < 0 : if INFO = -i, the i-th argument had an illegal value */ /* <= N: if INFO = i, then i eigenvectors failed to converge. */ /* Their indices are stored in IFAIL. */ /* > N : DPBSTF returned an error code; i.e., */ /* if INFO = N + i, for 1 <= i <= N, then the leading */ /* minor of order i of B is not positive definite. */ /* The factorization of B could not be completed and */ /* no eigenvalues or eigenvectors were computed. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; bb_dim1 = *ldbb; bb_offset = 1 + bb_dim1; bb -= bb_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; --iwork; --ifail; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); upper = _starpu_lsame_(uplo, "U"); alleig = _starpu_lsame_(range, "A"); valeig = _starpu_lsame_(range, "V"); indeig = _starpu_lsame_(range, "I"); *info = 0; if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; } else if (! (upper || _starpu_lsame_(uplo, "L"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*ka < 0) { *info = -5; } else if (*kb < 0 || *kb > *ka) { *info = -6; } else if (*ldab < *ka + 1) { *info = -8; } else if (*ldbb < *kb + 1) { *info = -10; } else if (*ldq < 1 || wantz && *ldq < *n) { *info = -12; } else { if (valeig) { if (*n > 0 && *vu <= *vl) { *info = -14; } } else if (indeig) { if (*il < 1 || *il > max(1,*n)) { *info = -15; } else if (*iu < min(*n,*il) || *iu > *n) { *info = -16; } } } if (*info == 0) { if (*ldz < 1 || wantz && *ldz < *n) { *info = -21; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSBGVX", &i__1); return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { return 0; } /* Form a split Cholesky factorization of B. */ _starpu_dpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem. */ _starpu_dsbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb, &q[q_offset], ldq, &work[1], &iinfo); /* Reduce symmetric band matrix to tridiagonal form. */ indd = 1; inde = indd + *n; indwrk = inde + *n; if (wantz) { *(unsigned char *)vect = 'U'; } else { *(unsigned char *)vect = 'N'; } _starpu_dsbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &work[indd], &work[inde], &q[q_offset], ldq, &work[indwrk], &iinfo); /* If all eigenvalues are desired and ABSTOL is less than or equal */ /* to zero, then call DSTERF or SSTEQR. If this fails for some */ /* eigenvalue, then try DSTEBZ. */ test = FALSE_; if (indeig) { if (*il == 1 && *iu == *n) { test = TRUE_; } } if ((alleig || test) && *abstol <= 0.) { _starpu_dcopy_(n, &work[indd], &c__1, &w[1], &c__1); indee = indwrk + (*n << 1); i__1 = *n - 1; _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); if (! wantz) { _starpu_dsterf_(n, &w[1], &work[indee], info); } else { _starpu_dlacpy_("A", n, n, &q[q_offset], ldq, &z__[z_offset], ldz); _starpu_dsteqr_(jobz, n, &w[1], &work[indee], &z__[z_offset], ldz, &work[ indwrk], info); if (*info == 0) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { ifail[i__] = 0; /* L10: */ } } } if (*info == 0) { *m = *n; goto L30; } *info = 0; } /* Otherwise, call DSTEBZ and, if eigenvectors are desired, */ /* call DSTEIN. */ if (wantz) { *(unsigned char *)order = 'B'; } else { *(unsigned char *)order = 'E'; } indibl = 1; indisp = indibl + *n; indiwo = indisp + *n; _starpu_dstebz_(range, order, n, vl, vu, il, iu, abstol, &work[indd], &work[inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[indwrk], &iwork[indiwo], info); if (wantz) { _starpu_dstein_(n, &work[indd], &work[inde], m, &w[1], &iwork[indibl], &iwork[ indisp], &z__[z_offset], ldz, &work[indwrk], &iwork[indiwo], & ifail[1], info); /* Apply transformation matrix used in reduction to tridiagonal */ /* form to eigenvectors returned by DSTEIN. */ i__1 = *m; for (j = 1; j <= i__1; ++j) { _starpu_dcopy_(n, &z__[j * z_dim1 + 1], &c__1, &work[1], &c__1); _starpu_dgemv_("N", n, n, &c_b25, &q[q_offset], ldq, &work[1], &c__1, & c_b27, &z__[j * z_dim1 + 1], &c__1); /* L20: */ } } L30: /* If eigenvalues are not in order, then sort them, along with */ /* eigenvectors. */ if (wantz) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { i__ = 0; tmp1 = w[j]; i__2 = *m; for (jj = j + 1; jj <= i__2; ++jj) { if (w[jj] < tmp1) { i__ = jj; tmp1 = w[jj]; } /* L40: */ } if (i__ != 0) { itmp1 = iwork[indibl + i__ - 1]; w[i__] = w[j]; iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; w[j] = tmp1; iwork[indibl + j - 1] = itmp1; _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], &c__1); if (*info != 0) { itmp1 = ifail[i__]; ifail[i__] = ifail[j]; ifail[j] = itmp1; } } /* L50: */ } } return 0; /* End of DSBGVX */ } /* _starpu_dsbgvx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsbtrd.c000066400000000000000000000452331507764646700206650ustar00rootroot00000000000000/* dsbtrd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b9 = 0.; static doublereal c_b10 = 1.; static integer c__1 = 1; /* Subroutine */ int _starpu_dsbtrd_(char *vect, char *uplo, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *d__, doublereal *e, doublereal *q, integer *ldq, doublereal *work, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, q_dim1, q_offset, i__1, i__2, i__3, i__4, i__5; /* Local variables */ integer i__, j, k, l, i2, j1, j2, nq, nr, kd1, ibl, iqb, kdn, jin, nrt, kdm1, inca, jend, lend, jinc, incx, last; doublereal temp; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); integer j1end, j1inc, iqend; extern logical _starpu_lsame_(char *, char *); logical initq, wantq, upper; extern /* Subroutine */ int _starpu_dlar2v_(integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer iqaend; extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_xerbla_(char *, integer *), _starpu_dlargv_( integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlartv_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSBTRD reduces a real symmetric band matrix A to symmetric */ /* tridiagonal form T by an orthogonal similarity transformation: */ /* Q**T * A * Q = T. */ /* Arguments */ /* ========= */ /* VECT (input) CHARACTER*1 */ /* = 'N': do not form Q; */ /* = 'V': form Q; */ /* = 'U': update a matrix X, by forming X*Q. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix A, stored in the first KD+1 rows of the array. The */ /* j-th column of A is stored in the j-th column of the array AB */ /* as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* On exit, the diagonal elements of AB are overwritten by the */ /* diagonal elements of the tridiagonal matrix T; if KD > 0, the */ /* elements on the first superdiagonal (if UPLO = 'U') or the */ /* first subdiagonal (if UPLO = 'L') are overwritten by the */ /* off-diagonal elements of T; the rest of AB is overwritten by */ /* values generated during the reduction. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD+1. */ /* D (output) DOUBLE PRECISION array, dimension (N) */ /* The diagonal elements of the tridiagonal matrix T. */ /* E (output) DOUBLE PRECISION array, dimension (N-1) */ /* The off-diagonal elements of the tridiagonal matrix T: */ /* E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. */ /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ /* On entry, if VECT = 'U', then Q must contain an N-by-N */ /* matrix X; if VECT = 'N' or 'V', then Q need not be set. */ /* On exit: */ /* if VECT = 'V', Q contains the N-by-N orthogonal matrix Q; */ /* if VECT = 'U', Q contains the product X*Q; */ /* if VECT = 'N', the array Q is not referenced. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. */ /* LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* Modified by Linda Kaufman, Bell Labs. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --d__; --e; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --work; /* Function Body */ initq = _starpu_lsame_(vect, "V"); wantq = initq || _starpu_lsame_(vect, "U"); upper = _starpu_lsame_(uplo, "U"); kd1 = *kd + 1; kdm1 = *kd - 1; incx = *ldab - 1; iqend = 1; *info = 0; if (! wantq && ! _starpu_lsame_(vect, "N")) { *info = -1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*kd < 0) { *info = -4; } else if (*ldab < kd1) { *info = -6; } else if (*ldq < max(1,*n) && wantq) { *info = -10; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSBTRD", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Initialize Q to the unit matrix, if needed */ if (initq) { _starpu_dlaset_("Full", n, n, &c_b9, &c_b10, &q[q_offset], ldq); } /* Wherever possible, plane rotations are generated and applied in */ /* vector operations of length NR over the index set J1:J2:KD1. */ /* The cosines and sines of the plane rotations are stored in the */ /* arrays D and WORK. */ inca = kd1 * *ldab; /* Computing MIN */ i__1 = *n - 1; kdn = min(i__1,*kd); if (upper) { if (*kd > 1) { /* Reduce to tridiagonal form, working with upper triangle */ nr = 0; j1 = kdn + 2; j2 = 1; i__1 = *n - 2; for (i__ = 1; i__ <= i__1; ++i__) { /* Reduce i-th row of matrix to tridiagonal form */ for (k = kdn + 1; k >= 2; --k) { j1 += kdn; j2 += kdn; if (nr > 0) { /* generate plane rotations to annihilate nonzero */ /* elements which have been created outside the band */ _starpu_dlargv_(&nr, &ab[(j1 - 1) * ab_dim1 + 1], &inca, & work[j1], &kd1, &d__[j1], &kd1); /* apply rotations from the right */ /* Dependent on the the number of diagonals either */ /* DLARTV or DROT is used */ if (nr >= (*kd << 1) - 1) { i__2 = *kd - 1; for (l = 1; l <= i__2; ++l) { _starpu_dlartv_(&nr, &ab[l + 1 + (j1 - 1) * ab_dim1], &inca, &ab[l + j1 * ab_dim1], &inca, & d__[j1], &work[j1], &kd1); /* L10: */ } } else { jend = j1 + (nr - 1) * kd1; i__2 = jend; i__3 = kd1; for (jinc = j1; i__3 < 0 ? jinc >= i__2 : jinc <= i__2; jinc += i__3) { _starpu_drot_(&kdm1, &ab[(jinc - 1) * ab_dim1 + 2], & c__1, &ab[jinc * ab_dim1 + 1], &c__1, &d__[jinc], &work[jinc]); /* L20: */ } } } if (k > 2) { if (k <= *n - i__ + 1) { /* generate plane rotation to annihilate a(i,i+k-1) */ /* within the band */ _starpu_dlartg_(&ab[*kd - k + 3 + (i__ + k - 2) * ab_dim1] , &ab[*kd - k + 2 + (i__ + k - 1) * ab_dim1], &d__[i__ + k - 1], &work[i__ + k - 1], &temp); ab[*kd - k + 3 + (i__ + k - 2) * ab_dim1] = temp; /* apply rotation from the right */ i__3 = k - 3; _starpu_drot_(&i__3, &ab[*kd - k + 4 + (i__ + k - 2) * ab_dim1], &c__1, &ab[*kd - k + 3 + (i__ + k - 1) * ab_dim1], &c__1, &d__[i__ + k - 1], &work[i__ + k - 1]); } ++nr; j1 = j1 - kdn - 1; } /* apply plane rotations from both sides to diagonal */ /* blocks */ if (nr > 0) { _starpu_dlar2v_(&nr, &ab[kd1 + (j1 - 1) * ab_dim1], &ab[kd1 + j1 * ab_dim1], &ab[*kd + j1 * ab_dim1], &inca, &d__[j1], &work[j1], &kd1); } /* apply plane rotations from the left */ if (nr > 0) { if ((*kd << 1) - 1 < nr) { /* Dependent on the the number of diagonals either */ /* DLARTV or DROT is used */ i__3 = *kd - 1; for (l = 1; l <= i__3; ++l) { if (j2 + l > *n) { nrt = nr - 1; } else { nrt = nr; } if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[*kd - l + (j1 + l) * ab_dim1], &inca, &ab[*kd - l + 1 + (j1 + l) * ab_dim1], &inca, & d__[j1], &work[j1], &kd1); } /* L30: */ } } else { j1end = j1 + kd1 * (nr - 2); if (j1end >= j1) { i__3 = j1end; i__2 = kd1; for (jin = j1; i__2 < 0 ? jin >= i__3 : jin <= i__3; jin += i__2) { i__4 = *kd - 1; _starpu_drot_(&i__4, &ab[*kd - 1 + (jin + 1) * ab_dim1], &incx, &ab[*kd + (jin + 1) * ab_dim1], &incx, &d__[jin], & work[jin]); /* L40: */ } } /* Computing MIN */ i__2 = kdm1, i__3 = *n - j2; lend = min(i__2,i__3); last = j1end + kd1; if (lend > 0) { _starpu_drot_(&lend, &ab[*kd - 1 + (last + 1) * ab_dim1], &incx, &ab[*kd + (last + 1) * ab_dim1], &incx, &d__[last], &work[ last]); } } } if (wantq) { /* accumulate product of plane rotations in Q */ if (initq) { /* take advantage of the fact that Q was */ /* initially the Identity matrix */ iqend = max(iqend,j2); /* Computing MAX */ i__2 = 0, i__3 = k - 3; i2 = max(i__2,i__3); iqaend = i__ * *kd + 1; if (k == 2) { iqaend += *kd; } iqaend = min(iqaend,iqend); i__2 = j2; i__3 = kd1; for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) { ibl = i__ - i2 / kdm1; ++i2; /* Computing MAX */ i__4 = 1, i__5 = j - ibl; iqb = max(i__4,i__5); nq = iqaend + 1 - iqb; /* Computing MIN */ i__4 = iqaend + *kd; iqaend = min(i__4,iqend); _starpu_drot_(&nq, &q[iqb + (j - 1) * q_dim1], &c__1, &q[iqb + j * q_dim1], &c__1, &d__[j], &work[j]); /* L50: */ } } else { i__3 = j2; i__2 = kd1; for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) { _starpu_drot_(n, &q[(j - 1) * q_dim1 + 1], &c__1, &q[ j * q_dim1 + 1], &c__1, &d__[j], & work[j]); /* L60: */ } } } if (j2 + kdn > *n) { /* adjust J2 to keep within the bounds of the matrix */ --nr; j2 = j2 - kdn - 1; } i__2 = j2; i__3 = kd1; for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) { /* create nonzero element a(j-1,j+kd) outside the band */ /* and store it in WORK */ work[j + *kd] = work[j] * ab[(j + *kd) * ab_dim1 + 1]; ab[(j + *kd) * ab_dim1 + 1] = d__[j] * ab[(j + *kd) * ab_dim1 + 1]; /* L70: */ } /* L80: */ } /* L90: */ } } if (*kd > 0) { /* copy off-diagonal elements to E */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = ab[*kd + (i__ + 1) * ab_dim1]; /* L100: */ } } else { /* set E to zero if original matrix was diagonal */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = 0.; /* L110: */ } } /* copy diagonal elements to D */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = ab[kd1 + i__ * ab_dim1]; /* L120: */ } } else { if (*kd > 1) { /* Reduce to tridiagonal form, working with lower triangle */ nr = 0; j1 = kdn + 2; j2 = 1; i__1 = *n - 2; for (i__ = 1; i__ <= i__1; ++i__) { /* Reduce i-th column of matrix to tridiagonal form */ for (k = kdn + 1; k >= 2; --k) { j1 += kdn; j2 += kdn; if (nr > 0) { /* generate plane rotations to annihilate nonzero */ /* elements which have been created outside the band */ _starpu_dlargv_(&nr, &ab[kd1 + (j1 - kd1) * ab_dim1], &inca, & work[j1], &kd1, &d__[j1], &kd1); /* apply plane rotations from one side */ /* Dependent on the the number of diagonals either */ /* DLARTV or DROT is used */ if (nr > (*kd << 1) - 1) { i__3 = *kd - 1; for (l = 1; l <= i__3; ++l) { _starpu_dlartv_(&nr, &ab[kd1 - l + (j1 - kd1 + l) * ab_dim1], &inca, &ab[kd1 - l + 1 + ( j1 - kd1 + l) * ab_dim1], &inca, &d__[ j1], &work[j1], &kd1); /* L130: */ } } else { jend = j1 + kd1 * (nr - 1); i__3 = jend; i__2 = kd1; for (jinc = j1; i__2 < 0 ? jinc >= i__3 : jinc <= i__3; jinc += i__2) { _starpu_drot_(&kdm1, &ab[*kd + (jinc - *kd) * ab_dim1] , &incx, &ab[kd1 + (jinc - *kd) * ab_dim1], &incx, &d__[jinc], &work[ jinc]); /* L140: */ } } } if (k > 2) { if (k <= *n - i__ + 1) { /* generate plane rotation to annihilate a(i+k-1,i) */ /* within the band */ _starpu_dlartg_(&ab[k - 1 + i__ * ab_dim1], &ab[k + i__ * ab_dim1], &d__[i__ + k - 1], &work[i__ + k - 1], &temp); ab[k - 1 + i__ * ab_dim1] = temp; /* apply rotation from the left */ i__2 = k - 3; i__3 = *ldab - 1; i__4 = *ldab - 1; _starpu_drot_(&i__2, &ab[k - 2 + (i__ + 1) * ab_dim1], & i__3, &ab[k - 1 + (i__ + 1) * ab_dim1], & i__4, &d__[i__ + k - 1], &work[i__ + k - 1]); } ++nr; j1 = j1 - kdn - 1; } /* apply plane rotations from both sides to diagonal */ /* blocks */ if (nr > 0) { _starpu_dlar2v_(&nr, &ab[(j1 - 1) * ab_dim1 + 1], &ab[j1 * ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + 2], & inca, &d__[j1], &work[j1], &kd1); } /* apply plane rotations from the right */ /* Dependent on the the number of diagonals either */ /* DLARTV or DROT is used */ if (nr > 0) { if (nr > (*kd << 1) - 1) { i__2 = *kd - 1; for (l = 1; l <= i__2; ++l) { if (j2 + l > *n) { nrt = nr - 1; } else { nrt = nr; } if (nrt > 0) { _starpu_dlartv_(&nrt, &ab[l + 2 + (j1 - 1) * ab_dim1], &inca, &ab[l + 1 + j1 * ab_dim1], &inca, &d__[j1], &work[ j1], &kd1); } /* L150: */ } } else { j1end = j1 + kd1 * (nr - 2); if (j1end >= j1) { i__2 = j1end; i__3 = kd1; for (j1inc = j1; i__3 < 0 ? j1inc >= i__2 : j1inc <= i__2; j1inc += i__3) { _starpu_drot_(&kdm1, &ab[(j1inc - 1) * ab_dim1 + 3], &c__1, &ab[j1inc * ab_dim1 + 2], &c__1, &d__[j1inc], &work[ j1inc]); /* L160: */ } } /* Computing MIN */ i__3 = kdm1, i__2 = *n - j2; lend = min(i__3,i__2); last = j1end + kd1; if (lend > 0) { _starpu_drot_(&lend, &ab[(last - 1) * ab_dim1 + 3], & c__1, &ab[last * ab_dim1 + 2], &c__1, &d__[last], &work[last]); } } } if (wantq) { /* accumulate product of plane rotations in Q */ if (initq) { /* take advantage of the fact that Q was */ /* initially the Identity matrix */ iqend = max(iqend,j2); /* Computing MAX */ i__3 = 0, i__2 = k - 3; i2 = max(i__3,i__2); iqaend = i__ * *kd + 1; if (k == 2) { iqaend += *kd; } iqaend = min(iqaend,iqend); i__3 = j2; i__2 = kd1; for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) { ibl = i__ - i2 / kdm1; ++i2; /* Computing MAX */ i__4 = 1, i__5 = j - ibl; iqb = max(i__4,i__5); nq = iqaend + 1 - iqb; /* Computing MIN */ i__4 = iqaend + *kd; iqaend = min(i__4,iqend); _starpu_drot_(&nq, &q[iqb + (j - 1) * q_dim1], &c__1, &q[iqb + j * q_dim1], &c__1, &d__[j], &work[j]); /* L170: */ } } else { i__2 = j2; i__3 = kd1; for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) { _starpu_drot_(n, &q[(j - 1) * q_dim1 + 1], &c__1, &q[ j * q_dim1 + 1], &c__1, &d__[j], & work[j]); /* L180: */ } } } if (j2 + kdn > *n) { /* adjust J2 to keep within the bounds of the matrix */ --nr; j2 = j2 - kdn - 1; } i__3 = j2; i__2 = kd1; for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) { /* create nonzero element a(j+kd,j-1) outside the */ /* band and store it in WORK */ work[j + *kd] = work[j] * ab[kd1 + j * ab_dim1]; ab[kd1 + j * ab_dim1] = d__[j] * ab[kd1 + j * ab_dim1] ; /* L190: */ } /* L200: */ } /* L210: */ } } if (*kd > 0) { /* copy off-diagonal elements to E */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = ab[i__ * ab_dim1 + 2]; /* L220: */ } } else { /* set E to zero if original matrix was diagonal */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = 0.; /* L230: */ } } /* copy diagonal elements to D */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = ab[i__ * ab_dim1 + 1]; /* L240: */ } } return 0; /* End of DSBTRD */ } /* _starpu_dsbtrd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsfrk.c000066400000000000000000000357121507764646700205150ustar00rootroot00000000000000/* dsfrk.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dsfrk_(char *transr, char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, doublereal *c__) { /* System generated locals */ integer a_dim1, a_offset, i__1; /* Local variables */ integer j, n1, n2, nk, info; logical normaltransr; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); integer nrowa; logical lower; extern /* Subroutine */ int _starpu_dsyrk_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); logical nisodd, notrans; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Julien Langou of the Univ. of Colorado Denver -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* Level 3 BLAS like routine for C in RFP Format. */ /* DSFRK performs one of the symmetric rank--k operations */ /* C := alpha*A*A' + beta*C, */ /* or */ /* C := alpha*A'*A + beta*C, */ /* where alpha and beta are real scalars, C is an n--by--n symmetric */ /* matrix and A is an n--by--k matrix in the first case and a k--by--n */ /* matrix in the second case. */ /* Arguments */ /* ========== */ /* TRANSR (input) CHARACTER */ /* = 'N': The Normal Form of RFP A is stored; */ /* = 'T': The Transpose Form of RFP A is stored. */ /* UPLO - (input) CHARACTER */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the array C is to be referenced as */ /* follows: */ /* UPLO = 'U' or 'u' Only the upper triangular part of C */ /* is to be referenced. */ /* UPLO = 'L' or 'l' Only the lower triangular part of C */ /* is to be referenced. */ /* Unchanged on exit. */ /* TRANS - (input) CHARACTER */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. */ /* TRANS = 'T' or 't' C := alpha*A'*A + beta*C. */ /* Unchanged on exit. */ /* N - (input) INTEGER. */ /* On entry, N specifies the order of the matrix C. N must be */ /* at least zero. */ /* Unchanged on exit. */ /* K - (input) INTEGER. */ /* On entry with TRANS = 'N' or 'n', K specifies the number */ /* of columns of the matrix A, and on entry with TRANS = 'T' */ /* or 't', K specifies the number of rows of the matrix A. K */ /* must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - (input) DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - (input) DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where KA */ /* is K when TRANS = 'N' or 'n', and is N otherwise. Before */ /* entry with TRANS = 'N' or 'n', the leading N--by--K part of */ /* the array A must contain the matrix A, otherwise the leading */ /* K--by--N part of the array A must contain the matrix A. */ /* Unchanged on exit. */ /* LDA - (input) INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. When TRANS = 'N' or 'n' */ /* then LDA must be at least max( 1, n ), otherwise LDA must */ /* be at least max( 1, k ). */ /* Unchanged on exit. */ /* BETA - (input) DOUBLE PRECISION. */ /* On entry, BETA specifies the scalar beta. */ /* Unchanged on exit. */ /* C - (input/output) DOUBLE PRECISION array, dimension ( NT ); */ /* NT = N*(N+1)/2. On entry, the symmetric matrix C in RFP */ /* Format. RFP Format is described by TRANSR, UPLO and N. */ /* Arguments */ /* ========== */ /* .. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --c__; /* Function Body */ info = 0; normaltransr = _starpu_lsame_(transr, "N"); lower = _starpu_lsame_(uplo, "L"); notrans = _starpu_lsame_(trans, "N"); if (notrans) { nrowa = *n; } else { nrowa = *k; } if (! normaltransr && ! _starpu_lsame_(transr, "T")) { info = -1; } else if (! lower && ! _starpu_lsame_(uplo, "U")) { info = -2; } else if (! notrans && ! _starpu_lsame_(trans, "T")) { info = -3; } else if (*n < 0) { info = -4; } else if (*k < 0) { info = -5; } else if (*lda < max(1,nrowa)) { info = -8; } if (info != 0) { i__1 = -info; _starpu_xerbla_("DSFRK ", &i__1); return 0; } /* Quick return if possible. */ /* The quick return case: ((ALPHA.EQ.0).AND.(BETA.NE.ZERO)) is not */ /* done (it is in DSYRK for example) and left in the general case. */ if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { return 0; } if (*alpha == 0. && *beta == 0.) { i__1 = *n * (*n + 1) / 2; for (j = 1; j <= i__1; ++j) { c__[j] = 0.; } return 0; } /* C is N-by-N. */ /* If N is odd, set NISODD = .TRUE., and N1 and N2. */ /* If N is even, NISODD = .FALSE., and NK. */ if (*n % 2 == 0) { nisodd = FALSE_; nk = *n / 2; } else { nisodd = TRUE_; if (lower) { n2 = *n / 2; n1 = *n - n2; } else { n1 = *n / 2; n2 = *n - n1; } } if (nisodd) { /* N is odd */ if (normaltransr) { /* N is odd and TRANSR = 'N' */ if (lower) { /* N is odd, TRANSR = 'N', and UPLO = 'L' */ if (notrans) { /* N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' */ _starpu_dsyrk_("L", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[1], n); _starpu_dsyrk_("U", "N", &n2, k, alpha, &a[n1 + 1 + a_dim1], lda, beta, &c__[*n + 1], n); _starpu_dgemm_("N", "T", &n2, &n1, k, alpha, &a[n1 + 1 + a_dim1], lda, &a[a_dim1 + 1], lda, beta, &c__[n1 + 1], n); } else { /* N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'T' */ _starpu_dsyrk_("L", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[1], n); _starpu_dsyrk_("U", "T", &n2, k, alpha, &a[(n1 + 1) * a_dim1 + 1], lda, beta, &c__[*n + 1], n) ; _starpu_dgemm_("T", "N", &n2, &n1, k, alpha, &a[(n1 + 1) * a_dim1 + 1], lda, &a[a_dim1 + 1], lda, beta, &c__[n1 + 1] , n); } } else { /* N is odd, TRANSR = 'N', and UPLO = 'U' */ if (notrans) { /* N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' */ _starpu_dsyrk_("L", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[n2 + 1], n); _starpu_dsyrk_("U", "N", &n2, k, alpha, &a[n2 + a_dim1], lda, beta, &c__[n1 + 1], n); _starpu_dgemm_("N", "T", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, &a[n2 + a_dim1], lda, beta, &c__[1], n); } else { /* N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'T' */ _starpu_dsyrk_("L", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[n2 + 1], n); _starpu_dsyrk_("U", "T", &n2, k, alpha, &a[n2 * a_dim1 + 1], lda, beta, &c__[n1 + 1], n); _starpu_dgemm_("T", "N", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, &a[n2 * a_dim1 + 1], lda, beta, &c__[1], n); } } } else { /* N is odd, and TRANSR = 'T' */ if (lower) { /* N is odd, TRANSR = 'T', and UPLO = 'L' */ if (notrans) { /* N is odd, TRANSR = 'T', UPLO = 'L', and TRANS = 'N' */ _starpu_dsyrk_("U", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[1], &n1); _starpu_dsyrk_("L", "N", &n2, k, alpha, &a[n1 + 1 + a_dim1], lda, beta, &c__[2], &n1); _starpu_dgemm_("N", "T", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, &a[n1 + 1 + a_dim1], lda, beta, &c__[n1 * n1 + 1], &n1); } else { /* N is odd, TRANSR = 'T', UPLO = 'L', and TRANS = 'T' */ _starpu_dsyrk_("U", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[1], &n1); _starpu_dsyrk_("L", "T", &n2, k, alpha, &a[(n1 + 1) * a_dim1 + 1], lda, beta, &c__[2], &n1); _starpu_dgemm_("T", "N", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, &a[(n1 + 1) * a_dim1 + 1], lda, beta, &c__[n1 * n1 + 1], &n1); } } else { /* N is odd, TRANSR = 'T', and UPLO = 'U' */ if (notrans) { /* N is odd, TRANSR = 'T', UPLO = 'U', and TRANS = 'N' */ _starpu_dsyrk_("U", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[n2 * n2 + 1], &n2); _starpu_dsyrk_("L", "N", &n2, k, alpha, &a[n1 + 1 + a_dim1], lda, beta, &c__[n1 * n2 + 1], &n2); _starpu_dgemm_("N", "T", &n2, &n1, k, alpha, &a[n1 + 1 + a_dim1], lda, &a[a_dim1 + 1], lda, beta, &c__[1], &n2); } else { /* N is odd, TRANSR = 'T', UPLO = 'U', and TRANS = 'T' */ _starpu_dsyrk_("U", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[n2 * n2 + 1], &n2); _starpu_dsyrk_("L", "T", &n2, k, alpha, &a[(n1 + 1) * a_dim1 + 1], lda, beta, &c__[n1 * n2 + 1], &n2); _starpu_dgemm_("T", "N", &n2, &n1, k, alpha, &a[(n1 + 1) * a_dim1 + 1], lda, &a[a_dim1 + 1], lda, beta, &c__[1], & n2); } } } } else { /* N is even */ if (normaltransr) { /* N is even and TRANSR = 'N' */ if (lower) { /* N is even, TRANSR = 'N', and UPLO = 'L' */ if (notrans) { /* N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' */ i__1 = *n + 1; _starpu_dsyrk_("L", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[2], &i__1); i__1 = *n + 1; _starpu_dsyrk_("U", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, beta, &c__[1], &i__1); i__1 = *n + 1; _starpu_dgemm_("N", "T", &nk, &nk, k, alpha, &a[nk + 1 + a_dim1], lda, &a[a_dim1 + 1], lda, beta, &c__[nk + 2], & i__1); } else { /* N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'T' */ i__1 = *n + 1; _starpu_dsyrk_("L", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[2], &i__1); i__1 = *n + 1; _starpu_dsyrk_("U", "T", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], lda, beta, &c__[1], &i__1); i__1 = *n + 1; _starpu_dgemm_("T", "N", &nk, &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], lda, &a[a_dim1 + 1], lda, beta, &c__[nk + 2] , &i__1); } } else { /* N is even, TRANSR = 'N', and UPLO = 'U' */ if (notrans) { /* N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' */ i__1 = *n + 1; _starpu_dsyrk_("L", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[nk + 2], &i__1); i__1 = *n + 1; _starpu_dsyrk_("U", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, beta, &c__[nk + 1], &i__1); i__1 = *n + 1; _starpu_dgemm_("N", "T", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, &a[nk + 1 + a_dim1], lda, beta, &c__[1], &i__1); } else { /* N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'T' */ i__1 = *n + 1; _starpu_dsyrk_("L", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[nk + 2], &i__1); i__1 = *n + 1; _starpu_dsyrk_("U", "T", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], lda, beta, &c__[nk + 1], &i__1); i__1 = *n + 1; _starpu_dgemm_("T", "N", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, &a[(nk + 1) * a_dim1 + 1], lda, beta, &c__[1], & i__1); } } } else { /* N is even, and TRANSR = 'T' */ if (lower) { /* N is even, TRANSR = 'T', and UPLO = 'L' */ if (notrans) { /* N is even, TRANSR = 'T', UPLO = 'L', and TRANS = 'N' */ _starpu_dsyrk_("U", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[nk + 1], &nk); _starpu_dsyrk_("L", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, beta, &c__[1], &nk); _starpu_dgemm_("N", "T", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, &a[nk + 1 + a_dim1], lda, beta, &c__[(nk + 1) * nk + 1], &nk); } else { /* N is even, TRANSR = 'T', UPLO = 'L', and TRANS = 'T' */ _starpu_dsyrk_("U", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[nk + 1], &nk); _starpu_dsyrk_("L", "T", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], lda, beta, &c__[1], &nk); _starpu_dgemm_("T", "N", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, &a[(nk + 1) * a_dim1 + 1], lda, beta, &c__[(nk + 1) * nk + 1], &nk); } } else { /* N is even, TRANSR = 'T', and UPLO = 'U' */ if (notrans) { /* N is even, TRANSR = 'T', UPLO = 'U', and TRANS = 'N' */ _starpu_dsyrk_("U", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[nk * (nk + 1) + 1], &nk); _starpu_dsyrk_("L", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, beta, &c__[nk * nk + 1], &nk); _starpu_dgemm_("N", "T", &nk, &nk, k, alpha, &a[nk + 1 + a_dim1], lda, &a[a_dim1 + 1], lda, beta, &c__[1], &nk); } else { /* N is even, TRANSR = 'T', UPLO = 'U', and TRANS = 'T' */ _starpu_dsyrk_("U", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, &c__[nk * (nk + 1) + 1], &nk); _starpu_dsyrk_("L", "T", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], lda, beta, &c__[nk * nk + 1], &nk); _starpu_dgemm_("T", "N", &nk, &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], lda, &a[a_dim1 + 1], lda, beta, &c__[1], & nk); } } } } return 0; /* End of DSFRK */ } /* _starpu_dsfrk_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsgesv.c000066400000000000000000000323471507764646700207000ustar00rootroot00000000000000/* dsgesv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b10 = -1.; static doublereal c_b11 = 1.; static integer c__1 = 1; /* Subroutine */ int _starpu__starpu_dsgesv_(integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *ldb, doublereal * x, integer *ldx, doublereal *work, real *swork, integer *iter, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, work_dim1, work_offset, x_dim1, x_offset, i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__; doublereal cte, eps, anrm; integer ptsa; doublereal rnrm, xnrm; integer ptsx; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer iiter; extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dlag2s_(integer *, integer *, doublereal *, integer *, real *, integer *, integer *), _starpu_slag2d_( integer *, integer *, real *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dgetrf_(integer *, integer *, doublereal *, integer *, integer *, integer *), _starpu_dgetrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_sgetrf_(integer *, integer *, real *, integer *, integer *, integer *), _starpu_sgetrs_(char *, integer *, integer *, real *, integer *, integer *, real *, integer *, integer *); /* -- LAPACK PROTOTYPE driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* February 2007 */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSGESV computes the solution to a real system of linear equations */ /* A * X = B, */ /* where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */ /* DSGESV first attempts to factorize the matrix in SINGLE PRECISION */ /* and use this factorization within an iterative refinement procedure */ /* to produce a solution with DOUBLE PRECISION normwise backward error */ /* quality (see below). If the approach fails the method switches to a */ /* DOUBLE PRECISION factorization and solve. */ /* The iterative refinement is not going to be a winning strategy if */ /* the ratio SINGLE PRECISION performance over DOUBLE PRECISION */ /* performance is too small. A reasonable strategy should take the */ /* number of right-hand sides and the size of the matrix into account. */ /* This might be done with a call to ILAENV in the future. Up to now, we */ /* always try iterative refinement. */ /* The iterative refinement process is stopped if */ /* ITER > ITERMAX */ /* or for all the RHS we have: */ /* RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX */ /* where */ /* o ITER is the number of the current iteration in the iterative */ /* refinement process */ /* o RNRM is the infinity-norm of the residual */ /* o XNRM is the infinity-norm of the solution */ /* o ANRM is the infinity-operator-norm of the matrix A */ /* o EPS is the machine epsilon returned by DLAMCH('Epsilon') */ /* The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 */ /* respectively. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* A (input or input/ouptut) DOUBLE PRECISION array, */ /* dimension (LDA,N) */ /* On entry, the N-by-N coefficient matrix A. */ /* On exit, if iterative refinement has been successfully used */ /* (INFO.EQ.0 and ITER.GE.0, see description below), then A is */ /* unchanged, if double precision factorization has been used */ /* (INFO.EQ.0 and ITER.LT.0, see description below), then the */ /* array A contains the factors L and U from the factorization */ /* A = P*L*U; the unit diagonal elements of L are not stored. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* IPIV (output) INTEGER array, dimension (N) */ /* The pivot indices that define the permutation matrix P; */ /* row i of the matrix was interchanged with row IPIV(i). */ /* Corresponds either to the single precision factorization */ /* (if INFO.EQ.0 and ITER.GE.0) or the double precision */ /* factorization (if INFO.EQ.0 and ITER.LT.0). */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The N-by-NRHS right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* If INFO = 0, the N-by-NRHS solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N*NRHS) */ /* This array is used to hold the residual vectors. */ /* SWORK (workspace) REAL array, dimension (N*(N+NRHS)) */ /* This array is used to use the single precision matrix and the */ /* right-hand sides or solutions in single precision. */ /* ITER (output) INTEGER */ /* < 0: iterative refinement has failed, double precision */ /* factorization has been performed */ /* -1 : the routine fell back to full precision for */ /* implementation- or machine-specific reasons */ /* -2 : narrowing the precision induced an overflow, */ /* the routine fell back to full precision */ /* -3 : failure of SGETRF */ /* -31: stop the iterative refinement after the 30th */ /* iterations */ /* > 0: iterative refinement has been sucessfully used. */ /* Returns the number of iterations */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, U(i,i) computed in DOUBLE PRECISION is */ /* exactly zero. The factorization has been completed, */ /* but the factor U is exactly singular, so the solution */ /* could not be computed. */ /* ========= */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ work_dim1 = *n; work_offset = 1 + work_dim1; work -= work_offset; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --swork; /* Function Body */ *info = 0; *iter = 0; /* Test the input parameters. */ if (*n < 0) { *info = -1; } else if (*nrhs < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } else if (*ldb < max(1,*n)) { *info = -7; } else if (*ldx < max(1,*n)) { *info = -9; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSGESV", &i__1); return 0; } /* Quick return if (N.EQ.0). */ if (*n == 0) { return 0; } /* Skip single precision iterative refinement if a priori slower */ /* than double precision factorization. */ if (FALSE_) { *iter = -1; goto L40; } /* Compute some constants. */ anrm = _starpu_dlange_("I", n, n, &a[a_offset], lda, &work[work_offset]); eps = _starpu_dlamch_("Epsilon"); cte = anrm * eps * sqrt((doublereal) (*n)) * 1.; /* Set the indices PTSA, PTSX for referencing SA and SX in SWORK. */ ptsa = 1; ptsx = ptsa + *n * *n; /* Convert B from double precision to single precision and store the */ /* result in SX. */ _starpu_dlag2s_(n, nrhs, &b[b_offset], ldb, &swork[ptsx], n, info); if (*info != 0) { *iter = -2; goto L40; } /* Convert A from double precision to single precision and store the */ /* result in SA. */ _starpu_dlag2s_(n, n, &a[a_offset], lda, &swork[ptsa], n, info); if (*info != 0) { *iter = -2; goto L40; } /* Compute the LU factorization of SA. */ _starpu_sgetrf_(n, n, &swork[ptsa], n, &ipiv[1], info); if (*info != 0) { *iter = -3; goto L40; } /* Solve the system SA*SX = SB. */ _starpu_sgetrs_("No transpose", n, nrhs, &swork[ptsa], n, &ipiv[1], &swork[ptsx], n, info); /* Convert SX back to double precision */ _starpu_slag2d_(n, nrhs, &swork[ptsx], n, &x[x_offset], ldx, info); /* Compute R = B - AX (R is WORK). */ _starpu_dlacpy_("All", n, nrhs, &b[b_offset], ldb, &work[work_offset], n); _starpu_dgemm_("No Transpose", "No Transpose", n, nrhs, n, &c_b10, &a[a_offset], lda, &x[x_offset], ldx, &c_b11, &work[work_offset], n); /* Check whether the NRHS normwise backward errors satisfy the */ /* stopping criterion. If yes, set ITER=0 and return. */ i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { xnrm = (d__1 = x[_starpu_idamax_(n, &x[i__ * x_dim1 + 1], &c__1) + i__ * x_dim1], abs(d__1)); rnrm = (d__1 = work[_starpu_idamax_(n, &work[i__ * work_dim1 + 1], &c__1) + i__ * work_dim1], abs(d__1)); if (rnrm > xnrm * cte) { goto L10; } } /* If we are here, the NRHS normwise backward errors satisfy the */ /* stopping criterion. We are good to exit. */ *iter = 0; return 0; L10: for (iiter = 1; iiter <= 30; ++iiter) { /* Convert R (in WORK) from double precision to single precision */ /* and store the result in SX. */ _starpu_dlag2s_(n, nrhs, &work[work_offset], n, &swork[ptsx], n, info); if (*info != 0) { *iter = -2; goto L40; } /* Solve the system SA*SX = SR. */ _starpu_sgetrs_("No transpose", n, nrhs, &swork[ptsa], n, &ipiv[1], &swork[ ptsx], n, info); /* Convert SX back to double precision and update the current */ /* iterate. */ _starpu_slag2d_(n, nrhs, &swork[ptsx], n, &work[work_offset], n, info); i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { _starpu_daxpy_(n, &c_b11, &work[i__ * work_dim1 + 1], &c__1, &x[i__ * x_dim1 + 1], &c__1); } /* Compute R = B - AX (R is WORK). */ _starpu_dlacpy_("All", n, nrhs, &b[b_offset], ldb, &work[work_offset], n); _starpu_dgemm_("No Transpose", "No Transpose", n, nrhs, n, &c_b10, &a[ a_offset], lda, &x[x_offset], ldx, &c_b11, &work[work_offset], n); /* Check whether the NRHS normwise backward errors satisfy the */ /* stopping criterion. If yes, set ITER=IITER>0 and return. */ i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { xnrm = (d__1 = x[_starpu_idamax_(n, &x[i__ * x_dim1 + 1], &c__1) + i__ * x_dim1], abs(d__1)); rnrm = (d__1 = work[_starpu_idamax_(n, &work[i__ * work_dim1 + 1], &c__1) + i__ * work_dim1], abs(d__1)); if (rnrm > xnrm * cte) { goto L20; } } /* If we are here, the NRHS normwise backward errors satisfy the */ /* stopping criterion, we are good to exit. */ *iter = iiter; return 0; L20: /* L30: */ ; } /* If we are at this place of the code, this is because we have */ /* performed ITER=ITERMAX iterations and never satisified the */ /* stopping criterion, set up the ITER flag accordingly and follow up */ /* on double precision routine. */ *iter = -31; L40: /* Single-precision iterative refinement failed to converge to a */ /* satisfactory solution, so we resort to double precision. */ _starpu_dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); if (*info != 0) { return 0; } _starpu_dlacpy_("All", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); _starpu_dgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &x[x_offset] , ldx, info); return 0; /* End of DSGESV. */ } /* _starpu__starpu_dsgesv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dspcon.c000066400000000000000000000124651507764646700206720ustar00rootroot00000000000000/* dspcon.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dspcon_(char *uplo, integer *n, doublereal *ap, integer * ipiv, doublereal *anorm, doublereal *rcond, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer i__1; /* Local variables */ integer i__, ip, kase; extern logical _starpu_lsame_(char *, char *); integer isave[3]; logical upper; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); doublereal ainvnm; extern /* Subroutine */ int _starpu_dsptrs_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPCON estimates the reciprocal of the condition number (in the */ /* 1-norm) of a real symmetric packed matrix A using the factorization */ /* A = U*D*U**T or A = L*D*L**T computed by DSPTRF. */ /* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ /* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the details of the factorization are stored */ /* as an upper or lower triangular matrix. */ /* = 'U': Upper triangular, form is A = U*D*U**T; */ /* = 'L': Lower triangular, form is A = L*D*L**T. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The block diagonal matrix D and the multipliers used to */ /* obtain the factor U or L as computed by DSPTRF, stored as a */ /* packed triangular matrix. */ /* IPIV (input) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D */ /* as determined by DSPTRF. */ /* ANORM (input) DOUBLE PRECISION */ /* The 1-norm of the original matrix A. */ /* RCOND (output) DOUBLE PRECISION */ /* The reciprocal of the condition number of the matrix A, */ /* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ /* estimate of the 1-norm of inv(A) computed in this routine. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --iwork; --work; --ipiv; --ap; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*anorm < 0.) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPCON", &i__1); return 0; } /* Quick return if possible */ *rcond = 0.; if (*n == 0) { *rcond = 1.; return 0; } else if (*anorm <= 0.) { return 0; } /* Check that the diagonal matrix D is nonsingular. */ if (upper) { /* Upper triangular storage: examine D from bottom to top */ ip = *n * (*n + 1) / 2; for (i__ = *n; i__ >= 1; --i__) { if (ipiv[i__] > 0 && ap[ip] == 0.) { return 0; } ip -= i__; /* L10: */ } } else { /* Lower triangular storage: examine D from top to bottom. */ ip = 1; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (ipiv[i__] > 0 && ap[ip] == 0.) { return 0; } ip = ip + *n - i__ + 1; /* L20: */ } } /* Estimate the 1-norm of the inverse. */ kase = 0; L30: _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); if (kase != 0) { /* Multiply by inv(L*D*L') or inv(U*D*U'). */ _starpu_dsptrs_(uplo, n, &c__1, &ap[1], &ipiv[1], &work[1], n, info); goto L30; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { *rcond = 1. / ainvnm / *anorm; } return 0; /* End of DSPCON */ } /* _starpu_dspcon_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dspev.c000066400000000000000000000163421507764646700205230ustar00rootroot00000000000000/* dspev.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dspev_(char *jobz, char *uplo, integer *n, doublereal * ap, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal eps; integer inde; doublereal anrm; integer imax; doublereal rmin, rmax; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal sigma; extern logical _starpu_lsame_(char *, char *); integer iinfo; logical wantz; extern doublereal _starpu_dlamch_(char *); integer iscale; doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; extern doublereal _starpu_dlansp_(char *, char *, integer *, doublereal *, doublereal *); integer indtau; extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); integer indwrk; extern /* Subroutine */ int _starpu_dopgtr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsptrd_(char *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *); doublereal smlnum; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPEV computes all the eigenvalues and, optionally, eigenvectors of a */ /* real symmetric matrix A in packed storage. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* A, packed columnwise in a linear array. The j-th column of A */ /* is stored in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ /* On exit, AP is overwritten by values generated during the */ /* reduction to tridiagonal form. If UPLO = 'U', the diagonal */ /* and first superdiagonal of the tridiagonal matrix T overwrite */ /* the corresponding elements of A, and if UPLO = 'L', the */ /* diagonal and first subdiagonal of T overwrite the */ /* corresponding elements of A. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, the eigenvalues in ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ /* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ /* eigenvectors of the matrix A, with the i-th column of Z */ /* holding the eigenvector associated with W(i). */ /* If JOBZ = 'N', then Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = i, the algorithm failed to converge; i */ /* off-diagonal elements of an intermediate tridiagonal */ /* form did not converge to zero. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); *info = 0; if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (_starpu_lsame_(uplo, "U") || _starpu_lsame_(uplo, "L"))) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPEV ", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { w[1] = ap[1]; if (wantz) { z__[z_dim1 + 1] = 1.; } return 0; } /* Get machine constants. */ safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); rmax = sqrt(bignum); /* Scale matrix to allowable range, if necessary. */ anrm = _starpu_dlansp_("M", uplo, n, &ap[1], &work[1]); iscale = 0; if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { i__1 = *n * (*n + 1) / 2; _starpu_dscal_(&i__1, &sigma, &ap[1], &c__1); } /* Call DSPTRD to reduce symmetric packed matrix to tridiagonal form. */ inde = 1; indtau = inde + *n; _starpu_dsptrd_(uplo, n, &ap[1], &w[1], &work[inde], &work[indtau], &iinfo); /* For eigenvalues only, call DSTERF. For eigenvectors, first call */ /* DOPGTR to generate the orthogonal matrix, then call DSTEQR. */ if (! wantz) { _starpu_dsterf_(n, &w[1], &work[inde], info); } else { indwrk = indtau + *n; _starpu_dopgtr_(uplo, n, &ap[1], &work[indtau], &z__[z_offset], ldz, &work[ indwrk], &iinfo); _starpu_dsteqr_(jobz, n, &w[1], &work[inde], &z__[z_offset], ldz, &work[ indtau], info); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { if (*info == 0) { imax = *n; } else { imax = *info - 1; } d__1 = 1. / sigma; _starpu_dscal_(&imax, &d__1, &w[1], &c__1); } return 0; /* End of DSPEV */ } /* _starpu_dspev_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dspevd.c000066400000000000000000000240731507764646700206670ustar00rootroot00000000000000/* dspevd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dspevd_(char *jobz, char *uplo, integer *n, doublereal * ap, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal eps; integer inde; doublereal anrm, rmin, rmax; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal sigma; extern logical _starpu_lsame_(char *, char *); integer iinfo, lwmin; logical wantz; extern doublereal _starpu_dlamch_(char *); integer iscale; extern /* Subroutine */ int _starpu_dstedc_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; extern doublereal _starpu_dlansp_(char *, char *, integer *, doublereal *, doublereal *); integer indtau; extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); integer indwrk, liwmin; extern /* Subroutine */ int _starpu_dsptrd_(char *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dopmtr_(char *, char *, char *, integer *, integer *, doublereal * , doublereal *, doublereal *, integer *, doublereal *, integer *); integer llwork; doublereal smlnum; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPEVD computes all the eigenvalues and, optionally, eigenvectors */ /* of a real symmetric matrix A in packed storage. If eigenvectors are */ /* desired, it uses a divide and conquer algorithm. */ /* The divide and conquer algorithm makes very mild assumptions about */ /* floating point arithmetic. It will work on machines with a guard */ /* digit in add/subtract, or on those binary machines without guard */ /* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ /* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* A, packed columnwise in a linear array. The j-th column of A */ /* is stored in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ /* On exit, AP is overwritten by values generated during the */ /* reduction to tridiagonal form. If UPLO = 'U', the diagonal */ /* and first superdiagonal of the tridiagonal matrix T overwrite */ /* the corresponding elements of A, and if UPLO = 'L', the */ /* diagonal and first subdiagonal of T overwrite the */ /* corresponding elements of A. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, the eigenvalues in ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ /* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ /* eigenvectors of the matrix A, with the i-th column of Z */ /* holding the eigenvector associated with W(i). */ /* If JOBZ = 'N', then Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace/output) DOUBLE PRECISION array, */ /* dimension (LWORK) */ /* On exit, if INFO = 0, WORK(1) returns the required LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If N <= 1, LWORK must be at least 1. */ /* If JOBZ = 'N' and N > 1, LWORK must be at least 2*N. */ /* If JOBZ = 'V' and N > 1, LWORK must be at least */ /* 1 + 6*N + N**2. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the required sizes of the WORK and IWORK */ /* arrays, returns these values as the first entries of the WORK */ /* and IWORK arrays, and no error message related to LWORK or */ /* LIWORK is issued by XERBLA. */ /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ /* On exit, if INFO = 0, IWORK(1) returns the required LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. */ /* If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. */ /* If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the required sizes of the WORK and */ /* IWORK arrays, returns these values as the first entries of */ /* the WORK and IWORK arrays, and no error message related to */ /* LWORK or LIWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = i, the algorithm failed to converge; i */ /* off-diagonal elements of an intermediate tridiagonal */ /* form did not converge to zero. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; --iwork; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); lquery = *lwork == -1 || *liwork == -1; *info = 0; if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (_starpu_lsame_(uplo, "U") || _starpu_lsame_(uplo, "L"))) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -7; } if (*info == 0) { if (*n <= 1) { liwmin = 1; lwmin = 1; } else { if (wantz) { liwmin = *n * 5 + 3; /* Computing 2nd power */ i__1 = *n; lwmin = *n * 6 + 1 + i__1 * i__1; } else { liwmin = 1; lwmin = *n << 1; } } iwork[1] = liwmin; work[1] = (doublereal) lwmin; if (*lwork < lwmin && ! lquery) { *info = -9; } else if (*liwork < liwmin && ! lquery) { *info = -11; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPEVD", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { w[1] = ap[1]; if (wantz) { z__[z_dim1 + 1] = 1.; } return 0; } /* Get machine constants. */ safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); rmax = sqrt(bignum); /* Scale matrix to allowable range, if necessary. */ anrm = _starpu_dlansp_("M", uplo, n, &ap[1], &work[1]); iscale = 0; if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { i__1 = *n * (*n + 1) / 2; _starpu_dscal_(&i__1, &sigma, &ap[1], &c__1); } /* Call DSPTRD to reduce symmetric packed matrix to tridiagonal form. */ inde = 1; indtau = inde + *n; _starpu_dsptrd_(uplo, n, &ap[1], &w[1], &work[inde], &work[indtau], &iinfo); /* For eigenvalues only, call DSTERF. For eigenvectors, first call */ /* DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the */ /* tridiagonal matrix, then call DOPMTR to multiply it by the */ /* Householder transformations represented in AP. */ if (! wantz) { _starpu_dsterf_(n, &w[1], &work[inde], info); } else { indwrk = indtau + *n; llwork = *lwork - indwrk + 1; _starpu_dstedc_("I", n, &w[1], &work[inde], &z__[z_offset], ldz, &work[indwrk] , &llwork, &iwork[1], liwork, info); _starpu_dopmtr_("L", uplo, "N", n, n, &ap[1], &work[indtau], &z__[z_offset], ldz, &work[indwrk], &iinfo); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { d__1 = 1. / sigma; _starpu_dscal_(n, &d__1, &w[1], &c__1); } work[1] = (doublereal) lwmin; iwork[1] = liwmin; return 0; /* End of DSPEVD */ } /* _starpu_dspevd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dspevx.c000066400000000000000000000351161507764646700207130ustar00rootroot00000000000000/* dspevx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dspevx_(char *jobz, char *range, char *uplo, integer *n, doublereal *ap, doublereal *vl, doublereal *vu, integer *il, integer * iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, jj; doublereal eps, vll, vuu, tmp1; integer indd, inde; doublereal anrm; integer imax; doublereal rmin, rmax; logical test; integer itmp1, indee; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal sigma; extern logical _starpu_lsame_(char *, char *); integer iinfo; char order[1]; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); logical wantz; extern doublereal _starpu_dlamch_(char *); logical alleig, indeig; integer iscale, indibl; logical valeig; doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal abstll, bignum; extern doublereal _starpu_dlansp_(char *, char *, integer *, doublereal *, doublereal *); integer indtau, indisp; extern /* Subroutine */ int _starpu_dstein_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *), _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); integer indiwo; extern /* Subroutine */ int _starpu_dstebz_(char *, char *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); integer indwrk; extern /* Subroutine */ int _starpu_dopgtr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsptrd_(char *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dopmtr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer nsplit; doublereal smlnum; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPEVX computes selected eigenvalues and, optionally, eigenvectors */ /* of a real symmetric matrix A in packed storage. Eigenvalues/vectors */ /* can be selected by specifying either a range of values or a range of */ /* indices for the desired eigenvalues. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* RANGE (input) CHARACTER*1 */ /* = 'A': all eigenvalues will be found; */ /* = 'V': all eigenvalues in the half-open interval (VL,VU] */ /* will be found; */ /* = 'I': the IL-th through IU-th eigenvalues will be found. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* A, packed columnwise in a linear array. The j-th column of A */ /* is stored in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ /* On exit, AP is overwritten by values generated during the */ /* reduction to tridiagonal form. If UPLO = 'U', the diagonal */ /* and first superdiagonal of the tridiagonal matrix T overwrite */ /* the corresponding elements of A, and if UPLO = 'L', the */ /* diagonal and first subdiagonal of T overwrite the */ /* corresponding elements of A. */ /* VL (input) DOUBLE PRECISION */ /* VU (input) DOUBLE PRECISION */ /* If RANGE='V', the lower and upper bounds of the interval to */ /* be searched for eigenvalues. VL < VU. */ /* Not referenced if RANGE = 'A' or 'I'. */ /* IL (input) INTEGER */ /* IU (input) INTEGER */ /* If RANGE='I', the indices (in ascending order) of the */ /* smallest and largest eigenvalues to be returned. */ /* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ /* Not referenced if RANGE = 'A' or 'V'. */ /* ABSTOL (input) DOUBLE PRECISION */ /* The absolute error tolerance for the eigenvalues. */ /* An approximate eigenvalue is accepted as converged */ /* when it is determined to lie in an interval [a,b] */ /* of width less than or equal to */ /* ABSTOL + EPS * max( |a|,|b| ) , */ /* where EPS is the machine precision. If ABSTOL is less than */ /* or equal to zero, then EPS*|T| will be used in its place, */ /* where |T| is the 1-norm of the tridiagonal matrix obtained */ /* by reducing AP to tridiagonal form. */ /* Eigenvalues will be computed most accurately when ABSTOL is */ /* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ /* If this routine returns with INFO>0, indicating that some */ /* eigenvectors did not converge, try setting ABSTOL to */ /* 2*DLAMCH('S'). */ /* See "Computing Small Singular Values of Bidiagonal Matrices */ /* with Guaranteed High Relative Accuracy," by Demmel and */ /* Kahan, LAPACK Working Note #3. */ /* M (output) INTEGER */ /* The total number of eigenvalues found. 0 <= M <= N. */ /* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, the selected eigenvalues in ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) */ /* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ /* contain the orthonormal eigenvectors of the matrix A */ /* corresponding to the selected eigenvalues, with the i-th */ /* column of Z holding the eigenvector associated with W(i). */ /* If an eigenvector fails to converge, then that column of Z */ /* contains the latest approximation to the eigenvector, and the */ /* index of the eigenvector is returned in IFAIL. */ /* If JOBZ = 'N', then Z is not referenced. */ /* Note: the user must ensure that at least max(1,M) columns are */ /* supplied in the array Z; if RANGE = 'V', the exact value of M */ /* is not known in advance and an upper bound must be used. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (8*N) */ /* IWORK (workspace) INTEGER array, dimension (5*N) */ /* IFAIL (output) INTEGER array, dimension (N) */ /* If JOBZ = 'V', then if INFO = 0, the first M elements of */ /* IFAIL are zero. If INFO > 0, then IFAIL contains the */ /* indices of the eigenvectors that failed to converge. */ /* If JOBZ = 'N', then IFAIL is not referenced. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, then i eigenvectors failed to converge. */ /* Their indices are stored in array IFAIL. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; --iwork; --ifail; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); alleig = _starpu_lsame_(range, "A"); valeig = _starpu_lsame_(range, "V"); indeig = _starpu_lsame_(range, "I"); *info = 0; if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; } else if (! (_starpu_lsame_(uplo, "L") || _starpu_lsame_(uplo, "U"))) { *info = -3; } else if (*n < 0) { *info = -4; } else { if (valeig) { if (*n > 0 && *vu <= *vl) { *info = -7; } } else if (indeig) { if (*il < 1 || *il > max(1,*n)) { *info = -8; } else if (*iu < min(*n,*il) || *iu > *n) { *info = -9; } } } if (*info == 0) { if (*ldz < 1 || wantz && *ldz < *n) { *info = -14; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPEVX", &i__1); return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { return 0; } if (*n == 1) { if (alleig || indeig) { *m = 1; w[1] = ap[1]; } else { if (*vl < ap[1] && *vu >= ap[1]) { *m = 1; w[1] = ap[1]; } } if (wantz) { z__[z_dim1 + 1] = 1.; } return 0; } /* Get machine constants. */ safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); /* Computing MIN */ d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); rmax = min(d__1,d__2); /* Scale matrix to allowable range, if necessary. */ iscale = 0; abstll = *abstol; if (valeig) { vll = *vl; vuu = *vu; } else { vll = 0.; vuu = 0.; } anrm = _starpu_dlansp_("M", uplo, n, &ap[1], &work[1]); if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { i__1 = *n * (*n + 1) / 2; _starpu_dscal_(&i__1, &sigma, &ap[1], &c__1); if (*abstol > 0.) { abstll = *abstol * sigma; } if (valeig) { vll = *vl * sigma; vuu = *vu * sigma; } } /* Call DSPTRD to reduce symmetric packed matrix to tridiagonal form. */ indtau = 1; inde = indtau + *n; indd = inde + *n; indwrk = indd + *n; _starpu_dsptrd_(uplo, n, &ap[1], &work[indd], &work[inde], &work[indtau], &iinfo); /* If all eigenvalues are desired and ABSTOL is less than or equal */ /* to zero, then call DSTERF or DOPGTR and SSTEQR. If this fails */ /* for some eigenvalue, then try DSTEBZ. */ test = FALSE_; if (indeig) { if (*il == 1 && *iu == *n) { test = TRUE_; } } if ((alleig || test) && *abstol <= 0.) { _starpu_dcopy_(n, &work[indd], &c__1, &w[1], &c__1); indee = indwrk + (*n << 1); if (! wantz) { i__1 = *n - 1; _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); _starpu_dsterf_(n, &w[1], &work[indee], info); } else { _starpu_dopgtr_(uplo, n, &ap[1], &work[indtau], &z__[z_offset], ldz, & work[indwrk], &iinfo); i__1 = *n - 1; _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); _starpu_dsteqr_(jobz, n, &w[1], &work[indee], &z__[z_offset], ldz, &work[ indwrk], info); if (*info == 0) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { ifail[i__] = 0; /* L10: */ } } } if (*info == 0) { *m = *n; goto L20; } *info = 0; } /* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. */ if (wantz) { *(unsigned char *)order = 'B'; } else { *(unsigned char *)order = 'E'; } indibl = 1; indisp = indibl + *n; indiwo = indisp + *n; _starpu_dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &work[indd], &work[ inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[ indwrk], &iwork[indiwo], info); if (wantz) { _starpu_dstein_(n, &work[indd], &work[inde], m, &w[1], &iwork[indibl], &iwork[ indisp], &z__[z_offset], ldz, &work[indwrk], &iwork[indiwo], & ifail[1], info); /* Apply orthogonal matrix used in reduction to tridiagonal */ /* form to eigenvectors returned by DSTEIN. */ _starpu_dopmtr_("L", uplo, "N", n, m, &ap[1], &work[indtau], &z__[z_offset], ldz, &work[indwrk], &iinfo); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ L20: if (iscale == 1) { if (*info == 0) { imax = *m; } else { imax = *info - 1; } d__1 = 1. / sigma; _starpu_dscal_(&imax, &d__1, &w[1], &c__1); } /* If eigenvalues are not in order, then sort them, along with */ /* eigenvectors. */ if (wantz) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { i__ = 0; tmp1 = w[j]; i__2 = *m; for (jj = j + 1; jj <= i__2; ++jj) { if (w[jj] < tmp1) { i__ = jj; tmp1 = w[jj]; } /* L30: */ } if (i__ != 0) { itmp1 = iwork[indibl + i__ - 1]; w[i__] = w[j]; iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; w[j] = tmp1; iwork[indibl + j - 1] = itmp1; _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], &c__1); if (*info != 0) { itmp1 = ifail[i__]; ifail[i__] = ifail[j]; ifail[j] = itmp1; } } /* L40: */ } } return 0; /* End of DSPEVX */ } /* _starpu_dspevx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dspgst.c000066400000000000000000000177411507764646700207120ustar00rootroot00000000000000/* dspgst.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b9 = -1.; static doublereal c_b11 = 1.; /* Subroutine */ int _starpu_dspgst_(integer *itype, char *uplo, integer *n, doublereal *ap, doublereal *bp, integer *info) { /* System generated locals */ integer i__1, i__2; doublereal d__1; /* Local variables */ integer j, k, j1, k1, jj, kk; doublereal ct, ajj; integer j1j1; doublereal akk; integer k1k1; doublereal bjj, bkk; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dspr2_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *), _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dspmv_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dtpmv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *), _starpu_dtpsv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPGST reduces a real symmetric-definite generalized eigenproblem */ /* to standard form, using packed storage. */ /* If ITYPE = 1, the problem is A*x = lambda*B*x, */ /* and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) */ /* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */ /* B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. */ /* B must have been previously factorized as U**T*U or L*L**T by DPPTRF. */ /* Arguments */ /* ========= */ /* ITYPE (input) INTEGER */ /* = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); */ /* = 2 or 3: compute U*A*U**T or L**T*A*L. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored and B is factored as */ /* U**T*U; */ /* = 'L': Lower triangle of A is stored and B is factored as */ /* L*L**T. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* A, packed columnwise in a linear array. The j-th column of A */ /* is stored in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* On exit, if INFO = 0, the transformed matrix, stored in the */ /* same format as A. */ /* BP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The triangular factor from the Cholesky factorization of B, */ /* stored in the same format as A, as returned by DPPTRF. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --bp; --ap; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (*itype < 1 || *itype > 3) { *info = -1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPGST", &i__1); return 0; } if (*itype == 1) { if (upper) { /* Compute inv(U')*A*inv(U) */ /* J1 and JJ are the indices of A(1,j) and A(j,j) */ jj = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { j1 = jj + 1; jj += j; /* Compute the j-th column of the upper triangle of A */ bjj = bp[jj]; _starpu_dtpsv_(uplo, "Transpose", "Nonunit", &j, &bp[1], &ap[j1], & c__1); i__2 = j - 1; _starpu_dspmv_(uplo, &i__2, &c_b9, &ap[1], &bp[j1], &c__1, &c_b11, & ap[j1], &c__1); i__2 = j - 1; d__1 = 1. / bjj; _starpu_dscal_(&i__2, &d__1, &ap[j1], &c__1); i__2 = j - 1; ap[jj] = (ap[jj] - _starpu_ddot_(&i__2, &ap[j1], &c__1, &bp[j1], & c__1)) / bjj; /* L10: */ } } else { /* Compute inv(L)*A*inv(L') */ /* KK and K1K1 are the indices of A(k,k) and A(k+1,k+1) */ kk = 1; i__1 = *n; for (k = 1; k <= i__1; ++k) { k1k1 = kk + *n - k + 1; /* Update the lower triangle of A(k:n,k:n) */ akk = ap[kk]; bkk = bp[kk]; /* Computing 2nd power */ d__1 = bkk; akk /= d__1 * d__1; ap[kk] = akk; if (k < *n) { i__2 = *n - k; d__1 = 1. / bkk; _starpu_dscal_(&i__2, &d__1, &ap[kk + 1], &c__1); ct = akk * -.5; i__2 = *n - k; _starpu_daxpy_(&i__2, &ct, &bp[kk + 1], &c__1, &ap[kk + 1], &c__1) ; i__2 = *n - k; _starpu_dspr2_(uplo, &i__2, &c_b9, &ap[kk + 1], &c__1, &bp[kk + 1] , &c__1, &ap[k1k1]); i__2 = *n - k; _starpu_daxpy_(&i__2, &ct, &bp[kk + 1], &c__1, &ap[kk + 1], &c__1) ; i__2 = *n - k; _starpu_dtpsv_(uplo, "No transpose", "Non-unit", &i__2, &bp[k1k1], &ap[kk + 1], &c__1); } kk = k1k1; /* L20: */ } } } else { if (upper) { /* Compute U*A*U' */ /* K1 and KK are the indices of A(1,k) and A(k,k) */ kk = 0; i__1 = *n; for (k = 1; k <= i__1; ++k) { k1 = kk + 1; kk += k; /* Update the upper triangle of A(1:k,1:k) */ akk = ap[kk]; bkk = bp[kk]; i__2 = k - 1; _starpu_dtpmv_(uplo, "No transpose", "Non-unit", &i__2, &bp[1], &ap[ k1], &c__1); ct = akk * .5; i__2 = k - 1; _starpu_daxpy_(&i__2, &ct, &bp[k1], &c__1, &ap[k1], &c__1); i__2 = k - 1; _starpu_dspr2_(uplo, &i__2, &c_b11, &ap[k1], &c__1, &bp[k1], &c__1, & ap[1]); i__2 = k - 1; _starpu_daxpy_(&i__2, &ct, &bp[k1], &c__1, &ap[k1], &c__1); i__2 = k - 1; _starpu_dscal_(&i__2, &bkk, &ap[k1], &c__1); /* Computing 2nd power */ d__1 = bkk; ap[kk] = akk * (d__1 * d__1); /* L30: */ } } else { /* Compute L'*A*L */ /* JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1) */ jj = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { j1j1 = jj + *n - j + 1; /* Compute the j-th column of the lower triangle of A */ ajj = ap[jj]; bjj = bp[jj]; i__2 = *n - j; ap[jj] = ajj * bjj + _starpu_ddot_(&i__2, &ap[jj + 1], &c__1, &bp[jj + 1], &c__1); i__2 = *n - j; _starpu_dscal_(&i__2, &bjj, &ap[jj + 1], &c__1); i__2 = *n - j; _starpu_dspmv_(uplo, &i__2, &c_b11, &ap[j1j1], &bp[jj + 1], &c__1, & c_b11, &ap[jj + 1], &c__1); i__2 = *n - j + 1; _starpu_dtpmv_(uplo, "Transpose", "Non-unit", &i__2, &bp[jj], &ap[jj], &c__1); jj = j1j1; /* L40: */ } } } return 0; /* End of DSPGST */ } /* _starpu_dspgst_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dspgv.c000066400000000000000000000171771507764646700205340ustar00rootroot00000000000000/* dspgv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dspgv_(integer *itype, char *jobz, char *uplo, integer * n, doublereal *ap, doublereal *bp, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1; /* Local variables */ integer j, neig; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dspev_(char *, char *, integer *, doublereal * , doublereal *, doublereal *, integer *, doublereal *, integer *); char trans[1]; logical upper; extern /* Subroutine */ int _starpu_dtpmv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *), _starpu_dtpsv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *); logical wantz; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpptrf_( char *, integer *, doublereal *, integer *), _starpu_dspgst_( integer *, char *, integer *, doublereal *, doublereal *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPGV computes all the eigenvalues and, optionally, the eigenvectors */ /* of a real generalized symmetric-definite eigenproblem, of the form */ /* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. */ /* Here A and B are assumed to be symmetric, stored in packed format, */ /* and B is also positive definite. */ /* Arguments */ /* ========= */ /* ITYPE (input) INTEGER */ /* Specifies the problem type to be solved: */ /* = 1: A*x = (lambda)*B*x */ /* = 2: A*B*x = (lambda)*x */ /* = 3: B*A*x = (lambda)*x */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangles of A and B are stored; */ /* = 'L': Lower triangles of A and B are stored. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension */ /* (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* A, packed columnwise in a linear array. The j-th column of A */ /* is stored in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ /* On exit, the contents of AP are destroyed. */ /* BP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* B, packed columnwise in a linear array. The j-th column of B */ /* is stored in the array BP as follows: */ /* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; */ /* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. */ /* On exit, the triangular factor U or L from the Cholesky */ /* factorization B = U**T*U or B = L*L**T, in the same storage */ /* format as B. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, the eigenvalues in ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ /* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ /* eigenvectors. The eigenvectors are normalized as follows: */ /* if ITYPE = 1 or 2, Z**T*B*Z = I; */ /* if ITYPE = 3, Z**T*inv(B)*Z = I. */ /* If JOBZ = 'N', then Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: DPPTRF or DSPEV returned an error code: */ /* <= N: if INFO = i, DSPEV failed to converge; */ /* i off-diagonal elements of an intermediate */ /* tridiagonal form did not converge to zero. */ /* > N: if INFO = n + i, for 1 <= i <= n, then the leading */ /* minor of order i of B is not positive definite. */ /* The factorization of B could not be completed and */ /* no eigenvalues or eigenvectors were computed. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --bp; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); upper = _starpu_lsame_(uplo, "U"); *info = 0; if (*itype < 1 || *itype > 3) { *info = -1; } else if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -2; } else if (! (upper || _starpu_lsame_(uplo, "L"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -9; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPGV ", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Form a Cholesky factorization of B. */ _starpu_dpptrf_(uplo, n, &bp[1], info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem and solve. */ _starpu_dspgst_(itype, uplo, n, &ap[1], &bp[1], info); _starpu_dspev_(jobz, uplo, n, &ap[1], &w[1], &z__[z_offset], ldz, &work[1], info); if (wantz) { /* Backtransform eigenvectors to the original problem. */ neig = *n; if (*info > 0) { neig = *info - 1; } if (*itype == 1 || *itype == 2) { /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ /* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ if (upper) { *(unsigned char *)trans = 'N'; } else { *(unsigned char *)trans = 'T'; } i__1 = neig; for (j = 1; j <= i__1; ++j) { _starpu_dtpsv_(uplo, trans, "Non-unit", n, &bp[1], &z__[j * z_dim1 + 1], &c__1); /* L10: */ } } else if (*itype == 3) { /* For B*A*x=(lambda)*x; */ /* backtransform eigenvectors: x = L*y or U'*y */ if (upper) { *(unsigned char *)trans = 'T'; } else { *(unsigned char *)trans = 'N'; } i__1 = neig; for (j = 1; j <= i__1; ++j) { _starpu_dtpmv_(uplo, trans, "Non-unit", n, &bp[1], &z__[j * z_dim1 + 1], &c__1); /* L20: */ } } } return 0; /* End of DSPGV */ } /* _starpu_dspgv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dspgvd.c000066400000000000000000000252441507764646700206720ustar00rootroot00000000000000/* dspgvd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dspgvd_(integer *itype, char *jobz, char *uplo, integer * n, doublereal *ap, doublereal *bp, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1; doublereal d__1, d__2; /* Local variables */ integer j, neig; extern logical _starpu_lsame_(char *, char *); integer lwmin; char trans[1]; logical upper; extern /* Subroutine */ int _starpu_dtpmv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *), _starpu_dtpsv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *); logical wantz; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dspevd_( char *, char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *); integer liwmin; extern /* Subroutine */ int _starpu_dpptrf_(char *, integer *, doublereal *, integer *), _starpu_dspgst_(integer *, char *, integer *, doublereal *, doublereal *, integer *); logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPGVD computes all the eigenvalues, and optionally, the eigenvectors */ /* of a real generalized symmetric-definite eigenproblem, of the form */ /* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and */ /* B are assumed to be symmetric, stored in packed format, and B is also */ /* positive definite. */ /* If eigenvectors are desired, it uses a divide and conquer algorithm. */ /* The divide and conquer algorithm makes very mild assumptions about */ /* floating point arithmetic. It will work on machines with a guard */ /* digit in add/subtract, or on those binary machines without guard */ /* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ /* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. */ /* Arguments */ /* ========= */ /* ITYPE (input) INTEGER */ /* Specifies the problem type to be solved: */ /* = 1: A*x = (lambda)*B*x */ /* = 2: A*B*x = (lambda)*x */ /* = 3: B*A*x = (lambda)*x */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangles of A and B are stored; */ /* = 'L': Lower triangles of A and B are stored. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* A, packed columnwise in a linear array. The j-th column of A */ /* is stored in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ /* On exit, the contents of AP are destroyed. */ /* BP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* B, packed columnwise in a linear array. The j-th column of B */ /* is stored in the array BP as follows: */ /* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; */ /* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. */ /* On exit, the triangular factor U or L from the Cholesky */ /* factorization B = U**T*U or B = L*L**T, in the same storage */ /* format as B. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, the eigenvalues in ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ /* If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ /* eigenvectors. The eigenvectors are normalized as follows: */ /* if ITYPE = 1 or 2, Z**T*B*Z = I; */ /* if ITYPE = 3, Z**T*inv(B)*Z = I. */ /* If JOBZ = 'N', then Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the required LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If N <= 1, LWORK >= 1. */ /* If JOBZ = 'N' and N > 1, LWORK >= 2*N. */ /* If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the required sizes of the WORK and IWORK */ /* arrays, returns these values as the first entries of the WORK */ /* and IWORK arrays, and no error message related to LWORK or */ /* LIWORK is issued by XERBLA. */ /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ /* On exit, if INFO = 0, IWORK(1) returns the required LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. */ /* If JOBZ = 'N' or N <= 1, LIWORK >= 1. */ /* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the required sizes of the WORK and */ /* IWORK arrays, returns these values as the first entries of */ /* the WORK and IWORK arrays, and no error message related to */ /* LWORK or LIWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: DPPTRF or DSPEVD returned an error code: */ /* <= N: if INFO = i, DSPEVD failed to converge; */ /* i off-diagonal elements of an intermediate */ /* tridiagonal form did not converge to zero; */ /* > N: if INFO = N + i, for 1 <= i <= N, then the leading */ /* minor of order i of B is not positive definite. */ /* The factorization of B could not be completed and */ /* no eigenvalues or eigenvectors were computed. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --bp; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; --iwork; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); upper = _starpu_lsame_(uplo, "U"); lquery = *lwork == -1 || *liwork == -1; *info = 0; if (*itype < 1 || *itype > 3) { *info = -1; } else if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -2; } else if (! (upper || _starpu_lsame_(uplo, "L"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -9; } if (*info == 0) { if (*n <= 1) { liwmin = 1; lwmin = 1; } else { if (wantz) { liwmin = *n * 5 + 3; /* Computing 2nd power */ i__1 = *n; lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); } else { liwmin = 1; lwmin = *n << 1; } } work[1] = (doublereal) lwmin; iwork[1] = liwmin; if (*lwork < lwmin && ! lquery) { *info = -11; } else if (*liwork < liwmin && ! lquery) { *info = -13; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPGVD", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Form a Cholesky factorization of BP. */ _starpu_dpptrf_(uplo, n, &bp[1], info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem and solve. */ _starpu_dspgst_(itype, uplo, n, &ap[1], &bp[1], info); _starpu_dspevd_(jobz, uplo, n, &ap[1], &w[1], &z__[z_offset], ldz, &work[1], lwork, &iwork[1], liwork, info); /* Computing MAX */ d__1 = (doublereal) lwmin; lwmin = (integer) max(d__1,work[1]); /* Computing MAX */ d__1 = (doublereal) liwmin, d__2 = (doublereal) iwork[1]; liwmin = (integer) max(d__1,d__2); if (wantz) { /* Backtransform eigenvectors to the original problem. */ neig = *n; if (*info > 0) { neig = *info - 1; } if (*itype == 1 || *itype == 2) { /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ /* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ if (upper) { *(unsigned char *)trans = 'N'; } else { *(unsigned char *)trans = 'T'; } i__1 = neig; for (j = 1; j <= i__1; ++j) { _starpu_dtpsv_(uplo, trans, "Non-unit", n, &bp[1], &z__[j * z_dim1 + 1], &c__1); /* L10: */ } } else if (*itype == 3) { /* For B*A*x=(lambda)*x; */ /* backtransform eigenvectors: x = L*y or U'*y */ if (upper) { *(unsigned char *)trans = 'T'; } else { *(unsigned char *)trans = 'N'; } i__1 = neig; for (j = 1; j <= i__1; ++j) { _starpu_dtpmv_(uplo, trans, "Non-unit", n, &bp[1], &z__[j * z_dim1 + 1], &c__1); /* L20: */ } } } work[1] = (doublereal) lwmin; iwork[1] = liwmin; return 0; /* End of DSPGVD */ } /* _starpu_dspgvd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dspgvx.c000066400000000000000000000272311507764646700207140ustar00rootroot00000000000000/* dspgvx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dspgvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, doublereal *ap, doublereal *bp, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1; /* Local variables */ integer j; extern logical _starpu_lsame_(char *, char *); char trans[1]; logical upper; extern /* Subroutine */ int _starpu_dtpmv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *), _starpu_dtpsv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *); logical wantz, alleig, indeig, valeig; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpptrf_( char *, integer *, doublereal *, integer *), _starpu_dspgst_( integer *, char *, integer *, doublereal *, doublereal *, integer *), _starpu_dspevx_(char *, char *, char *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPGVX computes selected eigenvalues, and optionally, eigenvectors */ /* of a real generalized symmetric-definite eigenproblem, of the form */ /* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A */ /* and B are assumed to be symmetric, stored in packed storage, and B */ /* is also positive definite. Eigenvalues and eigenvectors can be */ /* selected by specifying either a range of values or a range of indices */ /* for the desired eigenvalues. */ /* Arguments */ /* ========= */ /* ITYPE (input) INTEGER */ /* Specifies the problem type to be solved: */ /* = 1: A*x = (lambda)*B*x */ /* = 2: A*B*x = (lambda)*x */ /* = 3: B*A*x = (lambda)*x */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* RANGE (input) CHARACTER*1 */ /* = 'A': all eigenvalues will be found. */ /* = 'V': all eigenvalues in the half-open interval (VL,VU] */ /* will be found. */ /* = 'I': the IL-th through IU-th eigenvalues will be found. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A and B are stored; */ /* = 'L': Lower triangle of A and B are stored. */ /* N (input) INTEGER */ /* The order of the matrix pencil (A,B). N >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* A, packed columnwise in a linear array. The j-th column of A */ /* is stored in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ /* On exit, the contents of AP are destroyed. */ /* BP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* B, packed columnwise in a linear array. The j-th column of B */ /* is stored in the array BP as follows: */ /* if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; */ /* if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. */ /* On exit, the triangular factor U or L from the Cholesky */ /* factorization B = U**T*U or B = L*L**T, in the same storage */ /* format as B. */ /* VL (input) DOUBLE PRECISION */ /* VU (input) DOUBLE PRECISION */ /* If RANGE='V', the lower and upper bounds of the interval to */ /* be searched for eigenvalues. VL < VU. */ /* Not referenced if RANGE = 'A' or 'I'. */ /* IL (input) INTEGER */ /* IU (input) INTEGER */ /* If RANGE='I', the indices (in ascending order) of the */ /* smallest and largest eigenvalues to be returned. */ /* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ /* Not referenced if RANGE = 'A' or 'V'. */ /* ABSTOL (input) DOUBLE PRECISION */ /* The absolute error tolerance for the eigenvalues. */ /* An approximate eigenvalue is accepted as converged */ /* when it is determined to lie in an interval [a,b] */ /* of width less than or equal to */ /* ABSTOL + EPS * max( |a|,|b| ) , */ /* where EPS is the machine precision. If ABSTOL is less than */ /* or equal to zero, then EPS*|T| will be used in its place, */ /* where |T| is the 1-norm of the tridiagonal matrix obtained */ /* by reducing A to tridiagonal form. */ /* Eigenvalues will be computed most accurately when ABSTOL is */ /* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ /* If this routine returns with INFO>0, indicating that some */ /* eigenvectors did not converge, try setting ABSTOL to */ /* 2*DLAMCH('S'). */ /* M (output) INTEGER */ /* The total number of eigenvalues found. 0 <= M <= N. */ /* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* On normal exit, the first M elements contain the selected */ /* eigenvalues in ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) */ /* If JOBZ = 'N', then Z is not referenced. */ /* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ /* contain the orthonormal eigenvectors of the matrix A */ /* corresponding to the selected eigenvalues, with the i-th */ /* column of Z holding the eigenvector associated with W(i). */ /* The eigenvectors are normalized as follows: */ /* if ITYPE = 1 or 2, Z**T*B*Z = I; */ /* if ITYPE = 3, Z**T*inv(B)*Z = I. */ /* If an eigenvector fails to converge, then that column of Z */ /* contains the latest approximation to the eigenvector, and the */ /* index of the eigenvector is returned in IFAIL. */ /* Note: the user must ensure that at least max(1,M) columns are */ /* supplied in the array Z; if RANGE = 'V', the exact value of M */ /* is not known in advance and an upper bound must be used. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (8*N) */ /* IWORK (workspace) INTEGER array, dimension (5*N) */ /* IFAIL (output) INTEGER array, dimension (N) */ /* If JOBZ = 'V', then if INFO = 0, the first M elements of */ /* IFAIL are zero. If INFO > 0, then IFAIL contains the */ /* indices of the eigenvectors that failed to converge. */ /* If JOBZ = 'N', then IFAIL is not referenced. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: DPPTRF or DSPEVX returned an error code: */ /* <= N: if INFO = i, DSPEVX failed to converge; */ /* i eigenvectors failed to converge. Their indices */ /* are stored in array IFAIL. */ /* > N: if INFO = N + i, for 1 <= i <= N, then the leading */ /* minor of order i of B is not positive definite. */ /* The factorization of B could not be completed and */ /* no eigenvalues or eigenvectors were computed. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --bp; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; --iwork; --ifail; /* Function Body */ upper = _starpu_lsame_(uplo, "U"); wantz = _starpu_lsame_(jobz, "V"); alleig = _starpu_lsame_(range, "A"); valeig = _starpu_lsame_(range, "V"); indeig = _starpu_lsame_(range, "I"); *info = 0; if (*itype < 1 || *itype > 3) { *info = -1; } else if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -2; } else if (! (alleig || valeig || indeig)) { *info = -3; } else if (! (upper || _starpu_lsame_(uplo, "L"))) { *info = -4; } else if (*n < 0) { *info = -5; } else { if (valeig) { if (*n > 0 && *vu <= *vl) { *info = -9; } } else if (indeig) { if (*il < 1) { *info = -10; } else if (*iu < min(*n,*il) || *iu > *n) { *info = -11; } } } if (*info == 0) { if (*ldz < 1 || wantz && *ldz < *n) { *info = -16; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPGVX", &i__1); return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { return 0; } /* Form a Cholesky factorization of B. */ _starpu_dpptrf_(uplo, n, &bp[1], info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem and solve. */ _starpu_dspgst_(itype, uplo, n, &ap[1], &bp[1], info); _starpu_dspevx_(jobz, range, uplo, n, &ap[1], vl, vu, il, iu, abstol, m, &w[1], & z__[z_offset], ldz, &work[1], &iwork[1], &ifail[1], info); if (wantz) { /* Backtransform eigenvectors to the original problem. */ if (*info > 0) { *m = *info - 1; } if (*itype == 1 || *itype == 2) { /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ /* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ if (upper) { *(unsigned char *)trans = 'N'; } else { *(unsigned char *)trans = 'T'; } i__1 = *m; for (j = 1; j <= i__1; ++j) { _starpu_dtpsv_(uplo, trans, "Non-unit", n, &bp[1], &z__[j * z_dim1 + 1], &c__1); /* L10: */ } } else if (*itype == 3) { /* For B*A*x=(lambda)*x; */ /* backtransform eigenvectors: x = L*y or U'*y */ if (upper) { *(unsigned char *)trans = 'T'; } else { *(unsigned char *)trans = 'N'; } i__1 = *m; for (j = 1; j <= i__1; ++j) { _starpu_dtpmv_(uplo, trans, "Non-unit", n, &bp[1], &z__[j * z_dim1 + 1], &c__1); /* L20: */ } } } return 0; /* End of DSPGVX */ } /* _starpu_dspgvx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsposv.c000066400000000000000000000327431507764646700207230ustar00rootroot00000000000000/* dsposv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b10 = -1.; static doublereal c_b11 = 1.; static integer c__1 = 1; /* Subroutine */ int _starpu__starpu_dsposv_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * x, integer *ldx, doublereal *work, real *swork, integer *iter, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, work_dim1, work_offset, x_dim1, x_offset, i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__; doublereal cte, eps, anrm; integer ptsa; doublereal rnrm, xnrm; integer ptsx; extern logical _starpu_lsame_(char *, char *); integer iiter; extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsymm_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlag2s_(integer *, integer *, doublereal *, integer *, real *, integer *, integer *), _starpu_slag2d_(integer *, integer *, real *, integer *, doublereal *, integer *, integer *), _starpu_dlat2s_(char *, integer *, doublereal *, integer *, real *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dpotrf_(char *, integer *, doublereal *, integer *, integer *), _starpu_dpotrs_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_spotrf_(char *, integer *, real *, integer *, integer *), _starpu_spotrs_(char *, integer *, integer *, real *, integer *, real *, integer *, integer *); /* -- LAPACK PROTOTYPE driver routine (version 3.1.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */ /* May 2007 */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPOSV computes the solution to a real system of linear equations */ /* A * X = B, */ /* where A is an N-by-N symmetric positive definite matrix and X and B */ /* are N-by-NRHS matrices. */ /* DSPOSV first attempts to factorize the matrix in SINGLE PRECISION */ /* and use this factorization within an iterative refinement procedure */ /* to produce a solution with DOUBLE PRECISION normwise backward error */ /* quality (see below). If the approach fails the method switches to a */ /* DOUBLE PRECISION factorization and solve. */ /* The iterative refinement is not going to be a winning strategy if */ /* the ratio SINGLE PRECISION performance over DOUBLE PRECISION */ /* performance is too small. A reasonable strategy should take the */ /* number of right-hand sides and the size of the matrix into account. */ /* This might be done with a call to ILAENV in the future. Up to now, we */ /* always try iterative refinement. */ /* The iterative refinement process is stopped if */ /* ITER > ITERMAX */ /* or for all the RHS we have: */ /* RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX */ /* where */ /* o ITER is the number of the current iteration in the iterative */ /* refinement process */ /* o RNRM is the infinity-norm of the residual */ /* o XNRM is the infinity-norm of the solution */ /* o ANRM is the infinity-operator-norm of the matrix A */ /* o EPS is the machine epsilon returned by DLAMCH('Epsilon') */ /* The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 */ /* respectively. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* A (input or input/ouptut) DOUBLE PRECISION array, */ /* dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* N-by-N upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if iterative refinement has been successfully used */ /* (INFO.EQ.0 and ITER.GE.0, see description below), then A is */ /* unchanged, if double precision factorization has been used */ /* (INFO.EQ.0 and ITER.LT.0, see description below), then the */ /* array A contains the factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The N-by-NRHS right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* If INFO = 0, the N-by-NRHS solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N*NRHS) */ /* This array is used to hold the residual vectors. */ /* SWORK (workspace) REAL array, dimension (N*(N+NRHS)) */ /* This array is used to use the single precision matrix and the */ /* right-hand sides or solutions in single precision. */ /* ITER (output) INTEGER */ /* < 0: iterative refinement has failed, double precision */ /* factorization has been performed */ /* -1 : the routine fell back to full precision for */ /* implementation- or machine-specific reasons */ /* -2 : narrowing the precision induced an overflow, */ /* the routine fell back to full precision */ /* -3 : failure of SPOTRF */ /* -31: stop the iterative refinement after the 30th */ /* iterations */ /* > 0: iterative refinement has been sucessfully used. */ /* Returns the number of iterations */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the leading minor of order i of (DOUBLE */ /* PRECISION) A is not positive definite, so the */ /* factorization could not be completed, and the solution */ /* has not been computed. */ /* ========= */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ work_dim1 = *n; work_offset = 1 + work_dim1; work -= work_offset; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --swork; /* Function Body */ *info = 0; *iter = 0; /* Test the input parameters. */ if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } else if (*ldx < max(1,*n)) { *info = -9; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPOSV", &i__1); return 0; } /* Quick return if (N.EQ.0). */ if (*n == 0) { return 0; } /* Skip single precision iterative refinement if a priori slower */ /* than double precision factorization. */ if (FALSE_) { *iter = -1; goto L40; } /* Compute some constants. */ anrm = _starpu_dlansy_("I", uplo, n, &a[a_offset], lda, &work[work_offset]); eps = _starpu_dlamch_("Epsilon"); cte = anrm * eps * sqrt((doublereal) (*n)) * 1.; /* Set the indices PTSA, PTSX for referencing SA and SX in SWORK. */ ptsa = 1; ptsx = ptsa + *n * *n; /* Convert B from double precision to single precision and store the */ /* result in SX. */ _starpu_dlag2s_(n, nrhs, &b[b_offset], ldb, &swork[ptsx], n, info); if (*info != 0) { *iter = -2; goto L40; } /* Convert A from double precision to single precision and store the */ /* result in SA. */ _starpu_dlat2s_(uplo, n, &a[a_offset], lda, &swork[ptsa], n, info); if (*info != 0) { *iter = -2; goto L40; } /* Compute the Cholesky factorization of SA. */ _starpu_spotrf_(uplo, n, &swork[ptsa], n, info); if (*info != 0) { *iter = -3; goto L40; } /* Solve the system SA*SX = SB. */ _starpu_spotrs_(uplo, n, nrhs, &swork[ptsa], n, &swork[ptsx], n, info); /* Convert SX back to double precision */ _starpu_slag2d_(n, nrhs, &swork[ptsx], n, &x[x_offset], ldx, info); /* Compute R = B - AX (R is WORK). */ _starpu_dlacpy_("All", n, nrhs, &b[b_offset], ldb, &work[work_offset], n); _starpu_dsymm_("Left", uplo, n, nrhs, &c_b10, &a[a_offset], lda, &x[x_offset], ldx, &c_b11, &work[work_offset], n); /* Check whether the NRHS normwise backward errors satisfy the */ /* stopping criterion. If yes, set ITER=0 and return. */ i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { xnrm = (d__1 = x[_starpu_idamax_(n, &x[i__ * x_dim1 + 1], &c__1) + i__ * x_dim1], abs(d__1)); rnrm = (d__1 = work[_starpu_idamax_(n, &work[i__ * work_dim1 + 1], &c__1) + i__ * work_dim1], abs(d__1)); if (rnrm > xnrm * cte) { goto L10; } } /* If we are here, the NRHS normwise backward errors satisfy the */ /* stopping criterion. We are good to exit. */ *iter = 0; return 0; L10: for (iiter = 1; iiter <= 30; ++iiter) { /* Convert R (in WORK) from double precision to single precision */ /* and store the result in SX. */ _starpu_dlag2s_(n, nrhs, &work[work_offset], n, &swork[ptsx], n, info); if (*info != 0) { *iter = -2; goto L40; } /* Solve the system SA*SX = SR. */ _starpu_spotrs_(uplo, n, nrhs, &swork[ptsa], n, &swork[ptsx], n, info); /* Convert SX back to double precision and update the current */ /* iterate. */ _starpu_slag2d_(n, nrhs, &swork[ptsx], n, &work[work_offset], n, info); i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { _starpu_daxpy_(n, &c_b11, &work[i__ * work_dim1 + 1], &c__1, &x[i__ * x_dim1 + 1], &c__1); } /* Compute R = B - AX (R is WORK). */ _starpu_dlacpy_("All", n, nrhs, &b[b_offset], ldb, &work[work_offset], n); _starpu_dsymm_("L", uplo, n, nrhs, &c_b10, &a[a_offset], lda, &x[x_offset], ldx, &c_b11, &work[work_offset], n); /* Check whether the NRHS normwise backward errors satisfy the */ /* stopping criterion. If yes, set ITER=IITER>0 and return. */ i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { xnrm = (d__1 = x[_starpu_idamax_(n, &x[i__ * x_dim1 + 1], &c__1) + i__ * x_dim1], abs(d__1)); rnrm = (d__1 = work[_starpu_idamax_(n, &work[i__ * work_dim1 + 1], &c__1) + i__ * work_dim1], abs(d__1)); if (rnrm > xnrm * cte) { goto L20; } } /* If we are here, the NRHS normwise backward errors satisfy the */ /* stopping criterion, we are good to exit. */ *iter = iiter; return 0; L20: /* L30: */ ; } /* If we are at this place of the code, this is because we have */ /* performed ITER=ITERMAX iterations and never satisified the */ /* stopping criterion, set up the ITER flag accordingly and follow */ /* up on double precision routine. */ *iter = -31; L40: /* Single-precision iterative refinement failed to converge to a */ /* satisfactory solution, so we resort to double precision. */ _starpu_dpotrf_(uplo, n, &a[a_offset], lda, info); if (*info != 0) { return 0; } _starpu_dlacpy_("All", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); _starpu_dpotrs_(uplo, n, nrhs, &a[a_offset], lda, &x[x_offset], ldx, info); return 0; /* End of DSPOSV. */ } /* _starpu__starpu_dsposv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsprfs.c000066400000000000000000000305071507764646700207020ustar00rootroot00000000000000/* dsprfs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b12 = -1.; static doublereal c_b14 = 1.; /* Subroutine */ int _starpu_dsprfs_(char *uplo, integer *n, integer *nrhs, doublereal *ap, doublereal *afp, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j, k; doublereal s; integer ik, kk; doublereal xk; integer nz; doublereal eps; integer kase; doublereal safe1, safe2; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer count; extern /* Subroutine */ int _starpu_dspmv_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal lstres; extern /* Subroutine */ int _starpu_dsptrs_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPRFS improves the computed solution to a system of linear */ /* equations when the coefficient matrix is symmetric indefinite */ /* and packed, and provides error bounds and backward error estimates */ /* for the solution. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The upper or lower triangle of the symmetric matrix A, packed */ /* columnwise in a linear array. The j-th column of A is stored */ /* in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ /* AFP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The factored form of the matrix A. AFP contains the block */ /* diagonal matrix D and the multipliers used to obtain the */ /* factor U or L from the factorization A = U*D*U**T or */ /* A = L*D*L**T as computed by DSPTRF, stored as a packed */ /* triangular matrix. */ /* IPIV (input) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D */ /* as determined by DSPTRF. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* On entry, the solution matrix X, as computed by DSPTRS. */ /* On exit, the improved solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Internal Parameters */ /* =================== */ /* ITMAX is the maximum number of steps of iterative refinement. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --afp; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -8; } else if (*ldx < max(1,*n)) { *info = -10; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = *n + 1; eps = _starpu_dlamch_("Epsilon"); safmin = _starpu_dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - A * X */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); _starpu_dspmv_(uplo, n, &c_b12, &ap[1], &x[j * x_dim1 + 1], &c__1, &c_b14, & work[*n + 1], &c__1); /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. If the i-th component of the denominator is less */ /* than SAFE2, then SAFE1 is added to the i-th components of the */ /* numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); /* L30: */ } /* Compute abs(A)*abs(X) + abs(B). */ kk = 1; if (upper) { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; xk = (d__1 = x[k + j * x_dim1], abs(d__1)); ik = kk; i__3 = k - 1; for (i__ = 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = ap[ik], abs(d__1)) * xk; s += (d__1 = ap[ik], abs(d__1)) * (d__2 = x[i__ + j * x_dim1], abs(d__2)); ++ik; /* L40: */ } work[k] = work[k] + (d__1 = ap[kk + k - 1], abs(d__1)) * xk + s; kk += k; /* L50: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; xk = (d__1 = x[k + j * x_dim1], abs(d__1)); work[k] += (d__1 = ap[kk], abs(d__1)) * xk; ik = kk + 1; i__3 = *n; for (i__ = k + 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = ap[ik], abs(d__1)) * xk; s += (d__1 = ap[ik], abs(d__1)) * (d__2 = x[i__ + j * x_dim1], abs(d__2)); ++ik; /* L60: */ } work[k] += s; kk += *n - k + 1; /* L70: */ } } s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L80: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if */ /* 1) The residual BERR(J) is larger than machine epsilon, and */ /* 2) BERR(J) decreased by at least a factor of 2 during the */ /* last iteration, and */ /* 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { /* Update solution and try again. */ _starpu_dsptrs_(uplo, n, &c__1, &afp[1], &ipiv[1], &work[*n + 1], n, info); _starpu_daxpy_(n, &c_b14, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) ; lstres = berr[j]; ++count; goto L20; } /* Bound error from formula */ /* norm(X - XTRUE) / norm(X) .le. FERR = */ /* norm( abs(inv(A))* */ /* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */ /* where */ /* norm(Z) is the magnitude of the largest component of Z */ /* inv(A) is the inverse of A */ /* abs(Z) is the componentwise absolute value of the matrix or */ /* vector Z */ /* NZ is the maximum number of nonzeros in any row of A, plus 1 */ /* EPS is machine epsilon */ /* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */ /* is incremented by SAFE1 if the i-th component of */ /* abs(A)*abs(X) + abs(B) is less than SAFE2. */ /* Use DLACN2 to estimate the infinity-norm of the matrix */ /* inv(A) * diag(W), */ /* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L90: */ } kase = 0; L100: _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase, isave); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(A'). */ _starpu_dsptrs_(uplo, n, &c__1, &afp[1], &ipiv[1], &work[*n + 1], n, info); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L110: */ } } else if (kase == 2) { /* Multiply by inv(A)*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L120: */ } _starpu_dsptrs_(uplo, n, &c__1, &afp[1], &ipiv[1], &work[*n + 1], n, info); } goto L100; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); lstres = max(d__2,d__3); /* L130: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L140: */ } return 0; /* End of DSPRFS */ } /* _starpu_dsprfs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dspsv.c000066400000000000000000000140211507764646700205310ustar00rootroot00000000000000/* dspsv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dspsv_(char *uplo, integer *n, integer *nrhs, doublereal *ap, integer *ipiv, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer b_dim1, b_offset, i__1; /* Local variables */ extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dsptrf_( char *, integer *, doublereal *, integer *, integer *), _starpu_dsptrs_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPSV computes the solution to a real system of linear equations */ /* A * X = B, */ /* where A is an N-by-N symmetric matrix stored in packed format and X */ /* and B are N-by-NRHS matrices. */ /* The diagonal pivoting method is used to factor A as */ /* A = U * D * U**T, if UPLO = 'U', or */ /* A = L * D * L**T, if UPLO = 'L', */ /* where U (or L) is a product of permutation and unit upper (lower) */ /* triangular matrices, D is symmetric and block diagonal with 1-by-1 */ /* and 2-by-2 diagonal blocks. The factored form of A is then used to */ /* solve the system of equations A * X = B. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* A, packed columnwise in a linear array. The j-th column of A */ /* is stored in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* See below for further details. */ /* On exit, the block diagonal matrix D and the multipliers used */ /* to obtain the factor U or L from the factorization */ /* A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as */ /* a packed triangular matrix in the same storage format as A. */ /* IPIV (output) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D, as */ /* determined by DSPTRF. If IPIV(k) > 0, then rows and columns */ /* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 */ /* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, */ /* then rows and columns k-1 and -IPIV(k) were interchanged and */ /* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and */ /* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and */ /* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 */ /* diagonal block. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS right hand side matrix B. */ /* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, D(i,i) is exactly zero. The factorization */ /* has been completed, but the block diagonal matrix D is */ /* exactly singular, so the solution could not be */ /* computed. */ /* Further Details */ /* =============== */ /* The packed storage scheme is illustrated by the following example */ /* when N = 4, UPLO = 'U': */ /* Two-dimensional storage of the symmetric matrix A: */ /* a11 a12 a13 a14 */ /* a22 a23 a24 */ /* a33 a34 (aij = aji) */ /* a44 */ /* Packed storage of the upper triangle of A: */ /* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] */ /* ===================================================================== */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPSV ", &i__1); return 0; } /* Compute the factorization A = U*D*U' or A = L*D*L'. */ _starpu_dsptrf_(uplo, n, &ap[1], &ipiv[1], info); if (*info == 0) { /* Solve the system A*X = B, overwriting B with X. */ _starpu_dsptrs_(uplo, n, nrhs, &ap[1], &ipiv[1], &b[b_offset], ldb, info); } return 0; /* End of DSPSV */ } /* _starpu_dspsv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dspsvx.c000066400000000000000000000306621507764646700207320ustar00rootroot00000000000000/* dspsvx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dspsvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *ap, doublereal *afp, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1; /* Local variables */ extern logical _starpu_lsame_(char *, char *); doublereal anorm; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal _starpu_dlamch_(char *); logical nofact; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern doublereal _starpu_dlansp_(char *, char *, integer *, doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dspcon_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dsprfs_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dsptrf_(char *, integer *, doublereal *, integer *, integer *), _starpu_dsptrs_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPSVX uses the diagonal pivoting factorization A = U*D*U**T or */ /* A = L*D*L**T to compute the solution to a real system of linear */ /* equations A * X = B, where A is an N-by-N symmetric matrix stored */ /* in packed format and X and B are N-by-NRHS matrices. */ /* Error bounds on the solution and a condition estimate are also */ /* provided. */ /* Description */ /* =========== */ /* The following steps are performed: */ /* 1. If FACT = 'N', the diagonal pivoting method is used to factor A as */ /* A = U * D * U**T, if UPLO = 'U', or */ /* A = L * D * L**T, if UPLO = 'L', */ /* where U (or L) is a product of permutation and unit upper (lower) */ /* triangular matrices and D is symmetric and block diagonal with */ /* 1-by-1 and 2-by-2 diagonal blocks. */ /* 2. If some D(i,i)=0, so that D is exactly singular, then the routine */ /* returns with INFO = i. Otherwise, the factored form of A is used */ /* to estimate the condition number of the matrix A. If the */ /* reciprocal of the condition number is less than machine precision, */ /* INFO = N+1 is returned as a warning, but the routine still goes on */ /* to solve for X and compute error bounds as described below. */ /* 3. The system of equations is solved for X using the factored form */ /* of A. */ /* 4. Iterative refinement is applied to improve the computed solution */ /* matrix and calculate error bounds and backward error estimates */ /* for it. */ /* Arguments */ /* ========= */ /* FACT (input) CHARACTER*1 */ /* Specifies whether or not the factored form of A has been */ /* supplied on entry. */ /* = 'F': On entry, AFP and IPIV contain the factored form of */ /* A. AP, AFP and IPIV will not be modified. */ /* = 'N': The matrix A will be copied to AFP and factored. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The upper or lower triangle of the symmetric matrix A, packed */ /* columnwise in a linear array. The j-th column of A is stored */ /* in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ /* See below for further details. */ /* AFP (input or output) DOUBLE PRECISION array, dimension */ /* (N*(N+1)/2) */ /* If FACT = 'F', then AFP is an input argument and on entry */ /* contains the block diagonal matrix D and the multipliers used */ /* to obtain the factor U or L from the factorization */ /* A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as */ /* a packed triangular matrix in the same storage format as A. */ /* If FACT = 'N', then AFP is an output argument and on exit */ /* contains the block diagonal matrix D and the multipliers used */ /* to obtain the factor U or L from the factorization */ /* A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as */ /* a packed triangular matrix in the same storage format as A. */ /* IPIV (input or output) INTEGER array, dimension (N) */ /* If FACT = 'F', then IPIV is an input argument and on entry */ /* contains details of the interchanges and the block structure */ /* of D, as determined by DSPTRF. */ /* If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ /* interchanged and D(k,k) is a 1-by-1 diagonal block. */ /* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */ /* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ /* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = */ /* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */ /* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ /* If FACT = 'N', then IPIV is an output argument and on exit */ /* contains details of the interchanges and the block structure */ /* of D, as determined by DSPTRF. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The N-by-NRHS right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* The estimate of the reciprocal condition number of the matrix */ /* A. If RCOND is less than the machine precision (in */ /* particular, if RCOND = 0), the matrix is singular to working */ /* precision. This condition is indicated by a return code of */ /* INFO > 0. */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, and i is */ /* <= N: D(i,i) is exactly zero. The factorization */ /* has been completed but the factor D is exactly */ /* singular, so the solution and error bounds could */ /* not be computed. RCOND = 0 is returned. */ /* = N+1: D is nonsingular, but RCOND is less than machine */ /* precision, meaning that the matrix is singular */ /* to working precision. Nevertheless, the */ /* solution and error bounds are computed because */ /* there are a number of situations where the */ /* computed solution can be more accurate than the */ /* value of RCOND would suggest. */ /* Further Details */ /* =============== */ /* The packed storage scheme is illustrated by the following example */ /* when N = 4, UPLO = 'U': */ /* Two-dimensional storage of the symmetric matrix A: */ /* a11 a12 a13 a14 */ /* a22 a23 a24 */ /* a33 a34 (aij = aji) */ /* a44 */ /* Packed storage of the upper triangle of A: */ /* AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --afp; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; nofact = _starpu_lsame_(fact, "N"); if (! nofact && ! _starpu_lsame_(fact, "F")) { *info = -1; } else if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*ldb < max(1,*n)) { *info = -9; } else if (*ldx < max(1,*n)) { *info = -11; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPSVX", &i__1); return 0; } if (nofact) { /* Compute the factorization A = U*D*U' or A = L*D*L'. */ i__1 = *n * (*n + 1) / 2; _starpu_dcopy_(&i__1, &ap[1], &c__1, &afp[1], &c__1); _starpu_dsptrf_(uplo, n, &afp[1], &ipiv[1], info); /* Return if INFO is non-zero. */ if (*info > 0) { *rcond = 0.; return 0; } } /* Compute the norm of the matrix A. */ anorm = _starpu_dlansp_("I", uplo, n, &ap[1], &work[1]); /* Compute the reciprocal of the condition number of A. */ _starpu_dspcon_(uplo, n, &afp[1], &ipiv[1], &anorm, rcond, &work[1], &iwork[1], info); /* Compute the solution vectors X. */ _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); _starpu_dsptrs_(uplo, n, nrhs, &afp[1], &ipiv[1], &x[x_offset], ldx, info); /* Use iterative refinement to improve the computed solutions and */ /* compute error bounds and backward error estimates for them. */ _starpu_dsprfs_(uplo, n, nrhs, &ap[1], &afp[1], &ipiv[1], &b[b_offset], ldb, &x[ x_offset], ldx, &ferr[1], &berr[1], &work[1], &iwork[1], info); /* Set INFO = N+1 if the matrix is singular to working precision. */ if (*rcond < _starpu_dlamch_("Epsilon")) { *info = *n + 1; } return 0; /* End of DSPSVX */ } /* _starpu_dspsvx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsptrd.c000066400000000000000000000204141507764646700206750ustar00rootroot00000000000000/* dsptrd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b8 = 0.; static doublereal c_b14 = -1.; /* Subroutine */ int _starpu_dsptrd_(char *uplo, integer *n, doublereal *ap, doublereal *d__, doublereal *e, doublereal *tau, integer *info) { /* System generated locals */ integer i__1, i__2; /* Local variables */ integer i__, i1, ii, i1i1; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal taui; extern /* Subroutine */ int _starpu_dspr2_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *); doublereal alpha; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dspmv_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPTRD reduces a real symmetric matrix A stored in packed form to */ /* symmetric tridiagonal form T by an orthogonal similarity */ /* transformation: Q**T * A * Q = T. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* A, packed columnwise in a linear array. The j-th column of A */ /* is stored in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ /* On exit, if UPLO = 'U', the diagonal and first superdiagonal */ /* of A are overwritten by the corresponding elements of the */ /* tridiagonal matrix T, and the elements above the first */ /* superdiagonal, with the array TAU, represent the orthogonal */ /* matrix Q as a product of elementary reflectors; if UPLO */ /* = 'L', the diagonal and first subdiagonal of A are over- */ /* written by the corresponding elements of the tridiagonal */ /* matrix T, and the elements below the first subdiagonal, with */ /* the array TAU, represent the orthogonal matrix Q as a product */ /* of elementary reflectors. See Further Details. */ /* D (output) DOUBLE PRECISION array, dimension (N) */ /* The diagonal elements of the tridiagonal matrix T: */ /* D(i) = A(i,i). */ /* E (output) DOUBLE PRECISION array, dimension (N-1) */ /* The off-diagonal elements of the tridiagonal matrix T: */ /* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */ /* TAU (output) DOUBLE PRECISION array, dimension (N-1) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* If UPLO = 'U', the matrix Q is represented as a product of elementary */ /* reflectors */ /* Q = H(n-1) . . . H(2) H(1). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in AP, */ /* overwriting A(1:i-1,i+1), and tau is stored in TAU(i). */ /* If UPLO = 'L', the matrix Q is represented as a product of elementary */ /* reflectors */ /* Q = H(1) H(2) . . . H(n-1). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP, */ /* overwriting A(i+2:n,i), and tau is stored in TAU(i). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ --tau; --e; --d__; --ap; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPTRD", &i__1); return 0; } /* Quick return if possible */ if (*n <= 0) { return 0; } if (upper) { /* Reduce the upper triangle of A. */ /* I1 is the index in AP of A(1,I+1). */ i1 = *n * (*n - 1) / 2 + 1; for (i__ = *n - 1; i__ >= 1; --i__) { /* Generate elementary reflector H(i) = I - tau * v * v' */ /* to annihilate A(1:i-1,i+1) */ _starpu_dlarfg_(&i__, &ap[i1 + i__ - 1], &ap[i1], &c__1, &taui); e[i__] = ap[i1 + i__ - 1]; if (taui != 0.) { /* Apply H(i) from both sides to A(1:i,1:i) */ ap[i1 + i__ - 1] = 1.; /* Compute y := tau * A * v storing y in TAU(1:i) */ _starpu_dspmv_(uplo, &i__, &taui, &ap[1], &ap[i1], &c__1, &c_b8, &tau[ 1], &c__1); /* Compute w := y - 1/2 * tau * (y'*v) * v */ alpha = taui * -.5 * _starpu_ddot_(&i__, &tau[1], &c__1, &ap[i1], & c__1); _starpu_daxpy_(&i__, &alpha, &ap[i1], &c__1, &tau[1], &c__1); /* Apply the transformation as a rank-2 update: */ /* A := A - v * w' - w * v' */ _starpu_dspr2_(uplo, &i__, &c_b14, &ap[i1], &c__1, &tau[1], &c__1, & ap[1]); ap[i1 + i__ - 1] = e[i__]; } d__[i__ + 1] = ap[i1 + i__]; tau[i__] = taui; i1 -= i__; /* L10: */ } d__[1] = ap[1]; } else { /* Reduce the lower triangle of A. II is the index in AP of */ /* A(i,i) and I1I1 is the index of A(i+1,i+1). */ ii = 1; i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { i1i1 = ii + *n - i__ + 1; /* Generate elementary reflector H(i) = I - tau * v * v' */ /* to annihilate A(i+2:n,i) */ i__2 = *n - i__; _starpu_dlarfg_(&i__2, &ap[ii + 1], &ap[ii + 2], &c__1, &taui); e[i__] = ap[ii + 1]; if (taui != 0.) { /* Apply H(i) from both sides to A(i+1:n,i+1:n) */ ap[ii + 1] = 1.; /* Compute y := tau * A * v storing y in TAU(i:n-1) */ i__2 = *n - i__; _starpu_dspmv_(uplo, &i__2, &taui, &ap[i1i1], &ap[ii + 1], &c__1, & c_b8, &tau[i__], &c__1); /* Compute w := y - 1/2 * tau * (y'*v) * v */ i__2 = *n - i__; alpha = taui * -.5 * _starpu_ddot_(&i__2, &tau[i__], &c__1, &ap[ii + 1], &c__1); i__2 = *n - i__; _starpu_daxpy_(&i__2, &alpha, &ap[ii + 1], &c__1, &tau[i__], &c__1); /* Apply the transformation as a rank-2 update: */ /* A := A - v * w' - w * v' */ i__2 = *n - i__; _starpu_dspr2_(uplo, &i__2, &c_b14, &ap[ii + 1], &c__1, &tau[i__], & c__1, &ap[i1i1]); ap[ii + 1] = e[i__]; } d__[i__] = ap[ii]; tau[i__] = taui; ii = i1i1; /* L20: */ } d__[*n] = ap[ii]; } return 0; /* End of DSPTRD */ } /* _starpu_dsptrd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsptrf.c000066400000000000000000000415641507764646700207100ustar00rootroot00000000000000/* dsptrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dsptrf_(char *uplo, integer *n, doublereal *ap, integer * ipiv, integer *info) { /* System generated locals */ integer i__1, i__2; doublereal d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, k; doublereal t, r1, d11, d12, d21, d22; integer kc, kk, kp; doublereal wk; integer kx, knc, kpc, npp; doublereal wkm1, wkp1; integer imax, jmax; extern /* Subroutine */ int _starpu_dspr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *); doublereal alpha; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); integer kstep; logical upper; doublereal absakk; extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal colmax, rowmax; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPTRF computes the factorization of a real symmetric matrix A stored */ /* in packed format using the Bunch-Kaufman diagonal pivoting method: */ /* A = U*D*U**T or A = L*D*L**T */ /* where U (or L) is a product of permutation and unit upper (lower) */ /* triangular matrices, and D is symmetric and block diagonal with */ /* 1-by-1 and 2-by-2 diagonal blocks. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangle of the symmetric matrix */ /* A, packed columnwise in a linear array. The j-th column of A */ /* is stored in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* On exit, the block diagonal matrix D and the multipliers used */ /* to obtain the factor U or L, stored as a packed triangular */ /* matrix overwriting A (see below for further details). */ /* IPIV (output) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D. */ /* If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ /* interchanged and D(k,k) is a 1-by-1 diagonal block. */ /* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */ /* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ /* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = */ /* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */ /* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, D(i,i) is exactly zero. The factorization */ /* has been completed, but the block diagonal matrix D is */ /* exactly singular, and division by zero will occur if it */ /* is used to solve a system of equations. */ /* Further Details */ /* =============== */ /* 5-96 - Based on modifications by J. Lewis, Boeing Computer Services */ /* Company */ /* If UPLO = 'U', then A = U*D*U', where */ /* U = P(n)*U(n)* ... *P(k)U(k)* ..., */ /* i.e., U is a product of terms P(k)*U(k), where k decreases from n to */ /* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ /* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ /* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such */ /* that if the diagonal block D(k) is of order s (s = 1 or 2), then */ /* ( I v 0 ) k-s */ /* U(k) = ( 0 I 0 ) s */ /* ( 0 0 I ) n-k */ /* k-s s n-k */ /* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). */ /* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), */ /* and A(k,k), and v overwrites A(1:k-2,k-1:k). */ /* If UPLO = 'L', then A = L*D*L', where */ /* L = P(1)*L(1)* ... *P(k)*L(k)* ..., */ /* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to */ /* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ /* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ /* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such */ /* that if the diagonal block D(k) is of order s (s = 1 or 2), then */ /* ( I 0 0 ) k-1 */ /* L(k) = ( 0 I 0 ) s */ /* ( 0 v I ) n-k-s+1 */ /* k-1 s n-k-s+1 */ /* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). */ /* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), */ /* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ipiv; --ap; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPTRF", &i__1); return 0; } /* Initialize ALPHA for use in choosing pivot block size. */ alpha = (sqrt(17.) + 1.) / 8.; if (upper) { /* Factorize A as U*D*U' using the upper triangle of A */ /* K is the main loop index, decreasing from N to 1 in steps of */ /* 1 or 2 */ k = *n; kc = (*n - 1) * *n / 2 + 1; L10: knc = kc; /* If K < 1, exit from loop */ if (k < 1) { goto L110; } kstep = 1; /* Determine rows and columns to be interchanged and whether */ /* a 1-by-1 or 2-by-2 pivot block will be used */ absakk = (d__1 = ap[kc + k - 1], abs(d__1)); /* IMAX is the row-index of the largest off-diagonal element in */ /* column K, and COLMAX is its absolute value */ if (k > 1) { i__1 = k - 1; imax = _starpu_idamax_(&i__1, &ap[kc], &c__1); colmax = (d__1 = ap[kc + imax - 1], abs(d__1)); } else { colmax = 0.; } if (max(absakk,colmax) == 0.) { /* Column K is zero: set INFO and continue */ if (*info == 0) { *info = k; } kp = k; } else { if (absakk >= alpha * colmax) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else { /* JMAX is the column-index of the largest off-diagonal */ /* element in row IMAX, and ROWMAX is its absolute value */ rowmax = 0.; jmax = imax; kx = imax * (imax + 1) / 2 + imax; i__1 = k; for (j = imax + 1; j <= i__1; ++j) { if ((d__1 = ap[kx], abs(d__1)) > rowmax) { rowmax = (d__1 = ap[kx], abs(d__1)); jmax = j; } kx += j; /* L20: */ } kpc = (imax - 1) * imax / 2 + 1; if (imax > 1) { i__1 = imax - 1; jmax = _starpu_idamax_(&i__1, &ap[kpc], &c__1); /* Computing MAX */ d__2 = rowmax, d__3 = (d__1 = ap[kpc + jmax - 1], abs( d__1)); rowmax = max(d__2,d__3); } if (absakk >= alpha * colmax * (colmax / rowmax)) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else if ((d__1 = ap[kpc + imax - 1], abs(d__1)) >= alpha * rowmax) { /* interchange rows and columns K and IMAX, use 1-by-1 */ /* pivot block */ kp = imax; } else { /* interchange rows and columns K-1 and IMAX, use 2-by-2 */ /* pivot block */ kp = imax; kstep = 2; } } kk = k - kstep + 1; if (kstep == 2) { knc = knc - k + 1; } if (kp != kk) { /* Interchange rows and columns KK and KP in the leading */ /* submatrix A(1:k,1:k) */ i__1 = kp - 1; _starpu_dswap_(&i__1, &ap[knc], &c__1, &ap[kpc], &c__1); kx = kpc + kp - 1; i__1 = kk - 1; for (j = kp + 1; j <= i__1; ++j) { kx = kx + j - 1; t = ap[knc + j - 1]; ap[knc + j - 1] = ap[kx]; ap[kx] = t; /* L30: */ } t = ap[knc + kk - 1]; ap[knc + kk - 1] = ap[kpc + kp - 1]; ap[kpc + kp - 1] = t; if (kstep == 2) { t = ap[kc + k - 2]; ap[kc + k - 2] = ap[kc + kp - 1]; ap[kc + kp - 1] = t; } } /* Update the leading submatrix */ if (kstep == 1) { /* 1-by-1 pivot block D(k): column k now holds */ /* W(k) = U(k)*D(k) */ /* where U(k) is the k-th column of U */ /* Perform a rank-1 update of A(1:k-1,1:k-1) as */ /* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' */ r1 = 1. / ap[kc + k - 1]; i__1 = k - 1; d__1 = -r1; _starpu_dspr_(uplo, &i__1, &d__1, &ap[kc], &c__1, &ap[1]); /* Store U(k) in column k */ i__1 = k - 1; _starpu_dscal_(&i__1, &r1, &ap[kc], &c__1); } else { /* 2-by-2 pivot block D(k): columns k and k-1 now hold */ /* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) */ /* where U(k) and U(k-1) are the k-th and (k-1)-th columns */ /* of U */ /* Perform a rank-2 update of A(1:k-2,1:k-2) as */ /* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' */ /* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' */ if (k > 2) { d12 = ap[k - 1 + (k - 1) * k / 2]; d22 = ap[k - 1 + (k - 2) * (k - 1) / 2] / d12; d11 = ap[k + (k - 1) * k / 2] / d12; t = 1. / (d11 * d22 - 1.); d12 = t / d12; for (j = k - 2; j >= 1; --j) { wkm1 = d12 * (d11 * ap[j + (k - 2) * (k - 1) / 2] - ap[j + (k - 1) * k / 2]); wk = d12 * (d22 * ap[j + (k - 1) * k / 2] - ap[j + (k - 2) * (k - 1) / 2]); for (i__ = j; i__ >= 1; --i__) { ap[i__ + (j - 1) * j / 2] = ap[i__ + (j - 1) * j / 2] - ap[i__ + (k - 1) * k / 2] * wk - ap[ i__ + (k - 2) * (k - 1) / 2] * wkm1; /* L40: */ } ap[j + (k - 1) * k / 2] = wk; ap[j + (k - 2) * (k - 1) / 2] = wkm1; /* L50: */ } } } } /* Store details of the interchanges in IPIV */ if (kstep == 1) { ipiv[k] = kp; } else { ipiv[k] = -kp; ipiv[k - 1] = -kp; } /* Decrease K and return to the start of the main loop */ k -= kstep; kc = knc - k; goto L10; } else { /* Factorize A as L*D*L' using the lower triangle of A */ /* K is the main loop index, increasing from 1 to N in steps of */ /* 1 or 2 */ k = 1; kc = 1; npp = *n * (*n + 1) / 2; L60: knc = kc; /* If K > N, exit from loop */ if (k > *n) { goto L110; } kstep = 1; /* Determine rows and columns to be interchanged and whether */ /* a 1-by-1 or 2-by-2 pivot block will be used */ absakk = (d__1 = ap[kc], abs(d__1)); /* IMAX is the row-index of the largest off-diagonal element in */ /* column K, and COLMAX is its absolute value */ if (k < *n) { i__1 = *n - k; imax = k + _starpu_idamax_(&i__1, &ap[kc + 1], &c__1); colmax = (d__1 = ap[kc + imax - k], abs(d__1)); } else { colmax = 0.; } if (max(absakk,colmax) == 0.) { /* Column K is zero: set INFO and continue */ if (*info == 0) { *info = k; } kp = k; } else { if (absakk >= alpha * colmax) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else { /* JMAX is the column-index of the largest off-diagonal */ /* element in row IMAX, and ROWMAX is its absolute value */ rowmax = 0.; kx = kc + imax - k; i__1 = imax - 1; for (j = k; j <= i__1; ++j) { if ((d__1 = ap[kx], abs(d__1)) > rowmax) { rowmax = (d__1 = ap[kx], abs(d__1)); jmax = j; } kx = kx + *n - j; /* L70: */ } kpc = npp - (*n - imax + 1) * (*n - imax + 2) / 2 + 1; if (imax < *n) { i__1 = *n - imax; jmax = imax + _starpu_idamax_(&i__1, &ap[kpc + 1], &c__1); /* Computing MAX */ d__2 = rowmax, d__3 = (d__1 = ap[kpc + jmax - imax], abs( d__1)); rowmax = max(d__2,d__3); } if (absakk >= alpha * colmax * (colmax / rowmax)) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else if ((d__1 = ap[kpc], abs(d__1)) >= alpha * rowmax) { /* interchange rows and columns K and IMAX, use 1-by-1 */ /* pivot block */ kp = imax; } else { /* interchange rows and columns K+1 and IMAX, use 2-by-2 */ /* pivot block */ kp = imax; kstep = 2; } } kk = k + kstep - 1; if (kstep == 2) { knc = knc + *n - k + 1; } if (kp != kk) { /* Interchange rows and columns KK and KP in the trailing */ /* submatrix A(k:n,k:n) */ if (kp < *n) { i__1 = *n - kp; _starpu_dswap_(&i__1, &ap[knc + kp - kk + 1], &c__1, &ap[kpc + 1], &c__1); } kx = knc + kp - kk; i__1 = kp - 1; for (j = kk + 1; j <= i__1; ++j) { kx = kx + *n - j + 1; t = ap[knc + j - kk]; ap[knc + j - kk] = ap[kx]; ap[kx] = t; /* L80: */ } t = ap[knc]; ap[knc] = ap[kpc]; ap[kpc] = t; if (kstep == 2) { t = ap[kc + 1]; ap[kc + 1] = ap[kc + kp - k]; ap[kc + kp - k] = t; } } /* Update the trailing submatrix */ if (kstep == 1) { /* 1-by-1 pivot block D(k): column k now holds */ /* W(k) = L(k)*D(k) */ /* where L(k) is the k-th column of L */ if (k < *n) { /* Perform a rank-1 update of A(k+1:n,k+1:n) as */ /* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' */ r1 = 1. / ap[kc]; i__1 = *n - k; d__1 = -r1; _starpu_dspr_(uplo, &i__1, &d__1, &ap[kc + 1], &c__1, &ap[kc + *n - k + 1]); /* Store L(k) in column K */ i__1 = *n - k; _starpu_dscal_(&i__1, &r1, &ap[kc + 1], &c__1); } } else { /* 2-by-2 pivot block D(k): columns K and K+1 now hold */ /* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) */ /* where L(k) and L(k+1) are the k-th and (k+1)-th columns */ /* of L */ if (k < *n - 1) { /* Perform a rank-2 update of A(k+2:n,k+2:n) as */ /* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )' */ /* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )' */ d21 = ap[k + 1 + (k - 1) * ((*n << 1) - k) / 2]; d11 = ap[k + 1 + k * ((*n << 1) - k - 1) / 2] / d21; d22 = ap[k + (k - 1) * ((*n << 1) - k) / 2] / d21; t = 1. / (d11 * d22 - 1.); d21 = t / d21; i__1 = *n; for (j = k + 2; j <= i__1; ++j) { wk = d21 * (d11 * ap[j + (k - 1) * ((*n << 1) - k) / 2] - ap[j + k * ((*n << 1) - k - 1) / 2]); wkp1 = d21 * (d22 * ap[j + k * ((*n << 1) - k - 1) / 2] - ap[j + (k - 1) * ((*n << 1) - k) / 2]); i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { ap[i__ + (j - 1) * ((*n << 1) - j) / 2] = ap[i__ + (j - 1) * ((*n << 1) - j) / 2] - ap[i__ + (k - 1) * ((*n << 1) - k) / 2] * wk - ap[i__ + k * ((*n << 1) - k - 1) / 2] * wkp1; /* L90: */ } ap[j + (k - 1) * ((*n << 1) - k) / 2] = wk; ap[j + k * ((*n << 1) - k - 1) / 2] = wkp1; /* L100: */ } } } } /* Store details of the interchanges in IPIV */ if (kstep == 1) { ipiv[k] = kp; } else { ipiv[k] = -kp; ipiv[k + 1] = -kp; } /* Increase K and return to the start of the main loop */ k += kstep; kc = knc + *n - k + 2; goto L60; } L110: return 0; /* End of DSPTRF */ } /* _starpu_dsptrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsptri.c000066400000000000000000000250461507764646700207100ustar00rootroot00000000000000/* dsptri.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b11 = -1.; static doublereal c_b13 = 0.; /* Subroutine */ int _starpu_dsptri_(char *uplo, integer *n, doublereal *ap, integer * ipiv, doublereal *work, integer *info) { /* System generated locals */ integer i__1; doublereal d__1; /* Local variables */ doublereal d__; integer j, k; doublereal t, ak; integer kc, kp, kx, kpc, npp; doublereal akp1; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal temp, akkp1; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); integer kstep; extern /* Subroutine */ int _starpu_dspmv_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); integer kcnext; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPTRI computes the inverse of a real symmetric indefinite matrix */ /* A in packed storage using the factorization A = U*D*U**T or */ /* A = L*D*L**T computed by DSPTRF. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the details of the factorization are stored */ /* as an upper or lower triangular matrix. */ /* = 'U': Upper triangular, form is A = U*D*U**T; */ /* = 'L': Lower triangular, form is A = L*D*L**T. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the block diagonal matrix D and the multipliers */ /* used to obtain the factor U or L as computed by DSPTRF, */ /* stored as a packed triangular matrix. */ /* On exit, if INFO = 0, the (symmetric) inverse of the original */ /* matrix, stored as a packed triangular matrix. The j-th column */ /* of inv(A) is stored in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; */ /* if UPLO = 'L', */ /* AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. */ /* IPIV (input) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D */ /* as determined by DSPTRF. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its */ /* inverse could not be computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --work; --ipiv; --ap; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPTRI", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check that the diagonal matrix D is nonsingular. */ if (upper) { /* Upper triangular storage: examine D from bottom to top */ kp = *n * (*n + 1) / 2; for (*info = *n; *info >= 1; --(*info)) { if (ipiv[*info] > 0 && ap[kp] == 0.) { return 0; } kp -= *info; /* L10: */ } } else { /* Lower triangular storage: examine D from top to bottom. */ kp = 1; i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (ipiv[*info] > 0 && ap[kp] == 0.) { return 0; } kp = kp + *n - *info + 1; /* L20: */ } } *info = 0; if (upper) { /* Compute inv(A) from the factorization A = U*D*U'. */ /* K is the main loop index, increasing from 1 to N in steps of */ /* 1 or 2, depending on the size of the diagonal blocks. */ k = 1; kc = 1; L30: /* If K > N, exit from loop. */ if (k > *n) { goto L50; } kcnext = kc + k; if (ipiv[k] > 0) { /* 1 x 1 diagonal block */ /* Invert the diagonal block. */ ap[kc + k - 1] = 1. / ap[kc + k - 1]; /* Compute column K of the inverse. */ if (k > 1) { i__1 = k - 1; _starpu_dcopy_(&i__1, &ap[kc], &c__1, &work[1], &c__1); i__1 = k - 1; _starpu_dspmv_(uplo, &i__1, &c_b11, &ap[1], &work[1], &c__1, &c_b13, & ap[kc], &c__1); i__1 = k - 1; ap[kc + k - 1] -= _starpu_ddot_(&i__1, &work[1], &c__1, &ap[kc], & c__1); } kstep = 1; } else { /* 2 x 2 diagonal block */ /* Invert the diagonal block. */ t = (d__1 = ap[kcnext + k - 1], abs(d__1)); ak = ap[kc + k - 1] / t; akp1 = ap[kcnext + k] / t; akkp1 = ap[kcnext + k - 1] / t; d__ = t * (ak * akp1 - 1.); ap[kc + k - 1] = akp1 / d__; ap[kcnext + k] = ak / d__; ap[kcnext + k - 1] = -akkp1 / d__; /* Compute columns K and K+1 of the inverse. */ if (k > 1) { i__1 = k - 1; _starpu_dcopy_(&i__1, &ap[kc], &c__1, &work[1], &c__1); i__1 = k - 1; _starpu_dspmv_(uplo, &i__1, &c_b11, &ap[1], &work[1], &c__1, &c_b13, & ap[kc], &c__1); i__1 = k - 1; ap[kc + k - 1] -= _starpu_ddot_(&i__1, &work[1], &c__1, &ap[kc], & c__1); i__1 = k - 1; ap[kcnext + k - 1] -= _starpu_ddot_(&i__1, &ap[kc], &c__1, &ap[kcnext] , &c__1); i__1 = k - 1; _starpu_dcopy_(&i__1, &ap[kcnext], &c__1, &work[1], &c__1); i__1 = k - 1; _starpu_dspmv_(uplo, &i__1, &c_b11, &ap[1], &work[1], &c__1, &c_b13, & ap[kcnext], &c__1); i__1 = k - 1; ap[kcnext + k] -= _starpu_ddot_(&i__1, &work[1], &c__1, &ap[kcnext], & c__1); } kstep = 2; kcnext = kcnext + k + 1; } kp = (i__1 = ipiv[k], abs(i__1)); if (kp != k) { /* Interchange rows and columns K and KP in the leading */ /* submatrix A(1:k+1,1:k+1) */ kpc = (kp - 1) * kp / 2 + 1; i__1 = kp - 1; _starpu_dswap_(&i__1, &ap[kc], &c__1, &ap[kpc], &c__1); kx = kpc + kp - 1; i__1 = k - 1; for (j = kp + 1; j <= i__1; ++j) { kx = kx + j - 1; temp = ap[kc + j - 1]; ap[kc + j - 1] = ap[kx]; ap[kx] = temp; /* L40: */ } temp = ap[kc + k - 1]; ap[kc + k - 1] = ap[kpc + kp - 1]; ap[kpc + kp - 1] = temp; if (kstep == 2) { temp = ap[kc + k + k - 1]; ap[kc + k + k - 1] = ap[kc + k + kp - 1]; ap[kc + k + kp - 1] = temp; } } k += kstep; kc = kcnext; goto L30; L50: ; } else { /* Compute inv(A) from the factorization A = L*D*L'. */ /* K is the main loop index, increasing from 1 to N in steps of */ /* 1 or 2, depending on the size of the diagonal blocks. */ npp = *n * (*n + 1) / 2; k = *n; kc = npp; L60: /* If K < 1, exit from loop. */ if (k < 1) { goto L80; } kcnext = kc - (*n - k + 2); if (ipiv[k] > 0) { /* 1 x 1 diagonal block */ /* Invert the diagonal block. */ ap[kc] = 1. / ap[kc]; /* Compute column K of the inverse. */ if (k < *n) { i__1 = *n - k; _starpu_dcopy_(&i__1, &ap[kc + 1], &c__1, &work[1], &c__1); i__1 = *n - k; _starpu_dspmv_(uplo, &i__1, &c_b11, &ap[kc + *n - k + 1], &work[1], & c__1, &c_b13, &ap[kc + 1], &c__1); i__1 = *n - k; ap[kc] -= _starpu_ddot_(&i__1, &work[1], &c__1, &ap[kc + 1], &c__1); } kstep = 1; } else { /* 2 x 2 diagonal block */ /* Invert the diagonal block. */ t = (d__1 = ap[kcnext + 1], abs(d__1)); ak = ap[kcnext] / t; akp1 = ap[kc] / t; akkp1 = ap[kcnext + 1] / t; d__ = t * (ak * akp1 - 1.); ap[kcnext] = akp1 / d__; ap[kc] = ak / d__; ap[kcnext + 1] = -akkp1 / d__; /* Compute columns K-1 and K of the inverse. */ if (k < *n) { i__1 = *n - k; _starpu_dcopy_(&i__1, &ap[kc + 1], &c__1, &work[1], &c__1); i__1 = *n - k; _starpu_dspmv_(uplo, &i__1, &c_b11, &ap[kc + (*n - k + 1)], &work[1], &c__1, &c_b13, &ap[kc + 1], &c__1); i__1 = *n - k; ap[kc] -= _starpu_ddot_(&i__1, &work[1], &c__1, &ap[kc + 1], &c__1); i__1 = *n - k; ap[kcnext + 1] -= _starpu_ddot_(&i__1, &ap[kc + 1], &c__1, &ap[kcnext + 2], &c__1); i__1 = *n - k; _starpu_dcopy_(&i__1, &ap[kcnext + 2], &c__1, &work[1], &c__1); i__1 = *n - k; _starpu_dspmv_(uplo, &i__1, &c_b11, &ap[kc + (*n - k + 1)], &work[1], &c__1, &c_b13, &ap[kcnext + 2], &c__1); i__1 = *n - k; ap[kcnext] -= _starpu_ddot_(&i__1, &work[1], &c__1, &ap[kcnext + 2], & c__1); } kstep = 2; kcnext -= *n - k + 3; } kp = (i__1 = ipiv[k], abs(i__1)); if (kp != k) { /* Interchange rows and columns K and KP in the trailing */ /* submatrix A(k-1:n,k-1:n) */ kpc = npp - (*n - kp + 1) * (*n - kp + 2) / 2 + 1; if (kp < *n) { i__1 = *n - kp; _starpu_dswap_(&i__1, &ap[kc + kp - k + 1], &c__1, &ap[kpc + 1], & c__1); } kx = kc + kp - k; i__1 = kp - 1; for (j = k + 1; j <= i__1; ++j) { kx = kx + *n - j + 1; temp = ap[kc + j - k]; ap[kc + j - k] = ap[kx]; ap[kx] = temp; /* L70: */ } temp = ap[kc]; ap[kc] = ap[kpc]; ap[kpc] = temp; if (kstep == 2) { temp = ap[kc - *n + k - 1]; ap[kc - *n + k - 1] = ap[kc - *n + kp - 1]; ap[kc - *n + kp - 1] = temp; } } k -= kstep; kc = kcnext; goto L60; L80: ; } return 0; /* End of DSPTRI */ } /* _starpu_dsptri_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsptrs.c000066400000000000000000000272711507764646700207240ustar00rootroot00000000000000/* dsptrs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b7 = -1.; static integer c__1 = 1; static doublereal c_b19 = 1.; /* Subroutine */ int _starpu_dsptrs_(char *uplo, integer *n, integer *nrhs, doublereal *ap, integer *ipiv, doublereal *b, integer *ldb, integer * info) { /* System generated locals */ integer b_dim1, b_offset, i__1; doublereal d__1; /* Local variables */ integer j, k; doublereal ak, bk; integer kc, kp; doublereal akm1, bkm1; extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal akm1k; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); doublereal denom; extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPTRS solves a system of linear equations A*X = B with a real */ /* symmetric matrix A stored in packed format using the factorization */ /* A = U*D*U**T or A = L*D*L**T computed by DSPTRF. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the details of the factorization are stored */ /* as an upper or lower triangular matrix. */ /* = 'U': Upper triangular, form is A = U*D*U**T; */ /* = 'L': Lower triangular, form is A = L*D*L**T. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The block diagonal matrix D and the multipliers used to */ /* obtain the factor U or L as computed by DSPTRF, stored as a */ /* packed triangular matrix. */ /* IPIV (input) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D */ /* as determined by DSPTRF. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side matrix B. */ /* On exit, the solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --ap; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSPTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } if (upper) { /* Solve A*X = B, where A = U*D*U'. */ /* First solve U*D*X = B, overwriting B with X. */ /* K is the main loop index, decreasing from N to 1 in steps of */ /* 1 or 2, depending on the size of the diagonal blocks. */ k = *n; kc = *n * (*n + 1) / 2 + 1; L10: /* If K < 1, exit from loop. */ if (k < 1) { goto L30; } kc -= k; if (ipiv[k] > 0) { /* 1 x 1 diagonal block */ /* Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); } /* Multiply by inv(U(K)), where U(K) is the transformation */ /* stored in column K of A. */ i__1 = k - 1; _starpu_dger_(&i__1, nrhs, &c_b7, &ap[kc], &c__1, &b[k + b_dim1], ldb, &b[ b_dim1 + 1], ldb); /* Multiply by the inverse of the diagonal block. */ d__1 = 1. / ap[kc + k - 1]; _starpu_dscal_(nrhs, &d__1, &b[k + b_dim1], ldb); --k; } else { /* 2 x 2 diagonal block */ /* Interchange rows K-1 and -IPIV(K). */ kp = -ipiv[k]; if (kp != k - 1) { _starpu_dswap_(nrhs, &b[k - 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); } /* Multiply by inv(U(K)), where U(K) is the transformation */ /* stored in columns K-1 and K of A. */ i__1 = k - 2; _starpu_dger_(&i__1, nrhs, &c_b7, &ap[kc], &c__1, &b[k + b_dim1], ldb, &b[ b_dim1 + 1], ldb); i__1 = k - 2; _starpu_dger_(&i__1, nrhs, &c_b7, &ap[kc - (k - 1)], &c__1, &b[k - 1 + b_dim1], ldb, &b[b_dim1 + 1], ldb); /* Multiply by the inverse of the diagonal block. */ akm1k = ap[kc + k - 2]; akm1 = ap[kc - 1] / akm1k; ak = ap[kc + k - 1] / akm1k; denom = akm1 * ak - 1.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { bkm1 = b[k - 1 + j * b_dim1] / akm1k; bk = b[k + j * b_dim1] / akm1k; b[k - 1 + j * b_dim1] = (ak * bkm1 - bk) / denom; b[k + j * b_dim1] = (akm1 * bk - bkm1) / denom; /* L20: */ } kc = kc - k + 1; k += -2; } goto L10; L30: /* Next solve U'*X = B, overwriting B with X. */ /* K is the main loop index, increasing from 1 to N in steps of */ /* 1 or 2, depending on the size of the diagonal blocks. */ k = 1; kc = 1; L40: /* If K > N, exit from loop. */ if (k > *n) { goto L50; } if (ipiv[k] > 0) { /* 1 x 1 diagonal block */ /* Multiply by inv(U'(K)), where U(K) is the transformation */ /* stored in column K of A. */ i__1 = k - 1; _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &ap[kc] , &c__1, &c_b19, &b[k + b_dim1], ldb); /* Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); } kc += k; ++k; } else { /* 2 x 2 diagonal block */ /* Multiply by inv(U'(K+1)), where U(K+1) is the transformation */ /* stored in columns K and K+1 of A. */ i__1 = k - 1; _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &ap[kc] , &c__1, &c_b19, &b[k + b_dim1], ldb); i__1 = k - 1; _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &ap[kc + k], &c__1, &c_b19, &b[k + 1 + b_dim1], ldb); /* Interchange rows K and -IPIV(K). */ kp = -ipiv[k]; if (kp != k) { _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); } kc = kc + (k << 1) + 1; k += 2; } goto L40; L50: ; } else { /* Solve A*X = B, where A = L*D*L'. */ /* First solve L*D*X = B, overwriting B with X. */ /* K is the main loop index, increasing from 1 to N in steps of */ /* 1 or 2, depending on the size of the diagonal blocks. */ k = 1; kc = 1; L60: /* If K > N, exit from loop. */ if (k > *n) { goto L80; } if (ipiv[k] > 0) { /* 1 x 1 diagonal block */ /* Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); } /* Multiply by inv(L(K)), where L(K) is the transformation */ /* stored in column K of A. */ if (k < *n) { i__1 = *n - k; _starpu_dger_(&i__1, nrhs, &c_b7, &ap[kc + 1], &c__1, &b[k + b_dim1], ldb, &b[k + 1 + b_dim1], ldb); } /* Multiply by the inverse of the diagonal block. */ d__1 = 1. / ap[kc]; _starpu_dscal_(nrhs, &d__1, &b[k + b_dim1], ldb); kc = kc + *n - k + 1; ++k; } else { /* 2 x 2 diagonal block */ /* Interchange rows K+1 and -IPIV(K). */ kp = -ipiv[k]; if (kp != k + 1) { _starpu_dswap_(nrhs, &b[k + 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); } /* Multiply by inv(L(K)), where L(K) is the transformation */ /* stored in columns K and K+1 of A. */ if (k < *n - 1) { i__1 = *n - k - 1; _starpu_dger_(&i__1, nrhs, &c_b7, &ap[kc + 2], &c__1, &b[k + b_dim1], ldb, &b[k + 2 + b_dim1], ldb); i__1 = *n - k - 1; _starpu_dger_(&i__1, nrhs, &c_b7, &ap[kc + *n - k + 2], &c__1, &b[k + 1 + b_dim1], ldb, &b[k + 2 + b_dim1], ldb); } /* Multiply by the inverse of the diagonal block. */ akm1k = ap[kc + 1]; akm1 = ap[kc] / akm1k; ak = ap[kc + *n - k + 1] / akm1k; denom = akm1 * ak - 1.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { bkm1 = b[k + j * b_dim1] / akm1k; bk = b[k + 1 + j * b_dim1] / akm1k; b[k + j * b_dim1] = (ak * bkm1 - bk) / denom; b[k + 1 + j * b_dim1] = (akm1 * bk - bkm1) / denom; /* L70: */ } kc = kc + (*n - k << 1) + 1; k += 2; } goto L60; L80: /* Next solve L'*X = B, overwriting B with X. */ /* K is the main loop index, decreasing from N to 1 in steps of */ /* 1 or 2, depending on the size of the diagonal blocks. */ k = *n; kc = *n * (*n + 1) / 2 + 1; L90: /* If K < 1, exit from loop. */ if (k < 1) { goto L100; } kc -= *n - k + 1; if (ipiv[k] > 0) { /* 1 x 1 diagonal block */ /* Multiply by inv(L'(K)), where L(K) is the transformation */ /* stored in column K of A. */ if (k < *n) { i__1 = *n - k; _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], ldb, &ap[kc + 1], &c__1, &c_b19, &b[k + b_dim1], ldb); } /* Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); } --k; } else { /* 2 x 2 diagonal block */ /* Multiply by inv(L'(K-1)), where L(K-1) is the transformation */ /* stored in columns K-1 and K of A. */ if (k < *n) { i__1 = *n - k; _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], ldb, &ap[kc + 1], &c__1, &c_b19, &b[k + b_dim1], ldb); i__1 = *n - k; _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], ldb, &ap[kc - (*n - k)], &c__1, &c_b19, &b[k - 1 + b_dim1], ldb); } /* Interchange rows K and -IPIV(K). */ kp = -ipiv[k]; if (kp != k) { _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); } kc -= *n - k + 2; k += -2; } goto L90; L100: ; } return 0; /* End of DSPTRS */ } /* _starpu_dsptrs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dstebz.c000066400000000000000000000522101507764646700206670ustar00rootroot00000000000000/* dstebz.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; static integer c__0 = 0; /* Subroutine */ int _starpu_dstebz_(char *range, char *order, integer *n, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, doublereal *d__, doublereal *e, integer *m, integer *nsplit, doublereal *w, integer *iblock, integer *isplit, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer i__1, i__2, i__3; doublereal d__1, d__2, d__3, d__4, d__5; /* Builtin functions */ double sqrt(doublereal), log(doublereal); /* Local variables */ integer j, ib, jb, ie, je, nb; doublereal gl; integer im, in; doublereal gu; integer iw; doublereal wl, wu; integer nwl; doublereal ulp, wlu, wul; integer nwu; doublereal tmp1, tmp2; integer iend, ioff, iout, itmp1, jdisc; extern logical _starpu_lsame_(char *, char *); integer iinfo; doublereal atoli; integer iwoff; doublereal bnorm; integer itmax; doublereal wkill, rtoli, tnorm; extern doublereal _starpu_dlamch_(char *); integer ibegin; extern /* Subroutine */ int _starpu_dlaebz_(integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); integer irange, idiscl; doublereal safemn; integer idumma[1]; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer idiscu, iorder; logical ncnvrg; doublereal pivmin; logical toofew; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* 8-18-00: Increase FUDGE factor for T3E (eca) */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSTEBZ computes the eigenvalues of a symmetric tridiagonal */ /* matrix T. The user may ask for all eigenvalues, all eigenvalues */ /* in the half-open interval (VL, VU], or the IL-th through IU-th */ /* eigenvalues. */ /* To avoid overflow, the matrix must be scaled so that its */ /* largest element is no greater than overflow**(1/2) * */ /* underflow**(1/4) in absolute value, and for greatest */ /* accuracy, it should not be much smaller than that. */ /* See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */ /* Matrix", Report CS41, Computer Science Dept., Stanford */ /* University, July 21, 1966. */ /* Arguments */ /* ========= */ /* RANGE (input) CHARACTER*1 */ /* = 'A': ("All") all eigenvalues will be found. */ /* = 'V': ("Value") all eigenvalues in the half-open interval */ /* (VL, VU] will be found. */ /* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */ /* entire matrix) will be found. */ /* ORDER (input) CHARACTER*1 */ /* = 'B': ("By Block") the eigenvalues will be grouped by */ /* split-off block (see IBLOCK, ISPLIT) and */ /* ordered from smallest to largest within */ /* the block. */ /* = 'E': ("Entire matrix") */ /* the eigenvalues for the entire matrix */ /* will be ordered from smallest to */ /* largest. */ /* N (input) INTEGER */ /* The order of the tridiagonal matrix T. N >= 0. */ /* VL (input) DOUBLE PRECISION */ /* VU (input) DOUBLE PRECISION */ /* If RANGE='V', the lower and upper bounds of the interval to */ /* be searched for eigenvalues. Eigenvalues less than or equal */ /* to VL, or greater than VU, will not be returned. VL < VU. */ /* Not referenced if RANGE = 'A' or 'I'. */ /* IL (input) INTEGER */ /* IU (input) INTEGER */ /* If RANGE='I', the indices (in ascending order) of the */ /* smallest and largest eigenvalues to be returned. */ /* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ /* Not referenced if RANGE = 'A' or 'V'. */ /* ABSTOL (input) DOUBLE PRECISION */ /* The absolute tolerance for the eigenvalues. An eigenvalue */ /* (or cluster) is considered to be located if it has been */ /* determined to lie in an interval whose width is ABSTOL or */ /* less. If ABSTOL is less than or equal to zero, then ULP*|T| */ /* will be used, where |T| means the 1-norm of T. */ /* Eigenvalues will be computed most accurately when ABSTOL is */ /* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of the tridiagonal matrix T. */ /* E (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) off-diagonal elements of the tridiagonal matrix T. */ /* M (output) INTEGER */ /* The actual number of eigenvalues found. 0 <= M <= N. */ /* (See also the description of INFO=2,3.) */ /* NSPLIT (output) INTEGER */ /* The number of diagonal blocks in the matrix T. */ /* 1 <= NSPLIT <= N. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* On exit, the first M elements of W will contain the */ /* eigenvalues. (DSTEBZ may use the remaining N-M elements as */ /* workspace.) */ /* IBLOCK (output) INTEGER array, dimension (N) */ /* At each row/column j where E(j) is zero or small, the */ /* matrix T is considered to split into a block diagonal */ /* matrix. On exit, if INFO = 0, IBLOCK(i) specifies to which */ /* block (from 1 to the number of blocks) the eigenvalue W(i) */ /* belongs. (DSTEBZ may use the remaining N-M elements as */ /* workspace.) */ /* ISPLIT (output) INTEGER array, dimension (N) */ /* The splitting points, at which T breaks up into submatrices. */ /* The first submatrix consists of rows/columns 1 to ISPLIT(1), */ /* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */ /* etc., and the NSPLIT-th consists of rows/columns */ /* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */ /* (Only the first NSPLIT elements will actually be used, but */ /* since the user cannot know a priori what value NSPLIT will */ /* have, N words must be reserved for ISPLIT.) */ /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ /* IWORK (workspace) INTEGER array, dimension (3*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: some or all of the eigenvalues failed to converge or */ /* were not computed: */ /* =1 or 3: Bisection failed to converge for some */ /* eigenvalues; these eigenvalues are flagged by a */ /* negative block number. The effect is that the */ /* eigenvalues may not be as accurate as the */ /* absolute and relative tolerances. This is */ /* generally caused by unexpectedly inaccurate */ /* arithmetic. */ /* =2 or 3: RANGE='I' only: Not all of the eigenvalues */ /* IL:IU were found. */ /* Effect: M < IU+1-IL */ /* Cause: non-monotonic arithmetic, causing the */ /* Sturm sequence to be non-monotonic. */ /* Cure: recalculate, using RANGE='A', and pick */ /* out eigenvalues IL:IU. In some cases, */ /* increasing the PARAMETER "FUDGE" may */ /* make things work. */ /* = 4: RANGE='I', and the Gershgorin interval */ /* initially used was too small. No eigenvalues */ /* were computed. */ /* Probable cause: your machine has sloppy */ /* floating-point arithmetic. */ /* Cure: Increase the PARAMETER "FUDGE", */ /* recompile, and try again. */ /* Internal Parameters */ /* =================== */ /* RELFAC DOUBLE PRECISION, default = 2.0e0 */ /* The relative tolerance. An interval (a,b] lies within */ /* "relative tolerance" if b-a < RELFAC*ulp*max(|a|,|b|), */ /* where "ulp" is the machine precision (distance from 1 to */ /* the next larger floating point number.) */ /* FUDGE DOUBLE PRECISION, default = 2 */ /* A "fudge factor" to widen the Gershgorin intervals. Ideally, */ /* a value of 1 should work, but on machines with sloppy */ /* arithmetic, this needs to be larger. The default for */ /* publicly released versions should be large enough to handle */ /* the worst machine around. Note that this has no effect */ /* on accuracy of the solution. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --iwork; --work; --isplit; --iblock; --w; --e; --d__; /* Function Body */ *info = 0; /* Decode RANGE */ if (_starpu_lsame_(range, "A")) { irange = 1; } else if (_starpu_lsame_(range, "V")) { irange = 2; } else if (_starpu_lsame_(range, "I")) { irange = 3; } else { irange = 0; } /* Decode ORDER */ if (_starpu_lsame_(order, "B")) { iorder = 2; } else if (_starpu_lsame_(order, "E")) { iorder = 1; } else { iorder = 0; } /* Check for Errors */ if (irange <= 0) { *info = -1; } else if (iorder <= 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (irange == 2) { if (*vl >= *vu) { *info = -5; } } else if (irange == 3 && (*il < 1 || *il > max(1,*n))) { *info = -6; } else if (irange == 3 && (*iu < min(*n,*il) || *iu > *n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSTEBZ", &i__1); return 0; } /* Initialize error flags */ *info = 0; ncnvrg = FALSE_; toofew = FALSE_; /* Quick return if possible */ *m = 0; if (*n == 0) { return 0; } /* Simplifications: */ if (irange == 3 && *il == 1 && *iu == *n) { irange = 1; } /* Get machine constants */ /* NB is the minimum vector length for vector bisection, or 0 */ /* if only scalar is to be done. */ safemn = _starpu_dlamch_("S"); ulp = _starpu_dlamch_("P"); rtoli = ulp * 2.; nb = _starpu_ilaenv_(&c__1, "DSTEBZ", " ", n, &c_n1, &c_n1, &c_n1); if (nb <= 1) { nb = 0; } /* Special Case when N=1 */ if (*n == 1) { *nsplit = 1; isplit[1] = 1; if (irange == 2 && (*vl >= d__[1] || *vu < d__[1])) { *m = 0; } else { w[1] = d__[1]; iblock[1] = 1; *m = 1; } return 0; } /* Compute Splitting Points */ *nsplit = 1; work[*n] = 0.; pivmin = 1.; /* DIR$ NOVECTOR */ i__1 = *n; for (j = 2; j <= i__1; ++j) { /* Computing 2nd power */ d__1 = e[j - 1]; tmp1 = d__1 * d__1; /* Computing 2nd power */ d__2 = ulp; if ((d__1 = d__[j] * d__[j - 1], abs(d__1)) * (d__2 * d__2) + safemn > tmp1) { isplit[*nsplit] = j - 1; ++(*nsplit); work[j - 1] = 0.; } else { work[j - 1] = tmp1; pivmin = max(pivmin,tmp1); } /* L10: */ } isplit[*nsplit] = *n; pivmin *= safemn; /* Compute Interval and ATOLI */ if (irange == 3) { /* RANGE='I': Compute the interval containing eigenvalues */ /* IL through IU. */ /* Compute Gershgorin interval for entire (split) matrix */ /* and use it as the initial interval */ gu = d__[1]; gl = d__[1]; tmp1 = 0.; i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { tmp2 = sqrt(work[j]); /* Computing MAX */ d__1 = gu, d__2 = d__[j] + tmp1 + tmp2; gu = max(d__1,d__2); /* Computing MIN */ d__1 = gl, d__2 = d__[j] - tmp1 - tmp2; gl = min(d__1,d__2); tmp1 = tmp2; /* L20: */ } /* Computing MAX */ d__1 = gu, d__2 = d__[*n] + tmp1; gu = max(d__1,d__2); /* Computing MIN */ d__1 = gl, d__2 = d__[*n] - tmp1; gl = min(d__1,d__2); /* Computing MAX */ d__1 = abs(gl), d__2 = abs(gu); tnorm = max(d__1,d__2); gl = gl - tnorm * 2.1 * ulp * *n - pivmin * 4.2000000000000002; gu = gu + tnorm * 2.1 * ulp * *n + pivmin * 2.1; /* Compute Iteration parameters */ itmax = (integer) ((log(tnorm + pivmin) - log(pivmin)) / log(2.)) + 2; if (*abstol <= 0.) { atoli = ulp * tnorm; } else { atoli = *abstol; } work[*n + 1] = gl; work[*n + 2] = gl; work[*n + 3] = gu; work[*n + 4] = gu; work[*n + 5] = gl; work[*n + 6] = gu; iwork[1] = -1; iwork[2] = -1; iwork[3] = *n + 1; iwork[4] = *n + 1; iwork[5] = *il - 1; iwork[6] = *iu; _starpu_dlaebz_(&c__3, &itmax, n, &c__2, &c__2, &nb, &atoli, &rtoli, &pivmin, &d__[1], &e[1], &work[1], &iwork[5], &work[*n + 1], &work[*n + 5], &iout, &iwork[1], &w[1], &iblock[1], &iinfo); if (iwork[6] == *iu) { wl = work[*n + 1]; wlu = work[*n + 3]; nwl = iwork[1]; wu = work[*n + 4]; wul = work[*n + 2]; nwu = iwork[4]; } else { wl = work[*n + 2]; wlu = work[*n + 4]; nwl = iwork[2]; wu = work[*n + 3]; wul = work[*n + 1]; nwu = iwork[3]; } if (nwl < 0 || nwl >= *n || nwu < 1 || nwu > *n) { *info = 4; return 0; } } else { /* RANGE='A' or 'V' -- Set ATOLI */ /* Computing MAX */ d__3 = abs(d__[1]) + abs(e[1]), d__4 = (d__1 = d__[*n], abs(d__1)) + ( d__2 = e[*n - 1], abs(d__2)); tnorm = max(d__3,d__4); i__1 = *n - 1; for (j = 2; j <= i__1; ++j) { /* Computing MAX */ d__4 = tnorm, d__5 = (d__1 = d__[j], abs(d__1)) + (d__2 = e[j - 1] , abs(d__2)) + (d__3 = e[j], abs(d__3)); tnorm = max(d__4,d__5); /* L30: */ } if (*abstol <= 0.) { atoli = ulp * tnorm; } else { atoli = *abstol; } if (irange == 2) { wl = *vl; wu = *vu; } else { wl = 0.; wu = 0.; } } /* Find Eigenvalues -- Loop Over Blocks and recompute NWL and NWU. */ /* NWL accumulates the number of eigenvalues .le. WL, */ /* NWU accumulates the number of eigenvalues .le. WU */ *m = 0; iend = 0; *info = 0; nwl = 0; nwu = 0; i__1 = *nsplit; for (jb = 1; jb <= i__1; ++jb) { ioff = iend; ibegin = ioff + 1; iend = isplit[jb]; in = iend - ioff; if (in == 1) { /* Special Case -- IN=1 */ if (irange == 1 || wl >= d__[ibegin] - pivmin) { ++nwl; } if (irange == 1 || wu >= d__[ibegin] - pivmin) { ++nwu; } if (irange == 1 || wl < d__[ibegin] - pivmin && wu >= d__[ibegin] - pivmin) { ++(*m); w[*m] = d__[ibegin]; iblock[*m] = jb; } } else { /* General Case -- IN > 1 */ /* Compute Gershgorin Interval */ /* and use it as the initial interval */ gu = d__[ibegin]; gl = d__[ibegin]; tmp1 = 0.; i__2 = iend - 1; for (j = ibegin; j <= i__2; ++j) { tmp2 = (d__1 = e[j], abs(d__1)); /* Computing MAX */ d__1 = gu, d__2 = d__[j] + tmp1 + tmp2; gu = max(d__1,d__2); /* Computing MIN */ d__1 = gl, d__2 = d__[j] - tmp1 - tmp2; gl = min(d__1,d__2); tmp1 = tmp2; /* L40: */ } /* Computing MAX */ d__1 = gu, d__2 = d__[iend] + tmp1; gu = max(d__1,d__2); /* Computing MIN */ d__1 = gl, d__2 = d__[iend] - tmp1; gl = min(d__1,d__2); /* Computing MAX */ d__1 = abs(gl), d__2 = abs(gu); bnorm = max(d__1,d__2); gl = gl - bnorm * 2.1 * ulp * in - pivmin * 2.1; gu = gu + bnorm * 2.1 * ulp * in + pivmin * 2.1; /* Compute ATOLI for the current submatrix */ if (*abstol <= 0.) { /* Computing MAX */ d__1 = abs(gl), d__2 = abs(gu); atoli = ulp * max(d__1,d__2); } else { atoli = *abstol; } if (irange > 1) { if (gu < wl) { nwl += in; nwu += in; goto L70; } gl = max(gl,wl); gu = min(gu,wu); if (gl >= gu) { goto L70; } } /* Set Up Initial Interval */ work[*n + 1] = gl; work[*n + in + 1] = gu; _starpu_dlaebz_(&c__1, &c__0, &in, &in, &c__1, &nb, &atoli, &rtoli, & pivmin, &d__[ibegin], &e[ibegin], &work[ibegin], idumma, & work[*n + 1], &work[*n + (in << 1) + 1], &im, &iwork[1], & w[*m + 1], &iblock[*m + 1], &iinfo); nwl += iwork[1]; nwu += iwork[in + 1]; iwoff = *m - iwork[1]; /* Compute Eigenvalues */ itmax = (integer) ((log(gu - gl + pivmin) - log(pivmin)) / log(2.) ) + 2; _starpu_dlaebz_(&c__2, &itmax, &in, &in, &c__1, &nb, &atoli, &rtoli, & pivmin, &d__[ibegin], &e[ibegin], &work[ibegin], idumma, & work[*n + 1], &work[*n + (in << 1) + 1], &iout, &iwork[1], &w[*m + 1], &iblock[*m + 1], &iinfo); /* Copy Eigenvalues Into W and IBLOCK */ /* Use -JB for block number for unconverged eigenvalues. */ i__2 = iout; for (j = 1; j <= i__2; ++j) { tmp1 = (work[j + *n] + work[j + in + *n]) * .5; /* Flag non-convergence. */ if (j > iout - iinfo) { ncnvrg = TRUE_; ib = -jb; } else { ib = jb; } i__3 = iwork[j + in] + iwoff; for (je = iwork[j] + 1 + iwoff; je <= i__3; ++je) { w[je] = tmp1; iblock[je] = ib; /* L50: */ } /* L60: */ } *m += im; } L70: ; } /* If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU */ /* If NWL+1 < IL or NWU > IU, discard extra eigenvalues. */ if (irange == 3) { im = 0; idiscl = *il - 1 - nwl; idiscu = nwu - *iu; if (idiscl > 0 || idiscu > 0) { i__1 = *m; for (je = 1; je <= i__1; ++je) { if (w[je] <= wlu && idiscl > 0) { --idiscl; } else if (w[je] >= wul && idiscu > 0) { --idiscu; } else { ++im; w[im] = w[je]; iblock[im] = iblock[je]; } /* L80: */ } *m = im; } if (idiscl > 0 || idiscu > 0) { /* Code to deal with effects of bad arithmetic: */ /* Some low eigenvalues to be discarded are not in (WL,WLU], */ /* or high eigenvalues to be discarded are not in (WUL,WU] */ /* so just kill off the smallest IDISCL/largest IDISCU */ /* eigenvalues, by simply finding the smallest/largest */ /* eigenvalue(s). */ /* (If N(w) is monotone non-decreasing, this should never */ /* happen.) */ if (idiscl > 0) { wkill = wu; i__1 = idiscl; for (jdisc = 1; jdisc <= i__1; ++jdisc) { iw = 0; i__2 = *m; for (je = 1; je <= i__2; ++je) { if (iblock[je] != 0 && (w[je] < wkill || iw == 0)) { iw = je; wkill = w[je]; } /* L90: */ } iblock[iw] = 0; /* L100: */ } } if (idiscu > 0) { wkill = wl; i__1 = idiscu; for (jdisc = 1; jdisc <= i__1; ++jdisc) { iw = 0; i__2 = *m; for (je = 1; je <= i__2; ++je) { if (iblock[je] != 0 && (w[je] > wkill || iw == 0)) { iw = je; wkill = w[je]; } /* L110: */ } iblock[iw] = 0; /* L120: */ } } im = 0; i__1 = *m; for (je = 1; je <= i__1; ++je) { if (iblock[je] != 0) { ++im; w[im] = w[je]; iblock[im] = iblock[je]; } /* L130: */ } *m = im; } if (idiscl < 0 || idiscu < 0) { toofew = TRUE_; } } /* If ORDER='B', do nothing -- the eigenvalues are already sorted */ /* by block. */ /* If ORDER='E', sort the eigenvalues from smallest to largest */ if (iorder == 1 && *nsplit > 1) { i__1 = *m - 1; for (je = 1; je <= i__1; ++je) { ie = 0; tmp1 = w[je]; i__2 = *m; for (j = je + 1; j <= i__2; ++j) { if (w[j] < tmp1) { ie = j; tmp1 = w[j]; } /* L140: */ } if (ie != 0) { itmp1 = iblock[ie]; w[ie] = w[je]; iblock[ie] = iblock[je]; w[je] = tmp1; iblock[je] = itmp1; } /* L150: */ } } *info = 0; if (ncnvrg) { ++(*info); } if (toofew) { *info += 2; } return 0; /* End of DSTEBZ */ } /* _starpu_dstebz_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dstedc.c000066400000000000000000000364011507764646700206460ustar00rootroot00000000000000/* dstedc.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__9 = 9; static integer c__0 = 0; static integer c__2 = 2; static doublereal c_b17 = 0.; static doublereal c_b18 = 1.; static integer c__1 = 1; /* Subroutine */ int _starpu_dstedc_(char *compz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double log(doublereal); integer pow_ii(integer *, integer *); double sqrt(doublereal); /* Local variables */ integer i__, j, k, m; doublereal p; integer ii, lgn; doublereal eps, tiny; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); integer lwmin; extern /* Subroutine */ int _starpu_dlaed0_(integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); integer start; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); integer finish; extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *), _starpu_dlasrt_(char *, integer *, doublereal *, integer *); integer liwmin, icompz; extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *); doublereal orgnrm; logical lquery; integer smlsiz, storez, strtrw; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSTEDC computes all eigenvalues and, optionally, eigenvectors of a */ /* symmetric tridiagonal matrix using the divide and conquer method. */ /* The eigenvectors of a full or band real symmetric matrix can also be */ /* found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this */ /* matrix to tridiagonal form. */ /* This code makes very mild assumptions about floating point */ /* arithmetic. It will work on machines with a guard digit in */ /* add/subtract, or on those binary machines without guard digits */ /* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */ /* It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. See DLAED3 for details. */ /* Arguments */ /* ========= */ /* COMPZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only. */ /* = 'I': Compute eigenvectors of tridiagonal matrix also. */ /* = 'V': Compute eigenvectors of original dense symmetric */ /* matrix also. On entry, Z contains the orthogonal */ /* matrix used to reduce the original matrix to */ /* tridiagonal form. */ /* N (input) INTEGER */ /* The dimension of the symmetric tridiagonal matrix. N >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the diagonal elements of the tridiagonal matrix. */ /* On exit, if INFO = 0, the eigenvalues in ascending order. */ /* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, the subdiagonal elements of the tridiagonal matrix. */ /* On exit, E has been destroyed. */ /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ /* On entry, if COMPZ = 'V', then Z contains the orthogonal */ /* matrix used in the reduction to tridiagonal form. */ /* On exit, if INFO = 0, then if COMPZ = 'V', Z contains the */ /* orthonormal eigenvectors of the original symmetric matrix, */ /* and if COMPZ = 'I', Z contains the orthonormal eigenvectors */ /* of the symmetric tridiagonal matrix. */ /* If COMPZ = 'N', then Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1. */ /* If eigenvectors are desired, then LDZ >= max(1,N). */ /* WORK (workspace/output) DOUBLE PRECISION array, */ /* dimension (LWORK) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. */ /* If COMPZ = 'V' and N > 1 then LWORK must be at least */ /* ( 1 + 3*N + 2*N*lg N + 3*N**2 ), */ /* where lg( N ) = smallest integer k such */ /* that 2**k >= N. */ /* If COMPZ = 'I' and N > 1 then LWORK must be at least */ /* ( 1 + 4*N + N**2 ). */ /* Note that for COMPZ = 'I' or 'V', then if N is less than or */ /* equal to the minimum divide size, usually 25, then LWORK need */ /* only be max(1,2*(N-1)). */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ /* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. */ /* If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. */ /* If COMPZ = 'V' and N > 1 then LIWORK must be at least */ /* ( 6 + 6*N + 5*N*lg N ). */ /* If COMPZ = 'I' and N > 1 then LIWORK must be at least */ /* ( 3 + 5*N ). */ /* Note that for COMPZ = 'I' or 'V', then if N is less than or */ /* equal to the minimum divide size, usually 25, then LIWORK */ /* need only be 1. */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the optimal size of the IWORK array, */ /* returns this value as the first entry of the IWORK array, and */ /* no error message related to LIWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: The algorithm failed to compute an eigenvalue while */ /* working on the submatrix lying in rows and columns */ /* INFO/(N+1) through mod(INFO,N+1). */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* Modified by Francoise Tisseur, University of Tennessee. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; --iwork; /* Function Body */ *info = 0; lquery = *lwork == -1 || *liwork == -1; if (_starpu_lsame_(compz, "N")) { icompz = 0; } else if (_starpu_lsame_(compz, "V")) { icompz = 1; } else if (_starpu_lsame_(compz, "I")) { icompz = 2; } else { icompz = -1; } if (icompz < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { *info = -6; } if (*info == 0) { /* Compute the workspace requirements */ smlsiz = _starpu_ilaenv_(&c__9, "DSTEDC", " ", &c__0, &c__0, &c__0, &c__0); if (*n <= 1 || icompz == 0) { liwmin = 1; lwmin = 1; } else if (*n <= smlsiz) { liwmin = 1; lwmin = *n - 1 << 1; } else { lgn = (integer) (log((doublereal) (*n)) / log(2.)); if (pow_ii(&c__2, &lgn) < *n) { ++lgn; } if (pow_ii(&c__2, &lgn) < *n) { ++lgn; } if (icompz == 1) { /* Computing 2nd power */ i__1 = *n; lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; liwmin = *n * 6 + 6 + *n * 5 * lgn; } else if (icompz == 2) { /* Computing 2nd power */ i__1 = *n; lwmin = (*n << 2) + 1 + i__1 * i__1; liwmin = *n * 5 + 3; } } work[1] = (doublereal) lwmin; iwork[1] = liwmin; if (*lwork < lwmin && ! lquery) { *info = -8; } else if (*liwork < liwmin && ! lquery) { *info = -10; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSTEDC", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { if (icompz != 0) { z__[z_dim1 + 1] = 1.; } return 0; } /* If the following conditional clause is removed, then the routine */ /* will use the Divide and Conquer routine to compute only the */ /* eigenvalues, which requires (3N + 3N**2) real workspace and */ /* (2 + 5N + 2N lg(N)) integer workspace. */ /* Since on many architectures DSTERF is much faster than any other */ /* algorithm for finding eigenvalues only, it is used here */ /* as the default. If the conditional clause is removed, then */ /* information on the size of workspace needs to be changed. */ /* If COMPZ = 'N', use DSTERF to compute the eigenvalues. */ if (icompz == 0) { _starpu_dsterf_(n, &d__[1], &e[1], info); goto L50; } /* If N is smaller than the minimum divide size (SMLSIZ+1), then */ /* solve the problem with another solver. */ if (*n <= smlsiz) { _starpu_dsteqr_(compz, n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info); } else { /* If COMPZ = 'V', the Z matrix must be stored elsewhere for later */ /* use. */ if (icompz == 1) { storez = *n * *n + 1; } else { storez = 1; } if (icompz == 2) { _starpu_dlaset_("Full", n, n, &c_b17, &c_b18, &z__[z_offset], ldz); } /* Scale. */ orgnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); if (orgnrm == 0.) { goto L50; } eps = _starpu_dlamch_("Epsilon"); start = 1; /* while ( START <= N ) */ L10: if (start <= *n) { /* Let FINISH be the position of the next subdiagonal entry */ /* such that E( FINISH ) <= TINY or FINISH = N if no such */ /* subdiagonal exists. The matrix identified by the elements */ /* between START and FINISH constitutes an independent */ /* sub-problem. */ finish = start; L20: if (finish < *n) { tiny = eps * sqrt((d__1 = d__[finish], abs(d__1))) * sqrt(( d__2 = d__[finish + 1], abs(d__2))); if ((d__1 = e[finish], abs(d__1)) > tiny) { ++finish; goto L20; } } /* (Sub) Problem determined. Compute its size and solve it. */ m = finish - start + 1; if (m == 1) { start = finish + 1; goto L10; } if (m > smlsiz) { /* Scale. */ orgnrm = _starpu_dlanst_("M", &m, &d__[start], &e[start]); _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b18, &m, &c__1, &d__[ start], &m, info); i__1 = m - 1; i__2 = m - 1; _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b18, &i__1, &c__1, &e[ start], &i__2, info); if (icompz == 1) { strtrw = 1; } else { strtrw = start; } _starpu_dlaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[strtrw + start * z_dim1], ldz, &work[1], n, &work[storez], & iwork[1], info); if (*info != 0) { *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m + 1) + start - 1; goto L50; } /* Scale back. */ _starpu_dlascl_("G", &c__0, &c__0, &c_b18, &orgnrm, &m, &c__1, &d__[ start], &m, info); } else { if (icompz == 1) { /* Since QR won't update a Z matrix which is larger than */ /* the length of D, we must solve the sub-problem in a */ /* workspace and then multiply back into Z. */ _starpu_dsteqr_("I", &m, &d__[start], &e[start], &work[1], &m, & work[m * m + 1], info); _starpu_dlacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ storez], n); _starpu_dgemm_("N", "N", n, &m, &m, &c_b18, &work[storez], n, & work[1], &m, &c_b17, &z__[start * z_dim1 + 1], ldz); } else if (icompz == 2) { _starpu_dsteqr_("I", &m, &d__[start], &e[start], &z__[start + start * z_dim1], ldz, &work[1], info); } else { _starpu_dsterf_(&m, &d__[start], &e[start], info); } if (*info != 0) { *info = start * (*n + 1) + finish; goto L50; } } start = finish + 1; goto L10; } /* endwhile */ /* If the problem split any number of times, then the eigenvalues */ /* will not be properly ordered. Here we permute the eigenvalues */ /* (and the associated eigenvectors) into ascending order. */ if (m != *n) { if (icompz == 0) { /* Use Quick Sort */ _starpu_dlasrt_("I", n, &d__[1], info); } else { /* Use Selection Sort to minimize swaps of eigenvectors */ i__1 = *n; for (ii = 2; ii <= i__1; ++ii) { i__ = ii - 1; k = i__; p = d__[i__]; i__2 = *n; for (j = ii; j <= i__2; ++j) { if (d__[j] < p) { k = j; p = d__[j]; } /* L30: */ } if (k != i__) { d__[k] = d__[i__]; d__[i__] = p; _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], &c__1); } /* L40: */ } } } } L50: work[1] = (doublereal) lwmin; iwork[1] = liwmin; return 0; /* End of DSTEDC */ } /* _starpu_dstedc_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dstegr.c000066400000000000000000000207541507764646700206740ustar00rootroot00000000000000/* dstegr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dstegr_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer z_dim1, z_offset; /* Local variables */ extern /* Subroutine */ int _starpu_dstemr_(char *, char *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, integer *, logical *, doublereal *, integer *, integer *, integer *, integer *); logical tryrac; /* -- LAPACK computational routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSTEGR computes selected eigenvalues and, optionally, eigenvectors */ /* of a real symmetric tridiagonal matrix T. Any such unreduced matrix has */ /* a well defined set of pairwise different real eigenvalues, the corresponding */ /* real eigenvectors are pairwise orthogonal. */ /* The spectrum may be computed either completely or partially by specifying */ /* either an interval (VL,VU] or a range of indices IL:IU for the desired */ /* eigenvalues. */ /* DSTEGR is a compatability wrapper around the improved DSTEMR routine. */ /* See DSTEMR for further details. */ /* One important change is that the ABSTOL parameter no longer provides any */ /* benefit and hence is no longer used. */ /* Note : DSTEGR and DSTEMR work only on machines which follow */ /* IEEE-754 floating-point standard in their handling of infinities and */ /* NaNs. Normal execution may create these exceptiona values and hence */ /* may abort due to a floating point exception in environments which */ /* do not conform to the IEEE-754 standard. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* RANGE (input) CHARACTER*1 */ /* = 'A': all eigenvalues will be found. */ /* = 'V': all eigenvalues in the half-open interval (VL,VU] */ /* will be found. */ /* = 'I': the IL-th through IU-th eigenvalues will be found. */ /* N (input) INTEGER */ /* The order of the matrix. N >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the N diagonal elements of the tridiagonal matrix */ /* T. On exit, D is overwritten. */ /* E (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the (N-1) subdiagonal elements of the tridiagonal */ /* matrix T in elements 1 to N-1 of E. E(N) need not be set on */ /* input, but is used internally as workspace. */ /* On exit, E is overwritten. */ /* VL (input) DOUBLE PRECISION */ /* VU (input) DOUBLE PRECISION */ /* If RANGE='V', the lower and upper bounds of the interval to */ /* be searched for eigenvalues. VL < VU. */ /* Not referenced if RANGE = 'A' or 'I'. */ /* IL (input) INTEGER */ /* IU (input) INTEGER */ /* If RANGE='I', the indices (in ascending order) of the */ /* smallest and largest eigenvalues to be returned. */ /* 1 <= IL <= IU <= N, if N > 0. */ /* Not referenced if RANGE = 'A' or 'V'. */ /* ABSTOL (input) DOUBLE PRECISION */ /* Unused. Was the absolute error tolerance for the */ /* eigenvalues/eigenvectors in previous versions. */ /* M (output) INTEGER */ /* The total number of eigenvalues found. 0 <= M <= N. */ /* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* The first M elements contain the selected eigenvalues in */ /* ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ /* If JOBZ = 'V', and if INFO = 0, then the first M columns of Z */ /* contain the orthonormal eigenvectors of the matrix T */ /* corresponding to the selected eigenvalues, with the i-th */ /* column of Z holding the eigenvector associated with W(i). */ /* If JOBZ = 'N', then Z is not referenced. */ /* Note: the user must ensure that at least max(1,M) columns are */ /* supplied in the array Z; if RANGE = 'V', the exact value of M */ /* is not known in advance and an upper bound must be used. */ /* Supplying N columns is always safe. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', then LDZ >= max(1,N). */ /* ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) */ /* The support of the eigenvectors in Z, i.e., the indices */ /* indicating the nonzero elements in Z. The i-th computed eigenvector */ /* is nonzero only in elements ISUPPZ( 2*i-1 ) through */ /* ISUPPZ( 2*i ). This is relevant in the case when the matrix */ /* is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */ /* On exit, if INFO = 0, WORK(1) returns the optimal */ /* (and minimal) LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,18*N) */ /* if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace/output) INTEGER array, dimension (LIWORK) */ /* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. LIWORK >= max(1,10*N) */ /* if the eigenvectors are desired, and LIWORK >= max(1,8*N) */ /* if only the eigenvalues are to be computed. */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the optimal size of the IWORK array, */ /* returns this value as the first entry of the IWORK array, and */ /* no error message related to LIWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* On exit, INFO */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = 1X, internal error in DLARRE, */ /* if INFO = 2X, internal error in DLARRV. */ /* Here, the digit X = ABS( IINFO ) < 10, where IINFO is */ /* the nonzero error code returned by DLARRE or */ /* DLARRV, respectively. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Inderjit Dhillon, IBM Almaden, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* Christof Voemel, LBNL/NERSC, USA */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --d__; --e; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --isuppz; --work; --iwork; /* Function Body */ *info = 0; tryrac = FALSE_; _starpu_dstemr_(jobz, range, n, &d__[1], &e[1], vl, vu, il, iu, m, &w[1], &z__[ z_offset], ldz, n, &isuppz[1], &tryrac, &work[1], lwork, &iwork[1] , liwork, info); /* End of DSTEGR */ return 0; } /* _starpu_dstegr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dstein.c000066400000000000000000000310621507764646700206640ustar00rootroot00000000000000/* dstein.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__2 = 2; static integer c__1 = 1; static integer c_n1 = -1; /* Subroutine */ int _starpu_dstein_(integer *n, doublereal *d__, doublereal *e, integer *m, doublereal *w, integer *iblock, integer *isplit, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3, d__4, d__5; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, b1, j1, bn; doublereal xj, scl, eps, sep, nrm, tol; integer its; doublereal xjm, ztr, eps1; integer jblk, nblk; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); integer jmax; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); integer iseed[4], gpind, iinfo; extern doublereal _starpu_dasum_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); doublereal ortol; integer indrv1, indrv2, indrv3, indrv4, indrv5; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlagtf_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer * , integer *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dlagts_( integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer nrmchk; extern /* Subroutine */ int _starpu_dlarnv_(integer *, integer *, integer *, doublereal *); integer blksiz; doublereal onenrm, dtpcrt, pertol; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSTEIN computes the eigenvectors of a real symmetric tridiagonal */ /* matrix T corresponding to specified eigenvalues, using inverse */ /* iteration. */ /* The maximum number of iterations allowed for each eigenvector is */ /* specified by an internal parameter MAXITS (currently set to 5). */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix. N >= 0. */ /* D (input) DOUBLE PRECISION array, dimension (N) */ /* The n diagonal elements of the tridiagonal matrix T. */ /* E (input) DOUBLE PRECISION array, dimension (N-1) */ /* The (n-1) subdiagonal elements of the tridiagonal matrix */ /* T, in elements 1 to N-1. */ /* M (input) INTEGER */ /* The number of eigenvectors to be found. 0 <= M <= N. */ /* W (input) DOUBLE PRECISION array, dimension (N) */ /* The first M elements of W contain the eigenvalues for */ /* which eigenvectors are to be computed. The eigenvalues */ /* should be grouped by split-off block and ordered from */ /* smallest to largest within the block. ( The output array */ /* W from DSTEBZ with ORDER = 'B' is expected here. ) */ /* IBLOCK (input) INTEGER array, dimension (N) */ /* The submatrix indices associated with the corresponding */ /* eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to */ /* the first submatrix from the top, =2 if W(i) belongs to */ /* the second submatrix, etc. ( The output array IBLOCK */ /* from DSTEBZ is expected here. ) */ /* ISPLIT (input) INTEGER array, dimension (N) */ /* The splitting points, at which T breaks up into submatrices. */ /* The first submatrix consists of rows/columns 1 to */ /* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */ /* through ISPLIT( 2 ), etc. */ /* ( The output array ISPLIT from DSTEBZ is expected here. ) */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, M) */ /* The computed eigenvectors. The eigenvector associated */ /* with the eigenvalue W(i) is stored in the i-th column of */ /* Z. Any vector which fails to converge is set to its current */ /* iterate after MAXITS iterations. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= max(1,N). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (5*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* IFAIL (output) INTEGER array, dimension (M) */ /* On normal exit, all elements of IFAIL are zero. */ /* If one or more eigenvectors fail to converge after */ /* MAXITS iterations, then their indices are stored in */ /* array IFAIL. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, then i eigenvectors failed to converge */ /* in MAXITS iterations. Their indices are stored in */ /* array IFAIL. */ /* Internal Parameters */ /* =================== */ /* MAXITS INTEGER, default = 5 */ /* The maximum number of iterations performed. */ /* EXTRA INTEGER, default = 2 */ /* The number of iterations performed after norm growth */ /* criterion is satisfied, should be at least 1. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; --w; --iblock; --isplit; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; --iwork; --ifail; /* Function Body */ *info = 0; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { ifail[i__] = 0; /* L10: */ } if (*n < 0) { *info = -1; } else if (*m < 0 || *m > *n) { *info = -4; } else if (*ldz < max(1,*n)) { *info = -9; } else { i__1 = *m; for (j = 2; j <= i__1; ++j) { if (iblock[j] < iblock[j - 1]) { *info = -6; goto L30; } if (iblock[j] == iblock[j - 1] && w[j] < w[j - 1]) { *info = -5; goto L30; } /* L20: */ } L30: ; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSTEIN", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *m == 0) { return 0; } else if (*n == 1) { z__[z_dim1 + 1] = 1.; return 0; } /* Get machine constants. */ eps = _starpu_dlamch_("Precision"); /* Initialize seed for random number generator DLARNV. */ for (i__ = 1; i__ <= 4; ++i__) { iseed[i__ - 1] = 1; /* L40: */ } /* Initialize pointers. */ indrv1 = 0; indrv2 = indrv1 + *n; indrv3 = indrv2 + *n; indrv4 = indrv3 + *n; indrv5 = indrv4 + *n; /* Compute eigenvectors of matrix blocks. */ j1 = 1; i__1 = iblock[*m]; for (nblk = 1; nblk <= i__1; ++nblk) { /* Find starting and ending indices of block nblk. */ if (nblk == 1) { b1 = 1; } else { b1 = isplit[nblk - 1] + 1; } bn = isplit[nblk]; blksiz = bn - b1 + 1; if (blksiz == 1) { goto L60; } gpind = b1; /* Compute reorthogonalization criterion and stopping criterion. */ onenrm = (d__1 = d__[b1], abs(d__1)) + (d__2 = e[b1], abs(d__2)); /* Computing MAX */ d__3 = onenrm, d__4 = (d__1 = d__[bn], abs(d__1)) + (d__2 = e[bn - 1], abs(d__2)); onenrm = max(d__3,d__4); i__2 = bn - 1; for (i__ = b1 + 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__4 = onenrm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = e[ i__ - 1], abs(d__2)) + (d__3 = e[i__], abs(d__3)); onenrm = max(d__4,d__5); /* L50: */ } ortol = onenrm * .001; dtpcrt = sqrt(.1 / blksiz); /* Loop through eigenvalues of block nblk. */ L60: jblk = 0; i__2 = *m; for (j = j1; j <= i__2; ++j) { if (iblock[j] != nblk) { j1 = j; goto L160; } ++jblk; xj = w[j]; /* Skip all the work if the block size is one. */ if (blksiz == 1) { work[indrv1 + 1] = 1.; goto L120; } /* If eigenvalues j and j-1 are too close, add a relatively */ /* small perturbation. */ if (jblk > 1) { eps1 = (d__1 = eps * xj, abs(d__1)); pertol = eps1 * 10.; sep = xj - xjm; if (sep < pertol) { xj = xjm + pertol; } } its = 0; nrmchk = 0; /* Get random starting vector. */ _starpu_dlarnv_(&c__2, iseed, &blksiz, &work[indrv1 + 1]); /* Copy the matrix T so it won't be destroyed in factorization. */ _starpu_dcopy_(&blksiz, &d__[b1], &c__1, &work[indrv4 + 1], &c__1); i__3 = blksiz - 1; _starpu_dcopy_(&i__3, &e[b1], &c__1, &work[indrv2 + 2], &c__1); i__3 = blksiz - 1; _starpu_dcopy_(&i__3, &e[b1], &c__1, &work[indrv3 + 1], &c__1); /* Compute LU factors with partial pivoting ( PT = LU ) */ tol = 0.; _starpu_dlagtf_(&blksiz, &work[indrv4 + 1], &xj, &work[indrv2 + 2], &work[ indrv3 + 1], &tol, &work[indrv5 + 1], &iwork[1], &iinfo); /* Update iteration count. */ L70: ++its; if (its > 5) { goto L100; } /* Normalize and scale the righthand side vector Pb. */ /* Computing MAX */ d__2 = eps, d__3 = (d__1 = work[indrv4 + blksiz], abs(d__1)); scl = blksiz * onenrm * max(d__2,d__3) / _starpu_dasum_(&blksiz, &work[ indrv1 + 1], &c__1); _starpu_dscal_(&blksiz, &scl, &work[indrv1 + 1], &c__1); /* Solve the system LU = Pb. */ _starpu_dlagts_(&c_n1, &blksiz, &work[indrv4 + 1], &work[indrv2 + 2], & work[indrv3 + 1], &work[indrv5 + 1], &iwork[1], &work[ indrv1 + 1], &tol, &iinfo); /* Reorthogonalize by modified Gram-Schmidt if eigenvalues are */ /* close enough. */ if (jblk == 1) { goto L90; } if ((d__1 = xj - xjm, abs(d__1)) > ortol) { gpind = j; } if (gpind != j) { i__3 = j - 1; for (i__ = gpind; i__ <= i__3; ++i__) { ztr = -_starpu_ddot_(&blksiz, &work[indrv1 + 1], &c__1, &z__[b1 + i__ * z_dim1], &c__1); _starpu_daxpy_(&blksiz, &ztr, &z__[b1 + i__ * z_dim1], &c__1, & work[indrv1 + 1], &c__1); /* L80: */ } } /* Check the infinity norm of the iterate. */ L90: jmax = _starpu_idamax_(&blksiz, &work[indrv1 + 1], &c__1); nrm = (d__1 = work[indrv1 + jmax], abs(d__1)); /* Continue for additional iterations after norm reaches */ /* stopping criterion. */ if (nrm < dtpcrt) { goto L70; } ++nrmchk; if (nrmchk < 3) { goto L70; } goto L110; /* If stopping criterion was not satisfied, update info and */ /* store eigenvector number in array ifail. */ L100: ++(*info); ifail[*info] = j; /* Accept iterate as jth eigenvector. */ L110: scl = 1. / _starpu_dnrm2_(&blksiz, &work[indrv1 + 1], &c__1); jmax = _starpu_idamax_(&blksiz, &work[indrv1 + 1], &c__1); if (work[indrv1 + jmax] < 0.) { scl = -scl; } _starpu_dscal_(&blksiz, &scl, &work[indrv1 + 1], &c__1); L120: i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { z__[i__ + j * z_dim1] = 0.; /* L130: */ } i__3 = blksiz; for (i__ = 1; i__ <= i__3; ++i__) { z__[b1 + i__ - 1 + j * z_dim1] = work[indrv1 + i__]; /* L140: */ } /* Save the shift to check eigenvalue spacing at next */ /* iteration. */ xjm = xj; /* L150: */ } L160: ; } return 0; /* End of DSTEIN */ } /* _starpu_dstein_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dstemr.c000066400000000000000000000620141507764646700206750ustar00rootroot00000000000000/* dstemr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b18 = .001; /* Subroutine */ int _starpu_dstemr_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, integer *m, doublereal *w, doublereal *z__, integer *ldz, integer *nzc, integer *isuppz, logical *tryrac, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j; doublereal r1, r2; integer jj; doublereal cs; integer in; doublereal sn, wl, wu; integer iil, iiu; doublereal eps, tmp; integer indd, iend, jblk, wend; doublereal rmin, rmax; integer itmp; doublereal tnrm; extern /* Subroutine */ int _starpu_dlae2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); integer inde2, itmp2; doublereal rtol1, rtol2; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal scale; integer indgp; extern logical _starpu_lsame_(char *, char *); integer iinfo, iindw, ilast; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); integer lwmin; logical wantz; extern /* Subroutine */ int _starpu_dlaev2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *); logical alleig; integer ibegin; logical indeig; integer iindbl; logical valeig; extern /* Subroutine */ int _starpu_dlarrc_(char *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *), _starpu_dlarre_(char *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); integer wbegin; doublereal safmin; extern /* Subroutine */ int _starpu_dlarrj_(integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); doublereal bignum; integer inderr, iindwk, indgrs, offset; extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dlarrr_(integer *, doublereal *, doublereal *, integer *), _starpu_dlarrv_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlasrt_(char *, integer *, doublereal *, integer *); doublereal thresh; integer iinspl, ifirst, indwrk, liwmin, nzcmin; doublereal pivmin; integer nsplit; doublereal smlnum; logical lquery, zquery; /* -- LAPACK computational routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSTEMR computes selected eigenvalues and, optionally, eigenvectors */ /* of a real symmetric tridiagonal matrix T. Any such unreduced matrix has */ /* a well defined set of pairwise different real eigenvalues, the corresponding */ /* real eigenvectors are pairwise orthogonal. */ /* The spectrum may be computed either completely or partially by specifying */ /* either an interval (VL,VU] or a range of indices IL:IU for the desired */ /* eigenvalues. */ /* Depending on the number of desired eigenvalues, these are computed either */ /* by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are */ /* computed by the use of various suitable L D L^T factorizations near clusters */ /* of close eigenvalues (referred to as RRRs, Relatively Robust */ /* Representations). An informal sketch of the algorithm follows. */ /* For each unreduced block (submatrix) of T, */ /* (a) Compute T - sigma I = L D L^T, so that L and D */ /* define all the wanted eigenvalues to high relative accuracy. */ /* This means that small relative changes in the entries of D and L */ /* cause only small relative changes in the eigenvalues and */ /* eigenvectors. The standard (unfactored) representation of the */ /* tridiagonal matrix T does not have this property in general. */ /* (b) Compute the eigenvalues to suitable accuracy. */ /* If the eigenvectors are desired, the algorithm attains full */ /* accuracy of the computed eigenvalues only right before */ /* the corresponding vectors have to be computed, see steps c) and d). */ /* (c) For each cluster of close eigenvalues, select a new */ /* shift close to the cluster, find a new factorization, and refine */ /* the shifted eigenvalues to suitable accuracy. */ /* (d) For each eigenvalue with a large enough relative separation compute */ /* the corresponding eigenvector by forming a rank revealing twisted */ /* factorization. Go back to (c) for any clusters that remain. */ /* For more details, see: */ /* - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations */ /* to compute orthogonal eigenvectors of symmetric tridiagonal matrices," */ /* Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. */ /* - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and */ /* Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, */ /* 2004. Also LAPACK Working Note 154. */ /* - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric */ /* tridiagonal eigenvalue/eigenvector problem", */ /* Computer Science Division Technical Report No. UCB/CSD-97-971, */ /* UC Berkeley, May 1997. */ /* Notes: */ /* 1.DSTEMR works only on machines which follow IEEE-754 */ /* floating-point standard in their handling of infinities and NaNs. */ /* This permits the use of efficient inner loops avoiding a check for */ /* zero divisors. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* RANGE (input) CHARACTER*1 */ /* = 'A': all eigenvalues will be found. */ /* = 'V': all eigenvalues in the half-open interval (VL,VU] */ /* will be found. */ /* = 'I': the IL-th through IU-th eigenvalues will be found. */ /* N (input) INTEGER */ /* The order of the matrix. N >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the N diagonal elements of the tridiagonal matrix */ /* T. On exit, D is overwritten. */ /* E (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the (N-1) subdiagonal elements of the tridiagonal */ /* matrix T in elements 1 to N-1 of E. E(N) need not be set on */ /* input, but is used internally as workspace. */ /* On exit, E is overwritten. */ /* VL (input) DOUBLE PRECISION */ /* VU (input) DOUBLE PRECISION */ /* If RANGE='V', the lower and upper bounds of the interval to */ /* be searched for eigenvalues. VL < VU. */ /* Not referenced if RANGE = 'A' or 'I'. */ /* IL (input) INTEGER */ /* IU (input) INTEGER */ /* If RANGE='I', the indices (in ascending order) of the */ /* smallest and largest eigenvalues to be returned. */ /* 1 <= IL <= IU <= N, if N > 0. */ /* Not referenced if RANGE = 'A' or 'V'. */ /* M (output) INTEGER */ /* The total number of eigenvalues found. 0 <= M <= N. */ /* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* The first M elements contain the selected eigenvalues in */ /* ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ /* If JOBZ = 'V', and if INFO = 0, then the first M columns of Z */ /* contain the orthonormal eigenvectors of the matrix T */ /* corresponding to the selected eigenvalues, with the i-th */ /* column of Z holding the eigenvector associated with W(i). */ /* If JOBZ = 'N', then Z is not referenced. */ /* Note: the user must ensure that at least max(1,M) columns are */ /* supplied in the array Z; if RANGE = 'V', the exact value of M */ /* is not known in advance and can be computed with a workspace */ /* query by setting NZC = -1, see below. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', then LDZ >= max(1,N). */ /* NZC (input) INTEGER */ /* The number of eigenvectors to be held in the array Z. */ /* If RANGE = 'A', then NZC >= max(1,N). */ /* If RANGE = 'V', then NZC >= the number of eigenvalues in (VL,VU]. */ /* If RANGE = 'I', then NZC >= IU-IL+1. */ /* If NZC = -1, then a workspace query is assumed; the */ /* routine calculates the number of columns of the array Z that */ /* are needed to hold the eigenvectors. */ /* This value is returned as the first entry of the Z array, and */ /* no error message related to NZC is issued by XERBLA. */ /* ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) */ /* The support of the eigenvectors in Z, i.e., the indices */ /* indicating the nonzero elements in Z. The i-th computed eigenvector */ /* is nonzero only in elements ISUPPZ( 2*i-1 ) through */ /* ISUPPZ( 2*i ). This is relevant in the case when the matrix */ /* is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. */ /* TRYRAC (input/output) LOGICAL */ /* If TRYRAC.EQ..TRUE., indicates that the code should check whether */ /* the tridiagonal matrix defines its eigenvalues to high relative */ /* accuracy. If so, the code uses relative-accuracy preserving */ /* algorithms that might be (a bit) slower depending on the matrix. */ /* If the matrix does not define its eigenvalues to high relative */ /* accuracy, the code can uses possibly faster algorithms. */ /* If TRYRAC.EQ..FALSE., the code is not required to guarantee */ /* relatively accurate eigenvalues and can use the fastest possible */ /* techniques. */ /* On exit, a .TRUE. TRYRAC will be set to .FALSE. if the matrix */ /* does not define its eigenvalues to high relative accuracy. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */ /* On exit, if INFO = 0, WORK(1) returns the optimal */ /* (and minimal) LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,18*N) */ /* if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace/output) INTEGER array, dimension (LIWORK) */ /* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. LIWORK >= max(1,10*N) */ /* if the eigenvectors are desired, and LIWORK >= max(1,8*N) */ /* if only the eigenvalues are to be computed. */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the optimal size of the IWORK array, */ /* returns this value as the first entry of the IWORK array, and */ /* no error message related to LIWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* On exit, INFO */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = 1X, internal error in DLARRE, */ /* if INFO = 2X, internal error in DLARRV. */ /* Here, the digit X = ABS( IINFO ) < 10, where IINFO is */ /* the nonzero error code returned by DLARRE or */ /* DLARRV, respectively. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Beresford Parlett, University of California, Berkeley, USA */ /* Jim Demmel, University of California, Berkeley, USA */ /* Inderjit Dhillon, University of Texas, Austin, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* Christof Voemel, University of California, Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --isuppz; --work; --iwork; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); alleig = _starpu_lsame_(range, "A"); valeig = _starpu_lsame_(range, "V"); indeig = _starpu_lsame_(range, "I"); lquery = *lwork == -1 || *liwork == -1; zquery = *nzc == -1; /* DSTEMR needs WORK of size 6*N, IWORK of size 3*N. */ /* In addition, DLARRE needs WORK of size 6*N, IWORK of size 5*N. */ /* Furthermore, DLARRV needs WORK of size 12*N, IWORK of size 7*N. */ if (wantz) { lwmin = *n * 18; liwmin = *n * 10; } else { /* need less workspace if only the eigenvalues are wanted */ lwmin = *n * 12; liwmin = *n << 3; } wl = 0.; wu = 0.; iil = 0; iiu = 0; if (valeig) { /* We do not reference VL, VU in the cases RANGE = 'I','A' */ /* The interval (WL, WU] contains all the wanted eigenvalues. */ /* It is either given by the user or computed in DLARRE. */ wl = *vl; wu = *vu; } else if (indeig) { /* We do not reference IL, IU in the cases RANGE = 'V','A' */ iil = *il; iiu = *iu; } *info = 0; if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; } else if (*n < 0) { *info = -3; } else if (valeig && *n > 0 && wu <= wl) { *info = -7; } else if (indeig && (iil < 1 || iil > *n)) { *info = -8; } else if (indeig && (iiu < iil || iiu > *n)) { *info = -9; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -13; } else if (*lwork < lwmin && ! lquery) { *info = -17; } else if (*liwork < liwmin && ! lquery) { *info = -19; } /* Get machine constants. */ safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); /* Computing MIN */ d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); rmax = min(d__1,d__2); if (*info == 0) { work[1] = (doublereal) lwmin; iwork[1] = liwmin; if (wantz && alleig) { nzcmin = *n; } else if (wantz && valeig) { _starpu_dlarrc_("T", n, vl, vu, &d__[1], &e[1], &safmin, &nzcmin, &itmp, & itmp2, info); } else if (wantz && indeig) { nzcmin = iiu - iil + 1; } else { /* WANTZ .EQ. FALSE. */ nzcmin = 0; } if (zquery && *info == 0) { z__[z_dim1 + 1] = (doublereal) nzcmin; } else if (*nzc < nzcmin && ! zquery) { *info = -14; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSTEMR", &i__1); return 0; } else if (lquery || zquery) { return 0; } /* Handle N = 0, 1, and 2 cases immediately */ *m = 0; if (*n == 0) { return 0; } if (*n == 1) { if (alleig || indeig) { *m = 1; w[1] = d__[1]; } else { if (wl < d__[1] && wu >= d__[1]) { *m = 1; w[1] = d__[1]; } } if (wantz && ! zquery) { z__[z_dim1 + 1] = 1.; isuppz[1] = 1; isuppz[2] = 1; } return 0; } if (*n == 2) { if (! wantz) { _starpu_dlae2_(&d__[1], &e[1], &d__[2], &r1, &r2); } else if (wantz && ! zquery) { _starpu_dlaev2_(&d__[1], &e[1], &d__[2], &r1, &r2, &cs, &sn); } if (alleig || valeig && r2 > wl && r2 <= wu || indeig && iil == 1) { ++(*m); w[*m] = r2; if (wantz && ! zquery) { z__[*m * z_dim1 + 1] = -sn; z__[*m * z_dim1 + 2] = cs; /* Note: At most one of SN and CS can be zero. */ if (sn != 0.) { if (cs != 0.) { isuppz[(*m << 1) - 1] = 1; isuppz[(*m << 1) - 1] = 2; } else { isuppz[(*m << 1) - 1] = 1; isuppz[(*m << 1) - 1] = 1; } } else { isuppz[(*m << 1) - 1] = 2; isuppz[*m * 2] = 2; } } } if (alleig || valeig && r1 > wl && r1 <= wu || indeig && iiu == 2) { ++(*m); w[*m] = r1; if (wantz && ! zquery) { z__[*m * z_dim1 + 1] = cs; z__[*m * z_dim1 + 2] = sn; /* Note: At most one of SN and CS can be zero. */ if (sn != 0.) { if (cs != 0.) { isuppz[(*m << 1) - 1] = 1; isuppz[(*m << 1) - 1] = 2; } else { isuppz[(*m << 1) - 1] = 1; isuppz[(*m << 1) - 1] = 1; } } else { isuppz[(*m << 1) - 1] = 2; isuppz[*m * 2] = 2; } } } return 0; } /* Continue with general N */ indgrs = 1; inderr = (*n << 1) + 1; indgp = *n * 3 + 1; indd = (*n << 2) + 1; inde2 = *n * 5 + 1; indwrk = *n * 6 + 1; iinspl = 1; iindbl = *n + 1; iindw = (*n << 1) + 1; iindwk = *n * 3 + 1; /* Scale matrix to allowable range, if necessary. */ /* The allowable range is related to the PIVMIN parameter; see the */ /* comments in DLARRD. The preference for scaling small values */ /* up is heuristic; we expect users' matrices not to be close to the */ /* RMAX threshold. */ scale = 1.; tnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); if (tnrm > 0. && tnrm < rmin) { scale = rmin / tnrm; } else if (tnrm > rmax) { scale = rmax / tnrm; } if (scale != 1.) { _starpu_dscal_(n, &scale, &d__[1], &c__1); i__1 = *n - 1; _starpu_dscal_(&i__1, &scale, &e[1], &c__1); tnrm *= scale; if (valeig) { /* If eigenvalues in interval have to be found, */ /* scale (WL, WU] accordingly */ wl *= scale; wu *= scale; } } /* Compute the desired eigenvalues of the tridiagonal after splitting */ /* into smaller subblocks if the corresponding off-diagonal elements */ /* are small */ /* THRESH is the splitting parameter for DLARRE */ /* A negative THRESH forces the old splitting criterion based on the */ /* size of the off-diagonal. A positive THRESH switches to splitting */ /* which preserves relative accuracy. */ if (*tryrac) { /* Test whether the matrix warrants the more expensive relative approach. */ _starpu_dlarrr_(n, &d__[1], &e[1], &iinfo); } else { /* The user does not care about relative accurately eigenvalues */ iinfo = -1; } /* Set the splitting criterion */ if (iinfo == 0) { thresh = eps; } else { thresh = -eps; /* relative accuracy is desired but T does not guarantee it */ *tryrac = FALSE_; } if (*tryrac) { /* Copy original diagonal, needed to guarantee relative accuracy */ _starpu_dcopy_(n, &d__[1], &c__1, &work[indd], &c__1); } /* Store the squares of the offdiagonal values of T */ i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { /* Computing 2nd power */ d__1 = e[j]; work[inde2 + j - 1] = d__1 * d__1; /* L5: */ } /* Set the tolerance parameters for bisection */ if (! wantz) { /* DLARRE computes the eigenvalues to full precision. */ rtol1 = eps * 4.; rtol2 = eps * 4.; } else { /* DLARRE computes the eigenvalues to less than full precision. */ /* DLARRV will refine the eigenvalue approximations, and we can */ /* need less accurate initial bisection in DLARRE. */ /* Note: these settings do only affect the subset case and DLARRE */ rtol1 = sqrt(eps); /* Computing MAX */ d__1 = sqrt(eps) * .005, d__2 = eps * 4.; rtol2 = max(d__1,d__2); } _starpu_dlarre_(range, n, &wl, &wu, &iil, &iiu, &d__[1], &e[1], &work[inde2], & rtol1, &rtol2, &thresh, &nsplit, &iwork[iinspl], m, &w[1], &work[ inderr], &work[indgp], &iwork[iindbl], &iwork[iindw], &work[ indgrs], &pivmin, &work[indwrk], &iwork[iindwk], &iinfo); if (iinfo != 0) { *info = abs(iinfo) + 10; return 0; } /* Note that if RANGE .NE. 'V', DLARRE computes bounds on the desired */ /* part of the spectrum. All desired eigenvalues are contained in */ /* (WL,WU] */ if (wantz) { /* Compute the desired eigenvectors corresponding to the computed */ /* eigenvalues */ _starpu_dlarrv_(n, &wl, &wu, &d__[1], &e[1], &pivmin, &iwork[iinspl], m, & c__1, m, &c_b18, &rtol1, &rtol2, &w[1], &work[inderr], &work[ indgp], &iwork[iindbl], &iwork[iindw], &work[indgrs], &z__[ z_offset], ldz, &isuppz[1], &work[indwrk], &iwork[iindwk], & iinfo); if (iinfo != 0) { *info = abs(iinfo) + 20; return 0; } } else { /* DLARRE computes eigenvalues of the (shifted) root representation */ /* DLARRV returns the eigenvalues of the unshifted matrix. */ /* However, if the eigenvectors are not desired by the user, we need */ /* to apply the corresponding shifts from DLARRE to obtain the */ /* eigenvalues of the original matrix. */ i__1 = *m; for (j = 1; j <= i__1; ++j) { itmp = iwork[iindbl + j - 1]; w[j] += e[iwork[iinspl + itmp - 1]]; /* L20: */ } } if (*tryrac) { /* Refine computed eigenvalues so that they are relatively accurate */ /* with respect to the original matrix T. */ ibegin = 1; wbegin = 1; i__1 = iwork[iindbl + *m - 1]; for (jblk = 1; jblk <= i__1; ++jblk) { iend = iwork[iinspl + jblk - 1]; in = iend - ibegin + 1; wend = wbegin - 1; /* check if any eigenvalues have to be refined in this block */ L36: if (wend < *m) { if (iwork[iindbl + wend] == jblk) { ++wend; goto L36; } } if (wend < wbegin) { ibegin = iend + 1; goto L39; } offset = iwork[iindw + wbegin - 1] - 1; ifirst = iwork[iindw + wbegin - 1]; ilast = iwork[iindw + wend - 1]; rtol2 = eps * 4.; _starpu_dlarrj_(&in, &work[indd + ibegin - 1], &work[inde2 + ibegin - 1], &ifirst, &ilast, &rtol2, &offset, &w[wbegin], &work[ inderr + wbegin - 1], &work[indwrk], &iwork[iindwk], & pivmin, &tnrm, &iinfo); ibegin = iend + 1; wbegin = wend + 1; L39: ; } } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (scale != 1.) { d__1 = 1. / scale; _starpu_dscal_(m, &d__1, &w[1], &c__1); } /* If eigenvalues are not in increasing order, then sort them, */ /* possibly along with eigenvectors. */ if (nsplit > 1) { if (! wantz) { _starpu_dlasrt_("I", m, &w[1], &iinfo); if (iinfo != 0) { *info = 3; return 0; } } else { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { i__ = 0; tmp = w[j]; i__2 = *m; for (jj = j + 1; jj <= i__2; ++jj) { if (w[jj] < tmp) { i__ = jj; tmp = w[jj]; } /* L50: */ } if (i__ != 0) { w[i__] = w[j]; w[j] = tmp; if (wantz) { _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], &c__1); itmp = isuppz[(i__ << 1) - 1]; isuppz[(i__ << 1) - 1] = isuppz[(j << 1) - 1]; isuppz[(j << 1) - 1] = itmp; itmp = isuppz[i__ * 2]; isuppz[i__ * 2] = isuppz[j * 2]; isuppz[j * 2] = itmp; } } /* L60: */ } } } work[1] = (doublereal) lwmin; iwork[1] = liwmin; return 0; /* End of DSTEMR */ } /* _starpu_dstemr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsteqr.c000066400000000000000000000354601507764646700207060ustar00rootroot00000000000000/* dsteqr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b9 = 0.; static doublereal c_b10 = 1.; static integer c__0 = 0; static integer c__1 = 1; static integer c__2 = 2; /* Subroutine */ int _starpu_dsteqr_(char *compz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ doublereal b, c__, f, g; integer i__, j, k, l, m; doublereal p, r__, s; integer l1, ii, mm, lm1, mm1, nm1; doublereal rt1, rt2, eps; integer lsv; doublereal tst, eps2; integer lend, jtot; extern /* Subroutine */ int _starpu_dlae2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); doublereal anorm; extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaev2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); integer lendm1, lendp1; extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); integer iscale; extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); doublereal safmin; extern /* Subroutine */ int _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); doublereal safmax; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dlasrt_(char *, integer *, doublereal *, integer *); integer lendsv; doublereal ssfmin; integer nmaxit, icompz; doublereal ssfmax; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSTEQR computes all eigenvalues and, optionally, eigenvectors of a */ /* symmetric tridiagonal matrix using the implicit QL or QR method. */ /* The eigenvectors of a full or band symmetric matrix can also be found */ /* if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to */ /* tridiagonal form. */ /* Arguments */ /* ========= */ /* COMPZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only. */ /* = 'V': Compute eigenvalues and eigenvectors of the original */ /* symmetric matrix. On entry, Z must contain the */ /* orthogonal matrix used to reduce the original matrix */ /* to tridiagonal form. */ /* = 'I': Compute eigenvalues and eigenvectors of the */ /* tridiagonal matrix. Z is initialized to the identity */ /* matrix. */ /* N (input) INTEGER */ /* The order of the matrix. N >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the diagonal elements of the tridiagonal matrix. */ /* On exit, if INFO = 0, the eigenvalues in ascending order. */ /* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, the (n-1) subdiagonal elements of the tridiagonal */ /* matrix. */ /* On exit, E has been destroyed. */ /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) */ /* On entry, if COMPZ = 'V', then Z contains the orthogonal */ /* matrix used in the reduction to tridiagonal form. */ /* On exit, if INFO = 0, then if COMPZ = 'V', Z contains the */ /* orthonormal eigenvectors of the original symmetric matrix, */ /* and if COMPZ = 'I', Z contains the orthonormal eigenvectors */ /* of the symmetric tridiagonal matrix. */ /* If COMPZ = 'N', then Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* eigenvectors are desired, then LDZ >= max(1,N). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) */ /* If COMPZ = 'N', then WORK is not referenced. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: the algorithm has failed to find all the eigenvalues in */ /* a total of 30*N iterations; if INFO = i, then i */ /* elements of E have not converged to zero; on exit, D */ /* and E contain the elements of a symmetric tridiagonal */ /* matrix which is orthogonally similar to the original */ /* matrix. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; /* Function Body */ *info = 0; if (_starpu_lsame_(compz, "N")) { icompz = 0; } else if (_starpu_lsame_(compz, "V")) { icompz = 1; } else if (_starpu_lsame_(compz, "I")) { icompz = 2; } else { icompz = -1; } if (icompz < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSTEQR", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { if (icompz == 2) { z__[z_dim1 + 1] = 1.; } return 0; } /* Determine the unit roundoff and over/underflow thresholds. */ eps = _starpu_dlamch_("E"); /* Computing 2nd power */ d__1 = eps; eps2 = d__1 * d__1; safmin = _starpu_dlamch_("S"); safmax = 1. / safmin; ssfmax = sqrt(safmax) / 3.; ssfmin = sqrt(safmin) / eps2; /* Compute the eigenvalues and eigenvectors of the tridiagonal */ /* matrix. */ if (icompz == 2) { _starpu_dlaset_("Full", n, n, &c_b9, &c_b10, &z__[z_offset], ldz); } nmaxit = *n * 30; jtot = 0; /* Determine where the matrix splits and choose QL or QR iteration */ /* for each block, according to whether top or bottom diagonal */ /* element is smaller. */ l1 = 1; nm1 = *n - 1; L10: if (l1 > *n) { goto L160; } if (l1 > 1) { e[l1 - 1] = 0.; } if (l1 <= nm1) { i__1 = nm1; for (m = l1; m <= i__1; ++m) { tst = (d__1 = e[m], abs(d__1)); if (tst == 0.) { goto L30; } if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m + 1], abs(d__2))) * eps) { e[m] = 0.; goto L30; } /* L20: */ } } m = *n; L30: l = l1; lsv = l; lend = m; lendsv = lend; l1 = m + 1; if (lend == l) { goto L10; } /* Scale submatrix in rows and columns L to LEND */ i__1 = lend - l + 1; anorm = _starpu_dlanst_("I", &i__1, &d__[l], &e[l]); iscale = 0; if (anorm == 0.) { goto L10; } if (anorm > ssfmax) { iscale = 1; i__1 = lend - l + 1; _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, info); i__1 = lend - l; _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, info); } else if (anorm < ssfmin) { iscale = 2; i__1 = lend - l + 1; _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, info); i__1 = lend - l; _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, info); } /* Choose between QL and QR iteration */ if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { lend = lsv; l = lendsv; } if (lend > l) { /* QL Iteration */ /* Look for small subdiagonal element. */ L40: if (l != lend) { lendm1 = lend - 1; i__1 = lendm1; for (m = l; m <= i__1; ++m) { /* Computing 2nd power */ d__2 = (d__1 = e[m], abs(d__1)); tst = d__2 * d__2; if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + 1], abs(d__2)) + safmin) { goto L60; } /* L50: */ } } m = lend; L60: if (m < lend) { e[m] = 0.; } p = d__[l]; if (m == l) { goto L80; } /* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 */ /* to compute its eigensystem. */ if (m == l + 1) { if (icompz > 0) { _starpu_dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); work[l] = c__; work[*n - 1 + l] = s; _starpu_dlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & z__[l * z_dim1 + 1], ldz); } else { _starpu_dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); } d__[l] = rt1; d__[l + 1] = rt2; e[l] = 0.; l += 2; if (l <= lend) { goto L40; } goto L140; } if (jtot == nmaxit) { goto L140; } ++jtot; /* Form shift. */ g = (d__[l + 1] - p) / (e[l] * 2.); r__ = _starpu_dlapy2_(&g, &c_b10); g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); s = 1.; c__ = 1.; p = 0.; /* Inner loop */ mm1 = m - 1; i__1 = l; for (i__ = mm1; i__ >= i__1; --i__) { f = s * e[i__]; b = c__ * e[i__]; _starpu_dlartg_(&g, &f, &c__, &s, &r__); if (i__ != m - 1) { e[i__ + 1] = r__; } g = d__[i__ + 1] - p; r__ = (d__[i__] - g) * s + c__ * 2. * b; p = s * r__; d__[i__ + 1] = g + p; g = c__ * r__ - b; /* If eigenvectors are desired, then save rotations. */ if (icompz > 0) { work[i__] = c__; work[*n - 1 + i__] = -s; } /* L70: */ } /* If eigenvectors are desired, then apply saved rotations. */ if (icompz > 0) { mm = m - l + 1; _starpu_dlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l * z_dim1 + 1], ldz); } d__[l] -= p; e[l] = g; goto L40; /* Eigenvalue found. */ L80: d__[l] = p; ++l; if (l <= lend) { goto L40; } goto L140; } else { /* QR Iteration */ /* Look for small superdiagonal element. */ L90: if (l != lend) { lendp1 = lend + 1; i__1 = lendp1; for (m = l; m >= i__1; --m) { /* Computing 2nd power */ d__2 = (d__1 = e[m - 1], abs(d__1)); tst = d__2 * d__2; if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m - 1], abs(d__2)) + safmin) { goto L110; } /* L100: */ } } m = lend; L110: if (m > lend) { e[m - 1] = 0.; } p = d__[l]; if (m == l) { goto L130; } /* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 */ /* to compute its eigensystem. */ if (m == l - 1) { if (icompz > 0) { _starpu_dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) ; work[m] = c__; work[*n - 1 + m] = s; _starpu_dlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & z__[(l - 1) * z_dim1 + 1], ldz); } else { _starpu_dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); } d__[l - 1] = rt1; d__[l] = rt2; e[l - 1] = 0.; l += -2; if (l >= lend) { goto L90; } goto L140; } if (jtot == nmaxit) { goto L140; } ++jtot; /* Form shift. */ g = (d__[l - 1] - p) / (e[l - 1] * 2.); r__ = _starpu_dlapy2_(&g, &c_b10); g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); s = 1.; c__ = 1.; p = 0.; /* Inner loop */ lm1 = l - 1; i__1 = lm1; for (i__ = m; i__ <= i__1; ++i__) { f = s * e[i__]; b = c__ * e[i__]; _starpu_dlartg_(&g, &f, &c__, &s, &r__); if (i__ != m) { e[i__ - 1] = r__; } g = d__[i__] - p; r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; p = s * r__; d__[i__] = g + p; g = c__ * r__ - b; /* If eigenvectors are desired, then save rotations. */ if (icompz > 0) { work[i__] = c__; work[*n - 1 + i__] = s; } /* L120: */ } /* If eigenvectors are desired, then apply saved rotations. */ if (icompz > 0) { mm = l - m + 1; _starpu_dlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m * z_dim1 + 1], ldz); } d__[l] -= p; e[lm1] = g; goto L90; /* Eigenvalue found. */ L130: d__[l] = p; --l; if (l >= lend) { goto L90; } goto L140; } /* Undo scaling if necessary */ L140: if (iscale == 1) { i__1 = lendsv - lsv + 1; _starpu_dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], n, info); i__1 = lendsv - lsv; _starpu_dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, info); } else if (iscale == 2) { i__1 = lendsv - lsv + 1; _starpu_dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], n, info); i__1 = lendsv - lsv; _starpu_dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, info); } /* Check for no convergence to an eigenvalue after a total */ /* of N*MAXIT iterations. */ if (jtot < nmaxit) { goto L10; } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { if (e[i__] != 0.) { ++(*info); } /* L150: */ } goto L190; /* Order eigenvalues and eigenvectors. */ L160: if (icompz == 0) { /* Use Quick Sort */ _starpu_dlasrt_("I", n, &d__[1], info); } else { /* Use Selection Sort to minimize swaps of eigenvectors */ i__1 = *n; for (ii = 2; ii <= i__1; ++ii) { i__ = ii - 1; k = i__; p = d__[i__]; i__2 = *n; for (j = ii; j <= i__2; ++j) { if (d__[j] < p) { k = j; p = d__[j]; } /* L170: */ } if (k != i__) { d__[k] = d__[i__]; d__[i__] = p; _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], &c__1); } /* L180: */ } } L190: return 0; /* End of DSTEQR */ } /* _starpu_dsteqr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsterf.c000066400000000000000000000234511507764646700206700ustar00rootroot00000000000000/* dsterf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__0 = 0; static integer c__1 = 1; static doublereal c_b32 = 1.; /* Subroutine */ int _starpu_dsterf_(integer *n, doublereal *d__, doublereal *e, integer *info) { /* System generated locals */ integer i__1; doublereal d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ doublereal c__; integer i__, l, m; doublereal p, r__, s; integer l1; doublereal bb, rt1, rt2, eps, rte; integer lsv; doublereal eps2, oldc; integer lend, jtot; extern /* Subroutine */ int _starpu_dlae2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); doublereal gamma, alpha, sigma, anorm; extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *); integer iscale; extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); doublereal oldgam, safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal safmax; extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dlasrt_(char *, integer *, doublereal *, integer *); integer lendsv; doublereal ssfmin; integer nmaxit; doublereal ssfmax; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSTERF computes all eigenvalues of a symmetric tridiagonal matrix */ /* using the Pal-Walker-Kahan variant of the QL or QR algorithm. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix. N >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the n diagonal elements of the tridiagonal matrix. */ /* On exit, if INFO = 0, the eigenvalues in ascending order. */ /* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, the (n-1) subdiagonal elements of the tridiagonal */ /* matrix. */ /* On exit, E has been destroyed. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: the algorithm failed to find all of the eigenvalues in */ /* a total of 30*N iterations; if INFO = i, then i */ /* elements of E have not converged to zero. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --e; --d__; /* Function Body */ *info = 0; /* Quick return if possible */ if (*n < 0) { *info = -1; i__1 = -(*info); _starpu_xerbla_("DSTERF", &i__1); return 0; } if (*n <= 1) { return 0; } /* Determine the unit roundoff for this environment. */ eps = _starpu_dlamch_("E"); /* Computing 2nd power */ d__1 = eps; eps2 = d__1 * d__1; safmin = _starpu_dlamch_("S"); safmax = 1. / safmin; ssfmax = sqrt(safmax) / 3.; ssfmin = sqrt(safmin) / eps2; /* Compute the eigenvalues of the tridiagonal matrix. */ nmaxit = *n * 30; sigma = 0.; jtot = 0; /* Determine where the matrix splits and choose QL or QR iteration */ /* for each block, according to whether top or bottom diagonal */ /* element is smaller. */ l1 = 1; L10: if (l1 > *n) { goto L170; } if (l1 > 1) { e[l1 - 1] = 0.; } i__1 = *n - 1; for (m = l1; m <= i__1; ++m) { if ((d__3 = e[m], abs(d__3)) <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m + 1], abs(d__2))) * eps) { e[m] = 0.; goto L30; } /* L20: */ } m = *n; L30: l = l1; lsv = l; lend = m; lendsv = lend; l1 = m + 1; if (lend == l) { goto L10; } /* Scale submatrix in rows and columns L to LEND */ i__1 = lend - l + 1; anorm = _starpu_dlanst_("I", &i__1, &d__[l], &e[l]); iscale = 0; if (anorm > ssfmax) { iscale = 1; i__1 = lend - l + 1; _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, info); i__1 = lend - l; _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, info); } else if (anorm < ssfmin) { iscale = 2; i__1 = lend - l + 1; _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, info); i__1 = lend - l; _starpu_dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, info); } i__1 = lend - 1; for (i__ = l; i__ <= i__1; ++i__) { /* Computing 2nd power */ d__1 = e[i__]; e[i__] = d__1 * d__1; /* L40: */ } /* Choose between QL and QR iteration */ if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { lend = lsv; l = lendsv; } if (lend >= l) { /* QL Iteration */ /* Look for small subdiagonal element. */ L50: if (l != lend) { i__1 = lend - 1; for (m = l; m <= i__1; ++m) { if ((d__2 = e[m], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m + 1], abs(d__1))) { goto L70; } /* L60: */ } } m = lend; L70: if (m < lend) { e[m] = 0.; } p = d__[l]; if (m == l) { goto L90; } /* If remaining matrix is 2 by 2, use DLAE2 to compute its */ /* eigenvalues. */ if (m == l + 1) { rte = sqrt(e[l]); _starpu_dlae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); d__[l] = rt1; d__[l + 1] = rt2; e[l] = 0.; l += 2; if (l <= lend) { goto L50; } goto L150; } if (jtot == nmaxit) { goto L150; } ++jtot; /* Form shift. */ rte = sqrt(e[l]); sigma = (d__[l + 1] - p) / (rte * 2.); r__ = _starpu_dlapy2_(&sigma, &c_b32); sigma = p - rte / (sigma + d_sign(&r__, &sigma)); c__ = 1.; s = 0.; gamma = d__[m] - sigma; p = gamma * gamma; /* Inner loop */ i__1 = l; for (i__ = m - 1; i__ >= i__1; --i__) { bb = e[i__]; r__ = p + bb; if (i__ != m - 1) { e[i__ + 1] = s * r__; } oldc = c__; c__ = p / r__; s = bb / r__; oldgam = gamma; alpha = d__[i__]; gamma = c__ * (alpha - sigma) - s * oldgam; d__[i__ + 1] = oldgam + (alpha - gamma); if (c__ != 0.) { p = gamma * gamma / c__; } else { p = oldc * bb; } /* L80: */ } e[l] = s * p; d__[l] = sigma + gamma; goto L50; /* Eigenvalue found. */ L90: d__[l] = p; ++l; if (l <= lend) { goto L50; } goto L150; } else { /* QR Iteration */ /* Look for small superdiagonal element. */ L100: i__1 = lend + 1; for (m = l; m >= i__1; --m) { if ((d__2 = e[m - 1], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m - 1], abs(d__1))) { goto L120; } /* L110: */ } m = lend; L120: if (m > lend) { e[m - 1] = 0.; } p = d__[l]; if (m == l) { goto L140; } /* If remaining matrix is 2 by 2, use DLAE2 to compute its */ /* eigenvalues. */ if (m == l - 1) { rte = sqrt(e[l - 1]); _starpu_dlae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); d__[l] = rt1; d__[l - 1] = rt2; e[l - 1] = 0.; l += -2; if (l >= lend) { goto L100; } goto L150; } if (jtot == nmaxit) { goto L150; } ++jtot; /* Form shift. */ rte = sqrt(e[l - 1]); sigma = (d__[l - 1] - p) / (rte * 2.); r__ = _starpu_dlapy2_(&sigma, &c_b32); sigma = p - rte / (sigma + d_sign(&r__, &sigma)); c__ = 1.; s = 0.; gamma = d__[m] - sigma; p = gamma * gamma; /* Inner loop */ i__1 = l - 1; for (i__ = m; i__ <= i__1; ++i__) { bb = e[i__]; r__ = p + bb; if (i__ != m) { e[i__ - 1] = s * r__; } oldc = c__; c__ = p / r__; s = bb / r__; oldgam = gamma; alpha = d__[i__ + 1]; gamma = c__ * (alpha - sigma) - s * oldgam; d__[i__] = oldgam + (alpha - gamma); if (c__ != 0.) { p = gamma * gamma / c__; } else { p = oldc * bb; } /* L130: */ } e[l - 1] = s * p; d__[l] = sigma + gamma; goto L100; /* Eigenvalue found. */ L140: d__[l] = p; --l; if (l >= lend) { goto L100; } goto L150; } /* Undo scaling if necessary */ L150: if (iscale == 1) { i__1 = lendsv - lsv + 1; _starpu_dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], n, info); } if (iscale == 2) { i__1 = lendsv - lsv + 1; _starpu_dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], n, info); } /* Check for no convergence to an eigenvalue after a total */ /* of N*MAXIT iterations. */ if (jtot < nmaxit) { goto L10; } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { if (e[i__] != 0.) { ++(*info); } /* L160: */ } goto L180; /* Sort eigenvalues in increasing order. */ L170: _starpu_dlasrt_("I", n, &d__[1], info); L180: return 0; /* End of DSTERF */ } /* _starpu_dsterf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dstev.c000066400000000000000000000134531507764646700205270ustar00rootroot00000000000000/* dstev.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dstev_(char *jobz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal eps; integer imax; doublereal rmin, rmax, tnrm; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal sigma; extern logical _starpu_lsame_(char *, char *); logical wantz; extern doublereal _starpu_dlamch_(char *); integer iscale; doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *), _starpu_dsteqr_(char *, integer *, doublereal *, doublereal * , doublereal *, integer *, doublereal *, integer *); doublereal smlnum; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSTEV computes all eigenvalues and, optionally, eigenvectors of a */ /* real symmetric tridiagonal matrix A. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* N (input) INTEGER */ /* The order of the matrix. N >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the n diagonal elements of the tridiagonal matrix */ /* A. */ /* On exit, if INFO = 0, the eigenvalues in ascending order. */ /* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, the (n-1) subdiagonal elements of the tridiagonal */ /* matrix A, stored in elements 1 to N-1 of E. */ /* On exit, the contents of E are destroyed. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ /* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ /* eigenvectors of the matrix A, with the i-th column of Z */ /* holding the eigenvector associated with D(i). */ /* If JOBZ = 'N', then Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) */ /* If JOBZ = 'N', WORK is not referenced. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the algorithm failed to converge; i */ /* off-diagonal elements of E did not converge to zero. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); *info = 0; if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSTEV ", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { if (wantz) { z__[z_dim1 + 1] = 1.; } return 0; } /* Get machine constants. */ safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); rmax = sqrt(bignum); /* Scale matrix to allowable range, if necessary. */ iscale = 0; tnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); if (tnrm > 0. && tnrm < rmin) { iscale = 1; sigma = rmin / tnrm; } else if (tnrm > rmax) { iscale = 1; sigma = rmax / tnrm; } if (iscale == 1) { _starpu_dscal_(n, &sigma, &d__[1], &c__1); i__1 = *n - 1; _starpu_dscal_(&i__1, &sigma, &e[1], &c__1); } /* For eigenvalues only, call DSTERF. For eigenvalues and */ /* eigenvectors, call DSTEQR. */ if (! wantz) { _starpu_dsterf_(n, &d__[1], &e[1], info); } else { _starpu_dsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { if (*info == 0) { imax = *n; } else { imax = *info - 1; } d__1 = 1. / sigma; _starpu_dscal_(&imax, &d__1, &d__[1], &c__1); } return 0; /* End of DSTEV */ } /* _starpu_dstev_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dstevd.c000066400000000000000000000203751507764646700206740ustar00rootroot00000000000000/* dstevd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dstevd_(char *jobz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal eps, rmin, rmax, tnrm; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal sigma; extern logical _starpu_lsame_(char *, char *); integer lwmin; logical wantz; extern doublereal _starpu_dlamch_(char *); integer iscale; extern /* Subroutine */ int _starpu_dstedc_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); integer liwmin; doublereal smlnum; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSTEVD computes all eigenvalues and, optionally, eigenvectors of a */ /* real symmetric tridiagonal matrix. If eigenvectors are desired, it */ /* uses a divide and conquer algorithm. */ /* The divide and conquer algorithm makes very mild assumptions about */ /* floating point arithmetic. It will work on machines with a guard */ /* digit in add/subtract, or on those binary machines without guard */ /* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ /* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* N (input) INTEGER */ /* The order of the matrix. N >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the n diagonal elements of the tridiagonal matrix */ /* A. */ /* On exit, if INFO = 0, the eigenvalues in ascending order. */ /* E (input/output) DOUBLE PRECISION array, dimension (N-1) */ /* On entry, the (n-1) subdiagonal elements of the tridiagonal */ /* matrix A, stored in elements 1 to N-1 of E. */ /* On exit, the contents of E are destroyed. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, N) */ /* If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ /* eigenvectors of the matrix A, with the i-th column of Z */ /* holding the eigenvector associated with D(i). */ /* If JOBZ = 'N', then Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace/output) DOUBLE PRECISION array, */ /* dimension (LWORK) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If JOBZ = 'N' or N <= 1 then LWORK must be at least 1. */ /* If JOBZ = 'V' and N > 1 then LWORK must be at least */ /* ( 1 + 4*N + N**2 ). */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal sizes of the WORK and IWORK */ /* arrays, returns these values as the first entries of the WORK */ /* and IWORK arrays, and no error message related to LWORK or */ /* LIWORK is issued by XERBLA. */ /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ /* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. */ /* If JOBZ = 'N' or N <= 1 then LIWORK must be at least 1. */ /* If JOBZ = 'V' and N > 1 then LIWORK must be at least 3+5*N. */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the optimal sizes of the WORK and */ /* IWORK arrays, returns these values as the first entries of */ /* the WORK and IWORK arrays, and no error message related to */ /* LWORK or LIWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the algorithm failed to converge; i */ /* off-diagonal elements of E did not converge to zero. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; --iwork; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); lquery = *lwork == -1 || *liwork == -1; *info = 0; liwmin = 1; lwmin = 1; if (*n > 1 && wantz) { /* Computing 2nd power */ i__1 = *n; lwmin = (*n << 2) + 1 + i__1 * i__1; liwmin = *n * 5 + 3; } if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -6; } if (*info == 0) { work[1] = (doublereal) lwmin; iwork[1] = liwmin; if (*lwork < lwmin && ! lquery) { *info = -8; } else if (*liwork < liwmin && ! lquery) { *info = -10; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSTEVD", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { if (wantz) { z__[z_dim1 + 1] = 1.; } return 0; } /* Get machine constants. */ safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); rmax = sqrt(bignum); /* Scale matrix to allowable range, if necessary. */ iscale = 0; tnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); if (tnrm > 0. && tnrm < rmin) { iscale = 1; sigma = rmin / tnrm; } else if (tnrm > rmax) { iscale = 1; sigma = rmax / tnrm; } if (iscale == 1) { _starpu_dscal_(n, &sigma, &d__[1], &c__1); i__1 = *n - 1; _starpu_dscal_(&i__1, &sigma, &e[1], &c__1); } /* For eigenvalues only, call DSTERF. For eigenvalues and */ /* eigenvectors, call DSTEDC. */ if (! wantz) { _starpu_dsterf_(n, &d__[1], &e[1], info); } else { _starpu_dstedc_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], lwork, &iwork[1], liwork, info); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { d__1 = 1. / sigma; _starpu_dscal_(n, &d__1, &d__[1], &c__1); } work[1] = (doublereal) lwmin; iwork[1] = liwmin; return 0; /* End of DSTEVD */ } /* _starpu_dstevd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dstevr.c000066400000000000000000000441671507764646700207170ustar00rootroot00000000000000/* dstevr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__10 = 10; static integer c__1 = 1; static integer c__2 = 2; static integer c__3 = 3; static integer c__4 = 4; /* Subroutine */ int _starpu_dstevr_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, jj; doublereal eps, vll, vuu, tmp1; integer imax; doublereal rmin, rmax; logical test; doublereal tnrm; integer itmp1; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal sigma; extern logical _starpu_lsame_(char *, char *); char order[1]; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); integer lwmin; logical wantz; extern doublereal _starpu_dlamch_(char *); logical alleig, indeig; integer iscale, ieeeok, indibl, indifl; logical valeig; doublereal safmin; extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); integer indisp; extern /* Subroutine */ int _starpu_dstein_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *), _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); integer indiwo; extern /* Subroutine */ int _starpu_dstebz_(char *, char *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dstemr_(char *, char *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, integer *, logical *, doublereal *, integer *, integer *, integer *, integer *); integer liwmin; logical tryrac; integer nsplit; doublereal smlnum; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSTEVR computes selected eigenvalues and, optionally, eigenvectors */ /* of a real symmetric tridiagonal matrix T. Eigenvalues and */ /* eigenvectors can be selected by specifying either a range of values */ /* or a range of indices for the desired eigenvalues. */ /* Whenever possible, DSTEVR calls DSTEMR to compute the */ /* eigenspectrum using Relatively Robust Representations. DSTEMR */ /* computes eigenvalues by the dqds algorithm, while orthogonal */ /* eigenvectors are computed from various "good" L D L^T representations */ /* (also known as Relatively Robust Representations). Gram-Schmidt */ /* orthogonalization is avoided as far as possible. More specifically, */ /* the various steps of the algorithm are as follows. For the i-th */ /* unreduced block of T, */ /* (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T */ /* is a relatively robust representation, */ /* (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high */ /* relative accuracy by the dqds algorithm, */ /* (c) If there is a cluster of close eigenvalues, "choose" sigma_i */ /* close to the cluster, and go to step (a), */ /* (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, */ /* compute the corresponding eigenvector by forming a */ /* rank-revealing twisted factorization. */ /* The desired accuracy of the output can be specified by the input */ /* parameter ABSTOL. */ /* For more details, see "A new O(n^2) algorithm for the symmetric */ /* tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, */ /* Computer Science Division Technical Report No. UCB//CSD-97-971, */ /* UC Berkeley, May 1997. */ /* Note 1 : DSTEVR calls DSTEMR when the full spectrum is requested */ /* on machines which conform to the ieee-754 floating point standard. */ /* DSTEVR calls DSTEBZ and DSTEIN on non-ieee machines and */ /* when partial spectrum requests are made. */ /* Normal execution of DSTEMR may create NaNs and infinities and */ /* hence may abort due to a floating point exception in environments */ /* which do not handle NaNs and infinities in the ieee standard default */ /* manner. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* RANGE (input) CHARACTER*1 */ /* = 'A': all eigenvalues will be found. */ /* = 'V': all eigenvalues in the half-open interval (VL,VU] */ /* will be found. */ /* = 'I': the IL-th through IU-th eigenvalues will be found. */ /* ********* For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and */ /* ********* DSTEIN are called */ /* N (input) INTEGER */ /* The order of the matrix. N >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the n diagonal elements of the tridiagonal matrix */ /* A. */ /* On exit, D may be multiplied by a constant factor chosen */ /* to avoid over/underflow in computing the eigenvalues. */ /* E (input/output) DOUBLE PRECISION array, dimension (max(1,N-1)) */ /* On entry, the (n-1) subdiagonal elements of the tridiagonal */ /* matrix A in elements 1 to N-1 of E. */ /* On exit, E may be multiplied by a constant factor chosen */ /* to avoid over/underflow in computing the eigenvalues. */ /* VL (input) DOUBLE PRECISION */ /* VU (input) DOUBLE PRECISION */ /* If RANGE='V', the lower and upper bounds of the interval to */ /* be searched for eigenvalues. VL < VU. */ /* Not referenced if RANGE = 'A' or 'I'. */ /* IL (input) INTEGER */ /* IU (input) INTEGER */ /* If RANGE='I', the indices (in ascending order) of the */ /* smallest and largest eigenvalues to be returned. */ /* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ /* Not referenced if RANGE = 'A' or 'V'. */ /* ABSTOL (input) DOUBLE PRECISION */ /* The absolute error tolerance for the eigenvalues. */ /* An approximate eigenvalue is accepted as converged */ /* when it is determined to lie in an interval [a,b] */ /* of width less than or equal to */ /* ABSTOL + EPS * max( |a|,|b| ) , */ /* where EPS is the machine precision. If ABSTOL is less than */ /* or equal to zero, then EPS*|T| will be used in its place, */ /* where |T| is the 1-norm of the tridiagonal matrix obtained */ /* by reducing A to tridiagonal form. */ /* See "Computing Small Singular Values of Bidiagonal Matrices */ /* with Guaranteed High Relative Accuracy," by Demmel and */ /* Kahan, LAPACK Working Note #3. */ /* If high relative accuracy is important, set ABSTOL to */ /* DLAMCH( 'Safe minimum' ). Doing so will guarantee that */ /* eigenvalues are computed to high relative accuracy when */ /* possible in future releases. The current code does not */ /* make any guarantees about high relative accuracy, but */ /* future releases will. See J. Barlow and J. Demmel, */ /* "Computing Accurate Eigensystems of Scaled Diagonally */ /* Dominant Matrices", LAPACK Working Note #7, for a discussion */ /* of which matrices define their eigenvalues to high relative */ /* accuracy. */ /* M (output) INTEGER */ /* The total number of eigenvalues found. 0 <= M <= N. */ /* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* The first M elements contain the selected eigenvalues in */ /* ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ /* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ /* contain the orthonormal eigenvectors of the matrix A */ /* corresponding to the selected eigenvalues, with the i-th */ /* column of Z holding the eigenvector associated with W(i). */ /* Note: the user must ensure that at least max(1,M) columns are */ /* supplied in the array Z; if RANGE = 'V', the exact value of M */ /* is not known in advance and an upper bound must be used. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) */ /* The support of the eigenvectors in Z, i.e., the indices */ /* indicating the nonzero elements in Z. The i-th eigenvector */ /* is nonzero only in elements ISUPPZ( 2*i-1 ) through */ /* ISUPPZ( 2*i ). */ /* ********* Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal (and */ /* minimal) LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,20*N). */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal sizes of the WORK and IWORK */ /* arrays, returns these values as the first entries of the WORK */ /* and IWORK arrays, and no error message related to LWORK or */ /* LIWORK is issued by XERBLA. */ /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ /* On exit, if INFO = 0, IWORK(1) returns the optimal (and */ /* minimal) LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. LIWORK >= max(1,10*N). */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the optimal sizes of the WORK and */ /* IWORK arrays, returns these values as the first entries of */ /* the WORK and IWORK arrays, and no error message related to */ /* LWORK or LIWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: Internal error */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Inderjit Dhillon, IBM Almaden, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* Ken Stanley, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --isuppz; --work; --iwork; /* Function Body */ ieeeok = _starpu_ilaenv_(&c__10, "DSTEVR", "N", &c__1, &c__2, &c__3, &c__4); wantz = _starpu_lsame_(jobz, "V"); alleig = _starpu_lsame_(range, "A"); valeig = _starpu_lsame_(range, "V"); indeig = _starpu_lsame_(range, "I"); lquery = *lwork == -1 || *liwork == -1; /* Computing MAX */ i__1 = 1, i__2 = *n * 20; lwmin = max(i__1,i__2); /* Computing MAX */ i__1 = 1, i__2 = *n * 10; liwmin = max(i__1,i__2); *info = 0; if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; } else if (*n < 0) { *info = -3; } else { if (valeig) { if (*n > 0 && *vu <= *vl) { *info = -7; } } else if (indeig) { if (*il < 1 || *il > max(1,*n)) { *info = -8; } else if (*iu < min(*n,*il) || *iu > *n) { *info = -9; } } } if (*info == 0) { if (*ldz < 1 || wantz && *ldz < *n) { *info = -14; } } if (*info == 0) { work[1] = (doublereal) lwmin; iwork[1] = liwmin; if (*lwork < lwmin && ! lquery) { *info = -17; } else if (*liwork < liwmin && ! lquery) { *info = -19; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSTEVR", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { return 0; } if (*n == 1) { if (alleig || indeig) { *m = 1; w[1] = d__[1]; } else { if (*vl < d__[1] && *vu >= d__[1]) { *m = 1; w[1] = d__[1]; } } if (wantz) { z__[z_dim1 + 1] = 1.; } return 0; } /* Get machine constants. */ safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); /* Computing MIN */ d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); rmax = min(d__1,d__2); /* Scale matrix to allowable range, if necessary. */ iscale = 0; vll = *vl; vuu = *vu; tnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); if (tnrm > 0. && tnrm < rmin) { iscale = 1; sigma = rmin / tnrm; } else if (tnrm > rmax) { iscale = 1; sigma = rmax / tnrm; } if (iscale == 1) { _starpu_dscal_(n, &sigma, &d__[1], &c__1); i__1 = *n - 1; _starpu_dscal_(&i__1, &sigma, &e[1], &c__1); if (valeig) { vll = *vl * sigma; vuu = *vu * sigma; } } /* Initialize indices into workspaces. Note: These indices are used only */ /* if DSTERF or DSTEMR fail. */ /* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and */ /* stores the block indices of each of the M<=N eigenvalues. */ indibl = 1; /* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and */ /* stores the starting and finishing indices of each block. */ indisp = indibl + *n; /* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors */ /* that corresponding to eigenvectors that fail to converge in */ /* DSTEIN. This information is discarded; if any fail, the driver */ /* returns INFO > 0. */ indifl = indisp + *n; /* INDIWO is the offset of the remaining integer workspace. */ indiwo = indisp + *n; /* If all eigenvalues are desired, then */ /* call DSTERF or DSTEMR. If this fails for some eigenvalue, then */ /* try DSTEBZ. */ test = FALSE_; if (indeig) { if (*il == 1 && *iu == *n) { test = TRUE_; } } if ((alleig || test) && ieeeok == 1) { i__1 = *n - 1; _starpu_dcopy_(&i__1, &e[1], &c__1, &work[1], &c__1); if (! wantz) { _starpu_dcopy_(n, &d__[1], &c__1, &w[1], &c__1); _starpu_dsterf_(n, &w[1], &work[1], info); } else { _starpu_dcopy_(n, &d__[1], &c__1, &work[*n + 1], &c__1); if (*abstol <= *n * 2. * eps) { tryrac = TRUE_; } else { tryrac = FALSE_; } i__1 = *lwork - (*n << 1); _starpu_dstemr_(jobz, "A", n, &work[*n + 1], &work[1], vl, vu, il, iu, m, &w[1], &z__[z_offset], ldz, n, &isuppz[1], &tryrac, &work[ (*n << 1) + 1], &i__1, &iwork[1], liwork, info); } if (*info == 0) { *m = *n; goto L10; } *info = 0; } /* Otherwise, call DSTEBZ and, if eigenvectors are desired, DSTEIN. */ if (wantz) { *(unsigned char *)order = 'B'; } else { *(unsigned char *)order = 'E'; } _starpu_dstebz_(range, order, n, &vll, &vuu, il, iu, abstol, &d__[1], &e[1], m, & nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[1], &iwork[ indiwo], info); if (wantz) { _starpu_dstein_(n, &d__[1], &e[1], m, &w[1], &iwork[indibl], &iwork[indisp], & z__[z_offset], ldz, &work[1], &iwork[indiwo], &iwork[indifl], info); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ L10: if (iscale == 1) { if (*info == 0) { imax = *m; } else { imax = *info - 1; } d__1 = 1. / sigma; _starpu_dscal_(&imax, &d__1, &w[1], &c__1); } /* If eigenvalues are not in order, then sort them, along with */ /* eigenvectors. */ if (wantz) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { i__ = 0; tmp1 = w[j]; i__2 = *m; for (jj = j + 1; jj <= i__2; ++jj) { if (w[jj] < tmp1) { i__ = jj; tmp1 = w[jj]; } /* L20: */ } if (i__ != 0) { itmp1 = iwork[i__]; w[i__] = w[j]; iwork[i__] = iwork[j]; w[j] = tmp1; iwork[j] = itmp1; _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], &c__1); } /* L30: */ } } /* Causes problems with tests 19 & 20: */ /* IF (wantz .and. INDEIG ) Z( 1,1) = Z(1,1) / 1.002 + .002 */ work[1] = (doublereal) lwmin; iwork[1] = liwmin; return 0; /* End of DSTEVR */ } /* _starpu_dstevr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dstevx.c000066400000000000000000000316741507764646700207240ustar00rootroot00000000000000/* dstevx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dstevx_(char *jobz, char *range, integer *n, doublereal * d__, doublereal *e, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *iwork, integer *ifail, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, jj; doublereal eps, vll, vuu, tmp1; integer imax; doublereal rmin, rmax; logical test; doublereal tnrm; integer itmp1; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal sigma; extern logical _starpu_lsame_(char *, char *); char order[1]; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); logical wantz; extern doublereal _starpu_dlamch_(char *); logical alleig, indeig; integer iscale, indibl; logical valeig; doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *); integer indisp; extern /* Subroutine */ int _starpu_dstein_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *), _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); integer indiwo; extern /* Subroutine */ int _starpu_dstebz_(char *, char *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); integer indwrk; extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer nsplit; doublereal smlnum; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSTEVX computes selected eigenvalues and, optionally, eigenvectors */ /* of a real symmetric tridiagonal matrix A. Eigenvalues and */ /* eigenvectors can be selected by specifying either a range of values */ /* or a range of indices for the desired eigenvalues. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* RANGE (input) CHARACTER*1 */ /* = 'A': all eigenvalues will be found. */ /* = 'V': all eigenvalues in the half-open interval (VL,VU] */ /* will be found. */ /* = 'I': the IL-th through IU-th eigenvalues will be found. */ /* N (input) INTEGER */ /* The order of the matrix. N >= 0. */ /* D (input/output) DOUBLE PRECISION array, dimension (N) */ /* On entry, the n diagonal elements of the tridiagonal matrix */ /* A. */ /* On exit, D may be multiplied by a constant factor chosen */ /* to avoid over/underflow in computing the eigenvalues. */ /* E (input/output) DOUBLE PRECISION array, dimension (max(1,N-1)) */ /* On entry, the (n-1) subdiagonal elements of the tridiagonal */ /* matrix A in elements 1 to N-1 of E. */ /* On exit, E may be multiplied by a constant factor chosen */ /* to avoid over/underflow in computing the eigenvalues. */ /* VL (input) DOUBLE PRECISION */ /* VU (input) DOUBLE PRECISION */ /* If RANGE='V', the lower and upper bounds of the interval to */ /* be searched for eigenvalues. VL < VU. */ /* Not referenced if RANGE = 'A' or 'I'. */ /* IL (input) INTEGER */ /* IU (input) INTEGER */ /* If RANGE='I', the indices (in ascending order) of the */ /* smallest and largest eigenvalues to be returned. */ /* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ /* Not referenced if RANGE = 'A' or 'V'. */ /* ABSTOL (input) DOUBLE PRECISION */ /* The absolute error tolerance for the eigenvalues. */ /* An approximate eigenvalue is accepted as converged */ /* when it is determined to lie in an interval [a,b] */ /* of width less than or equal to */ /* ABSTOL + EPS * max( |a|,|b| ) , */ /* where EPS is the machine precision. If ABSTOL is less */ /* than or equal to zero, then EPS*|T| will be used in */ /* its place, where |T| is the 1-norm of the tridiagonal */ /* matrix. */ /* Eigenvalues will be computed most accurately when ABSTOL is */ /* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ /* If this routine returns with INFO>0, indicating that some */ /* eigenvectors did not converge, try setting ABSTOL to */ /* 2*DLAMCH('S'). */ /* See "Computing Small Singular Values of Bidiagonal Matrices */ /* with Guaranteed High Relative Accuracy," by Demmel and */ /* Kahan, LAPACK Working Note #3. */ /* M (output) INTEGER */ /* The total number of eigenvalues found. 0 <= M <= N. */ /* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* The first M elements contain the selected eigenvalues in */ /* ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ /* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ /* contain the orthonormal eigenvectors of the matrix A */ /* corresponding to the selected eigenvalues, with the i-th */ /* column of Z holding the eigenvector associated with W(i). */ /* If an eigenvector fails to converge (INFO > 0), then that */ /* column of Z contains the latest approximation to the */ /* eigenvector, and the index of the eigenvector is returned */ /* in IFAIL. If JOBZ = 'N', then Z is not referenced. */ /* Note: the user must ensure that at least max(1,M) columns are */ /* supplied in the array Z; if RANGE = 'V', the exact value of M */ /* is not known in advance and an upper bound must be used. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (5*N) */ /* IWORK (workspace) INTEGER array, dimension (5*N) */ /* IFAIL (output) INTEGER array, dimension (N) */ /* If JOBZ = 'V', then if INFO = 0, the first M elements of */ /* IFAIL are zero. If INFO > 0, then IFAIL contains the */ /* indices of the eigenvectors that failed to converge. */ /* If JOBZ = 'N', then IFAIL is not referenced. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, then i eigenvectors failed to converge. */ /* Their indices are stored in array IFAIL. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; --iwork; --ifail; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); alleig = _starpu_lsame_(range, "A"); valeig = _starpu_lsame_(range, "V"); indeig = _starpu_lsame_(range, "I"); *info = 0; if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; } else if (*n < 0) { *info = -3; } else { if (valeig) { if (*n > 0 && *vu <= *vl) { *info = -7; } } else if (indeig) { if (*il < 1 || *il > max(1,*n)) { *info = -8; } else if (*iu < min(*n,*il) || *iu > *n) { *info = -9; } } } if (*info == 0) { if (*ldz < 1 || wantz && *ldz < *n) { *info = -14; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSTEVX", &i__1); return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { return 0; } if (*n == 1) { if (alleig || indeig) { *m = 1; w[1] = d__[1]; } else { if (*vl < d__[1] && *vu >= d__[1]) { *m = 1; w[1] = d__[1]; } } if (wantz) { z__[z_dim1 + 1] = 1.; } return 0; } /* Get machine constants. */ safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); /* Computing MIN */ d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); rmax = min(d__1,d__2); /* Scale matrix to allowable range, if necessary. */ iscale = 0; if (valeig) { vll = *vl; vuu = *vu; } else { vll = 0.; vuu = 0.; } tnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]); if (tnrm > 0. && tnrm < rmin) { iscale = 1; sigma = rmin / tnrm; } else if (tnrm > rmax) { iscale = 1; sigma = rmax / tnrm; } if (iscale == 1) { _starpu_dscal_(n, &sigma, &d__[1], &c__1); i__1 = *n - 1; _starpu_dscal_(&i__1, &sigma, &e[1], &c__1); if (valeig) { vll = *vl * sigma; vuu = *vu * sigma; } } /* If all eigenvalues are desired and ABSTOL is less than zero, then */ /* call DSTERF or SSTEQR. If this fails for some eigenvalue, then */ /* try DSTEBZ. */ test = FALSE_; if (indeig) { if (*il == 1 && *iu == *n) { test = TRUE_; } } if ((alleig || test) && *abstol <= 0.) { _starpu_dcopy_(n, &d__[1], &c__1, &w[1], &c__1); i__1 = *n - 1; _starpu_dcopy_(&i__1, &e[1], &c__1, &work[1], &c__1); indwrk = *n + 1; if (! wantz) { _starpu_dsterf_(n, &w[1], &work[1], info); } else { _starpu_dsteqr_("I", n, &w[1], &work[1], &z__[z_offset], ldz, &work[ indwrk], info); if (*info == 0) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { ifail[i__] = 0; /* L10: */ } } } if (*info == 0) { *m = *n; goto L20; } *info = 0; } /* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. */ if (wantz) { *(unsigned char *)order = 'B'; } else { *(unsigned char *)order = 'E'; } indwrk = 1; indibl = 1; indisp = indibl + *n; indiwo = indisp + *n; _starpu_dstebz_(range, order, n, &vll, &vuu, il, iu, abstol, &d__[1], &e[1], m, & nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[indwrk], & iwork[indiwo], info); if (wantz) { _starpu_dstein_(n, &d__[1], &e[1], m, &w[1], &iwork[indibl], &iwork[indisp], & z__[z_offset], ldz, &work[indwrk], &iwork[indiwo], &ifail[1], info); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ L20: if (iscale == 1) { if (*info == 0) { imax = *m; } else { imax = *info - 1; } d__1 = 1. / sigma; _starpu_dscal_(&imax, &d__1, &w[1], &c__1); } /* If eigenvalues are not in order, then sort them, along with */ /* eigenvectors. */ if (wantz) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { i__ = 0; tmp1 = w[j]; i__2 = *m; for (jj = j + 1; jj <= i__2; ++jj) { if (w[jj] < tmp1) { i__ = jj; tmp1 = w[jj]; } /* L30: */ } if (i__ != 0) { itmp1 = iwork[indibl + i__ - 1]; w[i__] = w[j]; iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; w[j] = tmp1; iwork[indibl + j - 1] = itmp1; _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], &c__1); if (*info != 0) { itmp1 = ifail[i__]; ifail[i__] = ifail[j]; ifail[j] = itmp1; } } /* L40: */ } } return 0; /* End of DSTEVX */ } /* _starpu_dstevx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsycon.c000066400000000000000000000127751507764646700207070ustar00rootroot00000000000000/* dsycon.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dsycon_(char *uplo, integer *n, doublereal *a, integer * lda, integer *ipiv, doublereal *anorm, doublereal *rcond, doublereal * work, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1; /* Local variables */ integer i__, kase; extern logical _starpu_lsame_(char *, char *); integer isave[3]; logical upper; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); doublereal ainvnm; extern /* Subroutine */ int _starpu_dsytrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYCON estimates the reciprocal of the condition number (in the */ /* 1-norm) of a real symmetric matrix A using the factorization */ /* A = U*D*U**T or A = L*D*L**T computed by DSYTRF. */ /* An estimate is obtained for norm(inv(A)), and the reciprocal of the */ /* condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the details of the factorization are stored */ /* as an upper or lower triangular matrix. */ /* = 'U': Upper triangular, form is A = U*D*U**T; */ /* = 'L': Lower triangular, form is A = L*D*L**T. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The block diagonal matrix D and the multipliers used to */ /* obtain the factor U or L as computed by DSYTRF. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D */ /* as determined by DSYTRF. */ /* ANORM (input) DOUBLE PRECISION */ /* The 1-norm of the original matrix A. */ /* RCOND (output) DOUBLE PRECISION */ /* The reciprocal of the condition number of the matrix A, */ /* computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ /* estimate of the 1-norm of inv(A) computed in this routine. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; --work; --iwork; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } else if (*anorm < 0.) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYCON", &i__1); return 0; } /* Quick return if possible */ *rcond = 0.; if (*n == 0) { *rcond = 1.; return 0; } else if (*anorm <= 0.) { return 0; } /* Check that the diagonal matrix D is nonsingular. */ if (upper) { /* Upper triangular storage: examine D from bottom to top */ for (i__ = *n; i__ >= 1; --i__) { if (ipiv[i__] > 0 && a[i__ + i__ * a_dim1] == 0.) { return 0; } /* L10: */ } } else { /* Lower triangular storage: examine D from top to bottom. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (ipiv[i__] > 0 && a[i__ + i__ * a_dim1] == 0.) { return 0; } /* L20: */ } } /* Estimate the 1-norm of the inverse. */ kase = 0; L30: _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); if (kase != 0) { /* Multiply by inv(L*D*L') or inv(U*D*U'). */ _starpu_dsytrs_(uplo, n, &c__1, &a[a_offset], lda, &ipiv[1], &work[1], n, info); goto L30; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { *rcond = 1. / ainvnm / *anorm; } return 0; /* End of DSYCON */ } /* _starpu_dsycon_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsyequb.c000066400000000000000000000227761507764646700210660ustar00rootroot00000000000000/* dsyequb.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dsyequb_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *s, doublereal *scond, doublereal *amax, doublereal * work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal), log(doublereal), pow_di(doublereal *, integer *); /* Local variables */ doublereal d__; integer i__, j; doublereal t, u, c0, c1, c2, si; logical up; doublereal avg, std, tol, base; integer iter; doublereal smin, smax, scale; extern logical _starpu_lsame_(char *, char *); doublereal sumsq; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); doublereal smlnum; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYEQUB computes row and column scalings intended to equilibrate a */ /* symmetric matrix A and reduce its condition number */ /* (with respect to the two-norm). S contains the scale factors, */ /* S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with */ /* elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This */ /* choice of S puts the condition number of B within a factor N of the */ /* smallest possible condition number over all possible diagonal */ /* scalings. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The N-by-N symmetric matrix whose scaling */ /* factors are to be computed. Only the diagonal elements of A */ /* are referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* S (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, S contains the scale factors for A. */ /* SCOND (output) DOUBLE PRECISION */ /* If INFO = 0, S contains the ratio of the smallest S(i) to */ /* the largest S(i). If SCOND >= 0.1 and AMAX is neither too */ /* large nor too small, it is not worth scaling by S. */ /* AMAX (output) DOUBLE PRECISION */ /* Absolute value of largest matrix element. If AMAX is very */ /* close to overflow or very close to underflow, the matrix */ /* should be scaled. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the i-th diagonal element is nonpositive. */ /* Further Details */ /* ======= ======= */ /* Reference: Livne, O.E. and Golub, G.H., "Scaling by Binormalization", */ /* Numerical Algorithms, vol. 35, no. 1, pp. 97-120, January 2004. */ /* DOI 10.1023/B:NUMA.0000016606.32820.69 */ /* Tech report version: http://ruready.utah.edu/archive/papers/bin.pdf */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --s; --work; /* Function Body */ *info = 0; if (! (_starpu_lsame_(uplo, "U") || _starpu_lsame_(uplo, "L"))) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYEQUB", &i__1); return 0; } up = _starpu_lsame_(uplo, "U"); *amax = 0.; /* Quick return if possible. */ if (*n == 0) { *scond = 1.; return 0; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { s[i__] = 0.; } *amax = 0.; if (up) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = s[i__], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); s[i__] = max(d__2,d__3); /* Computing MAX */ d__2 = s[j], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); s[j] = max(d__2,d__3); /* Computing MAX */ d__2 = *amax, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); *amax = max(d__2,d__3); } /* Computing MAX */ d__2 = s[j], d__3 = (d__1 = a[j + j * a_dim1], abs(d__1)); s[j] = max(d__2,d__3); /* Computing MAX */ d__2 = *amax, d__3 = (d__1 = a[j + j * a_dim1], abs(d__1)); *amax = max(d__2,d__3); } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ d__2 = s[j], d__3 = (d__1 = a[j + j * a_dim1], abs(d__1)); s[j] = max(d__2,d__3); /* Computing MAX */ d__2 = *amax, d__3 = (d__1 = a[j + j * a_dim1], abs(d__1)); *amax = max(d__2,d__3); i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = s[i__], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); s[i__] = max(d__2,d__3); /* Computing MAX */ d__2 = s[j], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); s[j] = max(d__2,d__3); /* Computing MAX */ d__2 = *amax, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); *amax = max(d__2,d__3); } } } i__1 = *n; for (j = 1; j <= i__1; ++j) { s[j] = 1. / s[j]; } tol = 1. / sqrt(*n * 2.); for (iter = 1; iter <= 100; ++iter) { scale = 0.; sumsq = 0.; /* BETA = |A|S */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; } if (up) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { t = (d__1 = a[i__ + j * a_dim1], abs(d__1)); work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)) * s[ j]; work[j] += (d__1 = a[i__ + j * a_dim1], abs(d__1)) * s[ i__]; } work[j] += (d__1 = a[j + j * a_dim1], abs(d__1)) * s[j]; } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { work[j] += (d__1 = a[j + j * a_dim1], abs(d__1)) * s[j]; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { t = (d__1 = a[i__ + j * a_dim1], abs(d__1)); work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)) * s[ j]; work[j] += (d__1 = a[i__ + j * a_dim1], abs(d__1)) * s[ i__]; } } } /* avg = s^T beta / n */ avg = 0.; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { avg += s[i__] * work[i__]; } avg /= *n; std = 0.; i__1 = *n * 3; for (i__ = (*n << 1) + 1; i__ <= i__1; ++i__) { work[i__] = s[i__ - (*n << 1)] * work[i__ - (*n << 1)] - avg; } _starpu_dlassq_(n, &work[(*n << 1) + 1], &c__1, &scale, &sumsq); std = scale * sqrt(sumsq / *n); if (std < tol * avg) { goto L999; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { t = (d__1 = a[i__ + i__ * a_dim1], abs(d__1)); si = s[i__]; c2 = (*n - 1) * t; c1 = (*n - 2) * (work[i__] - t * si); c0 = -(t * si) * si + work[i__] * 2 * si - *n * avg; d__ = c1 * c1 - c0 * 4 * c2; if (d__ <= 0.) { *info = -1; return 0; } si = c0 * -2 / (c1 + sqrt(d__)); d__ = si - s[i__]; u = 0.; if (up) { i__2 = i__; for (j = 1; j <= i__2; ++j) { t = (d__1 = a[j + i__ * a_dim1], abs(d__1)); u += s[j] * t; work[j] += d__ * t; } i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { t = (d__1 = a[i__ + j * a_dim1], abs(d__1)); u += s[j] * t; work[j] += d__ * t; } } else { i__2 = i__; for (j = 1; j <= i__2; ++j) { t = (d__1 = a[i__ + j * a_dim1], abs(d__1)); u += s[j] * t; work[j] += d__ * t; } i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { t = (d__1 = a[j + i__ * a_dim1], abs(d__1)); u += s[j] * t; work[j] += d__ * t; } } avg += (u + work[i__]) * d__ / *n; s[i__] = si; } } L999: smlnum = _starpu_dlamch_("SAFEMIN"); bignum = 1. / smlnum; smin = bignum; smax = 0.; t = 1. / sqrt(avg); base = _starpu_dlamch_("B"); u = 1. / log(base); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = (integer) (u * log(s[i__] * t)); s[i__] = pow_di(&base, &i__2); /* Computing MIN */ d__1 = smin, d__2 = s[i__]; smin = min(d__1,d__2); /* Computing MAX */ d__1 = smax, d__2 = s[i__]; smax = max(d__1,d__2); } *scond = max(smin,smlnum) / min(smax,bignum); return 0; } /* _starpu_dsyequb_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsyev.c000066400000000000000000000205761507764646700205400ustar00rootroot00000000000000/* dsyev.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__0 = 0; static doublereal c_b17 = 1.; /* Subroutine */ int _starpu_dsyev_(char *jobz, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *w, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer nb; doublereal eps; integer inde; doublereal anrm; integer imax; doublereal rmin, rmax; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal sigma; extern logical _starpu_lsame_(char *, char *); integer iinfo; logical lower, wantz; extern doublereal _starpu_dlamch_(char *); integer iscale; extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); doublereal safmin; extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; integer indtau; extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); integer indwrk; extern /* Subroutine */ int _starpu_dorgtr_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsytrd_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); integer llwork; doublereal smlnum; integer lwkopt; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYEV computes all eigenvalues and, optionally, eigenvectors of a */ /* real symmetric matrix A. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of A contains the */ /* upper triangular part of the matrix A. If UPLO = 'L', */ /* the leading N-by-N lower triangular part of A contains */ /* the lower triangular part of the matrix A. */ /* On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ /* orthonormal eigenvectors of the matrix A. */ /* If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ /* or the upper triangle (if UPLO='U') of A, including the */ /* diagonal, is destroyed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, the eigenvalues in ascending order. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The length of the array WORK. LWORK >= max(1,3*N-1). */ /* For optimal efficiency, LWORK >= (NB+2)*N, */ /* where NB is the blocksize for DSYTRD returned by ILAENV. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the algorithm failed to converge; i */ /* off-diagonal elements of an intermediate tridiagonal */ /* form did not converge to zero. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --w; --work; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); lower = _starpu_lsame_(uplo, "L"); lquery = *lwork == -1; *info = 0; if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (lower || _starpu_lsame_(uplo, "U"))) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } if (*info == 0) { nb = _starpu_ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); /* Computing MAX */ i__1 = 1, i__2 = (nb + 2) * *n; lwkopt = max(i__1,i__2); work[1] = (doublereal) lwkopt; /* Computing MAX */ i__1 = 1, i__2 = *n * 3 - 1; if (*lwork < max(i__1,i__2) && ! lquery) { *info = -8; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYEV ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { w[1] = a[a_dim1 + 1]; work[1] = 2.; if (wantz) { a[a_dim1 + 1] = 1.; } return 0; } /* Get machine constants. */ safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); rmax = sqrt(bignum); /* Scale matrix to allowable range, if necessary. */ anrm = _starpu_dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); iscale = 0; if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { _starpu_dlascl_(uplo, &c__0, &c__0, &c_b17, &sigma, n, n, &a[a_offset], lda, info); } /* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ inde = 1; indtau = inde + *n; indwrk = indtau + *n; llwork = *lwork - indwrk + 1; _starpu_dsytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & work[indwrk], &llwork, &iinfo); /* For eigenvalues only, call DSTERF. For eigenvectors, first call */ /* DORGTR to generate the orthogonal matrix, then call DSTEQR. */ if (! wantz) { _starpu_dsterf_(n, &w[1], &work[inde], info); } else { _starpu_dorgtr_(uplo, n, &a[a_offset], lda, &work[indtau], &work[indwrk], & llwork, &iinfo); _starpu_dsteqr_(jobz, n, &w[1], &work[inde], &a[a_offset], lda, &work[indtau], info); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { if (*info == 0) { imax = *n; } else { imax = *info - 1; } d__1 = 1. / sigma; _starpu_dscal_(&imax, &d__1, &w[1], &c__1); } /* Set WORK(1) to optimal workspace size. */ work[1] = (doublereal) lwkopt; return 0; /* End of DSYEV */ } /* _starpu_dsyev_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsyevd.c000066400000000000000000000267251507764646700207060ustar00rootroot00000000000000/* dsyevd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__0 = 0; static doublereal c_b17 = 1.; /* Subroutine */ int _starpu_dsyevd_(char *jobz, char *uplo, integer *n, doublereal * a, integer *lda, doublereal *w, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal eps; integer inde; doublereal anrm, rmin, rmax; integer lopt; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal sigma; extern logical _starpu_lsame_(char *, char *); integer iinfo, lwmin, liopt; logical lower, wantz; integer indwk2, llwrk2; extern doublereal _starpu_dlamch_(char *); integer iscale; extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dstedc_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *), _starpu_dlacpy_( char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal safmin; extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; integer indtau; extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); integer indwrk, liwmin; extern /* Subroutine */ int _starpu_dormtr_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dsytrd_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); integer llwork; doublereal smlnum; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYEVD computes all eigenvalues and, optionally, eigenvectors of a */ /* real symmetric matrix A. If eigenvectors are desired, it uses a */ /* divide and conquer algorithm. */ /* The divide and conquer algorithm makes very mild assumptions about */ /* floating point arithmetic. It will work on machines with a guard */ /* digit in add/subtract, or on those binary machines without guard */ /* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ /* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. */ /* Because of large use of BLAS of level 3, DSYEVD needs N**2 more */ /* workspace than DSYEVX. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of A contains the */ /* upper triangular part of the matrix A. If UPLO = 'L', */ /* the leading N-by-N lower triangular part of A contains */ /* the lower triangular part of the matrix A. */ /* On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ /* orthonormal eigenvectors of the matrix A. */ /* If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ /* or the upper triangle (if UPLO='U') of A, including the */ /* diagonal, is destroyed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, the eigenvalues in ascending order. */ /* WORK (workspace/output) DOUBLE PRECISION array, */ /* dimension (LWORK) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If N <= 1, LWORK must be at least 1. */ /* If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. */ /* If JOBZ = 'V' and N > 1, LWORK must be at least */ /* 1 + 6*N + 2*N**2. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal sizes of the WORK and IWORK */ /* arrays, returns these values as the first entries of the WORK */ /* and IWORK arrays, and no error message related to LWORK or */ /* LIWORK is issued by XERBLA. */ /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ /* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. */ /* If N <= 1, LIWORK must be at least 1. */ /* If JOBZ = 'N' and N > 1, LIWORK must be at least 1. */ /* If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the optimal sizes of the WORK and */ /* IWORK arrays, returns these values as the first entries of */ /* the WORK and IWORK arrays, and no error message related to */ /* LWORK or LIWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i and JOBZ = 'N', then the algorithm failed */ /* to converge; i off-diagonal elements of an intermediate */ /* tridiagonal form did not converge to zero; */ /* if INFO = i and JOBZ = 'V', then the algorithm failed */ /* to compute an eigenvalue while working on the submatrix */ /* lying in rows and columns INFO/(N+1) through */ /* mod(INFO,N+1). */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* Modified by Francoise Tisseur, University of Tennessee. */ /* Modified description of INFO. Sven, 16 Feb 05. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --w; --work; --iwork; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); lower = _starpu_lsame_(uplo, "L"); lquery = *lwork == -1 || *liwork == -1; *info = 0; if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (lower || _starpu_lsame_(uplo, "U"))) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } if (*info == 0) { if (*n <= 1) { liwmin = 1; lwmin = 1; lopt = lwmin; liopt = liwmin; } else { if (wantz) { liwmin = *n * 5 + 3; /* Computing 2nd power */ i__1 = *n; lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); } else { liwmin = 1; lwmin = (*n << 1) + 1; } /* Computing MAX */ i__1 = lwmin, i__2 = (*n << 1) + _starpu_ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); lopt = max(i__1,i__2); liopt = liwmin; } work[1] = (doublereal) lopt; iwork[1] = liopt; if (*lwork < lwmin && ! lquery) { *info = -8; } else if (*liwork < liwmin && ! lquery) { *info = -10; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYEVD", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { w[1] = a[a_dim1 + 1]; if (wantz) { a[a_dim1 + 1] = 1.; } return 0; } /* Get machine constants. */ safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); rmax = sqrt(bignum); /* Scale matrix to allowable range, if necessary. */ anrm = _starpu_dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); iscale = 0; if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { _starpu_dlascl_(uplo, &c__0, &c__0, &c_b17, &sigma, n, n, &a[a_offset], lda, info); } /* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ inde = 1; indtau = inde + *n; indwrk = indtau + *n; llwork = *lwork - indwrk + 1; indwk2 = indwrk + *n * *n; llwrk2 = *lwork - indwk2 + 1; _starpu_dsytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & work[indwrk], &llwork, &iinfo); lopt = (integer) ((*n << 1) + work[indwrk]); /* For eigenvalues only, call DSTERF. For eigenvectors, first call */ /* DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the */ /* tridiagonal matrix, then call DORMTR to multiply it by the */ /* Householder transformations stored in A. */ if (! wantz) { _starpu_dsterf_(n, &w[1], &work[inde], info); } else { _starpu_dstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & llwrk2, &iwork[1], liwork, info); _starpu_dormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ indwrk], n, &work[indwk2], &llwrk2, &iinfo); _starpu_dlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); /* Computing MAX */ /* Computing 2nd power */ i__3 = *n; i__1 = lopt, i__2 = *n * 6 + 1 + (i__3 * i__3 << 1); lopt = max(i__1,i__2); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { d__1 = 1. / sigma; _starpu_dscal_(n, &d__1, &w[1], &c__1); } work[1] = (doublereal) lopt; iwork[1] = liopt; return 0; /* End of DSYEVD */ } /* _starpu_dsyevd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsyevr.c000066400000000000000000000557021507764646700207210ustar00rootroot00000000000000/* dsyevr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__10 = 10; static integer c__1 = 1; static integer c__2 = 2; static integer c__3 = 3; static integer c__4 = 4; static integer c_n1 = -1; /* Subroutine */ int _starpu_dsyevr_(char *jobz, char *range, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer * il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, nb, jj; doublereal eps, vll, vuu, tmp1; integer indd, inde; doublereal anrm; integer imax; doublereal rmin, rmax; integer inddd, indee; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal sigma; extern logical _starpu_lsame_(char *, char *); integer iinfo; char order[1]; integer indwk; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); integer lwmin; logical lower, wantz; extern doublereal _starpu_dlamch_(char *); logical alleig, indeig; integer iscale, ieeeok, indibl, indifl; logical valeig; doublereal safmin; extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal abstll, bignum; integer indtau, indisp; extern /* Subroutine */ int _starpu_dstein_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *), _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); integer indiwo, indwkn; extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dstebz_(char *, char *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dstemr_(char *, char *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, integer *, logical *, doublereal *, integer *, integer *, integer *, integer *); integer liwmin; logical tryrac; extern /* Subroutine */ int _starpu_dormtr_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer llwrkn, llwork, nsplit; doublereal smlnum; extern /* Subroutine */ int _starpu_dsytrd_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYEVR computes selected eigenvalues and, optionally, eigenvectors */ /* of a real symmetric matrix A. Eigenvalues and eigenvectors can be */ /* selected by specifying either a range of values or a range of */ /* indices for the desired eigenvalues. */ /* DSYEVR first reduces the matrix A to tridiagonal form T with a call */ /* to DSYTRD. Then, whenever possible, DSYEVR calls DSTEMR to compute */ /* the eigenspectrum using Relatively Robust Representations. DSTEMR */ /* computes eigenvalues by the dqds algorithm, while orthogonal */ /* eigenvectors are computed from various "good" L D L^T representations */ /* (also known as Relatively Robust Representations). Gram-Schmidt */ /* orthogonalization is avoided as far as possible. More specifically, */ /* the various steps of the algorithm are as follows. */ /* For each unreduced block (submatrix) of T, */ /* (a) Compute T - sigma I = L D L^T, so that L and D */ /* define all the wanted eigenvalues to high relative accuracy. */ /* This means that small relative changes in the entries of D and L */ /* cause only small relative changes in the eigenvalues and */ /* eigenvectors. The standard (unfactored) representation of the */ /* tridiagonal matrix T does not have this property in general. */ /* (b) Compute the eigenvalues to suitable accuracy. */ /* If the eigenvectors are desired, the algorithm attains full */ /* accuracy of the computed eigenvalues only right before */ /* the corresponding vectors have to be computed, see steps c) and d). */ /* (c) For each cluster of close eigenvalues, select a new */ /* shift close to the cluster, find a new factorization, and refine */ /* the shifted eigenvalues to suitable accuracy. */ /* (d) For each eigenvalue with a large enough relative separation compute */ /* the corresponding eigenvector by forming a rank revealing twisted */ /* factorization. Go back to (c) for any clusters that remain. */ /* The desired accuracy of the output can be specified by the input */ /* parameter ABSTOL. */ /* For more details, see DSTEMR's documentation and: */ /* - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations */ /* to compute orthogonal eigenvectors of symmetric tridiagonal matrices," */ /* Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. */ /* - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and */ /* Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, */ /* 2004. Also LAPACK Working Note 154. */ /* - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric */ /* tridiagonal eigenvalue/eigenvector problem", */ /* Computer Science Division Technical Report No. UCB/CSD-97-971, */ /* UC Berkeley, May 1997. */ /* Note 1 : DSYEVR calls DSTEMR when the full spectrum is requested */ /* on machines which conform to the ieee-754 floating point standard. */ /* DSYEVR calls DSTEBZ and SSTEIN on non-ieee machines and */ /* when partial spectrum requests are made. */ /* Normal execution of DSTEMR may create NaNs and infinities and */ /* hence may abort due to a floating point exception in environments */ /* which do not handle NaNs and infinities in the ieee standard default */ /* manner. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* RANGE (input) CHARACTER*1 */ /* = 'A': all eigenvalues will be found. */ /* = 'V': all eigenvalues in the half-open interval (VL,VU] */ /* will be found. */ /* = 'I': the IL-th through IU-th eigenvalues will be found. */ /* ********* For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and */ /* ********* DSTEIN are called */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of A contains the */ /* upper triangular part of the matrix A. If UPLO = 'L', */ /* the leading N-by-N lower triangular part of A contains */ /* the lower triangular part of the matrix A. */ /* On exit, the lower triangle (if UPLO='L') or the upper */ /* triangle (if UPLO='U') of A, including the diagonal, is */ /* destroyed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* VL (input) DOUBLE PRECISION */ /* VU (input) DOUBLE PRECISION */ /* If RANGE='V', the lower and upper bounds of the interval to */ /* be searched for eigenvalues. VL < VU. */ /* Not referenced if RANGE = 'A' or 'I'. */ /* IL (input) INTEGER */ /* IU (input) INTEGER */ /* If RANGE='I', the indices (in ascending order) of the */ /* smallest and largest eigenvalues to be returned. */ /* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ /* Not referenced if RANGE = 'A' or 'V'. */ /* ABSTOL (input) DOUBLE PRECISION */ /* The absolute error tolerance for the eigenvalues. */ /* An approximate eigenvalue is accepted as converged */ /* when it is determined to lie in an interval [a,b] */ /* of width less than or equal to */ /* ABSTOL + EPS * max( |a|,|b| ) , */ /* where EPS is the machine precision. If ABSTOL is less than */ /* or equal to zero, then EPS*|T| will be used in its place, */ /* where |T| is the 1-norm of the tridiagonal matrix obtained */ /* by reducing A to tridiagonal form. */ /* See "Computing Small Singular Values of Bidiagonal Matrices */ /* with Guaranteed High Relative Accuracy," by Demmel and */ /* Kahan, LAPACK Working Note #3. */ /* If high relative accuracy is important, set ABSTOL to */ /* DLAMCH( 'Safe minimum' ). Doing so will guarantee that */ /* eigenvalues are computed to high relative accuracy when */ /* possible in future releases. The current code does not */ /* make any guarantees about high relative accuracy, but */ /* future releases will. See J. Barlow and J. Demmel, */ /* "Computing Accurate Eigensystems of Scaled Diagonally */ /* Dominant Matrices", LAPACK Working Note #7, for a discussion */ /* of which matrices define their eigenvalues to high relative */ /* accuracy. */ /* M (output) INTEGER */ /* The total number of eigenvalues found. 0 <= M <= N. */ /* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* The first M elements contain the selected eigenvalues in */ /* ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) */ /* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ /* contain the orthonormal eigenvectors of the matrix A */ /* corresponding to the selected eigenvalues, with the i-th */ /* column of Z holding the eigenvector associated with W(i). */ /* If JOBZ = 'N', then Z is not referenced. */ /* Note: the user must ensure that at least max(1,M) columns are */ /* supplied in the array Z; if RANGE = 'V', the exact value of M */ /* is not known in advance and an upper bound must be used. */ /* Supplying N columns is always safe. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) */ /* The support of the eigenvectors in Z, i.e., the indices */ /* indicating the nonzero elements in Z. The i-th eigenvector */ /* is nonzero only in elements ISUPPZ( 2*i-1 ) through */ /* ISUPPZ( 2*i ). */ /* ********* Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,26*N). */ /* For optimal efficiency, LWORK >= (NB+6)*N, */ /* where NB is the max of the blocksize for DSYTRD and DORMTR */ /* returned by ILAENV. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ /* On exit, if INFO = 0, IWORK(1) returns the optimal LWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. LIWORK >= max(1,10*N). */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the optimal size of the IWORK array, */ /* returns this value as the first entry of the IWORK array, and */ /* no error message related to LIWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: Internal error */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Inderjit Dhillon, IBM Almaden, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* Ken Stanley, Computer Science Division, University of */ /* California at Berkeley, USA */ /* Jason Riedy, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --isuppz; --work; --iwork; /* Function Body */ ieeeok = _starpu_ilaenv_(&c__10, "DSYEVR", "N", &c__1, &c__2, &c__3, &c__4); lower = _starpu_lsame_(uplo, "L"); wantz = _starpu_lsame_(jobz, "V"); alleig = _starpu_lsame_(range, "A"); valeig = _starpu_lsame_(range, "V"); indeig = _starpu_lsame_(range, "I"); lquery = *lwork == -1 || *liwork == -1; /* Computing MAX */ i__1 = 1, i__2 = *n * 26; lwmin = max(i__1,i__2); /* Computing MAX */ i__1 = 1, i__2 = *n * 10; liwmin = max(i__1,i__2); *info = 0; if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; } else if (! (lower || _starpu_lsame_(uplo, "U"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else { if (valeig) { if (*n > 0 && *vu <= *vl) { *info = -8; } } else if (indeig) { if (*il < 1 || *il > max(1,*n)) { *info = -9; } else if (*iu < min(*n,*il) || *iu > *n) { *info = -10; } } } if (*info == 0) { if (*ldz < 1 || wantz && *ldz < *n) { *info = -15; } else if (*lwork < lwmin && ! lquery) { *info = -18; } else if (*liwork < liwmin && ! lquery) { *info = -20; } } if (*info == 0) { nb = _starpu_ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); /* Computing MAX */ i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMTR", uplo, n, &c_n1, &c_n1, & c_n1); nb = max(i__1,i__2); /* Computing MAX */ i__1 = (nb + 1) * *n; lwkopt = max(i__1,lwmin); work[1] = (doublereal) lwkopt; iwork[1] = liwmin; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYEVR", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { work[1] = 1.; return 0; } if (*n == 1) { work[1] = 7.; if (alleig || indeig) { *m = 1; w[1] = a[a_dim1 + 1]; } else { if (*vl < a[a_dim1 + 1] && *vu >= a[a_dim1 + 1]) { *m = 1; w[1] = a[a_dim1 + 1]; } } if (wantz) { z__[z_dim1 + 1] = 1.; } return 0; } /* Get machine constants. */ safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); /* Computing MIN */ d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); rmax = min(d__1,d__2); /* Scale matrix to allowable range, if necessary. */ iscale = 0; abstll = *abstol; vll = *vl; vuu = *vu; anrm = _starpu_dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { if (lower) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n - j + 1; _starpu_dscal_(&i__2, &sigma, &a[j + j * a_dim1], &c__1); /* L10: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { _starpu_dscal_(&j, &sigma, &a[j * a_dim1 + 1], &c__1); /* L20: */ } } if (*abstol > 0.) { abstll = *abstol * sigma; } if (valeig) { vll = *vl * sigma; vuu = *vu * sigma; } } /* Initialize indices into workspaces. Note: The IWORK indices are */ /* used only if DSTERF or DSTEMR fail. */ /* WORK(INDTAU:INDTAU+N-1) stores the scalar factors of the */ /* elementary reflectors used in DSYTRD. */ indtau = 1; /* WORK(INDD:INDD+N-1) stores the tridiagonal's diagonal entries. */ indd = indtau + *n; /* WORK(INDE:INDE+N-1) stores the off-diagonal entries of the */ /* tridiagonal matrix from DSYTRD. */ inde = indd + *n; /* WORK(INDDD:INDDD+N-1) is a copy of the diagonal entries over */ /* -written by DSTEMR (the DSTERF path copies the diagonal to W). */ inddd = inde + *n; /* WORK(INDEE:INDEE+N-1) is a copy of the off-diagonal entries over */ /* -written while computing the eigenvalues in DSTERF and DSTEMR. */ indee = inddd + *n; /* INDWK is the starting offset of the left-over workspace, and */ /* LLWORK is the remaining workspace size. */ indwk = indee + *n; llwork = *lwork - indwk + 1; /* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and */ /* stores the block indices of each of the M<=N eigenvalues. */ indibl = 1; /* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and */ /* stores the starting and finishing indices of each block. */ indisp = indibl + *n; /* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors */ /* that corresponding to eigenvectors that fail to converge in */ /* DSTEIN. This information is discarded; if any fail, the driver */ /* returns INFO > 0. */ indifl = indisp + *n; /* INDIWO is the offset of the remaining integer workspace. */ indiwo = indisp + *n; /* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ _starpu_dsytrd_(uplo, n, &a[a_offset], lda, &work[indd], &work[inde], &work[ indtau], &work[indwk], &llwork, &iinfo); /* If all eigenvalues are desired */ /* then call DSTERF or DSTEMR and DORMTR. */ if ((alleig || indeig && *il == 1 && *iu == *n) && ieeeok == 1) { if (! wantz) { _starpu_dcopy_(n, &work[indd], &c__1, &w[1], &c__1); i__1 = *n - 1; _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); _starpu_dsterf_(n, &w[1], &work[indee], info); } else { i__1 = *n - 1; _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); _starpu_dcopy_(n, &work[indd], &c__1, &work[inddd], &c__1); if (*abstol <= *n * 2. * eps) { tryrac = TRUE_; } else { tryrac = FALSE_; } _starpu_dstemr_(jobz, "A", n, &work[inddd], &work[indee], vl, vu, il, iu, m, &w[1], &z__[z_offset], ldz, n, &isuppz[1], &tryrac, & work[indwk], lwork, &iwork[1], liwork, info); /* Apply orthogonal matrix used in reduction to tridiagonal */ /* form to eigenvectors returned by DSTEIN. */ if (wantz && *info == 0) { indwkn = inde; llwrkn = *lwork - indwkn + 1; _starpu_dormtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau] , &z__[z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); } } if (*info == 0) { /* Everything worked. Skip DSTEBZ/DSTEIN. IWORK(:) are */ /* undefined. */ *m = *n; goto L30; } *info = 0; } /* Otherwise, call DSTEBZ and, if eigenvectors are desired, DSTEIN. */ /* Also call DSTEBZ and DSTEIN if DSTEMR fails. */ if (wantz) { *(unsigned char *)order = 'B'; } else { *(unsigned char *)order = 'E'; } _starpu_dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &work[indd], &work[ inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[ indwk], &iwork[indiwo], info); if (wantz) { _starpu_dstein_(n, &work[indd], &work[inde], m, &w[1], &iwork[indibl], &iwork[ indisp], &z__[z_offset], ldz, &work[indwk], &iwork[indiwo], & iwork[indifl], info); /* Apply orthogonal matrix used in reduction to tridiagonal */ /* form to eigenvectors returned by DSTEIN. */ indwkn = inde; llwrkn = *lwork - indwkn + 1; _starpu_dormtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[ z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ /* Jump here if DSTEMR/DSTEIN succeeded. */ L30: if (iscale == 1) { if (*info == 0) { imax = *m; } else { imax = *info - 1; } d__1 = 1. / sigma; _starpu_dscal_(&imax, &d__1, &w[1], &c__1); } /* If eigenvalues are not in order, then sort them, along with */ /* eigenvectors. Note: We do not sort the IFAIL portion of IWORK. */ /* It may not be initialized (if DSTEMR/DSTEIN succeeded), and we do */ /* not return this detailed information to the user. */ if (wantz) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { i__ = 0; tmp1 = w[j]; i__2 = *m; for (jj = j + 1; jj <= i__2; ++jj) { if (w[jj] < tmp1) { i__ = jj; tmp1 = w[jj]; } /* L40: */ } if (i__ != 0) { w[i__] = w[j]; w[j] = tmp1; _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], &c__1); } /* L50: */ } } /* Set WORK(1) to optimal workspace size. */ work[1] = (doublereal) lwkopt; iwork[1] = liwmin; return 0; /* End of DSYEVR */ } /* _starpu_dsyevr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsyevx.c000066400000000000000000000415161507764646700207250ustar00rootroot00000000000000/* dsyevx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; /* Subroutine */ int _starpu_dsyevx_(char *jobz, char *range, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer * il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *ifail, integer *info) { /* System generated locals */ integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, nb, jj; doublereal eps, vll, vuu, tmp1; integer indd, inde; doublereal anrm; integer imax; doublereal rmin, rmax; logical test; integer itmp1, indee; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal sigma; extern logical _starpu_lsame_(char *, char *); integer iinfo; char order[1]; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); logical lower, wantz; extern doublereal _starpu_dlamch_(char *); logical alleig, indeig; integer iscale, indibl; logical valeig; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal safmin; extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal abstll, bignum; integer indtau, indisp; extern /* Subroutine */ int _starpu_dstein_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *), _starpu_dsterf_(integer *, doublereal *, doublereal *, integer *); integer indiwo, indwkn; extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dstebz_(char *, char *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); integer indwrk, lwkmin; extern /* Subroutine */ int _starpu_dorgtr_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), _starpu_dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dormtr_(char *, char *, char *, integer *, integer *, doublereal * , integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); integer llwrkn, llwork, nsplit; doublereal smlnum; extern /* Subroutine */ int _starpu_dsytrd_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYEVX computes selected eigenvalues and, optionally, eigenvectors */ /* of a real symmetric matrix A. Eigenvalues and eigenvectors can be */ /* selected by specifying either a range of values or a range of indices */ /* for the desired eigenvalues. */ /* Arguments */ /* ========= */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* RANGE (input) CHARACTER*1 */ /* = 'A': all eigenvalues will be found. */ /* = 'V': all eigenvalues in the half-open interval (VL,VU] */ /* will be found. */ /* = 'I': the IL-th through IU-th eigenvalues will be found. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of A contains the */ /* upper triangular part of the matrix A. If UPLO = 'L', */ /* the leading N-by-N lower triangular part of A contains */ /* the lower triangular part of the matrix A. */ /* On exit, the lower triangle (if UPLO='L') or the upper */ /* triangle (if UPLO='U') of A, including the diagonal, is */ /* destroyed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* VL (input) DOUBLE PRECISION */ /* VU (input) DOUBLE PRECISION */ /* If RANGE='V', the lower and upper bounds of the interval to */ /* be searched for eigenvalues. VL < VU. */ /* Not referenced if RANGE = 'A' or 'I'. */ /* IL (input) INTEGER */ /* IU (input) INTEGER */ /* If RANGE='I', the indices (in ascending order) of the */ /* smallest and largest eigenvalues to be returned. */ /* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ /* Not referenced if RANGE = 'A' or 'V'. */ /* ABSTOL (input) DOUBLE PRECISION */ /* The absolute error tolerance for the eigenvalues. */ /* An approximate eigenvalue is accepted as converged */ /* when it is determined to lie in an interval [a,b] */ /* of width less than or equal to */ /* ABSTOL + EPS * max( |a|,|b| ) , */ /* where EPS is the machine precision. If ABSTOL is less than */ /* or equal to zero, then EPS*|T| will be used in its place, */ /* where |T| is the 1-norm of the tridiagonal matrix obtained */ /* by reducing A to tridiagonal form. */ /* Eigenvalues will be computed most accurately when ABSTOL is */ /* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ /* If this routine returns with INFO>0, indicating that some */ /* eigenvectors did not converge, try setting ABSTOL to */ /* 2*DLAMCH('S'). */ /* See "Computing Small Singular Values of Bidiagonal Matrices */ /* with Guaranteed High Relative Accuracy," by Demmel and */ /* Kahan, LAPACK Working Note #3. */ /* M (output) INTEGER */ /* The total number of eigenvalues found. 0 <= M <= N. */ /* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* On normal exit, the first M elements contain the selected */ /* eigenvalues in ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) */ /* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ /* contain the orthonormal eigenvectors of the matrix A */ /* corresponding to the selected eigenvalues, with the i-th */ /* column of Z holding the eigenvector associated with W(i). */ /* If an eigenvector fails to converge, then that column of Z */ /* contains the latest approximation to the eigenvector, and the */ /* index of the eigenvector is returned in IFAIL. */ /* If JOBZ = 'N', then Z is not referenced. */ /* Note: the user must ensure that at least max(1,M) columns are */ /* supplied in the array Z; if RANGE = 'V', the exact value of M */ /* is not known in advance and an upper bound must be used. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The length of the array WORK. LWORK >= 1, when N <= 1; */ /* otherwise 8*N. */ /* For optimal efficiency, LWORK >= (NB+3)*N, */ /* where NB is the max of the blocksize for DSYTRD and DORMTR */ /* returned by ILAENV. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace) INTEGER array, dimension (5*N) */ /* IFAIL (output) INTEGER array, dimension (N) */ /* If JOBZ = 'V', then if INFO = 0, the first M elements of */ /* IFAIL are zero. If INFO > 0, then IFAIL contains the */ /* indices of the eigenvectors that failed to converge. */ /* If JOBZ = 'N', then IFAIL is not referenced. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, then i eigenvectors failed to converge. */ /* Their indices are stored in array IFAIL. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; --iwork; --ifail; /* Function Body */ lower = _starpu_lsame_(uplo, "L"); wantz = _starpu_lsame_(jobz, "V"); alleig = _starpu_lsame_(range, "A"); valeig = _starpu_lsame_(range, "V"); indeig = _starpu_lsame_(range, "I"); lquery = *lwork == -1; *info = 0; if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; } else if (! (lower || _starpu_lsame_(uplo, "U"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else { if (valeig) { if (*n > 0 && *vu <= *vl) { *info = -8; } } else if (indeig) { if (*il < 1 || *il > max(1,*n)) { *info = -9; } else if (*iu < min(*n,*il) || *iu > *n) { *info = -10; } } } if (*info == 0) { if (*ldz < 1 || wantz && *ldz < *n) { *info = -15; } } if (*info == 0) { if (*n <= 1) { lwkmin = 1; work[1] = (doublereal) lwkmin; } else { lwkmin = *n << 3; nb = _starpu_ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); /* Computing MAX */ i__1 = nb, i__2 = _starpu_ilaenv_(&c__1, "DORMTR", uplo, n, &c_n1, &c_n1, &c_n1); nb = max(i__1,i__2); /* Computing MAX */ i__1 = lwkmin, i__2 = (nb + 3) * *n; lwkopt = max(i__1,i__2); work[1] = (doublereal) lwkopt; } if (*lwork < lwkmin && ! lquery) { *info = -17; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYEVX", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { return 0; } if (*n == 1) { if (alleig || indeig) { *m = 1; w[1] = a[a_dim1 + 1]; } else { if (*vl < a[a_dim1 + 1] && *vu >= a[a_dim1 + 1]) { *m = 1; w[1] = a[a_dim1 + 1]; } } if (wantz) { z__[z_dim1 + 1] = 1.; } return 0; } /* Get machine constants. */ safmin = _starpu_dlamch_("Safe minimum"); eps = _starpu_dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); /* Computing MIN */ d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); rmax = min(d__1,d__2); /* Scale matrix to allowable range, if necessary. */ iscale = 0; abstll = *abstol; if (valeig) { vll = *vl; vuu = *vu; } anrm = _starpu_dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { if (lower) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n - j + 1; _starpu_dscal_(&i__2, &sigma, &a[j + j * a_dim1], &c__1); /* L10: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { _starpu_dscal_(&j, &sigma, &a[j * a_dim1 + 1], &c__1); /* L20: */ } } if (*abstol > 0.) { abstll = *abstol * sigma; } if (valeig) { vll = *vl * sigma; vuu = *vu * sigma; } } /* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ indtau = 1; inde = indtau + *n; indd = inde + *n; indwrk = indd + *n; llwork = *lwork - indwrk + 1; _starpu_dsytrd_(uplo, n, &a[a_offset], lda, &work[indd], &work[inde], &work[ indtau], &work[indwrk], &llwork, &iinfo); /* If all eigenvalues are desired and ABSTOL is less than or equal to */ /* zero, then call DSTERF or DORGTR and SSTEQR. If this fails for */ /* some eigenvalue, then try DSTEBZ. */ test = FALSE_; if (indeig) { if (*il == 1 && *iu == *n) { test = TRUE_; } } if ((alleig || test) && *abstol <= 0.) { _starpu_dcopy_(n, &work[indd], &c__1, &w[1], &c__1); indee = indwrk + (*n << 1); if (! wantz) { i__1 = *n - 1; _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); _starpu_dsterf_(n, &w[1], &work[indee], info); } else { _starpu_dlacpy_("A", n, n, &a[a_offset], lda, &z__[z_offset], ldz); _starpu_dorgtr_(uplo, n, &z__[z_offset], ldz, &work[indtau], &work[indwrk] , &llwork, &iinfo); i__1 = *n - 1; _starpu_dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); _starpu_dsteqr_(jobz, n, &w[1], &work[indee], &z__[z_offset], ldz, &work[ indwrk], info); if (*info == 0) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { ifail[i__] = 0; /* L30: */ } } } if (*info == 0) { *m = *n; goto L40; } *info = 0; } /* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. */ if (wantz) { *(unsigned char *)order = 'B'; } else { *(unsigned char *)order = 'E'; } indibl = 1; indisp = indibl + *n; indiwo = indisp + *n; _starpu_dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &work[indd], &work[ inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[ indwrk], &iwork[indiwo], info); if (wantz) { _starpu_dstein_(n, &work[indd], &work[inde], m, &w[1], &iwork[indibl], &iwork[ indisp], &z__[z_offset], ldz, &work[indwrk], &iwork[indiwo], & ifail[1], info); /* Apply orthogonal matrix used in reduction to tridiagonal */ /* form to eigenvectors returned by DSTEIN. */ indwkn = inde; llwrkn = *lwork - indwkn + 1; _starpu_dormtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[ z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ L40: if (iscale == 1) { if (*info == 0) { imax = *m; } else { imax = *info - 1; } d__1 = 1. / sigma; _starpu_dscal_(&imax, &d__1, &w[1], &c__1); } /* If eigenvalues are not in order, then sort them, along with */ /* eigenvectors. */ if (wantz) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { i__ = 0; tmp1 = w[j]; i__2 = *m; for (jj = j + 1; jj <= i__2; ++jj) { if (w[jj] < tmp1) { i__ = jj; tmp1 = w[jj]; } /* L50: */ } if (i__ != 0) { itmp1 = iwork[indibl + i__ - 1]; w[i__] = w[j]; iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; w[j] = tmp1; iwork[indibl + j - 1] = itmp1; _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], &c__1); if (*info != 0) { itmp1 = ifail[i__]; ifail[i__] = ifail[j]; ifail[j] = itmp1; } } /* L60: */ } } /* Set WORK(1) to optimal workspace size. */ work[1] = (doublereal) lwkopt; return 0; /* End of DSYEVX */ } /* _starpu_dsyevx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsygs2.c000066400000000000000000000215161507764646700206140ustar00rootroot00000000000000/* dsygs2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b6 = -1.; static integer c__1 = 1; static doublereal c_b27 = 1.; /* Subroutine */ int _starpu_dsygs2_(integer *itype, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; doublereal d__1; /* Local variables */ integer k; doublereal ct, akk, bkk; extern /* Subroutine */ int _starpu_dsyr2_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dtrsv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYGS2 reduces a real symmetric-definite generalized eigenproblem */ /* to standard form. */ /* If ITYPE = 1, the problem is A*x = lambda*B*x, */ /* and A is overwritten by inv(U')*A*inv(U) or inv(L)*A*inv(L') */ /* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */ /* B*A*x = lambda*x, and A is overwritten by U*A*U` or L'*A*L. */ /* B must have been previously factorized as U'*U or L*L' by DPOTRF. */ /* Arguments */ /* ========= */ /* ITYPE (input) INTEGER */ /* = 1: compute inv(U')*A*inv(U) or inv(L)*A*inv(L'); */ /* = 2 or 3: compute U*A*U' or L'*A*L. */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is stored, and how B has been factorized. */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* n by n upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading n by n lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if INFO = 0, the transformed matrix, stored in the */ /* same format as A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input) DOUBLE PRECISION array, dimension (LDB,N) */ /* The triangular factor from the Cholesky factorization of B, */ /* as returned by DPOTRF. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (*itype < 1 || *itype > 3) { *info = -1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYGS2", &i__1); return 0; } if (*itype == 1) { if (upper) { /* Compute inv(U')*A*inv(U) */ i__1 = *n; for (k = 1; k <= i__1; ++k) { /* Update the upper triangle of A(k:n,k:n) */ akk = a[k + k * a_dim1]; bkk = b[k + k * b_dim1]; /* Computing 2nd power */ d__1 = bkk; akk /= d__1 * d__1; a[k + k * a_dim1] = akk; if (k < *n) { i__2 = *n - k; d__1 = 1. / bkk; _starpu_dscal_(&i__2, &d__1, &a[k + (k + 1) * a_dim1], lda); ct = akk * -.5; i__2 = *n - k; _starpu_daxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + ( k + 1) * a_dim1], lda); i__2 = *n - k; _starpu_dsyr2_(uplo, &i__2, &c_b6, &a[k + (k + 1) * a_dim1], lda, &b[k + (k + 1) * b_dim1], ldb, &a[k + 1 + (k + 1) * a_dim1], lda); i__2 = *n - k; _starpu_daxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + ( k + 1) * a_dim1], lda); i__2 = *n - k; _starpu_dtrsv_(uplo, "Transpose", "Non-unit", &i__2, &b[k + 1 + ( k + 1) * b_dim1], ldb, &a[k + (k + 1) * a_dim1], lda); } /* L10: */ } } else { /* Compute inv(L)*A*inv(L') */ i__1 = *n; for (k = 1; k <= i__1; ++k) { /* Update the lower triangle of A(k:n,k:n) */ akk = a[k + k * a_dim1]; bkk = b[k + k * b_dim1]; /* Computing 2nd power */ d__1 = bkk; akk /= d__1 * d__1; a[k + k * a_dim1] = akk; if (k < *n) { i__2 = *n - k; d__1 = 1. / bkk; _starpu_dscal_(&i__2, &d__1, &a[k + 1 + k * a_dim1], &c__1); ct = akk * -.5; i__2 = *n - k; _starpu_daxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + k * a_dim1], &c__1); i__2 = *n - k; _starpu_dsyr2_(uplo, &i__2, &c_b6, &a[k + 1 + k * a_dim1], &c__1, &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + (k + 1) * a_dim1], lda); i__2 = *n - k; _starpu_daxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + k * a_dim1], &c__1); i__2 = *n - k; _starpu_dtrsv_(uplo, "No transpose", "Non-unit", &i__2, &b[k + 1 + (k + 1) * b_dim1], ldb, &a[k + 1 + k * a_dim1], &c__1); } /* L20: */ } } } else { if (upper) { /* Compute U*A*U' */ i__1 = *n; for (k = 1; k <= i__1; ++k) { /* Update the upper triangle of A(1:k,1:k) */ akk = a[k + k * a_dim1]; bkk = b[k + k * b_dim1]; i__2 = k - 1; _starpu_dtrmv_(uplo, "No transpose", "Non-unit", &i__2, &b[b_offset], ldb, &a[k * a_dim1 + 1], &c__1); ct = akk * .5; i__2 = k - 1; _starpu_daxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); i__2 = k - 1; _starpu_dsyr2_(uplo, &i__2, &c_b27, &a[k * a_dim1 + 1], &c__1, &b[k * b_dim1 + 1], &c__1, &a[a_offset], lda); i__2 = k - 1; _starpu_daxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); i__2 = k - 1; _starpu_dscal_(&i__2, &bkk, &a[k * a_dim1 + 1], &c__1); /* Computing 2nd power */ d__1 = bkk; a[k + k * a_dim1] = akk * (d__1 * d__1); /* L30: */ } } else { /* Compute L'*A*L */ i__1 = *n; for (k = 1; k <= i__1; ++k) { /* Update the lower triangle of A(1:k,1:k) */ akk = a[k + k * a_dim1]; bkk = b[k + k * b_dim1]; i__2 = k - 1; _starpu_dtrmv_(uplo, "Transpose", "Non-unit", &i__2, &b[b_offset], ldb, &a[k + a_dim1], lda); ct = akk * .5; i__2 = k - 1; _starpu_daxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); i__2 = k - 1; _starpu_dsyr2_(uplo, &i__2, &c_b27, &a[k + a_dim1], lda, &b[k + b_dim1], ldb, &a[a_offset], lda); i__2 = k - 1; _starpu_daxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); i__2 = k - 1; _starpu_dscal_(&i__2, &bkk, &a[k + a_dim1], lda); /* Computing 2nd power */ d__1 = bkk; a[k + k * a_dim1] = akk * (d__1 * d__1); /* L40: */ } } } return 0; /* End of DSYGS2 */ } /* _starpu_dsygs2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsygst.c000066400000000000000000000257451507764646700207260ustar00rootroot00000000000000/* dsygst.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b14 = 1.; static doublereal c_b16 = -.5; static doublereal c_b19 = -1.; static doublereal c_b52 = .5; /* Subroutine */ int _starpu_dsygst_(integer *itype, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ integer k, kb, nb; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsymm_( char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dsygs2_( integer *, char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), _starpu_dsyr2k_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *) , _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYGST reduces a real symmetric-definite generalized eigenproblem */ /* to standard form. */ /* If ITYPE = 1, the problem is A*x = lambda*B*x, */ /* and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) */ /* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */ /* B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. */ /* B must have been previously factorized as U**T*U or L*L**T by DPOTRF. */ /* Arguments */ /* ========= */ /* ITYPE (input) INTEGER */ /* = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); */ /* = 2 or 3: compute U*A*U**T or L**T*A*L. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored and B is factored as */ /* U**T*U; */ /* = 'L': Lower triangle of A is stored and B is factored as */ /* L*L**T. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* N-by-N upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if INFO = 0, the transformed matrix, stored in the */ /* same format as A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input) DOUBLE PRECISION array, dimension (LDB,N) */ /* The triangular factor from the Cholesky factorization of B, */ /* as returned by DPOTRF. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (*itype < 1 || *itype > 3) { *info = -1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYGST", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine the block size for this environment. */ nb = _starpu_ilaenv_(&c__1, "DSYGST", uplo, n, &c_n1, &c_n1, &c_n1); if (nb <= 1 || nb >= *n) { /* Use unblocked code */ _starpu_dsygs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); } else { /* Use blocked code */ if (*itype == 1) { if (upper) { /* Compute inv(U')*A*inv(U) */ i__1 = *n; i__2 = nb; for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the upper triangle of A(k:n,k:n) */ _starpu_dsygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, info); if (k + kb <= *n) { i__3 = *n - k - kb + 1; _starpu_dtrsm_("Left", uplo, "Transpose", "Non-unit", &kb, & i__3, &c_b14, &b[k + k * b_dim1], ldb, &a[k + (k + kb) * a_dim1], lda); i__3 = *n - k - kb + 1; _starpu_dsymm_("Left", uplo, &kb, &i__3, &c_b16, &a[k + k * a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, &c_b14, &a[k + (k + kb) * a_dim1], lda); i__3 = *n - k - kb + 1; _starpu_dsyr2k_(uplo, "Transpose", &i__3, &kb, &c_b19, &a[k + (k + kb) * a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, &c_b14, &a[k + kb + (k + kb) * a_dim1], lda); i__3 = *n - k - kb + 1; _starpu_dsymm_("Left", uplo, &kb, &i__3, &c_b16, &a[k + k * a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, &c_b14, &a[k + (k + kb) * a_dim1], lda); i__3 = *n - k - kb + 1; _starpu_dtrsm_("Right", uplo, "No transpose", "Non-unit", &kb, &i__3, &c_b14, &b[k + kb + (k + kb) * b_dim1] , ldb, &a[k + (k + kb) * a_dim1], lda); } /* L10: */ } } else { /* Compute inv(L)*A*inv(L') */ i__2 = *n; i__1 = nb; for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the lower triangle of A(k:n,k:n) */ _starpu_dsygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, info); if (k + kb <= *n) { i__3 = *n - k - kb + 1; _starpu_dtrsm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb, &c_b14, &b[k + k * b_dim1], ldb, &a[k + kb + k * a_dim1], lda); i__3 = *n - k - kb + 1; _starpu_dsymm_("Right", uplo, &i__3, &kb, &c_b16, &a[k + k * a_dim1], lda, &b[k + kb + k * b_dim1], ldb, & c_b14, &a[k + kb + k * a_dim1], lda); i__3 = *n - k - kb + 1; _starpu_dsyr2k_(uplo, "No transpose", &i__3, &kb, &c_b19, &a[ k + kb + k * a_dim1], lda, &b[k + kb + k * b_dim1], ldb, &c_b14, &a[k + kb + (k + kb) * a_dim1], lda); i__3 = *n - k - kb + 1; _starpu_dsymm_("Right", uplo, &i__3, &kb, &c_b16, &a[k + k * a_dim1], lda, &b[k + kb + k * b_dim1], ldb, & c_b14, &a[k + kb + k * a_dim1], lda); i__3 = *n - k - kb + 1; _starpu_dtrsm_("Left", uplo, "No transpose", "Non-unit", & i__3, &kb, &c_b14, &b[k + kb + (k + kb) * b_dim1], ldb, &a[k + kb + k * a_dim1], lda); } /* L20: */ } } } else { if (upper) { /* Compute U*A*U' */ i__1 = *n; i__2 = nb; for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */ i__3 = k - 1; _starpu_dtrmm_("Left", uplo, "No transpose", "Non-unit", &i__3, & kb, &c_b14, &b[b_offset], ldb, &a[k * a_dim1 + 1], lda) ; i__3 = k - 1; _starpu_dsymm_("Right", uplo, &i__3, &kb, &c_b52, &a[k + k * a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, &a[ k * a_dim1 + 1], lda); i__3 = k - 1; _starpu_dsyr2k_(uplo, "No transpose", &i__3, &kb, &c_b14, &a[k * a_dim1 + 1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, &a[a_offset], lda); i__3 = k - 1; _starpu_dsymm_("Right", uplo, &i__3, &kb, &c_b52, &a[k + k * a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, &a[ k * a_dim1 + 1], lda); i__3 = k - 1; _starpu_dtrmm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb, &c_b14, &b[k + k * b_dim1], ldb, &a[k * a_dim1 + 1], lda); _starpu_dsygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, info); /* L30: */ } } else { /* Compute L'*A*L */ i__2 = *n; i__1 = nb; for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */ i__3 = k - 1; _starpu_dtrmm_("Right", uplo, "No transpose", "Non-unit", &kb, & i__3, &c_b14, &b[b_offset], ldb, &a[k + a_dim1], lda); i__3 = k - 1; _starpu_dsymm_("Left", uplo, &kb, &i__3, &c_b52, &a[k + k * a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[k + a_dim1], lda); i__3 = k - 1; _starpu_dsyr2k_(uplo, "Transpose", &i__3, &kb, &c_b14, &a[k + a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[ a_offset], lda); i__3 = k - 1; _starpu_dsymm_("Left", uplo, &kb, &i__3, &c_b52, &a[k + k * a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[k + a_dim1], lda); i__3 = k - 1; _starpu_dtrmm_("Left", uplo, "Transpose", "Non-unit", &kb, &i__3, &c_b14, &b[k + k * b_dim1], ldb, &a[k + a_dim1], lda); _starpu_dsygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, info); /* L40: */ } } } } return 0; /* End of DSYGST */ } /* _starpu_dsygst_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsygv.c000066400000000000000000000224341507764646700205350ustar00rootroot00000000000000/* dsygv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b16 = 1.; /* Subroutine */ int _starpu_dsygv_(integer *itype, char *jobz, char *uplo, integer * n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *w, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; /* Local variables */ integer nb, neig; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); char trans[1]; extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dsyev_(char *, char *, integer *, doublereal * , integer *, doublereal *, doublereal *, integer *, integer *); logical wantz; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dpotrf_(char *, integer *, doublereal *, integer *, integer *); integer lwkmin; extern /* Subroutine */ int _starpu_dsygst_(integer *, char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYGV computes all the eigenvalues, and optionally, the eigenvectors */ /* of a real generalized symmetric-definite eigenproblem, of the form */ /* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. */ /* Here A and B are assumed to be symmetric and B is also */ /* positive definite. */ /* Arguments */ /* ========= */ /* ITYPE (input) INTEGER */ /* Specifies the problem type to be solved: */ /* = 1: A*x = (lambda)*B*x */ /* = 2: A*B*x = (lambda)*x */ /* = 3: B*A*x = (lambda)*x */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangles of A and B are stored; */ /* = 'L': Lower triangles of A and B are stored. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of A contains the */ /* upper triangular part of the matrix A. If UPLO = 'L', */ /* the leading N-by-N lower triangular part of A contains */ /* the lower triangular part of the matrix A. */ /* On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ /* matrix Z of eigenvectors. The eigenvectors are normalized */ /* as follows: */ /* if ITYPE = 1 or 2, Z**T*B*Z = I; */ /* if ITYPE = 3, Z**T*inv(B)*Z = I. */ /* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */ /* or the lower triangle (if UPLO='L') of A, including the */ /* diagonal, is destroyed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ /* On entry, the symmetric positive definite matrix B. */ /* If UPLO = 'U', the leading N-by-N upper triangular part of B */ /* contains the upper triangular part of the matrix B. */ /* If UPLO = 'L', the leading N-by-N lower triangular part of B */ /* contains the lower triangular part of the matrix B. */ /* On exit, if INFO <= N, the part of B containing the matrix is */ /* overwritten by the triangular factor U or L from the Cholesky */ /* factorization B = U**T*U or B = L*L**T. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, the eigenvalues in ascending order. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The length of the array WORK. LWORK >= max(1,3*N-1). */ /* For optimal efficiency, LWORK >= (NB+2)*N, */ /* where NB is the blocksize for DSYTRD returned by ILAENV. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: DPOTRF or DSYEV returned an error code: */ /* <= N: if INFO = i, DSYEV failed to converge; */ /* i off-diagonal elements of an intermediate */ /* tridiagonal form did not converge to zero; */ /* > N: if INFO = N + i, for 1 <= i <= N, then the leading */ /* minor of order i of B is not positive definite. */ /* The factorization of B could not be completed and */ /* no eigenvalues or eigenvectors were computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --w; --work; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); upper = _starpu_lsame_(uplo, "U"); lquery = *lwork == -1; *info = 0; if (*itype < 1 || *itype > 3) { *info = -1; } else if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -2; } else if (! (upper || _starpu_lsame_(uplo, "L"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldb < max(1,*n)) { *info = -8; } if (*info == 0) { /* Computing MAX */ i__1 = 1, i__2 = *n * 3 - 1; lwkmin = max(i__1,i__2); nb = _starpu_ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); /* Computing MAX */ i__1 = lwkmin, i__2 = (nb + 2) * *n; lwkopt = max(i__1,i__2); work[1] = (doublereal) lwkopt; if (*lwork < lwkmin && ! lquery) { *info = -11; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYGV ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Form a Cholesky factorization of B. */ _starpu_dpotrf_(uplo, n, &b[b_offset], ldb, info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem and solve. */ _starpu_dsygst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); _starpu_dsyev_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, info); if (wantz) { /* Backtransform eigenvectors to the original problem. */ neig = *n; if (*info > 0) { neig = *info - 1; } if (*itype == 1 || *itype == 2) { /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ /* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ if (upper) { *(unsigned char *)trans = 'N'; } else { *(unsigned char *)trans = 'T'; } _starpu_dtrsm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b16, &b[ b_offset], ldb, &a[a_offset], lda); } else if (*itype == 3) { /* For B*A*x=(lambda)*x; */ /* backtransform eigenvectors: x = L*y or U'*y */ if (upper) { *(unsigned char *)trans = 'T'; } else { *(unsigned char *)trans = 'N'; } _starpu_dtrmm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b16, &b[ b_offset], ldb, &a[a_offset], lda); } } work[1] = (doublereal) lwkopt; return 0; /* End of DSYGV */ } /* _starpu_dsygv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsygvd.c000066400000000000000000000267741507764646700207140ustar00rootroot00000000000000/* dsygvd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b11 = 1.; /* Subroutine */ int _starpu_dsygvd_(integer *itype, char *jobz, char *uplo, integer * n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *w, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; doublereal d__1, d__2; /* Local variables */ integer lopt; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer lwmin; char trans[1]; integer liopt; extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical upper, wantz; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dpotrf_( char *, integer *, doublereal *, integer *, integer *); integer liwmin; extern /* Subroutine */ int _starpu_dsyevd_(char *, char *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, integer *, integer *), _starpu_dsygst_(integer *, char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYGVD computes all the eigenvalues, and optionally, the eigenvectors */ /* of a real generalized symmetric-definite eigenproblem, of the form */ /* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and */ /* B are assumed to be symmetric and B is also positive definite. */ /* If eigenvectors are desired, it uses a divide and conquer algorithm. */ /* The divide and conquer algorithm makes very mild assumptions about */ /* floating point arithmetic. It will work on machines with a guard */ /* digit in add/subtract, or on those binary machines without guard */ /* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ /* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. */ /* Arguments */ /* ========= */ /* ITYPE (input) INTEGER */ /* Specifies the problem type to be solved: */ /* = 1: A*x = (lambda)*B*x */ /* = 2: A*B*x = (lambda)*x */ /* = 3: B*A*x = (lambda)*x */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangles of A and B are stored; */ /* = 'L': Lower triangles of A and B are stored. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of A contains the */ /* upper triangular part of the matrix A. If UPLO = 'L', */ /* the leading N-by-N lower triangular part of A contains */ /* the lower triangular part of the matrix A. */ /* On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ /* matrix Z of eigenvectors. The eigenvectors are normalized */ /* as follows: */ /* if ITYPE = 1 or 2, Z**T*B*Z = I; */ /* if ITYPE = 3, Z**T*inv(B)*Z = I. */ /* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */ /* or the lower triangle (if UPLO='L') of A, including the */ /* diagonal, is destroyed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB, N) */ /* On entry, the symmetric matrix B. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of B contains the */ /* upper triangular part of the matrix B. If UPLO = 'L', */ /* the leading N-by-N lower triangular part of B contains */ /* the lower triangular part of the matrix B. */ /* On exit, if INFO <= N, the part of B containing the matrix is */ /* overwritten by the triangular factor U or L from the Cholesky */ /* factorization B = U**T*U or B = L*L**T. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* If INFO = 0, the eigenvalues in ascending order. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If N <= 1, LWORK >= 1. */ /* If JOBZ = 'N' and N > 1, LWORK >= 2*N+1. */ /* If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal sizes of the WORK and IWORK */ /* arrays, returns these values as the first entries of the WORK */ /* and IWORK arrays, and no error message related to LWORK or */ /* LIWORK is issued by XERBLA. */ /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ /* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. */ /* If N <= 1, LIWORK >= 1. */ /* If JOBZ = 'N' and N > 1, LIWORK >= 1. */ /* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the optimal sizes of the WORK and */ /* IWORK arrays, returns these values as the first entries of */ /* the WORK and IWORK arrays, and no error message related to */ /* LWORK or LIWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: DPOTRF or DSYEVD returned an error code: */ /* <= N: if INFO = i and JOBZ = 'N', then the algorithm */ /* failed to converge; i off-diagonal elements of an */ /* intermediate tridiagonal form did not converge to */ /* zero; */ /* if INFO = i and JOBZ = 'V', then the algorithm */ /* failed to compute an eigenvalue while working on */ /* the submatrix lying in rows and columns INFO/(N+1) */ /* through mod(INFO,N+1); */ /* > N: if INFO = N + i, for 1 <= i <= N, then the leading */ /* minor of order i of B is not positive definite. */ /* The factorization of B could not be completed and */ /* no eigenvalues or eigenvectors were computed. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ /* Modified so that no backsubstitution is performed if DSYEVD fails to */ /* converge (NEIG in old code could be greater than N causing out of */ /* bounds reference to A - reported by Ralf Meyer). Also corrected the */ /* description of INFO and the test on ITYPE. Sven, 16 Feb 05. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --w; --work; --iwork; /* Function Body */ wantz = _starpu_lsame_(jobz, "V"); upper = _starpu_lsame_(uplo, "U"); lquery = *lwork == -1 || *liwork == -1; *info = 0; if (*n <= 1) { liwmin = 1; lwmin = 1; } else if (wantz) { liwmin = *n * 5 + 3; /* Computing 2nd power */ i__1 = *n; lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); } else { liwmin = 1; lwmin = (*n << 1) + 1; } lopt = lwmin; liopt = liwmin; if (*itype < 1 || *itype > 3) { *info = -1; } else if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -2; } else if (! (upper || _starpu_lsame_(uplo, "L"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldb < max(1,*n)) { *info = -8; } if (*info == 0) { work[1] = (doublereal) lopt; iwork[1] = liopt; if (*lwork < lwmin && ! lquery) { *info = -11; } else if (*liwork < liwmin && ! lquery) { *info = -13; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYGVD", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Form a Cholesky factorization of B. */ _starpu_dpotrf_(uplo, n, &b[b_offset], ldb, info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem and solve. */ _starpu_dsygst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); _starpu_dsyevd_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &iwork[ 1], liwork, info); /* Computing MAX */ d__1 = (doublereal) lopt; lopt = (integer) max(d__1,work[1]); /* Computing MAX */ d__1 = (doublereal) liopt, d__2 = (doublereal) iwork[1]; liopt = (integer) max(d__1,d__2); if (wantz && *info == 0) { /* Backtransform eigenvectors to the original problem. */ if (*itype == 1 || *itype == 2) { /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ /* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ if (upper) { *(unsigned char *)trans = 'N'; } else { *(unsigned char *)trans = 'T'; } _starpu_dtrsm_("Left", uplo, trans, "Non-unit", n, n, &c_b11, &b[b_offset] , ldb, &a[a_offset], lda); } else if (*itype == 3) { /* For B*A*x=(lambda)*x; */ /* backtransform eigenvectors: x = L*y or U'*y */ if (upper) { *(unsigned char *)trans = 'T'; } else { *(unsigned char *)trans = 'N'; } _starpu_dtrmm_("Left", uplo, trans, "Non-unit", n, n, &c_b11, &b[b_offset] , ldb, &a[a_offset], lda); } } work[1] = (doublereal) lopt; iwork[1] = liopt; return 0; /* End of DSYGVD */ } /* _starpu_dsygvd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsygvx.c000066400000000000000000000334461507764646700207320ustar00rootroot00000000000000/* dsygvx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b19 = 1.; /* Subroutine */ int _starpu_dsygvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *ifail, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, z_dim1, z_offset, i__1, i__2; /* Local variables */ integer nb; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); char trans[1]; extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical upper, wantz, alleig, indeig, valeig; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dpotrf_(char *, integer *, doublereal *, integer *, integer *); integer lwkmin; extern /* Subroutine */ int _starpu_dsygst_(integer *, char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; extern /* Subroutine */ int _starpu_dsyevx_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYGVX computes selected eigenvalues, and optionally, eigenvectors */ /* of a real generalized symmetric-definite eigenproblem, of the form */ /* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A */ /* and B are assumed to be symmetric and B is also positive definite. */ /* Eigenvalues and eigenvectors can be selected by specifying either a */ /* range of values or a range of indices for the desired eigenvalues. */ /* Arguments */ /* ========= */ /* ITYPE (input) INTEGER */ /* Specifies the problem type to be solved: */ /* = 1: A*x = (lambda)*B*x */ /* = 2: A*B*x = (lambda)*x */ /* = 3: B*A*x = (lambda)*x */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* RANGE (input) CHARACTER*1 */ /* = 'A': all eigenvalues will be found. */ /* = 'V': all eigenvalues in the half-open interval (VL,VU] */ /* will be found. */ /* = 'I': the IL-th through IU-th eigenvalues will be found. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A and B are stored; */ /* = 'L': Lower triangle of A and B are stored. */ /* N (input) INTEGER */ /* The order of the matrix pencil (A,B). N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of A contains the */ /* upper triangular part of the matrix A. If UPLO = 'L', */ /* the leading N-by-N lower triangular part of A contains */ /* the lower triangular part of the matrix A. */ /* On exit, the lower triangle (if UPLO='L') or the upper */ /* triangle (if UPLO='U') of A, including the diagonal, is */ /* destroyed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDA, N) */ /* On entry, the symmetric matrix B. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of B contains the */ /* upper triangular part of the matrix B. If UPLO = 'L', */ /* the leading N-by-N lower triangular part of B contains */ /* the lower triangular part of the matrix B. */ /* On exit, if INFO <= N, the part of B containing the matrix is */ /* overwritten by the triangular factor U or L from the Cholesky */ /* factorization B = U**T*U or B = L*L**T. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* VL (input) DOUBLE PRECISION */ /* VU (input) DOUBLE PRECISION */ /* If RANGE='V', the lower and upper bounds of the interval to */ /* be searched for eigenvalues. VL < VU. */ /* Not referenced if RANGE = 'A' or 'I'. */ /* IL (input) INTEGER */ /* IU (input) INTEGER */ /* If RANGE='I', the indices (in ascending order) of the */ /* smallest and largest eigenvalues to be returned. */ /* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ /* Not referenced if RANGE = 'A' or 'V'. */ /* ABSTOL (input) DOUBLE PRECISION */ /* The absolute error tolerance for the eigenvalues. */ /* An approximate eigenvalue is accepted as converged */ /* when it is determined to lie in an interval [a,b] */ /* of width less than or equal to */ /* ABSTOL + EPS * max( |a|,|b| ) , */ /* where EPS is the machine precision. If ABSTOL is less than */ /* or equal to zero, then EPS*|T| will be used in its place, */ /* where |T| is the 1-norm of the tridiagonal matrix obtained */ /* by reducing A to tridiagonal form. */ /* Eigenvalues will be computed most accurately when ABSTOL is */ /* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ /* If this routine returns with INFO>0, indicating that some */ /* eigenvectors did not converge, try setting ABSTOL to */ /* 2*DLAMCH('S'). */ /* M (output) INTEGER */ /* The total number of eigenvalues found. 0 <= M <= N. */ /* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ /* W (output) DOUBLE PRECISION array, dimension (N) */ /* On normal exit, the first M elements contain the selected */ /* eigenvalues in ascending order. */ /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) */ /* If JOBZ = 'N', then Z is not referenced. */ /* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ /* contain the orthonormal eigenvectors of the matrix A */ /* corresponding to the selected eigenvalues, with the i-th */ /* column of Z holding the eigenvector associated with W(i). */ /* The eigenvectors are normalized as follows: */ /* if ITYPE = 1 or 2, Z**T*B*Z = I; */ /* if ITYPE = 3, Z**T*inv(B)*Z = I. */ /* If an eigenvector fails to converge, then that column of Z */ /* contains the latest approximation to the eigenvector, and the */ /* index of the eigenvector is returned in IFAIL. */ /* Note: the user must ensure that at least max(1,M) columns are */ /* supplied in the array Z; if RANGE = 'V', the exact value of M */ /* is not known in advance and an upper bound must be used. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The length of the array WORK. LWORK >= max(1,8*N). */ /* For optimal efficiency, LWORK >= (NB+3)*N, */ /* where NB is the blocksize for DSYTRD returned by ILAENV. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace) INTEGER array, dimension (5*N) */ /* IFAIL (output) INTEGER array, dimension (N) */ /* If JOBZ = 'V', then if INFO = 0, the first M elements of */ /* IFAIL are zero. If INFO > 0, then IFAIL contains the */ /* indices of the eigenvectors that failed to converge. */ /* If JOBZ = 'N', then IFAIL is not referenced. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: DPOTRF or DSYEVX returned an error code: */ /* <= N: if INFO = i, DSYEVX failed to converge; */ /* i eigenvectors failed to converge. Their indices */ /* are stored in array IFAIL. */ /* > N: if INFO = N + i, for 1 <= i <= N, then the leading */ /* minor of order i of B is not positive definite. */ /* The factorization of B could not be completed and */ /* no eigenvalues or eigenvectors were computed. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; --iwork; --ifail; /* Function Body */ upper = _starpu_lsame_(uplo, "U"); wantz = _starpu_lsame_(jobz, "V"); alleig = _starpu_lsame_(range, "A"); valeig = _starpu_lsame_(range, "V"); indeig = _starpu_lsame_(range, "I"); lquery = *lwork == -1; *info = 0; if (*itype < 1 || *itype > 3) { *info = -1; } else if (! (wantz || _starpu_lsame_(jobz, "N"))) { *info = -2; } else if (! (alleig || valeig || indeig)) { *info = -3; } else if (! (upper || _starpu_lsame_(uplo, "L"))) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } else { if (valeig) { if (*n > 0 && *vu <= *vl) { *info = -11; } } else if (indeig) { if (*il < 1 || *il > max(1,*n)) { *info = -12; } else if (*iu < min(*n,*il) || *iu > *n) { *info = -13; } } } if (*info == 0) { if (*ldz < 1 || wantz && *ldz < *n) { *info = -18; } } if (*info == 0) { /* Computing MAX */ i__1 = 1, i__2 = *n << 3; lwkmin = max(i__1,i__2); nb = _starpu_ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); /* Computing MAX */ i__1 = lwkmin, i__2 = (nb + 3) * *n; lwkopt = max(i__1,i__2); work[1] = (doublereal) lwkopt; if (*lwork < lwkmin && ! lquery) { *info = -20; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYGVX", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { return 0; } /* Form a Cholesky factorization of B. */ _starpu_dpotrf_(uplo, n, &b[b_offset], ldb, info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem and solve. */ _starpu_dsygst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); _starpu_dsyevx_(jobz, range, uplo, n, &a[a_offset], lda, vl, vu, il, iu, abstol, m, &w[1], &z__[z_offset], ldz, &work[1], lwork, &iwork[1], &ifail[ 1], info); if (wantz) { /* Backtransform eigenvectors to the original problem. */ if (*info > 0) { *m = *info - 1; } if (*itype == 1 || *itype == 2) { /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ /* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ if (upper) { *(unsigned char *)trans = 'N'; } else { *(unsigned char *)trans = 'T'; } _starpu_dtrsm_("Left", uplo, trans, "Non-unit", n, m, &c_b19, &b[b_offset] , ldb, &z__[z_offset], ldz); } else if (*itype == 3) { /* For B*A*x=(lambda)*x; */ /* backtransform eigenvectors: x = L*y or U'*y */ if (upper) { *(unsigned char *)trans = 'T'; } else { *(unsigned char *)trans = 'N'; } _starpu_dtrmm_("Left", uplo, trans, "Non-unit", n, m, &c_b19, &b[b_offset] , ldb, &z__[z_offset], ldz); } } /* Set WORK(1) to optimal workspace size. */ work[1] = (doublereal) lwkopt; return 0; /* End of DSYGVX */ } /* _starpu_dsygvx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsyrfs.c000066400000000000000000000315531507764646700207150ustar00rootroot00000000000000/* dsyrfs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b12 = -1.; static doublereal c_b14 = 1.; /* Subroutine */ int _starpu_dsyrfs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer * ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j, k; doublereal s, xk; integer nz; doublereal eps; integer kase; doublereal safe1, safe2; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer count; logical upper; extern /* Subroutine */ int _starpu_dsymv_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal lstres; extern /* Subroutine */ int _starpu_dsytrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYRFS improves the computed solution to a system of linear */ /* equations when the coefficient matrix is symmetric indefinite, and */ /* provides error bounds and backward error estimates for the solution. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The symmetric matrix A. If UPLO = 'U', the leading N-by-N */ /* upper triangular part of A contains the upper triangular part */ /* of the matrix A, and the strictly lower triangular part of A */ /* is not referenced. If UPLO = 'L', the leading N-by-N lower */ /* triangular part of A contains the lower triangular part of */ /* the matrix A, and the strictly upper triangular part of A is */ /* not referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The factored form of the matrix A. AF contains the block */ /* diagonal matrix D and the multipliers used to obtain the */ /* factor U or L from the factorization A = U*D*U**T or */ /* A = L*D*L**T as computed by DSYTRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D */ /* as determined by DSYTRF. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* On entry, the solution matrix X, as computed by DSYTRS. */ /* On exit, the improved solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Internal Parameters */ /* =================== */ /* ITMAX is the maximum number of steps of iterative refinement. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldaf < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -10; } else if (*ldx < max(1,*n)) { *info = -12; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = *n + 1; eps = _starpu_dlamch_("Epsilon"); safmin = _starpu_dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - A * X */ _starpu_dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); _starpu_dsymv_(uplo, n, &c_b12, &a[a_offset], lda, &x[j * x_dim1 + 1], &c__1, &c_b14, &work[*n + 1], &c__1); /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. If the i-th component of the denominator is less */ /* than SAFE2, then SAFE1 is added to the i-th components of the */ /* numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); /* L30: */ } /* Compute abs(A)*abs(X) + abs(B). */ if (upper) { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; xk = (d__1 = x[k + j * x_dim1], abs(d__1)); i__3 = k - 1; for (i__ = 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * xk; s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = x[ i__ + j * x_dim1], abs(d__2)); /* L40: */ } work[k] = work[k] + (d__1 = a[k + k * a_dim1], abs(d__1)) * xk + s; /* L50: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; xk = (d__1 = x[k + j * x_dim1], abs(d__1)); work[k] += (d__1 = a[k + k * a_dim1], abs(d__1)) * xk; i__3 = *n; for (i__ = k + 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * xk; s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = x[ i__ + j * x_dim1], abs(d__2)); /* L60: */ } work[k] += s; /* L70: */ } } s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L80: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if */ /* 1) The residual BERR(J) is larger than machine epsilon, and */ /* 2) BERR(J) decreased by at least a factor of 2 during the */ /* last iteration, and */ /* 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { /* Update solution and try again. */ _starpu_dsytrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[*n + 1], n, info); _starpu_daxpy_(n, &c_b14, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) ; lstres = berr[j]; ++count; goto L20; } /* Bound error from formula */ /* norm(X - XTRUE) / norm(X) .le. FERR = */ /* norm( abs(inv(A))* */ /* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */ /* where */ /* norm(Z) is the magnitude of the largest component of Z */ /* inv(A) is the inverse of A */ /* abs(Z) is the componentwise absolute value of the matrix or */ /* vector Z */ /* NZ is the maximum number of nonzeros in any row of A, plus 1 */ /* EPS is machine epsilon */ /* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */ /* is incremented by SAFE1 if the i-th component of */ /* abs(A)*abs(X) + abs(B) is less than SAFE2. */ /* Use DLACN2 to estimate the infinity-norm of the matrix */ /* inv(A) * diag(W), */ /* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L90: */ } kase = 0; L100: _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase, isave); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(A'). */ _starpu_dsytrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ *n + 1], n, info); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L110: */ } } else if (kase == 2) { /* Multiply by inv(A)*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L120: */ } _starpu_dsytrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ *n + 1], n, info); } goto L100; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); lstres = max(d__2,d__3); /* L130: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L140: */ } return 0; /* End of DSYRFS */ } /* _starpu_dsyrfs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsyrfsx.c000066400000000000000000000606231507764646700211050ustar00rootroot00000000000000/* dsyrfsx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c_n1 = -1; static integer c__0 = 0; static integer c__1 = 1; /* Subroutine */ int _starpu_dsyrfsx_(char *uplo, char *equed, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, doublereal *s, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *berr, integer * n_err_bnds__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, integer *nparams, doublereal *params, doublereal * work, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, err_bnds_comp_dim1, err_bnds_comp_offset, i__1; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal illrcond_thresh__, unstable_thresh__, err_lbnd__; integer ref_type__, j; doublereal rcond_tmp__; integer prec_type__; extern doublereal _starpu_dla_syrcond__(char *, integer *, doublereal *, integer * , doublereal *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen); doublereal cwise_wrong__; extern /* Subroutine */ int _starpu_dla_syrfsx_extended__(integer *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, logical *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *, ftnlen); char norm[1]; logical ignore_cwise__; extern logical _starpu_lsame_(char *, char *); doublereal anorm; logical rcequ; extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dsycon_(char *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); extern integer _starpu_ilaprec_(char *); integer ithresh, n_norms__; doublereal rthresh; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYRFSX improves the computed solution to a system of linear */ /* equations when the coefficient matrix is symmetric indefinite, and */ /* provides error bounds and backward error estimates for the */ /* solution. In addition to normwise error bound, the code provides */ /* maximum componentwise error bound if possible. See comments for */ /* ERR_BNDS_NORM and ERR_BNDS_COMP for details of the error bounds. */ /* The original system of linear equations may have been equilibrated */ /* before calling this routine, as described by arguments EQUED and S */ /* below. In this case, the solution and error bounds returned are */ /* for the original unequilibrated system. */ /* Arguments */ /* ========= */ /* Some optional parameters are bundled in the PARAMS array. These */ /* settings determine how refinement is performed, but often the */ /* defaults are acceptable. If the defaults are acceptable, users */ /* can pass NPARAMS = 0 which prevents the source code from accessing */ /* the PARAMS argument. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* EQUED (input) CHARACTER*1 */ /* Specifies the form of equilibration that was done to A */ /* before calling this routine. This is needed to compute */ /* the solution and error bounds correctly. */ /* = 'N': No equilibration */ /* = 'Y': Both row and column equilibration, i.e., A has been */ /* replaced by diag(S) * A * diag(S). */ /* The right hand side B has been changed accordingly. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The symmetric matrix A. If UPLO = 'U', the leading N-by-N */ /* upper triangular part of A contains the upper triangular */ /* part of the matrix A, and the strictly lower triangular */ /* part of A is not referenced. If UPLO = 'L', the leading */ /* N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */ /* The factored form of the matrix A. AF contains the block */ /* diagonal matrix D and the multipliers used to obtain the */ /* factor U or L from the factorization A = U*D*U**T or A = */ /* L*D*L**T as computed by DSYTRF. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D */ /* as determined by DSYTRF. */ /* S (input or output) DOUBLE PRECISION array, dimension (N) */ /* The scale factors for A. If EQUED = 'Y', A is multiplied on */ /* the left and right by diag(S). S is an input argument if FACT = */ /* 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED */ /* = 'Y', each element of S must be positive. If S is output, each */ /* element of S is a power of the radix. If S is input, each element */ /* of S should be a power of the radix to ensure a reliable solution */ /* and error estimates. Scaling by powers of the radix does not cause */ /* rounding errors unless the result underflows or overflows. */ /* Rounding errors during scaling lead to refining with a matrix that */ /* is not equivalent to the input matrix, producing error estimates */ /* that may not be reliable. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* On entry, the solution matrix X, as computed by DGETRS. */ /* On exit, the improved solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* Reciprocal scaled condition number. This is an estimate of the */ /* reciprocal Skeel condition number of the matrix A after */ /* equilibration (if done). If this is less than the machine */ /* precision (in particular, if it is zero), the matrix is singular */ /* to working precision. Note that the error may still be small even */ /* if this number is very small and the matrix appears ill- */ /* conditioned. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* Componentwise relative backward error. This is the */ /* componentwise relative backward error of each solution vector X(j) */ /* (i.e., the smallest relative change in any element of A or B that */ /* makes X(j) an exact solution). */ /* N_ERR_BNDS (input) INTEGER */ /* Number of error bounds to return for each right hand side */ /* and each type (normwise or componentwise). See ERR_BNDS_NORM and */ /* ERR_BNDS_COMP below. */ /* ERR_BNDS_NORM (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* normwise relative error, which is defined as follows: */ /* Normwise relative error in the ith solution vector: */ /* max_j (abs(XTRUE(j,i) - X(j,i))) */ /* ------------------------------ */ /* max_j abs(X(j,i)) */ /* The array is indexed by the type of error information as described */ /* below. There currently are up to three pieces of information */ /* returned. */ /* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_NORM(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * dlamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * dlamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated normwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * dlamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*A, where S scales each row by a power of the */ /* radix so all absolute row sums of Z are approximately 1. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* ERR_BNDS_COMP (output) DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* componentwise relative error, which is defined as follows: */ /* Componentwise relative error in the ith solution vector: */ /* abs(XTRUE(j,i) - X(j,i)) */ /* max_j ---------------------- */ /* abs(X(j,i)) */ /* The array is indexed by the right-hand side i (on which the */ /* componentwise relative error depends), and the type of error */ /* information as described below. There currently are up to three */ /* pieces of information returned for each right-hand side. If */ /* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ /* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ /* the first (:,N_ERR_BNDS) entries are returned. */ /* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_COMP(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * dlamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * dlamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated componentwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * dlamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*(A*diag(x)), where x is the solution for the */ /* current right-hand side and S scales each row of */ /* A*diag(x) by a power of the radix so all absolute row */ /* sums of Z are approximately 1. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* NPARAMS (input) INTEGER */ /* Specifies the number of parameters set in PARAMS. If .LE. 0, the */ /* PARAMS array is never referenced and default values are used. */ /* PARAMS (input / output) DOUBLE PRECISION array, dimension NPARAMS */ /* Specifies algorithm parameters. If an entry is .LT. 0.0, then */ /* that entry will be filled with default value used for that */ /* parameter. Only positions up to NPARAMS are accessed; defaults */ /* are used for higher-numbered parameters. */ /* PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ /* refinement or not. */ /* Default: 1.0D+0 */ /* = 0.0 : No refinement is performed, and no error bounds are */ /* computed. */ /* = 1.0 : Use the double-precision refinement algorithm, */ /* possibly with doubled-single computations if the */ /* compilation environment does not support DOUBLE */ /* PRECISION. */ /* (other values are reserved for future use) */ /* PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ /* computations allowed for refinement. */ /* Default: 10 */ /* Aggressive: Set to 100 to permit convergence using approximate */ /* factorizations or factorizations other than LU. If */ /* the factorization uses a technique other than */ /* Gaussian elimination, the guarantees in */ /* err_bnds_norm and err_bnds_comp may no longer be */ /* trustworthy. */ /* PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ /* will attempt to find a solution with small componentwise */ /* relative error in the double-precision algorithm. Positive */ /* is true, 0.0 is false. */ /* Default: 1.0 (attempt componentwise convergence) */ /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: Successful exit. The solution to every right-hand side is */ /* guaranteed. */ /* < 0: If INFO = -i, the i-th argument had an illegal value */ /* > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly singular, so */ /* the solution and error bounds could not be computed. RCOND = 0 */ /* is returned. */ /* = N+J: The solution corresponding to the Jth right-hand side is */ /* not guaranteed. The solutions corresponding to other right- */ /* hand sides K with K > J may not be guaranteed as well, but */ /* only the first such right-hand side is reported. If a small */ /* componentwise error is not requested (PARAMS(3) = 0.0) then */ /* the Jth right-hand side is the first with a normwise error */ /* bound that is not guaranteed (the smallest J such */ /* that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ /* the Jth right-hand side is the first with either a normwise or */ /* componentwise error bound that is not guaranteed (the smallest */ /* J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ /* ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ /* ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ /* about all of the right-hand sides check ERR_BNDS_NORM or */ /* ERR_BNDS_COMP. */ /* ================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Check the input parameters. */ /* Parameter adjustments */ err_bnds_comp_dim1 = *nrhs; err_bnds_comp_offset = 1 + err_bnds_comp_dim1; err_bnds_comp__ -= err_bnds_comp_offset; err_bnds_norm_dim1 = *nrhs; err_bnds_norm_offset = 1 + err_bnds_norm_dim1; err_bnds_norm__ -= err_bnds_norm_offset; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --ipiv; --s; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --berr; --params; --work; --iwork; /* Function Body */ *info = 0; ref_type__ = 1; if (*nparams >= 1) { if (params[1] < 0.) { params[1] = 1.; } else { ref_type__ = (integer) params[1]; } } /* Set default parameters. */ illrcond_thresh__ = (doublereal) (*n) * _starpu_dlamch_("Epsilon"); ithresh = 10; rthresh = .5; unstable_thresh__ = .25; ignore_cwise__ = FALSE_; if (*nparams >= 2) { if (params[2] < 0.) { params[2] = (doublereal) ithresh; } else { ithresh = (integer) params[2]; } } if (*nparams >= 3) { if (params[3] < 0.) { if (ignore_cwise__) { params[3] = 0.; } else { params[3] = 1.; } } else { ignore_cwise__ = params[3] == 0.; } } if (ref_type__ == 0 || *n_err_bnds__ == 0) { n_norms__ = 0; } else if (ignore_cwise__) { n_norms__ = 1; } else { n_norms__ = 2; } rcequ = _starpu_lsame_(equed, "Y"); /* Test input parameters. */ if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (! rcequ && ! _starpu_lsame_(equed, "N")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldaf < max(1,*n)) { *info = -8; } else if (*ldb < max(1,*n)) { *info = -11; } else if (*ldx < max(1,*n)) { *info = -13; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYRFSX", &i__1); return 0; } /* Quick return if possible. */ if (*n == 0 || *nrhs == 0) { *rcond = 1.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { berr[j] = 0.; if (*n_err_bnds__ >= 1) { err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; } else if (*n_err_bnds__ >= 2) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 0.; err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 0.; } else if (*n_err_bnds__ >= 3) { err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 1.; } } return 0; } /* Default to failure. */ *rcond = 0.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { berr[j] = 1.; if (*n_err_bnds__ >= 1) { err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; } else if (*n_err_bnds__ >= 2) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; } else if (*n_err_bnds__ >= 3) { err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 0.; err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 0.; } } /* Compute the norm of A and the reciprocal of the condition */ /* number of A. */ *(unsigned char *)norm = 'I'; anorm = _starpu_dlansy_(norm, uplo, n, &a[a_offset], lda, &work[1]); _starpu_dsycon_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &anorm, rcond, &work[1], &iwork[1], info); /* Perform refinement on each right-hand side */ if (ref_type__ != 0) { prec_type__ = _starpu_ilaprec_("E"); _starpu_dla_syrfsx_extended__(&prec_type__, uplo, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &ipiv[1], &rcequ, &s[1], &b[b_offset], ldb, &x[x_offset], ldx, &berr[1], &n_norms__, & err_bnds_norm__[err_bnds_norm_offset], &err_bnds_comp__[ err_bnds_comp_offset], &work[*n + 1], &work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh, &rthresh, & unstable_thresh__, &ignore_cwise__, info, (ftnlen)1); } /* Computing MAX */ d__1 = 10., d__2 = sqrt((doublereal) (*n)); err_lbnd__ = max(d__1,d__2) * _starpu_dlamch_("Epsilon"); if (*n_err_bnds__ >= 1 && n_norms__ >= 1) { /* Compute scaled normwise condition number cond(A*C). */ if (rcequ) { rcond_tmp__ = _starpu_dla_syrcond__(uplo, n, &a[a_offset], lda, &af[ af_offset], ldaf, &ipiv[1], &c_n1, &s[1], info, &work[1], &iwork[1], (ftnlen)1); } else { rcond_tmp__ = _starpu_dla_syrcond__(uplo, n, &a[a_offset], lda, &af[ af_offset], ldaf, &ipiv[1], &c__0, &s[1], info, &work[1], &iwork[1], (ftnlen)1); } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { /* Cap the error at 1.0. */ if (*n_err_bnds__ >= 2 && err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] > 1.) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; } /* Threshold the error (see LAWN). */ if (rcond_tmp__ < illrcond_thresh__) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; err_bnds_norm__[j + err_bnds_norm_dim1] = 0.; if (*info <= *n) { *info = *n + j; } } else if (err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] < err_lbnd__) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = err_lbnd__; err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; } /* Save the condition number. */ if (*n_err_bnds__ >= 3) { err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = rcond_tmp__; } } } if (*n_err_bnds__ >= 1 && n_norms__ >= 2) { /* Compute componentwise condition number cond(A*diag(Y(:,J))) for */ /* each right-hand side using the current solution as an estimate of */ /* the true solution. If the componentwise error estimate is too */ /* large, then the solution is a lousy estimate of truth and the */ /* estimated RCOND may be too optimistic. To avoid misleading users, */ /* the inverse condition number is set to 0.0 when the estimated */ /* cwise error is at least CWISE_WRONG. */ cwise_wrong__ = sqrt(_starpu_dlamch_("Epsilon")); i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < cwise_wrong__) { rcond_tmp__ = _starpu_dla_syrcond__(uplo, n, &a[a_offset], lda, &af[ af_offset], ldaf, &ipiv[1], &c__1, &x[j * x_dim1 + 1], info, &work[1], &iwork[1], (ftnlen)1); } else { rcond_tmp__ = 0.; } /* Cap the error at 1.0. */ if (*n_err_bnds__ >= 2 && err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] > 1.) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; } /* Threshold the error (see LAWN). */ if (rcond_tmp__ < illrcond_thresh__) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; err_bnds_comp__[j + err_bnds_comp_dim1] = 0.; if (params[3] == 1. && *info < *n + j) { *info = *n + j; } } else if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < err_lbnd__) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = err_lbnd__; err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; } /* Save the condition number. */ if (*n_err_bnds__ >= 3) { err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = rcond_tmp__; } } } return 0; /* End of DSYRFSX */ } /* _starpu_dsyrfsx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsysv.c000066400000000000000000000164251507764646700205540ustar00rootroot00000000000000/* dsysv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; /* Subroutine */ int _starpu_dsysv_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ integer nb; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dsytrf_(char *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; extern /* Subroutine */ int _starpu_dsytrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYSV computes the solution to a real system of linear equations */ /* A * X = B, */ /* where A is an N-by-N symmetric matrix and X and B are N-by-NRHS */ /* matrices. */ /* The diagonal pivoting method is used to factor A as */ /* A = U * D * U**T, if UPLO = 'U', or */ /* A = L * D * L**T, if UPLO = 'L', */ /* where U (or L) is a product of permutation and unit upper (lower) */ /* triangular matrices, and D is symmetric and block diagonal with */ /* 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then */ /* used to solve the system of equations A * X = B. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* N-by-N upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if INFO = 0, the block diagonal matrix D and the */ /* multipliers used to obtain the factor U or L from the */ /* factorization A = U*D*U**T or A = L*D*L**T as computed by */ /* DSYTRF. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* IPIV (output) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D, as */ /* determined by DSYTRF. If IPIV(k) > 0, then rows and columns */ /* k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 */ /* diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, */ /* then rows and columns k-1 and -IPIV(k) were interchanged and */ /* D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and */ /* IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and */ /* -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 */ /* diagonal block. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS right hand side matrix B. */ /* On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The length of WORK. LWORK >= 1, and for best performance */ /* LWORK >= max(1,N*NB), where NB is the optimal blocksize for */ /* DSYTRF. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, D(i,i) is exactly zero. The factorization */ /* has been completed, but the block diagonal matrix D is */ /* exactly singular, so the solution could not be computed. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --work; /* Function Body */ *info = 0; lquery = *lwork == -1; if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -8; } else if (*lwork < 1 && ! lquery) { *info = -10; } if (*info == 0) { if (*n == 0) { lwkopt = 1; } else { nb = _starpu_ilaenv_(&c__1, "DSYTRF", uplo, n, &c_n1, &c_n1, &c_n1); lwkopt = *n * nb; } work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYSV ", &i__1); return 0; } else if (lquery) { return 0; } /* Compute the factorization A = U*D*U' or A = L*D*L'. */ _starpu_dsytrf_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info); if (*info == 0) { /* Solve the system A*X = B, overwriting B with X. */ _starpu_dsytrs_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], ldb, info); } work[1] = (doublereal) lwkopt; return 0; /* End of DSYSV */ } /* _starpu_dsysv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsysvx.c000066400000000000000000000334311507764646700207400ustar00rootroot00000000000000/* dsysvx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; /* Subroutine */ int _starpu_dsysvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer * ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer *lwork, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; /* Local variables */ integer nb; extern logical _starpu_lsame_(char *, char *); doublereal anorm; extern doublereal _starpu_dlamch_(char *); logical nofact; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern doublereal _starpu_dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dsycon_(char *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dsyrfs_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), _starpu_dsytrf_(char *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); integer lwkopt; logical lquery; extern /* Subroutine */ int _starpu_dsytrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYSVX uses the diagonal pivoting factorization to compute the */ /* solution to a real system of linear equations A * X = B, */ /* where A is an N-by-N symmetric matrix and X and B are N-by-NRHS */ /* matrices. */ /* Error bounds on the solution and a condition estimate are also */ /* provided. */ /* Description */ /* =========== */ /* The following steps are performed: */ /* 1. If FACT = 'N', the diagonal pivoting method is used to factor A. */ /* The form of the factorization is */ /* A = U * D * U**T, if UPLO = 'U', or */ /* A = L * D * L**T, if UPLO = 'L', */ /* where U (or L) is a product of permutation and unit upper (lower) */ /* triangular matrices, and D is symmetric and block diagonal with */ /* 1-by-1 and 2-by-2 diagonal blocks. */ /* 2. If some D(i,i)=0, so that D is exactly singular, then the routine */ /* returns with INFO = i. Otherwise, the factored form of A is used */ /* to estimate the condition number of the matrix A. If the */ /* reciprocal of the condition number is less than machine precision, */ /* INFO = N+1 is returned as a warning, but the routine still goes on */ /* to solve for X and compute error bounds as described below. */ /* 3. The system of equations is solved for X using the factored form */ /* of A. */ /* 4. Iterative refinement is applied to improve the computed solution */ /* matrix and calculate error bounds and backward error estimates */ /* for it. */ /* Arguments */ /* ========= */ /* FACT (input) CHARACTER*1 */ /* Specifies whether or not the factored form of A has been */ /* supplied on entry. */ /* = 'F': On entry, AF and IPIV contain the factored form of */ /* A. AF and IPIV will not be modified. */ /* = 'N': The matrix A will be copied to AF and factored. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The symmetric matrix A. If UPLO = 'U', the leading N-by-N */ /* upper triangular part of A contains the upper triangular part */ /* of the matrix A, and the strictly lower triangular part of A */ /* is not referenced. If UPLO = 'L', the leading N-by-N lower */ /* triangular part of A contains the lower triangular part of */ /* the matrix A, and the strictly upper triangular part of A is */ /* not referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) */ /* If FACT = 'F', then AF is an input argument and on entry */ /* contains the block diagonal matrix D and the multipliers used */ /* to obtain the factor U or L from the factorization */ /* A = U*D*U**T or A = L*D*L**T as computed by DSYTRF. */ /* If FACT = 'N', then AF is an output argument and on exit */ /* returns the block diagonal matrix D and the multipliers used */ /* to obtain the factor U or L from the factorization */ /* A = U*D*U**T or A = L*D*L**T. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* IPIV (input or output) INTEGER array, dimension (N) */ /* If FACT = 'F', then IPIV is an input argument and on entry */ /* contains details of the interchanges and the block structure */ /* of D, as determined by DSYTRF. */ /* If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ /* interchanged and D(k,k) is a 1-by-1 diagonal block. */ /* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */ /* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ /* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = */ /* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */ /* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ /* If FACT = 'N', then IPIV is an output argument and on exit */ /* contains details of the interchanges and the block structure */ /* of D, as determined by DSYTRF. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The N-by-NRHS right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* The estimate of the reciprocal condition number of the matrix */ /* A. If RCOND is less than the machine precision (in */ /* particular, if RCOND = 0), the matrix is singular to working */ /* precision. This condition is indicated by a return code of */ /* INFO > 0. */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The length of WORK. LWORK >= max(1,3*N), and for best */ /* performance, when FACT = 'N', LWORK >= max(1,3*N,N*NB), where */ /* NB is the optimal blocksize for DSYTRF. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, and i is */ /* <= N: D(i,i) is exactly zero. The factorization */ /* has been completed but the factor D is exactly */ /* singular, so the solution and error bounds could */ /* not be computed. RCOND = 0 is returned. */ /* = N+1: D is nonsingular, but RCOND is less than machine */ /* precision, meaning that the matrix is singular */ /* to working precision. Nevertheless, the */ /* solution and error bounds are computed because */ /* there are a number of situations where the */ /* computed solution can be more accurate than the */ /* value of RCOND would suggest. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; nofact = _starpu_lsame_(fact, "N"); lquery = *lwork == -1; if (! nofact && ! _starpu_lsame_(fact, "F")) { *info = -1; } else if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldaf < max(1,*n)) { *info = -8; } else if (*ldb < max(1,*n)) { *info = -11; } else if (*ldx < max(1,*n)) { *info = -13; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = *n * 3; if (*lwork < max(i__1,i__2) && ! lquery) { *info = -18; } } if (*info == 0) { /* Computing MAX */ i__1 = 1, i__2 = *n * 3; lwkopt = max(i__1,i__2); if (nofact) { nb = _starpu_ilaenv_(&c__1, "DSYTRF", uplo, n, &c_n1, &c_n1, &c_n1); /* Computing MAX */ i__1 = lwkopt, i__2 = *n * nb; lwkopt = max(i__1,i__2); } work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYSVX", &i__1); return 0; } else if (lquery) { return 0; } if (nofact) { /* Compute the factorization A = U*D*U' or A = L*D*L'. */ _starpu_dlacpy_(uplo, n, n, &a[a_offset], lda, &af[af_offset], ldaf); _starpu_dsytrf_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &work[1], lwork, info); /* Return if INFO is non-zero. */ if (*info > 0) { *rcond = 0.; return 0; } } /* Compute the norm of the matrix A. */ anorm = _starpu_dlansy_("I", uplo, n, &a[a_offset], lda, &work[1]); /* Compute the reciprocal of the condition number of A. */ _starpu_dsycon_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &anorm, rcond, &work[1], &iwork[1], info); /* Compute the solution vectors X. */ _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); _starpu_dsytrs_(uplo, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, info); /* Use iterative refinement to improve the computed solutions and */ /* compute error bounds and backward error estimates for them. */ _starpu_dsyrfs_(uplo, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &ipiv[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[1] , &iwork[1], info); /* Set INFO = N+1 if the matrix is singular to working precision. */ if (*rcond < _starpu_dlamch_("Epsilon")) { *info = *n + 1; } work[1] = (doublereal) lwkopt; return 0; /* End of DSYSVX */ } /* _starpu_dsysvx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsysvxx.c000066400000000000000000000644531507764646700211400ustar00rootroot00000000000000/* dsysvxx.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dsysvxx_(char *fact, char *uplo, integer *n, integer * nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv, char *equed, doublereal *s, doublereal *b, integer * ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal * rpvgrw, doublereal *berr, integer *n_err_bnds__, doublereal * err_bnds_norm__, doublereal *err_bnds_comp__, integer *nparams, doublereal *params, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, err_bnds_comp_dim1, err_bnds_comp_offset, i__1; doublereal d__1, d__2; /* Local variables */ integer j; doublereal amax, smin, smax; extern doublereal _starpu_dla_syrpvgrw__(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, ftnlen); extern logical _starpu_lsame_(char *, char *); doublereal scond; logical equil, rcequ; extern doublereal _starpu_dlamch_(char *); logical nofact; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); doublereal bignum; integer infequ; extern /* Subroutine */ int _starpu_dlaqsy_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, char *); doublereal smlnum; extern /* Subroutine */ int _starpu_dsytrf_(char *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlascl2_(integer *, integer *, doublereal *, doublereal *, integer *), _starpu_dsytrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dsyequb_(char *, integer *, doublereal *, integer *, doublereal * , doublereal *, doublereal *, doublereal *, integer *), _starpu_dsyrfsx_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYSVXX uses the diagonal pivoting factorization to compute the */ /* solution to a double precision system of linear equations A * X = B, where A */ /* is an N-by-N symmetric matrix and X and B are N-by-NRHS matrices. */ /* If requested, both normwise and maximum componentwise error bounds */ /* are returned. DSYSVXX will return a solution with a tiny */ /* guaranteed error (O(eps) where eps is the working machine */ /* precision) unless the matrix is very ill-conditioned, in which */ /* case a warning is returned. Relevant condition numbers also are */ /* calculated and returned. */ /* DSYSVXX accepts user-provided factorizations and equilibration */ /* factors; see the definitions of the FACT and EQUED options. */ /* Solving with refinement and using a factorization from a previous */ /* DSYSVXX call will also produce a solution with either O(eps) */ /* errors or warnings, but we cannot make that claim for general */ /* user-provided factorizations and equilibration factors if they */ /* differ from what DSYSVXX would itself produce. */ /* Description */ /* =========== */ /* The following steps are performed: */ /* 1. If FACT = 'E', double precision scaling factors are computed to equilibrate */ /* the system: */ /* diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B */ /* Whether or not the system will be equilibrated depends on the */ /* scaling of the matrix A, but if equilibration is used, A is */ /* overwritten by diag(S)*A*diag(S) and B by diag(S)*B. */ /* 2. If FACT = 'N' or 'E', the LU decomposition is used to factor */ /* the matrix A (after equilibration if FACT = 'E') as */ /* A = U * D * U**T, if UPLO = 'U', or */ /* A = L * D * L**T, if UPLO = 'L', */ /* where U (or L) is a product of permutation and unit upper (lower) */ /* triangular matrices, and D is symmetric and block diagonal with */ /* 1-by-1 and 2-by-2 diagonal blocks. */ /* 3. If some D(i,i)=0, so that D is exactly singular, then the */ /* routine returns with INFO = i. Otherwise, the factored form of A */ /* is used to estimate the condition number of the matrix A (see */ /* argument RCOND). If the reciprocal of the condition number is */ /* less than machine precision, the routine still goes on to solve */ /* for X and compute error bounds as described below. */ /* 4. The system of equations is solved for X using the factored form */ /* of A. */ /* 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */ /* the routine will use iterative refinement to try to get a small */ /* error and error bounds. Refinement calculates the residual to at */ /* least twice the working precision. */ /* 6. If equilibration was used, the matrix X is premultiplied by */ /* diag(R) so that it solves the original system before */ /* equilibration. */ /* Arguments */ /* ========= */ /* Some optional parameters are bundled in the PARAMS array. These */ /* settings determine how refinement is performed, but often the */ /* defaults are acceptable. If the defaults are acceptable, users */ /* can pass NPARAMS = 0 which prevents the source code from accessing */ /* the PARAMS argument. */ /* FACT (input) CHARACTER*1 */ /* Specifies whether or not the factored form of the matrix A is */ /* supplied on entry, and if not, whether the matrix A should be */ /* equilibrated before it is factored. */ /* = 'F': On entry, AF and IPIV contain the factored form of A. */ /* If EQUED is not 'N', the matrix A has been */ /* equilibrated with scaling factors given by S. */ /* A, AF, and IPIV are not modified. */ /* = 'N': The matrix A will be copied to AF and factored. */ /* = 'E': The matrix A will be equilibrated if necessary, then */ /* copied to AF and factored. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* The symmetric matrix A. If UPLO = 'U', the leading N-by-N */ /* upper triangular part of A contains the upper triangular */ /* part of the matrix A, and the strictly lower triangular */ /* part of A is not referenced. If UPLO = 'L', the leading */ /* N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by */ /* diag(S)*A*diag(S). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) */ /* If FACT = 'F', then AF is an input argument and on entry */ /* contains the block diagonal matrix D and the multipliers */ /* used to obtain the factor U or L from the factorization A = */ /* U*D*U**T or A = L*D*L**T as computed by DSYTRF. */ /* If FACT = 'N', then AF is an output argument and on exit */ /* returns the block diagonal matrix D and the multipliers */ /* used to obtain the factor U or L from the factorization A = */ /* U*D*U**T or A = L*D*L**T. */ /* LDAF (input) INTEGER */ /* The leading dimension of the array AF. LDAF >= max(1,N). */ /* IPIV (input or output) INTEGER array, dimension (N) */ /* If FACT = 'F', then IPIV is an input argument and on entry */ /* contains details of the interchanges and the block */ /* structure of D, as determined by DSYTRF. If IPIV(k) > 0, */ /* then rows and columns k and IPIV(k) were interchanged and */ /* D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and */ /* IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and */ /* -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 */ /* diagonal block. If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, */ /* then rows and columns k+1 and -IPIV(k) were interchanged */ /* and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ /* If FACT = 'N', then IPIV is an output argument and on exit */ /* contains details of the interchanges and the block */ /* structure of D, as determined by DSYTRF. */ /* EQUED (input or output) CHARACTER*1 */ /* Specifies the form of equilibration that was done. */ /* = 'N': No equilibration (always true if FACT = 'N'). */ /* = 'Y': Both row and column equilibration, i.e., A has been */ /* replaced by diag(S) * A * diag(S). */ /* EQUED is an input argument if FACT = 'F'; otherwise, it is an */ /* output argument. */ /* S (input or output) DOUBLE PRECISION array, dimension (N) */ /* The scale factors for A. If EQUED = 'Y', A is multiplied on */ /* the left and right by diag(S). S is an input argument if FACT = */ /* 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED */ /* = 'Y', each element of S must be positive. If S is output, each */ /* element of S is a power of the radix. If S is input, each element */ /* of S should be a power of the radix to ensure a reliable solution */ /* and error estimates. Scaling by powers of the radix does not cause */ /* rounding errors unless the result underflows or overflows. */ /* Rounding errors during scaling lead to refining with a matrix that */ /* is not equivalent to the input matrix, producing error estimates */ /* that may not be reliable. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the N-by-NRHS right hand side matrix B. */ /* On exit, */ /* if EQUED = 'N', B is not modified; */ /* if EQUED = 'Y', B is overwritten by diag(S)*B; */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* If INFO = 0, the N-by-NRHS solution matrix X to the original */ /* system of equations. Note that A and B are modified on exit if */ /* EQUED .ne. 'N', and the solution to the equilibrated system is */ /* inv(diag(S))*X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* Reciprocal scaled condition number. This is an estimate of the */ /* reciprocal Skeel condition number of the matrix A after */ /* equilibration (if done). If this is less than the machine */ /* precision (in particular, if it is zero), the matrix is singular */ /* to working precision. Note that the error may still be small even */ /* if this number is very small and the matrix appears ill- */ /* conditioned. */ /* RPVGRW (output) DOUBLE PRECISION */ /* Reciprocal pivot growth. On exit, this contains the reciprocal */ /* pivot growth factor norm(A)/norm(U). The "max absolute element" */ /* norm is used. If this is much less than 1, then the stability of */ /* the LU factorization of the (equilibrated) matrix A could be poor. */ /* This also means that the solution X, estimated condition numbers, */ /* and error bounds could be unreliable. If factorization fails with */ /* 0 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly singular, so */ /* the solution and error bounds could not be computed. RCOND = 0 */ /* is returned. */ /* = N+J: The solution corresponding to the Jth right-hand side is */ /* not guaranteed. The solutions corresponding to other right- */ /* hand sides K with K > J may not be guaranteed as well, but */ /* only the first such right-hand side is reported. If a small */ /* componentwise error is not requested (PARAMS(3) = 0.0) then */ /* the Jth right-hand side is the first with a normwise error */ /* bound that is not guaranteed (the smallest J such */ /* that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ /* the Jth right-hand side is the first with either a normwise or */ /* componentwise error bound that is not guaranteed (the smallest */ /* J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ /* ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ /* ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ /* about all of the right-hand sides check ERR_BNDS_NORM or */ /* ERR_BNDS_COMP. */ /* ================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ err_bnds_comp_dim1 = *nrhs; err_bnds_comp_offset = 1 + err_bnds_comp_dim1; err_bnds_comp__ -= err_bnds_comp_offset; err_bnds_norm_dim1 = *nrhs; err_bnds_norm_offset = 1 + err_bnds_norm_dim1; err_bnds_norm__ -= err_bnds_norm_offset; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1; af -= af_offset; --ipiv; --s; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --berr; --params; --work; --iwork; /* Function Body */ *info = 0; nofact = _starpu_lsame_(fact, "N"); equil = _starpu_lsame_(fact, "E"); smlnum = _starpu_dlamch_("Safe minimum"); bignum = 1. / smlnum; if (nofact || equil) { *(unsigned char *)equed = 'N'; rcequ = FALSE_; } else { rcequ = _starpu_lsame_(equed, "Y"); } /* Default is failure. If an input parameter is wrong or */ /* factorization fails, make everything look horrible. Only the */ /* pivot growth is set here, the rest is initialized in DSYRFSX. */ *rpvgrw = 0.; /* Test the input parameters. PARAMS is not tested until DSYRFSX. */ if (! nofact && ! equil && ! _starpu_lsame_(fact, "F")) { *info = -1; } else if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldaf < max(1,*n)) { *info = -8; } else if (_starpu_lsame_(fact, "F") && ! (rcequ || _starpu_lsame_( equed, "N"))) { *info = -9; } else { if (rcequ) { smin = bignum; smax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = smin, d__2 = s[j]; smin = min(d__1,d__2); /* Computing MAX */ d__1 = smax, d__2 = s[j]; smax = max(d__1,d__2); /* L10: */ } if (smin <= 0.) { *info = -10; } else if (*n > 0) { scond = max(smin,smlnum) / min(smax,bignum); } else { scond = 1.; } } if (*info == 0) { if (*ldb < max(1,*n)) { *info = -12; } else if (*ldx < max(1,*n)) { *info = -14; } } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYSVXX", &i__1); return 0; } if (equil) { /* Compute row and column scalings to equilibrate the matrix A. */ _starpu_dsyequb_(uplo, n, &a[a_offset], lda, &s[1], &scond, &amax, &work[1], & infequ); if (infequ == 0) { /* Equilibrate the matrix. */ _starpu_dlaqsy_(uplo, n, &a[a_offset], lda, &s[1], &scond, &amax, equed); rcequ = _starpu_lsame_(equed, "Y"); } } /* Scale the right-hand side. */ if (rcequ) { _starpu_dlascl2_(n, nrhs, &s[1], &b[b_offset], ldb); } if (nofact || equil) { /* Compute the LU factorization of A. */ _starpu_dlacpy_(uplo, n, n, &a[a_offset], lda, &af[af_offset], ldaf); i__1 = max(1,*n) * 5; _starpu_dsytrf_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &work[1], &i__1, info); /* Return if INFO is non-zero. */ if (*info > 0) { /* Pivot in column INFO is exactly 0 */ /* Compute the reciprocal pivot growth factor of the */ /* leading rank-deficient INFO columns of A. */ if (*n > 0) { *rpvgrw = _starpu_dla_syrpvgrw__(uplo, n, info, &a[a_offset], lda, & af[af_offset], ldaf, &ipiv[1], &work[1], (ftnlen)1); } return 0; } } /* Compute the reciprocal pivot growth factor RPVGRW. */ if (*n > 0) { *rpvgrw = _starpu_dla_syrpvgrw__(uplo, n, info, &a[a_offset], lda, &af[ af_offset], ldaf, &ipiv[1], &work[1], (ftnlen)1); } /* Compute the solution matrix X. */ _starpu_dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); _starpu_dsytrs_(uplo, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, info); /* Use iterative refinement to improve the computed solution and */ /* compute error bounds and backward error estimates for it. */ _starpu_dsyrfsx_(uplo, equed, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, & ipiv[1], &s[1], &b[b_offset], ldb, &x[x_offset], ldx, rcond, & berr[1], n_err_bnds__, &err_bnds_norm__[err_bnds_norm_offset], & err_bnds_comp__[err_bnds_comp_offset], nparams, ¶ms[1], &work[ 1], &iwork[1], info); /* Scale solutions. */ if (rcequ) { _starpu_dlascl2_(n, nrhs, &s[1], &x[x_offset], ldx); } return 0; /* End of DSYSVXX */ } /* _starpu_dsysvxx_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsytd2.c000066400000000000000000000231111507764646700206030ustar00rootroot00000000000000/* dsytd2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b8 = 0.; static doublereal c_b14 = -1.; /* Subroutine */ int _starpu_dsytd2_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *d__, doublereal *e, doublereal *tau, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal taui; extern /* Subroutine */ int _starpu_dsyr2_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal alpha; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dsymv_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer * ); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal */ /* form T by an orthogonal similarity transformation: Q' * A * Q = T. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is stored: */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* n-by-n upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading n-by-n lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if UPLO = 'U', the diagonal and first superdiagonal */ /* of A are overwritten by the corresponding elements of the */ /* tridiagonal matrix T, and the elements above the first */ /* superdiagonal, with the array TAU, represent the orthogonal */ /* matrix Q as a product of elementary reflectors; if UPLO */ /* = 'L', the diagonal and first subdiagonal of A are over- */ /* written by the corresponding elements of the tridiagonal */ /* matrix T, and the elements below the first subdiagonal, with */ /* the array TAU, represent the orthogonal matrix Q as a product */ /* of elementary reflectors. See Further Details. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* D (output) DOUBLE PRECISION array, dimension (N) */ /* The diagonal elements of the tridiagonal matrix T: */ /* D(i) = A(i,i). */ /* E (output) DOUBLE PRECISION array, dimension (N-1) */ /* The off-diagonal elements of the tridiagonal matrix T: */ /* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */ /* TAU (output) DOUBLE PRECISION array, dimension (N-1) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* If UPLO = 'U', the matrix Q is represented as a product of elementary */ /* reflectors */ /* Q = H(n-1) . . . H(2) H(1). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */ /* A(1:i-1,i+1), and tau in TAU(i). */ /* If UPLO = 'L', the matrix Q is represented as a product of elementary */ /* reflectors */ /* Q = H(1) H(2) . . . H(n-1). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */ /* and tau in TAU(i). */ /* The contents of A on exit are illustrated by the following examples */ /* with n = 5: */ /* if UPLO = 'U': if UPLO = 'L': */ /* ( d e v2 v3 v4 ) ( d ) */ /* ( d e v3 v4 ) ( e d ) */ /* ( d e v4 ) ( v1 e d ) */ /* ( d e ) ( v1 v2 e d ) */ /* ( d ) ( v1 v2 v3 e d ) */ /* where d and e denote diagonal and off-diagonal elements of T, and vi */ /* denotes an element of the vector defining H(i). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --d__; --e; --tau; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYTD2", &i__1); return 0; } /* Quick return if possible */ if (*n <= 0) { return 0; } if (upper) { /* Reduce the upper triangle of A */ for (i__ = *n - 1; i__ >= 1; --i__) { /* Generate elementary reflector H(i) = I - tau * v * v' */ /* to annihilate A(1:i-1,i+1) */ _starpu_dlarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui); e[i__] = a[i__ + (i__ + 1) * a_dim1]; if (taui != 0.) { /* Apply H(i) from both sides to A(1:i,1:i) */ a[i__ + (i__ + 1) * a_dim1] = 1.; /* Compute x := tau * A * v storing x in TAU(1:i) */ _starpu_dsymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * a_dim1 + 1], &c__1, &c_b8, &tau[1], &c__1); /* Compute w := x - 1/2 * tau * (x'*v) * v */ alpha = taui * -.5 * _starpu_ddot_(&i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1], &c__1); _starpu_daxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ 1], &c__1); /* Apply the transformation as a rank-2 update: */ /* A := A - v * w' - w * v' */ _starpu_dsyr2_(uplo, &i__, &c_b14, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[1], &c__1, &a[a_offset], lda); a[i__ + (i__ + 1) * a_dim1] = e[i__]; } d__[i__ + 1] = a[i__ + 1 + (i__ + 1) * a_dim1]; tau[i__] = taui; /* L10: */ } d__[1] = a[a_dim1 + 1]; } else { /* Reduce the lower triangle of A */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Generate elementary reflector H(i) = I - tau * v * v' */ /* to annihilate A(i+2:n,i) */ i__2 = *n - i__; /* Computing MIN */ i__3 = i__ + 2; _starpu_dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *n)+ i__ * a_dim1], &c__1, &taui); e[i__] = a[i__ + 1 + i__ * a_dim1]; if (taui != 0.) { /* Apply H(i) from both sides to A(i+1:n,i+1:n) */ a[i__ + 1 + i__ * a_dim1] = 1.; /* Compute x := tau * A * v storing y in TAU(i:n-1) */ i__2 = *n - i__; _starpu_dsymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b8, &tau[ i__], &c__1); /* Compute w := x - 1/2 * tau * (x'*v) * v */ i__2 = *n - i__; alpha = taui * -.5 * _starpu_ddot_(&i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); i__2 = *n - i__; _starpu_daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ i__], &c__1); /* Apply the transformation as a rank-2 update: */ /* A := A - v * w' - w * v' */ i__2 = *n - i__; _starpu_dsyr2_(uplo, &i__2, &c_b14, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], lda); a[i__ + 1 + i__ * a_dim1] = e[i__]; } d__[i__] = a[i__ + i__ * a_dim1]; tau[i__] = taui; /* L20: */ } d__[*n] = a[*n + *n * a_dim1]; } return 0; /* End of DSYTD2 */ } /* _starpu_dsytd2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsytf2.c000066400000000000000000000420751507764646700206170ustar00rootroot00000000000000/* dsytf2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dsytf2_(char *uplo, integer *n, doublereal *a, integer * lda, integer *ipiv, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, k; doublereal t, r1, d11, d12, d21, d22; integer kk, kp; doublereal wk, wkm1, wkp1; integer imax, jmax; extern /* Subroutine */ int _starpu_dsyr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); doublereal alpha; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); integer kstep; logical upper; doublereal absakk; extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern logical _starpu_disnan_(doublereal *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal colmax, rowmax; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYTF2 computes the factorization of a real symmetric matrix A using */ /* the Bunch-Kaufman diagonal pivoting method: */ /* A = U*D*U' or A = L*D*L' */ /* where U (or L) is a product of permutation and unit upper (lower) */ /* triangular matrices, U' is the transpose of U, and D is symmetric and */ /* block diagonal with 1-by-1 and 2-by-2 diagonal blocks. */ /* This is the unblocked version of the algorithm, calling Level 2 BLAS. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is stored: */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* n-by-n upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading n-by-n lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, the block diagonal matrix D and the multipliers used */ /* to obtain the factor U or L (see below for further details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* IPIV (output) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D. */ /* If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ /* interchanged and D(k,k) is a 1-by-1 diagonal block. */ /* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */ /* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ /* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = */ /* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */ /* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -k, the k-th argument had an illegal value */ /* > 0: if INFO = k, D(k,k) is exactly zero. The factorization */ /* has been completed, but the block diagonal matrix D is */ /* exactly singular, and division by zero will occur if it */ /* is used to solve a system of equations. */ /* Further Details */ /* =============== */ /* 09-29-06 - patch from */ /* Bobby Cheng, MathWorks */ /* Replace l.204 and l.372 */ /* IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN */ /* by */ /* IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. DISNAN(ABSAKK) ) THEN */ /* 01-01-96 - Based on modifications by */ /* J. Lewis, Boeing Computer Services Company */ /* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ /* 1-96 - Based on modifications by J. Lewis, Boeing Computer Services */ /* Company */ /* If UPLO = 'U', then A = U*D*U', where */ /* U = P(n)*U(n)* ... *P(k)U(k)* ..., */ /* i.e., U is a product of terms P(k)*U(k), where k decreases from n to */ /* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ /* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ /* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such */ /* that if the diagonal block D(k) is of order s (s = 1 or 2), then */ /* ( I v 0 ) k-s */ /* U(k) = ( 0 I 0 ) s */ /* ( 0 0 I ) n-k */ /* k-s s n-k */ /* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). */ /* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), */ /* and A(k,k), and v overwrites A(1:k-2,k-1:k). */ /* If UPLO = 'L', then A = L*D*L', where */ /* L = P(1)*L(1)* ... *P(k)*L(k)* ..., */ /* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to */ /* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ /* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ /* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such */ /* that if the diagonal block D(k) is of order s (s = 1 or 2), then */ /* ( I 0 0 ) k-1 */ /* L(k) = ( 0 I 0 ) s */ /* ( 0 v I ) n-k-s+1 */ /* k-1 s n-k-s+1 */ /* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). */ /* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), */ /* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYTF2", &i__1); return 0; } /* Initialize ALPHA for use in choosing pivot block size. */ alpha = (sqrt(17.) + 1.) / 8.; if (upper) { /* Factorize A as U*D*U' using the upper triangle of A */ /* K is the main loop index, decreasing from N to 1 in steps of */ /* 1 or 2 */ k = *n; L10: /* If K < 1, exit from loop */ if (k < 1) { goto L70; } kstep = 1; /* Determine rows and columns to be interchanged and whether */ /* a 1-by-1 or 2-by-2 pivot block will be used */ absakk = (d__1 = a[k + k * a_dim1], abs(d__1)); /* IMAX is the row-index of the largest off-diagonal element in */ /* column K, and COLMAX is its absolute value */ if (k > 1) { i__1 = k - 1; imax = _starpu_idamax_(&i__1, &a[k * a_dim1 + 1], &c__1); colmax = (d__1 = a[imax + k * a_dim1], abs(d__1)); } else { colmax = 0.; } if (max(absakk,colmax) == 0. || _starpu_disnan_(&absakk)) { /* Column K is zero or contains a NaN: set INFO and continue */ if (*info == 0) { *info = k; } kp = k; } else { if (absakk >= alpha * colmax) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else { /* JMAX is the column-index of the largest off-diagonal */ /* element in row IMAX, and ROWMAX is its absolute value */ i__1 = k - imax; jmax = imax + _starpu_idamax_(&i__1, &a[imax + (imax + 1) * a_dim1], lda); rowmax = (d__1 = a[imax + jmax * a_dim1], abs(d__1)); if (imax > 1) { i__1 = imax - 1; jmax = _starpu_idamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); /* Computing MAX */ d__2 = rowmax, d__3 = (d__1 = a[jmax + imax * a_dim1], abs(d__1)); rowmax = max(d__2,d__3); } if (absakk >= alpha * colmax * (colmax / rowmax)) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else if ((d__1 = a[imax + imax * a_dim1], abs(d__1)) >= alpha * rowmax) { /* interchange rows and columns K and IMAX, use 1-by-1 */ /* pivot block */ kp = imax; } else { /* interchange rows and columns K-1 and IMAX, use 2-by-2 */ /* pivot block */ kp = imax; kstep = 2; } } kk = k - kstep + 1; if (kp != kk) { /* Interchange rows and columns KK and KP in the leading */ /* submatrix A(1:k,1:k) */ i__1 = kp - 1; _starpu_dswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); i__1 = kk - kp - 1; _starpu_dswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); t = a[kk + kk * a_dim1]; a[kk + kk * a_dim1] = a[kp + kp * a_dim1]; a[kp + kp * a_dim1] = t; if (kstep == 2) { t = a[k - 1 + k * a_dim1]; a[k - 1 + k * a_dim1] = a[kp + k * a_dim1]; a[kp + k * a_dim1] = t; } } /* Update the leading submatrix */ if (kstep == 1) { /* 1-by-1 pivot block D(k): column k now holds */ /* W(k) = U(k)*D(k) */ /* where U(k) is the k-th column of U */ /* Perform a rank-1 update of A(1:k-1,1:k-1) as */ /* A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)' */ r1 = 1. / a[k + k * a_dim1]; i__1 = k - 1; d__1 = -r1; _starpu_dsyr_(uplo, &i__1, &d__1, &a[k * a_dim1 + 1], &c__1, &a[ a_offset], lda); /* Store U(k) in column k */ i__1 = k - 1; _starpu_dscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); } else { /* 2-by-2 pivot block D(k): columns k and k-1 now hold */ /* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) */ /* where U(k) and U(k-1) are the k-th and (k-1)-th columns */ /* of U */ /* Perform a rank-2 update of A(1:k-2,1:k-2) as */ /* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )' */ /* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )' */ if (k > 2) { d12 = a[k - 1 + k * a_dim1]; d22 = a[k - 1 + (k - 1) * a_dim1] / d12; d11 = a[k + k * a_dim1] / d12; t = 1. / (d11 * d22 - 1.); d12 = t / d12; for (j = k - 2; j >= 1; --j) { wkm1 = d12 * (d11 * a[j + (k - 1) * a_dim1] - a[j + k * a_dim1]); wk = d12 * (d22 * a[j + k * a_dim1] - a[j + (k - 1) * a_dim1]); for (i__ = j; i__ >= 1; --i__) { a[i__ + j * a_dim1] = a[i__ + j * a_dim1] - a[i__ + k * a_dim1] * wk - a[i__ + (k - 1) * a_dim1] * wkm1; /* L20: */ } a[j + k * a_dim1] = wk; a[j + (k - 1) * a_dim1] = wkm1; /* L30: */ } } } } /* Store details of the interchanges in IPIV */ if (kstep == 1) { ipiv[k] = kp; } else { ipiv[k] = -kp; ipiv[k - 1] = -kp; } /* Decrease K and return to the start of the main loop */ k -= kstep; goto L10; } else { /* Factorize A as L*D*L' using the lower triangle of A */ /* K is the main loop index, increasing from 1 to N in steps of */ /* 1 or 2 */ k = 1; L40: /* If K > N, exit from loop */ if (k > *n) { goto L70; } kstep = 1; /* Determine rows and columns to be interchanged and whether */ /* a 1-by-1 or 2-by-2 pivot block will be used */ absakk = (d__1 = a[k + k * a_dim1], abs(d__1)); /* IMAX is the row-index of the largest off-diagonal element in */ /* column K, and COLMAX is its absolute value */ if (k < *n) { i__1 = *n - k; imax = k + _starpu_idamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); colmax = (d__1 = a[imax + k * a_dim1], abs(d__1)); } else { colmax = 0.; } if (max(absakk,colmax) == 0. || _starpu_disnan_(&absakk)) { /* Column K is zero or contains a NaN: set INFO and continue */ if (*info == 0) { *info = k; } kp = k; } else { if (absakk >= alpha * colmax) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else { /* JMAX is the column-index of the largest off-diagonal */ /* element in row IMAX, and ROWMAX is its absolute value */ i__1 = imax - k; jmax = k - 1 + _starpu_idamax_(&i__1, &a[imax + k * a_dim1], lda); rowmax = (d__1 = a[imax + jmax * a_dim1], abs(d__1)); if (imax < *n) { i__1 = *n - imax; jmax = imax + _starpu_idamax_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1); /* Computing MAX */ d__2 = rowmax, d__3 = (d__1 = a[jmax + imax * a_dim1], abs(d__1)); rowmax = max(d__2,d__3); } if (absakk >= alpha * colmax * (colmax / rowmax)) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else if ((d__1 = a[imax + imax * a_dim1], abs(d__1)) >= alpha * rowmax) { /* interchange rows and columns K and IMAX, use 1-by-1 */ /* pivot block */ kp = imax; } else { /* interchange rows and columns K+1 and IMAX, use 2-by-2 */ /* pivot block */ kp = imax; kstep = 2; } } kk = k + kstep - 1; if (kp != kk) { /* Interchange rows and columns KK and KP in the trailing */ /* submatrix A(k:n,k:n) */ if (kp < *n) { i__1 = *n - kp; _starpu_dswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); } i__1 = kp - kk - 1; _starpu_dswap_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); t = a[kk + kk * a_dim1]; a[kk + kk * a_dim1] = a[kp + kp * a_dim1]; a[kp + kp * a_dim1] = t; if (kstep == 2) { t = a[k + 1 + k * a_dim1]; a[k + 1 + k * a_dim1] = a[kp + k * a_dim1]; a[kp + k * a_dim1] = t; } } /* Update the trailing submatrix */ if (kstep == 1) { /* 1-by-1 pivot block D(k): column k now holds */ /* W(k) = L(k)*D(k) */ /* where L(k) is the k-th column of L */ if (k < *n) { /* Perform a rank-1 update of A(k+1:n,k+1:n) as */ /* A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)' */ d11 = 1. / a[k + k * a_dim1]; i__1 = *n - k; d__1 = -d11; _starpu_dsyr_(uplo, &i__1, &d__1, &a[k + 1 + k * a_dim1], &c__1, & a[k + 1 + (k + 1) * a_dim1], lda); /* Store L(k) in column K */ i__1 = *n - k; _starpu_dscal_(&i__1, &d11, &a[k + 1 + k * a_dim1], &c__1); } } else { /* 2-by-2 pivot block D(k) */ if (k < *n - 1) { /* Perform a rank-2 update of A(k+2:n,k+2:n) as */ /* A := A - ( (A(k) A(k+1))*D(k)**(-1) ) * (A(k) A(k+1))' */ /* where L(k) and L(k+1) are the k-th and (k+1)-th */ /* columns of L */ d21 = a[k + 1 + k * a_dim1]; d11 = a[k + 1 + (k + 1) * a_dim1] / d21; d22 = a[k + k * a_dim1] / d21; t = 1. / (d11 * d22 - 1.); d21 = t / d21; i__1 = *n; for (j = k + 2; j <= i__1; ++j) { wk = d21 * (d11 * a[j + k * a_dim1] - a[j + (k + 1) * a_dim1]); wkp1 = d21 * (d22 * a[j + (k + 1) * a_dim1] - a[j + k * a_dim1]); i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = a[i__ + j * a_dim1] - a[i__ + k * a_dim1] * wk - a[i__ + (k + 1) * a_dim1] * wkp1; /* L50: */ } a[j + k * a_dim1] = wk; a[j + (k + 1) * a_dim1] = wkp1; /* L60: */ } } } } /* Store details of the interchanges in IPIV */ if (kstep == 1) { ipiv[k] = kp; } else { ipiv[k] = -kp; ipiv[k + 1] = -kp; } /* Increase K and return to the start of the main loop */ k += kstep; goto L40; } L70: return 0; /* End of DSYTF2 */ } /* _starpu_dsytf2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsytrd.c000066400000000000000000000263051507764646700207130ustar00rootroot00000000000000/* dsytrd.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; static doublereal c_b22 = -1.; static doublereal c_b23 = 1.; /* Subroutine */ int _starpu_dsytrd_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *d__, doublereal *e, doublereal *tau, doublereal * work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, nb, kk, nx, iws; extern logical _starpu_lsame_(char *, char *); integer nbmin, iinfo; logical upper; extern /* Subroutine */ int _starpu_dsytd2_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dsyr2k_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlatrd_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYTRD reduces a real symmetric matrix A to real symmetric */ /* tridiagonal form T by an orthogonal similarity transformation: */ /* Q**T * A * Q = T. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* N-by-N upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if UPLO = 'U', the diagonal and first superdiagonal */ /* of A are overwritten by the corresponding elements of the */ /* tridiagonal matrix T, and the elements above the first */ /* superdiagonal, with the array TAU, represent the orthogonal */ /* matrix Q as a product of elementary reflectors; if UPLO */ /* = 'L', the diagonal and first subdiagonal of A are over- */ /* written by the corresponding elements of the tridiagonal */ /* matrix T, and the elements below the first subdiagonal, with */ /* the array TAU, represent the orthogonal matrix Q as a product */ /* of elementary reflectors. See Further Details. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* D (output) DOUBLE PRECISION array, dimension (N) */ /* The diagonal elements of the tridiagonal matrix T: */ /* D(i) = A(i,i). */ /* E (output) DOUBLE PRECISION array, dimension (N-1) */ /* The off-diagonal elements of the tridiagonal matrix T: */ /* E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */ /* TAU (output) DOUBLE PRECISION array, dimension (N-1) */ /* The scalar factors of the elementary reflectors (see Further */ /* Details). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= 1. */ /* For optimum performance LWORK >= N*NB, where NB is the */ /* optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* If UPLO = 'U', the matrix Q is represented as a product of elementary */ /* reflectors */ /* Q = H(n-1) . . . H(2) H(1). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */ /* A(1:i-1,i+1), and tau in TAU(i). */ /* If UPLO = 'L', the matrix Q is represented as a product of elementary */ /* reflectors */ /* Q = H(1) H(2) . . . H(n-1). */ /* Each H(i) has the form */ /* H(i) = I - tau * v * v' */ /* where tau is a real scalar, and v is a real vector with */ /* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */ /* and tau in TAU(i). */ /* The contents of A on exit are illustrated by the following examples */ /* with n = 5: */ /* if UPLO = 'U': if UPLO = 'L': */ /* ( d e v2 v3 v4 ) ( d ) */ /* ( d e v3 v4 ) ( e d ) */ /* ( d e v4 ) ( v1 e d ) */ /* ( d e ) ( v1 v2 e d ) */ /* ( d ) ( v1 v2 v3 e d ) */ /* where d and e denote diagonal and off-diagonal elements of T, and vi */ /* denotes an element of the vector defining H(i). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --d__; --e; --tau; --work; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); lquery = *lwork == -1; if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } else if (*lwork < 1 && ! lquery) { *info = -9; } if (*info == 0) { /* Determine the block size. */ nb = _starpu_ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); lwkopt = *n * nb; work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYTRD", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { work[1] = 1.; return 0; } nx = *n; iws = 1; if (nb > 1 && nb < *n) { /* Determine when to cross over from blocked to unblocked code */ /* (last block is always handled by unblocked code). */ /* Computing MAX */ i__1 = nb, i__2 = _starpu_ilaenv_(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, & c_n1); nx = max(i__1,i__2); if (nx < *n) { /* Determine if workspace is large enough for blocked code. */ ldwork = *n; iws = ldwork * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: determine the */ /* minimum value of NB, and reduce NB or force use of */ /* unblocked code by setting NX = N. */ /* Computing MAX */ i__1 = *lwork / ldwork; nb = max(i__1,1); nbmin = _starpu_ilaenv_(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); if (nb < nbmin) { nx = *n; } } } else { nx = *n; } } else { nb = 1; } if (upper) { /* Reduce the upper triangle of A. */ /* Columns 1:kk are handled by the unblocked method. */ kk = *n - (*n - nx + nb - 1) / nb * nb; i__1 = kk + 1; i__2 = -nb; for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Reduce columns i:i+nb-1 to tridiagonal form and form the */ /* matrix W which is needed to update the unreduced part of */ /* the matrix */ i__3 = i__ + nb - 1; _starpu_dlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & work[1], &ldwork); /* Update the unreduced submatrix A(1:i-1,1:i-1), using an */ /* update of the form: A := A - V*W' - W*V' */ i__3 = i__ - 1; _starpu_dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b22, &a[i__ * a_dim1 + 1], lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda); /* Copy superdiagonal elements back into A, and diagonal */ /* elements into D */ i__3 = i__ + nb - 1; for (j = i__; j <= i__3; ++j) { a[j - 1 + j * a_dim1] = e[j - 1]; d__[j] = a[j + j * a_dim1]; /* L10: */ } /* L20: */ } /* Use unblocked code to reduce the last or only block */ _starpu_dsytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); } else { /* Reduce the lower triangle of A */ i__2 = *n - nx; i__1 = nb; for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { /* Reduce columns i:i+nb-1 to tridiagonal form and form the */ /* matrix W which is needed to update the unreduced part of */ /* the matrix */ i__3 = *n - i__ + 1; _starpu_dlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & tau[i__], &work[1], &ldwork); /* Update the unreduced submatrix A(i+ib:n,i+ib:n), using */ /* an update of the form: A := A - V*W' - W*V' */ i__3 = *n - i__ - nb + 1; _starpu_dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b22, &a[i__ + nb + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b23, &a[ i__ + nb + (i__ + nb) * a_dim1], lda); /* Copy subdiagonal elements back into A, and diagonal */ /* elements into D */ i__3 = i__ + nb - 1; for (j = i__; j <= i__3; ++j) { a[j + 1 + j * a_dim1] = e[j]; d__[j] = a[j + j * a_dim1]; /* L30: */ } /* L40: */ } /* Use unblocked code to reduce the last or only block */ i__1 = *n - i__ + 1; _starpu_dsytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &tau[i__], &iinfo); } work[1] = (doublereal) lwkopt; return 0; /* End of DSYTRD */ } /* _starpu_dsytrd_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsytrf.c000066400000000000000000000245231507764646700207150ustar00rootroot00000000000000/* dsytrf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; /* Subroutine */ int _starpu_dsytrf_(char *uplo, integer *n, doublereal *a, integer * lda, integer *ipiv, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer j, k, kb, nb, iws; extern logical _starpu_lsame_(char *, char *); integer nbmin, iinfo; logical upper; extern /* Subroutine */ int _starpu_dsytf2_(char *, integer *, doublereal *, integer *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dlasyf_(char *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYTRF computes the factorization of a real symmetric matrix A using */ /* the Bunch-Kaufman diagonal pivoting method. The form of the */ /* factorization is */ /* A = U*D*U**T or A = L*D*L**T */ /* where U (or L) is a product of permutation and unit upper (lower) */ /* triangular matrices, and D is symmetric and block diagonal with */ /* 1-by-1 and 2-by-2 diagonal blocks. */ /* This is the blocked version of the algorithm, calling Level 3 BLAS. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* N-by-N upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, the block diagonal matrix D and the multipliers used */ /* to obtain the factor U or L (see below for further details). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* IPIV (output) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D. */ /* If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ /* interchanged and D(k,k) is a 1-by-1 diagonal block. */ /* If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */ /* columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ /* is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = */ /* IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */ /* interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The length of WORK. LWORK >=1. For best performance */ /* LWORK >= N*NB, where NB is the block size returned by ILAENV. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, D(i,i) is exactly zero. The factorization */ /* has been completed, but the block diagonal matrix D is */ /* exactly singular, and division by zero will occur if it */ /* is used to solve a system of equations. */ /* Further Details */ /* =============== */ /* If UPLO = 'U', then A = U*D*U', where */ /* U = P(n)*U(n)* ... *P(k)U(k)* ..., */ /* i.e., U is a product of terms P(k)*U(k), where k decreases from n to */ /* 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ /* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ /* defined by IPIV(k), and U(k) is a unit upper triangular matrix, such */ /* that if the diagonal block D(k) is of order s (s = 1 or 2), then */ /* ( I v 0 ) k-s */ /* U(k) = ( 0 I 0 ) s */ /* ( 0 0 I ) n-k */ /* k-s s n-k */ /* If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). */ /* If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), */ /* and A(k,k), and v overwrites A(1:k-2,k-1:k). */ /* If UPLO = 'L', then A = L*D*L', where */ /* L = P(1)*L(1)* ... *P(k)*L(k)* ..., */ /* i.e., L is a product of terms P(k)*L(k), where k increases from 1 to */ /* n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ /* and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ /* defined by IPIV(k), and L(k) is a unit lower triangular matrix, such */ /* that if the diagonal block D(k) is of order s (s = 1 or 2), then */ /* ( I 0 0 ) k-1 */ /* L(k) = ( 0 I 0 ) s */ /* ( 0 v I ) n-k-s+1 */ /* k-1 s n-k-s+1 */ /* If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). */ /* If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), */ /* and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; --work; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); lquery = *lwork == -1; if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } else if (*lwork < 1 && ! lquery) { *info = -7; } if (*info == 0) { /* Determine the block size */ nb = _starpu_ilaenv_(&c__1, "DSYTRF", uplo, n, &c_n1, &c_n1, &c_n1); lwkopt = *n * nb; work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYTRF", &i__1); return 0; } else if (lquery) { return 0; } nbmin = 2; ldwork = *n; if (nb > 1 && nb < *n) { iws = ldwork * nb; if (*lwork < iws) { /* Computing MAX */ i__1 = *lwork / ldwork; nb = max(i__1,1); /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DSYTRF", uplo, n, &c_n1, &c_n1, & c_n1); nbmin = max(i__1,i__2); } } else { iws = 1; } if (nb < nbmin) { nb = *n; } if (upper) { /* Factorize A as U*D*U' using the upper triangle of A */ /* K is the main loop index, decreasing from N to 1 in steps of */ /* KB, where KB is the number of columns factorized by DLASYF; */ /* KB is either NB or NB-1, or K for the last block */ k = *n; L10: /* If K < 1, exit from loop */ if (k < 1) { goto L40; } if (k > nb) { /* Factorize columns k-kb+1:k of A and use blocked code to */ /* update columns 1:k-kb */ _starpu_dlasyf_(uplo, &k, &nb, &kb, &a[a_offset], lda, &ipiv[1], &work[1], &ldwork, &iinfo); } else { /* Use unblocked code to factorize columns 1:k of A */ _starpu_dsytf2_(uplo, &k, &a[a_offset], lda, &ipiv[1], &iinfo); kb = k; } /* Set INFO on the first occurrence of a zero pivot */ if (*info == 0 && iinfo > 0) { *info = iinfo; } /* Decrease K and return to the start of the main loop */ k -= kb; goto L10; } else { /* Factorize A as L*D*L' using the lower triangle of A */ /* K is the main loop index, increasing from 1 to N in steps of */ /* KB, where KB is the number of columns factorized by DLASYF; */ /* KB is either NB or NB-1, or N-K+1 for the last block */ k = 1; L20: /* If K > N, exit from loop */ if (k > *n) { goto L40; } if (k <= *n - nb) { /* Factorize columns k:k+kb-1 of A and use blocked code to */ /* update columns k+kb:n */ i__1 = *n - k + 1; _starpu_dlasyf_(uplo, &i__1, &nb, &kb, &a[k + k * a_dim1], lda, &ipiv[k], &work[1], &ldwork, &iinfo); } else { /* Use unblocked code to factorize columns k:n of A */ i__1 = *n - k + 1; _starpu_dsytf2_(uplo, &i__1, &a[k + k * a_dim1], lda, &ipiv[k], &iinfo); kb = *n - k + 1; } /* Set INFO on the first occurrence of a zero pivot */ if (*info == 0 && iinfo > 0) { *info = iinfo + k - 1; } /* Adjust IPIV */ i__1 = k + kb - 1; for (j = k; j <= i__1; ++j) { if (ipiv[j] > 0) { ipiv[j] = ipiv[j] + k - 1; } else { ipiv[j] = ipiv[j] - k + 1; } /* L30: */ } /* Increase K and return to the start of the main loop */ k += kb; goto L20; } L40: work[1] = (doublereal) lwkopt; return 0; /* End of DSYTRF */ } /* _starpu_dsytrf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsytri.c000066400000000000000000000256501507764646700207220ustar00rootroot00000000000000/* dsytri.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b11 = -1.; static doublereal c_b13 = 0.; /* Subroutine */ int _starpu_dsytri_(char *uplo, integer *n, doublereal *a, integer * lda, integer *ipiv, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1; doublereal d__1; /* Local variables */ doublereal d__; integer k; doublereal t, ak; integer kp; doublereal akp1; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal temp, akkp1; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); integer kstep; logical upper; extern /* Subroutine */ int _starpu_dsymv_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYTRI computes the inverse of a real symmetric indefinite matrix */ /* A using the factorization A = U*D*U**T or A = L*D*L**T computed by */ /* DSYTRF. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the details of the factorization are stored */ /* as an upper or lower triangular matrix. */ /* = 'U': Upper triangular, form is A = U*D*U**T; */ /* = 'L': Lower triangular, form is A = L*D*L**T. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the block diagonal matrix D and the multipliers */ /* used to obtain the factor U or L as computed by DSYTRF. */ /* On exit, if INFO = 0, the (symmetric) inverse of the original */ /* matrix. If UPLO = 'U', the upper triangular part of the */ /* inverse is formed and the part of A below the diagonal is not */ /* referenced; if UPLO = 'L' the lower triangular part of the */ /* inverse is formed and the part of A above the diagonal is */ /* not referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D */ /* as determined by DSYTRF. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its */ /* inverse could not be computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; --work; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYTRI", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check that the diagonal matrix D is nonsingular. */ if (upper) { /* Upper triangular storage: examine D from bottom to top */ for (*info = *n; *info >= 1; --(*info)) { if (ipiv[*info] > 0 && a[*info + *info * a_dim1] == 0.) { return 0; } /* L10: */ } } else { /* Lower triangular storage: examine D from top to bottom. */ i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (ipiv[*info] > 0 && a[*info + *info * a_dim1] == 0.) { return 0; } /* L20: */ } } *info = 0; if (upper) { /* Compute inv(A) from the factorization A = U*D*U'. */ /* K is the main loop index, increasing from 1 to N in steps of */ /* 1 or 2, depending on the size of the diagonal blocks. */ k = 1; L30: /* If K > N, exit from loop. */ if (k > *n) { goto L40; } if (ipiv[k] > 0) { /* 1 x 1 diagonal block */ /* Invert the diagonal block. */ a[k + k * a_dim1] = 1. / a[k + k * a_dim1]; /* Compute column K of the inverse. */ if (k > 1) { i__1 = k - 1; _starpu_dcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); i__1 = k - 1; _starpu_dsymv_(uplo, &i__1, &c_b11, &a[a_offset], lda, &work[1], & c__1, &c_b13, &a[k * a_dim1 + 1], &c__1); i__1 = k - 1; a[k + k * a_dim1] -= _starpu_ddot_(&i__1, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); } kstep = 1; } else { /* 2 x 2 diagonal block */ /* Invert the diagonal block. */ t = (d__1 = a[k + (k + 1) * a_dim1], abs(d__1)); ak = a[k + k * a_dim1] / t; akp1 = a[k + 1 + (k + 1) * a_dim1] / t; akkp1 = a[k + (k + 1) * a_dim1] / t; d__ = t * (ak * akp1 - 1.); a[k + k * a_dim1] = akp1 / d__; a[k + 1 + (k + 1) * a_dim1] = ak / d__; a[k + (k + 1) * a_dim1] = -akkp1 / d__; /* Compute columns K and K+1 of the inverse. */ if (k > 1) { i__1 = k - 1; _starpu_dcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); i__1 = k - 1; _starpu_dsymv_(uplo, &i__1, &c_b11, &a[a_offset], lda, &work[1], & c__1, &c_b13, &a[k * a_dim1 + 1], &c__1); i__1 = k - 1; a[k + k * a_dim1] -= _starpu_ddot_(&i__1, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); i__1 = k - 1; a[k + (k + 1) * a_dim1] -= _starpu_ddot_(&i__1, &a[k * a_dim1 + 1], & c__1, &a[(k + 1) * a_dim1 + 1], &c__1); i__1 = k - 1; _starpu_dcopy_(&i__1, &a[(k + 1) * a_dim1 + 1], &c__1, &work[1], & c__1); i__1 = k - 1; _starpu_dsymv_(uplo, &i__1, &c_b11, &a[a_offset], lda, &work[1], & c__1, &c_b13, &a[(k + 1) * a_dim1 + 1], &c__1); i__1 = k - 1; a[k + 1 + (k + 1) * a_dim1] -= _starpu_ddot_(&i__1, &work[1], &c__1, & a[(k + 1) * a_dim1 + 1], &c__1); } kstep = 2; } kp = (i__1 = ipiv[k], abs(i__1)); if (kp != k) { /* Interchange rows and columns K and KP in the leading */ /* submatrix A(1:k+1,1:k+1) */ i__1 = kp - 1; _starpu_dswap_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], & c__1); i__1 = k - kp - 1; _starpu_dswap_(&i__1, &a[kp + 1 + k * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); temp = a[k + k * a_dim1]; a[k + k * a_dim1] = a[kp + kp * a_dim1]; a[kp + kp * a_dim1] = temp; if (kstep == 2) { temp = a[k + (k + 1) * a_dim1]; a[k + (k + 1) * a_dim1] = a[kp + (k + 1) * a_dim1]; a[kp + (k + 1) * a_dim1] = temp; } } k += kstep; goto L30; L40: ; } else { /* Compute inv(A) from the factorization A = L*D*L'. */ /* K is the main loop index, increasing from 1 to N in steps of */ /* 1 or 2, depending on the size of the diagonal blocks. */ k = *n; L50: /* If K < 1, exit from loop. */ if (k < 1) { goto L60; } if (ipiv[k] > 0) { /* 1 x 1 diagonal block */ /* Invert the diagonal block. */ a[k + k * a_dim1] = 1. / a[k + k * a_dim1]; /* Compute column K of the inverse. */ if (k < *n) { i__1 = *n - k; _starpu_dcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); i__1 = *n - k; _starpu_dsymv_(uplo, &i__1, &c_b11, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, &c_b13, &a[k + 1 + k * a_dim1], & c__1); i__1 = *n - k; a[k + k * a_dim1] -= _starpu_ddot_(&i__1, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); } kstep = 1; } else { /* 2 x 2 diagonal block */ /* Invert the diagonal block. */ t = (d__1 = a[k + (k - 1) * a_dim1], abs(d__1)); ak = a[k - 1 + (k - 1) * a_dim1] / t; akp1 = a[k + k * a_dim1] / t; akkp1 = a[k + (k - 1) * a_dim1] / t; d__ = t * (ak * akp1 - 1.); a[k - 1 + (k - 1) * a_dim1] = akp1 / d__; a[k + k * a_dim1] = ak / d__; a[k + (k - 1) * a_dim1] = -akkp1 / d__; /* Compute columns K-1 and K of the inverse. */ if (k < *n) { i__1 = *n - k; _starpu_dcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); i__1 = *n - k; _starpu_dsymv_(uplo, &i__1, &c_b11, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, &c_b13, &a[k + 1 + k * a_dim1], & c__1); i__1 = *n - k; a[k + k * a_dim1] -= _starpu_ddot_(&i__1, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); i__1 = *n - k; a[k + (k - 1) * a_dim1] -= _starpu_ddot_(&i__1, &a[k + 1 + k * a_dim1] , &c__1, &a[k + 1 + (k - 1) * a_dim1], &c__1); i__1 = *n - k; _starpu_dcopy_(&i__1, &a[k + 1 + (k - 1) * a_dim1], &c__1, &work[1], & c__1); i__1 = *n - k; _starpu_dsymv_(uplo, &i__1, &c_b11, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, &c_b13, &a[k + 1 + (k - 1) * a_dim1] , &c__1); i__1 = *n - k; a[k - 1 + (k - 1) * a_dim1] -= _starpu_ddot_(&i__1, &work[1], &c__1, & a[k + 1 + (k - 1) * a_dim1], &c__1); } kstep = 2; } kp = (i__1 = ipiv[k], abs(i__1)); if (kp != k) { /* Interchange rows and columns K and KP in the trailing */ /* submatrix A(k-1:n,k-1:n) */ if (kp < *n) { i__1 = *n - kp; _starpu_dswap_(&i__1, &a[kp + 1 + k * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); } i__1 = kp - k - 1; _starpu_dswap_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &a[kp + (k + 1) * a_dim1], lda); temp = a[k + k * a_dim1]; a[k + k * a_dim1] = a[kp + kp * a_dim1]; a[kp + kp * a_dim1] = temp; if (kstep == 2) { temp = a[k + (k - 1) * a_dim1]; a[k + (k - 1) * a_dim1] = a[kp + (k - 1) * a_dim1]; a[kp + (k - 1) * a_dim1] = temp; } } k -= kstep; goto L50; L60: ; } return 0; /* End of DSYTRI */ } /* _starpu_dsytri_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dsytrs.c000066400000000000000000000274651507764646700207420ustar00rootroot00000000000000/* dsytrs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b7 = -1.; static integer c__1 = 1; static doublereal c_b19 = 1.; /* Subroutine */ int _starpu_dsytrs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * ldb, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; doublereal d__1; /* Local variables */ integer j, k; doublereal ak, bk; integer kp; doublereal akm1, bkm1; extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal akm1k; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); doublereal denom; extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSYTRS solves a system of linear equations A*X = B with a real */ /* symmetric matrix A using the factorization A = U*D*U**T or */ /* A = L*D*L**T computed by DSYTRF. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the details of the factorization are stored */ /* as an upper or lower triangular matrix. */ /* = 'U': Upper triangular, form is A = U*D*U**T; */ /* = 'L': Lower triangular, form is A = L*D*L**T. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The block diagonal matrix D and the multipliers used to */ /* obtain the factor U or L as computed by DSYTRF. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D */ /* as determined by DSYTRF. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side matrix B. */ /* On exit, the solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -8; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DSYTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } if (upper) { /* Solve A*X = B, where A = U*D*U'. */ /* First solve U*D*X = B, overwriting B with X. */ /* K is the main loop index, decreasing from N to 1 in steps of */ /* 1 or 2, depending on the size of the diagonal blocks. */ k = *n; L10: /* If K < 1, exit from loop. */ if (k < 1) { goto L30; } if (ipiv[k] > 0) { /* 1 x 1 diagonal block */ /* Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); } /* Multiply by inv(U(K)), where U(K) is the transformation */ /* stored in column K of A. */ i__1 = k - 1; _starpu_dger_(&i__1, nrhs, &c_b7, &a[k * a_dim1 + 1], &c__1, &b[k + b_dim1], ldb, &b[b_dim1 + 1], ldb); /* Multiply by the inverse of the diagonal block. */ d__1 = 1. / a[k + k * a_dim1]; _starpu_dscal_(nrhs, &d__1, &b[k + b_dim1], ldb); --k; } else { /* 2 x 2 diagonal block */ /* Interchange rows K-1 and -IPIV(K). */ kp = -ipiv[k]; if (kp != k - 1) { _starpu_dswap_(nrhs, &b[k - 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); } /* Multiply by inv(U(K)), where U(K) is the transformation */ /* stored in columns K-1 and K of A. */ i__1 = k - 2; _starpu_dger_(&i__1, nrhs, &c_b7, &a[k * a_dim1 + 1], &c__1, &b[k + b_dim1], ldb, &b[b_dim1 + 1], ldb); i__1 = k - 2; _starpu_dger_(&i__1, nrhs, &c_b7, &a[(k - 1) * a_dim1 + 1], &c__1, &b[k - 1 + b_dim1], ldb, &b[b_dim1 + 1], ldb); /* Multiply by the inverse of the diagonal block. */ akm1k = a[k - 1 + k * a_dim1]; akm1 = a[k - 1 + (k - 1) * a_dim1] / akm1k; ak = a[k + k * a_dim1] / akm1k; denom = akm1 * ak - 1.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { bkm1 = b[k - 1 + j * b_dim1] / akm1k; bk = b[k + j * b_dim1] / akm1k; b[k - 1 + j * b_dim1] = (ak * bkm1 - bk) / denom; b[k + j * b_dim1] = (akm1 * bk - bkm1) / denom; /* L20: */ } k += -2; } goto L10; L30: /* Next solve U'*X = B, overwriting B with X. */ /* K is the main loop index, increasing from 1 to N in steps of */ /* 1 or 2, depending on the size of the diagonal blocks. */ k = 1; L40: /* If K > N, exit from loop. */ if (k > *n) { goto L50; } if (ipiv[k] > 0) { /* 1 x 1 diagonal block */ /* Multiply by inv(U'(K)), where U(K) is the transformation */ /* stored in column K of A. */ i__1 = k - 1; _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &a[k * a_dim1 + 1], &c__1, &c_b19, &b[k + b_dim1], ldb); /* Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); } ++k; } else { /* 2 x 2 diagonal block */ /* Multiply by inv(U'(K+1)), where U(K+1) is the transformation */ /* stored in columns K and K+1 of A. */ i__1 = k - 1; _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &a[k * a_dim1 + 1], &c__1, &c_b19, &b[k + b_dim1], ldb); i__1 = k - 1; _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &a[(k + 1) * a_dim1 + 1], &c__1, &c_b19, &b[k + 1 + b_dim1], ldb); /* Interchange rows K and -IPIV(K). */ kp = -ipiv[k]; if (kp != k) { _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); } k += 2; } goto L40; L50: ; } else { /* Solve A*X = B, where A = L*D*L'. */ /* First solve L*D*X = B, overwriting B with X. */ /* K is the main loop index, increasing from 1 to N in steps of */ /* 1 or 2, depending on the size of the diagonal blocks. */ k = 1; L60: /* If K > N, exit from loop. */ if (k > *n) { goto L80; } if (ipiv[k] > 0) { /* 1 x 1 diagonal block */ /* Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); } /* Multiply by inv(L(K)), where L(K) is the transformation */ /* stored in column K of A. */ if (k < *n) { i__1 = *n - k; _starpu_dger_(&i__1, nrhs, &c_b7, &a[k + 1 + k * a_dim1], &c__1, &b[k + b_dim1], ldb, &b[k + 1 + b_dim1], ldb); } /* Multiply by the inverse of the diagonal block. */ d__1 = 1. / a[k + k * a_dim1]; _starpu_dscal_(nrhs, &d__1, &b[k + b_dim1], ldb); ++k; } else { /* 2 x 2 diagonal block */ /* Interchange rows K+1 and -IPIV(K). */ kp = -ipiv[k]; if (kp != k + 1) { _starpu_dswap_(nrhs, &b[k + 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); } /* Multiply by inv(L(K)), where L(K) is the transformation */ /* stored in columns K and K+1 of A. */ if (k < *n - 1) { i__1 = *n - k - 1; _starpu_dger_(&i__1, nrhs, &c_b7, &a[k + 2 + k * a_dim1], &c__1, &b[k + b_dim1], ldb, &b[k + 2 + b_dim1], ldb); i__1 = *n - k - 1; _starpu_dger_(&i__1, nrhs, &c_b7, &a[k + 2 + (k + 1) * a_dim1], &c__1, &b[k + 1 + b_dim1], ldb, &b[k + 2 + b_dim1], ldb); } /* Multiply by the inverse of the diagonal block. */ akm1k = a[k + 1 + k * a_dim1]; akm1 = a[k + k * a_dim1] / akm1k; ak = a[k + 1 + (k + 1) * a_dim1] / akm1k; denom = akm1 * ak - 1.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { bkm1 = b[k + j * b_dim1] / akm1k; bk = b[k + 1 + j * b_dim1] / akm1k; b[k + j * b_dim1] = (ak * bkm1 - bk) / denom; b[k + 1 + j * b_dim1] = (akm1 * bk - bkm1) / denom; /* L70: */ } k += 2; } goto L60; L80: /* Next solve L'*X = B, overwriting B with X. */ /* K is the main loop index, decreasing from N to 1 in steps of */ /* 1 or 2, depending on the size of the diagonal blocks. */ k = *n; L90: /* If K < 1, exit from loop. */ if (k < 1) { goto L100; } if (ipiv[k] > 0) { /* 1 x 1 diagonal block */ /* Multiply by inv(L'(K)), where L(K) is the transformation */ /* stored in column K of A. */ if (k < *n) { i__1 = *n - k; _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], ldb, &a[k + 1 + k * a_dim1], &c__1, &c_b19, &b[k + b_dim1], ldb); } /* Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); } --k; } else { /* 2 x 2 diagonal block */ /* Multiply by inv(L'(K-1)), where L(K-1) is the transformation */ /* stored in columns K-1 and K of A. */ if (k < *n) { i__1 = *n - k; _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], ldb, &a[k + 1 + k * a_dim1], &c__1, &c_b19, &b[k + b_dim1], ldb); i__1 = *n - k; _starpu_dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], ldb, &a[k + 1 + (k - 1) * a_dim1], &c__1, &c_b19, &b[ k - 1 + b_dim1], ldb); } /* Interchange rows K and -IPIV(K). */ kp = -ipiv[k]; if (kp != k) { _starpu_dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); } k += -2; } goto L90; L100: ; } return 0; /* End of DSYTRS */ } /* _starpu_dsytrs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtbcon.c000066400000000000000000000161171507764646700206530ustar00rootroot00000000000000/* dtbcon.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dtbcon_(char *norm, char *uplo, char *diag, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *rcond, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1; doublereal d__1; /* Local variables */ integer ix, kase, kase1; doublereal scale; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, integer *); doublereal anorm; logical upper; doublereal xnorm; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern doublereal _starpu_dlantb_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dlatbs_(char *, char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); doublereal ainvnm; logical onenrm; char normin[1]; doublereal smlnum; logical nounit; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTBCON estimates the reciprocal of the condition number of a */ /* triangular band matrix A, in either the 1-norm or the infinity-norm. */ /* The norm of A is computed and an estimate is obtained for */ /* norm(inv(A)), then the reciprocal of the condition number is */ /* computed as */ /* RCOND = 1 / ( norm(A) * norm(inv(A)) ). */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies whether the 1-norm condition number or the */ /* infinity-norm condition number is required: */ /* = '1' or 'O': 1-norm; */ /* = 'I': Infinity-norm. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* DIAG (input) CHARACTER*1 */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals or subdiagonals of the */ /* triangular band matrix A. KD >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* The upper or lower triangular band matrix A, stored in the */ /* first kd+1 rows of the array. The j-th column of A is stored */ /* in the j-th column of the array AB as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* If DIAG = 'U', the diagonal elements of A are not referenced */ /* and are assumed to be 1. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD+1. */ /* RCOND (output) DOUBLE PRECISION */ /* The reciprocal of the condition number of the matrix A, */ /* computed as RCOND = 1/(norm(A) * norm(inv(A))). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --work; --iwork; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); onenrm = *(unsigned char *)norm == '1' || _starpu_lsame_(norm, "O"); nounit = _starpu_lsame_(diag, "N"); if (! onenrm && ! _starpu_lsame_(norm, "I")) { *info = -1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*kd < 0) { *info = -5; } else if (*ldab < *kd + 1) { *info = -7; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTBCON", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { *rcond = 1.; return 0; } *rcond = 0.; smlnum = _starpu_dlamch_("Safe minimum") * (doublereal) max(1,*n); /* Compute the norm of the triangular matrix A. */ anorm = _starpu_dlantb_(norm, uplo, diag, n, kd, &ab[ab_offset], ldab, &work[1]); /* Continue only if ANORM > 0. */ if (anorm > 0.) { /* Estimate the norm of the inverse of A. */ ainvnm = 0.; *(unsigned char *)normin = 'N'; if (onenrm) { kase1 = 1; } else { kase1 = 2; } kase = 0; L10: _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); if (kase != 0) { if (kase == kase1) { /* Multiply by inv(A). */ _starpu_dlatbs_(uplo, "No transpose", diag, normin, n, kd, &ab[ ab_offset], ldab, &work[1], &scale, &work[(*n << 1) + 1], info) ; } else { /* Multiply by inv(A'). */ _starpu_dlatbs_(uplo, "Transpose", diag, normin, n, kd, &ab[ab_offset] , ldab, &work[1], &scale, &work[(*n << 1) + 1], info); } *(unsigned char *)normin = 'Y'; /* Multiply by 1/SCALE if doing so will not cause overflow. */ if (scale != 1.) { ix = _starpu_idamax_(n, &work[1], &c__1); xnorm = (d__1 = work[ix], abs(d__1)); if (scale < xnorm * smlnum || scale == 0.) { goto L20; } _starpu_drscl_(n, &scale, &work[1], &c__1); } goto L10; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { *rcond = 1. / anorm / ainvnm; } } L20: return 0; /* End of DTBCON */ } /* _starpu_dtbcon_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtbrfs.c000066400000000000000000000351131507764646700206630ustar00rootroot00000000000000/* dtbrfs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b19 = -1.; /* Subroutine */ int _starpu_dtbrfs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j, k; doublereal s, xk; integer nz; doublereal eps; integer kase; doublereal safe1, safe2; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_dtbmv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer * , doublereal *, integer *), _starpu_dtbsv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal * , doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical notran; char transt[1]; logical nounit; doublereal lstres; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTBRFS provides error bounds and backward error estimates for the */ /* solution to a system of linear equations with a triangular band */ /* coefficient matrix. */ /* The solution matrix X must be computed by DTBTRS or some other */ /* means before entering this routine. DTBRFS does not do iterative */ /* refinement because doing so cannot improve the backward error. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ /* DIAG (input) CHARACTER*1 */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals or subdiagonals of the */ /* triangular band matrix A. KD >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* The upper or lower triangular band matrix A, stored in the */ /* first kd+1 rows of the array. The j-th column of A is stored */ /* in the j-th column of the array AB as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* If DIAG = 'U', the diagonal elements of A are not referenced */ /* and are assumed to be 1. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD+1. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* The solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); notran = _starpu_lsame_(trans, "N"); nounit = _starpu_lsame_(diag, "N"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*kd < 0) { *info = -5; } else if (*nrhs < 0) { *info = -6; } else if (*ldab < *kd + 1) { *info = -8; } else if (*ldb < max(1,*n)) { *info = -10; } else if (*ldx < max(1,*n)) { *info = -12; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTBRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } if (notran) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = *kd + 2; eps = _starpu_dlamch_("Epsilon"); safmin = _starpu_dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { /* Compute residual R = B - op(A) * X, */ /* where op(A) = A or A', depending on TRANS. */ _starpu_dcopy_(n, &x[j * x_dim1 + 1], &c__1, &work[*n + 1], &c__1); _starpu_dtbmv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &work[*n + 1], &c__1); _starpu_daxpy_(n, &c_b19, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. If the i-th component of the denominator is less */ /* than SAFE2, then SAFE1 is added to the i-th components of the */ /* numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); /* L20: */ } if (notran) { /* Compute abs(A)*abs(X) + abs(B). */ if (upper) { if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x[k + j * x_dim1], abs(d__1)); /* Computing MAX */ i__3 = 1, i__4 = k - *kd; i__5 = k; for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) { work[i__] += (d__1 = ab[*kd + 1 + i__ - k + k * ab_dim1], abs(d__1)) * xk; /* L30: */ } /* L40: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x[k + j * x_dim1], abs(d__1)); /* Computing MAX */ i__5 = 1, i__3 = k - *kd; i__4 = k - 1; for (i__ = max(i__5,i__3); i__ <= i__4; ++i__) { work[i__] += (d__1 = ab[*kd + 1 + i__ - k + k * ab_dim1], abs(d__1)) * xk; /* L50: */ } work[k] += xk; /* L60: */ } } } else { if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x[k + j * x_dim1], abs(d__1)); /* Computing MIN */ i__5 = *n, i__3 = k + *kd; i__4 = min(i__5,i__3); for (i__ = k; i__ <= i__4; ++i__) { work[i__] += (d__1 = ab[i__ + 1 - k + k * ab_dim1] , abs(d__1)) * xk; /* L70: */ } /* L80: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x[k + j * x_dim1], abs(d__1)); /* Computing MIN */ i__5 = *n, i__3 = k + *kd; i__4 = min(i__5,i__3); for (i__ = k + 1; i__ <= i__4; ++i__) { work[i__] += (d__1 = ab[i__ + 1 - k + k * ab_dim1] , abs(d__1)) * xk; /* L90: */ } work[k] += xk; /* L100: */ } } } } else { /* Compute abs(A')*abs(X) + abs(B). */ if (upper) { if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; /* Computing MAX */ i__4 = 1, i__5 = k - *kd; i__3 = k; for (i__ = max(i__4,i__5); i__ <= i__3; ++i__) { s += (d__1 = ab[*kd + 1 + i__ - k + k * ab_dim1], abs(d__1)) * (d__2 = x[i__ + j * x_dim1], abs(d__2)); /* L110: */ } work[k] += s; /* L120: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = (d__1 = x[k + j * x_dim1], abs(d__1)); /* Computing MAX */ i__3 = 1, i__4 = k - *kd; i__5 = k - 1; for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) { s += (d__1 = ab[*kd + 1 + i__ - k + k * ab_dim1], abs(d__1)) * (d__2 = x[i__ + j * x_dim1], abs(d__2)); /* L130: */ } work[k] += s; /* L140: */ } } } else { if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; /* Computing MIN */ i__3 = *n, i__4 = k + *kd; i__5 = min(i__3,i__4); for (i__ = k; i__ <= i__5; ++i__) { s += (d__1 = ab[i__ + 1 - k + k * ab_dim1], abs( d__1)) * (d__2 = x[i__ + j * x_dim1], abs( d__2)); /* L150: */ } work[k] += s; /* L160: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = (d__1 = x[k + j * x_dim1], abs(d__1)); /* Computing MIN */ i__3 = *n, i__4 = k + *kd; i__5 = min(i__3,i__4); for (i__ = k + 1; i__ <= i__5; ++i__) { s += (d__1 = ab[i__ + 1 - k + k * ab_dim1], abs( d__1)) * (d__2 = x[i__ + j * x_dim1], abs( d__2)); /* L170: */ } work[k] += s; /* L180: */ } } } } s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L190: */ } berr[j] = s; /* Bound error from formula */ /* norm(X - XTRUE) / norm(X) .le. FERR = */ /* norm( abs(inv(op(A)))* */ /* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ /* where */ /* norm(Z) is the magnitude of the largest component of Z */ /* inv(op(A)) is the inverse of op(A) */ /* abs(Z) is the componentwise absolute value of the matrix or */ /* vector Z */ /* NZ is the maximum number of nonzeros in any row of A, plus 1 */ /* EPS is machine epsilon */ /* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ /* is incremented by SAFE1 if the i-th component of */ /* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ /* Use DLACN2 to estimate the infinity-norm of the matrix */ /* inv(op(A)) * diag(W), */ /* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L200: */ } kase = 0; L210: _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase, isave); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(op(A)'). */ _starpu_dtbsv_(uplo, transt, diag, n, kd, &ab[ab_offset], ldab, &work[ *n + 1], &c__1); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L220: */ } } else { /* Multiply by inv(op(A))*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L230: */ } _starpu_dtbsv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &work[* n + 1], &c__1); } goto L210; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); lstres = max(d__2,d__3); /* L240: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L250: */ } return 0; /* End of DTBRFS */ } /* _starpu_dtbrfs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtbtrs.c000066400000000000000000000134221507764646700207000ustar00rootroot00000000000000/* dtbtrs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dtbtrs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, b_dim1, b_offset, i__1; /* Local variables */ integer j; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtbsv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nounit; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTBTRS solves a triangular system of the form */ /* A * X = B or A**T * X = B, */ /* where A is a triangular band matrix of order N, and B is an */ /* N-by NRHS matrix. A check is made to verify that A is nonsingular. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ /* DIAG (input) CHARACTER*1 */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals or subdiagonals of the */ /* triangular band matrix A. KD >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* The upper or lower triangular band matrix A, stored in the */ /* first kd+1 rows of AB. The j-th column of A is stored */ /* in the j-th column of the array AB as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* If DIAG = 'U', the diagonal elements of A are not referenced */ /* and are assumed to be 1. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD+1. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side matrix B. */ /* On exit, if INFO = 0, the solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the i-th diagonal element of A is zero, */ /* indicating that the matrix is singular and the */ /* solutions X have not been computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; nounit = _starpu_lsame_(diag, "N"); upper = _starpu_lsame_(uplo, "U"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*kd < 0) { *info = -5; } else if (*nrhs < 0) { *info = -6; } else if (*ldab < *kd + 1) { *info = -8; } else if (*ldb < max(1,*n)) { *info = -10; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTBTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check for singularity. */ if (nounit) { if (upper) { i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (ab[*kd + 1 + *info * ab_dim1] == 0.) { return 0; } /* L10: */ } } else { i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (ab[*info * ab_dim1 + 1] == 0.) { return 0; } /* L20: */ } } } *info = 0; /* Solve A * X = B or A' * X = B. */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { _starpu_dtbsv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &b[j * b_dim1 + 1], &c__1); /* L30: */ } return 0; /* End of DTBTRS */ } /* _starpu_dtbtrs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtfsm.c000066400000000000000000000702411507764646700205150ustar00rootroot00000000000000/* dtfsm.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b23 = -1.; static doublereal c_b27 = 1.; /* Subroutine */ int _starpu_dtfsm_(char *transr, char *side, char *uplo, char *trans, char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, doublereal *b, integer *ldb) { /* System generated locals */ integer b_dim1, b_offset, i__1, i__2; /* Local variables */ integer i__, j, k, m1, m2, n1, n2, info; logical normaltransr; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical lside; extern logical _starpu_lsame_(char *, char *); logical lower; extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_( char *, integer *); logical misodd, nisodd, notrans; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* Level 3 BLAS like routine for A in RFP Format. */ /* DTFSM solves the matrix equation */ /* op( A )*X = alpha*B or X*op( A ) = alpha*B */ /* where alpha is a scalar, X and B are m by n matrices, A is a unit, or */ /* non-unit, upper or lower triangular matrix and op( A ) is one of */ /* op( A ) = A or op( A ) = A'. */ /* A is in Rectangular Full Packed (RFP) Format. */ /* The matrix X is overwritten on B. */ /* Arguments */ /* ========== */ /* TRANSR - (input) CHARACTER */ /* = 'N': The Normal Form of RFP A is stored; */ /* = 'T': The Transpose Form of RFP A is stored. */ /* SIDE - (input) CHARACTER */ /* On entry, SIDE specifies whether op( A ) appears on the left */ /* or right of X as follows: */ /* SIDE = 'L' or 'l' op( A )*X = alpha*B. */ /* SIDE = 'R' or 'r' X*op( A ) = alpha*B. */ /* Unchanged on exit. */ /* UPLO - (input) CHARACTER */ /* On entry, UPLO specifies whether the RFP matrix A came from */ /* an upper or lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' RFP A came from an upper triangular matrix */ /* UPLO = 'L' or 'l' RFP A came from a lower triangular matrix */ /* Unchanged on exit. */ /* TRANS - (input) CHARACTER */ /* On entry, TRANS specifies the form of op( A ) to be used */ /* in the matrix multiplication as follows: */ /* TRANS = 'N' or 'n' op( A ) = A. */ /* TRANS = 'T' or 't' op( A ) = A'. */ /* Unchanged on exit. */ /* DIAG - (input) CHARACTER */ /* On entry, DIAG specifies whether or not RFP A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* M - (input) INTEGER. */ /* On entry, M specifies the number of rows of B. M must be at */ /* least zero. */ /* Unchanged on exit. */ /* N - (input) INTEGER. */ /* On entry, N specifies the number of columns of B. N must be */ /* at least zero. */ /* Unchanged on exit. */ /* ALPHA - (input) DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. When alpha is */ /* zero then A is not referenced and B need not be set before */ /* entry. */ /* Unchanged on exit. */ /* A - (input) DOUBLE PRECISION array, dimension (NT); */ /* NT = N*(N+1)/2. On entry, the matrix A in RFP Format. */ /* RFP Format is described by TRANSR, UPLO and N as follows: */ /* If TRANSR='N' then RFP A is (0:N,0:K-1) when N is even; */ /* K=N/2. RFP A is (0:N-1,0:K) when N is odd; K=N/2. If */ /* TRANSR = 'T' then RFP is the transpose of RFP A as */ /* defined when TRANSR = 'N'. The contents of RFP A are defined */ /* by UPLO as follows: If UPLO = 'U' the RFP A contains the NT */ /* elements of upper packed A either in normal or */ /* transpose Format. If UPLO = 'L' the RFP A contains */ /* the NT elements of lower packed A either in normal or */ /* transpose Format. The LDA of RFP A is (N+1)/2 when */ /* TRANSR = 'T'. When TRANSR is 'N' the LDA is N+1 when N is */ /* even and is N when is odd. */ /* See the Note below for more details. Unchanged on exit. */ /* B - (input/ouptut) DOUBLE PRECISION array, DIMENSION (LDB,N) */ /* Before entry, the leading m by n part of the array B must */ /* contain the right-hand side matrix B, and on exit is */ /* overwritten by the solution matrix X. */ /* LDB - (input) INTEGER. */ /* On entry, LDB specifies the first dimension of B as declared */ /* in the calling (sub) program. LDB must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* Further Details */ /* =============== */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* even. We give an example where N = 6. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 05 00 */ /* 11 12 13 14 15 10 11 */ /* 22 23 24 25 20 21 22 */ /* 33 34 35 30 31 32 33 */ /* 44 45 40 41 42 43 44 */ /* 55 50 51 52 53 54 55 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ /* the transpose of the first three columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ /* the transpose of the last three columns of AP lower. */ /* This covers the case N even and TRANSR = 'N'. */ /* RFP A RFP A */ /* 03 04 05 33 43 53 */ /* 13 14 15 00 44 54 */ /* 23 24 25 10 11 55 */ /* 33 34 35 20 21 22 */ /* 00 44 45 30 31 32 */ /* 01 11 55 40 41 42 */ /* 02 12 22 50 51 52 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ /* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ /* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* odd. We give an example where N = 5. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 00 */ /* 11 12 13 14 10 11 */ /* 22 23 24 20 21 22 */ /* 33 34 30 31 32 33 */ /* 44 40 41 42 43 44 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ /* the transpose of the first two columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ /* the transpose of the last two columns of AP lower. */ /* This covers the case N odd and TRANSR = 'N'. */ /* RFP A RFP A */ /* 02 03 04 00 33 43 */ /* 12 13 14 10 11 44 */ /* 22 23 24 20 21 22 */ /* 00 33 34 30 31 32 */ /* 01 11 44 40 41 42 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 02 12 22 00 01 00 10 20 30 40 50 */ /* 03 13 23 33 11 33 11 21 31 41 51 */ /* 04 14 24 34 44 43 44 22 32 42 52 */ /* Reference */ /* ========= */ /* ===================================================================== */ /* .. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ b_dim1 = *ldb - 1 - 0 + 1; b_offset = 0 + b_dim1 * 0; b -= b_offset; /* Function Body */ info = 0; normaltransr = _starpu_lsame_(transr, "N"); lside = _starpu_lsame_(side, "L"); lower = _starpu_lsame_(uplo, "L"); notrans = _starpu_lsame_(trans, "N"); if (! normaltransr && ! _starpu_lsame_(transr, "T")) { info = -1; } else if (! lside && ! _starpu_lsame_(side, "R")) { info = -2; } else if (! lower && ! _starpu_lsame_(uplo, "U")) { info = -3; } else if (! notrans && ! _starpu_lsame_(trans, "T")) { info = -4; } else if (! _starpu_lsame_(diag, "N") && ! _starpu_lsame_(diag, "U")) { info = -5; } else if (*m < 0) { info = -6; } else if (*n < 0) { info = -7; } else if (*ldb < max(1,*m)) { info = -11; } if (info != 0) { i__1 = -info; _starpu_xerbla_("DTFSM ", &i__1); return 0; } /* Quick return when ( (N.EQ.0).OR.(M.EQ.0) ) */ if (*m == 0 || *n == 0) { return 0; } /* Quick return when ALPHA.EQ.(0D+0) */ if (*alpha == 0.) { i__1 = *n - 1; for (j = 0; j <= i__1; ++j) { i__2 = *m - 1; for (i__ = 0; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = 0.; /* L10: */ } /* L20: */ } return 0; } if (lside) { /* SIDE = 'L' */ /* A is M-by-M. */ /* If M is odd, set NISODD = .TRUE., and M1 and M2. */ /* If M is even, NISODD = .FALSE., and M. */ if (*m % 2 == 0) { misodd = FALSE_; k = *m / 2; } else { misodd = TRUE_; if (lower) { m2 = *m / 2; m1 = *m - m2; } else { m1 = *m / 2; m2 = *m - m1; } } if (misodd) { /* SIDE = 'L' and N is odd */ if (normaltransr) { /* SIDE = 'L', N is odd, and TRANSR = 'N' */ if (lower) { /* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'L' */ if (notrans) { /* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and */ /* TRANS = 'N' */ if (*m == 1) { _starpu_dtrsm_("L", "L", "N", diag, &m1, n, alpha, a, m, & b[b_offset], ldb); } else { _starpu_dtrsm_("L", "L", "N", diag, &m1, n, alpha, a, m, & b[b_offset], ldb); _starpu_dgemm_("N", "N", &m2, n, &m1, &c_b23, &a[m1], m, & b[b_offset], ldb, alpha, &b[m1], ldb); _starpu_dtrsm_("L", "U", "T", diag, &m2, n, &c_b27, &a[*m] , m, &b[m1], ldb); } } else { /* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and */ /* TRANS = 'T' */ if (*m == 1) { _starpu_dtrsm_("L", "L", "T", diag, &m1, n, alpha, a, m, & b[b_offset], ldb); } else { _starpu_dtrsm_("L", "U", "N", diag, &m2, n, alpha, &a[*m], m, &b[m1], ldb); _starpu_dgemm_("T", "N", &m1, n, &m2, &c_b23, &a[m1], m, & b[m1], ldb, alpha, &b[b_offset], ldb); _starpu_dtrsm_("L", "L", "T", diag, &m1, n, &c_b27, a, m, &b[b_offset], ldb); } } } else { /* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'U' */ if (! notrans) { /* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and */ /* TRANS = 'N' */ _starpu_dtrsm_("L", "L", "N", diag, &m1, n, alpha, &a[m2], m, &b[b_offset], ldb); _starpu_dgemm_("T", "N", &m2, n, &m1, &c_b23, a, m, &b[ b_offset], ldb, alpha, &b[m1], ldb); _starpu_dtrsm_("L", "U", "T", diag, &m2, n, &c_b27, &a[m1], m, &b[m1], ldb); } else { /* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and */ /* TRANS = 'T' */ _starpu_dtrsm_("L", "U", "N", diag, &m2, n, alpha, &a[m1], m, &b[m1], ldb); _starpu_dgemm_("N", "N", &m1, n, &m2, &c_b23, a, m, &b[m1], ldb, alpha, &b[b_offset], ldb); _starpu_dtrsm_("L", "L", "T", diag, &m1, n, &c_b27, &a[m2], m, &b[b_offset], ldb); } } } else { /* SIDE = 'L', N is odd, and TRANSR = 'T' */ if (lower) { /* SIDE ='L', N is odd, TRANSR = 'T', and UPLO = 'L' */ if (notrans) { /* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'L', and */ /* TRANS = 'N' */ if (*m == 1) { _starpu_dtrsm_("L", "U", "T", diag, &m1, n, alpha, a, &m1, &b[b_offset], ldb); } else { _starpu_dtrsm_("L", "U", "T", diag, &m1, n, alpha, a, &m1, &b[b_offset], ldb); _starpu_dgemm_("T", "N", &m2, n, &m1, &c_b23, &a[m1 * m1], &m1, &b[b_offset], ldb, alpha, &b[m1], ldb); _starpu_dtrsm_("L", "L", "N", diag, &m2, n, &c_b27, &a[1], &m1, &b[m1], ldb); } } else { /* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'L', and */ /* TRANS = 'T' */ if (*m == 1) { _starpu_dtrsm_("L", "U", "N", diag, &m1, n, alpha, a, &m1, &b[b_offset], ldb); } else { _starpu_dtrsm_("L", "L", "T", diag, &m2, n, alpha, &a[1], &m1, &b[m1], ldb); _starpu_dgemm_("N", "N", &m1, n, &m2, &c_b23, &a[m1 * m1], &m1, &b[m1], ldb, alpha, &b[b_offset], ldb); _starpu_dtrsm_("L", "U", "N", diag, &m1, n, &c_b27, a, & m1, &b[b_offset], ldb); } } } else { /* SIDE ='L', N is odd, TRANSR = 'T', and UPLO = 'U' */ if (! notrans) { /* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'U', and */ /* TRANS = 'N' */ _starpu_dtrsm_("L", "U", "T", diag, &m1, n, alpha, &a[m2 * m2] , &m2, &b[b_offset], ldb); _starpu_dgemm_("N", "N", &m2, n, &m1, &c_b23, a, &m2, &b[ b_offset], ldb, alpha, &b[m1], ldb); _starpu_dtrsm_("L", "L", "N", diag, &m2, n, &c_b27, &a[m1 * m2], &m2, &b[m1], ldb); } else { /* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'U', and */ /* TRANS = 'T' */ _starpu_dtrsm_("L", "L", "T", diag, &m2, n, alpha, &a[m1 * m2] , &m2, &b[m1], ldb); _starpu_dgemm_("T", "N", &m1, n, &m2, &c_b23, a, &m2, &b[m1], ldb, alpha, &b[b_offset], ldb); _starpu_dtrsm_("L", "U", "N", diag, &m1, n, &c_b27, &a[m2 * m2], &m2, &b[b_offset], ldb); } } } } else { /* SIDE = 'L' and N is even */ if (normaltransr) { /* SIDE = 'L', N is even, and TRANSR = 'N' */ if (lower) { /* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'L' */ if (notrans) { /* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', */ /* and TRANS = 'N' */ i__1 = *m + 1; _starpu_dtrsm_("L", "L", "N", diag, &k, n, alpha, &a[1], & i__1, &b[b_offset], ldb); i__1 = *m + 1; _starpu_dgemm_("N", "N", &k, n, &k, &c_b23, &a[k + 1], &i__1, &b[b_offset], ldb, alpha, &b[k], ldb); i__1 = *m + 1; _starpu_dtrsm_("L", "U", "T", diag, &k, n, &c_b27, a, &i__1, & b[k], ldb); } else { /* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', */ /* and TRANS = 'T' */ i__1 = *m + 1; _starpu_dtrsm_("L", "U", "N", diag, &k, n, alpha, a, &i__1, & b[k], ldb); i__1 = *m + 1; _starpu_dgemm_("T", "N", &k, n, &k, &c_b23, &a[k + 1], &i__1, &b[k], ldb, alpha, &b[b_offset], ldb); i__1 = *m + 1; _starpu_dtrsm_("L", "L", "T", diag, &k, n, &c_b27, &a[1], & i__1, &b[b_offset], ldb); } } else { /* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'U' */ if (! notrans) { /* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', */ /* and TRANS = 'N' */ i__1 = *m + 1; _starpu_dtrsm_("L", "L", "N", diag, &k, n, alpha, &a[k + 1], & i__1, &b[b_offset], ldb); i__1 = *m + 1; _starpu_dgemm_("T", "N", &k, n, &k, &c_b23, a, &i__1, &b[ b_offset], ldb, alpha, &b[k], ldb); i__1 = *m + 1; _starpu_dtrsm_("L", "U", "T", diag, &k, n, &c_b27, &a[k], & i__1, &b[k], ldb); } else { /* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', */ /* and TRANS = 'T' */ i__1 = *m + 1; _starpu_dtrsm_("L", "U", "N", diag, &k, n, alpha, &a[k], & i__1, &b[k], ldb); i__1 = *m + 1; _starpu_dgemm_("N", "N", &k, n, &k, &c_b23, a, &i__1, &b[k], ldb, alpha, &b[b_offset], ldb); i__1 = *m + 1; _starpu_dtrsm_("L", "L", "T", diag, &k, n, &c_b27, &a[k + 1], &i__1, &b[b_offset], ldb); } } } else { /* SIDE = 'L', N is even, and TRANSR = 'T' */ if (lower) { /* SIDE ='L', N is even, TRANSR = 'T', and UPLO = 'L' */ if (notrans) { /* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'L', */ /* and TRANS = 'N' */ _starpu_dtrsm_("L", "U", "T", diag, &k, n, alpha, &a[k], &k, & b[b_offset], ldb); _starpu_dgemm_("T", "N", &k, n, &k, &c_b23, &a[k * (k + 1)], & k, &b[b_offset], ldb, alpha, &b[k], ldb); _starpu_dtrsm_("L", "L", "N", diag, &k, n, &c_b27, a, &k, &b[ k], ldb); } else { /* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'L', */ /* and TRANS = 'T' */ _starpu_dtrsm_("L", "L", "T", diag, &k, n, alpha, a, &k, &b[k] , ldb); _starpu_dgemm_("N", "N", &k, n, &k, &c_b23, &a[k * (k + 1)], & k, &b[k], ldb, alpha, &b[b_offset], ldb); _starpu_dtrsm_("L", "U", "N", diag, &k, n, &c_b27, &a[k], &k, &b[b_offset], ldb); } } else { /* SIDE ='L', N is even, TRANSR = 'T', and UPLO = 'U' */ if (! notrans) { /* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'U', */ /* and TRANS = 'N' */ _starpu_dtrsm_("L", "U", "T", diag, &k, n, alpha, &a[k * (k + 1)], &k, &b[b_offset], ldb); _starpu_dgemm_("N", "N", &k, n, &k, &c_b23, a, &k, &b[ b_offset], ldb, alpha, &b[k], ldb); _starpu_dtrsm_("L", "L", "N", diag, &k, n, &c_b27, &a[k * k], &k, &b[k], ldb); } else { /* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'U', */ /* and TRANS = 'T' */ _starpu_dtrsm_("L", "L", "T", diag, &k, n, alpha, &a[k * k], & k, &b[k], ldb); _starpu_dgemm_("T", "N", &k, n, &k, &c_b23, a, &k, &b[k], ldb, alpha, &b[b_offset], ldb); _starpu_dtrsm_("L", "U", "N", diag, &k, n, &c_b27, &a[k * (k + 1)], &k, &b[b_offset], ldb); } } } } } else { /* SIDE = 'R' */ /* A is N-by-N. */ /* If N is odd, set NISODD = .TRUE., and N1 and N2. */ /* If N is even, NISODD = .FALSE., and K. */ if (*n % 2 == 0) { nisodd = FALSE_; k = *n / 2; } else { nisodd = TRUE_; if (lower) { n2 = *n / 2; n1 = *n - n2; } else { n1 = *n / 2; n2 = *n - n1; } } if (nisodd) { /* SIDE = 'R' and N is odd */ if (normaltransr) { /* SIDE = 'R', N is odd, and TRANSR = 'N' */ if (lower) { /* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'L' */ if (notrans) { /* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and */ /* TRANS = 'N' */ _starpu_dtrsm_("R", "U", "T", diag, m, &n2, alpha, &a[*n], n, &b[n1 * b_dim1], ldb); _starpu_dgemm_("N", "N", m, &n1, &n2, &c_b23, &b[n1 * b_dim1], ldb, &a[n1], n, alpha, b, ldb); _starpu_dtrsm_("R", "L", "N", diag, m, &n1, &c_b27, a, n, b, ldb); } else { /* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and */ /* TRANS = 'T' */ _starpu_dtrsm_("R", "L", "T", diag, m, &n1, alpha, a, n, b, ldb); _starpu_dgemm_("N", "T", m, &n2, &n1, &c_b23, b, ldb, &a[n1], n, alpha, &b[n1 * b_dim1], ldb); _starpu_dtrsm_("R", "U", "N", diag, m, &n2, &c_b27, &a[*n], n, &b[n1 * b_dim1], ldb); } } else { /* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'U' */ if (notrans) { /* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and */ /* TRANS = 'N' */ _starpu_dtrsm_("R", "L", "T", diag, m, &n1, alpha, &a[n2], n, b, ldb); _starpu_dgemm_("N", "N", m, &n2, &n1, &c_b23, b, ldb, a, n, alpha, &b[n1 * b_dim1], ldb); _starpu_dtrsm_("R", "U", "N", diag, m, &n2, &c_b27, &a[n1], n, &b[n1 * b_dim1], ldb); } else { /* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and */ /* TRANS = 'T' */ _starpu_dtrsm_("R", "U", "T", diag, m, &n2, alpha, &a[n1], n, &b[n1 * b_dim1], ldb); _starpu_dgemm_("N", "T", m, &n1, &n2, &c_b23, &b[n1 * b_dim1], ldb, a, n, alpha, b, ldb); _starpu_dtrsm_("R", "L", "N", diag, m, &n1, &c_b27, &a[n2], n, b, ldb); } } } else { /* SIDE = 'R', N is odd, and TRANSR = 'T' */ if (lower) { /* SIDE ='R', N is odd, TRANSR = 'T', and UPLO = 'L' */ if (notrans) { /* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'L', and */ /* TRANS = 'N' */ _starpu_dtrsm_("R", "L", "N", diag, m, &n2, alpha, &a[1], &n1, &b[n1 * b_dim1], ldb); _starpu_dgemm_("N", "T", m, &n1, &n2, &c_b23, &b[n1 * b_dim1], ldb, &a[n1 * n1], &n1, alpha, b, ldb); _starpu_dtrsm_("R", "U", "T", diag, m, &n1, &c_b27, a, &n1, b, ldb); } else { /* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'L', and */ /* TRANS = 'T' */ _starpu_dtrsm_("R", "U", "N", diag, m, &n1, alpha, a, &n1, b, ldb); _starpu_dgemm_("N", "N", m, &n2, &n1, &c_b23, b, ldb, &a[n1 * n1], &n1, alpha, &b[n1 * b_dim1], ldb); _starpu_dtrsm_("R", "L", "T", diag, m, &n2, &c_b27, &a[1], & n1, &b[n1 * b_dim1], ldb); } } else { /* SIDE ='R', N is odd, TRANSR = 'T', and UPLO = 'U' */ if (notrans) { /* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'U', and */ /* TRANS = 'N' */ _starpu_dtrsm_("R", "U", "N", diag, m, &n1, alpha, &a[n2 * n2] , &n2, b, ldb); _starpu_dgemm_("N", "T", m, &n2, &n1, &c_b23, b, ldb, a, &n2, alpha, &b[n1 * b_dim1], ldb); _starpu_dtrsm_("R", "L", "T", diag, m, &n2, &c_b27, &a[n1 * n2], &n2, &b[n1 * b_dim1], ldb); } else { /* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'U', and */ /* TRANS = 'T' */ _starpu_dtrsm_("R", "L", "N", diag, m, &n2, alpha, &a[n1 * n2] , &n2, &b[n1 * b_dim1], ldb); _starpu_dgemm_("N", "N", m, &n1, &n2, &c_b23, &b[n1 * b_dim1], ldb, a, &n2, alpha, b, ldb); _starpu_dtrsm_("R", "U", "T", diag, m, &n1, &c_b27, &a[n2 * n2], &n2, b, ldb); } } } } else { /* SIDE = 'R' and N is even */ if (normaltransr) { /* SIDE = 'R', N is even, and TRANSR = 'N' */ if (lower) { /* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'L' */ if (notrans) { /* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', */ /* and TRANS = 'N' */ i__1 = *n + 1; _starpu_dtrsm_("R", "U", "T", diag, m, &k, alpha, a, &i__1, & b[k * b_dim1], ldb); i__1 = *n + 1; _starpu_dgemm_("N", "N", m, &k, &k, &c_b23, &b[k * b_dim1], ldb, &a[k + 1], &i__1, alpha, b, ldb); i__1 = *n + 1; _starpu_dtrsm_("R", "L", "N", diag, m, &k, &c_b27, &a[1], & i__1, b, ldb); } else { /* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', */ /* and TRANS = 'T' */ i__1 = *n + 1; _starpu_dtrsm_("R", "L", "T", diag, m, &k, alpha, &a[1], & i__1, b, ldb); i__1 = *n + 1; _starpu_dgemm_("N", "T", m, &k, &k, &c_b23, b, ldb, &a[k + 1], &i__1, alpha, &b[k * b_dim1], ldb); i__1 = *n + 1; _starpu_dtrsm_("R", "U", "N", diag, m, &k, &c_b27, a, &i__1, & b[k * b_dim1], ldb); } } else { /* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'U' */ if (notrans) { /* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', */ /* and TRANS = 'N' */ i__1 = *n + 1; _starpu_dtrsm_("R", "L", "T", diag, m, &k, alpha, &a[k + 1], & i__1, b, ldb); i__1 = *n + 1; _starpu_dgemm_("N", "N", m, &k, &k, &c_b23, b, ldb, a, &i__1, alpha, &b[k * b_dim1], ldb); i__1 = *n + 1; _starpu_dtrsm_("R", "U", "N", diag, m, &k, &c_b27, &a[k], & i__1, &b[k * b_dim1], ldb); } else { /* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', */ /* and TRANS = 'T' */ i__1 = *n + 1; _starpu_dtrsm_("R", "U", "T", diag, m, &k, alpha, &a[k], & i__1, &b[k * b_dim1], ldb); i__1 = *n + 1; _starpu_dgemm_("N", "T", m, &k, &k, &c_b23, &b[k * b_dim1], ldb, a, &i__1, alpha, b, ldb); i__1 = *n + 1; _starpu_dtrsm_("R", "L", "N", diag, m, &k, &c_b27, &a[k + 1], &i__1, b, ldb); } } } else { /* SIDE = 'R', N is even, and TRANSR = 'T' */ if (lower) { /* SIDE ='R', N is even, TRANSR = 'T', and UPLO = 'L' */ if (notrans) { /* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'L', */ /* and TRANS = 'N' */ _starpu_dtrsm_("R", "L", "N", diag, m, &k, alpha, a, &k, &b[k * b_dim1], ldb); _starpu_dgemm_("N", "T", m, &k, &k, &c_b23, &b[k * b_dim1], ldb, &a[(k + 1) * k], &k, alpha, b, ldb); _starpu_dtrsm_("R", "U", "T", diag, m, &k, &c_b27, &a[k], &k, b, ldb); } else { /* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'L', */ /* and TRANS = 'T' */ _starpu_dtrsm_("R", "U", "N", diag, m, &k, alpha, &a[k], &k, b, ldb); _starpu_dgemm_("N", "N", m, &k, &k, &c_b23, b, ldb, &a[(k + 1) * k], &k, alpha, &b[k * b_dim1], ldb); _starpu_dtrsm_("R", "L", "T", diag, m, &k, &c_b27, a, &k, &b[ k * b_dim1], ldb); } } else { /* SIDE ='R', N is even, TRANSR = 'T', and UPLO = 'U' */ if (notrans) { /* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'U', */ /* and TRANS = 'N' */ _starpu_dtrsm_("R", "U", "N", diag, m, &k, alpha, &a[(k + 1) * k], &k, b, ldb); _starpu_dgemm_("N", "T", m, &k, &k, &c_b23, b, ldb, a, &k, alpha, &b[k * b_dim1], ldb); _starpu_dtrsm_("R", "L", "T", diag, m, &k, &c_b27, &a[k * k], &k, &b[k * b_dim1], ldb); } else { /* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'U', */ /* and TRANS = 'T' */ _starpu_dtrsm_("R", "L", "N", diag, m, &k, alpha, &a[k * k], & k, &b[k * b_dim1], ldb); _starpu_dgemm_("N", "N", m, &k, &k, &c_b23, &b[k * b_dim1], ldb, a, &k, alpha, b, ldb); _starpu_dtrsm_("R", "U", "T", diag, m, &k, &c_b27, &a[(k + 1) * k], &k, b, ldb); } } } } } return 0; /* End of DTFSM */ } /* _starpu_dtfsm_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtftri.c000066400000000000000000000325431507764646700206770ustar00rootroot00000000000000/* dtftri.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b13 = -1.; static doublereal c_b18 = 1.; /* Subroutine */ int _starpu_dtftri_(char *transr, char *uplo, char *diag, integer *n, doublereal *a, integer *info) { /* System generated locals */ integer i__1, i__2; /* Local variables */ integer k, n1, n2; logical normaltransr; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical lower; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nisodd; extern /* Subroutine */ int _starpu_dtrtri_(char *, char *, integer *, doublereal *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTFTRI computes the inverse of a triangular matrix A stored in RFP */ /* format. */ /* This is a Level 3 BLAS version of the algorithm. */ /* Arguments */ /* ========= */ /* TRANSR (input) CHARACTER */ /* = 'N': The Normal TRANSR of RFP A is stored; */ /* = 'T': The Transpose TRANSR of RFP A is stored. */ /* UPLO (input) CHARACTER */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* DIAG (input) CHARACTER */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (0:nt-1); */ /* nt=N*(N+1)/2. On entry, the triangular factor of a Hermitian */ /* Positive Definite matrix A in RFP format. RFP format is */ /* described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' */ /* then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is */ /* (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is */ /* the transpose of RFP A as defined when */ /* TRANSR = 'N'. The contents of RFP A are defined by UPLO as */ /* follows: If UPLO = 'U' the RFP A contains the nt elements of */ /* upper packed A; If UPLO = 'L' the RFP A contains the nt */ /* elements of lower packed A. The LDA of RFP A is (N+1)/2 when */ /* TRANSR = 'T'. When TRANSR is 'N' the LDA is N+1 when N is */ /* even and N is odd. See the Note below for more details. */ /* On exit, the (triangular) inverse of the original matrix, in */ /* the same storage format. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, A(i,i) is exactly zero. The triangular */ /* matrix is singular and its inverse can not be computed. */ /* Notes */ /* ===== */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* even. We give an example where N = 6. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 05 00 */ /* 11 12 13 14 15 10 11 */ /* 22 23 24 25 20 21 22 */ /* 33 34 35 30 31 32 33 */ /* 44 45 40 41 42 43 44 */ /* 55 50 51 52 53 54 55 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ /* the transpose of the first three columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ /* the transpose of the last three columns of AP lower. */ /* This covers the case N even and TRANSR = 'N'. */ /* RFP A RFP A */ /* 03 04 05 33 43 53 */ /* 13 14 15 00 44 54 */ /* 23 24 25 10 11 55 */ /* 33 34 35 20 21 22 */ /* 00 44 45 30 31 32 */ /* 01 11 55 40 41 42 */ /* 02 12 22 50 51 52 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ /* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ /* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* odd. We give an example where N = 5. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 00 */ /* 11 12 13 14 10 11 */ /* 22 23 24 20 21 22 */ /* 33 34 30 31 32 33 */ /* 44 40 41 42 43 44 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ /* the transpose of the first two columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ /* the transpose of the last two columns of AP lower. */ /* This covers the case N odd and TRANSR = 'N'. */ /* RFP A RFP A */ /* 02 03 04 00 33 43 */ /* 12 13 14 10 11 44 */ /* 22 23 24 20 21 22 */ /* 00 33 34 30 31 32 */ /* 01 11 44 40 41 42 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 02 12 22 00 01 00 10 20 30 40 50 */ /* 03 13 23 33 11 33 11 21 31 41 51 */ /* 04 14 24 34 44 43 44 22 32 42 52 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ *info = 0; normaltransr = _starpu_lsame_(transr, "N"); lower = _starpu_lsame_(uplo, "L"); if (! normaltransr && ! _starpu_lsame_(transr, "T")) { *info = -1; } else if (! lower && ! _starpu_lsame_(uplo, "U")) { *info = -2; } else if (! _starpu_lsame_(diag, "N") && ! _starpu_lsame_(diag, "U")) { *info = -3; } else if (*n < 0) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTFTRI", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* If N is odd, set NISODD = .TRUE. */ /* If N is even, set K = N/2 and NISODD = .FALSE. */ if (*n % 2 == 0) { k = *n / 2; nisodd = FALSE_; } else { nisodd = TRUE_; } /* Set N1 and N2 depending on LOWER */ if (lower) { n2 = *n / 2; n1 = *n - n2; } else { n1 = *n / 2; n2 = *n - n1; } /* start execution: there are eight cases */ if (nisodd) { /* N is odd */ if (normaltransr) { /* N is odd and TRANSR = 'N' */ if (lower) { /* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) */ /* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) */ /* T1 -> a(0), T2 -> a(n), S -> a(n1) */ _starpu_dtrtri_("L", diag, &n1, a, n, info); if (*info > 0) { return 0; } _starpu_dtrmm_("R", "L", "N", diag, &n2, &n1, &c_b13, a, n, &a[n1], n); _starpu_dtrtri_("U", diag, &n2, &a[*n], n, info) ; if (*info > 0) { *info += n1; } if (*info > 0) { return 0; } _starpu_dtrmm_("L", "U", "T", diag, &n2, &n1, &c_b18, &a[*n], n, &a[ n1], n); } else { /* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) */ /* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) */ /* T1 -> a(n2), T2 -> a(n1), S -> a(0) */ _starpu_dtrtri_("L", diag, &n1, &a[n2], n, info) ; if (*info > 0) { return 0; } _starpu_dtrmm_("L", "L", "T", diag, &n1, &n2, &c_b13, &a[n2], n, a, n); _starpu_dtrtri_("U", diag, &n2, &a[n1], n, info) ; if (*info > 0) { *info += n1; } if (*info > 0) { return 0; } _starpu_dtrmm_("R", "U", "N", diag, &n1, &n2, &c_b18, &a[n1], n, a, n); } } else { /* N is odd and TRANSR = 'T' */ if (lower) { /* SRPA for LOWER, TRANSPOSE and N is odd */ /* T1 -> a(0), T2 -> a(1), S -> a(0+n1*n1) */ _starpu_dtrtri_("U", diag, &n1, a, &n1, info); if (*info > 0) { return 0; } _starpu_dtrmm_("L", "U", "N", diag, &n1, &n2, &c_b13, a, &n1, &a[n1 * n1], &n1); _starpu_dtrtri_("L", diag, &n2, &a[1], &n1, info); if (*info > 0) { *info += n1; } if (*info > 0) { return 0; } _starpu_dtrmm_("R", "L", "T", diag, &n1, &n2, &c_b18, &a[1], &n1, &a[ n1 * n1], &n1); } else { /* SRPA for UPPER, TRANSPOSE and N is odd */ /* T1 -> a(0+n2*n2), T2 -> a(0+n1*n2), S -> a(0) */ _starpu_dtrtri_("U", diag, &n1, &a[n2 * n2], &n2, info); if (*info > 0) { return 0; } _starpu_dtrmm_("R", "U", "T", diag, &n2, &n1, &c_b13, &a[n2 * n2], & n2, a, &n2); _starpu_dtrtri_("L", diag, &n2, &a[n1 * n2], &n2, info); if (*info > 0) { *info += n1; } if (*info > 0) { return 0; } _starpu_dtrmm_("L", "L", "N", diag, &n2, &n1, &c_b18, &a[n1 * n2], & n2, a, &n2); } } } else { /* N is even */ if (normaltransr) { /* N is even and TRANSR = 'N' */ if (lower) { /* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ /* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) */ /* T1 -> a(1), T2 -> a(0), S -> a(k+1) */ i__1 = *n + 1; _starpu_dtrtri_("L", diag, &k, &a[1], &i__1, info); if (*info > 0) { return 0; } i__1 = *n + 1; i__2 = *n + 1; _starpu_dtrmm_("R", "L", "N", diag, &k, &k, &c_b13, &a[1], &i__1, &a[ k + 1], &i__2); i__1 = *n + 1; _starpu_dtrtri_("U", diag, &k, a, &i__1, info); if (*info > 0) { *info += k; } if (*info > 0) { return 0; } i__1 = *n + 1; i__2 = *n + 1; _starpu_dtrmm_("L", "U", "T", diag, &k, &k, &c_b18, a, &i__1, &a[k + 1], &i__2) ; } else { /* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ /* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) */ /* T1 -> a(k+1), T2 -> a(k), S -> a(0) */ i__1 = *n + 1; _starpu_dtrtri_("L", diag, &k, &a[k + 1], &i__1, info); if (*info > 0) { return 0; } i__1 = *n + 1; i__2 = *n + 1; _starpu_dtrmm_("L", "L", "T", diag, &k, &k, &c_b13, &a[k + 1], &i__1, a, &i__2); i__1 = *n + 1; _starpu_dtrtri_("U", diag, &k, &a[k], &i__1, info); if (*info > 0) { *info += k; } if (*info > 0) { return 0; } i__1 = *n + 1; i__2 = *n + 1; _starpu_dtrmm_("R", "U", "N", diag, &k, &k, &c_b18, &a[k], &i__1, a, & i__2); } } else { /* N is even and TRANSR = 'T' */ if (lower) { /* SRPA for LOWER, TRANSPOSE and N is even (see paper) */ /* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) */ /* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k */ _starpu_dtrtri_("U", diag, &k, &a[k], &k, info); if (*info > 0) { return 0; } _starpu_dtrmm_("L", "U", "N", diag, &k, &k, &c_b13, &a[k], &k, &a[k * (k + 1)], &k); _starpu_dtrtri_("L", diag, &k, a, &k, info); if (*info > 0) { *info += k; } if (*info > 0) { return 0; } _starpu_dtrmm_("R", "L", "T", diag, &k, &k, &c_b18, a, &k, &a[k * (k + 1)], &k) ; } else { /* SRPA for UPPER, TRANSPOSE and N is even (see paper) */ /* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) */ /* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k */ _starpu_dtrtri_("U", diag, &k, &a[k * (k + 1)], &k, info); if (*info > 0) { return 0; } _starpu_dtrmm_("R", "U", "T", diag, &k, &k, &c_b13, &a[k * (k + 1)], & k, a, &k); _starpu_dtrtri_("L", diag, &k, &a[k * k], &k, info); if (*info > 0) { *info += k; } if (*info > 0) { return 0; } _starpu_dtrmm_("L", "L", "N", diag, &k, &k, &c_b18, &a[k * k], &k, a, &k); } } } return 0; /* End of DTFTRI */ } /* _starpu_dtftri_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtfttp.c000066400000000000000000000313541507764646700207070ustar00rootroot00000000000000/* dtfttp.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtfttp_(char *transr, char *uplo, integer *n, doublereal *arf, doublereal *ap, integer *info) { /* System generated locals */ integer i__1, i__2, i__3; /* Local variables */ integer i__, j, k, n1, n2, ij, jp, js, nt, lda, ijp; logical normaltransr; extern logical _starpu_lsame_(char *, char *); logical lower; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nisodd; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTFTTP copies a triangular matrix A from rectangular full packed */ /* format (TF) to standard packed format (TP). */ /* Arguments */ /* ========= */ /* TRANSR (input) CHARACTER */ /* = 'N': ARF is in Normal format; */ /* = 'T': ARF is in Transpose format; */ /* UPLO (input) CHARACTER */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* ARF (input) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ), */ /* On entry, the upper or lower triangular matrix A stored in */ /* RFP format. For a further discussion see Notes below. */ /* AP (output) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ), */ /* On exit, the upper or lower triangular matrix A, packed */ /* columnwise in a linear array. The j-th column of A is stored */ /* in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Notes */ /* ===== */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* even. We give an example where N = 6. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 05 00 */ /* 11 12 13 14 15 10 11 */ /* 22 23 24 25 20 21 22 */ /* 33 34 35 30 31 32 33 */ /* 44 45 40 41 42 43 44 */ /* 55 50 51 52 53 54 55 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ /* the transpose of the first three columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ /* the transpose of the last three columns of AP lower. */ /* This covers the case N even and TRANSR = 'N'. */ /* RFP A RFP A */ /* 03 04 05 33 43 53 */ /* 13 14 15 00 44 54 */ /* 23 24 25 10 11 55 */ /* 33 34 35 20 21 22 */ /* 00 44 45 30 31 32 */ /* 01 11 55 40 41 42 */ /* 02 12 22 50 51 52 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ /* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ /* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* odd. We give an example where N = 5. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 00 */ /* 11 12 13 14 10 11 */ /* 22 23 24 20 21 22 */ /* 33 34 30 31 32 33 */ /* 44 40 41 42 43 44 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ /* the transpose of the first two columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ /* the transpose of the last two columns of AP lower. */ /* This covers the case N odd and TRANSR = 'N'. */ /* RFP A RFP A */ /* 02 03 04 00 33 43 */ /* 12 13 14 10 11 44 */ /* 22 23 24 20 21 22 */ /* 00 33 34 30 31 32 */ /* 01 11 44 40 41 42 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 02 12 22 00 01 00 10 20 30 40 50 */ /* 03 13 23 33 11 33 11 21 31 41 51 */ /* 04 14 24 34 44 43 44 22 32 42 52 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ *info = 0; normaltransr = _starpu_lsame_(transr, "N"); lower = _starpu_lsame_(uplo, "L"); if (! normaltransr && ! _starpu_lsame_(transr, "T")) { *info = -1; } else if (! lower && ! _starpu_lsame_(uplo, "U")) { *info = -2; } else if (*n < 0) { *info = -3; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTFTTP", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { if (normaltransr) { ap[0] = arf[0]; } else { ap[0] = arf[0]; } return 0; } /* Size of array ARF(0:NT-1) */ nt = *n * (*n + 1) / 2; /* Set N1 and N2 depending on LOWER */ if (lower) { n2 = *n / 2; n1 = *n - n2; } else { n1 = *n / 2; n2 = *n - n1; } /* If N is odd, set NISODD = .TRUE. */ /* If N is even, set K = N/2 and NISODD = .FALSE. */ /* set lda of ARF^C; ARF^C is (0:(N+1)/2-1,0:N-noe) */ /* where noe = 0 if n is even, noe = 1 if n is odd */ if (*n % 2 == 0) { k = *n / 2; nisodd = FALSE_; lda = *n + 1; } else { nisodd = TRUE_; lda = *n; } /* ARF^C has lda rows and n+1-noe cols */ if (! normaltransr) { lda = (*n + 1) / 2; } /* start execution: there are eight cases */ if (nisodd) { /* N is odd */ if (normaltransr) { /* N is odd and TRANSR = 'N' */ if (lower) { /* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) */ /* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) */ /* T1 -> a(0), T2 -> a(n), S -> a(n1); lda = n */ ijp = 0; jp = 0; i__1 = n2; for (j = 0; j <= i__1; ++j) { i__2 = *n - 1; for (i__ = j; i__ <= i__2; ++i__) { ij = i__ + jp; ap[ijp] = arf[ij]; ++ijp; } jp += lda; } i__1 = n2 - 1; for (i__ = 0; i__ <= i__1; ++i__) { i__2 = n2; for (j = i__ + 1; j <= i__2; ++j) { ij = i__ + j * lda; ap[ijp] = arf[ij]; ++ijp; } } } else { /* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) */ /* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) */ /* T1 -> a(n2), T2 -> a(n1), S -> a(0) */ ijp = 0; i__1 = n1 - 1; for (j = 0; j <= i__1; ++j) { ij = n2 + j; i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { ap[ijp] = arf[ij]; ++ijp; ij += lda; } } js = 0; i__1 = *n - 1; for (j = n1; j <= i__1; ++j) { ij = js; i__2 = js + j; for (ij = js; ij <= i__2; ++ij) { ap[ijp] = arf[ij]; ++ijp; } js += lda; } } } else { /* N is odd and TRANSR = 'T' */ if (lower) { /* SRPA for LOWER, TRANSPOSE and N is odd */ /* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) */ /* T1 -> a(0+0) , T2 -> a(1+0) , S -> a(0+n1*n1); lda=n1 */ ijp = 0; i__1 = n2; for (i__ = 0; i__ <= i__1; ++i__) { i__2 = *n * lda - 1; i__3 = lda; for (ij = i__ * (lda + 1); i__3 < 0 ? ij >= i__2 : ij <= i__2; ij += i__3) { ap[ijp] = arf[ij]; ++ijp; } } js = 1; i__1 = n2 - 1; for (j = 0; j <= i__1; ++j) { i__3 = js + n2 - j - 1; for (ij = js; ij <= i__3; ++ij) { ap[ijp] = arf[ij]; ++ijp; } js = js + lda + 1; } } else { /* SRPA for UPPER, TRANSPOSE and N is odd */ /* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) */ /* T1 -> a(n2*n2), T2 -> a(n1*n2), S -> a(0); lda = n2 */ ijp = 0; js = n2 * lda; i__1 = n1 - 1; for (j = 0; j <= i__1; ++j) { i__3 = js + j; for (ij = js; ij <= i__3; ++ij) { ap[ijp] = arf[ij]; ++ijp; } js += lda; } i__1 = n1; for (i__ = 0; i__ <= i__1; ++i__) { i__3 = i__ + (n1 + i__) * lda; i__2 = lda; for (ij = i__; i__2 < 0 ? ij >= i__3 : ij <= i__3; ij += i__2) { ap[ijp] = arf[ij]; ++ijp; } } } } } else { /* N is even */ if (normaltransr) { /* N is even and TRANSR = 'N' */ if (lower) { /* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ /* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) */ /* T1 -> a(1), T2 -> a(0), S -> a(k+1) */ ijp = 0; jp = 0; i__1 = k - 1; for (j = 0; j <= i__1; ++j) { i__2 = *n - 1; for (i__ = j; i__ <= i__2; ++i__) { ij = i__ + 1 + jp; ap[ijp] = arf[ij]; ++ijp; } jp += lda; } i__1 = k - 1; for (i__ = 0; i__ <= i__1; ++i__) { i__2 = k - 1; for (j = i__; j <= i__2; ++j) { ij = i__ + j * lda; ap[ijp] = arf[ij]; ++ijp; } } } else { /* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) */ /* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) */ /* T1 -> a(k+1), T2 -> a(k), S -> a(0) */ ijp = 0; i__1 = k - 1; for (j = 0; j <= i__1; ++j) { ij = k + 1 + j; i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { ap[ijp] = arf[ij]; ++ijp; ij += lda; } } js = 0; i__1 = *n - 1; for (j = k; j <= i__1; ++j) { ij = js; i__2 = js + j; for (ij = js; ij <= i__2; ++ij) { ap[ijp] = arf[ij]; ++ijp; } js += lda; } } } else { /* N is even and TRANSR = 'T' */ if (lower) { /* SRPA for LOWER, TRANSPOSE and N is even (see paper) */ /* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) */ /* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k */ ijp = 0; i__1 = k - 1; for (i__ = 0; i__ <= i__1; ++i__) { i__2 = (*n + 1) * lda - 1; i__3 = lda; for (ij = i__ + (i__ + 1) * lda; i__3 < 0 ? ij >= i__2 : ij <= i__2; ij += i__3) { ap[ijp] = arf[ij]; ++ijp; } } js = 0; i__1 = k - 1; for (j = 0; j <= i__1; ++j) { i__3 = js + k - j - 1; for (ij = js; ij <= i__3; ++ij) { ap[ijp] = arf[ij]; ++ijp; } js = js + lda + 1; } } else { /* SRPA for UPPER, TRANSPOSE and N is even (see paper) */ /* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) */ /* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k */ ijp = 0; js = (k + 1) * lda; i__1 = k - 1; for (j = 0; j <= i__1; ++j) { i__3 = js + j; for (ij = js; ij <= i__3; ++ij) { ap[ijp] = arf[ij]; ++ijp; } js += lda; } i__1 = k - 1; for (i__ = 0; i__ <= i__1; ++i__) { i__3 = i__ + (k + i__) * lda; i__2 = lda; for (ij = i__; i__2 < 0 ? ij >= i__3 : ij <= i__3; ij += i__2) { ap[ijp] = arf[ij]; ++ijp; } } } } } return 0; /* End of DTFTTP */ } /* _starpu_dtfttp_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtfttr.c000066400000000000000000000300071507764646700207030ustar00rootroot00000000000000/* dtfttr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtfttr_(char *transr, char *uplo, integer *n, doublereal *arf, doublereal *a, integer *lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, k, l, n1, n2, ij, nt, nx2, np1x2; logical normaltransr; extern logical _starpu_lsame_(char *, char *); logical lower; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nisodd; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTFTTR copies a triangular matrix A from rectangular full packed */ /* format (TF) to standard full format (TR). */ /* Arguments */ /* ========= */ /* TRANSR (input) CHARACTER */ /* = 'N': ARF is in Normal format; */ /* = 'T': ARF is in Transpose format. */ /* UPLO (input) CHARACTER */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* N (input) INTEGER */ /* The order of the matrices ARF and A. N >= 0. */ /* ARF (input) DOUBLE PRECISION array, dimension (N*(N+1)/2). */ /* On entry, the upper (if UPLO = 'U') or lower (if UPLO = 'L') */ /* matrix A in RFP format. See the "Notes" below for more */ /* details. */ /* A (output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On exit, the triangular matrix A. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of the array A contains */ /* the upper triangular matrix, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of the array A contains */ /* the lower triangular matrix, and the strictly upper */ /* triangular part of A is not referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Notes */ /* ===== */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* even. We give an example where N = 6. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 05 00 */ /* 11 12 13 14 15 10 11 */ /* 22 23 24 25 20 21 22 */ /* 33 34 35 30 31 32 33 */ /* 44 45 40 41 42 43 44 */ /* 55 50 51 52 53 54 55 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ /* the transpose of the first three columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ /* the transpose of the last three columns of AP lower. */ /* This covers the case N even and TRANSR = 'N'. */ /* RFP A RFP A */ /* 03 04 05 33 43 53 */ /* 13 14 15 00 44 54 */ /* 23 24 25 10 11 55 */ /* 33 34 35 20 21 22 */ /* 00 44 45 30 31 32 */ /* 01 11 55 40 41 42 */ /* 02 12 22 50 51 52 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ /* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ /* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* odd. We give an example where N = 5. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 00 */ /* 11 12 13 14 10 11 */ /* 22 23 24 20 21 22 */ /* 33 34 30 31 32 33 */ /* 44 40 41 42 43 44 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ /* the transpose of the first two columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ /* the transpose of the last two columns of AP lower. */ /* This covers the case N odd and TRANSR = 'N'. */ /* RFP A RFP A */ /* 02 03 04 00 33 43 */ /* 12 13 14 10 11 44 */ /* 22 23 24 20 21 22 */ /* 00 33 34 30 31 32 */ /* 01 11 44 40 41 42 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 02 12 22 00 01 00 10 20 30 40 50 */ /* 03 13 23 33 11 33 11 21 31 41 51 */ /* 04 14 24 34 44 43 44 22 32 42 52 */ /* Reference */ /* ========= */ /* ===================================================================== */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda - 1 - 0 + 1; a_offset = 0 + a_dim1 * 0; a -= a_offset; /* Function Body */ *info = 0; normaltransr = _starpu_lsame_(transr, "N"); lower = _starpu_lsame_(uplo, "L"); if (! normaltransr && ! _starpu_lsame_(transr, "T")) { *info = -1; } else if (! lower && ! _starpu_lsame_(uplo, "U")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTFTTR", &i__1); return 0; } /* Quick return if possible */ if (*n <= 1) { if (*n == 1) { a[0] = arf[0]; } return 0; } /* Size of array ARF(0:nt-1) */ nt = *n * (*n + 1) / 2; /* set N1 and N2 depending on LOWER: for N even N1=N2=K */ if (lower) { n2 = *n / 2; n1 = *n - n2; } else { n1 = *n / 2; n2 = *n - n1; } /* If N is odd, set NISODD = .TRUE., LDA=N+1 and A is (N+1)--by--K2. */ /* If N is even, set K = N/2 and NISODD = .FALSE., LDA=N and A is */ /* N--by--(N+1)/2. */ if (*n % 2 == 0) { k = *n / 2; nisodd = FALSE_; if (! lower) { np1x2 = *n + *n + 2; } } else { nisodd = TRUE_; if (! lower) { nx2 = *n + *n; } } if (nisodd) { /* N is odd */ if (normaltransr) { /* N is odd and TRANSR = 'N' */ if (lower) { /* N is odd, TRANSR = 'N', and UPLO = 'L' */ ij = 0; i__1 = n2; for (j = 0; j <= i__1; ++j) { i__2 = n2 + j; for (i__ = n1; i__ <= i__2; ++i__) { a[n2 + j + i__ * a_dim1] = arf[ij]; ++ij; } i__2 = *n - 1; for (i__ = j; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = arf[ij]; ++ij; } } } else { /* N is odd, TRANSR = 'N', and UPLO = 'U' */ ij = nt - *n; i__1 = n1; for (j = *n - 1; j >= i__1; --j) { i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = arf[ij]; ++ij; } i__2 = n1 - 1; for (l = j - n1; l <= i__2; ++l) { a[j - n1 + l * a_dim1] = arf[ij]; ++ij; } ij -= nx2; } } } else { /* N is odd and TRANSR = 'T' */ if (lower) { /* N is odd, TRANSR = 'T', and UPLO = 'L' */ ij = 0; i__1 = n2 - 1; for (j = 0; j <= i__1; ++j) { i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { a[j + i__ * a_dim1] = arf[ij]; ++ij; } i__2 = *n - 1; for (i__ = n1 + j; i__ <= i__2; ++i__) { a[i__ + (n1 + j) * a_dim1] = arf[ij]; ++ij; } } i__1 = *n - 1; for (j = n2; j <= i__1; ++j) { i__2 = n1 - 1; for (i__ = 0; i__ <= i__2; ++i__) { a[j + i__ * a_dim1] = arf[ij]; ++ij; } } } else { /* N is odd, TRANSR = 'T', and UPLO = 'U' */ ij = 0; i__1 = n1; for (j = 0; j <= i__1; ++j) { i__2 = *n - 1; for (i__ = n1; i__ <= i__2; ++i__) { a[j + i__ * a_dim1] = arf[ij]; ++ij; } } i__1 = n1 - 1; for (j = 0; j <= i__1; ++j) { i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = arf[ij]; ++ij; } i__2 = *n - 1; for (l = n2 + j; l <= i__2; ++l) { a[n2 + j + l * a_dim1] = arf[ij]; ++ij; } } } } } else { /* N is even */ if (normaltransr) { /* N is even and TRANSR = 'N' */ if (lower) { /* N is even, TRANSR = 'N', and UPLO = 'L' */ ij = 0; i__1 = k - 1; for (j = 0; j <= i__1; ++j) { i__2 = k + j; for (i__ = k; i__ <= i__2; ++i__) { a[k + j + i__ * a_dim1] = arf[ij]; ++ij; } i__2 = *n - 1; for (i__ = j; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = arf[ij]; ++ij; } } } else { /* N is even, TRANSR = 'N', and UPLO = 'U' */ ij = nt - *n - 1; i__1 = k; for (j = *n - 1; j >= i__1; --j) { i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = arf[ij]; ++ij; } i__2 = k - 1; for (l = j - k; l <= i__2; ++l) { a[j - k + l * a_dim1] = arf[ij]; ++ij; } ij -= np1x2; } } } else { /* N is even and TRANSR = 'T' */ if (lower) { /* N is even, TRANSR = 'T', and UPLO = 'L' */ ij = 0; j = k; i__1 = *n - 1; for (i__ = k; i__ <= i__1; ++i__) { a[i__ + j * a_dim1] = arf[ij]; ++ij; } i__1 = k - 2; for (j = 0; j <= i__1; ++j) { i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { a[j + i__ * a_dim1] = arf[ij]; ++ij; } i__2 = *n - 1; for (i__ = k + 1 + j; i__ <= i__2; ++i__) { a[i__ + (k + 1 + j) * a_dim1] = arf[ij]; ++ij; } } i__1 = *n - 1; for (j = k - 1; j <= i__1; ++j) { i__2 = k - 1; for (i__ = 0; i__ <= i__2; ++i__) { a[j + i__ * a_dim1] = arf[ij]; ++ij; } } } else { /* N is even, TRANSR = 'T', and UPLO = 'U' */ ij = 0; i__1 = k; for (j = 0; j <= i__1; ++j) { i__2 = *n - 1; for (i__ = k; i__ <= i__2; ++i__) { a[j + i__ * a_dim1] = arf[ij]; ++ij; } } i__1 = k - 2; for (j = 0; j <= i__1; ++j) { i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { a[i__ + j * a_dim1] = arf[ij]; ++ij; } i__2 = *n - 1; for (l = k + 1 + j; l <= i__2; ++l) { a[k + 1 + j + l * a_dim1] = arf[ij]; ++ij; } } /* Note that here, on exit of the loop, J = K-1 */ i__1 = j; for (i__ = 0; i__ <= i__1; ++i__) { a[i__ + j * a_dim1] = arf[ij]; ++ij; } } } } return 0; /* End of DTFTTR */ } /* _starpu_dtfttr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtgevc.c000066400000000000000000001160071507764646700206550ustar00rootroot00000000000000/* dtgevc.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static logical c_true = TRUE_; static integer c__2 = 2; static doublereal c_b34 = 1.; static integer c__1 = 1; static doublereal c_b36 = 0.; static logical c_false = FALSE_; /* Subroutine */ int _starpu_dtgevc_(char *side, char *howmny, logical *select, integer *n, doublereal *s, integer *lds, doublereal *p, integer *ldp, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, doublereal *work, integer *info) { /* System generated locals */ integer p_dim1, p_offset, s_dim1, s_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2, d__3, d__4, d__5, d__6; /* Local variables */ integer i__, j, ja, jc, je, na, im, jr, jw, nw; doublereal big; logical lsa, lsb; doublereal ulp, sum[4] /* was [2][2] */; integer ibeg, ieig, iend; doublereal dmin__, temp, xmax, sump[4] /* was [2][2] */, sums[4] /* was [2][2] */; extern /* Subroutine */ int _starpu_dlag2_(doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); doublereal cim2a, cim2b, cre2a, cre2b, temp2, bdiag[2], acoef, scale; logical ilall; integer iside; doublereal sbeta; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical il2by2; integer iinfo; doublereal small; logical compl; doublereal anorm, bnorm; logical compr; extern /* Subroutine */ int _starpu_dlaln2_(logical *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal * , doublereal *, integer *, doublereal *, doublereal *, integer *); doublereal temp2i; extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *); doublereal temp2r; logical ilabad, ilbbad; doublereal acoefa, bcoefa, cimaga, cimagb; logical ilback; doublereal bcoefi, ascale, bscale, creala, crealb; extern doublereal _starpu_dlamch_(char *); doublereal bcoefr, salfar, safmin; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal xscale, bignum; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical ilcomp, ilcplx; integer ihwmny; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTGEVC computes some or all of the right and/or left eigenvectors of */ /* a pair of real matrices (S,P), where S is a quasi-triangular matrix */ /* and P is upper triangular. Matrix pairs of this type are produced by */ /* the generalized Schur factorization of a matrix pair (A,B): */ /* A = Q*S*Z**T, B = Q*P*Z**T */ /* as computed by DGGHRD + DHGEQZ. */ /* The right eigenvector x and the left eigenvector y of (S,P) */ /* corresponding to an eigenvalue w are defined by: */ /* S*x = w*P*x, (y**H)*S = w*(y**H)*P, */ /* where y**H denotes the conjugate tranpose of y. */ /* The eigenvalues are not input to this routine, but are computed */ /* directly from the diagonal blocks of S and P. */ /* This routine returns the matrices X and/or Y of right and left */ /* eigenvectors of (S,P), or the products Z*X and/or Q*Y, */ /* where Z and Q are input matrices. */ /* If Q and Z are the orthogonal factors from the generalized Schur */ /* factorization of a matrix pair (A,B), then Z*X and Q*Y */ /* are the matrices of right and left eigenvectors of (A,B). */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'R': compute right eigenvectors only; */ /* = 'L': compute left eigenvectors only; */ /* = 'B': compute both right and left eigenvectors. */ /* HOWMNY (input) CHARACTER*1 */ /* = 'A': compute all right and/or left eigenvectors; */ /* = 'B': compute all right and/or left eigenvectors, */ /* backtransformed by the matrices in VR and/or VL; */ /* = 'S': compute selected right and/or left eigenvectors, */ /* specified by the logical array SELECT. */ /* SELECT (input) LOGICAL array, dimension (N) */ /* If HOWMNY='S', SELECT specifies the eigenvectors to be */ /* computed. If w(j) is a real eigenvalue, the corresponding */ /* real eigenvector is computed if SELECT(j) is .TRUE.. */ /* If w(j) and w(j+1) are the real and imaginary parts of a */ /* complex eigenvalue, the corresponding complex eigenvector */ /* is computed if either SELECT(j) or SELECT(j+1) is .TRUE., */ /* and on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is */ /* set to .FALSE.. */ /* Not referenced if HOWMNY = 'A' or 'B'. */ /* N (input) INTEGER */ /* The order of the matrices S and P. N >= 0. */ /* S (input) DOUBLE PRECISION array, dimension (LDS,N) */ /* The upper quasi-triangular matrix S from a generalized Schur */ /* factorization, as computed by DHGEQZ. */ /* LDS (input) INTEGER */ /* The leading dimension of array S. LDS >= max(1,N). */ /* P (input) DOUBLE PRECISION array, dimension (LDP,N) */ /* The upper triangular matrix P from a generalized Schur */ /* factorization, as computed by DHGEQZ. */ /* 2-by-2 diagonal blocks of P corresponding to 2-by-2 blocks */ /* of S must be in positive diagonal form. */ /* LDP (input) INTEGER */ /* The leading dimension of array P. LDP >= max(1,N). */ /* VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) */ /* On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must */ /* contain an N-by-N matrix Q (usually the orthogonal matrix Q */ /* of left Schur vectors returned by DHGEQZ). */ /* On exit, if SIDE = 'L' or 'B', VL contains: */ /* if HOWMNY = 'A', the matrix Y of left eigenvectors of (S,P); */ /* if HOWMNY = 'B', the matrix Q*Y; */ /* if HOWMNY = 'S', the left eigenvectors of (S,P) specified by */ /* SELECT, stored consecutively in the columns of */ /* VL, in the same order as their eigenvalues. */ /* A complex eigenvector corresponding to a complex eigenvalue */ /* is stored in two consecutive columns, the first holding the */ /* real part, and the second the imaginary part. */ /* Not referenced if SIDE = 'R'. */ /* LDVL (input) INTEGER */ /* The leading dimension of array VL. LDVL >= 1, and if */ /* SIDE = 'L' or 'B', LDVL >= N. */ /* VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) */ /* On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must */ /* contain an N-by-N matrix Z (usually the orthogonal matrix Z */ /* of right Schur vectors returned by DHGEQZ). */ /* On exit, if SIDE = 'R' or 'B', VR contains: */ /* if HOWMNY = 'A', the matrix X of right eigenvectors of (S,P); */ /* if HOWMNY = 'B' or 'b', the matrix Z*X; */ /* if HOWMNY = 'S' or 's', the right eigenvectors of (S,P) */ /* specified by SELECT, stored consecutively in the */ /* columns of VR, in the same order as their */ /* eigenvalues. */ /* A complex eigenvector corresponding to a complex eigenvalue */ /* is stored in two consecutive columns, the first holding the */ /* real part and the second the imaginary part. */ /* Not referenced if SIDE = 'L'. */ /* LDVR (input) INTEGER */ /* The leading dimension of the array VR. LDVR >= 1, and if */ /* SIDE = 'R' or 'B', LDVR >= N. */ /* MM (input) INTEGER */ /* The number of columns in the arrays VL and/or VR. MM >= M. */ /* M (output) INTEGER */ /* The number of columns in the arrays VL and/or VR actually */ /* used to store the eigenvectors. If HOWMNY = 'A' or 'B', M */ /* is set to N. Each selected real eigenvector occupies one */ /* column and each selected complex eigenvector occupies two */ /* columns. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (6*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: the 2-by-2 block (INFO:INFO+1) does not have a complex */ /* eigenvalue. */ /* Further Details */ /* =============== */ /* Allocation of workspace: */ /* ---------- -- --------- */ /* WORK( j ) = 1-norm of j-th column of A, above the diagonal */ /* WORK( N+j ) = 1-norm of j-th column of B, above the diagonal */ /* WORK( 2*N+1:3*N ) = real part of eigenvector */ /* WORK( 3*N+1:4*N ) = imaginary part of eigenvector */ /* WORK( 4*N+1:5*N ) = real part of back-transformed eigenvector */ /* WORK( 5*N+1:6*N ) = imaginary part of back-transformed eigenvector */ /* Rowwise vs. columnwise solution methods: */ /* ------- -- ---------- -------- ------- */ /* Finding a generalized eigenvector consists basically of solving the */ /* singular triangular system */ /* (A - w B) x = 0 (for right) or: (A - w B)**H y = 0 (for left) */ /* Consider finding the i-th right eigenvector (assume all eigenvalues */ /* are real). The equation to be solved is: */ /* n i */ /* 0 = sum C(j,k) v(k) = sum C(j,k) v(k) for j = i,. . .,1 */ /* k=j k=j */ /* where C = (A - w B) (The components v(i+1:n) are 0.) */ /* The "rowwise" method is: */ /* (1) v(i) := 1 */ /* for j = i-1,. . .,1: */ /* i */ /* (2) compute s = - sum C(j,k) v(k) and */ /* k=j+1 */ /* (3) v(j) := s / C(j,j) */ /* Step 2 is sometimes called the "dot product" step, since it is an */ /* inner product between the j-th row and the portion of the eigenvector */ /* that has been computed so far. */ /* The "columnwise" method consists basically in doing the sums */ /* for all the rows in parallel. As each v(j) is computed, the */ /* contribution of v(j) times the j-th column of C is added to the */ /* partial sums. Since FORTRAN arrays are stored columnwise, this has */ /* the advantage that at each step, the elements of C that are accessed */ /* are adjacent to one another, whereas with the rowwise method, the */ /* elements accessed at a step are spaced LDS (and LDP) words apart. */ /* When finding left eigenvectors, the matrix in question is the */ /* transpose of the one in storage, so the rowwise method then */ /* actually accesses columns of A and B at each step, and so is the */ /* preferred method. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode and Test the input parameters */ /* Parameter adjustments */ --select; s_dim1 = *lds; s_offset = 1 + s_dim1; s -= s_offset; p_dim1 = *ldp; p_offset = 1 + p_dim1; p -= p_offset; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1; vr -= vr_offset; --work; /* Function Body */ if (_starpu_lsame_(howmny, "A")) { ihwmny = 1; ilall = TRUE_; ilback = FALSE_; } else if (_starpu_lsame_(howmny, "S")) { ihwmny = 2; ilall = FALSE_; ilback = FALSE_; } else if (_starpu_lsame_(howmny, "B")) { ihwmny = 3; ilall = TRUE_; ilback = TRUE_; } else { ihwmny = -1; ilall = TRUE_; } if (_starpu_lsame_(side, "R")) { iside = 1; compl = FALSE_; compr = TRUE_; } else if (_starpu_lsame_(side, "L")) { iside = 2; compl = TRUE_; compr = FALSE_; } else if (_starpu_lsame_(side, "B")) { iside = 3; compl = TRUE_; compr = TRUE_; } else { iside = -1; } *info = 0; if (iside < 0) { *info = -1; } else if (ihwmny < 0) { *info = -2; } else if (*n < 0) { *info = -4; } else if (*lds < max(1,*n)) { *info = -6; } else if (*ldp < max(1,*n)) { *info = -8; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTGEVC", &i__1); return 0; } /* Count the number of eigenvectors to be computed */ if (! ilall) { im = 0; ilcplx = FALSE_; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (ilcplx) { ilcplx = FALSE_; goto L10; } if (j < *n) { if (s[j + 1 + j * s_dim1] != 0.) { ilcplx = TRUE_; } } if (ilcplx) { if (select[j] || select[j + 1]) { im += 2; } } else { if (select[j]) { ++im; } } L10: ; } } else { im = *n; } /* Check 2-by-2 diagonal blocks of A, B */ ilabad = FALSE_; ilbbad = FALSE_; i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { if (s[j + 1 + j * s_dim1] != 0.) { if (p[j + j * p_dim1] == 0. || p[j + 1 + (j + 1) * p_dim1] == 0. || p[j + (j + 1) * p_dim1] != 0.) { ilbbad = TRUE_; } if (j < *n - 1) { if (s[j + 2 + (j + 1) * s_dim1] != 0.) { ilabad = TRUE_; } } } /* L20: */ } if (ilabad) { *info = -5; } else if (ilbbad) { *info = -7; } else if (compl && *ldvl < *n || *ldvl < 1) { *info = -10; } else if (compr && *ldvr < *n || *ldvr < 1) { *info = -12; } else if (*mm < im) { *info = -13; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTGEVC", &i__1); return 0; } /* Quick return if possible */ *m = im; if (*n == 0) { return 0; } /* Machine Constants */ safmin = _starpu_dlamch_("Safe minimum"); big = 1. / safmin; _starpu_dlabad_(&safmin, &big); ulp = _starpu_dlamch_("Epsilon") * _starpu_dlamch_("Base"); small = safmin * *n / ulp; big = 1. / small; bignum = 1. / (safmin * *n); /* Compute the 1-norm of each column of the strictly upper triangular */ /* part (i.e., excluding all elements belonging to the diagonal */ /* blocks) of A and B to check for possible overflow in the */ /* triangular solver. */ anorm = (d__1 = s[s_dim1 + 1], abs(d__1)); if (*n > 1) { anorm += (d__1 = s[s_dim1 + 2], abs(d__1)); } bnorm = (d__1 = p[p_dim1 + 1], abs(d__1)); work[1] = 0.; work[*n + 1] = 0.; i__1 = *n; for (j = 2; j <= i__1; ++j) { temp = 0.; temp2 = 0.; if (s[j + (j - 1) * s_dim1] == 0.) { iend = j - 1; } else { iend = j - 2; } i__2 = iend; for (i__ = 1; i__ <= i__2; ++i__) { temp += (d__1 = s[i__ + j * s_dim1], abs(d__1)); temp2 += (d__1 = p[i__ + j * p_dim1], abs(d__1)); /* L30: */ } work[j] = temp; work[*n + j] = temp2; /* Computing MIN */ i__3 = j + 1; i__2 = min(i__3,*n); for (i__ = iend + 1; i__ <= i__2; ++i__) { temp += (d__1 = s[i__ + j * s_dim1], abs(d__1)); temp2 += (d__1 = p[i__ + j * p_dim1], abs(d__1)); /* L40: */ } anorm = max(anorm,temp); bnorm = max(bnorm,temp2); /* L50: */ } ascale = 1. / max(anorm,safmin); bscale = 1. / max(bnorm,safmin); /* Left eigenvectors */ if (compl) { ieig = 0; /* Main loop over eigenvalues */ ilcplx = FALSE_; i__1 = *n; for (je = 1; je <= i__1; ++je) { /* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or */ /* (b) this would be the second of a complex pair. */ /* Check for complex eigenvalue, so as to be sure of which */ /* entry(-ies) of SELECT to look at. */ if (ilcplx) { ilcplx = FALSE_; goto L220; } nw = 1; if (je < *n) { if (s[je + 1 + je * s_dim1] != 0.) { ilcplx = TRUE_; nw = 2; } } if (ilall) { ilcomp = TRUE_; } else if (ilcplx) { ilcomp = select[je] || select[je + 1]; } else { ilcomp = select[je]; } if (! ilcomp) { goto L220; } /* Decide if (a) singular pencil, (b) real eigenvalue, or */ /* (c) complex eigenvalue. */ if (! ilcplx) { if ((d__1 = s[je + je * s_dim1], abs(d__1)) <= safmin && ( d__2 = p[je + je * p_dim1], abs(d__2)) <= safmin) { /* Singular matrix pencil -- return unit eigenvector */ ++ieig; i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vl[jr + ieig * vl_dim1] = 0.; /* L60: */ } vl[ieig + ieig * vl_dim1] = 1.; goto L220; } } /* Clear vector */ i__2 = nw * *n; for (jr = 1; jr <= i__2; ++jr) { work[(*n << 1) + jr] = 0.; /* L70: */ } /* T */ /* Compute coefficients in ( a A - b B ) y = 0 */ /* a is ACOEF */ /* b is BCOEFR + i*BCOEFI */ if (! ilcplx) { /* Real eigenvalue */ /* Computing MAX */ d__3 = (d__1 = s[je + je * s_dim1], abs(d__1)) * ascale, d__4 = (d__2 = p[je + je * p_dim1], abs(d__2)) * bscale, d__3 = max(d__3,d__4); temp = 1. / max(d__3,safmin); salfar = temp * s[je + je * s_dim1] * ascale; sbeta = temp * p[je + je * p_dim1] * bscale; acoef = sbeta * ascale; bcoefr = salfar * bscale; bcoefi = 0.; /* Scale to avoid underflow */ scale = 1.; lsa = abs(sbeta) >= safmin && abs(acoef) < small; lsb = abs(salfar) >= safmin && abs(bcoefr) < small; if (lsa) { scale = small / abs(sbeta) * min(anorm,big); } if (lsb) { /* Computing MAX */ d__1 = scale, d__2 = small / abs(salfar) * min(bnorm,big); scale = max(d__1,d__2); } if (lsa || lsb) { /* Computing MIN */ /* Computing MAX */ d__3 = 1., d__4 = abs(acoef), d__3 = max(d__3,d__4), d__4 = abs(bcoefr); d__1 = scale, d__2 = 1. / (safmin * max(d__3,d__4)); scale = min(d__1,d__2); if (lsa) { acoef = ascale * (scale * sbeta); } else { acoef = scale * acoef; } if (lsb) { bcoefr = bscale * (scale * salfar); } else { bcoefr = scale * bcoefr; } } acoefa = abs(acoef); bcoefa = abs(bcoefr); /* First component is 1 */ work[(*n << 1) + je] = 1.; xmax = 1.; } else { /* Complex eigenvalue */ d__1 = safmin * 100.; _starpu_dlag2_(&s[je + je * s_dim1], lds, &p[je + je * p_dim1], ldp, & d__1, &acoef, &temp, &bcoefr, &temp2, &bcoefi); bcoefi = -bcoefi; if (bcoefi == 0.) { *info = je; return 0; } /* Scale to avoid over/underflow */ acoefa = abs(acoef); bcoefa = abs(bcoefr) + abs(bcoefi); scale = 1.; if (acoefa * ulp < safmin && acoefa >= safmin) { scale = safmin / ulp / acoefa; } if (bcoefa * ulp < safmin && bcoefa >= safmin) { /* Computing MAX */ d__1 = scale, d__2 = safmin / ulp / bcoefa; scale = max(d__1,d__2); } if (safmin * acoefa > ascale) { scale = ascale / (safmin * acoefa); } if (safmin * bcoefa > bscale) { /* Computing MIN */ d__1 = scale, d__2 = bscale / (safmin * bcoefa); scale = min(d__1,d__2); } if (scale != 1.) { acoef = scale * acoef; acoefa = abs(acoef); bcoefr = scale * bcoefr; bcoefi = scale * bcoefi; bcoefa = abs(bcoefr) + abs(bcoefi); } /* Compute first two components of eigenvector */ temp = acoef * s[je + 1 + je * s_dim1]; temp2r = acoef * s[je + je * s_dim1] - bcoefr * p[je + je * p_dim1]; temp2i = -bcoefi * p[je + je * p_dim1]; if (abs(temp) > abs(temp2r) + abs(temp2i)) { work[(*n << 1) + je] = 1.; work[*n * 3 + je] = 0.; work[(*n << 1) + je + 1] = -temp2r / temp; work[*n * 3 + je + 1] = -temp2i / temp; } else { work[(*n << 1) + je + 1] = 1.; work[*n * 3 + je + 1] = 0.; temp = acoef * s[je + (je + 1) * s_dim1]; work[(*n << 1) + je] = (bcoefr * p[je + 1 + (je + 1) * p_dim1] - acoef * s[je + 1 + (je + 1) * s_dim1]) / temp; work[*n * 3 + je] = bcoefi * p[je + 1 + (je + 1) * p_dim1] / temp; } /* Computing MAX */ d__5 = (d__1 = work[(*n << 1) + je], abs(d__1)) + (d__2 = work[*n * 3 + je], abs(d__2)), d__6 = (d__3 = work[(* n << 1) + je + 1], abs(d__3)) + (d__4 = work[*n * 3 + je + 1], abs(d__4)); xmax = max(d__5,d__6); } /* Computing MAX */ d__1 = ulp * acoefa * anorm, d__2 = ulp * bcoefa * bnorm, d__1 = max(d__1,d__2); dmin__ = max(d__1,safmin); /* T */ /* Triangular solve of (a A - b B) y = 0 */ /* T */ /* (rowwise in (a A - b B) , or columnwise in (a A - b B) ) */ il2by2 = FALSE_; i__2 = *n; for (j = je + nw; j <= i__2; ++j) { if (il2by2) { il2by2 = FALSE_; goto L160; } na = 1; bdiag[0] = p[j + j * p_dim1]; if (j < *n) { if (s[j + 1 + j * s_dim1] != 0.) { il2by2 = TRUE_; bdiag[1] = p[j + 1 + (j + 1) * p_dim1]; na = 2; } } /* Check whether scaling is necessary for dot products */ xscale = 1. / max(1.,xmax); /* Computing MAX */ d__1 = work[j], d__2 = work[*n + j], d__1 = max(d__1,d__2), d__2 = acoefa * work[j] + bcoefa * work[*n + j]; temp = max(d__1,d__2); if (il2by2) { /* Computing MAX */ d__1 = temp, d__2 = work[j + 1], d__1 = max(d__1,d__2), d__2 = work[*n + j + 1], d__1 = max(d__1,d__2), d__2 = acoefa * work[j + 1] + bcoefa * work[*n + j + 1]; temp = max(d__1,d__2); } if (temp > bignum * xscale) { i__3 = nw - 1; for (jw = 0; jw <= i__3; ++jw) { i__4 = j - 1; for (jr = je; jr <= i__4; ++jr) { work[(jw + 2) * *n + jr] = xscale * work[(jw + 2) * *n + jr]; /* L80: */ } /* L90: */ } xmax *= xscale; } /* Compute dot products */ /* j-1 */ /* SUM = sum conjg( a*S(k,j) - b*P(k,j) )*x(k) */ /* k=je */ /* To reduce the op count, this is done as */ /* _ j-1 _ j-1 */ /* a*conjg( sum S(k,j)*x(k) ) - b*conjg( sum P(k,j)*x(k) ) */ /* k=je k=je */ /* which may cause underflow problems if A or B are close */ /* to underflow. (E.g., less than SMALL.) */ /* A series of compiler directives to defeat vectorization */ /* for the next loop */ /* $PL$ CMCHAR=' ' */ /* DIR$ NEXTSCALAR */ /* $DIR SCALAR */ /* DIR$ NEXT SCALAR */ /* VD$L NOVECTOR */ /* DEC$ NOVECTOR */ /* VD$ NOVECTOR */ /* VDIR NOVECTOR */ /* VOCL LOOP,SCALAR */ /* IBM PREFER SCALAR */ /* $PL$ CMCHAR='*' */ i__3 = nw; for (jw = 1; jw <= i__3; ++jw) { /* $PL$ CMCHAR=' ' */ /* DIR$ NEXTSCALAR */ /* $DIR SCALAR */ /* DIR$ NEXT SCALAR */ /* VD$L NOVECTOR */ /* DEC$ NOVECTOR */ /* VD$ NOVECTOR */ /* VDIR NOVECTOR */ /* VOCL LOOP,SCALAR */ /* IBM PREFER SCALAR */ /* $PL$ CMCHAR='*' */ i__4 = na; for (ja = 1; ja <= i__4; ++ja) { sums[ja + (jw << 1) - 3] = 0.; sump[ja + (jw << 1) - 3] = 0.; i__5 = j - 1; for (jr = je; jr <= i__5; ++jr) { sums[ja + (jw << 1) - 3] += s[jr + (j + ja - 1) * s_dim1] * work[(jw + 1) * *n + jr]; sump[ja + (jw << 1) - 3] += p[jr + (j + ja - 1) * p_dim1] * work[(jw + 1) * *n + jr]; /* L100: */ } /* L110: */ } /* L120: */ } /* $PL$ CMCHAR=' ' */ /* DIR$ NEXTSCALAR */ /* $DIR SCALAR */ /* DIR$ NEXT SCALAR */ /* VD$L NOVECTOR */ /* DEC$ NOVECTOR */ /* VD$ NOVECTOR */ /* VDIR NOVECTOR */ /* VOCL LOOP,SCALAR */ /* IBM PREFER SCALAR */ /* $PL$ CMCHAR='*' */ i__3 = na; for (ja = 1; ja <= i__3; ++ja) { if (ilcplx) { sum[ja - 1] = -acoef * sums[ja - 1] + bcoefr * sump[ ja - 1] - bcoefi * sump[ja + 1]; sum[ja + 1] = -acoef * sums[ja + 1] + bcoefr * sump[ ja + 1] + bcoefi * sump[ja - 1]; } else { sum[ja - 1] = -acoef * sums[ja - 1] + bcoefr * sump[ ja - 1]; } /* L130: */ } /* T */ /* Solve ( a A - b B ) y = SUM(,) */ /* with scaling and perturbation of the denominator */ _starpu_dlaln2_(&c_true, &na, &nw, &dmin__, &acoef, &s[j + j * s_dim1] , lds, bdiag, &bdiag[1], sum, &c__2, &bcoefr, &bcoefi, &work[(*n << 1) + j], n, &scale, &temp, &iinfo); if (scale < 1.) { i__3 = nw - 1; for (jw = 0; jw <= i__3; ++jw) { i__4 = j - 1; for (jr = je; jr <= i__4; ++jr) { work[(jw + 2) * *n + jr] = scale * work[(jw + 2) * *n + jr]; /* L140: */ } /* L150: */ } xmax = scale * xmax; } xmax = max(xmax,temp); L160: ; } /* Copy eigenvector to VL, back transforming if */ /* HOWMNY='B'. */ ++ieig; if (ilback) { i__2 = nw - 1; for (jw = 0; jw <= i__2; ++jw) { i__3 = *n + 1 - je; _starpu_dgemv_("N", n, &i__3, &c_b34, &vl[je * vl_dim1 + 1], ldvl, &work[(jw + 2) * *n + je], &c__1, &c_b36, &work[( jw + 4) * *n + 1], &c__1); /* L170: */ } _starpu_dlacpy_(" ", n, &nw, &work[(*n << 2) + 1], n, &vl[je * vl_dim1 + 1], ldvl); ibeg = 1; } else { _starpu_dlacpy_(" ", n, &nw, &work[(*n << 1) + 1], n, &vl[ieig * vl_dim1 + 1], ldvl); ibeg = je; } /* Scale eigenvector */ xmax = 0.; if (ilcplx) { i__2 = *n; for (j = ibeg; j <= i__2; ++j) { /* Computing MAX */ d__3 = xmax, d__4 = (d__1 = vl[j + ieig * vl_dim1], abs( d__1)) + (d__2 = vl[j + (ieig + 1) * vl_dim1], abs(d__2)); xmax = max(d__3,d__4); /* L180: */ } } else { i__2 = *n; for (j = ibeg; j <= i__2; ++j) { /* Computing MAX */ d__2 = xmax, d__3 = (d__1 = vl[j + ieig * vl_dim1], abs( d__1)); xmax = max(d__2,d__3); /* L190: */ } } if (xmax > safmin) { xscale = 1. / xmax; i__2 = nw - 1; for (jw = 0; jw <= i__2; ++jw) { i__3 = *n; for (jr = ibeg; jr <= i__3; ++jr) { vl[jr + (ieig + jw) * vl_dim1] = xscale * vl[jr + ( ieig + jw) * vl_dim1]; /* L200: */ } /* L210: */ } } ieig = ieig + nw - 1; L220: ; } } /* Right eigenvectors */ if (compr) { ieig = im + 1; /* Main loop over eigenvalues */ ilcplx = FALSE_; for (je = *n; je >= 1; --je) { /* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or */ /* (b) this would be the second of a complex pair. */ /* Check for complex eigenvalue, so as to be sure of which */ /* entry(-ies) of SELECT to look at -- if complex, SELECT(JE) */ /* or SELECT(JE-1). */ /* If this is a complex pair, the 2-by-2 diagonal block */ /* corresponding to the eigenvalue is in rows/columns JE-1:JE */ if (ilcplx) { ilcplx = FALSE_; goto L500; } nw = 1; if (je > 1) { if (s[je + (je - 1) * s_dim1] != 0.) { ilcplx = TRUE_; nw = 2; } } if (ilall) { ilcomp = TRUE_; } else if (ilcplx) { ilcomp = select[je] || select[je - 1]; } else { ilcomp = select[je]; } if (! ilcomp) { goto L500; } /* Decide if (a) singular pencil, (b) real eigenvalue, or */ /* (c) complex eigenvalue. */ if (! ilcplx) { if ((d__1 = s[je + je * s_dim1], abs(d__1)) <= safmin && ( d__2 = p[je + je * p_dim1], abs(d__2)) <= safmin) { /* Singular matrix pencil -- unit eigenvector */ --ieig; i__1 = *n; for (jr = 1; jr <= i__1; ++jr) { vr[jr + ieig * vr_dim1] = 0.; /* L230: */ } vr[ieig + ieig * vr_dim1] = 1.; goto L500; } } /* Clear vector */ i__1 = nw - 1; for (jw = 0; jw <= i__1; ++jw) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { work[(jw + 2) * *n + jr] = 0.; /* L240: */ } /* L250: */ } /* Compute coefficients in ( a A - b B ) x = 0 */ /* a is ACOEF */ /* b is BCOEFR + i*BCOEFI */ if (! ilcplx) { /* Real eigenvalue */ /* Computing MAX */ d__3 = (d__1 = s[je + je * s_dim1], abs(d__1)) * ascale, d__4 = (d__2 = p[je + je * p_dim1], abs(d__2)) * bscale, d__3 = max(d__3,d__4); temp = 1. / max(d__3,safmin); salfar = temp * s[je + je * s_dim1] * ascale; sbeta = temp * p[je + je * p_dim1] * bscale; acoef = sbeta * ascale; bcoefr = salfar * bscale; bcoefi = 0.; /* Scale to avoid underflow */ scale = 1.; lsa = abs(sbeta) >= safmin && abs(acoef) < small; lsb = abs(salfar) >= safmin && abs(bcoefr) < small; if (lsa) { scale = small / abs(sbeta) * min(anorm,big); } if (lsb) { /* Computing MAX */ d__1 = scale, d__2 = small / abs(salfar) * min(bnorm,big); scale = max(d__1,d__2); } if (lsa || lsb) { /* Computing MIN */ /* Computing MAX */ d__3 = 1., d__4 = abs(acoef), d__3 = max(d__3,d__4), d__4 = abs(bcoefr); d__1 = scale, d__2 = 1. / (safmin * max(d__3,d__4)); scale = min(d__1,d__2); if (lsa) { acoef = ascale * (scale * sbeta); } else { acoef = scale * acoef; } if (lsb) { bcoefr = bscale * (scale * salfar); } else { bcoefr = scale * bcoefr; } } acoefa = abs(acoef); bcoefa = abs(bcoefr); /* First component is 1 */ work[(*n << 1) + je] = 1.; xmax = 1.; /* Compute contribution from column JE of A and B to sum */ /* (See "Further Details", above.) */ i__1 = je - 1; for (jr = 1; jr <= i__1; ++jr) { work[(*n << 1) + jr] = bcoefr * p[jr + je * p_dim1] - acoef * s[jr + je * s_dim1]; /* L260: */ } } else { /* Complex eigenvalue */ d__1 = safmin * 100.; _starpu_dlag2_(&s[je - 1 + (je - 1) * s_dim1], lds, &p[je - 1 + (je - 1) * p_dim1], ldp, &d__1, &acoef, &temp, &bcoefr, & temp2, &bcoefi); if (bcoefi == 0.) { *info = je - 1; return 0; } /* Scale to avoid over/underflow */ acoefa = abs(acoef); bcoefa = abs(bcoefr) + abs(bcoefi); scale = 1.; if (acoefa * ulp < safmin && acoefa >= safmin) { scale = safmin / ulp / acoefa; } if (bcoefa * ulp < safmin && bcoefa >= safmin) { /* Computing MAX */ d__1 = scale, d__2 = safmin / ulp / bcoefa; scale = max(d__1,d__2); } if (safmin * acoefa > ascale) { scale = ascale / (safmin * acoefa); } if (safmin * bcoefa > bscale) { /* Computing MIN */ d__1 = scale, d__2 = bscale / (safmin * bcoefa); scale = min(d__1,d__2); } if (scale != 1.) { acoef = scale * acoef; acoefa = abs(acoef); bcoefr = scale * bcoefr; bcoefi = scale * bcoefi; bcoefa = abs(bcoefr) + abs(bcoefi); } /* Compute first two components of eigenvector */ /* and contribution to sums */ temp = acoef * s[je + (je - 1) * s_dim1]; temp2r = acoef * s[je + je * s_dim1] - bcoefr * p[je + je * p_dim1]; temp2i = -bcoefi * p[je + je * p_dim1]; if (abs(temp) >= abs(temp2r) + abs(temp2i)) { work[(*n << 1) + je] = 1.; work[*n * 3 + je] = 0.; work[(*n << 1) + je - 1] = -temp2r / temp; work[*n * 3 + je - 1] = -temp2i / temp; } else { work[(*n << 1) + je - 1] = 1.; work[*n * 3 + je - 1] = 0.; temp = acoef * s[je - 1 + je * s_dim1]; work[(*n << 1) + je] = (bcoefr * p[je - 1 + (je - 1) * p_dim1] - acoef * s[je - 1 + (je - 1) * s_dim1]) / temp; work[*n * 3 + je] = bcoefi * p[je - 1 + (je - 1) * p_dim1] / temp; } /* Computing MAX */ d__5 = (d__1 = work[(*n << 1) + je], abs(d__1)) + (d__2 = work[*n * 3 + je], abs(d__2)), d__6 = (d__3 = work[(* n << 1) + je - 1], abs(d__3)) + (d__4 = work[*n * 3 + je - 1], abs(d__4)); xmax = max(d__5,d__6); /* Compute contribution from columns JE and JE-1 */ /* of A and B to the sums. */ creala = acoef * work[(*n << 1) + je - 1]; cimaga = acoef * work[*n * 3 + je - 1]; crealb = bcoefr * work[(*n << 1) + je - 1] - bcoefi * work[*n * 3 + je - 1]; cimagb = bcoefi * work[(*n << 1) + je - 1] + bcoefr * work[*n * 3 + je - 1]; cre2a = acoef * work[(*n << 1) + je]; cim2a = acoef * work[*n * 3 + je]; cre2b = bcoefr * work[(*n << 1) + je] - bcoefi * work[*n * 3 + je]; cim2b = bcoefi * work[(*n << 1) + je] + bcoefr * work[*n * 3 + je]; i__1 = je - 2; for (jr = 1; jr <= i__1; ++jr) { work[(*n << 1) + jr] = -creala * s[jr + (je - 1) * s_dim1] + crealb * p[jr + (je - 1) * p_dim1] - cre2a * s[ jr + je * s_dim1] + cre2b * p[jr + je * p_dim1]; work[*n * 3 + jr] = -cimaga * s[jr + (je - 1) * s_dim1] + cimagb * p[jr + (je - 1) * p_dim1] - cim2a * s[jr + je * s_dim1] + cim2b * p[jr + je * p_dim1]; /* L270: */ } } /* Computing MAX */ d__1 = ulp * acoefa * anorm, d__2 = ulp * bcoefa * bnorm, d__1 = max(d__1,d__2); dmin__ = max(d__1,safmin); /* Columnwise triangular solve of (a A - b B) x = 0 */ il2by2 = FALSE_; for (j = je - nw; j >= 1; --j) { /* If a 2-by-2 block, is in position j-1:j, wait until */ /* next iteration to process it (when it will be j:j+1) */ if (! il2by2 && j > 1) { if (s[j + (j - 1) * s_dim1] != 0.) { il2by2 = TRUE_; goto L370; } } bdiag[0] = p[j + j * p_dim1]; if (il2by2) { na = 2; bdiag[1] = p[j + 1 + (j + 1) * p_dim1]; } else { na = 1; } /* Compute x(j) (and x(j+1), if 2-by-2 block) */ _starpu_dlaln2_(&c_false, &na, &nw, &dmin__, &acoef, &s[j + j * s_dim1], lds, bdiag, &bdiag[1], &work[(*n << 1) + j], n, &bcoefr, &bcoefi, sum, &c__2, &scale, &temp, & iinfo); if (scale < 1.) { i__1 = nw - 1; for (jw = 0; jw <= i__1; ++jw) { i__2 = je; for (jr = 1; jr <= i__2; ++jr) { work[(jw + 2) * *n + jr] = scale * work[(jw + 2) * *n + jr]; /* L280: */ } /* L290: */ } } /* Computing MAX */ d__1 = scale * xmax; xmax = max(d__1,temp); i__1 = nw; for (jw = 1; jw <= i__1; ++jw) { i__2 = na; for (ja = 1; ja <= i__2; ++ja) { work[(jw + 1) * *n + j + ja - 1] = sum[ja + (jw << 1) - 3]; /* L300: */ } /* L310: */ } /* w = w + x(j)*(a S(*,j) - b P(*,j) ) with scaling */ if (j > 1) { /* Check whether scaling is necessary for sum. */ xscale = 1. / max(1.,xmax); temp = acoefa * work[j] + bcoefa * work[*n + j]; if (il2by2) { /* Computing MAX */ d__1 = temp, d__2 = acoefa * work[j + 1] + bcoefa * work[*n + j + 1]; temp = max(d__1,d__2); } /* Computing MAX */ d__1 = max(temp,acoefa); temp = max(d__1,bcoefa); if (temp > bignum * xscale) { i__1 = nw - 1; for (jw = 0; jw <= i__1; ++jw) { i__2 = je; for (jr = 1; jr <= i__2; ++jr) { work[(jw + 2) * *n + jr] = xscale * work[(jw + 2) * *n + jr]; /* L320: */ } /* L330: */ } xmax *= xscale; } /* Compute the contributions of the off-diagonals of */ /* column j (and j+1, if 2-by-2 block) of A and B to the */ /* sums. */ i__1 = na; for (ja = 1; ja <= i__1; ++ja) { if (ilcplx) { creala = acoef * work[(*n << 1) + j + ja - 1]; cimaga = acoef * work[*n * 3 + j + ja - 1]; crealb = bcoefr * work[(*n << 1) + j + ja - 1] - bcoefi * work[*n * 3 + j + ja - 1]; cimagb = bcoefi * work[(*n << 1) + j + ja - 1] + bcoefr * work[*n * 3 + j + ja - 1]; i__2 = j - 1; for (jr = 1; jr <= i__2; ++jr) { work[(*n << 1) + jr] = work[(*n << 1) + jr] - creala * s[jr + (j + ja - 1) * s_dim1] + crealb * p[jr + (j + ja - 1) * p_dim1]; work[*n * 3 + jr] = work[*n * 3 + jr] - cimaga * s[jr + (j + ja - 1) * s_dim1] + cimagb * p[jr + (j + ja - 1) * p_dim1]; /* L340: */ } } else { creala = acoef * work[(*n << 1) + j + ja - 1]; crealb = bcoefr * work[(*n << 1) + j + ja - 1]; i__2 = j - 1; for (jr = 1; jr <= i__2; ++jr) { work[(*n << 1) + jr] = work[(*n << 1) + jr] - creala * s[jr + (j + ja - 1) * s_dim1] + crealb * p[jr + (j + ja - 1) * p_dim1]; /* L350: */ } } /* L360: */ } } il2by2 = FALSE_; L370: ; } /* Copy eigenvector to VR, back transforming if */ /* HOWMNY='B'. */ ieig -= nw; if (ilback) { i__1 = nw - 1; for (jw = 0; jw <= i__1; ++jw) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { work[(jw + 4) * *n + jr] = work[(jw + 2) * *n + 1] * vr[jr + vr_dim1]; /* L380: */ } /* A series of compiler directives to defeat */ /* vectorization for the next loop */ i__2 = je; for (jc = 2; jc <= i__2; ++jc) { i__3 = *n; for (jr = 1; jr <= i__3; ++jr) { work[(jw + 4) * *n + jr] += work[(jw + 2) * *n + jc] * vr[jr + jc * vr_dim1]; /* L390: */ } /* L400: */ } /* L410: */ } i__1 = nw - 1; for (jw = 0; jw <= i__1; ++jw) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vr[jr + (ieig + jw) * vr_dim1] = work[(jw + 4) * *n + jr]; /* L420: */ } /* L430: */ } iend = *n; } else { i__1 = nw - 1; for (jw = 0; jw <= i__1; ++jw) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vr[jr + (ieig + jw) * vr_dim1] = work[(jw + 2) * *n + jr]; /* L440: */ } /* L450: */ } iend = je; } /* Scale eigenvector */ xmax = 0.; if (ilcplx) { i__1 = iend; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ d__3 = xmax, d__4 = (d__1 = vr[j + ieig * vr_dim1], abs( d__1)) + (d__2 = vr[j + (ieig + 1) * vr_dim1], abs(d__2)); xmax = max(d__3,d__4); /* L460: */ } } else { i__1 = iend; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ d__2 = xmax, d__3 = (d__1 = vr[j + ieig * vr_dim1], abs( d__1)); xmax = max(d__2,d__3); /* L470: */ } } if (xmax > safmin) { xscale = 1. / xmax; i__1 = nw - 1; for (jw = 0; jw <= i__1; ++jw) { i__2 = iend; for (jr = 1; jr <= i__2; ++jr) { vr[jr + (ieig + jw) * vr_dim1] = xscale * vr[jr + ( ieig + jw) * vr_dim1]; /* L480: */ } /* L490: */ } } L500: ; } } return 0; /* End of DTGEVC */ } /* _starpu_dtgevc_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtgex2.c000066400000000000000000000600361507764646700205760ustar00rootroot00000000000000/* dtgex2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__4 = 4; static doublereal c_b5 = 0.; static integer c__1 = 1; static integer c__2 = 2; static doublereal c_b42 = 1.; static doublereal c_b48 = -1.; static integer c__0 = 0; /* Subroutine */ int _starpu_dtgex2_(logical *wantq, logical *wantz, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * q, integer *ldq, doublereal *z__, integer *ldz, integer *j1, integer * n1, integer *n2, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ doublereal f, g; integer i__, m; doublereal s[16] /* was [4][4] */, t[16] /* was [4][4] */, be[2], ai[2] , ar[2], sa, sb, li[16] /* was [4][4] */, ir[16] /* was [4][4] */, ss, ws, eps; logical weak; doublereal ddum; integer idum; doublereal taul[4], dsum; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); doublereal taur[4], scpy[16] /* was [4][4] */, tcpy[16] /* was [4][4] */; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal scale, bqra21, brqa21; extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); doublereal licop[16] /* was [4][4] */; integer linfo; doublereal ircop[16] /* was [4][4] */, dnorm; integer iwork[4]; extern /* Subroutine */ int _starpu_dlagv2_(doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal * , doublereal *, doublereal *, doublereal *), _starpu_dgeqr2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dgerq2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dorg2r_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dorgr2_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dorm2r_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dormr2_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dtgsy2_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); doublereal dscale; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dlassq_(integer * , doublereal *, integer *, doublereal *, doublereal *); logical dtrong; doublereal thresh, smlnum; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22) */ /* of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair */ /* (A, B) by an orthogonal equivalence transformation. */ /* (A, B) must be in generalized real Schur canonical form (as returned */ /* by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 */ /* diagonal blocks. B is upper triangular. */ /* Optionally, the matrices Q and Z of generalized Schur vectors are */ /* updated. */ /* Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' */ /* Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' */ /* Arguments */ /* ========= */ /* WANTQ (input) LOGICAL */ /* .TRUE. : update the left transformation matrix Q; */ /* .FALSE.: do not update Q. */ /* WANTZ (input) LOGICAL */ /* .TRUE. : update the right transformation matrix Z; */ /* .FALSE.: do not update Z. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* A (input/output) DOUBLE PRECISION arrays, dimensions (LDA,N) */ /* On entry, the matrix A in the pair (A, B). */ /* On exit, the updated matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION arrays, dimensions (LDB,N) */ /* On entry, the matrix B in the pair (A, B). */ /* On exit, the updated matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* Q (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ /* On entry, if WANTQ = .TRUE., the orthogonal matrix Q. */ /* On exit, the updated matrix Q. */ /* Not referenced if WANTQ = .FALSE.. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= 1. */ /* If WANTQ = .TRUE., LDQ >= N. */ /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ /* On entry, if WANTZ =.TRUE., the orthogonal matrix Z. */ /* On exit, the updated matrix Z. */ /* Not referenced if WANTZ = .FALSE.. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1. */ /* If WANTZ = .TRUE., LDZ >= N. */ /* J1 (input) INTEGER */ /* The index to the first block (A11, B11). 1 <= J1 <= N. */ /* N1 (input) INTEGER */ /* The order of the first block (A11, B11). N1 = 0, 1 or 2. */ /* N2 (input) INTEGER */ /* The order of the second block (A22, B22). N2 = 0, 1 or 2. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)). */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* LWORK >= MAX( 1, N*(N2+N1), (N2+N1)*(N2+N1)*2 ) */ /* INFO (output) INTEGER */ /* =0: Successful exit */ /* >0: If INFO = 1, the transformed matrix (A, B) would be */ /* too far from generalized Schur form; the blocks are */ /* not swapped and (A, B) and (Q, Z) are unchanged. */ /* The problem of swapping is too ill-conditioned. */ /* <0: If INFO = -16: LWORK is too small. Appropriate value */ /* for LWORK is returned in WORK(1). */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ /* Umea University, S-901 87 Umea, Sweden. */ /* In the current code both weak and strong stability tests are */ /* performed. The user can omit the strong stability test by changing */ /* the internal logical parameter WANDS to .FALSE.. See ref. [2] for */ /* details. */ /* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the */ /* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in */ /* M.S. Moonen et al (eds), Linear Algebra for Large Scale and */ /* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. */ /* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified */ /* Eigenvalues of a Regular Matrix Pair (A, B) and Condition */ /* Estimation: Theory, Algorithms and Software, */ /* Report UMINF - 94.04, Department of Computing Science, Umea */ /* University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working */ /* Note 87. To appear in Numerical Algorithms, 1996. */ /* ===================================================================== */ /* Replaced various illegal calls to DCOPY by calls to DLASET, or by DO */ /* loops. Sven Hammarling, 1/5/02. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; /* Function Body */ *info = 0; /* Quick return if possible */ if (*n <= 1 || *n1 <= 0 || *n2 <= 0) { return 0; } if (*n1 > *n || *j1 + *n1 > *n) { return 0; } m = *n1 + *n2; /* Computing MAX */ i__1 = 1, i__2 = *n * m, i__1 = max(i__1,i__2), i__2 = m * m << 1; if (*lwork < max(i__1,i__2)) { *info = -16; /* Computing MAX */ i__1 = 1, i__2 = *n * m, i__1 = max(i__1,i__2), i__2 = m * m << 1; work[1] = (doublereal) max(i__1,i__2); return 0; } weak = FALSE_; dtrong = FALSE_; /* Make a local copy of selected block */ _starpu_dlaset_("Full", &c__4, &c__4, &c_b5, &c_b5, li, &c__4); _starpu_dlaset_("Full", &c__4, &c__4, &c_b5, &c_b5, ir, &c__4); _starpu_dlacpy_("Full", &m, &m, &a[*j1 + *j1 * a_dim1], lda, s, &c__4); _starpu_dlacpy_("Full", &m, &m, &b[*j1 + *j1 * b_dim1], ldb, t, &c__4); /* Compute threshold for testing acceptance of swapping. */ eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S") / eps; dscale = 0.; dsum = 1.; _starpu_dlacpy_("Full", &m, &m, s, &c__4, &work[1], &m); i__1 = m * m; _starpu_dlassq_(&i__1, &work[1], &c__1, &dscale, &dsum); _starpu_dlacpy_("Full", &m, &m, t, &c__4, &work[1], &m); i__1 = m * m; _starpu_dlassq_(&i__1, &work[1], &c__1, &dscale, &dsum); dnorm = dscale * sqrt(dsum); /* Computing MAX */ d__1 = eps * 10. * dnorm; thresh = max(d__1,smlnum); if (m == 2) { /* CASE 1: Swap 1-by-1 and 1-by-1 blocks. */ /* Compute orthogonal QL and RQ that swap 1-by-1 and 1-by-1 blocks */ /* using Givens rotations and perform the swap tentatively. */ f = s[5] * t[0] - t[5] * s[0]; g = s[5] * t[4] - t[5] * s[4]; sb = abs(t[5]); sa = abs(s[5]); _starpu_dlartg_(&f, &g, &ir[4], ir, &ddum); ir[1] = -ir[4]; ir[5] = ir[0]; _starpu_drot_(&c__2, s, &c__1, &s[4], &c__1, ir, &ir[1]); _starpu_drot_(&c__2, t, &c__1, &t[4], &c__1, ir, &ir[1]); if (sa >= sb) { _starpu_dlartg_(s, &s[1], li, &li[1], &ddum); } else { _starpu_dlartg_(t, &t[1], li, &li[1], &ddum); } _starpu_drot_(&c__2, s, &c__4, &s[1], &c__4, li, &li[1]); _starpu_drot_(&c__2, t, &c__4, &t[1], &c__4, li, &li[1]); li[5] = li[0]; li[4] = -li[1]; /* Weak stability test: */ /* |S21| + |T21| <= O(EPS * F-norm((S, T))) */ ws = abs(s[1]) + abs(t[1]); weak = ws <= thresh; if (! weak) { goto L70; } if (TRUE_) { /* Strong stability test: */ /* F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A,B))) */ _starpu_dlacpy_("Full", &m, &m, &a[*j1 + *j1 * a_dim1], lda, &work[m * m + 1], &m); _starpu_dgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, s, &c__4, &c_b5, & work[1], &m); _starpu_dgemm_("N", "T", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, & c_b42, &work[m * m + 1], &m); dscale = 0.; dsum = 1.; i__1 = m * m; _starpu_dlassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); _starpu_dlacpy_("Full", &m, &m, &b[*j1 + *j1 * b_dim1], ldb, &work[m * m + 1], &m); _starpu_dgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, t, &c__4, &c_b5, & work[1], &m); _starpu_dgemm_("N", "T", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, & c_b42, &work[m * m + 1], &m); i__1 = m * m; _starpu_dlassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); ss = dscale * sqrt(dsum); dtrong = ss <= thresh; if (! dtrong) { goto L70; } } /* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and */ /* (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */ i__1 = *j1 + 1; _starpu_drot_(&i__1, &a[*j1 * a_dim1 + 1], &c__1, &a[(*j1 + 1) * a_dim1 + 1], &c__1, ir, &ir[1]); i__1 = *j1 + 1; _starpu_drot_(&i__1, &b[*j1 * b_dim1 + 1], &c__1, &b[(*j1 + 1) * b_dim1 + 1], &c__1, ir, &ir[1]); i__1 = *n - *j1 + 1; _starpu_drot_(&i__1, &a[*j1 + *j1 * a_dim1], lda, &a[*j1 + 1 + *j1 * a_dim1], lda, li, &li[1]); i__1 = *n - *j1 + 1; _starpu_drot_(&i__1, &b[*j1 + *j1 * b_dim1], ldb, &b[*j1 + 1 + *j1 * b_dim1], ldb, li, &li[1]); /* Set N1-by-N2 (2,1) - blocks to ZERO. */ a[*j1 + 1 + *j1 * a_dim1] = 0.; b[*j1 + 1 + *j1 * b_dim1] = 0.; /* Accumulate transformations into Q and Z if requested. */ if (*wantz) { _starpu_drot_(n, &z__[*j1 * z_dim1 + 1], &c__1, &z__[(*j1 + 1) * z_dim1 + 1], &c__1, ir, &ir[1]); } if (*wantq) { _starpu_drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[(*j1 + 1) * q_dim1 + 1], &c__1, li, &li[1]); } /* Exit with INFO = 0 if swap was successfully performed. */ return 0; } else { /* CASE 2: Swap 1-by-1 and 2-by-2 blocks, or 2-by-2 */ /* and 2-by-2 blocks. */ /* Solve the generalized Sylvester equation */ /* S11 * R - L * S22 = SCALE * S12 */ /* T11 * R - L * T22 = SCALE * T12 */ /* for R and L. Solutions in LI and IR. */ _starpu_dlacpy_("Full", n1, n2, &t[(*n1 + 1 << 2) - 4], &c__4, li, &c__4); _starpu_dlacpy_("Full", n1, n2, &s[(*n1 + 1 << 2) - 4], &c__4, &ir[*n2 + 1 + ( *n1 + 1 << 2) - 5], &c__4); _starpu_dtgsy2_("N", &c__0, n1, n2, s, &c__4, &s[*n1 + 1 + (*n1 + 1 << 2) - 5] , &c__4, &ir[*n2 + 1 + (*n1 + 1 << 2) - 5], &c__4, t, &c__4, & t[*n1 + 1 + (*n1 + 1 << 2) - 5], &c__4, li, &c__4, &scale, & dsum, &dscale, iwork, &idum, &linfo); /* Compute orthogonal matrix QL: */ /* QL' * LI = [ TL ] */ /* [ 0 ] */ /* where */ /* LI = [ -L ] */ /* [ SCALE * identity(N2) ] */ i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { _starpu_dscal_(n1, &c_b48, &li[(i__ << 2) - 4], &c__1); li[*n1 + i__ + (i__ << 2) - 5] = scale; /* L10: */ } _starpu_dgeqr2_(&m, n2, li, &c__4, taul, &work[1], &linfo); if (linfo != 0) { goto L70; } _starpu_dorg2r_(&m, &m, n2, li, &c__4, taul, &work[1], &linfo); if (linfo != 0) { goto L70; } /* Compute orthogonal matrix RQ: */ /* IR * RQ' = [ 0 TR], */ /* where IR = [ SCALE * identity(N1), R ] */ i__1 = *n1; for (i__ = 1; i__ <= i__1; ++i__) { ir[*n2 + i__ + (i__ << 2) - 5] = scale; /* L20: */ } _starpu_dgerq2_(n1, &m, &ir[*n2], &c__4, taur, &work[1], &linfo); if (linfo != 0) { goto L70; } _starpu_dorgr2_(&m, &m, n1, ir, &c__4, taur, &work[1], &linfo); if (linfo != 0) { goto L70; } /* Perform the swapping tentatively: */ _starpu_dgemm_("T", "N", &m, &m, &m, &c_b42, li, &c__4, s, &c__4, &c_b5, & work[1], &m); _starpu_dgemm_("N", "T", &m, &m, &m, &c_b42, &work[1], &m, ir, &c__4, &c_b5, s, &c__4); _starpu_dgemm_("T", "N", &m, &m, &m, &c_b42, li, &c__4, t, &c__4, &c_b5, & work[1], &m); _starpu_dgemm_("N", "T", &m, &m, &m, &c_b42, &work[1], &m, ir, &c__4, &c_b5, t, &c__4); _starpu_dlacpy_("F", &m, &m, s, &c__4, scpy, &c__4); _starpu_dlacpy_("F", &m, &m, t, &c__4, tcpy, &c__4); _starpu_dlacpy_("F", &m, &m, ir, &c__4, ircop, &c__4); _starpu_dlacpy_("F", &m, &m, li, &c__4, licop, &c__4); /* Triangularize the B-part by an RQ factorization. */ /* Apply transformation (from left) to A-part, giving S. */ _starpu_dgerq2_(&m, &m, t, &c__4, taur, &work[1], &linfo); if (linfo != 0) { goto L70; } _starpu_dormr2_("R", "T", &m, &m, &m, t, &c__4, taur, s, &c__4, &work[1], & linfo); if (linfo != 0) { goto L70; } _starpu_dormr2_("L", "N", &m, &m, &m, t, &c__4, taur, ir, &c__4, &work[1], & linfo); if (linfo != 0) { goto L70; } /* Compute F-norm(S21) in BRQA21. (T21 is 0.) */ dscale = 0.; dsum = 1.; i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { _starpu_dlassq_(n1, &s[*n2 + 1 + (i__ << 2) - 5], &c__1, &dscale, &dsum); /* L30: */ } brqa21 = dscale * sqrt(dsum); /* Triangularize the B-part by a QR factorization. */ /* Apply transformation (from right) to A-part, giving S. */ _starpu_dgeqr2_(&m, &m, tcpy, &c__4, taul, &work[1], &linfo); if (linfo != 0) { goto L70; } _starpu_dorm2r_("L", "T", &m, &m, &m, tcpy, &c__4, taul, scpy, &c__4, &work[1] , info); _starpu_dorm2r_("R", "N", &m, &m, &m, tcpy, &c__4, taul, licop, &c__4, &work[ 1], info); if (linfo != 0) { goto L70; } /* Compute F-norm(S21) in BQRA21. (T21 is 0.) */ dscale = 0.; dsum = 1.; i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { _starpu_dlassq_(n1, &scpy[*n2 + 1 + (i__ << 2) - 5], &c__1, &dscale, & dsum); /* L40: */ } bqra21 = dscale * sqrt(dsum); /* Decide which method to use. */ /* Weak stability test: */ /* F-norm(S21) <= O(EPS * F-norm((S, T))) */ if (bqra21 <= brqa21 && bqra21 <= thresh) { _starpu_dlacpy_("F", &m, &m, scpy, &c__4, s, &c__4); _starpu_dlacpy_("F", &m, &m, tcpy, &c__4, t, &c__4); _starpu_dlacpy_("F", &m, &m, ircop, &c__4, ir, &c__4); _starpu_dlacpy_("F", &m, &m, licop, &c__4, li, &c__4); } else if (brqa21 >= thresh) { goto L70; } /* Set lower triangle of B-part to zero */ i__1 = m - 1; i__2 = m - 1; _starpu_dlaset_("Lower", &i__1, &i__2, &c_b5, &c_b5, &t[1], &c__4); if (TRUE_) { /* Strong stability test: */ /* F-norm((A-QL*S*QR', B-QL*T*QR')) <= O(EPS*F-norm((A,B))) */ _starpu_dlacpy_("Full", &m, &m, &a[*j1 + *j1 * a_dim1], lda, &work[m * m + 1], &m); _starpu_dgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, s, &c__4, &c_b5, & work[1], &m); _starpu_dgemm_("N", "N", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, & c_b42, &work[m * m + 1], &m); dscale = 0.; dsum = 1.; i__1 = m * m; _starpu_dlassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); _starpu_dlacpy_("Full", &m, &m, &b[*j1 + *j1 * b_dim1], ldb, &work[m * m + 1], &m); _starpu_dgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, t, &c__4, &c_b5, & work[1], &m); _starpu_dgemm_("N", "N", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, & c_b42, &work[m * m + 1], &m); i__1 = m * m; _starpu_dlassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); ss = dscale * sqrt(dsum); dtrong = ss <= thresh; if (! dtrong) { goto L70; } } /* If the swap is accepted ("weakly" and "strongly"), apply the */ /* transformations and set N1-by-N2 (2,1)-block to zero. */ _starpu_dlaset_("Full", n1, n2, &c_b5, &c_b5, &s[*n2], &c__4); /* copy back M-by-M diagonal block starting at index J1 of (A, B) */ _starpu_dlacpy_("F", &m, &m, s, &c__4, &a[*j1 + *j1 * a_dim1], lda) ; _starpu_dlacpy_("F", &m, &m, t, &c__4, &b[*j1 + *j1 * b_dim1], ldb) ; _starpu_dlaset_("Full", &c__4, &c__4, &c_b5, &c_b5, t, &c__4); /* Standardize existing 2-by-2 blocks. */ i__1 = m * m; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L50: */ } work[1] = 1.; t[0] = 1.; idum = *lwork - m * m - 2; if (*n2 > 1) { _starpu_dlagv2_(&a[*j1 + *j1 * a_dim1], lda, &b[*j1 + *j1 * b_dim1], ldb, ar, ai, be, &work[1], &work[2], t, &t[1]); work[m + 1] = -work[2]; work[m + 2] = work[1]; t[*n2 + (*n2 << 2) - 5] = t[0]; t[4] = -t[1]; } work[m * m] = 1.; t[m + (m << 2) - 5] = 1.; if (*n1 > 1) { _starpu_dlagv2_(&a[*j1 + *n2 + (*j1 + *n2) * a_dim1], lda, &b[*j1 + *n2 + (*j1 + *n2) * b_dim1], ldb, taur, taul, &work[m * m + 1], &work[*n2 * m + *n2 + 1], &work[*n2 * m + *n2 + 2], &t[* n2 + 1 + (*n2 + 1 << 2) - 5], &t[m + (m - 1 << 2) - 5]); work[m * m] = work[*n2 * m + *n2 + 1]; work[m * m - 1] = -work[*n2 * m + *n2 + 2]; t[m + (m << 2) - 5] = t[*n2 + 1 + (*n2 + 1 << 2) - 5]; t[m - 1 + (m << 2) - 5] = -t[m + (m - 1 << 2) - 5]; } _starpu_dgemm_("T", "N", n2, n1, n2, &c_b42, &work[1], &m, &a[*j1 + (*j1 + * n2) * a_dim1], lda, &c_b5, &work[m * m + 1], n2); _starpu_dlacpy_("Full", n2, n1, &work[m * m + 1], n2, &a[*j1 + (*j1 + *n2) * a_dim1], lda); _starpu_dgemm_("T", "N", n2, n1, n2, &c_b42, &work[1], &m, &b[*j1 + (*j1 + * n2) * b_dim1], ldb, &c_b5, &work[m * m + 1], n2); _starpu_dlacpy_("Full", n2, n1, &work[m * m + 1], n2, &b[*j1 + (*j1 + *n2) * b_dim1], ldb); _starpu_dgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, &work[1], &m, &c_b5, & work[m * m + 1], &m); _starpu_dlacpy_("Full", &m, &m, &work[m * m + 1], &m, li, &c__4); _starpu_dgemm_("N", "N", n2, n1, n1, &c_b42, &a[*j1 + (*j1 + *n2) * a_dim1], lda, &t[*n2 + 1 + (*n2 + 1 << 2) - 5], &c__4, &c_b5, &work[1], n2); _starpu_dlacpy_("Full", n2, n1, &work[1], n2, &a[*j1 + (*j1 + *n2) * a_dim1], lda); _starpu_dgemm_("N", "N", n2, n1, n1, &c_b42, &b[*j1 + (*j1 + *n2) * b_dim1], ldb, &t[*n2 + 1 + (*n2 + 1 << 2) - 5], &c__4, &c_b5, &work[1], n2); _starpu_dlacpy_("Full", n2, n1, &work[1], n2, &b[*j1 + (*j1 + *n2) * b_dim1], ldb); _starpu_dgemm_("T", "N", &m, &m, &m, &c_b42, ir, &c__4, t, &c__4, &c_b5, & work[1], &m); _starpu_dlacpy_("Full", &m, &m, &work[1], &m, ir, &c__4); /* Accumulate transformations into Q and Z if requested. */ if (*wantq) { _starpu_dgemm_("N", "N", n, &m, &m, &c_b42, &q[*j1 * q_dim1 + 1], ldq, li, &c__4, &c_b5, &work[1], n); _starpu_dlacpy_("Full", n, &m, &work[1], n, &q[*j1 * q_dim1 + 1], ldq); } if (*wantz) { _starpu_dgemm_("N", "N", n, &m, &m, &c_b42, &z__[*j1 * z_dim1 + 1], ldz, ir, &c__4, &c_b5, &work[1], n); _starpu_dlacpy_("Full", n, &m, &work[1], n, &z__[*j1 * z_dim1 + 1], ldz); } /* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and */ /* (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */ i__ = *j1 + m; if (i__ <= *n) { i__1 = *n - i__ + 1; _starpu_dgemm_("T", "N", &m, &i__1, &m, &c_b42, li, &c__4, &a[*j1 + i__ * a_dim1], lda, &c_b5, &work[1], &m); i__1 = *n - i__ + 1; _starpu_dlacpy_("Full", &m, &i__1, &work[1], &m, &a[*j1 + i__ * a_dim1], lda); i__1 = *n - i__ + 1; _starpu_dgemm_("T", "N", &m, &i__1, &m, &c_b42, li, &c__4, &b[*j1 + i__ * b_dim1], lda, &c_b5, &work[1], &m); i__1 = *n - i__ + 1; _starpu_dlacpy_("Full", &m, &i__1, &work[1], &m, &b[*j1 + i__ * b_dim1], ldb); } i__ = *j1 - 1; if (i__ > 0) { _starpu_dgemm_("N", "N", &i__, &m, &m, &c_b42, &a[*j1 * a_dim1 + 1], lda, ir, &c__4, &c_b5, &work[1], &i__); _starpu_dlacpy_("Full", &i__, &m, &work[1], &i__, &a[*j1 * a_dim1 + 1], lda); _starpu_dgemm_("N", "N", &i__, &m, &m, &c_b42, &b[*j1 * b_dim1 + 1], ldb, ir, &c__4, &c_b5, &work[1], &i__); _starpu_dlacpy_("Full", &i__, &m, &work[1], &i__, &b[*j1 * b_dim1 + 1], ldb); } /* Exit with INFO = 0 if swap was successfully performed. */ return 0; } /* Exit with INFO = 1 if swap was rejected. */ L70: *info = 1; return 0; /* End of DTGEX2 */ } /* _starpu_dtgex2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtgexc.c000066400000000000000000000336131507764646700206600ustar00rootroot00000000000000/* dtgexc.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__2 = 2; /* Subroutine */ int _starpu_dtgexc_(logical *wantq, logical *wantz, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * q, integer *ldq, doublereal *z__, integer *ldz, integer *ifst, integer *ilst, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, z_offset, i__1; /* Local variables */ integer nbf, nbl, here, lwmin; extern /* Subroutine */ int _starpu_dtgex2_(logical *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); integer nbnext; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTGEXC reorders the generalized real Schur decomposition of a real */ /* matrix pair (A,B) using an orthogonal equivalence transformation */ /* (A, B) = Q * (A, B) * Z', */ /* so that the diagonal block of (A, B) with row index IFST is moved */ /* to row ILST. */ /* (A, B) must be in generalized real Schur canonical form (as returned */ /* by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 */ /* diagonal blocks. B is upper triangular. */ /* Optionally, the matrices Q and Z of generalized Schur vectors are */ /* updated. */ /* Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' */ /* Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' */ /* Arguments */ /* ========= */ /* WANTQ (input) LOGICAL */ /* .TRUE. : update the left transformation matrix Q; */ /* .FALSE.: do not update Q. */ /* WANTZ (input) LOGICAL */ /* .TRUE. : update the right transformation matrix Z; */ /* .FALSE.: do not update Z. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the matrix A in generalized real Schur canonical */ /* form. */ /* On exit, the updated matrix A, again in generalized */ /* real Schur canonical form. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) */ /* On entry, the matrix B in generalized real Schur canonical */ /* form (A,B). */ /* On exit, the updated matrix B, again in generalized */ /* real Schur canonical form (A,B). */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* Q (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ /* On entry, if WANTQ = .TRUE., the orthogonal matrix Q. */ /* On exit, the updated matrix Q. */ /* If WANTQ = .FALSE., Q is not referenced. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= 1. */ /* If WANTQ = .TRUE., LDQ >= N. */ /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ /* On entry, if WANTZ = .TRUE., the orthogonal matrix Z. */ /* On exit, the updated matrix Z. */ /* If WANTZ = .FALSE., Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1. */ /* If WANTZ = .TRUE., LDZ >= N. */ /* IFST (input/output) INTEGER */ /* ILST (input/output) INTEGER */ /* Specify the reordering of the diagonal blocks of (A, B). */ /* The block with row index IFST is moved to row ILST, by a */ /* sequence of swapping between adjacent blocks. */ /* On exit, if IFST pointed on entry to the second row of */ /* a 2-by-2 block, it is changed to point to the first row; */ /* ILST always points to the first row of the block in its */ /* final position (which may differ from its input value by */ /* +1 or -1). 1 <= IFST, ILST <= N. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* LWORK >= 1 when N <= 1, otherwise LWORK >= 4*N + 16. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* =0: successful exit. */ /* <0: if INFO = -i, the i-th argument had an illegal value. */ /* =1: The transformed matrix pair (A, B) would be too far */ /* from generalized Schur form; the problem is ill- */ /* conditioned. (A, B) may have been partially reordered, */ /* and ILST points to the first row of the current */ /* position of the block being moved. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ /* Umea University, S-901 87 Umea, Sweden. */ /* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the */ /* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in */ /* M.S. Moonen et al (eds), Linear Algebra for Large Scale and */ /* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode and test input arguments. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; /* Function Body */ *info = 0; lquery = *lwork == -1; if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } else if (*ldq < 1 || *wantq && *ldq < max(1,*n)) { *info = -9; } else if (*ldz < 1 || *wantz && *ldz < max(1,*n)) { *info = -11; } else if (*ifst < 1 || *ifst > *n) { *info = -12; } else if (*ilst < 1 || *ilst > *n) { *info = -13; } if (*info == 0) { if (*n <= 1) { lwmin = 1; } else { lwmin = (*n << 2) + 16; } work[1] = (doublereal) lwmin; if (*lwork < lwmin && ! lquery) { *info = -15; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTGEXC", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n <= 1) { return 0; } /* Determine the first row of the specified block and find out */ /* if it is 1-by-1 or 2-by-2. */ if (*ifst > 1) { if (a[*ifst + (*ifst - 1) * a_dim1] != 0.) { --(*ifst); } } nbf = 1; if (*ifst < *n) { if (a[*ifst + 1 + *ifst * a_dim1] != 0.) { nbf = 2; } } /* Determine the first row of the final block */ /* and find out if it is 1-by-1 or 2-by-2. */ if (*ilst > 1) { if (a[*ilst + (*ilst - 1) * a_dim1] != 0.) { --(*ilst); } } nbl = 1; if (*ilst < *n) { if (a[*ilst + 1 + *ilst * a_dim1] != 0.) { nbl = 2; } } if (*ifst == *ilst) { return 0; } if (*ifst < *ilst) { /* Update ILST. */ if (nbf == 2 && nbl == 1) { --(*ilst); } if (nbf == 1 && nbl == 2) { ++(*ilst); } here = *ifst; L10: /* Swap with next one below. */ if (nbf == 1 || nbf == 2) { /* Current block either 1-by-1 or 2-by-2. */ nbnext = 1; if (here + nbf + 1 <= *n) { if (a[here + nbf + 1 + (here + nbf) * a_dim1] != 0.) { nbnext = 2; } } _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[ q_offset], ldq, &z__[z_offset], ldz, &here, &nbf, &nbnext, &work[1], lwork, info); if (*info != 0) { *ilst = here; return 0; } here += nbnext; /* Test if 2-by-2 block breaks into two 1-by-1 blocks. */ if (nbf == 2) { if (a[here + 1 + here * a_dim1] == 0.) { nbf = 3; } } } else { /* Current block consists of two 1-by-1 blocks, each of which */ /* must be swapped individually. */ nbnext = 1; if (here + 3 <= *n) { if (a[here + 3 + (here + 2) * a_dim1] != 0.) { nbnext = 2; } } i__1 = here + 1; _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[ q_offset], ldq, &z__[z_offset], ldz, &i__1, &c__1, & nbnext, &work[1], lwork, info); if (*info != 0) { *ilst = here; return 0; } if (nbnext == 1) { /* Swap two 1-by-1 blocks. */ _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[q_offset], ldq, &z__[z_offset], ldz, &here, &c__1, &c__1, &work[1], lwork, info); if (*info != 0) { *ilst = here; return 0; } ++here; } else { /* Recompute NBNEXT in case of 2-by-2 split. */ if (a[here + 2 + (here + 1) * a_dim1] == 0.) { nbnext = 1; } if (nbnext == 2) { /* 2-by-2 block did not split. */ _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[q_offset], ldq, &z__[z_offset], ldz, & here, &c__1, &nbnext, &work[1], lwork, info); if (*info != 0) { *ilst = here; return 0; } here += 2; } else { /* 2-by-2 block did split. */ _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[q_offset], ldq, &z__[z_offset], ldz, & here, &c__1, &c__1, &work[1], lwork, info); if (*info != 0) { *ilst = here; return 0; } ++here; _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[q_offset], ldq, &z__[z_offset], ldz, & here, &c__1, &c__1, &work[1], lwork, info); if (*info != 0) { *ilst = here; return 0; } ++here; } } } if (here < *ilst) { goto L10; } } else { here = *ifst; L20: /* Swap with next one below. */ if (nbf == 1 || nbf == 2) { /* Current block either 1-by-1 or 2-by-2. */ nbnext = 1; if (here >= 3) { if (a[here - 1 + (here - 2) * a_dim1] != 0.) { nbnext = 2; } } i__1 = here - nbnext; _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[ q_offset], ldq, &z__[z_offset], ldz, &i__1, &nbnext, &nbf, &work[1], lwork, info); if (*info != 0) { *ilst = here; return 0; } here -= nbnext; /* Test if 2-by-2 block breaks into two 1-by-1 blocks. */ if (nbf == 2) { if (a[here + 1 + here * a_dim1] == 0.) { nbf = 3; } } } else { /* Current block consists of two 1-by-1 blocks, each of which */ /* must be swapped individually. */ nbnext = 1; if (here >= 3) { if (a[here - 1 + (here - 2) * a_dim1] != 0.) { nbnext = 2; } } i__1 = here - nbnext; _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[ q_offset], ldq, &z__[z_offset], ldz, &i__1, &nbnext, & c__1, &work[1], lwork, info); if (*info != 0) { *ilst = here; return 0; } if (nbnext == 1) { /* Swap two 1-by-1 blocks. */ _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[q_offset], ldq, &z__[z_offset], ldz, &here, & nbnext, &c__1, &work[1], lwork, info); if (*info != 0) { *ilst = here; return 0; } --here; } else { /* Recompute NBNEXT in case of 2-by-2 split. */ if (a[here + (here - 1) * a_dim1] == 0.) { nbnext = 1; } if (nbnext == 2) { /* 2-by-2 block did not split. */ i__1 = here - 1; _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[q_offset], ldq, &z__[z_offset], ldz, & i__1, &c__2, &c__1, &work[1], lwork, info); if (*info != 0) { *ilst = here; return 0; } here += -2; } else { /* 2-by-2 block did split. */ _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[q_offset], ldq, &z__[z_offset], ldz, & here, &c__1, &c__1, &work[1], lwork, info); if (*info != 0) { *ilst = here; return 0; } --here; _starpu_dtgex2_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[q_offset], ldq, &z__[z_offset], ldz, & here, &c__1, &c__1, &work[1], lwork, info); if (*info != 0) { *ilst = here; return 0; } --here; } } } if (here > *ilst) { goto L20; } } *ilst = here; work[1] = (doublereal) lwmin; return 0; /* End of DTGEXC */ } /* _starpu_dtgexc_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtgsen.c000066400000000000000000000676421507764646700206770ustar00rootroot00000000000000/* dtgsen.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__2 = 2; static doublereal c_b28 = 1.; /* Subroutine */ int _starpu_dtgsen_(integer *ijob, logical *wantq, logical *wantz, logical *select, integer *n, doublereal *a, integer *lda, doublereal * b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * beta, doublereal *q, integer *ldq, doublereal *z__, integer *ldz, integer *m, doublereal *pl, doublereal *pr, doublereal *dif, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ integer i__, k, n1, n2, kk, ks, mn2, ijb; doublereal eps; integer kase; logical pair; integer ierr; doublereal dsum; logical swap; extern /* Subroutine */ int _starpu_dlag2_(doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); integer isave[3]; logical wantd; integer lwmin; logical wantp; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); logical wantd1, wantd2; extern doublereal _starpu_dlamch_(char *); doublereal dscale, rdscal; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dtgexc_(logical *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *, integer *), _starpu_dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); integer liwmin; extern /* Subroutine */ int _starpu_dtgsyl_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *); doublereal smlnum; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* January 2007 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTGSEN reorders the generalized real Schur decomposition of a real */ /* matrix pair (A, B) (in terms of an orthonormal equivalence trans- */ /* formation Q' * (A, B) * Z), so that a selected cluster of eigenvalues */ /* appears in the leading diagonal blocks of the upper quasi-triangular */ /* matrix A and the upper triangular B. The leading columns of Q and */ /* Z form orthonormal bases of the corresponding left and right eigen- */ /* spaces (deflating subspaces). (A, B) must be in generalized real */ /* Schur canonical form (as returned by DGGES), i.e. A is block upper */ /* triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper */ /* triangular. */ /* DTGSEN also computes the generalized eigenvalues */ /* w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j) */ /* of the reordered matrix pair (A, B). */ /* Optionally, DTGSEN computes the estimates of reciprocal condition */ /* numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), */ /* (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) */ /* between the matrix pairs (A11, B11) and (A22,B22) that correspond to */ /* the selected cluster and the eigenvalues outside the cluster, resp., */ /* and norms of "projections" onto left and right eigenspaces w.r.t. */ /* the selected cluster in the (1,1)-block. */ /* Arguments */ /* ========= */ /* IJOB (input) INTEGER */ /* Specifies whether condition numbers are required for the */ /* cluster of eigenvalues (PL and PR) or the deflating subspaces */ /* (Difu and Difl): */ /* =0: Only reorder w.r.t. SELECT. No extras. */ /* =1: Reciprocal of norms of "projections" onto left and right */ /* eigenspaces w.r.t. the selected cluster (PL and PR). */ /* =2: Upper bounds on Difu and Difl. F-norm-based estimate */ /* (DIF(1:2)). */ /* =3: Estimate of Difu and Difl. 1-norm-based estimate */ /* (DIF(1:2)). */ /* About 5 times as expensive as IJOB = 2. */ /* =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic */ /* version to get it all. */ /* =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above) */ /* WANTQ (input) LOGICAL */ /* .TRUE. : update the left transformation matrix Q; */ /* .FALSE.: do not update Q. */ /* WANTZ (input) LOGICAL */ /* .TRUE. : update the right transformation matrix Z; */ /* .FALSE.: do not update Z. */ /* SELECT (input) LOGICAL array, dimension (N) */ /* SELECT specifies the eigenvalues in the selected cluster. */ /* To select a real eigenvalue w(j), SELECT(j) must be set to */ /* .TRUE.. To select a complex conjugate pair of eigenvalues */ /* w(j) and w(j+1), corresponding to a 2-by-2 diagonal block, */ /* either SELECT(j) or SELECT(j+1) or both must be set to */ /* .TRUE.; a complex conjugate pair of eigenvalues must be */ /* either both included in the cluster or both excluded. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension(LDA,N) */ /* On entry, the upper quasi-triangular matrix A, with (A, B) in */ /* generalized real Schur canonical form. */ /* On exit, A is overwritten by the reordered matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension(LDB,N) */ /* On entry, the upper triangular matrix B, with (A, B) in */ /* generalized real Schur canonical form. */ /* On exit, B is overwritten by the reordered matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* ALPHAR (output) DOUBLE PRECISION array, dimension (N) */ /* ALPHAI (output) DOUBLE PRECISION array, dimension (N) */ /* BETA (output) DOUBLE PRECISION array, dimension (N) */ /* On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ /* be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i */ /* and BETA(j),j=1,...,N are the diagonals of the complex Schur */ /* form (S,T) that would result if the 2-by-2 diagonal blocks of */ /* the real generalized Schur form of (A,B) were further reduced */ /* to triangular form using complex unitary transformations. */ /* If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */ /* positive, then the j-th and (j+1)-st eigenvalues are a */ /* complex conjugate pair, with ALPHAI(j+1) negative. */ /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ /* On entry, if WANTQ = .TRUE., Q is an N-by-N matrix. */ /* On exit, Q has been postmultiplied by the left orthogonal */ /* transformation matrix which reorder (A, B); The leading M */ /* columns of Q form orthonormal bases for the specified pair of */ /* left eigenspaces (deflating subspaces). */ /* If WANTQ = .FALSE., Q is not referenced. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= 1; */ /* and if WANTQ = .TRUE., LDQ >= N. */ /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */ /* On entry, if WANTZ = .TRUE., Z is an N-by-N matrix. */ /* On exit, Z has been postmultiplied by the left orthogonal */ /* transformation matrix which reorder (A, B); The leading M */ /* columns of Z form orthonormal bases for the specified pair of */ /* left eigenspaces (deflating subspaces). */ /* If WANTZ = .FALSE., Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1; */ /* If WANTZ = .TRUE., LDZ >= N. */ /* M (output) INTEGER */ /* The dimension of the specified pair of left and right eigen- */ /* spaces (deflating subspaces). 0 <= M <= N. */ /* PL (output) DOUBLE PRECISION */ /* PR (output) DOUBLE PRECISION */ /* If IJOB = 1, 4 or 5, PL, PR are lower bounds on the */ /* reciprocal of the norm of "projections" onto left and right */ /* eigenspaces with respect to the selected cluster. */ /* 0 < PL, PR <= 1. */ /* If M = 0 or M = N, PL = PR = 1. */ /* If IJOB = 0, 2 or 3, PL and PR are not referenced. */ /* DIF (output) DOUBLE PRECISION array, dimension (2). */ /* If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl. */ /* If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on */ /* Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based */ /* estimates of Difu and Difl. */ /* If M = 0 or N, DIF(1:2) = F-norm([A, B]). */ /* If IJOB = 0 or 1, DIF is not referenced. */ /* WORK (workspace/output) DOUBLE PRECISION array, */ /* dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= 4*N+16. */ /* If IJOB = 1, 2 or 4, LWORK >= MAX(4*N+16, 2*M*(N-M)). */ /* If IJOB = 3 or 5, LWORK >= MAX(4*N+16, 4*M*(N-M)). */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ /* IF IJOB = 0, IWORK is not referenced. Otherwise, */ /* on exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. LIWORK >= 1. */ /* If IJOB = 1, 2 or 4, LIWORK >= N+6. */ /* If IJOB = 3 or 5, LIWORK >= MAX(2*M*(N-M), N+6). */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the optimal size of the IWORK array, */ /* returns this value as the first entry of the IWORK array, and */ /* no error message related to LIWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* =0: Successful exit. */ /* <0: If INFO = -i, the i-th argument had an illegal value. */ /* =1: Reordering of (A, B) failed because the transformed */ /* matrix pair (A, B) would be too far from generalized */ /* Schur form; the problem is very ill-conditioned. */ /* (A, B) may have been partially reordered. */ /* If requested, 0 is returned in DIF(*), PL and PR. */ /* Further Details */ /* =============== */ /* DTGSEN first collects the selected eigenvalues by computing */ /* orthogonal U and W that move them to the top left corner of (A, B). */ /* In other words, the selected eigenvalues are the eigenvalues of */ /* (A11, B11) in: */ /* U'*(A, B)*W = (A11 A12) (B11 B12) n1 */ /* ( 0 A22),( 0 B22) n2 */ /* n1 n2 n1 n2 */ /* where N = n1+n2 and U' means the transpose of U. The first n1 columns */ /* of U and W span the specified pair of left and right eigenspaces */ /* (deflating subspaces) of (A, B). */ /* If (A, B) has been obtained from the generalized real Schur */ /* decomposition of a matrix pair (C, D) = Q*(A, B)*Z', then the */ /* reordered generalized real Schur form of (C, D) is given by */ /* (C, D) = (Q*U)*(U'*(A, B)*W)*(Z*W)', */ /* and the first n1 columns of Q*U and Z*W span the corresponding */ /* deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.). */ /* Note that if the selected eigenvalue is sufficiently ill-conditioned, */ /* then its value may differ significantly from its value before */ /* reordering. */ /* The reciprocal condition numbers of the left and right eigenspaces */ /* spanned by the first n1 columns of U and W (or Q*U and Z*W) may */ /* be returned in DIF(1:2), corresponding to Difu and Difl, resp. */ /* The Difu and Difl are defined as: */ /* Difu[(A11, B11), (A22, B22)] = sigma-min( Zu ) */ /* and */ /* Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)], */ /* where sigma-min(Zu) is the smallest singular value of the */ /* (2*n1*n2)-by-(2*n1*n2) matrix */ /* Zu = [ kron(In2, A11) -kron(A22', In1) ] */ /* [ kron(In2, B11) -kron(B22', In1) ]. */ /* Here, Inx is the identity matrix of size nx and A22' is the */ /* transpose of A22. kron(X, Y) is the Kronecker product between */ /* the matrices X and Y. */ /* When DIF(2) is small, small changes in (A, B) can cause large changes */ /* in the deflating subspace. An approximate (asymptotic) bound on the */ /* maximum angular error in the computed deflating subspaces is */ /* EPS * norm((A, B)) / DIF(2), */ /* where EPS is the machine precision. */ /* The reciprocal norm of the projectors on the left and right */ /* eigenspaces associated with (A11, B11) may be returned in PL and PR. */ /* They are computed as follows. First we compute L and R so that */ /* P*(A, B)*Q is block diagonal, where */ /* P = ( I -L ) n1 Q = ( I R ) n1 */ /* ( 0 I ) n2 and ( 0 I ) n2 */ /* n1 n2 n1 n2 */ /* and (L, R) is the solution to the generalized Sylvester equation */ /* A11*R - L*A22 = -A12 */ /* B11*R - L*B22 = -B12 */ /* Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2). */ /* An approximate (asymptotic) bound on the average absolute error of */ /* the selected eigenvalues is */ /* EPS * norm((A, B)) / PL. */ /* There are also global error bounds which valid for perturbations up */ /* to a certain restriction: A lower bound (x) on the smallest */ /* F-norm(E,F) for which an eigenvalue of (A11, B11) may move and */ /* coalesce with an eigenvalue of (A22, B22) under perturbation (E,F), */ /* (i.e. (A + E, B + F), is */ /* x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)). */ /* An approximate bound on x can be computed from DIF(1:2), PL and PR. */ /* If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed */ /* (L', R') and unperturbed (L, R) left and right deflating subspaces */ /* associated with the selected cluster in the (1,1)-blocks can be */ /* bounded as */ /* max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2)) */ /* max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2)) */ /* See LAPACK User's Guide section 4.11 or the following references */ /* for more information. */ /* Note that if the default method for computing the Frobenius-norm- */ /* based estimate DIF is not wanted (see DLATDF), then the parameter */ /* IDIFJB (see below) should be changed from 3 to 4 (routine DLATDF */ /* (IJOB = 2 will be used)). See DTGSYL for more details. */ /* Based on contributions by */ /* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ /* Umea University, S-901 87 Umea, Sweden. */ /* References */ /* ========== */ /* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the */ /* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in */ /* M.S. Moonen et al (eds), Linear Algebra for Large Scale and */ /* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. */ /* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified */ /* Eigenvalues of a Regular Matrix Pair (A, B) and Condition */ /* Estimation: Theory, Algorithms and Software, */ /* Report UMINF - 94.04, Department of Computing Science, Umea */ /* University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working */ /* Note 87. To appear in Numerical Algorithms, 1996. */ /* [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software */ /* for Solving the Generalized Sylvester Equation and Estimating the */ /* Separation between Regular Matrix Pairs, Report UMINF - 93.23, */ /* Department of Computing Science, Umea University, S-901 87 Umea, */ /* Sweden, December 1993, Revised April 1994, Also as LAPACK Working */ /* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, */ /* 1996. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode and test the input parameters */ /* Parameter adjustments */ --select; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --alphar; --alphai; --beta; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --dif; --work; --iwork; /* Function Body */ *info = 0; lquery = *lwork == -1 || *liwork == -1; if (*ijob < 0 || *ijob > 5) { *info = -1; } else if (*n < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } else if (*ldq < 1 || *wantq && *ldq < *n) { *info = -14; } else if (*ldz < 1 || *wantz && *ldz < *n) { *info = -16; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTGSEN", &i__1); return 0; } /* Get machine constants */ eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S") / eps; ierr = 0; wantp = *ijob == 1 || *ijob >= 4; wantd1 = *ijob == 2 || *ijob == 4; wantd2 = *ijob == 3 || *ijob == 5; wantd = wantd1 || wantd2; /* Set M to the dimension of the specified pair of deflating */ /* subspaces. */ *m = 0; pair = FALSE_; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (pair) { pair = FALSE_; } else { if (k < *n) { if (a[k + 1 + k * a_dim1] == 0.) { if (select[k]) { ++(*m); } } else { pair = TRUE_; if (select[k] || select[k + 1]) { *m += 2; } } } else { if (select[*n]) { ++(*m); } } } /* L10: */ } if (*ijob == 1 || *ijob == 2 || *ijob == 4) { /* Computing MAX */ i__1 = 1, i__2 = (*n << 2) + 16, i__1 = max(i__1,i__2), i__2 = (*m << 1) * (*n - *m); lwmin = max(i__1,i__2); /* Computing MAX */ i__1 = 1, i__2 = *n + 6; liwmin = max(i__1,i__2); } else if (*ijob == 3 || *ijob == 5) { /* Computing MAX */ i__1 = 1, i__2 = (*n << 2) + 16, i__1 = max(i__1,i__2), i__2 = (*m << 2) * (*n - *m); lwmin = max(i__1,i__2); /* Computing MAX */ i__1 = 1, i__2 = (*m << 1) * (*n - *m), i__1 = max(i__1,i__2), i__2 = *n + 6; liwmin = max(i__1,i__2); } else { /* Computing MAX */ i__1 = 1, i__2 = (*n << 2) + 16; lwmin = max(i__1,i__2); liwmin = 1; } work[1] = (doublereal) lwmin; iwork[1] = liwmin; if (*lwork < lwmin && ! lquery) { *info = -22; } else if (*liwork < liwmin && ! lquery) { *info = -24; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTGSEN", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible. */ if (*m == *n || *m == 0) { if (wantp) { *pl = 1.; *pr = 1.; } if (wantd) { dscale = 0.; dsum = 1.; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { _starpu_dlassq_(n, &a[i__ * a_dim1 + 1], &c__1, &dscale, &dsum); _starpu_dlassq_(n, &b[i__ * b_dim1 + 1], &c__1, &dscale, &dsum); /* L20: */ } dif[1] = dscale * sqrt(dsum); dif[2] = dif[1]; } goto L60; } /* Collect the selected blocks at the top-left corner of (A, B). */ ks = 0; pair = FALSE_; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (pair) { pair = FALSE_; } else { swap = select[k]; if (k < *n) { if (a[k + 1 + k * a_dim1] != 0.) { pair = TRUE_; swap = swap || select[k + 1]; } } if (swap) { ++ks; /* Swap the K-th block to position KS. */ /* Perform the reordering of diagonal blocks in (A, B) */ /* by orthogonal transformation matrices and update */ /* Q and Z accordingly (if requested): */ kk = k; if (k != ks) { _starpu_dtgexc_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[q_offset], ldq, &z__[z_offset], ldz, &kk, &ks, &work[1], lwork, &ierr); } if (ierr > 0) { /* Swap is rejected: exit. */ *info = 1; if (wantp) { *pl = 0.; *pr = 0.; } if (wantd) { dif[1] = 0.; dif[2] = 0.; } goto L60; } if (pair) { ++ks; } } } /* L30: */ } if (wantp) { /* Solve generalized Sylvester equation for R and L */ /* and compute PL and PR. */ n1 = *m; n2 = *n - *m; i__ = n1 + 1; ijb = 0; _starpu_dlacpy_("Full", &n1, &n2, &a[i__ * a_dim1 + 1], lda, &work[1], &n1); _starpu_dlacpy_("Full", &n1, &n2, &b[i__ * b_dim1 + 1], ldb, &work[n1 * n2 + 1], &n1); i__1 = *lwork - (n1 << 1) * n2; _starpu_dtgsyl_("N", &ijb, &n1, &n2, &a[a_offset], lda, &a[i__ + i__ * a_dim1] , lda, &work[1], &n1, &b[b_offset], ldb, &b[i__ + i__ * b_dim1], ldb, &work[n1 * n2 + 1], &n1, &dscale, &dif[1], & work[(n1 * n2 << 1) + 1], &i__1, &iwork[1], &ierr); /* Estimate the reciprocal of norms of "projections" onto left */ /* and right eigenspaces. */ rdscal = 0.; dsum = 1.; i__1 = n1 * n2; _starpu_dlassq_(&i__1, &work[1], &c__1, &rdscal, &dsum); *pl = rdscal * sqrt(dsum); if (*pl == 0.) { *pl = 1.; } else { *pl = dscale / (sqrt(dscale * dscale / *pl + *pl) * sqrt(*pl)); } rdscal = 0.; dsum = 1.; i__1 = n1 * n2; _starpu_dlassq_(&i__1, &work[n1 * n2 + 1], &c__1, &rdscal, &dsum); *pr = rdscal * sqrt(dsum); if (*pr == 0.) { *pr = 1.; } else { *pr = dscale / (sqrt(dscale * dscale / *pr + *pr) * sqrt(*pr)); } } if (wantd) { /* Compute estimates of Difu and Difl. */ if (wantd1) { n1 = *m; n2 = *n - *m; i__ = n1 + 1; ijb = 3; /* Frobenius norm-based Difu-estimate. */ i__1 = *lwork - (n1 << 1) * n2; _starpu_dtgsyl_("N", &ijb, &n1, &n2, &a[a_offset], lda, &a[i__ + i__ * a_dim1], lda, &work[1], &n1, &b[b_offset], ldb, &b[i__ + i__ * b_dim1], ldb, &work[n1 * n2 + 1], &n1, &dscale, & dif[1], &work[(n1 << 1) * n2 + 1], &i__1, &iwork[1], & ierr); /* Frobenius norm-based Difl-estimate. */ i__1 = *lwork - (n1 << 1) * n2; _starpu_dtgsyl_("N", &ijb, &n2, &n1, &a[i__ + i__ * a_dim1], lda, &a[ a_offset], lda, &work[1], &n2, &b[i__ + i__ * b_dim1], ldb, &b[b_offset], ldb, &work[n1 * n2 + 1], &n2, &dscale, &dif[2], &work[(n1 << 1) * n2 + 1], &i__1, &iwork[1], & ierr); } else { /* Compute 1-norm-based estimates of Difu and Difl using */ /* reversed communication with DLACN2. In each step a */ /* generalized Sylvester equation or a transposed variant */ /* is solved. */ kase = 0; n1 = *m; n2 = *n - *m; i__ = n1 + 1; ijb = 0; mn2 = (n1 << 1) * n2; /* 1-norm-based estimate of Difu. */ L40: _starpu_dlacn2_(&mn2, &work[mn2 + 1], &work[1], &iwork[1], &dif[1], &kase, isave); if (kase != 0) { if (kase == 1) { /* Solve generalized Sylvester equation. */ i__1 = *lwork - (n1 << 1) * n2; _starpu_dtgsyl_("N", &ijb, &n1, &n2, &a[a_offset], lda, &a[i__ + i__ * a_dim1], lda, &work[1], &n1, &b[b_offset], ldb, &b[i__ + i__ * b_dim1], ldb, &work[n1 * n2 + 1], &n1, &dscale, &dif[1], &work[(n1 << 1) * n2 + 1], &i__1, &iwork[1], &ierr); } else { /* Solve the transposed variant. */ i__1 = *lwork - (n1 << 1) * n2; _starpu_dtgsyl_("T", &ijb, &n1, &n2, &a[a_offset], lda, &a[i__ + i__ * a_dim1], lda, &work[1], &n1, &b[b_offset], ldb, &b[i__ + i__ * b_dim1], ldb, &work[n1 * n2 + 1], &n1, &dscale, &dif[1], &work[(n1 << 1) * n2 + 1], &i__1, &iwork[1], &ierr); } goto L40; } dif[1] = dscale / dif[1]; /* 1-norm-based estimate of Difl. */ L50: _starpu_dlacn2_(&mn2, &work[mn2 + 1], &work[1], &iwork[1], &dif[2], &kase, isave); if (kase != 0) { if (kase == 1) { /* Solve generalized Sylvester equation. */ i__1 = *lwork - (n1 << 1) * n2; _starpu_dtgsyl_("N", &ijb, &n2, &n1, &a[i__ + i__ * a_dim1], lda, &a[a_offset], lda, &work[1], &n2, &b[i__ + i__ * b_dim1], ldb, &b[b_offset], ldb, &work[n1 * n2 + 1], &n2, &dscale, &dif[2], &work[(n1 << 1) * n2 + 1], &i__1, &iwork[1], &ierr); } else { /* Solve the transposed variant. */ i__1 = *lwork - (n1 << 1) * n2; _starpu_dtgsyl_("T", &ijb, &n2, &n1, &a[i__ + i__ * a_dim1], lda, &a[a_offset], lda, &work[1], &n2, &b[i__ + i__ * b_dim1], ldb, &b[b_offset], ldb, &work[n1 * n2 + 1], &n2, &dscale, &dif[2], &work[(n1 << 1) * n2 + 1], &i__1, &iwork[1], &ierr); } goto L50; } dif[2] = dscale / dif[2]; } } L60: /* Compute generalized eigenvalues of reordered pair (A, B) and */ /* normalize the generalized Schur form. */ pair = FALSE_; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (pair) { pair = FALSE_; } else { if (k < *n) { if (a[k + 1 + k * a_dim1] != 0.) { pair = TRUE_; } } if (pair) { /* Compute the eigenvalue(s) at position K. */ work[1] = a[k + k * a_dim1]; work[2] = a[k + 1 + k * a_dim1]; work[3] = a[k + (k + 1) * a_dim1]; work[4] = a[k + 1 + (k + 1) * a_dim1]; work[5] = b[k + k * b_dim1]; work[6] = b[k + 1 + k * b_dim1]; work[7] = b[k + (k + 1) * b_dim1]; work[8] = b[k + 1 + (k + 1) * b_dim1]; d__1 = smlnum * eps; _starpu_dlag2_(&work[1], &c__2, &work[5], &c__2, &d__1, &beta[k], & beta[k + 1], &alphar[k], &alphar[k + 1], &alphai[k]); alphai[k + 1] = -alphai[k]; } else { if (d_sign(&c_b28, &b[k + k * b_dim1]) < 0.) { /* If B(K,K) is negative, make it positive */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { a[k + i__ * a_dim1] = -a[k + i__ * a_dim1]; b[k + i__ * b_dim1] = -b[k + i__ * b_dim1]; if (*wantq) { q[i__ + k * q_dim1] = -q[i__ + k * q_dim1]; } /* L70: */ } } alphar[k] = a[k + k * a_dim1]; alphai[k] = 0.; beta[k] = b[k + k * b_dim1]; } } /* L80: */ } work[1] = (doublereal) lwmin; iwork[1] = liwmin; return 0; /* End of DTGSEN */ } /* _starpu_dtgsen_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtgsja.c000066400000000000000000000460001507764646700206500ustar00rootroot00000000000000/* dtgsja.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b13 = 0.; static doublereal c_b14 = 1.; static integer c__1 = 1; static doublereal c_b43 = -1.; /* Subroutine */ int _starpu_dtgsja_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, integer *k, integer *l, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *tola, doublereal *tolb, doublereal *alpha, doublereal *beta, doublereal *u, integer *ldu, doublereal *v, integer *ldv, doublereal *q, integer * ldq, doublereal *work, integer *ncycle, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4; doublereal d__1; /* Local variables */ integer i__, j; doublereal a1, a2, a3, b1, b2, b3, csq, csu, csv, snq, rwk, snu, snv; extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); doublereal gamma; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); logical initq, initu, initv, wantq, upper; doublereal error, ssmin; logical wantu, wantv; extern /* Subroutine */ int _starpu_dlags2_(logical *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlapll_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *); integer kcycle; extern /* Subroutine */ int _starpu_dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTGSJA computes the generalized singular value decomposition (GSVD) */ /* of two real upper triangular (or trapezoidal) matrices A and B. */ /* On entry, it is assumed that matrices A and B have the following */ /* forms, which may be obtained by the preprocessing subroutine DGGSVP */ /* from a general M-by-N matrix A and P-by-N matrix B: */ /* N-K-L K L */ /* A = K ( 0 A12 A13 ) if M-K-L >= 0; */ /* L ( 0 0 A23 ) */ /* M-K-L ( 0 0 0 ) */ /* N-K-L K L */ /* A = K ( 0 A12 A13 ) if M-K-L < 0; */ /* M-K ( 0 0 A23 ) */ /* N-K-L K L */ /* B = L ( 0 0 B13 ) */ /* P-L ( 0 0 0 ) */ /* where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular */ /* upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, */ /* otherwise A23 is (M-K)-by-L upper trapezoidal. */ /* On exit, */ /* U'*A*Q = D1*( 0 R ), V'*B*Q = D2*( 0 R ), */ /* where U, V and Q are orthogonal matrices, Z' denotes the transpose */ /* of Z, R is a nonsingular upper triangular matrix, and D1 and D2 are */ /* ``diagonal'' matrices, which are of the following structures: */ /* If M-K-L >= 0, */ /* K L */ /* D1 = K ( I 0 ) */ /* L ( 0 C ) */ /* M-K-L ( 0 0 ) */ /* K L */ /* D2 = L ( 0 S ) */ /* P-L ( 0 0 ) */ /* N-K-L K L */ /* ( 0 R ) = K ( 0 R11 R12 ) K */ /* L ( 0 0 R22 ) L */ /* where */ /* C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), */ /* S = diag( BETA(K+1), ... , BETA(K+L) ), */ /* C**2 + S**2 = I. */ /* R is stored in A(1:K+L,N-K-L+1:N) on exit. */ /* If M-K-L < 0, */ /* K M-K K+L-M */ /* D1 = K ( I 0 0 ) */ /* M-K ( 0 C 0 ) */ /* K M-K K+L-M */ /* D2 = M-K ( 0 S 0 ) */ /* K+L-M ( 0 0 I ) */ /* P-L ( 0 0 0 ) */ /* N-K-L K M-K K+L-M */ /* ( 0 R ) = K ( 0 R11 R12 R13 ) */ /* M-K ( 0 0 R22 R23 ) */ /* K+L-M ( 0 0 0 R33 ) */ /* where */ /* C = diag( ALPHA(K+1), ... , ALPHA(M) ), */ /* S = diag( BETA(K+1), ... , BETA(M) ), */ /* C**2 + S**2 = I. */ /* R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored */ /* ( 0 R22 R23 ) */ /* in B(M-K+1:L,N+M-K-L+1:N) on exit. */ /* The computation of the orthogonal transformation matrices U, V or Q */ /* is optional. These matrices may either be formed explicitly, or they */ /* may be postmultiplied into input matrices U1, V1, or Q1. */ /* Arguments */ /* ========= */ /* JOBU (input) CHARACTER*1 */ /* = 'U': U must contain an orthogonal matrix U1 on entry, and */ /* the product U1*U is returned; */ /* = 'I': U is initialized to the unit matrix, and the */ /* orthogonal matrix U is returned; */ /* = 'N': U is not computed. */ /* JOBV (input) CHARACTER*1 */ /* = 'V': V must contain an orthogonal matrix V1 on entry, and */ /* the product V1*V is returned; */ /* = 'I': V is initialized to the unit matrix, and the */ /* orthogonal matrix V is returned; */ /* = 'N': V is not computed. */ /* JOBQ (input) CHARACTER*1 */ /* = 'Q': Q must contain an orthogonal matrix Q1 on entry, and */ /* the product Q1*Q is returned; */ /* = 'I': Q is initialized to the unit matrix, and the */ /* orthogonal matrix Q is returned; */ /* = 'N': Q is not computed. */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* P (input) INTEGER */ /* The number of rows of the matrix B. P >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrices A and B. N >= 0. */ /* K (input) INTEGER */ /* L (input) INTEGER */ /* K and L specify the subblocks in the input matrices A and B: */ /* A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,N-L+1:N) */ /* of A and B, whose GSVD is going to be computed by DTGSJA. */ /* See Further details. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular */ /* matrix R or part of R. See Purpose for details. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,N) */ /* On entry, the P-by-N matrix B. */ /* On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains */ /* a part of R. See Purpose for details. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,P). */ /* TOLA (input) DOUBLE PRECISION */ /* TOLB (input) DOUBLE PRECISION */ /* TOLA and TOLB are the convergence criteria for the Jacobi- */ /* Kogbetliantz iteration procedure. Generally, they are the */ /* same as used in the preprocessing step, say */ /* TOLA = max(M,N)*norm(A)*MAZHEPS, */ /* TOLB = max(P,N)*norm(B)*MAZHEPS. */ /* ALPHA (output) DOUBLE PRECISION array, dimension (N) */ /* BETA (output) DOUBLE PRECISION array, dimension (N) */ /* On exit, ALPHA and BETA contain the generalized singular */ /* value pairs of A and B; */ /* ALPHA(1:K) = 1, */ /* BETA(1:K) = 0, */ /* and if M-K-L >= 0, */ /* ALPHA(K+1:K+L) = diag(C), */ /* BETA(K+1:K+L) = diag(S), */ /* or if M-K-L < 0, */ /* ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0 */ /* BETA(K+1:M) = S, BETA(M+1:K+L) = 1. */ /* Furthermore, if K+L < N, */ /* ALPHA(K+L+1:N) = 0 and */ /* BETA(K+L+1:N) = 0. */ /* U (input/output) DOUBLE PRECISION array, dimension (LDU,M) */ /* On entry, if JOBU = 'U', U must contain a matrix U1 (usually */ /* the orthogonal matrix returned by DGGSVP). */ /* On exit, */ /* if JOBU = 'I', U contains the orthogonal matrix U; */ /* if JOBU = 'U', U contains the product U1*U. */ /* If JOBU = 'N', U is not referenced. */ /* LDU (input) INTEGER */ /* The leading dimension of the array U. LDU >= max(1,M) if */ /* JOBU = 'U'; LDU >= 1 otherwise. */ /* V (input/output) DOUBLE PRECISION array, dimension (LDV,P) */ /* On entry, if JOBV = 'V', V must contain a matrix V1 (usually */ /* the orthogonal matrix returned by DGGSVP). */ /* On exit, */ /* if JOBV = 'I', V contains the orthogonal matrix V; */ /* if JOBV = 'V', V contains the product V1*V. */ /* If JOBV = 'N', V is not referenced. */ /* LDV (input) INTEGER */ /* The leading dimension of the array V. LDV >= max(1,P) if */ /* JOBV = 'V'; LDV >= 1 otherwise. */ /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ /* On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually */ /* the orthogonal matrix returned by DGGSVP). */ /* On exit, */ /* if JOBQ = 'I', Q contains the orthogonal matrix Q; */ /* if JOBQ = 'Q', Q contains the product Q1*Q. */ /* If JOBQ = 'N', Q is not referenced. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max(1,N) if */ /* JOBQ = 'Q'; LDQ >= 1 otherwise. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ /* NCYCLE (output) INTEGER */ /* The number of cycles required for convergence. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* = 1: the procedure does not converge after MAXIT cycles. */ /* Internal Parameters */ /* =================== */ /* MAXIT INTEGER */ /* MAXIT specifies the total loops that the iterative procedure */ /* may take. If after MAXIT cycles, the routine fails to */ /* converge, we return INFO = 1. */ /* Further Details */ /* =============== */ /* DTGSJA essentially uses a variant of Kogbetliantz algorithm to reduce */ /* min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L */ /* matrix B13 to the form: */ /* U1'*A13*Q1 = C1*R1; V1'*B13*Q1 = S1*R1, */ /* where U1, V1 and Q1 are orthogonal matrix, and Z' is the transpose */ /* of Z. C1 and S1 are diagonal matrices satisfying */ /* C1**2 + S1**2 = I, */ /* and R1 is an L-by-L nonsingular upper triangular matrix. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode and test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --alpha; --beta; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --work; /* Function Body */ initu = _starpu_lsame_(jobu, "I"); wantu = initu || _starpu_lsame_(jobu, "U"); initv = _starpu_lsame_(jobv, "I"); wantv = initv || _starpu_lsame_(jobv, "V"); initq = _starpu_lsame_(jobq, "I"); wantq = initq || _starpu_lsame_(jobq, "Q"); *info = 0; if (! (initu || wantu || _starpu_lsame_(jobu, "N"))) { *info = -1; } else if (! (initv || wantv || _starpu_lsame_(jobv, "N"))) { *info = -2; } else if (! (initq || wantq || _starpu_lsame_(jobq, "N"))) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*p < 0) { *info = -5; } else if (*n < 0) { *info = -6; } else if (*lda < max(1,*m)) { *info = -10; } else if (*ldb < max(1,*p)) { *info = -12; } else if (*ldu < 1 || wantu && *ldu < *m) { *info = -18; } else if (*ldv < 1 || wantv && *ldv < *p) { *info = -20; } else if (*ldq < 1 || wantq && *ldq < *n) { *info = -22; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTGSJA", &i__1); return 0; } /* Initialize U, V and Q, if necessary */ if (initu) { _starpu_dlaset_("Full", m, m, &c_b13, &c_b14, &u[u_offset], ldu); } if (initv) { _starpu_dlaset_("Full", p, p, &c_b13, &c_b14, &v[v_offset], ldv); } if (initq) { _starpu_dlaset_("Full", n, n, &c_b13, &c_b14, &q[q_offset], ldq); } /* Loop until convergence */ upper = FALSE_; for (kcycle = 1; kcycle <= 40; ++kcycle) { upper = ! upper; i__1 = *l - 1; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *l; for (j = i__ + 1; j <= i__2; ++j) { a1 = 0.; a2 = 0.; a3 = 0.; if (*k + i__ <= *m) { a1 = a[*k + i__ + (*n - *l + i__) * a_dim1]; } if (*k + j <= *m) { a3 = a[*k + j + (*n - *l + j) * a_dim1]; } b1 = b[i__ + (*n - *l + i__) * b_dim1]; b3 = b[j + (*n - *l + j) * b_dim1]; if (upper) { if (*k + i__ <= *m) { a2 = a[*k + i__ + (*n - *l + j) * a_dim1]; } b2 = b[i__ + (*n - *l + j) * b_dim1]; } else { if (*k + j <= *m) { a2 = a[*k + j + (*n - *l + i__) * a_dim1]; } b2 = b[j + (*n - *l + i__) * b_dim1]; } _starpu_dlags2_(&upper, &a1, &a2, &a3, &b1, &b2, &b3, &csu, &snu, & csv, &snv, &csq, &snq); /* Update (K+I)-th and (K+J)-th rows of matrix A: U'*A */ if (*k + j <= *m) { _starpu_drot_(l, &a[*k + j + (*n - *l + 1) * a_dim1], lda, &a[*k + i__ + (*n - *l + 1) * a_dim1], lda, &csu, &snu); } /* Update I-th and J-th rows of matrix B: V'*B */ _starpu_drot_(l, &b[j + (*n - *l + 1) * b_dim1], ldb, &b[i__ + (*n - * l + 1) * b_dim1], ldb, &csv, &snv); /* Update (N-L+I)-th and (N-L+J)-th columns of matrices */ /* A and B: A*Q and B*Q */ /* Computing MIN */ i__4 = *k + *l; i__3 = min(i__4,*m); _starpu_drot_(&i__3, &a[(*n - *l + j) * a_dim1 + 1], &c__1, &a[(*n - * l + i__) * a_dim1 + 1], &c__1, &csq, &snq); _starpu_drot_(l, &b[(*n - *l + j) * b_dim1 + 1], &c__1, &b[(*n - *l + i__) * b_dim1 + 1], &c__1, &csq, &snq); if (upper) { if (*k + i__ <= *m) { a[*k + i__ + (*n - *l + j) * a_dim1] = 0.; } b[i__ + (*n - *l + j) * b_dim1] = 0.; } else { if (*k + j <= *m) { a[*k + j + (*n - *l + i__) * a_dim1] = 0.; } b[j + (*n - *l + i__) * b_dim1] = 0.; } /* Update orthogonal matrices U, V, Q, if desired. */ if (wantu && *k + j <= *m) { _starpu_drot_(m, &u[(*k + j) * u_dim1 + 1], &c__1, &u[(*k + i__) * u_dim1 + 1], &c__1, &csu, &snu); } if (wantv) { _starpu_drot_(p, &v[j * v_dim1 + 1], &c__1, &v[i__ * v_dim1 + 1], &c__1, &csv, &snv); } if (wantq) { _starpu_drot_(n, &q[(*n - *l + j) * q_dim1 + 1], &c__1, &q[(*n - * l + i__) * q_dim1 + 1], &c__1, &csq, &snq); } /* L10: */ } /* L20: */ } if (! upper) { /* The matrices A13 and B13 were lower triangular at the start */ /* of the cycle, and are now upper triangular. */ /* Convergence test: test the parallelism of the corresponding */ /* rows of A and B. */ error = 0.; /* Computing MIN */ i__2 = *l, i__3 = *m - *k; i__1 = min(i__2,i__3); for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *l - i__ + 1; _starpu_dcopy_(&i__2, &a[*k + i__ + (*n - *l + i__) * a_dim1], lda, & work[1], &c__1); i__2 = *l - i__ + 1; _starpu_dcopy_(&i__2, &b[i__ + (*n - *l + i__) * b_dim1], ldb, &work[* l + 1], &c__1); i__2 = *l - i__ + 1; _starpu_dlapll_(&i__2, &work[1], &c__1, &work[*l + 1], &c__1, &ssmin); error = max(error,ssmin); /* L30: */ } if (abs(error) <= min(*tola,*tolb)) { goto L50; } } /* End of cycle loop */ /* L40: */ } /* The algorithm has not converged after MAXIT cycles. */ *info = 1; goto L100; L50: /* If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged. */ /* Compute the generalized singular value pairs (ALPHA, BETA), and */ /* set the triangular matrix R to array A. */ i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { alpha[i__] = 1.; beta[i__] = 0.; /* L60: */ } /* Computing MIN */ i__2 = *l, i__3 = *m - *k; i__1 = min(i__2,i__3); for (i__ = 1; i__ <= i__1; ++i__) { a1 = a[*k + i__ + (*n - *l + i__) * a_dim1]; b1 = b[i__ + (*n - *l + i__) * b_dim1]; if (a1 != 0.) { gamma = b1 / a1; /* change sign if necessary */ if (gamma < 0.) { i__2 = *l - i__ + 1; _starpu_dscal_(&i__2, &c_b43, &b[i__ + (*n - *l + i__) * b_dim1], ldb) ; if (wantv) { _starpu_dscal_(p, &c_b43, &v[i__ * v_dim1 + 1], &c__1); } } d__1 = abs(gamma); _starpu_dlartg_(&d__1, &c_b14, &beta[*k + i__], &alpha[*k + i__], &rwk); if (alpha[*k + i__] >= beta[*k + i__]) { i__2 = *l - i__ + 1; d__1 = 1. / alpha[*k + i__]; _starpu_dscal_(&i__2, &d__1, &a[*k + i__ + (*n - *l + i__) * a_dim1], lda); } else { i__2 = *l - i__ + 1; d__1 = 1. / beta[*k + i__]; _starpu_dscal_(&i__2, &d__1, &b[i__ + (*n - *l + i__) * b_dim1], ldb); i__2 = *l - i__ + 1; _starpu_dcopy_(&i__2, &b[i__ + (*n - *l + i__) * b_dim1], ldb, &a[*k + i__ + (*n - *l + i__) * a_dim1], lda); } } else { alpha[*k + i__] = 0.; beta[*k + i__] = 1.; i__2 = *l - i__ + 1; _starpu_dcopy_(&i__2, &b[i__ + (*n - *l + i__) * b_dim1], ldb, &a[*k + i__ + (*n - *l + i__) * a_dim1], lda); } /* L70: */ } /* Post-assignment */ i__1 = *k + *l; for (i__ = *m + 1; i__ <= i__1; ++i__) { alpha[i__] = 0.; beta[i__] = 1.; /* L80: */ } if (*k + *l < *n) { i__1 = *n; for (i__ = *k + *l + 1; i__ <= i__1; ++i__) { alpha[i__] = 0.; beta[i__] = 0.; /* L90: */ } } L100: *ncycle = kcycle; return 0; /* End of DTGSJA */ } /* _starpu_dtgsja_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtgsna.c000066400000000000000000000562531507764646700206670ustar00rootroot00000000000000/* dtgsna.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b19 = 1.; static doublereal c_b21 = 0.; static integer c__2 = 2; static logical c_false = FALSE_; static integer c__3 = 3; /* Subroutine */ int _starpu_dtgsna_(char *job, char *howmny, logical *select, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, doublereal *s, doublereal *dif, integer *mm, integer *m, doublereal * work, integer *lwork, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, k; doublereal c1, c2; integer n1, n2, ks, iz; doublereal eps, beta, cond; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); logical pair; integer ierr; doublereal uhav, uhbv; integer ifst; doublereal lnrm; integer ilst; doublereal rnrm; extern /* Subroutine */ int _starpu_dlag2_(doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); doublereal root1, root2, scale; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); doublereal uhavi, uhbvi, tmpii; integer lwmin; logical wants; doublereal tmpir, tmpri, dummy[1], tmprr; extern doublereal _starpu_dlapy2_(doublereal *, doublereal *); doublereal dummy1[1]; extern doublereal _starpu_dlamch_(char *); doublereal alphai, alphar; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *), _starpu_dtgexc_(logical *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *, integer *); logical wantbh, wantdf, somcon; doublereal alprqt; extern /* Subroutine */ int _starpu_dtgsyl_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *); doublereal smlnum; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTGSNA estimates reciprocal condition numbers for specified */ /* eigenvalues and/or eigenvectors of a matrix pair (A, B) in */ /* generalized real Schur canonical form (or of any matrix pair */ /* (Q*A*Z', Q*B*Z') with orthogonal matrices Q and Z, where */ /* Z' denotes the transpose of Z. */ /* (A, B) must be in generalized real Schur form (as returned by DGGES), */ /* i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal */ /* blocks. B is upper triangular. */ /* Arguments */ /* ========= */ /* JOB (input) CHARACTER*1 */ /* Specifies whether condition numbers are required for */ /* eigenvalues (S) or eigenvectors (DIF): */ /* = 'E': for eigenvalues only (S); */ /* = 'V': for eigenvectors only (DIF); */ /* = 'B': for both eigenvalues and eigenvectors (S and DIF). */ /* HOWMNY (input) CHARACTER*1 */ /* = 'A': compute condition numbers for all eigenpairs; */ /* = 'S': compute condition numbers for selected eigenpairs */ /* specified by the array SELECT. */ /* SELECT (input) LOGICAL array, dimension (N) */ /* If HOWMNY = 'S', SELECT specifies the eigenpairs for which */ /* condition numbers are required. To select condition numbers */ /* for the eigenpair corresponding to a real eigenvalue w(j), */ /* SELECT(j) must be set to .TRUE.. To select condition numbers */ /* corresponding to a complex conjugate pair of eigenvalues w(j) */ /* and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be */ /* set to .TRUE.. */ /* If HOWMNY = 'A', SELECT is not referenced. */ /* N (input) INTEGER */ /* The order of the square matrix pair (A, B). N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The upper quasi-triangular matrix A in the pair (A,B). */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input) DOUBLE PRECISION array, dimension (LDB,N) */ /* The upper triangular matrix B in the pair (A,B). */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* VL (input) DOUBLE PRECISION array, dimension (LDVL,M) */ /* If JOB = 'E' or 'B', VL must contain left eigenvectors of */ /* (A, B), corresponding to the eigenpairs specified by HOWMNY */ /* and SELECT. The eigenvectors must be stored in consecutive */ /* columns of VL, as returned by DTGEVC. */ /* If JOB = 'V', VL is not referenced. */ /* LDVL (input) INTEGER */ /* The leading dimension of the array VL. LDVL >= 1. */ /* If JOB = 'E' or 'B', LDVL >= N. */ /* VR (input) DOUBLE PRECISION array, dimension (LDVR,M) */ /* If JOB = 'E' or 'B', VR must contain right eigenvectors of */ /* (A, B), corresponding to the eigenpairs specified by HOWMNY */ /* and SELECT. The eigenvectors must be stored in consecutive */ /* columns ov VR, as returned by DTGEVC. */ /* If JOB = 'V', VR is not referenced. */ /* LDVR (input) INTEGER */ /* The leading dimension of the array VR. LDVR >= 1. */ /* If JOB = 'E' or 'B', LDVR >= N. */ /* S (output) DOUBLE PRECISION array, dimension (MM) */ /* If JOB = 'E' or 'B', the reciprocal condition numbers of the */ /* selected eigenvalues, stored in consecutive elements of the */ /* array. For a complex conjugate pair of eigenvalues two */ /* consecutive elements of S are set to the same value. Thus */ /* S(j), DIF(j), and the j-th columns of VL and VR all */ /* correspond to the same eigenpair (but not in general the */ /* j-th eigenpair, unless all eigenpairs are selected). */ /* If JOB = 'V', S is not referenced. */ /* DIF (output) DOUBLE PRECISION array, dimension (MM) */ /* If JOB = 'V' or 'B', the estimated reciprocal condition */ /* numbers of the selected eigenvectors, stored in consecutive */ /* elements of the array. For a complex eigenvector two */ /* consecutive elements of DIF are set to the same value. If */ /* the eigenvalues cannot be reordered to compute DIF(j), DIF(j) */ /* is set to 0; this can only occur when the true value would be */ /* very small anyway. */ /* If JOB = 'E', DIF is not referenced. */ /* MM (input) INTEGER */ /* The number of elements in the arrays S and DIF. MM >= M. */ /* M (output) INTEGER */ /* The number of elements of the arrays S and DIF used to store */ /* the specified condition numbers; for each selected real */ /* eigenvalue one element is used, and for each selected complex */ /* conjugate pair of eigenvalues, two elements are used. */ /* If HOWMNY = 'A', M is set to N. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,N). */ /* If JOB = 'V' or 'B' LWORK >= 2*N*(N+2)+16. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace) INTEGER array, dimension (N + 6) */ /* If JOB = 'E', IWORK is not referenced. */ /* INFO (output) INTEGER */ /* =0: Successful exit */ /* <0: If INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The reciprocal of the condition number of a generalized eigenvalue */ /* w = (a, b) is defined as */ /* S(w) = (|u'Av|**2 + |u'Bv|**2)**(1/2) / (norm(u)*norm(v)) */ /* where u and v are the left and right eigenvectors of (A, B) */ /* corresponding to w; |z| denotes the absolute value of the complex */ /* number, and norm(u) denotes the 2-norm of the vector u. */ /* The pair (a, b) corresponds to an eigenvalue w = a/b (= u'Av/u'Bv) */ /* of the matrix pair (A, B). If both a and b equal zero, then (A B) is */ /* singular and S(I) = -1 is returned. */ /* An approximate error bound on the chordal distance between the i-th */ /* computed generalized eigenvalue w and the corresponding exact */ /* eigenvalue lambda is */ /* chord(w, lambda) <= EPS * norm(A, B) / S(I) */ /* where EPS is the machine precision. */ /* The reciprocal of the condition number DIF(i) of right eigenvector u */ /* and left eigenvector v corresponding to the generalized eigenvalue w */ /* is defined as follows: */ /* a) If the i-th eigenvalue w = (a,b) is real */ /* Suppose U and V are orthogonal transformations such that */ /* U'*(A, B)*V = (S, T) = ( a * ) ( b * ) 1 */ /* ( 0 S22 ),( 0 T22 ) n-1 */ /* 1 n-1 1 n-1 */ /* Then the reciprocal condition number DIF(i) is */ /* Difl((a, b), (S22, T22)) = sigma-min( Zl ), */ /* where sigma-min(Zl) denotes the smallest singular value of the */ /* 2(n-1)-by-2(n-1) matrix */ /* Zl = [ kron(a, In-1) -kron(1, S22) ] */ /* [ kron(b, In-1) -kron(1, T22) ] . */ /* Here In-1 is the identity matrix of size n-1. kron(X, Y) is the */ /* Kronecker product between the matrices X and Y. */ /* Note that if the default method for computing DIF(i) is wanted */ /* (see DLATDF), then the parameter DIFDRI (see below) should be */ /* changed from 3 to 4 (routine DLATDF(IJOB = 2 will be used)). */ /* See DTGSYL for more details. */ /* b) If the i-th and (i+1)-th eigenvalues are complex conjugate pair, */ /* Suppose U and V are orthogonal transformations such that */ /* U'*(A, B)*V = (S, T) = ( S11 * ) ( T11 * ) 2 */ /* ( 0 S22 ),( 0 T22) n-2 */ /* 2 n-2 2 n-2 */ /* and (S11, T11) corresponds to the complex conjugate eigenvalue */ /* pair (w, conjg(w)). There exist unitary matrices U1 and V1 such */ /* that */ /* U1'*S11*V1 = ( s11 s12 ) and U1'*T11*V1 = ( t11 t12 ) */ /* ( 0 s22 ) ( 0 t22 ) */ /* where the generalized eigenvalues w = s11/t11 and */ /* conjg(w) = s22/t22. */ /* Then the reciprocal condition number DIF(i) is bounded by */ /* min( d1, max( 1, |real(s11)/real(s22)| )*d2 ) */ /* where, d1 = Difl((s11, t11), (s22, t22)) = sigma-min(Z1), where */ /* Z1 is the complex 2-by-2 matrix */ /* Z1 = [ s11 -s22 ] */ /* [ t11 -t22 ], */ /* This is done by computing (using real arithmetic) the */ /* roots of the characteristical polynomial det(Z1' * Z1 - lambda I), */ /* where Z1' denotes the conjugate transpose of Z1 and det(X) denotes */ /* the determinant of X. */ /* and d2 is an upper bound on Difl((S11, T11), (S22, T22)), i.e. an */ /* upper bound on sigma-min(Z2), where Z2 is (2n-2)-by-(2n-2) */ /* Z2 = [ kron(S11', In-2) -kron(I2, S22) ] */ /* [ kron(T11', In-2) -kron(I2, T22) ] */ /* Note that if the default method for computing DIF is wanted (see */ /* DLATDF), then the parameter DIFDRI (see below) should be changed */ /* from 3 to 4 (routine DLATDF(IJOB = 2 will be used)). See DTGSYL */ /* for more details. */ /* For each eigenvalue/vector specified by SELECT, DIF stores a */ /* Frobenius norm-based estimate of Difl. */ /* An approximate error bound for the i-th computed eigenvector VL(i) or */ /* VR(i) is given by */ /* EPS * norm(A, B) / DIF(i). */ /* See ref. [2-3] for more details and further references. */ /* Based on contributions by */ /* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ /* Umea University, S-901 87 Umea, Sweden. */ /* References */ /* ========== */ /* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the */ /* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in */ /* M.S. Moonen et al (eds), Linear Algebra for Large Scale and */ /* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. */ /* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified */ /* Eigenvalues of a Regular Matrix Pair (A, B) and Condition */ /* Estimation: Theory, Algorithms and Software, */ /* Report UMINF - 94.04, Department of Computing Science, Umea */ /* University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working */ /* Note 87. To appear in Numerical Algorithms, 1996. */ /* [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software */ /* for Solving the Generalized Sylvester Equation and Estimating the */ /* Separation between Regular Matrix Pairs, Report UMINF - 93.23, */ /* Department of Computing Science, Umea University, S-901 87 Umea, */ /* Sweden, December 1993, Revised April 1994, Also as LAPACK Working */ /* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, */ /* No 1, 1996. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode and test the input parameters */ /* Parameter adjustments */ --select; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1; vr -= vr_offset; --s; --dif; --work; --iwork; /* Function Body */ wantbh = _starpu_lsame_(job, "B"); wants = _starpu_lsame_(job, "E") || wantbh; wantdf = _starpu_lsame_(job, "V") || wantbh; somcon = _starpu_lsame_(howmny, "S"); *info = 0; lquery = *lwork == -1; if (! wants && ! wantdf) { *info = -1; } else if (! _starpu_lsame_(howmny, "A") && ! somcon) { *info = -2; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldb < max(1,*n)) { *info = -8; } else if (wants && *ldvl < *n) { *info = -10; } else if (wants && *ldvr < *n) { *info = -12; } else { /* Set M to the number of eigenpairs for which condition numbers */ /* are required, and test MM. */ if (somcon) { *m = 0; pair = FALSE_; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (pair) { pair = FALSE_; } else { if (k < *n) { if (a[k + 1 + k * a_dim1] == 0.) { if (select[k]) { ++(*m); } } else { pair = TRUE_; if (select[k] || select[k + 1]) { *m += 2; } } } else { if (select[*n]) { ++(*m); } } } /* L10: */ } } else { *m = *n; } if (*n == 0) { lwmin = 1; } else if (_starpu_lsame_(job, "V") || _starpu_lsame_(job, "B")) { lwmin = (*n << 1) * (*n + 2) + 16; } else { lwmin = *n; } work[1] = (doublereal) lwmin; if (*mm < *m) { *info = -15; } else if (*lwork < lwmin && ! lquery) { *info = -18; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTGSNA", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Get machine constants */ eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S") / eps; ks = 0; pair = FALSE_; i__1 = *n; for (k = 1; k <= i__1; ++k) { /* Determine whether A(k,k) begins a 1-by-1 or 2-by-2 block. */ if (pair) { pair = FALSE_; goto L20; } else { if (k < *n) { pair = a[k + 1 + k * a_dim1] != 0.; } } /* Determine whether condition numbers are required for the k-th */ /* eigenpair. */ if (somcon) { if (pair) { if (! select[k] && ! select[k + 1]) { goto L20; } } else { if (! select[k]) { goto L20; } } } ++ks; if (wants) { /* Compute the reciprocal condition number of the k-th */ /* eigenvalue. */ if (pair) { /* Complex eigenvalue pair. */ d__1 = _starpu_dnrm2_(n, &vr[ks * vr_dim1 + 1], &c__1); d__2 = _starpu_dnrm2_(n, &vr[(ks + 1) * vr_dim1 + 1], &c__1); rnrm = _starpu_dlapy2_(&d__1, &d__2); d__1 = _starpu_dnrm2_(n, &vl[ks * vl_dim1 + 1], &c__1); d__2 = _starpu_dnrm2_(n, &vl[(ks + 1) * vl_dim1 + 1], &c__1); lnrm = _starpu_dlapy2_(&d__1, &d__2); _starpu_dgemv_("N", n, n, &c_b19, &a[a_offset], lda, &vr[ks * vr_dim1 + 1], &c__1, &c_b21, &work[1], &c__1); tmprr = _starpu_ddot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], & c__1); tmpri = _starpu_ddot_(n, &work[1], &c__1, &vl[(ks + 1) * vl_dim1 + 1], &c__1); _starpu_dgemv_("N", n, n, &c_b19, &a[a_offset], lda, &vr[(ks + 1) * vr_dim1 + 1], &c__1, &c_b21, &work[1], &c__1); tmpii = _starpu_ddot_(n, &work[1], &c__1, &vl[(ks + 1) * vl_dim1 + 1], &c__1); tmpir = _starpu_ddot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], & c__1); uhav = tmprr + tmpii; uhavi = tmpir - tmpri; _starpu_dgemv_("N", n, n, &c_b19, &b[b_offset], ldb, &vr[ks * vr_dim1 + 1], &c__1, &c_b21, &work[1], &c__1); tmprr = _starpu_ddot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], & c__1); tmpri = _starpu_ddot_(n, &work[1], &c__1, &vl[(ks + 1) * vl_dim1 + 1], &c__1); _starpu_dgemv_("N", n, n, &c_b19, &b[b_offset], ldb, &vr[(ks + 1) * vr_dim1 + 1], &c__1, &c_b21, &work[1], &c__1); tmpii = _starpu_ddot_(n, &work[1], &c__1, &vl[(ks + 1) * vl_dim1 + 1], &c__1); tmpir = _starpu_ddot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], & c__1); uhbv = tmprr + tmpii; uhbvi = tmpir - tmpri; uhav = _starpu_dlapy2_(&uhav, &uhavi); uhbv = _starpu_dlapy2_(&uhbv, &uhbvi); cond = _starpu_dlapy2_(&uhav, &uhbv); s[ks] = cond / (rnrm * lnrm); s[ks + 1] = s[ks]; } else { /* Real eigenvalue. */ rnrm = _starpu_dnrm2_(n, &vr[ks * vr_dim1 + 1], &c__1); lnrm = _starpu_dnrm2_(n, &vl[ks * vl_dim1 + 1], &c__1); _starpu_dgemv_("N", n, n, &c_b19, &a[a_offset], lda, &vr[ks * vr_dim1 + 1], &c__1, &c_b21, &work[1], &c__1); uhav = _starpu_ddot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], &c__1) ; _starpu_dgemv_("N", n, n, &c_b19, &b[b_offset], ldb, &vr[ks * vr_dim1 + 1], &c__1, &c_b21, &work[1], &c__1); uhbv = _starpu_ddot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], &c__1) ; cond = _starpu_dlapy2_(&uhav, &uhbv); if (cond == 0.) { s[ks] = -1.; } else { s[ks] = cond / (rnrm * lnrm); } } } if (wantdf) { if (*n == 1) { dif[ks] = _starpu_dlapy2_(&a[a_dim1 + 1], &b[b_dim1 + 1]); goto L20; } /* Estimate the reciprocal condition number of the k-th */ /* eigenvectors. */ if (pair) { /* Copy the 2-by 2 pencil beginning at (A(k,k), B(k, k)). */ /* Compute the eigenvalue(s) at position K. */ work[1] = a[k + k * a_dim1]; work[2] = a[k + 1 + k * a_dim1]; work[3] = a[k + (k + 1) * a_dim1]; work[4] = a[k + 1 + (k + 1) * a_dim1]; work[5] = b[k + k * b_dim1]; work[6] = b[k + 1 + k * b_dim1]; work[7] = b[k + (k + 1) * b_dim1]; work[8] = b[k + 1 + (k + 1) * b_dim1]; d__1 = smlnum * eps; _starpu_dlag2_(&work[1], &c__2, &work[5], &c__2, &d__1, &beta, dummy1, &alphar, dummy, &alphai); alprqt = 1.; c1 = (alphar * alphar + alphai * alphai + beta * beta) * 2.; c2 = beta * 4. * beta * alphai * alphai; root1 = c1 + sqrt(c1 * c1 - c2 * 4.); root2 = c2 / root1; root1 /= 2.; /* Computing MIN */ d__1 = sqrt(root1), d__2 = sqrt(root2); cond = min(d__1,d__2); } /* Copy the matrix (A, B) to the array WORK and swap the */ /* diagonal block beginning at A(k,k) to the (1,1) position. */ _starpu_dlacpy_("Full", n, n, &a[a_offset], lda, &work[1], n); _starpu_dlacpy_("Full", n, n, &b[b_offset], ldb, &work[*n * *n + 1], n); ifst = k; ilst = 1; i__2 = *lwork - (*n << 1) * *n; _starpu_dtgexc_(&c_false, &c_false, n, &work[1], n, &work[*n * *n + 1], n, dummy, &c__1, dummy1, &c__1, &ifst, &ilst, &work[(*n * * n << 1) + 1], &i__2, &ierr); if (ierr > 0) { /* Ill-conditioned problem - swap rejected. */ dif[ks] = 0.; } else { /* Reordering successful, solve generalized Sylvester */ /* equation for R and L, */ /* A22 * R - L * A11 = A12 */ /* B22 * R - L * B11 = B12, */ /* and compute estimate of Difl((A11,B11), (A22, B22)). */ n1 = 1; if (work[2] != 0.) { n1 = 2; } n2 = *n - n1; if (n2 == 0) { dif[ks] = cond; } else { i__ = *n * *n + 1; iz = (*n << 1) * *n + 1; i__2 = *lwork - (*n << 1) * *n; _starpu_dtgsyl_("N", &c__3, &n2, &n1, &work[*n * n1 + n1 + 1], n, &work[1], n, &work[n1 + 1], n, &work[*n * n1 + n1 + i__], n, &work[i__], n, &work[n1 + i__], n, & scale, &dif[ks], &work[iz + 1], &i__2, &iwork[1], &ierr); if (pair) { /* Computing MIN */ d__1 = max(1.,alprqt) * dif[ks]; dif[ks] = min(d__1,cond); } } } if (pair) { dif[ks + 1] = dif[ks]; } } if (pair) { ++ks; } L20: ; } work[1] = (doublereal) lwmin; return 0; /* End of DTGSNA */ } /* _starpu_dtgsna_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtgsy2.c000066400000000000000000001006671507764646700206220ustar00rootroot00000000000000/* dtgsy2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__8 = 8; static integer c__1 = 1; static doublereal c_b27 = -1.; static doublereal c_b42 = 1.; static doublereal c_b56 = 0.; /* Subroutine */ int _starpu_dtgsy2_(char *trans, integer *ijob, integer *m, integer * n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *c__, integer *ldc, doublereal *d__, integer *ldd, doublereal *e, integer *lde, doublereal *f, integer *ldf, doublereal * scale, doublereal *rdsum, doublereal *rdscal, integer *iwork, integer *pq, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, d_dim1, d_offset, e_dim1, e_offset, f_dim1, f_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, k, p, q; doublereal z__[64] /* was [8][8] */; integer ie, je, mb, nb, ii, jj, is, js; doublereal rhs[8]; integer isp1, jsp1; extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); integer ierr, zdim, ipiv[8], jpiv[8]; doublereal alpha; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer * , doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *) , _starpu_dgesc2_(integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *), _starpu_dgetc2_(integer *, doublereal *, integer *, integer *, integer *, integer *), _starpu_dlatdf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); doublereal scaloc; extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); logical notran; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* January 2007 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTGSY2 solves the generalized Sylvester equation: */ /* A * R - L * B = scale * C (1) */ /* D * R - L * E = scale * F, */ /* using Level 1 and 2 BLAS. where R and L are unknown M-by-N matrices, */ /* (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M, */ /* N-by-N and M-by-N, respectively, with real entries. (A, D) and (B, E) */ /* must be in generalized Schur canonical form, i.e. A, B are upper */ /* quasi triangular and D, E are upper triangular. The solution (R, L) */ /* overwrites (C, F). 0 <= SCALE <= 1 is an output scaling factor */ /* chosen to avoid overflow. */ /* In matrix notation solving equation (1) corresponds to solve */ /* Z*x = scale*b, where Z is defined as */ /* Z = [ kron(In, A) -kron(B', Im) ] (2) */ /* [ kron(In, D) -kron(E', Im) ], */ /* Ik is the identity matrix of size k and X' is the transpose of X. */ /* kron(X, Y) is the Kronecker product between the matrices X and Y. */ /* In the process of solving (1), we solve a number of such systems */ /* where Dim(In), Dim(In) = 1 or 2. */ /* If TRANS = 'T', solve the transposed system Z'*y = scale*b for y, */ /* which is equivalent to solve for R and L in */ /* A' * R + D' * L = scale * C (3) */ /* R * B' + L * E' = scale * -F */ /* This case is used to compute an estimate of Dif[(A, D), (B, E)] = */ /* sigma_min(Z) using reverse communicaton with DLACON. */ /* DTGSY2 also (IJOB >= 1) contributes to the computation in DTGSYL */ /* of an upper bound on the separation between to matrix pairs. Then */ /* the input (A, D), (B, E) are sub-pencils of the matrix pair in */ /* DTGSYL. See DTGSYL for details. */ /* Arguments */ /* ========= */ /* TRANS (input) CHARACTER*1 */ /* = 'N', solve the generalized Sylvester equation (1). */ /* = 'T': solve the 'transposed' system (3). */ /* IJOB (input) INTEGER */ /* Specifies what kind of functionality to be performed. */ /* = 0: solve (1) only. */ /* = 1: A contribution from this subsystem to a Frobenius */ /* norm-based estimate of the separation between two matrix */ /* pairs is computed. (look ahead strategy is used). */ /* = 2: A contribution from this subsystem to a Frobenius */ /* norm-based estimate of the separation between two matrix */ /* pairs is computed. (DGECON on sub-systems is used.) */ /* Not referenced if TRANS = 'T'. */ /* M (input) INTEGER */ /* On entry, M specifies the order of A and D, and the row */ /* dimension of C, F, R and L. */ /* N (input) INTEGER */ /* On entry, N specifies the order of B and E, and the column */ /* dimension of C, F, R and L. */ /* A (input) DOUBLE PRECISION array, dimension (LDA, M) */ /* On entry, A contains an upper quasi triangular matrix. */ /* LDA (input) INTEGER */ /* The leading dimension of the matrix A. LDA >= max(1, M). */ /* B (input) DOUBLE PRECISION array, dimension (LDB, N) */ /* On entry, B contains an upper quasi triangular matrix. */ /* LDB (input) INTEGER */ /* The leading dimension of the matrix B. LDB >= max(1, N). */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC, N) */ /* On entry, C contains the right-hand-side of the first matrix */ /* equation in (1). */ /* On exit, if IJOB = 0, C has been overwritten by the */ /* solution R. */ /* LDC (input) INTEGER */ /* The leading dimension of the matrix C. LDC >= max(1, M). */ /* D (input) DOUBLE PRECISION array, dimension (LDD, M) */ /* On entry, D contains an upper triangular matrix. */ /* LDD (input) INTEGER */ /* The leading dimension of the matrix D. LDD >= max(1, M). */ /* E (input) DOUBLE PRECISION array, dimension (LDE, N) */ /* On entry, E contains an upper triangular matrix. */ /* LDE (input) INTEGER */ /* The leading dimension of the matrix E. LDE >= max(1, N). */ /* F (input/output) DOUBLE PRECISION array, dimension (LDF, N) */ /* On entry, F contains the right-hand-side of the second matrix */ /* equation in (1). */ /* On exit, if IJOB = 0, F has been overwritten by the */ /* solution L. */ /* LDF (input) INTEGER */ /* The leading dimension of the matrix F. LDF >= max(1, M). */ /* SCALE (output) DOUBLE PRECISION */ /* On exit, 0 <= SCALE <= 1. If 0 < SCALE < 1, the solutions */ /* R and L (C and F on entry) will hold the solutions to a */ /* slightly perturbed system but the input matrices A, B, D and */ /* E have not been changed. If SCALE = 0, R and L will hold the */ /* solutions to the homogeneous system with C = F = 0. Normally, */ /* SCALE = 1. */ /* RDSUM (input/output) DOUBLE PRECISION */ /* On entry, the sum of squares of computed contributions to */ /* the Dif-estimate under computation by DTGSYL, where the */ /* scaling factor RDSCAL (see below) has been factored out. */ /* On exit, the corresponding sum of squares updated with the */ /* contributions from the current sub-system. */ /* If TRANS = 'T' RDSUM is not touched. */ /* NOTE: RDSUM only makes sense when DTGSY2 is called by DTGSYL. */ /* RDSCAL (input/output) DOUBLE PRECISION */ /* On entry, scaling factor used to prevent overflow in RDSUM. */ /* On exit, RDSCAL is updated w.r.t. the current contributions */ /* in RDSUM. */ /* If TRANS = 'T', RDSCAL is not touched. */ /* NOTE: RDSCAL only makes sense when DTGSY2 is called by */ /* DTGSYL. */ /* IWORK (workspace) INTEGER array, dimension (M+N+2) */ /* PQ (output) INTEGER */ /* On exit, the number of subsystems (of size 2-by-2, 4-by-4 and */ /* 8-by-8) solved by this routine. */ /* INFO (output) INTEGER */ /* On exit, if INFO is set to */ /* =0: Successful exit */ /* <0: If INFO = -i, the i-th argument had an illegal value. */ /* >0: The matrix pairs (A, D) and (B, E) have common or very */ /* close eigenvalues. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ /* Umea University, S-901 87 Umea, Sweden. */ /* ===================================================================== */ /* Replaced various illegal calls to DCOPY by calls to DLASET. */ /* Sven Hammarling, 27/5/02. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode and test input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; d_dim1 = *ldd; d_offset = 1 + d_dim1; d__ -= d_offset; e_dim1 = *lde; e_offset = 1 + e_dim1; e -= e_offset; f_dim1 = *ldf; f_offset = 1 + f_dim1; f -= f_offset; --iwork; /* Function Body */ *info = 0; ierr = 0; notran = _starpu_lsame_(trans, "N"); if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -1; } else if (notran) { if (*ijob < 0 || *ijob > 2) { *info = -2; } } if (*info == 0) { if (*m <= 0) { *info = -3; } else if (*n <= 0) { *info = -4; } else if (*lda < max(1,*m)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -8; } else if (*ldc < max(1,*m)) { *info = -10; } else if (*ldd < max(1,*m)) { *info = -12; } else if (*lde < max(1,*n)) { *info = -14; } else if (*ldf < max(1,*m)) { *info = -16; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTGSY2", &i__1); return 0; } /* Determine block structure of A */ *pq = 0; p = 0; i__ = 1; L10: if (i__ > *m) { goto L20; } ++p; iwork[p] = i__; if (i__ == *m) { goto L20; } if (a[i__ + 1 + i__ * a_dim1] != 0.) { i__ += 2; } else { ++i__; } goto L10; L20: iwork[p + 1] = *m + 1; /* Determine block structure of B */ q = p + 1; j = 1; L30: if (j > *n) { goto L40; } ++q; iwork[q] = j; if (j == *n) { goto L40; } if (b[j + 1 + j * b_dim1] != 0.) { j += 2; } else { ++j; } goto L30; L40: iwork[q + 1] = *n + 1; *pq = p * (q - p - 1); if (notran) { /* Solve (I, J) - subsystem */ /* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) */ /* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) */ /* for I = P, P - 1, ..., 1; J = 1, 2, ..., Q */ *scale = 1.; scaloc = 1.; i__1 = q; for (j = p + 2; j <= i__1; ++j) { js = iwork[j]; jsp1 = js + 1; je = iwork[j + 1] - 1; nb = je - js + 1; for (i__ = p; i__ >= 1; --i__) { is = iwork[i__]; isp1 = is + 1; ie = iwork[i__ + 1] - 1; mb = ie - is + 1; zdim = mb * nb << 1; if (mb == 1 && nb == 1) { /* Build a 2-by-2 system Z * x = RHS */ z__[0] = a[is + is * a_dim1]; z__[1] = d__[is + is * d_dim1]; z__[8] = -b[js + js * b_dim1]; z__[9] = -e[js + js * e_dim1]; /* Set up right hand side(s) */ rhs[0] = c__[is + js * c_dim1]; rhs[1] = f[is + js * f_dim1]; /* Solve Z * x = RHS */ _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); if (ierr > 0) { *info = ierr; } if (*ijob == 0) { _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); if (scaloc != 1.) { i__2 = *n; for (k = 1; k <= i__2; ++k) { _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], & c__1); _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); /* L50: */ } *scale *= scaloc; } } else { _starpu_dlatdf_(ijob, &zdim, z__, &c__8, rhs, rdsum, rdscal, ipiv, jpiv); } /* Unpack solution vector(s) */ c__[is + js * c_dim1] = rhs[0]; f[is + js * f_dim1] = rhs[1]; /* Substitute R(I, J) and L(I, J) into remaining */ /* equation. */ if (i__ > 1) { alpha = -rhs[0]; i__2 = is - 1; _starpu_daxpy_(&i__2, &alpha, &a[is * a_dim1 + 1], &c__1, & c__[js * c_dim1 + 1], &c__1); i__2 = is - 1; _starpu_daxpy_(&i__2, &alpha, &d__[is * d_dim1 + 1], &c__1, & f[js * f_dim1 + 1], &c__1); } if (j < q) { i__2 = *n - je; _starpu_daxpy_(&i__2, &rhs[1], &b[js + (je + 1) * b_dim1], ldb, &c__[is + (je + 1) * c_dim1], ldc); i__2 = *n - je; _starpu_daxpy_(&i__2, &rhs[1], &e[js + (je + 1) * e_dim1], lde, &f[is + (je + 1) * f_dim1], ldf); } } else if (mb == 1 && nb == 2) { /* Build a 4-by-4 system Z * x = RHS */ z__[0] = a[is + is * a_dim1]; z__[1] = 0.; z__[2] = d__[is + is * d_dim1]; z__[3] = 0.; z__[8] = 0.; z__[9] = a[is + is * a_dim1]; z__[10] = 0.; z__[11] = d__[is + is * d_dim1]; z__[16] = -b[js + js * b_dim1]; z__[17] = -b[js + jsp1 * b_dim1]; z__[18] = -e[js + js * e_dim1]; z__[19] = -e[js + jsp1 * e_dim1]; z__[24] = -b[jsp1 + js * b_dim1]; z__[25] = -b[jsp1 + jsp1 * b_dim1]; z__[26] = 0.; z__[27] = -e[jsp1 + jsp1 * e_dim1]; /* Set up right hand side(s) */ rhs[0] = c__[is + js * c_dim1]; rhs[1] = c__[is + jsp1 * c_dim1]; rhs[2] = f[is + js * f_dim1]; rhs[3] = f[is + jsp1 * f_dim1]; /* Solve Z * x = RHS */ _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); if (ierr > 0) { *info = ierr; } if (*ijob == 0) { _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); if (scaloc != 1.) { i__2 = *n; for (k = 1; k <= i__2; ++k) { _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], & c__1); _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); /* L60: */ } *scale *= scaloc; } } else { _starpu_dlatdf_(ijob, &zdim, z__, &c__8, rhs, rdsum, rdscal, ipiv, jpiv); } /* Unpack solution vector(s) */ c__[is + js * c_dim1] = rhs[0]; c__[is + jsp1 * c_dim1] = rhs[1]; f[is + js * f_dim1] = rhs[2]; f[is + jsp1 * f_dim1] = rhs[3]; /* Substitute R(I, J) and L(I, J) into remaining */ /* equation. */ if (i__ > 1) { i__2 = is - 1; _starpu_dger_(&i__2, &nb, &c_b27, &a[is * a_dim1 + 1], &c__1, rhs, &c__1, &c__[js * c_dim1 + 1], ldc); i__2 = is - 1; _starpu_dger_(&i__2, &nb, &c_b27, &d__[is * d_dim1 + 1], & c__1, rhs, &c__1, &f[js * f_dim1 + 1], ldf); } if (j < q) { i__2 = *n - je; _starpu_daxpy_(&i__2, &rhs[2], &b[js + (je + 1) * b_dim1], ldb, &c__[is + (je + 1) * c_dim1], ldc); i__2 = *n - je; _starpu_daxpy_(&i__2, &rhs[2], &e[js + (je + 1) * e_dim1], lde, &f[is + (je + 1) * f_dim1], ldf); i__2 = *n - je; _starpu_daxpy_(&i__2, &rhs[3], &b[jsp1 + (je + 1) * b_dim1], ldb, &c__[is + (je + 1) * c_dim1], ldc); i__2 = *n - je; _starpu_daxpy_(&i__2, &rhs[3], &e[jsp1 + (je + 1) * e_dim1], lde, &f[is + (je + 1) * f_dim1], ldf); } } else if (mb == 2 && nb == 1) { /* Build a 4-by-4 system Z * x = RHS */ z__[0] = a[is + is * a_dim1]; z__[1] = a[isp1 + is * a_dim1]; z__[2] = d__[is + is * d_dim1]; z__[3] = 0.; z__[8] = a[is + isp1 * a_dim1]; z__[9] = a[isp1 + isp1 * a_dim1]; z__[10] = d__[is + isp1 * d_dim1]; z__[11] = d__[isp1 + isp1 * d_dim1]; z__[16] = -b[js + js * b_dim1]; z__[17] = 0.; z__[18] = -e[js + js * e_dim1]; z__[19] = 0.; z__[24] = 0.; z__[25] = -b[js + js * b_dim1]; z__[26] = 0.; z__[27] = -e[js + js * e_dim1]; /* Set up right hand side(s) */ rhs[0] = c__[is + js * c_dim1]; rhs[1] = c__[isp1 + js * c_dim1]; rhs[2] = f[is + js * f_dim1]; rhs[3] = f[isp1 + js * f_dim1]; /* Solve Z * x = RHS */ _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); if (ierr > 0) { *info = ierr; } if (*ijob == 0) { _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); if (scaloc != 1.) { i__2 = *n; for (k = 1; k <= i__2; ++k) { _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], & c__1); _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); /* L70: */ } *scale *= scaloc; } } else { _starpu_dlatdf_(ijob, &zdim, z__, &c__8, rhs, rdsum, rdscal, ipiv, jpiv); } /* Unpack solution vector(s) */ c__[is + js * c_dim1] = rhs[0]; c__[isp1 + js * c_dim1] = rhs[1]; f[is + js * f_dim1] = rhs[2]; f[isp1 + js * f_dim1] = rhs[3]; /* Substitute R(I, J) and L(I, J) into remaining */ /* equation. */ if (i__ > 1) { i__2 = is - 1; _starpu_dgemv_("N", &i__2, &mb, &c_b27, &a[is * a_dim1 + 1], lda, rhs, &c__1, &c_b42, &c__[js * c_dim1 + 1] , &c__1); i__2 = is - 1; _starpu_dgemv_("N", &i__2, &mb, &c_b27, &d__[is * d_dim1 + 1], ldd, rhs, &c__1, &c_b42, &f[js * f_dim1 + 1], &c__1); } if (j < q) { i__2 = *n - je; _starpu_dger_(&mb, &i__2, &c_b42, &rhs[2], &c__1, &b[js + (je + 1) * b_dim1], ldb, &c__[is + (je + 1) * c_dim1], ldc); i__2 = *n - je; _starpu_dger_(&mb, &i__2, &c_b42, &rhs[2], &c__1, &e[js + (je + 1) * e_dim1], lde, &f[is + (je + 1) * f_dim1], ldf); } } else if (mb == 2 && nb == 2) { /* Build an 8-by-8 system Z * x = RHS */ _starpu_dlaset_("F", &c__8, &c__8, &c_b56, &c_b56, z__, &c__8); z__[0] = a[is + is * a_dim1]; z__[1] = a[isp1 + is * a_dim1]; z__[4] = d__[is + is * d_dim1]; z__[8] = a[is + isp1 * a_dim1]; z__[9] = a[isp1 + isp1 * a_dim1]; z__[12] = d__[is + isp1 * d_dim1]; z__[13] = d__[isp1 + isp1 * d_dim1]; z__[18] = a[is + is * a_dim1]; z__[19] = a[isp1 + is * a_dim1]; z__[22] = d__[is + is * d_dim1]; z__[26] = a[is + isp1 * a_dim1]; z__[27] = a[isp1 + isp1 * a_dim1]; z__[30] = d__[is + isp1 * d_dim1]; z__[31] = d__[isp1 + isp1 * d_dim1]; z__[32] = -b[js + js * b_dim1]; z__[34] = -b[js + jsp1 * b_dim1]; z__[36] = -e[js + js * e_dim1]; z__[38] = -e[js + jsp1 * e_dim1]; z__[41] = -b[js + js * b_dim1]; z__[43] = -b[js + jsp1 * b_dim1]; z__[45] = -e[js + js * e_dim1]; z__[47] = -e[js + jsp1 * e_dim1]; z__[48] = -b[jsp1 + js * b_dim1]; z__[50] = -b[jsp1 + jsp1 * b_dim1]; z__[54] = -e[jsp1 + jsp1 * e_dim1]; z__[57] = -b[jsp1 + js * b_dim1]; z__[59] = -b[jsp1 + jsp1 * b_dim1]; z__[63] = -e[jsp1 + jsp1 * e_dim1]; /* Set up right hand side(s) */ k = 1; ii = mb * nb + 1; i__2 = nb - 1; for (jj = 0; jj <= i__2; ++jj) { _starpu_dcopy_(&mb, &c__[is + (js + jj) * c_dim1], &c__1, & rhs[k - 1], &c__1); _starpu_dcopy_(&mb, &f[is + (js + jj) * f_dim1], &c__1, &rhs[ ii - 1], &c__1); k += mb; ii += mb; /* L80: */ } /* Solve Z * x = RHS */ _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); if (ierr > 0) { *info = ierr; } if (*ijob == 0) { _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); if (scaloc != 1.) { i__2 = *n; for (k = 1; k <= i__2; ++k) { _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], & c__1); _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); /* L90: */ } *scale *= scaloc; } } else { _starpu_dlatdf_(ijob, &zdim, z__, &c__8, rhs, rdsum, rdscal, ipiv, jpiv); } /* Unpack solution vector(s) */ k = 1; ii = mb * nb + 1; i__2 = nb - 1; for (jj = 0; jj <= i__2; ++jj) { _starpu_dcopy_(&mb, &rhs[k - 1], &c__1, &c__[is + (js + jj) * c_dim1], &c__1); _starpu_dcopy_(&mb, &rhs[ii - 1], &c__1, &f[is + (js + jj) * f_dim1], &c__1); k += mb; ii += mb; /* L100: */ } /* Substitute R(I, J) and L(I, J) into remaining */ /* equation. */ if (i__ > 1) { i__2 = is - 1; _starpu_dgemm_("N", "N", &i__2, &nb, &mb, &c_b27, &a[is * a_dim1 + 1], lda, rhs, &mb, &c_b42, &c__[js * c_dim1 + 1], ldc); i__2 = is - 1; _starpu_dgemm_("N", "N", &i__2, &nb, &mb, &c_b27, &d__[is * d_dim1 + 1], ldd, rhs, &mb, &c_b42, &f[js * f_dim1 + 1], ldf); } if (j < q) { k = mb * nb + 1; i__2 = *n - je; _starpu_dgemm_("N", "N", &mb, &i__2, &nb, &c_b42, &rhs[k - 1], &mb, &b[js + (je + 1) * b_dim1], ldb, &c_b42, &c__[is + (je + 1) * c_dim1], ldc); i__2 = *n - je; _starpu_dgemm_("N", "N", &mb, &i__2, &nb, &c_b42, &rhs[k - 1], &mb, &e[js + (je + 1) * e_dim1], lde, &c_b42, &f[is + (je + 1) * f_dim1], ldf); } } /* L110: */ } /* L120: */ } } else { /* Solve (I, J) - subsystem */ /* A(I, I)' * R(I, J) + D(I, I)' * L(J, J) = C(I, J) */ /* R(I, I) * B(J, J) + L(I, J) * E(J, J) = -F(I, J) */ /* for I = 1, 2, ..., P, J = Q, Q - 1, ..., 1 */ *scale = 1.; scaloc = 1.; i__1 = p; for (i__ = 1; i__ <= i__1; ++i__) { is = iwork[i__]; isp1 = is + 1; ie = i__; mb = ie - is + 1; i__2 = p + 2; for (j = q; j >= i__2; --j) { js = iwork[j]; jsp1 = js + 1; je = iwork[j + 1] - 1; nb = je - js + 1; zdim = mb * nb << 1; if (mb == 1 && nb == 1) { /* Build a 2-by-2 system Z' * x = RHS */ z__[0] = a[is + is * a_dim1]; z__[1] = -b[js + js * b_dim1]; z__[8] = d__[is + is * d_dim1]; z__[9] = -e[js + js * e_dim1]; /* Set up right hand side(s) */ rhs[0] = c__[is + js * c_dim1]; rhs[1] = f[is + js * f_dim1]; /* Solve Z' * x = RHS */ _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); if (ierr > 0) { *info = ierr; } _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); if (scaloc != 1.) { i__3 = *n; for (k = 1; k <= i__3; ++k) { _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); /* L130: */ } *scale *= scaloc; } /* Unpack solution vector(s) */ c__[is + js * c_dim1] = rhs[0]; f[is + js * f_dim1] = rhs[1]; /* Substitute R(I, J) and L(I, J) into remaining */ /* equation. */ if (j > p + 2) { alpha = rhs[0]; i__3 = js - 1; _starpu_daxpy_(&i__3, &alpha, &b[js * b_dim1 + 1], &c__1, &f[ is + f_dim1], ldf); alpha = rhs[1]; i__3 = js - 1; _starpu_daxpy_(&i__3, &alpha, &e[js * e_dim1 + 1], &c__1, &f[ is + f_dim1], ldf); } if (i__ < p) { alpha = -rhs[0]; i__3 = *m - ie; _starpu_daxpy_(&i__3, &alpha, &a[is + (ie + 1) * a_dim1], lda, &c__[ie + 1 + js * c_dim1], &c__1); alpha = -rhs[1]; i__3 = *m - ie; _starpu_daxpy_(&i__3, &alpha, &d__[is + (ie + 1) * d_dim1], ldd, &c__[ie + 1 + js * c_dim1], &c__1); } } else if (mb == 1 && nb == 2) { /* Build a 4-by-4 system Z' * x = RHS */ z__[0] = a[is + is * a_dim1]; z__[1] = 0.; z__[2] = -b[js + js * b_dim1]; z__[3] = -b[jsp1 + js * b_dim1]; z__[8] = 0.; z__[9] = a[is + is * a_dim1]; z__[10] = -b[js + jsp1 * b_dim1]; z__[11] = -b[jsp1 + jsp1 * b_dim1]; z__[16] = d__[is + is * d_dim1]; z__[17] = 0.; z__[18] = -e[js + js * e_dim1]; z__[19] = 0.; z__[24] = 0.; z__[25] = d__[is + is * d_dim1]; z__[26] = -e[js + jsp1 * e_dim1]; z__[27] = -e[jsp1 + jsp1 * e_dim1]; /* Set up right hand side(s) */ rhs[0] = c__[is + js * c_dim1]; rhs[1] = c__[is + jsp1 * c_dim1]; rhs[2] = f[is + js * f_dim1]; rhs[3] = f[is + jsp1 * f_dim1]; /* Solve Z' * x = RHS */ _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); if (ierr > 0) { *info = ierr; } _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); if (scaloc != 1.) { i__3 = *n; for (k = 1; k <= i__3; ++k) { _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); /* L140: */ } *scale *= scaloc; } /* Unpack solution vector(s) */ c__[is + js * c_dim1] = rhs[0]; c__[is + jsp1 * c_dim1] = rhs[1]; f[is + js * f_dim1] = rhs[2]; f[is + jsp1 * f_dim1] = rhs[3]; /* Substitute R(I, J) and L(I, J) into remaining */ /* equation. */ if (j > p + 2) { i__3 = js - 1; _starpu_daxpy_(&i__3, rhs, &b[js * b_dim1 + 1], &c__1, &f[is + f_dim1], ldf); i__3 = js - 1; _starpu_daxpy_(&i__3, &rhs[1], &b[jsp1 * b_dim1 + 1], &c__1, & f[is + f_dim1], ldf); i__3 = js - 1; _starpu_daxpy_(&i__3, &rhs[2], &e[js * e_dim1 + 1], &c__1, &f[ is + f_dim1], ldf); i__3 = js - 1; _starpu_daxpy_(&i__3, &rhs[3], &e[jsp1 * e_dim1 + 1], &c__1, & f[is + f_dim1], ldf); } if (i__ < p) { i__3 = *m - ie; _starpu_dger_(&i__3, &nb, &c_b27, &a[is + (ie + 1) * a_dim1], lda, rhs, &c__1, &c__[ie + 1 + js * c_dim1], ldc); i__3 = *m - ie; _starpu_dger_(&i__3, &nb, &c_b27, &d__[is + (ie + 1) * d_dim1] , ldd, &rhs[2], &c__1, &c__[ie + 1 + js * c_dim1], ldc); } } else if (mb == 2 && nb == 1) { /* Build a 4-by-4 system Z' * x = RHS */ z__[0] = a[is + is * a_dim1]; z__[1] = a[is + isp1 * a_dim1]; z__[2] = -b[js + js * b_dim1]; z__[3] = 0.; z__[8] = a[isp1 + is * a_dim1]; z__[9] = a[isp1 + isp1 * a_dim1]; z__[10] = 0.; z__[11] = -b[js + js * b_dim1]; z__[16] = d__[is + is * d_dim1]; z__[17] = d__[is + isp1 * d_dim1]; z__[18] = -e[js + js * e_dim1]; z__[19] = 0.; z__[24] = 0.; z__[25] = d__[isp1 + isp1 * d_dim1]; z__[26] = 0.; z__[27] = -e[js + js * e_dim1]; /* Set up right hand side(s) */ rhs[0] = c__[is + js * c_dim1]; rhs[1] = c__[isp1 + js * c_dim1]; rhs[2] = f[is + js * f_dim1]; rhs[3] = f[isp1 + js * f_dim1]; /* Solve Z' * x = RHS */ _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); if (ierr > 0) { *info = ierr; } _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); if (scaloc != 1.) { i__3 = *n; for (k = 1; k <= i__3; ++k) { _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); /* L150: */ } *scale *= scaloc; } /* Unpack solution vector(s) */ c__[is + js * c_dim1] = rhs[0]; c__[isp1 + js * c_dim1] = rhs[1]; f[is + js * f_dim1] = rhs[2]; f[isp1 + js * f_dim1] = rhs[3]; /* Substitute R(I, J) and L(I, J) into remaining */ /* equation. */ if (j > p + 2) { i__3 = js - 1; _starpu_dger_(&mb, &i__3, &c_b42, rhs, &c__1, &b[js * b_dim1 + 1], &c__1, &f[is + f_dim1], ldf); i__3 = js - 1; _starpu_dger_(&mb, &i__3, &c_b42, &rhs[2], &c__1, &e[js * e_dim1 + 1], &c__1, &f[is + f_dim1], ldf); } if (i__ < p) { i__3 = *m - ie; _starpu_dgemv_("T", &mb, &i__3, &c_b27, &a[is + (ie + 1) * a_dim1], lda, rhs, &c__1, &c_b42, &c__[ie + 1 + js * c_dim1], &c__1); i__3 = *m - ie; _starpu_dgemv_("T", &mb, &i__3, &c_b27, &d__[is + (ie + 1) * d_dim1], ldd, &rhs[2], &c__1, &c_b42, &c__[ie + 1 + js * c_dim1], &c__1); } } else if (mb == 2 && nb == 2) { /* Build an 8-by-8 system Z' * x = RHS */ _starpu_dlaset_("F", &c__8, &c__8, &c_b56, &c_b56, z__, &c__8); z__[0] = a[is + is * a_dim1]; z__[1] = a[is + isp1 * a_dim1]; z__[4] = -b[js + js * b_dim1]; z__[6] = -b[jsp1 + js * b_dim1]; z__[8] = a[isp1 + is * a_dim1]; z__[9] = a[isp1 + isp1 * a_dim1]; z__[13] = -b[js + js * b_dim1]; z__[15] = -b[jsp1 + js * b_dim1]; z__[18] = a[is + is * a_dim1]; z__[19] = a[is + isp1 * a_dim1]; z__[20] = -b[js + jsp1 * b_dim1]; z__[22] = -b[jsp1 + jsp1 * b_dim1]; z__[26] = a[isp1 + is * a_dim1]; z__[27] = a[isp1 + isp1 * a_dim1]; z__[29] = -b[js + jsp1 * b_dim1]; z__[31] = -b[jsp1 + jsp1 * b_dim1]; z__[32] = d__[is + is * d_dim1]; z__[33] = d__[is + isp1 * d_dim1]; z__[36] = -e[js + js * e_dim1]; z__[41] = d__[isp1 + isp1 * d_dim1]; z__[45] = -e[js + js * e_dim1]; z__[50] = d__[is + is * d_dim1]; z__[51] = d__[is + isp1 * d_dim1]; z__[52] = -e[js + jsp1 * e_dim1]; z__[54] = -e[jsp1 + jsp1 * e_dim1]; z__[59] = d__[isp1 + isp1 * d_dim1]; z__[61] = -e[js + jsp1 * e_dim1]; z__[63] = -e[jsp1 + jsp1 * e_dim1]; /* Set up right hand side(s) */ k = 1; ii = mb * nb + 1; i__3 = nb - 1; for (jj = 0; jj <= i__3; ++jj) { _starpu_dcopy_(&mb, &c__[is + (js + jj) * c_dim1], &c__1, & rhs[k - 1], &c__1); _starpu_dcopy_(&mb, &f[is + (js + jj) * f_dim1], &c__1, &rhs[ ii - 1], &c__1); k += mb; ii += mb; /* L160: */ } /* Solve Z' * x = RHS */ _starpu_dgetc2_(&zdim, z__, &c__8, ipiv, jpiv, &ierr); if (ierr > 0) { *info = ierr; } _starpu_dgesc2_(&zdim, z__, &c__8, rhs, ipiv, jpiv, &scaloc); if (scaloc != 1.) { i__3 = *n; for (k = 1; k <= i__3; ++k) { _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); /* L170: */ } *scale *= scaloc; } /* Unpack solution vector(s) */ k = 1; ii = mb * nb + 1; i__3 = nb - 1; for (jj = 0; jj <= i__3; ++jj) { _starpu_dcopy_(&mb, &rhs[k - 1], &c__1, &c__[is + (js + jj) * c_dim1], &c__1); _starpu_dcopy_(&mb, &rhs[ii - 1], &c__1, &f[is + (js + jj) * f_dim1], &c__1); k += mb; ii += mb; /* L180: */ } /* Substitute R(I, J) and L(I, J) into remaining */ /* equation. */ if (j > p + 2) { i__3 = js - 1; _starpu_dgemm_("N", "T", &mb, &i__3, &nb, &c_b42, &c__[is + js * c_dim1], ldc, &b[js * b_dim1 + 1], ldb, & c_b42, &f[is + f_dim1], ldf); i__3 = js - 1; _starpu_dgemm_("N", "T", &mb, &i__3, &nb, &c_b42, &f[is + js * f_dim1], ldf, &e[js * e_dim1 + 1], lde, & c_b42, &f[is + f_dim1], ldf); } if (i__ < p) { i__3 = *m - ie; _starpu_dgemm_("T", "N", &i__3, &nb, &mb, &c_b27, &a[is + (ie + 1) * a_dim1], lda, &c__[is + js * c_dim1], ldc, &c_b42, &c__[ie + 1 + js * c_dim1], ldc); i__3 = *m - ie; _starpu_dgemm_("T", "N", &i__3, &nb, &mb, &c_b27, &d__[is + ( ie + 1) * d_dim1], ldd, &f[is + js * f_dim1], ldf, &c_b42, &c__[ie + 1 + js * c_dim1], ldc); } } /* L190: */ } /* L200: */ } } return 0; /* End of DTGSY2 */ } /* _starpu_dtgsy2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtgsyl.c000066400000000000000000000521561507764646700207130ustar00rootroot00000000000000/* dtgsyl.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__2 = 2; static integer c_n1 = -1; static integer c__5 = 5; static doublereal c_b14 = 0.; static integer c__1 = 1; static doublereal c_b51 = -1.; static doublereal c_b52 = 1.; /* Subroutine */ int _starpu_dtgsyl_(char *trans, integer *ijob, integer *m, integer * n, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *c__, integer *ldc, doublereal *d__, integer *ldd, doublereal *e, integer *lde, doublereal *f, integer *ldf, doublereal * scale, doublereal *dif, doublereal *work, integer *lwork, integer * iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, d_dim1, d_offset, e_dim1, e_offset, f_dim1, f_offset, i__1, i__2, i__3, i__4; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, k, p, q, ie, je, mb, nb, is, js, pq; doublereal dsum; integer ppqq; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer * , doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); integer ifunc, linfo, lwmin; doublereal scale2; extern /* Subroutine */ int _starpu_dtgsy2_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *); doublereal dscale, scaloc; extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); integer iround; logical notran; integer isolve; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTGSYL solves the generalized Sylvester equation: */ /* A * R - L * B = scale * C (1) */ /* D * R - L * E = scale * F */ /* where R and L are unknown m-by-n matrices, (A, D), (B, E) and */ /* (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, */ /* respectively, with real entries. (A, D) and (B, E) must be in */ /* generalized (real) Schur canonical form, i.e. A, B are upper quasi */ /* triangular and D, E are upper triangular. */ /* The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output */ /* scaling factor chosen to avoid overflow. */ /* In matrix notation (1) is equivalent to solve Zx = scale b, where */ /* Z is defined as */ /* Z = [ kron(In, A) -kron(B', Im) ] (2) */ /* [ kron(In, D) -kron(E', Im) ]. */ /* Here Ik is the identity matrix of size k and X' is the transpose of */ /* X. kron(X, Y) is the Kronecker product between the matrices X and Y. */ /* If TRANS = 'T', DTGSYL solves the transposed system Z'*y = scale*b, */ /* which is equivalent to solve for R and L in */ /* A' * R + D' * L = scale * C (3) */ /* R * B' + L * E' = scale * (-F) */ /* This case (TRANS = 'T') is used to compute an one-norm-based estimate */ /* of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) */ /* and (B,E), using DLACON. */ /* If IJOB >= 1, DTGSYL computes a Frobenius norm-based estimate */ /* of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the */ /* reciprocal of the smallest singular value of Z. See [1-2] for more */ /* information. */ /* This is a level 3 BLAS algorithm. */ /* Arguments */ /* ========= */ /* TRANS (input) CHARACTER*1 */ /* = 'N', solve the generalized Sylvester equation (1). */ /* = 'T', solve the 'transposed' system (3). */ /* IJOB (input) INTEGER */ /* Specifies what kind of functionality to be performed. */ /* =0: solve (1) only. */ /* =1: The functionality of 0 and 3. */ /* =2: The functionality of 0 and 4. */ /* =3: Only an estimate of Dif[(A,D), (B,E)] is computed. */ /* (look ahead strategy IJOB = 1 is used). */ /* =4: Only an estimate of Dif[(A,D), (B,E)] is computed. */ /* ( DGECON on sub-systems is used ). */ /* Not referenced if TRANS = 'T'. */ /* M (input) INTEGER */ /* The order of the matrices A and D, and the row dimension of */ /* the matrices C, F, R and L. */ /* N (input) INTEGER */ /* The order of the matrices B and E, and the column dimension */ /* of the matrices C, F, R and L. */ /* A (input) DOUBLE PRECISION array, dimension (LDA, M) */ /* The upper quasi triangular matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1, M). */ /* B (input) DOUBLE PRECISION array, dimension (LDB, N) */ /* The upper quasi triangular matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1, N). */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC, N) */ /* On entry, C contains the right-hand-side of the first matrix */ /* equation in (1) or (3). */ /* On exit, if IJOB = 0, 1 or 2, C has been overwritten by */ /* the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R, */ /* the solution achieved during the computation of the */ /* Dif-estimate. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1, M). */ /* D (input) DOUBLE PRECISION array, dimension (LDD, M) */ /* The upper triangular matrix D. */ /* LDD (input) INTEGER */ /* The leading dimension of the array D. LDD >= max(1, M). */ /* E (input) DOUBLE PRECISION array, dimension (LDE, N) */ /* The upper triangular matrix E. */ /* LDE (input) INTEGER */ /* The leading dimension of the array E. LDE >= max(1, N). */ /* F (input/output) DOUBLE PRECISION array, dimension (LDF, N) */ /* On entry, F contains the right-hand-side of the second matrix */ /* equation in (1) or (3). */ /* On exit, if IJOB = 0, 1 or 2, F has been overwritten by */ /* the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L, */ /* the solution achieved during the computation of the */ /* Dif-estimate. */ /* LDF (input) INTEGER */ /* The leading dimension of the array F. LDF >= max(1, M). */ /* DIF (output) DOUBLE PRECISION */ /* On exit DIF is the reciprocal of a lower bound of the */ /* reciprocal of the Dif-function, i.e. DIF is an upper bound of */ /* Dif[(A,D), (B,E)] = sigma_min(Z), where Z as in (2). */ /* IF IJOB = 0 or TRANS = 'T', DIF is not touched. */ /* SCALE (output) DOUBLE PRECISION */ /* On exit SCALE is the scaling factor in (1) or (3). */ /* If 0 < SCALE < 1, C and F hold the solutions R and L, resp., */ /* to a slightly perturbed system but the input matrices A, B, D */ /* and E have not been changed. If SCALE = 0, C and F hold the */ /* solutions R and L, respectively, to the homogeneous system */ /* with C = F = 0. Normally, SCALE = 1. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK > = 1. */ /* If IJOB = 1 or 2 and TRANS = 'N', LWORK >= max(1,2*M*N). */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace) INTEGER array, dimension (M+N+6) */ /* INFO (output) INTEGER */ /* =0: successful exit */ /* <0: If INFO = -i, the i-th argument had an illegal value. */ /* >0: (A, D) and (B, E) have common or close eigenvalues. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ /* Umea University, S-901 87 Umea, Sweden. */ /* [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software */ /* for Solving the Generalized Sylvester Equation and Estimating the */ /* Separation between Regular Matrix Pairs, Report UMINF - 93.23, */ /* Department of Computing Science, Umea University, S-901 87 Umea, */ /* Sweden, December 1993, Revised April 1994, Also as LAPACK Working */ /* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, */ /* No 1, 1996. */ /* [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester */ /* Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal. */ /* Appl., 15(4):1045-1060, 1994 */ /* [3] B. Kagstrom and L. Westin, Generalized Schur Methods with */ /* Condition Estimators for Solving the Generalized Sylvester */ /* Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7, */ /* July 1989, pp 745-751. */ /* ===================================================================== */ /* Replaced various illegal calls to DCOPY by calls to DLASET. */ /* Sven Hammarling, 1/5/02. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode and test input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; d_dim1 = *ldd; d_offset = 1 + d_dim1; d__ -= d_offset; e_dim1 = *lde; e_offset = 1 + e_dim1; e -= e_offset; f_dim1 = *ldf; f_offset = 1 + f_dim1; f -= f_offset; --work; --iwork; /* Function Body */ *info = 0; notran = _starpu_lsame_(trans, "N"); lquery = *lwork == -1; if (! notran && ! _starpu_lsame_(trans, "T")) { *info = -1; } else if (notran) { if (*ijob < 0 || *ijob > 4) { *info = -2; } } if (*info == 0) { if (*m <= 0) { *info = -3; } else if (*n <= 0) { *info = -4; } else if (*lda < max(1,*m)) { *info = -6; } else if (*ldb < max(1,*n)) { *info = -8; } else if (*ldc < max(1,*m)) { *info = -10; } else if (*ldd < max(1,*m)) { *info = -12; } else if (*lde < max(1,*n)) { *info = -14; } else if (*ldf < max(1,*m)) { *info = -16; } } if (*info == 0) { if (notran) { if (*ijob == 1 || *ijob == 2) { /* Computing MAX */ i__1 = 1, i__2 = (*m << 1) * *n; lwmin = max(i__1,i__2); } else { lwmin = 1; } } else { lwmin = 1; } work[1] = (doublereal) lwmin; if (*lwork < lwmin && ! lquery) { *info = -20; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTGSYL", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { *scale = 1.; if (notran) { if (*ijob != 0) { *dif = 0.; } } return 0; } /* Determine optimal block sizes MB and NB */ mb = _starpu_ilaenv_(&c__2, "DTGSYL", trans, m, n, &c_n1, &c_n1); nb = _starpu_ilaenv_(&c__5, "DTGSYL", trans, m, n, &c_n1, &c_n1); isolve = 1; ifunc = 0; if (notran) { if (*ijob >= 3) { ifunc = *ijob - 2; _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &c__[c_offset], ldc) ; _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &f[f_offset], ldf); } else if (*ijob >= 1) { isolve = 2; } } if (mb <= 1 && nb <= 1 || mb >= *m && nb >= *n) { i__1 = isolve; for (iround = 1; iround <= i__1; ++iround) { /* Use unblocked Level 2 solver */ dscale = 0.; dsum = 1.; pq = 0; _starpu_dtgsy2_(trans, &ifunc, m, n, &a[a_offset], lda, &b[b_offset], ldb, &c__[c_offset], ldc, &d__[d_offset], ldd, &e[e_offset], lde, &f[f_offset], ldf, scale, &dsum, &dscale, &iwork[1], &pq, info); if (dscale != 0.) { if (*ijob == 1 || *ijob == 3) { *dif = sqrt((doublereal) ((*m << 1) * *n)) / (dscale * sqrt(dsum)); } else { *dif = sqrt((doublereal) pq) / (dscale * sqrt(dsum)); } } if (isolve == 2 && iround == 1) { if (notran) { ifunc = *ijob; } scale2 = *scale; _starpu_dlacpy_("F", m, n, &c__[c_offset], ldc, &work[1], m); _starpu_dlacpy_("F", m, n, &f[f_offset], ldf, &work[*m * *n + 1], m); _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &c__[c_offset], ldc); _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &f[f_offset], ldf); } else if (isolve == 2 && iround == 2) { _starpu_dlacpy_("F", m, n, &work[1], m, &c__[c_offset], ldc); _starpu_dlacpy_("F", m, n, &work[*m * *n + 1], m, &f[f_offset], ldf); *scale = scale2; } /* L30: */ } return 0; } /* Determine block structure of A */ p = 0; i__ = 1; L40: if (i__ > *m) { goto L50; } ++p; iwork[p] = i__; i__ += mb; if (i__ >= *m) { goto L50; } if (a[i__ + (i__ - 1) * a_dim1] != 0.) { ++i__; } goto L40; L50: iwork[p + 1] = *m + 1; if (iwork[p] == iwork[p + 1]) { --p; } /* Determine block structure of B */ q = p + 1; j = 1; L60: if (j > *n) { goto L70; } ++q; iwork[q] = j; j += nb; if (j >= *n) { goto L70; } if (b[j + (j - 1) * b_dim1] != 0.) { ++j; } goto L60; L70: iwork[q + 1] = *n + 1; if (iwork[q] == iwork[q + 1]) { --q; } if (notran) { i__1 = isolve; for (iround = 1; iround <= i__1; ++iround) { /* Solve (I, J)-subsystem */ /* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) */ /* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) */ /* for I = P, P - 1,..., 1; J = 1, 2,..., Q */ dscale = 0.; dsum = 1.; pq = 0; *scale = 1.; i__2 = q; for (j = p + 2; j <= i__2; ++j) { js = iwork[j]; je = iwork[j + 1] - 1; nb = je - js + 1; for (i__ = p; i__ >= 1; --i__) { is = iwork[i__]; ie = iwork[i__ + 1] - 1; mb = ie - is + 1; ppqq = 0; _starpu_dtgsy2_(trans, &ifunc, &mb, &nb, &a[is + is * a_dim1], lda, &b[js + js * b_dim1], ldb, &c__[is + js * c_dim1], ldc, &d__[is + is * d_dim1], ldd, &e[js + js * e_dim1], lde, &f[is + js * f_dim1], ldf, & scaloc, &dsum, &dscale, &iwork[q + 2], &ppqq, & linfo); if (linfo > 0) { *info = linfo; } pq += ppqq; if (scaloc != 1.) { i__3 = js - 1; for (k = 1; k <= i__3; ++k) { _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); /* L80: */ } i__3 = je; for (k = js; k <= i__3; ++k) { i__4 = is - 1; _starpu_dscal_(&i__4, &scaloc, &c__[k * c_dim1 + 1], & c__1); i__4 = is - 1; _starpu_dscal_(&i__4, &scaloc, &f[k * f_dim1 + 1], &c__1); /* L90: */ } i__3 = je; for (k = js; k <= i__3; ++k) { i__4 = *m - ie; _starpu_dscal_(&i__4, &scaloc, &c__[ie + 1 + k * c_dim1], &c__1); i__4 = *m - ie; _starpu_dscal_(&i__4, &scaloc, &f[ie + 1 + k * f_dim1], & c__1); /* L100: */ } i__3 = *n; for (k = je + 1; k <= i__3; ++k) { _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); /* L110: */ } *scale *= scaloc; } /* Substitute R(I, J) and L(I, J) into remaining */ /* equation. */ if (i__ > 1) { i__3 = is - 1; _starpu_dgemm_("N", "N", &i__3, &nb, &mb, &c_b51, &a[is * a_dim1 + 1], lda, &c__[is + js * c_dim1], ldc, &c_b52, &c__[js * c_dim1 + 1], ldc); i__3 = is - 1; _starpu_dgemm_("N", "N", &i__3, &nb, &mb, &c_b51, &d__[is * d_dim1 + 1], ldd, &c__[is + js * c_dim1], ldc, &c_b52, &f[js * f_dim1 + 1], ldf); } if (j < q) { i__3 = *n - je; _starpu_dgemm_("N", "N", &mb, &i__3, &nb, &c_b52, &f[is + js * f_dim1], ldf, &b[js + (je + 1) * b_dim1], ldb, &c_b52, &c__[is + (je + 1) * c_dim1], ldc); i__3 = *n - je; _starpu_dgemm_("N", "N", &mb, &i__3, &nb, &c_b52, &f[is + js * f_dim1], ldf, &e[js + (je + 1) * e_dim1], lde, &c_b52, &f[is + (je + 1) * f_dim1], ldf); } /* L120: */ } /* L130: */ } if (dscale != 0.) { if (*ijob == 1 || *ijob == 3) { *dif = sqrt((doublereal) ((*m << 1) * *n)) / (dscale * sqrt(dsum)); } else { *dif = sqrt((doublereal) pq) / (dscale * sqrt(dsum)); } } if (isolve == 2 && iround == 1) { if (notran) { ifunc = *ijob; } scale2 = *scale; _starpu_dlacpy_("F", m, n, &c__[c_offset], ldc, &work[1], m); _starpu_dlacpy_("F", m, n, &f[f_offset], ldf, &work[*m * *n + 1], m); _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &c__[c_offset], ldc); _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &f[f_offset], ldf); } else if (isolve == 2 && iround == 2) { _starpu_dlacpy_("F", m, n, &work[1], m, &c__[c_offset], ldc); _starpu_dlacpy_("F", m, n, &work[*m * *n + 1], m, &f[f_offset], ldf); *scale = scale2; } /* L150: */ } } else { /* Solve transposed (I, J)-subsystem */ /* A(I, I)' * R(I, J) + D(I, I)' * L(I, J) = C(I, J) */ /* R(I, J) * B(J, J)' + L(I, J) * E(J, J)' = -F(I, J) */ /* for I = 1,2,..., P; J = Q, Q-1,..., 1 */ *scale = 1.; i__1 = p; for (i__ = 1; i__ <= i__1; ++i__) { is = iwork[i__]; ie = iwork[i__ + 1] - 1; mb = ie - is + 1; i__2 = p + 2; for (j = q; j >= i__2; --j) { js = iwork[j]; je = iwork[j + 1] - 1; nb = je - js + 1; _starpu_dtgsy2_(trans, &ifunc, &mb, &nb, &a[is + is * a_dim1], lda, & b[js + js * b_dim1], ldb, &c__[is + js * c_dim1], ldc, &d__[is + is * d_dim1], ldd, &e[js + js * e_dim1], lde, &f[is + js * f_dim1], ldf, &scaloc, &dsum, & dscale, &iwork[q + 2], &ppqq, &linfo); if (linfo > 0) { *info = linfo; } if (scaloc != 1.) { i__3 = js - 1; for (k = 1; k <= i__3; ++k) { _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); /* L160: */ } i__3 = je; for (k = js; k <= i__3; ++k) { i__4 = is - 1; _starpu_dscal_(&i__4, &scaloc, &c__[k * c_dim1 + 1], &c__1); i__4 = is - 1; _starpu_dscal_(&i__4, &scaloc, &f[k * f_dim1 + 1], &c__1); /* L170: */ } i__3 = je; for (k = js; k <= i__3; ++k) { i__4 = *m - ie; _starpu_dscal_(&i__4, &scaloc, &c__[ie + 1 + k * c_dim1], & c__1); i__4 = *m - ie; _starpu_dscal_(&i__4, &scaloc, &f[ie + 1 + k * f_dim1], &c__1) ; /* L180: */ } i__3 = *n; for (k = je + 1; k <= i__3; ++k) { _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1); _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1); /* L190: */ } *scale *= scaloc; } /* Substitute R(I, J) and L(I, J) into remaining equation. */ if (j > p + 2) { i__3 = js - 1; _starpu_dgemm_("N", "T", &mb, &i__3, &nb, &c_b52, &c__[is + js * c_dim1], ldc, &b[js * b_dim1 + 1], ldb, &c_b52, & f[is + f_dim1], ldf); i__3 = js - 1; _starpu_dgemm_("N", "T", &mb, &i__3, &nb, &c_b52, &f[is + js * f_dim1], ldf, &e[js * e_dim1 + 1], lde, &c_b52, & f[is + f_dim1], ldf); } if (i__ < p) { i__3 = *m - ie; _starpu_dgemm_("T", "N", &i__3, &nb, &mb, &c_b51, &a[is + (ie + 1) * a_dim1], lda, &c__[is + js * c_dim1], ldc, & c_b52, &c__[ie + 1 + js * c_dim1], ldc); i__3 = *m - ie; _starpu_dgemm_("T", "N", &i__3, &nb, &mb, &c_b51, &d__[is + (ie + 1) * d_dim1], ldd, &f[is + js * f_dim1], ldf, & c_b52, &c__[ie + 1 + js * c_dim1], ldc); } /* L200: */ } /* L210: */ } } work[1] = (doublereal) lwmin; return 0; /* End of DTGSYL */ } /* _starpu_dtgsyl_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtpcon.c000066400000000000000000000150521507764646700206660ustar00rootroot00000000000000/* dtpcon.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dtpcon_(char *norm, char *uplo, char *diag, integer *n, doublereal *ap, doublereal *rcond, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer i__1; doublereal d__1; /* Local variables */ integer ix, kase, kase1; doublereal scale; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, integer *); doublereal anorm; logical upper; doublereal xnorm; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern doublereal _starpu_dlantp_(char *, char *, char *, integer *, doublereal *, doublereal *); doublereal ainvnm; extern /* Subroutine */ int _starpu_dlatps_(char *, char *, char *, char *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *); logical onenrm; char normin[1]; doublereal smlnum; logical nounit; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTPCON estimates the reciprocal of the condition number of a packed */ /* triangular matrix A, in either the 1-norm or the infinity-norm. */ /* The norm of A is computed and an estimate is obtained for */ /* norm(inv(A)), then the reciprocal of the condition number is */ /* computed as */ /* RCOND = 1 / ( norm(A) * norm(inv(A)) ). */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies whether the 1-norm condition number or the */ /* infinity-norm condition number is required: */ /* = '1' or 'O': 1-norm; */ /* = 'I': Infinity-norm. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* DIAG (input) CHARACTER*1 */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The upper or lower triangular matrix A, packed columnwise in */ /* a linear array. The j-th column of A is stored in the array */ /* AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* If DIAG = 'U', the diagonal elements of A are not referenced */ /* and are assumed to be 1. */ /* RCOND (output) DOUBLE PRECISION */ /* The reciprocal of the condition number of the matrix A, */ /* computed as RCOND = 1/(norm(A) * norm(inv(A))). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --iwork; --work; --ap; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); onenrm = *(unsigned char *)norm == '1' || _starpu_lsame_(norm, "O"); nounit = _starpu_lsame_(diag, "N"); if (! onenrm && ! _starpu_lsame_(norm, "I")) { *info = -1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -3; } else if (*n < 0) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTPCON", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { *rcond = 1.; return 0; } *rcond = 0.; smlnum = _starpu_dlamch_("Safe minimum") * (doublereal) max(1,*n); /* Compute the norm of the triangular matrix A. */ anorm = _starpu_dlantp_(norm, uplo, diag, n, &ap[1], &work[1]); /* Continue only if ANORM > 0. */ if (anorm > 0.) { /* Estimate the norm of the inverse of A. */ ainvnm = 0.; *(unsigned char *)normin = 'N'; if (onenrm) { kase1 = 1; } else { kase1 = 2; } kase = 0; L10: _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); if (kase != 0) { if (kase == kase1) { /* Multiply by inv(A). */ _starpu_dlatps_(uplo, "No transpose", diag, normin, n, &ap[1], &work[ 1], &scale, &work[(*n << 1) + 1], info); } else { /* Multiply by inv(A'). */ _starpu_dlatps_(uplo, "Transpose", diag, normin, n, &ap[1], &work[1], &scale, &work[(*n << 1) + 1], info); } *(unsigned char *)normin = 'Y'; /* Multiply by 1/SCALE if doing so will not cause overflow. */ if (scale != 1.) { ix = _starpu_idamax_(n, &work[1], &c__1); xnorm = (d__1 = work[ix], abs(d__1)); if (scale < xnorm * smlnum || scale == 0.) { goto L20; } _starpu_drscl_(n, &scale, &work[1], &c__1); } goto L10; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { *rcond = 1. / anorm / ainvnm; } } L20: return 0; /* End of DTPCON */ } /* _starpu_dtpcon_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtprfs.c000066400000000000000000000330731507764646700207040ustar00rootroot00000000000000/* dtprfs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b19 = -1.; /* Subroutine */ int _starpu_dtprfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j, k; doublereal s; integer kc; doublereal xk; integer nz; doublereal eps; integer kase; doublereal safe1, safe2; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dtpmv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dtpsv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *), _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical notran; char transt[1]; logical nounit; doublereal lstres; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTPRFS provides error bounds and backward error estimates for the */ /* solution to a system of linear equations with a triangular packed */ /* coefficient matrix. */ /* The solution matrix X must be computed by DTPTRS or some other */ /* means before entering this routine. DTPRFS does not do iterative */ /* refinement because doing so cannot improve the backward error. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ /* DIAG (input) CHARACTER*1 */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The upper or lower triangular matrix A, packed columnwise in */ /* a linear array. The j-th column of A is stored in the array */ /* AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ /* If DIAG = 'U', the diagonal elements of A are not referenced */ /* and are assumed to be 1. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* The solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); notran = _starpu_lsame_(trans, "N"); nounit = _starpu_lsame_(diag, "N"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*nrhs < 0) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -8; } else if (*ldx < max(1,*n)) { *info = -10; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTPRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } if (notran) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = *n + 1; eps = _starpu_dlamch_("Epsilon"); safmin = _starpu_dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { /* Compute residual R = B - op(A) * X, */ /* where op(A) = A or A', depending on TRANS. */ _starpu_dcopy_(n, &x[j * x_dim1 + 1], &c__1, &work[*n + 1], &c__1); _starpu_dtpmv_(uplo, trans, diag, n, &ap[1], &work[*n + 1], &c__1); _starpu_daxpy_(n, &c_b19, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. If the i-th component of the denominator is less */ /* than SAFE2, then SAFE1 is added to the i-th components of the */ /* numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); /* L20: */ } if (notran) { /* Compute abs(A)*abs(X) + abs(B). */ if (upper) { kc = 1; if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x[k + j * x_dim1], abs(d__1)); i__3 = k; for (i__ = 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = ap[kc + i__ - 1], abs(d__1)) * xk; /* L30: */ } kc += k; /* L40: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x[k + j * x_dim1], abs(d__1)); i__3 = k - 1; for (i__ = 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = ap[kc + i__ - 1], abs(d__1)) * xk; /* L50: */ } work[k] += xk; kc += k; /* L60: */ } } } else { kc = 1; if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x[k + j * x_dim1], abs(d__1)); i__3 = *n; for (i__ = k; i__ <= i__3; ++i__) { work[i__] += (d__1 = ap[kc + i__ - k], abs(d__1)) * xk; /* L70: */ } kc = kc + *n - k + 1; /* L80: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x[k + j * x_dim1], abs(d__1)); i__3 = *n; for (i__ = k + 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = ap[kc + i__ - k], abs(d__1)) * xk; /* L90: */ } work[k] += xk; kc = kc + *n - k + 1; /* L100: */ } } } } else { /* Compute abs(A')*abs(X) + abs(B). */ if (upper) { kc = 1; if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; i__3 = k; for (i__ = 1; i__ <= i__3; ++i__) { s += (d__1 = ap[kc + i__ - 1], abs(d__1)) * (d__2 = x[i__ + j * x_dim1], abs(d__2)); /* L110: */ } work[k] += s; kc += k; /* L120: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = (d__1 = x[k + j * x_dim1], abs(d__1)); i__3 = k - 1; for (i__ = 1; i__ <= i__3; ++i__) { s += (d__1 = ap[kc + i__ - 1], abs(d__1)) * (d__2 = x[i__ + j * x_dim1], abs(d__2)); /* L130: */ } work[k] += s; kc += k; /* L140: */ } } } else { kc = 1; if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; i__3 = *n; for (i__ = k; i__ <= i__3; ++i__) { s += (d__1 = ap[kc + i__ - k], abs(d__1)) * (d__2 = x[i__ + j * x_dim1], abs(d__2)); /* L150: */ } work[k] += s; kc = kc + *n - k + 1; /* L160: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = (d__1 = x[k + j * x_dim1], abs(d__1)); i__3 = *n; for (i__ = k + 1; i__ <= i__3; ++i__) { s += (d__1 = ap[kc + i__ - k], abs(d__1)) * (d__2 = x[i__ + j * x_dim1], abs(d__2)); /* L170: */ } work[k] += s; kc = kc + *n - k + 1; /* L180: */ } } } } s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L190: */ } berr[j] = s; /* Bound error from formula */ /* norm(X - XTRUE) / norm(X) .le. FERR = */ /* norm( abs(inv(op(A)))* */ /* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ /* where */ /* norm(Z) is the magnitude of the largest component of Z */ /* inv(op(A)) is the inverse of op(A) */ /* abs(Z) is the componentwise absolute value of the matrix or */ /* vector Z */ /* NZ is the maximum number of nonzeros in any row of A, plus 1 */ /* EPS is machine epsilon */ /* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ /* is incremented by SAFE1 if the i-th component of */ /* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ /* Use DLACN2 to estimate the infinity-norm of the matrix */ /* inv(op(A)) * diag(W), */ /* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L200: */ } kase = 0; L210: _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase, isave); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(op(A)'). */ _starpu_dtpsv_(uplo, transt, diag, n, &ap[1], &work[*n + 1], &c__1); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L220: */ } } else { /* Multiply by inv(op(A))*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L230: */ } _starpu_dtpsv_(uplo, trans, diag, n, &ap[1], &work[*n + 1], &c__1); } goto L210; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); lstres = max(d__2,d__3); /* L240: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L250: */ } return 0; /* End of DTPRFS */ } /* _starpu_dtprfs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtptri.c000066400000000000000000000132641507764646700207100ustar00rootroot00000000000000/* dtptri.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dtptri_(char *uplo, char *diag, integer *n, doublereal * ap, integer *info) { /* System generated locals */ integer i__1, i__2; /* Local variables */ integer j, jc, jj; doublereal ajj; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtpmv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); integer jclast; logical nounit; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTPTRI computes the inverse of a real upper or lower triangular */ /* matrix A stored in packed format. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* DIAG (input) CHARACTER*1 */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* On entry, the upper or lower triangular matrix A, stored */ /* columnwise in a linear array. The j-th column of A is stored */ /* in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*((2*n-j)/2) = A(i,j) for j<=i<=n. */ /* See below for further details. */ /* On exit, the (triangular) inverse of the original matrix, in */ /* the same packed storage format. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, A(i,i) is exactly zero. The triangular */ /* matrix is singular and its inverse can not be computed. */ /* Further Details */ /* =============== */ /* A triangular matrix A can be transferred to packed storage using one */ /* of the following program segments: */ /* UPLO = 'U': UPLO = 'L': */ /* JC = 1 JC = 1 */ /* DO 2 J = 1, N DO 2 J = 1, N */ /* DO 1 I = 1, J DO 1 I = J, N */ /* AP(JC+I-1) = A(I,J) AP(JC+I-J) = A(I,J) */ /* 1 CONTINUE 1 CONTINUE */ /* JC = JC + J JC = JC + N - J + 1 */ /* 2 CONTINUE 2 CONTINUE */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); nounit = _starpu_lsame_(diag, "N"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -2; } else if (*n < 0) { *info = -3; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTPTRI", &i__1); return 0; } /* Check for singularity if non-unit. */ if (nounit) { if (upper) { jj = 0; i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { jj += *info; if (ap[jj] == 0.) { return 0; } /* L10: */ } } else { jj = 1; i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (ap[jj] == 0.) { return 0; } jj = jj + *n - *info + 1; /* L20: */ } } *info = 0; } if (upper) { /* Compute inverse of upper triangular matrix. */ jc = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (nounit) { ap[jc + j - 1] = 1. / ap[jc + j - 1]; ajj = -ap[jc + j - 1]; } else { ajj = -1.; } /* Compute elements 1:j-1 of j-th column. */ i__2 = j - 1; _starpu_dtpmv_("Upper", "No transpose", diag, &i__2, &ap[1], &ap[jc], & c__1); i__2 = j - 1; _starpu_dscal_(&i__2, &ajj, &ap[jc], &c__1); jc += j; /* L30: */ } } else { /* Compute inverse of lower triangular matrix. */ jc = *n * (*n + 1) / 2; for (j = *n; j >= 1; --j) { if (nounit) { ap[jc] = 1. / ap[jc]; ajj = -ap[jc]; } else { ajj = -1.; } if (j < *n) { /* Compute elements j+1:n of j-th column. */ i__1 = *n - j; _starpu_dtpmv_("Lower", "No transpose", diag, &i__1, &ap[jclast], &ap[ jc + 1], &c__1); i__1 = *n - j; _starpu_dscal_(&i__1, &ajj, &ap[jc + 1], &c__1); } jclast = jc; jc = jc - *n + j - 2; /* L40: */ } } return 0; /* End of DTPTRI */ } /* _starpu_dtptri_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtptrs.c000066400000000000000000000123071507764646700207170ustar00rootroot00000000000000/* dtptrs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dtptrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, integer * info) { /* System generated locals */ integer b_dim1, b_offset, i__1; /* Local variables */ integer j, jc; extern logical _starpu_lsame_(char *, char *); logical upper; extern /* Subroutine */ int _starpu_dtpsv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); logical nounit; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTPTRS solves a triangular system of the form */ /* A * X = B or A**T * X = B, */ /* where A is a triangular matrix of order N stored in packed format, */ /* and B is an N-by-NRHS matrix. A check is made to verify that A is */ /* nonsingular. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ /* DIAG (input) CHARACTER*1 */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */ /* The upper or lower triangular matrix A, packed columnwise in */ /* a linear array. The j-th column of A is stored in the array */ /* AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side matrix B. */ /* On exit, if INFO = 0, the solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the i-th diagonal element of A is zero, */ /* indicating that the matrix is singular and the */ /* solutions X have not been computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); nounit = _starpu_lsame_(diag, "N"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*nrhs < 0) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -8; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTPTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check for singularity. */ if (nounit) { if (upper) { jc = 1; i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (ap[jc + *info - 1] == 0.) { return 0; } jc += *info; /* L10: */ } } else { jc = 1; i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (ap[jc] == 0.) { return 0; } jc = jc + *n - *info + 1; /* L20: */ } } } *info = 0; /* Solve A * x = b or A' * x = b. */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { _starpu_dtpsv_(uplo, trans, diag, n, &ap[1], &b[j * b_dim1 + 1], &c__1); /* L30: */ } return 0; /* End of DTPTRS */ } /* _starpu_dtptrs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtpttf.c000066400000000000000000000273341507764646700207120ustar00rootroot00000000000000/* dtpttf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtpttf_(char *transr, char *uplo, integer *n, doublereal *ap, doublereal *arf, integer *info) { /* System generated locals */ integer i__1, i__2, i__3; /* Local variables */ integer i__, j, k, n1, n2, ij, jp, js, nt, lda, ijp; logical normaltransr; extern logical _starpu_lsame_(char *, char *); logical lower; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nisodd; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* Purpose */ /* ======= */ /* DTPTTF copies a triangular matrix A from standard packed format (TP) */ /* to rectangular full packed format (TF). */ /* Arguments */ /* ========= */ /* TRANSR (input) CHARACTER */ /* = 'N': ARF in Normal format is wanted; */ /* = 'T': ARF in Conjugate-transpose format is wanted. */ /* UPLO (input) CHARACTER */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ), */ /* On entry, the upper or lower triangular matrix A, packed */ /* columnwise in a linear array. The j-th column of A is stored */ /* in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* ARF (output) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ), */ /* On exit, the upper or lower triangular matrix A stored in */ /* RFP format. For a further discussion see Notes below. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Notes */ /* ===== */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* even. We give an example where N = 6. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 05 00 */ /* 11 12 13 14 15 10 11 */ /* 22 23 24 25 20 21 22 */ /* 33 34 35 30 31 32 33 */ /* 44 45 40 41 42 43 44 */ /* 55 50 51 52 53 54 55 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ /* the transpose of the first three columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ /* the transpose of the last three columns of AP lower. */ /* This covers the case N even and TRANSR = 'N'. */ /* RFP A RFP A */ /* 03 04 05 33 43 53 */ /* 13 14 15 00 44 54 */ /* 23 24 25 10 11 55 */ /* 33 34 35 20 21 22 */ /* 00 44 45 30 31 32 */ /* 01 11 55 40 41 42 */ /* 02 12 22 50 51 52 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ /* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ /* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* odd. We give an example where N = 5. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 00 */ /* 11 12 13 14 10 11 */ /* 22 23 24 20 21 22 */ /* 33 34 30 31 32 33 */ /* 44 40 41 42 43 44 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ /* the transpose of the first two columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ /* the transpose of the last two columns of AP lower. */ /* This covers the case N odd and TRANSR = 'N'. */ /* RFP A RFP A */ /* 02 03 04 00 33 43 */ /* 12 13 14 10 11 44 */ /* 22 23 24 20 21 22 */ /* 00 33 34 30 31 32 */ /* 01 11 44 40 41 42 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 02 12 22 00 01 00 10 20 30 40 50 */ /* 03 13 23 33 11 33 11 21 31 41 51 */ /* 04 14 24 34 44 43 44 22 32 42 52 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ *info = 0; normaltransr = _starpu_lsame_(transr, "N"); lower = _starpu_lsame_(uplo, "L"); if (! normaltransr && ! _starpu_lsame_(transr, "T")) { *info = -1; } else if (! lower && ! _starpu_lsame_(uplo, "U")) { *info = -2; } else if (*n < 0) { *info = -3; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTPTTF", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { if (normaltransr) { arf[0] = ap[0]; } else { arf[0] = ap[0]; } return 0; } /* Size of array ARF(0:NT-1) */ nt = *n * (*n + 1) / 2; /* Set N1 and N2 depending on LOWER */ if (lower) { n2 = *n / 2; n1 = *n - n2; } else { n1 = *n / 2; n2 = *n - n1; } /* If N is odd, set NISODD = .TRUE. */ /* If N is even, set K = N/2 and NISODD = .FALSE. */ /* set lda of ARF^C; ARF^C is (0:(N+1)/2-1,0:N-noe) */ /* where noe = 0 if n is even, noe = 1 if n is odd */ if (*n % 2 == 0) { k = *n / 2; nisodd = FALSE_; lda = *n + 1; } else { nisodd = TRUE_; lda = *n; } /* ARF^C has lda rows and n+1-noe cols */ if (! normaltransr) { lda = (*n + 1) / 2; } /* start execution: there are eight cases */ if (nisodd) { /* N is odd */ if (normaltransr) { /* N is odd and TRANSR = 'N' */ if (lower) { /* N is odd, TRANSR = 'N', and UPLO = 'L' */ ijp = 0; jp = 0; i__1 = n2; for (j = 0; j <= i__1; ++j) { i__2 = *n - 1; for (i__ = j; i__ <= i__2; ++i__) { ij = i__ + jp; arf[ij] = ap[ijp]; ++ijp; } jp += lda; } i__1 = n2 - 1; for (i__ = 0; i__ <= i__1; ++i__) { i__2 = n2; for (j = i__ + 1; j <= i__2; ++j) { ij = i__ + j * lda; arf[ij] = ap[ijp]; ++ijp; } } } else { /* N is odd, TRANSR = 'N', and UPLO = 'U' */ ijp = 0; i__1 = n1 - 1; for (j = 0; j <= i__1; ++j) { ij = n2 + j; i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { arf[ij] = ap[ijp]; ++ijp; ij += lda; } } js = 0; i__1 = *n - 1; for (j = n1; j <= i__1; ++j) { ij = js; i__2 = js + j; for (ij = js; ij <= i__2; ++ij) { arf[ij] = ap[ijp]; ++ijp; } js += lda; } } } else { /* N is odd and TRANSR = 'T' */ if (lower) { /* N is odd, TRANSR = 'T', and UPLO = 'L' */ ijp = 0; i__1 = n2; for (i__ = 0; i__ <= i__1; ++i__) { i__2 = *n * lda - 1; i__3 = lda; for (ij = i__ * (lda + 1); i__3 < 0 ? ij >= i__2 : ij <= i__2; ij += i__3) { arf[ij] = ap[ijp]; ++ijp; } } js = 1; i__1 = n2 - 1; for (j = 0; j <= i__1; ++j) { i__3 = js + n2 - j - 1; for (ij = js; ij <= i__3; ++ij) { arf[ij] = ap[ijp]; ++ijp; } js = js + lda + 1; } } else { /* N is odd, TRANSR = 'T', and UPLO = 'U' */ ijp = 0; js = n2 * lda; i__1 = n1 - 1; for (j = 0; j <= i__1; ++j) { i__3 = js + j; for (ij = js; ij <= i__3; ++ij) { arf[ij] = ap[ijp]; ++ijp; } js += lda; } i__1 = n1; for (i__ = 0; i__ <= i__1; ++i__) { i__3 = i__ + (n1 + i__) * lda; i__2 = lda; for (ij = i__; i__2 < 0 ? ij >= i__3 : ij <= i__3; ij += i__2) { arf[ij] = ap[ijp]; ++ijp; } } } } } else { /* N is even */ if (normaltransr) { /* N is even and TRANSR = 'N' */ if (lower) { /* N is even, TRANSR = 'N', and UPLO = 'L' */ ijp = 0; jp = 0; i__1 = k - 1; for (j = 0; j <= i__1; ++j) { i__2 = *n - 1; for (i__ = j; i__ <= i__2; ++i__) { ij = i__ + 1 + jp; arf[ij] = ap[ijp]; ++ijp; } jp += lda; } i__1 = k - 1; for (i__ = 0; i__ <= i__1; ++i__) { i__2 = k - 1; for (j = i__; j <= i__2; ++j) { ij = i__ + j * lda; arf[ij] = ap[ijp]; ++ijp; } } } else { /* N is even, TRANSR = 'N', and UPLO = 'U' */ ijp = 0; i__1 = k - 1; for (j = 0; j <= i__1; ++j) { ij = k + 1 + j; i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { arf[ij] = ap[ijp]; ++ijp; ij += lda; } } js = 0; i__1 = *n - 1; for (j = k; j <= i__1; ++j) { ij = js; i__2 = js + j; for (ij = js; ij <= i__2; ++ij) { arf[ij] = ap[ijp]; ++ijp; } js += lda; } } } else { /* N is even and TRANSR = 'T' */ if (lower) { /* N is even, TRANSR = 'T', and UPLO = 'L' */ ijp = 0; i__1 = k - 1; for (i__ = 0; i__ <= i__1; ++i__) { i__2 = (*n + 1) * lda - 1; i__3 = lda; for (ij = i__ + (i__ + 1) * lda; i__3 < 0 ? ij >= i__2 : ij <= i__2; ij += i__3) { arf[ij] = ap[ijp]; ++ijp; } } js = 0; i__1 = k - 1; for (j = 0; j <= i__1; ++j) { i__3 = js + k - j - 1; for (ij = js; ij <= i__3; ++ij) { arf[ij] = ap[ijp]; ++ijp; } js = js + lda + 1; } } else { /* N is even, TRANSR = 'T', and UPLO = 'U' */ ijp = 0; js = (k + 1) * lda; i__1 = k - 1; for (j = 0; j <= i__1; ++j) { i__3 = js + j; for (ij = js; ij <= i__3; ++ij) { arf[ij] = ap[ijp]; ++ijp; } js += lda; } i__1 = k - 1; for (i__ = 0; i__ <= i__1; ++i__) { i__3 = i__ + (k + i__) * lda; i__2 = lda; for (ij = i__; i__2 < 0 ? ij >= i__3 : ij <= i__3; ij += i__2) { arf[ij] = ap[ijp]; ++ijp; } } } } } return 0; /* End of DTPTTF */ } /* _starpu_dtpttf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtpttr.c000066400000000000000000000101101507764646700207060ustar00rootroot00000000000000/* dtpttr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtpttr_(char *uplo, integer *n, doublereal *ap, doublereal *a, integer *lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, k; extern logical _starpu_lsame_(char *, char *); logical lower; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Julien Langou of the Univ. of Colorado Denver -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTPTTR copies a triangular matrix A from standard packed format (TP) */ /* to standard full format (TR). */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER */ /* = 'U': A is upper triangular. */ /* = 'L': A is lower triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* AP (input) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ), */ /* On entry, the upper or lower triangular matrix A, packed */ /* columnwise in a linear array. The j-th column of A is stored */ /* in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* A (output) DOUBLE PRECISION array, dimension ( LDA, N ) */ /* On exit, the triangular matrix A. If UPLO = 'U', the leading */ /* N-by-N upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; lower = _starpu_lsame_(uplo, "L"); if (! lower && ! _starpu_lsame_(uplo, "U")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTPTTR", &i__1); return 0; } if (lower) { k = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { ++k; a[i__ + j * a_dim1] = ap[k]; } } } else { k = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { ++k; a[i__ + j * a_dim1] = ap[k]; } } } return 0; /* End of DTPTTR */ } /* _starpu_dtpttr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtrcon.c000066400000000000000000000157341507764646700206770ustar00rootroot00000000000000/* dtrcon.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dtrcon_(char *norm, char *uplo, char *diag, integer *n, doublereal *a, integer *lda, doublereal *rcond, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1; doublereal d__1; /* Local variables */ integer ix, kase, kase1; doublereal scale; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_drscl_(integer *, doublereal *, doublereal *, integer *); doublereal anorm; logical upper; doublereal xnorm; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); extern doublereal _starpu_dlantr_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *); doublereal ainvnm; extern /* Subroutine */ int _starpu_dlatrs_(char *, char *, char *, char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *); logical onenrm; char normin[1]; doublereal smlnum; logical nounit; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRCON estimates the reciprocal of the condition number of a */ /* triangular matrix A, in either the 1-norm or the infinity-norm. */ /* The norm of A is computed and an estimate is obtained for */ /* norm(inv(A)), then the reciprocal of the condition number is */ /* computed as */ /* RCOND = 1 / ( norm(A) * norm(inv(A)) ). */ /* Arguments */ /* ========= */ /* NORM (input) CHARACTER*1 */ /* Specifies whether the 1-norm condition number or the */ /* infinity-norm condition number is required: */ /* = '1' or 'O': 1-norm; */ /* = 'I': Infinity-norm. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* DIAG (input) CHARACTER*1 */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The triangular matrix A. If UPLO = 'U', the leading N-by-N */ /* upper triangular part of the array A contains the upper */ /* triangular matrix, and the strictly lower triangular part of */ /* A is not referenced. If UPLO = 'L', the leading N-by-N lower */ /* triangular part of the array A contains the lower triangular */ /* matrix, and the strictly upper triangular part of A is not */ /* referenced. If DIAG = 'U', the diagonal elements of A are */ /* also not referenced and are assumed to be 1. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* RCOND (output) DOUBLE PRECISION */ /* The reciprocal of the condition number of the matrix A, */ /* computed as RCOND = 1/(norm(A) * norm(inv(A))). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --work; --iwork; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); onenrm = *(unsigned char *)norm == '1' || _starpu_lsame_(norm, "O"); nounit = _starpu_lsame_(diag, "N"); if (! onenrm && ! _starpu_lsame_(norm, "I")) { *info = -1; } else if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -2; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTRCON", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { *rcond = 1.; return 0; } *rcond = 0.; smlnum = _starpu_dlamch_("Safe minimum") * (doublereal) max(1,*n); /* Compute the norm of the triangular matrix A. */ anorm = _starpu_dlantr_(norm, uplo, diag, n, n, &a[a_offset], lda, &work[1]); /* Continue only if ANORM > 0. */ if (anorm > 0.) { /* Estimate the norm of the inverse of A. */ ainvnm = 0.; *(unsigned char *)normin = 'N'; if (onenrm) { kase1 = 1; } else { kase1 = 2; } kase = 0; L10: _starpu_dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); if (kase != 0) { if (kase == kase1) { /* Multiply by inv(A). */ _starpu_dlatrs_(uplo, "No transpose", diag, normin, n, &a[a_offset], lda, &work[1], &scale, &work[(*n << 1) + 1], info); } else { /* Multiply by inv(A'). */ _starpu_dlatrs_(uplo, "Transpose", diag, normin, n, &a[a_offset], lda, &work[1], &scale, &work[(*n << 1) + 1], info); } *(unsigned char *)normin = 'Y'; /* Multiply by 1/SCALE if doing so will not cause overflow. */ if (scale != 1.) { ix = _starpu_idamax_(n, &work[1], &c__1); xnorm = (d__1 = work[ix], abs(d__1)); if (scale < xnorm * smlnum || scale == 0.) { goto L20; } _starpu_drscl_(n, &scale, &work[1], &c__1); } goto L10; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { *rcond = 1. / anorm / ainvnm; } } L20: return 0; /* End of DTRCON */ } /* _starpu_dtrcon_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtrevc.c000066400000000000000000001021321507764646700206620ustar00rootroot00000000000000/* dtrevc.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static logical c_false = FALSE_; static integer c__1 = 1; static doublereal c_b22 = 1.; static doublereal c_b25 = 0.; static integer c__2 = 2; static logical c_true = TRUE_; /* Subroutine */ int _starpu_dtrevc_(char *side, char *howmny, logical *select, integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, doublereal *work, integer *info) { /* System generated locals */ integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, k; doublereal x[4] /* was [2][2] */; integer j1, j2, n2, ii, ki, ip, is; doublereal wi, wr, rec, ulp, beta, emax; logical pair; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); logical allv; integer ierr; doublereal unfl, ovfl, smin; logical over; doublereal vmax; integer jnxt; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); doublereal scale; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); doublereal remax; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); logical leftv, bothv; extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); doublereal vcrit; logical somev; doublereal xnorm; extern /* Subroutine */ int _starpu_dlaln2_(logical *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal * , doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlabad_(doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *); extern integer _starpu_idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; logical rightv; doublereal smlnum; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTREVC computes some or all of the right and/or left eigenvectors of */ /* a real upper quasi-triangular matrix T. */ /* Matrices of this type are produced by the Schur factorization of */ /* a real general matrix: A = Q*T*Q**T, as computed by DHSEQR. */ /* The right eigenvector x and the left eigenvector y of T corresponding */ /* to an eigenvalue w are defined by: */ /* T*x = w*x, (y**H)*T = w*(y**H) */ /* where y**H denotes the conjugate transpose of y. */ /* The eigenvalues are not input to this routine, but are read directly */ /* from the diagonal blocks of T. */ /* This routine returns the matrices X and/or Y of right and left */ /* eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an */ /* input matrix. If Q is the orthogonal factor that reduces a matrix */ /* A to Schur form T, then Q*X and Q*Y are the matrices of right and */ /* left eigenvectors of A. */ /* Arguments */ /* ========= */ /* SIDE (input) CHARACTER*1 */ /* = 'R': compute right eigenvectors only; */ /* = 'L': compute left eigenvectors only; */ /* = 'B': compute both right and left eigenvectors. */ /* HOWMNY (input) CHARACTER*1 */ /* = 'A': compute all right and/or left eigenvectors; */ /* = 'B': compute all right and/or left eigenvectors, */ /* backtransformed by the matrices in VR and/or VL; */ /* = 'S': compute selected right and/or left eigenvectors, */ /* as indicated by the logical array SELECT. */ /* SELECT (input/output) LOGICAL array, dimension (N) */ /* If HOWMNY = 'S', SELECT specifies the eigenvectors to be */ /* computed. */ /* If w(j) is a real eigenvalue, the corresponding real */ /* eigenvector is computed if SELECT(j) is .TRUE.. */ /* If w(j) and w(j+1) are the real and imaginary parts of a */ /* complex eigenvalue, the corresponding complex eigenvector is */ /* computed if either SELECT(j) or SELECT(j+1) is .TRUE., and */ /* on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is set to */ /* .FALSE.. */ /* Not referenced if HOWMNY = 'A' or 'B'. */ /* N (input) INTEGER */ /* The order of the matrix T. N >= 0. */ /* T (input) DOUBLE PRECISION array, dimension (LDT,N) */ /* The upper quasi-triangular matrix T in Schur canonical form. */ /* LDT (input) INTEGER */ /* The leading dimension of the array T. LDT >= max(1,N). */ /* VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) */ /* On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must */ /* contain an N-by-N matrix Q (usually the orthogonal matrix Q */ /* of Schur vectors returned by DHSEQR). */ /* On exit, if SIDE = 'L' or 'B', VL contains: */ /* if HOWMNY = 'A', the matrix Y of left eigenvectors of T; */ /* if HOWMNY = 'B', the matrix Q*Y; */ /* if HOWMNY = 'S', the left eigenvectors of T specified by */ /* SELECT, stored consecutively in the columns */ /* of VL, in the same order as their */ /* eigenvalues. */ /* A complex eigenvector corresponding to a complex eigenvalue */ /* is stored in two consecutive columns, the first holding the */ /* real part, and the second the imaginary part. */ /* Not referenced if SIDE = 'R'. */ /* LDVL (input) INTEGER */ /* The leading dimension of the array VL. LDVL >= 1, and if */ /* SIDE = 'L' or 'B', LDVL >= N. */ /* VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) */ /* On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must */ /* contain an N-by-N matrix Q (usually the orthogonal matrix Q */ /* of Schur vectors returned by DHSEQR). */ /* On exit, if SIDE = 'R' or 'B', VR contains: */ /* if HOWMNY = 'A', the matrix X of right eigenvectors of T; */ /* if HOWMNY = 'B', the matrix Q*X; */ /* if HOWMNY = 'S', the right eigenvectors of T specified by */ /* SELECT, stored consecutively in the columns */ /* of VR, in the same order as their */ /* eigenvalues. */ /* A complex eigenvector corresponding to a complex eigenvalue */ /* is stored in two consecutive columns, the first holding the */ /* real part and the second the imaginary part. */ /* Not referenced if SIDE = 'L'. */ /* LDVR (input) INTEGER */ /* The leading dimension of the array VR. LDVR >= 1, and if */ /* SIDE = 'R' or 'B', LDVR >= N. */ /* MM (input) INTEGER */ /* The number of columns in the arrays VL and/or VR. MM >= M. */ /* M (output) INTEGER */ /* The number of columns in the arrays VL and/or VR actually */ /* used to store the eigenvectors. */ /* If HOWMNY = 'A' or 'B', M is set to N. */ /* Each selected real eigenvector occupies one column and each */ /* selected complex eigenvector occupies two columns. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The algorithm used in this program is basically backward (forward) */ /* substitution, with scaling to make the the code robust against */ /* possible overflow. */ /* Each eigenvector is normalized so that the element of largest */ /* magnitude has magnitude 1; here the magnitude of a complex number */ /* (x,y) is taken to be |x| + |y|. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. Executable Statements .. */ /* Decode and test the input parameters */ /* Parameter adjustments */ --select; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1; vr -= vr_offset; --work; /* Function Body */ bothv = _starpu_lsame_(side, "B"); rightv = _starpu_lsame_(side, "R") || bothv; leftv = _starpu_lsame_(side, "L") || bothv; allv = _starpu_lsame_(howmny, "A"); over = _starpu_lsame_(howmny, "B"); somev = _starpu_lsame_(howmny, "S"); *info = 0; if (! rightv && ! leftv) { *info = -1; } else if (! allv && ! over && ! somev) { *info = -2; } else if (*n < 0) { *info = -4; } else if (*ldt < max(1,*n)) { *info = -6; } else if (*ldvl < 1 || leftv && *ldvl < *n) { *info = -8; } else if (*ldvr < 1 || rightv && *ldvr < *n) { *info = -10; } else { /* Set M to the number of columns required to store the selected */ /* eigenvectors, standardize the array SELECT if necessary, and */ /* test MM. */ if (somev) { *m = 0; pair = FALSE_; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (pair) { pair = FALSE_; select[j] = FALSE_; } else { if (j < *n) { if (t[j + 1 + j * t_dim1] == 0.) { if (select[j]) { ++(*m); } } else { pair = TRUE_; if (select[j] || select[j + 1]) { select[j] = TRUE_; *m += 2; } } } else { if (select[*n]) { ++(*m); } } } /* L10: */ } } else { *m = *n; } if (*mm < *m) { *info = -11; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTREVC", &i__1); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } /* Set the constants to control overflow. */ unfl = _starpu_dlamch_("Safe minimum"); ovfl = 1. / unfl; _starpu_dlabad_(&unfl, &ovfl); ulp = _starpu_dlamch_("Precision"); smlnum = unfl * (*n / ulp); bignum = (1. - ulp) / smlnum; /* Compute 1-norm of each column of strictly upper triangular */ /* part of T to control overflow in triangular solver. */ work[1] = 0.; i__1 = *n; for (j = 2; j <= i__1; ++j) { work[j] = 0.; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { work[j] += (d__1 = t[i__ + j * t_dim1], abs(d__1)); /* L20: */ } /* L30: */ } /* Index IP is used to specify the real or complex eigenvalue: */ /* IP = 0, real eigenvalue, */ /* 1, first of conjugate complex pair: (wr,wi) */ /* -1, second of conjugate complex pair: (wr,wi) */ n2 = *n << 1; if (rightv) { /* Compute right eigenvectors. */ ip = 0; is = *m; for (ki = *n; ki >= 1; --ki) { if (ip == 1) { goto L130; } if (ki == 1) { goto L40; } if (t[ki + (ki - 1) * t_dim1] == 0.) { goto L40; } ip = -1; L40: if (somev) { if (ip == 0) { if (! select[ki]) { goto L130; } } else { if (! select[ki - 1]) { goto L130; } } } /* Compute the KI-th eigenvalue (WR,WI). */ wr = t[ki + ki * t_dim1]; wi = 0.; if (ip != 0) { wi = sqrt((d__1 = t[ki + (ki - 1) * t_dim1], abs(d__1))) * sqrt((d__2 = t[ki - 1 + ki * t_dim1], abs(d__2))); } /* Computing MAX */ d__1 = ulp * (abs(wr) + abs(wi)); smin = max(d__1,smlnum); if (ip == 0) { /* Real right eigenvector */ work[ki + *n] = 1.; /* Form right-hand side */ i__1 = ki - 1; for (k = 1; k <= i__1; ++k) { work[k + *n] = -t[k + ki * t_dim1]; /* L50: */ } /* Solve the upper quasi-triangular system: */ /* (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. */ jnxt = ki - 1; for (j = ki - 1; j >= 1; --j) { if (j > jnxt) { goto L60; } j1 = j; j2 = j; jnxt = j - 1; if (j > 1) { if (t[j + (j - 1) * t_dim1] != 0.) { j1 = j - 1; jnxt = j - 2; } } if (j1 == j2) { /* 1-by-1 diagonal block */ _starpu_dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b22, &t[j + j * t_dim1], ldt, &c_b22, &c_b22, &work[j + * n], n, &wr, &c_b25, x, &c__2, &scale, &xnorm, &ierr); /* Scale X(1,1) to avoid overflow when updating */ /* the right-hand side. */ if (xnorm > 1.) { if (work[j] > bignum / xnorm) { x[0] /= xnorm; scale /= xnorm; } } /* Scale if necessary */ if (scale != 1.) { _starpu_dscal_(&ki, &scale, &work[*n + 1], &c__1); } work[j + *n] = x[0]; /* Update right-hand side */ i__1 = j - 1; d__1 = -x[0]; _starpu_daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ *n + 1], &c__1); } else { /* 2-by-2 diagonal block */ _starpu_dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b22, &t[j - 1 + (j - 1) * t_dim1], ldt, &c_b22, &c_b22, & work[j - 1 + *n], n, &wr, &c_b25, x, &c__2, & scale, &xnorm, &ierr); /* Scale X(1,1) and X(2,1) to avoid overflow when */ /* updating the right-hand side. */ if (xnorm > 1.) { /* Computing MAX */ d__1 = work[j - 1], d__2 = work[j]; beta = max(d__1,d__2); if (beta > bignum / xnorm) { x[0] /= xnorm; x[1] /= xnorm; scale /= xnorm; } } /* Scale if necessary */ if (scale != 1.) { _starpu_dscal_(&ki, &scale, &work[*n + 1], &c__1); } work[j - 1 + *n] = x[0]; work[j + *n] = x[1]; /* Update right-hand side */ i__1 = j - 2; d__1 = -x[0]; _starpu_daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, &work[*n + 1], &c__1); i__1 = j - 2; d__1 = -x[1]; _starpu_daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ *n + 1], &c__1); } L60: ; } /* Copy the vector x or Q*x to VR and normalize. */ if (! over) { _starpu_dcopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], & c__1); ii = _starpu_idamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); remax = 1. / (d__1 = vr[ii + is * vr_dim1], abs(d__1)); _starpu_dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); i__1 = *n; for (k = ki + 1; k <= i__1; ++k) { vr[k + is * vr_dim1] = 0.; /* L70: */ } } else { if (ki > 1) { i__1 = ki - 1; _starpu_dgemv_("N", n, &i__1, &c_b22, &vr[vr_offset], ldvr, & work[*n + 1], &c__1, &work[ki + *n], &vr[ki * vr_dim1 + 1], &c__1); } ii = _starpu_idamax_(n, &vr[ki * vr_dim1 + 1], &c__1); remax = 1. / (d__1 = vr[ii + ki * vr_dim1], abs(d__1)); _starpu_dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); } } else { /* Complex right eigenvector. */ /* Initial solve */ /* [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. */ /* [ (T(KI,KI-1) T(KI,KI) ) ] */ if ((d__1 = t[ki - 1 + ki * t_dim1], abs(d__1)) >= (d__2 = t[ ki + (ki - 1) * t_dim1], abs(d__2))) { work[ki - 1 + *n] = 1.; work[ki + n2] = wi / t[ki - 1 + ki * t_dim1]; } else { work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1]; work[ki + n2] = 1.; } work[ki + *n] = 0.; work[ki - 1 + n2] = 0.; /* Form right-hand side */ i__1 = ki - 2; for (k = 1; k <= i__1; ++k) { work[k + *n] = -work[ki - 1 + *n] * t[k + (ki - 1) * t_dim1]; work[k + n2] = -work[ki + n2] * t[k + ki * t_dim1]; /* L80: */ } /* Solve upper quasi-triangular system: */ /* (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) */ jnxt = ki - 2; for (j = ki - 2; j >= 1; --j) { if (j > jnxt) { goto L90; } j1 = j; j2 = j; jnxt = j - 1; if (j > 1) { if (t[j + (j - 1) * t_dim1] != 0.) { j1 = j - 1; jnxt = j - 2; } } if (j1 == j2) { /* 1-by-1 diagonal block */ _starpu_dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b22, &t[j + j * t_dim1], ldt, &c_b22, &c_b22, &work[j + * n], n, &wr, &wi, x, &c__2, &scale, &xnorm, & ierr); /* Scale X(1,1) and X(1,2) to avoid overflow when */ /* updating the right-hand side. */ if (xnorm > 1.) { if (work[j] > bignum / xnorm) { x[0] /= xnorm; x[2] /= xnorm; scale /= xnorm; } } /* Scale if necessary */ if (scale != 1.) { _starpu_dscal_(&ki, &scale, &work[*n + 1], &c__1); _starpu_dscal_(&ki, &scale, &work[n2 + 1], &c__1); } work[j + *n] = x[0]; work[j + n2] = x[2]; /* Update the right-hand side */ i__1 = j - 1; d__1 = -x[0]; _starpu_daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ *n + 1], &c__1); i__1 = j - 1; d__1 = -x[2]; _starpu_daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ n2 + 1], &c__1); } else { /* 2-by-2 diagonal block */ _starpu_dlaln2_(&c_false, &c__2, &c__2, &smin, &c_b22, &t[j - 1 + (j - 1) * t_dim1], ldt, &c_b22, &c_b22, & work[j - 1 + *n], n, &wr, &wi, x, &c__2, & scale, &xnorm, &ierr); /* Scale X to avoid overflow when updating */ /* the right-hand side. */ if (xnorm > 1.) { /* Computing MAX */ d__1 = work[j - 1], d__2 = work[j]; beta = max(d__1,d__2); if (beta > bignum / xnorm) { rec = 1. / xnorm; x[0] *= rec; x[2] *= rec; x[1] *= rec; x[3] *= rec; scale *= rec; } } /* Scale if necessary */ if (scale != 1.) { _starpu_dscal_(&ki, &scale, &work[*n + 1], &c__1); _starpu_dscal_(&ki, &scale, &work[n2 + 1], &c__1); } work[j - 1 + *n] = x[0]; work[j + *n] = x[1]; work[j - 1 + n2] = x[2]; work[j + n2] = x[3]; /* Update the right-hand side */ i__1 = j - 2; d__1 = -x[0]; _starpu_daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, &work[*n + 1], &c__1); i__1 = j - 2; d__1 = -x[1]; _starpu_daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ *n + 1], &c__1); i__1 = j - 2; d__1 = -x[2]; _starpu_daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, &work[n2 + 1], &c__1); i__1 = j - 2; d__1 = -x[3]; _starpu_daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ n2 + 1], &c__1); } L90: ; } /* Copy the vector x or Q*x to VR and normalize. */ if (! over) { _starpu_dcopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1 + 1], &c__1); _starpu_dcopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], & c__1); emax = 0.; i__1 = ki; for (k = 1; k <= i__1; ++k) { /* Computing MAX */ d__3 = emax, d__4 = (d__1 = vr[k + (is - 1) * vr_dim1] , abs(d__1)) + (d__2 = vr[k + is * vr_dim1], abs(d__2)); emax = max(d__3,d__4); /* L100: */ } remax = 1. / emax; _starpu_dscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); _starpu_dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); i__1 = *n; for (k = ki + 1; k <= i__1; ++k) { vr[k + (is - 1) * vr_dim1] = 0.; vr[k + is * vr_dim1] = 0.; /* L110: */ } } else { if (ki > 2) { i__1 = ki - 2; _starpu_dgemv_("N", n, &i__1, &c_b22, &vr[vr_offset], ldvr, & work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[( ki - 1) * vr_dim1 + 1], &c__1); i__1 = ki - 2; _starpu_dgemv_("N", n, &i__1, &c_b22, &vr[vr_offset], ldvr, & work[n2 + 1], &c__1, &work[ki + n2], &vr[ki * vr_dim1 + 1], &c__1); } else { _starpu_dscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1 + 1], &c__1); _starpu_dscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], & c__1); } emax = 0.; i__1 = *n; for (k = 1; k <= i__1; ++k) { /* Computing MAX */ d__3 = emax, d__4 = (d__1 = vr[k + (ki - 1) * vr_dim1] , abs(d__1)) + (d__2 = vr[k + ki * vr_dim1], abs(d__2)); emax = max(d__3,d__4); /* L120: */ } remax = 1. / emax; _starpu_dscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); _starpu_dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); } } --is; if (ip != 0) { --is; } L130: if (ip == 1) { ip = 0; } if (ip == -1) { ip = 1; } /* L140: */ } } if (leftv) { /* Compute left eigenvectors. */ ip = 0; is = 1; i__1 = *n; for (ki = 1; ki <= i__1; ++ki) { if (ip == -1) { goto L250; } if (ki == *n) { goto L150; } if (t[ki + 1 + ki * t_dim1] == 0.) { goto L150; } ip = 1; L150: if (somev) { if (! select[ki]) { goto L250; } } /* Compute the KI-th eigenvalue (WR,WI). */ wr = t[ki + ki * t_dim1]; wi = 0.; if (ip != 0) { wi = sqrt((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1))) * sqrt((d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))); } /* Computing MAX */ d__1 = ulp * (abs(wr) + abs(wi)); smin = max(d__1,smlnum); if (ip == 0) { /* Real left eigenvector. */ work[ki + *n] = 1.; /* Form right-hand side */ i__2 = *n; for (k = ki + 1; k <= i__2; ++k) { work[k + *n] = -t[ki + k * t_dim1]; /* L160: */ } /* Solve the quasi-triangular system: */ /* (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK */ vmax = 1.; vcrit = bignum; jnxt = ki + 1; i__2 = *n; for (j = ki + 1; j <= i__2; ++j) { if (j < jnxt) { goto L170; } j1 = j; j2 = j; jnxt = j + 1; if (j < *n) { if (t[j + 1 + j * t_dim1] != 0.) { j2 = j + 1; jnxt = j + 2; } } if (j1 == j2) { /* 1-by-1 diagonal block */ /* Scale if necessary to avoid overflow when forming */ /* the right-hand side. */ if (work[j] > vcrit) { rec = 1. / vmax; i__3 = *n - ki + 1; _starpu_dscal_(&i__3, &rec, &work[ki + *n], &c__1); vmax = 1.; vcrit = bignum; } i__3 = j - ki - 1; work[j + *n] -= _starpu_ddot_(&i__3, &t[ki + 1 + j * t_dim1], &c__1, &work[ki + 1 + *n], &c__1); /* Solve (T(J,J)-WR)'*X = WORK */ _starpu_dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b22, &t[j + j * t_dim1], ldt, &c_b22, &c_b22, &work[j + * n], n, &wr, &c_b25, x, &c__2, &scale, &xnorm, &ierr); /* Scale if necessary */ if (scale != 1.) { i__3 = *n - ki + 1; _starpu_dscal_(&i__3, &scale, &work[ki + *n], &c__1); } work[j + *n] = x[0]; /* Computing MAX */ d__2 = (d__1 = work[j + *n], abs(d__1)); vmax = max(d__2,vmax); vcrit = bignum / vmax; } else { /* 2-by-2 diagonal block */ /* Scale if necessary to avoid overflow when forming */ /* the right-hand side. */ /* Computing MAX */ d__1 = work[j], d__2 = work[j + 1]; beta = max(d__1,d__2); if (beta > vcrit) { rec = 1. / vmax; i__3 = *n - ki + 1; _starpu_dscal_(&i__3, &rec, &work[ki + *n], &c__1); vmax = 1.; vcrit = bignum; } i__3 = j - ki - 1; work[j + *n] -= _starpu_ddot_(&i__3, &t[ki + 1 + j * t_dim1], &c__1, &work[ki + 1 + *n], &c__1); i__3 = j - ki - 1; work[j + 1 + *n] -= _starpu_ddot_(&i__3, &t[ki + 1 + (j + 1) * t_dim1], &c__1, &work[ki + 1 + *n], &c__1); /* Solve */ /* [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) */ /* [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) */ _starpu_dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b22, &t[j + j * t_dim1], ldt, &c_b22, &c_b22, &work[j + * n], n, &wr, &c_b25, x, &c__2, &scale, &xnorm, &ierr); /* Scale if necessary */ if (scale != 1.) { i__3 = *n - ki + 1; _starpu_dscal_(&i__3, &scale, &work[ki + *n], &c__1); } work[j + *n] = x[0]; work[j + 1 + *n] = x[1]; /* Computing MAX */ d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 = work[j + 1 + *n], abs(d__2)), d__3 = max( d__3,d__4); vmax = max(d__3,vmax); vcrit = bignum / vmax; } L170: ; } /* Copy the vector x or Q*x to VL and normalize. */ if (! over) { i__2 = *n - ki + 1; _starpu_dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * vl_dim1], &c__1); i__2 = *n - ki + 1; ii = _starpu_idamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1; remax = 1. / (d__1 = vl[ii + is * vl_dim1], abs(d__1)); i__2 = *n - ki + 1; _starpu_dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); i__2 = ki - 1; for (k = 1; k <= i__2; ++k) { vl[k + is * vl_dim1] = 0.; /* L180: */ } } else { if (ki < *n) { i__2 = *n - ki; _starpu_dgemv_("N", n, &i__2, &c_b22, &vl[(ki + 1) * vl_dim1 + 1], ldvl, &work[ki + 1 + *n], &c__1, &work[ ki + *n], &vl[ki * vl_dim1 + 1], &c__1); } ii = _starpu_idamax_(n, &vl[ki * vl_dim1 + 1], &c__1); remax = 1. / (d__1 = vl[ii + ki * vl_dim1], abs(d__1)); _starpu_dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); } } else { /* Complex left eigenvector. */ /* Initial solve: */ /* ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. */ /* ((T(KI+1,KI) T(KI+1,KI+1)) ) */ if ((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1)) >= (d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))) { work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1]; work[ki + 1 + n2] = 1.; } else { work[ki + *n] = 1.; work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1]; } work[ki + 1 + *n] = 0.; work[ki + n2] = 0.; /* Form right-hand side */ i__2 = *n; for (k = ki + 2; k <= i__2; ++k) { work[k + *n] = -work[ki + *n] * t[ki + k * t_dim1]; work[k + n2] = -work[ki + 1 + n2] * t[ki + 1 + k * t_dim1] ; /* L190: */ } /* Solve complex quasi-triangular system: */ /* ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 */ vmax = 1.; vcrit = bignum; jnxt = ki + 2; i__2 = *n; for (j = ki + 2; j <= i__2; ++j) { if (j < jnxt) { goto L200; } j1 = j; j2 = j; jnxt = j + 1; if (j < *n) { if (t[j + 1 + j * t_dim1] != 0.) { j2 = j + 1; jnxt = j + 2; } } if (j1 == j2) { /* 1-by-1 diagonal block */ /* Scale if necessary to avoid overflow when */ /* forming the right-hand side elements. */ if (work[j] > vcrit) { rec = 1. / vmax; i__3 = *n - ki + 1; _starpu_dscal_(&i__3, &rec, &work[ki + *n], &c__1); i__3 = *n - ki + 1; _starpu_dscal_(&i__3, &rec, &work[ki + n2], &c__1); vmax = 1.; vcrit = bignum; } i__3 = j - ki - 2; work[j + *n] -= _starpu_ddot_(&i__3, &t[ki + 2 + j * t_dim1], &c__1, &work[ki + 2 + *n], &c__1); i__3 = j - ki - 2; work[j + n2] -= _starpu_ddot_(&i__3, &t[ki + 2 + j * t_dim1], &c__1, &work[ki + 2 + n2], &c__1); /* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */ d__1 = -wi; _starpu_dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b22, &t[j + j * t_dim1], ldt, &c_b22, &c_b22, &work[j + * n], n, &wr, &d__1, x, &c__2, &scale, &xnorm, & ierr); /* Scale if necessary */ if (scale != 1.) { i__3 = *n - ki + 1; _starpu_dscal_(&i__3, &scale, &work[ki + *n], &c__1); i__3 = *n - ki + 1; _starpu_dscal_(&i__3, &scale, &work[ki + n2], &c__1); } work[j + *n] = x[0]; work[j + n2] = x[2]; /* Computing MAX */ d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 = work[j + n2], abs(d__2)), d__3 = max(d__3, d__4); vmax = max(d__3,vmax); vcrit = bignum / vmax; } else { /* 2-by-2 diagonal block */ /* Scale if necessary to avoid overflow when forming */ /* the right-hand side elements. */ /* Computing MAX */ d__1 = work[j], d__2 = work[j + 1]; beta = max(d__1,d__2); if (beta > vcrit) { rec = 1. / vmax; i__3 = *n - ki + 1; _starpu_dscal_(&i__3, &rec, &work[ki + *n], &c__1); i__3 = *n - ki + 1; _starpu_dscal_(&i__3, &rec, &work[ki + n2], &c__1); vmax = 1.; vcrit = bignum; } i__3 = j - ki - 2; work[j + *n] -= _starpu_ddot_(&i__3, &t[ki + 2 + j * t_dim1], &c__1, &work[ki + 2 + *n], &c__1); i__3 = j - ki - 2; work[j + n2] -= _starpu_ddot_(&i__3, &t[ki + 2 + j * t_dim1], &c__1, &work[ki + 2 + n2], &c__1); i__3 = j - ki - 2; work[j + 1 + *n] -= _starpu_ddot_(&i__3, &t[ki + 2 + (j + 1) * t_dim1], &c__1, &work[ki + 2 + *n], &c__1); i__3 = j - ki - 2; work[j + 1 + n2] -= _starpu_ddot_(&i__3, &t[ki + 2 + (j + 1) * t_dim1], &c__1, &work[ki + 2 + n2], &c__1); /* Solve 2-by-2 complex linear equation */ /* ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B */ /* ([T(j+1,j) T(j+1,j+1)] ) */ d__1 = -wi; _starpu_dlaln2_(&c_true, &c__2, &c__2, &smin, &c_b22, &t[j + j * t_dim1], ldt, &c_b22, &c_b22, &work[j + * n], n, &wr, &d__1, x, &c__2, &scale, &xnorm, & ierr); /* Scale if necessary */ if (scale != 1.) { i__3 = *n - ki + 1; _starpu_dscal_(&i__3, &scale, &work[ki + *n], &c__1); i__3 = *n - ki + 1; _starpu_dscal_(&i__3, &scale, &work[ki + n2], &c__1); } work[j + *n] = x[0]; work[j + n2] = x[2]; work[j + 1 + *n] = x[1]; work[j + 1 + n2] = x[3]; /* Computing MAX */ d__1 = abs(x[0]), d__2 = abs(x[2]), d__1 = max(d__1, d__2), d__2 = abs(x[1]), d__1 = max(d__1,d__2) , d__2 = abs(x[3]), d__1 = max(d__1,d__2); vmax = max(d__1,vmax); vcrit = bignum / vmax; } L200: ; } /* Copy the vector x or Q*x to VL and normalize. */ if (! over) { i__2 = *n - ki + 1; _starpu_dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * vl_dim1], &c__1); i__2 = *n - ki + 1; _starpu_dcopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) * vl_dim1], &c__1); emax = 0.; i__2 = *n; for (k = ki; k <= i__2; ++k) { /* Computing MAX */ d__3 = emax, d__4 = (d__1 = vl[k + is * vl_dim1], abs( d__1)) + (d__2 = vl[k + (is + 1) * vl_dim1], abs(d__2)); emax = max(d__3,d__4); /* L220: */ } remax = 1. / emax; i__2 = *n - ki + 1; _starpu_dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); i__2 = *n - ki + 1; _starpu_dscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1) ; i__2 = ki - 1; for (k = 1; k <= i__2; ++k) { vl[k + is * vl_dim1] = 0.; vl[k + (is + 1) * vl_dim1] = 0.; /* L230: */ } } else { if (ki < *n - 1) { i__2 = *n - ki - 1; _starpu_dgemv_("N", n, &i__2, &c_b22, &vl[(ki + 2) * vl_dim1 + 1], ldvl, &work[ki + 2 + *n], &c__1, &work[ ki + *n], &vl[ki * vl_dim1 + 1], &c__1); i__2 = *n - ki - 1; _starpu_dgemv_("N", n, &i__2, &c_b22, &vl[(ki + 2) * vl_dim1 + 1], ldvl, &work[ki + 2 + n2], &c__1, &work[ ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + 1], & c__1); } else { _starpu_dscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], & c__1); _starpu_dscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + 1], &c__1); } emax = 0.; i__2 = *n; for (k = 1; k <= i__2; ++k) { /* Computing MAX */ d__3 = emax, d__4 = (d__1 = vl[k + ki * vl_dim1], abs( d__1)) + (d__2 = vl[k + (ki + 1) * vl_dim1], abs(d__2)); emax = max(d__3,d__4); /* L240: */ } remax = 1. / emax; _starpu_dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); _starpu_dscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); } } ++is; if (ip != 0) { ++is; } L250: if (ip == -1) { ip = 0; } if (ip == 1) { ip = -1; } /* L260: */ } } return 0; /* End of DTREVC */ } /* _starpu_dtrevc_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtrexc.c000066400000000000000000000245751507764646700207020ustar00rootroot00000000000000/* dtrexc.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c__2 = 2; /* Subroutine */ int _starpu_dtrexc_(char *compq, integer *n, doublereal *t, integer * ldt, doublereal *q, integer *ldq, integer *ifst, integer *ilst, doublereal *work, integer *info) { /* System generated locals */ integer q_dim1, q_offset, t_dim1, t_offset, i__1; /* Local variables */ integer nbf, nbl, here; extern logical _starpu_lsame_(char *, char *); logical wantq; extern /* Subroutine */ int _starpu_dlaexc_(logical *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); integer nbnext; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTREXC reorders the real Schur factorization of a real matrix */ /* A = Q*T*Q**T, so that the diagonal block of T with row index IFST is */ /* moved to row ILST. */ /* The real Schur form T is reordered by an orthogonal similarity */ /* transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors */ /* is updated by postmultiplying it with Z. */ /* T must be in Schur canonical form (as returned by DHSEQR), that is, */ /* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each */ /* 2-by-2 diagonal block has its diagonal elements equal and its */ /* off-diagonal elements of opposite sign. */ /* Arguments */ /* ========= */ /* COMPQ (input) CHARACTER*1 */ /* = 'V': update the matrix Q of Schur vectors; */ /* = 'N': do not update Q. */ /* N (input) INTEGER */ /* The order of the matrix T. N >= 0. */ /* T (input/output) DOUBLE PRECISION array, dimension (LDT,N) */ /* On entry, the upper quasi-triangular matrix T, in Schur */ /* Schur canonical form. */ /* On exit, the reordered upper quasi-triangular matrix, again */ /* in Schur canonical form. */ /* LDT (input) INTEGER */ /* The leading dimension of the array T. LDT >= max(1,N). */ /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ /* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. */ /* On exit, if COMPQ = 'V', Q has been postmultiplied by the */ /* orthogonal transformation matrix Z which reorders T. */ /* If COMPQ = 'N', Q is not referenced. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max(1,N). */ /* IFST (input/output) INTEGER */ /* ILST (input/output) INTEGER */ /* Specify the reordering of the diagonal blocks of T. */ /* The block with row index IFST is moved to row ILST, by a */ /* sequence of transpositions between adjacent blocks. */ /* On exit, if IFST pointed on entry to the second row of a */ /* 2-by-2 block, it is changed to point to the first row; ILST */ /* always points to the first row of the block in its final */ /* position (which may differ from its input value by +1 or -1). */ /* 1 <= IFST <= N; 1 <= ILST <= N. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* = 1: two adjacent blocks were too close to swap (the problem */ /* is very ill-conditioned); T may have been partially */ /* reordered, and ILST points to the first row of the */ /* current position of the block being moved. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode and test the input arguments. */ /* Parameter adjustments */ t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --work; /* Function Body */ *info = 0; wantq = _starpu_lsame_(compq, "V"); if (! wantq && ! _starpu_lsame_(compq, "N")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldt < max(1,*n)) { *info = -4; } else if (*ldq < 1 || wantq && *ldq < max(1,*n)) { *info = -6; } else if (*ifst < 1 || *ifst > *n) { *info = -7; } else if (*ilst < 1 || *ilst > *n) { *info = -8; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTREXC", &i__1); return 0; } /* Quick return if possible */ if (*n <= 1) { return 0; } /* Determine the first row of specified block */ /* and find out it is 1 by 1 or 2 by 2. */ if (*ifst > 1) { if (t[*ifst + (*ifst - 1) * t_dim1] != 0.) { --(*ifst); } } nbf = 1; if (*ifst < *n) { if (t[*ifst + 1 + *ifst * t_dim1] != 0.) { nbf = 2; } } /* Determine the first row of the final block */ /* and find out it is 1 by 1 or 2 by 2. */ if (*ilst > 1) { if (t[*ilst + (*ilst - 1) * t_dim1] != 0.) { --(*ilst); } } nbl = 1; if (*ilst < *n) { if (t[*ilst + 1 + *ilst * t_dim1] != 0.) { nbl = 2; } } if (*ifst == *ilst) { return 0; } if (*ifst < *ilst) { /* Update ILST */ if (nbf == 2 && nbl == 1) { --(*ilst); } if (nbf == 1 && nbl == 2) { ++(*ilst); } here = *ifst; L10: /* Swap block with next one below */ if (nbf == 1 || nbf == 2) { /* Current block either 1 by 1 or 2 by 2 */ nbnext = 1; if (here + nbf + 1 <= *n) { if (t[here + nbf + 1 + (here + nbf) * t_dim1] != 0.) { nbnext = 2; } } _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, & nbf, &nbnext, &work[1], info); if (*info != 0) { *ilst = here; return 0; } here += nbnext; /* Test if 2 by 2 block breaks into two 1 by 1 blocks */ if (nbf == 2) { if (t[here + 1 + here * t_dim1] == 0.) { nbf = 3; } } } else { /* Current block consists of two 1 by 1 blocks each of which */ /* must be swapped individually */ nbnext = 1; if (here + 3 <= *n) { if (t[here + 3 + (here + 2) * t_dim1] != 0.) { nbnext = 2; } } i__1 = here + 1; _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & c__1, &nbnext, &work[1], info); if (*info != 0) { *ilst = here; return 0; } if (nbnext == 1) { /* Swap two 1 by 1 blocks, no problems possible */ _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & here, &c__1, &nbnext, &work[1], info); ++here; } else { /* Recompute NBNEXT in case 2 by 2 split */ if (t[here + 2 + (here + 1) * t_dim1] == 0.) { nbnext = 1; } if (nbnext == 2) { /* 2 by 2 Block did not split */ _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & here, &c__1, &nbnext, &work[1], info); if (*info != 0) { *ilst = here; return 0; } here += 2; } else { /* 2 by 2 Block did split */ _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & here, &c__1, &c__1, &work[1], info); i__1 = here + 1; _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & i__1, &c__1, &c__1, &work[1], info); here += 2; } } } if (here < *ilst) { goto L10; } } else { here = *ifst; L20: /* Swap block with next one above */ if (nbf == 1 || nbf == 2) { /* Current block either 1 by 1 or 2 by 2 */ nbnext = 1; if (here >= 3) { if (t[here - 1 + (here - 2) * t_dim1] != 0.) { nbnext = 2; } } i__1 = here - nbnext; _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & nbnext, &nbf, &work[1], info); if (*info != 0) { *ilst = here; return 0; } here -= nbnext; /* Test if 2 by 2 block breaks into two 1 by 1 blocks */ if (nbf == 2) { if (t[here + 1 + here * t_dim1] == 0.) { nbf = 3; } } } else { /* Current block consists of two 1 by 1 blocks each of which */ /* must be swapped individually */ nbnext = 1; if (here >= 3) { if (t[here - 1 + (here - 2) * t_dim1] != 0.) { nbnext = 2; } } i__1 = here - nbnext; _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & nbnext, &c__1, &work[1], info); if (*info != 0) { *ilst = here; return 0; } if (nbnext == 1) { /* Swap two 1 by 1 blocks, no problems possible */ _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & here, &nbnext, &c__1, &work[1], info); --here; } else { /* Recompute NBNEXT in case 2 by 2 split */ if (t[here + (here - 1) * t_dim1] == 0.) { nbnext = 1; } if (nbnext == 2) { /* 2 by 2 Block did not split */ i__1 = here - 1; _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & i__1, &c__2, &c__1, &work[1], info); if (*info != 0) { *ilst = here; return 0; } here += -2; } else { /* 2 by 2 Block did split */ _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & here, &c__1, &c__1, &work[1], info); i__1 = here - 1; _starpu_dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & i__1, &c__1, &c__1, &work[1], info); here += -2; } } } if (here > *ilst) { goto L20; } } *ilst = here; return 0; /* End of DTREXC */ } /* _starpu_dtrexc_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtrrfs.c000066400000000000000000000334501507764646700207050ustar00rootroot00000000000000/* dtrrfs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b19 = -1.; /* Subroutine */ int _starpu_dtrrfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer * ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3; /* Local variables */ integer i__, j, k; doublereal s, xk; integer nz; doublereal eps; integer kase; doublereal safe1, safe2; extern logical _starpu_lsame_(char *, char *); integer isave[3]; extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dtrsv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlamch_(char *); doublereal safmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical notran; char transt[1]; logical nounit; doublereal lstres; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRRFS provides error bounds and backward error estimates for the */ /* solution to a system of linear equations with a triangular */ /* coefficient matrix. */ /* The solution matrix X must be computed by DTRTRS or some other */ /* means before entering this routine. DTRRFS does not do iterative */ /* refinement because doing so cannot improve the backward error. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ /* DIAG (input) CHARACTER*1 */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrices B and X. NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The triangular matrix A. If UPLO = 'U', the leading N-by-N */ /* upper triangular part of the array A contains the upper */ /* triangular matrix, and the strictly lower triangular part of */ /* A is not referenced. If UPLO = 'L', the leading N-by-N lower */ /* triangular part of the array A contains the lower triangular */ /* matrix, and the strictly upper triangular part of A is not */ /* referenced. If DIAG = 'U', the diagonal elements of A are */ /* also not referenced and are assumed to be 1. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right hand side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* The solution matrix X. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. LDX >= max(1,N). */ /* FERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The estimated forward error bound for each solution vector */ /* X(j) (the j-th column of the solution matrix X). */ /* If XTRUE is the true solution corresponding to X(j), FERR(j) */ /* is an estimated upper bound for the magnitude of the largest */ /* element in (X(j) - XTRUE) divided by the magnitude of the */ /* largest element in X(j). The estimate is as reliable as */ /* the estimate for RCOND, and is almost always a slight */ /* overestimate of the true error. */ /* BERR (output) DOUBLE PRECISION array, dimension (NRHS) */ /* The componentwise relative backward error of each solution */ /* vector X(j) (i.e., the smallest relative change in */ /* any element of A or B that makes X(j) an exact solution). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (3*N) */ /* IWORK (workspace) INTEGER array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); notran = _starpu_lsame_(trans, "N"); nounit = _starpu_lsame_(diag, "N"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (! notran && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*nrhs < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } else if (*ldx < max(1,*n)) { *info = -11; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTRRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } if (notran) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = *n + 1; eps = _starpu_dlamch_("Epsilon"); safmin = _starpu_dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { /* Compute residual R = B - op(A) * X, */ /* where op(A) = A or A', depending on TRANS. */ _starpu_dcopy_(n, &x[j * x_dim1 + 1], &c__1, &work[*n + 1], &c__1); _starpu_dtrmv_(uplo, trans, diag, n, &a[a_offset], lda, &work[*n + 1], &c__1); _starpu_daxpy_(n, &c_b19, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. If the i-th component of the denominator is less */ /* than SAFE2, then SAFE1 is added to the i-th components of the */ /* numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); /* L20: */ } if (notran) { /* Compute abs(A)*abs(X) + abs(B). */ if (upper) { if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x[k + j * x_dim1], abs(d__1)); i__3 = k; for (i__ = 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = a[i__ + k * a_dim1], abs( d__1)) * xk; /* L30: */ } /* L40: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x[k + j * x_dim1], abs(d__1)); i__3 = k - 1; for (i__ = 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = a[i__ + k * a_dim1], abs( d__1)) * xk; /* L50: */ } work[k] += xk; /* L60: */ } } } else { if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x[k + j * x_dim1], abs(d__1)); i__3 = *n; for (i__ = k; i__ <= i__3; ++i__) { work[i__] += (d__1 = a[i__ + k * a_dim1], abs( d__1)) * xk; /* L70: */ } /* L80: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x[k + j * x_dim1], abs(d__1)); i__3 = *n; for (i__ = k + 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = a[i__ + k * a_dim1], abs( d__1)) * xk; /* L90: */ } work[k] += xk; /* L100: */ } } } } else { /* Compute abs(A')*abs(X) + abs(B). */ if (upper) { if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; i__3 = k; for (i__ = 1; i__ <= i__3; ++i__) { s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * ( d__2 = x[i__ + j * x_dim1], abs(d__2)); /* L110: */ } work[k] += s; /* L120: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = (d__1 = x[k + j * x_dim1], abs(d__1)); i__3 = k - 1; for (i__ = 1; i__ <= i__3; ++i__) { s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * ( d__2 = x[i__ + j * x_dim1], abs(d__2)); /* L130: */ } work[k] += s; /* L140: */ } } } else { if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; i__3 = *n; for (i__ = k; i__ <= i__3; ++i__) { s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * ( d__2 = x[i__ + j * x_dim1], abs(d__2)); /* L150: */ } work[k] += s; /* L160: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = (d__1 = x[k + j * x_dim1], abs(d__1)); i__3 = *n; for (i__ = k + 1; i__ <= i__3; ++i__) { s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * ( d__2 = x[i__ + j * x_dim1], abs(d__2)); /* L170: */ } work[k] += s; /* L180: */ } } } } s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L190: */ } berr[j] = s; /* Bound error from formula */ /* norm(X - XTRUE) / norm(X) .le. FERR = */ /* norm( abs(inv(op(A)))* */ /* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ /* where */ /* norm(Z) is the magnitude of the largest component of Z */ /* inv(op(A)) is the inverse of op(A) */ /* abs(Z) is the componentwise absolute value of the matrix or */ /* vector Z */ /* NZ is the maximum number of nonzeros in any row of A, plus 1 */ /* EPS is machine epsilon */ /* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ /* is incremented by SAFE1 if the i-th component of */ /* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ /* Use DLACN2 to estimate the infinity-norm of the matrix */ /* inv(op(A)) * diag(W), */ /* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L200: */ } kase = 0; L210: _starpu_dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase, isave); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(op(A)'). */ _starpu_dtrsv_(uplo, transt, diag, n, &a[a_offset], lda, &work[*n + 1] , &c__1); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L220: */ } } else { /* Multiply by inv(op(A))*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L230: */ } _starpu_dtrsv_(uplo, trans, diag, n, &a[a_offset], lda, &work[*n + 1], &c__1); } goto L210; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); lstres = max(d__2,d__3); /* L240: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L250: */ } return 0; /* End of DTRRFS */ } /* _starpu_dtrrfs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtrsen.c000066400000000000000000000410531507764646700206760ustar00rootroot00000000000000/* dtrsen.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c_n1 = -1; /* Subroutine */ int _starpu_dtrsen_(char *job, char *compq, logical *select, integer *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, doublereal *wr, doublereal *wi, integer *m, doublereal *s, doublereal *sep, doublereal *work, integer *lwork, integer *iwork, integer * liwork, integer *info) { /* System generated locals */ integer q_dim1, q_offset, t_dim1, t_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer k, n1, n2, kk, nn, ks; doublereal est; integer kase; logical pair; integer ierr; logical swap; doublereal scale; extern logical _starpu_lsame_(char *, char *); integer isave[3], lwmin; logical wantq, wants; doublereal rnorm; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); logical wantbh; extern /* Subroutine */ int _starpu_dtrexc_(char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *); integer liwmin; logical wantsp, lquery; extern /* Subroutine */ int _starpu_dtrsyl_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRSEN reorders the real Schur factorization of a real matrix */ /* A = Q*T*Q**T, so that a selected cluster of eigenvalues appears in */ /* the leading diagonal blocks of the upper quasi-triangular matrix T, */ /* and the leading columns of Q form an orthonormal basis of the */ /* corresponding right invariant subspace. */ /* Optionally the routine computes the reciprocal condition numbers of */ /* the cluster of eigenvalues and/or the invariant subspace. */ /* T must be in Schur canonical form (as returned by DHSEQR), that is, */ /* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each */ /* 2-by-2 diagonal block has its diagonal elemnts equal and its */ /* off-diagonal elements of opposite sign. */ /* Arguments */ /* ========= */ /* JOB (input) CHARACTER*1 */ /* Specifies whether condition numbers are required for the */ /* cluster of eigenvalues (S) or the invariant subspace (SEP): */ /* = 'N': none; */ /* = 'E': for eigenvalues only (S); */ /* = 'V': for invariant subspace only (SEP); */ /* = 'B': for both eigenvalues and invariant subspace (S and */ /* SEP). */ /* COMPQ (input) CHARACTER*1 */ /* = 'V': update the matrix Q of Schur vectors; */ /* = 'N': do not update Q. */ /* SELECT (input) LOGICAL array, dimension (N) */ /* SELECT specifies the eigenvalues in the selected cluster. To */ /* select a real eigenvalue w(j), SELECT(j) must be set to */ /* .TRUE.. To select a complex conjugate pair of eigenvalues */ /* w(j) and w(j+1), corresponding to a 2-by-2 diagonal block, */ /* either SELECT(j) or SELECT(j+1) or both must be set to */ /* .TRUE.; a complex conjugate pair of eigenvalues must be */ /* either both included in the cluster or both excluded. */ /* N (input) INTEGER */ /* The order of the matrix T. N >= 0. */ /* T (input/output) DOUBLE PRECISION array, dimension (LDT,N) */ /* On entry, the upper quasi-triangular matrix T, in Schur */ /* canonical form. */ /* On exit, T is overwritten by the reordered matrix T, again in */ /* Schur canonical form, with the selected eigenvalues in the */ /* leading diagonal blocks. */ /* LDT (input) INTEGER */ /* The leading dimension of the array T. LDT >= max(1,N). */ /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */ /* On entry, if COMPQ = 'V', the matrix Q of Schur vectors. */ /* On exit, if COMPQ = 'V', Q has been postmultiplied by the */ /* orthogonal transformation matrix which reorders T; the */ /* leading M columns of Q form an orthonormal basis for the */ /* specified invariant subspace. */ /* If COMPQ = 'N', Q is not referenced. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. */ /* LDQ >= 1; and if COMPQ = 'V', LDQ >= N. */ /* WR (output) DOUBLE PRECISION array, dimension (N) */ /* WI (output) DOUBLE PRECISION array, dimension (N) */ /* The real and imaginary parts, respectively, of the reordered */ /* eigenvalues of T. The eigenvalues are stored in the same */ /* order as on the diagonal of T, with WR(i) = T(i,i) and, if */ /* T(i:i+1,i:i+1) is a 2-by-2 diagonal block, WI(i) > 0 and */ /* WI(i+1) = -WI(i). Note that if a complex eigenvalue is */ /* sufficiently ill-conditioned, then its value may differ */ /* significantly from its value before reordering. */ /* M (output) INTEGER */ /* The dimension of the specified invariant subspace. */ /* 0 < = M <= N. */ /* S (output) DOUBLE PRECISION */ /* If JOB = 'E' or 'B', S is a lower bound on the reciprocal */ /* condition number for the selected cluster of eigenvalues. */ /* S cannot underestimate the true reciprocal condition number */ /* by more than a factor of sqrt(N). If M = 0 or N, S = 1. */ /* If JOB = 'N' or 'V', S is not referenced. */ /* SEP (output) DOUBLE PRECISION */ /* If JOB = 'V' or 'B', SEP is the estimated reciprocal */ /* condition number of the specified invariant subspace. If */ /* M = 0 or N, SEP = norm(T). */ /* If JOB = 'N' or 'E', SEP is not referenced. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If JOB = 'N', LWORK >= max(1,N); */ /* if JOB = 'E', LWORK >= max(1,M*(N-M)); */ /* if JOB = 'V' or 'B', LWORK >= max(1,2*M*(N-M)). */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) */ /* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. */ /* If JOB = 'N' or 'E', LIWORK >= 1; */ /* if JOB = 'V' or 'B', LIWORK >= max(1,M*(N-M)). */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the optimal size of the IWORK array, */ /* returns this value as the first entry of the IWORK array, and */ /* no error message related to LIWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* = 1: reordering of T failed because some eigenvalues are too */ /* close to separate (the problem is very ill-conditioned); */ /* T may have been partially reordered, and WR and WI */ /* contain the eigenvalues in the same order as in T; S and */ /* SEP (if requested) are set to zero. */ /* Further Details */ /* =============== */ /* DTRSEN first collects the selected eigenvalues by computing an */ /* orthogonal transformation Z to move them to the top left corner of T. */ /* In other words, the selected eigenvalues are the eigenvalues of T11 */ /* in: */ /* Z'*T*Z = ( T11 T12 ) n1 */ /* ( 0 T22 ) n2 */ /* n1 n2 */ /* where N = n1+n2 and Z' means the transpose of Z. The first n1 columns */ /* of Z span the specified invariant subspace of T. */ /* If T has been obtained from the real Schur factorization of a matrix */ /* A = Q*T*Q', then the reordered real Schur factorization of A is given */ /* by A = (Q*Z)*(Z'*T*Z)*(Q*Z)', and the first n1 columns of Q*Z span */ /* the corresponding invariant subspace of A. */ /* The reciprocal condition number of the average of the eigenvalues of */ /* T11 may be returned in S. S lies between 0 (very badly conditioned) */ /* and 1 (very well conditioned). It is computed as follows. First we */ /* compute R so that */ /* P = ( I R ) n1 */ /* ( 0 0 ) n2 */ /* n1 n2 */ /* is the projector on the invariant subspace associated with T11. */ /* R is the solution of the Sylvester equation: */ /* T11*R - R*T22 = T12. */ /* Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote */ /* the two-norm of M. Then S is computed as the lower bound */ /* (1 + F-norm(R)**2)**(-1/2) */ /* on the reciprocal of 2-norm(P), the true reciprocal condition number. */ /* S cannot underestimate 1 / 2-norm(P) by more than a factor of */ /* sqrt(N). */ /* An approximate error bound for the computed average of the */ /* eigenvalues of T11 is */ /* EPS * norm(T) / S */ /* where EPS is the machine precision. */ /* The reciprocal condition number of the right invariant subspace */ /* spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP. */ /* SEP is defined as the separation of T11 and T22: */ /* sep( T11, T22 ) = sigma-min( C ) */ /* where sigma-min(C) is the smallest singular value of the */ /* n1*n2-by-n1*n2 matrix */ /* C = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) ) */ /* I(m) is an m by m identity matrix, and kprod denotes the Kronecker */ /* product. We estimate sigma-min(C) by the reciprocal of an estimate of */ /* the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C) */ /* cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2). */ /* When SEP is small, small changes in T can cause large changes in */ /* the invariant subspace. An approximate bound on the maximum angular */ /* error in the computed right invariant subspace is */ /* EPS * norm(T) / SEP */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode and test the input parameters */ /* Parameter adjustments */ --select; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --wr; --wi; --work; --iwork; /* Function Body */ wantbh = _starpu_lsame_(job, "B"); wants = _starpu_lsame_(job, "E") || wantbh; wantsp = _starpu_lsame_(job, "V") || wantbh; wantq = _starpu_lsame_(compq, "V"); *info = 0; lquery = *lwork == -1; if (! _starpu_lsame_(job, "N") && ! wants && ! wantsp) { *info = -1; } else if (! _starpu_lsame_(compq, "N") && ! wantq) { *info = -2; } else if (*n < 0) { *info = -4; } else if (*ldt < max(1,*n)) { *info = -6; } else if (*ldq < 1 || wantq && *ldq < *n) { *info = -8; } else { /* Set M to the dimension of the specified invariant subspace, */ /* and test LWORK and LIWORK. */ *m = 0; pair = FALSE_; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (pair) { pair = FALSE_; } else { if (k < *n) { if (t[k + 1 + k * t_dim1] == 0.) { if (select[k]) { ++(*m); } } else { pair = TRUE_; if (select[k] || select[k + 1]) { *m += 2; } } } else { if (select[*n]) { ++(*m); } } } /* L10: */ } n1 = *m; n2 = *n - *m; nn = n1 * n2; if (wantsp) { /* Computing MAX */ i__1 = 1, i__2 = nn << 1; lwmin = max(i__1,i__2); liwmin = max(1,nn); } else if (_starpu_lsame_(job, "N")) { lwmin = max(1,*n); liwmin = 1; } else if (_starpu_lsame_(job, "E")) { lwmin = max(1,nn); liwmin = 1; } if (*lwork < lwmin && ! lquery) { *info = -15; } else if (*liwork < liwmin && ! lquery) { *info = -17; } } if (*info == 0) { work[1] = (doublereal) lwmin; iwork[1] = liwmin; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTRSEN", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible. */ if (*m == *n || *m == 0) { if (wants) { *s = 1.; } if (wantsp) { *sep = _starpu_dlange_("1", n, n, &t[t_offset], ldt, &work[1]); } goto L40; } /* Collect the selected blocks at the top-left corner of T. */ ks = 0; pair = FALSE_; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (pair) { pair = FALSE_; } else { swap = select[k]; if (k < *n) { if (t[k + 1 + k * t_dim1] != 0.) { pair = TRUE_; swap = swap || select[k + 1]; } } if (swap) { ++ks; /* Swap the K-th block to position KS. */ ierr = 0; kk = k; if (k != ks) { _starpu_dtrexc_(compq, n, &t[t_offset], ldt, &q[q_offset], ldq, & kk, &ks, &work[1], &ierr); } if (ierr == 1 || ierr == 2) { /* Blocks too close to swap: exit. */ *info = 1; if (wants) { *s = 0.; } if (wantsp) { *sep = 0.; } goto L40; } if (pair) { ++ks; } } } /* L20: */ } if (wants) { /* Solve Sylvester equation for R: */ /* T11*R - R*T22 = scale*T12 */ _starpu_dlacpy_("F", &n1, &n2, &t[(n1 + 1) * t_dim1 + 1], ldt, &work[1], &n1); _starpu_dtrsyl_("N", "N", &c_n1, &n1, &n2, &t[t_offset], ldt, &t[n1 + 1 + (n1 + 1) * t_dim1], ldt, &work[1], &n1, &scale, &ierr); /* Estimate the reciprocal of the condition number of the cluster */ /* of eigenvalues. */ rnorm = _starpu_dlange_("F", &n1, &n2, &work[1], &n1, &work[1]); if (rnorm == 0.) { *s = 1.; } else { *s = scale / (sqrt(scale * scale / rnorm + rnorm) * sqrt(rnorm)); } } if (wantsp) { /* Estimate sep(T11,T22). */ est = 0.; kase = 0; L30: _starpu_dlacn2_(&nn, &work[nn + 1], &work[1], &iwork[1], &est, &kase, isave); if (kase != 0) { if (kase == 1) { /* Solve T11*R - R*T22 = scale*X. */ _starpu_dtrsyl_("N", "N", &c_n1, &n1, &n2, &t[t_offset], ldt, &t[n1 + 1 + (n1 + 1) * t_dim1], ldt, &work[1], &n1, &scale, & ierr); } else { /* Solve T11'*R - R*T22' = scale*X. */ _starpu_dtrsyl_("T", "T", &c_n1, &n1, &n2, &t[t_offset], ldt, &t[n1 + 1 + (n1 + 1) * t_dim1], ldt, &work[1], &n1, &scale, & ierr); } goto L30; } *sep = scale / est; } L40: /* Store the output eigenvalues in WR and WI. */ i__1 = *n; for (k = 1; k <= i__1; ++k) { wr[k] = t[k + k * t_dim1]; wi[k] = 0.; /* L50: */ } i__1 = *n - 1; for (k = 1; k <= i__1; ++k) { if (t[k + 1 + k * t_dim1] != 0.) { wi[k] = sqrt((d__1 = t[k + (k + 1) * t_dim1], abs(d__1))) * sqrt(( d__2 = t[k + 1 + k * t_dim1], abs(d__2))); wi[k + 1] = -wi[k]; } /* L60: */ } work[1] = (doublereal) lwmin; iwork[1] = liwmin; return 0; /* End of DTRSEN */ } /* _starpu_dtrsen_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtrsna.c000066400000000000000000000450751507764646700207020ustar00rootroot00000000000000/* dtrsna.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static logical c_true = TRUE_; static logical c_false = FALSE_; /* Subroutine */ int _starpu_dtrsna_(char *job, char *howmny, logical *select, integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * ldvl, doublereal *vr, integer *ldvr, doublereal *s, doublereal *sep, integer *mm, integer *m, doublereal *work, integer *ldwork, integer * iwork, integer *info) { /* System generated locals */ integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, work_dim1, work_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, k, n2; doublereal cs; integer nn, ks; doublereal sn, mu, eps, est; integer kase; doublereal cond; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); logical pair; integer ierr; doublereal dumm, prod; integer ifst; doublereal lnrm; integer ilst; doublereal rnrm; extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *); doublereal prod1, prod2, scale, delta; extern logical _starpu_lsame_(char *, char *); integer isave[3]; logical wants; doublereal dummy[1]; extern /* Subroutine */ int _starpu_dlacn2_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); extern doublereal _starpu_dlapy2_(doublereal *, doublereal *); extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *); extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); doublereal bignum; logical wantbh; extern /* Subroutine */ int _starpu_dlaqtr_(logical *, logical *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), _starpu_dtrexc_(char *, integer * , doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *); logical somcon; doublereal smlnum; logical wantsp; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* Modified to call DLACN2 in place of DLACON, 5 Feb 03, SJH. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRSNA estimates reciprocal condition numbers for specified */ /* eigenvalues and/or right eigenvectors of a real upper */ /* quasi-triangular matrix T (or of any matrix Q*T*Q**T with Q */ /* orthogonal). */ /* T must be in Schur canonical form (as returned by DHSEQR), that is, */ /* block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each */ /* 2-by-2 diagonal block has its diagonal elements equal and its */ /* off-diagonal elements of opposite sign. */ /* Arguments */ /* ========= */ /* JOB (input) CHARACTER*1 */ /* Specifies whether condition numbers are required for */ /* eigenvalues (S) or eigenvectors (SEP): */ /* = 'E': for eigenvalues only (S); */ /* = 'V': for eigenvectors only (SEP); */ /* = 'B': for both eigenvalues and eigenvectors (S and SEP). */ /* HOWMNY (input) CHARACTER*1 */ /* = 'A': compute condition numbers for all eigenpairs; */ /* = 'S': compute condition numbers for selected eigenpairs */ /* specified by the array SELECT. */ /* SELECT (input) LOGICAL array, dimension (N) */ /* If HOWMNY = 'S', SELECT specifies the eigenpairs for which */ /* condition numbers are required. To select condition numbers */ /* for the eigenpair corresponding to a real eigenvalue w(j), */ /* SELECT(j) must be set to .TRUE.. To select condition numbers */ /* corresponding to a complex conjugate pair of eigenvalues w(j) */ /* and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be */ /* set to .TRUE.. */ /* If HOWMNY = 'A', SELECT is not referenced. */ /* N (input) INTEGER */ /* The order of the matrix T. N >= 0. */ /* T (input) DOUBLE PRECISION array, dimension (LDT,N) */ /* The upper quasi-triangular matrix T, in Schur canonical form. */ /* LDT (input) INTEGER */ /* The leading dimension of the array T. LDT >= max(1,N). */ /* VL (input) DOUBLE PRECISION array, dimension (LDVL,M) */ /* If JOB = 'E' or 'B', VL must contain left eigenvectors of T */ /* (or of any Q*T*Q**T with Q orthogonal), corresponding to the */ /* eigenpairs specified by HOWMNY and SELECT. The eigenvectors */ /* must be stored in consecutive columns of VL, as returned by */ /* DHSEIN or DTREVC. */ /* If JOB = 'V', VL is not referenced. */ /* LDVL (input) INTEGER */ /* The leading dimension of the array VL. */ /* LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N. */ /* VR (input) DOUBLE PRECISION array, dimension (LDVR,M) */ /* If JOB = 'E' or 'B', VR must contain right eigenvectors of T */ /* (or of any Q*T*Q**T with Q orthogonal), corresponding to the */ /* eigenpairs specified by HOWMNY and SELECT. The eigenvectors */ /* must be stored in consecutive columns of VR, as returned by */ /* DHSEIN or DTREVC. */ /* If JOB = 'V', VR is not referenced. */ /* LDVR (input) INTEGER */ /* The leading dimension of the array VR. */ /* LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N. */ /* S (output) DOUBLE PRECISION array, dimension (MM) */ /* If JOB = 'E' or 'B', the reciprocal condition numbers of the */ /* selected eigenvalues, stored in consecutive elements of the */ /* array. For a complex conjugate pair of eigenvalues two */ /* consecutive elements of S are set to the same value. Thus */ /* S(j), SEP(j), and the j-th columns of VL and VR all */ /* correspond to the same eigenpair (but not in general the */ /* j-th eigenpair, unless all eigenpairs are selected). */ /* If JOB = 'V', S is not referenced. */ /* SEP (output) DOUBLE PRECISION array, dimension (MM) */ /* If JOB = 'V' or 'B', the estimated reciprocal condition */ /* numbers of the selected eigenvectors, stored in consecutive */ /* elements of the array. For a complex eigenvector two */ /* consecutive elements of SEP are set to the same value. If */ /* the eigenvalues cannot be reordered to compute SEP(j), SEP(j) */ /* is set to 0; this can only occur when the true value would be */ /* very small anyway. */ /* If JOB = 'E', SEP is not referenced. */ /* MM (input) INTEGER */ /* The number of elements in the arrays S (if JOB = 'E' or 'B') */ /* and/or SEP (if JOB = 'V' or 'B'). MM >= M. */ /* M (output) INTEGER */ /* The number of elements of the arrays S and/or SEP actually */ /* used to store the estimated condition numbers. */ /* If HOWMNY = 'A', M is set to N. */ /* WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,N+6) */ /* If JOB = 'E', WORK is not referenced. */ /* LDWORK (input) INTEGER */ /* The leading dimension of the array WORK. */ /* LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N. */ /* IWORK (workspace) INTEGER array, dimension (2*(N-1)) */ /* If JOB = 'E', IWORK is not referenced. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The reciprocal of the condition number of an eigenvalue lambda is */ /* defined as */ /* S(lambda) = |v'*u| / (norm(u)*norm(v)) */ /* where u and v are the right and left eigenvectors of T corresponding */ /* to lambda; v' denotes the conjugate-transpose of v, and norm(u) */ /* denotes the Euclidean norm. These reciprocal condition numbers always */ /* lie between zero (very badly conditioned) and one (very well */ /* conditioned). If n = 1, S(lambda) is defined to be 1. */ /* An approximate error bound for a computed eigenvalue W(i) is given by */ /* EPS * norm(T) / S(i) */ /* where EPS is the machine precision. */ /* The reciprocal of the condition number of the right eigenvector u */ /* corresponding to lambda is defined as follows. Suppose */ /* T = ( lambda c ) */ /* ( 0 T22 ) */ /* Then the reciprocal condition number is */ /* SEP( lambda, T22 ) = sigma-min( T22 - lambda*I ) */ /* where sigma-min denotes the smallest singular value. We approximate */ /* the smallest singular value by the reciprocal of an estimate of the */ /* one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is */ /* defined to be abs(T(1,1)). */ /* An approximate error bound for a computed right eigenvector VR(i) */ /* is given by */ /* EPS * norm(T) / SEP(i) */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode and test the input parameters */ /* Parameter adjustments */ --select; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1; vr -= vr_offset; --s; --sep; work_dim1 = *ldwork; work_offset = 1 + work_dim1; work -= work_offset; --iwork; /* Function Body */ wantbh = _starpu_lsame_(job, "B"); wants = _starpu_lsame_(job, "E") || wantbh; wantsp = _starpu_lsame_(job, "V") || wantbh; somcon = _starpu_lsame_(howmny, "S"); *info = 0; if (! wants && ! wantsp) { *info = -1; } else if (! _starpu_lsame_(howmny, "A") && ! somcon) { *info = -2; } else if (*n < 0) { *info = -4; } else if (*ldt < max(1,*n)) { *info = -6; } else if (*ldvl < 1 || wants && *ldvl < *n) { *info = -8; } else if (*ldvr < 1 || wants && *ldvr < *n) { *info = -10; } else { /* Set M to the number of eigenpairs for which condition numbers */ /* are required, and test MM. */ if (somcon) { *m = 0; pair = FALSE_; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (pair) { pair = FALSE_; } else { if (k < *n) { if (t[k + 1 + k * t_dim1] == 0.) { if (select[k]) { ++(*m); } } else { pair = TRUE_; if (select[k] || select[k + 1]) { *m += 2; } } } else { if (select[*n]) { ++(*m); } } } /* L10: */ } } else { *m = *n; } if (*mm < *m) { *info = -13; } else if (*ldwork < 1 || wantsp && *ldwork < *n) { *info = -16; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTRSNA", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { if (somcon) { if (! select[1]) { return 0; } } if (wants) { s[1] = 1.; } if (wantsp) { sep[1] = (d__1 = t[t_dim1 + 1], abs(d__1)); } return 0; } /* Get machine constants */ eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S") / eps; bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); ks = 0; pair = FALSE_; i__1 = *n; for (k = 1; k <= i__1; ++k) { /* Determine whether T(k,k) begins a 1-by-1 or 2-by-2 block. */ if (pair) { pair = FALSE_; goto L60; } else { if (k < *n) { pair = t[k + 1 + k * t_dim1] != 0.; } } /* Determine whether condition numbers are required for the k-th */ /* eigenpair. */ if (somcon) { if (pair) { if (! select[k] && ! select[k + 1]) { goto L60; } } else { if (! select[k]) { goto L60; } } } ++ks; if (wants) { /* Compute the reciprocal condition number of the k-th */ /* eigenvalue. */ if (! pair) { /* Real eigenvalue. */ prod = _starpu_ddot_(n, &vr[ks * vr_dim1 + 1], &c__1, &vl[ks * vl_dim1 + 1], &c__1); rnrm = _starpu_dnrm2_(n, &vr[ks * vr_dim1 + 1], &c__1); lnrm = _starpu_dnrm2_(n, &vl[ks * vl_dim1 + 1], &c__1); s[ks] = abs(prod) / (rnrm * lnrm); } else { /* Complex eigenvalue. */ prod1 = _starpu_ddot_(n, &vr[ks * vr_dim1 + 1], &c__1, &vl[ks * vl_dim1 + 1], &c__1); prod1 += _starpu_ddot_(n, &vr[(ks + 1) * vr_dim1 + 1], &c__1, &vl[(ks + 1) * vl_dim1 + 1], &c__1); prod2 = _starpu_ddot_(n, &vl[ks * vl_dim1 + 1], &c__1, &vr[(ks + 1) * vr_dim1 + 1], &c__1); prod2 -= _starpu_ddot_(n, &vl[(ks + 1) * vl_dim1 + 1], &c__1, &vr[ks * vr_dim1 + 1], &c__1); d__1 = _starpu_dnrm2_(n, &vr[ks * vr_dim1 + 1], &c__1); d__2 = _starpu_dnrm2_(n, &vr[(ks + 1) * vr_dim1 + 1], &c__1); rnrm = _starpu_dlapy2_(&d__1, &d__2); d__1 = _starpu_dnrm2_(n, &vl[ks * vl_dim1 + 1], &c__1); d__2 = _starpu_dnrm2_(n, &vl[(ks + 1) * vl_dim1 + 1], &c__1); lnrm = _starpu_dlapy2_(&d__1, &d__2); cond = _starpu_dlapy2_(&prod1, &prod2) / (rnrm * lnrm); s[ks] = cond; s[ks + 1] = cond; } } if (wantsp) { /* Estimate the reciprocal condition number of the k-th */ /* eigenvector. */ /* Copy the matrix T to the array WORK and swap the diagonal */ /* block beginning at T(k,k) to the (1,1) position. */ _starpu_dlacpy_("Full", n, n, &t[t_offset], ldt, &work[work_offset], ldwork); ifst = k; ilst = 1; _starpu_dtrexc_("No Q", n, &work[work_offset], ldwork, dummy, &c__1, & ifst, &ilst, &work[(*n + 1) * work_dim1 + 1], &ierr); if (ierr == 1 || ierr == 2) { /* Could not swap because blocks not well separated */ scale = 1.; est = bignum; } else { /* Reordering successful */ if (work[work_dim1 + 2] == 0.) { /* Form C = T22 - lambda*I in WORK(2:N,2:N). */ i__2 = *n; for (i__ = 2; i__ <= i__2; ++i__) { work[i__ + i__ * work_dim1] -= work[work_dim1 + 1]; /* L20: */ } n2 = 1; nn = *n - 1; } else { /* Triangularize the 2 by 2 block by unitary */ /* transformation U = [ cs i*ss ] */ /* [ i*ss cs ]. */ /* such that the (1,1) position of WORK is complex */ /* eigenvalue lambda with positive imaginary part. (2,2) */ /* position of WORK is the complex eigenvalue lambda */ /* with negative imaginary part. */ mu = sqrt((d__1 = work[(work_dim1 << 1) + 1], abs(d__1))) * sqrt((d__2 = work[work_dim1 + 2], abs(d__2))); delta = _starpu_dlapy2_(&mu, &work[work_dim1 + 2]); cs = mu / delta; sn = -work[work_dim1 + 2] / delta; /* Form */ /* C' = WORK(2:N,2:N) + i*[rwork(1) ..... rwork(n-1) ] */ /* [ mu ] */ /* [ .. ] */ /* [ .. ] */ /* [ mu ] */ /* where C' is conjugate transpose of complex matrix C, */ /* and RWORK is stored starting in the N+1-st column of */ /* WORK. */ i__2 = *n; for (j = 3; j <= i__2; ++j) { work[j * work_dim1 + 2] = cs * work[j * work_dim1 + 2] ; work[j + j * work_dim1] -= work[work_dim1 + 1]; /* L30: */ } work[(work_dim1 << 1) + 2] = 0.; work[(*n + 1) * work_dim1 + 1] = mu * 2.; i__2 = *n - 1; for (i__ = 2; i__ <= i__2; ++i__) { work[i__ + (*n + 1) * work_dim1] = sn * work[(i__ + 1) * work_dim1 + 1]; /* L40: */ } n2 = 2; nn = *n - 1 << 1; } /* Estimate norm(inv(C')) */ est = 0.; kase = 0; L50: _starpu_dlacn2_(&nn, &work[(*n + 2) * work_dim1 + 1], &work[(*n + 4) * work_dim1 + 1], &iwork[1], &est, &kase, isave); if (kase != 0) { if (kase == 1) { if (n2 == 1) { /* Real eigenvalue: solve C'*x = scale*c. */ i__2 = *n - 1; _starpu_dlaqtr_(&c_true, &c_true, &i__2, &work[(work_dim1 << 1) + 2], ldwork, dummy, &dumm, &scale, &work[(*n + 4) * work_dim1 + 1], &work[(* n + 6) * work_dim1 + 1], &ierr); } else { /* Complex eigenvalue: solve */ /* C'*(p+iq) = scale*(c+id) in real arithmetic. */ i__2 = *n - 1; _starpu_dlaqtr_(&c_true, &c_false, &i__2, &work[( work_dim1 << 1) + 2], ldwork, &work[(*n + 1) * work_dim1 + 1], &mu, &scale, &work[(* n + 4) * work_dim1 + 1], &work[(*n + 6) * work_dim1 + 1], &ierr); } } else { if (n2 == 1) { /* Real eigenvalue: solve C*x = scale*c. */ i__2 = *n - 1; _starpu_dlaqtr_(&c_false, &c_true, &i__2, &work[( work_dim1 << 1) + 2], ldwork, dummy, & dumm, &scale, &work[(*n + 4) * work_dim1 + 1], &work[(*n + 6) * work_dim1 + 1], & ierr); } else { /* Complex eigenvalue: solve */ /* C*(p+iq) = scale*(c+id) in real arithmetic. */ i__2 = *n - 1; _starpu_dlaqtr_(&c_false, &c_false, &i__2, &work[( work_dim1 << 1) + 2], ldwork, &work[(*n + 1) * work_dim1 + 1], &mu, &scale, &work[(* n + 4) * work_dim1 + 1], &work[(*n + 6) * work_dim1 + 1], &ierr); } } goto L50; } } sep[ks] = scale / max(est,smlnum); if (pair) { sep[ks + 1] = sep[ks]; } } if (pair) { ++ks; } L60: ; } return 0; /* End of DTRSNA */ } /* _starpu_dtrsna_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtrsyl.c000066400000000000000000001105321507764646700207170ustar00rootroot00000000000000/* dtrsyl.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static logical c_false = FALSE_; static integer c__2 = 2; static doublereal c_b26 = 1.; static doublereal c_b30 = 0.; static logical c_true = TRUE_; /* Subroutine */ int _starpu_dtrsyl_(char *trana, char *tranb, integer *isgn, integer *m, integer *n, doublereal *a, integer *lda, doublereal *b, integer * ldb, doublereal *c__, integer *ldc, doublereal *scale, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4; doublereal d__1, d__2; /* Local variables */ integer j, k, l; doublereal x[4] /* was [2][2] */; integer k1, k2, l1, l2; doublereal a11, db, da11, vec[4] /* was [2][2] */, dum[1], eps, sgn; extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, integer *); integer ierr; doublereal smin, suml, sumr; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); integer knext, lnext; doublereal xnorm; extern /* Subroutine */ int _starpu_dlaln2_(logical *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal * , doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlasy2_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dlabad_(doublereal *, doublereal *); extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); doublereal scaloc; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); doublereal bignum; logical notrna, notrnb; doublereal smlnum; /* -- LAPACK routine (version 3.2) -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRSYL solves the real Sylvester matrix equation: */ /* op(A)*X + X*op(B) = scale*C or */ /* op(A)*X - X*op(B) = scale*C, */ /* where op(A) = A or A**T, and A and B are both upper quasi- */ /* triangular. A is M-by-M and B is N-by-N; the right hand side C and */ /* the solution X are M-by-N; and scale is an output scale factor, set */ /* <= 1 to avoid overflow in X. */ /* A and B must be in Schur canonical form (as returned by DHSEQR), that */ /* is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; */ /* each 2-by-2 diagonal block has its diagonal elements equal and its */ /* off-diagonal elements of opposite sign. */ /* Arguments */ /* ========= */ /* TRANA (input) CHARACTER*1 */ /* Specifies the option op(A): */ /* = 'N': op(A) = A (No transpose) */ /* = 'T': op(A) = A**T (Transpose) */ /* = 'C': op(A) = A**H (Conjugate transpose = Transpose) */ /* TRANB (input) CHARACTER*1 */ /* Specifies the option op(B): */ /* = 'N': op(B) = B (No transpose) */ /* = 'T': op(B) = B**T (Transpose) */ /* = 'C': op(B) = B**H (Conjugate transpose = Transpose) */ /* ISGN (input) INTEGER */ /* Specifies the sign in the equation: */ /* = +1: solve op(A)*X + X*op(B) = scale*C */ /* = -1: solve op(A)*X - X*op(B) = scale*C */ /* M (input) INTEGER */ /* The order of the matrix A, and the number of rows in the */ /* matrices X and C. M >= 0. */ /* N (input) INTEGER */ /* The order of the matrix B, and the number of columns in the */ /* matrices X and C. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,M) */ /* The upper quasi-triangular matrix A, in Schur canonical form. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* B (input) DOUBLE PRECISION array, dimension (LDB,N) */ /* The upper quasi-triangular matrix B, in Schur canonical form. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */ /* On entry, the M-by-N right hand side matrix C. */ /* On exit, C is overwritten by the solution matrix X. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. LDC >= max(1,M) */ /* SCALE (output) DOUBLE PRECISION */ /* The scale factor, scale, set <= 1 to avoid overflow in X. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* = 1: A and B have common or very close eigenvalues; perturbed */ /* values were used to solve the equation (but the matrices */ /* A and B are unchanged). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode and Test input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; /* Function Body */ notrna = _starpu_lsame_(trana, "N"); notrnb = _starpu_lsame_(tranb, "N"); *info = 0; if (! notrna && ! _starpu_lsame_(trana, "T") && ! _starpu_lsame_( trana, "C")) { *info = -1; } else if (! notrnb && ! _starpu_lsame_(tranb, "T") && ! _starpu_lsame_(tranb, "C")) { *info = -2; } else if (*isgn != 1 && *isgn != -1) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*lda < max(1,*m)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } else if (*ldc < max(1,*m)) { *info = -11; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTRSYL", &i__1); return 0; } /* Quick return if possible */ *scale = 1.; if (*m == 0 || *n == 0) { return 0; } /* Set constants to control overflow */ eps = _starpu_dlamch_("P"); smlnum = _starpu_dlamch_("S"); bignum = 1. / smlnum; _starpu_dlabad_(&smlnum, &bignum); smlnum = smlnum * (doublereal) (*m * *n) / eps; bignum = 1. / smlnum; /* Computing MAX */ d__1 = smlnum, d__2 = eps * _starpu_dlange_("M", m, m, &a[a_offset], lda, dum), d__1 = max(d__1,d__2), d__2 = eps * _starpu_dlange_("M", n, n, &b[b_offset], ldb, dum); smin = max(d__1,d__2); sgn = (doublereal) (*isgn); if (notrna && notrnb) { /* Solve A*X + ISGN*X*B = scale*C. */ /* The (K,L)th block of X is determined starting from */ /* bottom-left corner column by column by */ /* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ /* Where */ /* M L-1 */ /* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. */ /* I=K+1 J=1 */ /* Start column loop (index = L) */ /* L1 (L2) : column index of the first (first) row of X(K,L). */ lnext = 1; i__1 = *n; for (l = 1; l <= i__1; ++l) { if (l < lnext) { goto L60; } if (l == *n) { l1 = l; l2 = l; } else { if (b[l + 1 + l * b_dim1] != 0.) { l1 = l; l2 = l + 1; lnext = l + 2; } else { l1 = l; l2 = l; lnext = l + 1; } } /* Start row loop (index = K) */ /* K1 (K2): row index of the first (last) row of X(K,L). */ knext = *m; for (k = *m; k >= 1; --k) { if (k > knext) { goto L50; } if (k == 1) { k1 = k; k2 = k; } else { if (a[k + (k - 1) * a_dim1] != 0.) { k1 = k - 1; k2 = k; knext = k - 2; } else { k1 = k; k2 = k; knext = k - 1; } } if (l1 == l2 && k1 == k2) { i__2 = *m - k1; /* Computing MIN */ i__3 = k1 + 1; /* Computing MIN */ i__4 = k1 + 1; suml = _starpu_ddot_(&i__2, &a[k1 + min(i__3, *m)* a_dim1], lda, & c__[min(i__4, *m)+ l1 * c_dim1], &c__1); i__2 = l1 - 1; sumr = _starpu_ddot_(&i__2, &c__[k1 + c_dim1], ldc, &b[l1 * b_dim1 + 1], &c__1); vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); scaloc = 1.; a11 = a[k1 + k1 * a_dim1] + sgn * b[l1 + l1 * b_dim1]; da11 = abs(a11); if (da11 <= smin) { a11 = smin; da11 = smin; *info = 1; } db = abs(vec[0]); if (da11 < 1. && db > 1.) { if (db > bignum * da11) { scaloc = 1. / db; } } x[0] = vec[0] * scaloc / a11; if (scaloc != 1.) { i__2 = *n; for (j = 1; j <= i__2; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L10: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; } else if (l1 == l2 && k1 != k2) { i__2 = *m - k2; /* Computing MIN */ i__3 = k2 + 1; /* Computing MIN */ i__4 = k2 + 1; suml = _starpu_ddot_(&i__2, &a[k1 + min(i__3, *m)* a_dim1], lda, & c__[min(i__4, *m)+ l1 * c_dim1], &c__1); i__2 = l1 - 1; sumr = _starpu_ddot_(&i__2, &c__[k1 + c_dim1], ldc, &b[l1 * b_dim1 + 1], &c__1); vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); i__2 = *m - k2; /* Computing MIN */ i__3 = k2 + 1; /* Computing MIN */ i__4 = k2 + 1; suml = _starpu_ddot_(&i__2, &a[k2 + min(i__3, *m)* a_dim1], lda, & c__[min(i__4, *m)+ l1 * c_dim1], &c__1); i__2 = l1 - 1; sumr = _starpu_ddot_(&i__2, &c__[k2 + c_dim1], ldc, &b[l1 * b_dim1 + 1], &c__1); vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); d__1 = -sgn * b[l1 + l1 * b_dim1]; _starpu_dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b26, &a[k1 + k1 * a_dim1], lda, &c_b26, &c_b26, vec, &c__2, &d__1, &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 1; } if (scaloc != 1.) { i__2 = *n; for (j = 1; j <= i__2; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L20: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; c__[k2 + l1 * c_dim1] = x[1]; } else if (l1 != l2 && k1 == k2) { i__2 = *m - k1; /* Computing MIN */ i__3 = k1 + 1; /* Computing MIN */ i__4 = k1 + 1; suml = _starpu_ddot_(&i__2, &a[k1 + min(i__3, *m)* a_dim1], lda, & c__[min(i__4, *m)+ l1 * c_dim1], &c__1); i__2 = l1 - 1; sumr = _starpu_ddot_(&i__2, &c__[k1 + c_dim1], ldc, &b[l1 * b_dim1 + 1], &c__1); vec[0] = sgn * (c__[k1 + l1 * c_dim1] - (suml + sgn * sumr)); i__2 = *m - k1; /* Computing MIN */ i__3 = k1 + 1; /* Computing MIN */ i__4 = k1 + 1; suml = _starpu_ddot_(&i__2, &a[k1 + min(i__3, *m)* a_dim1], lda, & c__[min(i__4, *m)+ l2 * c_dim1], &c__1); i__2 = l1 - 1; sumr = _starpu_ddot_(&i__2, &c__[k1 + c_dim1], ldc, &b[l2 * b_dim1 + 1], &c__1); vec[1] = sgn * (c__[k1 + l2 * c_dim1] - (suml + sgn * sumr)); d__1 = -sgn * a[k1 + k1 * a_dim1]; _starpu_dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b26, &b[l1 + l1 * b_dim1], ldb, &c_b26, &c_b26, vec, &c__2, &d__1, &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 1; } if (scaloc != 1.) { i__2 = *n; for (j = 1; j <= i__2; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L30: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; c__[k1 + l2 * c_dim1] = x[1]; } else if (l1 != l2 && k1 != k2) { i__2 = *m - k2; /* Computing MIN */ i__3 = k2 + 1; /* Computing MIN */ i__4 = k2 + 1; suml = _starpu_ddot_(&i__2, &a[k1 + min(i__3, *m)* a_dim1], lda, & c__[min(i__4, *m)+ l1 * c_dim1], &c__1); i__2 = l1 - 1; sumr = _starpu_ddot_(&i__2, &c__[k1 + c_dim1], ldc, &b[l1 * b_dim1 + 1], &c__1); vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); i__2 = *m - k2; /* Computing MIN */ i__3 = k2 + 1; /* Computing MIN */ i__4 = k2 + 1; suml = _starpu_ddot_(&i__2, &a[k1 + min(i__3, *m)* a_dim1], lda, & c__[min(i__4, *m)+ l2 * c_dim1], &c__1); i__2 = l1 - 1; sumr = _starpu_ddot_(&i__2, &c__[k1 + c_dim1], ldc, &b[l2 * b_dim1 + 1], &c__1); vec[2] = c__[k1 + l2 * c_dim1] - (suml + sgn * sumr); i__2 = *m - k2; /* Computing MIN */ i__3 = k2 + 1; /* Computing MIN */ i__4 = k2 + 1; suml = _starpu_ddot_(&i__2, &a[k2 + min(i__3, *m)* a_dim1], lda, & c__[min(i__4, *m)+ l1 * c_dim1], &c__1); i__2 = l1 - 1; sumr = _starpu_ddot_(&i__2, &c__[k2 + c_dim1], ldc, &b[l1 * b_dim1 + 1], &c__1); vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); i__2 = *m - k2; /* Computing MIN */ i__3 = k2 + 1; /* Computing MIN */ i__4 = k2 + 1; suml = _starpu_ddot_(&i__2, &a[k2 + min(i__3, *m)* a_dim1], lda, & c__[min(i__4, *m)+ l2 * c_dim1], &c__1); i__2 = l1 - 1; sumr = _starpu_ddot_(&i__2, &c__[k2 + c_dim1], ldc, &b[l2 * b_dim1 + 1], &c__1); vec[3] = c__[k2 + l2 * c_dim1] - (suml + sgn * sumr); _starpu_dlasy2_(&c_false, &c_false, isgn, &c__2, &c__2, &a[k1 + k1 * a_dim1], lda, &b[l1 + l1 * b_dim1], ldb, vec, &c__2, &scaloc, x, &c__2, &xnorm, &ierr); if (ierr != 0) { *info = 1; } if (scaloc != 1.) { i__2 = *n; for (j = 1; j <= i__2; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L40: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; c__[k1 + l2 * c_dim1] = x[2]; c__[k2 + l1 * c_dim1] = x[1]; c__[k2 + l2 * c_dim1] = x[3]; } L50: ; } L60: ; } } else if (! notrna && notrnb) { /* Solve A' *X + ISGN*X*B = scale*C. */ /* The (K,L)th block of X is determined starting from */ /* upper-left corner column by column by */ /* A(K,K)'*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) */ /* Where */ /* K-1 L-1 */ /* R(K,L) = SUM [A(I,K)'*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] */ /* I=1 J=1 */ /* Start column loop (index = L) */ /* L1 (L2): column index of the first (last) row of X(K,L) */ lnext = 1; i__1 = *n; for (l = 1; l <= i__1; ++l) { if (l < lnext) { goto L120; } if (l == *n) { l1 = l; l2 = l; } else { if (b[l + 1 + l * b_dim1] != 0.) { l1 = l; l2 = l + 1; lnext = l + 2; } else { l1 = l; l2 = l; lnext = l + 1; } } /* Start row loop (index = K) */ /* K1 (K2): row index of the first (last) row of X(K,L) */ knext = 1; i__2 = *m; for (k = 1; k <= i__2; ++k) { if (k < knext) { goto L110; } if (k == *m) { k1 = k; k2 = k; } else { if (a[k + 1 + k * a_dim1] != 0.) { k1 = k; k2 = k + 1; knext = k + 2; } else { k1 = k; k2 = k; knext = k + 1; } } if (l1 == l2 && k1 == k2) { i__3 = k1 - 1; suml = _starpu_ddot_(&i__3, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * c_dim1 + 1], &c__1); i__3 = l1 - 1; sumr = _starpu_ddot_(&i__3, &c__[k1 + c_dim1], ldc, &b[l1 * b_dim1 + 1], &c__1); vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); scaloc = 1.; a11 = a[k1 + k1 * a_dim1] + sgn * b[l1 + l1 * b_dim1]; da11 = abs(a11); if (da11 <= smin) { a11 = smin; da11 = smin; *info = 1; } db = abs(vec[0]); if (da11 < 1. && db > 1.) { if (db > bignum * da11) { scaloc = 1. / db; } } x[0] = vec[0] * scaloc / a11; if (scaloc != 1.) { i__3 = *n; for (j = 1; j <= i__3; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L70: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; } else if (l1 == l2 && k1 != k2) { i__3 = k1 - 1; suml = _starpu_ddot_(&i__3, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * c_dim1 + 1], &c__1); i__3 = l1 - 1; sumr = _starpu_ddot_(&i__3, &c__[k1 + c_dim1], ldc, &b[l1 * b_dim1 + 1], &c__1); vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); i__3 = k1 - 1; suml = _starpu_ddot_(&i__3, &a[k2 * a_dim1 + 1], &c__1, &c__[l1 * c_dim1 + 1], &c__1); i__3 = l1 - 1; sumr = _starpu_ddot_(&i__3, &c__[k2 + c_dim1], ldc, &b[l1 * b_dim1 + 1], &c__1); vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); d__1 = -sgn * b[l1 + l1 * b_dim1]; _starpu_dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b26, &a[k1 + k1 * a_dim1], lda, &c_b26, &c_b26, vec, &c__2, &d__1, &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 1; } if (scaloc != 1.) { i__3 = *n; for (j = 1; j <= i__3; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L80: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; c__[k2 + l1 * c_dim1] = x[1]; } else if (l1 != l2 && k1 == k2) { i__3 = k1 - 1; suml = _starpu_ddot_(&i__3, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * c_dim1 + 1], &c__1); i__3 = l1 - 1; sumr = _starpu_ddot_(&i__3, &c__[k1 + c_dim1], ldc, &b[l1 * b_dim1 + 1], &c__1); vec[0] = sgn * (c__[k1 + l1 * c_dim1] - (suml + sgn * sumr)); i__3 = k1 - 1; suml = _starpu_ddot_(&i__3, &a[k1 * a_dim1 + 1], &c__1, &c__[l2 * c_dim1 + 1], &c__1); i__3 = l1 - 1; sumr = _starpu_ddot_(&i__3, &c__[k1 + c_dim1], ldc, &b[l2 * b_dim1 + 1], &c__1); vec[1] = sgn * (c__[k1 + l2 * c_dim1] - (suml + sgn * sumr)); d__1 = -sgn * a[k1 + k1 * a_dim1]; _starpu_dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b26, &b[l1 + l1 * b_dim1], ldb, &c_b26, &c_b26, vec, &c__2, &d__1, &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 1; } if (scaloc != 1.) { i__3 = *n; for (j = 1; j <= i__3; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L90: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; c__[k1 + l2 * c_dim1] = x[1]; } else if (l1 != l2 && k1 != k2) { i__3 = k1 - 1; suml = _starpu_ddot_(&i__3, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * c_dim1 + 1], &c__1); i__3 = l1 - 1; sumr = _starpu_ddot_(&i__3, &c__[k1 + c_dim1], ldc, &b[l1 * b_dim1 + 1], &c__1); vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); i__3 = k1 - 1; suml = _starpu_ddot_(&i__3, &a[k1 * a_dim1 + 1], &c__1, &c__[l2 * c_dim1 + 1], &c__1); i__3 = l1 - 1; sumr = _starpu_ddot_(&i__3, &c__[k1 + c_dim1], ldc, &b[l2 * b_dim1 + 1], &c__1); vec[2] = c__[k1 + l2 * c_dim1] - (suml + sgn * sumr); i__3 = k1 - 1; suml = _starpu_ddot_(&i__3, &a[k2 * a_dim1 + 1], &c__1, &c__[l1 * c_dim1 + 1], &c__1); i__3 = l1 - 1; sumr = _starpu_ddot_(&i__3, &c__[k2 + c_dim1], ldc, &b[l1 * b_dim1 + 1], &c__1); vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); i__3 = k1 - 1; suml = _starpu_ddot_(&i__3, &a[k2 * a_dim1 + 1], &c__1, &c__[l2 * c_dim1 + 1], &c__1); i__3 = l1 - 1; sumr = _starpu_ddot_(&i__3, &c__[k2 + c_dim1], ldc, &b[l2 * b_dim1 + 1], &c__1); vec[3] = c__[k2 + l2 * c_dim1] - (suml + sgn * sumr); _starpu_dlasy2_(&c_true, &c_false, isgn, &c__2, &c__2, &a[k1 + k1 * a_dim1], lda, &b[l1 + l1 * b_dim1], ldb, vec, & c__2, &scaloc, x, &c__2, &xnorm, &ierr); if (ierr != 0) { *info = 1; } if (scaloc != 1.) { i__3 = *n; for (j = 1; j <= i__3; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L100: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; c__[k1 + l2 * c_dim1] = x[2]; c__[k2 + l1 * c_dim1] = x[1]; c__[k2 + l2 * c_dim1] = x[3]; } L110: ; } L120: ; } } else if (! notrna && ! notrnb) { /* Solve A'*X + ISGN*X*B' = scale*C. */ /* The (K,L)th block of X is determined starting from */ /* top-right corner column by column by */ /* A(K,K)'*X(K,L) + ISGN*X(K,L)*B(L,L)' = C(K,L) - R(K,L) */ /* Where */ /* K-1 N */ /* R(K,L) = SUM [A(I,K)'*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)']. */ /* I=1 J=L+1 */ /* Start column loop (index = L) */ /* L1 (L2): column index of the first (last) row of X(K,L) */ lnext = *n; for (l = *n; l >= 1; --l) { if (l > lnext) { goto L180; } if (l == 1) { l1 = l; l2 = l; } else { if (b[l + (l - 1) * b_dim1] != 0.) { l1 = l - 1; l2 = l; lnext = l - 2; } else { l1 = l; l2 = l; lnext = l - 1; } } /* Start row loop (index = K) */ /* K1 (K2): row index of the first (last) row of X(K,L) */ knext = 1; i__1 = *m; for (k = 1; k <= i__1; ++k) { if (k < knext) { goto L170; } if (k == *m) { k1 = k; k2 = k; } else { if (a[k + 1 + k * a_dim1] != 0.) { k1 = k; k2 = k + 1; knext = k + 2; } else { k1 = k; k2 = k; knext = k + 1; } } if (l1 == l2 && k1 == k2) { i__2 = k1 - 1; suml = _starpu_ddot_(&i__2, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * c_dim1 + 1], &c__1); i__2 = *n - l1; /* Computing MIN */ i__3 = l1 + 1; /* Computing MIN */ i__4 = l1 + 1; sumr = _starpu_ddot_(&i__2, &c__[k1 + min(i__3, *n)* c_dim1], ldc, &b[l1 + min(i__4, *n)* b_dim1], ldb); vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); scaloc = 1.; a11 = a[k1 + k1 * a_dim1] + sgn * b[l1 + l1 * b_dim1]; da11 = abs(a11); if (da11 <= smin) { a11 = smin; da11 = smin; *info = 1; } db = abs(vec[0]); if (da11 < 1. && db > 1.) { if (db > bignum * da11) { scaloc = 1. / db; } } x[0] = vec[0] * scaloc / a11; if (scaloc != 1.) { i__2 = *n; for (j = 1; j <= i__2; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L130: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; } else if (l1 == l2 && k1 != k2) { i__2 = k1 - 1; suml = _starpu_ddot_(&i__2, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * c_dim1 + 1], &c__1); i__2 = *n - l2; /* Computing MIN */ i__3 = l2 + 1; /* Computing MIN */ i__4 = l2 + 1; sumr = _starpu_ddot_(&i__2, &c__[k1 + min(i__3, *n)* c_dim1], ldc, &b[l1 + min(i__4, *n)* b_dim1], ldb); vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); i__2 = k1 - 1; suml = _starpu_ddot_(&i__2, &a[k2 * a_dim1 + 1], &c__1, &c__[l1 * c_dim1 + 1], &c__1); i__2 = *n - l2; /* Computing MIN */ i__3 = l2 + 1; /* Computing MIN */ i__4 = l2 + 1; sumr = _starpu_ddot_(&i__2, &c__[k2 + min(i__3, *n)* c_dim1], ldc, &b[l1 + min(i__4, *n)* b_dim1], ldb); vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); d__1 = -sgn * b[l1 + l1 * b_dim1]; _starpu_dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b26, &a[k1 + k1 * a_dim1], lda, &c_b26, &c_b26, vec, &c__2, &d__1, &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 1; } if (scaloc != 1.) { i__2 = *n; for (j = 1; j <= i__2; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L140: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; c__[k2 + l1 * c_dim1] = x[1]; } else if (l1 != l2 && k1 == k2) { i__2 = k1 - 1; suml = _starpu_ddot_(&i__2, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * c_dim1 + 1], &c__1); i__2 = *n - l2; /* Computing MIN */ i__3 = l2 + 1; /* Computing MIN */ i__4 = l2 + 1; sumr = _starpu_ddot_(&i__2, &c__[k1 + min(i__3, *n)* c_dim1], ldc, &b[l1 + min(i__4, *n)* b_dim1], ldb); vec[0] = sgn * (c__[k1 + l1 * c_dim1] - (suml + sgn * sumr)); i__2 = k1 - 1; suml = _starpu_ddot_(&i__2, &a[k1 * a_dim1 + 1], &c__1, &c__[l2 * c_dim1 + 1], &c__1); i__2 = *n - l2; /* Computing MIN */ i__3 = l2 + 1; /* Computing MIN */ i__4 = l2 + 1; sumr = _starpu_ddot_(&i__2, &c__[k1 + min(i__3, *n)* c_dim1], ldc, &b[l2 + min(i__4, *n)* b_dim1], ldb); vec[1] = sgn * (c__[k1 + l2 * c_dim1] - (suml + sgn * sumr)); d__1 = -sgn * a[k1 + k1 * a_dim1]; _starpu_dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b26, &b[l1 + l1 * b_dim1], ldb, &c_b26, &c_b26, vec, &c__2, &d__1, &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 1; } if (scaloc != 1.) { i__2 = *n; for (j = 1; j <= i__2; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L150: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; c__[k1 + l2 * c_dim1] = x[1]; } else if (l1 != l2 && k1 != k2) { i__2 = k1 - 1; suml = _starpu_ddot_(&i__2, &a[k1 * a_dim1 + 1], &c__1, &c__[l1 * c_dim1 + 1], &c__1); i__2 = *n - l2; /* Computing MIN */ i__3 = l2 + 1; /* Computing MIN */ i__4 = l2 + 1; sumr = _starpu_ddot_(&i__2, &c__[k1 + min(i__3, *n)* c_dim1], ldc, &b[l1 + min(i__4, *n)* b_dim1], ldb); vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); i__2 = k1 - 1; suml = _starpu_ddot_(&i__2, &a[k1 * a_dim1 + 1], &c__1, &c__[l2 * c_dim1 + 1], &c__1); i__2 = *n - l2; /* Computing MIN */ i__3 = l2 + 1; /* Computing MIN */ i__4 = l2 + 1; sumr = _starpu_ddot_(&i__2, &c__[k1 + min(i__3, *n)* c_dim1], ldc, &b[l2 + min(i__4, *n)* b_dim1], ldb); vec[2] = c__[k1 + l2 * c_dim1] - (suml + sgn * sumr); i__2 = k1 - 1; suml = _starpu_ddot_(&i__2, &a[k2 * a_dim1 + 1], &c__1, &c__[l1 * c_dim1 + 1], &c__1); i__2 = *n - l2; /* Computing MIN */ i__3 = l2 + 1; /* Computing MIN */ i__4 = l2 + 1; sumr = _starpu_ddot_(&i__2, &c__[k2 + min(i__3, *n)* c_dim1], ldc, &b[l1 + min(i__4, *n)* b_dim1], ldb); vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); i__2 = k1 - 1; suml = _starpu_ddot_(&i__2, &a[k2 * a_dim1 + 1], &c__1, &c__[l2 * c_dim1 + 1], &c__1); i__2 = *n - l2; /* Computing MIN */ i__3 = l2 + 1; /* Computing MIN */ i__4 = l2 + 1; sumr = _starpu_ddot_(&i__2, &c__[k2 + min(i__3, *n)* c_dim1], ldc, &b[l2 + min(i__4, *n)* b_dim1], ldb); vec[3] = c__[k2 + l2 * c_dim1] - (suml + sgn * sumr); _starpu_dlasy2_(&c_true, &c_true, isgn, &c__2, &c__2, &a[k1 + k1 * a_dim1], lda, &b[l1 + l1 * b_dim1], ldb, vec, & c__2, &scaloc, x, &c__2, &xnorm, &ierr); if (ierr != 0) { *info = 1; } if (scaloc != 1.) { i__2 = *n; for (j = 1; j <= i__2; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L160: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; c__[k1 + l2 * c_dim1] = x[2]; c__[k2 + l1 * c_dim1] = x[1]; c__[k2 + l2 * c_dim1] = x[3]; } L170: ; } L180: ; } } else if (notrna && ! notrnb) { /* Solve A*X + ISGN*X*B' = scale*C. */ /* The (K,L)th block of X is determined starting from */ /* bottom-right corner column by column by */ /* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)' = C(K,L) - R(K,L) */ /* Where */ /* M N */ /* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)']. */ /* I=K+1 J=L+1 */ /* Start column loop (index = L) */ /* L1 (L2): column index of the first (last) row of X(K,L) */ lnext = *n; for (l = *n; l >= 1; --l) { if (l > lnext) { goto L240; } if (l == 1) { l1 = l; l2 = l; } else { if (b[l + (l - 1) * b_dim1] != 0.) { l1 = l - 1; l2 = l; lnext = l - 2; } else { l1 = l; l2 = l; lnext = l - 1; } } /* Start row loop (index = K) */ /* K1 (K2): row index of the first (last) row of X(K,L) */ knext = *m; for (k = *m; k >= 1; --k) { if (k > knext) { goto L230; } if (k == 1) { k1 = k; k2 = k; } else { if (a[k + (k - 1) * a_dim1] != 0.) { k1 = k - 1; k2 = k; knext = k - 2; } else { k1 = k; k2 = k; knext = k - 1; } } if (l1 == l2 && k1 == k2) { i__1 = *m - k1; /* Computing MIN */ i__2 = k1 + 1; /* Computing MIN */ i__3 = k1 + 1; suml = _starpu_ddot_(&i__1, &a[k1 + min(i__2, *m)* a_dim1], lda, & c__[min(i__3, *m)+ l1 * c_dim1], &c__1); i__1 = *n - l1; /* Computing MIN */ i__2 = l1 + 1; /* Computing MIN */ i__3 = l1 + 1; sumr = _starpu_ddot_(&i__1, &c__[k1 + min(i__2, *n)* c_dim1], ldc, &b[l1 + min(i__3, *n)* b_dim1], ldb); vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); scaloc = 1.; a11 = a[k1 + k1 * a_dim1] + sgn * b[l1 + l1 * b_dim1]; da11 = abs(a11); if (da11 <= smin) { a11 = smin; da11 = smin; *info = 1; } db = abs(vec[0]); if (da11 < 1. && db > 1.) { if (db > bignum * da11) { scaloc = 1. / db; } } x[0] = vec[0] * scaloc / a11; if (scaloc != 1.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L190: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; } else if (l1 == l2 && k1 != k2) { i__1 = *m - k2; /* Computing MIN */ i__2 = k2 + 1; /* Computing MIN */ i__3 = k2 + 1; suml = _starpu_ddot_(&i__1, &a[k1 + min(i__2, *m)* a_dim1], lda, & c__[min(i__3, *m)+ l1 * c_dim1], &c__1); i__1 = *n - l2; /* Computing MIN */ i__2 = l2 + 1; /* Computing MIN */ i__3 = l2 + 1; sumr = _starpu_ddot_(&i__1, &c__[k1 + min(i__2, *n)* c_dim1], ldc, &b[l1 + min(i__3, *n)* b_dim1], ldb); vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); i__1 = *m - k2; /* Computing MIN */ i__2 = k2 + 1; /* Computing MIN */ i__3 = k2 + 1; suml = _starpu_ddot_(&i__1, &a[k2 + min(i__2, *m)* a_dim1], lda, & c__[min(i__3, *m)+ l1 * c_dim1], &c__1); i__1 = *n - l2; /* Computing MIN */ i__2 = l2 + 1; /* Computing MIN */ i__3 = l2 + 1; sumr = _starpu_ddot_(&i__1, &c__[k2 + min(i__2, *n)* c_dim1], ldc, &b[l1 + min(i__3, *n)* b_dim1], ldb); vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); d__1 = -sgn * b[l1 + l1 * b_dim1]; _starpu_dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b26, &a[k1 + k1 * a_dim1], lda, &c_b26, &c_b26, vec, &c__2, &d__1, &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 1; } if (scaloc != 1.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L200: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; c__[k2 + l1 * c_dim1] = x[1]; } else if (l1 != l2 && k1 == k2) { i__1 = *m - k1; /* Computing MIN */ i__2 = k1 + 1; /* Computing MIN */ i__3 = k1 + 1; suml = _starpu_ddot_(&i__1, &a[k1 + min(i__2, *m)* a_dim1], lda, & c__[min(i__3, *m)+ l1 * c_dim1], &c__1); i__1 = *n - l2; /* Computing MIN */ i__2 = l2 + 1; /* Computing MIN */ i__3 = l2 + 1; sumr = _starpu_ddot_(&i__1, &c__[k1 + min(i__2, *n)* c_dim1], ldc, &b[l1 + min(i__3, *n)* b_dim1], ldb); vec[0] = sgn * (c__[k1 + l1 * c_dim1] - (suml + sgn * sumr)); i__1 = *m - k1; /* Computing MIN */ i__2 = k1 + 1; /* Computing MIN */ i__3 = k1 + 1; suml = _starpu_ddot_(&i__1, &a[k1 + min(i__2, *m)* a_dim1], lda, & c__[min(i__3, *m)+ l2 * c_dim1], &c__1); i__1 = *n - l2; /* Computing MIN */ i__2 = l2 + 1; /* Computing MIN */ i__3 = l2 + 1; sumr = _starpu_ddot_(&i__1, &c__[k1 + min(i__2, *n)* c_dim1], ldc, &b[l2 + min(i__3, *n)* b_dim1], ldb); vec[1] = sgn * (c__[k1 + l2 * c_dim1] - (suml + sgn * sumr)); d__1 = -sgn * a[k1 + k1 * a_dim1]; _starpu_dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b26, &b[l1 + l1 * b_dim1], ldb, &c_b26, &c_b26, vec, &c__2, &d__1, &c_b30, x, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 1; } if (scaloc != 1.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L210: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; c__[k1 + l2 * c_dim1] = x[1]; } else if (l1 != l2 && k1 != k2) { i__1 = *m - k2; /* Computing MIN */ i__2 = k2 + 1; /* Computing MIN */ i__3 = k2 + 1; suml = _starpu_ddot_(&i__1, &a[k1 + min(i__2, *m)* a_dim1], lda, & c__[min(i__3, *m)+ l1 * c_dim1], &c__1); i__1 = *n - l2; /* Computing MIN */ i__2 = l2 + 1; /* Computing MIN */ i__3 = l2 + 1; sumr = _starpu_ddot_(&i__1, &c__[k1 + min(i__2, *n)* c_dim1], ldc, &b[l1 + min(i__3, *n)* b_dim1], ldb); vec[0] = c__[k1 + l1 * c_dim1] - (suml + sgn * sumr); i__1 = *m - k2; /* Computing MIN */ i__2 = k2 + 1; /* Computing MIN */ i__3 = k2 + 1; suml = _starpu_ddot_(&i__1, &a[k1 + min(i__2, *m)* a_dim1], lda, & c__[min(i__3, *m)+ l2 * c_dim1], &c__1); i__1 = *n - l2; /* Computing MIN */ i__2 = l2 + 1; /* Computing MIN */ i__3 = l2 + 1; sumr = _starpu_ddot_(&i__1, &c__[k1 + min(i__2, *n)* c_dim1], ldc, &b[l2 + min(i__3, *n)* b_dim1], ldb); vec[2] = c__[k1 + l2 * c_dim1] - (suml + sgn * sumr); i__1 = *m - k2; /* Computing MIN */ i__2 = k2 + 1; /* Computing MIN */ i__3 = k2 + 1; suml = _starpu_ddot_(&i__1, &a[k2 + min(i__2, *m)* a_dim1], lda, & c__[min(i__3, *m)+ l1 * c_dim1], &c__1); i__1 = *n - l2; /* Computing MIN */ i__2 = l2 + 1; /* Computing MIN */ i__3 = l2 + 1; sumr = _starpu_ddot_(&i__1, &c__[k2 + min(i__2, *n)* c_dim1], ldc, &b[l1 + min(i__3, *n)* b_dim1], ldb); vec[1] = c__[k2 + l1 * c_dim1] - (suml + sgn * sumr); i__1 = *m - k2; /* Computing MIN */ i__2 = k2 + 1; /* Computing MIN */ i__3 = k2 + 1; suml = _starpu_ddot_(&i__1, &a[k2 + min(i__2, *m)* a_dim1], lda, & c__[min(i__3, *m)+ l2 * c_dim1], &c__1); i__1 = *n - l2; /* Computing MIN */ i__2 = l2 + 1; /* Computing MIN */ i__3 = l2 + 1; sumr = _starpu_ddot_(&i__1, &c__[k2 + min(i__2, *n)* c_dim1], ldc, &b[l2 + min(i__3, *n)* b_dim1], ldb); vec[3] = c__[k2 + l2 * c_dim1] - (suml + sgn * sumr); _starpu_dlasy2_(&c_false, &c_true, isgn, &c__2, &c__2, &a[k1 + k1 * a_dim1], lda, &b[l1 + l1 * b_dim1], ldb, vec, & c__2, &scaloc, x, &c__2, &xnorm, &ierr); if (ierr != 0) { *info = 1; } if (scaloc != 1.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { _starpu_dscal_(m, &scaloc, &c__[j * c_dim1 + 1], &c__1); /* L220: */ } *scale *= scaloc; } c__[k1 + l1 * c_dim1] = x[0]; c__[k1 + l2 * c_dim1] = x[2]; c__[k2 + l1 * c_dim1] = x[1]; c__[k2 + l2 * c_dim1] = x[3]; } L230: ; } L240: ; } } return 0; /* End of DTRSYL */ } /* _starpu_dtrsyl_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtrti2.c000066400000000000000000000121351507764646700206060ustar00rootroot00000000000000/* dtrti2.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_dtrti2_(char *uplo, char *diag, integer *n, doublereal * a, integer *lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer j; doublereal ajj; extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, integer *); extern logical _starpu_lsame_(char *, char *); logical upper; extern /* Subroutine */ int _starpu_dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *); logical nounit; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRTI2 computes the inverse of a real upper or lower triangular */ /* matrix. */ /* This is the Level 2 BLAS version of the algorithm. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the matrix A is upper or lower triangular. */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* DIAG (input) CHARACTER*1 */ /* Specifies whether or not the matrix A is unit triangular. */ /* = 'N': Non-unit triangular */ /* = 'U': Unit triangular */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the triangular matrix A. If UPLO = 'U', the */ /* leading n by n upper triangular part of the array A contains */ /* the upper triangular matrix, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading n by n lower triangular part of the array A contains */ /* the lower triangular matrix, and the strictly upper */ /* triangular part of A is not referenced. If DIAG = 'U', the */ /* diagonal elements of A are also not referenced and are */ /* assumed to be 1. */ /* On exit, the (triangular) inverse of the original matrix, in */ /* the same storage format. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -k, the k-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); nounit = _starpu_lsame_(diag, "N"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTRTI2", &i__1); return 0; } if (upper) { /* Compute inverse of upper triangular matrix. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { if (nounit) { a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; ajj = -a[j + j * a_dim1]; } else { ajj = -1.; } /* Compute elements 1:j-1 of j-th column. */ i__2 = j - 1; _starpu_dtrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & a[j * a_dim1 + 1], &c__1); i__2 = j - 1; _starpu_dscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); /* L10: */ } } else { /* Compute inverse of lower triangular matrix. */ for (j = *n; j >= 1; --j) { if (nounit) { a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; ajj = -a[j + j * a_dim1]; } else { ajj = -1.; } if (j < *n) { /* Compute elements j+1:n of j-th column. */ i__1 = *n - j; _starpu_dtrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); i__1 = *n - j; _starpu_dscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); } /* L20: */ } } return 0; /* End of DTRTI2 */ } /* _starpu_dtrti2_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtrtri.c000066400000000000000000000157121507764646700207120ustar00rootroot00000000000000/* dtrtri.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; static doublereal c_b18 = 1.; static doublereal c_b22 = -1.; /* Subroutine */ int _starpu_dtrtri_(char *uplo, char *diag, integer *n, doublereal * a, integer *lda, integer *info) { /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ integer j, jb, nb, nn; extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dtrsm_( char *, char *, char *, char *, integer *, integer *, doublereal * , doublereal *, integer *, doublereal *, integer *); logical upper; extern /* Subroutine */ int _starpu_dtrti2_(char *, char *, integer *, doublereal *, integer *, integer *), _starpu_xerbla_(char *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); logical nounit; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRTRI computes the inverse of a real upper or lower triangular */ /* matrix A. */ /* This is the Level 3 BLAS version of the algorithm. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* DIAG (input) CHARACTER*1 */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the triangular matrix A. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of the array A contains */ /* the upper triangular matrix, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of the array A contains */ /* the lower triangular matrix, and the strictly upper */ /* triangular part of A is not referenced. If DIAG = 'U', the */ /* diagonal elements of A are also not referenced and are */ /* assumed to be 1. */ /* On exit, the (triangular) inverse of the original matrix, in */ /* the same storage format. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, A(i,i) is exactly zero. The triangular */ /* matrix is singular and its inverse can not be computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; upper = _starpu_lsame_(uplo, "U"); nounit = _starpu_lsame_(diag, "N"); if (! upper && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTRTRI", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check for singularity if non-unit. */ if (nounit) { i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (a[*info + *info * a_dim1] == 0.) { return 0; } /* L10: */ } *info = 0; } /* Determine the block size for this environment. */ /* Writing concatenation */ i__2[0] = 1, a__1[0] = uplo; i__2[1] = 1, a__1[1] = diag; s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); nb = _starpu_ilaenv_(&c__1, "DTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1); if (nb <= 1 || nb >= *n) { /* Use unblocked code */ _starpu_dtrti2_(uplo, diag, n, &a[a_offset], lda, info); } else { /* Use blocked code */ if (upper) { /* Compute inverse of upper triangular matrix */ i__1 = *n; i__3 = nb; for (j = 1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { /* Computing MIN */ i__4 = nb, i__5 = *n - j + 1; jb = min(i__4,i__5); /* Compute rows 1:j-1 of current block column */ i__4 = j - 1; _starpu_dtrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & c_b18, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); i__4 = j - 1; _starpu_dtrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & c_b22, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], lda); /* Compute inverse of current diagonal block */ _starpu_dtrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); /* L20: */ } } else { /* Compute inverse of lower triangular matrix */ nn = (*n - 1) / nb * nb + 1; i__3 = -nb; for (j = nn; i__3 < 0 ? j >= 1 : j <= 1; j += i__3) { /* Computing MIN */ i__1 = nb, i__4 = *n - j + 1; jb = min(i__1,i__4); if (j + jb <= *n) { /* Compute rows j+jb:n of current block column */ i__1 = *n - j - jb + 1; _starpu_dtrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, &c_b18, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + jb + j * a_dim1], lda); i__1 = *n - j - jb + 1; _starpu_dtrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, &c_b22, &a[j + j * a_dim1], lda, &a[j + jb + j * a_dim1], lda); } /* Compute inverse of current diagonal block */ _starpu_dtrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); /* L30: */ } } } return 0; /* End of DTRTRI */ } /* _starpu_dtrtri_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtrtrs.c000066400000000000000000000126071507764646700207240ustar00rootroot00000000000000/* dtrtrs.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static doublereal c_b12 = 1.; /* Subroutine */ int _starpu_dtrtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer * ldb, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern logical _starpu_lsame_(char *, char *); extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_( char *, integer *); logical nounit; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRTRS solves a triangular system of the form */ /* A * X = B or A**T * X = B, */ /* where A is a triangular matrix of order N, and B is an N-by-NRHS */ /* matrix. A check is made to verify that A is nonsingular. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ /* DIAG (input) CHARACTER*1 */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The triangular matrix A. If UPLO = 'U', the leading N-by-N */ /* upper triangular part of the array A contains the upper */ /* triangular matrix, and the strictly lower triangular part of */ /* A is not referenced. If UPLO = 'L', the leading N-by-N lower */ /* triangular part of the array A contains the lower triangular */ /* matrix, and the strictly upper triangular part of A is not */ /* referenced. If DIAG = 'U', the diagonal elements of A are */ /* also not referenced and are assumed to be 1. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side matrix B. */ /* On exit, if INFO = 0, the solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the i-th diagonal element of A is zero, */ /* indicating that the matrix is singular and the solutions */ /* X have not been computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; nounit = _starpu_lsame_(diag, "N"); if (! _starpu_lsame_(uplo, "U") && ! _starpu_lsame_(uplo, "L")) { *info = -1; } else if (! _starpu_lsame_(trans, "N") && ! _starpu_lsame_(trans, "T") && ! _starpu_lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! _starpu_lsame_(diag, "U")) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*nrhs < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTRTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check for singularity. */ if (nounit) { i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (a[*info + *info * a_dim1] == 0.) { return 0; } /* L10: */ } } *info = 0; /* Solve A * x = b or A' * x = b. */ _starpu_dtrsm_("Left", uplo, trans, diag, n, nrhs, &c_b12, &a[a_offset], lda, &b[ b_offset], ldb); return 0; /* End of DTRTRS */ } /* _starpu_dtrtrs_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtrttf.c000066400000000000000000000276661507764646700207240ustar00rootroot00000000000000/* dtrttf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtrttf_(char *transr, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *arf, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, k, l, n1, n2, ij, nt, nx2, np1x2; logical normaltransr; extern logical _starpu_lsame_(char *, char *); logical lower; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); logical nisodd; /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRTTF copies a triangular matrix A from standard full format (TR) */ /* to rectangular full packed format (TF) . */ /* Arguments */ /* ========= */ /* TRANSR (input) CHARACTER */ /* = 'N': ARF in Normal form is wanted; */ /* = 'T': ARF in Transpose form is wanted. */ /* UPLO (input) CHARACTER */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N). */ /* On entry, the triangular matrix A. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of the array A contains */ /* the upper triangular matrix, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of the array A contains */ /* the lower triangular matrix, and the strictly upper */ /* triangular part of A is not referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the matrix A. LDA >= max(1,N). */ /* ARF (output) DOUBLE PRECISION array, dimension (NT). */ /* NT=N*(N+1)/2. On exit, the triangular matrix A in RFP format. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Notes */ /* ===== */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* even. We give an example where N = 6. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 05 00 */ /* 11 12 13 14 15 10 11 */ /* 22 23 24 25 20 21 22 */ /* 33 34 35 30 31 32 33 */ /* 44 45 40 41 42 43 44 */ /* 55 50 51 52 53 54 55 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ /* the transpose of the first three columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ /* the transpose of the last three columns of AP lower. */ /* This covers the case N even and TRANSR = 'N'. */ /* RFP A RFP A */ /* 03 04 05 33 43 53 */ /* 13 14 15 00 44 54 */ /* 23 24 25 10 11 55 */ /* 33 34 35 20 21 22 */ /* 00 44 45 30 31 32 */ /* 01 11 55 40 41 42 */ /* 02 12 22 50 51 52 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ /* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ /* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* odd. We give an example where N = 5. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 00 */ /* 11 12 13 14 10 11 */ /* 22 23 24 20 21 22 */ /* 33 34 30 31 32 33 */ /* 44 40 41 42 43 44 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ /* the transpose of the first two columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ /* the transpose of the last two columns of AP lower. */ /* This covers the case N odd and TRANSR = 'N'. */ /* RFP A RFP A */ /* 02 03 04 00 33 43 */ /* 12 13 14 10 11 44 */ /* 22 23 24 20 21 22 */ /* 00 33 34 30 31 32 */ /* 01 11 44 40 41 42 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 02 12 22 00 01 00 10 20 30 40 50 */ /* 03 13 23 33 11 33 11 21 31 41 51 */ /* 04 14 24 34 44 43 44 22 32 42 52 */ /* Reference */ /* ========= */ /* ===================================================================== */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda - 1 - 0 + 1; a_offset = 0 + a_dim1 * 0; a -= a_offset; /* Function Body */ *info = 0; normaltransr = _starpu_lsame_(transr, "N"); lower = _starpu_lsame_(uplo, "L"); if (! normaltransr && ! _starpu_lsame_(transr, "T")) { *info = -1; } else if (! lower && ! _starpu_lsame_(uplo, "U")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTRTTF", &i__1); return 0; } /* Quick return if possible */ if (*n <= 1) { if (*n == 1) { arf[0] = a[0]; } return 0; } /* Size of array ARF(0:nt-1) */ nt = *n * (*n + 1) / 2; /* Set N1 and N2 depending on LOWER: for N even N1=N2=K */ if (lower) { n2 = *n / 2; n1 = *n - n2; } else { n1 = *n / 2; n2 = *n - n1; } /* If N is odd, set NISODD = .TRUE., LDA=N+1 and A is (N+1)--by--K2. */ /* If N is even, set K = N/2 and NISODD = .FALSE., LDA=N and A is */ /* N--by--(N+1)/2. */ if (*n % 2 == 0) { k = *n / 2; nisodd = FALSE_; if (! lower) { np1x2 = *n + *n + 2; } } else { nisodd = TRUE_; if (! lower) { nx2 = *n + *n; } } if (nisodd) { /* N is odd */ if (normaltransr) { /* N is odd and TRANSR = 'N' */ if (lower) { /* N is odd, TRANSR = 'N', and UPLO = 'L' */ ij = 0; i__1 = n2; for (j = 0; j <= i__1; ++j) { i__2 = n2 + j; for (i__ = n1; i__ <= i__2; ++i__) { arf[ij] = a[n2 + j + i__ * a_dim1]; ++ij; } i__2 = *n - 1; for (i__ = j; i__ <= i__2; ++i__) { arf[ij] = a[i__ + j * a_dim1]; ++ij; } } } else { /* N is odd, TRANSR = 'N', and UPLO = 'U' */ ij = nt - *n; i__1 = n1; for (j = *n - 1; j >= i__1; --j) { i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { arf[ij] = a[i__ + j * a_dim1]; ++ij; } i__2 = n1 - 1; for (l = j - n1; l <= i__2; ++l) { arf[ij] = a[j - n1 + l * a_dim1]; ++ij; } ij -= nx2; } } } else { /* N is odd and TRANSR = 'T' */ if (lower) { /* N is odd, TRANSR = 'T', and UPLO = 'L' */ ij = 0; i__1 = n2 - 1; for (j = 0; j <= i__1; ++j) { i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { arf[ij] = a[j + i__ * a_dim1]; ++ij; } i__2 = *n - 1; for (i__ = n1 + j; i__ <= i__2; ++i__) { arf[ij] = a[i__ + (n1 + j) * a_dim1]; ++ij; } } i__1 = *n - 1; for (j = n2; j <= i__1; ++j) { i__2 = n1 - 1; for (i__ = 0; i__ <= i__2; ++i__) { arf[ij] = a[j + i__ * a_dim1]; ++ij; } } } else { /* N is odd, TRANSR = 'T', and UPLO = 'U' */ ij = 0; i__1 = n1; for (j = 0; j <= i__1; ++j) { i__2 = *n - 1; for (i__ = n1; i__ <= i__2; ++i__) { arf[ij] = a[j + i__ * a_dim1]; ++ij; } } i__1 = n1 - 1; for (j = 0; j <= i__1; ++j) { i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { arf[ij] = a[i__ + j * a_dim1]; ++ij; } i__2 = *n - 1; for (l = n2 + j; l <= i__2; ++l) { arf[ij] = a[n2 + j + l * a_dim1]; ++ij; } } } } } else { /* N is even */ if (normaltransr) { /* N is even and TRANSR = 'N' */ if (lower) { /* N is even, TRANSR = 'N', and UPLO = 'L' */ ij = 0; i__1 = k - 1; for (j = 0; j <= i__1; ++j) { i__2 = k + j; for (i__ = k; i__ <= i__2; ++i__) { arf[ij] = a[k + j + i__ * a_dim1]; ++ij; } i__2 = *n - 1; for (i__ = j; i__ <= i__2; ++i__) { arf[ij] = a[i__ + j * a_dim1]; ++ij; } } } else { /* N is even, TRANSR = 'N', and UPLO = 'U' */ ij = nt - *n - 1; i__1 = k; for (j = *n - 1; j >= i__1; --j) { i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { arf[ij] = a[i__ + j * a_dim1]; ++ij; } i__2 = k - 1; for (l = j - k; l <= i__2; ++l) { arf[ij] = a[j - k + l * a_dim1]; ++ij; } ij -= np1x2; } } } else { /* N is even and TRANSR = 'T' */ if (lower) { /* N is even, TRANSR = 'T', and UPLO = 'L' */ ij = 0; j = k; i__1 = *n - 1; for (i__ = k; i__ <= i__1; ++i__) { arf[ij] = a[i__ + j * a_dim1]; ++ij; } i__1 = k - 2; for (j = 0; j <= i__1; ++j) { i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { arf[ij] = a[j + i__ * a_dim1]; ++ij; } i__2 = *n - 1; for (i__ = k + 1 + j; i__ <= i__2; ++i__) { arf[ij] = a[i__ + (k + 1 + j) * a_dim1]; ++ij; } } i__1 = *n - 1; for (j = k - 1; j <= i__1; ++j) { i__2 = k - 1; for (i__ = 0; i__ <= i__2; ++i__) { arf[ij] = a[j + i__ * a_dim1]; ++ij; } } } else { /* N is even, TRANSR = 'T', and UPLO = 'U' */ ij = 0; i__1 = k; for (j = 0; j <= i__1; ++j) { i__2 = *n - 1; for (i__ = k; i__ <= i__2; ++i__) { arf[ij] = a[j + i__ * a_dim1]; ++ij; } } i__1 = k - 2; for (j = 0; j <= i__1; ++j) { i__2 = j; for (i__ = 0; i__ <= i__2; ++i__) { arf[ij] = a[i__ + j * a_dim1]; ++ij; } i__2 = *n - 1; for (l = k + 1 + j; l <= i__2; ++l) { arf[ij] = a[k + 1 + j + l * a_dim1]; ++ij; } } /* Note that here, on exit of the loop, J = K-1 */ i__1 = j; for (i__ = 0; i__ <= i__1; ++i__) { arf[ij] = a[i__ + j * a_dim1]; ++ij; } } } } return 0; /* End of DTRTTF */ } /* _starpu_dtrttf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtrttp.c000066400000000000000000000100701507764646700207130ustar00rootroot00000000000000/* dtrttp.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_dtrttp_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *ap, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ integer i__, j, k; extern logical _starpu_lsame_(char *, char *); logical lower; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ /* -- and Julien Langou of the Univ. of Colorado Denver -- */ /* -- November 2008 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTRTTP copies a triangular matrix A from full format (TR) to standard */ /* packed format (TP). */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER */ /* = 'U': A is upper triangular. */ /* = 'L': A is lower triangular. */ /* N (input) INTEGER */ /* The order of the matrices AP and A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* On exit, the triangular matrix A. If UPLO = 'U', the leading */ /* N-by-N upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* AP (output) DOUBLE PRECISION array, dimension (N*(N+1)/2 */ /* On exit, the upper or lower triangular matrix A, packed */ /* columnwise in a linear array. The j-th column of A is stored */ /* in the array AP as follows: */ /* if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ /* if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ap; /* Function Body */ *info = 0; lower = _starpu_lsame_(uplo, "L"); if (! lower && ! _starpu_lsame_(uplo, "U")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTRTTP", &i__1); return 0; } if (lower) { k = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { ++k; ap[k] = a[i__ + j * a_dim1]; } } } else { k = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { ++k; ap[k] = a[i__ + j * a_dim1]; } } } return 0; /* End of DTRTTP */ } /* _starpu_dtrttp_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtzrqf.c000066400000000000000000000150671507764646700207170ustar00rootroot00000000000000/* dtzrqf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static doublereal c_b8 = 1.; /* Subroutine */ int _starpu_dtzrqf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1; /* Local variables */ integer i__, k, m1; extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), _starpu_dlarfp_( integer *, doublereal *, doublereal *, integer *, doublereal *), _starpu_xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This routine is deprecated and has been replaced by routine DTZRZF. */ /* DTZRQF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A */ /* to upper triangular form by means of orthogonal transformations. */ /* The upper trapezoidal matrix A is factored as */ /* A = ( R 0 ) * Z, */ /* where Z is an N-by-N orthogonal matrix and R is an M-by-M upper */ /* triangular matrix. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= M. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the leading M-by-N upper trapezoidal part of the */ /* array A must contain the matrix to be factorized. */ /* On exit, the leading M-by-M upper triangular part of A */ /* contains the upper triangular matrix R, and elements M+1 to */ /* N of the first M rows of A, with the array TAU, represent the */ /* orthogonal matrix Z as a product of M elementary reflectors. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (M) */ /* The scalar factors of the elementary reflectors. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* The factorization is obtained by Householder's method. The kth */ /* transformation matrix, Z( k ), which is used to introduce zeros into */ /* the ( m - k + 1 )th row of A, is given in the form */ /* Z( k ) = ( I 0 ), */ /* ( 0 T( k ) ) */ /* where */ /* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), */ /* ( 0 ) */ /* ( z( k ) ) */ /* tau is a scalar and z( k ) is an ( n - m ) element vector. */ /* tau and z( k ) are chosen to annihilate the elements of the kth row */ /* of X. */ /* The scalar tau is returned in the kth element of TAU and the vector */ /* u( k ) in the kth row of A, such that the elements of z( k ) are */ /* in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in */ /* the upper triangular part of A. */ /* Z is given by */ /* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < *m) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTZRQF", &i__1); return 0; } /* Perform the factorization. */ if (*m == 0) { return 0; } if (*m == *n) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { tau[i__] = 0.; /* L10: */ } } else { /* Computing MIN */ i__1 = *m + 1; m1 = min(i__1,*n); for (k = *m; k >= 1; --k) { /* Use a Householder reflection to zero the kth row of A. */ /* First set up the reflection. */ i__1 = *n - *m + 1; _starpu_dlarfp_(&i__1, &a[k + k * a_dim1], &a[k + m1 * a_dim1], lda, &tau[ k]); if (tau[k] != 0. && k > 1) { /* We now perform the operation A := A*P( k ). */ /* Use the first ( k - 1 ) elements of TAU to store a( k ), */ /* where a( k ) consists of the first ( k - 1 ) elements of */ /* the kth column of A. Also let B denote the first */ /* ( k - 1 ) rows of the last ( n - m ) columns of A. */ i__1 = k - 1; _starpu_dcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &tau[1], &c__1); /* Form w = a( k ) + B*z( k ) in TAU. */ i__1 = k - 1; i__2 = *n - *m; _starpu_dgemv_("No transpose", &i__1, &i__2, &c_b8, &a[m1 * a_dim1 + 1], lda, &a[k + m1 * a_dim1], lda, &c_b8, &tau[1], & c__1); /* Now form a( k ) := a( k ) - tau*w */ /* and B := B - tau*w*z( k )'. */ i__1 = k - 1; d__1 = -tau[k]; _starpu_daxpy_(&i__1, &d__1, &tau[1], &c__1, &a[k * a_dim1 + 1], & c__1); i__1 = k - 1; i__2 = *n - *m; d__1 = -tau[k]; _starpu_dger_(&i__1, &i__2, &d__1, &tau[1], &c__1, &a[k + m1 * a_dim1] , lda, &a[m1 * a_dim1 + 1], lda); } /* L20: */ } } return 0; /* End of DTZRQF */ } /* _starpu_dtzrqf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dtzrzf.c000066400000000000000000000213201507764646700207150ustar00rootroot00000000000000/* dtzrzf.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; /* Subroutine */ int _starpu_dtzrzf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; /* Local variables */ integer i__, m1, ib, nb, ki, kk, mu, nx, iws, nbmin; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dlarzb_( char *, char *, char *, char *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int _starpu_dlarzt_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), _starpu_dlatrz_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *); integer ldwork, lwkopt; logical lquery; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTZRZF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A */ /* to upper triangular form by means of orthogonal transformations. */ /* The upper trapezoidal matrix A is factored as */ /* A = ( R 0 ) * Z, */ /* where Z is an N-by-N orthogonal matrix and R is an M-by-M upper */ /* triangular matrix. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= M. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the leading M-by-N upper trapezoidal part of the */ /* array A must contain the matrix to be factorized. */ /* On exit, the leading M-by-M upper triangular part of A */ /* contains the upper triangular matrix R, and elements M+1 to */ /* N of the first M rows of A, with the array TAU, represent the */ /* orthogonal matrix Z as a product of M elementary reflectors. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* TAU (output) DOUBLE PRECISION array, dimension (M) */ /* The scalar factors of the elementary reflectors. */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,M). */ /* For optimum performance LWORK >= M*NB, where NB is */ /* the optimal blocksize. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ /* The factorization is obtained by Householder's method. The kth */ /* transformation matrix, Z( k ), which is used to introduce zeros into */ /* the ( m - k + 1 )th row of A, is given in the form */ /* Z( k ) = ( I 0 ), */ /* ( 0 T( k ) ) */ /* where */ /* T( k ) = I - tau*u( k )*u( k )', u( k ) = ( 1 ), */ /* ( 0 ) */ /* ( z( k ) ) */ /* tau is a scalar and z( k ) is an ( n - m ) element vector. */ /* tau and z( k ) are chosen to annihilate the elements of the kth row */ /* of X. */ /* The scalar tau is returned in the kth element of TAU and the vector */ /* u( k ) in the kth row of A, such that the elements of z( k ) are */ /* in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in */ /* the upper triangular part of A. */ /* Z is given by */ /* Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < *m) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info == 0) { if (*m == 0 || *m == *n) { lwkopt = 1; } else { /* Determine the block size. */ nb = _starpu_ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1); lwkopt = *m * nb; } work[1] = (doublereal) lwkopt; if (*lwork < max(1,*m) && ! lquery) { *info = -7; } } if (*info != 0) { i__1 = -(*info); _starpu_xerbla_("DTZRZF", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0) { return 0; } else if (*m == *n) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { tau[i__] = 0.; /* L10: */ } return 0; } nbmin = 2; nx = 1; iws = *m; if (nb > 1 && nb < *m) { /* Determine when to cross over from blocked to unblocked code. */ /* Computing MAX */ i__1 = 0, i__2 = _starpu_ilaenv_(&c__3, "DGERQF", " ", m, n, &c_n1, &c_n1); nx = max(i__1,i__2); if (nx < *m) { /* Determine if workspace is large enough for blocked code. */ ldwork = *m; iws = ldwork * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: reduce NB and */ /* determine the minimum value of NB. */ nb = *lwork / ldwork; /* Computing MAX */ i__1 = 2, i__2 = _starpu_ilaenv_(&c__2, "DGERQF", " ", m, n, &c_n1, & c_n1); nbmin = max(i__1,i__2); } } } if (nb >= nbmin && nb < *m && nx < *m) { /* Use blocked code initially. */ /* The last kk rows are handled by the block method. */ /* Computing MIN */ i__1 = *m + 1; m1 = min(i__1,*n); ki = (*m - nx - 1) / nb * nb; /* Computing MIN */ i__1 = *m, i__2 = ki + nb; kk = min(i__1,i__2); i__1 = *m - kk + 1; i__2 = -nb; for (i__ = *m - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = *m - i__ + 1; ib = min(i__3,nb); /* Compute the TZ factorization of the current block */ /* A(i:i+ib-1,i:n) */ i__3 = *n - i__ + 1; i__4 = *n - *m; _starpu_dlatrz_(&ib, &i__3, &i__4, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]); if (i__ > 1) { /* Form the triangular factor of the block reflector */ /* H = H(i+ib-1) . . . H(i+1) H(i) */ i__3 = *n - *m; _starpu_dlarzt_("Backward", "Rowwise", &i__3, &ib, &a[i__ + m1 * a_dim1], lda, &tau[i__], &work[1], &ldwork); /* Apply H to A(1:i-1,i:n) from the right */ i__3 = i__ - 1; i__4 = *n - i__ + 1; i__5 = *n - *m; _starpu_dlarzb_("Right", "No transpose", "Backward", "Rowwise", &i__3, &i__4, &ib, &i__5, &a[i__ + m1 * a_dim1], lda, &work[ 1], &ldwork, &a[i__ * a_dim1 + 1], lda, &work[ib + 1], &ldwork) ; } /* L20: */ } mu = i__ + nb - 1; } else { mu = *m; } /* Use unblocked code to factor the last or only block */ if (mu > 0) { i__2 = *n - *m; _starpu_dlatrz_(&mu, n, &i__2, &a[a_offset], lda, &tau[1], &work[1]); } work[1] = (doublereal) lwkopt; return 0; /* End of DTZRZF */ } /* _starpu_dtzrzf_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/dzsum1.c000066400000000000000000000051321507764646700206200ustar00rootroot00000000000000/* dzsum1.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" doublereal _starpu_dzsum1_(integer *n, doublecomplex *cx, integer *incx) { /* System generated locals */ integer i__1, i__2; doublereal ret_val; /* Builtin functions */ double z_abs(doublecomplex *); /* Local variables */ integer i__, nincx; doublereal stemp; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DZSUM1 takes the sum of the absolute values of a complex */ /* vector and returns a double precision result. */ /* Based on DZASUM from the Level 1 BLAS. */ /* The change is to use the 'genuine' absolute value. */ /* Contributed by Nick Higham for use with ZLACON. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of elements in the vector CX. */ /* CX (input) COMPLEX*16 array, dimension (N) */ /* The vector whose elements will be summed. */ /* INCX (input) INTEGER */ /* The spacing between successive values of CX. INCX > 0. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --cx; /* Function Body */ ret_val = 0.; stemp = 0.; if (*n <= 0) { return ret_val; } if (*incx == 1) { goto L20; } /* CODE FOR INCREMENT NOT EQUAL TO 1 */ nincx = *n * *incx; i__1 = nincx; i__2 = *incx; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* NEXT LINE MODIFIED. */ stemp += z_abs(&cx[i__]); /* L10: */ } ret_val = stemp; return ret_val; /* CODE FOR INCREMENT EQUAL TO 1 */ L20: i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* NEXT LINE MODIFIED. */ stemp += z_abs(&cx[i__]); /* L30: */ } ret_val = stemp; return ret_val; /* End of DZSUM1 */ } /* _starpu_dzsum1_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/icmax1.c000066400000000000000000000053701507764646700205630ustar00rootroot00000000000000/* icmax1.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_icmax1_(integer *n, complex *cx, integer *incx) { /* System generated locals */ integer ret_val, i__1; /* Builtin functions */ double c_abs(complex *); /* Local variables */ integer i__, ix; real smax; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ICMAX1 finds the index of the element whose real part has maximum */ /* absolute value. */ /* Based on ICAMAX from Level 1 BLAS. */ /* The change is to use the 'genuine' absolute value. */ /* Contributed by Nick Higham for use with CLACON. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of elements in the vector CX. */ /* CX (input) COMPLEX array, dimension (N) */ /* The vector whose elements will be summed. */ /* INCX (input) INTEGER */ /* The spacing between successive values of CX. INCX >= 1. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Statement Functions .. */ /* .. */ /* .. Statement Function definitions .. */ /* NEXT LINE IS THE ONLY MODIFICATION. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --cx; /* Function Body */ ret_val = 0; if (*n < 1) { return ret_val; } ret_val = 1; if (*n == 1) { return ret_val; } if (*incx == 1) { goto L30; } /* CODE FOR INCREMENT NOT EQUAL TO 1 */ ix = 1; smax = c_abs(&cx[1]); ix += *incx; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { if (c_abs(&cx[ix]) <= smax) { goto L10; } ret_val = i__; smax = c_abs(&cx[ix]); L10: ix += *incx; /* L20: */ } return ret_val; /* CODE FOR INCREMENT EQUAL TO 1 */ L30: smax = c_abs(&cx[1]); i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { if (c_abs(&cx[i__]) <= smax) { goto L40; } ret_val = i__; smax = c_abs(&cx[i__]); L40: ; } return ret_val; /* End of ICMAX1 */ } /* _starpu_icmax1_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/ieeeck.c000066400000000000000000000065711507764646700206320ustar00rootroot00000000000000/* ieeeck.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_ieeeck_(integer *ispec, real *zero, real *one) { /* System generated locals */ integer ret_val; /* Local variables */ real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro, newzro; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* IEEECK is called from the ILAENV to verify that Infinity and */ /* possibly NaN arithmetic is safe (i.e. will not trap). */ /* Arguments */ /* ========= */ /* ISPEC (input) INTEGER */ /* Specifies whether to test just for inifinity arithmetic */ /* or whether to test for infinity and NaN arithmetic. */ /* = 0: Verify infinity arithmetic only. */ /* = 1: Verify infinity and NaN arithmetic. */ /* ZERO (input) REAL */ /* Must contain the value 0.0 */ /* This is passed to prevent the compiler from optimizing */ /* away this code. */ /* ONE (input) REAL */ /* Must contain the value 1.0 */ /* This is passed to prevent the compiler from optimizing */ /* away this code. */ /* RETURN VALUE: INTEGER */ /* = 0: Arithmetic failed to produce the correct answers */ /* = 1: Arithmetic produced the correct answers */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ ret_val = 1; posinf = *one / *zero; if (posinf <= *one) { ret_val = 0; return ret_val; } neginf = -(*one) / *zero; if (neginf >= *zero) { ret_val = 0; return ret_val; } negzro = *one / (neginf + *one); if (negzro != *zero) { ret_val = 0; return ret_val; } neginf = *one / negzro; if (neginf >= *zero) { ret_val = 0; return ret_val; } newzro = negzro + *zero; if (newzro != *zero) { ret_val = 0; return ret_val; } posinf = *one / newzro; if (posinf <= *one) { ret_val = 0; return ret_val; } neginf *= posinf; if (neginf >= *zero) { ret_val = 0; return ret_val; } posinf *= posinf; if (posinf <= *one) { ret_val = 0; return ret_val; } /* Return if we were only asked to check infinity arithmetic */ if (*ispec == 0) { return ret_val; } nan1 = posinf + neginf; nan2 = posinf / neginf; nan3 = posinf / posinf; nan4 = posinf * *zero; nan5 = neginf * negzro; nan6 = nan5 * 0.f; if (nan1 == nan1) { ret_val = 0; return ret_val; } if (nan2 == nan2) { ret_val = 0; return ret_val; } if (nan3 == nan3) { ret_val = 0; return ret_val; } if (nan4 == nan4) { ret_val = 0; return ret_val; } if (nan5 == nan5) { ret_val = 0; return ret_val; } if (nan6 == nan6) { ret_val = 0; return ret_val; } return ret_val; } /* _starpu_ieeeck_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/ilaclc.c000066400000000000000000000050521507764646700206250ustar00rootroot00000000000000/* ilaclc.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_ilaclc_(integer *m, integer *n, complex *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, ret_val, i__1, i__2; /* Local variables */ integer i__; /* -- LAPACK auxiliary routine (version 3.2.1) -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ILACLC scans A for its last non-zero column. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. */ /* A (input) COMPLEX array, dimension (LDA,N) */ /* The m by n matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Quick test for the common case where one corner is non-zero. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ if (*n == 0) { ret_val = *n; } else /* if(complicated condition) */ { i__1 = *n * a_dim1 + 1; i__2 = *m + *n * a_dim1; if (a[i__1].r != 0.f || a[i__1].i != 0.f || (a[i__2].r != 0.f || a[ i__2].i != 0.f)) { ret_val = *n; } else { /* Now scan each column from the end, returning with the first non-zero. */ for (ret_val = *n; ret_val >= 1; --ret_val) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__ + ret_val * a_dim1; if (a[i__2].r != 0.f || a[i__2].i != 0.f) { return ret_val; } } } } } return ret_val; } /* _starpu_ilaclc_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/ilaclr.c000066400000000000000000000050601507764646700206430ustar00rootroot00000000000000/* ilaclr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_ilaclr_(integer *m, integer *n, complex *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, ret_val, i__1, i__2; /* Local variables */ integer i__, j; /* -- LAPACK auxiliary routine (version 3.2.1) -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ILACLR scans A for its last non-zero row. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. */ /* A (input) COMPLEX array, dimension (LDA,N) */ /* The m by n matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Quick test for the common case where one corner is non-zero. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ if (*m == 0) { ret_val = *m; } else /* if(complicated condition) */ { i__1 = *m + a_dim1; i__2 = *m + *n * a_dim1; if (a[i__1].r != 0.f || a[i__1].i != 0.f || (a[i__2].r != 0.f || a[ i__2].i != 0.f)) { ret_val = *m; } else { /* Scan up each column tracking the last zero row seen. */ ret_val = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { i__2 = i__ + j * a_dim1; if (a[i__2].r != 0.f || a[i__2].i != 0.f) { break; } } ret_val = max(ret_val,i__); } } } return ret_val; } /* _starpu_ilaclr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/iladiag.c000066400000000000000000000035051507764646700207710ustar00rootroot00000000000000/* iladiag.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_iladiag_(char *diag) { /* System generated locals */ integer ret_val; /* Local variables */ extern logical _starpu_lsame_(char *, char *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* October 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This subroutine translated from a character string specifying if a */ /* matrix has unit diagonal or not to the relevant BLAST-specified */ /* integer constant. */ /* ILADIAG returns an INTEGER. If ILADIAG < 0, then the input is not a */ /* character indicating a unit or non-unit diagonal. Otherwise ILADIAG */ /* returns the constant value corresponding to DIAG. */ /* Arguments */ /* ========= */ /* DIAG (input) CHARACTER*1 */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ if (_starpu_lsame_(diag, "N")) { ret_val = 131; } else if (_starpu_lsame_(diag, "U")) { ret_val = 132; } else { ret_val = -1; } return ret_val; /* End of ILADIAG */ } /* _starpu_iladiag_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/iladlc.c000066400000000000000000000046141507764646700206310ustar00rootroot00000000000000/* iladlc.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_iladlc_(integer *m, integer *n, doublereal *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, ret_val, i__1; /* Local variables */ integer i__; /* -- LAPACK auxiliary routine (version 3.2.1) -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ILADLC scans A for its last non-zero column. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The m by n matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Quick test for the common case where one corner is non-zero. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ if (*n == 0) { ret_val = *n; } else if (a[*n * a_dim1 + 1] != 0. || a[*m + *n * a_dim1] != 0.) { ret_val = *n; } else { /* Now scan each column from the end, returning with the first non-zero. */ for (ret_val = *n; ret_val >= 1; --ret_val) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (a[i__ + ret_val * a_dim1] != 0.) { return ret_val; } } } } return ret_val; } /* _starpu_iladlc_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/iladlr.c000066400000000000000000000046011507764646700206440ustar00rootroot00000000000000/* iladlr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_iladlr_(integer *m, integer *n, doublereal *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, ret_val, i__1; /* Local variables */ integer i__, j; /* -- LAPACK auxiliary routine (version 3.2.1) -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ILADLR scans A for its last non-zero row. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The m by n matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Quick test for the common case where one corner is non-zero. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ if (*m == 0) { ret_val = *m; } else if (a[*m + a_dim1] != 0. || a[*m + *n * a_dim1] != 0.) { ret_val = *m; } else { /* Scan up each column tracking the last zero row seen. */ ret_val = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { if (a[i__ + j * a_dim1] != 0.) { break; } } ret_val = max(ret_val,i__); } } return ret_val; } /* _starpu_iladlr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/ilaenv.c000066400000000000000000000467721507764646700206720ustar00rootroot00000000000000/* ilaenv.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" #include "string.h" /* Table of constant values */ static integer c__1 = 1; static real c_b163 = 0.f; static real c_b164 = 1.f; static integer c__0 = 0; integer _starpu_ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, integer *n2, integer *n3, integer *n4) { /* System generated locals */ integer ret_val; /* Builtin functions */ /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer s_cmp(char *, char *, ftnlen, ftnlen); /* Local variables */ integer i__; char c1[1], c2[1], c3[1], c4[1]; integer ic, nb, iz, nx; logical cname; integer nbmin; logical sname; extern integer _starpu_ieeeck_(integer *, real *, real *); char subnam[1]; extern integer _starpu_iparmq_(integer *, char *, char *, integer *, integer *, integer *, integer *); ftnlen name_len, opts_len; name_len = strlen (name__); opts_len = strlen (opts); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* January 2007 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ILAENV is called from the LAPACK routines to choose problem-dependent */ /* parameters for the local environment. See ISPEC for a description of */ /* the parameters. */ /* ILAENV returns an INTEGER */ /* if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */ /* if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. */ /* This version provides a set of parameters which should give good, */ /* but not optimal, performance on many of the currently available */ /* computers. Users are encouraged to modify this subroutine to set */ /* the tuning parameters for their particular machine using the option */ /* and problem size information in the arguments. */ /* This routine will not function correctly if it is converted to all */ /* lower case. Converting it to all upper case is allowed. */ /* Arguments */ /* ========= */ /* ISPEC (input) INTEGER */ /* Specifies the parameter to be returned as the value of */ /* ILAENV. */ /* = 1: the optimal blocksize; if this value is 1, an unblocked */ /* algorithm will give the best performance. */ /* = 2: the minimum block size for which the block routine */ /* should be used; if the usable block size is less than */ /* this value, an unblocked routine should be used. */ /* = 3: the crossover point (in a block routine, for N less */ /* than this value, an unblocked routine should be used) */ /* = 4: the number of shifts, used in the nonsymmetric */ /* eigenvalue routines (DEPRECATED) */ /* = 5: the minimum column dimension for blocking to be used; */ /* rectangular blocks must have dimension at least k by m, */ /* where k is given by ILAENV(2,...) and m by ILAENV(5,...) */ /* = 6: the crossover point for the SVD (when reducing an m by n */ /* matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */ /* this value, a QR factorization is used first to reduce */ /* the matrix to a triangular form.) */ /* = 7: the number of processors */ /* = 8: the crossover point for the multishift QR method */ /* for nonsymmetric eigenvalue problems (DEPRECATED) */ /* = 9: maximum size of the subproblems at the bottom of the */ /* computation tree in the divide-and-conquer algorithm */ /* (used by xGELSD and xGESDD) */ /* =10: ieee NaN arithmetic can be trusted not to trap */ /* =11: infinity arithmetic can be trusted not to trap */ /* 12 <= ISPEC <= 16: */ /* xHSEQR or one of its subroutines, */ /* see IPARMQ for detailed explanation */ /* NAME (input) CHARACTER*(*) */ /* The name of the calling subroutine, in either upper case or */ /* lower case. */ /* OPTS (input) CHARACTER*(*) */ /* The character options to the subroutine NAME, concatenated */ /* into a single character string. For example, UPLO = 'U', */ /* TRANS = 'T', and DIAG = 'N' for a triangular routine would */ /* be specified as OPTS = 'UTN'. */ /* N1 (input) INTEGER */ /* N2 (input) INTEGER */ /* N3 (input) INTEGER */ /* N4 (input) INTEGER */ /* Problem dimensions for the subroutine NAME; these may not all */ /* be required. */ /* Further Details */ /* =============== */ /* The following conventions have been used when calling ILAENV from the */ /* LAPACK routines: */ /* 1) OPTS is a concatenation of all of the character options to */ /* subroutine NAME, in the same order that they appear in the */ /* argument list for NAME, even if they are not used in determining */ /* the value of the parameter specified by ISPEC. */ /* 2) The problem dimensions N1, N2, N3, N4 are specified in the order */ /* that they appear in the argument list for NAME. N1 is used */ /* first, N2 second, and so on, and unused problem dimensions are */ /* passed a value of -1. */ /* 3) The parameter value returned by ILAENV is checked for validity in */ /* the calling subroutine. For example, ILAENV is used to retrieve */ /* the optimal blocksize for STRTRI as follows: */ /* NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */ /* IF( NB.LE.1 ) NB = MAX( 1, N ) */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ switch (*ispec) { case 1: goto L10; case 2: goto L10; case 3: goto L10; case 4: goto L80; case 5: goto L90; case 6: goto L100; case 7: goto L110; case 8: goto L120; case 9: goto L130; case 10: goto L140; case 11: goto L150; case 12: goto L160; case 13: goto L160; case 14: goto L160; case 15: goto L160; case 16: goto L160; } /* Invalid value for ISPEC */ ret_val = -1; return ret_val; L10: /* Convert NAME to upper case if the first character is lower case. */ ret_val = 1; s_copy(subnam, name__, (ftnlen)1, name_len); ic = *(unsigned char *)subnam; iz = 'Z'; if (iz == 90 || iz == 122) { /* ASCII character set */ if (ic >= 97 && ic <= 122) { *(unsigned char *)subnam = (char) (ic - 32); for (i__ = 2; i__ <= 6; ++i__) { ic = *(unsigned char *)&subnam[i__ - 1]; if (ic >= 97 && ic <= 122) { *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); } /* L20: */ } } } else if (iz == 233 || iz == 169) { /* EBCDIC character set */ if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && ic <= 169) { *(unsigned char *)subnam = (char) (ic + 64); for (i__ = 2; i__ <= 6; ++i__) { ic = *(unsigned char *)&subnam[i__ - 1]; if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && ic <= 169) { *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); } /* L30: */ } } } else if (iz == 218 || iz == 250) { /* Prime machines: ASCII+128 */ if (ic >= 225 && ic <= 250) { *(unsigned char *)subnam = (char) (ic - 32); for (i__ = 2; i__ <= 6; ++i__) { ic = *(unsigned char *)&subnam[i__ - 1]; if (ic >= 225 && ic <= 250) { *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); } /* L40: */ } } } *(unsigned char *)c1 = *(unsigned char *)subnam; sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D'; cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z'; if (! (cname || sname)) { return ret_val; } s_copy(c2, subnam + 1, (ftnlen)1, (ftnlen)2); s_copy(c3, subnam + 3, (ftnlen)1, (ftnlen)3); s_copy(c4, c3 + 1, (ftnlen)1, (ftnlen)2); switch (*ispec) { case 1: goto L50; case 2: goto L60; case 3: goto L70; } L50: /* ISPEC = 1: block size */ /* In these examples, separate code is provided for setting NB for */ /* real and complex. We assume that NB will take the same value in */ /* single or double precision. */ nb = 1; if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 64; } else { nb = 64; } } else if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen) 1, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 32; } else { nb = 32; } } else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 32; } else { nb = 32; } } else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 32; } else { nb = 32; } } else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 64; } else { nb = 64; } } } else if (s_cmp(c2, "PO", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 64; } else { nb = 64; } } } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 64; } else { nb = 64; } } else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { nb = 32; } else if (sname && s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) { nb = 64; } } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { nb = 64; } else if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { nb = 32; } else if (s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) { nb = 64; } } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) { if (*(unsigned char *)c3 == 'G') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nb = 32; } } else if (*(unsigned char *)c3 == 'M') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nb = 32; } } } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) { if (*(unsigned char *)c3 == 'G') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nb = 32; } } else if (*(unsigned char *)c3 == 'M') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nb = 32; } } } else if (s_cmp(c2, "GB", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { if (*n4 <= 64) { nb = 1; } else { nb = 32; } } else { if (*n4 <= 64) { nb = 1; } else { nb = 32; } } } } else if (s_cmp(c2, "PB", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { if (*n2 <= 64) { nb = 1; } else { nb = 32; } } else { if (*n2 <= 64) { nb = 1; } else { nb = 32; } } } } else if (s_cmp(c2, "TR", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 64; } else { nb = 64; } } } else if (s_cmp(c2, "LA", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "UUM", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nb = 64; } else { nb = 64; } } } else if (sname && s_cmp(c2, "ST", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "EBZ", (ftnlen)1, (ftnlen)3) == 0) { nb = 1; } } ret_val = nb; return ret_val; L60: /* ISPEC = 2: minimum block size */ nbmin = 2; if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, ( ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nbmin = 2; } else { nbmin = 2; } } else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nbmin = 2; } else { nbmin = 2; } } else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nbmin = 2; } else { nbmin = 2; } } else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nbmin = 2; } else { nbmin = 2; } } } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nbmin = 8; } else { nbmin = 8; } } else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { nbmin = 2; } } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { nbmin = 2; } } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) { if (*(unsigned char *)c3 == 'G') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nbmin = 2; } } else if (*(unsigned char *)c3 == 'M') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nbmin = 2; } } } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) { if (*(unsigned char *)c3 == 'G') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nbmin = 2; } } else if (*(unsigned char *)c3 == 'M') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nbmin = 2; } } } ret_val = nbmin; return ret_val; L70: /* ISPEC = 3: crossover point */ nx = 0; if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, ( ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nx = 128; } else { nx = 128; } } else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nx = 128; } else { nx = 128; } } else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) { if (sname) { nx = 128; } else { nx = 128; } } } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) { if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { nx = 32; } } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) { if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) { nx = 32; } } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) { if (*(unsigned char *)c3 == 'G') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nx = 128; } } } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) { if (*(unsigned char *)c3 == 'G') { if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, ( ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp( c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( ftnlen)1, (ftnlen)2) == 0) { nx = 128; } } } ret_val = nx; return ret_val; L80: /* ISPEC = 4: number of shifts (used by xHSEQR) */ ret_val = 6; return ret_val; L90: /* ISPEC = 5: minimum column dimension (not used) */ ret_val = 2; return ret_val; L100: /* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ ret_val = (integer) ((real) min(*n1,*n2) * 1.6f); return ret_val; L110: /* ISPEC = 7: number of processors (not used) */ ret_val = 1; return ret_val; L120: /* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ ret_val = 50; return ret_val; L130: /* ISPEC = 9: maximum size of the subproblems at the bottom of the */ /* computation tree in the divide-and-conquer algorithm */ /* (used by xGELSD and xGESDD) */ ret_val = 25; return ret_val; L140: /* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap */ /* ILAENV = 0 */ ret_val = 1; if (ret_val == 1) { ret_val = _starpu_ieeeck_(&c__1, &c_b163, &c_b164); } return ret_val; L150: /* ISPEC = 11: infinity arithmetic can be trusted not to trap */ /* ILAENV = 0 */ ret_val = 1; if (ret_val == 1) { ret_val = _starpu_ieeeck_(&c__0, &c_b163, &c_b164); } return ret_val; L160: /* 12 <= ISPEC <= 16: xHSEQR or one of its subroutines. */ ret_val = _starpu_iparmq_(ispec, name__, opts, n1, n2, n3, n4) ; return ret_val; /* End of ILAENV */ } /* _starpu_ilaenv_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/ilaprec.c000066400000000000000000000040651507764646700210200ustar00rootroot00000000000000/* ilaprec.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_ilaprec_(char *prec) { /* System generated locals */ integer ret_val; /* Local variables */ extern logical _starpu_lsame_(char *, char *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* October 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This subroutine translated from a character string specifying an */ /* intermediate precision to the relevant BLAST-specified integer */ /* constant. */ /* ILAPREC returns an INTEGER. If ILAPREC < 0, then the input is not a */ /* character indicating a supported intermediate precision. Otherwise */ /* ILAPREC returns the constant value corresponding to PREC. */ /* Arguments */ /* ========= */ /* PREC (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'S': Single */ /* = 'D': Double */ /* = 'I': Indigenous */ /* = 'X', 'E': Extra */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ if (_starpu_lsame_(prec, "S")) { ret_val = 211; } else if (_starpu_lsame_(prec, "D")) { ret_val = 212; } else if (_starpu_lsame_(prec, "I")) { ret_val = 213; } else if (_starpu_lsame_(prec, "X") || _starpu_lsame_(prec, "E")) { ret_val = 214; } else { ret_val = -1; } return ret_val; /* End of ILAPREC */ } /* _starpu_ilaprec_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/ilaslc.c000066400000000000000000000045751507764646700206560ustar00rootroot00000000000000/* ilaslc.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_ilaslc_(integer *m, integer *n, real *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, ret_val, i__1; /* Local variables */ integer i__; /* -- LAPACK auxiliary routine (version 3.2.1) -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ILASLC scans A for its last non-zero column. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. */ /* A (input) REAL array, dimension (LDA,N) */ /* The m by n matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Quick test for the common case where one corner is non-zero. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ if (*n == 0) { ret_val = *n; } else if (a[*n * a_dim1 + 1] != 0.f || a[*m + *n * a_dim1] != 0.f) { ret_val = *n; } else { /* Now scan each column from the end, returning with the first non-zero. */ for (ret_val = *n; ret_val >= 1; --ret_val) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (a[i__ + ret_val * a_dim1] != 0.f) { return ret_val; } } } } return ret_val; } /* _starpu_ilaslc_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/ilaslr.c000066400000000000000000000045761507764646700206760ustar00rootroot00000000000000/* ilaslr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_ilaslr_(integer *m, integer *n, real *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, ret_val, i__1; /* Local variables */ integer i__, j; /* -- LAPACK auxiliary routine (version 3.2.1) -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ILASLR scans A for its last non-zero row. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. */ /* A (input) REAL array, dimension (LDA,N) */ /* The m by n matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Quick test for the common case where one corner is non-zero. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ if (*m == 0) { ret_val = *m; } else if (a[*m + a_dim1] != 0.f || a[*m + *n * a_dim1] != 0.f) { ret_val = *m; } else { /* Scan up each column tracking the last zero row seen. */ ret_val = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { if (a[i__ + j * a_dim1] != 0.f) { break; } } ret_val = max(ret_val,i__); } } return ret_val; } /* _starpu_ilaslr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/ilatrans.c000066400000000000000000000037171507764646700212210ustar00rootroot00000000000000/* ilatrans.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_ilatrans_(char *trans) { /* System generated locals */ integer ret_val; /* Local variables */ extern logical _starpu_lsame_(char *, char *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* October 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This subroutine translates from a character string specifying a */ /* transposition operation to the relevant BLAST-specified integer */ /* constant. */ /* ILATRANS returns an INTEGER. If ILATRANS < 0, then the input is not */ /* a character indicating a transposition operator. Otherwise ILATRANS */ /* returns the constant value corresponding to TRANS. */ /* Arguments */ /* ========= */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': No transpose */ /* = 'T': Transpose */ /* = 'C': Conjugate transpose */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ if (_starpu_lsame_(trans, "N")) { ret_val = 111; } else if (_starpu_lsame_(trans, "T")) { ret_val = 112; } else if (_starpu_lsame_(trans, "C")) { ret_val = 113; } else { ret_val = -1; } return ret_val; /* End of ILATRANS */ } /* _starpu_ilatrans_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/ilauplo.c000066400000000000000000000035101507764646700210400ustar00rootroot00000000000000/* ilauplo.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_ilauplo_(char *uplo) { /* System generated locals */ integer ret_val; /* Local variables */ extern logical _starpu_lsame_(char *, char *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* October 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* This subroutine translated from a character string specifying a */ /* upper- or lower-triangular matrix to the relevant BLAST-specified */ /* integer constant. */ /* ILAUPLO returns an INTEGER. If ILAUPLO < 0, then the input is not */ /* a character indicating an upper- or lower-triangular matrix. */ /* Otherwise ILAUPLO returns the constant value corresponding to UPLO. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ if (_starpu_lsame_(uplo, "U")) { ret_val = 121; } else if (_starpu_lsame_(uplo, "L")) { ret_val = 122; } else { ret_val = -1; } return ret_val; /* End of ILAUPLO */ } /* _starpu_ilauplo_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/ilaver.c000066400000000000000000000027431507764646700206640ustar00rootroot00000000000000/* ilaver.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_ilaver_(integer *vers_major__, integer *vers_minor__, integer *vers_patch__) { /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* January 2007 */ /* .. */ /* Purpose */ /* ======= */ /* This subroutine return the Lapack version */ /* Arguments */ /* ========= */ /* VERS_MAJOR (output) INTEGER */ /* return the lapack major version */ /* VERS_MINOR (output) INTEGER */ /* return the lapack minor version from the major version */ /* VERS_PATCH (output) INTEGER */ /* return the lapack patch version from the minor version */ /* ===================================================================== */ /* ===================================================================== */ *vers_major__ = 3; *vers_minor__ = 1; *vers_patch__ = 1; /* ===================================================================== */ return 0; } /* _starpu_ilaver_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/ilazlc.c000066400000000000000000000050551507764646700206570ustar00rootroot00000000000000/* ilazlc.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_ilazlc_(integer *m, integer *n, doublecomplex *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, ret_val, i__1, i__2; /* Local variables */ integer i__; /* -- LAPACK auxiliary routine (version 3.2.1) -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ILAZLC scans A for its last non-zero column. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. */ /* A (input) COMPLEX*16 array, dimension (LDA,N) */ /* The m by n matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Quick test for the common case where one corner is non-zero. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ if (*n == 0) { ret_val = *n; } else /* if(complicated condition) */ { i__1 = *n * a_dim1 + 1; i__2 = *m + *n * a_dim1; if (a[i__1].r != 0. || a[i__1].i != 0. || (a[i__2].r != 0. || a[i__2] .i != 0.)) { ret_val = *n; } else { /* Now scan each column from the end, returning with the first non-zero. */ for (ret_val = *n; ret_val >= 1; --ret_val) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__ + ret_val * a_dim1; if (a[i__2].r != 0. || a[i__2].i != 0.) { return ret_val; } } } } } return ret_val; } /* _starpu_ilazlc_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/ilazlr.c000066400000000000000000000050521507764646700206730ustar00rootroot00000000000000/* ilazlr.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_ilazlr_(integer *m, integer *n, doublecomplex *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, ret_val, i__1, i__2; /* Local variables */ integer i__, j; /* -- LAPACK auxiliary routine (version 3.2.1) -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ILAZLR scans A for its last non-zero row. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. */ /* A (input) COMPLEX*16 array, dimension (LDA,N) */ /* The m by n matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Quick test for the common case where one corner is non-zero. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ if (*m == 0) { ret_val = *m; } else /* if(complicated condition) */ { i__1 = *m + a_dim1; i__2 = *m + *n * a_dim1; if (a[i__1].r != 0. || a[i__1].i != 0. || (a[i__2].r != 0. || a[i__2] .i != 0.)) { ret_val = *m; } else { /* Scan up each column tracking the last zero row seen. */ ret_val = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { i__2 = i__ + j * a_dim1; if (a[i__2].r != 0. || a[i__2].i != 0.) { break; } } ret_val = max(ret_val,i__); } } } return ret_val; } /* _starpu_ilazlr_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/iparmq.c000066400000000000000000000241731507764646700206740ustar00rootroot00000000000000/* iparmq.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_iparmq_(integer *ispec, char *name__, char *opts, integer *n, integer *ilo, integer *ihi, integer *lwork) { /* System generated locals */ integer ret_val, i__1, i__2; real r__1; /* Builtin functions */ double log(doublereal); integer i_nint(real *); /* Local variables */ integer nh, ns; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* Purpose */ /* ======= */ /* This program sets problem and machine dependent parameters */ /* useful for xHSEQR and its subroutines. It is called whenever */ /* ILAENV is called with 12 <= ISPEC <= 16 */ /* Arguments */ /* ========= */ /* ISPEC (input) integer scalar */ /* ISPEC specifies which tunable parameter IPARMQ should */ /* return. */ /* ISPEC=12: (INMIN) Matrices of order nmin or less */ /* are sent directly to xLAHQR, the implicit */ /* double shift QR algorithm. NMIN must be */ /* at least 11. */ /* ISPEC=13: (INWIN) Size of the deflation window. */ /* This is best set greater than or equal to */ /* the number of simultaneous shifts NS. */ /* Larger matrices benefit from larger deflation */ /* windows. */ /* ISPEC=14: (INIBL) Determines when to stop nibbling and */ /* invest in an (expensive) multi-shift QR sweep. */ /* If the aggressive early deflation subroutine */ /* finds LD converged eigenvalues from an order */ /* NW deflation window and LD.GT.(NW*NIBBLE)/100, */ /* then the next QR sweep is skipped and early */ /* deflation is applied immediately to the */ /* remaining active diagonal block. Setting */ /* IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a */ /* multi-shift QR sweep whenever early deflation */ /* finds a converged eigenvalue. Setting */ /* IPARMQ(ISPEC=14) greater than or equal to 100 */ /* prevents TTQRE from skipping a multi-shift */ /* QR sweep. */ /* ISPEC=15: (NSHFTS) The number of simultaneous shifts in */ /* a multi-shift QR iteration. */ /* ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the */ /* following meanings. */ /* 0: During the multi-shift QR sweep, */ /* xLAQR5 does not accumulate reflections and */ /* does not use matrix-matrix multiply to */ /* update the far-from-diagonal matrix */ /* entries. */ /* 1: During the multi-shift QR sweep, */ /* xLAQR5 and/or xLAQRaccumulates reflections and uses */ /* matrix-matrix multiply to update the */ /* far-from-diagonal matrix entries. */ /* 2: During the multi-shift QR sweep. */ /* xLAQR5 accumulates reflections and takes */ /* advantage of 2-by-2 block structure during */ /* matrix-matrix multiplies. */ /* (If xTRMM is slower than xGEMM, then */ /* IPARMQ(ISPEC=16)=1 may be more efficient than */ /* IPARMQ(ISPEC=16)=2 despite the greater level of */ /* arithmetic work implied by the latter choice.) */ /* NAME (input) character string */ /* Name of the calling subroutine */ /* OPTS (input) character string */ /* This is a concatenation of the string arguments to */ /* TTQRE. */ /* N (input) integer scalar */ /* N is the order of the Hessenberg matrix H. */ /* ILO (input) INTEGER */ /* IHI (input) INTEGER */ /* It is assumed that H is already upper triangular */ /* in rows and columns 1:ILO-1 and IHI+1:N. */ /* LWORK (input) integer scalar */ /* The amount of workspace available. */ /* Further Details */ /* =============== */ /* Little is known about how best to choose these parameters. */ /* It is possible to use different values of the parameters */ /* for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. */ /* It is probably best to choose different parameters for */ /* different matrices and different parameters at different */ /* times during the iteration, but this has not been */ /* implemented --- yet. */ /* The best choices of most of the parameters depend */ /* in an ill-understood way on the relative execution */ /* rate of xLAQR3 and xLAQR5 and on the nature of each */ /* particular eigenvalue problem. Experiment may be the */ /* only practical way to determine which choices are most */ /* effective. */ /* Following is a list of default values supplied by IPARMQ. */ /* These defaults may be adjusted in order to attain better */ /* performance in any particular computational environment. */ /* IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. */ /* Default: 75. (Must be at least 11.) */ /* IPARMQ(ISPEC=13) Recommended deflation window size. */ /* This depends on ILO, IHI and NS, the */ /* number of simultaneous shifts returned */ /* by IPARMQ(ISPEC=15). The default for */ /* (IHI-ILO+1).LE.500 is NS. The default */ /* for (IHI-ILO+1).GT.500 is 3*NS/2. */ /* IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. */ /* IPARMQ(ISPEC=15) Number of simultaneous shifts, NS. */ /* a multi-shift QR iteration. */ /* If IHI-ILO+1 is ... */ /* greater than ...but less ... the */ /* or equal to ... than default is */ /* 0 30 NS = 2+ */ /* 30 60 NS = 4+ */ /* 60 150 NS = 10 */ /* 150 590 NS = ** */ /* 590 3000 NS = 64 */ /* 3000 6000 NS = 128 */ /* 6000 infinity NS = 256 */ /* (+) By default matrices of this order are */ /* passed to the implicit double shift routine */ /* xLAHQR. See IPARMQ(ISPEC=12) above. These */ /* values of NS are used only in case of a rare */ /* xLAHQR failure. */ /* (**) The asterisks (**) indicate an ad-hoc */ /* function increasing from 10 to 64. */ /* IPARMQ(ISPEC=16) Select structured matrix multiply. */ /* (See ISPEC=16 above for details.) */ /* Default: 3. */ /* ================================================================ */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ if (*ispec == 15 || *ispec == 13 || *ispec == 16) { /* ==== Set the number simultaneous shifts ==== */ nh = *ihi - *ilo + 1; ns = 2; if (nh >= 30) { ns = 4; } if (nh >= 60) { ns = 10; } if (nh >= 150) { /* Computing MAX */ r__1 = log((real) nh) / log(2.f); i__1 = 10, i__2 = nh / i_nint(&r__1); ns = max(i__1,i__2); } if (nh >= 590) { ns = 64; } if (nh >= 3000) { ns = 128; } if (nh >= 6000) { ns = 256; } /* Computing MAX */ i__1 = 2, i__2 = ns - ns % 2; ns = max(i__1,i__2); } if (*ispec == 12) { /* ===== Matrices of order smaller than NMIN get sent */ /* . to xLAHQR, the classic double shift algorithm. */ /* . This must be at least 11. ==== */ ret_val = 75; } else if (*ispec == 14) { /* ==== INIBL: skip a multi-shift qr iteration and */ /* . whenever aggressive early deflation finds */ /* . at least (NIBBLE*(window size)/100) deflations. ==== */ ret_val = 14; } else if (*ispec == 15) { /* ==== NSHFTS: The number of simultaneous shifts ===== */ ret_val = ns; } else if (*ispec == 13) { /* ==== NW: deflation window size. ==== */ if (nh <= 500) { ret_val = ns; } else { ret_val = ns * 3 / 2; } } else if (*ispec == 16) { /* ==== IACC22: Whether to accumulate reflections */ /* . before updating the far-from-diagonal elements */ /* . and whether to use 2-by-2 block structure while */ /* . doing it. A small amount of work could be saved */ /* . by making this choice dependent also upon the */ /* . NH=IHI-ILO+1. */ ret_val = 0; if (ns >= 14) { ret_val = 1; } if (ns >= 14) { ret_val = 2; } } else { /* ===== invalid value of ispec ===== */ ret_val = -1; } /* ==== End of IPARMQ ==== */ return ret_val; } /* _starpu_iparmq_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/izmax1.c000066400000000000000000000054151507764646700206120ustar00rootroot00000000000000/* izmax1.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" integer _starpu_izmax1_(integer *n, doublecomplex *cx, integer *incx) { /* System generated locals */ integer ret_val, i__1; /* Builtin functions */ double z_abs(doublecomplex *); /* Local variables */ integer i__, ix; doublereal smax; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* IZMAX1 finds the index of the element whose real part has maximum */ /* absolute value. */ /* Based on IZAMAX from Level 1 BLAS. */ /* The change is to use the 'genuine' absolute value. */ /* Contributed by Nick Higham for use with ZLACON. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of elements in the vector CX. */ /* CX (input) COMPLEX*16 array, dimension (N) */ /* The vector whose elements will be summed. */ /* INCX (input) INTEGER */ /* The spacing between successive values of CX. INCX >= 1. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Statement Functions .. */ /* .. */ /* .. Statement Function definitions .. */ /* NEXT LINE IS THE ONLY MODIFICATION. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --cx; /* Function Body */ ret_val = 0; if (*n < 1) { return ret_val; } ret_val = 1; if (*n == 1) { return ret_val; } if (*incx == 1) { goto L30; } /* CODE FOR INCREMENT NOT EQUAL TO 1 */ ix = 1; smax = z_abs(&cx[1]); ix += *incx; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { if (z_abs(&cx[ix]) <= smax) { goto L10; } ret_val = i__; smax = z_abs(&cx[ix]); L10: ix += *incx; /* L20: */ } return ret_val; /* CODE FOR INCREMENT EQUAL TO 1 */ L30: smax = z_abs(&cx[1]); i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { if (z_abs(&cx[i__]) <= smax) { goto L40; } ret_val = i__; smax = z_abs(&cx[i__]); L40: ; } return ret_val; /* End of IZMAX1 */ } /* _starpu_izmax1_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/lsamen.c000066400000000000000000000047111507764646700206560ustar00rootroot00000000000000/* lsamen.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" #include "string.h" logical _starpu_lsamen_(integer *n, char *ca, char *cb) { /* System generated locals */ integer i__1; logical ret_val; /* Builtin functions */ integer i_len(char *, ftnlen); /* Local variables */ integer i__; extern logical _starpu_lsame_(char *, char *); ftnlen ca_len, cb_len; ca_len = strlen (ca); cb_len = strlen (cb); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* LSAMEN tests if the first N letters of CA are the same as the */ /* first N letters of CB, regardless of case. */ /* LSAMEN returns .TRUE. if CA and CB are equivalent except for case */ /* and .FALSE. otherwise. LSAMEN also returns .FALSE. if LEN( CA ) */ /* or LEN( CB ) is less than N. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The number of characters in CA and CB to be compared. */ /* CA (input) CHARACTER*(*) */ /* CB (input) CHARACTER*(*) */ /* CA and CB specify two character strings of length at least N. */ /* Only the first N characters of each string will be accessed. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ ret_val = FALSE_; if (i_len(ca, ca_len) < *n || i_len(cb, cb_len) < *n) { goto L20; } /* Do for each character in the two strings. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Test if the characters are equal using LSAME. */ if (! _starpu_lsame_(ca + (i__ - 1), cb + (i__ - 1))) { goto L20; } /* L10: */ } ret_val = TRUE_; L20: return ret_val; /* End of LSAMEN */ } /* _starpu_lsamen_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/maxloc.c000066400000000000000000000030761507764646700206650ustar00rootroot00000000000000/* maxloc.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* ********************************************************************************** */ integer _starpu_smaxloc_(real *a, integer *dimm) { /* System generated locals */ integer ret_val, i__1; /* Local variables */ integer i__; real smax; /* Parameter adjustments */ --a; /* Function Body */ ret_val = 1; smax = a[1]; i__1 = *dimm; for (i__ = 2; i__ <= i__1; ++i__) { if (smax < a[i__]) { smax = a[i__]; ret_val = i__; } /* L10: */ } return ret_val; } /* _starpu_smaxloc_ */ /* ********************************************************************************** */ integer _starpu_dmaxloc_(doublereal *a, integer *dimm) { /* System generated locals */ integer ret_val, i__1; /* Local variables */ integer i__; doublereal dmax__; /* Parameter adjustments */ --a; /* Function Body */ ret_val = 1; dmax__ = a[1]; i__1 = *dimm; for (i__ = 2; i__ <= i__1; ++i__) { if (dmax__ < a[i__]) { dmax__ = a[i__]; ret_val = i__; } /* L20: */ } return ret_val; } /* _starpu_dmaxloc_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/xerbla.c000066400000000000000000000034261507764646700206560ustar00rootroot00000000000000/* xerbla.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" #include "stdio.h" /* Table of constant values */ static integer c__1 = 1; /* Subroutine */ int _starpu_xerbla_(char *srname, integer *info) { /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* XERBLA is an error handler for the LAPACK routines. */ /* It is called by an LAPACK routine if an input parameter has an */ /* invalid value. A message is printed and execution stops. */ /* Installers may consider modifying the STOP statement in order to */ /* call system-specific exception-handling facilities. */ /* Arguments */ /* ========= */ /* SRNAME (input) CHARACTER*(*) */ /* The name of the routine which called XERBLA. */ /* INFO (input) INTEGER */ /* The position of the invalid parameter in the parameter list */ /* of the calling routine. */ /* ===================================================================== */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ printf("** On entry to %s, parameter number %ld had an illegal value\n", srname, *info); /* End of XERBLA */ return 0; } /* _starpu_xerbla_ */ starpu-1.4.9+dfsg/min-dgels/base/SRC/xerbla_array.c000066400000000000000000000064361507764646700220600ustar00rootroot00000000000000/* _starpu_xerbla_array.f -- translated by f2c (version 20061008). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" #include "blaswrap.h" /* Subroutine */ int _starpu_xerbla_array__(char *srname_array__, integer * srname_len__, integer *info, ftnlen srname_array_len) { /* System generated locals */ integer i__1, i__2, i__3; /* Builtin functions */ /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer i_len(char *, ftnlen); /* Local variables */ integer i__; extern /* Subroutine */ int _starpu_xerbla_(char *, integer *); char srname[32]; /* -- LAPACK auxiliary routine (version 3.0) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ /* September 19, 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* XERBLA_ARRAY assists other languages in calling XERBLA, the LAPACK */ /* and BLAS error handler. Rather than taking a Fortran string argument */ /* as the function's name, XERBLA_ARRAY takes an array of single */ /* characters along with the array's length. XERBLA_ARRAY then copies */ /* up to 32 characters of that array into a Fortran string and passes */ /* that to XERBLA. If called with a non-positive SRNAME_LEN, */ /* XERBLA_ARRAY will call XERBLA with a string of all blank characters. */ /* Say some macro or other device makes XERBLA_ARRAY available to C99 */ /* by a name lapack_xerbla and with a common Fortran calling convention. */ /* Then a C99 program could invoke XERBLA via: */ /* { */ /* int flen = strlen(__func__); */ /* lapack_xerbla(__func__, &flen, &info); */ /* } */ /* Providing XERBLA_ARRAY is not necessary for intercepting LAPACK */ /* errors. XERBLA_ARRAY calls XERBLA. */ /* Arguments */ /* ========= */ /* SRNAME_ARRAY (input) CHARACTER(1) array, dimension (SRNAME_LEN) */ /* The name of the routine which called XERBLA_ARRAY. */ /* SRNAME_LEN (input) INTEGER */ /* The length of the name in SRNAME_ARRAY. */ /* INFO (input) INTEGER */ /* The position of the invalid parameter in the parameter list */ /* of the calling routine. */ /* ===================================================================== */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --srname_array__; /* Function Body */ s_copy(srname, "", (ftnlen)32, (ftnlen)0); /* Computing MIN */ i__2 = *srname_len__, i__3 = i_len(srname, (ftnlen)32); i__1 = min(i__2,i__3); for (i__ = 1; i__ <= i__1; ++i__) { *(unsigned char *)&srname[i__ - 1] = *(unsigned char *)& srname_array__[i__]; } _starpu_xerbla_(srname, info); return 0; } /* _starpu_xerbla_array__ */ starpu-1.4.9+dfsg/min-dgels/base/make.inc000066400000000000000000000042351507764646700202150ustar00rootroot00000000000000# -*- Makefile -*- #################################################################### # LAPACK make include file. # # LAPACK, Version 3.2.1 # # June 2009 # #################################################################### # # See the INSTALL/ directory for more examples. # SHELL = /bin/sh # # The machine (platform) identifier to append to the library names # PLAT = _LINUX # # Modify the FORTRAN and OPTS definitions to refer to the # compiler and desired compiler options for your machine. NOOPT # refers to the compiler options desired when NO OPTIMIZATION is # selected. Define LOADER and LOADOPTS to refer to the loader # and desired load options for your machine. # ####################################################### # This is used to compile C libary #CC = gcc # if no wrapping of the blas library is needed, uncomment next line #CC = gcc -DNO_BLAS_WRAP CFLAGS = -O3 -I$(TOPDIR)/INCLUDE -fPIC #LOADER = gcc LOADER = $(CC) LOADOPTS = NOOPT = -O0 -I$(TOPDIR)/INCLUDE DRVCFLAGS = $(CFLAGS) F2CCFLAGS = $(CFLAGS) ####################################################################### # # Timer for the SECOND and DSECND routines # # Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME # TIMER = EXT_ETIME # For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ # TIMER = EXT_ETIME_ # For gfortran compiler: SECOND and DSECND will use a call to the INTERNAL FUNCTION ETIME # TIMER = INT_ETIME # If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) # SECOND and DSECND will use a call to the Fortran standard INTERNAL FUNCTION CPU_TIME TIMER = INT_CPU_TIME # If neither of this works...you can use the NONE value... In that case, SECOND and DSECND will always return 0 # TIMER = NONE # # The archiver and the flag(s) to use when building archive (library) # If you system has no ranlib, set RANLIB = echo. # ARCH = ar ARCHFLAGS= cr RANLIB = ranlib BLASLIB=../../../build/minlibblas.a F2CLIB=../../../build/minlibf2c.a starpu-1.4.9+dfsg/mpi/000077500000000000000000000000001507764646700145755ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/GNUmakefile.in000066400000000000000000000032701507764646700172560ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # SUBDIRS= @STARPU_BUILD_EXAMPLES_TRUE@SUBDIRS += examples @STARPU_BUILD_TESTS_TRUE@SUBDIRS += tests all: @STARPU_SIMGRID_FALSE@check: check-recursive @STARPU_SIMGRID_FALSE@ : # divide by 4 the number of jobs to run in parallel, since mpirun will start 4 # processes in the tests and examples @STARPU_SIMGRID_FALSE@check-recursive: @STARPU_SIMGRID_FALSE@ RET=0 ; \ @STARPU_SIMGRID_FALSE@ NJOBS=`printf %s "$(MAKEFLAGS)" | sed -ne 's/.*-j \?\([0-9]\+\).*/\1/p'` ; \ @STARPU_SIMGRID_FALSE@ JOBS="" ; \ @STARPU_SIMGRID_FALSE@ if [ -n "$$NJOBS" ] ; then \ @STARPU_SIMGRID_FALSE@ if [ "$$NJOBS" -ge 4 ] ; then \ @STARPU_SIMGRID_FALSE@ JOBS="-j$$(($$NJOBS / 4))" ; \ @STARPU_SIMGRID_FALSE@ else \ @STARPU_SIMGRID_FALSE@ JOBS="-j1" ; \ @STARPU_SIMGRID_FALSE@ fi ; \ @STARPU_SIMGRID_FALSE@ fi ; \ @STARPU_SIMGRID_FALSE@ for i in $(SUBDIRS) ; do \ @STARPU_SIMGRID_FALSE@ $(MAKE) check -C $$i MAKEFLAGS="$(MAKEFLAGS) $$JOBS" || RET=1; \ @STARPU_SIMGRID_FALSE@ done ; \ @STARPU_SIMGRID_FALSE@ exit $$RET %: force @$(MAKE) -f Makefile $@ force: ; starpu-1.4.9+dfsg/mpi/Makefile.am000066400000000000000000000024271507764646700166360ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-subdirtests.mk SUBDIRS=src tools if STARPU_BUILD_EXAMPLES SUBDIRS += examples endif if STARPU_BUILD_TESTS SUBDIRS += tests endif EXTRA_DIST = \ dev/starpu_mpi_comm_check.sh pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = packages/libstarpumpi.pc packages/starpumpi-1.0.pc packages/starpumpi-1.1.pc packages/starpumpi-1.2.pc packages/starpumpi-1.3.pc packages/starpumpi-1.4.pc versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) versinclude_HEADERS = \ include/starpu_mpi.h \ include/starpu_mpi_ft.h \ include/starpu_mpi_lb.h \ include/fstarpu_mpi_mod.f90 starpu-1.4.9+dfsg/mpi/Makefile.in000066400000000000000000000763221507764646700166540ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_BUILD_EXAMPLES_TRUE@am__append_1 = examples @STARPU_BUILD_TESTS_TRUE@am__append_2 = tests subdir = mpi ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(versinclude_HEADERS) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = GNUmakefile CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(pkgconfigdir)" \ "$(DESTDIR)$(versincludedir)" DATA = $(pkgconfig_DATA) HEADERS = $(versinclude_HEADERS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = src tools examples tests am__DIST_COMMON = $(srcdir)/GNUmakefile.in $(srcdir)/Makefile.in \ $(top_srcdir)/make/starpu-subdirtests.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = src tools $(am__append_1) $(am__append_2) EXTRA_DIST = \ dev/starpu_mpi_comm_check.sh pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = packages/libstarpumpi.pc packages/starpumpi-1.0.pc packages/starpumpi-1.1.pc packages/starpumpi-1.2.pc packages/starpumpi-1.3.pc packages/starpumpi-1.4.pc versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) versinclude_HEADERS = \ include/starpu_mpi.h \ include/starpu_mpi_ft.h \ include/starpu_mpi_lb.h \ include/fstarpu_mpi_mod.f90 all: all-recursive .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign mpi/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign mpi/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): GNUmakefile: $(top_builddir)/config.status $(srcdir)/GNUmakefile.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-pkgconfigDATA: $(pkgconfig_DATA) @$(NORMAL_INSTALL) @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \ done uninstall-pkgconfigDATA: @$(NORMAL_UNINSTALL) @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir) install-versincludeHEADERS: $(versinclude_HEADERS) @$(NORMAL_INSTALL) @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(versincludedir)'"; \ $(MKDIR_P) "$(DESTDIR)$(versincludedir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(versincludedir)'"; \ $(INSTALL_HEADER) $$files "$(DESTDIR)$(versincludedir)" || exit $$?; \ done uninstall-versincludeHEADERS: @$(NORMAL_UNINSTALL) @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(versincludedir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile $(DATA) $(HEADERS) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(pkgconfigdir)" "$(DESTDIR)$(versincludedir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-pkgconfigDATA install-versincludeHEADERS install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-pkgconfigDATA uninstall-versincludeHEADERS .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ check-am clean clean-generic clean-libtool cscopelist-am ctags \ ctags-am distclean distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-pkgconfigDATA install-ps \ install-ps-am install-strip install-versincludeHEADERS \ installcheck installcheck-am installdirs installdirs-am \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags tags-am uninstall uninstall-am uninstall-pkgconfigDATA \ uninstall-versincludeHEADERS .PRECIOUS: Makefile # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # recheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i recheck || RET=1 ; \ done ; \ exit $$RET showcheckfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/mpi/dev/000077500000000000000000000000001507764646700153535ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/dev/starpu_mpi_comm_check.sh000077500000000000000000000062431507764646700222520ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Script to check MPI communications are done properly # The application should be launched with STARPU_MPI_COMM=1 # e.g # $ export STARPU_MPI_COMM=1 # $ mpirun --output-filename starpu_mpi.log appli parameters # and then the script can be launched with the output files # $ starpu_mpi_comm_check.sh starpu_mpi.log.* if test -z "$1" then echo Syntax error: parameter missing exit 1 fi # Get the nodes identifiers nodes=$(for f in $* do grep starpu_mpi $f | grep '\[' | awk '{print $1}'| sed 's/\[\(.*\)\]\[starpu_mpi\]/\1/' | grep "^[[:digit:]]*$" done |sort|uniq ) echo nodes $nodes DIR=/tmp # for each node, extract send and receive communications for node in $nodes do for f in $* do grep starpu_mpi $f |grep "\[$node" done > $DIR/starpu_mpi_node$node.log grep -- "-->" $DIR/starpu_mpi_node$node.log > $DIR/starpu_mpi_node${node}_send.log grep -- "<--" $DIR/starpu_mpi_node$node.log > $DIR/starpu_mpi_node${node}_recv.log done # count the number of traced lines #for node in $nodes #do # wc -l $DIR/starpu_mpi_node${node}_recv.log # lines=$(grep :42:42 $DIR/starpu_mpi_node${node}_recv.log | wc -l) # lines2=$(( lines + lines )) # echo $lines2 # lines3=$(( lines2 + lines )) # echo $lines3 #done # for each pair of nodes, check tags are sent and received in the same order for src in $nodes do for dst in $nodes do if test $src != $dst then grep ":$dst:42:" $DIR/starpu_mpi_node${src}_send.log| awk -F':' '{print $6}' > $DIR/node${src}_send_to_${dst}.log grep ":$src:42:" $DIR/starpu_mpi_node${dst}_recv.log|awk -F ':' '{print $6}'> $DIR/node${dst}_recv_from_${src}.log diff --side-by-side --suppress-common-lines $DIR/node${src}_send_to_${dst}.log $DIR/node${dst}_recv_from_${src}.log > $DIR/check_$$ if test -s $DIR/check_$$ then echo $src $dst less $DIR/check_$$ fi fi done done # check each envelope reception is followed by the appropriate data reception # first line: MPI_Recv of the envelope # second line: display envelope information # third line: MPI_Recv of the data for node in $nodes do echo processing $DIR/starpu_mpi_node${node}_recv.log ( while read line do read line2 read line3 #echo processing tag2=$(echo $line2 | awk -F ':' '{print $6}') tag3=$(echo $line3 | awk -F ':' '{print $6}') if test "$tag2" != "$tag3" then echo erreur echo $tag2 $tag3 echo $line echo $line2 echo $line3 fi done ) < $DIR/starpu_mpi_node${node}_recv.log done starpu-1.4.9+dfsg/mpi/examples/000077500000000000000000000000001507764646700164135ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/examples/Makefile.am000066400000000000000000000373671507764646700204670ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Thibaut Lambert # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk CC=$(MPICC) CCLD=$(MPICC) FC=$(MPIFORT) FCLD=$(MPIFORT) include $(top_srcdir)/make/starpu-loader.mk LAUNCHER = $(STARPU_MPIEXEC) LAUNCHER_ENV = $(MPI_RUN_ENV) if STARPU_SIMGRID LOADER_BIN = $(LAUNCHER) endif if STARPU_MPI_CHECK TESTS = $(starpu_mpi_EXAMPLES) endif check_PROGRAMS = $(LOADER) $(starpu_mpi_EXAMPLES) starpu_mpi_EXAMPLES = BUILT_SOURCES = CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log *.mod native_fortran/fstarpu_mod.f90 native_fortran/fstarpu_mpi_mod.f90 EXTRA_DIST = \ mpi_lu/mpi_lu-float.h \ mpi_lu/mpi_lu-double.h \ mpi_lu/plu_example.c \ mpi_lu/plu_implicit_example.c \ mpi_lu/plu_outofcore_example.c \ mpi_lu/plu_solve.c \ mpi_lu/pxlu.h \ mpi_lu/pxlu.c \ mpi_lu/pxlu_implicit.c \ mpi_lu/pxlu_kernels.h \ mpi_lu/pxlu_kernels.c \ matrix_mult/mm_2dbc.c \ native_fortran/nf_mm_2dbc.f90 \ matrix_decomposition/mpi_cholesky.h \ matrix_decomposition/mpi_cholesky_codelets.h \ matrix_decomposition/mpi_cholesky_kernels.h \ matrix_decomposition/mpi_cholesky_models.h \ matrix_decomposition/mpi_decomposition_params.h \ matrix_decomposition/mpi_decomposition_matrix.h \ user_datatype/my_interface.h \ benchs/abstract_sendrecv_bench.h\ benchs/bench_helper.h \ benchs/gemm_helper.h \ benchs/burst_helper.h \ helper.h \ perf.sh examplebindir = $(libdir)/starpu/mpi examplebin_PROGRAMS = AM_CFLAGS += $(MAGMA_CFLAGS) $(APP_CFLAGS) AM_CXXFLAGS += $(MAGMA_CFLAGS) $(APP_CXXFLAGS) AM_FFLAGS += $(MAGMA_CFLAGS) $(APP_FFLAGS) AM_FCFLAGS += $(MAGMA_CFLAGS) $(APP_FCFLAGS) AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS) LIBS += $(STARPU_CUDA_LDFLAGS) LIBS += -lm $(MAGMA_LIBS) ################### # Stencil example # ################### examplebin_PROGRAMS += \ stencil/stencil5 starpu_mpi_EXAMPLES += \ stencil/stencil5 if STARPU_USE_MPI_MPI examplebin_PROGRAMS += \ stencil/stencil5_lb starpu_mpi_EXAMPLES += \ stencil/stencil5_lb endif ################## # Cache examples # ################## examplebin_PROGRAMS += \ cache/cache \ cache/cache_disable starpu_mpi_EXAMPLES += \ cache/cache \ cache/cache_disable ################## # MPI LU example # ################## if !STARPU_NO_BLAS_LIB examplebin_PROGRAMS += \ mpi_lu/plu_example_float \ mpi_lu/plu_example_double \ mpi_lu/plu_implicit_example_float \ mpi_lu/plu_implicit_example_double \ mpi_lu/plu_outofcore_example_float \ mpi_lu/plu_outofcore_example_double if !STARPU_SIMGRID starpu_mpi_EXAMPLES += \ mpi_lu/plu_example_float \ mpi_lu/plu_example_double \ mpi_lu/plu_implicit_example_float \ mpi_lu/plu_implicit_example_double \ mpi_lu/plu_outofcore_example_float \ mpi_lu/plu_outofcore_example_double endif mpi_lu_plu_example_float_LDADD = \ $(STARPU_LIBNUMA_LDFLAGS) \ $(STARPU_BLAS_LDFLAGS) -lm mpi_lu_plu_example_float_SOURCES = \ mpi_lu/plu_example_float.c \ mpi_lu/plu_solve_float.c \ mpi_lu/pslu_kernels.c \ mpi_lu/pslu.c \ ../../examples/common/blas.c mpi_lu_plu_example_double_LDADD = \ $(STARPU_LIBNUMA_LDFLAGS) \ $(STARPU_BLAS_LDFLAGS) -lm mpi_lu_plu_example_double_SOURCES = \ mpi_lu/plu_example_double.c \ mpi_lu/plu_solve_double.c \ mpi_lu/pdlu_kernels.c \ mpi_lu/pdlu.c \ ../../examples/common/blas.c mpi_lu_plu_implicit_example_float_LDADD = \ $(STARPU_LIBNUMA_LDFLAGS) \ $(STARPU_BLAS_LDFLAGS) -lm mpi_lu_plu_implicit_example_float_SOURCES = \ mpi_lu/plu_implicit_example_float.c \ mpi_lu/plu_solve_float.c \ mpi_lu/pslu_kernels.c \ mpi_lu/pslu_implicit.c \ ../../examples/common/blas.c mpi_lu_plu_implicit_example_double_LDADD = \ $(STARPU_LIBNUMA_LDFLAGS) \ $(STARPU_BLAS_LDFLAGS) -lm mpi_lu_plu_implicit_example_double_SOURCES = \ mpi_lu/plu_implicit_example_double.c \ mpi_lu/plu_solve_double.c \ mpi_lu/pdlu_kernels.c \ mpi_lu/pdlu_implicit.c \ ../../examples/common/blas.c mpi_lu_plu_outofcore_example_float_LDADD = \ $(STARPU_LIBNUMA_LDFLAGS) \ $(STARPU_BLAS_LDFLAGS) -lm mpi_lu_plu_outofcore_example_float_SOURCES = \ mpi_lu/plu_outofcore_example_float.c \ mpi_lu/plu_solve_float.c \ mpi_lu/pslu_kernels.c \ mpi_lu/pslu_implicit.c \ ../../examples/common/blas.c mpi_lu_plu_outofcore_example_double_LDADD = \ $(STARPU_LIBNUMA_LDFLAGS) \ $(STARPU_BLAS_LDFLAGS) -lm mpi_lu_plu_outofcore_example_double_SOURCES = \ mpi_lu/plu_outofcore_example_double.c \ mpi_lu/plu_solve_double.c \ mpi_lu/pdlu_kernels.c \ mpi_lu/pdlu_implicit.c \ ../../examples/common/blas.c endif ######################## # MPI Cholesky example # ######################## if !STARPU_NO_BLAS_LIB examplebin_PROGRAMS += \ matrix_decomposition/mpi_cholesky \ matrix_decomposition/mpi_cholesky_distributed matrix_decomposition_mpi_cholesky_SOURCES = \ matrix_decomposition/mpi_cholesky.c \ matrix_decomposition/mpi_cholesky_models.c \ matrix_decomposition/mpi_cholesky_kernels.c \ matrix_decomposition/mpi_cholesky_codelets.c \ matrix_decomposition/mpi_decomposition_params.c \ matrix_decomposition/mpi_decomposition_matrix.c \ ../../examples/common/blas.c matrix_decomposition_mpi_cholesky_LDADD = \ $(STARPU_BLAS_LDFLAGS) -lm matrix_decomposition_mpi_cholesky_distributed_SOURCES = \ matrix_decomposition/mpi_cholesky_distributed.c \ matrix_decomposition/mpi_cholesky_models.c \ matrix_decomposition/mpi_cholesky_kernels.c \ matrix_decomposition/mpi_cholesky_codelets.c \ matrix_decomposition/mpi_decomposition_params.c \ matrix_decomposition/mpi_decomposition_matrix.c \ ../../examples/common/blas.c matrix_decomposition_mpi_cholesky_distributed_LDADD = \ $(STARPU_BLAS_LDFLAGS) -lm starpu_mpi_EXAMPLES += \ matrix_decomposition/mpi_cholesky \ matrix_decomposition/mpi_cholesky_distributed endif ############## # CG example # ############## if !STARPU_SIMGRID if !STARPU_NO_BLAS_LIB examplebin_PROGRAMS += cg/cg cg_cg_SOURCES = \ cg/cg.c \ ../../examples/common/blas.c cg_cg_LDADD = \ $(STARPU_BLAS_LDFLAGS) endif endif ############################ # MPI Matrix mult examples # ############################ examplebin_PROGRAMS += \ matrix_mult/mm matrix_mult_mm_LDADD = \ -lm if !STARPU_SIMGRID starpu_mpi_EXAMPLES += \ matrix_mult/mm endif if !STARPU_NO_BLAS_LIB examplebin_PROGRAMS += \ matrix_mult/mm_2dbc matrix_mult_mm_2dbc_SOURCES = \ matrix_mult/mm_2dbc.c \ ../../examples/common/blas.c matrix_mult_mm_2dbc_LDADD = \ $(STARPU_LIBNUMA_LDFLAGS) \ $(STARPU_BLAS_LDFLAGS) -lm if !STARPU_SIMGRID starpu_mpi_EXAMPLES += \ matrix_mult/mm_2dbc endif endif ######################## # MPI STARPU_MPI_REDUX # ######################## examplebin_PROGRAMS += \ mpi_redux/mpi_redux \ mpi_redux/mpi_redux_autowrapup \ mpi_redux/mpi_redux_tree mpi_redux_mpi_redux_LDADD = \ -lm mpi_redux_mpi_redux_autowrapup_LDADD = \ -lm mpi_redux_mpi_redux_tree_LDADD = \ -lm if !STARPU_SIMGRID starpu_mpi_EXAMPLES += \ mpi_redux/mpi_redux \ mpi_redux/mpi_redux_autowrapup \ mpi_redux/mpi_redux_tree endif ########################################## # Native Fortran MPI Matrix mult example # ########################################## if STARPU_HAVE_MPIFORT if !STARPU_SANITIZE examplebin_PROGRAMS += \ native_fortran/nf_mm \ native_fortran/nf_mm_task_build \ native_fortran/nf_basic_ring native_fortran_nf_mm_SOURCES = \ native_fortran/nf_mm_cl.f90 \ native_fortran/fstarpu_mpi_mod.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_mm.f90 native_fortran_nf_mm_LDADD = \ -lm native_fortran_nf_mm_task_build_SOURCES = \ native_fortran/nf_mm_cl.f90 \ native_fortran/fstarpu_mpi_mod.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_mm_task_build.f90 native_fortran_nf_mm_task_build_LDADD = \ -lm native_fortran_nf_basic_ring_SOURCES = \ native_fortran/fstarpu_mpi_mod.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_basic_ring.f90 native_fortran_nf_basic_ring_LDADD = \ -lm if !STARPU_SIMGRID starpu_mpi_EXAMPLES += \ native_fortran/nf_mm \ native_fortran/nf_mm_task_build \ native_fortran/nf_basic_ring endif if !STARPU_NO_BLAS_LIB if STARPU_HAVE_LIBLAPACK examplebin_PROGRAMS += \ native_fortran/nf_mm_2dbc native_fortran_nf_mm_2dbc_SOURCES = \ native_fortran/nf_mm_cl_blas.f90 \ native_fortran/fstarpu_mpi_mod.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_mm_2dbc.f90 native_fortran_nf_mm_2dbc_LDADD = \ $(STARPU_LIBNUMA_LDFLAGS) \ $(STARPU_BLAS_LDFLAGS) -lm -llapack if !STARPU_SIMGRID starpu_mpi_EXAMPLES += \ native_fortran/nf_mm_2dbc endif endif endif endif endif ######################################## # Native Fortran MPI STARPU_REDUX test # ######################################## if STARPU_HAVE_MPIFORT if !STARPU_SANITIZE examplebin_PROGRAMS += \ native_fortran/nf_mpi_redux \ native_fortran/nf_mpi_redux_tree native_fortran_nf_mpi_redux_SOURCES = \ native_fortran/fstarpu_mpi_mod.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_mpi_redux.f90 native_fortran_nf_mpi_redux_LDADD = \ -lm native_fortran_nf_mpi_redux_tree_SOURCES = \ native_fortran/fstarpu_mpi_mod.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_mpi_redux_tree.f90 native_fortran_nf_mpi_redux_tree_LDADD = \ -lm if !STARPU_SIMGRID starpu_mpi_EXAMPLES += \ native_fortran/nf_mpi_redux \ native_fortran/nf_mpi_redux_tree endif endif endif ######################################## # Native Fortran MPI STARPU_REDUX test # ######################################## if STARPU_HAVE_MPIFORT if !STARPU_SANITIZE examplebin_PROGRAMS += \ native_fortran/nf_redux_test native_fortran_nf_redux_test_SOURCES = \ native_fortran/fstarpu_mpi_mod.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_redux_test.f90 native_fortran_nf_redux_test_LDADD = \ -lm if !STARPU_SIMGRID starpu_mpi_EXAMPLES += \ native_fortran/nf_redux_test endif endif endif ################### # complex example # ################### examplebin_PROGRAMS += \ complex/mpi_complex complex_mpi_complex_SOURCES = \ complex/mpi_complex.c \ ../../examples/interface/complex_interface.c starpu_mpi_EXAMPLES += \ complex/mpi_complex ######################### # user_datatype example # ######################### examplebin_PROGRAMS += \ user_datatype/user_datatype \ user_datatype/user_datatype2 \ user_datatype/user_datatype_early \ user_datatype/user_datatype_interface user_datatype_user_datatype_SOURCES = \ user_datatype/user_datatype.c \ user_datatype/my_interface.c user_datatype_user_datatype2_SOURCES = \ user_datatype/user_datatype2.c \ user_datatype/my_interface.c user_datatype_user_datatype_early_SOURCES = \ user_datatype/user_datatype_early.c \ user_datatype/my_interface.c user_datatype_user_datatype_interface_SOURCES = \ user_datatype/user_datatype_interface.c \ user_datatype/my_interface.c if !STARPU_SIMGRID starpu_mpi_EXAMPLES += \ user_datatype/user_datatype2 \ user_datatype/user_datatype_early \ user_datatype/user_datatype \ user_datatype/user_datatype_interface endif ################### # comm example # ################### examplebin_PROGRAMS += \ comm/comm \ comm/mix_comm \ comm/group if !STARPU_SIMGRID starpu_mpi_EXAMPLES += \ comm/comm \ comm/mix_comm \ comm/group endif ################## # filter example # ################## examplebin_PROGRAMS += \ filters/filter if !STARPU_SIMGRID starpu_mpi_EXAMPLES += \ filters/filter endif # Native Fortran example # - link over source file to build our own object native_fortran/fstarpu_mod.f90: @$(MKDIR_P) $(dir $@) $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ native_fortran/fstarpu_mpi_mod.f90: @$(MKDIR_P) $(dir $@) $(V_ln) $(LN_S) $(abs_top_srcdir)/mpi/include/$(notdir $@) $@ if STARPU_HAVE_MPIFORT if !STARPU_SANITIZE # - express the creation of .mod along .o fstarpu_mod.mod: native_fortran/fstarpu_mod.o fstarpu_mpi_mod.mod: native_fortran/fstarpu_mpi_mod.o nf_mm_cl.mod: native_fortran/nf_mm_cl.o nf_mm_cl_blas.mod: native_fortran/nf_mm_cl_blas.o # - list explicit dependences to control proper module files dependencies native_fortran/fstarpu_mpi_mod.o: fstarpu_mod.mod native_fortran/nf_mm_cl.o: fstarpu_mod.mod fstarpu_mpi_mod.mod native_fortran/nf_mm_cl_blas.o: fstarpu_mod.mod fstarpu_mpi_mod.mod native_fortran/nf_mm.o: nf_mm_cl.mod fstarpu_mpi_mod.mod fstarpu_mod.mod native_fortran/nf_mm_2dbc.o: nf_mm_cl.mod nf_mm_cl_blas.mod fstarpu_mpi_mod.mod fstarpu_mod.mod native_fortran/nf_mm_task_build.o: nf_mm_cl.mod fstarpu_mpi_mod.mod fstarpu_mod.mod native_fortran/nf_basic_ring.o: fstarpu_mpi_mod.mod fstarpu_mod.mod native_fortran/nf_redux_test.o: fstarpu_mpi_mod.mod fstarpu_mod.mod native_fortran/nf_mpi_redux.o: fstarpu_mpi_mod.mod fstarpu_mod.mod native_fortran/nf_mpi_redux_tree.o: fstarpu_mpi_mod.mod fstarpu_mod.mod endif endif ########## # benchs # ########## examplebin_PROGRAMS += \ benchs/sendrecv_bench \ benchs/burst if !STARPU_USE_MPI_MPI examplebin_PROGRAMS += \ benchs/sendrecv_parallel_tasks_bench endif if !STARPU_NO_BLAS_LIB examplebin_PROGRAMS += \ benchs/sendrecv_gemm_bench \ benchs/burst_gemm endif if !STARPU_SIMGRID starpu_mpi_EXAMPLES += \ benchs/sendrecv_bench \ benchs/burst if STARPU_MPI_SYNC_CLOCKS examplebin_PROGRAMS += \ benchs/bcast_bench \ benchs/recv_wait_finalize_bench starpu_mpi_EXAMPLES += \ benchs/bcast_bench \ benchs/recv_wait_finalize_bench endif if !STARPU_USE_MPI_MPI starpu_mpi_EXAMPLES += \ benchs/sendrecv_parallel_tasks_bench endif if !STARPU_NO_BLAS_LIB starpu_mpi_EXAMPLES += \ benchs/sendrecv_gemm_bench \ benchs/burst_gemm endif endif benchs_sendrecv_bench_SOURCES = benchs/sendrecv_bench.c benchs_sendrecv_bench_SOURCES += benchs/bench_helper.c benchs_sendrecv_bench_SOURCES += benchs/abstract_sendrecv_bench.c benchs_bcast_bench_SOURCES = benchs/bcast_bench.c benchs_bcast_bench_SOURCES += benchs/bench_helper.c benchs_bcast_bench_LDADD = $(MPI_SYNC_CLOCKS_LIBS) benchs_bcast_bench_CFLAGS = $(MPI_SYNC_CLOCKS_CFLAGS) benchs_recv_wait_finalize_bench_SOURCES = benchs/recv_wait_finalize_bench.c benchs_recv_wait_finalize_bench_LDADD = $(MPI_SYNC_CLOCKS_LIBS) benchs_recv_wait_finalize_bench_CFLAGS = $(MPI_SYNC_CLOCKS_CFLAGS) benchs_sendrecv_parallel_tasks_bench_SOURCES = benchs/sendrecv_parallel_tasks_bench.c benchs_sendrecv_parallel_tasks_bench_SOURCES += benchs/bench_helper.c benchs_burst_SOURCES = benchs/burst.c benchs_burst_SOURCES += benchs/burst_helper.c if !STARPU_NO_BLAS_LIB benchs_sendrecv_gemm_bench_SOURCES = benchs/sendrecv_gemm_bench.c benchs_sendrecv_gemm_bench_SOURCES += benchs/bench_helper.c benchs_sendrecv_gemm_bench_SOURCES += benchs/gemm_helper.c benchs_sendrecv_gemm_bench_SOURCES += benchs/abstract_sendrecv_bench.c benchs_sendrecv_gemm_bench_SOURCES += ../../examples/common/blas.c benchs_sendrecv_gemm_bench_LDADD = $(STARPU_BLAS_LDFLAGS) benchs_burst_gemm_SOURCES = benchs/burst_gemm.c benchs_burst_gemm_SOURCES += benchs/gemm_helper.c benchs_burst_gemm_SOURCES += benchs/burst_helper.c benchs_burst_gemm_SOURCES += ../../examples/common/blas.c benchs_burst_gemm_LDADD = $(STARPU_BLAS_LDFLAGS) endif starpu-1.4.9+dfsg/mpi/examples/Makefile.in000066400000000000000000005054461507764646700204760ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ noinst_PROGRAMS = $(am__EXEEXT_23) # Make tests run through mpiexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec # switch off local socket usage #MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 @STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader @STARPU_MPI_CHECK_TRUE@TESTS = $(am__EXEEXT_14) check_PROGRAMS = $(am__EXEEXT_14) examplebin_PROGRAMS = stencil/stencil5$(EXEEXT) $(am__EXEEXT_1) \ cache/cache$(EXEEXT) cache/cache_disable$(EXEEXT) \ $(am__EXEEXT_15) $(am__EXEEXT_16) matrix_mult/mm$(EXEEXT) \ $(am__EXEEXT_17) mpi_redux/mpi_redux$(EXEEXT) \ mpi_redux/mpi_redux_autowrapup$(EXEEXT) \ mpi_redux/mpi_redux_tree$(EXEEXT) $(am__EXEEXT_18) \ $(am__EXEEXT_19) $(am__EXEEXT_20) complex/mpi_complex$(EXEEXT) \ user_datatype/user_datatype$(EXEEXT) \ user_datatype/user_datatype2$(EXEEXT) \ user_datatype/user_datatype_early$(EXEEXT) \ user_datatype/user_datatype_interface$(EXEEXT) \ comm/comm$(EXEEXT) comm/mix_comm$(EXEEXT) comm/group$(EXEEXT) \ filters/filter$(EXEEXT) benchs/sendrecv_bench$(EXEEXT) \ benchs/burst$(EXEEXT) $(am__EXEEXT_21) $(am__EXEEXT_22) \ $(am__EXEEXT_11) @STARPU_USE_MPI_MPI_TRUE@am__append_8 = \ @STARPU_USE_MPI_MPI_TRUE@ stencil/stencil5_lb @STARPU_USE_MPI_MPI_TRUE@am__append_9 = \ @STARPU_USE_MPI_MPI_TRUE@ stencil/stencil5_lb ################## # MPI LU example # ################## ######################## # MPI Cholesky example # ######################## @STARPU_NO_BLAS_LIB_FALSE@am__append_10 = mpi_lu/plu_example_float \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_example_double \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_implicit_example_float \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_implicit_example_double \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_outofcore_example_float \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_outofcore_example_double \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_distributed @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__append_11 = \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_example_float \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_example_double \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_implicit_example_float \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_implicit_example_double \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_outofcore_example_float \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_outofcore_example_double @STARPU_NO_BLAS_LIB_FALSE@am__append_12 = \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_distributed ############## # CG example # ############## @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__append_13 = cg/cg @STARPU_SIMGRID_FALSE@am__append_14 = \ @STARPU_SIMGRID_FALSE@ matrix_mult/mm @STARPU_NO_BLAS_LIB_FALSE@am__append_15 = \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_mult/mm_2dbc @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__append_16 = \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ matrix_mult/mm_2dbc @STARPU_SIMGRID_FALSE@am__append_17 = \ @STARPU_SIMGRID_FALSE@ mpi_redux/mpi_redux \ @STARPU_SIMGRID_FALSE@ mpi_redux/mpi_redux_autowrapup \ @STARPU_SIMGRID_FALSE@ mpi_redux/mpi_redux_tree ########################################## # Native Fortran MPI Matrix mult example # ########################################## @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am__append_18 = \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_task_build \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_basic_ring @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__append_19 = \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_mm \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_mm_task_build \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_basic_ring @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@am__append_20 = \ @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_2dbc @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__append_21 = \ @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_mm_2dbc ######################################## # Native Fortran MPI STARPU_REDUX test # ######################################## ######################################## # Native Fortran MPI STARPU_REDUX test # ######################################## @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am__append_22 = native_fortran/nf_mpi_redux \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mpi_redux_tree \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_redux_test @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__append_23 = native_fortran/nf_mpi_redux \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_mpi_redux_tree \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_redux_test @STARPU_SIMGRID_FALSE@am__append_24 = user_datatype/user_datatype2 \ @STARPU_SIMGRID_FALSE@ user_datatype/user_datatype_early \ @STARPU_SIMGRID_FALSE@ user_datatype/user_datatype \ @STARPU_SIMGRID_FALSE@ user_datatype/user_datatype_interface \ @STARPU_SIMGRID_FALSE@ comm/comm comm/mix_comm comm/group \ @STARPU_SIMGRID_FALSE@ filters/filter benchs/sendrecv_bench \ @STARPU_SIMGRID_FALSE@ benchs/burst @STARPU_USE_MPI_MPI_FALSE@am__append_25 = \ @STARPU_USE_MPI_MPI_FALSE@ benchs/sendrecv_parallel_tasks_bench @STARPU_NO_BLAS_LIB_FALSE@am__append_26 = \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/sendrecv_gemm_bench \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/burst_gemm @STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@am__append_27 = \ @STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@ benchs/bcast_bench \ @STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@ benchs/recv_wait_finalize_bench @STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@am__append_28 = \ @STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@ benchs/bcast_bench \ @STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@ benchs/recv_wait_finalize_bench @STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MPI_FALSE@am__append_29 = \ @STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MPI_FALSE@ benchs/sendrecv_parallel_tasks_bench @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__append_30 = \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ benchs/sendrecv_gemm_bench \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ benchs/burst_gemm subdir = mpi/examples ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = @STARPU_USE_MPI_MPI_TRUE@am__EXEEXT_1 = stencil/stencil5_lb$(EXEEXT) @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_2 = mpi_lu/plu_example_float$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_example_double$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_implicit_example_float$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_implicit_example_double$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_outofcore_example_float$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ mpi_lu/plu_outofcore_example_double$(EXEEXT) @STARPU_NO_BLAS_LIB_FALSE@am__EXEEXT_3 = matrix_decomposition/mpi_cholesky$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_distributed$(EXEEXT) @STARPU_SIMGRID_FALSE@am__EXEEXT_4 = matrix_mult/mm$(EXEEXT) @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_5 = matrix_mult/mm_2dbc$(EXEEXT) @STARPU_SIMGRID_FALSE@am__EXEEXT_6 = mpi_redux/mpi_redux$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ mpi_redux/mpi_redux_autowrapup$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ mpi_redux/mpi_redux_tree$(EXEEXT) @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_7 = native_fortran/nf_mm$(EXEEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_mm_task_build$(EXEEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_basic_ring$(EXEEXT) @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_8 = native_fortran/nf_mm_2dbc$(EXEEXT) @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_9 = native_fortran/nf_mpi_redux$(EXEEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_mpi_redux_tree$(EXEEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ native_fortran/nf_redux_test$(EXEEXT) @STARPU_SIMGRID_FALSE@am__EXEEXT_10 = \ @STARPU_SIMGRID_FALSE@ user_datatype/user_datatype2$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ user_datatype/user_datatype_early$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ user_datatype/user_datatype$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ user_datatype/user_datatype_interface$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ comm/comm$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ comm/mix_comm$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ comm/group$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ filters/filter$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ benchs/sendrecv_bench$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ benchs/burst$(EXEEXT) @STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_11 = benchs/bcast_bench$(EXEEXT) \ @STARPU_MPI_SYNC_CLOCKS_TRUE@@STARPU_SIMGRID_FALSE@ benchs/recv_wait_finalize_bench$(EXEEXT) @STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MPI_FALSE@am__EXEEXT_12 = benchs/sendrecv_parallel_tasks_bench$(EXEEXT) @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_13 = benchs/sendrecv_gemm_bench$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ benchs/burst_gemm$(EXEEXT) am__EXEEXT_14 = stencil/stencil5$(EXEEXT) $(am__EXEEXT_1) \ cache/cache$(EXEEXT) cache/cache_disable$(EXEEXT) \ $(am__EXEEXT_2) $(am__EXEEXT_3) $(am__EXEEXT_4) \ $(am__EXEEXT_5) $(am__EXEEXT_6) $(am__EXEEXT_7) \ $(am__EXEEXT_8) $(am__EXEEXT_9) complex/mpi_complex$(EXEEXT) \ $(am__EXEEXT_10) $(am__EXEEXT_11) $(am__EXEEXT_12) \ $(am__EXEEXT_13) @STARPU_NO_BLAS_LIB_FALSE@am__EXEEXT_15 = \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_example_float$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_example_double$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_implicit_example_float$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_implicit_example_double$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_outofcore_example_float$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_outofcore_example_double$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_distributed$(EXEEXT) @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_16 = cg/cg$(EXEEXT) @STARPU_NO_BLAS_LIB_FALSE@am__EXEEXT_17 = \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_mult/mm_2dbc$(EXEEXT) @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am__EXEEXT_18 = native_fortran/nf_mm$(EXEEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_task_build$(EXEEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_basic_ring$(EXEEXT) @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@am__EXEEXT_19 = native_fortran/nf_mm_2dbc$(EXEEXT) @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am__EXEEXT_20 = native_fortran/nf_mpi_redux$(EXEEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mpi_redux_tree$(EXEEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_redux_test$(EXEEXT) @STARPU_USE_MPI_MPI_FALSE@am__EXEEXT_21 = benchs/sendrecv_parallel_tasks_bench$(EXEEXT) @STARPU_NO_BLAS_LIB_FALSE@am__EXEEXT_22 = \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/sendrecv_gemm_bench$(EXEEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/burst_gemm$(EXEEXT) am__installdirs = "$(DESTDIR)$(examplebindir)" @STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_23 = loader$(EXEEXT) PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) am__dirstamp = $(am__leading_dot)dirstamp am_benchs_bcast_bench_OBJECTS = \ benchs/bcast_bench-bcast_bench.$(OBJEXT) \ benchs/bcast_bench-bench_helper.$(OBJEXT) benchs_bcast_bench_OBJECTS = $(am_benchs_bcast_bench_OBJECTS) am__DEPENDENCIES_1 = benchs_bcast_bench_DEPENDENCIES = $(am__DEPENDENCIES_1) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = benchs_bcast_bench_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(benchs_bcast_bench_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ am_benchs_burst_OBJECTS = benchs/burst.$(OBJEXT) \ benchs/burst_helper.$(OBJEXT) benchs_burst_OBJECTS = $(am_benchs_burst_OBJECTS) benchs_burst_LDADD = $(LDADD) am__benchs_burst_gemm_SOURCES_DIST = benchs/burst_gemm.c \ benchs/gemm_helper.c benchs/burst_helper.c \ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_benchs_burst_gemm_OBJECTS = \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/burst_gemm.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/gemm_helper.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/burst_helper.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) benchs_burst_gemm_OBJECTS = $(am_benchs_burst_gemm_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@benchs_burst_gemm_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am_benchs_recv_wait_finalize_bench_OBJECTS = benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.$(OBJEXT) benchs_recv_wait_finalize_bench_OBJECTS = \ $(am_benchs_recv_wait_finalize_bench_OBJECTS) benchs_recv_wait_finalize_bench_DEPENDENCIES = $(am__DEPENDENCIES_1) benchs_recv_wait_finalize_bench_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(benchs_recv_wait_finalize_bench_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ am_benchs_sendrecv_bench_OBJECTS = benchs/sendrecv_bench.$(OBJEXT) \ benchs/bench_helper.$(OBJEXT) \ benchs/abstract_sendrecv_bench.$(OBJEXT) benchs_sendrecv_bench_OBJECTS = $(am_benchs_sendrecv_bench_OBJECTS) benchs_sendrecv_bench_LDADD = $(LDADD) am__benchs_sendrecv_gemm_bench_SOURCES_DIST = \ benchs/sendrecv_gemm_bench.c benchs/bench_helper.c \ benchs/gemm_helper.c benchs/abstract_sendrecv_bench.c \ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_benchs_sendrecv_gemm_bench_OBJECTS = benchs/sendrecv_gemm_bench.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/bench_helper.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/gemm_helper.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/abstract_sendrecv_bench.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) benchs_sendrecv_gemm_bench_OBJECTS = \ $(am_benchs_sendrecv_gemm_bench_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@benchs_sendrecv_gemm_bench_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am_benchs_sendrecv_parallel_tasks_bench_OBJECTS = \ benchs/sendrecv_parallel_tasks_bench.$(OBJEXT) \ benchs/bench_helper.$(OBJEXT) benchs_sendrecv_parallel_tasks_bench_OBJECTS = \ $(am_benchs_sendrecv_parallel_tasks_bench_OBJECTS) benchs_sendrecv_parallel_tasks_bench_LDADD = $(LDADD) cache_cache_SOURCES = cache/cache.c cache_cache_OBJECTS = cache/cache.$(OBJEXT) cache_cache_LDADD = $(LDADD) cache_cache_disable_SOURCES = cache/cache_disable.c cache_cache_disable_OBJECTS = cache/cache_disable.$(OBJEXT) cache_cache_disable_LDADD = $(LDADD) am__cg_cg_SOURCES_DIST = cg/cg.c ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@am_cg_cg_OBJECTS = cg/cg.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ ../../examples/common/blas.$(OBJEXT) cg_cg_OBJECTS = $(am_cg_cg_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@cg_cg_DEPENDENCIES = $(am__DEPENDENCIES_1) comm_comm_SOURCES = comm/comm.c comm_comm_OBJECTS = comm/comm.$(OBJEXT) comm_comm_LDADD = $(LDADD) comm_group_SOURCES = comm/group.c comm_group_OBJECTS = comm/group.$(OBJEXT) comm_group_LDADD = $(LDADD) comm_mix_comm_SOURCES = comm/mix_comm.c comm_mix_comm_OBJECTS = comm/mix_comm.$(OBJEXT) comm_mix_comm_LDADD = $(LDADD) am_complex_mpi_complex_OBJECTS = complex/mpi_complex.$(OBJEXT) \ ../../examples/interface/complex_interface.$(OBJEXT) complex_mpi_complex_OBJECTS = $(am_complex_mpi_complex_OBJECTS) complex_mpi_complex_LDADD = $(LDADD) filters_filter_SOURCES = filters/filter.c filters_filter_OBJECTS = filters/filter.$(OBJEXT) filters_filter_LDADD = $(LDADD) loader_SOURCES = loader.c loader_OBJECTS = loader-loader.$(OBJEXT) loader_LDADD = $(LDADD) am__matrix_decomposition_mpi_cholesky_SOURCES_DIST = \ matrix_decomposition/mpi_cholesky.c \ matrix_decomposition/mpi_cholesky_models.c \ matrix_decomposition/mpi_cholesky_kernels.c \ matrix_decomposition/mpi_cholesky_codelets.c \ matrix_decomposition/mpi_decomposition_params.c \ matrix_decomposition/mpi_decomposition_matrix.c \ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_matrix_decomposition_mpi_cholesky_OBJECTS = matrix_decomposition/mpi_cholesky.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_models.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_codelets.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_params.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_matrix.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) matrix_decomposition_mpi_cholesky_OBJECTS = \ $(am_matrix_decomposition_mpi_cholesky_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@matrix_decomposition_mpi_cholesky_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__matrix_decomposition_mpi_cholesky_distributed_SOURCES_DIST = \ matrix_decomposition/mpi_cholesky_distributed.c \ matrix_decomposition/mpi_cholesky_models.c \ matrix_decomposition/mpi_cholesky_kernels.c \ matrix_decomposition/mpi_cholesky_codelets.c \ matrix_decomposition/mpi_decomposition_params.c \ matrix_decomposition/mpi_decomposition_matrix.c \ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_matrix_decomposition_mpi_cholesky_distributed_OBJECTS = matrix_decomposition/mpi_cholesky_distributed.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_models.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_codelets.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_params.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_matrix.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) matrix_decomposition_mpi_cholesky_distributed_OBJECTS = \ $(am_matrix_decomposition_mpi_cholesky_distributed_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@matrix_decomposition_mpi_cholesky_distributed_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) matrix_mult_mm_SOURCES = matrix_mult/mm.c matrix_mult_mm_OBJECTS = matrix_mult/mm.$(OBJEXT) matrix_mult_mm_DEPENDENCIES = am__matrix_mult_mm_2dbc_SOURCES_DIST = matrix_mult/mm_2dbc.c \ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_matrix_mult_mm_2dbc_OBJECTS = \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_mult/mm_2dbc.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) matrix_mult_mm_2dbc_OBJECTS = $(am_matrix_mult_mm_2dbc_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@matrix_mult_mm_2dbc_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__mpi_lu_plu_example_double_SOURCES_DIST = \ mpi_lu/plu_example_double.c mpi_lu/plu_solve_double.c \ mpi_lu/pdlu_kernels.c mpi_lu/pdlu.c \ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_mpi_lu_plu_example_double_OBJECTS = \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_example_double.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_double.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) mpi_lu_plu_example_double_OBJECTS = \ $(am_mpi_lu_plu_example_double_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_example_double_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__mpi_lu_plu_example_float_SOURCES_DIST = \ mpi_lu/plu_example_float.c mpi_lu/plu_solve_float.c \ mpi_lu/pslu_kernels.c mpi_lu/pslu.c \ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_mpi_lu_plu_example_float_OBJECTS = \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_example_float.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_float.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) mpi_lu_plu_example_float_OBJECTS = \ $(am_mpi_lu_plu_example_float_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_example_float_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__mpi_lu_plu_implicit_example_double_SOURCES_DIST = \ mpi_lu/plu_implicit_example_double.c mpi_lu/plu_solve_double.c \ mpi_lu/pdlu_kernels.c mpi_lu/pdlu_implicit.c \ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_mpi_lu_plu_implicit_example_double_OBJECTS = mpi_lu/plu_implicit_example_double.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_double.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_implicit.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) mpi_lu_plu_implicit_example_double_OBJECTS = \ $(am_mpi_lu_plu_implicit_example_double_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_implicit_example_double_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__mpi_lu_plu_implicit_example_float_SOURCES_DIST = \ mpi_lu/plu_implicit_example_float.c mpi_lu/plu_solve_float.c \ mpi_lu/pslu_kernels.c mpi_lu/pslu_implicit.c \ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_mpi_lu_plu_implicit_example_float_OBJECTS = mpi_lu/plu_implicit_example_float.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_float.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_implicit.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) mpi_lu_plu_implicit_example_float_OBJECTS = \ $(am_mpi_lu_plu_implicit_example_float_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_implicit_example_float_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__mpi_lu_plu_outofcore_example_double_SOURCES_DIST = \ mpi_lu/plu_outofcore_example_double.c \ mpi_lu/plu_solve_double.c mpi_lu/pdlu_kernels.c \ mpi_lu/pdlu_implicit.c ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_mpi_lu_plu_outofcore_example_double_OBJECTS = mpi_lu/plu_outofcore_example_double.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_double.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_implicit.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) mpi_lu_plu_outofcore_example_double_OBJECTS = \ $(am_mpi_lu_plu_outofcore_example_double_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_outofcore_example_double_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) am__mpi_lu_plu_outofcore_example_float_SOURCES_DIST = \ mpi_lu/plu_outofcore_example_float.c mpi_lu/plu_solve_float.c \ mpi_lu/pslu_kernels.c mpi_lu/pslu_implicit.c \ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_mpi_lu_plu_outofcore_example_float_OBJECTS = mpi_lu/plu_outofcore_example_float.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_float.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_implicit.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) mpi_lu_plu_outofcore_example_float_OBJECTS = \ $(am_mpi_lu_plu_outofcore_example_float_OBJECTS) @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_outofcore_example_float_DEPENDENCIES = \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) mpi_redux_mpi_redux_SOURCES = mpi_redux/mpi_redux.c mpi_redux_mpi_redux_OBJECTS = mpi_redux/mpi_redux.$(OBJEXT) mpi_redux_mpi_redux_DEPENDENCIES = mpi_redux_mpi_redux_autowrapup_SOURCES = \ mpi_redux/mpi_redux_autowrapup.c mpi_redux_mpi_redux_autowrapup_OBJECTS = \ mpi_redux/mpi_redux_autowrapup.$(OBJEXT) mpi_redux_mpi_redux_autowrapup_DEPENDENCIES = mpi_redux_mpi_redux_tree_SOURCES = mpi_redux/mpi_redux_tree.c mpi_redux_mpi_redux_tree_OBJECTS = mpi_redux/mpi_redux_tree.$(OBJEXT) mpi_redux_mpi_redux_tree_DEPENDENCIES = am__native_fortran_nf_basic_ring_SOURCES_DIST = \ native_fortran/fstarpu_mpi_mod.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_basic_ring.f90 @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am_native_fortran_nf_basic_ring_OBJECTS = native_fortran/fstarpu_mpi_mod.$(OBJEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.$(OBJEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_basic_ring.$(OBJEXT) native_fortran_nf_basic_ring_OBJECTS = \ $(am_native_fortran_nf_basic_ring_OBJECTS) native_fortran_nf_basic_ring_DEPENDENCIES = am__native_fortran_nf_mm_SOURCES_DIST = native_fortran/nf_mm_cl.f90 \ native_fortran/fstarpu_mpi_mod.f90 \ native_fortran/fstarpu_mod.f90 native_fortran/nf_mm.f90 @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am_native_fortran_nf_mm_OBJECTS = native_fortran/nf_mm_cl.$(OBJEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.$(OBJEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.$(OBJEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm.$(OBJEXT) native_fortran_nf_mm_OBJECTS = $(am_native_fortran_nf_mm_OBJECTS) native_fortran_nf_mm_DEPENDENCIES = am__native_fortran_nf_mm_2dbc_SOURCES_DIST = \ native_fortran/nf_mm_cl_blas.f90 \ native_fortran/fstarpu_mpi_mod.f90 \ native_fortran/fstarpu_mod.f90 native_fortran/nf_mm_2dbc.f90 @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@am_native_fortran_nf_mm_2dbc_OBJECTS = native_fortran/nf_mm_cl_blas.$(OBJEXT) \ @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.$(OBJEXT) \ @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.$(OBJEXT) \ @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_2dbc.$(OBJEXT) native_fortran_nf_mm_2dbc_OBJECTS = \ $(am_native_fortran_nf_mm_2dbc_OBJECTS) @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mm_2dbc_DEPENDENCIES = $(am__DEPENDENCIES_1) \ @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ $(am__DEPENDENCIES_1) am__native_fortran_nf_mm_task_build_SOURCES_DIST = \ native_fortran/nf_mm_cl.f90 native_fortran/fstarpu_mpi_mod.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_mm_task_build.f90 @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am_native_fortran_nf_mm_task_build_OBJECTS = native_fortran/nf_mm_cl.$(OBJEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.$(OBJEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.$(OBJEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_task_build.$(OBJEXT) native_fortran_nf_mm_task_build_OBJECTS = \ $(am_native_fortran_nf_mm_task_build_OBJECTS) native_fortran_nf_mm_task_build_DEPENDENCIES = am__native_fortran_nf_mpi_redux_SOURCES_DIST = \ native_fortran/fstarpu_mpi_mod.f90 \ native_fortran/fstarpu_mod.f90 native_fortran/nf_mpi_redux.f90 @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am_native_fortran_nf_mpi_redux_OBJECTS = native_fortran/fstarpu_mpi_mod.$(OBJEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.$(OBJEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mpi_redux.$(OBJEXT) native_fortran_nf_mpi_redux_OBJECTS = \ $(am_native_fortran_nf_mpi_redux_OBJECTS) native_fortran_nf_mpi_redux_DEPENDENCIES = am__native_fortran_nf_mpi_redux_tree_SOURCES_DIST = \ native_fortran/fstarpu_mpi_mod.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_mpi_redux_tree.f90 @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am_native_fortran_nf_mpi_redux_tree_OBJECTS = native_fortran/fstarpu_mpi_mod.$(OBJEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.$(OBJEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mpi_redux_tree.$(OBJEXT) native_fortran_nf_mpi_redux_tree_OBJECTS = \ $(am_native_fortran_nf_mpi_redux_tree_OBJECTS) native_fortran_nf_mpi_redux_tree_DEPENDENCIES = am__native_fortran_nf_redux_test_SOURCES_DIST = \ native_fortran/fstarpu_mpi_mod.f90 \ native_fortran/fstarpu_mod.f90 \ native_fortran/nf_redux_test.f90 @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@am_native_fortran_nf_redux_test_OBJECTS = native_fortran/fstarpu_mpi_mod.$(OBJEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.$(OBJEXT) \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_redux_test.$(OBJEXT) native_fortran_nf_redux_test_OBJECTS = \ $(am_native_fortran_nf_redux_test_OBJECTS) native_fortran_nf_redux_test_DEPENDENCIES = stencil_stencil5_SOURCES = stencil/stencil5.c stencil_stencil5_OBJECTS = stencil/stencil5.$(OBJEXT) stencil_stencil5_LDADD = $(LDADD) stencil_stencil5_lb_SOURCES = stencil/stencil5_lb.c stencil_stencil5_lb_OBJECTS = stencil/stencil5_lb.$(OBJEXT) stencil_stencil5_lb_LDADD = $(LDADD) am_user_datatype_user_datatype_OBJECTS = \ user_datatype/user_datatype.$(OBJEXT) \ user_datatype/my_interface.$(OBJEXT) user_datatype_user_datatype_OBJECTS = \ $(am_user_datatype_user_datatype_OBJECTS) user_datatype_user_datatype_LDADD = $(LDADD) am_user_datatype_user_datatype2_OBJECTS = \ user_datatype/user_datatype2.$(OBJEXT) \ user_datatype/my_interface.$(OBJEXT) user_datatype_user_datatype2_OBJECTS = \ $(am_user_datatype_user_datatype2_OBJECTS) user_datatype_user_datatype2_LDADD = $(LDADD) am_user_datatype_user_datatype_early_OBJECTS = \ user_datatype/user_datatype_early.$(OBJEXT) \ user_datatype/my_interface.$(OBJEXT) user_datatype_user_datatype_early_OBJECTS = \ $(am_user_datatype_user_datatype_early_OBJECTS) user_datatype_user_datatype_early_LDADD = $(LDADD) am_user_datatype_user_datatype_interface_OBJECTS = \ user_datatype/user_datatype_interface.$(OBJEXT) \ user_datatype/my_interface.$(OBJEXT) user_datatype_user_datatype_interface_OBJECTS = \ $(am_user_datatype_user_datatype_interface_OBJECTS) user_datatype_user_datatype_interface_LDADD = $(LDADD) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ../../examples/common/$(DEPDIR)/blas.Po \ ../../examples/interface/$(DEPDIR)/complex_interface.Po \ ./$(DEPDIR)/loader-loader.Po \ benchs/$(DEPDIR)/abstract_sendrecv_bench.Po \ benchs/$(DEPDIR)/bcast_bench-bcast_bench.Po \ benchs/$(DEPDIR)/bcast_bench-bench_helper.Po \ benchs/$(DEPDIR)/bench_helper.Po benchs/$(DEPDIR)/burst.Po \ benchs/$(DEPDIR)/burst_gemm.Po \ benchs/$(DEPDIR)/burst_helper.Po \ benchs/$(DEPDIR)/gemm_helper.Po \ benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Po \ benchs/$(DEPDIR)/sendrecv_bench.Po \ benchs/$(DEPDIR)/sendrecv_gemm_bench.Po \ benchs/$(DEPDIR)/sendrecv_parallel_tasks_bench.Po \ cache/$(DEPDIR)/cache.Po cache/$(DEPDIR)/cache_disable.Po \ cg/$(DEPDIR)/cg.Po comm/$(DEPDIR)/comm.Po \ comm/$(DEPDIR)/group.Po comm/$(DEPDIR)/mix_comm.Po \ complex/$(DEPDIR)/mpi_complex.Po filters/$(DEPDIR)/filter.Po \ matrix_decomposition/$(DEPDIR)/mpi_cholesky.Po \ matrix_decomposition/$(DEPDIR)/mpi_cholesky_codelets.Po \ matrix_decomposition/$(DEPDIR)/mpi_cholesky_distributed.Po \ matrix_decomposition/$(DEPDIR)/mpi_cholesky_kernels.Po \ matrix_decomposition/$(DEPDIR)/mpi_cholesky_models.Po \ matrix_decomposition/$(DEPDIR)/mpi_decomposition_matrix.Po \ matrix_decomposition/$(DEPDIR)/mpi_decomposition_params.Po \ matrix_mult/$(DEPDIR)/mm.Po matrix_mult/$(DEPDIR)/mm_2dbc.Po \ mpi_lu/$(DEPDIR)/pdlu.Po mpi_lu/$(DEPDIR)/pdlu_implicit.Po \ mpi_lu/$(DEPDIR)/pdlu_kernels.Po \ mpi_lu/$(DEPDIR)/plu_example_double.Po \ mpi_lu/$(DEPDIR)/plu_example_float.Po \ mpi_lu/$(DEPDIR)/plu_implicit_example_double.Po \ mpi_lu/$(DEPDIR)/plu_implicit_example_float.Po \ mpi_lu/$(DEPDIR)/plu_outofcore_example_double.Po \ mpi_lu/$(DEPDIR)/plu_outofcore_example_float.Po \ mpi_lu/$(DEPDIR)/plu_solve_double.Po \ mpi_lu/$(DEPDIR)/plu_solve_float.Po mpi_lu/$(DEPDIR)/pslu.Po \ mpi_lu/$(DEPDIR)/pslu_implicit.Po \ mpi_lu/$(DEPDIR)/pslu_kernels.Po \ mpi_redux/$(DEPDIR)/mpi_redux.Po \ mpi_redux/$(DEPDIR)/mpi_redux_autowrapup.Po \ mpi_redux/$(DEPDIR)/mpi_redux_tree.Po \ stencil/$(DEPDIR)/stencil5.Po stencil/$(DEPDIR)/stencil5_lb.Po \ user_datatype/$(DEPDIR)/my_interface.Po \ user_datatype/$(DEPDIR)/user_datatype.Po \ user_datatype/$(DEPDIR)/user_datatype2.Po \ user_datatype/$(DEPDIR)/user_datatype_early.Po \ user_datatype/$(DEPDIR)/user_datatype_interface.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = FCCOMPILE = $(FC) $(AM_FCFLAGS) $(FCFLAGS) LTFCCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(FC) $(AM_FCFLAGS) $(FCFLAGS) AM_V_FC = $(am__v_FC_@AM_V@) am__v_FC_ = $(am__v_FC_@AM_DEFAULT_V@) am__v_FC_0 = @echo " FC " $@; am__v_FC_1 = FCLINK = $(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_FCLD = $(am__v_FCLD_@AM_V@) am__v_FCLD_ = $(am__v_FCLD_@AM_DEFAULT_V@) am__v_FCLD_0 = @echo " FCLD " $@; am__v_FCLD_1 = SOURCES = $(benchs_bcast_bench_SOURCES) $(benchs_burst_SOURCES) \ $(benchs_burst_gemm_SOURCES) \ $(benchs_recv_wait_finalize_bench_SOURCES) \ $(benchs_sendrecv_bench_SOURCES) \ $(benchs_sendrecv_gemm_bench_SOURCES) \ $(benchs_sendrecv_parallel_tasks_bench_SOURCES) cache/cache.c \ cache/cache_disable.c $(cg_cg_SOURCES) comm/comm.c \ comm/group.c comm/mix_comm.c $(complex_mpi_complex_SOURCES) \ filters/filter.c loader.c \ $(matrix_decomposition_mpi_cholesky_SOURCES) \ $(matrix_decomposition_mpi_cholesky_distributed_SOURCES) \ matrix_mult/mm.c $(matrix_mult_mm_2dbc_SOURCES) \ $(mpi_lu_plu_example_double_SOURCES) \ $(mpi_lu_plu_example_float_SOURCES) \ $(mpi_lu_plu_implicit_example_double_SOURCES) \ $(mpi_lu_plu_implicit_example_float_SOURCES) \ $(mpi_lu_plu_outofcore_example_double_SOURCES) \ $(mpi_lu_plu_outofcore_example_float_SOURCES) \ mpi_redux/mpi_redux.c mpi_redux/mpi_redux_autowrapup.c \ mpi_redux/mpi_redux_tree.c \ $(native_fortran_nf_basic_ring_SOURCES) \ $(native_fortran_nf_mm_SOURCES) \ $(native_fortran_nf_mm_2dbc_SOURCES) \ $(native_fortran_nf_mm_task_build_SOURCES) \ $(native_fortran_nf_mpi_redux_SOURCES) \ $(native_fortran_nf_mpi_redux_tree_SOURCES) \ $(native_fortran_nf_redux_test_SOURCES) stencil/stencil5.c \ stencil/stencil5_lb.c $(user_datatype_user_datatype_SOURCES) \ $(user_datatype_user_datatype2_SOURCES) \ $(user_datatype_user_datatype_early_SOURCES) \ $(user_datatype_user_datatype_interface_SOURCES) DIST_SOURCES = $(benchs_bcast_bench_SOURCES) $(benchs_burst_SOURCES) \ $(am__benchs_burst_gemm_SOURCES_DIST) \ $(benchs_recv_wait_finalize_bench_SOURCES) \ $(benchs_sendrecv_bench_SOURCES) \ $(am__benchs_sendrecv_gemm_bench_SOURCES_DIST) \ $(benchs_sendrecv_parallel_tasks_bench_SOURCES) cache/cache.c \ cache/cache_disable.c $(am__cg_cg_SOURCES_DIST) comm/comm.c \ comm/group.c comm/mix_comm.c $(complex_mpi_complex_SOURCES) \ filters/filter.c loader.c \ $(am__matrix_decomposition_mpi_cholesky_SOURCES_DIST) \ $(am__matrix_decomposition_mpi_cholesky_distributed_SOURCES_DIST) \ matrix_mult/mm.c $(am__matrix_mult_mm_2dbc_SOURCES_DIST) \ $(am__mpi_lu_plu_example_double_SOURCES_DIST) \ $(am__mpi_lu_plu_example_float_SOURCES_DIST) \ $(am__mpi_lu_plu_implicit_example_double_SOURCES_DIST) \ $(am__mpi_lu_plu_implicit_example_float_SOURCES_DIST) \ $(am__mpi_lu_plu_outofcore_example_double_SOURCES_DIST) \ $(am__mpi_lu_plu_outofcore_example_float_SOURCES_DIST) \ mpi_redux/mpi_redux.c mpi_redux/mpi_redux_autowrapup.c \ mpi_redux/mpi_redux_tree.c \ $(am__native_fortran_nf_basic_ring_SOURCES_DIST) \ $(am__native_fortran_nf_mm_SOURCES_DIST) \ $(am__native_fortran_nf_mm_2dbc_SOURCES_DIST) \ $(am__native_fortran_nf_mm_task_build_SOURCES_DIST) \ $(am__native_fortran_nf_mpi_redux_SOURCES_DIST) \ $(am__native_fortran_nf_mpi_redux_tree_SOURCES_DIST) \ $(am__native_fortran_nf_redux_test_SOURCES_DIST) \ stencil/stencil5.c stencil/stencil5_lb.c \ $(user_datatype_user_datatype_SOURCES) \ $(user_datatype_user_datatype2_SOURCES) \ $(user_datatype_user_datatype_early_SOURCES) \ $(user_datatype_user_datatype_interface_SOURCES) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) AM_RECURSIVE_TARGETS = check recheck TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-loader.mk \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Thibaut Lambert # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # CC = $(MPICC) CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = $(MPIFORT) FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la \ $(STARPU_EXPORTED_LIBS) $(STARPU_CUDA_LDFLAGS) -lm \ $(MAGMA_LIBS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = $(MPI_RUN_ENV) LAUNCHER = $(STARPU_MPIEXEC) AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(MAGMA_CFLAGS) $(APP_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) $(MAGMA_CFLAGS) $(APP_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) $(MAGMA_CFLAGS) $(APP_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) $(MAGMA_CFLAGS) $(APP_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile CCLD = $(MPICC) FCLD = $(MPIFORT) # When GNU parallel is available and -j is passed to make, run tests through # parallel, using a "starpu" semaphore. # Also make test shell scripts run its tests through parallel, using a # "substarpu" semaphore. This brings some overload, but only one level. @HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') @STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 @STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) @STARPU_SIMGRID_TRUE@LOADER_BIN = $(LAUNCHER) @STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ @STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) @STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" @STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) AM_TESTS_FD_REDIRECT = 9>&2 starpu_mpi_EXAMPLES = stencil/stencil5 $(am__append_9) cache/cache \ cache/cache_disable $(am__append_11) $(am__append_12) \ $(am__append_14) $(am__append_16) $(am__append_17) \ $(am__append_19) $(am__append_21) $(am__append_23) \ complex/mpi_complex $(am__append_24) $(am__append_28) \ $(am__append_29) $(am__append_30) BUILT_SOURCES = CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log *.mod native_fortran/fstarpu_mod.f90 native_fortran/fstarpu_mpi_mod.f90 EXTRA_DIST = \ mpi_lu/mpi_lu-float.h \ mpi_lu/mpi_lu-double.h \ mpi_lu/plu_example.c \ mpi_lu/plu_implicit_example.c \ mpi_lu/plu_outofcore_example.c \ mpi_lu/plu_solve.c \ mpi_lu/pxlu.h \ mpi_lu/pxlu.c \ mpi_lu/pxlu_implicit.c \ mpi_lu/pxlu_kernels.h \ mpi_lu/pxlu_kernels.c \ matrix_mult/mm_2dbc.c \ native_fortran/nf_mm_2dbc.f90 \ matrix_decomposition/mpi_cholesky.h \ matrix_decomposition/mpi_cholesky_codelets.h \ matrix_decomposition/mpi_cholesky_kernels.h \ matrix_decomposition/mpi_cholesky_models.h \ matrix_decomposition/mpi_decomposition_params.h \ matrix_decomposition/mpi_decomposition_matrix.h \ user_datatype/my_interface.h \ benchs/abstract_sendrecv_bench.h\ benchs/bench_helper.h \ benchs/gemm_helper.h \ benchs/burst_helper.h \ helper.h \ perf.sh examplebindir = $(libdir)/starpu/mpi AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_example_float_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_example_float_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_example_float.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_float.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu.c \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_example_double_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_example_double_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_example_double.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_double.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu.c \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_implicit_example_float_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_implicit_example_float_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_implicit_example_float.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_float.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_implicit.c \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_implicit_example_double_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_implicit_example_double_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_implicit_example_double.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_double.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_implicit.c \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_outofcore_example_float_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_outofcore_example_float_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_outofcore_example_float.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_float.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pslu_implicit.c \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_outofcore_example_double_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm @STARPU_NO_BLAS_LIB_FALSE@mpi_lu_plu_outofcore_example_double_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_outofcore_example_double.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/plu_solve_double.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ mpi_lu/pdlu_implicit.c \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@matrix_decomposition_mpi_cholesky_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky.c \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_models.c \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_codelets.c \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_params.c \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_matrix.c \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@matrix_decomposition_mpi_cholesky_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm @STARPU_NO_BLAS_LIB_FALSE@matrix_decomposition_mpi_cholesky_distributed_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_distributed.c \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_models.c \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_cholesky_codelets.c \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_params.c \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_decomposition/mpi_decomposition_matrix.c \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@matrix_decomposition_mpi_cholesky_distributed_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@cg_cg_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ cg/cg.c \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@cg_cg_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@@STARPU_SIMGRID_FALSE@ $(STARPU_BLAS_LDFLAGS) matrix_mult_mm_LDADD = \ -lm @STARPU_NO_BLAS_LIB_FALSE@matrix_mult_mm_2dbc_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ matrix_mult/mm_2dbc.c \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@matrix_mult_mm_2dbc_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm mpi_redux_mpi_redux_LDADD = \ -lm mpi_redux_mpi_redux_autowrapup_LDADD = \ -lm mpi_redux_mpi_redux_tree_LDADD = \ -lm @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mm_SOURCES = \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_cl.f90 \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.f90 \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.f90 \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm.f90 @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mm_LDADD = \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ -lm @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mm_task_build_SOURCES = \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_cl.f90 \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.f90 \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.f90 \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_task_build.f90 @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mm_task_build_LDADD = \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ -lm @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_basic_ring_SOURCES = \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.f90 \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.f90 \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_basic_ring.f90 @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_basic_ring_LDADD = \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ -lm @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mm_2dbc_SOURCES = \ @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_cl_blas.f90 \ @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.f90 \ @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.f90 \ @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mm_2dbc.f90 @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mm_2dbc_LDADD = \ @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ $(STARPU_LIBNUMA_LDFLAGS) \ @STARPU_HAVE_LIBLAPACK_TRUE@@STARPU_HAVE_MPIFORT_TRUE@@STARPU_NO_BLAS_LIB_FALSE@@STARPU_SANITIZE_FALSE@ $(STARPU_BLAS_LDFLAGS) -lm -llapack @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mpi_redux_SOURCES = \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.f90 \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.f90 \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mpi_redux.f90 @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mpi_redux_LDADD = \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ -lm @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mpi_redux_tree_SOURCES = \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.f90 \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.f90 \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_mpi_redux_tree.f90 @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_mpi_redux_tree_LDADD = \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ -lm @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_redux_test_SOURCES = \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mpi_mod.f90 \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/fstarpu_mod.f90 \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ native_fortran/nf_redux_test.f90 @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran_nf_redux_test_LDADD = \ @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@ -lm complex_mpi_complex_SOURCES = \ complex/mpi_complex.c \ ../../examples/interface/complex_interface.c user_datatype_user_datatype_SOURCES = \ user_datatype/user_datatype.c \ user_datatype/my_interface.c user_datatype_user_datatype2_SOURCES = \ user_datatype/user_datatype2.c \ user_datatype/my_interface.c user_datatype_user_datatype_early_SOURCES = \ user_datatype/user_datatype_early.c \ user_datatype/my_interface.c user_datatype_user_datatype_interface_SOURCES = \ user_datatype/user_datatype_interface.c \ user_datatype/my_interface.c benchs_sendrecv_bench_SOURCES = benchs/sendrecv_bench.c \ benchs/bench_helper.c benchs/abstract_sendrecv_bench.c benchs_bcast_bench_SOURCES = benchs/bcast_bench.c \ benchs/bench_helper.c benchs_bcast_bench_LDADD = $(MPI_SYNC_CLOCKS_LIBS) benchs_bcast_bench_CFLAGS = $(MPI_SYNC_CLOCKS_CFLAGS) benchs_recv_wait_finalize_bench_SOURCES = benchs/recv_wait_finalize_bench.c benchs_recv_wait_finalize_bench_LDADD = $(MPI_SYNC_CLOCKS_LIBS) benchs_recv_wait_finalize_bench_CFLAGS = $(MPI_SYNC_CLOCKS_CFLAGS) benchs_sendrecv_parallel_tasks_bench_SOURCES = \ benchs/sendrecv_parallel_tasks_bench.c benchs/bench_helper.c benchs_burst_SOURCES = benchs/burst.c benchs/burst_helper.c @STARPU_NO_BLAS_LIB_FALSE@benchs_sendrecv_gemm_bench_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/sendrecv_gemm_bench.c \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/bench_helper.c \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/gemm_helper.c \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/abstract_sendrecv_bench.c \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@benchs_sendrecv_gemm_bench_LDADD = $(STARPU_BLAS_LDFLAGS) @STARPU_NO_BLAS_LIB_FALSE@benchs_burst_gemm_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/burst_gemm.c \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/gemm_helper.c \ @STARPU_NO_BLAS_LIB_FALSE@ benchs/burst_helper.c \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@benchs_burst_gemm_LDADD = $(STARPU_BLAS_LDFLAGS) all: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) all-am .SUFFIXES: .SUFFIXES: .c .cu .cubin .f90 .hip .lo .log .o .obj .test .test$(EXEEXT) .trs $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign mpi/examples/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign mpi/examples/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-checkPROGRAMS: @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list install-examplebinPROGRAMS: $(examplebin_PROGRAMS) @$(NORMAL_INSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ fi; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ while read p p1; do if test -f $$p \ || test -f $$p1 \ ; then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' \ -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ } \ ; done uninstall-examplebinPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' \ `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(examplebindir)" && rm -f $$files clean-examplebinPROGRAMS: @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list clean-noinstPROGRAMS: @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list benchs/$(am__dirstamp): @$(MKDIR_P) benchs @: > benchs/$(am__dirstamp) benchs/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) benchs/$(DEPDIR) @: > benchs/$(DEPDIR)/$(am__dirstamp) benchs/bcast_bench-bcast_bench.$(OBJEXT): benchs/$(am__dirstamp) \ benchs/$(DEPDIR)/$(am__dirstamp) benchs/bcast_bench-bench_helper.$(OBJEXT): benchs/$(am__dirstamp) \ benchs/$(DEPDIR)/$(am__dirstamp) benchs/bcast_bench$(EXEEXT): $(benchs_bcast_bench_OBJECTS) $(benchs_bcast_bench_DEPENDENCIES) $(EXTRA_benchs_bcast_bench_DEPENDENCIES) benchs/$(am__dirstamp) @rm -f benchs/bcast_bench$(EXEEXT) $(AM_V_CCLD)$(benchs_bcast_bench_LINK) $(benchs_bcast_bench_OBJECTS) $(benchs_bcast_bench_LDADD) $(LIBS) benchs/burst.$(OBJEXT): benchs/$(am__dirstamp) \ benchs/$(DEPDIR)/$(am__dirstamp) benchs/burst_helper.$(OBJEXT): benchs/$(am__dirstamp) \ benchs/$(DEPDIR)/$(am__dirstamp) benchs/burst$(EXEEXT): $(benchs_burst_OBJECTS) $(benchs_burst_DEPENDENCIES) $(EXTRA_benchs_burst_DEPENDENCIES) benchs/$(am__dirstamp) @rm -f benchs/burst$(EXEEXT) $(AM_V_CCLD)$(LINK) $(benchs_burst_OBJECTS) $(benchs_burst_LDADD) $(LIBS) benchs/burst_gemm.$(OBJEXT): benchs/$(am__dirstamp) \ benchs/$(DEPDIR)/$(am__dirstamp) benchs/gemm_helper.$(OBJEXT): benchs/$(am__dirstamp) \ benchs/$(DEPDIR)/$(am__dirstamp) ../../examples/common/$(am__dirstamp): @$(MKDIR_P) ../../examples/common @: > ../../examples/common/$(am__dirstamp) ../../examples/common/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) ../../examples/common/$(DEPDIR) @: > ../../examples/common/$(DEPDIR)/$(am__dirstamp) ../../examples/common/blas.$(OBJEXT): \ ../../examples/common/$(am__dirstamp) \ ../../examples/common/$(DEPDIR)/$(am__dirstamp) benchs/burst_gemm$(EXEEXT): $(benchs_burst_gemm_OBJECTS) $(benchs_burst_gemm_DEPENDENCIES) $(EXTRA_benchs_burst_gemm_DEPENDENCIES) benchs/$(am__dirstamp) @rm -f benchs/burst_gemm$(EXEEXT) $(AM_V_CCLD)$(LINK) $(benchs_burst_gemm_OBJECTS) $(benchs_burst_gemm_LDADD) $(LIBS) benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.$(OBJEXT): \ benchs/$(am__dirstamp) benchs/$(DEPDIR)/$(am__dirstamp) benchs/recv_wait_finalize_bench$(EXEEXT): $(benchs_recv_wait_finalize_bench_OBJECTS) $(benchs_recv_wait_finalize_bench_DEPENDENCIES) $(EXTRA_benchs_recv_wait_finalize_bench_DEPENDENCIES) benchs/$(am__dirstamp) @rm -f benchs/recv_wait_finalize_bench$(EXEEXT) $(AM_V_CCLD)$(benchs_recv_wait_finalize_bench_LINK) $(benchs_recv_wait_finalize_bench_OBJECTS) $(benchs_recv_wait_finalize_bench_LDADD) $(LIBS) benchs/sendrecv_bench.$(OBJEXT): benchs/$(am__dirstamp) \ benchs/$(DEPDIR)/$(am__dirstamp) benchs/bench_helper.$(OBJEXT): benchs/$(am__dirstamp) \ benchs/$(DEPDIR)/$(am__dirstamp) benchs/abstract_sendrecv_bench.$(OBJEXT): benchs/$(am__dirstamp) \ benchs/$(DEPDIR)/$(am__dirstamp) benchs/sendrecv_bench$(EXEEXT): $(benchs_sendrecv_bench_OBJECTS) $(benchs_sendrecv_bench_DEPENDENCIES) $(EXTRA_benchs_sendrecv_bench_DEPENDENCIES) benchs/$(am__dirstamp) @rm -f benchs/sendrecv_bench$(EXEEXT) $(AM_V_CCLD)$(LINK) $(benchs_sendrecv_bench_OBJECTS) $(benchs_sendrecv_bench_LDADD) $(LIBS) benchs/sendrecv_gemm_bench.$(OBJEXT): benchs/$(am__dirstamp) \ benchs/$(DEPDIR)/$(am__dirstamp) benchs/sendrecv_gemm_bench$(EXEEXT): $(benchs_sendrecv_gemm_bench_OBJECTS) $(benchs_sendrecv_gemm_bench_DEPENDENCIES) $(EXTRA_benchs_sendrecv_gemm_bench_DEPENDENCIES) benchs/$(am__dirstamp) @rm -f benchs/sendrecv_gemm_bench$(EXEEXT) $(AM_V_CCLD)$(LINK) $(benchs_sendrecv_gemm_bench_OBJECTS) $(benchs_sendrecv_gemm_bench_LDADD) $(LIBS) benchs/sendrecv_parallel_tasks_bench.$(OBJEXT): \ benchs/$(am__dirstamp) benchs/$(DEPDIR)/$(am__dirstamp) benchs/sendrecv_parallel_tasks_bench$(EXEEXT): $(benchs_sendrecv_parallel_tasks_bench_OBJECTS) $(benchs_sendrecv_parallel_tasks_bench_DEPENDENCIES) $(EXTRA_benchs_sendrecv_parallel_tasks_bench_DEPENDENCIES) benchs/$(am__dirstamp) @rm -f benchs/sendrecv_parallel_tasks_bench$(EXEEXT) $(AM_V_CCLD)$(LINK) $(benchs_sendrecv_parallel_tasks_bench_OBJECTS) $(benchs_sendrecv_parallel_tasks_bench_LDADD) $(LIBS) cache/$(am__dirstamp): @$(MKDIR_P) cache @: > cache/$(am__dirstamp) cache/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) cache/$(DEPDIR) @: > cache/$(DEPDIR)/$(am__dirstamp) cache/cache.$(OBJEXT): cache/$(am__dirstamp) \ cache/$(DEPDIR)/$(am__dirstamp) cache/cache$(EXEEXT): $(cache_cache_OBJECTS) $(cache_cache_DEPENDENCIES) $(EXTRA_cache_cache_DEPENDENCIES) cache/$(am__dirstamp) @rm -f cache/cache$(EXEEXT) $(AM_V_CCLD)$(LINK) $(cache_cache_OBJECTS) $(cache_cache_LDADD) $(LIBS) cache/cache_disable.$(OBJEXT): cache/$(am__dirstamp) \ cache/$(DEPDIR)/$(am__dirstamp) cache/cache_disable$(EXEEXT): $(cache_cache_disable_OBJECTS) $(cache_cache_disable_DEPENDENCIES) $(EXTRA_cache_cache_disable_DEPENDENCIES) cache/$(am__dirstamp) @rm -f cache/cache_disable$(EXEEXT) $(AM_V_CCLD)$(LINK) $(cache_cache_disable_OBJECTS) $(cache_cache_disable_LDADD) $(LIBS) cg/$(am__dirstamp): @$(MKDIR_P) cg @: > cg/$(am__dirstamp) cg/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) cg/$(DEPDIR) @: > cg/$(DEPDIR)/$(am__dirstamp) cg/cg.$(OBJEXT): cg/$(am__dirstamp) cg/$(DEPDIR)/$(am__dirstamp) cg/cg$(EXEEXT): $(cg_cg_OBJECTS) $(cg_cg_DEPENDENCIES) $(EXTRA_cg_cg_DEPENDENCIES) cg/$(am__dirstamp) @rm -f cg/cg$(EXEEXT) $(AM_V_CCLD)$(LINK) $(cg_cg_OBJECTS) $(cg_cg_LDADD) $(LIBS) comm/$(am__dirstamp): @$(MKDIR_P) comm @: > comm/$(am__dirstamp) comm/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) comm/$(DEPDIR) @: > comm/$(DEPDIR)/$(am__dirstamp) comm/comm.$(OBJEXT): comm/$(am__dirstamp) \ comm/$(DEPDIR)/$(am__dirstamp) comm/comm$(EXEEXT): $(comm_comm_OBJECTS) $(comm_comm_DEPENDENCIES) $(EXTRA_comm_comm_DEPENDENCIES) comm/$(am__dirstamp) @rm -f comm/comm$(EXEEXT) $(AM_V_CCLD)$(LINK) $(comm_comm_OBJECTS) $(comm_comm_LDADD) $(LIBS) comm/group.$(OBJEXT): comm/$(am__dirstamp) \ comm/$(DEPDIR)/$(am__dirstamp) comm/group$(EXEEXT): $(comm_group_OBJECTS) $(comm_group_DEPENDENCIES) $(EXTRA_comm_group_DEPENDENCIES) comm/$(am__dirstamp) @rm -f comm/group$(EXEEXT) $(AM_V_CCLD)$(LINK) $(comm_group_OBJECTS) $(comm_group_LDADD) $(LIBS) comm/mix_comm.$(OBJEXT): comm/$(am__dirstamp) \ comm/$(DEPDIR)/$(am__dirstamp) comm/mix_comm$(EXEEXT): $(comm_mix_comm_OBJECTS) $(comm_mix_comm_DEPENDENCIES) $(EXTRA_comm_mix_comm_DEPENDENCIES) comm/$(am__dirstamp) @rm -f comm/mix_comm$(EXEEXT) $(AM_V_CCLD)$(LINK) $(comm_mix_comm_OBJECTS) $(comm_mix_comm_LDADD) $(LIBS) complex/$(am__dirstamp): @$(MKDIR_P) complex @: > complex/$(am__dirstamp) complex/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) complex/$(DEPDIR) @: > complex/$(DEPDIR)/$(am__dirstamp) complex/mpi_complex.$(OBJEXT): complex/$(am__dirstamp) \ complex/$(DEPDIR)/$(am__dirstamp) ../../examples/interface/$(am__dirstamp): @$(MKDIR_P) ../../examples/interface @: > ../../examples/interface/$(am__dirstamp) ../../examples/interface/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) ../../examples/interface/$(DEPDIR) @: > ../../examples/interface/$(DEPDIR)/$(am__dirstamp) ../../examples/interface/complex_interface.$(OBJEXT): \ ../../examples/interface/$(am__dirstamp) \ ../../examples/interface/$(DEPDIR)/$(am__dirstamp) complex/mpi_complex$(EXEEXT): $(complex_mpi_complex_OBJECTS) $(complex_mpi_complex_DEPENDENCIES) $(EXTRA_complex_mpi_complex_DEPENDENCIES) complex/$(am__dirstamp) @rm -f complex/mpi_complex$(EXEEXT) $(AM_V_CCLD)$(LINK) $(complex_mpi_complex_OBJECTS) $(complex_mpi_complex_LDADD) $(LIBS) filters/$(am__dirstamp): @$(MKDIR_P) filters @: > filters/$(am__dirstamp) filters/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) filters/$(DEPDIR) @: > filters/$(DEPDIR)/$(am__dirstamp) filters/filter.$(OBJEXT): filters/$(am__dirstamp) \ filters/$(DEPDIR)/$(am__dirstamp) filters/filter$(EXEEXT): $(filters_filter_OBJECTS) $(filters_filter_DEPENDENCIES) $(EXTRA_filters_filter_DEPENDENCIES) filters/$(am__dirstamp) @rm -f filters/filter$(EXEEXT) $(AM_V_CCLD)$(LINK) $(filters_filter_OBJECTS) $(filters_filter_LDADD) $(LIBS) loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) @rm -f loader$(EXEEXT) $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) matrix_decomposition/$(am__dirstamp): @$(MKDIR_P) matrix_decomposition @: > matrix_decomposition/$(am__dirstamp) matrix_decomposition/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) matrix_decomposition/$(DEPDIR) @: > matrix_decomposition/$(DEPDIR)/$(am__dirstamp) matrix_decomposition/mpi_cholesky.$(OBJEXT): \ matrix_decomposition/$(am__dirstamp) \ matrix_decomposition/$(DEPDIR)/$(am__dirstamp) matrix_decomposition/mpi_cholesky_models.$(OBJEXT): \ matrix_decomposition/$(am__dirstamp) \ matrix_decomposition/$(DEPDIR)/$(am__dirstamp) matrix_decomposition/mpi_cholesky_kernels.$(OBJEXT): \ matrix_decomposition/$(am__dirstamp) \ matrix_decomposition/$(DEPDIR)/$(am__dirstamp) matrix_decomposition/mpi_cholesky_codelets.$(OBJEXT): \ matrix_decomposition/$(am__dirstamp) \ matrix_decomposition/$(DEPDIR)/$(am__dirstamp) matrix_decomposition/mpi_decomposition_params.$(OBJEXT): \ matrix_decomposition/$(am__dirstamp) \ matrix_decomposition/$(DEPDIR)/$(am__dirstamp) matrix_decomposition/mpi_decomposition_matrix.$(OBJEXT): \ matrix_decomposition/$(am__dirstamp) \ matrix_decomposition/$(DEPDIR)/$(am__dirstamp) matrix_decomposition/mpi_cholesky$(EXEEXT): $(matrix_decomposition_mpi_cholesky_OBJECTS) $(matrix_decomposition_mpi_cholesky_DEPENDENCIES) $(EXTRA_matrix_decomposition_mpi_cholesky_DEPENDENCIES) matrix_decomposition/$(am__dirstamp) @rm -f matrix_decomposition/mpi_cholesky$(EXEEXT) $(AM_V_CCLD)$(LINK) $(matrix_decomposition_mpi_cholesky_OBJECTS) $(matrix_decomposition_mpi_cholesky_LDADD) $(LIBS) matrix_decomposition/mpi_cholesky_distributed.$(OBJEXT): \ matrix_decomposition/$(am__dirstamp) \ matrix_decomposition/$(DEPDIR)/$(am__dirstamp) matrix_decomposition/mpi_cholesky_distributed$(EXEEXT): $(matrix_decomposition_mpi_cholesky_distributed_OBJECTS) $(matrix_decomposition_mpi_cholesky_distributed_DEPENDENCIES) $(EXTRA_matrix_decomposition_mpi_cholesky_distributed_DEPENDENCIES) matrix_decomposition/$(am__dirstamp) @rm -f matrix_decomposition/mpi_cholesky_distributed$(EXEEXT) $(AM_V_CCLD)$(LINK) $(matrix_decomposition_mpi_cholesky_distributed_OBJECTS) $(matrix_decomposition_mpi_cholesky_distributed_LDADD) $(LIBS) matrix_mult/$(am__dirstamp): @$(MKDIR_P) matrix_mult @: > matrix_mult/$(am__dirstamp) matrix_mult/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) matrix_mult/$(DEPDIR) @: > matrix_mult/$(DEPDIR)/$(am__dirstamp) matrix_mult/mm.$(OBJEXT): matrix_mult/$(am__dirstamp) \ matrix_mult/$(DEPDIR)/$(am__dirstamp) matrix_mult/mm$(EXEEXT): $(matrix_mult_mm_OBJECTS) $(matrix_mult_mm_DEPENDENCIES) $(EXTRA_matrix_mult_mm_DEPENDENCIES) matrix_mult/$(am__dirstamp) @rm -f matrix_mult/mm$(EXEEXT) $(AM_V_CCLD)$(LINK) $(matrix_mult_mm_OBJECTS) $(matrix_mult_mm_LDADD) $(LIBS) matrix_mult/mm_2dbc.$(OBJEXT): matrix_mult/$(am__dirstamp) \ matrix_mult/$(DEPDIR)/$(am__dirstamp) matrix_mult/mm_2dbc$(EXEEXT): $(matrix_mult_mm_2dbc_OBJECTS) $(matrix_mult_mm_2dbc_DEPENDENCIES) $(EXTRA_matrix_mult_mm_2dbc_DEPENDENCIES) matrix_mult/$(am__dirstamp) @rm -f matrix_mult/mm_2dbc$(EXEEXT) $(AM_V_CCLD)$(LINK) $(matrix_mult_mm_2dbc_OBJECTS) $(matrix_mult_mm_2dbc_LDADD) $(LIBS) mpi_lu/$(am__dirstamp): @$(MKDIR_P) mpi_lu @: > mpi_lu/$(am__dirstamp) mpi_lu/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) mpi_lu/$(DEPDIR) @: > mpi_lu/$(DEPDIR)/$(am__dirstamp) mpi_lu/plu_example_double.$(OBJEXT): mpi_lu/$(am__dirstamp) \ mpi_lu/$(DEPDIR)/$(am__dirstamp) mpi_lu/plu_solve_double.$(OBJEXT): mpi_lu/$(am__dirstamp) \ mpi_lu/$(DEPDIR)/$(am__dirstamp) mpi_lu/pdlu_kernels.$(OBJEXT): mpi_lu/$(am__dirstamp) \ mpi_lu/$(DEPDIR)/$(am__dirstamp) mpi_lu/pdlu.$(OBJEXT): mpi_lu/$(am__dirstamp) \ mpi_lu/$(DEPDIR)/$(am__dirstamp) mpi_lu/plu_example_double$(EXEEXT): $(mpi_lu_plu_example_double_OBJECTS) $(mpi_lu_plu_example_double_DEPENDENCIES) $(EXTRA_mpi_lu_plu_example_double_DEPENDENCIES) mpi_lu/$(am__dirstamp) @rm -f mpi_lu/plu_example_double$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_lu_plu_example_double_OBJECTS) $(mpi_lu_plu_example_double_LDADD) $(LIBS) mpi_lu/plu_example_float.$(OBJEXT): mpi_lu/$(am__dirstamp) \ mpi_lu/$(DEPDIR)/$(am__dirstamp) mpi_lu/plu_solve_float.$(OBJEXT): mpi_lu/$(am__dirstamp) \ mpi_lu/$(DEPDIR)/$(am__dirstamp) mpi_lu/pslu_kernels.$(OBJEXT): mpi_lu/$(am__dirstamp) \ mpi_lu/$(DEPDIR)/$(am__dirstamp) mpi_lu/pslu.$(OBJEXT): mpi_lu/$(am__dirstamp) \ mpi_lu/$(DEPDIR)/$(am__dirstamp) mpi_lu/plu_example_float$(EXEEXT): $(mpi_lu_plu_example_float_OBJECTS) $(mpi_lu_plu_example_float_DEPENDENCIES) $(EXTRA_mpi_lu_plu_example_float_DEPENDENCIES) mpi_lu/$(am__dirstamp) @rm -f mpi_lu/plu_example_float$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_lu_plu_example_float_OBJECTS) $(mpi_lu_plu_example_float_LDADD) $(LIBS) mpi_lu/plu_implicit_example_double.$(OBJEXT): mpi_lu/$(am__dirstamp) \ mpi_lu/$(DEPDIR)/$(am__dirstamp) mpi_lu/pdlu_implicit.$(OBJEXT): mpi_lu/$(am__dirstamp) \ mpi_lu/$(DEPDIR)/$(am__dirstamp) mpi_lu/plu_implicit_example_double$(EXEEXT): $(mpi_lu_plu_implicit_example_double_OBJECTS) $(mpi_lu_plu_implicit_example_double_DEPENDENCIES) $(EXTRA_mpi_lu_plu_implicit_example_double_DEPENDENCIES) mpi_lu/$(am__dirstamp) @rm -f mpi_lu/plu_implicit_example_double$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_lu_plu_implicit_example_double_OBJECTS) $(mpi_lu_plu_implicit_example_double_LDADD) $(LIBS) mpi_lu/plu_implicit_example_float.$(OBJEXT): mpi_lu/$(am__dirstamp) \ mpi_lu/$(DEPDIR)/$(am__dirstamp) mpi_lu/pslu_implicit.$(OBJEXT): mpi_lu/$(am__dirstamp) \ mpi_lu/$(DEPDIR)/$(am__dirstamp) mpi_lu/plu_implicit_example_float$(EXEEXT): $(mpi_lu_plu_implicit_example_float_OBJECTS) $(mpi_lu_plu_implicit_example_float_DEPENDENCIES) $(EXTRA_mpi_lu_plu_implicit_example_float_DEPENDENCIES) mpi_lu/$(am__dirstamp) @rm -f mpi_lu/plu_implicit_example_float$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_lu_plu_implicit_example_float_OBJECTS) $(mpi_lu_plu_implicit_example_float_LDADD) $(LIBS) mpi_lu/plu_outofcore_example_double.$(OBJEXT): mpi_lu/$(am__dirstamp) \ mpi_lu/$(DEPDIR)/$(am__dirstamp) mpi_lu/plu_outofcore_example_double$(EXEEXT): $(mpi_lu_plu_outofcore_example_double_OBJECTS) $(mpi_lu_plu_outofcore_example_double_DEPENDENCIES) $(EXTRA_mpi_lu_plu_outofcore_example_double_DEPENDENCIES) mpi_lu/$(am__dirstamp) @rm -f mpi_lu/plu_outofcore_example_double$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_lu_plu_outofcore_example_double_OBJECTS) $(mpi_lu_plu_outofcore_example_double_LDADD) $(LIBS) mpi_lu/plu_outofcore_example_float.$(OBJEXT): mpi_lu/$(am__dirstamp) \ mpi_lu/$(DEPDIR)/$(am__dirstamp) mpi_lu/plu_outofcore_example_float$(EXEEXT): $(mpi_lu_plu_outofcore_example_float_OBJECTS) $(mpi_lu_plu_outofcore_example_float_DEPENDENCIES) $(EXTRA_mpi_lu_plu_outofcore_example_float_DEPENDENCIES) mpi_lu/$(am__dirstamp) @rm -f mpi_lu/plu_outofcore_example_float$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_lu_plu_outofcore_example_float_OBJECTS) $(mpi_lu_plu_outofcore_example_float_LDADD) $(LIBS) mpi_redux/$(am__dirstamp): @$(MKDIR_P) mpi_redux @: > mpi_redux/$(am__dirstamp) mpi_redux/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) mpi_redux/$(DEPDIR) @: > mpi_redux/$(DEPDIR)/$(am__dirstamp) mpi_redux/mpi_redux.$(OBJEXT): mpi_redux/$(am__dirstamp) \ mpi_redux/$(DEPDIR)/$(am__dirstamp) mpi_redux/mpi_redux$(EXEEXT): $(mpi_redux_mpi_redux_OBJECTS) $(mpi_redux_mpi_redux_DEPENDENCIES) $(EXTRA_mpi_redux_mpi_redux_DEPENDENCIES) mpi_redux/$(am__dirstamp) @rm -f mpi_redux/mpi_redux$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_redux_mpi_redux_OBJECTS) $(mpi_redux_mpi_redux_LDADD) $(LIBS) mpi_redux/mpi_redux_autowrapup.$(OBJEXT): mpi_redux/$(am__dirstamp) \ mpi_redux/$(DEPDIR)/$(am__dirstamp) mpi_redux/mpi_redux_autowrapup$(EXEEXT): $(mpi_redux_mpi_redux_autowrapup_OBJECTS) $(mpi_redux_mpi_redux_autowrapup_DEPENDENCIES) $(EXTRA_mpi_redux_mpi_redux_autowrapup_DEPENDENCIES) mpi_redux/$(am__dirstamp) @rm -f mpi_redux/mpi_redux_autowrapup$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_redux_mpi_redux_autowrapup_OBJECTS) $(mpi_redux_mpi_redux_autowrapup_LDADD) $(LIBS) mpi_redux/mpi_redux_tree.$(OBJEXT): mpi_redux/$(am__dirstamp) \ mpi_redux/$(DEPDIR)/$(am__dirstamp) mpi_redux/mpi_redux_tree$(EXEEXT): $(mpi_redux_mpi_redux_tree_OBJECTS) $(mpi_redux_mpi_redux_tree_DEPENDENCIES) $(EXTRA_mpi_redux_mpi_redux_tree_DEPENDENCIES) mpi_redux/$(am__dirstamp) @rm -f mpi_redux/mpi_redux_tree$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_redux_mpi_redux_tree_OBJECTS) $(mpi_redux_mpi_redux_tree_LDADD) $(LIBS) native_fortran/$(am__dirstamp): @$(MKDIR_P) native_fortran @: > native_fortran/$(am__dirstamp) native_fortran/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) native_fortran/$(DEPDIR) @: > native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/fstarpu_mpi_mod.$(OBJEXT): \ native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/fstarpu_mod.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_basic_ring.$(OBJEXT): \ native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_basic_ring$(EXEEXT): $(native_fortran_nf_basic_ring_OBJECTS) $(native_fortran_nf_basic_ring_DEPENDENCIES) $(EXTRA_native_fortran_nf_basic_ring_DEPENDENCIES) native_fortran/$(am__dirstamp) @rm -f native_fortran/nf_basic_ring$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_basic_ring_OBJECTS) $(native_fortran_nf_basic_ring_LDADD) $(LIBS) native_fortran/nf_mm_cl.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_mm.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_mm$(EXEEXT): $(native_fortran_nf_mm_OBJECTS) $(native_fortran_nf_mm_DEPENDENCIES) $(EXTRA_native_fortran_nf_mm_DEPENDENCIES) native_fortran/$(am__dirstamp) @rm -f native_fortran/nf_mm$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_mm_OBJECTS) $(native_fortran_nf_mm_LDADD) $(LIBS) native_fortran/nf_mm_cl_blas.$(OBJEXT): \ native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_mm_2dbc.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_mm_2dbc$(EXEEXT): $(native_fortran_nf_mm_2dbc_OBJECTS) $(native_fortran_nf_mm_2dbc_DEPENDENCIES) $(EXTRA_native_fortran_nf_mm_2dbc_DEPENDENCIES) native_fortran/$(am__dirstamp) @rm -f native_fortran/nf_mm_2dbc$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_mm_2dbc_OBJECTS) $(native_fortran_nf_mm_2dbc_LDADD) $(LIBS) native_fortran/nf_mm_task_build.$(OBJEXT): \ native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_mm_task_build$(EXEEXT): $(native_fortran_nf_mm_task_build_OBJECTS) $(native_fortran_nf_mm_task_build_DEPENDENCIES) $(EXTRA_native_fortran_nf_mm_task_build_DEPENDENCIES) native_fortran/$(am__dirstamp) @rm -f native_fortran/nf_mm_task_build$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_mm_task_build_OBJECTS) $(native_fortran_nf_mm_task_build_LDADD) $(LIBS) native_fortran/nf_mpi_redux.$(OBJEXT): native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_mpi_redux$(EXEEXT): $(native_fortran_nf_mpi_redux_OBJECTS) $(native_fortran_nf_mpi_redux_DEPENDENCIES) $(EXTRA_native_fortran_nf_mpi_redux_DEPENDENCIES) native_fortran/$(am__dirstamp) @rm -f native_fortran/nf_mpi_redux$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_mpi_redux_OBJECTS) $(native_fortran_nf_mpi_redux_LDADD) $(LIBS) native_fortran/nf_mpi_redux_tree.$(OBJEXT): \ native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_mpi_redux_tree$(EXEEXT): $(native_fortran_nf_mpi_redux_tree_OBJECTS) $(native_fortran_nf_mpi_redux_tree_DEPENDENCIES) $(EXTRA_native_fortran_nf_mpi_redux_tree_DEPENDENCIES) native_fortran/$(am__dirstamp) @rm -f native_fortran/nf_mpi_redux_tree$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_mpi_redux_tree_OBJECTS) $(native_fortran_nf_mpi_redux_tree_LDADD) $(LIBS) native_fortran/nf_redux_test.$(OBJEXT): \ native_fortran/$(am__dirstamp) \ native_fortran/$(DEPDIR)/$(am__dirstamp) native_fortran/nf_redux_test$(EXEEXT): $(native_fortran_nf_redux_test_OBJECTS) $(native_fortran_nf_redux_test_DEPENDENCIES) $(EXTRA_native_fortran_nf_redux_test_DEPENDENCIES) native_fortran/$(am__dirstamp) @rm -f native_fortran/nf_redux_test$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(native_fortran_nf_redux_test_OBJECTS) $(native_fortran_nf_redux_test_LDADD) $(LIBS) stencil/$(am__dirstamp): @$(MKDIR_P) stencil @: > stencil/$(am__dirstamp) stencil/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) stencil/$(DEPDIR) @: > stencil/$(DEPDIR)/$(am__dirstamp) stencil/stencil5.$(OBJEXT): stencil/$(am__dirstamp) \ stencil/$(DEPDIR)/$(am__dirstamp) stencil/stencil5$(EXEEXT): $(stencil_stencil5_OBJECTS) $(stencil_stencil5_DEPENDENCIES) $(EXTRA_stencil_stencil5_DEPENDENCIES) stencil/$(am__dirstamp) @rm -f stencil/stencil5$(EXEEXT) $(AM_V_CCLD)$(LINK) $(stencil_stencil5_OBJECTS) $(stencil_stencil5_LDADD) $(LIBS) stencil/stencil5_lb.$(OBJEXT): stencil/$(am__dirstamp) \ stencil/$(DEPDIR)/$(am__dirstamp) stencil/stencil5_lb$(EXEEXT): $(stencil_stencil5_lb_OBJECTS) $(stencil_stencil5_lb_DEPENDENCIES) $(EXTRA_stencil_stencil5_lb_DEPENDENCIES) stencil/$(am__dirstamp) @rm -f stencil/stencil5_lb$(EXEEXT) $(AM_V_CCLD)$(LINK) $(stencil_stencil5_lb_OBJECTS) $(stencil_stencil5_lb_LDADD) $(LIBS) user_datatype/$(am__dirstamp): @$(MKDIR_P) user_datatype @: > user_datatype/$(am__dirstamp) user_datatype/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) user_datatype/$(DEPDIR) @: > user_datatype/$(DEPDIR)/$(am__dirstamp) user_datatype/user_datatype.$(OBJEXT): user_datatype/$(am__dirstamp) \ user_datatype/$(DEPDIR)/$(am__dirstamp) user_datatype/my_interface.$(OBJEXT): user_datatype/$(am__dirstamp) \ user_datatype/$(DEPDIR)/$(am__dirstamp) user_datatype/user_datatype$(EXEEXT): $(user_datatype_user_datatype_OBJECTS) $(user_datatype_user_datatype_DEPENDENCIES) $(EXTRA_user_datatype_user_datatype_DEPENDENCIES) user_datatype/$(am__dirstamp) @rm -f user_datatype/user_datatype$(EXEEXT) $(AM_V_CCLD)$(LINK) $(user_datatype_user_datatype_OBJECTS) $(user_datatype_user_datatype_LDADD) $(LIBS) user_datatype/user_datatype2.$(OBJEXT): user_datatype/$(am__dirstamp) \ user_datatype/$(DEPDIR)/$(am__dirstamp) user_datatype/user_datatype2$(EXEEXT): $(user_datatype_user_datatype2_OBJECTS) $(user_datatype_user_datatype2_DEPENDENCIES) $(EXTRA_user_datatype_user_datatype2_DEPENDENCIES) user_datatype/$(am__dirstamp) @rm -f user_datatype/user_datatype2$(EXEEXT) $(AM_V_CCLD)$(LINK) $(user_datatype_user_datatype2_OBJECTS) $(user_datatype_user_datatype2_LDADD) $(LIBS) user_datatype/user_datatype_early.$(OBJEXT): \ user_datatype/$(am__dirstamp) \ user_datatype/$(DEPDIR)/$(am__dirstamp) user_datatype/user_datatype_early$(EXEEXT): $(user_datatype_user_datatype_early_OBJECTS) $(user_datatype_user_datatype_early_DEPENDENCIES) $(EXTRA_user_datatype_user_datatype_early_DEPENDENCIES) user_datatype/$(am__dirstamp) @rm -f user_datatype/user_datatype_early$(EXEEXT) $(AM_V_CCLD)$(LINK) $(user_datatype_user_datatype_early_OBJECTS) $(user_datatype_user_datatype_early_LDADD) $(LIBS) user_datatype/user_datatype_interface.$(OBJEXT): \ user_datatype/$(am__dirstamp) \ user_datatype/$(DEPDIR)/$(am__dirstamp) user_datatype/user_datatype_interface$(EXEEXT): $(user_datatype_user_datatype_interface_OBJECTS) $(user_datatype_user_datatype_interface_DEPENDENCIES) $(EXTRA_user_datatype_user_datatype_interface_DEPENDENCIES) user_datatype/$(am__dirstamp) @rm -f user_datatype/user_datatype_interface$(EXEEXT) $(AM_V_CCLD)$(LINK) $(user_datatype_user_datatype_interface_OBJECTS) $(user_datatype_user_datatype_interface_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) -rm -f ../../examples/common/*.$(OBJEXT) -rm -f ../../examples/interface/*.$(OBJEXT) -rm -f benchs/*.$(OBJEXT) -rm -f cache/*.$(OBJEXT) -rm -f cg/*.$(OBJEXT) -rm -f comm/*.$(OBJEXT) -rm -f complex/*.$(OBJEXT) -rm -f filters/*.$(OBJEXT) -rm -f matrix_decomposition/*.$(OBJEXT) -rm -f matrix_mult/*.$(OBJEXT) -rm -f mpi_lu/*.$(OBJEXT) -rm -f mpi_redux/*.$(OBJEXT) -rm -f native_fortran/*.$(OBJEXT) -rm -f stencil/*.$(OBJEXT) -rm -f user_datatype/*.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@../../examples/common/$(DEPDIR)/blas.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@../../examples/interface/$(DEPDIR)/complex_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/abstract_sendrecv_bench.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/bcast_bench-bcast_bench.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/bcast_bench-bench_helper.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/bench_helper.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/burst.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/burst_gemm.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/burst_helper.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/gemm_helper.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/sendrecv_bench.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/sendrecv_gemm_bench.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@benchs/$(DEPDIR)/sendrecv_parallel_tasks_bench.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cache/$(DEPDIR)/cache.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cache/$(DEPDIR)/cache_disable.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cg/$(DEPDIR)/cg.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/comm.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/group.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/mix_comm.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@complex/$(DEPDIR)/mpi_complex.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@filters/$(DEPDIR)/filter.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@matrix_decomposition/$(DEPDIR)/mpi_cholesky.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@matrix_decomposition/$(DEPDIR)/mpi_cholesky_codelets.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@matrix_decomposition/$(DEPDIR)/mpi_cholesky_distributed.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@matrix_decomposition/$(DEPDIR)/mpi_cholesky_kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@matrix_decomposition/$(DEPDIR)/mpi_cholesky_models.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@matrix_decomposition/$(DEPDIR)/mpi_decomposition_matrix.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@matrix_decomposition/$(DEPDIR)/mpi_decomposition_params.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@matrix_mult/$(DEPDIR)/mm.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@matrix_mult/$(DEPDIR)/mm_2dbc.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/pdlu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/pdlu_implicit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/pdlu_kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_example_double.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_example_float.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_implicit_example_double.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_implicit_example_float.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_outofcore_example_double.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_outofcore_example_float.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_solve_double.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/plu_solve_float.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/pslu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/pslu_implicit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_lu/$(DEPDIR)/pslu_kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_redux/$(DEPDIR)/mpi_redux.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_redux/$(DEPDIR)/mpi_redux_autowrapup.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_redux/$(DEPDIR)/mpi_redux_tree.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@stencil/$(DEPDIR)/stencil5.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@stencil/$(DEPDIR)/stencil5_lb.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@user_datatype/$(DEPDIR)/my_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@user_datatype/$(DEPDIR)/user_datatype.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@user_datatype/$(DEPDIR)/user_datatype2.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@user_datatype/$(DEPDIR)/user_datatype_early.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@user_datatype/$(DEPDIR)/user_datatype_interface.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< benchs/bcast_bench-bcast_bench.o: benchs/bcast_bench.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -MT benchs/bcast_bench-bcast_bench.o -MD -MP -MF benchs/$(DEPDIR)/bcast_bench-bcast_bench.Tpo -c -o benchs/bcast_bench-bcast_bench.o `test -f 'benchs/bcast_bench.c' || echo '$(srcdir)/'`benchs/bcast_bench.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) benchs/$(DEPDIR)/bcast_bench-bcast_bench.Tpo benchs/$(DEPDIR)/bcast_bench-bcast_bench.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='benchs/bcast_bench.c' object='benchs/bcast_bench-bcast_bench.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -c -o benchs/bcast_bench-bcast_bench.o `test -f 'benchs/bcast_bench.c' || echo '$(srcdir)/'`benchs/bcast_bench.c benchs/bcast_bench-bcast_bench.obj: benchs/bcast_bench.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -MT benchs/bcast_bench-bcast_bench.obj -MD -MP -MF benchs/$(DEPDIR)/bcast_bench-bcast_bench.Tpo -c -o benchs/bcast_bench-bcast_bench.obj `if test -f 'benchs/bcast_bench.c'; then $(CYGPATH_W) 'benchs/bcast_bench.c'; else $(CYGPATH_W) '$(srcdir)/benchs/bcast_bench.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) benchs/$(DEPDIR)/bcast_bench-bcast_bench.Tpo benchs/$(DEPDIR)/bcast_bench-bcast_bench.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='benchs/bcast_bench.c' object='benchs/bcast_bench-bcast_bench.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -c -o benchs/bcast_bench-bcast_bench.obj `if test -f 'benchs/bcast_bench.c'; then $(CYGPATH_W) 'benchs/bcast_bench.c'; else $(CYGPATH_W) '$(srcdir)/benchs/bcast_bench.c'; fi` benchs/bcast_bench-bench_helper.o: benchs/bench_helper.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -MT benchs/bcast_bench-bench_helper.o -MD -MP -MF benchs/$(DEPDIR)/bcast_bench-bench_helper.Tpo -c -o benchs/bcast_bench-bench_helper.o `test -f 'benchs/bench_helper.c' || echo '$(srcdir)/'`benchs/bench_helper.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) benchs/$(DEPDIR)/bcast_bench-bench_helper.Tpo benchs/$(DEPDIR)/bcast_bench-bench_helper.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='benchs/bench_helper.c' object='benchs/bcast_bench-bench_helper.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -c -o benchs/bcast_bench-bench_helper.o `test -f 'benchs/bench_helper.c' || echo '$(srcdir)/'`benchs/bench_helper.c benchs/bcast_bench-bench_helper.obj: benchs/bench_helper.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -MT benchs/bcast_bench-bench_helper.obj -MD -MP -MF benchs/$(DEPDIR)/bcast_bench-bench_helper.Tpo -c -o benchs/bcast_bench-bench_helper.obj `if test -f 'benchs/bench_helper.c'; then $(CYGPATH_W) 'benchs/bench_helper.c'; else $(CYGPATH_W) '$(srcdir)/benchs/bench_helper.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) benchs/$(DEPDIR)/bcast_bench-bench_helper.Tpo benchs/$(DEPDIR)/bcast_bench-bench_helper.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='benchs/bench_helper.c' object='benchs/bcast_bench-bench_helper.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_bcast_bench_CFLAGS) $(CFLAGS) -c -o benchs/bcast_bench-bench_helper.obj `if test -f 'benchs/bench_helper.c'; then $(CYGPATH_W) 'benchs/bench_helper.c'; else $(CYGPATH_W) '$(srcdir)/benchs/bench_helper.c'; fi` benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.o: benchs/recv_wait_finalize_bench.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_recv_wait_finalize_bench_CFLAGS) $(CFLAGS) -MT benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.o -MD -MP -MF benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Tpo -c -o benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.o `test -f 'benchs/recv_wait_finalize_bench.c' || echo '$(srcdir)/'`benchs/recv_wait_finalize_bench.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Tpo benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='benchs/recv_wait_finalize_bench.c' object='benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_recv_wait_finalize_bench_CFLAGS) $(CFLAGS) -c -o benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.o `test -f 'benchs/recv_wait_finalize_bench.c' || echo '$(srcdir)/'`benchs/recv_wait_finalize_bench.c benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.obj: benchs/recv_wait_finalize_bench.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_recv_wait_finalize_bench_CFLAGS) $(CFLAGS) -MT benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.obj -MD -MP -MF benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Tpo -c -o benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.obj `if test -f 'benchs/recv_wait_finalize_bench.c'; then $(CYGPATH_W) 'benchs/recv_wait_finalize_bench.c'; else $(CYGPATH_W) '$(srcdir)/benchs/recv_wait_finalize_bench.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Tpo benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='benchs/recv_wait_finalize_bench.c' object='benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(benchs_recv_wait_finalize_bench_CFLAGS) $(CFLAGS) -c -o benchs/recv_wait_finalize_bench-recv_wait_finalize_bench.obj `if test -f 'benchs/recv_wait_finalize_bench.c'; then $(CYGPATH_W) 'benchs/recv_wait_finalize_bench.c'; else $(CYGPATH_W) '$(srcdir)/benchs/recv_wait_finalize_bench.c'; fi` loader-loader.o: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c loader-loader.obj: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` .f90.o: $(AM_V_FC)$(FCCOMPILE) -c -o $@ $< .f90.obj: $(AM_V_FC)$(FCCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .f90.lo: $(AM_V_FC)$(LTFCCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs -rm -rf benchs/.libs benchs/_libs -rm -rf cache/.libs cache/_libs -rm -rf cg/.libs cg/_libs -rm -rf comm/.libs comm/_libs -rm -rf complex/.libs complex/_libs -rm -rf filters/.libs filters/_libs -rm -rf matrix_decomposition/.libs matrix_decomposition/_libs -rm -rf matrix_mult/.libs matrix_mult/_libs -rm -rf mpi_lu/.libs mpi_lu/_libs -rm -rf mpi_redux/.libs mpi_redux/_libs -rm -rf native_fortran/.libs native_fortran/_libs -rm -rf stencil/.libs stencil/_libs -rm -rf user_datatype/.libs user_datatype/_libs ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-am TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-am CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-am cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: $(check_PROGRAMS) @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all $(check_PROGRAMS) @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? stencil/stencil5.log: stencil/stencil5$(EXEEXT) @p='stencil/stencil5$(EXEEXT)'; \ b='stencil/stencil5'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) stencil/stencil5_lb.log: stencil/stencil5_lb$(EXEEXT) @p='stencil/stencil5_lb$(EXEEXT)'; \ b='stencil/stencil5_lb'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) cache/cache.log: cache/cache$(EXEEXT) @p='cache/cache$(EXEEXT)'; \ b='cache/cache'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) cache/cache_disable.log: cache/cache_disable$(EXEEXT) @p='cache/cache_disable$(EXEEXT)'; \ b='cache/cache_disable'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_lu/plu_example_float.log: mpi_lu/plu_example_float$(EXEEXT) @p='mpi_lu/plu_example_float$(EXEEXT)'; \ b='mpi_lu/plu_example_float'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_lu/plu_example_double.log: mpi_lu/plu_example_double$(EXEEXT) @p='mpi_lu/plu_example_double$(EXEEXT)'; \ b='mpi_lu/plu_example_double'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_lu/plu_implicit_example_float.log: mpi_lu/plu_implicit_example_float$(EXEEXT) @p='mpi_lu/plu_implicit_example_float$(EXEEXT)'; \ b='mpi_lu/plu_implicit_example_float'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_lu/plu_implicit_example_double.log: mpi_lu/plu_implicit_example_double$(EXEEXT) @p='mpi_lu/plu_implicit_example_double$(EXEEXT)'; \ b='mpi_lu/plu_implicit_example_double'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_lu/plu_outofcore_example_float.log: mpi_lu/plu_outofcore_example_float$(EXEEXT) @p='mpi_lu/plu_outofcore_example_float$(EXEEXT)'; \ b='mpi_lu/plu_outofcore_example_float'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_lu/plu_outofcore_example_double.log: mpi_lu/plu_outofcore_example_double$(EXEEXT) @p='mpi_lu/plu_outofcore_example_double$(EXEEXT)'; \ b='mpi_lu/plu_outofcore_example_double'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) matrix_decomposition/mpi_cholesky.log: matrix_decomposition/mpi_cholesky$(EXEEXT) @p='matrix_decomposition/mpi_cholesky$(EXEEXT)'; \ b='matrix_decomposition/mpi_cholesky'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) matrix_decomposition/mpi_cholesky_distributed.log: matrix_decomposition/mpi_cholesky_distributed$(EXEEXT) @p='matrix_decomposition/mpi_cholesky_distributed$(EXEEXT)'; \ b='matrix_decomposition/mpi_cholesky_distributed'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) matrix_mult/mm.log: matrix_mult/mm$(EXEEXT) @p='matrix_mult/mm$(EXEEXT)'; \ b='matrix_mult/mm'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) matrix_mult/mm_2dbc.log: matrix_mult/mm_2dbc$(EXEEXT) @p='matrix_mult/mm_2dbc$(EXEEXT)'; \ b='matrix_mult/mm_2dbc'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_redux/mpi_redux.log: mpi_redux/mpi_redux$(EXEEXT) @p='mpi_redux/mpi_redux$(EXEEXT)'; \ b='mpi_redux/mpi_redux'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_redux/mpi_redux_autowrapup.log: mpi_redux/mpi_redux_autowrapup$(EXEEXT) @p='mpi_redux/mpi_redux_autowrapup$(EXEEXT)'; \ b='mpi_redux/mpi_redux_autowrapup'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_redux/mpi_redux_tree.log: mpi_redux/mpi_redux_tree$(EXEEXT) @p='mpi_redux/mpi_redux_tree$(EXEEXT)'; \ b='mpi_redux/mpi_redux_tree'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) native_fortran/nf_mm.log: native_fortran/nf_mm$(EXEEXT) @p='native_fortran/nf_mm$(EXEEXT)'; \ b='native_fortran/nf_mm'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) native_fortran/nf_mm_task_build.log: native_fortran/nf_mm_task_build$(EXEEXT) @p='native_fortran/nf_mm_task_build$(EXEEXT)'; \ b='native_fortran/nf_mm_task_build'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) native_fortran/nf_basic_ring.log: native_fortran/nf_basic_ring$(EXEEXT) @p='native_fortran/nf_basic_ring$(EXEEXT)'; \ b='native_fortran/nf_basic_ring'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) native_fortran/nf_mm_2dbc.log: native_fortran/nf_mm_2dbc$(EXEEXT) @p='native_fortran/nf_mm_2dbc$(EXEEXT)'; \ b='native_fortran/nf_mm_2dbc'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) native_fortran/nf_mpi_redux.log: native_fortran/nf_mpi_redux$(EXEEXT) @p='native_fortran/nf_mpi_redux$(EXEEXT)'; \ b='native_fortran/nf_mpi_redux'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) native_fortran/nf_mpi_redux_tree.log: native_fortran/nf_mpi_redux_tree$(EXEEXT) @p='native_fortran/nf_mpi_redux_tree$(EXEEXT)'; \ b='native_fortran/nf_mpi_redux_tree'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) native_fortran/nf_redux_test.log: native_fortran/nf_redux_test$(EXEEXT) @p='native_fortran/nf_redux_test$(EXEEXT)'; \ b='native_fortran/nf_redux_test'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) complex/mpi_complex.log: complex/mpi_complex$(EXEEXT) @p='complex/mpi_complex$(EXEEXT)'; \ b='complex/mpi_complex'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) user_datatype/user_datatype2.log: user_datatype/user_datatype2$(EXEEXT) @p='user_datatype/user_datatype2$(EXEEXT)'; \ b='user_datatype/user_datatype2'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) user_datatype/user_datatype_early.log: user_datatype/user_datatype_early$(EXEEXT) @p='user_datatype/user_datatype_early$(EXEEXT)'; \ b='user_datatype/user_datatype_early'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) user_datatype/user_datatype.log: user_datatype/user_datatype$(EXEEXT) @p='user_datatype/user_datatype$(EXEEXT)'; \ b='user_datatype/user_datatype'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) user_datatype/user_datatype_interface.log: user_datatype/user_datatype_interface$(EXEEXT) @p='user_datatype/user_datatype_interface$(EXEEXT)'; \ b='user_datatype/user_datatype_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) comm/comm.log: comm/comm$(EXEEXT) @p='comm/comm$(EXEEXT)'; \ b='comm/comm'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) comm/mix_comm.log: comm/mix_comm$(EXEEXT) @p='comm/mix_comm$(EXEEXT)'; \ b='comm/mix_comm'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) comm/group.log: comm/group$(EXEEXT) @p='comm/group$(EXEEXT)'; \ b='comm/group'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) filters/filter.log: filters/filter$(EXEEXT) @p='filters/filter$(EXEEXT)'; \ b='filters/filter'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) benchs/sendrecv_bench.log: benchs/sendrecv_bench$(EXEEXT) @p='benchs/sendrecv_bench$(EXEEXT)'; \ b='benchs/sendrecv_bench'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) benchs/burst.log: benchs/burst$(EXEEXT) @p='benchs/burst$(EXEEXT)'; \ b='benchs/burst'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) benchs/bcast_bench.log: benchs/bcast_bench$(EXEEXT) @p='benchs/bcast_bench$(EXEEXT)'; \ b='benchs/bcast_bench'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) benchs/recv_wait_finalize_bench.log: benchs/recv_wait_finalize_bench$(EXEEXT) @p='benchs/recv_wait_finalize_bench$(EXEEXT)'; \ b='benchs/recv_wait_finalize_bench'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) benchs/sendrecv_parallel_tasks_bench.log: benchs/sendrecv_parallel_tasks_bench$(EXEEXT) @p='benchs/sendrecv_parallel_tasks_bench$(EXEEXT)'; \ b='benchs/sendrecv_parallel_tasks_bench'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) benchs/sendrecv_gemm_bench.log: benchs/sendrecv_gemm_bench$(EXEEXT) @p='benchs/sendrecv_gemm_bench$(EXEEXT)'; \ b='benchs/sendrecv_gemm_bench'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) benchs/burst_gemm.log: benchs/burst_gemm$(EXEEXT) @p='benchs/burst_gemm$(EXEEXT)'; \ b='benchs/burst_gemm'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) check-am all-am: Makefile $(PROGRAMS) installdirs: for dir in "$(DESTDIR)$(examplebindir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-am install-exec: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -rm -f ../../examples/common/$(DEPDIR)/$(am__dirstamp) -rm -f ../../examples/common/$(am__dirstamp) -rm -f ../../examples/interface/$(DEPDIR)/$(am__dirstamp) -rm -f ../../examples/interface/$(am__dirstamp) -rm -f benchs/$(DEPDIR)/$(am__dirstamp) -rm -f benchs/$(am__dirstamp) -rm -f cache/$(DEPDIR)/$(am__dirstamp) -rm -f cache/$(am__dirstamp) -rm -f cg/$(DEPDIR)/$(am__dirstamp) -rm -f cg/$(am__dirstamp) -rm -f comm/$(DEPDIR)/$(am__dirstamp) -rm -f comm/$(am__dirstamp) -rm -f complex/$(DEPDIR)/$(am__dirstamp) -rm -f complex/$(am__dirstamp) -rm -f filters/$(DEPDIR)/$(am__dirstamp) -rm -f filters/$(am__dirstamp) -rm -f matrix_decomposition/$(DEPDIR)/$(am__dirstamp) -rm -f matrix_decomposition/$(am__dirstamp) -rm -f matrix_mult/$(DEPDIR)/$(am__dirstamp) -rm -f matrix_mult/$(am__dirstamp) -rm -f mpi_lu/$(DEPDIR)/$(am__dirstamp) -rm -f mpi_lu/$(am__dirstamp) -rm -f mpi_redux/$(DEPDIR)/$(am__dirstamp) -rm -f mpi_redux/$(am__dirstamp) -rm -f native_fortran/$(DEPDIR)/$(am__dirstamp) -rm -f native_fortran/$(am__dirstamp) -rm -f stencil/$(DEPDIR)/$(am__dirstamp) -rm -f stencil/$(am__dirstamp) -rm -f user_datatype/$(DEPDIR)/$(am__dirstamp) -rm -f user_datatype/$(am__dirstamp) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) clean: clean-am clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ clean-libtool clean-noinstPROGRAMS mostlyclean-am distclean: distclean-am -rm -f ../../examples/common/$(DEPDIR)/blas.Po -rm -f ../../examples/interface/$(DEPDIR)/complex_interface.Po -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f benchs/$(DEPDIR)/abstract_sendrecv_bench.Po -rm -f benchs/$(DEPDIR)/bcast_bench-bcast_bench.Po -rm -f benchs/$(DEPDIR)/bcast_bench-bench_helper.Po -rm -f benchs/$(DEPDIR)/bench_helper.Po -rm -f benchs/$(DEPDIR)/burst.Po -rm -f benchs/$(DEPDIR)/burst_gemm.Po -rm -f benchs/$(DEPDIR)/burst_helper.Po -rm -f benchs/$(DEPDIR)/gemm_helper.Po -rm -f benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Po -rm -f benchs/$(DEPDIR)/sendrecv_bench.Po -rm -f benchs/$(DEPDIR)/sendrecv_gemm_bench.Po -rm -f benchs/$(DEPDIR)/sendrecv_parallel_tasks_bench.Po -rm -f cache/$(DEPDIR)/cache.Po -rm -f cache/$(DEPDIR)/cache_disable.Po -rm -f cg/$(DEPDIR)/cg.Po -rm -f comm/$(DEPDIR)/comm.Po -rm -f comm/$(DEPDIR)/group.Po -rm -f comm/$(DEPDIR)/mix_comm.Po -rm -f complex/$(DEPDIR)/mpi_complex.Po -rm -f filters/$(DEPDIR)/filter.Po -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky.Po -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_codelets.Po -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_distributed.Po -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_kernels.Po -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_models.Po -rm -f matrix_decomposition/$(DEPDIR)/mpi_decomposition_matrix.Po -rm -f matrix_decomposition/$(DEPDIR)/mpi_decomposition_params.Po -rm -f matrix_mult/$(DEPDIR)/mm.Po -rm -f matrix_mult/$(DEPDIR)/mm_2dbc.Po -rm -f mpi_lu/$(DEPDIR)/pdlu.Po -rm -f mpi_lu/$(DEPDIR)/pdlu_implicit.Po -rm -f mpi_lu/$(DEPDIR)/pdlu_kernels.Po -rm -f mpi_lu/$(DEPDIR)/plu_example_double.Po -rm -f mpi_lu/$(DEPDIR)/plu_example_float.Po -rm -f mpi_lu/$(DEPDIR)/plu_implicit_example_double.Po -rm -f mpi_lu/$(DEPDIR)/plu_implicit_example_float.Po -rm -f mpi_lu/$(DEPDIR)/plu_outofcore_example_double.Po -rm -f mpi_lu/$(DEPDIR)/plu_outofcore_example_float.Po -rm -f mpi_lu/$(DEPDIR)/plu_solve_double.Po -rm -f mpi_lu/$(DEPDIR)/plu_solve_float.Po -rm -f mpi_lu/$(DEPDIR)/pslu.Po -rm -f mpi_lu/$(DEPDIR)/pslu_implicit.Po -rm -f mpi_lu/$(DEPDIR)/pslu_kernels.Po -rm -f mpi_redux/$(DEPDIR)/mpi_redux.Po -rm -f mpi_redux/$(DEPDIR)/mpi_redux_autowrapup.Po -rm -f mpi_redux/$(DEPDIR)/mpi_redux_tree.Po -rm -f stencil/$(DEPDIR)/stencil5.Po -rm -f stencil/$(DEPDIR)/stencil5_lb.Po -rm -f user_datatype/$(DEPDIR)/my_interface.Po -rm -f user_datatype/$(DEPDIR)/user_datatype.Po -rm -f user_datatype/$(DEPDIR)/user_datatype2.Po -rm -f user_datatype/$(DEPDIR)/user_datatype_early.Po -rm -f user_datatype/$(DEPDIR)/user_datatype_interface.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-examplebinPROGRAMS install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f ../../examples/common/$(DEPDIR)/blas.Po -rm -f ../../examples/interface/$(DEPDIR)/complex_interface.Po -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f benchs/$(DEPDIR)/abstract_sendrecv_bench.Po -rm -f benchs/$(DEPDIR)/bcast_bench-bcast_bench.Po -rm -f benchs/$(DEPDIR)/bcast_bench-bench_helper.Po -rm -f benchs/$(DEPDIR)/bench_helper.Po -rm -f benchs/$(DEPDIR)/burst.Po -rm -f benchs/$(DEPDIR)/burst_gemm.Po -rm -f benchs/$(DEPDIR)/burst_helper.Po -rm -f benchs/$(DEPDIR)/gemm_helper.Po -rm -f benchs/$(DEPDIR)/recv_wait_finalize_bench-recv_wait_finalize_bench.Po -rm -f benchs/$(DEPDIR)/sendrecv_bench.Po -rm -f benchs/$(DEPDIR)/sendrecv_gemm_bench.Po -rm -f benchs/$(DEPDIR)/sendrecv_parallel_tasks_bench.Po -rm -f cache/$(DEPDIR)/cache.Po -rm -f cache/$(DEPDIR)/cache_disable.Po -rm -f cg/$(DEPDIR)/cg.Po -rm -f comm/$(DEPDIR)/comm.Po -rm -f comm/$(DEPDIR)/group.Po -rm -f comm/$(DEPDIR)/mix_comm.Po -rm -f complex/$(DEPDIR)/mpi_complex.Po -rm -f filters/$(DEPDIR)/filter.Po -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky.Po -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_codelets.Po -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_distributed.Po -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_kernels.Po -rm -f matrix_decomposition/$(DEPDIR)/mpi_cholesky_models.Po -rm -f matrix_decomposition/$(DEPDIR)/mpi_decomposition_matrix.Po -rm -f matrix_decomposition/$(DEPDIR)/mpi_decomposition_params.Po -rm -f matrix_mult/$(DEPDIR)/mm.Po -rm -f matrix_mult/$(DEPDIR)/mm_2dbc.Po -rm -f mpi_lu/$(DEPDIR)/pdlu.Po -rm -f mpi_lu/$(DEPDIR)/pdlu_implicit.Po -rm -f mpi_lu/$(DEPDIR)/pdlu_kernels.Po -rm -f mpi_lu/$(DEPDIR)/plu_example_double.Po -rm -f mpi_lu/$(DEPDIR)/plu_example_float.Po -rm -f mpi_lu/$(DEPDIR)/plu_implicit_example_double.Po -rm -f mpi_lu/$(DEPDIR)/plu_implicit_example_float.Po -rm -f mpi_lu/$(DEPDIR)/plu_outofcore_example_double.Po -rm -f mpi_lu/$(DEPDIR)/plu_outofcore_example_float.Po -rm -f mpi_lu/$(DEPDIR)/plu_solve_double.Po -rm -f mpi_lu/$(DEPDIR)/plu_solve_float.Po -rm -f mpi_lu/$(DEPDIR)/pslu.Po -rm -f mpi_lu/$(DEPDIR)/pslu_implicit.Po -rm -f mpi_lu/$(DEPDIR)/pslu_kernels.Po -rm -f mpi_redux/$(DEPDIR)/mpi_redux.Po -rm -f mpi_redux/$(DEPDIR)/mpi_redux_autowrapup.Po -rm -f mpi_redux/$(DEPDIR)/mpi_redux_tree.Po -rm -f stencil/$(DEPDIR)/stencil5.Po -rm -f stencil/$(DEPDIR)/stencil5_lb.Po -rm -f user_datatype/$(DEPDIR)/my_interface.Po -rm -f user_datatype/$(DEPDIR)/user_datatype.Po -rm -f user_datatype/$(DEPDIR)/user_datatype2.Po -rm -f user_datatype/$(DEPDIR)/user_datatype_early.Po -rm -f user_datatype/$(DEPDIR)/user_datatype_interface.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-examplebinPROGRAMS .MAKE: all check check-am install install-am install-exec \ install-strip .PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ check-am clean clean-checkPROGRAMS clean-examplebinPROGRAMS \ clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ ctags ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am \ install-examplebinPROGRAMS install-exec install-exec-am \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip installcheck installcheck-am \ installdirs maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ uninstall uninstall-am uninstall-examplebinPROGRAMS .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # # Test loading goes through a lot of launchers: # # - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. # either mpirun or starpu_tcpipexec # # - $(LOADER), i.e. tests/loader, is then called to implement timeout, running # gdb, etc. But if it detects that the test is a .sh script, it just executes # it # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # to run the program through e.g. valgrind.sh # # When the program is a shell script, additionally: # # - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) # # - $(MS_LAUNCHER) is called to run the test through starpu_msexec # # - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program # through it. # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # export LAUNCHER @HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL export MS_LAUNCHER LAUNCHER ?= MS_LAUNCHER ?= @STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr export LSAN_OPTIONS export TSAN_OPTIONS # Native Fortran example # - link over source file to build our own object native_fortran/fstarpu_mod.f90: @$(MKDIR_P) $(dir $@) $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ native_fortran/fstarpu_mpi_mod.f90: @$(MKDIR_P) $(dir $@) $(V_ln) $(LN_S) $(abs_top_srcdir)/mpi/include/$(notdir $@) $@ # - express the creation of .mod along .o @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@fstarpu_mod.mod: native_fortran/fstarpu_mod.o @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@fstarpu_mpi_mod.mod: native_fortran/fstarpu_mpi_mod.o @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@nf_mm_cl.mod: native_fortran/nf_mm_cl.o @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@nf_mm_cl_blas.mod: native_fortran/nf_mm_cl_blas.o # - list explicit dependences to control proper module files dependencies @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/fstarpu_mpi_mod.o: fstarpu_mod.mod @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_mm_cl.o: fstarpu_mod.mod fstarpu_mpi_mod.mod @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_mm_cl_blas.o: fstarpu_mod.mod fstarpu_mpi_mod.mod @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_mm.o: nf_mm_cl.mod fstarpu_mpi_mod.mod fstarpu_mod.mod @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_mm_2dbc.o: nf_mm_cl.mod nf_mm_cl_blas.mod fstarpu_mpi_mod.mod fstarpu_mod.mod @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_mm_task_build.o: nf_mm_cl.mod fstarpu_mpi_mod.mod fstarpu_mod.mod @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_basic_ring.o: fstarpu_mpi_mod.mod fstarpu_mod.mod @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_redux_test.o: fstarpu_mpi_mod.mod fstarpu_mod.mod @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_mpi_redux.o: fstarpu_mpi_mod.mod fstarpu_mod.mod @STARPU_HAVE_MPIFORT_TRUE@@STARPU_SANITIZE_FALSE@native_fortran/nf_mpi_redux_tree.o: fstarpu_mpi_mod.mod fstarpu_mod.mod # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/mpi/examples/benchs/000077500000000000000000000000001507764646700176555ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/examples/benchs/abstract_sendrecv_bench.c000066400000000000000000000145551507764646700246660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "bench_helper.h" #include "abstract_sendrecv_bench.h" /* * Memset */ #ifdef STARPU_USE_CUDA static void cuda_memset_codelet(void *descr[], void *arg) { (void)arg; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned length = STARPU_VECTOR_GET_NX(descr[0]); cudaMemsetAsync(buf, 0, length, starpu_cuda_get_local_stream()); } #endif void cpu_memset_codelet(void *descr[], void *arg) { (void)arg; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned length = STARPU_VECTOR_GET_NX(descr[0]); memset(buf, 0, length * sizeof(*buf)); } static struct starpu_codelet memset_cl = { .cpu_funcs = {cpu_memset_codelet}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_memset_codelet}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .cpu_funcs_name = {"cpu_memset_codelet"}, .nbuffers = 1, .modes = {STARPU_W} }; int sendrecv_bench(int mpi_rank, starpu_pthread_barrier_t* thread_barrier, int bidir, int mem_node) { uint64_t iterations = LOOPS_DEFAULT; uint64_t s; uint64_t j; uint64_t k; if (mpi_rank >= 2) { starpu_pause(); if (thread_barrier != NULL) { STARPU_PTHREAD_BARRIER_WAIT(thread_barrier); } for (s = NX_MIN; s <= NX_MAX; s = bench_next_size(s)) { iterations = bench_nb_iterations(iterations, s); starpu_mpi_barrier(MPI_COMM_WORLD); for (j = 0; j < iterations; j++) { starpu_mpi_barrier(MPI_COMM_WORLD); } } starpu_resume(); return 0; } if (mpi_rank == 0) { printf("Times in us\n"); printf("# size (Bytes)\t| latency \t| 10^6 B/s \t| MB/s \t| d1 \t|median \t| avg \t| d9 \t| max\n"); } starpu_data_handle_t handle_send, handle_recv; float* vector_send = NULL; float* vector_recv = NULL; double t1, t2, global_tstart, global_tend; double* lats = malloc(sizeof(double) * LOOPS_DEFAULT); starpu_mpi_req send_req, recv_req; int ret; if (thread_barrier != NULL) { STARPU_PTHREAD_BARRIER_WAIT(thread_barrier); } global_tstart = starpu_timing_now(); for (s = NX_MIN; s <= NX_MAX; s = bench_next_size(s)) { vector_send = (void *)starpu_malloc_on_node_flags(mem_node, s, STARPU_MALLOC_PINNED); vector_recv = (void *)starpu_malloc_on_node_flags(mem_node, s, STARPU_MALLOC_PINNED); starpu_vector_data_register(&handle_send, mem_node, (uintptr_t) vector_send, s, 1); starpu_vector_data_register(&handle_recv, mem_node, (uintptr_t) vector_recv, s, 1); ret = starpu_task_insert(&memset_cl, STARPU_W, handle_send, 0); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&memset_cl, STARPU_W, handle_recv, 0); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); iterations = bench_nb_iterations(iterations, s); starpu_mpi_barrier(MPI_COMM_WORLD); for (j = 0; j < iterations; j++) { if (mpi_rank == 0) { t1 = starpu_timing_now(); if (bidir) { ret = starpu_mpi_isend(handle_send, &send_req, 1, 0, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); ret = starpu_mpi_irecv(handle_recv, &recv_req, 1, 1, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); ret = starpu_mpi_wait(&send_req, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); ret = starpu_mpi_wait(&recv_req, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); } else { ret = starpu_mpi_send(handle_send, 1, 0, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_recv(handle_recv, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); } t2 = starpu_timing_now(); const double t = (t2 - t1) / 2; lats[j] = t; } else { if (bidir) { ret = starpu_mpi_irecv(handle_recv, &recv_req, 0, 0, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); ret = starpu_mpi_isend(handle_send, &send_req, 0, 1, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); ret = starpu_mpi_wait(&recv_req, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); ret = starpu_mpi_wait(&send_req, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); } else { ret = starpu_mpi_recv(handle_recv, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); ret = starpu_mpi_send(handle_send, 0, 1, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } } starpu_mpi_barrier(MPI_COMM_WORLD); } if (mpi_rank == 0) { qsort(lats, iterations, sizeof(double), &comp_double); const double min_lat = lats[0]; const double max_lat = lats[iterations - 1]; const double med_lat = lats[(iterations - 1) / 2]; const double d1_lat = lats[(iterations - 1) / 10]; const double d9_lat = lats[9 * (iterations - 1) / 10]; double avg_lat = 0.0; for(k = 0; k < iterations; k++) { avg_lat += lats[k]; } avg_lat /= iterations; const double bw_million_byte = s / min_lat; const double bw_mbyte = bw_million_byte / 1.048576; printf("%9lld\t%9.3lf\t%9.3f\t%9.3f\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf\n", (long long)s, min_lat, bw_million_byte, bw_mbyte, d1_lat, med_lat, avg_lat, d9_lat, max_lat); fflush(stdout); } starpu_data_unregister(handle_recv); starpu_data_unregister(handle_send); starpu_free_on_node_flags(mem_node, (uintptr_t)vector_send, s, STARPU_MALLOC_PINNED); starpu_free_on_node_flags(mem_node, (uintptr_t)vector_recv, s, STARPU_MALLOC_PINNED); } global_tend = starpu_timing_now(); if (mpi_rank == 0) { printf("Comm bench took %9.3lf ms\n", (global_tend - global_tstart) / 1000); } free(lats); return 0; } starpu-1.4.9+dfsg/mpi/examples/benchs/abstract_sendrecv_bench.h000066400000000000000000000014501507764646700246610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include int sendrecv_bench(int mpi_rank, starpu_pthread_barrier_t* thread_barrier, int bidir, int memnode); starpu-1.4.9+dfsg/mpi/examples/benchs/bcast_bench.c000066400000000000000000000225441507764646700222630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * Basic broadcast benchmark with synchronized clocks. * Inspired a lot from NewMadeleine examples/bench-coll/nm_bench_coll_mcast.c * * Synchronized clocks (mpi_sync_clocks) are available here: * https://gitlab.inria.fr/pm2/pm2/-/tree/master/mpi_sync_clocks * and are detected during StarPU's configure. */ #include #include #include "helper.h" #include "bench_helper.h" #define SERVER_PRINTF(fmt, ...) do { if(rank == 0) { printf(fmt, ## __VA_ARGS__); fflush(stdout); }} while(0) #undef MULT_DEFAULT #undef LOOPS_DEFAULT #ifdef STARPU_QUICK_CHECK #define MIN_DEFAULT 1 #define MAX_DEFAULT 1024 #define LOOPS_DEFAULT 2 #define INCR_DEFAULT 2 #define MULT_DEFAULT 2 #else #define MIN_DEFAULT 1 #define MAX_DEFAULT (16*1024*1024) #define LOOPS_DEFAULT 50 #define INCR_DEFAULT 1 #define MULT_DEFAULT 1.4 #endif #define NODE_INCREMENT 1 static starpu_data_handle_t data_handle, data_handle_in, data_handle_out; static int use_tasks = 0; static void writer_cpu_func(void *descr[], void *args) { (void) descr; (void) args; } static struct starpu_codelet writer_cl = { .cpu_funcs = { writer_cpu_func }, .cpu_funcs_name = { "writer_task" }, .nbuffers = 1, .modes = { STARPU_W } }; static void reader_cpu_func(void* descr[], void* args) { (void) descr; (void) args; } static struct starpu_codelet reader_cl = { .cpu_funcs = { reader_cpu_func }, .cpu_funcs_name = { "reader_task" }, .nbuffers = 2, .modes = { STARPU_R, STARPU_W } }; static void usage(void) { fprintf(stderr, "-N iterations - iterations per length [%d]\n", LOOPS_DEFAULT); fprintf(stderr, "--tasks - triggers coop through task dependency instead of StarPU's MPI interface\n"); fprintf(stderr, "-P incr - number of nodes increment [%d]\n", NODE_INCREMENT); } static inline uint64_t _next(uint64_t len, double multiplier, uint64_t increment) { uint64_t next = len * multiplier + increment; if (next <= len) next++; return next; } static void bcast(MPI_Comm subcomm, int rank, int nb_dests) { int i = 0, ret; if (use_tasks) { starpu_mpi_task_insert(subcomm, &writer_cl, STARPU_W, data_handle_in, 0); for (i = 1; i <= nb_dests; i++) { starpu_mpi_data_register(data_handle_out, i, i); starpu_mpi_task_insert(subcomm, &reader_cl, STARPU_R, data_handle_in, STARPU_W, data_handle_out, 0); } /* Resume StarPU's workers only after submitting tasks, to make * sure the coop will be correctly detected. */ starpu_resume(); starpu_task_wait_for_all(); starpu_pause(); } else { if (rank == 0) { /* We explicitly tell StarPU this send will be a broadcast with n recipients. */ starpu_mpi_coop_sends_data_handle_nb_sends(data_handle, nb_dests); for (i = 1; i <= nb_dests; i++) { ret = starpu_mpi_isend_detached(data_handle, i , 0x42, subcomm, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } } else { ret = starpu_mpi_recv(data_handle, 0, 0x42, subcomm, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); } } } int main(int argc, char**argv) { const uint64_t start_len = MIN_DEFAULT; const uint64_t end_len = MAX_DEFAULT; const double multiplier = MULT_DEFAULT; const uint64_t increment = INCR_DEFAULT; int iterations = LOOPS_DEFAULT; int node_increment = NODE_INCREMENT; int i, ret, rank, worldsize, subcomm_rank, thread_support; MPI_Group world_group; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-N") == 0) { iterations = atoi(argv[++i]); } else if (strcmp(argv[i], "--tasks") == 0) { use_tasks = 1; } else if (strcmp(argv[i], "-P") == 0) { node_increment = atoi(argv[++i]); } else { fprintf(stderr, "%s: illegal argument %s\n", argv[0], argv[i]); usage(); exit(1); } } if (MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &thread_support) != MPI_SUCCESS) { FPRINTF(stderr, "MPI_Init_thread failed\n"); return EXIT_FAILURE; } if (thread_support < MPI_THREAD_MULTIPLE) { /* We need MPI_THREAD_MULTIPLE for the StarPU's MPI thread and * the main thread calling functions from mpi_sync_clocks. */ FPRINTF(stderr, "This benchmark requires MPI_THREAD_MULTIPLE support.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); if (worldsize < 2) { if (rank == 0) FPRINTF(stderr, "We need at least 2 processes.\n"); starpu_mpi_shutdown(); return STARPU_TEST_SKIPPED; } MPI_Comm_group(MPI_COMM_WORLD, &world_group); /* Pause workers for this bench, to avoid any impact on performances from polling workers */ starpu_pause(); starpu_mpi_barrier(MPI_COMM_WORLD); SERVER_PRINTF("# start_len = %lu B\n", start_len); SERVER_PRINTF("# end_len = %lu B\n", end_len); SERVER_PRINTF("# increment = %lu\n", increment); SERVER_PRINTF("# multiplier = %f\n", multiplier); SERVER_PRINTF("# iterations = %d\n", iterations); SERVER_PRINTF("# coop = %s\n", starpu_mpi_coop_sends_get_use() ? "on" : "off"); SERVER_PRINTF("# n.nodes length n.iter min.lat. median average max.lat. \n"); int nb_nodes; for (nb_nodes = 2; nb_nodes <= worldsize; nb_nodes += node_increment) { SERVER_PRINTF("# starting %d nb_nodes...\n", nb_nodes); if (rank >= nb_nodes) { continue; } int* group_ranks = malloc(nb_nodes * sizeof(int)); for (i = 0; i < nb_nodes; i++) { group_ranks[i] = i; } MPI_Group sub_group; MPI_Group_incl(world_group, nb_nodes, group_ranks, &sub_group); MPI_Comm sub_comm; MPI_Comm_create_group(MPI_COMM_WORLD, sub_group, 0, &sub_comm); MPI_Comm_rank(sub_comm, &subcomm_rank); uint64_t len; for (len = start_len; len < end_len; len = _next(len, multiplier, increment)) { char* buf1 = malloc(len); char* buf2 = malloc(len); /* Precise the buffer where the data will be received, to take benefit from the rcache. */ if (use_tasks) { starpu_vector_data_register(&data_handle_in, STARPU_MAIN_RAM, (uintptr_t) buf1, len, 1); starpu_vector_data_register(&data_handle_out, STARPU_MAIN_RAM, (uintptr_t) buf2, len, 1); starpu_mpi_data_register(data_handle_in, 0, 0); } else { starpu_vector_data_register(&data_handle, STARPU_MAIN_RAM, (uintptr_t) buf1, len, 1); } mpi_sync_clocks_t clocks = mpi_sync_clocks_init(sub_comm); double* lats = (subcomm_rank == 0) ? malloc(iterations * sizeof(double)) : NULL; int k; for (k = 0; k < iterations; k++) { int* rc_all = (subcomm_rank == 0) ? malloc(nb_nodes * sizeof(int)) : NULL; double local_lat = -1.0; int rc = 0; do { const double b = mpi_sync_clocks_barrier(clocks, NULL); rc = (b < 0.0); const double t_begin = mpi_sync_clocks_get_time_usec(clocks); bcast(sub_comm, subcomm_rank, nb_nodes-1); const double t_end = mpi_sync_clocks_get_time_usec(clocks); local_lat = t_end - t_begin; /* collect sync barrier success */ MPI_Gather(&rc, 1, MPI_INT, rc_all, 1, MPI_INT, 0, sub_comm); if (subcomm_rank == 0) { int i; for (i = 0; i < nb_nodes; i++) { rc |= rc_all[i]; } } MPI_Bcast(&rc, 1, MPI_INT, 0, sub_comm); } while(rc != 0); /* find maximum latency across nb_nodes */ double* lat_all = (subcomm_rank == 0) ? malloc(nb_nodes * sizeof(double)) : NULL; MPI_Gather(&local_lat, 1, MPI_DOUBLE, lat_all, 1, MPI_DOUBLE, 0, sub_comm); if (subcomm_rank == 0) { int i; double max_lat = 0.0; for (i = 0; i < nb_nodes; i++) { if (lat_all[i] > max_lat) { max_lat = lat_all[i]; } } lats[k] = max_lat; free(rc_all); free(lat_all); } } /* compute time stats across iterations */ if (subcomm_rank == 0) { qsort(lats, iterations, sizeof(double), &comp_double); const double min_lat = lats[0]; const double max_lat = lats[iterations - 1]; const double med_lat = lats[(iterations - 1) / 2]; double avg_lat = 0.0; for (k = 0; k < iterations; k++) { avg_lat += lats[k]; } avg_lat /= iterations; printf("%4d\t%9lu\t%7d\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf \n", nb_nodes, len, iterations, min_lat, med_lat, avg_lat, max_lat); fflush(stdout); free(lats); } if (use_tasks) { starpu_data_unregister(data_handle_in); starpu_data_unregister(data_handle_out); } else { starpu_data_unregister(data_handle); } free(buf1); free(buf2); mpi_sync_clocks_shutdown(clocks); clocks = NULL; } } SERVER_PRINTF("# bench end\n"); MPI_Group_free(&world_group); starpu_resume(); starpu_mpi_shutdown(); MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/examples/benchs/bench_helper.c000066400000000000000000000024721507764646700224440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "bench_helper.h" int comp_double(const void*_a, const void*_b) { const double* a = _a; const double* b = _b; if(*a < *b) return -1; else if(*a > *b) return 1; else return 0; } uint64_t bench_next_size(uint64_t len) { uint64_t next = len * MULT_DEFAULT; if(next <= len) next++; return next; } uint64_t bench_nb_iterations(int iterations, uint64_t len) { const uint64_t max_data = NX_MAX; if(len == 0) len = 1; uint64_t data_size = ((uint64_t)iterations * (uint64_t)len); if(data_size > max_data) { iterations = (max_data / (uint64_t)len); if(iterations < 2) iterations = 2; } return iterations; } starpu-1.4.9+dfsg/mpi/examples/benchs/bench_helper.h000066400000000000000000000023551507764646700224510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" #define NX_MIN 1 #ifdef STARPU_QUICK_CHECK #define MULT_DEFAULT 4 #define LOOPS_DEFAULT 50 #define NX_MAX (64 * 1) // kB #elif !defined(STARPU_LONG_CHECK) #define MULT_DEFAULT 4 #define LOOPS_DEFAULT 10000 #define NX_MAX (128 * 1024 * 1024) // kB #else #define MULT_DEFAULT 2 #define LOOPS_DEFAULT 100000 #define NX_MAX (512 * 1024 * 1024) // kB #endif int comp_double(const void*_a, const void*_b); uint64_t bench_next_size(uint64_t len); uint64_t bench_nb_iterations(int iterations, uint64_t len); starpu-1.4.9+dfsg/mpi/examples/benchs/burst.c000066400000000000000000000035141507764646700211630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This test sends simultaneously many communications, with various configurations. * * Global purpose is to run with trace recording, to watch the behaviour of communications. */ #include #include "helper.h" #include "burst_helper.h" void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-nreqs") == 0) { burst_nb_requests = atoi(argv[++i]); } else if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) { fprintf(stderr,"Usage: %s [-nreqs nreqs]\n", argv[0]); fprintf(stderr,"Currently selected: %d requests in each burst\n", burst_nb_requests); exit(EXIT_SUCCESS); } else { fprintf(stderr,"Unrecognized option %s\n", argv[i]); exit(EXIT_FAILURE); } } } int main(int argc, char **argv) { int ret, rank; parse_args(argc, argv); ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); burst_init_data(rank); burst_all(rank); /* Clear up */ burst_free_data(rank); starpu_mpi_shutdown(); return 0; } starpu-1.4.9+dfsg/mpi/examples/benchs/burst_gemm.c000066400000000000000000000125121507764646700221660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * Program to be executed with trace recording to watch the impact of * computations (or task polling) on communications. */ #include #include #include #include #include #include #include "helper.h" #include "gemm_helper.h" #include "burst_helper.h" static int gemm_warmup = 1; static int gemm_warmup_wait = 0; void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-nblocks") == 0) { char *argptr; nslices = strtol(argv[++i], &argptr, 10); matrix_dim = 320 * nslices; } else if (strcmp(argv[i], "-size") == 0) { char *argptr; unsigned matrix_dim_tmp = strtol(argv[++i], &argptr, 10); if (matrix_dim_tmp % 320 != 0) { fprintf(stderr, "Matrix size has to be a multiple of 320\n"); } else { matrix_dim = matrix_dim_tmp; nslices = matrix_dim / 320; } } else if (strcmp(argv[i], "-check") == 0) { check = 1; } else if (strcmp(argv[i], "-nreqs") == 0) { burst_nb_requests = atoi(argv[++i]); } else if (strcmp(argv[i], "-no-gemm-warmup") == 0) { gemm_warmup = 0; } else if (strcmp(argv[i], "-gemm-warmup-wait") == 0) { /* All warmup GEMMs will start at the same moment */ gemm_warmup_wait = 1; } else if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) { fprintf(stderr,"Usage: %s [-nblocks n] [-size size] [-check] [-nreqs nreqs] [-no-gemm-warmup] [-gemm-warmup-wait]\n", argv[0]); fprintf(stderr,"Currently selected: matrix size: %u - %u blocks - %d requests in each burst - gemm warmup: %d -gemm-warmup-wait: %d\n", matrix_dim, nslices, burst_nb_requests, gemm_warmup, gemm_warmup_wait); exit(EXIT_SUCCESS); } else { fprintf(stderr,"Unrecognized option %s\n", argv[i]); exit(EXIT_FAILURE); } } } int main(int argc, char **argv) { int ret, worldsize, mpi_rank; #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) matrix_dim = 16; #endif parse_args(argc, argv); ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &mpi_rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); if (worldsize < 2) { if (mpi_rank == 0) FPRINTF(stderr, "We need 2 processes.\n"); starpu_mpi_shutdown(); return STARPU_TEST_SKIPPED; } gemm_alloc_data(); if (gemm_init_data() == -ENODEV) goto enodev; /* GEMM warmup, to really load the BLAS library */ if (gemm_warmup) { if (gemm_warmup_wait) { starpu_task_wait_for_all(); starpu_pause(); } if(gemm_submit_tasks() == -ENODEV) goto enodev; if (gemm_warmup_wait) { starpu_resume(); } } burst_init_data(mpi_rank); /* Wait for everything and everybody: */ starpu_task_wait_for_all(); starpu_mpi_barrier(MPI_COMM_WORLD); FPRINTF(stderr, "** Burst warmup **\n"); burst_all(mpi_rank); starpu_sleep(0.3); // sleep to easily distinguish different bursts in traces FPRINTF(stderr, "** Burst while there is no task available, but workers are polling **\n"); burst_all(mpi_rank); starpu_sleep(0.3); // sleep to easily distinguish different bursts in traces FPRINTF(stderr, "** Burst while there is no task available, workers are paused **\n"); starpu_pause(); burst_all(mpi_rank); starpu_sleep(0.3); // sleep to easily distinguish different bursts in traces FPRINTF(stderr, "** Burst while workers are really working **\n"); if(gemm_submit_tasks() == -ENODEV) goto enodev; starpu_resume(); burst_all(mpi_rank); FPRINTF(stderr, "Burst done, now waiting for computing tasks to finish\n"); /* Wait for everything and everybody: */ starpu_task_wait_for_all(); starpu_mpi_barrier(MPI_COMM_WORLD); starpu_sleep(0.3); // sleep to easily distinguish different parts in traces FPRINTF(stderr, "** Workers are computing, without communications **\n"); starpu_pause(); if(gemm_submit_tasks() == -ENODEV) goto enodev; starpu_resume(); /* Wait for everything and everybody: */ starpu_task_wait_for_all(); starpu_mpi_barrier(MPI_COMM_WORLD); starpu_sleep(0.3); // sleep to easily distinguish different parts in traces FPRINTF(stderr, "** Burst while workers are computing, but polling a moment between each task **\n"); starpu_pause(); gemm_add_polling_dependencies(); if(gemm_submit_tasks_with_tags(/* enable task tags */ 1) == -ENODEV) goto enodev; starpu_resume(); burst_all(mpi_rank); /* Wait for everything and everybody: */ starpu_task_wait_for_all(); starpu_mpi_barrier(MPI_COMM_WORLD); enodev: gemm_release(); burst_free_data(mpi_rank); starpu_mpi_shutdown(); return ret; } starpu-1.4.9+dfsg/mpi/examples/benchs/burst_helper.c000066400000000000000000000164251507764646700225270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #include "burst_helper.h" #if defined(STARPU_SIMGRID) || defined(STARPU_QUICK_CHECK) #define NB_REQUESTS 10 #else #define NB_REQUESTS 50 #endif #define NX_ARRAY (320 * 320) static starpu_data_handle_t* recv_handles; static starpu_data_handle_t* send_handles; static float** recv_buffers; static float** send_buffers; static starpu_mpi_req* recv_reqs; static starpu_mpi_req* send_reqs; int burst_nb_requests = NB_REQUESTS; void burst_init_data(int rank) { unsigned nx = NX_ARRAY; #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) { nx = 4*4; burst_nb_requests = 4; } #endif if (rank == 0 || rank == 1) { recv_handles = malloc(burst_nb_requests * sizeof(starpu_data_handle_t)); send_handles = malloc(burst_nb_requests * sizeof(starpu_data_handle_t)); recv_buffers = malloc(burst_nb_requests * sizeof(float*)); send_buffers = malloc(burst_nb_requests * sizeof(float*)); recv_reqs = malloc(burst_nb_requests * sizeof(starpu_mpi_req)); send_reqs = malloc(burst_nb_requests * sizeof(starpu_mpi_req)); int i = 0; for (i = 0; i < burst_nb_requests; i++) { starpu_malloc((void **)&send_buffers[i], NX_ARRAY * sizeof(float)); memset(send_buffers[i], 0, NX_ARRAY * sizeof(float)); starpu_vector_data_register(&send_handles[i], STARPU_MAIN_RAM, (uintptr_t) send_buffers[i], NX_ARRAY, sizeof(float)); starpu_malloc((void **)&recv_buffers[i], NX_ARRAY * sizeof(float)); memset(recv_buffers[i], 0, NX_ARRAY * sizeof(float)); starpu_vector_data_register(&recv_handles[i], STARPU_MAIN_RAM, (uintptr_t) recv_buffers[i], NX_ARRAY, sizeof(float)); } } } void burst_free_data(int rank) { if (rank == 0 || rank == 1) { int i = 0; for (i = 0; i < burst_nb_requests; i++) { starpu_data_unregister(send_handles[i]); starpu_free_noflag(send_buffers[i], NX_ARRAY * sizeof(float)); starpu_data_unregister(recv_handles[i]); starpu_free_noflag(recv_buffers[i], NX_ARRAY * sizeof(float)); } free(recv_handles); free(send_handles); free(recv_buffers); free(send_buffers); free(recv_reqs); free(send_reqs); } } /* Burst simultaneous from both nodes: 0 and 1 post all the recvs, synchronise, and then post all the sends */ void burst_bidir(int rank) { int other_rank = (rank == 0) ? 1 : 0; int i, ret; FPRINTF(stderr, "Simultaneous....start (rank %d)\n", rank); if (rank == 0 || rank == 1) { for (i = 0; i < burst_nb_requests; i++) { recv_reqs[i] = NULL; ret = starpu_mpi_irecv(recv_handles[i], &recv_reqs[i], other_rank, i, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); } } starpu_mpi_barrier(MPI_COMM_WORLD); if (rank == 0 || rank == 1) { for (i = 0; i < burst_nb_requests; i++) { send_reqs[i] = NULL; ret = starpu_mpi_isend_prio(send_handles[i], &send_reqs[i], other_rank, i, i, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_prio"); } for (i = 0; i < burst_nb_requests; i++) { if (recv_reqs[i]) ret = starpu_mpi_wait(&recv_reqs[i], MPI_STATUS_IGNORE); if (send_reqs[i]) ret = starpu_mpi_wait(&send_reqs[i], MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); } } FPRINTF(stderr, "Simultaneous....end (rank %d)\n", rank); starpu_mpi_barrier(MPI_COMM_WORLD); } void burst_unidir(int sender, int receiver, int rank) { FPRINTF(stderr, "%d -> %d... start (rank %d)\n", sender, receiver, rank); int i, ret; if (rank == receiver) { for (i = 0; i < burst_nb_requests; i++) { recv_reqs[i] = NULL; ret = starpu_mpi_irecv(recv_handles[i], &recv_reqs[i], sender, i, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); } } starpu_mpi_barrier(MPI_COMM_WORLD); if (rank == sender) { for (i = 0; i < burst_nb_requests; i++) { send_reqs[i] = NULL; ret = starpu_mpi_isend_prio(send_handles[i], &send_reqs[i], receiver, i, i, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_prio"); } } if (rank == sender || rank == receiver) { for (i = 0; i < burst_nb_requests; i++) { if (rank != sender && recv_reqs[i]) ret = starpu_mpi_wait(&recv_reqs[i], MPI_STATUS_IGNORE); if (rank == sender && send_reqs[i]) ret = starpu_mpi_wait(&send_reqs[i], MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); } } FPRINTF(stderr, "%d -> %d... end (rank %d)\n", sender, receiver, rank); starpu_mpi_barrier(MPI_COMM_WORLD); } /* Half burst from both nodes, second half burst is triggered after some requests finished. */ void burst_bidir_half_postponed(int rank) { int other_rank = (rank == 0) ? 1 : 0; int i, ret; FPRINTF(stderr, "Half/half burst...start (rank %d)\n", rank); if (rank == 0 || rank == 1) { for (i = 0; i < burst_nb_requests; i++) { recv_reqs[i] = NULL; ret = starpu_mpi_irecv(recv_handles[i], &recv_reqs[i], other_rank, i, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); } } starpu_mpi_barrier(MPI_COMM_WORLD); if (rank == 0 || rank == 1) { for (i = 0; i < (burst_nb_requests / 2); i++) { send_reqs[i] = NULL; ret = starpu_mpi_isend_prio(send_handles[i], &send_reqs[i], other_rank, i, i, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_prio"); } if (recv_reqs[burst_nb_requests / 4]) { ret = starpu_mpi_wait(&recv_reqs[burst_nb_requests / 4], MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); } for (i = (burst_nb_requests / 2); i < burst_nb_requests; i++) { send_reqs[i] = NULL; ret = starpu_mpi_isend_prio(send_handles[i], &send_reqs[i], other_rank, i, i, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_prio"); } for (i = 0; i < burst_nb_requests; i++) { if (recv_reqs[i]) ret = starpu_mpi_wait(&recv_reqs[i], MPI_STATUS_IGNORE); if (send_reqs[i]) ret = starpu_mpi_wait(&send_reqs[i], MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); } } FPRINTF(stderr, "Half/half burst...done (rank %d)\n", rank); starpu_mpi_barrier(MPI_COMM_WORLD); } void burst_all(int rank) { double start, end; start = starpu_timing_now(); /* Burst simultaneous from both nodes: 0 and 1 post all the recvs, synchronise, and then post all the sends */ burst_bidir(rank); /* Burst from 0 to 1 : rank 1 posts all the recvs, barrier, then rank 0 posts all the sends */ burst_unidir(0, 1, rank); /* Burst from 1 to 0 : rank 0 posts all the recvs, barrier, then rank 1 posts all the sends */ burst_unidir(1, 0, rank); /* Half burst from both nodes, second half burst is triggered after some requests finished. */ burst_bidir_half_postponed(rank); end = starpu_timing_now(); FPRINTF(stderr, "All bursts took %.0f ms\n", (end - start) / 1000.0); } starpu-1.4.9+dfsg/mpi/examples/benchs/burst_helper.h000066400000000000000000000020261507764646700225240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __MPI_TESTS_BURST_HELPER__ #define __MPI_TESTS_BURST_HELPER__ extern int burst_nb_requests; void burst_init_data(int rank); void burst_free_data(int rank); void burst_bidir(int rank); void burst_unidir(int sender, int receiver, int rank); void burst_bidir_half_postponed(int rank); void burst_all(int rank); #endif /* __MPI_TESTS_BURST_HELPER__ */ starpu-1.4.9+dfsg/mpi/examples/benchs/gemm_helper.c000066400000000000000000000231711507764646700223110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../../examples/mult/simple.h" #include "helper.h" #include "gemm_helper.h" #define CHECK_TASK_SUBMIT(ret) do { \ if (ret == -ENODEV) \ { \ return -ENODEV; \ } \ STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); \ } while(0) unsigned nslices = 4; #if defined(STARPU_QUICK_CHECK) && !defined(STARPU_SIMGRID) unsigned matrix_dim = 256; #else unsigned matrix_dim = 320 * 4; #endif unsigned check = 0; int comm_thread_cpuid = -1; static TYPE *A, *B, *C; static starpu_data_handle_t A_handle, B_handle, C_handle; static void check_output(void) { /* compute C = C - AB */ CPU_GEMM("N", "N", matrix_dim, matrix_dim, matrix_dim, (TYPE)-1.0f, A, matrix_dim, B, matrix_dim, (TYPE)1.0f, C, matrix_dim); /* make sure C = 0 */ TYPE err; err = CPU_ASUM(matrix_dim*matrix_dim, C, 1); if (err < matrix_dim*matrix_dim*0.001) { FPRINTF(stderr, "Results are OK\n"); } else { int max; max = CPU_IAMAX(matrix_dim*matrix_dim, C, 1); FPRINTF(stderr, "There were errors ... err = %f\n", err); FPRINTF(stderr, "Max error : %e\n", C[max]); } } static void partition_mult_data(void) { starpu_matrix_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, matrix_dim, matrix_dim, matrix_dim, sizeof(TYPE)); starpu_matrix_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, matrix_dim, matrix_dim, matrix_dim, sizeof(TYPE)); starpu_matrix_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C, matrix_dim, matrix_dim, matrix_dim, sizeof(TYPE)); struct starpu_data_filter vert; memset(&vert, 0, sizeof(vert)); vert.filter_func = starpu_matrix_filter_vertical_block; vert.nchildren = nslices; struct starpu_data_filter horiz; memset(&horiz, 0, sizeof(horiz)); horiz.filter_func = starpu_matrix_filter_block; horiz.nchildren = nslices; starpu_data_partition(B_handle, &vert); starpu_data_partition(A_handle, &horiz); starpu_data_map_filters(C_handle, 2, &vert, &horiz); } static void cpu_init_matrix_random(void *descr[], void *arg) { (void)arg; TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned nx = STARPU_MATRIX_GET_NX(descr[0]); unsigned ny = STARPU_MATRIX_GET_NY(descr[0]); unsigned i = 0; for (i = 0; i < nx *ny; i++) { subA[i] = (TYPE) (starpu_drand48()); subB[i] = (TYPE) (starpu_drand48()); } } static void cpu_init_matrix_zero(void *descr[], void *arg) { (void)arg; TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); unsigned nx = STARPU_MATRIX_GET_NX(descr[0]); unsigned ny = STARPU_MATRIX_GET_NY(descr[0]); unsigned i = 0; for (i = 0; i < nx *ny; i++) { subA[i] = (TYPE) (0); } } static void cpu_mult(void *descr[], void *arg) { (void)arg; TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); int worker_size = starpu_combined_worker_get_size(); if (worker_size == 1) { /* Sequential CPU task */ CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, (TYPE)0.0, subC, ldC); } else { /* Parallel CPU task */ unsigned rank = starpu_combined_worker_get_rank(); unsigned block_size = (nyC + worker_size - 1)/worker_size; unsigned new_nyC = STARPU_MIN(nyC, block_size*(rank+1)) - block_size*rank; STARPU_ASSERT(nyC == STARPU_MATRIX_GET_NY(descr[1])); TYPE *new_subB = &subB[block_size*rank]; TYPE *new_subC = &subC[block_size*rank]; CPU_GEMM("N", "N", nxC, new_nyC, nyA, (TYPE)1.0, subA, ldA, new_subB, ldB, (TYPE)0.0, new_subC, ldC); } } static struct starpu_perfmodel starpu_gemm_model = { .type = STARPU_HISTORY_BASED, .symbol = STARPU_GEMM_STR(gemm) }; static struct starpu_codelet cl = { .type = STARPU_SEQ, /* changed to STARPU_SPMD if -spmd is passed */ .max_parallelism = INT_MAX, .cpu_funcs = {cpu_mult}, .cpu_funcs_name = {"cpu_mult"}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_RW}, .model = &starpu_gemm_model }; static struct starpu_codelet cl_init_matrix_random = { .max_parallelism = INT_MAX, .cpu_funcs = {cpu_init_matrix_random}, .cpu_funcs_name = {"cpu_init_matrix_random"}, .nbuffers = 2, .modes = {STARPU_W, STARPU_W}, .name = "init_matrix_random", .color = 0xffa500 // orange }; static struct starpu_codelet cl_init_matrix_zero = { .max_parallelism = INT_MAX, .cpu_funcs = {cpu_init_matrix_zero}, .cpu_funcs_name = {"cpu_init_matrix_zero"}, .nbuffers = 1, .modes = {STARPU_W}, .name = "init_matrix_zero", .color = 0x808000 // olive }; /* Allocate and partition buffers */ void gemm_alloc_data() { starpu_malloc_flags((void **)&A, matrix_dim*matrix_dim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_malloc_flags((void **)&B, matrix_dim*matrix_dim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_malloc_flags((void **)&C, matrix_dim*matrix_dim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); partition_mult_data(); } /* Submit tasks to initialize matrices: fill them with zeros or random numbers */ int gemm_init_data() { #ifndef STARPU_SIMGRID int ret; unsigned x, y; for (x = 0; x < nslices; x++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl_init_matrix_random; task->handles[0] = starpu_data_get_sub_data(A_handle, 1, x); task->handles[1] = starpu_data_get_sub_data(B_handle, 1, x); ret = starpu_task_submit(task); CHECK_TASK_SUBMIT(ret); for (y = 0; y < nslices; y++) { task = starpu_task_create(); task->cl = &cl_init_matrix_zero; task->handles[0] = starpu_data_get_sub_data(C_handle, 2, x, y); ret = starpu_task_submit(task); CHECK_TASK_SUBMIT(ret); } } #endif return 0; } /* Submit tasks to compute the GEMM */ int gemm_submit_tasks() { return gemm_submit_tasks_with_tags(/* by default, disable task tags */ 0); } int gemm_submit_tasks_with_tags(int with_tags) { int ret; unsigned x, y; starpu_tag_t task_tag = 0; for (x = 0; x < nslices; x++) for (y = 0; y < nslices; y++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = starpu_data_get_sub_data(A_handle, 1, y); task->handles[1] = starpu_data_get_sub_data(B_handle, 1, x); task->handles[2] = starpu_data_get_sub_data(C_handle, 2, x, y); task->flops = 2ULL * (matrix_dim/nslices) * (matrix_dim/nslices) * matrix_dim; if (with_tags) { task->use_tag = 1; task->tag_id = ++task_tag; } ret = starpu_task_submit(task); CHECK_TASK_SUBMIT(ret); starpu_data_wont_use(starpu_data_get_sub_data(C_handle, 2, x, y)); } return 0; } /* Add dependencies between GEMM tasks to see the impact of polling workers which will at the end get a task. * The new dependency graph has the following shape: * - the same number of GEMMs as the number of workers are executed in parallel on all workers ("a column of tasks") * - then a GEMM waits all tasks of the previous column of tasks, and is executed on a worker * - the next column of tasks waits for the previous GEMM * - and so on... * * worker 0 | 1 | 4 | 5 | 8 | 9 | * worker 1 | 2 | | 6 | | 10 | ... * worker 2 | 3 | | 7 | | 11 | * * This function has to be called before gemm_submit_tasks_with_tags(1). */ void gemm_add_polling_dependencies() { starpu_tag_t nb_tasks = (starpu_tag_t) nslices * (starpu_tag_t) nslices; unsigned nb_workers = starpu_worker_get_count(); starpu_tag_t synchro_tag; starpu_tag_t previous_tag; starpu_tag_t next_tag; for (synchro_tag = nb_workers+1; synchro_tag <= nb_tasks; synchro_tag += (nb_workers+1)) { // this synchro tag depends on tasks of previous column of tasks: for (previous_tag = synchro_tag - nb_workers; previous_tag < synchro_tag; previous_tag++) { starpu_tag_declare_deps(synchro_tag, 1, previous_tag); } // tasks of the next column of tasks depend on this synchro tag: // this actually allows workers to poll for new tasks, while no task is available for (next_tag = synchro_tag+1; next_tag < (synchro_tag + nb_workers + 1) && next_tag <= nb_tasks; next_tag++) { starpu_tag_declare_deps(next_tag, 1, synchro_tag); } } } void gemm_release() { starpu_data_unpartition(C_handle, STARPU_MAIN_RAM); starpu_data_unpartition(B_handle, STARPU_MAIN_RAM); starpu_data_unpartition(A_handle, STARPU_MAIN_RAM); starpu_data_unregister(A_handle); starpu_data_unregister(B_handle); starpu_data_unregister(C_handle); if (check) check_output(); starpu_free_flags(A, matrix_dim*matrix_dim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_free_flags(B, matrix_dim*matrix_dim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); starpu_free_flags(C, matrix_dim*matrix_dim*sizeof(TYPE), STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED); } starpu-1.4.9+dfsg/mpi/examples/benchs/gemm_helper.h000066400000000000000000000021261507764646700223130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __MPI_TESTS_GEMM_HELPER__ #define __MPI_TESTS_GEMM_HELPER__ #include extern unsigned nslices; extern unsigned matrix_dim; extern unsigned check; extern int comm_thread_cpuid; void gemm_alloc_data(); int gemm_init_data(); int gemm_submit_tasks(); void gemm_release(); void gemm_add_polling_dependencies(); int gemm_submit_tasks_with_tags(int with_tags); #endif /* __MPI_TESTS_GEMM_HELPER__ */ starpu-1.4.9+dfsg/mpi/examples/benchs/recv_wait_finalize_bench.c000066400000000000000000000206051507764646700250270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This benchmark measures the impact of the STARPU_MPI_RECV_FINALIZE env var: * when set to 0, StarPU can use received buffers for task only reading these * buffers, while the communication library still holds a reference on this * buffer (to continue the tree broadcast, for instance). * Inspired a lot from NewMadeleine examples/mcast/nm_mcast_prio.c * * Synchronized clocks (mpi_sync_clocks) are available here: * https://gitlab.inria.fr/pm2/pm2/-/tree/master/mpi_sync_clocks * and are detected during StarPU's configure. */ #include #include #include "helper.h" #define SERVER_PRINTF(fmt, ...) do { if(rank == 0) { printf(fmt, ## __VA_ARGS__); fflush(stdout); }} while(0) #define DEFAULT_ARRAY_SIZE 1 #ifdef STARPU_QUICK_CHECK #define DEFAULT_ROUND 5 #else #define DEFAULT_ROUND 200 #endif static starpu_data_handle_t data_handle; static int rank; static double received_time, finalized_time; static mpi_sync_clocks_t clocks; static int* prios; // Codelet executed just to block start of the broadcast and be sure the broadcast will be correctly detected: static void trigger_coop_cpu_func(void *descr[], void *args) { (void) descr; (void) args; } static struct starpu_codelet trigger_coop_cl = { .cpu_funcs = { trigger_coop_cpu_func }, .cpu_funcs_name = { "trigger_coop_task" }, .name = "trigger_coop", .nbuffers = 1, .modes = { STARPU_W } }; // Codelet executed when data just arrived, but communication library has still a reference on it static void received_cpu_func(void *descr[], void *args) { (void) descr; (void) args; received_time = mpi_sync_clocks_get_time_usec(clocks); } static struct starpu_codelet received_cl = { .cpu_funcs = { received_cpu_func }, .cpu_funcs_name = { "received_task" }, .name = "received", .nbuffers = 1, .modes = { STARPU_R } }; // Codelet executed when data is released by communication library static void finalized_cpu_func(void *descr[], void *args) { (void) descr; (void) args; finalized_time = mpi_sync_clocks_get_time_usec(clocks); } static struct starpu_codelet finalized_cl = { .cpu_funcs = { finalized_cpu_func }, .cpu_funcs_name = { "finalized_task" }, .name = "finalized", .nbuffers = 1, .modes = { STARPU_W } }; static void usage(void) { fprintf(stderr, "-s array size - number of bytes to broadcast [%d]\n", DEFAULT_ARRAY_SIZE); fprintf(stderr, "-rounds rounds - number of iterations [%d]\n", DEFAULT_ROUND); } static void bcast(int nb_dests, double* time_to_receive, double* time_to_finalize) { int i = 0; starpu_mpi_data_set_rank(data_handle, 0); /* This first task is just to retain communications, and be sure they * will be detected as a broadcast, if there are enough nodes. */ starpu_mpi_task_insert(MPI_COMM_WORLD, &trigger_coop_cl, STARPU_W, data_handle, 0); for (i = 1; i < nb_dests; i++) { starpu_mpi_task_insert(MPI_COMM_WORLD, &received_cl, STARPU_R, data_handle, STARPU_EXECUTE_ON_NODE, i, STARPU_PRIORITY, prios[i-1], 0); } for (i = 1; i < nb_dests; i++) { /* Little bit hacky here: we change the owner of the handle to * be the node on which we are just about to submit a task to * be executed on that node, with this handle. This is done to * avoid additional communications we don't want in this bench. * In real applications, the coherency of the data will * probably be broken, but for this bench we don't care. */ starpu_mpi_data_set_rank(data_handle, i); starpu_mpi_task_insert(MPI_COMM_WORLD, &finalized_cl, STARPU_W, data_handle, 0); } mpi_sync_clocks_barrier(clocks, NULL); const double t_begin = mpi_sync_clocks_get_time_usec(clocks); /* Resume StarPU's workers only after submitting tasks, to make * sure the coop will be correctly detected. */ starpu_resume(); starpu_task_wait_for_all(); starpu_pause(); *time_to_receive = received_time - t_begin; *time_to_finalize = finalized_time - t_begin; } int main(int argc, char**argv) { int i, ret, worldsize, rounds = DEFAULT_ROUND, thread_support; long long int s = DEFAULT_ARRAY_SIZE; double time_to_receive, time_to_finalize; double total_time_to_receive = 0.0, total_time_to_finalize = 0.0; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-s") == 0) { s = (int long long) atoi(argv[++i]); continue; } if (strcmp(argv[i], "-rounds") == 0) { rounds = atoi(argv[++i]); continue; } else { fprintf(stderr, "%s: illegal argument %s\n", argv[0], argv[i]); usage(); exit(1); } } if (rounds <= 0) { FPRINTF(stderr, "The number of iterations has to be greater than 0.\n"); return EXIT_FAILURE; } if (MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &thread_support) != MPI_SUCCESS) { FPRINTF(stderr, "MPI_Init_thread failed\n"); return EXIT_FAILURE; } if (thread_support < MPI_THREAD_MULTIPLE) { /* We need MPI_THREAD_MULTIPLE for the StarPU's MPI thread and * the main thread calling functions from mpi_sync_clocks. */ FPRINTF(stderr, "This benchmark requires MPI_THREAD_MULTIPLE support.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); if (worldsize < 2) { if (rank == 0) FPRINTF(stderr, "We need at least 2 processes.\n"); starpu_mpi_shutdown(); return STARPU_TEST_SKIPPED; } /* Pause workers for this bench, to avoid any impact on performances * from polling workers, and to detect correctly coop */ starpu_pause(); starpu_mpi_barrier(MPI_COMM_WORLD); SERVER_PRINTF("# message size = %lld B\n", s); SERVER_PRINTF("# iterations = %d\n", rounds); SERVER_PRINTF("# coop = %s\n", starpu_mpi_coop_sends_get_use() ? "on" : "off"); SERVER_PRINTF("# node ; prio ; delay data (usec.); finalized (usec.)\n"); clocks = mpi_sync_clocks_init(MPI_COMM_WORLD); prios = malloc((worldsize-1) * sizeof(int)); for (i = 0; i < worldsize-1; i++) { prios[i] = i; } char* buffer = malloc(s); memset(buffer, 0, s); /* To keep the same buffer and get good performances with rcache, we * provide the buffer for sender and receivers. If we let StarPU manage * the buffer, it can change it between iterations. * The original owner of the data (the sender) is defined with * starpu_mpi_data_set_rank() in bcast(). */ starpu_vector_data_register(&data_handle, STARPU_MAIN_RAM, (uintptr_t) buffer, s, sizeof(char)); starpu_mpi_data_set_tag(data_handle, 0xee); for (i = 0; i < rounds; i++) { bcast(worldsize, &time_to_receive, &time_to_finalize); total_time_to_receive += time_to_receive; total_time_to_finalize += time_to_finalize; } total_time_to_receive /= rounds; total_time_to_finalize /= rounds; if (rank == 0) { double* totals_time_to_receive = malloc(sizeof(double) * worldsize); double* totals_time_to_finalize = malloc(sizeof(double) * worldsize); MPI_Gather(&total_time_to_receive, 1, MPI_DOUBLE, totals_time_to_receive, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Gather(&total_time_to_finalize, 1, MPI_DOUBLE, totals_time_to_finalize, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); for (i = 1; i < worldsize; i++) { printf("%d \t %d \t %g \t %g\n", i, prios[i-1], totals_time_to_receive[i], totals_time_to_finalize[i]); } free(totals_time_to_receive); free(totals_time_to_finalize); } else { MPI_Gather(&total_time_to_receive, 1, MPI_DOUBLE, NULL, 0, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Gather(&total_time_to_finalize, 1, MPI_DOUBLE, NULL, 0, MPI_DOUBLE, 0, MPI_COMM_WORLD); } starpu_data_unregister(data_handle); free(buffer); free(prios); mpi_sync_clocks_shutdown(clocks); SERVER_PRINTF("# bench end\n"); starpu_resume(); starpu_mpi_shutdown(); MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/examples/benchs/sendrecv_bench.c000066400000000000000000000063111507764646700227720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * Basic send receive benchmark. * Inspired a lot from NewMadeleine examples/benchmarks/nm_bench_sendrecv.c * * The option --bidir is available to do full-duplex communications. */ #include #include "helper.h" #include "abstract_sendrecv_bench.h" static inline void man() { fprintf(stderr, "Options:\n"); fprintf(stderr, "\t-h --help display this help\n"); fprintf(stderr, "\t-p pause workers during benchmark\n"); fprintf(stderr, "\t--bidir full-duplex communications\n"); fprintf(stderr, "\t--memnode-cuda allocate message buffers on first cuda device\n"); exit(EXIT_SUCCESS); } int main(int argc, char **argv) { int ret, rank, worldsize; int pause_workers = 0; int i; int bidir = 0; int mem_node = STARPU_MAIN_RAM; ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-p") == 0) { pause_workers = 1; printf("Workers will be paused during benchmark.\n"); } else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { man(); } else if (strcmp(argv[i], "--bidir") == 0) { bidir = 1; printf("Communications will be full-duplex.\n"); } else if (strcmp(argv[i], "--memnode-cuda") == 0) { int worker_id = starpu_worker_get_by_type(STARPU_CUDA_WORKER, 0); if(worker_id == -1) { fprintf(stderr,"Error: asked for CUDA memory node allocation, but no cuda worker found.\n"); starpu_mpi_shutdown(); return STARPU_TEST_SKIPPED; } else { mem_node = starpu_worker_get_memory_node(worker_id); fprintf(stderr,"Memory will be allocated on the first CUDA worker.\n"); } } else { fprintf(stderr,"Unrecognized option %s\n", argv[i]); man(); } } starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); if (worldsize < 2) { if (rank == 0) FPRINTF(stderr, "We need 2 processes.\n"); starpu_mpi_shutdown(); return STARPU_TEST_SKIPPED; } #if !defined(STARPU_LONG_CHECK) if (rank == 0) { printf("To have a more precise benchmark, configure StarPU with --enable-long-check\n"); } #endif if (pause_workers) { /* Pause workers for this bench: all workers polling for tasks has a strong impact on performances */ starpu_pause(); } ret = sendrecv_bench(rank, NULL, bidir, mem_node); if (ret == -ENODEV) { fprintf(stderr, "No device available\n"); } if (pause_workers) { starpu_resume(); } starpu_mpi_shutdown(); return 0; } starpu-1.4.9+dfsg/mpi/examples/benchs/sendrecv_gemm_bench.c000066400000000000000000000122611507764646700240000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * Simple *not distributed* parallel GEMM implementation and sendrecv bench at the same time. * * This bench is a merge of mpi/tests/sendrecv_bench and examples/mult/sgemm * * A *non-distributed* GEMM is computed on each node, while a sendrecv bench is running, * completely independently. The goal is to measure the impact of worker computations on * communications. * * Use the -nblocks parameter to define the matrix size (matrix size = nblocks * 320), such as * the GEMM finishes after the sendrecv bench. */ #include #include #include #include #include #include #include "helper.h" #include "abstract_sendrecv_bench.h" #include "gemm_helper.h" static int mpi_rank; static starpu_pthread_barrier_t thread_barrier; static void* comm_thread_func(void* arg) { if (comm_thread_cpuid < 0) { comm_thread_cpuid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, NULL, 0); } if (starpu_bind_thread_on(comm_thread_cpuid, 0, "Comm") < 0) { char hostname[65]; gethostname(hostname, sizeof(hostname)); fprintf(stderr, "[%s] No core was available for the comm thread. You should increase STARPU_RESERVE_NCPU or decrease STARPU_NCPU\n", hostname); } int ret = sendrecv_bench(mpi_rank, &thread_barrier, /* half-duplex communications */ 0, /* allocate MPI buffers on CPU */ STARPU_MAIN_RAM); if (ret == -ENODEV) { fprintf(stderr, "No device available\n"); } return NULL; } void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-nblocks") == 0) { char *argptr; nslices = strtol(argv[++i], &argptr, 10); matrix_dim = 320 * nslices; } else if (strcmp(argv[i], "-size") == 0) { char *argptr; unsigned matrix_dim_tmp = strtol(argv[++i], &argptr, 10); if (matrix_dim_tmp % 320 != 0) { fprintf(stderr, "Matrix size has to be a multiple of 320\n"); } else { matrix_dim = matrix_dim_tmp; nslices = matrix_dim / 320; } } else if (strcmp(argv[i], "-check") == 0) { check = 1; } else if (strcmp(argv[i], "-comm-thread-cpuid") == 0) { comm_thread_cpuid = atoi(argv[++i]); } else if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0) { fprintf(stderr,"Usage: %s [-nblocks n] [-size size] [-check] [-comm-thread-cpuid cpuid]\n", argv[0]); fprintf(stderr,"Currently selected: matrix size: %u - %u blocks\n", matrix_dim, nslices); fprintf(stderr, "Use -comm-thread-cpuid to specify where to bind the comm benchmarking thread\n"); exit(EXIT_SUCCESS); } else { fprintf(stderr,"Unrecognized option %s\n", argv[i]); exit(EXIT_FAILURE); } } } int main(int argc, char **argv) { double start, end; int ret, worldsize; starpu_pthread_t comm_thread; char hostname[255]; gethostname(hostname, 255); parse_args(argc, argv); starpu_fxt_autostart_profiling(0); ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &mpi_rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); if (worldsize < 2) { if (mpi_rank == 0) FPRINTF(stderr, "We need 2 processes.\n"); starpu_mpi_shutdown(); return STARPU_TEST_SKIPPED; } STARPU_PTHREAD_BARRIER_INIT(&thread_barrier, NULL, 2); // Start comm thread, benchmarking sendrecv: STARPU_PTHREAD_CREATE(&comm_thread, NULL, comm_thread_func, NULL); // Main thread will submit GEMM tasks: gemm_alloc_data(); if (mpi_rank == 0) { printf("# node\tx\ty\tz\tms\tGFlops\n"); } starpu_pause(); if(gemm_init_data() == -ENODEV || gemm_submit_tasks() == -ENODEV) { starpu_mpi_barrier(MPI_COMM_WORLD); STARPU_PTHREAD_BARRIER_WAIT(&thread_barrier); ret = 77; goto enodev; } starpu_mpi_barrier(MPI_COMM_WORLD); starpu_fxt_start_profiling(); STARPU_PTHREAD_BARRIER_WAIT(&thread_barrier); start = starpu_timing_now(); starpu_resume(); starpu_task_wait_for_all(); end = starpu_timing_now(); double timing = end - start; double flops = 2.0*((unsigned long long)matrix_dim) * ((unsigned long long)matrix_dim)*((unsigned long long)matrix_dim); printf("%s\t%u\t%u\t%u\t%.0f\t%.1f\n", hostname, matrix_dim, matrix_dim, matrix_dim, timing/1000.0, flops/timing/1000.0); enodev: gemm_release(); // Wait comm thread: STARPU_PTHREAD_JOIN(comm_thread, NULL); STARPU_PTHREAD_BARRIER_DESTROY(&thread_barrier); starpu_fxt_stop_profiling(); if (ret) starpu_resume(); starpu_mpi_shutdown(); return ret; } starpu-1.4.9+dfsg/mpi/examples/benchs/sendrecv_parallel_tasks_bench.c000066400000000000000000000151431507764646700260560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * sendrecv benchmark from different tasks, executed simultaneously on several * workers. * Inspired a lot from NewMadeleine examples/piom/nm_piom_pingpong.c * * The goal is to measure impact of calls to starpu_mpi_* from different threads. * * Use STARPU_NCPU to set the number of parallel ping pongs * * * Note: This currently can not work with the MPI backend with more than 1 CPU, * since with big sizes, the MPI_Wait call in the MPI thread may block waiting * for the peer to call MPI_Recv+Wait, and there is no guarantee that the peer * will call MPI_Recv+Wait for the same data since tasks can proceed in any * order. */ #include #include "helper.h" #include "bench_helper.h" #define NB_WARMUP_PINGPONGS 10 /* We reduce NX_MAX, since some NICs don't support exchanging simultaneously such amount of memory */ #undef NX_MAX #ifdef STARPU_QUICK_CHECK #define NX_MAX (1024) #else #define NX_MAX (64 * 1024 * 1024) #endif void cpu_task(void* descr[], void* args) { int mpi_rank; uint64_t iterations = #ifdef STARPU_QUICK_CHECK 10; #else LOOPS_DEFAULT / 100; #endif uint64_t s; starpu_data_handle_t handle_send, handle_recv; double t1, t2; int asked_worker; int current_worker = starpu_worker_get_id(); uint64_t j; uint64_t k; int ret; starpu_codelet_unpack_args(args, &mpi_rank, &asked_worker, &s, &handle_send, &handle_recv); STARPU_ASSERT(asked_worker == current_worker); iterations = bench_nb_iterations(iterations, s); double* lats = malloc(sizeof(double) * iterations); for (j = 0; j < NB_WARMUP_PINGPONGS; j++) { if (mpi_rank == 0) { ret = starpu_mpi_send(handle_send, 1, 0, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_recv(handle_recv, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); } else { ret = starpu_mpi_recv(handle_recv, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); ret = starpu_mpi_send(handle_send, 0, 1, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } } for (j = 0; j < iterations; j++) { if (mpi_rank == 0) { t1 = starpu_timing_now(); ret = starpu_mpi_send(handle_send, 1, 0, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_recv(handle_recv, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); t2 = starpu_timing_now(); lats[j] = (t2 - t1) / 2; } else { ret = starpu_mpi_recv(handle_recv, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); ret = starpu_mpi_send(handle_send, 0, 1, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } } if (mpi_rank == 0) { qsort(lats, iterations, sizeof(double), &comp_double); const double min_lat = lats[0]; const double max_lat = lats[iterations - 1]; const double med_lat = lats[(iterations - 1) / 2]; const double d1_lat = lats[(iterations - 1) / 10]; const double d9_lat = lats[9 * (iterations - 1) / 10]; double avg_lat = 0.0; for(k = 0; k < iterations; k++) { avg_lat += lats[k]; } avg_lat /= iterations; const double bw_million_byte = s / min_lat; const double bw_mbyte = bw_million_byte / 1.048576; printf("%2d\t\t%9lld\t%9.3lf\t%9.3f\t%9.3f\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf\n", current_worker, (long long) s, min_lat, bw_million_byte, bw_mbyte, d1_lat, med_lat, avg_lat, d9_lat, max_lat); fflush(stdout); } free(lats); } static struct starpu_codelet cl = { .cpu_funcs = { cpu_task }, .cpu_funcs_name = { "cpu_task" }, .nbuffers = 0 }; int main(int argc, char **argv) { int ret, rank, worldsize; ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); if (worldsize < 2) { if (rank == 0) FPRINTF(stderr, "We need 2 processes.\n"); starpu_mpi_shutdown(); return STARPU_TEST_SKIPPED; } if (rank == 0) { printf("Times in us\n"); printf("# worker | size (Bytes)\t| latency \t| 10^6 B/s \t| MB/s \t| d1 \t|median \t| avg \t| d9 \t| max\n"); } else if (rank >= 2) { starpu_mpi_shutdown(); return 0; } unsigned cpu_count = starpu_cpu_worker_get_count(); uint64_t s; unsigned i; int* workers = malloc(cpu_count * sizeof(int)); float** vectors_send = malloc(cpu_count * sizeof(float*)); float** vectors_recv = malloc(cpu_count * sizeof(float*)); starpu_data_handle_t* handles_send = malloc(cpu_count * sizeof(starpu_data_handle_t)); starpu_data_handle_t* handles_recv = malloc(cpu_count * sizeof(starpu_data_handle_t)); for (s = NX_MIN; s <= NX_MAX; s = bench_next_size(s)) { starpu_pause(); for (i = 0; i < cpu_count; i++) { workers[i] = i; vectors_send[i] = malloc(s); vectors_recv[i] = malloc(s); memset(vectors_send[i], 0, s); memset(vectors_recv[i], 0, s); starpu_vector_data_register(&handles_send[i], STARPU_MAIN_RAM, (uintptr_t) vectors_send[i], s, 1); starpu_vector_data_register(&handles_recv[i], STARPU_MAIN_RAM, (uintptr_t) vectors_recv[i], s, 1); ret = starpu_task_insert(&cl, STARPU_EXECUTE_ON_WORKER, workers[i], STARPU_VALUE, &rank, sizeof(int), STARPU_VALUE, workers + i, sizeof(int), STARPU_VALUE, &s, sizeof(uint64_t), STARPU_VALUE, &handles_send[i], sizeof(starpu_data_handle_t), STARPU_VALUE, &handles_recv[i], sizeof(starpu_data_handle_t), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } starpu_resume(); starpu_task_wait_for_all(); for (i = 0; i < cpu_count; i++) { starpu_data_unregister(handles_send[i]); starpu_data_unregister(handles_recv[i]); free(vectors_send[i]); free(vectors_recv[i]); } } free(workers); free(vectors_send); free(vectors_recv); free(handles_send); free(handles_recv); starpu_mpi_shutdown(); return 0; } starpu-1.4.9+dfsg/mpi/examples/cache/000077500000000000000000000000001507764646700174565ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/examples/cache/cache.c000066400000000000000000000057071507764646700206760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" void func_cpu(void *descr[], void *_args) { (void)descr; (void)_args; } struct starpu_codelet mycodelet_r = { .cpu_funcs = {func_cpu}, .nbuffers = 1, .modes = {STARPU_R}, .model = &starpu_perfmodel_nop, }; struct starpu_codelet mycodelet_w = { .cpu_funcs = {func_cpu}, .nbuffers = 1, .modes = {STARPU_W}, .model = &starpu_perfmodel_nop, }; struct starpu_codelet mycodelet_rw = { .cpu_funcs = {func_cpu}, .nbuffers = 1, .modes = {STARPU_RW}, .model = &starpu_perfmodel_nop, }; void test(struct starpu_codelet *codelet, enum starpu_data_access_mode mode, starpu_data_handle_t data, int rank, int in_cache) { int cache; int ret; ret = starpu_mpi_task_insert(MPI_COMM_WORLD, codelet, mode, data, STARPU_EXECUTE_ON_NODE, 1, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); cache = starpu_mpi_cached_receive(data); if (rank == 1) { if (in_cache) { STARPU_ASSERT_MSG(cache == 1, "Data should be in cache\n"); } else { STARPU_ASSERT_MSG(cache == 0, "Data should NOT be in cache\n"); } } } int main(int argc, char **argv) { int rank; int ret; unsigned val = 42; starpu_data_handle_t data; ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); if (starpu_mpi_cache_is_enabled() == 0) goto skip; if (rank == 0) starpu_variable_data_register(&data, STARPU_MAIN_RAM, (uintptr_t)&val, sizeof(unsigned)); else starpu_variable_data_register(&data, -1, (uintptr_t)NULL, sizeof(unsigned)); starpu_mpi_data_register(data, 42, 0); FPRINTF_MPI(stderr, "Registering data %p with tag %d and node %d\n", data, 42, 0); // We use the same data with different access modes and we check if it is // available or not in the cache test(&mycodelet_r, STARPU_R, data, rank, 1); test(&mycodelet_rw, STARPU_RW, data, rank, 0); test(&mycodelet_r, STARPU_R, data, rank, 1); test(&mycodelet_r, STARPU_R, data, rank, 1); test(&mycodelet_w, STARPU_W, data, rank, 0); FPRINTF(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); starpu_data_unregister(data); skip: starpu_mpi_shutdown(); return starpu_mpi_cache_is_enabled() == 0 ? STARPU_TEST_SKIPPED : 0; } starpu-1.4.9+dfsg/mpi/examples/cache/cache_disable.c000066400000000000000000000052371507764646700223570ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" void func_cpu(void *descr[], void *_args) { (void)descr; (void)_args; } struct starpu_codelet mycodelet_r = { .cpu_funcs = {func_cpu}, .nbuffers = 1, .modes = {STARPU_R}, .model = &starpu_perfmodel_nop, }; int main(int argc, char **argv) { int rank; int ret; unsigned *val; starpu_data_handle_t data; int in_cache; int cache; ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); cache = starpu_mpi_cache_is_enabled(); if (cache == 0) goto skip; val = malloc(sizeof(*val)); *val = 12; if (rank == 0) starpu_variable_data_register(&data, STARPU_MAIN_RAM, (uintptr_t)val, sizeof(unsigned)); else starpu_variable_data_register(&data, -1, (uintptr_t)NULL, sizeof(unsigned)); starpu_mpi_data_register(data, 42, 0); FPRINTF_MPI(stderr, "Registering data %p with tag %d and node %d\n", data, 42, 0); ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r, STARPU_R, data, STARPU_EXECUTE_ON_NODE, 1, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); in_cache = starpu_mpi_cached_receive(data); if (rank == 1) { STARPU_ASSERT_MSG(in_cache == 1, "Data should be in cache\n"); } // We clean the cache starpu_mpi_cache_set(0); // We check the data is no longer in the cache in_cache = starpu_mpi_cached_receive(data); if (rank == 1) { STARPU_ASSERT_MSG(in_cache == 0, "Data should NOT be in cache\n"); } ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r, STARPU_R, data, STARPU_EXECUTE_ON_NODE, 1, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); in_cache = starpu_mpi_cached_receive(data); if (rank == 1) { STARPU_ASSERT_MSG(in_cache == 0, "Data should NOT be in cache\n"); } FPRINTF(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); starpu_data_unregister(data); free(val); skip: starpu_mpi_shutdown(); return cache == 0 ? STARPU_TEST_SKIPPED : 0; } starpu-1.4.9+dfsg/mpi/examples/cg/000077500000000000000000000000001507764646700170045ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/examples/cg/cg.c000066400000000000000000000265131507764646700175500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include /* * Distributed version of Conjugate Gradient implemented in examples/cg/cg.c * * Use -display-result option and compare with the non-distributed version: the * x vector should be the same. */ #include "../../../examples/cg/cg.h" static int copy_handle(starpu_data_handle_t* dst, starpu_data_handle_t* src, unsigned nblocks); #define HANDLE_TYPE_VECTOR starpu_data_handle_t* #define HANDLE_TYPE_MATRIX starpu_data_handle_t** #define TASK_INSERT(cl, ...) starpu_mpi_task_insert(MPI_COMM_WORLD, cl, ##__VA_ARGS__) #define GET_VECTOR_BLOCK(v, i) v[i] #define GET_MATRIX_BLOCK(m, i, j) m[i][j] #define BARRIER() starpu_mpi_barrier(MPI_COMM_WORLD); #define GET_DATA_HANDLE(handle) starpu_mpi_get_data_on_all_nodes_detached(MPI_COMM_WORLD, handle) static unsigned block_size; static int rank; static int nodes_p = 2; static int nodes_q; static TYPE ***A; static TYPE **x; static TYPE **b; static TYPE **r; static TYPE **d; static TYPE **q; #define FPRINTF_SERVER(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT") && rank == 0) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #include "../../../examples/cg/cg_kernels.c" static int my_distrib(const int yy, const int xx) { return (yy%nodes_q)*nodes_p + (xx%nodes_p); } static int copy_handle(starpu_data_handle_t* dst, starpu_data_handle_t* src, unsigned nb) { unsigned block; for (block = 0; block < nb; block++) { if (rank == my_distrib(block, 0)) { starpu_data_cpy(dst[block], src[block], /* asynchronous */ 1, /* without callback */ NULL, NULL); } } return 0; } /* * Generate Input data */ static void generate_random_problem(void) { unsigned ii, jj, j, i; int mpi_rank; A = malloc(nblocks * sizeof(TYPE **)); x = malloc(nblocks * sizeof(TYPE *)); b = malloc(nblocks * sizeof(TYPE *)); r = malloc(nblocks * sizeof(TYPE *)); d = malloc(nblocks * sizeof(TYPE *)); q = malloc(nblocks * sizeof(TYPE *)); for (j = 0; j < nblocks; j++) { A[j] = malloc(nblocks * sizeof(TYPE*)); mpi_rank = my_distrib(j, 0); if (mpi_rank == rank || display_result) { starpu_malloc((void**) &x[j], block_size*sizeof(TYPE)); } if (mpi_rank == rank) { starpu_malloc((void**) &b[j], block_size*sizeof(TYPE)); starpu_malloc((void**) &r[j], block_size*sizeof(TYPE)); starpu_malloc((void**) &d[j], block_size*sizeof(TYPE)); starpu_malloc((void**) &q[j], block_size*sizeof(TYPE)); for (jj = 0; jj < block_size; jj++) { x[j][jj] = (TYPE) 0.0; b[j][jj] = (TYPE) 1.0; r[j][jj] = (TYPE) 0.0; d[j][jj] = (TYPE) 0.0; q[j][jj] = (TYPE) 0.0; } } for (i = 0; i < nblocks; i++) { mpi_rank = my_distrib(j, i); if (mpi_rank == rank) { starpu_malloc((void**) &A[j][i], block_size*block_size*sizeof(TYPE)); for (ii = 0; ii < block_size; ii++) { for (jj = 0; jj < block_size; jj++) { /* We take Hilbert matrix that is not well conditioned but definite positive: H(i,j) = 1/(1+i+j) */ A[j][i][jj + ii*block_size] = (TYPE) (1.0/(1.0+(ii+(j*block_size)+jj+(i*block_size)))); } } } } } } static void free_data(void) { unsigned j, i; int mpi_rank; for (j = 0; j < nblocks; j++) { mpi_rank = my_distrib(j, 0); if (mpi_rank == rank || display_result) { starpu_free_noflag((void*) x[j], block_size*sizeof(TYPE)); } if (mpi_rank == rank) { starpu_free_noflag((void*) b[j], block_size*sizeof(TYPE)); starpu_free_noflag((void*) r[j], block_size*sizeof(TYPE)); starpu_free_noflag((void*) d[j], block_size*sizeof(TYPE)); starpu_free_noflag((void*) q[j], block_size*sizeof(TYPE)); } for (i = 0; i < nblocks; i++) { mpi_rank = my_distrib(j, i); if (mpi_rank == rank) { starpu_free_noflag((void*) A[j][i], block_size*block_size*sizeof(TYPE)); } } free(A[j]); } free(A); free(x); free(b); free(r); free(d); free(q); } static void register_data(void) { unsigned j, i; int mpi_rank; starpu_mpi_tag_t mpi_tag = 0; A_handle = malloc(nblocks*sizeof(starpu_data_handle_t*)); x_handle = malloc(nblocks*sizeof(starpu_data_handle_t)); b_handle = malloc(nblocks*sizeof(starpu_data_handle_t)); r_handle = malloc(nblocks*sizeof(starpu_data_handle_t)); d_handle = malloc(nblocks*sizeof(starpu_data_handle_t)); q_handle = malloc(nblocks*sizeof(starpu_data_handle_t)); for (j = 0; j < nblocks; j++) { mpi_rank = my_distrib(j, 0); A_handle[j] = malloc(nblocks*sizeof(starpu_data_handle_t)); if (mpi_rank == rank || display_result) { starpu_vector_data_register(&x_handle[j], STARPU_MAIN_RAM, (uintptr_t) x[j], block_size, sizeof(TYPE)); } else if (!display_result) { assert(mpi_rank != rank); starpu_vector_data_register(&x_handle[j], -1, (uintptr_t) NULL, block_size, sizeof(TYPE)); } if (mpi_rank == rank) { starpu_vector_data_register(&b_handle[j], STARPU_MAIN_RAM, (uintptr_t) b[j], block_size, sizeof(TYPE)); starpu_vector_data_register(&r_handle[j], STARPU_MAIN_RAM, (uintptr_t) r[j], block_size, sizeof(TYPE)); starpu_vector_data_register(&d_handle[j], STARPU_MAIN_RAM, (uintptr_t) d[j], block_size, sizeof(TYPE)); starpu_vector_data_register(&q_handle[j], STARPU_MAIN_RAM, (uintptr_t) q[j], block_size, sizeof(TYPE)); } else { starpu_vector_data_register(&b_handle[j], -1, (uintptr_t) NULL, block_size, sizeof(TYPE)); starpu_vector_data_register(&r_handle[j], -1, (uintptr_t) NULL, block_size, sizeof(TYPE)); starpu_vector_data_register(&d_handle[j], -1, (uintptr_t) NULL, block_size, sizeof(TYPE)); starpu_vector_data_register(&q_handle[j], -1, (uintptr_t) NULL, block_size, sizeof(TYPE)); } starpu_data_set_coordinates(x_handle[j], 1, j); starpu_mpi_data_register(x_handle[j], ++mpi_tag, mpi_rank); starpu_data_set_coordinates(b_handle[j], 1, j); starpu_mpi_data_register(b_handle[j], ++mpi_tag, mpi_rank); starpu_data_set_coordinates(r_handle[j], 1, j); starpu_mpi_data_register(r_handle[j], ++mpi_tag, mpi_rank); starpu_data_set_coordinates(d_handle[j], 1, j); starpu_mpi_data_register(d_handle[j], ++mpi_tag, mpi_rank); starpu_data_set_coordinates(q_handle[j], 1, j); starpu_mpi_data_register(q_handle[j], ++mpi_tag, mpi_rank); if (use_reduction) { starpu_data_set_reduction_methods(q_handle[j], &accumulate_vector_cl, &bzero_vector_cl); starpu_data_set_reduction_methods(r_handle[j], &accumulate_vector_cl, &bzero_vector_cl); } for (i = 0; i < nblocks; i++) { mpi_rank = my_distrib(j, i); if (mpi_rank == rank) { starpu_matrix_data_register(&A_handle[j][i], STARPU_MAIN_RAM, (uintptr_t) A[j][i], block_size, block_size, block_size, sizeof(TYPE)); } else { starpu_matrix_data_register(&A_handle[j][i], -1, (uintptr_t) NULL, block_size, block_size, block_size, sizeof(TYPE)); } starpu_data_set_coordinates(A_handle[j][i], 2, i, j); starpu_mpi_data_register(A_handle[j][i], ++mpi_tag, mpi_rank); } } starpu_variable_data_register(&dtq_handle, STARPU_MAIN_RAM, (uintptr_t)&dtq, sizeof(TYPE)); starpu_variable_data_register(&rtr_handle, STARPU_MAIN_RAM, (uintptr_t)&rtr, sizeof(TYPE)); starpu_mpi_data_register(rtr_handle, ++mpi_tag, 0); starpu_mpi_data_register(dtq_handle, ++mpi_tag, 0); if (use_reduction) { starpu_data_set_reduction_methods(dtq_handle, &accumulate_variable_cl, &bzero_variable_cl); starpu_data_set_reduction_methods(rtr_handle, &accumulate_variable_cl, &bzero_variable_cl); } } static void unregister_data(void) { unsigned j, i; for (j = 0; j < nblocks; j++) { starpu_data_unregister(x_handle[j]); starpu_data_unregister(b_handle[j]); starpu_data_unregister(r_handle[j]); starpu_data_unregister(d_handle[j]); starpu_data_unregister(q_handle[j]); for (i = 0; i < nblocks; i++) { starpu_data_unregister(A_handle[j][i]); } free(A_handle[j]); } starpu_data_unregister(dtq_handle); starpu_data_unregister(rtr_handle); free(A_handle); free(x_handle); free(b_handle); free(r_handle); free(d_handle); free(q_handle); } static void display_x_result(void) { unsigned j, i; for (j = 0; j < nblocks; j++) { starpu_mpi_get_data_on_node(MPI_COMM_WORLD, x_handle[j], 0); } if (rank == 0) { FPRINTF_SERVER(stderr, "Computed X vector:\n"); for (j = 0; j < nblocks; j++) { starpu_data_acquire(x_handle[j], STARPU_R); for (i = 0; i < block_size; i++) { FPRINTF(stderr, "% 02.2e\n", x[j][i]); } starpu_data_release(x_handle[j]); } } } static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-p") == 0) { nodes_p = atoi(argv[++i]); continue; } if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-help") == 0) { FPRINTF_SERVER(stderr, "usage: %s [-h] [-nblocks #blocks] [-display-result] [-p node_grid_width] [-n problem_size] [-no-reduction] [-maxiter i]\n", argv[0]); exit(-1); } } parse_common_args(argc, argv); } int main(int argc, char **argv) { int worldsize, ret; double start, end; /* Not supported yet */ if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) return 77; ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); parse_args(argc, argv); if (worldsize % nodes_p != 0) { FPRINTF_SERVER(stderr, "Node grid (%d) width must divide the number of nodes (%d).\n", nodes_p, worldsize); starpu_mpi_shutdown(); return 1; } nodes_q = worldsize / nodes_p; if (n % nblocks != 0) { FPRINTF_SERVER(stderr, "The number of blocks (%u) must divide the matrix size (%lld).\n", nblocks, n); starpu_mpi_shutdown(); return 1; } block_size = n / nblocks; starpu_cublas_init(); FPRINTF_SERVER(stderr, "************** PARAMETERS ***************\n"); FPRINTF_SERVER(stderr, "%d nodes (%dx%d)\n", worldsize, nodes_p, nodes_q); FPRINTF_SERVER(stderr, "Problem size (-n): %lld\n", n); FPRINTF_SERVER(stderr, "Maximum number of iterations (-maxiter): %d\n", i_max); FPRINTF_SERVER(stderr, "Number of blocks (-nblocks): %u\n", nblocks); FPRINTF_SERVER(stderr, "Reduction (-no-reduction): %s\n", use_reduction ? "enabled" : "disabled"); starpu_mpi_barrier(MPI_COMM_WORLD); start = starpu_timing_now(); generate_random_problem(); register_data(); starpu_mpi_barrier(MPI_COMM_WORLD); end = starpu_timing_now(); FPRINTF_SERVER(stderr, "Problem initialization timing : %2.2f seconds\n", (end-start)/1e6); ret = cg(); if (ret == -ENODEV) { ret = 77; goto enodev; } starpu_task_wait_for_all(); if (display_result) { display_x_result(); } enodev: unregister_data(); free_data(); starpu_cublas_shutdown(); starpu_mpi_shutdown(); return ret; } starpu-1.4.9+dfsg/mpi/examples/comm/000077500000000000000000000000001507764646700173465ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/examples/comm/comm.c000066400000000000000000000117671507764646700204610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example splits the whole set of communicators in subgroups, * all communications take place within each subgroups */ #include #include "../helper.h" #define DATA0_TAG 12 #define DATA1_TAG 22 MPI_Comm newcomm; void func_cpu(void *descr[], void *_args) { int *value = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); int rank; starpu_codelet_unpack_args(_args, &rank); FPRINTF_MPI_COMM(stderr, newcomm, "Executing codelet with value %d and rank %d\n", *value, rank); STARPU_ASSERT_MSG(*value == rank, "Received value %d is not the expected value %d\n", *value, rank); } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 1, .modes = {STARPU_RW}, .model = &starpu_perfmodel_nop, }; int main(int argc, char **argv) { int size, x=789; int color; int rank, newrank; int ret; starpu_data_handle_t data[2]; int thread_support; if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS) { fprintf(stderr,"MPI_Init_thread failed\n"); exit(1); } if (thread_support == MPI_THREAD_FUNNELED) fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); if (thread_support < MPI_THREAD_FUNNELED) fprintf(stderr,"Warning: MPI does not have thread support!\n"); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size < 4) { FPRINTF(stderr, "We need at least 4 processes.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } color = rank%2; MPI_Comm_split(MPI_COMM_WORLD, color, rank, &newcomm); MPI_Comm_rank(newcomm, &newrank); FPRINTF(stderr, "[%d][%d] color %d\n", rank, newrank, color); if (newrank == 0) { FPRINTF(stderr, "[%d][%d] sending %d\n", rank, newrank, rank); MPI_Send(&rank, 1, MPI_INT, 1, 10, newcomm); } else if (newrank == 1) { MPI_Recv(&x, 1, MPI_INT, 0, 10, newcomm, MPI_STATUS_IGNORE); FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, x); } ret = starpu_mpi_init_conf(NULL, NULL, 0, newcomm, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); if (newrank == 0) { starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int)); starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int)); starpu_mpi_data_register_comm(data[1], DATA1_TAG, 0, newcomm); } else starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register_comm(data[0], DATA0_TAG, 0, newcomm); if (newrank == 0) { starpu_mpi_req req[2]; ret = starpu_mpi_issend(data[1], &req[0], 1, DATA1_TAG, newcomm); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_issend"); ret = starpu_mpi_isend(data[0], &req[1], 1, DATA0_TAG, newcomm); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); ret = starpu_mpi_wait(&req[0], MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); ret = starpu_mpi_wait(&req[1], MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); } else if (newrank == 1) { int *xx; ret = starpu_mpi_recv(data[0], 0, DATA0_TAG, newcomm, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(data[0], STARPU_R); xx = (int *)starpu_variable_get_local_ptr(data[0]); FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx); STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); starpu_data_release(data[0]); starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register_comm(data[1], DATA1_TAG, 0, newcomm); ret = starpu_mpi_recv(data[0], 0, DATA1_TAG, newcomm, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(data[0], STARPU_R); xx = (int *)starpu_variable_get_local_ptr(data[0]); FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx); STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); starpu_data_release(data[0]); } if (newrank == 0 || newrank == 1) { starpu_mpi_task_insert(newcomm, &mycodelet, STARPU_RW, data[0], STARPU_VALUE, &x, sizeof(x), STARPU_EXECUTE_ON_NODE, 1, 0); starpu_task_wait_for_all(); starpu_data_unregister(data[0]); starpu_data_unregister(data[1]); } starpu_mpi_shutdown_comm(newcomm); MPI_Comm_free(&newcomm); MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/examples/comm/group.c000066400000000000000000000104661507764646700206550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" int main(int argc, char **argv) { #ifdef STARPU_HAVE_MPI_COMM_CREATE_GROUP int thread_support; if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS) { fprintf(stderr,"MPI_Init_thread failed\n"); exit(1); } if (thread_support == MPI_THREAD_FUNNELED) fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); if (thread_support < MPI_THREAD_FUNNELED) fprintf(stderr,"Warning: MPI does not have thread support!\n"); int world_rank, world_size; MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); MPI_Comm_size(MPI_COMM_WORLD, &world_size); if (world_size < 4) { FPRINTF(stderr, "We need at least 4 processes.\n"); MPI_Finalize(); return (world_rank==0) ? STARPU_TEST_SKIPPED : 0; } // create a new communicator with the even ranks processes int ranks[world_size/2]; int pos,n; for(pos=0,n=0 ; pos #include "../helper.h" MPI_Comm newcomm; void func_cpu(void *descr[], void *_args) { int *value = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); int rank; starpu_codelet_unpack_args(_args, &rank); FPRINTF_MPI_COMM(stderr, newcomm, "Executing codelet with value %d and rank %d\n", *value, rank); STARPU_ASSERT_MSG(*value == rank, "Received value %d is not the expected value %d\n", *value, rank); } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 1, .modes = {STARPU_RW}, .model = &starpu_perfmodel_nop, }; int main(int argc, char **argv) { int size, x; int color; int rank, newrank; int ret; starpu_data_handle_t data[3]; int value = 90; int thread_support; if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS) { fprintf(stderr,"MPI_Init_thread failed\n"); exit(1); } if (thread_support == MPI_THREAD_FUNNELED) fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); if (thread_support < MPI_THREAD_FUNNELED) fprintf(stderr,"Warning: MPI does not have thread support!\n"); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size < 4) { FPRINTF(stderr, "We need at least 4 processes.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } color = rank%2; MPI_Comm_split(MPI_COMM_WORLD, color, rank, &newcomm); MPI_Comm_rank(newcomm, &newrank); FPRINTF(stderr, "[%d][%d] color %d\n", rank, newrank, color); if (newrank == 0) { FPRINTF(stderr, "[%d][%d] sending %d\n", rank, newrank, rank); MPI_Send(&rank, 1, MPI_INT, 1, 10, newcomm); } else if (newrank == 1) { MPI_Recv(&x, 1, MPI_INT, 0, 10, newcomm, MPI_STATUS_IGNORE); FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, x); } ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); ret = starpu_mpi_comm_register(newcomm); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_comm_register"); if (rank == 0) { starpu_variable_data_register(&data[2], STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(int)); } else starpu_variable_data_register(&data[2], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register_comm(data[2], 44, 0, MPI_COMM_WORLD); if (newrank == 0) { starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int)); starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int)); starpu_mpi_data_register_comm(data[1], 22, 0, newcomm); } else starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register_comm(data[0], 12, 0, newcomm); if (newrank == 0) { starpu_mpi_req req[2]; ret = starpu_mpi_issend(data[1], &req[0], 1, 22, newcomm); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_issend"); ret = starpu_mpi_isend(data[0], &req[1], 1, 12, newcomm); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); ret = starpu_mpi_wait(&req[0], MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); ret = starpu_mpi_wait(&req[1], MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); } else if (newrank == 1) { int *xx; ret = starpu_mpi_recv(data[0], 0, 12, newcomm, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(data[0], STARPU_R); xx = (int *)starpu_variable_get_local_ptr(data[0]); FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx); STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); starpu_data_release(data[0]); starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register_comm(data[1], 22, 0, newcomm); ret = starpu_mpi_recv(data[0], 0, 22, newcomm, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(data[0], STARPU_R); xx = (int *)starpu_variable_get_local_ptr(data[0]); FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx); STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); starpu_data_release(data[0]); } if (rank == 0) { starpu_data_acquire(data[2], STARPU_R); int rvalue = *((int *)starpu_variable_get_local_ptr(data[2])); starpu_data_release(data[2]); FPRINTF_MPI_COMM(stderr, MPI_COMM_WORLD, "sending value %d to %d and receiving from %d\n", rvalue, 1, size-1); ret = starpu_mpi_send(data[2], 1, 44, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_recv(data[2], size-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(data[2], STARPU_R); int *xx = (int *)starpu_variable_get_local_ptr(data[2]); FPRINTF_MPI_COMM(stderr, MPI_COMM_WORLD, "Value back is %d\n", *xx); STARPU_ASSERT_MSG(*xx == rvalue + (2*(size-1)), "Received value %d is incorrect (should be %d)\n", *xx, rvalue + (2*(size-1))); starpu_data_release(data[2]); } else { int next = (rank == size-1) ? 0 : rank+1; ret = starpu_mpi_recv(data[2], rank-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(data[2], STARPU_RW); int *xx = (int *)starpu_variable_get_local_ptr(data[2]); FPRINTF_MPI_COMM(stderr, MPI_COMM_WORLD, "receiving %d from %d and sending %d to %d\n", *xx, rank-1, *xx+2, next); *xx = *xx + 2; starpu_data_release(data[2]); ret = starpu_mpi_send(data[2], next, 44, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } if (newrank == 0 || newrank == 1) { starpu_mpi_task_insert(newcomm, &mycodelet, STARPU_RW, data[0], STARPU_VALUE, &x, sizeof(x), STARPU_EXECUTE_ON_NODE, 1, 0); starpu_task_wait_for_all(); starpu_data_unregister(data[0]); starpu_data_unregister(data[1]); } starpu_data_unregister(data[2]); starpu_mpi_shutdown(); MPI_Comm_free(&newcomm); MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/examples/complex/000077500000000000000000000000001507764646700200625ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/examples/complex/mpi_complex.c000066400000000000000000000111001507764646700225330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void display_foo_codelet(void *descr[], void *_args) { (void)_args; int *foo = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); FPRINTF(stderr, "foo = %d\n", *foo); } struct starpu_codelet foo_display = { .cpu_funcs = {display_foo_codelet}, .nbuffers = 1, .modes = {STARPU_R}, .model = &starpu_perfmodel_nop, }; int main(int argc, char **argv) { int rank, nodes; int ret; int compare=0; ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); if (nodes < 2 || (starpu_cpu_worker_get_count() == 0)) { if (rank == 0) { if (nodes < 2) fprintf(stderr, "We need at least 2 processes.\n"); else fprintf(stderr, "We need at least 1 CPU.\n"); } starpu_mpi_shutdown(); return 77; } starpu_data_handle_t handle; starpu_data_handle_t handle2; double real[2] = {4.0, 2.0}; double imaginary[2] = {7.0, 9.0}; double real2[2] = {14.0, 12.0}; double imaginary2[2] = {17.0, 19.0}; if (rank == 1) { real[0] = 0.0; real[1] = 0.0; imaginary[0] = 0.0; imaginary[1] = 0.0; } starpu_complex_data_register(&handle, STARPU_MAIN_RAM, real, imaginary, 2); starpu_complex_data_register(&handle2, -1, real2, imaginary2, 2); // Ping-pong if (rank == 0) { int *compare_ptr = &compare; ret = starpu_task_insert(&cl_display, STARPU_VALUE, "node0 initial value", strlen("node0 initial value")+1, STARPU_R, handle, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_mpi_isend_detached(handle, 1, 10, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); ret = starpu_mpi_irecv_detached(handle2, 1, 20, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); ret = starpu_task_insert(&cl_display, STARPU_VALUE, "node0 received value", strlen("node0 received value")+1, STARPU_R, handle2, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&cl_compare, STARPU_R, handle, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } else if (rank == 1) { ret = starpu_mpi_irecv_detached(handle, 0, 10, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); ret = starpu_task_insert(&cl_display, STARPU_VALUE, "node1 received value", strlen("node1 received value")+1, STARPU_R, handle, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_mpi_isend_detached(handle, 0, 20, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } // Ping if (rank == 0) { starpu_data_handle_t xhandle; double xreal = 4.0; double ximaginary = 8.0; starpu_complex_data_register(&xhandle, STARPU_MAIN_RAM, &xreal, &ximaginary, 1); ret = starpu_mpi_send(xhandle, 1, 30, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); starpu_data_unregister(xhandle); } else if (rank == 1) { MPI_Status status; starpu_data_handle_t xhandle; double xreal = 14.0; double ximaginary = 18.0; starpu_complex_data_register(&xhandle, STARPU_MAIN_RAM, &xreal, &ximaginary, 1); ret = starpu_mpi_recv(xhandle, 0, 30, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_unregister(xhandle); FPRINTF(stderr, "[received] real %f imaginary %f\n", xreal, ximaginary); STARPU_ASSERT_MSG(xreal == 4 && ximaginary == 8, "Incorrect received value\n"); } starpu_task_wait_for_all(); starpu_data_unregister(handle); starpu_data_unregister(handle2); starpu_mpi_shutdown(); return (rank == 0) ? !compare : 0; } starpu-1.4.9+dfsg/mpi/examples/filters/000077500000000000000000000000001507764646700200635ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/examples/filters/filter.c000066400000000000000000000114131507764646700215140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This examplifies how to declare a new filter function. */ #include #define NX 20 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void cpu_func(void *buffers[], void *cl_arg) { unsigned i; int factor; int rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); fprintf(stderr, "computing on rank %d\n", rank); unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); starpu_codelet_unpack_args(cl_arg, &factor); for (i = 0; i < n; i++) val[i] *= factor; } struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "vector_scal" }; void vector_filter(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nchunks) { struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface; struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface; uint32_t nx = vector_father->nx; size_t elemsize = vector_father->elemsize; STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx); STARPU_ASSERT(nchunks == 2); STARPU_ASSERT_MSG((nx % nchunks) == 0, "nx=%u is not a multiple of nchunks %u\n", nx, nchunks); vector_child->id = vector_father->id; vector_child->nx = nx/2; vector_child->elemsize = elemsize; vector_child->allocsize = vector_child->nx * elemsize; if (vector_father->dev_handle) { size_t offset = (id *(nx/nchunks)) * elemsize; if (vector_father->ptr) vector_child->ptr = vector_father->ptr + offset; vector_child->dev_handle = vector_father->dev_handle; vector_child->offset = vector_father->offset + offset; } } int main(int argc, char **argv) { int i, rank, nodes; int vector[NX]; int vector_check[NX]; starpu_data_handle_t vhandle; starpu_data_handle_t handles[2]; int factor[2] = {2, 3}; int ret; ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); if (nodes < 2 || (starpu_cpu_worker_get_count() == 0)) { if (rank == 0) { if (nodes < 2) fprintf(stderr, "We need at least 2 processes.\n"); else fprintf(stderr, "We need at least 1 CPU.\n"); } starpu_mpi_shutdown(); return 77; } for(i=0 ; i #include #ifdef STARPU_HAVE_VALGRIND_H #include #endif #ifdef STARPU_HAVE_HELGRIND_H #include #endif #define STARPU_TEST_SKIPPED 77 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define FPRINTF_MPI(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) { \ int _disp_rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &_disp_rank); \ fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \ fflush(ofile); }} while(0); #define FPRINTF_MPI_COMM(ofile, comm, fmt, ...) do { if (!getenv("STARPU_SSILENT")) { \ int _disp_rank; starpu_mpi_comm_rank(comm, &_disp_rank); \ fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \ fflush(ofile); }} while(0); starpu-1.4.9+dfsg/mpi/examples/loader.c000066400000000000000000000274611507764646700200370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) #include #else #include #endif #ifdef STARPU_QUICK_CHECK /* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s add some extra times for tests which run with all schedulers */ #define DEFAULT_TIMEOUT 100 #elif !defined(STARPU_LONG_CHECK) /* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ #define DEFAULT_TIMEOUT 300 #else /* Long checks can be very long */ #define DEFAULT_TIMEOUT 1000 #endif #define AUTOTEST_SKIPPED_TEST 77 static pid_t child_pid = 0; static int timeout; #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) static int mygettimeofday(struct timeval *tv, void *tz) { if (tv) { FILETIME ft; unsigned long long res; GetSystemTimeAsFileTime(&ft); /* 100-nanosecond intervals since January 1, 1601 */ res = ft.dwHighDateTime; res <<= 32; res |= ft.dwLowDateTime; res /= 10; /* Now we have microseconds */ res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; /* Now we are based on epoch */ tv->tv_sec = res / 1000000ULL; tv->tv_usec = res % 1000000ULL; } } #else #define mygettimeofday(tv,tz) gettimeofday(tv,tz) #endif #ifdef STARPU_GDB_PATH static int try_launch_gdb(const char *exe, const char *core) { # define GDB_COMMANDS \ "-ex", "py-list", \ "-ex", "starpu-tasks", \ "-ex", "starpu-workers", \ "-ex", "starpu-print-datas-summary", \ "-ex", "starpu-memusage", \ "-ex", "starpu-print-archs", \ "-ex", "starpu-print-registered-models", \ "-ex", "bt full", \ "-ex", "py-bt", \ "-ex", "thread apply all bt full", \ "-ex", "thread apply all py-bt", \ int err; pid_t pid; struct stat st; const char *top_builddir; char *gdb; err = stat(core, &st); if (err != 0) { fprintf(stderr, "while looking for core file of %s: %s: %m\n", exe, core); return -1; } if (!(st.st_mode & S_IFREG)) { fprintf(stderr, "%s: not a regular file\n", core); return -1; } top_builddir = getenv("top_builddir"); pid = fork(); switch (pid) { case 0: /* kid */ if (top_builddir != NULL) { /* Run gdb with Libtool. */ gdb = alloca(strlen(top_builddir) + sizeof("/libtool") + 1); strcpy(gdb, top_builddir); strcat(gdb, "/libtool"); err = execl(gdb, "gdb", "--mode=execute", STARPU_GDB_PATH, "--batch", GDB_COMMANDS exe, core, NULL); } else { /* Run gdb directly */ gdb = STARPU_GDB_PATH; err = execl(gdb, "gdb", "--batch", GDB_COMMANDS exe, core, NULL); } if (err != 0) { fprintf(stderr, "while launching `%s': %m\n", gdb); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); break; case -1: fprintf(stderr, "fork: %m\n"); return -1; default: /* parent */ { pid_t who; int status; who = waitpid(pid, &status, 0); if (who != pid) fprintf(stderr, "while waiting for gdb " "process %d: %m\n", pid); } } return 0; # undef GDB_COMMANDS } #endif /* STARPU_GDB_PATH */ static void launch_gdb(const char *exe) { #ifdef STARPU_GDB_PATH char s[32]; snprintf(s, sizeof(s), "core.%d", child_pid); if (try_launch_gdb(exe, s) < 0) try_launch_gdb(exe, "core"); #endif /* STARPU_GDB_PATH */ } static char *test_name; static void test_cleaner(int sig) { pid_t child_gid; int status; (void) sig; // send signal to all loader family members fprintf(stderr, "[error] test %s has been blocked for %d seconds. Mark it as failed\n", test_name, timeout); child_gid = getpgid(child_pid); kill(-child_gid, SIGQUIT); waitpid(child_pid, &status, 0); launch_gdb(test_name); raise(SIGALRM); exit(EXIT_FAILURE); } static void forwardsig(int sig) { pid_t child_gid; child_gid = getpgid(child_pid); kill(-child_gid, sig); } static int _decode(char **src, char *motif, const char *value) { char *found; found = strstr(*src, motif); if (found == NULL) return 0; char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); strncpy(new_src, *src, found - *src); strcat(new_src, value); strcat(new_src, found+strlen(motif)); *src = new_src; return 1; } static void decode(char **src, char *motif, const char *value) { if (*src) { if (strstr(*src, motif) && value == NULL) { fprintf(stderr, "error: $%s undefined\n", motif); exit(EXIT_FAILURE); } int d = _decode(src, motif, value); while (d) d = _decode(src, motif, value); } } int main(int argc, char *argv[]) { int child_exit_status; char *test_args; char *launcher; char *launcher_args; char *libtool; char *cflags; const char *top_builddir = getenv("top_builddir"); struct sigaction sa; int ret; struct timeval start; struct timeval end; double timing; int x=1; int asan = 0, lsan = 0, tsan = 0, usan = 0; (void) argc; test_args = NULL; timeout = 0; launcher=getenv("STARPU_CHECK_LAUNCHER"); launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); cflags = getenv("CFLAGS"); if (cflags) { if (strstr(cflags, "-fsanitize=address")) asan = 1; if (strstr(cflags, "-fsanitize=leak")) lsan = 1; if (strstr(cflags, "-fsanitize=thread")) tsan = 1; if (strstr(cflags, "-fsanitize=undefined")) usan = 1; } if (argv[x] && strcmp(argv[x], "-t") == 0) { timeout = strtol(argv[x+1], NULL, 10); x += 2; } else if (getenv("STARPU_TIMEOUT_ENV")) { /* get user-defined iter_max value */ timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); } else if (timeout <= 0) { timeout = DEFAULT_TIMEOUT; if ((launcher && strstr(launcher, "valgrind")) || (launcher && strstr(launcher, "helgrind")) || tsan) timeout *= 20; if (asan || usan || lsan || (launcher && strstr(launcher, "compute-sanitizer"))) timeout *= 5; if (timeout > 1750) timeout = 1750; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG timeout *= 20; #endif #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE /* compare values between the 2 values of timeout */ if (getenv("MPIEXEC_TIMEOUT")) { int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); if (mpiexec_timeout != timeout) fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); } #endif if (argv[x] && strcmp(argv[x], "-p") == 0) { test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); x += 3; } else { test_name = argv[x]; x += 1; } if (!test_name) { fprintf(stderr, "[error] Need name of program to start\n"); exit(EXIT_FAILURE); } size_t len = strlen(test_name); if (len >= 3 && test_name[len-3] == '.' && test_name[len-2] == 's' && test_name[len-1] == 'h') { /* This is a shell script, don't run ourself on bash, but make * the script call us for each program invocation */ char *launch = NULL; if (top_builddir == NULL) // this may fail if .libs is in the directory path setenv("STARPU_LAUNCH", argv[0], 1); else { launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); strcpy(launch, top_builddir); strcat(launch, "/tests/loader"); setenv("STARPU_LAUNCH", launch, 1); } execvp(test_name, argv+x-1); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); free(launch); exit(EXIT_FAILURE); } if (strstr(test_name, "spmv/dw_block_spmv")) { test_args = (char *) calloc(512, sizeof(char)); snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); } else if (strstr(test_name, "starpu_perfmodel_display")) { if (x >= argc) test_args = strdup("-l"); } else if (strstr(test_name, "starpu_perfmodel_plot")) { if (x >= argc) test_args = strdup("-l"); } /* get launcher program */ if (launcher_args) launcher_args=strdup(launcher_args); if (top_builddir == NULL) { fprintf(stderr, "warning: $top_builddir undefined, " "so $STARPU_CHECK_LAUNCHER ignored\n"); launcher = NULL; launcher_args = NULL; libtool = NULL; } else { libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); strcpy(libtool, top_builddir); strcat(libtool, "/libtool"); } if (launcher) { const char *top_srcdir = getenv("top_srcdir"); decode(&launcher, "@top_srcdir@", top_srcdir); decode(&launcher_args, "@top_srcdir@", top_srcdir); } setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); /* set SIGALARM handler */ sa.sa_flags = SA_RESETHAND | SA_NODEFER; sigemptyset(&sa.sa_mask); sa.sa_handler = test_cleaner; if (-1 == sigaction(SIGALRM, &sa, NULL)) perror("sigaction"); signal(SIGINT, forwardsig); signal(SIGHUP, forwardsig); signal(SIGPIPE, forwardsig); signal(SIGTERM, forwardsig); child_pid = fork(); if (child_pid == 0) { char *launcher_argv[100]; int i=0; setpgid(0, 0); /* "Launchers" such as Valgrind need to be inserted * after the Libtool-generated wrapper scripts, hence * this special-case. */ if (launcher && top_builddir != NULL) { launcher_argv[i++] = libtool; launcher_argv[i++] = "--mode=execute"; launcher_argv[i++] = launcher; if (launcher_args) { launcher_argv[i++] = strtok(launcher_args, " "); while (launcher_argv[i-1]) { launcher_argv[i++] = strtok(NULL, " "); } } } launcher_argv[i++] = test_name; if (test_args) launcher_argv[i++] = test_args; else while (argv[x]) { launcher_argv[i++] = argv[x++]; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG launcher_argv[i++] = "--cfg=contexts/factory:thread"; #endif #endif launcher_argv[i++] = NULL; execvp(*launcher_argv, launcher_argv); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); exit(EXIT_FAILURE); } if (child_pid == -1) { fprintf(stderr, "[error] fork. test marked as failed\n"); exit(EXIT_FAILURE); } free(test_args); free(libtool); ret = EXIT_SUCCESS; gettimeofday(&start, NULL); alarm(timeout); if (child_pid == waitpid(child_pid, &child_exit_status, 0)) { if (WIFEXITED(child_exit_status)) { int status = WEXITSTATUS(child_exit_status); if (status == EXIT_SUCCESS) { alarm(0); } else { if (status != AUTOTEST_SKIPPED_TEST) fprintf(stdout, "`%s' exited with return code %d\n", test_name, status); ret = status; } } else if (WIFSIGNALED(child_exit_status)) { fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", test_name, WTERMSIG(child_exit_status)); launch_gdb(test_name); ret = EXIT_FAILURE; } else { fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", test_name); ret = EXIT_FAILURE; } } gettimeofday(&end, NULL); timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); return ret; } starpu-1.4.9+dfsg/mpi/examples/matrix_decomposition/000077500000000000000000000000001507764646700226535ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/examples/matrix_decomposition/mpi_cholesky.c000066400000000000000000000044121507764646700255060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_cholesky.h" #include "helper.h" int main(int argc, char **argv) { /* create a simple definite positive symmetric matrix example * * Hilbert matrix : h(i,j) = 1/(i+j+1) * */ float ***bmat; int rank, nodes, ret; double timing, flops; #ifndef STARPU_SIMGRID int correctness; #endif ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); starpu_cublas_init(); if (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0) { if (rank == 0) { FPRINTF(stderr, "We need at least 1 CPU or CUDA worker.\n"); } starpu_mpi_shutdown(); return STARPU_TEST_SKIPPED; } parse_args(argc, argv, nodes); if (checkpoint_enabled) starpu_mpi_checkpoint_init(); matrix_init(&bmat, rank, nodes, 1); matrix_display(bmat, rank, nodes); dw_cholesky(bmat, size/nblocks, rank, nodes, &timing, &flops); #ifndef STARPU_SIMGRID matrix_display(bmat, rank, nodes); if (check && rank == 0) dw_cholesky_check_computation(bmat, rank, nodes, &correctness, &flops, 0.001); #endif matrix_free(&bmat, rank, nodes, 1); starpu_cublas_shutdown(); if (checkpoint_enabled) starpu_mpi_checkpoint_shutdown(); starpu_mpi_shutdown(); #ifndef STARPU_SIMGRID if (check && rank == 0) assert(correctness); #endif if (rank == 0) { FPRINTF(stdout, "Computation time (in ms): %2.2f\n", timing/1000); FPRINTF(stdout, "Synthetic GFlops : %2.2f\n", (flops/timing/1000.0f)); } return 0; } starpu-1.4.9+dfsg/mpi/examples/matrix_decomposition/mpi_cholesky.h000066400000000000000000000021321507764646700255100ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __MPI_CHOLESKY_H__ #define __MPI_CHOLESKY_H__ #include #include #include "mpi_cholesky_codelets.h" #include "mpi_cholesky_kernels.h" #include "mpi_cholesky_models.h" #include "mpi_decomposition_matrix.h" #include "mpi_decomposition_params.h" #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #endif // __MPI_CHOLESKY_H__ starpu-1.4.9+dfsg/mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c000066400000000000000000000600001507764646700273630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_cholesky.h" #include #include #include #include /* This is from magma -- Innovative Computing Laboratory -- Electrical Engineering and Computer Science Department -- University of Tennessee -- (C) Copyright 2009 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Tennessee, Knoxville nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.))) #define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n)) * (double)(__n) - (1. / 6.))) #define FLOPS_SPOTRF(__n) (FMULS_POTRF((__n)) + FADDS_POTRF((__n))) #define FMULS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.)) #define FADDS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.)) #define FMULS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? FMULS_TRMM_2((__m), (__n)) :*/ FMULS_TRMM_2((__n), (__m))) #define FADDS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? FADDS_TRMM_2((__m), (__n)) :*/ FADDS_TRMM_2((__n), (__m))) #define FMULS_TRSM FMULS_TRMM #define FADDS_TRSM FMULS_TRMM #define FLOPS_STRSM(__m, __n) (FMULS_TRSM((__m), (__n)) + FADDS_TRSM((__m), (__n))) #define FMULS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) #define FADDS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) #define FLOPS_SSYRK(__k, __n) (FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n))) #define FMULS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) #define FADDS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) #define FLOPS_SGEMM(__m, __n, __k) (FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k))) /* End of magma code */ int _nodes; starpu_mpi_checkpoint_template_t* checkpoint_p; int backup_function(int rank) { return (rank/dblockx)*dblockx +(rank+1)%dblockx; // return (rank+1)%_nodes; } /* * Create the codelets */ static struct starpu_codelet cl_potrf = { .cpu_funcs = {chol_cpu_codelet_update_potrf}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_potrf}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .model = &chol_model_potrf, .color = 0xffff00, }; static struct starpu_codelet cl_trsm = { .cpu_funcs = {chol_cpu_codelet_update_trsm}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_trsm}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = {STARPU_R, STARPU_RW}, .model = &chol_model_trsm, .color = 0x8080ff, }; static struct starpu_codelet cl_syrk = { .cpu_funcs = {chol_cpu_codelet_update_syrk}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_syrk}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = {STARPU_R, STARPU_RW | STARPU_COMMUTE}, .model = &chol_model_syrk, .color = 0x00ff00, }; static struct starpu_codelet cl_gemm = { .cpu_funcs = {chol_cpu_codelet_update_gemm}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_gemm}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_RW | STARPU_COMMUTE}, .model = &chol_model_gemm, .color = 0x00c000, }; static void run_cholesky(starpu_data_handle_t **data_handles, int rank, int nodes) { unsigned k, m, n; unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN; unsigned nn = size/nblocks; if (checkpoint_enabled) { starpu_mpi_checkpoint_template_add_entry(checkpoint_p, STARPU_VALUE, &k, sizeof(k), nblocks*nblocks+10, backup_function); starpu_mpi_checkpoint_template_freeze(checkpoint_p); } #ifdef STARPU_DEVEL #warning Add pruning #endif for (k = 0; k < nblocks; k++) { starpu_iteration_push(k); starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_potrf, STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO, STARPU_RW, data_handles[k][k], STARPU_FLOPS, (double) FLOPS_SPOTRF(nn), 0); for (m = k+1; m= nblocks) /* Skip first item when even number of tiles */ continue; /* Accumulate updates from TRSMs */ for (k = 0; k < n; k++) { if (m == n) starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_syrk, STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO, STARPU_R, data_handles[n][k], STARPU_RW | STARPU_COMMUTE, data_handles[m][n], STARPU_FLOPS, (double) FLOPS_SSYRK(nn, nn), 0); else starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_gemm, STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO, STARPU_R, data_handles[n][k], STARPU_R, data_handles[m][k], STARPU_RW | STARPU_COMMUTE, data_handles[m][n], STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn), 0); if (m == nblocks-1) { /* Nobody else will need it */ starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][k]); starpu_data_wont_use(data_handles[n][k]); } } /* non-diagonal block, solve */ k = n; starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_trsm, STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m) : (m == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO, STARPU_R, data_handles[k][k], STARPU_RW, data_handles[m][k], STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn), 0); if (m == nblocks - 1) { /* We do not need the potrf result any more */ starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][n]); starpu_data_wont_use(data_handles[n][n]); } } if (checkpoint_enabled) { if (a%checkpoint_period==checkpoint_period-1) starpu_mpi_checkpoint_template_submit(*checkpoint_p, (int)(2*nblocks -4*a)); } starpu_iteration_pop(); } } /* TODO: generate from compiler polyhedral analysis of classical algorithm */ static void run_cholesky_prio(starpu_data_handle_t **data_handles, int rank STARPU_ATTRIBUTE_UNUSED, int nodes STARPU_ATTRIBUTE_UNUSED) { unsigned a; int k, m, n; unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN; unsigned nn = size/nblocks; /* * This is basically similar to above, except that we shift k according to the priorities set in the algorithm, so that gemm prio ~= 2*nblocks - a * double-antidiagonal number: * - a=0 contains (0,0) plus (1,0) * - a=1 contains (2,0), (1,1) plus (3,0), (2, 1) * - etc. */ if (checkpoint_enabled) { starpu_mpi_checkpoint_template_add_entry(checkpoint_p, STARPU_VALUE, &a, sizeof(a), nblocks*nblocks+10, backup_function); starpu_mpi_checkpoint_template_freeze(checkpoint_p); } for (a = 0; a < 4*nblocks; a++) { starpu_iteration_push(a); for (k = 0; k < (int) nblocks; k++) { n = k; /* Should be m = a-k-n; for potrf and trsm to respect priorities, but needs to be this for dependencies */ m = a-2*k-n; if (m == n) { /* diagonal block, factorize */ starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_potrf, STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k) : STARPU_MAX_PRIO, STARPU_RW, data_handles[k][k], STARPU_FLOPS, (double) FLOPS_SPOTRF(nn), 0); } else if (m >= n && m < (int) nblocks) { /* non-diagonal block, solve */ starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_trsm, STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m) : (m == k+1)?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO, STARPU_R, data_handles[k][k], STARPU_RW, data_handles[m][k], STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn), 0); } if (m == (int) nblocks - 1) { /* We do not need the potrf result any more */ starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][n]); starpu_data_wont_use(data_handles[n][n]); } /* column within antidiagonal for a */ for (n = k + 1; n < (int) nblocks; n++) { /* row */ m = a-2*k-n; if (m >= n && m < (int) nblocks) { /* Update */ if (m == n) starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_syrk, STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO, STARPU_R, data_handles[n][k], STARPU_RW | STARPU_COMMUTE, data_handles[m][n], STARPU_FLOPS, (double) FLOPS_SSYRK(nn, nn), 0); else starpu_mpi_task_insert(MPI_COMM_WORLD, &cl_gemm, STARPU_PRIORITY, noprio ? STARPU_DEFAULT_PRIO : unbound_prio ? (int)(2*nblocks - 2*k - m - n) : ((n == k+1) && (m == k+1))?STARPU_MAX_PRIO:STARPU_DEFAULT_PRIO, STARPU_R, data_handles[n][k], STARPU_R, data_handles[m][k], STARPU_RW | STARPU_COMMUTE, data_handles[m][n], STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn), 0); if (m == (int) nblocks - 1) { /* Nobody else will need it */ starpu_data_wont_use(data_handles[n][k]); starpu_mpi_cache_flush(MPI_COMM_WORLD, data_handles[n][k]); } } } } if (checkpoint_enabled) { if (a%(4*checkpoint_period)==(4*checkpoint_period)-1) starpu_mpi_checkpoint_template_submit(*checkpoint_p, (int)(2*nblocks - a)); } starpu_iteration_pop(); } } /* * code to bootstrap the factorization * and construct the DAG */ void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing, double *flops) { double start; double end; starpu_data_handle_t **data_handles; unsigned m, n; /* create all the DAG nodes */ if (checkpoint_enabled) { _nodes = nodes; starpu_malloc((void**)&checkpoint_p, sizeof(starpu_mpi_checkpoint_template_t)); starpu_mpi_checkpoint_template_create(checkpoint_p, 13, 0); } data_handles = malloc(nblocks*sizeof(starpu_data_handle_t *)); for(m=0 ; m=n) starpu_mpi_checkpoint_template_add_entry(checkpoint_p, STARPU_R, data_handles[m][n], backup_function(mpi_rank)); } } } } starpu_mpi_wait_for_all(MPI_COMM_WORLD); starpu_mpi_barrier(MPI_COMM_WORLD); start = starpu_timing_now(); switch (submission) { case TRIANGLES: run_cholesky(data_handles, rank, nodes); break; case COLUMNS: run_cholesky_column(data_handles, rank, nodes); break; case ANTIDIAGONALS: run_cholesky_antidiagonal(data_handles, rank, nodes); break; case PRIOS: run_cholesky_prio(data_handles, rank, nodes); break; default: STARPU_ABORT(); } starpu_mpi_wait_for_all(MPI_COMM_WORLD); starpu_mpi_barrier(MPI_COMM_WORLD); end = starpu_timing_now(); for (m = 0; m < nblocks; m++) { for(n = 0; n < nblocks ; n++) { /* Get back data on node 0 for the check */ if (check && data_handles[m][n]) starpu_mpi_get_data_on_node(MPI_COMM_WORLD, data_handles[m][n], 0); if (data_handles[m][n]) starpu_data_unregister(data_handles[m][n]); } free(data_handles[m]); } free(data_handles); if (rank == 0) { *timing = end - start; *flops = FLOPS_SPOTRF(size); } } void dw_cholesky_check_computation(float ***matA, int rank, int nodes, int *correctness, double *flops, double epsilon) { unsigned nn,mm,n,m; float *rmat = malloc(size*size*sizeof(float)); for(n=0 ; n mm) { rmat[mm+nn*size] = 0.0f; // debug } } } float *test_mat = malloc(size*size*sizeof(float)); STARPU_ASSERT(test_mat); STARPU_SSYRK("L", "N", size, size, 1.0f, rmat, size, 0.0f, test_mat, size); FPRINTF(stderr, "[%d] comparing results ...\n", rank); if (display) { for (mm = 0; mm < size; mm++) { for (nn = 0; nn < size; nn++) { if (nn <= mm) { printf("%2.2f\t", test_mat[mm +nn*size]); } else { printf(".\t"); } } printf("\n"); } } *correctness = 1; for(n = 0; n < nblocks ; n++) { for (m = 0; m < nblocks; m++) { for (nn = BLOCKSIZE*n ; nn < BLOCKSIZE*(n+1); nn++) { for (mm = BLOCKSIZE*m ; mm < BLOCKSIZE*(m+1); mm++) { if (nn <= mm) { float orig = (1.0f/(1.0f+nn+mm)) + ((nn == mm)?1.0f*size:0.0f); float err = fabsf(test_mat[mm +nn*size] - orig) / orig; if (err > epsilon) { FPRINTF(stderr, "[%d] Error[%u, %u] --> %2.20f != %2.20f (err %2.20f)\n", rank, nn, mm, test_mat[mm +nn*size], orig, err); *correctness = 0; *flops = 0; break; } } } } } } free(rmat); free(test_mat); } starpu-1.4.9+dfsg/mpi/examples/matrix_decomposition/mpi_cholesky_codelets.h000066400000000000000000000021061507764646700273730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __MPI_CHOLESKY_CODELETS_H__ #define __MPI_CHOLESKY_CODELETS_H__ /* * code to bootstrap the factorization * and construct the DAG */ void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing, double *flops); void dw_cholesky_check_computation(float ***matA, int rank, int nodes, int *correctness, double *flops, double epsilon); #endif /* __MPI_CHOLESKY_CODELETS_H__ */ starpu-1.4.9+dfsg/mpi/examples/matrix_decomposition/mpi_cholesky_distributed.c000066400000000000000000000037211507764646700301120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_cholesky.h" /* This is the same as matrix_decomposition, but the matrix is not allocated in * totality on all nodes, thus allowing much bigger matrices, but doesn't allow * trivial checks */ int main(int argc, char **argv) { /* create a simple definite positive symmetric matrix example * * Hilbert matrix : h(i,j) = 1/(i+j+1) * */ float ***bmat; int rank, nodes, ret; double timing, flops; ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); starpu_cublas_init(); parse_args(argc, argv, nodes); if (checkpoint_enabled) starpu_mpi_checkpoint_init(); if (check) { fprintf(stderr,"can't check in distributed mode\n"); check = 0; } matrix_init(&bmat, rank, nodes, 0); dw_cholesky(bmat, size/nblocks, rank, nodes, &timing, &flops); matrix_free(&bmat, rank, nodes, 0); starpu_cublas_shutdown(); if (checkpoint_enabled) starpu_mpi_checkpoint_shutdown(); starpu_mpi_shutdown(); if (rank == 0) { FPRINTF(stdout, "Computation time (in ms): %2.2f\n", timing/1000); FPRINTF(stdout, "Synthetic GFlops : %2.2f\n", (flops/timing/1000.0f)); } return 0; } starpu-1.4.9+dfsg/mpi/examples/matrix_decomposition/mpi_cholesky_kernels.c000066400000000000000000000200531507764646700272300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_cholesky.h" #include #include "common/blas.h" #ifdef STARPU_USE_CUDA #include #include #include #ifdef STARPU_HAVE_MAGMA #include "magma.h" #include "magma_lapack.h" #endif #endif /* * GEMM */ #if defined(STARPU_USE_CUDA) static const float p1 = 1.0; static const float m1 = -1.0; #endif static inline void chol_common_cpu_codelet_update_gemm(void *descr[], int s, void *_args) { (void)_args; /* printf("gemm\n"); */ float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); float *right = (float *)STARPU_MATRIX_GET_PTR(descr[1]); float *center = (float *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned dx = STARPU_MATRIX_GET_NY(descr[2]); unsigned dy = STARPU_MATRIX_GET_NX(descr[2]); unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); switch (s) { case 0: /* CPU kernel */ STARPU_SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, right, ld12, 1.0f, center, ld22); break; #ifdef STARPU_USE_CUDA case 1: { /* CUDA kernel */ cublasStatus_t status = cublasSgemm(starpu_cublas_get_local_handle(), CUBLAS_OP_N, CUBLAS_OP_T, dy, dx, dz, &m1, left, ld21, right, ld12, &p1, center, ld22); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; } #endif default: STARPU_ABORT(); break; } } void chol_cpu_codelet_update_gemm(void *descr[], void *_args) { chol_common_cpu_codelet_update_gemm(descr, 0, _args); } #ifdef STARPU_USE_CUDA void chol_cublas_codelet_update_gemm(void *descr[], void *_args) { chol_common_cpu_codelet_update_gemm(descr, 1, _args); } #endif /* STARPU_USE_CUDA */ /* * SYRK */ static inline void chol_common_cpu_codelet_update_syrk(void *descr[], int s, void *_args) { (void)_args; /* printf("syrk\n"); */ float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); float *center = (float *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned dx = STARPU_MATRIX_GET_NY(descr[1]); unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld22 = STARPU_MATRIX_GET_LD(descr[1]); switch (s) { case 0: { /* CPU kernel */ STARPU_SSYRK("L", "N", dx, dz, -1.0f, left, ld21, 1.0f, center, ld22); break; } #ifdef STARPU_USE_CUDA case 1: { /* CUDA kernel */ cublasStatus_t status = cublasSsyrk(starpu_cublas_get_local_handle(), CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, dx, dz, &m1, left, ld21, &p1, center, ld22); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; } #endif default: STARPU_ABORT(); break; } } void chol_cpu_codelet_update_syrk(void *descr[], void *_args) { chol_common_cpu_codelet_update_syrk(descr, 0, _args); } #ifdef STARPU_USE_CUDA void chol_cublas_codelet_update_syrk(void *descr[], void *_args) { chol_common_cpu_codelet_update_syrk(descr, 1, _args); } #endif /* STARPU_USE_CUDA */ /* * TRSM */ static inline void chol_common_codelet_update_trsm(void *descr[], int s, void *_args) { (void)_args; /* printf("trsm\n"); */ float *sub11; float *sub21; sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); sub21 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); unsigned nx21 = STARPU_MATRIX_GET_NY(descr[1]); unsigned ny21 = STARPU_MATRIX_GET_NX(descr[1]); #ifdef STARPU_USE_CUDA cublasStatus_t status; #endif switch (s) { case 0: STARPU_STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21); break; #ifdef STARPU_USE_CUDA case 1: status = cublasStrsm(starpu_cublas_get_local_handle(), CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_T, CUBLAS_DIAG_NON_UNIT, nx21, ny21, &p1, sub11, ld11, sub21, ld21); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; #endif default: STARPU_ABORT(); break; } } void chol_cpu_codelet_update_trsm(void *descr[], void *_args) { chol_common_codelet_update_trsm(descr, 0, _args); } #ifdef STARPU_USE_CUDA void chol_cublas_codelet_update_trsm(void *descr[], void *_args) { chol_common_codelet_update_trsm(descr, 1, _args); } #endif /* * POTRF */ static inline void chol_common_codelet_update_potrf(void *descr[], int s, void *_args) { (void)_args; /* printf("potrf\n"); */ float *sub11; sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); unsigned nx = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld = STARPU_MATRIX_GET_LD(descr[0]); unsigned z; switch (s) { case 0: #ifdef STARPU_MKL STARPU_SPOTRF("L", nx, sub11, ld); #else /* * - alpha 11 <- lambda 11 = sqrt(alpha11) * - alpha 21 <- l 21 = alpha 21 / lambda 11 * - A22 <- A22 - l21 trans(l21) */ for (z = 0; z < nx; z++) { float lambda11; lambda11 = sqrt(sub11[z+z*ld]); sub11[z+z*ld] = lambda11; STARPU_ASSERT(lambda11 != 0.0f); STARPU_SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1); STARPU_SSYR("L", nx - z - 1, -1.0f, &sub11[(z+1)+z*ld], 1, &sub11[(z+1)+(z+1)*ld], ld); } #endif break; #ifdef STARPU_USE_CUDA case 1: #ifdef STARPU_HAVE_MAGMA { int ret; int info; #if (MAGMA_VERSION_MAJOR > 1) || (MAGMA_VERSION_MAJOR == 1 && MAGMA_VERSION_MINOR >= 4) cudaStream_t stream = starpu_cuda_get_local_stream(); cublasSetKernelStream(stream); magmablasSetKernelStream(stream); #else starpu_cublas_set_stream(); #endif ret = magma_spotrf_gpu(MagmaLower, nx, sub11, ld, &info); if (ret != MAGMA_SUCCESS) { fprintf(stderr, "Error in Magma: %d\n", ret); STARPU_ABORT(); } #if (MAGMA_VERSION_MAJOR > 1) || (MAGMA_VERSION_MAJOR == 1 && MAGMA_VERSION_MINOR >= 4) cudaError_t cures = cudaStreamSynchronize(stream); #else cudaError_t cures = cudaDeviceSynchronize(); #endif STARPU_ASSERT(!cures); } #else { float *lambda11; cublasStatus_t status; cudaStream_t stream = starpu_cuda_get_local_stream(); cublasHandle_t handle = starpu_cublas_get_local_handle(); cudaHostAlloc((void **)&lambda11, sizeof(float), 0); for (z = 0; z < nx; z++) { cudaMemcpyAsync(lambda11, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, stream); cudaStreamSynchronize(stream); STARPU_ASSERT(*lambda11 != 0.0f); *lambda11 = sqrt(*lambda11); /* cublasSetVector(1, sizeof(float), lambda11, sizeof(float), &sub11[z+z*ld], sizeof(float)); */ cudaMemcpyAsync(&sub11[z+z*ld], lambda11, sizeof(float), cudaMemcpyHostToDevice, stream); float scal = 1.0f/(*lambda11); status = cublasSscal(handle, nx - z - 1, &scal, &sub11[(z+1)+z*ld], 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); status = cublasSsyr(handle, CUBLAS_FILL_MODE_UPPER, nx - z - 1, &m1, &sub11[(z+1)+z*ld], 1, &sub11[(z+1)+(z+1)*ld], ld); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } cudaStreamSynchronize(stream); cudaFreeHost(lambda11); } #endif break; #endif default: STARPU_ABORT(); break; } } void chol_cpu_codelet_update_potrf(void *descr[], void *_args) { chol_common_codelet_update_potrf(descr, 0, _args); } #ifdef STARPU_USE_CUDA void chol_cublas_codelet_update_potrf(void *descr[], void *_args) { chol_common_codelet_update_potrf(descr, 1, _args); } #endif/* STARPU_USE_CUDA */ starpu-1.4.9+dfsg/mpi/examples/matrix_decomposition/mpi_cholesky_kernels.h000066400000000000000000000024521507764646700272400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __MPI_CHOLESKY_KERNELS_H__ #define __MPI_CHOLESKY_KERNELS_H__ #include void chol_cpu_codelet_update_potrf(void **, void *); void chol_cpu_codelet_update_trsm(void **, void *); void chol_cpu_codelet_update_syrk(void **, void *); void chol_cpu_codelet_update_gemm(void **, void *); #ifdef STARPU_USE_CUDA void chol_cublas_codelet_update_potrf(void *descr[], void *_args); void chol_cublas_codelet_update_trsm(void *descr[], void *_args); void chol_cublas_codelet_update_syrk(void *descr[], void *_args); void chol_cublas_codelet_update_gemm(void *descr[], void *_args); #endif #endif // __MPI_CHOLESKY_KERNELS_H__ starpu-1.4.9+dfsg/mpi/examples/matrix_decomposition/mpi_cholesky_models.c000066400000000000000000000022361507764646700270530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_cholesky.h" /* * Number of flops of Gemm */ struct starpu_perfmodel chol_model_potrf = { .type = STARPU_HISTORY_BASED, .symbol = "chol_model_potrf" }; struct starpu_perfmodel chol_model_trsm = { .type = STARPU_HISTORY_BASED, .symbol = "chol_model_trsm" }; struct starpu_perfmodel chol_model_syrk = { .type = STARPU_HISTORY_BASED, .symbol = "chol_model_syrk" }; struct starpu_perfmodel chol_model_gemm = { .type = STARPU_HISTORY_BASED, .symbol = "chol_model_gemm" }; starpu-1.4.9+dfsg/mpi/examples/matrix_decomposition/mpi_cholesky_models.h000066400000000000000000000017271507764646700270640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DW_CHOLESKY_MODELS_H__ #define __DW_CHOLESKY_MODELS_H__ extern struct starpu_perfmodel chol_model_potrf; extern struct starpu_perfmodel chol_model_trsm; extern struct starpu_perfmodel chol_model_syrk; extern struct starpu_perfmodel chol_model_gemm; #endif // __DW_CHOLESKY_MODELS_H__ starpu-1.4.9+dfsg/mpi/examples/matrix_decomposition/mpi_decomposition_matrix.c000066400000000000000000000061271507764646700301320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_cholesky.h" /* Returns the MPI node number where data indexes index is */ int my_distrib(int y, int x, int nb_nodes) { (void)nb_nodes; //return (x+y) % nb_nodes; return (x%dblockx)+(y%dblocky)*dblockx; } void matrix_display(float ***bmat, int rank, int nodes) { int n; if (!display) return; starpu_mpi_barrier(MPI_COMM_WORLD); for (n = 0; n < rank; n++) starpu_mpi_barrier(MPI_COMM_WORLD); unsigned y; printf("[%d] Input :\n", rank); for(y=0 ; y #include #include #include #ifdef STARPU_HAVE_VALGRIND_H #include #endif #ifdef STARPU_QUICK_CHECK unsigned size = 2*320; unsigned nblocks = 2; unsigned nbigblocks = 2; #elif !defined(STARPU_LONG_CHECK) unsigned size = 4*320; unsigned nblocks = 4; unsigned nbigblocks = 2; #else unsigned size = 16*320; unsigned nblocks = 16; unsigned nbigblocks = 2; #endif unsigned noprio = 0; unsigned check = 0; unsigned display = 0; int dblockx = -1; int dblocky = -1; enum submission submission = TRIANGLES; unsigned long checkpoint_period = 1; #ifdef STARPU_USE_MPI_FT int checkpoint_enabled = 1; #else int checkpoint_enabled = 0; #endif void parse_args(int argc, char **argv, int nodes) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-size") == 0) { char *argptr; size = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-dblockx") == 0) { char *argptr; dblockx = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-dblocky") == 0) { char *argptr; dblocky = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-nblocks") == 0) { char *argptr; nblocks = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-nbigblocks") == 0) { char *argptr; nbigblocks = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-columns") == 0) { submission = COLUMNS; } else if (strcmp(argv[i], "-antidiagonals") == 0) { submission = ANTIDIAGONALS; } else if (strcmp(argv[i], "-prios") == 0) { submission = PRIOS; } else if (strcmp(argv[i], "-no-prio") == 0) { noprio = 1; } else if (strcmp(argv[i], "-checkpoint-period") == 0) { char *argptr; checkpoint_period = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-checkpoint-enabled") == 0) { char *argptr; checkpoint_enabled = strtol(argv[++i], &argptr, 10); } else if (strcmp(argv[i], "-check") == 0) { check = 1; } else if (strcmp(argv[i], "-display") == 0) { display = 1; } else /* if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) */ { printf("usage : %s [-size size] [-nblocks nblocks] [-columns] [-antidiagonals] [-prios] [-no-prio] [-display] [-check] [-checkpoint-period period] [-checkpoint-enabled 0/1]\n", argv[0]); fprintf(stderr,"Currently selected: %ux%u and %ux%u blocks checkpoint enabled %d with period %lu\n", size, size, nblocks, nblocks, checkpoint_enabled, checkpoint_period); exit(0); } } #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) size = 16; #endif if (nblocks > size) nblocks = size; if (dblockx == -1 || dblocky == -1) { int factor; dblockx = nodes; dblocky = 1; for(factor=sqrt(nodes) ; factor>1 ; factor--) { if (nodes % factor == 0) { dblockx = nodes/factor; dblocky = factor; break; } } } FPRINTF(stdout, "size: %u - nblocks: %u - dblocksx: %d - dblocksy: %d\n", size, nblocks, dblockx, dblocky); } starpu-1.4.9+dfsg/mpi/examples/matrix_decomposition/mpi_decomposition_params.h000066400000000000000000000023371507764646700301150ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __MPI_CHOLESKY_PARAMS_H__ #define __MPI_CHOLESKY_PARAMS_H__ #define BLOCKSIZE (size/nblocks) extern unsigned size; extern unsigned nblocks; extern unsigned nbigblocks; extern unsigned noprio; extern unsigned check; extern unsigned display; extern int dblockx; extern int dblocky; extern unsigned long checkpoint_period; extern int checkpoint_enabled; enum submission { TRIANGLES, COLUMNS, ANTIDIAGONALS, PRIOS, }; extern enum submission submission; void parse_args(int argc, char **argv, int nodes); #endif // __MPI_CHOLESKY_PARAMS_H__ starpu-1.4.9+dfsg/mpi/examples/matrix_mult/000077500000000000000000000000001507764646700207605ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/examples/matrix_mult/mm.c000066400000000000000000000240361507764646700215420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example illustrates how to distribute a pre-existing data structure to * a set of computing nodes using StarPU-MPI routines. */ #include #include #include #include #include #include #include "helper.h" #define VERBOSE 0 static int N = 16; /* Matrix size */ static int BS = 4; /* Block size */ #define NB ((N)/(BS)) /* Number of blocks */ /* Matrices. Will be allocated as regular, linearized C arrays */ static double *A = NULL; /* A will be partitioned as BS rows x N cols blocks */ static double *B = NULL; /* B will be partitioned as N rows x BS cols blocks */ static double *C = NULL; /* C will be partitioned as BS rows x BS cols blocks */ /* Arrays of data handles for managing matrix blocks */ static starpu_data_handle_t *A_h; static starpu_data_handle_t *B_h; static starpu_data_handle_t *C_h; static int comm_rank; /* mpi rank of the process */ static int comm_size; /* size of the mpi session */ static void alloc_matrices(void) { /* Regular 'malloc' can also be used instead, however, starpu_malloc make sure that * the area is allocated in suitably pinned memory to improve data transfers, especially * with CUDA */ starpu_malloc((void **)&A, N*N*sizeof(double)); starpu_malloc((void **)&B, N*N*sizeof(double)); starpu_malloc((void **)&C, N*N*sizeof(double)); } static void free_matrices(void) { starpu_free_noflag(A, N*N*sizeof(double)); starpu_free_noflag(B, N*N*sizeof(double)); starpu_free_noflag(C, N*N*sizeof(double)); } static void init_matrices(void) { int row,col; for (row = 0; row < N; row++) { for (col = 0; col < N; col++) { A[row*N+col] = (row==col)?2:0; B[row*N+col] = row*N+col; C[row*N+col] = 0; } } } #if VERBOSE static void disp_matrix(double *m) { int row,col; for (row = 0; row < N; row++) { for (col = 0; col < N; col++) { printf("\t%.2lf", m[row*N+col]); } printf("\n"); } } #endif static void check_result(void) { int row,col; for (row = 0; row < N; row++) { for (col = 0; col < N; col++) { if (fabs(C[row*N+col] - 2*(row*N+col)) > 1.0) { fprintf(stderr, "check failed\n"); exit(1); } } } #if VERBOSE printf("success\n"); #endif } /* Register the matrix blocks to StarPU and to StarPU-MPI */ static void register_matrices() { A_h = calloc(NB, sizeof(starpu_data_handle_t)); B_h = calloc(NB, sizeof(starpu_data_handle_t)); C_h = calloc(NB*NB, sizeof(starpu_data_handle_t)); /* Memory region, where the data being registered resides. * In this example, all blocks are allocated by node 0, thus * - node 0 specifies STARPU_MAIN_RAM to indicate that it owns the block in its main memory * - nodes !0 specify -1 to indicate that they don't have a copy of the block initially */ int mr = (comm_rank == 0) ? STARPU_MAIN_RAM : -1; /* mpi tag used for the block */ starpu_mpi_tag_t tag = 0; int b_row,b_col; for (b_row = 0; b_row < NB; b_row++) { /* Register a block to StarPU */ starpu_matrix_data_register(&A_h[b_row], mr, (comm_rank == 0)?(uintptr_t)(A+b_row*BS*N):0, N, N, BS, sizeof(double)); /* Register a block to StarPU-MPI, specifying the mpi tag to use for transferring the block * and the rank of the owner node. * * Note: StarPU-MPI is an autonomous layer built on top of StarPU, hence the two separate * registration steps. */ starpu_data_set_coordinates(A_h[b_row], 2, 0, b_row); starpu_mpi_data_register(A_h[b_row], tag++, 0); } for (b_col = 0; b_col < NB; b_col++) { starpu_matrix_data_register(&B_h[b_col], mr, (comm_rank == 0)?(uintptr_t)(B+b_col*BS):0, N, BS, N, sizeof(double)); starpu_data_set_coordinates(B_h[b_col], 2, b_col, 0); starpu_mpi_data_register(B_h[b_col], tag++, 0); } for (b_row = 0; b_row < NB; b_row++) { for (b_col = 0; b_col < NB; b_col++) { starpu_matrix_data_register(&C_h[b_row*NB+b_col], mr, (comm_rank == 0)?(uintptr_t)(C+b_row*BS*N+b_col*BS):0, N, BS, BS, sizeof(double)); starpu_data_set_coordinates(C_h[b_row*NB+b_col], 2, b_col, b_row); starpu_mpi_data_register(C_h[b_row*NB+b_col], tag++, 0); } } } /* Transfer ownership of the C matrix blocks following some user-defined distribution over the nodes. * Note: since C will be Write-accessed, it will implicitly define which node perform the task * associated to a given block. */ static void distribute_matrix_C(void) { int b_row,b_col; for (b_row = 0; b_row < NB; b_row++) { for (b_col = 0; b_col < NB; b_col++) { starpu_data_handle_t h = C_h[b_row*NB+b_col]; /* Select the node where the block should be computed. */ int target_rank = (b_row+b_col)%comm_size; /* Move the block on to its new owner. */ starpu_mpi_data_migrate(MPI_COMM_WORLD, h, target_rank); } } } /* Transfer ownership of the C matrix blocks back to node 0, for display purpose. This is not mandatory. */ static void undistribute_matrix_C(void) { int b_row,b_col; for (b_row = 0; b_row < NB; b_row++) { for (b_col = 0; b_col < NB; b_col++) { starpu_data_handle_t h = C_h[b_row*NB+b_col]; starpu_mpi_data_migrate(MPI_COMM_WORLD, h, 0); } } } /* Unregister matrices from the StarPU management. */ static void unregister_matrices() { int b_row,b_col; for (b_row = 0; b_row < NB; b_row++) { starpu_data_unregister(A_h[b_row]); } for (b_col = 0; b_col < NB; b_col++) { starpu_data_unregister(B_h[b_col]); } for (b_row = 0; b_row < NB; b_row++) { for (b_col = 0; b_col < NB; b_col++) { starpu_data_unregister(C_h[b_row*NB+b_col]); } } free(A_h); free(B_h); free(C_h); } /* Perform the actual computation. In a real-life case, this would rather call a BLAS 'gemm' routine * instead. */ static void cpu_mult(void *handles[], void *arg) { (void)arg; double *block_A = (double *)STARPU_MATRIX_GET_PTR(handles[0]); double *block_B = (double *)STARPU_MATRIX_GET_PTR(handles[1]); double *block_C = (double *)STARPU_MATRIX_GET_PTR(handles[2]); unsigned n_col_A = STARPU_MATRIX_GET_NX(handles[0]); unsigned n_col_B = STARPU_MATRIX_GET_NX(handles[1]); unsigned n_col_C = STARPU_MATRIX_GET_NX(handles[2]); unsigned n_row_A = STARPU_MATRIX_GET_NY(handles[0]); unsigned n_row_B = STARPU_MATRIX_GET_NY(handles[1]); unsigned n_row_C = STARPU_MATRIX_GET_NY(handles[2]); unsigned ld_A = STARPU_MATRIX_GET_LD(handles[0]); unsigned ld_B = STARPU_MATRIX_GET_LD(handles[1]); unsigned ld_C = STARPU_MATRIX_GET_LD(handles[2]); /* Sanity check, not needed in real life case */ assert(n_col_C == n_col_B); assert(n_row_C == n_row_A); assert(n_col_A == n_row_B); unsigned i,j,k; for (k = 0; k < n_row_C; k++) { for (j = 0; j < n_col_C; j++) { for (i = 0; i < n_col_A; i++) { block_C[k*ld_C+j] += block_A[k*ld_A+i] * block_B[i*ld_B+j]; } #if VERBOSE /* For illustration purpose, shows which node computed * the block in the decimal part of the cell */ block_C[k*ld_C+j] += comm_rank / 100.0; #endif } } } /* Define a StarPU 'codelet' structure for the matrix multiply kernel above. * This structure enable specifying multiple implementations for the kernel (such as CUDA or OpenCL versions) */ static struct starpu_codelet gemm_cl = { .cpu_funcs = {cpu_mult}, /* cpu implementation(s) of the routine */ .nbuffers = 3, /* number of data handles referenced by this routine */ .modes = {STARPU_R, STARPU_R, STARPU_RW}, /* access modes for each data handle */ .name = "gemm" /* to display task name in traces */ }; int main(int argc, char *argv[]) { /* Initializes STarPU and the StarPU-MPI layer */ int ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_ini_conft"); if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_mpi_shutdown(); return STARPU_TEST_SKIPPED; } /* Parse the matrix size and block size optional args */ if (argc > 1) { N = atoi(argv[1]); if (N < 1) { fprintf(stderr, "invalid matrix size\n"); exit(1); } if (argc > 2) { BS = atoi(argv[2]); } if (BS < 1 || N % BS != 0) { fprintf(stderr, "invalid block size\n"); exit(1); } } /* Get the process rank and session size */ starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &comm_size); if (comm_rank == 0) { #if VERBOSE printf("N = %d\n", N); printf("BS = %d\n", BS); printf("NB = %d\n", NB); printf("comm_size = %d\n", comm_size); #endif /* In this example, node rank 0 performs all the memory allocations and initializations, * and the blocks are later distributed on the other nodes. * This is not mandatory however, and blocks could be allocated on other nodes right * from the beginning, depending on the application needs (in particular for the case * where the session wide data footprint is larger than a single node available memory. */ alloc_matrices(); init_matrices(); } /* Register matrices to StarPU and StarPU-MPI */ register_matrices(); /* Distribute C blocks */ distribute_matrix_C(); int b_row,b_col; for (b_row = 0; b_row < NB; b_row++) { for (b_col = 0; b_col < NB; b_col++) { starpu_mpi_task_insert(MPI_COMM_WORLD, &gemm_cl, STARPU_R, A_h[b_row], STARPU_R, B_h[b_col], STARPU_RW, C_h[b_row*NB+b_col], 0); } } starpu_task_wait_for_all(); undistribute_matrix_C(); unregister_matrices(); if (comm_rank == 0) { #if VERBOSE disp_matrix(C); #endif check_result(); free_matrices(); } starpu_mpi_shutdown(); return 0; } starpu-1.4.9+dfsg/mpi/examples/matrix_mult/mm_2dbc.c000066400000000000000000000246301507764646700224340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example illustrates the computation of general matrices with originally * distributed A, B and C matrices to a set of computing nodes. */ #include #include #include #include #include #include #include #include "helper.h" #include #define VERBOSE 0 static int M = 1024; /* Matrix size */ static int N = 1024; /* Matrix size */ static int K = 1024; /* Matrix size */ static int BS = 512; /* Block size */ static int P = 2; /* height of the grid */ static int Q = 2; /* width of the grid */ static int T = 1; /* number of runs */ static int trace = 0; /* whether to trace */ #define MB ((M)/(BS)) /* Number of blocks */ #define NB ((N)/(BS)) /* Number of blocks */ #define KB ((K)/(BS)) /* Number of blocks */ /* Arrays of data handles for managing matrix blocks */ static starpu_data_handle_t *A_h; static starpu_data_handle_t *B_h; static starpu_data_handle_t *C_h; static int comm_rank; /* mpi rank of the process */ static int comm_size; /* size of the mpi session */ struct block { double* c; int owner; }; struct matrix { int mb, nb, b; struct block* blocks; }; /* Matrices. Will be allocated as regular, linearized C arrays */ static struct matrix *A = NULL; /* A will be partitioned as MB x KB blocks */ static struct matrix *B = NULL; /* B will be partitioned as KB x NB blocks */ static struct matrix *C = NULL; /* C will be partitioned as MB x NB blocks */ struct matrix* alloc_matrix(int mb, int nb) { struct matrix* X; X = malloc(sizeof(struct matrix)); X->blocks = malloc(mb*nb*sizeof(struct block)); int i,j; for (i = 0; iblocks[i*nb+j].owner = (i%P)*Q + (j%Q); if (X->blocks[i*nb+j].owner == comm_rank) X->blocks[i*nb+j].c = malloc(BS*BS*sizeof(double)); } } X->mb = mb; X->nb = nb; X->b = BS; return X; } static void alloc_matrices(void) { if (VERBOSE) printf("Allocating matrices\n"); A = alloc_matrix(MB,KB); B = alloc_matrix(KB,NB); C = alloc_matrix(MB,NB); } static void free_matrix(struct matrix* X, int mb, int nb) { int i,j; for (i = 0; iblocks[i*nb+j].owner == comm_rank) free(X->blocks[i*nb+j].c); } } free(X->blocks); free(X); } static void free_matrices(void) { if (VERBOSE) printf("Freeing matrices\n"); free_matrix(A,MB,KB); free_matrix(B,KB,NB); free_matrix(C,MB,NB); } static void register_matrix(struct matrix* X, starpu_data_handle_t* X_h, starpu_mpi_tag_t *tag, int mb, int nb) { int b_row, b_col; for (b_row = 0; b_row < mb; b_row++) { for (b_col = 0; b_col < nb; b_col++) { if (X->blocks[b_row*nb+b_col].owner == comm_rank) { starpu_matrix_data_register(&X_h[b_row*nb+b_col], STARPU_MAIN_RAM, (uintptr_t) X->blocks[b_row*nb+b_col].c, BS, BS, BS, sizeof(double)); } else { starpu_matrix_data_register(&X_h[b_row*nb+b_col], -1, (uintptr_t) NULL, BS, BS, BS, sizeof(double)); } // printf("tag:%d\n",*tag); starpu_mpi_data_register(X_h[b_row*nb+b_col], (*tag)++, X->blocks[b_row*nb+b_col].owner); } } } starpu_mpi_tag_t tag = 0; /* Register the matrix blocks to StarPU and to StarPU-MPI */ static void register_matrices() { if (VERBOSE) printf("Registering matrices\n"); A_h = calloc(MB*KB, sizeof(starpu_data_handle_t)); B_h = calloc(KB*NB, sizeof(starpu_data_handle_t)); C_h = calloc(MB*NB, sizeof(starpu_data_handle_t)); /* mpi tag used for the block */ register_matrix(A,A_h,&tag,MB,KB); register_matrix(B,B_h,&tag,KB,NB); register_matrix(C,C_h,&tag,MB,NB); } static void unregister_matrix(struct matrix* X, starpu_data_handle_t* X_h, int mb, int nb) { int b_row,b_col; for (b_row = 0; b_row < mb; b_row++) { for (b_col = 0; b_col < nb; b_col++) { starpu_data_unregister(X_h[b_row*nb+b_col]); } } free(X_h); } /* Unregister matrices from the StarPU management. */ static void unregister_matrices() { if (VERBOSE) printf("Unregistering matrices\n"); unregister_matrix(A,A_h,MB,KB); unregister_matrix(B,B_h,KB,NB); unregister_matrix(C,C_h,MB,NB); } static void cpu_mult(void *handles[], void *arg) { (void)arg; double *block_A = (double *)STARPU_MATRIX_GET_PTR(handles[0]); double *block_B = (double *)STARPU_MATRIX_GET_PTR(handles[1]); double *block_C = (double *)STARPU_MATRIX_GET_PTR(handles[2]); unsigned n_col_A = STARPU_MATRIX_GET_NX(handles[0]); unsigned n_col_C = STARPU_MATRIX_GET_NX(handles[2]); unsigned n_row_C = STARPU_MATRIX_GET_NY(handles[2]); unsigned ld_A = STARPU_MATRIX_GET_LD(handles[0]); unsigned ld_B = STARPU_MATRIX_GET_LD(handles[1]); unsigned ld_C = STARPU_MATRIX_GET_LD(handles[2]); if (VERBOSE) printf("gemm_task\n"); STARPU_DGEMM("N", "N", n_row_C,n_col_C,n_col_A, 1.0, block_A, ld_A, block_B, ld_B, 1.0, block_C, ld_C); } static void cpu_fill(void *handles[], void *arg) { (void)arg; double *block_A = (double *)STARPU_MATRIX_GET_PTR(handles[0]); unsigned n_col_A = STARPU_MATRIX_GET_NX(handles[0]); unsigned n_row_A = STARPU_MATRIX_GET_NY(handles[0]); unsigned i,j; if (VERBOSE) printf("fill_task\n"); for (i=0;iblocks[row*nb+col].owner == comm_rank) { starpu_mpi_task_insert(MPI_COMM_WORLD, &fill_cl, STARPU_W, X_h[row*nb+col], 0); } } } } static void init_matrices(void) { if (VERBOSE) printf("Initializing matrices\n"); // I own all the blocks init_matrix(A,A_h,MB,KB); starpu_mpi_wait_for_all(MPI_COMM_WORLD); init_matrix(B,B_h,KB,NB); starpu_mpi_wait_for_all(MPI_COMM_WORLD); init_matrix(C,C_h,MB,NB); starpu_mpi_wait_for_all(MPI_COMM_WORLD); } int main(int argc, char *argv[]) { /* Initializes StarPU and the StarPU-MPI layer */ starpu_fxt_autostart_profiling(0); int ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_ini_conft"); /* Get the process rank and session size */ starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &comm_size); if (comm_rank == 0) printf("Launching with %d arguments\n",argc); if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_mpi_shutdown(); return (comm_rank == 0) ? STARPU_TEST_SKIPPED : 0; } /* Parse the matrix size and block size optional args */ // M, N, K, B, P, Q if (argc < 8) { if (comm_rank == 0) fprintf(stderr, "using default sizes for arguments\n"); } else { M = atoi(argv[1]); N = atoi(argv[2]); K = atoi(argv[3]); BS = atoi(argv[4]); P = atoi(argv[5]); Q = atoi(argv[6]); T = atoi(argv[7]); } if (BS < 1 || M % BS != 0) { if (comm_rank == 0) fprintf(stderr, "invalid block size\n"); starpu_mpi_shutdown(); return (comm_rank == 0) ? 1 : 0; } if (BS < 1 || N % BS != 0) { if (comm_rank == 0) fprintf(stderr, "invalid block size\n"); starpu_mpi_shutdown(); return (comm_rank == 0) ? 1 : 0; } if (BS < 1 || K % BS != 0) { if (comm_rank == 0) fprintf(stderr, "invalid block size\n"); starpu_mpi_shutdown(); return (comm_rank == 0) ? 1 : 0; } if (argc > 9) { if (comm_rank == 0) fprintf(stderr, "invalid argument size (reuqire 8 arguments, 9 if tracing ; given %d)\n",argc); starpu_mpi_shutdown(); return (comm_rank == 0) ? 1 : 0; } else if (argc == 9) { trace = 1; } if (P < 1 || Q < 1 || P*Q != comm_size) { fprintf(stderr, "invalid grid size\n"); starpu_mpi_shutdown(); return (comm_rank == 0) ? 1 : 0; } if (comm_rank == 0) { printf("MxNxK = %dx%dx%d\n", M, N, K); printf("BS = %d\n", BS); printf("MxNxKb = %dx%dx%d\n", MB,NB,KB); printf("comm_size = %d\n", comm_size); printf("PxQ = %dx%d\n", P, Q); } int trial; double start, stop; if (trace) starpu_fxt_start_profiling(); for (trial =0; trial < T; trial++) { alloc_matrices(); register_matrices(); init_matrices(); starpu_mpi_barrier(MPI_COMM_WORLD); start = starpu_timing_now(); int b_row,b_col,b_aisle; for (b_row = 0; b_row < MB; b_row++) { for (b_col = 0; b_col < NB; b_col++) { for (b_aisle=0;b_aisle took %f s | %f Gflop/s\n", comm_rank, timing/1000/1000, 2.0*M*N*K/(timing*1000)); starpu_mpi_cache_flush_all_data(MPI_COMM_WORLD); unregister_matrices(); free_matrices(); } if (trace) starpu_fxt_stop_profiling(); starpu_mpi_shutdown(); return 0; } starpu-1.4.9+dfsg/mpi/examples/mpi_lu/000077500000000000000000000000001507764646700177005ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/examples/mpi_lu/mpi_lu-double.h000066400000000000000000000025521507764646700226120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define TYPE double #define MPI_TYPE MPI_DOUBLE #define CUBLAS_TYPE TYPE #define STARPU_PLU(name) starpu_pdlu_##name #define CUBLAS_GEMM cublasDgemm #define CUBLAS_TRSM cublasDtrsm #define CUBLAS_SCAL cublasDscal #define CUBLAS_GER cublasDger #define CUBLAS_SWAP cublasDswap #define CUBLAS_IAMAX cublasIdamax #define CPU_GEMM STARPU_DGEMM #define CPU_GEMV STARPU_DGEMV #define CPU_TRSM STARPU_DTRSM #define CPU_SCAL STARPU_DSCAL #define CPU_GER STARPU_DGER #define CPU_SWAP STARPU_DSWAP #define CPU_TRMM STARPU_DTRMM #define CPU_AXPY STARPU_DAXPY #define CPU_ASUM STARPU_DASUM #define CPU_IAMAX STARPU_IDAMAX #define PIVOT_THRESHHOLD 10e-10 #define ISZERO(f) (fpclassify(f) == FP_ZERO) starpu-1.4.9+dfsg/mpi/examples/mpi_lu/mpi_lu-float.h000066400000000000000000000025471507764646700224510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define TYPE float #define MPI_TYPE MPI_FLOAT #define CUBLAS_TYPE TYPE #define STARPU_PLU(name) starpu_pslu_##name #define CUBLAS_GEMM cublasSgemm #define CUBLAS_TRSM cublasStrsm #define CUBLAS_SCAL cublasSscal #define CUBLAS_GER cublasSger #define CUBLAS_SWAP cublasSswap #define CUBLAS_IAMAX cublasIsamax #define CPU_GEMM STARPU_SGEMM #define CPU_GEMV STARPU_SGEMV #define CPU_TRSM STARPU_STRSM #define CPU_SCAL STARPU_SSCAL #define CPU_GER STARPU_SGER #define CPU_SWAP STARPU_SSWAP #define CPU_TRMM STARPU_STRMM #define CPU_AXPY STARPU_SAXPY #define CPU_ASUM STARPU_SASUM #define CPU_IAMAX STARPU_ISAMAX #define PIVOT_THRESHHOLD 10e-5 #define ISZERO(f) (fpclassify(f) == FP_ZERO) starpu-1.4.9+dfsg/mpi/examples/mpi_lu/pdlu.c000066400000000000000000000013341507764646700210110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_lu-double.h" #include "pxlu.c" starpu-1.4.9+dfsg/mpi/examples/mpi_lu/pdlu_implicit.c000066400000000000000000000014211507764646700227000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_lu-double.h" #include "pxlu_implicit.c" starpu-1.4.9+dfsg/mpi/examples/mpi_lu/pdlu_kernels.c000066400000000000000000000013441507764646700225350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_lu-double.h" #include "pxlu_kernels.c" starpu-1.4.9+dfsg/mpi/examples/mpi_lu/plu_example.c000066400000000000000000000413401507764646700223610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "helper.h" #include #include #include #include #include #include #include "pxlu.h" //#include "pxlu_kernels.h" #ifdef STARPU_HAVE_LIBNUMA #include #endif #ifdef STARPU_HAVE_VALGRIND_H #include #endif static unsigned long size = 4096; static unsigned nblocks = 16; static unsigned check = 0; static int p = -1; static int q = -1; static unsigned display = 0; static unsigned no_prio = 0; #ifdef STARPU_HAVE_LIBNUMA static unsigned numa = 0; #endif static size_t allocated_memory = 0; static size_t allocated_memory_extra = 0; static starpu_data_handle_t *dataA_handles; static TYPE **dataA; /* In order to implement the distributed LU decomposition, we allocate * temporary buffers */ #ifdef SINGLE_TMP11 static starpu_data_handle_t tmp_11_block_handle; static TYPE *tmp_11_block; #else static starpu_data_handle_t *tmp_11_block_handles; static TYPE **tmp_11_block; #endif #ifdef SINGLE_TMP1221 static starpu_data_handle_t *tmp_12_block_handles; static TYPE **tmp_12_block; static starpu_data_handle_t *tmp_21_block_handles; static TYPE **tmp_21_block; #else static starpu_data_handle_t *(tmp_12_block_handles[2]); static TYPE **(tmp_12_block[2]); static starpu_data_handle_t *(tmp_21_block_handles[2]); static TYPE **(tmp_21_block[2]); #endif static void parse_args(int rank, int argc, char **argv) { (void)rank; int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-size") == 0) { char *argptr; size = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-nblocks") == 0) { char *argptr; nblocks = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-check") == 0) { check = 1; } if (strcmp(argv[i], "-display") == 0) { display = 1; } if (strcmp(argv[i], "-numa") == 0) { #ifdef STARPU_HAVE_LIBNUMA numa = 1; #else if (rank == 0) fprintf(stderr, "Warning: libnuma is not available\n"); #endif } if (strcmp(argv[i], "-p") == 0) { char *argptr; p = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-q") == 0) { char *argptr; q = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0) { fprintf(stderr,"usage: %s [-size n] [-nblocks b] [-check] [-display] [-numa] [-p p] [-q q]\n", argv[0]); fprintf(stderr,"\np * q must be equal to the number of MPI nodes\n"); exit(0); } } #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) { size = 4; nblocks = 4; } #endif } unsigned STARPU_PLU(display_flag)(void) { return display; } static void fill_block_with_random(TYPE *blockptr, unsigned psize, unsigned pnblocks) { const unsigned block_size = (psize/pnblocks); unsigned i, j; for (i = 0; i < block_size; i++) for (j = 0; j < block_size; j++) { blockptr[j+i*block_size] = (TYPE)starpu_drand48(); } } #ifdef SINGLE_TMP11 starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(void) { return tmp_11_block_handle; } #else starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(unsigned k) { return tmp_11_block_handles[k]; } #endif #ifdef SINGLE_TMP1221 starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j) { return tmp_12_block_handles[j]; } starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i) { return tmp_21_block_handles[i]; } #else starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j, unsigned k) { return tmp_12_block_handles[k%2][j]; } starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i, unsigned k) { return tmp_21_block_handles[k%2][i]; } #endif static unsigned tmp_11_block_is_needed(int rank, unsigned pnblocks, unsigned k) { (void)rank; (void)pnblocks; (void)k; return 1; } static unsigned tmp_12_block_is_needed(int rank, unsigned pnblocks, unsigned j) { unsigned i; for (i = 1; i < pnblocks; i++) { if (get_block_rank(i, j) == rank) return 1; } return 0; } static unsigned tmp_21_block_is_needed(int rank, unsigned pnblocks, unsigned i) { unsigned j; for (j = 1; j < pnblocks; j++) { if (get_block_rank(i, j) == rank) return 1; } return 0; } static void init_matrix(int rank) { #ifdef STARPU_HAVE_LIBNUMA if (numa) { fprintf(stderr, "Using INTERLEAVE policy\n"); unsigned long nodemask = ((1<<0)|(1<<1)); int ret = set_mempolicy(MPOL_INTERLEAVE, &nodemask, 3); if (ret) perror("set_mempolicy failed"); } #endif /* Allocate a grid of data handles, not all of them have to be allocated later on */ dataA_handles = calloc(nblocks*nblocks, sizeof(starpu_data_handle_t)); dataA = calloc(nblocks*nblocks, sizeof(TYPE *)); allocated_memory_extra += nblocks*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *)); size_t blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE); /* Allocate all the blocks that belong to this mpi node */ unsigned long i,j; for (j = 0; j < nblocks; j++) { for (i = 0; i < nblocks; i++) { TYPE **blockptr = &dataA[j+i*nblocks]; // starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i]; starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i]; if (get_block_rank(i, j) == rank) { /* This blocks should be treated by the current MPI process */ /* Allocate and fill it */ starpu_malloc((void **)blockptr, blocksize); allocated_memory += blocksize; //fprintf(stderr, "Rank %d : fill block (i = %d, j = %d)\n", rank, i, j); fill_block_with_random(*blockptr, size, nblocks); //fprintf(stderr, "Rank %d : fill block (i = %d, j = %d)\n", rank, i, j); if (i == j) { unsigned tmp; for (tmp = 0; tmp < size/nblocks; tmp++) { (*blockptr)[tmp*((size/nblocks)+1)] += 1; (*blockptr)[tmp*((size/nblocks)+1)] *= 100; } } /* Register it to StarPU */ starpu_matrix_data_register(handleptr, STARPU_MAIN_RAM, (uintptr_t)*blockptr, size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE)); starpu_data_set_coordinates(*handleptr, 2, j, i); } else { *blockptr = STARPU_POISON_PTR; *handleptr = STARPU_POISON_PTR; } } } /* Allocate the temporary buffers required for the distributed algorithm */ unsigned k; /* tmp buffer 11 */ #ifdef SINGLE_TMP11 starpu_malloc((void **)&tmp_11_block, blocksize); allocated_memory_extra += blocksize; starpu_matrix_data_register(&tmp_11_block_handle, STARPU_MAIN_RAM, (uintptr_t)tmp_11_block, size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE)); #else tmp_11_block_handles = calloc(nblocks, sizeof(starpu_data_handle_t)); tmp_11_block = calloc(nblocks, sizeof(TYPE *)); allocated_memory_extra += nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *)); for (k = 0; k < nblocks; k++) { if (tmp_11_block_is_needed(rank, nblocks, k)) { starpu_malloc((void **)&tmp_11_block[k], blocksize); allocated_memory_extra += blocksize; STARPU_ASSERT(tmp_11_block[k]); starpu_matrix_data_register(&tmp_11_block_handles[k], STARPU_MAIN_RAM, (uintptr_t)tmp_11_block[k], size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE)); } } #endif /* tmp buffers 12 and 21 */ #ifdef SINGLE_TMP1221 tmp_12_block_handles = calloc(nblocks, sizeof(starpu_data_handle_t)); tmp_21_block_handles = calloc(nblocks, sizeof(starpu_data_handle_t)); tmp_12_block = calloc(nblocks, sizeof(TYPE *)); tmp_21_block = calloc(nblocks, sizeof(TYPE *)); allocated_memory_extra += 2*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *)); #else for (i = 0; i < 2; i++) { tmp_12_block_handles[i] = calloc(nblocks, sizeof(starpu_data_handle_t)); tmp_21_block_handles[i] = calloc(nblocks, sizeof(starpu_data_handle_t)); tmp_12_block[i] = calloc(nblocks, sizeof(TYPE *)); tmp_21_block[i] = calloc(nblocks, sizeof(TYPE *)); allocated_memory_extra += 2*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *)); } #endif for (k = 0; k < nblocks; k++) { #ifdef SINGLE_TMP1221 if (tmp_12_block_is_needed(rank, nblocks, k)) { starpu_malloc((void **)&tmp_12_block[k], blocksize); allocated_memory_extra += blocksize; STARPU_ASSERT(tmp_12_block[k]); starpu_matrix_data_register(&tmp_12_block_handles[k], STARPU_MAIN_RAM, (uintptr_t)tmp_12_block[k], size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE)); } if (tmp_21_block_is_needed(rank, nblocks, k)) { starpu_malloc((void **)&tmp_21_block[k], blocksize); allocated_memory_extra += blocksize; STARPU_ASSERT(tmp_21_block[k]); starpu_matrix_data_register(&tmp_21_block_handles[k], STARPU_MAIN_RAM, (uintptr_t)tmp_21_block[k], size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE)); } #else for (i = 0; i < 2; i++) { if (tmp_12_block_is_needed(rank, nblocks, k)) { starpu_malloc((void **)&tmp_12_block[i][k], blocksize); allocated_memory_extra += blocksize; STARPU_ASSERT(tmp_12_block[i][k]); starpu_matrix_data_register(&tmp_12_block_handles[i][k], STARPU_MAIN_RAM, (uintptr_t)tmp_12_block[i][k], size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE)); } if (tmp_21_block_is_needed(rank, nblocks, k)) { starpu_malloc((void **)&tmp_21_block[i][k], blocksize); allocated_memory_extra += blocksize; STARPU_ASSERT(tmp_21_block[i][k]); starpu_matrix_data_register(&tmp_21_block_handles[i][k], STARPU_MAIN_RAM, (uintptr_t)tmp_21_block[i][k], size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE)); } } #endif } //display_all_blocks(nblocks, size/nblocks); } TYPE *STARPU_PLU(get_block)(unsigned i, unsigned j) { return dataA[j+i*nblocks]; } int get_block_rank(unsigned i, unsigned j) { /* Take a 2D block cyclic distribution */ /* NB: p (resp. q) is for "direction" i (resp. j) */ return (j % q) * p + (i % p); } starpu_data_handle_t STARPU_PLU(get_block_handle)(unsigned i, unsigned j) { return dataA_handles[j+i*nblocks]; } static void display_grid(int rank, unsigned pnblocks) { if (!display) return; //if (rank == 0) { fprintf(stderr, "2D grid layout (Rank %d): \n", rank); unsigned i, j; for (j = 0; j < pnblocks; j++) { for (i = 0; i < pnblocks; i++) { TYPE *blockptr = STARPU_PLU(get_block)(i, j); starpu_data_handle_t handle = STARPU_PLU(get_block_handle)(i, j); fprintf(stderr, "%d (data %p handle %p)", get_block_rank(i, j), blockptr, handle); } fprintf(stderr, "\n"); } } } int main(int argc, char **argv) { int rank; int world_size; int ret; unsigned i, j, k; /* * Initialization */ int thread_support; if (MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS) { fprintf(stderr,"MPI_Init_thread failed\n"); exit(1); } if (thread_support == MPI_THREAD_FUNNELED) fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); if (thread_support < MPI_THREAD_FUNNELED) fprintf(stderr,"Warning: MPI does not have thread support!\n"); starpu_srand48((long int)time(NULL)); parse_args(rank, argc, argv); ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size); /* We disable sequential consistency in this example */ starpu_data_set_default_sequential_consistency_flag(0); if (p == -1 && q==-1) { fprintf(stderr, "Setting default values for p and q\n"); p = (q % 2 == 0) ? 2 : 1; q = world_size / p; } STARPU_ASSERT_MSG(p*q == world_size, "p=%d, q=%d, world_size=%d\n", p, q, world_size); starpu_cublas_init(); int barrier_ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_ASSERT(barrier_ret == MPI_SUCCESS); /* * Problem Init */ init_matrix(rank); fprintf(stderr, "Rank %d: allocated (%d + %d) MB = %d MB\n", rank, (int)(allocated_memory/(1024*1024)), (int)(allocated_memory_extra/(1024*1024)), (int)((allocated_memory+allocated_memory_extra)/(1024*1024))); display_grid(rank, nblocks); TYPE *a_r = NULL; // STARPU_PLU(display_data_content)(a_r, size); if (check) { TYPE *x, *y; x = calloc(size, sizeof(TYPE)); STARPU_ASSERT(x); y = calloc(size, sizeof(TYPE)); STARPU_ASSERT(y); if (rank == 0) { unsigned ind; for (ind = 0; ind < size; ind++) x[ind] = (TYPE)starpu_drand48(); } a_r = STARPU_PLU(reconstruct_matrix)(size, nblocks); if (rank == 0) STARPU_PLU(display_data_content)(a_r, size); // STARPU_PLU(compute_ax)(size, x, y, nblocks, rank); free(x); free(y); } barrier_ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_ASSERT(barrier_ret == MPI_SUCCESS); double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size, no_prio); /* * Report performance */ int reduce_ret; double min_timing = timing; double max_timing = timing; double sum_timing = timing; reduce_ret = MPI_Reduce(&timing, &min_timing, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); reduce_ret = MPI_Reduce(&timing, &max_timing, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); reduce_ret = MPI_Reduce(&timing, &sum_timing, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); STARPU_ASSERT(reduce_ret == MPI_SUCCESS); if (rank == 0) { fprintf(stderr, "Computation took: %f ms\n", max_timing/1000); fprintf(stderr, "\tMIN : %f ms\n", min_timing/1000); fprintf(stderr, "\tMAX : %f ms\n", max_timing/1000); fprintf(stderr, "\tAVG : %f ms\n", sum_timing/(world_size*1000)); unsigned n = size; double flop = (2.0f*n*n*n)/3.0f; fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/max_timing/1000.0f)); } /* * Test Result Correctness */ if (check) { /* * Compute || A - LU || */ STARPU_PLU(compute_lu_matrix)(size, nblocks, a_r); #if 0 /* * Compute || Ax - LUx || */ unsigned ind; y2 = calloc(size, sizeof(TYPE)); STARPU_ASSERT(y); if (rank == 0) { for (ind = 0; ind < size; ind++) { y2[ind] = (TYPE)0.0; } } STARPU_PLU(compute_lux)(size, x, y2, nblocks, rank); /* Compute y2 = y2 - y */ CPU_AXPY(size, -1.0, y, 1, y2, 1); TYPE err = CPU_ASUM(size, y2, 1); int max = CPU_IAMAX(size, y2, 1); fprintf(stderr, "(A - LU)X Avg error : %e\n", err/(size*size)); fprintf(stderr, "(A - LU)X Max error : %e\n", y2[max]); #endif } /* * Termination */ size_t blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE); for (j = 0; j < nblocks; j++) { for (i = 0; i < nblocks; i++) { starpu_data_handle_t handle = dataA_handles[j+nblocks*i]; if (handle != STARPU_POISON_PTR) starpu_data_unregister(handle); TYPE *blockptr = dataA[j+i*nblocks]; if (blockptr != STARPU_POISON_PTR) starpu_free_noflag(blockptr, blocksize); } } free(dataA_handles); free(dataA); #ifdef SINGLE_TMP11 starpu_data_unregister(tmp_11_block_handle); starpu_free_noflag(tmp_11_block, blocksize); #else for (k = 0; k < nblocks; k++) { if (tmp_11_block_is_needed(rank, nblocks, k)) { starpu_data_unregister(tmp_11_block_handles[k]); starpu_free_noflag(tmp_11_block[k], blocksize); } } free(tmp_11_block_handles); free(tmp_11_block); #endif for (k = 0; k < nblocks; k++) { #ifdef SINGLE_TMP1221 if (tmp_12_block_is_needed(rank, nblocks, k)) { starpu_data_unregister(tmp_12_block_handles); starpu_free_noflag(tmp_12_block[k], blocksize); } if (tmp_21_block_is_needed(rank, nblocks, k)) { starpu_data_unregister(tmp_21_block_handles[k]); starpu_free_noflag(tmp_21_block[k], blocksize); } #else for (i = 0; i < 2; i++) { if (tmp_12_block_is_needed(rank, nblocks, k)) { starpu_data_unregister(tmp_12_block_handles[i][k]); starpu_free_noflag(tmp_12_block[i][k], blocksize); } if (tmp_21_block_is_needed(rank, nblocks, k)) { starpu_data_unregister(tmp_21_block_handles[i][k]); starpu_free_noflag(tmp_21_block[i][k], blocksize); } } #endif } #ifdef SINGLE_TMP1221 free(tmp_12_block_handles); free(tmp_21_block_handles); free(tmp_12_block); free(tmp_21_block); #else for (i = 0; i < 2; i++) { free(tmp_12_block_handles[i]); free(tmp_21_block_handles[i]); free(tmp_12_block[i]); free(tmp_21_block[i]); } #endif barrier_ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_ASSERT(barrier_ret == MPI_SUCCESS); starpu_cublas_shutdown(); starpu_mpi_shutdown(); MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/examples/mpi_lu/plu_example_double.c000066400000000000000000000013431507764646700237120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_lu-double.h" #include "plu_example.c" starpu-1.4.9+dfsg/mpi/examples/mpi_lu/plu_example_float.c000066400000000000000000000013421507764646700235440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_lu-float.h" #include "plu_example.c" starpu-1.4.9+dfsg/mpi/examples/mpi_lu/plu_implicit_example.c000066400000000000000000000214351507764646700242560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "helper.h" #include #include #include #include #include #include #include "pxlu.h" //#include "pxlu_kernels.h" #ifdef STARPU_HAVE_LIBNUMA #include #endif #ifdef STARPU_HAVE_VALGRIND_H #include #endif static unsigned long size = 4096; static unsigned nblocks = 16; static unsigned check = 0; static int p = -1; static int q = -1; static unsigned display = 0; static unsigned no_prio = 0; #ifdef STARPU_HAVE_LIBNUMA static unsigned numa = 0; #endif static size_t allocated_memory = 0; static size_t allocated_memory_extra = 0; static starpu_data_handle_t *dataA_handles; static TYPE **dataA; int get_block_rank(unsigned i, unsigned j); static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-size") == 0) { char *argptr; size = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-nblocks") == 0) { char *argptr; nblocks = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-check") == 0) { check = 1; } if (strcmp(argv[i], "-display") == 0) { display = 1; } if (strcmp(argv[i], "-numa") == 0) { #ifdef STARPU_HAVE_LIBNUMA numa = 1; #else fprintf(stderr, "Warning: libnuma is not available\n"); #endif } if (strcmp(argv[i], "-p") == 0) { char *argptr; p = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-q") == 0) { char *argptr; q = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0) { fprintf(stderr,"usage: %s [-size n] [-nblocks b] [-check] [-display] [-numa] [-p p] [-q q]\n", argv[0]); fprintf(stderr,"\np * q must be equal to the number of MPI nodes\n"); exit(0); } } #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) { size = 4; nblocks = 4; } #endif } unsigned STARPU_PLU(display_flag)(void) { return display; } static void fill_block_with_random(TYPE *blockptr, unsigned psize, unsigned pnblocks) { const unsigned block_size = (psize/pnblocks); unsigned i, j; for (i = 0; i < block_size; i++) for (j = 0; j < block_size; j++) { blockptr[j+i*block_size] = (TYPE)starpu_drand48(); } } static void init_matrix(int rank) { #ifdef STARPU_HAVE_LIBNUMA if (numa) { fprintf(stderr, "Using INTERLEAVE policy\n"); unsigned long nodemask = ((1<<0)|(1<<1)); int ret = set_mempolicy(MPOL_INTERLEAVE, &nodemask, 3); if (ret) perror("set_mempolicy failed"); } #endif /* Allocate a grid of data handles, not all of them have to be allocated later on */ dataA_handles = calloc(nblocks*nblocks, sizeof(starpu_data_handle_t)); dataA = calloc(nblocks*nblocks, sizeof(TYPE *)); allocated_memory_extra += nblocks*nblocks*(sizeof(starpu_data_handle_t) + sizeof(TYPE *)); size_t blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE); /* Allocate all the blocks that belong to this mpi node */ unsigned long i,j; for (j = 0; j < nblocks; j++) { for (i = 0; i < nblocks; i++) { int block_rank = get_block_rank(i, j); TYPE **blockptr = &dataA[j+i*nblocks]; // starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i]; starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i]; if (block_rank == rank) { /* This blocks should be treated by the current MPI process */ /* Allocate and fill it */ starpu_malloc((void **)blockptr, blocksize); allocated_memory += blocksize; //fprintf(stderr, "Rank %d : fill block (i = %d, j = %d)\n", rank, i, j); fill_block_with_random(*blockptr, size, nblocks); //fprintf(stderr, "Rank %d : fill block (i = %d, j = %d)\n", rank, i, j); if (i == j) { unsigned tmp; for (tmp = 0; tmp < size/nblocks; tmp++) { (*blockptr)[tmp*((size/nblocks)+1)] += (TYPE)10*nblocks; } } /* Register it to StarPU */ starpu_matrix_data_register(handleptr, STARPU_MAIN_RAM, (uintptr_t)*blockptr, size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE)); } else { starpu_matrix_data_register(handleptr, -1, 0, size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE)); *blockptr = STARPU_POISON_PTR; } starpu_data_set_coordinates(*handleptr, 2, j, i); starpu_mpi_data_register(*handleptr, j+i*nblocks, block_rank); } } //display_all_blocks(nblocks, size/nblocks); } TYPE *STARPU_PLU(get_block)(unsigned i, unsigned j) { return dataA[j+i*nblocks]; } int get_block_rank(unsigned i, unsigned j) { /* Take a 2D block cyclic distribution */ /* NB: p (resp. q) is for "direction" i (resp. j) */ return (j % q) * p + (i % p); } starpu_data_handle_t STARPU_PLU(get_block_handle)(unsigned i, unsigned j) { return dataA_handles[j+i*nblocks]; } static void display_grid(int rank, unsigned pnblocks) { if (!display) return; //if (rank == 0) { fprintf(stderr, "2D grid layout (Rank %d): \n", rank); unsigned i, j; for (j = 0; j < pnblocks; j++) { for (i = 0; i < pnblocks; i++) { TYPE *blockptr = STARPU_PLU(get_block)(i, j); starpu_data_handle_t handle = STARPU_PLU(get_block_handle)(i, j); fprintf(stderr, "%d (data %p handle %p)", get_block_rank(i, j), blockptr, handle); } fprintf(stderr, "\n"); } } } int main(int argc, char **argv) { int rank; int world_size; int ret; unsigned i, j; starpu_srand48((long int)time(NULL)); parse_args(argc, argv); ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size); if (p == -1 && q==-1) { fprintf(stderr, "Setting default values for p and q\n"); p = (q % 2 == 0) ? 2 : 1; q = world_size / p; } STARPU_ASSERT_MSG(p*q == world_size, "p=%d, q=%d, world_size=%d\n", p, q, world_size); starpu_cublas_init(); /* * Problem Init */ init_matrix(rank); fprintf(stderr, "Rank %d: allocated (%d + %d) MB = %d MB\n", rank, (int)(allocated_memory/(1024*1024)), (int)(allocated_memory_extra/(1024*1024)), (int)((allocated_memory+allocated_memory_extra)/(1024*1024))); display_grid(rank, nblocks); TYPE *a_r = NULL; // STARPU_PLU(display_data_content)(a_r, size); if (check) { TYPE *x, *y; x = calloc(size, sizeof(TYPE)); STARPU_ASSERT(x); y = calloc(size, sizeof(TYPE)); STARPU_ASSERT(y); if (rank == 0) { unsigned ind; for (ind = 0; ind < size; ind++) x[ind] = (TYPE)starpu_drand48(); } a_r = STARPU_PLU(reconstruct_matrix)(size, nblocks); if (rank == 0) STARPU_PLU(display_data_content)(a_r, size); // STARPU_PLU(compute_ax)(size, x, y, nblocks, rank); free(x); free(y); } double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size, no_prio); /* * Report performance */ if (rank == 0) { fprintf(stderr, "Computation took: %f ms\n", timing/1000); unsigned n = size; double flop = (2.0f*n*n*n)/3.0f; fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f)); } /* * Test Result Correctness */ if (check) { /* * Compute || A - LU || */ STARPU_PLU(compute_lu_matrix)(size, nblocks, a_r); #if 0 /* * Compute || Ax - LUx || */ unsigned ind; y2 = calloc(size, sizeof(TYPE)); STARPU_ASSERT(y); if (rank == 0) { for (ind = 0; ind < size; ind++) { y2[ind] = (TYPE)0.0; } } STARPU_PLU(compute_lux)(size, x, y2, nblocks, rank); /* Compute y2 = y2 - y */ CPU_AXPY(size, -1.0, y, 1, y2, 1); TYPE err = CPU_ASUM(size, y2, 1); int max = CPU_IAMAX(size, y2, 1); fprintf(stderr, "(A - LU)X Avg error : %e\n", err/(size*size)); fprintf(stderr, "(A - LU)X Max error : %e\n", y2[max]); #endif } /* * Termination */ size_t blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE); for (j = 0; j < nblocks; j++) { for (i = 0; i < nblocks; i++) { starpu_data_unregister(dataA_handles[j+nblocks*i]); TYPE *blockptr = dataA[j+i*nblocks]; if (blockptr != STARPU_POISON_PTR) starpu_free_noflag(blockptr, blocksize); } } free(dataA_handles); free(dataA); starpu_cublas_shutdown(); starpu_mpi_shutdown(); return 0; } starpu-1.4.9+dfsg/mpi/examples/mpi_lu/plu_implicit_example_double.c000066400000000000000000000014301507764646700256010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_lu-double.h" #include "plu_implicit_example.c" starpu-1.4.9+dfsg/mpi/examples/mpi_lu/plu_implicit_example_float.c000066400000000000000000000014271507764646700254420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_lu-float.h" #include "plu_implicit_example.c" starpu-1.4.9+dfsg/mpi/examples/mpi_lu/plu_outofcore_example.c000066400000000000000000000254421507764646700244530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "helper.h" #include #include #include #include #include #include #include #include #include #include "pxlu.h" //#include "pxlu_kernels.h" #ifdef STARPU_HAVE_LIBNUMA #include #endif #ifdef STARPU_HAVE_VALGRIND_H #include #endif static unsigned long size = 4096; static unsigned nblocks = 16; static size_t blocksize; static unsigned check = 0; static int p = -1; static int q = -1; static unsigned display = 0; static unsigned no_prio = 0; #ifdef STARPU_HAVE_LIBNUMA static unsigned numa = 0; #endif unsigned bound = 0; unsigned bounddeps = 0; unsigned boundprio = 0; static size_t allocated_memory = 0; static starpu_data_handle_t *dataA_handles; static void **disk_objs; static int disk_node; int get_block_rank(unsigned i, unsigned j); static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-size") == 0) { char *argptr; size = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-nblocks") == 0) { char *argptr; nblocks = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-check") == 0) { check = 1; } if (strcmp(argv[i], "-display") == 0) { display = 1; } if (strcmp(argv[i], "-numa") == 0) { #ifdef STARPU_HAVE_LIBNUMA numa = 1; #else fprintf(stderr, "Warning: libnuma is not available\n"); #endif } if (strcmp(argv[i], "-p") == 0) { char *argptr; p = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-q") == 0) { char *argptr; q = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-path") == 0) { path = argv[++i]; } if (strcmp(argv[i], "-bound") == 0) { bound = 1; } if (strcmp(argv[i], "-bounddeps") == 0) { bound = 1; bounddeps = 1; } if (strcmp(argv[i], "-bounddepsprio") == 0) { bound = 1; bounddeps = 1; boundprio = 1; } if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "--help") == 0) { fprintf(stderr,"usage: %s [-size n] [-nblocks b] [-check] [-display] [-numa] [-p p] [-q q] [-path PATH] [-bound] [-bounddeps] [-bounddepsprio]\n", argv[0]); fprintf(stderr,"\np * q must be equal to the number of MPI nodes\n"); exit(0); } } #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) { size = 4; nblocks = 4; } #endif } unsigned STARPU_PLU(display_flag)(void) { return display; } static void fill_block_with_random(TYPE *blockptr, unsigned psize, unsigned pnblocks) { const unsigned block_size = (psize/pnblocks); unsigned i, j; for (i = 0; i < block_size; i++) for (j = 0; j < block_size; j++) { blockptr[j+i*block_size] = (TYPE)starpu_drand48(); } } static void create_matrix() { TYPE *blockptr = malloc(blocksize); int fd; char *filename; unsigned filename_length = strlen(path) + 1 + sizeof(nblocks)*3 + 1 + sizeof(nblocks)*3 + 1; filename = malloc(filename_length); allocated_memory += nblocks*nblocks*blocksize; /* Create the whole matrix on the disk */ unsigned i,j; for (j = 0; j < nblocks; j++) { for (i = 0; i < nblocks; i++) { fill_block_with_random(blockptr, size, nblocks); if (i == j) { unsigned tmp; for (tmp = 0; tmp < size/nblocks; tmp++) { blockptr[tmp*((size/nblocks)+1)] += (TYPE)10*nblocks; } } snprintf(filename, filename_length, "%s/%u,%u", path, i, j); fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0777); if (fd < 0) { perror("open"); exit(1); } if (write(fd, blockptr, blocksize) != (starpu_ssize_t) blocksize) { fprintf(stderr,"short write"); exit(1); } if (close(fd) < 0) { perror("close"); exit(1); } } } free(blockptr); free(filename); } static void init_matrix(int rank) { /* Allocate a grid of data handles, not all of them have to be allocated later on */ dataA_handles = calloc(nblocks*nblocks, sizeof(starpu_data_handle_t)); disk_objs = calloc(nblocks*nblocks, sizeof(*disk_objs)); disk_node = starpu_disk_register(&starpu_disk_unistd_ops, path, STARPU_MAX(16*1024*1024, size*size*sizeof(TYPE))); assert(disk_node >= 0); char filename[sizeof(nblocks)*3 + 1 + sizeof(nblocks)*3 + 1]; /* Allocate all the blocks that belong to this mpi node */ unsigned i,j; for (j = 0; j < nblocks; j++) { for (i = 0; i < nblocks; i++) { int block_rank = get_block_rank(i, j); // starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i]; starpu_data_handle_t *handleptr = &dataA_handles[j+nblocks*i]; if (block_rank == rank) { snprintf(filename, sizeof(filename), "%u,%u", i, j); /* Register it to StarPU */ disk_objs[j+nblocks*i] = starpu_disk_open(disk_node, filename, blocksize); if (!disk_objs[j+nblocks*i]) { fprintf(stderr,"could not open %s\n", filename); exit(1); } starpu_matrix_data_register(handleptr, disk_node, (uintptr_t) disk_objs[j+nblocks*i], size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE)); starpu_data_acquire_on_node(*handleptr, STARPU_MAIN_RAM, STARPU_W); void *interface = starpu_data_get_interface_on_node(*handleptr, STARPU_MAIN_RAM); TYPE *data = (void*) STARPU_MATRIX_GET_PTR(interface); fill_block_with_random(data, size, nblocks); if (i == j) { unsigned tmp; for (tmp = 0; tmp < size/nblocks; tmp++) { data[tmp*((size/nblocks)+1)] += 1; data[tmp*((size/nblocks)+1)] *= 100; } } starpu_data_release_on_node(*handleptr, STARPU_MAIN_RAM); } else { disk_objs[j+nblocks*i] = NULL; starpu_matrix_data_register(handleptr, -1, 0, size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE)); } starpu_data_set_coordinates(*handleptr, 2, j, i); starpu_mpi_data_register(*handleptr, j+i*nblocks, block_rank); } } //display_all_blocks(nblocks, size/nblocks); } static void destroy_matrix(int rank) { char *filename; unsigned filename_length = strlen(path) + 1 + sizeof(nblocks)*3 + 1 + sizeof(nblocks)*3 + 1; unsigned i,j; filename = malloc(filename_length); for (j = 0; j < nblocks; j++) { for (i = 0; i < nblocks; i++) { int block_rank = get_block_rank(i, j); if (block_rank == rank) { snprintf(filename, filename_length, "%s/%u,%u", path, i, j); unlink(filename); } } } free(filename); rmdir(path); } TYPE *STARPU_PLU(get_block)(unsigned i, unsigned j) { (void)i; (void)j; /* This does not really make sense in out of core */ assert(0); } int get_block_rank(unsigned i, unsigned j) { /* Take a 2D block cyclic distribution */ /* NB: p (resp. q) is for "direction" i (resp. j) */ return (j % q) * p + (i % p); } starpu_data_handle_t STARPU_PLU(get_block_handle)(unsigned i, unsigned j) { return dataA_handles[j+i*nblocks]; } #if STARPU_MAXNODES == 1 /* Cannot register a disk */ int main(int argc, char **argv) { return STARPU_TEST_SKIPPED; } #else int main(int argc, char **argv) { int rank; int world_size; int ret; unsigned i, j; starpu_srand48((long int)time(NULL)); parse_args(argc, argv); blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE); ret = mkdir(path, 0777); if (ret != 0 && errno != EEXIST) { fprintf(stderr,"%s does not exist\n", path); exit(1); } ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size); if (p == -1 && q==-1) { fprintf(stderr, "Setting default values for p and q\n"); p = (q % 2 == 0) ? 2 : 1; q = world_size / p; } STARPU_ASSERT_MSG(p*q == world_size, "p=%d, q=%d, world_size=%d\n", p, q, world_size); starpu_cublas_init(); /* * Problem Init */ if (rank == 0) create_matrix(); starpu_mpi_barrier(MPI_COMM_WORLD); init_matrix(rank); if (rank == 0) fprintf(stderr, "%dMB on disk\n", (int)(allocated_memory/(1024*1024))); TYPE *a_r = NULL; // STARPU_PLU(display_data_content)(a_r, size); if (check) { TYPE *x, *y; x = calloc(size, sizeof(TYPE)); STARPU_ASSERT(x); y = calloc(size, sizeof(TYPE)); STARPU_ASSERT(y); if (rank == 0) { unsigned ind; for (ind = 0; ind < size; ind++) x[ind] = (TYPE)starpu_drand48(); } a_r = STARPU_PLU(reconstruct_matrix)(size, nblocks); if (rank == 0) STARPU_PLU(display_data_content)(a_r, size); // STARPU_PLU(compute_ax)(size, x, y, nblocks, rank); free(x); free(y); } if (bound) starpu_bound_start(bounddeps, boundprio); double timing = STARPU_PLU(plu_main)(nblocks, rank, world_size, no_prio); if (bound) starpu_bound_stop(); /* * Report performance */ if (rank == 0) { fprintf(stderr, "Computation took: %f ms\n", timing/1000); unsigned n = size; double flop = (2.0f*n*n*n)/3.0f; printf("# size\tms\tGFlops"); fflush(stdout); if (bound) printf("\tTms\tTGFlops"); printf("\n"); printf("%u\t%.0f\t%2.2f", n, timing/1000, (flop/timing/1000.0f)); if (bound) { double min; starpu_bound_compute(&min, NULL, 0); printf("\t%.0f\t%.1f", min, flop/min/1000000.0f); } printf("\n"); } /* * Test Result Correctness */ if (check) { /* * Compute || A - LU || */ STARPU_PLU(compute_lu_matrix)(size, nblocks, a_r); #if 0 /* * Compute || Ax - LUx || */ unsigned ind; y2 = calloc(size, sizeof(TYPE)); STARPU_ASSERT(y); if (rank == 0) { for (ind = 0; ind < size; ind++) { y2[ind] = (TYPE)0.0; } } STARPU_PLU(compute_lux)(size, x, y2, nblocks, rank); /* Compute y2 = y2 - y */ CPU_AXPY(size, -1.0, y, 1, y2, 1); TYPE err = CPU_ASUM(size, y2, 1); int max = CPU_IAMAX(size, y2, 1); fprintf(stderr, "(A - LU)X Avg error : %e\n", err/(size*size)); fprintf(stderr, "(A - LU)X Max error : %e\n", y2[max]); #endif } /* * Termination */ for (j = 0; j < nblocks; j++) { for (i = 0; i < nblocks; i++) { starpu_data_unregister(dataA_handles[j+nblocks*i]); if (disk_objs[j+nblocks*i]) starpu_disk_close(disk_node, disk_objs[j+nblocks*i], blocksize); } } free(dataA_handles); free(disk_objs); destroy_matrix(rank); starpu_cublas_shutdown(); starpu_mpi_shutdown(); return 0; } #endif starpu-1.4.9+dfsg/mpi/examples/mpi_lu/plu_outofcore_example_double.c000066400000000000000000000015121507764646700257750ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ static char *path = "./starpu-ooc-files-double"; #include "mpi_lu-double.h" #include "plu_outofcore_example.c" starpu-1.4.9+dfsg/mpi/examples/mpi_lu/plu_outofcore_example_float.c000066400000000000000000000015101507764646700256260ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ static char *path = "./starpu-ooc-files-float"; #include "mpi_lu-float.h" #include "plu_outofcore_example.c" starpu-1.4.9+dfsg/mpi/examples/mpi_lu/plu_solve.c000066400000000000000000000233231507764646700220570ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "pxlu.h" /* * Various useful functions */ static double frobenius_norm(TYPE *v, unsigned n) { double sum2 = 0.0; /* compute sqrt(Sum(|x|^2)) */ unsigned i,j; for (j = 0; j < n; j++) for (i = 0; i < n; i++) { double a = fabsl((double)v[i+n*j]); sum2 += a*a; } return sqrt(sum2); } void STARPU_PLU(display_data_content)(TYPE *data, unsigned blocksize) { if (!STARPU_PLU(display_flag)()) return; fprintf(stderr, "DISPLAY BLOCK\n"); unsigned i, j; for (j = 0; j < blocksize; j++) { for (i = 0; i < blocksize; i++) { fprintf(stderr, "%f ", data[j+i*blocksize]); } fprintf(stderr, "\n"); } fprintf(stderr, "****\n"); } void STARPU_PLU(extract_upper)(unsigned block_size, TYPE *inblock, TYPE *outblock) { unsigned li, lj; for (lj = 0; lj < block_size; lj++) { /* Upper block diag is 1 */ outblock[lj*(block_size + 1)] = (TYPE)1.0; for (li = lj + 1; li < block_size; li++) { outblock[lj + li*block_size] = inblock[lj + li*block_size]; } } } void STARPU_PLU(extract_lower)(unsigned block_size, TYPE *inblock, TYPE *outblock) { unsigned li, lj; for (lj = 0; lj < block_size; lj++) { for (li = 0; li <= lj; li++) { outblock[lj + li*block_size] = inblock[lj + li*block_size]; } } } /* * Compute Ax = y */ static void STARPU_PLU(compute_ax_block)(unsigned block_size, TYPE *block_data, TYPE *sub_x, TYPE *sub_y) { fprintf(stderr, "block data %p sub x %p sub y %p\n", block_data, sub_x, sub_y); CPU_GEMV("N", block_size, block_size, 1.0, block_data, block_size, sub_x, 1, 1.0, sub_y, 1); } static void STARPU_PLU(compute_ax_block_upper)(unsigned size, unsigned nblocks, TYPE *block_data, TYPE *sub_x, TYPE *sub_y) { unsigned block_size = size/nblocks; /* Take a copy of the upper part of the diagonal block */ TYPE *upper_block_copy = calloc((block_size)*(block_size), sizeof(TYPE)); STARPU_PLU(extract_upper)(block_size, block_data, upper_block_copy); STARPU_PLU(compute_ax_block)(block_size, upper_block_copy, sub_x, sub_y); free(upper_block_copy); } static void STARPU_PLU(compute_ax_block_lower)(unsigned size, unsigned nblocks, TYPE *block_data, TYPE *sub_x, TYPE *sub_y) { unsigned block_size = size/nblocks; /* Take a copy of the upper part of the diagonal block */ TYPE *lower_block_copy = calloc((block_size)*(block_size), sizeof(TYPE)); STARPU_PLU(extract_lower)(block_size, block_data, lower_block_copy); STARPU_PLU(compute_ax_block)(size/nblocks, lower_block_copy, sub_x, sub_y); free(lower_block_copy); } void STARPU_PLU(compute_lux)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank) { /* Create temporary buffers where all MPI processes are going to * compute Ui x = yi where Ai is the matrix containing the blocks of U * affected to process i, and 0 everywhere else. We then have y as the * sum of all yi. */ TYPE *yi = calloc(size, sizeof(TYPE)); fprintf(stderr, "Compute LU\n"); unsigned block_size = size/nblocks; /* Compute UiX = Yi */ unsigned long i,j; for (j = 0; j < nblocks; j++) { if (get_block_rank(j, j) == rank) { TYPE *block_data = STARPU_PLU(get_block)(j, j); TYPE *sub_x = &x[j*(block_size)]; TYPE *sub_yi = &yi[j*(block_size)]; STARPU_PLU(compute_ax_block_upper)(size, nblocks, block_data, sub_x, sub_yi); } for (i = j + 1; i < nblocks; i++) { if (get_block_rank(i, j) == rank) { /* That block belongs to the current MPI process */ TYPE *block_data = STARPU_PLU(get_block)(i, j); TYPE *sub_x = &x[i*(block_size)]; TYPE *sub_yi = &yi[j*(block_size)]; STARPU_PLU(compute_ax_block)(size/nblocks, block_data, sub_x, sub_yi); } } } /* Grab Sum Yi in X */ MPI_Reduce(yi, x, size, MPI_TYPE, MPI_SUM, 0, MPI_COMM_WORLD); memset(yi, 0, size*sizeof(TYPE)); // unsigned ind; // if (rank == 0) // { // fprintf(stderr, "INTERMEDIATE\n"); // for (ind = 0; ind < STARPU_MIN(10, size); ind++) // { // fprintf(stderr, "x[%d] = %f\n", ind, (float)x[ind]); // } // fprintf(stderr, "****\n"); // } /* Everyone needs x */ int bcst_ret; bcst_ret = MPI_Bcast(&x, size, MPI_TYPE, 0, MPI_COMM_WORLD); STARPU_ASSERT(bcst_ret == MPI_SUCCESS); /* Compute LiX = Yi (with X = UX) */ for (j = 0; j < nblocks; j++) { if (j > 0) for (i = 0; i < j; i++) { if (get_block_rank(i, j) == rank) { /* That block belongs to the current MPI process */ TYPE *block_data = STARPU_PLU(get_block)(i, j); TYPE *sub_x = &x[i*(block_size)]; TYPE *sub_yi = &yi[j*(block_size)]; STARPU_PLU(compute_ax_block)(size/nblocks, block_data, sub_x, sub_yi); } } if (get_block_rank(j, j) == rank) { TYPE *block_data = STARPU_PLU(get_block)(j, j); TYPE *sub_x = &x[j*(block_size)]; TYPE *sub_yi = &yi[j*(block_size)]; STARPU_PLU(compute_ax_block_lower)(size, nblocks, block_data, sub_x, sub_yi); } } /* Grab Sum Yi in Y */ MPI_Reduce(yi, y, size, MPI_TYPE, MPI_SUM, 0, MPI_COMM_WORLD); free(yi); } /* * Allocate a contiguous matrix on node 0 and fill it with the whole * content of the matrix distributed across all nodes. */ TYPE *STARPU_PLU(reconstruct_matrix)(unsigned size, unsigned nblocks) { // fprintf(stderr, "RECONSTRUCT MATRIX size %d nblocks %d\n", size, nblocks); TYPE *bigmatrix = calloc(size*size, sizeof(TYPE)); unsigned block_size = size/nblocks; int rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); unsigned bi, bj; for (bj = 0; bj < nblocks; bj++) for (bi = 0; bi < nblocks; bi++) { TYPE *block = NULL; int block_rank = get_block_rank(bi, bj); if (block_rank == 0) { block = STARPU_PLU(get_block)(bi, bj); } else { MPI_Status status; if (rank == 0) { block = calloc(block_size*block_size, sizeof(TYPE)); int ret = MPI_Recv(block, block_size*block_size, MPI_TYPE, block_rank, 0, MPI_COMM_WORLD, &status); STARPU_ASSERT(ret == MPI_SUCCESS); } else if (rank == block_rank) { block = STARPU_PLU(get_block)(bi, bj); int ret = MPI_Send(block, block_size*block_size, MPI_TYPE, 0, 0, MPI_COMM_WORLD); STARPU_ASSERT(ret == MPI_SUCCESS); } } if (rank == 0) { unsigned j, i; for (j = 0; j < block_size; j++) for (i = 0; i < block_size; i++) { bigmatrix[(j + bj*block_size)+(i+bi*block_size)*size] = block[j+i*block_size]; } if (get_block_rank(bi, bj) != 0) free(block); } } return bigmatrix; } /* x and y must be valid (at least) on 0 */ void STARPU_PLU(compute_ax)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank) { unsigned block_size = size/nblocks; /* Send x to everyone */ int bcst_ret; bcst_ret = MPI_Bcast(&x, size, MPI_TYPE, 0, MPI_COMM_WORLD); STARPU_ASSERT(bcst_ret == MPI_SUCCESS); /* Create temporary buffers where all MPI processes are going to * compute Ai x = yi where Ai is the matrix containing the blocks of A * affected to process i, and 0 everywhere else. We then have y as the * sum of all yi. */ TYPE *yi = calloc(size, sizeof(TYPE)); /* Compute Aix = yi */ unsigned long i,j; for (j = 0; j < nblocks; j++) { for (i = 0; i < nblocks; i++) { if (get_block_rank(i, j) == rank) { /* That block belongs to the current MPI process */ TYPE *block_data = STARPU_PLU(get_block)(i, j); TYPE *sub_x = &x[i*block_size]; TYPE *sub_yi = &yi[j*block_size]; STARPU_PLU(compute_ax_block)(block_size, block_data, sub_x, sub_yi); } } } /* Compute the Sum of all yi = y */ MPI_Reduce(yi, y, size, MPI_TYPE, MPI_SUM, 0, MPI_COMM_WORLD); fprintf(stderr, "RANK %d - FOO 1 y[0] %f\n", rank, y[0]); free(yi); } void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved) { TYPE *all_r = STARPU_PLU(reconstruct_matrix)(size, nblocks); unsigned display = STARPU_PLU(display_flag)(); int rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); if (rank == 0) { TYPE *L = malloc((size_t)size*size*sizeof(TYPE)); TYPE *U = malloc((size_t)size*size*sizeof(TYPE)); memset(L, 0, size*size*sizeof(TYPE)); memset(U, 0, size*size*sizeof(TYPE)); /* only keep the lower part */ unsigned i, j; for (j = 0; j < size; j++) { for (i = 0; i < j; i++) { L[j+i*size] = all_r[j+i*size]; } /* diag i = j */ L[j+j*size] = all_r[j+j*size]; U[j+j*size] = 1.0; for (i = j+1; i < size; i++) { U[j+i*size] = all_r[j+i*size]; } } STARPU_PLU(display_data_content)(L, size); STARPU_PLU(display_data_content)(U, size); /* now A_err = L, compute L*U */ CPU_TRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size); if (display) fprintf(stderr, "\nLU\n"); STARPU_PLU(display_data_content)(L, size); /* compute "LU - A" in L*/ CPU_AXPY(size*size, -1.0, Asaved, 1, L, 1); TYPE err = CPU_ASUM(size*size, L, 1); int max = CPU_IAMAX(size*size, L, 1); if (display) fprintf(stderr, "DISPLAY ERROR\n"); STARPU_PLU(display_data_content)(L, size); fprintf(stderr, "(A - LU) Avg error : %e\n", err/(size*size)); fprintf(stderr, "(A - LU) Max error : %e\n", L[max]); double residual = frobenius_norm(L, size); double matnorm = frobenius_norm(Asaved, size); fprintf(stderr, "||A-LU|| / (||A||*N) : %e\n", residual/(matnorm*size)); } free(all_r); } starpu-1.4.9+dfsg/mpi/examples/mpi_lu/plu_solve_double.c000066400000000000000000000013411507764646700234050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_lu-double.h" #include "plu_solve.c" starpu-1.4.9+dfsg/mpi/examples/mpi_lu/plu_solve_float.c000066400000000000000000000013401507764646700232370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_lu-float.h" #include "plu_solve.c" starpu-1.4.9+dfsg/mpi/examples/mpi_lu/pslu.c000066400000000000000000000013331507764646700210270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_lu-float.h" #include "pxlu.c" starpu-1.4.9+dfsg/mpi/examples/mpi_lu/pslu_implicit.c000066400000000000000000000014201507764646700227160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_lu-float.h" #include "pxlu_implicit.c" starpu-1.4.9+dfsg/mpi/examples/mpi_lu/pslu_kernels.c000066400000000000000000000013431507764646700225530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_lu-float.h" #include "pxlu_kernels.c" starpu-1.4.9+dfsg/mpi/examples/mpi_lu/pxlu.c000066400000000000000000000543441507764646700210460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "pxlu.h" #include "pxlu_kernels.h" #include #define MPI_TAG_GETRF(k) ((1U << 16) | (k)) #define MPI_TAG_TRSM_LL(k, j) ((2U << 16) | (k)<<8 | (j)) #define MPI_TAG_TRSM_RU(k, i) ((3U << 16) | (i)<<8 | (k)) // GETRF TRSM_RU // TRSM_LL GEMM #define TAG_GETRF(k) ((starpu_tag_t)((1ULL<<50) | (unsigned long long)(k))) #define TAG_TRSM_LL(k,j) ((starpu_tag_t)(((2ULL<<50) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(j)))) #define TAG_TRSM_RU(k,i) ((starpu_tag_t)(((3ULL<<50) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(i)))) #define TAG_GEMM(k,i,j) ((starpu_tag_t)(((4ULL<<50) | ((unsigned long long)(k)<<32) \ | ((unsigned long long)(i)<<16) \ | (unsigned long long)(j)))) #define TAG_GETRF_SAVE(k) ((starpu_tag_t)((5ULL<<50) | (unsigned long long)(k))) #define TAG_TRSM_LL_SAVE(k,j) ((starpu_tag_t)(((6ULL<<50) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(j)))) #define TAG_TRSM_RU_SAVE(k,i) ((starpu_tag_t)(((7ULL<<50) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(i)))) #define TAG_GETRF_SAVE_PARTIAL(k) ((starpu_tag_t)((8ULL<<50) | (unsigned long long)(k))) #define TAG_TRSM_LL_SAVE_PARTIAL(k,j) ((starpu_tag_t)(((9ULL<<50) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(j)))) #define TAG_TRSM_RU_SAVE_PARTIAL(k,i) ((starpu_tag_t)(((10ULL<<50) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(i)))) #define STARPU_TAG_INIT ((starpu_tag_t)(11ULL<<50)) //#define VERBOSE_INIT 1 //#define DEBUG 1 static unsigned no_prio = 0; static unsigned nblocks = 0; static int rank = -1; static int world_size = -1; struct callback_arg { unsigned i, j, k; }; /* * Various */ static struct debug_info *create_debug_info(unsigned i, unsigned j, unsigned k) { struct debug_info *info = malloc(sizeof(struct debug_info)); info->i = i; info->j = j; info->k = k; return info; } static struct starpu_task *create_task(starpu_tag_t id) { struct starpu_task *task = starpu_task_create(); task->cl_arg = NULL; task->use_tag = 1; task->tag_id = id; return task; } /* Send handle to every node appearing in the mask, and unlock tag once the * transfers are done. */ static void send_data_to_mask(starpu_data_handle_t handle, int *rank_mask, starpu_mpi_tag_t mpi_tag, starpu_tag_t tag) { unsigned cnt = 0; STARPU_ASSERT(handle != STARPU_POISON_PTR); int rank_array[world_size]; MPI_Comm comm_array[world_size]; starpu_mpi_tag_t mpi_tag_array[world_size]; starpu_data_handle_t handle_array[world_size]; int r; for (r = 0; r < world_size; r++) { if (rank_mask[r]) { rank_array[cnt] = r; comm_array[cnt] = MPI_COMM_WORLD; mpi_tag_array[cnt] = mpi_tag; handle_array[cnt] = handle; cnt++; } } if (cnt == 0) { /* In case there is no message to send, we release the tag at * once */ starpu_tag_notify_from_apps(tag); } else { int ret = starpu_mpi_isend_array_detached_unlock_tag(cnt, handle_array, rank_array, mpi_tag_array, comm_array, tag); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_array_detached_unlock_tag"); } } /* Initiate a receive request once all dependencies are fulfilled and unlock * tag 'unlocked_tag' once it's done. */ struct recv_when_done_callback_arg { int source; starpu_mpi_tag_t mpi_tag; starpu_data_handle_t handle; starpu_tag_t unlocked_tag; }; static void callback_receive_when_done(void *_arg) { struct recv_when_done_callback_arg *arg = _arg; int ret = starpu_mpi_irecv_detached_unlock_tag(arg->handle, arg->source, arg->mpi_tag, MPI_COMM_WORLD, arg->unlocked_tag); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached_unlock_tag"); free(arg); } static void receive_when_deps_are_done(unsigned ndeps, starpu_tag_t *deps_tags, int source, starpu_mpi_tag_t mpi_tag, starpu_data_handle_t handle, starpu_tag_t partial_tag, starpu_tag_t unlocked_tag) { STARPU_ASSERT(handle != STARPU_POISON_PTR); struct recv_when_done_callback_arg *arg = malloc(sizeof(struct recv_when_done_callback_arg)); arg->source = source; arg->mpi_tag = mpi_tag; arg->handle = handle; arg->unlocked_tag = unlocked_tag; if (ndeps == 0) { callback_receive_when_done(arg); return; } starpu_create_sync_task(partial_tag, ndeps, deps_tags, callback_receive_when_done, arg); } /* * Task GETRF (diagonal factorization) */ static void create_task_getrf_recv(unsigned k) { /* The current node is not computing that task, so we receive the block * with MPI */ /* We don't issue a MPI receive request until everyone using the * temporary buffer is done : 11_(k-1) can be used by 12_(k-1)j and * 21(k-1)i with i,j >= k */ unsigned ndeps = 0; starpu_tag_t tag_array[2*nblocks]; #ifdef SINGLE_TMP11 if (k > 0) { unsigned i; for (i = (k-1)+1; i < nblocks; i++) { if (rank == get_block_rank(i, k-1)) tag_array[ndeps++] = TAG_TRSM_RU(k-1, i); } unsigned j; for (j = (k-1)+1; j < nblocks; j++) { if (rank == get_block_rank(k-1, j)) tag_array[ndeps++] = TAG_TRSM_LL(k-1, j); } } #endif int source = get_block_rank(k, k); #ifdef SINGLE_TMP11 starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_11_block_handle)(); #else starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_11_block_handle)(k); #endif starpu_mpi_tag_t mpi_tag = MPI_TAG_GETRF(k); starpu_tag_t partial_tag = TAG_GETRF_SAVE_PARTIAL(k); starpu_tag_t unlocked_tag = TAG_GETRF_SAVE(k); // fprintf(stderr, "NODE %d - 11 (%d) - recv when done ndeps %d - tag array %lx\n", rank, k, ndeps, tag_array[0]); receive_when_deps_are_done(ndeps, tag_array, source, mpi_tag, block_handle, partial_tag, unlocked_tag); } static void find_nodes_using_11(unsigned k, int *rank_mask) { memset(rank_mask, 0, world_size*sizeof(int)); /* Block 11_k is used to compute 12_kj + 12ki with i,j > k */ unsigned i; for (i = k+1; i < nblocks; i++) { int r = get_block_rank(i, k); rank_mask[r] = 1; } unsigned j; for (j = k+1; j < nblocks; j++) { int r = get_block_rank(k, j); rank_mask[r] = 1; } } static void callback_task_getrf_real(void *_arg) { struct callback_arg *arg = _arg; unsigned k = arg->k; /* Find all the nodes potentially requiring this block */ int rank_mask[world_size]; find_nodes_using_11(k, rank_mask); rank_mask[rank] = 0; /* Send the block to those nodes */ starpu_data_handle_t block_handle = STARPU_PLU(get_block_handle)(k, k); starpu_tag_t tag = TAG_GETRF_SAVE(k); starpu_mpi_tag_t mpi_tag = MPI_TAG_GETRF(k); send_data_to_mask(block_handle, rank_mask, mpi_tag, tag); free(arg); } static void create_task_getrf_real(unsigned k) { struct starpu_task *task = create_task(TAG_GETRF(k)); task->cl = &STARPU_PLU(cl_getrf); task->color = 0xffff00; task->cl_arg = create_debug_info(k, k, k); task->cl_arg_free = 1; /* which sub-data is manipulated ? */ task->handles[0] = STARPU_PLU(get_block_handle)(k, k); struct callback_arg *arg = malloc(sizeof(struct callback_arg)); arg->k = k; task->callback_func = callback_task_getrf_real; task->callback_arg = arg; /* this is an important task */ if (!no_prio) task->priority = 3*nblocks - 3*k; /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_GETRF(k), 1, TAG_GEMM(k-1, k, k)); } else { starpu_tag_declare_deps(TAG_GETRF(k), 1, STARPU_TAG_INIT); } int ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } static void create_task_getrf(unsigned k) { if (get_block_rank(k, k) == rank) { #ifdef VERBOSE_INIT fprintf(stderr, "CREATE real task 11(%u) (TAG_GETRF_SAVE(%u) = %llux) on node %d\n", k, k, (unsigned long long) TAG_GETRF_SAVE(k), rank); #endif create_task_getrf_real(k); } else { /* We don't handle the task, but perhaps we have to generate MPI transfers. */ int rank_mask[world_size]; find_nodes_using_11(k, rank_mask); if (rank_mask[rank]) { #ifdef VERBOSE_INIT fprintf(stderr, "create RECV task 11(%u) on node %d\n", k, rank); #endif create_task_getrf_recv(k); } else { #ifdef VERBOSE_INIT fprintf(stderr, "Node %d needs not 11(%u)\n", rank, k); #endif } } } /* * Task TRSM_LL */ static void create_task_trsm_ll_recv(unsigned k, unsigned j) { /* The current node is not computing that task, so we receive the block * with MPI */ /* We don't issue a MPI receive request until everyone using the * temporary buffer is done : 12_(k-1)j can be used by 22_(k-1)ij with * i >= k */ unsigned ndeps = 0; starpu_tag_t tag_array[nblocks]; unsigned start; unsigned bound; #ifdef SINGLE_TMP1221 bound = 0; start = (k-1)+1; #else bound = 1; start = (k-2)+1; #endif if (k > bound) { unsigned i; for (i = start; i < nblocks; i++) { if (rank == get_block_rank(i, j)) #ifdef SINGLE_TMP1221 tag_array[ndeps++] = TAG_GEMM(k-1, i, j); #else tag_array[ndeps++] = TAG_GEMM(k-2, i, j); #endif } } int source = get_block_rank(k, j); #ifdef SINGLE_TMP1221 starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_12_block_handle)(j); #else starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_12_block_handle)(j,k); #endif starpu_mpi_tag_t mpi_tag = MPI_TAG_TRSM_LL(k, j); starpu_tag_t partial_tag = TAG_TRSM_LL_SAVE_PARTIAL(k, j); starpu_tag_t unlocked_tag = TAG_TRSM_LL_SAVE(k, j); receive_when_deps_are_done(ndeps, tag_array, source, mpi_tag, block_handle, partial_tag, unlocked_tag); } static void find_nodes_using_12(unsigned k, unsigned j, int *rank_mask) { memset(rank_mask, 0, world_size*sizeof(int)); /* Block 12_kj is used to compute 22_kij with i > k */ unsigned i; for (i = k+1; i < nblocks; i++) { int r = get_block_rank(i, j); rank_mask[r] = 1; } } static void callback_task_trsm_ll_real(void *_arg) { struct callback_arg *arg = _arg; unsigned k = arg->k; unsigned j = arg->j; /* Find all the nodes potentially requiring this block */ int rank_mask[world_size]; find_nodes_using_12(k, j, rank_mask); rank_mask[rank] = 0; /* Send the block to those nodes */ starpu_data_handle_t block_handle = STARPU_PLU(get_block_handle)(k, j); starpu_tag_t tag = TAG_TRSM_LL_SAVE(k, j); starpu_mpi_tag_t mpi_tag = MPI_TAG_TRSM_LL(k, j); send_data_to_mask(block_handle, rank_mask, mpi_tag, tag); free(arg); } static void create_task_trsm_ll_real(unsigned k, unsigned j) { struct starpu_task *task = create_task(TAG_TRSM_LL(k, j)); #ifdef STARPU_DEVEL #warning temporary fix :/ #endif // task->cl = &STARPU_PLU(cl_trsm_ll); task->cl = &STARPU_PLU(cl_trsm_ru); task->color = 0x8080ff; task->cl_arg = create_debug_info(j, j, k); task->cl_arg_free = 1; unsigned diag_block_is_local = (get_block_rank(k, k) == rank); starpu_tag_t tag_11_dep; /* which sub-data is manipulated ? */ starpu_data_handle_t diag_block; if (diag_block_is_local) { diag_block = STARPU_PLU(get_block_handle)(k, k); tag_11_dep = TAG_GETRF(k); } else { #ifdef SINGLE_TMP11 diag_block = STARPU_PLU(get_tmp_11_block_handle)(); #else diag_block = STARPU_PLU(get_tmp_11_block_handle)(k); #endif tag_11_dep = TAG_GETRF_SAVE(k); } task->handles[0] = diag_block; task->handles[1] = STARPU_PLU(get_block_handle)(k, j); STARPU_ASSERT(get_block_rank(k, j) == rank); STARPU_ASSERT(task->handles[0] != STARPU_POISON_PTR); STARPU_ASSERT(task->handles[1] != STARPU_POISON_PTR); struct callback_arg *arg = malloc(sizeof(struct callback_arg)); arg->j = j; arg->k = k; task->callback_func = callback_task_trsm_ll_real; task->callback_arg = arg; if (!no_prio) task->priority = 3*nblocks - (2*k + j); /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_TRSM_LL(k, j), 2, tag_11_dep, TAG_GEMM(k-1, k, j)); } else { starpu_tag_declare_deps(TAG_TRSM_LL(k, j), 1, tag_11_dep); } int ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } static void create_task_trsm_ll(unsigned k, unsigned j) { if (get_block_rank(k, j) == rank) { #ifdef VERBOSE_INIT fprintf(stderr, "CREATE real task 12(k = %u, j = %u) on node %d\n", k, j, rank); #endif create_task_trsm_ll_real(k, j); } else { /* We don't handle the task, but perhaps we have to generate MPI transfers. */ int rank_mask[world_size]; find_nodes_using_12(k, j, rank_mask); if (rank_mask[rank]) { #ifdef VERBOSE_INIT fprintf(stderr, "create RECV task 12(k = %u, j = %u) on node %d\n", k, j, rank); #endif create_task_trsm_ll_recv(k, j); } else { #ifdef VERBOSE_INIT fprintf(stderr, "Node %d needs not 12(k=%u, i=%u)\n", rank, k, j); #endif } } } /* * Task TRSM_RU */ static void create_task_trsm_ru_recv(unsigned k, unsigned i) { /* The current node is not computing that task, so we receive the block * with MPI */ /* We don't issue a MPI receive request until everyone using the * temporary buffer is done : 21_(k-1)i can be used by 22_(k-1)ij with * j >= k */ unsigned ndeps = 0; starpu_tag_t tag_array[nblocks]; unsigned bound; unsigned start; #ifdef SINGLE_TMP1221 bound = 0; start = (k-1)+1; #else bound = 1; start = (k-2)+1; #endif if (k > bound) { unsigned j; for (j = start; j < nblocks; j++) { if (rank == get_block_rank(i, j)) #ifdef SINGLE_TMP1221 tag_array[ndeps++] = TAG_GEMM(k-1, i, j); #else tag_array[ndeps++] = TAG_GEMM(k-2, i, j); #endif } } int source = get_block_rank(i, k); #ifdef SINGLE_TMP1221 starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_21_block_handle)(i); #else starpu_data_handle_t block_handle = STARPU_PLU(get_tmp_21_block_handle)(i, k); #endif starpu_mpi_tag_t mpi_tag = MPI_TAG_TRSM_RU(k, i); starpu_tag_t partial_tag = TAG_TRSM_RU_SAVE_PARTIAL(k, i); starpu_tag_t unlocked_tag = TAG_TRSM_RU_SAVE(k, i); // fprintf(stderr, "NODE %d - 21 (%d, %d) - recv when done ndeps %d - tag array %lx\n", rank, k, i, ndeps, tag_array[0]); receive_when_deps_are_done(ndeps, tag_array, source, mpi_tag, block_handle, partial_tag, unlocked_tag); } static void find_nodes_using_21(unsigned k, unsigned i, int *rank_mask) { memset(rank_mask, 0, world_size*sizeof(int)); /* Block 21_ki is used to compute 22_kij with j > k */ unsigned j; for (j = k+1; j < nblocks; j++) { int r = get_block_rank(i, j); rank_mask[r] = 1; } } static void callback_task_trsm_ru_real(void *_arg) { struct callback_arg *arg = _arg; unsigned k = arg->k; unsigned i = arg->i; /* Find all the nodes potentially requiring this block */ int rank_mask[world_size]; find_nodes_using_21(k, i, rank_mask); rank_mask[rank] = 0; /* Send the block to those nodes */ starpu_data_handle_t block_handle = STARPU_PLU(get_block_handle)(i, k); starpu_tag_t tag = TAG_TRSM_RU_SAVE(k, i); starpu_mpi_tag_t mpi_tag = MPI_TAG_TRSM_RU(k, i); send_data_to_mask(block_handle, rank_mask, mpi_tag, tag); free(arg); } static void create_task_trsm_ru_real(unsigned k, unsigned i) { struct starpu_task *task = create_task(TAG_TRSM_RU(k, i)); #ifdef STARPU_DEVEL #warning temporary fix #endif // task->cl = &STARPU_PLU(cl_trsm_ru); task->cl = &STARPU_PLU(cl_trsm_ll); task->color = 0x8080c0; task->cl_arg = create_debug_info(i, i, k); task->cl_arg_free = 1; unsigned diag_block_is_local = (get_block_rank(k, k) == rank); starpu_tag_t tag_11_dep; /* which sub-data is manipulated ? */ starpu_data_handle_t diag_block; if (diag_block_is_local) { diag_block = STARPU_PLU(get_block_handle)(k, k); tag_11_dep = TAG_GETRF(k); } else { #ifdef SINGLE_TMP11 diag_block = STARPU_PLU(get_tmp_11_block_handle)(); #else diag_block = STARPU_PLU(get_tmp_11_block_handle)(k); #endif tag_11_dep = TAG_GETRF_SAVE(k); } task->handles[0] = diag_block; task->handles[1] = STARPU_PLU(get_block_handle)(i, k); STARPU_ASSERT(task->handles[0] != STARPU_POISON_PTR); STARPU_ASSERT(task->handles[1] != STARPU_POISON_PTR); struct callback_arg *arg = malloc(sizeof(struct callback_arg)); arg->i = i; arg->k = k; task->callback_func = callback_task_trsm_ru_real; task->callback_arg = arg; if (!no_prio) task->priority = 3*nblocks - (2*k + i); /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_TRSM_RU(k, i), 2, tag_11_dep, TAG_GEMM(k-1, i, k)); } else { starpu_tag_declare_deps(TAG_TRSM_RU(k, i), 1, tag_11_dep); } int ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } static void create_task_trsm_ru(unsigned k, unsigned i) { if (get_block_rank(i, k) == rank) { #ifdef VERBOSE_INIT fprintf(stderr, "CREATE real task 21(k = %u, i = %u) on node %d\n", k, i, rank); #endif create_task_trsm_ru_real(k, i); } else { /* We don't handle the task, but perhaps we have to generate MPI transfers. */ int rank_mask[world_size]; find_nodes_using_21(k, i, rank_mask); if (rank_mask[rank]) { #ifdef VERBOSE_INIT fprintf(stderr, "create RECV task 21(k = %u, i = %u) on node %d\n", k, i, rank); #endif create_task_trsm_ru_recv(k, i); } else { #ifdef VERBOSE_INIT fprintf(stderr, "Node %d needs not 21(k=%u, i=%u)\n", rank, k,i); #endif } } } /* * Task GEMM */ static void create_task_gemm_real(unsigned k, unsigned i, unsigned j) { // printf("task 22 k,i,j = %d,%d,%d TAG = %llx\n", k,i,j, TAG_GEMM(k,i,j)); struct starpu_task *task = create_task(TAG_GEMM(k, i, j)); task->cl = &STARPU_PLU(cl_gemm); task->color = 0x00ff00; task->cl_arg = create_debug_info(i, j, k); task->cl_arg_free = 1; /* which sub-data is manipulated ? */ /* produced by TAG_TRSM_RU_SAVE(k, i) */ unsigned block21_is_local = (get_block_rank(i, k) == rank); starpu_tag_t tag_21_dep; starpu_data_handle_t block21; if (block21_is_local) { block21 = STARPU_PLU(get_block_handle)(i, k); tag_21_dep = TAG_TRSM_RU(k, i); } else { #ifdef SINGLE_TMP1221 block21 = STARPU_PLU(get_tmp_21_block_handle)(i); #else block21 = STARPU_PLU(get_tmp_21_block_handle)(i, k); #endif tag_21_dep = TAG_TRSM_RU_SAVE(k, i); } /* produced by TAG_TRSM_LL_SAVE(k, j) */ unsigned block12_is_local = (get_block_rank(k, j) == rank); starpu_tag_t tag_12_dep; starpu_data_handle_t block12; if (block12_is_local) { // block12 = STARPU_PLU(get_block_handle)(j, k); block12 = STARPU_PLU(get_block_handle)(k, j); tag_12_dep = TAG_TRSM_LL(k, j); } else { #ifdef SINGLE_TMP1221 block12 = STARPU_PLU(get_tmp_12_block_handle)(j); #else block12 = STARPU_PLU(get_tmp_12_block_handle)(j, k); #endif tag_12_dep = TAG_TRSM_LL_SAVE(k, j); } #ifdef STARPU_DEVEL #warning temporary fix :/ #endif //task->handles[0] = block21; task->handles[0] = block12; //task->handles[1] = block12; task->handles[1] = block21; /* produced by TAG_GEMM(k-1, i, j) */ task->handles[2] = STARPU_PLU(get_block_handle)(i, j); STARPU_ASSERT(task->handles[0] != STARPU_POISON_PTR); STARPU_ASSERT(task->handles[1] != STARPU_POISON_PTR); STARPU_ASSERT(task->handles[2] != STARPU_POISON_PTR); if (!no_prio) task->priority = 3*nblocks - (k + i + j); /* enforce dependencies ... */ if (k > 0) { starpu_tag_declare_deps(TAG_GEMM(k, i, j), 3, TAG_GEMM(k-1, i, j), tag_12_dep, tag_21_dep); } else { starpu_tag_declare_deps(TAG_GEMM(k, i, j), 2, tag_12_dep, tag_21_dep); } int ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } static void create_task_gemm(unsigned k, unsigned i, unsigned j) { if (get_block_rank(i, j) == rank) { // fprintf(stderr, "CREATE real task 22(k = %d, i = %d, j = %d) on node %d\n", k, i, j, rank); create_task_gemm_real(k, i, j); } // else // { // fprintf(stderr, "Node %d needs not 22(k=%d, i=%d, j = %d)\n", rank, k,i,j); // } } static void wait_tag_and_fetch_handle(starpu_tag_t tag, starpu_data_handle_t handle) { STARPU_ASSERT(handle != STARPU_POISON_PTR); starpu_tag_wait(tag); // fprintf(stderr, "Rank %d : tag %lx is done\n", rank, tag); starpu_data_acquire(handle, STARPU_R); starpu_data_release(handle); // starpu_data_unregister(handle); } static void wait_termination(void) { unsigned k, i, j; for (k = 0; k < nblocks; k++) { /* Wait task 11k if needed */ if (get_block_rank(k, k) == rank) { starpu_data_handle_t diag_block = STARPU_PLU(get_block_handle)(k, k); wait_tag_and_fetch_handle(TAG_GETRF_SAVE(k), diag_block); } for (i = k + 1; i < nblocks; i++) { /* Wait task 21ki if needed */ if (get_block_rank(i, k) == rank) { starpu_data_handle_t block21 = STARPU_PLU(get_block_handle)(i, k); //starpu_data_handle_t block21 = STARPU_PLU(get_block_handle)(k, i); //fprintf(stderr, "BLOCK21 i %d k %d -> handle %p\n", i, k, block21); wait_tag_and_fetch_handle(TAG_TRSM_RU_SAVE(k, i), block21); } } for (j = k + 1; j < nblocks; j++) { /* Wait task 12kj if needed */ if (get_block_rank(k, j) == rank) { //starpu_data_handle_t block12 = STARPU_PLU(get_block_handle)(j, k); starpu_data_handle_t block12 = STARPU_PLU(get_block_handle)(k, j); //fprintf(stderr, "BLOCK12 j %d k %d -> handle %p\n", j, k, block12); wait_tag_and_fetch_handle(TAG_TRSM_LL_SAVE(k, j), block12); } } } } /* * code to bootstrap the factorization */ double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size, unsigned _no_prio) { double start; double end; nblocks = _nblocks; rank = _rank; world_size = _world_size; no_prio = _no_prio; /* create all the DAG nodes */ unsigned i,j,k; for (k = 0; k < nblocks; k++) { starpu_iteration_push(k); create_task_getrf(k); for (i = k+1; i took %f ms\n", rank, timing/1000); return timing; } starpu-1.4.9+dfsg/mpi/examples/mpi_lu/pxlu.h000066400000000000000000000043661507764646700210520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __PXLU_H__ #define __PXLU_H__ #include #include #include #ifdef STARPU_USE_CUDA #include #endif #define BLAS3_FLOP(n1,n2,n3) (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3)) //#define SINGLE_TMP11 1 //#define SINGLE_TMP1221 1 struct debug_info { unsigned i; unsigned j; unsigned k; }; double STARPU_PLU(plu_main)(unsigned nblocks, int rank, int world_size, unsigned no_prio); TYPE *STARPU_PLU(reconstruct_matrix)(unsigned size, unsigned nblocks); void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved); unsigned STARPU_PLU(display_flag)(void); void STARPU_PLU(compute_ax)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank); void STARPU_PLU(compute_lux)(unsigned size, TYPE *x, TYPE *y, unsigned nblocks, int rank); starpu_data_handle_t STARPU_PLU(get_block_handle)(unsigned i, unsigned j); TYPE *STARPU_PLU(get_block)(unsigned i, unsigned j); #ifdef SINGLE_TMP11 starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(void); #else starpu_data_handle_t STARPU_PLU(get_tmp_11_block_handle)(unsigned k); #endif #ifdef SINGLE_TMP1221 starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j); starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i); #else starpu_data_handle_t STARPU_PLU(get_tmp_12_block_handle)(unsigned j, unsigned k); starpu_data_handle_t STARPU_PLU(get_tmp_21_block_handle)(unsigned i, unsigned k); #endif void STARPU_PLU(display_data_content)(TYPE *data, unsigned blocksize); int get_block_rank(unsigned i, unsigned j); #endif // __PXLU_H__ starpu-1.4.9+dfsg/mpi/examples/mpi_lu/pxlu_implicit.c000066400000000000000000000114511507764646700227300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "pxlu.h" #include "pxlu_kernels.h" #include //#define VERBOSE_INIT 1 //#define DEBUG 1 static unsigned no_prio = 0; static unsigned nblocks = 0; static int rank = -1; static int world_size = -1; struct callback_arg { unsigned i, j, k; }; /* * Task GETRF (diagonal factorization) */ static void create_task_getrf(unsigned k) { starpu_mpi_task_insert(MPI_COMM_WORLD, &STARPU_PLU(cl_getrf), STARPU_VALUE, &k, sizeof(k), STARPU_VALUE, &k, sizeof(k), STARPU_VALUE, &k, sizeof(k), STARPU_RW, STARPU_PLU(get_block_handle)(k, k), STARPU_PRIORITY, !no_prio ? (int) (3*nblocks - 3*k) : STARPU_MIN_PRIO, 0); } /* * Task TRSM_LL */ static void create_task_trsm_ll(unsigned k, unsigned j) { #ifdef STARPU_DEVEL #warning temporary fix #endif starpu_mpi_task_insert(MPI_COMM_WORLD, //&STARPU_PLU(cl_trsm_ll), &STARPU_PLU(cl_trsm_ru), STARPU_VALUE, &j, sizeof(j), STARPU_VALUE, &j, sizeof(j), STARPU_VALUE, &k, sizeof(k), STARPU_R, STARPU_PLU(get_block_handle)(k, k), STARPU_RW, STARPU_PLU(get_block_handle)(k, j), STARPU_PRIORITY, !no_prio ? (int) (3*nblocks - (2*k + j)) : STARPU_MIN_PRIO, 0); } /* * Task TRSM_RU */ static void create_task_trsm_ru(unsigned k, unsigned i) { #ifdef STARPU_DEVEL #warning temporary fix #endif starpu_mpi_task_insert(MPI_COMM_WORLD, //&STARPU_PLU(cl_trsm_ru), &STARPU_PLU(cl_trsm_ll), STARPU_VALUE, &i, sizeof(i), STARPU_VALUE, &i, sizeof(i), STARPU_VALUE, &k, sizeof(k), STARPU_R, STARPU_PLU(get_block_handle)(k, k), STARPU_RW, STARPU_PLU(get_block_handle)(i, k), STARPU_PRIORITY, !no_prio ? (int) (3*nblocks - (2*k + i)) : STARPU_MIN_PRIO, 0); } /* * Task GEMM */ static void create_task_gemm(unsigned k, unsigned i, unsigned j) { starpu_mpi_task_insert(MPI_COMM_WORLD, &STARPU_PLU(cl_gemm), STARPU_VALUE, &i, sizeof(i), STARPU_VALUE, &j, sizeof(j), STARPU_VALUE, &k, sizeof(k), STARPU_R, STARPU_PLU(get_block_handle)(k, j), STARPU_R, STARPU_PLU(get_block_handle)(i, k), STARPU_RW, STARPU_PLU(get_block_handle)(i, j), STARPU_PRIORITY, !no_prio ? (int) (3*nblocks - (k + i + j)) : STARPU_MIN_PRIO, 0); } /* * code to bootstrap the factorization */ double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size, unsigned _no_prio) { double start; double end; int ret; nblocks = _nblocks; rank = _rank; world_size = _world_size; no_prio = _no_prio; /* create all the DAG nodes */ unsigned i,j,k; ret = starpu_mpi_wait_for_all(MPI_COMM_WORLD); STARPU_ASSERT(ret == MPI_SUCCESS); ret = starpu_mpi_barrier(MPI_COMM_WORLD); STARPU_ASSERT(ret == MPI_SUCCESS); start = starpu_timing_now(); for (k = 0; k < nblocks; k++) { starpu_iteration_push(k); create_task_getrf(k); for (i = k+1; i took %f ms\n", rank, timing/1000); return timing; } starpu-1.4.9+dfsg/mpi/examples/mpi_lu/pxlu_kernels.c000066400000000000000000000302201507764646700225540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "pxlu.h" #include "pxlu_kernels.h" #include ///#define VERBOSE_KERNELS 1 #ifdef STARPU_USE_CUDA static const TYPE p1 = 1.0f; static const TYPE m1 = -1.0f; #endif /* * GEMM */ static inline void STARPU_PLU(common_gemm)(void *descr[], int s, void *_args) { TYPE *right = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *left = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *center = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned dx = STARPU_MATRIX_GET_NX(descr[2]); unsigned dy = STARPU_MATRIX_GET_NY(descr[2]); unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld12 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); #ifdef VERBOSE_KERNELS struct debug_info *info = _args; int rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); fprintf(stderr, "KERNEL GEMM %d - k = %u i = %u j = %u\n", rank, info->k, info->i, info->j); #else (void)_args; #endif #ifdef STARPU_USE_CUDA cublasStatus_t status; #endif switch (s) { case 0: CPU_GEMM("N", "N", dy, dx, dz, (TYPE)-1.0, right, ld21, left, ld12, (TYPE)1.0, center, ld22); break; #ifdef STARPU_USE_CUDA case 1: { status = CUBLAS_GEMM(starpu_cublas_get_local_handle(), CUBLAS_OP_N, CUBLAS_OP_N, dx, dy, dz, (CUBLAS_TYPE *)&m1, (CUBLAS_TYPE *)right, ld21, (CUBLAS_TYPE *)left, ld12, (CUBLAS_TYPE *)&p1, (CUBLAS_TYPE *)center, ld22); if (STARPU_UNLIKELY(status != CUBLAS_STATUS_SUCCESS)) STARPU_CUBLAS_REPORT_ERROR(status); break; } #endif default: STARPU_ABORT(); break; } #ifdef VERBOSE_KERNELS fprintf(stderr, "KERNEL GEMM %d - k = %u i = %u j = %u done\n", rank, info->k, info->i, info->j); #endif } static void STARPU_PLU(cpu_gemm)(void *descr[], void *_args) { STARPU_PLU(common_gemm)(descr, 0, _args); } #ifdef STARPU_USE_CUDA static void STARPU_PLU(cublas_gemm)(void *descr[], void *_args) { STARPU_PLU(common_gemm)(descr, 1, _args); } #endif// STARPU_USE_CUDA static struct starpu_perfmodel STARPU_PLU(model_gemm) = { .type = STARPU_HISTORY_BASED, #ifdef STARPU_ATLAS .symbol = STARPU_PLU_STR(lu_model_gemm_atlas) #elif defined(STARPU_GOTO) .symbol = STARPU_PLU_STR(lu_model_gemm_goto) #elif defined(STARPU_OPENBLAS) .symbol = STARPU_PLU_STR(lu_model_gemm_openblas) #else .symbol = STARPU_PLU_STR(lu_model_gemm) #endif }; #define STRINGIFY_(x) #x #define STRINGIFY(x) STRINGIFY_(x) struct starpu_codelet STARPU_PLU(cl_gemm) = { .cpu_funcs = {STARPU_PLU(cpu_gemm)}, .cpu_funcs_name = {STRINGIFY(STARPU_PLU(cpu_gemm))}, #ifdef STARPU_USE_CUDA .cuda_funcs = {STARPU_PLU(cublas_gemm)}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_RW}, .model = &STARPU_PLU(model_gemm) }; /* * TRSM_LL */ static inline void STARPU_PLU(common_trsmll)(void *descr[], int s, void *_args) { TYPE *sub11; TYPE *sub12; sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); sub12 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); unsigned nx12 = STARPU_MATRIX_GET_NX(descr[1]); unsigned ny12 = STARPU_MATRIX_GET_NY(descr[1]); #ifdef VERBOSE_KERNELS struct debug_info *info = _args; int rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); #warning fixed debugging according to other tweak //fprintf(stderr, "KERNEL TRSM_LL %d - k = %u i %u\n", rank, info->k, info->i); fprintf(stderr, "KERNEL TRSM_RU %d - k = %u i %u\n", rank, info->k, info->j); //fprintf(stderr, "INPUT 12 GETRF\n"); fprintf(stderr, "INPUT 21 GETRF\n"); STARPU_PLU(display_data_content)(sub11, nx12); //fprintf(stderr, "INPUT 12 TRSM_LL\n"); fprintf(stderr, "INPUT 21 TRSM_RU\n"); STARPU_PLU(display_data_content)(sub12, nx12); #else (void)_args; #endif #ifdef STARPU_USE_CUDA cublasStatus_t status; #endif /* solve L11 U12 = A12 (find U12) */ switch (s) { case 0: CPU_TRSM("L", "L", "N", "N", nx12, ny12, (TYPE)1.0, sub11, ld11, sub12, ld12); break; #ifdef STARPU_USE_CUDA case 1: status = CUBLAS_TRSM(starpu_cublas_get_local_handle(), CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, CUBLAS_DIAG_NON_UNIT, ny12, nx12, (CUBLAS_TYPE*)&p1, (CUBLAS_TYPE*)sub11, ld11, (CUBLAS_TYPE*)sub12, ld12); if (STARPU_UNLIKELY(status != CUBLAS_STATUS_SUCCESS)) STARPU_CUBLAS_REPORT_ERROR(status); break; #endif default: STARPU_ABORT(); break; } #ifdef VERBOSE_KERNELS //fprintf(stderr, "OUTPUT 12 TRSM_LL\n"); fprintf(stderr, "OUTPUT 21 TRSM_RU\n"); STARPU_PLU(display_data_content)(sub12, nx12); #endif } static void STARPU_PLU(cpu_trsmll)(void *descr[], void *_args) { STARPU_PLU(common_trsmll)(descr, 0, _args); } #ifdef STARPU_USE_CUDA static void STARPU_PLU(cublas_trsmll)(void *descr[], void *_args) { STARPU_PLU(common_trsmll)(descr, 1, _args); } #endif // STARPU_USE_CUDA static struct starpu_perfmodel STARPU_PLU(model_trsm_ll) = { .type = STARPU_HISTORY_BASED, #ifdef STARPU_ATLAS .symbol = STARPU_PLU_STR(lu_model_trsm_ll_atlas) #elif defined(STARPU_GOTO) .symbol = STARPU_PLU_STR(lu_model_trsm_ll_goto) #elif defined(STARPU_OPENBLAS) .symbol = STARPU_PLU_STR(lu_model_trsm_ll_openblas) #else .symbol = STARPU_PLU_STR(lu_model_trsm_ll) #endif }; struct starpu_codelet STARPU_PLU(cl_trsm_ll) = { .cpu_funcs = {STARPU_PLU(cpu_trsmll)}, .cpu_funcs_name = {STRINGIFY(STARPU_PLU(cpu_trsmll))}, #ifdef STARPU_USE_CUDA .cuda_funcs = {STARPU_PLU(cublas_trsmll)}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = {STARPU_R, STARPU_RW}, .model = &STARPU_PLU(model_trsm_ll) }; /* * TRSM_RU */ static inline void STARPU_PLU(common_trsmru)(void *descr[], int s, void *_args) { TYPE *sub11; TYPE *sub21; sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); sub21 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); unsigned nx21 = STARPU_MATRIX_GET_NX(descr[1]); unsigned ny21 = STARPU_MATRIX_GET_NY(descr[1]); #ifdef VERBOSE_KERNELS struct debug_info *info = _args; int rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); #warning fixed debugging according to other tweak //fprintf(stderr, "KERNEL TRSM_RU %d (k = %u, i = %u)\n", rank, info->k, info->i); fprintf(stderr, "KERNEL TRSM_LL %d (k = %u, j = %u)\n", rank, info->k, info->j); //fprintf(stderr, "INPUT 21 GETRF\n"); fprintf(stderr, "INPUT 12 GETRF\n"); STARPU_PLU(display_data_content)(sub11, nx21); //fprintf(stderr, "INPUT 21 TRSM_RU\n"); fprintf(stderr, "INPUT 12 TRSM_LL\n"); STARPU_PLU(display_data_content)(sub21, nx21); #else (void)_args; #endif #ifdef STARPU_USE_CUDA cublasStatus_t status; #endif switch (s) { case 0: CPU_TRSM("R", "U", "N", "U", nx21, ny21, (TYPE)1.0, sub11, ld11, sub21, ld21); break; #ifdef STARPU_USE_CUDA case 1: status = CUBLAS_TRSM(starpu_cublas_get_local_handle(), CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, CUBLAS_DIAG_UNIT, ny21, nx21, (CUBLAS_TYPE*)&p1, (CUBLAS_TYPE*)sub11, ld11, (CUBLAS_TYPE*)sub21, ld21); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; #endif default: STARPU_ABORT(); break; } #ifdef VERBOSE_KERNELS //fprintf(stderr, "OUTPUT 21 GETRF\n"); fprintf(stderr, "OUTPUT 12 GETRF\n"); STARPU_PLU(display_data_content)(sub11, nx21); //fprintf(stderr, "OUTPUT 21 TRSM_RU\n"); fprintf(stderr, "OUTPUT 12 TRSM_LL\n"); STARPU_PLU(display_data_content)(sub21, nx21); #endif } static void STARPU_PLU(cpu_trsmru)(void *descr[], void *_args) { STARPU_PLU(common_trsmru)(descr, 0, _args); } #ifdef STARPU_USE_CUDA static void STARPU_PLU(cublas_trsmru)(void *descr[], void *_args) { STARPU_PLU(common_trsmru)(descr, 1, _args); } #endif static struct starpu_perfmodel STARPU_PLU(model_trsm_ru) = { .type = STARPU_HISTORY_BASED, #ifdef STARPU_ATLAS .symbol = STARPU_PLU_STR(lu_model_trsm_ru_atlas) #elif defined(STARPU_GOTO) .symbol = STARPU_PLU_STR(lu_model_trsm_ru_goto) #elif defined(STARPU_OPENBLAS) .symbol = STARPU_PLU_STR(lu_model_trsm_ru_openblas) #else .symbol = STARPU_PLU_STR(lu_model_trsm_ru) #endif }; struct starpu_codelet STARPU_PLU(cl_trsm_ru) = { .cpu_funcs = {STARPU_PLU(cpu_trsmru)}, .cpu_funcs_name = {STRINGIFY(STARPU_PLU(cpu_trsmru))}, #ifdef STARPU_USE_CUDA .cuda_funcs = {STARPU_PLU(cublas_trsmru)}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = {STARPU_R, STARPU_RW}, .model = &STARPU_PLU(model_trsm_ru) }; /* * GETRF */ static inline void STARPU_PLU(common_getrf)(void *descr[], int s, void *_args) { TYPE *sub11; sub11 = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]); unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]); unsigned long z; #ifdef STARPU_USE_CUDA cublasStatus_t status; cublasHandle_t handle; cudaStream_t stream; #endif #ifdef VERBOSE_KERNELS struct debug_info *info = _args; int rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); fprintf(stderr, "KERNEL 11 %d - k = %u\n", rank, info->k); #else (void)_args; #endif switch (s) { case 0: for (z = 0; z < nx; z++) { TYPE pivot; pivot = sub11[z+z*ld]; STARPU_ASSERT(!ISZERO(pivot)); CPU_SCAL(nx - z - 1, (1.0/pivot), &sub11[z+(z+1)*ld], ld); CPU_GER(nx - z - 1, nx - z - 1, -1.0, &sub11[(z+1)+z*ld], 1, &sub11[z+(z+1)*ld], ld, &sub11[(z+1) + (z+1)*ld],ld); } break; #ifdef STARPU_USE_CUDA case 1: handle = starpu_cublas_get_local_handle(); stream = starpu_cuda_get_local_stream(); for (z = 0; z < nx; z++) { TYPE pivot; TYPE inv_pivot; cudaMemcpyAsync(&pivot, &sub11[z+z*ld], sizeof(TYPE), cudaMemcpyDeviceToHost, stream); cudaStreamSynchronize(stream); STARPU_ASSERT(!ISZERO(pivot)); inv_pivot = 1.0/pivot; status = CUBLAS_SCAL(handle, nx - z - 1, (CUBLAS_TYPE*)&inv_pivot, (CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); status = CUBLAS_GER(handle, nx - z - 1, nx - z - 1, (CUBLAS_TYPE*)&m1, (CUBLAS_TYPE*)&sub11[(z+1)+z*ld], 1, (CUBLAS_TYPE*)&sub11[z+(z+1)*ld], ld, (CUBLAS_TYPE*)&sub11[(z+1) + (z+1)*ld],ld); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } cudaStreamSynchronize(stream); break; #endif default: STARPU_ABORT(); break; } #ifdef VERBOSE_KERNELS fprintf(stderr, "KERNEL GETRF %d - k = %u\n", rank, info->k); #endif } static void STARPU_PLU(cpu_getrf)(void *descr[], void *_args) { STARPU_PLU(common_getrf)(descr, 0, _args); } #ifdef STARPU_USE_CUDA static void STARPU_PLU(cublas_getrf)(void *descr[], void *_args) { STARPU_PLU(common_getrf)(descr, 1, _args); } #endif// STARPU_USE_CUDA static struct starpu_perfmodel STARPU_PLU(model_getrf) = { .type = STARPU_HISTORY_BASED, #ifdef STARPU_ATLAS .symbol = STARPU_PLU_STR(lu_model_getrf_atlas) #elif defined(STARPU_GOTO) .symbol = STARPU_PLU_STR(lu_model_getrf_goto) #elif defined(STARPU_OPENBLAS) .symbol = STARPU_PLU_STR(lu_model_getrf_openblas) #else .symbol = STARPU_PLU_STR(lu_model_getrf) #endif }; struct starpu_codelet STARPU_PLU(cl_getrf) = { .cpu_funcs = {STARPU_PLU(cpu_getrf)}, .cpu_funcs_name = {STRINGIFY(STARPU_PLU(cpu_getrf))}, #ifdef STARPU_USE_CUDA .cuda_funcs = {STARPU_PLU(cublas_getrf)}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .model = &STARPU_PLU(model_getrf) }; starpu-1.4.9+dfsg/mpi/examples/mpi_lu/pxlu_kernels.h000066400000000000000000000021161507764646700225640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __PXLU_KERNELS_H__ #define __PXLU_KERNELS_H__ #include #define str(s) #s #define xstr(s) str(s) #define STARPU_PLU_STR(name) xstr(STARPU_PLU(name)) extern struct starpu_codelet STARPU_PLU(cl_getrf); extern struct starpu_codelet STARPU_PLU(cl_trsm_ll); extern struct starpu_codelet STARPU_PLU(cl_trsm_ru); extern struct starpu_codelet STARPU_PLU(cl_gemm); #endif // __PXLU_KERNELS_H__ starpu-1.4.9+dfsg/mpi/examples/mpi_redux/000077500000000000000000000000001507764646700204075ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/examples/mpi_redux/mpi_redux.c000066400000000000000000000141321507764646700225500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example illustrates how to use the STARPU_MPI_REDUX mode * and compare it with the standard STARPU_REDUX. * * In order to make this comparison salliant, the init codelet is not * a task that set the handle to a neutral element but rather depends * on the working node. * This is not a proper way to use a reduction pattern however it * can be analogous to the cost/weight of each contribution. */ #include #include #include #include #include #include #include "helper.h" #include static void cl_cpu_work(void *handles[], void*arg) { (void)arg; double *a = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); double *b = (double *)STARPU_VARIABLE_GET_PTR(handles[1]); starpu_sleep(0.01); FPRINTF(stderr, "work_cl (rank:%d,worker:%d) %f =>",starpu_mpi_world_rank(), starpu_worker_get_id(), *a); *a = 3.0 + *a + *b; FPRINTF(stderr, "%f\n",*a); } static struct starpu_codelet work_cl = { .cpu_funcs = { cl_cpu_work }, .nbuffers = 2, .modes = { STARPU_REDUX, STARPU_R }, .name = "task_init" }; static struct starpu_codelet mpi_work_cl = { .cpu_funcs = { cl_cpu_work }, .nbuffers = 2, .modes = { STARPU_RW | STARPU_COMMUTE, STARPU_R }, .name = "task_init-mpi" }; static void cl_cpu_task_init(void *handles[], void*arg) { (void) arg; double *a = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); starpu_sleep(0.005); FPRINTF(stderr, "init_cl (rank:%d,worker:%d) %d (was %f)\n", starpu_mpi_world_rank(), starpu_worker_get_id(), starpu_mpi_world_rank(), #ifdef STARPU_HAVE_VALGRIND_H RUNNING_ON_VALGRIND ? 0. : #endif *a); *a = starpu_mpi_world_rank(); } static struct starpu_codelet task_init_cl = { .cpu_funcs = { cl_cpu_task_init }, .nbuffers = 1, .modes = { STARPU_W }, .name = "task_init" }; static void cl_cpu_task_red(void *handles[], void*arg) { (void) arg; double *ad = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); double *as = (double *)STARPU_VARIABLE_GET_PTR(handles[1]); starpu_sleep(0.01); FPRINTF(stderr, "red_cl (rank:%d,worker:%d) %f ; %f --> %f\n", starpu_mpi_world_rank(), starpu_worker_get_id(), *as, *ad, *as+*ad); *ad = *ad + *as; } static struct starpu_codelet task_red_cl = { .cpu_funcs = { cl_cpu_task_red }, .nbuffers = 2, .modes = { STARPU_RW|STARPU_COMMUTE, STARPU_R }, .name = "task_red" }; int main(int argc, char *argv[]) { int comm_rank, comm_size; /* Initializes STarPU and the StarPU-MPI layer */ starpu_fxt_autostart_profiling(0); int ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_ini_conft"); int nworkers = starpu_cpu_worker_get_count(); if (nworkers < 2) { FPRINTF(stderr, "We need at least 2 CPU worker per node.\n"); starpu_mpi_shutdown(); return STARPU_TEST_SKIPPED; } starpu_mpi_comm_size(MPI_COMM_WORLD, &comm_size); if (comm_size < 2) { FPRINTF(stderr, "We need at least 2 nodes.\n"); starpu_mpi_shutdown(); return STARPU_TEST_SKIPPED; } starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank); double a, b[comm_size]; starpu_data_handle_t a_h, b_h[comm_size]; double work_coef = 2; enum starpu_data_access_mode task_mode; int i,j,work_node; starpu_mpi_tag_t tag = 0; for (i = 0 ; i < 2 ; i++) { starpu_mpi_barrier(MPI_COMM_WORLD); if (i==0) task_mode = STARPU_MPI_REDUX; else task_mode = STARPU_REDUX; if (comm_rank == 0) { a = 1.0; FPRINTF(stderr, "init a = %f\n", a); starpu_variable_data_register(&a_h, STARPU_MAIN_RAM, (uintptr_t)&a, sizeof(double)); for (j=0;j %f expected %f\n", a, 1.0 + (comm_size - 1.0)*(comm_size)/2.0 + work_coef*nworkers*((comm_size-1)*3.0 + tmp1) + tmp2); } starpu_data_unregister(a_h); for (work_node=0; work_node < comm_size;work_node++) starpu_data_unregister(b_h[work_node]); starpu_mpi_barrier(MPI_COMM_WORLD); } starpu_mpi_shutdown(); return 0; } starpu-1.4.9+dfsg/mpi/examples/mpi_redux/mpi_redux_autowrapup.c000066400000000000000000000147441507764646700250500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example is similar to mpi_redux.c * * It iterates over multiple ways to wrap-up reduction patterns : either by * - waiting for all mpi + tasks * - calling mpi_redux yourself * - inserting a reading task on the handle to reduce */ #include #include #include #include #include #include #include "helper.h" #include static void cl_cpu_read(void *handles[], void*arg) { (void) arg; (void) handles; } static struct starpu_codelet read_cl = { .cpu_funcs = { cl_cpu_read }, .nbuffers = 1, .modes = { STARPU_R }, .name = "task_read" }; static void cl_cpu_work(void *handles[], void*arg) { (void)arg; double *a = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); double *b = (double *)STARPU_VARIABLE_GET_PTR(handles[1]); starpu_sleep(0.01); FPRINTF(stderr, "work_cl (rank:%d,worker:%d) %f =>",starpu_mpi_world_rank(), starpu_worker_get_id(), *a); *a = 3.0 + *a + *b; FPRINTF(stderr, "%f\n",*a); } static struct starpu_codelet work_cl = { .cpu_funcs = { cl_cpu_work }, .nbuffers = 2, .modes = { STARPU_REDUX, STARPU_R }, .name = "task_init" }; static struct starpu_codelet mpi_work_cl = { .cpu_funcs = { cl_cpu_work }, .nbuffers = 2, .modes = { STARPU_RW | STARPU_COMMUTE, STARPU_R }, .name = "task_init-mpi" }; static void cl_cpu_task_init(void *handles[], void*arg) { (void) arg; double *a = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); starpu_sleep(0.005); FPRINTF(stderr, "init_cl (rank:%d,worker:%d) %d (was %f)\n", starpu_mpi_world_rank(), starpu_worker_get_id(), starpu_mpi_world_rank(), #ifdef STARPU_HAVE_VALGRIND_H RUNNING_ON_VALGRIND ? 0. : #endif *a); *a = starpu_mpi_world_rank(); } static struct starpu_codelet task_init_cl = { .cpu_funcs = { cl_cpu_task_init }, .nbuffers = 1, .modes = { STARPU_W }, .name = "task_init" }; static void cl_cpu_task_red(void *handles[], void*arg) { (void) arg; double *ad = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); double *as = (double *)STARPU_VARIABLE_GET_PTR(handles[1]); starpu_sleep(0.01); FPRINTF(stderr, "red_cl (rank:%d,worker:%d) %f ; %f --> %f\n", starpu_mpi_world_rank(), starpu_worker_get_id(), *as, *ad, *as+*ad); *ad = *ad + *as; } static struct starpu_codelet task_red_cl = { .cpu_funcs = { cl_cpu_task_red }, .nbuffers = 2, .modes = { STARPU_RW|STARPU_COMMUTE, STARPU_R }, .name = "task_red" }; int main(int argc, char *argv[]) { int comm_rank, comm_size; /* Initializes STarPU and the StarPU-MPI layer */ starpu_fxt_autostart_profiling(0); int ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_ini_conft"); int nworkers = starpu_cpu_worker_get_count(); if (nworkers < 2) { FPRINTF(stderr, "We need at least 2 CPU worker per node.\n"); starpu_mpi_shutdown(); return STARPU_TEST_SKIPPED; } FPRINTF(stderr, "there are %d workers\n", nworkers); starpu_mpi_comm_size(MPI_COMM_WORLD, &comm_size); if (comm_size < 2) { FPRINTF(stderr, "We need at least 2 nodes.\n"); starpu_mpi_shutdown(); return STARPU_TEST_SKIPPED; } starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank); double a, b[comm_size]; starpu_data_handle_t a_h, b_h[comm_size]; double work_coef = 2; enum starpu_data_access_mode task_mode; int wrapup,i,j,work_node; starpu_mpi_tag_t tag = 0; for (wrapup = 0; wrapup <= 2; wrapup ++) { for (i = 0 ; i < 2 ; i++) { starpu_mpi_barrier(MPI_COMM_WORLD); if (i==0) task_mode = STARPU_MPI_REDUX; else task_mode = STARPU_REDUX; if (comm_rank == 0) { a = 1.0; FPRINTF(stderr, "init a = %f\n", a); starpu_variable_data_register(&a_h, STARPU_MAIN_RAM, (uintptr_t)&a, sizeof(double)); for (j=0;j %f expected %f\n", a, 1.0 + (comm_size - 1.0)*(comm_size)/2.0 + work_coef*nworkers*((comm_size-1)*3.0 + tmp1) + tmp2); } starpu_data_unregister(a_h); for (work_node=0; work_node < comm_size;work_node++) starpu_data_unregister(b_h[work_node]); starpu_mpi_barrier(MPI_COMM_WORLD); } } starpu_mpi_shutdown(); return 0; } starpu-1.4.9+dfsg/mpi/examples/mpi_redux/mpi_redux_tree.c000066400000000000000000000133001507764646700235630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This example illustrates how to use the STARPU_MPI_REDUX mode * and compare it with the standard STARPU_REDUX. * * In order to make this comparison salliant, the init codelet is not * a task that set the handle to a neutral element but rather depends * on the working node. * This is not a proper way to use a reduction pattern however it * can be analogous to the cost/weight of each contribution. */ #include #include #include #include #include #include #include "helper.h" #include static void cl_cpu_work(void *handles[], void*arg) { (void)arg; double *a = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); double *b = (double *)STARPU_VARIABLE_GET_PTR(handles[1]); starpu_sleep(0.01); FPRINTF(stderr, "work_cl (rank:%d,worker:%d) %f =>",starpu_mpi_world_rank(), starpu_worker_get_id(), *a); *a = 3.0 + *a + *b; FPRINTF(stderr, "%f\n",*a); } static struct starpu_codelet work_cl = { .cpu_funcs = { cl_cpu_work }, .nbuffers = 2, .modes = { STARPU_REDUX, STARPU_R }, .name = "task_init" }; static struct starpu_codelet mpi_work_cl = { .cpu_funcs = { cl_cpu_work }, .nbuffers = 2, .modes = { STARPU_RW | STARPU_COMMUTE, STARPU_R }, .name = "task_init-mpi" }; static void cl_cpu_task_init(void *handles[], void*arg) { (void) arg; double *a = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); starpu_sleep(0.005); FPRINTF(stderr, "init_cl (rank:%d,worker:%d) %d (was %f)\n", starpu_mpi_world_rank(), starpu_worker_get_id(), starpu_mpi_world_rank(), #ifdef STARPU_HAVE_VALGRIND_H RUNNING_ON_VALGRIND ? 0. : #endif *a); *a = starpu_mpi_world_rank(); } static struct starpu_codelet task_init_cl = { .cpu_funcs = { cl_cpu_task_init }, .nbuffers = 1, .modes = { STARPU_W }, .name = "task_init" }; static void cl_cpu_task_red(void *handles[], void*arg) { (void) arg; double *ad = (double *)STARPU_VARIABLE_GET_PTR(handles[0]); double *as = (double *)STARPU_VARIABLE_GET_PTR(handles[1]); starpu_sleep(0.01); FPRINTF(stderr, "red_cl (rank:%d,worker:%d) %f ; %f --> %f\n", starpu_mpi_world_rank(), starpu_worker_get_id(), *as, *ad, *as+*ad); *ad = *ad + *as; } static struct starpu_codelet task_red_cl = { .cpu_funcs = { cl_cpu_task_red }, .nbuffers = 2, .modes = { STARPU_RW|STARPU_COMMUTE, STARPU_R }, .name = "task_red" }; int main(int argc, char *argv[]) { int comm_rank, comm_size; /* Initializes STarPU and the StarPU-MPI layer */ starpu_fxt_autostart_profiling(0); int ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_ini_conft"); int nworkers = starpu_cpu_worker_get_count(); starpu_mpi_comm_size(MPI_COMM_WORLD, &comm_size); if (comm_size < 2) { FPRINTF(stderr, "We need at least 2 nodes.\n"); starpu_mpi_shutdown(); return STARPU_TEST_SKIPPED; } starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank); double a, b[comm_size]; starpu_data_handle_t a_h, b_h[comm_size]; double work_coef = 2; enum starpu_data_access_mode task_mode; int arity,j,work_node; starpu_mpi_tag_t tag = 0; for (arity = 2 ; arity < comm_size ; arity++) { starpu_mpi_barrier(MPI_COMM_WORLD); task_mode = STARPU_MPI_REDUX; if (comm_rank == 0) { a = 1.0; FPRINTF(stderr, "init a = %f\n", a); starpu_variable_data_register(&a_h, STARPU_MAIN_RAM, (uintptr_t)&a, sizeof(double)); for (j=0;j %f expected %f\n", a, 1.0 + (comm_size - 1.0)*(comm_size)/2.0 + work_coef*nworkers*((comm_size-1)*3.0 + tmp)); } starpu_data_unregister(a_h); for (work_node=0; work_node < comm_size;work_node++) starpu_data_unregister(b_h[work_node]); starpu_mpi_barrier(MPI_COMM_WORLD); } starpu_mpi_shutdown(); return 0; } starpu-1.4.9+dfsg/mpi/examples/native_fortran/000077500000000000000000000000001507764646700214345ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/examples/native_fortran/fstarpu_mod.f90000066400000000000000000005046651507764646700243170ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! !> @ingroup API_Fortran !> @brief Fortran API module fstarpu_mod use iso_c_binding implicit none ! Note: Constants truly are intptr_t, but are declared as c_ptr to be ! readily usable in c_ptr arrays to mimic variadic functions. ! Note: Bitwise or operator is provided by the .ior. overloaded operator type(c_ptr), bind(C) :: FSTARPU_R type(c_ptr), bind(C) :: FSTARPU_W type(c_ptr), bind(C) :: FSTARPU_RW type(c_ptr), bind(C) :: FSTARPU_SCRATCH type(c_ptr), bind(C) :: FSTARPU_REDUX type(c_ptr), bind(C) :: FSTARPU_MPI_REDUX type(c_ptr), bind(C) :: FSTARPU_COMMUTE type(c_ptr), bind(C) :: FSTARPU_SSEND type(c_ptr), bind(C) :: FSTARPU_LOCALITY type(c_ptr), bind(C) :: FSTARPU_DATA_ARRAY type(c_ptr), bind(C) :: FSTARPU_DATA_MODE_ARRAY type(c_ptr), bind(C) :: FSTARPU_CL_ARGS type(c_ptr), bind(C) :: FSTARPU_CL_ARGS_NFREE type(c_ptr), bind(C) :: FSTARPU_TASK_DEPS_ARRAY type(c_ptr), bind(C) :: FSTARPU_CALLBACK type(c_ptr), bind(C) :: FSTARPU_CALLBACK_WITH_ARG type(c_ptr), bind(C) :: FSTARPU_CALLBACK_WITH_ARG_NFREE type(c_ptr), bind(C) :: FSTARPU_CALLBACK_ARG type(c_ptr), bind(C) :: FSTARPU_CALLBACK_ARG_NFREE type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_ARG type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP_ARG type(c_ptr), bind(C) :: FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE type(c_ptr), bind(C) :: FSTARPU_PRIORITY type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_NODE type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_DATA type(c_ptr), bind(C) :: FSTARPU_EXECUTE_ON_WORKER type(c_ptr), bind(C) :: FSTARPU_WORKER_ORDER type(c_ptr), bind(C) :: FSTARPU_EXECUTE_WHERE type(c_ptr), bind(C) :: FSTARPU_HYPERVISOR_TAG type(c_ptr), bind(C) :: FSTARPU_POSSIBLY_PARALLEL type(c_ptr), bind(C) :: FSTARPU_FLOPS type(c_ptr), bind(C) :: FSTARPU_TAG type(c_ptr), bind(C) :: FSTARPU_TAG_ONLY type(c_ptr), bind(C) :: FSTARPU_NAME type(c_ptr), bind(C) :: FSTARPU_TASK_COLOR type(c_ptr), bind(C) :: FSTARPU_TASK_SYNCHRONOUS type(c_ptr), bind(C) :: FSTARPU_HANDLES_SEQUENTIAL_CONSISTENCY type(c_ptr), bind(C) :: FSTARPU_TASK_END_DEP type(c_ptr), bind(C) :: FSTARPU_NODE_SELECTION_POLICY type(c_ptr), bind(C) :: FSTARPU_TASK_SCHED_DATA type(c_ptr), bind(C) :: FSTARPU_VALUE type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX type(c_ptr), bind(C) :: FSTARPU_CPU_WORKER type(c_ptr), bind(C) :: FSTARPU_CUDA_WORKER type(c_ptr), bind(C) :: FSTARPU_OPENCL_WORKER type(c_ptr), bind(C) :: FSTARPU_ANY_WORKER integer(c_int), bind(C) :: FSTARPU_NMAXBUFS type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_NAME type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_STRUCT type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_MIN_PRIO type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_MAX_PRIO type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_HIERARCHY_LEVEL type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_NESTED type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_AWAKE_WORKERS type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_POLICY_INIT type(c_ptr), bind(C) :: FSTARPU_SCHED_CTX_USER_DATA type(c_ptr), bind(C) :: FSTARPU_NOWHERE type(c_ptr), bind(C) :: FSTARPU_CPU type(c_ptr), bind(C) :: FSTARPU_CUDA type(c_ptr), bind(C) :: FSTARPU_OPENCL type(c_ptr), bind(C) :: FSTARPU_CODELET_SIMGRID_EXECUTE type(c_ptr), bind(C) :: FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT type(c_ptr), bind(C) :: FSTARPU_CUDA_ASYNC type(c_ptr), bind(C) :: FSTARPU_OPENCL_ASYNC !type(c_ptr), bind(C) :: FSTARPU_PER_WORKER !type(c_ptr), bind(C) :: FSTARPU_PER_ARCH !type(c_ptr), bind(C) :: FSTARPU_PER_COMMON type(c_ptr), bind(C) :: FSTARPU_HISTORY_BASED type(c_ptr), bind(C) :: FSTARPU_REGRESSION_BASED type(c_ptr), bind(C) :: FSTARPU_NL_REGRESSION_BASED type(c_ptr), bind(C) :: FSTARPU_MULTIPLE_REGRESSION_BASED type(c_ptr), bind(C) :: FSTARPU_SEQ type(c_ptr), bind(C) :: FSTARPU_SPMD type(c_ptr), bind(C) :: FSTARPU_FORKJOIN ! (some) portable iso_c_binding types type(c_ptr), bind(C) :: FSTARPU_SZ_C_DOUBLE type(c_ptr), bind(C) :: FSTARPU_SZ_C_FLOAT type(c_ptr), bind(C) :: FSTARPU_SZ_C_CHAR type(c_ptr), bind(C) :: FSTARPU_SZ_C_INT type(c_ptr), bind(C) :: FSTARPU_SZ_C_INTPTR_T type(c_ptr), bind(C) :: FSTARPU_SZ_C_PTR type(c_ptr), bind(C) :: FSTARPU_SZ_C_SIZE_T ! (some) native Fortran types type(c_ptr), bind(C) :: FSTARPU_SZ_CHARACTER type(c_ptr), bind(C) :: FSTARPU_SZ_INTEGER type(c_ptr), bind(C) :: FSTARPU_SZ_INT4 type(c_ptr), bind(C) :: FSTARPU_SZ_INT8 type(c_ptr), bind(C) :: FSTARPU_SZ_REAL type(c_ptr), bind(C) :: FSTARPU_SZ_REAL4 type(c_ptr), bind(C) :: FSTARPU_SZ_REAL8 type(c_ptr), bind(C) :: FSTARPU_SZ_DOUBLE_PRECISION type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX4 type(c_ptr), bind(C) :: FSTARPU_SZ_COMPLEX8 integer(c_int), bind(C), target :: FSTARPU_DEFAULT_PRIO interface operator (.ior.) procedure or_cptrs end interface operator (.ior.) interface ! == starpu.h == ! void starpu_conf_init(struct starpu_conf *conf); subroutine fstarpu_conf_init (conf) bind(C,name="starpu_conf_init") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: conf end subroutine fstarpu_conf_init function fstarpu_conf_allocate () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_conf_allocate end function fstarpu_conf_allocate subroutine fstarpu_conf_free (conf) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: conf end subroutine fstarpu_conf_free subroutine fstarpu_conf_set_sched_policy_name (conf, policy_name) bind(C) use iso_c_binding, only: c_ptr, c_char type(c_ptr), value, intent(in) :: conf character(c_char), intent(in) :: policy_name end subroutine fstarpu_conf_set_sched_policy_name subroutine fstarpu_conf_set_min_prio (conf, min_prio) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: conf integer(c_int), value, intent(in) :: min_prio end subroutine fstarpu_conf_set_min_prio subroutine fstarpu_conf_set_max_prio (conf, max_prio) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: conf integer(c_int), value, intent(in) :: max_prio end subroutine fstarpu_conf_set_max_prio subroutine fstarpu_conf_set_ncpu (conf, ncpu) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: conf integer(c_int), value, intent(in) :: ncpu end subroutine fstarpu_conf_set_ncpu subroutine fstarpu_conf_set_ncuda (conf, ncuda) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: conf integer(c_int), value, intent(in) :: ncuda end subroutine fstarpu_conf_set_ncuda subroutine fstarpu_conf_set_nopencl (conf, nopencl) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: conf integer(c_int), value, intent(in) :: nopencl end subroutine fstarpu_conf_set_nopencl ! starpu_init: see fstarpu_init ! starpu_initialize: see fstarpu_init ! void starpu_pause(void); subroutine fstarpu_pause() bind(C,name="starpu_pause") end subroutine fstarpu_pause ! void starpu_resume(void); subroutine fstarpu_resume() bind(C,name="starpu_resume") end subroutine fstarpu_resume ! int starpu_is_paused(void); function fstarpu_is_paused() bind(C,name="starpu_is_paused") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_is_paused end function fstarpu_is_paused ! void starpu_shutdown(void); subroutine fstarpu_shutdown () bind(C,name="starpu_shutdown") end subroutine fstarpu_shutdown ! starpu_topology_print subroutine fstarpu_topology_print () bind(C) end subroutine fstarpu_topology_print ! int starpu_asynchronous_copy_disabled(void); function fstarpu_asynchronous_copy_disabled() bind(C,name="starpu_asynchronous_copy_disabled") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_asynchronous_copy_disabled end function fstarpu_asynchronous_copy_disabled ! int starpu_asynchronous_cuda_copy_disabled(void); function fstarpu_asynchronous_cuda_copy_disabled() bind(C,name="starpu_asynchronous_cuda_copy_disabled") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_asynchronous_cuda_copy_disabled end function fstarpu_asynchronous_cuda_copy_disabled ! int starpu_asynchronous_opencl_copy_disabled(void); function fstarpu_asynchronous_opencl_copy_disabled() bind(C,name="starpu_asynchronous_opencl_copy_disabled") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_asynchronous_opencl_copy_disabled end function fstarpu_asynchronous_opencl_copy_disabled ! void starpu_display_stats(); subroutine fstarpu_display_stats() bind(C,name="starpu_display_stats") end subroutine fstarpu_display_stats ! void starpu_get_version(int *major, int *minor, int *release); subroutine fstarpu_get_version(major,minor,release) bind(C,name="starpu_get_version") use iso_c_binding, only: c_int integer(c_int), intent(out) :: major,minor,release end subroutine fstarpu_get_version ! == starpu_worker.h == ! unsigned starpu_worker_get_count(void); function fstarpu_worker_get_count() bind(C,name="starpu_worker_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_get_count end function fstarpu_worker_get_count ! unsigned starpu_combined_worker_get_count(void); function fstarpu_combined_worker_get_count() bind(C,name="starpu_combined_worker_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_combined_worker_get_count end function fstarpu_combined_worker_get_count ! unsigned starpu_worker_is_combined_worker(int id); function fstarpu_worker_is_combined_worker(id) bind(C,name="starpu_worker_is_combined_worker") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_is_combined_worker integer(c_int), value, intent(in) :: id end function fstarpu_worker_is_combined_worker ! unsigned starpu_cpu_worker_get_count(void); function fstarpu_cpu_worker_get_count() bind(C,name="starpu_cpu_worker_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_cpu_worker_get_count end function fstarpu_cpu_worker_get_count ! unsigned starpu_cuda_worker_get_count(void); function fstarpu_cuda_worker_get_count() bind(C,name="starpu_cuda_worker_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_cuda_worker_get_count end function fstarpu_cuda_worker_get_count ! unsigned starpu_opencl_worker_get_count(void); function fstarpu_opencl_worker_get_count() bind(C,name="starpu_opencl_worker_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_opencl_worker_get_count end function fstarpu_opencl_worker_get_count ! int starpu_worker_get_id(void); function fstarpu_worker_get_id() bind(C,name="starpu_worker_get_id") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_get_id end function fstarpu_worker_get_id ! _starpu_worker_get_id_check ! starpu_worker_get_id_check ! int starpu_worker_get_bindid(int workerid); function fstarpu_worker_get_bindid(id) bind(C,name="starpu_worker_get_bindid") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_get_bindid integer(c_int), value, intent(in) :: id end function fstarpu_worker_get_bindid ! int starpu_combined_worker_get_id(void); function fstarpu_combined_worker_get_id() bind(C,name="starpu_combined_worker_get_id") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_combined_worker_get_id end function fstarpu_combined_worker_get_id ! int starpu_combined_worker_get_size(void); function fstarpu_combined_worker_get_size() bind(C,name="starpu_combined_worker_get_size") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_combined_worker_get_size end function fstarpu_combined_worker_get_size ! int starpu_combined_worker_get_rank(void); function fstarpu_combined_worker_get_rank() bind(C,name="starpu_combined_worker_get_rank") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_combined_worker_get_rank end function fstarpu_combined_worker_get_rank ! enum starpu_worker_archtype starpu_worker_get_type(int id); function fstarpu_worker_get_type(id) bind(C) use iso_c_binding, only: c_int, c_ptr type(c_ptr) :: fstarpu_worker_get_type ! C function returns c_intptr_t integer(c_int),value,intent(in) :: id end function fstarpu_worker_get_type ! int starpu_worker_get_count_by_type(enum starpu_worker_archtype type); function fstarpu_worker_get_count_by_type(typeid) bind(C) use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_worker_get_count_by_type type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func end function fstarpu_worker_get_count_by_type ! int starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize); function fstarpu_worker_get_ids_by_type(typeid, workerids, maxsize) bind(C) use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_worker_get_ids_by_type type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func integer(c_int),intent(out) :: workerids(*) integer(c_int),value,intent(in) :: maxsize end function fstarpu_worker_get_ids_by_type ! int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num); function fstarpu_worker_get_by_type(typeid, num) bind(C) use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_worker_get_by_type type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func integer(c_int),value,intent(in) :: num end function fstarpu_worker_get_by_type ! int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid); function fstarpu_worker_get_by_devid(typeid, devid) bind(C) use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_worker_get_by_devid type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func integer(c_int),value,intent(in) :: devid end function fstarpu_worker_get_by_devid ! void starpu_worker_get_name(int id, char *dst, size_t maxlen); subroutine fstarpu_worker_get_name(id, dst, maxlen) bind(C,name="starpu_worker_get_name") use iso_c_binding, only: c_int, c_char, c_size_t integer(c_int),value,intent(in) :: id character(c_char),intent(out) :: dst(*) integer(c_size_t),value,intent(in) :: maxlen end subroutine fstarpu_worker_get_name ! int starpu_worker_get_devid(int id); function fstarpu_worker_get_devid(id) bind(C,name="starpu_worker_get_devid") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_get_devid integer(c_int), value, intent(in) :: id end function fstarpu_worker_get_devid ! struct starpu_tree* starpu_workers_get_tree(void); ! unsigned starpu_worker_get_sched_ctx_list(int worker, unsigned **sched_ctx); ! unsigned starpu_worker_is_blocked(int workerid); function fstarpu_worker_is_blocked(id) bind(C,name="starpu_worker_is_blocked") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_is_blocked integer(c_int), value, intent(in) :: id end function fstarpu_worker_is_blocked ! unsigned starpu_worker_is_slave_somewhere(int workerid); function fstarpu_worker_is_slave_somewhere(id) bind(C,name="starpu_worker_is_slave_somewhere") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_is_slave_somewhere integer(c_int), value, intent(in) :: id end function fstarpu_worker_is_slave_somewhere ! char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type); subroutine fstarpu_worker_get_type_as_string(typeid,dst,maxlen) bind(C) use iso_c_binding, only: c_ptr, c_char, c_size_t type(c_ptr),value,intent(in) :: typeid ! c_intptr_t expected by C func character(c_char),intent(out) :: dst(*) integer(c_size_t),value,intent(in) :: maxlen end subroutine fstarpu_worker_get_type_as_string ! int starpu_bindid_get_workerids(int bindid, int **workerids); ! == starpu_task.h == function fstarpu_task_create_sync (handle, mode) bind(C,name="starpu_task_create_sync") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_task_create_sync type(c_ptr), value, intent(in) :: handle type(c_ptr), value, intent(in) :: mode end function fstarpu_task_create_sync ! void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array); subroutine fstarpu_tag_declare_deps_array(id,ndeps,tag_array) bind(C,name="starpu_tag_declare_deps_array") use iso_c_binding, only: c_int, c_long_long integer(c_int), value, intent(in) :: id integer(c_int), value, intent(in) :: ndeps integer(c_long_long), intent(in) :: tag_array(*) end subroutine fstarpu_tag_declare_deps_array ! void starpu_task_declare_deps(starpu_tag_t id, unsigned ndeps, ...); subroutine fstarpu_task_declare_deps(task,ndeps,root_task) bind(C,name="starpu_task_declare_deps") use iso_c_binding, only: c_int, c_ptr type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: ndeps type(c_ptr), value, intent(in) :: root_task end subroutine fstarpu_task_declare_deps ! void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]); subroutine fstarpu_task_declare_deps_array(task,ndeps,task_array) bind(C,name="starpu_task_declare_deps_array") use iso_c_binding, only: c_int, c_ptr type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: ndeps type(c_ptr), intent(in) :: task_array(*) end subroutine fstarpu_task_declare_deps_array ! void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps) subroutine fstarpu_task_end_dep_add(task, nb_deps) & bind(C,name="starpu_task_end_dep_add") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: nb_deps end subroutine fstarpu_task_end_dep_add ! void starpu_task_end_dep_release(struct starpu_task *t) subroutine fstarpu_task_end_dep_release(task) & bind(C,name="starpu_task_end_dep_release") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: task end subroutine fstarpu_task_end_dep_release ! int starpu_tag_wait(starpu_tag_t id); function fstarpu_tag_wait(id) bind(C,name="starpu_tag_wait") use iso_c_binding, only: c_int, c_long_long integer(c_int) :: fstarpu_tag_wait integer(c_long_long), value, intent(in) :: id end function fstarpu_tag_wait ! int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id); function fstarpu_tag_wait_array(ntags,tag_array) bind(C,name="starpu_tag_wait_array") use iso_c_binding, only: c_int, c_long_long integer(c_int) :: fstarpu_tag_wait_array integer(c_int), value, intent(in) :: ntags integer(c_long_long), intent(in) :: tag_array(*) end function fstarpu_tag_wait_array ! void starpu_tag_notify_from_apps(starpu_tag_t id); subroutine fstarpu_tag_notify_from_apps(id) bind(C,name="starpu_tag_notify_from_apps") use iso_c_binding, only: c_long_long integer(c_long_long), value, intent(in) :: id end subroutine fstarpu_tag_notify_from_apps ! void starpu_tag_restart(starpu_tag_t id); subroutine fstarpu_tag_restart(id) bind(C,name="starpu_tag_restart") use iso_c_binding, only: c_long_long integer(c_long_long), value, intent(in) :: id end subroutine fstarpu_tag_restart ! void starpu_tag_remove(starpu_tag_t id); subroutine fstarpu_tag_remove(id) bind(C,name="starpu_tag_remove") use iso_c_binding, only: c_long_long integer(c_long_long), value, intent(in) :: id end subroutine fstarpu_tag_remove ! struct starpu_task *starpu_tag_get_task(starpu_tag_t id); function fstarpu_tag_get_task(id) bind(C,name="starpu_tag_get_task") use iso_c_binding, only: c_ptr, c_long_long type(c_ptr) :: fstarpu_tag_get_task integer(c_long_long), value, intent(in) :: id end function fstarpu_tag_get_task ! void starpu_task_init(struct starpu_task *task); subroutine fstarpu_task_init (task) bind(C,name="starpu_task_init") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: task end subroutine fstarpu_task_init ! void starpu_task_clean(struct starpu_task *task); subroutine fstarpu_task_clean (task) bind(C,name="starpu_task_clean") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: task end subroutine fstarpu_task_clean ! struct starpu_task *starpu_task_create(void) STARPU_ATTRIBUTE_MALLOC; function fstarpu_task_create () bind(C,name="starpu_task_create") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_task_create end function fstarpu_task_create ! void starpu_task_destroy(struct starpu_task *task); subroutine fstarpu_task_destroy (task) bind(C,name="starpu_task_destroy") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: task end subroutine fstarpu_task_destroy ! void starpu_task_set_destroy(struct starpu_task *task); subroutine fstarpu_task_set_destroy (task) bind(C,name="starpu_task_set_destroy") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: task end subroutine fstarpu_task_set_destroy ! int starpu_task_submit(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; function fstarpu_task_submit (task) bind(C,name="starpu_task_submit") use iso_c_binding, only: c_int,c_ptr integer(c_int) :: fstarpu_task_submit type(c_ptr), value, intent(in) :: task end function fstarpu_task_submit ! int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id); function fstarpu_task_submit_to_ctx (task,sched_ctx_id) bind(C,name="starpu_task_submit_to_ctx") use iso_c_binding, only: c_int,c_ptr integer(c_int) :: fstarpu_task_submit_to_ctx type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_task_submit_to_ctx ! int starpu_task_finished(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; function fstarpu_task_finished (task) bind(C,name="starpu_task_finished") use iso_c_binding, only: c_int,c_ptr integer(c_int) :: fstarpu_task_finished type(c_ptr), value, intent(in) :: task end function fstarpu_task_finished ! int starpu_task_wait(struct starpu_task *task) STARPU_WARN_UNUSED_RESULT; function fstarpu_task_wait (task) bind(C,name="starpu_task_wait") use iso_c_binding, only: c_int,c_ptr integer(c_int) :: fstarpu_task_wait type(c_ptr), value, intent(in) :: task end function fstarpu_task_wait ! int starpu_task_wait_array(struct starpu_task **tasks, unsigned nb_tasks) STARPU_WARN_UNUSED_RESULT; function fstarpu_task_wait_array(task_array,ntasks) bind(C,name="starpu_task_wait_array") use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_task_wait_array integer(c_int), value, intent(in) :: ntasks type(c_ptr), intent(in) :: task_array end function fstarpu_task_wait_array ! int starpu_task_wait_for_all(void); subroutine fstarpu_task_wait_for_all () bind(C,name="starpu_task_wait_for_all") end subroutine fstarpu_task_wait_for_all ! int starpu_task_wait_for_n_submitted(unsigned n); subroutine fstarpu_task_wait_for_n_submitted (n) bind(C,name="starpu_task_wait_for_n_submitted") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: n end subroutine fstarpu_task_wait_for_n_submitted ! int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx_id); subroutine fstarpu_task_wait_for_all_in_ctx (ctx) bind(C,name="starpu_task_wait_for_all_in_ctx") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: ctx end subroutine fstarpu_task_wait_for_all_in_ctx ! int starpu_task_wait_for_n_submitted_in_ctx(unsigned sched_ctx_id, unsigned n); subroutine fstarpu_task_wait_for_n_submitted_in_ctx (ctx,n) bind(C,name="starpu_task_wait_for_n_submitted_in_ctx") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: ctx integer(c_int), value, intent(in) :: n end subroutine fstarpu_task_wait_for_n_submitted_in_ctx ! int starpu_task_wait_for_no_ready(void); function fstarpu_task_wait_for_no_ready () bind(C,name="starpu_task_wait_for_no_ready") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_task_wait_for_no_ready end function fstarpu_task_wait_for_no_ready ! int starpu_task_nready(void); function fstarpu_task_nready () bind(C,name="starpu_task_nready") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_task_nready end function fstarpu_task_nready ! int starpu_task_nsubmitted(void); function fstarpu_task_nsubmitted () bind(C,name="starpu_task_nsubmitted") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_task_nsubmitted end function fstarpu_task_nsubmitted ! void starpu_do_schedule(void); subroutine fstarpu_do_schedule () bind(C,name="starpu_do_schedule") end subroutine fstarpu_do_schedule ! starpu_codelet_init subroutine fstarpu_codelet_init (codelet) bind(C,name="starpu_codelet_init") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: codelet end subroutine fstarpu_codelet_init ! starpu_codelet_display_stats subroutine fstarpu_codelet_display_stats (codelet) bind(C,name="starpu_codelet_display_stats") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: codelet end subroutine fstarpu_codelet_display_stats ! struct starpu_task *starpu_task_get_current(void); function fstarpu_task_get_current () bind(C,name="starpu_task_get_current") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_task_get_current end function fstarpu_task_get_current ! void starpu_parallel_task_barrier_init(struct starpu_task *task, int workerid); subroutine fstarpu_parallel_task_barrier_init_init (task,id) & bind(C,name="starpu_parallel_task_barrier_init_init") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: id end subroutine fstarpu_parallel_task_barrier_init_init ! void starpu_parallel_task_barrier_init_n(struct starpu_task *task, int worker_size); subroutine fstarpu_parallel_task_barrier_init_n_init_n (task,sz) & bind(C,name="starpu_parallel_task_barrier_init_n_init_n") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sz end subroutine fstarpu_parallel_task_barrier_init_n_init_n ! struct starpu_task *starpu_task_dup(struct starpu_task *task); function fstarpu_task_dup (task) bind(C,name="starpu_task_dup") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_task_dup type(c_ptr), value, intent(in) :: task end function fstarpu_task_dup ! void starpu_task_set_implementation(struct starpu_task *task, unsigned impl); subroutine fstarpu_task_set_implementation (task,impl) & bind(C,name="starpu_task_set_implementation") use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: impl end subroutine fstarpu_task_set_implementation ! unsigned starpu_task_get_implementation(struct starpu_task *task); function fstarpu_task_get_implementation (task) & bind(C,name="starpu_task_get_implementation") use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: task integer(c_int) :: fstarpu_task_get_implementation end function fstarpu_task_get_implementation ! -- function fstarpu_codelet_allocate () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_codelet_allocate end function fstarpu_codelet_allocate subroutine fstarpu_codelet_free (cl) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl end subroutine fstarpu_codelet_free subroutine fstarpu_codelet_set_name (cl, cl_name) bind(C) use iso_c_binding, only: c_ptr, c_char type(c_ptr), value, intent(in) :: cl character(c_char), intent(in) :: cl_name end subroutine fstarpu_codelet_set_name subroutine fstarpu_codelet_set_color (cl, cl_color) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: cl integer(c_int), value, intent(in) :: cl_color end subroutine fstarpu_codelet_set_color subroutine fstarpu_codelet_set_model (cl, cl_perfmodel) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: cl_perfmodel end subroutine fstarpu_codelet_set_model subroutine fstarpu_codelet_set_energy_model (cl, cl_perfmodel) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: cl_perfmodel end subroutine fstarpu_codelet_set_energy_model subroutine fstarpu_codelet_add_cpu_func (cl, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: cl type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_codelet_add_cpu_func subroutine fstarpu_codelet_add_cuda_func (cl, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: cl type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_codelet_add_cuda_func subroutine fstarpu_codelet_add_cuda_flags (cl, flags) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: flags ! C function expects an intptr_t end subroutine fstarpu_codelet_add_cuda_flags subroutine fstarpu_codelet_add_opencl_func (cl, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: cl type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_codelet_add_opencl_func subroutine fstarpu_codelet_add_opencl_flags (cl, flags) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: flags ! C function expects an intptr_t end subroutine fstarpu_codelet_add_opencl_flags subroutine fstarpu_codelet_add_buffer (cl, mode) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: mode ! C function expects an intptr_t end subroutine fstarpu_codelet_add_buffer subroutine fstarpu_codelet_set_variable_nbuffers (cl) bind(C) use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: cl end subroutine fstarpu_codelet_set_variable_nbuffers subroutine fstarpu_codelet_set_nbuffers (cl, nbuffers) bind(C) use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: cl integer(c_int), value, intent(in) :: nbuffers end subroutine fstarpu_codelet_set_nbuffers subroutine fstarpu_codelet_set_flags (cl, flags) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: flags ! C function expects an intptr_t end subroutine fstarpu_codelet_set_flags subroutine fstarpu_codelet_set_where (cl, where) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: where ! C function expects an intptr_t end subroutine fstarpu_codelet_set_where subroutine fstarpu_codelet_set_type (cl, type_constant) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl type(c_ptr), value, intent(in) :: type_constant ! C function expects an intptr_t end subroutine fstarpu_codelet_set_type subroutine fstarpu_codelet_set_max_parallelism (cl, max_parallelism) bind(C) use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: cl integer(c_int), value, intent(in) :: max_parallelism end subroutine fstarpu_codelet_set_max_parallelism function fstarpu_perfmodel_allocate () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_perfmodel_allocate end function fstarpu_perfmodel_allocate subroutine fstarpu_perfmodel_free (model) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: model end subroutine fstarpu_perfmodel_free subroutine fstarpu_perfmodel_set_symbol (model, model_symbol) bind(C) use iso_c_binding, only: c_ptr, c_char type(c_ptr), value, intent(in) :: model character(c_char), intent(in) :: model_symbol end subroutine fstarpu_perfmodel_set_symbol subroutine fstarpu_perfmodel_set_type (model, type) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: model type(c_ptr), value, intent(in) :: type ! C function expects an intptr_t end subroutine fstarpu_perfmodel_set_type ! == starpu_data_interface.h == ! uintptr_t starpu_malloc_on_node_flags(unsigned dst_node, size_t size, int flags); ! uintptr_t starpu_malloc_on_node(unsigned dst_node, size_t size); function fstarpu_malloc_on_node(node,sz) bind(C,name="starpu_malloc_on_node") use iso_c_binding, only: c_int,c_intptr_t,c_size_t integer(c_intptr_t) :: fstarpu_malloc_on_node integer(c_int), value, intent(in) :: node integer(c_size_t), value, intent(in) :: sz end function fstarpu_malloc_on_node ! void starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags); ! void starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size); subroutine fstarpu_free_on_node(node,addr,sz) bind(C,name="starpu_free_on_node") use iso_c_binding, only: c_int,c_intptr_t,c_size_t integer(c_int), value, intent(in) :: node integer(c_intptr_t), value, intent(in) :: addr integer(c_size_t), value, intent(in) :: sz end subroutine fstarpu_free_on_node ! void starpu_malloc_on_node_set_default_flags(unsigned node, int flags); ! int starpu_data_interface_get_next_id(void); ! void starpu_data_register(starpu_data_handle_t *handleptr, unsigned home_node, void *data_interface, struct starpu_data_interface_ops *ops); ! void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node); subroutine fstarpug_data_ptr_register (dh,node) bind(C,name="starpu_data_ptr_register") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end subroutine fstarpug_data_ptr_register ! void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc); subroutine fstarpu_data_register_same (dh_dst,dh_src) bind(C,name="starpu_data_register_same") use iso_c_binding, only: c_ptr type(c_ptr), intent(out) :: dh_dst type(c_ptr), value, intent(in) :: dh_src end subroutine fstarpu_data_register_same ! void *starpu_data_handle_to_pointer(starpu_data_handle_t handle, unsigned node); function fstarpu_data_handle_to_pointer (dh,node) bind(C,name="starpu_data_handle_to_pointer") use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_handle_to_pointer type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end function fstarpu_data_handle_to_pointer ! void *starpu_data_get_local_ptr(starpu_data_handle_t handle); function fstarpu_data_get_local_ptr (dh) bind(C,name="starpu_data_get_local_ptr") use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_get_local_ptr type(c_ptr), value, intent(in) :: dh end function fstarpu_data_get_local_ptr ! void *starpu_data_get_interface_on_node(starpu_data_handle_t handle, unsigned memory_node); ! == starpu_data_interface.h: tensor == ! void starpu_tensor_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t ldt, uint32_t nx, uint32_t ny, uint32_t nz, uint32_t nt, size_t elemsize); subroutine fstarpu_tensor_data_register(dh, home_node, ptr, ldy, ldz, ldt, nx, ny, nz, nt, elt_size) & bind(C,name="starpu_tensor_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: home_node type(c_ptr), value, intent(in) :: ptr integer(c_int), value, intent(in) :: ldy integer(c_int), value, intent(in) :: ldz integer(c_int), value, intent(in) :: ldt integer(c_int), value, intent(in) :: nx integer(c_int), value, intent(in) :: ny integer(c_int), value, intent(in) :: nz integer(c_int), value, intent(in) :: nt integer(c_size_t), value, intent(in) :: elt_size end subroutine fstarpu_tensor_data_register ! void starpu_tensor_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz, uint32_t ldt); subroutine fstarpu_tensor_ptr_register(dh, node, ptr, dev_handle, offset, ldy, ldz, ldt) & bind(C,name="starpu_tensor_ptr_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: node type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: dev_handle integer(c_size_t), value, intent(in) :: offset integer(c_int), value, intent(in) :: ldy integer(c_int), value, intent(in) :: ldz integer(c_int), value, intent(in) :: ldt end subroutine fstarpu_tensor_ptr_register function fstarpu_tensor_get_ptr(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_tensor_get_ptr type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_ptr function fstarpu_tensor_get_ldy(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_ldy type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_ldy function fstarpu_tensor_get_ldz(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_ldz type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_ldz function fstarpu_tensor_get_ldt(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_ldt type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_ldt function fstarpu_tensor_get_nx(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_nx type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_nx function fstarpu_tensor_get_ny(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_ny type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_ny function fstarpu_tensor_get_nz(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_nz type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_nz function fstarpu_tensor_get_nt(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_tensor_get_nt type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_tensor_get_nt ! == starpu_data_interface.h: block == ! void starpu_block_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, uint32_t ny, uint32_t nz, size_t elemsize); subroutine fstarpu_block_data_register(dh, home_node, ptr, ldy, ldz, nx, ny, nz, elt_size) & bind(C,name="starpu_block_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: home_node type(c_ptr), value, intent(in) :: ptr integer(c_int), value, intent(in) :: ldy integer(c_int), value, intent(in) :: ldz integer(c_int), value, intent(in) :: nx integer(c_int), value, intent(in) :: ny integer(c_int), value, intent(in) :: nz integer(c_size_t), value, intent(in) :: elt_size end subroutine fstarpu_block_data_register ! void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz); subroutine fstarpu_block_ptr_register(dh, node, ptr, dev_handle, offset, ldy, ldz) & bind(C,name="starpu_block_ptr_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: node type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: dev_handle integer(c_size_t), value, intent(in) :: offset integer(c_int), value, intent(in) :: ldy integer(c_int), value, intent(in) :: ldz end subroutine fstarpu_block_ptr_register function fstarpu_block_get_ptr(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_block_get_ptr type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_ptr function fstarpu_block_get_ldy(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_block_get_ldy type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_ldy function fstarpu_block_get_ldz(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_block_get_ldz type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_ldz function fstarpu_block_get_nx(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_block_get_nx type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_nx function fstarpu_block_get_ny(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_block_get_ny type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_ny function fstarpu_block_get_nz(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_block_get_nz type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_block_get_nz ! == starpu_data_interface.h: matrix == ! void starpu_matrix_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize); subroutine fstarpu_matrix_data_register(dh, home_node, ptr, ld, nx, ny, elt_size) & bind(C,name="starpu_matrix_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: home_node type(c_ptr), value, intent(in) :: ptr integer(c_int), value, intent(in) :: ld integer(c_int), value, intent(in) :: nx integer(c_int), value, intent(in) :: ny integer(c_size_t), value, intent(in) :: elt_size end subroutine fstarpu_matrix_data_register ! void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld); subroutine fstarpu_matrix_ptr_register(dh, node, ptr, dev_handle, offset, ld) & bind(C,name="starpu_matrix_ptr_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: node type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: dev_handle integer(c_size_t), value, intent(in) :: offset integer(c_int), value, intent(in) :: ld end subroutine fstarpu_matrix_ptr_register function fstarpu_matrix_get_ptr(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_matrix_get_ptr type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_matrix_get_ptr function fstarpu_matrix_get_ld(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_matrix_get_ld type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_matrix_get_ld function fstarpu_matrix_get_nx(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_matrix_get_nx type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_matrix_get_nx function fstarpu_matrix_get_ny(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_matrix_get_ny type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_matrix_get_ny ! == starpu_data_interface.h: vector == ! void starpu_vector_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, uint32_t nx, size_t elemsize); subroutine fstarpu_vector_data_register(dh, home_node, ptr,nx, elt_size) & bind(C,name="starpu_vector_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: home_node type(c_ptr), value, intent(in) :: ptr integer(c_int), value, intent(in) :: nx integer(c_size_t), value, intent(in) :: elt_size end subroutine fstarpu_vector_data_register ! void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset); subroutine fstarpu_vector_ptr_register(dh, node, ptr, dev_handle, offset) & bind(C,name="starpu_vector_ptr_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: node type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: dev_handle integer(c_size_t), value, intent(in) :: offset end subroutine fstarpu_vector_ptr_register function fstarpu_vector_get_ptr(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_vector_get_ptr type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_vector_get_ptr function fstarpu_vector_get_nx(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_vector_get_nx type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_vector_get_nx ! == starpu_data_interface.h: variable == ! void starpu_variable_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, size_t size); subroutine fstarpu_variable_data_register(dh, home_node, ptr, elt_size) & bind(C,name="starpu_variable_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: home_node type(c_ptr), value, intent(in) :: ptr integer(c_size_t), value, intent(in) :: elt_size end subroutine fstarpu_variable_data_register ! void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset); subroutine fstarpu_variable_ptr_register(dh, node, ptr, dev_handle, offset) & bind(C,name="starpu_variable_ptr_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh integer(c_int), value, intent(in) :: node type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: dev_handle integer(c_size_t), value, intent(in) :: offset end subroutine fstarpu_variable_ptr_register function fstarpu_variable_get_ptr(buffers, i) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_variable_get_ptr type(c_ptr), value, intent(in) :: buffers integer(c_int), value, intent(in) :: i end function fstarpu_variable_get_ptr ! == starpu_data_interface.h: void == ! void starpu_void_data_register(starpu_data_handle_t *handle); subroutine fstarpu_void_data_register(dh) & bind(C,name="starpu_void_data_register") use iso_c_binding, only: c_ptr, c_int, c_size_t type(c_ptr), intent(out) :: dh end subroutine fstarpu_void_data_register ! == starpu_data_filter.h == function fstarpu_data_filter_allocate () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_data_filter_allocate end function fstarpu_data_filter_allocate subroutine fstarpu_data_filter_free (filter) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: filter end subroutine fstarpu_data_filter_free ! Note: use fstarpu_df_alloc_ prefix instead of fstarpu_data_filter_allocate_ ! to fit within the Fortran id length limit */ function fstarpu_df_alloc_bcsr_filter_canonical_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_bcsr_filter_canonical_block end function fstarpu_df_alloc_bcsr_filter_canonical_block function fstarpu_df_alloc_csr_filter_vertical_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_csr_filter_vertical_block end function fstarpu_df_alloc_csr_filter_vertical_block function fstarpu_df_alloc_matrix_filter_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_matrix_filter_block end function fstarpu_df_alloc_matrix_filter_block function fstarpu_df_alloc_matrix_filter_block_shadow () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_matrix_filter_block_shadow end function fstarpu_df_alloc_matrix_filter_block_shadow function fstarpu_df_alloc_matrix_filter_vertical_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_matrix_filter_vertical_block end function fstarpu_df_alloc_matrix_filter_vertical_block function fstarpu_df_alloc_matrix_filter_vertical_block_shadow () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_matrix_filter_vertical_block_shadow end function fstarpu_df_alloc_matrix_filter_vertical_block_shadow function fstarpu_df_alloc_vector_filter_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_vector_filter_block end function fstarpu_df_alloc_vector_filter_block function fstarpu_df_alloc_vector_filter_block_shadow () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_vector_filter_block_shadow end function fstarpu_df_alloc_vector_filter_block_shadow function fstarpu_df_alloc_vector_filter_list () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_vector_filter_list end function fstarpu_df_alloc_vector_filter_list function fstarpu_df_alloc_vector_filter_divide_in_2 () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_vector_filter_divide_in_2 end function fstarpu_df_alloc_vector_filter_divide_in_2 function fstarpu_df_alloc_block_filter_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_block_filter_block end function fstarpu_df_alloc_block_filter_block function fstarpu_df_alloc_block_filter_block_shadow () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_block_filter_block_shadow end function fstarpu_df_alloc_block_filter_block_shadow function fstarpu_df_alloc_block_filter_vertical_block () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_block_filter_vertical_block end function fstarpu_df_alloc_block_filter_vertical_block function fstarpu_df_alloc_block_filter_vertical_block_shadow () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_df_alloc_block_filter_vertical_block_shadow end function fstarpu_df_alloc_block_filter_vertical_block_shadow subroutine fstarpu_data_filter_set_filter_func (filter, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: filter type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_data_filter_set_filter_func subroutine fstarpu_data_filter_set_nchildren (filter, nchildren) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: filter integer(c_int), value, intent(in) :: nchildren end subroutine fstarpu_data_filter_set_nchildren subroutine fstarpu_data_filter_set_get_nchildren_func (filter, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: filter type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_data_filter_set_get_nchildren_func subroutine fstarpu_data_filter_set_get_child_ops_func (filter, f_ptr) bind(C) use iso_c_binding, only: c_ptr, c_funptr type(c_ptr), value, intent(in) :: filter type(c_funptr), value, intent(in) :: f_ptr end subroutine fstarpu_data_filter_set_get_child_ops_func subroutine fstarpu_data_filter_set_filter_arg (filter, filter_arg) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: filter integer(c_int), value, intent(in) :: filter_arg end subroutine fstarpu_data_filter_set_filter_arg subroutine fstarpu_data_filter_set_filter_arg_ptr (filter, filter_arg_ptr) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: filter_arg_ptr end subroutine fstarpu_data_filter_set_filter_arg_ptr ! void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f); subroutine fstarpu_data_partition (dh,filter) bind(C,name="starpu_data_partition") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: filter end subroutine fstarpu_data_partition ! void starpu_data_unpartition(starpu_data_handle_t root_data, unsigned gathering_node); subroutine fstarpu_data_unpartition (root_dh,gathering_node) bind(C,name="starpu_data_unpartition") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: root_dh integer(c_int), value, intent(in) :: gathering_node end subroutine fstarpu_data_unpartition ! void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children); subroutine fstarpu_data_partition_plan (dh,filter,children) & bind(C,name="starpu_data_partition_plan") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: filter type(c_ptr), intent(in) :: children(*) end subroutine fstarpu_data_partition_plan ! void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); subroutine fstarpu_data_partition_submit (dh,nparts,children) & bind(C,name="starpu_data_partition_submit") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) end subroutine fstarpu_data_partition_submit ! void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); subroutine fstarpu_data_partition_readonly_submit (dh,nparts,children) & bind(C,name="starpu_data_partition_readonly_submit") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) end subroutine fstarpu_data_partition_readonly_submit ! void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children); subroutine fstarpu_data_partition_readwrite_upgrade_submit (dh,nparts,children) & bind(C,name="starpu_data_partition_readwrite_upgrade_submit") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) end subroutine fstarpu_data_partition_readwrite_upgrade_submit ! void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); subroutine fstarpu_data_unpartition_submit (dh,nparts,children,gathering_node) & bind(C,name="starpu_data_unpartition_submit") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) integer(c_int), value, intent(in) :: gathering_node end subroutine fstarpu_data_unpartition_submit ! void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node); subroutine fstarpu_data_unpartition_readonly_submit (dh,nparts,children,gathering_node) & bind(C,name="starpu_data_unpartition_readonly_submit") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) integer(c_int), value, intent(in) :: gathering_node end subroutine fstarpu_data_unpartition_readonly_submit ! void starpu_data_partition_clean(starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children); subroutine fstarpu_data_partition_clean (dh,nparts,children) & bind(C,name="starpu_data_partition_clean") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: nparts type(c_ptr), intent(in) :: children(*) end subroutine fstarpu_data_partition_clean ! int starpu_data_get_nb_children(starpu_data_handle_t handle); function fstarpu_data_get_nb_children(dh) bind(C,name="starpu_data_get_nb_children") use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_data_get_nb_children type(c_ptr), value, intent(in) :: dh end function fstarpu_data_get_nb_children ! starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i); function fstarpu_data_get_child(dh,i) bind(C,name="starpu_data_get_child") use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_get_child type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: i end function fstarpu_data_get_child ! starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_data, unsigned depth, ... ); ! . see: fstarpu_data_get_sub_data ! starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_data, unsigned depth, va_list pa); ! . see: fstarpu_data_get_sub_data ! note: defined in filters.c function fstarpu_data_get_sub_data (root_dh,depth,indices) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_get_sub_data type(c_ptr), value, intent(in) :: root_dh integer(c_int), value, intent(in) :: depth integer(c_int), intent(in) :: indices(*) end function fstarpu_data_get_sub_data ! void starpu_data_map_filters(starpu_data_handle_t root_data, unsigned nfilters, ...); ! . see fstarpu_data_map_filters ! void starpu_data_vmap_filters(starpu_data_handle_t root_data, unsigned nfilters, va_list pa); ! . see fstarpu_data_map_filters ! note: defined in filters.c subroutine fstarpu_data_map_filters (root_dh,nfilters,filters) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: root_dh integer(c_int), value, intent(in) :: nfilters type(c_ptr), intent(in) :: filters(*) end subroutine fstarpu_data_map_filters ! void starpu_matrix_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_matrix_filter_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_matrix_filter_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_matrix_filter_block ! void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_matrix_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_matrix_filter_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_matrix_filter_block_shadow ! void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_matrix_filter_vertical_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_matrix_filter_vertical_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_matrix_filter_vertical_block ! void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_matrix_filter_vertical_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_matrix_filter_vertical_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_matrix_filter_vertical_block_shadow ! void starpu_vector_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_vector_filter_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_vector_filter_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_vector_filter_block ! void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_vector_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_vector_filter_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_vector_filter_block_shadow ! void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_vector_filter_list_long (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_vector_filter_list_long") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_vector_filter_list_long ! void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_vector_filter_list (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_vector_filter_list") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_vector_filter_list ! void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_vector_divide_in_2 (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_vector_divide_in_2") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_vector_divide_in_2 ! void starpu_block_filter_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_block ! void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_block_shadow ! void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_vertical_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_vertical_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_vertical_block ! void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_vertical_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_vertical_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_vertical_block_shadow ! void starpu_block_filter_depth_block(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_depth_block (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_depth_block") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_depth_block ! void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts); subroutine fstarpu_block_filter_depth_block_shadow (father_interface,child_interface,filter,id,nparts) & bind(C,name="starpu_block_filter_depth_block_shadow") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: father_interface type(c_ptr), value, intent(in) :: child_interface type(c_ptr), value, intent(in) :: filter type(c_ptr), value, intent(in) :: id type(c_ptr), value, intent(in) :: nparts end subroutine fstarpu_block_filter_depth_block_shadow ! == starpu_data.h == ! void starpu_data_unregister(starpu_data_handle_t handle); subroutine fstarpu_data_unregister (dh) bind(C,name="starpu_data_unregister") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_unregister ! void starpu_data_unregister_no_coherency(starpu_data_handle_t handle); subroutine fstarpu_data_unregister_no_coherency (dh) bind(C,name="starpu_data_unregister_no_coherency") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_unregister_no_coherency ! void starpu_data_unregister_submit(starpu_data_handle_t handle); subroutine fstarpu_data_unregister_submit (dh) bind(C,name="starpu_data_unregister_submit") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_unregister_submit ! void starpu_data_deinitialize(starpu_data_handle_t handle); subroutine fstarpu_data_deinitialize (dh) bind(C,name="starpu_data_deinitialize") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_deinitialize ! void starpu_data_deinitialize_submit(starpu_data_handle_t handle); subroutine fstarpu_data_deinitialize_submit (dh) bind(C,name="starpu_data_deinitialize_submit") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_deinitialize_submit ! void starpu_data_invalidate(starpu_data_handle_t handle); subroutine fstarpu_data_invalidate (dh) bind(C,name="starpu_data_invalidate") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_invalidate ! void starpu_data_invalidate_submit(starpu_data_handle_t handle); subroutine fstarpu_data_invalidate_submit (dh) bind(C,name="starpu_data_invalidate_submit") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_invalidate_submit ! void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important); subroutine fstarpu_data_advise_as_important (dh,is_important) bind(C,name="starpu_data_advise_as_important") use iso_c_binding, only: c_ptr,c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: is_important end subroutine fstarpu_data_advise_as_important ! starpu_data_acquire: see fstarpu_data_acquire subroutine fstarpu_data_acquire (dh, mode) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: mode ! C function expects an intptr_t end subroutine fstarpu_data_acquire ! int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode); ! int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); ! int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg); ! int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); ! int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency); ! void starpu_data_release(starpu_data_handle_t handle); subroutine fstarpu_data_release (dh) bind(C,name="starpu_data_release") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_release ! void starpu_data_release_on_node(starpu_data_handle_t handle, int node); subroutine fstarpu_data_release_on_node (dh, node) bind(C,name="starpu_data_release_on_node") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end subroutine fstarpu_data_release_on_node ! starpu_arbiter_t starpu_arbiter_create(void) STARPU_ATTRIBUTE_MALLOC; function fstarpu_arbiter_create () bind(C,name="starpu_arbiter_create") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_arbiter_create end function fstarpu_arbiter_create ! void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter); subroutine fstarpu_data_assign_arbiter (dh,arbiter) bind(C,name="starpu_data_assign_arbiter") use iso_c_binding, only: c_ptr type(c_ptr), intent(out) :: dh type(c_ptr), value, intent(in) :: arbiter end subroutine fstarpu_data_assign_arbiter ! void starpu_arbiter_destroy(starpu_arbiter_t arbiter); subroutine fstarpu_arbiter_destroy (arbiter) bind(C,name="starpu_arbiter_destroy") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: arbiter end subroutine fstarpu_arbiter_destroy ! void starpu_data_display_memory_stats(); subroutine fstarpu_display_memory_stats() bind(C,name="starpu_display_memory_stats") end subroutine fstarpu_display_memory_stats ! int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node); subroutine fstarpu_data_request_allocation (dh, node) & bind(C,name="starpu_data_request_allocation") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end subroutine fstarpu_data_request_allocation ! int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); subroutine fstarpu_data_fetch_on_node (dh, node, async) & bind(C,name="starpu_data_fetch_on_node") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node integer(c_int), value, intent(in) :: async end subroutine fstarpu_data_fetch_on_node ! int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); subroutine fstarpu_data_prefetch_on_node (dh, node, async) & bind(C,name="starpu_data_prefetch_on_node") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node integer(c_int), value, intent(in) :: async end subroutine fstarpu_data_prefetch_on_node ! int starpu_data_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); subroutine fstarpu_data_prefetch_on_node_prio (dh, node, async, prio) & bind(C,name="starpu_data_prefetch_on_node_prio") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node integer(c_int), value, intent(in) :: async integer(c_int), value, intent(in) :: prio end subroutine fstarpu_data_prefetch_on_node_prio ! int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async); subroutine fstarpu_data_idle_prefetch_on_node (dh, node, async) & bind(C,name="starpu_data_idle_prefetch_on_node") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node integer(c_int), value, intent(in) :: async end subroutine fstarpu_data_idle_prefetch_on_node ! int starpu_data_idle_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio); subroutine fstarpu_data_idle_prefetch_on_node_prio (dh, node, async, prio) & bind(C,name="starpu_data_idle_prefetch_on_node_prio") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node integer(c_int), value, intent(in) :: async integer(c_int), value, intent(in) :: prio end subroutine fstarpu_data_idle_prefetch_on_node_prio !unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node); function fstarpu_data_is_on_node(dh, node) & bind(C,name="starpu_data_is_on_node") use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_data_is_on_node type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end function fstarpu_data_is_on_node ! void starpu_data_wont_use(starpu_data_handle_t handle); subroutine fstarpu_data_wont_use (dh) bind(c,name="starpu_data_wont_use") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_data_wont_use ! unsigned starpu_worker_get_memory_node(unsigned workerid); function fstarpu_worker_get_memory_node(id) bind(C,name="starpu_worker_get_memory_node") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_worker_get_memory_node integer(c_int), value, intent(in) :: id end function fstarpu_worker_get_memory_node ! unsigned starpu_memory_nodes_get_count(void); function fstarpu_memory_nodes_get_count() bind(C,name="starpu_memory_nodes_get_count") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_memory_nodes_get_count end function fstarpu_memory_nodes_get_count ! enum starpu_node_kind starpu_node_get_kind(unsigned node); ! void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask); ! void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag); ! unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle); ! unsigned starpu_data_get_default_sequential_consistency_flag(void); ! void starpu_data_set_default_sequential_consistency_flag(unsigned flag); ! void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested); ! void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl); subroutine fstarpu_data_set_reduction_methods (dh,redux_cl,init_cl) bind(C,name="starpu_data_set_reduction_methods") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: redux_cl type(c_ptr), value, intent(in) :: init_cl end subroutine fstarpu_data_set_reduction_methods ! void starpu_data_set_reduction_methods_with_args(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, void *redux_args, struct starpu_codelet *init_cl, void *init_args) subroutine fstarpu_data_set_reduction_methods_with_args (dh,redux_cl,redux_args,init_cl,init_args) & bind(C,name="starpu_data_set_reduction_methods_with_args") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: redux_cl type(c_ptr), value, intent(in) :: redux_args type(c_ptr), value, intent(in) :: init_cl type(c_ptr), value, intent(in) :: init_args end subroutine fstarpu_data_set_reduction_methods_with_args ! struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle); ! unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node); function fstarpu_data_test_if_allocated_on_node(dh,mem_node) bind(C,name="starpu_data_test_if_allocated_on_node") use iso_c_binding, only: c_ptr, c_int integer(c_int) :: fstarpu_data_test_if_allocated_on_node type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: mem_node end function fstarpu_data_test_if_allocated_on_node ! void starpu_memchunk_tidy(unsigned memory_node); subroutine fstarpu_memchunk_tidy (mem_node) bind(c,name="starpu_memchunk_tidy") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: mem_node end subroutine fstarpu_memchunk_tidy ! == starpu_task_util.h == ! starpu_data_handle_t *fstarpu_data_handle_array_alloc(int nb); function fstarpu_data_handle_array_alloc (nb) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_handle_array_alloc integer(c_int), value, intent(in) :: nb end function fstarpu_data_handle_array_alloc ! void fstarpu_data_handle_array_free(starpu_data_handle_t *handles); subroutine fstarpu_data_handle_array_free (handles) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: handles end subroutine fstarpu_data_handle_array_free ! void fstarpu_data_handle_array_set(starpu_data_handle_t *handles, int i, starpu_data_handle_t handle); subroutine fstarpu_data_handle_array_set (handles, i, handle) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: handles integer(c_int), value, intent(in) :: i type(c_ptr), value, intent(in) :: handle end subroutine fstarpu_data_handle_array_set ! struct starpu_data_descr *fstarpu_data_descr_array_alloc(int nb); function fstarpu_data_descr_array_alloc (nb) bind(C) use iso_c_binding, only: c_ptr, c_int type(c_ptr) :: fstarpu_data_descr_array_alloc integer(c_int), value, intent(in) :: nb end function fstarpu_data_descr_array_alloc ! struct starpu_data_descr *fstarpu_data_descr_alloc(void); function fstarpu_data_descr_alloc () bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_data_descr_alloc end function fstarpu_data_descr_alloc ! void fstarpu_data_descr_array_free(struct starpu_data_descr *descrs); subroutine fstarpu_data_descr_array_free (descrs) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: descrs end subroutine fstarpu_data_descr_array_free ! void fstarpu_data_descr_free(struct starpu_data_descr *descr); subroutine fstarpu_data_descrg_free (descr) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: descr end subroutine fstarpu_data_descrg_free ! void fstarpu_data_descr_array_set(struct starpu_data_descr *descrs, int i, starpu_data_handle_t handle, intptr_t mode); subroutine fstarpu_data_descr_array_set (descrs, i, handle, mode) bind(C) use iso_c_binding, only: c_ptr, c_int, c_intptr_t type(c_ptr), value, intent(in) :: descrs integer(c_int), value, intent(in) :: i type(c_ptr), value, intent(in) :: handle type(c_ptr), value, intent(in) :: mode ! C func expects c_intptr_t end subroutine fstarpu_data_descr_array_set ! void fstarpu_data_descr_set(struct starpu_data_descr *descr, starpu_data_handle_t handle, intptr_t mode); subroutine fstarpu_data_descr_set (descr, handle, mode) bind(C) use iso_c_binding, only: c_ptr, c_intptr_t type(c_ptr), value, intent(in) :: descr type(c_ptr), value, intent(in) :: handle type(c_ptr), value, intent(in) :: mode ! C func expects c_intptr_t end subroutine fstarpu_data_descr_set subroutine fstarpu_task_insert(arglist) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), dimension(*), intent(in) :: arglist end subroutine fstarpu_task_insert subroutine fstarpu_insert_task(arglist) bind(C,name="fstarpu_task_insert") use iso_c_binding, only: c_ptr type(c_ptr), dimension(*), intent(in) :: arglist end subroutine fstarpu_insert_task subroutine fstarpu_unpack_arg(cl_arg,bufferlist) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: cl_arg type(c_ptr), dimension(*), intent(in) :: bufferlist end subroutine fstarpu_unpack_arg ! void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg) subroutine fstarpu_create_sync_task(sync_tag, ndeps, tag_array, callback, callback_arg) & bind(C,name="starpu_create_sync_task") use iso_c_binding, only: c_int, c_long_long, c_ptr, c_funptr integer(c_int), value, intent(in) :: sync_tag integer(c_int), value, intent(in) :: ndeps integer(c_long_long), intent(in) :: tag_array(*) type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: callback_arg end subroutine fstarpu_create_sync_task ! == starpu_sched_ctx.h == ! starpu_sched_ctx_create: see fstarpu_sched_ctx_create function fstarpu_sched_ctx_create(workers_array,nworkers,ctx_name, arglist) bind(C) use iso_c_binding, only: c_int, c_char, c_ptr integer(c_int) :: fstarpu_sched_ctx_create integer(c_int), intent(in) :: workers_array(*) integer(c_int), value, intent(in) :: nworkers character(c_char), intent(in) :: ctx_name type(c_ptr), dimension(*), intent(in) :: arglist end function fstarpu_sched_ctx_create ! unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const char *sched_ctx_name, int min_ncpus, int max_ncpus, int min_ngpus, int max_ngpus, unsigned allow_overlap); function fstarpu_sched_ctx_create_inside_interval(policy_name, sched_ctx_name, & min_ncpus, max_ncpus, min_ngpus, max_ngpus, allow_overlap) & bind(C,name="starpu_sched_ctx_create_inside_interval") use iso_c_binding, only: c_int, c_char integer(c_int) :: fstarpu_sched_ctx_create_inside_interval character(c_char), intent(in) :: policy_name character(c_char), intent(in) :: sched_ctx_name integer(c_int), value, intent(in) :: min_ncpus integer(c_int), value, intent(in) :: max_ncpus integer(c_int), value, intent(in) :: min_ngpus integer(c_int), value, intent(in) :: max_ngpus integer(c_int), value, intent(in) :: allow_overlap end function fstarpu_sched_ctx_create_inside_interval ! void starpu_sched_ctx_register_close_callback(unsigned sched_ctx_id, void (*close_callback)(unsigned sched_ctx_id, void* args), void *args); subroutine fstarpu_sched_ctx_register_close_callback (sched_ctx_id, close_callback, args) & bind(c,name="starpu_sched_ctx_register_close_callback") use iso_c_binding, only: c_ptr, c_funptr, c_int integer(c_int), value, intent(in) :: sched_ctx_id type(c_funptr), value, intent(in) :: close_callback type(c_ptr), value, intent(in) :: args end subroutine fstarpu_sched_ctx_register_close_callback ! void starpu_sched_ctx_add_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_add_workers(workerids,nworkers,ctx) bind(C,name="starpu_sched_ctx_add_workers") use iso_c_binding, only: c_int integer(c_int), intent(in) :: workerids (*) integer(c_int), value, intent(in) :: nworkers integer(c_int), value, intent(in) :: ctx end subroutine fstarpu_sched_ctx_add_workers ! void starpu_sched_ctx_remove_workers(int *workerids_ctx, int nworkers_ctx, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_remove_workers(workerids,nworkers,ctx) bind(C,name="starpu_sched_ctx_remove_workers") use iso_c_binding, only: c_int integer(c_int), intent(in) :: workerids (*) integer(c_int), value, intent(in) :: nworkers integer(c_int), value, intent(in) :: ctx end subroutine fstarpu_sched_ctx_remove_workers ! starpu_sched_ctx_display_workers: see fstarpu_sched_ctx_display_workers subroutine fstarpu_sched_ctx_display_workers (ctx) bind(C) use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: ctx end subroutine fstarpu_sched_ctx_display_workers ! void starpu_sched_ctx_delete(unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_delete (ctx) bind(C,name="starpu_sched_ctx_delete") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: ctx end subroutine fstarpu_sched_ctx_delete ! void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor); subroutine fstarpu_sched_ctx_set_inheritor (ctx,inheritor) bind(C,name="starpu_sched_ctx_set_inheritor") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: ctx integer(c_int), value, intent(in) :: inheritor end subroutine fstarpu_sched_ctx_set_inheritor ! unsigned starpu_sched_ctx_get_inheritor(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_inheritor (ctx) bind(C,name="starpu_sched_ctx_get_inheritor") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_inheritor integer(c_int), value, intent(in) :: ctx end function fstarpu_sched_ctx_get_inheritor ! unsigned starpu_sched_ctx_get_hierarchy_level(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_hierarchy_level (ctx) bind(C,name="starpu_sched_ctx_get_hierarchy_level") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_hierarchy_level integer(c_int), value, intent(in) :: ctx end function fstarpu_sched_ctx_get_hierarchy_level ! void starpu_sched_ctx_set_context(unsigned *sched_ctx_id); subroutine fstarpu_sched_ctx_set_context (ctx_ptr) bind(C,name="starpu_sched_ctx_set_context") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: ctx_ptr end subroutine fstarpu_sched_ctx_set_context ! unsigned starpu_sched_ctx_get_context(void); function fstarpu_sched_ctx_get_context () bind(C,name="starpu_sched_ctx_get_context") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_context end function fstarpu_sched_ctx_get_context ! void starpu_sched_ctx_stop_task_submission(void); subroutine fstarpu_sched_ctx_stop_task_submission () bind(c,name="starpu_sched_ctx_stop_task_submission") use iso_c_binding end subroutine fstarpu_sched_ctx_stop_task_submission ! void starpu_sched_ctx_finished_submit(unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_finished_submit (sched_ctx_id) bind(c,name="starpu_sched_ctx_finished_submit") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_finished_submit ! unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids); ! unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids); ! unsigned starpu_sched_ctx_get_nworkers(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_nworkers (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_nworkers") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_nworkers integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_nworkers ! unsigned starpu_sched_ctx_get_nshared_workers(unsigned sched_ctx_id, unsigned sched_ctx_id2); function fstarpu_sched_ctx_get_nshared_workers (sched_ctx_id, sched_ctx_id2) & bind(c,name="starpu_sched_ctx_get_nshared_workers") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_nshared_workers integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: sched_ctx_id2 end function fstarpu_sched_ctx_get_nshared_workers ! unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id); function fstarpu_sched_ctx_contains_worker (workerid, sched_ctx_id) & bind(c,name="starpu_sched_ctx_contains_worker") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_contains_worker integer(c_int), value, intent(in) :: workerid integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_contains_worker ! unsigned starpu_sched_ctx_contains_type_of_worker(enum starpu_worker_archtype arch, unsigned sched_ctx_id); function fstarpu_sched_ctx_contains_type_of_worker (arch, sched_ctx_id) & bind(c,name="starpu_sched_ctx_contains_type_of_worker") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_contains_type_of_worker integer(c_int), value, intent(in) :: arch integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_contains_type_of_worker ! unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id); function fstarpu_sched_ctx_worker_get_id (sched_ctx_id) & bind(c,name="starpu_sched_ctx_worker_get_id") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_worker_get_id integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_worker_get_id ! unsigned starpu_sched_ctx_get_ctx_for_task(struct starpu_task *task); function fstarpu_sched_ctx_get_ctx_for_task (task) & bind(c,name="starpu_sched_ctx_get_ctx_for_task") use iso_c_binding, only: c_int, c_ptr integer(c_int) :: fstarpu_sched_ctx_get_ctx_for_task type(c_ptr), value, intent(in) :: task end function fstarpu_sched_ctx_get_ctx_for_task ! unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid); function fstarpu_sched_ctx_overlapping_ctxs_on_worker (workerid) & bind(c,name="starpu_sched_ctx_overlapping_ctxs_on_worker") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_overlapping_ctxs_on_worker integer(c_int), value, intent(in) :: workerid end function fstarpu_sched_ctx_overlapping_ctxs_on_worker ! int starpu_sched_get_min_priority(void); function fstarpu_sched_get_min_priority () & bind(c,name="starpu_sched_get_min_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_get_min_priority end function fstarpu_sched_get_min_priority ! int starpu_sched_get_max_priority(void); function fstarpu_sched_get_max_priority () & bind(c,name="starpu_sched_get_max_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_get_max_priority end function fstarpu_sched_get_max_priority ! int starpu_sched_set_min_priority(int min_prio); function fstarpu_sched_set_min_priority (min_prio) & bind(c,name="starpu_sched_set_min_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_set_min_priority integer(c_int), value, intent(in) :: min_prio end function fstarpu_sched_set_min_priority ! int starpu_sched_set_max_priority(int max_prio); function fstarpu_sched_set_max_priority (max_prio) & bind(c,name="starpu_sched_set_max_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_set_max_priority integer(c_int), value, intent(in) :: max_prio end function fstarpu_sched_set_max_priority ! int starpu_sched_ctx_get_min_priority(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_min_priority (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_min_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_min_priority integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_min_priority ! int starpu_sched_ctx_get_max_priority(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_max_priority (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_max_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_max_priority integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_max_priority ! int starpu_sched_ctx_set_min_priority(unsigned sched_ctx_id, int min_prio); function fstarpu_sched_ctx_set_min_priority (sched_ctx_id, min_prio) & bind(c,name="starpu_sched_ctx_set_min_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_set_min_priority integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: min_prio end function fstarpu_sched_ctx_set_min_priority ! int starpu_sched_ctx_set_max_priority(unsigned sched_ctx_id, int max_prio); function fstarpu_sched_ctx_set_max_priority (sched_ctx_id, max_prio) & bind(c,name="starpu_sched_ctx_set_max_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_set_max_priority integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: max_prio end function fstarpu_sched_ctx_set_max_priority ! int starpu_sched_ctx_min_priority_is_set(unsigned sched_ctx_id); function fstarpu_sched_ctx_min_priority_is_set (sched_ctx_id) & bind(c,name="starpu_sched_ctx_min_priority_is_set") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_min_priority_is_set integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_min_priority_is_set ! int starpu_sched_ctx_max_priority_is_set(unsigned sched_ctx_id); function fstarpu_sched_ctx_max_priority_is_set (sched_ctx_id) & bind(c,name="starpu_sched_ctx_max_priority_is_set") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_max_priority_is_set integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_max_priority_is_set ! void *starpu_sched_ctx_get_user_data(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_user_data(sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_user_data") use iso_c_binding, only: c_int, c_ptr integer(c_int), value, intent(in) :: sched_ctx_id type(c_ptr) :: fstarpu_sched_ctx_get_user_data end function fstarpu_sched_ctx_get_user_data ! struct starpu_worker_collection *starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type type) STARPU_ATTRIBUTE_MALLOC; ! void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_delete_worker_collection (sched_ctx_id) & bind(c,name="starpu_sched_ctx_delete_worker_collection") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_delete_worker_collection ! struct starpu_worker_collection *starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id); ! void starpu_sched_ctx_set_policy_data(unsigned sched_ctx_id, void *policy_data); subroutine fstarpu_sched_ctx_set_policy_data (sched_ctx_id, policy_data) & bind(c,name="starpu_sched_ctx_set_policy_data") use iso_c_binding, only: c_int, c_ptr integer(c_int), value, intent(in) :: sched_ctx_id type(c_ptr), value, intent(in) :: policy_data end subroutine fstarpu_sched_ctx_set_policy_data ! void *starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_policy_data (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_policy_data") use iso_c_binding, only: c_int, c_ptr type(c_ptr) :: fstarpu_sched_ctx_get_policy_data integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_policy_data ! void *starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void *param, unsigned sched_ctx_id); function fstarpu_sched_ctx_exec_parallel_code (func, param, sched_ctx_id) & bind(c,name="starpu_sched_ctx_exec_parallel_code") use iso_c_binding, only: c_int, c_funptr, c_ptr type(c_ptr) :: fstarpu_sched_ctx_exec_parallel_code type(c_funptr), value, intent(in) :: func type(c_ptr), value, intent(in) :: param integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_exec_parallel_code ! int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_nready_tasks (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_nready_tasks") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_nready_tasks integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_nready_tasks ! double starpu_sched_ctx_get_nready_flops(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_nready_flops (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_nready_flops") use iso_c_binding, only: c_int, c_double real(c_double) :: fstarpu_sched_ctx_get_nready_flops integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_nready_flops ! void starpu_sched_ctx_list_task_counters_increment(unsigned sched_ctx_id, int workerid); subroutine fstarpu_sched_ctx_list_task_counters_increment (sched_ctx_id, workerid) & bind(c,name="starpu_sched_ctx_list_task_counters_increment") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: workerid end subroutine fstarpu_sched_ctx_list_task_counters_increment ! void starpu_sched_ctx_list_task_counters_decrement(unsigned sched_ctx_id, int workerid); subroutine fstarpu_sched_ctx_list_task_counters_decrement (sched_ctx_id, workerid) & bind(c,name="starpu_sched_ctx_list_task_counters_decrement") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: workerid end subroutine fstarpu_sched_ctx_list_task_counters_decrement ! void starpu_sched_ctx_list_task_counters_reset(unsigned sched_ctx_id, int workerid); subroutine fstarpu_sched_ctx_list_task_counters_reset (sched_ctx_id, workerid) & bind(c,name="starpu_sched_ctx_list_task_counters_reset") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: workerid end subroutine fstarpu_sched_ctx_list_task_counters_reset ! void starpu_sched_ctx_list_task_counters_increment_all(struct starpu_task *task, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_list_task_counters_increment_all (task, sched_ctx_id) & bind(c,name="starpu_sched_ctx_list_task_counters_increment_all") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_list_task_counters_increment_all ! void starpu_sched_ctx_list_task_counters_decrement_all(struct starpu_task *task, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_list_task_counters_decrement_all (task, sched_ctx_id) & bind(c,name="starpu_sched_ctx_list_task_counters_decrement_all") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_list_task_counters_decrement_all ! void starpu_sched_ctx_list_task_counters_reset_all(struct starpu_task *task, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_list_task_counters_reset_all (task, sched_ctx_id) & bind(c,name="starpu_sched_ctx_list_task_counters_reset_all") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_list_task_counters_reset_all ! unsigned starpu_sched_ctx_get_priority(int worker, unsigned sched_ctx_id); function fstarpu_sched_ctx_get_priority (worker, sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_priority") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_priority integer(c_int), value, intent(in) :: worker integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_priority ! void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids); ! void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid); subroutine fstarpu_sched_ctx_bind_current_thread_to_cpuid (cpuid) & bind(c,name="starpu_sched_ctx_bind_current_thread_to_cpuid") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: cpuid end subroutine fstarpu_sched_ctx_bind_current_thread_to_cpuid ! int starpu_sched_ctx_book_workers_for_task(unsigned sched_ctx_id, int *workerids, int nworkers); function fstarpu_sched_ctx_book_workers_for_task (sched_ctx_id, workerids, nworkers) & bind(c,name="starpu_sched_ctx_book_workers_for_task") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_book_workers_for_task integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), intent(in) :: workerids(*) integer(c_int), value, intent(in) :: nworkers end function fstarpu_sched_ctx_book_workers_for_task ! void starpu_sched_ctx_unbook_workers_for_task(unsigned sched_ctx_id, int master); subroutine fstarpu_sched_ctx_unbook_workers_for_task (sched_ctx_id, master) & bind(c,name="starpu_sched_ctx_unbook_workers_for_task") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: sched_ctx_id integer(c_int), value, intent(in) :: master end subroutine fstarpu_sched_ctx_unbook_workers_for_task ! unsigned starpu_sched_ctx_worker_is_master_for_child_ctx(int workerid, unsigned sched_ctx_id); function fstarpu_sched_ctx_worker_is_master_for_child_ctx (workerid, sched_ctx_id) & bind(c,name="starpu_sched_ctx_worker_is_master_for_child_ctx") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_worker_is_master_for_child_ctx integer(c_int), value, intent(in) :: workerid integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_worker_is_master_for_child_ctx ! unsigned starpu_sched_ctx_master_get_context(int masterid); function fstarpu_sched_ctx_master_get_context (masterid) & bind(c,name="starpu_sched_ctx_master_get_context") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_master_get_context integer(c_int), value, intent(in) :: masterid end function fstarpu_sched_ctx_master_get_context ! void starpu_sched_ctx_revert_task_counters(unsigned sched_ctx_id, double flops); subroutine fstarpu_sched_ctx_revert_task_counters (sched_ctx_id, flops) & bind(c,name="starpu_sched_ctx_revert_task_counters") use iso_c_binding, only: c_int, c_double integer(c_int), value, intent(in) :: sched_ctx_id real(c_double), value, intent(in) :: flops end subroutine fstarpu_sched_ctx_revert_task_counters ! void starpu_sched_ctx_move_task_to_ctx(struct starpu_task *task, unsigned sched_ctx, unsigned manage_mutex); subroutine fstarpu_sched_ctx_move_task_to_ctx (task, sched_ctx, manage_mutex) & bind(c,name="starpu_sched_ctx_move_task_to_ctx") use iso_c_binding, only: c_ptr, c_int type(c_ptr), value, intent(in) :: task integer(c_int), value, intent(in) :: sched_ctx integer(c_int), value, intent(in) :: manage_mutex end subroutine fstarpu_sched_ctx_move_task_to_ctx ! int starpu_sched_ctx_get_worker_rank(unsigned sched_ctx_id); function fstarpu_sched_ctx_get_worker_rank (sched_ctx_id) & bind(c,name="starpu_sched_ctx_get_worker_rank") use iso_c_binding, only: c_int integer(c_int) :: fstarpu_sched_ctx_get_worker_rank integer(c_int), value, intent(in) :: sched_ctx_id end function fstarpu_sched_ctx_get_worker_rank ! unsigned starpu_sched_ctx_has_starpu_scheduler(unsigned sched_ctx_id, unsigned *awake_workers); ! void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id); subroutine fstarpu_sched_ctx_call_pushed_task_cb (workerid, sched_ctx_id) & bind(c,name="starpu_sched_ctx_call_pushed_task_cb") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: workerid integer(c_int), value, intent(in) :: sched_ctx_id end subroutine fstarpu_sched_ctx_call_pushed_task_cb ! == starpu_fxt.h == ! void starpu_fxt_options_init(struct starpu_fxt_options *options); subroutine fstarpu_fxt_options_init (fxt_options) bind(C,name="starpu_fxt_options_init") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: fxt_options end subroutine fstarpu_fxt_options_init ! void starpu_fxt_generate_trace(struct starpu_fxt_options *options); subroutine fstarpu_fxt_generate_trace (fxt_options) bind(C,name="starpu_fxt_generate_trace") use iso_c_binding, only: c_ptr type(c_ptr), value, intent(in) :: fxt_options end subroutine fstarpu_fxt_generate_trace ! void starpu_fxt_autostart_profiling(int autostart); subroutine fstarpu_fxt_autostart_profiling (autostart) bind(c,name="starpu_fxt_autostart_profiling") use iso_c_binding, only: c_int integer(c_int), value, intent(in) :: autostart end subroutine fstarpu_fxt_autostart_profiling ! void starpu_fxt_start_profiling(void); subroutine fstarpu_fxt_start_profiling () bind(c,name="starpu_fxt_start_profiling") use iso_c_binding end subroutine fstarpu_fxt_start_profiling ! void starpu_fxt_stop_profiling(void); subroutine fstarpu_fxt_stop_profiling () bind(c,name="starpu_fxt_stop_profiling") use iso_c_binding end subroutine fstarpu_fxt_stop_profiling ! void starpu_fxt_write_data_trace(char *filename_in); subroutine fstarpu_fxt_write_data_trace (filename) bind(c,name="starpu_fxt_write_data_trace") use iso_c_binding, only: c_char character(c_char), intent(in) :: filename end subroutine fstarpu_fxt_write_data_trace ! void starpu_fxt_trace_user_event(unsigned long code); subroutine fstarpu_trace_user_event (code) bind(c,name="starpu_trace_user_event") use iso_c_binding, only: c_long integer(c_long), value, intent(in) :: code end subroutine fstarpu_trace_user_event ! double starpu_timing_now(void) function fstarpu_timing_now () bind(C,name="starpu_timing_now") use iso_c_binding, only: c_double real(c_double) :: fstarpu_timing_now end function fstarpu_timing_now ! == starpu_cuda.h == ! cudaStream_t starpu_cuda_get_local_stream(void); function fstarpu_cuda_get_local_stream () bind(C,name="starpu_cuda_get_local_stream") use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_cuda_get_local_stream end function fstarpu_cuda_get_local_stream ! == starpu_stdlib.h == ! int starpu_malloc(void **A, size_t dim); function fstarpu_malloc (ptr, len) bind(C,name="starpu_malloc") use iso_c_binding, only: c_ptr, c_long, c_int type(c_ptr), intent(out) :: ptr integer(c_long), value, intent(in) :: len integer(c_int) :: fstarpu_malloc end function fstarpu_malloc ! int starpu_free_noflag(void *A, size_t dim); function fstarpu_free_noflag (ptr, len) bind(C,name="starpu_free_noflag") use iso_c_binding, only: c_ptr, c_long, c_int type(c_ptr), value, intent(in) :: ptr integer(c_long), value, intent(in) :: len integer(c_int) :: fstarpu_free_noflag end function fstarpu_free_noflag ! int starpu_memory_pin(void *addr, size_t size); function fstarpu_memory_pin (ptr, len) bind(C,name="starpu_memory_pin") use iso_c_binding, only: c_ptr, c_long, c_int type(c_ptr), value, intent(in) :: ptr integer(c_long), value, intent(in) :: len integer(c_int) :: fstarpu_memory_pin end function fstarpu_memory_pin ! int starpu_memory_unpin(void *addr, size_t size); function fstarpu_memory_unpin (ptr, len) bind(C,name="starpu_memory_unpin") use iso_c_binding, only: c_ptr, c_long, c_int type(c_ptr), value, intent(in) :: ptr integer(c_long), value, intent(in) :: len integer(c_int) :: fstarpu_memory_unpin end function fstarpu_memory_unpin ! int starpu_sleep(float nb_sec); subroutine fstarpu_sleep (nb_sec) bind(C,name="starpu_sleep") use iso_c_binding, only: c_float real(c_float), value, intent(in) :: nb_sec end subroutine fstarpu_sleep ! int starpu_usleep(float nb_sec); subroutine fstarpu_usleep (nb_sec) bind(C,name="starpu_usleep") use iso_c_binding, only: c_float real(c_float), value, intent(in) :: nb_sec end subroutine fstarpu_usleep ! void starpu_cublas_init(void); subroutine fstarpu_cublas_init () bind(C,name="starpu_cublas_init") end subroutine fstarpu_cublas_init ! void starpu_cublas_shutdown(void); subroutine fstarpu_cublas_shutdown () bind(C,name="starpu_cublas_shutdown") end subroutine fstarpu_cublas_shutdown end interface contains function or_cptrs(op1,op2) type(c_ptr) :: or_cptrs type(c_ptr),intent(in) :: op1,op2 integer(c_intptr_t) :: i_op1,i_op2 i_op1 = transfer(op1,0_c_intptr_t) i_op2 = transfer(op2,0_c_intptr_t) or_cptrs = transfer(ior(i_op1,i_op2), C_NULL_PTR) end function function ip_to_p(i) bind(C) use iso_c_binding, only: c_ptr,c_intptr_t,C_NULL_PTR type(c_ptr) :: ip_to_p integer(c_intptr_t), value, intent(in) :: i ip_to_p = transfer(i,C_NULL_PTR) end function ip_to_p function p_to_ip(p) bind(C) use iso_c_binding, only: c_ptr,c_intptr_t integer(c_intptr_t) :: p_to_ip type(c_ptr), value, intent(in) :: p p_to_ip = transfer(p,0_c_intptr_t) end function p_to_ip function sz_to_p(sz) bind(C) use iso_c_binding, only: c_ptr,c_size_t,c_intptr_t type(c_ptr) :: sz_to_p integer(c_size_t), value, intent(in) :: sz sz_to_p = ip_to_p(int(sz,kind=c_intptr_t)) end function sz_to_p function fstarpu_init (conf) bind(C) use iso_c_binding integer(c_int) :: fstarpu_init type(c_ptr), value, intent(in) :: conf real(c_double) :: FSTARPU_SZ_C_DOUBLE_dummy real(c_float) :: FSTARPU_SZ_C_FLOAT_dummy character(c_char) :: FSTARPU_SZ_C_CHAR_dummy integer(c_int) :: FSTARPU_SZ_C_INT_dummy integer(c_intptr_t) :: FSTARPU_SZ_C_INTPTR_T_dummy type(c_ptr) :: FSTARPU_SZ_C_PTR_dummy integer(c_size_t) :: FSTARPU_SZ_C_SIZE_T_dummy character :: FSTARPU_SZ_CHARACTER_dummy integer :: FSTARPU_SZ_INTEGER_dummy integer(4) :: FSTARPU_SZ_INT4_dummy integer(8) :: FSTARPU_SZ_INT8_dummy real :: FSTARPU_SZ_REAL_dummy real(4) :: FSTARPU_SZ_REAL4_dummy real(8) :: FSTARPU_SZ_REAL8_dummy double precision :: FSTARPU_SZ_DOUBLE_PRECISION_dummy complex :: FSTARPU_SZ_COMPLEX_dummy complex(4) :: FSTARPU_SZ_COMPLEX4_dummy complex(8) :: FSTARPU_SZ_COMPLEX8_dummy ! Note: Referencing global C constants from Fortran has ! been found unreliable on some architectures, notably ! on Darwin. The get_integer/get_pointer_constant ! scheme is a workaround to that issue. interface ! These functions are not exported to the end user function fstarpu_get_constant(s) bind(C) use iso_c_binding, only: c_ptr,c_char type(c_ptr) :: fstarpu_get_constant ! C function returns an intptr_t character(kind=c_char) :: s end function fstarpu_get_constant function fstarpu_init_internal (conf) bind(C,name="starpu_init") use iso_c_binding, only: c_ptr,c_int integer(c_int) :: fstarpu_init_internal type(c_ptr), value :: conf end function fstarpu_init_internal end interface ! Initialize Fortran constants from C peers FSTARPU_R = fstarpu_get_constant(C_CHAR_"FSTARPU_R"//C_NULL_CHAR) FSTARPU_W = fstarpu_get_constant(C_CHAR_"FSTARPU_W"//C_NULL_CHAR) FSTARPU_RW = fstarpu_get_constant(C_CHAR_"FSTARPU_RW"//C_NULL_CHAR) FSTARPU_SCRATCH = fstarpu_get_constant(C_CHAR_"FSTARPU_SCRATCH"//C_NULL_CHAR) FSTARPU_REDUX = fstarpu_get_constant(C_CHAR_"FSTARPU_REDUX"//C_NULL_CHAR) FSTARPU_MPI_REDUX = fstarpu_get_constant(C_CHAR_"FSTARPU_MPI_REDUX"//C_NULL_CHAR) FSTARPU_COMMUTE = fstarpu_get_constant(C_CHAR_"FSTARPU_COMMUTE"//C_NULL_CHAR) FSTARPU_SSEND = fstarpu_get_constant(C_CHAR_"FSTARPU_SSEND"//C_NULL_CHAR) FSTARPU_LOCALITY = fstarpu_get_constant(C_CHAR_"FSTARPU_LOCALITY"//C_NULL_CHAR) FSTARPU_DATA_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_DATA_ARRAY"//C_NULL_CHAR) FSTARPU_DATA_MODE_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_DATA_MODE_ARRAY"//C_NULL_CHAR) FSTARPU_CL_ARGS = fstarpu_get_constant(C_CHAR_"FSTARPU_CL_ARGS"//C_NULL_CHAR) FSTARPU_CL_ARGS_NFREE = fstarpu_get_constant(C_CHAR_"FSTARPU_CL_ARGS_NFREE"//C_NULL_CHAR) FSTARPU_TASK_DEPS_ARRAY = fstarpu_get_constant(C_CHAR_"FSTARPU_TASK_DEPS_ARRAY"//C_NULL_CHAR) FSTARPU_CALLBACK = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK"//C_NULL_CHAR) FSTARPU_CALLBACK_WITH_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_WITH_ARG"//C_NULL_CHAR) FSTARPU_CALLBACK_WITH_ARG_NFREE = & fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_WITH_ARG_NFREE"//C_NULL_CHAR) FSTARPU_CALLBACK_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_ARG"//C_NULL_CHAR) FSTARPU_CALLBACK_ARG_NFREE = fstarpu_get_constant(C_CHAR_"FSTARPU_CALLBACK_ARG_NFREE"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK_ARG = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_ARG"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE = & fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK_POP = fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK_POP_ARG = & fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP_ARG"//C_NULL_CHAR) FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE = & fstarpu_get_constant(C_CHAR_"FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE"//C_NULL_CHAR) FSTARPU_PRIORITY = fstarpu_get_constant(C_CHAR_"FSTARPU_PRIORITY"//C_NULL_CHAR) FSTARPU_EXECUTE_ON_NODE = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_NODE"//C_NULL_CHAR) FSTARPU_EXECUTE_ON_DATA = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_DATA"//C_NULL_CHAR) FSTARPU_EXECUTE_ON_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_ON_WORKER"//C_NULL_CHAR) FSTARPU_WORKER_ORDER = fstarpu_get_constant(C_CHAR_"FSTARPU_WORKER_ORDER"//C_NULL_CHAR) FSTARPU_EXECUTE_WHERE = fstarpu_get_constant(C_CHAR_"FSTARPU_EXECUTE_WHERE"//C_NULL_CHAR) FSTARPU_HYPERVISOR_TAG = fstarpu_get_constant(C_CHAR_"FSTARPU_HYPERVISOR_TAG"//C_NULL_CHAR) FSTARPU_POSSIBLY_PARALLEL = fstarpu_get_constant(C_CHAR_"FSTARPU_POSSIBLY_PARALLEL"//C_NULL_CHAR) FSTARPU_FLOPS = fstarpu_get_constant(C_CHAR_"FSTARPU_FLOPS"//C_NULL_CHAR) FSTARPU_TAG = fstarpu_get_constant(C_CHAR_"FSTARPU_TAG"//C_NULL_CHAR) FSTARPU_TAG_ONLY = fstarpu_get_constant(C_CHAR_"FSTARPU_TAG_ONLY"//C_NULL_CHAR) FSTARPU_NAME = fstarpu_get_constant(C_CHAR_"FSTARPU_NAME"//C_NULL_CHAR) FSTARPU_NODE_SELECTION_POLICY = fstarpu_get_constant(C_CHAR_"FSTARPU_NODE_SELECTION_POLICY"//C_NULL_CHAR) FSTARPU_TASK_SCHED_DATA = fstarpu_get_constant(C_CHAR_"FSTARPU_TASK_SCHED_DATA"//C_NULL_CHAR) FSTARPU_VALUE = fstarpu_get_constant(C_CHAR_"FSTARPU_VALUE"//C_NULL_CHAR) FSTARPU_SCHED_CTX = fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX"//C_NULL_CHAR) FSTARPU_CPU_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_CPU_WORKER"//C_NULL_CHAR) FSTARPU_CUDA_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA_WORKER"//C_NULL_CHAR) FSTARPU_OPENCL_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL_WORKER"//C_NULL_CHAR) FSTARPU_ANY_WORKER = fstarpu_get_constant(C_CHAR_"FSTARPU_ANY_WORKER"//C_NULL_CHAR) FSTARPU_NMAXBUFS = int(p_to_ip(fstarpu_get_constant(C_CHAR_"FSTARPU_NMAXBUFS"//C_NULL_CHAR)),c_int) FSTARPU_SCHED_CTX_POLICY_NAME = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_NAME"//C_NULL_CHAR) FSTARPU_SCHED_CTX_POLICY_STRUCT = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_STRUCT"//C_NULL_CHAR) FSTARPU_SCHED_CTX_POLICY_MIN_PRIO = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_MIN_PRIO"//C_NULL_CHAR) FSTARPU_SCHED_CTX_POLICY_MAX_PRIO = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_MAX_PRIO"//C_NULL_CHAR) FSTARPU_SCHED_CTX_HIERARCHY_LEVEL = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_HIERARCHY_LEVEL"//C_NULL_CHAR) FSTARPU_SCHED_CTX_NESTED = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_NESTED"//C_NULL_CHAR) FSTARPU_SCHED_CTX_AWAKE_WORKERS = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_AWAKE_WORKERS"//C_NULL_CHAR) FSTARPU_SCHED_CTX_POLICY_INIT = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_POLICY_INIT"//C_NULL_CHAR) FSTARPU_SCHED_CTX_USER_DATA = & fstarpu_get_constant(C_CHAR_"FSTARPU_SCHED_CTX_USER_DATA"//C_NULL_CHAR) FSTARPU_NOWHERE = & fstarpu_get_constant(C_CHAR_"FSTARPU_NOWHERE"//C_NULL_CHAR) FSTARPU_CPU = & fstarpu_get_constant(C_CHAR_"FSTARPU_CPU"//C_NULL_CHAR) FSTARPU_CUDA = & fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA"//C_NULL_CHAR) FSTARPU_OPENCL = & fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL"//C_NULL_CHAR) FSTARPU_CODELET_SIMGRID_EXECUTE = & fstarpu_get_constant(C_CHAR_"FSTARPU_CODELET_SIMGRID_EXECUTE"//C_NULL_CHAR) FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT = & fstarpu_get_constant(C_CHAR_"FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT"//C_NULL_CHAR) FSTARPU_CUDA_ASYNC = & fstarpu_get_constant(C_CHAR_"FSTARPU_CUDA_ASYNC"//C_NULL_CHAR) FSTARPU_OPENCL_ASYNC = & fstarpu_get_constant(C_CHAR_"FSTARPU_OPENCL_ASYNC"//C_NULL_CHAR) !FSTARPU_PER_WORKER = & ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_WORKER"//C_NULL_CHAR) !FSTARPU_PER_ARCH = & ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_ARCH"//C_NULL_CHAR) !FSTARPU_PER_COMMON = & ! fstarpu_get_constant(C_CHAR_"FSTARPU_PER_COMMON"//C_NULL_CHAR) FSTARPU_HISTORY_BASED = & fstarpu_get_constant(C_CHAR_"FSTARPU_HISTORY_BASED"//C_NULL_CHAR) FSTARPU_REGRESSION_BASED = & fstarpu_get_constant(C_CHAR_"FSTARPU_REGRESSION_BASED"//C_NULL_CHAR) FSTARPU_NL_REGRESSION_BASED = & fstarpu_get_constant(C_CHAR_"FSTARPU_NL_REGRESSION_BASED"//C_NULL_CHAR) FSTARPU_MULTIPLE_REGRESSION_BASED = & fstarpu_get_constant(C_CHAR_"FSTARPU_MULTIPLE_REGRESSION_BASED"//C_NULL_CHAR) FSTARPU_SEQ = & fstarpu_get_constant(C_CHAR_"FSTARPU_SEQ"//C_NULL_CHAR) FSTARPU_SPMD = & fstarpu_get_constant(C_CHAR_"FSTARPU_SPMD"//C_NULL_CHAR) FSTARPU_FORKJOIN = & fstarpu_get_constant(C_CHAR_"FSTARPU_FORKJOIN"//C_NULL_CHAR) ! Initialize size constants as 'c_ptr' FSTARPU_SZ_C_DOUBLE = sz_to_p(c_sizeof(FSTARPU_SZ_C_DOUBLE_dummy)) FSTARPU_SZ_C_FLOAT = sz_to_p(c_sizeof(FSTARPU_SZ_C_FLOAT_dummy)) FSTARPU_SZ_C_CHAR = sz_to_p(c_sizeof(FSTARPU_SZ_C_CHAR_dummy)) FSTARPU_SZ_C_INT = sz_to_p(c_sizeof(FSTARPU_SZ_C_INT_dummy)) FSTARPU_SZ_C_INTPTR_T = sz_to_p(c_sizeof(FSTARPU_SZ_C_INTPTR_T_dummy)) FSTARPU_SZ_C_PTR = sz_to_p(c_sizeof(FSTARPU_SZ_C_PTR_dummy)) FSTARPU_SZ_C_SIZE_T = sz_to_p(c_sizeof(FSTARPU_SZ_C_SIZE_T_dummy)) FSTARPU_SZ_CHARACTER = sz_to_p(c_sizeof(FSTARPU_SZ_CHARACTER_dummy)) FSTARPU_SZ_INTEGER = sz_to_p(c_sizeof(FSTARPU_SZ_INTEGER_dummy)) FSTARPU_SZ_INT4 = sz_to_p(c_sizeof(FSTARPU_SZ_INT4_dummy)) FSTARPU_SZ_INT8 = sz_to_p(c_sizeof(FSTARPU_SZ_INT8_dummy)) FSTARPU_SZ_REAL = sz_to_p(c_sizeof(FSTARPU_SZ_REAL_dummy)) FSTARPU_SZ_REAL4 = sz_to_p(c_sizeof(FSTARPU_SZ_REAL4_dummy)) FSTARPU_SZ_REAL8 = sz_to_p(c_sizeof(FSTARPU_SZ_REAL8_dummy)) FSTARPU_SZ_DOUBLE_PRECISION = sz_to_p(c_sizeof(FSTARPU_SZ_DOUBLE_PRECISION_dummy)) FSTARPU_SZ_COMPLEX = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX_dummy)) FSTARPU_SZ_COMPLEX4 = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX4_dummy)) FSTARPU_SZ_COMPLEX8 = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX8_dummy)) FSTARPU_SZ_COMPLEX8 = sz_to_p(c_sizeof(FSTARPU_SZ_COMPLEX8_dummy)) FSTARPU_DEFAULT_PRIO = int(p_to_ip(& fstarpu_get_constant(C_CHAR_"FSTARPU_DEFAULT_PRIO"//C_NULL_CHAR)),c_int) ! Initialize StarPU if (c_associated(conf)) then fstarpu_init = fstarpu_init_internal(conf) else fstarpu_init = fstarpu_init_internal(C_NULL_PTR) end if end function fstarpu_init function fstarpu_csizet_to_cptr(i) bind(C) use iso_c_binding type(c_ptr) :: fstarpu_csizet_to_cptr integer(c_size_t) :: i fstarpu_csizet_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR) end function fstarpu_csizet_to_cptr function fstarpu_int_to_cptr(i) bind(C) use iso_c_binding type(c_ptr) :: fstarpu_int_to_cptr integer(c_int) :: i fstarpu_int_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR) end function fstarpu_int_to_cptr function fstarpu_long_to_cptr(i) bind(C) use iso_c_binding type(c_ptr) :: fstarpu_long_to_cptr integer(c_long) :: i fstarpu_long_to_cptr = transfer(int(i,kind=c_intptr_t),C_NULL_PTR) end function fstarpu_long_to_cptr ! Note: do not add binding declarations here in 'CONTAINS' ! section, because the compiler generates empty functions for ! them. ! Instead, put binding declarations in the 'INTERFACE' section ! above. end module fstarpu_mod starpu-1.4.9+dfsg/mpi/examples/native_fortran/fstarpu_mpi_mod.f90000066400000000000000000001320341507764646700251470ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! module fstarpu_mpi_mod use iso_c_binding use fstarpu_mod implicit none interface ! == mpi/include/starpu_mpi.h == ! int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm); function fstarpu_mpi_isend (dh, mpi_req, dst, data_tag, mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: mpi_req integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_isend ! == mpi/include/starpu_mpi.h == ! int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm); function fstarpu_mpi_isend_prio (dh, mpi_req, dst, data_tag, prio, mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend_prio type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: mpi_req integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: prio integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_isend_prio ! int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm); function fstarpu_mpi_irecv (dh, mpi_req, src, data_tag, mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_irecv type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: mpi_req integer(c_int), value, intent(in) :: src integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_irecv ! int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm); function fstarpu_mpi_send (dh, dst, data_tag, mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_send type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_send ! int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm); function fstarpu_mpi_send_prio (dh, dst, data_tag, prio, mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_send_prio type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: prio integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_send_prio ! int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, MPI_Status *status); function fstarpu_mpi_recv (dh, src, data_tag, mpi_comm, mpi_status) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_recv type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: src integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: mpi_status end function fstarpu_mpi_recv ! int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg); function fstarpu_mpi_isend_detached (dh, dst, data_tag, mpi_comm, callback, arg) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend_detached type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: arg end function fstarpu_mpi_isend_detached ! int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg); function fstarpu_mpi_isend_detached_prio (dh, dst, data_tag, prio, mpi_comm, callback, arg) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend_detached_prio type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: prio integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: arg end function fstarpu_mpi_isend_detached_prio ! int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg); function fstarpu_mpi_recv_detached (dh, src, data_tag, mpi_comm, callback, arg) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_recv_detached type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: src integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: arg end function fstarpu_mpi_recv_detached ! int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm); function fstarpu_mpi_issend (dh, mpi_req, dst, data_tag, mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_issend type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: mpi_req integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_issend ! int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm); function fstarpu_mpi_issend_prio (dh, mpi_req, dst, data_tag, prio, mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_issend_prio type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: mpi_req integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: prio integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_issend_prio ! int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg); function fstarpu_mpi_issend_detached (dh, dst, data_tag, mpi_comm, callback, arg) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_issend_detached type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: arg end function fstarpu_mpi_issend_detached ! int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg); function fstarpu_mpi_issend_detached_prio (dh, dst, data_tag, prio, mpi_comm, callback, arg) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_issend_detached_prio type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: prio integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: arg end function fstarpu_mpi_issend_detached_prio ! int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status); function fstarpu_mpi_wait(req,st) bind(C,name="starpu_mpi_wait") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_wait type(c_ptr), value, intent(in) :: req type(c_ptr), value, intent(in) :: st end function fstarpu_mpi_wait ! int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status); function fstarpu_mpi_test(req,flag,st) bind(C,name="starpu_mpi_test") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_test type(c_ptr), value, intent(in) :: req type(c_ptr), value, intent(in) :: flag type(c_ptr), value, intent(in) :: st end function fstarpu_mpi_test ! int starpu_mpi_barrier(MPI_Comm comm); function fstarpu_mpi_barrier (mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_barrier integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_barrier ! int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency); function fstarpu_mpi_recv_detached_sequential_consistency (dh, src, data_tag, mpi_comm, callback, arg, seq_const) & bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_recv_detached_sequential_consistency type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: src integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: arg integer(c_int), value, intent(in) :: seq_const end function fstarpu_mpi_recv_detached_sequential_consistency ! int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm); ! -> cf fstarpu_mpi_init ! int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi); ! -> cf fstarpu_mpi_init ! int starpu_mpi_initialize(void) STARPU_DEPRECATED; ! -> cf fstarpu_mpi_init ! int starpu_mpi_initialize_extended(int *rank, int *world_size) STARPU_DEPRECATED; ! -> cf fstarpu_mpi_init ! int starpu_mpi_shutdown(void); function fstarpu_mpi_shutdown () bind(C,name="starpu_mpi_shutdown") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_shutdown end function fstarpu_mpi_shutdown ! struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...); function fstarpu_mpi_task_build(arglist) bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_mpi_task_build type(c_ptr), dimension(*), intent(in) :: arglist end function fstarpu_mpi_task_build ! int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...); subroutine fstarpu_mpi_task_post_build(arglist) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), dimension(*), intent(in) :: arglist end subroutine fstarpu_mpi_task_post_build ! int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...); subroutine fstarpu_mpi_task_insert(arglist) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), dimension(*), intent(in) :: arglist end subroutine fstarpu_mpi_task_insert subroutine fstarpu_mpi_insert_task(arglist) bind(C,name="fstarpu_mpi_task_insert") use iso_c_binding, only: c_ptr type(c_ptr), dimension(*), intent(in) :: arglist end subroutine fstarpu_mpi_insert_task ! void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node); subroutine fstarpu_mpi_get_data_on_node(mpi_comm,dh,node) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end subroutine fstarpu_mpi_get_data_on_node ! void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg); subroutine fstarpu_mpi_get_data_on_node_detached(mpi_comm,dh,node,callback,arg) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: arg end subroutine fstarpu_mpi_get_data_on_node_detached ! void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle); subroutine fstarpu_mpi_redux_data(mpi_comm,dh) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_mpi_redux_data ! void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio); subroutine fstarpu_mpi_redux_data_prio(mpi_comm,dh, prio) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: prio end subroutine fstarpu_mpi_redux_data_prio ! void starpu_mpi_redux_data_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int arity); subroutine fstarpu_mpi_redux_data_tree(mpi_comm,dh, arity) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: arity end subroutine fstarpu_mpi_redux_data_tree ! void starpu_mpi_redux_data_prio_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int prio, int arity); subroutine fstarpu_mpi_redux_data_prio_tree(mpi_comm,dh, prio, arity) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: prio integer(c_int), value, intent(in) :: arity end subroutine fstarpu_mpi_redux_data_prio_tree ! int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg); function fstarpu_mpi_scatter_detached (dhs, cnt, root, mpi_comm, scallback, sarg, rcallback, rarg) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_scatter_detached type(c_ptr), intent(in) :: dhs(*) integer(c_int), value, intent(in) :: cnt integer(c_int), value, intent(in) :: root integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: scallback type(c_ptr), value, intent(in) :: sarg type(c_funptr), value, intent(in) :: rcallback type(c_ptr), value, intent(in) :: rarg end function fstarpu_mpi_scatter_detached ! int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg); function fstarpu_mpi_gather_detached (dhs, cnt, root, mpi_comm, scallback, sarg, rcallback, rarg) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_gather_detached type(c_ptr), intent(in) :: dhs(*) integer(c_int), value, intent(in) :: cnt integer(c_int), value, intent(in) :: root integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: scallback type(c_ptr), value, intent(in) :: sarg type(c_funptr), value, intent(in) :: rcallback type(c_ptr), value, intent(in) :: rarg end function fstarpu_mpi_gather_detached ! int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag); function fstarpu_mpi_isend_detached_unlock_tag (dh, dst, data_tag, mpi_comm, starpu_tag) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend_detached_unlock_tag type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: starpu_tag end function fstarpu_mpi_isend_detached_unlock_tag ! int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag); function fstarpu_mpi_isend_detached_unlock_tag_prio (dh, dst, data_tag, prio, mpi_comm, starpu_tag) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend_detached_unlock_tag_prio type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: prio integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: starpu_tag end function fstarpu_mpi_isend_detached_unlock_tag_prio ! int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag); function fstarpu_mpi_recv_detached_unlock_tag (dh, src, data_tag, mpi_comm, starpu_tag) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_recv_detached_unlock_tag type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: src integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: starpu_tag end function fstarpu_mpi_recv_detached_unlock_tag ! int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *data_tag, MPI_Comm *comm, starpu_tag_t tag); function fstarpu_mpi_isend_array_detached_unlock_tag (array_size, dhs, dsts, data_tags, mpi_comms, starpu_tag) & bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend_array_detached_unlock_tag integer(c_int), value, intent(in) :: array_size type(c_ptr), intent(in) :: dhs(*) integer(c_int), intent(in) :: dsts(*) integer(c_int64_t), intent(in) :: data_tags(*) integer(c_int), intent(in) :: mpi_comms(*) type(c_ptr), value, intent(in) :: starpu_tag end function fstarpu_mpi_isend_array_detached_unlock_tag ! int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag); function fstarpu_mpi_isend_array_detached_unlock_tag_prio (array_size, dhs, dsts, data_tags, prio, mpi_comms, & starpu_tag) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend_array_detached_unlock_tag_prio integer(c_int), value, intent(in) :: array_size type(c_ptr), intent(in) :: dhs(*) integer(c_int), intent(in) :: dsts(*) integer(c_int64_t), intent(in) :: data_tags(*) integer(c_int), intent(in) :: prio(*) integer(c_int), intent(in) :: mpi_comms(*) type(c_ptr), value, intent(in) :: starpu_tag end function fstarpu_mpi_isend_array_detached_unlock_tag_prio ! int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *data_tag, MPI_Comm *comm, starpu_tag_t tag); function fstarpu_mpi_recv_array_detached_unlock_tag (array_size, dhs, srcs, data_tags, mpi_comms, starpu_tag) & bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_recv_array_detached_unlock_tag integer(c_int), value, intent(in) :: array_size type(c_ptr), intent(in) :: dhs(*) integer(c_int), intent(in) :: srcs(*) integer(c_int64_t), intent(in) :: data_tags(*) integer(c_int), intent(in) :: mpi_comms(*) type(c_ptr), value, intent(in) :: starpu_tag end function fstarpu_mpi_recv_array_detached_unlock_tag ! void starpu_mpi_comm_stats_retrieve(size_t *comm_stats); subroutine fstarpu_mpi_comm_stats_retrieve (comm_stats) bind(C,name="starpu_mpi_comm_stats_retrieve") use iso_c_binding implicit none integer(c_size_t), intent(in) :: comm_stats(*) end subroutine fstarpu_mpi_comm_stats_retrieve ! void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle); subroutine fstarpu_mpi_cache_flush(mpi_comm,dh) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_mpi_cache_flush ! void starpu_mpi_cache_flush_all_data(MPI_Comm comm); subroutine fstarpu_mpi_cache_flush_all_data(mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm end subroutine fstarpu_mpi_cache_flush_all_data ! int starpu_mpi_comm_size(MPI_Comm comm, int *size); function fstarpu_mpi_comm_size(mpi_comm,sz) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm integer(c_int), intent(out) :: sz integer(c_int) :: fstarpu_mpi_comm_size end function fstarpu_mpi_comm_size ! int starpu_mpi_comm_rank(MPI_Comm comm, int *rank); function fstarpu_mpi_comm_rank(mpi_comm,rank) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm integer(c_int), intent(out) :: rank integer(c_int) :: fstarpu_mpi_comm_rank end function fstarpu_mpi_comm_rank ! int starpu_mpi_world_rank(void); function fstarpu_mpi_world_rank() bind(C,name="starpu_mpi_world_rank") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_world_rank end function fstarpu_mpi_world_rank ! int starpu_mpi_world_size(void); function fstarpu_mpi_world_size() bind(C,name="starpu_mpi_world_size") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_world_size end function fstarpu_mpi_world_size ! int starpu_mpi_world_size(void); function fstarpu_mpi_world_comm() bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_world_comm end function fstarpu_mpi_world_comm ! void starpu_mpi_comm_stats_enable() subroutine fstarpu_mpi_comm_stats_enable() bind(C) use iso_c_binding implicit none end subroutine fstarpu_mpi_comm_stats_enable ! void starpu_mpi_comm_stats_disable() subroutine fstarpu_mpi_comm_stats_disable() bind(C) use iso_c_binding implicit none end subroutine fstarpu_mpi_comm_stats_disable ! int starpu_mpi_get_communication_tag(void); function fstarpu_mpi_get_communication_tag() bind(C,name="starpu_mpi_get_communication_tag") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_get_communication_tag end function fstarpu_mpi_get_communication_tag ! void starpu_mpi_set_communication_tag(int tag); subroutine fstarpu_mpi_set_communication_tag(tag) bind(C,name="starpu_mpi_set_communication_tag") use iso_c_binding implicit none integer(c_int64_t), value, intent(in) :: tag end subroutine fstarpu_mpi_set_communication_tag ! void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int tag, int rank, MPI_Comm comm); subroutine fstarpu_mpi_data_register_comm(dh,tag,rank,mpi_comm) bind(C) use iso_c_binding implicit none type(c_ptr), value, intent(in) :: dh integer(c_int64_t), value, intent(in) :: tag integer(c_int), value, intent(in) :: rank integer(c_int), value, intent(in) :: mpi_comm end subroutine fstarpu_mpi_data_register_comm ! #define starpu_mpi_data_register(data_handle, tag, rank) starpu_mpi_data_register_comm(data_handle, tag, rank, MPI_COMM_WORLD) subroutine fstarpu_mpi_data_register(dh,tag,rank) bind(C) use iso_c_binding implicit none type(c_ptr), value, intent(in) :: dh integer(c_int64_t), value, intent(in) :: tag integer(c_int), value, intent(in) :: rank end subroutine fstarpu_mpi_data_register ! void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm); subroutine fstarpu_mpi_data_set_rank_comm(dh,rank,mpi_comm) bind(C) use iso_c_binding implicit none type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: rank integer(c_int), value, intent(in) :: mpi_comm end subroutine fstarpu_mpi_data_set_rank_comm ! #define starpu_mpi_data_set_rank(handle, rank) starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD) subroutine fstarpu_mpi_data_set_rank(dh,rank) bind(C) use iso_c_binding implicit none type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: rank end subroutine fstarpu_mpi_data_set_rank ! void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int tag); subroutine fstarpu_mpi_data_set_tag(dh,tag) bind(C,name="starpu_mpi_data_set_tag") use iso_c_binding implicit none type(c_ptr), value, intent(in) :: dh integer(c_int64_t), value, intent(in) :: tag end subroutine fstarpu_mpi_data_set_tag ! int starpu_mpi_data_get_rank(starpu_data_handle_t handle); function fstarpu_mpi_data_get_rank(dh) bind(C,name="starpu_mpi_data_get_rank") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_data_get_rank type(c_ptr), value, intent(in) :: dh end function fstarpu_mpi_data_get_rank ! int starpu_mpi_data_get_tag(starpu_data_handle_t handle); function fstarpu_mpi_data_get_tag(dh) bind(C,name="starpu_mpi_data_get_tag") use iso_c_binding implicit none integer(c_int64_t) :: fstarpu_mpi_data_get_tag type(c_ptr), value, intent(in) :: dh end function fstarpu_mpi_data_get_tag ! void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t handle, int rank); subroutine fstarpu_mpi_data_migrate(mpi_comm,dh,rank) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: rank end subroutine fstarpu_mpi_data_migrate ! #define STARPU_MPI_NODE_SELECTION_CURRENT_POLICY -1 ! #define STARPU_MPI_NODE_SELECTION_MOST_R_DATA 0 ! int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func); function fstarpu_mpi_node_selection_register_policy(policy_func) & bind(C,name="starpu_mpi_node_selection_register_policy") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_node_selection_register_policy type(c_funptr), value, intent(in) :: policy_func end function fstarpu_mpi_node_selection_register_policy ! int starpu_mpi_node_selection_unregister_policy(int policy); function fstarpu_mpi_node_selection_unregister_policy(policy) & bind(C,name="starpu_mpi_node_selection_unregister_policy") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_node_selection_unregister_policy type(c_ptr), value, intent(in) :: policy end function fstarpu_mpi_node_selection_unregister_policy ! int starpu_mpi_node_selection_get_current_policy(); function fstarpu_mpi_data_selection_get_current_policy() & bind(C,name="starpu_mpi_data_selection_get_current_policy") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_data_selection_get_current_policy end function fstarpu_mpi_data_selection_get_current_policy ! int starpu_mpi_node_selection_set_current_policy(int policy); function fstarpu_mpi_data_selection_set_current_policy(policy) & bind(C,name="starpu_mpi_data_selection_set_current_policy") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_data_selection_set_current_policy type(c_ptr), value, intent(in) :: policy end function fstarpu_mpi_data_selection_set_current_policy ! int starpu_mpi_cache_is_enabled(); function fstarpu_mpi_cache_is_enabled() bind(C,name="starpu_mpi_cache_is_enabled") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_cache_is_enabled end function fstarpu_mpi_cache_is_enabled ! int starpu_mpi_cache_set(int enabled); function fstarpu_mpi_cache_set(enabled) bind(C,name="starpu_mpi_cache_set") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_cache_set integer(c_int), value, intent(in) :: enabled end function fstarpu_mpi_cache_set ! int starpu_mpi_wait_for_all(MPI_Comm comm); function fstarpu_mpi_wait_for_all (mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_wait_for_all integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_wait_for_all ! int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func); function fstarpu_mpi_datatype_register(dh, alloc_func, free_func) bind(C,name="starpu_mpi_datatype_register") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_datatype_register type(c_ptr), value, intent(in) :: dh type(c_funptr), value, intent(in) :: alloc_func type(c_funptr), value, intent(in) :: free_func end function fstarpu_mpi_datatype_register ! int starpu_mpi_datatype_unregister(starpu_data_handle_t handle); function fstarpu_mpi_datatype_unregister(dh) bind(C,name="starpu_mpi_datatype_unregister") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_datatype_unregister type(c_ptr), value, intent(in) :: dh end function fstarpu_mpi_datatype_unregister function fstarpu_mpi_req_alloc() bind(C) use iso_c_binding implicit none type(c_ptr) :: fstarpu_mpi_req_alloc end function fstarpu_mpi_req_alloc subroutine fstarpu_mpi_req_free(req) bind(C) use iso_c_binding implicit none type(c_ptr),value,intent(in) :: req end subroutine fstarpu_mpi_req_free function fstarpu_mpi_status_alloc() bind(C) use iso_c_binding implicit none type(c_ptr) :: fstarpu_mpi_status_alloc end function fstarpu_mpi_status_alloc subroutine fstarpu_mpi_status_free(st) bind(C) use iso_c_binding implicit none type(c_ptr),value,intent(in) :: st end subroutine fstarpu_mpi_status_free end interface contains function fstarpu_mpi_init (initialize_mpi,mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_init integer(c_int), intent(in) :: initialize_mpi integer(c_int), optional, intent(in) :: mpi_comm type(c_ptr) :: argcv integer(c_int) :: fargc,i,farg_len character(len=1) :: farg_1 character(len=:), allocatable :: farg integer(c_int) :: mpi_comm_present, mpi_comm_or_0 integer(c_int) :: ret interface function fstarpu_mpi_argcv_alloc(argc, initialize_mpi, comm_present, comm) bind(C) use iso_c_binding implicit none type(c_ptr) :: fstarpu_mpi_argcv_alloc integer(c_int),value,intent(in) :: argc integer(c_int),value,intent(in) :: initialize_mpi integer(c_int),value,intent(in) :: comm_present integer(c_int),value,intent(in) :: comm end function fstarpu_mpi_argcv_alloc subroutine fstarpu_mpi_argcv_set_arg(argcv, i, l, s) bind(C) use iso_c_binding implicit none type(c_ptr),value,intent(in) :: argcv integer(c_int),value,intent(in) :: i integer(c_int),value,intent(in) :: l character(c_char),intent(in) :: s end subroutine fstarpu_mpi_argcv_set_arg subroutine fstarpu_mpi_argcv_free(argcv) bind(C) use iso_c_binding implicit none type(c_ptr),value,intent(in) :: argcv end subroutine fstarpu_mpi_argcv_free function fstarpu_mpi_init_c(argcv) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_init_c type(c_ptr),value,intent(in) :: argcv end function fstarpu_mpi_init_c end interface fargc = command_argument_count() !write(*,*) "fargc",fargc if (present(mpi_comm)) then mpi_comm_present = 1 mpi_comm_or_0 = mpi_comm else mpi_comm_present = 0 mpi_comm_or_0 = 0 end if !write(*,*) "initialize_mpi",initialize_mpi !write(*,*) "mpi_comm_present",mpi_comm_present argcv = fstarpu_mpi_argcv_alloc(fargc, initialize_mpi, mpi_comm_present, mpi_comm_or_0) do i=0,fargc-1 call get_command_argument(i, farg_1, farg_len) allocate (character(len=farg_len) :: farg) call get_command_argument(i, farg) call fstarpu_mpi_argcv_set_arg(argcv, i, farg_len, farg) deallocate (farg) end do ret = fstarpu_mpi_init_c(argcv) call fstarpu_mpi_argcv_free(argcv) fstarpu_mpi_init = ret end function fstarpu_mpi_init end module fstarpu_mpi_mod starpu-1.4.9+dfsg/mpi/examples/native_fortran/nf_basic_ring.f90000066400000000000000000000077711507764646700245530ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! program nf_basic_ring use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module use fstarpu_mpi_mod ! StarPU-MPI interfacing module implicit none integer(c_int) :: ncpu integer(c_int) :: ret integer(c_int) :: rank,sz integer(c_int),target :: token = 42 integer(c_int) :: nloops = 32 integer(c_int) :: loop integer(c_int64_t) :: tag integer(c_int) :: world integer(c_int) :: src,dst type(c_ptr) :: token_dh, st ret = fstarpu_init(C_NULL_PTR) if (ret == -19) then stop 77 else if (ret /= 0) then stop 1 end if ret = fstarpu_mpi_init(1) print *,"fstarpu_mpi_init status:", ret if (ret /= 0) then stop 1 end if ! stop there if no CPU worker available ncpu = fstarpu_cpu_worker_get_count() if (ncpu == 0) then call fstarpu_shutdown() ret = fstarpu_mpi_shutdown() stop 77 end if world = fstarpu_mpi_world_comm() rank = fstarpu_mpi_world_rank() sz = fstarpu_mpi_world_size() write(*,*) "rank=", rank,"size=",sz,"world=",world if (sz < 2) then call fstarpu_shutdown() ret = fstarpu_mpi_shutdown() stop 77 end if call fstarpu_variable_data_register(token_dh, 0, c_loc(token), c_sizeof(token)) st = fstarpu_mpi_status_alloc() do loop=1,nloops tag = loop*sz+rank token = 0 if (loop == 1.and.rank == 0) then write(*,*) "rank=", rank,"token=",token else src = modulo((rank+sz-1),sz) write(*,*) "rank=", rank,"recv--> src =", src, "tag =", tag ret = fstarpu_mpi_recv(token_dh, src, tag, world, st) if (ret /= 0) then write(*,*) "fstarpu_mpi_recv failed" stop 1 end if write(*,*) "rank=", rank,"recv<--","token=",token token = token+1 end if if (loop == nloops.and.rank == (sz-1)) then call fstarpu_data_acquire(token_dh, FSTARPU_R) write(*,*) "finished: rank=", rank,"token=",token call fstarpu_data_release(token_dh) else dst = modulo((rank+1),sz) write(*,*) "rank=", rank,"send--> dst =", dst, "tag =", tag+1 ret = fstarpu_mpi_send(token_dh, dst, tag+1, world) if (ret /= 0) then write(*,*) "fstarpu_mpi_recv failed" stop 1 end if write(*,*) "rank=", rank,"send<--" end if end do call fstarpu_mpi_status_free(st) call fstarpu_data_unregister(token_dh) call fstarpu_shutdown() ret = fstarpu_mpi_shutdown() print *,"fstarpu_mpi_shutdown status:", ret if (ret /= 0) then stop 1 end if end program nf_basic_ring starpu-1.4.9+dfsg/mpi/examples/native_fortran/nf_mm.f90000066400000000000000000000170431507764646700230550ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! program nf_mm use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module use fstarpu_mpi_mod ! StarPU-MPI interfacing module use nf_mm_cl implicit none logical, parameter :: verbose = .false. integer(c_int) :: comm_size, comm_rank integer(c_int), target :: comm_world integer(c_int) :: N = 16, BS = 4, NB real(kind=c_double),allocatable,target :: A(:,:), B(:,:), C(:,:) type(c_ptr),allocatable :: dh_A(:), dh_B(:), dh_C(:,:) type(c_ptr) :: cl_mm integer(c_int) :: ncpu integer(c_int) :: ret integer(c_int) :: row, col integer(c_int) :: b_row, b_col integer(c_int) :: mr, rank integer(c_int64_t) :: tag ret = fstarpu_init(C_NULL_PTR) if (ret == -19) then stop 77 else if (ret /= 0) then stop 1 end if ret = fstarpu_mpi_init(1) print *,"fstarpu_mpi_init status:", ret if (ret /= 0) then stop 1 end if ! stop there if no CPU worker available ncpu = fstarpu_cpu_worker_get_count() if (ncpu == 0) then call fstarpu_shutdown() stop 77 end if comm_world = fstarpu_mpi_world_comm() comm_size = fstarpu_mpi_world_size() comm_rank = fstarpu_mpi_world_rank() if (comm_size < 2) then call fstarpu_shutdown() ret = fstarpu_mpi_shutdown() stop 77 end if ! TODO: process app's argc/argv NB = N/BS ! allocate and initialize codelet cl_mm = fstarpu_codelet_allocate() call fstarpu_codelet_set_name(cl_mm, c_char_"nf_mm_cl"//c_null_char) call fstarpu_codelet_add_cpu_func(cl_mm, C_FUNLOC(cl_cpu_mult)) call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R) call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R) call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_RW) ! allocate matrices if (comm_rank == 0) then allocate(A(N,N)) allocate(B(N,N)) allocate(C(N,N)) end if ! init matrices if (comm_rank == 0) then do col=1,N do row=1,N if (row == col) then A(row,col) = 2 else A(row,col) = 0 end if B(row,col) = row*N+col C(row,col) = 0 end do end do if (verbose) then print *,"A" call mat_disp(A) print *,"B" call mat_disp(B) print *,"C" call mat_disp(C) end if end if ! allocate data handles allocate(dh_A(NB)) allocate(dh_B(NB)) allocate(dh_C(NB,NB)) ! register matrices if (comm_rank == 0) then mr = 0 ! TODO: use STARPU_MAIN_RAM constant else mr = -1 end if tag = 0 do b_row=1,NB if (comm_rank == 0) then call fstarpu_matrix_data_register(dh_A(b_row), mr, & c_loc( A(1+(b_row-1)*BS,1) ), N, BS, N, c_sizeof(A(1,1))) else call fstarpu_matrix_data_register(dh_A(b_row), mr, & c_null_ptr, N, BS, N, c_sizeof(A(1,1))) end if call fstarpu_mpi_data_register(dh_A(b_row), tag, 0) tag = tag+1 end do do b_col=1,NB if (comm_rank == 0) then call fstarpu_matrix_data_register(dh_B(b_col), mr, & c_loc( B(1,1+(b_col-1)*BS) ), N, N, BS, c_sizeof(B(1,1))) else call fstarpu_matrix_data_register(dh_B(b_col), mr, & c_null_ptr, N, N, BS, c_sizeof(B(1,1))) end if call fstarpu_mpi_data_register(dh_B(b_col), tag, 0) tag = tag+1 end do do b_col=1,NB do b_row=1,NB if (comm_rank == 0) then call fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, & c_loc( C(1+(b_row-1)*BS,1+(b_col-1)*BS) ), N, BS, BS, c_sizeof(C(1,1))) else call fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, & c_null_ptr, N, BS, BS, c_sizeof(C(1,1))) end if call fstarpu_mpi_data_register(dh_C(b_row,b_col), tag, 0) tag = tag+1 end do end do ! distribute matrix C do b_col=1,NB do b_row=1,NB rank = modulo(b_row+b_col, comm_size) call fstarpu_mpi_data_migrate(comm_world, dh_c(b_row,b_col), rank) end do end do do b_col=1,NB do b_row=1,NB call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_mm, & FSTARPU_R, dh_A(b_row), & FSTARPU_R, dh_B(b_col), & FSTARPU_RW, dh_C(b_row,b_col), & C_NULL_PTR /)) end do end do call fstarpu_task_wait_for_all() ! undistribute matrix C do b_col=1,NB do b_row=1,NB call fstarpu_mpi_data_migrate(comm_world, dh_c(b_row,b_col), 0) end do end do ! unregister matrices do b_row=1,NB call fstarpu_data_unregister(dh_A(b_row)) end do do b_col=1,NB call fstarpu_data_unregister(dh_B(b_col)) end do do b_col=1,NB do b_row=1,NB call fstarpu_data_unregister(dh_C(b_row,b_col)) end do end do ! check result if (comm_rank == 0) then if (verbose) then print *,"final C" call mat_disp(C) end if do col=1,N do row=1,N if (abs(C(row,col) - 2*(row*N+col)) > 1.0) then print *, "check failed" stop 1 end if end do end do end if ! free handles deallocate(dh_A) deallocate(dh_B) deallocate(dh_C) ! free matrices if (comm_rank == 0) then deallocate(A) deallocate(B) deallocate(C) end if call fstarpu_codelet_free(cl_mm) call fstarpu_shutdown() ret = fstarpu_mpi_shutdown() print *,"fstarpu_mpi_shutdown status:", ret if (ret /= 0) then stop 1 end if end program nf_mm starpu-1.4.9+dfsg/mpi/examples/native_fortran/nf_mm_2dbc.f90000066400000000000000000000230131507764646700237410ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! program nf_mm_2dbc use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module use fstarpu_mpi_mod ! StarPU-MPI interfacing module use nf_mm_cl use nf_mm_cl_blas implicit none type block_type real(kind=c_double), allocatable :: c(:,:) type(c_ptr) :: h integer :: owner end type block_type type dsmat_type integer :: m, n, b type(block_type), allocatable :: blocks(:,:) end type dsmat_type logical, parameter :: verbose = .false. logical :: trace = .false. integer(c_int) :: comm_size, comm_rank integer(c_int), target :: comm_world integer :: bs integer(c_int) :: m, mb integer(c_int) :: n, nb integer(c_int) :: k, kb character(len=20) :: str type(dsmat_type),target :: A, B, C real(kind=c_double), target :: alpha, beta, zbeta type(c_ptr) :: cl_mm, cl_fill integer(c_int) :: ncpu integer(c_int) :: ret integer :: i, j, l, p , q, trial, t integer :: te, ts, tr real :: tf, gflops ret = fstarpu_init(C_NULL_PTR) if (ret == -19) then stop 77 else if (ret /= 0) then stop 1 end if ret = fstarpu_mpi_init(1) if (ret /= 0) then write(*,'("fstarpu_mpi_init status:",i4)') ret stop 1 end if ! stop there if no CPU worker available ncpu = fstarpu_cpu_worker_get_count() if (ncpu == 0) then call fstarpu_shutdown() stop 77 end if comm_world = fstarpu_mpi_world_comm() comm_size = fstarpu_mpi_world_size() comm_rank = fstarpu_mpi_world_rank() if (comm_size < 2) then call fstarpu_shutdown() ret = fstarpu_mpi_shutdown() stop 77 end if if (command_argument_count() >= 1) then call get_command_argument(1, value=str, length=i) read(str(1:i),*) m else m = 10 end if if (command_argument_count() >= 2) then call get_command_argument(2, value=str, length=i) read(str(1:i),*) n else n = 10 end if if (command_argument_count() >= 3) then call get_command_argument(3, value=str, length=i) read(str(1:i),*) k else k = 10 end if if (command_argument_count() >= 4) then call get_command_argument(4, value=str, length=i) read(str(1:i),*) bs else bs = 1 end if if (command_argument_count() >= 5) then call get_command_argument(5, value=str, length=i) read(str(1:i),*) p else p = 1 end if if (command_argument_count() >= 6) then call get_command_argument(6, value=str, length=i) read(str(1:i),*) q else q = 1 end if if (command_argument_count() >= 8) then call get_command_argument(7, value=str, length=i) read(str(1:i),*) t else t = 1 end if if (command_argument_count() == 8) then trace = .true. end if if (mod(m,bs).ne.0) stop 75 if (mod(n,bs).ne.0) stop 75 if (mod(k,bs).ne.0) stop 75 mb = m/bs nb = n/bs kb = k/bs if (comm_rank.eq.0) then write(*,'("========================================")') write(*,'("mxnxk = ",i5,"x",i5,"x",i5)') m, n, k write(*,'("mbxnbxkb = ",i5,"x",i5,"x",i5)') mb, nb, kb write(*,'("B = ",i5)') bs write(*,'("PxQ = ",i3,"x",i3)') p,q write(*,'("trace = ",l)') trace write(*,'("========================================")') end if ret = fstarpu_mpi_barrier(comm_world) ! initialize codelets call initialize_codelets() alpha = 0.42 beta = 3.14 do trial=1,t ! allocate matrices call initialize_matrix(a,mb,kb,"A") call initialize_matrix(b,kb,nb,"B") call initialize_matrix(c,mb,nb,"C") ret = fstarpu_mpi_barrier(comm_world) call fill_matrix(A, mb,kb,"A") ret = fstarpu_mpi_wait_for_all(comm_world) ret = fstarpu_mpi_barrier(comm_world) call fill_matrix(B, kb,nb,"B") ret = fstarpu_mpi_wait_for_all(comm_world) ret = fstarpu_mpi_barrier(comm_world) call fill_matrix(C, mb,nb,"C") ret = fstarpu_mpi_wait_for_all(comm_world) ret = fstarpu_mpi_barrier(comm_world) call system_clock(ts) ! submit matrix multiplication do i=1,mb do j=1,nb do l=1,kb ! if (comm_rank.eq.0) write(*,*) "GEMM", b_col,b_row,b_aisle if (l.eq.1) then; zbeta = beta; else; zbeta = 1.0d0; end if call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_mm, & FSTARPU_VALUE, c_loc(alpha), FSTARPU_SZ_REAL8, & FSTARPU_VALUE, c_loc(zbeta), FSTARPU_SZ_REAL8, & FSTARPU_R, A%blocks(i,l)%h, & FSTARPU_R, B%blocks(l,j)%h, & FSTARPU_RW, C%blocks(i,j)%h, & c_null_ptr /)) end do end do end do ret = fstarpu_mpi_wait_for_all(comm_world) ret = fstarpu_mpi_barrier(comm_world) call system_clock(te,tr) tf = max(real(te-ts)/real(tr),1e-20) gflops = 2.0*m*n*k/(tf*10**9) if (comm_rank.eq.0) write(*,'("RANK ",i3," -> took ",e15.8," s | ", e15.8,"Gflop/s")') & comm_rank, tf, gflops ! unregister matrices call unregister_matrix(A,mb,kb) call unregister_matrix(B,kb,nb) call unregister_matrix(C,mb,nb) end do call fstarpu_codelet_free(cl_mm) call fstarpu_codelet_free(cl_fill) call fstarpu_shutdown() ret = fstarpu_mpi_shutdown() if (ret /= 0) then write(*,'("fstarpu_mpi_shutdown status:",i4)') ret stop 1 end if contains subroutine initialize_codelets() implicit none cl_mm = fstarpu_codelet_allocate() call fstarpu_codelet_set_name(cl_mm, c_char_"nf_gemm_cl"//c_null_char) call fstarpu_codelet_add_cpu_func(cl_mm, C_FUNLOC(cl_cpu_gemm)) call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R) call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R) call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_RW) cl_fill = fstarpu_codelet_allocate() call fstarpu_codelet_set_name(cl_fill, c_char_"nf_fill_cl"//c_null_char) call fstarpu_codelet_add_cpu_func(cl_fill, C_FUNLOC(cl_cpu_fill)) call fstarpu_codelet_add_buffer(cl_fill, FSTARPU_W) end subroutine initialize_codelets subroutine initialize_matrix(X,mb,nb,cname) implicit none type(dsmat_type), target :: x integer :: mb, nb character :: cname integer :: i, j type(block_type), pointer :: xij integer(c_int64_t), save :: tag = 1 x%m = mb*bs x%n = nb*bs x%b = bs allocate(x%blocks(mb,nb)) do i=1,mb do j=1,nb xij => x%blocks(i,j) xij%owner = mod(i-1,p)*q + mod(j-1,q) if (comm_rank.eq.xij%owner) then ! write(*,*) comm_rank,"] I own ",cname,"_",i,j,"so I register it with tag",tag allocate(xij%c(bs,bs)) call fstarpu_matrix_data_register( xij%h, 0, c_loc( xij%c(1,1) ), & bs, bs, bs, c_sizeof(xij%c(1,1)) ) else ! write(*,*) comm_rank,"] ",xij%owner," owns ",cname,"_",i,j,"so it registers it with tag",tag call fstarpu_matrix_data_register( xij%h, -1, c_null_ptr, & bs, bs, bs, c_sizeof(alpha) ) end if call fstarpu_mpi_data_register(xij%h, tag, xij%owner) tag = tag + 1 end do end do end subroutine initialize_matrix subroutine fill_matrix(x,mb,nb,cname) implicit none type(dsmat_type), target :: x integer :: mb, nb character :: cname integer :: i, j type(block_type), pointer :: xij do i=1,mb do j=1,nb xij => x%blocks(i,j) if (comm_rank.eq.xij%owner) then ! write(*,*) comm_rank,"] I own ",cname,"_",i,j,"so I fill it" call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_fill, & FSTARPU_W, xij%h, & c_null_ptr /)) else !write(*,*) comm_rank,"] ",xij%owner,"owns ",cname,"_",i,j,"so it fills it" end if end do end do end subroutine fill_matrix subroutine unregister_matrix(x,mb,nb) implicit none integer :: mb, nb type(block_type), pointer :: xij type(dsmat_type), target :: x integer :: i, j do i=1,mb do j=1,nb xij => x%blocks(i,j) call fstarpu_data_unregister(xij%h) if (comm_rank.eq.xij%owner) then deallocate(xij%c) end if end do end do deallocate(x%blocks) end subroutine unregister_matrix end program starpu-1.4.9+dfsg/mpi/examples/native_fortran/nf_mm_cl.f90000066400000000000000000000056651507764646700235420ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! module nf_mm_cl contains subroutine mat_disp (m) ! declared here so it can be used both for the ! program and for debugging codelet routines use iso_c_binding ! C interfacing module implicit none real(kind=c_double) :: m(:,:) integer i,j do i=lbound(m,1),ubound(m,1) write(*, fmt="(A2) ",advance="no") "| " do j=lbound(m,2),ubound(m,2) write(*, fmt="(F6.1,A1) ", advance="no") m(i,j)," " end do write(*,*) "|" end do write(*,*) end subroutine recursive subroutine cl_cpu_mult (buffers, cl_args) bind(C) use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module implicit none type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused real(kind=c_double),pointer :: A(:,:), B(:,:), C(:,:) integer :: ld_A,nx_A,ny_A integer :: ld_B,nx_B,ny_B integer :: ld_C,nx_C,ny_C integer :: i,j,k ld_A = fstarpu_matrix_get_ld(buffers, 0) ld_B = fstarpu_matrix_get_ld(buffers, 1) ld_C = fstarpu_matrix_get_ld(buffers, 2) nx_A = fstarpu_matrix_get_nx(buffers, 0) nx_B = fstarpu_matrix_get_nx(buffers, 1) nx_C = fstarpu_matrix_get_nx(buffers, 2) ny_A = fstarpu_matrix_get_ny(buffers, 0) ny_B = fstarpu_matrix_get_ny(buffers, 1) ny_C = fstarpu_matrix_get_ny(buffers, 2) if (ny_C /= ny_B) then write(*,*) "C -- B column mismatch" stop 1 end if if (nx_C /= nx_A) then write(*,*) "C -- A row mismatch" stop 1 end if if (ny_A /= nx_B) then write(*,*) "A -- B col/row mismatch" stop 1 end if call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), A, shape=[ld_A,ny_A]) call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 1), B, shape=[ld_B,ny_B]) call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 2), C, shape=[ld_C,ny_C]) do k = 1, ny_C do j = 1, nx_C do i = 1, nx_B C(j,k) = C(j,k) + A(j,i) * B(i,k) end do end do end do end subroutine cl_cpu_mult end module nf_mm_cl starpu-1.4.9+dfsg/mpi/examples/native_fortran/nf_mm_cl_blas.f90000066400000000000000000000065321507764646700245350ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! module nf_mm_cl_blas contains recursive subroutine cl_cpu_gemm (buffers, cl_args) bind(C) use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module implicit none type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused real, target :: alpha, beta real(kind=c_double),pointer :: A(:,:), B(:,:), C(:,:) integer :: ld_A,nx_A,ny_A integer :: ld_B,nx_B,ny_B integer :: ld_C,nx_C,ny_C integer :: i,j,k write(*,*) "gemm task" call fstarpu_unpack_arg( cl_args, (/ c_loc(alpha), c_loc(beta) /)) ld_A = fstarpu_matrix_get_ld(buffers, 0) ld_B = fstarpu_matrix_get_ld(buffers, 1) ld_C = fstarpu_matrix_get_ld(buffers, 2) nx_A = fstarpu_matrix_get_nx(buffers, 0) nx_B = fstarpu_matrix_get_nx(buffers, 1) nx_C = fstarpu_matrix_get_nx(buffers, 2) ny_A = fstarpu_matrix_get_ny(buffers, 0) ny_B = fstarpu_matrix_get_ny(buffers, 1) ny_C = fstarpu_matrix_get_ny(buffers, 2) call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), A, shape=[ld_A,ny_A]) call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 1), B, shape=[ld_B,ny_B]) call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 2), C, shape=[ld_C,ny_C]) call dgemm('n','n',nx_C,ny_C,nx_B, alpha, A(1,1), ld_A, B(1,1), ld_B, & beta, C(1,1), ld_C) write(*,*) "end gemm task" return end subroutine cl_cpu_gemm recursive subroutine cl_cpu_fill (buffers, cl_args) bind(C) use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module use fstarpu_mpi_mod implicit none type(c_ptr), value, intent(in) :: cl_args type(c_ptr), value, intent(in) :: buffers real(kind=c_double), pointer :: x(:,:) integer :: m, n, ld integer :: j integer :: iseed(4) = (/1,1,1,1/) integer :: comm_rank comm_rank = fstarpu_mpi_world_rank() m = fstarpu_matrix_get_nx(buffers, 0) n = fstarpu_matrix_get_ny(buffers, 0) ld = fstarpu_matrix_get_ld(buffers, 0) write(*,*) comm_rank,"] fill", m, n, ld call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), x, shape=(/ld,n/)) ! copied from qrm_dsmat_fill_task a few lines up do j=1,n call dlarnv(2, iseed(1), m, x(1, j)) end do write(*,*) comm_rank,"]end fill task" return end subroutine cl_cpu_fill end module nf_mm_cl_blas starpu-1.4.9+dfsg/mpi/examples/native_fortran/nf_mm_task_build.f90000066400000000000000000000202301507764646700252460ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! program nf_mm use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module use fstarpu_mpi_mod ! StarPU-MPI interfacing module use nf_mm_cl implicit none logical, parameter :: verbose = .false. integer(c_int) :: comm_size, comm_rank integer(c_int), target :: comm_world integer(c_int) :: N = 16, BS = 4, NB real(kind=c_double),allocatable,target :: A(:,:), B(:,:), C(:,:) type(c_ptr),allocatable :: dh_A(:), dh_B(:), dh_C(:,:) type(c_ptr) :: cl_mm type(c_ptr) :: task integer(c_int) :: ncpu integer(c_int) :: ret integer(c_int) :: row, col integer(c_int) :: b_row, b_col integer(c_int) :: mr, rank integer(c_int64_t) :: tag ret = fstarpu_init(C_NULL_PTR) if (ret == -19) then stop 77 else if (ret /= 0) then stop 1 end if ret = fstarpu_mpi_init(1) print *,"fstarpu_mpi_init status:", ret if (ret /= 0) then stop 1 end if ! stop there if no CPU worker available ncpu = fstarpu_cpu_worker_get_count() if (ncpu == 0) then call fstarpu_shutdown() stop 77 end if comm_world = fstarpu_mpi_world_comm() comm_size = fstarpu_mpi_world_size() comm_rank = fstarpu_mpi_world_rank() if (comm_size < 2) then call fstarpu_shutdown() ret = fstarpu_mpi_shutdown() stop 77 end if ! TODO: process app's argc/argv NB = N/BS ! allocate and initialize codelet cl_mm = fstarpu_codelet_allocate() call fstarpu_codelet_set_name(cl_mm, c_char_"nf_mm_cl"//c_null_char) call fstarpu_codelet_add_cpu_func(cl_mm, C_FUNLOC(cl_cpu_mult)) call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R) call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R) call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_RW) ! allocate matrices if (comm_rank == 0) then allocate(A(N,N)) allocate(B(N,N)) allocate(C(N,N)) end if ! init matrices if (comm_rank == 0) then do col=1,N do row=1,N if (row == col) then A(row,col) = 2 else A(row,col) = 0 end if B(row,col) = row*N+col C(row,col) = 0 end do end do if (verbose) then print *,"A" call mat_disp(A) print *,"B" call mat_disp(B) print *,"C" call mat_disp(C) end if end if ! allocate data handles allocate(dh_A(NB)) allocate(dh_B(NB)) allocate(dh_C(NB,NB)) ! register matrices if (comm_rank == 0) then mr = 0 ! TODO: use STARPU_MAIN_RAM constant else mr = -1 end if tag = 0 do b_row=1,NB if (comm_rank == 0) then call fstarpu_matrix_data_register(dh_A(b_row), mr, & c_loc( A(1+(b_row-1)*BS,1) ), N, BS, N, c_sizeof(A(1,1))) else call fstarpu_matrix_data_register(dh_A(b_row), mr, & c_null_ptr, N, BS, N, c_sizeof(A(1,1))) end if call fstarpu_mpi_data_register(dh_A(b_row), tag, 0) tag = tag+1 end do do b_col=1,NB if (comm_rank == 0) then call fstarpu_matrix_data_register(dh_B(b_col), mr, & c_loc( B(1,1+(b_col-1)*BS) ), N, N, BS, c_sizeof(B(1,1))) else call fstarpu_matrix_data_register(dh_B(b_col), mr, & c_null_ptr, N, N, BS, c_sizeof(B(1,1))) end if call fstarpu_mpi_data_register(dh_B(b_col), tag, 0) tag = tag+1 end do do b_col=1,NB do b_row=1,NB if (comm_rank == 0) then call fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, & c_loc( C(1+(b_row-1)*BS,1+(b_col-1)*BS) ), N, BS, BS, c_sizeof(C(1,1))) else call fstarpu_matrix_data_register(dh_C(b_row,b_col), mr, & c_null_ptr, N, BS, BS, c_sizeof(C(1,1))) end if call fstarpu_mpi_data_register(dh_C(b_row,b_col), tag, 0) tag = tag+1 end do end do ! distribute matrix C do b_col=1,NB do b_row=1,NB rank = modulo(b_row+b_col, comm_size) call fstarpu_mpi_data_migrate(comm_world, dh_c(b_row,b_col), rank) end do end do do b_col=1,NB do b_row=1,NB task = fstarpu_mpi_task_build((/ c_loc(comm_world), cl_mm, & FSTARPU_R, dh_A(b_row), & FSTARPU_R, dh_B(b_col), & FSTARPU_RW, dh_C(b_row,b_col), & C_NULL_PTR /)) if (c_associated(task)) then ret = fstarpu_task_submit(task) endif call fstarpu_mpi_task_post_build((/ c_loc(comm_world), cl_mm, & FSTARPU_R, dh_A(b_row), & FSTARPU_R, dh_B(b_col), & FSTARPU_RW, dh_C(b_row,b_col), & C_NULL_PTR /)) end do end do call fstarpu_task_wait_for_all() ! undistribute matrix C do b_col=1,NB do b_row=1,NB call fstarpu_mpi_data_migrate(comm_world, dh_c(b_row,b_col), 0) end do end do ! unregister matrices do b_row=1,NB call fstarpu_data_unregister(dh_A(b_row)) end do do b_col=1,NB call fstarpu_data_unregister(dh_B(b_col)) end do do b_col=1,NB do b_row=1,NB call fstarpu_data_unregister(dh_C(b_row,b_col)) end do end do ! check result if (comm_rank == 0) then if (verbose) then print *,"final C" call mat_disp(C) end if do col=1,N do row=1,N if (abs(C(row,col) - 2*(row*N+col)) > 1.0) then print *, "check failed" stop 1 end if end do end do end if ! free handles deallocate(dh_A) deallocate(dh_B) deallocate(dh_C) ! free matrices if (comm_rank == 0) then deallocate(A) deallocate(B) deallocate(C) end if call fstarpu_codelet_free(cl_mm) call fstarpu_shutdown() ret = fstarpu_mpi_shutdown() print *,"fstarpu_mpi_shutdown status:", ret if (ret /= 0) then stop 1 end if end program nf_mm starpu-1.4.9+dfsg/mpi/examples/native_fortran/nf_mpi_redux.f90000066400000000000000000000207661507764646700244460ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! program nf_mpi_redux use iso_c_binding use fstarpu_mod use fstarpu_mpi_mod implicit none integer, target :: ret, np, i, j, trial type(c_ptr) :: work_cl, task_rw_cl,task_red_cl, task_ini_cl character(kind=c_char,len=*), parameter :: name=C_CHAR_"task"//C_NULL_CHAR character(kind=c_char,len=*), parameter :: namered=C_CHAR_"task_red"//C_NULL_CHAR character(kind=c_char,len=*), parameter :: nameini=C_CHAR_"task_ini"//C_NULL_CHAR real(kind(1.d0)), target :: a,tmp real(kind(1.d0)), target, allocatable :: b(:) integer(kind=8) :: tag, err type(c_ptr) :: ahdl type(c_ptr), target, allocatable :: bhdl(:) type(c_ptr) :: task_mode, codelet_mode integer, target :: comm_world,comm_w_rank, comm_size integer(c_int), target :: w_node, nworkers, work_coef call fstarpu_fxt_autostart_profiling(0) ret = fstarpu_init(c_null_ptr) ret = fstarpu_mpi_init(1) comm_world = fstarpu_mpi_world_comm() comm_w_rank = fstarpu_mpi_world_rank() comm_size = fstarpu_mpi_world_size() if (comm_size.lt.2) then write(*,'(" ")') write(*,'("This application is meant to run with at least two nodes (found ",i4," ; i am ",i4,").")') comm_size, comm_w_rank stop 2 end if allocate(b(comm_size-1), bhdl(comm_size-1)) nworkers = fstarpu_worker_get_count() if (nworkers.lt.1) then write(*,'(" ")') write(*,'("This application is meant to run with at least one worker per node.")') stop 2 end if ! allocate and reduction codelets task_red_cl = fstarpu_codelet_allocate() call fstarpu_codelet_set_name(task_red_cl, namered) call fstarpu_codelet_add_cpu_func(task_red_cl,C_FUNLOC(cl_cpu_task_red)) call fstarpu_codelet_add_buffer(task_red_cl, FSTARPU_RW.ior.FSTARPU_COMMUTE) call fstarpu_codelet_add_buffer(task_red_cl, FSTARPU_R) task_ini_cl = fstarpu_codelet_allocate() call fstarpu_codelet_set_name(task_ini_cl, nameini) call fstarpu_codelet_add_cpu_func(task_ini_cl,C_FUNLOC(cl_cpu_task_ini)) call fstarpu_codelet_add_buffer(task_ini_cl, FSTARPU_W) work_coef=2 do trial=1,2 if (trial.eq.2) then write(*,*) "Using STARPU_MPI_REDUX" codelet_mode = FSTARPU_RW.ior.FSTARPU_COMMUTE task_mode = FSTARPU_MPI_REDUX else if (trial.eq.1) then write(*,*) "Using STARPU_REDUX" codelet_mode = FSTARPU_REDUX task_mode = FSTARPU_REDUX end if ! allocate and fill codelet structs work_cl = fstarpu_codelet_allocate() call fstarpu_codelet_set_name(work_cl, name) call fstarpu_codelet_add_cpu_func(work_cl, C_FUNLOC(cl_cpu_task)) call fstarpu_codelet_add_buffer(work_cl, codelet_mode) call fstarpu_codelet_add_buffer(work_cl, FSTARPU_R) err = fstarpu_mpi_barrier(comm_world) if(comm_w_rank.eq.0) then write(*,'(" ")') a = 1.0 write(*,*) "init a = ", a else b(comm_w_rank) = 1.0 / (comm_w_rank + 1.0) write(*,*) "init b_",comm_w_rank,"=", b(comm_w_rank) end if err = fstarpu_mpi_barrier(comm_world) tag = 0 if(comm_w_rank.eq.0) then call fstarpu_variable_data_register(ahdl, 0, c_loc(a),c_sizeof(a)) do i=1,comm_size-1 call fstarpu_variable_data_register(bhdl(i), -1, c_null_ptr,c_sizeof(b(i))) end do else call fstarpu_variable_data_register(ahdl, -1, c_null_ptr,c_sizeof(a)) do i=1,comm_size-1 if (i.eq.comm_w_rank) then call fstarpu_variable_data_register(bhdl(i), 0, c_loc(b(i)),c_sizeof(b(i))) else call fstarpu_variable_data_register(bhdl(i), -1, c_null_ptr,c_sizeof(b(i))) end if end do end if call fstarpu_mpi_data_register(ahdl, tag, 0) do i=1,comm_size-1 call fstarpu_mpi_data_register(bhdl(i), tag+i,i) end do tag = tag + comm_size call fstarpu_data_set_reduction_methods(ahdl,task_red_cl,task_ini_cl) err = fstarpu_mpi_barrier(comm_world) call fstarpu_fxt_start_profiling() do w_node=1,comm_size-1 do i=1,work_coef*nworkers call fstarpu_mpi_task_insert( (/ c_loc(comm_world), & work_cl, & task_mode, ahdl, & FSTARPU_R, bhdl(w_node), & FSTARPU_EXECUTE_ON_NODE, c_loc(w_node), & C_NULL_PTR /)) end do end do call fstarpu_mpi_redux_data(comm_world, ahdl) err = fstarpu_mpi_wait_for_all(comm_world) if(comm_w_rank.eq.0) then tmp = 0 do w_node=1,comm_size-1 tmp = tmp + 1.0 / (w_node+1.0) end do write(*,*) 'computed result ---> ',a, "expected =",& 1.0 + (comm_size-1.0)*(comm_size)/2.0 + work_coef*nworkers*((comm_size-1.0)*3.0 + tmp) end if err = fstarpu_mpi_barrier(comm_world) call fstarpu_data_unregister(ahdl) do w_node=1,comm_size-1 call fstarpu_data_unregister(bhdl(w_node)) end do call fstarpu_codelet_free(work_cl) end do call fstarpu_fxt_stop_profiling() call fstarpu_codelet_free(task_red_cl) call fstarpu_codelet_free(task_ini_cl) err = fstarpu_mpi_shutdown() call fstarpu_shutdown() deallocate(b, bhdl) stop 0 contains recursive subroutine cl_cpu_task (buffers, cl_args) bind(C) use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module implicit none type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused integer(c_int) :: ret, worker_id integer :: comm_rank integer, target :: i real(kind(1.d0)), pointer :: a, b real(kind(1.d0)) :: old_a worker_id = fstarpu_worker_get_id() comm_rank = fstarpu_mpi_world_rank() call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), a) call c_f_pointer(fstarpu_variable_get_ptr(buffers, 1), b) call fstarpu_sleep(real(0.01, c_float)) old_a = a a = old_a + 3.0 + b write(*,*) "task (c_w_rank:",comm_rank," worker_id:",worker_id,") from ",old_a,"to",a return end subroutine cl_cpu_task recursive subroutine cl_cpu_task_red (buffers, cl_args) bind(C) use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module implicit none type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused integer(c_int) :: ret, worker_id integer, target :: comm_rank real(kind(1.d0)), pointer :: as, ad real(kind(1.d0)) :: old_ad worker_id = fstarpu_worker_get_id() comm_rank = fstarpu_mpi_world_rank() call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), ad) call c_f_pointer(fstarpu_variable_get_ptr(buffers, 1), as) old_ad = ad ad = ad + as call fstarpu_sleep(real(0.01, c_float)) write(*,*) "red_cl (c_w_rank:",comm_rank,"worker_id:",worker_id,")",as, old_ad, ' ---> ',ad return end subroutine cl_cpu_task_red recursive subroutine cl_cpu_task_ini (buffers, cl_args) bind(C) use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module implicit none type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused integer(c_int) :: ret, worker_id integer, target :: comm_rank real(kind(1.d0)), pointer :: a worker_id = fstarpu_worker_get_id() comm_rank = fstarpu_mpi_world_rank() call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), a) call fstarpu_sleep(real(0.005, c_float)) ! As this codelet is run by each worker in the REDUX mode case ! this initialization makes salient the number of copies spawned write(*,*) "ini_cl (c_w_rank:",comm_rank,"worker_id:",worker_id,") set to", comm_rank a = comm_rank return end subroutine cl_cpu_task_ini end program starpu-1.4.9+dfsg/mpi/examples/native_fortran/nf_mpi_redux_tree.f90000066400000000000000000000201041507764646700254470ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! program nf_mpi_redux use iso_c_binding use fstarpu_mod use fstarpu_mpi_mod implicit none integer, target :: ret, np, i, j, arity type(c_ptr) :: work_cl, task_rw_cl,task_red_cl, task_ini_cl character(kind=c_char,len=*), parameter :: name=C_CHAR_"task"//C_NULL_CHAR character(kind=c_char,len=*), parameter :: namered=C_CHAR_"task_red"//C_NULL_CHAR character(kind=c_char,len=*), parameter :: nameini=C_CHAR_"task_ini"//C_NULL_CHAR real(kind(1.d0)), target :: a,tmp real(kind(1.d0)), target, allocatable :: b(:) integer(kind=8) :: tag, err type(c_ptr), target :: ahdl type(c_ptr), target, allocatable :: bhdl(:) type(c_ptr) :: task_mode, codelet_mode integer, target :: comm_world,comm_w_rank, comm_size integer(c_int), target :: w_node, nworkers, work_coef !call fstarpu_fxt_autostart_profiling(0) ret = fstarpu_init(c_null_ptr) ret = fstarpu_mpi_init(1) comm_world = fstarpu_mpi_world_comm() comm_w_rank = fstarpu_mpi_world_rank() comm_size = fstarpu_mpi_world_size() allocate(b(comm_size-1), bhdl(comm_size-1)) nworkers = fstarpu_worker_get_count() if (nworkers.lt.1) then write(*,'(" ")') write(*,'("This application is meant to run with at least one worker per node.")') stop 2 end if ! allocate and reduction codelets task_red_cl = fstarpu_codelet_allocate() call fstarpu_codelet_set_name(task_red_cl, namered) call fstarpu_codelet_add_cpu_func(task_red_cl,C_FUNLOC(cl_cpu_task_red)) call fstarpu_codelet_add_buffer(task_red_cl, FSTARPU_RW.ior.FSTARPU_COMMUTE) call fstarpu_codelet_add_buffer(task_red_cl, FSTARPU_R) task_ini_cl = fstarpu_codelet_allocate() call fstarpu_codelet_set_name(task_ini_cl, nameini) call fstarpu_codelet_add_cpu_func(task_ini_cl,C_FUNLOC(cl_cpu_task_ini)) call fstarpu_codelet_add_buffer(task_ini_cl, FSTARPU_W) work_coef=2 codelet_mode = FSTARPU_RW.ior.FSTARPU_COMMUTE task_mode = FSTARPU_MPI_REDUX ! allocate and fill codelet structs work_cl = fstarpu_codelet_allocate() call fstarpu_codelet_set_name(work_cl, name) call fstarpu_codelet_add_cpu_func(work_cl, C_FUNLOC(cl_cpu_task)) call fstarpu_codelet_add_buffer(work_cl, codelet_mode) call fstarpu_codelet_add_buffer(work_cl, FSTARPU_R) err = fstarpu_mpi_barrier(comm_world) do arity=2,comm_size if(comm_w_rank.eq.0) then write(*,'(" ")') a = 1.0 write(*,*) "init a = ", a else b(comm_w_rank) = 1.0 / (comm_w_rank + 1.0) write(*,*) "init b_",comm_w_rank,"=", b(comm_w_rank) end if err = fstarpu_mpi_barrier(comm_world) tag = 0 if(comm_w_rank.eq.0) then call fstarpu_variable_data_register(ahdl, 0, c_loc(a),c_sizeof(a)) do i=1,comm_size-1 call fstarpu_variable_data_register(bhdl(i), -1, c_null_ptr,c_sizeof(b(i))) end do else call fstarpu_variable_data_register(ahdl, -1, c_null_ptr,c_sizeof(a)) do i=1,comm_size-1 if (i.eq.comm_w_rank) then call fstarpu_variable_data_register(bhdl(i), 0, c_loc(b(i)),c_sizeof(b(i))) else call fstarpu_variable_data_register(bhdl(i), -1, c_null_ptr,c_sizeof(b(i))) end if end do end if call fstarpu_mpi_data_register(ahdl, tag, 0) do i=1,comm_size-1 call fstarpu_mpi_data_register(bhdl(i), tag+i,i) end do tag = tag + comm_size call fstarpu_data_set_reduction_methods(ahdl,task_red_cl,task_ini_cl) err = fstarpu_mpi_barrier(comm_world) call fstarpu_fxt_start_profiling() do w_node=1,comm_size-1 do i=1,work_coef*nworkers call fstarpu_mpi_task_insert( (/ c_loc(comm_world), & work_cl, & task_mode, ahdl, & FSTARPU_R, bhdl(w_node), & FSTARPU_EXECUTE_ON_NODE, c_loc(w_node), & C_NULL_PTR /)) end do end do call fstarpu_mpi_redux_data_tree(comm_world, ahdl, arity) err = fstarpu_mpi_wait_for_all(comm_world) if(comm_w_rank.eq.0) then tmp = 0 do w_node=1,comm_size-1 tmp = tmp + 1.0 / (w_node+1.0) end do write(*,*) 'computed result ---> ',a, "expected =",& 1.0 + (comm_size-1.0)*(comm_size)/2.0 + work_coef*nworkers*((comm_size-1.0)*3.0 + tmp) end if err = fstarpu_mpi_barrier(comm_world) call fstarpu_data_unregister(ahdl) do w_node=1,comm_size-1 call fstarpu_data_unregister(bhdl(w_node)) end do call fstarpu_fxt_stop_profiling() end do call fstarpu_codelet_free(work_cl) call fstarpu_codelet_free(task_red_cl) call fstarpu_codelet_free(task_ini_cl) err = fstarpu_mpi_shutdown() call fstarpu_shutdown() deallocate(b, bhdl) stop 0 contains recursive subroutine cl_cpu_task (buffers, cl_args) bind(C) use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module implicit none type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused integer(c_int) :: ret, worker_id integer :: comm_rank integer, target :: i real(kind(1.d0)), pointer :: a, b real(kind(1.d0)) :: old_a worker_id = fstarpu_worker_get_id() comm_rank = fstarpu_mpi_world_rank() call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), a) call c_f_pointer(fstarpu_variable_get_ptr(buffers, 1), b) call fstarpu_sleep(real(0.01, c_float)) old_a = a a = old_a + 3.0 + b write(*,*) "task (c_w_rank:",comm_rank," worker_id:",worker_id,") from ",old_a,"to",a return end subroutine cl_cpu_task recursive subroutine cl_cpu_task_red (buffers, cl_args) bind(C) use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module implicit none type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused integer(c_int) :: ret, worker_id integer, target :: comm_rank real(kind(1.d0)), pointer :: as, ad real(kind(1.d0)) :: old_ad worker_id = fstarpu_worker_get_id() comm_rank = fstarpu_mpi_world_rank() call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), ad) call c_f_pointer(fstarpu_variable_get_ptr(buffers, 1), as) old_ad = ad ad = ad + as call fstarpu_sleep(real(0.01, c_float)) write(*,*) "red_cl (c_w_rank:",comm_rank,"worker_id:",worker_id,")",as, old_ad, ' ---> ',ad return end subroutine cl_cpu_task_red recursive subroutine cl_cpu_task_ini (buffers, cl_args) bind(C) use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module implicit none type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused integer(c_int) :: ret, worker_id integer, target :: comm_rank real(kind(1.d0)), pointer :: a worker_id = fstarpu_worker_get_id() comm_rank = fstarpu_mpi_world_rank() call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), a) call fstarpu_sleep(real(0.005, c_float)) ! As this codelet is run by each worker in the REDUX mode case ! this initialization makes salient the number of copies spawned write(*,*) "ini_cl (c_w_rank:",comm_rank,"worker_id:",worker_id,") set to", comm_rank a = comm_rank return end subroutine cl_cpu_task_ini end program starpu-1.4.9+dfsg/mpi/examples/native_fortran/nf_redux_test.f90000066400000000000000000000176011507764646700246320ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! program main use iso_c_binding use fstarpu_mod use fstarpu_mpi_mod implicit none integer, target :: ret, np, i, j type(c_ptr) :: task_cl, task_rw_cl, task_red_cl, task_ini_cl character(kind=c_char,len=*), parameter :: name=C_CHAR_"task"//C_NULL_CHAR character(kind=c_char,len=*), parameter :: namered=C_CHAR_"task_red"//C_NULL_CHAR character(kind=c_char,len=*), parameter :: nameini=C_CHAR_"task_ini"//C_NULL_CHAR real(kind(1.d0)), target :: a1, a2, b1, b2 integer(kind=8) :: tag, err type(c_ptr) :: a1hdl, a2hdl, b1hdl, b2hdl integer, target :: comm, comm_world, comm_w_rank, comm_size integer(c_int), target :: w_node call fstarpu_fxt_autostart_profiling(0) ret = fstarpu_init(c_null_ptr) ret = fstarpu_mpi_init(1) comm_world = fstarpu_mpi_world_comm() comm_w_rank = fstarpu_mpi_world_rank() comm_size = fstarpu_mpi_world_size() if (comm_size.ne.4) then write(*,'(" ")') write(*,'("This application is meant to run with 4 MPI")') stop 1 end if err = fstarpu_mpi_barrier(comm_world) if(comm_w_rank.eq.0) then write(*,'(" ")') a1 = 1.0 write(*,*) "init_a1", a1 b1 = 0.5 write(*,*) "init b1", b1 end if if(comm_w_rank.eq.1) then write(*,'(" ")') a2 = 2.0 write(*,*) "init_a2", a2 b2 = 0.8 write(*,*) "init b2", b2 end if ! allocate and fill codelet structs task_cl = fstarpu_codelet_allocate() call fstarpu_codelet_set_name(task_cl, name) call fstarpu_codelet_add_cpu_func(task_cl, C_FUNLOC(cl_cpu_task)) call fstarpu_codelet_add_buffer(task_cl, FSTARPU_REDUX) call fstarpu_codelet_add_buffer(task_cl, FSTARPU_R) ! allocate and reduction codelets task_red_cl = fstarpu_codelet_allocate() call fstarpu_codelet_set_name(task_red_cl, namered) call fstarpu_codelet_add_cpu_func(task_red_cl,C_FUNLOC(cl_cpu_task_red)) call fstarpu_codelet_add_buffer(task_red_cl, FSTARPU_RW.ior.FSTARPU_COMMUTE) call fstarpu_codelet_add_buffer(task_red_cl, FSTARPU_R) task_ini_cl = fstarpu_codelet_allocate() call fstarpu_codelet_set_name(task_ini_cl, nameini) call fstarpu_codelet_add_cpu_func(task_ini_cl,C_FUNLOC(cl_cpu_task_ini)) call fstarpu_codelet_add_buffer(task_ini_cl, FSTARPU_W) err = fstarpu_mpi_barrier(comm_world) tag = 0 if(comm_w_rank.eq.0) then call fstarpu_variable_data_register(a1hdl, 0, c_loc(a1),c_sizeof(a1)) call fstarpu_variable_data_register(b1hdl, 0, c_loc(b1),c_sizeof(b1)) else call fstarpu_variable_data_register(a1hdl, -1, c_null_ptr,c_sizeof(a1)) call fstarpu_variable_data_register(b1hdl, -1, c_null_ptr,c_sizeof(b1)) end if call fstarpu_mpi_data_register(a1hdl,tag,0) call fstarpu_mpi_data_register(b1hdl, tag+1,0) tag = tag + 2 if(comm_w_rank.eq.1) then call fstarpu_variable_data_register(a2hdl, 0, c_loc(a2),c_sizeof(a2)) call fstarpu_variable_data_register(b2hdl, 0, c_loc(b2),c_sizeof(b2)) else call fstarpu_variable_data_register(a2hdl, -1, c_null_ptr,c_sizeof(a2)) call fstarpu_variable_data_register(b2hdl, -1, c_null_ptr,c_sizeof(b2)) end if call fstarpu_mpi_data_register(a2hdl,tag,1) call fstarpu_mpi_data_register(b2hdl, tag+1, 1) tag = tag + 2 call fstarpu_data_set_reduction_methods(a1hdl, task_red_cl,task_ini_cl) call fstarpu_data_set_reduction_methods(a2hdl, task_red_cl,task_ini_cl) err = fstarpu_mpi_barrier(comm_world) call fstarpu_fxt_start_profiling() w_node = 3 comm = comm_world call fstarpu_mpi_task_insert( (/ c_loc(comm), & task_cl, & FSTARPU_REDUX, a1hdl, & FSTARPU_R, b1hdl, & FSTARPU_EXECUTE_ON_NODE, c_loc(w_node), & C_NULL_PTR /)) w_node = 2 comm = comm_world call fstarpu_mpi_task_insert( (/ c_loc(comm), & task_cl, & FSTARPU_REDUX, a2hdl, & FSTARPU_R, b2hdl, & FSTARPU_EXECUTE_ON_NODE, c_loc(w_node), & C_NULL_PTR /)) call fstarpu_mpi_redux_data(comm_world, a1hdl) call fstarpu_mpi_redux_data(comm_world, a2hdl) ! write(*,*) "waiting all tasks ..." err = fstarpu_mpi_wait_for_all(comm_world) if(comm_w_rank.eq.0) then write(*,*) 'computed result ---> ',a1, "expected =",4.5 end if if(comm_w_rank.eq.1) then write(*,*) 'computed result ---> ',a2, "expected=",5.8 end if call fstarpu_data_unregister(a1hdl) call fstarpu_data_unregister(a2hdl) call fstarpu_data_unregister(b1hdl) call fstarpu_data_unregister(b2hdl) call fstarpu_fxt_stop_profiling() call fstarpu_codelet_free(task_cl) call fstarpu_codelet_free(task_red_cl) call fstarpu_codelet_free(task_ini_cl) err = fstarpu_mpi_shutdown() call fstarpu_shutdown() stop contains recursive subroutine cl_cpu_task (buffers, cl_args) bind(C) use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module implicit none type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused integer(c_int) :: ret, worker_id integer :: comm_rank integer, target :: i real(kind(1.d0)), pointer :: a, b real(kind(1.d0)) :: old_a worker_id = fstarpu_worker_get_id() comm_rank = fstarpu_mpi_world_rank() call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), a) call c_f_pointer(fstarpu_variable_get_ptr(buffers, 1), b) call fstarpu_sleep(real(0.01, c_float)) old_a = a a = 3.0 + b write(*,*) "task (c_w_rank:",comm_rank,") from ",old_a,"to",a return end subroutine cl_cpu_task recursive subroutine cl_cpu_task_red (buffers, cl_args) bind(C) use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module implicit none type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused integer(c_int) :: ret integer, target :: comm_rank real(kind(1.d0)), pointer :: as, ad real(kind(1.d0)) :: old_ad comm_rank = fstarpu_mpi_world_rank() call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), ad) call c_f_pointer(fstarpu_variable_get_ptr(buffers, 1), as) old_ad = ad ad = ad + as call fstarpu_sleep(real(0.01, c_float)) write(*,*) "red_cl (c_w_rank:",comm_rank,")",as, old_ad, ' ---> ',ad return end subroutine cl_cpu_task_red recursive subroutine cl_cpu_task_ini (buffers, cl_args) bind(C) use iso_c_binding ! C interfacing module use fstarpu_mod ! StarPU interfacing module implicit none type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused integer(c_int) :: ret integer, target :: comm_rank real(kind(1.d0)), pointer :: a comm_rank = fstarpu_mpi_world_rank() call c_f_pointer(fstarpu_variable_get_ptr(buffers, 0), a) call fstarpu_sleep(real(0.005, c_float)) a = 0.0 write(*,*) "ini_cl (c_w_rank:",comm_rank,")" return end subroutine cl_cpu_task_ini end program main starpu-1.4.9+dfsg/mpi/examples/perf.sh000077500000000000000000000053521507764646700177130ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # 4G x np = 4 * (k*1K) ^ 2 # A G * np = 4 * k^2 * 1M # A * 250 * np = k^2 # A = 6 # k = sqrt(1500*np) # np = 1 => k = 32 # np = 2 => k = 48 # np = 3 => k = 64 # np = 4 => k = 64 # Problem size NBLOCKS=16 BLOCKSIZE=1024 SIZE=$(($NBLOCKS*$BLOCKSIZE)) echo "JOB ID ${PBS_JOBID}" nnodes=$(cat machinefile.${PBS_JOBID}|wc -l) echo "got $nnodes mpi nodes" # Calibrate ncalibrate=0 for i in `seq 1 $ncalibrate` do echo "STARPU_CALIBRATE $i/$ncalibrate" STARPU_CALIBRATE=1 STARPU_SCHED="dmda" STARPU_PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $nnodes $MS_LAUNCHER $STARPU_LAUNCH ./mpi_lu/plu_example_float -p 2 -q 2 -nblocks 32 -size $((32*$BLOCKSIZE)) -numa done func() { ngpus=$1 np=$2 p=$3 q=$4 nblocks=$5 echo "*******************************************"> log echo "*************** NGPUS $ngpus - np $np - nblocks $nblocks **************">> log echo "*******************************************">> log cat log cat log >> log.all STARPU_NCPUS=0 STARPU_NCUDA=$ngpus STARPU_SCHED="dmda" STARPU_PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $np $MS_LAUNCHER $STARPU_LAUNCH ./mpi_lu/plu_example_float -p $p -q $q -nblocks $nblocks -size $(($nblocks * $BLOCKSIZE)) -numa > log.out 2> log.err cat log.out > log cat log.err >> log cat log cat log >> log.all } rm -f log.all #how many time do we repeat each experiment ? nloops=3 per_node_max_memory=7000 for np in 1 2 4 do for nblocks in 16 32 48 64 80 do for ngpus_per_node in 1 2 3 4 do for loop in `seq 1 $nloops` do # Compute p and q from np case $np in 1) p=1; q=1;; 2) p=2; q=1;; 4) p=2; q=2;; *) echo -n "does not support $np nodes yet";; esac # Does the problem fit into memory ? matrix_size=$(($nblocks * $BLOCKSIZE)) per_node_memory=$(($((4*$matrix_size*$matrix_size/(1024*1024))) / $np)) echo "NP $np P $p Q $q SIZE $per_node_memory NBLOCKS $nblocks" if test $per_node_memory -ge $per_node_max_memory; then echo "Problem is too large !" else func $ngpus_per_node $np $p $q $nblocks echo "go !" fi done done done done starpu-1.4.9+dfsg/mpi/examples/stencil/000077500000000000000000000000001507764646700200545ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/examples/stencil/stencil5.c000066400000000000000000000157231507764646700217560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" void stencil5_cpu(void *descr[], void *_args) { (void)_args; float *xy = (float *)STARPU_VARIABLE_GET_PTR(descr[0]); float *xm1y = (float *)STARPU_VARIABLE_GET_PTR(descr[1]); float *xp1y = (float *)STARPU_VARIABLE_GET_PTR(descr[2]); float *xym1 = (float *)STARPU_VARIABLE_GET_PTR(descr[3]); float *xyp1 = (float *)STARPU_VARIABLE_GET_PTR(descr[4]); // fprintf(stdout, "VALUES: %2.2f %2.2f %2.2f %2.2f %2.2f\n", *xy, *xm1y, *xp1y, *xym1, *xyp1); *xy = (*xy + *xm1y + *xp1y + *xym1 + *xyp1) / 5; // fprintf(stdout, "VALUES: %2.2f %2.2f %2.2f %2.2f %2.2f\n", *xy, *xm1y, *xp1y, *xym1, *xyp1); } struct starpu_codelet stencil5_cl = { .cpu_funcs = {stencil5_cpu}, .nbuffers = 5, .modes = {STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R}, .model = &starpu_perfmodel_nop, }; #ifdef STARPU_QUICK_CHECK # define NITER_DEF 5 # define X 4 # define Y 4 #elif !defined(STARPU_LONG_CHECK) # define NITER_DEF 10 # define X 5 # define Y 5 #else # define NITER_DEF 100 # define X 20 # define Y 20 #endif int display = 0; int niter = NITER_DEF; /* Returns the MPI node number where data indexes index is */ int my_distrib(int x, int y, int nb_nodes) { /* Block distrib */ return ((int)(x / sqrt(nb_nodes) + (y / sqrt(nb_nodes)) * sqrt(nb_nodes))) % nb_nodes; } /* Shifted distribution, for migration example */ int my_distrib2(int x, int y, int nb_nodes) { return (my_distrib(x, y, nb_nodes) + 1) % nb_nodes; } static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-iter") == 0) { char *argptr; niter = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-display") == 0) { display = 1; } } } int main(int argc, char **argv) { int my_rank, size, x, y, loop; float mean=0; float matrix[X][Y]; starpu_data_handle_t data_handles[X][Y]; int ret; ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_mpi_shutdown(); if (my_rank == 0) return 77; else return 0; } parse_args(argc, argv); /* Initial data values */ starpu_srand48((long int)time(NULL)); for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { matrix[x][y] = (float)starpu_drand48(); mean += matrix[x][y]; } } mean /= (X*Y); if (display) { FPRINTF_MPI(stdout, "mean=%2.2f\n", mean); for(x = 0; x < X; x++) { fprintf(stdout, "[%d] ", my_rank); for (y = 0; y < Y; y++) { fprintf(stdout, "%2.2f ", matrix[x][y]); } fprintf(stdout, "\n"); } } /* Initial distribution */ for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { int mpi_rank = my_distrib(x, y, size); if (mpi_rank == my_rank) { //FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y); starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(float)); } else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size) || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size)) { /* I don't own this index, but will need it for my computations */ //FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", my_rank, x, y); starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float)); } else { /* I know it's useless to allocate anything for this */ data_handles[x][y] = NULL; } if (data_handles[x][y]) { starpu_data_set_coordinates(data_handles[x][y], 2, x, y); starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank); } } } /* First computation with initial distribution */ for(loop=0 ; loop #include #include #include void stencil5_cpu(void *descr[], void *_args) { (void)_args; float *xy = (float *)STARPU_VARIABLE_GET_PTR(descr[0]); float *xm1y = (float *)STARPU_VARIABLE_GET_PTR(descr[1]); float *xp1y = (float *)STARPU_VARIABLE_GET_PTR(descr[2]); float *xym1 = (float *)STARPU_VARIABLE_GET_PTR(descr[3]); float *xyp1 = (float *)STARPU_VARIABLE_GET_PTR(descr[4]); // fprintf(stdout, "VALUES: %2.2f %2.2f %2.2f %2.2f %2.2f\n", *xy, *xm1y, *xp1y, *xym1, *xyp1); *xy = (*xy + *xm1y + *xp1y + *xym1 + *xyp1) / 5; // fprintf(stdout, "VALUES: %2.2f %2.2f %2.2f %2.2f %2.2f\n", *xy, *xm1y, *xp1y, *xym1, *xyp1); } struct starpu_codelet stencil5_cl = { .cpu_funcs = {stencil5_cpu}, .nbuffers = 5, .modes = {STARPU_RW, STARPU_R, STARPU_R, STARPU_R, STARPU_R}, .model = &starpu_perfmodel_nop, }; #ifdef STARPU_QUICK_CHECK # define NITER_DEF 5 # define X 4 # define Y 4 #elif !defined(STARPU_LONG_CHECK) # define NITER_DEF 10 # define X 5 # define Y 5 #else # define NITER_DEF 100 # define X 20 # define Y 20 #endif int display = 0; int niter = NITER_DEF; /* Returns the MPI node number where data indexes index is */ int my_distrib(int x, int y, int nb_nodes) { /* Block distrib */ return ((int)(x / sqrt(nb_nodes) + (y / sqrt(nb_nodes)) * sqrt(nb_nodes))) % nb_nodes; } static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-iter") == 0) { char *argptr; niter = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-display") == 0) { display = 1; } } } void get_neighbors(int **neighbor_ids, int *nneighbors) { int rank, size; starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size <= 2) { *nneighbors = 1; *neighbor_ids = malloc(sizeof(int)); *neighbor_ids[0] = rank==size-1?0:rank+1; fprintf(stderr, "rank %d has neighbor %d\n", rank, *neighbor_ids[0]); } else { *nneighbors = 2; *neighbor_ids = malloc(2*sizeof(int)); (*neighbor_ids)[0] = rank==size-1?0:rank+1; (*neighbor_ids)[1] = rank==0?size-1:rank-1; fprintf(stderr, "rank %d has neighbor %d and %d\n", rank, (*neighbor_ids)[0], (*neighbor_ids)[1]); } } struct data_node { starpu_data_handle_t data_handle; int node; }; struct data_node data_nodes[X][Y]; void get_data_unit_to_migrate(starpu_data_handle_t **handle_unit, int *nhandles, int dst_node) { int rank, x, y; starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); fprintf(stderr, "Looking to move data from %d to %d\n", rank, dst_node); for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { if (data_nodes[x][y].node == rank) { *handle_unit = malloc(sizeof(starpu_data_handle_t)); *handle_unit[0] = data_nodes[x][y].data_handle; *nhandles = 1; data_nodes[x][y].node = dst_node; return; } } } *nhandles = 0; } int main(int argc, char **argv) { int my_rank, size, x, y, loop; float mean=0; float matrix[X][Y]; struct starpu_mpi_lb_conf itf; int ret; itf.get_neighbors = get_neighbors; itf.get_data_unit_to_migrate = get_data_unit_to_migrate; ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size > 2) { FPRINTF(stderr, "Only works with 2 nodes\n"); starpu_mpi_shutdown(); if (my_rank == 0) return 77; else return 0; } if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_mpi_shutdown(); if (my_rank == 0) return 77; else return 0; } { char sleep_thr[10]; snprintf(sleep_thr, 10, "%d", Y); setenv("LB_HEAT_SLEEP_THRESHOLD", sleep_thr, 1); } starpu_mpi_lb_init("heat", &itf); parse_args(argc, argv); /* Initial data values */ starpu_srand48((long int)time(NULL)); for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { matrix[x][y] = (float)starpu_drand48(); mean += matrix[x][y]; } } mean /= (X*Y); if (display) { FPRINTF_MPI(stdout, "mean=%2.2f\n", mean); for(x = 0; x < X; x++) { fprintf(stdout, "[%d] ", my_rank); for (y = 0; y < Y; y++) { fprintf(stdout, "%2.2f ", matrix[x][y]); } fprintf(stdout, "\n"); } } /* Initial distribution */ for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { data_nodes[x][y].node = my_distrib(x, y, size); if (data_nodes[x][y].node == my_rank) { //FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y); starpu_variable_data_register(&data_nodes[x][y].data_handle, 0, (uintptr_t)&(matrix[x][y]), sizeof(float)); } else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size) || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size)) { /* I don't own this index, but will need it for my computations */ //FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", my_rank, x, y); starpu_variable_data_register(&data_nodes[x][y].data_handle, -1, (uintptr_t)NULL, sizeof(float)); } else { /* I know it's useless to allocate anything for this */ data_nodes[x][y].data_handle = NULL; } if (data_nodes[x][y].data_handle) { starpu_data_set_coordinates(data_nodes[x][y].data_handle, 2, x, y); starpu_mpi_data_register(data_nodes[x][y].data_handle, (y*X)+x, data_nodes[x][y].node); } } } /* First computation with initial distribution */ for(loop=0 ; loop #include #include "my_interface.h" void starpu_my_data_display_codelet_cpu(void *descr[], void *_args) { char c = STARPU_MY_DATA_GET_CHAR(descr[0]); int d = STARPU_MY_DATA_GET_INT(descr[0]); char msg[100]=""; if (_args) starpu_codelet_unpack_args(_args, &msg); fprintf(stderr, "[%s] My value = '%c' %d\n", msg, c, d); } void starpu_my_data_compare_codelet_cpu(void *descr[], void *_args) { int *compare; starpu_codelet_unpack_args(_args, &compare); int d0 = STARPU_MY_DATA_GET_INT(descr[0]); char c0 = STARPU_MY_DATA_GET_CHAR(descr[0]); int d1 = STARPU_MY_DATA_GET_INT(descr[1]); char c1 = STARPU_MY_DATA_GET_CHAR(descr[1]); *compare = (d0 == d1 && c0 == c1); } void _starpu_my_data_datatype_allocate(unsigned node, MPI_Datatype *mpi_datatype) { int ret; int blocklengths[2] = {1, 1}; MPI_Aint displacements[2]; MPI_Datatype types[2] = {MPI_INT, MPI_CHAR}; struct starpu_my_data *myinterface; myinterface = malloc(sizeof(struct starpu_my_data)); MPI_Get_address(myinterface, displacements); MPI_Get_address(&myinterface[0].c, displacements+1); displacements[1] -= displacements[0]; displacements[0] = 0; ret = MPI_Type_create_struct(2, blocklengths, displacements, types, mpi_datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed"); ret = MPI_Type_commit(mpi_datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); free(myinterface); } int starpu_my_data_datatype_allocate(starpu_data_handle_t handle, unsigned node, MPI_Datatype *mpi_datatype) { (void)handle; _starpu_my_data_datatype_allocate(node, mpi_datatype); return 0; } void starpu_my_data_datatype_free(MPI_Datatype *mpi_datatype) { int ret = MPI_Type_free(mpi_datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_free failed"); } int starpu_my_data2_datatype_allocate(starpu_data_handle_t handle, unsigned node, MPI_Datatype *mpi_datatype) { (void)handle; (void)mpi_datatype; return -1; } void starpu_my_data2_datatype_free(MPI_Datatype *mpi_datatype) { STARPU_ASSERT_MSG(0, "should not be called\n"); } char starpu_my_data_interface_get_char(void *interface) { struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) interface; struct starpu_my_data *data = (struct starpu_my_data *)my_data->ptr; return data->c; } int starpu_my_data_interface_get_int(void *interface) { struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) interface; struct starpu_my_data *data = (struct starpu_my_data *)my_data->ptr; return data->d; } int starpu_my_data_get_int(starpu_data_handle_t handle) { struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); struct starpu_my_data *data = (struct starpu_my_data *)my_data->ptr; return data->d; } char starpu_my_data_get_char(starpu_data_handle_t handle) { struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); struct starpu_my_data *data = (struct starpu_my_data *)my_data->ptr; return data->c; } static void data_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_my_data_interface *my_data_interface = (struct starpu_my_data_interface *) data_interface; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_my_data_interface *local_interface = (struct starpu_my_data_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->ptr = my_data_interface->ptr; local_interface->dev_handle = my_data_interface->dev_handle; local_interface->offset = my_data_interface->offset; } else { local_interface->ptr = 0; local_interface->dev_handle = 0; local_interface->offset = 0; } } } static starpu_ssize_t data_allocate_data_on_node(void *data_interface, unsigned node) { uintptr_t addr = 0, handle; struct starpu_my_data_interface *my_data_interface = (struct starpu_my_data_interface *) data_interface; starpu_ssize_t allocated_memory = sizeof(int)+sizeof(char); handle = starpu_malloc_on_node(node, allocated_memory); if (!handle) return -ENOMEM; if (starpu_node_get_kind(node) != STARPU_OPENCL_RAM) addr = handle; /* update the data properly in consequence */ my_data_interface->ptr = addr; my_data_interface->dev_handle = handle; my_data_interface->offset = 0; return allocated_memory; } static void data_free_data_on_node(void *data_interface, unsigned node) { struct starpu_my_data_interface *my_data_interface = (struct starpu_my_data_interface *) data_interface; starpu_free_on_node(node, my_data_interface->dev_handle, sizeof(int)+sizeof(char)); my_data_interface->ptr = 0; my_data_interface->dev_handle = 0; } static size_t data_get_size(starpu_data_handle_t handle) { (void)handle; return sizeof(int) + sizeof(char); } static size_t data_get_alloc_size(starpu_data_handle_t handle) { (void)handle; return sizeof(int) + sizeof(char); } static uint32_t data_footprint(starpu_data_handle_t handle) { struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return starpu_hash_crc32c_be(my_data->ptr, 0); } static int data_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { (void)handle; (void)node; (void)ptr; (void)count; STARPU_ASSERT_MSG(0, "The data interface has been registered with starpu_mpi_datatype_register(). Calling the pack_data function should not happen\n"); return 0; } static int data_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { (void)handle; (void)node; (void)ptr; STARPU_ASSERT_MSG(0, "The data interface has been registered with starpu_mpi_datatype_register(). Calling the unpack_data function should not happen\n"); return 0; } static int data_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { (void)handle; (void)node; (void)ptr; (void)count; STARPU_ASSERT_MSG(0, "The data interface has been registered with starpu_mpi_datatype_register(). Calling the unpack_data function should not happen\n"); return 0; } static int data_pack_data2(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); *count = sizeof(int) + sizeof(char); if (ptr != NULL) { int d = starpu_my_data_get_int(handle); char c = starpu_my_data_get_char(handle); *ptr = (void*) starpu_malloc_on_node_flags(node, *count, 0); memcpy(*ptr, &d, sizeof(int)); char *x = *ptr; x += sizeof(int); memcpy(x, &c, sizeof(char)); } return 0; } static int data_peek_data2(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { (void)count; STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); STARPU_ASSERT(count == sizeof(int)+sizeof(char)); struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) starpu_data_get_interface_on_node(handle, node); struct starpu_my_data *data = (struct starpu_my_data *)my_data->ptr; memcpy(&data->d, ptr, sizeof(int)); char *x = ptr; x += sizeof(int); memcpy(&data->c, x, sizeof(char)); return 0; } static int data_unpack_data2(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { data_peek_data2(handle, node, ptr, count); starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); return 0; } static starpu_ssize_t data_describe(void *data_interface, char *buf, size_t size) { struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) data_interface; struct starpu_my_data *data = (struct starpu_my_data *)my_data->ptr; if (data) return snprintf(buf, size, "Data%d-%c", data->d, data->c); else return snprintf(buf, size, "DataUNKNOWN"); } static void *data_to_pointer(void *data_interface, unsigned node) { (void) node; struct starpu_my_data_interface *my_data_interface = data_interface; return (void*) my_data_interface->ptr; } static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct starpu_my_data_interface *src = src_interface; struct starpu_my_data_interface *dst = dst_interface; int ret = 0; fprintf(stderr, "copying data src=%p to dst=%p\n", (void*) src->ptr, (void*) dst->ptr); if (starpu_interface_copy(src->dev_handle, src->offset, src_node, dst->dev_handle, dst->offset, dst_node, sizeof(int) + sizeof(char), async_data)) ret = -EAGAIN; return ret; } static const struct starpu_data_copy_methods data_copy_methods = { .any_to_any = copy_any_to_any }; static struct starpu_data_interface_ops interface_data_ops = { .register_data_handle = data_register_data_handle, .allocate_data_on_node = data_allocate_data_on_node, .free_data_on_node = data_free_data_on_node, .copy_methods = &data_copy_methods, .get_size = data_get_size, .get_alloc_size = data_get_alloc_size, .footprint = data_footprint, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct starpu_my_data_interface), .to_pointer = data_to_pointer, .pack_data = data_pack_data, .peek_data = data_peek_data, .unpack_data = data_unpack_data, .describe = data_describe }; void starpu_my_data_register(starpu_data_handle_t *handleptr, unsigned home_node, struct starpu_my_data *xc) { if (interface_data_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID) { interface_data_ops.interfaceid = starpu_data_interface_get_next_id(); starpu_mpi_interface_datatype_node_register(interface_data_ops.interfaceid, starpu_my_data_datatype_allocate, starpu_my_data_datatype_free); } struct starpu_my_data_interface data = { .id = interface_data_ops.interfaceid, .ptr = (uintptr_t) xc, .dev_handle = (uintptr_t) xc, .offset = 0, }; starpu_data_register(handleptr, home_node, &data, &interface_data_ops); } void starpu_my_data_shutdown(void) { starpu_mpi_interface_datatype_unregister(interface_data_ops.interfaceid); } static struct starpu_data_interface_ops interface_data2_ops = { .register_data_handle = data_register_data_handle, .allocate_data_on_node = data_allocate_data_on_node, .free_data_on_node = data_free_data_on_node, .copy_methods = &data_copy_methods, .get_size = data_get_size, .get_alloc_size = data_get_alloc_size, .footprint = data_footprint, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct starpu_my_data_interface), .to_pointer = data_to_pointer, .pack_data = data_pack_data2, .peek_data = data_peek_data2, .unpack_data = data_unpack_data2, .describe = data_describe }; void starpu_my_data2_register(starpu_data_handle_t *handleptr, unsigned home_node, struct starpu_my_data *xc) { if (interface_data2_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID) { interface_data2_ops.interfaceid = starpu_data_interface_get_next_id(); starpu_mpi_interface_datatype_node_register(interface_data2_ops.interfaceid, starpu_my_data2_datatype_allocate, starpu_my_data2_datatype_free); } struct starpu_my_data_interface data = { .id = interface_data_ops.interfaceid, .ptr = (uintptr_t) xc, .dev_handle = (uintptr_t) xc, .offset = 0, }; starpu_data_register(handleptr, home_node, &data, &interface_data2_ops); } void starpu_my_data2_shutdown(void) { starpu_mpi_interface_datatype_unregister(interface_data2_ops.interfaceid); } starpu-1.4.9+dfsg/mpi/examples/user_datatype/my_interface.h000066400000000000000000000057221507764646700241100ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #ifndef __DATA_INTERFACE_H #define __DATA_INTERFACE_H struct starpu_my_data_interface { enum starpu_data_interface_id id; /**< Identifier of the interface */ uintptr_t ptr; /**< local pointer of the data */ uintptr_t dev_handle; /**< device handle of the data. */ size_t offset; /**< offset in the data */ }; struct starpu_my_data { int d; char c; }; void starpu_my_data_register(starpu_data_handle_t *handle, unsigned home_node, struct starpu_my_data *xc); void starpu_my_data2_register(starpu_data_handle_t *handle, unsigned home_node, struct starpu_my_data *xc); char starpu_my_data_get_char(starpu_data_handle_t handle); int starpu_my_data_get_int(starpu_data_handle_t handle); char starpu_my_data_interface_get_char(void *interface); int starpu_my_data_interface_get_int(void *interface); #define STARPU_MY_DATA_GET_CHAR(interface) starpu_my_data_interface_get_char(interface) #define STARPU_MY_DATA_GET_INT(interface) starpu_my_data_interface_get_int(interface) void _starpu_my_data_datatype_allocate(unsigned node, MPI_Datatype *mpi_datatype); int starpu_my_data_datatype_allocate(starpu_data_handle_t handle, unsigned node, MPI_Datatype *mpi_datatype); void starpu_my_data_datatype_free(MPI_Datatype *mpi_datatype); int starpu_my_data2_datatype_allocate(starpu_data_handle_t handle, unsigned node, MPI_Datatype *mpi_datatype); void starpu_my_data2_datatype_free(MPI_Datatype *mpi_datatype); void starpu_my_data_display_codelet_cpu(void *descr[], void *_args); void starpu_my_data_compare_codelet_cpu(void *descr[], void *_args); static struct starpu_codelet starpu_my_data_display_codelet = { .cpu_funcs = {starpu_my_data_display_codelet_cpu}, .cpu_funcs_name = {"starpu_my_data_display_codelet_cpu"}, .nbuffers = 1, .modes = {STARPU_R}, .model = &starpu_perfmodel_nop, .name = "starpu_my_data_display_codelet" }; static struct starpu_codelet starpu_my_data_compare_codelet = { .cpu_funcs = {starpu_my_data_compare_codelet_cpu}, .cpu_funcs_name = {"starpu_my_data_compare_codelet_cpu"}, .nbuffers = 2, .modes = {STARPU_R, STARPU_R}, .model = &starpu_perfmodel_nop, .name = "starpu_my_data_compare_codelet" }; void starpu_my_data_shutdown(void); void starpu_my_data2_shutdown(void); #endif /* __MY_INTERFACE_H */ starpu-1.4.9+dfsg/mpi/examples/user_datatype/user_datatype.c000066400000000000000000000122261507764646700243040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "my_interface.h" #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) int main(int argc, char **argv) { int rank, nodes; int ret=0; int compare=0; ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); if (nodes < 2 || (starpu_cpu_worker_get_count() == 0)) { if (rank == 0) { if (nodes < 2) fprintf(stderr, "We need at least 2 processes.\n"); else fprintf(stderr, "We need at least 1 CPU.\n"); } starpu_mpi_shutdown(); return 77; } struct starpu_my_data my0 = {.d = 42 , .c = 'n'}; struct starpu_my_data my1 = {.d = 98 , .c = 'z'}; starpu_data_handle_t handle0; starpu_data_handle_t handle1; if (rank == 1) { my0.d = 0; my0.c = 'z'; } starpu_my_data_register(&handle0, STARPU_MAIN_RAM, &my0); starpu_my_data_register(&handle1, -1, &my1); // Send data directly with MPI if (rank == 0) { MPI_Datatype mpi_datatype; _starpu_my_data_datatype_allocate(STARPU_MAIN_RAM, &mpi_datatype); MPI_Send(&my0, 1, mpi_datatype, 1, 42, MPI_COMM_WORLD); starpu_my_data_datatype_free(&mpi_datatype); } else if (rank == 1) { MPI_Datatype mpi_datatype; MPI_Status status; struct starpu_my_data myx; _starpu_my_data_datatype_allocate(STARPU_MAIN_RAM, &mpi_datatype); MPI_Recv(&myx, 1, mpi_datatype, 0, 42, MPI_COMM_WORLD, &status); FPRINTF(stderr, "[mpi] Received value: '%c' %d\n", myx.c, myx.d); starpu_my_data_datatype_free(&mpi_datatype); STARPU_ASSERT_MSG(myx.d == 42 && myx.c == 'n', "Incorrect received value\n"); } if (rank == 0) { struct starpu_my_data myx = {.d = 98 , .c = 'z'}; starpu_data_handle_t handlex; starpu_my_data_register(&handlex, STARPU_MAIN_RAM, &myx); ret = starpu_mpi_send(handlex, 1, 10, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_send"); starpu_data_unregister(handlex); } else if (rank == 1) { MPI_Status status; struct starpu_my_data myx = {.d = 11 , .c = 'a'}; starpu_data_handle_t handlex; starpu_my_data_register(&handlex, STARPU_MAIN_RAM, &myx); ret = starpu_mpi_recv(handlex, 0, 10, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_unregister(handlex); FPRINTF(stderr, "[starpu mpi] myx.d=%d myx.c=%c\n", myx.d, myx.c); STARPU_ASSERT_MSG(myx.d == 98 && myx.c == 'z', "Incorrect received value\n"); } if (rank == 0) { int *compare_ptr = &compare; ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node0 initial value", strlen("node0 initial value")+1, STARPU_R, handle0, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_mpi_isend_detached(handle0, 1, 20, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); ret = starpu_mpi_irecv_detached(handle1, 1, 30, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node0 received value", strlen("node0 received value")+1, STARPU_R, handle1, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&starpu_my_data_compare_codelet, STARPU_R, handle0, STARPU_R, handle1, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } else if (rank == 1) { ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 initial value", strlen("node1 initial value")+1, STARPU_R, handle0, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_mpi_irecv_detached(handle0, 0, 20, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 received value", strlen("node1 received value")+1, STARPU_R, handle0, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_mpi_isend_detached(handle0, 0, 30, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } starpu_mpi_wait_for_all(MPI_COMM_WORLD); starpu_mpi_barrier(MPI_COMM_WORLD); starpu_data_unregister(handle0); starpu_data_unregister(handle1); starpu_my_data_shutdown(); starpu_mpi_shutdown(); if (rank == 0) { FPRINTF(stderr, "[node 0] %s\n", compare==1?"SUCCESS":"FAILURE"); } return (rank == 0) ? !compare : 0; } starpu-1.4.9+dfsg/mpi/examples/user_datatype/user_datatype2.c000066400000000000000000000074131507764646700243700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "my_interface.h" #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) int main(int argc, char **argv) { int rank, nodes; int ret=0; int compare=0; ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); if (nodes < 2 || (starpu_cpu_worker_get_count() == 0)) { if (rank == 0) { if (nodes < 2) fprintf(stderr, "We need at least 2 processes.\n"); else fprintf(stderr, "We need at least 1 CPU.\n"); } starpu_mpi_shutdown(); return 77; } struct starpu_my_data my0 = {.d = 42 , .c = 'n'}; struct starpu_my_data my1 = {.d = 98 , .c = 'z'}; starpu_data_handle_t handle0; starpu_data_handle_t handle1; if (rank == 1) { my0.d = 0; my0.c = 'z'; } starpu_my_data2_register(&handle0, STARPU_MAIN_RAM, &my0); starpu_my_data2_register(&handle1, -1, &my1); starpu_mpi_barrier(MPI_COMM_WORLD); if (rank == 0) { int *compare_ptr = &compare; ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node0 initial value", strlen("node0 initial value")+1, STARPU_R, handle0, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_mpi_isend_detached(handle0, 1, 10, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); ret = starpu_mpi_irecv_detached(handle1, 1, 20, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node0 received value", strlen("node0 received value")+1, STARPU_R, handle1, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&starpu_my_data_compare_codelet, STARPU_R, handle0, STARPU_R, handle1, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } else if (rank == 1) { ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 initial value", strlen("node1 initial value")+1, STARPU_R, handle0, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_mpi_irecv_detached(handle0, 0, 10, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 received value", strlen("node1 received value")+1, STARPU_R, handle0, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_mpi_isend_detached(handle0, 0, 20, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } starpu_mpi_wait_for_all(MPI_COMM_WORLD); starpu_mpi_barrier(MPI_COMM_WORLD); starpu_mpi_datatype_unregister(handle0); starpu_data_unregister(handle0); starpu_data_unregister(handle1); starpu_my_data2_shutdown(); starpu_mpi_shutdown(); if (rank == 0) { FPRINTF(stderr, "[node 0] %s\n", compare==1?"SUCCESS":"FAILURE"); } return (rank == 0) ? !compare : 0; } starpu-1.4.9+dfsg/mpi/examples/user_datatype/user_datatype_early.c000066400000000000000000000071731507764646700255050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "my_interface.h" #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) int main(int argc, char **argv) { int rank, nodes; int ret=0; ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); if (nodes < 2 || (starpu_cpu_worker_get_count() == 0)) { if (rank == 0) { if (nodes < 2) fprintf(stderr, "We need at least 2 processes.\n"); else fprintf(stderr, "We need at least 1 CPU.\n"); } starpu_mpi_shutdown(); return 77; } struct starpu_my_data my0 = {.d = 42 , .c = 'n'}; struct starpu_my_data my1 = {.d = 11 , .c = 'a'}; if (rank == 1) { my0.d *= 2; my0.c += 1; my1.d *= 2; my1.c += 1; } starpu_data_handle_t handle0; starpu_data_handle_t handle1; starpu_my_data_register(&handle0, STARPU_MAIN_RAM, &my0); starpu_my_data_register(&handle1, STARPU_MAIN_RAM, &my1); if (rank == 0) { ret = starpu_mpi_send(handle0, 1, 10, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_send(handle1, 1, 20, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } else if (rank == 1) { // We want handle0 to be received as early_data and as starpu_mpi_data_register() has not be called, it will be received as raw memory, and then unpacked with MPI_Unpack() ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 handle0 init value", strlen("node1 handle0 init value")+1, STARPU_R, handle0, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 handle1 init value", strlen("node1 handle1 init value")+1, STARPU_R, handle1, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_mpi_recv(handle1, 0, 20, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv1"); ret = starpu_mpi_recv(handle0, 0, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv2"); ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 handle0 received value", strlen("node1 handle0 received value")+1, STARPU_R, handle0, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 handle1 received value", strlen("node1 handle1 received value")+1, STARPU_R, handle1, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } starpu_mpi_wait_for_all(MPI_COMM_WORLD); starpu_mpi_barrier(MPI_COMM_WORLD); starpu_data_unregister(handle0); starpu_data_unregister(handle1); if (rank == 1) { STARPU_ASSERT_MSG(my0.d == 42 && my0.c == 'n' && my1.d == 11 && my1.c == 'a', "Incorrect received values"); } starpu_my_data_shutdown(); starpu_mpi_shutdown(); return 0; } starpu-1.4.9+dfsg/mpi/examples/user_datatype/user_datatype_interface.c000066400000000000000000000067321507764646700263310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "my_interface.h" #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) int main(int argc, char **argv) { int rank, nodes; int ret=0; struct starpu_my_data my_data; struct starpu_my_data my_data2 = {.d = 77, .c = 'x'}; starpu_data_handle_t my_handle1; starpu_data_handle_t my_handle2; starpu_data_handle_t my_handle3; ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); if (nodes < 2 || (starpu_cpu_worker_get_count() == 0)) { if (rank == 0) { if (nodes < 2) fprintf(stderr, "We need at least 2 processes.\n"); else fprintf(stderr, "We need at least 1 CPU.\n"); } starpu_mpi_shutdown(); return 77; } if (rank == 0) { my_data.d = 42; my_data.c = 'n'; } else { my_data.d = 0; my_data.c = 'z'; } starpu_my_data_register(&my_handle1, STARPU_MAIN_RAM, &my_data2); starpu_my_data_register(&my_handle2, STARPU_MAIN_RAM, &my_data2); starpu_my_data_register(&my_handle3, STARPU_MAIN_RAM, &my_data); starpu_mpi_barrier(MPI_COMM_WORLD); if (rank == 0) { ret = starpu_mpi_send(my_handle1, 1, 10, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_send(my_handle2, 1, 12, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_send(my_handle3, 1, 14, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } else if (rank == 1) { starpu_mpi_req req; ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 initial value", strlen("node1 initial value")+1, STARPU_R, my_handle3, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_mpi_irecv(my_handle3, &req, 0, 14, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); ret = starpu_mpi_recv(my_handle2, 0, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); ret = starpu_mpi_recv(my_handle1, 0, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); ret = starpu_mpi_wait(&req, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); ret = starpu_task_insert(&starpu_my_data_display_codelet, STARPU_VALUE, "node1 rceived value", strlen("node1 rceived value")+1, STARPU_R, my_handle3, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } starpu_task_wait_for_all(); starpu_mpi_wait_for_all(MPI_COMM_WORLD); starpu_mpi_barrier(MPI_COMM_WORLD); starpu_data_unregister(my_handle1); starpu_data_unregister(my_handle2); starpu_data_unregister(my_handle3); starpu_my_data_shutdown(); starpu_mpi_shutdown(); return 0; } starpu-1.4.9+dfsg/mpi/include/000077500000000000000000000000001507764646700162205ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/include/fstarpu_mpi_mod.f90000066400000000000000000001320341507764646700217330ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! module fstarpu_mpi_mod use iso_c_binding use fstarpu_mod implicit none interface ! == mpi/include/starpu_mpi.h == ! int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm); function fstarpu_mpi_isend (dh, mpi_req, dst, data_tag, mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: mpi_req integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_isend ! == mpi/include/starpu_mpi.h == ! int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm); function fstarpu_mpi_isend_prio (dh, mpi_req, dst, data_tag, prio, mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend_prio type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: mpi_req integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: prio integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_isend_prio ! int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm); function fstarpu_mpi_irecv (dh, mpi_req, src, data_tag, mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_irecv type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: mpi_req integer(c_int), value, intent(in) :: src integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_irecv ! int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm); function fstarpu_mpi_send (dh, dst, data_tag, mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_send type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_send ! int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm); function fstarpu_mpi_send_prio (dh, dst, data_tag, prio, mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_send_prio type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: prio integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_send_prio ! int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, MPI_Status *status); function fstarpu_mpi_recv (dh, src, data_tag, mpi_comm, mpi_status) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_recv type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: src integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: mpi_status end function fstarpu_mpi_recv ! int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg); function fstarpu_mpi_isend_detached (dh, dst, data_tag, mpi_comm, callback, arg) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend_detached type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: arg end function fstarpu_mpi_isend_detached ! int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg); function fstarpu_mpi_isend_detached_prio (dh, dst, data_tag, prio, mpi_comm, callback, arg) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend_detached_prio type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: prio integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: arg end function fstarpu_mpi_isend_detached_prio ! int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg); function fstarpu_mpi_recv_detached (dh, src, data_tag, mpi_comm, callback, arg) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_recv_detached type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: src integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: arg end function fstarpu_mpi_recv_detached ! int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm); function fstarpu_mpi_issend (dh, mpi_req, dst, data_tag, mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_issend type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: mpi_req integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_issend ! int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm); function fstarpu_mpi_issend_prio (dh, mpi_req, dst, data_tag, prio, mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_issend_prio type(c_ptr), value, intent(in) :: dh type(c_ptr), value, intent(in) :: mpi_req integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: prio integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_issend_prio ! int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg); function fstarpu_mpi_issend_detached (dh, dst, data_tag, mpi_comm, callback, arg) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_issend_detached type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: arg end function fstarpu_mpi_issend_detached ! int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg); function fstarpu_mpi_issend_detached_prio (dh, dst, data_tag, prio, mpi_comm, callback, arg) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_issend_detached_prio type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: prio integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: arg end function fstarpu_mpi_issend_detached_prio ! int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status); function fstarpu_mpi_wait(req,st) bind(C,name="starpu_mpi_wait") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_wait type(c_ptr), value, intent(in) :: req type(c_ptr), value, intent(in) :: st end function fstarpu_mpi_wait ! int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status); function fstarpu_mpi_test(req,flag,st) bind(C,name="starpu_mpi_test") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_test type(c_ptr), value, intent(in) :: req type(c_ptr), value, intent(in) :: flag type(c_ptr), value, intent(in) :: st end function fstarpu_mpi_test ! int starpu_mpi_barrier(MPI_Comm comm); function fstarpu_mpi_barrier (mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_barrier integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_barrier ! int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency); function fstarpu_mpi_recv_detached_sequential_consistency (dh, src, data_tag, mpi_comm, callback, arg, seq_const) & bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_recv_detached_sequential_consistency type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: src integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: arg integer(c_int), value, intent(in) :: seq_const end function fstarpu_mpi_recv_detached_sequential_consistency ! int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm); ! -> cf fstarpu_mpi_init ! int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi); ! -> cf fstarpu_mpi_init ! int starpu_mpi_initialize(void) STARPU_DEPRECATED; ! -> cf fstarpu_mpi_init ! int starpu_mpi_initialize_extended(int *rank, int *world_size) STARPU_DEPRECATED; ! -> cf fstarpu_mpi_init ! int starpu_mpi_shutdown(void); function fstarpu_mpi_shutdown () bind(C,name="starpu_mpi_shutdown") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_shutdown end function fstarpu_mpi_shutdown ! struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...); function fstarpu_mpi_task_build(arglist) bind(C) use iso_c_binding, only: c_ptr type(c_ptr) :: fstarpu_mpi_task_build type(c_ptr), dimension(*), intent(in) :: arglist end function fstarpu_mpi_task_build ! int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...); subroutine fstarpu_mpi_task_post_build(arglist) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), dimension(*), intent(in) :: arglist end subroutine fstarpu_mpi_task_post_build ! int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...); subroutine fstarpu_mpi_task_insert(arglist) bind(C) use iso_c_binding, only: c_ptr type(c_ptr), dimension(*), intent(in) :: arglist end subroutine fstarpu_mpi_task_insert subroutine fstarpu_mpi_insert_task(arglist) bind(C,name="fstarpu_mpi_task_insert") use iso_c_binding, only: c_ptr type(c_ptr), dimension(*), intent(in) :: arglist end subroutine fstarpu_mpi_insert_task ! void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node); subroutine fstarpu_mpi_get_data_on_node(mpi_comm,dh,node) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node end subroutine fstarpu_mpi_get_data_on_node ! void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg); subroutine fstarpu_mpi_get_data_on_node_detached(mpi_comm,dh,node,callback,arg) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: node type(c_funptr), value, intent(in) :: callback type(c_ptr), value, intent(in) :: arg end subroutine fstarpu_mpi_get_data_on_node_detached ! void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle); subroutine fstarpu_mpi_redux_data(mpi_comm,dh) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_mpi_redux_data ! void starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio); subroutine fstarpu_mpi_redux_data_prio(mpi_comm,dh, prio) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: prio end subroutine fstarpu_mpi_redux_data_prio ! void starpu_mpi_redux_data_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int arity); subroutine fstarpu_mpi_redux_data_tree(mpi_comm,dh, arity) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: arity end subroutine fstarpu_mpi_redux_data_tree ! void starpu_mpi_redux_data_prio_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int prio, int arity); subroutine fstarpu_mpi_redux_data_prio_tree(mpi_comm,dh, prio, arity) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: prio integer(c_int), value, intent(in) :: arity end subroutine fstarpu_mpi_redux_data_prio_tree ! int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg); function fstarpu_mpi_scatter_detached (dhs, cnt, root, mpi_comm, scallback, sarg, rcallback, rarg) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_scatter_detached type(c_ptr), intent(in) :: dhs(*) integer(c_int), value, intent(in) :: cnt integer(c_int), value, intent(in) :: root integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: scallback type(c_ptr), value, intent(in) :: sarg type(c_funptr), value, intent(in) :: rcallback type(c_ptr), value, intent(in) :: rarg end function fstarpu_mpi_scatter_detached ! int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg); function fstarpu_mpi_gather_detached (dhs, cnt, root, mpi_comm, scallback, sarg, rcallback, rarg) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_gather_detached type(c_ptr), intent(in) :: dhs(*) integer(c_int), value, intent(in) :: cnt integer(c_int), value, intent(in) :: root integer(c_int), value, intent(in) :: mpi_comm type(c_funptr), value, intent(in) :: scallback type(c_ptr), value, intent(in) :: sarg type(c_funptr), value, intent(in) :: rcallback type(c_ptr), value, intent(in) :: rarg end function fstarpu_mpi_gather_detached ! int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag); function fstarpu_mpi_isend_detached_unlock_tag (dh, dst, data_tag, mpi_comm, starpu_tag) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend_detached_unlock_tag type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: starpu_tag end function fstarpu_mpi_isend_detached_unlock_tag ! int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag); function fstarpu_mpi_isend_detached_unlock_tag_prio (dh, dst, data_tag, prio, mpi_comm, starpu_tag) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend_detached_unlock_tag_prio type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: dst integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: prio integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: starpu_tag end function fstarpu_mpi_isend_detached_unlock_tag_prio ! int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag); function fstarpu_mpi_recv_detached_unlock_tag (dh, src, data_tag, mpi_comm, starpu_tag) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_recv_detached_unlock_tag type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: src integer(c_int64_t), value, intent(in) :: data_tag integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: starpu_tag end function fstarpu_mpi_recv_detached_unlock_tag ! int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *data_tag, MPI_Comm *comm, starpu_tag_t tag); function fstarpu_mpi_isend_array_detached_unlock_tag (array_size, dhs, dsts, data_tags, mpi_comms, starpu_tag) & bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend_array_detached_unlock_tag integer(c_int), value, intent(in) :: array_size type(c_ptr), intent(in) :: dhs(*) integer(c_int), intent(in) :: dsts(*) integer(c_int64_t), intent(in) :: data_tags(*) integer(c_int), intent(in) :: mpi_comms(*) type(c_ptr), value, intent(in) :: starpu_tag end function fstarpu_mpi_isend_array_detached_unlock_tag ! int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag); function fstarpu_mpi_isend_array_detached_unlock_tag_prio (array_size, dhs, dsts, data_tags, prio, mpi_comms, & starpu_tag) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_isend_array_detached_unlock_tag_prio integer(c_int), value, intent(in) :: array_size type(c_ptr), intent(in) :: dhs(*) integer(c_int), intent(in) :: dsts(*) integer(c_int64_t), intent(in) :: data_tags(*) integer(c_int), intent(in) :: prio(*) integer(c_int), intent(in) :: mpi_comms(*) type(c_ptr), value, intent(in) :: starpu_tag end function fstarpu_mpi_isend_array_detached_unlock_tag_prio ! int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *data_tag, MPI_Comm *comm, starpu_tag_t tag); function fstarpu_mpi_recv_array_detached_unlock_tag (array_size, dhs, srcs, data_tags, mpi_comms, starpu_tag) & bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_recv_array_detached_unlock_tag integer(c_int), value, intent(in) :: array_size type(c_ptr), intent(in) :: dhs(*) integer(c_int), intent(in) :: srcs(*) integer(c_int64_t), intent(in) :: data_tags(*) integer(c_int), intent(in) :: mpi_comms(*) type(c_ptr), value, intent(in) :: starpu_tag end function fstarpu_mpi_recv_array_detached_unlock_tag ! void starpu_mpi_comm_stats_retrieve(size_t *comm_stats); subroutine fstarpu_mpi_comm_stats_retrieve (comm_stats) bind(C,name="starpu_mpi_comm_stats_retrieve") use iso_c_binding implicit none integer(c_size_t), intent(in) :: comm_stats(*) end subroutine fstarpu_mpi_comm_stats_retrieve ! void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle); subroutine fstarpu_mpi_cache_flush(mpi_comm,dh) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh end subroutine fstarpu_mpi_cache_flush ! void starpu_mpi_cache_flush_all_data(MPI_Comm comm); subroutine fstarpu_mpi_cache_flush_all_data(mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm end subroutine fstarpu_mpi_cache_flush_all_data ! int starpu_mpi_comm_size(MPI_Comm comm, int *size); function fstarpu_mpi_comm_size(mpi_comm,sz) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm integer(c_int), intent(out) :: sz integer(c_int) :: fstarpu_mpi_comm_size end function fstarpu_mpi_comm_size ! int starpu_mpi_comm_rank(MPI_Comm comm, int *rank); function fstarpu_mpi_comm_rank(mpi_comm,rank) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm integer(c_int), intent(out) :: rank integer(c_int) :: fstarpu_mpi_comm_rank end function fstarpu_mpi_comm_rank ! int starpu_mpi_world_rank(void); function fstarpu_mpi_world_rank() bind(C,name="starpu_mpi_world_rank") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_world_rank end function fstarpu_mpi_world_rank ! int starpu_mpi_world_size(void); function fstarpu_mpi_world_size() bind(C,name="starpu_mpi_world_size") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_world_size end function fstarpu_mpi_world_size ! int starpu_mpi_world_size(void); function fstarpu_mpi_world_comm() bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_world_comm end function fstarpu_mpi_world_comm ! void starpu_mpi_comm_stats_enable() subroutine fstarpu_mpi_comm_stats_enable() bind(C) use iso_c_binding implicit none end subroutine fstarpu_mpi_comm_stats_enable ! void starpu_mpi_comm_stats_disable() subroutine fstarpu_mpi_comm_stats_disable() bind(C) use iso_c_binding implicit none end subroutine fstarpu_mpi_comm_stats_disable ! int starpu_mpi_get_communication_tag(void); function fstarpu_mpi_get_communication_tag() bind(C,name="starpu_mpi_get_communication_tag") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_get_communication_tag end function fstarpu_mpi_get_communication_tag ! void starpu_mpi_set_communication_tag(int tag); subroutine fstarpu_mpi_set_communication_tag(tag) bind(C,name="starpu_mpi_set_communication_tag") use iso_c_binding implicit none integer(c_int64_t), value, intent(in) :: tag end subroutine fstarpu_mpi_set_communication_tag ! void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int tag, int rank, MPI_Comm comm); subroutine fstarpu_mpi_data_register_comm(dh,tag,rank,mpi_comm) bind(C) use iso_c_binding implicit none type(c_ptr), value, intent(in) :: dh integer(c_int64_t), value, intent(in) :: tag integer(c_int), value, intent(in) :: rank integer(c_int), value, intent(in) :: mpi_comm end subroutine fstarpu_mpi_data_register_comm ! #define starpu_mpi_data_register(data_handle, tag, rank) starpu_mpi_data_register_comm(data_handle, tag, rank, MPI_COMM_WORLD) subroutine fstarpu_mpi_data_register(dh,tag,rank) bind(C) use iso_c_binding implicit none type(c_ptr), value, intent(in) :: dh integer(c_int64_t), value, intent(in) :: tag integer(c_int), value, intent(in) :: rank end subroutine fstarpu_mpi_data_register ! void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm); subroutine fstarpu_mpi_data_set_rank_comm(dh,rank,mpi_comm) bind(C) use iso_c_binding implicit none type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: rank integer(c_int), value, intent(in) :: mpi_comm end subroutine fstarpu_mpi_data_set_rank_comm ! #define starpu_mpi_data_set_rank(handle, rank) starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD) subroutine fstarpu_mpi_data_set_rank(dh,rank) bind(C) use iso_c_binding implicit none type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: rank end subroutine fstarpu_mpi_data_set_rank ! void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int tag); subroutine fstarpu_mpi_data_set_tag(dh,tag) bind(C,name="starpu_mpi_data_set_tag") use iso_c_binding implicit none type(c_ptr), value, intent(in) :: dh integer(c_int64_t), value, intent(in) :: tag end subroutine fstarpu_mpi_data_set_tag ! int starpu_mpi_data_get_rank(starpu_data_handle_t handle); function fstarpu_mpi_data_get_rank(dh) bind(C,name="starpu_mpi_data_get_rank") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_data_get_rank type(c_ptr), value, intent(in) :: dh end function fstarpu_mpi_data_get_rank ! int starpu_mpi_data_get_tag(starpu_data_handle_t handle); function fstarpu_mpi_data_get_tag(dh) bind(C,name="starpu_mpi_data_get_tag") use iso_c_binding implicit none integer(c_int64_t) :: fstarpu_mpi_data_get_tag type(c_ptr), value, intent(in) :: dh end function fstarpu_mpi_data_get_tag ! void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t handle, int rank); subroutine fstarpu_mpi_data_migrate(mpi_comm,dh,rank) bind(C) use iso_c_binding implicit none integer(c_int), value, intent(in) :: mpi_comm type(c_ptr), value, intent(in) :: dh integer(c_int), value, intent(in) :: rank end subroutine fstarpu_mpi_data_migrate ! #define STARPU_MPI_NODE_SELECTION_CURRENT_POLICY -1 ! #define STARPU_MPI_NODE_SELECTION_MOST_R_DATA 0 ! int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func); function fstarpu_mpi_node_selection_register_policy(policy_func) & bind(C,name="starpu_mpi_node_selection_register_policy") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_node_selection_register_policy type(c_funptr), value, intent(in) :: policy_func end function fstarpu_mpi_node_selection_register_policy ! int starpu_mpi_node_selection_unregister_policy(int policy); function fstarpu_mpi_node_selection_unregister_policy(policy) & bind(C,name="starpu_mpi_node_selection_unregister_policy") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_node_selection_unregister_policy type(c_ptr), value, intent(in) :: policy end function fstarpu_mpi_node_selection_unregister_policy ! int starpu_mpi_node_selection_get_current_policy(); function fstarpu_mpi_data_selection_get_current_policy() & bind(C,name="starpu_mpi_data_selection_get_current_policy") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_data_selection_get_current_policy end function fstarpu_mpi_data_selection_get_current_policy ! int starpu_mpi_node_selection_set_current_policy(int policy); function fstarpu_mpi_data_selection_set_current_policy(policy) & bind(C,name="starpu_mpi_data_selection_set_current_policy") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_data_selection_set_current_policy type(c_ptr), value, intent(in) :: policy end function fstarpu_mpi_data_selection_set_current_policy ! int starpu_mpi_cache_is_enabled(); function fstarpu_mpi_cache_is_enabled() bind(C,name="starpu_mpi_cache_is_enabled") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_cache_is_enabled end function fstarpu_mpi_cache_is_enabled ! int starpu_mpi_cache_set(int enabled); function fstarpu_mpi_cache_set(enabled) bind(C,name="starpu_mpi_cache_set") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_cache_set integer(c_int), value, intent(in) :: enabled end function fstarpu_mpi_cache_set ! int starpu_mpi_wait_for_all(MPI_Comm comm); function fstarpu_mpi_wait_for_all (mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_wait_for_all integer(c_int), value, intent(in) :: mpi_comm end function fstarpu_mpi_wait_for_all ! int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func); function fstarpu_mpi_datatype_register(dh, alloc_func, free_func) bind(C,name="starpu_mpi_datatype_register") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_datatype_register type(c_ptr), value, intent(in) :: dh type(c_funptr), value, intent(in) :: alloc_func type(c_funptr), value, intent(in) :: free_func end function fstarpu_mpi_datatype_register ! int starpu_mpi_datatype_unregister(starpu_data_handle_t handle); function fstarpu_mpi_datatype_unregister(dh) bind(C,name="starpu_mpi_datatype_unregister") use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_datatype_unregister type(c_ptr), value, intent(in) :: dh end function fstarpu_mpi_datatype_unregister function fstarpu_mpi_req_alloc() bind(C) use iso_c_binding implicit none type(c_ptr) :: fstarpu_mpi_req_alloc end function fstarpu_mpi_req_alloc subroutine fstarpu_mpi_req_free(req) bind(C) use iso_c_binding implicit none type(c_ptr),value,intent(in) :: req end subroutine fstarpu_mpi_req_free function fstarpu_mpi_status_alloc() bind(C) use iso_c_binding implicit none type(c_ptr) :: fstarpu_mpi_status_alloc end function fstarpu_mpi_status_alloc subroutine fstarpu_mpi_status_free(st) bind(C) use iso_c_binding implicit none type(c_ptr),value,intent(in) :: st end subroutine fstarpu_mpi_status_free end interface contains function fstarpu_mpi_init (initialize_mpi,mpi_comm) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_init integer(c_int), intent(in) :: initialize_mpi integer(c_int), optional, intent(in) :: mpi_comm type(c_ptr) :: argcv integer(c_int) :: fargc,i,farg_len character(len=1) :: farg_1 character(len=:), allocatable :: farg integer(c_int) :: mpi_comm_present, mpi_comm_or_0 integer(c_int) :: ret interface function fstarpu_mpi_argcv_alloc(argc, initialize_mpi, comm_present, comm) bind(C) use iso_c_binding implicit none type(c_ptr) :: fstarpu_mpi_argcv_alloc integer(c_int),value,intent(in) :: argc integer(c_int),value,intent(in) :: initialize_mpi integer(c_int),value,intent(in) :: comm_present integer(c_int),value,intent(in) :: comm end function fstarpu_mpi_argcv_alloc subroutine fstarpu_mpi_argcv_set_arg(argcv, i, l, s) bind(C) use iso_c_binding implicit none type(c_ptr),value,intent(in) :: argcv integer(c_int),value,intent(in) :: i integer(c_int),value,intent(in) :: l character(c_char),intent(in) :: s end subroutine fstarpu_mpi_argcv_set_arg subroutine fstarpu_mpi_argcv_free(argcv) bind(C) use iso_c_binding implicit none type(c_ptr),value,intent(in) :: argcv end subroutine fstarpu_mpi_argcv_free function fstarpu_mpi_init_c(argcv) bind(C) use iso_c_binding implicit none integer(c_int) :: fstarpu_mpi_init_c type(c_ptr),value,intent(in) :: argcv end function fstarpu_mpi_init_c end interface fargc = command_argument_count() !write(*,*) "fargc",fargc if (present(mpi_comm)) then mpi_comm_present = 1 mpi_comm_or_0 = mpi_comm else mpi_comm_present = 0 mpi_comm_or_0 = 0 end if !write(*,*) "initialize_mpi",initialize_mpi !write(*,*) "mpi_comm_present",mpi_comm_present argcv = fstarpu_mpi_argcv_alloc(fargc, initialize_mpi, mpi_comm_present, mpi_comm_or_0) do i=0,fargc-1 call get_command_argument(i, farg_1, farg_len) allocate (character(len=farg_len) :: farg) call get_command_argument(i, farg) call fstarpu_mpi_argcv_set_arg(argcv, i, farg_len, farg) deallocate (farg) end do ret = fstarpu_mpi_init_c(argcv) call fstarpu_mpi_argcv_free(argcv) fstarpu_mpi_init = ret end function fstarpu_mpi_init end module fstarpu_mpi_mod starpu-1.4.9+dfsg/mpi/include/starpu_mpi.h000066400000000000000000001126511507764646700205620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_H__ #define __STARPU_MPI_H__ #include #if defined(STARPU_USE_MPI) #include #include #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_MPI_Support MPI Support @{ */ /** @name Initialisation @{ */ /** Initialize the StarPU library with the given \p conf, and initialize the StarPU-MPI library with the given MPI communicator \p comm. \p initialize_mpi indicates if MPI should be initialized or not by StarPU. StarPU-MPI takes the opportunity to modify \p conf to either reserve a core for its MPI thread (by default), or execute MPI calls on the CPU driver 0 between tasks. */ int starpu_mpi_init_conf(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm, struct starpu_conf *conf); /** Same as starpu_mpi_init_conf(), except that this does not initialize the StarPU library. The caller thus has to call starpu_init() before this, and it can not reserve a core for the MPI communications. */ int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm); /** Call starpu_mpi_init_comm() with the MPI communicator \c MPI_COMM_WORLD. */ int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi); /** @deprecated This function has been made deprecated. One should use instead the function starpu_mpi_init(). This function does not call \c MPI_Init(), it should be called beforehand. */ int starpu_mpi_initialize(void) STARPU_DEPRECATED; /** @deprecated This function has been made deprecated. One should use instead the function starpu_mpi_init(). MPI will be initialized by starpumpi by calling MPI_Init_Thread(argc, argv, MPI_THREAD_SERIALIZED, ...). */ int starpu_mpi_initialize_extended(int *rank, int *world_size) STARPU_DEPRECATED; /** Call starpu_mpi_shutdown_comm() with the MPI communicator \c MPI_COMM_WORLD */ int starpu_mpi_shutdown(void); /** Clean the starpumpi library. This must be called after calling any \c starpu_mpi functions and before the call to starpu_shutdown(), if any. \c MPI_Finalize() will be called if StarPU-MPI has been initialized by starpu_mpi_init(). */ int starpu_mpi_shutdown_comm(MPI_Comm comm); /** Register \p comm. The function is automatically called for the communicator given to starpu_mpi_init_comm(). */ int starpu_mpi_comm_register(MPI_Comm comm); /** Return in \p size the size of the communicator \p comm. The function will fail if starpu_mpi_comm_register() has not been previously called with the given communicator. */ int starpu_mpi_comm_size(MPI_Comm comm, int *size); /** Return in \p rank the rank of the calling process in the communicator \p comm. The function will fail if starpu_mpi_comm_register() has not been previously called with the given communicator. */ int starpu_mpi_comm_rank(MPI_Comm comm, int *rank); /** Return the rank of the calling process in the communicator \c MPI_COMM_WORLD */ int starpu_mpi_world_rank(void); /** Return the size of the communicator \c MPI_COMM_WORLD */ int starpu_mpi_world_size(void); /** When given to the function starpu_mpi_comm_get_attr(), retrieve the value for the upper bound for tag value. */ #define STARPU_MPI_TAG_UB MPI_TAG_UB /** Retrieve an attribute value by key, similarly to the MPI function \c MPI_comm_get_attr(), except that the value is a pointer to int64_t instead of int. If an attribute is attached on \p comm to \p keyval, then the call returns \p flag equal to \c 1, and the attribute value in \p attribute_val. Otherwise, \p flag is set to \0. */ int starpu_mpi_comm_get_attr(MPI_Comm comm, int keyval, void *attribute_val, int *flag); /** Get the logical index of the core where the MPI thread is bound. */ int starpu_mpi_get_thread_cpuid(void); /** Get the tag used for MPI communications submitted by StarPU. */ int starpu_mpi_get_communication_tag(void); /** Set the tag used for MPI communications submitted by StarPU. */ void starpu_mpi_set_communication_tag(int tag); /** @} */ /** @name Communication \anchor MPIPtpCommunication @{ */ /** Opaque type for communication request */ typedef void *starpu_mpi_req; /** Type of the message tag. */ typedef int64_t starpu_mpi_tag_t; /** Post a standard-mode, non blocking send of \p data_handle to the node \p dest using the message tag \p data_tag within the communicator \p comm. After the call, the pointer to the request \p req can be used to test or to wait for the completion of the communication. */ int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm); /** Similar to starpu_mpi_isend(), but take a priority \p prio. */ int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm); /** Post a nonblocking receive in \p data_handle from the node \p source using the message tag \p data_tag within the communicator \p comm. After the call, the pointer to the request \p req can be used to test or to wait for the completion of the communication. */ int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm); /** Perform a standard-mode, blocking send of \p data_handle to the node \p dest using the message tag \p data_tag within the communicator \p comm. */ int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm); /** Similar to starpu_mpi_send(), but take a priority \p prio. */ int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm); /** Perform a standard-mode, blocking receive in \p data_handle from the node \p source using the message tag \p data_tag within the communicator \p comm. The value of \p status cannot be NULL, use the predefined value MPI_STATUS_IGNORE to ignore the status. */ int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, MPI_Status *status); /** Similar to starpu_mpi_recv(), but take a priority \p prio */ int starpu_mpi_recv_prio(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, MPI_Status *status); /** Post a standard-mode, non blocking send of \p data_handle to the node \p dest using the message tag \p data_tag within the communicator \p comm. On completion, the \p callback function is called with the argument \p arg. Similarly to the pthread detached functionality, when a detached communication completes, its resources are automatically released back to the system, there is no need to test or to wait for the completion of the request. */ int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg); /** Similar to starpu_mpi_isend_detached(), but take a priority \p prio. */ int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg); /** Post a nonblocking receive in \p data_handle from the node \p source using the message tag \p data_tag within the communicator \p comm. On completion, the \p callback function is called with the argument \p arg. Similarly to the pthread detached functionality, when a detached communication completes, its resources are automatically released back to the system, there is no need to test or to wait for the completion of the request. */ int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg); /** Same of starpu_mpi_irecv_detached but with the \p prio parameter. */ int starpu_mpi_irecv_detached_prio(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg); /** Post a nonblocking receive in \p data_handle from the node \p source using the message tag \p data_tag within the communicator \p comm. On completion, the \p callback function is called with the argument \p arg. The parameter \p sequential_consistency allows to enable or disable the sequential consistency for \p data handle (sequential consistency will be enabled or disabled based on the value of the parameter \p sequential_consistency and the value of the sequential consistency defined for \p data_handle). Similarly to the pthread detached functionality, when a detached communication completes, its resources are automatically released back to the system, there is no need to test or to wait for the completion of the request. */ int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency); /** Perform a synchronous-mode, non-blocking send of \p data_handle to the node \p dest using the message tag \p data_tag within the communicator \p comm. */ int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm); /** Similar to starpu_mpi_issend(), but take a priority \p prio. */ int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm); /** Perform a synchronous-mode, non-blocking send of \p data_handle to the node \p dest using the message tag \p data_tag within the communicator \p comm. On completion, the \p callback function is called with the argument \p arg. Similarly to the pthread detached functionality, when a detached communication completes, its resources are automatically released back to the system, there is no need to test or to wait for the completion of the request. */ int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg); /** Similar to starpu_mpi_issend_detached(), but take a priority \p prio. */ int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg); /** Return when the operation identified by request \p req is complete. The value of \p status cannot be NULL, use the predefined value MPI_STATUS_IGNORE to ignore the status. */ int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status); /** If the operation identified by \p req is complete, set \p flag to 1. The \p status object is set to contain information on the completed operation. */ int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status); /** Block the caller until all group members of the communicator \p comm have called it. */ int starpu_mpi_barrier(MPI_Comm comm); /** Wait until all StarPU tasks and communications for the given communicator are completed. */ int starpu_mpi_wait_for_all(MPI_Comm comm); /** Post a standard-mode, non blocking send of \p data_handle to the node \p dest using the message tag \p data_tag within the communicator \p comm. On completion, \p tag is unlocked. */ int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag); /** Similar to starpu_mpi_isend_detached_unlock_tag(), but take a priority \p prio. */ int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag); /** Post a nonblocking receive in \p data_handle from the node \p source using the message tag \p data_tag within the communicator \p comm. On completion, \p tag is unlocked. */ int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag); /** Post \p array_size standard-mode, non blocking send. Each post sends the n-th data of the array \p data_handle to the n-th node of the array \p dest using the n-th message tag of the array \p data_tag within the n-th communicator of the array \p comm. On completion of the all the requests, \p tag is unlocked. */ int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag); /** Similar to starpu_mpi_isend_array_detached_unlock_tag(), but take a priority \p prio. */ int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag); /** Post \p array_size nonblocking receive. Each post receives in the n-th data of the array \p data_handle from the n-th node of the array \p source using the n-th message tag of the array \p data_tag within the n-th communicator of the array \p comm. On completion of the all the requests, \p tag is unlocked. */ int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag); typedef int (*starpu_mpi_datatype_allocate_func_t)(starpu_data_handle_t, MPI_Datatype *); typedef int (*starpu_mpi_datatype_node_allocate_func_t)(starpu_data_handle_t, unsigned node, MPI_Datatype *); typedef void (*starpu_mpi_datatype_free_func_t)(MPI_Datatype *); /** Register functions to create and free a MPI datatype for the given handle. Similar to starpu_mpi_interface_datatype_register(). It is important that the function is called before any communication can take place for a data with the given handle. See \ref ExchangingUserDefinedDataInterface for an example. */ int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func); /** Register functions to create and free a MPI datatype for the given interface id. Similar to starpu_mpi_datatype_register(). It is important that the function is called before any communication can take place for a data with the given handle. See \ref ExchangingUserDefinedDataInterface for an example. */ int starpu_mpi_interface_datatype_register(enum starpu_data_interface_id id, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func); /** Register functions to create and free a MPI datatype for the given handle. Similar to starpu_mpi_interface_datatype_register(). It is important that the function is called before any communication can take place for a data with the given handle. See \ref ExchangingUserDefinedDataInterface for an example. */ int starpu_mpi_datatype_node_register(starpu_data_handle_t handle, starpu_mpi_datatype_node_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func); /** Register functions to create and free a MPI datatype for the given interface id. Similar to starpu_mpi_datatype_register(). It is important that the function is called before any communication can take place for a data with the given handle. See \ref ExchangingUserDefinedDataInterface for an example. */ int starpu_mpi_interface_datatype_node_register(enum starpu_data_interface_id id, starpu_mpi_datatype_node_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func); /** Unregister the MPI datatype functions stored for the interface of the given handle. */ int starpu_mpi_datatype_unregister(starpu_data_handle_t handle); /** Unregister the MPI datatype functions stored for the interface of the given interface id. Similar to starpu_mpi_datatype_unregister(). */ int starpu_mpi_interface_datatype_unregister(enum starpu_data_interface_id id); /** @} */ /** @name Communication Cache @{ */ /** Return 1 if the communication cache is enabled, 0 otherwise */ int starpu_mpi_cache_is_enabled(void); /** If \p enabled is 1, enable the communication cache. Otherwise, clean the cache if it was enabled and disable it. */ int starpu_mpi_cache_set(int enabled); /** Clear the send and receive communication cache for the data \p data_handle and invalidate the value. The function has to be called at the same point of task graph submission by all the MPI nodes on which the handle was registered. The function does nothing if the cache mechanism is disabled (see \ref STARPU_MPI_CACHE). */ void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle); /** Clear the send and receive communication cache for all data and invalidate their values. The function has to be called at the same point of task graph submission by all the MPI nodes. The function does nothing if the cache mechanism is disabled (see \ref STARPU_MPI_CACHE). */ void starpu_mpi_cache_flush_all_data(MPI_Comm comm); /** Test whether \p data_handle is cached for reception, i.e. the value was previously received from the owner node, and not flushed since then. */ int starpu_mpi_cached_receive(starpu_data_handle_t data_handle); /** * If \p data is already available in the reception cache, return 1 * If \p data is NOT available in the reception cache, add it to the * cache and return 0 * Return 0 if the communication cache is not enabled */ int starpu_mpi_cached_receive_set(starpu_data_handle_t data); int starpu_mpi_cached_cp_receive_set(starpu_data_handle_t data_handle); /** * Remove \p data from the reception cache */ void starpu_mpi_cached_receive_clear(starpu_data_handle_t data); /** Test whether \p data_handle is cached for emission to node \p dest, i.e. the value was previously sent to \p dest, and not flushed since then. */ int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest); /** * If \p data is already available in the emission cache for node * \p dest, return 1 * If \p data is NOT available in the emission cache for node \p dest, * add it to the cache and return 0 * Return 0 if the communication cache is not enabled */ int starpu_mpi_cached_send_set(starpu_data_handle_t data, int dest); /** * Remove \p data from the emission cache */ void starpu_mpi_cached_send_clear(starpu_data_handle_t data); /** @} */ /** @name MPI Insert Task \anchor MPIInsertTask @{ */ /** Can be used as rank when calling starpu_mpi_data_register() and alike, to specify that the data is per-node: each node will have its own value. Tasks writing to such data will be replicated on all nodes (and all parameters then have to be per-node). Tasks not writing to such data will just take the node-local value without any MPI communication. */ #define STARPU_MPI_PER_NODE -2 /** Register to MPI a StarPU data handle with the given tag, rank and MPI communicator. It also automatically clears the MPI communication cache when unregistering the data. */ void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm); /** Register to MPI a StarPU data handle with the given tag, rank and the MPI communicator \c MPI_COMM_WORLD. It also automatically clears the MPI communication cache when unregistering the data. */ #define starpu_mpi_data_register(data_handle, data_tag, rank) starpu_mpi_data_register_comm(data_handle, data_tag, rank, MPI_COMM_WORLD) /** Register to MPI a StarPU data handle with the given tag. No rank will be defined. It also automatically clears the MPI communication cache when unregistering the data. */ void starpu_mpi_data_set_tag(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag); /** Symbol kept for backward compatibility. Call function starpu_mpi_data_set_tag() */ #define starpu_data_set_tag starpu_mpi_data_set_tag /** Register to MPI a StarPU data handle with the given rank and given communicator. No tag will be defined. It also automatically clears the MPI communication cache when unregistering the data. */ void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm); /** Register to MPI a StarPU data handle with the given rank and the MPI communicator \c MPI_COMM_WORLD. No tag will be defined. It also automatically clears the MPI communication cache when unregistering the data. */ #define starpu_mpi_data_set_rank(handle, rank) starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD) /** Symbol kept for backward compatibility. Call function starpu_mpi_data_set_rank() */ #define starpu_data_set_rank starpu_mpi_data_set_rank /** Return the rank of the given data. */ int starpu_mpi_data_get_rank(starpu_data_handle_t handle); /** Symbol kept for backward compatibility. Call function starpu_mpi_data_get_rank() */ #define starpu_data_get_rank starpu_mpi_data_get_rank /** Return the tag of the given data. */ starpu_mpi_tag_t starpu_mpi_data_get_tag(starpu_data_handle_t handle); /** Return the redux map of the given data. */ char *starpu_mpi_data_get_redux_map(starpu_data_handle_t handle); /** Symbol kept for backward compatibility. Call function starpu_mpi_data_get_tag() */ #define starpu_data_get_tag starpu_mpi_data_get_tag /** Create and submit a task corresponding to codelet with the following arguments. The argument list must be zero-terminated. The arguments following the codelet are the same types as for the function starpu_task_insert(). Access modes for data can also be set with ::STARPU_SSEND to specify the data has to be sent using a synchronous and non-blocking mode (see starpu_mpi_issend()). The extra argument ::STARPU_EXECUTE_ON_NODE followed by an integer allows to specify the MPI node to execute the codelet. It is also possible to specify that the node owning a specific data will execute the codelet, by using ::STARPU_EXECUTE_ON_DATA followed by a data handle. The internal algorithm is as follows:
    1. Find out which MPI node is going to execute the codelet.
      • If there is only one node owning data in ::STARPU_W mode, it will be selected;
      • If there is several nodes owning data in ::STARPU_W mode, a node will be selected according to a given node selection policy (see ::STARPU_NODE_SELECTION_POLICY or starpu_mpi_node_selection_set_current_policy())
      • The argument ::STARPU_EXECUTE_ON_NODE followed by an integer can be used to specify the node; Ignored if the node value is -1.
      • The argument ::STARPU_EXECUTE_ON_DATA followed by a data handle can be used to specify that the node owing the given data will execute the codelet.
    2. Send and receive data as requested. Nodes owning data which need to be read by the task are sending them to the MPI node which will execute it. The latter receives them.
    3. Execute the codelet. This is done by the MPI node selected in the 1st step of the algorithm.
    4. If several MPI nodes own data to be written to, send written data back to their owners.
    The algorithm also includes a communication cache mechanism that allows not to send data twice to the same MPI node, unless the data has been modified. The cache can be disabled (see \ref STARPU_MPI_CACHE). */ int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...); #ifdef STARPU_USE_FXT #define starpu_mpi_task_insert(comm, cl, ...) \ starpu_mpi_task_insert(comm, cl, STARPU_TASK_FILE, __FILE__, STARPU_TASK_LINE, __LINE__, ##__VA_ARGS__) #endif /** Identical to starpu_mpi_task_insert(). Symbol kept for backward compatibility. */ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...); #ifdef STARPU_USE_FXT #define starpu_mpi_insert_task(comm, cl, ...) \ starpu_mpi_insert_task(comm, cl, STARPU_TASK_FILE, __FILE__, STARPU_TASK_LINE, __LINE__, ##__VA_ARGS__) #endif /** Create a task corresponding to \p codelet with the following given arguments. The argument list must be zero-terminated. The function performs the first two steps of the function starpu_mpi_task_insert(), i.e. submitting the MPI communications needed before the execution of the task, and the creation of the task on one node. Only the MPI node selected in the first step of the algorithm will return a valid task structure which can then be submitted, others will return NULL. The function starpu_mpi_task_post_build() MUST be called after that on all nodes, and after the submission of the task on the node which creates it, with the SAME list of arguments. */ struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...); #ifdef STARPU_USE_FXT #define starpu_mpi_task_build(comm, cl, ...) \ starpu_mpi_task_build(comm, cl, STARPU_TASK_FILE, __FILE__, STARPU_TASK_LINE, __LINE__, ##__VA_ARGS__) #endif /** Offer a va_list variant of starpu_mpi_task_build. */ struct starpu_task *starpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, va_list varg_list); /** MUST be called after a call to starpu_mpi_task_build(), with the SAME list of arguments. Perform the fourth -- last -- step of the algorithm described in starpu_mpi_task_insert(). */ int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...); /** Offer a va_list variant of starpu_mpi_task_post_build. */ int starpu_mpi_task_post_build_v(MPI_Comm comm, struct starpu_codelet *codelet, va_list varg_list); /** Structure used to pass data from starpu_mpi_task_exchange_data_before_execution() to starpu_mpi_task_exchange_data_after_execution() */ struct starpu_mpi_task_exchange_params { int do_execute; /**< is the caller going to execute the task */ int xrank; /**< node executing the task */ int priority; /**< priority of the task being executed */ }; /** Perform all necessary communications needed before the execution of the given task. The field \c priority of \p params will be set with the rank of the node which is selected to submit \p task. After calling this function, and the submission of the task for the selected node, all nodes MUST call the function starpu_mpi_task_exchange_data_after_execution() with the parameters \p descrs and \p params. */ int starpu_mpi_task_exchange_data_before_execution(MPI_Comm comm, struct starpu_task *task, struct starpu_data_descr *descrs, struct starpu_mpi_task_exchange_params *params); /** MUST be called after a call to starpu_mpi_task_exchange_data_before_execution() with the same arguments \p descrs and \p params. \p nb_data is the number of data in \p descrs. Perform all the necessary communications needed after the execution of the task, i.e the fourth -- last -- step of the algorithm described in starpu_mpi_task_insert(). */ int starpu_mpi_task_exchange_data_after_execution(MPI_Comm comm, struct starpu_data_descr *descrs, unsigned nb_data, struct starpu_mpi_task_exchange_params params); /** Transfer data \p data_handle to MPI node \p node, sending it from its owner if needed. At least the target node and the owner have to call the function. This waits for the transfer to be over. */ int starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node); /** Transfer data \p data_handle to MPI node \p node, sending it from its owner if needed. At least the target node and the owner have to call the function. On reception, the \p callback function is called with the argument \p arg. */ int starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void *), void *arg); /** Transfer data \p data_handle to all MPI nodes, sending it from its owner if needed. All nodes have to call the function. */ void starpu_mpi_get_data_on_all_nodes_detached(MPI_Comm comm, starpu_data_handle_t data_handle); /** Submit migration of the data onto the \p new_rank MPI node. This means both submitting the transfer of the data to node \p new_rank if it hasn't been submitted already, and setting the home node of the data to the new node. Further data transfers submitted by starpu_mpi_task_insert() will be done from that new node. This function thus needs to be called on all nodes which have registered the data at the same point of tasks submissions. This also flushes the cache for this data to avoid incoherencies. */ void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t handle, int new_rank); /** @} */ /** @name Node Selection Policy \anchor MPINodeSelectionPolicy @{ */ /** Define the current policy */ #define STARPU_MPI_NODE_SELECTION_CURRENT_POLICY -1 /** Define the policy in which the selected node is the one having the most data in ::STARPU_R mode */ #define STARPU_MPI_NODE_SELECTION_MOST_R_DATA 0 typedef int (*starpu_mpi_select_node_policy_func_t)(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data); /** Register a new policy which can then be used when there is several nodes owning data in ::STARPU_W mode. Here an example of function defining a node selection policy. The codelet will be executed on the node owing the first data with a size bigger than 1M, or on the node 0 if no data fits the given size. \code{.c} int my_node_selection_policy(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data) { // me is the current MPI rank // nb_nodes is the number of MPI nodes // descr is the description of the data specified when calling starpu_mpi_task_insert // nb_data is the number of data in descr int i; for(i= 0 ; i 1024*1024) return rank; } } return 0; } \endcode */ int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func); /** Unregister a previously registered policy. */ int starpu_mpi_node_selection_unregister_policy(int policy); /** Return the current policy used to select the node which will execute the codelet */ int starpu_mpi_node_selection_get_current_policy(void); /** Set the current policy used to select the node which will execute the codelet. The policy ::STARPU_MPI_NODE_SELECTION_MOST_R_DATA selects the node having the most data in ::STARPU_R mode so as to minimize the amount of data to be transferred. */ int starpu_mpi_node_selection_set_current_policy(int policy); /** @} */ /** @name Collective Operations \anchor MPICollectiveOperations @{ */ /** Perform a reduction on the given data \p handle. All nodes send the data to its owner node which will perform a reduction. */ int starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle); /** Similar to starpu_mpi_redux_data(), but take a priority \p prio. */ int starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio); /** Perform a reduction on the given data \p handle. Nodes perform the reduction through in a tree-based fashion. The tree use is an \p arity - ary tree. */ int starpu_mpi_redux_data_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int arity); /** Similar to starpu_mpi_redux_data_tree(), but take a priority \p prio. */ int starpu_mpi_redux_data_prio_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int prio, int arity); /** Scatter data among processes of the communicator based on the ownership of the data. For each data of the array \p data_handles, the process \p root sends the data to the process owning this data. Processes receiving data must have valid data handles to receive them. On completion of the collective communication, the \p scallback function is called with the argument \p sarg on the process \p root, the \p rcallback function is called with the argument \p rarg on any other process. */ int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg); /** Gather data from the different processes of the communicator onto the process \p root. Each process owning data handle in the array \p data_handles will send them to the process \p root. The process \p root must have valid data handles to receive the data. On completion of the collective communication, the \p rcallback function is called with the argument \p rarg on the process root, the \p scallback function is called with the argument \p sarg on any other process. */ int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg); /** @} */ /** @name Dynamic Broadcasts \anchor MPIDynamicBroadcasts @{ */ /** Enable or disable coop sends.
    Used for benchmark, not recommended for production: can cause problems if there are still communications while disabling, or when shutting down StarPU.
    This function must be called after the initialization of StarPU. */ void starpu_mpi_coop_sends_set_use(int use_coop_sends); /** Return whether coop sends are enabled or not. */ int starpu_mpi_coop_sends_get_use(void); /** Explicit the number of different sends of the \p data_handle. When the number of sends is reached, a collective operation is triggered. If this function isn't called, StarPU will trigger a collective operation containing only posted sends while the data wasn't available. */ void starpu_mpi_coop_sends_data_handle_nb_sends(starpu_data_handle_t data_handle, int nb_sends); /** @} */ /** @name Statistics \anchor MPIStats @{ */ /** Disable the aggregation of communications statistics. */ void starpu_mpi_comm_stats_disable(void); /** Enable the aggregation of communications statistics. */ void starpu_mpi_comm_stats_enable(void); /** Retrieve the current communications statistics from the current node in the array \p comm_stats which must have a size greater or equal to the world size. Communications statistics must have been enabled, either through the function starpu_mpi_comm_stats_enable() or through the environment variable \ref STARPU_MPI_STATS. */ void starpu_mpi_comm_stats_retrieve(size_t *comm_stats); /** @} */ /** @name Miscellaneous \anchor MPIMisc @{ */ int starpu_mpi_pre_submit_hook_register(void (*f)(struct starpu_task *)); int starpu_mpi_pre_submit_hook_unregister(void); /** Copy the content of \p src_handle into \p dst_handle. If both data are on the same node, the function starpu_data_cpy() is called, otherwise a MPI transfer is initiated between both nodes. The parameter \p asynchronous indicates whether the function should block or not. If \p callback_func is not NULL, this callback function is executed on the owner node of the data \p dst_handle after the handle has been received, and it is given the pointer \p callback_arg as argument. See \ref MPITaskUtility for more details. */ int starpu_mpi_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, MPI_Comm comm, int asynchronous, void (*callback_func)(void *), void *callback_arg); /** Similar to starpu_mpi_data_cpy(), but take a priority \p prio. */ int starpu_mpi_data_cpy_priority(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, MPI_Comm comm, int asynchronous, void (*callback_func)(void *), void *callback_arg, int priority); /** @} */ /** @name Data Tags Management \anchor MPITags @{ */ /** Book a range of unique tags of size \p nbtags to be used to register StarPU data handles. This function returns the minimal tag value available \c mintag to allow the registration of data with tags in the continuous range [[ \c mintag, \c mintag + \p nbtags ]] Note that this function must be called by all MPI processes involved in the computations with the same parameters and in the exact same order to make sure the tags are identical from one node to another. */ int64_t starpu_mpi_tags_allocate(int64_t nbtags); /** Release the range of tags starting by the given \p mintag value. The mintag value must be a value obtained through a call to starpu_mpi_tags_allocate(). Note that this function must be called by all MPI processes involved in the computations with the same parameters and in the exact same order to make sure the tags are identical from one node to another as for starpu_mpi_tags_allocate(). */ void starpu_mpi_tags_free(int64_t mintag); /** @} */ #ifdef __cplusplus } #endif #endif // STARPU_USE_MPI #endif // __STARPU_MPI_H__ starpu-1.4.9+dfsg/mpi/include/starpu_mpi_ft.h000066400000000000000000000150371507764646700212530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_FT_H__ #define __STARPU_MPI_FT_H__ #include #ifdef __cplusplus extern "C" { #endif struct _starpu_mpi_checkpoint_template; typedef struct _starpu_mpi_checkpoint_template *starpu_mpi_checkpoint_template_t; #if defined(STARPU_USE_MPI_FT) /** @defgroup API_MPI_FT_Support MPI Fault Tolerance Support @{ */ /** Initialise the checkpoint mechanism */ int starpu_mpi_checkpoint_init(void); /** Shutdown the checkpoint mechanism */ int starpu_mpi_checkpoint_shutdown(void); /** * Wrapped function to register a checkpoint template \p cp_template with the given arguments. * It is then ready to use with ::starpu_mpi_checkpoint_template_submit() during the program execution. * This command executes ::starpu_mpi_checkpoint_template_create(), adds the given checkpoint entry and freezes the * checkpoint, and therefore can no longer be modified. * A unique checkpoint id \p cp_id is requested from the user in order to create several templates and to * match with a corresponding ::starpu_mpi_init_from_checkpoint() (not implemented yet). * * The arguments following the \p cp_template and the \p cp_id can be of the following types: *
      *
    • ::STARPU_R followed by a data handle and the backup rank; *
    • ::STARPU_DATA_ARRAY followed by an array of data handles, * its number of elements and a backup rank (non functional); *
    • ::STARPU_VALUE followed by a pointer to the unregistered value, * its size in bytes, a unique tag (as the ones given for data handle registering) * and the function giving the back up rank of the rank argument : int(backup_of)(int) . *
    • The argument list must be ended by the value 0. *
    */ int starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t *cp_template, int cp_id, int cp_domain, ...); /** * Create a new checkpoint template. A unique checkpoint id \p cp_id is requested from * the user in order to create several templates and to * match with a corresponding ::starpu_mpi_init_from_checkpoint() (not implemented yet). * Note a template must be frozen with ::starpu_mpi_checkpoint_template_freeze() in order to use it * with ::starpu_mpi_checkpoint_template_submit(). */ int starpu_mpi_checkpoint_template_create(starpu_mpi_checkpoint_template_t *cp_template, int cp_id, int cp_domain); /** * Add a single entry to a checkpoint template previously created with ::starpu_mpi_checkpoint_template_create(). * As many entries can be added to a template with as many argument to a single function call, or with as many * calls to this function. * Once all the entry added, the * template must be frozen before using ::starpu_mpi_checkpoint_template_submit(). * * The arguments following the \p cp_template can be of the following types: *
      *
    • ::STARPU_R followed by a data handle and the backup rank; *
    • (non functional) ::STARPU_DATA_ARRAY followed by an array of data handles, * its number of elements and a backup rank (non functional); *
    • ::STARPU_VALUE followed by a pointer to the unregistered value, * its size in bytes, a unique tag (as the ones given for data handle registering) * and the function giving the back up rank of the rank argument : int(backup_of)(int) . *
    • The argument list must be ended by the value 0. *
    */ int starpu_mpi_checkpoint_template_add_entry(starpu_mpi_checkpoint_template_t *cp_template, ...); /** * Freeze the given template. * A frozen template can no longer be modified with ::starpu_mpi_checkpoint_template_add_entry(). * A template must be frozen before using ::starpu_mpi_checkpoint_template_submit(). */ int starpu_mpi_checkpoint_template_freeze(starpu_mpi_checkpoint_template_t *cp_template); /** * Submit the checkpoint to StarPU, and can be seen as a cut in the task graph. StarPU will save the data as currently * described in the submission. Note that the data external to StarPu (::STARPU_VALUE) will be saved with the current value * at submission time (when ::starpu_mpi_checkpoint_template_submit() is called). * The data internal to StarPU (aka handles given with ::STARPU_R) will be saved with their value at * execution time (when the task submitted before the ::starpu_mpi_checkpoint_template_submit() have been executed, * and before this data is modified by the tasks submitted after the ::starpu_mpi_checkpoint_template_submit()) */ int starpu_mpi_checkpoint_template_submit(starpu_mpi_checkpoint_template_t cp_template, int prio); int starpu_mpi_checkpoint_template_print(starpu_mpi_checkpoint_template_t cp_template); #else // !STARPU_USE_MPI_FT static inline int starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t *cp_template STARPU_ATTRIBUTE_UNUSED, int cp_id STARPU_ATTRIBUTE_UNUSED, int cp_domain STARPU_ATTRIBUTE_UNUSED, ...) { return 0; } static inline int starpu_mpi_checkpoint_template_create(starpu_mpi_checkpoint_template_t *cp_template STARPU_ATTRIBUTE_UNUSED, int cp_id STARPU_ATTRIBUTE_UNUSED, int cp_domain STARPU_ATTRIBUTE_UNUSED) { return 0; } static inline int starpu_mpi_checkpoint_template_add_entry(starpu_mpi_checkpoint_template_t *cp_template STARPU_ATTRIBUTE_UNUSED, ...) { return 0; } static inline int starpu_mpi_checkpoint_template_freeze(starpu_mpi_checkpoint_template_t *cp_template STARPU_ATTRIBUTE_UNUSED) { return 0; } static inline int starpu_mpi_checkpoint_template_submit(starpu_mpi_checkpoint_template_t cp_template STARPU_ATTRIBUTE_UNUSED, int prio STARPU_ATTRIBUTE_UNUSED) { return 0; } static inline int starpu_mpi_ft_turn_on(void) { return 0; } static inline int starpu_mpi_ft_turn_off(void) { return 0; } static inline int starpu_mpi_checkpoint_template_print(starpu_mpi_checkpoint_template_t cp_template STARPU_ATTRIBUTE_UNUSED) { return 0; } static inline int starpu_mpi_checkpoint_init(void) { return 0; } static inline int starpu_mpi_checkpoint_shutdown(void) { return 0; } /** @} */ #endif // STARPU_USE_MPI_FT #ifdef __cplusplus } #endif #endif // __STARPU_MPI_FT_H__ starpu-1.4.9+dfsg/mpi/include/starpu_mpi_lb.h000066400000000000000000000024511507764646700212330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_LOAD_BALANCER_H__ #define __STARPU_MPI_LOAD_BALANCER_H__ #include #ifdef __cplusplus extern "C" { #endif /** todo */ struct starpu_mpi_lb_conf { void (*get_neighbors)(int **neighbor_ids, int *nneighbors); void (*get_data_unit_to_migrate)(starpu_data_handle_t **handle_unit, int *nhandles, int dst_node); }; /** Initialize the load balancer's environment with the load policy provided by the user */ void starpu_mpi_lb_init(const char *lb_policy_name, struct starpu_mpi_lb_conf *); void starpu_mpi_lb_shutdown(void); #ifdef __cplusplus } #endif #endif // __STARPU_MPI_LOAD_BALANCER_H__ starpu-1.4.9+dfsg/mpi/packages/000077500000000000000000000000001507764646700163535ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/packages/libstarpumpi.pc.in000066400000000000000000000021211507764646700220130ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: starpumpi Description: offers MPI support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ -DSTARPU_USE_DEPRECATED_API Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@ Libs.private: @LDFLAGS@ @LIBS@ @STARPU_EXPORTED_LIBS@ Requires: libstarpu Requires.private: starpu-1.4.9+dfsg/mpi/packages/starpumpi-1.0.pc.in000066400000000000000000000020661507764646700216300ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: starpumpi Description: offers MPI support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@ Libs.private: @LDFLAGS@ @LIBS@ @STARPU_EXPORTED_LIBS@ Requires: starpu-1.0 Requires.private: starpu-1.4.9+dfsg/mpi/packages/starpumpi-1.1.pc.in000066400000000000000000000020661507764646700216310ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: starpumpi Description: offers MPI support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@ Libs.private: @LDFLAGS@ @LIBS@ @STARPU_EXPORTED_LIBS@ Requires: starpu-1.1 Requires.private: starpu-1.4.9+dfsg/mpi/packages/starpumpi-1.2.pc.in000066400000000000000000000020661507764646700216320ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: starpumpi Description: offers MPI support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@ Libs.private: @LDFLAGS@ @LIBS@ @STARPU_EXPORTED_LIBS@ Requires: starpu-1.2 Requires.private: starpu-1.4.9+dfsg/mpi/packages/starpumpi-1.3.pc.in000066400000000000000000000020661507764646700216330ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: starpumpi Description: offers MPI support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@ Libs.private: @LDFLAGS@ @LIBS@ @STARPU_EXPORTED_LIBS@ Requires: starpu-1.3 Requires.private: starpu-1.4.9+dfsg/mpi/packages/starpumpi-1.4.pc.in000066400000000000000000000020661507764646700216340ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: starpumpi Description: offers MPI support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ Libs: -L${libdir} -lstarpumpi-@STARPU_EFFECTIVE_VERSION@ Libs.private: @LDFLAGS@ @LIBS@ @STARPU_EXPORTED_LIBS@ Requires: starpu-1.4 Requires.private: starpu-1.4.9+dfsg/mpi/src/000077500000000000000000000000001507764646700153645ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/src/Makefile.am000066400000000000000000000122541507764646700174240ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-notests.mk CC=$(MPICC) CCLD=$(MPICC) BUILT_SOURCES = SUBDIRS = CLEANFILES = *.gcno *.gcda *.linkinfo AM_CFLAGS += $(FXT_CFLAGS) $(NMAD_CFLAGS) $(MPI_SYNC_CLOCKS_CFLAGS) AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/src/ -I$(top_builddir)/src -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src $(STARPU_H_CPPFLAGS) -DBUILDING_STARPU LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) LIBS += $(FXT_LDFLAGS) $(FXT_LIBS) LIBS += $(NMAD_LDFLAGS) $(NMAD_LIBS) LIBS += $(MPICC_LDFLAGS) LIBS += $(MPI_SYNC_CLOCKS_LIBS) ldflags = if STARPU_HAVE_WINDOWS LC_MESSAGES=C export LC_MESSAGES ldflags += -Xlinker --output-def -Xlinker .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.def if STARPU_HAVE_MS_LIB .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib: libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la dolib ./dolib "$(STARPU_MS_LIB)" $(STARPU_MS_LIB_ARCH) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.def @STARPU_EFFECTIVE_VERSION@ $(libstarpumpi_so_version) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib all-local: .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib endif STARPU_HAVE_MS_LIB install-exec-hook: $(INSTALL) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.def $(DESTDIR)$(libdir) if STARPU_HAVE_MS_LIB $(INSTALL) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib $(DESTDIR)$(libdir) $(INSTALL) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.exp $(DESTDIR)$(libdir) endif STARPU_HAVE_MS_LIB endif STARPU_HAVE_WINDOWS lib_LTLIBRARIES = libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \ -version-info $(LIBSTARPUMPI_INTERFACE_CURRENT):$(LIBSTARPUMPI_INTERFACE_REVISION):$(LIBSTARPUMPI_INTERFACE_AGE) noinst_HEADERS = \ starpu_mpi_private.h \ starpu_mpi_fxt.h \ starpu_mpi_stats.h \ starpu_mpi_datatype.h \ starpu_mpi_cache.h \ starpu_mpi_select_node.h \ starpu_mpi_cache_stats.h \ starpu_mpi_task_insert.h \ starpu_mpi_init.h \ nmad/starpu_mpi_nmad_coop.h \ mpi/starpu_mpi_mpi.h \ mpi/starpu_mpi_early_data.h \ mpi/starpu_mpi_early_request.h \ mpi/starpu_mpi_sync_data.h \ mpi/starpu_mpi_comm.h \ mpi/starpu_mpi_tag.h \ mpi/starpu_mpi_driver.h \ mpi/starpu_mpi_mpi_backend.h \ nmad/starpu_mpi_nmad_backend.h \ nmad/starpu_mpi_nmad_unknown_datatype.h \ nmad/starpu_mpi_nmad.h \ load_balancer/policy/data_movements_interface.h \ load_balancer/policy/load_data_interface.h \ load_balancer/policy/load_balancer_policy.h if STARPU_USE_MPI_FT noinst_HEADERS += \ mpi_failure_tolerance/starpu_mpi_ft.h \ mpi_failure_tolerance/starpu_mpi_checkpoint.h \ mpi_failure_tolerance/starpu_mpi_checkpoint_template.h \ mpi_failure_tolerance/starpu_mpi_ft_service_comms.h \ mpi_failure_tolerance/starpu_mpi_checkpoint_package.h \ mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.h \ mpi_failure_tolerance/starpu_mpi_ft_stats.h endif STARPU_USE_MPI_FT libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \ starpu_mpi.c \ starpu_mpi_req.c \ starpu_mpi_coop_sends.c \ starpu_mpi_helper.c \ starpu_mpi_datatype.c \ starpu_mpi_task_insert.c \ starpu_mpi_collective.c \ starpu_mpi_stats.c \ starpu_mpi_private.c \ starpu_mpi_cache.c \ starpu_mpi_select_node.c \ starpu_mpi_cache_stats.c \ starpu_mpi_fortran.c \ starpu_mpi_task_insert_fortran.c \ starpu_mpi_init.c \ starpu_mpi_tags.c \ nmad/starpu_mpi_nmad_coop.c \ nmad/starpu_mpi_nmad_unknown_datatype.c \ nmad/starpu_mpi_nmad.c \ nmad/starpu_mpi_nmad_backend.c \ mpi/starpu_mpi_mpi.c \ mpi/starpu_mpi_mpi_backend.c \ mpi/starpu_mpi_early_data.c \ mpi/starpu_mpi_early_request.c \ mpi/starpu_mpi_sync_data.c \ mpi/starpu_mpi_comm.c \ mpi/starpu_mpi_tag.c \ load_balancer/policy/data_movements_interface.c \ load_balancer/policy/load_data_interface.c \ load_balancer/policy/load_heat_propagation.c \ load_balancer/load_balancer.c if STARPU_USE_MPI_FT libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += \ mpi_failure_tolerance/starpu_mpi_ft.c \ mpi_failure_tolerance/starpu_mpi_checkpoint.c \ mpi_failure_tolerance/starpu_mpi_checkpoint_template.c \ mpi_failure_tolerance/starpu_mpi_ft_service_comms.c \ mpi_failure_tolerance/starpu_mpi_checkpoint_package.c \ mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.c \ mpi_failure_tolerance/starpu_mpi_ft_stats.c endif STARPU_USE_MPI_FT if STARPU_USE_FXT libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += \ starpu_mpi_fxt.c endif starpu-1.4.9+dfsg/mpi/src/Makefile.in000066400000000000000000001677211507764646700174470ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ @STARPU_HAVE_WINDOWS_TRUE@am__append_3 = -Xlinker --output-def -Xlinker .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.def @STARPU_USE_MPI_FT_TRUE@am__append_4 = \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft.h \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint.h \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_template.h \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft_service_comms.h \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_package.h \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.h \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft_stats.h @STARPU_USE_MPI_FT_TRUE@am__append_5 = \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft.c \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint.c \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_template.c \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft_service_comms.c \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_package.c \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.c \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft_stats.c @STARPU_USE_FXT_TRUE@am__append_6 = \ @STARPU_USE_FXT_TRUE@ starpu_mpi_fxt.c subdir = mpi/src ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(libdir)" LTLIBRARIES = $(lib_LTLIBRARIES) libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = am__libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES_DIST = \ starpu_mpi.c starpu_mpi_req.c starpu_mpi_coop_sends.c \ starpu_mpi_helper.c starpu_mpi_datatype.c \ starpu_mpi_task_insert.c starpu_mpi_collective.c \ starpu_mpi_stats.c starpu_mpi_private.c starpu_mpi_cache.c \ starpu_mpi_select_node.c starpu_mpi_cache_stats.c \ starpu_mpi_fortran.c starpu_mpi_task_insert_fortran.c \ starpu_mpi_init.c starpu_mpi_tags.c \ nmad/starpu_mpi_nmad_coop.c \ nmad/starpu_mpi_nmad_unknown_datatype.c nmad/starpu_mpi_nmad.c \ nmad/starpu_mpi_nmad_backend.c mpi/starpu_mpi_mpi.c \ mpi/starpu_mpi_mpi_backend.c mpi/starpu_mpi_early_data.c \ mpi/starpu_mpi_early_request.c mpi/starpu_mpi_sync_data.c \ mpi/starpu_mpi_comm.c mpi/starpu_mpi_tag.c \ load_balancer/policy/data_movements_interface.c \ load_balancer/policy/load_data_interface.c \ load_balancer/policy/load_heat_propagation.c \ load_balancer/load_balancer.c \ mpi_failure_tolerance/starpu_mpi_ft.c \ mpi_failure_tolerance/starpu_mpi_checkpoint.c \ mpi_failure_tolerance/starpu_mpi_checkpoint_template.c \ mpi_failure_tolerance/starpu_mpi_ft_service_comms.c \ mpi_failure_tolerance/starpu_mpi_checkpoint_package.c \ mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.c \ mpi_failure_tolerance/starpu_mpi_ft_stats.c starpu_mpi_fxt.c am__dirstamp = $(am__leading_dot)dirstamp @STARPU_USE_MPI_FT_TRUE@am__objects_1 = mpi_failure_tolerance/starpu_mpi_ft.lo \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint.lo \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_template.lo \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft_service_comms.lo \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_package.lo \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.lo \ @STARPU_USE_MPI_FT_TRUE@ mpi_failure_tolerance/starpu_mpi_ft_stats.lo @STARPU_USE_FXT_TRUE@am__objects_2 = starpu_mpi_fxt.lo am_libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = starpu_mpi.lo \ starpu_mpi_req.lo starpu_mpi_coop_sends.lo \ starpu_mpi_helper.lo starpu_mpi_datatype.lo \ starpu_mpi_task_insert.lo starpu_mpi_collective.lo \ starpu_mpi_stats.lo starpu_mpi_private.lo starpu_mpi_cache.lo \ starpu_mpi_select_node.lo starpu_mpi_cache_stats.lo \ starpu_mpi_fortran.lo starpu_mpi_task_insert_fortran.lo \ starpu_mpi_init.lo starpu_mpi_tags.lo \ nmad/starpu_mpi_nmad_coop.lo \ nmad/starpu_mpi_nmad_unknown_datatype.lo \ nmad/starpu_mpi_nmad.lo nmad/starpu_mpi_nmad_backend.lo \ mpi/starpu_mpi_mpi.lo mpi/starpu_mpi_mpi_backend.lo \ mpi/starpu_mpi_early_data.lo mpi/starpu_mpi_early_request.lo \ mpi/starpu_mpi_sync_data.lo mpi/starpu_mpi_comm.lo \ mpi/starpu_mpi_tag.lo \ load_balancer/policy/data_movements_interface.lo \ load_balancer/policy/load_data_interface.lo \ load_balancer/policy/load_heat_propagation.lo \ load_balancer/load_balancer.lo $(am__objects_1) \ $(am__objects_2) libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ $(am_libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LINK = $(LIBTOOL) \ $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS) \ $(LDFLAGS) -o $@ AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/starpu_mpi.Plo \ ./$(DEPDIR)/starpu_mpi_cache.Plo \ ./$(DEPDIR)/starpu_mpi_cache_stats.Plo \ ./$(DEPDIR)/starpu_mpi_collective.Plo \ ./$(DEPDIR)/starpu_mpi_coop_sends.Plo \ ./$(DEPDIR)/starpu_mpi_datatype.Plo \ ./$(DEPDIR)/starpu_mpi_fortran.Plo \ ./$(DEPDIR)/starpu_mpi_fxt.Plo \ ./$(DEPDIR)/starpu_mpi_helper.Plo \ ./$(DEPDIR)/starpu_mpi_init.Plo \ ./$(DEPDIR)/starpu_mpi_private.Plo \ ./$(DEPDIR)/starpu_mpi_req.Plo \ ./$(DEPDIR)/starpu_mpi_select_node.Plo \ ./$(DEPDIR)/starpu_mpi_stats.Plo \ ./$(DEPDIR)/starpu_mpi_tags.Plo \ ./$(DEPDIR)/starpu_mpi_task_insert.Plo \ ./$(DEPDIR)/starpu_mpi_task_insert_fortran.Plo \ load_balancer/$(DEPDIR)/load_balancer.Plo \ load_balancer/policy/$(DEPDIR)/data_movements_interface.Plo \ load_balancer/policy/$(DEPDIR)/load_data_interface.Plo \ load_balancer/policy/$(DEPDIR)/load_heat_propagation.Plo \ mpi/$(DEPDIR)/starpu_mpi_comm.Plo \ mpi/$(DEPDIR)/starpu_mpi_early_data.Plo \ mpi/$(DEPDIR)/starpu_mpi_early_request.Plo \ mpi/$(DEPDIR)/starpu_mpi_mpi.Plo \ mpi/$(DEPDIR)/starpu_mpi_mpi_backend.Plo \ mpi/$(DEPDIR)/starpu_mpi_sync_data.Plo \ mpi/$(DEPDIR)/starpu_mpi_tag.Plo \ mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint.Plo \ mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_package.Plo \ mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_template.Plo \ mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_tracker.Plo \ mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft.Plo \ mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_service_comms.Plo \ mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_stats.Plo \ nmad/$(DEPDIR)/starpu_mpi_nmad.Plo \ nmad/$(DEPDIR)/starpu_mpi_nmad_backend.Plo \ nmad/$(DEPDIR)/starpu_mpi_nmad_coop.Plo \ nmad/$(DEPDIR)/starpu_mpi_nmad_unknown_datatype.Plo am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = $(libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) DIST_SOURCES = $(am__libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES_DIST) RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__noinst_HEADERS_DIST = starpu_mpi_private.h starpu_mpi_fxt.h \ starpu_mpi_stats.h starpu_mpi_datatype.h starpu_mpi_cache.h \ starpu_mpi_select_node.h starpu_mpi_cache_stats.h \ starpu_mpi_task_insert.h starpu_mpi_init.h \ nmad/starpu_mpi_nmad_coop.h mpi/starpu_mpi_mpi.h \ mpi/starpu_mpi_early_data.h mpi/starpu_mpi_early_request.h \ mpi/starpu_mpi_sync_data.h mpi/starpu_mpi_comm.h \ mpi/starpu_mpi_tag.h mpi/starpu_mpi_driver.h \ mpi/starpu_mpi_mpi_backend.h nmad/starpu_mpi_nmad_backend.h \ nmad/starpu_mpi_nmad_unknown_datatype.h nmad/starpu_mpi_nmad.h \ load_balancer/policy/data_movements_interface.h \ load_balancer/policy/load_data_interface.h \ load_balancer/policy/load_balancer_policy.h \ mpi_failure_tolerance/starpu_mpi_ft.h \ mpi_failure_tolerance/starpu_mpi_checkpoint.h \ mpi_failure_tolerance/starpu_mpi_checkpoint_template.h \ mpi_failure_tolerance/starpu_mpi_ft_service_comms.h \ mpi_failure_tolerance/starpu_mpi_checkpoint_package.h \ mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.h \ mpi_failure_tolerance/starpu_mpi_ft_stats.h HEADERS = $(noinst_HEADERS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/make/starpu-notests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = $(MPICC) CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ $(STARPU_EXPORTED_LIBS) $(FXT_LDFLAGS) $(FXT_LIBS) \ $(NMAD_LDFLAGS) $(NMAD_LIBS) $(MPICC_LDFLAGS) \ $(MPI_SYNC_CLOCKS_LIBS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(FXT_CFLAGS) $(NMAD_CFLAGS) \ $(MPI_SYNC_CLOCKS_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) CCLD = $(MPICC) BUILT_SOURCES = SUBDIRS = CLEANFILES = *.gcno *.gcda *.linkinfo AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/src/ -I$(top_builddir)/src -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src $(STARPU_H_CPPFLAGS) -DBUILDING_STARPU ldflags = $(am__append_3) @STARPU_HAVE_WINDOWS_TRUE@LC_MESSAGES = C lib_LTLIBRARIES = libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \ -version-info $(LIBSTARPUMPI_INTERFACE_CURRENT):$(LIBSTARPUMPI_INTERFACE_REVISION):$(LIBSTARPUMPI_INTERFACE_AGE) noinst_HEADERS = starpu_mpi_private.h starpu_mpi_fxt.h \ starpu_mpi_stats.h starpu_mpi_datatype.h starpu_mpi_cache.h \ starpu_mpi_select_node.h starpu_mpi_cache_stats.h \ starpu_mpi_task_insert.h starpu_mpi_init.h \ nmad/starpu_mpi_nmad_coop.h mpi/starpu_mpi_mpi.h \ mpi/starpu_mpi_early_data.h mpi/starpu_mpi_early_request.h \ mpi/starpu_mpi_sync_data.h mpi/starpu_mpi_comm.h \ mpi/starpu_mpi_tag.h mpi/starpu_mpi_driver.h \ mpi/starpu_mpi_mpi_backend.h nmad/starpu_mpi_nmad_backend.h \ nmad/starpu_mpi_nmad_unknown_datatype.h nmad/starpu_mpi_nmad.h \ load_balancer/policy/data_movements_interface.h \ load_balancer/policy/load_data_interface.h \ load_balancer/policy/load_balancer_policy.h $(am__append_4) libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = starpu_mpi.c \ starpu_mpi_req.c starpu_mpi_coop_sends.c starpu_mpi_helper.c \ starpu_mpi_datatype.c starpu_mpi_task_insert.c \ starpu_mpi_collective.c starpu_mpi_stats.c \ starpu_mpi_private.c starpu_mpi_cache.c \ starpu_mpi_select_node.c starpu_mpi_cache_stats.c \ starpu_mpi_fortran.c starpu_mpi_task_insert_fortran.c \ starpu_mpi_init.c starpu_mpi_tags.c \ nmad/starpu_mpi_nmad_coop.c \ nmad/starpu_mpi_nmad_unknown_datatype.c nmad/starpu_mpi_nmad.c \ nmad/starpu_mpi_nmad_backend.c mpi/starpu_mpi_mpi.c \ mpi/starpu_mpi_mpi_backend.c mpi/starpu_mpi_early_data.c \ mpi/starpu_mpi_early_request.c mpi/starpu_mpi_sync_data.c \ mpi/starpu_mpi_comm.c mpi/starpu_mpi_tag.c \ load_balancer/policy/data_movements_interface.c \ load_balancer/policy/load_data_interface.c \ load_balancer/policy/load_heat_propagation.c \ load_balancer/load_balancer.c $(am__append_5) $(am__append_6) all: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) all-recursive .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign mpi/src/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign mpi/src/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): install-libLTLIBRARIES: $(lib_LTLIBRARIES) @$(NORMAL_INSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ list2=; for p in $$list; do \ if test -f $$p; then \ list2="$$list2 $$p"; \ else :; fi; \ done; \ test -z "$$list2" || { \ echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ } uninstall-libLTLIBRARIES: @$(NORMAL_UNINSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ for p in $$list; do \ $(am__strip_dir) \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ done clean-libLTLIBRARIES: -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) @list='$(lib_LTLIBRARIES)'; \ locs=`for p in $$list; do echo $$p; done | \ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ sort -u`; \ test -z "$$locs" || { \ echo rm -f $${locs}; \ rm -f $${locs}; \ } nmad/$(am__dirstamp): @$(MKDIR_P) nmad @: > nmad/$(am__dirstamp) nmad/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) nmad/$(DEPDIR) @: > nmad/$(DEPDIR)/$(am__dirstamp) nmad/starpu_mpi_nmad_coop.lo: nmad/$(am__dirstamp) \ nmad/$(DEPDIR)/$(am__dirstamp) nmad/starpu_mpi_nmad_unknown_datatype.lo: nmad/$(am__dirstamp) \ nmad/$(DEPDIR)/$(am__dirstamp) nmad/starpu_mpi_nmad.lo: nmad/$(am__dirstamp) \ nmad/$(DEPDIR)/$(am__dirstamp) nmad/starpu_mpi_nmad_backend.lo: nmad/$(am__dirstamp) \ nmad/$(DEPDIR)/$(am__dirstamp) mpi/$(am__dirstamp): @$(MKDIR_P) mpi @: > mpi/$(am__dirstamp) mpi/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) mpi/$(DEPDIR) @: > mpi/$(DEPDIR)/$(am__dirstamp) mpi/starpu_mpi_mpi.lo: mpi/$(am__dirstamp) \ mpi/$(DEPDIR)/$(am__dirstamp) mpi/starpu_mpi_mpi_backend.lo: mpi/$(am__dirstamp) \ mpi/$(DEPDIR)/$(am__dirstamp) mpi/starpu_mpi_early_data.lo: mpi/$(am__dirstamp) \ mpi/$(DEPDIR)/$(am__dirstamp) mpi/starpu_mpi_early_request.lo: mpi/$(am__dirstamp) \ mpi/$(DEPDIR)/$(am__dirstamp) mpi/starpu_mpi_sync_data.lo: mpi/$(am__dirstamp) \ mpi/$(DEPDIR)/$(am__dirstamp) mpi/starpu_mpi_comm.lo: mpi/$(am__dirstamp) \ mpi/$(DEPDIR)/$(am__dirstamp) mpi/starpu_mpi_tag.lo: mpi/$(am__dirstamp) \ mpi/$(DEPDIR)/$(am__dirstamp) load_balancer/policy/$(am__dirstamp): @$(MKDIR_P) load_balancer/policy @: > load_balancer/policy/$(am__dirstamp) load_balancer/policy/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) load_balancer/policy/$(DEPDIR) @: > load_balancer/policy/$(DEPDIR)/$(am__dirstamp) load_balancer/policy/data_movements_interface.lo: \ load_balancer/policy/$(am__dirstamp) \ load_balancer/policy/$(DEPDIR)/$(am__dirstamp) load_balancer/policy/load_data_interface.lo: \ load_balancer/policy/$(am__dirstamp) \ load_balancer/policy/$(DEPDIR)/$(am__dirstamp) load_balancer/policy/load_heat_propagation.lo: \ load_balancer/policy/$(am__dirstamp) \ load_balancer/policy/$(DEPDIR)/$(am__dirstamp) load_balancer/$(am__dirstamp): @$(MKDIR_P) load_balancer @: > load_balancer/$(am__dirstamp) load_balancer/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) load_balancer/$(DEPDIR) @: > load_balancer/$(DEPDIR)/$(am__dirstamp) load_balancer/load_balancer.lo: load_balancer/$(am__dirstamp) \ load_balancer/$(DEPDIR)/$(am__dirstamp) mpi_failure_tolerance/$(am__dirstamp): @$(MKDIR_P) mpi_failure_tolerance @: > mpi_failure_tolerance/$(am__dirstamp) mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) mpi_failure_tolerance/$(DEPDIR) @: > mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) mpi_failure_tolerance/starpu_mpi_ft.lo: \ mpi_failure_tolerance/$(am__dirstamp) \ mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) mpi_failure_tolerance/starpu_mpi_checkpoint.lo: \ mpi_failure_tolerance/$(am__dirstamp) \ mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) mpi_failure_tolerance/starpu_mpi_checkpoint_template.lo: \ mpi_failure_tolerance/$(am__dirstamp) \ mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) mpi_failure_tolerance/starpu_mpi_ft_service_comms.lo: \ mpi_failure_tolerance/$(am__dirstamp) \ mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) mpi_failure_tolerance/starpu_mpi_checkpoint_package.lo: \ mpi_failure_tolerance/$(am__dirstamp) \ mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.lo: \ mpi_failure_tolerance/$(am__dirstamp) \ mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) mpi_failure_tolerance/starpu_mpi_ft_stats.lo: \ mpi_failure_tolerance/$(am__dirstamp) \ mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la: $(libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(EXTRA_libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(AM_V_CCLD)$(libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LINK) -rpath $(libdir) $(libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) -rm -f load_balancer/*.$(OBJEXT) -rm -f load_balancer/*.lo -rm -f load_balancer/policy/*.$(OBJEXT) -rm -f load_balancer/policy/*.lo -rm -f mpi/*.$(OBJEXT) -rm -f mpi/*.lo -rm -f mpi_failure_tolerance/*.$(OBJEXT) -rm -f mpi_failure_tolerance/*.lo -rm -f nmad/*.$(OBJEXT) -rm -f nmad/*.lo distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_cache.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_cache_stats.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_collective.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_coop_sends.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_datatype.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_fortran.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_fxt.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_helper.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_init.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_private.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_req.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_select_node.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_stats.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_tags.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_task_insert.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_mpi_task_insert_fortran.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@load_balancer/$(DEPDIR)/load_balancer.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@load_balancer/policy/$(DEPDIR)/data_movements_interface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@load_balancer/policy/$(DEPDIR)/load_data_interface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@load_balancer/policy/$(DEPDIR)/load_heat_propagation.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi/$(DEPDIR)/starpu_mpi_comm.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi/$(DEPDIR)/starpu_mpi_early_data.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi/$(DEPDIR)/starpu_mpi_early_request.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi/$(DEPDIR)/starpu_mpi_mpi.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi/$(DEPDIR)/starpu_mpi_mpi_backend.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi/$(DEPDIR)/starpu_mpi_sync_data.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi/$(DEPDIR)/starpu_mpi_tag.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_package.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_template.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_tracker.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_service_comms.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_stats.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@nmad/$(DEPDIR)/starpu_mpi_nmad.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@nmad/$(DEPDIR)/starpu_mpi_nmad_backend.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@nmad/$(DEPDIR)/starpu_mpi_nmad_coop.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@nmad/$(DEPDIR)/starpu_mpi_nmad_unknown_datatype.Plo@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs -rm -rf load_balancer/.libs load_balancer/_libs -rm -rf load_balancer/policy/.libs load_balancer/policy/_libs -rm -rf mpi/.libs mpi/_libs -rm -rf mpi_failure_tolerance/.libs mpi_failure_tolerance/_libs -rm -rf nmad/.libs nmad/_libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) check-recursive @STARPU_HAVE_MS_LIB_FALSE@all-local: @STARPU_HAVE_WINDOWS_FALSE@all-local: all-am: Makefile $(LTLIBRARIES) $(HEADERS) all-local installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(libdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-recursive install-exec: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -rm -f load_balancer/$(DEPDIR)/$(am__dirstamp) -rm -f load_balancer/$(am__dirstamp) -rm -f load_balancer/policy/$(DEPDIR)/$(am__dirstamp) -rm -f load_balancer/policy/$(am__dirstamp) -rm -f mpi/$(DEPDIR)/$(am__dirstamp) -rm -f mpi/$(am__dirstamp) -rm -f mpi_failure_tolerance/$(DEPDIR)/$(am__dirstamp) -rm -f mpi_failure_tolerance/$(am__dirstamp) -rm -f nmad/$(DEPDIR)/$(am__dirstamp) -rm -f nmad/$(am__dirstamp) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) @STARPU_HAVE_WINDOWS_FALSE@install-exec-hook: clean: clean-recursive clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ mostlyclean-am distclean: distclean-recursive -rm -f ./$(DEPDIR)/starpu_mpi.Plo -rm -f ./$(DEPDIR)/starpu_mpi_cache.Plo -rm -f ./$(DEPDIR)/starpu_mpi_cache_stats.Plo -rm -f ./$(DEPDIR)/starpu_mpi_collective.Plo -rm -f ./$(DEPDIR)/starpu_mpi_coop_sends.Plo -rm -f ./$(DEPDIR)/starpu_mpi_datatype.Plo -rm -f ./$(DEPDIR)/starpu_mpi_fortran.Plo -rm -f ./$(DEPDIR)/starpu_mpi_fxt.Plo -rm -f ./$(DEPDIR)/starpu_mpi_helper.Plo -rm -f ./$(DEPDIR)/starpu_mpi_init.Plo -rm -f ./$(DEPDIR)/starpu_mpi_private.Plo -rm -f ./$(DEPDIR)/starpu_mpi_req.Plo -rm -f ./$(DEPDIR)/starpu_mpi_select_node.Plo -rm -f ./$(DEPDIR)/starpu_mpi_stats.Plo -rm -f ./$(DEPDIR)/starpu_mpi_tags.Plo -rm -f ./$(DEPDIR)/starpu_mpi_task_insert.Plo -rm -f ./$(DEPDIR)/starpu_mpi_task_insert_fortran.Plo -rm -f load_balancer/$(DEPDIR)/load_balancer.Plo -rm -f load_balancer/policy/$(DEPDIR)/data_movements_interface.Plo -rm -f load_balancer/policy/$(DEPDIR)/load_data_interface.Plo -rm -f load_balancer/policy/$(DEPDIR)/load_heat_propagation.Plo -rm -f mpi/$(DEPDIR)/starpu_mpi_comm.Plo -rm -f mpi/$(DEPDIR)/starpu_mpi_early_data.Plo -rm -f mpi/$(DEPDIR)/starpu_mpi_early_request.Plo -rm -f mpi/$(DEPDIR)/starpu_mpi_mpi.Plo -rm -f mpi/$(DEPDIR)/starpu_mpi_mpi_backend.Plo -rm -f mpi/$(DEPDIR)/starpu_mpi_sync_data.Plo -rm -f mpi/$(DEPDIR)/starpu_mpi_tag.Plo -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint.Plo -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_package.Plo -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_template.Plo -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_tracker.Plo -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft.Plo -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_service_comms.Plo -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_stats.Plo -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad.Plo -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad_backend.Plo -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad_coop.Plo -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad_unknown_datatype.Plo -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-libLTLIBRARIES @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) install-exec-hook install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f ./$(DEPDIR)/starpu_mpi.Plo -rm -f ./$(DEPDIR)/starpu_mpi_cache.Plo -rm -f ./$(DEPDIR)/starpu_mpi_cache_stats.Plo -rm -f ./$(DEPDIR)/starpu_mpi_collective.Plo -rm -f ./$(DEPDIR)/starpu_mpi_coop_sends.Plo -rm -f ./$(DEPDIR)/starpu_mpi_datatype.Plo -rm -f ./$(DEPDIR)/starpu_mpi_fortran.Plo -rm -f ./$(DEPDIR)/starpu_mpi_fxt.Plo -rm -f ./$(DEPDIR)/starpu_mpi_helper.Plo -rm -f ./$(DEPDIR)/starpu_mpi_init.Plo -rm -f ./$(DEPDIR)/starpu_mpi_private.Plo -rm -f ./$(DEPDIR)/starpu_mpi_req.Plo -rm -f ./$(DEPDIR)/starpu_mpi_select_node.Plo -rm -f ./$(DEPDIR)/starpu_mpi_stats.Plo -rm -f ./$(DEPDIR)/starpu_mpi_tags.Plo -rm -f ./$(DEPDIR)/starpu_mpi_task_insert.Plo -rm -f ./$(DEPDIR)/starpu_mpi_task_insert_fortran.Plo -rm -f load_balancer/$(DEPDIR)/load_balancer.Plo -rm -f load_balancer/policy/$(DEPDIR)/data_movements_interface.Plo -rm -f load_balancer/policy/$(DEPDIR)/load_data_interface.Plo -rm -f load_balancer/policy/$(DEPDIR)/load_heat_propagation.Plo -rm -f mpi/$(DEPDIR)/starpu_mpi_comm.Plo -rm -f mpi/$(DEPDIR)/starpu_mpi_early_data.Plo -rm -f mpi/$(DEPDIR)/starpu_mpi_early_request.Plo -rm -f mpi/$(DEPDIR)/starpu_mpi_mpi.Plo -rm -f mpi/$(DEPDIR)/starpu_mpi_mpi_backend.Plo -rm -f mpi/$(DEPDIR)/starpu_mpi_sync_data.Plo -rm -f mpi/$(DEPDIR)/starpu_mpi_tag.Plo -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint.Plo -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_package.Plo -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_template.Plo -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_checkpoint_tracker.Plo -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft.Plo -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_service_comms.Plo -rm -f mpi_failure_tolerance/$(DEPDIR)/starpu_mpi_ft_stats.Plo -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad.Plo -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad_backend.Plo -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad_coop.Plo -rm -f nmad/$(DEPDIR)/starpu_mpi_nmad_unknown_datatype.Plo -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-libLTLIBRARIES .MAKE: $(am__recursive_targets) all check install install-am \ install-exec install-exec-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \ am--depfiles check check-am clean clean-generic \ clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \ ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am install-exec \ install-exec-am install-exec-hook install-html install-html-am \ install-info install-info-am install-libLTLIBRARIES \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip installcheck installcheck-am \ installdirs installdirs-am maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags tags-am uninstall uninstall-am uninstall-libLTLIBRARIES .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null @STARPU_HAVE_WINDOWS_TRUE@export LC_MESSAGES @STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@.libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib: libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la dolib @STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@ ./dolib "$(STARPU_MS_LIB)" $(STARPU_MS_LIB_ARCH) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.def @STARPU_EFFECTIVE_VERSION@ $(libstarpumpi_so_version) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib @STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@all-local: .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib @STARPU_HAVE_WINDOWS_TRUE@install-exec-hook: @STARPU_HAVE_WINDOWS_TRUE@ $(INSTALL) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.def $(DESTDIR)$(libdir) @STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@ $(INSTALL) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.lib $(DESTDIR)$(libdir) @STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@ $(INSTALL) .libs/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.exp $(DESTDIR)$(libdir) # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/mpi/src/load_balancer/000077500000000000000000000000001507764646700201325ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/src/load_balancer/load_balancer.c000066400000000000000000000113011507764646700230400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include "policy/load_balancer_policy.h" #if defined(STARPU_USE_MPI_MPI) static struct load_balancer_policy *defined_policy = NULL; typedef void (*_post_exec_hook_func_t)(struct starpu_task *task, unsigned sched_ctx_id); static _post_exec_hook_func_t saved_post_exec_hook[STARPU_NMAX_SCHED_CTXS]; static void post_exec_hook_wrapper(struct starpu_task *task, unsigned sched_ctx_id) { //fprintf(stderr,"I am called ! \n"); if (defined_policy && defined_policy->finished_task_entry_point) defined_policy->finished_task_entry_point(); if (saved_post_exec_hook[sched_ctx_id]) saved_post_exec_hook[sched_ctx_id](task, sched_ctx_id); } static struct load_balancer_policy *predefined_policies[] = { &load_heat_propagation_policy, NULL }; void starpu_mpi_lb_init(const char *lb_policy_name, struct starpu_mpi_lb_conf *itf) { int ret; const char *policy_name = starpu_getenv("STARPU_MPI_LB"); if (!policy_name) policy_name = lb_policy_name; if (!policy_name || (strcmp(policy_name, "help") == 0)) { _STARPU_MSG("Warning : load balancing is disabled for this run.\n"); _STARPU_MSG("Use the STARPU_MPI_LB = environment variable to use a load balancer.\n"); _STARPU_MSG("Available load balancers :\n"); struct load_balancer_policy **policy; for(policy=predefined_policies ; *policy!=NULL ; policy++) { struct load_balancer_policy *p = *policy; fprintf(stderr," - %s\n", p->policy_name); } return; } if (policy_name) { struct load_balancer_policy **policy; for(policy=predefined_policies ; *policy!=NULL ; policy++) { struct load_balancer_policy *p = *policy; if (p->policy_name) { if (strcmp(policy_name, p->policy_name) == 0) { /* we found a policy with the requested name */ defined_policy = p; break; } } } } if (!defined_policy) { _STARPU_MSG("Error : no load balancer with the name %s. Load balancing will be disabled for this run.\n", policy_name); return; } ret = defined_policy->init(itf); if (ret != 0) { _STARPU_MSG("Error (%d) in %s->init: invalid starpu_mpi_lb_conf. Load balancing will be disabled for this run.\n", ret, defined_policy->policy_name); return; } /* starpu_register_hook(submitted_task, defined_policy->submitted_task_entry_point); */ if (defined_policy->submitted_task_entry_point) starpu_mpi_pre_submit_hook_register(defined_policy->submitted_task_entry_point); /* starpu_register_hook(finished_task, defined_policy->finished_task_entry_point); */ if (defined_policy->finished_task_entry_point) { int i; for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) { struct starpu_sched_policy *sched_policy = starpu_sched_ctx_get_sched_policy(i); if (sched_policy) { _STARPU_DEBUG("Setting post_exec_hook for scheduling context %d %s (%d)\n", i, sched_policy->policy_name, STARPU_NMAX_SCHED_CTXS); saved_post_exec_hook[i] = sched_policy->post_exec_hook; sched_policy->post_exec_hook = post_exec_hook_wrapper; } else saved_post_exec_hook[i] = NULL; } } return; } void starpu_mpi_lb_shutdown() { if (!defined_policy) return; int ret = defined_policy->deinit(); if (ret != 0) { _STARPU_MSG("Error (%d) in %s->deinit\n", ret, defined_policy->policy_name); return; } /* starpu_unregister_hook(submitted_task, defined_policy->submitted_task_entry_point); */ if (defined_policy->submitted_task_entry_point) starpu_mpi_pre_submit_hook_unregister(); /* starpu_unregister_hook(finished_task, defined_policy->finished_task_entry_point); */ if (defined_policy->finished_task_entry_point) { int i; for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) { if (saved_post_exec_hook[i]) { struct starpu_sched_policy *sched_policy = starpu_sched_ctx_get_sched_policy(i); sched_policy->post_exec_hook = saved_post_exec_hook[i]; saved_post_exec_hook[i] = NULL; } } } defined_policy = NULL; } #endif /* STARPU_USE_MPI_MPI */ starpu-1.4.9+dfsg/mpi/src/load_balancer/policy/000077500000000000000000000000001507764646700214315ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/src/load_balancer/policy/data_movements_interface.c000066400000000000000000000232451507764646700266310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "data_movements_interface.h" #if defined(STARPU_USE_MPI_MPI) starpu_mpi_tag_t **data_movements_get_ref_tags_table(starpu_data_handle_t handle) { struct data_movements_interface *dm_interface = (struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); if (dm_interface->tags) return &dm_interface->tags; else return NULL; } int **data_movements_get_ref_ranks_table(starpu_data_handle_t handle) { struct data_movements_interface *dm_interface = (struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); if (dm_interface->ranks) return &dm_interface->ranks; else return NULL; } starpu_mpi_tag_t *data_movements_get_tags_table(starpu_data_handle_t handle) { struct data_movements_interface *dm_interface = (struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return dm_interface->tags; } int *data_movements_get_ranks_table(starpu_data_handle_t handle) { struct data_movements_interface *dm_interface = (struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return dm_interface->ranks; } int data_movements_get_size_tables(starpu_data_handle_t handle) { struct data_movements_interface *dm_interface = (struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return dm_interface->size; } static void data_movements_free_data_on_node(void *data_interface, unsigned node); static starpu_ssize_t data_movements_allocate_data_on_node(void *data_interface, unsigned node); int data_movements_reallocate_tables_interface(struct data_movements_interface *dm_interface, unsigned node, int size) { if (dm_interface->tags) { data_movements_free_data_on_node(dm_interface, node); dm_interface->tags = NULL; dm_interface->ranks = NULL; } else { STARPU_ASSERT(!dm_interface->tags); STARPU_ASSERT(!dm_interface->ranks); } dm_interface->size = size; if (dm_interface->size) { starpu_ssize_t resize = data_movements_allocate_data_on_node(dm_interface, node); STARPU_ASSERT(resize > 0); } return 0 ; } int data_movements_reallocate_tables(starpu_data_handle_t handle, unsigned node, int size) { struct data_movements_interface *dm_interface = (struct data_movements_interface *) starpu_data_get_interface_on_node(handle, node); return data_movements_reallocate_tables_interface(dm_interface, node, size); } static void data_movements_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct data_movements_interface *dm_interface = (struct data_movements_interface *) data_interface; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct data_movements_interface *local_interface = (struct data_movements_interface *) starpu_data_get_interface_on_node(handle, node); local_interface->size = dm_interface->size; if (node == home_node) { local_interface->tags = dm_interface->tags; local_interface->ranks = dm_interface->ranks; } else { local_interface->tags = NULL; local_interface->ranks = NULL; } } } static starpu_ssize_t data_movements_allocate_data_on_node(void *data_interface, unsigned node) { struct data_movements_interface *dm_interface = (struct data_movements_interface *) data_interface; if (!dm_interface->size) { dm_interface->tags = NULL; dm_interface->ranks = NULL; return 0; } starpu_mpi_tag_t *addr_tags; int *addr_ranks; starpu_ssize_t requested_memory_tags = dm_interface->size * sizeof(starpu_mpi_tag_t); starpu_ssize_t requested_memory_ranks = dm_interface->size * sizeof(int); addr_tags = (starpu_mpi_tag_t*) starpu_malloc_on_node(node, requested_memory_tags); if (!addr_tags) goto fail_tags; addr_ranks = (int*) starpu_malloc_on_node(node, requested_memory_ranks); if (!addr_ranks) goto fail_ranks; /* update the data properly in consequence */ dm_interface->tags = addr_tags; dm_interface->ranks = addr_ranks; return requested_memory_tags+requested_memory_ranks; fail_ranks: starpu_free_on_node(node, (uintptr_t) addr_tags, requested_memory_tags); fail_tags: return -ENOMEM; } static void data_movements_free_data_on_node(void *data_interface, unsigned node) { struct data_movements_interface *dm_interface = (struct data_movements_interface *) data_interface; if (! dm_interface->tags) return; starpu_ssize_t requested_memory_tags = dm_interface->size * sizeof(starpu_mpi_tag_t); starpu_ssize_t requested_memory_ranks = dm_interface->size * sizeof(int); starpu_free_on_node(node, (uintptr_t) dm_interface->tags, requested_memory_tags); dm_interface->tags = NULL; starpu_free_on_node(node, (uintptr_t) dm_interface->ranks, requested_memory_ranks); dm_interface->ranks = NULL; } static size_t data_movements_get_size(starpu_data_handle_t handle) { size_t size; struct data_movements_interface *dm_interface = (struct data_movements_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); size = (dm_interface->size * sizeof(starpu_mpi_tag_t)) + (dm_interface->size * sizeof(int)) + sizeof(int); return size; } static uint32_t data_movements_footprint(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(data_movements_get_size(handle), 0); } static int data_movements_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct data_movements_interface *dm_interface = (struct data_movements_interface *) starpu_data_get_interface_on_node(handle, node); *count = data_movements_get_size(handle); if (ptr != NULL) { char *data = (void*) starpu_malloc_on_node_flags(node, *count, 0); assert(data); *ptr = data; memcpy(data, &dm_interface->size, sizeof(int)); if (dm_interface->size) { memcpy(data+sizeof(int), dm_interface->tags, (dm_interface->size*sizeof(starpu_mpi_tag_t))); memcpy(data+sizeof(int)+(dm_interface->size*sizeof(starpu_mpi_tag_t)), dm_interface->ranks, dm_interface->size*sizeof(int)); } } return 0; } static int data_movements_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { char *data = ptr; STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct data_movements_interface *dm_interface = (struct data_movements_interface *) starpu_data_get_interface_on_node(handle, node); int size = 0; memcpy(&size, data, sizeof(int)); STARPU_ASSERT(count == (2 * size * sizeof(int)) + sizeof(int)); data_movements_reallocate_tables(handle, node, size); if (dm_interface->size) { memcpy(dm_interface->tags, data+sizeof(int), dm_interface->size*sizeof(starpu_mpi_tag_t)); memcpy(dm_interface->ranks, data+sizeof(int)+(dm_interface->size*sizeof(starpu_mpi_tag_t)), dm_interface->size*sizeof(int)); } return 0; } static int data_movements_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { data_movements_peek_data(handle, node, ptr, count); starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); return 0; } static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct data_movements_interface *src_data_movements = src_interface; struct data_movements_interface *dst_data_movements = dst_interface; int ret = 0; data_movements_reallocate_tables_interface(dst_data_movements, dst_node, src_data_movements->size); if (starpu_interface_copy((uintptr_t) src_data_movements->tags, 0, src_node, (uintptr_t) dst_data_movements->tags, 0, dst_node, src_data_movements->size*sizeof(starpu_mpi_tag_t), async_data)) ret = -EAGAIN; if (starpu_interface_copy((uintptr_t) src_data_movements->ranks, 0, src_node, (uintptr_t) dst_data_movements->ranks, 0, dst_node, src_data_movements->size*sizeof(int), async_data)) ret = -EAGAIN; return ret; } static const struct starpu_data_copy_methods data_movements_copy_methods = { .any_to_any = copy_any_to_any }; static struct starpu_data_interface_ops interface_data_movements_ops = { .register_data_handle = data_movements_register_data_handle, .allocate_data_on_node = data_movements_allocate_data_on_node, .free_data_on_node = data_movements_free_data_on_node, .copy_methods = &data_movements_copy_methods, .get_size = data_movements_get_size, .footprint = data_movements_footprint, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct data_movements_interface), .to_pointer = NULL, .pack_data = data_movements_pack_data, .peek_data = data_movements_peek_data, .unpack_data = data_movements_unpack_data, .describe = NULL }; void data_movements_data_register(starpu_data_handle_t *handleptr, unsigned home_node, int *ranks, starpu_mpi_tag_t *tags, int size) { struct data_movements_interface data_movements = { .tags = tags, .ranks = ranks, .size = size }; starpu_data_register(handleptr, home_node, &data_movements, &interface_data_movements_ops); } #endif starpu-1.4.9+dfsg/mpi/src/load_balancer/policy/data_movements_interface.h000066400000000000000000000036771507764646700266450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include /** @file */ #ifndef __DATA_MOVEMENTS_INTERFACE_H #define __DATA_MOVEMENTS_INTERFACE_H /** interface for data_movements */ struct data_movements_interface { /** Data tags table */ starpu_mpi_tag_t *tags; /** Ranks table (where to move the corresponding data) */ int *ranks; /** Size of the tables */ int size; }; void data_movements_data_register(starpu_data_handle_t *handle, unsigned home_node, int *ranks, starpu_mpi_tag_t *tags, int size); starpu_mpi_tag_t **data_movements_get_ref_tags_table(starpu_data_handle_t handle); int **data_movements_get_ref_ranks_table(starpu_data_handle_t handle); int data_movements_reallocate_tables(starpu_data_handle_t handle, unsigned node, int size); starpu_mpi_tag_t *data_movements_get_tags_table(starpu_data_handle_t handle); int *data_movements_get_ranks_table(starpu_data_handle_t handle); int data_movements_get_size_tables(starpu_data_handle_t handle); #define DATA_MOVEMENTS_GET_SIZE_TABLES(interface) (((struct data_movements_interface *)(interface))->size) #define DATA_MOVEMENTS_GET_TAGS_TABLE(interface) (((struct data_movements_interface *)(interface))->tags) #define DATA_MOVEMENTS_GET_RANKS_TABLE(interface) (((struct data_movements_interface *)(interface))->ranks) #endif /* __DATA_MOVEMENTS_INTERFACE_H */ starpu-1.4.9+dfsg/mpi/src/load_balancer/policy/load_balancer_policy.h000066400000000000000000000032541507764646700257330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __LOAD_BALANCER_POLICY_H__ #define __LOAD_BALANCER_POLICY_H__ #include /** @file */ #ifdef __cplusplus extern "C" { #endif /** A load balancer consists in a collection of operations on a data * representing the load of the application (in terms of computation, memory, * whatever). StarPU allows several entry points for the user. The load * balancer allows the user to give its load balancing methods to be used on * these entry points of the runtime system. */ struct load_balancer_policy { int (*init)(struct starpu_mpi_lb_conf *); int (*deinit)(); void (*submitted_task_entry_point)(struct starpu_task *task); void (*finished_task_entry_point)(void); /** Name of the load balancing policy. The selection of the load balancer is * performed through the use of the STARPU_MPI_LB=name environment * variable. */ const char *policy_name; }; extern struct load_balancer_policy load_heat_propagation_policy; #ifdef __cplusplus } #endif #endif // __LOAD_BALANCER_POLICY_H__ starpu-1.4.9+dfsg/mpi/src/load_balancer/policy/load_data_interface.c000066400000000000000000000204671507764646700255360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "load_data_interface.h" #if defined(STARPU_USE_MPI_MPI) int load_data_get_sleep_threshold(starpu_data_handle_t handle) { struct load_data_interface *ld_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return ld_interface->sleep_task_threshold; } int load_data_get_wakeup_threshold(starpu_data_handle_t handle) { struct load_data_interface *ld_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return ld_interface->wakeup_task_threshold; } int load_data_get_current_phase(starpu_data_handle_t handle) { struct load_data_interface *ld_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return ld_interface->phase; } int load_data_get_nsubmitted_tasks(starpu_data_handle_t handle) { struct load_data_interface *ld_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return ld_interface->nsubmitted_tasks; } int load_data_get_nfinished_tasks(starpu_data_handle_t handle) { struct load_data_interface *ld_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return ld_interface->nfinished_tasks; } int load_data_inc_nsubmitted_tasks(starpu_data_handle_t handle) { struct load_data_interface *ld_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); (ld_interface->nsubmitted_tasks)++; return 0; } int load_data_inc_nfinished_tasks(starpu_data_handle_t handle) { struct load_data_interface *ld_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); (ld_interface->nfinished_tasks)++; return 0; } int load_data_next_phase(starpu_data_handle_t handle) { struct load_data_interface *ld_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); ld_interface->phase++; return 0; } int load_data_update_elapsed_time(starpu_data_handle_t handle) { struct load_data_interface *ld_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); ld_interface->elapsed_time = starpu_timing_now() - ld_interface->start; return 0; } double load_data_get_elapsed_time(starpu_data_handle_t handle) { struct load_data_interface *ld_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return ld_interface->elapsed_time; } int load_data_update_wakeup_cond(starpu_data_handle_t handle) { struct load_data_interface *ld_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); int previous_threshold = ld_interface->wakeup_task_threshold; ld_interface->wakeup_task_threshold += (ld_interface->nsubmitted_tasks - previous_threshold) * ld_interface->wakeup_ratio; return 0; } int load_data_wakeup_cond(starpu_data_handle_t handle) { struct load_data_interface *ld_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return (ld_interface->wakeup_task_threshold > 0) && (ld_interface->nfinished_tasks == ld_interface->wakeup_task_threshold); } static void load_data_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { (void) home_node; struct load_data_interface *ld_interface = (struct load_data_interface *) data_interface; unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { struct load_data_interface *local_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, node); local_interface->start = ld_interface->start; local_interface->elapsed_time = ld_interface->elapsed_time; local_interface->phase = ld_interface->phase; local_interface->nsubmitted_tasks = ld_interface->nsubmitted_tasks; local_interface->nfinished_tasks = ld_interface->nsubmitted_tasks; local_interface->wakeup_task_threshold = ld_interface->wakeup_task_threshold; local_interface->wakeup_ratio = ld_interface->wakeup_ratio; local_interface->sleep_task_threshold = ld_interface->sleep_task_threshold; } } static starpu_ssize_t load_data_allocate_data_on_node(void *data_interface, unsigned node) { (void) data_interface; (void) node; return 0; } static void load_data_free_data_on_node(void *data_interface, unsigned node) { (void) data_interface; (void) node; } static size_t load_data_get_size(starpu_data_handle_t handle) { (void) handle; return sizeof(struct load_data_interface); } static uint32_t load_data_footprint(starpu_data_handle_t handle) { struct load_data_interface *ld_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return starpu_hash_crc32c_be(ld_interface->start, starpu_hash_crc32c_be(ld_interface->elapsed_time, starpu_hash_crc32c_be(ld_interface->nsubmitted_tasks, starpu_hash_crc32c_be(ld_interface->sleep_task_threshold, ld_interface->wakeup_task_threshold)))); } static int load_data_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct load_data_interface *ld_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, node); *count = load_data_get_size(handle); if (ptr != NULL) { char *data = (void*) starpu_malloc_on_node_flags(node, *count, 0); *ptr = data; memcpy(data, ld_interface, *count); } return 0; } static int load_data_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { char *data = ptr; STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct load_data_interface *ld_interface = (struct load_data_interface *) starpu_data_get_interface_on_node(handle, node); STARPU_ASSERT(count == sizeof(struct load_data_interface)); memcpy(ld_interface, data, count); return 0; } static int load_data_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { load_data_peek_data(handle, node, ptr, count); starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0); return 0; } static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { (void) src_interface; (void) dst_interface; (void) src_node; (void) dst_node; (void) async_data; return 0; } static const struct starpu_data_copy_methods load_data_copy_methods = { .any_to_any = copy_any_to_any }; static struct starpu_data_interface_ops interface_load_data_ops = { .register_data_handle = load_data_register_data_handle, .allocate_data_on_node = load_data_allocate_data_on_node, .free_data_on_node = load_data_free_data_on_node, .copy_methods = &load_data_copy_methods, .get_size = load_data_get_size, .footprint = load_data_footprint, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct load_data_interface), .to_pointer = NULL, .pack_data = load_data_pack_data, .peek_data = load_data_peek_data, .unpack_data = load_data_unpack_data, .describe = NULL }; void load_data_data_register(starpu_data_handle_t *handleptr, unsigned home_node, int sleep_task_threshold, double wakeup_ratio) { struct load_data_interface load_data = { .start = starpu_timing_now(), .elapsed_time = 0, .phase = 0, .nsubmitted_tasks = 0, .nfinished_tasks = 0, .sleep_task_threshold = sleep_task_threshold, .wakeup_task_threshold = 0, .wakeup_ratio = wakeup_ratio }; starpu_data_register(handleptr, home_node, &load_data, &interface_load_data_ops); } #endif starpu-1.4.9+dfsg/mpi/src/load_balancer/policy/load_data_interface.h000066400000000000000000000054301507764646700255340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include /** @file */ #ifndef __LOAD_DATA_INTERFACE_H #define __LOAD_DATA_INTERFACE_H /** interface for load_data */ struct load_data_interface { /** Starting time of the execution */ double start; /** Elapsed time until the start time and the time when event "launch a load * balancing phase" is triggered */ double elapsed_time; /** Current submission phase, i.e how many balanced steps have already * happened so far. */ int phase; /** Number of currently submitted tasks */ int nsubmitted_tasks; /** Number of currently finished tasks */ int nfinished_tasks; /** Task threshold to sleep the submission thread */ int sleep_task_threshold; /** Task threshold to wake-up the submission thread */ int wakeup_task_threshold; /** Ratio of submitted tasks to wait for completion before waking up the * submission thread */ double wakeup_ratio; }; void load_data_data_register(starpu_data_handle_t *handle, unsigned home_node, int sleep_task_threshold, double wakeup_ratio); int load_data_get_sleep_threshold(starpu_data_handle_t handle); int load_data_get_wakeup_threshold(starpu_data_handle_t handle); int load_data_get_current_phase(starpu_data_handle_t handle); int load_data_get_nsubmitted_tasks(starpu_data_handle_t handle); int load_data_get_nfinished_tasks(starpu_data_handle_t handle); int load_data_inc_nsubmitted_tasks(starpu_data_handle_t handle); int load_data_inc_nfinished_tasks(starpu_data_handle_t handle); int load_data_next_phase(starpu_data_handle_t handle); int load_data_update_elapsed_time(starpu_data_handle_t handle); double load_data_get_elapsed_time(starpu_data_handle_t handle); int load_data_update_wakeup_cond(starpu_data_handle_t handle); int load_data_wakeup_cond(starpu_data_handle_t handle); #define LOAD_DATA_GET_NSUBMITTED_TASKS(interface) (((struct load_data_interface *)(interface))->nsubmitted_tasks) #define LOAD_DATA_GET_SLEEP_THRESHOLD(interface) (((struct load_data_interface *)(interface))->sleep_task_threshold) #define LOAD_DATA_GET_WAKEUP_THRESHOLD(interface) (((struct load_data_interface *)(interface))->wakeup_task_threshold) #endif /* __LOAD_DATA_INTERFACE_H */ starpu-1.4.9+dfsg/mpi/src/load_balancer/policy/load_heat_propagation.c000066400000000000000000000557401507764646700261330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include "load_balancer_policy.h" #include "data_movements_interface.h" #include "load_data_interface.h" #include #if defined(STARPU_USE_MPI_MPI) static starpu_mpi_tag_t TAG_LOAD(int n) { return ((starpu_mpi_tag_t) n+1) << 24; } static starpu_mpi_tag_t TAG_MOV(int n) { return ((starpu_mpi_tag_t) n+1) << 20; } /* Hash table of local pieces of data that has been moved out of the local MPI * node by the load balancer. All of these pieces of data must be migrated back * to the local node at the end of the execution. */ struct moved_data_entry { UT_hash_handle hh; starpu_data_handle_t handle; }; static struct moved_data_entry *mdh = NULL; static starpu_pthread_mutex_t load_data_mutex; static starpu_pthread_cond_t load_data_cond; /* MPI infos */ static int my_rank; static int world_size; /* Number of neighbours of the local MPI node and their IDs. These are given by * the get_neighbors() method, and thus can be easily changed. */ static int *neighbor_ids = NULL; static int nneighbors = 0; /* Local load data */ static starpu_data_handle_t *load_data_handle = NULL; static starpu_data_handle_t *load_data_handle_cpy = NULL; /* Load data of neighbours */ static starpu_data_handle_t *neighbor_load_data_handles = NULL; /* Table which contains a data_movements_handle for each MPI node of * MPI_COMM_WORLD. Since all the MPI nodes must be advised of any data * movement, this table will be used to perform communications of data * movements handles following an all-to-all model. */ static starpu_data_handle_t *data_movements_handles = NULL; /* Load balancer interface which contains the application-specific methods for * the load balancer to use. */ static struct starpu_mpi_lb_conf *user_itf = NULL; static double time_threshold = 20000; /****************************************************************************** * Balancing * *****************************************************************************/ /* Decides which data has to move where, and fills the * data_movements_handles[my_rank] data handle from that. * In data : * - local load_data_handle * - nneighbors * - neighbor_ids[nneighbors] * - neighbor_load_data_handles[nneighbors] * Out data : * - data_movements_handles[my_rank] */ static void balance(starpu_data_handle_t load_data_cpy) { int less_loaded = -1; int n; double ref_elapsed_time; double my_elapsed_time = load_data_get_elapsed_time(load_data_cpy); /* Search for the less loaded neighbor */ ref_elapsed_time = my_elapsed_time; for (n = 0; n < nneighbors; n++) { double elapsed_time = load_data_get_elapsed_time(neighbor_load_data_handles[n]); if (ref_elapsed_time > elapsed_time) { //fprintf(stderr,"Node%d: ref local time %lf vs neighbour%d time %lf\n", my_rank, ref_elapsed_time, neighbor_ids[n], elapsed_time); less_loaded = neighbor_ids[n]; ref_elapsed_time = elapsed_time; } } starpu_data_acquire_on_node(data_movements_handles[my_rank], STARPU_MAIN_RAM, STARPU_RW); /* We found it */ if (less_loaded >= 0) { _STARPU_DEBUG("Less loaded found on node %d : %d\n", my_rank, less_loaded); double diff_time = my_elapsed_time - ref_elapsed_time; /* If the difference is higher than a time threshold, we move * one data to the less loaded neighbour. */ /* TODO: How to decide the time threshold ? */ if ((time_threshold > 0) && (diff_time >= time_threshold)) { starpu_data_handle_t *handles = NULL; int nhandles = 0; user_itf->get_data_unit_to_migrate(&handles, &nhandles, less_loaded); data_movements_reallocate_tables(data_movements_handles[my_rank], STARPU_MAIN_RAM, nhandles); if (nhandles) { starpu_mpi_tag_t *tags = data_movements_get_tags_table(data_movements_handles[my_rank]); int *ranks = data_movements_get_ranks_table(data_movements_handles[my_rank]); for (n = 0; n < nhandles; n++) { tags[n] = starpu_mpi_data_get_tag(handles[n]); ranks[n] = less_loaded; } free(handles); } } else data_movements_reallocate_tables(data_movements_handles[my_rank], STARPU_MAIN_RAM, 0); } else data_movements_reallocate_tables(data_movements_handles[my_rank], STARPU_MAIN_RAM, 0); starpu_data_release_on_node(data_movements_handles[my_rank], STARPU_MAIN_RAM); } static void exchange_load_data_infos(starpu_data_handle_t load_data_cpy) { int i; /* Allocate all requests and status for point-to-point communications */ starpu_mpi_req load_send_req[nneighbors]; starpu_mpi_req load_recv_req[nneighbors]; MPI_Status load_send_status[nneighbors]; MPI_Status load_recv_status[nneighbors]; int flag, ret; /* Send the local load data to neighbour nodes, and receive the remote load * data from neighbour nodes */ for (i = 0; i < nneighbors; i++) { //_STARPU_DEBUG("[node %d] sending and receiving with %i-th neighbor %i\n", my_rank, i, neighbor_ids[i]); ret = starpu_mpi_isend(load_data_cpy, &load_send_req[i], neighbor_ids[i], TAG_LOAD(my_rank), MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); ret = starpu_mpi_irecv(neighbor_load_data_handles[i], &load_recv_req[i], neighbor_ids[i], TAG_LOAD(neighbor_ids[i]), MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); } /* Wait for completion of all send requests */ for (i = 0; i < nneighbors; i++) { flag = 0; while (!flag) { ret = starpu_mpi_test(&load_send_req[i], &flag, &load_send_status[i]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); } } /* Wait for completion of all receive requests */ for (i = 0; i < nneighbors; i++) { flag = 0; while (!flag) { ret = starpu_mpi_test(&load_recv_req[i], &flag, &load_recv_status[i]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); } } } static void exchange_data_movements_infos() { int i; /* Allocate all requests and status for point-to-point communications */ starpu_mpi_req data_movements_send_req[world_size]; starpu_mpi_req data_movements_recv_req[world_size]; MPI_Status data_movements_send_status[world_size]; MPI_Status data_movements_recv_status[world_size]; int flag, ret; /* Send the new ranks of local data to all other nodes, and receive the new * ranks of all remote data from all other nodes */ for (i = 0; i < world_size; i++) { if (i != my_rank) { //_STARPU_DEBUG("[node %d] Send and receive data movement with %d\n", my_rank, i); ret = starpu_mpi_isend(data_movements_handles[my_rank], &data_movements_send_req[i], i, TAG_MOV(my_rank), MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); ret = starpu_mpi_irecv(data_movements_handles[i], &data_movements_recv_req[i], i, TAG_MOV(i), MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); } } /* Wait for completion of all send requests */ for (i = 0; i < world_size; i++) { if (i != my_rank) { //fprintf(stderr,"Wait for sending data movement of %d to %d\n", my_rank, i); flag = 0; while (!flag) { ret = starpu_mpi_test(&data_movements_send_req[i], &flag, &data_movements_send_status[i]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); } } } /* Wait for completion of all receive requests */ for (i = 0; i < world_size; i++) { if (i != my_rank) { //fprintf(stderr,"Wait for receiving data movement from %d on %d\n", i, my_rank); flag = 0; while (!flag) { ret = starpu_mpi_test(&data_movements_recv_req[i], &flag, &data_movements_recv_status[i]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); } } } } static void update_data_ranks() { int i,j; /* Update the new ranks for all concerned data */ for (i = 0; i < world_size; i++) { int ndata_to_update = data_movements_get_size_tables(data_movements_handles[i]); if (ndata_to_update) { //fprintf(stderr,"Update %d data from table %d on node %d\n", ndata_to_update, i, my_rank); for (j = 0; j < ndata_to_update; j++) { starpu_data_handle_t handle = _starpu_mpi_tag_get_data_handle_from_tag((data_movements_get_tags_table(data_movements_handles[i]))[j]); STARPU_ASSERT(handle); int dst_rank = (data_movements_get_ranks_table(data_movements_handles[i]))[j]; /* Save the fact that the data has been moved out of this node */ if (i == my_rank) { struct moved_data_entry *md; _STARPU_MPI_MALLOC(md, sizeof(struct moved_data_entry)); md->handle = handle; HASH_ADD_PTR(mdh, handle, md); } else if (dst_rank == my_rank) { /* The data has been moved out, and now is moved back, so * update the state of the moved_data hash table to reflect * this change */ struct moved_data_entry *md = NULL; HASH_FIND_PTR(mdh, &handle, md); if (md) { HASH_DEL(mdh, md); free(md); } } //if (i == my_rank) //{ // if (dst_rank != my_rank) // fprintf(stderr,"Move data %p (tag %d) from node %d to node %d\n", handle, (data_movements_get_tags_table(data_movements_handles[i]))[j], my_rank, dst_rank); // else // fprintf(stderr,"Bring back data %p (tag %d) from node %d on node %d\n", handle, (data_movements_get_tags_table(data_movements_handles[i]))[j], starpu_mpi_data_get_rank(handle), my_rank); //} _STARPU_DEBUG("Call of starpu_mpi_get_data_on_node(%"PRIi64",%d) on node %d\n", starpu_mpi_data_get_tag(handle), dst_rank, my_rank); /* Migrate the data handle */ int ret = starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, handle, dst_rank, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); _STARPU_DEBUG("New rank (%d) of data %"PRIi64" upgraded on node %d\n", dst_rank, starpu_mpi_data_get_tag(handle), my_rank); starpu_mpi_data_set_rank_comm(handle, dst_rank, MPI_COMM_WORLD); } } } } static void clean_balance() { int i; starpu_mpi_cache_flush(MPI_COMM_WORLD, *load_data_handle_cpy); for (i = 0; i < nneighbors; i++) starpu_mpi_cache_flush(MPI_COMM_WORLD, neighbor_load_data_handles[i]); for (i = 0; i < world_size; i++) starpu_mpi_cache_flush(MPI_COMM_WORLD, data_movements_handles[i]); } /* Core function of the load balancer. Computes from the load_data_cpy handle a * load balancing of the work to come (if needed), perform the necessary data * communications and negotiate with the other nodes the rebalancing. */ static void heat_balance(starpu_data_handle_t load_data_cpy) { /* Exchange load data handles with neighboring nodes */ exchange_load_data_infos(load_data_cpy); /* Determine if this node should sent data to other nodes : * which ones, how much data */ balance(load_data_cpy); /* Exchange data movements with neighboring nodes */ exchange_data_movements_infos(); /* Perform data movements */ update_data_ranks(); /* Clean the data handles to properly launch the next balance phase */ clean_balance(); } /****************************************************************************** * Heat Load Balancer Entry Points * *****************************************************************************/ static void submitted_task_heat(struct starpu_task *task) { load_data_inc_nsubmitted_tasks(*load_data_handle); //if (load_data_get_nsubmitted_tasks(*load_data_handle) > task->tag_id) //{ // fprintf(stderr,"Error : nsubmitted_tasks (%d) > tag_id (%lld) ! \n", load_data_get_nsubmitted_tasks(*load_data_handle), (long long int)task->tag_id); // STARPU_ASSERT(0); //} int phase = load_data_get_current_phase(*load_data_handle); /* Numbering of tasks in StarPU-MPI should be given by the application with * the STARPU_TAG_ONLY insert task option for now. */ /* TODO: Properly implement a solution for numbering tasks in StarPU-MPI */ if (((int)task->tag_id / load_data_get_sleep_threshold(*load_data_handle)) > phase) { STARPU_PTHREAD_MUTEX_LOCK(&load_data_mutex); load_data_update_wakeup_cond(*load_data_handle); //fprintf(stderr,"Node %d sleep on tag %lld\n", my_rank, (long long int)task->tag_id); //if (load_data_get_nsubmitted_tasks(*load_data_handle) < load_data_get_wakeup_threshold(*load_data_handle)) //{ // fprintf(stderr,"Error : nsubmitted_tasks (%d) lower than wakeup_threshold (%d) !\n", load_data_get_nsubmitted_tasks(*load_data_handle), load_data_get_wakeup_threshold(*load_data_handle)); // STARPU_ASSERT(0); //} if (load_data_get_wakeup_threshold(*load_data_handle) > load_data_get_nfinished_tasks(*load_data_handle)) STARPU_PTHREAD_COND_WAIT(&load_data_cond, &load_data_mutex); load_data_next_phase(*load_data_handle); /* Register a copy of the load data at this moment, to allow to compute * the heat balance while not locking the load data during the whole * balance step, which could cause all the workers to wait on the lock * to update the data. */ struct starpu_data_interface_ops *itf_load_data = starpu_data_get_interface_ops(*load_data_handle); void* itf_src = starpu_data_get_interface_on_node(*load_data_handle, STARPU_MAIN_RAM); void* itf_dst = starpu_data_get_interface_on_node(*load_data_handle_cpy, STARPU_MAIN_RAM); memcpy(itf_dst, itf_src, itf_load_data->interface_size); _STARPU_DEBUG("[node %d] Balance phase %d\n", my_rank, load_data_get_current_phase(*load_data_handle)); STARPU_PTHREAD_MUTEX_UNLOCK(&load_data_mutex); heat_balance(*load_data_handle_cpy); } } static void finished_task_heat(void) { //fprintf(stderr,"Try to decrement nsubmitted_tasks..."); STARPU_PTHREAD_MUTEX_LOCK(&load_data_mutex); load_data_inc_nfinished_tasks(*load_data_handle); //fprintf(stderr,"Decrement nsubmitted_tasks, now %d\n", load_data_get_nsubmitted_tasks(*load_data_handle)); if (load_data_wakeup_cond(*load_data_handle)) { //fprintf(stderr,"Wakeup ! nfinished_tasks = %d, wakeup_threshold = %d\n", load_data_get_nfinished_tasks(*load_data_handle), load_data_get_wakeup_threshold(*load_data_handle)); load_data_update_elapsed_time(*load_data_handle); STARPU_PTHREAD_COND_SIGNAL(&load_data_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&load_data_mutex); } else STARPU_PTHREAD_MUTEX_UNLOCK(&load_data_mutex); } /****************************************************************************** * Initialization / Deinitialization * *****************************************************************************/ static int init_heat(struct starpu_mpi_lb_conf *itf) { int i; int sleep_task_threshold; double wakeup_ratio; starpu_mpi_comm_size(MPI_COMM_WORLD, &world_size); starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank); /* Immediately return if the starpu_mpi_lb_conf is invalid. */ if (!(itf && itf->get_neighbors && itf->get_data_unit_to_migrate)) { _STARPU_MSG("Error: struct starpu_mpi_lb_conf %p invalid\n", itf); return 1; } _STARPU_MPI_MALLOC(user_itf, sizeof(struct starpu_mpi_lb_conf)); memcpy(user_itf, itf, sizeof(struct starpu_mpi_lb_conf)); /* Get the neighbors of the local MPI node */ user_itf->get_neighbors(&neighbor_ids, &nneighbors); if (nneighbors == 0) { _STARPU_MSG("Error: Function get_neighbors returning 0 neighbor\n"); free(user_itf); user_itf = NULL; return 2; } /* The sleep threshold is deducted from the numbering of tasks by the * application. For example, with this threshold, the submission thread * will stop when a task for which the numbering is 2000 or above will be * submitted to StarPU-MPI. However, much less tasks can be really * submitted to the local MPI node: the sleeping of the submission threads * checks the numbering of the tasks, not how many tasks have been * submitted to the local MPI node, which are two different things. */ char *sleep_env = starpu_getenv("LB_HEAT_SLEEP_THRESHOLD"); if (sleep_env) sleep_task_threshold = atoi(sleep_env); else sleep_task_threshold = 2000; char *wakeup_env = starpu_getenv("LB_HEAT_WAKEUP_RATIO"); if (wakeup_env) wakeup_ratio = atof(wakeup_env); else wakeup_ratio = 0.5; char *time_env = starpu_getenv("LB_HEAT_TIME_THRESHOLD"); if (time_env) time_threshold = atoi(time_env); else time_threshold = 2000; STARPU_PTHREAD_MUTEX_INIT(&load_data_mutex, NULL); STARPU_PTHREAD_COND_INIT(&load_data_cond, NULL); /* Allocate, initialize and register all the data handles that will be * needed for the load balancer, to not reallocate them at each balance * step. */ /* Local load data */ _STARPU_MPI_CALLOC(load_data_handle, 1, sizeof(starpu_data_handle_t)); load_data_data_register(load_data_handle, STARPU_MAIN_RAM, sleep_task_threshold, wakeup_ratio); /* Copy of the local load data to enable parallel update of the load data * with communications to neighbor nodes */ _STARPU_MPI_CALLOC(load_data_handle_cpy, 1, sizeof(starpu_data_handle_t)); void *local_interface = starpu_data_get_interface_on_node(*load_data_handle, STARPU_MAIN_RAM); struct starpu_data_interface_ops *itf_load_data = starpu_data_get_interface_ops(*load_data_handle); starpu_data_register(load_data_handle_cpy, STARPU_MAIN_RAM, local_interface, itf_load_data); starpu_mpi_data_register(*load_data_handle_cpy, TAG_LOAD(my_rank), my_rank); /* Remote load data */ _STARPU_MPI_CALLOC(neighbor_load_data_handles, nneighbors, sizeof(starpu_data_handle_t)); for (i = 0; i < nneighbors; i++) { load_data_data_register(&neighbor_load_data_handles[i], STARPU_MAIN_RAM, sleep_task_threshold, wakeup_ratio); starpu_mpi_data_register(neighbor_load_data_handles[i], TAG_LOAD(neighbor_ids[i]), neighbor_ids[i]); } /* Data movements handles */ _STARPU_MPI_MALLOC(data_movements_handles, world_size*sizeof(starpu_data_handle_t)); for (i = 0; i < world_size; i++) { data_movements_data_register(&data_movements_handles[i], STARPU_MAIN_RAM, NULL, NULL, 0); starpu_mpi_data_register(data_movements_handles[i], TAG_MOV(i), i); } /* Hash table of moved data that will be brought back on the node at * termination time */ mdh = NULL; return 0; } /* Move back all the data that has been migrated out of this node at * denitialization time of the load balancer, to ensure the consistency with * the ranks of data originally registered by the application. */ static void move_back_data() { int i,j; /* Update the new ranks for all concerned data */ for (i = 0; i < world_size; i++) { /* In this case, each data_movements_handles contains the handles to move back on the specific node */ int ndata_to_update = data_movements_get_size_tables(data_movements_handles[i]); if (ndata_to_update) { _STARPU_DEBUG("Move back %d data from table %d on node %d\n", ndata_to_update, i, my_rank); for (j = 0; j < ndata_to_update; j++) { starpu_data_handle_t handle = _starpu_mpi_tag_get_data_handle_from_tag((data_movements_get_tags_table(data_movements_handles[i]))[j]); STARPU_ASSERT(handle); int dst_rank = (data_movements_get_ranks_table(data_movements_handles[i]))[j]; STARPU_ASSERT(i == dst_rank); if (i == my_rank) { /* The data is moved back, so update the state of the * moved_data hash table to reflect this change */ struct moved_data_entry *md = NULL; HASH_FIND_PTR(mdh, &handle, md); if (md) { HASH_DEL(mdh, md); free(md); } } //fprintf(stderr,"Call of starpu_mpi_get_data_on_node(%d,%d) on node %d\n", starpu_mpi_data_get_tag(handle), dst_rank, my_rank); /* Migrate the data handle */ starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, handle, dst_rank, NULL, NULL); //fprintf(stderr,"New rank (%d) of data %d upgraded on node %d\n", dst_rank, starpu_mpi_data_get_tag(handle), my_rank); starpu_mpi_data_set_rank_comm(handle, dst_rank, MPI_COMM_WORLD); } } } } static int deinit_heat() { int i; if ((!user_itf) || (nneighbors == 0)) return 1; _STARPU_DEBUG("Shutting down heat lb policy\n"); unsigned int ndata_to_move_back = HASH_COUNT(mdh); starpu_data_acquire_on_node(data_movements_handles[my_rank], STARPU_MAIN_RAM, STARPU_RW); if (ndata_to_move_back) { _STARPU_DEBUG("Move back %u data on node %d ..\n", ndata_to_move_back, my_rank); data_movements_reallocate_tables(data_movements_handles[my_rank], STARPU_MAIN_RAM, ndata_to_move_back); starpu_mpi_tag_t *tags = data_movements_get_tags_table(data_movements_handles[my_rank]); int *ranks = data_movements_get_ranks_table(data_movements_handles[my_rank]); int n = 0; struct moved_data_entry *md=NULL, *tmp=NULL; HASH_ITER(hh, mdh, md, tmp) { tags[n] = starpu_mpi_data_get_tag(md->handle); ranks[n] = my_rank; n++; } } else data_movements_reallocate_tables(data_movements_handles[my_rank], STARPU_MAIN_RAM, 0); starpu_data_release_on_node(data_movements_handles[my_rank], STARPU_MAIN_RAM); exchange_data_movements_infos(); move_back_data(); /* This assert ensures that all nodes have properly gotten back all the * data that has been moven out of the node. */ STARPU_ASSERT(HASH_COUNT(mdh) == 0); free(mdh); mdh = NULL; starpu_data_unregister(*load_data_handle); free(load_data_handle); load_data_handle = NULL; starpu_mpi_cache_flush(MPI_COMM_WORLD, *load_data_handle_cpy); starpu_data_unregister(*load_data_handle_cpy); free(load_data_handle_cpy); load_data_handle_cpy = NULL; for (i = 0; i < nneighbors; i++) { starpu_mpi_cache_flush(MPI_COMM_WORLD, neighbor_load_data_handles[i]); starpu_data_unregister(neighbor_load_data_handles[i]); } free(neighbor_load_data_handles); neighbor_load_data_handles = NULL; nneighbors = 0; free(neighbor_ids); neighbor_ids = NULL; for (i = 0; i < world_size; i++) { starpu_mpi_cache_flush(MPI_COMM_WORLD, data_movements_handles[i]); starpu_data_acquire_on_node(data_movements_handles[i], STARPU_MAIN_RAM, STARPU_W); data_movements_reallocate_tables(data_movements_handles[i], STARPU_MAIN_RAM, 0); starpu_data_release_on_node(data_movements_handles[i], STARPU_MAIN_RAM); starpu_data_unregister(data_movements_handles[i]); } free(data_movements_handles); data_movements_handles = NULL; STARPU_PTHREAD_MUTEX_DESTROY(&load_data_mutex); STARPU_PTHREAD_COND_DESTROY(&load_data_cond); free(user_itf); user_itf = NULL; return 0; } /****************************************************************************** * Policy * *****************************************************************************/ struct load_balancer_policy load_heat_propagation_policy = { .init = init_heat, .deinit = deinit_heat, .submitted_task_entry_point = submitted_task_heat, .finished_task_entry_point = finished_task_heat, .policy_name = "heat" }; #endif starpu-1.4.9+dfsg/mpi/src/mpi/000077500000000000000000000000001507764646700161515ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/src/mpi/starpu_mpi_comm.c000066400000000000000000000156361507764646700215260ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2017-2017 Guillaume Beauchamp * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #ifdef STARPU_USE_MPI_MPI struct _starpu_mpi_comm { MPI_Comm comm; struct _starpu_mpi_envelope *envelope; MPI_Request request; int posted; #ifdef STARPU_SIMGRID MPI_Status status; starpu_pthread_queue_t queue; unsigned done; #endif }; struct _starpu_mpi_comm_hashtable { UT_hash_handle hh; MPI_Comm comm; }; /* Protect between comm addition from submitting tasks and MPI thread */ static starpu_pthread_rwlock_t _starpu_mpi_comms_mutex; struct _starpu_mpi_comm_hashtable *_starpu_mpi_comms_cache; struct _starpu_mpi_comm **_starpu_mpi_comms; int _starpu_mpi_comm_nb; int _starpu_mpi_comm_allocated; int _starpu_mpi_comm_tested; void _starpu_mpi_comm_init(MPI_Comm comm) { _STARPU_MPI_DEBUG(10, "allocating for %d communicators\n", _starpu_mpi_comm_allocated); _starpu_mpi_comm_allocated=10; _STARPU_MPI_CALLOC(_starpu_mpi_comms, _starpu_mpi_comm_allocated, sizeof(struct _starpu_mpi_comm *)); _starpu_mpi_comm_nb=0; _starpu_mpi_comm_tested=0; _starpu_mpi_comms_cache = NULL; STARPU_PTHREAD_RWLOCK_INIT(&_starpu_mpi_comms_mutex, NULL); _starpu_mpi_comm_register(comm); } void _starpu_mpi_comm_shutdown() { int i; for(i=0 ; i<_starpu_mpi_comm_nb ; i++) { struct _starpu_mpi_comm *_comm = _starpu_mpi_comms[i]; // get the ith _comm; free(_comm->envelope); #ifdef STARPU_SIMGRID starpu_pthread_queue_unregister(&_starpu_mpi_thread_wait, &_comm->queue); starpu_pthread_queue_destroy(&_comm->queue); #endif free(_comm); } free(_starpu_mpi_comms); struct _starpu_mpi_comm_hashtable *entry=NULL, *tmp=NULL; HASH_ITER(hh, _starpu_mpi_comms_cache, entry, tmp) { HASH_DEL(_starpu_mpi_comms_cache, entry); free(entry); } STARPU_PTHREAD_RWLOCK_DESTROY(&_starpu_mpi_comms_mutex); } void _starpu_mpi_comm_register(MPI_Comm comm) { struct _starpu_mpi_comm_hashtable *found; STARPU_PTHREAD_RWLOCK_RDLOCK(&_starpu_mpi_comms_mutex); HASH_FIND(hh, _starpu_mpi_comms_cache, &comm, sizeof(MPI_Comm), found); STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_mpi_comms_mutex); if (found) { _STARPU_MPI_DEBUG(10, "comm %ld (%ld) already registered\n", (long int)comm, (long int)MPI_COMM_WORLD); return; } STARPU_PTHREAD_RWLOCK_WRLOCK(&_starpu_mpi_comms_mutex); HASH_FIND(hh, _starpu_mpi_comms_cache, &comm, sizeof(MPI_Comm), found); if (found) { _STARPU_MPI_DEBUG(10, "comm %ld (%ld) already registered in between\n", (long int)comm, (long int)MPI_COMM_WORLD); } else { if (_starpu_mpi_comm_nb == _starpu_mpi_comm_allocated) { _starpu_mpi_comm_allocated *= 2; _STARPU_MPI_DEBUG(10, "reallocating for %d communicators\n", _starpu_mpi_comm_allocated); _STARPU_MPI_REALLOC(_starpu_mpi_comms, _starpu_mpi_comm_allocated * sizeof(struct _starpu_mpi_comm *)); } _STARPU_MPI_DEBUG(10, "registering comm %ld (%ld) number %d\n", (long int)comm, (long int)MPI_COMM_WORLD, _starpu_mpi_comm_nb); struct _starpu_mpi_comm *_comm; _STARPU_MPI_CALLOC(_comm, 1, sizeof(struct _starpu_mpi_comm)); _comm->comm = comm; _STARPU_MPI_CALLOC(_comm->envelope, 1,sizeof(struct _starpu_mpi_envelope)); _comm->posted = 0; _starpu_mpi_comms[_starpu_mpi_comm_nb] = _comm; _starpu_mpi_comm_nb++; struct _starpu_mpi_comm_hashtable *entry; _STARPU_MPI_MALLOC(entry, sizeof(*entry)); entry->comm = comm; HASH_ADD(hh, _starpu_mpi_comms_cache, comm, sizeof(entry->comm), entry); #ifdef STARPU_SIMGRID starpu_pthread_queue_init(&_comm->queue); starpu_pthread_queue_register(&_starpu_mpi_thread_wait, &_comm->queue); _comm->done = 0; #endif } STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_mpi_comms_mutex); } void _starpu_mpi_comm_post_recv() { int i; STARPU_PTHREAD_RWLOCK_RDLOCK(&_starpu_mpi_comms_mutex); for(i=0 ; i<_starpu_mpi_comm_nb ; i++) { struct _starpu_mpi_comm *_comm = _starpu_mpi_comms[i]; // get the ith _comm; if (_comm->posted == 0) { _STARPU_MPI_DEBUG(3, "Posting a receive to get a data envelop on comm %d %ld\n", i, (long int)_comm->comm); _STARPU_MPI_COMM_FROM_DEBUG(_comm->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, MPI_ANY_SOURCE, _STARPU_MPI_TAG_ENVELOPE, (int64_t)_STARPU_MPI_TAG_ENVELOPE, _comm->comm); MPI_Irecv(_comm->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, MPI_ANY_SOURCE, _STARPU_MPI_TAG_ENVELOPE, _comm->comm, &_comm->request); #ifdef STARPU_SIMGRID _starpu_mpi_simgrid_wait_req(&_comm->request, &_comm->status, &_comm->queue, &_comm->done); #endif _comm->posted = 1; } } STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_mpi_comms_mutex); } int _starpu_mpi_comm_test_recv(MPI_Status *status, struct _starpu_mpi_envelope **envelope, MPI_Comm *comm) { int i=_starpu_mpi_comm_tested; STARPU_PTHREAD_RWLOCK_RDLOCK(&_starpu_mpi_comms_mutex); while (1) { struct _starpu_mpi_comm *_comm = _starpu_mpi_comms[i]; // get the ith _comm; if (_comm->posted) { int flag, res; /* test whether an envelope has arrived. */ #ifdef STARPU_SIMGRID res = _starpu_mpi_simgrid_mpi_test(&_comm->done, &flag); memcpy(status, &_comm->status, sizeof(*status)); #else res = MPI_Test(&_comm->request, &flag, status); #endif STARPU_ASSERT(res == MPI_SUCCESS); if (flag) { _comm->posted = 0; _starpu_mpi_comm_tested++; if (_starpu_mpi_comm_tested == _starpu_mpi_comm_nb) _starpu_mpi_comm_tested = 0; *envelope = _comm->envelope; *comm = _comm->comm; STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_mpi_comms_mutex); return 1; } } i++; if (i == _starpu_mpi_comm_nb) { i=0; } if (i == _starpu_mpi_comm_tested) { // We have tested all the requests, none has completed STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_mpi_comms_mutex); return 0; } } STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_mpi_comms_mutex); return 0; } void _starpu_mpi_comm_cancel_recv() { int i; STARPU_PTHREAD_RWLOCK_RDLOCK(&_starpu_mpi_comms_mutex); for(i=0 ; i<_starpu_mpi_comm_nb ; i++) { struct _starpu_mpi_comm *_comm = _starpu_mpi_comms[i]; // get the ith _comm; if (_comm->posted == 1) { MPI_Cancel(&_comm->request); #ifndef STARPU_SIMGRID { MPI_Status status; MPI_Wait(&_comm->request, &status); } #endif _comm->posted = 0; } } STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_mpi_comms_mutex); } #endif /* STARPU_USE_MPI_MPI */ starpu-1.4.9+dfsg/mpi/src/mpi/starpu_mpi_comm.h000066400000000000000000000024531507764646700215240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_COMM_H__ #define __STARPU_MPI_COMM_H__ #include #include #include #ifdef STARPU_USE_MPI_MPI #include /** @file */ #ifdef __cplusplus extern "C" { #endif void _starpu_mpi_comm_init(MPI_Comm comm); void _starpu_mpi_comm_shutdown(); void _starpu_mpi_comm_register(MPI_Comm comm); void _starpu_mpi_comm_post_recv(); int _starpu_mpi_comm_test_recv(MPI_Status *status, struct _starpu_mpi_envelope **envelope, MPI_Comm *comm); void _starpu_mpi_comm_cancel_recv(); #ifdef __cplusplus } #endif #endif // STARPU_USE_MPI_MPI #endif // __STARPU_MPI_COMM_H__ starpu-1.4.9+dfsg/mpi/src/mpi/starpu_mpi_driver.h000066400000000000000000000020171507764646700220600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_DRIVER_H__ #define __STARPU_MPI_DRIVER_H__ #include /** @file */ #ifdef STARPU_USE_MPI_MPI #ifdef __cplusplus extern "C" { #endif void _starpu_mpi_driver_init(struct starpu_conf *conf); void _starpu_mpi_driver_shutdown(); #ifdef __cplusplus } #endif #endif // STARPU_USE_MPI_MPI #endif // __STARPU_MPI_DRIVER_H__ starpu-1.4.9+dfsg/mpi/src/mpi/starpu_mpi_early_data.c000066400000000000000000000207151507764646700226720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #ifdef STARPU_USE_MPI_MPI /** the hashlist is on 2 levels, the first top level is indexed on (node, rank), the second lower level is indexed on the data tag */ struct _starpu_mpi_early_data_handle_hashlist { struct _starpu_mpi_early_data_handle_tag_hashlist *datahash; UT_hash_handle hh; struct _starpu_mpi_node node; }; /** stores data which have been received by MPI but have not been requested by the application */ static starpu_pthread_mutex_t _starpu_mpi_early_data_handle_mutex; static struct _starpu_mpi_early_data_handle_hashlist *_starpu_mpi_early_data_handle_hashmap = NULL; static int _starpu_mpi_early_data_handle_hashmap_count = 0; void _starpu_mpi_early_data_init(void) { _starpu_mpi_early_data_handle_hashmap = NULL; _starpu_mpi_early_data_handle_hashmap_count = 0; STARPU_PTHREAD_MUTEX_INIT(&_starpu_mpi_early_data_handle_mutex, NULL); } void _starpu_mpi_early_data_check_termination(void) { if (_starpu_mpi_early_data_handle_hashmap_count != 0) { struct _starpu_mpi_early_data_handle_hashlist *current=NULL, *tmp=NULL; HASH_ITER(hh, _starpu_mpi_early_data_handle_hashmap, current, tmp) { struct _starpu_mpi_early_data_handle_tag_hashlist *tag_current=NULL, *tag_tmp=NULL; HASH_ITER(hh, current->datahash, tag_current, tag_tmp) { _STARPU_MSG("Unexpected message with comm %ld source %d tag %ld\n", (long int)current->node.comm, current->node.rank, tag_current->data_tag); } } STARPU_ASSERT_MSG(_starpu_mpi_early_data_handle_hashmap_count == 0, "Number of unexpected received messages left is not 0 (but %d), did you forget to post a receive corresponding to a send?", _starpu_mpi_early_data_handle_hashmap_count); } } void _starpu_mpi_early_data_shutdown(void) { struct _starpu_mpi_early_data_handle_hashlist *current=NULL, *tmp=NULL; HASH_ITER(hh, _starpu_mpi_early_data_handle_hashmap, current, tmp) { _STARPU_MPI_DEBUG(600, "Hash early_data with comm %ld source %d\n", (long int) current->node.comm, current->node.rank); struct _starpu_mpi_early_data_handle_tag_hashlist *tag_entry=NULL, *tag_tmp=NULL; HASH_ITER(hh, current->datahash, tag_entry, tag_tmp) { _STARPU_MPI_DEBUG(600, "Hash 2nd level with tag %ld\n", tag_entry->data_tag); STARPU_ASSERT(_starpu_mpi_early_data_handle_list_empty(&tag_entry->list)); HASH_DEL(current->datahash, tag_entry); free(tag_entry); } HASH_DEL(_starpu_mpi_early_data_handle_hashmap, current); free(current); } STARPU_PTHREAD_MUTEX_DESTROY(&_starpu_mpi_early_data_handle_mutex); } struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_create(struct _starpu_mpi_envelope *envelope, int source, MPI_Comm comm) { struct _starpu_mpi_early_data_handle* early_data_handle; _STARPU_MPI_CALLOC(early_data_handle, 1, sizeof(struct _starpu_mpi_early_data_handle)); STARPU_PTHREAD_MUTEX_INIT(&early_data_handle->req_mutex, NULL); STARPU_PTHREAD_COND_INIT(&early_data_handle->req_cond, NULL); early_data_handle->node_tag.node.comm = comm; early_data_handle->node_tag.node.rank = source; early_data_handle->node_tag.data_tag = envelope->data_tag; return early_data_handle; } void _starpu_mpi_early_data_delete(struct _starpu_mpi_early_data_handle *early_data_handle) { free(early_data_handle); } struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu_mpi_node_tag *node_tag) { struct _starpu_mpi_early_data_handle_hashlist *hashlist; struct _starpu_mpi_early_data_handle *early_data_handle; STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_data_handle_mutex); _STARPU_MPI_DEBUG(60, "Looking for early_data_handle with comm %ld source %d tag %ld\n", (long int)node_tag->node.comm, node_tag->node.rank, node_tag->data_tag); HASH_FIND(hh, _starpu_mpi_early_data_handle_hashmap, &node_tag->node, sizeof(struct _starpu_mpi_node), hashlist); if (hashlist == NULL) { _STARPU_MPI_DEBUG(600, "No entry for (comm %ld, source %d)\n", (long int)node_tag->node.comm, node_tag->node.rank); early_data_handle = NULL; } else { struct _starpu_mpi_early_data_handle_tag_hashlist *tag_hashlist; HASH_FIND(hh, hashlist->datahash, &node_tag->data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist); if (tag_hashlist == NULL) { _STARPU_MPI_DEBUG(600, "No entry for tag %ld\n", node_tag->data_tag); early_data_handle = NULL; } else if (_starpu_mpi_early_data_handle_list_empty(&tag_hashlist->list)) { _STARPU_MPI_DEBUG(600, "List empty for tag %ld\n", node_tag->data_tag); early_data_handle = NULL; } else { _starpu_mpi_early_data_handle_hashmap_count --; early_data_handle = _starpu_mpi_early_data_handle_list_pop_front(&tag_hashlist->list); } } _STARPU_MPI_DEBUG(60, "Found early_data_handle %p with comm %ld source %d tag %ld\n", early_data_handle, (long int)node_tag->node.comm, node_tag->node.rank, node_tag->data_tag); STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_data_handle_mutex); return early_data_handle; } struct _starpu_mpi_early_data_handle_tag_hashlist *_starpu_mpi_early_data_extract(struct _starpu_mpi_node_tag *node_tag) { struct _starpu_mpi_early_data_handle_hashlist *hashlist; struct _starpu_mpi_early_data_handle_tag_hashlist *tag_hashlist = NULL; STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_data_handle_mutex); _STARPU_MPI_DEBUG(60, "Looking for hashlist for (comm %ld, source %d)\n", (long int)node_tag->node.comm, node_tag->node.rank); HASH_FIND(hh, _starpu_mpi_early_data_handle_hashmap, &node_tag->node, sizeof(struct _starpu_mpi_node), hashlist); if (hashlist) { _STARPU_MPI_DEBUG(60, "Looking for hashlist for (tag %ld)\n", node_tag->data_tag); HASH_FIND(hh, hashlist->datahash, &node_tag->data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist); if (tag_hashlist) { _starpu_mpi_early_data_handle_hashmap_count -= _starpu_mpi_early_data_handle_list_size(&tag_hashlist->list); HASH_DEL(hashlist->datahash, tag_hashlist); } } _STARPU_MPI_DEBUG(60, "Found hashlist %p for (comm %ld, source %d) and (tag %ld)\n", tag_hashlist, (long int)node_tag->node.comm, node_tag->node.rank, node_tag->data_tag); STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_data_handle_mutex); return tag_hashlist; } void _starpu_mpi_early_data_add(struct _starpu_mpi_early_data_handle *early_data_handle) { STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_data_handle_mutex); _STARPU_MPI_DEBUG(60, "Adding early_data_handle %p with comm %ld source %d tag %ld (%p)\n", early_data_handle, (long int)early_data_handle->node_tag.node.comm, early_data_handle->node_tag.node.rank, early_data_handle->node_tag.data_tag, &early_data_handle->node_tag.node); struct _starpu_mpi_early_data_handle_hashlist *hashlist; HASH_FIND(hh, _starpu_mpi_early_data_handle_hashmap, &early_data_handle->node_tag.node, sizeof(struct _starpu_mpi_node), hashlist); if (hashlist == NULL) { _STARPU_MPI_MALLOC(hashlist, sizeof(struct _starpu_mpi_early_data_handle_hashlist)); hashlist->node = early_data_handle->node_tag.node; hashlist->datahash = NULL; HASH_ADD(hh, _starpu_mpi_early_data_handle_hashmap, node, sizeof(hashlist->node), hashlist); } struct _starpu_mpi_early_data_handle_tag_hashlist *tag_hashlist; HASH_FIND(hh, hashlist->datahash, &early_data_handle->node_tag.data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist); if (tag_hashlist == NULL) { _STARPU_MPI_MALLOC(tag_hashlist, sizeof(struct _starpu_mpi_early_data_handle_tag_hashlist)); tag_hashlist->data_tag = early_data_handle->node_tag.data_tag; HASH_ADD(hh, hashlist->datahash, data_tag, sizeof(tag_hashlist->data_tag), tag_hashlist); _starpu_mpi_early_data_handle_list_init(&tag_hashlist->list); } _starpu_mpi_early_data_handle_list_push_back(&tag_hashlist->list, early_data_handle); _starpu_mpi_early_data_handle_hashmap_count ++; STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_data_handle_mutex); } #endif // STARPU_USE_MPI_MPI starpu-1.4.9+dfsg/mpi/src/mpi/starpu_mpi_early_data.h000066400000000000000000000044351507764646700227000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_EARLY_DATA_H__ #define __STARPU_MPI_EARLY_DATA_H__ #include #include #include #include #include #include #include /** @file */ #ifdef STARPU_USE_MPI_MPI #ifdef __cplusplus extern "C" { #endif LIST_TYPE(_starpu_mpi_early_data_handle, starpu_data_handle_t handle; struct _starpu_mpi_req *req; void *buffer; size_t size; unsigned buffer_node; struct _starpu_mpi_node_tag node_tag; starpu_pthread_mutex_t req_mutex; starpu_pthread_cond_t req_cond; ); struct _starpu_mpi_early_data_handle_tag_hashlist { struct _starpu_mpi_early_data_handle_list list; UT_hash_handle hh; starpu_mpi_tag_t data_tag; }; struct _starpu_mpi_envelope; void _starpu_mpi_early_data_init(void); void _starpu_mpi_early_data_check_termination(void); void _starpu_mpi_early_data_shutdown(void); struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_create(struct _starpu_mpi_envelope *envelope, int source, MPI_Comm comm) STARPU_ATTRIBUTE_MALLOC; struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_find(struct _starpu_mpi_node_tag *node_tag); void _starpu_mpi_early_data_add(struct _starpu_mpi_early_data_handle *early_data_handle); void _starpu_mpi_early_data_delete(struct _starpu_mpi_early_data_handle *early_data_handle); // Not used now but needed for fault tolerance struct _starpu_mpi_early_data_handle_tag_hashlist *_starpu_mpi_early_data_extract(struct _starpu_mpi_node_tag *node_tag); #ifdef __cplusplus } #endif #endif /* STARPU_USE_MPI_MPI */ #endif /* __STARPU_MPI_EARLY_DATA_H__ */ starpu-1.4.9+dfsg/mpi/src/mpi/starpu_mpi_early_request.c000066400000000000000000000147411507764646700234530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #ifdef STARPU_USE_MPI_MPI /** stores application requests for which data have not been received yet */ /** the hashlist is on 2 levels, the first top level is indexed on (node, rank), the second lower level is indexed on the data tag */ struct _starpu_mpi_early_request_hashlist { struct _starpu_mpi_early_request_tag_hashlist *datahash; UT_hash_handle hh; struct _starpu_mpi_node node; }; static starpu_pthread_mutex_t _starpu_mpi_early_request_mutex; struct _starpu_mpi_early_request_hashlist *_starpu_mpi_early_request_hash; int _starpu_mpi_early_request_hash_count; void _starpu_mpi_early_request_init() { _starpu_mpi_early_request_hash = NULL; _starpu_mpi_early_request_hash_count = 0; STARPU_PTHREAD_MUTEX_INIT(&_starpu_mpi_early_request_mutex, NULL); } void _starpu_mpi_early_request_shutdown() { struct _starpu_mpi_early_request_hashlist *entry=NULL, *tmp=NULL; HASH_ITER(hh, _starpu_mpi_early_request_hash, entry, tmp) { struct _starpu_mpi_early_request_tag_hashlist *tag_entry=NULL, *tag_tmp=NULL; HASH_ITER(hh, entry->datahash, tag_entry, tag_tmp) { STARPU_ASSERT(_starpu_mpi_req_list_empty(&tag_entry->list)); HASH_DEL(entry->datahash, tag_entry); free(tag_entry); } HASH_DEL(_starpu_mpi_early_request_hash, entry); free(entry); } STARPU_PTHREAD_MUTEX_DESTROY(&_starpu_mpi_early_request_mutex); } int _starpu_mpi_early_request_count() { return _starpu_mpi_early_request_hash_count; } void _starpu_mpi_early_request_check_termination() { STARPU_ASSERT_MSG(_starpu_mpi_early_request_count() == 0, "Number of early requests left is not zero"); } struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm) { struct _starpu_mpi_node_tag node_tag; struct _starpu_mpi_req *found; struct _starpu_mpi_early_request_hashlist *hashlist; memset(&node_tag, 0, sizeof(struct _starpu_mpi_node_tag)); node_tag.node.comm = comm; node_tag.node.rank = source; node_tag.data_tag = data_tag; STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_request_mutex); _STARPU_MPI_DEBUG(100, "Looking for early_request with comm %ld source %d tag %ld\n", (long int)node_tag.node.comm, node_tag.node.rank, node_tag.data_tag); HASH_FIND(hh, _starpu_mpi_early_request_hash, &node_tag.node, sizeof(struct _starpu_mpi_node), hashlist); if (hashlist == NULL) { found = NULL; } else { struct _starpu_mpi_early_request_tag_hashlist *tag_hashlist; HASH_FIND(hh, hashlist->datahash, &node_tag.data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist); if (tag_hashlist == NULL) { found = NULL; } else if (_starpu_mpi_req_list_empty(&tag_hashlist->list)) { found = NULL; } else { found = _starpu_mpi_req_list_pop_front(&tag_hashlist->list); _starpu_mpi_early_request_hash_count --; } } _STARPU_MPI_DEBUG(100, "Found early_request %p with comm %ld source %d tag %ld\n", found, (long int)node_tag.node.comm, node_tag.node.rank, node_tag.data_tag); STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_request_mutex); return found; } struct _starpu_mpi_early_request_tag_hashlist *_starpu_mpi_early_request_extract(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm) { struct _starpu_mpi_node_tag node_tag; struct _starpu_mpi_early_request_hashlist *hashlist; struct _starpu_mpi_early_request_tag_hashlist *tag_hashlist = NULL; memset(&node_tag, 0, sizeof(struct _starpu_mpi_node_tag)); node_tag.node.comm = comm; node_tag.node.rank = source; node_tag.data_tag = data_tag; STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_request_mutex); _STARPU_MPI_DEBUG(100, "Looking for early_request with comm %ld source %d tag %ld\n", (long int)node_tag.node.comm, node_tag.node.rank, node_tag.data_tag); HASH_FIND(hh, _starpu_mpi_early_request_hash, &node_tag.node, sizeof(struct _starpu_mpi_node), hashlist); if (hashlist) { HASH_FIND(hh, hashlist->datahash, &node_tag.data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist); if (tag_hashlist) { _starpu_mpi_early_request_hash_count -= _starpu_mpi_req_list_size(&tag_hashlist->list); HASH_DEL(hashlist->datahash, tag_hashlist); } } _STARPU_MPI_DEBUG(100, "Found hashlist %p with comm %ld source %d tag %ld\n", hashlist, (long int)node_tag.node.comm, node_tag.node.rank, node_tag.data_tag); STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_request_mutex); return tag_hashlist; } void _starpu_mpi_early_request_enqueue(struct _starpu_mpi_req *req) { STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_early_request_mutex); _STARPU_MPI_DEBUG(100, "Adding request %p with comm %ld source %d tag %ld in the application request hashmap\n", req, (long int)req->node_tag.node.comm, req->node_tag.node.rank, req->node_tag.data_tag); struct _starpu_mpi_early_request_hashlist *hashlist; HASH_FIND(hh, _starpu_mpi_early_request_hash, &req->node_tag.node, sizeof(struct _starpu_mpi_node), hashlist); if (hashlist == NULL) { _STARPU_MPI_MALLOC(hashlist, sizeof(struct _starpu_mpi_early_request_hashlist)); hashlist->node = req->node_tag.node; hashlist->datahash = NULL; HASH_ADD(hh, _starpu_mpi_early_request_hash, node, sizeof(hashlist->node), hashlist); } struct _starpu_mpi_early_request_tag_hashlist *tag_hashlist; HASH_FIND(hh, hashlist->datahash, &req->node_tag.data_tag, sizeof(starpu_mpi_tag_t), tag_hashlist); if (tag_hashlist == NULL) { _STARPU_MPI_MALLOC(tag_hashlist, sizeof(struct _starpu_mpi_early_request_tag_hashlist)); tag_hashlist->data_tag = req->node_tag.data_tag; HASH_ADD(hh, hashlist->datahash, data_tag, sizeof(tag_hashlist->data_tag), tag_hashlist); _starpu_mpi_req_list_init(&tag_hashlist->list); } _starpu_mpi_req_list_push_back(&tag_hashlist->list, req); _starpu_mpi_early_request_hash_count ++; STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_early_request_mutex); } #endif // STARPU_USE_MPI_MPI starpu-1.4.9+dfsg/mpi/src/mpi/starpu_mpi_early_request.h000066400000000000000000000033321507764646700234520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_EARLY_REQUEST_H__ #define __STARPU_MPI_EARLY_REQUEST_H__ #include #include #include #include #include /** @file */ #ifdef STARPU_USE_MPI_MPI #ifdef __cplusplus extern "C" { #endif struct _starpu_mpi_early_request_tag_hashlist { struct _starpu_mpi_req_list list; UT_hash_handle hh; starpu_mpi_tag_t data_tag; }; void _starpu_mpi_early_request_init(void); void _starpu_mpi_early_request_shutdown(void); int _starpu_mpi_early_request_count(void); void _starpu_mpi_early_request_check_termination(void); void _starpu_mpi_early_request_enqueue(struct _starpu_mpi_req *req); struct _starpu_mpi_req* _starpu_mpi_early_request_dequeue(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm); // Not used now but needed for fault tolerance struct _starpu_mpi_early_request_tag_hashlist *_starpu_mpi_early_request_extract(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm); #ifdef __cplusplus } #endif #endif /* STARPU_USE_MPI_MPI */ #endif /* __STARPU_MPI_EARLY_REQUEST_H__ */ starpu-1.4.9+dfsg/mpi/src/mpi/starpu_mpi_mpi.c000066400000000000000000002051021507764646700213450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2017-2017 Guillaume Beauchamp * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef STARPU_USE_MPI_FT #include #endif // STARPU_USE_MPI_FT #ifdef STARPU_USE_FXT #include #endif #ifdef STARPU_USE_MPI_MPI /* Number of ready requests to process before polling for completed requests */ static unsigned nready_process; /* Force allocation of early data */ static int early_data_force_allocate; static void _starpu_mpi_handle_ready_request(struct _starpu_mpi_req *req); static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req); static void _starpu_mpi_handle_detached_request(struct _starpu_mpi_req *req); static void _starpu_mpi_early_data_cb(void* arg); /* The list of ready requests */ static struct _starpu_mpi_req_list ready_recv_requests; static struct _starpu_mpi_req_prio_list ready_send_requests; /* The list of detached requests that have already been submitted to MPI */ static struct _starpu_mpi_req_list detached_requests; /* Number of send requests to submit to MPI at the same time */ static unsigned ndetached_send_requests_max; static unsigned ndetached_send_requests = 0; /* Condition to wake up progression thread */ static starpu_pthread_cond_t progress_cond; static starpu_pthread_mutex_t progress_mutex; /* Condition to wake up waiting for all current MPI requests to finish */ static starpu_pthread_cond_t barrier_cond; #ifndef STARPU_SIMGRID static starpu_pthread_t progress_thread; #endif static int running = 0; /* Provides synchronization between an early request, a sync request, and an early data handle: * we keep it held while checking and posting one to prevent the other. * This is to be taken always before the progress_mutex. */ static starpu_pthread_mutex_t early_data_mutex; /* Driver taken by StarPU-MPI to process tasks when there is no requests to * handle instead of polling endlessly */ static struct starpu_driver *mpi_driver = NULL; static int mpi_driver_call_freq = 0; static int mpi_driver_task_freq = 0; #ifdef STARPU_SIMGRID static int wait_counter; static starpu_pthread_cond_t wait_counter_cond; static starpu_pthread_mutex_t wait_counter_mutex; starpu_pthread_wait_t _starpu_mpi_thread_wait; starpu_pthread_queue_t _starpu_mpi_thread_dontsleep; #endif /* Count requests posted by the application and not yet submitted to MPI */ static starpu_pthread_mutex_t posted_requests_mutex; static int posted_requests = 0; static int newer_requests; static int mpi_wait_for_all_running = 0; #define _STARPU_MPI_INC_POSTED_REQUESTS(req, value) { \ STARPU_PTHREAD_MUTEX_LOCK(&posted_requests_mutex); \ posted_requests += value; \ _STARPU_MPI_DEBUG(0, "posted_requests : %d with req %p srcdst %d tag %"PRIi64" and type %s %d\n", posted_requests, req, req->node_tag.node.rank, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->backend->is_internal_req); \ STARPU_PTHREAD_MUTEX_UNLOCK(&posted_requests_mutex); \ } #ifdef STARPU_SIMGRID #pragma weak smpi_simulated_main_ extern int smpi_simulated_main_(int argc, char *argv[]); static #if SIMGRID_VERSION >= 32600 void #else int #endif _starpu_smpi_simulated_main(int argc, char *argv[]) { #if SIMGRID_VERSION < 32600 return #endif smpi_simulated_main_(argc, argv); } #pragma weak smpi_process_set_user_data #if !HAVE_DECL_SMPI_PROCESS_SET_USER_DATA && !defined(smpi_process_set_user_data) extern void smpi_process_set_user_data(void *); #endif #endif #ifdef STARPU_USE_FXT static int trace_loop = 0; #endif /********************************************************/ /* */ /* Send/Receive functionalities */ /* */ /********************************************************/ struct _starpu_mpi_early_data_cb_args { starpu_data_handle_t data_handle; starpu_data_handle_t early_handle; struct _starpu_mpi_req *req; void *buffer; size_t size; unsigned buffer_node; }; #if 0 void _starpu_mpi_coop_sends_build_tree(struct _starpu_mpi_coop_sends *coop_sends) { (void)coop_sends; /* TODO: turn them into redirects & forwards */ } #endif void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, int submit_control, int submit_data) { (void)submit_control; unsigned i, n = coop_sends->n; /* Note: coop_sends might disappear very very soon after last request is submitted */ for (i = 0; i < n; i++) { if (coop_sends->reqs_array[i]->request_type == SEND_REQ && submit_data) { _STARPU_MPI_DEBUG(0, "cooperative sends %p sending to %d\n", coop_sends, coop_sends->reqs_array[i]->node_tag.node.rank); _starpu_mpi_submit_ready_request(coop_sends->reqs_array[i]); } /* TODO: handle redirect requests */ } } void _starpu_mpi_submit_ready_request(void *arg) { _STARPU_MPI_LOG_IN(); struct _starpu_mpi_req *req = arg; if (req->reserved_size) { /* The core will have really allocated the reception buffer now, release our reservation */ starpu_memory_deallocate(req->node, req->reserved_size); req->reserved_size = 0; } _STARPU_MPI_DEBUG(0, "new req %p srcdst %d tag %"PRIi64" and type %s %d\n", req, req->node_tag.node.rank, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->backend->is_internal_req); if (req->request_type == RECV_REQ) { /* Case : the request is the internal receive request submitted * by StarPU-MPI to receive incoming data without a matching * early_request from the application. We immediately allocate the * pointer associated to the data_handle, and push it into the * ready_requests list, so as the real MPI request can be submitted * before the next submission of the envelope-catching request. */ if (req->backend->is_internal_req) { _starpu_mpi_datatype_allocate(req->data_handle, req); if (req->registered_datatype == 1) { req->count = 1; req->ptr = starpu_data_handle_to_pointer(req->data_handle, req->node); } else { STARPU_ASSERT(req->count); req->ptr = (void *)starpu_malloc_on_node_flags(req->node, req->count, 0); } STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); _STARPU_MPI_DEBUG(3, "Pushing internal starpu_mpi_irecv request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); _starpu_mpi_req_list_push_front(&ready_recv_requests, req); /* inform the starpu mpi thread that the request has been pushed in the ready_requests list */ req->posted = 1; STARPU_PTHREAD_COND_BROADCAST(&req->backend->posted_cond); } else { STARPU_PTHREAD_MUTEX_LOCK(&early_data_mutex); /* test whether some data with the given tag and source have already been received by StarPU-MPI*/ struct _starpu_mpi_early_data_handle *early_data_handle = _starpu_mpi_early_data_find(&req->node_tag); if (early_data_handle) { /* Got the early_data_handle */ STARPU_PTHREAD_MUTEX_UNLOCK(&early_data_mutex); /* Case: a receive request for a data with the given tag and source has already been * posted to MPI by StarPU. Asynchronously requests a Read permission over the temporary handle , * so as when the internal receive is completed, the _starpu_mpi_early_data_cb function * will be called to bring the data back to the original data handle associated to the request.*/ _STARPU_MPI_DEBUG(3, "The RECV request %p with tag %"PRIi64" has already been received, copying previously received data into handle's pointer..\n", req, req->node_tag.data_tag); STARPU_ASSERT(req->data_handle != early_data_handle->handle); req->backend->internal_req = early_data_handle->req; req->backend->early_data_handle = early_data_handle; struct _starpu_mpi_early_data_cb_args *cb_args; _STARPU_MPI_MALLOC(cb_args, sizeof(struct _starpu_mpi_early_data_cb_args)); cb_args->data_handle = req->data_handle; cb_args->early_handle = early_data_handle->handle; cb_args->buffer = early_data_handle->buffer; cb_args->size = early_data_handle->size; cb_args->buffer_node = early_data_handle->buffer_node; cb_args->req = req; _STARPU_MPI_DEBUG(3, "Calling data_acquire_cb on starpu_mpi_copy_cb..\n"); // FIXME: when buffer == NULL, do not hardcode acquiring on early_data_handle->buffer_node, to just acquire where the data happens to have been stored by MPI starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(early_data_handle->handle,early_data_handle->buffer_node,STARPU_R,NULL,_starpu_mpi_early_data_cb,(void*) cb_args, 1, 0, NULL, NULL, req->prio); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); } else { struct _starpu_mpi_req *sync_req = _starpu_mpi_sync_data_find(req->node_tag.data_tag, req->node_tag.node.rank, req->node_tag.node.comm); _STARPU_MPI_DEBUG(3, "----------> Looking for sync data for tag %"PRIi64" and src %d = %p\n", req->node_tag.data_tag, req->node_tag.node.rank, sync_req); if (sync_req) { /* Got the sync req */ STARPU_PTHREAD_MUTEX_UNLOCK(&early_data_mutex); /* Case: we already received the send envelope, we can proceed with the receive */ req->sync = 1; _starpu_mpi_datatype_allocate(req->data_handle, req); if (req->registered_datatype == 1) { req->count = 1; req->ptr = starpu_data_handle_to_pointer(req->data_handle, req->node); } else { req->count = sync_req->count; STARPU_ASSERT(req->count); req->ptr = (void *)starpu_malloc_on_node_flags(req->node, req->count, 0); } STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); _starpu_mpi_req_list_push_front(&ready_recv_requests, req); /* Throw away the dumb request that was only used to know that we got the envelope */ _starpu_mpi_request_destroy(sync_req); } else { /* Case: no matching data has been received. Store the receive request as an early_request. */ _STARPU_MPI_DEBUG(3, "Adding the pending receive request %p (srcdst %d tag %"PRIi64") into the request hashmap\n", req, req->node_tag.node.rank, req->node_tag.data_tag); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); _starpu_mpi_early_request_enqueue(req); /* We have queued our early request, we can let the progression thread look at it */ STARPU_PTHREAD_MUTEX_UNLOCK(&early_data_mutex); } } } } else { STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); if (req->request_type == SEND_REQ) { if (_starpu_mpi_thread_multiple_send && (ndetached_send_requests_max == 0 || ndetached_send_requests < ndetached_send_requests_max)) { /* Directly send from this thread */ STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); _starpu_mpi_handle_ready_request(req); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); } else /* Defer to MPI thread */ _starpu_mpi_req_prio_list_push_front(&ready_send_requests, req); } else _starpu_mpi_req_list_push_front(&ready_recv_requests, req); _STARPU_MPI_DEBUG(3, "Pushing new request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); } newer_requests = 1; STARPU_PTHREAD_COND_BROADCAST(&progress_cond); #ifdef STARPU_SIMGRID starpu_pthread_queue_signal(&_starpu_mpi_thread_dontsleep); #endif STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); _STARPU_MPI_LOG_OUT(); } void _starpu_mpi_req_willpost(struct _starpu_mpi_req *req) { (void)req; _STARPU_MPI_INC_POSTED_REQUESTS(req, 1); } #ifdef STARPU_SIMGRID int _starpu_mpi_simgrid_mpi_test(unsigned *done, int *flag) { *flag = 0; if (*done) { starpu_pthread_queue_signal(&_starpu_mpi_thread_dontsleep); *flag = 1; } return MPI_SUCCESS; } static void _starpu_mpi_simgrid_wait_req_func(void* arg) { struct _starpu_simgrid_mpi_req *sim_req = arg; int ret; starpu_pthread_detach(starpu_pthread_self()); STARPU_PTHREAD_MUTEX_LOCK(&wait_counter_mutex); wait_counter++; STARPU_PTHREAD_MUTEX_UNLOCK(&wait_counter_mutex); ret = MPI_Wait(sim_req->request, sim_req->status); STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(ret)); _STARPU_MPI_DEBUG(0, "request %p finished\n", sim_req->request); *(sim_req->done) = 1; starpu_pthread_queue_broadcast(sim_req->queue); free(sim_req); STARPU_PTHREAD_MUTEX_LOCK(&wait_counter_mutex); if (--wait_counter == 0) STARPU_PTHREAD_COND_SIGNAL(&wait_counter_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&wait_counter_mutex); } void _starpu_mpi_simgrid_wait_req(MPI_Request *request, MPI_Status *status, starpu_pthread_queue_t *queue, unsigned *done) { struct _starpu_simgrid_mpi_req *sim_req; _STARPU_MPI_CALLOC(sim_req, 1, sizeof(struct _starpu_simgrid_mpi_req)); sim_req->request = request; sim_req->status = status; sim_req->queue = queue; sim_req->done = done; *done = 0; _STARPU_MPI_DEBUG(0, "will wait for request %p to finish\n", sim_req->request); starpu_pthread_attr_t attr; starpu_pthread_attr_init(&attr); starpu_pthread_attr_setstacksize(&attr, 32786); _starpu_simgrid_xbt_thread_create("wait for mpi transfer", &attr, _starpu_mpi_simgrid_wait_req_func, sim_req); } #endif /********************************************************/ /* */ /* Send functionalities */ /* */ /********************************************************/ static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req) { _STARPU_MPI_LOG_IN(); _STARPU_MPI_DEBUG(0, "post MPI isend request %p type %s tag %"PRIi64" dst %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync); _starpu_mpi_comm_amounts_inc(req->node_tag.node.comm, req->node, req->node_tag.node.rank, req->datatype, req->count); _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag, 0); if (req->sync == 0) { _STARPU_MPI_COMM_TO_DEBUG(req, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.data_tag, req->node_tag.node.comm); req->ret = MPI_Isend(req->ptr, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.node.comm, &req->backend->data_request); STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); } else { _STARPU_MPI_COMM_TO_DEBUG(req, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.data_tag, req->node_tag.node.comm); req->ret = MPI_Issend(req->ptr, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.node.comm, &req->backend->data_request); STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Issend returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); } #ifdef STARPU_SIMGRID _starpu_mpi_simgrid_wait_req(&req->backend->data_request, &req->status_store, &req->queue, &req->done); #endif // this trace event is the start of the communication link: _STARPU_MPI_TRACE_ISEND_SUBMIT_END(_STARPU_MPI_FUT_POINT_TO_POINT_SEND, req, req->prio); /* somebody is perhaps waiting for the MPI request to be posted */ STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); req->submitted = 1; STARPU_PTHREAD_COND_BROADCAST(&req->backend->req_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); _starpu_mpi_handle_detached_request(req); _STARPU_MPI_LOG_OUT(); } void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req) { _starpu_mpi_datatype_allocate(req->data_handle, req); _STARPU_MPI_CALLOC(req->backend->envelope, 1,sizeof(struct _starpu_mpi_envelope)); req->backend->envelope->mode = _STARPU_MPI_ENVELOPE_DATA; req->backend->envelope->data_tag = req->node_tag.data_tag; req->backend->envelope->sync = req->sync; if (req->registered_datatype == 1) { int size, ret; req->count = 1; req->ptr = starpu_data_handle_to_pointer(req->data_handle, req->node); MPI_Type_size(req->datatype, &size); req->backend->envelope->size = (starpu_ssize_t)req->count * size; _STARPU_MPI_DEBUG(20, "Post MPI isend count (%ld) datatype_size %ld request to %d\n",req->count,starpu_data_get_size(req->data_handle), req->node_tag.node.rank); _STARPU_MPI_COMM_TO_DEBUG(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->backend->envelope->data_tag, req->node_tag.node.comm); ret = MPI_Isend(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.node.comm, &req->backend->size_req); STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "when sending envelope, MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(ret)); } else { int ret; // Do not pack the data, just try to find out the size starpu_data_pack_node(req->data_handle, req->node, NULL, &(req->backend->envelope->size)); if (req->backend->envelope->size != -1) { // We already know the size of the data, let's send it to overlap with the packing of the data _STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (first call to pack)\n", req->backend->envelope->size, sizeof(req->count), "MPI_BYTE", req->node_tag.node.rank); req->count = req->backend->envelope->size; _STARPU_MPI_COMM_TO_DEBUG(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->backend->envelope->data_tag, req->node_tag.node.comm); ret = MPI_Isend(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.node.comm, &req->backend->size_req); STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "when sending size, MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(ret)); } // Pack the data starpu_data_pack_node(req->data_handle, req->node, &req->ptr, &req->count); if (req->backend->envelope->size == -1) { // We know the size now, let's send it _STARPU_MPI_DEBUG(20, "Sending size %ld (%ld %s) to node %d (second call to pack)\n", req->backend->envelope->size, sizeof(req->count), "MPI_BYTE", req->node_tag.node.rank); _STARPU_MPI_COMM_TO_DEBUG(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->backend->envelope->data_tag, req->node_tag.node.comm); ret = MPI_Isend(req->backend->envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.node.comm, &req->backend->size_req); STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "when sending size, MPI_Isend returning %s", _starpu_mpi_get_mpi_error_code(ret)); } else { // We check the size returned with the 2 calls to pack is the same STARPU_MPI_ASSERT_MSG(req->count == req->backend->envelope->size, "Calls to pack_data returned different sizes %ld != %ld", req->count, req->backend->envelope->size); } // We can send the data now } if (req->sync) { // If the data is to be sent in synchronous mode, we need to wait for the receiver ready message _starpu_mpi_sync_data_add(req); } else { // Otherwise we can send the data _starpu_mpi_isend_data_func(req); } } /********************************************************/ /* */ /* receive functionalities */ /* */ /********************************************************/ void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req) { _STARPU_MPI_LOG_IN(); _STARPU_MPI_DEBUG(0, "post MPI irecv request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); if (req->sync) { struct _starpu_mpi_envelope *_envelope; _STARPU_MPI_CALLOC(_envelope, 1, sizeof(struct _starpu_mpi_envelope)); _envelope->mode = _STARPU_MPI_ENVELOPE_SYNC_READY; _envelope->data_tag = req->node_tag.data_tag; _STARPU_MPI_DEBUG(20, "Telling node %d it can send the data and waiting for the data back ...\n", req->node_tag.node.rank); _STARPU_MPI_COMM_TO_DEBUG(_envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, _envelope->data_tag, req->node_tag.node.comm); req->ret = MPI_Send(_envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, req->node_tag.node.rank, _STARPU_MPI_TAG_ENVELOPE, req->node_tag.node.comm); STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Send returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); free(_envelope); _envelope = NULL; } if (req->sync) { _STARPU_MPI_COMM_FROM_DEBUG(req, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.data_tag, req->node_tag.node.comm); req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_SYNC_DATA, req->node_tag.node.comm, &req->backend->data_request); } else { _STARPU_MPI_COMM_FROM_DEBUG(req, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.data_tag, req->node_tag.node.comm); req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.node.rank, _STARPU_MPI_TAG_DATA, req->node_tag.node.comm, &req->backend->data_request); } #ifdef STARPU_SIMGRID _starpu_mpi_simgrid_wait_req(&req->backend->data_request, &req->status_store, &req->queue, &req->done); #endif STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_IRecv returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); _STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.node.rank, req->node_tag.data_tag); /* somebody is perhaps waiting for the MPI request to be posted */ STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); req->submitted = 1; STARPU_PTHREAD_COND_BROADCAST(&req->backend->req_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); _starpu_mpi_handle_detached_request(req); _STARPU_MPI_LOG_OUT(); } /********************************************************/ /* */ /* Wait functionalities */ /* */ /********************************************************/ #ifndef STARPU_SIMGRID void _starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req) { _STARPU_MPI_LOG_IN(); /* Which is the mpi request we are waiting for ? */ struct _starpu_mpi_req *req = waiting_req->backend->other_request; _STARPU_MPI_TRACE_UWAIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); if (req->backend->data_request != MPI_REQUEST_NULL) { req->ret = MPI_Wait(&req->backend->data_request, waiting_req->status); STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); } _STARPU_MPI_TRACE_UWAIT_END(req->node_tag.node.rank, req->node_tag.data_tag); _starpu_mpi_handle_request_termination(req); _STARPU_MPI_LOG_OUT(); } #endif int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status) { int ret; struct _starpu_mpi_req *req = *public_req; _STARPU_MPI_LOG_IN(); #ifdef STARPU_SIMGRID _STARPU_MPI_TRACE_UWAIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); starpu_pthread_wait_t wait; starpu_pthread_wait_init(&wait); starpu_pthread_queue_register(&wait, &req->queue); while (1) { starpu_pthread_wait_reset(&wait); if (req->done) break; starpu_pthread_wait_wait(&wait); } starpu_pthread_queue_unregister(&wait, &req->queue); starpu_pthread_wait_destroy(&wait); _STARPU_MPI_TRACE_UWAIT_END(req->node_tag.node.rank, req->node_tag.data_tag); if (status) *status = req->status_store; _starpu_mpi_handle_request_termination(req); #else struct _starpu_mpi_req *waiting_req; /* We cannot try to complete a MPI request that was not actually posted * to MPI yet. */ STARPU_PTHREAD_MUTEX_LOCK(&(req->backend->req_mutex)); while (!(req->submitted)) STARPU_PTHREAD_COND_WAIT(&(req->backend->req_cond), &(req->backend->req_mutex)); STARPU_PTHREAD_MUTEX_UNLOCK(&(req->backend->req_mutex)); /* Initialize the request structure */ _starpu_mpi_request_init(&waiting_req); waiting_req->prio = INT_MAX; waiting_req->status = status; waiting_req->backend->other_request = req; waiting_req->func = _starpu_mpi_wait_func; waiting_req->request_type = WAIT_REQ; _STARPU_MPI_INC_POSTED_REQUESTS(waiting_req, 1); _starpu_mpi_submit_ready_request(waiting_req); /* We wait for the MPI request to finish */ STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); while (!req->completed) STARPU_PTHREAD_COND_WAIT(&req->backend->req_cond, &req->backend->req_mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); /* The internal request structure was automatically allocated */ _STARPU_MPI_INC_POSTED_REQUESTS(waiting_req, -1); _starpu_mpi_request_destroy(waiting_req); #endif *public_req = NULL; if (req->backend->internal_req) { _starpu_mpi_request_destroy(req->backend->internal_req); } ret = req->ret; _starpu_mpi_request_destroy(req); _STARPU_MPI_LOG_OUT(); #ifdef STARPU_DEVEL #warning see if we can return ret instead of 0 #endif (void)ret; return 0; } /********************************************************/ /* */ /* Test functionalities */ /* */ /********************************************************/ #ifndef STARPU_SIMGRID void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req) { _STARPU_MPI_LOG_IN(); /* Which is the mpi request we are testing for ? */ struct _starpu_mpi_req *req = testing_req->backend->other_request; _STARPU_MPI_DEBUG(0, "Test request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); _STARPU_MPI_TRACE_UTESTING_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); req->ret = MPI_Test(&req->backend->data_request, testing_req->flag, testing_req->status); STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); _STARPU_MPI_TRACE_UTESTING_END(req->node_tag.node.rank, req->node_tag.data_tag); if (*testing_req->flag) { testing_req->ret = req->ret; _starpu_mpi_handle_request_termination(req); } STARPU_PTHREAD_MUTEX_LOCK(&testing_req->backend->req_mutex); testing_req->completed = 1; STARPU_PTHREAD_COND_SIGNAL(&testing_req->backend->req_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&testing_req->backend->req_mutex); _STARPU_MPI_LOG_OUT(); } #endif int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status) { _STARPU_MPI_LOG_IN(); int ret = 0; STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_test needs a valid starpu_mpi_req"); struct _starpu_mpi_req *req = *public_req; STARPU_MPI_ASSERT_MSG(!req->detached, "MPI_Test cannot be called on a detached request"); STARPU_VALGRIND_YIELD(); #ifdef STARPU_SIMGRID ret = req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, flag); if (*flag) { if (status) *status = req->status_store; _starpu_mpi_handle_request_termination(req); } #else STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); unsigned submitted = req->submitted; STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); if (submitted) { struct _starpu_mpi_req *testing_req; /* Initialize the request structure */ _starpu_mpi_request_init(&testing_req); testing_req->prio = INT_MAX; testing_req->flag = flag; testing_req->status = status; testing_req->backend->other_request = req; testing_req->func = _starpu_mpi_test_func; testing_req->completed = 0; testing_req->request_type = TEST_REQ; _STARPU_MPI_INC_POSTED_REQUESTS(testing_req, 1); _starpu_mpi_submit_ready_request(testing_req); /* We wait for the test request to finish */ STARPU_PTHREAD_MUTEX_LOCK(&(testing_req->backend->req_mutex)); while (!(testing_req->completed)) STARPU_PTHREAD_COND_WAIT(&(testing_req->backend->req_cond), &(testing_req->backend->req_mutex)); STARPU_PTHREAD_MUTEX_UNLOCK(&(testing_req->backend->req_mutex)); ret = testing_req->ret; _STARPU_MPI_INC_POSTED_REQUESTS(testing_req, -1); _starpu_mpi_request_destroy(testing_req); } else { *flag = 0; } #endif if (*flag) { /* The request was completed so we free the internal * request structure which was automatically allocated * */ *public_req = NULL; if (req->backend->internal_req) { _starpu_mpi_request_destroy(req->backend->internal_req); } _starpu_mpi_request_destroy(req); } _STARPU_MPI_LOG_OUT(); #ifdef STARPU_DEVEL #warning see if we can return ret instead of 0 #endif (void)ret; return 0; } /********************************************************/ /* */ /* Barrier functionalities */ /* */ /********************************************************/ static void _starpu_mpi_barrier_func(struct _starpu_mpi_req *barrier_req) { _STARPU_MPI_LOG_IN(); /* FIXME: rather use MPI_Ibarrier and make it a detached request. * We'd then be able to introduce starpu_mpi_ibarrier, and make * starpu_mpi_barrier just call starpu_mpi_ibarrier(); starpu_mpi_wait(); * That'll solve locking issue when intermixing starpu_mpi_barrier with * other communications. */ barrier_req->ret = MPI_Barrier(barrier_req->node_tag.node.comm); STARPU_MPI_ASSERT_MSG(barrier_req->ret == MPI_SUCCESS, "MPI_Barrier returning %s", _starpu_mpi_get_mpi_error_code(barrier_req->ret)); _starpu_mpi_handle_request_termination(barrier_req); _STARPU_MPI_LOG_OUT(); } int _starpu_mpi_barrier(MPI_Comm comm) { struct _starpu_mpi_req *barrier_req; /* Initialize the request structure */ _starpu_mpi_request_init(&barrier_req); barrier_req->prio = INT_MAX; barrier_req->func = _starpu_mpi_barrier_func; barrier_req->request_type = BARRIER_REQ; barrier_req->node_tag.node.comm = comm; _STARPU_MPI_INC_POSTED_REQUESTS(barrier_req, 1); _starpu_mpi_submit_ready_request(barrier_req); /* We wait for the MPI request to finish */ STARPU_PTHREAD_MUTEX_LOCK(&barrier_req->backend->req_mutex); while (!barrier_req->completed) STARPU_PTHREAD_COND_WAIT(&barrier_req->backend->req_cond, &barrier_req->backend->req_mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&barrier_req->backend->req_mutex); // posted_requests will be decreated in _starpu_mpi_handle_request_termination() called by _starpu_mpi_barrier_func() _starpu_mpi_request_destroy(barrier_req); _STARPU_MPI_LOG_OUT(); return 0; } int _starpu_mpi_wait_for_all(MPI_Comm comm) { (void) comm; _STARPU_MPI_LOG_IN(); /* First wait for *both* all tasks and MPI requests to finish, in case * some tasks generate MPI requests, MPI requests generate tasks, etc. */ STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); STARPU_MPI_ASSERT_MSG(!mpi_wait_for_all_running, "Concurrent starpu_mpi_wait_for_all is not implemented, even on different communicators"); mpi_wait_for_all_running = 1; do { while (posted_requests)// || !_starpu_mpi_req_list_empty(&ready_recv_requests) || !_starpu_mpi_req_prio_list_empty(&ready_send_requests) || !_starpu_mpi_req_list_empty(&detached_requests) || _starpu_mpi_early_request_count() != 0 || _starpu_mpi_sync_data_count() != 0) /* Wait for all current MPI requests to finish */ STARPU_PTHREAD_COND_WAIT(&barrier_cond, &progress_mutex); /* No current request, clear flag */ newer_requests = 0; STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); /* Now wait for all tasks */ starpu_task_wait_for_all(); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); /* Check newer_requests again, in case some MPI requests * triggered by tasks completed and triggered tasks between * wait_for_all finished and we take the lock */ } while (posted_requests || newer_requests);// || !_starpu_mpi_req_list_empty(&ready_recv_requests) || !_starpu_mpi_req_prio_list_empty(&ready_send_requests) || !_starpu_mpi_req_list_empty(&detached_requests) || _starpu_mpi_early_request_count() != 0 || _starpu_mpi_sync_data_count() != 0 ); mpi_wait_for_all_running = 0; STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); return 0; } /********************************************************/ /* */ /* Progression */ /* */ /********************************************************/ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req) { _STARPU_MPI_LOG_IN(); _STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d internal_req %p\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->backend->internal_req); if (req->backend->internal_req) { _starpu_mpi_early_data_delete(req->backend->early_data_handle); } else { if (req->request_type == RECV_REQ || req->request_type == SEND_REQ) { if (req->request_type == SEND_REQ) { // We need to make sure the communication for sending the size // has completed, as MPI can re-order messages, let's call // MPI_Wait to make sure data have been sent int ret; ret = MPI_Wait(&req->backend->size_req, MPI_STATUS_IGNORE); STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Wait returning %s", _starpu_mpi_get_mpi_error_code(ret)); } if (req->registered_datatype == 0) { if (req->request_type == SEND_REQ) { starpu_free_on_node_flags(req->node, (uintptr_t)req->ptr, req->count, 0); req->ptr = NULL; } else if (req->request_type == RECV_REQ) { if (starpu_data_get_interface_ops(req->data_handle)->peek_data) { starpu_data_peek_node(req->data_handle, req->node, req->ptr, req->count); starpu_free_on_node_flags(req->node, (uintptr_t)req->ptr, req->count, 0); } else { starpu_data_unpack_node(req->data_handle, req->node, req->ptr, req->count); } starpu_memory_deallocate(req->node, req->count); } } else { _starpu_mpi_datatype_free(req->data_handle, &req->datatype); } } // for recv requests, this event is the end of the communication link: _STARPU_MPI_TRACE_TERMINATED(req); } _starpu_mpi_release_req_data(req); if (req->backend->envelope) { free(req->backend->envelope); req->backend->envelope = NULL; } /* Execute the specified callback, if any */ if (req->callback) req->callback(req->callback_arg); _STARPU_MPI_INC_POSTED_REQUESTS(req, -1); /* tell anyone potentially waiting on the request that it is * terminated now */ STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); req->completed = 1; STARPU_PTHREAD_COND_BROADCAST(&req->backend->req_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); _STARPU_MPI_LOG_OUT(); } /* This is called when the data is now received in the early data handle, we can * now copy it over to the real handle. */ static void _starpu_mpi_early_data_cb(void* arg) { struct _starpu_mpi_early_data_cb_args *args = arg; if (args->buffer) { /* Data has been received as a raw memory, it has to be unpacked */ struct starpu_data_interface_ops *itf_src = starpu_data_get_interface_ops(args->early_handle); struct starpu_data_interface_ops *itf_dst = starpu_data_get_interface_ops(args->data_handle); MPI_Datatype datatype = _starpu_mpi_datatype_get_user_defined_datatype(args->data_handle, args->req->node); if (datatype) { int position=0; void *ptr = starpu_data_handle_to_pointer(args->data_handle, args->req->node); MPI_Unpack(args->buffer, itf_src->get_size(args->early_handle), &position, ptr, 1, datatype, args->req->node_tag.node.comm); starpu_free_on_node_flags(args->buffer_node, (uintptr_t) args->buffer, args->size, 0); args->buffer = NULL; _starpu_mpi_datatype_free(args->data_handle, &datatype); } else { STARPU_MPI_ASSERT_MSG(itf_dst->peek_data || itf_dst->unpack_data , "The data interface does not define an unpack function\n"); // FIXME: Actually we may not want unpack_data to free the buffer, for the case when we are participating to a collective send if (itf_dst->peek_data) { itf_dst->peek_data(args->data_handle, args->req->node, args->buffer, itf_src->get_size(args->early_handle)); starpu_free_on_node_flags(args->buffer_node, (uintptr_t) args->buffer, itf_src->get_size(args->early_handle), 0); } else itf_dst->unpack_data(args->data_handle, args->req->node, args->buffer, itf_src->get_size(args->early_handle)); args->buffer = NULL; } } else { struct starpu_data_interface_ops *itf = starpu_data_get_interface_ops(args->early_handle); void* itf_src = starpu_data_get_interface_on_node(args->early_handle, args->buffer_node); void* itf_dst = starpu_data_get_interface_on_node(args->data_handle, args->req->node); if (!itf->copy_methods->ram_to_ram) { _STARPU_MPI_DEBUG(3, "Initiating any_to_any copy..\n"); itf->copy_methods->any_to_any(itf_src, args->buffer_node, itf_dst, args->req->node, NULL); } else { _STARPU_MPI_DEBUG(3, "Initiating ram_to_ram copy..\n"); itf->copy_methods->ram_to_ram(itf_src, args->buffer_node, itf_dst, args->req->node); } } _STARPU_MPI_DEBUG(3, "Done, handling release of early_handle..\n"); starpu_data_release_on_node(args->early_handle, args->buffer_node); _STARPU_MPI_DEBUG(3, "Done, handling unregister of early_handle..\n"); /* XXX: note that we have already freed the registered buffer above. In * principle that's unsafe. As of now it is fine because StarPU has no reason to access it. */ starpu_data_unregister_submit(args->early_handle); _STARPU_MPI_DEBUG(3, "Done, handling request %p termination of the already received request\n",args->req); // If the request is detached, we need to call _starpu_mpi_handle_request_termination // as it will not be called automatically as the request is not in the list detached_requests if (args->req->detached) { /* have the internal request destroyed now or when completed */ STARPU_PTHREAD_MUTEX_LOCK(&args->req->backend->internal_req->backend->req_mutex); if (args->req->backend->internal_req->backend->to_destroy) { /* The request completed first, can now destroy it */ STARPU_PTHREAD_MUTEX_UNLOCK(&args->req->backend->internal_req->backend->req_mutex); _starpu_mpi_request_destroy(args->req->backend->internal_req); } else { /* The request didn't complete yet, tell it to destroy it when it completes */ args->req->backend->internal_req->backend->to_destroy = 1; STARPU_PTHREAD_MUTEX_UNLOCK(&args->req->backend->internal_req->backend->req_mutex); } _starpu_mpi_handle_request_termination(args->req); _starpu_mpi_request_destroy(args->req); } else { // else: If the request is not detached its termination will // be handled when calling starpu_mpi_wait // We store in the application request the internal MPI // request so that it can be used by starpu_mpi_wait args->req->backend->data_request = args->req->backend->internal_req->backend->data_request; STARPU_PTHREAD_MUTEX_LOCK(&args->req->backend->req_mutex); args->req->submitted = 1; STARPU_PTHREAD_COND_BROADCAST(&args->req->backend->req_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&args->req->backend->req_mutex); #ifdef STARPU_SIMGRID args->req->done = 1; #endif } free(args); args = NULL; } // We suppose progress_mutex is locked static void _starpu_mpi_test_detached_requests(void) { //_STARPU_MPI_LOG_IN(); int flag; struct _starpu_mpi_req *req; if (_starpu_mpi_req_list_empty(&detached_requests)) { //_STARPU_MPI_LOG_OUT(); return; } _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN(); req = _starpu_mpi_req_list_begin(&detached_requests); while (req != _starpu_mpi_req_list_end(&detached_requests)) { STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); _STARPU_MPI_TRACE_TEST_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); //_STARPU_MPI_DEBUG(3, "Test detached request %p - mpitag %"PRIi64" - TYPE %s %d\n", &req->backend->data_request, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->node_tag.node.rank); #ifdef STARPU_SIMGRID req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, &flag); #else STARPU_MPI_ASSERT_MSG(req->backend->data_request != MPI_REQUEST_NULL, "Cannot test completion of the request MPI_REQUEST_NULL"); req->ret = MPI_Test(&req->backend->data_request, &flag, MPI_STATUS_IGNORE); #endif STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); _STARPU_MPI_TRACE_TEST_END(req->node_tag.node.rank, req->node_tag.data_tag); if (!flag) { req = _starpu_mpi_req_list_next(req); } else { _STARPU_MPI_TRACE_POLLING_END(); struct _starpu_mpi_req *next_req; next_req = _starpu_mpi_req_list_next(req); _STARPU_MPI_TRACE_COMPLETE_BEGIN(req->request_type, req->node_tag.node.rank, req->node_tag.data_tag); _starpu_mpi_handle_request_termination(req); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); if (req->request_type == SEND_REQ && ndetached_send_requests_max > 0) // if ndetached_send_requests_max == 0, we don't limit the number of concurrent MPI send requests ndetached_send_requests--; _starpu_mpi_req_list_erase(&detached_requests, req); STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); _STARPU_MPI_TRACE_COMPLETE_END(req->request_type, req->node_tag.node.rank, req->node_tag.data_tag); STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); /* We don't want to free internal non-detached requests, we need to get their MPI request before destroying them */ if (req->backend->is_internal_req && !req->backend->to_destroy) { /* We have completed the request, let the application request destroy it */ req->backend->to_destroy = 1; STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); } else { STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); _starpu_mpi_request_destroy(req); } req = next_req; _STARPU_MPI_TRACE_POLLING_BEGIN(); } STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); } _STARPU_MPI_TRACE_TESTING_DETACHED_END(); //_STARPU_MPI_LOG_OUT(); } static void _starpu_mpi_handle_detached_request(struct _starpu_mpi_req *req) { if (req->detached) { STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); if (req->request_type == SEND_REQ && ndetached_send_requests_max > 0) // if ndetached_send_requests_max == 0, we don't limit the number of concurrent MPI send requests ndetached_send_requests++; /* put the submitted request into the list of pending requests * so that it can be handled by the progression mechanisms */ _starpu_mpi_req_list_push_back(&detached_requests, req); STARPU_PTHREAD_COND_SIGNAL(&progress_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); } } static void _starpu_mpi_handle_ready_request(struct _starpu_mpi_req *req) { _STARPU_MPI_LOG_IN(); STARPU_MPI_ASSERT_MSG(req, "Invalid request"); /* submit the request to MPI */ _STARPU_MPI_DEBUG(2, "Handling new request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); /* Set GPU device for current request if GPU Direct is supported */ if (_starpu_mpi_has_cuda) { int mem_node = req->node; if (mem_node >= 0) { enum starpu_node_kind node_kind = starpu_node_get_kind (mem_node); switch (node_kind) { #ifdef STARPU_USE_CUDA case STARPU_CUDA_RAM: if (_starpu_mpi_cuda_devid == -1 && starpu_cuda_worker_get_count() > 1) cudaSetDevice(starpu_memory_node_get_devid(mem_node)); break; #endif default: break; } } } req->func(req); _STARPU_MPI_LOG_OUT(); } static void _starpu_mpi_receive_early_data(struct _starpu_mpi_envelope *envelope, MPI_Status status, MPI_Comm comm) { _STARPU_MPI_DEBUG(20, "Request with tag %"PRIi64" and source %d not found, creating a early_data_handle to receive incoming data..\n", envelope->data_tag, status.MPI_SOURCE); _STARPU_MPI_DEBUG(20, "Request sync %d\n", envelope->sync); struct _starpu_mpi_early_data_handle* early_data_handle = _starpu_mpi_early_data_create(envelope, status.MPI_SOURCE, comm); _starpu_mpi_early_data_add(early_data_handle); starpu_data_handle_t data_handle; data_handle = _starpu_mpi_tag_get_data_handle_from_tag(envelope->data_tag); // TODO: rather select some memory node next to the NIC unsigned buffer_node = STARPU_MAIN_RAM; if (data_handle && starpu_data_get_interface_id(data_handle) < STARPU_MAX_INTERFACE_ID && !early_data_force_allocate) { /* We know which data will receive it and we won't have to unpack, use just the same kind of data. */ early_data_handle->buffer = NULL; early_data_handle->buffer_node = buffer_node; starpu_data_register_same(&early_data_handle->handle, data_handle); //_starpu_mpi_early_data_add(early_data_handle); } else { /* The application has not registered yet a data with the tag, * we are going to receive the data as a raw memory, and give it * to the application when it post a receive for this tag */ _STARPU_MPI_DEBUG(3, "Posting a receive for a data of size %d which has not yet been registered\n", (int)envelope->size); early_data_handle->buffer = (void *)starpu_malloc_on_node_flags(buffer_node, envelope->size, 0); early_data_handle->size = envelope->size; early_data_handle->buffer_node = buffer_node; starpu_variable_data_register(&early_data_handle->handle, buffer_node, (uintptr_t) early_data_handle->buffer, envelope->size); //_starpu_mpi_early_data_add(early_data_handle); } _STARPU_MPI_DEBUG(20, "Posting internal detached irecv on early_data_handle with tag %"PRIi64" from comm %ld src %d ..\n", early_data_handle->node_tag.data_tag, (long int)comm, status.MPI_SOURCE); STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); early_data_handle->req = _starpu_mpi_irecv_common(early_data_handle->handle, status.MPI_SOURCE, early_data_handle->node_tag.data_tag, comm, 1, 0, NULL, NULL, 1, 1, envelope->size, STARPU_DEFAULT_PRIO); /* The early data handle is ready, we can let _starpu_mpi_submit_ready_request * proceed with acquiring it */ STARPU_PTHREAD_MUTEX_UNLOCK(&early_data_mutex); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); // We wait until the request is pushed in the // ready_request list while (!(early_data_handle->req->posted)) STARPU_PTHREAD_COND_WAIT(&(early_data_handle->req->backend->posted_cond), &progress_mutex); // Handle the request immediately to make sure the mpi_irecv is // posted before receiving an other envelope _starpu_mpi_req_list_erase(&ready_recv_requests, early_data_handle->req); STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); _starpu_mpi_handle_ready_request(early_data_handle->req); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); } static void *_starpu_mpi_progress_thread_func(void *arg) { struct _starpu_mpi_argc_argv *argc_argv = (struct _starpu_mpi_argc_argv *) arg; starpu_pthread_setname("MPI"); _starpu_mpi_env_init(); #ifndef STARPU_SIMGRID if (_starpu_mpi_thread_cpuid < 0) { _starpu_mpi_thread_cpuid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, NULL, 0); } if (!_starpu_mpi_nobind && starpu_bind_thread_on(_starpu_mpi_thread_cpuid, STARPU_THREAD_ACTIVE, "MPI") < 0) { char hostname[65]; gethostname(hostname, sizeof(hostname)); _STARPU_DISP("[%s] No core was available for the MPI thread. You should use STARPU_RESERVE_NCPU to leave one core available for MPI, or specify one core less in STARPU_NCPU\n", hostname); } _starpu_mpi_do_initialize(argc_argv); if (!_starpu_mpi_nobind && _starpu_mpi_thread_cpuid >= 0) /* In case MPI changed the binding */ starpu_bind_thread_on(_starpu_mpi_thread_cpuid, STARPU_THREAD_ACTIVE, "MPI"); #else /* Now that MPI is set up, let the rest of simgrid get initialized */ char **argv_cpy; _STARPU_MPI_MALLOC(argv_cpy, *(argc_argv->argc) * sizeof(char*)); int i; for (i = 0; i < *(argc_argv->argc); i++) argv_cpy[i] = strdup((*(argc_argv->argv))[i]); void **tsd; _STARPU_CALLOC(tsd, MAX_TSD + 1, sizeof(void*)); #if defined(HAVE_SG_ACTOR_DATA) || defined(HAVE_SG_ACTOR_SET_DATA) _starpu_simgrid_actor_create("main", _starpu_smpi_simulated_main, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy); /* And set TSD for us */ #ifdef HAVE_SG_ACTOR_SET_DATA sg_actor_set_data(sg_actor_self(), tsd); #else sg_actor_data_set(sg_actor_self(), tsd); #endif #else MSG_process_create_with_arguments("main", _starpu_smpi_simulated_main, NULL, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy); /* And set TSD for us */ if (!smpi_process_set_user_data) { _STARPU_ERROR("Your version of simgrid does not provide smpi_process_set_user_data, we can not continue without it\n"); } smpi_process_set_user_data(tsd); #endif /* And wait for StarPU to get initialized, to come back to the same * situation as native execution where that's always the case. */ starpu_wait_initialized(); #endif _starpu_mpi_comm_amounts_init(argc_argv->comm); _starpu_mpi_cache_init(argc_argv->comm); _starpu_mpi_select_node_init(); _starpu_mpi_tag_init(); _starpu_mpi_comm_init(argc_argv->comm); _starpu_mpi_tags_init(); _starpu_mpi_early_request_init(); _starpu_mpi_early_data_init(); _starpu_mpi_sync_data_init(); _starpu_mpi_datatype_init(); if (mpi_driver) starpu_driver_init(mpi_driver); #ifdef STARPU_SIMGRID starpu_pthread_wait_init(&_starpu_mpi_thread_wait); starpu_pthread_queue_init(&_starpu_mpi_thread_dontsleep); starpu_pthread_queue_register(&_starpu_mpi_thread_wait, &_starpu_mpi_thread_dontsleep); #endif #ifdef STARPU_USE_FXT _starpu_mpi_fxt_init(argc_argv); #endif /* notify the main thread that the progression thread is ready */ STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); running = 1; STARPU_PTHREAD_COND_SIGNAL(&progress_cond); int envelope_request_submitted = 0; int mpi_driver_loop_counter = 0; int mpi_driver_task_counter = 0; _STARPU_MPI_TRACE_POLLING_BEGIN(); while (running || posted_requests || !(_starpu_mpi_req_list_empty(&ready_recv_requests)) || !(_starpu_mpi_req_prio_list_empty(&ready_send_requests)) || !(_starpu_mpi_req_list_empty(&detached_requests))) { #ifdef STARPU_SIMGRID starpu_pthread_wait_reset(&_starpu_mpi_thread_wait); #endif /* shall we block ? */ unsigned block = _starpu_mpi_req_list_empty(&ready_recv_requests) && _starpu_mpi_req_prio_list_empty(&ready_send_requests) && _starpu_mpi_early_request_count() == 0 && _starpu_mpi_sync_data_count() == 0 && _starpu_mpi_req_list_empty(&detached_requests); if (block) { //_STARPU_MPI_DEBUG(3, "NO MORE REQUESTS TO HANDLE\n"); _STARPU_MPI_TRACE_SLEEP_BEGIN(); /* Notify mpi_barrier */ STARPU_PTHREAD_COND_SIGNAL(&barrier_cond); } #ifdef STARPU_USE_MPI_FT block = block && !starpu_mpi_ft_busy(); #endif // STARPU_USE_MPI_FT if (block) { //_STARPU_MPI_DEBUG(3, "NO MORE REQUESTS TO HANDLE\n"); _STARPU_MPI_TRACE_SLEEP_BEGIN(); STARPU_PTHREAD_COND_WAIT(&progress_cond, &progress_mutex); _STARPU_MPI_TRACE_SLEEP_END(); } /* get one recv request */ unsigned n = 0; while (!_starpu_mpi_req_list_empty(&ready_recv_requests)) { _STARPU_MPI_TRACE_POLLING_END(); struct _starpu_mpi_req *req; if (n++ == nready_process) /* Already spent some time on submitting ready recv requests, poll before processing more ready recv requests */ break; req = _starpu_mpi_req_list_pop_back(&ready_recv_requests); /* handling a request is likely to block for a while * (on a sync_data_with_mem call), we want to let the * application submit requests in the meantime, so we * release the lock. */ STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); _starpu_mpi_handle_ready_request(req); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); } /* get one send request */ n = 0; while (!_starpu_mpi_req_prio_list_empty(&ready_send_requests) && (ndetached_send_requests_max == 0 || ndetached_send_requests < ndetached_send_requests_max)) { struct _starpu_mpi_req *req; if (n++ == nready_process) /* Already spent some time on submitting ready send requests, poll before processing more ready send requests */ break; req = _starpu_mpi_req_prio_list_pop_back_highest(&ready_send_requests); /* handling a request is likely to block for a while * (on a sync_data_with_mem call), we want to let the * application submit requests in the meantime, so we * release the lock. */ STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); _starpu_mpi_handle_ready_request(req); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); } _STARPU_MPI_TRACE_POLLING_BEGIN(); /* If there is no currently submitted envelope_request submitted to * catch envelopes from senders, and there is some pending * receive requests on our side, we resubmit a header request. */ if (((_starpu_mpi_early_request_count() > 0) || (_starpu_mpi_sync_data_count() > 0)) && (envelope_request_submitted == 0))// && (HASH_COUNT(_starpu_mpi_early_data_handle_hashmap) == 0)) { _starpu_mpi_comm_post_recv(); envelope_request_submitted = 1; } /* test whether there are some terminated "detached request" */ _starpu_mpi_test_detached_requests(); if (envelope_request_submitted == 1) { int flag; struct _starpu_mpi_envelope *envelope; MPI_Status envelope_status; MPI_Comm envelope_comm; /* test whether an envelope has arrived. */ flag = _starpu_mpi_comm_test_recv(&envelope_status, &envelope, &envelope_comm); if (flag) { _STARPU_MPI_TRACE_POLLING_END(); _STARPU_MPI_COMM_FROM_DEBUG(envelope, sizeof(struct _starpu_mpi_envelope), MPI_BYTE, envelope_status.MPI_SOURCE, _STARPU_MPI_TAG_ENVELOPE, envelope->data_tag, envelope_comm); _STARPU_MPI_DEBUG(4, "Envelope received with mode %d\n", envelope->mode); if (envelope->mode == _STARPU_MPI_ENVELOPE_SYNC_READY) { struct _starpu_mpi_req *_sync_req = _starpu_mpi_sync_data_find(envelope->data_tag, envelope_status.MPI_SOURCE, envelope_comm); _STARPU_MPI_DEBUG(20, "Sending data with tag %"PRIi64" to node %d\n", _sync_req->node_tag.data_tag, envelope_status.MPI_SOURCE); STARPU_MPI_ASSERT_MSG(envelope->data_tag == _sync_req->node_tag.data_tag, "Tag mismatch (envelope %"PRIi64" != req %"PRIi64")\n", envelope->data_tag, _sync_req->node_tag.data_tag); STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); _starpu_mpi_isend_data_func(_sync_req); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); } else { _STARPU_MPI_DEBUG(3, "Searching for application request with tag %"PRIi64" and source %d (size %ld)\n", envelope->data_tag, envelope_status.MPI_SOURCE, envelope->size); STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); STARPU_PTHREAD_MUTEX_LOCK(&early_data_mutex); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); struct _starpu_mpi_req *early_request = _starpu_mpi_early_request_dequeue(envelope->data_tag, envelope_status.MPI_SOURCE, envelope_comm); /* Case: a data will arrive before a matching receive is * posted by the application. Create a temporary handle to * store the incoming data, submit a starpu_mpi_irecv_detached * on this handle, and store it as an early_data */ if (early_request == NULL) { if (envelope->sync) { _STARPU_MPI_DEBUG(2000, "-------------------------> adding request for tag %"PRIi64"\n", envelope->data_tag); struct _starpu_mpi_req *new_req; #ifdef STARPU_DEVEL #warning creating a request is not really useful. #endif /* Initialize the request structure */ _starpu_mpi_request_init(&new_req); new_req->request_type = RECV_REQ; new_req->data_handle = NULL; new_req->node_tag.node.rank = envelope_status.MPI_SOURCE; new_req->node_tag.data_tag = envelope->data_tag; new_req->node_tag.node.comm = envelope_comm; new_req->detached = 1; new_req->sync = 1; new_req->callback = NULL; new_req->callback_arg = NULL; new_req->func = _starpu_mpi_irecv_size_func; new_req->sequential_consistency = 1; new_req->backend->is_internal_req = 0; // ???? new_req->count = envelope->size; _starpu_mpi_sync_data_add(new_req); /* We have queued our sync request, we can let _starpu_mpi_submit_ready_request find it */ STARPU_PTHREAD_MUTEX_UNLOCK(&early_data_mutex); } else { /* This will release early_data_mutex when appropriate */ _starpu_mpi_receive_early_data(envelope, envelope_status, envelope_comm); } } /* Case: a matching application request has been found for * the incoming data, we handle the correct allocation * of the pointer associated to the data handle, then * submit the corresponding receive with * _starpu_mpi_handle_ready_request. */ else { /* Got the early request */ STARPU_PTHREAD_MUTEX_UNLOCK(&early_data_mutex); _STARPU_MPI_DEBUG(2000, "A matching application request has been found for the incoming data with tag %"PRIi64"\n", envelope->data_tag); _STARPU_MPI_DEBUG(2000, "Request sync %d\n", envelope->sync); early_request->sync = envelope->sync; _starpu_mpi_datatype_allocate(early_request->data_handle, early_request); if (early_request->registered_datatype == 1) { early_request->count = 1; early_request->ptr = starpu_data_handle_to_pointer(early_request->data_handle, early_request->node); } else { early_request->count = envelope->size; early_request->ptr = (void *)starpu_malloc_on_node_flags(early_request->node, early_request->count, 0); starpu_memory_allocate(early_request->node, early_request->count, STARPU_MEMORY_OVERFLOW); STARPU_MPI_ASSERT_MSG(early_request->ptr, "cannot allocate message of size %ld\n", early_request->count); } _STARPU_MPI_DEBUG(3, "Handling new request... \n"); /* handling a request is likely to block for a while * (on a sync_data_with_mem call), we want to let the * application submit requests in the meantime, so we * release the lock. */ STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); _starpu_mpi_handle_ready_request(early_request); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); } } envelope_request_submitted = 0; _STARPU_MPI_TRACE_POLLING_BEGIN(); } else { /* A call is made to driver_run_once only when * the progression thread have gone through the * communication progression loop * mpi_driver_call_freq times. It is * interesting to tune the * STARPU_MPI_DRIVER_CALL_FREQUENCY * depending on whether the user wants * reactivity or computing power from the MPI * progression thread. */ if (mpi_driver && (++mpi_driver_loop_counter == mpi_driver_call_freq)) { mpi_driver_loop_counter = 0; mpi_driver_task_counter = 0; while (mpi_driver_task_counter++ < mpi_driver_task_freq) { _STARPU_MPI_TRACE_DRIVER_RUN_BEGIN(); STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); _STARPU_MPI_DEBUG(4, "running once mpi driver\n"); starpu_driver_run_once(mpi_driver); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); _STARPU_MPI_TRACE_DRIVER_RUN_END(); } } //_STARPU_MPI_DEBUG(4, "Nothing received, continue ..\n"); } } #ifdef STARPU_USE_MPI_FT STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); starpu_mpi_ft_progress(); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); #endif // STARPU_USE_MPI_FT #ifdef STARPU_SIMGRID STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); starpu_pthread_wait_wait(&_starpu_mpi_thread_wait); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); #endif // We release the lock to avoid monopolizing it while polling for terminations STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); STARPU_VALGRIND_YIELD(); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); } _STARPU_MPI_TRACE_POLLING_END(); if (envelope_request_submitted) { _starpu_mpi_comm_cancel_recv(); envelope_request_submitted = 0; } #ifdef STARPU_SIMGRID STARPU_PTHREAD_MUTEX_LOCK(&wait_counter_mutex); while (wait_counter != 0) STARPU_PTHREAD_COND_WAIT(&wait_counter_cond, &wait_counter_mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&wait_counter_mutex); STARPU_PTHREAD_MUTEX_DESTROY(&wait_counter_mutex); STARPU_PTHREAD_COND_DESTROY(&wait_counter_cond); starpu_pthread_queue_unregister(&_starpu_mpi_thread_wait, &_starpu_mpi_thread_dontsleep); starpu_pthread_queue_destroy(&_starpu_mpi_thread_dontsleep); starpu_pthread_wait_destroy(&_starpu_mpi_thread_wait); #endif STARPU_MPI_ASSERT_MSG(_starpu_mpi_req_list_empty(&detached_requests), "List of detached requests not empty"); STARPU_MPI_ASSERT_MSG(ndetached_send_requests == 0, "Number of detached send requests not 0"); STARPU_MPI_ASSERT_MSG(_starpu_mpi_req_list_empty(&ready_recv_requests), "List of ready requests not empty"); STARPU_MPI_ASSERT_MSG(_starpu_mpi_req_prio_list_empty(&ready_send_requests), "List of ready requests not empty"); STARPU_MPI_ASSERT_MSG(posted_requests == 0, "Number of posted request is not zero"); _starpu_mpi_early_request_check_termination(); _starpu_mpi_early_data_check_termination(); _starpu_mpi_sync_data_check_termination(); _starpu_mpi_req_prio_list_deinit(&ready_send_requests); #ifdef STARPU_USE_FXT _starpu_mpi_fxt_shutdown(); #endif if (argc_argv->initialize_mpi) { _STARPU_MPI_DEBUG(0, "Calling MPI_Finalize()\n"); MPI_Finalize(); } STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); _starpu_mpi_sync_data_shutdown(); _starpu_mpi_early_data_shutdown(); _starpu_mpi_early_request_shutdown(); _starpu_mpi_datatype_shutdown(); free(argc_argv); return NULL; } int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv) { STARPU_PTHREAD_MUTEX_INIT(&progress_mutex, NULL); STARPU_PTHREAD_MUTEX_INIT(&early_data_mutex, NULL); STARPU_PTHREAD_COND_INIT(&progress_cond, NULL); STARPU_PTHREAD_COND_INIT(&barrier_cond, NULL); _starpu_mpi_req_list_init(&ready_recv_requests); _starpu_mpi_req_prio_list_init(&ready_send_requests); _starpu_mpi_req_list_init(&detached_requests); STARPU_PTHREAD_MUTEX_INIT(&posted_requests_mutex, NULL); nready_process = starpu_getenv_number_default("STARPU_MPI_NREADY_PROCESS", 10); ndetached_send_requests_max = starpu_getenv_number_default("STARPU_MPI_NDETACHED_SEND", 10); early_data_force_allocate = starpu_getenv_number_default("STARPU_MPI_EARLYDATA_ALLOCATE", 0); #ifdef STARPU_SIMGRID STARPU_PTHREAD_MUTEX_INIT(&wait_counter_mutex, NULL); STARPU_PTHREAD_COND_INIT(&wait_counter_cond, NULL); #endif #ifdef STARPU_SIMGRID _starpu_mpi_progress_thread_func(argc_argv); return 0; #else STARPU_PTHREAD_CREATE(&progress_thread, NULL, _starpu_mpi_progress_thread_func, argc_argv); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); while (!running) STARPU_PTHREAD_COND_WAIT(&progress_cond, &progress_mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); return 0; #endif } #ifdef STARPU_SIMGRID void _starpu_mpi_wait_for_initialization() { /* Wait for MPI initialization to finish */ STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); while (!running) STARPU_PTHREAD_COND_WAIT(&progress_cond, &progress_mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); } #endif void _starpu_mpi_progress_shutdown(void **value) { if (!running) { _STARPU_ERROR("The progress thread was not launched. Was StarPU successfully initialized?\n"); } STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); running = 0; STARPU_PTHREAD_COND_BROADCAST(&progress_cond); #ifdef STARPU_SIMGRID starpu_pthread_queue_signal(&_starpu_mpi_thread_dontsleep); #endif STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); #ifdef STARPU_SIMGRID /* FIXME: should rather properly wait for _starpu_mpi_progress_thread_func to finish */ (void) value; starpu_sleep(1); #else STARPU_PTHREAD_JOIN(progress_thread, value); #endif STARPU_PTHREAD_MUTEX_DESTROY(&posted_requests_mutex); STARPU_PTHREAD_MUTEX_DESTROY(&progress_mutex); STARPU_PTHREAD_MUTEX_DESTROY(&early_data_mutex); STARPU_PTHREAD_COND_DESTROY(&barrier_cond); } static int64_t _starpu_mpi_tag_max = INT64_MAX; int starpu_mpi_comm_get_attr(MPI_Comm comm, int keyval, void *attribute_val, int *flag) { (void) comm; if (keyval == STARPU_MPI_TAG_UB) { *flag = 1; *(int64_t **)attribute_val = &_starpu_mpi_tag_max; } else { *flag = 0; } return 0; } void _starpu_mpi_driver_init(struct starpu_conf *conf) { /* We only initialize the driver if the environment variable * STARPU_MPI_DRIVER_CALL_FREQUENCY is defined by the user. If this environment * variable is not defined or defined at a value lower than or equal to zero, * StarPU-MPI will not use a driver. */ int driver_env = starpu_getenv_number_default("STARPU_MPI_DRIVER_CALL_FREQUENCY", 0); if (driver_env > 0) { #ifdef STARPU_SIMGRID _STARPU_DISP("Warning: MPI driver is not supported with simgrid, this will be disabled\n"); return; #endif mpi_driver_call_freq = driver_env; _STARPU_MALLOC(mpi_driver, sizeof(struct starpu_driver)); mpi_driver->type = STARPU_CPU_WORKER; mpi_driver->id.cpu_id = 0; conf->not_launched_drivers = mpi_driver; conf->n_not_launched_drivers = 1; int tasks_freq_env = starpu_getenv_number_default("STARPU_MPI_DRIVER_TASK_FREQUENCY", 0); if (tasks_freq_env > 0) mpi_driver_task_freq = tasks_freq_env; } } void _starpu_mpi_wake_up_progress_thread() { STARPU_PTHREAD_COND_SIGNAL(&progress_cond); } void _starpu_mpi_driver_shutdown() { if (mpi_driver) { starpu_driver_deinit(mpi_driver); free(mpi_driver); mpi_driver = NULL; } } #endif /* STARPU_USE_MPI_MPI */ starpu-1.4.9+dfsg/mpi/src/mpi/starpu_mpi_mpi.h000066400000000000000000000031461507764646700213560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_MPI_H__ #define __STARPU_MPI_MPI_H__ #include #include #include #include #include /** @file */ #ifdef STARPU_USE_MPI_MPI #ifdef __cplusplus extern "C" { #endif int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv); void _starpu_mpi_progress_shutdown(void **value); #ifdef STARPU_SIMGRID void _starpu_mpi_wait_for_initialization(); #endif int _starpu_mpi_barrier(MPI_Comm comm); int _starpu_mpi_wait_for_all(MPI_Comm comm); int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status); int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status); void _starpu_mpi_wake_up_progress_thread(); void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req); void _starpu_mpi_irecv_size_func(struct _starpu_mpi_req *req); #ifdef __cplusplus } #endif #endif /* STARPU_USE_MPI_MPI */ #endif /* __STARPU_MPI_MPI_H__ */ starpu-1.4.9+dfsg/mpi/src/mpi/starpu_mpi_mpi_backend.c000066400000000000000000000111431507764646700230140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #ifdef STARPU_USE_MPI_MPI #include #include #include #include #include #include #include static void starpu_mpi_mpi_backend_constructor(void) __attribute__((constructor)); static void starpu_mpi_mpi_backend_constructor(void) { #ifdef HAVE_PIOMAN /* We don't want progression in both PIOman and StarPU */ setenv("PIOM_ENABLE_PROGRESSION", "0", 0); #endif } void _starpu_mpi_mpi_backend_init(struct starpu_conf *conf) { _starpu_mpi_driver_init(conf); } void _starpu_mpi_mpi_backend_shutdown(void) { _starpu_mpi_tag_shutdown(); _starpu_mpi_comm_shutdown(); _starpu_mpi_driver_shutdown(); } int _starpu_mpi_mpi_backend_reserve_core(void) { return (starpu_getenv_number_default("STARPU_MPI_DRIVER_CALL_FREQUENCY", 0) <= 0); } void _starpu_mpi_mpi_backend_request_init(struct _starpu_mpi_req *req) { _STARPU_MPI_CALLOC(req->backend, 1, sizeof(struct _starpu_mpi_req_backend)); //req->backend->data_request = 0; STARPU_PTHREAD_MUTEX_INIT0(&req->backend->req_mutex, NULL); STARPU_PTHREAD_COND_INIT0(&req->backend->req_cond, NULL); STARPU_PTHREAD_COND_INIT0(&req->backend->posted_cond, NULL); //req->backend->other_request = NULL; //req->backend->size_req = 0; //req->backend->internal_req = NULL; //req->backend->is_internal_req = 0; req->backend->to_destroy = 1; //req->backend->early_data_handle = NULL; //req->backend->envelope = NULL; } void _starpu_mpi_mpi_backend_request_fill(struct _starpu_mpi_req *req, int is_internal_req) { _starpu_mpi_comm_register(req->node_tag.node.comm); req->backend->is_internal_req = is_internal_req; /* For internal requests, we wait for both the request completion and the matching application request completion */ req->backend->to_destroy = !is_internal_req; } void _starpu_mpi_mpi_backend_request_destroy(struct _starpu_mpi_req *req) { STARPU_PTHREAD_MUTEX_DESTROY(&req->backend->req_mutex); STARPU_PTHREAD_COND_DESTROY(&req->backend->req_cond); STARPU_PTHREAD_COND_DESTROY(&req->backend->posted_cond); free(req->backend); req->backend = NULL; } void _starpu_mpi_mpi_backend_data_clear(starpu_data_handle_t data_handle) { _starpu_mpi_tag_data_release(data_handle); } void _starpu_mpi_mpi_backend_data_register(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag) { _starpu_mpi_tag_data_register(data_handle, data_tag); } void _starpu_mpi_mpi_backend_comm_register(MPI_Comm comm) { _starpu_mpi_comm_register(comm); } struct _starpu_mpi_backend _mpi_backend = { ._starpu_mpi_backend_init = _starpu_mpi_mpi_backend_init, ._starpu_mpi_backend_shutdown = _starpu_mpi_mpi_backend_shutdown, ._starpu_mpi_backend_reserve_core = _starpu_mpi_mpi_backend_reserve_core, ._starpu_mpi_backend_request_init = _starpu_mpi_mpi_backend_request_init, ._starpu_mpi_backend_request_fill = _starpu_mpi_mpi_backend_request_fill, ._starpu_mpi_backend_request_destroy = _starpu_mpi_mpi_backend_request_destroy, ._starpu_mpi_backend_data_clear = _starpu_mpi_mpi_backend_data_clear, ._starpu_mpi_backend_data_register = _starpu_mpi_mpi_backend_data_register, ._starpu_mpi_backend_comm_register = _starpu_mpi_mpi_backend_comm_register, ._starpu_mpi_backend_progress_init = _starpu_mpi_progress_init, ._starpu_mpi_backend_progress_shutdown = _starpu_mpi_progress_shutdown, #ifdef STARPU_SIMGRID ._starpu_mpi_backend_wait_for_initialization = _starpu_mpi_wait_for_initialization, #endif ._starpu_mpi_backend_barrier = _starpu_mpi_barrier, ._starpu_mpi_backend_wait_for_all = _starpu_mpi_wait_for_all, ._starpu_mpi_backend_wait = _starpu_mpi_wait, ._starpu_mpi_backend_test = _starpu_mpi_test, ._starpu_mpi_backend_isend_size_func = _starpu_mpi_isend_size_func, ._starpu_mpi_backend_irecv_size_func = _starpu_mpi_irecv_size_func, }; #endif /* STARPU_USE_MPI_MPI*/ starpu-1.4.9+dfsg/mpi/src/mpi/starpu_mpi_mpi_backend.h000066400000000000000000000043001507764646700230160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_MPI_BACKEND_H__ #define __STARPU_MPI_MPI_BACKEND_H__ #include #include /** @file */ #ifdef __cplusplus extern "C" { #endif #ifdef STARPU_USE_MPI_MPI extern int _starpu_mpi_tag; #define _STARPU_MPI_TAG_ENVELOPE _starpu_mpi_tag #define _STARPU_MPI_TAG_DATA _starpu_mpi_tag+1 #define _STARPU_MPI_TAG_SYNC_DATA _starpu_mpi_tag+2 #ifdef STARPU_USE_MPI_FT #define _STARPU_MPI_TAG_CP_ACK _starpu_mpi_tag+3 #define _STARPU_MPI_TAG_CP_RCVRY _starpu_mpi_tag+4 #define _STARPU_MPI_TAG_EXT_DATA _starpu_mpi_tag+5 #define _STARPU_MPI_TAG_CP_INFO _starpu_mpi_tag+6 #endif // STARPU_USE_MPI_FT enum _starpu_envelope_mode { _STARPU_MPI_ENVELOPE_DATA=0, _STARPU_MPI_ENVELOPE_SYNC_READY=1 }; struct _starpu_mpi_envelope { enum _starpu_envelope_mode mode; starpu_ssize_t size; starpu_mpi_tag_t data_tag; unsigned sync; }; struct _starpu_mpi_req_backend { MPI_Request data_request; starpu_pthread_mutex_t req_mutex; starpu_pthread_cond_t req_cond; starpu_pthread_cond_t posted_cond; /** In the case of a Wait/Test request, we are going to post a request * to test the completion of another request */ struct _starpu_mpi_req *other_request; MPI_Request size_req; struct _starpu_mpi_envelope* envelope; unsigned is_internal_req:1; unsigned to_destroy:1; struct _starpu_mpi_req *internal_req; struct _starpu_mpi_early_data_handle *early_data_handle; UT_hash_handle hh; }; #endif // STARPU_USE_MPI_MPI #ifdef __cplusplus } #endif #endif // __STARPU_MPI_MPI_BACKEND_H__ starpu-1.4.9+dfsg/mpi/src/mpi/starpu_mpi_sync_data.c000066400000000000000000000127621507764646700225350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #ifdef STARPU_USE_MPI_MPI struct _starpu_mpi_sync_data_handle_hashlist { struct _starpu_mpi_req_list list; UT_hash_handle hh; struct _starpu_mpi_node_tag node_tag; }; /** stores data which have been received by MPI but have not been requested by the application */ static starpu_pthread_mutex_t _starpu_mpi_sync_data_handle_mutex; static struct _starpu_mpi_sync_data_handle_hashlist *_starpu_mpi_sync_data_handle_hashmap = NULL; static int _starpu_mpi_sync_data_handle_hashmap_count = 0; void _starpu_mpi_sync_data_init(void) { _starpu_mpi_sync_data_handle_hashmap = NULL; STARPU_PTHREAD_MUTEX_INIT(&_starpu_mpi_sync_data_handle_mutex, NULL); _starpu_mpi_sync_data_handle_hashmap_count = 0; } void _starpu_mpi_sync_data_shutdown(void) { struct _starpu_mpi_sync_data_handle_hashlist *current=NULL, *tmp=NULL; HASH_ITER(hh, _starpu_mpi_sync_data_handle_hashmap, current, tmp) { STARPU_ASSERT(_starpu_mpi_req_list_empty(¤t->list)); HASH_DEL(_starpu_mpi_sync_data_handle_hashmap, current); free(current); } STARPU_PTHREAD_MUTEX_DESTROY(&_starpu_mpi_sync_data_handle_mutex); } #ifdef STARPU_VERBOSE static void _starpu_mpi_sync_data_handle_display_hash(struct _starpu_mpi_node_tag *node_tag) { struct _starpu_mpi_sync_data_handle_hashlist *hashlist; HASH_FIND(hh, _starpu_mpi_sync_data_handle_hashmap, node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist); if (hashlist == NULL) { _STARPU_MPI_DEBUG(60, "Hashlist for comm %ld source %d and tag %ld does not exist\n", (long int)node_tag->node.comm, node_tag->node.rank, node_tag->data_tag); } else if (_starpu_mpi_req_list_empty(&hashlist->list)) { _STARPU_MPI_DEBUG(60, "Hashlist for comm %ld source %d and tag %ld is empty\n", (long int)node_tag->node.comm, node_tag->node.rank, node_tag->data_tag); } else { struct _starpu_mpi_req *cur; for (cur = _starpu_mpi_req_list_begin(&hashlist->list) ; cur != _starpu_mpi_req_list_end(&hashlist->list); cur = _starpu_mpi_req_list_next(cur)) { _STARPU_MPI_DEBUG(60, "Element for comm %ld source %d and tag %ld: %p\n", (long int)node_tag->node.comm, node_tag->node.rank, node_tag->data_tag, cur); } } } #endif void _starpu_mpi_sync_data_check_termination(void) { STARPU_ASSERT_MSG(_starpu_mpi_sync_data_handle_hashmap_count == 0, "Number of sync received messages left is not zero, did you forget to post a receive corresponding to a send?"); } int _starpu_mpi_sync_data_count(void) { return _starpu_mpi_sync_data_handle_hashmap_count; } struct _starpu_mpi_req *_starpu_mpi_sync_data_find(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm) { struct _starpu_mpi_req *req; struct _starpu_mpi_node_tag node_tag; struct _starpu_mpi_sync_data_handle_hashlist *found; memset(&node_tag, 0, sizeof(struct _starpu_mpi_node_tag)); node_tag.node.comm = comm; node_tag.node.rank = source; node_tag.data_tag = data_tag; _STARPU_MPI_DEBUG(60, "Looking for sync_data_handle with comm %ld source %d tag %ld in the hashmap\n", (long int)comm, source, data_tag); STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_sync_data_handle_mutex); HASH_FIND(hh, _starpu_mpi_sync_data_handle_hashmap, &node_tag, sizeof(struct _starpu_mpi_node_tag), found); if (found == NULL) { req = NULL; } else { if (_starpu_mpi_req_list_empty(&found->list)) { req = NULL; } else { req = _starpu_mpi_req_list_pop_front(&found->list); _starpu_mpi_sync_data_handle_hashmap_count --; } } STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_sync_data_handle_mutex); _STARPU_MPI_DEBUG(60, "Found sync_data_handle %p with comm %ld source %d tag %ld in the hashmap\n", req, (long int)comm, source, data_tag); return req; } void _starpu_mpi_sync_data_add(struct _starpu_mpi_req *sync_req) { struct _starpu_mpi_sync_data_handle_hashlist *hashlist; _STARPU_MPI_DEBUG(2000, "Adding sync_req %p with comm %ld source %d tag %ld in the hashmap\n", sync_req, (long int)sync_req->node_tag.node.comm, sync_req->node_tag.node.rank, sync_req->node_tag.data_tag); STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_sync_data_handle_mutex); HASH_FIND(hh, _starpu_mpi_sync_data_handle_hashmap, &sync_req->node_tag, sizeof(struct _starpu_mpi_node_tag), hashlist); if (hashlist == NULL) { _STARPU_MPI_MALLOC(hashlist, sizeof(struct _starpu_mpi_sync_data_handle_hashlist)); _starpu_mpi_req_list_init(&hashlist->list); hashlist->node_tag = sync_req->node_tag; HASH_ADD(hh, _starpu_mpi_sync_data_handle_hashmap, node_tag, sizeof(hashlist->node_tag), hashlist); } _starpu_mpi_req_list_push_back(&hashlist->list, sync_req); _starpu_mpi_sync_data_handle_hashmap_count ++; STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_sync_data_handle_mutex); #ifdef STARPU_VERBOSE _starpu_mpi_sync_data_handle_display_hash(&sync_req->node_tag); #endif } #endif // STARPU_USE_MPI_MPI starpu-1.4.9+dfsg/mpi/src/mpi/starpu_mpi_sync_data.h000066400000000000000000000025541507764646700225400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_SYNC_DATA_H__ #define __STARPU_MPI_SYNC_DATA_H__ #include #include #include #include #include /** @file */ #ifdef STARPU_USE_MPI_MPI #ifdef __cplusplus extern "C" { #endif void _starpu_mpi_sync_data_init(void); void _starpu_mpi_sync_data_check_termination(void); void _starpu_mpi_sync_data_shutdown(void); struct _starpu_mpi_req *_starpu_mpi_sync_data_find(starpu_mpi_tag_t data_tag, int source, MPI_Comm comm); void _starpu_mpi_sync_data_add(struct _starpu_mpi_req *req); int _starpu_mpi_sync_data_count(); #ifdef __cplusplus } #endif #endif /* STARPU_USE_MPI_MPI */ #endif /* __STARPU_MPI_SYNC_DATA_H__ */ starpu-1.4.9+dfsg/mpi/src/mpi/starpu_mpi_tag.c000066400000000000000000000075061507764646700213430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #ifdef STARPU_USE_MPI_MPI /* Entry in the `registered_tag_handles' hash table. */ struct handle_tag_entry { UT_hash_handle hh; starpu_mpi_tag_t data_tag; starpu_data_handle_t handle; }; /* Hash table mapping host tags to data handles. */ static struct handle_tag_entry *registered_tag_handles; static struct _starpu_spinlock registered_tag_handles_lock; void _starpu_mpi_tag_init(void) { _starpu_spin_init(®istered_tag_handles_lock); } void _starpu_mpi_tag_shutdown(void) { struct handle_tag_entry *tag_entry=NULL, *tag_tmp=NULL; _starpu_spin_destroy(®istered_tag_handles_lock); HASH_ITER(hh, registered_tag_handles, tag_entry, tag_tmp) { HASH_DEL(registered_tag_handles, tag_entry); free(tag_entry); } registered_tag_handles = NULL; } starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(starpu_mpi_tag_t data_tag) { struct handle_tag_entry *ret; _starpu_spin_lock(®istered_tag_handles_lock); HASH_FIND(hh, registered_tag_handles, &data_tag, sizeof(ret->data_tag), ret); _starpu_spin_unlock(®istered_tag_handles_lock); if (ret) { return ret->handle; } else { return NULL; } } void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag) { if (data_tag == -1) { /* No tag for this data, probably a temporary data not to be communicated */ return; } struct handle_tag_entry *entry; _STARPU_MPI_MALLOC(entry, sizeof(*entry)); STARPU_ASSERT_MSG(!(_starpu_mpi_tag_get_data_handle_from_tag(data_tag)), "There is already a data handle %p registered with the tag %ld\n", _starpu_mpi_tag_get_data_handle_from_tag(data_tag), data_tag); _STARPU_MPI_DEBUG(42, "Adding handle %p with tag %"PRIi64" in hashtable\n", handle, data_tag); entry->handle = handle; entry->data_tag = data_tag; _starpu_spin_lock(®istered_tag_handles_lock); #ifndef STARPU_NO_ASSERT struct handle_tag_entry *old; HASH_FIND(hh, registered_tag_handles, &data_tag, sizeof(entry->data_tag), old); STARPU_ASSERT_MSG(!old, "tag %"PRIi64" being registered for data %p, but is already used by data %p!\n", data_tag, handle, old?old->handle:NULL); #endif HASH_ADD(hh, registered_tag_handles, data_tag, sizeof(entry->data_tag), entry); _starpu_spin_unlock(®istered_tag_handles_lock); } int _starpu_mpi_tag_data_release(starpu_data_handle_t handle) { starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(handle); _STARPU_MPI_DEBUG(42, "Removing handle %p with tag %"PRIi64" from hashtable\n", handle, data_tag); if (data_tag != -1) { struct handle_tag_entry *tag_entry; _starpu_spin_lock(®istered_tag_handles_lock); HASH_FIND(hh, registered_tag_handles, &(((struct _starpu_mpi_data *)(handle->mpi_data))->node_tag.data_tag), sizeof(tag_entry->data_tag), tag_entry); STARPU_ASSERT_MSG((tag_entry != NULL),"Data handle %p with tag %"PRIi64" isn't in the hashmap !", handle, data_tag); HASH_DEL(registered_tag_handles, tag_entry); _starpu_spin_unlock(®istered_tag_handles_lock); free(tag_entry); } return 0; } #endif // STARPU_USE_MPI_MPI starpu-1.4.9+dfsg/mpi/src/mpi/starpu_mpi_tag.h000066400000000000000000000024131507764646700213400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_TAG_H__ #define __STARPU_MPI_TAG_H__ #include #include #include /** @file */ #ifdef STARPU_USE_MPI_MPI #ifdef __cplusplus extern "C" { #endif void _starpu_mpi_tag_init(void); void _starpu_mpi_tag_shutdown(void); void _starpu_mpi_tag_data_register(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag); int _starpu_mpi_tag_data_release(starpu_data_handle_t handle); starpu_data_handle_t _starpu_mpi_tag_get_data_handle_from_tag(starpu_mpi_tag_t data_tag); #ifdef __cplusplus } #endif #endif // STARPU_USE_MPI_MPI #endif // __STARPU_MPI_TAG_H__ starpu-1.4.9+dfsg/mpi/src/mpi_failure_tolerance/000077500000000000000000000000001507764646700217145ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint.c000066400000000000000000000272341507764646700264620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include // Should be deduced at preprocessing (Nmad vs MPI) #include "starpu_mpi_cache.h" #define MAX_CP_TEMPLATE_NUMBER 32 // Arbitrary limit starpu_pthread_mutex_t cp_lib_mutex; void _ack_msg_send_cb(void* _args) { struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args; _STARPU_MPI_FT_STATS_SEND_FT_SERVICE_MSG(sizeof(struct _starpu_mpi_cp_ack_msg)); _STARPU_MPI_DEBUG(3, "Ack send succeeded cpid:%d, cpinst:%d, dest:%d\n", arg->msg.checkpoint_id, arg->msg.checkpoint_instance, arg->rank); //free(arg); } void _ack_msg_recv_cb(void* _args) { struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args; int ret; _STARPU_MPI_FT_STATS_RECV_FT_SERVICE_MSG(sizeof(struct _starpu_mpi_cp_ack_msg)); _STARPU_MPI_DEBUG(3, "ack msg recved id:%d inst:%d\n", arg->msg.checkpoint_id, arg->msg.checkpoint_instance); ret = _checkpoint_template_digest_ack_reception(arg->msg.checkpoint_id, arg->msg.checkpoint_instance); if (ret == 0) { //free(arg); } else if (ret == -1) { STARPU_ABORT_MSG("Could not find CP template, cpid:%d - cpinst:%d\n", arg->msg.checkpoint_id, arg->msg.checkpoint_instance); } } void _starpu_mpi_store_data_and_send_ack_cb(struct _starpu_mpi_cp_ack_arg_cb* arg) { checkpoint_package_data_add(arg->msg.checkpoint_id, arg->msg.checkpoint_instance, arg->rank, arg->tag, arg->type, arg->copy_handle, arg->count); _STARPU_MPI_DEBUG(3,"Send ack msg to %d: id=%d inst=%d\n", arg->rank, arg->msg.checkpoint_id, arg->msg.checkpoint_instance); _starpu_mpi_ft_service_post_send((void *) &arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, _STARPU_MPI_TAG_CP_ACK, MPI_COMM_WORLD, _ack_msg_send_cb, arg); } void _starpu_mpi_push_cp_ack_recv_cb(struct _starpu_mpi_cp_ack_arg_cb* arg) { _STARPU_MPI_DEBUG(3, "Posting ack recv cb from %d\n", arg->rank); _starpu_mpi_ft_service_post_special_recv(_STARPU_MPI_TAG_CP_ACK); // _ft_service_msg_irecv_cb((void *) &arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, // _STARPU_MPI_TAG_CP_ACK, MPI_COMM_WORLD, _ack_msg_recv_cb, arg); } void _recv_internal_dup_ro_cb(void* _args) { struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args; starpu_data_release(arg->copy_handle); _starpu_mpi_store_data_and_send_ack_cb(arg); } void _recv_cp_external_data_cb(void* _args) { struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args; _STARPU_MPI_FT_STATS_RECV_CP_DATA(starpu_data_get_size(arg->handle)); // an handle has specifically been created, Let's get the value back, and unregister the handle arg->copy_handle = starpu_data_handle_to_pointer(arg->handle, STARPU_MAIN_RAM); starpu_data_unregister_submit(arg->handle); _starpu_mpi_store_data_and_send_ack_cb(arg); } void _send_cp_external_data_cb(void* _args) { struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args; _STARPU_MPI_FT_STATS_SEND_CP_DATA(starpu_data_get_size(arg->handle)); free(starpu_data_handle_to_pointer(arg->handle, STARPU_MAIN_RAM)); starpu_data_unregister_submit(arg->handle); _starpu_mpi_push_cp_ack_recv_cb(arg); } void _send_cp_internal_data_cb(void* _args) { struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args; _starpu_mpi_push_cp_ack_recv_cb(_args); if (!arg->cache_flag) { //TODO: check cp_domain! struct _starpu_mpi_checkpoint_tracker* tracker = _starpu_mpi_checkpoint_template_get_tracking_inst_by_id_inst(0, arg->checkpoint_instance_hint); if(!tracker->first_msg_sent_flag) { tracker->first_msg_sent_flag = 1; _STARPU_MPI_TRACE_CHECKPOINT_BEGIN(arg->checkpoint_instance_hint,0); } } } void _send_internal_data_stats(struct _starpu_mpi_cp_ack_arg_cb* arg) { if (arg->cache_flag) { _STARPU_MPI_FT_STATS_SEND_CACHED_CP_DATA(starpu_data_get_size(arg->handle)); } else { _STARPU_MPI_FT_STATS_SEND_CP_DATA(starpu_data_get_size(arg->handle)); } } int starpu_mpi_checkpoint_template_submit(starpu_mpi_checkpoint_template_t cp_template, int prio) { starpu_data_handle_t handle; struct _starpu_mpi_data* mpi_data; struct _starpu_mpi_cp_ack_arg_cb* arg; void* cpy_ptr; struct _starpu_mpi_checkpoint_template_item* item; int current_instance; current_instance = increment_current_instance(); _starpu_mpi_checkpoint_post_cp_discard_recv(cp_template); _starpu_mpi_checkpoint_template_create_instance_tracker(cp_template, cp_template->cp_id, cp_template->checkpoint_domain, current_instance); //TODO check what happens when all the ack msg are received when we arrive here. item = _starpu_mpi_checkpoint_template_get_first_data(cp_template); while (item != _starpu_mpi_checkpoint_template_end(cp_template)) { switch (item->type) { case STARPU_VALUE: // TODO: Maybe do not pass via starpu handles for external data, and need to reimplement mpi comm layer for _STARPU_MALLOC(arg, sizeof(struct _starpu_mpi_cp_ack_arg_cb)); arg->tag = item->tag; arg->type = STARPU_VALUE; arg->count = item->count; arg->cache_flag = 0; if (item->backupped_by != -1) { _STARPU_MALLOC(cpy_ptr, item->count); memcpy(cpy_ptr, item->ptr, item->count); starpu_variable_data_register(&arg->handle, STARPU_MAIN_RAM, (uintptr_t)cpy_ptr, item->count); arg->rank = item->backupped_by; _STARPU_MPI_DEBUG(0, "Submit CP: sending external data:%d, tag:%ld, to :%d\n", (int)(*(int*)cpy_ptr), arg->tag, arg->rank); starpu_mpi_isend_detached_prio(arg->handle, arg->rank, arg->tag, prio, MPI_COMM_WORLD, &_send_cp_external_data_cb, (void*)arg); // The callback needs to free the handle specially created for the send, and post ack recv } else if (item->backup_of != -1) { int ret; arg->msg.checkpoint_id = cp_template->cp_id; arg->msg.checkpoint_instance = current_instance; _STARPU_MALLOC(cpy_ptr, item->count); starpu_variable_data_register(&arg->handle, STARPU_MAIN_RAM, (uintptr_t)cpy_ptr, item->count); arg->rank = item->backup_of; _STARPU_MPI_DEBUG(0, "Submit CP: receiving external data tag:%ld, from :%d\n", arg->tag, arg->rank); ret = starpu_mpi_irecv_detached(arg->handle, arg->rank, arg->tag, MPI_COMM_WORLD, &_recv_cp_external_data_cb, (void*)arg); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); // The callback needs to store the received data and post ack send } break; case STARPU_R: handle = (starpu_data_handle_t)item->ptr; mpi_data = _starpu_mpi_data_get(handle); if (starpu_mpi_data_get_rank(handle)==_my_rank) { if (!mpi_data->modified) { _starpu_mpi_checkpoint_tracker_update(cp_template, cp_template->cp_id, cp_template->checkpoint_domain, current_instance); //TODO: check if the data are all acknowledged _STARPU_MPI_DEBUG(0, "Submit CP: skip send starPU data to %d (tag %d)\n", item->backupped_by, (int)starpu_mpi_data_get_tag(handle)); _STARPU_MPI_FT_STATS_SEND_CACHED_CP_DATA(starpu_data_get_size(handle)); break; // We don't want to CP a data that is still at initial state. } _STARPU_MPI_DEBUG(0, "Submit CP: sending starPU data to %d (tag %d)\n", item->backupped_by, (int)starpu_mpi_data_get_tag(handle)); _STARPU_MALLOC(arg, sizeof(struct _starpu_mpi_cp_ack_arg_cb)); arg->rank = item->backupped_by; arg->handle = handle; arg->tag = starpu_mpi_data_get_tag(handle); arg->type = STARPU_R; arg->count = item->count; arg->checkpoint_instance_hint = current_instance; _starpu_mpi_isend_cache_aware(handle, item->backupped_by, starpu_mpi_data_get_tag(handle), MPI_COMM_WORLD, 1, 0, prio, &_send_cp_internal_data_cb, (void*)arg, 1, &arg->cache_flag); // the callbacks need to post ack recv. The cache one needs to release the handle. _send_internal_data_stats(arg); } else if (item->backup_of == starpu_mpi_data_get_rank(handle)) { if (!mpi_data->modified) { _STARPU_MPI_DEBUG(0, "Submit CP: skip recv starPU data to %d (tag %d)\n", item->backupped_by, (int)starpu_mpi_data_get_tag(handle)); _STARPU_MPI_FT_STATS_RECV_CACHED_CP_DATA(starpu_data_get_size(handle)); break; // We don't want to CP a data that is still at initial state. } _STARPU_MPI_DEBUG(0, "Submit CP: receiving starPU data from %d (tag %d)\n", starpu_mpi_data_get_rank(handle), (int)starpu_mpi_data_get_tag(handle)); _STARPU_MALLOC(arg, sizeof(struct _starpu_mpi_cp_ack_arg_cb)); arg->rank = item->backup_of; arg->handle = handle; arg->tag = starpu_mpi_data_get_tag(handle); arg->type = STARPU_R; arg->count = item->count; arg->msg.checkpoint_id = cp_template->cp_id; arg->msg.checkpoint_instance = current_instance; _starpu_mpi_irecv_cache_aware(handle, starpu_mpi_data_get_rank(handle), starpu_mpi_data_get_tag(handle), MPI_COMM_WORLD, 1, 0, NULL, NULL, 1, 0, 1, &arg->cache_flag); // The callback needs to do nothing. The cached one must release the handle. // _recv_internal_data_stats(arg); // Now done in data_cache_set starpu_data_dup_ro(&arg->copy_handle, arg->handle, 1); starpu_data_acquire_cb(arg->copy_handle, STARPU_R, _recv_internal_dup_ro_cb, arg); // The callback need to store the data and post ack send. } break; } item = _starpu_mpi_checkpoint_template_get_next_data(cp_template, item); } return 0; } // ///** // * receives param of type starpu_mpi_checkpoint_template_t // * @param args // * @return // */ //void _starpu_mpi_checkpoint_ack_send_cb(void* args) //{ // starpu_mpi_checkpoint_template_t cp_template = (starpu_mpi_checkpoint_template_t) args; // starpu_pthread_mutex_lock(&cp_template->mutex); // cp_template->remaining_ack_awaited--; // starpu_pthread_mutex_unlock(&cp_template->mutex); //} // //void _starpu_checkpoint_cached_data_send_copy_and_ack(void* _arg) //{ // struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _arg; // starpu_data_register_same(&arg->copy_handle, arg->handle); // starpu_data_cpy_priority(arg->copy_handle, arg->handle, 1, _starpu_mpi_push_cp_ack_recv_cb, _arg, STARPU_MAX_PRIO); // starpu_data_release(arg->handle); //} // //void _starpu_checkpoint_data_send_copy_and_ack(void* _args) //{ // struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _args; // starpu_data_register_same(&arg->copy_handle, arg->handle); // starpu_data_cpy_priority(arg->copy_handle, arg->handle, 1, _starpu_mpi_push_cp_ack_recv_cb, _args, STARPU_MAX_PRIO); //} // //void _starpu_mpi_treat_cache_ack_no_lock_cb(void* _args) //{ // starpu_mpi_checkpoint_template_t cp_template = (starpu_mpi_checkpoint_template_t)_args; // cp_template->remaining_ack_awaited--; //} starpu-1.4.9+dfsg/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint.h000066400000000000000000000031551507764646700264630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef FT_STARPU_STARPU_MPI_CHECKPOINT_H #define FT_STARPU_STARPU_MPI_CHECKPOINT_H #include #include #include #ifdef __cplusplus extern "C" { #endif extern int _my_rank; struct _starpu_mpi_cp_ack_msg { int checkpoint_id; int checkpoint_instance; }; struct _starpu_mpi_cp_info_msg { int checkpoint_id; int checkpoint_instance; int validation:1; int discard:1; }; struct _starpu_mpi_cp_ack_arg_cb { int rank; starpu_data_handle_t handle; starpu_data_handle_t copy_handle; int type; int count; starpu_mpi_tag_t tag; struct _starpu_mpi_cp_ack_msg msg; int checkpoint_instance_hint; int cache_flag; }; struct _starpu_mpi_cp_discard_arg_cb { int rank; struct _starpu_mpi_cp_info_msg msg; }; void _ack_msg_recv_cb(void* _args); #ifdef __cplusplus } #endif #endif //FT_STARPU_STARPU_MPI_CHECKPOINT_H starpu-1.4.9+dfsg/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_package.c000066400000000000000000000155131507764646700301320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include struct _starpu_mpi_checkpoint_data_list* checkpoint_data_list; starpu_pthread_mutex_t package_package_mutex; int _checkpoint_package_data_delete_all(); int checkpoint_package_init() { STARPU_PTHREAD_MUTEX_INIT(&package_package_mutex, NULL); checkpoint_data_list = _starpu_mpi_checkpoint_data_list_new(); _starpu_mpi_checkpoint_data_list_init(checkpoint_data_list); return 0; } int checkpoint_package_shutdown() { _checkpoint_package_data_delete_all(); STARPU_PTHREAD_MUTEX_DESTROY(&package_package_mutex); return 0; } #ifdef STARPU_USE_MPI_FT_STATS void _stats_store_checkpoint_data(struct _starpu_mpi_checkpoint_data* new_checkpoint_data) { struct _starpu_mpi_checkpoint_data* next_checkpoint_data; struct _starpu_mpi_checkpoint_data* checkpoint_data = _starpu_mpi_checkpoint_data_list_begin(checkpoint_data_list); while (checkpoint_data != _starpu_mpi_checkpoint_data_list_end(checkpoint_data_list)) { next_checkpoint_data = _starpu_mpi_checkpoint_data_list_next(checkpoint_data); if (checkpoint_data->tag == new_checkpoint_data->tag && checkpoint_data->ptr == new_checkpoint_data->ptr) { // The data is already in the CP data list,don't count it as a new data return; } checkpoint_data = next_checkpoint_data; } _STARPU_MPI_FT_STATS_STORE_CP_DATA(new_checkpoint_data->type==STARPU_VALUE?new_checkpoint_data->count:new_checkpoint_data->type==STARPU_R?starpu_data_get_size((starpu_data_handle_t) new_checkpoint_data->ptr):-1); } #else void _stats_store_checkpoint_data(STARPU_ATTRIBUTE_UNUSED struct _starpu_mpi_checkpoint_data* new_checkpoint_data) { return; } #endif #ifdef STARPU_USE_MPI_FT_STATS void _stats_discard_checkpoint_data(struct _starpu_mpi_checkpoint_data* new_checkpoint_data) { struct _starpu_mpi_checkpoint_data* next_checkpoint_data; struct _starpu_mpi_checkpoint_data* checkpoint_data = _starpu_mpi_checkpoint_data_list_begin(checkpoint_data_list); while (checkpoint_data != _starpu_mpi_checkpoint_data_list_end(checkpoint_data_list)) { next_checkpoint_data = _starpu_mpi_checkpoint_data_list_next(checkpoint_data); if (checkpoint_data->tag == new_checkpoint_data->tag && checkpoint_data->ptr == new_checkpoint_data->ptr) { // The data is still in the CP data list, don't count it as a discard return; } checkpoint_data = next_checkpoint_data; } _STARPU_MPI_FT_STATS_DISCARD_CP_DATA(new_checkpoint_data->type==STARPU_VALUE?new_checkpoint_data->count:new_checkpoint_data->type==STARPU_R?starpu_data_get_size((starpu_data_handle_t) new_checkpoint_data->ptr):-1); } #else void _stats_discard_checkpoint_data(STARPU_ATTRIBUTE_UNUSED struct _starpu_mpi_checkpoint_data* new_checkpoint_data) { return; } #endif int checkpoint_package_data_add(int cp_id, int cp_inst, int rank, starpu_mpi_tag_t tag, int type, void* ptr, int count) { struct _starpu_mpi_checkpoint_data* checkpoint_data = _starpu_mpi_checkpoint_data_new(); checkpoint_data->cp_id = cp_id; checkpoint_data->cp_inst = cp_inst; checkpoint_data->rank = rank; checkpoint_data->tag = tag; checkpoint_data->type = type; checkpoint_data->ptr = ptr; checkpoint_data->count = count; STARPU_PTHREAD_MUTEX_LOCK(&package_package_mutex); _stats_store_checkpoint_data(checkpoint_data); _starpu_mpi_checkpoint_data_list_push_back(checkpoint_data_list, checkpoint_data); STARPU_PTHREAD_MUTEX_UNLOCK(&package_package_mutex); _STARPU_MPI_DEBUG(8, "CP data (%p) added - cpid:%d - cpinst:%d - rank:%d - tag:%ld\n", checkpoint_data->ptr, checkpoint_data->cp_id, checkpoint_data->cp_inst, checkpoint_data->rank, checkpoint_data->tag); return 0; } int _checkpoint_package_data_delete(struct _starpu_mpi_checkpoint_data* checkpoint_data) { size_t size; _starpu_mpi_checkpoint_data_list_erase(checkpoint_data_list, checkpoint_data); _stats_discard_checkpoint_data(checkpoint_data); if (checkpoint_data->type==STARPU_R) { starpu_data_handle_t handle = checkpoint_data->ptr; size = starpu_data_get_size(handle); _STARPU_MPI_DEBUG(8, "Clearing handle %p entry\n", handle); starpu_data_unregister_submit(handle); } else if (checkpoint_data->type==STARPU_VALUE) { size = checkpoint_data->count; _STARPU_MPI_DEBUG(8, "Clearing external data entry\n"); free(checkpoint_data->ptr); } else { STARPU_ABORT_MSG("Unrecognized data type: %d\n", checkpoint_data->type); } free(checkpoint_data); return size; } int checkpoint_package_data_del(int cp_id, int cp_inst, int rank) { (void)cp_id; int done = 0; size_t size = 0; struct _starpu_mpi_checkpoint_data* next_checkpoint_data = NULL; STARPU_PTHREAD_MUTEX_LOCK(&package_package_mutex); struct _starpu_mpi_checkpoint_data* checkpoint_data = _starpu_mpi_checkpoint_data_list_begin(checkpoint_data_list); while (checkpoint_data != _starpu_mpi_checkpoint_data_list_end(checkpoint_data_list)) { next_checkpoint_data = _starpu_mpi_checkpoint_data_list_next(checkpoint_data); // I delete all the old data (i.e. the cp inst is strictly lower than the one of the just validated CP) only for // the rank that initiated the CP if (checkpoint_data->cp_instrank==rank) { size += _checkpoint_package_data_delete(checkpoint_data); done++; } checkpoint_data = next_checkpoint_data; } STARPU_PTHREAD_MUTEX_UNLOCK(&package_package_mutex); _STARPU_MPI_DEBUG(0, "cleared %d data from checkpoint database (%ld bytes).\n", done, size); return done; } int _checkpoint_package_data_delete_all() { int done = 0; size_t size = 0; struct _starpu_mpi_checkpoint_data* next_checkpoint_data = NULL; STARPU_PTHREAD_MUTEX_LOCK(&package_package_mutex); struct _starpu_mpi_checkpoint_data* checkpoint_data = _starpu_mpi_checkpoint_data_list_begin(checkpoint_data_list); while (checkpoint_data != _starpu_mpi_checkpoint_data_list_end(checkpoint_data_list)) { next_checkpoint_data = _starpu_mpi_checkpoint_data_list_next(checkpoint_data); // I delete all the data size += _checkpoint_package_data_delete(checkpoint_data); done++; checkpoint_data = next_checkpoint_data; } STARPU_PTHREAD_MUTEX_UNLOCK(&package_package_mutex); _STARPU_MPI_DEBUG(0, "cleared %d data from checkpoint database (%ld bytes).\n", done, size); return done; } starpu-1.4.9+dfsg/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_package.h000066400000000000000000000026741507764646700301430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef _STARPU_MPI_CHECKPOINT_PACKAGE_H #define _STARPU_MPI_CHECKPOINT_PACKAGE_H #include #include #include #ifdef __cplusplus extern "C" { #endif /*TODO: This structure should be a hashtable accessible with these keys: * CPid > CPinstance > Rank > tag */ LIST_TYPE(_starpu_mpi_checkpoint_data, int cp_id; int cp_inst; int rank; starpu_mpi_tag_t tag; int type; void* ptr; int count; ); int checkpoint_package_init(); int checkpoint_package_shutdown(); int checkpoint_package_data_add(int cp_id, int cp_inst, int rank, starpu_mpi_tag_t tag, int type, void* ptr, int count); int checkpoint_package_data_del(int cp_id, int cp_inst, int rank); #ifdef __cplusplus } #endif #endif //_STARPU_MPI_CHECKPOINT_PACKAGE_H starpu-1.4.9+dfsg/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_template.c000066400000000000000000000523031507764646700303500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include starpu_pthread_mutex_t cp_template_mutex; starpu_pthread_mutex_t current_instance_mutex; starpu_mpi_checkpoint_template_t cp_template_array[MAX_CP_TEMPLATE_NUMBER]; int cp_template_array_size = 0; static int my_rank; static int comm_size; static int current_instance; typedef int (*backup_of_fn)(int); int increment_current_instance() { int _inst; STARPU_PTHREAD_MUTEX_LOCK(¤t_instance_mutex); _inst = ++current_instance; STARPU_PTHREAD_MUTEX_UNLOCK(¤t_instance_mutex); return _inst; } int get_current_instance() { int _inst; STARPU_PTHREAD_MUTEX_LOCK(¤t_instance_mutex); _inst = current_instance; STARPU_PTHREAD_MUTEX_UNLOCK(¤t_instance_mutex); return _inst; } void checkpoint_template_lib_init(void) { STARPU_PTHREAD_MUTEX_INIT(¤t_instance_mutex, NULL); STARPU_PTHREAD_MUTEX_INIT(&cp_template_mutex, NULL); starpu_mpi_comm_rank(MPI_COMM_WORLD, &_my_rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &comm_size); current_instance = 0; #ifdef STARPU_MPI_VERBOSE _starpu_mpi_set_debug_level_max(1000); #endif } void checkpoint_template_lib_quit(void) { int i; for (i=0 ; imutex); STARPU_ASSERT_MSG(!cp_template->frozen, "It is not possible to modify registered checkpoint template.\n"); struct _starpu_mpi_checkpoint_template_item* item; item = _starpu_mpi_checkpoint_template_item_create(type, ptr, count, backupped_by, backup_of, tag); _starpu_mpi_checkpoint_template_item_list_push_back(&cp_template->list, item); _checkpoint_template_add_to_backup_arrays(cp_template, backupped_by, backup_of); _STARPU_MPI_DEBUG(5, "New checkpoint data entry %p (data:%p) has been added to cp_template with id:%d. (%s)\n", item, item->ptr, cp_template->cp_id, backupped_by == -1 ? "BACKUP_OF" : "BACKUPPED_BY"); STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template->mutex); return 0; } int starpu_mpi_checkpoint_template_create(starpu_mpi_checkpoint_template_t* cp_template, int cp_id, int cp_domain) { *cp_template = _starpu_mpi_checkpoint_template_new(cp_id, cp_domain); return 0; } int _starpu_mpi_checkpoint_template_add_entry(starpu_mpi_checkpoint_template_t cp_template, int arg_type, va_list varg_list) { void* ptr; int count; int backupped_by; int data_rank; starpu_mpi_tag_t tag; backup_of_fn _backup_of; int i; arg_type = arg_type & ~STARPU_COMMUTE; switch(arg_type) { case STARPU_R: ptr = va_arg(varg_list, void*); count = 1; backupped_by = va_arg(varg_list, int); data_rank = starpu_mpi_data_get_rank((starpu_data_handle_t)ptr); if (_my_rank==data_rank) { return _starpu_mpi_checkpoint_template_add_data(cp_template, arg_type, ptr, count, backupped_by, -1, -1); } else if(_my_rank == backupped_by) { return _starpu_mpi_checkpoint_template_add_data(cp_template, arg_type, ptr, count, -1, data_rank, -1); } else { /* Since this data does not concern me (i.e. it is nor my data neither a data which I'm the back up) * it is considered unnecessary to register in the CP */ return 0; } break; case STARPU_VALUE: ptr = va_arg(varg_list, void*); count = va_arg(varg_list, int); tag = va_arg(varg_list, starpu_mpi_tag_t); _backup_of = va_arg(varg_list, backup_of_fn); /* I register the backup that will save this data */ _starpu_mpi_checkpoint_template_add_data(cp_template, arg_type, ptr, count, _backup_of(_my_rank), -1, tag); for (i=0 ; i<_my_rank ; i++) { if (_backup_of(i) == _my_rank) { /* I'm the back up of someone else for this data, I have to remember it */ _starpu_mpi_checkpoint_template_add_data(cp_template, arg_type, ptr, count, -1, i, tag); } } for (i=_my_rank+1 ; irank, arg->msg.checkpoint_id, arg->msg.checkpoint_instance); checkpoint_package_data_del(arg->msg.checkpoint_id, arg->msg.checkpoint_instance, arg->rank); // TODO free _args } int _starpu_mpi_checkpoint_post_cp_discard_recv(starpu_mpi_checkpoint_template_t cp_template) { /* A new CP is submitted. We must post matching recv for the message warning the future checkpoint integrity (so * I can discard old data from deprecated checkpoint). * I will receive a msg if I have old CP data. * TODO: For the message logging discard, I will receive message from the people I exchanged with since the last checkpoint. * */ struct _starpu_mpi_cp_discard_arg_cb* arg; int i; for (i=0 ; ibackup_of_array_used_size ; i++) { _STARPU_MPI_MALLOC(arg, sizeof(struct _starpu_mpi_cp_discard_arg_cb)); arg->rank = cp_template->backup_of_array[i]; _STARPU_MPI_DEBUG(10, "Post DISCARD msg reception from %d\n", arg->rank); _starpu_mpi_ft_service_post_special_recv(_STARPU_MPI_TAG_CP_INFO); // _ft_service_msg_irecv_cb(&arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, _STARPU_MPI_TAG_CP_INFO, // MPI_COMM_WORLD, _cp_discard_message_recv_cb, (void *) arg); } return i; } void _cp_discard_message_send_cb(void* _args) { _STARPU_MPI_FT_STATS_SEND_FT_SERVICE_MSG(sizeof(struct _starpu_mpi_cp_ack_msg)); free(_args); } int _starpu_mpi_checkpoint_post_cp_discard_send(starpu_mpi_checkpoint_template_t cp_template, int cp_id, int cp_instance) { /* The CP data replication has succeeded. I must send the message warning the checkpoint integrity (so * they can discard old data from deprecated checkpoint). * I will send to the ones if it has old CP data from me. * TODO: For the message logging discard, I will send message to the people I exchanged with since the last checkpoint. * */ struct _starpu_mpi_cp_discard_arg_cb* arg; int i; for (i=0 ; i < cp_template->backupped_by_array_used_size ; i++) { _STARPU_MPI_MALLOC(arg, sizeof(struct _starpu_mpi_cp_discard_arg_cb)); arg->rank = cp_template->backupped_by_array[i]; _STARPU_MPI_DEBUG(10, "Post CP DISCARD msg sending to %d\n", arg->rank); arg->msg.discard=1; arg->msg.validation=0; arg->msg.checkpoint_id = cp_id; arg->msg.checkpoint_instance = cp_instance; _starpu_mpi_ft_service_post_send(&arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, _STARPU_MPI_TAG_CP_INFO, MPI_COMM_WORLD, _cp_discard_message_send_cb, (void *) arg); } return 0; } starpu_mpi_checkpoint_template_t _starpu_mpi_get_checkpoint_template_by_id(int checkpoint_id) { int i; STARPU_PTHREAD_MUTEX_LOCK(&cp_template_mutex); for (i=0 ; i < cp_template_array_size ; i++) { // STARPU_PTHREAD_MUTEX_LOCK(&cp_template_array[i]->mutex); if (cp_template_array[i]->cp_id == checkpoint_id) { // STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template_array[i]->mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template_mutex); return cp_template_array[i]; } // STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template_array[i]->mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template_mutex); return NULL; } //int _starpu_mpi_checkpoint_post_cp_discard_recv(starpu_mpi_checkpoint_template_t cp_template) //{ // /* A new CP is submitted. We must post matching recv for the message warning the future checkpoint integrity (so // * I can tag the data as CP validated, and discard old data from deprecated checkpoint). // * I will receive a msg if I have old CP data, or if I am the back up for a node into the upcoming Checkpoint. // * * Here the union of the different list is processed to post message reception only once. // * TODO: For the message logging discard, I will receive message from the people I exchanged with since the last checkpoint. // * */ // struct _starpu_mpi_cp_discard_arg_cb* arg; // int i, j, flag; // starpu_mpi_checkpoint_template_t old_template; // for (i=0 ; ibackup_of_array_used_size ; i++) // { // STARPU_MPI_MALLOC(arg, sizeof(struct _starpu_mpi_cp_discard_arg_cb)); // arg->rank = cp_template->backup_of_array[i]; // _STARPU_MPI_DEBUG(10, "Posting DISCARD msg reception from %d\n", arg->rank); // _ft_service_msg_irecv_cb(&arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, _STARPU_MPI_TAG_CP_INFO, MPI_COMM_WORLD, _cp_discard_message_recv_cb, (void*)arg); // } // if (last_valid_checkpoint.checkpoint_id == -1) // { // return -1; // } // else if (last_valid_checkpoint.checkpoint_id!=cp_template->cp_id) // { // old_template = _starpu_mpi_get_checkpoint_template_by_id(last_valid_checkpoint.checkpoint_id); // for (i=0 ; ibackup_of_array_used_size ; i++) // { // flag=0; // for(j=0 ; jbackup_of_array_used_size ; j++) // { // if (cp_template->backup_of_array[j] == old_template->backup_of_array[i]) // { // flag = 1; // break; // } // } // if (flag==0) // { // STARPU_MPI_MALLOC(arg, sizeof(struct _starpu_mpi_cp_discard_arg_cb)); // arg->rank = old_template->backup_of_array[i]; // _STARPU_MPI_DEBUG(10, "Posting DISCARD msg reception from %d - LAST VALIDATED CP\n", arg->rank); // _ft_service_msg_irecv_cb(&arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, _STARPU_MPI_TAG_CP_INFO, MPI_COMM_WORLD, _cp_discard_message_recv_cb, (void*)arg); // } // } // } // return 0; //} //int _starpu_mpi_checkpoint_post_cp_discard_send(starpu_mpi_checkpoint_template_t cp_template, int cp_id, int cp_instance) //{ // /* The CP data replication has succeeded. I must send the message warning the future checkpoint integrity (so // * they can tag the data as CP validated, and discard old data from deprecated checkpoint). // * I will send to one if it has old CP data from me, or if it is my backup for a data into the just succeeded Checkpoint. // * * Here the union of the different list is processed to send message only once. // * TODO: For the message logging discard, I will send message to the people I exchanged with since the last checkpoint. // * */ // struct _starpu_mpi_cp_discard_arg_cb* arg; // int i, j, flag; // starpu_mpi_checkpoint_template_t old_template; // for (i=0 ; ibackupped_by_array_used_size ; i++) // { // STARPU_MPI_MALLOC(arg, sizeof(struct _starpu_mpi_cp_discard_arg_cb)); // arg->rank = cp_template->backupped_by_array[i]; // _STARPU_MPI_DEBUG(10, "Sending DISCARD msg reception to %d\n", arg->rank); // arg->msg.checkpoint_id = cp_id; // arg->msg.checkpoint_instance = cp_instance; // _ft_service_msg_isend_cb(&arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, _STARPU_MPI_TAG_CP_INFO, MPI_COMM_WORLD, _cp_discard_message_send_cb, (void*)arg); // } // if (last_valid_checkpoint.checkpoint_id == -1) // { // return -1; // } // else if (last_valid_checkpoint.checkpoint_id!=cp_template->cp_id) // { // old_template = _starpu_mpi_get_checkpoint_template_by_id(last_valid_checkpoint.checkpoint_id); // for (i=0 ; ibackupped_by_array_used_size ; i++) // { // flag=0; // for(j=0 ; jbackupped_by_array_used_size ; j++) // { // if (cp_template->backupped_by_array[j] == old_template->backupped_by_array[i]) // { // flag = 1; // break; // } // } // if (flag==0) // { // STARPU_MPI_MALLOC(arg, sizeof(struct _starpu_mpi_cp_discard_arg_cb)); // arg->rank = old_template->backupped_by_array[i]; // _STARPU_MPI_DEBUG(10, "Sending DISCARD msg to %d - OLD CP\n", arg->rank); // arg->msg.checkpoint_id = cp_id; // arg->msg.checkpoint_instance = cp_instance; // _ft_service_msg_isend_cb(&arg->msg, sizeof(struct _starpu_mpi_cp_ack_msg), arg->rank, _STARPU_MPI_TAG_CP_INFO, MPI_COMM_WORLD, _cp_discard_message_send_cb, (void*)arg); // } // } // } // return 0; //} int _starpu_mpi_checkpoint_template_freeze(starpu_mpi_checkpoint_template_t cp_template) { // char str[256]; int i; STARPU_PTHREAD_MUTEX_LOCK(&cp_template->mutex); _STARPU_MPI_DEBUG(2, "Start freezing checkpoint id:%d\n", cp_template->cp_id); cp_template->frozen = 1; cp_template->message_to_send_number = 0; cp_template->size = _starpu_mpi_checkpoint_template_item_list_size(&cp_template->list); struct _starpu_mpi_checkpoint_template_item* item = _starpu_mpi_checkpoint_template_get_first_data(cp_template); while (item != _starpu_mpi_checkpoint_template_end(cp_template)) { if (item->backup_of==-1 && item->backupped_by!=-1) { cp_template->message_to_send_number++; } item = _starpu_mpi_checkpoint_template_get_next_data(cp_template, item); } // sprintf(str, "backupped by Array maxsize:%d - currentsize:%d - ", cp_template->backupped_by_array_max_size, cp_template->backupped_by_array_used_size); // for (int i=0 ; ibackupped_by_array_used_size ; i++) // { // sprintf(str,"%s%d ", str, cp_template->backupped_by_array[i]); // } // fprintf(stderr, "%s\n", str); // // sprintf(str,"backup of Array maxsize:%d - currentsize:%d - ", cp_template->backup_of_array_max_size, cp_template->backup_of_array_used_size); // for (int i=0 ; ibackup_of_array_used_size ; i++) // { // sprintf(str,"%s%d ", str, cp_template->backup_of_array[i]); // } // fprintf(stderr, "%s\n", str); STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template->mutex); STARPU_PTHREAD_MUTEX_LOCK(&cp_template_mutex); for (i=0 ; i < cp_template_array_size ; i++) { STARPU_ASSERT_MSG(cp_template_array[i]->cp_id != cp_template->cp_id, "A checkpoint with id %d has already been registered.\n", cp_template->cp_id); } cp_template_array[cp_template_array_size] = cp_template; cp_template_array_size++; STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template_mutex); _STARPU_MPI_DEBUG(2, "Checkpoint id:%d is frozen and registered.\n", cp_template->cp_id); return cp_template->size; } int _starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t* cp_template, int cp_id, int cp_domain, va_list varg_list) { int arg_type; starpu_mpi_checkpoint_template_t _cp_template = _starpu_mpi_checkpoint_template_new(cp_id, cp_domain); va_list varg_list_copy; va_copy(varg_list_copy, varg_list); while ((arg_type = va_arg(varg_list_copy, int)) != 0) { _starpu_mpi_checkpoint_template_add_entry(_cp_template, arg_type, varg_list_copy); } va_end(varg_list_copy); _starpu_mpi_checkpoint_template_freeze(_cp_template); *cp_template = _cp_template; return 0; } int starpu_mpi_checkpoint_template_freeze(starpu_mpi_checkpoint_template_t* cp_template) { return _starpu_mpi_checkpoint_template_freeze(*cp_template); } int starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t* cp_template, int cp_id, int cp_domain, ...) { va_list varg_list; va_start(varg_list, cp_domain); int ret = _starpu_mpi_checkpoint_template_register(cp_template, cp_id, cp_domain, varg_list); va_end(varg_list); return ret; } int starpu_mpi_checkpoint_template_add_entry(starpu_mpi_checkpoint_template_t* cp_template, ...) { va_list varg_list; int arg_type; int ret; va_start(varg_list, cp_template); arg_type = va_arg(varg_list, int); STARPU_ASSERT_MSG(arg_type!=STARPU_NONE, "Unhandled arg_type: STARPU_NONE(0).\n"); ret = _starpu_mpi_checkpoint_template_add_entry(*cp_template, arg_type, varg_list); va_end(varg_list); return ret; } int _checkpoint_template_digest_ack_reception(int checkpoint_id, int checkpoint_instance) { int remaining_ack_messages; struct _starpu_mpi_checkpoint_tracker* tracker, *tracker1; starpu_mpi_checkpoint_template_t cp_template = _starpu_mpi_get_checkpoint_template_by_id(checkpoint_id); STARPU_PTHREAD_MUTEX_LOCK(&cp_template_mutex); _STARPU_MPI_DEBUG(20, "Digesting ack recv: id=%d, inst=%d\n", checkpoint_id, checkpoint_instance); tracker = _starpu_mpi_checkpoint_tracker_update(cp_template, checkpoint_id, cp_template->checkpoint_domain, checkpoint_instance); remaining_ack_messages = _starpu_mpi_checkpoint_check_tracker(tracker); if (remaining_ack_messages>0) { _STARPU_MPI_DEBUG(20, "The CP (id:%d - inst:%d) found, remaining ack msg awaited:%d.\n", checkpoint_id, checkpoint_instance, remaining_ack_messages); } else if (remaining_ack_messages==0) { _STARPU_MPI_DEBUG(0, "The CP (id:%d - inst:%d) has been successfully saved and acknowledged.\n", checkpoint_id, checkpoint_instance); tracker = _starpu_mpi_checkpoint_tracker_validate_instance(tracker); _STARPU_MPI_TRACE_CHECKPOINT_END(checkpoint_instance, cp_template->checkpoint_domain); if (tracker==NULL) { // TODO:should warn some people, because the msg logging is not implemented(this precise nodes to contact) _STARPU_MPI_DEBUG(0, "No previous checkpoint to discard\n"); } else { if (tracker->old) { tracker1 = _starpu_mpi_checkpoint_tracker_get_last_valid_tracker(tracker->cp_domain); _starpu_mpi_checkpoint_post_cp_discard_send(tracker->cp_template, tracker1->cp_id, tracker1->cp_inst); } else { _starpu_mpi_checkpoint_post_cp_discard_send(tracker->cp_template, checkpoint_id, checkpoint_instance); } } } else if (remaining_ack_messages==-1) { STARPU_ABORT_MSG("Inst (id:%d - inst:%d) is already valid. should not have received an ack msg.\n", checkpoint_id, checkpoint_instance); } else { STARPU_ABORT_MSG("Critical error, can not identify %d as remaining messages\n", remaining_ack_messages); } _STARPU_MPI_DEBUG(20, "Digested\n"); STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template_mutex); return 0; } void _checkpoint_template_digest_ack_reception_cb(void* _arg) { struct _starpu_mpi_cp_ack_arg_cb* arg = (struct _starpu_mpi_cp_ack_arg_cb*) _arg; _checkpoint_template_digest_ack_reception(arg->msg.checkpoint_id, arg->msg.checkpoint_instance); } // For test purpose int starpu_mpi_checkpoint_template_print(starpu_mpi_checkpoint_template_t cp_template) { // int val; int i = 0; struct _starpu_mpi_checkpoint_template_item* item = _starpu_mpi_checkpoint_template_get_first_data(cp_template); while (item != _starpu_mpi_checkpoint_template_end(cp_template)) { fprintf(stderr,"Item %2d: ", i); if (item->type == STARPU_VALUE) { // fprintf(stderr, "STARPU_VALUE - Value=%d - backupof:%d - backupedby:%d\n", (*(int *)(item->ptr)), item->backup_of, item->backupped_by); fprintf(stderr, "STARPU_VALUE - pointer:%p - backupof:%d - backupedby:%d\n", item->ptr, item->backup_of, item->backupped_by); } else if (item->type == STARPU_R) { // val = *(int*)starpu_data_handle_to_pointer(*(starpu_data_handle_t*)(item->ptr), 0); // fprintf(stderr, "STARPU_R - Value=%d - backupof:%d - backupedby:%d\n", val, item->backup_of, item->backupped_by); fprintf(stderr, "STARPU_R - pointer:%p - backupof:%d - backupedby:%d\n", item->ptr, item->backup_of, item->backupped_by); } else if (item->type == STARPU_DATA_ARRAY) { // fprintf(stderr, "STARPU_DATA_ARRAY - Multiple values: %d", *(int*)starpu_data_handle_to_pointer(((starpu_data_handle_t)item->ptr), 0)); // // for (int j=1 ; jcount, 5) ; j++) // { // fprintf(stderr, ", %d", *(int*)starpu_data_handle_to_pointer(((starpu_data_handle_t*)item->ptr)[j], 0)); //j*sizeof(starpu_data_handle_t) // } // fprintf(stderr, "...\n"); } else { printf("Unrecognized type.\n"); } item = _starpu_mpi_checkpoint_template_get_next_data(cp_template, item); i++; } return 0; } starpu-1.4.9+dfsg/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_template.h000066400000000000000000000212301507764646700303500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef _STARPU_MPI_CHECKPOINT_TEMPLATE_H #define _STARPU_MPI_CHECKPOINT_TEMPLATE_H #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif #define MAX_CP_TEMPLATE_NUMBER 32 // Arbitrary limit #define _CHECKPOINT_TEMPLATE_BACKUPED_RANK_ARRAY_DEFAULT_SIZE 2 extern starpu_pthread_mutex_t cp_template_mutex; extern int cp_template_array_size; extern starpu_mpi_checkpoint_template_t cp_template_array[MAX_CP_TEMPLATE_NUMBER]; int increment_current_instance(); int get_current_instance(); void checkpoint_template_lib_init(void); void checkpoint_template_lib_quit(void); int _checkpoint_template_digest_ack_reception(int checkpoint_id, int checkpoint_instance); void _checkpoint_template_digest_ack_reception_cb(void* _arg); void _cp_discard_message_recv_cb(void* _args); starpu_mpi_checkpoint_template_t _starpu_mpi_get_checkpoint_template_by_id(int checkpoint_id); int _starpu_mpi_checkpoint_post_cp_discard_recv(starpu_mpi_checkpoint_template_t cp_template); int _starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template_t *cp_template, int cp_id, int cp_domain, va_list varg_list); LIST_TYPE(_starpu_mpi_checkpoint_template_tracking_inst, int cp_id; int cp_inst; int cp_domain; starpu_mpi_checkpoint_template_t cp_template; int ack_msg_count; int valid:1; ) LIST_TYPE(_starpu_mpi_checkpoint_template_item, int type; void *ptr; size_t count; int backupped_by; int backup_of; starpu_mpi_tag_t tag; ) struct _starpu_mpi_checkpoint_template { struct _starpu_mpi_checkpoint_template_item_list list; int size; int cp_id; int checkpoint_domain; int message_to_send_number; int frozen; starpu_pthread_mutex_t mutex; int *backup_of_array; int backup_of_array_max_size; int backup_of_array_used_size; int *backupped_by_array; int backupped_by_array_max_size; int backupped_by_array_used_size; }; static inline int checkpoint_template_array_realloc(int** array, int* max_size, int growth_factor) { // fprintf(stderr, "old array %p - first elem %d\n", *array, *array[0]); // fprintf(stderr, "Newsize=%d\n", growth_factor*(*max_size)); _STARPU_MPI_REALLOC(*array, growth_factor*(*max_size)*sizeof(int)); // fprintf(stderr, "Newarray=%p\n", *array); *max_size = growth_factor*(*max_size); return *max_size; } static inline int checkpoint_template_backup_of_array_realloc_double(struct _starpu_mpi_checkpoint_template* checkpoint_template) { return checkpoint_template_array_realloc(&checkpoint_template->backup_of_array, &checkpoint_template->backup_of_array_max_size, 2); } static inline int checkpoint_template_backupped_by_array_realloc_double(struct _starpu_mpi_checkpoint_template* checkpoint_template) { return checkpoint_template_array_realloc(&checkpoint_template->backupped_by_array, &checkpoint_template->backupped_by_array_max_size, 2); } static inline struct _starpu_mpi_checkpoint_template_item* _starpu_mpi_checkpoint_template_item_create(int type, void* ptr, int count, int backupped_by, int backup_of, starpu_mpi_tag_t tag) { struct _starpu_mpi_checkpoint_template_item* item; _STARPU_MPI_CALLOC(item, 1, sizeof(struct _starpu_mpi_checkpoint_template_item)); item->type = type; item->ptr = ptr; item->count = count; item->backupped_by = backupped_by; item->backup_of = backup_of; item->tag = tag; return item; } static inline starpu_mpi_checkpoint_template_t _starpu_mpi_checkpoint_template_new(int cp_id, int cp_domain) { starpu_mpi_checkpoint_template_t _cp_template; _STARPU_MPI_CALLOC(_cp_template, 1, sizeof(struct _starpu_mpi_checkpoint_template)); _cp_template->cp_id = cp_id; _cp_template->checkpoint_domain = cp_domain; _cp_template->backup_of_array_max_size = _CHECKPOINT_TEMPLATE_BACKUPED_RANK_ARRAY_DEFAULT_SIZE; _STARPU_MPI_MALLOC(_cp_template->backup_of_array, _CHECKPOINT_TEMPLATE_BACKUPED_RANK_ARRAY_DEFAULT_SIZE); _cp_template->backup_of_array[0] = -1; _cp_template->backup_of_array_used_size = 0; _cp_template->backupped_by_array_max_size = _CHECKPOINT_TEMPLATE_BACKUPED_RANK_ARRAY_DEFAULT_SIZE; _STARPU_MPI_MALLOC(_cp_template->backupped_by_array, _CHECKPOINT_TEMPLATE_BACKUPED_RANK_ARRAY_DEFAULT_SIZE); _cp_template->backupped_by_array[0] = -1; _cp_template->backupped_by_array_used_size = 0; STARPU_PTHREAD_MUTEX_INIT(&_cp_template->mutex, NULL); return _cp_template; } static inline int _checkpoint_template_add_to_backup_arrays(starpu_mpi_checkpoint_template_t cp_template, int backupped_by, int backup_of) { int i; if (backup_of == -1) { for (i = 0; i < cp_template->backupped_by_array_used_size; i++) { if (backupped_by == cp_template->backupped_by_array[i]) { return 0; } } if (cp_template->backupped_by_array_used_size + 1 == cp_template->backupped_by_array_max_size) { checkpoint_template_backupped_by_array_realloc_double(cp_template); } cp_template->backupped_by_array[cp_template->backupped_by_array_used_size] = backupped_by; cp_template->backupped_by_array_used_size++; cp_template->backupped_by_array[cp_template->backupped_by_array_used_size] = -1; return backupped_by; } else if (backupped_by == -1) { for (i = 0; i < cp_template->backup_of_array_used_size; i++) { if (backup_of == cp_template->backup_of_array[i]) { return 0; } } if (cp_template->backup_of_array_used_size + 1 == cp_template->backup_of_array_max_size) { checkpoint_template_backup_of_array_realloc_double(cp_template); } cp_template->backup_of_array[cp_template->backup_of_array_used_size] = backup_of; cp_template->backup_of_array_used_size++; cp_template->backup_of_array[cp_template->backup_of_array_used_size] = -1; return backup_of; } else { _STARPU_DISP("[warning] Checkpoint template item does not refer any backup information. This should not happen.\n"); } return -1; } static inline struct _starpu_mpi_checkpoint_template_item* _starpu_mpi_checkpoint_template_get_first_data(starpu_mpi_checkpoint_template_t template) { return _starpu_mpi_checkpoint_template_item_list_front(&template->list); } static inline struct _starpu_mpi_checkpoint_template_item* _starpu_mpi_checkpoint_template_get_next_data(starpu_mpi_checkpoint_template_t template STARPU_ATTRIBUTE_UNUSED, struct _starpu_mpi_checkpoint_template_item* ref_data) { return _starpu_mpi_checkpoint_template_item_list_next(ref_data); } static inline struct _starpu_mpi_checkpoint_template_item* _starpu_mpi_checkpoint_template_end(starpu_mpi_checkpoint_template_t template STARPU_ATTRIBUTE_UNUSED) { return NULL; } static inline int _starpu_checkpoint_template_free(starpu_mpi_checkpoint_template_t cp_template) { struct _starpu_mpi_checkpoint_template_item* item; struct _starpu_mpi_checkpoint_template_item* next_item; STARPU_PTHREAD_MUTEX_LOCK(&cp_template->mutex); item = _starpu_mpi_checkpoint_template_get_first_data(cp_template); while (item != _starpu_mpi_checkpoint_template_end(cp_template)) { next_item = _starpu_mpi_checkpoint_template_get_next_data(cp_template, item); free(item); item = next_item; } STARPU_PTHREAD_MUTEX_UNLOCK(&cp_template->mutex); STARPU_PTHREAD_MUTEX_DESTROY(&cp_template->mutex); free(cp_template); return 0; } #ifdef __cplusplus } #endif #endif //_STARPU_MPI_CHECKPOINT_TEMPLATE_H starpu-1.4.9+dfsg/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.c000066400000000000000000000232071507764646700301710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "starpu_mpi_checkpoint_template.h" struct _starpu_mpi_checkpoint_domain_tracker_index_list* domain_tracker_list; starpu_pthread_mutex_t tracker_mutex; struct _starpu_mpi_checkpoint_domain_tracker_entry { UT_hash_handle hh; int instance; struct _starpu_mpi_checkpoint_tracker tracker; }; LIST_TYPE(_starpu_mpi_checkpoint_domain_tracker_index, int domain; struct _starpu_mpi_checkpoint_tracker* last_valid_instance; struct _starpu_mpi_checkpoint_domain_tracker_entry* tracked_inst_hash_table; ) static inline void _starpu_mpi_checkpoint_domain_tracker_index_init(struct _starpu_mpi_checkpoint_domain_tracker_index* index) { index->domain = -1; index->tracked_inst_hash_table = NULL; index->last_valid_instance = NULL; } static inline void _starpu_mpi_checkpoint_domain_tracker_entry_init(struct _starpu_mpi_checkpoint_domain_tracker_entry* entry) { entry->instance = -1; entry->tracker.cp_id = -1; entry->tracker.cp_inst = -1; entry->tracker.cp_domain = -1; entry->tracker.cp_template = NULL; entry->tracker.ack_msg_count = 0; entry->tracker.first_msg_sent_flag = 0; entry->tracker.valid = 0; entry->tracker.old = 0; } static inline struct _starpu_mpi_checkpoint_domain_tracker_index* get_domain_tracker_index(int domain) { struct _starpu_mpi_checkpoint_domain_tracker_index* index; for (index = _starpu_mpi_checkpoint_domain_tracker_index_list_begin(domain_tracker_list) ; index != _starpu_mpi_checkpoint_domain_tracker_index_list_end(domain_tracker_list) ; index = _starpu_mpi_checkpoint_domain_tracker_index_list_next(index)) { if (index->domain == domain) { return index; } } return NULL; } static inline struct _starpu_mpi_checkpoint_domain_tracker_index* add_domain_tracker_index(int domain) { struct _starpu_mpi_checkpoint_domain_tracker_index* index; _STARPU_MPI_MALLOC(index, sizeof(struct _starpu_mpi_checkpoint_domain_tracker_index)); _starpu_mpi_checkpoint_domain_tracker_index_init(index); index->domain = domain; _starpu_mpi_checkpoint_domain_tracker_index_list_push_back(domain_tracker_list, index); return index; } static inline struct _starpu_mpi_checkpoint_domain_tracker_entry* get_tracker_entry(struct _starpu_mpi_checkpoint_domain_tracker_index* index, int instance) { struct _starpu_mpi_checkpoint_domain_tracker_entry* entry = NULL; if (index->tracked_inst_hash_table) { HASH_FIND_INT(index->tracked_inst_hash_table, &instance, entry); } return entry; } static inline struct _starpu_mpi_checkpoint_domain_tracker_entry* add_tracker_entry(struct _starpu_mpi_checkpoint_domain_tracker_index* index, int cp_id, int cp_inst, int cp_domain, starpu_mpi_checkpoint_template_t cp_template) { struct _starpu_mpi_checkpoint_domain_tracker_entry* entry; _STARPU_MPI_MALLOC(entry, sizeof(struct _starpu_mpi_checkpoint_domain_tracker_entry)); _starpu_mpi_checkpoint_domain_tracker_entry_init(entry); entry->instance = cp_inst; entry->tracker.cp_id = cp_id; entry->tracker.cp_inst = cp_inst; entry->tracker.cp_domain = cp_domain; entry->tracker.cp_template = cp_template; entry->tracker.ack_msg_count = cp_template->message_to_send_number; HASH_ADD_INT(index->tracked_inst_hash_table, instance, entry); return entry; } static inline int _clear_domain_tracker_index(struct _starpu_mpi_checkpoint_domain_tracker_index* index) { struct _starpu_mpi_checkpoint_domain_tracker_entry* entry, *tmp; HASH_ITER(hh, index->tracked_inst_hash_table, entry, tmp) { HASH_DEL(index->tracked_inst_hash_table, entry); free(entry); } return 0; } static inline int _domain_tracker_delete_all() { struct _starpu_mpi_checkpoint_domain_tracker_index* temp_index; struct _starpu_mpi_checkpoint_domain_tracker_index* index = _starpu_mpi_checkpoint_domain_tracker_index_list_begin(domain_tracker_list) ; while (index != _starpu_mpi_checkpoint_domain_tracker_index_list_end(domain_tracker_list)) { temp_index = _starpu_mpi_checkpoint_domain_tracker_index_list_next(index); _clear_domain_tracker_index(index); _starpu_mpi_checkpoint_domain_tracker_index_list_erase(domain_tracker_list, index); free(index); index = temp_index; } return 0; } int _starpu_mpi_checkpoint_tracker_init() { domain_tracker_list = _starpu_mpi_checkpoint_domain_tracker_index_list_new(); STARPU_PTHREAD_MUTEX_INIT(&tracker_mutex, NULL); return 0; } int _starpu_mpi_checkpoint_tracker_shutdown() { _domain_tracker_delete_all(); STARPU_PTHREAD_MUTEX_DESTROY(&tracker_mutex); free(domain_tracker_list); return 0; } struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_template_get_tracking_inst_by_id_inst(int cp_domain, int cp_inst) { STARPU_PTHREAD_MUTEX_LOCK(&tracker_mutex); struct _starpu_mpi_checkpoint_domain_tracker_index *index = get_domain_tracker_index(cp_domain); if (NULL == index) { STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); return NULL; } struct _starpu_mpi_checkpoint_domain_tracker_entry *entry = get_tracker_entry(index, cp_inst); if (NULL == entry) { STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); return NULL; } STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); return &entry->tracker; } struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_template_create_instance_tracker(starpu_mpi_checkpoint_template_t cp_template, int cp_id, int cp_domain, int cp_inst) { STARPU_PTHREAD_MUTEX_LOCK(&tracker_mutex); struct _starpu_mpi_checkpoint_domain_tracker_entry *entry; struct _starpu_mpi_checkpoint_domain_tracker_index *index = get_domain_tracker_index(cp_domain); if (NULL == index) index = add_domain_tracker_index(cp_domain); entry = get_tracker_entry(index, cp_inst); if (NULL == entry) entry = add_tracker_entry(index, cp_id, cp_inst, cp_domain, cp_template); STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); return &entry->tracker; } struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_tracker_update(starpu_mpi_checkpoint_template_t cp_template, int cp_id, int cp_domain, int cp_instance) { STARPU_PTHREAD_MUTEX_LOCK(&tracker_mutex); struct _starpu_mpi_checkpoint_domain_tracker_entry* entry; struct _starpu_mpi_checkpoint_domain_tracker_index* index = get_domain_tracker_index(cp_domain); if (NULL == index) index = add_domain_tracker_index(cp_domain); entry = get_tracker_entry(index, cp_instance); if (NULL == entry) { STARPU_ASSERT_MSG(cp_template!=NULL, "Couldn't find a CP template with the cpid:%d\n", cp_id); entry = add_tracker_entry(index, cp_id, cp_instance, cp_domain, cp_template); } STARPU_ASSERT_MSG(entry->tracker.ack_msg_count>0, "Error. Trying to count ack message while all have already been received. id:%d, inst:%d, remaining_ack_messages:%d\n", entry->tracker.cp_id, entry->instance, entry->tracker.ack_msg_count); entry->tracker.ack_msg_count--; STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); return &entry->tracker; } int _starpu_mpi_checkpoint_check_tracker(struct _starpu_mpi_checkpoint_tracker* tracker) { if (tracker->valid) { return -1; } return tracker->ack_msg_count; } struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_tracker_validate_instance(struct _starpu_mpi_checkpoint_tracker* tracker) { STARPU_PTHREAD_MUTEX_LOCK(&tracker_mutex); // Here we validate a checkpoint and return the old cp info that must be discarded struct _starpu_mpi_checkpoint_tracker* temp_tracker; struct _starpu_mpi_checkpoint_domain_tracker_index* index = get_domain_tracker_index(tracker->cp_domain); if (NULL == index->last_valid_instance || tracker->cp_inst > index->last_valid_instance->cp_inst) { _STARPU_MPI_DEBUG(0, "The CP (id:%d - dom:%d - inst:%d) has been fully acknowledged, and is now the latest valid CP for the domain.\n", tracker->cp_id, tracker->cp_domain, tracker->cp_inst); // The checkpoint to validate is the newest of the domain. Update the latest CP and return the old "latest" temp_tracker = index->last_valid_instance; index->last_valid_instance = tracker; tracker->valid = 1; if (STARPU_LIKELY(temp_tracker!=NULL)) { temp_tracker->old = 1; } STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); return temp_tracker; } else { _STARPU_MPI_DEBUG(0, "The CP (id:%d - dom:%d - inst:%d) has been fully acknowledged, while a more recent one (id:%d - dom:%d - inst:%d) is already validated.\n", tracker->cp_id, tracker->cp_domain, tracker->cp_inst, index->last_valid_instance->cp_id, index->last_valid_instance->cp_domain, index->last_valid_instance->cp_inst); // The checkpoint to validate is older than the latest validated, just return it to discard it tracker->valid = 1; tracker->old =1; STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); return tracker; } } struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_tracker_get_last_valid_tracker(int domain) { STARPU_PTHREAD_MUTEX_LOCK(&tracker_mutex); struct _starpu_mpi_checkpoint_domain_tracker_index* index = get_domain_tracker_index(domain); STARPU_PTHREAD_MUTEX_UNLOCK(&tracker_mutex); return index->last_valid_instance; } starpu-1.4.9+dfsg/mpi/src/mpi_failure_tolerance/starpu_mpi_checkpoint_tracker.h000066400000000000000000000042521507764646700301750ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef FT_STARPU_STARPU_MPI_CHECKPOINT_TRACKER_H #define FT_STARPU_STARPU_MPI_CHECKPOINT_TRACKER_H #ifdef __cplusplus extern "C" { #endif struct _starpu_mpi_checkpoint_tracker { int cp_id; int cp_inst; int cp_domain; starpu_mpi_checkpoint_template_t cp_template; int ack_msg_count; int first_msg_sent_flag; int old:1; int valid: 1; }; int _starpu_mpi_checkpoint_tracker_init(); int _starpu_mpi_checkpoint_tracker_shutdown(); struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_template_get_tracking_inst_by_id_inst(int cp_domain, int cp_inst); struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_template_create_instance_tracker(starpu_mpi_checkpoint_template_t cp_template, int cp_id, int cp_domain, int cp_inst); struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_tracker_update(starpu_mpi_checkpoint_template_t cp_template, int cp_id, int cp_domain, int cp_instance); int _starpu_mpi_checkpoint_check_tracker(struct _starpu_mpi_checkpoint_tracker* tracker); struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_tracker_validate_instance(struct _starpu_mpi_checkpoint_tracker* tracker); struct _starpu_mpi_checkpoint_tracker* _starpu_mpi_checkpoint_tracker_get_last_valid_tracker(int domain); #ifdef __cplusplus } #endif #endif //FT_STARPU_STARPU_MPI_CHECKPOINT_TRACKER_H starpu-1.4.9+dfsg/mpi/src/mpi_failure_tolerance/starpu_mpi_ft.c000066400000000000000000000035561507764646700247450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include starpu_pthread_mutex_t ft_mutex; int _my_rank; int starpu_mpi_checkpoint_init(void) { STARPU_PTHREAD_MUTEX_INIT(&ft_mutex, NULL); starpu_mpi_comm_rank(MPI_COMM_WORLD, &_my_rank); //TODO: check compatibility with several Comms behaviour starpu_mpi_ft_service_lib_init(_ack_msg_recv_cb, _cp_discard_message_recv_cb); checkpoint_template_lib_init(); _starpu_mpi_checkpoint_tracker_init(); checkpoint_package_init(); _STARPU_MPI_FT_STATS_INIT(); return 0; } int starpu_mpi_checkpoint_shutdown(void) { checkpoint_template_lib_quit(); checkpoint_package_shutdown(); _starpu_mpi_checkpoint_tracker_shutdown(); STARPU_PTHREAD_MUTEX_DESTROY(&ft_mutex); _STARPU_MPI_FT_STATS_WRITE_TO_FD(stderr); _STARPU_MPI_FT_STATS_SHUTDOWN(); return 0; } void starpu_mpi_ft_progress(void) { starpu_mpi_ft_service_progress(); } int starpu_mpi_ft_busy() { return starpu_mpi_ft_service_lib_busy(); } starpu-1.4.9+dfsg/mpi/src/mpi_failure_tolerance/starpu_mpi_ft.h000066400000000000000000000015771507764646700247530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef _STARPU_MPI_FT_H #define _STARPU_MPI_FT_H #ifdef __cplusplus extern "C" { #endif void starpu_mpi_ft_progress(void); int starpu_mpi_ft_busy(); #ifdef __cplusplus } #endif #endif //_STARPU_MPI_FT_H starpu-1.4.9+dfsg/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_service_comms.c000066400000000000000000000331771507764646700276650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include // Should be deduced at preprocessing (Nmad vs MPI) #include #include "starpu_mpi_cache.h" #define SIMULTANEOUS_ACK_MSG_RECV_MAX 2 #define SIMULTANEOUS_CP_INFO_RECV_MAX 2 #define SIMULTANEOUS_PENDING_SEND_MAX 40 static struct _starpu_mpi_req_list detached_ft_service_requests; static struct _starpu_mpi_req_list ready_send_ft_service_requests; static unsigned detached_send_n_ft_service_requests; static starpu_pthread_mutex_t detached_ft_service_requests_mutex; static starpu_pthread_mutex_t ft_service_requests_mutex; int ready_ack_msgs_recv; int pending_ack_msgs_recv; int ready_cp_info_msgs_recv; int pending_cp_info_msgs_recv; int ready_send_ft_service_msg; int pending_send_ft_service_msg; typedef void (*cb_fn_type)(void*); cb_fn_type ack_msg_recv_cb; cb_fn_type cp_info_recv_cb; int _starpu_mpi_ft_service_submit_rdy() { int i; struct _starpu_mpi_req* req; int max_loop; STARPU_PTHREAD_MUTEX_LOCK(&ft_service_requests_mutex); max_loop = MIN(SIMULTANEOUS_ACK_MSG_RECV_MAX-pending_ack_msgs_recv, ready_ack_msgs_recv); for (i=0 ; imsg)); req->ptr = (void*)&arg->msg; req->datatype = MPI_BYTE; _STARPU_MALLOC(req->status, sizeof(MPI_Status)); STARPU_PTHREAD_MUTEX_LOCK(&detached_ft_service_requests_mutex); MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.node.rank, req->node_tag.data_tag, req->node_tag.node.comm, &req->backend->data_request); _STARPU_MPI_DEBUG(5, "Posting MPI_Irecv ft service msg: req %p tag %"PRIi64" src %d comm %ld ptr %p\n", req, req->node_tag.data_tag, req->node_tag.node.rank, (long int)req->node_tag.node.comm, req->ptr); _starpu_mpi_req_list_push_back(&detached_ft_service_requests, req); pending_ack_msgs_recv++; ready_ack_msgs_recv--; req->submitted = 1; STARPU_PTHREAD_MUTEX_UNLOCK(&detached_ft_service_requests_mutex); } max_loop = MIN(SIMULTANEOUS_CP_INFO_RECV_MAX-pending_cp_info_msgs_recv, ready_cp_info_msgs_recv); for (i=0 ; imsg)); req->ptr = (void*)&arg->msg; req->datatype = MPI_BYTE; _STARPU_MALLOC(req->status, sizeof(MPI_Status)); STARPU_PTHREAD_MUTEX_LOCK(&detached_ft_service_requests_mutex); MPI_Irecv(req->ptr, req->count, req->datatype, req->node_tag.node.rank, req->node_tag.data_tag, req->node_tag.node.comm, &req->backend->data_request); _STARPU_MPI_DEBUG(5, "Posting MPI_Irecv ft service msg: req %p tag %"PRIi64" src %d comm %ld ptr %p\n", req, req->node_tag.data_tag, req->node_tag.node.rank, (long int)req->node_tag.node.comm, req->ptr); _starpu_mpi_req_list_push_back(&detached_ft_service_requests, req); pending_cp_info_msgs_recv++; ready_cp_info_msgs_recv--; req->submitted = 1; STARPU_PTHREAD_MUTEX_UNLOCK(&detached_ft_service_requests_mutex); } max_loop = MIN(SIMULTANEOUS_PENDING_SEND_MAX-pending_send_ft_service_msg, ready_send_ft_service_msg); for (i=0 ; iptr, req->count, req->datatype, req->node_tag.node.rank, req->node_tag.data_tag, req->node_tag.node.comm, &req->backend->data_request); _STARPU_MPI_DEBUG(5, "Posting MPI_Isend ft service msg: req %p tag %"PRIi64" src %d comm %ld ptr %p\n", req, req->node_tag.data_tag, req->node_tag.node.rank, (long int)req->node_tag.node.comm, req->ptr); _starpu_mpi_req_list_push_back(&detached_ft_service_requests, req); pending_send_ft_service_msg++; ready_send_ft_service_msg--; req->submitted = 1; STARPU_PTHREAD_MUTEX_UNLOCK(&detached_ft_service_requests_mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&ft_service_requests_mutex); } int _starpu_mpi_ft_service_post_special_recv(int tag) { _STARPU_MPI_DEBUG(5, "Pushing ft service msg: %s tag %"PRIi64" ANYSOURCE\n", _starpu_mpi_request_type(RECV_REQ), tag); if (tag==_STARPU_MPI_TAG_CP_ACK) { STARPU_PTHREAD_MUTEX_LOCK(&ft_service_requests_mutex); ready_ack_msgs_recv++; STARPU_PTHREAD_MUTEX_UNLOCK(&ft_service_requests_mutex); } else if (tag==_STARPU_MPI_TAG_CP_INFO) { STARPU_PTHREAD_MUTEX_LOCK(&ft_service_requests_mutex); ready_cp_info_msgs_recv++; STARPU_PTHREAD_MUTEX_UNLOCK(&ft_service_requests_mutex); } else { STARPU_ABORT_MSG("Only _STARPU_MPI_TAG_CP_ACK or _STARPU_MPI_TAG_CP_INFO are service msgs.\n"); } _starpu_mpi_wake_up_progress_thread(); return 0; } int _starpu_mpi_ft_service_post_send(void* msg, int count, int rank, int tag, MPI_Comm comm, void (*callback)(void *), void* arg) { struct _starpu_mpi_req* req; /* Check if the tag is a service message */ STARPU_ASSERT_MSG(tag==_STARPU_MPI_TAG_CP_ACK || tag == _STARPU_MPI_TAG_CP_INFO, "Only _STARPU_MPI_TAG_CP_ACK or _STARPU_MPI_TAG_CP_INFO are service msgs."); /* Initialize the request structure */ req = _starpu_mpi_request_fill(NULL, rank, tag, comm, 1, 0, 0, callback, arg, SEND_REQ, NULL, 1, 0, count); // TODO: Check compatibility with prio req->ptr = msg; req->datatype = MPI_BYTE; _STARPU_MALLOC(req->status, sizeof(MPI_Status)); _STARPU_MPI_DEBUG(5, "Pushing ft service msg: %s req %p tag %"PRIi64" src %d ptr %p\n", _starpu_mpi_request_type(SEND_REQ), req, tag, rank, msg); STARPU_PTHREAD_MUTEX_LOCK(&ft_service_requests_mutex); ready_send_ft_service_msg++; _starpu_mpi_req_list_push_back(&ready_send_ft_service_requests, req); STARPU_PTHREAD_MUTEX_UNLOCK(&ft_service_requests_mutex); _starpu_mpi_wake_up_progress_thread(); return 0; } static void _starpu_mpi_handle_ft_request_termination(struct _starpu_mpi_req *req) { _STARPU_MPI_LOG_IN(); _STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %"PRIi64" src %d data %p ptr %p datatype '%s' count %d registered_datatype %d internal_req %p\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int) req->count, req->registered_datatype, req->backend->internal_req); if (req->backend->internal_req) { // free(req->backend->early_data_handle); // req->backend->early_data_handle = NULL; } else { if (req->request_type == RECV_REQ || req->request_type == SEND_REQ) { if (req->registered_datatype == 0) { if (req->request_type == SEND_REQ) { // We need to make sure the communication for sending the size // has completed, as MPI can re-order messages, let's call // MPI_Wait to make sure data have been sent starpu_free_on_node_flags(STARPU_MAIN_RAM, (uintptr_t) req->ptr, req->count, 0); req->ptr = NULL; } else if (req->request_type == RECV_REQ) { // req->ptr is freed by starpu_data_unpack starpu_data_unpack(req->data_handle, req->ptr, req->count); starpu_memory_deallocate(STARPU_MAIN_RAM, req->count); } } else { //_starpu_mpi_datatype_free(req->data_handle, &req->datatype); } } //_STARPU_MPI_TRACE_TERMINATED(req, req->node_tag.node.rank, req->node_tag.data_tag); } _starpu_mpi_release_req_data(req); if (req->backend->envelope) { free(req->backend->envelope); req->backend->envelope = NULL; } /* Execute the specified callback, if any */ if (req->callback) { if (req->request_type == RECV_REQ) { if (req->node_tag.data_tag == _STARPU_MPI_TAG_CP_ACK) { struct _starpu_mpi_cp_ack_arg_cb* tmp = (struct _starpu_mpi_cp_ack_arg_cb *) req->callback_arg; tmp->rank = req->status->MPI_SOURCE; } else if (req->node_tag.data_tag == _STARPU_MPI_TAG_CP_INFO) { struct _starpu_mpi_cp_discard_arg_cb* tmp = (struct _starpu_mpi_cp_discard_arg_cb *) req->callback_arg; tmp->rank = req->status->MPI_SOURCE; } } req->callback(req->callback_arg); } /* tell anyone potentially waiting on the request that it is * terminated now */ STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); req->completed = 1; STARPU_PTHREAD_COND_BROADCAST(&req->backend->req_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); _STARPU_MPI_LOG_OUT(); } void starpu_mpi_test_ft_detached_service_requests(void) { //_STARPU_MPI_LOG_IN(); int flag; struct _starpu_mpi_req *req; STARPU_PTHREAD_MUTEX_LOCK(&detached_ft_service_requests_mutex); if (_starpu_mpi_req_list_empty(&detached_ft_service_requests)) { STARPU_PTHREAD_MUTEX_UNLOCK(&detached_ft_service_requests_mutex); //_STARPU_MPI_LOG_OUT(); return; } //_STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN(); req = _starpu_mpi_req_list_begin(&detached_ft_service_requests); while (req != _starpu_mpi_req_list_end(&detached_ft_service_requests)) { STARPU_PTHREAD_MUTEX_UNLOCK(&detached_ft_service_requests_mutex); //_STARPU_MPI_TRACE_TEST_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); //_STARPU_MPI_DEBUG(3, "Test detached request %p - mpitag %"PRIi64" - TYPE %s %d\n", &req->backend->data_request, req->node_tag.data_tag, _starpu_mpi_request_type(req->request_type), req->node_tag.node.rank); #ifdef STARPU_SIMGRID req->ret = _starpu_mpi_simgrid_mpi_test(&req->done, &flag); #else STARPU_MPI_ASSERT_MSG(req->backend->data_request != MPI_REQUEST_NULL, "Cannot test completion of the request MPI_REQUEST_NULL"); req->ret = MPI_Test(&req->backend->data_request, &flag, req->status); #endif STARPU_MPI_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %s", _starpu_mpi_get_mpi_error_code(req->ret)); //_STARPU_MPI_TRACE_TEST_END(req->node_tag.node.rank, req->node_tag.data_tag); if (!flag) { req = _starpu_mpi_req_list_next(req); } else { //_STARPU_MPI_TRACE_POLLING_END(); struct _starpu_mpi_req *next_req; next_req = _starpu_mpi_req_list_next(req); //_STARPU_MPI_TRACE_COMPLETE_BEGIN(req->request_type, req->node_tag.node.rank, req->node_tag.data_tag); STARPU_PTHREAD_MUTEX_LOCK(&detached_ft_service_requests_mutex); STARPU_PTHREAD_MUTEX_LOCK(&ft_service_requests_mutex); if (req->request_type == SEND_REQ) pending_send_ft_service_msg--; if (req->request_type == RECV_REQ) { if (req->node_tag.data_tag == _STARPU_MPI_TAG_CP_ACK) pending_ack_msgs_recv--; else if (req->node_tag.data_tag == _STARPU_MPI_TAG_CP_INFO) pending_cp_info_msgs_recv--; } STARPU_PTHREAD_MUTEX_UNLOCK(&ft_service_requests_mutex); _starpu_mpi_req_list_erase(&detached_ft_service_requests, req); STARPU_PTHREAD_MUTEX_UNLOCK(&detached_ft_service_requests_mutex); _starpu_mpi_handle_ft_request_termination(req); //_STARPU_MPI_TRACE_COMPLETE_END(req->request_type, req->node_tag.node.rank, req->node_tag.data_tag); STARPU_PTHREAD_MUTEX_LOCK(&req->backend->req_mutex); /* We don't want to free internal non-detached requests, we need to get their MPI request before destroying them */ if (req->backend->is_internal_req && !req->backend->to_destroy) { /* We have completed the request, let the application request destroy it */ req->backend->to_destroy = 1; STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); } else { STARPU_PTHREAD_MUTEX_UNLOCK(&req->backend->req_mutex); _starpu_mpi_request_destroy(req); } req = next_req; //_STARPU_MPI_TRACE_POLLING_BEGIN(); } STARPU_PTHREAD_MUTEX_LOCK(&detached_ft_service_requests_mutex); } //_STARPU_MPI_TRACE_TESTING_DETACHED_END(); STARPU_PTHREAD_MUTEX_UNLOCK(&detached_ft_service_requests_mutex); //_STARPU_MPI_LOG_OUT(); } int starpu_mpi_ft_service_progress() { starpu_mpi_test_ft_detached_service_requests(); _starpu_mpi_ft_service_submit_rdy(); return 0; } int starpu_mpi_ft_service_lib_init(void(*_ack_msg_recv_cb)(void*), void(*_cp_info_recv_cb)(void*)) { _starpu_mpi_req_list_init(&detached_ft_service_requests); _starpu_mpi_req_list_init(&ready_send_ft_service_requests); STARPU_PTHREAD_MUTEX_INIT(&detached_ft_service_requests_mutex, NULL); STARPU_PTHREAD_MUTEX_INIT(&ft_service_requests_mutex, NULL); ready_ack_msgs_recv = 0; pending_ack_msgs_recv = 0; ready_cp_info_msgs_recv = 0; pending_cp_info_msgs_recv = 0; ready_send_ft_service_msg = 0; pending_send_ft_service_msg = 0; ack_msg_recv_cb = _ack_msg_recv_cb; cp_info_recv_cb = _cp_info_recv_cb; return 0; } int starpu_mpi_ft_service_lib_busy() { return !_starpu_mpi_req_list_empty(&detached_ft_service_requests); } starpu-1.4.9+dfsg/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_service_comms.h000066400000000000000000000024521507764646700276620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef FT_STARPU_STARPU_MPI_FT_SERVICE_COMMS_H #define FT_STARPU_STARPU_MPI_FT_SERVICE_COMMS_H #ifdef __cplusplus extern "C" { #endif int _starpu_mpi_ft_service_post_special_recv(int tag); int _starpu_mpi_ft_service_post_send(void* msg, int count, int rank, int tag, MPI_Comm comm, void (*callback)(void *), void* arg); void starpu_mpi_test_ft_detached_service_requests(void); int starpu_mpi_ft_service_progress(); int starpu_mpi_ft_service_lib_init(void(*_ack_msg_recv_cb)(void*), void(*cp_info_recv_cb)(void*)); int starpu_mpi_ft_service_lib_busy(); #ifdef __cplusplus } #endif #endif //FT_STARPU_STARPU_MPI_FT_SERVICE_COMMS_H starpu-1.4.9+dfsg/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_stats.c000066400000000000000000000027071507764646700261600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include starpu_pthread_mutex_t _ft_stats_mutex; int cp_data_msgs_sent_count; size_t cp_data_msgs_sent_total_size; int cp_data_msgs_received_count; size_t cp_data_msgs_received_total_size; int cp_data_msgs_sent_cached_count; size_t cp_data_msgs_sent_cached_total_size; int cp_data_msgs_received_cached_count; size_t cp_data_msgs_received_cached_total_size; int cp_data_msgs_received_cp_cached_count; size_t cp_data_msgs_received_cp_cached_total_size; int ft_service_msgs_sent_count; size_t ft_service_msgs_sent_total_size; int ft_service_msgs_received_count; size_t ft_service_msgs_received_total_size; struct size_sample_list cp_data_in_memory_list; //over time size_t cp_data_in_memory_size_max_at_t; size_t cp_data_in_memory_size_total; starpu-1.4.9+dfsg/mpi/src/mpi_failure_tolerance/starpu_mpi_ft_stats.h000066400000000000000000000273161507764646700261700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef FT_STARPU_STARPU_MPI_FT_STATS_H #define FT_STARPU_STARPU_MPI_FT_STATS_H #include #include #ifdef STARPU_USE_MPI_FT #ifdef __cplusplus extern "C" { #endif extern starpu_pthread_mutex_t _ft_stats_mutex; extern int cp_data_msgs_sent_count; extern size_t cp_data_msgs_sent_total_size; extern int cp_data_msgs_received_count; extern size_t cp_data_msgs_received_total_size; extern int cp_data_msgs_sent_cached_count; extern size_t cp_data_msgs_sent_cached_total_size; extern int cp_data_msgs_received_cached_count; extern size_t cp_data_msgs_received_cached_total_size; extern int cp_data_msgs_received_cp_cached_count; extern size_t cp_data_msgs_received_cp_cached_total_size; extern int ft_service_msgs_sent_count; extern size_t ft_service_msgs_sent_total_size; extern int ft_service_msgs_received_count; extern size_t ft_service_msgs_received_total_size; extern struct size_sample_list cp_data_in_memory_list; //over time extern size_t cp_data_in_memory_size_total; extern size_t cp_data_in_memory_size_max_at_t; static inline void stat_init(); static inline void _starpu_ft_stats_shutdown(); static inline void _starpu_ft_stats_write_to_fd(); static inline void _starpu_ft_stats_send_data(size_t size); static inline void _starpu_ft_stats_send_data_cached(size_t size);; static inline void _starpu_ft_stats_recv_data(size_t size); static inline void _starpu_ft_stats_recv_data_cached(size_t size); static inline void _starpu_ft_stats_recv_data_cp_cached(size_t size); static inline void _starpu_ft_stats_service_msg_send(size_t size); static inline void _starpu_ft_stats_service_msg_recv(size_t size); static inline void _starpu_ft_stats_add_cp_data_in_memory(size_t size); static inline void _starpu_ft_stats_free_cp_data_in_memory(size_t size); #ifdef STARPU_USE_MPI_FT_STATS #define _STARPU_MPI_FT_STATS_INIT() do{ stat_init(); }while(0) #define _STARPU_MPI_FT_STATS_SHUTDOWN() do{ _starpu_ft_stats_shutdown(); }while(0) #define _STARPU_MPI_FT_STATS_WRITE_TO_FD(fd) do{ _starpu_ft_stats_write_to_fd(fd); }while(0) #define _STARPU_MPI_FT_STATS_SEND_CP_DATA(size) do{ _starpu_ft_stats_send_data(size); }while(0) #define _STARPU_MPI_FT_STATS_CANCEL_SEND_CP_DATA(size) do{ _starpu_ft_stats_cancel_send_data(size); }while(0) #define _STARPU_MPI_FT_STATS_SEND_CACHED_CP_DATA(size) do{ _starpu_ft_stats_send_data_cached(size); }while(0) #define _STARPU_MPI_FT_STATS_RECV_CP_DATA(size) do{ _starpu_ft_stats_recv_data(size); }while(0) #define _STARPU_MPI_FT_STATS_CANCEL_RECV_CP_DATA(size) do{ _starpu_ft_stats_cancel_recv_data(size); }while(0) #define _STARPU_MPI_FT_STATS_RECV_CACHED_CP_DATA(size) do{ _starpu_ft_stats_recv_data_cached(size); }while(0) #define _STARPU_MPI_FT_STATS_RECV_CP_CACHED_CP_DATA(size) do{ _starpu_ft_stats_recv_data_cp_cached(size); }while(0) #define _STARPU_MPI_FT_STATS_SEND_FT_SERVICE_MSG(size) do{ _starpu_ft_stats_service_msg_send(size); }while(0) #define _STARPU_MPI_FT_STATS_RECV_FT_SERVICE_MSG(size) do{ _starpu_ft_stats_service_msg_recv(size); }while(0) #define _STARPU_MPI_FT_STATS_STORE_CP_DATA(size) do{ _starpu_ft_stats_add_cp_data_in_memory(size); }while(0) #define _STARPU_MPI_FT_STATS_DISCARD_CP_DATA(size) do{ _starpu_ft_stats_free_cp_data_in_memory(size); }while(0) #else //_STARPU_MPI_FT_STATS #define _STARPU_MPI_FT_STATS_INIT() do{}while(0) #define _STARPU_MPI_FT_STATS_SHUTDOWN() do{}while(0) #define _STARPU_MPI_FT_STATS_WRITE_TO_FD(fd) do{}while(0) #define _STARPU_MPI_FT_STATS_SEND_CP_DATA(size) do{}while(0) #define _STARPU_MPI_FT_STATS_CANCEL_SEND_CP_DATA(size) do{}while(0) #define _STARPU_MPI_FT_STATS_SEND_CACHED_CP_DATA(size) do{}while(0) #define _STARPU_MPI_FT_STATS_RECV_CP_DATA(size) do{}while(0) #define _STARPU_MPI_FT_STATS_CANCEL_RECV_CP_DATA(size) do{}while(0) #define _STARPU_MPI_FT_STATS_RECV_CACHED_CP_DATA(size) do{}while(0) #define _STARPU_MPI_FT_STATS_RECV_CP_CACHED_CP_DATA(size) do{}while(0) #define _STARPU_MPI_FT_STATS_SEND_FT_SERVICE_MSG(size) do{}while(0) #define _STARPU_MPI_FT_STATS_RECV_FT_SERVICE_MSG(size) do{}while(0) #define _STARPU_MPI_FT_STATS_STORE_CP_DATA(size) do{}while(0) #define _STARPU_MPI_FT_STATS_DISCARD_CP_DATA(size) do{}while(0) #endif //_STARPU_MPI_FT_STATS LIST_TYPE(size_sample, \ size_t size; ) static inline void stat_init() { STARPU_PTHREAD_MUTEX_INIT(&_ft_stats_mutex, NULL); size_sample_list_init(&cp_data_in_memory_list); cp_data_msgs_sent_count = 0; cp_data_msgs_sent_total_size = 0; cp_data_msgs_received_count = 0; cp_data_msgs_received_total_size = 0; cp_data_msgs_sent_cached_count = 0; cp_data_msgs_sent_cached_total_size = 0; cp_data_msgs_received_cached_count = 0; cp_data_msgs_received_cached_total_size = 0; cp_data_msgs_received_cp_cached_count = 0; cp_data_msgs_received_cp_cached_total_size = 0; ft_service_msgs_sent_count = 0; ft_service_msgs_sent_total_size = 0; ft_service_msgs_received_count = 0; ft_service_msgs_received_total_size = 0; cp_data_in_memory_size_total = 0; cp_data_in_memory_size_max_at_t = 0; } static inline void _starpu_ft_stats_send_data(size_t size) { STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); cp_data_msgs_sent_count++; cp_data_msgs_sent_total_size+=size; STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); } static inline void _starpu_ft_stats_cancel_send_data(size_t size) { STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); cp_data_msgs_sent_count--; cp_data_msgs_sent_total_size-=size; STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); } static inline void _starpu_ft_stats_send_data_cached(size_t size) { STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); cp_data_msgs_sent_cached_count++; cp_data_msgs_sent_cached_total_size+=size; STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); } static inline void _starpu_ft_stats_recv_data(size_t size) { STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); cp_data_msgs_received_count++; cp_data_msgs_received_total_size+=size; STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); } static inline void _starpu_ft_stats_cancel_recv_data(size_t size) { STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); cp_data_msgs_received_count--; cp_data_msgs_received_total_size-=size; STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); } static inline void _starpu_ft_stats_recv_data_cached(size_t size) { STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); cp_data_msgs_received_cached_count++; cp_data_msgs_received_cached_total_size+=size; STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); } static inline void _starpu_ft_stats_recv_data_cp_cached(size_t size) { STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); cp_data_msgs_received_cp_cached_count++; cp_data_msgs_received_cp_cached_total_size+=size; STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); } static inline void _starpu_ft_stats_service_msg_send(size_t size) { STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); ft_service_msgs_sent_count++; ft_service_msgs_sent_total_size+=size; STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); } static inline void _starpu_ft_stats_service_msg_recv(size_t size) { STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); ft_service_msgs_received_count++; ft_service_msgs_received_total_size+=size; STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); } static inline void _starpu_ft_stats_add_cp_data_in_memory(size_t size) { size_t tmp; struct size_sample *tmp_sample, *sample = malloc(sizeof(struct size_sample)); STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); cp_data_in_memory_size_total+=size; tmp_sample = size_sample_list_back(&cp_data_in_memory_list); tmp = (NULL==tmp_sample?0:tmp_sample->size); tmp+=size; if (tmp>cp_data_in_memory_size_max_at_t) { cp_data_in_memory_size_max_at_t = tmp; } sample->size = tmp; size_sample_list_push_back(&cp_data_in_memory_list, sample); STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); } static inline void _starpu_ft_stats_free_cp_data_in_memory(size_t size) { size_t tmp; struct size_sample* sample = malloc(sizeof(struct size_sample)); STARPU_ASSERT_MSG((int)size != -1, "Cannot count a data of size -1. An error has occurred.\n"); STARPU_PTHREAD_MUTEX_LOCK(&_ft_stats_mutex); tmp = size_sample_list_back(&cp_data_in_memory_list)->size; tmp-=size; sample->size = tmp; size_sample_list_push_back(&cp_data_in_memory_list, sample); STARPU_PTHREAD_MUTEX_UNLOCK(&_ft_stats_mutex); } static inline void _ft_stats_free_cp_data_in_memory_list() { struct size_sample *next, *sample = size_sample_list_begin(&cp_data_in_memory_list); while (sample != size_sample_list_end(&cp_data_in_memory_list)) { next = size_sample_list_next(sample); size_sample_list_erase(&cp_data_in_memory_list, sample); free(sample); sample = next; } } static inline void _starpu_ft_stats_write_to_fd(FILE* fd) { // HEADER fprintf(fd, "TYPE\tCP_DATA_NORMAL_COUNT\tCP_DATA_NORMAL_TOTAL_SIZE\tCP_DATA_CACHED_COUNT\tCP_DATA_CACHED_SIZE\tFT_SERVICE_MSGS_COUNT\tFT_SERVICE_MSGS_TOTAL_SIZE\n"); // DATA fprintf(fd, "SEND\t%d\t" "%ld\t" "%d\t" "%ld\t" "%d\t" "%ld\n", cp_data_msgs_sent_count, cp_data_msgs_sent_total_size, cp_data_msgs_sent_cached_count, cp_data_msgs_sent_cached_total_size, ft_service_msgs_sent_count, ft_service_msgs_sent_total_size); fprintf(fd, "RECV\t%d\t" "%ld\t" "%d\t" "%ld\t" "%d\t" "%ld\n", cp_data_msgs_received_count, cp_data_msgs_received_total_size, cp_data_msgs_received_cached_count, cp_data_msgs_received_cached_total_size+cp_data_msgs_received_cp_cached_total_size, ft_service_msgs_received_count, ft_service_msgs_received_total_size); fprintf(fd, "\n"); fprintf(fd, "IN_MEM_CP_DATA_TOTAL:%lu\n", cp_data_in_memory_size_total); fprintf(fd, "\n"); fprintf(fd, "IN_MEM_CP_DATA_MAX_AT_T:%lu\n", cp_data_in_memory_size_max_at_t); fprintf(fd, "\n"); // fprintf(fd, "IN_MEM_CP_DATA_TRACKING\n"); // struct size_sample *sample = size_sample_list_begin(&cp_data_in_memory_list); // while (sample != size_sample_list_end(&cp_data_in_memory_list)) // { // fprintf(fd, "%ld\n", sample->size); // sample = size_sample_list_next(sample); // } // fprintf(fd, "\n"); } static inline void _starpu_ft_stats_shutdown() { _ft_stats_free_cp_data_in_memory_list(); } #ifdef __cplusplus } #endif #endif // STARPU_USE_MPI_FT #endif //FT_STARPU_STARPU_MPI_FT_STATS_H starpu-1.4.9+dfsg/mpi/src/nmad/000077500000000000000000000000001507764646700163035ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/src/nmad/starpu_mpi_nmad.c000066400000000000000000000710701507764646700216360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2017-2017 Guillaume Beauchamp * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef STARPU_USE_FXT #include #endif #ifdef STARPU_USE_MPI_NMAD #include #include #include #include #include "starpu_mpi_nmad_coop.h" #include "starpu_mpi_nmad_backend.h" #include "starpu_mpi_nmad_unknown_datatype.h" void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req); void _starpu_mpi_handle_pending_request(struct _starpu_mpi_req *req); static inline void _starpu_mpi_request_end(struct _starpu_mpi_req* req, int post_callback_sem); static inline void _starpu_mpi_request_try_end(struct _starpu_mpi_req* req, int post_callback_sem); /* Condition to wake up waiting for all current MPI requests to finish */ static starpu_pthread_t progress_thread; static starpu_pthread_cond_t progress_cond; static starpu_pthread_mutex_t progress_mutex; static volatile int running = 0; static starpu_pthread_cond_t mpi_wait_for_all_running_cond; static int mpi_wait_for_all_running = 0; static starpu_pthread_mutex_t mpi_wait_for_all_running_mutex; /* Count running requests: this counter is incremented just before StarPU * submits a MPI request, and decremented when a MPI request finishes. */ static volatile int nb_pending_requests = 0; #define REQ_FINALIZED 0x1 PUK_LFSTACK_TYPE(callback, struct _starpu_mpi_req *req;); static callback_lfstack_t callback_stack; static starpu_sem_t callback_sem; static int nmad_mcast_started = 0; /********************************************************/ /* */ /* Send/Receive functionalities */ /* */ /********************************************************/ void _starpu_mpi_req_willpost(struct _starpu_mpi_req *req STARPU_ATTRIBUTE_UNUSED) { int new_nb = STARPU_ATOMIC_ADD(&nb_pending_requests, 1); (void)new_nb; } /********************************************************/ /* */ /* Send functionalities */ /* */ /********************************************************/ static void _starpu_mpi_isend_known_datatype(struct _starpu_mpi_req *req) { _STARPU_MPI_LOG_IN(); STARPU_ASSERT_MSG(req->registered_datatype == 1, "Datatype is not registered, it cannot be sent through this way !"); _STARPU_MPI_DEBUG(30, "post NM isend request %p type %s tag %ld src %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync); _starpu_mpi_comm_amounts_inc(req->node_tag.node.comm, req->node, req->node_tag.node.rank, req->datatype, req->count); _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag, 0); struct nm_data_s data; nm_mpi_nmad_data_get(&data, (void*)req->ptr, req->datatype, req->count); nm_sr_send_init(req->backend->session, &(req->backend->data_request)); nm_sr_send_pack_data(req->backend->session, &(req->backend->data_request), &data); nm_sr_send_set_priority(req->backend->session, &req->backend->data_request, req->prio); // this trace event is the start of the communication link: _STARPU_MPI_TRACE_ISEND_SUBMIT_END(_STARPU_MPI_FUT_POINT_TO_POINT_SEND, req, req->prio); if (req->sync == 0) { req->ret = nm_sr_send_isend(req->backend->session, &(req->backend->data_request), req->backend->gate, req->node_tag.data_tag); STARPU_ASSERT_MSG(req->ret == NM_ESUCCESS, "MPI_Isend returning %d", req->ret); } else { req->ret = nm_sr_send_issend(req->backend->session, &(req->backend->data_request), req->backend->gate, req->node_tag.data_tag); STARPU_ASSERT_MSG(req->ret == NM_ESUCCESS, "MPI_Issend returning %d", req->ret); } _starpu_mpi_handle_pending_request(req); _STARPU_MPI_LOG_OUT(); } void _starpu_mpi_isend_func(struct _starpu_mpi_req *req) { _STARPU_MPI_LOG_IN(); _starpu_mpi_datatype_allocate(req->data_handle, req); if (req->registered_datatype == 1) { req->count = 1; req->ptr = starpu_data_handle_to_pointer(req->data_handle, req->node); _starpu_mpi_isend_known_datatype(req); } else { _starpu_mpi_isend_unknown_datatype(req); } _STARPU_MPI_LOG_OUT(); } /********************************************************/ /* */ /* Receive functionalities */ /* */ /********************************************************/ static void _starpu_mpi_irecv_known_datatype(struct _starpu_mpi_req *req) { _STARPU_MPI_LOG_IN(); STARPU_ASSERT_MSG(req->registered_datatype == 1, "Datatype is not registered, it cannot be received through this way !"); _STARPU_MPI_DEBUG(20, "post NM irecv request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); struct nm_data_s data; nm_mpi_nmad_data_get(&data, (void*)req->ptr, req->datatype, req->count); nm_sr_recv_init(req->backend->session, &(req->backend->data_request)); nm_sr_recv_unpack_data(req->backend->session, &(req->backend->data_request), &data); nm_sr_recv_irecv(req->backend->session, &(req->backend->data_request), req->backend->gate, req->node_tag.data_tag, NM_TAG_MASK_FULL); _STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.node.rank, req->node_tag.data_tag); _starpu_mpi_handle_pending_request(req); _STARPU_MPI_LOG_OUT(); } void _starpu_mpi_irecv_func(struct _starpu_mpi_req *req) { _STARPU_MPI_LOG_IN(); _starpu_mpi_datatype_allocate(req->data_handle, req); if (req->registered_datatype == 1) { req->count = 1; req->ptr = starpu_data_handle_to_pointer(req->data_handle, req->node); _starpu_mpi_irecv_known_datatype(req); } else { _starpu_mpi_irecv_unknown_datatype(req); } _STARPU_MPI_LOG_OUT(); } /********************************************************/ /* */ /* Wait functionalities */ /* */ /********************************************************/ #define _starpu_mpi_req_status(PUBLIC_REQ,STATUS) do { \ STATUS->MPI_SOURCE=PUBLIC_REQ->node_tag.node.rank; /**< field name mandatory by spec */ \ STATUS->MPI_TAG=PUBLIC_REQ->node_tag.data_tag; /**< field name mandatory by spec */ \ STATUS->MPI_ERROR=PUBLIC_REQ->ret; /**< field name mandatory by spec */ \ STATUS->size=PUBLIC_REQ->count; /**< size of data received */ \ STATUS->cancelled=0; /**< whether request was cancelled */ \ } while(0) int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status) { _STARPU_MPI_LOG_IN(); STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_wait needs a valid starpu_mpi_req"); struct _starpu_mpi_req *req = *public_req; STARPU_MPI_ASSERT_MSG(!req->detached, "MPI_Wait cannot be called on a detached request"); /* we must do a test_locked to avoid race condition : * without req_cond could still be used and couldn't be freed)*/ while (!req->completed || ! piom_cond_test_locked(&(req->backend->req_cond),REQ_FINALIZED)) { piom_cond_wait(&(req->backend->req_cond),REQ_FINALIZED); } if (status!=MPI_STATUS_IGNORE) _starpu_mpi_req_status(req,status); _starpu_mpi_request_try_end(req, 1); *public_req = NULL; _STARPU_MPI_LOG_OUT(); return MPI_SUCCESS; } /********************************************************/ /* */ /* Test functionalities */ /* */ /********************************************************/ int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status) { _STARPU_MPI_LOG_IN(); STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_test needs a valid starpu_mpi_req"); struct _starpu_mpi_req *req = *public_req; STARPU_MPI_ASSERT_MSG(!req->detached, "MPI_Test cannot be called on a detached request"); _STARPU_MPI_DEBUG(2, "Test request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); STARPU_VALGRIND_YIELD(); _STARPU_MPI_TRACE_UTESTING_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); /* we must do a test_locked to avoid race condition : * without req_cond could still be used and couldn't be freed)*/ *flag = req->completed && piom_cond_test_locked(&(req->backend->req_cond),REQ_FINALIZED); if (*flag && status!=MPI_STATUS_IGNORE) _starpu_mpi_req_status(req,status); _STARPU_MPI_TRACE_UTESTING_END(req->node_tag.node.rank, req->node_tag.data_tag); if(*flag) { _starpu_mpi_request_try_end(req, 1); *public_req = NULL; } _STARPU_MPI_LOG_OUT(); return MPI_SUCCESS; } /********************************************************/ /* */ /* Barrier functionalities */ /* */ /********************************************************/ int _starpu_mpi_barrier(MPI_Comm comm) { _STARPU_MPI_LOG_IN(); int ret = MPI_Barrier(comm); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Barrier returning %d", ret); _STARPU_MPI_LOG_OUT(); return ret; } int _starpu_mpi_wait_for_all(MPI_Comm comm) { (void) comm; _STARPU_MPI_LOG_IN(); STARPU_PTHREAD_MUTEX_LOCK(&mpi_wait_for_all_running_mutex); STARPU_MPI_ASSERT_MSG(!mpi_wait_for_all_running, "Concurrent starpu_mpi_wait_for_all is not implemented, even on different communicators"); mpi_wait_for_all_running = 1; do { while (nb_pending_requests) STARPU_PTHREAD_COND_WAIT(&mpi_wait_for_all_running_cond, &mpi_wait_for_all_running_mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mpi_wait_for_all_running_mutex); starpu_task_wait_for_all(); STARPU_PTHREAD_MUTEX_LOCK(&mpi_wait_for_all_running_mutex); } while (nb_pending_requests); mpi_wait_for_all_running = 0; STARPU_PTHREAD_MUTEX_UNLOCK(&mpi_wait_for_all_running_mutex); _STARPU_MPI_LOG_OUT(); return 0; } /********************************************************/ /* */ /* Progression */ /* */ /********************************************************/ /* Completely finalize a request: destroy it and decrement the number of pending requests */ static inline void _starpu_mpi_request_end(struct _starpu_mpi_req* req, int post_callback_sem) { /* Destroying a request and decrementing the number of pending requests * should be done together, so let's wrap these two things in a * function. This means instead of calling _starpu_mpi_request_destroy(), * you should call this function. */ /* If request went through _starpu_mpi_handle_received_data(), finalized has to be true: */ assert((req->backend->has_received_data && req->backend->finalized) || !req->backend->has_received_data); _starpu_mpi_request_destroy(req); int pending_remaining = STARPU_ATOMIC_ADD(&nb_pending_requests, -1); assert(pending_remaining >= 0); if (!pending_remaining) { STARPU_PTHREAD_COND_BROADCAST(&mpi_wait_for_all_running_cond); if (post_callback_sem && !running) { starpu_sem_post(&callback_sem); } } } /* Check if the caller has to completely finalize a request and try to do it */ static inline void _starpu_mpi_request_try_end(struct _starpu_mpi_req* req, int post_callback_sem) { _starpu_spin_lock(&req->backend->finalized_to_destroy_lock); if (!req->backend->has_received_data || req->backend->finalized) { _starpu_spin_unlock(&req->backend->finalized_to_destroy_lock); _starpu_mpi_request_end(req, post_callback_sem); } else { /* Request isn't finalized yet (NewMadeleine still needs it), since * this function should have destroyed the request, tell * _starpu_mpi_handle_request_termination() to destroy it when * NewMadeleine won't need it anymore. */ req->backend->to_destroy = 1; _starpu_spin_unlock(&req->backend->finalized_to_destroy_lock); } } /* Do required actions when a request is completed (but maybe not finalized!) */ static inline void _starpu_mpi_handle_post_actions(struct _starpu_mpi_req* req) { if (req->callback) { /* Callbacks are executed outside of this function, later by the * progression thread. * Indeed, this current function is executed by a NewMadeleine handler, * and possibly inside of a PIOman ltask. In such context, some locking * or system calls can be forbidden to avoid any deadlock, thus * callbacks are deported outside of this handler. */ struct callback_lfstack_cell_s* c = padico_malloc(sizeof(struct callback_lfstack_cell_s)); c->req = req; callback_lfstack_push(&callback_stack, c); /* The main thread can exit without waiting * the end of the detached request. Callback thread * must then be kept alive if they have a callback.*/ starpu_sem_post(&callback_sem); } else if(!req->detached) { /* tell anyone potentially waiting on the request that it is * terminated now (should be done after the callback)*/ req->completed = 1; piom_cond_signal(&req->backend->req_cond, REQ_FINALIZED); } } /* Function called when data arrived, but NewMadeleine still holds a reference * on it (to make progress a broadcast for instance). Application can thus read * the data, but not yet write it. */ void _starpu_mpi_handle_received_data(struct _starpu_mpi_req* req) { _STARPU_MPI_LOG_IN(); assert(req->request_type == RECV_REQ); assert(!_starpu_mpi_recv_wait_finalize); assert(!req->backend->has_received_data); assert(!req->backend->finalized); req->backend->has_received_data = 1; if (req->registered_datatype == 0) { /* Without peek_data, we can't unpack data for StarPU's use and keep * the buffer alive for NewMadeleine, so calling * _starpu_mpi_handle_received_data() makes no sense. */ assert(starpu_data_get_interface_ops(req->data_handle)->peek_data); starpu_data_peek_node(req->data_handle, req->node, req->ptr, req->count); } // Release write acquire on the handle: can unlock tasks waiting to read the handle: starpu_data_release_to(req->data_handle, STARPU_R); _starpu_mpi_handle_post_actions(req); _STARPU_MPI_LOG_OUT(); } /* Function called when nmad completely finished a request */ void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req* req) { _STARPU_MPI_LOG_IN(); _STARPU_MPI_DEBUG(2, "complete MPI request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); assert(!req->backend->finalized); if (req->request_type == RECV_REQ || req->request_type == SEND_REQ) { if (req->registered_datatype == 0) { if (req->request_type == RECV_REQ) { if (starpu_data_get_interface_ops(req->data_handle)->peek_data) { if (!req->backend->has_received_data) { starpu_data_peek_node(req->data_handle, req->node, req->ptr, req->count); } starpu_free_on_node_flags(req->node, (uintptr_t) req->ptr, req->count, 0); } else { // req->ptr is freed by starpu_data_unpack starpu_data_unpack_node(req->data_handle, req->node, req->ptr, req->count); } } else starpu_free_on_node_flags(req->node, (uintptr_t) req->ptr, req->count, 0); } else if (req->backend->posted) // with coop, only one request is really used to do the broadcast, so only posted request really allocates memory for the data: { nm_mpi_nmad_data_release(req->datatype); _starpu_mpi_datatype_free(req->data_handle, &req->datatype); } } // for recv requests, this event is the end of the communication link: _STARPU_MPI_TRACE_TERMINATED(req); _starpu_mpi_release_req_data(req); if (req->backend->has_received_data) { assert(req->request_type == RECV_REQ); /* Callback, test or wait were unlocked by * _starpu_mpi_handle_received_data(), maybe they were already * executed and since the request wasn't finalized yet, they didn't * destroy the request, and we have to do it now: */ _starpu_spin_lock(&req->backend->finalized_to_destroy_lock); req->backend->finalized = 1; if (req->backend->to_destroy || req->detached) { _starpu_spin_unlock(&req->backend->finalized_to_destroy_lock); _starpu_mpi_request_end(req, 1); } else { _starpu_spin_unlock(&req->backend->finalized_to_destroy_lock); } } else if (!req->callback && req->detached) { /* This request has no callback and is detached: we have to end it now: */ _starpu_mpi_request_end(req, 1); } else { _starpu_mpi_handle_post_actions(req); } _STARPU_MPI_LOG_OUT(); } void _starpu_mpi_handle_request_termination_callback(nm_sr_event_t event STARPU_ATTRIBUTE_UNUSED, const nm_sr_event_info_t* event_info STARPU_ATTRIBUTE_UNUSED, void* ref) { assert(ref != NULL); struct _starpu_mpi_req* req = (struct _starpu_mpi_req*) ref; req->backend->posted = 1; // a network event was triggered for this request, so it was really posted if (event & NM_SR_EVENT_FINALIZED) { _starpu_mpi_handle_request_termination(req); } else if (event & NM_SR_EVENT_RECV_COMPLETED && req->request_type == RECV_REQ && !_starpu_mpi_recv_wait_finalize && req->sequential_consistency) { /* About required sequential consistency: * "If it is 0, user can launch tasks writing in the handle, which will * mix data manipulated by nmad and data manipulated by tasks, this * could break some expected behaviours." (sthibault) */ /* Unknown datatype case is in starpu_mpi_nmad_unknown_datatype.c */ assert(req->registered_datatype == 1); _starpu_mpi_handle_received_data(req); } } void _starpu_mpi_handle_pending_request(struct _starpu_mpi_req *req) { assert(req != NULL); nm_sr_request_set_ref(&req->backend->data_request, req); int ret = nm_sr_request_monitor(req->backend->session, &req->backend->data_request, NM_SR_EVENT_FINALIZED | NM_SR_EVENT_RECV_COMPLETED, _starpu_mpi_handle_request_termination_callback); assert(ret == NM_ESUCCESS); } void _starpu_mpi_submit_ready_request(void *arg) { _STARPU_MPI_LOG_IN(); struct _starpu_mpi_req *req = arg; STARPU_ASSERT_MSG(req, "Invalid request"); if (req->reserved_size) { /* The core will have really allocated the reception buffer now, release our reservation */ starpu_memory_deallocate(req->node, req->reserved_size); req->reserved_size = 0; } /* submit the request to MPI directly from submitter */ _STARPU_MPI_DEBUG(2, "Handling new request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); req->func(req); _STARPU_MPI_LOG_OUT(); } static void *_starpu_mpi_progress_thread_func(void *arg) { struct _starpu_mpi_argc_argv *argc_argv = (struct _starpu_mpi_argc_argv *) arg; #ifndef STARPU_SIMGRID if (!_starpu_mpi_nobind && starpu_bind_thread_on(_starpu_mpi_thread_cpuid, 0, "MPI") < 0) { char hostname[65]; gethostname(hostname, sizeof(hostname)); _STARPU_DISP("[%s] No core was available for the MPI thread. You should use STARPU_RESERVE_NCPU to leave one core available for MPI, or specify one core less in STARPU_NCPU\n", hostname); } #endif #ifdef STARPU_SIMGRID /* Now that MPI is set up, let the rest of simgrid get initialized */ char **argv_cpy; _STARPU_MPI_MALLOC(argv_cpy, *(argc_argv->argc) * sizeof(char*)); int i; for (i = 0; i < *(argc_argv->argc); i++) argv_cpy[i] = strdup((*(argc_argv->argv))[i]); #if defined(HAVE_SG_ACTOR_DATA) || defined(HAVE_SG_ACTOR_SET_DATA) _starpu_simgrid_actor_create("main", smpi_simulated_main_, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy); #else MSG_process_create_with_arguments("main", smpi_simulated_main_, NULL, _starpu_simgrid_get_host_by_name("MAIN"), *(argc_argv->argc), argv_cpy); /* And set TSD for us */ void **tsd; _STARPU_CALLOC(tsd, MAX_TSD + 1, sizeof(void*)); if (!smpi_process_set_user_data) { _STARPU_ERROR("Your version of simgrid does not provide smpi_process_set_user_data, we can not continue without it\n"); } smpi_process_set_user_data(tsd); #endif #endif _starpu_mpi_comm_amounts_init(argc_argv->comm); _starpu_mpi_cache_init(argc_argv->comm); _starpu_mpi_select_node_init(); _starpu_mpi_datatype_init(); _starpu_mpi_tags_init(); #ifdef STARPU_USE_FXT _starpu_mpi_fxt_init(argc_argv); #ifdef HAVE_NM_TRACE_ADD_SYNCHRO_POINT starpu_fxt_trace_user_meta_string("Clock_synchronize"); nm_trace_add_synchro_point(); #endif #endif if (_starpu_mpi_use_coop_sends) { if (argc_argv->world_size > 2) { _starpu_mpi_nmad_coop_init(); nmad_mcast_started = 1; // to shutdown mcast } else { _starpu_mpi_use_coop_sends = 0; } } /* notify the main thread that the progression thread is ready */ STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); running = 1; STARPU_PTHREAD_COND_SIGNAL(&progress_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); while (1) { struct callback_lfstack_cell_s* c = callback_lfstack_pop(&callback_stack); int err=0; if(running || nb_pending_requests>0) { /* shall we block ? */ err = starpu_sem_wait(&callback_sem); //running nb_pending_requests can change while waiting } if(c==NULL) { c = callback_lfstack_pop(&callback_stack); if (c == NULL) { if(running && nb_pending_requests>0) { STARPU_ASSERT_MSG(c!=NULL, "Callback thread awakened without callback ready with error %d.",err); } else { if (nb_pending_requests==0) break; } continue; } } c->req->callback(c->req->callback_arg); if (c->req->detached) { _starpu_mpi_request_try_end(c->req, 0); } else { c->req->completed=1; piom_cond_signal(&(c->req->backend->req_cond), REQ_FINALIZED); } free(c); } /** Now, shutting down MPI **/ STARPU_ASSERT_MSG(callback_lfstack_pop(&callback_stack)==NULL, "List of callback not empty."); STARPU_ASSERT_MSG(nb_pending_requests==0, "Request still pending."); /* We cannot rely on _starpu_mpi_use_coop_sends to shutdown mcast: * coops can be disabled with starpu_mpi_coop_sends_set_use() after * initialization of mcast. */ if (nmad_mcast_started) { _starpu_mpi_nmad_coop_shutdown(); } #ifdef STARPU_USE_FXT _starpu_mpi_fxt_shutdown(); #endif if (argc_argv->initialize_mpi) { _STARPU_MPI_DEBUG(3, "Calling MPI_Finalize()\n"); MPI_Finalize(); } starpu_sem_destroy(&callback_sem); free(argc_argv); return NULL; } /********************************************************/ /* */ /* (De)Initialization methods */ /* */ /********************************************************/ // #ifdef STARPU_MPI_ACTIVITY // static int hookid = - 1; // #endif /* STARPU_MPI_ACTIVITY */ int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv) { STARPU_PTHREAD_MUTEX_INIT(&progress_mutex, NULL); STARPU_PTHREAD_COND_INIT(&progress_cond, NULL); STARPU_PTHREAD_MUTEX_INIT(&mpi_wait_for_all_running_mutex, NULL); STARPU_PTHREAD_COND_INIT(&mpi_wait_for_all_running_cond, NULL); starpu_sem_init(&callback_sem, 0, 0); running = 0; _starpu_mpi_env_init(); /* This function calls MPI_Init_thread if needed, and it initializes internal NMAD/Pioman variables, * required for piom_ltask_set_bound_thread_indexes() */ _starpu_mpi_do_initialize(argc_argv); if (!_starpu_mpi_nobind && _starpu_mpi_thread_cpuid < 0) { _starpu_mpi_thread_cpuid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, NULL, 0); } callback_lfstack_init(&callback_stack); /* Tell pioman to use a bound thread for communication progression: * share the same core as StarPU's MPI thread, the MPI thread has very low activity with NMAD backend */ #ifdef HAVE_PIOM_LTASK_SET_BOUND_THREAD_OS_INDEXES /* We prefer to give the OS index of the core, because StarPU can have * a different vision of the topology, especially if STARPU_WORKERS_GETBIND * is enabled */ int indexes[1] = { starpu_get_pu_os_index((unsigned) _starpu_mpi_thread_cpuid) }; if (!_starpu_mpi_nobind) piom_ltask_set_bound_thread_os_indexes(HWLOC_OBJ_PU, indexes, 1); #else int indexes[1] = { _starpu_mpi_thread_cpuid }; if (!_starpu_mpi_nobind) piom_ltask_set_bound_thread_indexes(HWLOC_OBJ_PU, indexes, 1); #endif /* Register some hooks for communication progress if needed */ int polling_point_prog, polling_point_idle; char *s_prog_hooks = starpu_getenv("STARPU_MPI_NMAD_PROG_HOOKS"); char *s_idle_hooks = starpu_getenv("STARPU_MPI_NMAD_IDLE_HOOKS"); if(!s_prog_hooks) { polling_point_prog = 0; } else { polling_point_prog = (strcmp(s_prog_hooks, "FORCED") == 0) ? PIOM_POLL_POINT_FORCED : (strcmp(s_prog_hooks, "SINGLE") == 0) ? PIOM_POLL_POINT_SINGLE : (strcmp(s_prog_hooks, "HOOK") == 0) ? PIOM_POLL_POINT_HOOK : 0; } if(!s_idle_hooks) { polling_point_idle = 0; } else { polling_point_idle = (strcmp(s_idle_hooks, "FORCED") == 0) ? PIOM_POLL_POINT_FORCED : (strcmp(s_idle_hooks, "SINGLE") == 0) ? PIOM_POLL_POINT_SINGLE : (strcmp(s_idle_hooks, "HOOK") == 0) ? PIOM_POLL_POINT_HOOK : 0; } if(polling_point_prog) { starpu_progression_hook_register((void *)&piom_ltask_schedule, (void *)&polling_point_prog); } if(polling_point_idle) { starpu_idle_hook_register((void *)&piom_ltask_schedule, (void *)&polling_point_idle); } /* Launch thread used for nmad callbacks */ STARPU_PTHREAD_CREATE(&progress_thread, NULL, _starpu_mpi_progress_thread_func, argc_argv); STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); while (!running) STARPU_PTHREAD_COND_WAIT(&progress_cond, &progress_mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); return 0; } void _starpu_mpi_progress_shutdown(void **value) { if (!running) { _STARPU_ERROR("The progress thread was not launched. Was StarPU successfully initialized?\n"); } /* kill the progression thread */ STARPU_PTHREAD_MUTEX_LOCK(&progress_mutex); running = 0; STARPU_PTHREAD_COND_BROADCAST(&progress_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&progress_mutex); starpu_sem_post(&callback_sem); STARPU_PTHREAD_JOIN(progress_thread, value); callback_lfstack_destroy(&callback_stack); STARPU_PTHREAD_MUTEX_DESTROY(&progress_mutex); STARPU_PTHREAD_COND_DESTROY(&progress_cond); STARPU_PTHREAD_MUTEX_DESTROY(&mpi_wait_for_all_running_mutex); STARPU_PTHREAD_COND_DESTROY(&mpi_wait_for_all_running_cond); } static int64_t _starpu_mpi_tag_max = INT64_MAX; int starpu_mpi_comm_get_attr(MPI_Comm comm, int keyval, void *attribute_val, int *flag) { (void) comm; if (keyval == STARPU_MPI_TAG_UB) { if ((uint64_t) _starpu_mpi_tag_max > NM_TAG_MAX) _starpu_mpi_tag_max = NM_TAG_MAX; /* manage case where nmad max tag causes overflow if represented as starpu tag */ *(int64_t **)attribute_val = &_starpu_mpi_tag_max; *flag = 1; } else { *flag = 0; } return 0; } #endif /* STARPU_USE_MPI_NMAD*/ starpu-1.4.9+dfsg/mpi/src/nmad/starpu_mpi_nmad.h000066400000000000000000000034161507764646700216420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_NMAD_H__ #define __STARPU_MPI_NMAD_H__ #include #include #include #include #include /** @file */ #ifdef STARPU_USE_MPI_NMAD #ifdef __cplusplus extern "C" { #endif int _starpu_mpi_progress_init(struct _starpu_mpi_argc_argv *argc_argv); void _starpu_mpi_progress_shutdown(void **value); //#ifdef STARPU_SIMGRID //void _starpu_mpi_wait_for_initialization(); //#endif int _starpu_mpi_barrier(MPI_Comm comm); int _starpu_mpi_wait_for_all(MPI_Comm comm); int _starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status); int _starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status); void _starpu_mpi_isend_func(struct _starpu_mpi_req *req); void _starpu_mpi_irecv_func(struct _starpu_mpi_req *req); void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req); void _starpu_mpi_handle_pending_request(struct _starpu_mpi_req *req); void _starpu_mpi_handle_received_data(struct _starpu_mpi_req* req); #ifdef __cplusplus } #endif #endif /* STARPU_USE_MPI_NMAD */ #endif /* __STARPU_MPI_NMAD_H__ */ starpu-1.4.9+dfsg/mpi/src/nmad/starpu_mpi_nmad_backend.c000066400000000000000000000102001507764646700232710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "starpu_mpi_nmad_backend.h" #include #include "starpu_mpi_nmad.h" #ifdef STARPU_USE_MPI_NMAD #include static void starpu_mpi_nmad_backend_constructor(void) __attribute__((constructor)); static void starpu_mpi_nmad_backend_constructor(void) { /* strat_prio is preferred for StarPU instead of default strat_aggreg */ setenv("NMAD_STRATEGY", "prio", 0 /* do not overwrite user-supplied value, if set */); /* prefer rcache on ibverbs */ setenv("NMAD_IBVERBS_RCACHE", "1", 0); /* use pioman dedicated thread */ setenv("PIOM_DEDICATED", "1", 0); /* pioman waits for starpu to place its dedicated thread */ setenv("PIOM_DEDICATED_WAIT", "1", 0); } void _starpu_mpi_nmad_backend_init(struct starpu_conf *conf) { (void)conf; nm_abi_config_check(); } void _starpu_mpi_nmad_backend_shutdown(void) { } int _starpu_mpi_nmad_backend_reserve_core(void) { return 1; } void _starpu_mpi_nmad_backend_request_init(struct _starpu_mpi_req *req) { _STARPU_MPI_CALLOC(req->backend, 1, sizeof(struct _starpu_mpi_req_backend)); piom_cond_init(&req->backend->req_cond, 0); req->backend->data_request = NM_SR_REQUEST_NULL; req->backend->posted = 0; req->backend->has_received_data = 0; req->backend->finalized = 0; req->backend->to_destroy = 0; _starpu_spin_init(&req->backend->finalized_to_destroy_lock); } void _starpu_mpi_nmad_backend_request_fill(struct _starpu_mpi_req *req, int is_internal_req STARPU_ATTRIBUTE_UNUSED) { /* this function gives session and gate: */ nm_mpi_nmad_dest(&req->backend->session, &req->backend->gate, req->node_tag.node.comm, req->node_tag.node.rank); } void _starpu_mpi_nmad_backend_request_destroy(struct _starpu_mpi_req *req) { piom_cond_destroy(&(req->backend->req_cond)); _starpu_spin_destroy(&req->backend->finalized_to_destroy_lock); free(req->backend); } void _starpu_mpi_nmad_backend_data_clear(starpu_data_handle_t data_handle) { (void)data_handle; } void _starpu_mpi_nmad_backend_data_register(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag) { (void)data_handle; (void)data_tag; } void _starpu_mpi_nmad_backend_comm_register(MPI_Comm comm) { (void)comm; } struct _starpu_mpi_backend _mpi_backend = { ._starpu_mpi_backend_init = _starpu_mpi_nmad_backend_init, ._starpu_mpi_backend_shutdown = _starpu_mpi_nmad_backend_shutdown, ._starpu_mpi_backend_reserve_core = _starpu_mpi_nmad_backend_reserve_core, ._starpu_mpi_backend_request_init = _starpu_mpi_nmad_backend_request_init, ._starpu_mpi_backend_request_fill = _starpu_mpi_nmad_backend_request_fill, ._starpu_mpi_backend_request_destroy = _starpu_mpi_nmad_backend_request_destroy, ._starpu_mpi_backend_data_clear = _starpu_mpi_nmad_backend_data_clear, ._starpu_mpi_backend_data_register = _starpu_mpi_nmad_backend_data_register, ._starpu_mpi_backend_comm_register = _starpu_mpi_nmad_backend_comm_register, ._starpu_mpi_backend_progress_init = _starpu_mpi_progress_init, ._starpu_mpi_backend_progress_shutdown = _starpu_mpi_progress_shutdown, //#ifdef STARPU_SIMGRID // ._starpu_mpi_backend_wait_for_initialization = _starpu_mpi_wait_for_initialization, //#endif ._starpu_mpi_backend_barrier = _starpu_mpi_barrier, ._starpu_mpi_backend_wait_for_all = _starpu_mpi_wait_for_all, ._starpu_mpi_backend_wait = _starpu_mpi_wait, ._starpu_mpi_backend_test = _starpu_mpi_test, ._starpu_mpi_backend_isend_size_func = _starpu_mpi_isend_func, ._starpu_mpi_backend_irecv_size_func = _starpu_mpi_irecv_func, }; #endif /* STARPU_USE_MPI_NMAD*/ starpu-1.4.9+dfsg/mpi/src/nmad/starpu_mpi_nmad_backend.h000066400000000000000000000037601507764646700233130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_NMAD_BACKEND_H__ #define __STARPU_MPI_NMAD_BACKEND_H__ #include #include /** @file */ #ifdef __cplusplus extern "C" { #endif #ifdef STARPU_USE_MPI_NMAD #include #include #include struct _starpu_mpi_req_backend { nm_gate_t gate; nm_session_t session; nm_sr_request_t data_request; piom_cond_t req_cond; int posted; // with coop, only one request is really posted, we need to know if the request was really posted to possibly free data int has_received_data; // tell if request went through _starpu_mpi_handle_received_data() to release write lock int finalized; // tell if _starpu_mpi_handle_request_termination() was called, so starpu_mpi_test() and starpu_mpi_wait() have to free the request int to_destroy; // tell if starpu_mpi_wait() or starpu_mpi_test() was called before _starpu_mpi_handle_request_termination() and thus this last function will have to free the request struct _starpu_spinlock finalized_to_destroy_lock; /** When datatype is unknown */ struct nm_data_s unknown_datatype_data; // will contain size of the datatype and data itself struct iovec unknown_datatype_v[2]; }; #endif // STARPU_USE_MPI_NMAD #ifdef __cplusplus } #endif #endif // __STARPU_MPI_NMAD_BACKEND_H__ starpu-1.4.9+dfsg/mpi/src/nmad/starpu_mpi_nmad_coop.c000066400000000000000000000120571507764646700226560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #ifdef STARPU_USE_MPI_NMAD #include #include #include #include #include #include #include "starpu_mpi_nmad_coop.h" #include "starpu_mpi_nmad_backend.h" #include "starpu_mpi_nmad_unknown_datatype.h" extern void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req); struct mcast_send { nm_mcast_t mcast; int* dests; int* prios; struct _starpu_mpi_req* req; struct nm_data_s data; }; static nm_mcast_service_t mcast_service; void _starpu_mpi_nmad_coop_init(void) { mcast_service = nm_mcast_init(nm_mpi_comm(MPI_COMM_WORLD)); } void _starpu_mpi_nmad_end_coop_callback(void* arg) { /* Callback called by the root node of the broadcast, when its job is done; * not by receivers. */ struct mcast_send* mcast = (struct mcast_send*) arg; mcast->req->backend->posted = 1; _starpu_mpi_handle_request_termination(mcast->req); nm_mcast_send_destroy(&mcast->mcast); free(mcast->dests); if (_starpu_mpi_use_prio) { free(mcast->prios); } free(mcast); } void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, int submit_control STARPU_ATTRIBUTE_UNUSED, int submit_data) { if (!submit_data) { return; } _STARPU_MPI_LOG_IN(); unsigned n = coop_sends->n; assert(n >= 2); #if 0 // sure, a tree does not worth it for only two recipients, but if the user wants a broadcast with a chain routing, he really wants only one request to be sent from this node. if (n == 2) // a broadcast tree does not worth it for only two recipients { assert(coop_sends->reqs_array[0]->request_type == SEND_REQ); _starpu_mpi_submit_ready_request(coop_sends->reqs_array[0]); assert(coop_sends->reqs_array[1]->request_type == SEND_REQ); _starpu_mpi_submit_ready_request(coop_sends->reqs_array[1]); } else #endif { starpu_fxt_trace_user_event_string("collective send"); unsigned i = 0; struct _starpu_mpi_req *starpu_req; struct mcast_send* mcast = malloc(sizeof(struct mcast_send)); mcast->dests = malloc(n * sizeof(int)); if (_starpu_mpi_use_prio) { mcast->prios = malloc(n * sizeof(int)); } else { mcast->prios = NULL; } /* We don't increase the amount of communicated data, because we don't * know which tree type will be executed to do the broadcast, so we * don't know how many data will actually be sent from this node. */ _starpu_mpi_nb_coop_inc(n); for (i = 0; i < n; i++) { starpu_req = coop_sends->reqs_array[i]; assert(starpu_req->request_type == SEND_REQ); assert(starpu_req->coop_sends_head != NULL); mcast->dests[i] = starpu_req->node_tag.node.rank; if (_starpu_mpi_use_prio) { mcast->prios[i] = starpu_req->prio; } // this trace event is the start of the communication link: _STARPU_MPI_TRACE_ISEND_SUBMIT_END(_STARPU_MPI_FUT_COLLECTIVE_SEND, starpu_req, starpu_req->prio); // Keep the first request to do the mcast, but consider other as finished: if (i > 0) { _starpu_mpi_handle_request_termination(starpu_req); } } starpu_req = coop_sends->reqs_array[0]; _starpu_mpi_datatype_allocate(starpu_req->data_handle, starpu_req); nm_len_t header_len = 0; if (starpu_req->registered_datatype == 1) { starpu_req->count = 1; starpu_req->ptr = starpu_data_handle_to_pointer(starpu_req->data_handle, STARPU_MAIN_RAM); nm_mpi_nmad_data_get(&mcast->data, (void*)starpu_req->ptr, starpu_req->datatype, starpu_req->count); } else { _starpu_mpi_isend_prepare_unknown_datatype(starpu_req, &mcast->data); header_len = sizeof(starpu_ssize_t); // we send the size of the data as a header } mcast->req = starpu_req; nm_comm_t comm = nm_comm_get_by_session(starpu_req->backend->session); assert(comm != NULL); nm_mcast_send_init(mcast_service, &mcast->mcast); nm_mcast_send_set_notifier(&mcast->mcast, _starpu_mpi_nmad_end_coop_callback, mcast); nm_mcast_isend(&mcast->mcast, comm, mcast->dests, mcast->prios, n, starpu_req->node_tag.data_tag, &mcast->data, header_len, NM_COLL_TREE_DEFAULT); } _STARPU_MPI_LOG_OUT(); } void _starpu_mpi_nmad_coop_shutdown(void) { nm_mcast_finalize(mcast_service); } void _starpu_mpi_coop_sends_build_tree(struct _starpu_mpi_coop_sends *coop_sends STARPU_ATTRIBUTE_UNUSED) { /* The NMAD implementation doesn't use this function. */ } #endif /* STARPU_USE_MPI_NMAD*/ starpu-1.4.9+dfsg/mpi/src/nmad/starpu_mpi_nmad_coop.h000066400000000000000000000021741507764646700226620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_NMAD_COOP_H__ #define __STARPU_MPI_NMAD_COOP_H__ #include #ifdef STARPU_USE_MPI_NMAD #ifdef __cplusplus extern "C" { #endif #include #include void _starpu_mpi_nmad_coop_init(void); void _starpu_mpi_nmad_coop_shutdown(void); void _starpu_mpi_nmad_end_coop_callback(void* arg); #ifdef __cplusplus } #endif #endif // STARPU_USE_MPI_NMAD #endif // __STARPU_MPI_NMAD_COOP_H__ starpu-1.4.9+dfsg/mpi/src/nmad/starpu_mpi_nmad_unknown_datatype.c000066400000000000000000000162651507764646700253150ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef STARPU_USE_MPI_NMAD #include #include #include #include #include #include "starpu_mpi_nmad.h" #include "starpu_mpi_nmad_backend.h" #include "starpu_mpi_nmad_unknown_datatype.h" /********************************************** * Send **********************************************/ void _starpu_mpi_isend_prepare_unknown_datatype(struct _starpu_mpi_req* req, struct nm_data_s* data) { STARPU_ASSERT_MSG(req->registered_datatype != 1, "Datatype is registered, no need to send it through this way !"); starpu_data_pack_node(req->data_handle, req->node, &req->ptr, &req->count); req->backend->unknown_datatype_v[0].iov_base = &req->count; req->backend->unknown_datatype_v[0].iov_len = sizeof(starpu_ssize_t); req->backend->unknown_datatype_v[1].iov_base = req->ptr; req->backend->unknown_datatype_v[1].iov_len = req->count; nm_data_iov_build(data, req->backend->unknown_datatype_v, 2); } void _starpu_mpi_isend_unknown_datatype(struct _starpu_mpi_req *req) { _STARPU_MPI_LOG_IN(); _STARPU_MPI_DEBUG(30, "post NM isend (unknown datatype) request %p type %s tag %ld src %d data %p datasize %ld ptr %p datatype '%s' count %d registered_datatype %d sync %d\n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, starpu_data_get_size(req->data_handle), req->ptr, req->datatype_name, (int)req->count, req->registered_datatype, req->sync); _starpu_mpi_comm_amounts_inc(req->node_tag.node.comm, req->node, req->node_tag.node.rank, req->datatype, req->count); _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag, 0); _starpu_mpi_isend_prepare_unknown_datatype(req, &req->backend->unknown_datatype_data); nm_sr_send_init(req->backend->session, &req->backend->data_request); nm_sr_send_pack_data(req->backend->session, &req->backend->data_request, &req->backend->unknown_datatype_data); nm_sr_send_set_priority(req->backend->session, &req->backend->data_request, req->prio); nm_sr_send_header(req->backend->session, &req->backend->data_request, sizeof(starpu_ssize_t)); // this trace event is the start of the communication link: _STARPU_MPI_TRACE_ISEND_SUBMIT_END(_STARPU_MPI_FUT_POINT_TO_POINT_SEND, req, req->prio); if (req->sync == 0) { req->ret = nm_sr_send_isend(req->backend->session, &req->backend->data_request, req->backend->gate, req->node_tag.data_tag); STARPU_ASSERT_MSG(req->ret == NM_ESUCCESS, "nm_sr_send_isend returning %d", req->ret); } else { req->ret = nm_sr_send_issend(req->backend->session, &req->backend->data_request, req->backend->gate, req->node_tag.data_tag); STARPU_ASSERT_MSG(req->ret == NM_ESUCCESS, "nm_sr_send_issend returning %d", req->ret); } _starpu_mpi_handle_pending_request(req); _STARPU_MPI_LOG_OUT(); } /********************************************** * Receive **********************************************/ static void _starpu_mpi_unknown_datatype_recv_callback(nm_sr_event_t event, const nm_sr_event_info_t* p_info STARPU_ATTRIBUTE_UNUSED, void* ref) { STARPU_ASSERT_MSG(!((event & NM_SR_EVENT_FINALIZED) && (event & NM_SR_EVENT_RECV_DATA)), "Both events can't be triggered at the same time !"); struct _starpu_mpi_req* req = (struct _starpu_mpi_req*) ref; assert(req->request_type == RECV_REQ); assert(req->registered_datatype != 1); req->backend->posted = 1; // a network event was triggered for this request, so it was really posted if (event & NM_SR_EVENT_RECV_DATA) { // Header arrived, so get the size of the datatype and store it in req->count: struct nm_data_s data_header; nm_data_contiguous_build(&data_header, &req->count, sizeof(starpu_ssize_t)); nm_sr_recv_peek(req->backend->session, &req->backend->data_request, &data_header); // Now we know the size, allocate the buffer: req->ptr = (void *)starpu_malloc_on_node_flags(req->node, req->count, 0); STARPU_ASSERT_MSG(req->ptr, "cannot allocate message of size %ld", req->count); /* Last step: give this buffer to NewMadeleine to receive data * We need to use an iov to easily take into account the offset used * during the peek. */ req->backend->unknown_datatype_v[0].iov_base = &req->count; req->backend->unknown_datatype_v[0].iov_len = sizeof(starpu_ssize_t); req->backend->unknown_datatype_v[1].iov_base = req->ptr; req->backend->unknown_datatype_v[1].iov_len = req->count; nm_data_iov_build(&req->backend->unknown_datatype_data, req->backend->unknown_datatype_v, 2); nm_sr_recv_offset(req->backend->session, &req->backend->data_request, sizeof(starpu_ssize_t)); nm_sr_recv_unpack_data(req->backend->session, &req->backend->data_request, &req->backend->unknown_datatype_data); } else if (event & NM_SR_EVENT_FINALIZED) { _starpu_mpi_handle_request_termination(req); } else if (event & NM_SR_EVENT_RECV_COMPLETED && !_starpu_mpi_recv_wait_finalize && req->sequential_consistency && starpu_data_get_interface_ops(req->data_handle)->peek_data) { _starpu_mpi_handle_received_data(req); } } void _starpu_mpi_irecv_unknown_datatype(struct _starpu_mpi_req *req) { _STARPU_MPI_LOG_IN(); STARPU_ASSERT_MSG(req->registered_datatype != 1, "Datatype is registered, no need to receive it through this way !"); _STARPU_MPI_DEBUG(20, "post NM irecv (datatype unknown) request %p type %s tag %ld src %d data %p ptr %p datatype '%s' count %d registered_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->node_tag.data_tag, req->node_tag.node.rank, req->data_handle, req->ptr, req->datatype_name, (int)req->count, req->registered_datatype); _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(req->node_tag.node.rank, req->node_tag.data_tag); /* we post a recv without giving a buffer because we don't know the required size of this buffer, * the buffer will be allocated and provided to nmad when the header of data will be received, * in _starpu_mpi_unknown_datatype_recv_callback() */ nm_sr_recv_init(req->backend->session, &req->backend->data_request); nm_sr_request_set_ref(&req->backend->data_request, req); nm_sr_request_monitor(req->backend->session, &req->backend->data_request, NM_SR_EVENT_FINALIZED | NM_SR_EVENT_RECV_DATA | NM_SR_EVENT_RECV_COMPLETED, &_starpu_mpi_unknown_datatype_recv_callback); nm_sr_recv_irecv(req->backend->session, &req->backend->data_request, req->backend->gate, req->node_tag.data_tag, NM_TAG_MASK_FULL); _STARPU_MPI_TRACE_IRECV_SUBMIT_END(req->node_tag.node.rank, req->node_tag.data_tag); _STARPU_MPI_LOG_OUT(); } #endif // STARPU_USE_MPI_NMAD starpu-1.4.9+dfsg/mpi/src/nmad/starpu_mpi_nmad_unknown_datatype.h000066400000000000000000000024331507764646700253120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_NMAD_UNKNOWN_DATATYPE_H__ #define __STARPU_MPI_NMAD_UNKNOWN_DATATYPE_H__ #include /** @file */ #ifdef __cplusplus extern "C" { #endif #ifdef STARPU_USE_MPI_NMAD #include #include void _starpu_mpi_isend_prepare_unknown_datatype(struct _starpu_mpi_req* req, struct nm_data_s* data); void _starpu_mpi_isend_unknown_datatype(struct _starpu_mpi_req *req); void _starpu_mpi_irecv_unknown_datatype(struct _starpu_mpi_req *req); #endif // STARPU_USE_MPI_NMAD #ifdef __cplusplus } #endif #endif // __STARPU_MPI_NMAD_UNKNOWN_DATATYPE_H__ starpu-1.4.9+dfsg/mpi/src/starpu_mpi.c000066400000000000000000000622451507764646700177240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2019,2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int _starpu_mpi_choose_node(starpu_data_handle_t handle, enum starpu_data_access_mode mode) { if (mode & STARPU_W) { /* Receiving */ /* TODO: lookup NIC location */ /* Where to receive the data? */ if (handle->home_node >= 0 && starpu_node_get_kind(handle->home_node) == STARPU_CPU_RAM) /* For now, better use the home node to avoid duplicates */ return handle->home_node; /* Several potential places */ unsigned i; if (_starpu_mpi_has_cuda) for (i = 0; i < STARPU_MAXNODES; i++) { /* Note: We take as a hint that it's allocated on the GPU as * a clue that we want to push directly to the GPU */ if (starpu_node_get_kind(i) == STARPU_CUDA_RAM && handle->per_node[i].allocated && (_starpu_mpi_cuda_devid == -1 || _starpu_mpi_cuda_devid == starpu_memory_node_get_devid(i))) /* This node already has allocated buffers, let's just use it */ return i; } for (i = 0; i < STARPU_MAXNODES; i++) { /* Note: We take as a hint that it's allocated on a NUMA node as * a clue that we want to push directly to that NUMA node */ if (starpu_node_get_kind(i) == STARPU_CPU_RAM && handle->per_node[i].allocated) /* This node already has allocated buffers, let's just use it */ return i; } /* No luck, take the least loaded node */ starpu_ssize_t maximum = 0; starpu_ssize_t needed = starpu_data_get_alloc_size(handle); unsigned node = STARPU_MAIN_RAM; for (i = 0; i < STARPU_MAXNODES; i++) { if (starpu_node_get_kind(i) == STARPU_CPU_RAM || (_starpu_mpi_has_cuda && starpu_node_get_kind(i) == STARPU_CUDA_RAM)) { starpu_ssize_t size = starpu_memory_get_available(i); if (size >= needed && size > maximum) { node = i; maximum = size; } } } return node; } else { /* Sending */ /* Several potential places */ unsigned i; for (i = 0; i < STARPU_MAXNODES; i++) { if ((starpu_node_get_kind(i) == STARPU_CPU_RAM || (starpu_node_get_kind(i) == STARPU_CUDA_RAM && _starpu_mpi_has_cuda && (_starpu_mpi_cuda_devid == -1 || _starpu_mpi_cuda_devid == starpu_memory_node_get_devid(i)))) && handle->per_node[i].state != STARPU_INVALID) /* This node already has the value, let's just use it */ /* TODO: rather pick up place next to NIC */ return i; } /* No luck, take the least loaded node, to transfer from e.g. GPU */ starpu_ssize_t maximum = 0; starpu_ssize_t needed = starpu_data_get_alloc_size(handle); unsigned node = STARPU_MAIN_RAM; for (i = 0; i < STARPU_MAXNODES; i++) { if (starpu_node_get_kind(i) == STARPU_CPU_RAM) { starpu_ssize_t size = starpu_memory_get_available(i); if (size >= needed && size > maximum) { node = i; maximum = size; } } } return node; } } static void _starpu_mpi_acquired_callback(void *arg, int *nodep, enum starpu_data_access_mode mode) { struct _starpu_mpi_req *req = arg; int node = *nodep; /* The data was acquired in terms of dependencies, we can now look the * current state of the handle and decide which node we prefer for the data * fetch */ if (node < 0) node = _starpu_mpi_choose_node(req->data_handle, mode); req->node = *nodep = node; } void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency) { int node = -1; /* Asynchronously request StarPU to fetch the data in main memory: when * it is available in main memory, _starpu_mpi_submit_ready_request(req) is called and * the request is actually submitted */ if (_starpu_mpi_mem_throttle && mode & STARPU_W && !req->data_handle->initialized) { /* We will trigger allocation, pre-reserve for it */ size_t size = starpu_data_get_size(req->data_handle); if (size) { /* FIXME: rather take the less-loaded NUMA node */ node = STARPU_MAIN_RAM; /* This will potentially block */ starpu_memory_allocate(node, size, STARPU_MEMORY_WAIT); req->reserved_size = size; /* This also decides where we will store the data */ req->node = node; } } if (sequential_consistency) { starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(req->data_handle, node, mode, _starpu_mpi_acquired_callback, _starpu_mpi_submit_ready_request, (void *)req, 1 /*sequential consistency*/, 1, &req->pre_sync_jobid, &req->post_sync_jobid, req->prio); } else { /* post_sync_job_id has already been filled */ starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(req->data_handle, node, mode, _starpu_mpi_acquired_callback, _starpu_mpi_submit_ready_request, (void *)req, 0 /*sequential consistency*/, 1, &req->pre_sync_jobid, NULL, req->prio); } } struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg, int sequential_consistency) { if (STARPU_UNLIKELY(_starpu_mpi_fake_world_size != -1)) { /* Don't actually do the communication */ return NULL; } #ifdef STARPU_MPI_PEDANTIC_ISEND enum starpu_data_access_mode mode = STARPU_RW; #else enum starpu_data_access_mode mode = STARPU_R; #endif struct _starpu_mpi_req *req = _starpu_mpi_request_fill(data_handle, dest, data_tag, comm, detached, sync, prio, callback, arg, SEND_REQ, _mpi_backend._starpu_mpi_backend_isend_size_func, sequential_consistency, 0, 0); _starpu_mpi_req_willpost(req); if (_starpu_mpi_use_coop_sends && detached == 1 && sync == 0 && callback == NULL) { /* It's a send & forget send, we can perhaps optimize its distribution over several nodes */ _starpu_mpi_coop_send(data_handle, req, mode, sequential_consistency); return req; } /* Post normally */ _starpu_mpi_isend_irecv_common(req, mode, sequential_consistency); return req; } int starpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm) { _STARPU_MPI_LOG_IN(); STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_isend needs a valid starpu_mpi_req"); struct _starpu_mpi_req *req; _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN(dest, data_tag, 0); req = _starpu_mpi_isend_common(data_handle, dest, data_tag, comm, 0, 0, prio, NULL, NULL, 1); _STARPU_MPI_TRACE_ISEND_COMPLETE_END(dest, data_tag, 0); STARPU_MPI_ASSERT_MSG(req, "Invalid return for _starpu_mpi_isend_common"); *public_req = req; _STARPU_MPI_LOG_OUT(); return 0; } int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm) { return starpu_mpi_isend_prio(data_handle, public_req, dest, data_tag, 0, comm); } int starpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg) { _STARPU_MPI_LOG_IN(); _starpu_mpi_isend_common(data_handle, dest, data_tag, comm, 1, 0, prio, callback, arg, 1); _STARPU_MPI_LOG_OUT(); return 0; } int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg) { return starpu_mpi_isend_detached_prio(data_handle, dest, data_tag, 0, comm, callback, arg); } int starpu_mpi_send_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm) { starpu_mpi_req req; MPI_Status status; int ret; _STARPU_MPI_LOG_IN(); ret = starpu_mpi_isend_prio(data_handle, &req, dest, data_tag, prio, comm); if (ret) return ret; memset(&status, 0, sizeof(MPI_Status)); ret = starpu_mpi_wait(&req, &status); _STARPU_MPI_LOG_OUT(); return ret; } int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm) { return starpu_mpi_send_prio(data_handle, dest, data_tag, 0, comm); } int starpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm) { _STARPU_MPI_LOG_IN(); STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_issend needs a valid starpu_mpi_req"); struct _starpu_mpi_req *req; req = _starpu_mpi_isend_common(data_handle, dest, data_tag, comm, 0, 1, prio, NULL, NULL, 1); STARPU_MPI_ASSERT_MSG(req, "Invalid return for _starpu_mpi_isend_common"); *public_req = req; _STARPU_MPI_LOG_OUT(); return 0; } int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm) { return starpu_mpi_issend_prio(data_handle, public_req, dest, data_tag, 0, comm); } int starpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg) { _STARPU_MPI_LOG_IN(); _starpu_mpi_isend_common(data_handle, dest, data_tag, comm, 1, 1, prio, callback, arg, 1); _STARPU_MPI_LOG_OUT(); return 0; } int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg) { return starpu_mpi_issend_detached_prio(data_handle, dest, data_tag, 0, comm, callback, arg); } struct _starpu_mpi_req* _starpu_mpi_isend_cache_aware(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *_arg, int sequential_consistency, int* cache_flag) { struct _starpu_mpi_req* req = NULL; int already_sent = starpu_mpi_cached_send_set(data_handle, dest); if (already_sent == 0) { *cache_flag = 0; if (data_tag == -1) _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n"); _STARPU_MPI_DEBUG(1, "Send data %p to %d\n", data_handle, dest); req = _starpu_mpi_isend_common(data_handle, dest, data_tag, comm, detached, sync, prio, callback, _arg, sequential_consistency); } else { _STARPU_MPI_DEBUG(1, "STARPU CACHE: Data already sent\n"); *cache_flag = 1; if (callback) callback(_arg); } return req; } struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count, int prio) { if (_starpu_mpi_fake_world_size != -1) { /* Don't actually do the communication */ return NULL; } struct _starpu_mpi_req *req = _starpu_mpi_request_fill(data_handle, source, data_tag, comm, detached, sync, prio, callback, arg, RECV_REQ, _mpi_backend._starpu_mpi_backend_irecv_size_func, sequential_consistency, is_internal_req, count); _starpu_mpi_req_willpost(req); if (sequential_consistency == 0) { /* Synchronization task jobid from redux is used */ _starpu_mpi_redux_fill_post_sync_jobid(arg, &(req->post_sync_jobid)); } _starpu_mpi_isend_irecv_common(req, STARPU_W, sequential_consistency); return req; } int _starpu_mpi_irecv_prio(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm) { _STARPU_MPI_LOG_IN(); STARPU_MPI_ASSERT_MSG(public_req, "starpu_mpi_irecv needs a valid starpu_mpi_req"); struct _starpu_mpi_req *req; _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN(source, data_tag); req = _starpu_mpi_irecv_common(data_handle, source, data_tag, comm, 0, 0, NULL, NULL, 1, 0, 0, prio); _STARPU_MPI_TRACE_IRECV_COMPLETE_END(source, data_tag); STARPU_MPI_ASSERT_MSG(req, "Invalid return for _starpu_mpi_irecv_common"); *public_req = req; _STARPU_MPI_LOG_OUT(); return 0; } int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm) { return _starpu_mpi_irecv_prio(data_handle, public_req, source, data_tag, STARPU_DEFAULT_PRIO, comm); } int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg) { _STARPU_MPI_LOG_IN(); _starpu_mpi_irecv_common(data_handle, source, data_tag, comm, 1, 0, callback, arg, 1, 0, 0, STARPU_DEFAULT_PRIO); _STARPU_MPI_LOG_OUT(); return 0; } int starpu_mpi_irecv_detached_prio(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, void (*callback)(void *), void *arg) { _STARPU_MPI_LOG_IN(); _starpu_mpi_irecv_common(data_handle, source, data_tag, comm, 1, 0, callback, arg, 1, 0, 0, prio); _STARPU_MPI_LOG_OUT(); return 0; } int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency) { _STARPU_MPI_LOG_IN(); _starpu_mpi_irecv_common(data_handle, source, data_tag, comm, 1, 0, callback, arg, sequential_consistency, 0, 0, STARPU_DEFAULT_PRIO); _STARPU_MPI_LOG_OUT(); return 0; } int _starpu_mpi_recv_prio(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, MPI_Status *status) { STARPU_ASSERT_MSG(status != NULL || status == MPI_STATUS_IGNORE, "MPI_Status value cannot be NULL or different from MPI_STATUS_IGNORE"); starpu_mpi_req req; int ret; _STARPU_MPI_LOG_IN(); ret = _starpu_mpi_irecv_prio(data_handle, &req, source, data_tag, prio, comm); if (ret) return ret; ret = starpu_mpi_wait(&req, status); _STARPU_MPI_LOG_OUT(); return ret; } int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, MPI_Status *status) { return _starpu_mpi_recv_prio(data_handle, source, data_tag, STARPU_DEFAULT_PRIO, comm, status); } int starpu_mpi_recv_prio(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, MPI_Status *status) { return _starpu_mpi_recv_prio(data_handle, source, data_tag, prio, comm, status); } struct _starpu_mpi_req* _starpu_mpi_irecv_cache_aware(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *_arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count, int* cache_flag) { struct _starpu_mpi_req* req = NULL; int already_received = starpu_mpi_cached_cp_receive_set(data_handle); if (already_received == 0) { if (data_tag == -1) _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n"); _STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, source); req = _starpu_mpi_irecv_common(data_handle, source, data_tag, comm, detached, sync, callback, _arg, sequential_consistency, is_internal_req, count, STARPU_DEFAULT_PRIO); //TODO: Allow to pass prio in args *cache_flag = 0; } else { _STARPU_MPI_DEBUG(1, "STARPU CACHE: Data already received\n"); *cache_flag =1; if (callback) callback(_arg); } return req; } int starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status) { STARPU_ASSERT_MSG(status != NULL || status == MPI_STATUS_IGNORE, "MPI_Status value cannot be NULL or different from MPI_STATUS_IGNORE"); return _mpi_backend._starpu_mpi_backend_wait(public_req, status); } int starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status) { STARPU_ASSERT_MSG(status != NULL || status == MPI_STATUS_IGNORE, "MPI_Status value cannot be NULL or different from MPI_STATUS_IGNORE"); return _mpi_backend._starpu_mpi_backend_test(public_req, flag, status); } int starpu_mpi_barrier(MPI_Comm comm) { return _mpi_backend._starpu_mpi_backend_barrier(comm); } void _starpu_mpi_data_clear(starpu_data_handle_t data_handle) { struct _starpu_mpi_data *data = data_handle->mpi_data; _mpi_backend._starpu_mpi_backend_data_clear(data_handle); _starpu_mpi_cache_data_clear(data_handle); _starpu_spin_destroy(&data->coop_lock); free(data->redux_map); data->redux_map = NULL; free(data); } struct _starpu_mpi_data *_starpu_mpi_data_get(starpu_data_handle_t data_handle) { struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; if (mpi_data) { STARPU_ASSERT(mpi_data->magic == 42); } else { _STARPU_CALLOC(mpi_data, 1, sizeof(struct _starpu_mpi_data)); mpi_data->magic = 42; mpi_data->node_tag.data_tag = -1; mpi_data->node_tag.node.rank = -1; mpi_data->node_tag.node.comm = MPI_COMM_WORLD; mpi_data->nb_future_sends = 0; _starpu_spin_init(&mpi_data->coop_lock); data_handle->mpi_data = mpi_data; _starpu_mpi_cache_data_init(data_handle); _starpu_data_set_unregister_hook(data_handle, _starpu_mpi_data_clear); } return mpi_data; } void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag, int rank, MPI_Comm comm) { struct _starpu_mpi_data *mpi_data = _starpu_mpi_data_get(data_handle); if (data_tag != -1) { _mpi_backend._starpu_mpi_backend_data_register(data_handle, data_tag); mpi_data->node_tag.data_tag = data_tag; _STARPU_MPI_TRACE_DATA_SET_TAG(data_handle, data_tag); } if (rank != -1) { _STARPU_MPI_TRACE_DATA_SET_RANK(data_handle, rank); mpi_data->node_tag.node.rank = rank; mpi_data->node_tag.node.comm = comm; } } void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm) { starpu_mpi_data_register_comm(handle, -1, rank, comm); } void starpu_mpi_data_set_tag(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag) { starpu_mpi_data_register_comm(handle, data_tag, -1, MPI_COMM_WORLD); } int starpu_mpi_data_get_rank(starpu_data_handle_t data) { STARPU_ASSERT_MSG(data->mpi_data, "starpu_mpi_data_register MUST be called for data %p\n", data); return ((struct _starpu_mpi_data *)(data->mpi_data))->node_tag.node.rank; } starpu_mpi_tag_t starpu_mpi_data_get_tag(starpu_data_handle_t data) { STARPU_ASSERT_MSG(data->mpi_data, "starpu_mpi_data_register MUST be called for data %p\n", data); return ((struct _starpu_mpi_data *)(data->mpi_data))->node_tag.data_tag; } char* starpu_mpi_data_get_redux_map(starpu_data_handle_t data) { STARPU_ASSERT_MSG(data->mpi_data, "starpu_mpi_data_register MUST be called for data %p\n", data); return ((struct _starpu_mpi_data *)(data->mpi_data))->redux_map; } int starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg) { int me, rank; starpu_mpi_tag_t data_tag; rank = starpu_mpi_data_get_rank(data_handle); if (rank == -1) { _STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register() or starpu_mpi_data_register_comm()\n"); } starpu_mpi_comm_rank(comm, &me); if (node == rank) return 0; data_tag = starpu_mpi_data_get_tag(data_handle); if (data_tag == -1) { _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register() or starpu_mpi_data_register_comm()\n"); } if (me == node) { _STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node); int already_received = starpu_mpi_cached_receive_set(data_handle); if (already_received == 0) { _STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, rank); return starpu_mpi_irecv_detached(data_handle, rank, data_tag, comm, callback, arg); } } else if (me == rank) { _STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node); int already_sent = starpu_mpi_cached_send_set(data_handle, node); if (already_sent == 0) { _STARPU_MPI_DEBUG(1, "Sending data %p to %d\n", data_handle, node); return starpu_mpi_isend_detached(data_handle, node, data_tag, comm, NULL, NULL); } } return 0; } int starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node) { int me, rank; starpu_mpi_tag_t data_tag; rank = starpu_mpi_data_get_rank(data_handle); if (rank == -1) { _STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n"); } starpu_mpi_comm_rank(comm, &me); if (node == rank) return 0; data_tag = starpu_mpi_data_get_tag(data_handle); if (data_tag == -1) { _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n"); } if (me == node) { MPI_Status status; _STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node); int already_received = starpu_mpi_cached_receive_set(data_handle); if (already_received == 0) { _STARPU_MPI_DEBUG(1, "Receiving data %p from %d\n", data_handle, rank); return starpu_mpi_recv(data_handle, rank, data_tag, comm, &status); } } else if (me == rank) { _STARPU_MPI_DEBUG(1, "Migrating data %p from %d to %d\n", data_handle, rank, node); int already_sent = starpu_mpi_cached_send_set(data_handle, node); if (already_sent == 0) { _STARPU_MPI_DEBUG(1, "Sending data %p to %d\n", data_handle, node); return starpu_mpi_send(data_handle, node, data_tag, comm); } } return 0; } void starpu_mpi_get_data_on_all_nodes_detached(MPI_Comm comm, starpu_data_handle_t data_handle) { int size, i; starpu_mpi_comm_size(comm, &size); for (i = 0; i < size; i++) starpu_mpi_get_data_on_node_detached(comm, data_handle, i, NULL, NULL); } void starpu_mpi_data_migrate(MPI_Comm comm, starpu_data_handle_t data, int new_rank) { int old_rank = starpu_mpi_data_get_rank(data); if (new_rank == old_rank) /* Already there */ return; /* First submit data migration if it's not already on destination */ starpu_mpi_get_data_on_node_detached(comm, data, new_rank, NULL, NULL); /* And note new owner */ starpu_mpi_data_set_rank_comm(data, new_rank, comm); /* Flush cache in all other nodes */ /* TODO: Ideally we'd transmit the knowledge of who owns it */ /* TODO: or at least remember that the previous owner has the data, that's an easy case to support */ starpu_mpi_cache_flush(comm, data); return; } int starpu_mpi_wait_for_all(MPI_Comm comm) { /* If the user forgets to call mpi_redux_data or insert R tasks on the reduced handles */ /* then, we wrap reduction patterns for them. This is typical of benchmarks */ _starpu_mpi_redux_wrapup_data_all(); return _mpi_backend._starpu_mpi_backend_wait_for_all(comm); } void starpu_mpi_comm_stats_disable() { _starpu_mpi_comm_stats_disable(); } void starpu_mpi_comm_stats_enable() { _starpu_mpi_comm_stats_enable(); } int _starpu_mpi_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, MPI_Comm comm, int asynchronous, void (*callback_func)(void *), void *callback_arg, int priority) { int src, dst; int ret; if (dst_handle == src_handle) { if (callback_func) callback_func(callback_arg); return 0; } ret = 0; src = starpu_mpi_data_get_rank(src_handle); dst = starpu_mpi_data_get_rank(dst_handle); if (src == dst) // Both data are on the same node, no need to transfer data ret = starpu_data_cpy_priority(dst_handle, src_handle, asynchronous, callback_func, callback_arg, priority); else { // We need to transfer data int rank; starpu_mpi_tag_t tag; tag = starpu_mpi_data_get_tag(dst_handle); starpu_mpi_comm_rank(comm, &rank); if (rank == src) { if (asynchronous == 1) ret = starpu_mpi_isend_detached_prio(src_handle, dst, tag, priority, comm, NULL, NULL); else ret = starpu_mpi_send_prio(src_handle, dst, tag, priority, comm); } else if (rank == dst) { if (asynchronous == 1) ret = starpu_mpi_irecv_detached_prio(dst_handle, src, tag, priority, comm, callback_func, callback_arg); else { ret = starpu_mpi_recv_prio(dst_handle, src, tag, priority, comm, MPI_STATUS_IGNORE); if (callback_func) callback_func(callback_arg); } return ret; } } starpu_mpi_cache_flush(comm, dst_handle); return ret; } int starpu_mpi_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, MPI_Comm comm, int asynchronous, void (*callback_func)(void *), void *callback_arg) { return _starpu_mpi_data_cpy(dst_handle, src_handle, comm, asynchronous, callback_func, callback_arg, STARPU_DEFAULT_PRIO); } int starpu_mpi_data_cpy_priority(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, MPI_Comm comm, int asynchronous, void (*callback_func)(void *), void *callback_arg, int priority) { return _starpu_mpi_data_cpy(dst_handle, src_handle, comm, asynchronous, callback_func, callback_arg, priority); } starpu-1.4.9+dfsg/mpi/src/starpu_mpi_cache.c000066400000000000000000000305411507764646700210410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include /* Whether we are allowed to keep copies of remote data. */ struct _starpu_data_entry { UT_hash_handle hh; starpu_data_handle_t data_handle; }; static starpu_pthread_mutex_t _cache_mutex; static struct _starpu_data_entry *_cache_data = NULL; int _starpu_cache_enabled=1; static MPI_Comm _starpu_cache_comm; static int _starpu_cache_comm_size; static void _starpu_mpi_cache_flush_nolock(starpu_data_handle_t data_handle); int starpu_mpi_cache_is_enabled() { return _starpu_cache_enabled==1; } int starpu_mpi_cache_set(int enabled) { if (enabled == 1) { _starpu_cache_enabled = 1; } else { if (_starpu_cache_enabled) { // We need to clean the cache starpu_mpi_cache_flush_all_data(_starpu_cache_comm); _starpu_mpi_cache_shutdown(); } _starpu_cache_enabled = 0; } return 0; } void _starpu_mpi_cache_init(MPI_Comm comm) { _starpu_cache_enabled = starpu_getenv_number("STARPU_MPI_CACHE"); if (_starpu_cache_enabled == -1) { _starpu_cache_enabled = 1; } if (_starpu_cache_enabled == 0) { _STARPU_DISP("Warning: StarPU MPI Communication cache is disabled\n"); return; } _starpu_cache_comm = comm; starpu_mpi_comm_size(comm, &_starpu_cache_comm_size); _starpu_mpi_cache_stats_init(); STARPU_PTHREAD_MUTEX_INIT(&_cache_mutex, NULL); } void _starpu_mpi_cache_shutdown(void) { if (_starpu_cache_enabled == 0) return; struct _starpu_data_entry *entry=NULL, *tmp=NULL; STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); HASH_ITER(hh, _cache_data, entry, tmp) { HASH_DEL(_cache_data, entry); free(entry); } STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); STARPU_PTHREAD_MUTEX_DESTROY(&_cache_mutex); _starpu_mpi_cache_stats_shutdown(); } void _starpu_mpi_cache_data_clear(starpu_data_handle_t data_handle) { struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; if (_starpu_cache_enabled == 1) { struct _starpu_data_entry *entry; STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); _starpu_mpi_cache_flush_nolock(data_handle); HASH_FIND_PTR(_cache_data, &data_handle, entry); if (entry != NULL) { HASH_DEL(_cache_data, entry); free(entry); } STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); } free(mpi_data->cache_sent); } void _starpu_mpi_cache_data_init(starpu_data_handle_t data_handle) { int i; struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; if (_starpu_cache_enabled == 0) return; STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); mpi_data->cache_received = 0; mpi_data->ft_induced_cache_received = 0; mpi_data->ft_induced_cache_received_count = 0; _STARPU_MALLOC(mpi_data->cache_sent, _starpu_cache_comm_size*sizeof(mpi_data->cache_sent[0])); for(i=0 ; i<_starpu_cache_comm_size ; i++) { mpi_data->cache_sent[i] = 0; } STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); } static void _starpu_mpi_cache_data_add_nolock(starpu_data_handle_t data_handle) { struct _starpu_data_entry *entry; if (_starpu_cache_enabled == 0) return; HASH_FIND_PTR(_cache_data, &data_handle, entry); if (entry == NULL) { _STARPU_MPI_MALLOC(entry, sizeof(*entry)); entry->data_handle = data_handle; HASH_ADD_PTR(_cache_data, data_handle, entry); } } static void _starpu_mpi_cache_data_remove_nolock(starpu_data_handle_t data_handle) { struct _starpu_data_entry *entry; if (_starpu_cache_enabled == 0) return; HASH_FIND_PTR(_cache_data, &data_handle, entry); if (entry) { HASH_DEL(_cache_data, entry); free(entry); } } /************************************** * Received cache **************************************/ void starpu_mpi_cached_receive_clear(starpu_data_handle_t data_handle) { int mpi_rank = starpu_mpi_data_get_rank(data_handle); struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; if (_starpu_cache_enabled == 0) return; STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); STARPU_ASSERT(mpi_data->magic == 42); STARPU_MPI_ASSERT_MSG(mpi_rank < _starpu_cache_comm_size, "Node %d invalid. Max node is %d\n", mpi_rank, _starpu_cache_comm_size); if (mpi_data->cache_received == 1) { #ifdef STARPU_DEVEL # warning TODO: Somebody else will write to the data, so discard our cached copy if any. starpu_mpi could just remember itself. #endif _STARPU_MPI_DEBUG(2, "Clearing receive cache for data %p\n", data_handle); mpi_data->cache_received = 0; mpi_data->ft_induced_cache_received = 0; mpi_data->ft_induced_cache_received_count = 0; starpu_data_invalidate_submit(data_handle); _starpu_mpi_cache_data_remove_nolock(data_handle); _starpu_mpi_cache_stats_dec(mpi_rank, data_handle); } STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); } int starpu_mpi_cached_receive_set(starpu_data_handle_t data_handle) { int mpi_rank = starpu_mpi_data_get_rank(data_handle); struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; if (_starpu_cache_enabled == 0) return 0; STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); STARPU_ASSERT(mpi_data->magic == 42); STARPU_MPI_ASSERT_MSG(mpi_rank < _starpu_cache_comm_size, "Node %d invalid. Max node is %d\n", mpi_rank, _starpu_cache_comm_size); int already_received = mpi_data->cache_received; if (already_received == 0) { _STARPU_MPI_DEBUG(2, "Noting that data %p has already been received by %d\n", data_handle, mpi_rank); mpi_data->cache_received = 1; _starpu_mpi_cache_data_add_nolock(data_handle); _starpu_mpi_cache_stats_inc(mpi_rank, data_handle); } else { #ifdef STARPU_USE_MPI_FT_STATS if (mpi_data->ft_induced_cache_received == 1 && mpi_data->ft_induced_cache_received_count == 0) { _STARPU_MPI_FT_STATS_RECV_CACHED_CP_DATA(starpu_data_get_size(data_handle)); _STARPU_MPI_FT_STATS_CANCEL_RECV_CP_DATA(starpu_data_get_size(data_handle)); mpi_data->ft_induced_cache_received_count = 1; } #endif //STARPU_USE_MPI_FT_STATS _STARPU_MPI_DEBUG(2, "Do not receive data %p from node %d as it is already available\n", data_handle, mpi_rank); } STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); return already_received; } int starpu_mpi_cached_cp_receive_set(starpu_data_handle_t data_handle) { int mpi_rank = starpu_mpi_data_get_rank(data_handle); struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; if (_starpu_cache_enabled == 0) return 0; STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); STARPU_ASSERT(mpi_data->magic == 42); STARPU_MPI_ASSERT_MSG(mpi_rank < _starpu_cache_comm_size, "Node %d invalid. Max node is %d\n", mpi_rank, _starpu_cache_comm_size); int already_received = mpi_data->cache_received; if (already_received == 0) { _STARPU_MPI_DEBUG(2, "Noting that data %p has already been received by %d\n", data_handle, mpi_rank); mpi_data->cache_received = 1; mpi_data->ft_induced_cache_received = 1; #ifdef STARPU_USE_MPI_FT_STATS _STARPU_MPI_FT_STATS_RECV_CP_DATA(starpu_data_get_size(data_handle)); #endif _starpu_mpi_cache_data_add_nolock(data_handle); _starpu_mpi_cache_stats_inc(mpi_rank, data_handle); } else { #ifdef STARPU_USE_MPI_FT_STATS if (mpi_data->ft_induced_cache_received == 1) _STARPU_MPI_FT_STATS_RECV_CP_CACHED_CP_DATA(starpu_data_get_size(data_handle)); else _STARPU_MPI_FT_STATS_RECV_CACHED_CP_DATA(starpu_data_get_size(data_handle)); #endif _STARPU_MPI_DEBUG(2, "Do not receive data %p from node %d as it is already available\n", data_handle, mpi_rank); } STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); return already_received; } int starpu_mpi_cached_receive(starpu_data_handle_t data_handle) { int already_received; struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; if (_starpu_cache_enabled == 0) return 0; STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); STARPU_ASSERT(mpi_data->magic == 42); already_received = mpi_data->cache_received; STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); return already_received; } /************************************** * Send cache **************************************/ void starpu_mpi_cached_send_clear(starpu_data_handle_t data_handle) { int n, size; struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; if (_starpu_cache_enabled == 0) return; STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); starpu_mpi_comm_size(mpi_data->node_tag.node.comm, &size); for(n=0 ; ncache_sent[n] == 1) { _STARPU_MPI_DEBUG(2, "Clearing send cache for data %p\n", data_handle); mpi_data->cache_sent[n] = 0; _starpu_mpi_cache_data_remove_nolock(data_handle); } } STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); } int starpu_mpi_cached_send_set(starpu_data_handle_t data_handle, int dest) { struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; if (_starpu_cache_enabled == 0) return 0; STARPU_MPI_ASSERT_MSG(dest < _starpu_cache_comm_size, "Node %d invalid. Max node is %d\n", dest, _starpu_cache_comm_size); STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); int already_sent = mpi_data->cache_sent[dest]; if (mpi_data->cache_sent[dest] == 0) { mpi_data->cache_sent[dest] = 1; _starpu_mpi_cache_data_add_nolock(data_handle); _STARPU_MPI_DEBUG(2, "Noting that data %p has already been sent to %d\n", data_handle, dest); } else { _STARPU_MPI_DEBUG(2, "Do not send data %p to node %d as it has already been sent\n", data_handle, dest); } STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); return already_sent; } int starpu_mpi_cached_send(starpu_data_handle_t data_handle, int dest) { struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; int already_sent; if (_starpu_cache_enabled == 0) return 0; STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); STARPU_MPI_ASSERT_MSG(dest < _starpu_cache_comm_size, "Node %d invalid. Max node is %d\n", dest, _starpu_cache_comm_size); already_sent = mpi_data->cache_sent[dest]; STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); return already_sent; } static void _starpu_mpi_cache_flush_nolock(starpu_data_handle_t data_handle) { struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; int i, nb_nodes; if (_starpu_cache_enabled == 0) return; starpu_mpi_comm_size(mpi_data->node_tag.node.comm, &nb_nodes); for(i=0 ; icache_sent[i] == 1) { _STARPU_MPI_DEBUG(2, "Clearing send cache for data %p\n", data_handle); mpi_data->cache_sent[i] = 0; _starpu_mpi_cache_stats_dec(i, data_handle); } } if (mpi_data->cache_received == 1) { int mpi_rank = starpu_mpi_data_get_rank(data_handle); _STARPU_MPI_DEBUG(2, "Clearing received cache for data %p\n", data_handle); mpi_data->cache_received = 0; mpi_data->ft_induced_cache_received = 0; mpi_data->ft_induced_cache_received_count = 0; _starpu_mpi_cache_stats_dec(mpi_rank, data_handle); } } static void _starpu_mpi_cache_flush_and_invalidate_nolock(MPI_Comm comm, starpu_data_handle_t data_handle) { int my_rank, mpi_rank; _starpu_mpi_cache_flush_nolock(data_handle); starpu_mpi_comm_rank(comm, &my_rank); mpi_rank = starpu_mpi_data_get_rank(data_handle); if (mpi_rank != my_rank && mpi_rank != -1) // Clean the memory on nodes which do not own the data starpu_data_invalidate_submit(data_handle); } void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle) { _starpu_mpi_data_flush(data_handle); if (_starpu_cache_enabled == 0) return; STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); _starpu_mpi_cache_flush_and_invalidate_nolock(comm, data_handle); _starpu_mpi_cache_data_remove_nolock(data_handle); STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); } void starpu_mpi_cache_flush_all_data(MPI_Comm comm) { struct _starpu_data_entry *entry=NULL, *tmp=NULL; if (_starpu_cache_enabled == 0) return; STARPU_PTHREAD_MUTEX_LOCK(&_cache_mutex); HASH_ITER(hh, _cache_data, entry, tmp) { _starpu_mpi_cache_flush_and_invalidate_nolock(comm, entry->data_handle); HASH_DEL(_cache_data, entry); free(entry); } STARPU_PTHREAD_MUTEX_UNLOCK(&_cache_mutex); } starpu-1.4.9+dfsg/mpi/src/starpu_mpi_cache.h000066400000000000000000000022331507764646700210430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_CACHE_H__ #define __STARPU_MPI_CACHE_H__ #include #include #include /** @file */ #ifdef __cplusplus extern "C" { #endif extern int _starpu_cache_enabled; void _starpu_mpi_cache_init(MPI_Comm comm); void _starpu_mpi_cache_shutdown(void); void _starpu_mpi_cache_data_init(starpu_data_handle_t data_handle); void _starpu_mpi_cache_data_clear(starpu_data_handle_t data_handle); #ifdef __cplusplus } #endif #endif // __STARPU_MPI_CACHE_H__ starpu-1.4.9+dfsg/mpi/src/starpu_mpi_cache_stats.c000066400000000000000000000031131507764646700222520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include static int stats_enabled=0; void _starpu_mpi_cache_stats_init() { stats_enabled = starpu_getenv_number("STARPU_MPI_CACHE_STATS"); if (stats_enabled == -1) { stats_enabled = 0; } if (stats_enabled == 0) return; _STARPU_DISP("Warning: StarPU is executed with STARPU_MPI_CACHE_STATS=1, which slows down a bit\n"); } void _starpu_mpi_cache_stats_shutdown() { if (stats_enabled == 0) return; } void _starpu_mpi_cache_stats_update(unsigned dst, starpu_data_handle_t data_handle, int count) { size_t size; if (stats_enabled == 0) return; size = starpu_data_get_size(data_handle); if (count == 1) { _STARPU_MPI_MSG("[communication cache] + %10ld to %u\n", (long)size, dst); } else // count == -1 { _STARPU_MPI_MSG("[communication cache] - %10ld from %u\n", (long)size, dst); } } starpu-1.4.9+dfsg/mpi/src/starpu_mpi_cache_stats.h000066400000000000000000000024651507764646700222700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_CACHE_STATS_H__ #define __STARPU_MPI_CACHE_STATS_H__ #include #include #include /** @file */ #ifdef __cplusplus extern "C" { #endif void _starpu_mpi_cache_stats_init(); void _starpu_mpi_cache_stats_shutdown(); void _starpu_mpi_cache_stats_update(unsigned dst, starpu_data_handle_t data_handle, int count); #define _starpu_mpi_cache_stats_inc(dst, data_handle) _starpu_mpi_cache_stats_update(dst, data_handle, +1) #define _starpu_mpi_cache_stats_dec(dst, data_handle) _starpu_mpi_cache_stats_update(dst, data_handle, -1) #ifdef __cplusplus } #endif #endif // __STARPU_MPI_CACHE_STATS_H__ starpu-1.4.9+dfsg/mpi/src/starpu_mpi_collective.c000066400000000000000000000114341507764646700221270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include struct _callback_arg { void (*callback)(void *); void *arg; int nb; int count; }; static void _callback_collective(void *arg) { struct _callback_arg *callback_arg = arg; callback_arg->nb ++; if (callback_arg->nb == callback_arg->count) { callback_arg->callback(callback_arg->arg); free(callback_arg); } } static int _callback_set(int rank, starpu_data_handle_t *data_handles, int count, int root, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg, void (**callback_func)(void *), struct _callback_arg **callback_arg) { void (*callback)(void *); callback = (rank == root) ? scallback : rcallback; if (*callback) { int x; *callback_func = _callback_collective; _STARPU_MPI_MALLOC(*callback_arg, sizeof(struct _callback_arg)); (*callback_arg)->count = 0; (*callback_arg)->nb = 0; (*callback_arg)->callback = (rank == root) ? scallback : rcallback; (*callback_arg)->arg = (rank == root) ? sarg : rarg; for(x = 0; x < count ; x++) { if (data_handles[x]) { int owner = starpu_mpi_data_get_rank(data_handles[x]); starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(data_handles[x]); STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle"); if ((rank == root) && (owner != root)) { (*callback_arg)->count ++; } if ((rank != root) && (owner == rank)) { (*callback_arg)->count ++; } } } if (!(*callback_arg)->count) { free(*callback_arg); return 1; } } return 0; } int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg) { int rank; int x; struct _callback_arg *callback_arg = NULL; void (*callback_func)(void *) = NULL; starpu_mpi_comm_rank(comm, &rank); x = _callback_set(rank, data_handles, count, root, scallback, sarg, rcallback, rarg, &callback_func, &callback_arg); if (x == 1) return 0; for(x = 0; x < count ; x++) { if (data_handles[x]) { int ret; int owner = starpu_mpi_data_get_rank(data_handles[x]); starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(data_handles[x]); STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle"); if ((rank == root) && (owner != root)) { //fprintf(stderr, "[%d] Sending data[%d] to %d\n", rank, x, owner); ret = starpu_mpi_isend_detached(data_handles[x], owner, data_tag, comm, callback_func, callback_arg); if (ret) return ret; } if ((rank != root) && (owner == rank)) { //fprintf(stderr, "[%d] Receiving data[%d] from %d\n", rank, x, root); ret = starpu_mpi_irecv_detached(data_handles[x], root, data_tag, comm, callback_func, callback_arg); if (ret) return ret; } } } return 0; } int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg) { int rank; int x; struct _callback_arg *callback_arg = NULL; void (*callback_func)(void *) = NULL; starpu_mpi_comm_rank(comm, &rank); x = _callback_set(rank, data_handles, count, root, scallback, sarg, rcallback, rarg, &callback_func, &callback_arg); if (x == 1) return 0; for(x = 0; x < count ; x++) { if (data_handles[x]) { int ret; int owner = starpu_mpi_data_get_rank(data_handles[x]); starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(data_handles[x]); STARPU_ASSERT_MSG(data_tag >= 0, "Invalid tag for data handle"); if ((rank == root) && (owner != root)) { //fprintf(stderr, "[%d] Receiving data[%d] from %d\n", rank, x, owner); ret = starpu_mpi_irecv_detached(data_handles[x], owner, data_tag, comm, callback_func, callback_arg); if (ret) return ret; } if ((rank != root) && (owner == rank)) { //fprintf(stderr, "[%d] Sending data[%d] to %d\n", rank, x, root); ret = starpu_mpi_isend_detached(data_handles[x], root, data_tag, comm, callback_func, callback_arg); if (ret) return ret; } } } return 0; } starpu-1.4.9+dfsg/mpi/src/starpu_mpi_coop_sends.c000066400000000000000000000305741507764646700221400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include /* * One node sends the same data to several nodes. Gather them into a * "coop_sends", which then has a global view of all the required sends, and can * establish a diffusion tree by telling receiving nodes to retransmit what they * received (forwards) to others, and to others that they will receive from the * former (redirects). */ /* This is called after a request is finished processing, to release the data */ void _starpu_mpi_release_req_data(struct _starpu_mpi_req *req) { if (!req->data_handle) return; if (_starpu_mpi_req_multilist_queued_coop_sends(req)) { struct _starpu_mpi_coop_sends *coop_sends = req->coop_sends_head; assert(coop_sends != NULL); struct _starpu_mpi_data *mpi_data = coop_sends->mpi_data; int last; _starpu_spin_lock(&mpi_data->coop_lock); /* Part of a cooperative send, dequeue ourself from others */ _starpu_mpi_req_multilist_erase_coop_sends(&coop_sends->reqs, req); last = _starpu_mpi_req_multilist_empty_coop_sends(&coop_sends->reqs); _starpu_spin_unlock(&mpi_data->coop_lock); if (last) { /* We were last, release data */ free(coop_sends->reqs_array); free(coop_sends); starpu_data_release_on_node(req->data_handle, req->node); } } else { /* Trivial request */ starpu_data_release_on_node(req->data_handle, req->node); } } /* The data was acquired in terms of dependencies, we can now look the * current state of the handle and decide which node we prefer for the data * fetch */ static void _starpu_mpi_coop_send_acquired_callback(void *arg, int *nodep, enum starpu_data_access_mode mode) { struct _starpu_mpi_coop_sends *coop_sends = arg; int node = *nodep; if (node < 0) node = _starpu_mpi_choose_node(coop_sends->data_handle, mode); /* Record the node in the first req */ _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs)->node = node; *nodep = node; } /* Comparison function for getting qsort to put requests with high priority first */ static int _starpu_mpi_reqs_prio_compare(const void *a, const void *b) { const struct _starpu_mpi_req * const *ra = a; const struct _starpu_mpi_req * const *rb = b; if ((*rb)->prio < (*ra)->prio) return -1; else if ((*rb)->prio == (*ra)->prio) return 0; else return 1; } /* Sort the requests by priority and build a diffusion tree. Actually does something only once per coop_sends bag. */ static void _starpu_mpi_coop_sends_optimize(struct _starpu_mpi_coop_sends *coop_sends) { STARPU_ASSERT(coop_sends->n > 1); _starpu_spin_lock(&coop_sends->lock); if (!coop_sends->reqs_array) { unsigned n = coop_sends->n, i; struct _starpu_mpi_req *cur; struct _starpu_mpi_req **reqs; _STARPU_MPI_DEBUG(0, "handling cooperative sends %p for %u neighbours\n", coop_sends, n); /* Store them in an array */ _STARPU_CALLOC(reqs, n, sizeof(*reqs)); for (cur = _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs), i = 0; cur != _starpu_mpi_req_multilist_end_coop_sends(&coop_sends->reqs); cur = _starpu_mpi_req_multilist_next_coop_sends(cur), i++) reqs[i] = cur; coop_sends->reqs_array = reqs; /* Sort them */ qsort(reqs, n, sizeof(*reqs), _starpu_mpi_reqs_prio_compare); #if 0 /* And build the diffusion tree */ _starpu_mpi_coop_sends_build_tree(coop_sends); #endif } _starpu_spin_unlock(&coop_sends->lock); } /* This is called on completion of acquisition of data for a cooperative send */ static void _starpu_mpi_coop_sends_data_ready(void *arg) { _STARPU_MPI_LOG_IN(); struct _starpu_mpi_coop_sends *coop_sends = arg; struct _starpu_mpi_data *mpi_data = coop_sends->mpi_data; struct _starpu_mpi_req *cur; unsigned node; /* Take the cooperative send bag out from more submissions */ if (mpi_data->coop_sends == coop_sends) { _starpu_spin_lock(&mpi_data->coop_lock); if (mpi_data->coop_sends == coop_sends) mpi_data->coop_sends = NULL; _starpu_spin_unlock(&mpi_data->coop_lock); } /* Copy over the memory node number */ cur = _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs); node = cur->node; for (; cur != _starpu_mpi_req_multilist_end_coop_sends(&coop_sends->reqs); cur = _starpu_mpi_req_multilist_next_coop_sends(cur)) { cur->node = node; cur->pre_sync_jobid = coop_sends->pre_sync_jobid; // for tracing purposes } if (coop_sends->n == 1) { /* Trivial case, just submit it */ _starpu_mpi_submit_ready_request(_starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs)); } else { /* Build diffusion tree */ _starpu_mpi_coop_sends_optimize(coop_sends); /* And submit them */ if (STARPU_TEST_AND_SET(&coop_sends->redirects_sent, 1) == 0) { mpi_data->nb_future_sends = 0; _starpu_mpi_submit_coop_sends(coop_sends, 1, 1); } else _starpu_mpi_submit_coop_sends(coop_sends, 0, 1); } _STARPU_MPI_LOG_OUT(); } /* This is called when we want to stop including new members in a cooperative send, * either because we know there won't be any other members due to the algorithm * or because the value has changed. */ static void _starpu_mpi_coop_send_flush(struct _starpu_mpi_coop_sends *coop_sends) { if (!coop_sends || coop_sends->n == 1) return; /* Build diffusion tree */ _starpu_mpi_coop_sends_optimize(coop_sends); /* And submit them */ if (STARPU_TEST_AND_SET(&coop_sends->redirects_sent, 1) == 0) _starpu_mpi_submit_coop_sends(coop_sends, 1, 0); } /* This is called when a write to the data was just submitted, which means we * can't make future sends cooperate with past sends since it's not the same value */ void _starpu_mpi_data_flush(starpu_data_handle_t data_handle) { struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; struct _starpu_mpi_coop_sends *coop_sends; if (!mpi_data) return; _starpu_spin_lock(&mpi_data->coop_lock); coop_sends = mpi_data->coop_sends; if (coop_sends) mpi_data->coop_sends = NULL; _starpu_spin_unlock(&mpi_data->coop_lock); if (coop_sends) { _STARPU_MPI_DEBUG(0, "%p: data written to, flush cooperative sends %p\n", data_handle, coop_sends); _starpu_mpi_coop_send_flush(coop_sends); } } /* Test whether a request is compatible with a cooperative send */ static int _starpu_mpi_coop_send_compatible(struct _starpu_mpi_req *req, struct _starpu_mpi_coop_sends *coop_sends) { if (!_starpu_cache_enabled) { /* If MPI cache isn't enabled, duplicates can appear in the list * of recipients. * Presence of duplicates can lead to deadlocks, so if adding * this req request to the coop_sends will introduce * duplicates, we consider this req as incompatible. * * This a requirement coming from the NewMadeleine * implementation. If one day, there is a MPI implementation, * this constraint might move to the NewMadeleine backend. * * See mpi/tests/coop_cache.c for a test case. */ int inserting_dest = req->node_tag.node.rank; struct _starpu_mpi_req* cur = NULL; for (cur = _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs); cur != _starpu_mpi_req_multilist_end_coop_sends(&coop_sends->reqs); cur = _starpu_mpi_req_multilist_next_coop_sends(cur)) { if (cur->node_tag.node.rank == inserting_dest) { return 0; } } } struct _starpu_mpi_req *prevreq = _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs); return /* we can cope with tag being different */ prevreq->node_tag.node.comm == req->node_tag.node.comm && prevreq->sequential_consistency == req->sequential_consistency; } void _starpu_mpi_coop_send(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency) { struct _starpu_mpi_data *mpi_data = _starpu_mpi_data_get(data_handle); struct _starpu_mpi_coop_sends *coop_sends = NULL, *tofree = NULL; int done = 0, queue, first = 1; /* Try to add ourself to something existing, otherwise create one. */ while (!done) { _starpu_spin_lock(&mpi_data->coop_lock); if (mpi_data->coop_sends) { /* Already something, check we are coherent with it */ queue = _starpu_mpi_coop_send_compatible(req, mpi_data->coop_sends); if (queue) { /* Yes, queue ourself there */ if (coop_sends) { /* Remove ourself from what we created for ourself first */ /* Note 2022-09-21: according to code coverage(see * https://files.inria.fr/starpu/testing/master/coverage/mpi/src/starpu_mpi_coop_sends.c.gcov.html), * this block is dead code. */ _starpu_mpi_req_multilist_erase_coop_sends(&coop_sends->reqs, req); tofree = coop_sends; } coop_sends = mpi_data->coop_sends; _STARPU_MPI_DEBUG(0, "%p: add to cooperative sends %p, dest %d\n", data_handle, coop_sends, req->node_tag.node.rank); /* Get the pre_sync_jobid of the first send request, to build a coherent DAG in the traces: */ struct _starpu_mpi_req *firstreq; firstreq = _starpu_mpi_req_multilist_begin_coop_sends(&coop_sends->reqs); req->pre_sync_jobid = firstreq->pre_sync_jobid; _starpu_mpi_req_multilist_push_back_coop_sends(&coop_sends->reqs, req); coop_sends->n++; req->coop_sends_head = coop_sends; first = 0; done = 1; } else { /* Nope, incompatible, send it as a regular point-to-point communication * * TODO: this could be improved by having several coop_sends "bags" available * simultaneously, which will trigger different broadcasts. */ _starpu_spin_unlock(&mpi_data->coop_lock); _starpu_mpi_isend_irecv_common(req, mode, sequential_consistency); return; } } else if (coop_sends) { /* Nobody else and we have allocated one, we're first! */ _STARPU_MPI_DEBUG(0, "%p: new cooperative sends %p for tag %"PRIi64", dest %d\n", data_handle, coop_sends, req->node_tag.data_tag, req->node_tag.node.rank); mpi_data->coop_sends = coop_sends; first = 1; done = 1; } _starpu_spin_unlock(&mpi_data->coop_lock); if (!done && !coop_sends) { /* Didn't find something to join, create one out of critical section */ _STARPU_MPI_CALLOC(coop_sends, 1, sizeof(*coop_sends)); coop_sends->data_handle = data_handle; coop_sends->redirects_sent = 0; coop_sends->n = 1; _starpu_mpi_req_multilist_head_init_coop_sends(&coop_sends->reqs); _starpu_mpi_req_multilist_push_back_coop_sends(&coop_sends->reqs, req); _starpu_spin_init(&coop_sends->lock); req->coop_sends_head = coop_sends; coop_sends->mpi_data = mpi_data; } /* We at worse do two iteration */ STARPU_ASSERT(done || coop_sends); } STARPU_ASSERT(coop_sends); /* In case we created one for nothing after all */ free(tofree); if ((mpi_data->nb_future_sends != 0 && mpi_data->nb_future_sends == coop_sends->n) || (mpi_data->nb_future_sends == 0 && first)) /* We were first, we are responsible for acquiring the data for everybody */ starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(req->data_handle, -1, mode, _starpu_mpi_coop_send_acquired_callback, _starpu_mpi_coop_sends_data_ready, coop_sends, sequential_consistency, 0, &coop_sends->pre_sync_jobid, NULL, req->prio); else req->pre_sync_jobid = coop_sends->pre_sync_jobid; } void starpu_mpi_coop_sends_data_handle_nb_sends(starpu_data_handle_t data_handle, int nb_sends) { struct _starpu_mpi_data *mpi_data = _starpu_mpi_data_get(data_handle); /* Has no effect is coops are disabled: this attribute is used only in * _starpu_mpi_coop_send() that is called only if coops are enabled */ mpi_data->nb_future_sends = nb_sends; } void starpu_mpi_coop_sends_set_use(int use_coop_sends) { if (starpu_mpi_world_size() <= 2) { _STARPU_DISP("Not enough MPI processes to use coop_sends\n"); return; } _starpu_mpi_use_coop_sends = use_coop_sends; } int starpu_mpi_coop_sends_get_use(void) { return _starpu_mpi_use_coop_sends; } starpu-1.4.9+dfsg/mpi/src/starpu_mpi_datatype.c000066400000000000000000000406701507764646700216150ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include struct _starpu_mpi_datatype_funcs { enum starpu_data_interface_id id; starpu_mpi_datatype_allocate_func_t allocate_datatype_func; starpu_mpi_datatype_node_allocate_func_t allocate_datatype_node_func; starpu_mpi_datatype_free_func_t free_datatype_func; UT_hash_handle hh; }; /* We want to allow applications calling starpu_mpi_interface_datatype_register/unregister as constructor/destructor */ static starpu_pthread_mutex_t _starpu_mpi_datatype_funcs_table_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static struct _starpu_mpi_datatype_funcs *_starpu_mpi_datatype_funcs_table = NULL; void _starpu_mpi_datatype_init(void) { } void _starpu_mpi_datatype_shutdown(void) { } /* * Matrix */ static int handle_to_datatype_matrix(starpu_data_handle_t data_handle, unsigned node, MPI_Datatype *datatype) { struct starpu_matrix_interface *matrix_interface = starpu_data_get_interface_on_node(data_handle, node); int ret; unsigned nx = STARPU_MATRIX_GET_NX(matrix_interface); unsigned ny = STARPU_MATRIX_GET_NY(matrix_interface); unsigned ld = STARPU_MATRIX_GET_LD(matrix_interface); size_t elemsize = STARPU_MATRIX_GET_ELEMSIZE(matrix_interface); ret = MPI_Type_vector(ny, nx*elemsize, ld*elemsize, MPI_BYTE, datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_vector failed"); ret = MPI_Type_commit(datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); return 0; } /* * Block */ static int handle_to_datatype_block(starpu_data_handle_t data_handle, unsigned node, MPI_Datatype *datatype) { struct starpu_block_interface *block_interface = starpu_data_get_interface_on_node(data_handle, node); int ret; unsigned nx = STARPU_BLOCK_GET_NX(block_interface); unsigned ny = STARPU_BLOCK_GET_NY(block_interface); unsigned nz = STARPU_BLOCK_GET_NZ(block_interface); unsigned ldy = STARPU_BLOCK_GET_LDY(block_interface); unsigned ldz = STARPU_BLOCK_GET_LDZ(block_interface); size_t elemsize = STARPU_BLOCK_GET_ELEMSIZE(block_interface); MPI_Datatype datatype_2dlayer; ret = MPI_Type_vector(ny, nx*elemsize, ldy*elemsize, MPI_BYTE, &datatype_2dlayer); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_vector failed"); ret = MPI_Type_create_hvector(nz, 1, ldz*elemsize, datatype_2dlayer, datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_hvector failed"); ret = MPI_Type_commit(datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); ret = MPI_Type_free(&datatype_2dlayer); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_free failed"); return 0; } /* * Tensor */ static int handle_to_datatype_tensor(starpu_data_handle_t data_handle, unsigned node, MPI_Datatype *datatype) { struct starpu_tensor_interface *tensor_interface = starpu_data_get_interface_on_node(data_handle, node); int ret; unsigned nx = STARPU_TENSOR_GET_NX(tensor_interface); unsigned ny = STARPU_TENSOR_GET_NY(tensor_interface); unsigned nz = STARPU_TENSOR_GET_NZ(tensor_interface); unsigned nt = STARPU_TENSOR_GET_NT(tensor_interface); unsigned ldy = STARPU_TENSOR_GET_LDY(tensor_interface); unsigned ldz = STARPU_TENSOR_GET_LDZ(tensor_interface); unsigned ldt = STARPU_TENSOR_GET_LDT(tensor_interface); size_t elemsize = STARPU_TENSOR_GET_ELEMSIZE(tensor_interface); MPI_Datatype datatype_3dlayer; ret = MPI_Type_vector(ny, nx*elemsize, ldy*elemsize, MPI_BYTE, &datatype_3dlayer); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_vector failed"); MPI_Datatype datatype_2dlayer; ret = MPI_Type_create_hvector(nz, 1, ldz*elemsize, datatype_3dlayer, &datatype_2dlayer); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_hvector failed"); ret = MPI_Type_create_hvector(nt, 1, ldt*elemsize, datatype_2dlayer, datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_hvector failed"); ret = MPI_Type_commit(datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); ret = MPI_Type_free(&datatype_3dlayer); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_free failed"); ret = MPI_Type_free(&datatype_2dlayer); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_free failed"); return 0; } /* * Ndim */ static int handle_to_datatype_ndim(starpu_data_handle_t data_handle, unsigned node, MPI_Datatype *datatype) { struct starpu_ndim_interface *ndim_interface = starpu_data_get_interface_on_node(data_handle, node); int ret; unsigned *nn = STARPU_NDIM_GET_NN(ndim_interface); unsigned *ldn = STARPU_NDIM_GET_LDN(ndim_interface); size_t ndim = STARPU_NDIM_GET_NDIM(ndim_interface); size_t elemsize = STARPU_NDIM_GET_ELEMSIZE(ndim_interface); if (ndim > 1) { MPI_Datatype datatype_ndlayer; ret = MPI_Type_vector(nn[1], nn[0]*elemsize, ldn[1]*elemsize, MPI_BYTE, &datatype_ndlayer); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_vector failed"); MPI_Datatype oldtype = datatype_ndlayer, newtype; unsigned i; for (i = 2; i < ndim; i++) { ret = MPI_Type_create_hvector(nn[i], 1, ldn[i]*elemsize, oldtype, &newtype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_hvector failed"); ret = MPI_Type_free(&oldtype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_free failed"); oldtype = newtype; } *datatype = oldtype; } else if (ndim == 1) { ret = MPI_Type_contiguous(nn[0]*elemsize, MPI_BYTE, datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed"); } ret = MPI_Type_commit(datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); return 0; } /* * Vector */ static int handle_to_datatype_vector(starpu_data_handle_t data_handle, unsigned node, MPI_Datatype *datatype) { struct starpu_vector_interface *vector_interface = starpu_data_get_interface_on_node(data_handle, node); int ret; unsigned nx = STARPU_VECTOR_GET_NX(vector_interface); size_t elemsize = STARPU_VECTOR_GET_ELEMSIZE(vector_interface); ret = MPI_Type_contiguous(nx*elemsize, MPI_BYTE, datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed"); ret = MPI_Type_commit(datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); return 0; } /* * Variable */ static int handle_to_datatype_variable(starpu_data_handle_t data_handle, unsigned node, MPI_Datatype *datatype) { struct starpu_variable_interface *variable_interface = starpu_data_get_interface_on_node(data_handle, node); int ret; size_t elemsize = STARPU_VARIABLE_GET_ELEMSIZE(variable_interface); ret = MPI_Type_contiguous(elemsize, MPI_BYTE, datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed"); ret = MPI_Type_commit(datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); return 0; } /* * Void */ static int handle_to_datatype_void(starpu_data_handle_t data_handle, unsigned node, MPI_Datatype *datatype) { int ret; (void)data_handle; (void)node; ret = MPI_Type_contiguous(0, MPI_BYTE, datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed"); ret = MPI_Type_commit(datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); return 0; } /* * Generic */ static starpu_mpi_datatype_node_allocate_func_t handle_to_datatype_funcs[STARPU_MAX_INTERFACE_ID] = { //#define DYNAMIC_MATRICES #ifndef DYNAMIC_MATRICES [STARPU_MATRIX_INTERFACE_ID] = handle_to_datatype_matrix, #endif [STARPU_BLOCK_INTERFACE_ID] = handle_to_datatype_block, [STARPU_TENSOR_INTERFACE_ID] = handle_to_datatype_tensor, [STARPU_NDIM_INTERFACE_ID] = handle_to_datatype_ndim, [STARPU_VECTOR_INTERFACE_ID] = handle_to_datatype_vector, [STARPU_CSR_INTERFACE_ID] = NULL, /* Sent through pack/unpack operations */ [STARPU_BCSR_INTERFACE_ID] = NULL, /* Sent through pack/unpack operations */ [STARPU_VARIABLE_INTERFACE_ID] = handle_to_datatype_variable, [STARPU_VOID_INTERFACE_ID] = handle_to_datatype_void, [STARPU_MULTIFORMAT_INTERFACE_ID] = NULL, }; MPI_Datatype _starpu_mpi_datatype_get_user_defined_datatype(starpu_data_handle_t data_handle, unsigned node) { enum starpu_data_interface_id id = starpu_data_get_interface_id(data_handle); if (id < STARPU_MAX_INTERFACE_ID) return 0; struct _starpu_mpi_datatype_funcs *table; STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex); HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table); STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex); if (table && (table->allocate_datatype_node_func || table->allocate_datatype_func)) { MPI_Datatype datatype; int ret; if (table->allocate_datatype_node_func) ret = table->allocate_datatype_node_func(data_handle, node, &datatype); else ret = table->allocate_datatype_func(data_handle, &datatype); if (ret == 0) return datatype; else return 0; } return 0; } void _starpu_mpi_datatype_allocate(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req) { enum starpu_data_interface_id id = starpu_data_get_interface_id(data_handle); if (id < STARPU_MAX_INTERFACE_ID) { starpu_mpi_datatype_node_allocate_func_t func = handle_to_datatype_funcs[id]; if (func) { func(data_handle, req->node, &req->datatype); req->registered_datatype = 1; } else { /* The datatype is predefined by StarPU but it will be sent as a memory area */ req->datatype = MPI_BYTE; req->registered_datatype = 0; } } else { struct _starpu_mpi_datatype_funcs *table; STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex); HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table); STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex); if (table) { STARPU_ASSERT_MSG(table->allocate_datatype_node_func || table->allocate_datatype_func, "Handle To Datatype Function not defined for StarPU data interface %d", id); int ret; if (table->allocate_datatype_node_func) ret = table->allocate_datatype_node_func(data_handle, req->node, &req->datatype); else ret = table->allocate_datatype_func(data_handle, &req->datatype); if (ret == 0) req->registered_datatype = 1; else { /* Couldn't register, probably complex data which needs packing. */ req->datatype = MPI_BYTE; req->registered_datatype = 0; } } else { /* The datatype is not predefined by StarPU */ req->datatype = MPI_BYTE; req->registered_datatype = 0; } } #ifdef STARPU_VERBOSE { char datatype_name[MPI_MAX_OBJECT_NAME]; int datatype_name_len; MPI_Type_get_name(req->datatype, datatype_name, &datatype_name_len); if (datatype_name_len == 0) req->datatype_name = strdup("User defined datatype"); else req->datatype_name = strdup(datatype_name); } #endif } static void _starpu_mpi_handle_free_simple_datatype(MPI_Datatype *datatype) { int ret = MPI_Type_free(datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_free failed"); } static starpu_mpi_datatype_free_func_t handle_free_datatype_funcs[STARPU_MAX_INTERFACE_ID] = { #ifndef DYNAMIC_MATRICES [STARPU_MATRIX_INTERFACE_ID] = _starpu_mpi_handle_free_simple_datatype, #endif [STARPU_BLOCK_INTERFACE_ID] = _starpu_mpi_handle_free_simple_datatype, [STARPU_TENSOR_INTERFACE_ID] = _starpu_mpi_handle_free_simple_datatype, [STARPU_VECTOR_INTERFACE_ID] = _starpu_mpi_handle_free_simple_datatype, [STARPU_NDIM_INTERFACE_ID] = _starpu_mpi_handle_free_simple_datatype, [STARPU_CSR_INTERFACE_ID] = NULL, /* Sent through pack/unpack operations */ [STARPU_BCSR_INTERFACE_ID] = NULL, /* Sent through pack/unpack operations */ [STARPU_VARIABLE_INTERFACE_ID] = _starpu_mpi_handle_free_simple_datatype, [STARPU_VOID_INTERFACE_ID] = _starpu_mpi_handle_free_simple_datatype, [STARPU_MULTIFORMAT_INTERFACE_ID] = NULL, }; void _starpu_mpi_datatype_free(starpu_data_handle_t data_handle, MPI_Datatype *datatype) { enum starpu_data_interface_id id = starpu_data_get_interface_id(data_handle); if (id < STARPU_MAX_INTERFACE_ID) { starpu_mpi_datatype_free_func_t func = handle_free_datatype_funcs[id]; if (func) func(datatype); } else { struct _starpu_mpi_datatype_funcs *table; STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex); HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table); STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex); if (table) { STARPU_ASSERT_MSG(table->free_datatype_func, "Free Datatype Function not defined for StarPU data interface %d", id); if (*datatype != MPI_BYTE) table->free_datatype_func(datatype); } } /* else the datatype is not predefined by StarPU */ } int _starpu_mpi_interface_datatype_register(enum starpu_data_interface_id id, starpu_mpi_datatype_node_allocate_func_t allocate_datatype_node_func, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func) { struct _starpu_mpi_datatype_funcs *table; STARPU_ASSERT_MSG(id >= STARPU_MAX_INTERFACE_ID, "Cannot redefine the MPI datatype for a predefined StarPU datatype"); STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex); HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table); if (table) { table->allocate_datatype_node_func = allocate_datatype_node_func; table->allocate_datatype_func = allocate_datatype_func; table->free_datatype_func = free_datatype_func; } else { _STARPU_MPI_MALLOC(table, sizeof(struct _starpu_mpi_datatype_funcs)); table->id = id; table->allocate_datatype_node_func = allocate_datatype_node_func; table->allocate_datatype_func = allocate_datatype_func; table->free_datatype_func = free_datatype_func; HASH_ADD_INT(_starpu_mpi_datatype_funcs_table, id, table); } STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex); return 0; } int starpu_mpi_interface_datatype_node_register(enum starpu_data_interface_id id, starpu_mpi_datatype_node_allocate_func_t allocate_datatype_node_func, starpu_mpi_datatype_free_func_t free_datatype_func) { return _starpu_mpi_interface_datatype_register(id, allocate_datatype_node_func, NULL, free_datatype_func); } int starpu_mpi_interface_datatype_register(enum starpu_data_interface_id id, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func) { return _starpu_mpi_interface_datatype_register(id, NULL, allocate_datatype_func, free_datatype_func); } int starpu_mpi_datatype_node_register(starpu_data_handle_t handle, starpu_mpi_datatype_node_allocate_func_t allocate_datatype_node_func, starpu_mpi_datatype_free_func_t free_datatype_func) { enum starpu_data_interface_id id = starpu_data_get_interface_id(handle); int ret; ret = starpu_mpi_interface_datatype_node_register(id, allocate_datatype_node_func, free_datatype_func); STARPU_ASSERT_MSG(handle->ops->handle_to_pointer || handle->ops->to_pointer, "The data interface must define the operation 'to_pointer'\n"); return ret; } int starpu_mpi_datatype_register(starpu_data_handle_t handle, starpu_mpi_datatype_allocate_func_t allocate_datatype_func, starpu_mpi_datatype_free_func_t free_datatype_func) { enum starpu_data_interface_id id = starpu_data_get_interface_id(handle); int ret; ret = starpu_mpi_interface_datatype_register(id, allocate_datatype_func, free_datatype_func); STARPU_ASSERT_MSG(handle->ops->handle_to_pointer || handle->ops->to_pointer, "The data interface must define the operation 'to_pointer'\n"); return ret; } int starpu_mpi_interface_datatype_unregister(enum starpu_data_interface_id id) { struct _starpu_mpi_datatype_funcs *table; STARPU_ASSERT_MSG(id >= STARPU_MAX_INTERFACE_ID, "Cannot redefine the MPI datatype for a predefined StarPU datatype"); STARPU_PTHREAD_MUTEX_LOCK(&_starpu_mpi_datatype_funcs_table_mutex); HASH_FIND_INT(_starpu_mpi_datatype_funcs_table, &id, table); if (table) { HASH_DEL(_starpu_mpi_datatype_funcs_table, table); free(table); } STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_mpi_datatype_funcs_table_mutex); return 0; } int starpu_mpi_datatype_unregister(starpu_data_handle_t handle) { enum starpu_data_interface_id id = starpu_data_get_interface_id(handle); return starpu_mpi_interface_datatype_unregister(id); } starpu-1.4.9+dfsg/mpi/src/starpu_mpi_datatype.h000066400000000000000000000024421507764646700216150ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_DATATYPE_H__ #define __STARPU_MPI_DATATYPE_H__ #include #include /** @file */ #ifdef __cplusplus extern "C" { #endif void _starpu_mpi_datatype_init(void); void _starpu_mpi_datatype_shutdown(void); void _starpu_mpi_datatype_allocate(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req); void _starpu_mpi_datatype_free(starpu_data_handle_t data_handle, MPI_Datatype *datatype); MPI_Datatype _starpu_mpi_datatype_get_user_defined_datatype(starpu_data_handle_t data_handle, unsigned node); #ifdef __cplusplus } #endif #endif // __STARPU_MPI_DATATYPE_H__ starpu-1.4.9+dfsg/mpi/src/starpu_mpi_fortran.c000066400000000000000000000263571507764646700214630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "starpu_mpi_private.h" #ifdef HAVE_MPI_COMM_F2C /* Fortran related functions */ struct _starpu_mpi_argc_argv *fstarpu_mpi_argcv_alloc(int argc, int initialize_mpi, int comm_present, MPI_Fint comm) { struct _starpu_mpi_argc_argv *argcv; _STARPU_MPI_CALLOC(argcv, 1,sizeof(*argcv)); argcv->initialize_mpi = initialize_mpi; if (comm_present) { argcv->comm = MPI_Comm_f2c(comm); } else { argcv->comm = MPI_COMM_WORLD; } argcv->fargc = argc; argcv->argc = &argcv->fargc; _STARPU_MPI_CALLOC(argcv->fargv, argc, sizeof(char *)); argcv->argv = &argcv->fargv; return argcv; } void fstarpu_mpi_argcv_set_arg(struct _starpu_mpi_argc_argv *argcv, int i, int len, char *_s) { STARPU_ASSERT(len >= 0); STARPU_ASSERT(i >= 0 && i < argcv->fargc); char *s; _STARPU_MPI_MALLOC(s, len+1); memcpy(s, _s, len); s[len] = '\0'; argcv->fargv[i] = s; } void fstarpu_mpi_argcv_free(struct _starpu_mpi_argc_argv *argcv) { if (argcv->fargv != NULL) { int i; for (i=0; ifargc; i++) { free(argcv->fargv[i]); } free(argcv->fargv); } free(argcv); } starpu_mpi_req *fstarpu_mpi_req_alloc(void) { void *ptr; _STARPU_MPI_CALLOC(ptr, 1, sizeof(starpu_mpi_req)); return ptr; } void fstarpu_mpi_req_free(starpu_mpi_req *req) { free(req); } MPI_Status *fstarpu_mpi_status_alloc(void) { void *ptr; _STARPU_MPI_CALLOC(ptr, 1, sizeof(MPI_Status)); return ptr; } void fstarpu_mpi_status_free(MPI_Status *status) { free(status); } int fstarpu_mpi_barrier(MPI_Fint comm) { return starpu_mpi_barrier(MPI_Comm_f2c(comm)); } int fstarpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int src, starpu_mpi_tag_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg, int seq_const) { return starpu_mpi_irecv_detached_sequential_consistency(data_handle, src, data_tag, MPI_Comm_f2c(comm), callback, arg, seq_const); } int fstarpu_mpi_init_c(struct _starpu_mpi_argc_argv *argcv) { return starpu_mpi_init_comm(argcv->argc, argcv->argv, argcv->initialize_mpi, argcv->comm); } int fstarpu_mpi_get_data_on_node(MPI_Fint comm, starpu_data_handle_t data_handle, int node) { return starpu_mpi_get_data_on_node(MPI_Comm_f2c(comm), data_handle, node); } int fstarpu_mpi_get_data_on_node_detached(MPI_Fint comm, starpu_data_handle_t data_handle, int node, void (*callback)(void *), void *arg) { return starpu_mpi_get_data_on_node_detached(MPI_Comm_f2c(comm), data_handle, node, callback, arg); } int fstarpu_mpi_redux_data(MPI_Fint comm, starpu_data_handle_t data_handle) { return starpu_mpi_redux_data(MPI_Comm_f2c(comm), data_handle); } int fstarpu_mpi_redux_data_prio(MPI_Fint comm, starpu_data_handle_t data_handle, int prio) { return starpu_mpi_redux_data_prio(MPI_Comm_f2c(comm), data_handle, prio); } int fstarpu_mpi_redux_data_tree(MPI_Fint comm, starpu_data_handle_t data_handle, int arity) { return starpu_mpi_redux_data_tree(MPI_Comm_f2c(comm), data_handle, arity); } int fstarpu_mpi_redux_data_prio_tree(MPI_Fint comm, starpu_data_handle_t data_handle, int prio, int arity) { return starpu_mpi_redux_data_prio_tree(MPI_Comm_f2c(comm), data_handle, prio, arity); } /* scatter/gather */ int fstarpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int cnt, int root, MPI_Fint comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg) { return starpu_mpi_scatter_detached(data_handles, cnt, root, MPI_Comm_f2c(comm), scallback, sarg, rcallback, rarg); } int fstarpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int cnt, int root, MPI_Fint comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg) { return starpu_mpi_gather_detached(data_handles, cnt, root, MPI_Comm_f2c(comm), scallback, sarg, rcallback, rarg); } /* isend/irecv detached unlock tag */ int fstarpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm, starpu_tag_t *starpu_tag) { return starpu_mpi_isend_detached_unlock_tag(data_handle, dst, data_tag, MPI_Comm_f2c(comm), *starpu_tag); } int fstarpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm, starpu_tag_t *starpu_tag) { return starpu_mpi_isend_detached_unlock_tag_prio(data_handle, dst, data_tag, prio, MPI_Comm_f2c(comm), *starpu_tag); } int fstarpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int src, starpu_mpi_tag_t data_tag, MPI_Fint comm, starpu_tag_t *starpu_tag) { return starpu_mpi_irecv_detached_unlock_tag(data_handle, src, data_tag, MPI_Comm_f2c(comm), *starpu_tag); } /* isend/irecv array detached unlock tag */ int fstarpu_mpi_isend_array_detached_unlock_tag_prio(int array_size, starpu_data_handle_t *data_handles, int *dsts, starpu_mpi_tag_t *data_tags, int *prio, MPI_Fint *_comms, starpu_tag_t *starpu_tag) { MPI_Comm comms[array_size]; int i; for (i = 0; i < array_size; i++) { comms[i] = MPI_Comm_f2c(_comms[i]); } int ret = starpu_mpi_isend_array_detached_unlock_tag_prio((unsigned)array_size, data_handles, dsts, data_tags, prio, comms, *starpu_tag); return ret; } int fstarpu_mpi_isend_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *dsts, starpu_mpi_tag_t *data_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag) { return fstarpu_mpi_isend_array_detached_unlock_tag_prio(array_size, data_handles, dsts, data_tags, NULL, _comms, starpu_tag); } int fstarpu_mpi_irecv_array_detached_unlock_tag(int array_size, starpu_data_handle_t *data_handles, int *srcs, starpu_mpi_tag_t *data_tags, MPI_Fint *_comms, starpu_tag_t *starpu_tag) { MPI_Comm comms[array_size]; int i; for (i = 0; i < array_size; i++) { comms[i] = MPI_Comm_f2c(_comms[i]); } int ret = starpu_mpi_irecv_array_detached_unlock_tag((unsigned)array_size, data_handles, srcs, data_tags, comms, *starpu_tag); return ret; } /* isend/irecv */ int fstarpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm) { return starpu_mpi_isend(data_handle, req, dst, data_tag, MPI_Comm_f2c(comm)); } int fstarpu_mpi_isend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm) { return starpu_mpi_isend_prio(data_handle, req, dst, data_tag, prio, MPI_Comm_f2c(comm)); } int fstarpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int src, starpu_mpi_tag_t data_tag, MPI_Fint comm) { return starpu_mpi_irecv(data_handle, req, src, data_tag, MPI_Comm_f2c(comm)); } /* send/recv */ int fstarpu_mpi_send(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm) { return starpu_mpi_send(data_handle, dst, data_tag, MPI_Comm_f2c(comm)); } int fstarpu_mpi_send_prio(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm) { return starpu_mpi_send_prio(data_handle, dst, data_tag, prio, MPI_Comm_f2c(comm)); } int fstarpu_mpi_recv(starpu_data_handle_t data_handle, int src, starpu_mpi_tag_t data_tag, MPI_Fint comm, MPI_Status *status) { return starpu_mpi_recv(data_handle, src, data_tag, MPI_Comm_f2c(comm), status); } /* isend/irecv detached */ int fstarpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg) { return starpu_mpi_isend_detached(data_handle, dst, data_tag, MPI_Comm_f2c(comm), callback, arg); } int fstarpu_mpi_isend_detached_prio(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm, void (*callback)(void *), void *arg) { return starpu_mpi_isend_detached_prio(data_handle, dst, data_tag, prio, MPI_Comm_f2c(comm), callback, arg); } int fstarpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int src, starpu_mpi_tag_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg) { return starpu_mpi_irecv_detached(data_handle, src, data_tag, MPI_Comm_f2c(comm), callback, arg); } /* issend / issend detached */ int fstarpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm) { return starpu_mpi_issend(data_handle, req, dst, data_tag, MPI_Comm_f2c(comm)); } int fstarpu_mpi_issend_prio(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm) { return starpu_mpi_issend_prio(data_handle, req, dst, data_tag, prio, MPI_Comm_f2c(comm)); } int fstarpu_mpi_issend_detached(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, MPI_Fint comm, void (*callback)(void *), void *arg) { return starpu_mpi_issend_detached(data_handle, dst, data_tag, MPI_Comm_f2c(comm), callback, arg); } int fstarpu_mpi_issend_detached_prio(starpu_data_handle_t data_handle, int dst, starpu_mpi_tag_t data_tag, int prio, MPI_Fint comm, void (*callback)(void *), void *arg) { return starpu_mpi_issend_detached_prio(data_handle, dst, data_tag, prio, MPI_Comm_f2c(comm), callback, arg); } /* cache */ void fstarpu_mpi_cache_flush(MPI_Fint comm, starpu_data_handle_t data_handle) { return starpu_mpi_cache_flush(MPI_Comm_f2c(comm), data_handle); } void fstarpu_mpi_cache_flush_all_data(MPI_Fint comm) { return starpu_mpi_cache_flush_all_data(MPI_Comm_f2c(comm)); } int fstarpu_mpi_comm_size(MPI_Fint comm, int *size) { return starpu_mpi_comm_size(MPI_Comm_f2c(comm), size); } int fstarpu_mpi_comm_rank(MPI_Fint comm, int *rank) { return starpu_mpi_comm_rank(MPI_Comm_f2c(comm), rank); } MPI_Fint fstarpu_mpi_world_comm() { return MPI_Comm_c2f(MPI_COMM_WORLD); } void fstarpu_mpi_comm_stats_disable() { starpu_mpi_comm_stats_disable(); } void fstarpu_mpi_comm_stats_enable() { starpu_mpi_comm_stats_enable(); } void fstarpu_mpi_data_register_comm(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag, int rank, MPI_Fint comm) { return starpu_mpi_data_register_comm(handle, data_tag, rank, MPI_Comm_f2c(comm)); } void fstarpu_mpi_data_register(starpu_data_handle_t handle, starpu_mpi_tag_t data_tag, int rank) { return starpu_mpi_data_register_comm(handle, data_tag, rank, MPI_COMM_WORLD); } void fstarpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Fint comm) { return starpu_mpi_data_set_rank_comm(handle, rank, MPI_Comm_f2c(comm)); } void fstarpu_mpi_data_set_rank(starpu_data_handle_t handle, int rank) { return starpu_mpi_data_set_rank_comm(handle, rank, MPI_COMM_WORLD); } void fstarpu_mpi_data_migrate(MPI_Fint comm, starpu_data_handle_t handle, int rank) { return starpu_mpi_data_migrate(MPI_Comm_f2c(comm), handle, rank); } int fstarpu_mpi_wait_for_all(MPI_Fint comm) { return starpu_mpi_wait_for_all(MPI_Comm_f2c(comm)); } #endif starpu-1.4.9+dfsg/mpi/src/starpu_mpi_fxt.c000066400000000000000000000106641507764646700206030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #ifdef STARPU_HAVE_MPI_SYNC_CLOCKS #include static mpi_sync_clocks_t mpi_sync_clock; #endif static int fxt_random_number = -1; #if defined(STARPU_HAVE_MPI_SYNC_CLOCKS) && !defined(STARPU_SIMGRID) /* Use the same clock as the one used by mpi_sync_clocks */ uint64_t fut_getstamp(void) { sync_clocks_generic_tick_t tick; sync_clocks_generic_get_tick(tick); return (uint64_t) (sync_clocks_generic_tick2usec(tick)*1000.); } #endif static void _starpu_mpi_add_sync_point_in_fxt(void) { int rank, worldsize, ret; starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); STARPU_ASSERT(worldsize > 1); ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Barrier returning %s", _starpu_mpi_get_mpi_error_code(ret)); if (fxt_random_number == -1) // only for the first sync point { /* We generate a "unique" key so that we can make sure that different * FxT traces come from the same MPI run. */ if (rank == 0) fxt_random_number = time(NULL); _STARPU_MPI_DEBUG(3, "unique key %x\n", fxt_random_number); ret = MPI_Bcast(&fxt_random_number, 1, MPI_INT, 0, MPI_COMM_WORLD); STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Bcast returning %s", _starpu_mpi_get_mpi_error_code(ret)); } #ifdef STARPU_HAVE_MPI_SYNC_CLOCKS if (starpu_getenv_number("STARPU_MPI_TRACE_SYNC_CLOCKS") != 0) { mpi_sync_clocks_synchronize(mpi_sync_clock); double local_sync_time; mpi_sync_clocks_barrier(mpi_sync_clock, &local_sync_time); /* Even if with this synchronized barrier, all nodes are supposed to left * out the barrier exactly at the same time, we can't be sure, the * following event will be recorded at the same time on each MPI processes, * because this thread can be preempted between the end of the barrier and * the event record. That's why we need to store the local time when the * barrier was unlocked as an additional information of the event, we can't * rely on the timestamp of the event. */ _STARPU_MPI_TRACE_BARRIER(rank, worldsize, fxt_random_number, (mpi_sync_clocks_get_time_origin_usec(mpi_sync_clock) + local_sync_time) * 1000.); } else /* mpi_sync_synchronize() can be long (several seconds), one can prefer to use a less precise but faster method: */ #endif { ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_MPI_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Barrier returning %s", _starpu_mpi_get_mpi_error_code(ret)); _STARPU_MPI_TRACE_BARRIER(rank, worldsize, fxt_random_number, 0); } } void _starpu_mpi_fxt_init(void* arg) { struct _starpu_mpi_argc_argv *argc_argv = (struct _starpu_mpi_argc_argv *) arg; if (_starpu_fxt_wait_initialisation()) { #ifdef STARPU_HAVE_MPI_SYNC_CLOCKS if (argc_argv->world_size > 1 && starpu_getenv_number("STARPU_MPI_TRACE_SYNC_CLOCKS") != 0) { mpi_sync_clock = mpi_sync_clocks_init(MPI_COMM_WORLD); } #endif /* We need to record our ID in the trace before the main thread makes any MPI call */ _STARPU_MPI_TRACE_START(argc_argv->rank, argc_argv->world_size); starpu_profiling_set_id(argc_argv->rank); _starpu_profiling_set_mpi_worldsize(argc_argv->world_size); if (argc_argv->world_size > 1) { _starpu_mpi_add_sync_point_in_fxt(); } } } void _starpu_mpi_fxt_shutdown() { if (starpu_fxt_is_enabled()) { int worldsize; starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); if (worldsize > 1) { /* We add a synchronization point at the end of the trace, * to be able to interpolate times, in order to correct * time drift. */ _starpu_mpi_add_sync_point_in_fxt(); #ifdef STARPU_HAVE_MPI_SYNC_CLOCKS if (starpu_getenv_number("STARPU_MPI_TRACE_SYNC_CLOCKS") != 0) { mpi_sync_clocks_shutdown(mpi_sync_clock); } #endif } } } starpu-1.4.9+dfsg/mpi/src/starpu_mpi_fxt.h000066400000000000000000000276001507764646700206060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2019-2019 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_FXT_H__ #define __STARPU_MPI_FXT_H__ #include #include #include /** @file */ #ifdef __cplusplus extern "C" { #endif #define _STARPU_MPI_FUT_POINT_TO_POINT_SEND 0x100 #define _STARPU_MPI_FUT_COLLECTIVE_SEND 0x101 #define _STARPU_MPI_FUT_START 0x5201 #define _STARPU_MPI_FUT_STOP 0x5202 #define _STARPU_MPI_FUT_BARRIER 0x5203 #define _STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN 0x5204 #define _STARPU_MPI_FUT_ISEND_SUBMIT_END 0x5205 #define _STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN 0x5206 #define _STARPU_MPI_FUT_IRECV_SUBMIT_END 0x5207 #define _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN 0x5208 #define _STARPU_MPI_FUT_ISEND_COMPLETE_END 0x5209 #define _STARPU_MPI_FUT_DATA_SET_RANK 0x521a #define _STARPU_MPI_FUT_IRECV_TERMINATED 0x521b #define _STARPU_MPI_FUT_ISEND_TERMINATED 0x521c #define _STARPU_MPI_FUT_TESTING_DETACHED_BEGIN 0x521d #define _STARPU_MPI_FUT_TESTING_DETACHED_END 0x521e #define _STARPU_MPI_FUT_TEST_BEGIN 0x521f #define _STARPU_MPI_FUT_TEST_END 0x5220 #define _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN 0x520a #define _STARPU_MPI_FUT_IRECV_COMPLETE_END 0x520b #define _STARPU_MPI_FUT_SLEEP_BEGIN 0x520c #define _STARPU_MPI_FUT_SLEEP_END 0x520d #define _STARPU_MPI_FUT_DTESTING_BEGIN 0x520e #define _STARPU_MPI_FUT_DTESTING_END 0x520f #define _STARPU_MPI_FUT_UTESTING_BEGIN 0x5210 #define _STARPU_MPI_FUT_UTESTING_END 0x5211 #define _STARPU_MPI_FUT_UWAIT_BEGIN 0x5212 #define _STARPU_MPI_FUT_UWAIT_END 0x5213 #define _STARPU_MPI_FUT_POLLING_BEGIN 0x5214 #define _STARPU_MPI_FUT_POLLING_END 0x5215 #define _STARPU_MPI_FUT_DRIVER_RUN_BEGIN 0x5216 #define _STARPU_MPI_FUT_DRIVER_RUN_END 0x5217 #define _STARPU_MPI_FUT_DATA_SET_TAG 0x5218 #define _STARPU_MPI_FUT_IRECV_NUMA_NODE 0x5219 #define _STARPU_MPI_FUT_ISEND_NUMA_NODE 0x5221 #define _STARPU_MPI_FUT_CHECKPOINT_BEGIN 0x5222 #define _STARPU_MPI_FUT_CHECKPOINT_END 0x5223 #ifdef STARPU_USE_FXT #define _STARPU_MPI_TRACE_START(rank, worldsize) \ FUT_DO_ALWAYS_PROBE3(_STARPU_MPI_FUT_START, (rank), (worldsize), _starpu_gettid()); #define _STARPU_MPI_TRACE_STOP(rank, worldsize) \ FUT_DO_ALWAYS_PROBE3(_STARPU_MPI_FUT_STOP, (rank), (worldsize), _starpu_gettid()); #define _STARPU_MPI_TRACE_BARRIER(rank, worldsize, key, local_time) do {\ if (_starpu_fxt_started) \ FUT_DO_ALWAYS_PROBE5(_STARPU_MPI_FUT_BARRIER, (rank), (worldsize), (key), (local_time), _starpu_gettid()); \ } while (0) #define _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(dest, data_tag, size) \ FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN, (dest), (data_tag), (size), _starpu_gettid()); #define _STARPU_MPI_TRACE_ISEND_SUBMIT_END(type, req, prio) \ FUT_FULL_PROBE8(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_SUBMIT_END, (type), (req)->node_tag.node.rank, (req)->node_tag.data_tag, starpu_data_get_size((req)->data_handle), (req)->pre_sync_jobid, (req)->data_handle, (prio), _starpu_gettid()); \ FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA, _STARPU_MPI_FUT_ISEND_NUMA_NODE, (req)->node_tag.node.rank, (req)->pre_sync_jobid, starpu_get_memory_location_bitmap((req)->ptr, starpu_data_get_size((req)->data_handle)), _starpu_gettid()); #define _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(src, data_tag) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN, (src), (data_tag), _starpu_gettid()); #define _STARPU_MPI_TRACE_IRECV_SUBMIT_END(src, data_tag) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_SUBMIT_END, (src), (data_tag), _starpu_gettid()); #define _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN(dest, data_tag, size) \ FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN, (dest), (data_tag), (size), _starpu_gettid()); #define _STARPU_MPI_TRACE_COMPLETE_BEGIN(type, rank, data_tag) \ if (type == RECV_REQ) { _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN((rank), (data_tag)); } else if (type == SEND_REQ) { _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN((rank), (data_tag), 0); } #define _STARPU_MPI_TRACE_ISEND_COMPLETE_END(dest, data_tag, size) \ FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_COMPLETE_END, (dest), (data_tag), (size), _starpu_gettid()); #define _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN(src, data_tag) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN, (src), (data_tag), _starpu_gettid()); #define _STARPU_MPI_TRACE_IRECV_COMPLETE_END(src, data_tag) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_COMPLETE_END, (src), (data_tag), _starpu_gettid()); #define _STARPU_MPI_TRACE_COMPLETE_END(type, rank, data_tag) \ if (type == RECV_REQ) { _STARPU_MPI_TRACE_IRECV_COMPLETE_END((rank), (data_tag)); } else if (type == SEND_REQ) { _STARPU_MPI_TRACE_ISEND_COMPLETE_END((rank), (data_tag), 0); } #define _STARPU_MPI_TRACE_TERMINATED(req) \ if ((req)->request_type == RECV_REQ) { \ FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_TERMINATED, (req)->node_tag.node.rank, (req)->node_tag.data_tag, (req)->post_sync_jobid, _starpu_gettid(), (req)->data_handle); \ FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA, _STARPU_MPI_FUT_IRECV_NUMA_NODE, (req)->node_tag.node.rank, (req)->post_sync_jobid, starpu_get_memory_location_bitmap((req)->ptr, starpu_data_get_size((req)->data_handle)), _starpu_gettid()); \ } else \ if ((req)->request_type == SEND_REQ) FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_TERMINATED, (req)->node_tag.node.rank, (req)->node_tag.data_tag, _starpu_gettid()); #define _STARPU_MPI_TRACE_SLEEP_BEGIN() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_SLEEP_BEGIN, _starpu_gettid()); #define _STARPU_MPI_TRACE_SLEEP_END() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_SLEEP_END, _starpu_gettid()); #define _STARPU_MPI_TRACE_DTESTING_BEGIN() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DTESTING_BEGIN, _starpu_gettid()); #define _STARPU_MPI_TRACE_DTESTING_END() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DTESTING_END, _starpu_gettid()); #define _STARPU_MPI_TRACE_UTESTING_BEGIN(src, data_tag) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UTESTING_BEGIN, (src), (data_tag), _starpu_gettid()); #define _STARPU_MPI_TRACE_UTESTING_END(src, data_tag) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UTESTING_END, (src), (data_tag), _starpu_gettid()); #define _STARPU_MPI_TRACE_UWAIT_BEGIN(src, data_tag) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UWAIT_BEGIN, (src), (data_tag), _starpu_gettid()); #define _STARPU_MPI_TRACE_UWAIT_END(src, data_tag) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UWAIT_END, (src), (data_tag), _starpu_gettid()); #define _STARPU_MPI_TRACE_DATA_SET_RANK(handle, rank) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DATA_SET_RANK, (handle), (rank), _starpu_gettid()); #define _STARPU_MPI_TRACE_DATA_SET_TAG(handle, data_tag) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DATA_SET_TAG, (handle), (data_tag), _starpu_gettid()); #if 0 /* This is very expensive in the trace, only enable for debugging */ #define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN() \ FUT_DO_PROBE1(_STARPU_MPI_FUT_TESTING_DETACHED_BEGIN, _starpu_gettid()); #define _STARPU_MPI_TRACE_TESTING_DETACHED_END() \ FUT_DO_PROBE1(_STARPU_MPI_FUT_TESTING_DETACHED_END, _starpu_gettid()); #define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag) \ FUT_DO_PROBE3(_STARPU_MPI_FUT_TEST_BEGIN, (peer), (data_tag), _starpu_gettid()); #define _STARPU_MPI_TRACE_TEST_END(peer, data_tag) \ FUT_DO_PROBE3(_STARPU_MPI_FUT_TEST_END, (peer), (data_tag), _starpu_gettid()); #else #define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN() do {} while(0) #define _STARPU_MPI_TRACE_TESTING_DETACHED_END() do {} while(0) #define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag) do {} while(0) #define _STARPU_MPI_TRACE_TEST_END(peer, data_tag) do {} while(0) #endif #define _STARPU_MPI_TRACE_POLLING_BEGIN() \ if(!trace_loop) { \ trace_loop = 1; \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_POLLING_BEGIN, _starpu_gettid()); \ } #define _STARPU_MPI_TRACE_POLLING_END() \ if(trace_loop) { \ trace_loop = 0; \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_POLLING_END, _starpu_gettid()); \ } #define _STARPU_MPI_TRACE_DRIVER_RUN_BEGIN() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DRIVER_RUN_BEGIN, _starpu_gettid()); #define _STARPU_MPI_TRACE_DRIVER_RUN_END() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DRIVER_RUN_END, _starpu_gettid()); #define _STARPU_MPI_TRACE_CHECKPOINT_BEGIN(cp_instance, cp_domain) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_EVENT, _STARPU_MPI_FUT_CHECKPOINT_BEGIN, (cp_instance), (cp_domain), _starpu_gettid()); #define _STARPU_MPI_TRACE_CHECKPOINT_END(cp_instance, cp_domain) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_EVENT, _STARPU_MPI_FUT_CHECKPOINT_END, (cp_instance), (cp_domain), _starpu_gettid()); #define TRACE #else #define _STARPU_MPI_TRACE_START(a, b) do {} while(0); #define _STARPU_MPI_TRACE_STOP(a, b) do {} while(0); #define _STARPU_MPI_TRACE_BARRIER(a, b, c, d) do {} while(0); #define _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(a, b, c) do {} while(0); #define _STARPU_MPI_TRACE_ISEND_SUBMIT_END(a, b, c) do {} while(0); #define _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(a, b) do {} while(0); #define _STARPU_MPI_TRACE_IRECV_SUBMIT_END(a, b) do {} while(0); #define _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN(a, b, c) do {} while(0); #define _STARPU_MPI_TRACE_COMPLETE_BEGIN(a, b, c) do {} while(0); #define _STARPU_MPI_TRACE_COMPLETE_END(a, b, c) do {} while(0); #define _STARPU_MPI_TRACE_TERMINATED(a) do {} while(0); #define _STARPU_MPI_TRACE_ISEND_COMPLETE_END(a, b, c) do {} while(0); #define _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN(a, b) do {} while(0); #define _STARPU_MPI_TRACE_IRECV_COMPLETE_END(a, b) do {} while(0); #define _STARPU_MPI_TRACE_SLEEP_BEGIN() do {} while(0); #define _STARPU_MPI_TRACE_SLEEP_END() do {} while(0); #define _STARPU_MPI_TRACE_DTESTING_BEGIN() do {} while(0); #define _STARPU_MPI_TRACE_DTESTING_END() do {} while(0); #define _STARPU_MPI_TRACE_UTESTING_BEGIN(a, b) do {} while(0); #define _STARPU_MPI_TRACE_UTESTING_END(a, b) do {} while(0); #define _STARPU_MPI_TRACE_UWAIT_BEGIN(a, b) do {} while(0); #define _STARPU_MPI_TRACE_UWAIT_END(a, b) do {} while(0); #define _STARPU_MPI_TRACE_DATA_SET_RANK(a, b) do {} while(0); #define _STARPU_MPI_TRACE_DATA_SET_TAG(a, b) do {} while(0); #define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN() do {} while(0) #define _STARPU_MPI_TRACE_TESTING_DETACHED_END() do {} while(0) #define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag) do {} while(0) #define _STARPU_MPI_TRACE_TEST_END(peer, data_tag) do {} while(0) #define _STARPU_MPI_TRACE_POLLING_BEGIN() do {} while(0); #define _STARPU_MPI_TRACE_POLLING_END() do {} while(0); #define _STARPU_MPI_TRACE_DRIVER_RUN_BEGIN() do {} while(0); #define _STARPU_MPI_TRACE_DRIVER_RUN_END() do {} while(0); #define _STARPU_MPI_TRACE_CHECKPOINT_BEGIN(cp_instance, cp_domain) do {} while(0) #define _STARPU_MPI_TRACE_CHECKPOINT_END(cp_instance, cp_domain) do {} while(0) #endif void _starpu_mpi_fxt_init(void* arg); void _starpu_mpi_fxt_shutdown(); #ifdef __cplusplus } #endif #endif // __STARPU_MPI_FXT_H__ starpu-1.4.9+dfsg/mpi/src/starpu_mpi_helper.c000066400000000000000000000071541507764646700212610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include static void starpu_mpi_unlock_tag_callback(void *arg) { starpu_tag_t *tagptr = arg; starpu_tag_notify_from_apps(*tagptr); free(tagptr); } int starpu_mpi_isend_detached_unlock_tag_prio(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, int prio, MPI_Comm comm, starpu_tag_t tag) { starpu_tag_t *tagptr; _STARPU_MPI_MALLOC(tagptr, sizeof(starpu_tag_t)); *tagptr = tag; return starpu_mpi_isend_detached_prio(data_handle, dest, data_tag, prio, comm, starpu_mpi_unlock_tag_callback, tagptr); } int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag) { return starpu_mpi_isend_detached_unlock_tag_prio(data_handle, dest, data_tag, 0, comm, tag); } int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, starpu_tag_t tag) { starpu_tag_t *tagptr; _STARPU_MPI_MALLOC(tagptr, sizeof(starpu_tag_t)); *tagptr = tag; return starpu_mpi_irecv_detached(data_handle, source, data_tag, comm, starpu_mpi_unlock_tag_callback, tagptr); } struct arg_array { int array_size; starpu_tag_t tag; }; static void starpu_mpi_array_unlock_callback(void *_arg) { struct arg_array *arg = _arg; int remaining = STARPU_ATOMIC_ADD(&arg->array_size, -1); if (remaining == 0) { starpu_tag_notify_from_apps(arg->tag); free(arg); } } int starpu_mpi_isend_array_detached_unlock_tag_prio(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, int *prio, MPI_Comm *comm, starpu_tag_t tag) { int ret; if (!array_size) return 0; struct arg_array *arg; _STARPU_MPI_MALLOC(arg, sizeof(struct arg_array)); arg->array_size = array_size; arg->tag = tag; unsigned elem; for (elem = 0; elem < array_size; elem++) { int p = 0; if (prio) p = prio[elem]; ret = starpu_mpi_isend_detached_prio(data_handle[elem], dest[elem], data_tag[elem], p, comm[elem], starpu_mpi_array_unlock_callback, arg); if (ret) return ret; } return 0; } int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag) { return starpu_mpi_isend_array_detached_unlock_tag_prio(array_size, data_handle, dest, data_tag, NULL, comm, tag); } int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, starpu_mpi_tag_t *data_tag, MPI_Comm *comm, starpu_tag_t tag) { if (!array_size) return 0; int ret; struct arg_array *arg; _STARPU_MPI_MALLOC(arg, sizeof(struct arg_array)); arg->array_size = array_size; arg->tag = tag; unsigned elem; for (elem = 0; elem < array_size; elem++) { ret = starpu_mpi_irecv_detached(data_handle[elem], source[elem], data_tag[elem], comm[elem], starpu_mpi_array_unlock_callback, arg); if (ret) return ret; } return 0; } starpu-1.4.9+dfsg/mpi/src/starpu_mpi_init.c000066400000000000000000000340271507764646700207440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef STARPU_HAVE_MPI_EXT #include #endif #ifdef STARPU_SIMGRID static int _mpi_world_size; static int _mpi_world_rank; #endif static int _mpi_initialized_starpu; static int _starpu_mpi_gpudirect; /* Whether GPU direct was explicitly requested (1) or disabled (0), or should be enabled if available (-1) */ int _starpu_mpi_has_cuda; /* Whether GPU direct is available */ int _starpu_mpi_psm2; /* Whether MPI has PSM2 or not. Useful when using old intel compilers, for which psm2 detection is buggy */ int _starpu_mpi_cuda_devid = -1; /* Which device GPU direct is enabled for (-1 = all) */ static void _starpu_mpi_print_thread_level_support(int thread_level, char *msg) { const char *level = NULL; switch (thread_level) { case MPI_THREAD_SERIALIZED: { level = "MPI_THREAD_SERIALIZED"; _STARPU_DISP("MPI%s %s; Multiple threads may make MPI calls, but only one at a time.\n", msg, level); break; } case MPI_THREAD_FUNNELED: { level = "MPI_THREAD_FUNNELED"; _STARPU_DISP("MPI%s %s; The application can safely make calls to StarPU-MPI functions, but should not call directly MPI communication functions.\n", msg, level); break; } case MPI_THREAD_SINGLE: { level = "MPI_THREAD_SINGLE"; _STARPU_DISP("MPI%s %s; MPI does not have multi-thread support, this might cause problems. The application can make calls to StarPU-MPI functions, but not call directly MPI Communication functions.\n", msg, level); break; } case MPI_THREAD_MULTIPLE: /* no problem */ break; } if (thread_level != MPI_THREAD_MULTIPLE && _starpu_mpi_thread_multiple_send) { _STARPU_DISP("STARPU_MPI_THREAD_MULTIPLE_SEND requested but MPI%s %s, disabling STARPU_MPI_THREAD_MULTIPLE_SEND\n", msg, level); _starpu_mpi_thread_multiple_send = 0; } } void _starpu_mpi_do_initialize(struct _starpu_mpi_argc_argv *argc_argv) { #ifdef STARPU_USE_CUDA if (_starpu_mpi_gpudirect != 0 && starpu_cuda_worker_get_count() > 0) { /* Some GPUDirect implementations (e.g. psm2) want cudaSetDevice to be called before MPI_Init */ int cuda_worker = starpu_worker_get_by_type(STARPU_CUDA_WORKER, 0); STARPU_ASSERT(cuda_worker >= 0); int devid = starpu_worker_get_devid(cuda_worker); STARPU_ASSERT(devid >= 0); cudaSetDevice(devid); } #endif if (argc_argv->initialize_mpi) { STARPU_ASSERT_MSG(argc_argv->comm == MPI_COMM_WORLD, "It does not make sense to ask StarPU-MPI to initialize MPI while a non-world communicator was given"); int thread_support; _STARPU_DEBUG("Calling MPI_Init_thread\n"); if (MPI_Init_thread(argc_argv->argc, argc_argv->argv, _starpu_mpi_thread_multiple_send ? MPI_THREAD_MULTIPLE : MPI_THREAD_SERIALIZED, &thread_support) != MPI_SUCCESS) { _STARPU_ERROR("MPI_Init_thread failed\n"); } _starpu_mpi_print_thread_level_support(thread_support, "_Init_thread level ="); } else { int provided; MPI_Query_thread(&provided); _starpu_mpi_print_thread_level_support(provided, " has been initialized with"); } // automatically register the given communicator starpu_mpi_comm_register(argc_argv->comm); if (argc_argv->comm != MPI_COMM_WORLD) starpu_mpi_comm_register(MPI_COMM_WORLD); MPI_Comm_rank(argc_argv->comm, &argc_argv->rank); MPI_Comm_size(argc_argv->comm, &argc_argv->world_size); MPI_Comm_set_errhandler(argc_argv->comm, MPI_ERRORS_RETURN); #ifdef STARPU_USE_CUDA #ifdef MPIX_CUDA_AWARE_SUPPORT if (MPIX_Query_cuda_support()) _starpu_mpi_has_cuda = 1; else if (_starpu_mpi_gpudirect > 0) _STARPU_DISP("Warning: MPI GPUDirect requested, but MPIX_Query_cuda_support reports that it is not supported.\n"); _STARPU_DEBUG("MPI has CUDA: %d\n", _starpu_mpi_has_cuda); if (!_starpu_mpi_gpudirect) { _STARPU_DEBUG("But disabled by user\n"); _starpu_mpi_has_cuda = 0; } if (_starpu_mpi_has_cuda && _starpu_mpi_psm2) { #pragma weak psm2_init extern int psm2_init(int *major, int *minor); if (psm2_init && starpu_cuda_worker_get_count() > 1) { int cuda_worker = starpu_worker_get_by_type(STARPU_CUDA_WORKER, 0); _starpu_mpi_cuda_devid = starpu_worker_get_devid(cuda_worker); _STARPU_DISP("Warning: MPI GPUDirect is enabled using the PSM2 driver, but StarPU will be driving several CUDA GPUs.\n"); _STARPU_DISP("Since the PSM2 driver only supports one CUDA GPU at a time for GPU Direct (at least as of its version 11.2.185), StarPU-MPI will use GPU Direct only for CUDA%d.\n", _starpu_mpi_cuda_devid); _STARPU_DISP("To get GPU Direct working with all CUDA GPUs with the PSM2 driver, you will unfortunately have to run one MPI rank per GPU.\n"); _STARPU_DISP("if you are sure you are not actually using PSM2, you can set STARPU_MPI_PSM2=0 to disable PSM2 detection.\n"); } } #else if (_starpu_mpi_gpudirect > 0) _STARPU_DISP("Warning: MPI GPUDirect requested, but the MPIX_Query_cuda_support function is not provided by the MPI Implementation, did you compile it with CUDA support and the Cuda MPI extension?\n"); _STARPU_DEBUG("No CUDA support in MPI\n"); #endif #endif #ifdef STARPU_SIMGRID _mpi_world_size = argc_argv->world_size; _mpi_world_rank = argc_argv->rank; #endif } static void _starpu_mpi_backend_check() { STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_init != NULL); STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_shutdown != NULL); STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_reserve_core != NULL); STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_request_init != NULL); STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_request_fill != NULL); STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_request_destroy != NULL); STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_data_clear != NULL); STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_data_register != NULL); STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_comm_register != NULL); STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_progress_init != NULL); STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_progress_shutdown != NULL); #ifdef STARPU_SIMGRID STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_wait_for_initialization != NULL); #endif STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_barrier != NULL); STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_wait_for_all != NULL); STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_wait != NULL); STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_test != NULL); STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_isend_size_func != NULL); STARPU_ASSERT(_mpi_backend._starpu_mpi_backend_irecv_size_func != NULL); } static int _starpu_mpi_initialize(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm) { struct _starpu_mpi_argc_argv *argc_argv; _STARPU_MALLOC(argc_argv, sizeof(struct _starpu_mpi_argc_argv)); argc_argv->initialize_mpi = initialize_mpi; argc_argv->argc = argc; argc_argv->argv = argv; argc_argv->comm = comm; _starpu_implicit_data_deps_write_hook(_starpu_mpi_data_flush); _starpu_mpi_backend_check(); _starpu_mpi_gpudirect = starpu_getenv_number("STARPU_MPI_GPUDIRECT"); _starpu_mpi_psm2 = starpu_getenv_number_default("STARPU_MPI_PSM2", 1); #ifdef STARPU_SIMGRID /* Call MPI_Init_thread as early as possible, to initialize simgrid * before working with mutexes etc. */ _starpu_mpi_do_initialize(argc_argv); #endif int ret = _mpi_backend._starpu_mpi_backend_progress_init(argc_argv); if (starpu_getenv_number_default("STARPU_DISPLAY_BINDINGS", 0)) { int rank, size, i; char hostname[65]; starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); gethostname(hostname, sizeof(hostname)); /* We make a barrier between each node calling hwloc-ps, to avoid mixing * outputs in stdout. */ for (i = 0; i < size; i++) { starpu_mpi_barrier(MPI_COMM_WORLD); if (rank == i) { fprintf(stdout, "== Binding for rank %d on node %s ==\n", rank, hostname); starpu_display_bindings(); fflush(stdout); } } starpu_mpi_barrier(MPI_COMM_WORLD); if (rank == 0) { fprintf(stdout, "== End of bindings ==\n"); fflush(stdout); } } return ret; } #ifdef STARPU_SIMGRID /* This is called before application's main, to initialize SMPI before we can * create MSG processes to run application's main */ int _starpu_mpi_simgrid_init(int argc, char *argv[]) { return _starpu_mpi_initialize(&argc, &argv, 1, MPI_COMM_WORLD); } #endif int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm) { #ifdef STARPU_SIMGRID (void)argc; (void)argv; (void)initialize_mpi; (void)comm; _mpi_backend._starpu_mpi_backend_wait_for_initialization(); return 0; #else return _starpu_mpi_initialize(argc, argv, initialize_mpi, comm); #endif } int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi) { return starpu_mpi_init_comm(argc, argv, initialize_mpi, MPI_COMM_WORLD); } int starpu_mpi_initialize(void) { #ifdef STARPU_SIMGRID return 0; #else return _starpu_mpi_initialize(NULL, NULL, 0, MPI_COMM_WORLD); #endif } int starpu_mpi_initialize_extended(int *rank, int *world_size) { #ifdef STARPU_SIMGRID *world_size = _mpi_world_size; *rank = _mpi_world_rank; return 0; #else int ret; ret = _starpu_mpi_initialize(NULL, NULL, 1, MPI_COMM_WORLD); if (ret == 0) { starpu_mpi_comm_rank(MPI_COMM_WORLD, rank); starpu_mpi_comm_size(MPI_COMM_WORLD, world_size); } return ret; #endif } int starpu_mpi_init_conf(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm, struct starpu_conf *conf) { struct starpu_conf localconf; if (!conf) { starpu_conf_init(&localconf); conf = &localconf; } _mpi_backend._starpu_mpi_backend_init(conf); /* Reserve a core only if required by the backend and if STARPU_NCPU isn't provided */ int mpi_thread_cpuid = starpu_getenv_number_default("STARPU_MPI_THREAD_CPUID", -1); int mpi_thread_coreid = starpu_getenv_number_default("STARPU_MPI_THREAD_COREID", -1); if (mpi_thread_cpuid < 0 && mpi_thread_coreid < 0 && _mpi_backend._starpu_mpi_backend_reserve_core() && conf->ncpus == -1) { /* Reserve a core for our progression thread */ if (conf->reserve_ncpus == -1) conf->reserve_ncpus = 1; else conf->reserve_ncpus++; } conf->will_use_mpi = 1; int ret = starpu_init(conf); if (ret < 0) return ret; _mpi_initialized_starpu = 1; return starpu_mpi_init_comm(argc, argv, initialize_mpi, comm); } int starpu_mpi_shutdown(void) { return starpu_mpi_shutdown_comm(MPI_COMM_WORLD); } struct comm_size_entry { UT_hash_handle hh; MPI_Comm comm; int size; int rank; }; static struct comm_size_entry *registered_comms = NULL; int starpu_mpi_shutdown_comm(MPI_Comm comm) { void *value; int rank, world_size; /* Make sure we do not have MPI communications pending in the task graph * before shutting down MPI */ starpu_mpi_wait_for_all(comm); /* We need to get the rank before calling MPI_Finalize to pass to _starpu_mpi_comm_amounts_display() */ starpu_mpi_comm_rank(comm, &rank); starpu_mpi_comm_size(comm, &world_size); /* kill the progression thread */ _mpi_backend._starpu_mpi_backend_progress_shutdown(&value); #ifdef STARPU_USE_FXT if (starpu_fxt_is_enabled()) { _STARPU_MPI_TRACE_STOP(rank, world_size); } #endif // STARPU_USE_FXT _starpu_mpi_comm_amounts_display(stderr, rank); _starpu_mpi_comm_amounts_shutdown(); _starpu_mpi_cache_shutdown(); _mpi_backend._starpu_mpi_backend_shutdown(); struct comm_size_entry *entry=NULL, *tmp=NULL; HASH_ITER(hh, registered_comms, entry, tmp) { HASH_DEL(registered_comms, entry); free(entry); } if (_mpi_initialized_starpu) starpu_shutdown(); return 0; } int starpu_mpi_comm_register(MPI_Comm comm) { struct comm_size_entry *entry; _STARPU_MPI_MALLOC(entry, sizeof(*entry)); entry->comm = comm; MPI_Comm_size(entry->comm, &(entry->size)); MPI_Comm_rank(entry->comm, &(entry->rank)); HASH_ADD(hh, registered_comms, comm, sizeof(entry->comm), entry); return 0; } int starpu_mpi_comm_size(MPI_Comm comm, int *size) { if (_starpu_mpi_fake_world_size != -1) { *size = _starpu_mpi_fake_world_size; return 0; } #ifdef STARPU_SIMGRID STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now"); *size = _mpi_world_size; return 0; #else struct comm_size_entry *entry; HASH_FIND(hh, registered_comms, &comm, sizeof(entry->comm), entry); STARPU_ASSERT_MSG(entry, "Communicator %ld has not been registered\n", (long int)comm); *size = entry->size; return 0; #endif } int starpu_mpi_comm_rank(MPI_Comm comm, int *rank) { if (_starpu_mpi_fake_world_rank != -1) { *rank = _starpu_mpi_fake_world_rank; return 0; } #ifdef STARPU_SIMGRID STARPU_MPI_ASSERT_MSG(comm == MPI_COMM_WORLD, "StarPU-SMPI only works with MPI_COMM_WORLD for now"); *rank = _mpi_world_rank; return 0; #else struct comm_size_entry *entry; HASH_FIND(hh, registered_comms, &comm, sizeof(entry->comm), entry); STARPU_ASSERT_MSG(entry, "Communicator %ld has not been registered\n", (long int)comm); *rank = entry->rank; return 0; #endif } int starpu_mpi_world_size(void) { int size; starpu_mpi_comm_size(MPI_COMM_WORLD, &size); return size; } int starpu_mpi_world_rank(void) { int rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); return rank; } int starpu_mpi_get_thread_cpuid(void) { return _starpu_mpi_thread_cpuid; } starpu-1.4.9+dfsg/mpi/src/starpu_mpi_init.h000066400000000000000000000017261507764646700207510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_INIT_H__ #define __STARPU_MPI_INIT_H__ #include #include /** @file */ #ifdef __cplusplus extern "C" { #endif void _starpu_mpi_do_initialize(struct _starpu_mpi_argc_argv *argc_argv); #ifdef __cplusplus } #endif #endif // __STARPU_MPI_INIT_H__ starpu-1.4.9+dfsg/mpi/src/starpu_mpi_private.c000066400000000000000000000071531507764646700214530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include int _starpu_debug_rank=-1; int _starpu_debug_level_min=0; int _starpu_debug_level_max=0; int _starpu_mpi_tag = 42; int _starpu_mpi_comm_debug; int _starpu_mpi_nobind = -1; int _starpu_mpi_thread_cpuid = -1; int _starpu_mpi_thread_multiple_send = 0; int _starpu_mpi_use_prio = 1; int _starpu_mpi_fake_world_size = -1; int _starpu_mpi_fake_world_rank = -1; int _starpu_mpi_use_coop_sends = 1; int _starpu_mpi_mem_throttle = 0; int _starpu_mpi_recv_wait_finalize = 0; void _starpu_mpi_set_debug_level_min(int level) { _starpu_debug_level_min = level; } void _starpu_mpi_set_debug_level_max(int level) { _starpu_debug_level_max = level; } int starpu_mpi_get_communication_tag(void) { return _starpu_mpi_tag; } void starpu_mpi_set_communication_tag(int tag) { _starpu_mpi_tag = tag; } char *_starpu_mpi_get_mpi_error_code(int code) { static char str[MPI_MAX_OBJECT_NAME]; int len; MPI_Error_string(code, str, &len); return str; } void _starpu_mpi_env_init(void) { _starpu_mpi_comm_debug = starpu_getenv("STARPU_MPI_COMM") != NULL; _starpu_mpi_fake_world_size = starpu_getenv_number("STARPU_MPI_FAKE_SIZE"); _starpu_mpi_fake_world_rank = starpu_getenv_number("STARPU_MPI_FAKE_RANK"); _starpu_mpi_nobind = starpu_getenv_number_default("STARPU_MPI_NOBIND", 0); _starpu_mpi_thread_cpuid = starpu_getenv_number_default("STARPU_MPI_THREAD_CPUID", -1); _starpu_mpi_thread_multiple_send = starpu_getenv_number_default("STARPU_MPI_THREAD_MULTIPLE_SEND", 0); _starpu_mpi_use_prio = starpu_getenv_number_default("STARPU_MPI_PRIORITIES", 1); _starpu_mpi_use_coop_sends = starpu_getenv_number_default("STARPU_MPI_COOP_SENDS", 1); _starpu_mpi_mem_throttle = starpu_getenv_number_default("STARPU_MPI_MEM_THROTTLE", 0); _starpu_debug_level_min = starpu_getenv_number_default("STARPU_MPI_DEBUG_LEVEL_MIN", 0); _starpu_debug_level_max = starpu_getenv_number_default("STARPU_MPI_DEBUG_LEVEL_MAX", 0); _starpu_mpi_recv_wait_finalize = starpu_getenv_number_default("STARPU_MPI_RECV_WAIT_FINALIZE", _starpu_mpi_recv_wait_finalize); int mpi_thread_coreid = starpu_getenv_number_default("STARPU_MPI_THREAD_COREID", -1); if (_starpu_mpi_thread_cpuid >= 0 && mpi_thread_coreid >= 0) { _STARPU_DISP("Warning: STARPU_MPI_THREAD_CPUID and STARPU_MPI_THREAD_COREID cannot be set at the same time. STARPU_MAIN_THREAD_CPUID will be used.\n"); } if (_starpu_mpi_thread_cpuid == -1 && mpi_thread_coreid >= 0) { _starpu_mpi_thread_cpuid = mpi_thread_coreid * _starpu_get_nhyperthreads(); } } char *_starpu_mpi_request_type(enum _starpu_mpi_request_type request_type) { switch (request_type) { case SEND_REQ: return "SEND_REQ"; case RECV_REQ: return "RECV_REQ"; case WAIT_REQ: return "WAIT_REQ"; case TEST_REQ: return "TEST_REQ"; case BARRIER_REQ: return "BARRIER_REQ"; case UNKNOWN_REQ: return "UNSET_REQ"; default: return "unknown request type"; } } starpu-1.4.9+dfsg/mpi/src/starpu_mpi_private.h000066400000000000000000000360251507764646700214600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_PRIVATE_H__ #define __STARPU_MPI_PRIVATE_H__ #include #include #include #include #include #include #include #include #include /** @file */ #ifdef __cplusplus extern "C" { #endif #ifdef STARPU_SIMGRID extern starpu_pthread_wait_t _starpu_mpi_thread_wait; extern starpu_pthread_queue_t _starpu_mpi_thread_dontsleep; struct _starpu_simgrid_mpi_req { MPI_Request *request; MPI_Status *status; starpu_pthread_queue_t *queue; unsigned *done; }; int _starpu_mpi_simgrid_mpi_test(unsigned *done, int *flag); void _starpu_mpi_simgrid_wait_req(MPI_Request *request, MPI_Status *status, starpu_pthread_queue_t *queue, unsigned *done); #endif struct _starpu_mpi_req* _starpu_mpi_isend_cache_aware(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *_arg, int sequential_consistency, int* cache_flag); struct _starpu_mpi_req* _starpu_mpi_irecv_cache_aware(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *_arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count, int* cache_flag); extern int _starpu_debug_rank; char *_starpu_mpi_get_mpi_error_code(int code); extern int _starpu_mpi_comm_debug; #ifdef STARPU_MPI_VERBOSE extern int _starpu_debug_level_min; extern int _starpu_debug_level_max; void _starpu_mpi_set_debug_level_min(int level); void _starpu_mpi_set_debug_level_max(int level); #endif extern int _starpu_mpi_fake_world_size; extern int _starpu_mpi_fake_world_rank; extern int _starpu_mpi_use_prio; extern int _starpu_mpi_nobind; extern int _starpu_mpi_thread_cpuid; extern int _starpu_mpi_thread_multiple_send; extern int _starpu_mpi_use_coop_sends; extern int _starpu_mpi_mem_throttle; extern int _starpu_mpi_recv_wait_finalize; extern int _starpu_mpi_has_cuda; extern int _starpu_mpi_cuda_devid; void _starpu_mpi_env_init(void); #ifdef STARPU_NO_ASSERT # define STARPU_MPI_ASSERT_MSG(x, msg, ...) do { if (0) { (void) (x); }} while(0) #else # if defined(__CUDACC__) && defined(STARPU_HAVE_WINDOWS) int _starpu_debug_rank; # define STARPU_MPI_ASSERT_MSG(x, msg, ...) \ do \ { \ if (STARPU_UNLIKELY(!(x))) \ { \ if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \ fprintf(stderr, "\n[%d][starpu_mpi][%s][assert failure] " msg "\n\n", _starpu_debug_rank, __starpu_func__, ## __VA_ARGS__); *(int*)NULL = 0; \ } \ } while(0) # else # define STARPU_MPI_ASSERT_MSG(x, msg, ...) \ do \ { \ if (STARPU_UNLIKELY(!(x))) \ { \ if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \ fprintf(stderr, "\n[%d][starpu_mpi][%s][assert failure] " msg "\n\n", _starpu_debug_rank, __starpu_func__, ## __VA_ARGS__); \ } \ assert(x); \ } while(0) # endif #endif #define _STARPU_MPI_MALLOC(ptr, size) do { ptr = malloc(size); STARPU_MPI_ASSERT_MSG(ptr != NULL, "Cannot allocate %ld bytes\n", (long) (size)); } while (0) #define _STARPU_MPI_CALLOC(ptr, nmemb, size) do { ptr = calloc(nmemb, size); STARPU_MPI_ASSERT_MSG(ptr != NULL, "Cannot allocate %ld bytes\n", (long) (nmemb*size)); } while (0) #define _STARPU_MPI_REALLOC(ptr, size) do { void *_new_ptr = realloc(ptr, size); STARPU_MPI_ASSERT_MSG(_new_ptr != NULL, "Cannot reallocate %ld bytes\n", (long) (size)); ptr = _new_ptr; } while (0) #ifdef STARPU_MPI_VERBOSE # define _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, node, tag, utag, comm, way) \ do \ { \ if (_starpu_mpi_comm_debug) \ { \ int __size; \ char _comm_name[128]; \ int _comm_name_len; \ int _rank; \ starpu_mpi_comm_rank(comm, &_rank); \ MPI_Type_size(datatype, &__size); \ MPI_Comm_get_name(comm, _comm_name, &_comm_name_len); \ fprintf(stderr, "[%d][starpu_mpi] :%d:%s:%d:%d:%ld:%s:%p:%ld:%d:%s:%d\n", _rank, _rank, way, node, tag, utag, _comm_name, ptr, count, __size, __starpu_func__ , __LINE__); \ fflush(stderr); \ } \ } while(0) # define _STARPU_MPI_COMM_TO_DEBUG(ptr, count, datatype, dest, tag, utag, comm) _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, dest, tag, utag, comm, "-->") # define _STARPU_MPI_COMM_FROM_DEBUG(ptr, count, datatype, source, tag, utag, comm) _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, source, tag, utag, comm, "<--") # define _STARPU_MPI_DEBUG(level, fmt, ...) \ do \ { \ if (!_starpu_silent && _starpu_debug_level_min <= level && level <= _starpu_debug_level_max) \ { \ if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \ fprintf(stderr, "%*s[%d][starpu_mpi][%s:%d] " fmt , (_starpu_debug_rank+1)*4, "", _starpu_debug_rank, __starpu_func__ , __LINE__,## __VA_ARGS__); \ fflush(stderr); \ } \ } while(0) #else # define _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, node, tag, utag, comm, way) do { } while(0) # define _STARPU_MPI_COMM_TO_DEBUG(ptr, count, datatype, dest, tag, utag, comm) do { } while(0) # define _STARPU_MPI_COMM_FROM_DEBUG(ptr, count, datatype, source, tag, utag, comm) do { } while(0) # define _STARPU_MPI_DEBUG(level, fmt, ...) do { } while(0) #endif #define _STARPU_MPI_DISP(fmt, ...) do { if (!_starpu_silent) { \ if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \ fprintf(stderr, "%*s[%d][starpu_mpi][%s:%d] " fmt , (_starpu_debug_rank+1)*4, "", _starpu_debug_rank, __starpu_func__ , __LINE__ ,## __VA_ARGS__); \ fflush(stderr); }} while(0) #define _STARPU_MPI_MSG(fmt, ...) do { if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \ fprintf(stderr, "[%d][starpu_mpi][%s:%d] " fmt , _starpu_debug_rank, __starpu_func__ , __LINE__ ,## __VA_ARGS__); \ fflush(stderr); } while(0) #ifdef STARPU_MPI_EXTRA_VERBOSE # define _STARPU_MPI_LOG_IN() do { if (!_starpu_silent) { \ if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \ fprintf(stderr, "%*s[%d][starpu_mpi][%s:%d] -->\n", (_starpu_debug_rank+1)*4, "", _starpu_debug_rank, __starpu_func__ , __LINE__); \ fflush(stderr); }} while(0) # define _STARPU_MPI_LOG_OUT() do { if (!_starpu_silent) { \ if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \ fprintf(stderr, "%*s[%d][starpu_mpi][%s:%d] <--\n", (_starpu_debug_rank+1)*4, "", _starpu_debug_rank, __starpu_func__, __LINE__); \ fflush(stderr); }} while(0) #else # define _STARPU_MPI_LOG_IN() # define _STARPU_MPI_LOG_OUT() #endif enum _starpu_mpi_request_type { SEND_REQ=0, RECV_REQ=1, WAIT_REQ=2, TEST_REQ=3, BARRIER_REQ=4, PROBE_REQ=5, UNKNOWN_REQ=6, }; struct _starpu_mpi_node { MPI_Comm comm; int rank; }; struct _starpu_mpi_node_tag { struct _starpu_mpi_node node; starpu_mpi_tag_t data_tag; }; MULTILIST_CREATE_TYPE(_starpu_mpi_req, coop_sends) /** One bag of cooperative sends */ struct _starpu_mpi_coop_sends { starpu_data_handle_t data_handle; /** List of send requests */ struct _starpu_mpi_req_multilist_coop_sends reqs; struct _starpu_mpi_data *mpi_data; /** Array of send requests, after sorting out */ struct _starpu_spinlock lock; struct _starpu_mpi_req **reqs_array; unsigned n; unsigned redirects_sent; /* Used to trace dependencies */ long pre_sync_jobid; }; /** Initialized in starpu_mpi_data_register_comm */ struct _starpu_mpi_data { int magic; struct _starpu_mpi_node_tag node_tag; char *cache_sent; unsigned int cache_received; unsigned int ft_induced_cache_received:1; unsigned int ft_induced_cache_received_count:1; unsigned int modified:1; // Whether the data has been modified since the registration. /** Array used to store the contributing nodes to this data * when it is accessed in (MPI_)REDUX mode. */ char* redux_map; /** Rendez-vous data for opportunistic cooperative sends, * Needed to synchronize between submit thread and workers */ struct _starpu_spinlock coop_lock; /** Current cooperative send bag */ struct _starpu_mpi_coop_sends *coop_sends; /** When provided, wait the given number of sends to start a coop, instead of just waiting that data are ready */ unsigned nb_future_sends; }; struct _starpu_mpi_data *_starpu_mpi_data_get(starpu_data_handle_t data_handle); struct _starpu_mpi_req_backend; struct _starpu_mpi_req; LIST_TYPE(_starpu_mpi_req, /** description of the data at StarPU level */ starpu_data_handle_t data_handle; int prio; unsigned node; /* Which StarPU memory node this will read from / write to */ /** description of the data to be sent/received */ MPI_Datatype datatype; char *datatype_name; void *ptr; starpu_ssize_t count; int registered_datatype; // = 0: datatype is not predefined by StarPU; = 1: otherwise; initialized with -1 struct _starpu_mpi_req_backend *backend; /** who are we talking to ? */ struct _starpu_mpi_node_tag node_tag; void (*func)(struct _starpu_mpi_req *); MPI_Status *status; struct _starpu_mpi_req_multilist_coop_sends coop_sends; struct _starpu_mpi_coop_sends *coop_sends_head; int *flag; unsigned sync; /** Amount of memory pre-reserved for the reception buffer */ size_t reserved_size; int ret; /** 0 send, 1 recv */ enum _starpu_mpi_request_type request_type; unsigned submitted; unsigned completed; unsigned posted; /** in the case of detached requests */ int detached; void *callback_arg; void (*callback)(void *); int sequential_consistency; long pre_sync_jobid; long post_sync_jobid; #ifdef STARPU_SIMGRID MPI_Status status_store; starpu_pthread_queue_t queue; unsigned done; #endif ); PRIO_LIST_TYPE(_starpu_mpi_req, prio) MULTILIST_CREATE_INLINES(struct _starpu_mpi_req, _starpu_mpi_req, coop_sends) /** To be called before actually queueing a request, so the communication layer knows it has something to look at */ void _starpu_mpi_req_willpost(struct _starpu_mpi_req *req); /** To be called to actually submit the request */ void _starpu_mpi_submit_ready_request(void *arg); /** To be called when request is completed */ void _starpu_mpi_release_req_data(struct _starpu_mpi_req *req); void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency); #if 0 /** Build a communication tree. Called before _starpu_mpi_coop_send is ever called. coop_sends->lock is held. */ void _starpu_mpi_coop_sends_build_tree(struct _starpu_mpi_coop_sends *coop_sends); #endif /** Try to merge with send request with other send requests */ void _starpu_mpi_coop_send(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency); /** Actually submit the coop_sends bag to MPI. * At least one of submit_control or submit_data is true. * _starpu_mpi_submit_coop_sends may be called either * - just once with both parameters being true, * - or once with submit_control being true (data is not available yet, but we * can send control messages), and a second time with submit_data being true. Or * the converse, possibly on different threads, etc. */ void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, int submit_control, int submit_data); /* * Fills post_sync_jobid with the reduction synchronization task jobid */ void _starpu_mpi_redux_fill_post_sync_jobid(const void * const redux_data_args, long * const post_sync_jobid); void _starpu_mpi_request_init(struct _starpu_mpi_req **req); struct _starpu_mpi_req * _starpu_mpi_request_fill(starpu_data_handle_t data_handle, int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg, enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *), int sequential_consistency, int is_internal_req, starpu_ssize_t count); void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req); char *_starpu_mpi_request_type(enum _starpu_mpi_request_type request_type); struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count, int prio); int _starpu_mpi_choose_node(starpu_data_handle_t data_handle, enum starpu_data_access_mode mode); void _starpu_mpi_data_flush(starpu_data_handle_t data_handle); /** To be called at initialization to set up the tags upper bound */ void _starpu_mpi_tags_init(void); struct _starpu_mpi_argc_argv { int initialize_mpi; int *argc; char ***argv; MPI_Comm comm; /** Fortran argc */ int fargc; /** Fortran argv */ char **fargv; int rank; int world_size; }; /** * Specific functions to backend implementation */ struct _starpu_mpi_backend { void (*_starpu_mpi_backend_init)(struct starpu_conf *conf); void (*_starpu_mpi_backend_shutdown)(void); int (*_starpu_mpi_backend_reserve_core)(void); void (*_starpu_mpi_backend_request_init)(struct _starpu_mpi_req *req); void (*_starpu_mpi_backend_request_fill)(struct _starpu_mpi_req *req, int is_internal_req); void (*_starpu_mpi_backend_request_destroy)(struct _starpu_mpi_req *req); void (*_starpu_mpi_backend_data_clear)(starpu_data_handle_t data_handle); void (*_starpu_mpi_backend_data_register)(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag); void (*_starpu_mpi_backend_comm_register)(MPI_Comm comm); int (*_starpu_mpi_backend_progress_init)(struct _starpu_mpi_argc_argv *argc_argv); void (*_starpu_mpi_backend_progress_shutdown)(void **value); #ifdef STARPU_SIMGRID void (*_starpu_mpi_backend_wait_for_initialization)(); #endif int (*_starpu_mpi_backend_barrier)(MPI_Comm comm); int (*_starpu_mpi_backend_wait_for_all)(MPI_Comm comm); int (*_starpu_mpi_backend_wait)(starpu_mpi_req *public_req, MPI_Status *status); int (*_starpu_mpi_backend_test)(starpu_mpi_req *public_req, int *flag, MPI_Status *status); void (*_starpu_mpi_backend_isend_size_func)(struct _starpu_mpi_req *req); void (*_starpu_mpi_backend_irecv_size_func)(struct _starpu_mpi_req *req); }; extern struct _starpu_mpi_backend _mpi_backend; #ifdef __cplusplus } #endif #endif // __STARPU_MPI_PRIVATE_H__ starpu-1.4.9+dfsg/mpi/src/starpu_mpi_req.c000066400000000000000000000066351507764646700205740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2017-2017 Guillaume Beauchamp * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include void _starpu_mpi_request_init(struct _starpu_mpi_req **req) { _STARPU_MPI_CALLOC(*req, 1, sizeof(struct _starpu_mpi_req)); /* Do not remove the commented lines, we keep them to make sure we did not forget to initialize a field */ /* Initialize the request structure */ //(*req)->data_handle = NULL; //(*req)->prio = 0; (*req)->node = (unsigned) -1; //(*req)->datatype = 0; //(*req)->datatype_name = NULL; //(*req)->ptr = NULL; (*req)->count = -1; (*req)->registered_datatype = -1; (*req)->node_tag.node.rank = -1; (*req)->node_tag.data_tag = -1; //(*req)->node_tag.node.comm = 0; //(*req)->func = NULL; //(*req)->status = NULL; //(*req)->flag = NULL; _starpu_mpi_req_multilist_init_coop_sends(*req); (*req)->ret = -1; (*req)->request_type = UNKNOWN_REQ; //(*req)->submitted = 0; //(*req)->completed = 0; //(*req)->posted = 0; //(*req)->sync = 0; (*req)->detached = -1; //(*req)->callback = NULL; //(*req)->callback_arg = NULL; (*req)->sequential_consistency = 1; (*req)->pre_sync_jobid = -1; (*req)->post_sync_jobid = -1; #ifdef STARPU_SIMGRID starpu_pthread_queue_init(&((*req)->queue)); starpu_pthread_queue_register(&_starpu_mpi_thread_wait, &((*req)->queue)); //(*req)->done = 0; #endif _mpi_backend._starpu_mpi_backend_request_init(*req); } struct _starpu_mpi_req *_starpu_mpi_request_fill(starpu_data_handle_t data_handle, int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg, enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *), int sequential_consistency, int is_internal_req, starpu_ssize_t count) { struct _starpu_mpi_req *req; /* Initialize the request structure */ _starpu_mpi_request_init(&req); req->request_type = request_type; /* prio_list is sorted by increasing values */ if (_starpu_mpi_use_prio) req->prio = prio; req->data_handle = data_handle; req->node_tag.node.rank = srcdst; req->node_tag.data_tag = data_tag; req->node_tag.node.comm = comm; req->detached = detached; req->sync = sync; req->callback = callback; req->callback_arg = arg; req->func = func; req->sequential_consistency = sequential_consistency; req->count = count; _mpi_backend._starpu_mpi_backend_request_fill(req, is_internal_req); return req; } void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req) { _mpi_backend._starpu_mpi_backend_request_destroy(req); free(req->datatype_name); req->datatype_name = NULL; #ifdef STARPU_SIMGRID starpu_pthread_queue_unregister(&_starpu_mpi_thread_wait, &req->queue); starpu_pthread_queue_destroy(&req->queue); #endif free(req); } starpu-1.4.9+dfsg/mpi/src/starpu_mpi_select_node.c000066400000000000000000000073741507764646700222720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include static int _current_policy = STARPU_MPI_NODE_SELECTION_MOST_R_DATA; static int _last_predefined_policy = STARPU_MPI_NODE_SELECTION_MOST_R_DATA; static starpu_mpi_select_node_policy_func_t _policies[_STARPU_MPI_NODE_SELECTION_MAX_POLICY]; int _starpu_mpi_select_node_with_most_data(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data); void _starpu_mpi_select_node_init() { int i; _policies[STARPU_MPI_NODE_SELECTION_MOST_R_DATA] = _starpu_mpi_select_node_with_most_data; for(i=_last_predefined_policy+1 ; i<_STARPU_MPI_NODE_SELECTION_MAX_POLICY ; i++) _policies[i] = NULL; } int starpu_mpi_node_selection_get_current_policy() { return _current_policy; } int starpu_mpi_node_selection_set_current_policy(int policy) { STARPU_ASSERT_MSG(_policies[policy] != NULL, "Policy %d invalid.\n", policy); _current_policy = policy; return 0; } int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func) { int i=_last_predefined_policy+1; // Look for a unregistered policy while(i<_STARPU_MPI_NODE_SELECTION_MAX_POLICY) { if (_policies[i] == NULL) break; i++; } STARPU_ASSERT_MSG(i<_STARPU_MPI_NODE_SELECTION_MAX_POLICY, "No unused policy available. Unregister existing policies before registering a new one."); _policies[i] = policy_func; return i; } int starpu_mpi_node_selection_unregister_policy(int policy) { STARPU_ASSERT_MSG(policy > _last_predefined_policy, "Policy %d invalid. Only user-registered policies can be unregistered\n", policy); _policies[policy] = NULL; return 0; } int _starpu_mpi_select_node_with_most_data(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data) { size_t *size_on_nodes; size_t max_size; int i; int xrank = 0; (void)me; _STARPU_MPI_CALLOC(size_on_nodes, nb_nodes, sizeof(size_t)); for(i= 0 ; iops->get_size(data); if (rank == STARPU_MPI_PER_NODE) /* Each of them has it */ continue; if (mode & STARPU_R) size_on_nodes[rank] += size; if (mode & STARPU_W) /* Would have to transfer it back */ size_on_nodes[rank] += size; } max_size = 0; for(i=0 ; i max_size) { max_size = size_on_nodes[i]; xrank = i; } } free(size_on_nodes); return xrank; } int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data, int policy) { int ppolicy = policy == STARPU_MPI_NODE_SELECTION_CURRENT_POLICY ? _current_policy : policy; STARPU_ASSERT_MSG(ppolicy < _STARPU_MPI_NODE_SELECTION_MAX_POLICY, "Invalid policy %d\n", ppolicy); STARPU_ASSERT_MSG(_policies[ppolicy], "Unregistered policy %d\n", ppolicy); starpu_mpi_select_node_policy_func_t func = _policies[ppolicy]; return func(me, nb_nodes, descr, nb_data); } starpu-1.4.9+dfsg/mpi/src/starpu_mpi_select_node.h000066400000000000000000000021131507764646700222610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_SELECT_NODE_H__ #define __STARPU_MPI_SELECT_NODE_H__ #include /** @file */ #ifdef __cplusplus extern "C" { #endif #define _STARPU_MPI_NODE_SELECTION_MAX_POLICY 24 void _starpu_mpi_select_node_init(); int _starpu_mpi_select_node(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data, int policy); #ifdef __cplusplus } #endif #endif // __STARPU_MPI_SELECT_NODE_H__ starpu-1.4.9+dfsg/mpi/src/starpu_mpi_stats.c000066400000000000000000000132421507764646700211330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include /* measure the amount of data transfers between each pair of MPI nodes */ static size_t *comm_amount = NULL; static size_t comm_amount_memnode[STARPU_MAXNODES]; static int world_size; static int stats_enabled = 0; static int nb_coop; static int* nb_nodes_per_coop = NULL; static double time_init; static MPI_Comm comm_init; static int nb_sends = 0; static size_t max_sent_size = 0; #ifdef STARPU_USE_MPI_NMAD static struct _starpu_spinlock stats_lock; #endif void _starpu_mpi_comm_amounts_init(MPI_Comm comm) { if (stats_enabled != 1) { time_init = starpu_timing_now(); comm_init = comm; stats_enabled = starpu_getenv_number("STARPU_MPI_STATS"); if (stats_enabled == -1) { /* Legacy env var */ stats_enabled = starpu_getenv_number("STARPU_COMM_STATS"); } if (stats_enabled == -1) { stats_enabled = 0; } } if (stats_enabled == 0) return; _STARPU_DISP("Warning: StarPU is executed with STARPU_MPI_STATS=1, which slows down a bit\n"); starpu_mpi_comm_size(comm, &world_size); _STARPU_MPI_DEBUG(1, "allocating for %d nodes\n", world_size); _STARPU_MPI_CALLOC(comm_amount, world_size, sizeof(size_t)); nb_coop = 0; _STARPU_MPI_CALLOC(nb_nodes_per_coop, world_size, sizeof(int)); #ifdef STARPU_USE_MPI_NMAD _starpu_spin_init(&stats_lock); #endif } void _starpu_mpi_comm_stats_disable() { stats_enabled = 0; } void _starpu_mpi_comm_stats_enable() { stats_enabled = 1; if (comm_amount == NULL) { _starpu_mpi_comm_amounts_init(comm_init); } } void _starpu_mpi_comm_amounts_shutdown() { if (comm_amount) { free(comm_amount); free(nb_nodes_per_coop); comm_amount = NULL; nb_nodes_per_coop = NULL; #ifdef STARPU_USE_MPI_NMAD _starpu_spin_destroy(&stats_lock); #endif } } void _starpu_mpi_comm_amounts_inc(MPI_Comm comm, unsigned memnode, unsigned dst, MPI_Datatype datatype, int count) { int src, size; if (stats_enabled == 0) return; starpu_mpi_comm_rank(comm, &src); MPI_Type_size(datatype, &size); _STARPU_MPI_DEBUG(1, "[%d] adding %d to %d, from node %d\n", src, count*size, dst, memnode); STARPU_ASSERT(memnode < starpu_memory_nodes_get_count()); #ifdef STARPU_USE_MPI_NMAD /* With NewMadeleine, the send requests are triggered from the workers, so * this is a critical section. */ _starpu_spin_lock(&stats_lock); #endif comm_amount[dst] += count*size; comm_amount_memnode[memnode] += count*size; if (((size_t) count*size) > max_sent_size) { max_sent_size = count*size; } nb_sends++; #ifdef STARPU_USE_MPI_NMAD _starpu_spin_unlock(&stats_lock); #endif } void _starpu_mpi_nb_coop_inc(int nb_nodes_in_coop) { if (stats_enabled == 0) return; assert(nb_nodes_in_coop > 0); assert(nb_nodes_in_coop < world_size); #ifdef STARPU_USE_MPI_NMAD STARPU_ATTRIBUTE_UNUSED size_t dummy = STARPU_ATOMIC_ADD(&nb_coop, 1); dummy = STARPU_ATOMIC_ADD(&nb_nodes_per_coop[nb_nodes_in_coop-1], 1); #else nb_coop++; nb_nodes_per_coop[nb_nodes_in_coop-1]++; #endif } void starpu_mpi_comm_stats_retrieve(size_t *comm_stats) { if (comm_amount) memcpy(comm_stats, comm_amount, world_size * sizeof(size_t)); } void _starpu_mpi_comm_amounts_display(FILE *stream, int node) { int dst; size_t sum = 0; if (comm_amount == NULL) return; double time = starpu_timing_now() - time_init; for (dst = 0; dst < world_size; dst++) { sum += comm_amount[dst]; } fprintf(stream, "\n[starpu_comm_stats][%d] TOTAL:\t%f B\t%f MB\t %f B/s\t %f MB/s\n", node, (float)sum, (float)sum/1024/1024, (float)sum/(float)time, (float)sum/1204/1024/(float)time); fprintf(stream, "[starpu_comm_stats][%d] nb_sends: %d\n", node, nb_sends); fprintf(stream, "[starpu_comm_stats][%d] max_sent_size: %ld\n", node, max_sent_size); fprintf(stream, "[starpu_comm_stats][%d] average sent size: %ld\n", node, nb_sends ? sum / nb_sends : 0); for (dst = 0; dst < world_size; dst++) { if (comm_amount[dst]) fprintf(stream, "[starpu_comm_stats][%d:%d]\t%f B\t%f MB\t %f B/s\t %f MB/s\n", node, dst, (float)comm_amount[dst], ((float)comm_amount[dst])/(1024*1024), (float)comm_amount[dst]/(float)time, ((float)comm_amount[dst])/(1024*1024)/(float)time); } char name[32]; unsigned xdst; for (xdst = 0; xdst < starpu_memory_nodes_get_count(); xdst++) { if (comm_amount_memnode[xdst]) { starpu_memory_node_get_name(xdst, name, sizeof(name)); fprintf(stream, "[starpu_comm_stats_memnode][%d:%s]\t%f B\t%f MB\t %f B/s\t %f MB/s\n", node, name, (float)comm_amount_memnode[xdst], ((float)comm_amount_memnode[xdst])/(1024*1024), (float)comm_amount_memnode[xdst]/(float)time, ((float)comm_amount_memnode[xdst])/(1024*1024)/(float)time); } } fprintf(stream, "[starpu_comm_stats][%d] NB_COOP: %d\n", node, nb_coop); for (dst = 0; dst < world_size; dst++) { if (nb_nodes_per_coop[dst] != 0) { fprintf(stream, "[starpu_comm_stats][%d]\t %d in coop: %d (%f%%)\n", node, dst+1, nb_nodes_per_coop[dst], nb_coop ? (((float) nb_nodes_per_coop[dst]) / nb_coop) * 100. : 0); } } } starpu-1.4.9+dfsg/mpi/src/starpu_mpi_stats.h000066400000000000000000000024711507764646700211420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_STATS_H__ #define __STARPU_MPI_STATS_H__ #include #include #include /** @file */ #ifdef __cplusplus extern "C" { #endif void _starpu_mpi_comm_amounts_init(MPI_Comm comm); void _starpu_mpi_comm_stats_disable(void); void _starpu_mpi_comm_stats_enable(void); void _starpu_mpi_comm_amounts_shutdown(void); void _starpu_mpi_comm_amounts_inc(MPI_Comm comm, unsigned memnode, unsigned dst, MPI_Datatype datatype, int count); void _starpu_mpi_nb_coop_inc(int nb_nodes_in_coop); void _starpu_mpi_comm_amounts_display(FILE *stream, int node); #ifdef __cplusplus } #endif #endif // __STARPU_MPI_STATS_H__ starpu-1.4.9+dfsg/mpi/src/starpu_mpi_tags.c000066400000000000000000000064371507764646700207430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include /** * @brief Structure to store tags ranges * * List structure to manage the set of available tags. */ struct starpu_tags_range { int64_t min; /**< Minimal value in the range */ int64_t max; /**< Maximal value in the range */ struct starpu_tags_range *next; /**< Pointer to the following range */ }; static struct starpu_tags_range *cst_first = NULL; /** * @brief StarPU tag upper bound */ static int64_t _starpu_tags_ub = 0; void _starpu_mpi_tags_init(void) { if (!_starpu_tags_ub) { int ok = 0; void *tag_ub_p = NULL; starpu_mpi_comm_get_attr(MPI_COMM_WORLD, STARPU_MPI_TAG_UB, &tag_ub_p, &ok); _starpu_tags_ub = (uint64_t)((intptr_t)tag_ub_p); STARPU_ASSERT_MSG(ok, "Failed to get the STARPU_MPI_TAG_UB attribute\n"); } } int64_t starpu_mpi_tags_allocate(int64_t nbtags) { struct starpu_tags_range *new; struct starpu_tags_range *prev = NULL; struct starpu_tags_range *current = cst_first; int64_t min = 0; int64_t max = (current == NULL) ? _starpu_tags_ub : current->min; if (nbtags == 0) { return -1; } STARPU_ASSERT(_starpu_tags_ub != 0); /* StarPU tag must be initialized */ while (((max - min) < nbtags) && (current != NULL)) { min = current->max; prev = current; current = current->next; max = (current == NULL) ? _starpu_tags_ub : current->min; } if ((max - min) < nbtags) { _STARPU_ERROR("No space left in tags.\n" ); return -1; } _STARPU_MALLOC(new, sizeof(struct starpu_tags_range)); new->min = min; new->max = min + nbtags; new->next = current; if (prev == NULL) { cst_first = new; } else { STARPU_ASSERT(prev->next == current); prev->next = new; } _STARPU_MPI_DEBUG(0, "Allocates tag range %ld - %ld\n", min, min + nbtags); STARPU_ASSERT(cst_first != NULL); return new->min; } void starpu_mpi_tags_free(int64_t min) { struct starpu_tags_range *prev = NULL; struct starpu_tags_range *current = cst_first; STARPU_ASSERT(cst_first != NULL); /* At least one range must be registered */ while ((current != NULL) && (current->min < min)) { prev = current; current = current->next; } if (current == NULL) { _STARPU_ERROR("Failed to release the tag range starting by %ld", min); return; } STARPU_ASSERT(current != NULL); STARPU_ASSERT(current->min == min); if (prev) { prev->next = current->next; } else { STARPU_ASSERT(current == cst_first); cst_first = current->next; } _STARPU_MPI_DEBUG(0, "Free tag range %ld - %ld\n", current->min, current->max); free(current); return; } starpu-1.4.9+dfsg/mpi/src/starpu_mpi_task_insert.c000066400000000000000000001116341507764646700223270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "starpu_mpi_task_insert.h" #define _SEND_DATA(data, mode, dest, data_tag, prio, comm, callback, arg) \ do { \ if (mode & STARPU_SSEND) \ return starpu_mpi_issend_detached_prio(data, dest, data_tag, prio, comm, callback, arg); \ else \ return starpu_mpi_isend_detached_prio(data, dest, data_tag, prio, comm, callback, arg); \ } while (0) static void (*pre_submit_hook)(struct starpu_task *task) = NULL; /* reduction wrap-up */ // entry in the table struct _starpu_redux_data_entry { UT_hash_handle hh; starpu_data_handle_t data_handle; }; // the table static struct _starpu_redux_data_entry *_redux_data = NULL; void _starpu_mpi_pre_submit_hook_call(struct starpu_task *task) { if (pre_submit_hook) pre_submit_hook(task); } int starpu_mpi_pre_submit_hook_register(void (*f)(struct starpu_task *)) { if (pre_submit_hook) _STARPU_MSG("Warning: a pre_submit_hook has already been registered. Please check if you really want to erase the previously registered hook.\n"); pre_submit_hook = f; return 0; } int starpu_mpi_pre_submit_hook_unregister() { pre_submit_hook = NULL; return 0; } int _starpu_mpi_find_executee_node(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int *do_execute, int *inconsistent_execute, int *xrank) { if (mode & STARPU_W || mode & STARPU_REDUX) { if (!data) { /* We don't have anything allocated for this. * The application knows we won't do anything * about this task */ /* Yes, the app could actually not call * task_insert at all itself, this is just a * safeguard. */ _STARPU_MPI_DEBUG(3, "oh oh\n"); _STARPU_MPI_LOG_OUT(); return -EINVAL; } int mpi_rank = starpu_mpi_data_get_rank(data); if (mpi_rank == -1) { _STARPU_ERROR("Data %p with mode STARPU_W needs to have a valid rank", data); } if (*xrank == -1) { // No node has been selected yet *xrank = mpi_rank; _STARPU_MPI_DEBUG(100, "Codelet is going to be executed by node %d\n", *xrank); *do_execute = mpi_rank == STARPU_MPI_PER_NODE || (mpi_rank == me); } else if (mpi_rank != *xrank) { _STARPU_MPI_DEBUG(100, "Another node %d had already been selected to execute the codelet, can't now set %d\n", *xrank, mpi_rank); *inconsistent_execute = 1; if (*xrank == STARPU_MPI_PER_NODE) _STARPU_ERROR("Data %p has rank %d but we had STARPU_MPI_PER_NODE data before that", data, mpi_rank); else if (mpi_rank == STARPU_MPI_PER_NODE) _STARPU_ERROR("Data %p has rank STARPU_MPI_PER_NODE but we had non-STARPU_MPI_PER_NODE data before that (rank %d)", data, *xrank); } } _STARPU_MPI_DEBUG(100, "Executing: inconsistent=%d, do_execute=%d, xrank=%d\n", *inconsistent_execute, *do_execute, *xrank); return 0; } int _starpu_mpi_exchange_data_before_execution(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int xrank, int do_execute, int prio, MPI_Comm comm) { if (data && xrank == STARPU_MPI_PER_NODE) { STARPU_ASSERT_MSG(starpu_mpi_data_get_rank(data) == STARPU_MPI_PER_NODE, "If task is replicated, it has to access only per-node data"); } if (data && mode & STARPU_R && !(mode & STARPU_MPI_REDUX)) { int mpi_rank = starpu_mpi_data_get_rank(data); starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(data); if (mpi_rank == -1) { _STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n"); } if (do_execute && mpi_rank != STARPU_MPI_PER_NODE && mpi_rank != me) { /* The node is going to execute the codelet, but it does not own the data, it needs to receive the data from the owner node */ int already_received = starpu_mpi_cached_receive_set(data); if (already_received == 0) { if (data_tag == -1) _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n"); _STARPU_MPI_DEBUG(1, "Receiving data %p from %d with prio %d\n", data, mpi_rank, prio); int ret = starpu_mpi_irecv_detached_prio(data, mpi_rank, data_tag, prio, comm, NULL, NULL); if (ret) return ret; } // else the node has already received the data } if (!do_execute && mpi_rank == me) { /* The node owns the data, but another node is going to execute the codelet, the node needs to send the data to the executee node. */ int already_sent = starpu_mpi_cached_send_set(data, xrank); if (already_sent == 0) { if (data_tag == -1) _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n"); _STARPU_MPI_DEBUG(1, "Sending data %p to %d with prio %d\n", data, xrank, prio); _SEND_DATA(data, mode, xrank, data_tag, prio, comm, NULL, NULL); } // Else the data has already been sent } } return 0; } static int _starpu_mpi_exchange_data_after_execution(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int xrank, int do_execute, int prio, MPI_Comm comm) { if (mode & STARPU_W && !(mode & STARPU_MPI_REDUX)) { int mpi_rank = starpu_mpi_data_get_rank(data); starpu_mpi_tag_t data_tag = starpu_mpi_data_get_tag(data); struct _starpu_mpi_data* mpi_data = _starpu_mpi_data_get(data); if(mpi_rank == -1) { _STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n"); } mpi_data->modified=1; if (mpi_rank == STARPU_MPI_PER_NODE) { mpi_rank = me; } if (mpi_rank == me) { if (xrank != -1 && (xrank != STARPU_MPI_PER_NODE && me != xrank)) { _STARPU_MPI_DEBUG(1, "Receive data %p back from the task %d which executed the codelet with prio %d...\n", data, xrank, prio); if(data_tag == -1) _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n"); int ret = starpu_mpi_irecv_detached_prio(data, xrank, data_tag, prio, comm, NULL, NULL); if (ret) return ret; } } else if (do_execute) { if(data_tag == -1) _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n"); _STARPU_MPI_DEBUG(1, "Send data %p back to its owner %d with prio %d...\n", data, mpi_rank, prio); _SEND_DATA(data, mode, mpi_rank, data_tag, prio, comm, NULL, NULL); } } return 0; } static void _starpu_mpi_clear_data_after_execution(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int do_execute) { if (_starpu_cache_enabled) { if ((mode & STARPU_W && !(mode & STARPU_MPI_REDUX)) || mode & STARPU_REDUX) { /* The data has been modified, it MUST be removed from the cache */ starpu_mpi_cached_send_clear(data); starpu_mpi_cached_receive_clear(data); } } else { /* We allocated a temporary buffer for the received data, now drop it */ if ((mode & STARPU_R && !(mode & STARPU_MPI_REDUX)) && do_execute) { int mpi_rank = starpu_mpi_data_get_rank(data); if (mpi_rank == STARPU_MPI_PER_NODE) { mpi_rank = me; } if (mpi_rank != me && mpi_rank != -1) { starpu_data_invalidate_submit(data); } } } } static int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nodes, int *xrank, int *do_execute, struct starpu_data_descr **descrs_p, int *nb_data_p, int *prio_p, va_list varg_list) { /* XXX: _fstarpu_mpi_task_decode_v needs to be updated at the same time */ va_list varg_list_copy; int inconsistent_execute = 0; int arg_type; int node_selected = 0; int nb_allocated_data = 16; struct starpu_data_descr *descrs; int nb_data; int prio = 0; int select_node_policy = STARPU_MPI_NODE_SELECTION_CURRENT_POLICY; _STARPU_TRACE_TASK_MPI_DECODE_START(); _STARPU_MPI_MALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); nb_data = 0; *do_execute = -1; *xrank = -1; va_copy(varg_list_copy, varg_list); while ((arg_type = va_arg(varg_list_copy, int)) != 0) { int arg_type_nocommute = arg_type & ~STARPU_COMMUTE; if (arg_type==STARPU_EXECUTE_ON_NODE) { int rank = va_arg(varg_list_copy, int); if (rank != -1) { *xrank = rank; if (node_selected == 0) { _STARPU_MPI_DEBUG(100, "Executing on node %d\n", *xrank); *do_execute = 1; node_selected = 1; inconsistent_execute = 0; } } } else if (arg_type==STARPU_EXECUTE_ON_DATA) { starpu_data_handle_t data = va_arg(varg_list_copy, starpu_data_handle_t); if (node_selected == 0) { *xrank = starpu_mpi_data_get_rank(data); STARPU_ASSERT_MSG(*xrank != -1, "Rank of the data must be set using starpu_mpi_data_register() or starpu_data_set_rank()"); _STARPU_MPI_DEBUG(100, "Executing on data node %d\n", *xrank); STARPU_ASSERT_MSG(*xrank <= nb_nodes, "Node %d to execute codelet is not a valid node (%d)", *xrank, nb_nodes); *do_execute = 1; node_selected = 1; inconsistent_execute = 0; } } else if (arg_type_nocommute & STARPU_R || arg_type_nocommute & STARPU_W || arg_type_nocommute & STARPU_RW || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX || arg_type & STARPU_MPI_REDUX) { starpu_data_handle_t data = va_arg(varg_list_copy, starpu_data_handle_t); enum starpu_data_access_mode mode = (enum starpu_data_access_mode) arg_type; if (node_selected == 0) { int ret = _starpu_mpi_find_executee_node(data, mode, me, do_execute, &inconsistent_execute, xrank); if (ret == -EINVAL) { free(descrs); va_end(varg_list_copy); _STARPU_TRACE_TASK_MPI_DECODE_END(); return ret; } } if (nb_data >= nb_allocated_data) { nb_allocated_data *= 2; _STARPU_MPI_REALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); } descrs[nb_data].handle = data; descrs[nb_data].mode = mode; nb_data ++; } else if (arg_type == STARPU_DATA_ARRAY) { starpu_data_handle_t *data = va_arg(varg_list_copy, starpu_data_handle_t *); int nb_handles = va_arg(varg_list_copy, int); int i; for(i=0 ; inbuffers == STARPU_VARIABLE_NBUFFERS || nb_data < codelet->nbuffers, "Too many data passed to starpu_mpi_task_insert"); enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(codelet, nb_data); if (node_selected == 0) { int ret = _starpu_mpi_find_executee_node(data[i], mode, me, do_execute, &inconsistent_execute, xrank); if (ret == -EINVAL) { free(descrs); va_end(varg_list_copy); _STARPU_TRACE_TASK_MPI_DECODE_END(); return ret; } } if (nb_data >= nb_allocated_data) { nb_allocated_data *= 2; _STARPU_MPI_REALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); } descrs[nb_data].handle = data[i]; descrs[nb_data].mode = mode; nb_data ++; } } else if (arg_type == STARPU_DATA_MODE_ARRAY) { struct starpu_data_descr *_descrs = va_arg(varg_list_copy, struct starpu_data_descr*); int nb_handles = va_arg(varg_list_copy, int); int i; for(i=0 ; i= nb_allocated_data) { nb_allocated_data *= 2; _STARPU_MPI_REALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); } descrs[nb_data].handle = _descrs[i].handle; descrs[nb_data].mode = mode; nb_data ++; } } else if (arg_type==STARPU_VALUE) { (void)va_arg(varg_list_copy, void *); (void)va_arg(varg_list_copy, size_t); } else if (arg_type==STARPU_CL_ARGS) { (void)va_arg(varg_list_copy, void *); (void)va_arg(varg_list_copy, size_t); } else if (arg_type==STARPU_CL_ARGS_NFREE) { (void)va_arg(varg_list_copy, void *); (void)va_arg(varg_list_copy, size_t); } else if (arg_type==STARPU_TASK_DEPS_ARRAY) { (void)va_arg(varg_list_copy, unsigned); (void)va_arg(varg_list_copy, struct starpu_task **); } else if (arg_type==STARPU_TASK_END_DEPS_ARRAY) { (void)va_arg(varg_list_copy, unsigned); (void)va_arg(varg_list_copy, struct starpu_task **); } else if (arg_type==STARPU_CALLBACK) { (void)va_arg(varg_list_copy, _starpu_callback_func_t); } else if (arg_type==STARPU_CALLBACK_WITH_ARG) { (void)va_arg(varg_list_copy, _starpu_callback_func_t); (void)va_arg(varg_list_copy, void *); } else if (arg_type==STARPU_CALLBACK_WITH_ARG_NFREE) { (void)va_arg(varg_list_copy, _starpu_callback_func_t); (void)va_arg(varg_list_copy, void *); } else if (arg_type==STARPU_CALLBACK_ARG) { (void)va_arg(varg_list_copy, void *); } else if (arg_type==STARPU_CALLBACK_ARG_NFREE) { (void)va_arg(varg_list_copy, void *); } else if (arg_type==STARPU_EPILOGUE_CALLBACK) { (void)va_arg(varg_list_copy, _starpu_callback_func_t); } else if (arg_type==STARPU_EPILOGUE_CALLBACK_ARG) { (void)va_arg(varg_list_copy, void *); } else if (arg_type==STARPU_PRIORITY) { prio = va_arg(varg_list_copy, int); } /* STARPU_EXECUTE_ON_NODE handled above */ /* STARPU_EXECUTE_ON_DATA handled above */ /* STARPU_DATA_ARRAY handled above */ /* STARPU_DATA_MODE_ARRAY handled above */ else if (arg_type==STARPU_TAG) { (void)va_arg(varg_list_copy, starpu_tag_t); } else if (arg_type==STARPU_HYPERVISOR_TAG) { (void)va_arg(varg_list_copy, int); } else if (arg_type==STARPU_FLOPS) { (void)va_arg(varg_list_copy, double); } else if (arg_type==STARPU_SCHED_CTX) { (void)va_arg(varg_list_copy, unsigned); } else if (arg_type==STARPU_PROLOGUE_CALLBACK) { (void)va_arg(varg_list_copy, _starpu_callback_func_t); } else if (arg_type==STARPU_PROLOGUE_CALLBACK_ARG) { (void)va_arg(varg_list_copy, void *); } else if (arg_type==STARPU_PROLOGUE_CALLBACK_ARG_NFREE) { (void)va_arg(varg_list_copy, void *); } else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP) { (void)va_arg(varg_list_copy, _starpu_callback_func_t); } else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP_ARG) { (void)va_arg(varg_list_copy, void *); } else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE) { (void)va_arg(varg_list_copy, void *); } else if (arg_type==STARPU_EXECUTE_WHERE) { // the flag is decoded and set later when // calling function _starpu_task_insert_create() (void)va_arg(varg_list_copy, unsigned long long); } else if (arg_type==STARPU_EXECUTE_ON_WORKER) { // the flag is decoded and set later when // calling function _starpu_task_insert_create() (void)va_arg(varg_list_copy, int); } else if (arg_type==STARPU_TAG_ONLY) { (void)va_arg(varg_list_copy, starpu_tag_t); } else if (arg_type==STARPU_NAME) { (void)va_arg(varg_list_copy, const char *); } else if (arg_type==STARPU_POSSIBLY_PARALLEL) { (void)va_arg(varg_list_copy, unsigned); } else if (arg_type==STARPU_WORKER_ORDER) { // the flag is decoded and set later when // calling function _starpu_task_insert_create() (void)va_arg(varg_list_copy, unsigned); } else if (arg_type==STARPU_NODE_SELECTION_POLICY) { select_node_policy = va_arg(varg_list_copy, int); } else if (arg_type==STARPU_TASK_COLOR) { (void)va_arg(varg_list_copy, int); } else if (arg_type==STARPU_TASK_SYNCHRONOUS) { (void)va_arg(varg_list_copy, int); } else if (arg_type==STARPU_TRANSACTION) { (void)va_arg(varg_list_copy, struct starpu_transaction *); } else if (arg_type==STARPU_HANDLES_SEQUENTIAL_CONSISTENCY) { (void)va_arg(varg_list_copy, char *); } #ifdef STARPU_BUBBLE else if (arg_type==STARPU_BUBBLE_FUNC) { STARPU_ASSERT_MSG(0, "Bubbles + MPI not supported yet\n"); (void)va_arg(varg_list,void*); } else if (arg_type==STARPU_BUBBLE_FUNC_ARG) { (void)va_arg(varg_list,void*); } else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC) { (void)va_arg(varg_list,void*); } else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC_ARG) { (void)va_arg(varg_list,void*); } #endif else if (arg_type==STARPU_TASK_END_DEP) { (void)va_arg(varg_list_copy, int); } else if (arg_type==STARPU_TASK_WORKERIDS) { (void)va_arg(varg_list_copy, unsigned); (void)va_arg(varg_list_copy, uint32_t*); } else if (arg_type==STARPU_SEQUENTIAL_CONSISTENCY) { (void)va_arg(varg_list_copy, unsigned); } else if (arg_type==STARPU_TASK_PROFILING_INFO) { (void)va_arg(varg_list_copy, struct starpu_profiling_task_info *); } else if (arg_type==STARPU_TASK_NO_SUBMITORDER) { (void)va_arg(varg_list_copy, unsigned); } else if (arg_type==STARPU_TASK_SCHED_DATA) { (void)va_arg(varg_list_copy, void *); } else if (arg_type==STARPU_TASK_FILE) { (void)va_arg(varg_list_copy, const char *); } else if (arg_type==STARPU_TASK_LINE) { (void)va_arg(varg_list_copy, int); } else { STARPU_ABORT_MSG("Unrecognized argument %d, did you perhaps forget to end arguments with 0?\n", arg_type); } } va_end(varg_list_copy); if (inconsistent_execute == 1 || *xrank == -1) { // We need to find out which node is going to execute the codelet. _STARPU_MPI_DEBUG(100, "Different nodes are owning W data. The node to execute the codelet is going to be selected with the current selection node policy. See starpu_mpi_node_selection_set_current_policy() to change the policy, or use STARPU_EXECUTE_ON_NODE or STARPU_EXECUTE_ON_DATA to specify the node\n"); *xrank = _starpu_mpi_select_node(me, nb_nodes, descrs, nb_data, select_node_policy); *do_execute = *xrank == STARPU_MPI_PER_NODE || (me == *xrank); } else { _STARPU_MPI_DEBUG(100, "Inconsistent=%d - xrank=%d\n", inconsistent_execute, *xrank); *do_execute = *xrank == STARPU_MPI_PER_NODE || (me == *xrank); } _STARPU_MPI_DEBUG(100, "do_execute=%d\n", *do_execute); *descrs_p = descrs; *nb_data_p = nb_data; *prio_p = prio; _STARPU_TRACE_TASK_MPI_DECODE_END(); return 0; } static int _starpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, struct starpu_task **task, int *xrank_p, struct starpu_data_descr **descrs_p, int *nb_data_p, int *prio_p, va_list varg_list) { int me, do_execute, xrank, nb_nodes; int ret; int i; struct starpu_data_descr *descrs = NULL; int nb_data; int prio; _STARPU_MPI_LOG_IN(); starpu_mpi_comm_rank(comm, &me); starpu_mpi_comm_size(comm, &nb_nodes); /* Find out whether we are to execute the data because we own the data to be written to. */ ret = _starpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, &prio, varg_list); if (ret < 0) return ret; _STARPU_TRACE_TASK_MPI_PRE_START(); /* Send and receive data as requested */ for(i=0 ; impi_data) { char *redux_map = starpu_mpi_data_get_redux_map(descrs[i].handle); if (redux_map != NULL && descrs[i].mode & STARPU_R && descrs[i].mode & ~ STARPU_REDUX && descrs[i].mode & ~ STARPU_MPI_REDUX) { _starpu_mpi_redux_wrapup_data(descrs[i].handle); } } _starpu_mpi_exchange_data_before_execution(descrs[i].handle, descrs[i].mode, me, xrank, do_execute, prio, comm); } if (xrank_p) *xrank_p = xrank; if (nb_data_p) *nb_data_p = nb_data; if (prio_p) *prio_p = prio; if (descrs_p) *descrs_p = descrs; else free(descrs); if (do_execute == 1) { va_list varg_list_copy; _STARPU_MPI_DEBUG(100, "Execution of the codelet %p (%s)\n", codelet, codelet?codelet->name:NULL); *task = starpu_task_create(); (*task)->cl_arg_free = 1; (*task)->callback_arg_free = 1; (*task)->prologue_callback_arg_free = 1; (*task)->prologue_callback_pop_arg_free = 1; va_copy(varg_list_copy, varg_list); _starpu_task_insert_create(codelet, *task, varg_list_copy); va_end(varg_list_copy); if ((*task)->cl) { /* we suppose the current context is not going to change between now and the execution of the task */ (*task)->sched_ctx = _starpu_sched_ctx_get_current_context(); /* Check the type of worker(s) required by the task exist */ if (STARPU_UNLIKELY(!_starpu_worker_exists(*task))) { _STARPU_MPI_DEBUG(0, "There is no worker to execute the codelet %p (%s)\n", codelet, codelet?codelet->name:NULL); return -ENODEV; } /* In case we require that a task should be explicitly * executed on a specific worker, we make sure that the worker * is able to execute this task. */ if (STARPU_UNLIKELY((*task)->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task((*task)->workerid, *task, 0))) { _STARPU_MPI_DEBUG(0, "The specified worker %d cannot execute the codelet %p (%s)\n", (*task)->workerid, codelet, codelet?codelet->name:NULL); return -ENODEV; } } } _STARPU_TRACE_TASK_MPI_PRE_END(); return do_execute; } int _starpu_mpi_task_postbuild_v(MPI_Comm comm, int xrank, int do_execute, struct starpu_data_descr *descrs, int nb_data, int prio) { int me, i; _STARPU_TRACE_TASK_MPI_POST_START(); starpu_mpi_comm_rank(comm, &me); for(i=0 ; impi_data; int rrank = starpu_mpi_data_get_rank(descrs[i].handle); int size; starpu_mpi_comm_size(comm, &size); if (mpi_data->redux_map == NULL) { _STARPU_CALLOC(mpi_data->redux_map, size, sizeof(mpi_data->redux_map[0])); } mpi_data->redux_map [xrank] = 1; mpi_data->redux_map [rrank] = 1; int outside_owner = 0; int j; for (j = 0; j < size; j++) { if (mpi_data->redux_map[j] && j != rrank) { outside_owner = 1; break; } } if (outside_owner) { struct _starpu_redux_data_entry *entry; HASH_FIND_PTR(_redux_data, &descrs[i].handle, entry); if (entry == NULL) { _STARPU_MPI_MALLOC(entry, sizeof(*entry)); starpu_data_handle_t data_handle = descrs[i].handle; entry->data_handle = data_handle; HASH_ADD_PTR(_redux_data, data_handle, entry); } } } _starpu_mpi_exchange_data_after_execution(descrs[i].handle, descrs[i].mode, me, xrank, do_execute, prio, comm); _starpu_mpi_clear_data_after_execution(descrs[i].handle, descrs[i].mode, me, do_execute); } _STARPU_TRACE_TASK_MPI_POST_END(); _STARPU_MPI_LOG_OUT(); return 0; } static int _starpu_mpi_task_insert_v(MPI_Comm comm, struct starpu_codelet *codelet, va_list varg_list) { struct starpu_task *task; int ret; int xrank; int do_execute = 0; struct starpu_data_descr *descrs; int nb_data; int prio; ret = _starpu_mpi_task_build_v(comm, codelet, &task, &xrank, &descrs, &nb_data, &prio, varg_list); if (ret < 0) return ret; if (ret == 1) { do_execute = 1; ret = starpu_task_submit(task); if (STARPU_UNLIKELY(ret == -ENODEV)) { _STARPU_MSG("submission of task %p with codelet %p failed (symbol `%s') (err: ENODEV)\n", task, task->cl, (codelet == NULL) ? "none" : task->cl->name ? task->cl->name : (task->cl->model && task->cl->model->symbol)?task->cl->model->symbol:"none"); task->destroy = 0; starpu_task_destroy(task); free(descrs); return -ENODEV; } } int val = _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data, prio); free(descrs); if (ret == 1) _starpu_mpi_pre_submit_hook_call(task); return val; } #undef starpu_mpi_task_insert int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...) { va_list varg_list; int ret; va_start(varg_list, codelet); ret = _starpu_mpi_task_insert_v(comm, codelet, varg_list); va_end(varg_list); return ret; } #undef starpu_mpi_insert_task int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...) { va_list varg_list; int ret; va_start(varg_list, codelet); ret = _starpu_mpi_task_insert_v(comm, codelet, varg_list); va_end(varg_list); return ret; } #undef starpu_mpi_task_build struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...) { va_list varg_list; struct starpu_task *task; int ret; va_start(varg_list, codelet); ret = _starpu_mpi_task_build_v(comm, codelet, &task, NULL, NULL, NULL, NULL, varg_list); va_end(varg_list); return (ret == 1 || ret == -ENODEV) ? task : NULL; } struct starpu_task *starpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, va_list varg_list) { struct starpu_task *task; int ret; ret = _starpu_mpi_task_build_v(comm, codelet, &task, NULL, NULL, NULL, NULL, varg_list); return (ret == 1 || ret == -ENODEV) ? task : NULL; } int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...) { int xrank, do_execute; int ret, me, nb_nodes; va_list varg_list; struct starpu_data_descr *descrs; int nb_data; int prio; starpu_mpi_comm_rank(comm, &me); starpu_mpi_comm_size(comm, &nb_nodes); va_start(varg_list, codelet); /* Find out whether we are to execute the data because we own the data to be written to. */ ret = _starpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, &prio, varg_list); va_end(varg_list); if (ret < 0) return ret; ret = _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data, prio); free(descrs); return ret; } int starpu_mpi_task_post_build_v(MPI_Comm comm, struct starpu_codelet *codelet, va_list varg_list) { int xrank, do_execute; int ret, me, nb_nodes; struct starpu_data_descr *descrs; int nb_data; int prio; starpu_mpi_comm_rank(comm, &me); starpu_mpi_comm_size(comm, &nb_nodes); /* Find out whether we are to execute the data because we own the data to be written to. */ ret = _starpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, &prio, varg_list); if (ret < 0) return ret; ret = _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data, prio); free(descrs); return ret; } int starpu_mpi_task_exchange_data_before_execution(MPI_Comm comm, struct starpu_task *task, struct starpu_data_descr *descrs, struct starpu_mpi_task_exchange_params *params) { int me, nb_nodes, inconsistent_execute; unsigned i; int select_node_policy = STARPU_MPI_NODE_SELECTION_CURRENT_POLICY; unsigned nb_data; nb_data = STARPU_TASK_GET_NBUFFERS(task); starpu_mpi_comm_rank(comm, &me); starpu_mpi_comm_size(comm, &nb_nodes); params->xrank = -1; inconsistent_execute = 0; for(i=0 ; ido_execute), &inconsistent_execute, &(params->xrank)); if (ret == -EINVAL) { return ret; } } if (inconsistent_execute == 1 || params->xrank == -1) { // We need to find out which node is going to execute the codelet. _STARPU_MPI_DEBUG(100, "Different nodes are owning W data. The node to execute the codelet is going to be selected with the current selection node policy. See starpu_mpi_node_selection_set_current_policy() to change the policy, or use STARPU_EXECUTE_ON_NODE or STARPU_EXECUTE_ON_DATA to specify the node\n"); params->xrank = _starpu_mpi_select_node(me, nb_nodes, descrs, nb_data, select_node_policy); params->do_execute = (params->xrank == STARPU_MPI_PER_NODE) || (me == params->xrank); } else { _STARPU_MPI_DEBUG(100, "Inconsistent=%d - xrank=%d\n", inconsistent_execute, params->xrank); params->do_execute = (params->xrank == STARPU_MPI_PER_NODE) || (me == params->xrank); } for(i=0 ; ixrank, params->do_execute, task->priority, comm); } params->priority = task->priority; return 0; } int starpu_mpi_task_exchange_data_after_execution(MPI_Comm comm, struct starpu_data_descr *descrs, unsigned nb_data, struct starpu_mpi_task_exchange_params params) { return _starpu_mpi_task_postbuild_v(comm, params.xrank, params.do_execute, descrs, nb_data, params.priority); } struct starpu_codelet _starpu_mpi_redux_data_synchro_cl = { .where = STARPU_NOWHERE, .modes = {STARPU_R, STARPU_W}, .nbuffers = 2 }; struct _starpu_mpi_redux_data_args { starpu_data_handle_t data_handle; starpu_data_handle_t new_handle; starpu_mpi_tag_t data_tag; int node; MPI_Comm comm; struct starpu_task *taskB; long taskC_jobid; }; void _starpu_mpi_redux_fill_post_sync_jobid(const void * const redux_data_args, long * const post_sync_jobid) { *post_sync_jobid = ((const struct _starpu_mpi_redux_data_args *) redux_data_args)->taskC_jobid; } int starpu_mpi_redux_data_prio_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int prio, int arity) { int me, rank, nb_nodes; starpu_mpi_tag_t data_tag; rank = starpu_mpi_data_get_rank(data_handle); data_tag = starpu_mpi_data_get_tag(data_handle); struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; if (rank == -1) { _STARPU_ERROR("StarPU needs to be told the MPI rank of this data, using starpu_mpi_data_register\n"); } if (data_tag == -1) { _STARPU_ERROR("StarPU needs to be told the MPI tag of this data, using starpu_mpi_data_register\n"); } if (mpi_data->redux_map == NULL) { _STARPU_MPI_DEBUG(5, "I do not contribute to this reduction\n"); return 0; } starpu_mpi_comm_rank(comm, &me); starpu_mpi_comm_size(comm, &nb_nodes); struct _starpu_redux_data_entry *entry; HASH_FIND_PTR(_redux_data, &data_handle, entry); #ifdef STARPU_MPI_VERBOSE int current_level=0; #endif int nb_contrib, next_nb_contrib; int i, j, step, node; char root_in_step, me_in_step; // https://stackoverflow.com/questions/109023/how-to-count-the-number-of-set-bits-in-a-32-bit-integer // https://stackoverflow.com/a/109025 // see hamming weight //nb_contrib = std::popcount(mpi_data->redux_map); // most preferable nb_contrib=0; for (i=0;iredux_map[%d] = %d\n", i, mpi_data->redux_map[i]); if (mpi_data->redux_map[i]) nb_contrib++; } if (nb_contrib < 2) { _STARPU_MPI_DEBUG(5, "Not enough contributors to create a n-ary reduction tree.\n"); /* duplicated at the end of this function */ if (entry != NULL) { HASH_DEL(_redux_data, entry); free(entry); } free(mpi_data->redux_map); mpi_data->redux_map = NULL; return 0; } if (arity < 2) { arity = nb_contrib; } arity = STARPU_MIN(arity,nb_contrib); _STARPU_MPI_DEBUG(5, "There is %d contributors\n", nb_contrib); int contributors[nb_contrib]; int reducing_node; j=0; for (i=0;iredux_map[i]); if (mpi_data->redux_map[i]) { contributors[j++] = i; } } for (i=0;iredux_cl, STARPU_RW|STARPU_COMMUTE, data_handle, STARPU_R, new_handle, STARPU_PRIORITY, prio, STARPU_NAME, "redux_prio_tree_redux_cl", 0); if (ret) return ret; starpu_data_unregister_submit(new_handle); } } } else if (me_in_step) { _STARPU_MPI_DEBUG(5, "Sending redux handle to %d ...\n", reducing_node); int ret = starpu_mpi_isend_detached_prio(data_handle, reducing_node, data_tag, prio, comm, NULL, NULL); if (ret) return ret; starpu_data_invalidate_submit(data_handle); } contributors[step] = reducing_node; } nb_contrib = next_nb_contrib; #ifdef STARPU_MPI_VERBOSE current_level++; #endif } /* duplicated when not enough contributors */ if (entry != NULL) { HASH_DEL(_redux_data, entry); free(entry); } free(mpi_data->redux_map); mpi_data->redux_map = NULL; return 0; } int starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle) { return starpu_mpi_redux_data_prio(comm, data_handle, 0); } int starpu_mpi_redux_data_tree(MPI_Comm comm, starpu_data_handle_t data_handle, int arity) { return starpu_mpi_redux_data_prio_tree(comm, data_handle, 0, arity); } int starpu_mpi_redux_data_prio(MPI_Comm comm, starpu_data_handle_t data_handle, int prio) { int nb_nodes, nb_contrib, i; struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; if (mpi_data->redux_map == NULL) { _STARPU_MPI_DEBUG(5, "I do not contribute to this reduction\n"); return 0; } starpu_mpi_comm_size(comm, &nb_nodes); nb_contrib=0; for (i=0;iredux_map[i]) { nb_contrib++; } } return starpu_mpi_redux_data_prio_tree(comm, data_handle, prio, nb_contrib); } void _starpu_mpi_redux_wrapup_data(starpu_data_handle_t data_handle) { // We could check if the handle makes sense but we do not because it helps the programmer using coherent // distributed-memory reduction patterns size_t data_size = starpu_data_get_size(data_handle); // Small data => flat tree | binary tree int _starpu_mpi_redux_threshold = starpu_getenv_number_default("STARPU_MPI_REDUX_ARITY_THRESHOLD", 1024); int _starpu_mpi_redux_tree_size = 2; if (_starpu_mpi_redux_threshold < 0 || (_starpu_mpi_redux_threshold > 0 && data_size < (size_t) _starpu_mpi_redux_threshold)) { _starpu_mpi_redux_tree_size = STARPU_MAXNODES; } struct _starpu_mpi_data *mpi_data = data_handle->mpi_data; struct _starpu_redux_data_entry *entry; HASH_FIND_PTR(_redux_data, &data_handle, entry); if (entry != NULL) { starpu_mpi_redux_data_tree(mpi_data->node_tag.node.comm,data_handle,_starpu_mpi_redux_tree_size); } return; } void _starpu_mpi_redux_wrapup_data_all() { struct _starpu_redux_data_entry *entry = NULL, *tmp = NULL; HASH_ITER(hh, _redux_data, entry, tmp) { _starpu_mpi_redux_wrapup_data(entry->data_handle); } return; } starpu-1.4.9+dfsg/mpi/src/starpu_mpi_task_insert.h000066400000000000000000000027711507764646700223350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_MPI_TASK_INSERT_H__ #define __STARPU_MPI_TASK_INSERT_H__ /** @file */ #ifdef __cplusplus extern "C" { #endif int _starpu_mpi_find_executee_node(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int *do_execute, int *inconsistent_execute, int *xrank); int _starpu_mpi_exchange_data_before_execution(starpu_data_handle_t data, enum starpu_data_access_mode mode, int me, int xrank, int do_execute, int prio, MPI_Comm comm); int _starpu_mpi_task_postbuild_v(MPI_Comm comm, int xrank, int do_execute, struct starpu_data_descr *descrs, int nb_data, int prio); void _starpu_mpi_redux_wrapup_data_all(); void _starpu_mpi_redux_wrapup_data(starpu_data_handle_t data_handle); void _starpu_mpi_pre_submit_hook_call(struct starpu_task *task); #ifdef __cplusplus } #endif #endif /* __STARPU_MPI_TASK_INSERT_H__ */ starpu-1.4.9+dfsg/mpi/src/starpu_mpi_task_insert_fortran.c000066400000000000000000000375421507764646700240670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_MPI_COMM_F2C static int _fstarpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nodes, int *xrank, int *do_execute, struct starpu_data_descr **descrs_p, int *nb_data_p, int *prio_p, void **arglist) { int arg_i = 0; int inconsistent_execute = 0; int node_selected = 0; int nb_allocated_data = 16; struct starpu_data_descr *descrs; int nb_data; int prio = 0; int select_node_policy = STARPU_MPI_NODE_SELECTION_CURRENT_POLICY; _STARPU_TRACE_TASK_MPI_DECODE_START(); _STARPU_MPI_MALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); nb_data = 0; *do_execute = -1; *xrank = -1; while (arglist[arg_i] != NULL) { int arg_type = (int)(intptr_t)arglist[arg_i]; int arg_type_nocommute = arg_type & ~STARPU_COMMUTE; if (arg_type==STARPU_EXECUTE_ON_NODE) { arg_i++; int rank = *(int *)arglist[arg_i]; if (rank != -1) { *xrank = rank; if (node_selected == 0) { _STARPU_MPI_DEBUG(100, "Executing on node %d\n", *xrank); *do_execute = 1; node_selected = 1; inconsistent_execute = 0; } } } else if (arg_type==STARPU_EXECUTE_ON_DATA) { arg_i++; starpu_data_handle_t data = arglist[arg_i]; if (node_selected == 0) { *xrank = starpu_mpi_data_get_rank(data); STARPU_ASSERT_MSG(*xrank != -1, "Rank of the data must be set using starpu_mpi_data_register() or starpu_data_set_rank()"); _STARPU_MPI_DEBUG(100, "Executing on data node %d\n", *xrank); STARPU_ASSERT_MSG(*xrank <= nb_nodes, "Node %d to execute codelet is not a valid node (%d)", *xrank, nb_nodes); *do_execute = 1; node_selected = 1; inconsistent_execute = 0; } } else if (arg_type_nocommute & STARPU_R || arg_type_nocommute & STARPU_W || arg_type_nocommute & STARPU_RW || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX || arg_type & STARPU_MPI_REDUX) { arg_i++; starpu_data_handle_t data = arglist[arg_i]; enum starpu_data_access_mode mode = (enum starpu_data_access_mode) arg_type; if (node_selected == 0) { int ret = _starpu_mpi_find_executee_node(data, mode, me, do_execute, &inconsistent_execute, xrank); if (ret == -EINVAL) { free(descrs); _STARPU_TRACE_TASK_MPI_DECODE_END(); return ret; } } if (nb_data >= nb_allocated_data) { nb_allocated_data *= 2; _STARPU_MPI_REALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); } descrs[nb_data].handle = data; descrs[nb_data].mode = mode; nb_data ++; } else if (arg_type == STARPU_DATA_ARRAY) { arg_i++; starpu_data_handle_t *data = arglist[arg_i]; arg_i++; int nb_handles = *(int *)arglist[arg_i]; int i; for(i=0 ; inbuffers == STARPU_VARIABLE_NBUFFERS || nb_data < codelet->nbuffers, "Too many data passed to starpu_mpi_task_insert"); enum starpu_data_access_mode mode = STARPU_CODELET_GET_MODE(codelet, nb_data); if (node_selected == 0) { int ret = _starpu_mpi_find_executee_node(data[i], mode, me, do_execute, &inconsistent_execute, xrank); if (ret == -EINVAL) { free(descrs); _STARPU_TRACE_TASK_MPI_DECODE_END(); return ret; } } if (nb_data >= nb_allocated_data) { nb_allocated_data *= 2; _STARPU_MPI_REALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); } descrs[nb_data].handle = data[i]; descrs[nb_data].mode = mode; nb_data ++; } } else if (arg_type == STARPU_DATA_MODE_ARRAY) { arg_i++; struct starpu_data_descr *_descrs = arglist[arg_i]; arg_i++; int nb_handles = *(int *)arglist[arg_i]; int i; for(i=0 ; i= nb_allocated_data) { nb_allocated_data *= 2; _STARPU_MPI_REALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); } descrs[nb_data].handle = _descrs[i].handle; descrs[nb_data].mode = mode; nb_data ++; } } else if (arg_type==STARPU_VALUE) { arg_i++; /* void* */ arg_i++; /* size_t */ } else if (arg_type==STARPU_CL_ARGS) { arg_i++; /* void* */ arg_i++; /* size_t */ } else if (arg_type==STARPU_CL_ARGS_NFREE) { arg_i++; /* void* */ arg_i++; /* size_t */ } else if (arg_type==STARPU_TASK_DEPS_ARRAY) { arg_i++; /* unsigned */ arg_i++; /* struct starpu_task ** */ } else if (arg_type==STARPU_TASK_END_DEPS_ARRAY) { arg_i++; /* unsigned */ arg_i++; /* struct starpu_task ** */ } else if (arg_type==STARPU_CALLBACK) { arg_i++; /* _starpu_callback_func_t */ } else if (arg_type==STARPU_CALLBACK_WITH_ARG) { arg_i++; /* _starpu_callback_func_t */ arg_i++; /* void* */ } else if (arg_type==STARPU_CALLBACK_WITH_ARG_NFREE) { arg_i++; /* _starpu_callback_func_t */ arg_i++; /* void* */ } else if (arg_type==STARPU_CALLBACK_ARG) { arg_i++; /* void* */ } else if (arg_type==STARPU_CALLBACK_ARG_NFREE) { arg_i++; /* void* */ } else if (arg_type==STARPU_EPILOGUE_CALLBACK) { arg_i++; /* _starpu_callback_func_t */ } else if (arg_type==STARPU_EPILOGUE_CALLBACK_ARG) { arg_i++; /* void* */ } else if (arg_type==STARPU_PRIORITY) { arg_i++; prio = *(int *)arglist[arg_i]; /* int* */ } /* STARPU_EXECUTE_ON_NODE handled above */ /* STARPU_EXECUTE_ON_DATA handled above */ /* STARPU_DATA_ARRAY handled above */ /* STARPU_DATA_MODE_ARRAY handled above */ else if (arg_type==STARPU_TAG) { arg_i++; /* starpu_tag_t* */ } else if (arg_type==STARPU_HYPERVISOR_TAG) { arg_i++; /* int* */ } else if (arg_type==STARPU_FLOPS) { arg_i++; /* double* */ } else if (arg_type==STARPU_SCHED_CTX) { arg_i++; /* unsigned* */ } else if (arg_type==STARPU_PROLOGUE_CALLBACK) { arg_i++; /* _starpu_callback_func_t */ } else if (arg_type==STARPU_PROLOGUE_CALLBACK_ARG) { arg_i++; /* void* */ } else if (arg_type==STARPU_PROLOGUE_CALLBACK_ARG_NFREE) { arg_i++; /* void* */ } else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP) { arg_i++; /* _starpu_callback_func_t */ } else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP_ARG) { arg_i++; /* void* */ } else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE) { arg_i++; /* void* */ } else if (arg_type==STARPU_EXECUTE_WHERE) { arg_i++; /* int* */ } else if (arg_type==STARPU_EXECUTE_ON_WORKER) { arg_i++; /* int* */ } else if (arg_type==STARPU_TAG_ONLY) { arg_i++; /* starpu_tag_t* */ } else if (arg_type==STARPU_NAME) { arg_i++; /* char* */ } else if (arg_type==STARPU_POSSIBLY_PARALLEL) { arg_i++; /* unsigned* */ } else if (arg_type==STARPU_WORKER_ORDER) { arg_i++; /* unsigned* */ } else if (arg_type==STARPU_NODE_SELECTION_POLICY) { arg_i++; /* int* */ } else if (arg_type==STARPU_TASK_COLOR) { arg_i++; /* int* */ } else if (arg_type==STARPU_TASK_SYNCHRONOUS) { arg_i++; /* int* */ } else if (arg_type==STARPU_TRANSACTION) { arg_i++; /* struct starpu_transaction * */ } else if (arg_type==STARPU_HANDLES_SEQUENTIAL_CONSISTENCY) { arg_i++; /* char* */ } #ifdef STARPU_BUBBLE else if (arg_type==STARPU_BUBBLE_FUNC) { STARPU_ASSERT_MSG(0, "Bubbles + MPI not supported yet\n"); arg_i++; } else if (arg_type==STARPU_BUBBLE_FUNC_ARG) { arg_i++; } else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC) { arg_i++; } else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC_ARG) { arg_i++; } #endif else if (arg_type==STARPU_TASK_END_DEP) { arg_i++; /* int */ } else if (arg_type==STARPU_TASK_WORKERIDS) { arg_i++; /* unsigned */ arg_i++; /* uint32_t* */ } else if (arg_type==STARPU_SEQUENTIAL_CONSISTENCY) { arg_i++; /* unsigned */ } else if (arg_type==STARPU_TASK_PROFILING_INFO) { arg_i++; /* struct starpu_profiling_task_info * */ } else if (arg_type==STARPU_TASK_NO_SUBMITORDER) { arg_i++; /* unsigned */ } else if (arg_type==STARPU_TASK_SCHED_DATA) { arg_i++; /* void * */ } else if (arg_type==STARPU_TASK_FILE) { arg_i++; /* char* */ } else if (arg_type==STARPU_TASK_LINE) { arg_i++; /* int */ } else { STARPU_ABORT_MSG("Unrecognized argument %d, did you perhaps forget to end arguments with 0?\n", arg_type); } arg_i++; } if (inconsistent_execute == 1 || *xrank == -1) { // We need to find out which node is going to execute the codelet. _STARPU_MPI_DISP("Different nodes are owning W data. The node to execute the codelet is going to be selected with the current selection node policy. See starpu_mpi_node_selection_set_current_policy() to change the policy, or use STARPU_EXECUTE_ON_NODE or STARPU_EXECUTE_ON_DATA to specify the node\n"); *xrank = _starpu_mpi_select_node(me, nb_nodes, descrs, nb_data, select_node_policy); *do_execute = *xrank == STARPU_MPI_PER_NODE || (me == *xrank); } else { _STARPU_MPI_DEBUG(100, "Inconsistent=%d - xrank=%d\n", inconsistent_execute, *xrank); *do_execute = *xrank == STARPU_MPI_PER_NODE || (me == *xrank); } _STARPU_MPI_DEBUG(100, "do_execute=%d\n", *do_execute); *descrs_p = descrs; *nb_data_p = nb_data; *prio_p = prio; _STARPU_TRACE_TASK_MPI_DECODE_END(); return 0; } static int _fstarpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, struct starpu_task **task, int *xrank_p, struct starpu_data_descr **descrs_p, int *nb_data_p, int *prio_p, void **arglist) { int me, do_execute, xrank, nb_nodes; int ret; int i; struct starpu_data_descr *descrs; int nb_data; int prio; _STARPU_MPI_LOG_IN(); starpu_mpi_comm_rank(comm, &me); starpu_mpi_comm_size(comm, &nb_nodes); /* Find out whether we are to execute the data because we own the data to be written to. */ ret = _fstarpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, &prio, arglist); if (ret < 0) return ret; _STARPU_TRACE_TASK_MPI_PRE_START(); /* Send and receive data as requested */ for(i=0 ; impi_data) { char *redux_map = starpu_mpi_data_get_redux_map(descrs[i].handle); if (redux_map != NULL && descrs[i].mode & STARPU_R && descrs[i].mode & ~ STARPU_REDUX && descrs[i].mode & ~ STARPU_MPI_REDUX) { _starpu_mpi_redux_wrapup_data(descrs[i].handle); } } _starpu_mpi_exchange_data_before_execution(descrs[i].handle, descrs[i].mode, me, xrank, do_execute, prio, comm); } if (xrank_p) *xrank_p = xrank; if (nb_data_p) *nb_data_p = nb_data; if (prio_p) *prio_p = prio; if (descrs_p) *descrs_p = descrs; else free(descrs); if (do_execute == 1) { _STARPU_MPI_DEBUG(100, "Execution of the codelet %p (%s)\n", codelet, codelet?codelet->name:NULL); *task = starpu_task_create(); (*task)->cl_arg_free = 1; (*task)->callback_arg_free = 1; (*task)->prologue_callback_arg_free = 1; (*task)->prologue_callback_pop_arg_free = 1; _fstarpu_task_insert_create(codelet, *task, arglist); if ((*task)->cl) { /* we suppose the current context is not going to change between now and the execution of the task */ (*task)->sched_ctx = _starpu_sched_ctx_get_current_context(); /* Check the type of worker(s) required by the task exist */ if (STARPU_UNLIKELY(!_starpu_worker_exists(*task))) { _STARPU_MPI_DEBUG(0, "There is no worker to execute the codelet %p (%s)\n", codelet, codelet?codelet->name:NULL); return -ENODEV; } /* In case we require that a task should be explicitly * executed on a specific worker, we make sure that the worker * is able to execute this task. */ if (STARPU_UNLIKELY((*task)->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task((*task)->workerid, *task, 0))) { _STARPU_MPI_DEBUG(0, "The specified worker %d cannot execute the codelet %p (%s)\n", (*task)->workerid, codelet, codelet?codelet->name:NULL); return -ENODEV; } } } _STARPU_TRACE_TASK_MPI_PRE_END(); return do_execute; } static int _fstarpu_mpi_task_insert_v(MPI_Comm comm, struct starpu_codelet *codelet, void **arglist) { struct starpu_task *task; int ret; int xrank; int do_execute = 0; struct starpu_data_descr *descrs; int nb_data; int prio; ret = _fstarpu_mpi_task_build_v(comm, codelet, &task, &xrank, &descrs, &nb_data, &prio, arglist); if (ret < 0) return ret; if (ret == 1) { do_execute = 1; ret = starpu_task_submit(task); if (STARPU_UNLIKELY(ret == -ENODEV)) { _STARPU_MSG("submission of task %p with codelet %p failed (symbol `%s') (err: ENODEV)\n", task, task->cl, (codelet == NULL) ? "none" : task->cl->name ? task->cl->name : (task->cl->model && task->cl->model->symbol)?task->cl->model->symbol:"none"); task->destroy = 0; starpu_task_destroy(task); free(descrs); return -ENODEV; } } int val = _starpu_mpi_task_postbuild_v(comm, xrank, do_execute, descrs, nb_data, prio); free(descrs); if (ret == 1) _starpu_mpi_pre_submit_hook_call(task); return val; } void fstarpu_mpi_task_insert(void **arglist) { MPI_Fint comm = *((MPI_Fint *)arglist[0]); struct starpu_codelet *codelet = arglist[1]; if (codelet == NULL) { STARPU_ABORT_MSG("task without codelet"); } int ret; ret = _fstarpu_mpi_task_insert_v(MPI_Comm_f2c(comm), codelet, arglist+2); STARPU_ASSERT(ret >= 0); } /* fstarpu_mpi_insert_task: aliased to fstarpu_mpi_task_insert in fstarpu_mpi_mod.f90 */ struct starpu_task *fstarpu_mpi_task_build(void **arglist) { MPI_Fint comm = *((MPI_Fint *)arglist[0]); struct starpu_codelet *codelet = arglist[1]; if (codelet == NULL) { STARPU_ABORT_MSG("task without codelet"); } struct starpu_task *task; int ret; ret = _fstarpu_mpi_task_build_v(MPI_Comm_f2c(comm), codelet, &task, NULL, NULL, NULL, NULL, arglist+2); return (ret == 1 || ret == -ENODEV) ? task : NULL; } void fstarpu_mpi_task_post_build(void **arglist) { MPI_Fint comm = *((MPI_Fint *)arglist[0]); struct starpu_codelet *codelet = arglist[1]; if (codelet == NULL) { STARPU_ABORT_MSG("task without codelet"); } int xrank, do_execute; int ret, me, nb_nodes; struct starpu_data_descr *descrs; int nb_data; int prio; starpu_mpi_comm_rank(MPI_Comm_f2c(comm), &me); starpu_mpi_comm_size(MPI_Comm_f2c(comm), &nb_nodes); /* Find out whether we are to execute the data because we own the data to be written to. */ ret = _fstarpu_mpi_task_decode_v(codelet, me, nb_nodes, &xrank, &do_execute, &descrs, &nb_data, &prio, arglist+2); STARPU_ASSERT(ret >= 0); ret = _starpu_mpi_task_postbuild_v(MPI_Comm_f2c(comm), xrank, do_execute, descrs, nb_data, prio); free(descrs); STARPU_ASSERT(ret >= 0); } #endif /* HAVE_MPI_COMM_F2C */ starpu-1.4.9+dfsg/mpi/tests/000077500000000000000000000000001507764646700157375ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/tests/Makefile.am000066400000000000000000000161461507764646700200030ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Thibaut Lambert # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk SUFFIXES = .hip CC=$(MPICC) CCLD=$(MPICC) include $(top_srcdir)/make/starpu-loader.mk LAUNCHER = $(STARPU_MPIEXEC) LAUNCHER_ENV = $(MPI_RUN_ENV) if STARPU_SIMGRID LOADER_BIN = $(LAUNCHER) endif if STARPU_MPI_CHECK TESTS = $(starpu_mpi_TESTS) endif check_PROGRAMS = $(LOADER) $(starpu_mpi_TESTS) BUILT_SOURCES = CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log EXTRA_DIST = \ helper.h \ user_defined_datatype_value.h examplebindir = $(libdir)/starpu/examples/mpi examplebin_PROGRAMS = AM_CFLAGS += $(APP_CFLAGS) AM_CXXFLAGS += $(APP_CXXFLAGS) AM_FFLAGS += $(APP_FFLAGS) AM_FCFLAGS += $(APP_FCFLAGS) AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_srcdir)/examples/ $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ LIBS += $(STARPU_CUDA_LDFLAGS) $(STARPU_HIP_LDFLAGS) LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS) ######################## # Unit testcases # ######################## starpu_mpi_TESTS = starpu_mpi_TESTS += \ callback \ driver \ early_stuff \ insert_task_block \ insert_task_can_execute \ insert_task_tags \ multiple_send \ policy_register \ policy_register_many \ policy_selection \ star \ stats \ user_defined_datatype \ wait_for_all if !STARPU_SIMGRID starpu_mpi_TESTS += \ attr \ ndim_interface endif if !STARPU_MPI_MINIMAL_TESTS starpu_mpi_TESTS += \ broadcast \ early_request \ gather \ gather2 \ insert_task \ insert_task_count \ insert_task_dyn_handles \ insert_task_node_choice \ insert_task_owner \ insert_task_owner2 \ insert_task_owner_data \ matrix \ matrix2 \ mpi_barrier \ mpi_detached_tag \ mpi_earlyrecv \ mpi_irecv \ mpi_irecv_detached \ mpi_isend \ mpi_isend_detached \ mpi_reduction \ mpi_redux \ mpi_scatter_gather \ mpi_test \ pingpong \ policy_selection2 \ ring \ ring_async \ ring_async_implicit \ ring_sync \ ring_sync_detached \ temporary \ data_cpy \ mpi_data_cpy endif if !STARPU_MPI_MINIMAL_TESTS if !STARPU_SIMGRID # missing support in simgrid starpu_mpi_TESTS += \ display_bindings \ mpi_earlyrecv2 \ mpi_earlyrecv2_sync \ block_interface \ block_interface_pinned \ ndim_interface \ insert_task_compute \ insert_task_sent_cache \ insert_task_recv_cache \ insert_task_seq \ tags_allocate \ tags_checking \ sync \ coop \ coop_datatype \ coop_large \ coop_many \ coop_acknowledgement \ coop_recv_not_yet_posted \ coop_chained_sends \ coop_wrong_order \ coop_without_task \ coop_user_defined_datatype \ coop_recv_wait_finalize \ coop_insert_task \ coop_cache \ mpi_task_submit if STARPU_USE_MPI_MPI starpu_mpi_TESTS += \ load_balancer endif endif # Expected to fail starpu_mpi_TESTS += \ policy_register_toomany \ policy_unregister \ starpu_redefine endif noinst_PROGRAMS += \ datatypes \ pingpong \ mpi_test \ mpi_isend \ mpi_earlyrecv \ mpi_earlyrecv2 \ mpi_earlyrecv2_sync \ mpi_irecv \ mpi_barrier \ mpi_isend_detached \ mpi_irecv_detached \ mpi_detached_tag \ mpi_redux \ ring \ ring_sync \ ring_sync_detached \ ring_async \ ring_async_implicit \ temporary \ data_cpy \ mpi_data_cpy \ early_stuff \ block_interface \ block_interface_pinned \ ndim_interface \ attr \ broadcast \ callback \ matrix \ matrix2 \ insert_task \ insert_task_compute \ insert_task_sent_cache \ insert_task_recv_cache \ insert_task_can_execute \ insert_task_block \ insert_task_owner \ insert_task_owner2 \ insert_task_owner_data \ insert_task_node_choice \ insert_task_count \ insert_task_dyn_handles \ insert_task_seq \ insert_task_tags \ multiple_send \ mpi_scatter_gather \ mpi_reduction \ user_defined_datatype \ tags_allocate \ tags_checking \ star \ stats \ sync \ gather \ gather2 \ policy_register \ policy_register_many \ policy_register_toomany \ policy_unregister \ policy_selection \ policy_selection2 \ early_request \ starpu_redefine \ load_balancer \ driver \ coop \ coop_datatype \ coop_large \ coop_many \ coop_acknowledgement \ coop_recv_not_yet_posted \ coop_chained_sends \ coop_wrong_order \ coop_without_task \ coop_user_defined_datatype \ coop_recv_wait_finalize \ coop_insert_task \ coop_cache \ nothing \ display_bindings \ mpi_task_submit \ wait_for_all if STARPU_USE_MPI_FT noinst_PROGRAMS += \ checkpoints endif STARPU_USE_MPI_FT XFAIL_TESTS= \ policy_register_toomany \ policy_unregister \ starpu_redefine \ nothing ring_SOURCES = ring.c ring_sync_SOURCES = ring_sync.c ring_sync_detached_SOURCES = ring_sync_detached.c ring_async_SOURCES = ring_async.c ring_async_implicit_SOURCES = ring_async_implicit.c insert_task_count_SOURCES = insert_task_count.c if STARPU_USE_CUDA ring_SOURCES += ring_kernel.cu ring_sync_SOURCES += ring_kernel.cu ring_sync_detached_SOURCES += ring_kernel.cu ring_async_SOURCES += ring_kernel.cu ring_async_implicit_SOURCES += ring_kernel.cu insert_task_count_SOURCES += ring_kernel.cu endif if STARPU_USE_HIP ring_SOURCES += ring_kernel_hip.hip ring_sync_SOURCES += ring_kernel_hip.hip ring_sync_detached_SOURCES += ring_kernel_hip.hip ring_async_SOURCES += ring_kernel_hip.hip ring_async_implicit_SOURCES += ring_kernel_hip.hip insert_task_count_SOURCES += ring_kernel_hip.hip endif mpi_reduction_SOURCES = mpi_reduction.c mpi_reduction_SOURCES += mpi_reduction_kernels.c user_defined_datatype_SOURCES = user_defined_datatype.c user_defined_datatype_SOURCES += ../../examples/interface/complex_interface.c mpi_earlyrecv2_SOURCES = mpi_earlyrecv2.c mpi_earlyrecv2_SOURCES += ../../examples/interface/complex_interface.c mpi_earlyrecv2_sync_SOURCES = mpi_earlyrecv2_sync.c mpi_earlyrecv2_sync_SOURCES += ../../examples/interface/complex_interface.c coop_user_defined_datatype_SOURCES = coop_user_defined_datatype.c coop_user_defined_datatype_SOURCES += ../../examples/interface/complex_interface.c early_stuff_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) starpu-1.4.9+dfsg/mpi/tests/Makefile.in000066400000000000000000004447361507764646700200260ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ noinst_PROGRAMS = $(am__EXEEXT_7) datatypes$(EXEEXT) pingpong$(EXEEXT) \ mpi_test$(EXEEXT) mpi_isend$(EXEEXT) mpi_earlyrecv$(EXEEXT) \ mpi_earlyrecv2$(EXEEXT) mpi_earlyrecv2_sync$(EXEEXT) \ mpi_irecv$(EXEEXT) mpi_barrier$(EXEEXT) \ mpi_isend_detached$(EXEEXT) mpi_irecv_detached$(EXEEXT) \ mpi_detached_tag$(EXEEXT) mpi_redux$(EXEEXT) ring$(EXEEXT) \ ring_sync$(EXEEXT) ring_sync_detached$(EXEEXT) \ ring_async$(EXEEXT) ring_async_implicit$(EXEEXT) \ temporary$(EXEEXT) data_cpy$(EXEEXT) mpi_data_cpy$(EXEEXT) \ early_stuff$(EXEEXT) block_interface$(EXEEXT) \ block_interface_pinned$(EXEEXT) ndim_interface$(EXEEXT) \ attr$(EXEEXT) broadcast$(EXEEXT) callback$(EXEEXT) \ matrix$(EXEEXT) matrix2$(EXEEXT) insert_task$(EXEEXT) \ insert_task_compute$(EXEEXT) insert_task_sent_cache$(EXEEXT) \ insert_task_recv_cache$(EXEEXT) \ insert_task_can_execute$(EXEEXT) insert_task_block$(EXEEXT) \ insert_task_owner$(EXEEXT) insert_task_owner2$(EXEEXT) \ insert_task_owner_data$(EXEEXT) \ insert_task_node_choice$(EXEEXT) insert_task_count$(EXEEXT) \ insert_task_dyn_handles$(EXEEXT) insert_task_seq$(EXEEXT) \ insert_task_tags$(EXEEXT) multiple_send$(EXEEXT) \ mpi_scatter_gather$(EXEEXT) mpi_reduction$(EXEEXT) \ user_defined_datatype$(EXEEXT) tags_allocate$(EXEEXT) \ tags_checking$(EXEEXT) star$(EXEEXT) stats$(EXEEXT) \ sync$(EXEEXT) gather$(EXEEXT) gather2$(EXEEXT) \ policy_register$(EXEEXT) policy_register_many$(EXEEXT) \ policy_register_toomany$(EXEEXT) policy_unregister$(EXEEXT) \ policy_selection$(EXEEXT) policy_selection2$(EXEEXT) \ early_request$(EXEEXT) starpu_redefine$(EXEEXT) \ load_balancer$(EXEEXT) driver$(EXEEXT) coop$(EXEEXT) \ coop_datatype$(EXEEXT) coop_large$(EXEEXT) coop_many$(EXEEXT) \ coop_acknowledgement$(EXEEXT) \ coop_recv_not_yet_posted$(EXEEXT) coop_chained_sends$(EXEEXT) \ coop_wrong_order$(EXEEXT) coop_without_task$(EXEEXT) \ coop_user_defined_datatype$(EXEEXT) \ coop_recv_wait_finalize$(EXEEXT) coop_insert_task$(EXEEXT) \ coop_cache$(EXEEXT) nothing$(EXEEXT) display_bindings$(EXEEXT) \ mpi_task_submit$(EXEEXT) wait_for_all$(EXEEXT) $(am__EXEEXT_8) # Make tests run through mpiexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec # switch off local socket usage #MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 @STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader @STARPU_MPI_CHECK_TRUE@TESTS = $(am__EXEEXT_6) check_PROGRAMS = $(am__EXEEXT_6) examplebin_PROGRAMS = @STARPU_SIMGRID_FALSE@am__append_8 = \ @STARPU_SIMGRID_FALSE@ attr \ @STARPU_SIMGRID_FALSE@ ndim_interface @STARPU_MPI_MINIMAL_TESTS_FALSE@am__append_9 = \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ broadcast \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ early_request \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ gather \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ gather2 \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_count \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_dyn_handles \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_node_choice \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_owner \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_owner2 \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_owner_data \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ matrix \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ matrix2 \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_barrier \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_detached_tag \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_earlyrecv \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_irecv \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_irecv_detached \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_isend \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_isend_detached \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_reduction \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_redux \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_scatter_gather \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_test \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ pingpong \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ policy_selection2 \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ ring \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_async \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_async_implicit \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_sync \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_sync_detached \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ temporary \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ data_cpy \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_data_cpy # missing support in simgrid @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@am__append_10 = \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ display_bindings \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ mpi_earlyrecv2 \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ mpi_earlyrecv2_sync \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ block_interface \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ block_interface_pinned \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ ndim_interface \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ insert_task_compute \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ insert_task_sent_cache \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ insert_task_recv_cache \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ insert_task_seq \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ tags_allocate \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ tags_checking \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ sync \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_datatype \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_large \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_many \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_acknowledgement \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_recv_not_yet_posted \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_chained_sends \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_wrong_order \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_without_task \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_user_defined_datatype \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_recv_wait_finalize \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_insert_task \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_cache \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ mpi_task_submit @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MPI_TRUE@am__append_11 = \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MPI_TRUE@ load_balancer # Expected to fail @STARPU_MPI_MINIMAL_TESTS_FALSE@am__append_12 = \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ policy_register_toomany \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ policy_unregister \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ starpu_redefine @STARPU_USE_MPI_FT_TRUE@am__append_13 = \ @STARPU_USE_MPI_FT_TRUE@ checkpoints XFAIL_TESTS = policy_register_toomany$(EXEEXT) \ policy_unregister$(EXEEXT) starpu_redefine$(EXEEXT) \ nothing$(EXEEXT) @STARPU_USE_CUDA_TRUE@am__append_14 = ring_kernel.cu @STARPU_USE_CUDA_TRUE@am__append_15 = ring_kernel.cu @STARPU_USE_CUDA_TRUE@am__append_16 = ring_kernel.cu @STARPU_USE_CUDA_TRUE@am__append_17 = ring_kernel.cu @STARPU_USE_CUDA_TRUE@am__append_18 = ring_kernel.cu @STARPU_USE_CUDA_TRUE@am__append_19 = ring_kernel.cu @STARPU_USE_HIP_TRUE@am__append_20 = ring_kernel_hip.hip @STARPU_USE_HIP_TRUE@am__append_21 = ring_kernel_hip.hip @STARPU_USE_HIP_TRUE@am__append_22 = ring_kernel_hip.hip @STARPU_USE_HIP_TRUE@am__append_23 = ring_kernel_hip.hip @STARPU_USE_HIP_TRUE@am__append_24 = ring_kernel_hip.hip @STARPU_USE_HIP_TRUE@am__append_25 = ring_kernel_hip.hip subdir = mpi/tests ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = @STARPU_SIMGRID_FALSE@am__EXEEXT_1 = attr$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ ndim_interface$(EXEEXT) @STARPU_MPI_MINIMAL_TESTS_FALSE@am__EXEEXT_2 = broadcast$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ early_request$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ gather$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ gather2$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_count$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_dyn_handles$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_node_choice$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_owner$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_owner2$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ insert_task_owner_data$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ matrix$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ matrix2$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_barrier$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_detached_tag$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_earlyrecv$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_irecv$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_irecv_detached$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_isend$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_isend_detached$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_reduction$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_redux$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_scatter_gather$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_test$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ pingpong$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ policy_selection2$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ ring$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_async$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_async_implicit$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_sync$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ ring_sync_detached$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ temporary$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ data_cpy$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ mpi_data_cpy$(EXEEXT) @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_3 = display_bindings$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ mpi_earlyrecv2$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ mpi_earlyrecv2_sync$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ block_interface$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ block_interface_pinned$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ ndim_interface$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ insert_task_compute$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ insert_task_sent_cache$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ insert_task_recv_cache$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ insert_task_seq$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ tags_allocate$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ tags_checking$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ sync$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_datatype$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_large$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_many$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_acknowledgement$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_recv_not_yet_posted$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_chained_sends$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_wrong_order$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_without_task$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_user_defined_datatype$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_recv_wait_finalize$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_insert_task$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ coop_cache$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@ mpi_task_submit$(EXEEXT) @STARPU_MPI_MINIMAL_TESTS_FALSE@@STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MPI_TRUE@am__EXEEXT_4 = load_balancer$(EXEEXT) @STARPU_MPI_MINIMAL_TESTS_FALSE@am__EXEEXT_5 = policy_register_toomany$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ policy_unregister$(EXEEXT) \ @STARPU_MPI_MINIMAL_TESTS_FALSE@ starpu_redefine$(EXEEXT) am__EXEEXT_6 = callback$(EXEEXT) driver$(EXEEXT) early_stuff$(EXEEXT) \ insert_task_block$(EXEEXT) insert_task_can_execute$(EXEEXT) \ insert_task_tags$(EXEEXT) multiple_send$(EXEEXT) \ policy_register$(EXEEXT) policy_register_many$(EXEEXT) \ policy_selection$(EXEEXT) star$(EXEEXT) stats$(EXEEXT) \ user_defined_datatype$(EXEEXT) wait_for_all$(EXEEXT) \ $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \ $(am__EXEEXT_4) $(am__EXEEXT_5) am__installdirs = "$(DESTDIR)$(examplebindir)" @STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_7 = loader$(EXEEXT) @STARPU_USE_MPI_FT_TRUE@am__EXEEXT_8 = checkpoints$(EXEEXT) PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) attr_SOURCES = attr.c attr_OBJECTS = attr.$(OBJEXT) attr_LDADD = $(LDADD) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = block_interface_SOURCES = block_interface.c block_interface_OBJECTS = block_interface.$(OBJEXT) block_interface_LDADD = $(LDADD) block_interface_pinned_SOURCES = block_interface_pinned.c block_interface_pinned_OBJECTS = block_interface_pinned.$(OBJEXT) block_interface_pinned_LDADD = $(LDADD) broadcast_SOURCES = broadcast.c broadcast_OBJECTS = broadcast.$(OBJEXT) broadcast_LDADD = $(LDADD) callback_SOURCES = callback.c callback_OBJECTS = callback.$(OBJEXT) callback_LDADD = $(LDADD) checkpoints_SOURCES = checkpoints.c checkpoints_OBJECTS = checkpoints.$(OBJEXT) checkpoints_LDADD = $(LDADD) coop_SOURCES = coop.c coop_OBJECTS = coop.$(OBJEXT) coop_LDADD = $(LDADD) coop_acknowledgement_SOURCES = coop_acknowledgement.c coop_acknowledgement_OBJECTS = coop_acknowledgement.$(OBJEXT) coop_acknowledgement_LDADD = $(LDADD) coop_cache_SOURCES = coop_cache.c coop_cache_OBJECTS = coop_cache.$(OBJEXT) coop_cache_LDADD = $(LDADD) coop_chained_sends_SOURCES = coop_chained_sends.c coop_chained_sends_OBJECTS = coop_chained_sends.$(OBJEXT) coop_chained_sends_LDADD = $(LDADD) coop_datatype_SOURCES = coop_datatype.c coop_datatype_OBJECTS = coop_datatype.$(OBJEXT) coop_datatype_LDADD = $(LDADD) coop_insert_task_SOURCES = coop_insert_task.c coop_insert_task_OBJECTS = coop_insert_task.$(OBJEXT) coop_insert_task_LDADD = $(LDADD) coop_large_SOURCES = coop_large.c coop_large_OBJECTS = coop_large.$(OBJEXT) coop_large_LDADD = $(LDADD) coop_many_SOURCES = coop_many.c coop_many_OBJECTS = coop_many.$(OBJEXT) coop_many_LDADD = $(LDADD) coop_recv_not_yet_posted_SOURCES = coop_recv_not_yet_posted.c coop_recv_not_yet_posted_OBJECTS = coop_recv_not_yet_posted.$(OBJEXT) coop_recv_not_yet_posted_LDADD = $(LDADD) coop_recv_wait_finalize_SOURCES = coop_recv_wait_finalize.c coop_recv_wait_finalize_OBJECTS = coop_recv_wait_finalize.$(OBJEXT) coop_recv_wait_finalize_LDADD = $(LDADD) am__dirstamp = $(am__leading_dot)dirstamp am_coop_user_defined_datatype_OBJECTS = \ coop_user_defined_datatype.$(OBJEXT) \ ../../examples/interface/complex_interface.$(OBJEXT) coop_user_defined_datatype_OBJECTS = \ $(am_coop_user_defined_datatype_OBJECTS) coop_user_defined_datatype_LDADD = $(LDADD) coop_without_task_SOURCES = coop_without_task.c coop_without_task_OBJECTS = coop_without_task.$(OBJEXT) coop_without_task_LDADD = $(LDADD) coop_wrong_order_SOURCES = coop_wrong_order.c coop_wrong_order_OBJECTS = coop_wrong_order.$(OBJEXT) coop_wrong_order_LDADD = $(LDADD) data_cpy_SOURCES = data_cpy.c data_cpy_OBJECTS = data_cpy.$(OBJEXT) data_cpy_LDADD = $(LDADD) datatypes_SOURCES = datatypes.c datatypes_OBJECTS = datatypes.$(OBJEXT) datatypes_LDADD = $(LDADD) display_bindings_SOURCES = display_bindings.c display_bindings_OBJECTS = display_bindings.$(OBJEXT) display_bindings_LDADD = $(LDADD) driver_SOURCES = driver.c driver_OBJECTS = driver.$(OBJEXT) driver_LDADD = $(LDADD) early_request_SOURCES = early_request.c early_request_OBJECTS = early_request.$(OBJEXT) early_request_LDADD = $(LDADD) early_stuff_SOURCES = early_stuff.c early_stuff_OBJECTS = early_stuff-early_stuff.$(OBJEXT) early_stuff_LDADD = $(LDADD) early_stuff_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(early_stuff_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ gather_SOURCES = gather.c gather_OBJECTS = gather.$(OBJEXT) gather_LDADD = $(LDADD) gather2_SOURCES = gather2.c gather2_OBJECTS = gather2.$(OBJEXT) gather2_LDADD = $(LDADD) insert_task_SOURCES = insert_task.c insert_task_OBJECTS = insert_task.$(OBJEXT) insert_task_LDADD = $(LDADD) insert_task_block_SOURCES = insert_task_block.c insert_task_block_OBJECTS = insert_task_block.$(OBJEXT) insert_task_block_LDADD = $(LDADD) insert_task_can_execute_SOURCES = insert_task_can_execute.c insert_task_can_execute_OBJECTS = insert_task_can_execute.$(OBJEXT) insert_task_can_execute_LDADD = $(LDADD) insert_task_compute_SOURCES = insert_task_compute.c insert_task_compute_OBJECTS = insert_task_compute.$(OBJEXT) insert_task_compute_LDADD = $(LDADD) am__insert_task_count_SOURCES_DIST = insert_task_count.c \ ring_kernel.cu ring_kernel_hip.hip @STARPU_USE_CUDA_TRUE@am__objects_1 = ring_kernel.$(OBJEXT) @STARPU_USE_HIP_TRUE@am__objects_2 = ring_kernel_hip.$(OBJEXT) am_insert_task_count_OBJECTS = insert_task_count.$(OBJEXT) \ $(am__objects_1) $(am__objects_2) insert_task_count_OBJECTS = $(am_insert_task_count_OBJECTS) insert_task_count_LDADD = $(LDADD) insert_task_dyn_handles_SOURCES = insert_task_dyn_handles.c insert_task_dyn_handles_OBJECTS = insert_task_dyn_handles.$(OBJEXT) insert_task_dyn_handles_LDADD = $(LDADD) insert_task_node_choice_SOURCES = insert_task_node_choice.c insert_task_node_choice_OBJECTS = insert_task_node_choice.$(OBJEXT) insert_task_node_choice_LDADD = $(LDADD) insert_task_owner_SOURCES = insert_task_owner.c insert_task_owner_OBJECTS = insert_task_owner.$(OBJEXT) insert_task_owner_LDADD = $(LDADD) insert_task_owner2_SOURCES = insert_task_owner2.c insert_task_owner2_OBJECTS = insert_task_owner2.$(OBJEXT) insert_task_owner2_LDADD = $(LDADD) insert_task_owner_data_SOURCES = insert_task_owner_data.c insert_task_owner_data_OBJECTS = insert_task_owner_data.$(OBJEXT) insert_task_owner_data_LDADD = $(LDADD) insert_task_recv_cache_SOURCES = insert_task_recv_cache.c insert_task_recv_cache_OBJECTS = insert_task_recv_cache.$(OBJEXT) insert_task_recv_cache_LDADD = $(LDADD) insert_task_sent_cache_SOURCES = insert_task_sent_cache.c insert_task_sent_cache_OBJECTS = insert_task_sent_cache.$(OBJEXT) insert_task_sent_cache_LDADD = $(LDADD) insert_task_seq_SOURCES = insert_task_seq.c insert_task_seq_OBJECTS = insert_task_seq.$(OBJEXT) insert_task_seq_LDADD = $(LDADD) insert_task_tags_SOURCES = insert_task_tags.c insert_task_tags_OBJECTS = insert_task_tags.$(OBJEXT) insert_task_tags_LDADD = $(LDADD) load_balancer_SOURCES = load_balancer.c load_balancer_OBJECTS = load_balancer.$(OBJEXT) load_balancer_LDADD = $(LDADD) loader_SOURCES = loader.c loader_OBJECTS = loader-loader.$(OBJEXT) loader_LDADD = $(LDADD) matrix_SOURCES = matrix.c matrix_OBJECTS = matrix.$(OBJEXT) matrix_LDADD = $(LDADD) matrix2_SOURCES = matrix2.c matrix2_OBJECTS = matrix2.$(OBJEXT) matrix2_LDADD = $(LDADD) mpi_barrier_SOURCES = mpi_barrier.c mpi_barrier_OBJECTS = mpi_barrier.$(OBJEXT) mpi_barrier_LDADD = $(LDADD) mpi_data_cpy_SOURCES = mpi_data_cpy.c mpi_data_cpy_OBJECTS = mpi_data_cpy.$(OBJEXT) mpi_data_cpy_LDADD = $(LDADD) mpi_detached_tag_SOURCES = mpi_detached_tag.c mpi_detached_tag_OBJECTS = mpi_detached_tag.$(OBJEXT) mpi_detached_tag_LDADD = $(LDADD) mpi_earlyrecv_SOURCES = mpi_earlyrecv.c mpi_earlyrecv_OBJECTS = mpi_earlyrecv.$(OBJEXT) mpi_earlyrecv_LDADD = $(LDADD) am_mpi_earlyrecv2_OBJECTS = mpi_earlyrecv2.$(OBJEXT) \ ../../examples/interface/complex_interface.$(OBJEXT) mpi_earlyrecv2_OBJECTS = $(am_mpi_earlyrecv2_OBJECTS) mpi_earlyrecv2_LDADD = $(LDADD) am_mpi_earlyrecv2_sync_OBJECTS = mpi_earlyrecv2_sync.$(OBJEXT) \ ../../examples/interface/complex_interface.$(OBJEXT) mpi_earlyrecv2_sync_OBJECTS = $(am_mpi_earlyrecv2_sync_OBJECTS) mpi_earlyrecv2_sync_LDADD = $(LDADD) mpi_irecv_SOURCES = mpi_irecv.c mpi_irecv_OBJECTS = mpi_irecv.$(OBJEXT) mpi_irecv_LDADD = $(LDADD) mpi_irecv_detached_SOURCES = mpi_irecv_detached.c mpi_irecv_detached_OBJECTS = mpi_irecv_detached.$(OBJEXT) mpi_irecv_detached_LDADD = $(LDADD) mpi_isend_SOURCES = mpi_isend.c mpi_isend_OBJECTS = mpi_isend.$(OBJEXT) mpi_isend_LDADD = $(LDADD) mpi_isend_detached_SOURCES = mpi_isend_detached.c mpi_isend_detached_OBJECTS = mpi_isend_detached.$(OBJEXT) mpi_isend_detached_LDADD = $(LDADD) am_mpi_reduction_OBJECTS = mpi_reduction.$(OBJEXT) \ mpi_reduction_kernels.$(OBJEXT) mpi_reduction_OBJECTS = $(am_mpi_reduction_OBJECTS) mpi_reduction_LDADD = $(LDADD) mpi_redux_SOURCES = mpi_redux.c mpi_redux_OBJECTS = mpi_redux.$(OBJEXT) mpi_redux_LDADD = $(LDADD) mpi_scatter_gather_SOURCES = mpi_scatter_gather.c mpi_scatter_gather_OBJECTS = mpi_scatter_gather.$(OBJEXT) mpi_scatter_gather_LDADD = $(LDADD) mpi_task_submit_SOURCES = mpi_task_submit.c mpi_task_submit_OBJECTS = mpi_task_submit.$(OBJEXT) mpi_task_submit_LDADD = $(LDADD) mpi_test_SOURCES = mpi_test.c mpi_test_OBJECTS = mpi_test.$(OBJEXT) mpi_test_LDADD = $(LDADD) multiple_send_SOURCES = multiple_send.c multiple_send_OBJECTS = multiple_send.$(OBJEXT) multiple_send_LDADD = $(LDADD) ndim_interface_SOURCES = ndim_interface.c ndim_interface_OBJECTS = ndim_interface.$(OBJEXT) ndim_interface_LDADD = $(LDADD) nothing_SOURCES = nothing.c nothing_OBJECTS = nothing.$(OBJEXT) nothing_LDADD = $(LDADD) pingpong_SOURCES = pingpong.c pingpong_OBJECTS = pingpong.$(OBJEXT) pingpong_LDADD = $(LDADD) policy_register_SOURCES = policy_register.c policy_register_OBJECTS = policy_register.$(OBJEXT) policy_register_LDADD = $(LDADD) policy_register_many_SOURCES = policy_register_many.c policy_register_many_OBJECTS = policy_register_many.$(OBJEXT) policy_register_many_LDADD = $(LDADD) policy_register_toomany_SOURCES = policy_register_toomany.c policy_register_toomany_OBJECTS = policy_register_toomany.$(OBJEXT) policy_register_toomany_LDADD = $(LDADD) policy_selection_SOURCES = policy_selection.c policy_selection_OBJECTS = policy_selection.$(OBJEXT) policy_selection_LDADD = $(LDADD) policy_selection2_SOURCES = policy_selection2.c policy_selection2_OBJECTS = policy_selection2.$(OBJEXT) policy_selection2_LDADD = $(LDADD) policy_unregister_SOURCES = policy_unregister.c policy_unregister_OBJECTS = policy_unregister.$(OBJEXT) policy_unregister_LDADD = $(LDADD) am__ring_SOURCES_DIST = ring.c ring_kernel.cu ring_kernel_hip.hip am_ring_OBJECTS = ring.$(OBJEXT) $(am__objects_1) $(am__objects_2) ring_OBJECTS = $(am_ring_OBJECTS) ring_LDADD = $(LDADD) am__ring_async_SOURCES_DIST = ring_async.c ring_kernel.cu \ ring_kernel_hip.hip am_ring_async_OBJECTS = ring_async.$(OBJEXT) $(am__objects_1) \ $(am__objects_2) ring_async_OBJECTS = $(am_ring_async_OBJECTS) ring_async_LDADD = $(LDADD) am__ring_async_implicit_SOURCES_DIST = ring_async_implicit.c \ ring_kernel.cu ring_kernel_hip.hip am_ring_async_implicit_OBJECTS = ring_async_implicit.$(OBJEXT) \ $(am__objects_1) $(am__objects_2) ring_async_implicit_OBJECTS = $(am_ring_async_implicit_OBJECTS) ring_async_implicit_LDADD = $(LDADD) am__ring_sync_SOURCES_DIST = ring_sync.c ring_kernel.cu \ ring_kernel_hip.hip am_ring_sync_OBJECTS = ring_sync.$(OBJEXT) $(am__objects_1) \ $(am__objects_2) ring_sync_OBJECTS = $(am_ring_sync_OBJECTS) ring_sync_LDADD = $(LDADD) am__ring_sync_detached_SOURCES_DIST = ring_sync_detached.c \ ring_kernel.cu ring_kernel_hip.hip am_ring_sync_detached_OBJECTS = ring_sync_detached.$(OBJEXT) \ $(am__objects_1) $(am__objects_2) ring_sync_detached_OBJECTS = $(am_ring_sync_detached_OBJECTS) ring_sync_detached_LDADD = $(LDADD) star_SOURCES = star.c star_OBJECTS = star.$(OBJEXT) star_LDADD = $(LDADD) starpu_redefine_SOURCES = starpu_redefine.c starpu_redefine_OBJECTS = starpu_redefine.$(OBJEXT) starpu_redefine_LDADD = $(LDADD) stats_SOURCES = stats.c stats_OBJECTS = stats.$(OBJEXT) stats_LDADD = $(LDADD) sync_SOURCES = sync.c sync_OBJECTS = sync.$(OBJEXT) sync_LDADD = $(LDADD) tags_allocate_SOURCES = tags_allocate.c tags_allocate_OBJECTS = tags_allocate.$(OBJEXT) tags_allocate_LDADD = $(LDADD) tags_checking_SOURCES = tags_checking.c tags_checking_OBJECTS = tags_checking.$(OBJEXT) tags_checking_LDADD = $(LDADD) temporary_SOURCES = temporary.c temporary_OBJECTS = temporary.$(OBJEXT) temporary_LDADD = $(LDADD) am_user_defined_datatype_OBJECTS = user_defined_datatype.$(OBJEXT) \ ../../examples/interface/complex_interface.$(OBJEXT) user_defined_datatype_OBJECTS = $(am_user_defined_datatype_OBJECTS) user_defined_datatype_LDADD = $(LDADD) wait_for_all_SOURCES = wait_for_all.c wait_for_all_OBJECTS = wait_for_all.$(OBJEXT) wait_for_all_LDADD = $(LDADD) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = \ ../../examples/interface/$(DEPDIR)/complex_interface.Po \ ./$(DEPDIR)/attr.Po ./$(DEPDIR)/block_interface.Po \ ./$(DEPDIR)/block_interface_pinned.Po ./$(DEPDIR)/broadcast.Po \ ./$(DEPDIR)/callback.Po ./$(DEPDIR)/checkpoints.Po \ ./$(DEPDIR)/coop.Po ./$(DEPDIR)/coop_acknowledgement.Po \ ./$(DEPDIR)/coop_cache.Po ./$(DEPDIR)/coop_chained_sends.Po \ ./$(DEPDIR)/coop_datatype.Po ./$(DEPDIR)/coop_insert_task.Po \ ./$(DEPDIR)/coop_large.Po ./$(DEPDIR)/coop_many.Po \ ./$(DEPDIR)/coop_recv_not_yet_posted.Po \ ./$(DEPDIR)/coop_recv_wait_finalize.Po \ ./$(DEPDIR)/coop_user_defined_datatype.Po \ ./$(DEPDIR)/coop_without_task.Po \ ./$(DEPDIR)/coop_wrong_order.Po ./$(DEPDIR)/data_cpy.Po \ ./$(DEPDIR)/datatypes.Po ./$(DEPDIR)/display_bindings.Po \ ./$(DEPDIR)/driver.Po ./$(DEPDIR)/early_request.Po \ ./$(DEPDIR)/early_stuff-early_stuff.Po ./$(DEPDIR)/gather.Po \ ./$(DEPDIR)/gather2.Po ./$(DEPDIR)/insert_task.Po \ ./$(DEPDIR)/insert_task_block.Po \ ./$(DEPDIR)/insert_task_can_execute.Po \ ./$(DEPDIR)/insert_task_compute.Po \ ./$(DEPDIR)/insert_task_count.Po \ ./$(DEPDIR)/insert_task_dyn_handles.Po \ ./$(DEPDIR)/insert_task_node_choice.Po \ ./$(DEPDIR)/insert_task_owner.Po \ ./$(DEPDIR)/insert_task_owner2.Po \ ./$(DEPDIR)/insert_task_owner_data.Po \ ./$(DEPDIR)/insert_task_recv_cache.Po \ ./$(DEPDIR)/insert_task_sent_cache.Po \ ./$(DEPDIR)/insert_task_seq.Po ./$(DEPDIR)/insert_task_tags.Po \ ./$(DEPDIR)/load_balancer.Po ./$(DEPDIR)/loader-loader.Po \ ./$(DEPDIR)/matrix.Po ./$(DEPDIR)/matrix2.Po \ ./$(DEPDIR)/mpi_barrier.Po ./$(DEPDIR)/mpi_data_cpy.Po \ ./$(DEPDIR)/mpi_detached_tag.Po ./$(DEPDIR)/mpi_earlyrecv.Po \ ./$(DEPDIR)/mpi_earlyrecv2.Po \ ./$(DEPDIR)/mpi_earlyrecv2_sync.Po ./$(DEPDIR)/mpi_irecv.Po \ ./$(DEPDIR)/mpi_irecv_detached.Po ./$(DEPDIR)/mpi_isend.Po \ ./$(DEPDIR)/mpi_isend_detached.Po ./$(DEPDIR)/mpi_reduction.Po \ ./$(DEPDIR)/mpi_reduction_kernels.Po ./$(DEPDIR)/mpi_redux.Po \ ./$(DEPDIR)/mpi_scatter_gather.Po \ ./$(DEPDIR)/mpi_task_submit.Po ./$(DEPDIR)/mpi_test.Po \ ./$(DEPDIR)/multiple_send.Po ./$(DEPDIR)/ndim_interface.Po \ ./$(DEPDIR)/nothing.Po ./$(DEPDIR)/pingpong.Po \ ./$(DEPDIR)/policy_register.Po \ ./$(DEPDIR)/policy_register_many.Po \ ./$(DEPDIR)/policy_register_toomany.Po \ ./$(DEPDIR)/policy_selection.Po \ ./$(DEPDIR)/policy_selection2.Po \ ./$(DEPDIR)/policy_unregister.Po ./$(DEPDIR)/ring.Po \ ./$(DEPDIR)/ring_async.Po ./$(DEPDIR)/ring_async_implicit.Po \ ./$(DEPDIR)/ring_sync.Po ./$(DEPDIR)/ring_sync_detached.Po \ ./$(DEPDIR)/star.Po ./$(DEPDIR)/starpu_redefine.Po \ ./$(DEPDIR)/stats.Po ./$(DEPDIR)/sync.Po \ ./$(DEPDIR)/tags_allocate.Po ./$(DEPDIR)/tags_checking.Po \ ./$(DEPDIR)/temporary.Po ./$(DEPDIR)/user_defined_datatype.Po \ ./$(DEPDIR)/wait_for_all.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = attr.c block_interface.c block_interface_pinned.c \ broadcast.c callback.c checkpoints.c coop.c \ coop_acknowledgement.c coop_cache.c coop_chained_sends.c \ coop_datatype.c coop_insert_task.c coop_large.c coop_many.c \ coop_recv_not_yet_posted.c coop_recv_wait_finalize.c \ $(coop_user_defined_datatype_SOURCES) coop_without_task.c \ coop_wrong_order.c data_cpy.c datatypes.c display_bindings.c \ driver.c early_request.c early_stuff.c gather.c gather2.c \ insert_task.c insert_task_block.c insert_task_can_execute.c \ insert_task_compute.c $(insert_task_count_SOURCES) \ insert_task_dyn_handles.c insert_task_node_choice.c \ insert_task_owner.c insert_task_owner2.c \ insert_task_owner_data.c insert_task_recv_cache.c \ insert_task_sent_cache.c insert_task_seq.c insert_task_tags.c \ load_balancer.c loader.c matrix.c matrix2.c mpi_barrier.c \ mpi_data_cpy.c mpi_detached_tag.c mpi_earlyrecv.c \ $(mpi_earlyrecv2_SOURCES) $(mpi_earlyrecv2_sync_SOURCES) \ mpi_irecv.c mpi_irecv_detached.c mpi_isend.c \ mpi_isend_detached.c $(mpi_reduction_SOURCES) mpi_redux.c \ mpi_scatter_gather.c mpi_task_submit.c mpi_test.c \ multiple_send.c ndim_interface.c nothing.c pingpong.c \ policy_register.c policy_register_many.c \ policy_register_toomany.c policy_selection.c \ policy_selection2.c policy_unregister.c $(ring_SOURCES) \ $(ring_async_SOURCES) $(ring_async_implicit_SOURCES) \ $(ring_sync_SOURCES) $(ring_sync_detached_SOURCES) star.c \ starpu_redefine.c stats.c sync.c tags_allocate.c \ tags_checking.c temporary.c $(user_defined_datatype_SOURCES) \ wait_for_all.c DIST_SOURCES = attr.c block_interface.c block_interface_pinned.c \ broadcast.c callback.c checkpoints.c coop.c \ coop_acknowledgement.c coop_cache.c coop_chained_sends.c \ coop_datatype.c coop_insert_task.c coop_large.c coop_many.c \ coop_recv_not_yet_posted.c coop_recv_wait_finalize.c \ $(coop_user_defined_datatype_SOURCES) coop_without_task.c \ coop_wrong_order.c data_cpy.c datatypes.c display_bindings.c \ driver.c early_request.c early_stuff.c gather.c gather2.c \ insert_task.c insert_task_block.c insert_task_can_execute.c \ insert_task_compute.c $(am__insert_task_count_SOURCES_DIST) \ insert_task_dyn_handles.c insert_task_node_choice.c \ insert_task_owner.c insert_task_owner2.c \ insert_task_owner_data.c insert_task_recv_cache.c \ insert_task_sent_cache.c insert_task_seq.c insert_task_tags.c \ load_balancer.c loader.c matrix.c matrix2.c mpi_barrier.c \ mpi_data_cpy.c mpi_detached_tag.c mpi_earlyrecv.c \ $(mpi_earlyrecv2_SOURCES) $(mpi_earlyrecv2_sync_SOURCES) \ mpi_irecv.c mpi_irecv_detached.c mpi_isend.c \ mpi_isend_detached.c $(mpi_reduction_SOURCES) mpi_redux.c \ mpi_scatter_gather.c mpi_task_submit.c mpi_test.c \ multiple_send.c ndim_interface.c nothing.c pingpong.c \ policy_register.c policy_register_many.c \ policy_register_toomany.c policy_selection.c \ policy_selection2.c policy_unregister.c \ $(am__ring_SOURCES_DIST) $(am__ring_async_SOURCES_DIST) \ $(am__ring_async_implicit_SOURCES_DIST) \ $(am__ring_sync_SOURCES_DIST) \ $(am__ring_sync_detached_SOURCES_DIST) star.c \ starpu_redefine.c stats.c sync.c tags_allocate.c \ tags_checking.c temporary.c $(user_defined_datatype_SOURCES) \ wait_for_all.c am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) AM_RECURSIVE_TARGETS = check recheck TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-loader.mk \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = $(MPICC) CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(STARPU_CUDA_LDFLAGS) $(STARPU_HIP_LDFLAGS) \ $(top_builddir)/src/@LIBSTARPU_LINK@ \ ../src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la \ $(STARPU_EXPORTED_LIBS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = $(MPI_RUN_ENV) LAUNCHER = $(STARPU_MPIEXEC) AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(APP_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) $(APP_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) $(APP_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) $(APP_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Thibaut Lambert # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # SUFFIXES = .hip CCLD = $(MPICC) # When GNU parallel is available and -j is passed to make, run tests through # parallel, using a "starpu" semaphore. # Also make test shell scripts run its tests through parallel, using a # "substarpu" semaphore. This brings some overload, but only one level. @HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') @STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 @STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) @STARPU_SIMGRID_TRUE@LOADER_BIN = $(LAUNCHER) @STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ @STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) @STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" @STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) AM_TESTS_FD_REDIRECT = 9>&2 BUILT_SOURCES = CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log EXTRA_DIST = \ helper.h \ user_defined_datatype_value.h examplebindir = $(libdir)/starpu/examples/mpi AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/mpi/include -I$(top_srcdir)/mpi/src -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_srcdir)/examples/ $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ ######################## # Unit testcases # ######################## starpu_mpi_TESTS = callback driver early_stuff insert_task_block \ insert_task_can_execute insert_task_tags multiple_send \ policy_register policy_register_many policy_selection star \ stats user_defined_datatype wait_for_all $(am__append_8) \ $(am__append_9) $(am__append_10) $(am__append_11) \ $(am__append_12) ring_SOURCES = ring.c $(am__append_14) $(am__append_20) ring_sync_SOURCES = ring_sync.c $(am__append_15) $(am__append_21) ring_sync_detached_SOURCES = ring_sync_detached.c $(am__append_16) \ $(am__append_22) ring_async_SOURCES = ring_async.c $(am__append_17) $(am__append_23) ring_async_implicit_SOURCES = ring_async_implicit.c $(am__append_18) \ $(am__append_24) insert_task_count_SOURCES = insert_task_count.c $(am__append_19) \ $(am__append_25) mpi_reduction_SOURCES = mpi_reduction.c mpi_reduction_kernels.c user_defined_datatype_SOURCES = user_defined_datatype.c \ ../../examples/interface/complex_interface.c mpi_earlyrecv2_SOURCES = mpi_earlyrecv2.c \ ../../examples/interface/complex_interface.c mpi_earlyrecv2_sync_SOURCES = mpi_earlyrecv2_sync.c \ ../../examples/interface/complex_interface.c coop_user_defined_datatype_SOURCES = coop_user_defined_datatype.c \ ../../examples/interface/complex_interface.c early_stuff_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) all: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) all-am .SUFFIXES: .SUFFIXES: .hip .c .cu .cubin .lo .log .o .obj .test .test$(EXEEXT) .trs $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign mpi/tests/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign mpi/tests/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-checkPROGRAMS: @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list install-examplebinPROGRAMS: $(examplebin_PROGRAMS) @$(NORMAL_INSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ fi; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ while read p p1; do if test -f $$p \ || test -f $$p1 \ ; then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' \ -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ } \ ; done uninstall-examplebinPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' \ `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(examplebindir)" && rm -f $$files clean-examplebinPROGRAMS: @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list clean-noinstPROGRAMS: @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list attr$(EXEEXT): $(attr_OBJECTS) $(attr_DEPENDENCIES) $(EXTRA_attr_DEPENDENCIES) @rm -f attr$(EXEEXT) $(AM_V_CCLD)$(LINK) $(attr_OBJECTS) $(attr_LDADD) $(LIBS) block_interface$(EXEEXT): $(block_interface_OBJECTS) $(block_interface_DEPENDENCIES) $(EXTRA_block_interface_DEPENDENCIES) @rm -f block_interface$(EXEEXT) $(AM_V_CCLD)$(LINK) $(block_interface_OBJECTS) $(block_interface_LDADD) $(LIBS) block_interface_pinned$(EXEEXT): $(block_interface_pinned_OBJECTS) $(block_interface_pinned_DEPENDENCIES) $(EXTRA_block_interface_pinned_DEPENDENCIES) @rm -f block_interface_pinned$(EXEEXT) $(AM_V_CCLD)$(LINK) $(block_interface_pinned_OBJECTS) $(block_interface_pinned_LDADD) $(LIBS) broadcast$(EXEEXT): $(broadcast_OBJECTS) $(broadcast_DEPENDENCIES) $(EXTRA_broadcast_DEPENDENCIES) @rm -f broadcast$(EXEEXT) $(AM_V_CCLD)$(LINK) $(broadcast_OBJECTS) $(broadcast_LDADD) $(LIBS) callback$(EXEEXT): $(callback_OBJECTS) $(callback_DEPENDENCIES) $(EXTRA_callback_DEPENDENCIES) @rm -f callback$(EXEEXT) $(AM_V_CCLD)$(LINK) $(callback_OBJECTS) $(callback_LDADD) $(LIBS) checkpoints$(EXEEXT): $(checkpoints_OBJECTS) $(checkpoints_DEPENDENCIES) $(EXTRA_checkpoints_DEPENDENCIES) @rm -f checkpoints$(EXEEXT) $(AM_V_CCLD)$(LINK) $(checkpoints_OBJECTS) $(checkpoints_LDADD) $(LIBS) coop$(EXEEXT): $(coop_OBJECTS) $(coop_DEPENDENCIES) $(EXTRA_coop_DEPENDENCIES) @rm -f coop$(EXEEXT) $(AM_V_CCLD)$(LINK) $(coop_OBJECTS) $(coop_LDADD) $(LIBS) coop_acknowledgement$(EXEEXT): $(coop_acknowledgement_OBJECTS) $(coop_acknowledgement_DEPENDENCIES) $(EXTRA_coop_acknowledgement_DEPENDENCIES) @rm -f coop_acknowledgement$(EXEEXT) $(AM_V_CCLD)$(LINK) $(coop_acknowledgement_OBJECTS) $(coop_acknowledgement_LDADD) $(LIBS) coop_cache$(EXEEXT): $(coop_cache_OBJECTS) $(coop_cache_DEPENDENCIES) $(EXTRA_coop_cache_DEPENDENCIES) @rm -f coop_cache$(EXEEXT) $(AM_V_CCLD)$(LINK) $(coop_cache_OBJECTS) $(coop_cache_LDADD) $(LIBS) coop_chained_sends$(EXEEXT): $(coop_chained_sends_OBJECTS) $(coop_chained_sends_DEPENDENCIES) $(EXTRA_coop_chained_sends_DEPENDENCIES) @rm -f coop_chained_sends$(EXEEXT) $(AM_V_CCLD)$(LINK) $(coop_chained_sends_OBJECTS) $(coop_chained_sends_LDADD) $(LIBS) coop_datatype$(EXEEXT): $(coop_datatype_OBJECTS) $(coop_datatype_DEPENDENCIES) $(EXTRA_coop_datatype_DEPENDENCIES) @rm -f coop_datatype$(EXEEXT) $(AM_V_CCLD)$(LINK) $(coop_datatype_OBJECTS) $(coop_datatype_LDADD) $(LIBS) coop_insert_task$(EXEEXT): $(coop_insert_task_OBJECTS) $(coop_insert_task_DEPENDENCIES) $(EXTRA_coop_insert_task_DEPENDENCIES) @rm -f coop_insert_task$(EXEEXT) $(AM_V_CCLD)$(LINK) $(coop_insert_task_OBJECTS) $(coop_insert_task_LDADD) $(LIBS) coop_large$(EXEEXT): $(coop_large_OBJECTS) $(coop_large_DEPENDENCIES) $(EXTRA_coop_large_DEPENDENCIES) @rm -f coop_large$(EXEEXT) $(AM_V_CCLD)$(LINK) $(coop_large_OBJECTS) $(coop_large_LDADD) $(LIBS) coop_many$(EXEEXT): $(coop_many_OBJECTS) $(coop_many_DEPENDENCIES) $(EXTRA_coop_many_DEPENDENCIES) @rm -f coop_many$(EXEEXT) $(AM_V_CCLD)$(LINK) $(coop_many_OBJECTS) $(coop_many_LDADD) $(LIBS) coop_recv_not_yet_posted$(EXEEXT): $(coop_recv_not_yet_posted_OBJECTS) $(coop_recv_not_yet_posted_DEPENDENCIES) $(EXTRA_coop_recv_not_yet_posted_DEPENDENCIES) @rm -f coop_recv_not_yet_posted$(EXEEXT) $(AM_V_CCLD)$(LINK) $(coop_recv_not_yet_posted_OBJECTS) $(coop_recv_not_yet_posted_LDADD) $(LIBS) coop_recv_wait_finalize$(EXEEXT): $(coop_recv_wait_finalize_OBJECTS) $(coop_recv_wait_finalize_DEPENDENCIES) $(EXTRA_coop_recv_wait_finalize_DEPENDENCIES) @rm -f coop_recv_wait_finalize$(EXEEXT) $(AM_V_CCLD)$(LINK) $(coop_recv_wait_finalize_OBJECTS) $(coop_recv_wait_finalize_LDADD) $(LIBS) ../../examples/interface/$(am__dirstamp): @$(MKDIR_P) ../../examples/interface @: > ../../examples/interface/$(am__dirstamp) ../../examples/interface/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) ../../examples/interface/$(DEPDIR) @: > ../../examples/interface/$(DEPDIR)/$(am__dirstamp) ../../examples/interface/complex_interface.$(OBJEXT): \ ../../examples/interface/$(am__dirstamp) \ ../../examples/interface/$(DEPDIR)/$(am__dirstamp) coop_user_defined_datatype$(EXEEXT): $(coop_user_defined_datatype_OBJECTS) $(coop_user_defined_datatype_DEPENDENCIES) $(EXTRA_coop_user_defined_datatype_DEPENDENCIES) @rm -f coop_user_defined_datatype$(EXEEXT) $(AM_V_CCLD)$(LINK) $(coop_user_defined_datatype_OBJECTS) $(coop_user_defined_datatype_LDADD) $(LIBS) coop_without_task$(EXEEXT): $(coop_without_task_OBJECTS) $(coop_without_task_DEPENDENCIES) $(EXTRA_coop_without_task_DEPENDENCIES) @rm -f coop_without_task$(EXEEXT) $(AM_V_CCLD)$(LINK) $(coop_without_task_OBJECTS) $(coop_without_task_LDADD) $(LIBS) coop_wrong_order$(EXEEXT): $(coop_wrong_order_OBJECTS) $(coop_wrong_order_DEPENDENCIES) $(EXTRA_coop_wrong_order_DEPENDENCIES) @rm -f coop_wrong_order$(EXEEXT) $(AM_V_CCLD)$(LINK) $(coop_wrong_order_OBJECTS) $(coop_wrong_order_LDADD) $(LIBS) data_cpy$(EXEEXT): $(data_cpy_OBJECTS) $(data_cpy_DEPENDENCIES) $(EXTRA_data_cpy_DEPENDENCIES) @rm -f data_cpy$(EXEEXT) $(AM_V_CCLD)$(LINK) $(data_cpy_OBJECTS) $(data_cpy_LDADD) $(LIBS) datatypes$(EXEEXT): $(datatypes_OBJECTS) $(datatypes_DEPENDENCIES) $(EXTRA_datatypes_DEPENDENCIES) @rm -f datatypes$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datatypes_OBJECTS) $(datatypes_LDADD) $(LIBS) display_bindings$(EXEEXT): $(display_bindings_OBJECTS) $(display_bindings_DEPENDENCIES) $(EXTRA_display_bindings_DEPENDENCIES) @rm -f display_bindings$(EXEEXT) $(AM_V_CCLD)$(LINK) $(display_bindings_OBJECTS) $(display_bindings_LDADD) $(LIBS) driver$(EXEEXT): $(driver_OBJECTS) $(driver_DEPENDENCIES) $(EXTRA_driver_DEPENDENCIES) @rm -f driver$(EXEEXT) $(AM_V_CCLD)$(LINK) $(driver_OBJECTS) $(driver_LDADD) $(LIBS) early_request$(EXEEXT): $(early_request_OBJECTS) $(early_request_DEPENDENCIES) $(EXTRA_early_request_DEPENDENCIES) @rm -f early_request$(EXEEXT) $(AM_V_CCLD)$(LINK) $(early_request_OBJECTS) $(early_request_LDADD) $(LIBS) early_stuff$(EXEEXT): $(early_stuff_OBJECTS) $(early_stuff_DEPENDENCIES) $(EXTRA_early_stuff_DEPENDENCIES) @rm -f early_stuff$(EXEEXT) $(AM_V_CCLD)$(early_stuff_LINK) $(early_stuff_OBJECTS) $(early_stuff_LDADD) $(LIBS) gather$(EXEEXT): $(gather_OBJECTS) $(gather_DEPENDENCIES) $(EXTRA_gather_DEPENDENCIES) @rm -f gather$(EXEEXT) $(AM_V_CCLD)$(LINK) $(gather_OBJECTS) $(gather_LDADD) $(LIBS) gather2$(EXEEXT): $(gather2_OBJECTS) $(gather2_DEPENDENCIES) $(EXTRA_gather2_DEPENDENCIES) @rm -f gather2$(EXEEXT) $(AM_V_CCLD)$(LINK) $(gather2_OBJECTS) $(gather2_LDADD) $(LIBS) insert_task$(EXEEXT): $(insert_task_OBJECTS) $(insert_task_DEPENDENCIES) $(EXTRA_insert_task_DEPENDENCIES) @rm -f insert_task$(EXEEXT) $(AM_V_CCLD)$(LINK) $(insert_task_OBJECTS) $(insert_task_LDADD) $(LIBS) insert_task_block$(EXEEXT): $(insert_task_block_OBJECTS) $(insert_task_block_DEPENDENCIES) $(EXTRA_insert_task_block_DEPENDENCIES) @rm -f insert_task_block$(EXEEXT) $(AM_V_CCLD)$(LINK) $(insert_task_block_OBJECTS) $(insert_task_block_LDADD) $(LIBS) insert_task_can_execute$(EXEEXT): $(insert_task_can_execute_OBJECTS) $(insert_task_can_execute_DEPENDENCIES) $(EXTRA_insert_task_can_execute_DEPENDENCIES) @rm -f insert_task_can_execute$(EXEEXT) $(AM_V_CCLD)$(LINK) $(insert_task_can_execute_OBJECTS) $(insert_task_can_execute_LDADD) $(LIBS) insert_task_compute$(EXEEXT): $(insert_task_compute_OBJECTS) $(insert_task_compute_DEPENDENCIES) $(EXTRA_insert_task_compute_DEPENDENCIES) @rm -f insert_task_compute$(EXEEXT) $(AM_V_CCLD)$(LINK) $(insert_task_compute_OBJECTS) $(insert_task_compute_LDADD) $(LIBS) insert_task_count$(EXEEXT): $(insert_task_count_OBJECTS) $(insert_task_count_DEPENDENCIES) $(EXTRA_insert_task_count_DEPENDENCIES) @rm -f insert_task_count$(EXEEXT) $(AM_V_CCLD)$(LINK) $(insert_task_count_OBJECTS) $(insert_task_count_LDADD) $(LIBS) insert_task_dyn_handles$(EXEEXT): $(insert_task_dyn_handles_OBJECTS) $(insert_task_dyn_handles_DEPENDENCIES) $(EXTRA_insert_task_dyn_handles_DEPENDENCIES) @rm -f insert_task_dyn_handles$(EXEEXT) $(AM_V_CCLD)$(LINK) $(insert_task_dyn_handles_OBJECTS) $(insert_task_dyn_handles_LDADD) $(LIBS) insert_task_node_choice$(EXEEXT): $(insert_task_node_choice_OBJECTS) $(insert_task_node_choice_DEPENDENCIES) $(EXTRA_insert_task_node_choice_DEPENDENCIES) @rm -f insert_task_node_choice$(EXEEXT) $(AM_V_CCLD)$(LINK) $(insert_task_node_choice_OBJECTS) $(insert_task_node_choice_LDADD) $(LIBS) insert_task_owner$(EXEEXT): $(insert_task_owner_OBJECTS) $(insert_task_owner_DEPENDENCIES) $(EXTRA_insert_task_owner_DEPENDENCIES) @rm -f insert_task_owner$(EXEEXT) $(AM_V_CCLD)$(LINK) $(insert_task_owner_OBJECTS) $(insert_task_owner_LDADD) $(LIBS) insert_task_owner2$(EXEEXT): $(insert_task_owner2_OBJECTS) $(insert_task_owner2_DEPENDENCIES) $(EXTRA_insert_task_owner2_DEPENDENCIES) @rm -f insert_task_owner2$(EXEEXT) $(AM_V_CCLD)$(LINK) $(insert_task_owner2_OBJECTS) $(insert_task_owner2_LDADD) $(LIBS) insert_task_owner_data$(EXEEXT): $(insert_task_owner_data_OBJECTS) $(insert_task_owner_data_DEPENDENCIES) $(EXTRA_insert_task_owner_data_DEPENDENCIES) @rm -f insert_task_owner_data$(EXEEXT) $(AM_V_CCLD)$(LINK) $(insert_task_owner_data_OBJECTS) $(insert_task_owner_data_LDADD) $(LIBS) insert_task_recv_cache$(EXEEXT): $(insert_task_recv_cache_OBJECTS) $(insert_task_recv_cache_DEPENDENCIES) $(EXTRA_insert_task_recv_cache_DEPENDENCIES) @rm -f insert_task_recv_cache$(EXEEXT) $(AM_V_CCLD)$(LINK) $(insert_task_recv_cache_OBJECTS) $(insert_task_recv_cache_LDADD) $(LIBS) insert_task_sent_cache$(EXEEXT): $(insert_task_sent_cache_OBJECTS) $(insert_task_sent_cache_DEPENDENCIES) $(EXTRA_insert_task_sent_cache_DEPENDENCIES) @rm -f insert_task_sent_cache$(EXEEXT) $(AM_V_CCLD)$(LINK) $(insert_task_sent_cache_OBJECTS) $(insert_task_sent_cache_LDADD) $(LIBS) insert_task_seq$(EXEEXT): $(insert_task_seq_OBJECTS) $(insert_task_seq_DEPENDENCIES) $(EXTRA_insert_task_seq_DEPENDENCIES) @rm -f insert_task_seq$(EXEEXT) $(AM_V_CCLD)$(LINK) $(insert_task_seq_OBJECTS) $(insert_task_seq_LDADD) $(LIBS) insert_task_tags$(EXEEXT): $(insert_task_tags_OBJECTS) $(insert_task_tags_DEPENDENCIES) $(EXTRA_insert_task_tags_DEPENDENCIES) @rm -f insert_task_tags$(EXEEXT) $(AM_V_CCLD)$(LINK) $(insert_task_tags_OBJECTS) $(insert_task_tags_LDADD) $(LIBS) load_balancer$(EXEEXT): $(load_balancer_OBJECTS) $(load_balancer_DEPENDENCIES) $(EXTRA_load_balancer_DEPENDENCIES) @rm -f load_balancer$(EXEEXT) $(AM_V_CCLD)$(LINK) $(load_balancer_OBJECTS) $(load_balancer_LDADD) $(LIBS) loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) @rm -f loader$(EXEEXT) $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) matrix$(EXEEXT): $(matrix_OBJECTS) $(matrix_DEPENDENCIES) $(EXTRA_matrix_DEPENDENCIES) @rm -f matrix$(EXEEXT) $(AM_V_CCLD)$(LINK) $(matrix_OBJECTS) $(matrix_LDADD) $(LIBS) matrix2$(EXEEXT): $(matrix2_OBJECTS) $(matrix2_DEPENDENCIES) $(EXTRA_matrix2_DEPENDENCIES) @rm -f matrix2$(EXEEXT) $(AM_V_CCLD)$(LINK) $(matrix2_OBJECTS) $(matrix2_LDADD) $(LIBS) mpi_barrier$(EXEEXT): $(mpi_barrier_OBJECTS) $(mpi_barrier_DEPENDENCIES) $(EXTRA_mpi_barrier_DEPENDENCIES) @rm -f mpi_barrier$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_barrier_OBJECTS) $(mpi_barrier_LDADD) $(LIBS) mpi_data_cpy$(EXEEXT): $(mpi_data_cpy_OBJECTS) $(mpi_data_cpy_DEPENDENCIES) $(EXTRA_mpi_data_cpy_DEPENDENCIES) @rm -f mpi_data_cpy$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_data_cpy_OBJECTS) $(mpi_data_cpy_LDADD) $(LIBS) mpi_detached_tag$(EXEEXT): $(mpi_detached_tag_OBJECTS) $(mpi_detached_tag_DEPENDENCIES) $(EXTRA_mpi_detached_tag_DEPENDENCIES) @rm -f mpi_detached_tag$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_detached_tag_OBJECTS) $(mpi_detached_tag_LDADD) $(LIBS) mpi_earlyrecv$(EXEEXT): $(mpi_earlyrecv_OBJECTS) $(mpi_earlyrecv_DEPENDENCIES) $(EXTRA_mpi_earlyrecv_DEPENDENCIES) @rm -f mpi_earlyrecv$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_earlyrecv_OBJECTS) $(mpi_earlyrecv_LDADD) $(LIBS) mpi_earlyrecv2$(EXEEXT): $(mpi_earlyrecv2_OBJECTS) $(mpi_earlyrecv2_DEPENDENCIES) $(EXTRA_mpi_earlyrecv2_DEPENDENCIES) @rm -f mpi_earlyrecv2$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_earlyrecv2_OBJECTS) $(mpi_earlyrecv2_LDADD) $(LIBS) mpi_earlyrecv2_sync$(EXEEXT): $(mpi_earlyrecv2_sync_OBJECTS) $(mpi_earlyrecv2_sync_DEPENDENCIES) $(EXTRA_mpi_earlyrecv2_sync_DEPENDENCIES) @rm -f mpi_earlyrecv2_sync$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_earlyrecv2_sync_OBJECTS) $(mpi_earlyrecv2_sync_LDADD) $(LIBS) mpi_irecv$(EXEEXT): $(mpi_irecv_OBJECTS) $(mpi_irecv_DEPENDENCIES) $(EXTRA_mpi_irecv_DEPENDENCIES) @rm -f mpi_irecv$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_irecv_OBJECTS) $(mpi_irecv_LDADD) $(LIBS) mpi_irecv_detached$(EXEEXT): $(mpi_irecv_detached_OBJECTS) $(mpi_irecv_detached_DEPENDENCIES) $(EXTRA_mpi_irecv_detached_DEPENDENCIES) @rm -f mpi_irecv_detached$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_irecv_detached_OBJECTS) $(mpi_irecv_detached_LDADD) $(LIBS) mpi_isend$(EXEEXT): $(mpi_isend_OBJECTS) $(mpi_isend_DEPENDENCIES) $(EXTRA_mpi_isend_DEPENDENCIES) @rm -f mpi_isend$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_isend_OBJECTS) $(mpi_isend_LDADD) $(LIBS) mpi_isend_detached$(EXEEXT): $(mpi_isend_detached_OBJECTS) $(mpi_isend_detached_DEPENDENCIES) $(EXTRA_mpi_isend_detached_DEPENDENCIES) @rm -f mpi_isend_detached$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_isend_detached_OBJECTS) $(mpi_isend_detached_LDADD) $(LIBS) mpi_reduction$(EXEEXT): $(mpi_reduction_OBJECTS) $(mpi_reduction_DEPENDENCIES) $(EXTRA_mpi_reduction_DEPENDENCIES) @rm -f mpi_reduction$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_reduction_OBJECTS) $(mpi_reduction_LDADD) $(LIBS) mpi_redux$(EXEEXT): $(mpi_redux_OBJECTS) $(mpi_redux_DEPENDENCIES) $(EXTRA_mpi_redux_DEPENDENCIES) @rm -f mpi_redux$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_redux_OBJECTS) $(mpi_redux_LDADD) $(LIBS) mpi_scatter_gather$(EXEEXT): $(mpi_scatter_gather_OBJECTS) $(mpi_scatter_gather_DEPENDENCIES) $(EXTRA_mpi_scatter_gather_DEPENDENCIES) @rm -f mpi_scatter_gather$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_scatter_gather_OBJECTS) $(mpi_scatter_gather_LDADD) $(LIBS) mpi_task_submit$(EXEEXT): $(mpi_task_submit_OBJECTS) $(mpi_task_submit_DEPENDENCIES) $(EXTRA_mpi_task_submit_DEPENDENCIES) @rm -f mpi_task_submit$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_task_submit_OBJECTS) $(mpi_task_submit_LDADD) $(LIBS) mpi_test$(EXEEXT): $(mpi_test_OBJECTS) $(mpi_test_DEPENDENCIES) $(EXTRA_mpi_test_DEPENDENCIES) @rm -f mpi_test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mpi_test_OBJECTS) $(mpi_test_LDADD) $(LIBS) multiple_send$(EXEEXT): $(multiple_send_OBJECTS) $(multiple_send_DEPENDENCIES) $(EXTRA_multiple_send_DEPENDENCIES) @rm -f multiple_send$(EXEEXT) $(AM_V_CCLD)$(LINK) $(multiple_send_OBJECTS) $(multiple_send_LDADD) $(LIBS) ndim_interface$(EXEEXT): $(ndim_interface_OBJECTS) $(ndim_interface_DEPENDENCIES) $(EXTRA_ndim_interface_DEPENDENCIES) @rm -f ndim_interface$(EXEEXT) $(AM_V_CCLD)$(LINK) $(ndim_interface_OBJECTS) $(ndim_interface_LDADD) $(LIBS) nothing$(EXEEXT): $(nothing_OBJECTS) $(nothing_DEPENDENCIES) $(EXTRA_nothing_DEPENDENCIES) @rm -f nothing$(EXEEXT) $(AM_V_CCLD)$(LINK) $(nothing_OBJECTS) $(nothing_LDADD) $(LIBS) pingpong$(EXEEXT): $(pingpong_OBJECTS) $(pingpong_DEPENDENCIES) $(EXTRA_pingpong_DEPENDENCIES) @rm -f pingpong$(EXEEXT) $(AM_V_CCLD)$(LINK) $(pingpong_OBJECTS) $(pingpong_LDADD) $(LIBS) policy_register$(EXEEXT): $(policy_register_OBJECTS) $(policy_register_DEPENDENCIES) $(EXTRA_policy_register_DEPENDENCIES) @rm -f policy_register$(EXEEXT) $(AM_V_CCLD)$(LINK) $(policy_register_OBJECTS) $(policy_register_LDADD) $(LIBS) policy_register_many$(EXEEXT): $(policy_register_many_OBJECTS) $(policy_register_many_DEPENDENCIES) $(EXTRA_policy_register_many_DEPENDENCIES) @rm -f policy_register_many$(EXEEXT) $(AM_V_CCLD)$(LINK) $(policy_register_many_OBJECTS) $(policy_register_many_LDADD) $(LIBS) policy_register_toomany$(EXEEXT): $(policy_register_toomany_OBJECTS) $(policy_register_toomany_DEPENDENCIES) $(EXTRA_policy_register_toomany_DEPENDENCIES) @rm -f policy_register_toomany$(EXEEXT) $(AM_V_CCLD)$(LINK) $(policy_register_toomany_OBJECTS) $(policy_register_toomany_LDADD) $(LIBS) policy_selection$(EXEEXT): $(policy_selection_OBJECTS) $(policy_selection_DEPENDENCIES) $(EXTRA_policy_selection_DEPENDENCIES) @rm -f policy_selection$(EXEEXT) $(AM_V_CCLD)$(LINK) $(policy_selection_OBJECTS) $(policy_selection_LDADD) $(LIBS) policy_selection2$(EXEEXT): $(policy_selection2_OBJECTS) $(policy_selection2_DEPENDENCIES) $(EXTRA_policy_selection2_DEPENDENCIES) @rm -f policy_selection2$(EXEEXT) $(AM_V_CCLD)$(LINK) $(policy_selection2_OBJECTS) $(policy_selection2_LDADD) $(LIBS) policy_unregister$(EXEEXT): $(policy_unregister_OBJECTS) $(policy_unregister_DEPENDENCIES) $(EXTRA_policy_unregister_DEPENDENCIES) @rm -f policy_unregister$(EXEEXT) $(AM_V_CCLD)$(LINK) $(policy_unregister_OBJECTS) $(policy_unregister_LDADD) $(LIBS) ring$(EXEEXT): $(ring_OBJECTS) $(ring_DEPENDENCIES) $(EXTRA_ring_DEPENDENCIES) @rm -f ring$(EXEEXT) $(AM_V_CCLD)$(LINK) $(ring_OBJECTS) $(ring_LDADD) $(LIBS) ring_async$(EXEEXT): $(ring_async_OBJECTS) $(ring_async_DEPENDENCIES) $(EXTRA_ring_async_DEPENDENCIES) @rm -f ring_async$(EXEEXT) $(AM_V_CCLD)$(LINK) $(ring_async_OBJECTS) $(ring_async_LDADD) $(LIBS) ring_async_implicit$(EXEEXT): $(ring_async_implicit_OBJECTS) $(ring_async_implicit_DEPENDENCIES) $(EXTRA_ring_async_implicit_DEPENDENCIES) @rm -f ring_async_implicit$(EXEEXT) $(AM_V_CCLD)$(LINK) $(ring_async_implicit_OBJECTS) $(ring_async_implicit_LDADD) $(LIBS) ring_sync$(EXEEXT): $(ring_sync_OBJECTS) $(ring_sync_DEPENDENCIES) $(EXTRA_ring_sync_DEPENDENCIES) @rm -f ring_sync$(EXEEXT) $(AM_V_CCLD)$(LINK) $(ring_sync_OBJECTS) $(ring_sync_LDADD) $(LIBS) ring_sync_detached$(EXEEXT): $(ring_sync_detached_OBJECTS) $(ring_sync_detached_DEPENDENCIES) $(EXTRA_ring_sync_detached_DEPENDENCIES) @rm -f ring_sync_detached$(EXEEXT) $(AM_V_CCLD)$(LINK) $(ring_sync_detached_OBJECTS) $(ring_sync_detached_LDADD) $(LIBS) star$(EXEEXT): $(star_OBJECTS) $(star_DEPENDENCIES) $(EXTRA_star_DEPENDENCIES) @rm -f star$(EXEEXT) $(AM_V_CCLD)$(LINK) $(star_OBJECTS) $(star_LDADD) $(LIBS) starpu_redefine$(EXEEXT): $(starpu_redefine_OBJECTS) $(starpu_redefine_DEPENDENCIES) $(EXTRA_starpu_redefine_DEPENDENCIES) @rm -f starpu_redefine$(EXEEXT) $(AM_V_CCLD)$(LINK) $(starpu_redefine_OBJECTS) $(starpu_redefine_LDADD) $(LIBS) stats$(EXEEXT): $(stats_OBJECTS) $(stats_DEPENDENCIES) $(EXTRA_stats_DEPENDENCIES) @rm -f stats$(EXEEXT) $(AM_V_CCLD)$(LINK) $(stats_OBJECTS) $(stats_LDADD) $(LIBS) sync$(EXEEXT): $(sync_OBJECTS) $(sync_DEPENDENCIES) $(EXTRA_sync_DEPENDENCIES) @rm -f sync$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sync_OBJECTS) $(sync_LDADD) $(LIBS) tags_allocate$(EXEEXT): $(tags_allocate_OBJECTS) $(tags_allocate_DEPENDENCIES) $(EXTRA_tags_allocate_DEPENDENCIES) @rm -f tags_allocate$(EXEEXT) $(AM_V_CCLD)$(LINK) $(tags_allocate_OBJECTS) $(tags_allocate_LDADD) $(LIBS) tags_checking$(EXEEXT): $(tags_checking_OBJECTS) $(tags_checking_DEPENDENCIES) $(EXTRA_tags_checking_DEPENDENCIES) @rm -f tags_checking$(EXEEXT) $(AM_V_CCLD)$(LINK) $(tags_checking_OBJECTS) $(tags_checking_LDADD) $(LIBS) temporary$(EXEEXT): $(temporary_OBJECTS) $(temporary_DEPENDENCIES) $(EXTRA_temporary_DEPENDENCIES) @rm -f temporary$(EXEEXT) $(AM_V_CCLD)$(LINK) $(temporary_OBJECTS) $(temporary_LDADD) $(LIBS) user_defined_datatype$(EXEEXT): $(user_defined_datatype_OBJECTS) $(user_defined_datatype_DEPENDENCIES) $(EXTRA_user_defined_datatype_DEPENDENCIES) @rm -f user_defined_datatype$(EXEEXT) $(AM_V_CCLD)$(LINK) $(user_defined_datatype_OBJECTS) $(user_defined_datatype_LDADD) $(LIBS) wait_for_all$(EXEEXT): $(wait_for_all_OBJECTS) $(wait_for_all_DEPENDENCIES) $(EXTRA_wait_for_all_DEPENDENCIES) @rm -f wait_for_all$(EXEEXT) $(AM_V_CCLD)$(LINK) $(wait_for_all_OBJECTS) $(wait_for_all_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) -rm -f ../../examples/interface/*.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@../../examples/interface/$(DEPDIR)/complex_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/attr.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/block_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/block_interface_pinned.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/broadcast.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/callback.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/checkpoints.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_acknowledgement.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_cache.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_chained_sends.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_datatype.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_insert_task.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_large.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_many.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_recv_not_yet_posted.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_recv_wait_finalize.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_user_defined_datatype.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_without_task.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coop_wrong_order.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/data_cpy.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/datatypes.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/display_bindings.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/driver.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/early_request.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/early_stuff-early_stuff.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gather.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gather2.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_block.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_can_execute.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_compute.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_count.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_dyn_handles.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_node_choice.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_owner.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_owner2.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_owner_data.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_recv_cache.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_sent_cache.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_seq.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insert_task_tags.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/load_balancer.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matrix.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matrix2.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_barrier.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_data_cpy.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_detached_tag.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_earlyrecv.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_earlyrecv2.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_earlyrecv2_sync.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_irecv.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_irecv_detached.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_isend.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_isend_detached.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_reduction.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_reduction_kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_redux.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_scatter_gather.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_task_submit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multiple_send.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ndim_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nothing.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pingpong.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/policy_register.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/policy_register_many.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/policy_register_toomany.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/policy_selection.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/policy_selection2.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/policy_unregister.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ring.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ring_async.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ring_async_implicit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ring_sync.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ring_sync_detached.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/star.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_redefine.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stats.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sync.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tags_allocate.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tags_checking.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/temporary.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/user_defined_datatype.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wait_for_all.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< early_stuff-early_stuff.o: early_stuff.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(early_stuff_CFLAGS) $(CFLAGS) -MT early_stuff-early_stuff.o -MD -MP -MF $(DEPDIR)/early_stuff-early_stuff.Tpo -c -o early_stuff-early_stuff.o `test -f 'early_stuff.c' || echo '$(srcdir)/'`early_stuff.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/early_stuff-early_stuff.Tpo $(DEPDIR)/early_stuff-early_stuff.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='early_stuff.c' object='early_stuff-early_stuff.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(early_stuff_CFLAGS) $(CFLAGS) -c -o early_stuff-early_stuff.o `test -f 'early_stuff.c' || echo '$(srcdir)/'`early_stuff.c early_stuff-early_stuff.obj: early_stuff.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(early_stuff_CFLAGS) $(CFLAGS) -MT early_stuff-early_stuff.obj -MD -MP -MF $(DEPDIR)/early_stuff-early_stuff.Tpo -c -o early_stuff-early_stuff.obj `if test -f 'early_stuff.c'; then $(CYGPATH_W) 'early_stuff.c'; else $(CYGPATH_W) '$(srcdir)/early_stuff.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/early_stuff-early_stuff.Tpo $(DEPDIR)/early_stuff-early_stuff.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='early_stuff.c' object='early_stuff-early_stuff.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(early_stuff_CFLAGS) $(CFLAGS) -c -o early_stuff-early_stuff.obj `if test -f 'early_stuff.c'; then $(CYGPATH_W) 'early_stuff.c'; else $(CYGPATH_W) '$(srcdir)/early_stuff.c'; fi` loader-loader.o: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c loader-loader.obj: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-am TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-am CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-am cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: $(check_PROGRAMS) @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all $(check_PROGRAMS) @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? callback.log: callback$(EXEEXT) @p='callback$(EXEEXT)'; \ b='callback'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) driver.log: driver$(EXEEXT) @p='driver$(EXEEXT)'; \ b='driver'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) early_stuff.log: early_stuff$(EXEEXT) @p='early_stuff$(EXEEXT)'; \ b='early_stuff'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) insert_task_block.log: insert_task_block$(EXEEXT) @p='insert_task_block$(EXEEXT)'; \ b='insert_task_block'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) insert_task_can_execute.log: insert_task_can_execute$(EXEEXT) @p='insert_task_can_execute$(EXEEXT)'; \ b='insert_task_can_execute'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) insert_task_tags.log: insert_task_tags$(EXEEXT) @p='insert_task_tags$(EXEEXT)'; \ b='insert_task_tags'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) multiple_send.log: multiple_send$(EXEEXT) @p='multiple_send$(EXEEXT)'; \ b='multiple_send'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) policy_register.log: policy_register$(EXEEXT) @p='policy_register$(EXEEXT)'; \ b='policy_register'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) policy_register_many.log: policy_register_many$(EXEEXT) @p='policy_register_many$(EXEEXT)'; \ b='policy_register_many'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) policy_selection.log: policy_selection$(EXEEXT) @p='policy_selection$(EXEEXT)'; \ b='policy_selection'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) star.log: star$(EXEEXT) @p='star$(EXEEXT)'; \ b='star'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) stats.log: stats$(EXEEXT) @p='stats$(EXEEXT)'; \ b='stats'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) user_defined_datatype.log: user_defined_datatype$(EXEEXT) @p='user_defined_datatype$(EXEEXT)'; \ b='user_defined_datatype'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) wait_for_all.log: wait_for_all$(EXEEXT) @p='wait_for_all$(EXEEXT)'; \ b='wait_for_all'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) attr.log: attr$(EXEEXT) @p='attr$(EXEEXT)'; \ b='attr'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) ndim_interface.log: ndim_interface$(EXEEXT) @p='ndim_interface$(EXEEXT)'; \ b='ndim_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) broadcast.log: broadcast$(EXEEXT) @p='broadcast$(EXEEXT)'; \ b='broadcast'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) early_request.log: early_request$(EXEEXT) @p='early_request$(EXEEXT)'; \ b='early_request'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) gather.log: gather$(EXEEXT) @p='gather$(EXEEXT)'; \ b='gather'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) gather2.log: gather2$(EXEEXT) @p='gather2$(EXEEXT)'; \ b='gather2'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) insert_task.log: insert_task$(EXEEXT) @p='insert_task$(EXEEXT)'; \ b='insert_task'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) insert_task_count.log: insert_task_count$(EXEEXT) @p='insert_task_count$(EXEEXT)'; \ b='insert_task_count'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) insert_task_dyn_handles.log: insert_task_dyn_handles$(EXEEXT) @p='insert_task_dyn_handles$(EXEEXT)'; \ b='insert_task_dyn_handles'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) insert_task_node_choice.log: insert_task_node_choice$(EXEEXT) @p='insert_task_node_choice$(EXEEXT)'; \ b='insert_task_node_choice'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) insert_task_owner.log: insert_task_owner$(EXEEXT) @p='insert_task_owner$(EXEEXT)'; \ b='insert_task_owner'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) insert_task_owner2.log: insert_task_owner2$(EXEEXT) @p='insert_task_owner2$(EXEEXT)'; \ b='insert_task_owner2'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) insert_task_owner_data.log: insert_task_owner_data$(EXEEXT) @p='insert_task_owner_data$(EXEEXT)'; \ b='insert_task_owner_data'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) matrix.log: matrix$(EXEEXT) @p='matrix$(EXEEXT)'; \ b='matrix'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) matrix2.log: matrix2$(EXEEXT) @p='matrix2$(EXEEXT)'; \ b='matrix2'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_barrier.log: mpi_barrier$(EXEEXT) @p='mpi_barrier$(EXEEXT)'; \ b='mpi_barrier'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_detached_tag.log: mpi_detached_tag$(EXEEXT) @p='mpi_detached_tag$(EXEEXT)'; \ b='mpi_detached_tag'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_earlyrecv.log: mpi_earlyrecv$(EXEEXT) @p='mpi_earlyrecv$(EXEEXT)'; \ b='mpi_earlyrecv'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_irecv.log: mpi_irecv$(EXEEXT) @p='mpi_irecv$(EXEEXT)'; \ b='mpi_irecv'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_irecv_detached.log: mpi_irecv_detached$(EXEEXT) @p='mpi_irecv_detached$(EXEEXT)'; \ b='mpi_irecv_detached'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_isend.log: mpi_isend$(EXEEXT) @p='mpi_isend$(EXEEXT)'; \ b='mpi_isend'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_isend_detached.log: mpi_isend_detached$(EXEEXT) @p='mpi_isend_detached$(EXEEXT)'; \ b='mpi_isend_detached'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_reduction.log: mpi_reduction$(EXEEXT) @p='mpi_reduction$(EXEEXT)'; \ b='mpi_reduction'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_redux.log: mpi_redux$(EXEEXT) @p='mpi_redux$(EXEEXT)'; \ b='mpi_redux'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_scatter_gather.log: mpi_scatter_gather$(EXEEXT) @p='mpi_scatter_gather$(EXEEXT)'; \ b='mpi_scatter_gather'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_test.log: mpi_test$(EXEEXT) @p='mpi_test$(EXEEXT)'; \ b='mpi_test'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) pingpong.log: pingpong$(EXEEXT) @p='pingpong$(EXEEXT)'; \ b='pingpong'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) policy_selection2.log: policy_selection2$(EXEEXT) @p='policy_selection2$(EXEEXT)'; \ b='policy_selection2'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) ring.log: ring$(EXEEXT) @p='ring$(EXEEXT)'; \ b='ring'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) ring_async.log: ring_async$(EXEEXT) @p='ring_async$(EXEEXT)'; \ b='ring_async'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) ring_async_implicit.log: ring_async_implicit$(EXEEXT) @p='ring_async_implicit$(EXEEXT)'; \ b='ring_async_implicit'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) ring_sync.log: ring_sync$(EXEEXT) @p='ring_sync$(EXEEXT)'; \ b='ring_sync'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) ring_sync_detached.log: ring_sync_detached$(EXEEXT) @p='ring_sync_detached$(EXEEXT)'; \ b='ring_sync_detached'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) temporary.log: temporary$(EXEEXT) @p='temporary$(EXEEXT)'; \ b='temporary'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) data_cpy.log: data_cpy$(EXEEXT) @p='data_cpy$(EXEEXT)'; \ b='data_cpy'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_data_cpy.log: mpi_data_cpy$(EXEEXT) @p='mpi_data_cpy$(EXEEXT)'; \ b='mpi_data_cpy'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) display_bindings.log: display_bindings$(EXEEXT) @p='display_bindings$(EXEEXT)'; \ b='display_bindings'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_earlyrecv2.log: mpi_earlyrecv2$(EXEEXT) @p='mpi_earlyrecv2$(EXEEXT)'; \ b='mpi_earlyrecv2'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_earlyrecv2_sync.log: mpi_earlyrecv2_sync$(EXEEXT) @p='mpi_earlyrecv2_sync$(EXEEXT)'; \ b='mpi_earlyrecv2_sync'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) block_interface.log: block_interface$(EXEEXT) @p='block_interface$(EXEEXT)'; \ b='block_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) block_interface_pinned.log: block_interface_pinned$(EXEEXT) @p='block_interface_pinned$(EXEEXT)'; \ b='block_interface_pinned'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) insert_task_compute.log: insert_task_compute$(EXEEXT) @p='insert_task_compute$(EXEEXT)'; \ b='insert_task_compute'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) insert_task_sent_cache.log: insert_task_sent_cache$(EXEEXT) @p='insert_task_sent_cache$(EXEEXT)'; \ b='insert_task_sent_cache'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) insert_task_recv_cache.log: insert_task_recv_cache$(EXEEXT) @p='insert_task_recv_cache$(EXEEXT)'; \ b='insert_task_recv_cache'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) insert_task_seq.log: insert_task_seq$(EXEEXT) @p='insert_task_seq$(EXEEXT)'; \ b='insert_task_seq'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) tags_allocate.log: tags_allocate$(EXEEXT) @p='tags_allocate$(EXEEXT)'; \ b='tags_allocate'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) tags_checking.log: tags_checking$(EXEEXT) @p='tags_checking$(EXEEXT)'; \ b='tags_checking'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sync.log: sync$(EXEEXT) @p='sync$(EXEEXT)'; \ b='sync'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) coop.log: coop$(EXEEXT) @p='coop$(EXEEXT)'; \ b='coop'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) coop_datatype.log: coop_datatype$(EXEEXT) @p='coop_datatype$(EXEEXT)'; \ b='coop_datatype'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) coop_large.log: coop_large$(EXEEXT) @p='coop_large$(EXEEXT)'; \ b='coop_large'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) coop_many.log: coop_many$(EXEEXT) @p='coop_many$(EXEEXT)'; \ b='coop_many'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) coop_acknowledgement.log: coop_acknowledgement$(EXEEXT) @p='coop_acknowledgement$(EXEEXT)'; \ b='coop_acknowledgement'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) coop_recv_not_yet_posted.log: coop_recv_not_yet_posted$(EXEEXT) @p='coop_recv_not_yet_posted$(EXEEXT)'; \ b='coop_recv_not_yet_posted'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) coop_chained_sends.log: coop_chained_sends$(EXEEXT) @p='coop_chained_sends$(EXEEXT)'; \ b='coop_chained_sends'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) coop_wrong_order.log: coop_wrong_order$(EXEEXT) @p='coop_wrong_order$(EXEEXT)'; \ b='coop_wrong_order'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) coop_without_task.log: coop_without_task$(EXEEXT) @p='coop_without_task$(EXEEXT)'; \ b='coop_without_task'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) coop_user_defined_datatype.log: coop_user_defined_datatype$(EXEEXT) @p='coop_user_defined_datatype$(EXEEXT)'; \ b='coop_user_defined_datatype'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) coop_recv_wait_finalize.log: coop_recv_wait_finalize$(EXEEXT) @p='coop_recv_wait_finalize$(EXEEXT)'; \ b='coop_recv_wait_finalize'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) coop_insert_task.log: coop_insert_task$(EXEEXT) @p='coop_insert_task$(EXEEXT)'; \ b='coop_insert_task'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) coop_cache.log: coop_cache$(EXEEXT) @p='coop_cache$(EXEEXT)'; \ b='coop_cache'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mpi_task_submit.log: mpi_task_submit$(EXEEXT) @p='mpi_task_submit$(EXEEXT)'; \ b='mpi_task_submit'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) load_balancer.log: load_balancer$(EXEEXT) @p='load_balancer$(EXEEXT)'; \ b='load_balancer'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) policy_register_toomany.log: policy_register_toomany$(EXEEXT) @p='policy_register_toomany$(EXEEXT)'; \ b='policy_register_toomany'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) policy_unregister.log: policy_unregister$(EXEEXT) @p='policy_unregister$(EXEEXT)'; \ b='policy_unregister'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_redefine.log: starpu_redefine$(EXEEXT) @p='starpu_redefine$(EXEEXT)'; \ b='starpu_redefine'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) check-am all-am: Makefile $(PROGRAMS) installdirs: for dir in "$(DESTDIR)$(examplebindir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-am install-exec: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -rm -f ../../examples/interface/$(DEPDIR)/$(am__dirstamp) -rm -f ../../examples/interface/$(am__dirstamp) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) clean: clean-am clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ clean-libtool clean-noinstPROGRAMS mostlyclean-am distclean: distclean-am -rm -f ../../examples/interface/$(DEPDIR)/complex_interface.Po -rm -f ./$(DEPDIR)/attr.Po -rm -f ./$(DEPDIR)/block_interface.Po -rm -f ./$(DEPDIR)/block_interface_pinned.Po -rm -f ./$(DEPDIR)/broadcast.Po -rm -f ./$(DEPDIR)/callback.Po -rm -f ./$(DEPDIR)/checkpoints.Po -rm -f ./$(DEPDIR)/coop.Po -rm -f ./$(DEPDIR)/coop_acknowledgement.Po -rm -f ./$(DEPDIR)/coop_cache.Po -rm -f ./$(DEPDIR)/coop_chained_sends.Po -rm -f ./$(DEPDIR)/coop_datatype.Po -rm -f ./$(DEPDIR)/coop_insert_task.Po -rm -f ./$(DEPDIR)/coop_large.Po -rm -f ./$(DEPDIR)/coop_many.Po -rm -f ./$(DEPDIR)/coop_recv_not_yet_posted.Po -rm -f ./$(DEPDIR)/coop_recv_wait_finalize.Po -rm -f ./$(DEPDIR)/coop_user_defined_datatype.Po -rm -f ./$(DEPDIR)/coop_without_task.Po -rm -f ./$(DEPDIR)/coop_wrong_order.Po -rm -f ./$(DEPDIR)/data_cpy.Po -rm -f ./$(DEPDIR)/datatypes.Po -rm -f ./$(DEPDIR)/display_bindings.Po -rm -f ./$(DEPDIR)/driver.Po -rm -f ./$(DEPDIR)/early_request.Po -rm -f ./$(DEPDIR)/early_stuff-early_stuff.Po -rm -f ./$(DEPDIR)/gather.Po -rm -f ./$(DEPDIR)/gather2.Po -rm -f ./$(DEPDIR)/insert_task.Po -rm -f ./$(DEPDIR)/insert_task_block.Po -rm -f ./$(DEPDIR)/insert_task_can_execute.Po -rm -f ./$(DEPDIR)/insert_task_compute.Po -rm -f ./$(DEPDIR)/insert_task_count.Po -rm -f ./$(DEPDIR)/insert_task_dyn_handles.Po -rm -f ./$(DEPDIR)/insert_task_node_choice.Po -rm -f ./$(DEPDIR)/insert_task_owner.Po -rm -f ./$(DEPDIR)/insert_task_owner2.Po -rm -f ./$(DEPDIR)/insert_task_owner_data.Po -rm -f ./$(DEPDIR)/insert_task_recv_cache.Po -rm -f ./$(DEPDIR)/insert_task_sent_cache.Po -rm -f ./$(DEPDIR)/insert_task_seq.Po -rm -f ./$(DEPDIR)/insert_task_tags.Po -rm -f ./$(DEPDIR)/load_balancer.Po -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f ./$(DEPDIR)/matrix.Po -rm -f ./$(DEPDIR)/matrix2.Po -rm -f ./$(DEPDIR)/mpi_barrier.Po -rm -f ./$(DEPDIR)/mpi_data_cpy.Po -rm -f ./$(DEPDIR)/mpi_detached_tag.Po -rm -f ./$(DEPDIR)/mpi_earlyrecv.Po -rm -f ./$(DEPDIR)/mpi_earlyrecv2.Po -rm -f ./$(DEPDIR)/mpi_earlyrecv2_sync.Po -rm -f ./$(DEPDIR)/mpi_irecv.Po -rm -f ./$(DEPDIR)/mpi_irecv_detached.Po -rm -f ./$(DEPDIR)/mpi_isend.Po -rm -f ./$(DEPDIR)/mpi_isend_detached.Po -rm -f ./$(DEPDIR)/mpi_reduction.Po -rm -f ./$(DEPDIR)/mpi_reduction_kernels.Po -rm -f ./$(DEPDIR)/mpi_redux.Po -rm -f ./$(DEPDIR)/mpi_scatter_gather.Po -rm -f ./$(DEPDIR)/mpi_task_submit.Po -rm -f ./$(DEPDIR)/mpi_test.Po -rm -f ./$(DEPDIR)/multiple_send.Po -rm -f ./$(DEPDIR)/ndim_interface.Po -rm -f ./$(DEPDIR)/nothing.Po -rm -f ./$(DEPDIR)/pingpong.Po -rm -f ./$(DEPDIR)/policy_register.Po -rm -f ./$(DEPDIR)/policy_register_many.Po -rm -f ./$(DEPDIR)/policy_register_toomany.Po -rm -f ./$(DEPDIR)/policy_selection.Po -rm -f ./$(DEPDIR)/policy_selection2.Po -rm -f ./$(DEPDIR)/policy_unregister.Po -rm -f ./$(DEPDIR)/ring.Po -rm -f ./$(DEPDIR)/ring_async.Po -rm -f ./$(DEPDIR)/ring_async_implicit.Po -rm -f ./$(DEPDIR)/ring_sync.Po -rm -f ./$(DEPDIR)/ring_sync_detached.Po -rm -f ./$(DEPDIR)/star.Po -rm -f ./$(DEPDIR)/starpu_redefine.Po -rm -f ./$(DEPDIR)/stats.Po -rm -f ./$(DEPDIR)/sync.Po -rm -f ./$(DEPDIR)/tags_allocate.Po -rm -f ./$(DEPDIR)/tags_checking.Po -rm -f ./$(DEPDIR)/temporary.Po -rm -f ./$(DEPDIR)/user_defined_datatype.Po -rm -f ./$(DEPDIR)/wait_for_all.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-examplebinPROGRAMS install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f ../../examples/interface/$(DEPDIR)/complex_interface.Po -rm -f ./$(DEPDIR)/attr.Po -rm -f ./$(DEPDIR)/block_interface.Po -rm -f ./$(DEPDIR)/block_interface_pinned.Po -rm -f ./$(DEPDIR)/broadcast.Po -rm -f ./$(DEPDIR)/callback.Po -rm -f ./$(DEPDIR)/checkpoints.Po -rm -f ./$(DEPDIR)/coop.Po -rm -f ./$(DEPDIR)/coop_acknowledgement.Po -rm -f ./$(DEPDIR)/coop_cache.Po -rm -f ./$(DEPDIR)/coop_chained_sends.Po -rm -f ./$(DEPDIR)/coop_datatype.Po -rm -f ./$(DEPDIR)/coop_insert_task.Po -rm -f ./$(DEPDIR)/coop_large.Po -rm -f ./$(DEPDIR)/coop_many.Po -rm -f ./$(DEPDIR)/coop_recv_not_yet_posted.Po -rm -f ./$(DEPDIR)/coop_recv_wait_finalize.Po -rm -f ./$(DEPDIR)/coop_user_defined_datatype.Po -rm -f ./$(DEPDIR)/coop_without_task.Po -rm -f ./$(DEPDIR)/coop_wrong_order.Po -rm -f ./$(DEPDIR)/data_cpy.Po -rm -f ./$(DEPDIR)/datatypes.Po -rm -f ./$(DEPDIR)/display_bindings.Po -rm -f ./$(DEPDIR)/driver.Po -rm -f ./$(DEPDIR)/early_request.Po -rm -f ./$(DEPDIR)/early_stuff-early_stuff.Po -rm -f ./$(DEPDIR)/gather.Po -rm -f ./$(DEPDIR)/gather2.Po -rm -f ./$(DEPDIR)/insert_task.Po -rm -f ./$(DEPDIR)/insert_task_block.Po -rm -f ./$(DEPDIR)/insert_task_can_execute.Po -rm -f ./$(DEPDIR)/insert_task_compute.Po -rm -f ./$(DEPDIR)/insert_task_count.Po -rm -f ./$(DEPDIR)/insert_task_dyn_handles.Po -rm -f ./$(DEPDIR)/insert_task_node_choice.Po -rm -f ./$(DEPDIR)/insert_task_owner.Po -rm -f ./$(DEPDIR)/insert_task_owner2.Po -rm -f ./$(DEPDIR)/insert_task_owner_data.Po -rm -f ./$(DEPDIR)/insert_task_recv_cache.Po -rm -f ./$(DEPDIR)/insert_task_sent_cache.Po -rm -f ./$(DEPDIR)/insert_task_seq.Po -rm -f ./$(DEPDIR)/insert_task_tags.Po -rm -f ./$(DEPDIR)/load_balancer.Po -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f ./$(DEPDIR)/matrix.Po -rm -f ./$(DEPDIR)/matrix2.Po -rm -f ./$(DEPDIR)/mpi_barrier.Po -rm -f ./$(DEPDIR)/mpi_data_cpy.Po -rm -f ./$(DEPDIR)/mpi_detached_tag.Po -rm -f ./$(DEPDIR)/mpi_earlyrecv.Po -rm -f ./$(DEPDIR)/mpi_earlyrecv2.Po -rm -f ./$(DEPDIR)/mpi_earlyrecv2_sync.Po -rm -f ./$(DEPDIR)/mpi_irecv.Po -rm -f ./$(DEPDIR)/mpi_irecv_detached.Po -rm -f ./$(DEPDIR)/mpi_isend.Po -rm -f ./$(DEPDIR)/mpi_isend_detached.Po -rm -f ./$(DEPDIR)/mpi_reduction.Po -rm -f ./$(DEPDIR)/mpi_reduction_kernels.Po -rm -f ./$(DEPDIR)/mpi_redux.Po -rm -f ./$(DEPDIR)/mpi_scatter_gather.Po -rm -f ./$(DEPDIR)/mpi_task_submit.Po -rm -f ./$(DEPDIR)/mpi_test.Po -rm -f ./$(DEPDIR)/multiple_send.Po -rm -f ./$(DEPDIR)/ndim_interface.Po -rm -f ./$(DEPDIR)/nothing.Po -rm -f ./$(DEPDIR)/pingpong.Po -rm -f ./$(DEPDIR)/policy_register.Po -rm -f ./$(DEPDIR)/policy_register_many.Po -rm -f ./$(DEPDIR)/policy_register_toomany.Po -rm -f ./$(DEPDIR)/policy_selection.Po -rm -f ./$(DEPDIR)/policy_selection2.Po -rm -f ./$(DEPDIR)/policy_unregister.Po -rm -f ./$(DEPDIR)/ring.Po -rm -f ./$(DEPDIR)/ring_async.Po -rm -f ./$(DEPDIR)/ring_async_implicit.Po -rm -f ./$(DEPDIR)/ring_sync.Po -rm -f ./$(DEPDIR)/ring_sync_detached.Po -rm -f ./$(DEPDIR)/star.Po -rm -f ./$(DEPDIR)/starpu_redefine.Po -rm -f ./$(DEPDIR)/stats.Po -rm -f ./$(DEPDIR)/sync.Po -rm -f ./$(DEPDIR)/tags_allocate.Po -rm -f ./$(DEPDIR)/tags_checking.Po -rm -f ./$(DEPDIR)/temporary.Po -rm -f ./$(DEPDIR)/user_defined_datatype.Po -rm -f ./$(DEPDIR)/wait_for_all.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-examplebinPROGRAMS .MAKE: all check check-am install install-am install-exec \ install-strip .PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ check-am clean clean-checkPROGRAMS clean-examplebinPROGRAMS \ clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ ctags ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am \ install-examplebinPROGRAMS install-exec install-exec-am \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip installcheck installcheck-am \ installdirs maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ uninstall uninstall-am uninstall-examplebinPROGRAMS .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # # Test loading goes through a lot of launchers: # # - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. # either mpirun or starpu_tcpipexec # # - $(LOADER), i.e. tests/loader, is then called to implement timeout, running # gdb, etc. But if it detects that the test is a .sh script, it just executes # it # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # to run the program through e.g. valgrind.sh # # When the program is a shell script, additionally: # # - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) # # - $(MS_LAUNCHER) is called to run the test through starpu_msexec # # - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program # through it. # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # export LAUNCHER @HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL export MS_LAUNCHER LAUNCHER ?= MS_LAUNCHER ?= @STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr export LSAN_OPTIONS export TSAN_OPTIONS # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/mpi/tests/attr.c000066400000000000000000000024701507764646700170600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #include int main(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED) { int flag; int64_t *value; int64_t rvalue; starpu_mpi_comm_get_attr(MPI_COMM_WORLD, 42, NULL, &flag); STARPU_ASSERT_MSG(flag == 0, "starpu_mpi_comm_get_attr was called with invalid argument\n"); starpu_mpi_comm_get_attr(MPI_COMM_WORLD, STARPU_MPI_TAG_UB, &value, &flag); STARPU_ASSERT_MSG(flag == 1, "starpu_mpi_comm_get_attr was called with valid argument\n"); rvalue = *value; FPRINTF(stderr, "Value: %"PRIi64"\n", *value); FPRINTF(stderr, "Value: %"PRIi64"\n", rvalue); return 0; } starpu-1.4.9+dfsg/mpi/tests/block_interface.c000066400000000000000000000100671507764646700212210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define NITER 4 #else # define NITER 2048 #endif #define BIGSIZE 32 #define SIZE 8 int main(int argc, char **argv) { int ret, rank, size; int mpi_init; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 2) { if (rank == 0) FPRINTF(stderr, "We need at least 2 processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return STARPU_TEST_SKIPPED; } /* Node 0 will allocate a big block and only register an inner part of * it as the block data, Node 1 will allocate a block of small size and * register it directly. Node 0 and 1 will then exchange the content of * their blocks. */ float *block = NULL; starpu_data_handle_t block_handle = NULL; if (rank == 0) { block = calloc(BIGSIZE*BIGSIZE*BIGSIZE, sizeof(float)); assert(block); /* fill the inner block */ unsigned i, j, k; for (k = 0; k < SIZE; k++) for (j = 0; j < SIZE; j++) for (i = 0; i < SIZE; i++) { block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f; } starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE, SIZE, SIZE, SIZE, sizeof(float)); } else if (rank == 1) { block = calloc(SIZE*SIZE*SIZE, sizeof(float)); assert(block); starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, SIZE, SIZE*SIZE, SIZE, SIZE, SIZE, sizeof(float)); } if (rank == 0) { MPI_Status status; ret = starpu_mpi_send(block_handle, 1, 0x42, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_recv(block_handle, 1, 0x1337, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); /* check the content of the block */ ret = starpu_data_acquire(block_handle, STARPU_R); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); unsigned i, j, k; for (k = 0; k < SIZE; k++) for (j = 0; j < SIZE; j++) for (i = 0; i < SIZE; i++) { assert(block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] == 33.0f); } starpu_data_release(block_handle); } else if (rank == 1) { MPI_Status status; ret = starpu_mpi_recv(block_handle, 0, 0x42, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); /* check the content of the block and modify it */ ret = starpu_data_acquire(block_handle, STARPU_RW); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); unsigned i, j, k; for (k = 0; k < SIZE; k++) for (j = 0; j < SIZE; j++) for (i = 0; i < SIZE; i++) { assert(block[i + j*SIZE + k*SIZE*SIZE] == 1.0f); block[i + j*SIZE + k*SIZE*SIZE] = 33.0f; } starpu_data_release(block_handle); ret = starpu_mpi_send(block_handle, 0, 0x1337, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } if (rank == 0 || rank == 1) { starpu_data_unregister(block_handle); free(block); } FPRINTF(stdout, "Rank %d is done\n", rank); fflush(stdout); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/block_interface_pinned.c000066400000000000000000000103071507764646700225530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define NITER 16 #else # define NITER 2048 #endif #define BIGSIZE 128 #define SIZE 64 int main(int argc, char **argv) { int ret, rank, size; int mpi_init; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 2) { if (rank == 0) FPRINTF(stderr, "We need at least 2 processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return STARPU_TEST_SKIPPED; } /* Node 0 will allocate a big block and only register an inner part of * it as the block data, Node 1 will allocate a block of small size and * register it directly. Node 0 and 1 will then exchange the content of * their blocks. */ float *block = NULL; starpu_data_handle_t block_handle = NULL; if (rank == 0) { starpu_malloc((void **)&block, BIGSIZE*BIGSIZE*BIGSIZE*sizeof(float)); memset(block, 0, BIGSIZE*BIGSIZE*BIGSIZE*sizeof(float)); /* fill the inner block */ unsigned i, j, k; for (k = 0; k < SIZE; k++) for (j = 0; j < SIZE; j++) for (i = 0; i < SIZE; i++) { block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] = 1.0f; } starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, BIGSIZE, BIGSIZE*BIGSIZE, SIZE, SIZE, SIZE, sizeof(float)); } else if (rank == 1) { starpu_malloc((void **)&block, SIZE*SIZE*SIZE*sizeof(float)); memset(block, 0, SIZE*SIZE*SIZE*sizeof(float)); starpu_block_data_register(&block_handle, STARPU_MAIN_RAM, (uintptr_t)block, SIZE, SIZE*SIZE, SIZE, SIZE, SIZE, sizeof(float)); } if (rank == 0) { MPI_Status status; ret = starpu_mpi_send(block_handle, 1, 0x42, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_recv(block_handle, 1, 0x1337, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); /* check the content of the block */ ret = starpu_data_acquire(block_handle, STARPU_R); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); unsigned i, j, k; for (k = 0; k < SIZE; k++) for (j = 0; j < SIZE; j++) for (i = 0; i < SIZE; i++) { assert(block[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE] == 33.0f); } starpu_data_release(block_handle); } else if (rank == 1) { MPI_Status status; ret = starpu_mpi_recv(block_handle, 0, 0x42, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); /* check the content of the block and modify it */ ret = starpu_data_acquire(block_handle, STARPU_RW); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); unsigned i, j, k; for (k = 0; k < SIZE; k++) for (j = 0; j < SIZE; j++) for (i = 0; i < SIZE; i++) { assert(block[i + j*SIZE + k*SIZE*SIZE] == 1.0f); block[i + j*SIZE + k*SIZE*SIZE] = 33.0f; } starpu_data_release(block_handle); ret = starpu_mpi_send(block_handle, 0, 0x1337, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } if (rank == 0 || rank == 1) { starpu_data_unregister(block_handle); starpu_free_noflag(block, BIGSIZE*BIGSIZE*BIGSIZE*sizeof(float)); } FPRINTF(stdout, "Rank %d is done\n", rank); fflush(stdout); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/broadcast.c000066400000000000000000000061411507764646700200470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" void wait_CPU(void *descr[], void *args) { int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]); int val; starpu_codelet_unpack_args(args, &val); *var = val; starpu_sleep(1); } static struct starpu_codelet cl = { .cpu_funcs = { wait_CPU }, .cpu_funcs_name = { "wait_CPU" }, .nbuffers = 1, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .modes = { STARPU_W }, }; int main(int argc, char **argv) { int ret, rank, size; starpu_data_handle_t handle; int var=-1; int mpi_init; MPI_Status status; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); if (rank == 0) { int val, n; val = 42; ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); for(n = 1 ; n < size ; n++) { FPRINTF_MPI(stderr, "sending data to %d\n", n); ret = starpu_mpi_isend_detached(handle, n, 0, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } val = 43; ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); for(n = 1 ; n < size ; n++) { FPRINTF_MPI(stderr, "sending data to %d\n", n); ret = starpu_mpi_isend_detached(handle, n, 0, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } } else { ret = starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(handle, STARPU_R); STARPU_ASSERT(var == 42); starpu_data_release(handle); ret = starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(handle, STARPU_R); STARPU_ASSERT(var == 43); starpu_data_release(handle); FPRINTF_MPI(stderr, "received data\n"); } starpu_data_unregister(handle); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/callback.c000066400000000000000000000065211507764646700176430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" static int expected_x=40; static int expected_y=12; void my_func(void *descr[], void *_args) { (void)descr; (void)_args; FPRINTF_MPI(stderr, "i am here\n"); } struct starpu_codelet my_codelet = { .cpu_funcs = {my_func}, .cuda_funcs = {my_func}, .opencl_funcs = {my_func}, .model = &starpu_perfmodel_nop, }; static void callback(void *ptr) { int *x = (int *)ptr; FPRINTF_MPI(stderr, "x=%d\n", *x); STARPU_ASSERT_MSG(*x == expected_x, "%d != %d\n", *x, expected_x); (*x)++; } static void prologue_callback(void *ptr) { int *y = (int *)ptr; FPRINTF_MPI(stderr, "y=%d\n", *y); STARPU_ASSERT_MSG(*y == expected_y, "%d != %d\n", *y, expected_y); (*y)++; } int main(int argc, char **argv) { int ret; int x=40; int y=12; int rank, size; struct starpu_conf conf; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; conf.nopencl = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); ret = starpu_mpi_task_insert(MPI_COMM_WORLD, NULL, STARPU_EXECUTE_ON_NODE, 0, STARPU_CALLBACK_WITH_ARG_NFREE, callback, &x, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); if (rank == 0) expected_x ++; ret = starpu_mpi_task_insert(MPI_COMM_WORLD, NULL, STARPU_EXECUTE_ON_NODE, 0, STARPU_CALLBACK, callback, STARPU_CALLBACK_ARG_NFREE, &x, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); if (rank == 0) expected_x ++; STARPU_ASSERT_MSG(x == expected_x, "x should be equal to %d and not %d\n", expected_x, x); ret = starpu_mpi_task_insert(MPI_COMM_WORLD, NULL, STARPU_EXECUTE_ON_NODE, 0, STARPU_PROLOGUE_CALLBACK, prologue_callback, STARPU_PROLOGUE_CALLBACK_ARG_NFREE, &y, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); if (rank == 0) expected_y ++; ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &my_codelet, STARPU_EXECUTE_ON_NODE, 0, STARPU_PROLOGUE_CALLBACK_POP, prologue_callback, STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE, &y, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); if (rank == 0) expected_y ++; STARPU_ASSERT_MSG(y == expected_y, "y should be equal to %d and not %d\n", expected_y, y); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/mpi/tests/checkpoints.c000066400000000000000000000124511507764646700204200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #define ARRAY_SIZE 12 int nb_nodes; int me; int backup_of(int _me) { if (_me==0) return 1; else return 0; return (_me+1)%nb_nodes; } int pseudotest_checkpoint_template_register(int argc, char* argv[]) { int mpi_init; starpu_data_handle_t h; starpu_data_handle_t h_array[ARRAY_SIZE]; starpu_mpi_checkpoint_template_t cp_template1, cp_template2; int val = 42; int val2 = 1234; int array[ARRAY_SIZE]; int ret; struct starpu_conf conf; //init array for (int i=0 ; i #include "helper.h" void task_cpu_func(void *descr[], void *args) { int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]); int val; starpu_codelet_unpack_args(args, &val); *var = val; } static struct starpu_codelet cl = { .cpu_funcs = { task_cpu_func }, .cpu_funcs_name = { "task_cpu_func" }, .nbuffers = 1, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .modes = { STARPU_W }, }; int main(int argc, char **argv) { int ret, rank, size; starpu_data_handle_t handle; int var = -1; int mpi_init; MPI_Status status; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); if (rank == 0) { int val, n; val = 42; ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* The task previously inserted should be enough to detect the coop, * but to be sure, indicate the number of sends requests before really * sending the data: */ starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); for(n = 1 ; n < size ; n++) { FPRINTF_MPI(stderr, "sending data to %d with prio %d\n", n, size-n); ret = starpu_mpi_isend_detached_prio(handle, n, 0, size-n, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached_prio"); } } else { ret = starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(handle, STARPU_R); printf("[%d] received data: %d\n", rank, var); STARPU_ASSERT(var == 42); starpu_data_release(handle); FPRINTF_MPI(stderr, "received data\n"); } starpu_data_unregister(handle); printf("[%d] end\n", rank); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/coop_acknowledgement.c000066400000000000000000000076341507764646700223040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" void task_cpu_func(void *descr[], void *args) { int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]); int val; starpu_codelet_unpack_args(args, &val); *var = val; } static struct starpu_codelet cl = { .cpu_funcs = { task_cpu_func }, .cpu_funcs_name = { "task_cpu_func" }, .nbuffers = 1, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .modes = { STARPU_W }, }; int main(int argc, char **argv) { int ret, rank, size; starpu_data_handle_t handle; int var = -1; int mpi_init; MPI_Status status; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); if (rank == 0) { int val, i; starpu_data_handle_t* ack_handles = malloc((size-1) * sizeof(starpu_data_handle_t)); int* acks = calloc(size - 1, sizeof(int)); starpu_mpi_req* ack_reqs = calloc((size-1), sizeof(starpu_mpi_req)); val = 42; ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* The task previously inserted should be enough to detect the coop, * but to be sure, indicate the number of sends requests before really * sending the data: */ starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); for(i = 1 ; i < size ; i++) { starpu_variable_data_register(&ack_handles[i-1], STARPU_MAIN_RAM, (uintptr_t) &acks[i-1], sizeof(int)); ret = starpu_mpi_irecv(ack_handles[i-1], &ack_reqs[i-1], i, 0, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); FPRINTF_MPI(stderr, "sending data to %d\n", i); ret = starpu_mpi_isend_detached(handle, i, 0, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } for(i = 0; i < size-1; i++) { ret = starpu_mpi_wait(&ack_reqs[i], MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); starpu_data_acquire(ack_handles[i], STARPU_R); STARPU_ASSERT(acks[i] == 1); starpu_data_release(ack_handles[i]); starpu_data_unregister(ack_handles[i]); } free(ack_handles); free(acks); free(ack_reqs); } else { starpu_data_handle_t ack_handle; int ack = 1; starpu_variable_data_register(&ack_handle, STARPU_MAIN_RAM, (uintptr_t) &ack, sizeof(ack)); ret = starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(handle, STARPU_R); printf("[%d] received data: %d\n", rank, var); STARPU_ASSERT(var == 42); ret = starpu_mpi_send(ack_handle, 0, 0, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); starpu_data_unregister(ack_handle); starpu_data_release(handle); FPRINTF_MPI(stderr, "received data\n"); } starpu_data_unregister(handle); printf("[%d] end\n", rank); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/coop_cache.c000066400000000000000000000103351507764646700201700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in childING.LGPL for more details. */ /* This test generates a task graph that would lead to duplicate recipients if * MPI cache is disabled: output of the "parent" task is required by all * "child" tasks, each MPI rank executing two "child" tasks. * * Duplicates in the list of recipients of a broadcasts can lead to a deadlock. * In the NewMadeleine implementation, the following will happen, when cache * is disabled: * - Rank 0 will trigger a broadcast to ranks {1, 2, 3, 1, 2} * - Ranks 1, 2, and 3 will post *one* recv for the data tag 0 * - In the binomial routing tree, the rank 2 will forward the data to rank 2 (so, itself) * - However, on rank 2, the first recv will be finalized only after all * forwards are done. But the forward to 2 can be finished only when the second * recv is posted. Posting the second recv will be done only after the first one * is finalized. Hence the deadlock. * */ #include #include "helper.h" #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else static void parent_cpu_func(void *descr[], void *args) { starpu_sleep(2); // Give time to submit other tasks and detect coop } static struct starpu_codelet parent_cl = { .cpu_funcs = { parent_cpu_func }, .cpu_funcs_name = { "parent_task" }, .nbuffers = 1, .modes = { STARPU_W } }; static void child_cpu_func(void* descr[], void* args) { // do nothing } static struct starpu_codelet child_cl = { .cpu_funcs = { child_cpu_func }, .cpu_funcs_name = { "child_task" }, .nbuffers = 2, .modes = { STARPU_R, STARPU_W } }; static inline int my_distrib(int x, int nb_nodes) { return x % nb_nodes; } static inline void do_test(starpu_mpi_tag_t *initial_tag, char* cache_enabled) { int ret, rank, worldsize, i; int* data; starpu_data_handle_t* handles; struct starpu_conf conf; setenv("STARPU_MPI_CACHE", cache_enabled, 1); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); int nblocks = 2 * worldsize; int **blocks = malloc(nblocks * sizeof(int*)); handles = malloc(nblocks*sizeof(starpu_data_handle_t)); for (i = 0; i < nblocks; i++) { int mpi_rank = my_distrib(i, worldsize); if (mpi_rank == rank) { blocks[i] = calloc(320*320, sizeof(float)); starpu_vector_data_register(&handles[i], STARPU_MAIN_RAM, (uintptr_t)blocks[i], 320*320, sizeof(float)); } else { blocks[i] = NULL; starpu_vector_data_register(&handles[i], -1, (uintptr_t)NULL, 320*320, sizeof(float)); } STARPU_ASSERT(handles[i] != NULL); starpu_mpi_data_register(handles[i], *initial_tag+i, mpi_rank); } starpu_mpi_task_insert(MPI_COMM_WORLD, &parent_cl, STARPU_W, handles[0], 0); for (i = 1; i < nblocks-1; i++) { starpu_mpi_task_insert(MPI_COMM_WORLD, &child_cl, STARPU_R, handles[0], STARPU_W, handles[i], 0); } starpu_task_wait_for_all(); for (i = 0; i < nblocks; i++) { starpu_data_unregister(handles[i]); if (my_distrib(i, worldsize) == rank) { free(blocks[i]); } } free(handles); free(blocks); *initial_tag += 2*worldsize; starpu_mpi_shutdown(); } int main(int argc, char **argv) { starpu_mpi_tag_t initial_tag = 0; MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED); do_test(&initial_tag, /* disable cache */ "0"); do_test(&initial_tag, /* enable cache */ "1"); MPI_Finalize(); return 0; } #endif starpu-1.4.9+dfsg/mpi/tests/coop_chained_sends.c000066400000000000000000000076601507764646700217230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #define NX 20 void scal_cpu_func(void *buffers[], void *cl_arg) { unsigned i; struct starpu_vector_interface *vector = buffers[0]; unsigned n = STARPU_VECTOR_GET_NX(vector); float *val = (float *) STARPU_VECTOR_GET_PTR(vector); /* scale the vector */ for (i = 0; i < n; i++) val[i] *= 2; } static struct starpu_codelet cl = { .where = STARPU_CPU, .cpu_funcs = { scal_cpu_func }, .cpu_funcs_name = { "scal_cpu_func" }, .nbuffers = 1, .modes = { STARPU_RW } }; int main(int argc, char **argv) { int ret, rank, size; starpu_data_handle_t handle; int mpi_init; int i = 0, n = 0; MPI_Status status; struct starpu_conf conf; float* vector = malloc(NX * sizeof(float)); for (i = 0; i < NX; i++) { vector[i] = 1.0f; } MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t) vector, NX, sizeof(float)); if (rank == 0) { ret = starpu_task_insert(&cl, STARPU_RW, handle, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* The task previously inserted should be enough to detect the coop, * but to be sure, indicate the number of sends requests before really * sending the data: */ starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); for (n = 1 ; n < size ; n++) { FPRINTF_MPI(stderr, "sending data to %d\n", n); ret = starpu_mpi_isend_detached(handle, n, 0, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } } else { ret = starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); FPRINTF_MPI(stderr, "received data\n"); starpu_data_acquire(handle, STARPU_R); STARPU_ASSERT_MSG(vector[0] == 2, "vector[0] = %f, expected 2\n", vector[0]); STARPU_ASSERT_MSG(vector[NX-1] == 2, "vector[%d] = %f, expected 2\n", NX-1, vector[NX-1]); starpu_data_release(handle); if (rank == 1) { ret = starpu_task_insert(&cl, STARPU_RW, handle, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-2); for (i = 2; i < size; i++) { FPRINTF_MPI(stderr, "sending data to %d\n", i); ret = starpu_mpi_isend_detached(handle, i, 1, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } } else { ret = starpu_mpi_recv(handle, 1, 1, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); FPRINTF_MPI(stderr, "received data\n"); starpu_data_acquire(handle, STARPU_R); STARPU_ASSERT_MSG(vector[0] == 4, "vector[0] = %f, expected 4\n", vector[0]); STARPU_ASSERT_MSG(vector[NX-1] == 4, "vector[%d] = %f, expected 4\n", NX-1, vector[NX-1]); starpu_data_release(handle); } } starpu_data_unregister(handle); printf("[%d] end\n", rank); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); free(vector); return 0; } starpu-1.4.9+dfsg/mpi/tests/coop_datatype.c000066400000000000000000000214121507764646700207360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" struct starpu_my_data_interface { enum starpu_data_interface_id id; /**< Identifier of the interface */ uintptr_t ptr; /**< local pointer of the data */ uintptr_t dev_handle; /**< device handle of the data. */ size_t offset; /**< offset in the data */ }; struct starpu_my_data { int d; char c; }; void _starpu_my_data_datatype_allocate(unsigned node, MPI_Datatype *mpi_datatype) { int ret; int blocklengths[2] = {1, 1}; MPI_Aint displacements[2]; MPI_Datatype types[2] = {MPI_INT, MPI_CHAR}; struct starpu_my_data *myinterface; myinterface = calloc(1, sizeof(struct starpu_my_data)); MPI_Get_address(myinterface, displacements); MPI_Get_address(&myinterface[0].c, displacements+1); displacements[1] -= displacements[0]; displacements[0] = 0; ret = MPI_Type_create_struct(2, blocklengths, displacements, types, mpi_datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed"); ret = MPI_Type_commit(mpi_datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed"); free(myinterface); } int starpu_my_data_datatype_allocate(starpu_data_handle_t handle, unsigned node, MPI_Datatype *mpi_datatype) { (void)handle; _starpu_my_data_datatype_allocate(node, mpi_datatype); return 0; } void starpu_my_data_datatype_free(MPI_Datatype *mpi_datatype) { int ret; ret = MPI_Type_free(mpi_datatype); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_free failed"); } static void data_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_my_data_interface *my_data_interface = (struct starpu_my_data_interface *) data_interface; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_my_data_interface *local_interface = (struct starpu_my_data_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->ptr = my_data_interface->ptr; local_interface->dev_handle = my_data_interface->dev_handle; local_interface->offset = my_data_interface->offset; } else { local_interface->ptr = 0; local_interface->dev_handle = 0; local_interface->offset = 0; } } } static starpu_ssize_t data_allocate_data_on_node(void *data_interface, unsigned node) { uintptr_t addr = 0, handle; struct starpu_my_data_interface *my_data_interface = (struct starpu_my_data_interface *) data_interface; starpu_ssize_t allocated_memory = sizeof(int)+sizeof(char); handle = starpu_malloc_on_node(node, allocated_memory); if (!handle) return -ENOMEM; if (starpu_node_get_kind(node) != STARPU_OPENCL_RAM) addr = handle; /* update the data properly in consequence */ my_data_interface->ptr = addr; my_data_interface->dev_handle = handle; my_data_interface->offset = 0; return allocated_memory; } static void data_free_data_on_node(void *data_interface, unsigned node) { struct starpu_my_data_interface *my_data_interface = (struct starpu_my_data_interface *) data_interface; starpu_free_on_node(node, my_data_interface->dev_handle, sizeof(int)+sizeof(char)); my_data_interface->ptr = 0; my_data_interface->dev_handle = 0; } static size_t data_get_size(starpu_data_handle_t handle) { (void)handle; return sizeof(int) + sizeof(char); } static size_t data_get_alloc_size(starpu_data_handle_t handle) { (void)handle; return sizeof(int) + sizeof(char); } static uint32_t data_footprint(starpu_data_handle_t handle) { struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return starpu_hash_crc32c_be(my_data->ptr, 0); } static int data_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { (void)handle; (void)node; (void)ptr; (void)count; STARPU_ASSERT_MSG(0, "The data interface has been registered with starpu_mpi_datatype_register(). Calling the pack_data function should not happen\n"); return 0; } static int data_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { (void)handle; (void)node; (void)ptr; STARPU_ASSERT_MSG(0, "The data interface has been registered with starpu_mpi_datatype_register(). Calling the unpack_data function should not happen\n"); return 0; } static int data_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { (void)handle; (void)node; (void)ptr; (void)count; STARPU_ASSERT_MSG(0, "The data interface has been registered with starpu_mpi_datatype_register(). Calling the unpack_data function should not happen\n"); return 0; } static starpu_ssize_t data_describe(void *data_interface, char *buf, size_t size) { struct starpu_my_data_interface *my_data = (struct starpu_my_data_interface *) data_interface; struct starpu_my_data *data = (struct starpu_my_data *)my_data->ptr; return snprintf(buf, size, "Data%d-%c", data->d, data->c); } static void *data_to_pointer(void *data_interface, unsigned node) { (void) node; struct starpu_my_data_interface *my_data_interface = data_interface; return (void*) my_data_interface->ptr; } static struct starpu_data_interface_ops interface_data_ops = { .register_data_handle = data_register_data_handle, .allocate_data_on_node = data_allocate_data_on_node, .free_data_on_node = data_free_data_on_node, .get_size = data_get_size, .get_alloc_size = data_get_alloc_size, .footprint = data_footprint, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct starpu_my_data_interface), .to_pointer = data_to_pointer, .pack_data = data_pack_data, .peek_data = data_peek_data, .unpack_data = data_unpack_data, .describe = data_describe }; void starpu_my_data_register(starpu_data_handle_t *handleptr, unsigned home_node, struct starpu_my_data *xc) { if (interface_data_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID) { interface_data_ops.interfaceid = starpu_data_interface_get_next_id(); starpu_mpi_interface_datatype_node_register(interface_data_ops.interfaceid, starpu_my_data_datatype_allocate, starpu_my_data_datatype_free); } struct starpu_my_data_interface data = { .id = interface_data_ops.interfaceid, .ptr = (uintptr_t) xc, .dev_handle = (uintptr_t) xc, .offset = 0, }; starpu_data_register(handleptr, home_node, &data, &interface_data_ops); } void starpu_my_data_shutdown(void) { starpu_mpi_interface_datatype_unregister(interface_data_ops.interfaceid); } int main(int argc, char **argv) { int rank, nodes, mpi_init; int ret; const int tag = 12; int i = 0; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); if (nodes < 2 || (starpu_cpu_worker_get_count() == 0)) { if (rank == 0) { if (nodes < 2) fprintf(stderr, "We need at least 2 processes.\n"); else fprintf(stderr, "We need at least 1 CPU.\n"); } starpu_mpi_shutdown(); return 77; } struct starpu_my_data my0; starpu_data_handle_t handle0; starpu_my_data_register(&handle0, STARPU_MAIN_RAM, &my0); if (rank == 0) { my0.d = 43; my0.c = 'm'; starpu_mpi_coop_sends_data_handle_nb_sends(handle0, nodes-1); for (i = 1; i < nodes; i++) { ret = starpu_mpi_isend_detached(handle0, i, tag, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } } else { my0.d = 23; my0.c = 'd'; ret = starpu_mpi_recv(handle0, 0, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(handle0, STARPU_R); printf("[%d] received: %d %c\n", rank, my0.d, my0.c); assert(my0.d == 43); assert(my0.c == 'm'); starpu_data_release(handle0); } starpu_mpi_wait_for_all(MPI_COMM_WORLD); starpu_mpi_barrier(MPI_COMM_WORLD); starpu_data_unregister(handle0); starpu_my_data_shutdown(); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/coop_insert_task.c000066400000000000000000000067201507764646700214560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Test to ensure coop are correctly detected even through the * starpu_mpi_task_insert() API. * * One task put an initial value in a buffer, then each node copies the content * of this buffer in a local buffer, within a task. Since each node needs the * initial buffer, this triggers a broadcast. */ #include #include "helper.h" #define TARGET_VALUE 42 static void init_cpu_func(void *descr[], void *args) { int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]); int val; starpu_codelet_unpack_args(args, &val); *var = val; starpu_sleep(2); // Give time to submit other tasks and detect coop } static struct starpu_codelet init_cl = { .cpu_funcs = { init_cpu_func }, .cpu_funcs_name = { "init_task" }, .nbuffers = 1, .modes = { STARPU_W } }; static void copy_cpu_func(void* descr[], void* args) { (void) args; int *var_src = (int*) STARPU_VARIABLE_GET_PTR(descr[0]); int *var_target = (int*) STARPU_VARIABLE_GET_PTR(descr[1]); *var_target = *var_src; } static struct starpu_codelet copy_cl = { .cpu_funcs = { copy_cpu_func }, .cpu_funcs_name = { "copy_task" }, .nbuffers = 2, .modes = { STARPU_R, STARPU_W } }; int main(int argc, char **argv) { int ret, rank, size, mpi_init, i; int* data; starpu_data_handle_t* handles; MPI_Status status; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); data = malloc(size*sizeof(int)); handles = malloc(size*sizeof(starpu_data_handle_t)); for (i = 0; i < size; i++) { if (i == rank) { starpu_variable_data_register(&handles[i], STARPU_MAIN_RAM, (uintptr_t)&data[i], sizeof(int)); } else { starpu_variable_data_register(&handles[i], -1, (uintptr_t)NULL, sizeof(int)); } STARPU_ASSERT(handles[i] != NULL); starpu_mpi_data_register(handles[i], i, i); } int val = TARGET_VALUE; starpu_mpi_task_insert(MPI_COMM_WORLD, &init_cl, STARPU_W, handles[0], STARPU_VALUE, &val, sizeof(val), 0); for (i = 1; i < size; i++) { starpu_mpi_task_insert(MPI_COMM_WORLD, ©_cl, STARPU_R, handles[0], STARPU_W, handles[i], 0); } starpu_data_acquire(handles[rank], STARPU_R); int* handle_ptr = (int*) starpu_variable_get_local_ptr(handles[rank]); printf("[%d] data: %d\n", rank, *handle_ptr); STARPU_ASSERT(*handle_ptr == TARGET_VALUE); starpu_data_release(handles[rank]); for (i = 0; i < size; i++) { starpu_data_unregister(handles[i]); } free(handles); free(data); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/coop_large.c000066400000000000000000000063511507764646700202220ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #define NX 4800000 void scal_cpu_func(void *buffers[], void *cl_arg) { unsigned i; float factor; struct starpu_vector_interface *vector = buffers[0]; unsigned n = STARPU_VECTOR_GET_NX(vector); float *val = (float *) STARPU_VECTOR_GET_PTR(vector); starpu_codelet_unpack_args(cl_arg, &factor); /* scale the vector */ for (i = 0; i < n; i++) val[i] *= factor; } static struct starpu_codelet cl = { .where = STARPU_CPU, .cpu_funcs = { scal_cpu_func }, .cpu_funcs_name = { "scal_cpu_func" }, .nbuffers = 1, .modes = { STARPU_RW } }; int main(int argc, char **argv) { int ret, rank, size; starpu_data_handle_t handle; int mpi_init; int i = 0; MPI_Status status; struct starpu_conf conf; float* vector = malloc(NX * sizeof(float)); for (i = 0; i < NX; i++) { vector[i] = 1.0f; } MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t) vector, NX, sizeof(float)); float factor = 3.14; if (rank == 0) { ret = starpu_task_insert(&cl, STARPU_RW, handle, STARPU_VALUE, &factor, sizeof(factor), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* The task previously inserted should be enough to detect the coop, * but to be sure, indicate the number of sends requests before really * sending the data: */ starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); for (i = 1 ; i < size ; i++) { FPRINTF_MPI(stderr, "sending data to %d\n", i); ret = starpu_mpi_isend_detached(handle, i, 0, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } } else { ret = starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(handle, STARPU_R); STARPU_ASSERT_MSG(vector[0] == factor, "vector[0] = %f, expected %f\n", vector[0], factor); STARPU_ASSERT_MSG(vector[NX-1] == factor, "vector[%d] = %f, expected %f\n", NX-1, vector[NX-1], factor); starpu_data_release(handle); FPRINTF_MPI(stderr, "received data\n"); } starpu_data_unregister(handle); printf("[%d] end\n", rank); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); free(vector); return 0; } starpu-1.4.9+dfsg/mpi/tests/coop_many.c000066400000000000000000000073771507764646700201050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Every rank asynchronously sends coop and receives from coop several times */ #include #include "helper.h" #define NX (256*256) #define NB_MCASTS 10 int main(int argc, char **argv) { int ret, rank, worldsize; int mpi_init; int i = 0, j = 0; MPI_Status status; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); float **vectors = malloc(NB_MCASTS*worldsize*sizeof(float*)); starpu_data_handle_t *handles = malloc(NB_MCASTS*worldsize*sizeof(starpu_data_handle_t)); starpu_mpi_req *reqs = malloc(NB_MCASTS*worldsize*sizeof(starpu_mpi_req)); for (i = 0; i < NB_MCASTS*worldsize; i++) { vectors[i] = malloc(NX*sizeof(float)); for (j = 0; j < NX; j++) { vectors[i][j] = i; } starpu_vector_data_register(&handles[i], STARPU_MAIN_RAM, (uintptr_t) vectors[i], NX, sizeof(float)); } int sender_rank = 0; // Submit all communications: for (sender_rank = 0; sender_rank < worldsize; sender_rank++) { for (i = 0; i < NB_MCASTS; i++) { int tag = sender_rank*NB_MCASTS+i; assert(tag < worldsize*NB_MCASTS); if (rank == sender_rank) { starpu_mpi_coop_sends_data_handle_nb_sends(handles[tag], worldsize-1); for (j = 0; j < worldsize; j++) { if (j != sender_rank) { ret = starpu_mpi_isend_detached(handles[tag], j, tag, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } } } else { ret = starpu_mpi_irecv(handles[tag], &reqs[tag], sender_rank, tag, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); } } } // Wait for all receives: for (sender_rank = 0; sender_rank < worldsize; sender_rank++) { for (i = 0; i < NB_MCASTS; i++) { int tag = sender_rank*NB_MCASTS+i; assert(tag < worldsize*NB_MCASTS); if (rank != sender_rank) { ret = starpu_mpi_wait(&reqs[tag], MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); starpu_data_acquire(handles[i], STARPU_R); STARPU_ASSERT_MSG(vectors[i][0] == i, "vectors[%d][0] = %f, expected %d\n", i, vectors[i][0], i); STARPU_ASSERT_MSG(vectors[i][NX-1] == i, "vector[%d][%d] = %f, expected %d\n", i, NX-1, vectors[i][NX-1], i); starpu_data_release(handles[i]); } } } // This barrier is unblocked after all receives are done, that means all isends are also done, so we can after that unregister handles (there is no implicit wait on the isends) starpu_mpi_wait_for_all(MPI_COMM_WORLD); starpu_mpi_barrier(MPI_COMM_WORLD); for (i = 0; i < NB_MCASTS*worldsize; i++) { starpu_data_unregister(handles[i]); free(vectors[i]); } free(vectors); free(handles); free(reqs); printf("[%d] end\n", rank); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/coop_recv_not_yet_posted.c000066400000000000000000000056631507764646700232130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" void task_cpu_func(void *descr[], void *args) { int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]); int val; starpu_codelet_unpack_args(args, &val); *var = val; } static struct starpu_codelet cl = { .cpu_funcs = { task_cpu_func }, .cpu_funcs_name = { "task_cpu_func" }, .nbuffers = 1, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .modes = { STARPU_W }, }; int main(int argc, char **argv) { int ret, rank, size; starpu_data_handle_t handle; int var = -1; int mpi_init; MPI_Status status; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); if (rank == 0) { int val, n; val = 42; ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* The task previously inserted should be enough to detect the coop, * but to be sure, indicate the number of sends requests before really * sending the data: */ starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); for(n = 1 ; n < size ; n++) { FPRINTF_MPI(stderr, "sending data to %d\n", n); ret = starpu_mpi_isend_detached(handle, n, 15, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } } else { if (rank == 2) { sleep(5); } printf("[%d] will post recv\n", rank); fflush(stdout); ret = starpu_mpi_recv(handle, 0, 15, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(handle, STARPU_R); printf("[%d] received data: %d\n", rank, var); STARPU_ASSERT(var == 42); starpu_data_release(handle); FPRINTF_MPI(stderr, "received data\n"); } starpu_data_unregister(handle); printf("[%d] end\n", rank); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/coop_recv_wait_finalize.c000066400000000000000000000063471507764646700230010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This test checks if STARPU_MPI_RECV_WAIT_FINALIZE env var doesn't break anything. */ #include #include #include #include "helper.h" #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else static int rank, worldsize; static void task_cpu_func(void *descr[], void *args) { int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]); int val; starpu_codelet_unpack_args(args, &val); *var = val; } static struct starpu_codelet cl = { .cpu_funcs = { task_cpu_func }, .cpu_funcs_name = { "task_cpu_func" }, .nbuffers = 1, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .modes = { STARPU_W }, }; static void test(starpu_mpi_tag_t tag, char* enabled) { int var = -1; int ret; starpu_data_handle_t handle; struct starpu_conf conf; setenv("STARPU_MPI_RECV_WAIT_FINALIZE", enabled, 1); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t) &var, sizeof(var)); if (rank == 0) { int val, n; val = 42; ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* The task previously inserted should be enough to detect the coop, * but to be sure, indicate the number of sends requests before really * sending the data: */ starpu_mpi_coop_sends_data_handle_nb_sends(handle, worldsize-1); for(n = 1 ; n < worldsize ; n++) { ret = starpu_mpi_isend_detached(handle, n, tag, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } } else { ret = starpu_mpi_recv(handle, 0, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); FPRINTF_MPI(stderr, "received data\n"); starpu_data_acquire(handle, STARPU_R); FPRINTF_MPI(stderr, "acquired data\n"); printf("[%d] acquired data: %d\n", rank, var); STARPU_ASSERT(var == 42); starpu_data_release(handle); FPRINTF_MPI(stderr, "received data\n"); } starpu_data_unregister(handle); starpu_mpi_barrier(MPI_COMM_WORLD); starpu_mpi_shutdown(); } int main(int argc, char** argv) { MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &worldsize); test(42, "0"); test(24, "1"); MPI_Finalize(); return 0; } #endif starpu-1.4.9+dfsg/mpi/tests/coop_user_defined_datatype.c000066400000000000000000000120521507764646700234520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Copy of test user_defined_datatype.c, but with coop */ #include #include #include #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define ELEMENTS 10 #else # define ELEMENTS 1000 #endif static int my_rank, worldsize, is_sender; void test_handle_recv_send(starpu_data_handle_t *handles, int nb_handles, starpu_mpi_tag_t tag) { int i, j; int ret; if (is_sender) { for(i=0 ; i #include "helper.h" int main(int argc, char **argv) { int ret, rank, size; starpu_data_handle_t handle; int var = -1; int mpi_init; MPI_Status status; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (rank == 0) { int n; var = 42; starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); /* This function tells StarPU to wait for size-1 sends of handle before really * sending the data. There are many sends of the same handle, so a dynamic * broadcast is triggered. */ starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); for(n = 1 ; n < size ; n++) { FPRINTF_MPI(stderr, "sending data to %d with prio %d\n", n, size-n); ret = starpu_mpi_isend_detached_prio(handle, n, 0, size-n, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached_prio"); } } else { starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); ret = starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(handle, STARPU_R); printf("[%d] received data: %d\n", rank, var); STARPU_ASSERT(var == 42); starpu_data_release(handle); FPRINTF_MPI(stderr, "received data\n"); } starpu_data_unregister(handle); printf("[%d] end\n", rank); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/coop_wrong_order.c000066400000000000000000000073271507764646700214630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" void task_cpu_func(void *descr[], void *args) { int *var = (int*) STARPU_VARIABLE_GET_PTR(descr[0]); int val; printf("running task\n"); starpu_codelet_unpack_args(args, &val); *var = val; } static struct starpu_codelet cl = { .cpu_funcs = { task_cpu_func }, .cpu_funcs_name = { "task_cpu_func" }, .nbuffers = 1, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .modes = { STARPU_W }, }; int main(int argc, char **argv) { int ret, rank, size; starpu_data_handle_t handle; int var = -1; int mpi_init; MPI_Status status; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t) &var, sizeof(var)); if (rank == 0) { int val, n; val = 42; ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); for(n = 1 ; n < size ; n++) { FPRINTF_MPI(stderr, "sending data to %d with tag 1\n", n); ret = starpu_mpi_isend_detached(handle, n, 1, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } val = 43; ret = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &val, sizeof(val), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* The task previously inserted should be enough to detect the coop, * but to be sure, indicate the number of sends requests before really * sending the data: */ starpu_mpi_coop_sends_data_handle_nb_sends(handle, size-1); for(n = 1 ; n < size ; n++) { FPRINTF_MPI(stderr, "sending data to %d with tag 0\n", n); ret = starpu_mpi_isend_detached(handle, n, 0, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } } else { FPRINTF_MPI(stderr, "waiting for data with tag 0\n"); ret = starpu_mpi_recv(handle, 0, 0, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(handle, STARPU_R); FPRINTF_MPI(stderr, "received data: %d\n", var); STARPU_ASSERT(var == 43); starpu_data_release(handle); FPRINTF_MPI(stderr, "waiting for data with tag 1\n"); ret = starpu_mpi_recv(handle, 0, 1, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(handle, STARPU_R); FPRINTF_MPI(stderr, "received data: %d\n", var); STARPU_ASSERT(var == 42); starpu_data_release(handle); } starpu_mpi_wait_for_all(MPI_COMM_WORLD); starpu_mpi_barrier(MPI_COMM_WORLD); starpu_data_unregister(handle); printf("[%d] end\n", rank); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/data_cpy.c000066400000000000000000000060411507764646700176700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" void callback(void *arg) { FPRINTF_MPI(stderr, "value in callback: %d\n", *((int *)arg)); } int main(int argc, char **argv) { int ret, rank, size; int mpi_init; starpu_data_handle_t src_handle, dst_handle; int value; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 2) { if (rank == 0) FPRINTF(stderr, "We need at least 2 processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } { value = rank; starpu_variable_data_register(&src_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); starpu_mpi_data_register(src_handle, 12, 0); starpu_variable_data_register(&dst_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); starpu_mpi_data_register(dst_handle, 42, 1); FPRINTF_MPI(stderr, "value before copy: %d\n", value); if (rank == 1) STARPU_ASSERT_MSG(value == rank, "before copy value %d should be %d\n", value, rank); starpu_mpi_data_cpy(dst_handle, src_handle, MPI_COMM_WORLD, 0, callback, &value); starpu_data_unregister(src_handle); starpu_data_unregister(dst_handle); FPRINTF_MPI(stderr, "value after copy: %d\n", value); if (rank == 1) STARPU_ASSERT_MSG(value == 0, "after copy value %d should be %d\n", value, 0); } { value = rank+12; starpu_variable_data_register(&src_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); starpu_mpi_data_register(src_handle, 12, 0); starpu_variable_data_register(&dst_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); starpu_mpi_data_register(dst_handle, 42, 1); FPRINTF_MPI(stderr, "value before copy: %d\n", value); if (rank == 1) STARPU_ASSERT_MSG(value == rank+12, "before copy value %d should be %d\n", value, rank+12); starpu_mpi_data_cpy(dst_handle, src_handle, MPI_COMM_WORLD, 1, callback, &value); starpu_data_unregister(src_handle); starpu_data_unregister(dst_handle); FPRINTF_MPI(stderr, "value after copy: %d\n", value); if (rank == 1) STARPU_ASSERT_MSG(value == 12, "after copy value %d should be %d\n", value, 12); } starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/datatypes.c000066400000000000000000000442311507764646700201050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" typedef void (*check_func)(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error); void send_recv_and_check(int rank, int node, starpu_data_handle_t handle_s, int tag_s, starpu_data_handle_t handle_r, int tag_r, int *error, check_func func) { int ret; MPI_Status status; if (rank == 0) { ret = starpu_mpi_send(handle_s, node, tag_s, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_recv(handle_r, node, tag_r, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); assert(func); func(handle_s, handle_r, error); } else if (rank == 1) { ret = starpu_mpi_recv(handle_s, node, tag_s, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); ret = starpu_mpi_send(handle_s, node, tag_r, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } } /* * Void */ void check_void(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error) { (void)error; (void)handle_s; (void)handle_r; FPRINTF_MPI(stderr, "Success with void value\n"); } void exchange_void(int rank, int *error) { STARPU_SKIP_IF_VALGRIND; if (rank == 0) { starpu_data_handle_t void_handle[2]; starpu_void_data_register(&void_handle[0]); starpu_void_data_register(&void_handle[1]); send_recv_and_check(rank, 1, void_handle[0], 0x42, void_handle[1], 0x1337, error, check_void); starpu_data_unregister(void_handle[0]); starpu_data_unregister(void_handle[1]); } else if (rank == 1) { starpu_data_handle_t void_handle; starpu_void_data_register(&void_handle); send_recv_and_check(rank, 0, void_handle, 0x42, NULL, 0x1337, NULL, NULL); starpu_data_unregister(void_handle); } } /* * Variable */ void check_variable(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error) { float *v_s, *v_r; STARPU_ASSERT(starpu_variable_get_elemsize(handle_s) == starpu_variable_get_elemsize(handle_r)); starpu_data_acquire(handle_s, STARPU_R); v_s = (float *)starpu_variable_get_local_ptr(handle_s); starpu_data_release(handle_s); starpu_data_acquire(handle_r, STARPU_R); v_r = (float *)starpu_variable_get_local_ptr(handle_r); starpu_data_release(handle_r); if (*v_s == *v_r) { FPRINTF_MPI(stderr, "Success with variable value: %f == %f\n", *v_s, *v_r); } else { *error = 1; FPRINTF_MPI(stderr, "Error with variable value: %f != %f\n", *v_s, *v_r); } } void exchange_variable(int rank, int *error) { if (rank == 0) { float v = 42.12; starpu_data_handle_t variable_handle[2]; starpu_variable_data_register(&variable_handle[0], STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(v)); starpu_variable_data_register(&variable_handle[1], -1, (uintptr_t)NULL, sizeof(v)); send_recv_and_check(rank, 1, variable_handle[0], 0x42, variable_handle[1], 0x1337, error, check_variable); starpu_data_unregister(variable_handle[0]); starpu_data_unregister(variable_handle[1]); } else if (rank == 1) { starpu_data_handle_t variable_handle; starpu_variable_data_register(&variable_handle, -1, (uintptr_t)NULL, sizeof(float)); send_recv_and_check(rank, 0, variable_handle, 0x42, NULL, 0x1337, NULL, NULL); starpu_data_unregister(variable_handle); } } /* * Vector */ void check_vector(starpu_data_handle_t handle_s, starpu_data_handle_t handle_r, int *error) { int i; int nx; int *v_r, *v_s; STARPU_ASSERT(starpu_vector_get_elemsize(handle_s) == starpu_vector_get_elemsize(handle_r)); STARPU_ASSERT(starpu_vector_get_nx(handle_s) == starpu_vector_get_nx(handle_r)); nx = starpu_vector_get_nx(handle_r); v_r = (int *)starpu_vector_get_local_ptr(handle_r); v_s = (int *)starpu_vector_get_local_ptr(handle_s); for(i=0 ; i %d] value: %c == %c\n", x, y, index, matrix_s[index], matrix_r[index]); } else { *error = 1; FPRINTF_MPI(stderr, "Error with matrix[%d,%d --> %d] value: %c != %c\n", x, y, index, matrix_s[index], matrix_r[index]); } } } } void exchange_matrix(int rank, int *error) { int nx=3; int ny=2; if (rank == 0) { char *matrix, n='a'; int x, y; starpu_data_handle_t matrix_handle[2]; starpu_malloc((void **)&matrix, nx*ny*sizeof(char)); assert(matrix); for(y=0 ; y %d] value: %f == %f\n", x, y, z, index, block_s[index], block_r[index]); } else { *error = 1; FPRINTF_MPI(stderr, "Error with block[%d,%d,%d --> %d] value: %f != %f\n", x, y, z, index, block_s[index], block_r[index]); } } } starpu_data_release(handle_s); starpu_data_release(handle_r); } void exchange_block(int rank, int *error) { int nx=3; int ny=2; int nz=4; if (rank == 0) { float *block, n=1.0; int x, y, z; starpu_data_handle_t block_handle[2]; starpu_malloc((void **)&block, nx*ny*nz*sizeof(float)); assert(block); for(z=0 ; z #include #include #include "helper.h" #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else int main(int argc, char **argv) { int ret; setenv("STARPU_DISPLAY_BINDINGS", "1", 1); MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED); ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_shutdown(); MPI_Finalize(); return EXIT_SUCCESS; } #endif starpu-1.4.9+dfsg/mpi/tests/driver.c000066400000000000000000000115501507764646700174000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(int argc, char **argv) { return STARPU_TEST_SKIPPED; } #else int main(int argc, char **argv) { int ret, rank, size, i; starpu_data_handle_t tab_handle[4]; int values[4]; starpu_mpi_req request[2] = {NULL, NULL}; int mpi_init; struct starpu_conf conf; setenv("STARPU_MPI_DRIVER_CALL_FREQUENCY", "1", 1); setenv("STARPU_MPI_DRIVER_TASK_FREQUENCY", "10", 1); MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size%2 != 0) { FPRINTF_MPI(stderr, "We need a even number of processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } for(i=0 ; i<4 ; i++) { if (i<3 || rank%2) { // all data are registered on all nodes, but the 4th data which is not registered on the receiving node values[i] = (rank+1) * (i+1); starpu_variable_data_register(&tab_handle[i], STARPU_MAIN_RAM, (uintptr_t)&values[i], sizeof(values[i])); starpu_mpi_data_register(tab_handle[i], i, rank); } } int other_rank = rank%2 == 0 ? rank+1 : rank-1; FPRINTF_MPI(stderr, "rank %d exchanging with rank %d\n", rank, other_rank); if (rank%2) { FPRINTF_MPI(stderr, "Sending values %d and %d to node %d\n", values[0], values[3], other_rank); // this data will be received as an early registered data ret = starpu_mpi_isend(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); // this data will be received as an early UNregistered data ret = starpu_mpi_isend(tab_handle[3], &request[1], other_rank, 3, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); ret = starpu_mpi_send(tab_handle[1], other_rank, 1, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_recv(tab_handle[2], other_rank, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); } else { ret = starpu_mpi_recv(tab_handle[1], other_rank, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); ret = starpu_mpi_send(tab_handle[2], other_rank, 2, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); // we register the data starpu_variable_data_register(&tab_handle[3], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register(tab_handle[3], 3, rank); ret = starpu_mpi_irecv(tab_handle[3], &request[1], other_rank, 3, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); ret = starpu_mpi_irecv(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); } int finished=0; while (!finished) { for(i=0 ; i<2 ; i++) { if (request[i]) { int flag; MPI_Status status; ret = starpu_mpi_test(&request[i], &flag, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); if (flag) FPRINTF_MPI(stderr, "request[%d] = %d %p\n", i, flag, request[i]); } } finished = request[0] == NULL && request[1] == NULL; #ifdef STARPU_SIMGRID starpu_sleep(0.001); #endif } if (rank%2 == 0) { void *ptr0; void *ptr3; starpu_data_acquire(tab_handle[0], STARPU_RW); ptr0 = starpu_data_get_local_ptr(tab_handle[0]); starpu_data_release(tab_handle[0]); starpu_data_acquire(tab_handle[3], STARPU_RW); ptr3 = starpu_data_get_local_ptr(tab_handle[3]); starpu_data_release(tab_handle[3]); ret = (*((int *)ptr0) == (other_rank+1)*1) && (*((int *)ptr3) == (other_rank+1)*4); ret = !ret; FPRINTF_MPI(stderr, "[%s] Received values %d and %d from node %d\n", ret?"FAILURE":"SUCCESS", *((int *)ptr0), *((int *)ptr3), other_rank); } for(i=0 ; i<4 ; i++) starpu_data_unregister(tab_handle[i]); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } #endif starpu-1.4.9+dfsg/mpi/tests/early_request.c000066400000000000000000000165161507764646700210000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" #define NUM_EL 5 #ifdef STARPU_QUICK_CHECK # define NUM_LOOPS 2 #else # define NUM_LOOPS 10 #endif /* * This testcase written by J-M Couteyen allows to test that several * early requests for a given source and tag can be posted to StarPU * by the application before data arrive. * * In this test case, multiples processes (called "domains") exchanges * information between multiple "elements" multiple times, with * different sizes (in order to catch error more easily). * The communications are independent between the elements (each one * as its proper tag), but must occur in the submitted order for an * element taken independently. */ struct element { int tag; int foreign_domain; int array_send[100]; int array_recv[100]; starpu_data_handle_t ensure_submitted_order_send; starpu_data_handle_t ensure_submitted_order_recv; starpu_data_handle_t send; starpu_data_handle_t recv; }; /* functions/codelet to fill the bufferss*/ void fill_tmp_buffer(void *buffers[], void *cl_arg) { (void)cl_arg; int *tmp = (int *) STARPU_VECTOR_GET_PTR(buffers[0]); int nx = STARPU_VECTOR_GET_NX(buffers[0]); int i; for (i=0; itag=size; el->foreign_domain=foreign_domain; int mpi_rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &mpi_rank); starpu_vector_data_register(&el->recv, 0, (uintptr_t)el->array_recv, size, sizeof(int)); starpu_vector_data_register(&el->send, 0, (uintptr_t)el->array_send, size, sizeof(int)); starpu_void_data_register(&el->ensure_submitted_order_send); starpu_void_data_register(&el->ensure_submitted_order_recv); } void free_element(struct element *el) { starpu_data_unregister(el->recv); starpu_data_unregister(el->send); starpu_data_unregister(el->ensure_submitted_order_send); starpu_data_unregister(el->ensure_submitted_order_recv); } void insert_work_for_one_element(struct element *el) { starpu_data_handle_t tmp_recv; starpu_data_handle_t tmp_send; int ret; starpu_vector_data_register(&tmp_recv, -1, 0, el->tag, sizeof(int)); starpu_vector_data_register(&tmp_send, -1, 0, el->tag, sizeof(int)); //Emulate the work to fill the send buffer ret = starpu_task_insert(&fill_tmp_buffer_cl, STARPU_W,tmp_send, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); //Send operation ret = starpu_task_insert(&submitted_order_rw, STARPU_RW,el->ensure_submitted_order_send, STARPU_RW,tmp_send, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_mpi_isend_detached(tmp_send,el->foreign_domain,el->tag, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); ret = starpu_task_insert(&submitted_order_rw, STARPU_RW,el->ensure_submitted_order_send, STARPU_RW,tmp_send, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); //Recv operation for current element ret = starpu_task_insert(&submitted_order, STARPU_RW,el->ensure_submitted_order_recv, STARPU_W,tmp_recv, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_mpi_irecv_detached(tmp_recv,el->foreign_domain,el->tag, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); //Emulate the "reading" of the recv value. ret = starpu_task_insert(&read_ghost_value_cl, STARPU_R,tmp_recv, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_data_unregister_submit(tmp_send); starpu_data_unregister_submit(tmp_recv); } /*main program*/ int main(int argc, char * argv[]) { /* Init */ int ret; int mpi_rank, mpi_size; int mpi_init; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &mpi_rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &mpi_size); if (starpu_cpu_worker_get_count() == 0) { if (mpi_rank == 0) FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return mpi_rank == 0 ? STARPU_TEST_SKIPPED : 0; } /*element initialization : domains are connected as a ring for this test*/ int num_elements=NUM_EL; struct element * el_left=malloc(num_elements*sizeof(el_left[0])); struct element * el_right=malloc(num_elements*sizeof(el_right[0])); int i; for(i=0;i #include #include "helper.h" #ifndef STARPU_USE_MPI_MPI int main(int argc, char **argv) { int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); if (!mpi_init) MPI_Finalize(); return 0; } #else #include #include #include void early_data() { struct _starpu_mpi_early_data_handle *edh[2]; struct _starpu_mpi_envelope envelope[2]; struct _starpu_mpi_node_tag node_tag[2]; struct _starpu_mpi_early_data_handle *early; struct _starpu_mpi_early_data_handle_tag_hashlist *hash; memset(&node_tag[0], 0, sizeof(struct _starpu_mpi_node_tag)); node_tag[0].node.rank = 1; node_tag[0].node.comm = MPI_COMM_WORLD; node_tag[0].data_tag = 42; memset(&node_tag[1], 0, sizeof(struct _starpu_mpi_node_tag)); node_tag[1].node.rank = 2; node_tag[1].node.comm = MPI_COMM_WORLD; node_tag[1].data_tag = 84; envelope[0].data_tag = node_tag[0].data_tag; edh[0] = _starpu_mpi_early_data_create(&envelope[0], node_tag[0].node.rank, node_tag[0].node.comm); envelope[1].data_tag = node_tag[1].data_tag; edh[1] = _starpu_mpi_early_data_create(&envelope[1], node_tag[1].node.rank, node_tag[1].node.comm); _starpu_mpi_early_data_add(edh[0]); _starpu_mpi_early_data_add(edh[1]); hash = _starpu_mpi_early_data_extract(&node_tag[0]); STARPU_ASSERT(_starpu_mpi_early_data_handle_list_size(&hash->list) == 1); early = _starpu_mpi_early_data_handle_list_pop_front(&hash->list); STARPU_ASSERT(early->node_tag.node.comm == node_tag[0].node.comm && early->node_tag.node.rank == node_tag[0].node.rank && early->node_tag.data_tag == node_tag[0].data_tag); STARPU_ASSERT(_starpu_mpi_early_data_handle_list_size(&hash->list) == 0); _starpu_mpi_early_data_delete(early); free(hash); early = _starpu_mpi_early_data_find(&node_tag[1]); STARPU_ASSERT(early->node_tag.node.comm == node_tag[1].node.comm && early->node_tag.node.rank == node_tag[1].node.rank && early->node_tag.data_tag == node_tag[1].data_tag); _starpu_mpi_early_data_delete(early); } void early_request() { struct _starpu_mpi_req req[2]; struct _starpu_mpi_req *early; struct _starpu_mpi_early_request_tag_hashlist *hash; memset(&req[0].node_tag, 0, sizeof(struct _starpu_mpi_node_tag)); req[0].node_tag.node.rank = 1; req[0].node_tag.node.comm = MPI_COMM_WORLD; req[0].node_tag.data_tag = 42; memset(&req[1].node_tag, 0, sizeof(struct _starpu_mpi_node_tag)); req[1].node_tag.node.rank = 2; req[1].node_tag.node.comm = MPI_COMM_WORLD; req[1].node_tag.data_tag = 84; _starpu_mpi_early_request_enqueue(&req[1]); _starpu_mpi_early_request_enqueue(&req[0]); early = _starpu_mpi_early_request_dequeue(req[0].node_tag.data_tag, req[0].node_tag.node.rank, req[0].node_tag.node.comm); STARPU_ASSERT(early->node_tag.data_tag == req[0].node_tag.data_tag && early->node_tag.node.rank == req[0].node_tag.node.rank && early->node_tag.node.comm == req[0].node_tag.node.comm); hash = _starpu_mpi_early_request_extract(req[1].node_tag.data_tag, req[1].node_tag.node.rank, req[1].node_tag.node.comm); STARPU_ASSERT(_starpu_mpi_req_list_size(&hash->list) == 1); early = _starpu_mpi_req_list_pop_front(&hash->list); STARPU_ASSERT(_starpu_mpi_req_list_size(&hash->list) == 0); STARPU_ASSERT(early->node_tag.data_tag == req[1].node_tag.data_tag && early->node_tag.node.rank == req[1].node_tag.node.rank && early->node_tag.node.comm == req[1].node_tag.node.comm); free(hash); } int main(int argc, char **argv) { int ret; int mpi_init; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); early_data(); early_request(); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } #endif starpu-1.4.9+dfsg/mpi/tests/gather.c000066400000000000000000000046341507764646700173640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" int main(int argc, char **argv) { int ret, rank, size; starpu_data_handle_t handle; int var; int mpi_init; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size<3) { FPRINTF(stderr, "We need more than 2 processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } if (rank == 0) { int n; for(n=1 ; n from node <%d>\n", var, n); FPRINTF_MPI(stderr, "received <%d> from node %d\n", var, n); starpu_data_release(handle); starpu_data_unregister(handle); } } else { FPRINTF_MPI(stderr, "sending to node %d\n", 0); var = rank; starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); ret = starpu_mpi_send(handle, 0, 42, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); starpu_data_unregister(handle); } starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/gather2.c000066400000000000000000000070331507764646700174420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" int main(int argc, char **argv) { int ret, rank, size; int mpi_init; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size<3) { FPRINTF(stderr, "We need more than 2 processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } if (rank == 0) { int n; for(n=1 ; n from node <%d>\n", var[0], n); FPRINTF_MPI(stderr, "received <%d> from node %d\n", var[0], n); starpu_data_release(handle[0]); ret = starpu_mpi_recv(handle[0], n, 44, MPI_COMM_WORLD, &status[1]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); ret = starpu_mpi_recv(handle[1], n, 46, MPI_COMM_WORLD, &status[2]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); for(i=0 ; i<2 ; i++) starpu_data_acquire(handle[i], STARPU_R); STARPU_ASSERT_MSG(var[0] == n*2, "Received incorrect value <%d> from node <%d>\n", var[0], n); STARPU_ASSERT_MSG(var[1] == n*4, "Received incorrect value <%d> from node <%d>\n", var[0], n); FPRINTF_MPI(stderr, "received <%d> and <%d> from node %d\n", var[0], var[1], n); for(i=0 ; i<2 ; i++) starpu_data_release(handle[i]); for(i=0 ; i<2 ; i++) starpu_data_unregister(handle[i]); } } else { int i, var[3]; starpu_data_handle_t handle[3]; FPRINTF_MPI(stderr, "sending to node %d\n", 0); var[0] = rank; var[1] = var[0] * 2; var[2] = var[0] * 4; for(i=0 ; i<3 ; i++) starpu_variable_data_register(&handle[i], STARPU_MAIN_RAM, (uintptr_t)&var[i], sizeof(var[i])); ret = starpu_mpi_send(handle[0], 0, 42, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_send(handle[1], 0, 44, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_send(handle[2], 0, 46, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); for(i=0 ; i<3 ; i++) starpu_data_unregister(handle[i]); } starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/helper.h000066400000000000000000000045751507764646700174020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../../tests/helper.h" #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); fflush(ofile); }} while(0) #define PRINTF(fmt, ...) do { if (!getenv("STARPU_SSILENT")) {printf(fmt, ## __VA_ARGS__); fflush(stdout); }} while(0) #define FPRINTF_MPI(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) { \ int _disp_rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &_disp_rank); \ fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \ fflush(ofile); }} while(0) #define FPRINTF_MPI_COMM(ofile, comm, fmt, ...) do { if (!getenv("STARPU_SSILENT")) { \ int _disp_rank; starpu_mpi_comm_rank(comm, &_disp_rank); \ fprintf(ofile, "[%d][starpu_mpi][%s] " fmt , _disp_rank, __starpu_func__ ,## __VA_ARGS__); \ fflush(ofile); }} while(0); #define MPI_INIT_THREAD_real(argc, argv, required) do { \ int thread_support; \ if (MPI_Init_thread(argc, argv, required, &thread_support) != MPI_SUCCESS) \ { \ fprintf(stderr,"MPI_Init_thread failed\n"); \ exit(1); \ } \ if (thread_support == MPI_THREAD_FUNNELED) \ fprintf(stderr,"Warning: MPI only has funneled thread support, not serialized, hoping this will work\n"); \ if (thread_support < MPI_THREAD_FUNNELED) \ fprintf(stderr,"Warning: MPI does not have thread support!\n"); } while(0) #ifdef STARPU_SIMGRID #define MPI_INIT_THREAD(argc, argv, required, init) do { *(init) = 1 ; } while(0) #else #define MPI_INIT_THREAD(argc, argv, required, init) do { \ *(init) = 0; \ MPI_INIT_THREAD_real(argc, argv, required); } while(0) #endif starpu-1.4.9+dfsg/mpi/tests/insert_task.c000066400000000000000000000102041507764646700204260ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" void func_cpu(void *descr[], void *_args) { (void)_args; unsigned *x = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); unsigned *y = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); FPRINTF(stdout, "VALUES: %u %u\n", *x, *y); *x = (*x + *y) / 2; } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_R}, .model = &starpu_perfmodel_nop, }; #define X 4 #define Y 5 /* Returns the MPI node number where data indexes index is */ int my_distrib(int x, int y, int nb_nodes) { return (x + y) % nb_nodes; } int main(int argc, char **argv) { int rank, size, x, y; int value=0, ret; unsigned matrix[X][Y]; starpu_data_handle_t data_handles[X][Y]; struct starpu_conf conf; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { matrix[x][y] = (rank+1)*10 + value; value++; } } #if 0 for(x = 0; x < X; x++) { FPRINTF(stdout, "[%d] ", rank); for (y = 0; y < Y; y++) { FPRINTF(stdout, "%3d ", matrix[x][y]); } FPRINTF(stdout, "\n"); } #endif for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { int mpi_rank = my_distrib(x, y, size); if (mpi_rank == rank) { //FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", rank, x, y); starpu_variable_data_register(&data_handles[x][y], STARPU_MAIN_RAM, (uintptr_t)&(matrix[x][y]), sizeof(unsigned)); } else { /* I don't own this index, but will need it for my computations */ //FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", rank, x, y); starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(unsigned)); } if (data_handles[x][y]) { starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank); } } } ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[1][1], STARPU_R, data_handles[0][1], 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[3][1], STARPU_R, data_handles[0][1], 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0][1], STARPU_R, data_handles[0][0], 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[3][1], STARPU_R, data_handles[0][1], 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); FPRINTF(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); enodev: for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { if (data_handles[x][y]) starpu_data_unregister(data_handles[x][y]); } } starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); #if 0 for(x = 0; x < X; x++) { FPRINTF(stdout, "[%d] ", rank); for (y = 0; y < Y; y++) { FPRINTF(stdout, "%3d ", matrix[x][y]); } FPRINTF(stdout, "\n"); } #endif return 0; } starpu-1.4.9+dfsg/mpi/tests/insert_task_block.c000066400000000000000000000100101507764646700215730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" void func_cpu(void *descr[], void *_args) { (void)_args; unsigned *matrix = (unsigned *)STARPU_MATRIX_GET_PTR(descr[0]); int nx = (int)STARPU_MATRIX_GET_NX(descr[0]); int ny = (int)STARPU_MATRIX_GET_NY(descr[0]); int ld = (int)STARPU_MATRIX_GET_LD(descr[0]); int i, j; unsigned sum=0; for (i = 0; i < nx; i++) { for (j = 0; j < ny; j++) { sum += matrix[i+j*ld]; } } for (i = 0; i < nx; i++) { for (j = 0; j < ny; j++) { matrix[i+j*ld] = sum;///(nx*ny); } } } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 1, #ifdef STARPU_SIMGRID .model = &starpu_perfmodel_nop, #endif .modes = {STARPU_RW} }; #define SIZE 6 #define BLOCKS 3 /* Returns the MPI node number where data indexes index is */ int my_distrib(int x, int y, int nb_nodes) { return (x + y) % nb_nodes; } int main(int argc, char **argv) { int rank, size, x, y; int ret, value=0; unsigned matrix[SIZE*SIZE]; starpu_data_handle_t data_handles[SIZE][SIZE]; struct starpu_conf conf; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); for(x = 0; x < SIZE; x++) { for (y = 0; y < SIZE; y++) { matrix[x+y*SIZE] = rank*100 + value; value++; } } #if 1 for(x = 0; x < SIZE; x++) { FPRINTF(stdout, "[%d] ", rank); for (y = 0; y < SIZE; y++) { FPRINTF(stdout, "%3u ", matrix[x+y*SIZE]); } FPRINTF(stdout, "\n"); } #endif for(x = 0; x < BLOCKS ; x++) { for (y = 0; y < BLOCKS; y++) { int mpi_rank = my_distrib(x, y, size); if (mpi_rank == rank) { //FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", rank, x, y); starpu_matrix_data_register(&data_handles[x][y], STARPU_MAIN_RAM, (uintptr_t)&(matrix[((SIZE/BLOCKS)*x) + ((SIZE/BLOCKS)*y) * SIZE]), SIZE, SIZE/BLOCKS, SIZE/BLOCKS, sizeof(unsigned)); } else { /* I don't own this index, but will need it for my computations */ //FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", rank, x, y); starpu_matrix_data_register(&data_handles[x][y], -1, (uintptr_t)&(matrix[((SIZE/BLOCKS)*x) + ((SIZE/BLOCKS)*y) * SIZE]), SIZE, SIZE/BLOCKS, SIZE/BLOCKS, sizeof(unsigned)); } if (data_handles[x][y]) { starpu_mpi_data_register(data_handles[x][y], (y*BLOCKS)+x, mpi_rank); } } } for(x = 0; x < BLOCKS; x++) { for (y = 0; y < BLOCKS; y++) { ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[x][y], 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); } } FPRINTF(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); for(x = 0; x < BLOCKS; x++) { for (y = 0; y < BLOCKS; y++) { if (data_handles[x][y]) starpu_data_unregister(data_handles[x][y]); } } starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); #if 1 for(x = 0; x < SIZE; x++) { FPRINTF(stdout, "[%d] ", rank); for (y = 0; y < SIZE; y++) { FPRINTF(stdout, "%3u ", matrix[x+y*SIZE]); } FPRINTF(stdout, "\n"); } #endif return 0; } starpu-1.4.9+dfsg/mpi/tests/insert_task_can_execute.c000066400000000000000000000044171507764646700230020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" void cpu_fun(void* buffers[], void* args) { float *ptr = (float*)(STARPU_VECTOR_GET_PTR(buffers[0])); ptr[0] = 42; } int can_execute(unsigned workerid, struct starpu_task* task, unsigned nimpl) { return 1; } static struct starpu_codelet codelet = { .can_execute = can_execute, .cpu_funcs = {cpu_fun}, .nbuffers = 1, .modes = {STARPU_W}, .model = &starpu_perfmodel_nop, .flags = STARPU_CODELET_SIMGRID_EXECUTE, }; int main(int argc, char** argv) { struct starpu_conf conf; int mpi_init; int rank; int ret; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); // register a vector of one element float *data = malloc(sizeof(float)); data[0] = 55; starpu_data_handle_t handle; starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t) data, 1, sizeof(data[0])); starpu_mpi_data_register(handle, 0, 0); // run the task starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet, STARPU_W, handle, NULL); // gather the result starpu_data_unregister(handle); // check results ret = 0; if (rank == 0) { if (data[0] == 42) { ret = 0; fprintf(stderr, "Success!\n"); } else { ret = 1; fprintf(stderr, "Failure!\n"); } } free(data); // shutdown starpu starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return ret; } starpu-1.4.9+dfsg/mpi/tests/insert_task_compute.c000066400000000000000000000146761507764646700222030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" void func_cpu(void *descr[], void *_args) { int rank; int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); int *y = (int *)STARPU_VARIABLE_GET_PTR(descr[1]); starpu_codelet_unpack_args(_args, &rank); FPRINTF(stdout, "[%d] VALUES: %d %d\n", rank, *x, *y); *x = *x * *y; } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_R}, .model = &starpu_perfmodel_nop, }; int test(int rank, int node, starpu_mpi_tag_t initial_tag, int *before, int *after, int task_insert, int data_array) { int ok, ret, i, x[2]; starpu_data_handle_t data_handles[2]; struct starpu_data_descr descrs[2]; int barrier_ret; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); if (starpu_cpu_worker_get_count() == 0) { // If there is no cpu to execute the codelet, mpi will block trying to do the post-execution communication ret = -ENODEV; FPRINTF_MPI(stderr, "No CPU is available\n"); goto nodata; } FPRINTF_MPI(stderr, "Testing with node=%d - task_insert=%d - data_array=%d - \n", node, task_insert, data_array); for(i=0 ; i<2 ; i++) { if (rank <= 1) { x[i] = before[rank*2+i]; //FPRINTF_MPI(stderr, "before computation x[%d] = %d\n", i, x[i]); } else x[i] = rank*2+i; if (rank == i) starpu_variable_data_register(&data_handles[i], 0, (uintptr_t)&x[i], sizeof(int)); else starpu_variable_data_register(&data_handles[i], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register(data_handles[i], initial_tag+i, i); descrs[i].handle = data_handles[i]; } descrs[0].mode = STARPU_RW; descrs[1].mode = STARPU_R; switch(task_insert) { case 0: { struct starpu_task *task = NULL; switch(data_array) { case 0: { task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0], STARPU_R, data_handles[1], STARPU_VALUE, &rank, sizeof(rank), STARPU_EXECUTE_ON_NODE, node, 0); break; } case 1: { task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet, STARPU_DATA_ARRAY, data_handles, 2, STARPU_VALUE, &rank, sizeof(rank), STARPU_EXECUTE_ON_NODE, node, 0); break; } case 2: { task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet, STARPU_DATA_MODE_ARRAY, descrs, 2, STARPU_VALUE, &rank, sizeof(rank), STARPU_EXECUTE_ON_NODE, node, 0); break; } } if (task) { ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } switch(data_array) { case 0: { starpu_mpi_task_post_build(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0], STARPU_R, data_handles[1], STARPU_EXECUTE_ON_NODE, node, 0); break; } case 1: { starpu_mpi_task_post_build(MPI_COMM_WORLD, &mycodelet, STARPU_DATA_ARRAY, data_handles, 2, STARPU_EXECUTE_ON_NODE, node, 0); break; } case 2: { starpu_mpi_task_post_build(MPI_COMM_WORLD, &mycodelet, STARPU_DATA_MODE_ARRAY, descrs, 2, STARPU_EXECUTE_ON_NODE, node, 0); break; } } break; } case 1: { switch(data_array) { case 0: { ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0], STARPU_R, data_handles[1], STARPU_VALUE, &rank, sizeof(rank), STARPU_EXECUTE_ON_NODE, node, 0); if (ret == -ENODEV) goto enodev; break; } case 1: { ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_DATA_ARRAY, data_handles, 2, STARPU_VALUE, &rank, sizeof(rank), STARPU_EXECUTE_ON_NODE, node, 0); if (ret == -ENODEV) goto enodev; break; } case 2: { ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_DATA_MODE_ARRAY, descrs, 2, STARPU_VALUE, &rank, sizeof(rank), STARPU_EXECUTE_ON_NODE, node, 0); if (ret == -ENODEV) goto enodev; break; } } STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); break; } } starpu_task_wait_for_all(); enodev: for(i=0; i<2; i++) { starpu_data_unregister(data_handles[i]); } ok = 1; #ifndef STARPU_SIMGRID if (rank <= 1) { for(i=0; i<2; i++) { ok = ok && (x[i] == after[rank*2+i]); if (x[i] != after[rank*2+i]) FPRINTF_MPI(stderr, "after computation x[%d] = %d, should be %d\n", i, x[i], after[rank*2+i]); } FPRINTF_MPI(stderr, "result is %s\n", ok?"CORRECT":"NOT CORRECT"); } #endif nodata: barrier_ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_ASSERT(barrier_ret == MPI_SUCCESS); starpu_mpi_shutdown(); return ret == -ENODEV ? ret : !ok; } int main(int argc, char **argv) { int rank; int global_ret, ret; int before[4] = {10, 20, 11, 22}; int after_node[2][4] = {{220, 20, 11, 22}, {220, 20, 11, 22}}; int node, insert_task, data_array; starpu_mpi_tag_t initial_tag = 0; MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED); MPI_Comm_rank(MPI_COMM_WORLD, &rank); global_ret = 0; for(node=0 ; node<=1 ; node++) { for(insert_task=0 ; insert_task<=1 ; insert_task++) { for(data_array=0 ; data_array<=2 ; data_array++) { ret = test(rank, node, initial_tag, before, after_node[node], insert_task, data_array); initial_tag += 2; if (ret == -ENODEV || ret) global_ret = ret; } } } MPI_Finalize(); if (rank == 0) return global_ret==-ENODEV?STARPU_TEST_SKIPPED:global_ret; else return 0; } starpu-1.4.9+dfsg/mpi/tests/insert_task_count.c000066400000000000000000000066171507764646700216530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define NITER 32 #elif !defined(STARPU_LONG_CHECK) # define NITER 256 #else # define NITER 2048 #endif #ifdef STARPU_USE_CUDA extern void increment_cuda(void *descr[], void *_args); #endif #ifdef STARPU_USE_HIP extern void increment_hip(void *descr[], void *_args); #endif void increment_cpu(void *descr[], void *_args) { (void)_args; int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); (*tokenptr)++; } static struct starpu_codelet increment_cl = { #ifdef STARPU_USE_CUDA .cuda_funcs = {increment_cuda}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {increment_hip}, #endif .cpu_funcs = {increment_cpu}, .nbuffers = 1, .modes = {STARPU_RW}, .model = &starpu_perfmodel_nop, }; int main(int argc, char **argv) { int ret, rank, size; int token = 0; starpu_data_handle_t token_handle; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0) || (starpu_cpu_worker_get_count() + starpu_hip_worker_get_count() == 0)) { if (rank == 0) { if (size < 2) FPRINTF(stderr, "We need at least 2 processes.\n"); else FPRINTF(stderr, "We need at least 1 CPU or CUDA or HIP worker.\n"); } starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } if (rank == 1) starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(token)); else starpu_vector_data_register(&token_handle, -1, (uintptr_t)NULL, 1, sizeof(token)); starpu_mpi_data_register(token_handle, 12, 1); int nloops = NITER; int loop; FPRINTF_MPI(stderr, "Start with token value %d\n", token); for (loop = 0; loop < nloops; loop++) { if (loop % 2) starpu_mpi_task_insert(MPI_COMM_WORLD, &increment_cl, STARPU_RW|STARPU_SSEND, token_handle, STARPU_EXECUTE_ON_NODE, 0, 0); else starpu_mpi_task_insert(MPI_COMM_WORLD, &increment_cl, STARPU_RW, token_handle, STARPU_EXECUTE_ON_NODE, 0, 0); } starpu_task_wait_for_all(); starpu_data_unregister(token_handle); FPRINTF_MPI(stderr, "Final value for token %d\n", token); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); #ifndef STARPU_SIMGRID if (rank == 1) { STARPU_ASSERT_MSG(token == nloops, "token==%d != expected_value==%d\n", token, nloops); } else { STARPU_ASSERT_MSG(token == 0, "token==%d != expected_value==0\n", token); } #endif return 0; } starpu-1.4.9+dfsg/mpi/tests/insert_task_dyn_handles.c000066400000000000000000000105431507764646700230040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "helper.h" #define FFACTOR 42 void func_cpu(void *descr[], void *_args) { (void)_args; int num = starpu_task_get_current()->nbuffers; int *factor = (int *)STARPU_VARIABLE_GET_PTR(descr[num-1]); int i; for (i = 0; i < num-1; i++) { int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[i]); *x = *x + 1**factor; } } struct starpu_codelet codelet = { .cpu_funcs = {func_cpu}, .cpu_funcs_name = {"func_cpu"}, .nbuffers = STARPU_VARIABLE_NBUFFERS, #ifdef STARPU_SIMGRID .model = &starpu_perfmodel_nop, #endif }; int main(int argc, char **argv) { int *x; int i, ret, loop; int rank; int factor=0; #ifdef STARPU_QUICK_CHECK int nloops = 4; #else int nloops = 16; #endif starpu_data_handle_t *data_handles; starpu_data_handle_t factor_handle; struct starpu_data_descr *descrs; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); if (starpu_cpu_worker_get_count() == 0) { if (rank == 0) FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_mpi_shutdown(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } x = calloc(1, (STARPU_NMAXBUFS+15) * sizeof(int)); data_handles = malloc((STARPU_NMAXBUFS+15) * sizeof(starpu_data_handle_t)); descrs = malloc((STARPU_NMAXBUFS+15) * sizeof(struct starpu_data_descr)); for(i=0 ; i #include #include "helper.h" void func_cpu(void *descr[], void *_args) { int node; int rank; (void)descr; starpu_codelet_unpack_args(_args, &node); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); FPRINTF_MPI(stderr, "Expected node: %d - Actual node: %d\n", node, rank); assert(node == rank); } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_RW}, .model = &starpu_perfmodel_nop, .name = "insert_task_node_choice" }; int main(int argc, char **argv) { int ret, rank, size, err, node; int x0=32; long long x1=23; starpu_data_handle_t data_handlesx0; starpu_data_handle_t data_handlesx1; struct starpu_conf conf; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (rank != 0 && rank != 1) goto end; if (rank == 0) { starpu_variable_data_register(&data_handlesx0, STARPU_MAIN_RAM, (uintptr_t)&x0, sizeof(x0)); starpu_variable_data_register(&data_handlesx1, -1, (uintptr_t)NULL, sizeof(x1)); } else { starpu_variable_data_register(&data_handlesx0, -1, (uintptr_t)NULL, sizeof(x0)); starpu_variable_data_register(&data_handlesx1, STARPU_MAIN_RAM, (uintptr_t)&x1, sizeof(x1)); } starpu_mpi_data_register(data_handlesx0, 100, 0); starpu_mpi_data_register(data_handlesx1, 200, 1); node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_VALUE, &node, sizeof(node), STARPU_EXECUTE_ON_NODE, 0, STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, 0); assert(err == 0); node = starpu_data_get_rank(data_handlesx1); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_VALUE, &node, sizeof(node), STARPU_EXECUTE_ON_DATA, data_handlesx1, STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, 0); assert(err == 0); // Node 1 has a long long data which has a bigger size than a // int, so it is going to be selected by the node selection // policy to execute the codelet err = starpu_mpi_node_selection_set_current_policy(STARPU_MPI_NODE_SELECTION_MOST_R_DATA); assert(err == 0); node = 1; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, 0); assert(err == 0); FPRINTF_MPI(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); starpu_data_unregister(data_handlesx0); starpu_data_unregister(data_handlesx1); end: starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/insert_task_owner.c000066400000000000000000000133241507764646700216460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" void func_cpu(void *descr[], void *_args) { int node; int rank; (void)descr; starpu_codelet_unpack_args(_args, &node); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); FPRINTF_MPI(stderr, "Expected node: %d - Actual node: %d\n", node, rank); assert(node == rank); } struct starpu_codelet mycodelet_r_w = { .cpu_funcs = {func_cpu}, .nbuffers = 2, .modes = {STARPU_R, STARPU_W}, .model = &starpu_perfmodel_nop, }; struct starpu_codelet mycodelet_rw_r = { .cpu_funcs = {func_cpu}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_R}, .model = &starpu_perfmodel_nop, }; struct starpu_codelet mycodelet_rw_rw = { .cpu_funcs = {func_cpu}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_RW}, .model = &starpu_perfmodel_nop, }; struct starpu_codelet mycodelet_w_r = { .cpu_funcs = {func_cpu}, .nbuffers = 2, .modes = {STARPU_W, STARPU_R}, .model = &starpu_perfmodel_nop, }; struct starpu_codelet mycodelet_r_r = { .cpu_funcs = {func_cpu}, .nbuffers = 2, .modes = {STARPU_R, STARPU_R}, .model = &starpu_perfmodel_nop, }; int main(int argc, char **argv) { int ret, rank, size, err, node; long x0=32; int x1=23; starpu_data_handle_t data_handlesx0 = NULL; starpu_data_handle_t data_handlesx1 = NULL; struct starpu_conf conf; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (starpu_cpu_worker_get_count() == 0) { if (rank == 0) FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } if (rank != 0 && rank != 1) goto end; if (rank == 0) { starpu_variable_data_register(&data_handlesx0, STARPU_MAIN_RAM, (uintptr_t)&x0, sizeof(x0)); starpu_mpi_data_register(data_handlesx0, 0, rank); starpu_variable_data_register(&data_handlesx1, -1, (uintptr_t)NULL, sizeof(x1)); starpu_mpi_data_register(data_handlesx1, 1, 1); } else if (rank == 1) { starpu_variable_data_register(&data_handlesx1, STARPU_MAIN_RAM, (uintptr_t)&x1, sizeof(x1)); starpu_mpi_data_register(data_handlesx1, 1, rank); starpu_variable_data_register(&data_handlesx0, -1, (uintptr_t)NULL, sizeof(x0)); starpu_mpi_data_register(data_handlesx0, 0, 0); } node = starpu_mpi_data_get_rank(data_handlesx1); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w, STARPU_VALUE, &node, sizeof(node), STARPU_R, data_handlesx0, STARPU_W, data_handlesx1, 0); assert(err == 0); node = starpu_mpi_data_get_rank(data_handlesx0); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_r, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_R, data_handlesx1, 0); assert(err == 0); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, 0); assert(err == 0); node = 1; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_r, STARPU_VALUE, &node, sizeof(node), STARPU_R, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); /* Here the value specified by the property STARPU_EXECUTE_ON_NODE is going to overwrite the node even though the data model clearly specifies which node is going to execute the codelet */ node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w, STARPU_VALUE, &node, sizeof(node), STARPU_R, data_handlesx0, STARPU_W, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); /* Here the value specified by the property STARPU_EXECUTE_ON_NODE is going to overwrite the node even though the data model clearly specifies which node is going to execute the codelet */ node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_w_r, STARPU_VALUE, &node, sizeof(node), STARPU_W, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); FPRINTF_MPI(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); starpu_data_unregister(data_handlesx0); starpu_data_unregister(data_handlesx1); end: starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/insert_task_owner2.c000066400000000000000000000105201507764646700217230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" void func_cpu(void *descr[], void *_args) { (void)_args; int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); int *x1 = (int *)STARPU_VARIABLE_GET_PTR(descr[1]); int *x2 = (int *)STARPU_VARIABLE_GET_PTR(descr[2]); int *y = (int *)STARPU_VARIABLE_GET_PTR(descr[3]); FPRINTF(stderr, "-------> CODELET VALUES: %d %d nan %d\n", *x0, *x1, *y); *x2 = *y; *y = (*x0 + *x1) * 100; *x1 = 12; FPRINTF(stderr, "-------> CODELET VALUES: %d %d %d %d\n", *x0, *x1, *x2, *y); } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 4, .modes = {STARPU_R, STARPU_RW, STARPU_W, STARPU_RW}, .model = &starpu_perfmodel_nop, }; int main(int argc, char **argv) { int rank, size, err; int x[3], y=0; int oldx[3]; int i, ret=0; starpu_data_handle_t data_handles[4]; struct starpu_conf conf; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (starpu_cpu_worker_get_count() == 0) { if (rank == 0) FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } if (rank == 0) { for(i=0 ; i<3 ; i++) { x[i] = 10*(i+1); oldx[i] = 10*(i+1); starpu_variable_data_register(&data_handles[i], STARPU_MAIN_RAM, (uintptr_t)&x[i], sizeof(x[i])); } y = -1; starpu_variable_data_register(&data_handles[3], -1, (uintptr_t)NULL, sizeof(int)); } else { for(i=0 ; i<3 ; i++) { x[i] = -1; starpu_variable_data_register(&data_handles[i], -1, (uintptr_t)NULL, sizeof(int)); } y=200; starpu_variable_data_register(&data_handles[3], STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(int)); } for(i=0 ; i<3 ; i++) { starpu_mpi_data_register(data_handles[i], i, 0); } starpu_mpi_data_register(data_handles[3], 3, 1); FPRINTF(stderr, "[%d][init] VALUES: %d %d %d %d\n", rank, x[0], x[1], x[2], y); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_R, data_handles[0], STARPU_RW, data_handles[1], STARPU_W, data_handles[2], STARPU_RW, data_handles[3], STARPU_EXECUTE_ON_NODE, 1, 0); STARPU_CHECK_RETURN_VALUE(err, "starpu_mpi_task_insert"); starpu_task_wait_for_all(); int *values = malloc(4 * sizeof(int)); for(i=0 ; i<4 ; i++) { starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[i], 0, NULL, NULL); if (rank == 0) { starpu_data_acquire(data_handles[i], STARPU_R); values[i] = *((int *)starpu_data_get_local_ptr(data_handles[i])); starpu_data_release(data_handles[i]); } starpu_data_unregister(data_handles[i]); } if (rank == 0) { FPRINTF(stderr, "[%d][local ptr] VALUES: %d %d %d %d\n", rank, values[0], values[1], values[2], values[3]); if (values[0] != oldx[0] || values[1] != 12 || values[2] != 200 || values[3] != ((oldx[0] + oldx[1]) * 100)) { FPRINTF(stderr, "[%d][error] values[0] %d != x[0] %d && values[1] %d != 12 && values[2] %d != 200 && values[3] %d != ((x[0] %d + x[1] %d) * 100)\n", rank, values[0], oldx[0], values[1], values[2], values[3], oldx[0], oldx[1]); ret = 1; } else { FPRINTF(stderr, "[%d] correct computation\n", rank); } } FPRINTF(stderr, "[%d][end] VALUES: %d %d %d %d\n", rank, x[0], x[1], x[2], y); free(values); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return (rank == 0) ? ret : 0; } starpu-1.4.9+dfsg/mpi/tests/insert_task_owner_data.c000066400000000000000000000067051507764646700226440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" void func_cpu(void *descr[], void *_args) { (void)_args; int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); int *x1 = (int *)STARPU_VARIABLE_GET_PTR(descr[1]); *x0 += 1; *x1 *= *x1; } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_RW}, .model = &starpu_perfmodel_nop, }; int main(int argc, char **argv) { int rank, size, err; int x[2]; int ret, i; starpu_data_handle_t data_handles[2]; int values[2]; struct starpu_conf conf; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (starpu_cpu_worker_get_count() == 0) { if (rank == 0) FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } if (rank == 0) { x[0] = 11; starpu_variable_data_register(&data_handles[0], STARPU_MAIN_RAM, (uintptr_t)&x[0], sizeof(x[0])); starpu_variable_data_register(&data_handles[1], -1, (uintptr_t)NULL, sizeof(x[1])); } else if (rank == 1) { x[1] = 12; starpu_variable_data_register(&data_handles[0], -1, (uintptr_t)NULL, sizeof(x[0])); starpu_variable_data_register(&data_handles[1], STARPU_MAIN_RAM, (uintptr_t)&x[1], sizeof(x[1])); } else { starpu_variable_data_register(&data_handles[0], -1, (uintptr_t)NULL, sizeof(x[0])); starpu_variable_data_register(&data_handles[1], -1, (uintptr_t)NULL, sizeof(x[1])); } starpu_mpi_data_register(data_handles[0], 0, 0); starpu_mpi_data_register(data_handles[1], 1, 1); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], STARPU_EXECUTE_ON_DATA, data_handles[1], 0); assert(err == 0); starpu_task_wait_for_all(); for(i=0 ; i<2 ; i++) { starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[i], 0, NULL, NULL); if (rank == 0) { starpu_data_acquire(data_handles[i], STARPU_R); values[i] = *((int *)starpu_data_get_local_ptr(data_handles[i])); starpu_data_release(data_handles[i]); } } ret = 0; if (rank == 0) { FPRINTF(stderr, "[%d][local ptr] VALUES: %d %d\n", rank, values[0], values[1]); if (values[0] != 12 || values[1] != 144) { ret = EXIT_FAILURE; } } starpu_data_unregister(data_handles[0]); starpu_data_unregister(data_handles[1]); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return ret; } starpu-1.4.9+dfsg/mpi/tests/insert_task_recv_cache.c000066400000000000000000000122051507764646700225730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "helper.h" #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else void func_cpu(void *descr[], void *_args) { (void)descr; (void)_args; } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_R}, .model = &starpu_perfmodel_nop, }; #define NB_ELEMENTS 1000 #define NB_DATA 2 /* Returns the MPI node number where data indexes index is */ int my_distrib(int x) { return x; } void test_cache(int rank, starpu_mpi_tag_t initial_tag, char *enabled, size_t *comm_amount) { int i; int ret; unsigned *v[NB_DATA]; starpu_data_handle_t data_handles[NB_DATA]; struct starpu_conf conf; FPRINTF(stderr, "Testing with STARPU_MPI_CACHE=%s\n", enabled); setenv("STARPU_MPI_CACHE", enabled, 1); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); for(i = 0; i < NB_DATA; i++) { int j; v[i] = calloc(NB_ELEMENTS, sizeof(unsigned)); for(j=0 ; j #include #include #include "helper.h" #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else void func_cpu(void *descr[], void *_args) { (void)descr; (void)_args; } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_R}, .model = &starpu_perfmodel_nop, }; #define NB_ELEMENTS 1000 #define NB_DATA 2 /* Returns the MPI node number where data indexes index is */ int my_distrib(int x) { return x; } void test_cache(int rank, starpu_mpi_tag_t initial_tag, char *enabled, size_t *comm_amount) { int i; int ret; unsigned *v[NB_DATA]; starpu_data_handle_t data_handles[NB_DATA]; struct starpu_conf conf; setenv("STARPU_MPI_CACHE", enabled, 1); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_coop_sends_set_use(0); // disable coop_sends to avoid having wrong results when cache is disabled for(i = 0; i < NB_DATA; i++) { int j; starpu_malloc((void **)&v[i], NB_ELEMENTS * sizeof(unsigned)); for(j=0 ; j #include #include "helper.h" #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(int argc, char **argv) { return STARPU_TEST_SKIPPED; } #else void func_cpu(void *descr[], void *_args) { (void) descr; (void) _args; } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_R}, .model = &starpu_perfmodel_nop, }; struct starpu_codelet mycodelet2 = { .cpu_funcs = {func_cpu}, .nbuffers = 1, .modes = {STARPU_RW}, .model = &starpu_perfmodel_nop, }; #define X 4 /* Returns the MPI node number where data is */ int my_distrib(int x, int nb_nodes) { return x % nb_nodes; } void dotest(int rank, int size, starpu_mpi_tag_t initial_tag, char *enabled) { int x, i; int ret; unsigned values[X]; starpu_data_handle_t data_handles[X]; struct starpu_conf conf; setenv("STARPU_MPI_CACHE", enabled, 1); FPRINTF(stderr, "Testing with cache '%s'\n", enabled); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); for(x = 0; x < X; x++) { values[x] = (rank+1)*10; } for(x = 0; x < X; x++) { int mpi_rank = my_distrib(x, size); if (mpi_rank == rank) { starpu_variable_data_register(&data_handles[x], STARPU_MAIN_RAM, (uintptr_t)&(values[x]), sizeof(unsigned)); } else { /* I don't own this index, but will need it for my computations */ starpu_variable_data_register(&data_handles[x], -1, (uintptr_t)NULL, sizeof(unsigned)); } if (data_handles[x]) { starpu_mpi_data_register(data_handles[x], initial_tag+x, mpi_rank); } } for(i = 0 ; i #include #include "helper.h" void func_cpu(void *descr[], void *_args) { (void) _args; (void) descr; FPRINTF_MPI(stderr, "Hello\n"); } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 1, .modes = {STARPU_RW}, .model = &starpu_perfmodel_nop, .name = "insert_task_tags" }; int main(int argc, char **argv) { int ret, rank, err; int x=32; starpu_data_handle_t handle0; starpu_data_handle_t handle1; int64_t *value; struct starpu_conf conf; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); if (rank != 0 && rank != 1) goto end; starpu_variable_data_register(&handle0, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); starpu_variable_data_register(&handle1, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); starpu_mpi_comm_get_attr(MPI_COMM_WORLD, STARPU_MPI_TAG_UB, &value, &err); assert(err == 1); starpu_mpi_data_register(handle0, (*value)-1, 1); starpu_mpi_data_register(handle1, (*value)-2, 1); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_EXECUTE_ON_NODE, 0, STARPU_RW, handle0, 0); assert(err == 0); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_EXECUTE_ON_NODE, 1, STARPU_RW, handle1, 0); assert(err == 0); FPRINTF_MPI(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); starpu_data_unregister(handle0); starpu_data_unregister(handle1); end: starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/load_balancer.c000066400000000000000000000037031507764646700206540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" #if !defined(STARPU_HAVE_UNSETENV) || !defined(STARPU_USE_MPI_MPI) #warning unsetenv is not defined. Skipping test int main(int argc, char **argv) { return STARPU_TEST_SKIPPED; } #else void get_neighbors(int **neighbor_ids, int *nneighbors) { int rank, size; starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); *nneighbors = 1; *neighbor_ids = malloc(sizeof(int)); *neighbor_ids[0] = rank==size-1?0:rank+1; } void get_data_unit_to_migrate(starpu_data_handle_t **handle_unit, int *nhandles, int dst_node) { (void)handle_unit; (void)dst_node; *nhandles = 0; } int main(int argc, char **argv) { int ret; struct starpu_mpi_lb_conf itf; int mpi_init; itf.get_neighbors = get_neighbors; itf.get_data_unit_to_migrate = get_data_unit_to_migrate; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); unsetenv("STARPU_MPI_LB"); starpu_mpi_lb_init(NULL, NULL); starpu_mpi_lb_shutdown(); starpu_mpi_lb_init("heat", &itf); starpu_mpi_lb_shutdown(); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } #endif starpu-1.4.9+dfsg/mpi/tests/loader.c000066400000000000000000000274611507764646700173630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) #include #else #include #endif #ifdef STARPU_QUICK_CHECK /* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s add some extra times for tests which run with all schedulers */ #define DEFAULT_TIMEOUT 100 #elif !defined(STARPU_LONG_CHECK) /* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ #define DEFAULT_TIMEOUT 300 #else /* Long checks can be very long */ #define DEFAULT_TIMEOUT 1000 #endif #define AUTOTEST_SKIPPED_TEST 77 static pid_t child_pid = 0; static int timeout; #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) static int mygettimeofday(struct timeval *tv, void *tz) { if (tv) { FILETIME ft; unsigned long long res; GetSystemTimeAsFileTime(&ft); /* 100-nanosecond intervals since January 1, 1601 */ res = ft.dwHighDateTime; res <<= 32; res |= ft.dwLowDateTime; res /= 10; /* Now we have microseconds */ res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; /* Now we are based on epoch */ tv->tv_sec = res / 1000000ULL; tv->tv_usec = res % 1000000ULL; } } #else #define mygettimeofday(tv,tz) gettimeofday(tv,tz) #endif #ifdef STARPU_GDB_PATH static int try_launch_gdb(const char *exe, const char *core) { # define GDB_COMMANDS \ "-ex", "py-list", \ "-ex", "starpu-tasks", \ "-ex", "starpu-workers", \ "-ex", "starpu-print-datas-summary", \ "-ex", "starpu-memusage", \ "-ex", "starpu-print-archs", \ "-ex", "starpu-print-registered-models", \ "-ex", "bt full", \ "-ex", "py-bt", \ "-ex", "thread apply all bt full", \ "-ex", "thread apply all py-bt", \ int err; pid_t pid; struct stat st; const char *top_builddir; char *gdb; err = stat(core, &st); if (err != 0) { fprintf(stderr, "while looking for core file of %s: %s: %m\n", exe, core); return -1; } if (!(st.st_mode & S_IFREG)) { fprintf(stderr, "%s: not a regular file\n", core); return -1; } top_builddir = getenv("top_builddir"); pid = fork(); switch (pid) { case 0: /* kid */ if (top_builddir != NULL) { /* Run gdb with Libtool. */ gdb = alloca(strlen(top_builddir) + sizeof("/libtool") + 1); strcpy(gdb, top_builddir); strcat(gdb, "/libtool"); err = execl(gdb, "gdb", "--mode=execute", STARPU_GDB_PATH, "--batch", GDB_COMMANDS exe, core, NULL); } else { /* Run gdb directly */ gdb = STARPU_GDB_PATH; err = execl(gdb, "gdb", "--batch", GDB_COMMANDS exe, core, NULL); } if (err != 0) { fprintf(stderr, "while launching `%s': %m\n", gdb); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); break; case -1: fprintf(stderr, "fork: %m\n"); return -1; default: /* parent */ { pid_t who; int status; who = waitpid(pid, &status, 0); if (who != pid) fprintf(stderr, "while waiting for gdb " "process %d: %m\n", pid); } } return 0; # undef GDB_COMMANDS } #endif /* STARPU_GDB_PATH */ static void launch_gdb(const char *exe) { #ifdef STARPU_GDB_PATH char s[32]; snprintf(s, sizeof(s), "core.%d", child_pid); if (try_launch_gdb(exe, s) < 0) try_launch_gdb(exe, "core"); #endif /* STARPU_GDB_PATH */ } static char *test_name; static void test_cleaner(int sig) { pid_t child_gid; int status; (void) sig; // send signal to all loader family members fprintf(stderr, "[error] test %s has been blocked for %d seconds. Mark it as failed\n", test_name, timeout); child_gid = getpgid(child_pid); kill(-child_gid, SIGQUIT); waitpid(child_pid, &status, 0); launch_gdb(test_name); raise(SIGALRM); exit(EXIT_FAILURE); } static void forwardsig(int sig) { pid_t child_gid; child_gid = getpgid(child_pid); kill(-child_gid, sig); } static int _decode(char **src, char *motif, const char *value) { char *found; found = strstr(*src, motif); if (found == NULL) return 0; char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); strncpy(new_src, *src, found - *src); strcat(new_src, value); strcat(new_src, found+strlen(motif)); *src = new_src; return 1; } static void decode(char **src, char *motif, const char *value) { if (*src) { if (strstr(*src, motif) && value == NULL) { fprintf(stderr, "error: $%s undefined\n", motif); exit(EXIT_FAILURE); } int d = _decode(src, motif, value); while (d) d = _decode(src, motif, value); } } int main(int argc, char *argv[]) { int child_exit_status; char *test_args; char *launcher; char *launcher_args; char *libtool; char *cflags; const char *top_builddir = getenv("top_builddir"); struct sigaction sa; int ret; struct timeval start; struct timeval end; double timing; int x=1; int asan = 0, lsan = 0, tsan = 0, usan = 0; (void) argc; test_args = NULL; timeout = 0; launcher=getenv("STARPU_CHECK_LAUNCHER"); launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); cflags = getenv("CFLAGS"); if (cflags) { if (strstr(cflags, "-fsanitize=address")) asan = 1; if (strstr(cflags, "-fsanitize=leak")) lsan = 1; if (strstr(cflags, "-fsanitize=thread")) tsan = 1; if (strstr(cflags, "-fsanitize=undefined")) usan = 1; } if (argv[x] && strcmp(argv[x], "-t") == 0) { timeout = strtol(argv[x+1], NULL, 10); x += 2; } else if (getenv("STARPU_TIMEOUT_ENV")) { /* get user-defined iter_max value */ timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); } else if (timeout <= 0) { timeout = DEFAULT_TIMEOUT; if ((launcher && strstr(launcher, "valgrind")) || (launcher && strstr(launcher, "helgrind")) || tsan) timeout *= 20; if (asan || usan || lsan || (launcher && strstr(launcher, "compute-sanitizer"))) timeout *= 5; if (timeout > 1750) timeout = 1750; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG timeout *= 20; #endif #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE /* compare values between the 2 values of timeout */ if (getenv("MPIEXEC_TIMEOUT")) { int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); if (mpiexec_timeout != timeout) fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); } #endif if (argv[x] && strcmp(argv[x], "-p") == 0) { test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); x += 3; } else { test_name = argv[x]; x += 1; } if (!test_name) { fprintf(stderr, "[error] Need name of program to start\n"); exit(EXIT_FAILURE); } size_t len = strlen(test_name); if (len >= 3 && test_name[len-3] == '.' && test_name[len-2] == 's' && test_name[len-1] == 'h') { /* This is a shell script, don't run ourself on bash, but make * the script call us for each program invocation */ char *launch = NULL; if (top_builddir == NULL) // this may fail if .libs is in the directory path setenv("STARPU_LAUNCH", argv[0], 1); else { launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); strcpy(launch, top_builddir); strcat(launch, "/tests/loader"); setenv("STARPU_LAUNCH", launch, 1); } execvp(test_name, argv+x-1); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); free(launch); exit(EXIT_FAILURE); } if (strstr(test_name, "spmv/dw_block_spmv")) { test_args = (char *) calloc(512, sizeof(char)); snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); } else if (strstr(test_name, "starpu_perfmodel_display")) { if (x >= argc) test_args = strdup("-l"); } else if (strstr(test_name, "starpu_perfmodel_plot")) { if (x >= argc) test_args = strdup("-l"); } /* get launcher program */ if (launcher_args) launcher_args=strdup(launcher_args); if (top_builddir == NULL) { fprintf(stderr, "warning: $top_builddir undefined, " "so $STARPU_CHECK_LAUNCHER ignored\n"); launcher = NULL; launcher_args = NULL; libtool = NULL; } else { libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); strcpy(libtool, top_builddir); strcat(libtool, "/libtool"); } if (launcher) { const char *top_srcdir = getenv("top_srcdir"); decode(&launcher, "@top_srcdir@", top_srcdir); decode(&launcher_args, "@top_srcdir@", top_srcdir); } setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); /* set SIGALARM handler */ sa.sa_flags = SA_RESETHAND | SA_NODEFER; sigemptyset(&sa.sa_mask); sa.sa_handler = test_cleaner; if (-1 == sigaction(SIGALRM, &sa, NULL)) perror("sigaction"); signal(SIGINT, forwardsig); signal(SIGHUP, forwardsig); signal(SIGPIPE, forwardsig); signal(SIGTERM, forwardsig); child_pid = fork(); if (child_pid == 0) { char *launcher_argv[100]; int i=0; setpgid(0, 0); /* "Launchers" such as Valgrind need to be inserted * after the Libtool-generated wrapper scripts, hence * this special-case. */ if (launcher && top_builddir != NULL) { launcher_argv[i++] = libtool; launcher_argv[i++] = "--mode=execute"; launcher_argv[i++] = launcher; if (launcher_args) { launcher_argv[i++] = strtok(launcher_args, " "); while (launcher_argv[i-1]) { launcher_argv[i++] = strtok(NULL, " "); } } } launcher_argv[i++] = test_name; if (test_args) launcher_argv[i++] = test_args; else while (argv[x]) { launcher_argv[i++] = argv[x++]; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG launcher_argv[i++] = "--cfg=contexts/factory:thread"; #endif #endif launcher_argv[i++] = NULL; execvp(*launcher_argv, launcher_argv); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); exit(EXIT_FAILURE); } if (child_pid == -1) { fprintf(stderr, "[error] fork. test marked as failed\n"); exit(EXIT_FAILURE); } free(test_args); free(libtool); ret = EXIT_SUCCESS; gettimeofday(&start, NULL); alarm(timeout); if (child_pid == waitpid(child_pid, &child_exit_status, 0)) { if (WIFEXITED(child_exit_status)) { int status = WEXITSTATUS(child_exit_status); if (status == EXIT_SUCCESS) { alarm(0); } else { if (status != AUTOTEST_SKIPPED_TEST) fprintf(stdout, "`%s' exited with return code %d\n", test_name, status); ret = status; } } else if (WIFSIGNALED(child_exit_status)) { fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", test_name, WTERMSIG(child_exit_status)); launch_gdb(test_name); ret = EXIT_FAILURE; } else { fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", test_name); ret = EXIT_FAILURE; } } gettimeofday(&end, NULL); timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); return ret; } starpu-1.4.9+dfsg/mpi/tests/matrix.c000066400000000000000000000101621507764646700174070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" void func_cpu(void *descr[], void *_args) { (void)_args; unsigned *A = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); unsigned *X = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); unsigned *Y = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[2]); FPRINTF_MPI(stderr, "VALUES: Y=%3u A=%3u X=%3u\n", *Y, *A, *X); *Y = *Y + *A * *X; } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_RW}, .model = &starpu_perfmodel_nop, }; #define N 4 int main(int argc, char **argv) { int rank, n; int ret; unsigned A[N]; unsigned X[N]; unsigned Y; starpu_data_handle_t data_A[N]; starpu_data_handle_t data_X[N]; starpu_data_handle_t data_Y; struct starpu_conf conf; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); if (starpu_cpu_worker_get_count() == 0) { if (rank == 0) FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } for(n = 0; n < N; n++) { A[n] = (n+1)*10; X[n] = n+1; } Y = 0; FPRINTF_MPI(stderr, "A = "); for(n = 0; n < N; n++) { FPRINTF(stderr, "%u ", A[n]); } FPRINTF(stderr, "\n"); FPRINTF_MPI(stderr, "X = "); for(n = 0; n < N; n++) { FPRINTF(stderr, "%u ", X[n]); } FPRINTF(stderr, "\n"); for(n = 0; n < N; n++) { if (rank == n%2) starpu_variable_data_register(&data_A[n], STARPU_MAIN_RAM, (uintptr_t)&A[n], sizeof(unsigned)); else starpu_variable_data_register(&data_A[n], -1, (uintptr_t)NULL, sizeof(unsigned)); starpu_mpi_data_register(data_A[n], n+100, n%2); FPRINTF_MPI(stderr, "Registering A[%d] to %p with tag %d and node %d\n", n, data_A[n], n+100, n%2); if (rank == n%2) starpu_variable_data_register(&data_X[n], STARPU_MAIN_RAM, (uintptr_t)&X[n], sizeof(unsigned)); else starpu_variable_data_register(&data_X[n], -1, (uintptr_t)NULL, sizeof(unsigned)); starpu_mpi_data_register(data_X[n], n+200, n%2); FPRINTF_MPI(stderr, "Registering X[%d] to %p with tag %d and node %d\n", n, data_X[n], n+200, n%2); } if (rank == 0) starpu_variable_data_register(&data_Y, STARPU_MAIN_RAM, (uintptr_t)&Y, sizeof(unsigned)); else starpu_variable_data_register(&data_Y, -1, (uintptr_t)NULL, sizeof(unsigned)); starpu_mpi_data_register(data_Y, 10, 0); FPRINTF_MPI(stderr, "Registering Y to %p with tag %d and node %d\n", data_Y, 10, 0); for(n = 0; n < N; n++) { ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_R, data_A[n], STARPU_R, data_X[n], STARPU_RW, data_Y, STARPU_EXECUTE_ON_DATA, data_A[n], 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); } FPRINTF(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); for(n = 0; n < N; n++) { starpu_data_unregister(data_A[n]); starpu_data_unregister(data_X[n]); } starpu_data_unregister(data_Y); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); FPRINTF(stdout, "[%d] Y=%u\n", rank, Y); #ifndef STARPU_SIMGRID if (rank == 0) { STARPU_ASSERT_MSG(Y==300, "Error when calculating Y=%u\n", Y); } #endif return 0; } starpu-1.4.9+dfsg/mpi/tests/matrix2.c000066400000000000000000000074471507764646700175050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" void func_cpu(void *descr[], void *_args) { (void)_args; unsigned *A = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); unsigned *X = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); unsigned *Y = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[2]); FPRINTF_MPI(stderr, "VALUES: Y=%3u A=%3u X=%3u\n", *Y, *A, *X); *Y = *Y + *A * *X; } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_RW}, .model = &starpu_perfmodel_nop, }; #define N 4 int main(int argc, char **argv) { int rank, size; int n; int ret; unsigned A[N]; unsigned X[N]; starpu_data_handle_t data_A[N]; starpu_data_handle_t data_X[N]; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(NULL, NULL, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if ((size < 3) || (starpu_cpu_worker_get_count() == 0)) { if (rank == 0) { if (size < 3) FPRINTF(stderr, "We need at least 3 processes.\n"); else FPRINTF(stderr, "We need at least 1 CPU worker.\n"); } starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } for(n = 0; n < N; n++) { A[n] = (n+1)*10; X[n] = n+1; } FPRINTF_MPI(stderr, "A = "); for(n = 0; n < N; n++) { FPRINTF(stderr, "%u ", A[n]); } FPRINTF(stderr, "\n"); FPRINTF_MPI(stderr, "X = "); for(n = 0; n < N; n++) { FPRINTF(stderr, "%u ", X[n]); } FPRINTF(stderr, "\n"); for(n = 0; n < N; n++) { if (rank == n%2) starpu_variable_data_register(&data_A[n], STARPU_MAIN_RAM, (uintptr_t)&A[n], sizeof(unsigned)); else starpu_variable_data_register(&data_A[n], -1, (uintptr_t)NULL, sizeof(unsigned)); starpu_mpi_data_register(data_A[n], n+100, n%2); FPRINTF_MPI(stderr, "Registering A[%d] to %p with tag %d and node %d\n", n,data_A[n], n+100, n%2); } for(n = 0; n < N; n++) { if (rank == 2) starpu_variable_data_register(&data_X[n], STARPU_MAIN_RAM, (uintptr_t)&X[n], sizeof(unsigned)); else starpu_variable_data_register(&data_X[n], -1, (uintptr_t)NULL, sizeof(unsigned)); starpu_mpi_data_register(data_X[n], n+200, 2); FPRINTF_MPI(stderr, "Registering X[%d] to %p with tag %d and node %d\n", n, data_X[n], n+200, 2); } for(n = 0; n < N-1; n++) { ret = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_R, data_A[n], STARPU_R, data_X[n], STARPU_RW, data_X[N-1], STARPU_EXECUTE_ON_DATA, data_A[n], 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_task_insert"); } FPRINTF(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); for(n = 0; n < N; n++) { starpu_data_unregister(data_A[n]); starpu_data_unregister(data_X[n]); } starpu_mpi_shutdown(); FPRINTF(stdout, "[%d] X[%d]=%u\n", rank, N-1, X[N-1]); #ifndef STARPU_SIMGRID if (rank == 2) { STARPU_ASSERT_MSG(X[N-1]==144, "Error when calculating X[N-1]=%u\n", X[N-1]); } #endif if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/mpi_barrier.c000066400000000000000000000021121507764646700203720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" int main(int argc, char **argv) { int ret, mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_barrier(MPI_COMM_WORLD); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/mpi_data_cpy.c000066400000000000000000000057421507764646700205440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #define DATA_TAG 666 #define INC_COUNT 10 void func_cpu(void *descr[], void *_args) { int rank; int *value = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); starpu_codelet_unpack_args(_args, &rank); FPRINTF(stderr, "[rank %d] value in %d\n", rank, *value); (*value)++; FPRINTF(stderr, "[rank %d] value out %d\n", rank, *value); } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 1, .modes = {STARPU_RW}, .model = &starpu_perfmodel_nop, .name = "increment", .flags = STARPU_CODELET_SIMGRID_EXECUTE, }; int main(int argc, char **argv) { int size, rank; int ret; int value = 0; starpu_data_handle_t *data; int mpi_init; int i; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); data = (starpu_data_handle_t*)malloc(size*sizeof(starpu_data_handle_t)); for(i=0; i #include "helper.h" #ifdef STARPU_QUICK_CHECK # define NITER 16 #elif !defined(STARPU_LONG_CHECK) # define NITER 256 #else # define NITER 2048 #endif #define SIZE 16 float *tab; starpu_data_handle_t tab_handle; int main(int argc, char **argv) { int ret, rank, size; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size%2 != 0) { if (rank == 0) FPRINTF(stderr, "We need a even number of processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } tab = calloc(SIZE, sizeof(float)); starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float)); int nloops = NITER; int loop; int other_rank = rank%2 == 0 ? rank+1 : rank-1; for (loop = 0; loop < nloops; loop++) { starpu_tag_t tag = (starpu_tag_t)loop; if ((loop % 2) == (rank%2)) { ret = starpu_mpi_isend_detached_unlock_tag(tab_handle, other_rank, loop, MPI_COMM_WORLD, tag); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached_unlock_tag"); } else { ret= starpu_mpi_irecv_detached_unlock_tag(tab_handle, other_rank, loop, MPI_COMM_WORLD, tag); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached_unlock_tag"); } starpu_tag_wait(tag); } starpu_data_unregister(tab_handle); free(tab); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/mpi_earlyrecv.c000066400000000000000000000107411507764646700207470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #include int main(int argc, char **argv) { int ret, rank, size, i; starpu_data_handle_t tab_handle[4]; int values[4]; starpu_mpi_req request[2] = {NULL, NULL}; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size%2 != 0) { FPRINTF_MPI(stderr, "We need a even number of processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } for(i=0 ; i<4 ; i++) { if (i<3 || rank%2) { // all data are registered on all nodes, but the 4th data which is not registered on the receiving node values[i] = (rank+1) * (i+1); starpu_variable_data_register(&tab_handle[i], STARPU_MAIN_RAM, (uintptr_t)&values[i], sizeof(values[i])); starpu_mpi_data_register(tab_handle[i], i, rank); } } int other_rank = rank%2 == 0 ? rank+1 : rank-1; FPRINTF_MPI(stderr, "rank %d exchanging with rank %d\n", rank, other_rank); if (rank%2) { FPRINTF_MPI(stderr, "Sending values %d and %d to node %d\n", values[0], values[3], other_rank); // this data will be received as an early registered data ret = starpu_mpi_isend(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); // this data will be received as an early UNregistered data ret = starpu_mpi_isend(tab_handle[3], &request[1], other_rank, 3, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); ret = starpu_mpi_send(tab_handle[1], other_rank, 1, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_recv(tab_handle[2], other_rank, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); } else { ret = starpu_mpi_recv(tab_handle[1], other_rank, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); ret = starpu_mpi_send(tab_handle[2], other_rank, 2, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); // we register the data starpu_variable_data_register(&tab_handle[3], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register(tab_handle[3], 3, rank); ret = starpu_mpi_irecv(tab_handle[3], &request[1], other_rank, 3, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); ret = starpu_mpi_irecv(tab_handle[0], &request[0], other_rank, 0, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); } int finished=0; while (!finished) { for(i=0 ; i<2 ; i++) { if (request[i]) { int flag; MPI_Status status; ret = starpu_mpi_test(&request[i], &flag, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); if (flag) FPRINTF_MPI(stderr, "request[%d] = %d %p\n", i, flag, request[i]); } } finished = request[0] == NULL && request[1] == NULL; #ifdef STARPU_SIMGRID starpu_sleep(0.001); #endif } if (rank%2 == 0) { void *ptr0; void *ptr3; starpu_data_acquire(tab_handle[0], STARPU_RW); ptr0 = starpu_data_get_local_ptr(tab_handle[0]); starpu_data_release(tab_handle[0]); starpu_data_acquire(tab_handle[3], STARPU_RW); ptr3 = starpu_data_get_local_ptr(tab_handle[3]); starpu_data_release(tab_handle[3]); ret = (*((int *)ptr0) == (other_rank+1)*1) && (*((int *)ptr3) == (other_rank+1)*4); ret = !ret; FPRINTF_MPI(stderr, "[%s] Received values %d and %d from node %d\n", ret?"FAILURE":"SUCCESS", *((int *)ptr0), *((int *)ptr3), other_rank); } for(i=0 ; i<4 ; i++) starpu_data_unregister(tab_handle[i]); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? ret : 0; } starpu-1.4.9+dfsg/mpi/tests/mpi_earlyrecv2.c000066400000000000000000000172471507764646700210410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #include #include #define NB 10 static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; void callback(void *arg) { unsigned *received = arg; STARPU_PTHREAD_MUTEX_LOCK(&mutex); *received = *received + 1; FPRINTF_MPI(stderr, "Requests %u received\n", *received); STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } typedef void (*check_func)(starpu_data_handle_t handle, int i, int rank, int *error); int exchange(int rank, starpu_data_handle_t *handles, starpu_mpi_tag_t initial_tag, check_func func, int detached) { int other_rank = rank%2 == 0 ? rank+1 : rank-1; int i; int ret; if (rank%2) { ret = starpu_mpi_send(handles[0], other_rank, initial_tag+0, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_send(handles[NB-1], other_rank, initial_tag+NB-1, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); for(i=1 ; i #include "helper.h" #include #include #define NB 6 typedef void (*check_func)(starpu_data_handle_t handle, int i, int rank, int *error); int exchange(int rank, starpu_data_handle_t *handles, starpu_mpi_tag_t initial_tag, check_func func) { int other_rank = rank%2 == 0 ? rank+1 : rank-1; int i; int ret=0; starpu_mpi_req req[NB]; memset(req, 0, NB*sizeof(starpu_mpi_req)); if (rank%2) { ret = starpu_mpi_issend(handles[0], &req[0], other_rank, initial_tag+0, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_issend"); ret = starpu_mpi_issend(handles[NB-2], &req[NB-2], other_rank, initial_tag+NB-2, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_issend"); ret = starpu_mpi_isend(handles[NB-1], &req[NB-1], other_rank, initial_tag+NB-1, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); for(i=1 ; i #include "helper.h" #ifdef STARPU_QUICK_CHECK # define NITER 16 #else # define NITER 2048 #endif #define SIZE 16 float *tab; starpu_data_handle_t tab_handle; int main(int argc, char **argv) { int ret, rank, size; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size%2 != 0) { if (rank == 0) FPRINTF(stderr, "We need a even number of processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } tab = calloc(SIZE, sizeof(float)); starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float)); int nloops = NITER; int loop; int other_rank = rank%2 == 0 ? rank+1 : rank-1; for (loop = 0; loop < nloops; loop++) { if ((loop % 2) == (rank%2)) { ret = starpu_mpi_send(tab_handle, other_rank, loop, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } else { MPI_Status status; starpu_mpi_req req; ret = starpu_mpi_irecv(tab_handle, &req, other_rank, loop, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); ret = starpu_mpi_wait(&req, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); } } starpu_data_unregister(tab_handle); free(tab); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/mpi_irecv_detached.c000066400000000000000000000053041507764646700217030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define NITER 16 #elif !defined(STARPU_LONG_CHECK) # define NITER 256 #else # define NITER 2048 #endif #define SIZE 16 float *tab; starpu_data_handle_t tab_handle; static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; void callback(void *arg) { unsigned *received = arg; STARPU_PTHREAD_MUTEX_LOCK(&mutex); *received = 1; STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } int main(int argc, char **argv) { int ret, rank, size; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size%2 != 0) { if (rank == 0) FPRINTF(stderr, "We need a even number of processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } tab = calloc(SIZE, sizeof(float)); starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float)); int nloops = NITER; int loop; int other_rank = rank%2 == 0 ? rank+1 : rank-1; for (loop = 0; loop < nloops; loop++) { if ((loop % 2) == (rank%2)) { ret = starpu_mpi_send(tab_handle, other_rank, loop, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } else { int received = 0; ret = starpu_mpi_irecv_detached(tab_handle, other_rank, loop, MPI_COMM_WORLD, callback, &received); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); STARPU_PTHREAD_MUTEX_LOCK(&mutex); while (!received) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } } starpu_data_unregister(tab_handle); free(tab); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/mpi_isend.c000066400000000000000000000044201507764646700200520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define NITER 16 #else # define NITER 2048 #endif #define SIZE 16 float *tab; starpu_data_handle_t tab_handle; int main(int argc, char **argv) { int ret, rank, size; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size%2 != 0) { if (rank == 0) FPRINTF(stderr, "We need a even number of processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } tab = calloc(SIZE, sizeof(float)); starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float)); int nloops = NITER; int loop; int other_rank = rank%2 == 0 ? rank+1 : rank-1; for (loop = 0; loop < nloops; loop++) { if ((loop % 2) == (rank%2)) { MPI_Status status; starpu_mpi_req req; ret = starpu_mpi_isend(tab_handle, &req, other_rank, loop, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); ret = starpu_mpi_wait(&req, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); } else { MPI_Status status; ret = starpu_mpi_recv(tab_handle, other_rank, loop, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); } } starpu_data_unregister(tab_handle); free(tab); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/mpi_isend_detached.c000066400000000000000000000056121507764646700216770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define NITER 16 #elif !defined(STARPU_LONG_CHECK) # define NITER 256 #else # define NITER 2048 #endif #define SIZE 16 static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; void callback(void *arg) { unsigned *completed = arg; STARPU_PTHREAD_MUTEX_LOCK(&mutex); *completed = 1; STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } int main(int argc, char **argv) { int ret, rank, size; float *tab; starpu_data_handle_t tab_handle; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size%2 != 0) { if (rank == 0) FPRINTF(stderr, "We need a even number of processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } tab = calloc(SIZE, sizeof(float)); starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float)); int nloops = NITER; int loop; int other_rank = rank%2 == 0 ? rank+1 : rank-1; for (loop = 0; loop < nloops; loop++) { if ((loop % 2) == (rank%2)) { int sent = 0; ret = starpu_mpi_isend_detached(tab_handle, other_rank, loop, MPI_COMM_WORLD, callback, &sent); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); STARPU_PTHREAD_MUTEX_LOCK(&mutex); while (!sent) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } else { int received = 0; ret = starpu_mpi_irecv_detached(tab_handle, other_rank, loop, MPI_COMM_WORLD, callback, &received); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); STARPU_PTHREAD_MUTEX_LOCK(&mutex); while (!received) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } } starpu_data_unregister(tab_handle); free(tab); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/mpi_reduction.c000066400000000000000000000125241507764646700207500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" extern void init_cpu_func(void *descr[], void *cl_arg); extern void redux_cpu_func(void *descr[], void *cl_arg); extern void dot_cpu_func(void *descr[], void *cl_arg); extern void display_cpu_func(void *descr[], void *cl_arg); static struct starpu_codelet init_codelet = { .cpu_funcs = {init_cpu_func}, .nbuffers = 1, .modes = {STARPU_W}, #ifdef STARPU_SIMGRID .model = &starpu_perfmodel_nop, #endif .name = "init_codelet" }; static struct starpu_codelet redux_codelet = { .cpu_funcs = {redux_cpu_func}, .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, .nbuffers = 2, #ifdef STARPU_SIMGRID .model = &starpu_perfmodel_nop, #endif .name = "redux_codelet" }; static struct starpu_codelet dot_codelet = { .cpu_funcs = {dot_cpu_func}, .nbuffers = 2, .modes = {STARPU_R, STARPU_REDUX}, #ifdef STARPU_SIMGRID .model = &starpu_perfmodel_nop, #endif .name = "dot_codelet" }; static struct starpu_codelet display_codelet = { .cpu_funcs = {display_cpu_func}, .nbuffers = 1, .modes = {STARPU_R}, #ifdef STARPU_SIMGRID .model = &starpu_perfmodel_nop, #endif .name = "display_codelet" }; /* Returns the MPI node number where data indexes index is */ int my_distrib(int x, int nb_nodes) { return x % nb_nodes; } int main(int argc, char **argv) { int my_rank, size, x, y, i; long int *vector; long int dot, sum=0; starpu_data_handle_t *handles; starpu_data_handle_t dot_handle; struct starpu_conf conf; int ret; int nb_elements, step, loops; int mpi_init; STARPU_SKIP_IF_VALGRIND_RETURN_SKIP; /* Not supported yet */ if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) return STARPU_TEST_SKIPPED; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &my_rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (starpu_cpu_worker_get_count() == 0) { if (my_rank == 0) FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return my_rank == 0 ? STARPU_TEST_SKIPPED : 0; } nb_elements = size*8000; step = 4; loops = 5; starpu_malloc((void **)&vector, nb_elements*sizeof(long int)); for(x = 0; x < nb_elements; x+=step) { int mpi_rank = my_distrib(x/step, size); if (mpi_rank == my_rank) { for(y=0 ; y #include #include "helper.h" /* * Codelet to create a neutral element */ void init_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; long int *dot = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]); *dot = 0; FPRINTF_MPI(stderr, "Init dot\n"); } /* * Codelet to perform the reduction of two elements */ void redux_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; long int *dota = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]); long int *dotb = (long int *)STARPU_VARIABLE_GET_PTR(descr[1]); *dota = *dota + *dotb; FPRINTF_MPI(stderr, "Calling redux %ld=%ld+%ld\n", *dota, *dota-*dotb, *dotb); } /* * Dot product codelet */ void dot_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; long int *local_x = (long int *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); long int *dot = (long int *)STARPU_VARIABLE_GET_PTR(descr[1]); //FPRINTF_MPI(stderr, "Before dot=%ld (adding %d elements...)\n", *dot, n); unsigned i; for (i = 0; i < n; i++) { //FPRINTF_MPI(stderr, "Adding %ld\n", local_x[i]); *dot += local_x[i]; } //FPRINTF_MPI(stderr, "After dot=%ld\n", *dot); } /* * Display codelet */ void display_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; long int *local_x = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]); FPRINTF_MPI(stderr, "Local=%ld\n", *local_x); } starpu-1.4.9+dfsg/mpi/tests/mpi_redux.c000066400000000000000000000071021507764646700200770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This test does a manual reduction: all ranks send a number to the rank 0, * the rank 0 sums these numbers and sends back the result to all ranks. */ #include #include "helper.h" static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; void callback(void *arg) { unsigned *received = arg; STARPU_PTHREAD_MUTEX_LOCK(&mutex); *received = *received + 1; FPRINTF_MPI(stderr, "received = %u\n", *received); STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } int main(int argc, char **argv) { int ret, rank, size, sum; int value=0; starpu_data_handle_t *handles; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); sum = ((size-1) * (size) / 2); if (rank == 0) { int src; int received = 1; handles = malloc(size * sizeof(starpu_data_handle_t)); for(src=1 ; src #include "helper.h" /* Returns the MPI node number where data indexes index is */ int my_distrib(int x, int nb_nodes) { return x % nb_nodes; } void cpu_codelet(void *descr[], void *_args) { int *vector = (int *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); unsigned i; int rank; starpu_codelet_unpack_args(_args, &rank); for (i = 0; i < nx; i++) { //fprintf(stderr,"rank %d v[%d] = %d\n", rank, i, vector[i]); vector[i] *= rank+2; } } static struct starpu_codelet cl = { .cpu_funcs = {cpu_codelet}, .nbuffers = 1, .modes = {STARPU_RW}, #ifdef STARPU_SIMGRID .model = &starpu_perfmodel_nop, #endif }; void scallback(void *arg) { char *msg = arg; FPRINTF_MPI(stderr, "Sending completed for <%s>\n", msg); } void rcallback(void *arg) { char *msg = arg; FPRINTF_MPI(stderr, "Reception completed for <%s>\n", msg); } int main(int argc, char **argv) { int rank, nodes, ret, x; int *vector = NULL; starpu_data_handle_t *data_handles; int size=10; struct starpu_conf conf; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); if (starpu_cpu_worker_get_count() == 0) { if (rank == 0) FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } if (rank == 0) { /* Allocate the vector */ starpu_malloc((void **)&vector, size * sizeof(int)); for(x=0 ; x #include "helper.h" void func_cpu(void *descr[], void *_args) { int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); int *y = (int *)STARPU_VARIABLE_GET_PTR(descr[1]); FPRINTF(stdout, "VALUES: %d %d\n", *x, *y); } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_RW}, .model = &starpu_perfmodel_nop, }; int main(int argc, char **argv) { int ret, i, x[2]; starpu_data_handle_t data_handles[2]; int barrier_ret; int rank; struct starpu_task *task; struct starpu_mpi_task_exchange_params params; struct starpu_data_descr descrs[2]; struct starpu_conf conf; MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); if (ret == -ENODEV) return rank==0?STARPU_TEST_SKIPPED:0; STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); if (starpu_cpu_worker_get_count() == 0) { // If there is no cpu to execute the codelet, mpi will block trying to do the post-execution communication ret = -ENODEV; FPRINTF_MPI(stderr, "No CPU is available\n"); goto nodata; } for(i=0 ; i<2 ; i++) { x[i] = rank*2 + (i+1); starpu_variable_data_register(&data_handles[i], STARPU_MAIN_RAM, (uintptr_t)&x[i], sizeof(int)); starpu_mpi_data_register(data_handles[i], i, i); } task = starpu_task_create(); task->cl = &mycodelet; task->handles[0] = data_handles[0]; task->handles[1] = data_handles[1]; starpu_mpi_task_exchange_data_before_execution(MPI_COMM_WORLD, task, descrs, ¶ms); if (params.do_execute) { ret = starpu_task_submit(task); if (ret == -ENODEV) { task->destroy = 0; starpu_task_destroy(task); goto enodev; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } else { task->destroy = 0; starpu_task_destroy(task); } starpu_mpi_task_exchange_data_after_execution(MPI_COMM_WORLD, descrs, 2, params); starpu_task_wait_for_all(); enodev: for(i=0; i<2; i++) { starpu_data_unregister(data_handles[i]); } nodata: barrier_ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_ASSERT(barrier_ret == MPI_SUCCESS); starpu_mpi_shutdown(); MPI_Finalize(); if (rank == 0) return ret==-ENODEV?STARPU_TEST_SKIPPED:ret; else return 0; } starpu-1.4.9+dfsg/mpi/tests/mpi_test.c000066400000000000000000000045621507764646700177360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define NITER 16 #else # define NITER 2048 #endif #define SIZE 16 int main(int argc, char **argv) { int ret, rank, size; float *tab; starpu_data_handle_t tab_handle; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size%2 != 0) { if (rank == 0) FPRINTF(stderr, "We need a even number of processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } tab = calloc(SIZE, sizeof(float)); starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float)); int nloops = NITER; int loop; int other_rank = rank%2 == 0 ? rank+1 : rank-1; for (loop = 0; loop < nloops; loop++) { starpu_mpi_req req; if ((loop % 2) == (rank%2)) { ret = starpu_mpi_isend(tab_handle, &req, other_rank, loop, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); } else { ret = starpu_mpi_irecv(tab_handle, &req, other_rank, loop, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); } int finished = 0; do { MPI_Status status; ret = starpu_mpi_test(&req, &finished, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); #ifdef STARPU_SIMGRID starpu_sleep(0.001); #endif } while (!finished); } starpu_data_unregister(tab_handle); free(tab); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/multiple_send.c000066400000000000000000000065331507764646700207560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" int main(int argc, char **argv) { int ret, rank, size; unsigned send[2] = {42, 11}; unsigned recv[2] = {33, 33}; starpu_mpi_req req[2]; starpu_data_handle_t send_handle[2]; starpu_data_handle_t recv_handle[2]; struct starpu_conf conf; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 2) { if (rank == 0) FPRINTF(stderr, "We need at least 2 processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } starpu_variable_data_register(&send_handle[0], STARPU_MAIN_RAM, (uintptr_t)&send[0], sizeof(unsigned)); starpu_variable_data_register(&send_handle[1], STARPU_MAIN_RAM, (uintptr_t)&send[1], sizeof(unsigned)); starpu_variable_data_register(&recv_handle[0], STARPU_MAIN_RAM, (uintptr_t)&recv[0], sizeof(unsigned)); starpu_variable_data_register(&recv_handle[1], STARPU_MAIN_RAM, (uintptr_t)&recv[1], sizeof(unsigned)); if (rank == 0) { ret = starpu_mpi_isend(send_handle[0], &(req[0]), 1, 12, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); ret = starpu_mpi_isend(send_handle[1], &(req[1]), 1, 13, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); } else if (rank == 1) { ret = starpu_mpi_irecv(recv_handle[0], &(req[0]), 0, 12, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); ret = starpu_mpi_irecv(recv_handle[1], &(req[1]), 0, 13, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); } if (rank == 0 || rank == 1) { int nb_req=2; while (nb_req) { int r=0; for(r=0 ; r<2 ; r++) { if (req[r]) { int finished = 0; MPI_Status status; ret = starpu_mpi_test(&req[r], &finished, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_test"); STARPU_ASSERT(finished != -1); if (finished) { FPRINTF(stderr, "[%d] Request %d finished\n", rank, r); req[r] = NULL; nb_req--; } } } #ifdef STARPU_SIMGRID starpu_sleep(0.001); #endif } } FPRINTF(stderr, "[%d] All requests finished\n", rank); starpu_data_unregister(send_handle[0]); starpu_data_unregister(send_handle[1]); starpu_data_unregister(recv_handle[0]); starpu_data_unregister(recv_handle[1]); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/ndim_interface.c000066400000000000000000000107461507764646700210620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define NITER 16 #else # define NITER 2048 #endif #define BIGSIZE 32 #define SIZE 16 int main(int argc, char **argv) { int ret, rank, size; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 2) { if (rank == 0) FPRINTF(stderr, "We need at least 2 processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } /* Node 0 will allocate a big 4-dim array and only register an inner part of * it as the 4-dim array, Node 1 will allocate a 4-dim array of small size and * register it directly. Node 0 and 1 will then exchange the content of * their arrays. */ int *arr4d = NULL; starpu_data_handle_t arr4d_handle = NULL; if (rank == 0) { arr4d = calloc(BIGSIZE*BIGSIZE*BIGSIZE*BIGSIZE, sizeof(int)); assert(arr4d); /* fill the inner 4-dim array */ unsigned i, j, k, l; int n = 0; for (l = 0; l < SIZE; l++) { for (k = 0; k < SIZE; k++) { for (j = 0; j < SIZE; j++) { for (i = 0; i < SIZE; i++) { arr4d[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE + l*BIGSIZE*BIGSIZE*BIGSIZE] = n++; } } } } unsigned nn[4] = {SIZE, SIZE, SIZE, SIZE}; unsigned ldn[4] = {1, BIGSIZE, BIGSIZE*BIGSIZE, BIGSIZE*BIGSIZE*BIGSIZE}; starpu_ndim_data_register(&arr4d_handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(int)); } else if (rank == 1) { arr4d = calloc(SIZE*SIZE*SIZE*SIZE, sizeof(int)); assert(arr4d); unsigned nn[4] = {SIZE, SIZE, SIZE, SIZE}; unsigned ldn[4] = {1, SIZE, SIZE*SIZE, SIZE*SIZE*SIZE}; starpu_ndim_data_register(&arr4d_handle, STARPU_MAIN_RAM, (uintptr_t)arr4d, ldn, nn, 4, sizeof(int)); } if (rank == 0) { ret = starpu_mpi_send(arr4d_handle, 1, 0x42, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); MPI_Status status; ret = starpu_mpi_recv(arr4d_handle, 1, 0x1337, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); /* check the content of the 4-dim array */ ret = starpu_data_acquire(arr4d_handle, STARPU_R); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); int m = 10; unsigned i, j, k, l; for (l = 0; l < SIZE; l++) { for (k = 0; k < SIZE; k++) { for (j = 0; j < SIZE; j++) { for (i = 0; i < SIZE; i++) { assert(arr4d[i + j*BIGSIZE + k*BIGSIZE*BIGSIZE + l*BIGSIZE*BIGSIZE*BIGSIZE] == m); m++; } } } } starpu_data_release(arr4d_handle); } else if (rank == 1) { MPI_Status status; ret = starpu_mpi_recv(arr4d_handle, 0, 0x42, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); /* check the content of the 4-dim array and modify it */ ret = starpu_data_acquire(arr4d_handle, STARPU_RW); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); int n = 0, m = 10; unsigned i, j, k, l; for (l = 0; l < SIZE; l++) { for (k = 0; k < SIZE; k++) { for (j = 0; j < SIZE; j++) { for (i = 0; i < SIZE; i++) { assert(arr4d[i + j*SIZE + k*SIZE*SIZE + l*SIZE*SIZE*SIZE] == n); n++; arr4d[i + j*SIZE + k*SIZE*SIZE + l*SIZE*SIZE*SIZE] = m++; } } } } starpu_data_release(arr4d_handle); ret = starpu_mpi_send(arr4d_handle, 0, 0x1337, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } FPRINTF(stdout, "Rank %d is done\n", rank); fflush(stdout); if (rank == 0 || rank == 1) { starpu_data_unregister(arr4d_handle); free(arr4d); } starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/nothing.c000066400000000000000000000034251507764646700175550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This program does nothing. It waits until it is interrupted by the user. * Useful to check binding while StarPU is running. */ #include #include #include "helper.h" int main(int argc, char **argv) { int ret, rank, worldsize; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_pause(); // our program will only wait, no need to stress cores by polling workers starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &worldsize); starpu_mpi_barrier(MPI_COMM_WORLD); char hostname[65]; gethostname(hostname, sizeof(hostname)); printf("[rank %d on %s] ready to wait !\n", rank, hostname); if (rank == 0) { printf("You can now check if thread binding is correct, for instance.\n"); } fflush(stdout); while(1) { sleep(1); } // TODO: maybe better handle the user interruption ? starpu_resume(); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/pingpong.c000066400000000000000000000115371507764646700177330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define DEFAULT_NITER 16 #else # define DEFAULT_NITER 2048 #endif #define DEFAULT_DATA_SIZE 16 #define DEFAULT_SLEEP_TIME 0 #define DEFAULT_METHOD 0 // ping pongs void usage() { fprintf(stderr, "-n [number of iteration] (default: %d)\n", DEFAULT_NITER); fprintf(stderr, "-s [number of floats to exchange] (default: %d)\n", DEFAULT_DATA_SIZE); fprintf(stderr, "-S [time in millisecond of sleep between exchange, less than 1 second] (default: %d)\n", DEFAULT_SLEEP_TIME); fprintf(stderr, "-b : broadcasts instead of simple pair-wise ping-pongs (default: %s)\n", DEFAULT_METHOD ? "broadcast" : "ping pongs"); } float *tab; starpu_data_handle_t tab_handle; int main(int argc, char **argv) { int ret, rank, size; int mpi_init; int i; int niter = DEFAULT_NITER; int data_size = DEFAULT_DATA_SIZE; int sleep_time = DEFAULT_SLEEP_TIME; int method = DEFAULT_METHOD; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-n") == 0) { niter = atoi(argv[i+1]); if (niter <= 0) { fprintf(stderr, "%s: illegal argument %s\n", argv[0], argv[i]); usage(); exit(0); } i++; } else if (strcmp(argv[i], "-s") == 0) { data_size = atoi(argv[i+1]); if (data_size <= 0) { fprintf(stderr, "%s: illegal argument %s\n", argv[0], argv[i]); usage(); exit(0); } i++; } else if(strcmp(argv[i], "-S") == 0) { sleep_time = atoi(argv[i+1]); if (sleep_time <= 0 || sleep_time >= 1000) { fprintf(stderr, "%s: illegal argument %s\n", argv[0], argv[i]); usage(); exit(0); } i++; } else if(strcmp(argv[i], "-b") == 0) { method = 1; // broadcasts } else { fprintf(stderr, "%s: illegal argument %s\n", argv[0], argv[i]); usage(); exit(0); } } MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size%2 != 0) { if (rank == 0) FPRINTF(stderr, "We need a even number of processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } if (rank == 0) { FPRINTF(stdout, "Number of iterations: %d\n", niter); FPRINTF(stdout, "Number of floats to exchange: %d\n", data_size); FPRINTF(stdout, "Sleep time between exchanges: %d milliseconds\n", sleep_time); if (method == 0) FPRINTF(stdout, "Method: ping pongs\n"); else FPRINTF(stdout, "Method: broadcasts\n"); } tab = calloc(data_size, sizeof(float)); starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, data_size, sizeof(float)); int loop; int other_rank = rank%2 == 0 ? rank+1 : rank-1; int sender; int r; if (method == 0) // ping pongs { for (loop = 0; loop < niter; loop++) { if ((loop % 2) == (rank%2)) { //FPRINTF_MPI(stderr, "Sending to %d\n", other_rank); ret = starpu_mpi_send(tab_handle, other_rank, loop, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } else { MPI_Status status; //FPRINTF_MPI(stderr, "Receiving from %d\n", other_rank); ret = starpu_mpi_recv(tab_handle, other_rank, loop, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); } starpu_sleep(sleep_time / 1000); } } else // broadcasts { for (loop = 0; loop < niter; loop++) { sender = loop % size; if (sender == rank) { for (r = 0; r < size; r++) { if (r != rank) { ret = starpu_mpi_send(tab_handle, r, (r * niter) + loop, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); starpu_sleep(sleep_time / 1000); } } } else { MPI_Status status; ret = starpu_mpi_recv(tab_handle, sender, (rank * niter) + loop, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); for (r = 0; r < (size-1); r++) starpu_sleep(sleep_time / 1000); } } } starpu_data_unregister(tab_handle); free(tab); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/policy_register.c000066400000000000000000000073041507764646700213120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" void func_cpu(void *descr[], void *_args) { (void)descr; (void)_args; } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 2, .modes = {STARPU_W, STARPU_W}, .model = &starpu_perfmodel_nop, }; int starpu_mpi_select_node_my_policy_0(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data) { (void) me; (void) nb_nodes; (void) nb_data; starpu_data_handle_t data = descr[0].handle; return starpu_data_get_rank(data); } int starpu_mpi_select_node_my_policy_1(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data) { (void) me; (void) nb_nodes; (void) nb_data; starpu_data_handle_t data = descr[1].handle; return starpu_data_get_rank(data); } int main(int argc, char **argv) { int ret; int rank, size; int policy; struct starpu_task *task; starpu_data_handle_t handles[2]; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 2) { if (rank == 0) FPRINTF(stderr, "We need at least 2 processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } if (rank == 0) starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)&policy, sizeof(int)); else starpu_variable_data_register(&handles[0], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register(handles[0], 10, 0); if (rank == 1) starpu_variable_data_register(&handles[1], STARPU_MAIN_RAM, (uintptr_t)&policy, sizeof(int)); else starpu_variable_data_register(&handles[1], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register(handles[1], 20, 1); policy = starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_my_policy_1); starpu_mpi_node_selection_set_current_policy(policy); task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet, STARPU_W, handles[0], STARPU_W, handles[1], 0); FPRINTF_MPI(stderr, "Task %p\n", task); if (rank == 1) { STARPU_ASSERT_MSG(task, "Task should be executed by rank 1\n"); task->destroy = 0; starpu_task_destroy(task); } else { STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 1\n"); } policy = starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_my_policy_0); task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet, STARPU_W, handles[0], STARPU_W, handles[1], STARPU_NODE_SELECTION_POLICY, policy, 0); FPRINTF_MPI(stderr, "Task %p\n", task); if (rank == 0) { STARPU_ASSERT_MSG(task, "Task should be executed by rank 0\n"); task->destroy = 0; starpu_task_destroy(task); } else { STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 0\n"); } starpu_data_unregister(handles[0]); starpu_data_unregister(handles[1]); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/policy_register_many.c000066400000000000000000000036201507764646700223330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" int starpu_mpi_select_node_my_policy(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data) { (void) me; (void) nb_nodes; (void) descr; (void) nb_data; return 0; } int main(int argc, char **argv) { int ret; int i, policy; struct starpu_conf conf; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); for(i=0 ; i<_STARPU_MPI_NODE_SELECTION_MAX_POLICY-1 ; i++) { policy = starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_my_policy); FPRINTF_MPI(stderr, "New policy %d\n", policy); } starpu_mpi_node_selection_unregister_policy(_STARPU_MPI_NODE_SELECTION_MAX_POLICY-2); policy = starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_my_policy); FPRINTF_MPI(stderr, "New policy %d\n", policy); STARPU_ASSERT(policy==_STARPU_MPI_NODE_SELECTION_MAX_POLICY-2); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/policy_register_toomany.c000066400000000000000000000033401507764646700230540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" int starpu_mpi_select_node_my_policy(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data) { (void) me; (void) nb_nodes; (void) descr; (void) nb_data; return 0; } int main(int argc, char **argv) { int ret; int i; struct starpu_conf conf; int mpi_init; #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) return STARPU_TEST_SKIPPED; #endif disable_coredump(); MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); for(i=0 ; i<_STARPU_MPI_NODE_SELECTION_MAX_POLICY+1 ; i++) { int policy = starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_my_policy); FPRINTF_MPI(stderr, "New policy %d\n", policy); } starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/policy_selection.c000066400000000000000000000111271507764646700214510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" void func_cpu(void *descr[], void *_args) { (void)descr; (void)_args; } struct starpu_codelet mycodelet_2 = { .cpu_funcs = {func_cpu}, .nbuffers = 2, .modes = {STARPU_W, STARPU_W}, .model = &starpu_perfmodel_nop, }; struct starpu_codelet mycodelet_3 = { .cpu_funcs = {func_cpu}, .nbuffers = 3, .modes = {STARPU_R, STARPU_W, STARPU_W}, .model = &starpu_perfmodel_nop, }; int main(int argc, char **argv) { int ret; int rank, size; int policy = 12; struct starpu_task *task; starpu_data_handle_t handles[3]; int mpi_init; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); (void)mpi_init; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 3) { if (rank == 0) FPRINTF(stderr, "We need at least 3 processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } if (rank == 0) { starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)&policy, sizeof(int)); } else { starpu_variable_data_register(&handles[0], -1, (uintptr_t)NULL, sizeof(int)); } starpu_mpi_data_register(handles[0], 10, 0); if (rank == 1) { starpu_variable_data_register(&handles[1], STARPU_MAIN_RAM, (uintptr_t)&policy, sizeof(int)); } else { starpu_variable_data_register(&handles[1], -1, (uintptr_t)NULL, sizeof(int)); } starpu_mpi_data_register(handles[1], 20, 1); if (rank == 2) { starpu_variable_data_register(&handles[2], STARPU_MAIN_RAM, (uintptr_t)&policy, sizeof(int)); } else { starpu_variable_data_register(&handles[2], -1, (uintptr_t)NULL, sizeof(int)); } starpu_mpi_data_register(handles[2], 30, 2); // Force the execution on node 1 task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet_3, STARPU_R, handles[2], STARPU_W, handles[0], STARPU_W, handles[1], STARPU_EXECUTE_ON_NODE, 1, 0); FPRINTF_MPI(stderr, "Task %p\n", task); if (rank == 1) { STARPU_ASSERT_MSG(task, "Task should be executed by rank 1"); task->destroy = 0; starpu_task_destroy(task); } else { STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 1"); } // Force the execution on node 1 task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet_2, STARPU_W, handles[0], STARPU_W, handles[1], STARPU_EXECUTE_ON_NODE, 1, 0); FPRINTF_MPI(stderr, "Task %p\n", task); if (rank == 1) { STARPU_ASSERT_MSG(task, "Task should be executed by rank 1"); task->destroy = 0; starpu_task_destroy(task); } else { STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 1"); } // Let StarPU choose the node task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet_3, STARPU_R, handles[2], STARPU_W, handles[0], STARPU_W, handles[1], 0); FPRINTF_MPI(stderr, "Task %p\n", task); if (rank == 0) { STARPU_ASSERT_MSG(task, "Task should be executed by rank 0"); task->destroy = 0; starpu_task_destroy(task); } else { STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 2"); } // Let StarPU choose the node task = starpu_mpi_task_build(MPI_COMM_WORLD, &mycodelet_2, STARPU_W, handles[0], STARPU_W, handles[1], 0); FPRINTF_MPI(stderr, "Task %p\n", task); if (rank == 0) { STARPU_ASSERT_MSG(task, "Task should be executed by rank 0"); task->destroy = 0; starpu_task_destroy(task); } else { STARPU_ASSERT_MSG(task == NULL, "Task should be executed by rank 0"); } starpu_data_unregister(handles[0]); starpu_data_unregister(handles[1]); starpu_data_unregister(handles[2]); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/policy_selection2.c000066400000000000000000000100351507764646700215300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" void func_cpu(void *descr[], void *_args) { (void)_args; int *data0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); int *data1 = (int *)STARPU_VARIABLE_GET_PTR(descr[1]); int *data2 = (int *)STARPU_VARIABLE_GET_PTR(descr[2]); *data1 = *data0; *data2 = *data0; } struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .nbuffers = 3, .modes = {STARPU_R, STARPU_W, STARPU_W}, .model = &starpu_perfmodel_nop, }; int main(int argc, char **argv) { int ret; int i; int rank, size; int data[3]; starpu_data_handle_t handles[3]; int mpi_init; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); (void)mpi_init; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if ((size < 3) || (starpu_cpu_worker_get_count() == 0)) { if (rank == 0) { if (size < 3) FPRINTF(stderr, "We need at least 3 processes.\n"); else FPRINTF(stderr, "We need at least 1 CPU worker.\n"); } starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } data[0] = 42; starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)&data[0], sizeof(int)); starpu_mpi_data_register(handles[0], 10, 0); data[1] = 42; starpu_variable_data_register(&handles[1], STARPU_MAIN_RAM, (uintptr_t)&data[1], sizeof(int)); starpu_mpi_data_register(handles[1], 20, 1); data[2] = 12; starpu_variable_data_register(&handles[2], STARPU_MAIN_RAM, (uintptr_t)&data[2], sizeof(int)); starpu_mpi_data_register(handles[2], 30, 2); starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_R, handles[2], STARPU_W, handles[0], STARPU_W, handles[1], 0); for(i=0 ; i<2 ; i++) starpu_data_acquire(handles[i], STARPU_R); FPRINTF_MPI(stderr, "data[%d,%d,%d] = %d,%d,%d\n", 0, 1, 2, data[0], data[1], data[2]); for(i=0 ; i<2 ; i++) starpu_data_release(handles[i]); #ifndef STARPU_SIMGRID if (rank == 0) { STARPU_ASSERT_MSG(data[0] == data[2] && data[1] == data[2], "Computation incorrect. data[%d] (%d) != data[%d] (%d) && data[%d] (%d) != data[%d] (%d)\n", 0, data[0], 2, data[2], 1, data[1], 2, data[2]); } #endif for(i=0 ; i<2 ; i++) starpu_data_acquire(handles[i], STARPU_W); for(i=0 ; i<2 ; i++) data[i] = 12; for(i=0 ; i<2 ; i++) starpu_data_release(handles[i]); // Let StarPU choose the node starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_R, handles[2], STARPU_W, handles[0], STARPU_W, handles[1], STARPU_EXECUTE_ON_NODE, 1, 0); for(i=0 ; i<2 ; i++) starpu_data_acquire(handles[i], STARPU_R); FPRINTF_MPI(stderr, "data[%d,%d,%d] = %d,%d,%d\n", 0, 1, 2, data[0], data[1], data[2]); for(i=0 ; i<2 ; i++) starpu_data_release(handles[i]); #ifndef STARPU_SIMGRID if (rank == 1) { STARPU_ASSERT_MSG(data[0] == data[2] && data[1] == data[2], "Computation incorrect. data[%d] (%d) != data[%d] (%d) && data[%d] (%d) != data[%d] (%d)\n", 0, data[0], 2, data[2], 1, data[1], 2, data[2]); } #endif for(i=0 ; i<3 ; i++) starpu_data_unregister(handles[i]); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/policy_unregister.c000066400000000000000000000026021507764646700216510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" int main(int argc, char **argv) { int ret; struct starpu_conf conf; int mpi_init; #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) return STARPU_TEST_SKIPPED; #endif disable_coredump(); MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_node_selection_unregister_policy(STARPU_MPI_NODE_SELECTION_MOST_R_DATA); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/ring.c000066400000000000000000000075731507764646700170560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define NITER 32 #elif !defined(STARPU_LONG_CHECK) # define NITER 256 #else # define NITER 2048 #endif #ifdef STARPU_USE_CUDA extern void increment_cuda(void *descr[], void *_args); #endif #ifdef STARPU_USE_HIP extern void increment_hip(void *descr[], void *_args); #endif void increment_cpu(void *descr[], void *_args) { (void)_args; int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); (*tokenptr)++; } static struct starpu_codelet increment_cl = { #ifdef STARPU_USE_CUDA .cuda_funcs = {increment_cuda}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {increment_hip}, #endif .cpu_funcs = {increment_cpu}, .nbuffers = 1, .modes = {STARPU_RW}, .model = &starpu_perfmodel_nop, }; void increment_token(starpu_data_handle_t token_handle) { struct starpu_task *task = starpu_task_create(); task->cl = &increment_cl; task->handles[0] = token_handle; task->synchronous = 1; int ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } int main(int argc, char **argv) { int ret, rank, size; int mpi_init; int token = 42; starpu_data_handle_t token_handle; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0) || (starpu_cpu_worker_get_count() + starpu_hip_worker_get_count() == 0)) { if (rank == 0) { if (size < 2) FPRINTF(stderr, "We need at least 2 processes.\n"); else FPRINTF(stderr, "We need at least 1 CPU or CUDA or HIP worker.\n"); } starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(token)); int nloops = NITER; int loop; int last_loop = nloops - 1; int last_rank = size - 1; for (loop = 0; loop < nloops; loop++) { starpu_mpi_tag_t tag = ((starpu_mpi_tag_t) loop)*size + rank; if (loop == 0 && rank == 0) { starpu_data_acquire(token_handle, STARPU_W); token = 0; FPRINTF(stdout, "Start with token value %d\n", token); starpu_data_release(token_handle); } else { MPI_Status status; ret = starpu_mpi_recv(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); } increment_token(token_handle); if (loop == last_loop && rank == last_rank) { starpu_data_acquire(token_handle, STARPU_R); FPRINTF(stdout, "Finished : token value %d\n", token); starpu_data_release(token_handle); } else { ret = starpu_mpi_send(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } } starpu_data_unregister(token_handle); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); #ifndef STARPU_SIMGRID if (rank == last_rank) { FPRINTF(stderr, "[%d] token = %d == %d * %d ?\n", rank, token, nloops, size); STARPU_ASSERT(token == nloops*size); } #endif return 0; } starpu-1.4.9+dfsg/mpi/tests/ring_async.c000066400000000000000000000102041507764646700202340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define NITER 32 #elif !defined(STARPU_LONG_CHECK) # define NITER 256 #else # define NITER 2048 #endif #ifdef STARPU_USE_CUDA extern void increment_cuda(void *descr[], void *_args); #endif #ifdef STARPU_USE_HIP extern void increment_hip(void *descr[], void *_args); #endif void increment_cpu(void *descr[], void *_args) { (void)_args; int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); (*tokenptr)++; } static struct starpu_codelet increment_cl = { #ifdef STARPU_USE_CUDA .cuda_funcs = {increment_cuda}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {increment_hip}, #endif .cpu_funcs = {increment_cpu}, .nbuffers = 1, .modes = {STARPU_RW}, .model = &starpu_perfmodel_nop, }; void increment_token(starpu_data_handle_t token_handle) { struct starpu_task *task = starpu_task_create(); task->cl = &increment_cl; task->handles[0] = token_handle; task->synchronous = 1; int ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } int main(int argc, char **argv) { int ret, rank, size; int mpi_init; int token = 42; starpu_data_handle_t token_handle; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0) || (starpu_cpu_worker_get_count() + starpu_hip_worker_get_count() == 0)) { if (rank == 0) { if (size < 2) FPRINTF(stderr, "We need at least 2 processes.\n"); else FPRINTF(stderr, "We need at least 1 CPU or CUDA or HIP worker.\n"); } starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(token)); int nloops = NITER; int loop; int last_loop = nloops - 1; int last_rank = size - 1; for (loop = 0; loop < nloops; loop++) { starpu_mpi_tag_t tag = ((starpu_mpi_tag_t) loop)*size + rank; if (loop == 0 && rank == 0) { starpu_data_acquire(token_handle, STARPU_W); token = 0; FPRINTF(stdout, "Start with token value %d\n", token); starpu_data_release(token_handle); } else { MPI_Status status; starpu_mpi_req req; ret = starpu_mpi_irecv(token_handle, &req, (rank+size-1)%size, tag, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv"); ret = starpu_mpi_wait(&req, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); } increment_token(token_handle); if (loop == last_loop && rank == last_rank) { starpu_data_acquire(token_handle, STARPU_R); FPRINTF(stdout, "Finished : token value %d\n", token); starpu_data_release(token_handle); } else { starpu_mpi_req req; MPI_Status status; ret = starpu_mpi_isend(token_handle, &req, (rank+1)%size, tag+1, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend"); ret = starpu_mpi_wait(&req, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); } } starpu_data_unregister(token_handle); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); #ifndef STARPU_SIMGRID if (rank == last_rank) { FPRINTF(stderr, "[%d] token = %d == %d * %d ?\n", rank, token, nloops, size); STARPU_ASSERT(token == nloops*size); } #endif return 0; } starpu-1.4.9+dfsg/mpi/tests/ring_async_implicit.c000066400000000000000000000076411507764646700221410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define NITER 32 #elif !defined(STARPU_LONG_CHECK) # define NITER 256 #else # define NITER 2048 #endif #ifdef STARPU_USE_CUDA extern void increment_cuda(void *descr[], void *_args); #endif #ifdef STARPU_USE_HIP extern void increment_hip(void *descr[], void *_args); #endif void increment_cpu(void *descr[], void *_args) { (void)_args; int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); (*tokenptr)++; } static struct starpu_codelet increment_cl = { #ifdef STARPU_USE_CUDA .cuda_funcs = {increment_cuda}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {increment_hip}, #endif .cpu_funcs = {increment_cpu}, .nbuffers = 1, .modes = {STARPU_RW}, .model = &starpu_perfmodel_nop, }; void increment_token(starpu_data_handle_t token_handle) { struct starpu_task *task = starpu_task_create(); task->cl = &increment_cl; task->handles[0] = token_handle; int ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } int main(int argc, char **argv) { int ret, rank, size; int token = 42; starpu_data_handle_t token_handle; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0) || (starpu_cpu_worker_get_count() + starpu_hip_worker_get_count() == 0)) { if (rank == 0) { if (size < 2) FPRINTF(stderr, "We need at least 2 processes.\n"); else FPRINTF(stderr, "We need at least 1 CPU or CUDA or HIP worker.\n"); } starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(token)); int nloops = NITER; int loop; int last_loop = nloops - 1; int last_rank = size - 1; for (loop = 0; loop < nloops; loop++) { starpu_mpi_tag_t tag = ((starpu_mpi_tag_t) loop)*size + rank; if (loop == 0 && rank == 0) { starpu_data_acquire(token_handle, STARPU_W); token = 0; FPRINTF(stdout, "Start with token value %d\n", token); starpu_data_release(token_handle); } else { ret = starpu_mpi_irecv_detached(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); } increment_token(token_handle); if (loop == last_loop && rank == last_rank) { starpu_data_acquire(token_handle, STARPU_R); FPRINTF(stdout, "Finished : token value %d\n", token); starpu_data_release(token_handle); } else { ret = starpu_mpi_isend_detached(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } } starpu_task_wait_for_all(); starpu_data_unregister(token_handle); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); #ifndef STARPU_SIMGRID if (rank == last_rank) { FPRINTF(stderr, "[%d] token = %d == %d * %d ?\n", rank, token, nloops, size); STARPU_ASSERT(token == nloops*size); } #endif return 0; } starpu-1.4.9+dfsg/mpi/tests/ring_kernel.cu000066400000000000000000000021731507764646700205720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include static __global__ void cuda_incrementer(int *token) { (*token)++; } extern "C" void increment_cuda(void *descr[], void *_args) { (void) _args; int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); cuda_incrementer<<<1,1, 0, starpu_cuda_get_local_stream()>>>(tokenptr); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } starpu-1.4.9+dfsg/mpi/tests/ring_kernel_hip.hip000066400000000000000000000022111507764646700215740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include static __global__ void hip_incrementer(int *token) { (*token)++; } extern "C" void increment_hip(void *descr[], void *_args) { (void) _args; int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); hipLaunchKernelGGL(hip_incrementer, 1, 1, 0, starpu_hip_get_local_stream(), tokenptr); hipError_t status = hipGetLastError(); if (status != hipSuccess) STARPU_HIP_REPORT_ERROR(status); hipStreamSynchronize(starpu_hip_get_local_stream()); } starpu-1.4.9+dfsg/mpi/tests/ring_sync.c000066400000000000000000000076641507764646700201130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define NITER 32 #elif !defined(STARPU_LONG_CHECK) # define NITER 256 #else # define NITER 2048 #endif #ifdef STARPU_USE_CUDA extern void increment_cuda(void *descr[], void *_args); #endif #ifdef STARPU_USE_HIP extern void increment_hip(void *descr[], void *_args); #endif void increment_cpu(void *descr[], void *_args) { (void)_args; int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); (*tokenptr)++; } static struct starpu_codelet increment_cl = { #ifdef STARPU_USE_CUDA .cuda_funcs = {increment_cuda}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {increment_hip}, #endif .cpu_funcs = {increment_cpu}, .nbuffers = 1, .modes = {STARPU_RW}, .model = &starpu_perfmodel_nop, }; void increment_token(starpu_data_handle_t token_handle) { struct starpu_task *task = starpu_task_create(); task->cl = &increment_cl; task->handles[0] = token_handle; task->synchronous = 1; int ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } int main(int argc, char **argv) { int ret, rank, size; int mpi_init; int token = 42; starpu_data_handle_t token_handle; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0)) { if (rank == 0) { if (size < 2) FPRINTF(stderr, "We need at least 2 processes.\n"); else FPRINTF(stderr, "We need at least 1 CPU or CUDA worker.\n"); } starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(token)); int nloops = NITER; int loop; int last_loop = nloops - 1; int last_rank = size - 1; for (loop = 0; loop < nloops; loop++) { starpu_mpi_tag_t tag = ((starpu_mpi_tag_t) loop)*size + rank; if (loop == 0 && rank == 0) { starpu_data_acquire(token_handle, STARPU_W); token = 0; FPRINTF(stdout, "Start with token value %d\n", token); starpu_data_release(token_handle); } else { MPI_Status status; ret = starpu_mpi_recv(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); } increment_token(token_handle); if (loop == last_loop && rank == last_rank) { starpu_data_acquire(token_handle, STARPU_R); FPRINTF(stdout, "Finished : token value %d\n", token); starpu_data_release(token_handle); } else { starpu_mpi_req req; MPI_Status status; ret = starpu_mpi_issend(token_handle, &req, (rank+1)%size, tag+1, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_issend"); ret = starpu_mpi_wait(&req, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); } } starpu_data_unregister(token_handle); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); #ifndef STARPU_SIMGRID if (rank == last_rank) { FPRINTF(stderr, "[%d] token = %d == %d * %d ?\n", rank, token, nloops, size); STARPU_ASSERT(token == nloops*size); } #endif return 0; } starpu-1.4.9+dfsg/mpi/tests/ring_sync_detached.c000066400000000000000000000106621507764646700217240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define NITER 32 #elif !defined(STARPU_LONG_CHECK) # define NITER 256 #else # define NITER 2048 #endif #ifdef STARPU_USE_CUDA extern void increment_cuda(void *descr[], void *_args); #endif #ifdef STARPU_USE_HIP extern void increment_hip(void *descr[], void *_args); #endif void increment_cpu(void *descr[], void *_args) { (void)_args; int *tokenptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); (*tokenptr)++; } static struct starpu_codelet increment_cl = { #ifdef STARPU_USE_CUDA .cuda_funcs = {increment_cuda}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {increment_hip}, #endif .cpu_funcs = {increment_cpu}, .nbuffers = 1, .modes = {STARPU_RW}, .model = &starpu_perfmodel_nop, }; void increment_token(starpu_data_handle_t handle) { struct starpu_task *task = starpu_task_create(); task->cl = &increment_cl; task->handles[0] = handle; task->synchronous = 1; int ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; void callback(void *arg) { unsigned *completed = arg; STARPU_PTHREAD_MUTEX_LOCK(&mutex); *completed = 1; STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } int main(int argc, char **argv) { int ret, rank, size; int token = 42; starpu_data_handle_t token_handle; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 2 || (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() == 0) || (starpu_cpu_worker_get_count() + starpu_hip_worker_get_count() == 0)) { if (rank == 0) { if (size < 2) FPRINTF(stderr, "We need at least 2 processes.\n"); else FPRINTF(stderr, "We need at least 1 CPU or CUDA or HIP worker.\n"); } starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } starpu_vector_data_register(&token_handle, 0, (uintptr_t)&token, 1, sizeof(token)); int nloops = NITER; int loop; int last_loop = nloops - 1; int last_rank = size - 1; for (loop = 0; loop < nloops; loop++) { starpu_mpi_tag_t tag = ((starpu_mpi_tag_t) loop)*size + rank; if (loop == 0 && rank == 0) { starpu_data_acquire(token_handle, STARPU_W); token = 0; FPRINTF_MPI(stderr, "Start with token value %d\n", token); starpu_data_release(token_handle); } else { MPI_Status status; ret = starpu_mpi_recv(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, &status); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); } increment_token(token_handle); if (loop == last_loop && rank == last_rank) { starpu_data_acquire(token_handle, STARPU_R); FPRINTF_MPI(stderr, "Finished : token value %d\n", token); starpu_data_release(token_handle); } else { int sent = 0; ret = starpu_mpi_issend_detached(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD, callback, &sent); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_issend_detached"); STARPU_PTHREAD_MUTEX_LOCK(&mutex); while (!sent) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } } starpu_data_unregister(token_handle); FPRINTF_MPI(stderr, "Final value for token %d\n", token); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); #ifndef STARPU_SIMGRID if (rank == last_rank) { FPRINTF(stderr, "[%d] token = %d == %d * %d ?\n", rank, token, nloops, size); STARPU_ASSERT(token == nloops*size); } #endif return 0; } starpu-1.4.9+dfsg/mpi/tests/star.c000066400000000000000000000054231507764646700170600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" static void read_func(void *descr[], void *_args) { (void)_args; int *a = (void*) STARPU_VARIABLE_GET_PTR(descr[0]); FPRINTF_MPI(stderr, "x = %d\n", *a); } static struct starpu_codelet read_codelet = { .cpu_funcs = {read_func}, .nbuffers = 1, .modes = {STARPU_R}, #ifdef STARPU_SIMGRID .model = &starpu_perfmodel_nop, #endif .name = "read_codelet" }; static void write_func(void *descr[], void *_args) { int rank, *a; a = (void*) STARPU_VARIABLE_GET_PTR(descr[0]); starpu_codelet_unpack_args(_args, &rank); *a = rank+12; FPRINTF_MPI(stderr, "x = %d rank=%d\n", *a, rank); } static struct starpu_codelet write_codelet = { .cpu_funcs = {write_func}, .nbuffers = 1, .modes = {STARPU_W}, #ifdef STARPU_SIMGRID .model = &starpu_perfmodel_nop, #endif .name = "write_codelet" }; int main(int argc, char **argv) { int ret, rank, size, node; starpu_data_handle_t handle; int var=42; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size<3) { FPRINTF(stderr, "We need more than 2 processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } if (rank==0) starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); else starpu_variable_data_register(&handle, -1, (uintptr_t)NULL, sizeof(var)); starpu_mpi_data_register(handle, 42, 0); for(node=1 ; node #include "helper.h" int main(int argc, char **argv) { int ret; starpu_data_handle_t handle; int mpi_init; #ifdef STARPU_HAVE_VALGRIND_H if (RUNNING_ON_VALGRIND) return STARPU_TEST_SKIPPED; #endif disable_coredump(); MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(NULL, NULL, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&ret, 1, sizeof(int)); starpu_mpi_datatype_register(handle, NULL, NULL); starpu_data_unregister(handle); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/stats.c000066400000000000000000000057031507764646700172460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #if !defined(STARPU_HAVE_UNSETENV) || !defined(STARPU_HAVE_SETENV) #warning unsetenv or setenv are not defined. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else int main(int argc, char **argv) { int ret, rank, size; int mpi_init; int value; starpu_data_handle_t handle; size_t *stats; unsetenv("STARPU_MPI_CACHE"); unsetenv("STARPU_MPI_STATS"); unsetenv("STARPU_COMM_STATS"); MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 2) { if (rank == 0) FPRINTF(stderr, "We need at least 2 processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } stats = calloc(size, sizeof(stats[0])); value = rank; starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); if (rank == 0) { ret = starpu_mpi_send(handle, 1, 42, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } else if (rank == 1) { ret = starpu_mpi_recv(handle, 0, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); } starpu_mpi_comm_stats_enable(); if (rank == 0) { ret = starpu_mpi_send(handle, 1, 42, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } else if (rank == 1) { ret = starpu_mpi_recv(handle, 0, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); } starpu_mpi_comm_stats_disable(); if (rank == 0) { ret = starpu_mpi_send(handle, 1, 42, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } else if (rank == 1) { ret = starpu_mpi_recv(handle, 0, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); } starpu_data_unregister(handle); starpu_mpi_comm_stats_retrieve(stats); if (rank == 0) STARPU_ASSERT_MSG(stats[1] == sizeof(int), "Comm stats are incorrect %ld != %ld\n", stats[0], (long)sizeof(int)); free(stats); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } #endif starpu-1.4.9+dfsg/mpi/tests/sync.c000066400000000000000000000071221507764646700170610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" int main(int argc, char **argv) { int size, x=789; int rank, other_rank; int ret; starpu_data_handle_t data[2]; int mpi_init; struct starpu_conf conf; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size % 2) { FPRINTF(stderr, "We need a even number of processes.\n"); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } other_rank = rank%2 == 0 ? rank+1 : rank-1; FPRINTF(stderr, "rank %d exchanging with rank %d\n", rank, other_rank); if (rank % 2) { MPI_Send(&rank, 1, MPI_INT, other_rank, 10, MPI_COMM_WORLD); FPRINTF(stderr, "[%d] sending %d\n", rank, rank); } else { MPI_Recv(&x, 1, MPI_INT, other_rank, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); FPRINTF(stderr, "[%d] received %d\n", rank, x); } starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); if (rank % 2) { starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(unsigned)); starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(unsigned)); starpu_mpi_data_register(data[1], 22, 0); } else starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(unsigned)); starpu_mpi_data_register(data[0], 12, 0); if (rank % 2) { starpu_mpi_req req; ret = starpu_mpi_issend(data[1], &req, other_rank, 22, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_issend"); ret = starpu_mpi_send(data[0], other_rank, 12, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); ret = starpu_mpi_wait(&req, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_wait"); } else { int *xx; ret = starpu_mpi_recv(data[0], other_rank, 12, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(data[0], STARPU_R); xx = (int *)starpu_variable_get_local_ptr(data[0]); FPRINTF_MPI(stderr, "received %d\n", *xx); STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); starpu_data_release(data[0]); starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(unsigned)); starpu_mpi_data_register(data[1], 22, 0); ret = starpu_mpi_recv(data[0], other_rank, 22, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); starpu_data_acquire(data[0], STARPU_R); xx = (int *)starpu_variable_get_local_ptr(data[0]); STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); starpu_data_release(data[0]); } starpu_data_unregister(data[0]); starpu_data_unregister(data[1]); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/tags_allocate.c000066400000000000000000000036141507764646700207110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #define X 3 #define Y 4 int main(int argc, char **argv) { int size, rank, mpi_init; int ret=0; int x, y; struct starpu_conf conf; int matrix[X][Y]; starpu_data_handle_t data_handles[X][Y]; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); int64_t mintag = starpu_mpi_tags_allocate(X*Y); for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { int tag = y*Y + x; matrix[x][y] = tag; starpu_variable_data_register(&data_handles[x][y], STARPU_MAIN_RAM, (uintptr_t)&matrix[x][y], sizeof(matrix[x][y])); starpu_mpi_data_register(data_handles[x][y], mintag + tag, 0); } } // Here we can use the data for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { starpu_data_unregister(data_handles[x][y]); } } starpu_mpi_tags_free(mintag); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? ret : 0; } starpu-1.4.9+dfsg/mpi/tests/tags_checking.c000066400000000000000000000102601507764646700206730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper.h" #define VAL0 12 #define VAL1 24 static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; void callback(void *arg) { unsigned *received = arg; STARPU_PTHREAD_MUTEX_LOCK(&mutex); *received = *received + 1; FPRINTF_MPI(stderr, "Request %u received\n", *received); STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } int do_test(int rank, starpu_mpi_tag_t initial_tag, int sdetached, int rdetached) { int ret, i; int val[2]; starpu_data_handle_t data[2]; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(NULL, NULL, 0, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); if (rank == 1) { val[0] = VAL0; val[1] = VAL1; } else { val[0] = -1; val[1] = -1; } starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&val[0], sizeof(val[0])); starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&val[1], sizeof(val[1])); starpu_mpi_data_register(data[0], initial_tag+77, 1); starpu_mpi_data_register(data[1], initial_tag+88, 1); if (rank == 1) { for(i=1 ; i>=0 ; i--) { if (sdetached) { ret = starpu_mpi_isend_detached(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } else { ret = starpu_mpi_send(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_send"); } } } else if (rank == 0) { int received = 0; for(i=0 ; i<2 ; i++) FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]); for(i=0 ; i<2 ; i++) { if (rdetached) { ret = starpu_mpi_irecv_detached(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, callback, &received); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_irecv_detached"); } else { ret = starpu_mpi_recv(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); } } if (rdetached) { STARPU_PTHREAD_MUTEX_LOCK(&mutex); while (received != 2) { FPRINTF_MPI(stderr, "Received %d messages\n", received); STARPU_PTHREAD_COND_WAIT(&cond, &mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } for(i=0 ; i<2 ; i++) starpu_data_acquire(data[i], STARPU_R); for(i=0 ; i<2 ; i++) FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]); for(i=0 ; i<2 ; i++) starpu_data_release(data[i]); } FPRINTF_MPI(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); starpu_data_unregister(data[0]); starpu_data_unregister(data[1]); if (rank == 0) { ret = (val[0] == VAL0 && val[1] == VAL1) ? 0 : 1; } starpu_mpi_shutdown(); return ret; } int main(int argc, char **argv) { int size; int rank; int ret=0; int sdetached, rdetached; starpu_mpi_tag_t initial_tag = 0; MPI_INIT_THREAD_real(&argc, &argv, MPI_THREAD_SERIALIZED); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size < 2) { FPRINTF(stderr, "We need at least 2 processes.\n"); MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } for(sdetached=0 ; sdetached<=1 ; sdetached++) { for(rdetached=0 ; rdetached<=1 ; rdetached++) { ret += do_test(rank, initial_tag, sdetached, rdetached); initial_tag += 2; } } MPI_Finalize(); return rank == 0 ? ret : 0; } starpu-1.4.9+dfsg/mpi/tests/temporary.c000066400000000000000000000117231507764646700201310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This tests that one can register temporary data0 on each MPI node which can mix with common data0 */ #include #include "helper.h" static void func_add(void *descr[], void *_args) { (void)_args; int *a = (void*) STARPU_VARIABLE_GET_PTR(descr[0]); const int *b = (void*) STARPU_VARIABLE_GET_PTR(descr[1]); const int *c = (void*) STARPU_VARIABLE_GET_PTR(descr[2]); *a = *b + *c; FPRINTF_MPI(stderr, "%d + %d = %d\n", *b, *c, *a); } static struct starpu_codelet codelet_add = { .cpu_funcs = {func_add}, .nbuffers = 3, .modes = {STARPU_W, STARPU_R, STARPU_R}, .model = &starpu_perfmodel_nop, .flags = STARPU_CODELET_SIMGRID_EXECUTE, }; int main(int argc, char **argv) { int rank, size, n; int ret; int val0 = 0, val1 = 0; starpu_data_handle_t data0, data1, tmp0, tmp, tmp2; struct starpu_conf conf; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, &conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 2) { if (rank == 0) FPRINTF(stderr, "We need at least 2 processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } if (starpu_mpi_cache_is_enabled() == 0) goto skip; if (rank == 0) { val0 = 1; starpu_variable_data_register(&data0, STARPU_MAIN_RAM, (uintptr_t)&val0, sizeof(val0)); starpu_variable_data_register(&data1, -1, (uintptr_t)NULL, sizeof(val0)); starpu_variable_data_register(&tmp0, -1, (uintptr_t)NULL, sizeof(val0)); starpu_mpi_data_register(tmp0, -1, 0); } else if (rank == 1) { starpu_variable_data_register(&data0, -1, (uintptr_t)NULL, sizeof(val0)); starpu_variable_data_register(&data1, STARPU_MAIN_RAM, (uintptr_t)&val1, sizeof(val1)); tmp0 = NULL; } else { starpu_variable_data_register(&data0, -1, (uintptr_t)NULL, sizeof(val0)); starpu_variable_data_register(&data1, -1, (uintptr_t)NULL, sizeof(val0)); tmp0 = NULL; } starpu_variable_data_register(&tmp, -1, (uintptr_t)NULL, sizeof(val0)); starpu_variable_data_register(&tmp2, -1, (uintptr_t)NULL, sizeof(val0)); starpu_mpi_data_register(data0, 42, 0); starpu_mpi_data_register(data1, 43, 1); starpu_mpi_data_register(tmp, 44, 0); starpu_mpi_data_register(tmp2, -1, STARPU_MPI_PER_NODE); /* Test temporary data0 on node 0 only */ starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet_add, STARPU_W, tmp0, STARPU_R, data0, STARPU_R, data0, 0); starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet_add, STARPU_W, data0, STARPU_R, tmp0, STARPU_R, tmp0, 0); starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet_add, STARPU_W, tmp, STARPU_R, data0, STARPU_R, data0, 0); /* Now make some tmp per-node, so that each node replicates the computation */ for (n = 0; n < size; n++) if (n != 0) /* Get the value on all nodes */ starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, tmp, n, NULL, NULL); starpu_mpi_data_set_rank(tmp, STARPU_MPI_PER_NODE); /* This task writes to a per-node data, so will be executed by all nodes */ starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet_add, STARPU_W, tmp2, STARPU_R, tmp, STARPU_R, tmp, 0); /* All MPI nodes have computed the value (no MPI communication here!) */ starpu_data_acquire_on_node(tmp2, STARPU_MAIN_RAM, STARPU_R); STARPU_ASSERT(*(int*)starpu_data_handle_to_pointer(tmp2, STARPU_MAIN_RAM) == 16); starpu_data_release_on_node(tmp2, STARPU_MAIN_RAM); /* And nodes 0 and 1 do something with it */ starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet_add, STARPU_W, data0, STARPU_R, tmp, STARPU_R, tmp2, 0); starpu_mpi_task_insert(MPI_COMM_WORLD, &codelet_add, STARPU_W, data1, STARPU_R, tmp, STARPU_R, tmp2, 0); starpu_task_wait_for_all(); if (rank == 0) { starpu_data_unregister(tmp0); } starpu_data_unregister(data0); starpu_data_unregister(data1); starpu_data_unregister(tmp); starpu_data_unregister(tmp2); if (rank == 0) STARPU_ASSERT_MSG(val0 == 24, "[rank 0] %d should be %d\n", val0, 24); if (rank == 1) STARPU_ASSERT_MSG(val1 == 24, "[rank 1] %d should be %d\n", val1, 24); skip: starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return 0; } starpu-1.4.9+dfsg/mpi/tests/user_defined_datatype.c000066400000000000000000000127571507764646700224460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "helper.h" #ifdef STARPU_QUICK_CHECK # define ELEMENTS 10 #else # define ELEMENTS 1000 #endif typedef void (*test_func)(starpu_data_handle_t *, int, int, starpu_mpi_tag_t); void test_handle_irecv_isend_detached(starpu_data_handle_t *handles, int nb_handles, int rank, starpu_mpi_tag_t tag) { int i; (void)rank; for(i=0 ; ivalue) int *starpu_value_get(starpu_data_handle_t handle) { struct starpu_value_interface *value_interface = (struct starpu_value_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return value_interface->value; } static void value_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_value_interface *value_interface = (struct starpu_value_interface *) data_interface; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_value_interface *local_interface = (struct starpu_value_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) local_interface->value = value_interface->value; else local_interface->value = 0; } } static starpu_ssize_t value_allocate_data_on_node(void *data_interface, unsigned node) { struct starpu_value_interface *value_interface = (struct starpu_value_interface *) data_interface; int *addr = 0; addr = (int *) starpu_malloc_on_node(node, sizeof(int)); if (!addr) return -ENOMEM; /* update the data properly in consequence */ value_interface->value = addr; return sizeof(int); } static void value_free_data_on_node(void *data_interface, unsigned node) { struct starpu_value_interface *value_interface = (struct starpu_value_interface *) data_interface; starpu_free_on_node(node, (uintptr_t) value_interface->value, sizeof(int)); value_interface->value = NULL; } static size_t value_get_size(starpu_data_handle_t handle) { (void)handle; return sizeof(int); } static uint32_t value_footprint(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(value_get_size(handle), 0); } static void *value_to_pointer(void *data_interface, unsigned node) { (void) node; struct starpu_value_interface *value_interface = data_interface; return (void*) value_interface->value; } static int value_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_value_interface *value_interface = (struct starpu_value_interface *) starpu_data_get_interface_on_node(handle, node); *count = sizeof(int); if (ptr != NULL) { *ptr = (void*) starpu_malloc_on_node_flags(node, *count, 0); memcpy(*ptr, value_interface->value, sizeof(int)); } return 0; } static int value_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { (void)count; STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_value_interface *value_interface = (struct starpu_value_interface *) starpu_data_get_interface_on_node(handle, node); value_interface->value[0] = ((int *)ptr)[0]; assert(value_interface->value[0] == 36); return 0; } static int value_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { value_peek_data(handle, node, ptr, count); starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); return 0; } static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct starpu_value_interface *src_value = src_interface; struct starpu_value_interface *dst_value = dst_interface; return starpu_interface_copy((uintptr_t) src_value->value, 0, src_node, (uintptr_t) dst_value->value, 0, dst_node, sizeof(int), async_data); } static const struct starpu_data_copy_methods value_copy_methods = { .any_to_any = copy_any_to_any }; static struct starpu_data_interface_ops interface_value_ops = { .register_data_handle = value_register_data_handle, .allocate_data_on_node = value_allocate_data_on_node, .free_data_on_node = value_free_data_on_node, .copy_methods = &value_copy_methods, .get_size = value_get_size, .footprint = value_footprint, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct starpu_value_interface), .to_pointer = value_to_pointer, .pack_data = value_pack_data, .peek_data = value_peek_data, .unpack_data = value_unpack_data }; void starpu_value_data_register(starpu_data_handle_t *handleptr, unsigned home_node, int *value) { struct starpu_value_interface value_int = { .value = value }; starpu_data_register(handleptr, home_node, &value_int, &interface_value_ops); } #endif /* _USER_DEFINED_DATATYPE_VALUE_H */ starpu-1.4.9+dfsg/mpi/tests/wait_for_all.c000066400000000000000000000045431507764646700205530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" void callback(void *arg) { int *completed = arg; *completed = 1; } #define SIZE 370*000*0000 int main(int argc, char **argv) { int ret, rank, size; int mpi_init; starpu_data_handle_t handle; char *value; int comm_completed=42; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_mpi_init_conf(&argc, &argv, mpi_init, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); if (size < 2) { if (rank == 0) FPRINTF(stderr, "We need at least 2 processes.\n"); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); return rank == 0 ? STARPU_TEST_SKIPPED : 0; } value = calloc(SIZE, sizeof(value[0])); starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)value, SIZE, sizeof(value[0])); if (rank == 1) { ret = starpu_mpi_send(handle, 0, 1, MPI_COMM_WORLD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_isend_detached"); } else if (rank == 0) { ret = starpu_mpi_irecv_detached(handle, 1, 1, MPI_COMM_WORLD, callback, &comm_completed); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_recv"); } starpu_mpi_wait_for_all(MPI_COMM_WORLD); if (rank == 0) { if (comm_completed == 42) { FPRINTF_MPI(stderr, "comm not completed\n"); ret = 1; } else { FPRINTF_MPI(stderr, "comm completed\n"); } } starpu_data_unregister(handle); free(value); starpu_mpi_shutdown(); if (!mpi_init) MPI_Finalize(); if (rank == 0 && comm_completed == 42) { FPRINTF(stderr, "comm still not completed\n"); ret = 1; } return (rank == 0) ? ret : 0; } starpu-1.4.9+dfsg/mpi/tools/000077500000000000000000000000001507764646700157355ustar00rootroot00000000000000starpu-1.4.9+dfsg/mpi/tools/Makefile.am000066400000000000000000000027121507764646700177730ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2017-2017 Erwan Leria # Copyright (C) 2013-2013 Thibaut Lambert # Copyright (C) 2013-2013 Joris Pablo # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-notests.mk SUBDIRS = AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/tools/ -I$(top_srcdir)/mpi/ -I$(top_srcdir)/mpi/include -I$(top_builddir)/src -I$(top_srcdir)/src -DSTARPU_REPLAY_MPI $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(top_builddir)/mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS) CC=$(CC_OR_MPICC) CCLD=$(CC_OR_MPICC) starpu_replay.c starpu_replay_sched.c: $(V_ln) $(LN_S) $(top_srcdir)/tools/$(notdir $@) $@ if STARPU_SIMGRID bin_PROGRAMS = starpu_replay_mpi starpu_replay_mpi_SOURCES = \ starpu_replay.c \ starpu_replay_sched.c endif starpu-1.4.9+dfsg/mpi/tools/Makefile.in000066400000000000000000001114761507764646700200140ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ @STARPU_SIMGRID_TRUE@bin_PROGRAMS = starpu_replay_mpi$(EXEEXT) subdir = mpi/tools ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__installdirs = "$(DESTDIR)$(bindir)" PROGRAMS = $(bin_PROGRAMS) am__starpu_replay_mpi_SOURCES_DIST = starpu_replay.c \ starpu_replay_sched.c @STARPU_SIMGRID_TRUE@am_starpu_replay_mpi_OBJECTS = \ @STARPU_SIMGRID_TRUE@ starpu_replay.$(OBJEXT) \ @STARPU_SIMGRID_TRUE@ starpu_replay_sched.$(OBJEXT) starpu_replay_mpi_OBJECTS = $(am_starpu_replay_mpi_OBJECTS) starpu_replay_mpi_LDADD = $(LDADD) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/starpu_replay.Po \ ./$(DEPDIR)/starpu_replay_sched.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = $(starpu_replay_mpi_SOURCES) DIST_SOURCES = $(am__starpu_replay_mpi_SOURCES_DIST) RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/make/starpu-notests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = $(CC_OR_MPICC) CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ $(top_builddir)/mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la \ $(STARPU_EXPORTED_LIBS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_CFLAGS = $(GLOBAL_AM_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2017-2017 Erwan Leria # Copyright (C) 2013-2013 Thibaut Lambert # Copyright (C) 2013-2013 Joris Pablo # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # SUBDIRS = AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/tools/ -I$(top_srcdir)/mpi/ -I$(top_srcdir)/mpi/include -I$(top_builddir)/src -I$(top_srcdir)/src -DSTARPU_REPLAY_MPI $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ CCLD = $(CC_OR_MPICC) @STARPU_SIMGRID_TRUE@starpu_replay_mpi_SOURCES = \ @STARPU_SIMGRID_TRUE@ starpu_replay.c \ @STARPU_SIMGRID_TRUE@ starpu_replay_sched.c all: all-recursive .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign mpi/tools/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign mpi/tools/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): install-binPROGRAMS: $(bin_PROGRAMS) @$(NORMAL_INSTALL) @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ fi; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ while read p p1; do if test -f $$p \ || test -f $$p1 \ ; then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' \ -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ } \ ; done uninstall-binPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' \ `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(bindir)" && rm -f $$files clean-binPROGRAMS: @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list starpu_replay_mpi$(EXEEXT): $(starpu_replay_mpi_OBJECTS) $(starpu_replay_mpi_DEPENDENCIES) $(EXTRA_starpu_replay_mpi_DEPENDENCIES) @rm -f starpu_replay_mpi$(EXEEXT) $(AM_V_CCLD)$(LINK) $(starpu_replay_mpi_OBJECTS) $(starpu_replay_mpi_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_replay.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_replay_sched.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile $(PROGRAMS) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(bindir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f ./$(DEPDIR)/starpu_replay.Po -rm -f ./$(DEPDIR)/starpu_replay_sched.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-binPROGRAMS install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f ./$(DEPDIR)/starpu_replay.Po -rm -f ./$(DEPDIR)/starpu_replay_sched.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-binPROGRAMS .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ am--depfiles check check-am clean clean-binPROGRAMS \ clean-generic clean-libtool cscopelist-am ctags ctags-am \ distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-binPROGRAMS \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs installdirs-am \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ uninstall-binPROGRAMS .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null starpu_replay.c starpu_replay_sched.c: $(V_ln) $(LN_S) $(top_srcdir)/tools/$(notdir $@) $@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/mpi/tools/starpu_replay.c000066400000000000000000000733751507764646700210120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2017-2017 Erwan Leria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This reads a tasks.rec file and replays the recorded task graph. * Currently, this version is done to run with simgrid. * * For further information, contact erwan.leria@inria.fr */ #include #include #include #include #include #include #include #include #define REPLAY_NMAX_DEPENDENCIES 8 #define ARRAY_DUP(in, out, n) memcpy(out, in, n * sizeof(*out)) #define ARRAY_INIT(array, n) memset(array, 0, n * sizeof(*array)) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Declarations of global variables, structures, pointers, ... * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ static int static_workerid; /* TODO: move to core header while moving starpu_replay_sched to core */ extern void schedRecInit(const char * filename); extern void applySchedRec(struct starpu_task * starpu_task, long submit_order); /* Enum for normal and "wontuse" tasks */ enum task_type {NormalTask, WontUseTask}; typedef unsigned long jobid_t; enum task_type control; static char *name = NULL; static char *model = NULL; static jobid_t jobid; static jobid_t *dependson; static long submitorder = -1; static starpu_tag_t tag; static int workerid; static uint32_t footprint; static double flops, total_flops = 0.; static double startTime; //start time (The instant when the task starts) static double endTime; //end time (The instant when the task ends) static int iteration = -1; static starpu_data_handle_t handles[STARPU_NMAXBUFS]; static enum starpu_data_access_mode modes[STARPU_NMAXBUFS]; static char normal_reg_signal[STARPU_NMAXBUFS]; /* Use the following arrays when the number of data is greater than STARPU_NMAXBUFS */ starpu_data_handle_t * handles_ptr; enum starpu_data_access_mode * modes_ptr; size_t * sizes_set; static size_t dependson_size; static size_t ndependson; static unsigned nb_parameters = 0; /* Number of parameters */ static int alloc_mode; /* If alloc_mode value is 1, then the handles are stored in dyn_handles, else they are in handles */ static int priority = 0; char * reg_signal = NULL; /* The register signal (0 or 1 coded on 8 bit) is used to know which handle of the task has to be registered in StarPU (in fact to avoid handle twice)*/ /* Record all tasks, hashed by jobid. */ static struct task { struct starpu_rbtree_node node; UT_hash_handle hh; jobid_t jobid; int iteration; long submit_order; jobid_t *deps; size_t ndependson; struct starpu_task task; enum task_type type; int reg_signal; } *tasks; /* Record handles */ static struct handle { UT_hash_handle hh; starpu_data_handle_t mem_ptr; /* This value should be the registered handle */ starpu_data_handle_t handle; /* The key is the original value of the handle in the file */ } * handles_hash; /* Record models */ static struct perfmodel { UT_hash_handle hh; struct starpu_perfmodel perfmodel; char * model_name; } * model_hash; /* * Replay data interface * We don't care about many things anyway, essentially only sizes. */ struct replay_interface { enum starpu_data_interface_id id; starpu_data_handle_t orig_handle; size_t size; size_t alloc_size; size_t max_size; }; static struct starpu_data_interface_ops replay_interface_ops; static void register_replay(starpu_data_handle_t handle, int home_node, void *data_interface) { (void) home_node; struct replay_interface *replay_interface = data_interface; unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { struct replay_interface *local_interface = starpu_data_get_interface_on_node(handle, node); local_interface->id = replay_interface->id; local_interface->orig_handle = replay_interface->orig_handle; local_interface->size = replay_interface->size; local_interface->alloc_size = replay_interface->alloc_size; local_interface->max_size = replay_interface->max_size; } } static void replay_data_register(starpu_data_handle_t *handleptr, starpu_data_handle_t orig_handle, int home_node, size_t size, size_t alloc_size, size_t max_size) { struct replay_interface interface = { .id = replay_interface_ops.interfaceid, .orig_handle = orig_handle, .size = size, .alloc_size = alloc_size, .max_size = max_size, }; starpu_data_register(handleptr, home_node, &interface, &replay_interface_ops); } static size_t replay_get_size(starpu_data_handle_t handle) { struct replay_interface *interface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return interface->size; } static size_t replay_get_alloc_size(starpu_data_handle_t handle) { struct replay_interface *interface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return interface->alloc_size; } static size_t replay_get_max_size(starpu_data_handle_t handle) { struct replay_interface *interface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return interface->max_size; } static uint32_t replay_footprint(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(replay_get_size(handle), 0); } static int replay_compare(void *data_interface_a, void *data_interface_b) { struct replay_interface *replay_a = data_interface_a; struct replay_interface *replay_b = data_interface_b; /* Two variables are considered compatible if they have the same size */ return replay_a->size == replay_b->size; } static void display_replay(starpu_data_handle_t handle, FILE *f) { struct replay_interface *replay_interface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); fprintf(f, "%lu/%lu/%lu\t", (unsigned long) replay_interface->size, (unsigned long) replay_interface->alloc_size, (unsigned long) replay_interface->max_size); } static starpu_ssize_t describe_replay(void *data_interface, char *buf, size_t size) { struct replay_interface *replay_interface = data_interface; return snprintf(buf, size, "r%lu/%lu/%lu\t", (unsigned long) replay_interface->size, (unsigned long) replay_interface->alloc_size, (unsigned long) replay_interface->max_size); } static starpu_ssize_t allocate_replay_on_node(void *data_interface, unsigned dst_node) { struct replay_interface *replay_interface = data_interface; starpu_memory_allocate(dst_node, replay_interface->alloc_size, STARPU_MEMORY_OVERFLOW); return 0; } static void free_replay_on_node(void *data_interface, unsigned dst_node) { struct replay_interface *replay_interface = data_interface; starpu_memory_deallocate(dst_node, replay_interface->alloc_size); } static int replay_copy(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { (void) dst_interface; struct replay_interface *src = src_interface; /* We don't care about pointers */ return starpu_interface_copy(1, 0, src_node, 1, 0, dst_node, src->size, async_data); } static const struct starpu_data_copy_methods replay_copy_data_methods = { .any_to_any = replay_copy, }; static struct starpu_data_interface_ops replay_interface_ops = { .register_data_handle = register_replay, .allocate_data_on_node = allocate_replay_on_node, .free_data_on_node = free_replay_on_node, .copy_methods = &replay_copy_data_methods, .get_size = replay_get_size, .get_alloc_size = replay_get_alloc_size, .get_max_size = replay_get_max_size, .footprint = replay_footprint, .compare = replay_compare, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct replay_interface), .display = display_replay, .pack_data = NULL, .peek_data = NULL, .unpack_data = NULL, .describe = describe_replay, /* We want to observe actual allocations/deallocations */ .dontcache = 1, }; /* [SUBMITORDER] The tree of the submit order */ static struct starpu_rbtree tree = STARPU_RBTREE_INITIALIZER; /* the cmp_fn arg for rb_tree_insert() */ unsigned int diff(struct starpu_rbtree_node * left_elm, struct starpu_rbtree_node * right_elm) { long oleft = ((struct task *) left_elm)->submit_order; long oright = ((struct task *) right_elm)->submit_order; if (oleft == -1 && oright == -1) { if (left_elm < right_elm) return -1; else return 1; } return oleft - oright; } /* Settings for the perfmodel */ struct task_arg { uint32_t footprint; unsigned narch; double perf[]; }; uint32_t get_footprint(struct starpu_task * task) { return ((struct task_arg*) (task->cl_arg))->footprint; } double arch_cost_function(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl) { int device = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); STARPU_ASSERT(device != -1); (void) nimpl; /* Then, get the pointer to the value of the expected time */ struct task_arg *arg = task->cl_arg; if (device < (int) arg->narch) { double val = arg->perf[device]; if (!(val == 0 || isnan(val))) return val; } fprintf(stderr, "[starpu] Error, expected_time is 0 or lower (replay.c line : %d)", __LINE__- 6); return 0.0; } /* End of settings */ static unsigned long nexecuted_tasks; void dumb_kernel(void *buffers[], void *args) { (void) buffers; (void) args; nexecuted_tasks++; if (!(nexecuted_tasks % 1000)) { fprintf(stderr, "\rExecuted task %lu...", nexecuted_tasks); fflush(stdout); } unsigned this_worker = starpu_worker_get_id_check(); struct starpu_perfmodel_arch *perf_arch = starpu_worker_get_perf_archtype(this_worker, STARPU_NMAX_SCHED_CTXS); struct starpu_task *task = starpu_task_get_current(); unsigned impl = starpu_task_get_implementation(task); double length = starpu_task_expected_length(task, perf_arch, impl); STARPU_ASSERT_MSG(!_STARPU_IS_ZERO(length) && !isnan(length), "Codelet %s does not have a perfmodel, or is not calibrated enough, please re-run in non-simgrid mode until it is calibrated", starpu_task_get_name(task)); starpu_sleep(length / 1000000); } /* [CODELET] Initialization of an unique codelet for all the tasks*/ static int can_execute(unsigned worker_id, struct starpu_task *task, unsigned nimpl) { struct starpu_perfmodel_arch * arch = starpu_worker_get_perf_archtype(worker_id, STARPU_NMAX_SCHED_CTXS); int device = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); if (device == -1) /* Doesn't exist yet, thus unknown, assuming it can not work there. */ return 0; (void) nimpl; /* Then, get the pointer to the value of the expected time */ struct task_arg *arg = task->cl_arg; if (device < (int) arg->narch) { double val = arg->perf[device]; if (!(val == 0 || isnan(val))) return 1; } return 0; } static struct starpu_perfmodel myperfmodel = { .type = STARPU_PER_ARCH, .arch_cost_function = arch_cost_function, .footprint = get_footprint, }; static struct starpu_codelet cl = { .cpu_funcs = { dumb_kernel }, .cpu_funcs_name = { "dumb_kernel" }, .cuda_funcs = { dumb_kernel }, .opencl_funcs = { dumb_kernel }, .nbuffers = STARPU_VARIABLE_NBUFFERS, .can_execute = can_execute, .model = &myperfmodel, .flags = STARPU_CODELET_SIMGRID_EXECUTE, }; /* * * * * * * * * * * * * * * * * * * Functions * * * * * * * * * * * * * * * * * * * */ /* The following function checks if the program has to use static or dynamic arrays*/ static int set_alloc_mode(int total_parameters) { return total_parameters <= STARPU_NMAXBUFS; } /* According to the allocation mode, modify handles_ptr and modes_ptr in static or dynamic */ static void arrays_managing(int mode) { if (mode) { handles_ptr = &handles[0]; modes_ptr = &modes[0]; reg_signal = &normal_reg_signal[0]; } else { _STARPU_MALLOC(handles_ptr, sizeof(*handles_ptr) * nb_parameters); _STARPU_MALLOC(modes_ptr, sizeof(*modes_ptr) * nb_parameters); _STARPU_CALLOC(reg_signal, nb_parameters, sizeof(char)); } } static unsigned count_number_tokens(const char* buffer, const char* delim) { char* dup = strdup(buffer); int result = 0; char* token = strtok(dup, delim); while(token != NULL) { ++result; token = strtok(NULL, delim); } free(dup); return result; } /* Check if a handle hasn't been registered yet */ static void variable_data_register_check(size_t * array_of_size, int nb_handles) { int h, i; starpu_data_handle_t orig_handles[nb_handles]; ARRAY_DUP(handles_ptr, orig_handles, nb_handles); for (h = 0 ; h < nb_handles ; h++) { if(reg_signal[h]) /* Get the register signal, if it's 1 do ... */ { struct handle * handles_cell; for (i = 0; i < h; i++) { /* Maybe we just registered it in this very h loop */ if (handles_ptr[h] == orig_handles[i]) { handles_ptr[h] = handles_ptr[i]; break; } } if (i == h) { _STARPU_MALLOC(handles_cell, sizeof(*handles_cell)); STARPU_ASSERT(handles_cell != NULL); handles_cell->handle = handles_ptr[h]; /* Get the hidden key (initial handle from the file) to store it as a key*/ replay_data_register(handles_ptr+h, handles_ptr[h], modes_ptr[h] & STARPU_R ? STARPU_MAIN_RAM : -1, array_of_size[h], array_of_size[h], array_of_size[h]); handles_cell->mem_ptr = handles_ptr[h]; /* Store the new value of the handle into the hash table */ HASH_ADD(hh, handles_hash, handle, sizeof(handles_ptr[h]), handles_cell); } } } } void reset(void) { control = NormalTask; if (name != NULL) { free(name); name = NULL; } if (model != NULL) { free(model); model = NULL; } if (sizes_set != NULL) { free(sizes_set); sizes_set = NULL; } if (reg_signal != NULL) { if (!alloc_mode) { free(reg_signal); reg_signal = NULL; } else { ARRAY_INIT(reg_signal, nb_parameters); } } jobid = 0; ndependson = 0; tag = -1; workerid = -1; footprint = 0; startTime = 0.0; endTime = 0.0; if (submitorder != -1) submitorder = -1; iteration = -1; nb_parameters = 0; alloc_mode = 1; } void fix_wontuse_handle(struct task * wontuseTask) { STARPU_ASSERT(wontuseTask); if (!wontuseTask->reg_signal) /* Data was already registered when we created this task, so it's already a handle */ return; struct handle *handle_tmp; /* Data was not registered when we created this task, so this is the application pointer, look it up now */ HASH_FIND(hh, handles_hash, &wontuseTask->task.handles[0], sizeof(wontuseTask->task.handles[0]), handle_tmp); if (handle_tmp) wontuseTask->task.handles[0] = handle_tmp->mem_ptr; else /* This data wasn't actually used, don't care about it */ wontuseTask->task.handles[0] = NULL; } /* Function that submits all the tasks (used when the program reaches EOF) */ int submit_tasks(void) { /* Add dependencies */ const struct starpu_rbtree * tmptree = &tree; struct starpu_rbtree_node * currentNode = starpu_rbtree_first(tmptree); long last_submitorder = 0; while (currentNode != NULL) { struct task * currentTask = (struct task *) currentNode; if (currentTask->type == NormalTask) { if (currentTask->submit_order != -1) { STARPU_ASSERT(currentTask->submit_order >= last_submitorder + 1); while (currentTask->submit_order > last_submitorder + 1) { /* Oops, some tasks were not submitted by original application, fake some */ struct starpu_task *task = starpu_task_create(); int ret; task->cl = NULL; task->name = "fake task for submit order"; ret = starpu_task_submit(task); STARPU_ASSERT(ret == 0); last_submitorder++; } } if (currentTask->ndependson > 0) { struct starpu_task * taskdeps[currentTask->ndependson]; unsigned i, j = 0; for (i = 0; i < currentTask->ndependson; i++) { struct task * taskdep; /* Get the ith jobid of deps_jobid */ HASH_FIND(hh, tasks, ¤tTask->deps[i], sizeof(jobid), taskdep); if(taskdep) { taskdeps[j] = &taskdep->task; j ++; } } starpu_task_declare_deps_array(¤tTask->task, j, taskdeps); } if (!(currentTask->iteration == -1)) starpu_iteration_push(currentTask->iteration); applySchedRec(¤tTask->task, currentTask->submit_order); if (currentTask->submit_order == -1) currentTask->task.no_submitorder = 1; int ret_val = starpu_task_submit(¤tTask->task); if (!(currentTask->iteration == -1)) starpu_iteration_pop(); if (ret_val != 0) { fprintf(stderr, "\nWhile submitting task %ld (%s): return %d\n", currentTask->submit_order, currentTask->task.name? currentTask->task.name : "unknown", ret_val); return -1; } //fprintf(stderr, "submitting task %s (%lu, %llu)\n", currentTask->task.name?currentTask->task.name:"anonymous", currentTask->jobid, (unsigned long long) currentTask->task.tag_id); if (!(currentTask->submit_order % 1000)) { fprintf(stderr, "\rSubmitted task order %ld...", currentTask->submit_order); fflush(stdout); } if (currentTask->submit_order != -1) last_submitorder++; } else { fix_wontuse_handle(currentTask); /* Add the handle in the wontuse task */ if (currentTask->task.handles[0]) { starpu_data_wont_use(currentTask->task.handles[0]); last_submitorder++; } } currentNode = starpu_rbtree_next(currentNode); } fprintf(stderr, " done.\n"); return 1; } /* * * * * * * * * * * * * * * */ /* * * * * * MAIN * * * * * * */ /* * * * * * * * * * * * * * */ static void usage(const char *program) { fprintf(stderr,"Usage: %s [--static-workerid] tasks.rec [sched.rec]\n", program); exit(EXIT_FAILURE); } int main(int argc, char **argv) { FILE *rec; char *s; const char *tasks_rec = NULL; const char *sched_rec = NULL; unsigned i; size_t s_allocated = 128; unsigned long nread_tasks = 0; /* FIXME: we do not support data with sequential consistency disabled */ _STARPU_MALLOC(s, s_allocated); dependson_size = REPLAY_NMAX_DEPENDENCIES; /* Change the value of REPLAY_NMAX_DEPENCIES to modify the number of dependencies */ _STARPU_MALLOC(dependson, dependson_size * sizeof (* dependson)); alloc_mode = 1; for (i = 1; i < (unsigned) argc; i++) { if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) { usage(argv[0]); } else if (!strcmp(argv[i], "--static-workerid")) { static_workerid = 1; } else { if (!tasks_rec) tasks_rec = argv[i]; else if (!sched_rec) sched_rec = argv[i]; else usage(argv[0]); } } if (!tasks_rec) usage(argv[0]); if (sched_rec) schedRecInit(sched_rec); rec = fopen(tasks_rec, "r"); if (!rec) { fprintf(stderr,"unable to open file %s: %s\n", tasks_rec, strerror(errno)); exit(EXIT_FAILURE); } int ret = starpu_init(NULL); if (ret == -ENODEV) goto enodev; /* Read line by line, and on empty line submit the task with the accumulated information */ reset(); double start = starpu_timing_now(); int linenum = 0; while(1) { char *ln; if (!fgets(s, s_allocated, rec)) { fprintf(stderr, " done.\n"); int submitted = submit_tasks(); if (submitted == -1) { goto enodev; } goto eof; } while (!(ln = strchr(s, '\n'))) { /* fprintf(stderr,"buffer size %d too small, doubling it\n", s_allocated); */ _STARPU_REALLOC(s, s_allocated * 2); if (!fgets(s + s_allocated-1, s_allocated+1, rec)) { fprintf(stderr, "\n"); int submitted = submit_tasks(); if (submitted == -1) { goto enodev; } goto eof; } s_allocated *= 2; } linenum++; if (ln == s) { /* Empty line, do task */ struct task * task; _STARPU_MALLOC(task, sizeof(*task)); starpu_task_init(&task->task); task->deps = NULL; task->submit_order = submitorder; starpu_rbtree_node_init(&task->node); starpu_rbtree_insert(&tree, &task->node, diff); task->jobid = jobid; task->iteration = iteration; if (name != NULL) task->task.name = strdup(name); task->type = control; if (control == NormalTask) { if (workerid >= 0) { task->task.priority = priority; task->task.cl = &cl; if (static_workerid) { task->task.workerid = workerid; task->task.execute_on_a_specific_worker = 1; } if (alloc_mode) { /* Duplicating the handles stored (and registered in the current context) into the task */ ARRAY_DUP(modes_ptr, task->task.modes, nb_parameters); ARRAY_DUP(modes_ptr, task->task.cl->modes, nb_parameters); variable_data_register_check(sizes_set, nb_parameters); ARRAY_DUP(handles_ptr, task->task.handles, nb_parameters); } else { task->task.dyn_modes = modes_ptr; _STARPU_MALLOC(task->task.cl->dyn_modes, (sizeof(*task->task.cl->dyn_modes) * nb_parameters)); ARRAY_DUP(modes_ptr, task->task.cl->dyn_modes, nb_parameters); variable_data_register_check(sizes_set, nb_parameters); task->task.dyn_handles = handles_ptr; } task->task.nbuffers = nb_parameters; struct perfmodel * realmodel; HASH_FIND_STR(model_hash, model, realmodel); if (realmodel == NULL) { int len = strlen(model); _STARPU_CALLOC(realmodel, 1, sizeof(struct perfmodel)); _STARPU_MALLOC(realmodel->model_name, sizeof(char) * (len+1)); realmodel->model_name = strcpy(realmodel->model_name, model); starpu_perfmodel_init(&realmodel->perfmodel); int error = starpu_perfmodel_load_symbol(model, &realmodel->perfmodel); if (!error) { HASH_ADD_STR(model_hash, model_name, realmodel); } else { fprintf(stderr, "[starpu][Warning] Error loading perfmodel symbol %s\n", model); fprintf(stderr, "[starpu][Warning] Taking only measurements from the given execution, and forcing execution on worker %d\n", workerid); starpu_perfmodel_unload_model(&realmodel->perfmodel); free(realmodel->model_name); free(realmodel); realmodel = NULL; } } struct starpu_perfmodel_arch *arch = starpu_worker_get_perf_archtype(workerid, 0); unsigned comb = starpu_perfmodel_arch_comb_add(arch->ndevices, arch->devices); unsigned narch = starpu_perfmodel_get_narch_combs(); struct task_arg *arg; _STARPU_MALLOC(arg, sizeof(struct task_arg) + sizeof(double) * narch); arg->footprint = footprint; arg->narch = narch; double * perfTime = arg->perf; if (realmodel == NULL) { /* Erf, do without perfmodel, for execution there */ task->task.workerid = workerid; task->task.execute_on_a_specific_worker = 1; for (i = 0; i < narch ; i++) { if (i == comb) perfTime[i] = endTime - startTime; else perfTime[i] = NAN; } } else { int one = 0; for (i = 0; i < narch ; i++) { arch = starpu_perfmodel_arch_comb_fetch(i); perfTime[i] = starpu_perfmodel_history_based_expected_perf(&realmodel->perfmodel, arch, footprint); if (!(perfTime[i] == 0 || isnan(perfTime[i]))) one = 1; } if (!one) { fprintf(stderr, "We do not have any performance measurement for symbol '%s' for footprint %x, we can not execute this", model, footprint); exit(EXIT_FAILURE); } } task->task.cl_arg = arg; task->task.flops = flops; total_flops += flops; } task->task.cl_arg_size = 0; task->task.tag_id = tag; task->task.use_tag = 1; task->ndependson = ndependson; if (ndependson > 0) { _STARPU_MALLOC(task->deps, ndependson * sizeof (* task->deps)); ARRAY_DUP(dependson, task->deps, ndependson); } } else { STARPU_ASSERT(nb_parameters == 1); task->reg_signal = reg_signal[0]; ARRAY_DUP(handles_ptr, task->task.handles, nb_parameters); } /* Add this task to task hash */ HASH_ADD(hh, tasks, jobid, sizeof(jobid), task); nread_tasks++; if (!(nread_tasks % 1000)) { fprintf(stderr, "\rRead task %lu...", nread_tasks); fflush(stdout); } reset(); } /* Record various information */ #define TEST(field) (!strncmp(s, field": ", strlen(field) + 2)) else if(TEST("Control")) { char * c = s+9; if(!strncmp(c, "WontUse", 7)) { control = WontUseTask; nb_parameters = 1; alloc_mode = set_alloc_mode(nb_parameters); arrays_managing(alloc_mode); } else control = NormalTask; } else if (TEST("Name")) { *ln = 0; name = strdup(s+6); } else if (TEST("Model")) { *ln = 0; model = strdup(s+7); } else if (TEST("JobId")) jobid = atol(s+7); else if(TEST("SubmitOrder")) submitorder = atoi(s+13); else if (TEST("DependsOn")) { char *c = s + 11; for (ndependson = 0; *c != '\n'; ndependson++) { if (ndependson >= dependson_size) { dependson_size *= 2; _STARPU_REALLOC(dependson, dependson_size * sizeof(*dependson)); } dependson[ndependson] = strtol(c, &c, 10); } } else if (TEST("Tag")) { tag = strtol(s+5, NULL, 16); } else if (TEST("WorkerId")) { workerid = atoi(s+10); } else if (TEST("Footprint")) { footprint = strtoul(s+11, NULL, 16); } else if (TEST("Parameters")) { /* Nothing to do */ } else if (TEST("Handles")) { *ln = 0; char *buffer = s + 9; const char *delim = " "; unsigned nb_parameters_line = count_number_tokens(buffer, delim); if(nb_parameters == 0) { nb_parameters = nb_parameters_line; arrays_managing(set_alloc_mode(nb_parameters)); } else STARPU_ASSERT(nb_parameters == nb_parameters_line); char* token = strtok(buffer, delim); for (i = 0 ; i < nb_parameters ; i++) { STARPU_ASSERT(token); struct handle *handles_cell; /* A cell of the hash table for the handles */ starpu_data_handle_t handle_value = (starpu_data_handle_t) strtol(token, NULL, 16); /* Get the ith handle on the line (in the file) */ HASH_FIND(hh, handles_hash, &handle_value, sizeof(handle_value), handles_cell); /* Find if the handle_value was already registered as a key in the hash table */ /* If it wasn't, then add it to the hash table */ if (handles_cell == NULL) { /* Hide the initial handle from the file into the handles array to find it when necessary */ handles_ptr[i] = handle_value; reg_signal[i] = 1; } else { handles_ptr[i] = handles_cell->mem_ptr; reg_signal[i] = 0; } token = strtok(NULL, delim); } } else if (TEST("Modes")) { *ln = 0; char * buffer = s + 7; unsigned mode_i = 0; const char * delim = " "; unsigned nb_parameters_line = count_number_tokens(buffer, delim); if(nb_parameters == 0) { nb_parameters = nb_parameters_line; arrays_managing(set_alloc_mode(nb_parameters)); } else STARPU_ASSERT(nb_parameters == nb_parameters_line); char* token = strtok(buffer, delim); while (token != NULL && mode_i < nb_parameters) { /* Subject to the names of starpu modes enumerator are not modified */ if (!strncmp(token, "RW", 2)) { *(modes_ptr+mode_i) = STARPU_RW; mode_i++; } else if (!strncmp(token, "R", 1)) { *(modes_ptr+mode_i) = STARPU_R; mode_i++; } else if (!strncmp(token, "W", 1)) { *(modes_ptr+mode_i) = STARPU_W; mode_i++; } /* Other cases produce a warning*/ else { fprintf(stderr, "[Warning] A mode is different from R/W (jobid task : %lu)", jobid); } token = strtok(NULL, delim); } } else if (TEST("Sizes")) { *ln = 0; char * buffer = s + 7; const char * delim = " "; unsigned nb_parameters_line = count_number_tokens(buffer, delim); unsigned k = 0; if(nb_parameters == 0) { nb_parameters = nb_parameters_line; arrays_managing(set_alloc_mode(nb_parameters)); } else STARPU_ASSERT(nb_parameters == nb_parameters_line); _STARPU_MALLOC(sizes_set, nb_parameters * sizeof(size_t)); char * token = strtok(buffer, delim); while (token != NULL && k < nb_parameters) { sizes_set[k] = strtol(token, NULL, 10); token = strtok(NULL, delim); k++; } } else if (TEST("StartTime")) { startTime = strtod(s+11, NULL); } else if (TEST("EndTime")) { endTime = strtod(s+9, NULL); } else if (TEST("GFlop")) { flops = 1000000000 * strtod(s+7, NULL); } else if (TEST("Iteration")) { iteration = (unsigned) strtol(s+11, NULL, 10); } else if (TEST("Priority")) { priority = strtol(s + 10, NULL, 10); } } eof: starpu_task_wait_for_all(); fprintf(stderr, " done.\n"); printf("%g ms", (starpu_timing_now() - start) / 1000.); if (total_flops != 0.) printf("\t%g GF/s", (total_flops / (starpu_timing_now() - start)) / 1000.); printf("\n"); /* FREE allocated memory */ free(dependson); free(s); /* End of FREE */ struct handle *handle=NULL, *handletmp=NULL; HASH_ITER(hh, handles_hash, handle, handletmp) { starpu_data_unregister(handle->mem_ptr); HASH_DEL(handles_hash, handle); free(handle); } struct perfmodel *model_s=NULL, *modeltmp=NULL; HASH_ITER(hh, model_hash, model_s, modeltmp) { starpu_perfmodel_unload_model(&model_s->perfmodel); HASH_DEL(model_hash, model_s); free(model_s->model_name); free(model_s); } struct task *task=NULL, *tasktmp=NULL; HASH_ITER(hh, tasks, task, tasktmp) { free(task->task.cl_arg); free((char*)task->task.name); if (task->task.dyn_handles != NULL) { free(task->task.dyn_handles); free(task->task.dyn_modes); } HASH_DEL(tasks, task); starpu_task_clean(&task->task); free(task->deps); starpu_rbtree_remove(&tree, &task->node); free(task); } starpu_shutdown(); return 0; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/mpi/tools/starpu_replay_sched.c000066400000000000000000000245601507764646700221500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2017-2017 Erwan Leria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This reads a sched.rec file and mangles submitted tasks according to the hint * from that file. */ #include #include #include #include #include #include #include #include // // sched.rec files look like this: // // SubmitOrder: 1234 // Priority: 12 // SpecificWorker: 1 // Workers: 0 1 2 // DependsOn: 1235 // // Prefetch: 1234 // DependsOn: 1233 // MemoryNode: 1 // Parameters: 1 #define CPY(src, dst, n) memcpy(dst, src, n * sizeof(*dst)) #if 0 #define debug(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) #else #define debug(fmt, ...) (void)0 #endif static unsigned long submitorder; /* Also use as prefetchtag */ static int priority; static int eosw; static unsigned workerorder; static int memnode; /* FIXME: MAXs */ static uint32_t workers[STARPU_NMAXWORKERS/32]; static unsigned nworkers; static unsigned dependson[STARPU_NMAXBUFS]; static unsigned ndependson; static unsigned params[STARPU_NMAXBUFS]; static unsigned nparams; static enum sched_type { NormalTask, PrefetchTask, } sched_type; static struct starpu_codelet cl_prefetch = { .where = STARPU_NOWHERE, .nbuffers = 1, .modes = { STARPU_R }, }; static struct task { UT_hash_handle hh; unsigned long submitorder; int priority; int memnode; unsigned dependson[STARPU_NMAXBUFS]; unsigned ndependson; struct starpu_task *depends_tasks[STARPU_NMAXBUFS]; /* For real tasks */ int eosw; unsigned workerorder; uint32_t workers[STARPU_NMAXWORKERS/32]; unsigned nworkers; /* For prefetch tasks */ unsigned params[STARPU_NMAXBUFS]; unsigned nparams; struct starpu_task *pref_task; /* Actual prefetch task */ } *mangled_tasks, *prefetch_tasks; LIST_TYPE(dep, struct task *task; unsigned i; ); struct deps { UT_hash_handle hh; unsigned long submitorder; struct dep_list list; } *dependencies = NULL; static void reset(void) { submitorder = 0; priority = INT_MIN; eosw = -1; memset(&workers, 0, sizeof(workers)); nworkers = 0; ndependson = 0; sched_type = NormalTask; nparams = 0; memnode = -1; workerorder = 0; } /* TODO : respecter l'ordre de soumission des tâches SubmitOrder */ static void checkField(char * s) { /* Record various information */ #define TEST(field) (!strncmp(s, field": ", strlen(field) + 2)) if (TEST("SubmitOrder")) { s = s + strlen("SubmitOrder: "); submitorder = strtol(s, NULL, 10); } else if (TEST("Priority")) { s = s + strlen("Priority: "); priority = strtol(s, NULL, 10); } else if (TEST("SpecificWorker")) { s = s + strlen("SpecificWorker: "); eosw = strtol(s, NULL, 10); } else if (TEST("Workers")) { s = s + strlen("Workers: "); char * delim = " "; char * token = strtok(s, delim); int i = 0; while (token != NULL) { int k = strtol(token, NULL, 10); STARPU_ASSERT_MSG(k < STARPU_NMAXWORKERS, "%d is bigger than maximum %d\n", k, STARPU_NMAXWORKERS); workers[k/(sizeof(*workers)*8)] |= (1 << (k%(sizeof(*workers)*8))); i++; token = strtok(NULL, delim); } nworkers = i; } else if (TEST("DependsOn")) { /* NOTE : dependsons (in the sched.rec) should be the submit orders of the dependencies, otherwise it can occur an undefined behaviour (contrary to the tasks.rec where dependencies are jobids */ unsigned i = 0; char * delim = " "; char * token = strtok(s+strlen("DependsOn: "), delim); while (token != NULL) { dependson[i] = strtol(token, NULL, 10); i++; token = strtok(NULL, delim); } ndependson = i; } else if (TEST("Prefetch")) { s = s + strlen("Prefetch: "); submitorder = strtol(s, NULL, 10); sched_type = PrefetchTask; } else if (TEST("Parameters")) { s = s + strlen("Parameters: "); char * delim = " "; char * token = strtok(s, delim); int i = 0; while (token != NULL) { params[i] = strtol(token, NULL, 10); i++; token = strtok(NULL, delim); } nparams = i; } else if (TEST("MemoryNode")) { s = s + strlen("MemoryNode: "); memnode = strtol(s, NULL, 10); } else if (TEST("Workerorder")) { s = s + strlen("Workerorder: "); workerorder = strtol(s, NULL, 10); } } void schedRecInit(const char * filename) { FILE * f = fopen(filename, "r"); if(f == NULL) { fprintf(stderr,"unable to open file %s: %s\n", filename, strerror(errno)); return; } size_t lnsize = 128; char *s; _STARPU_MALLOC(s, sizeof(*s) * lnsize); int eof = 0; reset(); while(!eof && !feof(f)) { char *ln; /* Get the line */ if (!fgets(s, lnsize, f)) { eof = 1; } while (!(ln = strchr(s, '\n'))) { _STARPU_REALLOC(s, lnsize * 2); if (!fgets(s + lnsize-1, lnsize+1, f)) { eof = 1; break; } lnsize *= 2; } if ((ln == s || eof) && submitorder) { /* Empty line, doit */ struct task * task; unsigned i; _STARPU_MALLOC(task, sizeof(*task)); task->submitorder = submitorder; task->priority = priority; task->memnode = memnode; CPY(dependson, task->dependson, ndependson); task->ndependson = ndependson; /* Also record submitorder of tasks that this one will need to depend on */ for (i = 0; i < ndependson; i++) { struct dep *dep; struct starpu_task *starpu_task; _STARPU_MALLOC(dep, sizeof(*dep)); dep->task = task; dep->i = i; struct deps *deps; HASH_FIND(hh, dependencies, &task->dependson[i], sizeof(submitorder), deps); if (!deps) { /* No task depends on this one yet, add a cell for it */ _STARPU_MALLOC(deps, sizeof(*deps)); dep_list_init(&deps->list); deps->submitorder = task->dependson[i]; HASH_ADD(hh, dependencies, submitorder, sizeof(submitorder), deps); } dep_list_push_back(&deps->list, dep); /* Create the intermediate task */ starpu_task = dep->task->depends_tasks[i] = starpu_task_create(); starpu_task->cl = NULL; starpu_task->destroy = 0; starpu_task->no_submitorder = 1; } switch (sched_type) { case NormalTask: /* A new task to mangle, record what needs to be done */ task->eosw = eosw; task->workerorder = workerorder; CPY(workers, task->workers, STARPU_NMAXWORKERS/32); task->nworkers = nworkers; STARPU_ASSERT(nparams == 0); debug("adding mangled task %lu\n", submitorder); HASH_ADD(hh, mangled_tasks, submitorder, sizeof(submitorder), task); break; case PrefetchTask: STARPU_ASSERT(memnode >= 0); STARPU_ASSERT(eosw == -1); STARPU_ASSERT(workerorder == 0); STARPU_ASSERT(nworkers == 0); CPY(params, task->params, nparams); task->nparams = nparams; /* TODO: more params */ STARPU_ASSERT_MSG(nparams == 1, "only supports one parameter at a time"); debug("adding prefetch task for %lu\n", submitorder); HASH_ADD(hh, prefetch_tasks, submitorder, sizeof(submitorder), task); break; default: STARPU_ASSERT(0); break; } reset(); } else checkField(s); } fclose(f); free(s); } static void do_prefetch(void *arg) { unsigned node = (uintptr_t) arg; starpu_data_idle_prefetch_on_node(starpu_task_get_current()->handles[0], node, 1); } void applySchedRec(struct starpu_task *starpu_task, unsigned long submit_order) { struct task *task; struct deps *deps; int ret; HASH_FIND(hh, dependencies, &submit_order, sizeof(submit_order), deps); if (deps) { struct dep *dep; for (dep = dep_list_begin(&deps->list); dep != dep_list_end(&deps->list); dep = dep_list_next(dep)) { debug("task %lu is %d-th dep for %lu\n", submit_order, dep->i, dep->task->submitorder); /* Some task will depend on this one, make the dependency */ starpu_task_declare_deps_array(dep->task->depends_tasks[dep->i], 1, &starpu_task); ret = starpu_task_submit(dep->task->depends_tasks[dep->i]); STARPU_ASSERT(ret == 0); } } HASH_FIND(hh, prefetch_tasks, &submit_order, sizeof(submit_order), task); if (task) { /* We want to submit a prefetch for this task */ debug("task %lu has a prefetch for parameter %d to node %d\n", submit_order, task->params[0], task->memnode); struct starpu_task *pref_task; pref_task = task->pref_task = starpu_task_create(); pref_task->cl = &cl_prefetch; pref_task->destroy = 1; pref_task->no_submitorder = 1; pref_task->callback_arg = (void*)(uintptr_t) task->memnode; pref_task->callback_func = do_prefetch; /* TODO: more params */ pref_task->handles[0] = starpu_task->handles[task->params[0]]; /* Make it depend on intermediate tasks */ if (task->ndependson) { debug("%u dependencies\n", task->ndependson); starpu_task_declare_deps_array(pref_task, task->ndependson, task->depends_tasks); } ret = starpu_task_submit(pref_task); STARPU_ASSERT(ret == 0); } HASH_FIND(hh, mangled_tasks, &submit_order, sizeof(submit_order), task); if (task == NULL) /* Nothing to do for this */ return; debug("mangling task %lu\n", submit_order); if (task->eosw >= 0) { debug("execute on a specific worker %d\n", task->eosw); starpu_task->workerid = task->eosw; starpu_task->execute_on_a_specific_worker = 1; } if (task->workerorder > 0) { debug("workerorder %d\n", task->workerorder); starpu_task->workerorder = task->workerorder; } if (task->priority != INT_MIN) { debug("priority %d\n", task->priority); starpu_task->priority = task->priority; } if (task->nworkers) { debug("%u workers %x\n", task->nworkers, task->workers[0]); starpu_task->workerids_len = sizeof(task->workers) / sizeof(task->workers[0]); _STARPU_MALLOC(starpu_task->workerids, task->nworkers * sizeof(*starpu_task->workerids)); CPY(task->workers, starpu_task->workerids, STARPU_NMAXWORKERS/32); } if (task->ndependson) { debug("%u dependencies\n", task->ndependson); starpu_task_declare_deps_array(starpu_task, task->ndependson, task->depends_tasks); } /* And now, let it go! */ } starpu-1.4.9+dfsg/packages/000077500000000000000000000000001507764646700155665ustar00rootroot00000000000000starpu-1.4.9+dfsg/packages/libstarpu.pc.in000066400000000000000000000023651507764646700205320ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ pkglibdir=@pkglibdir@ includedir=@includedir@ starpu_includedir=${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ Name: starpu Description: offers support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${starpu_includedir} @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API -DSTARPU_USE_DEPRECATED_ONE_ZERO_API Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@ Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@ Requires: @HWLOC_REQUIRES@ starpu-1.4.9+dfsg/packages/starpu-1.0.pc.in000066400000000000000000000023311507764646700203300ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ pkglibdir=@pkglibdir@ includedir=@includedir@ starpu_includedir=${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ Name: starpu Description: offers support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${starpu_includedir} @STARPU_H_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_ONE_ZERO_API Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@ Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@ Requires: @HWLOC_REQUIRES@ starpu-1.4.9+dfsg/packages/starpu-1.1.pc.in000066400000000000000000000022641507764646700203360ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ pkglibdir=@pkglibdir@ includedir=@includedir@ starpu_includedir=${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ Name: starpu Description: offers support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${starpu_includedir} @STARPU_H_CPPFLAGS@ Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@ Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@ Requires: @HWLOC_REQUIRES@ starpu-1.4.9+dfsg/packages/starpu-1.2.pc.in000066400000000000000000000022641507764646700203370ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ pkglibdir=@pkglibdir@ includedir=@includedir@ starpu_includedir=${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ Name: starpu Description: offers support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${starpu_includedir} @STARPU_H_CPPFLAGS@ Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@ Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@ Requires: @HWLOC_REQUIRES@ starpu-1.4.9+dfsg/packages/starpu-1.3.in000066400000000000000000000034211507764646700177330ustar00rootroot00000000000000#%Module # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # proc ModulesHelp { } { puts stderr "\t[module-info name] - loads the StarPU @STARPU_EFFECTIVE_VERSION@ environment" puts stderr "\tThe following environment variables are modified:" puts stderr "\t$PATH" puts stderr "\t$LD_LIBRARY_PATH" puts stderr "\t$LIBRARY_PATH" puts stderr "\t$INCLUDE" puts stderr "\t$CPATH" puts stderr "\t$PKG_CONFIG_PATH" puts stderr "\t$MANPATH" } set prefix @prefix@ conflict starpu module-whatis "loads the StarPU @STARPU_EFFECTIVE_VERSION@ environment" if {![file exists $prefix]} { puts stderr "\t[module-info name] Load Error: $prefix does not exist" break exit 1 } set exec_prefix @exec_prefix@ set libdir @libdir@ set datarootdir @datarootdir@ prepend-path PATH @bindir@ prepend-path LD_LIBRARY_PATH @libdir@ prepend-path LIBRARY_PATH @libdir@ prepend-path INCLUDE @includedir@/starpu/@STARPU_EFFECTIVE_VERSION@ prepend-path CPATH @includedir@/starpu/@STARPU_EFFECTIVE_VERSION@ prepend-path PKG_CONFIG_PATH @libdir@/pkgconfig prepend-path MANPATH @mandir@ prepend-path PYTHONPATH @libdir@/python@PYTHON_VERSION@/site-packages starpu-1.4.9+dfsg/packages/starpu-1.3.pc.in000066400000000000000000000022641507764646700203400ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ pkglibdir=@pkglibdir@ includedir=@includedir@ starpu_includedir=${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ Name: starpu Description: offers support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${starpu_includedir} @STARPU_H_CPPFLAGS@ Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@ Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@ Requires: @HWLOC_REQUIRES@ starpu-1.4.9+dfsg/packages/starpu-1.4.in000066400000000000000000000034211507764646700177340ustar00rootroot00000000000000#%Module # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # proc ModulesHelp { } { puts stderr "\t[module-info name] - loads the StarPU @STARPU_EFFECTIVE_VERSION@ environment" puts stderr "\tThe following environment variables are modified:" puts stderr "\t$PATH" puts stderr "\t$LD_LIBRARY_PATH" puts stderr "\t$LIBRARY_PATH" puts stderr "\t$INCLUDE" puts stderr "\t$CPATH" puts stderr "\t$PKG_CONFIG_PATH" puts stderr "\t$MANPATH" } set prefix @prefix@ conflict starpu module-whatis "loads the StarPU @STARPU_EFFECTIVE_VERSION@ environment" if {![file exists $prefix]} { puts stderr "\t[module-info name] Load Error: $prefix does not exist" break exit 1 } set exec_prefix @exec_prefix@ set libdir @libdir@ set datarootdir @datarootdir@ prepend-path PATH @bindir@ prepend-path LD_LIBRARY_PATH @libdir@ prepend-path LIBRARY_PATH @libdir@ prepend-path INCLUDE @includedir@/starpu/@STARPU_EFFECTIVE_VERSION@ prepend-path CPATH @includedir@/starpu/@STARPU_EFFECTIVE_VERSION@ prepend-path PKG_CONFIG_PATH @libdir@/pkgconfig prepend-path MANPATH @mandir@ prepend-path PYTHONPATH @libdir@/python@PYTHON_VERSION@/site-packages starpu-1.4.9+dfsg/packages/starpu-1.4.pc.in000066400000000000000000000022641507764646700203410ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ pkglibdir=@pkglibdir@ includedir=@includedir@ starpu_includedir=${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ Name: starpu Description: offers support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${starpu_includedir} @STARPU_H_CPPFLAGS@ Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_EXPORTED_LIBS@ @STARPU_SC_HYPERVISOR@ Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@ Requires: @HWLOC_REQUIRES@ starpu-1.4.9+dfsg/sc_hypervisor/000077500000000000000000000000001507764646700167075ustar00rootroot00000000000000starpu-1.4.9+dfsg/sc_hypervisor/Makefile.am000066400000000000000000000017631507764646700207520ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-subdirtests.mk SUBDIRS = src examples versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) versinclude_HEADERS = include/sc_hypervisor.h \ include/sc_hypervisor_config.h \ include/sc_hypervisor_monitoring.h \ include/sc_hypervisor_policy.h \ include/sc_hypervisor_lp.h starpu-1.4.9+dfsg/sc_hypervisor/Makefile.in000066400000000000000000000734201507764646700207620ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ subdir = sc_hypervisor ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(versinclude_HEADERS) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(versincludedir)" HEADERS = $(versinclude_HEADERS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/make/starpu-subdirtests.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = src examples versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) versinclude_HEADERS = include/sc_hypervisor.h \ include/sc_hypervisor_config.h \ include/sc_hypervisor_monitoring.h \ include/sc_hypervisor_policy.h \ include/sc_hypervisor_lp.h all: all-recursive .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign sc_hypervisor/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign sc_hypervisor/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-versincludeHEADERS: $(versinclude_HEADERS) @$(NORMAL_INSTALL) @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(versincludedir)'"; \ $(MKDIR_P) "$(DESTDIR)$(versincludedir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(versincludedir)'"; \ $(INSTALL_HEADER) $$files "$(DESTDIR)$(versincludedir)" || exit $$?; \ done uninstall-versincludeHEADERS: @$(NORMAL_UNINSTALL) @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(versincludedir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile $(HEADERS) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(versincludedir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-versincludeHEADERS install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-versincludeHEADERS .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ check-am clean clean-generic clean-libtool cscopelist-am ctags \ ctags-am distclean distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip install-versincludeHEADERS installcheck \ installcheck-am installdirs installdirs-am maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ uninstall-am uninstall-versincludeHEADERS .PRECIOUS: Makefile # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # recheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i recheck || RET=1 ; \ done ; \ exit $$RET showcheckfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/sc_hypervisor/examples/000077500000000000000000000000001507764646700205255ustar00rootroot00000000000000starpu-1.4.9+dfsg/sc_hypervisor/examples/Makefile.am000066400000000000000000000035501507764646700225640ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk AM_CFLAGS += $(MAGMA_CFLAGS) AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/examples -I$(top_builddir)/include -I$(top_srcdir)/sc_hypervisor/include -I$(top_srcdir)/sc_hypervisor/examples $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la $(STARPU_EXPORTED_LIBS) LIBS += $(STARPU_CUDA_LDFLAGS) noinst_PROGRAMS = \ app_driven_test/app_driven_test \ lp_test/lp_test \ lp_test/lp_resize_test \ hierarchical_ctxs/resize_hierarchical_ctxs if !STARPU_NO_BLAS_LIB noinst_PROGRAMS += \ cholesky/cholesky_implicit noinst_HEADERS = \ cholesky/cholesky.h \ sched_ctx_utils/sched_ctx_utils.h endif if !STARPU_NO_BLAS_LIB cholesky_cholesky_implicit_SOURCES = \ cholesky/cholesky_implicit.c \ cholesky/cholesky_models.c \ cholesky/cholesky_kernels.c \ sched_ctx_utils/sched_ctx_utils.c \ ../../examples/common/blas.c cholesky_cholesky_implicit_LDADD = \ $(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la \ $(STARPU_BLAS_LDFLAGS) endif app_driven_test_app_driven_test_LDADD = \ $(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la starpu-1.4.9+dfsg/sc_hypervisor/examples/Makefile.in000066400000000000000000001254131507764646700226000ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ noinst_PROGRAMS = app_driven_test/app_driven_test$(EXEEXT) \ lp_test/lp_test$(EXEEXT) lp_test/lp_resize_test$(EXEEXT) \ hierarchical_ctxs/resize_hierarchical_ctxs$(EXEEXT) \ $(am__EXEEXT_1) @STARPU_NO_BLAS_LIB_FALSE@am__append_3 = \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_implicit subdir = sc_hypervisor/examples ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = @STARPU_NO_BLAS_LIB_FALSE@am__EXEEXT_1 = \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_implicit$(EXEEXT) PROGRAMS = $(noinst_PROGRAMS) app_driven_test_app_driven_test_SOURCES = \ app_driven_test/app_driven_test.c am__dirstamp = $(am__leading_dot)dirstamp app_driven_test_app_driven_test_OBJECTS = \ app_driven_test/app_driven_test.$(OBJEXT) app_driven_test_app_driven_test_DEPENDENCIES = \ $(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = am__cholesky_cholesky_implicit_SOURCES_DIST = \ cholesky/cholesky_implicit.c cholesky/cholesky_models.c \ cholesky/cholesky_kernels.c sched_ctx_utils/sched_ctx_utils.c \ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@am_cholesky_cholesky_implicit_OBJECTS = cholesky/cholesky_implicit.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ sched_ctx_utils/sched_ctx_utils.$(OBJEXT) \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.$(OBJEXT) cholesky_cholesky_implicit_OBJECTS = \ $(am_cholesky_cholesky_implicit_OBJECTS) am__DEPENDENCIES_1 = @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_implicit_DEPENDENCIES = $(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la \ @STARPU_NO_BLAS_LIB_FALSE@ $(am__DEPENDENCIES_1) hierarchical_ctxs_resize_hierarchical_ctxs_SOURCES = \ hierarchical_ctxs/resize_hierarchical_ctxs.c hierarchical_ctxs_resize_hierarchical_ctxs_OBJECTS = \ hierarchical_ctxs/resize_hierarchical_ctxs.$(OBJEXT) hierarchical_ctxs_resize_hierarchical_ctxs_LDADD = $(LDADD) lp_test_lp_resize_test_SOURCES = lp_test/lp_resize_test.c lp_test_lp_resize_test_OBJECTS = lp_test/lp_resize_test.$(OBJEXT) lp_test_lp_resize_test_LDADD = $(LDADD) lp_test_lp_test_SOURCES = lp_test/lp_test.c lp_test_lp_test_OBJECTS = lp_test/lp_test.$(OBJEXT) lp_test_lp_test_LDADD = $(LDADD) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ../../examples/common/$(DEPDIR)/blas.Po \ app_driven_test/$(DEPDIR)/app_driven_test.Po \ cholesky/$(DEPDIR)/cholesky_implicit.Po \ cholesky/$(DEPDIR)/cholesky_kernels.Po \ cholesky/$(DEPDIR)/cholesky_models.Po \ hierarchical_ctxs/$(DEPDIR)/resize_hierarchical_ctxs.Po \ lp_test/$(DEPDIR)/lp_resize_test.Po \ lp_test/$(DEPDIR)/lp_test.Po \ sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = app_driven_test/app_driven_test.c \ $(cholesky_cholesky_implicit_SOURCES) \ hierarchical_ctxs/resize_hierarchical_ctxs.c \ lp_test/lp_resize_test.c lp_test/lp_test.c DIST_SOURCES = app_driven_test/app_driven_test.c \ $(am__cholesky_cholesky_implicit_SOURCES_DIST) \ hierarchical_ctxs/resize_hierarchical_ctxs.c \ lp_test/lp_resize_test.c lp_test/lp_test.c am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__noinst_HEADERS_DIST = cholesky/cholesky.h \ sched_ctx_utils/sched_ctx_utils.h HEADERS = $(noinst_HEADERS) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ $(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la \ $(STARPU_EXPORTED_LIBS) $(STARPU_CUDA_LDFLAGS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = LAUNCHER = # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(MAGMA_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/examples -I$(top_builddir)/include -I$(top_srcdir)/sc_hypervisor/include -I$(top_srcdir)/sc_hypervisor/examples $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ @STARPU_NO_BLAS_LIB_FALSE@noinst_HEADERS = \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky.h \ @STARPU_NO_BLAS_LIB_FALSE@ sched_ctx_utils/sched_ctx_utils.h @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_implicit_SOURCES = \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_implicit.c \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_models.c \ @STARPU_NO_BLAS_LIB_FALSE@ cholesky/cholesky_kernels.c \ @STARPU_NO_BLAS_LIB_FALSE@ sched_ctx_utils/sched_ctx_utils.c \ @STARPU_NO_BLAS_LIB_FALSE@ ../../examples/common/blas.c @STARPU_NO_BLAS_LIB_FALSE@cholesky_cholesky_implicit_LDADD = \ @STARPU_NO_BLAS_LIB_FALSE@ $(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la \ @STARPU_NO_BLAS_LIB_FALSE@ $(STARPU_BLAS_LDFLAGS) app_driven_test_app_driven_test_LDADD = \ $(top_builddir)/sc_hypervisor/src/libsc_hypervisor.la all: all-am .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign sc_hypervisor/examples/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign sc_hypervisor/examples/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-noinstPROGRAMS: @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list app_driven_test/$(am__dirstamp): @$(MKDIR_P) app_driven_test @: > app_driven_test/$(am__dirstamp) app_driven_test/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) app_driven_test/$(DEPDIR) @: > app_driven_test/$(DEPDIR)/$(am__dirstamp) app_driven_test/app_driven_test.$(OBJEXT): \ app_driven_test/$(am__dirstamp) \ app_driven_test/$(DEPDIR)/$(am__dirstamp) app_driven_test/app_driven_test$(EXEEXT): $(app_driven_test_app_driven_test_OBJECTS) $(app_driven_test_app_driven_test_DEPENDENCIES) $(EXTRA_app_driven_test_app_driven_test_DEPENDENCIES) app_driven_test/$(am__dirstamp) @rm -f app_driven_test/app_driven_test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(app_driven_test_app_driven_test_OBJECTS) $(app_driven_test_app_driven_test_LDADD) $(LIBS) cholesky/$(am__dirstamp): @$(MKDIR_P) cholesky @: > cholesky/$(am__dirstamp) cholesky/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) cholesky/$(DEPDIR) @: > cholesky/$(DEPDIR)/$(am__dirstamp) cholesky/cholesky_implicit.$(OBJEXT): cholesky/$(am__dirstamp) \ cholesky/$(DEPDIR)/$(am__dirstamp) cholesky/cholesky_models.$(OBJEXT): cholesky/$(am__dirstamp) \ cholesky/$(DEPDIR)/$(am__dirstamp) cholesky/cholesky_kernels.$(OBJEXT): cholesky/$(am__dirstamp) \ cholesky/$(DEPDIR)/$(am__dirstamp) sched_ctx_utils/$(am__dirstamp): @$(MKDIR_P) sched_ctx_utils @: > sched_ctx_utils/$(am__dirstamp) sched_ctx_utils/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) sched_ctx_utils/$(DEPDIR) @: > sched_ctx_utils/$(DEPDIR)/$(am__dirstamp) sched_ctx_utils/sched_ctx_utils.$(OBJEXT): \ sched_ctx_utils/$(am__dirstamp) \ sched_ctx_utils/$(DEPDIR)/$(am__dirstamp) ../../examples/common/$(am__dirstamp): @$(MKDIR_P) ../../examples/common @: > ../../examples/common/$(am__dirstamp) ../../examples/common/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) ../../examples/common/$(DEPDIR) @: > ../../examples/common/$(DEPDIR)/$(am__dirstamp) ../../examples/common/blas.$(OBJEXT): \ ../../examples/common/$(am__dirstamp) \ ../../examples/common/$(DEPDIR)/$(am__dirstamp) cholesky/cholesky_implicit$(EXEEXT): $(cholesky_cholesky_implicit_OBJECTS) $(cholesky_cholesky_implicit_DEPENDENCIES) $(EXTRA_cholesky_cholesky_implicit_DEPENDENCIES) cholesky/$(am__dirstamp) @rm -f cholesky/cholesky_implicit$(EXEEXT) $(AM_V_CCLD)$(LINK) $(cholesky_cholesky_implicit_OBJECTS) $(cholesky_cholesky_implicit_LDADD) $(LIBS) hierarchical_ctxs/$(am__dirstamp): @$(MKDIR_P) hierarchical_ctxs @: > hierarchical_ctxs/$(am__dirstamp) hierarchical_ctxs/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) hierarchical_ctxs/$(DEPDIR) @: > hierarchical_ctxs/$(DEPDIR)/$(am__dirstamp) hierarchical_ctxs/resize_hierarchical_ctxs.$(OBJEXT): \ hierarchical_ctxs/$(am__dirstamp) \ hierarchical_ctxs/$(DEPDIR)/$(am__dirstamp) hierarchical_ctxs/resize_hierarchical_ctxs$(EXEEXT): $(hierarchical_ctxs_resize_hierarchical_ctxs_OBJECTS) $(hierarchical_ctxs_resize_hierarchical_ctxs_DEPENDENCIES) $(EXTRA_hierarchical_ctxs_resize_hierarchical_ctxs_DEPENDENCIES) hierarchical_ctxs/$(am__dirstamp) @rm -f hierarchical_ctxs/resize_hierarchical_ctxs$(EXEEXT) $(AM_V_CCLD)$(LINK) $(hierarchical_ctxs_resize_hierarchical_ctxs_OBJECTS) $(hierarchical_ctxs_resize_hierarchical_ctxs_LDADD) $(LIBS) lp_test/$(am__dirstamp): @$(MKDIR_P) lp_test @: > lp_test/$(am__dirstamp) lp_test/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) lp_test/$(DEPDIR) @: > lp_test/$(DEPDIR)/$(am__dirstamp) lp_test/lp_resize_test.$(OBJEXT): lp_test/$(am__dirstamp) \ lp_test/$(DEPDIR)/$(am__dirstamp) lp_test/lp_resize_test$(EXEEXT): $(lp_test_lp_resize_test_OBJECTS) $(lp_test_lp_resize_test_DEPENDENCIES) $(EXTRA_lp_test_lp_resize_test_DEPENDENCIES) lp_test/$(am__dirstamp) @rm -f lp_test/lp_resize_test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(lp_test_lp_resize_test_OBJECTS) $(lp_test_lp_resize_test_LDADD) $(LIBS) lp_test/lp_test.$(OBJEXT): lp_test/$(am__dirstamp) \ lp_test/$(DEPDIR)/$(am__dirstamp) lp_test/lp_test$(EXEEXT): $(lp_test_lp_test_OBJECTS) $(lp_test_lp_test_DEPENDENCIES) $(EXTRA_lp_test_lp_test_DEPENDENCIES) lp_test/$(am__dirstamp) @rm -f lp_test/lp_test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(lp_test_lp_test_OBJECTS) $(lp_test_lp_test_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) -rm -f ../../examples/common/*.$(OBJEXT) -rm -f app_driven_test/*.$(OBJEXT) -rm -f cholesky/*.$(OBJEXT) -rm -f hierarchical_ctxs/*.$(OBJEXT) -rm -f lp_test/*.$(OBJEXT) -rm -f sched_ctx_utils/*.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@../../examples/common/$(DEPDIR)/blas.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@app_driven_test/$(DEPDIR)/app_driven_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_implicit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@cholesky/$(DEPDIR)/cholesky_models.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@hierarchical_ctxs/$(DEPDIR)/resize_hierarchical_ctxs.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lp_test/$(DEPDIR)/lp_resize_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@lp_test/$(DEPDIR)/lp_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs -rm -rf app_driven_test/.libs app_driven_test/_libs -rm -rf cholesky/.libs cholesky/_libs -rm -rf hierarchical_ctxs/.libs hierarchical_ctxs/_libs -rm -rf lp_test/.libs lp_test/_libs ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-am TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-am CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-am cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(PROGRAMS) $(HEADERS) installdirs: install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -rm -f ../../examples/common/$(DEPDIR)/$(am__dirstamp) -rm -f ../../examples/common/$(am__dirstamp) -rm -f app_driven_test/$(DEPDIR)/$(am__dirstamp) -rm -f app_driven_test/$(am__dirstamp) -rm -f cholesky/$(DEPDIR)/$(am__dirstamp) -rm -f cholesky/$(am__dirstamp) -rm -f hierarchical_ctxs/$(DEPDIR)/$(am__dirstamp) -rm -f hierarchical_ctxs/$(am__dirstamp) -rm -f lp_test/$(DEPDIR)/$(am__dirstamp) -rm -f lp_test/$(am__dirstamp) -rm -f sched_ctx_utils/$(DEPDIR)/$(am__dirstamp) -rm -f sched_ctx_utils/$(am__dirstamp) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ mostlyclean-am distclean: distclean-am -rm -f ../../examples/common/$(DEPDIR)/blas.Po -rm -f app_driven_test/$(DEPDIR)/app_driven_test.Po -rm -f cholesky/$(DEPDIR)/cholesky_implicit.Po -rm -f cholesky/$(DEPDIR)/cholesky_kernels.Po -rm -f cholesky/$(DEPDIR)/cholesky_models.Po -rm -f hierarchical_ctxs/$(DEPDIR)/resize_hierarchical_ctxs.Po -rm -f lp_test/$(DEPDIR)/lp_resize_test.Po -rm -f lp_test/$(DEPDIR)/lp_test.Po -rm -f sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f ../../examples/common/$(DEPDIR)/blas.Po -rm -f app_driven_test/$(DEPDIR)/app_driven_test.Po -rm -f cholesky/$(DEPDIR)/cholesky_implicit.Po -rm -f cholesky/$(DEPDIR)/cholesky_kernels.Po -rm -f cholesky/$(DEPDIR)/cholesky_models.Po -rm -f hierarchical_ctxs/$(DEPDIR)/resize_hierarchical_ctxs.Po -rm -f lp_test/$(DEPDIR)/lp_resize_test.Po -rm -f lp_test/$(DEPDIR)/lp_test.Po -rm -f sched_ctx_utils/$(DEPDIR)/sched_ctx_utils.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: .MAKE: install-am install-strip .PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ ctags ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am install-exec \ install-exec-am install-html install-html-am install-info \ install-info-am install-man install-pdf install-pdf-am \ install-ps install-ps-am install-strip installcheck \ installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags tags-am uninstall uninstall-am .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/sc_hypervisor/examples/app_driven_test/000077500000000000000000000000001507764646700237135ustar00rootroot00000000000000starpu-1.4.9+dfsg/sc_hypervisor/examples/app_driven_test/app_driven_test.c000066400000000000000000000130431507764646700272460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #define NTASKS 1000 #define NINCR 10 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) struct params { unsigned sched_ctx; int task_tag; }; unsigned val[2]; starpu_pthread_mutex_t mut[2]; /* Every implementation of a codelet must have this prototype, the first * argument (buffers) describes the buffers/streams that are managed by the * DSM; the second arguments references read-only data that is passed as an * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there * are no data input/output managed by the DSM (cl.nbuffers = 0) */ void cpu_func(__attribute__((unused))void *buffers[], void *cl_arg) { struct params *params = (struct params *) cl_arg; int i; for(i = 0; i < NINCR; i++) { STARPU_PTHREAD_MUTEX_LOCK(&mut[params->sched_ctx - 1]); val[params->sched_ctx - 1]++; STARPU_PTHREAD_MUTEX_UNLOCK(&mut[params->sched_ctx - 1]); } if(params->task_tag != 0) FPRINTF(stdout, "Task with tag %d executed in ctx = %u %u counter_tests\n", params->task_tag, params->sched_ctx, val[params->sched_ctx - 1]); } struct starpu_codelet cl = {0}; /* the management of the tags is done by the user */ /* who will take care that the tags will be unique */ int tag = 1; void* submit_tasks_thread(void *arg) { unsigned sched_ctx = *((unsigned*)arg); starpu_sched_ctx_set_context(&sched_ctx); struct starpu_task *task[NTASKS]; struct params params[NTASKS]; int i; for(i = 0; i < NTASKS; i++) { task[i] = starpu_task_create(); // starpu_usleep(5000); cl.cpu_funcs[0] = cpu_func; cl.nbuffers = 0; task[i]->cl = &cl; if(sched_ctx == 1 && i == 5) { /* tag the tasks whose execution will start the resizing process */ task[i]->hypervisor_tag = tag; /* indicate particular settings the context should have when the resizing will be done */ sc_hypervisor_ctl(sched_ctx, SC_HYPERVISOR_TIME_TO_APPLY, tag, SC_HYPERVISOR_MIN_WORKERS, 2, SC_HYPERVISOR_MAX_WORKERS, 12, SC_HYPERVISOR_NULL); printf("require resize for sched_ctx %u at tag %d\n", sched_ctx, tag); /* specify that the contexts should be resized when the task having this particular tag will finish executing */ sc_hypervisor_post_resize_request(sched_ctx, tag); } params[i].sched_ctx = sched_ctx; params[i].task_tag = task[i]->hypervisor_tag; task[i]->cl_arg = ¶ms[i]; task[i]->cl_arg_size = sizeof(params); int ret = starpu_task_submit(task[i]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); return NULL; } int main() { int ret = starpu_init(NULL); if (ret == -ENODEV) return 77; int num_workers = starpu_worker_get_count(); int nres1 = num_workers; int nres2 = num_workers; int resources1[nres1]; int resources2[nres2]; int i; for(i = 0; i < nres1; i++) resources1[i] = i; for(i = 0; i < nres2; i++) resources2[i] = i; /* create contexts */ unsigned sched_ctx1 = starpu_sched_ctx_create(resources1, nres1, "sched_ctx1", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); unsigned sched_ctx2 = starpu_sched_ctx_create(resources2, nres2, "sched_ctx2", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); /* initialize the hypervisor */ struct sc_hypervisor_policy policy = {}; policy.custom = 0; /* indicate which strategy to use in this particular case we use app_driven which allows the user to resize the ctxs dynamically at particular moments of the execution of the application */ policy.name = "app_driven"; void *perf_counters = sc_hypervisor_init(&policy); /* let starpu know which performance counters should use to inform the hypervisor how the application and the resources are executing */ starpu_sched_ctx_set_perf_counters(sched_ctx1, perf_counters); starpu_sched_ctx_set_perf_counters(sched_ctx2, perf_counters); /* register the contexts that should be managed by the hypervisor and indicate an approximate amount of workload if known; in this case we don't know it and we put 0 */ sc_hypervisor_register_ctx(sched_ctx1, 0.0); sc_hypervisor_register_ctx(sched_ctx2, 0.0); starpu_pthread_t tid[2]; val[0] = 0; val[1] = 0; STARPU_PTHREAD_MUTEX_INIT(&mut[0], NULL); STARPU_PTHREAD_MUTEX_INIT(&mut[1], NULL); /* we create two threads to simulate simultaneous submission of tasks */ STARPU_PTHREAD_CREATE(&tid[0], NULL, submit_tasks_thread, (void*)&sched_ctx1); STARPU_PTHREAD_CREATE(&tid[1], NULL, submit_tasks_thread, (void*)&sched_ctx2); STARPU_PTHREAD_JOIN(tid[0], NULL); STARPU_PTHREAD_JOIN(tid[1], NULL); /* free starpu and hypervisor data */ starpu_shutdown(); sc_hypervisor_shutdown(); FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx1, val[0], NTASKS*NINCR); FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx2, val[1], NTASKS*NINCR); return 0; } starpu-1.4.9+dfsg/sc_hypervisor/examples/cholesky/000077500000000000000000000000001507764646700223465ustar00rootroot00000000000000starpu-1.4.9+dfsg/sc_hypervisor/examples/cholesky/cholesky.h000066400000000000000000000170341507764646700243450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DW_CHOLESKY_H__ #define __DW_CHOLESKY_H__ #include #include #include #include #ifdef STARPU_USE_CUDA #include #include #include #endif #include #include #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define NMAXBLOCKS 32 #define TAG_POTRF(k) ((starpu_tag_t)((1ULL<<60) | (unsigned long long)(k))) #define TAG_TRSM(k,j) ((starpu_tag_t)((3ULL<<60) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(j)))) #define TAG_GEMM(k,i,j) ((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) \ | ((unsigned long long)(i)<<16) \ | (unsigned long long)(j)))) #define TAG_POTRF_AUX(k, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) | (1ULL<<56) | (unsigned long long)(k))) #define TAG_TRSM_AUX(k,j, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) \ | ((3ULL<<56) |(((unsigned long long)(k))<<32) \ | (unsigned long long)(j)))) #define TAG_GEMM_AUX(k,i,j, prefix) ((starpu_tag_t)((((unsigned long long)(prefix))<<60) \ | ((4ULL<<56) | ((unsigned long long)(k)<<32) \ | ((unsigned long long)(i)<<16) \ | (unsigned long long)(j)))) #define BLOCKSIZE (size/nblocks) #define BLAS3_FLOP(n1,n2,n3) \ (2*((uint64_t)n1)*((uint64_t)n2)*((uint64_t)n3)) /* This is from magma -- Innovative Computing Laboratory -- Electrical Engineering and Computer Science Department -- University of Tennessee -- (C) Copyright 2009 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Tennessee, Knoxville nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.))) #define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n)) * (double)(__n) - (1. / 6.))) #define FLOPS_SPOTRF(__n) (FMULS_POTRF((__n)) + FADDS_POTRF((__n))) #define FMULS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.)) #define FADDS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.)) #define FMULS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? FMULS_TRMM_2((__m), (__n)) :*/ FMULS_TRMM_2((__n), (__m))) #define FADDS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? FADDS_TRMM_2((__m), (__n)) :*/ FADDS_TRMM_2((__n), (__m))) #define FMULS_TRSM FMULS_TRMM #define FADDS_TRSM FMULS_TRMM #define FLOPS_STRSM(__m, __n) (FMULS_TRSM((__m), (__n)) + FADDS_TRSM((__m), (__n))) #define FMULS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) #define FADDS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) #define FLOPS_SSYRK(__k, __n) (FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n))) #define FMULS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) #define FADDS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) #define FLOPS_SGEMM(__m, __n, __k) (FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k))) /* End of magma code */ extern unsigned g_size; extern unsigned g_nblocks; extern unsigned g_nbigblocks; extern unsigned g_pinned; extern unsigned g_noprio; extern unsigned g_check; extern unsigned g_bound; extern unsigned g_with_ctxs; extern unsigned g_with_noctxs; extern unsigned g_chole1; extern unsigned g_chole2; extern struct starpu_perfmodel chol_model_potrf; extern struct starpu_perfmodel chol_model_trsm; extern struct starpu_perfmodel chol_model_syrk; extern struct starpu_perfmodel chol_model_gemm; void chol_cpu_codelet_update_potrf(void **, void *); void chol_cpu_codelet_update_trsm(void **, void *); void chol_cpu_codelet_update_syrk(void **, void *); void chol_cpu_codelet_update_gemm(void **, void *); extern struct starpu_codelet cl_potrf; extern struct starpu_codelet cl_trsm; extern struct starpu_codelet cl_syrk; extern struct starpu_codelet cl_gemm; double cpu_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double cpu_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double cpu_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double cpu_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); #ifdef STARPU_USE_CUDA void chol_cublas_codelet_update_potrf(void *descr[], void *_args); void chol_cublas_codelet_update_trsm(void *descr[], void *_args); void chol_cublas_codelet_update_syrk(void *descr[], void *_args); void chol_cublas_codelet_update_gemm(void *descr[], void *_args); double cuda_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double cuda_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double cuda_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); double cuda_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); #endif void initialize_chol_model(struct starpu_perfmodel* model, char* symbol, double (*cpu_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned), double (*cuda_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned)); void parse_args(int argc, char **argv); #endif /* __DW_CHOLESKY_H__ */ starpu-1.4.9+dfsg/sc_hypervisor/examples/cholesky/cholesky_implicit.c000066400000000000000000000226461507764646700262370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "cholesky.h" #include "../sched_ctx_utils/sched_ctx_utils.h" /* * code to bootstrap the factorization * and construct the DAG */ static void callback_turn_spmd_on(void *arg) { (void)arg; cl_gemm.type = STARPU_SPMD; } int hypervisor_tag = 1; static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks) { int ret; double start; double end; unsigned k,m,n; unsigned long nx = starpu_matrix_get_nx(dataA); unsigned long nn = nx/nblocks; int prio_level = g_noprio?STARPU_DEFAULT_PRIO:STARPU_MAX_PRIO; if (g_bound) starpu_bound_start(0, 0); start = starpu_timing_now(); /* create all the DAG nodes */ for (k = 0; k < nblocks; k++) { starpu_iteration_push(k); starpu_data_handle_t sdatakk = starpu_data_get_sub_data(dataA, 2, k, k); if(k == 0 && g_with_ctxs) { ret = starpu_task_insert(&cl_potrf, STARPU_PRIORITY, prio_level, STARPU_RW, sdatakk, STARPU_CALLBACK, (k == 3*nblocks/4)?callback_turn_spmd_on:NULL, STARPU_HYPERVISOR_TAG, hypervisor_tag, 0); if (ret == -ENODEV) return 77; set_hypervisor_conf(START_BENCH, hypervisor_tag++); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } else { ret = starpu_task_insert(&cl_potrf, STARPU_PRIORITY, prio_level, STARPU_RW, sdatakk, STARPU_CALLBACK, (k == 3*nblocks/4)?callback_turn_spmd_on:NULL, STARPU_FLOPS, (double) FLOPS_SPOTRF(nn), 0); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } for (m = k+1; m m) { mat[m+n*size] = 0.0f; /* debug */ } } } float *test_mat = malloc(size*size*sizeof(float)); STARPU_ASSERT(test_mat); STARPU_SSYRK("L", "N", size, size, 1.0f, mat, size, 0.0f, test_mat, size); FPRINTF(stderr, "comparing results ...\n"); #ifdef PRINT_OUTPUT for (m = 0; m < size; m++) { for (n = 0; n < size; n++) { if (n <= m) { FPRINTF(stdout, "%2.2f\t", test_mat[m +n*size]); } else { FPRINTF(stdout, ".\t"); } } FPRINTF(stdout, "\n"); } #endif for (m = 0; m < size; m++) { for (n = 0; n < size; n++) { if (n <= m) { float orig = (1.0f/(1.0f+m+n)) + ((m == n)?1.0f*size:0.0f); float err = fabsf(test_mat[m +n*size] - orig) / orig; if (err > 0.0001) { FPRINTF(stderr, "Error[%u, %u] --> %2.6f != %2.6f (err %2.6f)\n", m, n, test_mat[m +n*size], orig, err); assert(0); } } } } free(test_mat); } starpu_free_noflag(mat, (size_t)size*size*sizeof(float)); } int main(int argc, char **argv) { int ret; /* create a simple definite positive symmetric matrix example * * Hilbert matrix : h(i,j) = 1/(i+j+1) * */ parse_args(argc, argv); if(g_with_ctxs || g_with_noctxs || g_chole1 || g_chole2) parse_args_ctx(argc, argv); ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_CUDA initialize_chol_model(&chol_model_potrf,"chol_model_potrf",cpu_chol_task_potrf_cost,cuda_chol_task_potrf_cost); initialize_chol_model(&chol_model_trsm,"chol_model_trsm",cpu_chol_task_trsm_cost,cuda_chol_task_trsm_cost); initialize_chol_model(&chol_model_syrk,"chol_model_syrk",cpu_chol_task_syrk_cost,cuda_chol_task_syrk_cost); initialize_chol_model(&chol_model_gemm,"chol_model_gemm",cpu_chol_task_gemm_cost,cuda_chol_task_gemm_cost); #else initialize_chol_model(&chol_model_potrf,"chol_model_potrf",cpu_chol_task_potrf_cost,NULL); initialize_chol_model(&chol_model_trsm,"chol_model_trsm",cpu_chol_task_trsm_cost,NULL); initialize_chol_model(&chol_model_syrk,"chol_model_syrk",cpu_chol_task_syrk_cost,NULL); initialize_chol_model(&chol_model_gemm,"chol_model_gemm",cpu_chol_task_gemm_cost,NULL); #endif starpu_cublas_init(); if(g_with_ctxs) { construct_contexts(); start_2benchs(execute_cholesky); } else if(g_with_noctxs) start_2benchs(execute_cholesky); else if(g_chole1) start_1stbench(execute_cholesky); else if(g_chole2) start_2ndbench(execute_cholesky); else execute_cholesky(NULL, g_size, g_nblocks); starpu_cublas_shutdown(); starpu_shutdown(); if(g_with_ctxs) end_contexts(); return 0; } starpu-1.4.9+dfsg/sc_hypervisor/examples/cholesky/cholesky_kernels.c000066400000000000000000000247211507764646700260640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "cholesky.h" //#include "../common/blas.h" #ifdef STARPU_USE_CUDA #include #include #ifdef STARPU_HAVE_MAGMA #include "magma.h" #include "magma_lapack.h" #endif #endif /* * GEMM */ #if defined(STARPU_USE_CUDA) static const float p1 = 1.0; static const float m1 = -1.0; #endif static inline void chol_common_cpu_codelet_update_gemm(void *descr[], int s, void *_args) { (void)_args; /* printf("gemm\n"); */ float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); float *right = (float *)STARPU_MATRIX_GET_PTR(descr[1]); float *center = (float *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned dx = STARPU_MATRIX_GET_NY(descr[2]); unsigned dy = STARPU_MATRIX_GET_NX(descr[2]); unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); switch (s) { case 0: { /* CPU kernel */ int worker_size = starpu_combined_worker_get_size(); if (worker_size == 1) { /* Sequential CPU kernel */ STARPU_SGEMM("N", "T", dy, dx, dz, -1.0f, left, ld21, right, ld12, 1.0f, center, ld22); } else { /* Parallel CPU kernel */ int rank = starpu_combined_worker_get_rank(); unsigned block_size = (dx + worker_size - 1)/worker_size; unsigned new_dx = STARPU_MIN(dx, block_size*(rank+1)) - block_size*rank; float *new_left = &left[block_size*rank]; float *new_center = ¢er[block_size*rank]; STARPU_SGEMM("N", "T", dy, new_dx, dz, -1.0f, new_left, ld21, right, ld12, 1.0f, new_center, ld22); } break; } #ifdef STARPU_USE_CUDA case 1: { /* CUDA kernel */ cublasStatus_t status = cublasSgemm(starpu_cublas_get_local_handle(), CUBLAS_OP_N, CUBLAS_OP_T, dy, dx, dz, &m1, left, ld21, right, ld12, &p1, center, ld22); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; } #endif default: STARPU_ABORT(); break; } } void chol_cpu_codelet_update_gemm(void *descr[], void *_args) { chol_common_cpu_codelet_update_gemm(descr, 0, _args); } #ifdef STARPU_USE_CUDA void chol_cublas_codelet_update_gemm(void *descr[], void *_args) { chol_common_cpu_codelet_update_gemm(descr, 1, _args); } #endif /* STARPU_USE_CUDA */ /* * SYRK */ static inline void chol_common_cpu_codelet_update_syrk(void *descr[], int s, void *_args) { (void)_args; /* printf("syrk\n"); */ float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); float *center = (float *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned dx = STARPU_MATRIX_GET_NY(descr[1]); unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld22 = STARPU_MATRIX_GET_LD(descr[1]); switch (s) { case 0: { /* CPU kernel */ STARPU_SSYRK("L", "N", dx, dz, -1.0f, left, ld21, 1.0f, center, ld22); break; } #ifdef STARPU_USE_CUDA case 1: { /* CUDA kernel */ cublasStatus_t status = cublasSsyrk(starpu_cublas_get_local_handle(), CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, dx, dz, &m1, left, ld21, &p1, center, ld22); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; } #endif default: STARPU_ABORT(); break; } } void chol_cpu_codelet_update_syrk(void *descr[], void *_args) { chol_common_cpu_codelet_update_syrk(descr, 0, _args); } #ifdef STARPU_USE_CUDA void chol_cublas_codelet_update_syrk(void *descr[], void *_args) { chol_common_cpu_codelet_update_syrk(descr, 1, _args); } #endif /* STARPU_USE_CUDA */ /* * TRSM */ static inline void chol_common_codelet_update_trsm(void *descr[], int s, void *_args) { (void)_args; /* printf("trsm\n"); */ float *sub11; float *sub21; sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); sub21 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); unsigned nx21 = STARPU_MATRIX_GET_NY(descr[1]); unsigned ny21 = STARPU_MATRIX_GET_NX(descr[1]); #ifdef STARPU_USE_CUDA cublasStatus_t status; #endif switch (s) { case 0: STARPU_STRSM("R", "L", "T", "N", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21); break; #ifdef STARPU_USE_CUDA case 1: status = cublasStrsm(starpu_cublas_get_local_handle(), CUBLAS_SIDE_RIGHT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_T, CUBLAS_DIAG_NON_UNIT, nx21, ny21, &p1, sub11, ld11, sub21, ld21); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; #endif default: STARPU_ABORT(); break; } } void chol_cpu_codelet_update_trsm(void *descr[], void *_args) { chol_common_codelet_update_trsm(descr, 0, _args); } #ifdef STARPU_USE_CUDA void chol_cublas_codelet_update_trsm(void *descr[], void *_args) { chol_common_codelet_update_trsm(descr, 1, _args); } #endif /* * POTRF */ static inline void chol_common_codelet_update_potrf(void *descr[], int s, void *_args) { (void)_args; /* printf("potrf\n"); */ float *sub11; sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); unsigned nx = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld = STARPU_MATRIX_GET_LD(descr[0]); unsigned z; switch (s) { case 0: #ifdef STARPU_MKL STARPU_SPOTRF("L", nx, sub11, ld); #else /* * - alpha 11 <- lambda 11 = sqrt(alpha11) * - alpha 21 <- l 21 = alpha 21 / lambda 11 * - A22 <- A22 - l21 trans(l21) */ for (z = 0; z < nx; z++) { float lambda11; lambda11 = sqrt(sub11[z+z*ld]); sub11[z+z*ld] = lambda11; STARPU_ASSERT(lambda11 != 0.0f); STARPU_SSCAL(nx - z - 1, 1.0f/lambda11, &sub11[(z+1)+z*ld], 1); STARPU_SSYR("L", nx - z - 1, -1.0f, &sub11[(z+1)+z*ld], 1, &sub11[(z+1)+(z+1)*ld], ld); } #endif break; #ifdef STARPU_USE_CUDA case 1: #ifdef STARPU_HAVE_MAGMA { int ret; int info; #if (MAGMA_VERSION_MAJOR > 1) || (MAGMA_VERSION_MAJOR == 1 && MAGMA_VERSION_MINOR >= 4) cudaStream_t stream = starpu_cuda_get_local_stream(); cublasSetKernelStream(stream); magmablasSetKernelStream(stream); #else starpu_cublas_set_stream(); #endif ret = magma_spotrf_gpu(MagmaLower, nx, sub11, ld, &info); if (ret != MAGMA_SUCCESS) { fprintf(stderr, "Error in Magma: %d\n", ret); STARPU_ABORT(); } #if (MAGMA_VERSION_MAJOR > 1) || (MAGMA_VERSION_MAJOR == 1 && MAGMA_VERSION_MINOR >= 4) cudaError_t cures = cudaStreamSynchronize(stream); #else cudaError_t cures = cudaDeviceSynchronize(); #endif STARPU_ASSERT(!cures); } #else { float *lambda11; cublasStatus_t status; cudaStream_t stream = starpu_cuda_get_local_stream(); cublasHandle_t handle = starpu_cublas_get_local_handle(); cudaHostAlloc((void **)&lambda11, sizeof(float), 0); for (z = 0; z < nx; z++) { cudaMemcpyAsync(lambda11, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, stream); cudaStreamSynchronize(stream); STARPU_ASSERT(*lambda11 != 0.0f); *lambda11 = sqrt(*lambda11); /* cublasSetVector(1, sizeof(float), lambda11, sizeof(float), &sub11[z+z*ld], sizeof(float)); */ cudaMemcpyAsync(&sub11[z+z*ld], lambda11, sizeof(float), cudaMemcpyHostToDevice, stream); float scal = 1.0f/(*lambda11); status = cublasSscal(handle, nx - z - 1, &scal, &sub11[(z+1)+z*ld], 1); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); status = cublasSsyr(handle, CUBLAS_FILL_MODE_UPPER, nx - z - 1, &m1, &sub11[(z+1)+z*ld], 1, &sub11[(z+1)+(z+1)*ld], ld); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); } cudaStreamSynchronize(stream); cudaFreeHost(lambda11); } #endif break; #endif default: STARPU_ABORT(); break; } } void chol_cpu_codelet_update_potrf(void *descr[], void *_args) { chol_common_codelet_update_potrf(descr, 0, _args); } #ifdef STARPU_USE_CUDA void chol_cublas_codelet_update_potrf(void *descr[], void *_args) { chol_common_codelet_update_potrf(descr, 1, _args); } #endif/* STARPU_USE_CUDA */ struct starpu_perfmodel chol_model_potrf; struct starpu_perfmodel chol_model_trsm; struct starpu_perfmodel chol_model_syrk; struct starpu_perfmodel chol_model_gemm; /* * Create the codelets */ struct starpu_codelet cl_potrf = { .type = STARPU_SEQ, .cpu_funcs = {chol_cpu_codelet_update_potrf}, .cpu_funcs_name = {"chol_cpu_codelet_update_potrf"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_potrf}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .nbuffers = 1, .modes = { STARPU_RW }, .model = &chol_model_potrf, .color = 0xffff00, }; struct starpu_codelet cl_trsm = { .type = STARPU_SEQ, .cpu_funcs = {chol_cpu_codelet_update_trsm}, .cpu_funcs_name = {"chol_cpu_codelet_update_trsm"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_trsm}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = { STARPU_R, STARPU_RW }, .model = &chol_model_trsm, .color = 0x8080ff, }; struct starpu_codelet cl_syrk = { .type = STARPU_SEQ, .max_parallelism = INT_MAX, .cpu_funcs = {chol_cpu_codelet_update_syrk}, .cpu_funcs_name = {"chol_cpu_codelet_update_syrk"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_syrk}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 2, .modes = { STARPU_R, STARPU_RW }, .model = &chol_model_syrk, .color = 0x00ff00, }; struct starpu_codelet cl_gemm = { .type = STARPU_SEQ, .max_parallelism = INT_MAX, .cpu_funcs = {chol_cpu_codelet_update_gemm}, .cpu_funcs_name = {"chol_cpu_codelet_update_gemm"}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_gemm}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 3, .modes = { STARPU_R, STARPU_R, STARPU_RW }, .model = &chol_model_gemm, .color = 0x00c000, }; starpu-1.4.9+dfsg/sc_hypervisor/examples/cholesky/cholesky_models.c000066400000000000000000000150131507764646700256760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * Example of a cost model for BLAS operations. This is really just an * example! */ /* * As a convention, in that file, buffers[0] is represented by A, * buffers[1] is B ... */ /* * Number of flops of Gemm */ #include #include #include "cholesky.h" /* #define USE_PERTURBATION 1 */ #ifdef USE_PERTURBATION #define PERTURB(a) ((starpu_drand48()*2.0f*(AMPL) + 1.0f - (AMPL))*(a)) #else #define PERTURB(a) (a) #endif double cpu_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/1000.0f*0.894/0.79176); #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cpu_chol_task_potrf_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } double cuda_chol_task_potrf_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/50.0f/10.75/5.088633/0.9883); #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cuda_chol_task_potrf_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } double cpu_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/7706.674/0.95/0.9965); #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cpu_chol_task_trsm_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } double cuda_chol_task_trsm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/50.0f/10.75/87.29520); #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cuda_chol_task_trsm_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } double cpu_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/50.0f/10.75/8.0760)/2; #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cpu_chol_task_syrk_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } double cuda_chol_task_syrk_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/50.0f/10.75/76.30666)/2; #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cuda_chol_task_syrk_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } double cpu_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/50.0f/10.75/8.0760); #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cpu_chol_task_gemm_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } double cuda_chol_task_gemm_cost(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void)arch; (void)nimpl; uint32_t n; n = starpu_matrix_get_nx(task->handles[0]); double cost = (((double)(n)*n*n)/50.0f/10.75/76.30666); #ifdef STARPU_MODEL_DEBUG FPRINTF(stdout, "cuda_chol_task_gemm_cost n %u cost %e\n", n, cost); #endif return PERTURB(cost); } void initialize_chol_model(struct starpu_perfmodel* model, char * symbol, double (*cpu_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned), double (*cuda_cost_function)(struct starpu_task *, struct starpu_perfmodel_arch*, unsigned)) { struct starpu_perfmodel_per_arch *per_arch; model->symbol = symbol; model->type = STARPU_HISTORY_BASED; starpu_perfmodel_init(model); per_arch = starpu_perfmodel_get_model_per_devices(model, 0, STARPU_CPU_WORKER, 0, 1, -1); per_arch->cost_function = cpu_cost_function; // We could also call directly: // starpu_perfmodel_set_per_devices_cost_function(model, 0, cpu_cost_function, STARPU_CPU_WORKER, 0, 1, -1); if(starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) != 0) { per_arch = starpu_perfmodel_get_model_per_devices(model, 0, STARPU_CUDA_WORKER, 0, 1, -1); per_arch->cost_function = cuda_cost_function; } } unsigned g_size = 4*1024; unsigned g_nblocks = 16; unsigned g_nbigblocks = 8; unsigned g_pinned = 0; unsigned g_noprio = 0; unsigned g_check = 0; unsigned g_bound = 0; unsigned g_with_ctxs = 0; unsigned g_with_noctxs = 0; unsigned g_chole1 = 0; unsigned g_chole2 = 0; void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-with_ctxs") == 0) { g_with_ctxs = 1; break; } if (strcmp(argv[i], "-with_noctxs") == 0) { g_with_noctxs = 1; break; } if (strcmp(argv[i], "-chole1") == 0) { g_chole1 = 1; break; } if (strcmp(argv[i], "-chole2") == 0) { g_chole2 = 1; break; } if (strcmp(argv[i], "-size") == 0) { char *argptr; g_size = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-nblocks") == 0) { char *argptr; g_nblocks = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-nbigblocks") == 0) { char *argptr; g_nbigblocks = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-pin") == 0) { g_pinned = 1; } if (strcmp(argv[i], "-no-prio") == 0) { g_noprio = 1; } if (strcmp(argv[i], "-bound") == 0) { g_bound = 1; } if (strcmp(argv[i], "-check") == 0) { g_check = 1; } if (strcmp(argv[i], "-h") == 0) { printf("usage : %s [-pin] [-size size] [-nblocks nblocks] [-check]\n", argv[0]); } } } starpu-1.4.9+dfsg/sc_hypervisor/examples/hierarchical_ctxs/000077500000000000000000000000001507764646700242045ustar00rootroot00000000000000starpu-1.4.9+dfsg/sc_hypervisor/examples/hierarchical_ctxs/resize_hierarchical_ctxs.c000066400000000000000000000130321507764646700314070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #define NTASKS 1000 #define NINCR 10 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) unsigned val[3]; starpu_pthread_mutex_t mut[3]; /* Every implementation of a codelet must have this prototype, the first * argument (buffers) describes the buffers/streams that are managed by the * DSM; the second arguments references read-only data that is passed as an * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there * are no data input/output managed by the DSM (cl.nbuffers = 0) */ void cpu_func(__attribute__((unused))void *buffers[], void *cl_arg) { unsigned sched_ctx = *((unsigned *) cl_arg); int i; for(i = 0; i < NINCR; i++) { STARPU_PTHREAD_MUTEX_LOCK(&mut[sched_ctx - 1]); val[sched_ctx - 1]++; STARPU_PTHREAD_MUTEX_UNLOCK(&mut[sched_ctx - 1]); } } struct starpu_codelet cl = {0}; void* submit_tasks_thread(void *arg) { unsigned sched_ctx = *((unsigned*)arg); starpu_sched_ctx_set_context(&sched_ctx); struct starpu_task *task[NTASKS]; int i; for(i = 0; i < NTASKS; i++) { task[i] = starpu_task_create(); cl.cpu_funcs[0] = cpu_func; cl.nbuffers = 0; task[i]->cl = &cl; task[i]->cl_arg = &sched_ctx; task[i]->cl_arg_size = sizeof(unsigned); task[i]->flops = NINCR*1000000000.0; int ret = starpu_task_submit(task[i]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); if(i == NTASKS/2) sc_hypervisor_resize_ctxs(NULL, -1, NULL, -1); } starpu_task_wait_for_all(); return NULL; } int main() { int ret = starpu_init(NULL); if (ret == -ENODEV) return 77; /* create contexts */ unsigned sched_ctx1 = starpu_sched_ctx_create(NULL, 0, "sched_ctx1", STARPU_SCHED_CTX_POLICY_NAME, "dmda", STARPU_SCHED_CTX_HIERARCHY_LEVEL, 0, 0); unsigned sched_ctx2 = starpu_sched_ctx_create(NULL, 0, "sched_ctx2", STARPU_SCHED_CTX_POLICY_NAME, "dmda", STARPU_SCHED_CTX_HIERARCHY_LEVEL, 1, 0); unsigned sched_ctx3 = starpu_sched_ctx_create(NULL, 0, "sched_ctx3", STARPU_SCHED_CTX_POLICY_NAME, "dmda", STARPU_SCHED_CTX_HIERARCHY_LEVEL, 1, 0); starpu_sched_ctx_set_inheritor(sched_ctx2, sched_ctx1); starpu_sched_ctx_set_inheritor(sched_ctx3, sched_ctx1); /* initialize the hypervisor */ struct sc_hypervisor_policy policy; policy.custom = 0; /* indicate which strategy to use * in this particular case we use app_driven which allows the user to resize * the ctxs dynamically at particular moments of the execution of the application */ policy.name = "feft_lp"; void *perf_counters = sc_hypervisor_init(&policy); /* let starpu know which performance counters should use * to inform the hypervisor how the application and the resources are executing */ starpu_sched_ctx_set_perf_counters(sched_ctx1, perf_counters); starpu_sched_ctx_set_perf_counters(sched_ctx2, perf_counters); starpu_sched_ctx_set_perf_counters(sched_ctx3, perf_counters); double flops1 = NTASKS*NINCR*1000000000.0; double flops2 = NTASKS*NINCR*1000000000.0; double flops3 = NTASKS*NINCR*1000000000.0; /* register the contexts that should be managed by the hypervisor * and indicate an approximate amount of workload if known; in this case we don't know it and we put 0 */ sc_hypervisor_register_ctx(sched_ctx1, flops1); sc_hypervisor_register_ctx(sched_ctx2, flops2); sc_hypervisor_register_ctx(sched_ctx3, flops3); unsigned ncpus = starpu_cpu_worker_get_count(); sc_hypervisor_ctl(sched_ctx1, SC_HYPERVISOR_MAX_WORKERS, ncpus, SC_HYPERVISOR_NULL); sc_hypervisor_ctl(sched_ctx2, SC_HYPERVISOR_MAX_WORKERS, ncpus, SC_HYPERVISOR_NULL); sc_hypervisor_ctl(sched_ctx3, SC_HYPERVISOR_MAX_WORKERS, ncpus, SC_HYPERVISOR_NULL); /* lp strategy allows sizing the contexts because we know the total number of flops * to be executed */ sc_hypervisor_size_ctxs(NULL, -1, NULL, -1); starpu_pthread_t tid[3]; val[0] = 0; val[1] = 0; val[2] = 0; STARPU_PTHREAD_MUTEX_INIT(&mut[0], NULL); STARPU_PTHREAD_MUTEX_INIT(&mut[1], NULL); STARPU_PTHREAD_MUTEX_INIT(&mut[2], NULL); /* we create two threads to simulate simultaneous submission of tasks */ STARPU_PTHREAD_CREATE(&tid[0], NULL, submit_tasks_thread, (void*)&sched_ctx1); STARPU_PTHREAD_CREATE(&tid[1], NULL, submit_tasks_thread, (void*)&sched_ctx2); STARPU_PTHREAD_CREATE(&tid[2], NULL, submit_tasks_thread, (void*)&sched_ctx3); STARPU_PTHREAD_JOIN(tid[0], NULL); STARPU_PTHREAD_JOIN(tid[1], NULL); STARPU_PTHREAD_JOIN(tid[2], NULL); /* free starpu and hypervisor data */ starpu_shutdown(); sc_hypervisor_shutdown(); FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx1, val[0], NTASKS*NINCR); FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx2, val[1], NTASKS*NINCR); FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx3, val[2], NTASKS*NINCR); return 0; } starpu-1.4.9+dfsg/sc_hypervisor/examples/lp_test/000077500000000000000000000000001507764646700221775ustar00rootroot00000000000000starpu-1.4.9+dfsg/sc_hypervisor/examples/lp_test/lp_resize_test.c000066400000000000000000000107041507764646700254000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #define NTASKS 1000 #define NINCR 10 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) unsigned val[2]; starpu_pthread_mutex_t mut[2]; /* Every implementation of a codelet must have this prototype, the first * argument (buffers) describes the buffers/streams that are managed by the * DSM; the second arguments references read-only data that is passed as an * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there * are no data input/output managed by the DSM (cl.nbuffers = 0) */ void cpu_func(__attribute__((unused))void *buffers[], void *cl_arg) { unsigned sched_ctx = *((unsigned *) cl_arg); int i; for(i = 0; i < NINCR; i++) { STARPU_PTHREAD_MUTEX_LOCK(&mut[sched_ctx - 1]); val[sched_ctx - 1]++; STARPU_PTHREAD_MUTEX_UNLOCK(&mut[sched_ctx - 1]); } } struct starpu_codelet cl = {0}; void* submit_tasks_thread(void *arg) { unsigned sched_ctx = *((unsigned*)arg); starpu_sched_ctx_set_context(&sched_ctx); struct starpu_task *task[NTASKS]; int i; for(i = 0; i < NTASKS; i++) { task[i] = starpu_task_create(); cl.cpu_funcs[0] = cpu_func; cl.nbuffers = 0; task[i]->cl = &cl; task[i]->cl_arg = &sched_ctx; task[i]->cl_arg_size = sizeof(unsigned); task[i]->flops = NINCR*1000000000.0; int ret = starpu_task_submit(task[i]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); if(i == NTASKS/2) sc_hypervisor_resize_ctxs(NULL, -1, NULL, -1); } starpu_task_wait_for_all(); return NULL; } int main() { int ret = starpu_init(NULL); if (ret == -ENODEV) return 77; /* create contexts */ unsigned sched_ctx1 = starpu_sched_ctx_create(NULL, 0, "sched_ctx1", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); unsigned sched_ctx2 = starpu_sched_ctx_create(NULL, 0, "sched_ctx2", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); /* initialize the hypervisor */ struct sc_hypervisor_policy policy; policy.custom = 0; /* indicate which strategy to use in this particular case we use app_driven which allows the user to resize the ctxs dynamically at particular moments of the execution of the application */ policy.name = "feft_lp"; void *perf_counters = sc_hypervisor_init(&policy); /* let starpu know which performance counters should use to inform the hypervisor how the application and the resources are executing */ starpu_sched_ctx_set_perf_counters(sched_ctx1, perf_counters); starpu_sched_ctx_set_perf_counters(sched_ctx2, perf_counters); double flops1 = NTASKS*NINCR*1000000000.0; double flops2 = NTASKS*NINCR*1000000000.0; /* register the contexts that should be managed by the hypervisor and indicate an approximate amount of workload if known; in this case we don't know it and we put 0 */ sc_hypervisor_register_ctx(sched_ctx1, flops1); sc_hypervisor_register_ctx(sched_ctx2, flops2); /* lp strategy allows sizing the contexts because we know the total number of flops * to be executed */ sc_hypervisor_size_ctxs(NULL, -1, NULL, -1); starpu_pthread_t tid[2]; val[0] = 0; val[1] = 0; STARPU_PTHREAD_MUTEX_INIT(&mut[0], NULL); STARPU_PTHREAD_MUTEX_INIT(&mut[1], NULL); /* we create two threads to simulate simultaneous submission of tasks */ STARPU_PTHREAD_CREATE(&tid[0], NULL, submit_tasks_thread, (void*)&sched_ctx1); STARPU_PTHREAD_CREATE(&tid[1], NULL, submit_tasks_thread, (void*)&sched_ctx2); STARPU_PTHREAD_JOIN(tid[0], NULL); STARPU_PTHREAD_JOIN(tid[1], NULL); /* free starpu and hypervisor data */ starpu_shutdown(); sc_hypervisor_shutdown(); FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx1, val[0], NTASKS*NINCR); FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx2, val[1], NTASKS*NINCR); return 0; } starpu-1.4.9+dfsg/sc_hypervisor/examples/lp_test/lp_test.c000066400000000000000000000106011507764646700240130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #define NTASKS 1000 #define NINCR 10 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) unsigned val[2]; starpu_pthread_mutex_t mut[2]; /* Every implementation of a codelet must have this prototype, the first * argument (buffers) describes the buffers/streams that are managed by the * DSM; the second arguments references read-only data that is passed as an * argument of the codelet (task->cl_arg). Here, "buffers" is unused as there * are no data input/output managed by the DSM (cl.nbuffers = 0) */ void cpu_func(__attribute__((unused))void *buffers[], void *cl_arg) { unsigned sched_ctx = *((unsigned *) cl_arg); int i; for(i = 0; i < NINCR; i++) { STARPU_PTHREAD_MUTEX_LOCK(&mut[sched_ctx - 1]); val[sched_ctx - 1]++; STARPU_PTHREAD_MUTEX_UNLOCK(&mut[sched_ctx - 1]); } } struct starpu_codelet cl = {0}; void* submit_tasks_thread(void *arg) { unsigned sched_ctx = *((unsigned*)arg); starpu_sched_ctx_set_context(&sched_ctx); struct starpu_task *task[NTASKS]; int i; for(i = 0; i < NTASKS; i++) { task[i] = starpu_task_create(); cl.cpu_funcs[0] = cpu_func; cl.nbuffers = 0; task[i]->cl = &cl; task[i]->cl_arg = &sched_ctx; task[i]->cl_arg_size = sizeof(unsigned); task[i]->flops = NINCR*1000000000.0; int ret = starpu_task_submit(task[i]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); return NULL; } int main() { int ret = starpu_init(NULL); if (ret == -ENODEV) return 77; /* create contexts */ unsigned sched_ctx1 = starpu_sched_ctx_create(NULL, 0, "sched_ctx1", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); unsigned sched_ctx2 = starpu_sched_ctx_create(NULL, 0, "sched_ctx2", STARPU_SCHED_CTX_POLICY_NAME, "dmda", 0); /* initialize the hypervisor */ struct sc_hypervisor_policy policy; policy.custom = 0; /* indicate which strategy to use in this particular case we use app_driven which allows the user to resize the ctxs dynamically at particular moments of the execution of the application */ policy.name = "feft_lp"; void *perf_counters = sc_hypervisor_init(&policy); /* let starpu know which performance counters should use to inform the hypervisor how the application and the resources are executing */ starpu_sched_ctx_set_perf_counters(sched_ctx1, perf_counters); starpu_sched_ctx_set_perf_counters(sched_ctx2, perf_counters); double flops1 = NTASKS*NINCR*1000000000.0; double flops2 = NTASKS*NINCR*1000000000.0; /* register the contexts that should be managed by the hypervisor and indicate an approximate amount of workload if known; in this case we don't know it and we put 0 */ sc_hypervisor_register_ctx(sched_ctx1, flops1); sc_hypervisor_register_ctx(sched_ctx2, flops2); /* lp strategy allows sizing the contexts because we know the total number of flops to be executed */ sc_hypervisor_size_ctxs(NULL, -1, NULL, -1); starpu_pthread_t tid[2]; val[0] = 0; val[1] = 0; STARPU_PTHREAD_MUTEX_INIT(&mut[0], NULL); STARPU_PTHREAD_MUTEX_INIT(&mut[1], NULL); /* we create two threads to simulate simultaneous submission of tasks */ STARPU_PTHREAD_CREATE(&tid[0], NULL, submit_tasks_thread, (void*)&sched_ctx1); STARPU_PTHREAD_CREATE(&tid[1], NULL, submit_tasks_thread, (void*)&sched_ctx2); STARPU_PTHREAD_JOIN(tid[0], NULL); STARPU_PTHREAD_JOIN(tid[1], NULL); /* free starpu and hypervisor data */ starpu_shutdown(); sc_hypervisor_shutdown(); FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx1, val[0], NTASKS*NINCR); FPRINTF(stdout, "ctx = %u executed %u counter_tests out of %d \n", sched_ctx2, val[1], NTASKS*NINCR); return 0; } starpu-1.4.9+dfsg/sc_hypervisor/examples/sched_ctx_utils/000077500000000000000000000000001507764646700237115ustar00rootroot00000000000000starpu-1.4.9+dfsg/sc_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.c000066400000000000000000000323301507764646700272420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "sched_ctx_utils.h" #include #include "sc_hypervisor.h" #define NSAMPLES 3 unsigned size1; unsigned size2; unsigned nblocks1; unsigned nblocks2; unsigned cpu1; unsigned cpu2; unsigned gpu; unsigned gpu1; unsigned gpu2; typedef struct { unsigned id; unsigned ctx; int the_other_ctx; int *workers; int nworkers; void (*bench)(float*, unsigned, unsigned); unsigned size; unsigned nblocks; float *mat[NSAMPLES]; } params; typedef struct { double flops; double avg_timing; } retvals; int first = 1; starpu_pthread_mutex_t mut; retvals rv[2]; params p1, p2; int it = 0; int it2 = 0; starpu_pthread_key_t key; void init() { size1 = 4*1024; size2 = 4*1024; nblocks1 = 16; nblocks2 = 16; cpu1 = 0; cpu2 = 0; gpu = 0; gpu1 = 0; gpu2 = 0; rv[0].flops = 0.0; rv[1].flops = 0.0; rv[1].avg_timing = 0.0; rv[1].avg_timing = 0.0; p1.ctx = 0; p2.ctx = 0; p1.id = 0; p2.id = 1; STARPU_PTHREAD_KEY_CREATE(&key, NULL); } void update_sched_ctx_timing_results(double flops, double avg_timing) { unsigned *id = STARPU_PTHREAD_GETSPECIFIC(key); rv[*id].flops += flops; rv[*id].avg_timing += avg_timing; } void* start_bench(void *val) { params *p = (params*)val; int i; STARPU_PTHREAD_SETSPECIFIC(key, &p->id); if(p->ctx != 0) starpu_sched_ctx_set_context(&p->ctx); for(i = 0; i < NSAMPLES; i++) p->bench(p->mat[i], p->size, p->nblocks); /* if(p->ctx != 0) */ /* { */ /* STARPU_PTHREAD_MUTEX_LOCK(&mut); */ /* if(first){ */ /* sc_hypervisor_unregiser_ctx(p->ctx); */ /* starpu_sched_ctx_delete(p->ctx, p->the_other_ctx); */ /* } */ /* first = 0; */ /* STARPU_PTHREAD_MUTEX_UNLOCK(&mut); */ /* } */ sc_hypervisor_stop_resize(p->the_other_ctx); rv[p->id].flops /= NSAMPLES; rv[p->id].avg_timing /= NSAMPLES; return NULL; } float* construct_matrix(unsigned size) { float *mat; starpu_malloc((void **)&mat, (size_t)size*size*sizeof(float)); unsigned i,j; for (i = 0; i < size; i++) { for (j = 0; j < size; j++) { mat[j +i*size] = (1.0f/(1.0f+i+j)) + ((i == j)?1.0f*size:0.0f); /* mat[j +i*size] = ((i == j)?1.0f*size:0.0f); */ } } return mat; } void start_2benchs(void (*bench)(float*, unsigned, unsigned)) { p1.bench = bench; p1.size = size1; p1.nblocks = nblocks1; p2.bench = bench; p2.size = size2; p2.nblocks = nblocks2; int i; for(i = 0; i < NSAMPLES; i++) { p1.mat[i] = construct_matrix(p1.size); p2.mat[i] = construct_matrix(p2.size); } starpu_pthread_t tid[2]; STARPU_PTHREAD_MUTEX_INIT(&mut, NULL); struct timeval start; struct timeval end; gettimeofday(&start, NULL); STARPU_PTHREAD_CREATE(&tid[0], NULL, (void*)start_bench, (void*)&p1); STARPU_PTHREAD_CREATE(&tid[1], NULL, (void*)start_bench, (void*)&p2); STARPU_PTHREAD_JOIN(tid[0], NULL); STARPU_PTHREAD_JOIN(tid[1], NULL); gettimeofday(&end, NULL); STARPU_PTHREAD_MUTEX_DESTROY(&mut); double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); timing /= 1000000; printf("%2.2f %2.2f ", rv[0].flops, rv[1].flops); printf("%2.2f %2.2f %2.2f\n", rv[0].avg_timing, rv[1].avg_timing, timing); } void start_1stbench(void (*bench)(float*, unsigned, unsigned)) { p1.bench = bench; p1.size = size1; p1.nblocks = nblocks1; int i; for(i = 0; i < NSAMPLES; i++) { p1.mat[i] = construct_matrix(p1.size); } struct timeval start; struct timeval end; gettimeofday(&start, NULL); start_bench((void*)&p1); gettimeofday(&end, NULL); STARPU_PTHREAD_MUTEX_DESTROY(&mut); double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); timing /= 1000000; printf("%2.2f ", rv[0].flops); printf("%2.2f %2.2f\n", rv[0].avg_timing, timing); } void start_2ndbench(void (*bench)(float*, unsigned, unsigned)) { p2.bench = bench; p2.size = size2; p2.nblocks = nblocks2; int i; for(i = 0; i < NSAMPLES; i++) { p2.mat[i] = construct_matrix(p2.size); } struct timeval start; struct timeval end; gettimeofday(&start, NULL); start_bench((void*)&p2); gettimeofday(&end, NULL); STARPU_PTHREAD_MUTEX_DESTROY(&mut); double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); timing /= 1000000; printf("%2.2f ", rv[1].flops); printf("%2.2f %2.2f\n", rv[1].avg_timing, timing); } void construct_contexts() { struct sc_hypervisor_policy policy; policy.custom = 0; policy.name = "idle"; void *perf_counters = sc_hypervisor_init(&policy); int nworkers1 = cpu1 + gpu + gpu1; int nworkers2 = cpu2 + gpu + gpu2; /* unsigned n_all_gpus = gpu + gpu1 + gpu2; */ int i; /* int k = 0; */ nworkers1 = 12; p1.workers = (int*)malloc(nworkers1*sizeof(int)); /* for(i = 0; i < gpu; i++) */ /* p1.workers[k++] = i; */ /* for(i = gpu; i < gpu + gpu1; i++) */ /* p1.workers[k++] = i; */ /* for(i = n_all_gpus; i < n_all_gpus + cpu1; i++) */ /* p1.workers[k++] = i; */ for(i = 0; i < 12; i++) p1.workers[i] = i; p1.ctx = starpu_sched_ctx_create(p1.workers, nworkers1, "sched_ctx1", STARPU_SCHED_CTX_POLICY_NAME, "heft", 0); starpu_sched_ctx_set_perf_counters(p1.ctx, perf_counters); p2.the_other_ctx = (int)p1.ctx; p1.nworkers = nworkers1; sc_hypervisor_register_ctx(p1.ctx, 0.0); /* sc_hypervisor_ctl(p1.ctx, */ /* SC_HYPERVISOR_MAX_IDLE, p1.workers, p1.nworkers, 5000.0, */ /* SC_HYPERVISOR_MAX_IDLE, p1.workers, gpu+gpu1, 100000.0, */ /* SC_HYPERVISOR_EMPTY_CTX_MAX_IDLE, p1.workers, p1.nworkers, 500000.0, */ /* SC_HYPERVISOR_GRANULARITY, 2, */ /* SC_HYPERVISOR_MIN_TASKS, 1000, */ /* SC_HYPERVISOR_NEW_WORKERS_MAX_IDLE, 100000.0, */ /* SC_HYPERVISOR_MIN_WORKERS, 6, */ /* SC_HYPERVISOR_MAX_WORKERS, 12, */ /* NULL); */ sc_hypervisor_ctl(p1.ctx, SC_HYPERVISOR_GRANULARITY, 2, SC_HYPERVISOR_MIN_TASKS, 1000, SC_HYPERVISOR_MIN_WORKERS, 6, SC_HYPERVISOR_MAX_WORKERS, 12, NULL); /* k = 0; */ p2.workers = (int*)malloc(nworkers2*sizeof(int)); /* for(i = 0; i < gpu; i++) */ /* p2.workers[k++] = i; */ /* for(i = gpu + gpu1; i < gpu + gpu1 + gpu2; i++) */ /* p2.workers[k++] = i; */ /* for(i = n_all_gpus + cpu1; i < n_all_gpus + cpu1 + cpu2; i++) */ /* p2.workers[k++] = i; */ p2.ctx = starpu_sched_ctx_create(p2.workers, 0, "sched_ctx2", STARPU_SCHED_CTX_POLICY_NAME, "heft", 0); starpu_sched_ctx_set_perf_counters(p2.ctx, perf_counters); p1.the_other_ctx = (int)p2.ctx; p2.nworkers = 0; sc_hypervisor_register_ctx(p2.ctx, 0.0); /* sc_hypervisor_ctl(p2.ctx, */ /* SC_HYPERVISOR_MAX_IDLE, p2.workers, p2.nworkers, 2000.0, */ /* SC_HYPERVISOR_MAX_IDLE, p2.workers, gpu+gpu2, 5000.0, */ /* SC_HYPERVISOR_EMPTY_CTX_MAX_IDLE, p1.workers, p1.nworkers, 500000.0, */ /* SC_HYPERVISOR_GRANULARITY, 2, */ /* SC_HYPERVISOR_MIN_TASKS, 500, */ /* SC_HYPERVISOR_NEW_WORKERS_MAX_IDLE, 1000.0, */ /* SC_HYPERVISOR_MIN_WORKERS, 4, */ /* SC_HYPERVISOR_MAX_WORKERS, 8, */ /* NULL); */ sc_hypervisor_ctl(p2.ctx, SC_HYPERVISOR_GRANULARITY, 2, SC_HYPERVISOR_MIN_TASKS, 500, SC_HYPERVISOR_MIN_WORKERS, 0, SC_HYPERVISOR_MAX_WORKERS, 6, NULL); } void set_hypervisor_conf(int event, int task_tag) { (void)event; (void)task_tag; /* unsigned *id = STARPU_PTHREAD_GETSPECIFIC(key); */ /* if(*id == 0) */ /* { */ /* if(event == END_BENCH) */ /* { */ /* if(it < 2) */ /* { */ /* sc_hypervisor_ctl(p2.ctx, */ /* SC_HYPERVISOR_MIN_WORKERS, 2, */ /* SC_HYPERVISOR_MAX_WORKERS, 4, */ /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ /* NULL); */ /* printf("%d: set max %d for tag %d\n", p2.ctx, 4, task_tag); */ /* sc_hypervisor_ctl(p1.ctx, */ /* SC_HYPERVISOR_MIN_WORKERS, 6, */ /* SC_HYPERVISOR_MAX_WORKERS, 8, */ /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ /* NULL); */ /* printf("%d: set max %d for tag %d\n", p1.ctx, 8, task_tag); */ /* sc_hypervisor_resize(p1.ctx, task_tag); */ /* } */ /* if(it == 2) */ /* { */ /* sc_hypervisor_ctl(p2.ctx, */ /* SC_HYPERVISOR_MIN_WORKERS, 12, */ /* SC_HYPERVISOR_MAX_WORKERS, 12, */ /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ /* NULL); */ /* printf("%d: set max %d for tag %d\n", p2.ctx, 12, task_tag); */ /* sc_hypervisor_ctl(p1.ctx, */ /* SC_HYPERVISOR_MIN_WORKERS, 0, */ /* SC_HYPERVISOR_MAX_WORKERS, 0, */ /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ /* NULL); */ /* printf("%d: set max %d for tag %d\n", p1.ctx, 0, task_tag); */ /* sc_hypervisor_resize(p1.ctx, task_tag); */ /* } */ /* it++; */ /* } */ /* } */ /* else */ /* { */ /* if(event == END_BENCH) */ /* { */ /* if(it2 < 3) */ /* { */ /* sc_hypervisor_ctl(p1.ctx, */ /* SC_HYPERVISOR_MIN_WORKERS, 6, */ /* SC_HYPERVISOR_MAX_WORKERS, 12, */ /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ /* NULL); */ /* printf("%d: set max %d for tag %d\n", p1.ctx, 12, task_tag); */ /* sc_hypervisor_ctl(p2.ctx, */ /* SC_HYPERVISOR_MIN_WORKERS, 0, */ /* SC_HYPERVISOR_MAX_WORKERS, 0, */ /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ /* NULL); */ /* printf("%d: set max %d for tag %d\n", p2.ctx, 0, task_tag); */ /* sc_hypervisor_resize(p2.ctx, task_tag); */ /* } */ /* it2++; */ /* } */ /* } */ /* if(*id == 1) */ /* { */ /* if(event == START_BENCH) */ /* { */ /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */ /* sc_hypervisor_ctl(p1.ctx, */ /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 800000.0, */ /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ /* NULL); */ /* } */ /* else */ /* { */ /* if(it2 < 2) */ /* { */ /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */ /* sc_hypervisor_ctl(p2.ctx, */ /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 500.0, */ /* SC_HYPERVISOR_MAX_IDLE, workers, 3, 200.0, */ /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ /* NULL); */ /* } */ /* if(it2 == 2) */ /* { */ /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */ /* sc_hypervisor_ctl(p2.ctx, */ /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 1000.0, */ /* SC_HYPERVISOR_MAX_IDLE, workers, 3, 500.0, */ /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ /* SC_HYPERVISOR_MAX_WORKERS, 12, */ /* NULL); */ /* } */ /* it2++; */ /* } */ /* } else { */ /* if(event == START_BENCH) */ /* { */ /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */ /* sc_hypervisor_ctl(p1.ctx, */ /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 1500.0, */ /* SC_HYPERVISOR_MAX_IDLE, workers, 3, 4000.0, */ /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ /* NULL); */ /* } */ /* if(event == END_BENCH) */ /* { */ /* if(it < 2) */ /* { */ /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */ /* sc_hypervisor_ctl(p1.ctx, */ /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 100.0, */ /* SC_HYPERVISOR_MAX_IDLE, workers, 3, 5000.0, */ /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ /* NULL); */ /* } */ /* if(it == 2) */ /* { */ /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */ /* sc_hypervisor_ctl(p1.ctx, */ /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 5000.0, */ /* SC_HYPERVISOR_MAX_IDLE, workers, 3, 10000.0, */ /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */ /* NULL); */ /* } */ /* it++; */ /* } */ /* } */ } void end_contexts() { free(p1.workers); free(p2.workers); sc_hypervisor_shutdown(); } void parse_args_ctx(int argc, char **argv) { init(); int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-size1") == 0) { char *argptr; size1 = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-nblocks1") == 0) { char *argptr; nblocks1 = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-size2") == 0) { char *argptr; size2 = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-nblocks2") == 0) { char *argptr; nblocks2 = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-cpu1") == 0) { char *argptr; cpu1 = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-cpu2") == 0) { char *argptr; cpu2 = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-gpu") == 0) { char *argptr; gpu = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-gpu1") == 0) { char *argptr; gpu1 = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-gpu2") == 0) { char *argptr; gpu2 = strtol(argv[++i], &argptr, 10); } } } starpu-1.4.9+dfsg/sc_hypervisor/examples/sched_ctx_utils/sched_ctx_utils.h000066400000000000000000000024101507764646700272430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #define START_BENCH 0 #define END_BENCH 1 void parse_args_ctx(int argc, char **argv); void update_sched_ctx_timing_results(double gflops, double timing); void construct_contexts(); void end_contexts(void); void start_2benchs(void (*bench)(float *mat, unsigned size, unsigned nblocks)); void start_1stbench(void (*bench)(float *mat, unsigned size, unsigned nblocks)); void start_2ndbench(void (*bench)(float *mat, unsigned size, unsigned nblocks)); void set_hypervisor_conf(int event, int task_tag); starpu-1.4.9+dfsg/sc_hypervisor/include/000077500000000000000000000000001507764646700203325ustar00rootroot00000000000000starpu-1.4.9+dfsg/sc_hypervisor/include/sc_hypervisor.h000066400000000000000000000215701507764646700234070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef SC_HYPERVISOR_H #define SC_HYPERVISOR_H #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif /** @ingroup API_SC_Hypervisor Methods to implement a hypervisor resizing policy. */ struct sc_hypervisor_policy { /** Indicate the name of the policy, if there is not a custom policy, the policy corresponding to this name will be used by the hypervisor */ const char *name; /** Indicate whether the policy is custom or not */ unsigned custom; /** Distribute workers to contexts even at the beginning of the program */ void (*size_ctxs)(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers); /** Require explicit resizing */ void (*resize_ctxs)(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers); /** Called whenever the indicated worker executes another idle cycle in sched_ctx */ void (*handle_idle_cycle)(unsigned sched_ctx, int worker); /** Called whenever a task is pushed on the worker’s queue corresponding to the context sched_ctx */ void (*handle_pushed_task)(unsigned sched_ctx, int worker); /** Called whenever a task is poped from the worker’s queue corresponding to the context sched_ctx */ void (*handle_poped_task)(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint); /** Called whenever a task is executed on the indicated worker and context after a long period of idle time */ void (*handle_idle_end)(unsigned sched_ctx, int worker); /** Called whenever a tag task has just been executed. The table of resize requests is provided as well as the tag */ void (*handle_post_exec_hook)(unsigned sched_ctx, int task_tag); /** the hypervisor takes a decision when a job was submitted in this ctx */ void (*handle_submitted_job)(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, size_t data_size); /** the hypervisor takes a decision when a certain ctx was deleted */ void (*end_ctx)(unsigned sched_ctx); /** the hypervisor takes a decision when a certain ctx was registered */ void (*start_ctx)(unsigned sched_ctx); /** the hypervisor initializes values for the workers */ void (*init_worker)(int workerid, unsigned sched_ctx); }; /** @defgroup API_SC_Hypervisor_usage Scheduling Context Hypervisor - Regular usage There is a single hypervisor that is in charge of resizing contexts and the resizing strategy is chosen at the initialization of the hypervisor. A single resize can be done at a time. The Scheduling Context Hypervisor Plugin provides a series of performance counters to StarPU. By incrementing them, StarPU can help the hypervisor in the resizing decision making process. The function sc_hypervisor_init() initializes the hypervisor to use the strategy provided as parameter and creates the performance counters (see starpu_sched_ctx_performance_counters). These performance counters represent actually some callbacks that will be used by the contexts to notify the information needed by the hypervisor. Scheduling Contexts that have to be resized by the hypervisor must be first registered to the hypervisor using the function sc_hypervisor_register_ctx() Note: The Hypervisor is actually a worker that takes this role once certain conditions trigger the resizing process (there is no additional thread assigned to the hypervisor). @{ */ /** synchronise the hypervisor when several workers try to update its information */ extern starpu_pthread_mutex_t act_hypervisor_mutex; /** Start the hypervisor with the given policy */ void *sc_hypervisor_init(struct sc_hypervisor_policy *policy); /** Shutdown the hypervisor. The hypervisor and all information concerning it is cleaned. There is no synchronization between this function and starpu_shutdown(). Thus, this should be called after starpu_shutdown(), because the performance counters will still need allocated callback functions. */ void sc_hypervisor_shutdown(void); /** Register the context to the hypervisor, and indicate the number of flops the context will execute (used for Gflops rate based strategy) */ void sc_hypervisor_register_ctx(unsigned sched_ctx, double total_flops); /** Unregister a context from the hypervisor, and so exclude the context from the resizing process */ void sc_hypervisor_unregister_ctx(unsigned sched_ctx); /** Require resizing the context \p sched_ctx whenever a task tagged with the id \p task_tag finished executing */ void sc_hypervisor_post_resize_request(unsigned sched_ctx, int task_tag); /** Require reconsidering the distribution of resources over the indicated scheduling contexts, i.e reevaluate the distribution of the resources and eventually resize if needed */ void sc_hypervisor_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers); /** Do not allow the hypervisor to resize a context. */ void sc_hypervisor_stop_resize(unsigned sched_ctx); /** Allow the hypervisor to resize a context if necessary. */ void sc_hypervisor_start_resize(unsigned sched_ctx); /** Return the name of the resizing policy used by the hypervisor */ const char *sc_hypervisor_get_policy(void); /** Ask the hypervisor to add workers to a sched_ctx */ void sc_hypervisor_add_workers_to_sched_ctx(int *workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx); /** Ask the hypervisor to remove workers from a sched_ctx */ void sc_hypervisor_remove_workers_from_sched_ctx(int *workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx, unsigned now); /** Ask the hypervisor to move workers from one context to another */ void sc_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int *workers_to_move, unsigned nworkers_to_move, unsigned now); /** Ask the hypervisor to choose a distribution of workers in the required contexts */ void sc_hypervisor_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers); /** Check if there are pending demands of resizing */ unsigned sc_hypervisor_get_size_req(unsigned **sched_ctxs, int *nsched_ctxs, int **workers, int *nworkers); /** Save a demand of resizing */ void sc_hypervisor_save_size_req(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers); /** Clear the list of pending demands of resizing */ void sc_hypervisor_free_size_req(void); /** Check out if a context can be resized */ unsigned sc_hypervisor_can_resize(unsigned sched_ctx); /** Indicate the types of tasks a context will execute in order to better decide the sizing of ctxs */ void sc_hypervisor_set_type_of_task(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, size_t data_size); /** Change dynamically the total number of flops of a context, move the deadline of the finishing time of the context */ void sc_hypervisor_update_diff_total_flops(unsigned sched_ctx, double diff_total_flops); /** Change dynamically the number of the elapsed flops in a context, modify the past in order to better compute the speed */ void sc_hypervisor_update_diff_elapsed_flops(unsigned sched_ctx, double diff_task_flops); /** Update the min and max workers needed by each context */ void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs, int max_nworkers); /** Return a list of contexts that are on the same level in the hierarchy of contexts */ void sc_hypervisor_get_ctxs_on_level(unsigned **sched_ctxs, int *nsched_ctxs, unsigned hierarchy_level, unsigned father_sched_ctx_id); /** Returns the number of levels of ctxs registered to the hyp */ unsigned sc_hypervisor_get_nhierarchy_levels(void); /** Return the leaves ctxs from the list of ctxs */ void sc_hypervisor_get_leaves(unsigned *sched_ctxs, int nsched_ctxs, unsigned *leaves, int *nleaves); /** Return the nready flops of all ctxs below in hierarchy of sched_ctx */ double sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(unsigned sched_ctx); void sc_hypervisor_print_overhead(void); void sc_hypervisor_init_worker(int workerid, unsigned sched_ctx); /** @} */ #ifdef __cplusplus } #endif #endif starpu-1.4.9+dfsg/sc_hypervisor/include/sc_hypervisor_config.h000066400000000000000000000147461507764646700247430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef SC_HYPERVISOR_CONFIG_H #define SC_HYPERVISOR_CONFIG_H #include #ifdef __cplusplus extern "C" { #endif /** @ingroup API_SC_Hypervisor @{ */ /** This macro is used when calling sc_hypervisor_ctl() and must be followed by 3 arguments: an array of int for the workerids to apply the condition, an int to indicate the size of the array, and a double value indicating the maximum idle time allowed for a worker before the resizing process should be triggered */ #define SC_HYPERVISOR_MAX_IDLE -1 #define SC_HYPERVISOR_MIN_WORKING -2 /** This macro is used when calling sc_hypervisor_ctl() and must be followed by 3 arguments: an array of int for the workerids to apply the condition, an int to indicate the size of the array, and an int value indicating the priority of the workers previously mentioned. The workers with the smallest priority are moved the first. */ #define SC_HYPERVISOR_PRIORITY -3 /** This macro is used when calling sc_hypervisor_ctl() and must be followed by 1 argument(int) indicating the minimum number of workers a context should have, underneath this limit the context cannot execute. */ #define SC_HYPERVISOR_MIN_WORKERS -4 /** This macro is used when calling sc_hypervisor_ctl() and must be followed by 1 argument(int) indicating the maximum number of workers a context should have, above this limit the context would not be able to scale */ #define SC_HYPERVISOR_MAX_WORKERS -5 /** This macro is used when calling sc_hypervisor_ctl() and must be followed by 1 argument(int) indicating the granularity of the resizing process (the number of workers should be moved from the context once it is resized) This parameter is ignore for the Gflops rate based strategy (see \ref ResizingStrategies), the number of workers that have to be moved is calculated by the strategy. */ #define SC_HYPERVISOR_GRANULARITY -6 /** This macro is used when calling sc_hypervisor_ctl() and must be followed by 2 arguments: an array of int for the workerids to apply the condition and an int to indicate the size of the array. These workers are not allowed to be moved from the context. */ #define SC_HYPERVISOR_FIXED_WORKERS -7 /** This macro is used when calling sc_hypervisor_ctl() and must be followed by 1 argument (int) that indicated the minimum number of tasks that have to be executed before the context could be resized. This parameter is ignored for the Application Driven strategy (see \ref ResizingStrategies) where the user indicates exactly when the resize should be done. */ #define SC_HYPERVISOR_MIN_TASKS -8 /** This macro is used when calling sc_hypervisor_ctl() and must be followed by 1 argument, a double value indicating the maximum idle time allowed for workers that have just been moved from other contexts in the current context. */ #define SC_HYPERVISOR_NEW_WORKERS_MAX_IDLE -9 /** This macro is used when calling sc_hypervisor_ctl() and must be followed by 1 argument (int) indicating the tag an executed task should have such that this configuration should be taken into account. */ #define SC_HYPERVISOR_TIME_TO_APPLY -10 /** This macro is used when calling sc_hypervisor_ctl() and must be followed by 1 argument */ #define SC_HYPERVISOR_NULL -11 /** This macro is used when calling sc_hypervisor_ctl() and must be followed by 1 argument, a double, that indicates the number of flops needed to be executed before computing the speed of a worker */ #define SC_HYPERVISOR_ISPEED_W_SAMPLE -12 /** This macro is used when calling sc_hypervisor_ctl() and must be followed by 1 argument, a double, that indicates the number of flops needed to be executed before computing the speed of a context */ #define SC_HYPERVISOR_ISPEED_CTX_SAMPLE -13 #define SC_HYPERVISOR_TIME_SAMPLE -14 #define MAX_IDLE_TIME 5000000000 #define MIN_WORKING_TIME 500 /** Methods that implement a hypervisor resizing policy. */ struct sc_hypervisor_policy_config { /** Indicate the minimum number of workers needed by the context */ int min_nworkers; /** Indicate the maximum number of workers needed by the context */ int max_nworkers; /** Indicate the workers granularity of the context */ int granularity; /** Indicate the priority of each worker to stay in the context the smaller the priority the faster it will be moved to another context */ int priority[STARPU_NMAXWORKERS]; /** Indicate the maximum idle time accepted before a resize is triggered above this limit the priority of the worker is reduced */ double max_idle[STARPU_NMAXWORKERS]; /** Indicate that underneath this limit the priority of the worker is reduced */ double min_working[STARPU_NMAXWORKERS]; /** Indicate which workers can be moved and which ones are fixed */ int fixed_workers[STARPU_NMAXWORKERS]; /** Indicate the maximum idle time accepted before a resize is triggered for the workers that just arrived in the new context */ double new_workers_max_idle; /** Indicate the sample used to compute the instant speed per worker */ double ispeed_w_sample[STARPU_NMAXWORKERS]; /** Indicate the sample used to compute the instant speed per ctxs */ double ispeed_ctx_sample; /** Indicate the sample used to compute the instant speed per ctx (in seconds) */ double time_sample; }; /** Specify the configuration for a context */ void sc_hypervisor_set_config(unsigned sched_ctx, void *config); /** Return the configuration of a context */ struct sc_hypervisor_policy_config *sc_hypervisor_get_config(unsigned sched_ctx); /** Specify different parameters for the configuration of a context. The list must be zero-terminated */ void sc_hypervisor_ctl(unsigned sched_ctx, ...); /** @} */ #ifdef __cplusplus } #endif #endif starpu-1.4.9+dfsg/sc_hypervisor/include/sc_hypervisor_lp.h000066400000000000000000000116041507764646700240770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef SC_HYPERVISOR_LP_H #define SC_HYPERVISOR_LP_H #include #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_SC_Hypervisor_LP Scheduling Context Hypervisor - Linear Programming @{ */ #ifdef STARPU_HAVE_GLPK_H #include #endif //STARPU_HAVE_GLPK_H struct sc_hypervisor_policy_task_pool; struct types_of_workers; /** return tmax, and compute in table res the nr of workers needed by each context st the system ends up in the smallest tma */ double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers], struct types_of_workers *tw, unsigned *in_sched_ctxs); /** return tmax of the system */ double sc_hypervisor_lp_get_tmax(int nw, int *workers); /** the linear programme determines a rational number of resources for each ctx, we round them depending on the type of resource */ void sc_hypervisor_lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw]); /** redistribute the resource in contexts by assigning the first x available resources to each one */ void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], unsigned *sched_ctxs, struct types_of_workers *tw); /** make the first distribution of resource in contexts by assigning the first x available resources to each one */ void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned *sched_ctxs, int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *workers, int nworkers, struct types_of_workers *tw); /** make the first distribution of resource in contexts by assigning the first x available resources to each one, share not integer no of workers */ void sc_hypervisor_lp_distribute_floating_no_resources_in_ctxs(unsigned *sched_ctxs, int ns, int nw, double res[ns][nw], int *workers, int nworkers, struct types_of_workers *tw); /** place resources in contexts depending on whether they already have workers or not */ void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], unsigned *sched_ctxs, int *workers, unsigned do_size, struct types_of_workers *tw); /** not used resources are shared between all contexts */ void sc_hypervisor_lp_share_remaining_resources(int ns, unsigned *sched_ctxs, int nworkers, int *workers); /** dichotomy btw t1 & t2 */ double sc_hypervisor_lp_find_tmax(double t1, double t2); /** execute the lp through dichotomy */ unsigned sc_hypervisor_lp_execute_dichotomy(int ns, int nw, double w_in_s[ns][nw], unsigned solve_lp_integer, void *specific_data, double tmin, double tmax, double smallest_tmax, double (*lp_estimated_distrib_func)(int lns, int lnw, double ldraft_w_in_s[ns][nw], unsigned lis_integer, double ltmax, void *lspecifc_data)); #ifdef STARPU_HAVE_GLPK_H /** linear program that returns 1/tmax, and computes in table res the nr of workers needed by each context st the system ends up in the smallest tmax */ double sc_hypervisor_lp_simulate_distrib_flops(int nsched_ctxs, int ntypes_of_workers, double speed[nsched_ctxs][ntypes_of_workers], double flops[nsched_ctxs], double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers], unsigned sched_ctxs[nsched_ctxs], double vmax); /** linear program that simulates a distribution of tasks that minimises the execution time of the tasks in the pool */ double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_in_s[ns][nw], double tasks[nw][nt], double times[nw][nt], unsigned is_integer, double tmax, unsigned *in_sched_ctxs, struct sc_hypervisor_policy_task_pool *tmp_task_pools); /** linear program that simulates a distribution of flops over the workers on particular sample of the execution of the application such that the entire sample would finish in a minimum amount of time */ double sc_hypervisor_lp_simulate_distrib_flops_on_sample(int ns, int nw, double final_w_in_s[ns][nw], unsigned is_integer, double tmax, double **speed, double flops[ns], double **final_flops_on_w); #endif // STARPU_HAVE_GLPK_H /** @} */ #ifdef __cplusplus } #endif #endif starpu-1.4.9+dfsg/sc_hypervisor/include/sc_hypervisor_monitoring.h000066400000000000000000000145501507764646700256540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2015-2015 Mathieu Lirzin * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef SC_HYPERVISOR_MONITORING_H #define SC_HYPERVISOR_MONITORING_H #include #ifdef __cplusplus extern "C" { #endif /** @ingroup API_SC_Hypervisor @{ */ /** Structure to check if the workers moved to another context are actually taken into account in that context. */ struct sc_hypervisor_resize_ack { /** The context receiving the new workers */ int receiver_sched_ctx; /** List of workers required to be moved */ int *moved_workers; /** Number of workers required to be moved */ int nmoved_workers; /** List of workers that actually got in the receiver ctx. If the value corresponding to a worker is 1, this worker got moved in the new context. */ int *acked_workers; }; /** Wrapper of the contexts available in StarPU which contains all information about a context obtained by incrementing the performance counters. it is attached to a sched_ctx storing monitoring information */ struct sc_hypervisor_wrapper { /** the monitored context */ unsigned sched_ctx; /** The corresponding resize configuration */ struct sc_hypervisor_policy_config *config; /** the start time of the resizing sample of the workers of this context */ double start_time_w[STARPU_NMAXWORKERS]; /** The idle time counter of each worker of the context */ double current_idle_time[STARPU_NMAXWORKERS]; /** The time the workers were idle from the last resize */ double idle_time[STARPU_NMAXWORKERS]; /** The moment when the workers started being idle */ double idle_start_time[STARPU_NMAXWORKERS]; /** Time during which the worker executed tasks */ double exec_time[STARPU_NMAXWORKERS]; /** Time when the worker started executing a task */ double exec_start_time[STARPU_NMAXWORKERS]; /** List of workers that will leave the context (lazy resizing process) */ int worker_to_be_removed[STARPU_NMAXWORKERS]; /** Number of tasks pushed on each worker in this context */ int pushed_tasks[STARPU_NMAXWORKERS]; /** Number of tasks poped from each worker in this context */ int poped_tasks[STARPU_NMAXWORKERS]; /** The total number of flops to execute by the context */ double total_flops; /** The number of flops executed by each workers of the context */ double total_elapsed_flops[STARPU_NMAXWORKERS]; /** number of flops executed since last resizing */ double elapsed_flops[STARPU_NMAXWORKERS]; /** Quantity of data (in bytes) used to execute tasks on each worker in this context */ size_t elapsed_data[STARPU_NMAXWORKERS]; /** Number of tasks executed on each worker in this context */ int elapsed_tasks[STARPU_NMAXWORKERS]; /** the average speed of the type of workers when they belonged to this context 0 - cuda 1 - cpu */ double ref_speed[2]; /** Number of flops submitted to this context */ double submitted_flops; /** Number of flops that still have to be executed by the workers in this context */ double remaining_flops; /** Start time of the resizing sample of this context */ double start_time; /** First time a task was pushed to this context */ double real_start_time; /** Start time for sample in which the hypervisor is not allowed to react bc too expensive */ double hyp_react_start_time; /** Structure confirming the last resize finished and a new one can be done. Workers do not leave the current context until the receiver context does not ack the receive of these workers */ struct sc_hypervisor_resize_ack resize_ack; /** Mutex needed to synchronize the acknowledgment of the workers into the receiver context */ starpu_pthread_mutex_t mutex; /** Boolean indicating if the hypervisor can use the flops corresponding to the entire execution of the context */ unsigned total_flops_available; /** boolean indicating that a context is being sized */ unsigned to_be_sized; /** Boolean indicating if we add the idle of this worker to the idle of the context */ unsigned compute_idle[STARPU_NMAXWORKERS]; /** Boolean indicating if we add the entiere idle of this worker to the idle of the context or just half */ unsigned compute_partial_idle[STARPU_NMAXWORKERS]; /** consider the max in the lp */ unsigned consider_max; }; /** Return the wrapper of the given context @ingroup API_SC_Hypervisor */ struct sc_hypervisor_wrapper *sc_hypervisor_get_wrapper(unsigned sched_ctx); /** Get the list of registered contexts @ingroup API_SC_Hypervisor */ unsigned *sc_hypervisor_get_sched_ctxs(void); /** Get the number of registered contexts @ingroup API_SC_Hypervisor */ int sc_hypervisor_get_nsched_ctxs(void); /** Get the number of workers of a certain architecture in a context */ int sc_hypervisor_get_nworkers_ctx(unsigned sched_ctx, enum starpu_worker_archtype arch); /** Get the number of flops executed by a context since last resizing (reset to 0 when a resizing is done) @ingroup API_SC_Hypervisor */ double sc_hypervisor_get_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper *sc_w); /** Get the number of flops executed by a context since the beginning */ double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper *sc_w); /** Compute an average value of the cpu/cuda speed */ double sc_hypervisorsc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch); /** Compte the actual speed of all workers of a specific type of worker */ double sc_hypervisor_get_speed(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch); /** @} */ #ifdef __cplusplus } #endif #endif starpu-1.4.9+dfsg/sc_hypervisor/include/sc_hypervisor_policy.h000066400000000000000000000146541507764646700247730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef SC_HYPERVISOR_POLICY_H #define SC_HYPERVISOR_POLICY_H #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_SC_Hypervisor Scheduling Context Hypervisor - Building a new resizing policy @{ */ #define HYPERVISOR_REDIM_SAMPLE 0.02 #define HYPERVISOR_START_REDIM_SAMPLE 0.1 #define SC_NOTHING 0 #define SC_IDLE 1 #define SC_SPEED 2 struct types_of_workers { unsigned ncpus; unsigned ncuda; unsigned nw; }; /** Task wrapper linked list @ingroup API_SC_Hypervisor */ struct sc_hypervisor_policy_task_pool { /** Which codelet has been executed */ struct starpu_codelet *cl; /** Task footprint key */ uint32_t footprint; /** Context the task belongs to */ unsigned sched_ctx_id; /** Number of tasks of this kind */ unsigned long n; /** The quantity of data(in bytes) needed by the task to execute */ size_t data_size; /** Other task kinds */ struct sc_hypervisor_policy_task_pool *next; }; /** add task information to a task wrapper linked list */ void sc_hypervisor_policy_add_task_to_pool(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, struct sc_hypervisor_policy_task_pool **task_pools, size_t data_size); /** remove task information from a task wrapper linked list */ void sc_hypervisor_policy_remove_task_from_pool(struct starpu_task *task, uint32_t footprint, struct sc_hypervisor_policy_task_pool **task_pools); /** clone a task wrapper linked list */ struct sc_hypervisor_policy_task_pool *sc_hypervisor_policy_clone_task_pool(struct sc_hypervisor_policy_task_pool *tp); /** get the execution time of the submitted tasks out of starpu's calibration files */ void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *workers, unsigned size_ctxs, struct sc_hypervisor_policy_task_pool *task_pools); /** find the context with the lowest priority in order to move some workers */ unsigned sc_hypervisor_find_lowest_prio_sched_ctx(unsigned req_sched_ctx, int nworkers_to_move); /** find the first most idle workers of a context */ int *sc_hypervisor_get_idlest_workers(unsigned sched_ctx, int *nworkers, enum starpu_worker_archtype arch); /** find the first most idle workers in a list */ int *sc_hypervisor_get_idlest_workers_in_list(int *start, int *workers, int nall_workers, int *nworkers, enum starpu_worker_archtype arch); /** find workers that can be moved from a context (if the constraints of min, max, etc allow this) */ int sc_hypervisor_get_movable_nworkers(struct sc_hypervisor_policy_config *config, unsigned sched_ctx, enum starpu_worker_archtype arch); /** compute how many workers should be moved from this context */ int sc_hypervisor_compute_nworkers_to_move(unsigned req_sched_ctx); /** check the policy's constraints in order to resize */ unsigned sc_hypervisor_policy_resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigned force_resize, unsigned now); /** check the policy's constraints in order to resize and find a context willing the resources */ unsigned sc_hypervisor_policy_resize_to_unknown_receiver(unsigned sender_sched_ctx, unsigned now); /** compute the speed of a context */ double sc_hypervisor_get_ctx_speed(struct sc_hypervisor_wrapper *sc_w); /** get the time of execution of the slowest context */ double sc_hypervisor_get_slowest_ctx_exec_time(void); /** get the time of execution of the fastest context */ double sc_hypervisor_get_fastest_ctx_exec_time(void); /** compute the speed of a workers in a context */ double sc_hypervisor_get_speed_per_worker(struct sc_hypervisor_wrapper *sc_w, unsigned worker); /** compute the speed of a type of worker in a context */ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch); /** compute the speed of a type of worker in a context depending on its history */ double sc_hypervisor_get_ref_speed_per_worker_type(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch); /** compute the average speed of a type of worker in all ctxs from the beginning of appl */ double sc_hypervisor_get_avg_speed(enum starpu_worker_archtype arch); /** verify if we need to consider the max in the lp */ void sc_hypervisor_check_if_consider_max(struct types_of_workers *tw); /** get the list of workers grouped by type */ void sc_hypervisor_group_workers_by_type(struct types_of_workers *tw, int *total_nw); /** get what type of worker corresponds to a certain index of types of workers */ enum starpu_worker_archtype sc_hypervisor_get_arch_for_index(unsigned w, struct types_of_workers *tw); /** get the index of types of workers corresponding to the type of workers indicated */ unsigned sc_hypervisor_get_index_for_arch(enum starpu_worker_archtype arch, struct types_of_workers *tw); /** check if we trigger resizing or not */ unsigned sc_hypervisor_criteria_fulfilled(unsigned sched_ctx, int worker); /** check if worker was idle long enough */ unsigned sc_hypervisor_check_idle(unsigned sched_ctx, int worker); /** check if there is a speed gap btw ctxs */ unsigned sc_hypervisor_check_speed_gap_btw_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers); /** check if there is a speed gap btw ctxs on one level */ unsigned sc_hypervisor_check_speed_gap_btw_ctxs_on_level(int level, int *workers_in, int nworkers_in, unsigned father_sched_ctx_id, unsigned **sched_ctxs, int *nsched_ctxs); /** check what triggers resizing (idle, speed, etc. */ unsigned sc_hypervisor_get_resize_criteria(void); /** load information concerning the type of workers into a types_of_workers struct */ struct types_of_workers *sc_hypervisor_get_types_of_workers(int *workers, unsigned nworkers); /** @} */ #ifdef __cplusplus } #endif #endif starpu-1.4.9+dfsg/sc_hypervisor/src/000077500000000000000000000000001507764646700174765ustar00rootroot00000000000000starpu-1.4.9+dfsg/sc_hypervisor/src/Makefile.am000066400000000000000000000033151507764646700215340ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-notests.mk AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -I$(top_srcdir)/sc_hypervisor/include/ -I$(top_srcdir)/sc_hypervisor/src $(STARPU_H_CPPFLAGS) LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) lib_LTLIBRARIES = libsc_hypervisor.la libsc_hypervisor_la_SOURCES = \ sc_hypervisor.c \ sc_config.c \ policies_utils/policy_tools.c \ policies_utils/speed.c \ policies_utils/task_pool.c \ policies_utils/lp_tools.c \ policies_utils/lp_programs.c \ policies_utils/dichotomy.c \ hypervisor_policies/idle_policy.c \ hypervisor_policies/app_driven_policy.c \ hypervisor_policies/gflops_rate_policy.c \ hypervisor_policies/feft_lp_policy.c \ hypervisor_policies/teft_lp_policy.c \ hypervisor_policies/ispeed_policy.c \ hypervisor_policies/ispeed_lp_policy.c \ hypervisor_policies/throughput_lp_policy.c \ hypervisor_policies/hard_coded_policy.c \ hypervisor_policies/perf_count_policy.c noinst_HEADERS = \ sc_hypervisor_intern.h \ uthash.h starpu-1.4.9+dfsg/sc_hypervisor/src/Makefile.in000066400000000000000000001215751507764646700215560ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ subdir = sc_hypervisor/src ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(libdir)" LTLIBRARIES = $(lib_LTLIBRARIES) libsc_hypervisor_la_LIBADD = am__dirstamp = $(am__leading_dot)dirstamp am_libsc_hypervisor_la_OBJECTS = sc_hypervisor.lo sc_config.lo \ policies_utils/policy_tools.lo policies_utils/speed.lo \ policies_utils/task_pool.lo policies_utils/lp_tools.lo \ policies_utils/lp_programs.lo policies_utils/dichotomy.lo \ hypervisor_policies/idle_policy.lo \ hypervisor_policies/app_driven_policy.lo \ hypervisor_policies/gflops_rate_policy.lo \ hypervisor_policies/feft_lp_policy.lo \ hypervisor_policies/teft_lp_policy.lo \ hypervisor_policies/ispeed_policy.lo \ hypervisor_policies/ispeed_lp_policy.lo \ hypervisor_policies/throughput_lp_policy.lo \ hypervisor_policies/hard_coded_policy.lo \ hypervisor_policies/perf_count_policy.lo libsc_hypervisor_la_OBJECTS = $(am_libsc_hypervisor_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/sc_config.Plo \ ./$(DEPDIR)/sc_hypervisor.Plo \ hypervisor_policies/$(DEPDIR)/app_driven_policy.Plo \ hypervisor_policies/$(DEPDIR)/feft_lp_policy.Plo \ hypervisor_policies/$(DEPDIR)/gflops_rate_policy.Plo \ hypervisor_policies/$(DEPDIR)/hard_coded_policy.Plo \ hypervisor_policies/$(DEPDIR)/idle_policy.Plo \ hypervisor_policies/$(DEPDIR)/ispeed_lp_policy.Plo \ hypervisor_policies/$(DEPDIR)/ispeed_policy.Plo \ hypervisor_policies/$(DEPDIR)/perf_count_policy.Plo \ hypervisor_policies/$(DEPDIR)/teft_lp_policy.Plo \ hypervisor_policies/$(DEPDIR)/throughput_lp_policy.Plo \ policies_utils/$(DEPDIR)/dichotomy.Plo \ policies_utils/$(DEPDIR)/lp_programs.Plo \ policies_utils/$(DEPDIR)/lp_tools.Plo \ policies_utils/$(DEPDIR)/policy_tools.Plo \ policies_utils/$(DEPDIR)/speed.Plo \ policies_utils/$(DEPDIR)/task_pool.Plo am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = $(libsc_hypervisor_la_SOURCES) DIST_SOURCES = $(libsc_hypervisor_la_SOURCES) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac HEADERS = $(noinst_HEADERS) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/make/starpu-notests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ $(STARPU_EXPORTED_LIBS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_CFLAGS = $(GLOBAL_AM_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) AM_CPPFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -I$(top_srcdir)/sc_hypervisor/include/ -I$(top_srcdir)/sc_hypervisor/src $(STARPU_H_CPPFLAGS) lib_LTLIBRARIES = libsc_hypervisor.la libsc_hypervisor_la_SOURCES = \ sc_hypervisor.c \ sc_config.c \ policies_utils/policy_tools.c \ policies_utils/speed.c \ policies_utils/task_pool.c \ policies_utils/lp_tools.c \ policies_utils/lp_programs.c \ policies_utils/dichotomy.c \ hypervisor_policies/idle_policy.c \ hypervisor_policies/app_driven_policy.c \ hypervisor_policies/gflops_rate_policy.c \ hypervisor_policies/feft_lp_policy.c \ hypervisor_policies/teft_lp_policy.c \ hypervisor_policies/ispeed_policy.c \ hypervisor_policies/ispeed_lp_policy.c \ hypervisor_policies/throughput_lp_policy.c \ hypervisor_policies/hard_coded_policy.c \ hypervisor_policies/perf_count_policy.c noinst_HEADERS = \ sc_hypervisor_intern.h \ uthash.h all: all-am .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign sc_hypervisor/src/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign sc_hypervisor/src/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): install-libLTLIBRARIES: $(lib_LTLIBRARIES) @$(NORMAL_INSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ list2=; for p in $$list; do \ if test -f $$p; then \ list2="$$list2 $$p"; \ else :; fi; \ done; \ test -z "$$list2" || { \ echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ } uninstall-libLTLIBRARIES: @$(NORMAL_UNINSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ for p in $$list; do \ $(am__strip_dir) \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ done clean-libLTLIBRARIES: -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) @list='$(lib_LTLIBRARIES)'; \ locs=`for p in $$list; do echo $$p; done | \ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ sort -u`; \ test -z "$$locs" || { \ echo rm -f $${locs}; \ rm -f $${locs}; \ } policies_utils/$(am__dirstamp): @$(MKDIR_P) policies_utils @: > policies_utils/$(am__dirstamp) policies_utils/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) policies_utils/$(DEPDIR) @: > policies_utils/$(DEPDIR)/$(am__dirstamp) policies_utils/policy_tools.lo: policies_utils/$(am__dirstamp) \ policies_utils/$(DEPDIR)/$(am__dirstamp) policies_utils/speed.lo: policies_utils/$(am__dirstamp) \ policies_utils/$(DEPDIR)/$(am__dirstamp) policies_utils/task_pool.lo: policies_utils/$(am__dirstamp) \ policies_utils/$(DEPDIR)/$(am__dirstamp) policies_utils/lp_tools.lo: policies_utils/$(am__dirstamp) \ policies_utils/$(DEPDIR)/$(am__dirstamp) policies_utils/lp_programs.lo: policies_utils/$(am__dirstamp) \ policies_utils/$(DEPDIR)/$(am__dirstamp) policies_utils/dichotomy.lo: policies_utils/$(am__dirstamp) \ policies_utils/$(DEPDIR)/$(am__dirstamp) hypervisor_policies/$(am__dirstamp): @$(MKDIR_P) hypervisor_policies @: > hypervisor_policies/$(am__dirstamp) hypervisor_policies/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) hypervisor_policies/$(DEPDIR) @: > hypervisor_policies/$(DEPDIR)/$(am__dirstamp) hypervisor_policies/idle_policy.lo: \ hypervisor_policies/$(am__dirstamp) \ hypervisor_policies/$(DEPDIR)/$(am__dirstamp) hypervisor_policies/app_driven_policy.lo: \ hypervisor_policies/$(am__dirstamp) \ hypervisor_policies/$(DEPDIR)/$(am__dirstamp) hypervisor_policies/gflops_rate_policy.lo: \ hypervisor_policies/$(am__dirstamp) \ hypervisor_policies/$(DEPDIR)/$(am__dirstamp) hypervisor_policies/feft_lp_policy.lo: \ hypervisor_policies/$(am__dirstamp) \ hypervisor_policies/$(DEPDIR)/$(am__dirstamp) hypervisor_policies/teft_lp_policy.lo: \ hypervisor_policies/$(am__dirstamp) \ hypervisor_policies/$(DEPDIR)/$(am__dirstamp) hypervisor_policies/ispeed_policy.lo: \ hypervisor_policies/$(am__dirstamp) \ hypervisor_policies/$(DEPDIR)/$(am__dirstamp) hypervisor_policies/ispeed_lp_policy.lo: \ hypervisor_policies/$(am__dirstamp) \ hypervisor_policies/$(DEPDIR)/$(am__dirstamp) hypervisor_policies/throughput_lp_policy.lo: \ hypervisor_policies/$(am__dirstamp) \ hypervisor_policies/$(DEPDIR)/$(am__dirstamp) hypervisor_policies/hard_coded_policy.lo: \ hypervisor_policies/$(am__dirstamp) \ hypervisor_policies/$(DEPDIR)/$(am__dirstamp) hypervisor_policies/perf_count_policy.lo: \ hypervisor_policies/$(am__dirstamp) \ hypervisor_policies/$(DEPDIR)/$(am__dirstamp) libsc_hypervisor.la: $(libsc_hypervisor_la_OBJECTS) $(libsc_hypervisor_la_DEPENDENCIES) $(EXTRA_libsc_hypervisor_la_DEPENDENCIES) $(AM_V_CCLD)$(LINK) -rpath $(libdir) $(libsc_hypervisor_la_OBJECTS) $(libsc_hypervisor_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) -rm -f hypervisor_policies/*.$(OBJEXT) -rm -f hypervisor_policies/*.lo -rm -f policies_utils/*.$(OBJEXT) -rm -f policies_utils/*.lo distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sc_config.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sc_hypervisor.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/app_driven_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/feft_lp_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/gflops_rate_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/hard_coded_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/idle_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/ispeed_lp_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/ispeed_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/perf_count_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/teft_lp_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@hypervisor_policies/$(DEPDIR)/throughput_lp_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@policies_utils/$(DEPDIR)/dichotomy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@policies_utils/$(DEPDIR)/lp_programs.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@policies_utils/$(DEPDIR)/lp_tools.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@policies_utils/$(DEPDIR)/policy_tools.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@policies_utils/$(DEPDIR)/speed.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@policies_utils/$(DEPDIR)/task_pool.Plo@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs -rm -rf hypervisor_policies/.libs hypervisor_policies/_libs -rm -rf policies_utils/.libs policies_utils/_libs ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-am TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-am CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-am cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(LTLIBRARIES) $(HEADERS) installdirs: for dir in "$(DESTDIR)$(libdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -rm -f hypervisor_policies/$(DEPDIR)/$(am__dirstamp) -rm -f hypervisor_policies/$(am__dirstamp) -rm -f policies_utils/$(DEPDIR)/$(am__dirstamp) -rm -f policies_utils/$(am__dirstamp) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ mostlyclean-am distclean: distclean-am -rm -f ./$(DEPDIR)/sc_config.Plo -rm -f ./$(DEPDIR)/sc_hypervisor.Plo -rm -f hypervisor_policies/$(DEPDIR)/app_driven_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/feft_lp_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/gflops_rate_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/hard_coded_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/idle_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/ispeed_lp_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/ispeed_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/perf_count_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/teft_lp_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/throughput_lp_policy.Plo -rm -f policies_utils/$(DEPDIR)/dichotomy.Plo -rm -f policies_utils/$(DEPDIR)/lp_programs.Plo -rm -f policies_utils/$(DEPDIR)/lp_tools.Plo -rm -f policies_utils/$(DEPDIR)/policy_tools.Plo -rm -f policies_utils/$(DEPDIR)/speed.Plo -rm -f policies_utils/$(DEPDIR)/task_pool.Plo -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-libLTLIBRARIES install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f ./$(DEPDIR)/sc_config.Plo -rm -f ./$(DEPDIR)/sc_hypervisor.Plo -rm -f hypervisor_policies/$(DEPDIR)/app_driven_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/feft_lp_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/gflops_rate_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/hard_coded_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/idle_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/ispeed_lp_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/ispeed_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/perf_count_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/teft_lp_policy.Plo -rm -f hypervisor_policies/$(DEPDIR)/throughput_lp_policy.Plo -rm -f policies_utils/$(DEPDIR)/dichotomy.Plo -rm -f policies_utils/$(DEPDIR)/lp_programs.Plo -rm -f policies_utils/$(DEPDIR)/lp_tools.Plo -rm -f policies_utils/$(DEPDIR)/policy_tools.Plo -rm -f policies_utils/$(DEPDIR)/speed.Plo -rm -f policies_utils/$(DEPDIR)/task_pool.Plo -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-libLTLIBRARIES .MAKE: install-am install-strip .PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ clean-generic clean-libLTLIBRARIES clean-libtool cscopelist-am \ ctags ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am install-exec \ install-exec-am install-html install-html-am install-info \ install-info-am install-libLTLIBRARIES install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags tags-am uninstall uninstall-am uninstall-libLTLIBRARIES .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/sc_hypervisor/src/hypervisor_policies/000077500000000000000000000000001507764646700235775ustar00rootroot00000000000000starpu-1.4.9+dfsg/sc_hypervisor/src/hypervisor_policies/app_driven_policy.c000066400000000000000000000023341507764646700274530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include static void app_driven_handle_post_exec_hook(unsigned sched_ctx, __attribute__((unused)) int task_tag) { sc_hypervisor_policy_resize_to_unknown_receiver(sched_ctx, 1); } struct sc_hypervisor_policy app_driven_policy = { .size_ctxs = NULL, .handle_poped_task = NULL, .handle_pushed_task = NULL, .handle_idle_cycle = NULL, .handle_idle_end = NULL, .handle_post_exec_hook = app_driven_handle_post_exec_hook, .handle_submitted_job = NULL, .end_ctx = NULL, .init_worker = NULL, .custom = 0, .name = "app_driven" }; starpu-1.4.9+dfsg/sc_hypervisor/src/hypervisor_policies/feft_lp_policy.c000066400000000000000000000247731507764646700267560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "sc_hypervisor_lp.h" #include "sc_hypervisor_policy.h" #include #include unsigned long resize_no = 0; #ifdef STARPU_HAVE_GLPK_H static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers) { /* for vite */ int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs; #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("resize_no = %lu %d ctxs\n", resize_no, ns); #endif if(ns <= 0) return; unsigned *curr_sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs; unsigned curr_nworkers = nworkers == -1 ? starpu_worker_get_count() : (unsigned)nworkers; struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, curr_nworkers); int nw = tw->nw; double nworkers_per_ctx[ns][nw]; int total_nw[nw]; sc_hypervisor_group_workers_by_type(tw, total_nw); struct timeval start_time; struct timeval end_time; gettimeofday(&start_time, NULL); double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(ns, nw, nworkers_per_ctx, total_nw, tw, sched_ctxs); gettimeofday(&end_time, NULL); long diff_s = end_time.tv_sec - start_time.tv_sec; long diff_us = end_time.tv_usec - start_time.tv_usec; __attribute__((unused)) float timing = (float)(diff_s*1000000 + diff_us)/1000.0; if(vmax != -1.0) { /* int nworkers_per_ctx_rounded[ns][nw]; */ /* sc_hypervisor_lp_round_double_to_int(ns, nw, nworkers_per_ctx, nworkers_per_ctx_rounded); */ /* // sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, curr_sched_ctxs, tw); */ /* sc_hypervisor_lp_distribute_resources_in_ctxs(curr_sched_ctxs, ns, nw, nworkers_per_ctx_rounded, nworkers_per_ctx, workers, curr_nworkers, tw); */ sc_hypervisor_lp_distribute_floating_no_resources_in_ctxs(curr_sched_ctxs, ns, nw, nworkers_per_ctx, workers, curr_nworkers, tw); sc_hypervisor_lp_share_remaining_resources(ns, curr_sched_ctxs, curr_nworkers, workers); } #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("*****finished resize \n"); #endif free(tw); return; } static void _try_resizing_hierarchically(unsigned levels, unsigned current_level, unsigned *sched_ctxs, unsigned nsched_ctxs, int *pus, int npus) { if(levels == 0) return; _try_resizing(sched_ctxs, nsched_ctxs, pus, npus); unsigned s; for(s = 0; s < nsched_ctxs; s++) { unsigned *sched_ctxs_child; int nsched_ctxs_child = 0; sc_hypervisor_get_ctxs_on_level(&sched_ctxs_child, &nsched_ctxs_child, current_level+1, sched_ctxs[s]); if(nsched_ctxs_child > 0) { int *pus_father; unsigned npus_father = 0; npus_father = starpu_sched_ctx_get_workers_list(sched_ctxs[s], &pus_father); _try_resizing_hierarchically(levels-1, current_level+1, sched_ctxs_child, nsched_ctxs_child, pus_father, npus_father); free(pus_father); free(sched_ctxs_child); } } return; } static unsigned _get_min_level(unsigned *sched_ctxs, int nsched_ctxs) { unsigned min = sc_hypervisor_get_nhierarchy_levels(); int s; for(s = 0; s < nsched_ctxs; s++) { unsigned level = starpu_sched_ctx_get_hierarchy_level(sched_ctxs[s]); if(level < min) min = level; } return min; } static unsigned _get_first_level(unsigned *sched_ctxs, int nsched_ctxs, unsigned *first_level, int *nsched_ctxs_first_level) { unsigned min = _get_min_level(sched_ctxs, nsched_ctxs); int s; for(s = 0; s < nsched_ctxs; s++) if(starpu_sched_ctx_get_hierarchy_level(sched_ctxs[s]) == min) first_level[(*nsched_ctxs_first_level)++] = sched_ctxs[s]; return min; } static void _resize(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers) { starpu_fxt_trace_user_event(resize_no); unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels(); if(nhierarchy_levels > 1) { if(nsched_ctxs == -1) { unsigned *sched_ctxs2; int nsched_ctxs2; sc_hypervisor_get_ctxs_on_level(&sched_ctxs2, &nsched_ctxs2, 0, STARPU_NMAX_SCHED_CTXS); if(nsched_ctxs2 > 0) { _try_resizing_hierarchically(nhierarchy_levels, 0, sched_ctxs2, nsched_ctxs2, workers, nworkers); free(sched_ctxs2); } } else { unsigned first_level[nsched_ctxs]; int nsched_ctxs_first_level = 0; int min = _get_first_level(sched_ctxs, nsched_ctxs, first_level, &nsched_ctxs_first_level); _try_resizing_hierarchically(nhierarchy_levels, min, first_level, nsched_ctxs_first_level, workers, nworkers); } } else _try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers); resize_no++; } static void _resize_if_speed_diff(unsigned sched_ctx, int worker) { (void)worker; unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels(); if(nhierarchy_levels > 1) { int current_level = (int)starpu_sched_ctx_get_hierarchy_level(sched_ctx); if(current_level == 0) { _resize(NULL, -1, NULL, -1); return; } unsigned father = starpu_sched_ctx_get_inheritor(sched_ctx); int level; int *pus_father_old = NULL; unsigned npus_father_old = 0; unsigned *sched_ctxs_old = NULL; int nsched_ctxs_old = 0; unsigned is_speed_diff = 0; for(level = current_level ; level >= 0; level--) { int *pus_father = NULL; int npus_father = -1; if(level > 0) npus_father = starpu_sched_ctx_get_workers_list(father, &pus_father); unsigned *sched_ctxs = NULL; int nsched_ctxs = 0; is_speed_diff = sc_hypervisor_check_speed_gap_btw_ctxs_on_level(level, pus_father, npus_father, father, &sched_ctxs, &nsched_ctxs); if(!is_speed_diff) { if(level == current_level) { if(pus_father) free(pus_father); if(sched_ctxs) free(sched_ctxs); pus_father = NULL; sched_ctxs = NULL; break; } else { _resize(sched_ctxs_old, nsched_ctxs_old, pus_father_old, npus_father_old); if(pus_father_old) free(pus_father_old); if(sched_ctxs_old) free(sched_ctxs_old); pus_father_old = NULL; sched_ctxs_old = NULL; if(pus_father) free(pus_father); if(nsched_ctxs > 0) free(sched_ctxs); pus_father = NULL; sched_ctxs = NULL; break; } } if(pus_father_old) free(pus_father_old); if(sched_ctxs_old) free(sched_ctxs_old); pus_father_old = pus_father; sched_ctxs_old = sched_ctxs; npus_father_old = npus_father; nsched_ctxs_old = nsched_ctxs; father = level > 1 ? starpu_sched_ctx_get_inheritor(father) : STARPU_NMAX_SCHED_CTXS; } if(is_speed_diff) { if(pus_father_old) free(pus_father_old); if(sched_ctxs_old) free(sched_ctxs_old); _resize(NULL, -1, NULL, -1); } } else { unsigned criteria = sc_hypervisor_get_resize_criteria(); if(criteria != SC_NOTHING && criteria == SC_IDLE) { _resize(NULL, -1, NULL, -1); } else { if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1)) _resize(NULL, -1, NULL, -1); } } return; } static void feft_lp_handle_poped_task(unsigned sched_ctx, int worker, __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint) { if(worker == -2) return; unsigned criteria = sc_hypervisor_get_resize_criteria(); if(criteria != SC_NOTHING && criteria == SC_SPEED) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { _resize_if_speed_diff(sched_ctx, worker); STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } } static void feft_lp_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers) { STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex); struct sc_hypervisor_wrapper* sc_w = NULL; int s = 0; for(s = 0; s < nsched_ctxs; s++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); sc_w->to_be_sized = 1; } _resize(sched_ctxs, nsched_ctxs, workers, nworkers); #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("finished size ctxs\n"); #endif STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } static void _resize_leaves(int worker) { unsigned s; unsigned *sched_ctxs = NULL; unsigned nsched_ctxs = starpu_worker_get_sched_ctx_list(worker, &sched_ctxs); unsigned workers_sched_ctxs[nsched_ctxs]; unsigned nworkers_sched_ctxs = 0; struct sc_hypervisor_wrapper *sc_w = NULL; for(s = 0; s < nsched_ctxs; s++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); if(sc_w->sched_ctx != STARPU_NMAX_SCHED_CTXS) { workers_sched_ctxs[nworkers_sched_ctxs++] = sched_ctxs[s]; } } free(sched_ctxs); unsigned leaves[nsched_ctxs]; int nleaves = 0; sc_hypervisor_get_leaves(workers_sched_ctxs, nworkers_sched_ctxs, leaves, &nleaves); int x; for(x = 0; x < nleaves; x++) _resize_if_speed_diff(leaves[x], worker); } static void feft_lp_handle_idle_cycle(unsigned sched_ctx, int worker) { (void)sched_ctx; unsigned criteria = sc_hypervisor_get_resize_criteria(); if(criteria != SC_NOTHING)// && criteria == SC_IDLE) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { _resize_leaves(worker); STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } } static void feft_lp_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { struct sc_hypervisor_wrapper* sc_w = NULL; int s = 0; for(s = 0; s < nsched_ctxs; s++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops) { STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); return; } } _resize(sched_ctxs, nsched_ctxs, workers, nworkers); STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } struct sc_hypervisor_policy feft_lp_policy = { .size_ctxs = feft_lp_size_ctxs, .resize_ctxs = feft_lp_resize_ctxs, .handle_poped_task = feft_lp_handle_poped_task, .handle_pushed_task = NULL, .handle_idle_cycle = feft_lp_handle_idle_cycle, .handle_idle_end = NULL, .handle_post_exec_hook = NULL, .handle_submitted_job = NULL, .end_ctx = NULL, .init_worker = NULL, .custom = 0, .name = "feft_lp" }; #endif /* STARPU_HAVE_GLPK_H */ starpu-1.4.9+dfsg/sc_hypervisor/src/hypervisor_policies/gflops_rate_policy.c000066400000000000000000000250311507764646700276300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "sc_hypervisor_policy.h" static double _get_total_elapsed_flops_per_sched_ctx(unsigned sched_ctx) { struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx); double ret_val = 0.0; int i; for(i = 0; i < STARPU_NMAXWORKERS; i++) ret_val += sc_w->total_elapsed_flops[i]; return ret_val; } double _get_exp_end(unsigned sched_ctx) { struct sc_hypervisor_wrapper *sc_w = sc_hypervisor_get_wrapper(sched_ctx); double elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w); if(elapsed_flops >= 1.0) { double curr_time = starpu_timing_now(); double elapsed_time = curr_time - sc_w->start_time; double exp_end = (elapsed_time * sc_w->remaining_flops / elapsed_flops) + curr_time; return exp_end; } return -1.0; } /* computes the instructions left to be executed out of the total instructions to execute */ double _get_flops_left_pct(unsigned sched_ctx) { struct sc_hypervisor_wrapper *wrapper = sc_hypervisor_get_wrapper(sched_ctx); double total_elapsed_flops = _get_total_elapsed_flops_per_sched_ctx(sched_ctx); if(wrapper->total_flops == total_elapsed_flops || total_elapsed_flops > wrapper->total_flops) return 0.0; return (wrapper->total_flops - total_elapsed_flops)/wrapper->total_flops; } /* select the workers needed to be moved in order to force the sender and the receiver context to finish simultaneously */ static int* _get_workers_to_move(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int *nworkers) { struct sc_hypervisor_wrapper* sender_sc_w = sc_hypervisor_get_wrapper(sender_sched_ctx); struct sc_hypervisor_wrapper* receiver_sc_w = sc_hypervisor_get_wrapper(receiver_sched_ctx); int *workers = NULL; double v_receiver = sc_hypervisor_get_ctx_speed(receiver_sc_w); double receiver_remainig_flops = receiver_sc_w->remaining_flops; double sender_exp_end = _get_exp_end(sender_sched_ctx); double sender_v_cpu = sc_hypervisor_get_speed_per_worker_type(sender_sc_w, STARPU_CPU_WORKER); double v_for_rctx = (receiver_remainig_flops/(sender_exp_end - starpu_timing_now())) - v_receiver; int nworkers_needed = v_for_rctx/sender_v_cpu; /* printf("%d->%d: v_rec %lf v %lf v_cpu %lf w_needed %d \n", sender_sched_ctx, receiver_sched_ctx, */ /* v_receiver, v_for_rctx, sender_v_cpu, nworkers_needed); */ if(nworkers_needed > 0) { struct sc_hypervisor_policy_config *sender_config = sc_hypervisor_get_config(sender_sched_ctx); int potential_moving_cpus = sc_hypervisor_get_movable_nworkers(sender_config, sender_sched_ctx, STARPU_CPU_WORKER); int potential_moving_gpus = sc_hypervisor_get_movable_nworkers(sender_config, sender_sched_ctx, STARPU_CUDA_WORKER); int sender_nworkers = (int)starpu_sched_ctx_get_nworkers(sender_sched_ctx); struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(receiver_sched_ctx); int nworkers_ctx = (int)starpu_sched_ctx_get_nworkers(receiver_sched_ctx); if(nworkers_needed < (potential_moving_cpus + 5 * potential_moving_gpus)) { if((sender_nworkers - nworkers_needed) >= sender_config->min_nworkers) { if((nworkers_ctx + nworkers_needed) > config->max_nworkers) nworkers_needed = nworkers_ctx > config->max_nworkers ? 0 : (config->max_nworkers - nworkers_ctx); if(nworkers_needed > 0) { int ngpus = nworkers_needed / 5; int *gpus; gpus = sc_hypervisor_get_idlest_workers(sender_sched_ctx, &ngpus, STARPU_CUDA_WORKER); int ncpus = nworkers_needed - ngpus; int *cpus; cpus = sc_hypervisor_get_idlest_workers(sender_sched_ctx, &ncpus, STARPU_CPU_WORKER); workers = (int*)malloc(nworkers_needed*sizeof(int)); int i; printf("%d: gpus: ", nworkers_needed); for(i = 0; i < ngpus; i++) { workers[(*nworkers)++] = gpus[i]; printf("%d ", gpus[i]); } printf(" cpus:"); for(i = 0; i < ncpus; i++) { workers[(*nworkers)++] = cpus[i]; printf("%d ", cpus[i]); } printf("\n"); free(gpus); free(cpus); } } } else { /*if the needed number of workers is to big we only move the number of workers corresponding to the granularity set by the user */ int nworkers_to_move = sc_hypervisor_compute_nworkers_to_move(sender_sched_ctx); if(sender_nworkers - nworkers_to_move >= sender_config->min_nworkers) { int nshared_workers = (int)starpu_sched_ctx_get_nshared_workers(sender_sched_ctx, receiver_sched_ctx); if((nworkers_ctx + nworkers_to_move - nshared_workers) > config->max_nworkers) nworkers_to_move = nworkers_ctx > config->max_nworkers ? 0 : (config->max_nworkers - nworkers_ctx + nshared_workers); if(nworkers_to_move > 0) { workers = sc_hypervisor_get_idlest_workers(sender_sched_ctx, &nworkers_to_move, STARPU_ANY_WORKER); *nworkers = nworkers_to_move; } } } } return workers; } static unsigned _gflops_rate_resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigned force_resize) { int ret = 1; if(force_resize) STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex); else ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { int nworkers_to_move = 0; int *workers_to_move = _get_workers_to_move(sender_sched_ctx, receiver_sched_ctx, &nworkers_to_move); if(nworkers_to_move > 0) { sc_hypervisor_move_workers(sender_sched_ctx, receiver_sched_ctx, workers_to_move, nworkers_to_move, 0); struct sc_hypervisor_policy_config *new_config = sc_hypervisor_get_config(receiver_sched_ctx); int i; for(i = 0; i < nworkers_to_move; i++) new_config->max_idle[workers_to_move[i]] = new_config->max_idle[workers_to_move[i]] !=MAX_IDLE_TIME ? new_config->max_idle[workers_to_move[i]] : new_config->new_workers_max_idle; free(workers_to_move); } STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); return 1; } return 0; } static int _find_fastest_sched_ctx() { unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); double first_exp_end = _get_exp_end(sched_ctxs[0]); int fastest_sched_ctx = first_exp_end == -1.0 ? -1 : (int)sched_ctxs[0]; double curr_exp_end = 0.0; int i; for(i = 1; i < nsched_ctxs; i++) { curr_exp_end = _get_exp_end(sched_ctxs[i]); if((curr_exp_end < first_exp_end || first_exp_end == -1.0) && curr_exp_end != -1.0) { first_exp_end = curr_exp_end; fastest_sched_ctx = sched_ctxs[i]; } } return fastest_sched_ctx; } static int _find_slowest_sched_ctx() { unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); int slowest_sched_ctx = -1; double curr_exp_end = 0.0; double last_exp_end = -1.0; int i; for(i = 0; i < nsched_ctxs; i++) { curr_exp_end = _get_exp_end(sched_ctxs[i]); /*if it hasn't started bc of no resources give it priority */ if(curr_exp_end == -1.0) return sched_ctxs[i]; if(curr_exp_end > last_exp_end) { slowest_sched_ctx = sched_ctxs[i]; last_exp_end = curr_exp_end; } } return slowest_sched_ctx; } static int _find_slowest_available_sched_ctx(unsigned sched_ctx) { unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); int slowest_sched_ctx = -1; double curr_exp_end = 0.0; double last_exp_end = -1.0; int i; for(i = 0; i < nsched_ctxs; i++) { if(sched_ctxs[i] != sched_ctx) { curr_exp_end = _get_exp_end(sched_ctxs[i]); /*if it hasn't started bc of no resources give it priority */ if(curr_exp_end == -1.0) return sched_ctxs[i]; if(last_exp_end < curr_exp_end) { slowest_sched_ctx = sched_ctxs[i]; last_exp_end = curr_exp_end; } } } return slowest_sched_ctx; } static void gflops_rate_resize(unsigned sched_ctx) { _get_exp_end(sched_ctx); double flops_left_pct = _get_flops_left_pct(sched_ctx); /* if the context finished all the instructions it had to execute we move all the resources to the slowest context */ if(flops_left_pct == 0.0f) { int slowest_sched_ctx = _find_slowest_available_sched_ctx(sched_ctx); if(slowest_sched_ctx != -1) { double slowest_flops_left_pct = _get_flops_left_pct(slowest_sched_ctx); if(slowest_flops_left_pct != 0.0f) { struct sc_hypervisor_policy_config* config = sc_hypervisor_get_config(sched_ctx); config->min_nworkers = 0; config->max_nworkers = 0; printf("ctx %u finished & gives away the res to %d; slow_left %lf\n", sched_ctx, slowest_sched_ctx, slowest_flops_left_pct); sc_hypervisor_policy_resize(sched_ctx, slowest_sched_ctx, 1, 1); sc_hypervisor_stop_resize(slowest_sched_ctx); } } } int fastest_sched_ctx = _find_fastest_sched_ctx(); int slowest_sched_ctx = _find_slowest_sched_ctx(); if(fastest_sched_ctx != -1 && slowest_sched_ctx != -1 && fastest_sched_ctx != slowest_sched_ctx) { double fastest_exp_end = _get_exp_end(fastest_sched_ctx); double slowest_exp_end = _get_exp_end(slowest_sched_ctx); if((slowest_exp_end == -1.0 && fastest_exp_end != -1.0) || ((fastest_exp_end + (fastest_exp_end*0.5)) < slowest_exp_end)) { double fast_flops_left_pct = _get_flops_left_pct(fastest_sched_ctx); if(fast_flops_left_pct < 0.8) { struct sc_hypervisor_wrapper *sc_w = sc_hypervisor_get_wrapper(slowest_sched_ctx); double elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w); if((elapsed_flops/sc_w->total_flops) > 0.1) _gflops_rate_resize(fastest_sched_ctx, slowest_sched_ctx, 0); } } } } static void gflops_rate_handle_poped_task(unsigned sched_ctx, __attribute__((unused)) int worker, __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint) { gflops_rate_resize(sched_ctx); } struct sc_hypervisor_policy gflops_rate_policy = { .size_ctxs = NULL, .resize_ctxs = NULL, .handle_poped_task = gflops_rate_handle_poped_task, .handle_pushed_task = NULL, .handle_idle_cycle = NULL, .handle_idle_end = NULL, .handle_post_exec_hook = NULL, .handle_submitted_job = NULL, .end_ctx = NULL, .init_worker = NULL, .custom = 0, .name = "gflops_rate" }; starpu-1.4.9+dfsg/sc_hypervisor/src/hypervisor_policies/hard_coded_policy.c000066400000000000000000000077451507764646700274130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "sc_hypervisor_policy.h" #include "sc_hypervisor_lp.h" #include "sc_hypervisor_policy.h" unsigned hard_coded_worker_belong_to_other_sched_ctx(unsigned sched_ctx, int worker) { unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); int i; for(i = 0; i < nsched_ctxs; i++) if(sched_ctxs[i] != sched_ctx && starpu_sched_ctx_contains_worker(worker, sched_ctxs[i])) return 1; return 0; } void hard_coded_handle_idle_cycle(unsigned sched_ctx, int worker) { unsigned criteria = sc_hypervisor_get_resize_criteria(); if(criteria != SC_NOTHING)// && criteria == SC_SPEED) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { // if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker)) // if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1)) if(sc_hypervisor_check_idle(sched_ctx, worker)) { if(hard_coded_worker_belong_to_other_sched_ctx(sched_ctx, worker)) sc_hypervisor_remove_workers_from_sched_ctx(&worker, 1, sched_ctx, 1); else { // sc_hypervisor_policy_resize_to_unknown_receiver(sched_ctx, 0); unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); int ns = sc_hypervisor_get_nsched_ctxs(); int nworkers = (int)starpu_worker_get_count(); struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(NULL, nworkers); int nw = tw->nw; double w_in_s[ns][nw]; w_in_s[0][0] = 1; w_in_s[0][1] = 3; w_in_s[1][0] = 8; w_in_s[1][1] = 0; // sc_hypervisor_lp_place_resources_in_ctx(ns, nw, w_in_s, sched_ctxs, NULL, 1, tw); sc_hypervisor_lp_distribute_floating_no_resources_in_ctxs(sched_ctxs, ns, tw->nw, w_in_s, NULL, nworkers, tw); free(tw); } } STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } } static void hard_coded_handle_poped_task(unsigned sched_ctx, __attribute__((unused))int worker, struct starpu_task *task, uint32_t footprint) { (void)task; (void)footprint; unsigned criteria = sc_hypervisor_get_resize_criteria(); if(criteria != SC_NOTHING && criteria == SC_SPEED) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker)) { // sc_hypervisor_policy_resize_to_unknown_receiver(sched_ctx, 0); unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); int ns = sc_hypervisor_get_nsched_ctxs(); int nworkers = (int)starpu_worker_get_count(); struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(NULL, nworkers); int nw = tw->nw; double w_in_s[ns][nw]; w_in_s[0][0] = 1; w_in_s[0][1] = 3; w_in_s[1][0] = 8; w_in_s[1][1] = 0; // sc_hypervisor_lp_place_resources_in_ctx(ns, nw, w_in_s, sched_ctxs, NULL, 1, tw); sc_hypervisor_lp_distribute_floating_no_resources_in_ctxs(sched_ctxs, ns, tw->nw, w_in_s, NULL, nworkers, tw); free(tw); } STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } } struct sc_hypervisor_policy hard_coded_policy = { .size_ctxs = NULL, .handle_poped_task = hard_coded_handle_poped_task, .handle_pushed_task = NULL, .handle_idle_cycle = hard_coded_handle_idle_cycle, .handle_idle_end = NULL, .handle_post_exec_hook = NULL, .handle_submitted_job = NULL, .end_ctx = NULL, .init_worker = NULL, .custom = 0, .name = "hard_coded" }; starpu-1.4.9+dfsg/sc_hypervisor/src/hypervisor_policies/idle_policy.c000066400000000000000000000033021507764646700262350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "sc_hypervisor_policy.h" unsigned worker_belong_to_other_sched_ctx(unsigned sched_ctx, int worker) { unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); int i; for(i = 0; i < nsched_ctxs; i++) if(sched_ctxs[i] != sched_ctx && starpu_sched_ctx_contains_worker(worker, sched_ctxs[i])) return 1; return 0; } void idle_handle_idle_cycle(unsigned sched_ctx, int worker) { if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker)) { if(worker_belong_to_other_sched_ctx(sched_ctx, worker)) sc_hypervisor_remove_workers_from_sched_ctx(&worker, 1, sched_ctx, 1); else sc_hypervisor_policy_resize_to_unknown_receiver(sched_ctx, 0); } } struct sc_hypervisor_policy idle_policy = { .size_ctxs = NULL, .handle_poped_task = NULL, .handle_pushed_task = NULL, .handle_idle_cycle = idle_handle_idle_cycle, .handle_idle_end = NULL, .handle_post_exec_hook = NULL, .handle_submitted_job = NULL, .end_ctx = NULL, .init_worker = NULL, .custom = 0, .name = "idle" }; starpu-1.4.9+dfsg/sc_hypervisor/src/hypervisor_policies/ispeed_lp_policy.c000066400000000000000000000175641507764646700273030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "sc_hypervisor_lp.h" #include "sc_hypervisor_policy.h" #include #include struct ispeed_lp_data { double **speed; double *flops; double **flops_on_w; int *workers; }; #ifdef STARPU_HAVE_GLPK_H static double _compute_workers_distrib(int ns, int nw, double final_w_in_s[ns][nw], unsigned is_integer, double tmax, void *specific_data) { struct ispeed_lp_data *sd = (struct ispeed_lp_data *)specific_data; double **speed = sd->speed; double *flops = sd->flops; double **final_flops_on_w = sd->flops_on_w; return sc_hypervisor_lp_simulate_distrib_flops_on_sample(ns, nw, final_w_in_s, is_integer, tmax, speed, flops, final_flops_on_w); } static unsigned _compute_flops_distribution_over_ctxs(int ns, int nw, double w_in_s[ns][nw], double **flops_on_w, unsigned *sched_ctxs, int *workers) { double *flops = (double*)malloc(ns*sizeof(double)); double **speed = (double **)malloc(ns*sizeof(double*)); int i; for(i = 0; i < ns; i++) speed[i] = (double*)malloc(nw*sizeof(double)); int w,s; struct sc_hypervisor_wrapper* sc_w = NULL; for(s = 0; s < ns; s++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); for(w = 0; w < nw; w++) { w_in_s[s][w] = 0.0; int worker = workers == NULL ? w : workers[w]; speed[s][w] = sc_hypervisor_get_speed_per_worker(sc_w, worker); if(speed[s][w] == -1.0) { enum starpu_worker_archtype arch = starpu_worker_get_type(worker); speed[s][w] = sc_hypervisor_get_speed(sc_w, arch); if(arch == STARPU_CUDA_WORKER) { unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx); if(!worker_in_ctx) { double transfer_speed = starpu_transfer_bandwidth(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker)) / 1000; speed[s][w] = (speed[s][w] * transfer_speed) / (speed[s][w] + transfer_speed); } } } // printf("v[w%d][s%d] = %lf\n",w, s, speed[s][w]); } struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[s]); flops[s] = config->ispeed_ctx_sample/1000000000; /* in gflops */ } /* take the exec time of the slowest ctx as starting point and then try to minimize it as increasing it a little for the faster ctxs */ double tmax = sc_hypervisor_get_slowest_ctx_exec_time(); double smallest_tmax = sc_hypervisor_get_fastest_ctx_exec_time(); //tmax - 0.5*tmax; // printf("tmax %lf smallest %lf\n", tmax, smallest_tmax); double tmin = 0.0; struct ispeed_lp_data specific_data; specific_data.speed = speed; specific_data.flops = flops; specific_data.flops_on_w = flops_on_w; specific_data.workers = workers; unsigned found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, tmin, tmax, smallest_tmax, _compute_workers_distrib); for(i = 0; i < ns; i++) free(speed[i]); free(speed); return found_sol; } static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) { int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs; int nw = nworkers == -1 ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */ unsigned *curr_sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs; struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nw); int ntypes_of_workers = tw->nw; double w_in_s[ns][nw]; double **flops_on_w = (double**)malloc(ns*sizeof(double*)); int i; for(i = 0; i < ns; i++) flops_on_w[i] = (double*)malloc(nw*sizeof(double)); struct timeval start_time; struct timeval end_time; gettimeofday(&start_time, NULL); unsigned found_sol = _compute_flops_distribution_over_ctxs(ns, nw, w_in_s, flops_on_w, curr_sched_ctxs, workers); gettimeofday(&end_time, NULL); long diff_s = end_time.tv_sec - start_time.tv_sec; long diff_us = end_time.tv_usec - start_time.tv_usec; __attribute__((unused)) float timing = (float)(diff_s*1000000 + diff_us)/1000.0; /* if we did find at least one solution redistribute the resources */ if(found_sol) { int w, s; double nworkers_per_ctx[ns][ntypes_of_workers]; int nworkers_per_ctx_rounded[ns][ntypes_of_workers]; for(s = 0; s < ns; s++) { for(w = 0; w < ntypes_of_workers; w++) { nworkers_per_ctx[s][w] = 0.0; nworkers_per_ctx_rounded[s][w] = 0; } } for(s = 0; s < ns; s++) { for(w = 0; w < nw; w++) { enum starpu_worker_archtype arch = starpu_worker_get_type(w); int idx = sc_hypervisor_get_index_for_arch(arch, tw); nworkers_per_ctx[s][idx] += w_in_s[s][w]; if(arch == STARPU_CUDA_WORKER) { if(w_in_s[s][w] >= 0.3) nworkers_per_ctx_rounded[s][idx]++; } else { if(w_in_s[s][w] > 0.5) nworkers_per_ctx_rounded[s][idx]++; } } } /* for(s = 0; s < ns; s++) */ /* printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */ /* nworkers_rounded[s][1], nworkers_rounded[s][0]); */ sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, ntypes_of_workers, nworkers_per_ctx_rounded, nworkers_per_ctx, curr_sched_ctxs, tw); } free(tw); for(i = 0; i < ns; i++) free(flops_on_w[i]); free(flops_on_w); } static void ispeed_lp_handle_poped_task(__attribute__((unused))unsigned sched_ctx, __attribute__((unused))int worker, __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { unsigned criteria = sc_hypervisor_get_resize_criteria(); if(criteria != SC_NOTHING && criteria == SC_SPEED) { if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1)) { _try_resizing(NULL, -1, NULL, -1); } } STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } static void ispeed_lp_handle_idle_cycle(unsigned sched_ctx, int worker) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { unsigned criteria = sc_hypervisor_get_resize_criteria(); if(criteria != SC_NOTHING && criteria == SC_IDLE) { if(sc_hypervisor_check_idle(sched_ctx, worker)) { _try_resizing(NULL, -1, NULL, -1); } } STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } static void ispeed_lp_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { _try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers); STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } static void ispeed_lp_end_ctx(__attribute__((unused))unsigned sched_ctx) { /* struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx); */ /* int worker; */ /* for(worker = 0; worker < 12; worker++) */ /* printf("%d/%d: speed %lf\n", worker, sched_ctx, sc_w->ref_speed[worker]); */ return; } struct sc_hypervisor_policy ispeed_lp_policy = { .size_ctxs = NULL, .resize_ctxs = ispeed_lp_resize_ctxs, .handle_poped_task = ispeed_lp_handle_poped_task, .handle_pushed_task = NULL, .handle_idle_cycle = ispeed_lp_handle_idle_cycle, .handle_idle_end = NULL, .handle_post_exec_hook = NULL, .handle_submitted_job = NULL, .end_ctx = ispeed_lp_end_ctx, .init_worker = NULL, .custom = 0, .name = "ispeed_lp" }; #endif /* STARPU_HAVE_GLPK_H */ starpu-1.4.9+dfsg/sc_hypervisor/src/hypervisor_policies/ispeed_policy.c000066400000000000000000000143471507764646700266040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "sc_hypervisor_policy.h" static unsigned _get_fastest_sched_ctx(void) { unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); int fastest_sched_ctx = STARPU_NMAX_SCHED_CTXS; double curr_speed = 0.0; double biggest_speed = 0.0; int i; for(i = 0; i < nsched_ctxs; i++) { curr_speed = sc_hypervisor_get_ctx_speed(sc_hypervisor_get_wrapper(sched_ctxs[i])); if(curr_speed > biggest_speed) { fastest_sched_ctx = sched_ctxs[i]; biggest_speed = curr_speed; } } return fastest_sched_ctx; } static unsigned _get_slowest_sched_ctx(void) { unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); double smallest_speed = sc_hypervisor_get_ctx_speed(sc_hypervisor_get_wrapper(sched_ctxs[0])); unsigned slowest_sched_ctx = smallest_speed == -1.0 ? STARPU_NMAX_SCHED_CTXS : sched_ctxs[0]; double curr_speed = 0.0; int i; for(i = 1; i < nsched_ctxs; i++) { curr_speed = sc_hypervisor_get_ctx_speed(sc_hypervisor_get_wrapper(sched_ctxs[i])); if((curr_speed < smallest_speed || smallest_speed == 0.0) && curr_speed != -1.0) { smallest_speed = curr_speed; slowest_sched_ctx = sched_ctxs[i]; } } return slowest_sched_ctx; } /* get first nworkers with the highest idle time in the context */ static int* _get_slowest_workers(unsigned sched_ctx, int *nworkers, enum starpu_worker_archtype arch) { struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx); struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctx); int *curr_workers = (int*)malloc((*nworkers) * sizeof(int)); int i; for(i = 0; i < *nworkers; i++) curr_workers[i] = -1; struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); int index; int worker; int considered = 0; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); for(index = 0; index < *nworkers; index++) { while(workers->has_next(workers, &it)) { considered = 0; worker = workers->get_next(workers, &it); enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker); if(arch == STARPU_ANY_WORKER || curr_arch == arch) { if(!config->fixed_workers[worker]) { for(i = 0; i < index; i++) { if(curr_workers[i] == worker) { considered = 1; break; } } if(!considered) { double worker_speed = sc_hypervisor_get_speed_per_worker(sc_w, worker); if(worker_speed != -1.0) { /* the first iteration*/ if(curr_workers[index] < 0) curr_workers[index] = worker; /* small priority worker is the first to leave the ctx*/ else if(config->priority[worker] < config->priority[curr_workers[index]]) curr_workers[index] = worker; /* if we don't consider priorities check for the workers with the biggest idle time */ else if(config->priority[worker] == config->priority[curr_workers[index]]) { double curr_worker_speed = sc_hypervisor_get_speed_per_worker(sc_w, curr_workers[index]); // printf("speed[%d] = %lf speed[%d] = %lf\n", worker, worker_speed, curr_workers[index], curr_worker_speed); if(worker_speed < curr_worker_speed && curr_worker_speed != -1.0) { curr_workers[index] = worker; } } } } } } } if(curr_workers[index] < 0) { *nworkers = index; break; } } return curr_workers; } static void ispeed_handle_poped_task(unsigned sched_ctx, int worker, __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { if(sc_hypervisor_criteria_fulfilled(sched_ctx, worker)) { unsigned fastest_sched_ctx = _get_fastest_sched_ctx(); unsigned slowest_sched_ctx = _get_slowest_sched_ctx(); if(fastest_sched_ctx != STARPU_NMAX_SCHED_CTXS && slowest_sched_ctx != STARPU_NMAX_SCHED_CTXS && fastest_sched_ctx != slowest_sched_ctx) { int nworkers_to_move = sc_hypervisor_compute_nworkers_to_move(fastest_sched_ctx); if(nworkers_to_move > 0) { int *workers_to_move = _get_slowest_workers(fastest_sched_ctx, &nworkers_to_move, STARPU_ANY_WORKER); if(nworkers_to_move > 0) { double new_speed = 0.0; int i; for(i = 0; i < nworkers_to_move; i++) new_speed += sc_hypervisor_get_speed_per_worker(sc_hypervisor_get_wrapper(fastest_sched_ctx), workers_to_move[i]); double fastest_speed = sc_hypervisor_get_ctx_speed(sc_hypervisor_get_wrapper(fastest_sched_ctx)); double slowest_speed = sc_hypervisor_get_ctx_speed(sc_hypervisor_get_wrapper(slowest_sched_ctx)); // printf("fast_speed(%d) %lf slow_speed(%d) %lf new speed(%d) %lf \n", fastest_sched_ctx, fastest_speed, slowest_sched_ctx, // slowest_speed, workers_to_move[0], new_speed); if(fastest_speed != -1.0 && slowest_speed != -1.0 && (slowest_speed + new_speed) <= (fastest_speed - new_speed)) { sc_hypervisor_move_workers(fastest_sched_ctx, slowest_sched_ctx, workers_to_move, nworkers_to_move, 0); } } free(workers_to_move); } } } STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } struct sc_hypervisor_policy ispeed_policy = { .size_ctxs = NULL, .handle_poped_task = ispeed_handle_poped_task, .handle_pushed_task = NULL, .handle_idle_cycle = NULL, .handle_idle_end = NULL, .handle_post_exec_hook = NULL, .handle_submitted_job = NULL, .end_ctx = NULL, .init_worker = NULL, .custom = 0, .name = "ispeed" }; starpu-1.4.9+dfsg/sc_hypervisor/src/hypervisor_policies/perf_count_policy.c000066400000000000000000000325151507764646700274740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "sc_hypervisor_policy.h" #include #include #include #include #include #include #include #include #include #include struct perf_event_attr pe_instr[STARPU_NMAXWORKERS]; /* struct perf_event_attr pe_cycles[STARPU_NMAXWORKERS]; */ /* struct perf_event_attr pe_cache_misses[STARPU_NMAXWORKERS]; */ /* struct perf_event_attr pe_cache_refs[STARPU_NMAXWORKERS]; */ /* struct perf_event_attr pe_branch_instr[STARPU_NMAXWORKERS]; */ struct perf_event_attr pe_fps[STARPU_NMAXWORKERS]; int fd_instr[STARPU_NMAXWORKERS]; /* int fd_cycles[STARPU_NMAXWORKERS]; */ /* int fd_cache_misses[STARPU_NMAXWORKERS]; */ /* int fd_cache_refs[STARPU_NMAXWORKERS]; */ /* int fd_branch_instr[STARPU_NMAXWORKERS]; */ int fd_fps[STARPU_NMAXWORKERS]; unsigned perf_event_opened[STARPU_NMAXWORKERS]; long long total_instr[STARPU_NMAX_SCHED_CTXS]; /* long long total_cycles[STARPU_NMAX_SCHED_CTXS]; */ /* long long total_time[STARPU_NMAX_SCHED_CTXS]; */ /* long long total_cache_misses[STARPU_NMAX_SCHED_CTXS]; */ /* long long total_cache_refs[STARPU_NMAX_SCHED_CTXS]; */ /* long long total_branch_instr[STARPU_NMAX_SCHED_CTXS]; */ long long total_fps[STARPU_NMAX_SCHED_CTXS]; struct read_format { uint64_t value; /* The value of the event */ uint64_t time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */ uint64_t time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */ uint64_t id; /* if PERF_FORMAT_ID */ }; static long perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) { int ret = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); return ret; } void print_results_for_worker(int workerid, unsigned sched_ctx, struct starpu_task *task) { ssize_t rread; long long instr, /*cycles, cache_misses, cache_refs, branch_instr,*/ fps; rread = read(fd_instr[workerid], &instr, sizeof(instr)); assert(rread == sizeof(instr)); /* read(fd_cycles[workerid], &cycles, sizeof(long long)); */ /* read(fd_cache_misses[workerid], &cache_misses, sizeof(long long)); */ /* read(fd_cache_refs[workerid], &cache_refs, sizeof(long long)); */ /* read(fd_branch_instr[workerid], &branch_instr, sizeof(long long)); */ rread = read(fd_fps[workerid], &fps, sizeof(long long)); assert(rread == sizeof(long long)); total_instr[sched_ctx] += instr; /* total_cycles[sched_ctx] += cycles; */ /* total_cache_misses[sched_ctx] += cache_misses; */ /* total_cache_refs[sched_ctx] += cache_refs; */ /* total_branch_instr[sched_ctx] += branch_instr; */ total_fps[sched_ctx] += fps; printf("Instrs %lf M instr of worker %lf M\n", (double)total_instr[sched_ctx]/1000000, (double)instr/1000000); printf("Fps %lf M curr fps %lf M \n", (double)total_fps[sched_ctx]/1000000, (double)fps/1000000); printf("Task Flops %lf k %s \n", task->flops/1000, (task->cl && task->cl->model) ? task->cl->model->symbol : "task null"); printf("-------------------------------------------\n"); } void print_results_for_ctx(unsigned sched_ctx, struct starpu_task *task) { long long curr_total_instr = 0; /* long long curr_total_cycles = 0; */ /* long long curr_total_cache_misses = 0; */ /* long long curr_total_cache_refs = 0; */ /* long long curr_total_branch_instr = 0; */ long long curr_total_fps = 0; struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); struct starpu_sched_ctx_iterator it; int workerid; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { ssize_t rread; workerid = workers->get_next(workers, &it); // Read event counter value struct read_format instr, /*cycles, cache_misses, cache_refs, branch_instr,*/ fps; rread = read(fd_instr[workerid], &instr, sizeof(struct read_format)); assert(rread==sizeof(struct read_format)); /* read(fd_cycles[workerid], &cycles, sizeof(long long)); */ /* read(fd_cache_misses[workerid], &cache_misses, sizeof(long long)); */ /* read(fd_cache_refs[workerid], &cache_refs, sizeof(long long)); */ /* read(fd_branch_instr[workerid], &branch_instr, sizeof(long long)); */ rread = read(fd_fps[workerid], &fps, sizeof(struct read_format)); assert(rread == sizeof(struct read_format)); curr_total_instr += (instr.time_enabled != 0 && instr.time_running !=0) ? instr.value * instr.time_enabled/instr.time_running : instr.value; printf("w%d instr time enabled %"PRIu64" time running %"PRIu64" \n", workerid, instr.time_enabled, instr.time_running); /* curr_total_cycles += cycles; */ /* curr_total_cache_misses += cache_misses; */ /* curr_total_cache_refs += cache_refs; */ /* curr_total_branch_instr += branch_instr; */ curr_total_fps += (fps.time_enabled != 0 && fps.time_running !=0) ? fps.value * fps.time_enabled/fps.time_running : fps.value; printf("w%d fps time enabled %lu time running %lu \n", workerid, fps.time_enabled, fps.time_running); } total_instr[sched_ctx] += curr_total_instr; /* total_cycles[sched_ctx] += curr_total_cycles; */ /* total_cache_misses[sched_ctx] += curr_total_cache_misses; */ /* total_cache_refs[sched_ctx] += curr_total_cache_refs; */ /* total_branch_instr[sched_ctx] += curr_total_branch_instr; */ total_fps[sched_ctx] += curr_total_fps; printf("%u: Instrs %lf k curr instr %lf k\n", sched_ctx, (double)total_instr[sched_ctx]/1000, (double)curr_total_instr/1000); printf("%u: Fps %lf k curr fps %lf k\n", sched_ctx, (double)total_fps[sched_ctx]/1000, (double)curr_total_fps/1000); printf("%u: Task Flops %lf k %s \n", sched_ctx, task->flops/1000, (task->cl && task->cl->model) ? task->cl->model->symbol : "task null"); printf("-------------------------------------------\n"); } void config_event(struct perf_event_attr *event, unsigned with_time, uint64_t event_type, uint64_t config_type) { memset(event, 0, sizeof(struct perf_event_attr)); event->type = event_type; event->size = sizeof(struct perf_event_attr); event->config = config_type; event->disabled = 1; // Event is initially disabled event->exclude_kernel = 1; // excluding events that happen in the kernel space if(with_time) { /* if the PMU is multiplexing several events we measure the time spent to actually measure this event (time_running) and compare it to the one expected is did, thus we compute the precision of the counter*/ event->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING; } } void open_event(int *file_desc, struct perf_event_attr *event, int group_fd) { *file_desc = perf_event_open(event, 0, -1, group_fd, 0); if (*file_desc == -1) { fprintf(stderr, "Error opening leader %llx\n", event->config); perror("perf_event_open"); exit(0); } } void config_all_events_for_worker(int workerid) { config_event(&pe_instr[workerid], 1, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); /* config_event(&pe_cycles[workerid], 0, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); */ /* config_event(&pe_cache_misses[workerid], 0, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); */ /* config_event(&pe_cache_refs[workerid], 0, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); */ /* config_event(&pe_branch_instr[workerid], 0, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); */ config_event(&pe_fps[workerid], 1, PERF_TYPE_RAW, 0x1010); } void open_all_events_for_worker(int curr_workerid) { open_event(&fd_instr[curr_workerid], &pe_instr[curr_workerid], -1); /* open_event(&fd_cycles[curr_workerid], &pe_cycles[curr_workerid], fd_instr[curr_workerid]); */ /* open_event(&fd_cache_misses[curr_workerid], &pe_cache_misses[curr_workerid], fd_instr[curr_workerid]); */ /* open_event(&fd_cache_refs[curr_workerid], &pe_cache_refs[curr_workerid], fd_instr[curr_workerid]); */ /* open_event(&fd_branch_instr[curr_workerid], &pe_branch_instr[curr_workerid], fd_instr[curr_workerid]); */ open_event(&fd_fps[curr_workerid], &pe_fps[curr_workerid], fd_instr[curr_workerid]); } void close_all_events_for_worker(int curr_workerid) { close(fd_instr[curr_workerid]); /* close(fd_cycles[curr_workerid]); */ /* close(fd_cache_misses[curr_workerid]); */ /* close(fd_cache_refs[curr_workerid]); */ /* close(fd_branch_instr[curr_workerid]); */ close(fd_fps[curr_workerid]); } void start_monitoring_all_events_for_worker(int workerid) { ioctl(fd_instr[workerid], PERF_EVENT_IOC_RESET, 0); ioctl(fd_instr[workerid], PERF_EVENT_IOC_ENABLE, 0); /* ioctl(fd_cycles[workerid], PERF_EVENT_IOC_RESET, 0); */ /* ioctl(fd_cycles[workerid], PERF_EVENT_IOC_ENABLE, 0); */ /* ioctl(fd_cache_misses[workerid], PERF_EVENT_IOC_RESET, 0); */ /* ioctl(fd_cache_misses[workerid], PERF_EVENT_IOC_ENABLE, 0); */ /* ioctl(fd_cache_refs[workerid], PERF_EVENT_IOC_RESET, 0); */ /* ioctl(fd_cache_refs[workerid], PERF_EVENT_IOC_ENABLE, 0); */ /* ioctl(fd_branch_instr[workerid], PERF_EVENT_IOC_RESET, 0); */ /* ioctl(fd_branch_instr[workerid], PERF_EVENT_IOC_ENABLE, 0); */ ioctl(fd_fps[workerid], PERF_EVENT_IOC_RESET, 0); ioctl(fd_fps[workerid], PERF_EVENT_IOC_ENABLE, 0); } void stop_monitoring_all_events_for_worker(int workerid) { ioctl(fd_instr[workerid], PERF_EVENT_IOC_DISABLE, 0); /* ioctl(fd_cycles[workerid], PERF_EVENT_IOC_DISABLE, 0); */ /* ioctl(fd_cache_misses[workerid], PERF_EVENT_IOC_DISABLE, 0); */ /* ioctl(fd_cache_refs[workerid], PERF_EVENT_IOC_DISABLE, 0); */ /* ioctl(fd_branch_instr[workerid], PERF_EVENT_IOC_DISABLE, 0); */ ioctl(fd_fps[workerid], PERF_EVENT_IOC_DISABLE, 0); } void perf_count_handle_idle_end(unsigned sched_ctx, int worker) { unsigned has_starpu_scheduler; unsigned has_awake_workers; has_starpu_scheduler = starpu_sched_ctx_has_starpu_scheduler(sched_ctx, &has_awake_workers); if(!has_starpu_scheduler && !has_awake_workers) { struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); struct starpu_sched_ctx_iterator it; int workerid; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { workerid = workers->get_next(workers, &it); if(perf_event_opened[workerid]) start_monitoring_all_events_for_worker(workerid); } } else { if(!perf_event_opened[worker]) { config_all_events_for_worker(worker); open_all_events_for_worker(worker); perf_event_opened[worker] = 1; } start_monitoring_all_events_for_worker(worker); } } void perf_count_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, __attribute__((unused))uint32_t footprint) { unsigned has_starpu_scheduler; unsigned has_awake_workers; has_starpu_scheduler = starpu_sched_ctx_has_starpu_scheduler(sched_ctx, &has_awake_workers); if(!has_starpu_scheduler && !has_awake_workers) { struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); struct starpu_sched_ctx_iterator it; int workerid; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { workerid = workers->get_next(workers, &it); if(perf_event_opened[workerid]) stop_monitoring_all_events_for_worker(workerid); } // printf("worker requesting %d in ctx %d \n", starpu_worker_get_id(), sched_ctx); print_results_for_ctx(sched_ctx, task); } else { if(perf_event_opened[worker]) stop_monitoring_all_events_for_worker(worker); print_results_for_worker(worker, sched_ctx, task); } } void perf_count_init_worker(int workerid, unsigned sched_ctx) { (void)sched_ctx; if(!perf_event_opened[workerid]) { open_all_events_for_worker(workerid); perf_event_opened[workerid] = 1; } else { close_all_events_for_worker(workerid); open_all_events_for_worker(workerid); } } void perf_count_start_ctx(unsigned sched_ctx) { struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); struct starpu_sched_ctx_iterator it; int workerid; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { workerid = workers->get_next(workers, &it); config_all_events_for_worker(workerid); } } void perf_count_end_ctx(unsigned sched_ctx) { struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); struct starpu_sched_ctx_iterator it; int workerid; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { workerid = workers->get_next(workers, &it); close_all_events_for_worker(workerid); } } struct sc_hypervisor_policy perf_count_policy = { .size_ctxs = NULL, .handle_poped_task = perf_count_handle_poped_task, .handle_pushed_task = NULL, .handle_idle_cycle = NULL, .handle_idle_end = perf_count_handle_idle_end, .handle_post_exec_hook = NULL, .handle_submitted_job = NULL, .end_ctx = perf_count_end_ctx, .start_ctx = perf_count_start_ctx, .init_worker = perf_count_init_worker, .custom = 0, .name = "perf_count" }; starpu-1.4.9+dfsg/sc_hypervisor/src/hypervisor_policies/teft_lp_policy.c000066400000000000000000000242511507764646700267630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "sc_hypervisor_lp.h" #include "sc_hypervisor_policy.h" #include #include static struct sc_hypervisor_policy_task_pool *task_pools = NULL; static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; struct teft_lp_data { int nt; double **tasks; unsigned *in_sched_ctxs; int *workers; struct sc_hypervisor_policy_task_pool *tmp_task_pools; unsigned size_ctxs; }; static double _compute_workers_distrib(int ns, int nw, double final_w_in_s[ns][nw], unsigned is_integer, double tmax, void *specific_data) { struct teft_lp_data *sd = (struct teft_lp_data *)specific_data; int nt = sd->nt; double **final_tasks = sd->tasks; unsigned *in_sched_ctxs = sd->in_sched_ctxs; int *workers = sd->workers; struct sc_hypervisor_policy_task_pool *tmp_task_pools = sd->tmp_task_pools; unsigned size_ctxs = sd->size_ctxs; if(tmp_task_pools == NULL) return 0.0; double w_in_s[ns][nw]; double tasks[nw][nt]; double times[nw][nt]; /* times in ms */ sc_hypervisor_get_tasks_times(nw, nt, times, workers, size_ctxs, task_pools); double res = 0.0; #ifdef STARPU_HAVE_GLPK_H res = sc_hypervisor_lp_simulate_distrib_tasks(ns, nw, nt, w_in_s, tasks, times, is_integer, tmax, in_sched_ctxs, tmp_task_pools); #endif //STARPU_HAVE_GLPK_H if(res != 0.0) { int s, w, t; for(s = 0; s < ns; s++) for(w = 0; w < nw; w++) final_w_in_s[s][w] = w_in_s[s][w]; for(w = 0; w < nw; w++) for(t = 0; t < nt; t++) final_tasks[w][t] = tasks[w][t]; } return res; } static void _size_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) { int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs; int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */ int nt = 0; /* Number of different kinds of tasks */ struct sc_hypervisor_policy_task_pool * tp; for (tp = task_pools; tp; tp = tp->next) nt++; double w_in_s[ns][nw]; double **tasks=(double**)malloc(nw*sizeof(double*)); int i; for(i = 0; i < nw; i++) tasks[i] = (double*)malloc(nt*sizeof(double)); struct teft_lp_data specific_data; specific_data.nt = nt; specific_data.tasks = tasks; specific_data.in_sched_ctxs = sched_ctxs; specific_data.workers = workers; specific_data.tmp_task_pools = task_pools; specific_data.size_ctxs = 1; /* smallest possible tmax, difficult to obtain as we compute the nr of flops and not the tasks */ /*lp computes it in s but it's converted to ms just before return */ double possible_tmax = sc_hypervisor_lp_get_tmax(nw, workers); double smallest_tmax = possible_tmax / 3; double tmax = possible_tmax * ns; double tmin = 0.0; unsigned found_sol = 0; if(nt > 0 && tmax > 0.0) { found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, tmin, tmax, smallest_tmax, _compute_workers_distrib); } /* if we did find at least one solution redistribute the resources */ if(found_sol) { struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nw); sc_hypervisor_lp_place_resources_in_ctx(ns, nw, w_in_s, sched_ctxs, workers, 1, tw); free(tw); } for(i = 0; i < nw; i++) free(tasks[i]); free(tasks); } static void size_if_required() { int nsched_ctxs, nworkers; unsigned *sched_ctxs; int *workers; unsigned has_req = sc_hypervisor_get_size_req(&sched_ctxs, &nsched_ctxs, &workers, &nworkers); if(has_req) { struct sc_hypervisor_wrapper* sc_w = NULL; unsigned ready_to_size = 1; int s; STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex); for(s = 0; s < nsched_ctxs; s++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); // if(sc_w->submitted_flops < sc_w->total_flops) if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops) ready_to_size = 0; } if(ready_to_size) { _size_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers); sc_hypervisor_free_size_req(); } STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } static void teft_lp_handle_submitted_job(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, size_t data_size) { /* count the tasks of the same type */ STARPU_PTHREAD_MUTEX_LOCK(&mutex); sc_hypervisor_policy_add_task_to_pool(cl, sched_ctx, footprint, &task_pools, data_size); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); size_if_required(); } static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) { int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs; if(ns < 2) return; int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */ sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs; int nt = 0; /* Number of different kinds of tasks */ // STARPU_PTHREAD_MUTEX_LOCK(&mutex); /* we don't take the mutex bc a correct value of the number of tasks is not required but we do a copy in order to be sure that the linear progr won't segfault if the list of submitted task will change during the exec */ struct sc_hypervisor_policy_task_pool *tp = NULL; struct sc_hypervisor_policy_task_pool *tmp_task_pools = sc_hypervisor_policy_clone_task_pool(task_pools); for (tp = task_pools; tp; tp = tp->next) nt++; double w_in_s[ns][nw]; double **tasks_per_worker=(double**)malloc(nw*sizeof(double*)); int i; for(i = 0; i < nw; i++) tasks_per_worker[i] = (double*)malloc(nt*sizeof(double)); struct teft_lp_data specific_data; specific_data.nt = nt; specific_data.tasks = tasks_per_worker; specific_data.in_sched_ctxs = NULL; specific_data.workers = NULL; specific_data.tmp_task_pools = tmp_task_pools; specific_data.size_ctxs = 0; /* smallest possible tmax, difficult to obtain as we compute the nr of flops and not the tasks */ /*lp computes it in s but it's converted to ms just before return */ double possible_tmax = sc_hypervisor_lp_get_tmax(nw, NULL); double smallest_tmax = possible_tmax/2.0; double tmax = possible_tmax + smallest_tmax; double tmin = smallest_tmax; unsigned found_sol = 0; if(nt > 0 && tmax > 0.0) { struct timeval start_time; struct timeval end_time; gettimeofday(&start_time, NULL); found_sol = sc_hypervisor_lp_execute_dichotomy(ns, nw, w_in_s, 1, (void*)&specific_data, tmin, tmax, smallest_tmax, _compute_workers_distrib); gettimeofday(&end_time, NULL); long diff_s = end_time.tv_sec - start_time.tv_sec; long diff_us = end_time.tv_usec - start_time.tv_usec; __attribute__((unused)) float timing = (float)(diff_s*1000000 + diff_us)/1000.0; } // STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); /* if we did find at least one solution redistribute the resources */ if(found_sol) { struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nw); sc_hypervisor_lp_place_resources_in_ctx(ns, nw, w_in_s, sched_ctxs, workers, 0, tw); free(tw); } struct sc_hypervisor_policy_task_pool *next = NULL; struct sc_hypervisor_policy_task_pool *tmp_tp = tmp_task_pools; while(tmp_task_pools) { next = tmp_tp->next; free(tmp_tp); tmp_tp = next; tmp_task_pools = next; } for(i = 0; i < nw; i++) free(tasks_per_worker[i]); free(tasks_per_worker); } static void teft_lp_handle_poped_task(unsigned sched_ctx, int worker, struct starpu_task *task, uint32_t footprint) { (void)sched_ctx; if(worker > -2) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { unsigned criteria = sc_hypervisor_get_resize_criteria(); if(criteria != SC_NOTHING && criteria == SC_SPEED) { if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1)) { _try_resizing(NULL, -1, NULL, -1); } } STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } /* too expensive to take this mutex and correct value of the number of tasks is not compulsory */ STARPU_PTHREAD_MUTEX_LOCK(&mutex); sc_hypervisor_policy_remove_task_from_pool(task, footprint, &task_pools); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } static void teft_lp_handle_idle_cycle(unsigned sched_ctx, int worker) { (void)sched_ctx; (void)worker; unsigned criteria = sc_hypervisor_get_resize_criteria(); if(criteria != SC_NOTHING)// && criteria == SC_IDLE) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { _try_resizing(NULL, -1, NULL, -1); STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } return; } static void teft_lp_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) { sc_hypervisor_save_size_req(sched_ctxs, nsched_ctxs, workers, nworkers); } static void teft_lp_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { struct sc_hypervisor_wrapper* sc_w = NULL; int s = 0; for(s = 0; s < nsched_ctxs; s++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); if((sc_w->submitted_flops + (0.1*sc_w->total_flops)) < sc_w->total_flops) { STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); return; } } _try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers); STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } struct sc_hypervisor_policy teft_lp_policy = { .size_ctxs = teft_lp_size_ctxs, .resize_ctxs = teft_lp_resize_ctxs, .handle_poped_task = teft_lp_handle_poped_task, .handle_pushed_task = NULL, .handle_idle_cycle = teft_lp_handle_idle_cycle, .handle_idle_end = NULL, .handle_post_exec_hook = NULL, .handle_submitted_job = teft_lp_handle_submitted_job, .end_ctx = NULL, .init_worker = NULL, .custom = 0, .name = "teft_lp" }; starpu-1.4.9+dfsg/sc_hypervisor/src/hypervisor_policies/throughput_lp_policy.c000066400000000000000000000222741507764646700302350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "sc_hypervisor_lp.h" #include "sc_hypervisor_policy.h" #include #include static double _glp_resolve(int ns, int nw, double speed[ns][nw], double w_in_s[ns][nw], unsigned integer); static unsigned _compute_max_speed(int ns, int nw, double w_in_s[ns][nw], unsigned *in_sched_ctxs, int *workers) { double speed[ns][nw]; unsigned *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs; int w,s; struct sc_hypervisor_wrapper* sc_w = NULL; for(s = 0; s < ns; s++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); for(w = 0; w < nw; w++) { w_in_s[s][w] = 0.0; int worker = workers == NULL ? w : workers[w]; enum starpu_worker_archtype arch = starpu_worker_get_type(worker); speed[s][w] = sc_hypervisor_get_speed(sc_w, arch); } } struct timeval start_time; struct timeval end_time; gettimeofday(&start_time, NULL); double res = _glp_resolve(ns, nw, speed, w_in_s, 1); gettimeofday(&end_time, NULL); long diff_s = end_time.tv_sec - start_time.tv_sec; long diff_us = end_time.tv_usec - start_time.tv_usec; __attribute__((unused)) float timing = (float)(diff_s*1000000 + diff_us)/1000; if(res > 0.0) return 1; return 0; } /* * GNU Linear Programming Kit backend */ #ifdef STARPU_HAVE_GLPK_H #include static double _glp_resolve(int ns, int nw, double speed[ns][nw], double w_in_s[ns][nw], unsigned integer) { int w = 0, s = 0; glp_prob *lp; lp = glp_create_prob(); glp_set_prob_name(lp, "StarPU theoretical bound"); glp_set_obj_dir(lp, GLP_MAX); glp_set_obj_name(lp, "total speed"); { int ne = 2 * ns * nw /* worker execution time */ + 1 + 1 ; /* glp dumbness */ int n = 1; int ia[ne], ja[ne]; double ar[ne]; /* Variables: x[s][w] the acknwoledgment that the worker w belongs to the context s */ glp_add_cols(lp, nw*ns + 1); for(s = 0; s < ns; s++) for(w = 0; w < nw; w++) { char name[32]; snprintf(name, sizeof(name), "w%ds%dn", w, s); glp_set_col_name(lp, s*nw+w+1, name); if (integer) { glp_set_col_kind(lp, s*nw+w+1, GLP_IV); glp_set_col_bnds(lp, s*nw+w+1, GLP_DB, 0, 1); } else glp_set_col_bnds(lp, s*nw+w+1, GLP_DB, 0.0, 1.0); } /* vmax should be positif */ /* Z = vmax structural variable, x[s][w] are auxiliary variables */ glp_set_col_name(lp, nw*ns+1, "vmax"); glp_set_col_bnds(lp, nw*ns+1, GLP_LO, 0.0, 0.0); glp_set_obj_coef(lp, nw*ns+1, 1.); int curr_row_idx = 0; /* Total worker speed */ glp_add_rows(lp, 1); /*sum(x[s][w]*speed[s][w]) >= vmax */ char name[32], title[64]; starpu_worker_get_name(w, name, sizeof(name)); snprintf(title, sizeof(title), "worker %s", name); glp_set_row_name(lp, curr_row_idx + 1, title); for(s = 0; s < ns; s++) { for (w = 0; w < nw; w++) { /* x[s][w] */ ia[n] = curr_row_idx + 1; ja[n] = s*nw+w+1; ar[n] = speed[s][w]; n++; } } /* vmax */ ia[n] = curr_row_idx + 1; ja[n] = nw*ns+1; ar[n] = (-1); n++; glp_set_row_bnds(lp, curr_row_idx + 1, GLP_LO, 0.0, 0.0); curr_row_idx += 1 ; /* sum(x[s][w]) = 1 */ glp_add_rows(lp, nw); for (w = 0; w < nw; w++) { starpu_worker_get_name(w, name, sizeof(name)); snprintf(title, sizeof(title), "w%x", w); glp_set_row_name(lp, curr_row_idx+w+1, title); for(s = 0; s < ns; s++) { ia[n] = curr_row_idx+w+1; ja[n] = s*nw+w+1; ar[n] = 1; n++; } if(integer) glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1); else glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0); } if(n != ne) printf("ns= %d nw = %d n = %d ne = %d\n", ns, nw, n, ne); STARPU_ASSERT(n == ne); glp_load_matrix(lp, ne-1, ia, ja, ar); } glp_smcp parm; glp_init_smcp(&parm); parm.msg_lev = GLP_MSG_OFF; int ret = glp_simplex(lp, &parm); if (ret) { glp_delete_prob(lp); lp = NULL; return 0.0; } if (integer) { glp_iocp iocp; glp_init_iocp(&iocp); iocp.msg_lev = GLP_MSG_OFF; glp_intopt(lp, &iocp); int stat = glp_mip_status(lp); /* if we don't have a solution return */ if(stat == GLP_NOFEAS) { glp_delete_prob(lp); lp = NULL; return 0.0; } } int stat = glp_get_prim_stat(lp); /* if we don't have a solution return */ if(stat == GLP_NOFEAS) { glp_delete_prob(lp); lp = NULL; printf("No sol!!!\n"); return 0.0; } double res = glp_get_obj_val(lp); for(s = 0; s < ns; s++) for(w = 0; w < nw; w++) { if (integer) w_in_s[s][w] = (double)glp_mip_col_val(lp, s*nw+w+1); else w_in_s[s][w] = glp_get_col_prim(lp, s*nw+w+1); } glp_delete_prob(lp); return res; } static void _try_resizing(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) { int ns = sched_ctxs == NULL ? sc_hypervisor_get_nsched_ctxs() : nsched_ctxs; int nw = workers == NULL ? (int)starpu_worker_get_count() : nworkers; /* Number of different workers */ sched_ctxs = sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs; double w_in_s[ns][nw]; unsigned found_sol = _compute_max_speed(ns, nw, w_in_s, sched_ctxs, workers); /* if we did find at least one solution redistribute the resources */ if(found_sol) { struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nw); int w, s; double nworkers_per_ctx[ns][tw->nw]; int nworkers_per_ctx_rounded[ns][tw->nw]; for(s = 0; s < ns; s++) { for(w = 0; w < nw; w++) { nworkers_per_ctx[s][w] = 0.0; nworkers_per_ctx_rounded[s][w] = 0; } } for(s = 0; s < ns; s++) { for(w = 0; w < nw; w++) { enum starpu_worker_archtype arch = starpu_worker_get_type(w); int idx = sc_hypervisor_get_index_for_arch(STARPU_CUDA_WORKER, tw); nworkers_per_ctx[s][idx] += w_in_s[s][w]; if(arch == STARPU_CUDA_WORKER) { if(w_in_s[s][w] >= 0.3) nworkers_per_ctx_rounded[s][idx]++; } else { idx = sc_hypervisor_get_index_for_arch(STARPU_CPU_WORKER, tw); nworkers_per_ctx[s][idx] += w_in_s[s][w]; if(w_in_s[s][w] > 0.5) nworkers_per_ctx_rounded[s][idx]++; } } } /* for(s = 0; s < ns; s++) */ /* printf("%d: cpus = %lf gpus = %lf cpus_round = %d gpus_round = %d\n", s, nworkers[s][1], nworkers[s][0], */ /* nworkers_rounded[s][1], nworkers_rounded[s][0]); */ sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, tw->nw, nworkers_per_ctx_rounded, nworkers_per_ctx, sched_ctxs, tw); free(tw); } } static void throughput_lp_handle_poped_task(__attribute__((unused))unsigned sched_ctx, __attribute__((unused))int worker, __attribute__((unused))struct starpu_task *task, __attribute__((unused))uint32_t footprint) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { unsigned criteria = sc_hypervisor_get_resize_criteria(); if(criteria != SC_NOTHING && criteria == SC_SPEED) { if(sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1)) { _try_resizing(NULL, -1, NULL, -1); } } STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } static void throughput_lp_handle_idle_cycle(unsigned sched_ctx, int worker) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { unsigned criteria = sc_hypervisor_get_resize_criteria(); if(criteria != SC_NOTHING && criteria == SC_IDLE) { if(sc_hypervisor_check_idle(sched_ctx, worker)) { _try_resizing(NULL, -1, NULL, -1); // sc_hypervisor_move_workers(sched_ctx, 3 - sched_ctx, &worker, 1, 1); } } STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } static void throughput_lp_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) { int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { _try_resizing(sched_ctxs, nsched_ctxs, workers, nworkers); STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } } static void throughput_lp_end_ctx(__attribute__((unused))unsigned sched_ctx) { /* struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx); */ /* int worker; */ /* for(worker = 0; worker < 12; worker++) */ /* printf("%d/%d: speed %lf\n", worker, sched_ctx, sc_w->ref_speed[worker]); */ return; } struct sc_hypervisor_policy throughput_lp_policy = { .size_ctxs = NULL, .resize_ctxs = throughput_lp_resize_ctxs, .handle_poped_task = throughput_lp_handle_poped_task, .handle_pushed_task = NULL, .handle_idle_cycle = throughput_lp_handle_idle_cycle, .handle_idle_end = NULL, .handle_post_exec_hook = NULL, .handle_submitted_job = NULL, .end_ctx = throughput_lp_end_ctx, .init_worker = NULL, .custom = 0, .name = "throughput_lp" }; #endif /* STARPU_HAVE_GLPK_H */ starpu-1.4.9+dfsg/sc_hypervisor/src/policies_utils/000077500000000000000000000000001507764646700225255ustar00rootroot00000000000000starpu-1.4.9+dfsg/sc_hypervisor/src/policies_utils/dichotomy.c000066400000000000000000000075721507764646700247030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "sc_hypervisor_lp.h" #include "sc_hypervisor_policy.h" #include #include /* executes the function lp_estimated_distrib_func over the interval [tmin, tmax] until it finds the lowest value that * still has solutions */ unsigned sc_hypervisor_lp_execute_dichotomy(int ns, int nw, double w_in_s[ns][nw], unsigned solve_lp_integer, void *specific_data, double tmin, double tmax, double smallest_tmax, double (*lp_estimated_distrib_func)(int lns, int lnw, double ldraft_w_in_s[ns][nw], unsigned lis_integer, double ltmax, void *lspecifc_data)) { (void)smallest_tmax; double res = 1.0; unsigned has_sol = 0; double tmid = tmax; unsigned found_sol = 0; struct timeval start_time; struct timeval end_time; int nd = 0; double found_tmid = tmax; double potential_tmid = tmid; double threshold = tmax*0.1; gettimeofday(&start_time, NULL); /* we fix tmax and we do not treat it as an unknown we just vary by dichotomy its values*/ while(1) { /* find solution and save the values in draft tables only if there is a solution for the system we save them in the proper table */ printf("solving for tmid %lf \n", tmid); res = lp_estimated_distrib_func(ns, nw, w_in_s, solve_lp_integer, tmid, specific_data); if(res < 0.0) { printf("timeouted no point in continuing\n"); found_sol = 0; break; } else if(res != 0.0) { has_sol = 1; found_sol = 1; found_tmid = tmid; printf("found sol for tmid %lf \n", tmid); } else { printf("failed for tmid %lf \n", tmid); if(tmid == tmax) { printf("failed for tmid %lf from the first time\n", tmid); break; } has_sol = 0; } /* if we have a solution with this tmid try a smaller value bigger than the old one */ if(has_sol) { /* if the difference between tmax and tmid is smaller than a given threshold there is no point in searching more precision */ tmax = tmid; potential_tmid = tmin + ((tmax-tmin)/2.0); if((tmax - potential_tmid) < threshold) { printf("had_sol but stop doing it for tmin %lf tmax %lf and potential tmid %lf \n", tmin, tmax, potential_tmid); break; } printf("try for smaller potential tmid %lf \n", potential_tmid); } else /*else try a bigger one */ { /* if we previously found a good sol and we keep failing we stop searching for a better sol */ tmin = tmid; potential_tmid = tmin + ((tmax-tmin)/2.0); if((tmax - potential_tmid) < threshold) { printf("didn't have sol but stop doing it for tmin %lf tmax %lf and potential tmid %lf \n", tmin, tmax, potential_tmid); break; } printf("try for bigger potential tmid %lf \n", potential_tmid); } tmid = potential_tmid; nd++; } printf("solve againd for tmid %lf \n", found_tmid); if(found_sol) { res = lp_estimated_distrib_func(ns, nw, w_in_s, solve_lp_integer, found_tmid, specific_data); found_sol = (res != 0.0); } printf("found sol %u for tmid %lf\n", found_sol, found_tmid); gettimeofday(&end_time, NULL); long diff_s = end_time.tv_sec - start_time.tv_sec; long diff_us = end_time.tv_usec - start_time.tv_usec; __attribute__((unused)) float timing = (float)(diff_s*1000000 + diff_us)/1000; return found_sol; } starpu-1.4.9+dfsg/sc_hypervisor/src/policies_utils/lp_programs.c000066400000000000000000000433771507764646700252340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * GNU Linear Programming Kit backend */ #include "sc_hypervisor_policy.h" #include "sc_hypervisor_lp.h" #ifdef STARPU_HAVE_GLPK_H double sc_hypervisor_lp_simulate_distrib_tasks(int ns, int nw, int nt, double w_in_s[ns][nw], double tasks[nw][nt], double times[nw][nt], unsigned is_integer, double tmax, unsigned *in_sched_ctxs, struct sc_hypervisor_policy_task_pool *tmp_task_pools) { struct sc_hypervisor_policy_task_pool * tp; int t, w, s; glp_prob *lp; lp = glp_create_prob(); glp_set_prob_name(lp, "StarPU theoretical bound"); glp_set_obj_dir(lp, GLP_MAX); glp_set_obj_name(lp, "total execution time"); { int ne = nt * nw /* worker execution time */ + nw * ns + nw * (nt + ns) + 1; /* glp dumbness */ int n = 1; int ia[ne], ja[ne]; double ar[ne]; /* Variables: number of tasks i assigned to worker j, and tmax */ glp_add_cols(lp, nw*nt+ns*nw); #define colnum(w, t) ((t)*nw+(w)+1) for(s = 0; s < ns; s++) for(w = 0; w < nw; w++) glp_set_obj_coef(lp, nw*nt+s*nw+w+1, 1.); for (w = 0; w < nw; w++) for (t = 0; t < nt; t++) { char name[32]; snprintf(name, sizeof(name), "w%dt%dn", w, t); glp_set_col_name(lp, colnum(w, t), name); if (is_integer) { glp_set_col_kind(lp, colnum(w, t), GLP_IV); glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0, 0); } else glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0.0, 0.0); } for(s = 0; s < ns; s++) for(w = 0; w < nw; w++) { char name[32]; snprintf(name, sizeof(name), "w%ds%dn", w, s); glp_set_col_name(lp, nw*nt+s*nw+w+1, name); if (is_integer) { glp_set_col_kind(lp, nw*nt+s*nw+w+1, GLP_IV); glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0, 1); } else glp_set_col_bnds(lp, nw*nt+s*nw+w+1, GLP_DB, 0.0, 1.0); } unsigned *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs; int curr_row_idx = 0; /* Total worker execution time */ glp_add_rows(lp, nw*ns); for (t = 0; t < nt; t++) { int someone = 0; for (w = 0; w < nw; w++) if (!isnan(times[w][t])) someone = 1; if (!someone) { /* This task does not have any performance model at all, abort */ printf("NO PERF MODELS\n"); glp_delete_prob(lp); return 0.0; } } /*sum(t[t][w]*n[t][w]) < x[s][w]*tmax */ for(s = 0; s < ns; s++) { for (w = 0; w < nw; w++) { char name[32], title[64]; starpu_worker_get_name(w, name, sizeof(name)); snprintf(title, sizeof(title), "worker %s", name); glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title); for (t = 0, tp = tmp_task_pools; tp; t++, tp = tp->next) { if(tp->sched_ctx_id == sched_ctxs[s]) { ia[n] = curr_row_idx+s*nw+w+1; ja[n] = colnum(w, t); if (isnan(times[w][t])) { printf("had to insert huge val \n"); ar[n] = 1000000000.; } else ar[n] = times[w][t]; n++; } } /* x[s][w] = 1 | 0 */ ia[n] = curr_row_idx+s*nw+w+1; ja[n] = nw*nt+s*nw+w+1; ar[n] = (-1) * tmax; n++; if (is_integer) { glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0, 0); } else glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0); } } curr_row_idx += nw*ns; /* Total task completion */ glp_add_rows(lp, nt); for (t = 0, tp = tmp_task_pools; tp; t++, tp = tp->next) { char name[32], title[64]; starpu_worker_get_name(w, name, sizeof(name)); snprintf(title, sizeof(title), "task %s key %x", tp->cl->name, (unsigned) tp->footprint); glp_set_row_name(lp, curr_row_idx+t+1, title); for (w = 0; w < nw; w++) { ia[n] = curr_row_idx+t+1; ja[n] = colnum(w, t); ar[n] = 1; n++; } glp_set_row_bnds(lp, curr_row_idx+t+1, GLP_FX, tp->n, tp->n); } curr_row_idx += nt; /* sum(x[s][i]) = 1 */ glp_add_rows(lp, nw); for (w = 0; w < nw; w++) { char name[32], title[64]; starpu_worker_get_name(w, name, sizeof(name)); snprintf(title, sizeof(title), "w%x", w); glp_set_row_name(lp, curr_row_idx+w+1, title); for(s = 0; s < ns; s++) { ia[n] = curr_row_idx+w+1; ja[n] = nw*nt+s*nw+w+1; ar[n] = 1; n++; } if(is_integer) glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1); else glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0); } if(n != ne) printf("ns= %d nw = %d nt = %d n = %d ne = %d\n", ns, nw, nt, n, ne); STARPU_ASSERT(n == ne); glp_load_matrix(lp, ne-1, ia, ja, ar); } glp_smcp parm; glp_init_smcp(&parm); parm.msg_lev = GLP_MSG_OFF; int ret = glp_simplex(lp, &parm); /* char str[50]; */ /* sprintf(str, "outpu_lp_%g", tmax); */ /* glp_print_sol(lp, str); */ if (ret) { printf("error in simplex\n"); glp_delete_prob(lp); lp = NULL; return 0.0; } int stat = glp_get_prim_stat(lp); /* if we don't have a solution return */ if(stat == GLP_NOFEAS) { glp_delete_prob(lp); // printf("no_sol in tmax = %lf\n", tmax); lp = NULL; return 0.0; } if (is_integer) { glp_iocp iocp; glp_init_iocp(&iocp); iocp.msg_lev = GLP_MSG_OFF; // iocp.tm_lim = 1000; glp_intopt(lp, &iocp); stat = glp_mip_status(lp); /* if we don't have a solution return */ if(stat == GLP_NOFEAS || stat == GLP_ETMLIM || stat == GLP_UNDEF) { // printf("no int sol in tmax = %lf\n", tmax); if(stat == GLP_ETMLIM || stat == GLP_UNDEF) printf("timeout \n"); glp_delete_prob(lp); lp = NULL; return 0.0; } } double res = glp_get_obj_val(lp); for (w = 0; w < nw; w++) for (t = 0; t < nt; t++) if (is_integer) tasks[w][t] = (double)glp_mip_col_val(lp, colnum(w, t)); else tasks[w][t] = glp_get_col_prim(lp, colnum(w, t)); /* printf("**********************************************\n"); */ /* printf("for tmax %lf\n", tmax); */ for(s = 0; s < ns; s++) for(w = 0; w < nw; w++) { if (is_integer) w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*nt+s*nw+w+1); else w_in_s[s][w] = glp_get_col_prim(lp, nw*nt+s*nw+w+1); // printf("w %d in ctx %d = %lf\n", w, s, w_in_s[s][w]); } /* printf("\n"); */ /* printf("**********************************************\n"); */ glp_delete_prob(lp); return res; } double sc_hypervisor_lp_simulate_distrib_flops(int ns, int nw, double v[ns][nw], double flops[ns], double res[ns][nw], int total_nw[nw], unsigned sched_ctxs[ns], double last_vmax) { int integer = 1; int s, w; glp_prob *lp; int ne = (ns*nw+1)*(ns+nw) + 1; /* glp dumbness */ int n = 1; int ia[ne], ja[ne]; double ar[ne]; lp = glp_create_prob(); glp_set_prob_name(lp, "sample"); glp_set_obj_dir(lp, GLP_MAX); glp_set_obj_name(lp, "max speed"); /* we add nw*ns columns one for each type of worker in each context and another column corresponding to the 1/tmax bound (bc 1/tmax is a variable too)*/ glp_add_cols(lp, nw*ns+1); /* struct sc_hypervisor_wrapper *sc_w = NULL; */ for(s = 0; s < ns; s++) { /* sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); */ struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[s]); for(w = 0; w < nw; w++) { char name[32]; snprintf(name, sizeof(name), "worker%dctx%d", w, s); glp_set_col_name(lp, n, name); if (integer) { glp_set_col_kind(lp, n, GLP_IV); /* if(sc_w->consider_max) */ /* { */ /* if(config->max_nworkers == 0) */ /* glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, config->max_nworkers); */ /* else */ /* glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers, config->max_nworkers); */ /* } */ /* else */ { if(total_nw[w] == 0) glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers, total_nw[w]); else glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers, total_nw[w]); } } else { /* if(sc_w->consider_max) */ /* { */ /* if(config->max_nworkers == 0) */ /* glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, config->max_nworkers*1.0); */ /* else */ /* glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers*1.0, config->max_nworkers*1.0); */ /* #ifdef STARPU_SC_HYPERVISOR_DEBUG */ /* printf("%d****************consider max %lf in lp\n", sched_ctxs[s], config->max_nworkers*1.0); */ /* #endif */ /* } */ /* else */ { if(total_nw[w] == 0) glp_set_col_bnds(lp, n, GLP_FX, config->min_nworkers*1.0, total_nw[w]*1.0); else glp_set_col_bnds(lp, n, GLP_DB, config->min_nworkers*1.0, total_nw[w]*1.0); #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("%u****************don't consider max %d but total %d in lp\n", sched_ctxs[s], config->max_nworkers, total_nw[w]); #endif } } n++; } } #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("ns = %d nw = %d\n", ns, nw); #endif /*1/tmax should belong to the interval [0.0;1.0]*/ glp_set_col_name(lp, n, "vmax"); // glp_set_col_bnds(lp, n, GLP_DB, 0.0, 1.0); if(last_vmax != -1.0) glp_set_col_bnds(lp, n, GLP_LO, last_vmax, last_vmax); else glp_set_col_bnds(lp, n, GLP_LO, 0.0, 0.0); /* Z = 1/tmax -> 1/tmax structural variable, nCPUs & nGPUs in ctx are auxiliary variables */ glp_set_obj_coef(lp, n, 1.0); n = 1; /* one row corresponds to one ctx*/ glp_add_rows(lp, ns); for(s = 0; s < ns; s++) { char name[32]; snprintf(name, sizeof(name), "ctx%d", s); glp_set_row_name(lp, s+1, name); glp_set_row_bnds(lp, s+1, GLP_LO, 0., 0.); for(w = 0; w < nw; w++) { int s2; for(s2 = 0; s2 < ns; s2++) { if(s2 == s) { ia[n] = s+1; ja[n] = w + nw*s2 + 1; ar[n] = v[s][w]; // printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); } else { ia[n] = s+1; ja[n] = w + nw*s2 + 1; ar[n] = 0.0; // printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); } n++; } } /* 1/tmax */ ia[n] = s+1; ja[n] = ns*nw+1; ar[n] = (-1) * flops[s]; // printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); n++; } /*we add another linear constraint : sum(all cpus) = 9 and sum(all gpus) = 3 */ glp_add_rows(lp, nw); for(w = 0; w < nw; w++) { char name[32]; snprintf(name, sizeof(name), "w%d", w); glp_set_row_name(lp, ns+w+1, name); for(s = 0; s < ns; s++) { int w2; for(w2 = 0; w2 < nw; w2++) { if(w2 == w) { ia[n] = ns+w+1; ja[n] = w2+s*nw + 1; ar[n] = 1.0; // printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); } else { ia[n] = ns+w+1; ja[n] = w2+s*nw + 1; ar[n] = 0.0; // printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); } n++; } } /* 1/tmax */ ia[n] = ns+w+1; ja[n] = ns*nw+1; ar[n] = 0.0; // printf("ia[%d]=%d ja[%d]=%d ar[%d]=%lf\n", n, ia[n], n, ja[n], n, ar[n]); n++; /*sum(all gpus) = 3*/ if(w == 0) glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[0], total_nw[0]); /*sum(all cpus) = 9*/ if(w == 1) glp_set_row_bnds(lp, ns+w+1, GLP_FX, total_nw[1], total_nw[1]); } STARPU_ASSERT(n == ne); glp_load_matrix(lp, ne-1, ia, ja, ar); glp_smcp parm; glp_init_smcp(&parm); parm.msg_lev = GLP_MSG_OFF; int ret = glp_simplex(lp, &parm); if (ret) { printf("error in simplex\n"); glp_delete_prob(lp); lp = NULL; return 0.0; } int stat = glp_get_prim_stat(lp); /* if we don't have a solution return */ if(stat == GLP_NOFEAS) { glp_delete_prob(lp); printf("no_sol\n"); lp = NULL; return 0.0; } if (integer) { glp_iocp iocp; glp_init_iocp(&iocp); iocp.msg_lev = GLP_MSG_OFF; glp_intopt(lp, &iocp); stat = glp_mip_status(lp); /* if we don't have a solution return */ if(stat == GLP_NOFEAS) { printf("no int sol\n"); glp_delete_prob(lp); lp = NULL; return 0.0; } } double vmax = glp_get_obj_val(lp); #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("vmax = %lf \n", vmax); #endif n = 1; for(s = 0; s < ns; s++) { for(w = 0; w < nw; w++) { if (integer) res[s][w] = (double)glp_mip_col_val(lp, n); else res[s][w] = glp_get_col_prim(lp, n); #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("%d/%d: res %lf flops = %lf v = %lf\n", w,s, res[s][w], flops[s], v[s][w]); #endif n++; } } glp_delete_prob(lp); return vmax; } double sc_hypervisor_lp_simulate_distrib_flops_on_sample(int ns, int nw, double final_w_in_s[ns][nw], unsigned is_integer, double tmax, double **speed, double flops[ns], double **final_flops_on_w) { double w_in_s[ns][nw]; double flops_on_w[ns][nw]; int w, s; glp_prob *lp; // printf("try with tmax %lf\n", tmax); lp = glp_create_prob(); glp_set_prob_name(lp, "StarPU theoretical bound"); glp_set_obj_dir(lp, GLP_MAX); glp_set_obj_name(lp, "total execution time"); { int ne = 5 * ns * nw /* worker execution time */ + 1; /* glp dumbness */ int n = 1; int ia[ne], ja[ne]; double ar[ne]; /* Variables: number of flops assigned to worker w in context s, and the acknwoledgment that the worker w belongs to the context s */ glp_add_cols(lp, 2*nw*ns); #define colnum_sample(w, s) ((s)*nw+(w)+1) for(s = 0; s < ns; s++) for(w = 0; w < nw; w++) glp_set_obj_coef(lp, nw*ns+colnum_sample(w,s), 1.); for(s = 0; s < ns; s++) for(w = 0; w < nw; w++) { char name[32]; snprintf(name, sizeof(name), "flopsw%ds%dn", w, s); glp_set_col_name(lp, colnum_sample(w,s), name); glp_set_col_bnds(lp, colnum_sample(w,s), GLP_LO, 0., 0.); snprintf(name, sizeof(name), "w%ds%dn", w, s); glp_set_col_name(lp, nw*ns+colnum_sample(w,s), name); if (is_integer) { glp_set_col_kind(lp, nw*ns+colnum_sample(w, s), GLP_IV); glp_set_col_bnds(lp, nw*ns+colnum_sample(w,s), GLP_DB, 0, 1); } else glp_set_col_bnds(lp, nw*ns+colnum_sample(w,s), GLP_DB, 0.0, 1.0); } int curr_row_idx = 0; /* Total worker execution time */ glp_add_rows(lp, nw*ns); /*nflops[s][w]/v[s][w] < x[s][w]*tmax */ for(s = 0; s < ns; s++) { for (w = 0; w < nw; w++) { char name[32], title[64]; starpu_worker_get_name(w, name, sizeof(name)); snprintf(title, sizeof(title), "worker %s", name); glp_set_row_name(lp, curr_row_idx+s*nw+w+1, title); /* nflosp[s][w] */ ia[n] = curr_row_idx+s*nw+w+1; ja[n] = colnum_sample(w, s); ar[n] = 1 / speed[s][w]; n++; /* x[s][w] = 1 | 0 */ ia[n] = curr_row_idx+s*nw+w+1; ja[n] = nw*ns+colnum_sample(w,s); ar[n] = (-1) * tmax; n++; glp_set_row_bnds(lp, curr_row_idx+s*nw+w+1, GLP_UP, 0.0, 0.0); } } curr_row_idx += nw*ns; /* sum(flops[s][w]) = flops[s] */ glp_add_rows(lp, ns); for (s = 0; s < ns; s++) { char name[32], title[64]; starpu_worker_get_name(w, name, sizeof(name)); snprintf(title, sizeof(title), "flops %lf ctx%d", flops[s], s); glp_set_row_name(lp, curr_row_idx+s+1, title); for (w = 0; w < nw; w++) { ia[n] = curr_row_idx+s+1; ja[n] = colnum_sample(w, s); ar[n] = 1; n++; } glp_set_row_bnds(lp, curr_row_idx+s+1, GLP_FX, flops[s], flops[s]); } curr_row_idx += ns; /* sum(x[s][w]) = 1 */ glp_add_rows(lp, nw); for (w = 0; w < nw; w++) { char name[32], title[64]; starpu_worker_get_name(w, name, sizeof(name)); snprintf(title, sizeof(title), "w%x", w); glp_set_row_name(lp, curr_row_idx+w+1, title); for(s = 0; s < ns; s++) { ia[n] = curr_row_idx+w+1; ja[n] = nw*ns+colnum_sample(w,s); ar[n] = 1; n++; } if(is_integer) glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1, 1); else glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_FX, 1.0, 1.0); } curr_row_idx += nw; /* sum(nflops[s][w]) > 0*/ glp_add_rows(lp, nw); for (w = 0; w < nw; w++) { char name[32], title[64]; starpu_worker_get_name(w, name, sizeof(name)); snprintf(title, sizeof(title), "flopsw%x", w); glp_set_row_name(lp, curr_row_idx+w+1, title); for(s = 0; s < ns; s++) { ia[n] = curr_row_idx+w+1; ja[n] = colnum_sample(w,s); ar[n] = 1; n++; } glp_set_row_bnds(lp, curr_row_idx+w+1, GLP_LO, 0.1, 0.); } if(n != ne) printf("ns= %d nw = %d n = %d ne = %d\n", ns, nw, n, ne); STARPU_ASSERT(n == ne); glp_load_matrix(lp, ne-1, ia, ja, ar); } glp_smcp parm; glp_init_smcp(&parm); parm.msg_lev = GLP_MSG_OFF; int ret = glp_simplex(lp, &parm); if (ret) { glp_delete_prob(lp); lp = NULL; return 0.0; } if (is_integer) { glp_iocp iocp; glp_init_iocp(&iocp); iocp.msg_lev = GLP_MSG_OFF; glp_intopt(lp, &iocp); int stat = glp_mip_status(lp); /* if we don't have a solution return */ if(stat == GLP_NOFEAS) { glp_delete_prob(lp); lp = NULL; return 0.0; } } int stat = glp_get_prim_stat(lp); /* if we don't have a solution return */ if(stat == GLP_NOFEAS) { glp_delete_prob(lp); lp = NULL; return 0.0; } double res = glp_get_obj_val(lp); for(s = 0; s < ns; s++) for(w = 0; w < nw; w++) { flops_on_w[s][w] = glp_get_col_prim(lp, colnum_sample(w, s)); if (is_integer) w_in_s[s][w] = (double)glp_mip_col_val(lp, nw*ns+colnum_sample(w, s)); else w_in_s[s][w] = glp_get_col_prim(lp, nw*ns+colnum_sample(w,s)); // printf("w_in_s[s%d][w%d] = %lf flops[s%d][w%d] = %lf \n", s, w, w_in_s[s][w], s, w, flops_on_w[s][w]); } glp_delete_prob(lp); for(s = 0; s < ns; s++) for(w = 0; w < nw; w++) { final_w_in_s[s][w] = w_in_s[s][w]; final_flops_on_w[s][w] = flops_on_w[s][w]; } return res; } #endif // STARPU_HAVE_GLPK_H starpu-1.4.9+dfsg/sc_hypervisor/src/policies_utils/lp_tools.c000066400000000000000000000614661507764646700245410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "sc_hypervisor_lp.h" #include "sc_hypervisor_policy.h" #include "sc_hypervisor_intern.h" #include double sc_hypervisor_lp_get_nworkers_per_ctx(int nsched_ctxs, int ntypes_of_workers, double res[nsched_ctxs][ntypes_of_workers], int total_nw[ntypes_of_workers], struct types_of_workers *tw, unsigned *in_sched_ctxs) { unsigned *sched_ctxs = in_sched_ctxs == NULL ? sc_hypervisor_get_sched_ctxs() : in_sched_ctxs; #ifdef STARPU_HAVE_GLPK_H double v[nsched_ctxs][ntypes_of_workers]; double flops[nsched_ctxs]; /* unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels(); */ /* if(nhierarchy_levels <= 1) */ sc_hypervisor_update_resize_interval(sched_ctxs, nsched_ctxs, total_nw[0]); int nw = tw->nw; int i = 0; struct sc_hypervisor_wrapper* sc_w; for(i = 0; i < nsched_ctxs; i++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]); int w; for(w = 0; w < nw; w++) v[i][w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); double ready_flops = starpu_sched_ctx_get_nready_flops(sc_w->sched_ctx); unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels(); if(nhierarchy_levels > 1) ready_flops = sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(sc_w->sched_ctx); #ifdef STARPU_SC_HYPERVISOR_DEBUG int nready_tasks = starpu_sched_ctx_get_nready_tasks(sc_w->sched_ctx); #endif if(sc_w->to_be_sized) { flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/ sc_w->to_be_sized = 0; } else { if(nhierarchy_levels > 1) flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/ else if(sc_w->remaining_flops < 0.0) flops[i] = ready_flops/1000000000.0; /* in gflops*/ else { if((ready_flops/1000000000.0) <= 0.000002) flops[i] = 0.0; else flops[i] = sc_w->remaining_flops/1000000000.0; /* in gflops*/ } } if(flops[i] < 0.0) flops[i] = 0.0; #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("%u: flops %lf remaining flops %lf ready flops %lf nready_tasks %d\n", sched_ctxs[i], flops[i], sc_w->remaining_flops/1000000000, ready_flops/1000000000, nready_tasks); #endif } sc_hypervisor_check_if_consider_max(tw); int w; for(w = 0; w < nw; w++) { double avg_speed = sc_hypervisor_get_avg_speed(sc_hypervisor_get_arch_for_index(w, tw)); if(avg_speed != -1.0) { #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("avg_speed for cpus is %lf \n", avg_speed); #endif unsigned consider_max_for_all = 0; for(i = 0; i < nsched_ctxs; i++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]); if(!sc_w->consider_max) { #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("ctx %u: current speed is %lf and compare speed is min %lf max %lf\n", sched_ctxs[i], v[i][w], (0.1*avg_speed), (2*avg_speed)); #endif if(v[i][w] < 0.1*avg_speed || v[i][w] > 2*avg_speed) { sc_w->consider_max = 1; consider_max_for_all = 1; } #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("ctx %u consider max %d \n", sched_ctxs[i], sc_w->consider_max); #endif } } if(consider_max_for_all) { for(i = 0; i < nsched_ctxs; i++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]); sc_w->consider_max = 1; #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("ctx %u consider max %d anyway \n", sched_ctxs[i], sc_w->consider_max); #endif } } } } if(nsched_ctxs == 1) { for(w = 0; w < nw; w++) res[0][w] = total_nw[w]; double optimal_v = 0.0; #ifdef STARPU_USE_CUDA optimal_v = res[0][0] * v[0][0] + res[0][1]* v[0][1]; #else optimal_v = res[0][0] * v[0][0]; #endif //STARPU_USE_CUDA _set_optimal_v(sched_ctxs[0], optimal_v); return 1.0; } unsigned tmp_sched_ctxs[STARPU_NMAX_SCHED_CTXS]; double tmp_flops[STARPU_NMAX_SCHED_CTXS]; double tmp_v[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers]; double tmp_res[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers]; int tmp_nsched_ctxs = 0; for(i = 0; i < nsched_ctxs; i++) { struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[i]); sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]); if(config->max_nworkers != 0 || !sc_w->consider_max) { tmp_sched_ctxs[tmp_nsched_ctxs] = sched_ctxs[i]; tmp_flops[tmp_nsched_ctxs] = flops[i]; for(w = 0; w < ntypes_of_workers; w++) tmp_v[tmp_nsched_ctxs][w] = v[i][w]; tmp_nsched_ctxs++; } } if(tmp_nsched_ctxs == 0) return -1.0; double ret = sc_hypervisor_lp_simulate_distrib_flops(tmp_nsched_ctxs, ntypes_of_workers, tmp_v, tmp_flops, tmp_res, total_nw, tmp_sched_ctxs, -1.0); int j; for(i = 0; i < nsched_ctxs; i++) { unsigned found = 0; for(j = 0; j < tmp_nsched_ctxs; j++) { if(sched_ctxs[i] == tmp_sched_ctxs[j]) { for(w = 0; w < ntypes_of_workers; w++) res[i][w] = tmp_res[j][w]; found = 1; break; } } if(!found) { for(w = 0; w < ntypes_of_workers; w++) res[i][w] = 0.0; } } double vmax = 0.0; if(ret != 0.0) { /* redo the lp after cleaning out the contexts that got all the max workers required */ unsigned selected_sched_ctxs[STARPU_NMAX_SCHED_CTXS]; double selected_flops[STARPU_NMAX_SCHED_CTXS]; double selected_v[STARPU_NMAX_SCHED_CTXS][ntypes_of_workers]; int nselected = 0; int available_cpus = total_nw[0]; int used_cpus = 0; for(i = 0; i < nsched_ctxs; i++) { struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[i]); if(res[i][0] < config->max_nworkers && config->max_nworkers != 0 && flops[i] > 0.0001) { selected_flops[nselected] = flops[i]; selected_v[nselected][0] = v[i][0]; selected_sched_ctxs[nselected++] = sched_ctxs[i]; } else available_cpus -= res[i][0]; used_cpus += res[i][0]; } if(used_cpus < 0.8 * total_nw[0] && nselected > 1) { double old_ret = ret; if(nselected <= 0 || nselected == nsched_ctxs) { nselected = nsched_ctxs; for(i = 0; i < nsched_ctxs; i++) { selected_flops[i] = flops[i]; selected_v[i][0] = v[i][0]; selected_sched_ctxs[i] = sched_ctxs[i]; } } else total_nw[0] = available_cpus; double selected_res[nselected][ntypes_of_workers]; ret = sc_hypervisor_lp_simulate_distrib_flops(nselected, ntypes_of_workers, selected_v, selected_flops, selected_res, total_nw, selected_sched_ctxs, ret); if(ret != 0) { for(i = 0; i < nsched_ctxs; i++) { for(j = 0; j < nselected; j++) { if(sched_ctxs[i] == selected_sched_ctxs[j]) { res[i][0] = selected_res[j][0]; } } } } else ret = old_ret; } } /* if the lp could not give any workers to any context just split the workers btw the contexts */ if(ret == 0.0) { double rand_res[nw]; for(w = 0; w < nw; w++) rand_res[w] = total_nw[w]/nsched_ctxs; int s; for(s = 0; s < nsched_ctxs; s++) for(w = 0; w < nw; w++) res[s][w] = rand_res[w]; } else /* keep the first speed */ // if(ret != 0.0) { vmax = 1 / ret; } double optimal_v = 0.0; for(i = 0; i < nsched_ctxs; i++) { #ifdef STARPU_USE_CUDA optimal_v = res[i][0] * v[i][0] + res[i][1]* v[i][1]; #else optimal_v = res[i][0] * v[i][0]; #endif //STARPU_USE_CUDA unsigned no_workers = 1; for(w = 0; w < nw; w++) { if(res[i][w] != 0.0) { no_workers = 0; break; } } sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]); /* if the hypervisor gave 0 workers to a context but the context still * has some last flops or a ready task that does not even have any flops * we give a worker (in shared mode) to the context in order to leave him * finish its work = we give -1.0 value instead of 0.0 and further on in * the distribution function we take this into account and revert the variable * to its 0.0 value */ // if(no_workers && (flops[i] != 0.0 || sc_w->nready_tasks > 0)) if(no_workers) { for(w = 0; w < nw; w++) res[i][w] = -1.0; } // if(optimal_v != 0.0) _set_optimal_v(sched_ctxs[i], optimal_v); } return vmax; #else//STARPU_HAVE_GLPK_H return 0.0; #endif//STARPU_HAVE_GLPK_H } double sc_hypervisor_lp_get_tmax(int nworkers, int *workers) { struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nworkers); int nw = tw->nw; int total_nw[nw]; sc_hypervisor_group_workers_by_type(tw, total_nw); int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); double res[nsched_ctxs][nw]; double ret = sc_hypervisor_lp_get_nworkers_per_ctx(nsched_ctxs, nw, res, total_nw, tw, NULL) * 1000.0; free(tw); return ret; } void sc_hypervisor_lp_round_double_to_int(int ns, int nw, double res[ns][nw], int res_rounded[ns][nw]) { int s, w; double left_res[nw]; for(w = 0; w < nw; w++) left_res[w] = 0.0; for(s = 0; s < ns; s++) { for(w = 0; w < nw; w++) { int x = floor(res[s][w]); double x_double = (double)x; double diff = res[s][w] - x_double; if(diff != 0.0) { if(diff > 0.5) { if(left_res[w] != 0.0) { if((diff + left_res[w]) > 0.5) { res_rounded[s][w] = x + 1; left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w])); } else { res_rounded[s][w] = x; left_res[w] = (-1.0) * (diff + left_res[w]); } } else { res_rounded[s][w] = x + 1; left_res[w] = (-1.0) * (x_double + 1.0 - res[s][w]); } } else { if((diff + left_res[w]) > 0.5) { res_rounded[s][w] = x + 1; left_res[w] = (-1.0) * (x_double + 1.0 - (res[s][w] + left_res[w])); } else { res_rounded[s][w] = x; left_res[w] = diff; } } } else res_rounded[s][w] = x; } } } void _lp_find_workers_to_give_away(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx, int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS], int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS], int res_rounded[ns][nw], double res[ns][nw], struct types_of_workers *tw) { int w; double target_res = 0.0; for(w = 0; w < nw; w++) { target_res += res[sched_ctx_idx][w]; if(res[sched_ctx_idx][w] == -1.0) res[sched_ctx_idx][w] = 0.0; } for(w = 0; w < nw; w++) { enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw); if(arch == STARPU_CPU_WORKER) { int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch); if(nworkers_ctx > res_rounded[sched_ctx_idx][w]) { int nworkers_to_move = nworkers_ctx - res_rounded[sched_ctx_idx][w]; int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &nworkers_to_move, arch); int i; if(target_res < 0.0 && nworkers_to_move > 0) { tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[0]; for(i = 1; i < nworkers_to_move; i++) tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i]; } else { for(i = 0; i < nworkers_to_move; i++) tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i]; } free(workers_to_move); } } else { double nworkers_ctx = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch) * 1.0; if(nworkers_ctx > res[sched_ctx_idx][w]) { double nworkers_to_move = nworkers_ctx - res[sched_ctx_idx][w]; int x = floor(nworkers_to_move); double x_double = (double)x; double diff = nworkers_to_move - x_double; if(diff == 0.0) { int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch); if(x > 0) { int i; for(i = 0; i < x; i++) tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i]; } free(workers_to_move); } else { x+=1; int *workers_to_move = sc_hypervisor_get_idlest_workers(sched_ctx, &x, arch); if(x > 0) { int i; for(i = 0; i < x-1; i++) tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[i]; if(diff > 0.8) tmp_workers_move[w][tmp_nw_move[w]++] = workers_to_move[x-1]; else if(diff > 0.3) tmp_workers_add[w][tmp_nw_add[w]++] = workers_to_move[x-1]; } free(workers_to_move); } } } } } void _lp_find_workers_to_accept(int nw, int ns, unsigned sched_ctx, int sched_ctx_idx, int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS], int tmp_nw_add[nw], int tmp_workers_add[nw][STARPU_NMAXWORKERS], int *nw_move, int workers_move[STARPU_NMAXWORKERS], int *nw_add, int workers_add[STARPU_NMAXWORKERS], int res_rounded[ns][nw], double res[ns][nw], struct types_of_workers *tw) { int w; int j = 0, k = 0; for(w = 0; w < nw; w++) { enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw); int nw_ctx2 = sc_hypervisor_get_nworkers_ctx(sched_ctx, arch); int nw_needed = res_rounded[sched_ctx_idx][w] - nw_ctx2; if(nw_needed > 0 && tmp_nw_move[w] > 0) { *nw_move += nw_needed >= tmp_nw_move[w] ? tmp_nw_move[w] : nw_needed; int i = 0; for(i = 0; i < STARPU_NMAXWORKERS; i++) { if(tmp_workers_move[w][i] != -1) { workers_move[j++] = tmp_workers_move[w][i]; tmp_workers_move[w][i] = -1; if(j == *nw_move) break; } } tmp_nw_move[w] -= *nw_move; } double needed = res[sched_ctx_idx][w] - (nw_ctx2 * 1.0); int x = floor(needed); double x_double = (double)x; double diff = needed - x_double; if((diff > 0.3 || needed > 0.3) && tmp_nw_add[w] > 0) { *nw_add = tmp_nw_add[w]; int i = 0; for(i = 0; i < STARPU_NMAXWORKERS; i++) { if(tmp_workers_add[w][i] != -1) { workers_add[k++] = tmp_workers_add[w][i]; tmp_workers_add[w][i] = -1; if(k == *nw_add) break; } } tmp_nw_add[w] -= *nw_add; } } } void _lp_find_workers_to_remove(int nw, int tmp_nw_move[nw], int tmp_workers_move[nw][STARPU_NMAXWORKERS], int *nw_move, int workers_move[STARPU_NMAXWORKERS]) { int w; for(w = 0; w < nw; w++) { if(tmp_nw_move[w] > 0) { *nw_move += tmp_nw_move[w]; int i = 0, j = 0; for(i = 0; i < STARPU_NMAXWORKERS; i++) { if(tmp_workers_move[w][i] != -1) { workers_move[j++] = tmp_workers_move[w][i]; tmp_workers_move[w][i] = -1; if(j == *nw_move) break; } } } } } void sc_hypervisor_lp_redistribute_resources_in_ctxs(int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], unsigned *sched_ctxs, struct types_of_workers *tw) { int s, s2, w; for(s = 0; s < ns; s++) { int tmp_workers_move[nw][STARPU_NMAXWORKERS]; int tmp_nw_move[nw]; int tmp_workers_add[nw][STARPU_NMAXWORKERS]; int tmp_nw_add[nw]; for(w = 0; w < nw; w++) { tmp_nw_move[w] = 0; tmp_nw_add[w] = 0; int i; for(i = 0; i < STARPU_NMAXWORKERS; i++) { tmp_workers_move[w][i] = -1; tmp_workers_add[w][i] = -1; } } /* find workers that ctx s has to give away */ _lp_find_workers_to_give_away(nw, ns, sched_ctxs[s], s, tmp_nw_move, tmp_workers_move, tmp_nw_add, tmp_workers_add, res_rounded, res, tw); for(s2 = 0; s2 < ns; s2++) { if(sched_ctxs[s2] != sched_ctxs[s]) { /* find workers that ctx s2 wants to accept from ctx s the rest of it will probably accepted by another ctx */ int workers_move[STARPU_NMAXWORKERS]; int nw_move = 0; int workers_add[STARPU_NMAXWORKERS]; int nw_add = 0; _lp_find_workers_to_accept(nw, ns, sched_ctxs[s2], s2, tmp_nw_move, tmp_workers_move, tmp_nw_add, tmp_workers_add, &nw_move, workers_move, &nw_add, workers_add, res_rounded, res, tw); if(nw_move > 0) { sc_hypervisor_move_workers(sched_ctxs[s], sched_ctxs[s2], workers_move, nw_move, !(_sc_hypervisor_use_lazy_resize())); nw_move = 0; } if(nw_add > 0) { sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s2]); nw_add = 0; } } } /* if there are workers that weren't accepted by anyone but ctx s wants to get rid of them just remove them from ctx s */ int workers_move[STARPU_NMAXWORKERS]; int nw_move = 0; _lp_find_workers_to_remove(nw, tmp_nw_move, tmp_workers_move, &nw_move, workers_move); if(nw_move > 0) sc_hypervisor_remove_workers_from_sched_ctx(workers_move, nw_move, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize())); } } int _lp_get_unwanted_workers(int *workers_add, int nw_add, unsigned sched_ctx, int *workers_remove) { int nw_remove = 0; struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); int worker; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { worker = workers->get_next(workers, &it); int i; unsigned found = 0; for(i = 0; i < nw_add; i++) { if(worker == workers_add[i]) { found = 1; break; } } if(!found) workers_remove[nw_remove++] = worker; } return nw_remove; } void sc_hypervisor_lp_distribute_resources_in_ctxs(unsigned* sched_ctxs, int ns, int nw, int res_rounded[ns][nw], double res[ns][nw], int *workers, int nworkers, struct types_of_workers *tw) { int s, w; int start[nw]; for(w = 0; w < nw; w++) start[w] = 0; for(s = 0; s < ns; s++) { int workers_add[STARPU_NMAXWORKERS]; int nw_add = 0; double target_res = 0.0; for(w = 0; w < nw; w++) { target_res += res[s][w]; if(res[s][w] == -1.0) res[s][w] = 0.0; } for(w = 0; w < nw; w++) { enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw); if(arch == STARPU_CPU_WORKER) { int nworkers_to_add = res_rounded[s][w]; if(target_res < 0.0) { nworkers_to_add=1; int old_start = start[w]; if(start[w] != 0) start[w]--; int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch); start[w] = old_start; int i; for(i = 0; i < nworkers_to_add; i++) { workers_add[nw_add++] = workers_to_add[i]; } free(workers_to_add); } else { int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch); int i; for(i = 0; i < nworkers_to_add; i++) workers_add[nw_add++] = workers_to_add[i]; free(workers_to_add); } } else { double nworkers_to_add = res[s][w]; int x = floor(nworkers_to_add); double x_double = (double)x; double diff = nworkers_to_add - x_double; if(diff == 0.0) { int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch); int i; for(i = 0; i < x; i++) workers_add[nw_add++] = workers_to_add[i]; free(workers_to_add); } else { x+=1; int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch); int i; if(diff >= 0.3) for(i = 0; i < x; i++) workers_add[nw_add++] = workers_to_add[i]; else for(i = 0; i < x-1; i++) workers_add[nw_add++] = workers_to_add[i]; free(workers_to_add); } } } // sc_hypervisor_start_resize(sched_ctxs[s]); sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]); int workers_remove[STARPU_NMAXWORKERS]; int nw_remove = _lp_get_unwanted_workers(workers_add, nw_add, sched_ctxs[s], workers_remove); sc_hypervisor_remove_workers_from_sched_ctx(workers_remove, nw_remove, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize())); } } void sc_hypervisor_lp_distribute_floating_no_resources_in_ctxs(unsigned* sched_ctxs, int ns, int nw, double res[ns][nw], int *workers, int nworkers, struct types_of_workers *tw) { int s, w; int start[nw]; for(w = 0; w < nw; w++) start[w] = 0; for(s = 0; s < ns; s++) { int workers_add[STARPU_NMAXWORKERS]; int nw_add = 0; double target_res = 0.0; for(w = 0; w < nw; w++) { target_res += res[s][w]; if(res[s][w] == -1.0) res[s][w] = 0.0; } for(w = 0; w < nw; w++) { enum starpu_worker_archtype arch = sc_hypervisor_get_arch_for_index(w, tw); if(arch == STARPU_CPU_WORKER) { int nworkers_to_add = ceil(res[s][w]); double ceil_double = (double)nworkers_to_add; double diff = ceil_double - res[s][w]; if(target_res < 0.0) { nworkers_to_add=1; int old_start = start[w]; if(start[w] != 0) start[w]--; int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch); start[w] = old_start; int i; for(i = 0; i < nworkers_to_add; i++) { workers_add[nw_add++] = workers_to_add[i]; } free(workers_to_add); } else { int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &nworkers_to_add, arch); int i; for(i = 0; i < nworkers_to_add; i++) workers_add[nw_add++] = workers_to_add[i]; free(workers_to_add); } if(diff != 0.0) start[w]--; } else { double nworkers_to_add = res[s][w]; int x = floor(nworkers_to_add); double x_double = (double)x; double diff = nworkers_to_add - x_double; if(diff == 0.0) { int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch); int i; for(i = 0; i < x; i++) workers_add[nw_add++] = workers_to_add[i]; free(workers_to_add); } else { x+=1; int *workers_to_add = sc_hypervisor_get_idlest_workers_in_list(&start[w], workers, nworkers, &x, arch); int i; if(diff >= 0.3) for(i = 0; i < x; i++) workers_add[nw_add++] = workers_to_add[i]; else for(i = 0; i < x-1; i++) workers_add[nw_add++] = workers_to_add[i]; free(workers_to_add); } } } // sc_hypervisor_start_resize(sched_ctxs[s]); sc_hypervisor_add_workers_to_sched_ctx(workers_add, nw_add, sched_ctxs[s]); int workers_remove[STARPU_NMAXWORKERS]; int nw_remove = _lp_get_unwanted_workers(workers_add, nw_add, sched_ctxs[s], workers_remove); sc_hypervisor_remove_workers_from_sched_ctx(workers_remove, nw_remove, sched_ctxs[s], !(_sc_hypervisor_use_lazy_resize())); } } /* nw = all the workers (either in a list or on all machine) */ void sc_hypervisor_lp_place_resources_in_ctx(int ns, int nw, double w_in_s[ns][nw], unsigned *sched_ctxs_input, int *workers_input, unsigned do_size, struct types_of_workers *tw) { int w, s; int ntypes_of_workers = tw->nw; double nworkers[ns][ntypes_of_workers]; int nworkers_rounded[ns][ntypes_of_workers]; for(s = 0; s < ns; s++) { for(w = 0; w < ntypes_of_workers; w++) { nworkers[s][w] = 0.0; nworkers_rounded[s][w] = 0; } } for(s = 0; s < ns; s++) { for(w = 0; w < nw; w++) { enum starpu_worker_archtype arch = starpu_worker_get_type(w); int idx = sc_hypervisor_get_index_for_arch(arch, tw); nworkers[s][idx] += w_in_s[s][w]; if(arch == STARPU_CUDA_WORKER) { if(w_in_s[s][w] >= 0.3) nworkers_rounded[s][idx]++; } else { if(w_in_s[s][w] > 0.5) nworkers_rounded[s][idx]++; } } } if(!do_size) sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, ntypes_of_workers, nworkers_rounded, nworkers, sched_ctxs_input, tw); else { unsigned *current_sched_ctxs = sched_ctxs_input == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs_input; unsigned has_workers = 0; for(s = 0; s < ns; s++) { int nworkers_ctx = sc_hypervisor_get_nworkers_ctx(current_sched_ctxs[s], STARPU_ANY_WORKER); if(nworkers_ctx != 0) { has_workers = 1; break; } } if(has_workers) sc_hypervisor_lp_redistribute_resources_in_ctxs(ns, ntypes_of_workers, nworkers_rounded, nworkers, current_sched_ctxs, tw); else sc_hypervisor_lp_distribute_resources_in_ctxs(current_sched_ctxs, ns, ntypes_of_workers, nworkers_rounded, nworkers, workers_input, nw, tw); } return; } void sc_hypervisor_lp_share_remaining_resources(int ns, unsigned *sched_ctxs, int nworkers, int *workers) { int s, w, worker, nw = 0; int remaining_workers[nworkers]; for(w = 0; w < nworkers; w++) { worker = workers == NULL ? w : workers[w]; unsigned found = 0; for(s = 0; s < ns; s++) { if(starpu_sched_ctx_contains_worker(worker, sched_ctxs[s])) { found = 1; break; } } if(!found) { remaining_workers[nw++] = worker; } } if(nw > 0) { for(s = 0; s < ns; s++) { for(w = 0; w < nw; w++) _sc_hypervisor_allow_compute_idle(sched_ctxs[s], remaining_workers[w], 0); sc_hypervisor_add_workers_to_sched_ctx(remaining_workers, nw, sched_ctxs[s]); } } } double sc_hypervisor_lp_find_tmax(double t1, double t2) { return t1 + ((t2 - t1)/2); } starpu-1.4.9+dfsg/sc_hypervisor/src/policies_utils/policy_tools.c000066400000000000000000000445171507764646700254230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "sc_hypervisor_policy.h" #include "sc_hypervisor_intern.h" #include "sc_hypervisor_lp.h" static int _compute_priority(unsigned sched_ctx) { struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctx); int total_priority = 0; struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); int worker; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { worker = workers->get_next(workers, &it); total_priority += config->priority[worker]; } return total_priority; } /* find the context with the lowest priority */ unsigned sc_hypervisor_find_lowest_prio_sched_ctx(unsigned req_sched_ctx, int nworkers_to_move) { int i; int highest_priority = -1; int current_priority = 0; unsigned sched_ctx = STARPU_NMAX_SCHED_CTXS; unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); struct sc_hypervisor_policy_config *config = NULL; for(i = 0; i < nsched_ctxs; i++) { if(sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS && sched_ctxs[i] != req_sched_ctx) { int nworkers = (int)starpu_sched_ctx_get_nworkers(sched_ctxs[i]); config = sc_hypervisor_get_config(sched_ctxs[i]); if((nworkers + nworkers_to_move) <= config->max_nworkers) { current_priority = _compute_priority(sched_ctxs[i]); if (highest_priority < current_priority) { highest_priority = current_priority; sched_ctx = sched_ctxs[i]; } } } } return sched_ctx; } int* sc_hypervisor_get_idlest_workers_in_list(int *start, int *workers, int nall_workers, int *nworkers, enum starpu_worker_archtype arch) { int *curr_workers = (int*)malloc((*nworkers)*sizeof(int)); int w, worker; int nfound_workers = 0; for(w = 0; w < nall_workers; w++) { if(nfound_workers >= *nworkers) break; worker = workers == NULL ? w : workers[w]; enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker); if(arch == STARPU_ANY_WORKER || curr_arch == arch) { if(w >= *start) { curr_workers[nfound_workers++] = worker; *start = w+1; } } } if(nfound_workers < *nworkers) *nworkers = nfound_workers; return curr_workers; } /* get first nworkers with the highest idle time in the context */ int* sc_hypervisor_get_idlest_workers(unsigned sched_ctx, int *nworkers, enum starpu_worker_archtype arch) { struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx); struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctx); int *curr_workers = (int*)malloc((*nworkers) * sizeof(int)); int i; for(i = 0; i < *nworkers; i++) curr_workers[i] = -1; struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); int index; int worker; int considered = 0; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); for(index = 0; index < *nworkers; index++) { while(workers->has_next(workers, &it)) { considered = 0; worker = workers->get_next(workers, &it); enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker); if(arch == STARPU_ANY_WORKER || curr_arch == arch) { if(!config->fixed_workers[worker]) { for(i = 0; i < index; i++) { if(curr_workers[i] == worker) { considered = 1; break; } } if(!considered) { /* the first iteration*/ if(curr_workers[index] < 0) curr_workers[index] = worker; /* small priority worker is the first to leave the ctx*/ else if(config->priority[worker] < config->priority[curr_workers[index]]) curr_workers[index] = worker; /* if we don't consider priorities check for the workers with the biggest idle time */ else if(config->priority[worker] == config->priority[curr_workers[index]]) { double worker_idle_time = sc_w->current_idle_time[worker]; double curr_worker_idle_time = sc_w->current_idle_time[curr_workers[index]]; if(worker_idle_time > curr_worker_idle_time) curr_workers[index] = worker; } } } } } if(curr_workers[index] < 0) { *nworkers = index; break; } } return curr_workers; } /* get the number of workers in the context that are allowed to be moved (that are not fixed) */ int sc_hypervisor_get_movable_nworkers(struct sc_hypervisor_policy_config *config, unsigned sched_ctx, enum starpu_worker_archtype arch) { struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); int potential_workers = 0; int worker; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { worker = workers->get_next(workers, &it); enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker); if(arch == STARPU_ANY_WORKER || curr_arch == arch) { if(!config->fixed_workers[worker]) potential_workers++; } } return potential_workers; } /* compute the number of workers that should be moved depending: * - on the min/max number of workers in a context imposed by the user, * - on the resource granularity imposed by the user for the resizing process*/ int sc_hypervisor_compute_nworkers_to_move(unsigned req_sched_ctx) { struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(req_sched_ctx); int nworkers = (int)starpu_sched_ctx_get_nworkers(req_sched_ctx); int nworkers_to_move = 0; int potential_moving_workers = (int)sc_hypervisor_get_movable_nworkers(config, req_sched_ctx, STARPU_ANY_WORKER); if(potential_moving_workers > 0) { if(potential_moving_workers <= config->min_nworkers) /* if we have to give more than min better give it all */ /* => empty ctx will block until having the required workers */ nworkers_to_move = potential_moving_workers; else if(potential_moving_workers > config->max_nworkers) { if((potential_moving_workers - config->granularity) > config->max_nworkers) // nworkers_to_move = config->granularity; nworkers_to_move = potential_moving_workers; else nworkers_to_move = potential_moving_workers - config->max_nworkers; } else if(potential_moving_workers > config->granularity) { if((nworkers - config->granularity) > config->min_nworkers) nworkers_to_move = config->granularity; else nworkers_to_move = potential_moving_workers - config->min_nworkers; } else { int nfixed_workers = nworkers - potential_moving_workers; if(nfixed_workers >= config->min_nworkers) nworkers_to_move = potential_moving_workers; else nworkers_to_move = potential_moving_workers - (config->min_nworkers - nfixed_workers); } if((nworkers - nworkers_to_move) > config->max_nworkers) nworkers_to_move = nworkers - config->max_nworkers; } return nworkers_to_move; } unsigned sc_hypervisor_policy_resize(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, unsigned force_resize, unsigned now) { int ret = 1; if(force_resize) STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex); else ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { int nworkers_to_move = sc_hypervisor_compute_nworkers_to_move(sender_sched_ctx); if(nworkers_to_move > 0) { unsigned poor_sched_ctx = STARPU_NMAX_SCHED_CTXS; if(receiver_sched_ctx == STARPU_NMAX_SCHED_CTXS) { poor_sched_ctx = sc_hypervisor_find_lowest_prio_sched_ctx(sender_sched_ctx, (unsigned)nworkers_to_move); } else { poor_sched_ctx = receiver_sched_ctx; struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(poor_sched_ctx); int nworkers = (int)starpu_sched_ctx_get_nworkers(poor_sched_ctx); int nshared_workers = (int)starpu_sched_ctx_get_nshared_workers(sender_sched_ctx, poor_sched_ctx); if((nworkers+nworkers_to_move-nshared_workers) > config->max_nworkers) nworkers_to_move = nworkers > config->max_nworkers ? 0 : (config->max_nworkers - nworkers+nshared_workers); if(nworkers_to_move == 0) poor_sched_ctx = STARPU_NMAX_SCHED_CTXS; } if(poor_sched_ctx != STARPU_NMAX_SCHED_CTXS) { int *workers_to_move = sc_hypervisor_get_idlest_workers(sender_sched_ctx, &nworkers_to_move, STARPU_ANY_WORKER); sc_hypervisor_move_workers(sender_sched_ctx, poor_sched_ctx, workers_to_move, nworkers_to_move, now); struct sc_hypervisor_policy_config *new_config = sc_hypervisor_get_config(poor_sched_ctx); int i; for(i = 0; i < nworkers_to_move; i++) new_config->max_idle[workers_to_move[i]] = new_config->max_idle[workers_to_move[i]] !=MAX_IDLE_TIME ? new_config->max_idle[workers_to_move[i]] : new_config->new_workers_max_idle; free(workers_to_move); } } STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); return 1; } return 0; } unsigned sc_hypervisor_policy_resize_to_unknown_receiver(unsigned sender_sched_ctx, unsigned now) { return sc_hypervisor_policy_resize(sender_sched_ctx, STARPU_NMAX_SCHED_CTXS, 0, now); } double sc_hypervisor_get_slowest_ctx_exec_time(void) { unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); /* double curr_time = starpu_timing_now(); */ double slowest_time = 0.0; int s; struct sc_hypervisor_wrapper* sc_w; for(s = 0; s < nsched_ctxs; s++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); // double elapsed_time = (curr_time - sc_w->start_time)/1000000; struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx); double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/sc_hypervisor_get_ctx_speed(sc_w); if(elapsed_time > slowest_time) slowest_time = elapsed_time; } return slowest_time; } double sc_hypervisor_get_fastest_ctx_exec_time(void) { unsigned *sched_ctxs = sc_hypervisor_get_sched_ctxs(); int nsched_ctxs = sc_hypervisor_get_nsched_ctxs(); double curr_time = starpu_timing_now(); double fastest_time = curr_time; int s; struct sc_hypervisor_wrapper* sc_w; for(s = 0; s < nsched_ctxs; s++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx); double elapsed_time = (config->ispeed_ctx_sample/1000000000.0)/sc_hypervisor_get_ctx_speed(sc_w); if(elapsed_time < fastest_time) fastest_time = elapsed_time; } return fastest_time; } void sc_hypervisor_group_workers_by_type(struct types_of_workers *tw, int *total_nw) { unsigned w; for(w = 0; w < tw->nw; w++) total_nw[w] = 0; if(tw->ncpus != 0) { total_nw[0] = tw->ncpus; if(tw->ncuda != 0) total_nw[1] = tw->ncuda; } else { if(tw->ncuda != 0) total_nw[0] =tw->ncuda; } } enum starpu_worker_archtype sc_hypervisor_get_arch_for_index(unsigned w, struct types_of_workers *tw) { if(w == 0) { if(tw->ncpus != 0) return STARPU_CPU_WORKER; else return STARPU_CUDA_WORKER; } else if(tw->ncuda != 0) return STARPU_CUDA_WORKER; return STARPU_CPU_WORKER; } unsigned sc_hypervisor_get_index_for_arch(enum starpu_worker_archtype arch, struct types_of_workers *tw) { if(arch == STARPU_CPU_WORKER) { if(tw->ncpus != 0) return 0; } else { if(arch == STARPU_CUDA_WORKER) { if(tw->ncpus != 0) return 1; else return 0; } } return 0; } void sc_hypervisor_get_tasks_times(int nw, int nt, double times[nw][nt], int *workers, unsigned size_ctxs, struct sc_hypervisor_policy_task_pool *task_pools) { struct sc_hypervisor_policy_task_pool *tp; int w, t; for(w = 0; w < nw; w++) for(t = 0; t < nt; t++) times[w][t] = NAN; for (w = 0; w < nw; w++) { for (t = 0, tp = task_pools; tp; t++, tp = tp->next) { int worker = workers == NULL ? w : workers[w]; struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(worker, STARPU_NMAX_SCHED_CTXS); double length = starpu_perfmodel_history_based_expected_perf(tp->cl->model, arch, tp->footprint); if (isnan(length)) times[w][t] = NAN; else { times[w][t] = (length / 1000.); double transfer_time = 0.0; unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, tp->sched_ctx_id); enum starpu_worker_archtype warch = starpu_worker_get_type(worker); if(!worker_in_ctx && !size_ctxs) { if(warch == STARPU_CUDA_WORKER) { double transfer_speed = starpu_transfer_bandwidth(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker)); if(transfer_speed > 0.0) transfer_time += (tp->data_size / transfer_speed) / 1000. ; double latency = starpu_transfer_latency(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker)); transfer_time += latency/1000.; // transfer_time *=4; } else if (warch == STARPU_CPU_WORKER) { if(!starpu_sched_ctx_contains_type_of_worker(warch, tp->sched_ctx_id)) { double transfer_speed = starpu_transfer_bandwidth(starpu_worker_get_memory_node(worker), STARPU_MAIN_RAM); if(transfer_speed > 0.0) transfer_time += (tp->data_size / transfer_speed) / 1000. ; double latency = starpu_transfer_latency(starpu_worker_get_memory_node(worker), STARPU_MAIN_RAM); transfer_time += latency / 1000.; } } } // printf("%d/%d %s x %d time = %lf transfer_time = %lf\n", w, tp->sched_ctx_id, tp->cl->model->symbol, tp->n, times[w][t], transfer_time); times[w][t] += transfer_time; } // printf("sc%d w%d task %s nt %d times %lf s\n", tp->sched_ctx_id, w, tp->cl->model->symbol, tp->n, times[w][t]); } } } unsigned sc_hypervisor_check_idle(unsigned sched_ctx, int worker) { struct sc_hypervisor_wrapper* sc_w = sc_hypervisor_get_wrapper(sched_ctx); struct sc_hypervisor_policy_config *config = sc_w->config; if(config != NULL) { if(sc_w->idle_time[worker] > config->max_idle[worker]) { // printf("w%d/ctx%d: current idle %lf max_idle %lf\n", worker, sched_ctx, sc_w->idle_time[worker], config->max_idle[worker]); return 1; } } return 0; } /* check if there is a big speed gap between the contexts */ unsigned sc_hypervisor_check_speed_gap_btw_ctxs(unsigned *sched_ctxs_in, int ns_in, int *workers_in, int nworkers_in) { unsigned *sched_ctxs = sched_ctxs_in == NULL ? sc_hypervisor_get_sched_ctxs() : sched_ctxs_in; int ns = ns_in == -1 ? sc_hypervisor_get_nsched_ctxs() : ns_in; int *workers = workers_in; int nworkers = nworkers_in == -1 ? (int)starpu_worker_get_count() : nworkers_in; int i = 0, j = 0; struct sc_hypervisor_wrapper* sc_w; struct sc_hypervisor_wrapper* other_sc_w; double optimal_v[ns]; unsigned has_opt_v = 1; for(i = 0; i < ns; i++) { optimal_v[i] = _get_optimal_v(sched_ctxs[i]); if(optimal_v[i] == 0.0) { has_opt_v = 0; break; } } /*if an optimal speed has not been computed yet do it now */ if(!has_opt_v) { struct types_of_workers *tw = sc_hypervisor_get_types_of_workers(workers, nworkers); int nw = tw->nw; double nworkers_per_ctx[ns][nw]; int total_nw[nw]; sc_hypervisor_group_workers_by_type(tw, total_nw); // double vmax = sc_hypervisor_lp_get_nworkers_per_ctx(ns, nw, nworkers_per_ctx, total_nw, tw, sched_ctxs); // if(vmax != 0.0) { for(i = 0; i < ns; i++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]); double v[nw]; optimal_v[i] = 0.0; int w; for(w = 0; w < nw; w++) { v[w] = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); optimal_v[i] += nworkers_per_ctx[i][w] == -1.0 ? 0.0 : nworkers_per_ctx[i][w]*v[w]; } _set_optimal_v(sched_ctxs[i], optimal_v[i]); } has_opt_v = 1; } free(tw); } /* if we have an optimal speed for each type of worker compare the monitored one with the * theoretical one */ if(has_opt_v) { for(i = 0; i < ns; i++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]); double ctx_v = sc_hypervisor_get_ctx_speed(sc_w); if(ctx_v == -1.0) return 0; } for(i = 0; i < ns; i++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]); double ctx_v = sc_hypervisor_get_ctx_speed(sc_w); ctx_v = ctx_v < 0.01 ? 0.0 : ctx_v; double max_vel = _get_max_speed_gap(); if(ctx_v != -1.0 && ((ctx_v < (1-max_vel)*optimal_v[i]) || ctx_v > (1+max_vel)*optimal_v[i])) { return 1; } } } else /* if we have not been able to compute a theoretical speed consider the env variable SC_MAX_SPEED_GAP and compare the speed of the contexts, whenever the difference btw them is greater than the max value the function returns true */ { for(i = 0; i < ns; i++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[i]); double ctx_v = sc_hypervisor_get_ctx_speed(sc_w); if(ctx_v != -1.0) { for(j = 0; j < ns; j++) { if(sched_ctxs[i] != sched_ctxs[j]) { unsigned snworkers = starpu_sched_ctx_get_nworkers(sched_ctxs[j]); if(snworkers == 0) return 1; other_sc_w = sc_hypervisor_get_wrapper(sched_ctxs[j]); double other_ctx_v = sc_hypervisor_get_ctx_speed(other_sc_w); if(other_ctx_v != -1.0) { double gap = ctx_v < other_ctx_v ? other_ctx_v / ctx_v : ctx_v / other_ctx_v; double max_vel = _get_max_speed_gap(); if(gap > max_vel) return 1; } } } } } } return 0; } unsigned sc_hypervisor_check_speed_gap_btw_ctxs_on_level(int level, int *workers_in, int nworkers_in, unsigned father_sched_ctx_id, unsigned **sched_ctxs, int *nsched_ctxs) { sc_hypervisor_get_ctxs_on_level(sched_ctxs, nsched_ctxs, level, father_sched_ctx_id); if(*nsched_ctxs > 0) return sc_hypervisor_check_speed_gap_btw_ctxs(*sched_ctxs, *nsched_ctxs, workers_in, nworkers_in); return 0; } unsigned sc_hypervisor_criteria_fulfilled(unsigned sched_ctx, int worker) { unsigned criteria = sc_hypervisor_get_resize_criteria(); if(criteria != SC_NOTHING) { if(criteria == SC_IDLE) return sc_hypervisor_check_idle(sched_ctx, worker); else return sc_hypervisor_check_speed_gap_btw_ctxs(NULL, -1, NULL, -1); } else return 0; } starpu-1.4.9+dfsg/sc_hypervisor/src/policies_utils/speed.c000066400000000000000000000262341507764646700240000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "sc_hypervisor_policy.h" #include "sc_hypervisor_intern.h" #include double sc_hypervisor_get_ctx_speed(struct sc_hypervisor_wrapper* sc_w) { struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx); double elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w); double sample = config->ispeed_ctx_sample; double total_elapsed_flops = sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(sc_w); double total_flops = sc_w->total_flops; char *start_sample_prc_char = getenv("SC_HYPERVISOR_START_RESIZE"); double start_sample_prc = start_sample_prc_char ? atof(start_sample_prc_char) : 0.0; double start_sample = start_sample_prc > 0.0 ? (start_sample_prc / 100) * total_flops : sample; double redim_sample = elapsed_flops == total_elapsed_flops ? (start_sample > 0.0 ? start_sample : sample) : sample; double curr_time = starpu_timing_now(); double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */ unsigned can_compute_speed = 0; char *speed_sample_criteria = getenv("SC_HYPERVISOR_SAMPLE_CRITERIA"); if(speed_sample_criteria && (strcmp(speed_sample_criteria, "time") == 0)) can_compute_speed = elapsed_time > config->time_sample; else can_compute_speed = elapsed_flops >= redim_sample; if(can_compute_speed) { return (elapsed_flops/1000000000.0)/elapsed_time;/* in Gflops/s */ } return -1.0; } double sc_hypervisor_get_speed_per_worker(struct sc_hypervisor_wrapper *sc_w, unsigned worker) { if(!starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx)) return -1.0; double elapsed_flops = sc_w->elapsed_flops[worker] / 1000000000.0; /*in gflops */ struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx); double sample = config->ispeed_w_sample[worker] / 1000000000.0; /*in gflops */ double ctx_elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w); double ctx_sample = config->ispeed_ctx_sample; if(ctx_elapsed_flops > ctx_sample && elapsed_flops == 0.0) return 0.00000000000001; if(elapsed_flops > sample) { double curr_time = starpu_timing_now(); double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */ elapsed_time -= sc_w->idle_time[worker]; /* size_t elapsed_data_used = sc_w->elapsed_data[worker]; */ /* enum starpu_worker_archtype arch = starpu_worker_get_type(worker); */ /* if(arch == STARPU_CUDA_WORKER) */ /* { */ /* /\* unsigned worker_in_ctx = starpu_sched_ctx_contains_worker(worker, sc_w->sched_ctx); *\/ */ /* /\* if(!worker_in_ctx) *\/ */ /* /\* { *\/ */ /* /\* double transfer_speed = starpu_transfer_bandwidth(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker)); *\/ */ /* /\* elapsed_time += (elapsed_data_used / transfer_speed) / 1000000 ; *\/ */ /* /\* } *\/ */ /* double latency = starpu_transfer_latency(STARPU_MAIN_RAM, starpu_worker_get_memory_node(worker)); */ /* // printf("%d/%d: latency %lf elapsed_time before %lf ntasks %d\n", worker, sc_w->sched_ctx, latency, elapsed_time, elapsed_tasks); */ /* elapsed_time += (elapsed_tasks * latency)/1000000; */ /* // printf("elapsed time after %lf \n", elapsed_time); */ /* } */ double vel = (elapsed_flops/elapsed_time);/* in Gflops/s */ return vel; } return -1.0; } /* compute an average value of the cpu/cuda speed */ double sc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch) { struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sc_w->sched_ctx); double ctx_elapsed_flops = sc_hypervisor_get_elapsed_flops_per_sched_ctx(sc_w); double ctx_sample = config->ispeed_ctx_sample; double curr_time = starpu_timing_now(); double elapsed_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */ unsigned can_compute_speed = 0; char *speed_sample_criteria = getenv("SC_HYPERVISOR_SAMPLE_CRITERIA"); if(speed_sample_criteria && (strcmp(speed_sample_criteria, "time") == 0)) can_compute_speed = elapsed_time > config->time_sample; else can_compute_speed = ctx_elapsed_flops > ctx_sample; if(can_compute_speed) { if(ctx_elapsed_flops == 0.0) return -1.0; struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx); int worker; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); double speed = 0.0; unsigned nworkers = 0; double all_workers_flops = 0.0; double max_workers_idle_time = 0.0; while(workers->has_next(workers, &it)) { worker = workers->get_next(workers, &it); enum starpu_worker_archtype req_arch = starpu_worker_get_type(worker); if(arch == req_arch && sc_w->compute_idle[worker]) { if(sc_w->exec_start_time[worker] != 0.0) { double current_exec_time = 0.0; if(sc_w->exec_start_time[worker] < sc_w->start_time) current_exec_time = (curr_time - sc_w->start_time) / 1000000.0; /* in seconds */ else current_exec_time = (curr_time - sc_w->exec_start_time[worker]) / 1000000.0; /* in seconds */ double suppl_flops = current_exec_time * sc_hypervisor_get_ref_speed_per_worker_type(sc_w, req_arch); all_workers_flops += suppl_flops; } all_workers_flops += sc_w->elapsed_flops[worker] / 1000000000.0; /*in gflops */ if(max_workers_idle_time < sc_w->idle_time[worker]) max_workers_idle_time = sc_w->idle_time[worker]; /* in seconds */ nworkers++; } } if(nworkers != 0 && all_workers_flops > 0.0001) { // elapsed_time -= max_workers_idle_time; speed = (all_workers_flops / elapsed_time) / nworkers; } else speed = -1.0; if(speed != -1.0) { /* if ref_speed started being corrupted bc of the old bad distribution register only the last frame otherwise make the average with the speed behavior of the application until now */ if(arch == STARPU_CUDA_WORKER) sc_w->ref_speed[0] = (sc_w->ref_speed[0] > 0.1) ? ((sc_w->ref_speed[0] + speed) / 2.0) : speed; else sc_w->ref_speed[1] = (sc_w->ref_speed[1] > 0.1) ? ((sc_w->ref_speed[1] + speed) / 2.0) : speed; } return speed; } return -1.0; } /* compute an average value of the cpu/cuda old speed */ double sc_hypervisor_get_ref_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch) { if(arch == STARPU_CUDA_WORKER && sc_w->ref_speed[0] > 0.0) return sc_w->ref_speed[0]; else if(arch == STARPU_CPU_WORKER && sc_w->ref_speed[1] > 0.0) return sc_w->ref_speed[1]; return -1.0; } /* returns the speed necessary for the linear programs (either the monitored one either a default value) */ double sc_hypervisor_get_speed(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch) { /* monitored speed in the last frame */ double speed = sc_hypervisor_get_speed_per_worker_type(sc_w, arch); if(speed == -1.0) { /* avg value of the monitored speed over the entier current execution */ speed = sc_hypervisor_get_ref_speed_per_worker_type(sc_w, arch); } if(speed == -1.0) { /* a default value */ speed = arch == STARPU_CPU_WORKER ? SC_HYPERVISOR_DEFAULT_CPU_SPEED : SC_HYPERVISOR_DEFAULT_CUDA_SPEED; } return speed; } double sc_hypervisor_get_avg_speed(enum starpu_worker_archtype arch) { double total_executed_flops = 0.0; double total_estimated_flops = 0.0; struct sc_hypervisor_wrapper *sc_w; double max_real_start_time = 0.0; int s; unsigned nworkers = starpu_worker_get_count_by_type(arch); unsigned *sched_ctxs; int nsched_ctxs; sc_hypervisor_get_ctxs_on_level(&sched_ctxs, &nsched_ctxs, 0, STARPU_NMAX_SCHED_CTXS); for(s = 0; s < nsched_ctxs; s++) { sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctxs[s]); int worker; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { worker = workers->get_next(workers, &it); enum starpu_worker_archtype req_arch = starpu_worker_get_type(worker); if(arch == req_arch) { total_executed_flops += sc_w->total_elapsed_flops[worker] / 1000000000.0; /*in gflops */; } } total_estimated_flops += sc_w->total_flops / 1000000000.0; /*in gflops */ if(max_real_start_time < sc_w->real_start_time) max_real_start_time = sc_w->real_start_time; } free(sched_ctxs); double speed = -1.0; #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("total_exec_flops %lf total_estimated_flops %lf max_real_start_time %lf nworkers %u \n", total_executed_flops, total_estimated_flops, max_real_start_time, nworkers); #endif if(total_executed_flops > 0.5*total_estimated_flops) { double curr_time = starpu_timing_now(); double time = (curr_time - max_real_start_time) / 1000000.0; /* in seconds */ #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("time = %lf\n", time); #endif speed = (total_executed_flops / time) / nworkers; } return speed; } void _consider_max_for_children(unsigned sched_ctx, unsigned consider_max) { struct sc_hypervisor_wrapper *sc_w = sc_hypervisor_get_wrapper(sched_ctx); sc_w->consider_max = consider_max; #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("ctx %u consider max %d \n", sched_ctx, sc_w->consider_max); #endif int level = starpu_sched_ctx_get_hierarchy_level(sched_ctx); unsigned *sched_ctxs_child; int nsched_ctxs_child = 0; sc_hypervisor_get_ctxs_on_level(&sched_ctxs_child, &nsched_ctxs_child, level+1, sched_ctx); int s; for(s = 0; s < nsched_ctxs_child; s++) _consider_max_for_children(sched_ctxs_child[s], consider_max); free(sched_ctxs_child); } void sc_hypervisor_check_if_consider_max(struct types_of_workers *tw) { unsigned *sched_ctxs; int nsched_ctxs; sc_hypervisor_get_ctxs_on_level(&sched_ctxs, &nsched_ctxs, 0, STARPU_NMAX_SCHED_CTXS); int nw = tw->nw; double avg_speed_per_tw[nw]; int w; for(w = 0; w < nw; w++) { avg_speed_per_tw[w] = sc_hypervisor_get_avg_speed(sc_hypervisor_get_arch_for_index(w, tw)); if(avg_speed_per_tw[w] == -1.0) { free(sched_ctxs); return; } } int s; for(s = 0; s < nsched_ctxs; s++) { for(w = 0; w < nw; w++) { struct sc_hypervisor_wrapper *sc_w = sc_hypervisor_get_wrapper(sched_ctxs[s]); double speed = sc_hypervisor_get_speed(sc_w, sc_hypervisor_get_arch_for_index(w, tw)); #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("%u: speed %lf avg_speed %lf min %lf max %lf\n", sched_ctxs[s], speed, avg_speed_per_tw[w], (avg_speed_per_tw[w]*0.5), (avg_speed_per_tw[w]*1.5)); #endif if(speed < avg_speed_per_tw[w]*0.5 || speed > avg_speed_per_tw[w]*1.5) _consider_max_for_children(sched_ctxs[s], 1); else _consider_max_for_children(sched_ctxs[s], 0); } } free(sched_ctxs); } starpu-1.4.9+dfsg/sc_hypervisor/src/policies_utils/task_pool.c000066400000000000000000000054051507764646700246700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "sc_hypervisor_policy.h" void sc_hypervisor_policy_add_task_to_pool(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, struct sc_hypervisor_policy_task_pool **task_pools, size_t data_size) { struct sc_hypervisor_policy_task_pool *tp = NULL; for (tp = *task_pools; tp; tp = tp->next) { if (tp && tp->cl == cl && tp->footprint == footprint && tp->sched_ctx_id == sched_ctx) break; } if (!tp) { tp = (struct sc_hypervisor_policy_task_pool *) malloc(sizeof(struct sc_hypervisor_policy_task_pool)); tp->cl = cl; tp->footprint = footprint; tp->sched_ctx_id = sched_ctx; tp->n = 0; tp->next = *task_pools; tp->data_size = data_size; *task_pools = tp; } /* One more task of this kind */ tp->n++; } void sc_hypervisor_policy_remove_task_from_pool(struct starpu_task *task, uint32_t footprint, struct sc_hypervisor_policy_task_pool **task_pools) { /* count the tasks of the same type */ struct sc_hypervisor_policy_task_pool *tp = NULL; for (tp = *task_pools; tp; tp = tp->next) { if (tp && tp->cl == task->cl && tp->footprint == footprint && tp->sched_ctx_id == task->sched_ctx) break; } if (tp) { if(tp->n > 1) tp->n--; else { if(tp == *task_pools) { struct sc_hypervisor_policy_task_pool *next_tp = NULL; if((*task_pools)->next) next_tp = (*task_pools)->next; free(tp); tp = NULL; *task_pools = next_tp; } else { struct sc_hypervisor_policy_task_pool *prev_tp = NULL; for (prev_tp = *task_pools; prev_tp; prev_tp = prev_tp->next) { if (prev_tp->next == tp) prev_tp->next = tp->next; } free(tp); tp = NULL; } } } } struct sc_hypervisor_policy_task_pool* sc_hypervisor_policy_clone_task_pool(struct sc_hypervisor_policy_task_pool *tp) { if(tp == NULL) return NULL; struct sc_hypervisor_policy_task_pool *tmp_tp = (struct sc_hypervisor_policy_task_pool*)malloc(sizeof(struct sc_hypervisor_policy_task_pool)); memcpy(tmp_tp, tp, sizeof(struct sc_hypervisor_policy_task_pool)); tmp_tp->next = sc_hypervisor_policy_clone_task_pool(tp->next); return tmp_tp; } starpu-1.4.9+dfsg/sc_hypervisor/src/sc_config.c000066400000000000000000000155011507764646700215760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include static struct sc_hypervisor_policy_config* _create_config(void) { struct sc_hypervisor_policy_config *config = (struct sc_hypervisor_policy_config *)malloc(sizeof(struct sc_hypervisor_policy_config)); config->min_nworkers = -1; config->max_nworkers = -1; config->new_workers_max_idle = -1.0; config->ispeed_ctx_sample = 0.0; config->time_sample = 0.5; int i; for(i = 0; i < STARPU_NMAXWORKERS; i++) { config->granularity = -1; config->priority[i] = -1; config->fixed_workers[i] = -1; config->max_idle[i] = -1.0; config->min_working[i] = -1.0; config->ispeed_w_sample[i] = 0.0; } return config; } static void _update_config(struct sc_hypervisor_policy_config *old, struct sc_hypervisor_policy_config* new) { old->min_nworkers = new->min_nworkers != -1 ? new->min_nworkers : old->min_nworkers ; old->max_nworkers = new->max_nworkers != -1 ? new->max_nworkers : old->max_nworkers ; old->new_workers_max_idle = new->new_workers_max_idle != -1.0 ? new->new_workers_max_idle : old->new_workers_max_idle; old->granularity = new->granularity != -1 ? new->granularity : old->granularity; int i; for(i = 0; i < STARPU_NMAXWORKERS; i++) { old->priority[i] = new->priority[i] != -1 ? new->priority[i] : old->priority[i]; old->fixed_workers[i] = new->fixed_workers[i] != -1 ? new->fixed_workers[i] : old->fixed_workers[i]; old->max_idle[i] = new->max_idle[i] != -1.0 ? new->max_idle[i] : old->max_idle[i]; old->min_working[i] = new->min_working[i] != -1.0 ? new->min_working[i] : old->min_working[i]; } } void sc_hypervisor_set_config(unsigned sched_ctx, void *config) { if(hypervisor.sched_ctx_w[sched_ctx].config != NULL && config != NULL) { _update_config(hypervisor.sched_ctx_w[sched_ctx].config, config); } else { hypervisor.sched_ctx_w[sched_ctx].config = config; } return; } void _add_config(unsigned sched_ctx) { struct sc_hypervisor_policy_config *config = _create_config(); config->min_nworkers = 0; config->max_nworkers = starpu_worker_get_count(); config->new_workers_max_idle = MAX_IDLE_TIME; int i; for(i = 0; i < STARPU_NMAXWORKERS; i++) { config->granularity = 1; config->priority[i] = 0; config->fixed_workers[i] = 0; config->max_idle[i] = MAX_IDLE_TIME; config->min_working[i] = MIN_WORKING_TIME; } sc_hypervisor_set_config(sched_ctx, config); } void _remove_config(unsigned sched_ctx) { sc_hypervisor_set_config(sched_ctx, NULL); } struct sc_hypervisor_policy_config* sc_hypervisor_get_config(unsigned sched_ctx) { return hypervisor.sched_ctx_w[sched_ctx].config; } static struct sc_hypervisor_policy_config* _ctl(unsigned sched_ctx, va_list varg_list, unsigned later) { struct sc_hypervisor_policy_config *config = NULL; if(later) config = _create_config(); else config = sc_hypervisor_get_config(sched_ctx); assert(config != NULL); int arg_type; int i; int *workerids; int nworkers; while ((arg_type = va_arg(varg_list, int)) != SC_HYPERVISOR_NULL) { switch(arg_type) { case SC_HYPERVISOR_MAX_IDLE: workerids = va_arg(varg_list, int*); nworkers = va_arg(varg_list, int); double max_idle = va_arg(varg_list, double); for(i = 0; i < nworkers; i++) config->max_idle[workerids[i]] = max_idle; break; case SC_HYPERVISOR_MIN_WORKING: workerids = va_arg(varg_list, int*); nworkers = va_arg(varg_list, int); double min_working = va_arg(varg_list, double); for(i = 0; i < nworkers; i++) config->min_working[workerids[i]] = min_working; break; case SC_HYPERVISOR_PRIORITY: workerids = va_arg(varg_list, int*); nworkers = va_arg(varg_list, int); int priority = va_arg(varg_list, int); for(i = 0; i < nworkers; i++) config->priority[workerids[i]] = priority; break; case SC_HYPERVISOR_MIN_WORKERS: config->min_nworkers = va_arg(varg_list, unsigned); break; case SC_HYPERVISOR_MAX_WORKERS: config->max_nworkers = va_arg(varg_list, unsigned); break; case SC_HYPERVISOR_GRANULARITY: config->granularity = va_arg(varg_list, unsigned); break; case SC_HYPERVISOR_FIXED_WORKERS: workerids = va_arg(varg_list, int*); nworkers = va_arg(varg_list, int); for(i = 0; i < nworkers; i++) config->fixed_workers[workerids[i]] = 1; break; case SC_HYPERVISOR_NEW_WORKERS_MAX_IDLE: config->new_workers_max_idle = va_arg(varg_list, double); break; case SC_HYPERVISOR_ISPEED_W_SAMPLE: workerids = va_arg(varg_list, int*); nworkers = va_arg(varg_list, int); double sample = va_arg(varg_list, double); for(i = 0; i < nworkers; i++) config->ispeed_w_sample[workerids[i]] = sample; break; case SC_HYPERVISOR_ISPEED_CTX_SAMPLE: config->ispeed_ctx_sample = va_arg(varg_list, double); break; case SC_HYPERVISOR_TIME_SAMPLE: config->time_sample = va_arg(varg_list, double); break; /* not important for the strateg, needed just to jump these args in the iteration of the args */ case SC_HYPERVISOR_TIME_TO_APPLY: va_arg(varg_list, int); break; case SC_HYPERVISOR_MIN_TASKS: va_arg(varg_list, int); break; } } return later ? config : NULL; } void sc_hypervisor_ctl(unsigned sched_ctx, ...) { va_list varg_list; va_start(varg_list, sched_ctx); int arg_type; int stop = 0; int task_tag = -1; while ((arg_type = va_arg(varg_list, int)) != SC_HYPERVISOR_NULL) { switch(arg_type) { case SC_HYPERVISOR_TIME_TO_APPLY: task_tag = va_arg(varg_list, int); stop = 1; break; case SC_HYPERVISOR_MIN_TASKS: hypervisor.min_tasks = va_arg(varg_list, int); hypervisor.check_min_tasks[sched_ctx] = 1; break; } if(stop) break; } va_end(varg_list); va_start(varg_list, sched_ctx); /* if config not null => save hypervisor configuration and consider it later */ struct sc_hypervisor_policy_config *config = _ctl(sched_ctx, varg_list, (task_tag > 0)); if(config != NULL) { struct configuration_entry *entry; entry = malloc(sizeof *entry); STARPU_ASSERT(entry != NULL); entry->task_tag = task_tag; entry->configuration = config; STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.conf_mut[sched_ctx]); HASH_ADD_INT(hypervisor.configurations[sched_ctx], task_tag, entry); STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.conf_mut[sched_ctx]); } va_end(varg_list); } starpu-1.4.9+dfsg/sc_hypervisor/src/sc_hypervisor.c000066400000000000000000001607511507764646700225530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include struct sc_hypervisor hypervisor; starpu_pthread_mutex_t act_hypervisor_mutex; double hyp_overhead = 0.0; unsigned imposed_resize = 0; unsigned type_of_tasks_known = 0; struct starpu_sched_ctx_performance_counters* perf_counters = NULL; static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time); static void notify_pushed_task(unsigned sched_ctx, int worker); static void notify_post_exec_task(struct starpu_task *task, size_t data_size, uint32_t footprint, int hypervisor_tag, double flops); static void notify_poped_task(unsigned sched_ctx, int worker); static void notify_submitted_job(struct starpu_task *task, unsigned footprint, size_t data_size); static void notify_empty_ctx(unsigned sched_ctx, struct starpu_task *task); static void notify_delete_context(unsigned sched_ctx); extern struct sc_hypervisor_policy idle_policy; extern struct sc_hypervisor_policy app_driven_policy; extern struct sc_hypervisor_policy gflops_rate_policy; #ifdef STARPU_HAVE_GLPK_H extern struct sc_hypervisor_policy feft_lp_policy; extern struct sc_hypervisor_policy teft_lp_policy; extern struct sc_hypervisor_policy ispeed_lp_policy; extern struct sc_hypervisor_policy throughput_lp_policy; #endif // STARPU_HAVE_GLPK_ extern struct sc_hypervisor_policy ispeed_policy; extern struct sc_hypervisor_policy hard_coded_policy; extern struct sc_hypervisor_policy perf_count_policy; static struct sc_hypervisor_policy *predefined_policies[] = { &idle_policy, &app_driven_policy, #ifdef STARPU_HAVE_GLPK_H &feft_lp_policy, &teft_lp_policy, &ispeed_lp_policy, &throughput_lp_policy, #endif // STARPU_HAVE_GLPK_H &gflops_rate_policy, &ispeed_policy, &hard_coded_policy, &perf_count_policy }; static void _load_hypervisor_policy(struct sc_hypervisor_policy *policy) { STARPU_ASSERT(policy); hypervisor.policy.name = policy->name; hypervisor.policy.size_ctxs = policy->size_ctxs; hypervisor.policy.resize_ctxs = policy->resize_ctxs; hypervisor.policy.handle_poped_task = policy->handle_poped_task; hypervisor.policy.handle_pushed_task = policy->handle_pushed_task; hypervisor.policy.handle_idle_cycle = policy->handle_idle_cycle; hypervisor.policy.handle_idle_end = policy->handle_idle_end; hypervisor.policy.handle_post_exec_hook = policy->handle_post_exec_hook; hypervisor.policy.handle_submitted_job = policy->handle_submitted_job; hypervisor.policy.end_ctx = policy->end_ctx; hypervisor.policy.start_ctx = policy->start_ctx; hypervisor.policy.init_worker = policy->init_worker; } static struct sc_hypervisor_policy *_find_hypervisor_policy_from_name(const char *policy_name) { if (!policy_name) return NULL; unsigned i; for (i = 0; i < sizeof(predefined_policies)/sizeof(predefined_policies[0]); i++) { struct sc_hypervisor_policy *p; p = predefined_policies[i]; if (p->name) { if (strcmp(policy_name, p->name) == 0) { /* we found a policy with the requested name */ return p; } } } fprintf(stderr, "Warning: hypervisor policy \"%s\" was not found, try \"help\" to get a list\n", policy_name); /* nothing was found */ return NULL; } static void display_sched_help_message(void) { const char* policy_name = getenv("SC_HYPERVISOR_POLICY"); if (policy_name && (strcmp(policy_name, "help") == 0)) { fprintf(stderr, "SC_HYPERVISOR_POLICY can be either of\n"); /* display the description of all predefined policies */ unsigned i; for (i = 0; i < sizeof(predefined_policies)/sizeof(predefined_policies[0]); i++) { struct sc_hypervisor_policy *p = predefined_policies[i]; if (p->name) { fprintf(stderr, "%s\n", p->name); } } } } static struct sc_hypervisor_policy *_select_hypervisor_policy(struct sc_hypervisor_policy* hypervisor_policy) { struct sc_hypervisor_policy *selected_policy = NULL; if(hypervisor_policy && hypervisor_policy->custom) return hypervisor_policy; /* we look if the application specified the name of a policy to load */ const char *policy_name; if (hypervisor_policy && hypervisor_policy->name) { policy_name = hypervisor_policy->name; } else { policy_name = getenv("SC_HYPERVISOR_POLICY"); } if (policy_name) selected_policy = _find_hypervisor_policy_from_name(policy_name); /* Perhaps there was no policy that matched the name */ if (selected_policy) return selected_policy; /* If no policy was specified, we use the idle policy as a default */ return &idle_policy; } /* initializez the performance counters that starpu will use to retrieve hints for resizing */ void* sc_hypervisor_init(struct sc_hypervisor_policy *hypervisor_policy) { /* Perhaps we have to display some help */ display_sched_help_message(); hypervisor.min_tasks = 0; hypervisor.nsched_ctxs = 0; char* vel_gap = getenv("SC_HYPERVISOR_MAX_SPEED_GAP"); hypervisor.max_speed_gap = vel_gap ? atof(vel_gap) : SC_SPEED_MAX_GAP_DEFAULT; char* crit = getenv("SC_HYPERVISOR_TRIGGER_RESIZE"); hypervisor.resize_criteria = !crit ? SC_IDLE : strcmp(crit,"idle") == 0 ? SC_IDLE : (strcmp(crit,"speed") == 0 ? SC_SPEED : SC_NOTHING); STARPU_PTHREAD_MUTEX_INIT(&act_hypervisor_mutex, NULL); // hypervisor.start_executing_time = starpu_timing_now(); int i; for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) { hypervisor.resize[i] = 0; hypervisor.allow_remove[i] = 1; hypervisor.configurations[i] = NULL; hypervisor.sr = NULL; hypervisor.check_min_tasks[i] = 1; hypervisor.sched_ctxs[i] = STARPU_NMAX_SCHED_CTXS; hypervisor.sched_ctx_w[i].sched_ctx = STARPU_NMAX_SCHED_CTXS; hypervisor.sched_ctx_w[i].config = NULL; hypervisor.sched_ctx_w[i].total_flops = 0.0; hypervisor.sched_ctx_w[i].submitted_flops = 0.0; hypervisor.sched_ctx_w[i].remaining_flops = 0.0; hypervisor.sched_ctx_w[i].start_time = 0.0; hypervisor.sched_ctx_w[i].real_start_time = 0.0; hypervisor.sched_ctx_w[i].hyp_react_start_time = 0.0; hypervisor.sched_ctx_w[i].resize_ack.receiver_sched_ctx = -1; hypervisor.sched_ctx_w[i].resize_ack.moved_workers = NULL; hypervisor.sched_ctx_w[i].resize_ack.nmoved_workers = 0; hypervisor.sched_ctx_w[i].resize_ack.acked_workers = NULL; STARPU_PTHREAD_MUTEX_INIT(&hypervisor.sched_ctx_w[i].mutex, NULL); hypervisor.optimal_v[i] = 0.0; hypervisor.sched_ctx_w[i].ref_speed[0] = -1.0; hypervisor.sched_ctx_w[i].ref_speed[1] = -1.0; hypervisor.sched_ctx_w[i].total_flops_available = 0; hypervisor.sched_ctx_w[i].to_be_sized = 0; hypervisor.sched_ctx_w[i].consider_max = 0; int j; for(j = 0; j < STARPU_NMAXWORKERS; j++) { hypervisor.sched_ctx_w[i].start_time_w[i] = 0.0; hypervisor.sched_ctx_w[i].current_idle_time[j] = 0.0; hypervisor.sched_ctx_w[i].idle_time[j] = 0.0; hypervisor.sched_ctx_w[i].idle_start_time[j] = 0.0; hypervisor.sched_ctx_w[i].exec_time[j] = 0.0; hypervisor.sched_ctx_w[i].exec_start_time[j] = 0.0; hypervisor.sched_ctx_w[i].pushed_tasks[j] = 0; hypervisor.sched_ctx_w[i].poped_tasks[j] = 0; hypervisor.sched_ctx_w[i].elapsed_flops[j] = 0.0; hypervisor.sched_ctx_w[i].elapsed_data[j] = 0; hypervisor.sched_ctx_w[i].elapsed_tasks[j] = 0; hypervisor.sched_ctx_w[i].total_elapsed_flops[j] = 0.0; hypervisor.sched_ctx_w[i].worker_to_be_removed[j] = 0; hypervisor.sched_ctx_w[i].compute_idle[j] = 1; hypervisor.sched_ctx_w[i].compute_partial_idle[j] = 0; } } struct sc_hypervisor_policy *selected_hypervisor_policy = _select_hypervisor_policy(hypervisor_policy); _load_hypervisor_policy(selected_hypervisor_policy); perf_counters = (struct starpu_sched_ctx_performance_counters*)malloc(sizeof(struct starpu_sched_ctx_performance_counters)); perf_counters->notify_idle_cycle = notify_idle_cycle; perf_counters->notify_pushed_task = notify_pushed_task; perf_counters->notify_poped_task = notify_poped_task; perf_counters->notify_post_exec_task = notify_post_exec_task; perf_counters->notify_submitted_job = notify_submitted_job; perf_counters->notify_empty_ctx = notify_empty_ctx; perf_counters->notify_delete_context = notify_delete_context; starpu_sched_ctx_notify_hypervisor_exists(); return (void*)perf_counters; } const char* sc_hypervisor_get_policy() { return hypervisor.policy.name; } /* the user can forbid the resizing process*/ void sc_hypervisor_stop_resize(unsigned sched_ctx) { imposed_resize = 1; hypervisor.resize[sched_ctx] = 0; } /* the user can restart the resizing process*/ void sc_hypervisor_start_resize(unsigned sched_ctx) { imposed_resize = 1; hypervisor.resize[sched_ctx] = 1; } static void _print_current_time() { char* stop_print = getenv("SC_HYPERVISOR_STOP_PRINT"); int sp = stop_print ? atoi(stop_print) : 1; if(!sp) { if(hypervisor.start_executing_time == 0.0) { fprintf(stdout, "Time: %lf\n", -1.0); return; } double curr_time = starpu_timing_now(); double elapsed_time = (curr_time - hypervisor.start_executing_time) / 1000000.0; /* in seconds */ fprintf(stdout, "Time: %lf\n", elapsed_time); int i; for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) { if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS) { struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]]; double cpu_speed = sc_hypervisor_get_speed(sc_w, STARPU_CPU_WORKER); double cuda_speed = sc_hypervisor_get_speed(sc_w, STARPU_CUDA_WORKER); int ncpus = sc_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CPU_WORKER); int ncuda = sc_hypervisor_get_nworkers_ctx(sc_w->sched_ctx, STARPU_CUDA_WORKER); fprintf(stdout, "%d: cpu_v = %lf cuda_v = %lf ncpus = %d ncuda = %d\n", hypervisor.sched_ctxs[i], cpu_speed, cuda_speed, ncpus, ncuda); } } } return; } void sc_hypervisor_shutdown(void) { int i; for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) { if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS && hypervisor.nsched_ctxs > 0) { sc_hypervisor_stop_resize(hypervisor.sched_ctxs[i]); sc_hypervisor_unregister_ctx(hypervisor.sched_ctxs[i]); STARPU_PTHREAD_MUTEX_DESTROY(&hypervisor.sched_ctx_w[i].mutex); } } perf_counters->notify_idle_cycle = NULL; perf_counters->notify_pushed_task = NULL; perf_counters->notify_poped_task = NULL; perf_counters->notify_post_exec_task = NULL; perf_counters->notify_delete_context = NULL; free(perf_counters); perf_counters = NULL; STARPU_PTHREAD_MUTEX_DESTROY(&act_hypervisor_mutex); } void sc_hypervisor_print_overhead() { // hyp_overhead /= 1000000.0;* FILE *f; const char *sched_env = getenv("OVERHEAD_FILE"); if(!sched_env) f = fopen("overhead_microsec", "a"); else f = fopen(sched_env, "a"); fprintf(f, "%lf \n", hyp_overhead); fclose(f); } /* the hypervisor is in charge only of the contexts registered to it*/ void sc_hypervisor_register_ctx(unsigned sched_ctx, double total_flops) { if(hypervisor.policy.start_ctx) hypervisor.policy.start_ctx(sched_ctx); STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex); hypervisor.configurations[sched_ctx] = NULL; hypervisor.resize_requests[sched_ctx] = NULL; STARPU_PTHREAD_MUTEX_INIT(&hypervisor.conf_mut[sched_ctx], NULL); STARPU_PTHREAD_MUTEX_INIT(&hypervisor.resize_mut[sched_ctx], NULL); _add_config(sched_ctx); hypervisor.sched_ctx_w[sched_ctx].sched_ctx = sched_ctx; hypervisor.sched_ctxs[hypervisor.nsched_ctxs++] = sched_ctx; hypervisor.sched_ctx_w[sched_ctx].total_flops = total_flops; hypervisor.sched_ctx_w[sched_ctx].remaining_flops = total_flops; hypervisor.resize[sched_ctx] = 0;//1; STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } static int _get_first_free_sched_ctx(unsigned *sched_ctxs, int nsched_ctxs) { int i; for(i = 0; i < nsched_ctxs; i++) if(sched_ctxs[i] == STARPU_NMAX_SCHED_CTXS) return i; return STARPU_NMAX_SCHED_CTXS; } /* rearange array of sched_ctxs in order not to have {MAXVAL, MAXVAL, 5, MAXVAL, 7} * and have instead {5, 7, MAXVAL, MAXVAL, MAXVAL} * it is easier afterwards to iterate the array */ static void _rearange_sched_ctxs(unsigned *sched_ctxs, int old_nsched_ctxs) { int first_free_id = STARPU_NMAX_SCHED_CTXS; int i; for(i = 0; i < old_nsched_ctxs; i++) { if(sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS) { first_free_id = _get_first_free_sched_ctx(sched_ctxs, old_nsched_ctxs); if(first_free_id != STARPU_NMAX_SCHED_CTXS) { sched_ctxs[first_free_id] = sched_ctxs[i]; sched_ctxs[i] = STARPU_NMAX_SCHED_CTXS; } } } } /* unregistered contexts will no longer be resized */ void sc_hypervisor_unregister_ctx(unsigned sched_ctx) { #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("unregister ctx %d with remaining flops %lf \n", hypervisor.sched_ctx_w[sched_ctx].sched_ctx, hypervisor.sched_ctx_w[sched_ctx].remaining_flops); #endif if(hypervisor.policy.end_ctx) hypervisor.policy.end_ctx(sched_ctx); STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex); unsigned father = starpu_sched_ctx_get_inheritor(sched_ctx); int *pus; unsigned npus = starpu_sched_ctx_get_workers_list(sched_ctx, &pus); if(npus) { starpu_sched_ctx_set_priority(pus, npus, father, 1); free(pus); } unsigned i; for(i = 0; i < hypervisor.nsched_ctxs; i++) { if(hypervisor.sched_ctxs[i] == sched_ctx) { hypervisor.sched_ctxs[i] = STARPU_NMAX_SCHED_CTXS; break; } } _rearange_sched_ctxs(hypervisor.sched_ctxs, hypervisor.nsched_ctxs); hypervisor.nsched_ctxs--; hypervisor.sched_ctx_w[sched_ctx].sched_ctx = STARPU_NMAX_SCHED_CTXS; _remove_config(sched_ctx); STARPU_PTHREAD_MUTEX_DESTROY(&hypervisor.conf_mut[sched_ctx]); STARPU_PTHREAD_MUTEX_DESTROY(&hypervisor.resize_mut[sched_ctx]); if(hypervisor.nsched_ctxs == 1) sc_hypervisor_stop_resize(hypervisor.sched_ctxs[0]); STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } void sc_hypervisor_reset_react_start_time(unsigned sched_ctx, unsigned now) { if(now) hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time = starpu_timing_now(); starpu_sched_ctx_update_start_resizing_sample(sched_ctx, starpu_timing_now()); } double _get_max_speed_gap() { return hypervisor.max_speed_gap; } unsigned sc_hypervisor_get_resize_criteria() { return hypervisor.resize_criteria; } static int get_ntasks(int *tasks) { int ntasks = 0; int j; for(j = 0; j < STARPU_NMAXWORKERS; j++) { ntasks += tasks[j]; } return ntasks; } int sc_hypervisor_get_nworkers_ctx(unsigned sched_ctx, enum starpu_worker_archtype arch) { int nworkers_ctx = 0; struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); int worker; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { worker = workers->get_next(workers, &it); enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker); if(curr_arch == arch || arch == STARPU_ANY_WORKER) nworkers_ctx++; } return nworkers_ctx; } static void _set_elapsed_flops_per_sched_ctx(unsigned sched_ctx, double val) { int i; for(i = 0; i < STARPU_NMAXWORKERS; i++) { hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[i] = val; if(val == 0) { hypervisor.sched_ctx_w[sched_ctx].elapsed_data[i] = 0; hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[i] = 0; } } } double sc_hypervisor_get_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper* sc_w) { double ret_val = 0.0; struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx); int worker; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { worker = workers->get_next(workers, &it); ret_val += sc_w->elapsed_flops[worker]; } return ret_val; } double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper* sc_w) { double ret_val = 0.0; struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sc_w->sched_ctx); int worker; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { worker = workers->get_next(workers, &it); ret_val += sc_w->total_elapsed_flops[worker]; } return ret_val; } double sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(unsigned sched_ctx) { double ready_flops = starpu_sched_ctx_get_nready_flops(sched_ctx); unsigned *sched_ctxs; int nsched_ctxs = 0; sc_hypervisor_get_ctxs_on_level(&sched_ctxs, &nsched_ctxs, starpu_sched_ctx_get_hierarchy_level(sched_ctx), sched_ctx); int s; for(s = 0; s < nsched_ctxs; s++) ready_flops += sc_hypervisor_get_nready_flops_of_all_sons_of_sched_ctx(sched_ctxs[s]); //ready_flops += starpu_get_nready_flops_of_sched_ctx(sched_ctxs[s]); free(sched_ctxs); return ready_flops; } static void _decrement_elapsed_flops_per_worker(unsigned sched_ctx, int worker, double flops) { if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0) { unsigned father = starpu_sched_ctx_get_inheritor(sched_ctx); hypervisor.sched_ctx_w[father].elapsed_flops[worker] -= flops; _decrement_elapsed_flops_per_worker(father, worker, flops); } return; } void _reset_resize_sample_info(unsigned sender_sched_ctx, unsigned receiver_sched_ctx) { double start_time = starpu_timing_now(); if(sender_sched_ctx != STARPU_NMAX_SCHED_CTXS) { /* info concerning only the gflops_rate strateg */ struct sc_hypervisor_wrapper *sender_sc_w = &hypervisor.sched_ctx_w[sender_sched_ctx]; sender_sc_w->start_time = start_time; unsigned nworkers = starpu_worker_get_count(); unsigned i; for(i = 0; i < nworkers; i++) { sender_sc_w->start_time_w[i] = start_time; sender_sc_w->idle_time[i] = 0.0; sender_sc_w->idle_start_time[i] = 0.0; hypervisor.sched_ctx_w[sender_sched_ctx].exec_time[i] = 0.0; // hypervisor.sched_ctx_w[sender_sched_ctx].exec_start_time[i] = (hypervisor.sched_ctx_w[sender_sched_ctx].exec_start_time[i] != 0.0) ? starpu_timing_now() : 0.0; _decrement_elapsed_flops_per_worker(sender_sched_ctx, i, hypervisor.sched_ctx_w[sender_sched_ctx].elapsed_flops[i]); } _set_elapsed_flops_per_sched_ctx(sender_sched_ctx, 0.0); } if(receiver_sched_ctx != STARPU_NMAX_SCHED_CTXS) { struct sc_hypervisor_wrapper *receiver_sc_w = &hypervisor.sched_ctx_w[receiver_sched_ctx]; receiver_sc_w->start_time = start_time; unsigned nworkers = starpu_worker_get_count(); unsigned i; for(i = 0; i < nworkers; i++) { receiver_sc_w->start_time_w[i] = (receiver_sc_w->start_time_w[i] != 0.0) ? starpu_timing_now() : 0.0; receiver_sc_w->idle_time[i] = 0.0; receiver_sc_w->idle_start_time[i] = (receiver_sc_w->exec_start_time[i] != 0.0) ? 0.0 : starpu_timing_now(); // hypervisor.sched_ctx_w[receiver_sched_ctx].exec_start_time[i] = (receiver_sc_w->exec_start_time[i] != 0.0) ? starpu_timing_now() : 0.0; hypervisor.sched_ctx_w[receiver_sched_ctx].exec_time[i] = 0.0; _decrement_elapsed_flops_per_worker(receiver_sched_ctx, i, hypervisor.sched_ctx_w[receiver_sched_ctx].elapsed_flops[i]); } _set_elapsed_flops_per_sched_ctx(receiver_sched_ctx, 0.0); } return; } /* actually move the workers: the cpus are moved, gpus are only shared */ /* forbids another resize request before this one is take into account */ void sc_hypervisor_move_workers(unsigned sender_sched_ctx, unsigned receiver_sched_ctx, int* workers_to_move, unsigned nworkers_to_move, unsigned now) { if(nworkers_to_move > 0 && hypervisor.resize[sender_sched_ctx]) { _print_current_time(); #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("resize ctx %u with %u workers", sender_sched_ctx, nworkers_to_move); unsigned j; for(j = 0; j < nworkers_to_move; j++) printf(" %d", workers_to_move[j]); printf("\n"); #endif hypervisor.allow_remove[receiver_sched_ctx] = 0; starpu_sched_ctx_add_workers(workers_to_move, nworkers_to_move, receiver_sched_ctx); if(now) { #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("remove now from ctx %u:", sender_sched_ctx); for(j = 0; j < nworkers_to_move; j++) printf(" %d", workers_to_move[j]); printf("\n"); #endif starpu_sched_ctx_remove_workers(workers_to_move, nworkers_to_move, sender_sched_ctx); hypervisor.allow_remove[receiver_sched_ctx] = 1; _reset_resize_sample_info(sender_sched_ctx, receiver_sched_ctx); } else { int ret = starpu_pthread_mutex_trylock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex); if(ret != EBUSY) { hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.receiver_sched_ctx = receiver_sched_ctx; hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_move * sizeof(int)); hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.nmoved_workers = nworkers_to_move; hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_move * sizeof(int)); unsigned i; for(i = 0; i < nworkers_to_move; i++) { hypervisor.sched_ctx_w[sender_sched_ctx].current_idle_time[workers_to_move[i]] = 0.0; hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.moved_workers[i] = workers_to_move[i]; hypervisor.sched_ctx_w[sender_sched_ctx].resize_ack.acked_workers[i] = 0; } hypervisor.resize[sender_sched_ctx] = 0; if(imposed_resize) imposed_resize = 0; STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex); } } struct sc_hypervisor_policy_config *new_config = sc_hypervisor_get_config(receiver_sched_ctx); unsigned i; for(i = 0; i < nworkers_to_move; i++) new_config->max_idle[workers_to_move[i]] = new_config->max_idle[workers_to_move[i]] !=MAX_IDLE_TIME ? new_config->max_idle[workers_to_move[i]] : new_config->new_workers_max_idle; } return; } void sc_hypervisor_add_workers_to_sched_ctx(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx) { if(nworkers_to_add > 0 && hypervisor.resize[sched_ctx]) { _print_current_time(); #ifdef STARPU_SC_HYPERVISOR_DEBUG unsigned j; printf("add to ctx %u:", sched_ctx); for(j = 0; j < nworkers_to_add; j++) printf(" %d", workers_to_add[j]); printf("\n"); #endif starpu_sched_ctx_add_workers(workers_to_add, nworkers_to_add, sched_ctx); struct sc_hypervisor_policy_config *new_config = sc_hypervisor_get_config(sched_ctx); unsigned i; for(i = 0; i < nworkers_to_add; i++) new_config->max_idle[workers_to_add[i]] = new_config->max_idle[workers_to_add[i]] != MAX_IDLE_TIME ? new_config->max_idle[workers_to_add[i]] : new_config->new_workers_max_idle; _reset_resize_sample_info(STARPU_NMAX_SCHED_CTXS, sched_ctx); } return; } unsigned sc_hypervisor_can_resize(unsigned sched_ctx) { return hypervisor.resize[sched_ctx]; } void sc_hypervisor_remove_workers_from_sched_ctx(int* workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx, unsigned now) { if(nworkers_to_remove > 0 && hypervisor.resize[sched_ctx] && hypervisor.allow_remove[sched_ctx]) { _print_current_time(); unsigned nworkers = 0; int workers[nworkers_to_remove]; if(now) { #ifdef STARPU_SC_HYPERVISOR_DEBUG unsigned j; printf("remove explicitley now from ctx %u:", sched_ctx); for(j = 0; j < nworkers_to_remove; j++) printf(" %d", workers_to_remove[j]); printf("\n"); #endif starpu_sched_ctx_remove_workers(workers_to_remove, nworkers_to_remove, sched_ctx); _reset_resize_sample_info(sched_ctx, STARPU_NMAX_SCHED_CTXS); } else { #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("try to remove from ctx %u: ", sched_ctx); unsigned j; for(j = 0; j < nworkers_to_remove; j++) printf(" %d", workers_to_remove[j]); printf("\n"); #endif int ret = starpu_pthread_mutex_trylock(&hypervisor.sched_ctx_w[sched_ctx].mutex); if(ret != EBUSY) { unsigned i; for(i = 0; i < nworkers_to_remove; i++) if(starpu_sched_ctx_contains_worker(workers_to_remove[i], sched_ctx)) workers[nworkers++] = workers_to_remove[i]; hypervisor.sched_ctx_w[sched_ctx].resize_ack.receiver_sched_ctx = -1; hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers = (int*)malloc(nworkers_to_remove * sizeof(int)); hypervisor.sched_ctx_w[sched_ctx].resize_ack.nmoved_workers = (int)nworkers; hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers = (int*)malloc(nworkers_to_remove * sizeof(int)); for(i = 0; i < nworkers; i++) { hypervisor.sched_ctx_w[sched_ctx].current_idle_time[workers[i]] = 0.0; hypervisor.sched_ctx_w[sched_ctx].resize_ack.moved_workers[i] = workers[i]; hypervisor.sched_ctx_w[sched_ctx].resize_ack.acked_workers[i] = 0; } hypervisor.resize[sched_ctx] = 0; if(imposed_resize) imposed_resize = 0; STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); } } } return; } static unsigned _ack_resize_completed(unsigned sched_ctx, int worker) { if(worker != -1 && !starpu_sched_ctx_contains_worker(worker, sched_ctx)) return 0; struct sc_hypervisor_resize_ack *resize_ack = NULL; unsigned sender_sched_ctx = STARPU_NMAX_SCHED_CTXS; int i; for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) { if(hypervisor.sched_ctxs[i] != STARPU_NMAX_SCHED_CTXS) { struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[hypervisor.sched_ctxs[i]]; STARPU_PTHREAD_MUTEX_LOCK(&sc_w->mutex); unsigned only_remove = 0; if(sc_w->resize_ack.receiver_sched_ctx == -1 && hypervisor.sched_ctxs[i] != sched_ctx && sc_w->resize_ack.nmoved_workers > 0 && starpu_sched_ctx_contains_worker(worker, hypervisor.sched_ctxs[i])) { int j; for(j = 0; j < sc_w->resize_ack.nmoved_workers; j++) if(sc_w->resize_ack.moved_workers[j] == worker) { only_remove = 1; _reset_resize_sample_info(sched_ctx, STARPU_NMAX_SCHED_CTXS); break; } } if(only_remove || (sc_w->resize_ack.receiver_sched_ctx != -1 && sc_w->resize_ack.receiver_sched_ctx == (int)sched_ctx)) { resize_ack = &sc_w->resize_ack; sender_sched_ctx = hypervisor.sched_ctxs[i]; STARPU_PTHREAD_MUTEX_UNLOCK(&sc_w->mutex); break; } STARPU_PTHREAD_MUTEX_UNLOCK(&sc_w->mutex); } } /* if there is no ctx waiting for its ack return 1*/ if(resize_ack == NULL) { return 1; } int ret = starpu_pthread_mutex_trylock(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex); if(ret != EBUSY) { int *moved_workers = resize_ack->moved_workers; int nmoved_workers = resize_ack->nmoved_workers; int *acked_workers = resize_ack->acked_workers; if(worker != -1) { for(i = 0; i < nmoved_workers; i++) { int moved_worker = moved_workers[i]; if(moved_worker == worker && acked_workers[i] == 0) { acked_workers[i] = 1; } } } int nacked_workers = 0; for(i = 0; i < nmoved_workers; i++) { nacked_workers += (acked_workers[i] == 1); } unsigned resize_completed = (nacked_workers == nmoved_workers); int receiver_sched_ctx = sched_ctx; if(resize_completed) { /* if the permission to resize is not allowed by the user don't do it whatever the application says */ if(!((hypervisor.resize[sender_sched_ctx] == 0 || hypervisor.resize[receiver_sched_ctx] == 0) && imposed_resize)) { /* int j; */ /* printf("remove after ack from ctx %d:", sender_sched_ctx); */ /* for(j = 0; j < nmoved_workers; j++) */ /* printf(" %d", moved_workers[j]); */ /* printf("\n"); */ starpu_sched_ctx_remove_workers(moved_workers, nmoved_workers, sender_sched_ctx); _reset_resize_sample_info(sender_sched_ctx, receiver_sched_ctx); hypervisor.resize[sender_sched_ctx] = 1; hypervisor.allow_remove[receiver_sched_ctx] = 1; /* if the user allowed resizing leave the decisions to the application */ if(imposed_resize) imposed_resize = 0; resize_ack->receiver_sched_ctx = -1; resize_ack->nmoved_workers = 0; free(resize_ack->moved_workers); free(resize_ack->acked_workers); } STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex); return resize_completed; } STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sender_sched_ctx].mutex); } return 0; } /* Enqueue a resize request for 'sched_ctx', to be executed when the * 'task_tag' tasks of 'sched_ctx' complete. */ void sc_hypervisor_post_resize_request(unsigned sched_ctx, int task_tag) { struct resize_request_entry *entry; entry = malloc(sizeof *entry); STARPU_ASSERT(entry != NULL); entry->sched_ctx = sched_ctx; entry->task_tag = task_tag; STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.resize_mut[sched_ctx]); HASH_ADD_INT(hypervisor.resize_requests[sched_ctx], task_tag, entry); STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.resize_mut[sched_ctx]); } void sc_hypervisor_resize_ctxs(unsigned *sched_ctxs, int nsched_ctxs , int *workers, int nworkers) { if(hypervisor.policy.resize_ctxs) hypervisor.policy.resize_ctxs(sched_ctxs, nsched_ctxs, workers, nworkers); } void _sc_hypervisor_allow_compute_idle(unsigned sched_ctx, int worker, unsigned allow) { hypervisor.sched_ctx_w[sched_ctx].compute_idle[worker] = allow; } int _update_max_hierarchically(unsigned *sched_ctxs, int nsched_ctxs) { int s; unsigned leaves[hypervisor.nsched_ctxs]; int nleaves = 0; sc_hypervisor_get_leaves(hypervisor.sched_ctxs, hypervisor.nsched_ctxs, leaves, &nleaves); int max = 0; for(s = 0; s < nsched_ctxs; s++) { struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs[s]); unsigned found = 0; int l = 0; for(l = 0; l < nleaves; l++) { if(leaves[l] == sched_ctxs[s]) { found = 1; break; } } if(!found) { config->max_nworkers = 0; int level = starpu_sched_ctx_get_hierarchy_level(sched_ctxs[s]); unsigned *sched_ctxs_child; int nsched_ctxs_child = 0; sc_hypervisor_get_ctxs_on_level(&sched_ctxs_child, &nsched_ctxs_child, level+1, sched_ctxs[s]); if(nsched_ctxs_child > 0) { config->max_nworkers += _update_max_hierarchically(sched_ctxs_child, nsched_ctxs_child); free(sched_ctxs_child); int max_possible_workers = starpu_worker_get_count(); if(config->max_nworkers < 0) config->max_nworkers = 0; if(config->max_nworkers > max_possible_workers) config->max_nworkers = max_possible_workers; } #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("ctx %u has max %d \n", sched_ctxs[s], config->max_nworkers); #endif } max += config->max_nworkers; } return max; } void _update_max_diff_hierarchically(unsigned father, double diff) { int level = starpu_sched_ctx_get_hierarchy_level(father); unsigned *sched_ctxs_child; int nsched_ctxs_child = 0; sc_hypervisor_get_ctxs_on_level(&sched_ctxs_child, &nsched_ctxs_child, level+1, father); if(nsched_ctxs_child > 0) { int s; double total_nflops = 0.0; for(s = 0; s < nsched_ctxs_child; s++) { total_nflops += hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops < 0.0 ? 0.0 : hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops; } int accumulated_diff = 0; for(s = 0; s < nsched_ctxs_child; s++) { struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs_child[s]); double remaining_flops = hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops < 0.0 ? 0.0 : hypervisor.sched_ctx_w[sched_ctxs_child[s]].remaining_flops; int current_diff = total_nflops == 0.0 ? 0.0 : floor((remaining_flops / total_nflops) * diff); accumulated_diff += current_diff; if(s == (nsched_ctxs_child - 1) && accumulated_diff < diff) current_diff += (diff - accumulated_diff); config->max_nworkers += current_diff; #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("%u: redib max_nworkers incr %d diff = %d \n", sched_ctxs_child[s], config->max_nworkers, current_diff); #endif _update_max_diff_hierarchically(sched_ctxs_child[s], current_diff); } free(sched_ctxs_child); } return; } void sc_hypervisor_update_resize_interval(unsigned *sched_ctxs, int nsched_ctxs, int max_workers) { (void) max_workers; unsigned leaves[hypervisor.nsched_ctxs]; int nleaves = 0; sc_hypervisor_get_leaves(hypervisor.sched_ctxs, hypervisor.nsched_ctxs, leaves, &nleaves); int l; unsigned sched_ctx; int total_max_nworkers = 0; // int max_cpus = starpu_cpu_worker_get_count(); unsigned configured = 0; int i; for(i = 0; i < nsched_ctxs; i++) { unsigned found = 0; for(l = 0; l < nleaves; l++) { if(leaves[l] == sched_ctxs[i]) { found = 1; break; } } if(!found) continue; sched_ctx = sched_ctxs[i]; if(hypervisor.sched_ctx_w[sched_ctx].to_be_sized) continue; struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctx); struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx); int worker; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); double elapsed_time_worker[STARPU_NMAXWORKERS]; double norm_idle_time = 0.0; double end_time = starpu_timing_now(); while(workers->has_next(workers, &it)) { double idle_time = 0.0; worker = workers->get_next(workers, &it); if(hypervisor.sched_ctx_w[sched_ctx].compute_idle[worker]) { if(hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] == 0.0) elapsed_time_worker[worker] = 0.0; else elapsed_time_worker[worker] = (end_time - hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker]) / 1000000.0; if(hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker] == 0.0) { idle_time = hypervisor.sched_ctx_w[sched_ctx].idle_time[worker]; /* in seconds */ } else { double idle = (end_time - hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker]) / 1000000.0; /* in seconds */ idle_time = hypervisor.sched_ctx_w[sched_ctx].idle_time[worker] + idle; } norm_idle_time += (elapsed_time_worker[worker] == 0.0 ? 0.0 : (idle_time / elapsed_time_worker[worker])); /* printf("%d/%d: start time %lf elapsed time %lf idle time %lf norm_idle_time %lf \n", */ /* worker, sched_ctx, hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker], elapsed_time_worker[worker], idle_time, norm_idle_time); */ } } double norm_exec_time = 0.0; for(worker = 0; worker < STARPU_NMAXWORKERS; worker++) { double exec_time = 0.0; if(hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] == 0.0) elapsed_time_worker[worker] = 0.0; else elapsed_time_worker[worker] = (end_time - hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker]) / 1000000.0; if(hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] == 0.0) { exec_time = hypervisor.sched_ctx_w[sched_ctx].exec_time[worker]; } else { double current_exec_time = 0.0; if(hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] < hypervisor.sched_ctx_w[sched_ctx].start_time) current_exec_time = (end_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /* in seconds */ else current_exec_time = (end_time - hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker]) / 1000000.0; /* in seconds */ exec_time = hypervisor.sched_ctx_w[sched_ctx].exec_time[worker] + current_exec_time; } norm_exec_time += elapsed_time_worker[worker] == 0.0 ? 0.0 : exec_time / elapsed_time_worker[worker]; } #ifdef STARPU_SC_HYPERVISOR_DEBUG double curr_time = starpu_timing_now(); double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].start_time) / 1000000.0; /* in seconds */ int nready_tasks = starpu_sched_ctx_get_nready_tasks(sched_ctx); #endif /* if(norm_idle_time >= 0.9) */ /* { */ /* config->max_nworkers = lrint(norm_exec_time); */ /* } */ /* else */ /* { */ /* if(norm_idle_time < 0.1) */ /* config->max_nworkers = lrint(norm_exec_time) + nready_tasks - 1; //workers->nworkers + hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1; */ /* else */ /* config->max_nworkers = lrint(norm_exec_time); */ /* } */ config->max_nworkers = lrint(norm_exec_time); // config->max_nworkers = hypervisor.sched_ctx_w[sched_ctx].nready_tasks - 1; /* if(config->max_nworkers < 0) */ /* config->max_nworkers = 0; */ /* if(config->max_nworkers > max_workers) */ /* config->max_nworkers = max_workers; */ #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("%u: ready tasks %d norm_idle_time %lf elapsed_time %lf norm_exec_time %lf nworker %d max %d \n", sched_ctx, nready_tasks, norm_idle_time, elapsed_time, norm_exec_time, workers->nworkers, config->max_nworkers); #endif total_max_nworkers += config->max_nworkers; configured = 1; } unsigned nhierarchy_levels = sc_hypervisor_get_nhierarchy_levels(); if(nhierarchy_levels > 1 && configured) { unsigned *sched_ctxs2; int nsched_ctxs2; sc_hypervisor_get_ctxs_on_level(&sched_ctxs2, &nsched_ctxs2, 0, STARPU_NMAX_SCHED_CTXS); if(nsched_ctxs2 > 0) { _update_max_hierarchically(sched_ctxs2, nsched_ctxs2); int s; int current_total_max_nworkers = 0; double max_nflops = 0.0; unsigned max_nflops_sched_ctx = sched_ctxs2[0]; for(s = 0; s < nsched_ctxs2; s++) { struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(sched_ctxs2[s]); current_total_max_nworkers += config->max_nworkers; if(max_nflops < hypervisor.sched_ctx_w[sched_ctxs2[s]].remaining_flops) { max_nflops = hypervisor.sched_ctx_w[sched_ctxs2[s]].remaining_flops; max_nflops_sched_ctx = sched_ctxs2[s]; } } int max_possible_workers = starpu_worker_get_count(); /*if the sum of the max cpus is smaller than the total cpus available increase the max for the ones having more ready tasks to exec */ if(current_total_max_nworkers < max_possible_workers) { int diff = max_possible_workers - current_total_max_nworkers; struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(max_nflops_sched_ctx); config->max_nworkers += diff; #ifdef STARPU_SC_HYPERVISOR_DEBUG printf("%u: redib max_nworkers incr %d \n", max_nflops_sched_ctx, config->max_nworkers); #endif _update_max_diff_hierarchically(max_nflops_sched_ctx, diff); } free(sched_ctxs2); } } /*if the sum of the max cpus is smaller than the total cpus available increase the max for the ones having more ready tasks to exec */ /* if(configured && total_max_nworkers < max_workers) */ /* { */ /* int diff = max_workers - total_max_nworkers; */ /* int max_nready = -1; */ /* unsigned max_nready_sched_ctx = sched_ctxs[0]; */ /* for(i = 0; i < nsched_ctxs; i++) */ /* { */ /* int nready_tasks = starpu_sched_ctx_get_nready_tasks(sched_ctxs[i]); */ /* if(max_nready < nready_tasks) */ /* { */ /* max_nready = nready_tasks; */ /* max_nready_sched_ctx = sched_ctxs[i]; */ /* } */ /* } */ /* struct sc_hypervisor_policy_config *config = sc_hypervisor_get_config(max_nready_sched_ctx); */ /* config->max_nworkers += diff; */ /* printf("%d: redib max_nworkers incr %d \n", max_nready_sched_ctx, config->max_nworkers); */ /* } */ } /* notifies the hypervisor that a new task was pushed on the queue of the worker */ static void notify_pushed_task(unsigned sched_ctx, int worker) { hypervisor.sched_ctx_w[sched_ctx].pushed_tasks[worker]++; if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].start_time == 0.0) hypervisor.sched_ctx_w[sched_ctx].start_time = starpu_timing_now(); if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] == 0.0) { hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] = starpu_timing_now(); } int ntasks = get_ntasks(hypervisor.sched_ctx_w[sched_ctx].pushed_tasks); if((hypervisor.min_tasks == 0 || (!(hypervisor.resize[sched_ctx] == 0 && imposed_resize) && ntasks == hypervisor.min_tasks)) && hypervisor.check_min_tasks[sched_ctx]) { hypervisor.resize[sched_ctx] = 1; if(imposed_resize) imposed_resize = 0; hypervisor.check_min_tasks[sched_ctx] = 0; } if(hypervisor.policy.handle_pushed_task) hypervisor.policy.handle_pushed_task(sched_ctx, worker); } unsigned choose_ctx_to_steal(int worker) { int j; int ns = hypervisor.nsched_ctxs; int max_ready_tasks = 0; unsigned chosen_ctx = STARPU_NMAX_SCHED_CTXS; for(j = 0; j < ns; j++) { unsigned other_ctx = hypervisor.sched_ctxs[j]; int nready = starpu_sched_ctx_get_nready_tasks(other_ctx); if(!starpu_sched_ctx_contains_worker(worker, other_ctx) && max_ready_tasks < nready) { max_ready_tasks = nready; chosen_ctx = other_ctx; } } return chosen_ctx; } /* notifies the hypervisor that the worker spent another cycle in idle time */ static void notify_idle_cycle(unsigned sched_ctx, int worker, double idle_time) { if(hypervisor.start_executing_time == 0.0) return; struct sc_hypervisor_wrapper *sc_w = &hypervisor.sched_ctx_w[sched_ctx]; sc_w->current_idle_time[worker] += idle_time; if(sc_w->idle_start_time[worker] == 0.0 && sc_w->hyp_react_start_time != 0.0) sc_w->idle_start_time[worker] = starpu_timing_now(); if(sc_w->idle_start_time[worker] > 0.0) { double end_time = starpu_timing_now(); sc_w->idle_time[worker] += (end_time - sc_w->idle_start_time[worker]) / 1000000.0; /* in seconds */ } hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker] = starpu_timing_now(); if(hypervisor.resize[sched_ctx] && hypervisor.policy.handle_idle_cycle) { if(sc_w->hyp_react_start_time == 0.0) sc_hypervisor_reset_react_start_time(sched_ctx, 1); double curr_time = starpu_timing_now(); double elapsed_time = (curr_time - sc_w->hyp_react_start_time) / 1000000.0; /* in seconds */ if(sc_w->sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > sc_w->config->time_sample) { unsigned idle_everywhere = 0; unsigned *sched_ctxs = NULL; unsigned nsched_ctxs = 0; int ret = starpu_pthread_mutex_trylock(&act_hypervisor_mutex); if(ret != EBUSY) { if(sc_hypervisor_check_idle(sched_ctx, worker)) { idle_everywhere = 1; nsched_ctxs = starpu_worker_get_sched_ctx_list(worker, &sched_ctxs); unsigned s; for(s = 0; s < nsched_ctxs; s++) { if(hypervisor.sched_ctx_w[sched_ctxs[s]].sched_ctx != STARPU_NMAX_SCHED_CTXS) { if(!sc_hypervisor_check_idle(sched_ctxs[s], worker)) idle_everywhere = 0; } } free(sched_ctxs); } STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); } if(idle_everywhere) { double hyp_overhead_start = starpu_timing_now(); if(elapsed_time > (sc_w->config->time_sample*2)) hypervisor.policy.handle_idle_cycle(sched_ctx, worker); double hyp_overhead_end = starpu_timing_now(); hyp_overhead += (hyp_overhead_end - hyp_overhead_start); if(elapsed_time > (sc_w->config->time_sample*2)) sc_hypervisor_reset_react_start_time(sched_ctx, 1); else sc_hypervisor_reset_react_start_time(sched_ctx, 0); } } } return; } void _update_real_start_time_hierarchically(unsigned sched_ctx) { hypervisor.sched_ctx_w[sched_ctx].real_start_time = starpu_timing_now(); if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0) { _update_real_start_time_hierarchically(starpu_sched_ctx_get_inheritor(sched_ctx)); } return; } /* notifies the hypervisor that the worker is no longer idle and a new task was pushed on its queue */ static void notify_poped_task(unsigned sched_ctx, int worker) { if(hypervisor.start_executing_time == 0.0) hypervisor.start_executing_time = starpu_timing_now(); if(!hypervisor.resize[sched_ctx]) hypervisor.resize[sched_ctx] = 1; if(hypervisor.sched_ctx_w[sched_ctx].total_flops != 0.0 && hypervisor.sched_ctx_w[sched_ctx].real_start_time == 0.0) _update_real_start_time_hierarchically(sched_ctx); if(hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] == 0.0) { hypervisor.sched_ctx_w[sched_ctx].start_time_w[worker] = starpu_timing_now(); } hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] = starpu_timing_now(); if(hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker] > 0.0) { int ns = hypervisor.nsched_ctxs; int j; for(j = 0; j < ns; j++) { if(hypervisor.sched_ctxs[j] != sched_ctx) { if(hypervisor.sched_ctx_w[hypervisor.sched_ctxs[j]].idle_start_time[worker] > 0.0) hypervisor.sched_ctx_w[hypervisor.sched_ctxs[j]].compute_partial_idle[worker] = 1; } } double end_time = starpu_timing_now(); double idle = (end_time - hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker]) / 1000000.0; /* in seconds */ if(hypervisor.sched_ctx_w[sched_ctx].compute_partial_idle[worker]) hypervisor.sched_ctx_w[sched_ctx].idle_time[worker] += idle / 2.0; else hypervisor.sched_ctx_w[sched_ctx].idle_time[worker] += idle; hypervisor.sched_ctx_w[sched_ctx].compute_partial_idle[worker] = 0; hypervisor.sched_ctx_w[sched_ctx].idle_start_time[worker] = 0.0; } if(hypervisor.resize[sched_ctx]) hypervisor.sched_ctx_w[sched_ctx].current_idle_time[worker] = 0.0; if(hypervisor.policy.handle_idle_end) hypervisor.policy.handle_idle_end(sched_ctx, worker); } static void _update_counters_hierarchically(int worker, unsigned sched_ctx, double flops, size_t data_size) { hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker]++; hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += flops; hypervisor.sched_ctx_w[sched_ctx].elapsed_data[worker] += data_size ; hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[worker]++ ; hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += flops; STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= flops; STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0) _update_counters_hierarchically(worker, starpu_sched_ctx_get_inheritor(sched_ctx), flops, data_size); return; } /* notifies the hypervisor that a tagged task has just been executed */ static void notify_post_exec_task(struct starpu_task *task, size_t data_size, uint32_t footprint, int task_tag, double flops) { unsigned sched_ctx = task->sched_ctx; int worker = starpu_worker_get_id_check(); if(hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] != 0.0) { double current_time = starpu_timing_now(); double exec_time = (current_time - hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker]) / 1000000.0; /* in seconds */ hypervisor.sched_ctx_w[sched_ctx].exec_time[worker] += exec_time; hypervisor.sched_ctx_w[sched_ctx].exec_start_time[worker] = 0.0; } hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker]++; hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[worker] += flops; hypervisor.sched_ctx_w[sched_ctx].elapsed_data[worker] += data_size ; hypervisor.sched_ctx_w[sched_ctx].elapsed_tasks[worker]++ ; hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[worker] += flops; STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); hypervisor.sched_ctx_w[sched_ctx].remaining_flops -= flops; STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); if(_sc_hypervisor_use_lazy_resize()) _ack_resize_completed(sched_ctx, worker); if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0) { _update_counters_hierarchically(worker, starpu_sched_ctx_get_inheritor(sched_ctx), flops, data_size); } if(hypervisor.resize[sched_ctx]) { if(hypervisor.policy.handle_poped_task) { if(hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time == 0.0) sc_hypervisor_reset_react_start_time(sched_ctx, 1); double curr_time = starpu_timing_now(); double elapsed_time = (curr_time - hypervisor.sched_ctx_w[sched_ctx].hyp_react_start_time) / 1000000.0; /* in seconds */ if(hypervisor.sched_ctx_w[sched_ctx].sched_ctx != STARPU_NMAX_SCHED_CTXS && elapsed_time > hypervisor.sched_ctx_w[sched_ctx].config->time_sample) { double hyp_overhead_start = starpu_timing_now(); if(elapsed_time > (hypervisor.sched_ctx_w[sched_ctx].config->time_sample*2)) hypervisor.policy.handle_poped_task(sched_ctx, worker, task, footprint); double hyp_overhead_end = starpu_timing_now(); hyp_overhead += (hyp_overhead_end - hyp_overhead_start); if(elapsed_time > (hypervisor.sched_ctx_w[sched_ctx].config->time_sample*2)) sc_hypervisor_reset_react_start_time(sched_ctx, 1); else sc_hypervisor_reset_react_start_time(sched_ctx, 0); } else /* no need to consider resizing, just remove the task from the pool if the strategy requires it*/ hypervisor.policy.handle_poped_task(sched_ctx, -2, task, footprint); } } /* STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex); */ /* _ack_resize_completed(sched_ctx, worker); */ /* STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); */ if(hypervisor.sched_ctx_w[sched_ctx].poped_tasks[worker] % 200 == 0) _print_current_time(); if(task_tag <= 0) return; unsigned conf_sched_ctx; unsigned i; unsigned ns = hypervisor.nsched_ctxs; for(i = 0; i < ns; i++) { struct configuration_entry *entry; conf_sched_ctx = hypervisor.sched_ctxs[i]; STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.conf_mut[conf_sched_ctx]); HASH_FIND_INT(hypervisor.configurations[conf_sched_ctx], &task_tag, entry); if (entry != NULL) { struct sc_hypervisor_policy_config *config = entry->configuration; sc_hypervisor_set_config(conf_sched_ctx, config); HASH_DEL(hypervisor.configurations[conf_sched_ctx], entry); free(config); } STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.conf_mut[conf_sched_ctx]); } if(hypervisor.resize[sched_ctx]) { STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.resize_mut[sched_ctx]); if(hypervisor.policy.handle_post_exec_hook) { /* Check whether 'task_tag' is in the 'resize_requests' set. */ struct resize_request_entry *entry; HASH_FIND_INT(hypervisor.resize_requests[sched_ctx], &task_tag, entry); if (entry != NULL) { hypervisor.policy.handle_post_exec_hook(sched_ctx, task_tag); HASH_DEL(hypervisor.resize_requests[sched_ctx], entry); free(entry); } } STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.resize_mut[sched_ctx]); } return; } static void notify_submitted_job(struct starpu_task *task, uint32_t footprint, size_t data_size) { (void)footprint; (void)data_size; unsigned sched_ctx = task->sched_ctx; STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); hypervisor.sched_ctx_w[sched_ctx].submitted_flops += task->flops; STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); /* signaled by the user - no need to wait for them */ /* if(hypervisor.policy.handle_submitted_job && !type_of_tasks_known) */ /* hypervisor.policy.handle_submitted_job(task->cl, task->sched_ctx, footprint, data_size); */ } static void notify_empty_ctx(unsigned sched_ctx_id, struct starpu_task *task) { (void)sched_ctx_id; (void)task; sc_hypervisor_resize_ctxs(NULL, -1 , NULL, -1); } void sc_hypervisor_set_type_of_task(struct starpu_codelet *cl, unsigned sched_ctx, uint32_t footprint, size_t data_size) { type_of_tasks_known = 1; if(hypervisor.policy.handle_submitted_job) hypervisor.policy.handle_submitted_job(cl, sched_ctx, footprint, data_size); } static void notify_delete_context(unsigned sched_ctx) { _print_current_time(); sc_hypervisor_unregister_ctx(sched_ctx); } void sc_hypervisor_size_ctxs(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers) { // STARPU_PTHREAD_MUTEX_LOCK(&act_hypervisor_mutex); unsigned curr_nsched_ctxs = sched_ctxs == NULL ? hypervisor.nsched_ctxs : (unsigned)nsched_ctxs; unsigned *curr_sched_ctxs = sched_ctxs == NULL ? hypervisor.sched_ctxs : sched_ctxs; // STARPU_PTHREAD_MUTEX_UNLOCK(&act_hypervisor_mutex); unsigned s; for(s = 0; s < curr_nsched_ctxs; s++) hypervisor.resize[curr_sched_ctxs[s]] = 1; if(hypervisor.policy.size_ctxs) hypervisor.policy.size_ctxs(curr_sched_ctxs, curr_nsched_ctxs, workers, nworkers); } struct sc_hypervisor_wrapper* sc_hypervisor_get_wrapper(unsigned sched_ctx) { return &hypervisor.sched_ctx_w[sched_ctx]; } unsigned* sc_hypervisor_get_sched_ctxs() { return hypervisor.sched_ctxs; } int sc_hypervisor_get_nsched_ctxs() { int ns; ns = hypervisor.nsched_ctxs; return ns; } int _sc_hypervisor_use_lazy_resize(void) { char* lazy = getenv("SC_HYPERVISOR_LAZY_RESIZE"); return lazy ? atoi(lazy) : 1; } void sc_hypervisor_save_size_req(unsigned *sched_ctxs, int nsched_ctxs, int *workers, int nworkers) { hypervisor.sr = (struct size_request*)malloc(sizeof(struct size_request)); hypervisor.sr->sched_ctxs = sched_ctxs; hypervisor.sr->nsched_ctxs = nsched_ctxs; hypervisor.sr->workers = workers; hypervisor.sr->nworkers = nworkers; } unsigned sc_hypervisor_get_size_req(unsigned **sched_ctxs, int* nsched_ctxs, int **workers, int *nworkers) { if(hypervisor.sr != NULL) { *sched_ctxs = hypervisor.sr->sched_ctxs; *nsched_ctxs = hypervisor.sr->nsched_ctxs; *workers = hypervisor.sr->workers; *nworkers = hypervisor.sr->nworkers; return 1; } return 0; } void sc_hypervisor_free_size_req(void) { if(hypervisor.sr != NULL) { free(hypervisor.sr); hypervisor.sr = NULL; } } double _get_optimal_v(unsigned sched_ctx) { return hypervisor.optimal_v[sched_ctx]; } void _set_optimal_v(unsigned sched_ctx, double optimal_v) { hypervisor.optimal_v[sched_ctx] = optimal_v; } static struct types_of_workers* _init_structure_types_of_workers(void) { struct types_of_workers *tw = (struct types_of_workers*)malloc(sizeof(struct types_of_workers)); tw->ncpus = 0; tw->ncuda = 0; tw->nw = 0; return tw; } struct types_of_workers* sc_hypervisor_get_types_of_workers(int *workers, unsigned nworkers) { struct types_of_workers *tw = _init_structure_types_of_workers(); unsigned w; for(w = 0; w < nworkers; w++) { enum starpu_worker_archtype arch = workers == NULL ? starpu_worker_get_type((int)w) : starpu_worker_get_type(workers[w]); if(arch == STARPU_CPU_WORKER) tw->ncpus++; if(arch == STARPU_CUDA_WORKER) tw->ncuda++; } if(tw->ncpus > 0) tw->nw++; if(tw->ncuda > 0) tw->nw++; return tw; } void sc_hypervisor_update_diff_total_flops(unsigned sched_ctx, double diff_total_flops) { // double hyp_overhead_start = starpu_timing_now(); STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); hypervisor.sched_ctx_w[sched_ctx].total_flops += diff_total_flops; hypervisor.sched_ctx_w[sched_ctx].remaining_flops += diff_total_flops; STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); /* double hyp_overhead_end = starpu_timing_now(); */ /* hyp_overhead += (hyp_overhead_end - hyp_overhead_start); */ if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0) sc_hypervisor_update_diff_total_flops(starpu_sched_ctx_get_inheritor(sched_ctx), diff_total_flops); return; } void sc_hypervisor_update_diff_elapsed_flops(unsigned sched_ctx, double diff_elapsed_flops) { // double hyp_overhead_start = starpu_timing_now(); int workerid = starpu_worker_get_id(); if(workerid != -1) { // STARPU_PTHREAD_MUTEX_LOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); hypervisor.sched_ctx_w[sched_ctx].elapsed_flops[workerid] += diff_elapsed_flops; hypervisor.sched_ctx_w[sched_ctx].total_elapsed_flops[workerid] += diff_elapsed_flops; // STARPU_PTHREAD_MUTEX_UNLOCK(&hypervisor.sched_ctx_w[sched_ctx].mutex); } /* double hyp_overhead_end = starpu_timing_now(); */ /* hyp_overhead += (hyp_overhead_end - hyp_overhead_start); */ if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0) sc_hypervisor_update_diff_elapsed_flops(starpu_sched_ctx_get_inheritor(sched_ctx), diff_elapsed_flops); return; } void sc_hypervisor_get_ctxs_on_level(unsigned **sched_ctxs, int *nsched_ctxs, unsigned hierarchy_level, unsigned father_sched_ctx_id) { unsigned s; *nsched_ctxs = 0; *sched_ctxs = (unsigned*)malloc(hypervisor.nsched_ctxs * sizeof(unsigned)); for(s = 0; s < hypervisor.nsched_ctxs; s++) { /* if father == STARPU_NMAX_SCHED_CTXS we take all the ctxs in this level */ if(starpu_sched_ctx_get_hierarchy_level(hypervisor.sched_ctxs[s]) == hierarchy_level && (starpu_sched_ctx_get_inheritor(hypervisor.sched_ctxs[s]) == father_sched_ctx_id || father_sched_ctx_id == STARPU_NMAX_SCHED_CTXS)) (*sched_ctxs)[(*nsched_ctxs)++] = hypervisor.sched_ctxs[s]; } if(*nsched_ctxs == 0) { free(*sched_ctxs); *sched_ctxs = NULL; } return; } unsigned sc_hypervisor_get_nhierarchy_levels(void) { unsigned nlevels = 0; unsigned level = 0; unsigned levels[STARPU_NMAX_SCHED_CTXS]; unsigned s, l; for(s = 0; s < hypervisor.nsched_ctxs; s++) { level = starpu_sched_ctx_get_hierarchy_level(hypervisor.sched_ctxs[s]); unsigned found = 0; for(l = 0; l < nlevels; l++) if(levels[l] == level) found = 1; if(!found) levels[nlevels++] = level; } return nlevels; } void sc_hypervisor_get_leaves(unsigned *sched_ctxs, int nsched_ctxs, unsigned *leaves, int *nleaves) { int s, s2; for(s = 0; s < nsched_ctxs; s++) { unsigned is_someones_father = 0; for(s2 = 0; s2 < nsched_ctxs; s2++) { unsigned father = starpu_sched_ctx_get_inheritor(sched_ctxs[s2]); if(sched_ctxs[s] == father) { is_someones_father = 1; break; } } if(!is_someones_father) leaves[(*nleaves)++] = sched_ctxs[s]; } return; } void sc_hypervisor_init_worker(int workerid, unsigned sched_ctx) { if(hypervisor.policy.init_worker) hypervisor.policy.init_worker(workerid, sched_ctx); } starpu-1.4.9+dfsg/sc_hypervisor/src/sc_hypervisor_intern.h000066400000000000000000000067301507764646700241330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "uthash.h" #define SC_SPEED_MAX_GAP_DEFAULT 50 #define SC_HYPERVISOR_DEFAULT_CPU_SPEED 5.0 #define SC_HYPERVISOR_DEFAULT_CUDA_SPEED 100.0 struct size_request { int *workers; int nworkers; unsigned *sched_ctxs; int nsched_ctxs; }; /* Entry in the resize request hash table. */ struct resize_request_entry { /* Key: the tag of tasks concerned by this resize request. */ uint32_t task_tag; /* Value: identifier of the scheduling context needing to be resized. * The value doesn't matter since the hash table is used only to test * membership of a task tag. */ unsigned sched_ctx; /* Bookkeeping. */ UT_hash_handle hh; }; /* structure to indicate when the moving of workers was actually done (moved workers can be seen in the new ctx) */ struct resize_ack { /* receiver context */ int receiver_sched_ctx; /* list of workers required to be moved */ int *moved_workers; /* number of workers required to be moved */ int nmoved_workers; /* list of workers that actually got in the receiver ctx */ int *acked_workers; }; struct configuration_entry { /* Key: the tag of tasks concerned by this configuration. */ uint32_t task_tag; /* Value: configuration of the scheduling context. */ struct sc_hypervisor_policy_config *configuration; /* Bookkeeping. */ UT_hash_handle hh; }; struct sc_hypervisor { struct sc_hypervisor_wrapper sched_ctx_w[STARPU_NMAX_SCHED_CTXS]; unsigned sched_ctxs[STARPU_NMAX_SCHED_CTXS]; unsigned nsched_ctxs; unsigned resize[STARPU_NMAX_SCHED_CTXS]; unsigned allow_remove[STARPU_NMAX_SCHED_CTXS]; int min_tasks; struct sc_hypervisor_policy policy; struct configuration_entry *configurations[STARPU_NMAX_SCHED_CTXS]; /* Set of pending resize requests for any context/tag pair. */ struct resize_request_entry *resize_requests[STARPU_NMAX_SCHED_CTXS]; starpu_pthread_mutex_t conf_mut[STARPU_NMAX_SCHED_CTXS]; starpu_pthread_mutex_t resize_mut[STARPU_NMAX_SCHED_CTXS]; struct size_request *sr; int check_min_tasks[STARPU_NMAX_SCHED_CTXS]; /* time when the hypervisor started */ double start_executing_time; /* max speed diff btw ctx before triggering resizing */ double max_speed_gap; /* criteria to trigger resizing */ unsigned resize_criteria; /* value of the speed to compare the speed of the context to */ double optimal_v[STARPU_NMAX_SCHED_CTXS]; }; struct sc_hypervisor_adjustment { int workerids[STARPU_NMAXWORKERS]; int nworkers; }; extern struct sc_hypervisor hypervisor; void _add_config(unsigned sched_ctx); void _remove_config(unsigned sched_ctx); double _get_max_speed_gap(); double _get_optimal_v(unsigned sched_ctx); void _set_optimal_v(unsigned sched_ctx, double optimal_v); int _sc_hypervisor_use_lazy_resize(void); void _sc_hypervisor_allow_compute_idle(unsigned sched_ctx, int worker, unsigned allow); starpu-1.4.9+dfsg/sc_hypervisor/src/uthash.h000066400000000000000000002066331507764646700211550ustar00rootroot00000000000000/* Copyright (c) 2003-2010, Troy D. Hanson http://uthash.sourceforge.net All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef UTHASH_H #define UTHASH_H #include /* memcmp,strlen */ #include /* ptrdiff_t */ /* These macros use decltype or the earlier __typeof GNU extension. As decltype is only available in newer compilers (VS2010 or gcc 4.3+ when compiling c++ source) this code uses whatever method is needed or, for VS2008 where neither is available, uses casting workarounds. */ #ifdef _MSC_VER /* MS compiler */ #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ #define DECLTYPE(x) (decltype(x)) #else /* VS2008 or older (or VS2010 in C mode) */ #define NO_DECLTYPE #define DECLTYPE(x) #endif #else /* GNU, Sun and other compilers */ #define DECLTYPE(x) (__typeof(x)) #endif #ifdef NO_DECLTYPE #define DECLTYPE_ASSIGN(dst,src) \ do { \ char **_da_dst = (char**)(&(dst)); \ *_da_dst = (char*)(src); \ } while(0) #else #define DECLTYPE_ASSIGN(dst,src) \ do { \ (dst) = DECLTYPE(dst)(src); \ } while(0) #endif /* a number of the hash function use uint32_t which isn't defined on win32 */ #ifdef _MSC_VER typedef unsigned int uint32_t; #else #include /* uint32_t */ #endif #define UTHASH_VERSION 1.9.3 #define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ #define uthash_malloc(sz) malloc(sz) /* malloc fcn */ #define uthash_free(ptr,sz) free(ptr) /* free fcn */ #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ #define uthash_expand_fyi(tbl) /* can be defined to log expands */ /* initial number of buckets */ #define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */ #define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */ #define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */ /* calculate the element whose hash handle address is hhe */ #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) #define HASH_FIND(hh,head,keyptr,keylen,out) \ do { \ unsigned _hf_bkt=0,_hf_hashv=0; \ out=NULL; \ if (head) { \ HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \ HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \ keyptr,keylen,out); \ } \ } \ } while (0) #ifdef HASH_BLOOM #define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM) #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0) #define HASH_BLOOM_MAKE(tbl) \ do { \ (tbl)->bloom_nbits = HASH_BLOOM; \ (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ } while (0); #define HASH_BLOOM_FREE(tbl) \ do { \ uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ } while (0); #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8))) #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8))) #define HASH_BLOOM_ADD(tbl,hashv) \ HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) #define HASH_BLOOM_TEST(tbl,hashv) \ HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) #else #define HASH_BLOOM_MAKE(tbl) #define HASH_BLOOM_FREE(tbl) #define HASH_BLOOM_ADD(tbl,hashv) #define HASH_BLOOM_TEST(tbl,hashv) (1) #endif #define HASH_MAKE_TABLE(hh,head) \ do { \ (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ sizeof(UT_hash_table)); \ if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ (head)->hh.tbl->tail = &((head)->hh); \ (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ memset((head)->hh.tbl->buckets, 0, \ HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ HASH_BLOOM_MAKE((head)->hh.tbl); \ (head)->hh.tbl->signature = HASH_SIGNATURE; \ } while(0) #define HASH_ADD(hh,head,fieldname,keylen_in,add) \ HASH_ADD_KEYPTR(hh,head,&add->fieldname,keylen_in,add) #ifdef STARPU_DEBUG /* Check that we don't insert the same key several times */ #define HASH_CHECK_KEY(hh,head,keyptr,keylen,out) \ do { \ __typeof__(out) _out; \ HASH_FIND(hh,head,keyptr,keylen,_out); \ STARPU_ASSERT_MSG(!_out,"Cannot insert the same key twice"); \ } while(0) #else #define HASH_CHECK_KEY(hh,head,keyptr,keylen,out) #endif #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ do { \ unsigned _ha_bkt=0; \ HASH_CHECK_KEY(hh,head,keyptr,keylen_in,add); \ (add)->hh.next = NULL; \ (add)->hh.key = (char*)keyptr; \ (add)->hh.keylen = keylen_in; \ if (!(head)) { \ head = (add); \ (head)->hh.prev = NULL; \ HASH_MAKE_TABLE(hh,head); \ } else { \ (head)->hh.tbl->tail->next = (add); \ (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ (head)->hh.tbl->tail = &((add)->hh); \ } \ (head)->hh.tbl->num_items++; \ (add)->hh.tbl = (head)->hh.tbl; \ HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ (add)->hh.hashv, _ha_bkt); \ HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ HASH_FSCK(hh,head); \ } while(0) #define HASH_TO_BKT( hashv, num_bkts, bkt ) \ do { \ bkt = ((hashv) & ((num_bkts) - 1)); \ } while(0) /* delete "delptr" from the hash table. * "the usual" patch-up process for the app-order doubly-linked-list. * The use of _hd_hh_del below deserves special explanation. * These used to be expressed using (delptr) but that led to a bug * if someone used the same symbol for the head and deletee, like * HASH_DELETE(hh,users,users); * We want that to work, but by changing the head (users) below * we were forfeiting our ability to further refer to the deletee (users) * in the patch-up process. Solution: use scratch space to * copy the deletee pointer, then the latter references are via that * scratch pointer rather than through the repointed (users) symbol. */ #define HASH_DELETE(hh,head,delptr) \ do { \ unsigned _hd_bkt; \ struct UT_hash_handle *_hd_hh_del; \ if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ uthash_free((head)->hh.tbl->buckets, \ (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ HASH_BLOOM_FREE((head)->hh.tbl); \ uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ head = NULL; \ } else { \ _hd_hh_del = &((delptr)->hh); \ if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ (head)->hh.tbl->tail = \ (UT_hash_handle*)((char*)((delptr)->hh.prev) + \ (head)->hh.tbl->hho); \ } \ if ((delptr)->hh.prev) { \ ((UT_hash_handle*)((char*)((delptr)->hh.prev) + \ (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ } else { \ DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ } \ if (_hd_hh_del->next) { \ ((UT_hash_handle*)((char*)_hd_hh_del->next + \ (head)->hh.tbl->hho))->prev = \ _hd_hh_del->prev; \ } \ HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ (head)->hh.tbl->num_items--; \ } \ HASH_FSCK(hh,head); \ } while (0) /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ #define HASH_FIND_STR(head,findstr,out) \ HASH_FIND(hh,head,findstr,strlen(findstr),out) #define HASH_ADD_STR(head,strfield,add) \ HASH_ADD(hh,head,strfield[0],strlen(add->strfield),add) #define HASH_FIND_INT(head,findint,out) \ HASH_FIND(hh,head,findint,sizeof(int),out) #define HASH_ADD_INT(head,intfield,add) \ HASH_ADD(hh,head,intfield,sizeof(int),add) #define HASH_FIND_PTR(head,findptr,out) \ HASH_FIND(hh,head,findptr,sizeof(void *),out) #define HASH_ADD_PTR(head,ptrfield,add) \ HASH_ADD(hh,head,ptrfield,sizeof(void *),add) #define HASH_DEL(head,delptr) \ HASH_DELETE(hh,head,delptr) /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. */ #ifdef HASH_DEBUG #define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) #define HASH_FSCK(hh,head) \ do { \ unsigned _bkt_i; \ unsigned _count, _bkt_count; \ char *_prev; \ struct UT_hash_handle *_thh; \ if (head) { \ _count = 0; \ for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ _bkt_count = 0; \ _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ _prev = NULL; \ while (_thh) { \ if (_prev != (char*)(_thh->hh_prev)) { \ HASH_OOPS("invalid hh_prev %p, actual %p\n", \ _thh->hh_prev, _prev ); \ } \ _bkt_count++; \ _prev = (char*)(_thh); \ _thh = _thh->hh_next; \ } \ _count += _bkt_count; \ if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ HASH_OOPS("invalid bucket count %u, actual %u\n", \ (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ } \ } \ if (_count != (head)->hh.tbl->num_items) { \ HASH_OOPS("invalid hh item count %u, actual %u\n", \ (head)->hh.tbl->num_items, _count ); \ } \ /* traverse hh in app order; check next/prev integrity, count */ \ _count = 0; \ _prev = NULL; \ _thh = &(head)->hh; \ while (_thh) { \ _count++; \ if (_prev !=(char*)(_thh->prev)) { \ HASH_OOPS("invalid prev %p, actual %p\n", \ _thh->prev, _prev ); \ } \ _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ (head)->hh.tbl->hho) : NULL ); \ } \ if (_count != (head)->hh.tbl->num_items) { \ HASH_OOPS("invalid app item count %u, actual %u\n", \ (head)->hh.tbl->num_items, _count ); \ } \ } \ } while (0) #else #define HASH_FSCK(hh,head) #endif /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to * the descriptor to which this macro is defined for tuning the hash function. * The app can #include to get the prototype for write(2). */ #ifdef HASH_EMIT_KEYS #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ do { \ unsigned _klen = fieldlen; \ write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ write(HASH_EMIT_KEYS, keyptr, fieldlen); \ } while (0) #else #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) #endif /* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ #ifdef HASH_FUNCTION #define HASH_FCN HASH_FUNCTION #else #define HASH_FCN HASH_JEN #endif /* The Bernstein hash function, used in Perl prior to v5.6 */ #define HASH_BER(key,keylen,num_bkts,hashv,bkt) \ do { \ unsigned _hb_keylen=keylen; \ char *_hb_key=(char*)(key); \ (hashv) = 0; \ while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \ bkt = (hashv) & (num_bkts-1); \ } while (0) /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ #define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \ do { \ unsigned _sx_i; \ char *_hs_key=(char*)(key); \ hashv = 0; \ for(_sx_i=0; _sx_i < keylen; _sx_i++) \ hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ bkt = hashv & (num_bkts-1); \ } while (0) #define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \ do { \ unsigned _fn_i; \ char *_hf_key=(char*)(key); \ hashv = 2166136261UL; \ for(_fn_i=0; _fn_i < keylen; _fn_i++) \ hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \ bkt = hashv & (num_bkts-1); \ } while(0); #define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \ do { \ unsigned _ho_i; \ char *_ho_key=(char*)(key); \ hashv = 0; \ for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ hashv += _ho_key[_ho_i]; \ hashv += (hashv << 10); \ hashv ^= (hashv >> 6); \ } \ hashv += (hashv << 3); \ hashv ^= (hashv >> 11); \ hashv += (hashv << 15); \ bkt = hashv & (num_bkts-1); \ } while(0) #define HASH_JEN_MIX(a,b,c) \ do { \ a -= b; a -= c; a ^= ( c >> 13 ); \ b -= c; b -= a; b ^= ( a << 8 ); \ c -= a; c -= b; c ^= ( b >> 13 ); \ a -= b; a -= c; a ^= ( c >> 12 ); \ b -= c; b -= a; b ^= ( a << 16 ); \ c -= a; c -= b; c ^= ( b >> 5 ); \ a -= b; a -= c; a ^= ( c >> 3 ); \ b -= c; b -= a; b ^= ( a << 10 ); \ c -= a; c -= b; c ^= ( b >> 15 ); \ } while (0) #define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \ do { \ unsigned _hj_i,_hj_j,_hj_k; \ char *_hj_key=(char*)(key); \ hashv = 0xfeedbeef; \ _hj_i = _hj_j = 0x9e3779b9; \ _hj_k = keylen; \ while (_hj_k >= 12) { \ _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ + ( (unsigned)_hj_key[2] << 16 ) \ + ( (unsigned)_hj_key[3] << 24 ) ); \ _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ + ( (unsigned)_hj_key[6] << 16 ) \ + ( (unsigned)_hj_key[7] << 24 ) ); \ hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ + ( (unsigned)_hj_key[10] << 16 ) \ + ( (unsigned)_hj_key[11] << 24 ) ); \ \ HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ \ _hj_key += 12; \ _hj_k -= 12; \ } \ hashv += keylen; \ switch ( _hj_k ) { \ case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \ /* FALLTHRU */ \ case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \ /* FALLTHRU */ \ case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \ /* FALLTHRU */ \ case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \ /* FALLTHRU */ \ case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \ /* FALLTHRU */ \ case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \ /* FALLTHRU */ \ case 5: _hj_j += _hj_key[4]; \ /* FALLTHRU */ \ case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \ /* FALLTHRU */ \ case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \ /* FALLTHRU */ \ case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \ /* FALLTHRU */ \ case 1: _hj_i += _hj_key[0]; \ /* FALLTHRU */ \ default: break; \ } \ HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ bkt = hashv & (num_bkts-1); \ } while(0) /* The Paul Hsieh hash function */ #undef get16bits #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) #define get16bits(d) (*((const uint16_t *) (d))) #endif #if !defined (get16bits) #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ +(uint32_t)(((const uint8_t *)(d))[0]) ) #endif #define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \ do { \ char *_sfh_key=(char*)(key); \ uint32_t _sfh_tmp, _sfh_len = keylen; \ \ int _sfh_rem = _sfh_len & 3; \ _sfh_len >>= 2; \ hashv = 0xcafebabe; \ \ /* Main loop */ \ for (;_sfh_len > 0; _sfh_len--) { \ hashv += get16bits (_sfh_key); \ _sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ hashv; \ hashv = (hashv << 16) ^ _sfh_tmp; \ _sfh_key += 2*sizeof (uint16_t); \ hashv += hashv >> 11; \ } \ \ /* Handle end cases */ \ switch (_sfh_rem) { \ case 3: hashv += get16bits (_sfh_key); \ hashv ^= hashv << 16; \ hashv ^= _sfh_key[sizeof (uint16_t)] << 18; \ hashv += hashv >> 11; \ break; \ case 2: hashv += get16bits (_sfh_key); \ hashv ^= hashv << 11; \ hashv += hashv >> 17; \ break; \ case 1: hashv += *_sfh_key; \ hashv ^= hashv << 10; \ hashv += hashv >> 1; \ break; \ default: break; \ } \ \ /* Force "avalanching" of final 127 bits */ \ hashv ^= hashv << 3; \ hashv += hashv >> 5; \ hashv ^= hashv << 4; \ hashv += hashv >> 17; \ hashv ^= hashv << 25; \ hashv += hashv >> 6; \ bkt = hashv & (num_bkts-1); \ } while(0); #ifdef HASH_USING_NO_STRICT_ALIASING /* The MurmurHash exploits some CPU's (e.g. x86) tolerance for unaligned reads. * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. * So MurmurHash comes in two versions, the faster unaligned one and the slower * aligned one. We only use the faster one on CPU's where we know it's safe. * * Note the preprocessor built-in defines can be emitted using: * * gcc -m64 -dM -E - < /dev/null (on gcc) * cc -## a.c (where a.c is a simple test file) (Sun Studio) */ #if (defined(__i386__) || defined(__x86_64__)) #define HASH_MUR HASH_MUR_UNALIGNED #else #define HASH_MUR HASH_MUR_ALIGNED #endif /* Appleby's MurmurHash fast version for unaligned-tolerant archs like i386 */ #define HASH_MUR_UNALIGNED(key,keylen,num_bkts,hashv,bkt) \ do { \ const unsigned int _mur_m = 0x5bd1e995; \ const int _mur_r = 24; \ hashv = 0xcafebabe ^ keylen; \ char *_mur_key = (char *)(key); \ uint32_t _mur_tmp, _mur_len = keylen; \ \ for (;_mur_len >= 4; _mur_len-=4) { \ _mur_tmp = *(uint32_t *)_mur_key; \ _mur_tmp *= _mur_m; \ _mur_tmp ^= _mur_tmp >> _mur_r; \ _mur_tmp *= _mur_m; \ hashv *= _mur_m; \ hashv ^= _mur_tmp; \ _mur_key += 4; \ } \ \ switch(_mur_len) \ { \ case 3: hashv ^= _mur_key[2] << 16; \ /* FALLTHRU */ \ case 2: hashv ^= _mur_key[1] << 8; \ /* FALLTHRU */ \ case 1: hashv ^= _mur_key[0]; \ hashv *= _mur_m; \ /* FALLTHRU */ \ default: break; \ }; \ \ hashv ^= hashv >> 13; \ hashv *= _mur_m; \ hashv ^= hashv >> 15; \ \ bkt = hashv & (num_bkts-1); \ } while(0) /* Appleby's MurmurHash version for alignment-sensitive archs like Sparc */ #define HASH_MUR_ALIGNED(key,keylen,num_bkts,hashv,bkt) \ do { \ const unsigned int _mur_m = 0x5bd1e995; \ const int _mur_r = 24; \ hashv = 0xcafebabe ^ (keylen); \ char *_mur_key = (char *)(key); \ uint32_t _mur_len = keylen; \ int _mur_align = (int)_mur_key & 3; \ \ if (_mur_align && (_mur_len >= 4)) { \ unsigned _mur_t = 0, _mur_d = 0; \ switch(_mur_align) { \ case 1: _mur_t |= _mur_key[2] << 16; \ /* FALLTHRU */ \ case 2: _mur_t |= _mur_key[1] << 8; \ /* FALLTHRU */ \ case 3: _mur_t |= _mur_key[0]; \ /* FALLTHRU */ \ default: break; \ } \ _mur_t <<= (8 * _mur_align); \ _mur_key += 4-_mur_align; \ _mur_len -= 4-_mur_align; \ int _mur_sl = 8 * (4-_mur_align); \ int _mur_sr = 8 * _mur_align; \ \ for (;_mur_len >= 4; _mur_len-=4) { \ _mur_d = *(unsigned *)_mur_key; \ _mur_t = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \ unsigned _mur_k = _mur_t; \ _mur_k *= _mur_m; \ _mur_k ^= _mur_k >> _mur_r; \ _mur_k *= _mur_m; \ hashv *= _mur_m; \ hashv ^= _mur_k; \ _mur_t = _mur_d; \ _mur_key += 4; \ } \ _mur_d = 0; \ if(_mur_len >= _mur_align) { \ switch(_mur_align) { \ case 3: _mur_d |= _mur_key[2] << 16; \ /* FALLTHRU */ \ case 2: _mur_d |= _mur_key[1] << 8; \ /* FALLTHRU */ \ case 1: _mur_d |= _mur_key[0]; \ /* FALLTHRU */ \ default: break; \ } \ unsigned _mur_k = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \ _mur_k *= _mur_m; \ _mur_k ^= _mur_k >> _mur_r; \ _mur_k *= _mur_m; \ hashv *= _mur_m; \ hashv ^= _mur_k; \ _mur_k += _mur_align; \ _mur_len -= _mur_align; \ \ switch(_mur_len) \ { \ case 3: hashv ^= _mur_key[2] << 16; \ /* FALLTHRU */ \ case 2: hashv ^= _mur_key[1] << 8; \ /* FALLTHRU */ \ case 1: hashv ^= _mur_key[0]; \ hashv *= _mur_m; \ /* FALLTHRU */ \ default: break; \ } \ } else { \ switch(_mur_len) \ { \ case 3: _mur_d ^= _mur_key[2] << 16; \ /* FALLTHRU */ \ case 2: _mur_d ^= _mur_key[1] << 8; \ /* FALLTHRU */ \ case 1: _mur_d ^= _mur_key[0]; \ /* FALLTHRU */ \ case 0: hashv ^= (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \ hashv *= _mur_m; \ /* FALLTHRU */ \ default: break; \ } \ } \ \ hashv ^= hashv >> 13; \ hashv *= _mur_m; \ hashv ^= hashv >> 15; \ } else { \ for (;_mur_len >= 4; _mur_len-=4) { \ unsigned _mur_k = *(unsigned*)_mur_key; \ _mur_k *= _mur_m; \ _mur_k ^= _mur_k >> _mur_r; \ _mur_k *= _mur_m; \ hashv *= _mur_m; \ hashv ^= _mur_k; \ _mur_key += 4; \ } \ switch(_mur_len) \ { \ case 3: hashv ^= _mur_key[2] << 16; \ /* FALLTHRU */ \ case 2: hashv ^= _mur_key[1] << 8; \ /* FALLTHRU */ \ case 1: hashv ^= _mur_key[0]; \ hashv *= _mur_m; \ /* FALLTHRU */ \ default: break; \ } \ \ hashv ^= hashv >> 13; \ hashv *= _mur_m; \ hashv ^= hashv >> 15; \ } \ bkt = hashv & (num_bkts-1); \ } while(0) #endif /* HASH_USING_NO_STRICT_ALIASING */ /* key comparison function; return 0 if keys equal */ #define HASH_KEYCMP(a,b,len) memcmp(a,b,len) /* iterate over items in a known bucket to find desired item */ #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \ do { \ if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \ else out=NULL; \ while (out) { \ if (out->hh.keylen == keylen_in) { \ if ((HASH_KEYCMP(out->hh.key,keyptr,keylen_in)) == 0) break; \ } \ if (out->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,out->hh.hh_next)); \ else out = NULL; \ } \ } while(0) /* add an item to a bucket */ #define HASH_ADD_TO_BKT(head,addhh) \ do { \ head.count++; \ (addhh)->hh_next = head.hh_head; \ (addhh)->hh_prev = NULL; \ if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \ (head).hh_head=addhh; \ if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \ && (addhh)->tbl->noexpand != 1) { \ HASH_EXPAND_BUCKETS((addhh)->tbl); \ } \ } while(0) /* remove an item from a given bucket */ #define HASH_DEL_IN_BKT(hh,head,hh_del) \ (head).count--; \ if ((head).hh_head == hh_del) { \ (head).hh_head = hh_del->hh_next; \ } \ if (hh_del->hh_prev) { \ hh_del->hh_prev->hh_next = hh_del->hh_next; \ } \ if (hh_del->hh_next) { \ hh_del->hh_next->hh_prev = hh_del->hh_prev; \ } /* Bucket expansion has the effect of doubling the number of buckets * and redistributing the items into the new buckets. Ideally the * items will distribute more or less evenly into the new buckets * (the extent to which this is true is a measure of the quality of * the hash function as it applies to the key domain). * * With the items distributed into more buckets, the chain length * (item count) in each bucket is reduced. Thus by expanding buckets * the hash keeps a bound on the chain length. This bounded chain * length is the essence of how a hash provides constant time lookup. * * The calculation of tbl->ideal_chain_maxlen below deserves some * explanation. First, keep in mind that we're calculating the ideal * maximum chain length based on the *new* (doubled) bucket count. * In fractions this is just n/b (n=number of items,b=new num buckets). * Since the ideal chain length is an integer, we want to calculate * ceil(n/b). We don't depend on floating point arithmetic in this * hash, so to calculate ceil(n/b) with integers we could write * * ceil(n/b) = (n/b) + ((n%b)?1:0) * * and in fact a previous version of this hash did just that. * But now we have improved things a bit by recognizing that b is * always a power of two. We keep its base 2 log handy (call it lb), * so now we can write this with a bit shift and logical AND: * * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) * */ #define HASH_EXPAND_BUCKETS(tbl) \ do { \ unsigned _he_bkt; \ unsigned _he_bkt_i; \ struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ memset(_he_new_buckets, 0, \ 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ tbl->ideal_chain_maxlen = \ (tbl->num_items >> (tbl->log2_num_buckets+1)) + \ ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \ tbl->nonideal_items = 0; \ for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ { \ _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ while (_he_thh) { \ _he_hh_nxt = _he_thh->hh_next; \ HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \ _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ tbl->nonideal_items++; \ _he_newbkt->expand_mult = _he_newbkt->count / \ tbl->ideal_chain_maxlen; \ } \ _he_thh->hh_prev = NULL; \ _he_thh->hh_next = _he_newbkt->hh_head; \ if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \ _he_thh; \ _he_newbkt->hh_head = _he_thh; \ _he_thh = _he_hh_nxt; \ } \ } \ uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ tbl->num_buckets *= 2; \ tbl->log2_num_buckets++; \ tbl->buckets = _he_new_buckets; \ tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ (tbl->ineff_expands+1) : 0; \ if (tbl->ineff_expands > 1) { \ tbl->noexpand=1; \ uthash_noexpand_fyi(tbl); \ } \ uthash_expand_fyi(tbl); \ } while(0) /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ /* Note that HASH_SORT assumes the hash handle name to be hh. * HASH_SRT was added to allow the hash handle name to be passed in. */ #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) #define HASH_SRT(hh,head,cmpfcn) \ do { \ unsigned _hs_i; \ unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ if (head) { \ _hs_insize = 1; \ _hs_looping = 1; \ _hs_list = &((head)->hh); \ while (_hs_looping) { \ _hs_p = _hs_list; \ _hs_list = NULL; \ _hs_tail = NULL; \ _hs_nmerges = 0; \ while (_hs_p) { \ _hs_nmerges++; \ _hs_q = _hs_p; \ _hs_psize = 0; \ for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ _hs_psize++; \ _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ ((void*)((char*)(_hs_q->next) + \ (head)->hh.tbl->hho)) : NULL); \ if (! (_hs_q) ) break; \ } \ _hs_qsize = _hs_insize; \ while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \ if (_hs_psize == 0) { \ _hs_e = _hs_q; \ _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ ((void*)((char*)(_hs_q->next) + \ (head)->hh.tbl->hho)) : NULL); \ _hs_qsize--; \ } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \ _hs_e = _hs_p; \ _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ ((void*)((char*)(_hs_p->next) + \ (head)->hh.tbl->hho)) : NULL); \ _hs_psize--; \ } else if (( \ cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ ) <= 0) { \ _hs_e = _hs_p; \ _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ ((void*)((char*)(_hs_p->next) + \ (head)->hh.tbl->hho)) : NULL); \ _hs_psize--; \ } else { \ _hs_e = _hs_q; \ _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ ((void*)((char*)(_hs_q->next) + \ (head)->hh.tbl->hho)) : NULL); \ _hs_qsize--; \ } \ if ( _hs_tail ) { \ _hs_tail->next = ((_hs_e) ? \ ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ } else { \ _hs_list = _hs_e; \ } \ _hs_e->prev = ((_hs_tail) ? \ ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ _hs_tail = _hs_e; \ } \ _hs_p = _hs_q; \ } \ _hs_tail->next = NULL; \ if ( _hs_nmerges <= 1 ) { \ _hs_looping=0; \ (head)->hh.tbl->tail = _hs_tail; \ DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ } \ _hs_insize *= 2; \ } \ HASH_FSCK(hh,head); \ } \ } while (0) /* This function selects items from one hash into another hash. * The end result is that the selected items have dual presence * in both hashes. There is no copy of the items made; rather * they are added into the new hash through a secondary hash * hash handle that must be present in the structure. */ #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ do { \ unsigned _src_bkt, _dst_bkt; \ void *_last_elt=NULL, *_elt; \ UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ if (src) { \ for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ _src_hh; \ _src_hh = _src_hh->hh_next) { \ _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ if (cond(_elt)) { \ _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ _dst_hh->key = _src_hh->key; \ _dst_hh->keylen = _src_hh->keylen; \ _dst_hh->hashv = _src_hh->hashv; \ _dst_hh->prev = _last_elt; \ _dst_hh->next = NULL; \ if (_last_elt_hh) { _last_elt_hh->next = _elt; } \ if (!dst) { \ DECLTYPE_ASSIGN(dst,_elt); \ HASH_MAKE_TABLE(hh_dst,dst); \ } else { \ _dst_hh->tbl = (dst)->hh_dst.tbl; \ } \ HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ (dst)->hh_dst.tbl->num_items++; \ _last_elt = _elt; \ _last_elt_hh = _dst_hh; \ } \ } \ } \ } \ HASH_FSCK(hh_dst,dst); \ } while (0) #define HASH_CLEAR(hh,head) \ do { \ if (head) { \ uthash_free((head)->hh.tbl->buckets, \ (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ (head)=NULL; \ } \ } while(0) #ifdef NO_DECLTYPE #define HASH_ITER(hh,head,el,tmp) \ for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \ el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) #else #define HASH_ITER(hh,head,el,tmp) \ for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \ el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL)) #endif /* obtain a count of items in the hash */ #define HASH_COUNT(head) HASH_CNT(hh,head) #define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0) typedef struct UT_hash_bucket { struct UT_hash_handle *hh_head; unsigned count; /* expand_mult is normally set to 0. In this situation, the max chain length * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If * the bucket's chain exceeds this length, bucket expansion is triggered). * However, setting expand_mult to a non-zero value delays bucket expansion * (that would be triggered by additions to this particular bucket) * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. * (The multiplier is simply expand_mult+1). The whole idea of this * multiplier is to reduce bucket expansions, since they are expensive, in * situations where we know that a particular bucket tends to be overused. * It is better to let its chain length grow to a longer yet-still-bounded * value, than to do an O(n) bucket expansion too often. */ unsigned expand_mult; } UT_hash_bucket; /* random signature used only to find hash tables in external analysis */ #define HASH_SIGNATURE 0xa0111fe1 #define HASH_BLOOM_SIGNATURE 0xb12220f2 typedef struct UT_hash_table { UT_hash_bucket *buckets; unsigned num_buckets, log2_num_buckets; unsigned num_items; struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ /* in an ideal situation (all buckets used equally), no bucket would have * more than ceil(#items/#buckets) items. that's the ideal chain length. */ unsigned ideal_chain_maxlen; /* nonideal_items is the number of items in the hash whose chain position * exceeds the ideal chain maxlen. these items pay the penalty for an uneven * hash distribution; reaching them in a chain traversal takes >ideal steps */ unsigned nonideal_items; /* ineffective expands occur when a bucket doubling was performed, but * afterward, more than half the items in the hash had nonideal chain * positions. If this happens on two consecutive expansions we inhibit any * further expansion, as it's not helping; this happens when the hash * function isn't a good fit for the key domain. When expansion is inhibited * the hash will still work, albeit no longer in constant time. */ unsigned ineff_expands, noexpand; uint32_t signature; /* used only to find hash tables in external analysis */ #ifdef HASH_BLOOM uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ uint8_t *bloom_bv; char bloom_nbits; #endif } UT_hash_table; typedef struct UT_hash_handle { struct UT_hash_table *tbl; void *prev; /* prev element in app order */ void *next; /* next element in app order */ struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ struct UT_hash_handle *hh_next; /* next hh in bucket order */ void *key; /* ptr to enclosing struct's key */ unsigned keylen; /* enclosing struct's key len */ unsigned hashv; /* result of hash-fcn(key) */ } UT_hash_handle; #endif /* UTHASH_H */ starpu-1.4.9+dfsg/socl/000077500000000000000000000000001507764646700147505ustar00rootroot00000000000000starpu-1.4.9+dfsg/socl/Makefile.am000066400000000000000000000015351507764646700170100ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-subdirtests.mk SUBDIRS = src examples EXTRA_DIST = README SOCL_vendorsdir = @datarootdir@/starpu/opencl/vendors dist_SOCL_vendors_DATA = @SOCL_VENDORS@ starpu-1.4.9+dfsg/socl/Makefile.in000066400000000000000000000732111507764646700170210ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ subdir = socl ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(dist_SOCL_vendors_DATA) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(SOCL_vendorsdir)" DATA = $(dist_SOCL_vendors_DATA) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/make/starpu-subdirtests.mk README DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = src examples EXTRA_DIST = README SOCL_vendorsdir = @datarootdir@/starpu/opencl/vendors dist_SOCL_vendors_DATA = @SOCL_VENDORS@ all: all-recursive .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign socl/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign socl/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-dist_SOCL_vendorsDATA: $(dist_SOCL_vendors_DATA) @$(NORMAL_INSTALL) @list='$(dist_SOCL_vendors_DATA)'; test -n "$(SOCL_vendorsdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(SOCL_vendorsdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(SOCL_vendorsdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(SOCL_vendorsdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(SOCL_vendorsdir)" || exit $$?; \ done uninstall-dist_SOCL_vendorsDATA: @$(NORMAL_UNINSTALL) @list='$(dist_SOCL_vendors_DATA)'; test -n "$(SOCL_vendorsdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(SOCL_vendorsdir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile $(DATA) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(SOCL_vendorsdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dist_SOCL_vendorsDATA install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-dist_SOCL_vendorsDATA .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ check-am clean clean-generic clean-libtool cscopelist-am ctags \ ctags-am distclean distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am \ install-dist_SOCL_vendorsDATA install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs installdirs-am \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags tags-am uninstall uninstall-am \ uninstall-dist_SOCL_vendorsDATA .PRECIOUS: Makefile # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # recheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i recheck || RET=1 ; \ done ; \ exit $$RET showcheckfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/socl/README000066400000000000000000000002451507764646700156310ustar00rootroot00000000000000StarPU's OpenCL interface ========================= This directory contains an OpenCL implementation that can be used as a replacement of the classic StarPU's API. starpu-1.4.9+dfsg/socl/examples/000077500000000000000000000000001507764646700165665ustar00rootroot00000000000000starpu-1.4.9+dfsg/socl/examples/Makefile.am000066400000000000000000000034341507764646700206260ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk include $(top_srcdir)/make/starpu-loader.mk AM_CFLAGS += $(MAGMA_CFLAGS) AM_CPPFLAGS = $(STARPU_H_CPPFLAGS) -DCL_TARGET_OPENCL_VERSION=120 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ LIBS += $(top_builddir)/socl/src/libsocl-@STARPU_EFFECTIVE_VERSION@.la $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) LIBS += $(STARPU_OPENCL_LDFLAGS) SOCL_EXAMPLES = if !STARPU_SIMGRID TESTS = $(SOCL_EXAMPLES) endif check_PROGRAMS = $(SOCL_EXAMPLES) CLEANFILES = *.gcno *.gcda starpu_idle_microsec.log examplebindir = $(libdir)/starpu/examples/socl/ examplebin_PROGRAMS = examplebin_PROGRAMS += \ basic/basic \ basicsplit/basicsplit \ testmap/testmap \ clinfo/clinfo \ matmul/matmul \ mandelbrot/mandelbrot \ mansched/mansched SOCL_EXAMPLES += \ basic/basic \ basicsplit/basicsplit \ testmap/testmap \ clinfo/clinfo \ matmul/matmul \ mansched/mansched matmul_matmul_LDADD = -lm #mandelbrot_mandelbrot_CPPFLAGS = $(AM_CPPFLAGS) #if STARPU_HAVE_X11 #mandelbrot_mandelbrot_CPPFLAGS += $(X_CFLAGS) #mandelbrot_mandelbrot_LDADD = $(X_PRE_LIBS) $(X_LIBS) -lX11 $(X_EXTRA_LIBS) #endif starpu-1.4.9+dfsg/socl/examples/Makefile.in000066400000000000000000002002471507764646700206400ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ noinst_PROGRAMS = $(am__EXEEXT_2) # Make tests run through mpiexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec # switch off local socket usage #MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 @STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader @STARPU_SIMGRID_FALSE@TESTS = $(am__EXEEXT_1) check_PROGRAMS = $(am__EXEEXT_1) examplebin_PROGRAMS = basic/basic$(EXEEXT) \ basicsplit/basicsplit$(EXEEXT) testmap/testmap$(EXEEXT) \ clinfo/clinfo$(EXEEXT) matmul/matmul$(EXEEXT) \ mandelbrot/mandelbrot$(EXEEXT) mansched/mansched$(EXEEXT) subdir = socl/examples ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__EXEEXT_1 = basic/basic$(EXEEXT) basicsplit/basicsplit$(EXEEXT) \ testmap/testmap$(EXEEXT) clinfo/clinfo$(EXEEXT) \ matmul/matmul$(EXEEXT) mansched/mansched$(EXEEXT) am__installdirs = "$(DESTDIR)$(examplebindir)" @STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_2 = loader$(EXEEXT) PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) basic_basic_SOURCES = basic/basic.c am__dirstamp = $(am__leading_dot)dirstamp basic_basic_OBJECTS = basic/basic.$(OBJEXT) basic_basic_LDADD = $(LDADD) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = basicsplit_basicsplit_SOURCES = basicsplit/basicsplit.c basicsplit_basicsplit_OBJECTS = basicsplit/basicsplit.$(OBJEXT) basicsplit_basicsplit_LDADD = $(LDADD) clinfo_clinfo_SOURCES = clinfo/clinfo.c clinfo_clinfo_OBJECTS = clinfo/clinfo.$(OBJEXT) clinfo_clinfo_LDADD = $(LDADD) loader_SOURCES = loader.c loader_OBJECTS = loader-loader.$(OBJEXT) loader_LDADD = $(LDADD) mandelbrot_mandelbrot_SOURCES = mandelbrot/mandelbrot.c mandelbrot_mandelbrot_OBJECTS = mandelbrot/mandelbrot.$(OBJEXT) mandelbrot_mandelbrot_LDADD = $(LDADD) mansched_mansched_SOURCES = mansched/mansched.c mansched_mansched_OBJECTS = mansched/mansched.$(OBJEXT) mansched_mansched_LDADD = $(LDADD) matmul_matmul_SOURCES = matmul/matmul.c matmul_matmul_OBJECTS = matmul/matmul.$(OBJEXT) matmul_matmul_DEPENDENCIES = testmap_testmap_SOURCES = testmap/testmap.c testmap_testmap_OBJECTS = testmap/testmap.$(OBJEXT) testmap_testmap_LDADD = $(LDADD) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po \ basic/$(DEPDIR)/basic.Po basicsplit/$(DEPDIR)/basicsplit.Po \ clinfo/$(DEPDIR)/clinfo.Po mandelbrot/$(DEPDIR)/mandelbrot.Po \ mansched/$(DEPDIR)/mansched.Po matmul/$(DEPDIR)/matmul.Po \ testmap/$(DEPDIR)/testmap.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = basic/basic.c basicsplit/basicsplit.c clinfo/clinfo.c \ loader.c mandelbrot/mandelbrot.c mansched/mansched.c \ matmul/matmul.c testmap/testmap.c DIST_SOURCES = basic/basic.c basicsplit/basicsplit.c clinfo/clinfo.c \ loader.c mandelbrot/mandelbrot.c mansched/mansched.c \ matmul/matmul.c testmap/testmap.c am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) AM_RECURSIVE_TARGETS = check recheck TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-loader.mk \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ \ $(top_builddir)/socl/src/libsocl-@STARPU_EFFECTIVE_VERSION@.la \ $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) \ $(STARPU_OPENCL_LDFLAGS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = $(am__append_4) $(am__append_6) LAUNCHER = $(am__append_3) $(am__append_5) # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(MAGMA_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile # When GNU parallel is available and -j is passed to make, run tests through # parallel, using a "starpu" semaphore. # Also make test shell scripts run its tests through parallel, using a # "substarpu" semaphore. This brings some overload, but only one level. @HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') @STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 @STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ @STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) @STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" @STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) AM_TESTS_FD_REDIRECT = 9>&2 AM_CPPFLAGS = $(STARPU_H_CPPFLAGS) -DCL_TARGET_OPENCL_VERSION=120 AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ SOCL_EXAMPLES = basic/basic basicsplit/basicsplit testmap/testmap \ clinfo/clinfo matmul/matmul mansched/mansched CLEANFILES = *.gcno *.gcda starpu_idle_microsec.log examplebindir = $(libdir)/starpu/examples/socl/ matmul_matmul_LDADD = -lm all: all-am .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign socl/examples/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign socl/examples/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-checkPROGRAMS: @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list install-examplebinPROGRAMS: $(examplebin_PROGRAMS) @$(NORMAL_INSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ fi; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ while read p p1; do if test -f $$p \ || test -f $$p1 \ ; then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' \ -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ } \ ; done uninstall-examplebinPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' \ `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(examplebindir)" && rm -f $$files clean-examplebinPROGRAMS: @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list clean-noinstPROGRAMS: @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list basic/$(am__dirstamp): @$(MKDIR_P) basic @: > basic/$(am__dirstamp) basic/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) basic/$(DEPDIR) @: > basic/$(DEPDIR)/$(am__dirstamp) basic/basic.$(OBJEXT): basic/$(am__dirstamp) \ basic/$(DEPDIR)/$(am__dirstamp) basic/basic$(EXEEXT): $(basic_basic_OBJECTS) $(basic_basic_DEPENDENCIES) $(EXTRA_basic_basic_DEPENDENCIES) basic/$(am__dirstamp) @rm -f basic/basic$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basic_basic_OBJECTS) $(basic_basic_LDADD) $(LIBS) basicsplit/$(am__dirstamp): @$(MKDIR_P) basicsplit @: > basicsplit/$(am__dirstamp) basicsplit/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) basicsplit/$(DEPDIR) @: > basicsplit/$(DEPDIR)/$(am__dirstamp) basicsplit/basicsplit.$(OBJEXT): basicsplit/$(am__dirstamp) \ basicsplit/$(DEPDIR)/$(am__dirstamp) basicsplit/basicsplit$(EXEEXT): $(basicsplit_basicsplit_OBJECTS) $(basicsplit_basicsplit_DEPENDENCIES) $(EXTRA_basicsplit_basicsplit_DEPENDENCIES) basicsplit/$(am__dirstamp) @rm -f basicsplit/basicsplit$(EXEEXT) $(AM_V_CCLD)$(LINK) $(basicsplit_basicsplit_OBJECTS) $(basicsplit_basicsplit_LDADD) $(LIBS) clinfo/$(am__dirstamp): @$(MKDIR_P) clinfo @: > clinfo/$(am__dirstamp) clinfo/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) clinfo/$(DEPDIR) @: > clinfo/$(DEPDIR)/$(am__dirstamp) clinfo/clinfo.$(OBJEXT): clinfo/$(am__dirstamp) \ clinfo/$(DEPDIR)/$(am__dirstamp) clinfo/clinfo$(EXEEXT): $(clinfo_clinfo_OBJECTS) $(clinfo_clinfo_DEPENDENCIES) $(EXTRA_clinfo_clinfo_DEPENDENCIES) clinfo/$(am__dirstamp) @rm -f clinfo/clinfo$(EXEEXT) $(AM_V_CCLD)$(LINK) $(clinfo_clinfo_OBJECTS) $(clinfo_clinfo_LDADD) $(LIBS) loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) @rm -f loader$(EXEEXT) $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) mandelbrot/$(am__dirstamp): @$(MKDIR_P) mandelbrot @: > mandelbrot/$(am__dirstamp) mandelbrot/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) mandelbrot/$(DEPDIR) @: > mandelbrot/$(DEPDIR)/$(am__dirstamp) mandelbrot/mandelbrot.$(OBJEXT): mandelbrot/$(am__dirstamp) \ mandelbrot/$(DEPDIR)/$(am__dirstamp) mandelbrot/mandelbrot$(EXEEXT): $(mandelbrot_mandelbrot_OBJECTS) $(mandelbrot_mandelbrot_DEPENDENCIES) $(EXTRA_mandelbrot_mandelbrot_DEPENDENCIES) mandelbrot/$(am__dirstamp) @rm -f mandelbrot/mandelbrot$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mandelbrot_mandelbrot_OBJECTS) $(mandelbrot_mandelbrot_LDADD) $(LIBS) mansched/$(am__dirstamp): @$(MKDIR_P) mansched @: > mansched/$(am__dirstamp) mansched/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) mansched/$(DEPDIR) @: > mansched/$(DEPDIR)/$(am__dirstamp) mansched/mansched.$(OBJEXT): mansched/$(am__dirstamp) \ mansched/$(DEPDIR)/$(am__dirstamp) mansched/mansched$(EXEEXT): $(mansched_mansched_OBJECTS) $(mansched_mansched_DEPENDENCIES) $(EXTRA_mansched_mansched_DEPENDENCIES) mansched/$(am__dirstamp) @rm -f mansched/mansched$(EXEEXT) $(AM_V_CCLD)$(LINK) $(mansched_mansched_OBJECTS) $(mansched_mansched_LDADD) $(LIBS) matmul/$(am__dirstamp): @$(MKDIR_P) matmul @: > matmul/$(am__dirstamp) matmul/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) matmul/$(DEPDIR) @: > matmul/$(DEPDIR)/$(am__dirstamp) matmul/matmul.$(OBJEXT): matmul/$(am__dirstamp) \ matmul/$(DEPDIR)/$(am__dirstamp) matmul/matmul$(EXEEXT): $(matmul_matmul_OBJECTS) $(matmul_matmul_DEPENDENCIES) $(EXTRA_matmul_matmul_DEPENDENCIES) matmul/$(am__dirstamp) @rm -f matmul/matmul$(EXEEXT) $(AM_V_CCLD)$(LINK) $(matmul_matmul_OBJECTS) $(matmul_matmul_LDADD) $(LIBS) testmap/$(am__dirstamp): @$(MKDIR_P) testmap @: > testmap/$(am__dirstamp) testmap/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) testmap/$(DEPDIR) @: > testmap/$(DEPDIR)/$(am__dirstamp) testmap/testmap.$(OBJEXT): testmap/$(am__dirstamp) \ testmap/$(DEPDIR)/$(am__dirstamp) testmap/testmap$(EXEEXT): $(testmap_testmap_OBJECTS) $(testmap_testmap_DEPENDENCIES) $(EXTRA_testmap_testmap_DEPENDENCIES) testmap/$(am__dirstamp) @rm -f testmap/testmap$(EXEEXT) $(AM_V_CCLD)$(LINK) $(testmap_testmap_OBJECTS) $(testmap_testmap_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) -rm -f basic/*.$(OBJEXT) -rm -f basicsplit/*.$(OBJEXT) -rm -f clinfo/*.$(OBJEXT) -rm -f mandelbrot/*.$(OBJEXT) -rm -f mansched/*.$(OBJEXT) -rm -f matmul/*.$(OBJEXT) -rm -f testmap/*.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basic/$(DEPDIR)/basic.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@basicsplit/$(DEPDIR)/basicsplit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@clinfo/$(DEPDIR)/clinfo.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mandelbrot/$(DEPDIR)/mandelbrot.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@mansched/$(DEPDIR)/mansched.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@matmul/$(DEPDIR)/matmul.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@testmap/$(DEPDIR)/testmap.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< loader-loader.o: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c loader-loader.obj: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs -rm -rf basic/.libs basic/_libs -rm -rf basicsplit/.libs basicsplit/_libs -rm -rf clinfo/.libs clinfo/_libs -rm -rf mandelbrot/.libs mandelbrot/_libs -rm -rf mansched/.libs mansched/_libs -rm -rf matmul/.libs matmul/_libs -rm -rf testmap/.libs testmap/_libs ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-am TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-am CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-am cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: $(check_PROGRAMS) @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all $(check_PROGRAMS) @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? basic/basic.log: basic/basic$(EXEEXT) @p='basic/basic$(EXEEXT)'; \ b='basic/basic'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) basicsplit/basicsplit.log: basicsplit/basicsplit$(EXEEXT) @p='basicsplit/basicsplit$(EXEEXT)'; \ b='basicsplit/basicsplit'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) testmap/testmap.log: testmap/testmap$(EXEEXT) @p='testmap/testmap$(EXEEXT)'; \ b='testmap/testmap'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) clinfo/clinfo.log: clinfo/clinfo$(EXEEXT) @p='clinfo/clinfo$(EXEEXT)'; \ b='clinfo/clinfo'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) matmul/matmul.log: matmul/matmul$(EXEEXT) @p='matmul/matmul$(EXEEXT)'; \ b='matmul/matmul'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) mansched/mansched.log: mansched/mansched$(EXEEXT) @p='mansched/mansched$(EXEEXT)'; \ b='mansched/mansched'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: check-am all-am: Makefile $(PROGRAMS) installdirs: for dir in "$(DESTDIR)$(examplebindir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -rm -f basic/$(DEPDIR)/$(am__dirstamp) -rm -f basic/$(am__dirstamp) -rm -f basicsplit/$(DEPDIR)/$(am__dirstamp) -rm -f basicsplit/$(am__dirstamp) -rm -f clinfo/$(DEPDIR)/$(am__dirstamp) -rm -f clinfo/$(am__dirstamp) -rm -f mandelbrot/$(DEPDIR)/$(am__dirstamp) -rm -f mandelbrot/$(am__dirstamp) -rm -f mansched/$(DEPDIR)/$(am__dirstamp) -rm -f mansched/$(am__dirstamp) -rm -f matmul/$(DEPDIR)/$(am__dirstamp) -rm -f matmul/$(am__dirstamp) -rm -f testmap/$(DEPDIR)/$(am__dirstamp) -rm -f testmap/$(am__dirstamp) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ clean-libtool clean-noinstPROGRAMS mostlyclean-am distclean: distclean-am -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f basic/$(DEPDIR)/basic.Po -rm -f basicsplit/$(DEPDIR)/basicsplit.Po -rm -f clinfo/$(DEPDIR)/clinfo.Po -rm -f mandelbrot/$(DEPDIR)/mandelbrot.Po -rm -f mansched/$(DEPDIR)/mansched.Po -rm -f matmul/$(DEPDIR)/matmul.Po -rm -f testmap/$(DEPDIR)/testmap.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-examplebinPROGRAMS install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f basic/$(DEPDIR)/basic.Po -rm -f basicsplit/$(DEPDIR)/basicsplit.Po -rm -f clinfo/$(DEPDIR)/clinfo.Po -rm -f mandelbrot/$(DEPDIR)/mandelbrot.Po -rm -f mansched/$(DEPDIR)/mansched.Po -rm -f matmul/$(DEPDIR)/matmul.Po -rm -f testmap/$(DEPDIR)/testmap.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-examplebinPROGRAMS .MAKE: check-am install-am install-strip .PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ check-am clean clean-checkPROGRAMS clean-examplebinPROGRAMS \ clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ ctags ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am \ install-examplebinPROGRAMS install-exec install-exec-am \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip installcheck installcheck-am \ installdirs maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ uninstall uninstall-am uninstall-examplebinPROGRAMS .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # # Test loading goes through a lot of launchers: # # - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. # either mpirun or starpu_tcpipexec # # - $(LOADER), i.e. tests/loader, is then called to implement timeout, running # gdb, etc. But if it detects that the test is a .sh script, it just executes # it # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # to run the program through e.g. valgrind.sh # # When the program is a shell script, additionally: # # - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) # # - $(MS_LAUNCHER) is called to run the test through starpu_msexec # # - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program # through it. # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # export LAUNCHER @HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL export MS_LAUNCHER LAUNCHER ?= MS_LAUNCHER ?= @STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr export LSAN_OPTIONS export TSAN_OPTIONS #mandelbrot_mandelbrot_CPPFLAGS = $(AM_CPPFLAGS) #if STARPU_HAVE_X11 #mandelbrot_mandelbrot_CPPFLAGS += $(X_CFLAGS) #mandelbrot_mandelbrot_LDADD = $(X_PRE_LIBS) $(X_LIBS) -lX11 $(X_EXTRA_LIBS) #endif # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/socl/examples/basic/000077500000000000000000000000001507764646700176475ustar00rootroot00000000000000starpu-1.4.9+dfsg/socl/examples/basic/basic.c000066400000000000000000000164441507764646700211050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #ifdef __APPLE_CC__ #include #else #include #endif #define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0) #define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0) #ifdef UNUSED #elif defined(__GNUC__) # define UNUSED(x) UNUSED_ ## x __attribute__((unused)) #else # define UNUSED(x) x #endif #define SIZE 1024 #define TYPE float #define REALSIZE (SIZE * sizeof(TYPE)) const char *kernel_src = "__kernel void add(__global float*s1, __global float*s2, __global float*d) { \ size_t x = get_global_id(0);\n \ size_t y = get_global_id(1);\n \ size_t w = get_global_size(0); \n \ int idx = y*w+x; \n \ #ifdef SOCL_DEVICE_TYPE_GPU \n \ d[idx] = s1[idx] + s2[idx];\n \ #endif \n \ #ifdef SOCL_DEVICE_TYPE_CPU \n \ d[idx] = s1[idx] + 2* s2[idx];\n \ #endif \n \ #ifdef SOCL_DEVICE_TYPE_ACCELERATOR \n \ d[idx] = s1[idx] + 3 * s2[idx];\n \ #endif \n \ #ifdef SOCL_DEVICE_TYPE_UNKNOWN \n \ d[idx] = s1[idx] + 4 * s2[idx];\n \ #endif \n \ }"; int main(int UNUSED(argc), char** UNUSED(argv)) { cl_platform_id platforms[15]; cl_uint num_platforms; cl_device_id devices[15]; cl_uint num_devices; cl_context context; cl_program program; cl_kernel kernel; cl_mem s1m, s2m, dm; cl_command_queue cq; cl_int err; unsigned int i; TYPE s1[SIZE],s2[SIZE],d[SIZE]; { for (i=0; i #include #include #include #ifdef __APPLE_CC__ #include #else #include #endif #define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0) #define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0) #ifdef UNUSED #elif defined(__GNUC__) # define UNUSED(x) UNUSED_ ## x __attribute__((unused)) #else # define UNUSED(x) x #endif #define SIZE 1024 #define TYPE float #define REALSIZE (SIZE * sizeof(TYPE)) const char * kernel_src = "__kernel void add(__global float*s1, __global float*s2, __global float*d) { \ size_t x = get_global_id(0);\n \ size_t y = get_global_id(1);\n \ size_t w = get_global_size(0); \n \ int idx = y*w+x; \n \ #ifdef SOCL_DEVICE_TYPE_GPU \n \ d[idx] = s1[idx] + s2[idx];\n \ #endif \n \ #ifdef SOCL_DEVICE_TYPE_CPU \n \ d[idx] = s1[idx] + 2* s2[idx];\n \ #endif \n \ #ifdef SOCL_DEVICE_TYPE_ACCELERATOR \n \ d[idx] = s1[idx] + 3 * s2[idx];\n \ #endif \n \ #ifdef SOCL_DEVICE_TYPE_UNKNOWN \n \ d[idx] = s1[idx] + 4 * s2[idx];\n \ #endif \n \ }"; cl_kernel kernel; cl_context context; TYPE s1[SIZE],s2[SIZE],d[SIZE]; typedef cl_int (*split_func_t)(cl_command_queue, cl_uint, cl_uint, const size_t *, const size_t *, const size_t *, const cl_event, cl_event *); void add(cl_command_queue cq, cl_uint size, TYPE * _s1, TYPE *_s2, TYPE*_d, cl_uint num_events, cl_event * events, cl_event *event) { cl_int err; printf("Creating buffers...\n"); cl_mem s1m = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, size * sizeof(TYPE), _s1, &err); check(err, "clCreateBuffer s1"); cl_mem s2m = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, size * sizeof(TYPE), _s2, &err); check(err, "clCreateBuffer s2"); cl_mem dm = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, size * sizeof(TYPE), _d, &err); check(err, "clCreateBuffer d"); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &s1m); check(err, "clSetKernelArg 0"); err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &s2m); check(err, "clSetKernelArg 1"); err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dm); check(err, "clSetKernelArg 2"); printf("Enqueueing NDRangeKernel...\n"); size_t local[3] = {16, 1, 1}; size_t global[3] = {size, 1, 1}; cl_event eventK; err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, global, local, num_events, events, &eventK); check(err, "clEnqueueNDRangeKernel"); clEnqueueMapBuffer(cq, dm, CL_FALSE, CL_MAP_READ, 0, size * sizeof(TYPE), 1, &eventK, event, &err); check(err, "clEnqueueMapBuffer"); clReleaseMemObject(s1m); clReleaseMemObject(s2m); clReleaseMemObject(dm); } cl_int split_func(cl_command_queue cq, cl_uint split_factor, void * UNUSED(data), cl_event before, cl_event * after) { cl_event evs[split_factor]; printf("Partition with factor %d\n", split_factor); cl_uint size = ((SIZE)/split_factor) - (SIZE/split_factor % 16); cl_uint i; for (i=0; i #include #ifdef __APPLE_CC__ #include #else #include #endif static inline void checkErr(cl_int err, const char * name) { if (err != CL_SUCCESS) { fprintf(stderr, "ERROR: %s (%d)\n", name, err); exit(1); } } int main(void) { cl_int err; cl_uint num_platforms; // Platform info err = clGetPlatformIDs(0, NULL, &num_platforms); if (num_platforms == 0) { printf("No OpenCL platform found.\n"); exit(77); } checkErr(err, "Unable to get platform count"); cl_platform_id platforms[num_platforms]; err = clGetPlatformIDs(num_platforms, platforms, NULL); checkErr(err, "Unable to get platform list"); // Iteratate over platforms printf("Number of platforms:\t\t\t\t %d\n", num_platforms); { unsigned int i; for (i=0; i #include #include #include #include #include #include #include #include #include #include #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) #include #else #include #endif #ifdef STARPU_QUICK_CHECK /* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s add some extra times for tests which run with all schedulers */ #define DEFAULT_TIMEOUT 100 #elif !defined(STARPU_LONG_CHECK) /* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ #define DEFAULT_TIMEOUT 300 #else /* Long checks can be very long */ #define DEFAULT_TIMEOUT 1000 #endif #define AUTOTEST_SKIPPED_TEST 77 static pid_t child_pid = 0; static int timeout; #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) static int mygettimeofday(struct timeval *tv, void *tz) { if (tv) { FILETIME ft; unsigned long long res; GetSystemTimeAsFileTime(&ft); /* 100-nanosecond intervals since January 1, 1601 */ res = ft.dwHighDateTime; res <<= 32; res |= ft.dwLowDateTime; res /= 10; /* Now we have microseconds */ res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; /* Now we are based on epoch */ tv->tv_sec = res / 1000000ULL; tv->tv_usec = res % 1000000ULL; } } #else #define mygettimeofday(tv,tz) gettimeofday(tv,tz) #endif #ifdef STARPU_GDB_PATH static int try_launch_gdb(const char *exe, const char *core) { # define GDB_COMMANDS \ "-ex", "py-list", \ "-ex", "starpu-tasks", \ "-ex", "starpu-workers", \ "-ex", "starpu-print-datas-summary", \ "-ex", "starpu-memusage", \ "-ex", "starpu-print-archs", \ "-ex", "starpu-print-registered-models", \ "-ex", "bt full", \ "-ex", "py-bt", \ "-ex", "thread apply all bt full", \ "-ex", "thread apply all py-bt", \ int err; pid_t pid; struct stat st; const char *top_builddir; char *gdb; err = stat(core, &st); if (err != 0) { fprintf(stderr, "while looking for core file of %s: %s: %m\n", exe, core); return -1; } if (!(st.st_mode & S_IFREG)) { fprintf(stderr, "%s: not a regular file\n", core); return -1; } top_builddir = getenv("top_builddir"); pid = fork(); switch (pid) { case 0: /* kid */ if (top_builddir != NULL) { /* Run gdb with Libtool. */ gdb = alloca(strlen(top_builddir) + sizeof("/libtool") + 1); strcpy(gdb, top_builddir); strcat(gdb, "/libtool"); err = execl(gdb, "gdb", "--mode=execute", STARPU_GDB_PATH, "--batch", GDB_COMMANDS exe, core, NULL); } else { /* Run gdb directly */ gdb = STARPU_GDB_PATH; err = execl(gdb, "gdb", "--batch", GDB_COMMANDS exe, core, NULL); } if (err != 0) { fprintf(stderr, "while launching `%s': %m\n", gdb); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); break; case -1: fprintf(stderr, "fork: %m\n"); return -1; default: /* parent */ { pid_t who; int status; who = waitpid(pid, &status, 0); if (who != pid) fprintf(stderr, "while waiting for gdb " "process %d: %m\n", pid); } } return 0; # undef GDB_COMMANDS } #endif /* STARPU_GDB_PATH */ static void launch_gdb(const char *exe) { #ifdef STARPU_GDB_PATH char s[32]; snprintf(s, sizeof(s), "core.%d", child_pid); if (try_launch_gdb(exe, s) < 0) try_launch_gdb(exe, "core"); #endif /* STARPU_GDB_PATH */ } static char *test_name; static void test_cleaner(int sig) { pid_t child_gid; int status; (void) sig; // send signal to all loader family members fprintf(stderr, "[error] test %s has been blocked for %d seconds. Mark it as failed\n", test_name, timeout); child_gid = getpgid(child_pid); kill(-child_gid, SIGQUIT); waitpid(child_pid, &status, 0); launch_gdb(test_name); raise(SIGALRM); exit(EXIT_FAILURE); } static void forwardsig(int sig) { pid_t child_gid; child_gid = getpgid(child_pid); kill(-child_gid, sig); } static int _decode(char **src, char *motif, const char *value) { char *found; found = strstr(*src, motif); if (found == NULL) return 0; char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); strncpy(new_src, *src, found - *src); strcat(new_src, value); strcat(new_src, found+strlen(motif)); *src = new_src; return 1; } static void decode(char **src, char *motif, const char *value) { if (*src) { if (strstr(*src, motif) && value == NULL) { fprintf(stderr, "error: $%s undefined\n", motif); exit(EXIT_FAILURE); } int d = _decode(src, motif, value); while (d) d = _decode(src, motif, value); } } int main(int argc, char *argv[]) { int child_exit_status; char *test_args; char *launcher; char *launcher_args; char *libtool; char *cflags; const char *top_builddir = getenv("top_builddir"); struct sigaction sa; int ret; struct timeval start; struct timeval end; double timing; int x=1; int asan = 0, lsan = 0, tsan = 0, usan = 0; (void) argc; test_args = NULL; timeout = 0; launcher=getenv("STARPU_CHECK_LAUNCHER"); launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); cflags = getenv("CFLAGS"); if (cflags) { if (strstr(cflags, "-fsanitize=address")) asan = 1; if (strstr(cflags, "-fsanitize=leak")) lsan = 1; if (strstr(cflags, "-fsanitize=thread")) tsan = 1; if (strstr(cflags, "-fsanitize=undefined")) usan = 1; } if (argv[x] && strcmp(argv[x], "-t") == 0) { timeout = strtol(argv[x+1], NULL, 10); x += 2; } else if (getenv("STARPU_TIMEOUT_ENV")) { /* get user-defined iter_max value */ timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); } else if (timeout <= 0) { timeout = DEFAULT_TIMEOUT; if ((launcher && strstr(launcher, "valgrind")) || (launcher && strstr(launcher, "helgrind")) || tsan) timeout *= 20; if (asan || usan || lsan || (launcher && strstr(launcher, "compute-sanitizer"))) timeout *= 5; if (timeout > 1750) timeout = 1750; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG timeout *= 20; #endif #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE /* compare values between the 2 values of timeout */ if (getenv("MPIEXEC_TIMEOUT")) { int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); if (mpiexec_timeout != timeout) fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); } #endif if (argv[x] && strcmp(argv[x], "-p") == 0) { test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); x += 3; } else { test_name = argv[x]; x += 1; } if (!test_name) { fprintf(stderr, "[error] Need name of program to start\n"); exit(EXIT_FAILURE); } size_t len = strlen(test_name); if (len >= 3 && test_name[len-3] == '.' && test_name[len-2] == 's' && test_name[len-1] == 'h') { /* This is a shell script, don't run ourself on bash, but make * the script call us for each program invocation */ char *launch = NULL; if (top_builddir == NULL) // this may fail if .libs is in the directory path setenv("STARPU_LAUNCH", argv[0], 1); else { launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); strcpy(launch, top_builddir); strcat(launch, "/tests/loader"); setenv("STARPU_LAUNCH", launch, 1); } execvp(test_name, argv+x-1); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); free(launch); exit(EXIT_FAILURE); } if (strstr(test_name, "spmv/dw_block_spmv")) { test_args = (char *) calloc(512, sizeof(char)); snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); } else if (strstr(test_name, "starpu_perfmodel_display")) { if (x >= argc) test_args = strdup("-l"); } else if (strstr(test_name, "starpu_perfmodel_plot")) { if (x >= argc) test_args = strdup("-l"); } /* get launcher program */ if (launcher_args) launcher_args=strdup(launcher_args); if (top_builddir == NULL) { fprintf(stderr, "warning: $top_builddir undefined, " "so $STARPU_CHECK_LAUNCHER ignored\n"); launcher = NULL; launcher_args = NULL; libtool = NULL; } else { libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); strcpy(libtool, top_builddir); strcat(libtool, "/libtool"); } if (launcher) { const char *top_srcdir = getenv("top_srcdir"); decode(&launcher, "@top_srcdir@", top_srcdir); decode(&launcher_args, "@top_srcdir@", top_srcdir); } setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); /* set SIGALARM handler */ sa.sa_flags = SA_RESETHAND | SA_NODEFER; sigemptyset(&sa.sa_mask); sa.sa_handler = test_cleaner; if (-1 == sigaction(SIGALRM, &sa, NULL)) perror("sigaction"); signal(SIGINT, forwardsig); signal(SIGHUP, forwardsig); signal(SIGPIPE, forwardsig); signal(SIGTERM, forwardsig); child_pid = fork(); if (child_pid == 0) { char *launcher_argv[100]; int i=0; setpgid(0, 0); /* "Launchers" such as Valgrind need to be inserted * after the Libtool-generated wrapper scripts, hence * this special-case. */ if (launcher && top_builddir != NULL) { launcher_argv[i++] = libtool; launcher_argv[i++] = "--mode=execute"; launcher_argv[i++] = launcher; if (launcher_args) { launcher_argv[i++] = strtok(launcher_args, " "); while (launcher_argv[i-1]) { launcher_argv[i++] = strtok(NULL, " "); } } } launcher_argv[i++] = test_name; if (test_args) launcher_argv[i++] = test_args; else while (argv[x]) { launcher_argv[i++] = argv[x++]; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG launcher_argv[i++] = "--cfg=contexts/factory:thread"; #endif #endif launcher_argv[i++] = NULL; execvp(*launcher_argv, launcher_argv); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); exit(EXIT_FAILURE); } if (child_pid == -1) { fprintf(stderr, "[error] fork. test marked as failed\n"); exit(EXIT_FAILURE); } free(test_args); free(libtool); ret = EXIT_SUCCESS; gettimeofday(&start, NULL); alarm(timeout); if (child_pid == waitpid(child_pid, &child_exit_status, 0)) { if (WIFEXITED(child_exit_status)) { int status = WEXITSTATUS(child_exit_status); if (status == EXIT_SUCCESS) { alarm(0); } else { if (status != AUTOTEST_SKIPPED_TEST) fprintf(stdout, "`%s' exited with return code %d\n", test_name, status); ret = status; } } else if (WIFSIGNALED(child_exit_status)) { fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", test_name, WTERMSIG(child_exit_status)); launch_gdb(test_name); ret = EXIT_FAILURE; } else { fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", test_name); ret = EXIT_FAILURE; } } gettimeofday(&end, NULL); timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); return ret; } starpu-1.4.9+dfsg/socl/examples/mandelbrot/000077500000000000000000000000001507764646700207155ustar00rootroot00000000000000starpu-1.4.9+dfsg/socl/examples/mandelbrot/mandelbrot.c000066400000000000000000000312761507764646700232210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include /* Uncomment this to activate X11 display */ //#define USE_X11 #define SHORT_LOG 1 #define ROUND_ROBIN #ifdef USE_X11 #include #include int use_x11 = 1; #else int use_x11 = 0; #endif int demo = 0; int frames = -1; #include #include #include #ifdef __APPLE_CC__ #include #else #include #endif #define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0) #define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0) #ifdef UNUSED #elif defined(__GNUC__) # define UNUSED(x) UNUSED_ ## x __attribute__((unused)) #else # define UNUSED(x) x #endif const char * kernel_src = "\ #pragma OPENCL EXTENSION cl_khr_fp64 : enable\n\ #define TYPE double \n\ #define MIN(a,b) (((a)<(b))? (a) : (b))\n\ __kernel void mandelbrot_kernel(__global uint * a,\n \ TYPE leftX, TYPE topY,\n \ TYPE stepX, TYPE stepY,\n \ uint maxIt, uint iby, uint block_size)\n \ {\n \ TYPE xc = leftX + get_global_id(0) * stepX;\n \ TYPE yc = iby*block_size*stepY + topY + get_global_id(1) * stepY;\n \ int it;\n \ TYPE x,y;\n \ x = y = (TYPE)0.0;\n \ for (it=0;it (TYPE)4) break; \n \ TYPE twoxy = (TYPE)2*x*y;\n \ x = x2 - y2 + xc;\n \ y = twoxy + yc;\n \ }\n \ uint v = MIN((1024*((float)(it)/(2000))), 256);\n \ a[get_global_id(0) + get_global_id(1)*get_global_size(0)] = (v<<16|(255-v)<<8); \n \ }"; static cl_uint nblocks = 8; static cl_uint height = 768; static cl_uint width = 1024; static cl_uint maxIt = 20000; static cl_uint group_size = 64; static double leftX = -0.745; static double rightX = -0.74375; static double topY = .15; static double bottomY = .14875; #ifdef USE_X11 /* X11 data */ static Display *dpy; static Window win; static XImage *bitmap; static GC gc; static KeySym Left=-1, Right, Down, Up, Alt ; static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; static void exit_x11(void) { XDestroyImage(bitmap); XDestroyWindow(dpy, win); XCloseDisplay(dpy); } static void init_x11(int width, int height, cl_uint *buffer) { /* Attempt to open the display */ dpy = XOpenDisplay(NULL); /* Failure */ if (!dpy) exit(0); unsigned long white = WhitePixel(dpy,DefaultScreen(dpy)); unsigned long black = BlackPixel(dpy,DefaultScreen(dpy)); win = XCreateSimpleWindow(dpy, DefaultRootWindow(dpy), 0, 0, width, height, 0, black, white); /* We want to be notified when the window appears */ XSelectInput(dpy, win, StructureNotifyMask); /* Make it appear */ XMapWindow(dpy, win); XTextProperty tp; char name[128] = "Mandelbrot"; char *n = name; Status st = XStringListToTextProperty(&n, 1, &tp); if (st) XSetWMName(dpy, win, &tp); /* Wait for the MapNotify event */ XFlush(dpy); int depth = DefaultDepth(dpy, DefaultScreen(dpy)); Visual *visual = DefaultVisual(dpy, DefaultScreen(dpy)); /* Make bitmap */ bitmap = XCreateImage(dpy, visual, depth, ZPixmap, 0, (char *)buffer, width, height, 32, 0); /* Init GC */ gc = XCreateGC(dpy, win, 0, NULL); XSetForeground(dpy, gc, black); XSelectInput(dpy, win, ExposureMask | KeyPressMask | StructureNotifyMask); Atom wmDeleteMessage; wmDeleteMessage = XInternAtom(dpy, "WM_DELETE_WINDOW", False); XSetWMProtocols(dpy, win, &wmDeleteMessage, 1); Left = XStringToKeysym ("Left"); Right = XStringToKeysym ("Right"); Up = XStringToKeysym ("Up"); Down = XStringToKeysym ("Down"); Alt = XStringToKeysym ("Alt"); } static int handle_events(void) { XEvent event; XNextEvent(dpy, &event); KeySym key; char text[255]; double coef = 0.05; if (event.type == KeyPress) { XLookupString(&event.xkey,text,255,&key,0); if (key == Left) { double widthX = rightX - leftX; leftX -= coef*widthX; rightX -= coef*widthX; } else if (key == Right) { double widthX = rightX - leftX; leftX += coef*widthX; rightX += coef*widthX; } else if (key == Down) { double heightY = topY - bottomY; topY += coef*heightY; bottomY += coef*heightY; } else if (key == Up) { double heightY = topY - bottomY; topY -= coef*heightY; bottomY -= coef*heightY; } else { double widthX = rightX - leftX; double heightY = topY - bottomY; if (text[0] == '-') { /* Zoom out */ leftX -= (coef/2)*widthX; rightX += (coef/2)*widthX; topY += (coef/2)*heightY; bottomY -= (coef/2)*heightY; } else if (text[0] == '+') { /* Zoom in */ leftX += (coef/2)*widthX; rightX -= (coef/2)*widthX; topY -= (coef/2)*heightY; bottomY += (coef/2)*heightY; } } if (text[0]=='q') { return -1; } } if (event.type==ButtonPress) { /* tell where the mouse Button was Pressed */ printf("You pressed a button at (%i,%i)\n", event.xbutton.x,event.xbutton.y); } return 0; } #endif //USE_X11 static void parse_args(int argc, char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-h") == 0) { fprintf(stderr, "Usage: %s [-h] [ -width 1024] [-height 768] [-nblocks 16] [-group_size 64] [-no-x11] [-demo] [-frames N] [-pos leftx:rightx:bottomy:topy]\n", argv[0]); exit(-1); } if (strcmp(argv[i], "-width") == 0) { char *argptr; width = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-frames") == 0) { char *argptr; frames = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-height") == 0) { char *argptr; height = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-group_size") == 0) { char *argptr; group_size = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-nblocks") == 0) { char *argptr; nblocks = strtol(argv[++i], &argptr, 10); } if (strcmp(argv[i], "-pos") == 0) { int ret = sscanf(argv[++i], "%lf:%lf:%lf:%lf", &leftX, &rightX, &bottomY, &topY); assert(ret == 4); } if (strcmp(argv[i], "-demo") == 0) { demo = 1; leftX = -50.22749575062760; rightX = 48.73874621262927; topY = -49.35016705749115; bottomY = 49.64891691946615; } if (strcmp(argv[i], "-no-x11") == 0) { #ifdef USE_X11 use_x11 = 0; #endif } } } int main(int argc, char **argv) { #define MAX_DEVICES 20 cl_platform_id platforms[15]; cl_uint num_platforms; cl_device_id devices[15]; cl_uint num_devices; cl_context context; cl_program program; cl_kernel kernel; cl_command_queue cq[MAX_DEVICES]; cl_int err; cl_uint i; parse_args(argc, argv); cl_uint block_size = height/nblocks; assert((height % nblocks) == 0); assert((width % group_size) == 0); clGetPlatformIDs(0, NULL, &num_platforms); if (num_platforms == 0) { printf("No OpenCL platform found\n"); exit(0); } err = clGetPlatformIDs(sizeof(platforms)/sizeof(cl_platform_id), platforms, NULL); check(err, "clGetPlatformIDs"); unsigned int platform_idx; for (platform_idx=0; platform_idx #include #include #include #ifdef __APPLE_CC__ #include #else #include #endif #define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0) #define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0) #ifdef UNUSED #elif defined(__GNUC__) # define UNUSED(x) UNUSED_ ## x __attribute__((unused)) #else # define UNUSED(x) x #endif #define SIZE 1024 #define TYPE float #define REALSIZE (SIZE * sizeof(TYPE)) const char * kernel_src = "__kernel void add(__global float*s1, __global float*s2, __global float*d) { \ size_t x = get_global_id(0); \ size_t y = get_global_id(1); \ size_t w = get_global_size(0); \ int idx = y*w+x; \ d[idx] = s1[idx] + s2[idx]; \ }"; int main(int UNUSED(argc), char** UNUSED(argv)) { cl_platform_id platforms[15]; cl_uint num_platforms; cl_device_id devices[15]; cl_uint num_devices; cl_context context; cl_program program; cl_kernel kernel; cl_mem s1m, s2m, dm; cl_command_queue cq; unsigned int d; cl_int err; TYPE s1[SIZE],s2[SIZE],dst[SIZE]; { int i; for (i=0; i #else #include #endif #include #include #include #include #include #include #include #include #define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0) #define check(exp) do { err = exp; if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): " #exp "\n", err); exit(EXIT_FAILURE); }} while(0) #define check2(exp) exp; if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): " #exp "\n", err); exit(EXIT_FAILURE); } #define check3(exp, err) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): " #exp "\n", err); exit(EXIT_FAILURE); } } while(0) // Thread block size #define BLOCK_SIZE 16 // Kernel thread-block size #define WORK_SIZE 64 // Kernel global size in lines of A (or C) #define TYPE float // Basic Matrix dimensions #define WA (128L * BLOCK_SIZE) // Matrix A width #ifdef STARPU_QUICK_CHECK #define HA (128L * BLOCK_SIZE) // Matrix A height #else #define HA (512L * BLOCK_SIZE) // Matrix A height #endif #define WB (128L * BLOCK_SIZE) // Matrix B width #define HB WA // Matrix B height #define WC WB // Matrix C width #define HC HA // Matrix C height #define BLOCKS (HA / WORK_SIZE) //////////////////////////////////////////////////////////////////////////////// // declaration, forward void printDiff(TYPE*, TYPE*, int, int, int, TYPE); void computeReference(TYPE*, const TYPE*, const TYPE*, unsigned int, unsigned int, unsigned int); #define str(x) #x #define CODE "\ #define TYPE float\n\ __kernel void sgemmNN(int wa, int ha, int wb, __global TYPE* A, __global TYPE* B, __global TYPE* C) {\n\ #define BS 16\n \ #define BLOCK_SIZE 16\n \ int bx = get_group_id(0);\n \ int by = get_group_id(1);\n \ \n \ int tx = get_local_id(0);\n \ int ty = get_local_id(1);\n \ \n \ int gx = get_global_id(0);\n \ int gy = get_global_id(1);\n \ __local float As[BS][BS+1]; \ __local float Bs[BS][BS+1]; \ \n \ unsigned int block_w = min(wb - bx * BLOCK_SIZE, BLOCK_SIZE);\n \ unsigned int block_h = min(ha - by * BLOCK_SIZE, BLOCK_SIZE);\n \ \n \ int valid = (gx < wb && gy < ha);\n \ \n \ TYPE Csub = (TYPE)0.0;\n \ \n \ int pos = 0;\n \ while (pos < wa) {\n \ unsigned int size = min(wa-pos, BLOCK_SIZE);\n \ if (tx < size && gy < ha)\n \ As[tx][ty] = A[pos + tx + wa * gy];\n \ if (ty < size && gx < wb)\n \ Bs[tx][ty] = B[gx + wb * (pos+ty)];\n \ \n \ barrier(CLK_LOCAL_MEM_FENCE);\n \ \n \ if (valid) {\n \ for (int k = 0; k < size; ++k)\n \ Csub += As[k][ty] * Bs[tx][k];\n \ }\n \ pos += size;\n \ barrier(CLK_LOCAL_MEM_FENCE);\n \ }\n \ \n \ if (valid)\n \ C[wb * gy + gx] = Csub;\n \ }" static char * code = CODE; int check = 0; static void __attribute__((unused)) parse_args(int argc, const char **argv) { int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-check") == 0) { check = 1; } if (strcmp(argv[i], "-h") == 0) { printf("usage : %s [-check]\n", argv[0]); } } } // Round Up Division function size_t roundUp(int group_size, int global_size) { int r = global_size % group_size; if(r == 0) { return global_size; } else { return global_size + group_size - r; } } void fillArray(TYPE* data, int size) { int i; const TYPE fScale = (TYPE)(1.0f / (float)RAND_MAX); for (i = 0; i < size; ++i) { data[i] = fScale * rand(); } } void printArray(float* data, int size) { int i; for (i = 0; i < size; ++i) { printf("%d: %.3f\n", i, data[i]); } } /** * Compare two float arrays using L2-norm with an epsilon tolerance for equality * @return shrTRUE if \a reference and \a data are identical, otherwise shrFALSE * @param reference handle to the reference data / gold image * @param data handle to the computed data * @param len number of elements in reference and data * @param epsilon epsilon to use for the comparison */ int shrCompareL2fe(const float* reference, const float* data, const unsigned int len, const float epsilon) { assert(epsilon >= 0); float error = 0; float ref = 0; unsigned int i; for(i = 0; i < len; ++i) { float diff = reference[i] - data[i]; error += diff * diff; ref += reference[i] * reference[i]; } float normRef = sqrtf(ref); if (fabs(ref) < 1e-7) { #ifdef _DEBUG fprintf(stderr, "ERROR, reference l2-norm is 0\n"); #endif return 0; } float normError = sqrtf(error); error = normError / normRef; int result = error < epsilon; #ifdef _DEBUG if(!result) { fprintf(stderr, "ERROR, l2-norm error %lf is greater than epsilon %lf \n", error, epsilon); } #endif return result; } int main(int argc, const char** argv) { cl_uint platform_count; cl_platform_id platforms[5]; cl_int err = CL_SUCCESS; unsigned int i, p; cl_device_type dev_type = CL_DEVICE_TYPE_ALL; void * ptrs[BLOCKS]; cl_command_queue cqs[BLOCKS]; cl_mem d_A[BLOCKS]; cl_mem d_C[BLOCKS]; cl_mem d_B[BLOCKS]; cl_event GPUDone[BLOCKS]; cl_event GPUExecution[BLOCKS]; struct timeval start, end; int workOffset[BLOCKS]; int workSize[BLOCKS]; unsigned int sizePerGPU = HC / BLOCKS; unsigned int sizeMod = HC % BLOCKS; size_t A_size = WA * HA; size_t A_mem_size = sizeof(TYPE) * A_size; TYPE* A_data; size_t B_size = WB * HB; size_t B_mem_size = sizeof(TYPE) * B_size; TYPE* B_data; size_t C_size = WC * HC; size_t C_mem_size = sizeof(TYPE) * C_size; TYPE* C_data; parse_args(argc, argv); check(clGetPlatformIDs(5, platforms, &platform_count)); if (platform_count == 0) { printf("No platform found\n"); exit(77); } cl_uint device_count; cl_uint devs[platform_count]; cl_device_id * devices[platform_count]; cl_context ctx[platform_count]; cl_command_queue * commandQueue[platform_count]; device_count = 0; for (p=0; p %.6f...\n", listLength, listTol); int i,j,k; int error_count=0; for (j = 0; j < height; j++) { if (error_count < listLength) { printf("\n Row %d:\n", j); } for (i = 0; i < width; i++) { k = j * width + i; float diff = fabs(data1[k] - data2[k]); if (diff > listTol) { if (error_count < listLength) { printf(" Loc(%d,%d)\tCPU=%.5f\tGPU=%.5f\tDiff=%.6f\n", i, j, data1[k], data2[k], diff); } error_count++; } } } printf(" \n Total Errors = %d\n\n", error_count); } /** * Compute reference data set * C = A * B * @param C reference data, computed but preallocated * @param A matrix A as provided to device * @param B matrix B as provided to device * @param hA height of matrix A * @param wB width of matrix B */ void computeReference(TYPE* C, const TYPE* A, const TYPE* B, unsigned int hA, unsigned int wA, unsigned int wB) { unsigned int i,j,k; for (i = 0; i < hA; ++i) for (j = 0; j < wB; ++j) { double sum = 0; for (k = 0; k < wA; ++k) { double a = A[i * wA + k]; double b = B[k * wB + j]; sum += a * b; } C[i * wB + j] = (TYPE)sum; } } #endif /* STARPU_NON_BLOCKING_DRIVERS */ starpu-1.4.9+dfsg/socl/examples/testmap/000077500000000000000000000000001507764646700202435ustar00rootroot00000000000000starpu-1.4.9+dfsg/socl/examples/testmap/testmap.c000066400000000000000000000166121507764646700220720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #ifdef __APPLE_CC__ #include #else #include #endif #define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0) #define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0) #ifdef UNUSED #elif defined(__GNUC__) # define UNUSED(x) UNUSED_ ## x __attribute__((unused)) #else # define UNUSED(x) x #endif #define SIZE 1024 #define TYPE float #define REALSIZE (SIZE * sizeof(TYPE)) const char * kernel_src = "__kernel void add(__global float*s1, __global float*s2, __global float*d) { \ size_t x = get_global_id(0);\n \ size_t y = get_global_id(1);\n \ size_t w = get_global_size(0); \n \ int idx = y*w+x; \n \ #ifdef SOCL_DEVICE_TYPE_GPU \n \ d[idx] = s1[idx] + s2[idx];\n \ #endif \n \ #ifdef SOCL_DEVICE_TYPE_CPU \n \ d[idx] = s1[idx] + 2* s2[idx];\n \ #endif \n \ #ifdef SOCL_DEVICE_TYPE_ACCELERATOR \n \ d[idx] = s1[idx] + 3 * s2[idx];\n \ #endif \n \ #ifdef SOCL_DEVICE_TYPE_UNKNOWN \n \ d[idx] = s1[idx] + 4 * s2[idx];\n \ #endif \n \ }"; int main(int UNUSED(argc), char** UNUSED(argv)) { cl_platform_id platforms[15]; cl_uint num_platforms; cl_device_id devices[15]; cl_uint num_devices; cl_context context; cl_program program; cl_kernel kernel; cl_mem s1m, s2m, dm; cl_command_queue cq; cl_int err; unsigned int i; TYPE * s1, *s2, d[SIZE]; printf("Querying platform...\n"); clGetPlatformIDs(0, NULL, &num_platforms); if (num_platforms == 0) { printf("No OpenCL platform found.\n"); exit(77); } err = clGetPlatformIDs(sizeof(platforms)/sizeof(cl_platform_id), platforms, &num_platforms); check(err, "clGetPlatformIDs"); int platform_idx = -1; for (i=0; i #else #include "cl_platform.h" #endif #ifdef __cplusplus extern "C" { #endif /******************************************************************************/ typedef struct _cl_platform_id * cl_platform_id; typedef struct _cl_device_id * cl_device_id; typedef struct _cl_context * cl_context; typedef struct _cl_command_queue * cl_command_queue; typedef struct _cl_mem * cl_mem; typedef struct _cl_program * cl_program; typedef struct _cl_kernel * cl_kernel; typedef struct _cl_event * cl_event; typedef struct _cl_sampler * cl_sampler; typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ typedef cl_ulong cl_bitfield; typedef cl_bitfield cl_device_type; typedef cl_uint cl_platform_info; typedef cl_uint cl_device_info; typedef cl_bitfield cl_device_fp_config; typedef cl_uint cl_device_mem_cache_type; typedef cl_uint cl_device_local_mem_type; typedef cl_bitfield cl_device_exec_capabilities; typedef cl_bitfield cl_command_queue_properties; typedef intptr_t cl_device_partition_property; typedef cl_bitfield cl_device_affinity_domain; typedef intptr_t cl_context_properties; typedef cl_uint cl_context_info; typedef cl_uint cl_command_queue_info; typedef cl_uint cl_channel_order; typedef cl_uint cl_channel_type; typedef cl_bitfield cl_mem_flags; typedef cl_uint cl_mem_object_type; typedef cl_uint cl_mem_info; typedef cl_bitfield cl_mem_migration_flags; typedef cl_uint cl_image_info; typedef cl_uint cl_buffer_create_type; typedef cl_uint cl_addressing_mode; typedef cl_uint cl_filter_mode; typedef cl_uint cl_sampler_info; typedef cl_bitfield cl_map_flags; typedef cl_uint cl_program_info; typedef cl_uint cl_program_build_info; typedef cl_uint cl_program_binary_type; typedef cl_int cl_build_status; typedef cl_uint cl_kernel_info; typedef cl_uint cl_kernel_arg_info; typedef cl_uint cl_kernel_arg_address_qualifier; typedef cl_uint cl_kernel_arg_access_qualifier; typedef cl_bitfield cl_kernel_arg_type_qualifier; typedef cl_uint cl_kernel_work_group_info; typedef cl_uint cl_event_info; typedef cl_uint cl_command_type; typedef cl_uint cl_profiling_info; typedef struct _cl_image_format { cl_channel_order image_channel_order; cl_channel_type image_channel_data_type; } cl_image_format; typedef struct _cl_image_desc { cl_mem_object_type image_type; size_t image_width; size_t image_height; size_t image_depth; size_t image_array_size; size_t image_row_pitch; size_t image_slice_pitch; cl_uint num_mip_levels; cl_uint num_samples; cl_mem buffer; } cl_image_desc; typedef struct _cl_buffer_region { size_t origin; size_t size; } cl_buffer_region; /******************************************************************************/ /* Error Codes */ #define CL_SUCCESS 0 #define CL_DEVICE_NOT_FOUND -1 #define CL_DEVICE_NOT_AVAILABLE -2 #define CL_COMPILER_NOT_AVAILABLE -3 #define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 #define CL_OUT_OF_RESOURCES -5 #define CL_OUT_OF_HOST_MEMORY -6 #define CL_PROFILING_INFO_NOT_AVAILABLE -7 #define CL_MEM_COPY_OVERLAP -8 #define CL_IMAGE_FORMAT_MISMATCH -9 #define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 #define CL_BUILD_PROGRAM_FAILURE -11 #define CL_MAP_FAILURE -12 #define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 #define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 #define CL_COMPILE_PROGRAM_FAILURE -15 #define CL_LINKER_NOT_AVAILABLE -16 #define CL_LINK_PROGRAM_FAILURE -17 #define CL_DEVICE_PARTITION_FAILED -18 #define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 #define CL_INVALID_VALUE -30 #define CL_INVALID_DEVICE_TYPE -31 #define CL_INVALID_PLATFORM -32 #define CL_INVALID_DEVICE -33 #define CL_INVALID_CONTEXT -34 #define CL_INVALID_QUEUE_PROPERTIES -35 #define CL_INVALID_COMMAND_QUEUE -36 #define CL_INVALID_HOST_PTR -37 #define CL_INVALID_MEM_OBJECT -38 #define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 #define CL_INVALID_IMAGE_SIZE -40 #define CL_INVALID_SAMPLER -41 #define CL_INVALID_BINARY -42 #define CL_INVALID_BUILD_OPTIONS -43 #define CL_INVALID_PROGRAM -44 #define CL_INVALID_PROGRAM_EXECUTABLE -45 #define CL_INVALID_KERNEL_NAME -46 #define CL_INVALID_KERNEL_DEFINITION -47 #define CL_INVALID_KERNEL -48 #define CL_INVALID_ARG_INDEX -49 #define CL_INVALID_ARG_VALUE -50 #define CL_INVALID_ARG_SIZE -51 #define CL_INVALID_KERNEL_ARGS -52 #define CL_INVALID_WORK_DIMENSION -53 #define CL_INVALID_WORK_GROUP_SIZE -54 #define CL_INVALID_WORK_ITEM_SIZE -55 #define CL_INVALID_GLOBAL_OFFSET -56 #define CL_INVALID_EVENT_WAIT_LIST -57 #define CL_INVALID_EVENT -58 #define CL_INVALID_OPERATION -59 #define CL_INVALID_GL_OBJECT -60 #define CL_INVALID_BUFFER_SIZE -61 #define CL_INVALID_MIP_LEVEL -62 #define CL_INVALID_GLOBAL_WORK_SIZE -63 #define CL_INVALID_PROPERTY -64 #define CL_INVALID_IMAGE_DESCRIPTOR -65 #define CL_INVALID_COMPILER_OPTIONS -66 #define CL_INVALID_LINKER_OPTIONS -67 #define CL_INVALID_DEVICE_PARTITION_COUNT -68 /* OpenCL Version */ #define CL_VERSION_1_0 1 #define CL_VERSION_1_1 1 #define CL_VERSION_1_2 1 /* cl_bool */ #define CL_FALSE 0 #define CL_TRUE 1 #define CL_BLOCKING CL_TRUE #define CL_NON_BLOCKING CL_FALSE /* cl_platform_info */ #define CL_PLATFORM_PROFILE 0x0900 #define CL_PLATFORM_VERSION 0x0901 #define CL_PLATFORM_NAME 0x0902 #define CL_PLATFORM_VENDOR 0x0903 #define CL_PLATFORM_EXTENSIONS 0x0904 /* cl_device_type - bitfield */ #define CL_DEVICE_TYPE_DEFAULT (1 << 0) #define CL_DEVICE_TYPE_CPU (1 << 1) #define CL_DEVICE_TYPE_GPU (1 << 2) #define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) #define CL_DEVICE_TYPE_CUSTOM (1 << 4) #define CL_DEVICE_TYPE_ALL 0xFFFFFFFF /* cl_device_info */ #define CL_DEVICE_TYPE 0x1000 #define CL_DEVICE_VENDOR_ID 0x1001 #define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 #define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 #define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 #define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B #define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C #define CL_DEVICE_ADDRESS_BITS 0x100D #define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E #define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F #define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 #define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 #define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 #define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 #define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 #define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 #define CL_DEVICE_IMAGE_SUPPORT 0x1016 #define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 #define CL_DEVICE_MAX_SAMPLERS 0x1018 #define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 #define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A #define CL_DEVICE_SINGLE_FP_CONFIG 0x101B #define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C #define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D #define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E #define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F #define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 #define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 #define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 #define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 #define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 #define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 #define CL_DEVICE_ENDIAN_LITTLE 0x1026 #define CL_DEVICE_AVAILABLE 0x1027 #define CL_DEVICE_COMPILER_AVAILABLE 0x1028 #define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 #define CL_DEVICE_QUEUE_PROPERTIES 0x102A #define CL_DEVICE_NAME 0x102B #define CL_DEVICE_VENDOR 0x102C #define CL_DRIVER_VERSION 0x102D #define CL_DEVICE_PROFILE 0x102E #define CL_DEVICE_VERSION 0x102F #define CL_DEVICE_EXTENSIONS 0x1030 #define CL_DEVICE_PLATFORM 0x1031 #define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 /* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */ #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 #define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A #define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B #define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C #define CL_DEVICE_OPENCL_C_VERSION 0x103D #define CL_DEVICE_LINKER_AVAILABLE 0x103E #define CL_DEVICE_BUILT_IN_KERNELS 0x103F #define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 #define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 #define CL_DEVICE_PARENT_DEVICE 0x1042 #define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 #define CL_DEVICE_PARTITION_PROPERTIES 0x1044 #define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 #define CL_DEVICE_PARTITION_TYPE 0x1046 #define CL_DEVICE_REFERENCE_COUNT 0x1047 #define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 #define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 /* cl_device_fp_config - bitfield */ #define CL_FP_DENORM (1 << 0) #define CL_FP_INF_NAN (1 << 1) #define CL_FP_ROUND_TO_NEAREST (1 << 2) #define CL_FP_ROUND_TO_ZERO (1 << 3) #define CL_FP_ROUND_TO_INF (1 << 4) #define CL_FP_FMA (1 << 5) #define CL_FP_SOFT_FLOAT (1 << 6) #define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) /* cl_device_mem_cache_type */ #define CL_NONE 0x0 #define CL_READ_ONLY_CACHE 0x1 #define CL_READ_WRITE_CACHE 0x2 /* cl_device_local_mem_type */ #define CL_LOCAL 0x1 #define CL_GLOBAL 0x2 /* cl_device_exec_capabilities - bitfield */ #define CL_EXEC_KERNEL (1 << 0) #define CL_EXEC_NATIVE_KERNEL (1 << 1) /* cl_command_queue_properties - bitfield */ #define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) #define CL_QUEUE_PROFILING_ENABLE (1 << 1) /* cl_context_info */ #define CL_CONTEXT_REFERENCE_COUNT 0x1080 #define CL_CONTEXT_DEVICES 0x1081 #define CL_CONTEXT_PROPERTIES 0x1082 #define CL_CONTEXT_NUM_DEVICES 0x1083 /* cl_context_properties */ #define CL_CONTEXT_PLATFORM 0x1084 #define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 /* cl_device_partition_property */ #define CL_DEVICE_PARTITION_EQUALLY 0x1086 #define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 #define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 /* cl_device_affinity_domain */ #define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) #define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) #define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) #define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) #define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) #define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) /* cl_command_queue_info */ #define CL_QUEUE_CONTEXT 0x1090 #define CL_QUEUE_DEVICE 0x1091 #define CL_QUEUE_REFERENCE_COUNT 0x1092 #define CL_QUEUE_PROPERTIES 0x1093 /* cl_mem_flags - bitfield */ #define CL_MEM_READ_WRITE (1 << 0) #define CL_MEM_WRITE_ONLY (1 << 1) #define CL_MEM_READ_ONLY (1 << 2) #define CL_MEM_USE_HOST_PTR (1 << 3) #define CL_MEM_ALLOC_HOST_PTR (1 << 4) #define CL_MEM_COPY_HOST_PTR (1 << 5) // reserved (1 << 6) #define CL_MEM_HOST_WRITE_ONLY (1 << 7) #define CL_MEM_HOST_READ_ONLY (1 << 8) #define CL_MEM_HOST_NO_ACCESS (1 << 9) /* cl_mem_migration_flags - bitfield */ #define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) #define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) /* cl_channel_order */ #define CL_R 0x10B0 #define CL_A 0x10B1 #define CL_RG 0x10B2 #define CL_RA 0x10B3 #define CL_RGB 0x10B4 #define CL_RGBA 0x10B5 #define CL_BGRA 0x10B6 #define CL_ARGB 0x10B7 #define CL_INTENSITY 0x10B8 #define CL_LUMINANCE 0x10B9 #define CL_Rx 0x10BA #define CL_RGx 0x10BB #define CL_RGBx 0x10BC /* cl_channel_type */ #define CL_SNORM_INT8 0x10D0 #define CL_SNORM_INT16 0x10D1 #define CL_UNORM_INT8 0x10D2 #define CL_UNORM_INT16 0x10D3 #define CL_UNORM_SHORT_565 0x10D4 #define CL_UNORM_SHORT_555 0x10D5 #define CL_UNORM_INT_101010 0x10D6 #define CL_SIGNED_INT8 0x10D7 #define CL_SIGNED_INT16 0x10D8 #define CL_SIGNED_INT32 0x10D9 #define CL_UNSIGNED_INT8 0x10DA #define CL_UNSIGNED_INT16 0x10DB #define CL_UNSIGNED_INT32 0x10DC #define CL_HALF_FLOAT 0x10DD #define CL_FLOAT 0x10DE /* cl_mem_object_type */ #define CL_MEM_OBJECT_BUFFER 0x10F0 #define CL_MEM_OBJECT_IMAGE2D 0x10F1 #define CL_MEM_OBJECT_IMAGE3D 0x10F2 #define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 #define CL_MEM_OBJECT_IMAGE1D 0x10F4 #define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 #define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 /* cl_mem_info */ #define CL_MEM_TYPE 0x1100 #define CL_MEM_FLAGS 0x1101 #define CL_MEM_SIZE 0x1102 #define CL_MEM_HOST_PTR 0x1103 #define CL_MEM_MAP_COUNT 0x1104 #define CL_MEM_REFERENCE_COUNT 0x1105 #define CL_MEM_CONTEXT 0x1106 #define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 #define CL_MEM_OFFSET 0x1108 /* cl_image_info */ #define CL_IMAGE_FORMAT 0x1110 #define CL_IMAGE_ELEMENT_SIZE 0x1111 #define CL_IMAGE_ROW_PITCH 0x1112 #define CL_IMAGE_SLICE_PITCH 0x1113 #define CL_IMAGE_WIDTH 0x1114 #define CL_IMAGE_HEIGHT 0x1115 #define CL_IMAGE_DEPTH 0x1116 #define CL_IMAGE_ARRAY_SIZE 0x1117 #define CL_IMAGE_BUFFER 0x1118 #define CL_IMAGE_NUM_MIP_LEVELS 0x1119 #define CL_IMAGE_NUM_SAMPLES 0x111A /* cl_addressing_mode */ #define CL_ADDRESS_NONE 0x1130 #define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 #define CL_ADDRESS_CLAMP 0x1132 #define CL_ADDRESS_REPEAT 0x1133 #define CL_ADDRESS_MIRRORED_REPEAT 0x1134 /* cl_filter_mode */ #define CL_FILTER_NEAREST 0x1140 #define CL_FILTER_LINEAR 0x1141 /* cl_sampler_info */ #define CL_SAMPLER_REFERENCE_COUNT 0x1150 #define CL_SAMPLER_CONTEXT 0x1151 #define CL_SAMPLER_NORMALIZED_COORDS 0x1152 #define CL_SAMPLER_ADDRESSING_MODE 0x1153 #define CL_SAMPLER_FILTER_MODE 0x1154 /* cl_map_flags - bitfield */ #define CL_MAP_READ (1 << 0) #define CL_MAP_WRITE (1 << 1) #define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) /* cl_program_info */ #define CL_PROGRAM_REFERENCE_COUNT 0x1160 #define CL_PROGRAM_CONTEXT 0x1161 #define CL_PROGRAM_NUM_DEVICES 0x1162 #define CL_PROGRAM_DEVICES 0x1163 #define CL_PROGRAM_SOURCE 0x1164 #define CL_PROGRAM_BINARY_SIZES 0x1165 #define CL_PROGRAM_BINARIES 0x1166 #define CL_PROGRAM_NUM_KERNELS 0x1167 #define CL_PROGRAM_KERNEL_NAMES 0x1168 /* cl_program_build_info */ #define CL_PROGRAM_BUILD_STATUS 0x1181 #define CL_PROGRAM_BUILD_OPTIONS 0x1182 #define CL_PROGRAM_BUILD_LOG 0x1183 #define CL_PROGRAM_BINARY_TYPE 0x1184 /* cl_program_binary_type */ #define CL_PROGRAM_BINARY_TYPE_NONE 0x0 #define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 #define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 #define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 /* cl_build_status */ #define CL_BUILD_SUCCESS 0 #define CL_BUILD_NONE -1 #define CL_BUILD_ERROR -2 #define CL_BUILD_IN_PROGRESS -3 /* cl_kernel_info */ #define CL_KERNEL_FUNCTION_NAME 0x1190 #define CL_KERNEL_NUM_ARGS 0x1191 #define CL_KERNEL_REFERENCE_COUNT 0x1192 #define CL_KERNEL_CONTEXT 0x1193 #define CL_KERNEL_PROGRAM 0x1194 #define CL_KERNEL_ATTRIBUTES 0x1195 /* cl_kernel_arg_info */ #define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 #define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 #define CL_KERNEL_ARG_TYPE_NAME 0x1198 #define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 #define CL_KERNEL_ARG_NAME 0x119A /* cl_kernel_arg_address_qualifier */ #define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B #define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C #define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D #define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E /* cl_kernel_arg_access_qualifier */ #define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 #define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 #define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 #define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 /* cl_kernel_arg_type_qualifer */ #define CL_KERNEL_ARG_TYPE_NONE 0 #define CL_KERNEL_ARG_TYPE_CONST (1 << 0) #define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) #define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) /* cl_kernel_work_group_info */ #define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 #define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 #define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 #define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 #define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 #define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 /* cl_event_info */ #define CL_EVENT_COMMAND_QUEUE 0x11D0 #define CL_EVENT_COMMAND_TYPE 0x11D1 #define CL_EVENT_REFERENCE_COUNT 0x11D2 #define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 #define CL_EVENT_CONTEXT 0x11D4 /* cl_command_type */ #define CL_COMMAND_NDRANGE_KERNEL 0x11F0 #define CL_COMMAND_TASK 0x11F1 #define CL_COMMAND_NATIVE_KERNEL 0x11F2 #define CL_COMMAND_READ_BUFFER 0x11F3 #define CL_COMMAND_WRITE_BUFFER 0x11F4 #define CL_COMMAND_COPY_BUFFER 0x11F5 #define CL_COMMAND_READ_IMAGE 0x11F6 #define CL_COMMAND_WRITE_IMAGE 0x11F7 #define CL_COMMAND_COPY_IMAGE 0x11F8 #define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 #define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA #define CL_COMMAND_MAP_BUFFER 0x11FB #define CL_COMMAND_MAP_IMAGE 0x11FC #define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD #define CL_COMMAND_MARKER 0x11FE #define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF #define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 #define CL_COMMAND_READ_BUFFER_RECT 0x1201 #define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 #define CL_COMMAND_COPY_BUFFER_RECT 0x1203 #define CL_COMMAND_USER 0x1204 #define CL_COMMAND_BARRIER 0x1205 #define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 #define CL_COMMAND_FILL_BUFFER 0x1207 #define CL_COMMAND_FILL_IMAGE 0x1208 /* command execution status */ #define CL_COMPLETE 0x0 #define CL_RUNNING 0x1 #define CL_SUBMITTED 0x2 #define CL_QUEUED 0x3 /* cl_buffer_create_type */ #define CL_BUFFER_CREATE_TYPE_REGION 0x1220 /* cl_profiling_info */ #define CL_PROFILING_COMMAND_QUEUED 0x1280 #define CL_PROFILING_COMMAND_SUBMIT 0x1281 #define CL_PROFILING_COMMAND_START 0x1282 #define CL_PROFILING_COMMAND_END 0x1283 /********************************************************************************************************/ /* Platform API */ extern CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDs(cl_uint /* num_entries */, cl_platform_id * /* platforms */, cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfo(cl_platform_id /* platform */, cl_platform_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Device APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDs(cl_platform_id /* platform */, cl_device_type /* device_type */, cl_uint /* num_entries */, cl_device_id * /* devices */, cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(cl_device_id /* device */, cl_device_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clCreateSubDevices(cl_device_id /* in_device */, const cl_device_partition_property * /* properties */, cl_uint /* num_devices */, cl_device_id * /* out_devices */, cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; /* Context APIs */ extern CL_API_ENTRY cl_context CL_API_CALL clCreateContext(const cl_context_properties * /* properties */, cl_uint /* num_devices */, const cl_device_id * /* devices */, void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(const cl_context_properties * /* properties */, cl_device_type /* device_type */, void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *), void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* context */, cl_context_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Command Queue APIs */ extern CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context /* context */, cl_device_id /* device */, cl_command_queue_properties /* properties */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetCommandQueueInfo(cl_command_queue /* command_queue */, cl_command_queue_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Memory Object APIs */ extern CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */, cl_mem_flags /* flags */, size_t /* size */, void * /* host_ptr */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBuffer(cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */, const void * /* buffer_create_info */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateImage(cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, const cl_image_desc * /* image_desc */, void * /* host_ptr */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetSupportedImageFormats(cl_context /* context */, cl_mem_flags /* flags */, cl_mem_object_type /* image_type */, cl_uint /* num_entries */, cl_image_format * /* image_formats */, cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectInfo(cl_mem /* memobj */, cl_mem_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetImageInfo(cl_mem /* image */, cl_image_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clSetMemObjectDestructorCallback( cl_mem /* memobj */, void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1; /* Sampler APIs */ extern CL_API_ENTRY cl_sampler CL_API_CALL clCreateSampler(cl_context /* context */, cl_bool /* normalized_coords */, cl_addressing_mode /* addressing_mode */, cl_filter_mode /* filter_mode */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetSamplerInfo(cl_sampler /* sampler */, cl_sampler_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Program Object APIs */ extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithSource(cl_context /* context */, cl_uint /* count */, const char ** /* strings */, const size_t * /* lengths */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const size_t * /* lengths */, const unsigned char ** /* binaries */, cl_int * /* binary_status */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernels(cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* kernel_names */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* options */, void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clCompileProgram(cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* options */, cl_uint /* num_input_headers */, const cl_program * /* input_headers */, const char ** /* header_include_names */, void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), void * /* user_data */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_program CL_API_CALL clLinkProgram(cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* options */, cl_uint /* num_input_programs */, const cl_program * /* input_programs */, void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), void * /* user_data */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfo(cl_program /* program */, cl_program_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetProgramBuildInfo(cl_program /* program */, cl_device_id /* device */, cl_program_build_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Kernel Object APIs */ extern CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program /* program */, const char * /* kernel_name */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clCreateKernelsInProgram(cl_program /* program */, cl_uint /* num_kernels */, cl_kernel * /* kernels */, cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel /* kernel */, cl_uint /* arg_index */, size_t /* arg_size */, const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelInfo(cl_kernel /* kernel */, cl_kernel_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelArgInfo(cl_kernel /* kernel */, cl_uint /* arg_indx */, cl_kernel_arg_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfo(cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_work_group_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Event Object APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clWaitForEvents(cl_uint /* num_events */, const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetEventInfo(cl_event /* event */, cl_event_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_event CL_API_CALL clCreateUserEvent(cl_context /* context */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clSetUserEventStatus(cl_event /* event */, cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clSetEventCallback( cl_event /* event */, cl_int /* command_exec_callback_type */, void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; /* Profiling APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clGetEventProfilingInfo(cl_event /* event */, cl_profiling_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Flush and Finish APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; /* Enqueued Commands APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, size_t /* offset */, size_t /* size */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferRect(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, const size_t * /* buffer_offset */, const size_t * /* host_offset */, const size_t * /* region */, size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, size_t /* host_slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, size_t /* offset */, size_t /* size */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferRect(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, const size_t * /* buffer_offset */, const size_t * /* host_offset */, const size_t * /* region */, size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, size_t /* host_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, const void * /* pattern */, size_t /* pattern_size */, size_t /* offset */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBuffer(cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, size_t /* src_offset */, size_t /* dst_offset */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferRect(cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, const size_t * /* src_origin */, const size_t * /* dst_origin */, const size_t * /* region */, size_t /* src_row_pitch */, size_t /* src_slice_pitch */, size_t /* dst_row_pitch */, size_t /* dst_slice_pitch */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImage(cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_read */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* row_pitch */, size_t /* slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImage(cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_write */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* input_row_pitch */, size_t /* input_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImage(cl_command_queue /* command_queue */, cl_mem /* image */, const void * /* fill_color */, const size_t * /* origin[3] */, const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImage(cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_image */, const size_t * /* src_origin[3] */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_buffer */, const size_t * /* src_origin[3] */, const size_t * /* region[3] */, size_t /* dst_offset */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_image */, size_t /* src_offset */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY void * CL_API_CALL clEnqueueMapBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, size_t /* offset */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY void * CL_API_CALL clEnqueueMapImage(cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t * /* image_row_pitch */, size_t * /* image_slice_pitch */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObject(cl_command_queue /* command_queue */, cl_mem /* memobj */, void * /* mapped_ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjects(cl_command_queue /* command_queue */, cl_uint /* num_mem_objects */, const cl_mem * /* mem_objects */, cl_mem_migration_flags /* flags */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* work_dim */, const size_t * /* global_work_offset */, const size_t * /* global_work_size */, const size_t * /* local_work_size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueTask(cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueNativeKernel(cl_command_queue /* command_queue */, void (CL_CALLBACK * /*user_func*/)(void *), void * /* args */, size_t /* cb_args */, cl_uint /* num_mem_objects */, const cl_mem * /* mem_list */, const void ** /* args_mem_loc */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clSetPrintfCallback(cl_context /* context */, void (CL_CALLBACK * /* pfn_notify */)(cl_context /* program */, cl_uint /*printf_data_len */, char * /* printf_data_ptr */, void * /* user_data */), void * /* user_data */) CL_API_SUFFIX__VERSION_1_2; /* Extension function access * * Returns the extension function address for the given function name, * or NULL if a valid function can not be found. The client must * check to make sure the address is not NULL, before using or * calling the returned function address. */ extern CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */, const char * /* func_name */) CL_API_SUFFIX__VERSION_1_2; #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS #warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1! /* * WARNING: * This API introduces mutable state into the OpenCL implementation. It has been REMOVED * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. * * Software developers previously relying on this API are instructed to set the command queue * properties when creating the queue, instead. */ extern CL_API_ENTRY cl_int CL_API_CALL clSetCommandQueueProperty(cl_command_queue /* command_queue */, cl_command_queue_properties /* properties */, cl_bool /* enable */, cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; #endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS extern CL_API_ENTRY cl_mem CL_API_CALL clCreateImage2D(cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, size_t /* image_width */, size_t /* image_height */, size_t /* image_row_pitch */, void * /* host_ptr */, cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateImage3D(cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, size_t /* image_width */, size_t /* image_height */, size_t /* image_depth */, size_t /* image_row_pitch */, size_t /* image_slice_pitch */, void * /* host_ptr */, cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarker(cl_command_queue /* command_queue */, cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWaitForEvents(cl_command_queue /* command_queue */, cl_uint /* num_events */, const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY cl_int CL_API_CALL clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; #endif /* CL_USE_DEPRECATED_OPENCL_1_2_APIS */ #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_H */ starpu-1.4.9+dfsg/socl/src/CL/cl_d3d10.h000066400000000000000000000113651507764646700175050ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2008-2010 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. **********************************************************************************/ /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ #ifndef __OPENCL_CL_D3D10_H #define __OPENCL_CL_D3D10_H #include #include "cl.h" #include "cl_platform.h" #ifdef __cplusplus extern "C" { #endif /****************************************************************************** * cl_khr_d3d10_sharing */ #define cl_khr_d3d10_sharing 1 typedef cl_uint cl_d3d10_device_source_khr; typedef cl_uint cl_d3d10_device_set_khr; /******************************************************************************/ // Error Codes #define CL_INVALID_D3D10_DEVICE_KHR -1002 #define CL_INVALID_D3D10_RESOURCE_KHR -1003 #define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004 #define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005 // cl_d3d10_device_source_nv #define CL_D3D10_DEVICE_KHR 0x4010 #define CL_D3D10_DXGI_ADAPTER_KHR 0x4011 // cl_d3d10_device_set_nv #define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012 #define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013 // cl_context_info #define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014 #define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C // cl_mem_info #define CL_MEM_D3D10_RESOURCE_KHR 0x4015 // cl_image_info #define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016 // cl_command_type #define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017 #define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018 /******************************************************************************/ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)( cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, void * d3d_object, cl_d3d10_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Buffer * resource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Texture2D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Texture3D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; #ifdef __cplusplus } #endif #endif // __OPENCL_CL_D3D10_H starpu-1.4.9+dfsg/socl/src/CL/cl_d3d11.h000066400000000000000000000113571507764646700175070ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2008-2010 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. **********************************************************************************/ /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ #ifndef __OPENCL_CL_D3D11_H #define __OPENCL_CL_D3D11_H #include #include "cl.h" #include "cl_platform.h" #ifdef __cplusplus extern "C" { #endif /****************************************************************************** * cl_khr_d3d11_sharing */ #define cl_khr_d3d11_sharing 1 typedef cl_uint cl_d3d11_device_source_khr; typedef cl_uint cl_d3d11_device_set_khr; /******************************************************************************/ // Error Codes #define CL_INVALID_D3D11_DEVICE_KHR -1006 #define CL_INVALID_D3D11_RESOURCE_KHR -1007 #define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008 #define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009 // cl_d3d11_device_source #define CL_D3D11_DEVICE_KHR 0x4019 #define CL_D3D11_DXGI_ADAPTER_KHR 0x401A // cl_d3d11_device_set #define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B #define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C // cl_context_info #define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D #define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D // cl_mem_info #define CL_MEM_D3D11_RESOURCE_KHR 0x401E // cl_image_info #define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F // cl_command_type #define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020 #define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021 /******************************************************************************/ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)( cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, void * d3d_object, cl_d3d11_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)( cl_context context, cl_mem_flags flags, ID3D11Buffer * resource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D11Texture2D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D11Texture3D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; #ifdef __cplusplus } #endif #endif // __OPENCL_CL_D3D11_H starpu-1.4.9+dfsg/socl/src/CL/cl_dx9_media_sharing.h000066400000000000000000000120371507764646700222450ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2008-2012 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. **********************************************************************************/ /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ #ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H #define __OPENCL_CL_DX9_MEDIA_SHARING_H #include "cl.h" #include "cl_platform.h" #ifdef __cplusplus extern "C" { #endif /****************************************************************************** /* cl_khr_dx9_media_sharing */ #define cl_khr_dx9_media_sharing 1 typedef cl_uint cl_dx9_media_adapter_type_khr; typedef cl_uint cl_dx9_media_adapter_set_khr; #if defined(_WIN32) #include typedef struct _cl_dx9_surface_info_khr { IDirect3DSurface9 *resource; HANDLE shared_handle; } cl_dx9_surface_info_khr; #endif /******************************************************************************/ // Error Codes #define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010 #define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011 #define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012 #define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013 // cl_media_adapter_type_khr #define CL_ADAPTER_D3D9_KHR 0x2020 #define CL_ADAPTER_D3D9EX_KHR 0x2021 #define CL_ADAPTER_DXVA_KHR 0x2022 // cl_media_adapter_set_khr #define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023 #define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024 // cl_context_info #define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025 #define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026 #define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027 // cl_mem_info #define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028 #define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029 // cl_image_info #define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A // cl_command_type #define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B #define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C /******************************************************************************/ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)( cl_platform_id platform, cl_uint num_media_adapters, cl_dx9_media_adapter_type_khr * media_adapter_type, void * media_adapters, cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)( cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapter_type, void * surface_info, cl_uint plane, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; #ifdef __cplusplus } #endif #endif // __OPENCL_CL_DX9_MEDIA_SHARING_H starpu-1.4.9+dfsg/socl/src/CL/cl_ext.h000066400000000000000000000233341507764646700174710ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2010 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. ******************************************************************************/ /* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */ /* cl_ext.h contains OpenCL extensions which don't have external */ /* (OpenGL, D3D) dependencies. */ #ifndef __CL_EXT_H #define __CL_EXT_H #ifdef __cplusplus extern "C" { #endif #ifdef __APPLE__ #include #include #else #include "cl.h" #endif /* cl_khr_fp64 extension - no extension #define since it has no functions */ #define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 /* cl_khr_fp16 extension - no extension #define since it has no functions */ #define CL_DEVICE_HALF_FP_CONFIG 0x1033 /* Memory object destruction * * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR * * Registers a user callback function that will be called when the memory object is deleted and its resources * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback * stack associated with memobj. The registered user callback functions are called in the reverse order in * which they were registered. The user callback functions are called and then the memory object is deleted * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be * notified when the memory referenced by host_ptr, specified when the memory object is created and used as * the storage bits for the memory object, can be reused or freed. * * The application may not call CL api's with the cl_mem object passed to the pfn_notify. * * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) * before using. */ #define cl_APPLE_SetMemObjectDestructor 1 cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */, void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; /* Context Logging Functions * * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext(). * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) * before using. * * clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger */ #define cl_APPLE_ContextLoggingFunctions 1 extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */, const void * /* private_info */, size_t /* cb */, void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; /* clLogMessagesToStdout sends all log messages to the file descriptor stdout */ extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */, const void * /* private_info */, size_t /* cb */, void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; /* clLogMessagesToStderr sends all log messages to the file descriptor stderr */ extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */, const void * /* private_info */, size_t /* cb */, void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; /************************ * cl_khr_icd extension * ************************/ #define cl_khr_icd 1 /* cl_platform_info */ #define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 /* Additional Error Codes */ #define CL_PLATFORM_NOT_FOUND_KHR -1001 extern CL_API_ENTRY cl_int CL_API_CALL clIcdGetPlatformIDsKHR(cl_uint /* num_entries */, cl_platform_id * /* platforms */, cl_uint * /* num_platforms */); typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)( cl_uint /* num_entries */, cl_platform_id * /* platforms */, cl_uint * /* num_platforms */); /****************************************** * cl_nv_device_attribute_query extension * ******************************************/ /* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 #define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 #define CL_DEVICE_WARP_SIZE_NV 0x4003 #define CL_DEVICE_GPU_OVERLAP_NV 0x4004 #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 #define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 /********************************* * cl_amd_device_attribute_query * *********************************/ #define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 #ifdef CL_VERSION_1_1 /*********************************** * cl_ext_device_fission extension * ***********************************/ #define cl_ext_device_fission 1 extern CL_API_ENTRY cl_int CL_API_CALL clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; typedef cl_ulong cl_device_partition_property_ext; extern CL_API_ENTRY cl_int CL_API_CALL clCreateSubDevicesEXT( cl_device_id /*in_device*/, const cl_device_partition_property_ext * /* properties */, cl_uint /*num_entries*/, cl_device_id * /*out_devices*/, cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int ( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/, const cl_device_partition_property_ext * /* properties */, cl_uint /*num_entries*/, cl_device_id * /*out_devices*/, cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; /* cl_device_partition_property_ext */ #define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 #define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 #define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 /* clDeviceGetInfo selectors */ #define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 #define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 #define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 #define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 #define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 /* error codes */ #define CL_DEVICE_PARTITION_FAILED_EXT -1057 #define CL_INVALID_PARTITION_COUNT_EXT -1058 #define CL_INVALID_PARTITION_NAME_EXT -1059 /* CL_AFFINITY_DOMAINs */ #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 #define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 /* cl_device_partition_property_ext list terminators */ #define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0) #define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0) #define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1) #endif /* CL_VERSION_1_1 */ #ifdef __cplusplus } #endif #endif /* __CL_EXT_H */ starpu-1.4.9+dfsg/socl/src/CL/cl_gl.h000066400000000000000000000164371507764646700173010ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2011 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. **********************************************************************************/ #ifndef __OPENCL_CL_GL_H #define __OPENCL_CL_GL_H #ifdef __APPLE__ #include #else #include "cl.h" #endif #ifdef __cplusplus extern "C" { #endif typedef cl_uint cl_gl_object_type; typedef cl_uint cl_gl_texture_info; typedef cl_uint cl_gl_platform_info; typedef struct __GLsync *cl_GLsync; /* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ #define CL_GL_OBJECT_BUFFER 0x2000 #define CL_GL_OBJECT_TEXTURE2D 0x2001 #define CL_GL_OBJECT_TEXTURE3D 0x2002 #define CL_GL_OBJECT_RENDERBUFFER 0x2003 #define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E #define CL_GL_OBJECT_TEXTURE1D 0x200F #define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 #define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 /* cl_gl_texture_info */ #define CL_GL_TEXTURE_TARGET 0x2004 #define CL_GL_MIPMAP_LEVEL 0x2005 extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLBuffer(cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* bufobj */, int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture(cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, cl_GLint /* miplevel */, cl_GLuint /* texture */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLRenderbuffer(cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* renderbuffer */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetGLObjectInfo(cl_mem /* memobj */, cl_gl_object_type * /* gl_object_type */, cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetGLTextureInfo(cl_mem /* memobj */, cl_gl_texture_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */, cl_uint /* num_objects */, const cl_mem * /* mem_objects */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */, cl_uint /* num_objects */, const cl_mem * /* mem_objects */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS #ifndef BUILDING_SOCL #warning CL_USE_DEPRECATED_OPENCL_1_1_APIS is defined. These APIs are unsupported and untested in OpenCL 1.2! #endif extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture2D(cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, cl_GLint /* miplevel */, cl_GLuint /* texture */, cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture3D(cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, cl_GLint /* miplevel */, cl_GLuint /* texture */, cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; #endif /* CL_USE_DEPRECATED_OPENCL_1_2_APIS */ /* cl_khr_gl_sharing extension */ #define cl_khr_gl_sharing 1 typedef cl_uint cl_gl_context_info; /* Additional Error Codes */ #define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 /* cl_gl_context_info */ #define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 #define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 /* Additional cl_context_properties */ #define CL_GL_CONTEXT_KHR 0x2008 #define CL_EGL_DISPLAY_KHR 0x2009 #define CL_GLX_DISPLAY_KHR 0x200A #define CL_WGL_HDC_KHR 0x200B #define CL_CGL_SHAREGROUP_KHR 0x200C extern CL_API_ENTRY cl_int CL_API_CALL clGetGLContextInfoKHR(const cl_context_properties * /* properties */, cl_gl_context_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( const cl_context_properties * properties, cl_gl_context_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret); #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_GL_H */ starpu-1.4.9+dfsg/socl/src/CL/cl_gl_ext.h000066400000000000000000000051031507764646700201450ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2008-2010 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. **********************************************************************************/ /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ /* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */ /* OpenGL dependencies. */ #ifndef __OPENCL_CL_GL_EXT_H #define __OPENCL_CL_GL_EXT_H #ifdef __cplusplus extern "C" { #endif #ifdef __APPLE__ #include #else #include "cl_gl.h" #endif /* * For each extension, follow this template * cl_VEN_extname extension */ /* #define cl_VEN_extname 1 * ... define new types, if any * ... define new tokens, if any * ... define new APIs, if any * * If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header * This allows us to avoid having to decide whether to include GL headers or GLES here. */ /* * cl_khr_gl_event extension * See section 9.9 in the OpenCL 1.1 spec for more information */ #define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D extern CL_API_ENTRY cl_event CL_API_CALL clCreateEventFromGLsyncKHR(cl_context /* context */, cl_GLsync /* cl_GLsync */, cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1; #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_GL_EXT_H */ starpu-1.4.9+dfsg/socl/src/CL/cl_platform.h000066400000000000000000001127651507764646700205240ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2008-2010 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. **********************************************************************************/ /* $Revision: 11803 $ on $Date: 2010-06-25 10:02:12 -0700 (Fri, 25 Jun 2010) $ */ #ifndef __CL_PLATFORM_H #define __CL_PLATFORM_H #ifdef __APPLE__ /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */ #include #endif #ifdef __cplusplus extern "C" { #endif #if defined(_WIN32) #define CL_API_ENTRY #define CL_API_CALL __stdcall #define CL_CALLBACK __stdcall #else #define CL_API_ENTRY #define CL_API_CALL #define CL_CALLBACK #endif #ifdef __APPLE__ #define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) #define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER #define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER #define CL_API_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK #define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER #define CL_API_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK #else #define CL_EXTENSION_WEAK_LINK #define CL_API_SUFFIX__VERSION_1_0 #define CL_EXT_SUFFIX__VERSION_1_0 #define CL_API_SUFFIX__VERSION_1_1 #define CL_EXT_SUFFIX__VERSION_1_1 #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED #define CL_API_SUFFIX__VERSION_1_2 #define CL_EXT_SUFFIX__VERSION_1_2 #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED #endif #if (defined (_WIN32) && defined(_MSC_VER)) /* scalar types */ typedef signed __int8 cl_char; typedef unsigned __int8 cl_uchar; typedef signed __int16 cl_short; typedef unsigned __int16 cl_ushort; typedef signed __int32 cl_int; typedef unsigned __int32 cl_uint; typedef signed __int64 cl_long; typedef unsigned __int64 cl_ulong; typedef unsigned __int16 cl_half; typedef float cl_float; typedef double cl_double; /* Macro names and corresponding values defined by OpenCL */ #define CL_CHAR_BIT 8 #define CL_SCHAR_MAX 127 #define CL_SCHAR_MIN (-127-1) #define CL_CHAR_MAX CL_SCHAR_MAX #define CL_CHAR_MIN CL_SCHAR_MIN #define CL_UCHAR_MAX 255 #define CL_SHRT_MAX 32767 #define CL_SHRT_MIN (-32767-1) #define CL_USHRT_MAX 65535 #define CL_INT_MAX 2147483647 #define CL_INT_MIN (-2147483647-1) #define CL_UINT_MAX 0xffffffffU #define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) #define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) #define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) #define CL_FLT_DIG 6 #define CL_FLT_MANT_DIG 24 #define CL_FLT_MAX_10_EXP +38 #define CL_FLT_MAX_EXP +128 #define CL_FLT_MIN_10_EXP -37 #define CL_FLT_MIN_EXP -125 #define CL_FLT_RADIX 2 #define CL_FLT_MAX 340282346638528859811704183484516925440.0f #define CL_FLT_MIN 1.175494350822287507969e-38f #define CL_FLT_EPSILON 0x1.0p-23f #define CL_DBL_DIG 15 #define CL_DBL_MANT_DIG 53 #define CL_DBL_MAX_10_EXP +308 #define CL_DBL_MAX_EXP +1024 #define CL_DBL_MIN_10_EXP -307 #define CL_DBL_MIN_EXP -1021 #define CL_DBL_RADIX 2 #define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 #define CL_DBL_MIN 2.225073858507201383090e-308 #define CL_DBL_EPSILON 2.220446049250313080847e-16 #define CL_M_E 2.718281828459045090796 #define CL_M_LOG2E 1.442695040888963387005 #define CL_M_LOG10E 0.434294481903251816668 #define CL_M_LN2 0.693147180559945286227 #define CL_M_LN10 2.302585092994045901094 #define CL_M_PI 3.141592653589793115998 #define CL_M_PI_2 1.570796326794896557999 #define CL_M_PI_4 0.785398163397448278999 #define CL_M_1_PI 0.318309886183790691216 #define CL_M_2_PI 0.636619772367581382433 #define CL_M_2_SQRTPI 1.128379167095512558561 #define CL_M_SQRT2 1.414213562373095145475 #define CL_M_SQRT1_2 0.707106781186547572737 #define CL_M_E_F 2.71828174591064f #define CL_M_LOG2E_F 1.44269502162933f #define CL_M_LOG10E_F 0.43429449200630f #define CL_M_LN2_F 0.69314718246460f #define CL_M_LN10_F 2.30258512496948f #define CL_M_PI_F 3.14159274101257f #define CL_M_PI_2_F 1.57079637050629f #define CL_M_PI_4_F 0.78539818525314f #define CL_M_1_PI_F 0.31830987334251f #define CL_M_2_PI_F 0.63661974668503f #define CL_M_2_SQRTPI_F 1.12837922573090f #define CL_M_SQRT2_F 1.41421353816986f #define CL_M_SQRT1_2_F 0.70710676908493f #define CL_NAN (CL_INFINITY - CL_INFINITY) #define CL_HUGE_VALF ((cl_float) 1e50) #define CL_HUGE_VAL ((cl_double) 1e500) #define CL_MAXFLOAT CL_FLT_MAX #define CL_INFINITY CL_HUGE_VALF #else #include /* scalar types */ typedef int8_t cl_char; typedef uint8_t cl_uchar; typedef int16_t cl_short __attribute__((aligned(2))); typedef uint16_t cl_ushort __attribute__((aligned(2))); typedef int32_t cl_int __attribute__((aligned(4))); typedef uint32_t cl_uint __attribute__((aligned(4))); typedef int64_t cl_long __attribute__((aligned(8))); typedef uint64_t cl_ulong __attribute__((aligned(8))); typedef uint16_t cl_half __attribute__((aligned(2))); typedef float cl_float __attribute__((aligned(4))); typedef double cl_double __attribute__((aligned(8))); /* Macro names and corresponding values defined by OpenCL */ #define CL_CHAR_BIT 8 #define CL_SCHAR_MAX 127 #define CL_SCHAR_MIN (-127-1) #define CL_CHAR_MAX CL_SCHAR_MAX #define CL_CHAR_MIN CL_SCHAR_MIN #define CL_UCHAR_MAX 255 #define CL_SHRT_MAX 32767 #define CL_SHRT_MIN (-32767-1) #define CL_USHRT_MAX 65535 #define CL_INT_MAX 2147483647 #define CL_INT_MIN (-2147483647-1) #define CL_UINT_MAX 0xffffffffU #define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) #define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) #define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) #define CL_FLT_DIG 6 #define CL_FLT_MANT_DIG 24 #define CL_FLT_MAX_10_EXP +38 #define CL_FLT_MAX_EXP +128 #define CL_FLT_MIN_10_EXP -37 #define CL_FLT_MIN_EXP -125 #define CL_FLT_RADIX 2 #define CL_FLT_MAX 0x1.fffffep127f #define CL_FLT_MIN 0x1.0p-126f #define CL_FLT_EPSILON 0x1.0p-23f #define CL_DBL_DIG 15 #define CL_DBL_MANT_DIG 53 #define CL_DBL_MAX_10_EXP +308 #define CL_DBL_MAX_EXP +1024 #define CL_DBL_MIN_10_EXP -307 #define CL_DBL_MIN_EXP -1021 #define CL_DBL_RADIX 2 #define CL_DBL_MAX 0x1.fffffffffffffp1023 #define CL_DBL_MIN 0x1.0p-1022 #define CL_DBL_EPSILON 0x1.0p-52 #define CL_M_E 2.718281828459045090796 #define CL_M_LOG2E 1.442695040888963387005 #define CL_M_LOG10E 0.434294481903251816668 #define CL_M_LN2 0.693147180559945286227 #define CL_M_LN10 2.302585092994045901094 #define CL_M_PI 3.141592653589793115998 #define CL_M_PI_2 1.570796326794896557999 #define CL_M_PI_4 0.785398163397448278999 #define CL_M_1_PI 0.318309886183790691216 #define CL_M_2_PI 0.636619772367581382433 #define CL_M_2_SQRTPI 1.128379167095512558561 #define CL_M_SQRT2 1.414213562373095145475 #define CL_M_SQRT1_2 0.707106781186547572737 #define CL_M_E_F 2.71828174591064f #define CL_M_LOG2E_F 1.44269502162933f #define CL_M_LOG10E_F 0.43429449200630f #define CL_M_LN2_F 0.69314718246460f #define CL_M_LN10_F 2.30258512496948f #define CL_M_PI_F 3.14159274101257f #define CL_M_PI_2_F 1.57079637050629f #define CL_M_PI_4_F 0.78539818525314f #define CL_M_1_PI_F 0.31830987334251f #define CL_M_2_PI_F 0.63661974668503f #define CL_M_2_SQRTPI_F 1.12837922573090f #define CL_M_SQRT2_F 1.41421353816986f #define CL_M_SQRT1_2_F 0.70710676908493f #if defined( __GNUC__ ) #define CL_HUGE_VALF __builtin_huge_valf() #define CL_HUGE_VAL __builtin_huge_val() #define CL_NAN __builtin_nanf( "" ) #else #define CL_HUGE_VALF ((cl_float) 1e50) #define CL_HUGE_VAL ((cl_double) 1e500) float nanf( const char * ); #define CL_NAN nanf( "" ) #endif #define CL_MAXFLOAT CL_FLT_MAX #define CL_INFINITY CL_HUGE_VALF #endif #include /* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here. */ typedef unsigned int cl_GLuint; typedef int cl_GLint; typedef unsigned int cl_GLenum; /* * Vector types * * Note: OpenCL requires that all types be naturally aligned. * This means that vector types must be naturally aligned. * For example, a vector of four floats must be aligned to * a 16 byte boundary (calculated as 4 * the natural 4-byte * alignment of the float). The alignment qualifiers here * will only function properly if your compiler supports them * and if you don't actively work to defeat them. For example, * in order for a cl_float4 to be 16 byte aligned in a struct, * the start of the struct must itself be 16-byte aligned. * * Maintaining proper alignment is the user's responsibility. */ /* Define basic vector types */ #if defined( __VEC__ ) #include /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */ typedef vector unsigned char __cl_uchar16; typedef vector signed char __cl_char16; typedef vector unsigned short __cl_ushort8; typedef vector signed short __cl_short8; typedef vector unsigned int __cl_uint4; typedef vector signed int __cl_int4; typedef vector float __cl_float4; #define __CL_UCHAR16__ 1 #define __CL_CHAR16__ 1 #define __CL_USHORT8__ 1 #define __CL_SHORT8__ 1 #define __CL_UINT4__ 1 #define __CL_INT4__ 1 #define __CL_FLOAT4__ 1 #endif #if defined( __SSE__ ) #if defined( __MINGW64__ ) #include #else #include #endif #if defined( __GNUC__ ) typedef float __cl_float4 __attribute__((vector_size(16))); #else typedef __m128 __cl_float4; #endif #define __CL_FLOAT4__ 1 #endif #if defined( __SSE2__ ) #if defined( __MINGW64__ ) #include #else #include #endif #if defined( __GNUC__ ) typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); typedef cl_char __cl_char16 __attribute__((vector_size(16))); typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16))); typedef cl_short __cl_short8 __attribute__((vector_size(16))); typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); typedef cl_int __cl_int4 __attribute__((vector_size(16))); typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); typedef cl_long __cl_long2 __attribute__((vector_size(16))); typedef cl_double __cl_double2 __attribute__((vector_size(16))); #else typedef __m128i __cl_uchar16; typedef __m128i __cl_char16; typedef __m128i __cl_ushort8; typedef __m128i __cl_short8; typedef __m128i __cl_uint4; typedef __m128i __cl_int4; typedef __m128i __cl_ulong2; typedef __m128i __cl_long2; typedef __m128d __cl_double2; #endif #define __CL_UCHAR16__ 1 #define __CL_CHAR16__ 1 #define __CL_USHORT8__ 1 #define __CL_SHORT8__ 1 #define __CL_INT4__ 1 #define __CL_UINT4__ 1 #define __CL_ULONG2__ 1 #define __CL_LONG2__ 1 #define __CL_DOUBLE2__ 1 #endif #if defined( __MMX__ ) #include #if defined( __GNUC__ ) typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); typedef cl_char __cl_char8 __attribute__((vector_size(8))); typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); typedef cl_short __cl_short4 __attribute__((vector_size(8))); typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); typedef cl_int __cl_int2 __attribute__((vector_size(8))); typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); typedef cl_long __cl_long1 __attribute__((vector_size(8))); typedef cl_float __cl_float2 __attribute__((vector_size(8))); #else typedef __m64 __cl_uchar8; typedef __m64 __cl_char8; typedef __m64 __cl_ushort4; typedef __m64 __cl_short4; typedef __m64 __cl_uint2; typedef __m64 __cl_int2; typedef __m64 __cl_ulong1; typedef __m64 __cl_long1; typedef __m64 __cl_float2; #endif #define __CL_UCHAR8__ 1 #define __CL_CHAR8__ 1 #define __CL_USHORT4__ 1 #define __CL_SHORT4__ 1 #define __CL_INT2__ 1 #define __CL_UINT2__ 1 #define __CL_ULONG1__ 1 #define __CL_LONG1__ 1 #define __CL_FLOAT2__ 1 #endif #if defined( __AVX__ ) #if defined( __MINGW64__ ) #include #else #include #endif #if defined( __GNUC__ ) typedef cl_float __cl_float8 __attribute__((vector_size(32))); typedef cl_double __cl_double4 __attribute__((vector_size(32))); #else typedef __m256 __cl_float8; typedef __m256d __cl_double4; #endif #define __CL_FLOAT8__ 1 #define __CL_DOUBLE4__ 1 #endif /* Define alignment keys */ #if defined( __GNUC__ ) #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) #elif defined( _WIN32) && (_MSC_VER) /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ /* #include */ /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ #define CL_ALIGNED(_x) #else #warning Need to implement some method to align data here #define CL_ALIGNED(_x) #endif /* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) /* .xyzw and .s0123...{f|F} are supported */ #define CL_HAS_NAMED_VECTOR_FIELDS 1 /* .hi and .lo are supported */ #define CL_HAS_HI_LO_VECTOR_FIELDS 1 #endif /* Define cl_vector types */ /* ---- cl_charn ---- */ typedef union { cl_char CL_ALIGNED(2) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_char x, y; }; __extension__ struct{ cl_char s0, s1; }; __extension__ struct{ cl_char lo, hi; }; #endif #if defined( __CL_CHAR2__) __cl_char2 v2; #endif }cl_char2; typedef union { cl_char CL_ALIGNED(4) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_char x, y, z, w; }; __extension__ struct{ cl_char s0, s1, s2, s3; }; __extension__ struct{ cl_char2 lo, hi; }; #endif #if defined( __CL_CHAR2__) __cl_char2 v2[2]; #endif #if defined( __CL_CHAR4__) __cl_char4 v4; #endif }cl_char4; /* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */ typedef cl_char4 cl_char3; typedef union { cl_char CL_ALIGNED(8) s[8]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_char x, y, z, w; }; __extension__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_char4 lo, hi; }; #endif #if defined( __CL_CHAR2__) __cl_char2 v2[4]; #endif #if defined( __CL_CHAR4__) __cl_char4 v4[2]; #endif #if defined( __CL_CHAR8__ ) __cl_char8 v8; #endif }cl_char8; typedef union { cl_char CL_ALIGNED(16) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_char8 lo, hi; }; #endif #if defined( __CL_CHAR2__) __cl_char2 v2[8]; #endif #if defined( __CL_CHAR4__) __cl_char4 v4[4]; #endif #if defined( __CL_CHAR8__ ) __cl_char8 v8[2]; #endif #if defined( __CL_CHAR16__ ) __cl_char16 v16; #endif }cl_char16; /* ---- cl_ucharn ---- */ typedef union { cl_uchar CL_ALIGNED(2) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uchar x, y; }; __extension__ struct{ cl_uchar s0, s1; }; __extension__ struct{ cl_uchar lo, hi; }; #endif #if defined( __cl_uchar2__) __cl_uchar2 v2; #endif }cl_uchar2; typedef union { cl_uchar CL_ALIGNED(4) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uchar x, y, z, w; }; __extension__ struct{ cl_uchar s0, s1, s2, s3; }; __extension__ struct{ cl_uchar2 lo, hi; }; #endif #if defined( __CL_UCHAR2__) __cl_uchar2 v2[2]; #endif #if defined( __CL_UCHAR4__) __cl_uchar4 v4; #endif }cl_uchar4; /* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */ typedef cl_uchar4 cl_uchar3; typedef union { cl_uchar CL_ALIGNED(8) s[8]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uchar x, y, z, w; }; __extension__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_uchar4 lo, hi; }; #endif #if defined( __CL_UCHAR2__) __cl_uchar2 v2[4]; #endif #if defined( __CL_UCHAR4__) __cl_uchar4 v4[2]; #endif #if defined( __CL_UCHAR8__ ) __cl_uchar8 v8; #endif }cl_uchar8; typedef union { cl_uchar CL_ALIGNED(16) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_uchar8 lo, hi; }; #endif #if defined( __CL_UCHAR2__) __cl_uchar2 v2[8]; #endif #if defined( __CL_UCHAR4__) __cl_uchar4 v4[4]; #endif #if defined( __CL_UCHAR8__ ) __cl_uchar8 v8[2]; #endif #if defined( __CL_UCHAR16__ ) __cl_uchar16 v16; #endif }cl_uchar16; /* ---- cl_shortn ---- */ typedef union { cl_short CL_ALIGNED(4) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_short x, y; }; __extension__ struct{ cl_short s0, s1; }; __extension__ struct{ cl_short lo, hi; }; #endif #if defined( __CL_SHORT2__) __cl_short2 v2; #endif }cl_short2; typedef union { cl_short CL_ALIGNED(8) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_short x, y, z, w; }; __extension__ struct{ cl_short s0, s1, s2, s3; }; __extension__ struct{ cl_short2 lo, hi; }; #endif #if defined( __CL_SHORT2__) __cl_short2 v2[2]; #endif #if defined( __CL_SHORT4__) __cl_short4 v4; #endif }cl_short4; /* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */ typedef cl_short4 cl_short3; typedef union { cl_short CL_ALIGNED(16) s[8]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_short x, y, z, w; }; __extension__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_short4 lo, hi; }; #endif #if defined( __CL_SHORT2__) __cl_short2 v2[4]; #endif #if defined( __CL_SHORT4__) __cl_short4 v4[2]; #endif #if defined( __CL_SHORT8__ ) __cl_short8 v8; #endif }cl_short8; typedef union { cl_short CL_ALIGNED(32) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_short8 lo, hi; }; #endif #if defined( __CL_SHORT2__) __cl_short2 v2[8]; #endif #if defined( __CL_SHORT4__) __cl_short4 v4[4]; #endif #if defined( __CL_SHORT8__ ) __cl_short8 v8[2]; #endif #if defined( __CL_SHORT16__ ) __cl_short16 v16; #endif }cl_short16; /* ---- cl_ushortn ---- */ typedef union { cl_ushort CL_ALIGNED(4) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ushort x, y; }; __extension__ struct{ cl_ushort s0, s1; }; __extension__ struct{ cl_ushort lo, hi; }; #endif #if defined( __CL_USHORT2__) __cl_ushort2 v2; #endif }cl_ushort2; typedef union { cl_ushort CL_ALIGNED(8) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ushort x, y, z, w; }; __extension__ struct{ cl_ushort s0, s1, s2, s3; }; __extension__ struct{ cl_ushort2 lo, hi; }; #endif #if defined( __CL_USHORT2__) __cl_ushort2 v2[2]; #endif #if defined( __CL_USHORT4__) __cl_ushort4 v4; #endif }cl_ushort4; /* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */ typedef cl_ushort4 cl_ushort3; typedef union { cl_ushort CL_ALIGNED(16) s[8]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ushort x, y, z, w; }; __extension__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_ushort4 lo, hi; }; #endif #if defined( __CL_USHORT2__) __cl_ushort2 v2[4]; #endif #if defined( __CL_USHORT4__) __cl_ushort4 v4[2]; #endif #if defined( __CL_USHORT8__ ) __cl_ushort8 v8; #endif }cl_ushort8; typedef union { cl_ushort CL_ALIGNED(32) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_ushort8 lo, hi; }; #endif #if defined( __CL_USHORT2__) __cl_ushort2 v2[8]; #endif #if defined( __CL_USHORT4__) __cl_ushort4 v4[4]; #endif #if defined( __CL_USHORT8__ ) __cl_ushort8 v8[2]; #endif #if defined( __CL_USHORT16__ ) __cl_ushort16 v16; #endif }cl_ushort16; /* ---- cl_intn ---- */ typedef union { cl_int CL_ALIGNED(8) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_int x, y; }; __extension__ struct{ cl_int s0, s1; }; __extension__ struct{ cl_int lo, hi; }; #endif #if defined( __CL_INT2__) __cl_int2 v2; #endif }cl_int2; typedef union { cl_int CL_ALIGNED(16) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_int x, y, z, w; }; __extension__ struct{ cl_int s0, s1, s2, s3; }; __extension__ struct{ cl_int2 lo, hi; }; #endif #if defined( __CL_INT2__) __cl_int2 v2[2]; #endif #if defined( __CL_INT4__) __cl_int4 v4; #endif }cl_int4; /* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */ typedef cl_int4 cl_int3; typedef union { cl_int CL_ALIGNED(32) s[8]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_int x, y, z, w; }; __extension__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_int4 lo, hi; }; #endif #if defined( __CL_INT2__) __cl_int2 v2[4]; #endif #if defined( __CL_INT4__) __cl_int4 v4[2]; #endif #if defined( __CL_INT8__ ) __cl_int8 v8; #endif }cl_int8; typedef union { cl_int CL_ALIGNED(64) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_int8 lo, hi; }; #endif #if defined( __CL_INT2__) __cl_int2 v2[8]; #endif #if defined( __CL_INT4__) __cl_int4 v4[4]; #endif #if defined( __CL_INT8__ ) __cl_int8 v8[2]; #endif #if defined( __CL_INT16__ ) __cl_int16 v16; #endif }cl_int16; /* ---- cl_uintn ---- */ typedef union { cl_uint CL_ALIGNED(8) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uint x, y; }; __extension__ struct{ cl_uint s0, s1; }; __extension__ struct{ cl_uint lo, hi; }; #endif #if defined( __CL_UINT2__) __cl_uint2 v2; #endif }cl_uint2; typedef union { cl_uint CL_ALIGNED(16) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uint x, y, z, w; }; __extension__ struct{ cl_uint s0, s1, s2, s3; }; __extension__ struct{ cl_uint2 lo, hi; }; #endif #if defined( __CL_UINT2__) __cl_uint2 v2[2]; #endif #if defined( __CL_UINT4__) __cl_uint4 v4; #endif }cl_uint4; /* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */ typedef cl_uint4 cl_uint3; typedef union { cl_uint CL_ALIGNED(32) s[8]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uint x, y, z, w; }; __extension__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_uint4 lo, hi; }; #endif #if defined( __CL_UINT2__) __cl_uint2 v2[4]; #endif #if defined( __CL_UINT4__) __cl_uint4 v4[2]; #endif #if defined( __CL_UINT8__ ) __cl_uint8 v8; #endif }cl_uint8; typedef union { cl_uint CL_ALIGNED(64) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_uint8 lo, hi; }; #endif #if defined( __CL_UINT2__) __cl_uint2 v2[8]; #endif #if defined( __CL_UINT4__) __cl_uint4 v4[4]; #endif #if defined( __CL_UINT8__ ) __cl_uint8 v8[2]; #endif #if defined( __CL_UINT16__ ) __cl_uint16 v16; #endif }cl_uint16; /* ---- cl_longn ---- */ typedef union { cl_long CL_ALIGNED(16) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_long x, y; }; __extension__ struct{ cl_long s0, s1; }; __extension__ struct{ cl_long lo, hi; }; #endif #if defined( __CL_LONG2__) __cl_long2 v2; #endif }cl_long2; typedef union { cl_long CL_ALIGNED(32) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_long x, y, z, w; }; __extension__ struct{ cl_long s0, s1, s2, s3; }; __extension__ struct{ cl_long2 lo, hi; }; #endif #if defined( __CL_LONG2__) __cl_long2 v2[2]; #endif #if defined( __CL_LONG4__) __cl_long4 v4; #endif }cl_long4; /* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */ typedef cl_long4 cl_long3; typedef union { cl_long CL_ALIGNED(64) s[8]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_long x, y, z, w; }; __extension__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_long4 lo, hi; }; #endif #if defined( __CL_LONG2__) __cl_long2 v2[4]; #endif #if defined( __CL_LONG4__) __cl_long4 v4[2]; #endif #if defined( __CL_LONG8__ ) __cl_long8 v8; #endif }cl_long8; typedef union { cl_long CL_ALIGNED(128) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_long8 lo, hi; }; #endif #if defined( __CL_LONG2__) __cl_long2 v2[8]; #endif #if defined( __CL_LONG4__) __cl_long4 v4[4]; #endif #if defined( __CL_LONG8__ ) __cl_long8 v8[2]; #endif #if defined( __CL_LONG16__ ) __cl_long16 v16; #endif }cl_long16; /* ---- cl_ulongn ---- */ typedef union { cl_ulong CL_ALIGNED(16) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ulong x, y; }; __extension__ struct{ cl_ulong s0, s1; }; __extension__ struct{ cl_ulong lo, hi; }; #endif #if defined( __CL_ULONG2__) __cl_ulong2 v2; #endif }cl_ulong2; typedef union { cl_ulong CL_ALIGNED(32) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ulong x, y, z, w; }; __extension__ struct{ cl_ulong s0, s1, s2, s3; }; __extension__ struct{ cl_ulong2 lo, hi; }; #endif #if defined( __CL_ULONG2__) __cl_ulong2 v2[2]; #endif #if defined( __CL_ULONG4__) __cl_ulong4 v4; #endif }cl_ulong4; /* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */ typedef cl_ulong4 cl_ulong3; typedef union { cl_ulong CL_ALIGNED(64) s[8]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ulong x, y, z, w; }; __extension__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_ulong4 lo, hi; }; #endif #if defined( __CL_ULONG2__) __cl_ulong2 v2[4]; #endif #if defined( __CL_ULONG4__) __cl_ulong4 v4[2]; #endif #if defined( __CL_ULONG8__ ) __cl_ulong8 v8; #endif }cl_ulong8; typedef union { cl_ulong CL_ALIGNED(128) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_ulong8 lo, hi; }; #endif #if defined( __CL_ULONG2__) __cl_ulong2 v2[8]; #endif #if defined( __CL_ULONG4__) __cl_ulong4 v4[4]; #endif #if defined( __CL_ULONG8__ ) __cl_ulong8 v8[2]; #endif #if defined( __CL_ULONG16__ ) __cl_ulong16 v16; #endif }cl_ulong16; /* --- cl_floatn ---- */ typedef union { cl_float CL_ALIGNED(8) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_float x, y; }; __extension__ struct{ cl_float s0, s1; }; __extension__ struct{ cl_float lo, hi; }; #endif #if defined( __CL_FLOAT2__) __cl_float2 v2; #endif }cl_float2; typedef union { cl_float CL_ALIGNED(16) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_float x, y, z, w; }; __extension__ struct{ cl_float s0, s1, s2, s3; }; __extension__ struct{ cl_float2 lo, hi; }; #endif #if defined( __CL_FLOAT2__) __cl_float2 v2[2]; #endif #if defined( __CL_FLOAT4__) __cl_float4 v4; #endif }cl_float4; /* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */ typedef cl_float4 cl_float3; typedef union { cl_float CL_ALIGNED(32) s[8]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_float x, y, z, w; }; __extension__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_float4 lo, hi; }; #endif #if defined( __CL_FLOAT2__) __cl_float2 v2[4]; #endif #if defined( __CL_FLOAT4__) __cl_float4 v4[2]; #endif #if defined( __CL_FLOAT8__ ) __cl_float8 v8; #endif }cl_float8; typedef union { cl_float CL_ALIGNED(64) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_float8 lo, hi; }; #endif #if defined( __CL_FLOAT2__) __cl_float2 v2[8]; #endif #if defined( __CL_FLOAT4__) __cl_float4 v4[4]; #endif #if defined( __CL_FLOAT8__ ) __cl_float8 v8[2]; #endif #if defined( __CL_FLOAT16__ ) __cl_float16 v16; #endif }cl_float16; /* --- cl_doublen ---- */ typedef union { cl_double CL_ALIGNED(16) s[2]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_double x, y; }; __extension__ struct{ cl_double s0, s1; }; __extension__ struct{ cl_double lo, hi; }; #endif #if defined( __CL_DOUBLE2__) __cl_double2 v2; #endif }cl_double2; typedef union { cl_double CL_ALIGNED(32) s[4]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_double x, y, z, w; }; __extension__ struct{ cl_double s0, s1, s2, s3; }; __extension__ struct{ cl_double2 lo, hi; }; #endif #if defined( __CL_DOUBLE2__) __cl_double2 v2[2]; #endif #if defined( __CL_DOUBLE4__) __cl_double4 v4; #endif }cl_double4; /* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */ typedef cl_double4 cl_double3; typedef union { cl_double CL_ALIGNED(64) s[8]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_double x, y, z, w; }; __extension__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; }; __extension__ struct{ cl_double4 lo, hi; }; #endif #if defined( __CL_DOUBLE2__) __cl_double2 v2[4]; #endif #if defined( __CL_DOUBLE4__) __cl_double4 v4[2]; #endif #if defined( __CL_DOUBLE8__ ) __cl_double8 v8; #endif }cl_double8; typedef union { cl_double CL_ALIGNED(128) s[16]; #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) __extension__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __extension__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __extension__ struct{ cl_double8 lo, hi; }; #endif #if defined( __CL_DOUBLE2__) __cl_double2 v2[8]; #endif #if defined( __CL_DOUBLE4__) __cl_double4 v4[4]; #endif #if defined( __CL_DOUBLE8__ ) __cl_double8 v8[2]; #endif #if defined( __CL_DOUBLE16__ ) __cl_double16 v16; #endif }cl_double16; /* Macro to facilitate debugging * Usage: * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. * The first line ends with: CL_PROGRAM_STRING_BEGIN \" * Each line thereafter of OpenCL C source must end with: \n\ * The last line ends in "; * * Example: * * const char *my_program = CL_PROGRAM_STRING_BEGIN "\ * kernel void foo( int a, float * b ) \n\ * { \n\ * // my comment \n\ * *b[ get_global_id(0)] = a; \n\ * } \n\ * "; * * This should correctly set up the line, (column) and file information for your source * string so you can do source level debugging. */ #define __CL_STRINGIFY( _x ) # _x #define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x ) #define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" #ifdef __cplusplus } #endif #endif /* __CL_PLATFORM_H */ starpu-1.4.9+dfsg/socl/src/CL/opencl.h000066400000000000000000000033161507764646700174710ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2010 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. ******************************************************************************/ /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ #ifndef __OPENCL_H #define __OPENCL_H #ifdef __cplusplus extern "C" { #endif #ifdef __APPLE__ #include #include #include #include #else #include "cl.h" #include "cl_gl.h" #include "cl_gl_ext.h" #include "cl_ext.h" #endif #ifdef __cplusplus } #endif #endif /* __OPENCL_H */ starpu-1.4.9+dfsg/socl/src/Makefile.am000066400000000000000000000072001507764646700175720ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-notests.mk CLEANFILES = *.gcno *.gcda AM_CPPFLAGS = -DBUILDING_SOCL -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_builddir)/src -I$(top_srcdir)/src -I$(top_srcdir)/socl/src $(STARPU_H_CPPFLAGS) LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) LIBS += $(STARPU_OPENCL_LDFLAGS) SUBDIRS = lib_LTLIBRARIES = libsocl-@STARPU_EFFECTIVE_VERSION@.la noinst_HEADERS = \ command.h \ command_list.h \ command_queue.h \ debug.h \ event.h \ gc.h \ getinfo.h \ mem_objects.h \ ocl_icd.h \ socl.h \ task.h \ util.h \ init.h \ CL/cl_d3d10.h \ CL/cl_ext.h \ CL/cl.h \ CL/cl_d3d11.h \ CL/cl_gl_ext.h \ CL/cl_platform.h \ CL/cl_dx9_media_sharing.h \ CL/cl_gl.h \ CL/opencl.h libsocl_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \ -version-info $(LIBSOCL_INTERFACE_CURRENT):$(LIBSOCL_INTERFACE_REVISION):$(LIBSOCL_INTERFACE_AGE) libsocl_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \ command.c \ command_list.c \ command_queue.c \ debug.c \ event.c \ gc.c \ init.c \ mem_objects.c \ socl.c \ task.c \ util.c \ cl_getplatformids.c \ cl_getplatforminfo.c \ cl_getdeviceids.c \ cl_getdeviceinfo.c \ cl_releasecontext.c \ cl_createcontext.c \ cl_createcontextfromtype.c \ cl_retaincontext.c \ cl_getcontextinfo.c \ cl_releasecommandqueue.c \ cl_createcommandqueue.c \ cl_retaincommandqueue.c \ cl_getcommandqueueinfo.c \ cl_setcommandqueueproperty.c \ cl_releaseevent.c \ cl_waitforevents.c \ cl_geteventinfo.c \ cl_retainevent.c \ cl_enqueuemarker.c \ cl_enqueuewaitforevents.c \ cl_enqueuebarrier.c \ cl_flush.c \ cl_finish.c \ cl_releasememobject.c \ cl_createbuffer.c \ cl_createimage2d.c \ cl_createimage3d.c \ cl_retainmemobject.c \ cl_getsupportedimageformats.c \ cl_getmemobjectinfo.c \ cl_getimageinfo.c \ cl_createsampler.c \ cl_retainsampler.c \ cl_releasesampler.c \ cl_getsamplerinfo.c \ cl_releaseprogram.c \ cl_createprogramwithsource.c \ cl_createprogramwithbinary.c \ cl_retainprogram.c \ cl_buildprogram.c \ cl_unloadcompiler.c \ cl_getprograminfo.c \ cl_getprogrambuildinfo.c \ cl_releasekernel.c \ cl_createkernel.c \ cl_createkernelsinprogram.c \ cl_retainkernel.c \ cl_setkernelarg.c \ cl_getkernelinfo.c \ cl_getkernelworkgroupinfo.c \ cl_enqueuereadbuffer.c \ cl_enqueuewritebuffer.c \ cl_enqueuecopybuffer.c \ cl_enqueuereadimage.c \ cl_enqueuewriteimage.c \ cl_enqueuecopyimage.c \ cl_enqueuecopyimagetobuffer.c \ cl_enqueuecopybuffertoimage.c \ cl_enqueuemapbuffer.c \ cl_enqueuemapimage.c \ cl_enqueueunmapmemobject.c \ cl_enqueuetask.c \ cl_enqueuendrangekernel.c \ cl_enqueuenativekernel.c \ cl_enqueuemarkerwithwaitlist.c \ cl_enqueuebarrierwithwaitlist.c \ cl_geteventprofilinginfo.c \ cl_getextensionfunctionaddress.c \ cl_icdgetplatformidskhr.c starpu-1.4.9+dfsg/socl/src/Makefile.in000066400000000000000000001655001507764646700176130ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ subdir = socl/src ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(libdir)" LTLIBRARIES = $(lib_LTLIBRARIES) libsocl_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = am_libsocl_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = command.lo \ command_list.lo command_queue.lo debug.lo event.lo gc.lo \ init.lo mem_objects.lo socl.lo task.lo util.lo \ cl_getplatformids.lo cl_getplatforminfo.lo cl_getdeviceids.lo \ cl_getdeviceinfo.lo cl_releasecontext.lo cl_createcontext.lo \ cl_createcontextfromtype.lo cl_retaincontext.lo \ cl_getcontextinfo.lo cl_releasecommandqueue.lo \ cl_createcommandqueue.lo cl_retaincommandqueue.lo \ cl_getcommandqueueinfo.lo cl_setcommandqueueproperty.lo \ cl_releaseevent.lo cl_waitforevents.lo cl_geteventinfo.lo \ cl_retainevent.lo cl_enqueuemarker.lo \ cl_enqueuewaitforevents.lo cl_enqueuebarrier.lo cl_flush.lo \ cl_finish.lo cl_releasememobject.lo cl_createbuffer.lo \ cl_createimage2d.lo cl_createimage3d.lo cl_retainmemobject.lo \ cl_getsupportedimageformats.lo cl_getmemobjectinfo.lo \ cl_getimageinfo.lo cl_createsampler.lo cl_retainsampler.lo \ cl_releasesampler.lo cl_getsamplerinfo.lo cl_releaseprogram.lo \ cl_createprogramwithsource.lo cl_createprogramwithbinary.lo \ cl_retainprogram.lo cl_buildprogram.lo cl_unloadcompiler.lo \ cl_getprograminfo.lo cl_getprogrambuildinfo.lo \ cl_releasekernel.lo cl_createkernel.lo \ cl_createkernelsinprogram.lo cl_retainkernel.lo \ cl_setkernelarg.lo cl_getkernelinfo.lo \ cl_getkernelworkgroupinfo.lo cl_enqueuereadbuffer.lo \ cl_enqueuewritebuffer.lo cl_enqueuecopybuffer.lo \ cl_enqueuereadimage.lo cl_enqueuewriteimage.lo \ cl_enqueuecopyimage.lo cl_enqueuecopyimagetobuffer.lo \ cl_enqueuecopybuffertoimage.lo cl_enqueuemapbuffer.lo \ cl_enqueuemapimage.lo cl_enqueueunmapmemobject.lo \ cl_enqueuetask.lo cl_enqueuendrangekernel.lo \ cl_enqueuenativekernel.lo cl_enqueuemarkerwithwaitlist.lo \ cl_enqueuebarrierwithwaitlist.lo cl_geteventprofilinginfo.lo \ cl_getextensionfunctionaddress.lo cl_icdgetplatformidskhr.lo libsocl_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ $(am_libsocl_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = libsocl_@STARPU_EFFECTIVE_VERSION@_la_LINK = $(LIBTOOL) $(AM_V_lt) \ --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(libsocl_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS) $(LDFLAGS) -o \ $@ AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/cl_buildprogram.Plo \ ./$(DEPDIR)/cl_createbuffer.Plo \ ./$(DEPDIR)/cl_createcommandqueue.Plo \ ./$(DEPDIR)/cl_createcontext.Plo \ ./$(DEPDIR)/cl_createcontextfromtype.Plo \ ./$(DEPDIR)/cl_createimage2d.Plo \ ./$(DEPDIR)/cl_createimage3d.Plo \ ./$(DEPDIR)/cl_createkernel.Plo \ ./$(DEPDIR)/cl_createkernelsinprogram.Plo \ ./$(DEPDIR)/cl_createprogramwithbinary.Plo \ ./$(DEPDIR)/cl_createprogramwithsource.Plo \ ./$(DEPDIR)/cl_createsampler.Plo \ ./$(DEPDIR)/cl_enqueuebarrier.Plo \ ./$(DEPDIR)/cl_enqueuebarrierwithwaitlist.Plo \ ./$(DEPDIR)/cl_enqueuecopybuffer.Plo \ ./$(DEPDIR)/cl_enqueuecopybuffertoimage.Plo \ ./$(DEPDIR)/cl_enqueuecopyimage.Plo \ ./$(DEPDIR)/cl_enqueuecopyimagetobuffer.Plo \ ./$(DEPDIR)/cl_enqueuemapbuffer.Plo \ ./$(DEPDIR)/cl_enqueuemapimage.Plo \ ./$(DEPDIR)/cl_enqueuemarker.Plo \ ./$(DEPDIR)/cl_enqueuemarkerwithwaitlist.Plo \ ./$(DEPDIR)/cl_enqueuenativekernel.Plo \ ./$(DEPDIR)/cl_enqueuendrangekernel.Plo \ ./$(DEPDIR)/cl_enqueuereadbuffer.Plo \ ./$(DEPDIR)/cl_enqueuereadimage.Plo \ ./$(DEPDIR)/cl_enqueuetask.Plo \ ./$(DEPDIR)/cl_enqueueunmapmemobject.Plo \ ./$(DEPDIR)/cl_enqueuewaitforevents.Plo \ ./$(DEPDIR)/cl_enqueuewritebuffer.Plo \ ./$(DEPDIR)/cl_enqueuewriteimage.Plo ./$(DEPDIR)/cl_finish.Plo \ ./$(DEPDIR)/cl_flush.Plo \ ./$(DEPDIR)/cl_getcommandqueueinfo.Plo \ ./$(DEPDIR)/cl_getcontextinfo.Plo \ ./$(DEPDIR)/cl_getdeviceids.Plo \ ./$(DEPDIR)/cl_getdeviceinfo.Plo \ ./$(DEPDIR)/cl_geteventinfo.Plo \ ./$(DEPDIR)/cl_geteventprofilinginfo.Plo \ ./$(DEPDIR)/cl_getextensionfunctionaddress.Plo \ ./$(DEPDIR)/cl_getimageinfo.Plo \ ./$(DEPDIR)/cl_getkernelinfo.Plo \ ./$(DEPDIR)/cl_getkernelworkgroupinfo.Plo \ ./$(DEPDIR)/cl_getmemobjectinfo.Plo \ ./$(DEPDIR)/cl_getplatformids.Plo \ ./$(DEPDIR)/cl_getplatforminfo.Plo \ ./$(DEPDIR)/cl_getprogrambuildinfo.Plo \ ./$(DEPDIR)/cl_getprograminfo.Plo \ ./$(DEPDIR)/cl_getsamplerinfo.Plo \ ./$(DEPDIR)/cl_getsupportedimageformats.Plo \ ./$(DEPDIR)/cl_icdgetplatformidskhr.Plo \ ./$(DEPDIR)/cl_releasecommandqueue.Plo \ ./$(DEPDIR)/cl_releasecontext.Plo \ ./$(DEPDIR)/cl_releaseevent.Plo \ ./$(DEPDIR)/cl_releasekernel.Plo \ ./$(DEPDIR)/cl_releasememobject.Plo \ ./$(DEPDIR)/cl_releaseprogram.Plo \ ./$(DEPDIR)/cl_releasesampler.Plo \ ./$(DEPDIR)/cl_retaincommandqueue.Plo \ ./$(DEPDIR)/cl_retaincontext.Plo \ ./$(DEPDIR)/cl_retainevent.Plo ./$(DEPDIR)/cl_retainkernel.Plo \ ./$(DEPDIR)/cl_retainmemobject.Plo \ ./$(DEPDIR)/cl_retainprogram.Plo \ ./$(DEPDIR)/cl_retainsampler.Plo \ ./$(DEPDIR)/cl_setcommandqueueproperty.Plo \ ./$(DEPDIR)/cl_setkernelarg.Plo \ ./$(DEPDIR)/cl_unloadcompiler.Plo \ ./$(DEPDIR)/cl_waitforevents.Plo ./$(DEPDIR)/command.Plo \ ./$(DEPDIR)/command_list.Plo ./$(DEPDIR)/command_queue.Plo \ ./$(DEPDIR)/debug.Plo ./$(DEPDIR)/event.Plo ./$(DEPDIR)/gc.Plo \ ./$(DEPDIR)/init.Plo ./$(DEPDIR)/mem_objects.Plo \ ./$(DEPDIR)/socl.Plo ./$(DEPDIR)/task.Plo ./$(DEPDIR)/util.Plo am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = $(libsocl_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) DIST_SOURCES = $(libsocl_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac HEADERS = $(noinst_HEADERS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/make/starpu-notests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ $(STARPU_EXPORTED_LIBS) $(STARPU_OPENCL_LDFLAGS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_CFLAGS = $(GLOBAL_AM_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) CLEANFILES = *.gcno *.gcda AM_CPPFLAGS = -DBUILDING_SOCL -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_builddir)/src -I$(top_srcdir)/src -I$(top_srcdir)/socl/src $(STARPU_H_CPPFLAGS) SUBDIRS = lib_LTLIBRARIES = libsocl-@STARPU_EFFECTIVE_VERSION@.la noinst_HEADERS = \ command.h \ command_list.h \ command_queue.h \ debug.h \ event.h \ gc.h \ getinfo.h \ mem_objects.h \ ocl_icd.h \ socl.h \ task.h \ util.h \ init.h \ CL/cl_d3d10.h \ CL/cl_ext.h \ CL/cl.h \ CL/cl_d3d11.h \ CL/cl_gl_ext.h \ CL/cl_platform.h \ CL/cl_dx9_media_sharing.h \ CL/cl_gl.h \ CL/opencl.h libsocl_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \ -version-info $(LIBSOCL_INTERFACE_CURRENT):$(LIBSOCL_INTERFACE_REVISION):$(LIBSOCL_INTERFACE_AGE) libsocl_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \ command.c \ command_list.c \ command_queue.c \ debug.c \ event.c \ gc.c \ init.c \ mem_objects.c \ socl.c \ task.c \ util.c \ cl_getplatformids.c \ cl_getplatforminfo.c \ cl_getdeviceids.c \ cl_getdeviceinfo.c \ cl_releasecontext.c \ cl_createcontext.c \ cl_createcontextfromtype.c \ cl_retaincontext.c \ cl_getcontextinfo.c \ cl_releasecommandqueue.c \ cl_createcommandqueue.c \ cl_retaincommandqueue.c \ cl_getcommandqueueinfo.c \ cl_setcommandqueueproperty.c \ cl_releaseevent.c \ cl_waitforevents.c \ cl_geteventinfo.c \ cl_retainevent.c \ cl_enqueuemarker.c \ cl_enqueuewaitforevents.c \ cl_enqueuebarrier.c \ cl_flush.c \ cl_finish.c \ cl_releasememobject.c \ cl_createbuffer.c \ cl_createimage2d.c \ cl_createimage3d.c \ cl_retainmemobject.c \ cl_getsupportedimageformats.c \ cl_getmemobjectinfo.c \ cl_getimageinfo.c \ cl_createsampler.c \ cl_retainsampler.c \ cl_releasesampler.c \ cl_getsamplerinfo.c \ cl_releaseprogram.c \ cl_createprogramwithsource.c \ cl_createprogramwithbinary.c \ cl_retainprogram.c \ cl_buildprogram.c \ cl_unloadcompiler.c \ cl_getprograminfo.c \ cl_getprogrambuildinfo.c \ cl_releasekernel.c \ cl_createkernel.c \ cl_createkernelsinprogram.c \ cl_retainkernel.c \ cl_setkernelarg.c \ cl_getkernelinfo.c \ cl_getkernelworkgroupinfo.c \ cl_enqueuereadbuffer.c \ cl_enqueuewritebuffer.c \ cl_enqueuecopybuffer.c \ cl_enqueuereadimage.c \ cl_enqueuewriteimage.c \ cl_enqueuecopyimage.c \ cl_enqueuecopyimagetobuffer.c \ cl_enqueuecopybuffertoimage.c \ cl_enqueuemapbuffer.c \ cl_enqueuemapimage.c \ cl_enqueueunmapmemobject.c \ cl_enqueuetask.c \ cl_enqueuendrangekernel.c \ cl_enqueuenativekernel.c \ cl_enqueuemarkerwithwaitlist.c \ cl_enqueuebarrierwithwaitlist.c \ cl_geteventprofilinginfo.c \ cl_getextensionfunctionaddress.c \ cl_icdgetplatformidskhr.c all: all-recursive .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign socl/src/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign socl/src/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): install-libLTLIBRARIES: $(lib_LTLIBRARIES) @$(NORMAL_INSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ list2=; for p in $$list; do \ if test -f $$p; then \ list2="$$list2 $$p"; \ else :; fi; \ done; \ test -z "$$list2" || { \ echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ } uninstall-libLTLIBRARIES: @$(NORMAL_UNINSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ for p in $$list; do \ $(am__strip_dir) \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ done clean-libLTLIBRARIES: -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) @list='$(lib_LTLIBRARIES)'; \ locs=`for p in $$list; do echo $$p; done | \ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ sort -u`; \ test -z "$$locs" || { \ echo rm -f $${locs}; \ rm -f $${locs}; \ } libsocl-@STARPU_EFFECTIVE_VERSION@.la: $(libsocl_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libsocl_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(EXTRA_libsocl_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(AM_V_CCLD)$(libsocl_@STARPU_EFFECTIVE_VERSION@_la_LINK) -rpath $(libdir) $(libsocl_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libsocl_@STARPU_EFFECTIVE_VERSION@_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_buildprogram.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createbuffer.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createcommandqueue.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createcontext.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createcontextfromtype.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createimage2d.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createimage3d.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createkernel.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createkernelsinprogram.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createprogramwithbinary.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createprogramwithsource.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_createsampler.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuebarrier.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuebarrierwithwaitlist.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuecopybuffer.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuecopybuffertoimage.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuecopyimage.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuecopyimagetobuffer.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuemapbuffer.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuemapimage.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuemarker.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuemarkerwithwaitlist.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuenativekernel.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuendrangekernel.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuereadbuffer.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuereadimage.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuetask.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueueunmapmemobject.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuewaitforevents.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuewritebuffer.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_enqueuewriteimage.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_finish.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_flush.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getcommandqueueinfo.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getcontextinfo.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getdeviceids.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getdeviceinfo.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_geteventinfo.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_geteventprofilinginfo.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getextensionfunctionaddress.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getimageinfo.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getkernelinfo.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getkernelworkgroupinfo.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getmemobjectinfo.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getplatformids.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getplatforminfo.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getprogrambuildinfo.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getprograminfo.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getsamplerinfo.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_getsupportedimageformats.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_icdgetplatformidskhr.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_releasecommandqueue.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_releasecontext.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_releaseevent.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_releasekernel.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_releasememobject.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_releaseprogram.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_releasesampler.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_retaincommandqueue.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_retaincontext.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_retainevent.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_retainkernel.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_retainmemobject.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_retainprogram.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_retainsampler.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_setcommandqueueproperty.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_setkernelarg.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_unloadcompiler.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cl_waitforevents.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/command.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/command_list.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/command_queue.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/debug.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/event.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gc.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/init.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mem_objects.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/socl.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/util.Plo@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile $(LTLIBRARIES) $(HEADERS) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(libdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ mostlyclean-am distclean: distclean-recursive -rm -f ./$(DEPDIR)/cl_buildprogram.Plo -rm -f ./$(DEPDIR)/cl_createbuffer.Plo -rm -f ./$(DEPDIR)/cl_createcommandqueue.Plo -rm -f ./$(DEPDIR)/cl_createcontext.Plo -rm -f ./$(DEPDIR)/cl_createcontextfromtype.Plo -rm -f ./$(DEPDIR)/cl_createimage2d.Plo -rm -f ./$(DEPDIR)/cl_createimage3d.Plo -rm -f ./$(DEPDIR)/cl_createkernel.Plo -rm -f ./$(DEPDIR)/cl_createkernelsinprogram.Plo -rm -f ./$(DEPDIR)/cl_createprogramwithbinary.Plo -rm -f ./$(DEPDIR)/cl_createprogramwithsource.Plo -rm -f ./$(DEPDIR)/cl_createsampler.Plo -rm -f ./$(DEPDIR)/cl_enqueuebarrier.Plo -rm -f ./$(DEPDIR)/cl_enqueuebarrierwithwaitlist.Plo -rm -f ./$(DEPDIR)/cl_enqueuecopybuffer.Plo -rm -f ./$(DEPDIR)/cl_enqueuecopybuffertoimage.Plo -rm -f ./$(DEPDIR)/cl_enqueuecopyimage.Plo -rm -f ./$(DEPDIR)/cl_enqueuecopyimagetobuffer.Plo -rm -f ./$(DEPDIR)/cl_enqueuemapbuffer.Plo -rm -f ./$(DEPDIR)/cl_enqueuemapimage.Plo -rm -f ./$(DEPDIR)/cl_enqueuemarker.Plo -rm -f ./$(DEPDIR)/cl_enqueuemarkerwithwaitlist.Plo -rm -f ./$(DEPDIR)/cl_enqueuenativekernel.Plo -rm -f ./$(DEPDIR)/cl_enqueuendrangekernel.Plo -rm -f ./$(DEPDIR)/cl_enqueuereadbuffer.Plo -rm -f ./$(DEPDIR)/cl_enqueuereadimage.Plo -rm -f ./$(DEPDIR)/cl_enqueuetask.Plo -rm -f ./$(DEPDIR)/cl_enqueueunmapmemobject.Plo -rm -f ./$(DEPDIR)/cl_enqueuewaitforevents.Plo -rm -f ./$(DEPDIR)/cl_enqueuewritebuffer.Plo -rm -f ./$(DEPDIR)/cl_enqueuewriteimage.Plo -rm -f ./$(DEPDIR)/cl_finish.Plo -rm -f ./$(DEPDIR)/cl_flush.Plo -rm -f ./$(DEPDIR)/cl_getcommandqueueinfo.Plo -rm -f ./$(DEPDIR)/cl_getcontextinfo.Plo -rm -f ./$(DEPDIR)/cl_getdeviceids.Plo -rm -f ./$(DEPDIR)/cl_getdeviceinfo.Plo -rm -f ./$(DEPDIR)/cl_geteventinfo.Plo -rm -f ./$(DEPDIR)/cl_geteventprofilinginfo.Plo -rm -f ./$(DEPDIR)/cl_getextensionfunctionaddress.Plo -rm -f ./$(DEPDIR)/cl_getimageinfo.Plo -rm -f ./$(DEPDIR)/cl_getkernelinfo.Plo -rm -f ./$(DEPDIR)/cl_getkernelworkgroupinfo.Plo -rm -f ./$(DEPDIR)/cl_getmemobjectinfo.Plo -rm -f ./$(DEPDIR)/cl_getplatformids.Plo -rm -f ./$(DEPDIR)/cl_getplatforminfo.Plo -rm -f ./$(DEPDIR)/cl_getprogrambuildinfo.Plo -rm -f ./$(DEPDIR)/cl_getprograminfo.Plo -rm -f ./$(DEPDIR)/cl_getsamplerinfo.Plo -rm -f ./$(DEPDIR)/cl_getsupportedimageformats.Plo -rm -f ./$(DEPDIR)/cl_icdgetplatformidskhr.Plo -rm -f ./$(DEPDIR)/cl_releasecommandqueue.Plo -rm -f ./$(DEPDIR)/cl_releasecontext.Plo -rm -f ./$(DEPDIR)/cl_releaseevent.Plo -rm -f ./$(DEPDIR)/cl_releasekernel.Plo -rm -f ./$(DEPDIR)/cl_releasememobject.Plo -rm -f ./$(DEPDIR)/cl_releaseprogram.Plo -rm -f ./$(DEPDIR)/cl_releasesampler.Plo -rm -f ./$(DEPDIR)/cl_retaincommandqueue.Plo -rm -f ./$(DEPDIR)/cl_retaincontext.Plo -rm -f ./$(DEPDIR)/cl_retainevent.Plo -rm -f ./$(DEPDIR)/cl_retainkernel.Plo -rm -f ./$(DEPDIR)/cl_retainmemobject.Plo -rm -f ./$(DEPDIR)/cl_retainprogram.Plo -rm -f ./$(DEPDIR)/cl_retainsampler.Plo -rm -f ./$(DEPDIR)/cl_setcommandqueueproperty.Plo -rm -f ./$(DEPDIR)/cl_setkernelarg.Plo -rm -f ./$(DEPDIR)/cl_unloadcompiler.Plo -rm -f ./$(DEPDIR)/cl_waitforevents.Plo -rm -f ./$(DEPDIR)/command.Plo -rm -f ./$(DEPDIR)/command_list.Plo -rm -f ./$(DEPDIR)/command_queue.Plo -rm -f ./$(DEPDIR)/debug.Plo -rm -f ./$(DEPDIR)/event.Plo -rm -f ./$(DEPDIR)/gc.Plo -rm -f ./$(DEPDIR)/init.Plo -rm -f ./$(DEPDIR)/mem_objects.Plo -rm -f ./$(DEPDIR)/socl.Plo -rm -f ./$(DEPDIR)/task.Plo -rm -f ./$(DEPDIR)/util.Plo -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-libLTLIBRARIES install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f ./$(DEPDIR)/cl_buildprogram.Plo -rm -f ./$(DEPDIR)/cl_createbuffer.Plo -rm -f ./$(DEPDIR)/cl_createcommandqueue.Plo -rm -f ./$(DEPDIR)/cl_createcontext.Plo -rm -f ./$(DEPDIR)/cl_createcontextfromtype.Plo -rm -f ./$(DEPDIR)/cl_createimage2d.Plo -rm -f ./$(DEPDIR)/cl_createimage3d.Plo -rm -f ./$(DEPDIR)/cl_createkernel.Plo -rm -f ./$(DEPDIR)/cl_createkernelsinprogram.Plo -rm -f ./$(DEPDIR)/cl_createprogramwithbinary.Plo -rm -f ./$(DEPDIR)/cl_createprogramwithsource.Plo -rm -f ./$(DEPDIR)/cl_createsampler.Plo -rm -f ./$(DEPDIR)/cl_enqueuebarrier.Plo -rm -f ./$(DEPDIR)/cl_enqueuebarrierwithwaitlist.Plo -rm -f ./$(DEPDIR)/cl_enqueuecopybuffer.Plo -rm -f ./$(DEPDIR)/cl_enqueuecopybuffertoimage.Plo -rm -f ./$(DEPDIR)/cl_enqueuecopyimage.Plo -rm -f ./$(DEPDIR)/cl_enqueuecopyimagetobuffer.Plo -rm -f ./$(DEPDIR)/cl_enqueuemapbuffer.Plo -rm -f ./$(DEPDIR)/cl_enqueuemapimage.Plo -rm -f ./$(DEPDIR)/cl_enqueuemarker.Plo -rm -f ./$(DEPDIR)/cl_enqueuemarkerwithwaitlist.Plo -rm -f ./$(DEPDIR)/cl_enqueuenativekernel.Plo -rm -f ./$(DEPDIR)/cl_enqueuendrangekernel.Plo -rm -f ./$(DEPDIR)/cl_enqueuereadbuffer.Plo -rm -f ./$(DEPDIR)/cl_enqueuereadimage.Plo -rm -f ./$(DEPDIR)/cl_enqueuetask.Plo -rm -f ./$(DEPDIR)/cl_enqueueunmapmemobject.Plo -rm -f ./$(DEPDIR)/cl_enqueuewaitforevents.Plo -rm -f ./$(DEPDIR)/cl_enqueuewritebuffer.Plo -rm -f ./$(DEPDIR)/cl_enqueuewriteimage.Plo -rm -f ./$(DEPDIR)/cl_finish.Plo -rm -f ./$(DEPDIR)/cl_flush.Plo -rm -f ./$(DEPDIR)/cl_getcommandqueueinfo.Plo -rm -f ./$(DEPDIR)/cl_getcontextinfo.Plo -rm -f ./$(DEPDIR)/cl_getdeviceids.Plo -rm -f ./$(DEPDIR)/cl_getdeviceinfo.Plo -rm -f ./$(DEPDIR)/cl_geteventinfo.Plo -rm -f ./$(DEPDIR)/cl_geteventprofilinginfo.Plo -rm -f ./$(DEPDIR)/cl_getextensionfunctionaddress.Plo -rm -f ./$(DEPDIR)/cl_getimageinfo.Plo -rm -f ./$(DEPDIR)/cl_getkernelinfo.Plo -rm -f ./$(DEPDIR)/cl_getkernelworkgroupinfo.Plo -rm -f ./$(DEPDIR)/cl_getmemobjectinfo.Plo -rm -f ./$(DEPDIR)/cl_getplatformids.Plo -rm -f ./$(DEPDIR)/cl_getplatforminfo.Plo -rm -f ./$(DEPDIR)/cl_getprogrambuildinfo.Plo -rm -f ./$(DEPDIR)/cl_getprograminfo.Plo -rm -f ./$(DEPDIR)/cl_getsamplerinfo.Plo -rm -f ./$(DEPDIR)/cl_getsupportedimageformats.Plo -rm -f ./$(DEPDIR)/cl_icdgetplatformidskhr.Plo -rm -f ./$(DEPDIR)/cl_releasecommandqueue.Plo -rm -f ./$(DEPDIR)/cl_releasecontext.Plo -rm -f ./$(DEPDIR)/cl_releaseevent.Plo -rm -f ./$(DEPDIR)/cl_releasekernel.Plo -rm -f ./$(DEPDIR)/cl_releasememobject.Plo -rm -f ./$(DEPDIR)/cl_releaseprogram.Plo -rm -f ./$(DEPDIR)/cl_releasesampler.Plo -rm -f ./$(DEPDIR)/cl_retaincommandqueue.Plo -rm -f ./$(DEPDIR)/cl_retaincontext.Plo -rm -f ./$(DEPDIR)/cl_retainevent.Plo -rm -f ./$(DEPDIR)/cl_retainkernel.Plo -rm -f ./$(DEPDIR)/cl_retainmemobject.Plo -rm -f ./$(DEPDIR)/cl_retainprogram.Plo -rm -f ./$(DEPDIR)/cl_retainsampler.Plo -rm -f ./$(DEPDIR)/cl_setcommandqueueproperty.Plo -rm -f ./$(DEPDIR)/cl_setkernelarg.Plo -rm -f ./$(DEPDIR)/cl_unloadcompiler.Plo -rm -f ./$(DEPDIR)/cl_waitforevents.Plo -rm -f ./$(DEPDIR)/command.Plo -rm -f ./$(DEPDIR)/command_list.Plo -rm -f ./$(DEPDIR)/command_queue.Plo -rm -f ./$(DEPDIR)/debug.Plo -rm -f ./$(DEPDIR)/event.Plo -rm -f ./$(DEPDIR)/gc.Plo -rm -f ./$(DEPDIR)/init.Plo -rm -f ./$(DEPDIR)/mem_objects.Plo -rm -f ./$(DEPDIR)/socl.Plo -rm -f ./$(DEPDIR)/task.Plo -rm -f ./$(DEPDIR)/util.Plo -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-libLTLIBRARIES .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ am--depfiles check check-am clean clean-generic \ clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \ ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am install-exec \ install-exec-am install-html install-html-am install-info \ install-info-am install-libLTLIBRARIES install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs installdirs-am \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ uninstall-libLTLIBRARIES .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/socl/src/cl_buildprogram.c000066400000000000000000000072531507764646700210570ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" struct bp_data { cl_program program; char * options; const cl_device_id * device_list; cl_uint num_devices; }; static void soclBuildProgram_task(void *data) { struct bp_data *d = (struct bp_data*)data; cl_device_id device; cl_int err; unsigned int i; int wid = starpu_worker_get_id_check(); /* Check if the kernel has to be built for this device */ for (i=0; i <= d->num_devices; i++) { if (i == d->num_devices) return; if (d->device_list[i]->worker_id == wid) break; } int range = starpu_worker_get_range(); starpu_opencl_get_device(wid, &device); DEBUG_MSG("[Worker %d] Building program...\n", wid); cl_device_type dev_type; clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(cl_device_type), &dev_type, NULL); char * dev_type_str = (dev_type == CL_DEVICE_TYPE_CPU ? "CPU" : dev_type == CL_DEVICE_TYPE_GPU ? "GPU" : dev_type == CL_DEVICE_TYPE_ACCELERATOR ? "ACCELERATOR" : "UNKNOWN"); char opts[4096]; snprintf(opts, sizeof(opts), "-DSOCL_DEVICE_TYPE_%s %s", dev_type_str, (d->options != NULL ? d->options : "")); err = clBuildProgram(d->program->cl_programs[range], 1, &device, opts, NULL, NULL); if (err != CL_SUCCESS) { size_t len; clGetProgramBuildInfo(d->program->cl_programs[range], device, CL_PROGRAM_BUILD_LOG, 0, NULL, &len); char * buffer = malloc(len+1); buffer[len] = '\0'; clGetProgramBuildInfo(d->program->cl_programs[range], device, CL_PROGRAM_BUILD_LOG, len, buffer, NULL); DEBUG_CL("clBuildProgram", err); ERROR_MSG("clBuildProgram: %s\n Aborting.\n", buffer); free(buffer); } DEBUG_MSG("[Worker %d] Done building.\n", wid); } CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclBuildProgram(cl_program program, cl_uint num_devices, const cl_device_id * device_list, const char * options, void (CL_CALLBACK *pfn_notify)(cl_program program, void * user_data), void * user_data) { struct bp_data *data; program->options = options != NULL ? strdup(options) : NULL; program->options_size = options != NULL ? strlen(options)+1 : 0; data = (struct bp_data*)malloc(sizeof(struct bp_data)); gc_entity_store(&data->program, program); data->options = (char*)options; /* If the device list is empty, we compile for every device in the context associated to the program */ if (device_list == NULL) { num_devices = program->context->num_devices; device_list = program->context->devices; } data->num_devices = num_devices; data->device_list = device_list; /*FIXME: starpu_execute_on_specific_workers is synchronous. * However pfn_notify is useful only because build is supposed to be asynchronous */ unsigned workers[num_devices]; unsigned i; for (i=0; iworker_id; } starpu_execute_on_specific_workers(soclBuildProgram_task, data, num_devices, workers, "SOCL_BUILD_PROGRAM"); if (pfn_notify != NULL) pfn_notify(program, user_data); gc_entity_unstore(&data->program); free(data); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_createbuffer.c000066400000000000000000000100741507764646700210200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" static void release_callback_memobject(void * e) { cl_mem mem = (cl_mem)e; /* Release references */ gc_entity_unstore(&mem->context); //Delete this mem_object from the mem_object list mem_object_release(mem); /* Destruct object */ starpu_data_unregister_submit(mem->handle); if (!(mem->flags & CL_MEM_USE_HOST_PTR)) free(mem->ptr); } /** * \brief Create a buffer * * A buffer has always an allocated region in host memory. If CL_MEM_USE_HOST_PTR * is set, we use memory pointed by host_ptr, otherwise some host memory is * allocated. * * If CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR are set, memory pointed by host_ptr * is not coherent. To enforce coherency, you have to map the buffer (clEnqueueMapBuffer). * * If CL_MEM_COPY_HOST_PTR is set, the buffer will be duplicated in host memory. You * should avoid it. * */ CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_mem CL_API_CALL soclCreateBuffer(cl_context context, cl_mem_flags flags, size_t size, void * host_ptr, cl_int * errcode_ret) { cl_mem mem; if (errcode_ret != NULL) *errcode_ret = CL_SUCCESS; //Check flags if (((flags & CL_MEM_READ_ONLY) && (flags & CL_MEM_WRITE_ONLY)) || ((flags & CL_MEM_READ_WRITE) && (flags & CL_MEM_READ_ONLY)) || ((flags & CL_MEM_READ_WRITE) && (flags & CL_MEM_WRITE_ONLY)) || ((flags & CL_MEM_USE_HOST_PTR) && (flags & CL_MEM_ALLOC_HOST_PTR)) || ((flags & CL_MEM_USE_HOST_PTR) && (flags & CL_MEM_COPY_HOST_PTR))) { if (errcode_ret != NULL) *errcode_ret = CL_INVALID_VALUE; return NULL; } if (size == 0) { if (errcode_ret != NULL) *errcode_ret = CL_INVALID_BUFFER_SIZE; return NULL; } if ((host_ptr == NULL && (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) || (host_ptr != NULL && !(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)))) { if (errcode_ret != NULL) *errcode_ret = CL_INVALID_HOST_PTR; return NULL; } //Alloc cl_mem structure mem = (cl_mem)gc_entity_alloc(sizeof(struct _cl_mem), release_callback_memobject, "buffer"); if (mem == NULL) { if (errcode_ret != NULL) *errcode_ret = CL_OUT_OF_HOST_MEMORY; return NULL; } mem->ptr = NULL; mem->map_count = 0; gc_entity_store(&mem->context, context); mem->flags = flags; mem->size = size; mem->host_ptr = host_ptr; #ifdef DEBUG static int id = 0; mem->id = id++; #endif mem_object_store(mem); //TODO: we shouldn't allocate the buffer ourselves. StarPU allocates it if a NULL pointer is given // If not MEM_USE_HOST_PTR, we need to alloc the buffer ourselves if (!(flags & CL_MEM_USE_HOST_PTR)) { mem->ptr = malloc(size); if (mem->ptr == NULL) { if (errcode_ret != NULL) *errcode_ret = CL_MEM_OBJECT_ALLOCATION_FAILURE; free(mem); return NULL; } //The buffer doesn't contain meaningful data mem->scratch = 1; } else { //The buffer may contain meaningful data mem->scratch = 0; mem->ptr = host_ptr; } // Access mode mem->mode = (flags & CL_MEM_READ_ONLY) ? CL_MEM_READ_ONLY : (flags & CL_MEM_WRITE_ONLY) ? CL_MEM_WRITE_ONLY : CL_MEM_READ_WRITE; // Perform data copy if necessary if (flags & CL_MEM_COPY_HOST_PTR) memcpy(mem->ptr, host_ptr, size); // Create StarPU buffer (on home node? what's this?) starpu_variable_data_register(&mem->handle, STARPU_MAIN_RAM, (uintptr_t)mem->ptr, size); DEBUG_MSG("[Buffer %d] Initialized (cl_mem %p handle %p)\n", mem->id, mem, mem->handle); return mem; } starpu-1.4.9+dfsg/socl/src/cl_createcommandqueue.c000066400000000000000000000044521507764646700222350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" static void release_callback_command_queue(void * e) { cl_command_queue cq = (cl_command_queue)e; //Disable StarPU profiling if necessary if (cq->properties & CL_QUEUE_PROFILING_ENABLE) { profiling_queue_count -= 1; if (profiling_queue_count == 0) starpu_profiling_status_set(STARPU_PROFILING_DISABLE); } /* Release references */ gc_entity_unstore(&cq->context); /* Destruct object */ STARPU_PTHREAD_MUTEX_DESTROY(&cq->mutex); } CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_command_queue CL_API_CALL soclCreateCommandQueue(cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int * errcode_ret) { cl_command_queue cq; cq = (cl_command_queue)gc_entity_alloc(sizeof(struct _cl_command_queue), release_callback_command_queue, "command_queue"); if (cq == NULL) { if (errcode_ret != NULL) *errcode_ret = CL_OUT_OF_HOST_MEMORY; return NULL; } cq->properties = properties; gc_entity_store(&cq->context, context); char * fd = getenv("SOCL_FORCE_DYNAMIC"); int force_dynamic = fd == NULL ? 0 : atoi(fd); cq->device = force_dynamic ? NULL : device; #ifdef DEBUG static int id = 0; cq->id = id++; #endif //Enable StarPU profiling if necessary if (properties & CL_QUEUE_PROFILING_ENABLE) { if (profiling_queue_count == 0) starpu_profiling_status_set(STARPU_PROFILING_ENABLE); profiling_queue_count += 1; } cq->commands = NULL; cq->barrier = NULL; STARPU_PTHREAD_MUTEX_INIT(&cq->mutex, NULL); if (errcode_ret != NULL) *errcode_ret = CL_SUCCESS; return cq; } starpu-1.4.9+dfsg/socl/src/cl_createcontext.c000066400000000000000000000074101507764646700212330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" static void release_callback_context(void * e) { cl_context context = (cl_context)e; /* Destruct object */ if (context->properties != NULL) free(context->properties); //FIXME: should we free StarPU contexts? //starpu_sched_ctx_finished_submit(context->sched_ctx); free(context->devices); } static char * defaultScheduler = "dmda"; static char * defaultName = "default"; CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_context CL_API_CALL soclCreateContext(const cl_context_properties * properties, cl_uint num_devices, const cl_device_id * devices, void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), void * user_data, cl_int * errcode_ret) { if (pfn_notify == NULL && user_data != NULL) { if (errcode_ret != NULL) *errcode_ret = CL_INVALID_VALUE; return NULL; } //Check properties if (properties != NULL) { const cl_context_properties *p = properties; int i = 0; while (p[i] != 0) { switch (p[i]) { case CL_CONTEXT_PLATFORM: i++; if (p[i] != ((cl_context_properties)&socl_platform)) { if (errcode_ret != NULL) *errcode_ret = CL_INVALID_PLATFORM; return NULL; } break; case CL_CONTEXT_SCHEDULER_SOCL: case CL_CONTEXT_NAME_SOCL: i++; if (p[i] == 0) { if (errcode_ret != NULL) *errcode_ret = CL_INVALID_PROPERTY; return NULL; } break; } i++; } } cl_context ctx; ctx = (cl_context)gc_entity_alloc(sizeof(struct _cl_context), release_callback_context, "context"); if (ctx == NULL) { if (errcode_ret != NULL) *errcode_ret = CL_OUT_OF_HOST_MEMORY; return NULL; } ctx->num_properties = 0; ctx->properties = NULL; char * sched = getenv("STARPU_SCHED"); char * scheduler = sched == NULL ? defaultScheduler : sched; char * name = defaultName; // Properties if (properties != NULL) { //Count properties const cl_context_properties * p = properties; do { ctx->num_properties++; p++; } while (*p != 0); //Copy properties ctx->properties = malloc(sizeof(cl_context_properties) * ctx->num_properties); memcpy(ctx->properties, properties, sizeof(cl_context_properties) * ctx->num_properties); //Selected scheduler cl_uint i = 0; for (i=0; inum_properties; i++) { if (p[i] == CL_CONTEXT_SCHEDULER_SOCL) { i++; scheduler = (char*)p[i]; } if (p[i] == CL_CONTEXT_NAME_SOCL) { i++; name = (char*)p[i]; } } } ctx->pfn_notify = pfn_notify; ctx->user_data = user_data; ctx->num_devices = num_devices; #ifdef DEBUG static int id = 0; ctx->id = id++; #endif ctx->devices = malloc(sizeof(cl_device_id) * num_devices); memcpy(ctx->devices, devices, sizeof(cl_device_id)*num_devices); // Create context int workers[num_devices]; unsigned int i; for (i=0; idevices[i]->worker_id; } ctx->sched_ctx = starpu_sched_ctx_create(workers, num_devices, name, STARPU_SCHED_CTX_POLICY_NAME, scheduler, 0); if (errcode_ret != NULL) *errcode_ret = CL_SUCCESS; return ctx; } starpu-1.4.9+dfsg/socl/src/cl_createcontextfromtype.c000066400000000000000000000027501507764646700230230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2012-2012 Vincent Danjean * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "init.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_context CL_API_CALL soclCreateContextFromType(const cl_context_properties * properties, cl_device_type device_type, void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), void * user_data, cl_int * errcode_ret) { if (socl_init_starpu() < 0) return NULL; //TODO: appropriate error messages cl_uint num_devices; soclGetDeviceIDs(&socl_platform, device_type, 0, NULL, &num_devices); cl_device_id devices[num_devices]; soclGetDeviceIDs(&socl_platform, device_type, num_devices, devices, NULL); return soclCreateContext(properties, num_devices, devices, pfn_notify, user_data, errcode_ret); } starpu-1.4.9+dfsg/socl/src/cl_createimage2d.c000066400000000000000000000023271507764646700210610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_mem CL_API_CALL soclCreateImage2D(cl_context UNUSED(context), cl_mem_flags UNUSED(flags), const cl_image_format * UNUSED(image_format), size_t UNUSED(image_width), size_t UNUSED(image_height), size_t UNUSED(image_row_pitch), void * UNUSED(host_ptr), cl_int * errcode_ret) { if (errcode_ret != NULL) *errcode_ret = CL_INVALID_OPERATION; return NULL; } starpu-1.4.9+dfsg/socl/src/cl_createimage3d.c000066400000000000000000000024771507764646700210700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_mem CL_API_CALL soclCreateImage3D(cl_context UNUSED(context), cl_mem_flags UNUSED(flags), const cl_image_format * UNUSED(image_format), size_t UNUSED(image_width), size_t UNUSED(image_height), size_t UNUSED(image_depth), size_t UNUSED(image_row_pitch), size_t UNUSED(image_slice_pitch), void * UNUSED(host_ptr), cl_int * errcode_ret) { if (errcode_ret != NULL) *errcode_ret = CL_INVALID_OPERATION; return NULL; } starpu-1.4.9+dfsg/socl/src/cl_createkernel.c000066400000000000000000000125331507764646700210310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" static void soclCreateKernel_task(void *data) { struct _cl_kernel *k = (struct _cl_kernel *)data; int range = starpu_worker_get_range(); cl_int err; if (k->program->cl_programs[range] == NULL) { k->errcodes[range] = CL_SUCCESS; DEBUG_MSG("[Device %u] Kernel creation skipped: program has not been built for this device.\n", starpu_worker_get_id_check()); return; } DEBUG_MSG("[Device %u] Creating kernel...\n", starpu_worker_get_id_check()); k->cl_kernels[range] = clCreateKernel(k->program->cl_programs[range], k->kernel_name, &err); if (err != CL_SUCCESS) { k->errcodes[range] = err; ERROR_STOP("[Device %u] Unable to create kernel. Error %d. Aborting.\n", starpu_worker_get_id_check(), err); return; } /* One worker creates argument structures */ if (STARPU_BOOL_COMPARE_AND_SWAP(&k->num_args, 0, 666)) { unsigned int i; cl_uint num_args; err = clGetKernelInfo(k->cl_kernels[range], CL_KERNEL_NUM_ARGS, sizeof(num_args), &num_args, NULL); if (err != CL_SUCCESS) { DEBUG_CL("clGetKernelInfo", err); ERROR_STOP("Unable to get kernel argument count. Aborting.\n"); } k->num_args = num_args; DEBUG_MSG("Kernel has %u arguments\n", num_args); k->arg_size = (size_t*)malloc(sizeof(size_t) * num_args); k->arg_value = (void**)malloc(sizeof(void*) * num_args); k->arg_type = (enum kernel_arg_type*)malloc(sizeof(enum kernel_arg_type) * num_args); /* Settings default type to NULL */ for (i=0; iarg_value[i] = NULL; k->arg_type[i] = Null; } } } static void release_callback_kernel(void * e) { cl_kernel kernel = (cl_kernel)e; //Free args unsigned int i; for (i=0; inum_args; i++) { switch (kernel->arg_type[i]) { case Null: case Buffer: break; case Immediate: free(kernel->arg_value[i]); break; } } if (kernel->arg_size != NULL) free(kernel->arg_size); if (kernel->arg_value != NULL) free(kernel->arg_value); if (kernel->arg_type != NULL) free(kernel->arg_type); //Release real kernels... for (i=0; icl_kernels[i] != NULL) { cl_int err = clReleaseKernel(kernel->cl_kernels[i]); if (err != CL_SUCCESS) DEBUG_CL("clReleaseKernel", err); } } //Release perfmodel //FIXME: we cannot release performance models before StarPU shutdown as it //will use them to store kernel execution times //free(kernel->perfmodel); //free(kernel->kernel_name); gc_entity_unstore(&kernel->program); free(kernel->cl_kernels); free(kernel->errcodes); } CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_kernel CL_API_CALL soclCreateKernel(cl_program program, const char * kernel_name, cl_int * errcode_ret) { cl_kernel k; if (program == NULL) { if (errcode_ret != NULL) *errcode_ret = CL_INVALID_PROGRAM; return NULL; } //TODO: check programs (see opencl specs) /* Create Kernel structure */ k = (cl_kernel)gc_entity_alloc(sizeof(struct _cl_kernel), release_callback_kernel, "kernel"); if (k == NULL) { if (errcode_ret != NULL) *errcode_ret = CL_OUT_OF_HOST_MEMORY; return NULL; } gc_entity_store(&k->program, program); k->kernel_name = strdup(kernel_name); k->perfmodel = malloc(sizeof(struct starpu_perfmodel)); memset(k->perfmodel, 0, sizeof(struct starpu_perfmodel)); k->perfmodel->type = STARPU_HISTORY_BASED; k->perfmodel->symbol = k->kernel_name; k->num_args = 0; k->arg_value = NULL; k->arg_size = NULL; k->split_func = NULL; k->split_space = 0; k->split_data = NULL; k->split_perfs = NULL; STARPU_PTHREAD_MUTEX_INIT(&k->split_lock, NULL); #ifdef DEBUG static int id = 0; k->id = id++; #endif k->cl_kernels = (cl_kernel*)malloc(socl_device_count * sizeof(cl_kernel)); k->errcodes = (cl_int*)malloc(socl_device_count * sizeof(cl_int)); { unsigned int i; for (i=0; icl_kernels[i] = NULL; k->errcodes[i] = -9999; } } /* Create kernel on each device */ DEBUG_MSG("[Kernel %d] Create %u kernels (name \"%s\")\n", k->id, socl_device_count, kernel_name); starpu_execute_on_each_worker_ex(soclCreateKernel_task, k, STARPU_OPENCL, "SOCL_CREATE_KERNEL"); if (errcode_ret != NULL) { unsigned int i; *errcode_ret = CL_SUCCESS; for (i=0; ierrcodes[i]) { #define CASE_RET(e) case e: *errcode_ret = e; return k CASE_RET(CL_INVALID_PROGRAM); CASE_RET(CL_INVALID_PROGRAM_EXECUTABLE); CASE_RET(CL_INVALID_KERNEL_NAME); CASE_RET(CL_INVALID_KERNEL_DEFINITION); CASE_RET(CL_INVALID_VALUE); CASE_RET(CL_OUT_OF_RESOURCES); CASE_RET(CL_OUT_OF_HOST_MEMORY); #undef CASE_RET } } if (k->num_args == 666) { *errcode_ret = CL_INVALID_PROGRAM_EXECUTABLE; return k; } } return k; } starpu-1.4.9+dfsg/socl/src/cl_createkernelsinprogram.c000066400000000000000000000017361507764646700231360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclCreateKernelsInProgram(cl_program UNUSED(program), cl_uint UNUSED(num_kernels), cl_kernel * UNUSED(kernels), cl_uint * UNUSED(num_kernels_ret)) { //TODO return CL_INVALID_OPERATION; } starpu-1.4.9+dfsg/socl/src/cl_createprogramwithbinary.c000066400000000000000000000023761507764646700233250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_program CL_API_CALL soclCreateProgramWithBinary(cl_context UNUSED(context), cl_uint UNUSED(num_devices), const cl_device_id * UNUSED(device_list), const size_t * UNUSED(lengths), const unsigned char ** UNUSED(binaries), cl_int * UNUSED(binary_status), cl_int * errcode_ret) { //TODO if (errcode_ret != NULL) *errcode_ret = CL_INVALID_OPERATION; return NULL; } starpu-1.4.9+dfsg/socl/src/cl_createprogramwithsource.c000066400000000000000000000077441507764646700233450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" struct cpws_data { struct _cl_program *program; cl_int *errcodes; cl_uint count; char **strings; size_t *lengths; }; static void soclCreateProgramWithSource_task(void *data) { struct cpws_data *d = (struct cpws_data*)data; cl_context context; int wid = starpu_worker_get_id_check(); DEBUG_MSG("Worker id: %d\n", wid); int range = starpu_worker_get_range(); starpu_opencl_get_context(wid, &context); d->program->cl_programs[range] = clCreateProgramWithSource(context, d->count, (const char**)d->strings, d->lengths, &d->errcodes[range]); } static void release_callback_program(void * e) { cl_program program = (cl_program)e; unsigned int i; for (i=0; icl_programs[i] != NULL) { cl_int err = clReleaseProgram(program->cl_programs[i]); if (err != CL_SUCCESS) DEBUG_CL("clReleaseProgram", err); } } /* Release references */ gc_entity_unstore(&program->context); free(program->cl_programs); if (program->options != NULL) free(program->options); } CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_program CL_API_CALL soclCreateProgramWithSource(cl_context context, cl_uint count, const char ** strings, const size_t * lengths, cl_int * errcode_ret) { cl_program p; struct cpws_data *data; unsigned int i; if (errcode_ret != NULL) *errcode_ret = CL_SUCCESS; /* Check arguments */ if (count == 0 || strings == NULL) { if (errcode_ret != NULL) *errcode_ret = CL_INVALID_VALUE; return NULL; } /* Alloc cl_program structure */ p = (cl_program)gc_entity_alloc(sizeof(struct _cl_program), release_callback_program, "program"); if (p == NULL) { if (errcode_ret != NULL) *errcode_ret = CL_OUT_OF_HOST_MEMORY; return NULL; } gc_entity_store(&p->context, context); p->options = NULL; #ifdef DEBUG static int id = 0; p->id = id++; #endif p->cl_programs = (cl_program*)malloc(sizeof(cl_program) * socl_device_count); if (p->cl_programs == NULL) { if (errcode_ret != NULL) *errcode_ret = CL_OUT_OF_HOST_MEMORY; return NULL; } { for (i=0; icl_programs[i] = NULL; } /* Construct structure to pass arguments to workers */ data = (struct cpws_data*)malloc(sizeof(struct cpws_data)); if (data == NULL) { if (errcode_ret != NULL) *errcode_ret = CL_OUT_OF_HOST_MEMORY; free(p->cl_programs); return NULL; } data->count = count; data->program = p; data->strings = (char**)strings; data->lengths = (size_t*)lengths; data->errcodes = (cl_int*)malloc(sizeof(cl_int) * socl_device_count); for (i=0; ierrcodes[i] = CL_SUCCESS; } /* Init real cl_program for each OpenCL device */ unsigned workers[context->num_devices]; for (i=0; inum_devices; i++) { workers[i] = context->devices[i]->worker_id; } starpu_execute_on_specific_workers(soclCreateProgramWithSource_task, data, context->num_devices, workers, "SOCL_CREATE_PROGRAM"); if (errcode_ret != NULL) { *errcode_ret = CL_SUCCESS; for (i=0; ierrcodes[i] != CL_SUCCESS) { DEBUG_MSG("Worker [%u] failed\n", i); DEBUG_CL("clCreateProgramWithSource", data->errcodes[i]); *errcode_ret = data->errcodes[i]; break; } } } free(data->errcodes); free(data); return p; } starpu-1.4.9+dfsg/socl/src/cl_createsampler.c000066400000000000000000000021011507764646700212020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_sampler CL_API_CALL soclCreateSampler(cl_context UNUSED(context), cl_bool UNUSED(normalized_coords), cl_addressing_mode UNUSED(addressing_mode), cl_filter_mode UNUSED(filter_mode), cl_int * errcode_ret) { if (errcode_ret != NULL) *errcode_ret = CL_INVALID_OPERATION; return NULL; } starpu-1.4.9+dfsg/socl/src/cl_enqueuebarrier.c000066400000000000000000000020671507764646700214040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueBarrier(cl_command_queue cq) { command_barrier cmd = command_barrier_create(); command_queue_enqueue(cq, cmd, 0, NULL); return CL_SUCCESS; } cl_int command_barrier_submit(command_barrier cmd) { struct starpu_task *task; task = task_create(CL_COMMAND_BARRIER); return task_submit(task, cmd); } starpu-1.4.9+dfsg/socl/src/cl_enqueuebarrierwithwaitlist.c000066400000000000000000000021331507764646700240530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_2 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueBarrierWithWaitList(cl_command_queue cq, cl_uint num_events, const cl_event * events, cl_event * event) { command_barrier cmd = command_barrier_create(); cl_event ev = command_event_get(cmd); command_queue_enqueue(cq, cmd, num_events, events); RETURN_EVENT(ev, event); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_enqueuecopybuffer.c000066400000000000000000000066111507764646700221210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" static void soclEnqueueCopyBuffer_opencl_task(void *descr[], void *args) { int wid; cl_command_queue cq; cl_event ev; command_copy_buffer cmd = (command_copy_buffer)args; cl_event event = command_event_get(cmd); event->prof_start = _socl_nanotime(); gc_entity_release(event); wid = starpu_worker_get_id_check(); starpu_opencl_get_queue(wid, &cq); cl_mem src = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]); cl_mem dst = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[1]); clEnqueueCopyBuffer(cq, src,dst, cmd->src_offset, cmd->dst_offset, cmd->cb, 0, NULL, &ev); clWaitForEvents(1, &ev); clReleaseEvent(ev); gc_entity_release_cmd(cmd); } static void soclEnqueueCopyBuffer_cpu_task(void *descr[], void *args) { command_copy_buffer cmd = (command_copy_buffer)args; cl_event ev = command_event_get(cmd); ev->prof_start = _socl_nanotime(); gc_entity_release(ev); char * src = (void*)STARPU_VARIABLE_GET_PTR(descr[0]); char * dst = (void*)STARPU_VARIABLE_GET_PTR(descr[1]); memcpy(dst+cmd->dst_offset, src+cmd->src_offset, cmd->cb); gc_entity_release_cmd(cmd); } static struct starpu_perfmodel copy_buffer_perfmodel = { .type = STARPU_HISTORY_BASED, .symbol = "SOCL_COPY_BUFFER" }; static struct starpu_codelet codelet_copybuffer = { .where = STARPU_CPU | STARPU_OPENCL, .model = ©_buffer_perfmodel, .cpu_funcs = { &soclEnqueueCopyBuffer_cpu_task }, .opencl_funcs = { &soclEnqueueCopyBuffer_opencl_task }, .modes = {STARPU_R, STARPU_RW}, .nbuffers = 2 }; cl_int command_copy_buffer_submit(command_copy_buffer cmd) { struct starpu_task * task = task_create(CL_COMMAND_COPY_BUFFER); task->handles[0] = cmd->src_buffer->handle; task->handles[1] = cmd->dst_buffer->handle; task->cl = &codelet_copybuffer; /* Execute the task on a specific worker? */ if (cmd->_command.event->cq->device != NULL) { task->execute_on_a_specific_worker = 1; task->workerid = cmd->_command.event->cq->device->worker_id; } gc_entity_store_cmd(&task->cl_arg, cmd); task->cl_arg_size = sizeof(*cmd); cmd->dst_buffer->scratch = 0; task_submit(task, cmd); return CL_SUCCESS; } CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueCopyBuffer(cl_command_queue cq, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t cb, cl_uint num_events, const cl_event * events, cl_event * event) { command_copy_buffer cmd = command_copy_buffer_create(src_buffer, dst_buffer, src_offset, dst_offset, cb); cl_event ev = command_event_get(cmd); command_queue_enqueue(cq, cmd, num_events, events); RETURN_EVENT(ev, event); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_enqueuecopybuffertoimage.c000066400000000000000000000023241507764646700234640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueCopyBufferToImage(cl_command_queue UNUSED(command_queue), cl_mem UNUSED(src_buffer), cl_mem UNUSED(dst_image), size_t UNUSED(src_offset), const size_t * UNUSED(dst_origin), const size_t * UNUSED(region), cl_uint UNUSED(num_events_in_wait_list), const cl_event * UNUSED(event_wait_list), cl_event * UNUSED(event)) { return CL_INVALID_OPERATION; } starpu-1.4.9+dfsg/socl/src/cl_enqueuecopyimage.c000066400000000000000000000023451507764646700217320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueCopyImage(cl_command_queue UNUSED(command_queue), cl_mem UNUSED(src_image), cl_mem UNUSED(dst_image), const size_t * UNUSED(src_origin), const size_t * UNUSED(dst_origin), const size_t * UNUSED(region), cl_uint UNUSED(num_events_in_wait_list), const cl_event * UNUSED(event_wait_list), cl_event * UNUSED(event)) { return CL_INVALID_OPERATION; } starpu-1.4.9+dfsg/socl/src/cl_enqueuecopyimagetobuffer.c000066400000000000000000000023241507764646700234640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueCopyImageToBuffer(cl_command_queue UNUSED(command_queue), cl_mem UNUSED(src_image), cl_mem UNUSED(dst_buffer), const size_t * UNUSED(src_origin), const size_t * UNUSED(region), size_t UNUSED(dst_offset), cl_uint UNUSED(num_events_in_wait_list), const cl_event * UNUSED(event_wait_list), cl_event * UNUSED(event)) { return CL_INVALID_OPERATION; } starpu-1.4.9+dfsg/socl/src/cl_enqueuemapbuffer.c000066400000000000000000000041041507764646700217170ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" static void mapbuffer_task(void *args) { command_map_buffer cmd = (command_map_buffer)args; cl_event ev = command_event_get(cmd); ev->prof_start = _socl_nanotime(); gc_entity_release(ev); enum starpu_data_access_mode mode = (cmd->map_flags == CL_MAP_READ ? STARPU_R : STARPU_RW); starpu_data_acquire_cb(cmd->buffer->handle, mode, command_completed_task_callback, cmd); } static struct starpu_codelet codelet_mapbuffer = { .name = "SOCL_MAP_BUFFER" }; cl_int command_map_buffer_submit(command_map_buffer cmd) { gc_entity_retain(cmd); cpu_task_submit(cmd, mapbuffer_task, cmd, 0, 0, &codelet_mapbuffer, 0, NULL); return CL_SUCCESS; } CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY void * CL_API_CALL soclEnqueueMapBuffer(cl_command_queue cq, cl_mem buffer, cl_bool blocking, cl_map_flags map_flags, size_t offset, size_t cb, cl_uint num_events, const cl_event * events, cl_event * event, cl_int * errcode_ret) { command_map_buffer cmd = command_map_buffer_create(buffer, map_flags, offset, cb); cl_event ev = command_event_get(cmd); command_queue_enqueue(cq, cmd, num_events, events); if (errcode_ret != NULL) *errcode_ret = CL_SUCCESS; MAY_BLOCK_THEN_RETURN_EVENT(ev,blocking,event); return (void*)(starpu_variable_get_local_ptr(buffer->handle) + offset); } starpu-1.4.9+dfsg/socl/src/cl_enqueuemapimage.c000066400000000000000000000025701507764646700215350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY void * CL_API_CALL soclEnqueueMapImage(cl_command_queue UNUSED(command_queue), cl_mem UNUSED(image), cl_bool UNUSED(blocking_map), cl_map_flags UNUSED(map_flags), const size_t * UNUSED(origin), const size_t * UNUSED(region), size_t * UNUSED(image_row_pitch), size_t * UNUSED(image_slice_pitch), cl_uint UNUSED(num_events_in_wait_list), const cl_event * UNUSED(event_wait_list), cl_event * UNUSED(event), cl_int * errcode_ret) { if (errcode_ret != NULL) *errcode_ret = CL_INVALID_OPERATION; return NULL; } starpu-1.4.9+dfsg/socl/src/cl_enqueuemarker.c000066400000000000000000000023041507764646700212310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueMarker(cl_command_queue cq, cl_event * event) { if (event == NULL) return CL_INVALID_VALUE; command_marker cmd = command_marker_create(); cl_event ev = command_event_get(cmd); command_queue_enqueue(cq, cmd, 0, NULL); RETURN_EVENT(ev, event); return CL_SUCCESS; } cl_int command_marker_submit(command_marker cmd) { struct starpu_task *task; task = task_create(CL_COMMAND_MARKER); return task_submit(task, cmd); } starpu-1.4.9+dfsg/socl/src/cl_enqueuemarkerwithwaitlist.c000066400000000000000000000022631507764646700237120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_2 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueMarkerWithWaitList(cl_command_queue cq, cl_uint num_events, const cl_event * events, cl_event * event) { if (events == NULL) return soclEnqueueBarrierWithWaitList(cq, num_events, events, event); command_marker cmd = command_marker_create(); cl_event ev = command_event_get(cmd); command_queue_enqueue(cq, cmd, num_events, events); RETURN_EVENT(ev, event); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_enqueuenativekernel.c000066400000000000000000000023471507764646700224460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueNativeKernel(cl_command_queue UNUSED(command_queue), __attribute__((unused)) void (*user_func)(void *), void * UNUSED(args), size_t UNUSED(cb_args), cl_uint UNUSED(num_mem_objects), const cl_mem * UNUSED(mem_list), const void ** UNUSED(args_mem_loc), cl_uint UNUSED(num_events_in_wait_list), const cl_event * UNUSED(event_wait_list), cl_event * UNUSED(event)) { return CL_INVALID_OPERATION; } starpu-1.4.9+dfsg/socl/src/cl_enqueuendrangekernel.c000066400000000000000000000152441507764646700225760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "event.h" void soclEnqueueNDRangeKernel_task(void *descr[], void *args) { command_ndrange_kernel cmd = (command_ndrange_kernel)args; cl_command_queue cq; int wid; cl_int err; cl_event ev = command_event_get(cmd); ev->prof_start = _socl_nanotime(); gc_entity_release(ev); wid = starpu_worker_get_id_check(); starpu_opencl_get_queue(wid, &cq); DEBUG_MSG("[worker %d] [kernel %d] Executing kernel...\n", wid, cmd->kernel->id); int range = starpu_worker_get_range(); /* Set arguments */ { unsigned int i; int buf = 0; for (i=0; inum_args; i++) { switch (cmd->arg_types[i]) { case Null: err = clSetKernelArg(cmd->kernel->cl_kernels[range], i, cmd->arg_sizes[i], NULL); break; case Buffer: { cl_mem mem; mem = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[buf]); err = clSetKernelArg(cmd->kernel->cl_kernels[range], i, cmd->arg_sizes[i], &mem); buf++; } break; case Immediate: err = clSetKernelArg(cmd->kernel->cl_kernels[range], i, cmd->arg_sizes[i], cmd->args[i]); break; } if (err != CL_SUCCESS) { DEBUG_CL("clSetKernelArg", err); DEBUG_ERROR("Aborting\n"); } } } /* Calling Kernel */ cl_event event; err = clEnqueueNDRangeKernel(cq, cmd->kernel->cl_kernels[range], cmd->work_dim, cmd->global_work_offset, cmd->global_work_size, cmd->local_work_size, 0, NULL, &event); if (err != CL_SUCCESS) { ERROR_MSG("Worker[%d] Unable to Enqueue kernel (error %d)\n", wid, err); DEBUG_CL("clEnqueueNDRangeKernel", err); DEBUG_MSG("Workdim %u, global_work_offset %p, global_work_size %p, local_work_size %p\n", cmd->work_dim, cmd->global_work_offset, cmd->global_work_size, cmd->local_work_size); DEBUG_MSG("Global work size: %ld %ld %ld\n", (long)cmd->global_work_size[0], (long)(cmd->work_dim > 1 ? cmd->global_work_size[1] : 1), (long)(cmd->work_dim > 2 ? cmd->global_work_size[2] : 1)); if (cmd->local_work_size != NULL) DEBUG_MSG("Local work size: %ld %ld %ld\n", (long)cmd->local_work_size[0], (long)(cmd->work_dim > 1 ? cmd->local_work_size[1] : 1), (long)(cmd->work_dim > 2 ? cmd->local_work_size[2] : 1)); } else { /* Waiting for kernel to terminate */ clWaitForEvents(1, &event); clReleaseEvent(event); } } /** * Real kernel enqueuing command */ cl_int command_ndrange_kernel_submit(command_ndrange_kernel cmd) { starpu_task task = task_create(CL_COMMAND_NDRANGE_KERNEL); task->cl = &cmd->codelet; task->cl->model = cmd->kernel->perfmodel; task->cl_arg = cmd; task->cl_arg_size = sizeof(cmd); /* Execute the task on a specific worker? */ if (cmd->_command.event->cq->device != NULL) { task->execute_on_a_specific_worker = 1; task->workerid = cmd->_command.event->cq->device->worker_id; } struct starpu_codelet * codelet = task->cl; /* We need to detect which parameters are OpenCL's memory objects and * we retrieve their corresponding StarPU buffers */ cmd->num_buffers = 0; cmd->buffers = malloc(sizeof(cl_mem) * cmd->num_args); unsigned int i; for (i=0; inum_args; i++) { if (cmd->arg_types[i] == Buffer) { cl_mem buf = *(cl_mem*)cmd->args[i]; gc_entity_store(&cmd->buffers[cmd->num_buffers], buf); task->handles[cmd->num_buffers] = buf->handle; /* Determine best StarPU buffer access mode */ int mode; if (buf->mode == CL_MEM_READ_ONLY) mode = STARPU_R; else if (buf->mode == CL_MEM_WRITE_ONLY) { mode = STARPU_W; buf->scratch = 0; } else if (buf->scratch) { //RW but never accessed in RW or W mode mode = STARPU_W; buf->scratch = 0; } else { mode = STARPU_RW; buf->scratch = 0; } codelet->modes[cmd->num_buffers] = mode; cmd->num_buffers += 1; } } codelet->nbuffers = cmd->num_buffers; task_submit(task, cmd); return CL_SUCCESS; } CL_API_SUFFIX__VERSION_1_1 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueNDRangeKernel(cl_command_queue cq, cl_kernel kernel, cl_uint work_dim, const size_t * global_work_offset, const size_t * global_work_size, const size_t * local_work_size, cl_uint num_events, const cl_event * events, cl_event * event) { if (kernel->split_func != NULL && !STARPU_PTHREAD_MUTEX_TRYLOCK(&kernel->split_lock)) { cl_event beforeEvent, afterEvent, totalEvent; totalEvent = event_create(); gc_entity_store(&totalEvent->cq, cq); command_marker cmd = command_marker_create(); beforeEvent = command_event_get(cmd); command_queue_enqueue(cq, cmd, num_events, events); cl_uint iter = 1; cl_uint split_min = CL_UINT_MAX; cl_uint split_min_iter = 1; while (iter < kernel->split_space && kernel->split_perfs[iter] != 0) { if (kernel->split_perfs[iter] < split_min) { split_min = kernel->split_perfs[iter]; split_min_iter = iter; } iter++; } if (iter == kernel->split_space) { iter = split_min_iter; } cl_int ret = kernel->split_func(cq, iter, kernel->split_data, beforeEvent, &afterEvent); if (ret == CL_SUCCESS) { //FIXME: blocking call soclWaitForEvents(1, &afterEvent); /* Store perf */ cl_ulong start,end; soclGetEventProfilingInfo(beforeEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &start, NULL); soclGetEventProfilingInfo(afterEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL); soclReleaseEvent(afterEvent); kernel->split_perfs[iter] = end-start; STARPU_PTHREAD_MUTEX_UNLOCK(&kernel->split_lock); event_complete(totalEvent); totalEvent->prof_start = start; totalEvent->prof_submit = start; totalEvent->prof_queued = start; totalEvent->prof_end = end; RETURN_EVENT(totalEvent,event); } else { STARPU_PTHREAD_MUTEX_UNLOCK(&kernel->split_lock); soclReleaseEvent(totalEvent); } return ret; } else { command_ndrange_kernel cmd = command_ndrange_kernel_create(kernel, work_dim, global_work_offset, global_work_size, local_work_size); cl_event ev = command_event_get(cmd); command_queue_enqueue(cq, cmd, num_events, events); RETURN_EVENT(ev, event); } return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_enqueuereadbuffer.c000066400000000000000000000072311507764646700220610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" static void soclEnqueueReadBuffer_cpu_task(void *descr[], void *args) { command_read_buffer cmd = (command_read_buffer)args; cl_event ev = command_event_get(cmd); ev->prof_start = _socl_nanotime(); gc_entity_release(ev); char * ptr = (void*)STARPU_VARIABLE_GET_PTR(descr[0]); DEBUG_MSG("[Buffer %d] Reading %ld bytes from %p to %p\n", cmd->buffer->id, (long)cmd->cb, ptr+cmd->offset, cmd->ptr); //This fix is for people who use USE_HOST_PTR and still use ReadBuffer to sync the buffer in host mem at host_ptr. //They should use buffer mapping facilities instead. if (ptr+cmd->offset != cmd->ptr) memcpy(cmd->ptr, ptr+cmd->offset, cmd->cb); gc_entity_release_cmd(cmd); } static void soclEnqueueReadBuffer_opencl_task(void *descr[], void *args) { command_read_buffer cmd = (command_read_buffer)args; cl_event event = command_event_get(cmd); event->prof_start = _socl_nanotime(); gc_entity_release(event); cl_mem mem = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]); DEBUG_MSG("[Buffer %d] Reading %ld bytes from offset %ld into %p\n", cmd->buffer->id, (long)cmd->cb, (long)cmd->offset, cmd->ptr); int wid = starpu_worker_get_id_check(); cl_command_queue cq; starpu_opencl_get_queue(wid, &cq); cl_event ev; cl_int ret = clEnqueueReadBuffer(cq, mem, CL_TRUE, cmd->offset, cmd->cb, cmd->ptr, 0, NULL, &ev); if (ret != CL_SUCCESS) ERROR_CL("clEnqueueReadBuffer", ret); clWaitForEvents(1, &ev); clReleaseEvent(ev); gc_entity_release_cmd(cmd); } static struct starpu_perfmodel read_buffer_perfmodel = { .type = STARPU_HISTORY_BASED, .symbol = "SOCL_READ_BUFFER" }; static struct starpu_codelet codelet_readbuffer = { .where = STARPU_OPENCL, .model = &read_buffer_perfmodel, .cpu_funcs = { &soclEnqueueReadBuffer_cpu_task }, .opencl_funcs = { &soclEnqueueReadBuffer_opencl_task }, .modes = {STARPU_R}, .nbuffers = 1 }; cl_int command_read_buffer_submit(command_read_buffer cmd) { struct starpu_task * task = task_create(CL_COMMAND_READ_BUFFER); task->handles[0] = cmd->buffer->handle; task->cl = &codelet_readbuffer; /* Execute the task on a specific worker? */ if (cmd->_command.event->cq->device != NULL) { task->execute_on_a_specific_worker = 1; task->workerid = cmd->_command.event->cq->device->worker_id; } gc_entity_store_cmd(&task->cl_arg, cmd); task->cl_arg_size = sizeof(*cmd); task_submit(task, cmd); return CL_SUCCESS; } CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueReadBuffer(cl_command_queue cq, cl_mem buffer, cl_bool blocking, size_t offset, size_t cb, void * ptr, cl_uint num_events, const cl_event * events, cl_event * event) { command_read_buffer cmd = command_read_buffer_create(buffer, offset, cb, ptr); cl_event ev = command_event_get(cmd); command_queue_enqueue(cq, cmd, num_events, events); MAY_BLOCK_THEN_RETURN_EVENT(ev, blocking, event); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_enqueuereadimage.c000066400000000000000000000024721507764646700216740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueReadImage(cl_command_queue UNUSED(command_queue), cl_mem UNUSED(image), cl_bool UNUSED(blocking_read), const size_t * UNUSED(origin), const size_t * UNUSED(region), size_t UNUSED(row_pitch), size_t UNUSED(slice_pitch), void * UNUSED(ptr), cl_uint UNUSED(num_events_in_wait_list), const cl_event * UNUSED(event_wait_list), cl_event * UNUSED(event)) { return CL_INVALID_OPERATION; } starpu-1.4.9+dfsg/socl/src/cl_enqueuetask.c000066400000000000000000000021421507764646700207120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueTask(cl_command_queue cq, cl_kernel kernel, cl_uint num_events, const cl_event * events, cl_event * event) { command_ndrange_kernel cmd = command_task_create(kernel); cl_event ev = command_event_get(cmd); command_queue_enqueue(cq, cmd, num_events, events); RETURN_EVENT(ev, event); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_enqueueunmapmemobject.c000066400000000000000000000027521507764646700227650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" cl_int command_unmap_mem_object_submit(command_unmap_mem_object cmd) { /* Aliases */ cl_mem buffer = cmd->buffer; static struct starpu_codelet codelet = { .name = "SOCL_UNMAP_MEM_OBJECT" }; cpu_task_submit(cmd, (void(*)(void*))starpu_data_release, buffer->handle, 0, 1, &codelet, 0, NULL); return CL_SUCCESS; } CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueUnmapMemObject(cl_command_queue cq, cl_mem buffer, void * ptr, cl_uint num_events, const cl_event * events, cl_event * event) { command_unmap_mem_object cmd = command_unmap_mem_object_create(buffer, ptr); cl_event ev = command_event_get(cmd); command_queue_enqueue(cq, cmd, num_events, events); RETURN_EVENT(ev, event); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_enqueuewaitforevents.c000066400000000000000000000017471507764646700226620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueWaitForEvents(cl_command_queue cq, cl_uint num_events, const cl_event * events) { command_marker cmd = command_marker_create(); command_queue_enqueue(cq, cmd, num_events, events); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_enqueuewritebuffer.c000066400000000000000000000106641507764646700223040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" static void soclEnqueueWriteBuffer_cpu_task(void *descr[], void *args) { command_write_buffer cmd = (command_write_buffer)args; cl_event ev = command_event_get(cmd); ev->prof_start = _socl_nanotime(); gc_entity_release(ev); char * ptr = (void*)STARPU_VARIABLE_GET_PTR(descr[0]); DEBUG_MSG("[Buffer %d] Writing %ld bytes from %p to %p\n", cmd->buffer->id, (long)cmd->cb, cmd->ptr, ptr+cmd->offset); //FIXME: Fix for people who use USE_HOST_PTR, modify data at host_ptr and use WriteBuffer to commit the change. // StarPU may have erased host mem at host_ptr (for instance by retrieving current buffer data at host_ptr) // Buffer mapping facilities should be used instead // Maybe we should report the bug here... for now, we just avoid memcpy crash due to overlapping regions... if (ptr+cmd->offset != cmd->ptr) memcpy(ptr+cmd->offset, cmd->ptr, cmd->cb); gc_entity_release_cmd(cmd); } static void soclEnqueueWriteBuffer_opencl_task(void *descr[], void *args) { command_write_buffer cmd = (command_write_buffer)args; cl_event event = command_event_get(cmd); event->prof_start = _socl_nanotime(); gc_entity_release(event); cl_mem mem = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]); DEBUG_MSG("[Buffer %d] Writing %ld bytes to offset %ld from %p\n", cmd->buffer->id, (long)cmd->cb, (long)cmd->offset, cmd->ptr); int wid = starpu_worker_get_id_check(); cl_command_queue cq; starpu_opencl_get_queue(wid, &cq); cl_event ev; cl_int err = clEnqueueWriteBuffer(cq, mem, CL_TRUE, cmd->offset, cmd->cb, cmd->ptr, 0, NULL, &ev); if (err != CL_SUCCESS) ERROR_CL("clEnqueueWriteBuffer", err); clWaitForEvents(1, &ev); clReleaseEvent(ev); gc_entity_release_cmd(cmd); } static struct starpu_perfmodel write_buffer_perfmodel = { .type = STARPU_HISTORY_BASED, .symbol = "SOCL_WRITE_BUFFER" }; static struct starpu_codelet codelet_writebuffer = { .where = STARPU_OPENCL, .model = &write_buffer_perfmodel, .cpu_funcs = { &soclEnqueueWriteBuffer_cpu_task }, .opencl_funcs = { &soclEnqueueWriteBuffer_opencl_task }, .modes = {STARPU_W}, .nbuffers = 1 }; static struct starpu_codelet codelet_writebuffer_partial = { .where = STARPU_OPENCL, .model = &write_buffer_perfmodel, .cpu_funcs = { &soclEnqueueWriteBuffer_cpu_task }, .opencl_funcs = { &soclEnqueueWriteBuffer_opencl_task }, .modes = {STARPU_RW}, .nbuffers = 1 }; cl_int command_write_buffer_submit(command_write_buffer cmd) { /* Aliases */ cl_mem buffer = cmd->buffer; size_t cb = cmd->cb; struct starpu_task *task; task = task_create(CL_COMMAND_WRITE_BUFFER); task->handles[0] = buffer->handle; //If only a subpart of the buffer is written, RW access mode is required if (cb != buffer->size) task->cl = &codelet_writebuffer_partial; else task->cl = &codelet_writebuffer; gc_entity_store_cmd(&task->cl_arg, cmd); task->cl_arg_size = sizeof(*cmd); /* Execute the task on a specific worker? */ if (cmd->_command.event->cq->device != NULL) { task->execute_on_a_specific_worker = 1; task->workerid = cmd->_command.event->cq->device->worker_id; } //The buffer now contains meaningful data cmd->buffer->scratch = 0; task_submit(task, cmd); return CL_SUCCESS; } CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueWriteBuffer(cl_command_queue cq, cl_mem buffer, cl_bool blocking, size_t offset, size_t cb, const void * ptr, cl_uint num_events, const cl_event * events, cl_event * event) { command_write_buffer cmd = command_write_buffer_create(buffer, offset, cb, ptr); cl_event ev = command_event_get(cmd); command_queue_enqueue(cq, cmd, num_events, events); MAY_BLOCK_THEN_RETURN_EVENT(ev, blocking, event); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_enqueuewriteimage.c000066400000000000000000000025071507764646700221120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclEnqueueWriteImage(cl_command_queue UNUSED(command_queue), cl_mem UNUSED(image), cl_bool UNUSED(blocking_write), const size_t * UNUSED(origin), const size_t * UNUSED(region), size_t UNUSED(input_row_pitch), size_t UNUSED(input_slice_pitch), const void * UNUSED(ptr), cl_uint UNUSED(num_events_in_wait_list), const cl_event * UNUSED(event_wait_list), cl_event * UNUSED(event)) { return CL_INVALID_OPERATION; } starpu-1.4.9+dfsg/socl/src/cl_finish.c000066400000000000000000000017671507764646700176540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclFinish(cl_command_queue cq) { command_barrier cmd = command_barrier_create(); cl_event ev = command_event_get(cmd); command_queue_enqueue(cq, cmd, 0, NULL); MAY_BLOCK_THEN_RETURN_EVENT(ev, CL_TRUE, (cl_event*)NULL); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_flush.c000066400000000000000000000015071507764646700175050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclFlush(cl_command_queue UNUSED(command_queue)) { return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_getcommandqueueinfo.c000066400000000000000000000024661507764646700224300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "getinfo.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetCommandQueueInfo(cl_command_queue cq, cl_command_queue_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { if (cq == NULL) return CL_INVALID_COMMAND_QUEUE; switch (param_name) { INFO_CASE(CL_QUEUE_CONTEXT, cq->context); INFO_CASE(CL_QUEUE_DEVICE, cq->device); INFO_CASE(CL_QUEUE_REFERENCE_COUNT, cq->_entity.refs); INFO_CASE(CL_QUEUE_PROPERTIES, cq->properties); default: return CL_INVALID_VALUE; } return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_getcontextinfo.c000066400000000000000000000026051507764646700214240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "getinfo.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetContextInfo(cl_context context, cl_context_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { if (context == NULL) return CL_INVALID_CONTEXT; switch (param_name) { INFO_CASE(CL_CONTEXT_REFERENCE_COUNT, context->_entity.refs); INFO_CASE_EX(CL_CONTEXT_DEVICES, context->devices, context->num_devices * sizeof(cl_device_id)); INFO_CASE_EX(CL_CONTEXT_PROPERTIES, context->properties, context->num_properties * sizeof(cl_context_properties)); default: return CL_INVALID_VALUE; } return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_getdeviceids.c000066400000000000000000000051041507764646700210200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2012-2012 Vincent Danjean * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "init.h" /** * \brief Return one device of each kind * * \param[in] platform Must be StarPU platform ID or NULL */ CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetDeviceIDs(cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) { if (socl_init_starpu() < 0) { *num_devices = 0; return CL_SUCCESS; } if (_starpu_init_failed) { *num_devices = 0; return CL_SUCCESS; } if (platform != NULL && platform != &socl_platform) return CL_INVALID_PLATFORM; if ((devices != NULL && num_entries == 0) || (devices == NULL && num_devices == NULL)) return CL_INVALID_VALUE; if (!(device_type & (CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR | CL_DEVICE_TYPE_DEFAULT)) && (device_type != CL_DEVICE_TYPE_ALL)) return CL_INVALID_DEVICE_TYPE; int ndevs = starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); int workers[ndevs]; starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER, workers, ndevs); if (socl_devices == NULL) { socl_device_count = ndevs; socl_devices = malloc(sizeof(struct _cl_device_id) * ndevs); int i; for (i=0; i < ndevs; i++) { int devid = starpu_worker_get_devid(workers[i]); socl_devices[i].dispatch = &socl_master_dispatch; socl_devices[i].worker_id = workers[i]; socl_devices[i].device_id = devid; } } int i; unsigned int num = 0; for (i=0; i < ndevs; i++) { int devid = socl_devices[i].device_id; cl_device_id dev; starpu_opencl_get_device(devid, &dev); cl_device_type typ; clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof(typ), &typ, NULL); if (typ & device_type) { if (devices != NULL && num < num_entries) devices[num] = &socl_devices[i]; num++; } } if (num_devices != NULL) *num_devices = num; return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_getdeviceinfo.c000066400000000000000000000030621507764646700211750ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "getinfo.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetDeviceInfo(cl_device_id device, cl_device_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { //FIXME: we do not check if the device is valid /* if (device != &socl_virtual_device && device is not a valid StarPU worker identifier) return CL_INVALID_DEVICE;*/ int devid = device->device_id; cl_device_id dev; starpu_opencl_get_device(devid, &dev); int ret = CL_SUCCESS; switch (param_name) { case CL_DEVICE_PLATFORM: { cl_platform_id p = &socl_platform; INFO_CASE_EX2(p); } case CL_DEVICE_IMAGE_SUPPORT: { cl_bool res = CL_FALSE; INFO_CASE_EX2(res); } default: ret = clGetDeviceInfo(dev, param_name, param_value_size, param_value, param_value_size_ret); } return ret; } starpu-1.4.9+dfsg/socl/src/cl_geteventinfo.c000066400000000000000000000026061507764646700210620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "getinfo.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetEventInfo(cl_event event, cl_event_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { if (event == NULL) return CL_INVALID_EVENT; #define STAT_CASE(starpu,opencl) case starpu: \ status = opencl; \ break; switch (param_name) { INFO_CASE(CL_EVENT_COMMAND_QUEUE, event->cq); INFO_CASE(CL_EVENT_COMMAND_TYPE, event->command->typ); INFO_CASE(CL_EVENT_COMMAND_EXECUTION_STATUS, event->status); INFO_CASE(CL_EVENT_REFERENCE_COUNT, event->_entity.refs); default: return CL_INVALID_VALUE; } return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_geteventprofilinginfo.c000066400000000000000000000025631507764646700227760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "getinfo.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetEventProfilingInfo(cl_event event, cl_profiling_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { switch (param_name) { INFO_CASE_VALUE(CL_PROFILING_COMMAND_QUEUED, cl_ulong, event->prof_queued); INFO_CASE_VALUE(CL_PROFILING_COMMAND_SUBMIT, cl_ulong, event->prof_submit); INFO_CASE_VALUE(CL_PROFILING_COMMAND_START, cl_ulong, event->prof_start); INFO_CASE_VALUE(CL_PROFILING_COMMAND_END, cl_ulong, event->prof_end); default: return CL_INVALID_VALUE; } return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_getextensionfunctionaddress.c000066400000000000000000000027541507764646700242210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2012-2012 Vincent Danjean * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "socl.h" #include "init.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY void * CL_API_CALL soclGetExtensionFunctionAddress(const char * func_name) { if (func_name != NULL && strcmp(func_name, "clShutdown") == 0) { return (void*)soclShutdown; } return NULL; } CL_API_ENTRY void * CL_API_CALL soclGetExtensionFunctionAddressForPlatform(cl_platform_id p, const char * func_name) CL_API_SUFFIX__VERSION_1_2 { if (p != &socl_platform) return NULL; return soclGetExtensionFunctionAddress(func_name); } CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_0 { if(func_name != NULL && strcmp("clIcdGetPlatformIDsKHR", func_name) == 0) return (void *)soclIcdGetPlatformIDsKHR; return NULL; } starpu-1.4.9+dfsg/socl/src/cl_getimageinfo.c000066400000000000000000000020261507764646700210170ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "getinfo.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetImageInfo(cl_mem UNUSED(image), cl_image_info UNUSED(param_name), size_t UNUSED(param_value_size), void * UNUSED(param_value), size_t * UNUSED(param_value_size_ret)) { return CL_INVALID_OPERATION; } starpu-1.4.9+dfsg/socl/src/cl_getkernelinfo.c000066400000000000000000000026211507764646700212160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "getinfo.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetKernelInfo(cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { if (kernel == NULL) return CL_INVALID_KERNEL; switch (param_name) { INFO_CASE_EX(CL_KERNEL_FUNCTION_NAME, kernel->kernel_name, strlen(kernel->kernel_name)+1); INFO_CASE(CL_KERNEL_NUM_ARGS, kernel->num_args); INFO_CASE(CL_KERNEL_REFERENCE_COUNT, kernel->_entity.refs); INFO_CASE(CL_KERNEL_PROGRAM, kernel->program); INFO_CASE(CL_KERNEL_CONTEXT, kernel->program->context); default: return CL_INVALID_VALUE; } return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_getkernelworkgroupinfo.c000066400000000000000000000024721507764646700232020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetKernelWorkGroupInfo(cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { int range = starpu_worker_get_range_by_id(device->worker_id); cl_device_id dev; starpu_opencl_get_device(device->device_id, &dev); return clGetKernelWorkGroupInfo(kernel->cl_kernels[range], dev, param_name, param_value_size, param_value, param_value_size_ret); } starpu-1.4.9+dfsg/socl/src/cl_getmemobjectinfo.c000066400000000000000000000026311507764646700217040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "getinfo.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetMemObjectInfo(cl_mem mem, cl_mem_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { static cl_mem_object_type mot = CL_MEM_OBJECT_BUFFER; switch (param_name) { INFO_CASE(CL_MEM_TYPE, mot); INFO_CASE(CL_MEM_FLAGS, mem->flags); INFO_CASE(CL_MEM_SIZE, mem->size); INFO_CASE(CL_MEM_HOST_PTR, mem->host_ptr); INFO_CASE(CL_MEM_MAP_COUNT, mem->map_count); INFO_CASE(CL_MEM_REFERENCE_COUNT, mem->_entity.refs); INFO_CASE(CL_MEM_CONTEXT, mem->context); default: return CL_INVALID_VALUE; } return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_getplatformids.c000066400000000000000000000024731507764646700214130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" extern int _starpu_init_failed; /** * \brief Get StarPU platform ID */ CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetPlatformIDs(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms) { if (_starpu_init_failed) { if (num_platforms != NULL) *num_platforms = 0; return CL_SUCCESS; } if ((num_entries == 0 && platforms != NULL) || (num_platforms == NULL && platforms == NULL)) return CL_INVALID_VALUE; else { if (platforms != NULL) platforms[0] = &socl_platform; if (num_platforms != NULL) *num_platforms = 1; } return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_getplatforminfo.c000066400000000000000000000032311507764646700215600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2012-2012 Vincent Danjean * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "getinfo.h" /** * \brief Get information about StarPU platform * * \param[in] platform StarPU platform ID or NULL */ CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetPlatformInfo(cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { if (platform != NULL && platform != &socl_platform) return CL_INVALID_PLATFORM; switch (param_name) { INFO_CASE_STRING(CL_PLATFORM_PROFILE, SOCL_PROFILE); INFO_CASE_STRING(CL_PLATFORM_VERSION, SOCL_VERSION); INFO_CASE_STRING(CL_PLATFORM_NAME, SOCL_PLATFORM_NAME); INFO_CASE_STRING(CL_PLATFORM_VENDOR, SOCL_VENDOR); INFO_CASE_STRING(CL_PLATFORM_EXTENSIONS, SOCL_PLATFORM_EXTENSIONS); INFO_CASE_STRING(CL_PLATFORM_ICD_SUFFIX_KHR, SOCL_PLATFORM_ICD_SUFFIX_KHR); default: return CL_INVALID_VALUE; } return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_getprogrambuildinfo.c000066400000000000000000000025771507764646700224370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "getinfo.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetProgramBuildInfo(cl_program program, cl_device_id UNUSED(device), cl_program_build_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { if (program == NULL) return CL_INVALID_PROGRAM; switch (param_name) { //TODO //INFO_CASE(CL_PROGRAM_BUILD_STATUS, program->build_status); INFO_CASE_EX(CL_PROGRAM_BUILD_OPTIONS, program->options, program->options_size); //TODO //INFO_CASE(CL_PROGRAM_BUILD_LOG, program->build_log); default: return CL_INVALID_VALUE; } return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_getprograminfo.c000066400000000000000000000031121507764646700214010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "getinfo.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetProgramInfo(cl_program program, cl_program_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { if (program == NULL) return CL_INVALID_PROGRAM; switch (param_name) { INFO_CASE(CL_PROGRAM_REFERENCE_COUNT, program->_entity.refs); INFO_CASE(CL_PROGRAM_CONTEXT, program->context); INFO_CASE(CL_PROGRAM_NUM_DEVICES, program->context->num_devices); INFO_CASE_EX(CL_PROGRAM_DEVICES, program->context->devices, sizeof(cl_device_id)*program->context->num_devices); //TODO /*INFO_CASE(CL_PROGRAM_SOURCE, program->source); INFO_CASE(CL_PROGRAM_BINARY_SIZE, program->binary_sizes); INFO_CASE(CL_PROGRAM_BINARIES, program->binaries);*/ default: return CL_INVALID_VALUE; } return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_getsamplerinfo.c000066400000000000000000000020521507764646700213770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "getinfo.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetSamplerInfo(cl_sampler UNUSED(sampler), cl_sampler_info UNUSED(param_name), size_t UNUSED(param_value_size), void * UNUSED(param_value), size_t * UNUSED(param_value_size_ret)) { return CL_INVALID_OPERATION; } starpu-1.4.9+dfsg/socl/src/cl_getsupportedimageformats.c000066400000000000000000000021351507764646700235060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclGetSupportedImageFormats(cl_context UNUSED(context), cl_mem_flags UNUSED(flags), cl_mem_object_type UNUSED(image_type), cl_uint UNUSED(num_entries), cl_image_format * UNUSED(image_formats), cl_uint * UNUSED(num_image_formats)) { return CL_INVALID_OPERATION; } starpu-1.4.9+dfsg/socl/src/cl_icdgetplatformidskhr.c000066400000000000000000000023161507764646700225740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2012-2012 Vincent Danjean * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" extern int _starpu_init_failed; CL_EXT_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclIcdGetPlatformIDsKHR(cl_uint num_entries, cl_platform_id *platforms, cl_uint *num_platforms) { if ((num_entries == 0 && platforms != NULL) || (num_platforms == NULL && platforms == NULL)) return CL_INVALID_VALUE; else { if (platforms != NULL) platforms[0] = &socl_platform; if (num_platforms != NULL) *num_platforms = 1; } return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_releasecommandqueue.c000066400000000000000000000015331507764646700224070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclReleaseCommandQueue(cl_command_queue cq) { gc_entity_release(cq); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_releasecontext.c000066400000000000000000000016161507764646700214120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclReleaseContext(cl_context context) { if (context == NULL) return CL_INVALID_CONTEXT; gc_entity_release(context); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_releaseevent.c000066400000000000000000000016021507764646700210420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclReleaseEvent(cl_event event) { if (event == NULL) return CL_INVALID_EVENT; gc_entity_release(event); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_releasekernel.c000066400000000000000000000016101507764646700212000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclReleaseKernel(cl_kernel kernel) { if (kernel == NULL) return CL_INVALID_KERNEL; gc_entity_release(kernel); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_releasememobject.c000066400000000000000000000015201507764646700216650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclReleaseMemObject(cl_mem mem) { gc_entity_release(mem); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_releaseprogram.c000066400000000000000000000016161507764646700213750ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclReleaseProgram(cl_program program) { if (program == NULL) return CL_INVALID_PROGRAM; gc_entity_release(program); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_releasesampler.c000066400000000000000000000015161507764646700213700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclReleaseSampler(cl_sampler UNUSED(sampler)) { return CL_INVALID_OPERATION; } starpu-1.4.9+dfsg/socl/src/cl_retaincommandqueue.c000066400000000000000000000016201507764646700222460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclRetainCommandQueue(cl_command_queue cq) { if (cq == NULL) return CL_INVALID_COMMAND_QUEUE; gc_entity_retain(cq); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_retaincontext.c000066400000000000000000000016161507764646700212540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclRetainContext(cl_context context) { if (context == NULL) return CL_INVALID_CONTEXT; gc_entity_retain(context); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_retainevent.c000066400000000000000000000016001507764646700207020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclRetainEvent(cl_event event) { if (event == NULL) return CL_INVALID_EVENT; gc_entity_retain(event); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_retainkernel.c000066400000000000000000000016111507764646700210430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclRetainKernel(cl_kernel kernel) { if (kernel == NULL) return CL_INVALID_KERNEL; gc_entity_retain(kernel); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_retainmemobject.c000066400000000000000000000016011507764646700215270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclRetainMemObject(cl_mem mem) { if (mem == NULL) return CL_INVALID_MEM_OBJECT; gc_entity_retain(mem); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_retainprogram.c000066400000000000000000000016141507764646700212350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclRetainProgram(cl_program program) { if (program == NULL) return CL_INVALID_PROGRAM; gc_entity_retain(program); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_retainsampler.c000066400000000000000000000015151507764646700212310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclRetainSampler(cl_sampler UNUSED(sampler)) { return CL_INVALID_OPERATION; } starpu-1.4.9+dfsg/socl/src/cl_setcommandqueueproperty.c000066400000000000000000000034271507764646700233730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclSetCommandQueueProperty(cl_command_queue command_queue, cl_command_queue_properties properties, cl_bool enable, cl_command_queue_properties * old_properties) { if (command_queue == NULL) return CL_INVALID_COMMAND_QUEUE; if (old_properties != NULL) *old_properties = command_queue->properties; if (enable) { //Enable StarPU profiling if necessary if (properties & (~command_queue->properties) & CL_QUEUE_PROFILING_ENABLE) { if (profiling_queue_count == 0) starpu_profiling_status_set(STARPU_PROFILING_ENABLE); profiling_queue_count += 1; } //Set new properties command_queue->properties |= properties; } else { //Disable StarPU profiling if necessary if ((~properties) & command_queue->properties & CL_QUEUE_PROFILING_ENABLE) { profiling_queue_count -= 1; if (profiling_queue_count == 0) starpu_profiling_status_set(STARPU_PROFILING_DISABLE); } //Set new properties command_queue->properties &= ~properties; } return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_setkernelarg.c000066400000000000000000000057761507764646700210660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void * arg_value) { if (kernel == NULL) return CL_INVALID_KERNEL; if (arg_index == (cl_uint)-1) { kernel->split_func = arg_value; return CL_SUCCESS; } else if (arg_index == (cl_uint)-2) { kernel->split_space = *(cl_uint*)arg_value; if (kernel->split_perfs != NULL) { free(kernel->split_perfs); } kernel->split_perfs = calloc(kernel->split_space, sizeof(cl_ulong)); return CL_SUCCESS; } else if (arg_index == (cl_uint)-3) { kernel->split_data = (void *)arg_value; return CL_SUCCESS; } if (arg_index >= kernel->num_args) return CL_INVALID_ARG_INDEX; //FIXME: we don't return CL_INVALID_ARG_VALUE if "arg_value is NULL for an argument that is not declared with __local qualifier or vice-versa" //FIXME: we don't return CL_INVALID_MEM_OBJECT //FIXME: we don't return CL_INVALID_ARG_SIZE /* Free previous argument (set to NULL) */ switch (kernel->arg_type[arg_index]) { case Null: break; case Buffer: kernel->arg_type[arg_index] = Null; free(kernel->arg_value[arg_index]); kernel->arg_value[arg_index] = NULL; break; case Immediate: free(kernel->arg_value[arg_index]); kernel->arg_type[arg_index] = Null; kernel->arg_value[arg_index] = NULL; break; } kernel->arg_type[arg_index] = Null; kernel->arg_size[arg_index] = arg_size; DEBUG_MSG("[Kernel %d] Set argument %u: argsize %ld argvalue %p\n", kernel->id, arg_index, (long)arg_size, arg_value); /* Argument is not Null */ if (arg_value != NULL) { cl_mem buf = NULL; /* Check if argument is a memory object */ if ((arg_size == sizeof(cl_mem)) && ((buf = mem_object_fetch(arg_value)) != NULL)) { DEBUG_MSG("Found buffer %d \n", buf->id); kernel->arg_type[arg_index] = Buffer; kernel->arg_value[arg_index] = malloc(sizeof(void*)); *(cl_mem*)kernel->arg_value[arg_index] = buf; //We do not use gc_entity_store here because kernels do not hold reference on buffers (see OpenCL spec) } else { /* Argument must be an immediate buffer */ DEBUG_MSG("Immediate data\n"); kernel->arg_type[arg_index] = Immediate; kernel->arg_value[arg_index] = malloc(arg_size); memcpy(kernel->arg_value[arg_index], arg_value, arg_size); } } return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_unloadcompiler.c000066400000000000000000000014561507764646700214040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclUnloadCompiler(void) { return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/cl_waitforevents.c000066400000000000000000000022471507764646700212660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" CL_API_SUFFIX__VERSION_1_0 CL_API_ENTRY cl_int CL_API_CALL soclWaitForEvents(cl_uint num_events, const cl_event * event_list) { unsigned int i; #ifdef DEBUG DEBUG_MSG("Waiting for events: "); for (i=0; iid, i == (num_events-1) ? "" : ", "); } DEBUG_MSG_NOHEAD("\n"); #endif for (i=0; iid); DEBUG_MSG("Stop waiting :)\n"); return CL_SUCCESS; } starpu-1.4.9+dfsg/socl/src/command.c000066400000000000000000000232511507764646700173240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include /* Forward extern declaration */ extern void soclEnqueueNDRangeKernel_task(void *descr[], void *args); cl_event command_event_get_ex(cl_command cmd) { cl_event ev = cmd->event; gc_entity_retain(ev); return ev; } static void command_release_callback(void *a) { cl_command cmd = (cl_command)a; // Call command specific release callback if (cmd->release_callback != NULL) cmd->release_callback(cmd); // Generic command destructor cl_uint i; for (i=0; inum_events; i++) { gc_entity_unstore(&cmd->events[i]); } cmd->num_events = 0; free(cmd->events); /* Remove from command queue */ cl_command_queue cq = cmd->event->cq; if (cq != NULL) { /* Lock command queue */ STARPU_PTHREAD_MUTEX_LOCK(&cq->mutex); /* Remove barrier if applicable */ if (cq->barrier == cmd) cq->barrier = NULL; /* Remove from the list of out-of-order commands */ cq->commands = command_list_remove(cq->commands, cmd); /* Unlock command queue */ STARPU_PTHREAD_MUTEX_UNLOCK(&cq->mutex); } // Events may survive to commands that created them cmd->event->command = NULL; gc_entity_unstore(&cmd->event); } void command_init_ex(cl_command cmd, cl_command_type typ, void (*cb)(void*)) { gc_entity_init(&cmd->_entity, command_release_callback, "command"); cmd->release_callback = cb; cmd->typ = typ; cmd->num_events = 0; cmd->events = NULL; cmd->event = event_create(); // we do not use gc_entity_store here because if nobody requires the event, it should be destroyed with the command cmd->event->command = cmd; cmd->task = NULL; cmd->submitted = 0; } void command_submit_ex(cl_command cmd) { #define SUBMIT(typ,name) case typ: \ name##_submit((name)cmd); \ break; assert(cmd->submitted == 0); switch(cmd->typ) { SUBMIT(CL_COMMAND_NDRANGE_KERNEL, command_ndrange_kernel); SUBMIT(CL_COMMAND_TASK, command_ndrange_kernel); SUBMIT(CL_COMMAND_READ_BUFFER, command_read_buffer); SUBMIT(CL_COMMAND_WRITE_BUFFER, command_write_buffer); SUBMIT(CL_COMMAND_COPY_BUFFER, command_copy_buffer); SUBMIT(CL_COMMAND_MAP_BUFFER, command_map_buffer); SUBMIT(CL_COMMAND_UNMAP_MEM_OBJECT, command_unmap_mem_object); SUBMIT(CL_COMMAND_MARKER, command_marker); SUBMIT(CL_COMMAND_BARRIER, command_barrier); default: ERROR_STOP("Trying to submit unknown command (type %x)", cmd->typ); } cmd->submitted = 1; #undef SUBMIT } cl_int command_submit_deep_ex(cl_command cmd) { if (cmd->submitted == 1) return CL_SUCCESS; /* We set this in order to avoid cyclic dependencies */ cmd->submitted = 1; unsigned int i; for (i=0; inum_events; i++) command_submit_deep(cmd->events[i]->command); cmd->submitted = 0; command_submit_ex(cmd); return CL_SUCCESS; } void command_graph_dump_ex(cl_command cmd) { unsigned int i; for (i=0; inum_events; i++) command_graph_dump_ex(cmd->events[i]->command); const char * typ_str = (cmd->typ == CL_COMMAND_NDRANGE_KERNEL ? "ndrange_kernel" : cmd->typ == CL_COMMAND_TASK ? "task" : cmd->typ == CL_COMMAND_READ_BUFFER ? "read_buffer" : cmd->typ == CL_COMMAND_WRITE_BUFFER ? "write_buffer" : cmd->typ == CL_COMMAND_COPY_BUFFER ? "copy_buffer" : cmd->typ == CL_COMMAND_MAP_BUFFER ? "map_buffer" : cmd->typ == CL_COMMAND_UNMAP_MEM_OBJECT ? "unmap_mem_object" : cmd->typ == CL_COMMAND_MARKER ? "marker" : cmd->typ == CL_COMMAND_BARRIER ? "barrier" : "unknown"); printf("CMD %p TYPE %s DEPS", cmd, typ_str); for (i=0; inum_events; i++) printf(" %p", cmd->events[i]->command); printf("\n"); } #define nullOrDup(name,size) cmd->name = memdup_safe(name,size) #define nullOrFree(name) if (cmd->name != NULL) free((void*)cmd->name) #define dup(name) cmd->name = name void command_ndrange_kernel_release(void * arg) { command_ndrange_kernel cmd = (command_ndrange_kernel)arg; gc_entity_unstore(&cmd->kernel); nullOrFree(global_work_offset); nullOrFree(global_work_size); nullOrFree(local_work_size); free(cmd->arg_sizes); free(cmd->arg_types); unsigned int i; for (i=0; inum_args; i++) { free(cmd->args[i]); cmd->args[i] = NULL; } free(cmd->args); for (i=0; inum_buffers; i++) gc_entity_unstore(&cmd->buffers[i]); free(cmd->buffers); } command_ndrange_kernel command_ndrange_kernel_create(cl_kernel kernel, cl_uint work_dim, const size_t * global_work_offset, const size_t * global_work_size, const size_t * local_work_size) { command_ndrange_kernel cmd = calloc(1, sizeof(struct command_ndrange_kernel_t)); command_init(cmd, CL_COMMAND_NDRANGE_KERNEL, command_ndrange_kernel_release); gc_entity_store(&cmd->kernel, kernel); dup(work_dim); nullOrDup(global_work_offset, work_dim*sizeof(size_t)); nullOrDup(global_work_size, work_dim*sizeof(size_t)); nullOrDup(local_work_size, work_dim*sizeof(size_t)); starpu_codelet_init(&cmd->codelet); cmd->codelet.where = STARPU_OPENCL; cmd->codelet.energy_model = NULL; cmd->codelet.opencl_funcs[0] = &soclEnqueueNDRangeKernel_task; /* Kernel is mutable, so we duplicate its parameters... */ cmd->num_args = kernel->num_args; cmd->arg_sizes = memdup(kernel->arg_size, sizeof(size_t) * kernel->num_args); cmd->arg_types = memdup(kernel->arg_type, sizeof(enum kernel_arg_type) * kernel->num_args); cmd->args = memdup_deep_varsize_safe(kernel->arg_value, kernel->num_args, kernel->arg_size); return cmd; } command_ndrange_kernel command_task_create (cl_kernel kernel) { static cl_uint task_work_dim = 3; static const size_t task_global_work_offset[3] = {0,0,0}; static const size_t task_global_work_size[3] = {1,1,1}; static const size_t * task_local_work_size = NULL; command_ndrange_kernel cmd = command_ndrange_kernel_create(kernel, task_work_dim, task_global_work_offset, task_global_work_size, task_local_work_size); /* This is the only difference with command_ndrange_kernel_create */ cmd->_command.typ = CL_COMMAND_TASK; return cmd; } command_barrier command_barrier_create () { command_barrier cmd = malloc(sizeof(struct command_barrier_t)); command_init(cmd, CL_COMMAND_BARRIER, NULL); return cmd; } command_marker command_marker_create () { command_marker cmd = malloc(sizeof(struct command_marker_t)); command_init(cmd, CL_COMMAND_MARKER, NULL); return cmd; } void command_map_buffer_release(void * UNUSED(arg)) { /* We DO NOT unstore (release) the buffer as unmap will do it gc_entity_unstore(&cmd->buffer); */ } command_map_buffer command_map_buffer_create(cl_mem buffer, cl_map_flags map_flags, size_t offset, size_t cb) { command_map_buffer cmd = malloc(sizeof(struct command_map_buffer_t)); command_init(cmd, CL_COMMAND_MAP_BUFFER, command_map_buffer_release); gc_entity_store(&cmd->buffer, buffer); dup(map_flags); dup(offset); dup(cb); return cmd; } void command_unmap_mem_object_release(void * arg) { command_unmap_mem_object cmd = (command_unmap_mem_object)arg; /* We release the buffer twice because map buffer command did not */ gc_entity_release(cmd->buffer); gc_entity_unstore(&cmd->buffer); } command_unmap_mem_object command_unmap_mem_object_create(cl_mem buffer, void * ptr) { command_unmap_mem_object cmd = malloc(sizeof(struct command_unmap_mem_object_t)); command_init(cmd, CL_COMMAND_UNMAP_MEM_OBJECT, command_unmap_mem_object_release); gc_entity_store(&cmd->buffer, buffer); dup(ptr); return cmd; } void command_read_buffer_release(void *arg) { command_read_buffer cmd = (command_read_buffer)arg; gc_entity_unstore(&cmd->buffer); } command_read_buffer command_read_buffer_create(cl_mem buffer, size_t offset, size_t cb, void * ptr) { command_read_buffer cmd = malloc(sizeof(struct command_read_buffer_t)); command_init(cmd, CL_COMMAND_READ_BUFFER, command_read_buffer_release); gc_entity_store(&cmd->buffer, buffer); dup(offset); dup(cb); dup(ptr); return cmd; } void command_write_buffer_release(void *arg) { command_write_buffer cmd = (command_write_buffer)arg; gc_entity_unstore(&cmd->buffer); } command_write_buffer command_write_buffer_create(cl_mem buffer, size_t offset, size_t cb, const void * ptr) { command_write_buffer cmd = malloc(sizeof(struct command_write_buffer_t)); command_init(cmd, CL_COMMAND_WRITE_BUFFER, command_write_buffer_release); gc_entity_store(&cmd->buffer, buffer); dup(offset); dup(cb); dup(ptr); return cmd; } void command_copy_buffer_release(void *arg) { command_copy_buffer cmd = (command_copy_buffer)arg; gc_entity_unstore(&cmd->src_buffer); gc_entity_unstore(&cmd->dst_buffer); } command_copy_buffer command_copy_buffer_create(cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t cb) { command_copy_buffer cmd = malloc(sizeof(struct command_copy_buffer_t)); command_init(cmd, CL_COMMAND_COPY_BUFFER, command_copy_buffer_release); gc_entity_store(&cmd->src_buffer, src_buffer); gc_entity_store(&cmd->dst_buffer, dst_buffer); dup(src_offset); dup(dst_offset); dup(cb); return cmd; } #undef nullOrDup #undef nodeNullOrDup #undef dup #undef nodeDup #undef memdup starpu-1.4.9+dfsg/socl/src/command.h000066400000000000000000000133101507764646700173240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #ifndef SOCL_COMMANDS_H #define SOCL_COMMANDS_H typedef struct cl_command_t * cl_command; #define gc_entity_store_cmd(dest,cmd) gc_entity_store(dest, &cmd->_command) #define gc_entity_release_cmd(cmd) gc_entity_release(&cmd->_command) /** * Initialize a command structure * * Command constructors for each kind of command use this method * Implicit and explicit dependencies must be passed as parameters */ void command_init_ex(cl_command cmd, cl_command_type typ, void (*cb)(void*)); #define command_init(cmd,typ,cb) \ command_init_ex((cl_command)cmd,typ,cb) void command_release(cl_command cmd); /** Submit a command for execution */ void command_submit_ex(cl_command cmd); #define command_submit(cmd) \ command_submit_ex(&(cmd)->_command) /** Submit a command and its dependencies */ cl_int command_submit_deep_ex(cl_command cmd); #define command_submit_deep(cmd) (command_submit_deep_ex((cl_command)cmd)) void command_graph_dump_ex(cl_command cmd); #define command_graph_dump(cmd) (command_graph_dump_ex((cl_command)cmd)) /************************** * OpenCL Commands **************************/ struct cl_command_t { CL_ENTITY; cl_command_type typ; /* Command type */ cl_uint num_events; /* Number of dependencies */ cl_event * events; /* Dependencies */ cl_event event; /* Event for this command */ starpu_task task; /* Associated StarPU task, if any */ char submitted; /* True if the command has been submitted to StarPU */ void (*release_callback)(void*); /* Command specific destructor */ }; #define command_type_get(cmd) (((cl_command)cmd)->typ) cl_event command_event_get_ex(cl_command cmd); #define command_event_get(cmd) command_event_get_ex(&cmd->_command) #define command_num_events_get_ex(cmd) (cmd->num_events) #define command_num_events_get(cmd) ((cmd)->_command.num_events) #define command_events_get_ex(cmd) ((cmd)->events) #define command_events_get(cmd) ((cmd)->_command.events) #define command_task_get(cmd) ((cmd)->_command.task) #define command_cq_get(cmd) ((cmd)->_command.cq) #define CL_COMMAND struct cl_command_t _command; typedef struct command_ndrange_kernel_t { CL_COMMAND cl_kernel kernel; struct starpu_codelet codelet; cl_uint work_dim; const size_t * global_work_offset; const size_t * global_work_size; const size_t * local_work_size; cl_uint num_args; size_t * arg_sizes; enum kernel_arg_type * arg_types; void ** args; cl_uint num_buffers; cl_mem * buffers; } * command_ndrange_kernel; typedef struct command_read_buffer_t { CL_COMMAND cl_mem buffer; size_t offset; size_t cb; void * ptr; } * command_read_buffer; typedef struct command_write_buffer_t { CL_COMMAND cl_mem buffer; size_t offset; size_t cb; const void * ptr; } * command_write_buffer; typedef struct command_copy_buffer_t { CL_COMMAND cl_mem src_buffer; cl_mem dst_buffer; size_t src_offset; size_t dst_offset; size_t cb; } * command_copy_buffer; typedef struct command_map_buffer_t { CL_COMMAND cl_mem buffer; cl_map_flags map_flags; size_t offset; size_t cb; } * command_map_buffer; typedef struct command_unmap_mem_object_t { CL_COMMAND cl_mem buffer; void * ptr; } * command_unmap_mem_object; typedef struct command_marker_t { CL_COMMAND } * command_marker; typedef struct command_barrier_t { CL_COMMAND } * command_barrier; /************************* * Constructor functions *************************/ command_ndrange_kernel command_ndrange_kernel_create (cl_kernel kernel, cl_uint work_dim, const size_t * global_work_offset, const size_t * global_work_size, const size_t * local_work_size); command_ndrange_kernel command_task_create (cl_kernel kernel); command_barrier command_barrier_create (); command_marker command_marker_create (); command_map_buffer command_map_buffer_create(cl_mem buffer, cl_map_flags map_flags, size_t offset, size_t cb); command_unmap_mem_object command_unmap_mem_object_create(cl_mem buffer, void * ptr); command_read_buffer command_read_buffer_create(cl_mem buffer, size_t offset, size_t cb, void * ptr); command_write_buffer command_write_buffer_create(cl_mem buffer, size_t offset, size_t cb, const void * ptr); command_copy_buffer command_copy_buffer_create(cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t cb); /************************* * Submit functions *************************/ cl_int command_ndrange_kernel_submit(command_ndrange_kernel cmd); cl_int command_read_buffer_submit(command_read_buffer cmd); cl_int command_write_buffer_submit(command_write_buffer cmd); cl_int command_copy_buffer_submit(command_copy_buffer cmd); cl_int command_map_buffer_submit(command_map_buffer cmd); cl_int command_unmap_mem_object_submit(command_unmap_mem_object cmd); cl_int command_marker_submit(command_marker cmd); cl_int command_barrier_submit(command_barrier cmd); #endif /* SOCL_COMMANDS_H */ starpu-1.4.9+dfsg/socl/src/command_list.c000066400000000000000000000025641507764646700203630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" command_list command_list_cons(cl_command cmd, command_list ls) { command_list e = malloc(sizeof(struct command_list_t)); e->cmd = cmd; e->next = ls; e->prev = NULL; if (ls != NULL) ls->prev = e; return e; } /** * Remove every occurrence of cmd in the list l */ command_list command_list_remove(command_list l, cl_command cmd) { command_list e = l; while (e != NULL) { if (e->cmd == cmd) { if (e->prev != NULL) e->prev->next = e->next; if (e->next != NULL) e->next->prev = e->prev; command_list old = e; if (l == old) { // list head has been removed l = old->next; } e = old->next; free(old); } else { e = e->next; } } return l; } starpu-1.4.9+dfsg/socl/src/command_list.h000066400000000000000000000017111507764646700203610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" typedef struct command_list_t * command_list; struct command_list_t { cl_command cmd; command_list next; command_list prev; }; command_list command_list_cons(cl_command cmd, command_list ls); command_list command_list_remove(command_list l, cl_command cmd); starpu-1.4.9+dfsg/socl/src/command_queue.c000066400000000000000000000060111507764646700205230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2012-2012 Vincent Danjean * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "task.h" #include "gc.h" /** * WARNING: command queues do NOT hold references on events. Only events hold references * on command queues. This way, event release will automatically remove the event from * its command queue. */ void command_queue_enqueue_ex(cl_command_queue cq, cl_command cmd, cl_uint num_events, const cl_event * events) { cl_event ev = command_event_get_ex(cmd); ev->prof_queued = _socl_nanotime(); gc_entity_release(ev); /* Check if the command is a barrier */ int is_barrier = (cmd->typ == CL_COMMAND_BARRIER || !(cq->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); /* Add references to the command queue */ gc_entity_store(&cmd->event->cq, cq); /* Lock command queue */ STARPU_PTHREAD_MUTEX_LOCK(&cq->mutex); /*** Number of dependencies ***/ int ndeps = num_events; /* Add dependency to last barrier if applicable */ if (cq->barrier != NULL) ndeps++; /* Add dependencies to out-of-order events (if any) */ if (is_barrier) { command_list cl = cq->commands; while (cl != NULL) { ndeps++; cl = cl->next; } } /*** Dependencies ***/ cl_event * deps = malloc(ndeps * sizeof(cl_event)); int n = 0; /* Add dependency to last barrier if applicable */ if (cq->barrier != NULL) gc_entity_store(&deps[n++], cq->barrier->event); /* Add dependencies to out-of-order events (if any) */ if (is_barrier) { command_list cl = cq->commands; while (cl != NULL) { gc_entity_store(&deps[n++], cl->cmd->event); cl = cl->next; } } /* Add explicit dependencies */ unsigned i; for (i=0; inum_events = ndeps; cmd->events = deps; /* Insert command in the queue */ if (is_barrier) { /* Remove out-of-order commands */ cq->commands = NULL; /* Register the command as the last barrier */ cq->barrier = cmd; } else { /* Add command to the list of out-of-order commands */ cq->commands = command_list_cons(cmd, cq->commands); } /* Submit command * We need to do it before unlocking because we don't want events to get * released while we use them to set dependencies */ command_submit_ex(cmd); /* Unlock command queue */ STARPU_PTHREAD_MUTEX_UNLOCK(&cq->mutex); gc_entity_release(cmd); } starpu-1.4.9+dfsg/socl/src/command_queue.h000066400000000000000000000022311507764646700205300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef SOCL_COMMAND_QUEUE_H #define SOCL_COMMAND_QUEUE_H void command_queue_enqueue_ex(cl_command_queue cq, /* Command queue */ cl_command cmd, /* Command to enqueue */ cl_uint num_events, /* Number of explicit dependencies */ const cl_event * events /* Explicit dependencies */ ); #define command_queue_enqueue(cq, cmd, num_events, events)\ command_queue_enqueue_ex(cq, (cl_command)cmd, num_events, events) #endif /* SOCL_COMMAND_QUEUE_H */ starpu-1.4.9+dfsg/socl/src/debug.c000066400000000000000000000051761507764646700170020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" void ERROR_CL(char *s, cl_int err) { #define ERR_CASE(a) case a: ERROR_MSG("[OpenCL] %s CL error: %s\n", s, #a); break; switch(err) { case CL_SUCCESS: DEBUG_MSG("[OpenCL] %s SUCCESS.\n", s); break; ERR_CASE(CL_DEVICE_NOT_FOUND); ERR_CASE(CL_DEVICE_NOT_AVAILABLE); ERR_CASE(CL_COMPILER_NOT_AVAILABLE); ERR_CASE(CL_MEM_OBJECT_ALLOCATION_FAILURE); ERR_CASE(CL_OUT_OF_RESOURCES); ERR_CASE(CL_OUT_OF_HOST_MEMORY); ERR_CASE(CL_PROFILING_INFO_NOT_AVAILABLE); ERR_CASE(CL_MEM_COPY_OVERLAP); ERR_CASE(CL_IMAGE_FORMAT_MISMATCH); ERR_CASE(CL_IMAGE_FORMAT_NOT_SUPPORTED); ERR_CASE(CL_BUILD_PROGRAM_FAILURE); ERR_CASE(CL_MAP_FAILURE); ERR_CASE(CL_INVALID_VALUE); ERR_CASE(CL_INVALID_DEVICE_TYPE); ERR_CASE(CL_INVALID_PLATFORM); ERR_CASE(CL_INVALID_DEVICE); ERR_CASE(CL_INVALID_CONTEXT); ERR_CASE(CL_INVALID_QUEUE_PROPERTIES); ERR_CASE(CL_INVALID_COMMAND_QUEUE); ERR_CASE(CL_INVALID_HOST_PTR); ERR_CASE(CL_INVALID_MEM_OBJECT); ERR_CASE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR); ERR_CASE(CL_INVALID_IMAGE_SIZE); ERR_CASE(CL_INVALID_SAMPLER); ERR_CASE(CL_INVALID_BINARY); ERR_CASE(CL_INVALID_BUILD_OPTIONS); ERR_CASE(CL_INVALID_PROGRAM); ERR_CASE(CL_INVALID_PROGRAM_EXECUTABLE); ERR_CASE(CL_INVALID_KERNEL_NAME); ERR_CASE(CL_INVALID_KERNEL_DEFINITION); ERR_CASE(CL_INVALID_KERNEL); ERR_CASE(CL_INVALID_ARG_INDEX); ERR_CASE(CL_INVALID_ARG_VALUE); ERR_CASE(CL_INVALID_ARG_SIZE); ERR_CASE(CL_INVALID_KERNEL_ARGS); ERR_CASE(CL_INVALID_WORK_DIMENSION); ERR_CASE(CL_INVALID_WORK_GROUP_SIZE); ERR_CASE(CL_INVALID_WORK_ITEM_SIZE); ERR_CASE(CL_INVALID_GLOBAL_OFFSET); ERR_CASE(CL_INVALID_EVENT_WAIT_LIST); ERR_CASE(CL_INVALID_EVENT); ERR_CASE(CL_INVALID_OPERATION); ERR_CASE(CL_INVALID_GL_OBJECT); ERR_CASE(CL_INVALID_BUFFER_SIZE); ERR_CASE(CL_INVALID_MIP_LEVEL); ERR_CASE(CL_INVALID_GLOBAL_WORK_SIZE); default: ERROR_MSG("%s CL error: Error message not supported by ERROR_CL function (%d).\n", s, err); } } starpu-1.4.9+dfsg/socl/src/debug.h000066400000000000000000000034711507764646700170030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef SOCL_DEBUG_H #define SOCL_DEBUG_H #include <../src/common/config.h> #ifdef STARPU_VERBOSE #define DEBUG #include #define DEBUG_MSG(...) do { if (!getenv("STARPU_SILENT")) { fprintf(stderr, "[SOCL] [%s] ", __starpu_func__); fprintf(stderr, __VA_ARGS__);}} while (0) #define DEBUG_MSG_NOHEAD(...) do { if (!getenv("STARPU_SILENT")) { fprintf(stderr, __VA_ARGS__);}} while (0); #define DEBUG_ERROR(...) do { if (!getenv("STARPU_SILENT")) { fprintf(stderr, "[SOCL] ERROR: "__VA_ARGS__); } exit(1); } while (0) #else #define DEBUG_MSG(...) while(0) #define DEBUG_MSG_NOHEAD(...) while(0) #define DEBUG_ERROR(...) while(0) #endif #define ERROR_MSG(...) do { fprintf(stderr, "[SOCL] [%s] ERROR: ", __starpu_func__); fprintf(stderr, __VA_ARGS__); } while (0) #define ERROR_MSG_NOHEAD(...) fprintf(stderr, __VA_ARGS__) #define ERROR_STOP(...) do { ERROR_MSG(__VA_ARGS__); exit(1); } while(0) void ERROR_CL(char *s, cl_int err); #ifdef STARPU_VERBOSE #define DEBUG_CL(args...) ERROR_CL(args) #else #define DEBUG_CL(...) while(0) #endif #ifdef DEBUG #define DEBUG_PARAM(p) p #else #define DEBUG_PARAM(p) UNUSED(p) #endif #endif /* SOCL_DEBUG_H */ starpu-1.4.9+dfsg/socl/src/event.c000066400000000000000000000032631507764646700170300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "event.h" #include "gc.h" static void release_callback_event(void * e); int event_unique_id() { static int id = 1; return STARPU_ATOMIC_ADD(&id,1) - 1; } /** * Create a new event * * Events have one-to-one relation with tag. Tag number is event ID */ cl_event event_create(void) { cl_event ev; ev = gc_entity_alloc(sizeof(struct _cl_event), release_callback_event, "event"); ev->id = event_unique_id(); ev->status = CL_SUBMITTED; ev->command = NULL; ev->prof_queued = 0L; ev->prof_submit = 0L; ev->prof_start = 0L; ev->prof_end = 0L; ev->cq = NULL; return ev; } void event_complete(cl_event ev) { ev->status = CL_COMPLETE; ev->prof_end = _socl_nanotime(); /* Trigger the tag associated to the command event */ DEBUG_MSG("Trigger event %d\n", ev->id); starpu_tag_notify_from_apps(ev->id); } static void release_callback_event(void * e) { cl_event event = (cl_event)e; gc_entity_unstore(&event->cq); /* Destruct object */ //FIXME //starpu_tag_remove(event->id); } starpu-1.4.9+dfsg/socl/src/event.h000066400000000000000000000017511507764646700170350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef SOCL_EVENT_H #define SOCL_EVENT_H #include "socl.h" /** * Create a new event * * Events have one-to-one relation with tag. Tag number is event ID */ cl_event event_create(void); /** * Generate a unique tag id */ int event_unique_id(); void event_complete(cl_event ev); #endif /* SOCL_EVENT_H */ starpu-1.4.9+dfsg/socl/src/gc.c000066400000000000000000000107451507764646700163030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2012-2012 Vincent Danjean * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "gc.h" #include "event.h" #include "socl.h" #include /** * Garbage collection thread */ /* List of entities to be released */ static volatile entity gc_list = NULL; static volatile entity entities = NULL; /* Mutex and cond for release */ static starpu_pthread_mutex_t gc_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t gc_cond = STARPU_PTHREAD_COND_INITIALIZER; /* Set to 1 to stop release thread execution */ static volatile int gc_stop_required = 0; #define GC_LOCK STARPU_PTHREAD_MUTEX_LOCK(&gc_mutex) #define GC_UNLOCK { STARPU_PTHREAD_COND_SIGNAL(&gc_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&gc_mutex);} #define GC_UNLOCK_NO_SIGNAL STARPU_PTHREAD_MUTEX_UNLOCK(&gc_mutex) /* Thread routine */ static void * gc_thread_routine(void *UNUSED(arg)) { GC_LOCK; do { /* Make a copy of the gc_list to allow callbacks to add things into it */ entity rs = gc_list; gc_list = NULL; GC_UNLOCK_NO_SIGNAL; entity r = rs; while (r != NULL) { /* Call entity release callback */ if (r->release_callback != NULL) { r->release_callback(r); } /* Release entity */ entity next = r->next; free(r); r = next; } GC_LOCK; /* Check if new entities have been added */ if (gc_list != NULL) continue; /* Stop if required */ if (gc_stop_required) { GC_UNLOCK_NO_SIGNAL; break; } /* Otherwise we sleep */ STARPU_PTHREAD_COND_WAIT(&gc_cond, &gc_mutex); } while (1); starpu_pthread_exit(NULL); } static starpu_pthread_t gc_thread; /* Start garbage collection */ void gc_start(void) { STARPU_PTHREAD_CREATE(&gc_thread, NULL, gc_thread_routine, NULL); } /* Stop garbage collection */ void gc_stop(void) { GC_LOCK; gc_stop_required = 1; GC_UNLOCK; STARPU_PTHREAD_JOIN(gc_thread, NULL); } int gc_entity_release_ex(entity e, const char * DEBUG_PARAM(caller)) { DEBUG_MSG("[%s] Decrementing refcount of %s %p to ", caller, e->name, (void *)e); /* Decrement reference count */ int refs = STARPU_ATOMIC_ADD(&e->refs, -1); DEBUG_MSG_NOHEAD("%d\n", refs); assert(refs >= 0); if (refs != 0) return 0; DEBUG_MSG("[%s] Releasing %s %p\n", caller, e->name, (void *)e); GC_LOCK; /* Remove entity from the entities list */ if (e->prev != NULL) e->prev->next = e->next; if (e->next != NULL) e->next->prev = e->prev; if (entities == e) entities = e->next; /* Put entity in the release queue */ e->next = gc_list; gc_list = e; GC_UNLOCK; return 1; } /** * Initialize entity */ void gc_entity_init(void *arg, void (*release_callback)(void*), char * name) { DEBUG_MSG("Initializing entity %p (%s)\n", arg, name); struct entity * e = (entity)arg; e->dispatch = &socl_master_dispatch; e->refs = 1; e->release_callback = release_callback; e->prev = NULL; e->name = name; GC_LOCK; e->next = entities; if (entities != NULL) entities->prev = e; entities = e; GC_UNLOCK_NO_SIGNAL; } /** * Allocate and initialize entity */ void * gc_entity_alloc(unsigned int size, void (*release_callback)(void*), char * name) { void * e = malloc(size); gc_entity_init(e, release_callback, name); return e; } /** Retain entity */ void gc_entity_retain_ex(void *arg, const char * DEBUG_PARAM(caller)) { struct entity * e = (entity)arg; #ifdef DEBUG int refs = #else (void) #endif STARPU_ATOMIC_ADD(&e->refs, 1); DEBUG_MSG("[%s] Incrementing refcount of %s %p to %d\n", caller, e->name, e, refs); } int gc_active_entity_count(void) { int i = 0; entity e = entities; while (e != NULL) { i++; e = e->next; } return i; } void gc_print_remaining_entities(void) { DEBUG_MSG("Remaining entities:\n"); GC_LOCK; entity e = entities; while (e != NULL) { DEBUG_MSG(" - %s %p\n", e->name, (void *)e); e = e->next; } GC_UNLOCK; } #undef GC_LOCK #undef GC_UNLOCK #undef GC_UNLOCK_NO_SIGNAL starpu-1.4.9+dfsg/socl/src/gc.h000066400000000000000000000030671507764646700163070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef SOCL_GC_H #define SOCL_GC_H #include "socl.h" void gc_start(void); void gc_stop(void); void gc_entity_init(void *arg, void (*release_callback)(void*), char*name); void * gc_entity_alloc(unsigned int size, void (*release_callback)(void*), char * name); void gc_entity_retain_ex(void *arg, const char *); #define gc_entity_retain(arg) gc_entity_retain_ex(arg, __starpu_func__) /** Decrement reference counter and release entity if applicable */ int gc_entity_release_ex(entity e, const char*); int gc_active_entity_count(void); void gc_print_remaining_entities(void); #define gc_entity_release(a) gc_entity_release_ex(&(a)->_entity, __starpu_func__) #define gc_entity_store(dest,e) \ do {\ void * _e = e;\ gc_entity_retain(_e); \ *dest = _e;\ } while(0); #define gc_entity_unstore(dest) \ do {\ gc_entity_release(*dest); \ *dest = NULL;\ } while(0); #endif starpu-1.4.9+dfsg/socl/src/getinfo.h000066400000000000000000000036011507764646700173430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef SOCL_GETINFO_H #define SOCL_GETINFO_H #define INFO_CASE_EX2(var) if (param_value != NULL) { \ if (param_value_size < sizeof(var)) \ return CL_INVALID_VALUE; \ memcpy(param_value, &var, sizeof(var)); \ } \ if (param_value_size_ret != NULL) \ *param_value_size_ret = sizeof(var); \ break; #define INFO_CASE(param, var) case param: \ INFO_CASE_EX2(var) #define INFO_CASE_STRING_EX2(var) if (param_value != NULL) { \ if (param_value_size < strlen(var)+1) \ return CL_INVALID_VALUE; \ strcpy(param_value, var); \ } \ if (param_value_size_ret != NULL) \ *param_value_size_ret = strlen(var)+1; \ break; #define INFO_CASE_STRING(param, var) case param: \ INFO_CASE_STRING_EX2(var) #define INFO_CASE_VALUE(param, type, value) case param: {\ type tmp = (value);\ INFO_CASE_EX2(tmp);\ } //warning: var is a reference #define INFO_CASE_EX(param, var, size) case param: \ if (param_value != NULL) { \ if (param_value_size < size) \ return CL_INVALID_VALUE; \ memcpy(param_value, var, size); \ } \ if (param_value_size_ret != NULL) \ *param_value_size_ret = size; \ break; #endif /* SOCL_GETINFO_H */ starpu-1.4.9+dfsg/socl/src/init.c000066400000000000000000000070331507764646700166510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2012-2012 Vincent Danjean * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../src/common/utils.h" #include "socl.h" #include "gc.h" #include "mem_objects.h" int _starpu_init_failed; static enum initialization _socl_init = UNINITIALIZED; static starpu_pthread_mutex_t _socl_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t _socl_cond = STARPU_PTHREAD_COND_INITIALIZER; static pthread_t _socl_thread_init; static struct starpu_conf conf; int socl_init_starpu(void) { STARPU_PTHREAD_MUTEX_LOCK(&_socl_mutex); if (_socl_init == INITIALIZED) { STARPU_PTHREAD_MUTEX_UNLOCK(&_socl_mutex); return 0; } if (_socl_init == CHANGING) { /* Avoid recursion when starpu_init calls hwloc initialization which uses its opencl plugin */ if (pthread_equal(_socl_thread_init, pthread_self())) { STARPU_PTHREAD_MUTEX_UNLOCK(&_socl_mutex); return -1; } /* Somebody else is initializing already, wait for him */ while (_socl_init != INITIALIZED) STARPU_PTHREAD_COND_WAIT(&_socl_cond, &_socl_mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&_socl_mutex); return 0; } _socl_init = CHANGING; _socl_thread_init = pthread_self(); STARPU_PTHREAD_MUTEX_UNLOCK(&_socl_mutex); starpu_conf_init(&conf); conf.precedence_over_environment_variables = 1; conf.ncuda = 0; conf.ncpus = 0; _starpu_init_failed = starpu_init(&conf); if (_starpu_init_failed != 0) { DEBUG_MSG("Error when calling starpu_init: %d\n", _starpu_init_failed); } else { if (starpu_opencl_worker_get_count() == 0) { DEBUG_MSG("StarPU didn't find any OpenCL device. Try disabling CUDA support in StarPU (export STARPU_NCUDA=0).\n"); _starpu_init_failed = -ENODEV; } } /* Disable dataflow implicit dependencies */ starpu_data_set_default_sequential_consistency_flag(0); STARPU_PTHREAD_MUTEX_LOCK(&_socl_mutex); _socl_init = INITIALIZED; STARPU_PTHREAD_COND_BROADCAST(&_socl_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&_socl_mutex); return 0; } /** * Initialize SOCL */ __attribute__((constructor)) static void socl_init() { mem_object_init(); gc_start(); } void soclShutdown() { static int shutdown = 0; if (!shutdown) { shutdown = 1; STARPU_PTHREAD_MUTEX_LOCK(&_socl_mutex); if(_socl_init) starpu_task_wait_for_all(); gc_stop(); if(_socl_init) starpu_task_wait_for_all(); int active_entities = gc_active_entity_count(); if (active_entities != 0) { DEBUG_MSG("Unreleased entities: %d\n", active_entities); gc_print_remaining_entities(); } if(_socl_init && _starpu_init_failed != -ENODEV) starpu_shutdown(); STARPU_PTHREAD_MUTEX_UNLOCK(&_socl_mutex); if (socl_devices != NULL) { free(socl_devices); socl_devices = NULL; } } } /** * Shutdown SOCL */ __attribute__((destructor)) static void socl_shutdown() { char * skip_str = getenv("SOCL_SKIP_DESTRUCTOR"); int skip = (skip_str != NULL ? atoi(skip_str) : 0); if (!skip) soclShutdown(); } starpu-1.4.9+dfsg/socl/src/init.h000066400000000000000000000017031507764646700166540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "gc.h" #include "mem_objects.h" #ifndef SOCL_INIT_H #define SOCL_INIT_H extern int _starpu_init_failed; extern volatile int _starpu_init; /** * Initialize StarPU */ int socl_init_starpu(void); void soclShutdown(void); #endif /* SOCL_INIT_H */ starpu-1.4.9+dfsg/socl/src/mem_objects.c000066400000000000000000000042131507764646700201720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #define mem_object_hash_key 257 static cl_mem p_mem_objects[mem_object_hash_key] = {NULL}; static starpu_pthread_spinlock_t p_mem_objects_spinlock[mem_object_hash_key]; #define LOCK(i) starpu_pthread_spin_lock(&p_mem_objects_spinlock[i]); #define UNLOCK(i) starpu_pthread_spin_unlock(&p_mem_objects_spinlock[i]); void mem_object_init(void) { int i; for (i=0; i> 4; uintptr_t t3 = t2 % mem_object_hash_key; return (int)t3; } void mem_object_store(cl_mem m) { int hash = mem_object_hash(m); LOCK(hash); m->prev = NULL; m->next = p_mem_objects[hash]; if (p_mem_objects[hash] != NULL) p_mem_objects[hash]->prev = m; p_mem_objects[hash] = m; UNLOCK(hash); } void mem_object_release(cl_mem m) { int hash = mem_object_hash(m); LOCK(hash); if (m->prev != NULL) m->prev->next = m->next; if (m->next != NULL) m->next->prev = m->prev; if (p_mem_objects[hash] == m) { p_mem_objects[hash] = m->next; } UNLOCK(hash) } cl_mem mem_object_fetch(const void * addr) { int hash = mem_object_hash(*(cl_mem*)addr); LOCK(hash); cl_mem buf; for (buf = p_mem_objects[hash]; buf != NULL; buf = buf->next) { if (*(cl_mem*)addr == buf) { UNLOCK(hash); return buf; } } UNLOCK(hash); return NULL; } #undef LOCK #undef UNLOCK #undef mem_object_hash_key starpu-1.4.9+dfsg/socl/src/mem_objects.h000066400000000000000000000016231507764646700202010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef SOCL_MEM_OBJECTS_H #define SOCL_MEM_OBJECTS_H void mem_object_init(void); void mem_object_store(cl_mem m); void mem_object_release(cl_mem m); cl_mem mem_object_fetch(const void * addr); #endif /* SOCL_MEM_OBJECTS_H */ starpu-1.4.9+dfsg/socl/src/ocl_icd.h000066400000000000000000001016541507764646700173130ustar00rootroot00000000000000/** Copyright (c) 2012, Brice Videau Copyright (c) 2012, Vincent Danjean All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Do not edit this file. It is automatically generated. */ #include "CL/cl.h" #include "CL/cl_gl.h" #include "CL/cl_ext.h" #define OCL_ICD_API_VERSION 1 #define OCL_ICD_IDENTIFIED_FUNCTIONS 102 struct _cl_icd_dispatch { CL_API_ENTRY cl_int (CL_API_CALL*clGetPlatformIDs)( cl_uint /* num_entries */, cl_platform_id * /* platforms */, cl_uint * /* num_platforms */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL* clGetPlatformInfo)( cl_platform_id /* platform */, cl_platform_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetDeviceIDs)( cl_platform_id /* platform */, cl_device_type /* device_type */, cl_uint /* num_entries */, cl_device_id * /* devices */, cl_uint * /* num_devices */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetDeviceInfo)( cl_device_id /* device */, cl_device_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_context (CL_API_CALL*clCreateContext)( const cl_context_properties * /* properties */, cl_uint /* num_devices */, const cl_device_id * /* devices */, void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), void * /* user_data */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_context (CL_API_CALL*clCreateContextFromType)( const cl_context_properties * /* properties */, cl_device_type /* device_type */, void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *), void * /* user_data */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clRetainContext)( cl_context /* context */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clReleaseContext)( cl_context /* context */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetContextInfo)( cl_context /* context */, cl_context_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_command_queue (CL_API_CALL*clCreateCommandQueue)( cl_context /* context */, cl_device_id /* device */, cl_command_queue_properties /* properties */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clRetainCommandQueue)( cl_command_queue /* command_queue */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clReleaseCommandQueue)( cl_command_queue /* command_queue */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetCommandQueueInfo)( cl_command_queue /* command_queue */, cl_command_queue_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clSetCommandQueueProperty)( cl_command_queue /* command_queue */, cl_command_queue_properties /* properties */, cl_bool /* enable */, cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; CL_API_ENTRY cl_mem (CL_API_CALL*clCreateBuffer)( cl_context /* context */, cl_mem_flags /* flags */, size_t /* size */, void * /* host_ptr */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_mem (CL_API_CALL*clCreateImage2D)( cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, size_t /* image_width */, size_t /* image_height */, size_t /* image_row_pitch */, void * /* host_ptr */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_mem (CL_API_CALL*clCreateImage3D)( cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, size_t /* image_width */, size_t /* image_height */, size_t /* image_depth */, size_t /* image_row_pitch */, size_t /* image_slice_pitch */, void * /* host_ptr */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clRetainMemObject)( cl_mem /* memobj */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clReleaseMemObject)( cl_mem /* memobj */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetSupportedImageFormats)( cl_context /* context */, cl_mem_flags /* flags */, cl_mem_object_type /* image_type */, cl_uint /* num_entries */, cl_image_format * /* image_formats */, cl_uint * /* num_image_formats */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetMemObjectInfo)( cl_mem /* memobj */, cl_mem_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetImageInfo)( cl_mem /* image */, cl_image_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_sampler (CL_API_CALL*clCreateSampler)( cl_context /* context */, cl_bool /* normalized_coords */, cl_addressing_mode /* addressing_mode */, cl_filter_mode /* filter_mode */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clRetainSampler)( cl_sampler /* sampler */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clReleaseSampler)( cl_sampler /* sampler */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetSamplerInfo)( cl_sampler /* sampler */, cl_sampler_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_program (CL_API_CALL*clCreateProgramWithSource)( cl_context /* context */, cl_uint /* count */, const char ** /* strings */, const size_t * /* lengths */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_program (CL_API_CALL*clCreateProgramWithBinary)( cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const size_t * /* lengths */, const unsigned char ** /* binaries */, cl_int * /* binary_status */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clRetainProgram)( cl_program /* program */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clReleaseProgram)( cl_program /* program */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clBuildProgram)( cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* options */, void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), void * /* user_data */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clUnloadCompiler)( void ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetProgramInfo)( cl_program /* program */, cl_program_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetProgramBuildInfo)( cl_program /* program */, cl_device_id /* device */, cl_program_build_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_kernel (CL_API_CALL*clCreateKernel)( cl_program /* program */, const char * /* kernel_name */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clCreateKernelsInProgram)( cl_program /* program */, cl_uint /* num_kernels */, cl_kernel * /* kernels */, cl_uint * /* num_kernels_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clRetainKernel)( cl_kernel /* kernel */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clReleaseKernel)( cl_kernel /* kernel */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clSetKernelArg)( cl_kernel /* kernel */, cl_uint /* arg_index */, size_t /* arg_size */, const void * /* arg_value */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetKernelInfo)( cl_kernel /* kernel */, cl_kernel_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetKernelWorkGroupInfo)( cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_work_group_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clWaitForEvents)( cl_uint /* num_events */, const cl_event * /* event_list */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetEventInfo)( cl_event /* event */, cl_event_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clRetainEvent)( cl_event /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clReleaseEvent)( cl_event /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetEventProfilingInfo)( cl_event /* event */, cl_profiling_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clFlush)( cl_command_queue /* command_queue */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clFinish)( cl_command_queue /* command_queue */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueReadBuffer)( cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, size_t /* offset */, size_t /* cb */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueWriteBuffer)( cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, size_t /* offset */, size_t /* cb */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueCopyBuffer)( cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, size_t /* src_offset */, size_t /* dst_offset */, size_t /* cb */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueReadImage)( cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_read */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* row_pitch */, size_t /* slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueWriteImage)( cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_write */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* input_row_pitch */, size_t /* input_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueCopyImage)( cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_image */, const size_t * /* src_origin[3] */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueCopyImageToBuffer)( cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_buffer */, const size_t * /* src_origin[3] */, const size_t * /* region[3] */, size_t /* dst_offset */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueCopyBufferToImage)( cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_image */, size_t /* src_offset */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY void * (CL_API_CALL*clEnqueueMapBuffer)( cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, size_t /* offset */, size_t /* cb */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY void * (CL_API_CALL*clEnqueueMapImage)( cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t * /* image_row_pitch */, size_t * /* image_slice_pitch */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueUnmapMemObject)( cl_command_queue /* command_queue */, cl_mem /* memobj */, void * /* mapped_ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueNDRangeKernel)( cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* work_dim */, const size_t * /* global_work_offset */, const size_t * /* global_work_size */, const size_t * /* local_work_size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueTask)( cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueNativeKernel)( cl_command_queue /* command_queue */, void (*user_func)(void *), void * /* args */, size_t /* cb_args */, cl_uint /* num_mem_objects */, const cl_mem * /* mem_list */, const void ** /* args_mem_loc */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueMarker)( cl_command_queue /* command_queue */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueWaitForEvents)( cl_command_queue /* command_queue */, cl_uint /* num_events */, const cl_event * /* event_list */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueBarrier)( cl_command_queue /* command_queue */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY void * (CL_API_CALL*clGetExtensionFunctionAddress)( const char * /* func_name */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_mem (CL_API_CALL*clCreateFromGLBuffer)( cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* bufobj */, int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_mem (CL_API_CALL*clCreateFromGLTexture2D)( cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, cl_GLint /* miplevel */, cl_GLuint /* texture */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_mem (CL_API_CALL*clCreateFromGLTexture3D)( cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, cl_GLint /* miplevel */, cl_GLuint /* texture */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_mem (CL_API_CALL*clCreateFromGLRenderbuffer)( cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* renderbuffer */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetGLObjectInfo)( cl_mem /* memobj */, cl_gl_object_type * /* gl_object_type */, cl_GLuint * /* gl_object_name */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetGLTextureInfo)( cl_mem /* memobj */, cl_gl_texture_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueAcquireGLObjects)( cl_command_queue /* command_queue */, cl_uint /* num_objects */, const cl_mem * /* mem_objects */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueReleaseGLObjects)( cl_command_queue /* command_queue */, cl_uint /* num_objects */, const cl_mem * /* mem_objects */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL*clGetGLContextInfoKHR)( const cl_context_properties * /* properties */, cl_gl_context_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_0; CL_API_ENTRY cl_int (CL_API_CALL* clUnknown75)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown76)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown77)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown78)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown79)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown80)( void); CL_API_ENTRY cl_int (CL_API_CALL*clSetEventCallback)( cl_event /* event */, cl_int /* command_exec_callback_type */, void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), void * /* user_data */ ) CL_API_SUFFIX__VERSION_1_1; CL_API_ENTRY cl_mem (CL_API_CALL*clCreateSubBuffer)( cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */, const void * /* buffer_create_info */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_1; CL_API_ENTRY cl_int (CL_API_CALL*clSetMemObjectDestructorCallback)( cl_mem /* memobj */, void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1; CL_API_ENTRY cl_event (CL_API_CALL*clCreateUserEvent)( cl_context /* context */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_1; CL_API_ENTRY cl_int (CL_API_CALL*clSetUserEventStatus)( cl_event /* event */, cl_int /* execution_status */ ) CL_API_SUFFIX__VERSION_1_1; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueReadBufferRect)( cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, const size_t * /* buffer_origin */, const size_t * /* host_origin */, const size_t * /* region */, size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, size_t /* host_slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_1; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueWriteBufferRect)( cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, const size_t * /* buffer_origin */, const size_t * /* host_origin */, const size_t * /* region */, size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, size_t /* host_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_1; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueCopyBufferRect)( cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, const size_t * /* src_origin */, const size_t * /* dst_origin */, const size_t * /* region */, size_t /* src_row_pitch */, size_t /* src_slice_pitch */, size_t /* dst_row_pitch */, size_t /* dst_slice_pitch */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_1; CL_API_ENTRY cl_int (CL_API_CALL* clCreateSubDevicesEXT)( cl_device_id /*in_device*/, const cl_device_partition_property_ext * /* properties */, cl_uint /*num_entries*/, cl_device_id * /*out_devices*/, cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; CL_API_ENTRY cl_int (CL_API_CALL* clRetainDeviceEXT)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; CL_API_ENTRY cl_int (CL_API_CALL* clReleaseDeviceEXT)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; CL_API_ENTRY cl_int (CL_API_CALL* clUnknown92)( void); CL_API_ENTRY cl_int (CL_API_CALL*clCreateSubDevices)( cl_device_id /* in_device */, const cl_device_partition_property * /* properties */, cl_uint /* num_devices */, cl_device_id * /* out_devices */, cl_uint * /* num_devices_ret */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY cl_int (CL_API_CALL*clRetainDevice)( cl_device_id /* device */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY cl_int (CL_API_CALL*clReleaseDevice)( cl_device_id /* device */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY cl_mem (CL_API_CALL*clCreateImage)( cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, const cl_image_desc * /* image_desc */, void * /* host_ptr */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY cl_program (CL_API_CALL*clCreateProgramWithBuiltInKernels)( cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* kernel_names */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY cl_int (CL_API_CALL*clCompileProgram)( cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* options */, cl_uint /* num_input_headers */, const cl_program * /* input_headers */, const char ** /* header_include_names */, void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), void * /* user_data */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY cl_program (CL_API_CALL*clLinkProgram)( cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* options */, cl_uint /* num_input_programs */, const cl_program * /* input_programs */, void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), void * /* user_data */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY cl_int (CL_API_CALL*clUnloadPlatformCompiler)( cl_platform_id /* platform */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY cl_int (CL_API_CALL*clGetKernelArgInfo)( cl_kernel /* kernel */, cl_uint /* arg_indx */, cl_kernel_arg_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueFillBuffer)( cl_command_queue /* command_queue */, cl_mem /* buffer */, const void * /* pattern */, size_t /* pattern_size */, size_t /* offset */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueFillImage)( cl_command_queue /* command_queue */, cl_mem /* image */, const void * /* fill_color */, const size_t * /* origin[3] */, const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueMigrateMemObjects)( cl_command_queue /* command_queue */, cl_uint /* num_mem_objects */, const cl_mem * /* mem_objects */, cl_mem_migration_flags /* flags */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueMarkerWithWaitList)( cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY cl_int (CL_API_CALL*clEnqueueBarrierWithWaitList)( cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY void * (CL_API_CALL* clGetExtensionFunctionAddressForPlatform)( cl_platform_id /* platform */, const char * /* func_name */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY cl_mem (CL_API_CALL*clCreateFromGLTexture)( cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, cl_GLint /* miplevel */, cl_GLuint /* texture */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2; CL_API_ENTRY cl_int (CL_API_CALL* clUnknown109)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown110)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown111)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown112)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown113)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown114)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown115)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown116)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown117)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown118)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown119)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown120)( void); CL_API_ENTRY cl_int (CL_API_CALL* clUnknown121)( void); }; starpu-1.4.9+dfsg/socl/src/socl.c000066400000000000000000000117121507764646700166450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2012-2012 Vincent Danjean * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" struct _cl_icd_dispatch socl_master_dispatch = { soclGetPlatformIDs, soclGetPlatformInfo, soclGetDeviceIDs, soclGetDeviceInfo, soclCreateContext, soclCreateContextFromType, soclRetainContext, soclReleaseContext, soclGetContextInfo, soclCreateCommandQueue, soclRetainCommandQueue, soclReleaseCommandQueue, soclGetCommandQueueInfo, soclSetCommandQueueProperty, soclCreateBuffer, soclCreateImage2D, soclCreateImage3D, soclRetainMemObject, soclReleaseMemObject, soclGetSupportedImageFormats, soclGetMemObjectInfo, soclGetImageInfo, soclCreateSampler, soclRetainSampler, soclReleaseSampler, soclGetSamplerInfo, soclCreateProgramWithSource, soclCreateProgramWithBinary, soclRetainProgram, soclReleaseProgram, soclBuildProgram, soclUnloadCompiler, soclGetProgramInfo, soclGetProgramBuildInfo, soclCreateKernel, soclCreateKernelsInProgram, soclRetainKernel, soclReleaseKernel, soclSetKernelArg, soclGetKernelInfo, soclGetKernelWorkGroupInfo, soclWaitForEvents, soclGetEventInfo, soclRetainEvent, soclReleaseEvent, soclGetEventProfilingInfo, soclFlush, soclFinish, soclEnqueueReadBuffer, soclEnqueueWriteBuffer, soclEnqueueCopyBuffer, soclEnqueueReadImage, soclEnqueueWriteImage, soclEnqueueCopyImage, soclEnqueueCopyImageToBuffer, soclEnqueueCopyBufferToImage, soclEnqueueMapBuffer, soclEnqueueMapImage, soclEnqueueUnmapMemObject, soclEnqueueNDRangeKernel, soclEnqueueTask, soclEnqueueNativeKernel, soclEnqueueMarker, soclEnqueueWaitForEvents, soclEnqueueBarrier, soclGetExtensionFunctionAddress, (void *) NULL, // clCreateFromGLBuffer, (void *) NULL, // clCreateFromGLTexture2D, (void *) NULL, // clCreateFromGLTexture3D, (void *) NULL, // clCreateFromGLRenderbuffer, (void *) NULL, // clGetGLObjectInfo, (void *) NULL, // clGetGLTextureInfo, (void *) NULL, // clEnqueueAcquireGLObjects, (void *) NULL, // clEnqueueReleaseGLObjects, (void *) NULL, // clGetGLContextInfoKHR, (void *) NULL, // (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, // clSetEventCallback, (void *) NULL, // clCreateSubBuffer, (void *) NULL, // clSetMemObjectDestructorCallback, (void *) NULL, // clCreateUserEvent, (void *) NULL, // clSetUserEventStatus, (void *) NULL, // clEnqueueReadBufferRect, (void *) NULL, // clEnqueueWriteBufferRect, (void *) NULL, // clEnqueueCopyBufferRect, (void *) NULL, // clCreateSubDevicesEXT, (void *) NULL, // clRetainDeviceEXT, (void *) NULL, // clReleaseDeviceEXT, (void *) NULL, (void *) NULL, // clCreateSubDevices, (void *) NULL, // clRetainDevice, (void *) NULL, // clReleaseDevice, (void *) NULL, // clCreateImage, (void *) NULL, // clCreateProgramWithBuiltInKernels, (void *) NULL, // clCompileProgram, (void *) NULL, // clLinkProgram, (void *) NULL, // clUnloadPlatformCompiler, (void *) NULL, // clGetKernelArgInfo, (void *) NULL, // clEnqueueFillBuffer, (void *) NULL, // clEnqueueFillImage, (void *) NULL, // clEnqueueMigrateMemObjects, soclEnqueueMarkerWithWaitList, // clEnqueueMarkerWithWaitList, soclEnqueueBarrierWithWaitList, // clEnqueueBarrierWithWaitList, soclGetExtensionFunctionAddressForPlatform, // clGetExtensionFunctionAddressForPlatform, (void *) NULL, // clCreateFromGLTexture, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL }; struct _cl_platform_id socl_platform = {&socl_master_dispatch}; const char * __attribute__ ((aligned (16))) SOCL_PROFILE = "FULL_PROFILE"; const char * __attribute__ ((aligned (16))) SOCL_VERSION = "OpenCL 1.0 SOCL Edition (0.1.0)"; const char * __attribute__ ((aligned (16))) SOCL_PLATFORM_NAME = "SOCL Platform"; const char * __attribute__ ((aligned (16))) SOCL_VENDOR = "Inria"; const char * __attribute__ ((aligned (16))) SOCL_PLATFORM_EXTENSIONS = "cl_khr_icd"; const char * __attribute__ ((aligned (16))) SOCL_PLATFORM_ICD_SUFFIX_KHR ="SOCL"; /* Command queues with profiling enabled * This allows us to disable StarPU profiling it * is equal to 0 */ int __attribute__ ((aligned (16))) profiling_queue_count = 0; struct _cl_device_id * socl_devices = NULL; unsigned int socl_device_count = 0; starpu-1.4.9+dfsg/socl/src/socl.h000066400000000000000000000641741507764646700166640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef SOCL_H #define SOCL_H #define CL_CONTEXT_SCHEDULER_SOCL 0xFF01 #define CL_CONTEXT_NAME_SOCL 0xFF02 #include #include #include #include #include "CL/cl.h" #include "ocl_icd.h" #include typedef struct starpu_task * starpu_task; #ifdef UNUSED #elif defined(__GNUC__) #define UNUSED(x) UNUSED_ ## x __attribute__((unused)) #else #define UNUSED(x) x #endif /** * Entity that can be managed by the garbage collector */ typedef struct entity * entity; struct entity { struct _cl_icd_dispatch * dispatch; /* Reference count */ size_t refs; /* Callback called on release */ void (*release_callback)(void*entity); /* Entity identifier (used for debugging purpose) */ char * name; /* Next entity in garbage collector queue */ entity prev; entity next; }; /* OpenCL entities (context, command queues, buffers...) must use * this macro as their first field */ #define CL_ENTITY struct entity _entity; #include "command.h" #include "command_list.h" #include "command_queue.h" #include "debug.h" #include "event.h" #include "gc.h" #include "mem_objects.h" #include "task.h" #include "util.h" struct _cl_platform_id { struct _cl_icd_dispatch *dispatch; }; struct _cl_device_id { struct _cl_icd_dispatch *dispatch; int device_id; int worker_id; }; #define RETURN_EVENT(ev, event) \ if ((event) != NULL) { \ *event = ev; \ } \ else { \ gc_entity_release(ev); \ } #define MAY_BLOCK_THEN_RETURN_EVENT(ev,blocking,event) \ if ((blocking) == CL_TRUE) { \ soclWaitForEvents(1, &ev); \ } \ RETURN_EVENT(ev,event); \ /* Constants */ extern const char * SOCL_PROFILE; extern const char * SOCL_VERSION; extern const char * SOCL_PLATFORM_NAME; extern const char * SOCL_VENDOR; extern const char * SOCL_PLATFORM_EXTENSIONS; extern const char * SOCL_PLATFORM_ICD_SUFFIX_KHR; struct _cl_context { CL_ENTITY; void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *); void *user_data; /* Associated devices */ cl_device_id * devices; cl_uint num_devices; /* Scheduling context */ unsigned sched_ctx; /* Properties */ cl_context_properties * properties; cl_uint num_properties; /* ID */ #ifdef DEBUG int id; #endif }; struct _cl_command_queue { CL_ENTITY; cl_command_queue_properties properties; cl_device_id device; cl_context context; /* Stored commands */ command_list commands; /* Last enqueued barrier-like event */ cl_command barrier; /* Mutex */ starpu_pthread_mutex_t mutex; /* ID */ #ifdef DEBUG int id; #endif }; struct _cl_event { CL_ENTITY; /* Command queue */ cl_command_queue cq; /* Command */ cl_command command; /* Event status */ cl_int status; /* ID * This ID is used as a tag for StarPU dependencies */ int id; /* Profiling info */ cl_ulong prof_queued, prof_submit, prof_start, prof_end; }; struct _cl_mem { CL_ENTITY; /* StarPU handle */ starpu_data_handle_t handle; /* Pointer to data in host memory */ void *ptr; /* Buffer size */ size_t size; /* Indicates how many references (mapping, MEM_USE_HOST_PTR...) require * coherence in host memory. If set to zero, no coherency is maintained * (this is the most efficient) */ int map_count; /* Creation flags */ cl_mem_flags flags; /* Creation context */ cl_context context; /* Access mode */ int mode; /* Host ptr */ void * host_ptr; /* Fields used to store cl_mems in mem_objects list */ cl_mem prev; cl_mem next; /* Indicates if a buffer may contain meaningful data. Otherwise we don't have to transfer it */ int scratch; /* ID */ #ifdef DEBUG int id; #endif }; struct _cl_program { CL_ENTITY; /* Real OpenCL Programs * There is one entry for each device (even non OpenCL ones) * in order to index this array with dev_id */ cl_program *cl_programs; /* Context used to create this program */ cl_context context; /* Options */ char * options; unsigned int options_size; /* ID */ #ifdef DEBUG int id; #endif }; enum kernel_arg_type { Null, Buffer, Immediate }; typedef cl_int (*split_func_t)(cl_command_queue, cl_uint, void *, const cl_event, cl_event *); struct _cl_kernel { CL_ENTITY; /* Associated program */ cl_program program; /* StarPU codelet */ struct starpu_perfmodel * perfmodel; /* Kernel name */ char * kernel_name; /* Real OpenCL kernels */ cl_kernel *cl_kernels; /* clCreateKernel return codes */ cl_int *errcodes; /* Arguments */ unsigned int num_args; size_t *arg_size; enum kernel_arg_type *arg_type; void **arg_value; /* Partition function */ cl_uint split_space; split_func_t split_func; cl_ulong * split_perfs; void * split_data; starpu_pthread_mutex_t split_lock; /* ID */ #ifdef DEBUG int id; #endif }; /* Global vars */ /* Command queues with profiling enabled * This allows us to disable StarPU profiling it * is equal to 0 */ extern int profiling_queue_count; /***************************************************************************/ /* Platform API */ extern CL_API_ENTRY cl_int CL_API_CALL soclGetPlatformIDs(cl_uint /* num_entries */, cl_platform_id * /* platforms */, cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclGetPlatformInfo(cl_platform_id /* platform */, cl_platform_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Device APIs */ extern CL_API_ENTRY cl_int CL_API_CALL soclGetDeviceIDs(cl_platform_id /* platform */, cl_device_type /* device_type */, cl_uint /* num_entries */, cl_device_id * /* devices */, cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclGetDeviceInfo(cl_device_id /* device */, cl_device_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Context APIs */ extern CL_API_ENTRY cl_context CL_API_CALL soclCreateContext(const cl_context_properties * /* properties */, cl_uint /* num_devices */, const cl_device_id * /* devices */, void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_context CL_API_CALL soclCreateContextFromType(const cl_context_properties * /* properties */, cl_device_type /* device_type */, void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */, void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclGetContextInfo(cl_context /* context */, cl_context_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Command Queue APIs */ extern CL_API_ENTRY cl_command_queue CL_API_CALL soclCreateCommandQueue(cl_context /* context */, cl_device_id /* device */, cl_command_queue_properties /* properties */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclGetCommandQueueInfo(cl_command_queue /* command_queue */, cl_command_queue_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclSetCommandQueueProperty(cl_command_queue /* command_queue */, cl_command_queue_properties /* properties */, cl_bool /* enable */, cl_command_queue_properties * /* old_properties */) CL_API_SUFFIX__VERSION_1_0; /* Memory Object APIs */ extern CL_API_ENTRY cl_mem CL_API_CALL soclCreateBuffer(cl_context /* context */, cl_mem_flags /* flags */, size_t /* size */, void * /* host_ptr */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL soclCreateImage2D(cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, size_t /* image_width */, size_t /* image_height */, size_t /* image_row_pitch */, void * /* host_ptr */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL soclCreateImage3D(cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, size_t /* image_width */, size_t /* image_height */, size_t /* image_depth */, size_t /* image_row_pitch */, size_t /* image_slice_pitch */, void * /* host_ptr */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclGetSupportedImageFormats(cl_context /* context */, cl_mem_flags /* flags */, cl_mem_object_type /* image_type */, cl_uint /* num_entries */, cl_image_format * /* image_formats */, cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclGetMemObjectInfo(cl_mem /* memobj */, cl_mem_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclGetImageInfo(cl_mem /* image */, cl_image_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Sampler APIs */ extern CL_API_ENTRY cl_sampler CL_API_CALL soclCreateSampler(cl_context /* context */, cl_bool /* normalized_coords */, cl_addressing_mode /* addressing_mode */, cl_filter_mode /* filter_mode */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclGetSamplerInfo(cl_sampler /* sampler */, cl_sampler_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Program Object APIs */ extern CL_API_ENTRY cl_program CL_API_CALL soclCreateProgramWithSource(cl_context /* context */, cl_uint /* count */, const char ** /* strings */, const size_t * /* lengths */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_program CL_API_CALL soclCreateProgramWithBinary(cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const size_t * /* lengths */, const unsigned char ** /* binaries */, cl_int * /* binary_status */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclBuildProgram(cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* options */, void (CL_CALLBACK *pfn_notify)(cl_program /* program */, void * /* user_data */), void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclGetProgramInfo(cl_program /* program */, cl_program_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclGetProgramBuildInfo(cl_program /* program */, cl_device_id /* device */, cl_program_build_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Kernel Object APIs */ extern CL_API_ENTRY cl_kernel CL_API_CALL soclCreateKernel(cl_program /* program */, const char * /* kernel_name */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclCreateKernelsInProgram(cl_program /* program */, cl_uint /* num_kernels */, cl_kernel * /* kernels */, cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclSetKernelArg(cl_kernel /* kernel */, cl_uint /* arg_index */, size_t /* arg_size */, const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclGetKernelInfo(cl_kernel /* kernel */, cl_kernel_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclGetKernelWorkGroupInfo(cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_work_group_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Event Object APIs */ extern CL_API_ENTRY cl_int CL_API_CALL soclWaitForEvents(cl_uint /* num_events */, const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclGetEventInfo(cl_event /* event */, cl_event_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; /* Profiling APIs */ extern CL_API_ENTRY cl_int CL_API_CALL soclGetEventProfilingInfo(cl_event /* event */, cl_profiling_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Flush and Finish APIs */ extern CL_API_ENTRY cl_int CL_API_CALL soclFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; /* Enqueued Commands APIs */ extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueReadBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, size_t /* offset */, size_t /* cb */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueWriteBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, size_t /* offset */, size_t /* cb */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueCopyBuffer(cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, size_t /* src_offset */, size_t /* dst_offset */, size_t /* cb */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueReadImage(cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_read */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* row_pitch */, size_t /* slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueWriteImage(cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_write */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* input_row_pitch */, size_t /* input_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueCopyImage(cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_image */, const size_t * /* src_origin[3] */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_buffer */, const size_t * /* src_origin[3] */, const size_t * /* region[3] */, size_t /* dst_offset */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueCopyBufferToImage(cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_image */, size_t /* src_offset */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY void * CL_API_CALL soclEnqueueMapBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, size_t /* offset */, size_t /* cb */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY void * CL_API_CALL soclEnqueueMapImage(cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t * /* image_row_pitch */, size_t * /* image_slice_pitch */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueUnmapMemObject(cl_command_queue /* command_queue */, cl_mem /* memobj */, void * /* mapped_ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueNDRangeKernel(cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* work_dim */, const size_t * /* global_work_offset */, const size_t * /* global_work_size */, const size_t * /* local_work_size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueTask(cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueNativeKernel(cl_command_queue /* command_queue */, void (*user_func)(void *), void * /* args */, size_t /* cb_args */, cl_uint /* num_mem_objects */, const cl_mem * /* mem_list */, const void ** /* args_mem_loc */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueMarker(cl_command_queue /* command_queue */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueWaitForEvents(cl_command_queue /* command_queue */, cl_uint /* num_events */, const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL soclEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */ ) CL_API_SUFFIX__VERSION_1_2; /* Extension function access * * Returns the extension function address for the given function name, * or NULL if a valid function can not be found. The client must * check to make sure the address is not NULL, before using or * calling the returned function address. */ extern CL_API_ENTRY void * CL_API_CALL soclGetExtensionFunctionAddress(const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0; extern void * CL_API_CALL soclGetExtensionFunctionAddressForPlatform(cl_platform_id p, const char * func_name) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL soclIcdGetPlatformIDsKHR(cl_uint /* num_entries */, cl_platform_id * /* platforms */, cl_uint * /* num_platforms */) CL_EXT_SUFFIX__VERSION_1_0; extern struct _cl_icd_dispatch socl_master_dispatch; extern struct _cl_platform_id socl_platform; extern struct _cl_device_id * socl_devices; extern unsigned int socl_device_count; #endif /* SOCL_H */ starpu-1.4.9+dfsg/socl/src/task.c000066400000000000000000000101741507764646700166500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "gc.h" #include "event.h" void command_completed(cl_command cmd) { starpu_task task = cmd->task; cl_event ev = command_event_get_ex(cmd); ev->status = CL_COMPLETE; ev->prof_end = _socl_nanotime(); /* Commands without codelets (marker, barrier, unmap...) take no time */ if (task->cl == NULL) ev->prof_start = ev->prof_end; /* Trigger the tag associated to the command event */ DEBUG_MSG("Trigger event %d\n", ev->id); starpu_tag_notify_from_apps(ev->id); gc_entity_release(ev); } void command_completed_task_callback(void *arg) { cl_command cmd = (cl_command)arg; command_completed(cmd); /* Release the command stored task callback parameter */ gc_entity_release(cmd); } /* * Create a StarPU task */ starpu_task task_create(cl_command_type typ) { struct starpu_task * task; /* Create StarPU task */ task = starpu_task_create(); /* Set task common settings */ task->destroy = 0; task->detach = 0; task->use_tag = 1; task->tag_id = event_unique_id(); return task; } void task_depends_on(starpu_task task, cl_uint num_events, cl_event *events) { if (num_events != 0) { cl_uint i; starpu_tag_t * tags = malloc(num_events * sizeof(starpu_tag_t)); DEBUG_MSG("Task %p depends on events:", task); for (i=0; iid; DEBUG_MSG_NOHEAD(" %d", events[i]->id); } DEBUG_MSG_NOHEAD("\n"); starpu_tag_declare_deps_array(task->tag_id, num_events, tags); free(tags); } } cl_int task_submit_ex(starpu_task task, cl_command cmd) { /* Associated the task to the command */ cmd->task = task; cl_uint num_events = command_num_events_get_ex(cmd); cl_event * events = command_events_get_ex(cmd); task_depends_on(task, num_events, events); task->callback_func = command_completed_task_callback; gc_entity_store(&task->callback_arg, cmd); cl_event ev = command_event_get_ex(cmd); ev->prof_submit = _socl_nanotime(); gc_entity_release(ev); /* Submit task */ int ret = (task->cl != NULL && task->where == STARPU_OPENCL ? starpu_task_submit_to_ctx(task, cmd->event->cq->context->sched_ctx) : starpu_task_submit(task)); if (ret != 0) DEBUG_ERROR("Unable to submit a task. Error %d\n", ret); return CL_SUCCESS; } /********************************* * CPU task helper *********************************/ struct cputask_arg { void (*callback)(void*); void * arg; int free_arg; cl_command cmd; int complete_cmd; }; static void cputask_task(void *args) { struct cputask_arg * arg = (struct cputask_arg*)args; arg->callback(arg->arg); if (arg->complete_cmd) command_completed(arg->cmd); if (arg->free_arg) { assert(arg->arg != NULL); free(arg->arg); arg->arg = NULL; } gc_entity_unstore(&arg->cmd); free(arg); } void cpu_task_submit_ex(cl_command cmd, void (*callback)(void*), void *arg, int free_arg, int complete_cmd, struct starpu_codelet * codelet, unsigned num_events, cl_event * events) { struct cputask_arg * a = malloc(sizeof(struct cputask_arg)); a->callback = callback; a->arg = arg; a->free_arg = free_arg; gc_entity_store(&a->cmd, cmd); a->complete_cmd = complete_cmd; codelet->where = STARPU_OPENCL | STARPU_CPU | STARPU_CUDA; starpu_task task = task_create(CL_COMMAND_TASK); if (num_events != 0) { task_depends_on(task, num_events, events); } task->callback_func = cputask_task; task->callback_arg = a; cmd->task = task; int ret = starpu_task_submit(task); if (ret != 0) DEBUG_ERROR("Unable to submit a task. Error %d\n", ret); } starpu-1.4.9+dfsg/socl/src/task.h000066400000000000000000000037611507764646700166610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef SOCL_TASK_H #define SOCL_TASK_H #include "socl.h" starpu_task task_create(cl_command_type typ) STARPU_ATTRIBUTE_MALLOC; void task_dependency_add(starpu_task task, cl_uint num_events, cl_event *events); void command_completed(cl_command cmd); void command_completed_task_callback(void *); /* Execute callback(arg) in a CPU task (with no buffer) * Associate this task to the command cmd (i.e. when this task completes, the command is completed) * Additional dependencies can be specified (num_events, events). * The codelet is used to give a fixed name to the task without allocating a * new codelet structure each time. This function will fill the other fields * as appropriate */ void cpu_task_submit_ex(cl_command cmd, void (*callback)(void*), void *arg, int free_arg, int release_cmd, struct starpu_codelet *, unsigned num_events, cl_event * events); #define cpu_task_submit(cmd, args...) cpu_task_submit_ex((cl_command)cmd, args) /** * Associate a StarPU task to a command and submit it * * When the task terminates, the command is set as terminated too */ cl_int task_submit_ex(starpu_task task, cl_command cmd); #define task_submit(task,cmd) task_submit_ex(task, (cl_command)cmd) /** * Add task dependencies */ void task_depends_on(starpu_task task, cl_uint num_events, cl_event *events); #endif /* SOCL_TASK_H */ starpu-1.4.9+dfsg/socl/src/util.c000066400000000000000000000032141507764646700166600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "socl.h" #include "common/timing.h" int starpu_worker_get_range_by_id(int id) { int i, oid = 0; for (i=0; i and not the internal src/ header which contains the # static inline definition dist-hook: failed=0 ; \ look=""; \ for i in $$( $(GREP) "static inline" $$(find $(srcdir) -name \*.h) | $(SED) -e 's/.*static inline //g' | $(GREP) -v ENAME\#\# | $(SED) -n -e 's/[^(]* \(\|\*\)\([^ (]*\)(.*/\2/' -e 'p;s/^_*//;p' | $(GREP) -v _starpu_spin_init | $(GREP) -v starpu_sched_ctx_worker_is_master_for_child_ctx) ; do \ if [ -z "$$look" ] ; then \ look="$$i" ; \ else \ look="$$look\|$$i" ; \ fi ; \ done ; \ echo "$$look" ; \ for j in $(shell find . -name \*.o) ; do \ nm $$j | $(GREP) -e "U \($$look\)$$" && { echo $$j ; failed=1 ; } ; \ done ; \ [ $$failed == 0 ] nm -n .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.so | grep -v " [Ua-z] " | grep -v ' W '| grep -ve " _\?_\?_\?f\?starpu" | grep -ve " \(_init\|main\|smpi_simulated_main_\|_fini\|_edata\|__bss_start\|_end\|fut_getstamp\|__gcov_\|mangle_path\)" | (! grep .) starpu-1.4.9+dfsg/src/Makefile.in000066400000000000000000004437361507764646700166650ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Simon Archipoff # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(MPICC_LDFLAGS) @STARPU_HAVE_WINDOWS_TRUE@am__append_4 = -Xlinker --output-def -Xlinker .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.def @STARPU_HAVE_DARWIN_TRUE@am__append_5 = \ @STARPU_HAVE_DARWIN_TRUE@ -Wl,-U,_starpu_main \ @STARPU_HAVE_DARWIN_TRUE@ -Wl,-U,_smpi_main \ @STARPU_HAVE_DARWIN_TRUE@ -Wl,-U,__starpu_mpi_simgrid_init \ @STARPU_HAVE_DARWIN_TRUE@ -Wl,-U,_smpi_simulated_main_ \ @STARPU_HAVE_DARWIN_TRUE@ -Wl,-U,_starpu_mpi_world_rank @STARPU_HAVE_LEVELDB_TRUE@am__append_6 = core/disk_ops/disk_leveldb.cpp @STARPU_HAVE_HDF5_TRUE@am__append_7 = core/disk_ops/disk_hdf5.c @STARPU_USE_HIP_TRUE@am__append_8 = drivers/hip/driver_hip.c @STARPU_USE_CUDA0_TRUE@am__append_9 = drivers/cuda/driver_cuda0.c @STARPU_USE_CUDA0_FALSE@@STARPU_USE_CUDA1_TRUE@am__append_10 = drivers/cuda/driver_cuda1.c @STARPU_USE_CUDA0_FALSE@@STARPU_USE_CUDA1_FALSE@@STARPU_USE_CUDA_TRUE@am__append_11 = drivers/cuda/driver_cuda.c @STARPU_SIMGRID_TRUE@@STARPU_USE_CUDA0_FALSE@@STARPU_USE_CUDA1_FALSE@@STARPU_USE_CUDA_FALSE@am__append_12 = drivers/cuda/driver_cuda.c @STARPU_USE_OPENCL_TRUE@am__append_13 = \ @STARPU_USE_OPENCL_TRUE@ drivers/opencl/driver_opencl.c \ @STARPU_USE_OPENCL_TRUE@ drivers/opencl/driver_opencl_utils.c @STARPU_SIMGRID_TRUE@@STARPU_USE_OPENCL_FALSE@am__append_14 = drivers/opencl/driver_opencl.c @STARPU_USE_MAX_FPGA_TRUE@am__append_15 = drivers/max/driver_max_fpga.c @STARPU_LINUX_SYS_TRUE@am__append_16 = core/disk_ops/disk_unistd_o_direct.c @STARPU_HAVE_HWLOC_TRUE@am__append_17 = \ @STARPU_HAVE_HWLOC_TRUE@ sched_policies/scheduler_maker.c \ @STARPU_HAVE_HWLOC_TRUE@ sched_policies/hierarchical_heft.c @STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@am__append_18 = parallel_worker/starpu_parallel_worker_create.c ######################################### # # # Generic MP compilation # # # ######################################### @STARPU_USE_MP_TRUE@am__append_19 = drivers/mp_common/mp_common.c \ @STARPU_USE_MP_TRUE@ drivers/mp_common/source_common.c \ @STARPU_USE_MP_TRUE@ drivers/mp_common/sink_common.c @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_20 = drivers/mpi/driver_mpi_common.c \ @STARPU_USE_MPI_MASTER_SLAVE_TRUE@ drivers/mpi/driver_mpi_source.c \ @STARPU_USE_MPI_MASTER_SLAVE_TRUE@ drivers/mpi/driver_mpi_sink.c @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_21 = drivers/tcpip/driver_tcpip_common.c \ @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@ drivers/tcpip/driver_tcpip_source.c \ @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@ drivers/tcpip/driver_tcpip_sink.c subdir = src ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(xmldir)" LTLIBRARIES = $(lib_LTLIBRARIES) libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = am__libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES_DIST = \ common/barrier.c common/barrier_counter.c common/hash.c \ common/rwlock.c common/starpu_spinlock.c common/timing.c \ common/fxt.c common/utils.c common/thread.c common/rbtree.c \ common/graph.c common/inlines.c common/knobs.c core/jobs.c \ core/task.c core/task_bundle.c core/tree.c core/devices.c \ core/drivers.c core/workers.c core/combined_workers.c \ core/topology.c core/disk.c core/debug.c core/errorcheck.c \ core/progress_hook.c core/idle_hook.c core/dependencies/cg.c \ core/dependencies/dependencies.c \ core/dependencies/implicit_data_deps.c \ core/dependencies/tags.c core/dependencies/task_deps.c \ core/dependencies/data_concurrency.c \ core/dependencies/data_arbiter_concurrency.c \ core/disk_ops/disk_stdio.c core/disk_ops/disk_unistd.c \ core/disk_ops/unistd/disk_unistd_global.c \ core/perfmodel/perfmodel_history.c \ core/perfmodel/energy_model.c core/perfmodel/perfmodel_bus.c \ core/perfmodel/perfmodel.c core/perfmodel/perfmodel_print.c \ core/perfmodel/perfmodel_nan.c core/perfmodel/regression.c \ core/perfmodel/multiple_regression.c core/sched_policy.c \ core/simgrid.c core/simgrid_cpp.cpp core/sched_ctx.c \ core/sched_ctx_list.c core/parallel_task.c \ core/detect_combined_workers.c \ sched_policies/eager_central_policy.c \ sched_policies/eager_central_priority_policy.c \ sched_policies/work_stealing_policy.c \ sched_policies/deque_modeling_policy_data_aware.c \ sched_policies/random_policy.c sched_policies/fifo_queues.c \ sched_policies/parallel_heft.c sched_policies/parallel_eager.c \ sched_policies/heteroprio.c sched_policies/graph_test_policy.c \ drivers/driver_common/driver_common.c \ drivers/disk/driver_disk.c datawizard/node_ops.c \ datawizard/memory_nodes.c datawizard/write_back.c \ datawizard/coherency.c datawizard/data_request.c \ datawizard/datawizard.c datawizard/copy_driver.c \ datawizard/filters.c datawizard/sort_data_handles.c \ datawizard/malloc.c datawizard/memory_manager.c \ datawizard/memalloc.c datawizard/memstats.c \ datawizard/footprint.c datawizard/datastats.c \ datawizard/user_interactions.c datawizard/reduction.c \ datawizard/interfaces/data_interface.c \ datawizard/interfaces/bcsr_interface.c \ datawizard/interfaces/coo_interface.c \ datawizard/interfaces/csr_interface.c \ datawizard/interfaces/vector_filters.c \ datawizard/interfaces/vector_interface.c \ datawizard/interfaces/matrix_filters.c \ datawizard/interfaces/matrix_interface.c \ datawizard/interfaces/block_filters.c \ datawizard/interfaces/block_interface.c \ datawizard/interfaces/tensor_filters.c \ datawizard/interfaces/tensor_interface.c \ datawizard/interfaces/ndim_filters.c \ datawizard/interfaces/ndim_interface.c \ datawizard/interfaces/bcsr_filters.c \ datawizard/interfaces/csr_filters.c \ datawizard/interfaces/variable_interface.c \ datawizard/interfaces/void_interface.c \ datawizard/interfaces/multiformat_interface.c \ util/execute_on_all.c util/starpu_create_sync_task.c \ util/file.c util/fstarpu.c util/misc.c \ util/openmp_runtime_support.c \ util/openmp_runtime_support_environment.c \ util/openmp_runtime_support_omp_api.c util/starpu_data_cpy.c \ util/starpu_task_insert.c util/starpu_task_insert_utils.c \ debug/traces/starpu_fxt.c debug/traces/starpu_fxt_mpi.c \ debug/traces/starpu_fxt_dag.c debug/traces/starpu_paje.c \ debug/traces/anim.c debug/latency.c debug/structures_size.c \ profiling/profiling.c profiling/bound.c \ profiling/profiling_helpers.c profiling/callbacks.c \ worker_collection/worker_list.c \ worker_collection/worker_tree.c \ sched_policies/component_worker.c \ sched_policies/component_sched.c \ sched_policies/component_fifo.c sched_policies/prio_deque.c \ sched_policies/helper_mct.c sched_policies/component_prio.c \ sched_policies/component_random.c \ sched_policies/component_eager.c \ sched_policies/component_eager_prio.c \ sched_policies/component_eager_calibration.c \ sched_policies/component_mct.c sched_policies/component_heft.c \ sched_policies/component_heteroprio.c \ sched_policies/component_best_implementation.c \ sched_policies/component_perfmodel_select.c \ sched_policies/component_composed.c \ sched_policies/component_work_stealing.c \ sched_policies/component_stage.c \ sched_policies/component_userchoice.c \ sched_policies/modular_eager.c \ sched_policies/modular_eager_prio.c \ sched_policies/modular_eager_prefetching.c \ sched_policies/modular_gemm.c sched_policies/modular_prio.c \ sched_policies/modular_prio_prefetching.c \ sched_policies/modular_random.c \ sched_policies/modular_parallel_random.c \ sched_policies/modular_random_prefetching.c \ sched_policies/modular_parallel_heft.c \ sched_policies/modular_heft.c \ sched_policies/modular_heft_prio.c \ sched_policies/modular_heteroprio.c \ sched_policies/modular_heteroprio_heft.c \ sched_policies/modular_heft2.c sched_policies/modular_ws.c \ sched_policies/modular_ez.c core/disk_ops/disk_leveldb.cpp \ core/disk_ops/disk_hdf5.c drivers/cpu/driver_cpu.c \ drivers/hip/driver_hip_init.c drivers/cuda/driver_cuda_init.c \ drivers/hip/driver_hip.c drivers/hip/starpu_hipblas.c \ drivers/cuda/driver_cuda0.c drivers/cuda/driver_cuda1.c \ drivers/cuda/driver_cuda.c drivers/cuda/starpu_cublas.c \ drivers/cuda/starpu_cublas_v2.c drivers/cuda/starpu_cublasLt.c \ drivers/cuda/starpu_cusparse.c drivers/cuda/starpu_cusolver.c \ drivers/opencl/driver_opencl_init.c \ drivers/opencl/driver_opencl.c \ drivers/opencl/driver_opencl_utils.c \ drivers/max/driver_max_fpga_init.c \ drivers/max/driver_max_fpga.c \ core/disk_ops/disk_unistd_o_direct.c \ sched_policies/scheduler_maker.c \ sched_policies/hierarchical_heft.c \ parallel_worker/starpu_parallel_worker_create.c \ drivers/mp_common/mp_common.c \ drivers/mp_common/source_common.c \ drivers/mp_common/sink_common.c drivers/mpi/driver_mpi_init.c \ drivers/mpi/driver_mpi_common.c \ drivers/mpi/driver_mpi_source.c drivers/mpi/driver_mpi_sink.c \ drivers/tcpip/driver_tcpip_init.c \ drivers/tcpip/driver_tcpip_common.c \ drivers/tcpip/driver_tcpip_source.c \ drivers/tcpip/driver_tcpip_sink.c am__dirstamp = $(am__leading_dot)dirstamp @STARPU_HAVE_LEVELDB_TRUE@am__objects_1 = \ @STARPU_HAVE_LEVELDB_TRUE@ core/disk_ops/disk_leveldb.lo @STARPU_HAVE_HDF5_TRUE@am__objects_2 = core/disk_ops/disk_hdf5.lo @STARPU_USE_HIP_TRUE@am__objects_3 = drivers/hip/driver_hip.lo @STARPU_USE_CUDA0_TRUE@am__objects_4 = drivers/cuda/driver_cuda0.lo @STARPU_USE_CUDA0_FALSE@@STARPU_USE_CUDA1_TRUE@am__objects_5 = drivers/cuda/driver_cuda1.lo @STARPU_USE_CUDA0_FALSE@@STARPU_USE_CUDA1_FALSE@@STARPU_USE_CUDA_TRUE@am__objects_6 = drivers/cuda/driver_cuda.lo @STARPU_SIMGRID_TRUE@@STARPU_USE_CUDA0_FALSE@@STARPU_USE_CUDA1_FALSE@@STARPU_USE_CUDA_FALSE@am__objects_7 = drivers/cuda/driver_cuda.lo @STARPU_USE_OPENCL_TRUE@am__objects_8 = \ @STARPU_USE_OPENCL_TRUE@ drivers/opencl/driver_opencl.lo \ @STARPU_USE_OPENCL_TRUE@ drivers/opencl/driver_opencl_utils.lo @STARPU_SIMGRID_TRUE@@STARPU_USE_OPENCL_FALSE@am__objects_9 = drivers/opencl/driver_opencl.lo @STARPU_USE_MAX_FPGA_TRUE@am__objects_10 = \ @STARPU_USE_MAX_FPGA_TRUE@ drivers/max/driver_max_fpga.lo @STARPU_LINUX_SYS_TRUE@am__objects_11 = \ @STARPU_LINUX_SYS_TRUE@ core/disk_ops/disk_unistd_o_direct.lo @STARPU_HAVE_HWLOC_TRUE@am__objects_12 = \ @STARPU_HAVE_HWLOC_TRUE@ sched_policies/scheduler_maker.lo \ @STARPU_HAVE_HWLOC_TRUE@ sched_policies/hierarchical_heft.lo @STARPU_HAVE_HWLOC_TRUE@@STARPU_HAVE_OPENMP_TRUE@@STARPU_HWLOC_HAVE_TOPOLOGY_DUP_TRUE@am__objects_13 = parallel_worker/starpu_parallel_worker_create.lo @STARPU_USE_MP_TRUE@am__objects_14 = drivers/mp_common/mp_common.lo \ @STARPU_USE_MP_TRUE@ drivers/mp_common/source_common.lo \ @STARPU_USE_MP_TRUE@ drivers/mp_common/sink_common.lo @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__objects_15 = drivers/mpi/driver_mpi_common.lo \ @STARPU_USE_MPI_MASTER_SLAVE_TRUE@ drivers/mpi/driver_mpi_source.lo \ @STARPU_USE_MPI_MASTER_SLAVE_TRUE@ drivers/mpi/driver_mpi_sink.lo @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__objects_16 = drivers/tcpip/driver_tcpip_common.lo \ @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@ drivers/tcpip/driver_tcpip_source.lo \ @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@ drivers/tcpip/driver_tcpip_sink.lo am_libstarpu_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ common/barrier.lo common/barrier_counter.lo common/hash.lo \ common/rwlock.lo common/starpu_spinlock.lo common/timing.lo \ common/fxt.lo common/utils.lo common/thread.lo \ common/rbtree.lo common/graph.lo common/inlines.lo \ common/knobs.lo core/jobs.lo core/task.lo core/task_bundle.lo \ core/tree.lo core/devices.lo core/drivers.lo core/workers.lo \ core/combined_workers.lo core/topology.lo core/disk.lo \ core/debug.lo core/errorcheck.lo core/progress_hook.lo \ core/idle_hook.lo core/dependencies/cg.lo \ core/dependencies/dependencies.lo \ core/dependencies/implicit_data_deps.lo \ core/dependencies/tags.lo core/dependencies/task_deps.lo \ core/dependencies/data_concurrency.lo \ core/dependencies/data_arbiter_concurrency.lo \ core/disk_ops/disk_stdio.lo core/disk_ops/disk_unistd.lo \ core/disk_ops/unistd/disk_unistd_global.lo \ core/perfmodel/perfmodel_history.lo \ core/perfmodel/energy_model.lo core/perfmodel/perfmodel_bus.lo \ core/perfmodel/perfmodel.lo core/perfmodel/perfmodel_print.lo \ core/perfmodel/perfmodel_nan.lo core/perfmodel/regression.lo \ core/perfmodel/multiple_regression.lo core/sched_policy.lo \ core/simgrid.lo core/simgrid_cpp.lo core/sched_ctx.lo \ core/sched_ctx_list.lo core/parallel_task.lo \ core/detect_combined_workers.lo \ sched_policies/eager_central_policy.lo \ sched_policies/eager_central_priority_policy.lo \ sched_policies/work_stealing_policy.lo \ sched_policies/deque_modeling_policy_data_aware.lo \ sched_policies/random_policy.lo sched_policies/fifo_queues.lo \ sched_policies/parallel_heft.lo \ sched_policies/parallel_eager.lo sched_policies/heteroprio.lo \ sched_policies/graph_test_policy.lo \ drivers/driver_common/driver_common.lo \ drivers/disk/driver_disk.lo datawizard/node_ops.lo \ datawizard/memory_nodes.lo datawizard/write_back.lo \ datawizard/coherency.lo datawizard/data_request.lo \ datawizard/datawizard.lo datawizard/copy_driver.lo \ datawizard/filters.lo datawizard/sort_data_handles.lo \ datawizard/malloc.lo datawizard/memory_manager.lo \ datawizard/memalloc.lo datawizard/memstats.lo \ datawizard/footprint.lo datawizard/datastats.lo \ datawizard/user_interactions.lo datawizard/reduction.lo \ datawizard/interfaces/data_interface.lo \ datawizard/interfaces/bcsr_interface.lo \ datawizard/interfaces/coo_interface.lo \ datawizard/interfaces/csr_interface.lo \ datawizard/interfaces/vector_filters.lo \ datawizard/interfaces/vector_interface.lo \ datawizard/interfaces/matrix_filters.lo \ datawizard/interfaces/matrix_interface.lo \ datawizard/interfaces/block_filters.lo \ datawizard/interfaces/block_interface.lo \ datawizard/interfaces/tensor_filters.lo \ datawizard/interfaces/tensor_interface.lo \ datawizard/interfaces/ndim_filters.lo \ datawizard/interfaces/ndim_interface.lo \ datawizard/interfaces/bcsr_filters.lo \ datawizard/interfaces/csr_filters.lo \ datawizard/interfaces/variable_interface.lo \ datawizard/interfaces/void_interface.lo \ datawizard/interfaces/multiformat_interface.lo \ util/execute_on_all.lo util/starpu_create_sync_task.lo \ util/file.lo util/fstarpu.lo util/misc.lo \ util/openmp_runtime_support.lo \ util/openmp_runtime_support_environment.lo \ util/openmp_runtime_support_omp_api.lo util/starpu_data_cpy.lo \ util/starpu_task_insert.lo util/starpu_task_insert_utils.lo \ debug/traces/starpu_fxt.lo debug/traces/starpu_fxt_mpi.lo \ debug/traces/starpu_fxt_dag.lo debug/traces/starpu_paje.lo \ debug/traces/anim.lo debug/latency.lo debug/structures_size.lo \ profiling/profiling.lo profiling/bound.lo \ profiling/profiling_helpers.lo profiling/callbacks.lo \ worker_collection/worker_list.lo \ worker_collection/worker_tree.lo \ sched_policies/component_worker.lo \ sched_policies/component_sched.lo \ sched_policies/component_fifo.lo sched_policies/prio_deque.lo \ sched_policies/helper_mct.lo sched_policies/component_prio.lo \ sched_policies/component_random.lo \ sched_policies/component_eager.lo \ sched_policies/component_eager_prio.lo \ sched_policies/component_eager_calibration.lo \ sched_policies/component_mct.lo \ sched_policies/component_heft.lo \ sched_policies/component_heteroprio.lo \ sched_policies/component_best_implementation.lo \ sched_policies/component_perfmodel_select.lo \ sched_policies/component_composed.lo \ sched_policies/component_work_stealing.lo \ sched_policies/component_stage.lo \ sched_policies/component_userchoice.lo \ sched_policies/modular_eager.lo \ sched_policies/modular_eager_prio.lo \ sched_policies/modular_eager_prefetching.lo \ sched_policies/modular_gemm.lo sched_policies/modular_prio.lo \ sched_policies/modular_prio_prefetching.lo \ sched_policies/modular_random.lo \ sched_policies/modular_parallel_random.lo \ sched_policies/modular_random_prefetching.lo \ sched_policies/modular_parallel_heft.lo \ sched_policies/modular_heft.lo \ sched_policies/modular_heft_prio.lo \ sched_policies/modular_heteroprio.lo \ sched_policies/modular_heteroprio_heft.lo \ sched_policies/modular_heft2.lo sched_policies/modular_ws.lo \ sched_policies/modular_ez.lo $(am__objects_1) $(am__objects_2) \ drivers/cpu/driver_cpu.lo drivers/hip/driver_hip_init.lo \ drivers/cuda/driver_cuda_init.lo $(am__objects_3) \ drivers/hip/starpu_hipblas.lo $(am__objects_4) \ $(am__objects_5) $(am__objects_6) $(am__objects_7) \ drivers/cuda/starpu_cublas.lo drivers/cuda/starpu_cublas_v2.lo \ drivers/cuda/starpu_cublasLt.lo \ drivers/cuda/starpu_cusparse.lo \ drivers/cuda/starpu_cusolver.lo \ drivers/opencl/driver_opencl_init.lo $(am__objects_8) \ $(am__objects_9) drivers/max/driver_max_fpga_init.lo \ $(am__objects_10) $(am__objects_11) $(am__objects_12) \ $(am__objects_13) $(am__objects_14) \ drivers/mpi/driver_mpi_init.lo $(am__objects_15) \ drivers/tcpip/driver_tcpip_init.lo $(am__objects_16) libstarpu_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ $(am_libstarpu_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LINK = $(LIBTOOL) $(AM_V_lt) \ --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) \ $(libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS) $(LDFLAGS) \ -o $@ AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = common/$(DEPDIR)/barrier.Plo \ common/$(DEPDIR)/barrier_counter.Plo common/$(DEPDIR)/fxt.Plo \ common/$(DEPDIR)/graph.Plo common/$(DEPDIR)/hash.Plo \ common/$(DEPDIR)/inlines.Plo common/$(DEPDIR)/knobs.Plo \ common/$(DEPDIR)/rbtree.Plo common/$(DEPDIR)/rwlock.Plo \ common/$(DEPDIR)/starpu_spinlock.Plo \ common/$(DEPDIR)/thread.Plo common/$(DEPDIR)/timing.Plo \ common/$(DEPDIR)/utils.Plo core/$(DEPDIR)/combined_workers.Plo \ core/$(DEPDIR)/debug.Plo \ core/$(DEPDIR)/detect_combined_workers.Plo \ core/$(DEPDIR)/devices.Plo core/$(DEPDIR)/disk.Plo \ core/$(DEPDIR)/drivers.Plo core/$(DEPDIR)/errorcheck.Plo \ core/$(DEPDIR)/idle_hook.Plo core/$(DEPDIR)/jobs.Plo \ core/$(DEPDIR)/parallel_task.Plo \ core/$(DEPDIR)/progress_hook.Plo core/$(DEPDIR)/sched_ctx.Plo \ core/$(DEPDIR)/sched_ctx_list.Plo \ core/$(DEPDIR)/sched_policy.Plo core/$(DEPDIR)/simgrid.Plo \ core/$(DEPDIR)/simgrid_cpp.Plo core/$(DEPDIR)/task.Plo \ core/$(DEPDIR)/task_bundle.Plo core/$(DEPDIR)/topology.Plo \ core/$(DEPDIR)/tree.Plo core/$(DEPDIR)/workers.Plo \ core/dependencies/$(DEPDIR)/cg.Plo \ core/dependencies/$(DEPDIR)/data_arbiter_concurrency.Plo \ core/dependencies/$(DEPDIR)/data_concurrency.Plo \ core/dependencies/$(DEPDIR)/dependencies.Plo \ core/dependencies/$(DEPDIR)/implicit_data_deps.Plo \ core/dependencies/$(DEPDIR)/tags.Plo \ core/dependencies/$(DEPDIR)/task_deps.Plo \ core/disk_ops/$(DEPDIR)/disk_hdf5.Plo \ core/disk_ops/$(DEPDIR)/disk_leveldb.Plo \ core/disk_ops/$(DEPDIR)/disk_stdio.Plo \ core/disk_ops/$(DEPDIR)/disk_unistd.Plo \ core/disk_ops/$(DEPDIR)/disk_unistd_o_direct.Plo \ core/disk_ops/unistd/$(DEPDIR)/disk_unistd_global.Plo \ core/perfmodel/$(DEPDIR)/energy_model.Plo \ core/perfmodel/$(DEPDIR)/multiple_regression.Plo \ core/perfmodel/$(DEPDIR)/perfmodel.Plo \ core/perfmodel/$(DEPDIR)/perfmodel_bus.Plo \ core/perfmodel/$(DEPDIR)/perfmodel_history.Plo \ core/perfmodel/$(DEPDIR)/perfmodel_nan.Plo \ core/perfmodel/$(DEPDIR)/perfmodel_print.Plo \ core/perfmodel/$(DEPDIR)/regression.Plo \ datawizard/$(DEPDIR)/coherency.Plo \ datawizard/$(DEPDIR)/copy_driver.Plo \ datawizard/$(DEPDIR)/data_request.Plo \ datawizard/$(DEPDIR)/datastats.Plo \ datawizard/$(DEPDIR)/datawizard.Plo \ datawizard/$(DEPDIR)/filters.Plo \ datawizard/$(DEPDIR)/footprint.Plo \ datawizard/$(DEPDIR)/malloc.Plo \ datawizard/$(DEPDIR)/memalloc.Plo \ datawizard/$(DEPDIR)/memory_manager.Plo \ datawizard/$(DEPDIR)/memory_nodes.Plo \ datawizard/$(DEPDIR)/memstats.Plo \ datawizard/$(DEPDIR)/node_ops.Plo \ datawizard/$(DEPDIR)/reduction.Plo \ datawizard/$(DEPDIR)/sort_data_handles.Plo \ datawizard/$(DEPDIR)/user_interactions.Plo \ datawizard/$(DEPDIR)/write_back.Plo \ datawizard/interfaces/$(DEPDIR)/bcsr_filters.Plo \ datawizard/interfaces/$(DEPDIR)/bcsr_interface.Plo \ datawizard/interfaces/$(DEPDIR)/block_filters.Plo \ datawizard/interfaces/$(DEPDIR)/block_interface.Plo \ datawizard/interfaces/$(DEPDIR)/coo_interface.Plo \ datawizard/interfaces/$(DEPDIR)/csr_filters.Plo \ datawizard/interfaces/$(DEPDIR)/csr_interface.Plo \ datawizard/interfaces/$(DEPDIR)/data_interface.Plo \ datawizard/interfaces/$(DEPDIR)/matrix_filters.Plo \ datawizard/interfaces/$(DEPDIR)/matrix_interface.Plo \ datawizard/interfaces/$(DEPDIR)/multiformat_interface.Plo \ datawizard/interfaces/$(DEPDIR)/ndim_filters.Plo \ datawizard/interfaces/$(DEPDIR)/ndim_interface.Plo \ datawizard/interfaces/$(DEPDIR)/tensor_filters.Plo \ datawizard/interfaces/$(DEPDIR)/tensor_interface.Plo \ datawizard/interfaces/$(DEPDIR)/variable_interface.Plo \ datawizard/interfaces/$(DEPDIR)/vector_filters.Plo \ datawizard/interfaces/$(DEPDIR)/vector_interface.Plo \ datawizard/interfaces/$(DEPDIR)/void_interface.Plo \ debug/$(DEPDIR)/latency.Plo \ debug/$(DEPDIR)/structures_size.Plo \ debug/traces/$(DEPDIR)/anim.Plo \ debug/traces/$(DEPDIR)/starpu_fxt.Plo \ debug/traces/$(DEPDIR)/starpu_fxt_dag.Plo \ debug/traces/$(DEPDIR)/starpu_fxt_mpi.Plo \ debug/traces/$(DEPDIR)/starpu_paje.Plo \ drivers/cpu/$(DEPDIR)/driver_cpu.Plo \ drivers/cuda/$(DEPDIR)/driver_cuda.Plo \ drivers/cuda/$(DEPDIR)/driver_cuda0.Plo \ drivers/cuda/$(DEPDIR)/driver_cuda1.Plo \ drivers/cuda/$(DEPDIR)/driver_cuda_init.Plo \ drivers/cuda/$(DEPDIR)/starpu_cublas.Plo \ drivers/cuda/$(DEPDIR)/starpu_cublasLt.Plo \ drivers/cuda/$(DEPDIR)/starpu_cublas_v2.Plo \ drivers/cuda/$(DEPDIR)/starpu_cusolver.Plo \ drivers/cuda/$(DEPDIR)/starpu_cusparse.Plo \ drivers/disk/$(DEPDIR)/driver_disk.Plo \ drivers/driver_common/$(DEPDIR)/driver_common.Plo \ drivers/hip/$(DEPDIR)/driver_hip.Plo \ drivers/hip/$(DEPDIR)/driver_hip_init.Plo \ drivers/hip/$(DEPDIR)/starpu_hipblas.Plo \ drivers/max/$(DEPDIR)/driver_max_fpga.Plo \ drivers/max/$(DEPDIR)/driver_max_fpga_init.Plo \ drivers/mp_common/$(DEPDIR)/mp_common.Plo \ drivers/mp_common/$(DEPDIR)/sink_common.Plo \ drivers/mp_common/$(DEPDIR)/source_common.Plo \ drivers/mpi/$(DEPDIR)/driver_mpi_common.Plo \ drivers/mpi/$(DEPDIR)/driver_mpi_init.Plo \ drivers/mpi/$(DEPDIR)/driver_mpi_sink.Plo \ drivers/mpi/$(DEPDIR)/driver_mpi_source.Plo \ drivers/opencl/$(DEPDIR)/driver_opencl.Plo \ drivers/opencl/$(DEPDIR)/driver_opencl_init.Plo \ drivers/opencl/$(DEPDIR)/driver_opencl_utils.Plo \ drivers/tcpip/$(DEPDIR)/driver_tcpip_common.Plo \ drivers/tcpip/$(DEPDIR)/driver_tcpip_init.Plo \ drivers/tcpip/$(DEPDIR)/driver_tcpip_sink.Plo \ drivers/tcpip/$(DEPDIR)/driver_tcpip_source.Plo \ parallel_worker/$(DEPDIR)/starpu_parallel_worker_create.Plo \ profiling/$(DEPDIR)/bound.Plo \ profiling/$(DEPDIR)/callbacks.Plo \ profiling/$(DEPDIR)/profiling.Plo \ profiling/$(DEPDIR)/profiling_helpers.Plo \ sched_policies/$(DEPDIR)/component_best_implementation.Plo \ sched_policies/$(DEPDIR)/component_composed.Plo \ sched_policies/$(DEPDIR)/component_eager.Plo \ sched_policies/$(DEPDIR)/component_eager_calibration.Plo \ sched_policies/$(DEPDIR)/component_eager_prio.Plo \ sched_policies/$(DEPDIR)/component_fifo.Plo \ sched_policies/$(DEPDIR)/component_heft.Plo \ sched_policies/$(DEPDIR)/component_heteroprio.Plo \ sched_policies/$(DEPDIR)/component_mct.Plo \ sched_policies/$(DEPDIR)/component_perfmodel_select.Plo \ sched_policies/$(DEPDIR)/component_prio.Plo \ sched_policies/$(DEPDIR)/component_random.Plo \ sched_policies/$(DEPDIR)/component_sched.Plo \ sched_policies/$(DEPDIR)/component_stage.Plo \ sched_policies/$(DEPDIR)/component_userchoice.Plo \ sched_policies/$(DEPDIR)/component_work_stealing.Plo \ sched_policies/$(DEPDIR)/component_worker.Plo \ sched_policies/$(DEPDIR)/deque_modeling_policy_data_aware.Plo \ sched_policies/$(DEPDIR)/eager_central_policy.Plo \ sched_policies/$(DEPDIR)/eager_central_priority_policy.Plo \ sched_policies/$(DEPDIR)/fifo_queues.Plo \ sched_policies/$(DEPDIR)/graph_test_policy.Plo \ sched_policies/$(DEPDIR)/helper_mct.Plo \ sched_policies/$(DEPDIR)/heteroprio.Plo \ sched_policies/$(DEPDIR)/hierarchical_heft.Plo \ sched_policies/$(DEPDIR)/modular_eager.Plo \ sched_policies/$(DEPDIR)/modular_eager_prefetching.Plo \ sched_policies/$(DEPDIR)/modular_eager_prio.Plo \ sched_policies/$(DEPDIR)/modular_ez.Plo \ sched_policies/$(DEPDIR)/modular_gemm.Plo \ sched_policies/$(DEPDIR)/modular_heft.Plo \ sched_policies/$(DEPDIR)/modular_heft2.Plo \ sched_policies/$(DEPDIR)/modular_heft_prio.Plo \ sched_policies/$(DEPDIR)/modular_heteroprio.Plo \ sched_policies/$(DEPDIR)/modular_heteroprio_heft.Plo \ sched_policies/$(DEPDIR)/modular_parallel_heft.Plo \ sched_policies/$(DEPDIR)/modular_parallel_random.Plo \ sched_policies/$(DEPDIR)/modular_prio.Plo \ sched_policies/$(DEPDIR)/modular_prio_prefetching.Plo \ sched_policies/$(DEPDIR)/modular_random.Plo \ sched_policies/$(DEPDIR)/modular_random_prefetching.Plo \ sched_policies/$(DEPDIR)/modular_ws.Plo \ sched_policies/$(DEPDIR)/parallel_eager.Plo \ sched_policies/$(DEPDIR)/parallel_heft.Plo \ sched_policies/$(DEPDIR)/prio_deque.Plo \ sched_policies/$(DEPDIR)/random_policy.Plo \ sched_policies/$(DEPDIR)/scheduler_maker.Plo \ sched_policies/$(DEPDIR)/work_stealing_policy.Plo \ util/$(DEPDIR)/execute_on_all.Plo util/$(DEPDIR)/file.Plo \ util/$(DEPDIR)/fstarpu.Plo util/$(DEPDIR)/misc.Plo \ util/$(DEPDIR)/openmp_runtime_support.Plo \ util/$(DEPDIR)/openmp_runtime_support_environment.Plo \ util/$(DEPDIR)/openmp_runtime_support_omp_api.Plo \ util/$(DEPDIR)/starpu_create_sync_task.Plo \ util/$(DEPDIR)/starpu_data_cpy.Plo \ util/$(DEPDIR)/starpu_task_insert.Plo \ util/$(DEPDIR)/starpu_task_insert_utils.Plo \ worker_collection/$(DEPDIR)/worker_list.Plo \ worker_collection/$(DEPDIR)/worker_tree.Plo am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CXXFLAGS) $(CXXFLAGS) AM_V_CXX = $(am__v_CXX_@AM_V@) am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) am__v_CXX_0 = @echo " CXX " $@; am__v_CXX_1 = CXXLD = $(CXX) CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) am__v_CXXLD_0 = @echo " CXXLD " $@; am__v_CXXLD_1 = SOURCES = $(libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) DIST_SOURCES = \ $(am__libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES_DIST) RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac DATA = $(xml_DATA) HEADERS = $(noinst_HEADERS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/make/starpu-notests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ -lm $(LIBSTARPU_LDFLAGS) $(OPENMP_CFLAGS) \ $(am__append_3) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_CFLAGS = $(GLOBAL_AM_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) AM_CPPFLAGS = -I$(top_srcdir)/include/ -DBUILDING_STARPU \ -DSTARPU_DATADIR='"$(datadir)"' $(STARPU_H_CPPFLAGS) \ $(OPENMP_CFLAGS) $(FXT_CFLAGS) SUBDIRS = CLEANFILES = *.gcno *.gcda *.linkinfo EXTRA_DIST = dolib.c core/perfmodel/starpu-perfmodel.dtd xml_DATA = $(srcdir)/core/perfmodel/starpu-perfmodel.dtd xmldir = $(pkgdatadir) ldflags = $(am__append_4) libstarpu_so_version = $(LIBSTARPU_INTERFACE_CURRENT):$(LIBSTARPU_INTERFACE_REVISION):$(LIBSTARPU_INTERFACE_AGE) @STARPU_HAVE_WINDOWS_TRUE@LC_MESSAGES = C lib_LTLIBRARIES = libstarpu-@STARPU_EFFECTIVE_VERSION@.la libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) \ -no-undefined -version-info $(libstarpu_so_version) \ $(am__append_5) noinst_HEADERS = \ core/dependencies/data_concurrency.h \ core/dependencies/cg.h \ core/dependencies/tags.h \ core/dependencies/implicit_data_deps.h \ core/disk.h \ core/disk_ops/unistd/disk_unistd_global.h \ core/progress_hook.h \ core/idle_hook.h \ core/sched_policy.h \ core/sched_ctx.h \ core/sched_ctx_list.h \ core/perfmodel/perfmodel.h \ core/perfmodel/regression.h \ core/perfmodel/multiple_regression.h \ core/jobs.h \ core/devices.h \ core/task.h \ core/drivers.h \ core/workers.h \ core/topology.h \ core/debug.h \ core/errorcheck.h \ core/combined_workers.h \ core/simgrid.h \ core/task_bundle.h \ core/detect_combined_workers.h \ sched_policies/helper_mct.h \ sched_policies/fifo_queues.h \ sched_policies/heteroprio.h \ datawizard/node_ops.h \ datawizard/footprint.h \ datawizard/datawizard.h \ datawizard/data_request.h \ datawizard/filters.h \ datawizard/write_back.h \ datawizard/datastats.h \ datawizard/malloc.h \ datawizard/memstats.h \ datawizard/memory_manager.h \ datawizard/memalloc.h \ datawizard/copy_driver.h \ datawizard/coherency.h \ datawizard/sort_data_handles.h \ datawizard/memory_nodes.h \ datawizard/interfaces/data_interface.h \ common/barrier.h \ common/timing.h \ common/list.h \ common/rwlock.h \ common/starpu_spinlock.h \ common/fxt.h \ common/utils.h \ common/thread.h \ common/barrier.h \ common/uthash.h \ common/barrier_counter.h \ common/rbtree.h \ common/rbtree_i.h \ common/prio_list.h \ common/graph.h \ common/knobs.h \ drivers/driver_common/driver_common.h \ drivers/mp_common/mp_common.h \ drivers/mp_common/source_common.h \ drivers/mp_common/sink_common.h \ drivers/cpu/driver_cpu.h \ drivers/cuda/driver_cuda.h \ drivers/hip/driver_hip.h \ drivers/opencl/driver_opencl.h \ drivers/opencl/driver_opencl_utils.h \ drivers/max/driver_max_fpga.h \ debug/starpu_debug_helpers.h \ drivers/mpi/driver_mpi_common.h \ drivers/mpi/driver_mpi_source.h \ drivers/mpi/driver_mpi_sink.h \ drivers/tcpip/driver_tcpip_common.h \ drivers/tcpip/driver_tcpip_common_func.h \ drivers/tcpip/driver_tcpip_source.h \ drivers/tcpip/driver_tcpip_sink.h \ drivers/disk/driver_disk.h \ debug/traces/starpu_fxt.h \ parallel_worker/starpu_parallel_worker_create.h \ profiling/bound.h \ profiling/profiling.h \ profiling/callbacks.h \ util/openmp_runtime_support.h \ util/starpu_task_insert_utils.h \ util/starpu_data_cpy.h \ sched_policies/prio_deque.h \ sched_policies/sched_component.h ######################################### # # # MPI Master/Slave compilation # # # ######################################### ######################################### # # # TCPIP Master/Slave compilation # # # ######################################### libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = common/barrier.c \ common/barrier_counter.c common/hash.c common/rwlock.c \ common/starpu_spinlock.c common/timing.c common/fxt.c \ common/utils.c common/thread.c common/rbtree.c common/graph.c \ common/inlines.c common/knobs.c core/jobs.c core/task.c \ core/task_bundle.c core/tree.c core/devices.c core/drivers.c \ core/workers.c core/combined_workers.c core/topology.c \ core/disk.c core/debug.c core/errorcheck.c \ core/progress_hook.c core/idle_hook.c core/dependencies/cg.c \ core/dependencies/dependencies.c \ core/dependencies/implicit_data_deps.c \ core/dependencies/tags.c core/dependencies/task_deps.c \ core/dependencies/data_concurrency.c \ core/dependencies/data_arbiter_concurrency.c \ core/disk_ops/disk_stdio.c core/disk_ops/disk_unistd.c \ core/disk_ops/unistd/disk_unistd_global.c \ core/perfmodel/perfmodel_history.c \ core/perfmodel/energy_model.c core/perfmodel/perfmodel_bus.c \ core/perfmodel/perfmodel.c core/perfmodel/perfmodel_print.c \ core/perfmodel/perfmodel_nan.c core/perfmodel/regression.c \ core/perfmodel/multiple_regression.c core/sched_policy.c \ core/simgrid.c core/simgrid_cpp.cpp core/sched_ctx.c \ core/sched_ctx_list.c core/parallel_task.c \ core/detect_combined_workers.c \ sched_policies/eager_central_policy.c \ sched_policies/eager_central_priority_policy.c \ sched_policies/work_stealing_policy.c \ sched_policies/deque_modeling_policy_data_aware.c \ sched_policies/random_policy.c sched_policies/fifo_queues.c \ sched_policies/parallel_heft.c sched_policies/parallel_eager.c \ sched_policies/heteroprio.c sched_policies/graph_test_policy.c \ drivers/driver_common/driver_common.c \ drivers/disk/driver_disk.c datawizard/node_ops.c \ datawizard/memory_nodes.c datawizard/write_back.c \ datawizard/coherency.c datawizard/data_request.c \ datawizard/datawizard.c datawizard/copy_driver.c \ datawizard/filters.c datawizard/sort_data_handles.c \ datawizard/malloc.c datawizard/memory_manager.c \ datawizard/memalloc.c datawizard/memstats.c \ datawizard/footprint.c datawizard/datastats.c \ datawizard/user_interactions.c datawizard/reduction.c \ datawizard/interfaces/data_interface.c \ datawizard/interfaces/bcsr_interface.c \ datawizard/interfaces/coo_interface.c \ datawizard/interfaces/csr_interface.c \ datawizard/interfaces/vector_filters.c \ datawizard/interfaces/vector_interface.c \ datawizard/interfaces/matrix_filters.c \ datawizard/interfaces/matrix_interface.c \ datawizard/interfaces/block_filters.c \ datawizard/interfaces/block_interface.c \ datawizard/interfaces/tensor_filters.c \ datawizard/interfaces/tensor_interface.c \ datawizard/interfaces/ndim_filters.c \ datawizard/interfaces/ndim_interface.c \ datawizard/interfaces/bcsr_filters.c \ datawizard/interfaces/csr_filters.c \ datawizard/interfaces/variable_interface.c \ datawizard/interfaces/void_interface.c \ datawizard/interfaces/multiformat_interface.c \ util/execute_on_all.c util/starpu_create_sync_task.c \ util/file.c util/fstarpu.c util/misc.c \ util/openmp_runtime_support.c \ util/openmp_runtime_support_environment.c \ util/openmp_runtime_support_omp_api.c util/starpu_data_cpy.c \ util/starpu_task_insert.c util/starpu_task_insert_utils.c \ debug/traces/starpu_fxt.c debug/traces/starpu_fxt_mpi.c \ debug/traces/starpu_fxt_dag.c debug/traces/starpu_paje.c \ debug/traces/anim.c debug/latency.c debug/structures_size.c \ profiling/profiling.c profiling/bound.c \ profiling/profiling_helpers.c profiling/callbacks.c \ worker_collection/worker_list.c \ worker_collection/worker_tree.c \ sched_policies/component_worker.c \ sched_policies/component_sched.c \ sched_policies/component_fifo.c sched_policies/prio_deque.c \ sched_policies/helper_mct.c sched_policies/component_prio.c \ sched_policies/component_random.c \ sched_policies/component_eager.c \ sched_policies/component_eager_prio.c \ sched_policies/component_eager_calibration.c \ sched_policies/component_mct.c sched_policies/component_heft.c \ sched_policies/component_heteroprio.c \ sched_policies/component_best_implementation.c \ sched_policies/component_perfmodel_select.c \ sched_policies/component_composed.c \ sched_policies/component_work_stealing.c \ sched_policies/component_stage.c \ sched_policies/component_userchoice.c \ sched_policies/modular_eager.c \ sched_policies/modular_eager_prio.c \ sched_policies/modular_eager_prefetching.c \ sched_policies/modular_gemm.c sched_policies/modular_prio.c \ sched_policies/modular_prio_prefetching.c \ sched_policies/modular_random.c \ sched_policies/modular_parallel_random.c \ sched_policies/modular_random_prefetching.c \ sched_policies/modular_parallel_heft.c \ sched_policies/modular_heft.c \ sched_policies/modular_heft_prio.c \ sched_policies/modular_heteroprio.c \ sched_policies/modular_heteroprio_heft.c \ sched_policies/modular_heft2.c sched_policies/modular_ws.c \ sched_policies/modular_ez.c $(am__append_6) $(am__append_7) \ drivers/cpu/driver_cpu.c drivers/hip/driver_hip_init.c \ drivers/cuda/driver_cuda_init.c $(am__append_8) \ drivers/hip/starpu_hipblas.c $(am__append_9) $(am__append_10) \ $(am__append_11) $(am__append_12) drivers/cuda/starpu_cublas.c \ drivers/cuda/starpu_cublas_v2.c drivers/cuda/starpu_cublasLt.c \ drivers/cuda/starpu_cusparse.c drivers/cuda/starpu_cusolver.c \ drivers/opencl/driver_opencl_init.c $(am__append_13) \ $(am__append_14) drivers/max/driver_max_fpga_init.c \ $(am__append_15) $(am__append_16) $(am__append_17) \ $(am__append_18) $(am__append_19) \ drivers/mpi/driver_mpi_init.c $(am__append_20) \ drivers/tcpip/driver_tcpip_init.c $(am__append_21) all: all-recursive .SUFFIXES: .SUFFIXES: .c .cpp .cu .cubin .hip .lo .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign src/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): install-libLTLIBRARIES: $(lib_LTLIBRARIES) @$(NORMAL_INSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ list2=; for p in $$list; do \ if test -f $$p; then \ list2="$$list2 $$p"; \ else :; fi; \ done; \ test -z "$$list2" || { \ echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ } uninstall-libLTLIBRARIES: @$(NORMAL_UNINSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ for p in $$list; do \ $(am__strip_dir) \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ done clean-libLTLIBRARIES: -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) @list='$(lib_LTLIBRARIES)'; \ locs=`for p in $$list; do echo $$p; done | \ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ sort -u`; \ test -z "$$locs" || { \ echo rm -f $${locs}; \ rm -f $${locs}; \ } common/$(am__dirstamp): @$(MKDIR_P) common @: > common/$(am__dirstamp) common/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) common/$(DEPDIR) @: > common/$(DEPDIR)/$(am__dirstamp) common/barrier.lo: common/$(am__dirstamp) \ common/$(DEPDIR)/$(am__dirstamp) common/barrier_counter.lo: common/$(am__dirstamp) \ common/$(DEPDIR)/$(am__dirstamp) common/hash.lo: common/$(am__dirstamp) \ common/$(DEPDIR)/$(am__dirstamp) common/rwlock.lo: common/$(am__dirstamp) \ common/$(DEPDIR)/$(am__dirstamp) common/starpu_spinlock.lo: common/$(am__dirstamp) \ common/$(DEPDIR)/$(am__dirstamp) common/timing.lo: common/$(am__dirstamp) \ common/$(DEPDIR)/$(am__dirstamp) common/fxt.lo: common/$(am__dirstamp) common/$(DEPDIR)/$(am__dirstamp) common/utils.lo: common/$(am__dirstamp) \ common/$(DEPDIR)/$(am__dirstamp) common/thread.lo: common/$(am__dirstamp) \ common/$(DEPDIR)/$(am__dirstamp) common/rbtree.lo: common/$(am__dirstamp) \ common/$(DEPDIR)/$(am__dirstamp) common/graph.lo: common/$(am__dirstamp) \ common/$(DEPDIR)/$(am__dirstamp) common/inlines.lo: common/$(am__dirstamp) \ common/$(DEPDIR)/$(am__dirstamp) common/knobs.lo: common/$(am__dirstamp) \ common/$(DEPDIR)/$(am__dirstamp) core/$(am__dirstamp): @$(MKDIR_P) core @: > core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) core/$(DEPDIR) @: > core/$(DEPDIR)/$(am__dirstamp) core/jobs.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) core/task.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) core/task_bundle.lo: core/$(am__dirstamp) \ core/$(DEPDIR)/$(am__dirstamp) core/tree.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) core/devices.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) core/drivers.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) core/workers.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) core/combined_workers.lo: core/$(am__dirstamp) \ core/$(DEPDIR)/$(am__dirstamp) core/topology.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) core/disk.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) core/debug.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) core/errorcheck.lo: core/$(am__dirstamp) \ core/$(DEPDIR)/$(am__dirstamp) core/progress_hook.lo: core/$(am__dirstamp) \ core/$(DEPDIR)/$(am__dirstamp) core/idle_hook.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) core/dependencies/$(am__dirstamp): @$(MKDIR_P) core/dependencies @: > core/dependencies/$(am__dirstamp) core/dependencies/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) core/dependencies/$(DEPDIR) @: > core/dependencies/$(DEPDIR)/$(am__dirstamp) core/dependencies/cg.lo: core/dependencies/$(am__dirstamp) \ core/dependencies/$(DEPDIR)/$(am__dirstamp) core/dependencies/dependencies.lo: core/dependencies/$(am__dirstamp) \ core/dependencies/$(DEPDIR)/$(am__dirstamp) core/dependencies/implicit_data_deps.lo: \ core/dependencies/$(am__dirstamp) \ core/dependencies/$(DEPDIR)/$(am__dirstamp) core/dependencies/tags.lo: core/dependencies/$(am__dirstamp) \ core/dependencies/$(DEPDIR)/$(am__dirstamp) core/dependencies/task_deps.lo: core/dependencies/$(am__dirstamp) \ core/dependencies/$(DEPDIR)/$(am__dirstamp) core/dependencies/data_concurrency.lo: \ core/dependencies/$(am__dirstamp) \ core/dependencies/$(DEPDIR)/$(am__dirstamp) core/dependencies/data_arbiter_concurrency.lo: \ core/dependencies/$(am__dirstamp) \ core/dependencies/$(DEPDIR)/$(am__dirstamp) core/disk_ops/$(am__dirstamp): @$(MKDIR_P) core/disk_ops @: > core/disk_ops/$(am__dirstamp) core/disk_ops/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) core/disk_ops/$(DEPDIR) @: > core/disk_ops/$(DEPDIR)/$(am__dirstamp) core/disk_ops/disk_stdio.lo: core/disk_ops/$(am__dirstamp) \ core/disk_ops/$(DEPDIR)/$(am__dirstamp) core/disk_ops/disk_unistd.lo: core/disk_ops/$(am__dirstamp) \ core/disk_ops/$(DEPDIR)/$(am__dirstamp) core/disk_ops/unistd/$(am__dirstamp): @$(MKDIR_P) core/disk_ops/unistd @: > core/disk_ops/unistd/$(am__dirstamp) core/disk_ops/unistd/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) core/disk_ops/unistd/$(DEPDIR) @: > core/disk_ops/unistd/$(DEPDIR)/$(am__dirstamp) core/disk_ops/unistd/disk_unistd_global.lo: \ core/disk_ops/unistd/$(am__dirstamp) \ core/disk_ops/unistd/$(DEPDIR)/$(am__dirstamp) core/perfmodel/$(am__dirstamp): @$(MKDIR_P) core/perfmodel @: > core/perfmodel/$(am__dirstamp) core/perfmodel/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) core/perfmodel/$(DEPDIR) @: > core/perfmodel/$(DEPDIR)/$(am__dirstamp) core/perfmodel/perfmodel_history.lo: core/perfmodel/$(am__dirstamp) \ core/perfmodel/$(DEPDIR)/$(am__dirstamp) core/perfmodel/energy_model.lo: core/perfmodel/$(am__dirstamp) \ core/perfmodel/$(DEPDIR)/$(am__dirstamp) core/perfmodel/perfmodel_bus.lo: core/perfmodel/$(am__dirstamp) \ core/perfmodel/$(DEPDIR)/$(am__dirstamp) core/perfmodel/perfmodel.lo: core/perfmodel/$(am__dirstamp) \ core/perfmodel/$(DEPDIR)/$(am__dirstamp) core/perfmodel/perfmodel_print.lo: core/perfmodel/$(am__dirstamp) \ core/perfmodel/$(DEPDIR)/$(am__dirstamp) core/perfmodel/perfmodel_nan.lo: core/perfmodel/$(am__dirstamp) \ core/perfmodel/$(DEPDIR)/$(am__dirstamp) core/perfmodel/regression.lo: core/perfmodel/$(am__dirstamp) \ core/perfmodel/$(DEPDIR)/$(am__dirstamp) core/perfmodel/multiple_regression.lo: core/perfmodel/$(am__dirstamp) \ core/perfmodel/$(DEPDIR)/$(am__dirstamp) core/sched_policy.lo: core/$(am__dirstamp) \ core/$(DEPDIR)/$(am__dirstamp) core/simgrid.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) core/simgrid_cpp.lo: core/$(am__dirstamp) \ core/$(DEPDIR)/$(am__dirstamp) core/sched_ctx.lo: core/$(am__dirstamp) core/$(DEPDIR)/$(am__dirstamp) core/sched_ctx_list.lo: core/$(am__dirstamp) \ core/$(DEPDIR)/$(am__dirstamp) core/parallel_task.lo: core/$(am__dirstamp) \ core/$(DEPDIR)/$(am__dirstamp) core/detect_combined_workers.lo: core/$(am__dirstamp) \ core/$(DEPDIR)/$(am__dirstamp) sched_policies/$(am__dirstamp): @$(MKDIR_P) sched_policies @: > sched_policies/$(am__dirstamp) sched_policies/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) sched_policies/$(DEPDIR) @: > sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/eager_central_policy.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/eager_central_priority_policy.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/work_stealing_policy.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/deque_modeling_policy_data_aware.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/random_policy.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/fifo_queues.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/parallel_heft.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/parallel_eager.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/heteroprio.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/graph_test_policy.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) drivers/driver_common/$(am__dirstamp): @$(MKDIR_P) drivers/driver_common @: > drivers/driver_common/$(am__dirstamp) drivers/driver_common/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) drivers/driver_common/$(DEPDIR) @: > drivers/driver_common/$(DEPDIR)/$(am__dirstamp) drivers/driver_common/driver_common.lo: \ drivers/driver_common/$(am__dirstamp) \ drivers/driver_common/$(DEPDIR)/$(am__dirstamp) drivers/disk/$(am__dirstamp): @$(MKDIR_P) drivers/disk @: > drivers/disk/$(am__dirstamp) drivers/disk/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) drivers/disk/$(DEPDIR) @: > drivers/disk/$(DEPDIR)/$(am__dirstamp) drivers/disk/driver_disk.lo: drivers/disk/$(am__dirstamp) \ drivers/disk/$(DEPDIR)/$(am__dirstamp) datawizard/$(am__dirstamp): @$(MKDIR_P) datawizard @: > datawizard/$(am__dirstamp) datawizard/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/$(DEPDIR) @: > datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/node_ops.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/memory_nodes.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/write_back.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/coherency.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/data_request.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/datawizard.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/copy_driver.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/filters.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/sort_data_handles.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/malloc.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/memory_manager.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/memalloc.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/memstats.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/footprint.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/datastats.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/user_interactions.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/reduction.lo: datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces @: > datawizard/interfaces/$(am__dirstamp) datawizard/interfaces/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/$(DEPDIR) @: > datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/data_interface.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/bcsr_interface.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/coo_interface.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/csr_interface.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/vector_filters.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/vector_interface.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/matrix_filters.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/matrix_interface.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/block_filters.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/block_interface.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/tensor_filters.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/tensor_interface.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/ndim_filters.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/ndim_interface.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/bcsr_filters.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/csr_filters.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/variable_interface.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/void_interface.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat_interface.lo: \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) util/$(am__dirstamp): @$(MKDIR_P) util @: > util/$(am__dirstamp) util/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) util/$(DEPDIR) @: > util/$(DEPDIR)/$(am__dirstamp) util/execute_on_all.lo: util/$(am__dirstamp) \ util/$(DEPDIR)/$(am__dirstamp) util/starpu_create_sync_task.lo: util/$(am__dirstamp) \ util/$(DEPDIR)/$(am__dirstamp) util/file.lo: util/$(am__dirstamp) util/$(DEPDIR)/$(am__dirstamp) util/fstarpu.lo: util/$(am__dirstamp) util/$(DEPDIR)/$(am__dirstamp) util/misc.lo: util/$(am__dirstamp) util/$(DEPDIR)/$(am__dirstamp) util/openmp_runtime_support.lo: util/$(am__dirstamp) \ util/$(DEPDIR)/$(am__dirstamp) util/openmp_runtime_support_environment.lo: util/$(am__dirstamp) \ util/$(DEPDIR)/$(am__dirstamp) util/openmp_runtime_support_omp_api.lo: util/$(am__dirstamp) \ util/$(DEPDIR)/$(am__dirstamp) util/starpu_data_cpy.lo: util/$(am__dirstamp) \ util/$(DEPDIR)/$(am__dirstamp) util/starpu_task_insert.lo: util/$(am__dirstamp) \ util/$(DEPDIR)/$(am__dirstamp) util/starpu_task_insert_utils.lo: util/$(am__dirstamp) \ util/$(DEPDIR)/$(am__dirstamp) debug/traces/$(am__dirstamp): @$(MKDIR_P) debug/traces @: > debug/traces/$(am__dirstamp) debug/traces/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) debug/traces/$(DEPDIR) @: > debug/traces/$(DEPDIR)/$(am__dirstamp) debug/traces/starpu_fxt.lo: debug/traces/$(am__dirstamp) \ debug/traces/$(DEPDIR)/$(am__dirstamp) debug/traces/starpu_fxt_mpi.lo: debug/traces/$(am__dirstamp) \ debug/traces/$(DEPDIR)/$(am__dirstamp) debug/traces/starpu_fxt_dag.lo: debug/traces/$(am__dirstamp) \ debug/traces/$(DEPDIR)/$(am__dirstamp) debug/traces/starpu_paje.lo: debug/traces/$(am__dirstamp) \ debug/traces/$(DEPDIR)/$(am__dirstamp) debug/traces/anim.lo: debug/traces/$(am__dirstamp) \ debug/traces/$(DEPDIR)/$(am__dirstamp) debug/$(am__dirstamp): @$(MKDIR_P) debug @: > debug/$(am__dirstamp) debug/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) debug/$(DEPDIR) @: > debug/$(DEPDIR)/$(am__dirstamp) debug/latency.lo: debug/$(am__dirstamp) \ debug/$(DEPDIR)/$(am__dirstamp) debug/structures_size.lo: debug/$(am__dirstamp) \ debug/$(DEPDIR)/$(am__dirstamp) profiling/$(am__dirstamp): @$(MKDIR_P) profiling @: > profiling/$(am__dirstamp) profiling/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) profiling/$(DEPDIR) @: > profiling/$(DEPDIR)/$(am__dirstamp) profiling/profiling.lo: profiling/$(am__dirstamp) \ profiling/$(DEPDIR)/$(am__dirstamp) profiling/bound.lo: profiling/$(am__dirstamp) \ profiling/$(DEPDIR)/$(am__dirstamp) profiling/profiling_helpers.lo: profiling/$(am__dirstamp) \ profiling/$(DEPDIR)/$(am__dirstamp) profiling/callbacks.lo: profiling/$(am__dirstamp) \ profiling/$(DEPDIR)/$(am__dirstamp) worker_collection/$(am__dirstamp): @$(MKDIR_P) worker_collection @: > worker_collection/$(am__dirstamp) worker_collection/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) worker_collection/$(DEPDIR) @: > worker_collection/$(DEPDIR)/$(am__dirstamp) worker_collection/worker_list.lo: worker_collection/$(am__dirstamp) \ worker_collection/$(DEPDIR)/$(am__dirstamp) worker_collection/worker_tree.lo: worker_collection/$(am__dirstamp) \ worker_collection/$(DEPDIR)/$(am__dirstamp) sched_policies/component_worker.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_sched.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_fifo.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/prio_deque.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/helper_mct.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_prio.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_random.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_eager.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_eager_prio.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_eager_calibration.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_mct.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_heft.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_heteroprio.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_best_implementation.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_perfmodel_select.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_composed.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_work_stealing.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_stage.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/component_userchoice.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_eager.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_eager_prio.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_eager_prefetching.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_gemm.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_prio.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_prio_prefetching.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_random.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_parallel_random.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_random_prefetching.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_parallel_heft.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_heft.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_heft_prio.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_heteroprio.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_heteroprio_heft.lo: \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_heft2.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_ws.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/modular_ez.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) core/disk_ops/disk_leveldb.lo: core/disk_ops/$(am__dirstamp) \ core/disk_ops/$(DEPDIR)/$(am__dirstamp) core/disk_ops/disk_hdf5.lo: core/disk_ops/$(am__dirstamp) \ core/disk_ops/$(DEPDIR)/$(am__dirstamp) drivers/cpu/$(am__dirstamp): @$(MKDIR_P) drivers/cpu @: > drivers/cpu/$(am__dirstamp) drivers/cpu/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) drivers/cpu/$(DEPDIR) @: > drivers/cpu/$(DEPDIR)/$(am__dirstamp) drivers/cpu/driver_cpu.lo: drivers/cpu/$(am__dirstamp) \ drivers/cpu/$(DEPDIR)/$(am__dirstamp) drivers/hip/$(am__dirstamp): @$(MKDIR_P) drivers/hip @: > drivers/hip/$(am__dirstamp) drivers/hip/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) drivers/hip/$(DEPDIR) @: > drivers/hip/$(DEPDIR)/$(am__dirstamp) drivers/hip/driver_hip_init.lo: drivers/hip/$(am__dirstamp) \ drivers/hip/$(DEPDIR)/$(am__dirstamp) drivers/cuda/$(am__dirstamp): @$(MKDIR_P) drivers/cuda @: > drivers/cuda/$(am__dirstamp) drivers/cuda/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) drivers/cuda/$(DEPDIR) @: > drivers/cuda/$(DEPDIR)/$(am__dirstamp) drivers/cuda/driver_cuda_init.lo: drivers/cuda/$(am__dirstamp) \ drivers/cuda/$(DEPDIR)/$(am__dirstamp) drivers/hip/driver_hip.lo: drivers/hip/$(am__dirstamp) \ drivers/hip/$(DEPDIR)/$(am__dirstamp) drivers/hip/starpu_hipblas.lo: drivers/hip/$(am__dirstamp) \ drivers/hip/$(DEPDIR)/$(am__dirstamp) drivers/cuda/driver_cuda0.lo: drivers/cuda/$(am__dirstamp) \ drivers/cuda/$(DEPDIR)/$(am__dirstamp) drivers/cuda/driver_cuda1.lo: drivers/cuda/$(am__dirstamp) \ drivers/cuda/$(DEPDIR)/$(am__dirstamp) drivers/cuda/driver_cuda.lo: drivers/cuda/$(am__dirstamp) \ drivers/cuda/$(DEPDIR)/$(am__dirstamp) drivers/cuda/starpu_cublas.lo: drivers/cuda/$(am__dirstamp) \ drivers/cuda/$(DEPDIR)/$(am__dirstamp) drivers/cuda/starpu_cublas_v2.lo: drivers/cuda/$(am__dirstamp) \ drivers/cuda/$(DEPDIR)/$(am__dirstamp) drivers/cuda/starpu_cublasLt.lo: drivers/cuda/$(am__dirstamp) \ drivers/cuda/$(DEPDIR)/$(am__dirstamp) drivers/cuda/starpu_cusparse.lo: drivers/cuda/$(am__dirstamp) \ drivers/cuda/$(DEPDIR)/$(am__dirstamp) drivers/cuda/starpu_cusolver.lo: drivers/cuda/$(am__dirstamp) \ drivers/cuda/$(DEPDIR)/$(am__dirstamp) drivers/opencl/$(am__dirstamp): @$(MKDIR_P) drivers/opencl @: > drivers/opencl/$(am__dirstamp) drivers/opencl/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) drivers/opencl/$(DEPDIR) @: > drivers/opencl/$(DEPDIR)/$(am__dirstamp) drivers/opencl/driver_opencl_init.lo: drivers/opencl/$(am__dirstamp) \ drivers/opencl/$(DEPDIR)/$(am__dirstamp) drivers/opencl/driver_opencl.lo: drivers/opencl/$(am__dirstamp) \ drivers/opencl/$(DEPDIR)/$(am__dirstamp) drivers/opencl/driver_opencl_utils.lo: drivers/opencl/$(am__dirstamp) \ drivers/opencl/$(DEPDIR)/$(am__dirstamp) drivers/max/$(am__dirstamp): @$(MKDIR_P) drivers/max @: > drivers/max/$(am__dirstamp) drivers/max/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) drivers/max/$(DEPDIR) @: > drivers/max/$(DEPDIR)/$(am__dirstamp) drivers/max/driver_max_fpga_init.lo: drivers/max/$(am__dirstamp) \ drivers/max/$(DEPDIR)/$(am__dirstamp) drivers/max/driver_max_fpga.lo: drivers/max/$(am__dirstamp) \ drivers/max/$(DEPDIR)/$(am__dirstamp) core/disk_ops/disk_unistd_o_direct.lo: core/disk_ops/$(am__dirstamp) \ core/disk_ops/$(DEPDIR)/$(am__dirstamp) sched_policies/scheduler_maker.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/hierarchical_heft.lo: sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) parallel_worker/$(am__dirstamp): @$(MKDIR_P) parallel_worker @: > parallel_worker/$(am__dirstamp) parallel_worker/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) parallel_worker/$(DEPDIR) @: > parallel_worker/$(DEPDIR)/$(am__dirstamp) parallel_worker/starpu_parallel_worker_create.lo: \ parallel_worker/$(am__dirstamp) \ parallel_worker/$(DEPDIR)/$(am__dirstamp) drivers/mp_common/$(am__dirstamp): @$(MKDIR_P) drivers/mp_common @: > drivers/mp_common/$(am__dirstamp) drivers/mp_common/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) drivers/mp_common/$(DEPDIR) @: > drivers/mp_common/$(DEPDIR)/$(am__dirstamp) drivers/mp_common/mp_common.lo: drivers/mp_common/$(am__dirstamp) \ drivers/mp_common/$(DEPDIR)/$(am__dirstamp) drivers/mp_common/source_common.lo: drivers/mp_common/$(am__dirstamp) \ drivers/mp_common/$(DEPDIR)/$(am__dirstamp) drivers/mp_common/sink_common.lo: drivers/mp_common/$(am__dirstamp) \ drivers/mp_common/$(DEPDIR)/$(am__dirstamp) drivers/mpi/$(am__dirstamp): @$(MKDIR_P) drivers/mpi @: > drivers/mpi/$(am__dirstamp) drivers/mpi/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) drivers/mpi/$(DEPDIR) @: > drivers/mpi/$(DEPDIR)/$(am__dirstamp) drivers/mpi/driver_mpi_init.lo: drivers/mpi/$(am__dirstamp) \ drivers/mpi/$(DEPDIR)/$(am__dirstamp) drivers/mpi/driver_mpi_common.lo: drivers/mpi/$(am__dirstamp) \ drivers/mpi/$(DEPDIR)/$(am__dirstamp) drivers/mpi/driver_mpi_source.lo: drivers/mpi/$(am__dirstamp) \ drivers/mpi/$(DEPDIR)/$(am__dirstamp) drivers/mpi/driver_mpi_sink.lo: drivers/mpi/$(am__dirstamp) \ drivers/mpi/$(DEPDIR)/$(am__dirstamp) drivers/tcpip/$(am__dirstamp): @$(MKDIR_P) drivers/tcpip @: > drivers/tcpip/$(am__dirstamp) drivers/tcpip/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) drivers/tcpip/$(DEPDIR) @: > drivers/tcpip/$(DEPDIR)/$(am__dirstamp) drivers/tcpip/driver_tcpip_init.lo: drivers/tcpip/$(am__dirstamp) \ drivers/tcpip/$(DEPDIR)/$(am__dirstamp) drivers/tcpip/driver_tcpip_common.lo: drivers/tcpip/$(am__dirstamp) \ drivers/tcpip/$(DEPDIR)/$(am__dirstamp) drivers/tcpip/driver_tcpip_source.lo: drivers/tcpip/$(am__dirstamp) \ drivers/tcpip/$(DEPDIR)/$(am__dirstamp) drivers/tcpip/driver_tcpip_sink.lo: drivers/tcpip/$(am__dirstamp) \ drivers/tcpip/$(DEPDIR)/$(am__dirstamp) libstarpu-@STARPU_EFFECTIVE_VERSION@.la: $(libstarpu_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpu_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(EXTRA_libstarpu_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(AM_V_CXXLD)$(libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LINK) -rpath $(libdir) $(libstarpu_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) -rm -f common/*.$(OBJEXT) -rm -f common/*.lo -rm -f core/*.$(OBJEXT) -rm -f core/*.lo -rm -f core/dependencies/*.$(OBJEXT) -rm -f core/dependencies/*.lo -rm -f core/disk_ops/*.$(OBJEXT) -rm -f core/disk_ops/*.lo -rm -f core/disk_ops/unistd/*.$(OBJEXT) -rm -f core/disk_ops/unistd/*.lo -rm -f core/perfmodel/*.$(OBJEXT) -rm -f core/perfmodel/*.lo -rm -f datawizard/*.$(OBJEXT) -rm -f datawizard/*.lo -rm -f datawizard/interfaces/*.$(OBJEXT) -rm -f datawizard/interfaces/*.lo -rm -f debug/*.$(OBJEXT) -rm -f debug/*.lo -rm -f debug/traces/*.$(OBJEXT) -rm -f debug/traces/*.lo -rm -f drivers/cpu/*.$(OBJEXT) -rm -f drivers/cpu/*.lo -rm -f drivers/cuda/*.$(OBJEXT) -rm -f drivers/cuda/*.lo -rm -f drivers/disk/*.$(OBJEXT) -rm -f drivers/disk/*.lo -rm -f drivers/driver_common/*.$(OBJEXT) -rm -f drivers/driver_common/*.lo -rm -f drivers/hip/*.$(OBJEXT) -rm -f drivers/hip/*.lo -rm -f drivers/max/*.$(OBJEXT) -rm -f drivers/max/*.lo -rm -f drivers/mp_common/*.$(OBJEXT) -rm -f drivers/mp_common/*.lo -rm -f drivers/mpi/*.$(OBJEXT) -rm -f drivers/mpi/*.lo -rm -f drivers/opencl/*.$(OBJEXT) -rm -f drivers/opencl/*.lo -rm -f drivers/tcpip/*.$(OBJEXT) -rm -f drivers/tcpip/*.lo -rm -f parallel_worker/*.$(OBJEXT) -rm -f parallel_worker/*.lo -rm -f profiling/*.$(OBJEXT) -rm -f profiling/*.lo -rm -f sched_policies/*.$(OBJEXT) -rm -f sched_policies/*.lo -rm -f util/*.$(OBJEXT) -rm -f util/*.lo -rm -f worker_collection/*.$(OBJEXT) -rm -f worker_collection/*.lo distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/barrier.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/barrier_counter.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/fxt.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/graph.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/hash.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/inlines.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/knobs.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/rbtree.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/rwlock.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/starpu_spinlock.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/thread.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/timing.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/utils.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/combined_workers.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/debug.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/detect_combined_workers.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/devices.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/disk.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/drivers.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/errorcheck.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/idle_hook.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/jobs.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/parallel_task.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/progress_hook.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/sched_ctx.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/sched_ctx_list.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/sched_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/simgrid.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/simgrid_cpp.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/task.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/task_bundle.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/topology.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/tree.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/workers.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/dependencies/$(DEPDIR)/cg.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/dependencies/$(DEPDIR)/data_arbiter_concurrency.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/dependencies/$(DEPDIR)/data_concurrency.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/dependencies/$(DEPDIR)/dependencies.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/dependencies/$(DEPDIR)/implicit_data_deps.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/dependencies/$(DEPDIR)/tags.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/dependencies/$(DEPDIR)/task_deps.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/disk_ops/$(DEPDIR)/disk_hdf5.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/disk_ops/$(DEPDIR)/disk_leveldb.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/disk_ops/$(DEPDIR)/disk_stdio.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/disk_ops/$(DEPDIR)/disk_unistd.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/disk_ops/$(DEPDIR)/disk_unistd_o_direct.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/disk_ops/unistd/$(DEPDIR)/disk_unistd_global.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/perfmodel/$(DEPDIR)/energy_model.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/perfmodel/$(DEPDIR)/multiple_regression.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/perfmodel/$(DEPDIR)/perfmodel.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/perfmodel/$(DEPDIR)/perfmodel_bus.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/perfmodel/$(DEPDIR)/perfmodel_history.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/perfmodel/$(DEPDIR)/perfmodel_nan.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/perfmodel/$(DEPDIR)/perfmodel_print.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@core/perfmodel/$(DEPDIR)/regression.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/coherency.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/copy_driver.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/data_request.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/datastats.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/datawizard.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/filters.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/footprint.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/malloc.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/memalloc.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/memory_manager.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/memory_nodes.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/memstats.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/node_ops.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/reduction.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/sort_data_handles.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/user_interactions.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/write_back.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/bcsr_filters.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/bcsr_interface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/block_filters.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/block_interface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/coo_interface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/csr_filters.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/csr_interface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/data_interface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/matrix_filters.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/matrix_interface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/multiformat_interface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/ndim_filters.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/ndim_interface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/tensor_filters.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/tensor_interface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/variable_interface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/vector_filters.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/vector_interface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/void_interface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@debug/$(DEPDIR)/latency.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@debug/$(DEPDIR)/structures_size.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@debug/traces/$(DEPDIR)/anim.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@debug/traces/$(DEPDIR)/starpu_fxt.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@debug/traces/$(DEPDIR)/starpu_fxt_dag.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@debug/traces/$(DEPDIR)/starpu_fxt_mpi.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@debug/traces/$(DEPDIR)/starpu_paje.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/cpu/$(DEPDIR)/driver_cpu.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/driver_cuda.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/driver_cuda0.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/driver_cuda1.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/driver_cuda_init.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/starpu_cublas.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/starpu_cublasLt.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/starpu_cublas_v2.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/starpu_cusolver.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/cuda/$(DEPDIR)/starpu_cusparse.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/disk/$(DEPDIR)/driver_disk.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/driver_common/$(DEPDIR)/driver_common.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/hip/$(DEPDIR)/driver_hip.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/hip/$(DEPDIR)/driver_hip_init.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/hip/$(DEPDIR)/starpu_hipblas.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/max/$(DEPDIR)/driver_max_fpga.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/max/$(DEPDIR)/driver_max_fpga_init.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/mp_common/$(DEPDIR)/mp_common.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/mp_common/$(DEPDIR)/sink_common.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/mp_common/$(DEPDIR)/source_common.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/mpi/$(DEPDIR)/driver_mpi_common.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/mpi/$(DEPDIR)/driver_mpi_init.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/mpi/$(DEPDIR)/driver_mpi_sink.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/mpi/$(DEPDIR)/driver_mpi_source.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/opencl/$(DEPDIR)/driver_opencl.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/opencl/$(DEPDIR)/driver_opencl_init.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/opencl/$(DEPDIR)/driver_opencl_utils.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/tcpip/$(DEPDIR)/driver_tcpip_common.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/tcpip/$(DEPDIR)/driver_tcpip_init.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/tcpip/$(DEPDIR)/driver_tcpip_sink.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@drivers/tcpip/$(DEPDIR)/driver_tcpip_source.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@parallel_worker/$(DEPDIR)/starpu_parallel_worker_create.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@profiling/$(DEPDIR)/bound.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@profiling/$(DEPDIR)/callbacks.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@profiling/$(DEPDIR)/profiling.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@profiling/$(DEPDIR)/profiling_helpers.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_best_implementation.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_composed.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_eager.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_eager_calibration.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_eager_prio.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_fifo.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_heft.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_heteroprio.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_mct.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_perfmodel_select.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_prio.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_random.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_sched.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_stage.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_userchoice.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_work_stealing.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/component_worker.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/deque_modeling_policy_data_aware.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/eager_central_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/eager_central_priority_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/fifo_queues.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/graph_test_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/helper_mct.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/heteroprio.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/hierarchical_heft.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_eager.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_eager_prefetching.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_eager_prio.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_ez.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_gemm.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_heft.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_heft2.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_heft_prio.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_heteroprio.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_heteroprio_heft.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_parallel_heft.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_parallel_random.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_prio.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_prio_prefetching.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_random.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_random_prefetching.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/modular_ws.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/parallel_eager.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/parallel_heft.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/prio_deque.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/random_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/scheduler_maker.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/work_stealing_policy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/execute_on_all.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/file.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/fstarpu.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/misc.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/openmp_runtime_support.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/openmp_runtime_support_environment.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/openmp_runtime_support_omp_api.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/starpu_create_sync_task.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/starpu_data_cpy.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/starpu_task_insert.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/starpu_task_insert_utils.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@worker_collection/$(DEPDIR)/worker_list.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@worker_collection/$(DEPDIR)/worker_tree.Plo@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< .cpp.o: @am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< .cpp.obj: @am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .cpp.lo: @am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs -rm -rf common/.libs common/_libs -rm -rf core/.libs core/_libs -rm -rf core/dependencies/.libs core/dependencies/_libs -rm -rf core/disk_ops/.libs core/disk_ops/_libs -rm -rf core/disk_ops/unistd/.libs core/disk_ops/unistd/_libs -rm -rf core/perfmodel/.libs core/perfmodel/_libs -rm -rf datawizard/.libs datawizard/_libs -rm -rf datawizard/interfaces/.libs datawizard/interfaces/_libs -rm -rf debug/.libs debug/_libs -rm -rf debug/traces/.libs debug/traces/_libs -rm -rf drivers/cpu/.libs drivers/cpu/_libs -rm -rf drivers/cuda/.libs drivers/cuda/_libs -rm -rf drivers/disk/.libs drivers/disk/_libs -rm -rf drivers/driver_common/.libs drivers/driver_common/_libs -rm -rf drivers/hip/.libs drivers/hip/_libs -rm -rf drivers/max/.libs drivers/max/_libs -rm -rf drivers/mp_common/.libs drivers/mp_common/_libs -rm -rf drivers/mpi/.libs drivers/mpi/_libs -rm -rf drivers/opencl/.libs drivers/opencl/_libs -rm -rf drivers/tcpip/.libs drivers/tcpip/_libs -rm -rf parallel_worker/.libs parallel_worker/_libs -rm -rf profiling/.libs profiling/_libs -rm -rf sched_policies/.libs sched_policies/_libs -rm -rf util/.libs util/_libs -rm -rf worker_collection/.libs worker_collection/_libs install-xmlDATA: $(xml_DATA) @$(NORMAL_INSTALL) @list='$(xml_DATA)'; test -n "$(xmldir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(xmldir)'"; \ $(MKDIR_P) "$(DESTDIR)$(xmldir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(xmldir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(xmldir)" || exit $$?; \ done uninstall-xmlDATA: @$(NORMAL_UNINSTALL) @list='$(xml_DATA)'; test -n "$(xmldir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(xmldir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$(top_distdir)" distdir="$(distdir)" \ dist-hook check-am: all-am check: check-recursive @STARPU_HAVE_MS_LIB_FALSE@all-local: @STARPU_HAVE_WINDOWS_FALSE@all-local: all-am: Makefile $(LTLIBRARIES) $(DATA) $(HEADERS) all-local installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(xmldir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -rm -f common/$(DEPDIR)/$(am__dirstamp) -rm -f common/$(am__dirstamp) -rm -f core/$(DEPDIR)/$(am__dirstamp) -rm -f core/$(am__dirstamp) -rm -f core/dependencies/$(DEPDIR)/$(am__dirstamp) -rm -f core/dependencies/$(am__dirstamp) -rm -f core/disk_ops/$(DEPDIR)/$(am__dirstamp) -rm -f core/disk_ops/$(am__dirstamp) -rm -f core/disk_ops/unistd/$(DEPDIR)/$(am__dirstamp) -rm -f core/disk_ops/unistd/$(am__dirstamp) -rm -f core/perfmodel/$(DEPDIR)/$(am__dirstamp) -rm -f core/perfmodel/$(am__dirstamp) -rm -f datawizard/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/$(am__dirstamp) -rm -f datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/interfaces/$(am__dirstamp) -rm -f debug/$(DEPDIR)/$(am__dirstamp) -rm -f debug/$(am__dirstamp) -rm -f debug/traces/$(DEPDIR)/$(am__dirstamp) -rm -f debug/traces/$(am__dirstamp) -rm -f drivers/cpu/$(DEPDIR)/$(am__dirstamp) -rm -f drivers/cpu/$(am__dirstamp) -rm -f drivers/cuda/$(DEPDIR)/$(am__dirstamp) -rm -f drivers/cuda/$(am__dirstamp) -rm -f drivers/disk/$(DEPDIR)/$(am__dirstamp) -rm -f drivers/disk/$(am__dirstamp) -rm -f drivers/driver_common/$(DEPDIR)/$(am__dirstamp) -rm -f drivers/driver_common/$(am__dirstamp) -rm -f drivers/hip/$(DEPDIR)/$(am__dirstamp) -rm -f drivers/hip/$(am__dirstamp) -rm -f drivers/max/$(DEPDIR)/$(am__dirstamp) -rm -f drivers/max/$(am__dirstamp) -rm -f drivers/mp_common/$(DEPDIR)/$(am__dirstamp) -rm -f drivers/mp_common/$(am__dirstamp) -rm -f drivers/mpi/$(DEPDIR)/$(am__dirstamp) -rm -f drivers/mpi/$(am__dirstamp) -rm -f drivers/opencl/$(DEPDIR)/$(am__dirstamp) -rm -f drivers/opencl/$(am__dirstamp) -rm -f drivers/tcpip/$(DEPDIR)/$(am__dirstamp) -rm -f drivers/tcpip/$(am__dirstamp) -rm -f parallel_worker/$(DEPDIR)/$(am__dirstamp) -rm -f parallel_worker/$(am__dirstamp) -rm -f profiling/$(DEPDIR)/$(am__dirstamp) -rm -f profiling/$(am__dirstamp) -rm -f sched_policies/$(DEPDIR)/$(am__dirstamp) -rm -f sched_policies/$(am__dirstamp) -rm -f util/$(DEPDIR)/$(am__dirstamp) -rm -f util/$(am__dirstamp) -rm -f worker_collection/$(DEPDIR)/$(am__dirstamp) -rm -f worker_collection/$(am__dirstamp) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." @STARPU_HAVE_WINDOWS_FALSE@install-exec-hook: clean: clean-recursive clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ mostlyclean-am distclean: distclean-recursive -rm -f common/$(DEPDIR)/barrier.Plo -rm -f common/$(DEPDIR)/barrier_counter.Plo -rm -f common/$(DEPDIR)/fxt.Plo -rm -f common/$(DEPDIR)/graph.Plo -rm -f common/$(DEPDIR)/hash.Plo -rm -f common/$(DEPDIR)/inlines.Plo -rm -f common/$(DEPDIR)/knobs.Plo -rm -f common/$(DEPDIR)/rbtree.Plo -rm -f common/$(DEPDIR)/rwlock.Plo -rm -f common/$(DEPDIR)/starpu_spinlock.Plo -rm -f common/$(DEPDIR)/thread.Plo -rm -f common/$(DEPDIR)/timing.Plo -rm -f common/$(DEPDIR)/utils.Plo -rm -f core/$(DEPDIR)/combined_workers.Plo -rm -f core/$(DEPDIR)/debug.Plo -rm -f core/$(DEPDIR)/detect_combined_workers.Plo -rm -f core/$(DEPDIR)/devices.Plo -rm -f core/$(DEPDIR)/disk.Plo -rm -f core/$(DEPDIR)/drivers.Plo -rm -f core/$(DEPDIR)/errorcheck.Plo -rm -f core/$(DEPDIR)/idle_hook.Plo -rm -f core/$(DEPDIR)/jobs.Plo -rm -f core/$(DEPDIR)/parallel_task.Plo -rm -f core/$(DEPDIR)/progress_hook.Plo -rm -f core/$(DEPDIR)/sched_ctx.Plo -rm -f core/$(DEPDIR)/sched_ctx_list.Plo -rm -f core/$(DEPDIR)/sched_policy.Plo -rm -f core/$(DEPDIR)/simgrid.Plo -rm -f core/$(DEPDIR)/simgrid_cpp.Plo -rm -f core/$(DEPDIR)/task.Plo -rm -f core/$(DEPDIR)/task_bundle.Plo -rm -f core/$(DEPDIR)/topology.Plo -rm -f core/$(DEPDIR)/tree.Plo -rm -f core/$(DEPDIR)/workers.Plo -rm -f core/dependencies/$(DEPDIR)/cg.Plo -rm -f core/dependencies/$(DEPDIR)/data_arbiter_concurrency.Plo -rm -f core/dependencies/$(DEPDIR)/data_concurrency.Plo -rm -f core/dependencies/$(DEPDIR)/dependencies.Plo -rm -f core/dependencies/$(DEPDIR)/implicit_data_deps.Plo -rm -f core/dependencies/$(DEPDIR)/tags.Plo -rm -f core/dependencies/$(DEPDIR)/task_deps.Plo -rm -f core/disk_ops/$(DEPDIR)/disk_hdf5.Plo -rm -f core/disk_ops/$(DEPDIR)/disk_leveldb.Plo -rm -f core/disk_ops/$(DEPDIR)/disk_stdio.Plo -rm -f core/disk_ops/$(DEPDIR)/disk_unistd.Plo -rm -f core/disk_ops/$(DEPDIR)/disk_unistd_o_direct.Plo -rm -f core/disk_ops/unistd/$(DEPDIR)/disk_unistd_global.Plo -rm -f core/perfmodel/$(DEPDIR)/energy_model.Plo -rm -f core/perfmodel/$(DEPDIR)/multiple_regression.Plo -rm -f core/perfmodel/$(DEPDIR)/perfmodel.Plo -rm -f core/perfmodel/$(DEPDIR)/perfmodel_bus.Plo -rm -f core/perfmodel/$(DEPDIR)/perfmodel_history.Plo -rm -f core/perfmodel/$(DEPDIR)/perfmodel_nan.Plo -rm -f core/perfmodel/$(DEPDIR)/perfmodel_print.Plo -rm -f core/perfmodel/$(DEPDIR)/regression.Plo -rm -f datawizard/$(DEPDIR)/coherency.Plo -rm -f datawizard/$(DEPDIR)/copy_driver.Plo -rm -f datawizard/$(DEPDIR)/data_request.Plo -rm -f datawizard/$(DEPDIR)/datastats.Plo -rm -f datawizard/$(DEPDIR)/datawizard.Plo -rm -f datawizard/$(DEPDIR)/filters.Plo -rm -f datawizard/$(DEPDIR)/footprint.Plo -rm -f datawizard/$(DEPDIR)/malloc.Plo -rm -f datawizard/$(DEPDIR)/memalloc.Plo -rm -f datawizard/$(DEPDIR)/memory_manager.Plo -rm -f datawizard/$(DEPDIR)/memory_nodes.Plo -rm -f datawizard/$(DEPDIR)/memstats.Plo -rm -f datawizard/$(DEPDIR)/node_ops.Plo -rm -f datawizard/$(DEPDIR)/reduction.Plo -rm -f datawizard/$(DEPDIR)/sort_data_handles.Plo -rm -f datawizard/$(DEPDIR)/user_interactions.Plo -rm -f datawizard/$(DEPDIR)/write_back.Plo -rm -f datawizard/interfaces/$(DEPDIR)/bcsr_filters.Plo -rm -f datawizard/interfaces/$(DEPDIR)/bcsr_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/block_filters.Plo -rm -f datawizard/interfaces/$(DEPDIR)/block_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/coo_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/csr_filters.Plo -rm -f datawizard/interfaces/$(DEPDIR)/csr_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/data_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/matrix_filters.Plo -rm -f datawizard/interfaces/$(DEPDIR)/matrix_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/multiformat_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/ndim_filters.Plo -rm -f datawizard/interfaces/$(DEPDIR)/ndim_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/tensor_filters.Plo -rm -f datawizard/interfaces/$(DEPDIR)/tensor_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/variable_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/vector_filters.Plo -rm -f datawizard/interfaces/$(DEPDIR)/vector_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/void_interface.Plo -rm -f debug/$(DEPDIR)/latency.Plo -rm -f debug/$(DEPDIR)/structures_size.Plo -rm -f debug/traces/$(DEPDIR)/anim.Plo -rm -f debug/traces/$(DEPDIR)/starpu_fxt.Plo -rm -f debug/traces/$(DEPDIR)/starpu_fxt_dag.Plo -rm -f debug/traces/$(DEPDIR)/starpu_fxt_mpi.Plo -rm -f debug/traces/$(DEPDIR)/starpu_paje.Plo -rm -f drivers/cpu/$(DEPDIR)/driver_cpu.Plo -rm -f drivers/cuda/$(DEPDIR)/driver_cuda.Plo -rm -f drivers/cuda/$(DEPDIR)/driver_cuda0.Plo -rm -f drivers/cuda/$(DEPDIR)/driver_cuda1.Plo -rm -f drivers/cuda/$(DEPDIR)/driver_cuda_init.Plo -rm -f drivers/cuda/$(DEPDIR)/starpu_cublas.Plo -rm -f drivers/cuda/$(DEPDIR)/starpu_cublasLt.Plo -rm -f drivers/cuda/$(DEPDIR)/starpu_cublas_v2.Plo -rm -f drivers/cuda/$(DEPDIR)/starpu_cusolver.Plo -rm -f drivers/cuda/$(DEPDIR)/starpu_cusparse.Plo -rm -f drivers/disk/$(DEPDIR)/driver_disk.Plo -rm -f drivers/driver_common/$(DEPDIR)/driver_common.Plo -rm -f drivers/hip/$(DEPDIR)/driver_hip.Plo -rm -f drivers/hip/$(DEPDIR)/driver_hip_init.Plo -rm -f drivers/hip/$(DEPDIR)/starpu_hipblas.Plo -rm -f drivers/max/$(DEPDIR)/driver_max_fpga.Plo -rm -f drivers/max/$(DEPDIR)/driver_max_fpga_init.Plo -rm -f drivers/mp_common/$(DEPDIR)/mp_common.Plo -rm -f drivers/mp_common/$(DEPDIR)/sink_common.Plo -rm -f drivers/mp_common/$(DEPDIR)/source_common.Plo -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_common.Plo -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_init.Plo -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_sink.Plo -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_source.Plo -rm -f drivers/opencl/$(DEPDIR)/driver_opencl.Plo -rm -f drivers/opencl/$(DEPDIR)/driver_opencl_init.Plo -rm -f drivers/opencl/$(DEPDIR)/driver_opencl_utils.Plo -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_common.Plo -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_init.Plo -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_sink.Plo -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_source.Plo -rm -f parallel_worker/$(DEPDIR)/starpu_parallel_worker_create.Plo -rm -f profiling/$(DEPDIR)/bound.Plo -rm -f profiling/$(DEPDIR)/callbacks.Plo -rm -f profiling/$(DEPDIR)/profiling.Plo -rm -f profiling/$(DEPDIR)/profiling_helpers.Plo -rm -f sched_policies/$(DEPDIR)/component_best_implementation.Plo -rm -f sched_policies/$(DEPDIR)/component_composed.Plo -rm -f sched_policies/$(DEPDIR)/component_eager.Plo -rm -f sched_policies/$(DEPDIR)/component_eager_calibration.Plo -rm -f sched_policies/$(DEPDIR)/component_eager_prio.Plo -rm -f sched_policies/$(DEPDIR)/component_fifo.Plo -rm -f sched_policies/$(DEPDIR)/component_heft.Plo -rm -f sched_policies/$(DEPDIR)/component_heteroprio.Plo -rm -f sched_policies/$(DEPDIR)/component_mct.Plo -rm -f sched_policies/$(DEPDIR)/component_perfmodel_select.Plo -rm -f sched_policies/$(DEPDIR)/component_prio.Plo -rm -f sched_policies/$(DEPDIR)/component_random.Plo -rm -f sched_policies/$(DEPDIR)/component_sched.Plo -rm -f sched_policies/$(DEPDIR)/component_stage.Plo -rm -f sched_policies/$(DEPDIR)/component_userchoice.Plo -rm -f sched_policies/$(DEPDIR)/component_work_stealing.Plo -rm -f sched_policies/$(DEPDIR)/component_worker.Plo -rm -f sched_policies/$(DEPDIR)/deque_modeling_policy_data_aware.Plo -rm -f sched_policies/$(DEPDIR)/eager_central_policy.Plo -rm -f sched_policies/$(DEPDIR)/eager_central_priority_policy.Plo -rm -f sched_policies/$(DEPDIR)/fifo_queues.Plo -rm -f sched_policies/$(DEPDIR)/graph_test_policy.Plo -rm -f sched_policies/$(DEPDIR)/helper_mct.Plo -rm -f sched_policies/$(DEPDIR)/heteroprio.Plo -rm -f sched_policies/$(DEPDIR)/hierarchical_heft.Plo -rm -f sched_policies/$(DEPDIR)/modular_eager.Plo -rm -f sched_policies/$(DEPDIR)/modular_eager_prefetching.Plo -rm -f sched_policies/$(DEPDIR)/modular_eager_prio.Plo -rm -f sched_policies/$(DEPDIR)/modular_ez.Plo -rm -f sched_policies/$(DEPDIR)/modular_gemm.Plo -rm -f sched_policies/$(DEPDIR)/modular_heft.Plo -rm -f sched_policies/$(DEPDIR)/modular_heft2.Plo -rm -f sched_policies/$(DEPDIR)/modular_heft_prio.Plo -rm -f sched_policies/$(DEPDIR)/modular_heteroprio.Plo -rm -f sched_policies/$(DEPDIR)/modular_heteroprio_heft.Plo -rm -f sched_policies/$(DEPDIR)/modular_parallel_heft.Plo -rm -f sched_policies/$(DEPDIR)/modular_parallel_random.Plo -rm -f sched_policies/$(DEPDIR)/modular_prio.Plo -rm -f sched_policies/$(DEPDIR)/modular_prio_prefetching.Plo -rm -f sched_policies/$(DEPDIR)/modular_random.Plo -rm -f sched_policies/$(DEPDIR)/modular_random_prefetching.Plo -rm -f sched_policies/$(DEPDIR)/modular_ws.Plo -rm -f sched_policies/$(DEPDIR)/parallel_eager.Plo -rm -f sched_policies/$(DEPDIR)/parallel_heft.Plo -rm -f sched_policies/$(DEPDIR)/prio_deque.Plo -rm -f sched_policies/$(DEPDIR)/random_policy.Plo -rm -f sched_policies/$(DEPDIR)/scheduler_maker.Plo -rm -f sched_policies/$(DEPDIR)/work_stealing_policy.Plo -rm -f util/$(DEPDIR)/execute_on_all.Plo -rm -f util/$(DEPDIR)/file.Plo -rm -f util/$(DEPDIR)/fstarpu.Plo -rm -f util/$(DEPDIR)/misc.Plo -rm -f util/$(DEPDIR)/openmp_runtime_support.Plo -rm -f util/$(DEPDIR)/openmp_runtime_support_environment.Plo -rm -f util/$(DEPDIR)/openmp_runtime_support_omp_api.Plo -rm -f util/$(DEPDIR)/starpu_create_sync_task.Plo -rm -f util/$(DEPDIR)/starpu_data_cpy.Plo -rm -f util/$(DEPDIR)/starpu_task_insert.Plo -rm -f util/$(DEPDIR)/starpu_task_insert_utils.Plo -rm -f worker_collection/$(DEPDIR)/worker_list.Plo -rm -f worker_collection/$(DEPDIR)/worker_tree.Plo -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-xmlDATA install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-libLTLIBRARIES @$(NORMAL_INSTALL) $(MAKE) $(AM_MAKEFLAGS) install-exec-hook install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f common/$(DEPDIR)/barrier.Plo -rm -f common/$(DEPDIR)/barrier_counter.Plo -rm -f common/$(DEPDIR)/fxt.Plo -rm -f common/$(DEPDIR)/graph.Plo -rm -f common/$(DEPDIR)/hash.Plo -rm -f common/$(DEPDIR)/inlines.Plo -rm -f common/$(DEPDIR)/knobs.Plo -rm -f common/$(DEPDIR)/rbtree.Plo -rm -f common/$(DEPDIR)/rwlock.Plo -rm -f common/$(DEPDIR)/starpu_spinlock.Plo -rm -f common/$(DEPDIR)/thread.Plo -rm -f common/$(DEPDIR)/timing.Plo -rm -f common/$(DEPDIR)/utils.Plo -rm -f core/$(DEPDIR)/combined_workers.Plo -rm -f core/$(DEPDIR)/debug.Plo -rm -f core/$(DEPDIR)/detect_combined_workers.Plo -rm -f core/$(DEPDIR)/devices.Plo -rm -f core/$(DEPDIR)/disk.Plo -rm -f core/$(DEPDIR)/drivers.Plo -rm -f core/$(DEPDIR)/errorcheck.Plo -rm -f core/$(DEPDIR)/idle_hook.Plo -rm -f core/$(DEPDIR)/jobs.Plo -rm -f core/$(DEPDIR)/parallel_task.Plo -rm -f core/$(DEPDIR)/progress_hook.Plo -rm -f core/$(DEPDIR)/sched_ctx.Plo -rm -f core/$(DEPDIR)/sched_ctx_list.Plo -rm -f core/$(DEPDIR)/sched_policy.Plo -rm -f core/$(DEPDIR)/simgrid.Plo -rm -f core/$(DEPDIR)/simgrid_cpp.Plo -rm -f core/$(DEPDIR)/task.Plo -rm -f core/$(DEPDIR)/task_bundle.Plo -rm -f core/$(DEPDIR)/topology.Plo -rm -f core/$(DEPDIR)/tree.Plo -rm -f core/$(DEPDIR)/workers.Plo -rm -f core/dependencies/$(DEPDIR)/cg.Plo -rm -f core/dependencies/$(DEPDIR)/data_arbiter_concurrency.Plo -rm -f core/dependencies/$(DEPDIR)/data_concurrency.Plo -rm -f core/dependencies/$(DEPDIR)/dependencies.Plo -rm -f core/dependencies/$(DEPDIR)/implicit_data_deps.Plo -rm -f core/dependencies/$(DEPDIR)/tags.Plo -rm -f core/dependencies/$(DEPDIR)/task_deps.Plo -rm -f core/disk_ops/$(DEPDIR)/disk_hdf5.Plo -rm -f core/disk_ops/$(DEPDIR)/disk_leveldb.Plo -rm -f core/disk_ops/$(DEPDIR)/disk_stdio.Plo -rm -f core/disk_ops/$(DEPDIR)/disk_unistd.Plo -rm -f core/disk_ops/$(DEPDIR)/disk_unistd_o_direct.Plo -rm -f core/disk_ops/unistd/$(DEPDIR)/disk_unistd_global.Plo -rm -f core/perfmodel/$(DEPDIR)/energy_model.Plo -rm -f core/perfmodel/$(DEPDIR)/multiple_regression.Plo -rm -f core/perfmodel/$(DEPDIR)/perfmodel.Plo -rm -f core/perfmodel/$(DEPDIR)/perfmodel_bus.Plo -rm -f core/perfmodel/$(DEPDIR)/perfmodel_history.Plo -rm -f core/perfmodel/$(DEPDIR)/perfmodel_nan.Plo -rm -f core/perfmodel/$(DEPDIR)/perfmodel_print.Plo -rm -f core/perfmodel/$(DEPDIR)/regression.Plo -rm -f datawizard/$(DEPDIR)/coherency.Plo -rm -f datawizard/$(DEPDIR)/copy_driver.Plo -rm -f datawizard/$(DEPDIR)/data_request.Plo -rm -f datawizard/$(DEPDIR)/datastats.Plo -rm -f datawizard/$(DEPDIR)/datawizard.Plo -rm -f datawizard/$(DEPDIR)/filters.Plo -rm -f datawizard/$(DEPDIR)/footprint.Plo -rm -f datawizard/$(DEPDIR)/malloc.Plo -rm -f datawizard/$(DEPDIR)/memalloc.Plo -rm -f datawizard/$(DEPDIR)/memory_manager.Plo -rm -f datawizard/$(DEPDIR)/memory_nodes.Plo -rm -f datawizard/$(DEPDIR)/memstats.Plo -rm -f datawizard/$(DEPDIR)/node_ops.Plo -rm -f datawizard/$(DEPDIR)/reduction.Plo -rm -f datawizard/$(DEPDIR)/sort_data_handles.Plo -rm -f datawizard/$(DEPDIR)/user_interactions.Plo -rm -f datawizard/$(DEPDIR)/write_back.Plo -rm -f datawizard/interfaces/$(DEPDIR)/bcsr_filters.Plo -rm -f datawizard/interfaces/$(DEPDIR)/bcsr_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/block_filters.Plo -rm -f datawizard/interfaces/$(DEPDIR)/block_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/coo_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/csr_filters.Plo -rm -f datawizard/interfaces/$(DEPDIR)/csr_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/data_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/matrix_filters.Plo -rm -f datawizard/interfaces/$(DEPDIR)/matrix_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/multiformat_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/ndim_filters.Plo -rm -f datawizard/interfaces/$(DEPDIR)/ndim_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/tensor_filters.Plo -rm -f datawizard/interfaces/$(DEPDIR)/tensor_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/variable_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/vector_filters.Plo -rm -f datawizard/interfaces/$(DEPDIR)/vector_interface.Plo -rm -f datawizard/interfaces/$(DEPDIR)/void_interface.Plo -rm -f debug/$(DEPDIR)/latency.Plo -rm -f debug/$(DEPDIR)/structures_size.Plo -rm -f debug/traces/$(DEPDIR)/anim.Plo -rm -f debug/traces/$(DEPDIR)/starpu_fxt.Plo -rm -f debug/traces/$(DEPDIR)/starpu_fxt_dag.Plo -rm -f debug/traces/$(DEPDIR)/starpu_fxt_mpi.Plo -rm -f debug/traces/$(DEPDIR)/starpu_paje.Plo -rm -f drivers/cpu/$(DEPDIR)/driver_cpu.Plo -rm -f drivers/cuda/$(DEPDIR)/driver_cuda.Plo -rm -f drivers/cuda/$(DEPDIR)/driver_cuda0.Plo -rm -f drivers/cuda/$(DEPDIR)/driver_cuda1.Plo -rm -f drivers/cuda/$(DEPDIR)/driver_cuda_init.Plo -rm -f drivers/cuda/$(DEPDIR)/starpu_cublas.Plo -rm -f drivers/cuda/$(DEPDIR)/starpu_cublasLt.Plo -rm -f drivers/cuda/$(DEPDIR)/starpu_cublas_v2.Plo -rm -f drivers/cuda/$(DEPDIR)/starpu_cusolver.Plo -rm -f drivers/cuda/$(DEPDIR)/starpu_cusparse.Plo -rm -f drivers/disk/$(DEPDIR)/driver_disk.Plo -rm -f drivers/driver_common/$(DEPDIR)/driver_common.Plo -rm -f drivers/hip/$(DEPDIR)/driver_hip.Plo -rm -f drivers/hip/$(DEPDIR)/driver_hip_init.Plo -rm -f drivers/hip/$(DEPDIR)/starpu_hipblas.Plo -rm -f drivers/max/$(DEPDIR)/driver_max_fpga.Plo -rm -f drivers/max/$(DEPDIR)/driver_max_fpga_init.Plo -rm -f drivers/mp_common/$(DEPDIR)/mp_common.Plo -rm -f drivers/mp_common/$(DEPDIR)/sink_common.Plo -rm -f drivers/mp_common/$(DEPDIR)/source_common.Plo -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_common.Plo -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_init.Plo -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_sink.Plo -rm -f drivers/mpi/$(DEPDIR)/driver_mpi_source.Plo -rm -f drivers/opencl/$(DEPDIR)/driver_opencl.Plo -rm -f drivers/opencl/$(DEPDIR)/driver_opencl_init.Plo -rm -f drivers/opencl/$(DEPDIR)/driver_opencl_utils.Plo -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_common.Plo -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_init.Plo -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_sink.Plo -rm -f drivers/tcpip/$(DEPDIR)/driver_tcpip_source.Plo -rm -f parallel_worker/$(DEPDIR)/starpu_parallel_worker_create.Plo -rm -f profiling/$(DEPDIR)/bound.Plo -rm -f profiling/$(DEPDIR)/callbacks.Plo -rm -f profiling/$(DEPDIR)/profiling.Plo -rm -f profiling/$(DEPDIR)/profiling_helpers.Plo -rm -f sched_policies/$(DEPDIR)/component_best_implementation.Plo -rm -f sched_policies/$(DEPDIR)/component_composed.Plo -rm -f sched_policies/$(DEPDIR)/component_eager.Plo -rm -f sched_policies/$(DEPDIR)/component_eager_calibration.Plo -rm -f sched_policies/$(DEPDIR)/component_eager_prio.Plo -rm -f sched_policies/$(DEPDIR)/component_fifo.Plo -rm -f sched_policies/$(DEPDIR)/component_heft.Plo -rm -f sched_policies/$(DEPDIR)/component_heteroprio.Plo -rm -f sched_policies/$(DEPDIR)/component_mct.Plo -rm -f sched_policies/$(DEPDIR)/component_perfmodel_select.Plo -rm -f sched_policies/$(DEPDIR)/component_prio.Plo -rm -f sched_policies/$(DEPDIR)/component_random.Plo -rm -f sched_policies/$(DEPDIR)/component_sched.Plo -rm -f sched_policies/$(DEPDIR)/component_stage.Plo -rm -f sched_policies/$(DEPDIR)/component_userchoice.Plo -rm -f sched_policies/$(DEPDIR)/component_work_stealing.Plo -rm -f sched_policies/$(DEPDIR)/component_worker.Plo -rm -f sched_policies/$(DEPDIR)/deque_modeling_policy_data_aware.Plo -rm -f sched_policies/$(DEPDIR)/eager_central_policy.Plo -rm -f sched_policies/$(DEPDIR)/eager_central_priority_policy.Plo -rm -f sched_policies/$(DEPDIR)/fifo_queues.Plo -rm -f sched_policies/$(DEPDIR)/graph_test_policy.Plo -rm -f sched_policies/$(DEPDIR)/helper_mct.Plo -rm -f sched_policies/$(DEPDIR)/heteroprio.Plo -rm -f sched_policies/$(DEPDIR)/hierarchical_heft.Plo -rm -f sched_policies/$(DEPDIR)/modular_eager.Plo -rm -f sched_policies/$(DEPDIR)/modular_eager_prefetching.Plo -rm -f sched_policies/$(DEPDIR)/modular_eager_prio.Plo -rm -f sched_policies/$(DEPDIR)/modular_ez.Plo -rm -f sched_policies/$(DEPDIR)/modular_gemm.Plo -rm -f sched_policies/$(DEPDIR)/modular_heft.Plo -rm -f sched_policies/$(DEPDIR)/modular_heft2.Plo -rm -f sched_policies/$(DEPDIR)/modular_heft_prio.Plo -rm -f sched_policies/$(DEPDIR)/modular_heteroprio.Plo -rm -f sched_policies/$(DEPDIR)/modular_heteroprio_heft.Plo -rm -f sched_policies/$(DEPDIR)/modular_parallel_heft.Plo -rm -f sched_policies/$(DEPDIR)/modular_parallel_random.Plo -rm -f sched_policies/$(DEPDIR)/modular_prio.Plo -rm -f sched_policies/$(DEPDIR)/modular_prio_prefetching.Plo -rm -f sched_policies/$(DEPDIR)/modular_random.Plo -rm -f sched_policies/$(DEPDIR)/modular_random_prefetching.Plo -rm -f sched_policies/$(DEPDIR)/modular_ws.Plo -rm -f sched_policies/$(DEPDIR)/parallel_eager.Plo -rm -f sched_policies/$(DEPDIR)/parallel_heft.Plo -rm -f sched_policies/$(DEPDIR)/prio_deque.Plo -rm -f sched_policies/$(DEPDIR)/random_policy.Plo -rm -f sched_policies/$(DEPDIR)/scheduler_maker.Plo -rm -f sched_policies/$(DEPDIR)/work_stealing_policy.Plo -rm -f util/$(DEPDIR)/execute_on_all.Plo -rm -f util/$(DEPDIR)/file.Plo -rm -f util/$(DEPDIR)/fstarpu.Plo -rm -f util/$(DEPDIR)/misc.Plo -rm -f util/$(DEPDIR)/openmp_runtime_support.Plo -rm -f util/$(DEPDIR)/openmp_runtime_support_environment.Plo -rm -f util/$(DEPDIR)/openmp_runtime_support_omp_api.Plo -rm -f util/$(DEPDIR)/starpu_create_sync_task.Plo -rm -f util/$(DEPDIR)/starpu_data_cpy.Plo -rm -f util/$(DEPDIR)/starpu_task_insert.Plo -rm -f util/$(DEPDIR)/starpu_task_insert_utils.Plo -rm -f worker_collection/$(DEPDIR)/worker_list.Plo -rm -f worker_collection/$(DEPDIR)/worker_tree.Plo -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-libLTLIBRARIES uninstall-xmlDATA .MAKE: $(am__recursive_targets) install-am install-exec-am \ install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \ am--depfiles check check-am clean clean-generic \ clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \ ctags-am dist-hook distclean distclean-compile \ distclean-generic distclean-libtool distclean-tags distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-exec-hook install-html \ install-html-am install-info install-info-am \ install-libLTLIBRARIES install-man install-pdf install-pdf-am \ install-ps install-ps-am install-strip install-xmlDATA \ installcheck installcheck-am installdirs installdirs-am \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ uninstall-libLTLIBRARIES uninstall-xmlDATA .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null @STARPU_HAVE_WINDOWS_TRUE@export LC_MESSAGES @STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@.libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.lib: libstarpu-@STARPU_EFFECTIVE_VERSION@.la dolib @STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@ ./dolib "$(STARPU_MS_LIB)" $(STARPU_MS_LIB_ARCH) .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.def @STARPU_EFFECTIVE_VERSION@ $(libstarpu_so_version) .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.lib @STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@all-local: .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.lib @STARPU_HAVE_WINDOWS_TRUE@install-exec-hook: @STARPU_HAVE_WINDOWS_TRUE@ $(INSTALL) .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.def $(DESTDIR)$(libdir) @STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@ $(INSTALL) .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.lib $(DESTDIR)$(libdir) @STARPU_HAVE_MS_LIB_TRUE@@STARPU_HAVE_WINDOWS_TRUE@ $(INSTALL) .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.exp $(DESTDIR)$(libdir) ######################################### # If some external references appear (U), it means the corresponding .c file has # only included and not the internal src/ header which contains the # static inline definition dist-hook: failed=0 ; \ look=""; \ for i in $$( $(GREP) "static inline" $$(find $(srcdir) -name \*.h) | $(SED) -e 's/.*static inline //g' | $(GREP) -v ENAME\#\# | $(SED) -n -e 's/[^(]* \(\|\*\)\([^ (]*\)(.*/\2/' -e 'p;s/^_*//;p' | $(GREP) -v _starpu_spin_init | $(GREP) -v starpu_sched_ctx_worker_is_master_for_child_ctx) ; do \ if [ -z "$$look" ] ; then \ look="$$i" ; \ else \ look="$$look\|$$i" ; \ fi ; \ done ; \ echo "$$look" ; \ for j in $(shell find . -name \*.o) ; do \ nm $$j | $(GREP) -e "U \($$look\)$$" && { echo $$j ; failed=1 ; } ; \ done ; \ [ $$failed == 0 ] nm -n .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.so | grep -v " [Ua-z] " | grep -v ' W '| grep -ve " _\?_\?_\?f\?starpu" | grep -ve " \(_init\|main\|smpi_simulated_main_\|_fini\|_edata\|__bss_start\|_end\|fut_getstamp\|__gcov_\|mangle_path\)" | (! grep .) # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/src/common/000077500000000000000000000000001507764646700160675ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/common/barrier.c000066400000000000000000000050311507764646700176600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include int _starpu_barrier_init(struct _starpu_barrier *barrier, int count) { barrier->count = count; barrier->reached_start = 0; barrier->reached_exit = 0; barrier->reached_flops = 0.0; STARPU_PTHREAD_MUTEX_INIT(&barrier->mutex, NULL); STARPU_PTHREAD_MUTEX_INIT(&barrier->mutex_exit, NULL); STARPU_PTHREAD_COND_INIT(&barrier->cond, NULL); return 0; } static int _starpu_barrier_test(struct _starpu_barrier *barrier) { /* * Check whether any threads are known to be waiting; report * "BUSY" if so. */ STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex_exit); if (barrier->reached_exit != barrier->count) { STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex_exit); return EBUSY; } STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex_exit); return 0; } int _starpu_barrier_destroy(struct _starpu_barrier *barrier) { int ret; do { ret = _starpu_barrier_test(barrier); } while (ret == EBUSY); _STARPU_DEBUG("reached_exit %u\n", barrier->reached_exit); STARPU_PTHREAD_MUTEX_DESTROY(&barrier->mutex); STARPU_PTHREAD_MUTEX_DESTROY(&barrier->mutex_exit); STARPU_PTHREAD_COND_DESTROY(&barrier->cond); return 0; } int _starpu_barrier_wait(struct _starpu_barrier *barrier) { int ret=0; // Wait until all threads enter the barrier STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex); barrier->reached_exit=0; barrier->reached_start++; if (barrier->reached_start == barrier->count) { barrier->reached_start = 0; STARPU_PTHREAD_COND_BROADCAST(&barrier->cond); ret = STARPU_PTHREAD_BARRIER_SERIAL_THREAD; } else { STARPU_PTHREAD_COND_WAIT(&barrier->cond,&barrier->mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex); // Count number of threads that exit the barrier STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex_exit); barrier->reached_exit ++; STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex_exit); return ret; } starpu-1.4.9+dfsg/src/common/barrier.h000066400000000000000000000024141507764646700176670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __COMMON_BARRIER_H__ #define __COMMON_BARRIER_H__ #include #pragma GCC visibility push(hidden) /** @file */ struct _starpu_barrier { unsigned count; unsigned reached_start; unsigned reached_exit; double reached_flops; starpu_pthread_mutex_t mutex; starpu_pthread_mutex_t mutex_exit; starpu_pthread_cond_t cond; }; int _starpu_barrier_init(struct _starpu_barrier *barrier, int count); int _starpu_barrier_destroy(struct _starpu_barrier *barrier); int _starpu_barrier_wait(struct _starpu_barrier *barrier); #pragma GCC visibility pop #endif // __COMMON_BARRIER_H__ starpu-1.4.9+dfsg/src/common/barrier_counter.c000066400000000000000000000132631507764646700214250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include int _starpu_barrier_counter_init(struct _starpu_barrier_counter *barrier_c, unsigned count) { _starpu_barrier_init(&barrier_c->barrier, count); barrier_c->min_threshold = 0; barrier_c->max_threshold = 0; STARPU_PTHREAD_COND_INIT(&barrier_c->cond2, NULL); return 0; } int _starpu_barrier_counter_destroy(struct _starpu_barrier_counter *barrier_c) { _starpu_barrier_destroy(&barrier_c->barrier); STARPU_PTHREAD_COND_DESTROY(&barrier_c->cond2); return 0; } int _starpu_barrier_counter_wait_for_empty_counter(struct _starpu_barrier_counter *barrier_c) { struct _starpu_barrier *barrier = &barrier_c->barrier; int ret; STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex); ret = barrier->reached_start; while (barrier->reached_start > 0) STARPU_PTHREAD_COND_WAIT(&barrier->cond, &barrier->mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex); return ret; } int _starpu_barrier_counter_wait_until_counter_reaches_down_to_n(struct _starpu_barrier_counter *barrier_c, unsigned n) { struct _starpu_barrier *barrier = &barrier_c->barrier; STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex); while (barrier->reached_start > n) { if (barrier_c->max_threshold < n) barrier_c->max_threshold = n; STARPU_PTHREAD_COND_WAIT(&barrier->cond, &barrier->mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex); return 0; } int _starpu_barrier_counter_wait_until_counter_reaches_up_to_n(struct _starpu_barrier_counter *barrier_c, unsigned n) { struct _starpu_barrier *barrier = &barrier_c->barrier; STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex); while (barrier->reached_start < n) { if (barrier_c->min_threshold > n) barrier_c->min_threshold = n; STARPU_PTHREAD_COND_WAIT(&barrier_c->cond2, &barrier->mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex); return 0; } int _starpu_barrier_counter_wait_for_full_counter(struct _starpu_barrier_counter *barrier_c) { struct _starpu_barrier *barrier = &barrier_c->barrier; STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex); while (barrier->reached_start < barrier->count) STARPU_PTHREAD_COND_WAIT(&barrier_c->cond2, &barrier->mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex); return 0; } int _starpu_barrier_counter_decrement_until_empty_counter(struct _starpu_barrier_counter *barrier_c, double flops) { struct _starpu_barrier *barrier = &barrier_c->barrier; int ret = 0; STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex); barrier->reached_flops -= flops; if (--barrier->reached_start == 0) { ret = 1; STARPU_PTHREAD_COND_BROADCAST(&barrier->cond); } if (barrier_c->max_threshold && barrier->reached_start == barrier_c->max_threshold) { /* have those not happy enough tell us how much again */ barrier_c->max_threshold = 0; STARPU_PTHREAD_COND_BROADCAST(&barrier->cond); } STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex); return ret; } int _starpu_barrier_counter_increment_until_full_counter(struct _starpu_barrier_counter *barrier_c, double flops) { struct _starpu_barrier *barrier = &barrier_c->barrier; int ret = 0; STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex); barrier->reached_flops += flops; if(++barrier->reached_start == barrier->count) { ret = 1; STARPU_PTHREAD_COND_BROADCAST(&barrier_c->cond2); } if (barrier_c->min_threshold && barrier->reached_start == barrier_c->min_threshold) { /* have those not happy enough tell us how much again */ barrier_c->min_threshold = 0; STARPU_PTHREAD_COND_BROADCAST(&barrier_c->cond2); } STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex); return ret; } int _starpu_barrier_counter_increment(struct _starpu_barrier_counter *barrier_c, double flops) { struct _starpu_barrier *barrier = &barrier_c->barrier; STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex); barrier->reached_start++; barrier->reached_flops += flops; STARPU_PTHREAD_COND_BROADCAST(&barrier_c->cond2); STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex); return 0; } int _starpu_barrier_counter_check(struct _starpu_barrier_counter *barrier_c) { struct _starpu_barrier *barrier = &barrier_c->barrier; STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex); if(barrier->reached_start == 0) STARPU_PTHREAD_COND_BROADCAST(&barrier->cond); STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex); return 0; } int _starpu_barrier_counter_get_reached_start(struct _starpu_barrier_counter *barrier_c) { struct _starpu_barrier *barrier = &barrier_c->barrier; int ret; STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex); ret = barrier->reached_start; STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex); return ret; } int _starpu_barrier_counter_get_reached_exit(struct _starpu_barrier_counter *barrier_c) { struct _starpu_barrier *barrier = &barrier_c->barrier; int ret; STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex); ret = barrier->reached_exit; STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex); return ret; } double _starpu_barrier_counter_get_reached_flops(struct _starpu_barrier_counter *barrier_c) { struct _starpu_barrier *barrier = &barrier_c->barrier; double ret; STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex); ret = barrier->reached_flops; STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex); return ret; } starpu-1.4.9+dfsg/src/common/barrier_counter.h000066400000000000000000000045061507764646700214320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __BARRIER_COUNTER_H__ #define __BARRIER_COUNTER_H__ /** @file */ #include #include #pragma GCC visibility push(hidden) struct _starpu_barrier_counter { struct _starpu_barrier barrier; unsigned min_threshold; unsigned max_threshold; starpu_pthread_cond_t cond2; }; int _starpu_barrier_counter_init(struct _starpu_barrier_counter *barrier_c, unsigned count); int _starpu_barrier_counter_destroy(struct _starpu_barrier_counter *barrier_c); int _starpu_barrier_counter_wait_for_empty_counter(struct _starpu_barrier_counter *barrier_c); int _starpu_barrier_counter_wait_until_counter_reaches_down_to_n(struct _starpu_barrier_counter *barrier_c, unsigned n); int _starpu_barrier_counter_wait_until_counter_reaches_up_to_n(struct _starpu_barrier_counter *barrier_c, unsigned n); int _starpu_barrier_counter_wait_for_full_counter(struct _starpu_barrier_counter *barrier_c); int _starpu_barrier_counter_decrement_until_empty_counter(struct _starpu_barrier_counter *barrier_c, double flops); int _starpu_barrier_counter_increment_until_full_counter(struct _starpu_barrier_counter *barrier_c, double flops); int _starpu_barrier_counter_increment(struct _starpu_barrier_counter *barrier_c, double flops); int _starpu_barrier_counter_check(struct _starpu_barrier_counter *barrier_c); int _starpu_barrier_counter_get_reached_start(struct _starpu_barrier_counter *barrier_c); int _starpu_barrier_counter_get_reached_exit(struct _starpu_barrier_counter *barrier_c); double _starpu_barrier_counter_get_reached_flops(struct _starpu_barrier_counter *barrier_c); #pragma GCC visibility pop #endif starpu-1.4.9+dfsg/src/common/config-src-build.h.in000066400000000000000000000000561507764646700217750ustar00rootroot00000000000000#undef STARPU_SRC_DIR #undef STARPU_BUILD_DIR starpu-1.4.9+dfsg/src/common/config.h.in000066400000000000000000001014601507764646700201140ustar00rootroot00000000000000/* src/common/config.h.in. Generated from configure.ac by autoheader. */ /* enable FUT traces */ #undef CONFIG_FUT /* Define to 1 if you have the header file. */ #undef HAVE_AIO_H /* Define to 1 if you have the header file. */ #undef HAVE_AYUDAME_H /* Define to 1 if you have the `cblas_sgemv' function. */ #undef HAVE_CBLAS_SGEMV /* Define to 1 if you have the `clEnqueueMarkerWithWaitList' function. */ #undef HAVE_CLENQUEUEMARKERWITHWAITLIST /* Define to 1 if you have the `clGetExtensionFunctionAddressForPlatform' function. */ #undef HAVE_CLGETEXTENSIONFUNCTIONADDRESSFORPLATFORM /* Define to 1 if you have the `clock_gettime' function. */ #undef HAVE_CLOCK_GETTIME /* Define to 1 if you have the header file. */ #undef HAVE_CL_CL_EXT_H /* Define to 1 if you have the `copy_file_range' function. */ #undef HAVE_COPY_FILE_RANGE /* Define to 1 if you have the header file. */ #undef HAVE_CUBLASLT_H /* Define to 1 if you have the header file. */ #undef HAVE_CUDA_GL_INTEROP_H /* define if the compiler supports basic C++11 syntax */ #undef HAVE_CXX11 /* Define to 1 if you have the declaration of `cusparseSetStream', and to 0 if you don't. */ #undef HAVE_DECL_CUSPARSESETSTREAM /* Define to 1 if you have the declaration of `enable_fut_flush', and to 0 if you don't. */ #undef HAVE_DECL_ENABLE_FUT_FLUSH /* Define to 1 if you have the declaration of `fut_setup_flush_callback', and to 0 if you don't. */ #undef HAVE_DECL_FUT_SETUP_FLUSH_CALLBACK /* Define to 1 if you have the declaration of `fut_set_filename', and to 0 if you don't. */ #undef HAVE_DECL_FUT_SET_FILENAME /* Define to 1 if you have the declaration of `hwloc_cuda_get_device_osdev_by_index', and to 0 if you don't. */ #undef HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX /* Define to 1 if you have the declaration of `hwloc_distances_obj_pair_values', and to 0 if you don't. */ #undef HAVE_DECL_HWLOC_DISTANCES_OBJ_PAIR_VALUES /* Define to 1 if you have the declaration of `hwloc_hip_get_device_osdev_by_index', and to 0 if you don't. */ #undef HAVE_DECL_HWLOC_HIP_GET_DEVICE_OSDEV_BY_INDEX /* Define to 1 if you have the declaration of `nvmlDeviceGetTotalEnergyConsumption', and to 0 if you don't. */ #undef HAVE_DECL_NVMLDEVICEGETTOTALENERGYCONSUMPTION /* Define to 1 if you have the declaration of `smpi_process_set_user_data', and to 0 if you don't. */ #undef HAVE_DECL_SMPI_PROCESS_SET_USER_DATA /* Define to 1 if you have the header file. */ #undef HAVE_DLB_H /* Define to 1 if you have the header file. */ #undef HAVE_DLFCN_H /* Define to 1 if you have the `dlopen' function. */ #undef HAVE_DLOPEN /* Define to 1 if you have the `enable_fut_flush' function. */ #undef HAVE_ENABLE_FUT_FLUSH /* Define to 1 if you have the `fut_setup_flush_callback' function. */ #undef HAVE_FUT_SETUP_FLUSH_CALLBACK /* Define to 1 if you have the `fut_set_filename' function. */ #undef HAVE_FUT_SET_FILENAME /* Define to 1 if you have the `fxt_blockev_leave' function. */ #undef HAVE_FXT_BLOCKEV_LEAVE /* Define to 1 if you have the `fxt_close' function. */ #undef HAVE_FXT_CLOSE /* Define to 1 if you have the `getpagesize' function. */ #undef HAVE_GETPAGESIZE /* Define to 1 if you have the `getrlimit' function. */ #undef HAVE_GETRLIMIT /* Define to 1 if you have the header file. */ #undef HAVE_GLPK_H /* Define to 1 if you have the header file. */ #undef HAVE_HDF5_H /* Define to 1 if you have the header file. */ #undef HAVE_HIP_HIP_RUNTIME_API_H /* Define to 1 if you have the header file. */ #undef HAVE_HIP_HIP_RUNTIME_H /* Define to 1 if you have the `hwloc_cpukinds_get_nr' function. */ #undef HAVE_HWLOC_CPUKINDS_GET_NR /* Define to 1 if you have the `hwloc_get_area_memlocation' function. */ #undef HAVE_HWLOC_GET_AREA_MEMLOCATION /* Define to 1 if you have the header file. */ #undef HAVE_HWLOC_GLIBC_SCHED_H /* Define to 1 if you have the `hwloc_topology_dup' function. */ #undef HAVE_HWLOC_TOPOLOGY_DUP /* Define to 1 if you have the `hwloc_topology_set_components' function. */ #undef HAVE_HWLOC_TOPOLOGY_SET_COMPONENTS /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H /* Define to 1 if you have the header file. */ #undef HAVE_LEVELDB_DB_H /* Define to 1 if you have the `atlas' library (-latlas). */ #undef HAVE_LIBATLAS /* Define to 1 if you have the `blas-openblas' library (-lblas-openblas). */ #undef HAVE_LIBBLAS_OPENBLAS /* Define to 1 if you have the `cblas' library (-lcblas). */ #undef HAVE_LIBCBLAS /* Define to 1 if you have the `dl' library (-ldl). */ #undef HAVE_LIBDL /* Define to 1 if you have the `dlb' library (-ldlb). */ #undef HAVE_LIBDLB /* Define to 1 if you have the `f77blas' library (-lf77blas). */ #undef HAVE_LIBF77BLAS /* Define to 1 if you have the `gfortran' library (-lgfortran). */ #undef HAVE_LIBGFORTRAN /* Define to 1 if you have the `GL' library (-lGL). */ #undef HAVE_LIBGL /* Define to 1 if you have the `glpk' library (-lglpk). */ #undef HAVE_LIBGLPK /* Define to 1 if you have the `GLU' library (-lGLU). */ #undef HAVE_LIBGLU /* Define to 1 if you have the `glut' library (-lglut). */ #undef HAVE_LIBGLUT /* Define to 1 if you have the `goto' library (-lgoto). */ #undef HAVE_LIBGOTO /* Define to 1 if you have the `goto2' library (-lgoto2). */ #undef HAVE_LIBGOTO2 /* Define to 1 if you have the `hdf5' library (-lhdf5). */ #undef HAVE_LIBHDF5 /* Define to 1 if you have the `ifcore' library (-lifcore). */ #undef HAVE_LIBIFCORE /* Define to 1 if you have the `leveldb' library (-lleveldb). */ #undef HAVE_LIBLEVELDB /* Define to 1 if you have the `openblas' library (-lopenblas). */ #undef HAVE_LIBOPENBLAS /* Define to 1 if you have the `rt' library (-lrt). */ #undef HAVE_LIBRT /* Define to 1 if you have the `simgrid' library (-lsimgrid). */ #undef HAVE_LIBSIMGRID /* Define to 1 if you have the `ws2_32' library (-lws2_32). */ #undef HAVE_LIBWS2_32 /* Define to 1 if you have the header file. */ #undef HAVE_MALLOC_H /* Define to 1 if you have the `memalign' function. */ #undef HAVE_MEMALIGN /* Define to 1 if you have the `mkdtemp' function. */ #undef HAVE_MKDTEMP /* Define to 1 if you have the `mkostemp' function. */ #undef HAVE_MKOSTEMP /* Define to 1 if you have a working `mmap' system call. */ #undef HAVE_MMAP /* Function MPI_Comm_f2c is available */ #undef HAVE_MPI_COMM_F2C /* Define to 1 if you have the `MSG_environment_get_routing_root' function. */ #undef HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT /* Define to 1 if you have the `MSG_get_as_by_name' function. */ #undef HAVE_MSG_GET_AS_BY_NAME /* Define to 1 if you have the `MSG_host_get_speed' function. */ #undef HAVE_MSG_HOST_GET_SPEED /* Define to 1 if you have the header file. */ #undef HAVE_MSG_MSG_H /* Define to 1 if you have the `MSG_process_attach' function. */ #undef HAVE_MSG_PROCESS_ATTACH /* Define to 1 if you have the `MSG_process_self_name' function. */ #undef HAVE_MSG_PROCESS_SELF_NAME /* Define to 1 if you have the `MSG_process_userdata_init' function. */ #undef HAVE_MSG_PROCESS_USERDATA_INIT /* Define to 1 if you have the `MSG_zone_get_by_name' function. */ #undef HAVE_MSG_ZONE_GET_BY_NAME /* Define to 1 if you have the `MSG_zone_get_hosts' function. */ #undef HAVE_MSG_ZONE_GET_HOSTS /* Define to 1 if you have the `nm_trace_add_synchro_point' function. */ #undef HAVE_NM_TRACE_ADD_SYNCHRO_POINT /* PIOman (from PM2) is available */ #undef HAVE_PIOMAN /* Define to 1 if you have the `piom_ltask_set_bound_thread_os_indexes' function. */ #undef HAVE_PIOM_LTASK_SET_BOUND_THREAD_OS_INDEXES /* Define to 1 if you have the `posix_memalign' function. */ #undef HAVE_POSIX_MEMALIGN /* Define to 1 if you have the `poti_init_custom' function. */ #undef HAVE_POTI_INIT_CUSTOM /* Define to 1 if you have the `poti_user_NewEvent' function. */ #undef HAVE_POTI_USER_NEWEVENT /* Define to 1 if you have the `pread' function. */ #undef HAVE_PREAD /* Define to 1 if you have the `pthread_setaffinity_np' function. */ #undef HAVE_PTHREAD_SETAFFINITY_NP /* pthread_spin_lock is available */ #undef HAVE_PTHREAD_SPIN_LOCK /* Define to 1 if you have the `pwrite' function. */ #undef HAVE_PWRITE /* Define to 1 if you have the header file. */ #undef HAVE_PYTHON_H /* Define to 1 if you have the header file. */ #undef HAVE_ROCBLAS_ROCBLAS_H /* Define to 1 if you have the `scandir' function. */ #undef HAVE_SCANDIR /* Define to 1 if you have the `sg_actor_attach' function. */ #undef HAVE_SG_ACTOR_ATTACH /* Define to 1 if you have the `sg_actor_attach_pthread' function. */ #undef HAVE_SG_ACTOR_ATTACH_PTHREAD /* Define to 1 if you have the `sg_actor_data' function. */ #undef HAVE_SG_ACTOR_DATA /* Define to 1 if you have the `sg_actor_execute' function. */ #undef HAVE_SG_ACTOR_EXECUTE /* Define to 1 if you have the `sg_actor_get_data' function. */ #undef HAVE_SG_ACTOR_GET_DATA /* Define to 1 if you have the `sg_actor_init' function. */ #undef HAVE_SG_ACTOR_INIT /* Define to 1 if you have the `sg_actor_on_exit' function. */ #undef HAVE_SG_ACTOR_ON_EXIT /* Define to 1 if you have the `sg_actor_ref' function. */ #undef HAVE_SG_ACTOR_REF /* Define to 1 if you have the `sg_actor_self' function. */ #undef HAVE_SG_ACTOR_SELF /* Define to 1 if you have the `sg_actor_self_execute' function. */ #undef HAVE_SG_ACTOR_SELF_EXECUTE /* Define to 1 if you have the `sg_actor_set_data' function. */ #undef HAVE_SG_ACTOR_SET_DATA /* Define to 1 if you have the `sg_actor_set_stacksize' function. */ #undef HAVE_SG_ACTOR_SET_STACKSIZE /* Define to 1 if you have the `sg_actor_sleep_for' function. */ #undef HAVE_SG_ACTOR_SLEEP_FOR /* Define to 1 if you have the `sg_cfg_set_int' function. */ #undef HAVE_SG_CFG_SET_INT /* Define to 1 if you have the `sg_config_continue_after_help' function. */ #undef HAVE_SG_CONFIG_CONTINUE_AFTER_HELP /* Define to 1 if you have the `sg_host_get_properties' function. */ #undef HAVE_SG_HOST_GET_PROPERTIES /* Define to 1 if you have the `sg_host_get_property_names' function. */ #undef HAVE_SG_HOST_GET_PROPERTY_NAMES /* Define to 1 if you have the `sg_host_get_route' function. */ #undef HAVE_SG_HOST_GET_ROUTE /* Define to 1 if you have the `sg_host_get_route_links' function. */ #undef HAVE_SG_HOST_GET_ROUTE_LINKS /* Define to 1 if you have the `sg_host_get_speed' function. */ #undef HAVE_SG_HOST_GET_SPEED /* Define to 1 if you have the `sg_host_list' function. */ #undef HAVE_SG_HOST_LIST /* Define to 1 if you have the `sg_host_route' function. */ #undef HAVE_SG_HOST_ROUTE /* Define to 1 if you have the `sg_host_self' function. */ #undef HAVE_SG_HOST_SELF /* Define to 1 if you have the `sg_host_sendto' function. */ #undef HAVE_SG_HOST_SENDTO /* Define to 1 if you have the `sg_host_send_to' function. */ #undef HAVE_SG_HOST_SEND_TO /* Define to 1 if you have the `sg_host_speed' function. */ #undef HAVE_SG_HOST_SPEED /* Define to 1 if you have the `sg_link_bandwidth_set' function. */ #undef HAVE_SG_LINK_BANDWIDTH_SET /* Define to 1 if you have the `sg_link_get_name' function. */ #undef HAVE_SG_LINK_GET_NAME /* Define to 1 if you have the `sg_link_name' function. */ #undef HAVE_SG_LINK_NAME /* Define to 1 if you have the `sg_link_set_bandwidth' function. */ #undef HAVE_SG_LINK_SET_BANDWIDTH /* Define to 1 if you have the `sg_zone_get_all_hosts' function. */ #undef HAVE_SG_ZONE_GET_ALL_HOSTS /* Define to 1 if you have the `sg_zone_get_by_name' function. */ #undef HAVE_SG_ZONE_GET_BY_NAME /* Define to 1 if you have the `sg_zone_get_hosts' function. */ #undef HAVE_SG_ZONE_GET_HOSTS /* Define to 1 if you have the `simcall_process_create' function. */ #undef HAVE_SIMCALL_PROCESS_CREATE /* Define to 1 if you have the header file. */ #undef HAVE_SIMGRID_ACTOR_H /* Define to 1 if you have the header file. */ #undef HAVE_SIMGRID_BARRIER_H /* Define to 1 if you have the header file. */ #undef HAVE_SIMGRID_COND_H /* Define to 1 if you have the header file. */ #undef HAVE_SIMGRID_ENGINE_H /* Define to 1 if you have the `simgrid_get_clock' function. */ #undef HAVE_SIMGRID_GET_CLOCK /* Define to 1 if you have the header file. */ #undef HAVE_SIMGRID_HOST_H /* Define to 1 if you have the `simgrid_init' function. */ #undef HAVE_SIMGRID_INIT /* Define to 1 if you have the header file. */ #undef HAVE_SIMGRID_LINK_H /* Define to 1 if you have the header file. */ #undef HAVE_SIMGRID_MSG_H /* Define to 1 if you have the header file. */ #undef HAVE_SIMGRID_MUTEX_H /* Define to 1 if you have the header file. */ #undef HAVE_SIMGRID_SEMAPHORE_H /* Define to 1 if you have the `simgrid_set_maestro' function. */ #undef HAVE_SIMGRID_SET_MAESTRO /* Define to 1 if you have the header file. */ #undef HAVE_SIMGRID_SIMDAG_H /* Define to 1 if you have the header file. */ #undef HAVE_SIMGRID_VERSION_H /* Define to 1 if you have the header file. */ #undef HAVE_SIMGRID_ZONE_H /* Define to 1 if you have the `smpi_process_set_user_data' function. */ #undef HAVE_SMPI_PROCESS_SET_USER_DATA /* Define to 1 if you have the `SMPI_thread_create' function. */ #undef HAVE_SMPI_THREAD_CREATE /* Define to 1 if the system has the type `smx_actor_t'. */ #undef HAVE_SMX_ACTOR_T /* Define to 1 if you have the header file. */ #undef HAVE_STDINT_H /* Define to 1 if you have the header file. */ #undef HAVE_STDIO_H /* Define to 1 if you have the header file. */ #undef HAVE_STDLIB_H /* Define to 1 if you have the header file. */ #undef HAVE_STRINGS_H /* Define to 1 if you have the header file. */ #undef HAVE_STRING_H /* Define to 1 if you have the `sysconf' function. */ #undef HAVE_SYSCONF /* Define to 1 if you have the header file. */ #undef HAVE_SYS_PARAM_H /* Define to 1 if you have the header file. */ #undef HAVE_SYS_STAT_H /* Define to 1 if you have the header file. */ #undef HAVE_SYS_TYPES_H /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H /* Define to 1 if you have the header file. */ #undef HAVE_VALGRIND_HELGRIND_H /* Define to 1 if you have the header file. */ #undef HAVE_VALGRIND_MEMCHECK_H /* Define to 1 if you have the header file. */ #undef HAVE_VALGRIND_VALGRIND_H /* Define to 1 if you have the `xbt_barrier_init' function. */ #undef HAVE_XBT_BARRIER_INIT /* Define to 1 if you have the header file. */ #undef HAVE_XBT_BASE_H /* Define to 1 if you have the header file. */ #undef HAVE_XBT_CONFIG_H /* Define to 1 if you have the `xbt_mutex_try_acquire' function. */ #undef HAVE_XBT_MUTEX_TRY_ACQUIRE /* Define to 1 if you have the header file. */ #undef HAVE_XBT_SYNCHRO_H /* Define to the sub-directory where libtool stores uninstalled libraries. */ #undef LT_OBJDIR /* Name of package */ #undef PACKAGE /* Define to the address where bug reports for this package should be sent. */ #undef PACKAGE_BUGREPORT /* Define to the full name of this package. */ #undef PACKAGE_NAME /* Define to the full name and version of this package. */ #undef PACKAGE_STRING /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME /* Define to the home page for this package. */ #undef PACKAGE_URL /* Define to the version of this package. */ #undef PACKAGE_VERSION /* The size of `void *', as computed by sizeof. */ #undef SIZEOF_VOID_P /* display DLB resource management verbose debug messages */ #undef STARPURM_DLB_VERBOSE /* Define to 1 if dlb support is enabled. */ #undef STARPURM_HAVE_DLB /* Define to 1 if DLB callbacks expect an user argument */ #undef STARPURM_HAVE_DLB_CALLBACK_ARG /* Define to 1 if StarPU has support for worker callbacks. */ #undef STARPURM_STARPU_HAVE_WORKER_CALLBACKS /* display resource management verbose debug messages */ #undef STARPURM_VERBOSE /* use ARMPL library */ #undef STARPU_ARMPL /* use STARPU_ATLAS library */ #undef STARPU_ATLAS /* Define this to enable hierarchical dags support */ #undef STARPU_BUBBLE /* display verbose bubble debug messages */ #undef STARPU_BUBBLE_VERBOSE /* location of StarPU build directory */ #undef STARPU_BUILD_DIR /* use built-in min_dgels */ #undef STARPU_BUILT_IN_MIN_DGELS /* Define to 1 if you are building with coverity */ #undef STARPU_COVERITY /* Define to 1 to enforce data locality */ #undef STARPU_DATA_LOCALITY_ENFORCE /* enable debugging statements */ #undef STARPU_DEBUG /* enable developer warnings */ #undef STARPU_DEVEL /* Define to 1 to disable asynchronous copy between CPU and GPU devices */ #undef STARPU_DISABLE_ASYNCHRONOUS_COPY /* Define to 1 to disable asynchronous copy between CPU and CUDA devices */ #undef STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY /* Define to 1 to disable asynchronous copy between CPU and Maxeler FPGA devices */ #undef STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY /* Define to 1 to disable asynchronous copy between MPI Master and MPI Slave devices */ #undef STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY /* Define to 1 to disable asynchronous copy between CPU and OpenCL devices */ #undef STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY /* Define to 1 to disable asynchronous copy between TCP/IP Master and TCP/IP Slave devices */ #undef STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY /* display verbose debug messages */ #undef STARPU_EXTRA_VERBOSE /* enable additional locking systems FxT traces */ #undef STARPU_FXT_LOCK_TRACES /* how many MPI nodes fxt files can be manipulated when generating traces */ #undef STARPU_FXT_MAX_FILES /* Path to the GNU debugger. */ #undef STARPU_GDB_PATH /* use STARPU_GOTO library */ #undef STARPU_GOTO /* Define to 1 if the target supports __atomic_compare_exchange_n */ #undef STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N /* Define to 1 if the target supports __atomic_compare_exchange_n_8 */ #undef STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N_8 /* Define to 1 if the target supports __atomic_exchange_n */ #undef STARPU_HAVE_ATOMIC_EXCHANGE_N /* Define to 1 if the target supports __atomic_exchange_n_8 */ #undef STARPU_HAVE_ATOMIC_EXCHANGE_N_8 /* Define to 1 if the target supports __atomic_fetch_add */ #undef STARPU_HAVE_ATOMIC_FETCH_ADD /* Define to 1 if the target supports __atomic_fetch_add_8 */ #undef STARPU_HAVE_ATOMIC_FETCH_ADD_8 /* Define to 1 if the target supports __atomic_fetch_or */ #undef STARPU_HAVE_ATOMIC_FETCH_OR /* Define to 1 if the target supports __atomic_fetch_or_8 */ #undef STARPU_HAVE_ATOMIC_FETCH_OR_8 /* Define to 1 if the target supports __atomic_test_and_set */ #undef STARPU_HAVE_ATOMIC_TEST_AND_SET /* The blas library is available */ #undef STARPU_HAVE_BLAS /* Define to 1 if CUDA device properties include BusID */ #undef STARPU_HAVE_BUSID /* The blas library has blas.h */ #undef STARPU_HAVE_CBLAS_H /* Define to 1 if CUDA device properties include canMapHostMemory */ #undef STARPU_HAVE_CUDA_CANMAPHOST /* Peer transfers are supported in CUDA */ #undef STARPU_HAVE_CUDA_MEMCPY_PEER /* Define to 1 if CUDA device properties include managedMemory */ #undef STARPU_HAVE_CUDA_MNGMEM /* Define to 1 if CUDA device properties include pageableMemoryAccess */ #undef STARPU_HAVE_CUDA_PAGEABLEMEM /* Define to 1 if CUDA pointer attributes include a type field instead of old memoryType field */ #undef STARPU_HAVE_CUDA_POINTER_TYPE /* Define to 1 if CUDA device properties include unifiedAddressing */ #undef STARPU_HAVE_CUDA_UNIFIEDADDR /* cufftDoubleComplex is available */ #undef STARPU_HAVE_CUFFTDOUBLECOMPLEX /* CURAND is available */ #undef STARPU_HAVE_CURAND /* compiler supports cxx11 */ #undef STARPU_HAVE_CXX11 /* Define this on darwin. */ #undef STARPU_HAVE_DARWIN /* Define to 1 if CUDA device properties include DomainID */ #undef STARPU_HAVE_DOMAINID /* Define to 1 if you have the header file. */ #undef STARPU_HAVE_F77_H /* Define this if a Fortran compiler is available */ #undef STARPU_HAVE_FC /* Define to 1 if you have the libfftw3 library. */ #undef STARPU_HAVE_FFTW /* Define to 1 if you have the libfftw3f library. */ #undef STARPU_HAVE_FFTWF /* Define to 1 if you have the libfftw3l library. */ #undef STARPU_HAVE_FFTWL /* Define to 1 if you have the header file. */ #undef STARPU_HAVE_GLPK_H /* Define to 1 if you have the header file. */ #undef STARPU_HAVE_HDF5 /* Define to 1 if you have the header file. */ #undef STARPU_HAVE_HELGRIND_H /* Peer transfers are supported in HIP */ #undef STARPU_HAVE_HIP_MEMCPY_PEER /* Define to 1 if you have the hwloc library. */ #undef STARPU_HAVE_HWLOC /* Define this if icc is available */ #undef STARPU_HAVE_ICC /* Define to 1 if you have the header file. */ #undef STARPU_HAVE_LEVELDB /* Define to 1 if you have the cublasLt library */ #undef STARPU_HAVE_LIBCUBLASLT /* Define to 1 if you have the cusolver library */ #undef STARPU_HAVE_LIBCUSOLVER /* Define to 1 if you have the cusparse library */ #undef STARPU_HAVE_LIBCUSPARSE /* libnuma is available */ #undef STARPU_HAVE_LIBNUMA /* Define to 1 if you have the MAGMA library. */ #undef STARPU_HAVE_MAGMA /* Define to 1 if you have the header file. */ #undef STARPU_HAVE_MALLOC_H /* Define to 1 if you have the `memalign' function. */ #undef STARPU_HAVE_MEMALIGN /* Define to 1 if you have the header file. */ #undef STARPU_HAVE_MEMCHECK_H /* Define to 1 if the function MPI_Comm_create_group is available. */ #undef STARPU_HAVE_MPI_COMM_CREATE_GROUP /* is available */ #undef STARPU_HAVE_MPI_EXT /* Define to 1 if you have mpi_sync_clocks and it is meant to be used */ #undef STARPU_HAVE_MPI_SYNC_CLOCKS /* Define to 1 if you have msg.h in msg/. */ #undef STARPU_HAVE_MSG_MSG_H /* Define to 1 if the function nearbyintf is available. */ #undef STARPU_HAVE_NEARBYINTF /* Define to 1 if you have the nvml.h header */ #undef STARPU_HAVE_NVML_H /* Define to 1 if you have the `posix_memalign' function. */ #undef STARPU_HAVE_POSIX_MEMALIGN /* Define to 1 if you have libpoti and it is meant to be used */ #undef STARPU_HAVE_POTI /* variable program_invocation_short_name is available */ #undef STARPU_HAVE_PROGRAM_INVOCATION_SHORT_NAME /* pthread_barrier is available */ #undef STARPU_HAVE_PTHREAD_BARRIER /* pthread_setname_np is available */ #undef STARPU_HAVE_PTHREAD_SETNAME_NP /* pthread_spin_lock is available */ #undef STARPU_HAVE_PTHREAD_SPIN_LOCK /* Define to 1 if the function rintf is available. */ #undef STARPU_HAVE_RINTF /* Define this to 1 when s4u::Engine::on_time_advance_cb is available */ #undef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB /* Define to 1 if the function sched_yield is available. */ #undef STARPU_HAVE_SCHED_YIELD /* Define to 1 if the function setenv is available. */ #undef STARPU_HAVE_SETENV /* Define to 1 if you have actor.h in simgrid/. */ #undef STARPU_HAVE_SIMGRID_ACTOR_H /* Define to 1 if you have barrier.h in simgrid/. */ #undef STARPU_HAVE_SIMGRID_BARRIER_H /* Define to 1 if you have cond.h in simgrid/. */ #undef STARPU_HAVE_SIMGRID_COND_H /* Define to 1 if you have engine.h in simgrid/. */ #undef STARPU_HAVE_SIMGRID_ENGINE_H /* Define to 1 if you have host.h in simgrid/. */ #undef STARPU_HAVE_SIMGRID_HOST_H /* Define to 1 if you have link.h in simgrid/. */ #undef STARPU_HAVE_SIMGRID_LINK_H /* Define to 1 if you have msg.h in simgrid/. */ #undef STARPU_HAVE_SIMGRID_MSG_H /* Define to 1 if you have mutex.h in simgrid/. */ #undef STARPU_HAVE_SIMGRID_MUTEX_H /* Define to 1 if you have semaphore.h in simgrid/. */ #undef STARPU_HAVE_SIMGRID_SEMAPHORE_H /* Define to 1 if you have simdag.h in simgrid/. */ #undef STARPU_HAVE_SIMGRID_SIMDAG_H /* Define to 1 if you have version.h in simgrid/. */ #undef STARPU_HAVE_SIMGRID_VERSION_H /* Define to 1 if you have zone.h in simgrid/. */ #undef STARPU_HAVE_SIMGRID_ZONE_H /* Define to 1 if you have the smx_actor_t type. */ #undef STARPU_HAVE_SMX_ACTOR_T /* statement expressions are available */ #undef STARPU_HAVE_STATEMENT_EXPRESSIONS /* Define to 1 if the function strerro_r is available. */ #undef STARPU_HAVE_STRERROR_R /* struct timespec is defined */ #undef STARPU_HAVE_STRUCT_TIMESPEC /* Define to 1 if the target supports __sync_bool_compare_and_swap */ #undef STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP /* Define to 1 if the target supports __sync_bool_compare_and_swap_8 */ #undef STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8 /* Define to 1 if the target supports __sync_fetch_and_add */ #undef STARPU_HAVE_SYNC_FETCH_AND_ADD /* Define to 1 if the target supports __sync_fetch_and_add_8 */ #undef STARPU_HAVE_SYNC_FETCH_AND_ADD_8 /* Define to 1 if the target supports __sync_fetch_and_or */ #undef STARPU_HAVE_SYNC_FETCH_AND_OR /* Define to 1 if the target supports __sync_fetch_and_or_8 */ #undef STARPU_HAVE_SYNC_FETCH_AND_OR_8 /* Define to 1 if the target supports __sync_lock_test_and_set */ #undef STARPU_HAVE_SYNC_LOCK_TEST_AND_SET /* Define to 1 if the target supports __sync_synchronize */ #undef STARPU_HAVE_SYNC_SYNCHRONIZE /* Define to 1 if the target supports __sync_val_compare_and_swap */ #undef STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP /* Define to 1 if the target supports __sync_val_compare_and_swap_8 */ #undef STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP_8 /* Define to 1 if you have the header file. */ #undef STARPU_HAVE_UNISTD_H /* Define to 1 if the function unsetenv is available. */ #undef STARPU_HAVE_UNSETENV /* Define to 1 if you have the header file. */ #undef STARPU_HAVE_VALGRIND_H /* Define this on windows. */ #undef STARPU_HAVE_WINDOWS /* enable X11 */ #undef STARPU_HAVE_X11 /* Define to 1 if you have base.h in xbt/. */ #undef STARPU_HAVE_XBT_BASE_H /* Define to 1 if you have config.h in xbt/. */ #undef STARPU_HAVE_XBT_CONFIG_H /* Define to 1 if you have synchro.h in xbt/. */ #undef STARPU_HAVE_XBT_SYNCHRO_H /* calibration heuristic value */ #undef STARPU_HISTORYMAXERROR /* Define to 1 on Linux */ #undef STARPU_LINUX_SYS /* enable long check */ #undef STARPU_LONG_CHECK /* Major version number of StarPU. */ #undef STARPU_MAJOR_VERSION /* Maximum number of CPUs supported */ #undef STARPU_MAXCPUS /* maximum number of CUDA devices */ #undef STARPU_MAXCUDADEVS /* maximum number of HIP devices */ #undef STARPU_MAXHIPDEVS /* maximum number of implementations */ #undef STARPU_MAXIMPLEMENTATIONS /* maximum number of Maxeler FPGA devices */ #undef STARPU_MAXMAXFPGADEVS /* maximum number of MPI devices */ #undef STARPU_MAXMPIDEVS /* maximum number of memory nodes */ #undef STARPU_MAXNODES /* maximum number of NUMA nodes */ #undef STARPU_MAXNUMANODES /* maximum number of OPENCL devices */ #undef STARPU_MAXOPENCLDEVS /* maximum number of TCP/IP devices */ #undef STARPU_MAXTCPIPDEVS /* enable memory stats */ #undef STARPU_MEMORY_STATS /* Minor version number of StarPU. */ #undef STARPU_MINOR_VERSION /* use MKL library */ #undef STARPU_MKL /* use user defined library */ #undef STARPU_MLR_MODEL /* enable performance model debug */ #undef STARPU_MODEL_DEBUG /* display MPI verbose debug messages */ #undef STARPU_MPI_EXTRA_VERBOSE /* enable StarPU MPI pedantic isend */ #undef STARPU_MPI_PEDANTIC_ISEND /* display MPI verbose debug messages */ #undef STARPU_MPI_VERBOSE /* Using native windows threads */ #undef STARPU_NATIVE_WINTHREADS /* enable new check */ #undef STARPU_NEW_CHECK /* how many buffers can be manipulated per task */ #undef STARPU_NMAXBUFS /* Maximum number of device per device arch */ #undef STARPU_NMAXDEVS /* Maximum number of workers */ #undef STARPU_NMAXWORKERS /* Maximum number of worker combinations */ #undef STARPU_NMAX_COMBINEDWORKERS /* Maximum number of sched_ctxs supported */ #undef STARPU_NMAX_SCHED_CTXS /* drivers must progress */ #undef STARPU_NON_BLOCKING_DRIVERS /* disable assertions */ #undef STARPU_NO_ASSERT /* Define to 1 if you use the openblas library. */ #undef STARPU_OPENBLAS /* Define to 1 on OpenBSD systems */ #undef STARPU_OPENBSD_SYS /* Define this to enable using an OpenCL simulator */ #undef STARPU_OPENCL_SIMULATOR /* enable OpenGL rendering of some examples */ #undef STARPU_OPENGL_RENDER /* Define this to enable OpenMP runtime support */ #undef STARPU_OPENMP /* Define this to enable LLVM OpenMP runtime support */ #undef STARPU_OPENMP_LLVM /* Define to 1 if you have the libpapi library */ #undef STARPU_PAPI /* Define this to enable parallel worker support */ #undef STARPU_PARALLEL_WORKER /* enable performance debug */ #undef STARPU_PERF_DEBUG /* performance models location */ #undef STARPU_PERF_MODEL_DIR /* Define this to enable profiling tool support */ #undef STARPU_PROF_TOOL /* Define to 1 if `PTHREAD_COND_INITIALIZER' is just zeroes */ #undef STARPU_PTHREAD_COND_INITIALIZER_ZERO /* Define to 1 if `PTHREAD_MUTEX_INITIALIZER' is just zeroes */ #undef STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO /* Define to 1 if `PTHREAD_RWLOCK_INITIALIZER' is just zeroes */ #undef STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO /* Python cloudpickle package available */ #undef STARPU_PYTHON_HAVE_CLOUDPICKLE /* Python joblib package available */ #undef STARPU_PYTHON_HAVE_JOBLIB /* Python3 numpy package available */ #undef STARPU_PYTHON_HAVE_NUMPY /* enable quick check */ #undef STARPU_QUICK_CHECK /* Release version number of StarPU. */ #undef STARPU_RELEASE_VERSION /* enable debug sc_hypervisor */ #undef STARPU_SC_HYPERVISOR_DEBUG /* Define this to enable simgrid execution */ #undef STARPU_SIMGRID /* Define to 1 if you have the `simgrid_init' function. */ #undef STARPU_SIMGRID_HAVE_SIMGRID_INIT /* Define to 1 if you have the `xbt_barrier_init' function. */ #undef STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT /* Define this to enable Model Checker in simgrid execution */ #undef STARPU_SIMGRID_MC /* check spinlock use */ #undef STARPU_SPINLOCK_CHECK /* location of StarPU sources */ #undef STARPU_SRC_DIR /* Only static compilation was made */ #undef STARPU_STATIC_ONLY /* use user defined library */ #undef STARPU_SYSTEM_BLAS /* enable data allocation cache */ #undef STARPU_USE_ALLOCATION_CACHE /* Define to 1 if Ayudame 1 is available and should be used */ #undef STARPU_USE_AYUDAME1 /* Define to 1 if Ayudame 2 is available and should be used */ #undef STARPU_USE_AYUDAME2 /* CPU driver is activated */ #undef STARPU_USE_CPU /* CUDA support is activated */ #undef STARPU_USE_CUDA /* Define to 1 if the CUDA0 driver is to be tested */ #undef STARPU_USE_CUDA0 /* Define to 1 if the CUDA1 driver is to be tested */ #undef STARPU_USE_CUDA1 /* Define to 1 if CUDA Mapped host memory may be used */ #undef STARPU_USE_CUDA_MAP /* Define to 1 if drandr48 is available and should be used */ #undef STARPU_USE_DRAND48 /* Define to 1 if erandr48_r is available */ #undef STARPU_USE_ERAND48_R /* enable FxT traces */ #undef STARPU_USE_FXT /* Define to 1 if the HIP driver is to be tested */ #undef STARPU_USE_HIP /* HIPBLAS support is enabled */ #undef STARPU_USE_HIPBLAS /* Maxeler FPGA support is activated */ #undef STARPU_USE_MAX_FPGA /* Message-passing SINKs support is enabled */ #undef STARPU_USE_MP /* whether the StarPU MPI library is available */ #undef STARPU_USE_MPI /* whether the StarPU MPI failure tolerance mechanisms are requested */ #undef STARPU_USE_MPI_FT /* whether the StarPU MPI failure tolerance mechanisms stats are watched */ #undef STARPU_USE_MPI_FT_STATS /* MPI Master Slave support is enabled */ #undef STARPU_USE_MPI_MASTER_SLAVE /* whether the StarPU MPI library (with a native MPI implementation) is available */ #undef STARPU_USE_MPI_MPI /* whether the StarPU MPI library (with a NewMadeleine implementation) is available */ #undef STARPU_USE_MPI_NMAD /* OpenCL support is activated */ #undef STARPU_USE_OPENCL /* enable sc_hypervisor lib */ #undef STARPU_USE_SC_HYPERVISOR /* TCPIP Master Slave support is enabled */ #undef STARPU_USE_TCPIP_MASTER_SLAVE /* Define to 1 to disable STARPU_SKIP_IF_VALGRIND when running tests. */ #undef STARPU_VALGRIND_FULL /* display verbose debug messages */ #undef STARPU_VERBOSE /* workers must call callbacks on sleep/wake-up */ #undef STARPU_WORKER_CALLBACKS /* Define to 1 if all of the C90 standard headers exist (not just the ones required in a freestanding environment). This macro is provided for backward compatibility; new code need not use it. */ #undef STDC_HEADERS /* Version number of package */ #undef VERSION /* Define to 1 if the X Window System is missing or not being used. */ #undef X_DISPLAY_MISSING /* Define to the equivalent of the C99 'restrict' keyword, or to nothing if this is not supported. Do not define if restrict is supported only directly. */ #undef restrict /* Work around a bug in older versions of Sun C++, which did not #define __restrict__ or support _Restrict or __restrict__ even though the corresponding Sun C compiler ended up with "#define restrict _Restrict" or "#define restrict __restrict__" in the previous line. This workaround can be removed once we assume Oracle Developer Studio 12.5 (2016) or later. */ #if defined __SUNPRO_CC && !defined __RESTRICT && !defined __restrict__ # define _Restrict # define __restrict__ #endif starpu-1.4.9+dfsg/src/common/fxt.c000066400000000000000000000341321507764646700170370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include /* we need to identify each task to generate the DAG. */ unsigned long _starpu_job_cnt = 0; #ifdef STARPU_USE_FXT #include #include #include #ifdef STARPU_HAVE_WINDOWS #include #endif #ifdef __linux__ #include /* for SYS_gettid */ #elif defined(__FreeBSD__) #include /* for thr_self() */ #endif /* By default, record all events but the VERBOSE_EXTRA ones, which are very costly: */ #define KEYMASKALL_DEFAULT FUT_KEYMASKALL & (~_STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA) & (~_STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA) static char _starpu_prof_file_user[1024]; int _starpu_fxt_started = 0; int _starpu_fxt_willstart = 1; starpu_pthread_mutex_t _starpu_fxt_started_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; starpu_pthread_cond_t _starpu_fxt_started_cond = STARPU_PTHREAD_COND_INITIALIZER; /* and their submission order. */ unsigned long _starpu_submit_order = 0; static int _starpu_written = 0; static int _starpu_id; /* If we use several MPI processes, we can't use STARPU_GENERATE_TRACE=1, * because each MPI process will handle its own trace file, so store the world * size to warn the user if needed and avoid processing partial traces. */ static int _starpu_mpi_worldsize = 1; /* Event mask used to initialize FxT. By default all events are recorded just * after FxT starts, but this can be changed by calling * starpu_fxt_autostart_profiling(0) */ static unsigned int initial_key_mask = KEYMASKALL_DEFAULT; /* Event mask used when events are actually recorded, e.g. between * starpu_fxt_start|stop_profiling() calls if autostart is disabled, or at * anytime otherwise. Can be changed by the user at runtime, by setting * STARPU_FXT_EVENTS env var. */ static unsigned int profiling_key_mask = 0; #ifdef STARPU_SIMGRID /* Give virtual time to FxT */ uint64_t fut_getstamp(void) { return starpu_timing_now()*1000.; } #endif long _starpu_gettid(void) { /* TODO: test at configure whether __thread is available, and use that * to cache the value. * Don't use the TSD, this is getting called before we would have the * time to allocate it. */ #ifdef STARPU_SIMGRID # ifdef HAVE_SG_ACTOR_SELF return (uintptr_t) sg_actor_self(); # else return (uintptr_t) MSG_process_self(); # endif #else #if defined(__linux__) return syscall(SYS_gettid); #elif defined(__FreeBSD__) long tid; thr_self(&tid); return tid; #elif defined(_WIN32) && !defined(__CYGWIN__) return (long) GetCurrentThreadId(); #else return (long) starpu_pthread_self(); #endif #endif } static void _starpu_profile_set_tracefile(void) { char *user; char *fxt_prefix = starpu_getenv("STARPU_FXT_PREFIX"); if (!fxt_prefix) fxt_prefix = "/tmp"; else _starpu_mkpath_and_check(fxt_prefix, S_IRWXU); char suffix[64]; char *fxt_suffix = starpu_getenv("STARPU_FXT_SUFFIX"); if (!fxt_suffix) { user = starpu_getenv("USER"); if (!user) user = ""; snprintf(suffix, sizeof(suffix), "prof_file_%s_%d", user, _starpu_id); } else { snprintf(suffix, sizeof(suffix), "%s_%d", fxt_suffix, _starpu_id); } snprintf(_starpu_prof_file_user, sizeof(_starpu_prof_file_user), "%s/%s", fxt_prefix, suffix); } static inline unsigned int _starpu_profile_get_user_keymask(void) { if (profiling_key_mask != 0) return profiling_key_mask; char *fxt_events = starpu_getenv("STARPU_FXT_EVENTS"); if (fxt_events) { profiling_key_mask = _STARPU_FUT_KEYMASK_META; // contains mandatory events, even when profiling is disabled char delim[] = "|,"; char* sub = strtok(fxt_events, delim); for (; sub != NULL; sub = strtok(NULL, delim)) { if (!strcasecmp(sub, "USER")) profiling_key_mask |= _STARPU_FUT_KEYMASK_USER; else if (!strcasecmp(sub, "TASK")) profiling_key_mask |= _STARPU_FUT_KEYMASK_TASK; else if (!strcasecmp(sub, "TASK_VERBOSE")) profiling_key_mask |= _STARPU_FUT_KEYMASK_TASK_VERBOSE; else if (!strcasecmp(sub, "DATA")) profiling_key_mask |= _STARPU_FUT_KEYMASK_DATA; else if (!strcasecmp(sub, "DATA_VERBOSE")) profiling_key_mask |= _STARPU_FUT_KEYMASK_DATA_VERBOSE; else if (!strcasecmp(sub, "WORKER")) profiling_key_mask |= _STARPU_FUT_KEYMASK_WORKER; else if (!strcasecmp(sub, "WORKER_VERBOSE")) profiling_key_mask |= _STARPU_FUT_KEYMASK_WORKER_VERBOSE; else if (!strcasecmp(sub, "DSM")) profiling_key_mask |= _STARPU_FUT_KEYMASK_DSM; else if (!strcasecmp(sub, "DSM_VERBOSE")) profiling_key_mask |= _STARPU_FUT_KEYMASK_DSM_VERBOSE; else if (!strcasecmp(sub, "SCHED")) profiling_key_mask |= _STARPU_FUT_KEYMASK_SCHED; else if (!strcasecmp(sub, "SCHED_VERBOSE")) profiling_key_mask |= _STARPU_FUT_KEYMASK_SCHED_VERBOSE; else if (!strcasecmp(sub, "LOCK")) profiling_key_mask |= _STARPU_FUT_KEYMASK_LOCK; else if (!strcasecmp(sub, "LOCK_VERBOSE")) profiling_key_mask |= _STARPU_FUT_KEYMASK_LOCK_VERBOSE; else if (!strcasecmp(sub, "EVENT")) profiling_key_mask |= _STARPU_FUT_KEYMASK_EVENT; else if (!strcasecmp(sub, "EVENT_VERBOSE")) profiling_key_mask |= _STARPU_FUT_KEYMASK_EVENT_VERBOSE; else if (!strcasecmp(sub, "MPI")) profiling_key_mask |= _STARPU_FUT_KEYMASK_MPI; else if (!strcasecmp(sub, "MPI_VERBOSE")) profiling_key_mask |= _STARPU_FUT_KEYMASK_MPI_VERBOSE; else if (!strcasecmp(sub, "HYP")) profiling_key_mask |= _STARPU_FUT_KEYMASK_HYP; else if (!strcasecmp(sub, "HYP_VERBOSE")) profiling_key_mask |= _STARPU_FUT_KEYMASK_HYP_VERBOSE; else if (!strcasecmp(sub, "TASK_VERBOSE_EXTRA")) profiling_key_mask |= _STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA; else if (!strcasecmp(sub, "MPI_VERBOSE_EXTRA")) profiling_key_mask |= _STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA; /* Added categories here should also be added in the documentation * 501_environment_variable.doxy. */ else _STARPU_MSG("Unknown event type '%s'\n", sub); } } else { /* If user doesn't want to filter events, all events are recorded: */ profiling_key_mask = KEYMASKALL_DEFAULT; } return profiling_key_mask; } void starpu_profiling_set_id(int new_id) { _STARPU_DEBUG("Set id to <%d>\n", new_id); _starpu_id = new_id; _starpu_profile_set_tracefile(); #ifdef HAVE_FUT_SET_FILENAME fut_set_filename(_starpu_prof_file_user); #endif } void _starpu_profiling_set_mpi_worldsize(int worldsize) { STARPU_ASSERT(worldsize >= 1); _starpu_mpi_worldsize = worldsize; int generate_trace = starpu_getenv_number("STARPU_GENERATE_TRACE"); if (generate_trace == 1 && _starpu_mpi_worldsize > 1) { /** TODO: make it work ! * The problem is that when STARPU_GENERATE_TRACE is used, each MPI * process will generate the trace corresponding to its own execution * (which makes no sense in MPI execution with several processes). * Although letting only one StarPU process generating the trace by * using the trace files of all MPI processes is not the most * complicated thing to do, one case is not easy to deal with: what to * do when each process stored its trace file in the local memory of * the node (e.g. /tmp/) ? */ _STARPU_MSG("You can't use STARPU_GENERATE_TRACE=1 with several MPI processes. Use starpu_fxt_tool after application execution.\n"); } } void starpu_fxt_autostart_profiling(int autostart) { /* By calling this function with autostart = 0 before starpu_init(), * FxT will record only required event to properly work later (KEYMASK_META), and * won't record anything else. */ if (autostart) initial_key_mask = _starpu_profile_get_user_keymask(); else initial_key_mask = _STARPU_FUT_KEYMASK_META; } void starpu_fxt_start_profiling() { unsigned threadid = _starpu_gettid(); fut_keychange(FUT_ENABLE, _starpu_profile_get_user_keymask(), threadid); _STARPU_TRACE_META("start_profiling"); } void starpu_fxt_stop_profiling() { unsigned threadid = _starpu_gettid(); _STARPU_TRACE_META("stop_profiling"); fut_keychange(FUT_SETMASK, _STARPU_FUT_KEYMASK_META, threadid); } int starpu_fxt_is_enabled() { return starpu_getenv_number_default("STARPU_FXT_TRACE", 0); } #ifdef HAVE_FUT_SETUP_FLUSH_CALLBACK void _starpu_fxt_flush_callback() { _STARPU_MSG("FxT is flushing trace to disk ! This can impact performance.\n"); _STARPU_MSG("Maybe you should increase the value of STARPU_TRACE_BUFFER_SIZE ?\n"); starpu_fxt_trace_user_event_string("fxt flush"); } #endif void _starpu_fxt_init_profiling(uint64_t trace_buffer_size) { unsigned threadid; STARPU_PTHREAD_MUTEX_LOCK(&_starpu_fxt_started_mutex); if (!(_starpu_fxt_willstart = starpu_fxt_is_enabled())) { STARPU_PTHREAD_COND_BROADCAST(&_starpu_fxt_started_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_fxt_started_mutex); return; } STARPU_ASSERT(!_starpu_fxt_started); _starpu_fxt_started = 1; _starpu_written = 0; _starpu_profile_set_tracefile(); STARPU_HG_DISABLE_CHECKING(fut_active); #ifdef HAVE_FUT_SET_FILENAME fut_set_filename(_starpu_prof_file_user); #endif #ifdef HAVE_ENABLE_FUT_FLUSH // when the event buffer is full, fxt stops recording events. // The trace may thus be incomplete. // Enable the fut_flush function which is called when the // fxt event buffer is full to flush the buffer to disk, // therefore allowing to record the remaining events. enable_fut_flush(); #endif threadid = _starpu_gettid(); #ifdef HAVE_FUT_SETUP_FLUSH_CALLBACK if (fut_setup_flush_callback(trace_buffer_size / sizeof(unsigned long), initial_key_mask, threadid, &_starpu_fxt_flush_callback) < 0) #else if (fut_setup(trace_buffer_size / sizeof(unsigned long), initial_key_mask, threadid) < 0) #endif { perror("fut_setup"); STARPU_ABORT(); } STARPU_PTHREAD_COND_BROADCAST(&_starpu_fxt_started_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_fxt_started_mutex); return; } int _starpu_generate_paje_trace_read_option(const char *option, struct starpu_fxt_options *options) { if (strcmp(option, "-c") == 0) { options->per_task_colour = 1; } else if (strcmp(option, "-no-events") == 0) { options->no_events = 1; } else if (strcmp(option, "-no-counter") == 0) { options->no_counter = 1; } else if (strcmp(option, "-no-bus") == 0) { options->no_bus = 1; } else if (strcmp(option, "-no-flops") == 0) { options->no_flops = 1; } else if (strcmp(option, "-no-smooth") == 0) { options->no_smooth = 1; } else if (strcmp(option, "-no-acquire") == 0) { options->no_acquire = 1; } else if (strcmp(option, "-memory-states") == 0) { options->memory_states = 1; } else if (strcmp(option, "-internal") == 0) { options->internal = 1; } else if (strcmp(option, "-label-deps") == 0) { options->label_deps = 1; } else if (strcmp(option, "-number-events") == 0) { options->number_events_path = strdup("number_events.data"); } else if (strcmp(option, "-use-task-color") == 0) { options->use_task_color = 1; } else { return 1; } return 0; } static void _starpu_generate_paje_trace(char *input_fxt_filename, char *output_paje_filename, char *dirname) { /* We take default options */ struct starpu_fxt_options options; starpu_fxt_options_init(&options); char *trace_options = starpu_getenv("STARPU_GENERATE_TRACE_OPTIONS"); if (trace_options) { char *option = strtok(trace_options, " "); while (option) { int ret = _starpu_generate_paje_trace_read_option(option, &options); if (ret == 1) _STARPU_MSG("Option <%s> is not a valid option for starpu_fxt_tool\n", option); option = strtok(NULL, " "); } } options.ninputfiles = 1; options.filenames[0] = input_fxt_filename; free(options.out_paje_path); options.out_paje_path = strdup(output_paje_filename); options.file_prefix = ""; options.file_rank = -1; options.dir = dirname; starpu_fxt_generate_trace(&options); starpu_fxt_options_shutdown(&options); } void _starpu_fxt_dump_file(void) { if (!_starpu_fxt_started) return; char hostname[128]; gethostname(hostname, 128); int ret = fut_endup(_starpu_prof_file_user); if (ret < 0) _STARPU_MSG("Problem when writing FxT traces into file %s:%s\n", hostname, _starpu_prof_file_user); #ifdef STARPU_VERBOSE else _STARPU_MSG("Writing FxT traces into file %s:%s\n", hostname, _starpu_prof_file_user); #endif } void _starpu_stop_fxt_profiling(void) { if (!_starpu_fxt_started) return; if (!_starpu_written) { _starpu_fxt_dump_file(); /* Should we generate a Paje trace directly ? */ int generate_trace = starpu_getenv_number("STARPU_GENERATE_TRACE"); if (_starpu_mpi_worldsize == 1 && generate_trace == 1) { _starpu_set_catch_signals(0); char *fxt_prefix = starpu_getenv("STARPU_FXT_PREFIX"); _starpu_generate_paje_trace(_starpu_prof_file_user, "paje.trace", fxt_prefix); } int ret = fut_done(); if (ret < 0) { /* Something went wrong with the FxT trace (eg. there * was too many events) */ _STARPU_MSG("Warning: the FxT trace could not be generated properly\n"); } _starpu_written = 1; _starpu_fxt_started = 0; } } #else // STARPU_USE_FXT void starpu_fxt_autostart_profiling(int autostart STARPU_ATTRIBUTE_UNUSED) { } void starpu_fxt_start_profiling() { } void starpu_fxt_stop_profiling() { } #endif // STARPU_USE_FXT void starpu_fxt_trace_user_event(unsigned long code STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_USE_FXT _STARPU_TRACE_USER_EVENT(code); #endif } void starpu_fxt_trace_user_meta_string(const char *s STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_USE_FXT _STARPU_TRACE_META(s); #endif } void starpu_fxt_trace_user_event_string(const char *s STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_USE_FXT _STARPU_TRACE_EVENT(s); #endif } starpu-1.4.9+dfsg/src/common/fxt.h000066400000000000000000002132241507764646700170450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2018,2020 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __FXT_H__ #define __FXT_H__ /** @file */ #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 /* ou _BSD_SOURCE ou _SVID_SOURCE */ #endif #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #ifdef STARPU_USE_FXT #include #include #endif #pragma GCC visibility push(hidden) /* some key to identify the worker kind */ #define _STARPU_FUT_WORKER_KEY(kind) (kind + 0x100) #define _STARPU_FUT_KEY_WORKER(key) (key - 0x100) #define _STARPU_FUT_WORKER_INIT_START 0x5100 #define _STARPU_FUT_WORKER_INIT_END 0x5101 #define _STARPU_FUT_START_CODELET_BODY 0x5102 #define _STARPU_FUT_END_CODELET_BODY 0x5103 #define _STARPU_FUT_JOB_PUSH 0x5104 #define _STARPU_FUT_JOB_POP 0x5105 #define _STARPU_FUT_UPDATE_TASK_CNT 0x5106 #define _STARPU_FUT_START_FETCH_INPUT_ON_TID 0x5107 #define _STARPU_FUT_END_FETCH_INPUT_ON_TID 0x5108 #define _STARPU_FUT_START_PUSH_OUTPUT_ON_TID 0x5109 #define _STARPU_FUT_END_PUSH_OUTPUT_ON_TID 0x5110 #define _STARPU_FUT_TAG 0x5111 #define _STARPU_FUT_TAG_DEPS 0x5112 #define _STARPU_FUT_TASK_DEPS 0x5113 #define _STARPU_FUT_DATA_COPY 0x5114 #define _STARPU_FUT_WORK_STEALING 0x5115 #define _STARPU_FUT_WORKER_DEINIT_START 0x5116 #define _STARPU_FUT_WORKER_DEINIT_END 0x5117 #define _STARPU_FUT_WORKER_SLEEP_START 0x5118 #define _STARPU_FUT_WORKER_SLEEP_END 0x5119 #define _STARPU_FUT_TASK_SUBMIT 0x511a #define _STARPU_FUT_CODELET_DATA_HANDLE 0x511b #define _STARPU_FUT_MODEL_NAME 0x511c #define _STARPU_FUT_DATA_NAME 0x511d #define _STARPU_FUT_DATA_COORDINATES 0x511e #define _STARPU_FUT_HANDLE_DATA_UNREGISTER 0x511f #define _STARPU_FUT_CODELET_DATA_HANDLE_NUMA_ACCESS 0x5120 #define _STARPU_FUT_NEW_MEM_NODE 0x5122 #define _STARPU_FUT_START_CALLBACK 0x5123 #define _STARPU_FUT_END_CALLBACK 0x5124 #define _STARPU_FUT_TASK_DONE 0x5125 #define _STARPU_FUT_TAG_DONE 0x5126 #define _STARPU_FUT_START_ALLOC 0x5127 #define _STARPU_FUT_END_ALLOC 0x5128 #define _STARPU_FUT_START_ALLOC_REUSE 0x5129 #define _STARPU_FUT_END_ALLOC_REUSE 0x5130 #define _STARPU_FUT_USED_MEM 0x512a #define _STARPU_FUT_TASK_NAME 0x512b #define _STARPU_FUT_DATA_WONT_USE 0x512c #define _STARPU_FUT_TASK_COLOR 0x512d #define _STARPU_FUT_DATA_DOING_WONT_USE 0x512e #define _STARPU_FUT_TASK_LINE 0x512f #define _STARPU_FUT_START_MEMRECLAIM 0x5131 #define _STARPU_FUT_END_MEMRECLAIM 0x5132 #define _STARPU_FUT_START_DRIVER_COPY 0x5133 #define _STARPU_FUT_END_DRIVER_COPY 0x5134 #define _STARPU_FUT_START_DRIVER_COPY_ASYNC 0x5135 #define _STARPU_FUT_END_DRIVER_COPY_ASYNC 0x5136 #define _STARPU_FUT_START_PROGRESS_ON_TID 0x5137 #define _STARPU_FUT_END_PROGRESS_ON_TID 0x5138 #define _STARPU_FUT_USER_EVENT 0x5139 #define _STARPU_FUT_SET_PROFILING 0x513a #define _STARPU_FUT_TASK_WAIT_FOR_ALL 0x513b #define _STARPU_FUT_EVENT 0x513c #define _STARPU_FUT_THREAD_EVENT 0x513d #define _STARPU_FUT_CODELET_DETAILS 0x513e #define _STARPU_FUT_CODELET_DATA 0x513f #define _STARPU_FUT_LOCKING_MUTEX 0x5140 #define _STARPU_FUT_MUTEX_LOCKED 0x5141 #define _STARPU_FUT_UNLOCKING_MUTEX 0x5142 #define _STARPU_FUT_MUTEX_UNLOCKED 0x5143 #define _STARPU_FUT_TRYLOCK_MUTEX 0x5144 #define _STARPU_FUT_RDLOCKING_RWLOCK 0x5145 #define _STARPU_FUT_RWLOCK_RDLOCKED 0x5146 #define _STARPU_FUT_WRLOCKING_RWLOCK 0x5147 #define _STARPU_FUT_RWLOCK_WRLOCKED 0x5148 #define _STARPU_FUT_UNLOCKING_RWLOCK 0x5149 #define _STARPU_FUT_RWLOCK_UNLOCKED 0x514a #define _STARPU_FUT_LOCKING_SPINLOCK 0x514b #define _STARPU_FUT_SPINLOCK_LOCKED 0x514c #define _STARPU_FUT_UNLOCKING_SPINLOCK 0x514d #define _STARPU_FUT_SPINLOCK_UNLOCKED 0x514e #define _STARPU_FUT_TRYLOCK_SPINLOCK 0x514f #define _STARPU_FUT_COND_WAIT_BEGIN 0x5150 #define _STARPU_FUT_COND_WAIT_END 0x5151 #define _STARPU_FUT_MEMORY_FULL 0x5152 #define _STARPU_FUT_DATA_LOAD 0x5153 #define _STARPU_FUT_START_UNPARTITION_ON_TID 0x5154 #define _STARPU_FUT_END_UNPARTITION_ON_TID 0x5155 #define _STARPU_FUT_START_FREE 0x5156 #define _STARPU_FUT_END_FREE 0x5157 #define _STARPU_FUT_START_WRITEBACK 0x5158 #define _STARPU_FUT_END_WRITEBACK 0x5159 #define _STARPU_FUT_SCHED_COMPONENT_PUSH_PRIO 0x515a #define _STARPU_FUT_SCHED_COMPONENT_POP_PRIO 0x515b #define _STARPU_FUT_START_WRITEBACK_ASYNC 0x515c #define _STARPU_FUT_END_WRITEBACK_ASYNC 0x515d #define _STARPU_FUT_HYPERVISOR_BEGIN 0x5160 #define _STARPU_FUT_HYPERVISOR_END 0x5161 #define _STARPU_FUT_BARRIER_WAIT_BEGIN 0x5162 #define _STARPU_FUT_BARRIER_WAIT_END 0x5163 #define _STARPU_FUT_WORKER_SCHEDULING_START 0x5164 #define _STARPU_FUT_WORKER_SCHEDULING_END 0x5165 #define _STARPU_FUT_WORKER_SCHEDULING_PUSH 0x5166 #define _STARPU_FUT_WORKER_SCHEDULING_POP 0x5167 #define _STARPU_FUT_START_EXECUTING 0x5168 #define _STARPU_FUT_END_EXECUTING 0x5169 #define _STARPU_FUT_SCHED_COMPONENT_NEW 0x516a #define _STARPU_FUT_SCHED_COMPONENT_CONNECT 0x516b #define _STARPU_FUT_SCHED_COMPONENT_PUSH 0x516c #define _STARPU_FUT_SCHED_COMPONENT_PULL 0x516d #define _STARPU_FUT_TASK_SUBMIT_START 0x516e #define _STARPU_FUT_TASK_SUBMIT_END 0x516f #define _STARPU_FUT_TASK_BUILD_START 0x5170 #define _STARPU_FUT_TASK_BUILD_END 0x5171 #define _STARPU_FUT_TASK_MPI_DECODE_START 0x5172 #define _STARPU_FUT_TASK_MPI_DECODE_END 0x5173 #define _STARPU_FUT_TASK_MPI_PRE_START 0x5174 #define _STARPU_FUT_TASK_MPI_PRE_END 0x5175 #define _STARPU_FUT_TASK_MPI_POST_START 0x5176 #define _STARPU_FUT_TASK_MPI_POST_END 0x5177 #define _STARPU_FUT_TASK_WAIT_START 0x5178 #define _STARPU_FUT_TASK_WAIT_END 0x5179 #define _STARPU_FUT_TASK_WAIT_FOR_ALL_START 0x517a #define _STARPU_FUT_TASK_WAIT_FOR_ALL_END 0x517b #define _STARPU_FUT_HANDLE_DATA_REGISTER 0x517c #define _STARPU_FUT_START_FETCH_INPUT 0x517e #define _STARPU_FUT_END_FETCH_INPUT 0x517f #define _STARPU_FUT_TASK_THROTTLE_START 0x5180 #define _STARPU_FUT_TASK_THROTTLE_END 0x5181 #define _STARPU_FUT_DATA_STATE_INVALID 0x5182 #define _STARPU_FUT_DATA_STATE_OWNER 0x5183 #define _STARPU_FUT_DATA_STATE_SHARED 0x5184 #define _STARPU_FUT_DATA_REQUEST_CREATED 0x5185 #define _STARPU_FUT_PAPI_TASK_EVENT_VALUE 0x5186 #define _STARPU_FUT_TASK_EXCLUDE_FROM_DAG 0x5187 #define _STARPU_FUT_TASK_END_DEP 0x5188 #ifdef STARPU_BUBBLE #define _STARPU_FUT_TASK_BUBBLE 0x5189 #endif #define _STARPU_FUT_START_PARALLEL_SYNC 0x518a #define _STARPU_FUT_END_PARALLEL_SYNC 0x518b /* Predefined FUT key masks */ #define _STARPU_FUT_KEYMASK_META FUT_KEYMASK0 #define _STARPU_FUT_KEYMASK_USER FUT_KEYMASK1 #define _STARPU_FUT_KEYMASK_TASK FUT_KEYMASK2 #define _STARPU_FUT_KEYMASK_TASK_VERBOSE FUT_KEYMASK3 #define _STARPU_FUT_KEYMASK_DATA FUT_KEYMASK4 #define _STARPU_FUT_KEYMASK_DATA_VERBOSE FUT_KEYMASK5 #define _STARPU_FUT_KEYMASK_WORKER FUT_KEYMASK6 #define _STARPU_FUT_KEYMASK_WORKER_VERBOSE FUT_KEYMASK7 #define _STARPU_FUT_KEYMASK_DSM FUT_KEYMASK8 #define _STARPU_FUT_KEYMASK_DSM_VERBOSE FUT_KEYMASK9 #define _STARPU_FUT_KEYMASK_SCHED FUT_KEYMASK10 #define _STARPU_FUT_KEYMASK_SCHED_VERBOSE FUT_KEYMASK11 #define _STARPU_FUT_KEYMASK_LOCK FUT_KEYMASK12 #define _STARPU_FUT_KEYMASK_LOCK_VERBOSE FUT_KEYMASK13 #define _STARPU_FUT_KEYMASK_EVENT FUT_KEYMASK14 #define _STARPU_FUT_KEYMASK_EVENT_VERBOSE FUT_KEYMASK15 #define _STARPU_FUT_KEYMASK_MPI FUT_KEYMASK16 #define _STARPU_FUT_KEYMASK_MPI_VERBOSE FUT_KEYMASK17 #define _STARPU_FUT_KEYMASK_HYP FUT_KEYMASK18 #define _STARPU_FUT_KEYMASK_HYP_VERBOSE FUT_KEYMASK19 #define _STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA FUT_KEYMASK20 #define _STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA FUT_KEYMASK21 /* When doing modifications to keymasks: * - also adapt _starpu_profile_get_user_keymask() in src/common/fxt.c * - adapt KEYMASKALL_DEFAULT in src/common/fxt.c * - adapt the documentation in 501_environment_variable.doxy and/or * 380_offline_performance_tools.doxy */ extern unsigned long _starpu_job_cnt; static inline unsigned long _starpu_fxt_get_job_id(void) { unsigned long ret = STARPU_ATOMIC_ADDL(&_starpu_job_cnt, 1); STARPU_ASSERT_MSG(ret != 0, "Oops, job_id wrapped! There are too many tasks for tracking them for profiling"); return ret; } #ifdef STARPU_USE_FXT /* Some versions of FxT do not include the declaration of the function */ #ifdef HAVE_ENABLE_FUT_FLUSH #if !HAVE_DECL_ENABLE_FUT_FLUSH void enable_fut_flush(); #endif #endif #ifdef HAVE_FUT_SET_FILENAME #if !HAVE_DECL_FUT_SET_FILENAME void fut_set_filename(char *filename); #endif #endif extern int _starpu_fxt_started STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; extern int _starpu_fxt_willstart STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; extern starpu_pthread_mutex_t _starpu_fxt_started_mutex STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; extern starpu_pthread_cond_t _starpu_fxt_started_cond STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; /** Wait until FXT is started (or not). Returns if FXT was started */ static inline int _starpu_fxt_wait_initialisation() { STARPU_PTHREAD_MUTEX_LOCK(&_starpu_fxt_started_mutex); while (_starpu_fxt_willstart && !_starpu_fxt_started) STARPU_PTHREAD_COND_WAIT(&_starpu_fxt_started_cond, &_starpu_fxt_started_mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_fxt_started_mutex); return _starpu_fxt_started; } extern unsigned long _starpu_submit_order; static inline unsigned long _starpu_fxt_get_submit_order(void) { unsigned long ret = STARPU_ATOMIC_ADDL(&_starpu_submit_order, 1); STARPU_ASSERT_MSG(_starpu_submit_order != 0, "Oops, submit_order wrapped! There are too many tasks for tracking them for profiling"); return ret; } long _starpu_gettid(void) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; int _starpu_generate_paje_trace_read_option(const char *option, struct starpu_fxt_options *options) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; /** Initialize the FxT library. */ void _starpu_fxt_init_profiling(uint64_t trace_buffer_size); /** Stop the FxT library, and generate the trace file. */ void _starpu_stop_fxt_profiling(void); /** In case we use MPI, tell the profiling system how many processes are used. */ void _starpu_profiling_set_mpi_worldsize(int worldsize) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; /** Generate the trace file. Used when catching signals SIGINT and SIGSEGV */ void _starpu_fxt_dump_file(void); #ifdef FUT_NEEDS_COMMIT #define _STARPU_FUT_COMMIT(size) fut_commitstampedbuffer(size) #else #define _STARPU_FUT_COMMIT(size) do { } while (0) #endif #ifdef FUT_RAW_ALWAYS_PROBE1STR #define _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str) FUT_RAW_ALWAYS_PROBE1STR(CODE, P1, str) #else #define _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str) \ do { \ if(STARPU_UNLIKELY(fut_active)) { \ /* No more than FXT_MAX_PARAMS args are allowed */ \ /* we add a \0 just in case ... */ \ size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 1)*sizeof(unsigned long));\ unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ unsigned nbargs = 1 + nbargs_str; \ size_t total_len = FUT_SIZE(nbargs); \ unsigned long *futargs = \ fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ *(futargs++) = (unsigned long)(P1); \ snprintf((char *)futargs, len, "%s", str); \ ((char *)futargs)[len - 1] = '\0'; \ _STARPU_FUT_COMMIT(total_len); \ }} while (0) #endif #ifdef FUT_FULL_PROBE1STR #define _STARPU_FUT_FULL_PROBE1STR(KEYMASK, CODE, P1, str) FUT_FULL_PROBE1STR(CODE, P1, str) #else /** Sometimes we need something a little more specific than the wrappers from * FxT: these macro permit to put add an event with 3 (or 4) numbers followed * by a string. */ #define _STARPU_FUT_FULL_PROBE1STR(KEYMASK, CODE, P1, str) \ do { \ if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str); \ } \ } while (0) #endif #ifdef FUT_ALWAYS_PROBE2STR #define _STARPU_FUT_ALWAYS_PROBE2STR(CODE, P1, P2, str) FUT_RAW_ALWAYS_PROBE2STR(CODE, P1, P2, str) #else #define _STARPU_FUT_ALWAYS_PROBE2STR(CODE, P1, P2, str) \ do { \ /* No more than FXT_MAX_PARAMS args are allowed */ \ /* we add a \0 just in case ... */ \ size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 2)*sizeof(unsigned long));\ unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ unsigned nbargs = 2 + nbargs_str; \ size_t total_len = FUT_SIZE(nbargs); \ unsigned long *futargs = \ fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ *(futargs++) = (unsigned long)(P1); \ *(futargs++) = (unsigned long)(P2); \ snprintf((char *)futargs, len, "%s", str); \ ((char *)futargs)[len - 1] = '\0'; \ _STARPU_FUT_COMMIT(total_len); \ } while (0) #endif #ifdef FUT_FULL_PROBE2STR #define _STARPU_FUT_FULL_PROBE2STR(KEYMASK, CODE, P1, P2, str) FUT_FULL_PROBE2STR(CODE, P1, P2, str) #else #define _STARPU_FUT_FULL_PROBE2STR(KEYMASK, CODE, P1, P2, str) \ do { \ if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ _STARPU_FUT_ALWAYS_PROBE2STR(CODE, P1, P2, str); \ } \ } while (0) #endif #ifdef FUT_ALWAYS_PROBE3STR #define _STARPU_FUT_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str) FUT_RAW_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str) #else #define _STARPU_FUT_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str) \ do { \ /* No more than FXT_MAX_PARAMS args are allowed */ \ /* we add a \0 just in case ... */ \ size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 3)*sizeof(unsigned long));\ unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ unsigned nbargs = 3 + nbargs_str; \ size_t total_len = FUT_SIZE(nbargs); \ unsigned long *futargs = \ fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ *(futargs++) = (unsigned long)(P1); \ *(futargs++) = (unsigned long)(P2); \ *(futargs++) = (unsigned long)(P3); \ snprintf((char *)futargs, len, "%s", str); \ ((char *)futargs)[len - 1] = '\0'; \ _STARPU_FUT_COMMIT(total_len); \ } while (0) #endif #ifdef FUT_FULL_PROBE3STR #define _STARPU_FUT_FULL_PROBE3STR(KEYMASK, CODE, P1, P2, P3, str) FUT_FULL_PROBE3STR(CODE, P1, P2, P3, str) #else #define _STARPU_FUT_FULL_PROBE3STR(KEYMASK, CODE, P1, P2, P3, str) \ do { \ if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ _STARPU_FUT_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str); \ } \ } while (0) #endif #ifdef FUT_ALWAYS_PROBE4STR #define _STARPU_FUT_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str) FUT_RAW_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str) #else #define _STARPU_FUT_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str) \ do { \ /* No more than FXT_MAX_PARAMS args are allowed */ \ /* we add a \0 just in case ... */ \ size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 4)*sizeof(unsigned long));\ unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ unsigned nbargs = 4 + nbargs_str; \ size_t total_len = FUT_SIZE(nbargs); \ unsigned long *futargs = \ fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ *(futargs++) = (unsigned long)(P1); \ *(futargs++) = (unsigned long)(P2); \ *(futargs++) = (unsigned long)(P3); \ *(futargs++) = (unsigned long)(P4); \ snprintf((char *)futargs, len, "%s", str); \ ((char *)futargs)[len - 1] = '\0'; \ _STARPU_FUT_COMMIT(total_len); \ } while (0) #endif #ifdef FUT_FULL_PROBE4STR #define _STARPU_FUT_FULL_PROBE4STR(KEYMASK, CODE, P1, P2, P3, P4, str) FUT_FULL_PROBE4STR(CODE, P1, P2, P3, P4, str) #else #define _STARPU_FUT_FULL_PROBE4STR(KEYMASK, CODE, P1, P2, P3, P4, str) \ do { \ if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ _STARPU_FUT_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str); \ } \ } while (0) #endif #ifdef FUT_ALWAYS_PROBE5STR #define _STARPU_FUT_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) FUT_RAW_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) #else #define _STARPU_FUT_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) \ do { \ /* No more than FXT_MAX_PARAMS args are allowed */ \ /* we add a \0 just in case ... */ \ size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 5)*sizeof(unsigned long));\ unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ unsigned nbargs = 5 + nbargs_str; \ size_t total_len = FUT_SIZE(nbargs); \ unsigned long *futargs = \ fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ *(futargs++) = (unsigned long)(P1); \ *(futargs++) = (unsigned long)(P2); \ *(futargs++) = (unsigned long)(P3); \ *(futargs++) = (unsigned long)(P4); \ *(futargs++) = (unsigned long)(P5); \ snprintf((char *)futargs, len, "%s", str); \ ((char *)futargs)[len - 1] = '\0'; \ _STARPU_FUT_COMMIT(total_len); \ } while (0) #endif #ifdef FUT_FULL_PROBE5STR #define _STARPU_FUT_FULL_PROBE5STR(KEYMASK, CODE, P1, P2, P3, P4, P5, str) FUT_FULL_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) #else #define _STARPU_FUT_FULL_PROBE5STR(KEYMASK, CODE, P1, P2, P3, P4, P5, str) \ do { \ if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ _STARPU_FUT_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str); \ } \ } while (0) #endif #ifdef FUT_ALWAYS_PROBE6STR #define _STARPU_FUT_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) FUT_RAW_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) #else #define _STARPU_FUT_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) \ do { \ /* No more than FXT_MAX_PARAMS args are allowed */ \ /* we add a \0 just in case ... */ \ size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 6)*sizeof(unsigned long));\ unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ unsigned nbargs = 6 + nbargs_str; \ size_t total_len = FUT_SIZE(nbargs); \ unsigned long *futargs = \ fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ *(futargs++) = (unsigned long)(P1); \ *(futargs++) = (unsigned long)(P2); \ *(futargs++) = (unsigned long)(P3); \ *(futargs++) = (unsigned long)(P4); \ *(futargs++) = (unsigned long)(P5); \ *(futargs++) = (unsigned long)(P6); \ snprintf((char *)futargs, len, "%s", str); \ ((char *)futargs)[len - 1] = '\0'; \ _STARPU_FUT_COMMIT(total_len); \ } while (0) #endif #ifdef FUT_FULL_PROBE6STR #define _STARPU_FUT_FULL_PROBE6STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, str) FUT_FULL_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) #else #define _STARPU_FUT_FULL_PROBE6STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, str) \ do { \ if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ _STARPU_FUT_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str); \ } \ } while (0) #endif #ifdef FUT_ALWAYS_PROBE7STR #define _STARPU_FUT_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) FUT_RAW_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) #else #define _STARPU_FUT_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) \ do { \ /* No more than FXT_MAX_PARAMS args are allowed */ \ /* we add a \0 just in case ... */ \ size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 7)*sizeof(unsigned long));\ unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ unsigned nbargs = 7 + nbargs_str; \ size_t total_len = FUT_SIZE(nbargs); \ unsigned long *futargs = \ fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ *(futargs++) = (unsigned long)(P1); \ *(futargs++) = (unsigned long)(P2); \ *(futargs++) = (unsigned long)(P3); \ *(futargs++) = (unsigned long)(P4); \ *(futargs++) = (unsigned long)(P5); \ *(futargs++) = (unsigned long)(P6); \ *(futargs++) = (unsigned long)(P7); \ snprintf((char *)futargs, len, "%s", str); \ ((char *)futargs)[len - 1] = '\0'; \ _STARPU_FUT_COMMIT(total_len); \ } while (0) #endif #ifdef FUT_FULL_PROBE7STR #define _STARPU_FUT_FULL_PROBE7STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, P7, str) FUT_FULL_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) #else #define _STARPU_FUT_FULL_PROBE7STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, P7, str) \ do { \ if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ _STARPU_FUT_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str); \ } \ } while (0) #endif #ifndef FUT_RAW_PROBE7 #define FUT_RAW_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do { \ if(STARPU_UNLIKELY(fut_active)) { \ unsigned long *__args __attribute__((unused))= \ fut_getstampedbuffer(CODE, \ FUT_SIZE(7)); \ *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7); \ _STARPU_FUT_COMMIT(FUT_SIZE(7)); \ } \ } while (0) #endif #ifndef FUT_RAW_ALWAYS_PROBE1 #define FUT_RAW_ALWAYS_PROBE1(CODE,P1) do { \ unsigned long *__args __attribute__((unused))= \ fut_getstampedbuffer(CODE, \ FUT_SIZE(1)); \ *(__args++)=(unsigned long)(P1); \ fut_commitstampedbuffer(FUT_SIZE(1)); \ } while (0) #endif #define FUT_DO_ALWAYS_PROBE1(CODE,P1) do { \ FUT_RAW_ALWAYS_PROBE1(FUT_CODE(CODE, 1),P1); \ } while (0) #ifndef FUT_RAW_ALWAYS_PROBE2 #define FUT_RAW_ALWAYS_PROBE2(CODE,P1,P2) do { \ unsigned long *__args __attribute__((unused))= \ fut_getstampedbuffer(CODE, \ FUT_SIZE(2)); \ *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2); \ fut_commitstampedbuffer(FUT_SIZE(2)); \ } while (0) #endif #define FUT_DO_ALWAYS_PROBE2(CODE,P1,P2) do { \ FUT_RAW_ALWAYS_PROBE2(FUT_CODE(CODE, 2),P1,P2); \ } while (0) #ifndef FUT_RAW_ALWAYS_PROBE3 #define FUT_RAW_ALWAYS_PROBE3(CODE,P1,P2,P3) do { \ unsigned long *__args __attribute__((unused))= \ fut_getstampedbuffer(CODE, \ FUT_SIZE(3)); \ *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3); \ fut_commitstampedbuffer(FUT_SIZE(3)); \ } while (0) #endif #define FUT_DO_ALWAYS_PROBE3(CODE,P1,P2,P3) do { \ FUT_RAW_ALWAYS_PROBE3(FUT_CODE(CODE, 3),P1,P2,P3); \ } while (0) #ifndef FUT_RAW_ALWAYS_PROBE4 #define FUT_RAW_ALWAYS_PROBE4(CODE,P1,P2,P3,P4) do { \ unsigned long *__args __attribute__((unused))= \ fut_getstampedbuffer(CODE, \ FUT_SIZE(4)); \ *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4); \ fut_commitstampedbuffer(FUT_SIZE(4)); \ } while (0) #endif #define FUT_DO_ALWAYS_PROBE4(CODE,P1,P2,P3,P4) do { \ FUT_RAW_ALWAYS_PROBE4(FUT_CODE(CODE, 4),P1,P2,P3,P4); \ } while (0) #ifndef FUT_RAW_ALWAYS_PROBE5 #define FUT_RAW_ALWAYS_PROBE5(CODE,P1,P2,P3,P4,P5) do { \ unsigned long *__args __attribute__((unused))= \ fut_getstampedbuffer(CODE, \ FUT_SIZE(5)); \ *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5); \ fut_commitstampedbuffer(FUT_SIZE(5)); \ } while (0) #endif #define FUT_DO_ALWAYS_PROBE5(CODE,P1,P2,P3,P4,P5) do { \ FUT_RAW_ALWAYS_PROBE5(FUT_CODE(CODE, 5),P1,P2,P3,P4,P5); \ } while (0) #ifndef FUT_RAW_ALWAYS_PROBE6 #define FUT_RAW_ALWAYS_PROBE6(CODE,P1,P2,P3,P4,P5,P6) do { \ unsigned long *__args __attribute__((unused))= \ fut_getstampedbuffer(CODE, \ FUT_SIZE(6)); \ *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6); \ fut_commitstampedbuffer(FUT_SIZE(6)); \ } while (0) #endif #define FUT_DO_ALWAYS_PROBE6(CODE,P1,P2,P3,P4,P5,P6) do { \ FUT_RAW_ALWAYS_PROBE6(FUT_CODE(CODE, 6),P1,P2,P3,P4,P5,P6); \ } while (0) #ifndef FUT_RAW_ALWAYS_PROBE7 #define FUT_RAW_ALWAYS_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do { \ unsigned long *__args __attribute__((unused))= \ fut_getstampedbuffer(CODE, \ FUT_SIZE(7)); \ *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7); \ fut_commitstampedbuffer(FUT_SIZE(7)); \ } while (0) #endif #define FUT_DO_ALWAYS_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do { \ FUT_RAW_ALWAYS_PROBE7(FUT_CODE(CODE, 7),P1,P2,P3,P4,P5,P6,P7); \ } while (0) #ifndef FUT_RAW_ALWAYS_PROBE8 #define FUT_RAW_ALWAYS_PROBE8(CODE,P1,P2,P3,P4,P5,P6,P7,P8) do { \ unsigned long *__args __attribute__((unused))= \ fut_getstampedbuffer(CODE, \ FUT_SIZE(8)); \ *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7);*(__args++)=(unsigned long)(P8); \ fut_commitstampedbuffer(FUT_SIZE(8)); \ } while (0) #endif #define FUT_DO_ALWAYS_PROBE8(CODE,P1,P2,P3,P4,P5,P6,P7,P8) do { \ FUT_RAW_ALWAYS_PROBE8(FUT_CODE(CODE, 8),P1,P2,P3,P4,P5,P6,P7,P8); \ } while (0) #ifndef FUT_RAW_ALWAYS_PROBE9 #define FUT_RAW_ALWAYS_PROBE9(CODE,P1,P2,P3,P4,P5,P6,P7,P8,P9) do { \ unsigned long *__args __attribute__((unused))= \ fut_getstampedbuffer(CODE, \ FUT_SIZE(9)); \ *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7);*(__args++)=(unsigned long)(P8);*(__args++)=(unsigned long)(P9); \ fut_commitstampedbuffer(FUT_SIZE(9)); \ } while (0) #endif #define FUT_DO_ALWAYS_PROBE9(CODE,P1,P2,P3,P4,P5,P6,P7,P8,P9) do { \ FUT_RAW_ALWAYS_PROBE9(FUT_CODE(CODE, 9),P1,P2,P3,P4,P5,P6,P7,P8,P9); \ } while (0) /* full probes */ #ifndef FUT_FULL_PROBE0 #define FUT_FULL_PROBE0(KEYMASK,CODE) do { \ if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ FUT_RAW_ALWAYS_PROBE0(FUT_CODE(CODE, 0)); \ } \ } while(0) #endif #ifndef FUT_FULL_PROBE1 #define FUT_FULL_PROBE1(KEYMASK,CODE,P1) do { \ if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ FUT_RAW_ALWAYS_PROBE1(FUT_CODE(CODE, 1),P1); \ } \ } while(0) #endif #ifndef FUT_FULL_PROBE2 #define FUT_FULL_PROBE2(KEYMASK,CODE,P1,P2) do { \ if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ FUT_RAW_ALWAYS_PROBE2(FUT_CODE(CODE, 2),P1,P2); \ } \ } while(0) #endif #ifndef FUT_FULL_PROBE3 #define FUT_FULL_PROBE3(KEYMASK,CODE,P1,P2,P3) do { \ if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ FUT_RAW_ALWAYS_PROBE3(FUT_CODE(CODE, 3),P1,P2,P3); \ } \ } while(0) #endif #ifndef FUT_FULL_PROBE4 #define FUT_FULL_PROBE4(KEYMASK,CODE,P1,P2,P3,P4) do { \ if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ FUT_RAW_ALWAYS_PROBE4(FUT_CODE(CODE, 4),P1,P2,P3,P4); \ } \ } while(0) #endif #ifndef FUT_FULL_PROBE5 #define FUT_FULL_PROBE5(KEYMASK,CODE,P1,P2,P3,P4,P5) do { \ if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ FUT_RAW_ALWAYS_PROBE5(FUT_CODE(CODE, 5),P1,P2,P3,P4,P5); \ } \ } while(0) #endif #ifndef FUT_FULL_PROBE6 #define FUT_FULL_PROBE6(KEYMASK,CODE,P1,P2,P3,P4,P5,P6) do { \ if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ FUT_RAW_ALWAYS_PROBE6(FUT_CODE(CODE, 6),P1,P2,P3,P4,P5,P6); \ } \ } while(0) #endif #ifndef FUT_FULL_PROBE7 #define FUT_FULL_PROBE7(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7) do { \ if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ FUT_RAW_ALWAYS_PROBE7(FUT_CODE(CODE, 7),P1,P2,P3,P4,P5,P6,P7); \ } \ } while(0) #endif #ifndef FUT_FULL_PROBE8 #define FUT_FULL_PROBE8(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7,P8) do { \ if(KEYMASK & fut_active) { \ FUT_RAW_ALWAYS_PROBE8(FUT_CODE(CODE, 8),P1,P2,P3,P4,P5,P6,P7,P8); \ } \ } while(0) #endif #ifndef FUT_FULL_PROBE9 #define FUT_FULL_PROBE9(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7,P8,P9) do { \ if(KEYMASK & fut_active) { \ FUT_RAW_ALWAYS_PROBE9(FUT_CODE(CODE, 9),P1,P2,P3,P4,P5,P6,P7,P8,P9); \ } \ } while(0) #endif #define _STARPU_TRACE_NEW_MEM_NODE(nodeid) do {\ if (_starpu_fxt_started) \ FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_NEW_MEM_NODE, nodeid, _starpu_gettid()); \ } while (0) #define _STARPU_TRACE_REGISTER_THREAD(cpuid) do {\ if (_starpu_fxt_started) \ FUT_DO_ALWAYS_PROBE2(FUT_NEW_LWP_CODE, cpuid, _starpu_gettid()); \ } while (0) #define _STARPU_TRACE_WORKER_INIT_START(workerkind, workerid, devid, memnode, bindid, sync) do {\ if (_starpu_fxt_started) \ FUT_DO_ALWAYS_PROBE7(_STARPU_FUT_WORKER_INIT_START, _STARPU_FUT_WORKER_KEY(workerkind), workerid, devid, memnode, bindid, sync, _starpu_gettid()); \ } while (0) #define _STARPU_TRACE_WORKER_INIT_END(__workerid) do {\ if (_starpu_fxt_started) \ FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_INIT_END, _starpu_gettid(), (__workerid)); \ } while (0) #define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, perf_arch, workerid, rank) \ do { \ if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK|_STARPU_FUT_KEYMASK_TASK_VERBOSE|_STARPU_FUT_KEYMASK_DATA|_STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA) & fut_active)) { \ FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, starpu_worker_get_memory_node(workerid)); \ { \ if (rank == 0 && (job)->task->cl) \ { \ const int __nbuffers = STARPU_TASK_GET_NBUFFERS((job)->task); \ char __buf[FXT_MAX_PARAMS*sizeof(long)]; \ int __i; \ for (__i = 0; __i < __nbuffers; __i++) \ { \ starpu_data_handle_t __handle = STARPU_TASK_GET_HANDLE((job)->task, __i); \ void *__interface = _STARPU_TASK_GET_INTERFACES((job)->task)[__i]; \ if (__handle->ops->describe) \ { \ __handle->ops->describe(__interface, __buf, sizeof(__buf)); \ _STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_CODELET_DATA, workerid, __buf); \ } \ FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_CODELET_DATA_HANDLE, (job)->job_id, (__handle), _starpu_data_get_size(__handle), STARPU_TASK_GET_MODE((job)->task, __i)); \ /* Regarding the memory location: * - if the data interface doesn't provide to_pointer operation, NULL will be returned * and the location will be -1, which is fine; * - we have to check whether the memory is on an actual NUMA node (and not on GPU * memory, for instance); * - looking at memory location before executing the task isn't the best choice: * the page can be not allocated yet. A solution would be to get the memory * location at the end of the task, but there is no FxT probe where we iterate over * handles, after task execution. * */ \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA, _STARPU_FUT_CODELET_DATA_HANDLE_NUMA_ACCESS, (job)->job_id, (__i), starpu_worker_get_memory_node_kind(starpu_worker_get_type(workerid)) == STARPU_CPU_RAM && starpu_task_get_current_data_node(__i) >= 0 ? starpu_get_memory_location_bitmap(starpu_data_handle_to_pointer(__handle, (unsigned) starpu_task_get_current_data_node(__i)), starpu_data_get_size(__handle)) : -1); \ } \ } \ const size_t __job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job)); \ const uint32_t __job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\ FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_CODELET_DETAILS, ((job)->task)->sched_ctx, __job_size, __job_hash, (job)->task->flops / 1000 / ((job)->task->cl && job->task->cl->type != STARPU_SEQ ? j->task_size : 1), (job)->task->tag_id, workerid, ((job)->job_id)); \ } \ } \ } while(0) #define _STARPU_TRACE_END_CODELET_BODY(job, nimpl, perf_arch, workerid, rank) \ do { \ if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) { \ const size_t job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job)); \ const uint32_t job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job));\ char _archname[32]=""; \ if (perf_arch) starpu_perfmodel_get_arch_name(perf_arch, _archname, 32, 0); \ _STARPU_FUT_FULL_PROBE5STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_END_CODELET_BODY, (job)->job_id, (job_size), (job_hash), workerid, _starpu_gettid(), _archname); \ } \ } while(0) #define _STARPU_TRACE_START_EXECUTING(job) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_EXECUTING, _starpu_gettid(), (job)->job_id); #define _STARPU_TRACE_END_EXECUTING(job) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_EXECUTING, _starpu_gettid(), (job)->job_id); #define _STARPU_TRACE_START_PARALLEL_SYNC(job) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_PARALLEL_SYNC, _starpu_gettid(), (job)->job_id); #define _STARPU_TRACE_END_PARALLEL_SYNC(job) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_PARALLEL_SYNC, _starpu_gettid(), (job)->job_id); #define _STARPU_TRACE_START_CALLBACK(job) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_CALLBACK, job, _starpu_gettid()); #define _STARPU_TRACE_END_CALLBACK(job) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_CALLBACK, job, _starpu_gettid()); #define _STARPU_TRACE_JOB_PUSH(task, prio) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_JOB_PUSH, _starpu_get_job_associated_to_task(task)->job_id, prio, _starpu_gettid()); #define _STARPU_TRACE_JOB_POP(task, prio) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_JOB_POP, _starpu_get_job_associated_to_task(task)->job_id, prio, _starpu_gettid()); #define _STARPU_TRACE_UPDATE_TASK_CNT(counter) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_UPDATE_TASK_CNT, counter, _starpu_gettid()) #define _STARPU_TRACE_START_FETCH_INPUT(job) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_FETCH_INPUT_ON_TID, job, _starpu_gettid()); #define _STARPU_TRACE_END_FETCH_INPUT(job) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_FETCH_INPUT_ON_TID, job, _starpu_gettid()); #define _STARPU_TRACE_START_PUSH_OUTPUT(job) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_PUSH_OUTPUT_ON_TID, job, _starpu_gettid()); #define _STARPU_TRACE_END_PUSH_OUTPUT(job) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_PUSH_OUTPUT_ON_TID, job, _starpu_gettid()); #define _STARPU_TRACE_WORKER_END_FETCH_INPUT(job, id) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_FETCH_INPUT, job, id); #define _STARPU_TRACE_WORKER_START_FETCH_INPUT(job, id) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_FETCH_INPUT, job, id); #define _STARPU_TRACE_TAG(tag, job) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG, tag, (job)->job_id) #define _STARPU_TRACE_TAG_DEPS(tag_child, tag_father) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DEPS, tag_child, tag_father) #define _STARPU_TRACE_TASK_DEPS(job_prev, job_succ) \ _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_DEPS, (job_prev)->job_id, (job_succ)->job_id, (job_succ)->task->type, 1, "task") #define _STARPU_TRACE_TASK_END_DEP(job_prev, job_succ) \ FUT_DO_PROBE2(_STARPU_FUT_TASK_END_DEP, (job_prev)->job_id, (job_succ)->job_id) #define _STARPU_TRACE_GHOST_TASK_DEPS(ghost_prev_id, job_succ) \ _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_DEPS, (ghost_prev_id), (job_succ)->job_id, (job_succ)->task->type, 1, "ghost") #ifdef STARPU_BUBBLE #define _STARPU_TRACE_BUBBLE_TASK_DEPS(prev_id, job_succ) \ _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_DEPS, (prev_id), (job_succ)->job_id, (job_succ)->task->type, 1, "bubble") #endif #define _STARPU_TRACE_TASK_EXCLUDE_FROM_DAG(job) \ do { \ unsigned exclude_from_dag = (job)->exclude_from_dag; \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_EXCLUDE_FROM_DAG, (job)->job_id, (long unsigned)exclude_from_dag); \ } while(0) #define _STARPU_TRACE_TASK_NAME_LINE_COLOR(job) \ do { \ _STARPU_TRACE_TASK_COLOR(job); \ _STARPU_TRACE_TASK_NAME(job); \ _STARPU_TRACE_TASK_LINE(job); \ } while(0) #define _STARPU_TRACE_TASK_LINE(job) \ do { \ if ((job)->task->file) \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_LINE, (job)->job_id, (job)->task->line, (job)->task->file); \ } while(0) #ifdef STARPU_BUBBLE #define _STARPU_TRACE_BUBBLE(job) \ do { \ if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) { \ unsigned int is_bubble=(job)->is_bubble; \ unsigned long bubble_parent=(job)->task->bubble_parent; \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_BUBBLE, (job)->job_id, is_bubble, bubble_parent); \ } \ } while(0) #endif #define _STARPU_TRACE_TASK_NAME(job) \ do { \ if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) { \ const char *model_name = _starpu_job_get_model_name((job)); \ const char *name = _starpu_job_get_task_name((job)); \ if (name) \ { \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, _starpu_gettid(), name); \ } \ else { \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, _starpu_gettid(), "unknown");\ } \ if (model_name) \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_MODEL_NAME, (job)->job_id, _starpu_gettid(), model_name); \ } \ } while(0) #define _STARPU_TRACE_TASK_COLOR(job) \ do { \ if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) { \ if ((job)->task->color != 0) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->color); \ else if ((job)->task->cl && (job)->task->cl->color != 0) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->cl->color); \ } \ } while(0) #define _STARPU_TRACE_TASK_DONE(job) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_DONE, (job)->job_id, _starpu_gettid()) #define _STARPU_TRACE_TAG_DONE(tag) \ do { \ if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) { \ struct _starpu_job *job = (tag)->job; \ const char *model_name = _starpu_job_get_task_name((job)); \ if (model_name) \ { \ _STARPU_FUT_FULL_PROBE3STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 1, model_name); \ } \ else { \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 0);\ } \ } \ } while(0) #define _STARPU_TRACE_DATA_NAME(handle, name) \ _STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_NAME, handle, name) #define _STARPU_TRACE_DATA_COORDINATES(handle, dim, v) do {\ switch (dim) { \ case 1: FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0]); break; \ case 2: FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1]); break; \ case 3: FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2]); break; \ case 4: FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2], v[3]); break; \ default: FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2], v[3], v[4]); break; \ } \ } while (0) #define _STARPU_TRACE_DATA_COPY(src_node, dst_node, size) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_COPY, src_node, dst_node, size) #define _STARPU_TRACE_DATA_WONT_USE(handle) \ FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_DATA_WONT_USE, handle, _starpu_fxt_get_submit_order(), _starpu_fxt_get_job_id(), _starpu_gettid()) #define _STARPU_TRACE_DATA_DOING_WONT_USE(handle) \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_DOING_WONT_USE, handle) #define _STARPU_TRACE_START_DRIVER_COPY(src_node, dst_node, size, com_id, prefetch, handle) \ FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_DRIVER_COPY, src_node, dst_node, size, com_id, prefetch, handle) #define _STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, com_id, prefetch) \ FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_DRIVER_COPY, src_node, dst_node, size, com_id, prefetch) #define _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_DRIVER_COPY_ASYNC, src_node, dst_node) #define _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_DRIVER_COPY_ASYNC, src_node, dst_node) #define _STARPU_TRACE_WORK_STEALING(empty_q, victim_q) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_SCHED_VERBOSE, _STARPU_FUT_WORK_STEALING, empty_q, victim_q) #define _STARPU_TRACE_WORKER_DEINIT_START do {\ if (_starpu_fxt_started) \ FUT_DO_ALWAYS_PROBE1(_STARPU_FUT_WORKER_DEINIT_START, _starpu_gettid()); \ } while(0) #define _STARPU_TRACE_WORKER_DEINIT_END(workerkind) do {\ if (_starpu_fxt_started) \ FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, _STARPU_FUT_WORKER_KEY(workerkind), _starpu_gettid()); \ } while(0) #define _STARPU_TRACE_WORKER_SCHEDULING_START \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_START, _starpu_gettid()); #define _STARPU_TRACE_WORKER_SCHEDULING_END \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_END, _starpu_gettid()); #define _STARPU_TRACE_WORKER_SCHEDULING_PUSH \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_PUSH, _starpu_gettid()); #define _STARPU_TRACE_WORKER_SCHEDULING_POP \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_POP, _starpu_gettid()); #define _STARPU_TRACE_WORKER_SLEEP_START \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER, _STARPU_FUT_WORKER_SLEEP_START, _starpu_gettid()); #define _STARPU_TRACE_WORKER_SLEEP_END \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER, _STARPU_FUT_WORKER_SLEEP_END, _starpu_gettid()); #define _STARPU_TRACE_TASK_SUBMIT(job, iter, subiter) \ FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_SUBMIT, (job)->job_id, iter, subiter, (job)->task->no_submitorder?0:_starpu_fxt_get_submit_order(), (job)->task->priority, (job)->task->type, _starpu_gettid()); #define _STARPU_TRACE_TASK_SUBMIT_START() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_SUBMIT_START, _starpu_gettid()); #define _STARPU_TRACE_TASK_SUBMIT_END() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_SUBMIT_END, _starpu_gettid()); #define _STARPU_TRACE_TASK_THROTTLE_START() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_THROTTLE_START, _starpu_gettid()); #define _STARPU_TRACE_TASK_THROTTLE_END() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_THROTTLE_END, _starpu_gettid()); #define _STARPU_TRACE_TASK_BUILD_START() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_BUILD_START, _starpu_gettid()); #define _STARPU_TRACE_TASK_BUILD_END() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_BUILD_END, _starpu_gettid()); #define _STARPU_TRACE_TASK_MPI_DECODE_START() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_DECODE_START, _starpu_gettid()); #define _STARPU_TRACE_TASK_MPI_DECODE_END() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_DECODE_END, _starpu_gettid()); #define _STARPU_TRACE_TASK_MPI_PRE_START() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_PRE_START, _starpu_gettid()); #define _STARPU_TRACE_TASK_MPI_PRE_END() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_PRE_END, _starpu_gettid()); #define _STARPU_TRACE_TASK_MPI_POST_START() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_POST_START, _starpu_gettid()); #define _STARPU_TRACE_TASK_MPI_POST_END() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_POST_END, _starpu_gettid()); #define _STARPU_TRACE_TASK_WAIT_START(job) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_START, (job)->job_id, _starpu_gettid()); #define _STARPU_TRACE_TASK_WAIT_END() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_END, _starpu_gettid()); #define _STARPU_TRACE_TASK_WAIT_FOR_ALL_START() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_FOR_ALL_START, _starpu_gettid()); #define _STARPU_TRACE_TASK_WAIT_FOR_ALL_END() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_FOR_ALL_END, _starpu_gettid()); #define _STARPU_TRACE_START_ALLOC(memnode, size, handle, is_prefetch) \ FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_ALLOC, memnode, _starpu_gettid(), size, handle, is_prefetch); #define _STARPU_TRACE_END_ALLOC(memnode, handle, r) \ FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_ALLOC, memnode, _starpu_gettid(), handle, r); #define _STARPU_TRACE_START_ALLOC_REUSE(memnode, size, handle, is_prefetch) \ FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_ALLOC_REUSE, memnode, _starpu_gettid(), size, handle, is_prefetch); #define _STARPU_TRACE_END_ALLOC_REUSE(memnode, handle, r) \ FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_ALLOC_REUSE, memnode, _starpu_gettid(), handle, r); #define _STARPU_TRACE_START_FREE(memnode, size, handle) \ FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_FREE, memnode, _starpu_gettid(), size, handle); #define _STARPU_TRACE_END_FREE(memnode, handle) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_FREE, memnode, _starpu_gettid(), handle); #define _STARPU_TRACE_START_WRITEBACK(memnode, handle) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_WRITEBACK, memnode, _starpu_gettid(), handle); #define _STARPU_TRACE_END_WRITEBACK(memnode, handle) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_WRITEBACK, memnode, _starpu_gettid(), handle); #define _STARPU_TRACE_USED_MEM(memnode,used) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_USED_MEM, memnode, used, _starpu_gettid()); #define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid()); #define _STARPU_TRACE_END_MEMRECLAIM(memnode, is_prefetch) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid()); #define _STARPU_TRACE_START_WRITEBACK_ASYNC(memnode) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_WRITEBACK_ASYNC, memnode, _starpu_gettid()); #define _STARPU_TRACE_END_WRITEBACK_ASYNC(memnode) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_WRITEBACK_ASYNC, memnode, _starpu_gettid()); #define _STARPU_TRACE_PAPI_TASK_EVENT(event_id, task, value) \ FUT_DO_PROBE3(_STARPU_FUT_PAPI_TASK_EVENT_VALUE, event_id, _starpu_get_job_associated_to_task(task)->job_id, value) /* We skip these events because they are called so often that they cause FxT to * fail and make the overall trace unreadable anyway. */ #define _STARPU_TRACE_START_PROGRESS(memnode) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_PROGRESS_ON_TID, memnode, _starpu_gettid()); #define _STARPU_TRACE_END_PROGRESS(memnode) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_PROGRESS_ON_TID, memnode, _starpu_gettid()); #define _STARPU_TRACE_USER_EVENT(code) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_USER, _STARPU_FUT_USER_EVENT, code, _starpu_gettid()); #define _STARPU_TRACE_META(S) \ FUT_FULL_PROBESTR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_EVENT,S) #define _STARPU_TRACE_SET_PROFILING(status) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_SET_PROFILING, status, _starpu_gettid()); #define _STARPU_TRACE_TASK_WAIT_FOR_ALL \ FUT_FULL_PROBE0(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_WAIT_FOR_ALL) #define _STARPU_TRACE_EVENT_ALWAYS(S) do {\ if (_starpu_fxt_started) \ FUT_DO_ALWAYS_PROBESTR(_STARPU_FUT_EVENT,S) \ } while(0) #define _STARPU_TRACE_EVENT(S) \ FUT_FULL_PROBESTR(_STARPU_FUT_KEYMASK_EVENT, _STARPU_FUT_EVENT,S) #define _STARPU_TRACE_EVENT_VERBOSE(S) \ FUT_FULL_PROBESTR(_STARPU_FUT_KEYMASK_EVENT_VERBOSE, _STARPU_FUT_EVENT,S) #define _STARPU_TRACE_THREAD_EVENT(S) \ _STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_WORKER, _STARPU_FUT_THREAD_EVENT, _starpu_gettid(), S) #define _STARPU_TRACE_HYPERVISOR_BEGIN() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_HYP, _STARPU_FUT_HYPERVISOR_BEGIN, _starpu_gettid()); #define _STARPU_TRACE_HYPERVISOR_END() \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_HYP, _STARPU_FUT_HYPERVISOR_END, _starpu_gettid()); #ifdef STARPU_FXT_LOCK_TRACES #define _STARPU_TRACE_LOCKING_MUTEX() do { \ const char *file; \ file = strrchr(__FILE__,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_LOCKING_MUTEX,__LINE__,_starpu_gettid(),file); \ } while (0) #define _STARPU_TRACE_MUTEX_LOCKED() do { \ const char *file; \ file = strrchr(__FILE__,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_MUTEX_LOCKED,__LINE__,_starpu_gettid(),file); \ } while(0) #define _STARPU_TRACE_UNLOCKING_MUTEX() do { \ const char *file; \ file = strrchr(__FILE__,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_UNLOCKING_MUTEX,__LINE__,_starpu_gettid(),file); \ } while(0) #define _STARPU_TRACE_MUTEX_UNLOCKED() do {\ const char *file; \ file = strrchr(__FILE__,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_MUTEX_UNLOCKED,__LINE__,_starpu_gettid(),file); \ } while(0) #define _STARPU_TRACE_TRYLOCK_MUTEX() do { \ const char *file; \ file = strrchr(__FILE__,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_TRYLOCK_MUTEX,__LINE__,_starpu_gettid(),file); \ } while(0) #define _STARPU_TRACE_RDLOCKING_RWLOCK() do { \ const char *file; \ file = strrchr(__FILE__,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_RDLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); \ } while(0) #define _STARPU_TRACE_RWLOCK_RDLOCKED() do { \ const char *file; \ file = strrchr(__FILE__,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_RWLOCK_RDLOCKED,__LINE__,_starpu_gettid(),file); \ } while(0) #define _STARPU_TRACE_WRLOCKING_RWLOCK() do { \ const char *file; \ file = strrchr(__FILE__,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_WRLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); \ } while(0) #define _STARPU_TRACE_RWLOCK_WRLOCKED() do { \ const char *file; \ file = strrchr(__FILE__,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_RWLOCK_WRLOCKED,__LINE__,_starpu_gettid(),file); \ } while(0) #define _STARPU_TRACE_UNLOCKING_RWLOCK() do { \ const char *file; \ file = strrchr(__FILE__,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_UNLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); \ } while(0) #define _STARPU_TRACE_RWLOCK_UNLOCKED() do { \ const char *file; \ file = strrchr(__FILE__,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_RWLOCK_UNLOCKED,__LINE__,_starpu_gettid(),file); \ } while(0) #define STARPU_TRACE_SPINLOCK_CONDITITION (starpu_worker_get_type(starpu_worker_get_id()) == STARPU_CUDA_WORKER) #define _STARPU_TRACE_LOCKING_SPINLOCK(file, line) do {\ if (STARPU_TRACE_SPINLOCK_CONDITITION) { \ const char *xfile; \ xfile = strrchr(file,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_LOCKING_SPINLOCK,line,_starpu_gettid(),xfile); \ } \ } while(0) #define _STARPU_TRACE_SPINLOCK_LOCKED(file, line) do { \ if (STARPU_TRACE_SPINLOCK_CONDITITION) { \ const char *xfile; \ xfile = strrchr(file,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_SPINLOCK_LOCKED,line,_starpu_gettid(),xfile); \ } \ } while(0) #define _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line) do { \ if (STARPU_TRACE_SPINLOCK_CONDITITION) { \ const char *xfile; \ xfile = strrchr(file,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_UNLOCKING_SPINLOCK,line,_starpu_gettid(),xfile); \ } \ } while(0) #define _STARPU_TRACE_SPINLOCK_UNLOCKED(file, line) do { \ if (STARPU_TRACE_SPINLOCK_CONDITITION) { \ const char *xfile; \ xfile = strrchr(file,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_SPINLOCK_UNLOCKED,line,_starpu_gettid(),xfile); \ } \ } while(0) #define _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line) do { \ if (STARPU_TRACE_SPINLOCK_CONDITITION) { \ const char *xfile; \ xfile = strrchr(file,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_TRYLOCK_SPINLOCK,line,_starpu_gettid(),xfile); \ } \ } while(0) #define _STARPU_TRACE_COND_WAIT_BEGIN() do { \ const char *file; \ file = strrchr(__FILE__,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_COND_WAIT_BEGIN,__LINE__,_starpu_gettid(),file); \ } while(0) #define _STARPU_TRACE_COND_WAIT_END() do { \ const char *file; \ file = strrchr(__FILE__,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_COND_WAIT_END,__LINE__,_starpu_gettid(),file); \ } while(0) #define _STARPU_TRACE_BARRIER_WAIT_BEGIN() do { \ const char *file; \ file = strrchr(__FILE__,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_BARRIER_WAIT_BEGIN,__LINE__,_starpu_gettid(),file); \ } while(0) #define _STARPU_TRACE_BARRIER_WAIT_END() do { \ const char *file; \ file = strrchr(__FILE__,'/') + 1; \ _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_BARRIER_WAIT_END,__LINE__,_starpu_gettid(),file); \ } while(0) #else // !STARPU_FXT_LOCK_TRACES #define _STARPU_TRACE_LOCKING_MUTEX() do {} while(0) #define _STARPU_TRACE_MUTEX_LOCKED() do {} while(0) #define _STARPU_TRACE_UNLOCKING_MUTEX() do {} while(0) #define _STARPU_TRACE_MUTEX_UNLOCKED() do {} while(0) #define _STARPU_TRACE_TRYLOCK_MUTEX() do {} while(0) #define _STARPU_TRACE_RDLOCKING_RWLOCK() do {} while(0) #define _STARPU_TRACE_RWLOCK_RDLOCKED() do {} while(0) #define _STARPU_TRACE_WRLOCKING_RWLOCK() do {} while(0) #define _STARPU_TRACE_RWLOCK_WRLOCKED() do {} while(0) #define _STARPU_TRACE_UNLOCKING_RWLOCK() do {} while(0) #define _STARPU_TRACE_RWLOCK_UNLOCKED() do {} while(0) #define _STARPU_TRACE_LOCKING_SPINLOCK(file, line) do {(void) file; (void)line;} while(0) #define _STARPU_TRACE_SPINLOCK_LOCKED(file, line) do {(void) file; (void)line;} while(0) #define _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line) do {(void) file; (void)line;} while(0) #define _STARPU_TRACE_SPINLOCK_UNLOCKED(file, line) do {(void) file; (void)line;} while(0) #define _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line) do {(void) file; (void)line;} while(0) #define _STARPU_TRACE_COND_WAIT_BEGIN() do {} while(0) #define _STARPU_TRACE_COND_WAIT_END() do {} while(0) #define _STARPU_TRACE_BARRIER_WAIT_BEGIN() do {} while(0) #define _STARPU_TRACE_BARRIER_WAIT_END() do {} while(0) #endif // STARPU_FXT_LOCK_TRACES #define _STARPU_TRACE_MEMORY_FULL(size) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_MEMORY_FULL,size,_starpu_gettid()); #define _STARPU_TRACE_DATA_LOAD(workerid,size) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_LOAD, workerid, size); #define _STARPU_TRACE_START_UNPARTITION(handle, memnode) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_UNPARTITION_ON_TID, memnode, _starpu_gettid(), handle); #define _STARPU_TRACE_END_UNPARTITION(handle, memnode) \ FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_UNPARTITION_ON_TID, memnode, _starpu_gettid(), handle); #define _STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(workerid, ntasks, exp_len) \ FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PUSH_PRIO, _starpu_gettid(), workerid, ntasks, exp_len); #define _STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(workerid, ntasks, exp_len) \ FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_POP_PRIO, _starpu_gettid(), workerid, ntasks, exp_len); #define _STARPU_TRACE_SCHED_COMPONENT_NEW(component) \ if (STARPU_UNLIKELY(fut_active)) _STARPU_FUT_ALWAYS_PROBE1STR(_STARPU_FUT_SCHED_COMPONENT_NEW, component, (component)->name); #define _STARPU_TRACE_SCHED_COMPONENT_CONNECT(parent, child) \ if (STARPU_UNLIKELY(fut_active)) FUT_RAW_ALWAYS_PROBE2(FUT_CODE(_STARPU_FUT_SCHED_COMPONENT_CONNECT,2), parent, child); #define _STARPU_TRACE_SCHED_COMPONENT_PUSH(from, to, task, prio) \ FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PUSH, _starpu_gettid(), from, to, task, prio); #define _STARPU_TRACE_SCHED_COMPONENT_PULL(from, to, task) \ FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PULL, _starpu_gettid(), from, to, task, (task)->priority); #define _STARPU_TRACE_HANDLE_DATA_REGISTER(handle) do { \ if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_META) & fut_active)) { \ const size_t __data_size = handle->ops->get_size(handle); \ const starpu_ssize_t __max_data_size = _starpu_data_get_max_size(handle); \ char __buf[(FXT_MAX_PARAMS-4)*sizeof(long)]; \ void *__interface = handle->per_node[0].data_interface; \ if (handle->ops->describe) \ handle->ops->describe(__interface, __buf, sizeof(__buf)); \ else \ __buf[0] = 0; \ _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_HANDLE_DATA_REGISTER, handle, __data_size, __max_data_size, handle->home_node, __buf); \ } \ } while (0) #define _STARPU_TRACE_HANDLE_DATA_UNREGISTER(handle) \ FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_HANDLE_DATA_UNREGISTER, handle) //Coherency Data Traces #define _STARPU_TRACE_DATA_STATE_INVALID(handle, node) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_STATE_INVALID, handle, node) #define _STARPU_TRACE_DATA_STATE_OWNER(handle, node) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_STATE_OWNER, handle, node) #define _STARPU_TRACE_DATA_STATE_SHARED(handle, node) \ FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_STATE_SHARED, handle, node) #define _STARPU_TRACE_DATA_REQUEST_CREATED(handle, orig, dest, prio, is_pre, req) \ FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_REQUEST_CREATED, orig, dest, prio, handle, is_pre, req) #else // !STARPU_USE_FXT /* Dummy macros in case FxT is disabled */ #define _STARPU_TRACE_NEW_MEM_NODE(nodeid) do {(void)(nodeid);} while(0) #define _STARPU_TRACE_REGISTER_THREAD(cpuid) do {(void)(cpuid);} while(0) #define _STARPU_TRACE_WORKER_INIT_START(a,b,c,d,e,f) do {(void)(a); (void)(b); (void)(c); (void)(d); (void)(e); (void)(f);} while(0) #define _STARPU_TRACE_WORKER_INIT_END(workerid) do {(void)(workerid);} while(0) #define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, perf_arch, workerid, rank) do {(void)(job); (void)(nimpl); (void)(perf_arch); (void)(workerid); (void)(rank);} while(0) #define _STARPU_TRACE_END_CODELET_BODY(job, nimpl, perf_arch, workerid, rank) do {(void)(job); (void)(nimpl); (void)(perf_arch); (void)(workerid); (void)(rank);} while(0) #define _STARPU_TRACE_START_EXECUTING(job) do {(void)(job);} while(0) #define _STARPU_TRACE_END_EXECUTING(job) do {(void)(job);} while(0) #define _STARPU_TRACE_START_PARALLEL_SYNC(job) do {(void)(job);} while(0) #define _STARPU_TRACE_END_PARALLEL_SYNC(job) do {(void)(job);} while(0) #define _STARPU_TRACE_START_CALLBACK(job) do {(void)(job);} while(0) #define _STARPU_TRACE_END_CALLBACK(job) do {(void)(job);} while(0) #define _STARPU_TRACE_JOB_PUSH(task, prio) do {(void)(task); (void)(prio);} while(0) #define _STARPU_TRACE_JOB_POP(task, prio) do {(void)(task); (void)(prio);} while(0) #define _STARPU_TRACE_UPDATE_TASK_CNT(counter) do {(void)(counter);} while(0) #define _STARPU_TRACE_START_FETCH_INPUT(job) do {(void)(job);} while(0) #define _STARPU_TRACE_END_FETCH_INPUT(job) do {(void)(job);} while(0) #define _STARPU_TRACE_START_PUSH_OUTPUT(job) do {(void)(job);} while(0) #define _STARPU_TRACE_END_PUSH_OUTPUT(job) do {(void)(job);} while(0) #define _STARPU_TRACE_TAG(tag, job) do {(void)(tag); (void)(job);} while(0) #define _STARPU_TRACE_TAG_DEPS(a, b) do {(void)(a); (void)(b);} while(0) #define _STARPU_TRACE_TASK_DEPS(a, b) do {(void)(a); (void)(b);} while(0) #define _STARPU_TRACE_TASK_END_DEP(a, b) do {(void)(a); (void)(b);} while(0) #define _STARPU_TRACE_GHOST_TASK_DEPS(a, b) do {(void)(a); (void)(b);} while(0) #define _STARPU_TRACE_TASK_EXCLUDE_FROM_DAG(a) do {(void)(a);} while(0) #define _STARPU_TRACE_TASK_NAME_LINE_COLOR(a) do {(void)(a);} while(0) #define _STARPU_TRACE_TASK_NAME(a) do {(void)(a);} while(0) #define _STARPU_TRACE_TASK_LINE(a) do {(void)(a);} while(0) #define _STARPU_TRACE_TASK_COLOR(a) do {(void)(a);} while(0) #define _STARPU_TRACE_TASK_DONE(a) do {(void)(a);} while(0) #define _STARPU_TRACE_TAG_DONE(a) do {(void)(a);} while(0) #define _STARPU_TRACE_DATA_NAME(a, b) do {(void)(a); (void)(b);} while(0) #define _STARPU_TRACE_DATA_COORDINATES(a, b, c) do {(void)(a); (void)(b); (void)(c);} while(0) #define _STARPU_TRACE_DATA_COPY(a, b, c) do {(void)(a); (void)(b); (void)(c);} while(0) #define _STARPU_TRACE_DATA_WONT_USE(a) do {(void)(a);} while(0) #define _STARPU_TRACE_DATA_DOING_WONT_USE(a) do {(void)(a);} while(0) #define _STARPU_TRACE_START_DRIVER_COPY(a,b,c,d,e,f) do {(void)(a); (void)(b); (void)(c); (void)(d); (void)(e); (void)(f);} while(0) #define _STARPU_TRACE_END_DRIVER_COPY(a,b,c,d,e) do {(void)(a); (void)(b); (void)(c); (void)(d); (void)(e);} while(0) #define _STARPU_TRACE_START_DRIVER_COPY_ASYNC(a,b) do {(void)(a); (void)(b);} while(0) #define _STARPU_TRACE_END_DRIVER_COPY_ASYNC(a,b) do {(void)(a); (void)(b);} while(0) #define _STARPU_TRACE_WORK_STEALING(a, b) do {(void)(a); (void)(b);} while(0) #define _STARPU_TRACE_WORKER_DEINIT_START do {} while(0) #define _STARPU_TRACE_WORKER_DEINIT_END(a) do {(void)(a);} while(0) #define _STARPU_TRACE_WORKER_SCHEDULING_START do {} while(0) #define _STARPU_TRACE_WORKER_SCHEDULING_END do {} while(0) #define _STARPU_TRACE_WORKER_SCHEDULING_PUSH do {} while(0) #define _STARPU_TRACE_WORKER_SCHEDULING_POP do {} while(0) #define _STARPU_TRACE_WORKER_SLEEP_START do {} while(0) #define _STARPU_TRACE_WORKER_SLEEP_END do {} while(0) #define _STARPU_TRACE_TASK_SUBMIT(job, a, b) do {(void)(job); (void)(a);(void)(b);} while(0) #define _STARPU_TRACE_TASK_SUBMIT_START() do {} while(0) #define _STARPU_TRACE_TASK_SUBMIT_END() do {} while(0) #define _STARPU_TRACE_TASK_THROTTLE_START() do {} while(0) #define _STARPU_TRACE_TASK_THROTTLE_END() do {} while(0) #define _STARPU_TRACE_TASK_BUILD_START() do {} while(0) #define _STARPU_TRACE_TASK_BUILD_END() do {} while(0) #define _STARPU_TRACE_TASK_MPI_DECODE_START() do {} while(0) #define _STARPU_TRACE_TASK_MPI_DECODE_END() do {} while(0) #define _STARPU_TRACE_TASK_MPI_PRE_START() do {} while(0) #define _STARPU_TRACE_TASK_MPI_PRE_END() do {} while(0) #define _STARPU_TRACE_TASK_MPI_POST_START() do {} while(0) #define _STARPU_TRACE_TASK_MPI_POST_END() do {} while(0) #define _STARPU_TRACE_TASK_WAIT_START(job) do {(void)(job);} while(0) #define _STARPU_TRACE_TASK_WAIT_END() do {} while(0) #define _STARPU_TRACE_TASK_WAIT_FOR_ALL_START() do {} while(0) #define _STARPU_TRACE_TASK_WAIT_FOR_ALL_END() do {} while(0) #define _STARPU_TRACE_START_ALLOC(memnode, size, handle, is_prefetch) do {(void)(memnode); (void)(size); (void)(handle);} while(0) #define _STARPU_TRACE_END_ALLOC(memnode, handle, r) do {(void)(memnode); (void)(handle); (void)(r);} while(0) #define _STARPU_TRACE_START_ALLOC_REUSE(a, size, handle, is_prefetch) do {(void)(a); (void)(size); (void)(handle);} while(0) #define _STARPU_TRACE_END_ALLOC_REUSE(a, handle, r) do {(void)(a); (void)(handle); (void)(r);} while(0) #define _STARPU_TRACE_START_FREE(memnode, size, handle) do {(void)(memnode); (void)(size); (void)(handle);} while(0) #define _STARPU_TRACE_END_FREE(memnode, handle) do {(void)(memnode); (void)(handle);} while(0) #define _STARPU_TRACE_START_WRITEBACK(memnode, handle) do {(void)(memnode); (void)(handle);} while(0) #define _STARPU_TRACE_END_WRITEBACK(memnode, handle) do {(void)(memnode); (void)(handle);} while(0) #define _STARPU_TRACE_USED_MEM(memnode,used) do {(void)(memnode); (void)(used);} while (0) #define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch) do {(void)(memnode); (void)(is_prefetch);} while(0) #define _STARPU_TRACE_END_MEMRECLAIM(memnode,is_prefetch) do {(void)(memnode); (void)(is_prefetch);} while(0) #define _STARPU_TRACE_START_WRITEBACK_ASYNC(memnode) do {(void)(memnode);} while(0) #define _STARPU_TRACE_END_WRITEBACK_ASYNC(memnode) do {(void)(memnode);} while(0) #define _STARPU_TRACE_START_PROGRESS(memnode) do {(void)(memnode);} while(0) #define _STARPU_TRACE_END_PROGRESS(memnode) do {(void)(memnode);} while(0) #define _STARPU_TRACE_USER_EVENT(code) do {(void)(code);} while(0) #define _STARPU_TRACE_SET_PROFILING(status) do {(void)(status);} while(0) #define _STARPU_TRACE_TASK_WAIT_FOR_ALL() do {} while(0) #define _STARPU_TRACE_EVENT_ALWAYS(S) do {(void)(S);} while(0) #define _STARPU_TRACE_EVENT(S) do {(void)(S);} while(0) #define _STARPU_TRACE_EVENT_VERBOSE(S) do {(void)(S);} while(0) #define _STARPU_TRACE_THREAD_EVENT(S) do {(void)(S);} while(0) #define _STARPU_TRACE_LOCKING_MUTEX() do {} while(0) #define _STARPU_TRACE_MUTEX_LOCKED() do {} while(0) #define _STARPU_TRACE_UNLOCKING_MUTEX() do {} while(0) #define _STARPU_TRACE_MUTEX_UNLOCKED() do {} while(0) #define _STARPU_TRACE_TRYLOCK_MUTEX() do {} while(0) #define _STARPU_TRACE_RDLOCKING_RWLOCK() do {} while(0) #define _STARPU_TRACE_RWLOCK_RDLOCKED() do {} while(0) #define _STARPU_TRACE_WRLOCKING_RWLOCK() do {} while(0) #define _STARPU_TRACE_RWLOCK_WRLOCKED() do {} while(0) #define _STARPU_TRACE_UNLOCKING_RWLOCK() do {} while(0) #define _STARPU_TRACE_RWLOCK_UNLOCKED() do {} while(0) #define _STARPU_TRACE_LOCKING_SPINLOCK(file, line) do {(void)(file); (void)(line);} while(0) #define _STARPU_TRACE_SPINLOCK_LOCKED(file, line) do {(void)(file); (void)(line);} while(0) #define _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line) do {(void)(file); (void)(line);} while(0) #define _STARPU_TRACE_SPINLOCK_UNLOCKED(file, line) do {(void)(file); (void)(line);} while(0) #define _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line) do {(void)(file); (void)(line);} while(0) #define _STARPU_TRACE_COND_WAIT_BEGIN() do {} while(0) #define _STARPU_TRACE_COND_WAIT_END() do {} while(0) #define _STARPU_TRACE_BARRIER_WAIT_BEGIN() do {} while(0) #define _STARPU_TRACE_BARRIER_WAIT_END() do {} while(0) #define _STARPU_TRACE_MEMORY_FULL(size) do {(void)(size);} while(0) #define _STARPU_TRACE_DATA_LOAD(workerid,size) do {(void)(workerid); (void)(size);} while(0) #define _STARPU_TRACE_START_UNPARTITION(handle, memnode) do {(void)(handle); (void)(memnode);} while(0) #define _STARPU_TRACE_END_UNPARTITION(handle, memnode) do {(void)(handle); (void)(memnode);} while(0) #define _STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(workerid, ntasks, exp_len) do {(void)(workerid); (void)(ntasks); (void)(exp_len);} while(0) #define _STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(workerid, ntasks, exp_len) do {(void)(workerid); (void)(ntasks); (void)(exp_len);} while(0) #define _STARPU_TRACE_HYPERVISOR_BEGIN() do {} while(0) #define _STARPU_TRACE_HYPERVISOR_END() do {} while(0) #define _STARPU_TRACE_SCHED_COMPONENT_NEW(component) do {(void)(component);} while (0) #define _STARPU_TRACE_SCHED_COMPONENT_CONNECT(parent, child) do {(void)(parent); (void)(child);} while (0) #define _STARPU_TRACE_SCHED_COMPONENT_PUSH(from, to, task, prio) do {(void)(from); (void)(to); (void)(task); (void)(prio);} while (0) #define _STARPU_TRACE_SCHED_COMPONENT_PULL(from, to, task) do {(void)(from); (void)(to); (void)(task);} while (0) #define _STARPU_TRACE_HANDLE_DATA_REGISTER(handle) do {(void)(handle);} while (0) #define _STARPU_TRACE_HANDLE_DATA_UNREGISTER(handle) do {(void)(handle);} while (0) #define _STARPU_TRACE_WORKER_START_FETCH_INPUT(job, id) do {(void)(job); (void)(id);} while(0) #define _STARPU_TRACE_WORKER_END_FETCH_INPUT(job, id) do {(void)(job); (void)(id);} while(0) #define _STARPU_TRACE_DATA_STATE_INVALID(handle, node) do {(void)(handle); (void)(node);} while(0) #define _STARPU_TRACE_DATA_STATE_OWNER(handle, node) do {(void)(handle); (void)(node);} while(0) #define _STARPU_TRACE_DATA_STATE_SHARED(handle, node) do {(void)(handle); (void)(node);} while(0) #define _STARPU_TRACE_DATA_REQUEST_CREATED(handle, orig, dest, prio, is_pre, req) do {(void)(handle); (void)(orig); (void)(dest); (void)(prio); (void)(is_pre); (void)(req); } while(0) #define _STARPU_TRACE_PAPI_TASK_EVENT(event_id, task, value) do {(void)(event_id); (void)(task); (void)(value);} while(0) #ifdef STARPU_BUBBLE #define _STARPU_TRACE_BUBBLE_TASK_DEPS(a, b) do {(void)(a); (void)(b);} while(0) #define _STARPU_TRACE_BUBBLE(a) do {(void)(a);} while(0) #endif #endif // STARPU_USE_FXT #pragma GCC visibility pop #endif // __FXT_H__ starpu-1.4.9+dfsg/src/common/graph.c000066400000000000000000000350561507764646700173450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This stores the task graph structure, to used by the schedulers which need * it. We do not always enable it since it is costly. To avoid interfering * too much with execution, it may be a bit outdated, i.e. still contain jobs * which have completed very recently. * * This is because we drop nodes lazily: when a job terminates, we just add the * node to the dropped list (to avoid having to take the mutex on the whole * graph). The graph gets updated whenever the graph mutex becomes available. */ #include #include #include #include /* Protects the whole task graph except the dropped list */ static starpu_pthread_rwlock_t graph_lock; /* Whether we should enable recording the task graph */ int _starpu_graph_record; /* This list contains all nodes without incoming dependency */ static struct _starpu_graph_node_multilist_top top; /* This list contains all nodes without outgoing dependency */ static struct _starpu_graph_node_multilist_bottom bottom; /* This list contains all nodes */ static struct _starpu_graph_node_multilist_all all; /* Protects the dropped list, always taken before graph lock */ static starpu_pthread_mutex_t dropped_lock; /* This list contains all dropped nodes, i.e. the job terminated by the corresponding node is still int he graph */ static struct _starpu_graph_node_multilist_dropped dropped; void _starpu_graph_init(void) { STARPU_PTHREAD_RWLOCK_INIT(&graph_lock, NULL); _starpu_graph_node_multilist_head_init_top(&top); _starpu_graph_node_multilist_head_init_bottom(&bottom); _starpu_graph_node_multilist_head_init_all(&all); STARPU_PTHREAD_MUTEX_INIT(&dropped_lock, NULL); _starpu_graph_node_multilist_head_init_dropped(&dropped); } /* LockWR the graph lock */ void _starpu_graph_wrlock(void) { starpu_worker_relax_on(); STARPU_PTHREAD_RWLOCK_WRLOCK(&graph_lock); starpu_worker_relax_off(); } void _starpu_graph_drop_node(struct _starpu_graph_node *node); /* This flushes the list of nodes to be dropped. Both the dropped_lock and * graph_lock mutexes have to be held on entry, and are released. */ void _starpu_graph_drop_dropped_nodes(void) { struct _starpu_graph_node_multilist_dropped dropping; /* Pick up the list of dropped nodes */ _starpu_graph_node_multilist_move_dropped(&dropped, &dropping); STARPU_PTHREAD_MUTEX_UNLOCK(&dropped_lock); /* And now process it if it's not empty. */ if (!_starpu_graph_node_multilist_empty_dropped(&dropping)) { struct _starpu_graph_node *node, *next; for (node = _starpu_graph_node_multilist_begin_dropped(&dropping); node != _starpu_graph_node_multilist_end_dropped(&dropping); node = next) { next = _starpu_graph_node_multilist_next_dropped(node); _starpu_graph_drop_node(node); } } STARPU_PTHREAD_RWLOCK_UNLOCK(&graph_lock); } /* UnlockWR the graph lock */ void _starpu_graph_wrunlock(void) { starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&dropped_lock); starpu_worker_relax_off(); _starpu_graph_drop_dropped_nodes(); } /* LockRD the graph lock */ void _starpu_graph_rdlock(void) { starpu_worker_relax_on(); STARPU_PTHREAD_RWLOCK_RDLOCK(&graph_lock); starpu_worker_relax_off(); } /* UnlockRD the graph lock */ void _starpu_graph_rdunlock(void) { STARPU_PTHREAD_RWLOCK_UNLOCK(&graph_lock); /* Take the opportunity to try to take it WR */ if (STARPU_PTHREAD_RWLOCK_TRYWRLOCK(&graph_lock) == 0) /* Good, flush dropped nodes */ _starpu_graph_wrunlock(); } static void __starpu_graph_foreach(void (*func)(void *data, struct _starpu_graph_node *node), void *data) { struct _starpu_graph_node *node; for (node = _starpu_graph_node_multilist_begin_all(&all); node != _starpu_graph_node_multilist_end_all(&all); node = _starpu_graph_node_multilist_next_all(node)) func(data, node); } /* Add a node to the graph */ void _starpu_graph_add_job(struct _starpu_job *job) { struct _starpu_graph_node *node; _STARPU_CALLOC(node, 1, sizeof(*node)); node->job = job; job->graph_node = node; STARPU_PTHREAD_MUTEX_INIT0(&node->mutex, NULL); _starpu_graph_wrlock(); /* It does not have any dependency yet, add to all lists */ _starpu_graph_node_multilist_push_back_top(&top, node); _starpu_graph_node_multilist_push_back_bottom(&bottom, node); _starpu_graph_node_multilist_push_back_all(&all, node); _starpu_graph_wrunlock(); } /* Add a node to an array of nodes */ static unsigned add_node(struct _starpu_graph_node *node, struct _starpu_graph_node ***nodes, unsigned *n_nodes, unsigned *alloc_nodes, unsigned **slot) { unsigned ret; if (*n_nodes == *alloc_nodes) { if (*alloc_nodes) *alloc_nodes *= 2; else *alloc_nodes = 4; _STARPU_REALLOC(*nodes, *alloc_nodes * sizeof(**nodes)); if (slot) { _STARPU_REALLOC(*slot, *alloc_nodes * sizeof(**slot)); } } ret = (*n_nodes)++; (*nodes)[ret] = node; return ret; } /* Add a dependency between nodes */ void _starpu_graph_add_job_dep(struct _starpu_job *job, struct _starpu_job *prev_job) { unsigned rank_incoming, rank_outgoing; _starpu_graph_wrlock(); struct _starpu_graph_node *node = job->graph_node; struct _starpu_graph_node *prev_node = prev_job->graph_node; if (!node || !prev_node) { /* Already gone */ _starpu_graph_wrunlock(); return; } if (_starpu_graph_node_multilist_queued_bottom(prev_node)) /* Previous node is not at bottom any more */ _starpu_graph_node_multilist_erase_bottom(&bottom, prev_node); if (_starpu_graph_node_multilist_queued_top(node)) /* Next node is not at top any more */ _starpu_graph_node_multilist_erase_top(&top, node); node->total_incoming++; rank_incoming = add_node(prev_node, &node->incoming, &node->n_incoming, &node->alloc_incoming, &node->incoming_slot); rank_outgoing = add_node(node, &prev_node->outgoing, &prev_node->n_outgoing, &prev_node->alloc_outgoing, &prev_node->outgoing_slot); prev_node->outgoing_slot[rank_outgoing] = rank_incoming; node->incoming_slot[rank_incoming] = rank_outgoing; _starpu_graph_wrunlock(); } /* Drop a node, and thus its dependencies */ void _starpu_graph_drop_node(struct _starpu_graph_node *node) { unsigned i; STARPU_ASSERT(!node->job); if (_starpu_graph_node_multilist_queued_bottom(node)) _starpu_graph_node_multilist_erase_bottom(&bottom, node); if (_starpu_graph_node_multilist_queued_top(node)) _starpu_graph_node_multilist_erase_top(&top, node); if (_starpu_graph_node_multilist_queued_all(node)) _starpu_graph_node_multilist_erase_all(&all, node); /* Drop ourself from the incoming part of the outgoing nodes. */ for (i = 0; i < node->n_outgoing; i++) { struct _starpu_graph_node *next = node->outgoing[i]; if (next) next->incoming[node->outgoing_slot[i]] = NULL; } /* Drop ourself from the outgoing part of the incoming nodes, * in case we happen to get dropped before it. */ for (i = 0; i < node->n_incoming; i++) { struct _starpu_graph_node *prev = node->incoming[i]; if (prev) prev->outgoing[node->incoming_slot[i]] = NULL; } node->n_outgoing = 0; free(node->outgoing); node->outgoing = NULL; free(node->outgoing_slot); node->outgoing_slot = NULL; node->alloc_outgoing = 0; node->n_incoming = 0; free(node->incoming); node->incoming = NULL; free(node->incoming_slot); node->incoming_slot = NULL; node->alloc_incoming = 0; free(node); } /* Drop a job */ void _starpu_graph_drop_job(struct _starpu_job *job) { struct _starpu_graph_node *node = job->graph_node; job->graph_node = NULL; if (!node) return; starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&node->mutex); starpu_worker_relax_off(); /* Will not be able to use the job any more */ node->job = NULL; STARPU_PTHREAD_MUTEX_UNLOCK(&node->mutex); starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&dropped_lock); starpu_worker_relax_off(); /* Queue for removal when lock becomes available */ _starpu_graph_node_multilist_push_back_dropped(&dropped, node); if (STARPU_PTHREAD_RWLOCK_TRYWRLOCK(&graph_lock) == 0) { /* Graph wrlock is available, drop nodes immediately */ _starpu_graph_drop_dropped_nodes(); } else STARPU_PTHREAD_MUTEX_UNLOCK(&dropped_lock); } static void _starpu_graph_set_n(void *data, struct _starpu_graph_node *node) { int value = (intptr_t) data; node->graph_n = value; } /* Call func for each vertex of the task graph, from bottom to top, in topological order */ static void _starpu_graph_compute_bottom_up(void (*func)(struct _starpu_graph_node *next_node, struct _starpu_graph_node *prev_node, void *data), void *data) { struct _starpu_graph_node *node, *node2; struct _starpu_graph_node **current_set = NULL, **next_set = NULL, **swap_set; unsigned current_n, next_n, i, j; unsigned current_alloc = 0, next_alloc = 0, swap_alloc; /* Classical flow algorithm: start from bottom, and propagate depths to top */ /* Set number of processed outgoing edges to 0 for each node */ __starpu_graph_foreach(_starpu_graph_set_n, (void*) 0); /* Start with the bottom of the graph */ current_n = 0; for (node = _starpu_graph_node_multilist_begin_bottom(&bottom); node != _starpu_graph_node_multilist_end_bottom(&bottom); node = _starpu_graph_node_multilist_next_bottom(node)) add_node(node, ¤t_set, ¤t_n, ¤t_alloc, NULL); /* Now propagate to top as long as we have current nodes */ while (current_n) { /* Next set is initially empty */ next_n = 0; /* For each node in the current set */ for (i = 0; i < current_n; i++) { node = current_set[i]; /* For each parent of this node */ for (j = 0; j < node->n_incoming; j++) { node2 = node->incoming[j]; if (!node2) continue; node2->graph_n++; func(node, node2, data); if ((unsigned) node2->graph_n == node2->n_outgoing) /* All outgoing edges were processed, can now add to next set */ add_node(node2, &next_set, &next_n, &next_alloc, NULL); } } /* Swap next set with current set */ swap_set = next_set; swap_alloc = next_alloc; next_set = current_set; next_alloc = current_alloc; current_set = swap_set; current_alloc = swap_alloc; current_n = next_n; } free(current_set); free(next_set); } static void compute_depth(struct _starpu_graph_node *next_node, struct _starpu_graph_node *prev_node, void *data) { (void)data; if (prev_node->depth < next_node->depth + 1) prev_node->depth = next_node->depth + 1; } void _starpu_graph_compute_depths(void) { struct _starpu_graph_node *node; _starpu_graph_wrlock(); /* The bottom of the graph has depth 0 */ for (node = _starpu_graph_node_multilist_begin_bottom(&bottom); node != _starpu_graph_node_multilist_end_bottom(&bottom); node = _starpu_graph_node_multilist_next_bottom(node)) node->depth = 0; _starpu_graph_compute_bottom_up(compute_depth, NULL); _starpu_graph_wrunlock(); } void _starpu_graph_compute_descendants(void) { struct _starpu_graph_node *node, *node2, *node3; struct _starpu_graph_node **current_set = NULL, **next_set = NULL, **swap_set; unsigned current_n, next_n, i, j; unsigned current_alloc = 0, next_alloc = 0, swap_alloc; _starpu_graph_wrlock(); /* Yes, this is O(|V|.(|V|+|E|)) */ /* We could get O(|V|.|E|) by doing a topological sort first. * * |E| is usually O(|V|), though (bounded number of data dependencies, * and we use synchronization tasks) */ for (node = _starpu_graph_node_multilist_begin_all(&all); node != _starpu_graph_node_multilist_end_all(&all); node = _starpu_graph_node_multilist_next_all(node)) { unsigned descendants; /* Mark all nodes as unseen */ for (node2 = _starpu_graph_node_multilist_begin_all(&all); node2 != _starpu_graph_node_multilist_end_all(&all); node2 = _starpu_graph_node_multilist_next_all(node2)) node2->graph_n = 0; /* Start with the node we want to compute the number of descendants of */ current_n = 0; add_node(node, ¤t_set, ¤t_n, ¤t_alloc, NULL); node->graph_n = 1; descendants = 0; /* While we have descendants, count their descendants */ while (current_n) { /* Next set is initially empty */ next_n = 0; /* For each node in the current set */ for (i = 0; i < current_n; i++) { node2 = current_set[i]; /* For each child of this node2 */ for (j = 0; j < node2->n_outgoing; j++) { node3 = node2->outgoing[j]; if (!node3) continue; if (node3->graph_n) /* Already seen */ continue; /* Add this node */ node3->graph_n = 1; descendants++; add_node(node3, &next_set, &next_n, &next_alloc, NULL); } } /* Swap next set with current set */ swap_set = next_set; swap_alloc = next_alloc; next_set = current_set; next_alloc = current_alloc; current_set = swap_set; current_alloc = swap_alloc; current_n = next_n; } node->descendants = descendants; } _starpu_graph_wrunlock(); free(current_set); free(next_set); } void _starpu_graph_foreach(void (*func)(void *data, struct _starpu_graph_node *node), void *data) { _starpu_graph_wrlock(); __starpu_graph_foreach(func, data); _starpu_graph_wrunlock(); } struct _starpu_graph_node *_starpu_graph_task_node(struct starpu_task *task) { // Can job be NULL? In other words, can a task not be associated with any job? struct _starpu_job *job = _starpu_get_job_associated_to_task(task); return job->graph_node; } struct starpu_task *_starpu_graph_node_task(struct _starpu_graph_node *node) { struct _starpu_job *job = node->job; struct starpu_task *task = NULL; if (job) task = job->task; return task; } void _starpu_graph_node_outgoing(struct _starpu_graph_node *node, unsigned *n_outgoing, struct _starpu_graph_node ***outgoing) { unsigned n, added = 0; _starpu_graph_rdlock(); if (*n_outgoing < node->n_outgoing) { // Reallocate the 'outgoing' array if its size is smaller than the node's number of outgoing nodes _STARPU_REALLOC(*outgoing, node->n_outgoing * sizeof(**outgoing)); } *n_outgoing = node->n_outgoing; for (n = 0; n < *n_outgoing; ++n) { struct _starpu_graph_node *successor = node->outgoing[n]; if (successor) *outgoing[added++] = node; } _starpu_graph_rdunlock(); } starpu-1.4.9+dfsg/src/common/graph.h000066400000000000000000000106711507764646700173460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __GRAPH_H__ #define __GRAPH_H__ #include #include #pragma GCC visibility push(hidden) /** @file */ MULTILIST_CREATE_TYPE(_starpu_graph_node, all) MULTILIST_CREATE_TYPE(_starpu_graph_node, top) MULTILIST_CREATE_TYPE(_starpu_graph_node, bottom) MULTILIST_CREATE_TYPE(_starpu_graph_node, dropped) struct _starpu_graph_node { /** protects access to the job */ starpu_pthread_mutex_t mutex; /** pointer to the job, if it is still alive, NULL otherwise */ struct _starpu_job *job; /** * Fields for graph analysis for scheduling heuristics */ /** Member of list of all jobs without incoming dependency */ struct _starpu_graph_node_multilist_top top; /** Member of list of all jobs without outgoing dependency */ struct _starpu_graph_node_multilist_bottom bottom; /** Member of list of all jobs */ struct _starpu_graph_node_multilist_all all; /** Member of list of dropped jobs */ struct _starpu_graph_node_multilist_dropped dropped; /** set of incoming dependencies */ /** May contain NULLs for terminated jobs */ struct _starpu_graph_node **incoming; /** Index within corresponding outgoing array */ unsigned *incoming_slot; /** Number of slots used */ unsigned n_incoming; /** Size of incoming */ unsigned alloc_incoming; /** set of outgoing dependencies */ struct _starpu_graph_node **outgoing; /** Total number of incoming dependencies, including those who completed */ unsigned total_incoming; /** Index within corresponding incoming array */ unsigned *outgoing_slot; /** Number of slots used */ unsigned n_outgoing; /** Size of outgoing */ unsigned alloc_outgoing; /** Rank from bottom, in number of jobs * Only available if _starpu_graph_compute_depths was called */ unsigned depth; /** Number of children, grand-children, etc. * Only available if _starpu_graph_compute_descendants was called */ unsigned descendants; /** Variable available for graph flow */ int graph_n; }; MULTILIST_CREATE_INLINES(struct _starpu_graph_node, _starpu_graph_node, all) MULTILIST_CREATE_INLINES(struct _starpu_graph_node, _starpu_graph_node, top) MULTILIST_CREATE_INLINES(struct _starpu_graph_node, _starpu_graph_node, bottom) MULTILIST_CREATE_INLINES(struct _starpu_graph_node, _starpu_graph_node, dropped) extern int _starpu_graph_record; void _starpu_graph_init(void); void _starpu_graph_wrlock(void); void _starpu_graph_rdlock(void); void _starpu_graph_wrunlock(void); void _starpu_graph_rdunlock(void); /** Add a job to the graph, called before any _starpu_graph_add_job_dep call */ void _starpu_graph_add_job(struct _starpu_job *job); /** Add a dependency between jobs */ void _starpu_graph_add_job_dep(struct _starpu_job *job, struct _starpu_job *prev_job); /** Remove a job from the graph */ void _starpu_graph_drop_job(struct _starpu_job *job); /** Really drop the nodes from the graph now */ void _starpu_graph_drop_dropped_nodes(void); /** * This make StarPU compute for each task the depth, i.e. the length * of the longest path to a task without outgoing dependencies. * This does not take job duration into account, just the number */ void _starpu_graph_compute_depths(void); /** Compute the descendants of jobs in the graph */ void _starpu_graph_compute_descendants(void); /** * This calls \e func for each node of the task graph, passing also \e * data as it * Apply func on each job of the graph */ void _starpu_graph_foreach(void (*func)(void *data, struct _starpu_graph_node *node), void *data); struct _starpu_graph_node *_starpu_graph_task_node(struct starpu_task *task); struct starpu_task *_starpu_graph_node_task(struct _starpu_graph_node *node); void _starpu_graph_node_outgoing(struct _starpu_graph_node *node, unsigned *n_outgoing, struct _starpu_graph_node ***outgoing); #pragma GCC visibility pop #endif /* __GRAPH_H__ */ starpu-1.4.9+dfsg/src/common/hash.c000066400000000000000000000037541507764646700171670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #define _STARPU_CRC32C_POLY_BE 0x1EDC6F41 static inline uint32_t STARPU_ATTRIBUTE_PURE starpu_crc32c_be_8(uint8_t inputbyte, uint32_t inputcrc) { unsigned i; uint32_t crc; crc = inputcrc ^ (((uint32_t) inputbyte) << 24); for (i = 0; i < 8; i++) crc = (crc << 1) ^ ((crc & 0x80000000) ? _STARPU_CRC32C_POLY_BE : 0); return crc; } uint32_t starpu_hash_crc32c_be_n(const void *input, size_t n, uint32_t inputcrc) { uint8_t *p = (uint8_t *)input; size_t i; uint32_t crc = inputcrc; for (i = 0; i < n; i++) crc = starpu_crc32c_be_8(p[i], crc); return crc; } uint32_t starpu_hash_crc32c_be_ptr(void *input, uint32_t inputcrc) { return starpu_hash_crc32c_be_n(&input, sizeof(input), inputcrc); } uint32_t starpu_hash_crc32c_be(uint32_t input, uint32_t inputcrc) { uint8_t *p = (uint8_t *)&input; uint32_t crc = inputcrc; crc = starpu_crc32c_be_8(p[0], crc); crc = starpu_crc32c_be_8(p[1], crc); crc = starpu_crc32c_be_8(p[2], crc); crc = starpu_crc32c_be_8(p[3], crc); return crc; } uint32_t starpu_hash_crc32c_string(const char *str, uint32_t inputcrc) { uint32_t hash = inputcrc; size_t len = strlen(str); unsigned i; for (i = 0; i < len; i++) { hash = starpu_crc32c_be_8((uint8_t)str[i], hash); } return hash; } starpu-1.4.9+dfsg/src/common/inlines.c000066400000000000000000000015401507764646700176740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This includes the inline definitions in a .c file so that they can also be * referenced from outside */ #define LIST_INLINE #define PRIO_LIST_INLINE #include starpu-1.4.9+dfsg/src/common/knobs.c000066400000000000000000000714621507764646700173610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Performance counters and configurable knobs */ #include #include #include #include #include #include #include /* Performance Monitoring */ struct perf_counter_array { int size; struct starpu_perf_counter *array; int updater_array_size; void (**updater_array)(struct starpu_perf_counter_sample *sample, void *context); }; static struct perf_counter_array global_counters = { .size = 0, .array = NULL, .updater_array_size = 0, .updater_array = NULL }; static struct perf_counter_array per_worker_counters = { .size = 0, .array = NULL, .updater_array_size = 0, .updater_array = NULL }; static struct perf_counter_array per_codelet_counters = { .size = 0, .array = NULL, .updater_array_size = 0, .updater_array = NULL }; static struct starpu_perf_counter_sample global_sample = { .scope = starpu_perf_counter_scope_global, .listener = NULL, .value_array = NULL }; /* - */ void _starpu_perf_counter_sample_init(struct starpu_perf_counter_sample *sample, enum starpu_perf_counter_scope scope) { STARPU_ASSERT_PERF_COUNTER_SCOPE_DEFINED(scope); sample->scope = scope; sample->listener = NULL; sample->value_array = NULL; _starpu_spin_init(&sample->lock); } void _starpu_perf_counter_sample_exit(struct starpu_perf_counter_sample *sample) { STARPU_ASSERT(sample->listener == NULL); sample->listener = NULL; if (sample->value_array) { free(sample->value_array); } sample->value_array = NULL; sample->scope = starpu_perf_counter_scope_undefined; _starpu_spin_destroy(&sample->lock); } /* - */ void _starpu_perf_counter_init(struct _starpu_machine_config *pconfig) { if (pconfig->conf.start_perf_counter_collection) { /* start perf counter collection immediately */ pconfig->perf_counter_pause_depth = 0; } else { /* defer perf counter collection until call to * starpu_perf_counter_start_collection () */ pconfig->perf_counter_pause_depth = 1; } STARPU_ASSERT(!_starpu_machine_is_running()); _starpu_perf_counter_sample_init(&global_sample, starpu_perf_counter_scope_global); /* call counter registration routines in each modules */ _starpu__task_c__register_counters(); } void _starpu_perf_counter_exit(void) { STARPU_ASSERT(!_starpu_machine_is_running()); _starpu_perf_counter_unregister_all_scopes(); _starpu_perf_counter_sample_exit(&global_sample); } /* - */ void starpu_perf_counter_collection_start() { STARPU_HG_DISABLE_CHECKING(_starpu_config.perf_counter_pause_depth); (void)STARPU_ATOMIC_ADD(&_starpu_config.perf_counter_pause_depth, -1); } void starpu_perf_counter_collection_stop() { STARPU_HG_DISABLE_CHECKING(_starpu_config.perf_counter_pause_depth); (void)STARPU_ATOMIC_ADD(&_starpu_config.perf_counter_pause_depth, +1); } /* - */ int starpu_perf_counter_scope_name_to_id(const char * const name) { if (strcmp(name, "global") == 0) return starpu_perf_counter_scope_global; if (strcmp(name, "per_worker") == 0) return starpu_perf_counter_scope_per_worker; if (strcmp(name, "per_codelet") == 0) return starpu_perf_counter_scope_per_codelet; return -1; } const char *starpu_perf_counter_scope_id_to_name(const enum starpu_perf_counter_scope scope) { switch (scope) { case starpu_perf_counter_scope_global: return "global"; case starpu_perf_counter_scope_per_worker: return "per_worker"; case starpu_perf_counter_scope_per_codelet: return "per_codelet"; default: return NULL; }; } /* - */ int starpu_perf_counter_type_name_to_id(const char * const name) { if (strcmp(name, "int32") == 0) return starpu_perf_counter_type_int32; if (strcmp(name, "int64") == 0) return starpu_perf_counter_type_int64; if (strcmp(name, "float") == 0) return starpu_perf_counter_type_float; if (strcmp(name, "double") == 0) return starpu_perf_counter_type_double; return -1; } const char *starpu_perf_counter_type_id_to_name(const enum starpu_perf_counter_type type) { switch (type) { case starpu_perf_counter_type_int32: return "int32"; case starpu_perf_counter_type_int64: return "int64"; case starpu_perf_counter_type_float: return "float"; case starpu_perf_counter_type_double: return "double"; default: return NULL; }; } static struct perf_counter_array *_get_counters(const enum starpu_perf_counter_scope scope) { STARPU_ASSERT_PERF_COUNTER_SCOPE_DEFINED(scope); switch (scope) { case starpu_perf_counter_scope_global: return &global_counters; case starpu_perf_counter_scope_per_worker: return &per_worker_counters; case starpu_perf_counter_scope_per_codelet: return &per_codelet_counters; default: STARPU_ABORT(); }; return NULL; }; /* - */ int _starpu_perf_counter_register(enum starpu_perf_counter_scope scope, const char *name, enum starpu_perf_counter_type type, const char *help) { STARPU_ASSERT(!_starpu_machine_is_running()); struct perf_counter_array * const counters = _get_counters(scope); STARPU_ASSERT_PERF_COUNTER_TYPE_DEFINED(type); const int index = counters->size++; _STARPU_REALLOC(counters->array, counters->size * sizeof(*counters->array)); struct starpu_perf_counter * const new_counter = &counters->array[index]; const int id = _starpu_perf_counter_id_build(scope, index); new_counter->id = id; new_counter->name = name; new_counter->help = help; new_counter->type = type; return id; } static void _unregister_counter_scope(enum starpu_perf_counter_scope scope) { STARPU_ASSERT(!_starpu_machine_is_running()); struct perf_counter_array * const counters = _get_counters(scope); free(counters->array); counters->array = NULL; free(counters->updater_array); counters->updater_array = NULL; counters->size = 0; } void _starpu_perf_counter_unregister_all_scopes(void) { STARPU_ASSERT(!_starpu_machine_is_running()); _unregister_counter_scope(starpu_perf_counter_scope_global); _unregister_counter_scope(starpu_perf_counter_scope_per_worker); _unregister_counter_scope(starpu_perf_counter_scope_per_codelet); } /* - */ int starpu_perf_counter_nb(enum starpu_perf_counter_scope scope) { const struct perf_counter_array * const counters = _get_counters(scope); return counters->size; } int starpu_perf_counter_nth_to_id(enum starpu_perf_counter_scope scope, int nth) { return _starpu_perf_counter_id_build(scope, nth); } int starpu_perf_counter_name_to_id(enum starpu_perf_counter_scope scope, const char *name) { const struct perf_counter_array * const counters = _get_counters(scope); int index; for (index = 0; index < counters->size; index++) { if (strcmp(name, counters->array[index].name) == 0) { return _starpu_perf_counter_id_build(scope, index); } } return -1; } const char *starpu_perf_counter_id_to_name(int id) { const int scope = _starpu_perf_counter_id_get_scope(id); const int index = _starpu_perf_counter_id_get_index(id); const struct perf_counter_array * const counters = _get_counters(scope); if (index < 0 || index >= counters->size) return NULL; return counters->array[index].name; } const char *starpu_perf_counter_get_help_string(int id) { const int scope = _starpu_perf_counter_id_get_scope(id); const int index = _starpu_perf_counter_id_get_index(id); const struct perf_counter_array * const counters = _get_counters(scope); STARPU_ASSERT(index >= 0 && index < counters->size); return counters->array[index].help; } int starpu_perf_counter_get_type_id(int id) { const int scope = _starpu_perf_counter_id_get_scope(id); const int index = _starpu_perf_counter_id_get_index(id); const struct perf_counter_array * const counters = _get_counters(scope); STARPU_ASSERT(index >= 0 && index < counters->size); return counters->array[index].type; } /* - */ void starpu_perf_counter_list_avail(enum starpu_perf_counter_scope scope) { const struct perf_counter_array * const counters = _get_counters(scope); int index; for (index = 0; index < counters->size; index++) { const struct starpu_perf_counter * const counter = &counters->array[index]; printf("0x%08x:%s [%s] - %s\n", _starpu_perf_counter_id_build(scope, index), counter->name, starpu_perf_counter_type_id_to_name(counter->type), counter->help); } } void starpu_perf_counter_list_all_avail(void) { printf("scope: global\n"); starpu_perf_counter_list_avail(starpu_perf_counter_scope_global); printf("scope: per_worker\n"); starpu_perf_counter_list_avail(starpu_perf_counter_scope_per_worker); printf("scope: per_codelet\n"); starpu_perf_counter_list_avail(starpu_perf_counter_scope_per_codelet); } /* - */ struct starpu_perf_counter_set *starpu_perf_counter_set_alloc(enum starpu_perf_counter_scope scope) { struct perf_counter_array *counters = _get_counters(scope); struct starpu_perf_counter_set *set; _STARPU_MALLOC(set, sizeof(*set)); set->scope = scope; set->size = counters->size; _STARPU_CALLOC(set->index_array, set->size, sizeof(*set->index_array)); return set; } void starpu_perf_counter_set_free(struct starpu_perf_counter_set *set) { memset(set->index_array, 0, set->size*sizeof(*set->index_array)); free(set->index_array); memset(set, 0, sizeof(*set)); free(set); } /* - */ void starpu_perf_counter_set_enable_id(struct starpu_perf_counter_set *set, int id) { const int index = _starpu_perf_counter_id_get_index(id); STARPU_ASSERT(index >= 0 && index < set->size); set->index_array[index] = 1; } void starpu_perf_counter_set_disable_id(struct starpu_perf_counter_set *set, int id) { const int index = _starpu_perf_counter_id_get_index(id); STARPU_ASSERT(index >= 0 && index < set->size); set->index_array[index] = 0; } /* - */ struct starpu_perf_counter_listener *starpu_perf_counter_listener_init(struct starpu_perf_counter_set *set, void (*callback)(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context), void *user_arg) { struct starpu_perf_counter_listener *listener; _STARPU_MALLOC(listener, sizeof(*listener)); listener->set = set; listener->callback = callback; listener->user_arg = user_arg; return listener; } void starpu_perf_counter_listener_exit(struct starpu_perf_counter_listener *listener) { memset(listener, 0, sizeof(*listener)); free(listener); } /* - */ static void set_listener(struct starpu_perf_counter_sample *sample, struct starpu_perf_counter_listener *listener) { _starpu_spin_lock(&sample->lock); STARPU_ASSERT(sample->listener == NULL); STARPU_ASSERT(listener->set != NULL); STARPU_ASSERT(listener->set->scope == sample->scope); sample->listener = listener; /* Assume a single listener, for now, which sets the set of counters to monitor */ STARPU_ASSERT(sample->value_array == NULL); _STARPU_CALLOC(sample->value_array, sample->listener->set->size, sizeof(*sample->value_array)); _starpu_spin_unlock(&sample->lock); } void starpu_perf_counter_set_global_listener(struct starpu_perf_counter_listener *listener) { set_listener(&global_sample, listener); } void starpu_perf_counter_set_per_worker_listener(unsigned workerid, struct starpu_perf_counter_listener *listener) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); set_listener(&worker->perf_counter_sample, listener); } void starpu_perf_counter_set_all_per_worker_listeners(struct starpu_perf_counter_listener *listener) { unsigned nworkers = _starpu_worker_get_count(); unsigned workerid; for (workerid = 0; workerid < nworkers; workerid++) { starpu_perf_counter_set_per_worker_listener(workerid, listener); } } void starpu_perf_counter_set_per_codelet_listener(struct starpu_codelet *cl, struct starpu_perf_counter_listener *listener) { STARPU_ASSERT(cl->perf_counter_values == NULL); _STARPU_CALLOC(cl->perf_counter_values, 1, sizeof(*cl->perf_counter_values)); STARPU_ASSERT(cl->perf_counter_sample == NULL); _STARPU_MALLOC(cl->perf_counter_sample, sizeof(*cl->perf_counter_sample)); _starpu_perf_counter_sample_init(cl->perf_counter_sample, starpu_perf_counter_scope_per_codelet); set_listener(cl->perf_counter_sample, listener); } /* - */ static void unset_listener(struct starpu_perf_counter_sample *sample) { _starpu_spin_lock(&sample->lock); STARPU_ASSERT(sample->listener != NULL); memset(sample->value_array, 0, sample->listener->set->size * sizeof(*sample->value_array)); free(sample->value_array); sample->value_array = NULL; sample->listener = NULL; _starpu_spin_unlock(&sample->lock); } void starpu_perf_counter_unset_global_listener() { unset_listener(&global_sample); } void starpu_perf_counter_unset_per_worker_listener(unsigned workerid) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); unset_listener(&worker->perf_counter_sample); } void starpu_perf_counter_unset_all_per_worker_listeners(void) { unsigned nworkers = _starpu_worker_get_count(); unsigned workerid; for (workerid = 0; workerid < nworkers; workerid++) { starpu_perf_counter_unset_per_worker_listener(workerid); } } void starpu_perf_counter_unset_per_codelet_listener(struct starpu_codelet *cl) { STARPU_ASSERT(cl->perf_counter_sample != NULL); unset_listener(cl->perf_counter_sample); _starpu_perf_counter_sample_exit(cl->perf_counter_sample); free(cl->perf_counter_sample); cl->perf_counter_sample = NULL; free(cl->perf_counter_values); cl->perf_counter_values = NULL; } /* - */ void _starpu_perf_counter_register_updater(enum starpu_perf_counter_scope scope, void (*updater)(struct starpu_perf_counter_sample *sample, void *context)) { STARPU_ASSERT(!_starpu_machine_is_running()); struct perf_counter_array *counters = _get_counters(scope); int upd_id; upd_id = counters->updater_array_size++; _STARPU_REALLOC(counters->updater_array, counters->updater_array_size * sizeof(*counters->updater_array)); counters->updater_array[upd_id] = updater; } /* - */ static void update_sample(struct starpu_perf_counter_sample *sample, void *context) { if (sample->listener == NULL) return; _starpu_spin_lock(&sample->lock); struct perf_counter_array *counters = _get_counters(sample->scope); /* for now, we assume that a sample will only be updated if it has a listener plugged, with a non-empty set */ if (sample->listener != NULL && sample->listener->set != NULL) { if (counters->updater_array_size > 0) { int upd_id; for (upd_id = 0; upd_id < counters->updater_array_size; upd_id++) { counters->updater_array[upd_id](sample, context); } if (sample->listener != NULL) { sample->listener->callback(sample->listener, sample, context); } } } _starpu_spin_unlock(&sample->lock); } void _starpu_perf_counter_update_global_sample(void) { update_sample(&global_sample, NULL); } void _starpu_perf_counter_update_per_worker_sample(unsigned workerid) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); update_sample(&worker->perf_counter_sample, worker); } void _starpu_perf_counter_update_per_codelet_sample(struct starpu_codelet *cl) { update_sample(cl->perf_counter_sample, cl); } #define STARPU_PERF_COUNTER_SAMPLE_GET_TYPED_VALUE(STRING, TYPE) \ TYPE starpu_perf_counter_sample_get_##STRING##_value(struct starpu_perf_counter_sample *sample, const int counter_id) \ { \ STARPU_ASSERT(starpu_perf_counter_get_type_id(counter_id) == starpu_perf_counter_type_##STRING); \ STARPU_ASSERT(sample->listener != NULL && sample->listener->set != NULL); \ STARPU_ASSERT(_starpu_perf_counter_id_get_scope(counter_id) == sample->listener->set->scope); \ \ const struct starpu_perf_counter_set * const set = sample->listener->set; \ const int index = _starpu_perf_counter_id_get_index(counter_id); \ STARPU_ASSERT(index < set->size); \ STARPU_ASSERT(set->index_array[index] > 0); \ return sample->value_array[index].STRING##_val; \ } STARPU_PERF_COUNTER_SAMPLE_GET_TYPED_VALUE(int32, int32_t); STARPU_PERF_COUNTER_SAMPLE_GET_TYPED_VALUE(int64, int64_t); STARPU_PERF_COUNTER_SAMPLE_GET_TYPED_VALUE(float, float); STARPU_PERF_COUNTER_SAMPLE_GET_TYPED_VALUE(double, double); #undef STARPU_PERF_COUNTER_SAMPLE_GET_TYPED_VALUE /* -------------------------------------------------------------------- */ /* Performance Steering */ struct perf_knob_array { int size; struct starpu_perf_knob *array; }; static struct perf_knob_array global_knobs = { .size = 0, .array = NULL }; static struct perf_knob_array per_worker_knobs = { .size = 0, .array = NULL }; static struct perf_knob_array per_scheduler_knobs = { .size = 0, .array = NULL }; void _starpu_perf_knob_init(void) { STARPU_ASSERT(!_starpu_machine_is_running()); /* call knob registration routines in each modules */ _starpu__workers_c__register_knobs(); _starpu__task_c__register_knobs(); _starpu__dmda_c__register_knobs(); } void _starpu_perf_knob_exit(void) { STARPU_ASSERT(!_starpu_machine_is_running()); _starpu_perf_knob_unregister_all_scopes(); _starpu__workers_c__unregister_knobs(); _starpu__task_c__unregister_knobs(); _starpu__dmda_c__unregister_knobs(); } /* - */ int starpu_perf_knob_scope_name_to_id(const char * const name) { if (strcmp(name, "global") == 0) return starpu_perf_knob_scope_global; if (strcmp(name, "per_worker") == 0) return starpu_perf_knob_scope_per_worker; if (strcmp(name, "per_scheduler") == 0) return starpu_perf_knob_scope_per_scheduler; return -1; } const char *starpu_perf_knob_scope_id_to_name(const enum starpu_perf_knob_scope scope) { switch (scope) { case starpu_perf_knob_scope_global: return "global"; case starpu_perf_knob_scope_per_worker: return "per_worker"; case starpu_perf_knob_scope_per_scheduler: return "per_scheduler"; default: return NULL; }; } /* - */ int starpu_perf_knob_type_name_to_id(const char * const name) { if (strcmp(name, "int32") == 0) return starpu_perf_knob_type_int32; if (strcmp(name, "int64") == 0) return starpu_perf_knob_type_int64; if (strcmp(name, "float") == 0) return starpu_perf_knob_type_float; if (strcmp(name, "double") == 0) return starpu_perf_knob_type_double; return -1; } const char *starpu_perf_knob_type_id_to_name(const enum starpu_perf_knob_type type) { switch (type) { case starpu_perf_knob_type_int32: return "int32"; case starpu_perf_knob_type_int64: return "int64"; case starpu_perf_knob_type_float: return "float"; case starpu_perf_knob_type_double: return "double"; default: return NULL; }; } static struct perf_knob_array *_get_knobs(const enum starpu_perf_knob_scope scope) { STARPU_ASSERT_PERF_KNOB_SCOPE_DEFINED(scope); switch (scope) { case starpu_perf_knob_scope_global: return &global_knobs; case starpu_perf_knob_scope_per_worker: return &per_worker_knobs; case starpu_perf_knob_scope_per_scheduler: return &per_scheduler_knobs; default: STARPU_ABORT(); }; return NULL; }; /* - */ struct starpu_perf_knob_group *_starpu_perf_knob_group_register(enum starpu_perf_knob_scope scope, void (*set_func)(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value), void (*get_func)(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value)) { STARPU_ASSERT_PERF_KNOB_SCOPE_DEFINED(scope); STARPU_ASSERT(set_func != NULL); STARPU_ASSERT(get_func != NULL); struct starpu_perf_knob_group *new_group; _STARPU_MALLOC(new_group, sizeof(*new_group)); new_group->scope = scope; new_group->set = set_func; new_group->get = get_func; new_group->array_size = 0; new_group->array = NULL; return new_group; } void _starpu_perf_knob_group_unregister(struct starpu_perf_knob_group *group) { STARPU_ASSERT((group->array_size > 0 && group->array != NULL) || (group->array_size = 0 && group->array == NULL)); if (group->array != NULL) { free(group->array); } memset(group, 0, sizeof(*group)); free(group); } /* - */ int _starpu_perf_knob_register(struct starpu_perf_knob_group *group, const char *name, enum starpu_perf_knob_type type, const char *help) { STARPU_ASSERT(!_starpu_machine_is_running()); struct perf_knob_array * const knobs = _get_knobs(group->scope); STARPU_ASSERT_PERF_KNOB_TYPE_DEFINED(type); const int index = knobs->size++; _STARPU_REALLOC(knobs->array, knobs->size * sizeof(*knobs->array)); struct starpu_perf_knob * const new_knob = &knobs->array[index]; const int id = _starpu_perf_knob_id_build(group->scope, index); new_knob->id = id; new_knob->name = name; new_knob->help = help; new_knob->type = type; new_knob->group = group; new_knob->id_in_group = group->array_size++; _STARPU_REALLOC(group->array, group->array_size * sizeof(*group->array)); group->array[new_knob->id_in_group] = new_knob; return id; } static void _unregister_knob_scope(enum starpu_perf_knob_scope scope) { STARPU_ASSERT(!_starpu_machine_is_running()); struct perf_knob_array * const knobs = _get_knobs(scope); free(knobs->array); knobs->array = NULL; knobs->size = 0; } void _starpu_perf_knob_unregister_all_scopes(void) { STARPU_ASSERT(!_starpu_machine_is_running()); _unregister_knob_scope(starpu_perf_knob_scope_global); _unregister_knob_scope(starpu_perf_knob_scope_per_worker); _unregister_knob_scope(starpu_perf_knob_scope_per_scheduler); } /* - */ int starpu_perf_knob_nb(enum starpu_perf_knob_scope scope) { const struct perf_knob_array * const knobs = _get_knobs(scope); return knobs->size; } int starpu_perf_knob_nth_to_id(enum starpu_perf_knob_scope scope, int nth) { return _starpu_perf_knob_id_build(scope, nth); } int starpu_perf_knob_name_to_id(enum starpu_perf_knob_scope scope, const char *name) { const struct perf_knob_array * const knobs = _get_knobs(scope); int index; for (index = 0; index < knobs->size; index++) { if (strcmp(name, knobs->array[index].name) == 0) { return _starpu_perf_knob_id_build(scope, index); } } return -1; } const char *starpu_perf_knob_id_to_name(int id) { const int scope = _starpu_perf_knob_id_get_scope(id); const int index = _starpu_perf_knob_id_get_index(id); const struct perf_knob_array * const knobs = _get_knobs(scope); if (index < 0 || index >= knobs->size) return NULL; return knobs->array[index].name; } const char *starpu_perf_knob_get_help_string(int id) { const int scope = _starpu_perf_knob_id_get_scope(id); const int index = _starpu_perf_knob_id_get_index(id); const struct perf_knob_array * const knobs = _get_knobs(scope); STARPU_ASSERT(index >= 0 && index < knobs->size); return knobs->array[index].help; } int starpu_perf_knob_get_type_id(int id) { const int scope = _starpu_perf_knob_id_get_scope(id); const int index = _starpu_perf_knob_id_get_index(id); const struct perf_knob_array * const knobs = _get_knobs(scope); STARPU_ASSERT(index >= 0 && index < knobs->size); return knobs->array[index].type; } static struct starpu_perf_knob *get_knob(int id) { const int scope = _starpu_perf_knob_id_get_scope(id); struct perf_knob_array *knobs = _get_knobs(scope); const int index = _starpu_perf_knob_id_get_index(id); STARPU_ASSERT(index >= 0 && index < knobs->size); return &knobs->array[index]; } /* - */ void starpu_perf_knob_list_avail(enum starpu_perf_knob_scope scope) { const struct perf_knob_array * const knobs = _get_knobs(scope); int index; for (index = 0; index < knobs->size; index++) { const struct starpu_perf_knob * const knob = &knobs->array[index]; printf("0x%08x:%s [%s] - %s\n", _starpu_perf_knob_id_build(scope, index), knob->name, starpu_perf_knob_type_id_to_name(knob->type), knob->help); } } void starpu_perf_knob_list_all_avail(void) { printf("scope: global\n"); starpu_perf_knob_list_avail(starpu_perf_knob_scope_global); printf("scope: per_worker\n"); starpu_perf_knob_list_avail(starpu_perf_knob_scope_per_worker); printf("scope: per_scheduler\n"); starpu_perf_knob_list_avail(starpu_perf_knob_scope_per_scheduler); } #define __STARPU_PERF_KNOB_SET_TYPED_VALUE(SCOPE_NAME, STRING, TYPE) \ void starpu_perf_knob_set_##SCOPE_NAME##_##STRING##_value(const int knob_id, const TYPE value) \ { \ STARPU_ASSERT(_starpu_perf_knob_id_get_scope(knob_id) == starpu_perf_knob_scope_global); \ const struct starpu_perf_knob * const knob = get_knob(knob_id); \ STARPU_ASSERT(starpu_perf_knob_get_type_id(knob_id) == starpu_perf_knob_type_##STRING); \ const struct starpu_perf_knob_group * const knob_group = knob->group; \ const struct starpu_perf_knob_value kv = { .val_##TYPE = value }; \ knob_group->set(knob, NULL, &kv); \ } __STARPU_PERF_KNOB_SET_TYPED_VALUE(global, int32, int32_t); __STARPU_PERF_KNOB_SET_TYPED_VALUE(global, int64, int64_t); __STARPU_PERF_KNOB_SET_TYPED_VALUE(global, float, float); __STARPU_PERF_KNOB_SET_TYPED_VALUE(global, double, double); #undef __STARPU_PERF_KNOB_SAMPLE_SET_TYPED_VALUE #define __STARPU_PERF_KNOB_GET_TYPED_VALUE(SCOPE_NAME, STRING, TYPE) \ TYPE starpu_perf_knob_get_##SCOPE_NAME##_##STRING##_value(const int knob_id) \ { \ STARPU_ASSERT(_starpu_perf_knob_id_get_scope(knob_id) == starpu_perf_knob_scope_global); \ const struct starpu_perf_knob * const knob = get_knob(knob_id); \ STARPU_ASSERT(starpu_perf_knob_get_type_id(knob_id) == starpu_perf_knob_type_##STRING); \ const struct starpu_perf_knob_group * const knob_group = knob->group; \ struct starpu_perf_knob_value kv; \ knob_group->get(knob, NULL, &kv); \ return kv.val_##TYPE; \ } __STARPU_PERF_KNOB_GET_TYPED_VALUE(global, int32, int32_t); __STARPU_PERF_KNOB_GET_TYPED_VALUE(global, int64, int64_t); __STARPU_PERF_KNOB_GET_TYPED_VALUE(global, float, float); __STARPU_PERF_KNOB_GET_TYPED_VALUE(global, double, double); #undef __STARPU_PERF_KNOB_SAMPLE_GET_TYPED_VALUE #define __STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(SCOPE_NAME, STRING, TYPE, CONTEXT_TYPE, CONTEXT_VAR) \ void starpu_perf_knob_set_##SCOPE_NAME##_##STRING##_value(const int knob_id, CONTEXT_TYPE CONTEXT_VAR, const TYPE value) \ { \ STARPU_ASSERT(_starpu_perf_knob_id_get_scope(knob_id) == starpu_perf_knob_scope_##SCOPE_NAME); \ const struct starpu_perf_knob * const knob = get_knob(knob_id); \ STARPU_ASSERT(starpu_perf_knob_get_type_id(knob_id) == starpu_perf_knob_type_##STRING); \ const struct starpu_perf_knob_group * const knob_group = knob->group; \ const struct starpu_perf_knob_value kv = { .val_##TYPE = value }; \ knob_group->set(knob, &CONTEXT_VAR, &kv); \ } __STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_worker, int32, int32_t, unsigned, workerid); __STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_worker, int64, int64_t, unsigned, workerid); __STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_worker, float, float, unsigned, workerid); __STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_worker, double, double, unsigned, workerid); __STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, int32, int32_t, const char *, sched_policy_name); __STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, int64, int64_t, const char *, sched_policy_name); __STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, float, float, const char *, sched_policy_name); __STARPU_PERF_KNOB_SET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, double, double, const char *, sched_policy_name); #undef __STARPU_PERF_KNOB_SAMPLE_SET_TYPED_VALUE_WITH_CONTEXT #define __STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(SCOPE_NAME, STRING, TYPE, CONTEXT_TYPE, CONTEXT_VAR) \ TYPE starpu_perf_knob_get_##SCOPE_NAME##_##STRING##_value(const int knob_id, CONTEXT_TYPE CONTEXT_VAR) \ { \ STARPU_ASSERT(_starpu_perf_knob_id_get_scope(knob_id) == starpu_perf_knob_scope_##SCOPE_NAME); \ const struct starpu_perf_knob * const knob = get_knob(knob_id); \ STARPU_ASSERT(starpu_perf_knob_get_type_id(knob_id) == starpu_perf_knob_type_##STRING); \ const struct starpu_perf_knob_group * const knob_group = knob->group; \ struct starpu_perf_knob_value kv; \ knob_group->get(knob, &CONTEXT_VAR, &kv); \ return kv.val_##TYPE; \ } __STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_worker, int32, int32_t, unsigned, workerid); __STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_worker, int64, int64_t, unsigned, workerid); __STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_worker, float, float, unsigned, workerid); __STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_worker, double, double, unsigned, workerid); __STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, int32, int32_t, const char *, sched_policy_name); __STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, int64, int64_t, const char *, sched_policy_name); __STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, float, float, const char *, sched_policy_name); __STARPU_PERF_KNOB_GET_TYPED_VALUE_WITH_CONTEXT(per_scheduler, double, double, const char *, sched_policy_name); #undef __STARPU_PERF_KNOB_SAMPLE_GET_TYPED_VALUE_WITH_CONTEXT starpu-1.4.9+dfsg/src/common/knobs.h000066400000000000000000000326771507764646700173730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Performance counters and configurable knobs */ #ifndef __KNOBS_H__ #define __KNOBS_H__ /** @file */ #include #include #include #pragma GCC visibility push(hidden) /** Performance Monitoring */ #define STARPU_ASSERT_PERF_COUNTER_SCOPE_DEFINED(t) STARPU_ASSERT( \ (t == starpu_perf_counter_scope_global) \ || (t == starpu_perf_counter_scope_per_worker) \ || (t == starpu_perf_counter_scope_per_codelet) \ ) #define STARPU_ASSERT_PERF_COUNTER_TYPE_DEFINED(t) STARPU_ASSERT( \ (t == starpu_perf_counter_type_int32) \ || (t == starpu_perf_counter_type_int64) \ || (t == starpu_perf_counter_type_float) \ || (t == starpu_perf_counter_type_double) \ ) #define _STARPU_PERF_COUNTER_ID_SCOPE_BITS 4 #if defined(STARPU_VAL_COMPARE_AND_SWAP64) && defined (STARPU_ATOMIC_ADD64) #define STARPU_PERF_COUNTER_64 #endif struct starpu_perf_counter_sample; struct _starpu_worker; #define __STARPU_PERF_COUNTER_UPDATE_32BIT(OPNAME,OP,TYPENAME,TYPE) \ static inline void _starpu_perf_counter_update_##OPNAME##_##TYPENAME(TYPE *ptr, TYPE value) \ { \ STARPU_ASSERT(sizeof(TYPE) == sizeof(uint32_t)); \ typedef uint32_t __attribute__((__may_alias__)) alias_uint32_t; \ typedef TYPE __attribute__((__may_alias__)) alias_##TYPE; \ \ uint32_t raw_old = *(uint32_t *)ptr; \ \ while(value OP *(alias_##TYPE*)&raw_old) \ { \ uint32_t raw_old_check = STARPU_VAL_COMPARE_AND_SWAP32((uint32_t *)ptr, raw_old, *(alias_uint32_t*)&value); \ if (raw_old_check == raw_old) \ break; \ raw_old = raw_old_check; \ } \ } #ifdef STARPU_PERF_COUNTER_64 typedef int64_t starpu_perf_counter_int64_t; typedef double starpu_perf_counter_double; #define __STARPU_PERF_COUNTER_UPDATE_64BIT(OPNAME,OP,TYPENAME,TYPE) \ static inline void _starpu_perf_counter_update_##OPNAME##_##TYPENAME(TYPE *ptr, TYPE value) \ { \ STARPU_ASSERT(sizeof(TYPE) == sizeof(uint64_t)); \ typedef uint64_t __attribute__((__may_alias__)) alias_uint64_t; \ typedef TYPE __attribute__((__may_alias__)) alias_##TYPE; \ \ uint64_t raw_old = *(uint64_t *)ptr; \ \ while(value OP *(alias_##TYPE*)&raw_old) \ { \ uint64_t raw_old_check = STARPU_VAL_COMPARE_AND_SWAP64((uint64_t *)ptr, raw_old, *(alias_uint64_t*)&value); \ if (raw_old_check == raw_old) \ break; \ raw_old = raw_old_check; \ } \ } #else /* No native 64bit atomic operation, revert to lower precision */ typedef int32_t starpu_perf_counter_int64_t; typedef float starpu_perf_counter_double; #define __STARPU_PERF_COUNTER_UPDATE_64BIT(OPNAME,OP,TYPENAME,TYPE) \ __STARPU_PERF_COUNTER_UPDATE_32BIT(OPNAME,OP,TYPENAME,TYPE) #endif /* Atomic max */ __STARPU_PERF_COUNTER_UPDATE_32BIT(max,>=,int32,int32_t); __STARPU_PERF_COUNTER_UPDATE_32BIT(max,>=,float,float); __STARPU_PERF_COUNTER_UPDATE_64BIT(max,>=,int64,starpu_perf_counter_int64_t); __STARPU_PERF_COUNTER_UPDATE_64BIT(max,>=,double,starpu_perf_counter_double); /* Atomic min */ __STARPU_PERF_COUNTER_UPDATE_32BIT(min,<=,int32,int32_t); __STARPU_PERF_COUNTER_UPDATE_32BIT(min,<=,float,float); __STARPU_PERF_COUNTER_UPDATE_64BIT(min,<=,int64,starpu_perf_counter_int64_t); __STARPU_PERF_COUNTER_UPDATE_64BIT(min,<=,double,starpu_perf_counter_double); #undef __STARPU_PERF_COUNTER_UPDATE_32BIT #undef __STARPU_PERF_COUNTER_UPDATE_64BIT /** Floating point atomic accumulate */ #define __STARPU_PERF_COUNTER_UPDATE_ACC_FLOAT(TYPENAME, TYPE) \ static inline void _starpu_perf_counter_update_acc_##TYPENAME(TYPE *ptr, TYPE acc_value) \ { \ STARPU_ASSERT(sizeof(TYPE) == sizeof(uint32_t)); \ typedef uint32_t __attribute__((__may_alias__)) alias_uint32_t; \ typedef TYPE __attribute__((__may_alias__)) alias_float; \ uint32_t raw_old = *(uint32_t *)ptr; \ while (1) \ { \ TYPE value = acc_value + *(alias_float*)&raw_old; \ uint32_t raw_old_check = STARPU_VAL_COMPARE_AND_SWAP32((uint32_t *)ptr, raw_old, *(alias_uint32_t*)&value); \ if (raw_old_check == raw_old) \ break; \ raw_old = raw_old_check; \ } \ } __STARPU_PERF_COUNTER_UPDATE_ACC_FLOAT(float, float); #ifdef STARPU_PERF_COUNTER_64 static inline void _starpu_perf_counter_update_acc_double(double *ptr, double acc_value) { STARPU_ASSERT(sizeof(double) == sizeof(uint64_t)); typedef uint64_t __attribute__((__may_alias__)) alias_uint64_t; typedef double __attribute__((__may_alias__)) alias_double; uint64_t raw_old = *(uint64_t *)ptr; while (1) { double value = acc_value + *(alias_double*)&raw_old; uint64_t raw_old_check = STARPU_VAL_COMPARE_AND_SWAP64((uint64_t *)ptr, raw_old, *(alias_uint64_t*)&value); if (raw_old_check == raw_old) break; raw_old = raw_old_check; } } #else __STARPU_PERF_COUNTER_UPDATE_ACC_FLOAT(double, starpu_perf_counter_double); #endif #ifdef STARPU_ATOMIC_ADD64 #define STARPU_PERF_COUNTER_ADD64(ptr, val) STARPU_ATOMIC_ADD64((ptr), (val)) #else #define STARPU_PERF_COUNTER_ADD64(ptr, val) STARPU_ATOMIC_ADD((ptr), (val)) #endif struct starpu_perf_counter { int id; const char *name; const char *help; enum starpu_perf_counter_type type; }; struct starpu_perf_counter_set { enum starpu_perf_counter_scope scope; int size; int *index_array; }; union starpu_perf_counter_value { int32_t int32_val; starpu_perf_counter_int64_t int64_val; float float_val; starpu_perf_counter_double double_val; }; struct starpu_perf_counter_listener { struct starpu_perf_counter_set *set; void (*callback)(struct starpu_perf_counter_listener *listener, struct starpu_perf_counter_sample *sample, void *context); void *user_arg; }; struct starpu_perf_counter_sample { enum starpu_perf_counter_scope scope; struct starpu_perf_counter_listener *listener; union starpu_perf_counter_value *value_array; struct _starpu_spinlock lock; }; struct starpu_perf_counter_sample_cl_values { struct { starpu_perf_counter_int64_t total_submitted; starpu_perf_counter_int64_t peak_submitted; starpu_perf_counter_int64_t current_submitted; starpu_perf_counter_int64_t peak_ready; starpu_perf_counter_int64_t current_ready; starpu_perf_counter_int64_t total_executed; starpu_perf_counter_double cumul_execution_time; } task; }; typedef void (*starpu_perf_counter_sample_updater)(struct starpu_perf_counter_sample *sample, void *context); static inline enum starpu_perf_counter_scope _starpu_perf_counter_id_get_scope(const int counter_id) { STARPU_ASSERT(counter_id >= 0); return counter_id & ((1 << _STARPU_PERF_COUNTER_ID_SCOPE_BITS) - 1); } static inline int _starpu_perf_counter_id_get_index(const int counter_id) { STARPU_ASSERT(counter_id >= 0); return counter_id >> _STARPU_PERF_COUNTER_ID_SCOPE_BITS; } static inline int _starpu_perf_counter_id_build(const enum starpu_perf_counter_scope scope, const int index) { STARPU_ASSERT_PERF_COUNTER_SCOPE_DEFINED(scope); STARPU_ASSERT(index >= 0); return (index << _STARPU_PERF_COUNTER_ID_SCOPE_BITS) | scope; } void _starpu_perf_counter_sample_init(struct starpu_perf_counter_sample *sample, enum starpu_perf_counter_scope scope); void _starpu_perf_counter_sample_exit(struct starpu_perf_counter_sample *sample); void _starpu_perf_counter_init(struct _starpu_machine_config *pconfig); void _starpu_perf_counter_exit(void); int _starpu_perf_counter_register(enum starpu_perf_counter_scope scope, const char *name, enum starpu_perf_counter_type type, const char *help); void _starpu_perf_counter_unregister_all_scopes(void); void _starpu_perf_counter_register_updater(enum starpu_perf_counter_scope scope, void (*updater)(struct starpu_perf_counter_sample *sample, void *context)); void _starpu_perf_counter_update_global_sample(void); void _starpu_perf_counter_update_per_worker_sample(unsigned workerid); void _starpu_perf_counter_update_per_codelet_sample(struct starpu_codelet *cl); #define __STARPU_PERF_COUNTER_SAMPLE_SET_TYPED_VALUE(STRING, TYPE) \ static inline void _starpu_perf_counter_sample_set_##STRING##_value(struct starpu_perf_counter_sample *sample, const int counter_id, const TYPE value) \ { \ STARPU_ASSERT(starpu_perf_counter_get_type_id(counter_id) == starpu_perf_counter_type_##STRING); \ STARPU_ASSERT(sample->listener != NULL && sample->listener->set != NULL); \ STARPU_ASSERT(_starpu_perf_counter_id_get_scope(counter_id) == sample->listener->set->scope); \ \ const struct starpu_perf_counter_set * const set = sample->listener->set; \ const int index = _starpu_perf_counter_id_get_index(counter_id); \ STARPU_ASSERT(index < set->size); \ if (set->index_array[index] > 0) \ { \ sample->value_array[index].STRING##_val = value; \ } \ } __STARPU_PERF_COUNTER_SAMPLE_SET_TYPED_VALUE(int32, int32_t); __STARPU_PERF_COUNTER_SAMPLE_SET_TYPED_VALUE(int64, starpu_perf_counter_int64_t); __STARPU_PERF_COUNTER_SAMPLE_SET_TYPED_VALUE(float, float); __STARPU_PERF_COUNTER_SAMPLE_SET_TYPED_VALUE(double, starpu_perf_counter_double); #undef __STARPU_PERF_COUNTER_SAMPLE_SET_TYPED_VALUE #define __STARPU_PERF_COUNTER_REG(PREFIX, SCOPE, CTR, TYPESTRING, HELP) \ do \ { \ __##CTR = _starpu_perf_counter_register(SCOPE, \ PREFIX "." #CTR, starpu_perf_counter_type_ ## TYPESTRING, \ HELP); \ } \ while (0) /* global counter variables */ extern starpu_perf_counter_int64_t _starpu_task__g_total_submitted__value; extern starpu_perf_counter_int64_t _starpu_task__g_peak_submitted__value; extern starpu_perf_counter_int64_t _starpu_task__g_current_submitted__value; extern starpu_perf_counter_int64_t _starpu_task__g_peak_ready__value; extern starpu_perf_counter_int64_t _starpu_task__g_current_ready__value; /* performance counter registration routines per modules */ void _starpu__task_c__register_counters(void); /* module: task.c */ /* -------------------------------------------------------------------- */ /* Performance Steering */ #define STARPU_ASSERT_PERF_KNOB_SCOPE_DEFINED(t) STARPU_ASSERT( \ (t == starpu_perf_knob_scope_global) \ || (t == starpu_perf_knob_scope_per_worker) \ || (t == starpu_perf_knob_scope_per_scheduler) \ ) #define STARPU_ASSERT_PERF_KNOB_TYPE_DEFINED(t) STARPU_ASSERT( \ (t == starpu_perf_knob_type_int32) \ || (t == starpu_perf_knob_type_int64) \ || (t == starpu_perf_knob_type_float) \ || (t == starpu_perf_knob_type_double) \ ) #define _STARPU_PERF_KNOBS_ID_SCOPE_BITS 4 struct starpu_perf_knob; struct starpu_perf_knob_value { enum starpu_perf_knob_type type; union { int32_t val_int32_t; starpu_perf_counter_int64_t val_int64_t; float val_float; starpu_perf_counter_double val_double; }; }; struct starpu_perf_knob_group { enum starpu_perf_knob_scope scope; void (*set)(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value); void (*get)(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value); int array_size; struct starpu_perf_knob **array; }; struct starpu_perf_knob { int id; int id_in_group; const char *name; const char *help; enum starpu_perf_knob_type type; struct starpu_perf_knob_group *group; }; #define __STARPU_PERF_KNOB_REG(PREFIX, SCOPE, CTR, TYPESTRING, HELP) \ do \ { \ __##CTR = _starpu_perf_knob_register(SCOPE, \ PREFIX "." #CTR, starpu_perf_knob_type_ ## TYPESTRING, \ HELP); \ } \ while (0) static inline int _starpu_perf_knob_id_get_scope(const int knob_id) { STARPU_ASSERT(knob_id >= 0); return knob_id & ((1 << _STARPU_PERF_KNOBS_ID_SCOPE_BITS) - 1); } static inline int _starpu_perf_knob_id_get_index(const int knob_id) { STARPU_ASSERT(knob_id >= 0); return knob_id >> _STARPU_PERF_KNOBS_ID_SCOPE_BITS; } static inline int _starpu_perf_knob_id_build(const enum starpu_perf_knob_scope scope, const int index) { STARPU_ASSERT_PERF_KNOB_SCOPE_DEFINED(scope); STARPU_ASSERT(index >= 0); return (index << _STARPU_PERF_KNOBS_ID_SCOPE_BITS) | scope; } void _starpu_perf_knob_init(void); void _starpu_perf_knob_exit(void); struct starpu_perf_knob_group *_starpu_perf_knob_group_register(enum starpu_perf_knob_scope scope, void (*set_func)(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value), void (*get_func)(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value)); void _starpu_perf_knob_group_unregister(struct starpu_perf_knob_group *group); int _starpu_perf_knob_register(struct starpu_perf_knob_group *group, const char *name, enum starpu_perf_knob_type type, const char *help); void _starpu_perf_knob_unregister_all_scopes(void); /* performance knob registration routines per modules */ void _starpu__workers_c__register_knobs(void); /* module: workers.c */ void _starpu__task_c__register_knobs(void); /* module: task.c */ void _starpu__dmda_c__register_knobs(void); /* module: dmda.c */ void _starpu__workers_c__unregister_knobs(void); /* module: workers.c */ void _starpu__task_c__unregister_knobs(void); /* module: task.c */ void _starpu__dmda_c__unregister_knobs(void); /* module: dmda.c */ #pragma GCC visibility pop #endif // __KNOBS_H__ starpu-1.4.9+dfsg/src/common/list.h000066400000000000000000000443631507764646700172250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __LIST_H__ #define __LIST_H__ /** @file */ #include /** @remarks list how-to * ********************************************************* * LIST_TYPE(FOO, content); * * - declares the following types: * * + for cells : struct FOO * + for lists : struct FOO_list * + for iterators : struct FOO * * - declares the following inlines (all O(1) except stated otherwise, n is the number of elements) : * * * Create a cell * struct FOO* FOO_new(void); * * * Suppress a cell * void FOO_delete(struct FOO*); * * * Create a list (initially empty) * struct FOO_list* FOO_list_new(void); * * * Initializes a list (initially empty) * void FOO_list_init(struct FOO_list*); * * * Initializes a list (initially empty), assuming that the content of FOO_list was already zeroed * void FOO_list_init0(struct FOO_list*); * * * Suppresses a liste * void FOO_list_delete(struct FOO_list*); * * * Check whether a list is empty * int FOO_list_empty(struct FOO_list*); * * * Remove a given cell from the list * void FOO_list_erase(struct FOO_list*, struct FOO*); * * * Add a cell at the back of the list * void FOO_list_push_back(struct FOO_list*, struct FOO*); * * * Add a cell at the front of the list * void FOO_list_push_front(struct FOO_list*, struct FOO*); * * * Add a cell before a given cell of a list * void FOO_list_insert_before(struct FOO_list*, struct FOO*new, struct FOO*); * * * Add a cell after a given cell of a list * void FOO_list_insert_after(struct FOO_list*, struct FOO*new, struct FOO*); * * * Append the second list at the end of the first list * struct FOO* FOO_list_push_list_back(struct FOO_list*, struct FOO_list*); * * * Prepend the first list at the beginning of the second list * struct FOO* FOO_list_push_list_front(struct FOO_list*, struct FOO_list*); * * * Return and remove the node at the back of the list * struct FOO* FOO_list_pop_back(struct FOO_list*); * * * Return and remove the node at the front of the list * struct FOO* FOO_list_pop_front(struct FOO_list*); * * * Return the node at the back of the list * struct FOO* FOO_list_back(struct FOO_list*); * * * Return the node at the front of the list * struct FOO* FOO_list_front(struct FOO_list*); * * * Check that the list chaining is coherent (O(n)) * int FOO_list_check(struct FOO_list*); * * * Return the first cell of the list (from the front) * struct FOO* FOO_list_begin(struct FOO_list*); * * * Return the value to be tested at the end of the list (at the back) * struct FOO* FOO_list_end(struct FOO_list*); * * * Return the next element of the list (from the front) * struct FOO* FOO_list_next(struct FOO*) * * * Return the last element of the list (from the back) * struct FOO* FOO_list_last(struct FOO_list*); * * * Return the value to be tested at the beginning of the list (at the front) * struct FOO* FOO_list_alpha(struct FOO_list*); * * * Return the previous element of the list (from the back) * struct FOO* FOO_list_prev(struct FOO*) * * * Return the size of the list in O(n) * int FOO_list_size(struct FOO_list*) * * * Return the position of the cell in the list (indexed from 0) (O(n) on average) * int FOO_list_member(struct FOO_list*, struct FOO*) * * * Test whether the cell is in the list (O(n) on average) * int FOO_list_ismember(struct FOO_list*, struct FOO*) * * ********************************************************* * Usage example: * - initially you'd have: * struct my_struct * { * int a; * int b; * }; * - to make a list of it, we replace the declaration above with: * LIST_TYPE(my_struct, * int a; * int b; * ); * which creates the struct my_struct and struct my_struct_list types. * * - setting up an empty list: * struct my_struct_list l; * my_struct_list_init(&l); * * - allocating an empty list: * struct my_struct_list * l = my_struct_list_new(); * - add a cell 'e' at the front of list 'l': * struct my_struct * e = my_struct_new(); * e->a = 0; * e->b = 0; * my_struct_list_push_front(&l, e); * * - iterating over a list from the front: * struct my_struct * i; * for(i = my_struct_list_begin(&l); * i != my_struct_list_end(&l); * i = my_struct_list_next(i)) * { * printf("a=%d; b=%d\n", i->a, i->b); * } * * - iterating over a list from the back: * struct my_struct * i; * for(i = my_struct_list_last(&l); * i != my_struct_list_alpha(&l); * i = my_struct_list_prev(i)) * { * printf("a=%d; b=%d\n", i->a, i->b); * } * ********************************************************* */ #ifndef LIST_INLINE #define LIST_INLINE static inline #endif /**@hideinitializer * Generates a new type for list of elements */ #define LIST_TYPE(ENAME, DECL) \ LIST_CREATE_TYPE(ENAME, DECL) #define LIST_CREATE_TYPE(ENAME, DECL) \ /** from automatic type: struct ENAME */ \ struct ENAME \ { \ struct ENAME *_prev; /**< @internal previous cell */ \ struct ENAME *_next; /**< @internal next cell */ \ DECL \ }; \ LIST_CREATE_TYPE_NOSTRUCT(ENAME, _prev, _next) /**@hideinitializer * The effective type declaration for lists */ #define LIST_CREATE_TYPE_NOSTRUCT(ENAME, _prev, _next) \ /** @internal */ \ /* NOTE: this must not be greater than the struct defined in include/starpu_task_list.h */ \ struct ENAME##_list \ { \ struct ENAME *_head; /**< @internal head of the list */ \ struct ENAME *_tail; /**< @internal tail of the list */ \ }; \ /** @internal */LIST_INLINE struct ENAME *ENAME##_new(void) \ { struct ENAME *e; _STARPU_MALLOC(e, sizeof(struct ENAME)); \ e->_next = NULL; e->_prev = NULL; return e; } \ /** @internal */LIST_INLINE void ENAME##_delete(struct ENAME *e) \ { free(e); } \ /** @internal */LIST_INLINE void ENAME##_list_push_front(struct ENAME##_list *l, struct ENAME *e) \ { if(l->_tail == NULL) l->_tail = e; else l->_head->_prev = e; \ e->_prev = NULL; e->_next = l->_head; l->_head = e; } \ /** @internal */LIST_INLINE void ENAME##_list_push_back(struct ENAME##_list *l, struct ENAME *e) \ { if(l->_head == NULL) l->_head = e; else l->_tail->_next = e; \ e->_next = NULL; e->_prev = l->_tail; l->_tail = e; } \ /** @internal */LIST_INLINE void ENAME##_list_insert_before(struct ENAME##_list *l, struct ENAME *e, struct ENAME *o) \ { struct ENAME *p = o->_prev; if (p) { p->_next = e; e->_prev = p; } else { l->_head = e; e->_prev = NULL; } \ e->_next = o; o->_prev = e; } \ /** @internal */LIST_INLINE void ENAME##_list_insert_after(struct ENAME##_list *l, struct ENAME *e, struct ENAME *o) \ { struct ENAME *n = o->_next; if (n) { n->_prev = e; e->_next = n; } else { l->_tail = e; e->_next = NULL; } \ e->_prev = o; o->_next = e; } \ /** @internal */LIST_INLINE void ENAME##_list_push_list_front(struct ENAME##_list *l1, struct ENAME##_list *l2) \ { if (l2->_head == NULL) { l2->_head = l1->_head; l2->_tail = l1->_tail; } \ else if (l1->_head != NULL) { l1->_tail->_next = l2->_head; l2->_head->_prev = l1->_tail; l2->_head = l1->_head; } } \ /** @internal */LIST_INLINE void ENAME##_list_push_list_back(struct ENAME##_list *l1, struct ENAME##_list *l2) \ { if(l1->_head == NULL) { l1->_head = l2->_head; l1->_tail = l2->_tail; } \ else if (l2->_head != NULL) { l1->_tail->_next = l2->_head; l2->_head->_prev = l1->_tail; l1->_tail = l2->_tail; } } \ /** @internal */LIST_INLINE struct ENAME *ENAME##_list_front(const struct ENAME##_list *l) \ { return l->_head; } \ /** @internal */LIST_INLINE struct ENAME *ENAME##_list_back(const struct ENAME##_list *l) \ { return l->_tail; } \ /** @internal */LIST_INLINE void ENAME##_list_init(struct ENAME##_list *l) \ { l->_head=NULL; l->_tail=NULL; } \ /** @internal */LIST_INLINE void ENAME##_list_init0(struct ENAME##_list *l STARPU_ATTRIBUTE_UNUSED) \ { } \ /** @internal */LIST_INLINE struct ENAME##_list *ENAME##_list_new(void) \ { struct ENAME##_list *l; _STARPU_MALLOC(l, sizeof(struct ENAME##_list)); \ ENAME##_list_init(l); return l; } \ /** @internal */LIST_INLINE int ENAME##_list_empty(const struct ENAME##_list *l) \ { return (l->_head == NULL); } \ /** @internal */LIST_INLINE void ENAME##_list_delete(struct ENAME##_list *l) \ { free(l); } \ /** @internal */LIST_INLINE void ENAME##_list_erase(struct ENAME##_list *l, struct ENAME *c) \ { struct ENAME *p = c->_prev; if(p) p->_next = c->_next; else l->_head = c->_next; \ if(c->_next) c->_next->_prev = p; else l->_tail = p; } \ /** @internal */LIST_INLINE struct ENAME *ENAME##_list_pop_front(struct ENAME##_list *l) \ { struct ENAME *e = ENAME##_list_front(l); \ ENAME##_list_erase(l, e); return e; } \ /** @internal */LIST_INLINE struct ENAME *ENAME##_list_pop_back(struct ENAME##_list *l) \ { struct ENAME *e = ENAME##_list_back(l); \ ENAME##_list_erase(l, e); return e; } \ /** @internal */LIST_INLINE struct ENAME *ENAME##_list_begin(const struct ENAME##_list *l) \ { return l->_head; } \ /** @internal */LIST_INLINE struct ENAME *ENAME##_list_end(const struct ENAME##_list *l STARPU_ATTRIBUTE_UNUSED) \ { return NULL; } \ /** @internal */LIST_INLINE struct ENAME *ENAME##_list_next(const struct ENAME *i) \ { return i->_next; } \ /** @internal */LIST_INLINE struct ENAME *ENAME##_list_last(const struct ENAME##_list *l) \ { return l->_tail; } \ /** @internal */LIST_INLINE struct ENAME *ENAME##_list_alpha(const struct ENAME##_list *l STARPU_ATTRIBUTE_UNUSED) \ { return NULL; } \ /** @internal */LIST_INLINE struct ENAME *ENAME##_list_prev(const struct ENAME *i) \ { return i->_prev; } \ /** @internal */LIST_INLINE int ENAME##_list_ismember(const struct ENAME##_list *l, const struct ENAME *e) \ { struct ENAME *i=l->_head; while(i!=NULL){ if (i == e) return 1; i=i->_next; } return 0; } \ /** @internal */LIST_INLINE int ENAME##_list_member(const struct ENAME##_list *l, const struct ENAME *e) \ { struct ENAME *i=l->_head; int k=0; while(i!=NULL){if (i == e) return k; k++; i=i->_next; } return -1; } \ /** @internal */LIST_INLINE int ENAME##_list_size(const struct ENAME##_list *l) \ { struct ENAME *i=l->_head; int k=0; while(i!=NULL){k++;i=i->_next;} return k; } \ /** @internal */LIST_INLINE int ENAME##_list_check(const struct ENAME##_list *l) \ { struct ENAME *i=l->_head; while(i) \ { if ((i->_next == NULL) && i != l->_tail) return 0; \ if (i->_next == i) return 0; \ i=i->_next;} return 1; } \ /** @internal */LIST_INLINE void ENAME##_list_move(struct ENAME##_list *ldst, struct ENAME##_list *lsrc) \ { ENAME##_list_init(ldst); ldst->_head = lsrc->_head; ldst->_tail = lsrc->_tail; lsrc->_head = NULL; lsrc->_tail = NULL; } #ifdef STARPU_DEBUG #define STARPU_ASSERT_MULTILIST(expr) STARPU_ASSERT(expr) #else #define STARPU_ASSERT_MULTILIST(expr) ((void) 0) #endif /* * This is an implementation of list allowing to be member of several lists. * - One should first call MULTILIST_CREATE_TYPE for the ENAME and for each * MEMBER type * - Then the main element type should include fields of type * ENAME_multilist_MEMBER * - Then one should call MULTILIST_CREATE_INLINES to create the inlines which * manipulate lists for this MEMBER type. * * ********************************************************* * Usage example: * * - initially you'd have: * struct my_struct * { * int a; * int b; * }; * * - to make multilists of it, we add MULTILIST_CREATE_TYPE calls before, the * multilist fields, and MULTILIST_CREATE_INLINES calls after:: * * MULTILIST_CREATE_TYPE(my_struct, foo); * MULTILIST_CREATE_TYPE(my_struct, bar); * * struct my_struct * { * struct my_struct_multilist_foo foo; * struct my_struct_multilist_bar bar; * int a; * int b; * }; * * MULTILIST_CREATE_INLINES(struct my_struct, my_struct, foo); * MULTILIST_CREATE_INLINES(struct my_struct, my_struct, bar); * * - creating a new element and initialize the multilist fields: * * struct my_struct *e = malloc(sizeof(*e)); * my_struct_multilist_init_foo(e); * my_struct_multilist_init_bar(e); * e->a = 0; * e->b = 0; * * - setting up an empty list: * * struct my_struct_multilist_foo l; * my_struct_multilist_head_init_foo(&l); * * - add element 'e' at the front of list 'l': * my_struct_multilist_push_front_foo(&l, e); * * - TODO implementation: popping from the front: * struct my_struct *i; * i = my_struct_multilist_front_foo(&l); * * - iterating over a list from the front: * struct my_struct *i; * for(i = my_struct_multilist_begin_foo(&l); * i != my_struct_multilist_end_foo(&l); * i = my_struct_multilist_next_foo(i)) * { * printf("a=%d; b=%d\n", i->a, i->b); * } */ /* Create the ENAME_multilist_MEMBER, to be used both as head and as member of main element type */ #define MULTILIST_CREATE_TYPE(ENAME, MEMBER) \ struct ENAME##_multilist_##MEMBER { \ struct ENAME##_multilist_##MEMBER *next; \ struct ENAME##_multilist_##MEMBER *prev; \ }; /* Create the inlines */ #define MULTILIST_CREATE_INLINES(TYPE, ENAME, MEMBER) \ /* Cast from list element to real type. */ \ LIST_INLINE TYPE *ENAME##_of_multilist_##MEMBER(struct ENAME##_multilist_##MEMBER *elt) { \ return ((TYPE *) ((uintptr_t) (elt) - ((uintptr_t) (&((TYPE *) 0)->MEMBER)))); \ } \ \ /* Initialize a list head. */ \ LIST_INLINE void ENAME##_multilist_head_init_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ head->next = head; \ head->prev = head; \ } \ \ /* Initialize a list element. */ \ LIST_INLINE void ENAME##_multilist_init_##MEMBER(TYPE *e) { \ (e)->MEMBER.next = NULL; \ (e)->MEMBER.prev = NULL; \ } \ \ /* Push element to head of a list. */ \ LIST_INLINE void ENAME##_multilist_push_front_##MEMBER(struct ENAME##_multilist_##MEMBER *head, TYPE *e) { \ STARPU_ASSERT_MULTILIST(e->MEMBER.prev == NULL); \ STARPU_ASSERT_MULTILIST(e->MEMBER.next == NULL); \ e->MEMBER.next = head->next; \ e->MEMBER.prev = head; \ head->next->prev = &e->MEMBER; \ head->next = &e->MEMBER; \ } \ \ /* Push element to tail of a list. */ \ LIST_INLINE void ENAME##_multilist_push_back_##MEMBER(struct ENAME##_multilist_##MEMBER *head, TYPE *e) { \ STARPU_ASSERT_MULTILIST(e->MEMBER.prev == NULL); \ STARPU_ASSERT_MULTILIST(e->MEMBER.next == NULL); \ e->MEMBER.prev = head->prev; \ e->MEMBER.next = head; \ head->prev->next = &e->MEMBER; \ head->prev = &e->MEMBER; \ } \ \ /* Erase element from a list. */ \ LIST_INLINE void ENAME##_multilist_erase_##MEMBER(struct ENAME##_multilist_##MEMBER *head STARPU_ATTRIBUTE_UNUSED, TYPE *e) { \ STARPU_ASSERT_MULTILIST(e->MEMBER.next->prev == &e->MEMBER); \ e->MEMBER.next->prev = e->MEMBER.prev; \ STARPU_ASSERT_MULTILIST(e->MEMBER.prev->next == &e->MEMBER); \ e->MEMBER.prev->next = e->MEMBER.next; \ e->MEMBER.next = NULL; \ e->MEMBER.prev = NULL; \ } \ \ /* Test whether the element was queued on the list. */ \ LIST_INLINE int ENAME##_multilist_queued_##MEMBER(TYPE *e) { \ return ((e)->MEMBER.next != NULL); \ } \ \ /* Test whether the list is empty. */ \ LIST_INLINE int ENAME##_multilist_empty_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ return head->next == head; \ } \ \ /* Test whether the element is alone in a list. */ \ LIST_INLINE int ENAME##_multilist_alone_##MEMBER(TYPE *e) { \ return (e)->MEMBER.next == (e)->MEMBER.prev; \ } \ \ /* Return the first element of the list. */ \ LIST_INLINE TYPE *ENAME##_multilist_begin_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ return ENAME##_of_multilist_##MEMBER(head->next); \ } \ /* Return the value to be tested at the end of the list. */ \ LIST_INLINE TYPE *ENAME##_multilist_end_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ return ENAME##_of_multilist_##MEMBER(head); \ } \ /* Return the next element of the list. */ \ LIST_INLINE TYPE *ENAME##_multilist_next_##MEMBER(TYPE *e) { \ return ENAME##_of_multilist_##MEMBER(e->MEMBER.next); \ } \ /* Return the first element of the list. */ \ LIST_INLINE TYPE *ENAME##_multilist_front_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ return ENAME##_of_multilist_##MEMBER(head->next); \ } \ /* Return the last element of the list. */ \ LIST_INLINE TYPE *ENAME##_multilist_back_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ return ENAME##_of_multilist_##MEMBER(head->prev); \ } \ \ /* Return the first element of the list and erase it. */ \ LIST_INLINE TYPE *ENAME##_multilist_pop_front_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ TYPE *e = ENAME##_multilist_front_##MEMBER(head); \ ENAME##_multilist_erase_##MEMBER(head, e); return e; \ } \ /* Return the last element of the list and erase it. */ \ LIST_INLINE TYPE *ENAME##_multilist_pop_back_##MEMBER(struct ENAME##_multilist_##MEMBER *head) { \ TYPE *e = ENAME##_multilist_back_##MEMBER(head); \ ENAME##_multilist_erase_##MEMBER(head, e); return e; \ } \ \ \ /* Move a list from its head to another head. Passing newhead == NULL allows to detach the list from any head. */ \ LIST_INLINE void ENAME##_multilist_move_##MEMBER(struct ENAME##_multilist_##MEMBER *head, struct ENAME##_multilist_##MEMBER *newhead) { \ if (ENAME##_multilist_empty_##MEMBER(head)) \ ENAME##_multilist_head_init_##MEMBER(newhead); \ else { \ if (newhead) { \ newhead->next = head->next; \ newhead->next->prev = newhead; \ } else { \ head->next->prev = head->prev; \ } \ if (newhead) { \ newhead->prev = head->prev; \ newhead->prev->next = newhead; \ } else { \ head->prev->next = head->next; \ } \ head->next = head; \ head->prev = head; \ } \ } #endif /* __LIST_H__ */ starpu-1.4.9+dfsg/src/common/prio_list.h000066400000000000000000000615151507764646700202540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /** @file */ /* * This implements list with priorities (as an int), by using two stages: * - an RB tree stage sorted by priority, whose leaves are... * - ... double-linked lists sorted by insertion order. * * We always keep the 0-priority list allocated, to avoid keeping * allocating/deallocating it when all priorities are 0. * * We maintain an "empty" flag, to allow lockless FOO_prio_list_empty call. * * PRIO_LIST_TYPE(FOO, priority_field) * * - Declares the following type: * + priority list: struct FOO_prio_list * * - Declares the following inlines (all O(1) except stated otherwise, n is the * number of elements, p is the number of different priorities): * * * Initialize a new priority list * void FOO_prio_list_init(struct FOO_prio_list*) * * * Initialize a new priority list, assuming that the content of FOO_prio_list was already zeroed * void FOO_prio_list_init0(struct FOO_prio_list*) * * * Free an empty priority list * void FOO_prio_list_deinit(struct FOO_prio_list*) * * * Add a new cell at the end of the list of the priority of the cell (O(log2 p)) * void FOO_prio_list_push_back(struct FOO_prio_list*, struct FOO*) * * * Add a new cell at the beginning of the list of the priority of the cell (O(log2 p)) * void FOO_prio_list_push_front(struct FOO_prio_list*, struct FOO*) * * * Test whether the priority list is empty * void FOO_prio_list_empty(struct FOO_prio_list*) * * * Remove given cell from the priority list * void FOO_prio_list_erase(struct FOO_prio_list*, struct FOO*) * * * Return and remove the first cell of highest priority of the priority list * void FOO_prio_list_pop_front_highest(struct FOO_prio_list*) * * Return and remove the first cell of lowest priority of the priority list * void FOO_prio_list_pop_front_lowest(struct FOO_prio_list*) * * * Return and remove the last cell of highest priority of the priority list * void FOO_prio_list_pop_back_highest(struct FOO_prio_list*) * * Return and remove the last cell of lowest priority of the priority list * void FOO_prio_list_pop_back_lowest(struct FOO_prio_list*) * * * Return the first cell of highest priority of the priority list * void FOO_prio_list_front_highest(struct FOO_prio_list*) * * Return the first cell of lowest priority of the priority list * void FOO_prio_list_front_lowest(struct FOO_prio_list*) * * * Return the last cell of highest priority of sthe priority list * void FOO_prio_list_back_highest(struct FOO_prio_list*) * * Return the last cell of lowest priority of sthe priority list * void FOO_prio_list_back_lowest(struct FOO_prio_list*) * * * Append second priority list at ends of the first priority list (O(log2 p)) * void FOO_prio_list_push_prio_list_back(struct FOO_prio_list*, struct FOO_prio_list*) * * * Append second priority list at beginning of the first priority list (O(log2 p)) * void FOO_prio_list_push_prio_list_front(struct FOO_prio_list*, struct FOO_prio_list*) * * * Test whether cell is part of the list (O(n)) * void FOO_prio_list_ismember(struct FOO_prio_list*, struct FOO*) * * * Return the first cell of the list * struct FOO* FOO_prio_list_begin(struct FOO_prio_list*); * * * Return the value to test at the end of the list * struct FOO* FOO_prio_list_end(struct FOO_prio_list*); * * * Return the next cell of the list * struct FOO* FOO_prio_list_next(struct FOO_prio_list*, struct FOO*) * * * Return the last cell of the list * struct FOO* FOO_prio_list_last(struct FOO_prio_list*); * * * Return the value to test at the beginning of the list * struct FOO* FOO_prio_list_alpha(struct FOO_prio_list*); * * * Return the previous cell of the list * struct FOO* FOO_prio_list_prev(struct FOO_prio_list*, struct FOO*) * * Return the previous cell of the same priority, or the last cell of next highest priority * struct FOO* FOO_prio_list_prev_highest(struct FOO_prio_list*, struct FOO*) * * Return the next cell of the same priority, or the first cell of next lowest priority * struct FOO* FOO_prio_list_next_lowest(struct FOO_prio_list*, struct FOO*) * * PRIO_LIST_TYPE assumes that LIST_TYPE has already been called to create the * final structure. * * ********************************************************* * Usage example: * LIST_TYPE(my_struct, * int a; * int b; * int prio; * ); * PRIO_LIST_TYPE(my_struct, prio); * * and then my_struct_prio_list_* inlines are available */ #ifndef __PRIO_LIST_H__ #define __PRIO_LIST_H__ #include #ifndef PRIO_LIST_INLINE #define PRIO_LIST_INLINE static inline #endif #define PRIO_LIST_TYPE(ENAME, PRIOFIELD) \ PRIO_LIST_CREATE_TYPE(ENAME, PRIOFIELD) #ifndef STARPU_DEBUG #define PRIO_LIST_CREATE_TYPE(ENAME, PRIOFIELD) \ /* The main type: an RB binary tree */ \ struct ENAME##_prio_list { \ struct starpu_rbtree tree; \ int empty; \ }; \ /* The second stage: a list */ \ struct ENAME##_prio_list_stage { \ struct starpu_rbtree_node node; /* Keep this first so ENAME##_node_to_list_stage can work. */ \ int prio; \ struct ENAME##_list list; \ }; \ PRIO_LIST_INLINE struct ENAME##_prio_list_stage *ENAME##_node_to_list_stage(struct starpu_rbtree_node *node) \ { \ /* This assumes node is first member of stage */ \ return (struct ENAME##_prio_list_stage *) node; \ } \ PRIO_LIST_INLINE const struct ENAME##_prio_list_stage *ENAME##_node_to_list_stage_const(const struct starpu_rbtree_node *node) \ { \ /* This assumes node is first member of stage */ \ return (struct ENAME##_prio_list_stage *) node; \ } \ PRIO_LIST_INLINE void ENAME##_prio_list_init(struct ENAME##_prio_list *priolist) \ { \ starpu_rbtree_init(&priolist->tree); \ priolist->empty = 1; \ } \ PRIO_LIST_INLINE void ENAME##_prio_list_init0(struct ENAME##_prio_list *priolist) \ { \ starpu_rbtree_init0(&priolist->tree); \ priolist->empty = 1; \ } \ PRIO_LIST_INLINE void ENAME##_prio_list_deinit(struct ENAME##_prio_list *priolist) \ { \ if (starpu_rbtree_empty(&priolist->tree)) \ return; \ struct starpu_rbtree_node *root = priolist->tree.root; \ struct ENAME##_prio_list_stage *stage = ENAME##_node_to_list_stage(root); \ assert(ENAME##_list_empty(&stage->list)); \ assert(!root->children[0] && !root->children[1]); \ starpu_rbtree_remove(&priolist->tree, root); \ free(stage); \ } \ PRIO_LIST_INLINE int ENAME##_prio_list_cmp_fn(int prio, const struct starpu_rbtree_node *node) \ { \ /* Sort by decreasing order */ \ const struct ENAME##_prio_list_stage *e2 = ENAME##_node_to_list_stage_const(node); \ if (e2->prio < prio) \ return -1; \ if (e2->prio == prio) \ return 0; \ /* e2->prio > prio */ \ return 1; \ } \ PRIO_LIST_INLINE struct ENAME##_prio_list_stage *ENAME##_prio_list_add(struct ENAME##_prio_list *priolist, int prio) \ { \ uintptr_t slot; \ struct starpu_rbtree_node *node; \ struct ENAME##_prio_list_stage *stage; \ node = starpu_rbtree_lookup_slot(&priolist->tree, prio, ENAME##_prio_list_cmp_fn, slot); \ if (node) \ stage = ENAME##_node_to_list_stage(node); \ else { \ _STARPU_CALLOC(stage, 1, sizeof(*stage)); \ starpu_rbtree_node_init0(&stage->node); \ stage->prio = prio; \ ENAME##_list_init0(&stage->list); \ starpu_rbtree_insert_slot(&priolist->tree, slot, &stage->node); \ } \ return stage; \ } \ PRIO_LIST_INLINE void ENAME##_prio_list_push_back(struct ENAME##_prio_list *priolist, struct ENAME *e) \ { \ struct ENAME##_prio_list_stage *stage = ENAME##_prio_list_add(priolist, e->PRIOFIELD); \ ENAME##_list_push_back(&stage->list, e); \ priolist->empty = 0; \ } \ PRIO_LIST_INLINE void ENAME##_prio_list_push_front(struct ENAME##_prio_list *priolist, struct ENAME *e) \ { \ struct ENAME##_prio_list_stage *stage = ENAME##_prio_list_add(priolist, e->PRIOFIELD); \ ENAME##_list_push_front(&stage->list, e); \ priolist->empty = 0; \ } \ PRIO_LIST_INLINE int ENAME##_prio_list_empty(const struct ENAME##_prio_list *priolist) \ { \ return priolist->empty; \ } \ /* Version of list_empty which does not use the cached empty flag, * typically used to compute the value of the flag */ \ PRIO_LIST_INLINE int ENAME##_prio_list_empty_slow(const struct ENAME##_prio_list *priolist) \ { \ if (starpu_rbtree_empty(&priolist->tree)) \ return 1; \ struct starpu_rbtree_node *root = priolist->tree.root; \ const struct ENAME##_prio_list_stage *stage = ENAME##_node_to_list_stage_const(root); \ if (ENAME##_list_empty(&stage->list) && !root->children[0] && !root->children[1]) \ /* Just one empty list */ \ return 1; \ return 0; \ } \ /* To be called when removing an element from a stage, to potentially remove this stage */ \ PRIO_LIST_INLINE void ENAME##_prio_list_check_empty_stage(struct ENAME##_prio_list *priolist, struct ENAME##_prio_list_stage *stage) \ { \ if (ENAME##_list_empty(&stage->list)) { \ if (stage->prio != 0) \ { \ /* stage got empty, remove it */ \ starpu_rbtree_remove(&priolist->tree, &stage->node); \ free(stage); \ } \ priolist->empty = ENAME##_prio_list_empty_slow(priolist); \ } \ } \ PRIO_LIST_INLINE void ENAME##_prio_list_erase(struct ENAME##_prio_list *priolist, struct ENAME *e) \ { \ struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, e->PRIOFIELD, ENAME##_prio_list_cmp_fn); \ assert(node); \ struct ENAME##_prio_list_stage *stage = ENAME##_node_to_list_stage(node); \ ENAME##_list_erase(&stage->list, e); \ ENAME##_prio_list_check_empty_stage(priolist, stage); \ } \ PRIO_LIST_INLINE int ENAME##_prio_list_get_next_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node *node, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \ { \ struct ENAME##_prio_list_stage *stage; \ while(1) { \ struct starpu_rbtree_node *next; \ if (!node) \ /* Tree is empty */ \ return 0; \ stage = ENAME##_node_to_list_stage(node); \ if (!ENAME##_list_empty(&stage->list)) \ break; \ /* Empty list, skip to next tree entry */ \ next = starpu_rbtree_next(node); \ /* drop it if not 0-prio */ \ if (stage->prio != 0) \ { \ starpu_rbtree_remove(&priolist->tree, node); \ free(stage); \ } \ node = next; \ } \ *pnode = node; \ *pstage = stage; \ return 1; \ } \ PRIO_LIST_INLINE int ENAME##_prio_list_get_prev_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node *node, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \ { \ struct ENAME##_prio_list_stage *stage; \ while(1) { \ struct starpu_rbtree_node *prev; \ if (!node) \ /* Tree is empty */ \ return 0; \ stage = ENAME##_node_to_list_stage(node); \ if (!ENAME##_list_empty(&stage->list)) \ break; \ /* Empty list, skip to prev tree entry */ \ prev = starpu_rbtree_prev(node); \ /* drop it if not 0-prio */ \ if (stage->prio != 0) \ { \ starpu_rbtree_remove(&priolist->tree, node); \ free(stage); \ } \ node = prev; \ } \ *pnode = node; \ *pstage = stage; \ return 1; \ } \ PRIO_LIST_INLINE int ENAME##_prio_list_get_first_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \ { \ struct starpu_rbtree_node *node = starpu_rbtree_first(&priolist->tree); \ return ENAME##_prio_list_get_next_nonempty_stage(priolist, node, pnode, pstage); \ } \ PRIO_LIST_INLINE int ENAME##_prio_list_get_last_nonempty_stage(struct ENAME##_prio_list *priolist, struct starpu_rbtree_node **pnode, struct ENAME##_prio_list_stage **pstage) \ { \ struct starpu_rbtree_node *node = starpu_rbtree_last(&priolist->tree); \ return ENAME##_prio_list_get_prev_nonempty_stage(priolist, node, pnode, pstage); \ } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_front_highest(struct ENAME##_prio_list *priolist) \ { \ struct starpu_rbtree_node *node; \ struct ENAME##_prio_list_stage *stage; \ struct ENAME *ret; \ if (!ENAME##_prio_list_get_first_nonempty_stage(priolist, &node, &stage)) \ return NULL; \ ret = ENAME##_list_pop_front(&stage->list); \ ENAME##_prio_list_check_empty_stage(priolist, stage); \ return ret; \ } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_front_lowest(struct ENAME##_prio_list *priolist) \ { \ struct starpu_rbtree_node *node; \ struct ENAME##_prio_list_stage *stage; \ struct ENAME *ret; \ if (!ENAME##_prio_list_get_last_nonempty_stage(priolist, &node, &stage)) \ return NULL; \ ret = ENAME##_list_pop_front(&stage->list); \ ENAME##_prio_list_check_empty_stage(priolist, stage); \ return ret; \ } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_front_highest(struct ENAME##_prio_list *priolist) \ { \ struct starpu_rbtree_node *node; \ struct ENAME##_prio_list_stage *stage; \ if (!ENAME##_prio_list_get_first_nonempty_stage(priolist, &node, &stage)) \ return NULL; \ return ENAME##_list_front(&stage->list); \ } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_front_lowest(struct ENAME##_prio_list *priolist) \ { \ struct starpu_rbtree_node *node; \ struct ENAME##_prio_list_stage *stage; \ if (!ENAME##_prio_list_get_last_nonempty_stage(priolist, &node, &stage)) \ return NULL; \ return ENAME##_list_front(&stage->list); \ } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_back_highest(struct ENAME##_prio_list *priolist) \ { \ struct starpu_rbtree_node *node; \ struct ENAME##_prio_list_stage *stage; \ struct ENAME *ret; \ if (!ENAME##_prio_list_get_first_nonempty_stage(priolist, &node, &stage)) \ return NULL; \ ret = ENAME##_list_pop_back(&stage->list); \ ENAME##_prio_list_check_empty_stage(priolist, stage); \ return ret; \ } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_back_lowest(struct ENAME##_prio_list *priolist) \ { \ struct starpu_rbtree_node *node; \ struct ENAME##_prio_list_stage *stage; \ struct ENAME *ret; \ if (!ENAME##_prio_list_get_last_nonempty_stage(priolist, &node, &stage)) \ return NULL; \ ret = ENAME##_list_pop_back(&stage->list); \ ENAME##_prio_list_check_empty_stage(priolist, stage); \ return ret; \ } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_back_highest(struct ENAME##_prio_list *priolist) \ { \ struct starpu_rbtree_node *node; \ struct ENAME##_prio_list_stage *stage; \ if (!ENAME##_prio_list_get_first_nonempty_stage(priolist, &node, &stage)) \ return NULL; \ return ENAME##_list_back(&stage->list); \ } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_back_lowest(struct ENAME##_prio_list *priolist) \ { \ struct starpu_rbtree_node *node; \ struct ENAME##_prio_list_stage *stage; \ if (!ENAME##_prio_list_get_last_nonempty_stage(priolist, &node, &stage)) \ return NULL; \ return ENAME##_list_back(&stage->list); \ } \ PRIO_LIST_INLINE void ENAME##_prio_list_push_prio_list_back(struct ENAME##_prio_list *priolist, struct ENAME##_prio_list *priolist_toadd) \ { \ struct starpu_rbtree_node *node_toadd, *tmp; \ starpu_rbtree_for_each_remove(&priolist_toadd->tree, node_toadd, tmp) { \ struct ENAME##_prio_list_stage *stage_toadd = ENAME##_node_to_list_stage(node_toadd); \ uintptr_t slot; \ struct starpu_rbtree_node *node = starpu_rbtree_lookup_slot(&priolist->tree, stage_toadd->prio, ENAME##_prio_list_cmp_fn, slot); \ if (node) \ { \ /* Catenate the lists */ \ if (!ENAME##_list_empty(&stage_toadd->list)) { \ struct ENAME##_prio_list_stage *stage = ENAME##_node_to_list_stage(node); \ ENAME##_list_push_list_back(&stage->list, &stage_toadd->list); \ free(node_toadd); \ priolist->empty = 0; \ } \ } \ else \ { \ if (!ENAME##_list_empty(&stage_toadd->list)) { \ /* Just move the node between the trees */ \ starpu_rbtree_insert_slot(&priolist->tree, slot, node_toadd); \ priolist->empty = 0; \ } \ else \ { \ /* Actually empty, don't bother moving the list */ \ free(node_toadd); \ } \ } \ } \ } \ PRIO_LIST_INLINE int ENAME##_prio_list_ismember(const struct ENAME##_prio_list *priolist, const struct ENAME *e) \ { \ struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, e->PRIOFIELD, ENAME##_prio_list_cmp_fn); \ if (node) { \ const struct ENAME##_prio_list_stage *stage = ENAME##_node_to_list_stage_const(node); \ return ENAME##_list_ismember(&stage->list, e); \ } \ return 0; \ } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_begin(struct ENAME##_prio_list *priolist) \ { \ struct starpu_rbtree_node *node; \ struct ENAME##_prio_list_stage *stage; \ if (!ENAME##_prio_list_get_first_nonempty_stage(priolist, &node, &stage)) \ return NULL; \ return ENAME##_list_begin(&stage->list); \ } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_end(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED) \ { return NULL; } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_next(struct ENAME##_prio_list *priolist, const struct ENAME *i) \ { \ struct ENAME *next = ENAME##_list_next(i); \ if (next != ENAME##_list_end(NULL)) \ return next; \ struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, i->PRIOFIELD, ENAME##_prio_list_cmp_fn); \ assert(node); \ struct ENAME##_prio_list_stage *stage; \ node = starpu_rbtree_next(node); \ if (!ENAME##_prio_list_get_next_nonempty_stage(priolist, node, &node, &stage)) \ return NULL; \ return ENAME##_list_begin(&stage->list); \ } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_last(struct ENAME##_prio_list *priolist) \ { \ struct starpu_rbtree_node *node; \ struct ENAME##_prio_list_stage *stage; \ if (!ENAME##_prio_list_get_last_nonempty_stage(priolist, &node, &stage)) \ return NULL; \ return ENAME##_list_last(&stage->list); \ } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_alpha(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED) \ { return NULL; } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_prev(struct ENAME##_prio_list *priolist, const struct ENAME *i) \ { \ struct ENAME *next = ENAME##_list_prev(i); \ if (next != ENAME##_list_alpha(NULL)) \ return next; \ struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, i->PRIOFIELD, ENAME##_prio_list_cmp_fn); \ assert(node); \ struct ENAME##_prio_list_stage *stage; \ node = starpu_rbtree_prev(node); \ if (!ENAME##_prio_list_get_prev_nonempty_stage(priolist, node, &node, &stage)) \ return NULL; \ return ENAME##_list_last(&stage->list); \ } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_prev_highest(struct ENAME##_prio_list *priolist, const struct ENAME *i) \ { \ struct ENAME *next = ENAME##_list_prev(i); \ if (next != ENAME##_list_alpha(NULL)) \ return next; \ struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, i->PRIOFIELD, ENAME##_prio_list_cmp_fn); \ assert(node); \ struct ENAME##_prio_list_stage *stage; \ node = starpu_rbtree_next(node); \ if (!ENAME##_prio_list_get_next_nonempty_stage(priolist, node, &node, &stage)) \ return NULL; \ return ENAME##_list_last(&stage->list); \ } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_next_lowest(struct ENAME##_prio_list *priolist, const struct ENAME *i) \ { \ struct ENAME *next = ENAME##_list_next(i); \ if (next != ENAME##_list_end(NULL)) \ return next; \ struct starpu_rbtree_node *node = starpu_rbtree_lookup(&priolist->tree, i->PRIOFIELD, ENAME##_prio_list_cmp_fn); \ assert(node); \ struct ENAME##_prio_list_stage *stage; \ node = starpu_rbtree_prev(node); \ if (!ENAME##_prio_list_get_prev_nonempty_stage(priolist, node, &node, &stage)) \ return NULL; \ return ENAME##_list_begin(&stage->list); \ } \ #else /* gdbinit can't recurse in a tree. Use a mere list in debugging mode. */ #define PRIO_LIST_CREATE_TYPE(ENAME, PRIOFIELD) \ struct ENAME##_prio_list { struct ENAME##_list list; }; \ PRIO_LIST_INLINE void ENAME##_prio_list_init(struct ENAME##_prio_list *priolist) \ { ENAME##_list_init(&(priolist)->list); } \ PRIO_LIST_INLINE void ENAME##_prio_list_init0(struct ENAME##_prio_list *priolist) \ { ENAME##_list_init0(&(priolist)->list); } \ PRIO_LIST_INLINE void ENAME##_prio_list_deinit(struct ENAME##_prio_list *priolist) \ { (void) (priolist); /* ENAME##_list_deinit(&(priolist)->list); */ } \ PRIO_LIST_INLINE void ENAME##_prio_list_push_back(struct ENAME##_prio_list *priolist, struct ENAME *e) \ { \ struct ENAME *cur; \ for (cur = ENAME##_list_begin(&(priolist)->list); \ cur != ENAME##_list_end(&(priolist)->list); \ cur = ENAME##_list_next(cur)) \ if ((e)->PRIOFIELD > cur->PRIOFIELD) \ break; \ if (cur == ENAME##_list_end(&(priolist)->list)) \ ENAME##_list_push_back(&(priolist)->list, (e)); \ else \ ENAME##_list_insert_before(&(priolist)->list, (e), cur); \ } \ PRIO_LIST_INLINE void ENAME##_prio_list_push_front(struct ENAME##_prio_list *priolist, struct ENAME *e) \ { \ struct ENAME *cur; \ for (cur = ENAME##_list_begin(&(priolist)->list); \ cur != ENAME##_list_end(&(priolist)->list); \ cur = ENAME##_list_next(cur)) \ if ((e)->PRIOFIELD >= cur->PRIOFIELD) \ break; \ if (cur == ENAME##_list_end(&(priolist)->list)) \ ENAME##_list_push_back(&(priolist)->list, (e)); \ else \ ENAME##_list_insert_before(&(priolist)->list, (e), cur); \ } \ PRIO_LIST_INLINE int ENAME##_prio_list_empty(const struct ENAME##_prio_list *priolist) \ { return ENAME##_list_empty(&(priolist)->list); } \ PRIO_LIST_INLINE void ENAME##_prio_list_erase(struct ENAME##_prio_list *priolist, struct ENAME *e) \ { ENAME##_list_erase(&(priolist)->list, (e)); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_front_highest(struct ENAME##_prio_list *priolist) \ { return ENAME##_list_pop_front(&(priolist)->list); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_front_lowest(struct ENAME##_prio_list *priolist) \ { return ENAME##_list_pop_front(&(priolist)->list); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_back_highest(struct ENAME##_prio_list *priolist) \ { return ENAME##_list_pop_back(&(priolist)->list); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_pop_back_lowest(struct ENAME##_prio_list *priolist) \ { return ENAME##_list_pop_back(&(priolist)->list); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_front_highest(struct ENAME##_prio_list *priolist) \ { return ENAME##_list_front(&(priolist)->list); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_front_lowest(struct ENAME##_prio_list *priolist) \ { return ENAME##_list_front(&(priolist)->list); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_back_highest(struct ENAME##_prio_list *priolist) \ { return ENAME##_list_back(&(priolist)->list); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_back_lowest(struct ENAME##_prio_list *priolist) \ { return ENAME##_list_back(&(priolist)->list); } \ PRIO_LIST_INLINE void ENAME##_prio_list_push_prio_list_back(struct ENAME##_prio_list *priolist, struct ENAME##_prio_list *priolist_toadd) \ { ENAME##_list_push_list_back(&(priolist)->list, &(priolist_toadd)->list); } \ PRIO_LIST_INLINE int ENAME##_prio_list_ismember(const struct ENAME##_prio_list *priolist, const struct ENAME *e) \ { return ENAME##_list_ismember(&(priolist)->list, (e)); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_begin(struct ENAME##_prio_list *priolist) \ { return ENAME##_list_begin(&(priolist)->list); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_end(struct ENAME##_prio_list *priolist) \ { return ENAME##_list_end(&(priolist)->list); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_next(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED, const struct ENAME *i) \ { return ENAME##_list_next(i); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_last(struct ENAME##_prio_list *priolist) \ { return ENAME##_list_last(&(priolist)->list); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_alpha(struct ENAME##_prio_list *priolist) \ { return ENAME##_list_alpha(&(priolist)->list); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_prev(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED, const struct ENAME *i) \ { return ENAME##_list_prev(i); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_prev_highest(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED, const struct ENAME *i) \ { return ENAME##_list_prev(i); } \ PRIO_LIST_INLINE struct ENAME *ENAME##_prio_list_next_lowest(struct ENAME##_prio_list *priolist STARPU_ATTRIBUTE_UNUSED, const struct ENAME *i) \ { return ENAME##_list_next(i); } \ #endif #endif // __PRIO_LIST_H__ starpu-1.4.9+dfsg/src/common/rbtree.c000066400000000000000000000307131507764646700175220ustar00rootroot00000000000000/* * Copyright (c) 2010, 2012 Richard Braun. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #define unlikely(expr) __builtin_expect(!!(expr), 0) /* * Return the index of a node in the children array of its parent. * * The parent parameter must not be null, and must be the parent of the * given node. */ static inline int starpu_rbtree_index(const struct starpu_rbtree_node *node, const struct starpu_rbtree_node *parent) { assert(parent != NULL); assert((node == NULL) || (starpu_rbtree_parent(node) == parent)); if (parent->children[STARPU_RBTREE_LEFT] == node) return STARPU_RBTREE_LEFT; assert(parent->children[STARPU_RBTREE_RIGHT] == node); return STARPU_RBTREE_RIGHT; } /* * Return the color of a node. */ static inline int starpu_rbtree_color(const struct starpu_rbtree_node *node) { return node->parent & STARPU_RBTREE_COLOR_MASK; } /* * Return true if the node is red. */ static inline int starpu_rbtree_is_red(const struct starpu_rbtree_node *node) { return starpu_rbtree_color(node) == STARPU_RBTREE_COLOR_RED; } /* * Return true if the node is black. */ static inline int starpu_rbtree_is_black(const struct starpu_rbtree_node *node) { return starpu_rbtree_color(node) == STARPU_RBTREE_COLOR_BLACK; } /* * Set the parent of a node, retaining its current color. */ static inline void starpu_rbtree_set_parent(struct starpu_rbtree_node *node, struct starpu_rbtree_node *parent) { assert(starpu_rbtree_check_alignment(node)); assert(starpu_rbtree_check_alignment(parent)); node->parent = (uintptr_t)parent | (node->parent & STARPU_RBTREE_COLOR_MASK); } /* * Set the color of a node, retaining its current parent. */ static inline void starpu_rbtree_set_color(struct starpu_rbtree_node *node, int color) { assert((color & ~STARPU_RBTREE_COLOR_MASK) == 0); node->parent = (node->parent & STARPU_RBTREE_PARENT_MASK) | color; } /* * Set the color of a node to red, retaining its current parent. */ static inline void starpu_rbtree_set_red(struct starpu_rbtree_node *node) { starpu_rbtree_set_color(node, STARPU_RBTREE_COLOR_RED); } /* * Set the color of a node to black, retaining its current parent. */ static inline void starpu_rbtree_set_black(struct starpu_rbtree_node *node) { starpu_rbtree_set_color(node, STARPU_RBTREE_COLOR_BLACK); } /* * Perform a tree rotation, rooted at the given node. * * The direction parameter defines the rotation direction and is either * STARPU_RBTREE_LEFT or STARPU_RBTREE_RIGHT. */ static void starpu_rbtree_rotate(struct starpu_rbtree *tree, struct starpu_rbtree_node *node, int direction) { struct starpu_rbtree_node *parent, *rnode; int left, right; left = direction; right = 1 - left; parent = starpu_rbtree_parent(node); rnode = node->children[right]; node->children[right] = rnode->children[left]; if (rnode->children[left] != NULL) starpu_rbtree_set_parent(rnode->children[left], node); rnode->children[left] = node; starpu_rbtree_set_parent(rnode, parent); if (unlikely(parent == NULL)) tree->root = rnode; else parent->children[starpu_rbtree_index(node, parent)] = rnode; starpu_rbtree_set_parent(node, rnode); } void starpu_rbtree_insert_rebalance(struct starpu_rbtree *tree, struct starpu_rbtree_node *parent, int index, struct starpu_rbtree_node *node) { struct starpu_rbtree_node *grand_parent, *tmp; assert(starpu_rbtree_check_alignment(parent)); assert(starpu_rbtree_check_alignment(node)); node->parent = (uintptr_t)parent | STARPU_RBTREE_COLOR_RED; node->children[STARPU_RBTREE_LEFT] = NULL; node->children[STARPU_RBTREE_RIGHT] = NULL; if (unlikely(parent == NULL)) tree->root = node; else parent->children[index] = node; for (;;) { struct starpu_rbtree_node *uncle; int left, right; if (parent == NULL) { starpu_rbtree_set_black(node); break; } if (starpu_rbtree_is_black(parent)) break; grand_parent = starpu_rbtree_parent(parent); assert(grand_parent != NULL); left = starpu_rbtree_index(parent, grand_parent); right = 1 - left; uncle = grand_parent->children[right]; /* * Uncle is red. Flip colors and repeat at grand parent. */ if ((uncle != NULL) && starpu_rbtree_is_red(uncle)) { starpu_rbtree_set_black(uncle); starpu_rbtree_set_black(parent); starpu_rbtree_set_red(grand_parent); node = grand_parent; parent = starpu_rbtree_parent(node); continue; } /* * Node is the right child of its parent. Rotate left at parent. */ if (parent->children[right] == node) { starpu_rbtree_rotate(tree, parent, left); tmp = node; node = parent; parent = tmp; } /* * Node is the left child of its parent. Handle colors, rotate right * at grand parent, and leave. */ starpu_rbtree_set_black(parent); starpu_rbtree_set_red(grand_parent); starpu_rbtree_rotate(tree, grand_parent, right); break; } assert(starpu_rbtree_is_black(tree->root)); } void starpu_rbtree_remove(struct starpu_rbtree *tree, struct starpu_rbtree_node *node) { struct starpu_rbtree_node *child, *parent, *brother; int color, left, right; if (node->children[STARPU_RBTREE_LEFT] == NULL) child = node->children[STARPU_RBTREE_RIGHT]; else if (node->children[STARPU_RBTREE_RIGHT] == NULL) child = node->children[STARPU_RBTREE_LEFT]; else { struct starpu_rbtree_node *successor; /* * Two-children case: replace the node with its successor. */ successor = node->children[STARPU_RBTREE_RIGHT]; while (successor->children[STARPU_RBTREE_LEFT] != NULL) successor = successor->children[STARPU_RBTREE_LEFT]; color = starpu_rbtree_color(successor); child = successor->children[STARPU_RBTREE_RIGHT]; parent = starpu_rbtree_parent(node); if (unlikely(parent == NULL)) tree->root = successor; else parent->children[starpu_rbtree_index(node, parent)] = successor; parent = starpu_rbtree_parent(successor); /* * Set parent directly to keep the original color. */ successor->parent = node->parent; successor->children[STARPU_RBTREE_LEFT] = node->children[STARPU_RBTREE_LEFT]; starpu_rbtree_set_parent(successor->children[STARPU_RBTREE_LEFT], successor); if (node == parent) parent = successor; else { successor->children[STARPU_RBTREE_RIGHT] = node->children[STARPU_RBTREE_RIGHT]; starpu_rbtree_set_parent(successor->children[STARPU_RBTREE_RIGHT], successor); parent->children[STARPU_RBTREE_LEFT] = child; if (child != NULL) starpu_rbtree_set_parent(child, parent); } goto update_color; } /* * Node has at most one child. */ color = starpu_rbtree_color(node); parent = starpu_rbtree_parent(node); if (child != NULL) starpu_rbtree_set_parent(child, parent); if (unlikely(parent == NULL)) tree->root = child; else parent->children[starpu_rbtree_index(node, parent)] = child; /* * The node has been removed, update the colors. The child pointer can * be null, in which case it is considered a black leaf. */ update_color: if (color == STARPU_RBTREE_COLOR_RED) return; for (;;) { if ((child != NULL) && starpu_rbtree_is_red(child)) { starpu_rbtree_set_black(child); break; } if (parent == NULL) break; left = starpu_rbtree_index(child, parent); right = 1 - left; brother = parent->children[right]; /* * Brother is red. Recolor and rotate left at parent so that brother * becomes black. */ if (starpu_rbtree_is_red(brother)) { starpu_rbtree_set_black(brother); starpu_rbtree_set_red(parent); starpu_rbtree_rotate(tree, parent, left); brother = parent->children[right]; } /* * Brother has no red child. Recolor and repeat at parent. */ if (((brother->children[STARPU_RBTREE_LEFT] == NULL) || starpu_rbtree_is_black(brother->children[STARPU_RBTREE_LEFT])) && ((brother->children[STARPU_RBTREE_RIGHT] == NULL) || starpu_rbtree_is_black(brother->children[STARPU_RBTREE_RIGHT]))) { starpu_rbtree_set_red(brother); child = parent; parent = starpu_rbtree_parent(child); continue; } /* * Brother's right child is black. Recolor and rotate right at brother. */ if ((brother->children[right] == NULL) || starpu_rbtree_is_black(brother->children[right])) { starpu_rbtree_set_black(brother->children[left]); starpu_rbtree_set_red(brother); starpu_rbtree_rotate(tree, brother, right); brother = parent->children[right]; } /* * Brother's left child is black. Exchange parent and brother colors * (we already know brother is black), set brother's right child black, * rotate left at parent and leave. */ starpu_rbtree_set_color(brother, starpu_rbtree_color(parent)); starpu_rbtree_set_black(parent); starpu_rbtree_set_black(brother->children[right]); starpu_rbtree_rotate(tree, parent, left); break; } assert((tree->root == NULL) || starpu_rbtree_is_black(tree->root)); } struct starpu_rbtree_node * starpu_rbtree_nearest(struct starpu_rbtree_node *parent, int index, int direction) { assert(starpu_rbtree_check_index(direction)); if (parent == NULL) return NULL; assert(starpu_rbtree_check_index(index)); if (index != direction) return parent; return starpu_rbtree_walk(parent, direction); } struct starpu_rbtree_node * starpu_rbtree_firstlast(const struct starpu_rbtree *tree, int direction) { struct starpu_rbtree_node *prev, *cur; assert(starpu_rbtree_check_index(direction)); prev = NULL; for (cur = tree->root; cur != NULL; cur = cur->children[direction]) prev = cur; return prev; } struct starpu_rbtree_node * starpu_rbtree_walk(struct starpu_rbtree_node *node, int direction) { int left, right; assert(starpu_rbtree_check_index(direction)); left = direction; right = 1 - left; if (node == NULL) return NULL; if (node->children[left] != NULL) { node = node->children[left]; while (node->children[right] != NULL) node = node->children[right]; } else { for (;;) { struct starpu_rbtree_node *parent; int index; parent = starpu_rbtree_parent(node); if (parent == NULL) return NULL; index = starpu_rbtree_index(node, parent); node = parent; if (index == right) break; } } return node; } /* * Return the left-most deepest child node of the given node. */ static struct starpu_rbtree_node * starpu_rbtree_find_deepest(struct starpu_rbtree_node *node) { struct starpu_rbtree_node *parent; assert(node != NULL); for (;;) { parent = node; node = node->children[STARPU_RBTREE_LEFT]; if (node == NULL) { node = parent->children[STARPU_RBTREE_RIGHT]; if (node == NULL) return parent; } } } struct starpu_rbtree_node * starpu_rbtree_postwalk_deepest(const struct starpu_rbtree *tree) { struct starpu_rbtree_node *node; node = tree->root; if (node == NULL) return NULL; return starpu_rbtree_find_deepest(node); } struct starpu_rbtree_node * starpu_rbtree_postwalk_unlink(struct starpu_rbtree_node *node) { struct starpu_rbtree_node *parent; int index; if (node == NULL) return NULL; assert(node->children[STARPU_RBTREE_LEFT] == NULL); assert(node->children[STARPU_RBTREE_RIGHT] == NULL); parent = starpu_rbtree_parent(node); if (parent == NULL) return NULL; index = starpu_rbtree_index(node, parent); parent->children[index] = NULL; node = parent->children[STARPU_RBTREE_RIGHT]; if (node == NULL) return parent; return starpu_rbtree_find_deepest(node); } starpu-1.4.9+dfsg/src/common/rbtree.h000066400000000000000000000245171507764646700175340ustar00rootroot00000000000000/* * Copyright (c) 2010, 2011 Richard Braun. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * * Red-black tree. */ #ifndef _KERN_RBTREE_H #define _KERN_RBTREE_H /** @file */ #include #include #include #include #include #define MACRO_BEGIN ({ #define MACRO_END }) /* * Indexes of the left and right nodes in the children array of a node. */ #define STARPU_RBTREE_LEFT 0 #define STARPU_RBTREE_RIGHT 1 /** * Red-black node. */ struct starpu_rbtree_node; /** * Red-black tree. */ struct starpu_rbtree; /** * Static tree initializer. */ #define STARPU_RBTREE_INITIALIZER { NULL } #include "rbtree_i.h" /** * Initialize a tree. */ static inline void starpu_rbtree_init(struct starpu_rbtree *tree) { tree->root = NULL; } /** * This version assumes that the content of tree was already zeroed */ static inline void starpu_rbtree_init0(struct starpu_rbtree *tree STARPU_ATTRIBUTE_UNUSED) { } /** * Initialize a node. * * A node is in no tree when its parent points to itself. */ static inline void starpu_rbtree_node_init(struct starpu_rbtree_node *node) { assert(starpu_rbtree_check_alignment(node)); node->parent = (uintptr_t)node | STARPU_RBTREE_COLOR_RED; node->children[STARPU_RBTREE_LEFT] = NULL; node->children[STARPU_RBTREE_RIGHT] = NULL; } /** * This version assumes that the content of node was already zeroed */ static inline void starpu_rbtree_node_init0(struct starpu_rbtree_node *node) { assert(starpu_rbtree_check_alignment(node)); node->parent = (uintptr_t)node | STARPU_RBTREE_COLOR_RED; //node->children[STARPU_RBTREE_LEFT] = NULL; //node->children[STARPU_RBTREE_RIGHT] = NULL; } /** * Return true if node is in no tree. */ static inline int starpu_rbtree_node_unlinked(const struct starpu_rbtree_node *node) { return starpu_rbtree_parent(node) == node; } /** * Macro that evaluates to the address of the structure containing the * given node based on the given type and member. */ #define starpu_rbtree_entry(node, type, member) structof(node, type, member) /** * Return true if tree is empty. */ static inline int starpu_rbtree_empty(const struct starpu_rbtree *tree) { return tree->root == NULL; } /** * Look up a node in a tree. * * Note that implementing the lookup algorithm as a macro gives two benefits: * First, it avoids the overhead of a callback function. Next, the type of the * cmp_fn parameter isn't rigid. The only guarantee offered by this * implementation is that the key parameter is the first parameter given to * cmp_fn. This way, users can pass only the value they need for comparison * instead of e.g. allocating a full structure on the stack. * * See starpu_rbtree_insert(). */ #define starpu_rbtree_lookup(tree, key, cmp_fn) \ MACRO_BEGIN \ struct starpu_rbtree_node *___cur; \ int ___diff; \ \ ___cur = (tree)->root; \ \ while (___cur != NULL) { \ ___diff = cmp_fn(key, ___cur); \ \ if (___diff == 0) \ break; \ \ ___cur = ___cur->children[starpu_rbtree_d2i(___diff)]; \ } \ \ ___cur; \ MACRO_END /** * Look up a node or one of its nearest nodes in a tree. * * This macro essentially acts as starpu_rbtree_lookup() but if no entry matched * the key, an additional step is performed to obtain the next or previous * node, depending on the direction (left or right). * * The constraints that apply to the key parameter are the same as for * starpu_rbtree_lookup(). */ #define starpu_rbtree_lookup_nearest(tree, key, cmp_fn, dir) \ MACRO_BEGIN \ struct starpu_rbtree_node *___cur, *___prev; \ int ___diff, ___index; \ \ ___prev = NULL; \ ___index = -1; \ ___cur = (tree)->root; \ \ while (___cur != NULL) { \ ___diff = cmp_fn(key, ___cur); \ \ if (___diff == 0) \ break; \ \ ___prev = ___cur; \ ___index = starpu_rbtree_d2i(___diff); \ ___cur = ___cur->children[___index]; \ } \ \ if (___cur == NULL) \ ___cur = starpu_rbtree_nearest(___prev, ___index, dir); \ \ ___cur; \ MACRO_END /** * Insert a node in a tree. * * This macro performs a standard lookup to obtain the insertion point of * the given node in the tree (it is assumed that the inserted node never * compares equal to any other entry in the tree) and links the node. It * then checks red-black rules violations, and rebalances the tree if * necessary. * * Unlike starpu_rbtree_lookup(), the cmp_fn parameter must compare two complete * entries, so it is suggested to use two different comparison inline * functions, such as myobj_cmp_lookup() and myobj_cmp_insert(). There is no * guarantee about the order of the nodes given to the comparison function. * * See starpu_rbtree_lookup(). */ #define starpu_rbtree_insert(tree, node, cmp_fn) \ MACRO_BEGIN \ struct starpu_rbtree_node *___cur, *___prev; \ int ___diff, ___index; \ \ ___prev = NULL; \ ___index = -1; \ ___cur = (tree)->root; \ \ while (___cur != NULL) { \ ___diff = cmp_fn(node, ___cur); \ assert(___diff != 0); \ ___prev = ___cur; \ ___index = starpu_rbtree_d2i(___diff); \ ___cur = ___cur->children[___index]; \ } \ \ starpu_rbtree_insert_rebalance(tree, ___prev, ___index, node); \ MACRO_END /** * Look up a node/slot pair in a tree. * * This macro essentially acts as starpu_rbtree_lookup() but in addition to a node, * it also returns a slot, which identifies an insertion point in the tree. * If the returned node is null, the slot can be used by starpu_rbtree_insert_slot() * to insert without the overhead of an additional lookup. The slot is a * simple uintptr_t integer. * * The constraints that apply to the key parameter are the same as for * starpu_rbtree_lookup(). */ #define starpu_rbtree_lookup_slot(tree, key, cmp_fn, slot) \ MACRO_BEGIN \ struct starpu_rbtree_node *___cur, *___prev; \ int ___diff, ___index; \ \ ___prev = NULL; \ ___index = 0; \ ___cur = (tree)->root; \ \ while (___cur != NULL) { \ ___diff = cmp_fn(key, ___cur); \ \ if (___diff == 0) \ break; \ \ ___prev = ___cur; \ ___index = starpu_rbtree_d2i(___diff); \ ___cur = ___cur->children[___index]; \ } \ \ (slot) = starpu_rbtree_slot(___prev, ___index); \ ___cur; \ MACRO_END /** * Insert a node at an insertion point in a tree. * * This macro essentially acts as starpu_rbtree_insert() except that it doesn't * obtain the insertion point with a standard lookup. The insertion point * is obtained by calling starpu_rbtree_lookup_slot(). In addition, the new node * must not compare equal to an existing node in the tree (i.e. the slot * must denote a null node). */ static inline void starpu_rbtree_insert_slot(struct starpu_rbtree *tree, uintptr_t slot, struct starpu_rbtree_node *node) { struct starpu_rbtree_node *parent; int index; parent = starpu_rbtree_slot_parent(slot); index = starpu_rbtree_slot_index(slot); starpu_rbtree_insert_rebalance(tree, parent, index, node); } /** * Remove a node from a tree. * * After completion, the node is stale. */ void starpu_rbtree_remove(struct starpu_rbtree *tree, struct starpu_rbtree_node *node); /** * Return the first node of a tree. */ /* TODO: optimize by maintaining the first node of the tree */ #define starpu_rbtree_first(tree) starpu_rbtree_firstlast(tree, STARPU_RBTREE_LEFT) /** * Return the last node of a tree. */ /* TODO: optimize by maintaining the first node of the tree */ /* TODO: could be useful to optimize the case when the key being inserted is * bigger that the biggest node */ #define starpu_rbtree_last(tree) starpu_rbtree_firstlast(tree, STARPU_RBTREE_RIGHT) /** * Return the node previous to the given node. */ #define starpu_rbtree_prev(node) starpu_rbtree_walk(node, STARPU_RBTREE_LEFT) /** * Return the node next to the given node. */ #define starpu_rbtree_next(node) starpu_rbtree_walk(node, STARPU_RBTREE_RIGHT) /** * Forge a loop to process all nodes of a tree, removing them when visited. * * This macro can only be used to destroy a tree, so that the resources used * by the entries can be released by the user. It basically removes all nodes * without doing any color checking. * * After completion, all nodes and the tree root member are stale. */ #define starpu_rbtree_for_each_remove(tree, node, tmp) \ for (node = starpu_rbtree_postwalk_deepest(tree), \ tmp = starpu_rbtree_postwalk_unlink(node); \ node != NULL; \ node = tmp, tmp = starpu_rbtree_postwalk_unlink(node)) \ #endif /* _KERN_RBTREE_H */ starpu-1.4.9+dfsg/src/common/rbtree_i.h000066400000000000000000000140731507764646700200400ustar00rootroot00000000000000/* * Copyright (c) 2010, 2011 Richard Braun. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _KERN_RBTREE_I_H #define _KERN_RBTREE_I_H #include /** @file */ /** * Red-black node structure. * * To reduce the number of branches and the instruction cache footprint, * the left and right child pointers are stored in an array, and the symmetry * of most tree operations is exploited by using left/right variables when * referring to children. * * In addition, this implementation assumes that all nodes are 4-byte aligned, * so that the least significant bit of the parent member can be used to store * the color of the node. This is true for all modern 32 and 64 bits * architectures, as long as the nodes aren't embedded in structures with * special alignment constraints such as member packing. */ struct starpu_rbtree_node { uintptr_t parent; struct starpu_rbtree_node *children[2]; }; /** * Red-black tree structure. */ struct starpu_rbtree { struct starpu_rbtree_node *root; }; /** * Masks applied on the parent member of a node to obtain either the * color or the parent address. */ #define STARPU_RBTREE_COLOR_MASK ((uintptr_t) 0x1) #define STARPU_RBTREE_PARENT_MASK (~((uintptr_t) 0x3)) /** * Node colors. */ #define STARPU_RBTREE_COLOR_RED 0 #define STARPU_RBTREE_COLOR_BLACK 1 /** * Masks applied on slots to obtain either the child index or the parent * address. */ #define STARPU_RBTREE_SLOT_INDEX_MASK ((uintptr_t) 0x1) #define STARPU_RBTREE_SLOT_PARENT_MASK (~STARPU_RBTREE_SLOT_INDEX_MASK) /** * Return true if the given pointer is suitably aligned. */ static inline int starpu_rbtree_check_alignment(const struct starpu_rbtree_node *node) { return ((uintptr_t)node & (~STARPU_RBTREE_PARENT_MASK)) == 0; } /** * Return true if the given index is a valid child index. */ static inline int starpu_rbtree_check_index(int index) { return index == (index & 1); } /** * Convert the result of a comparison into an index in the children array * (0 or 1). * * This function is mostly used when looking up a node. */ static inline int starpu_rbtree_d2i(int diff) { return !(diff <= 0); } /** * Return the parent of a node. */ static inline struct starpu_rbtree_node * starpu_rbtree_parent(const struct starpu_rbtree_node *node) { return (struct starpu_rbtree_node *)(node->parent & STARPU_RBTREE_PARENT_MASK); } /** * Translate an insertion point into a slot. */ static inline uintptr_t starpu_rbtree_slot(struct starpu_rbtree_node *parent, int index) { assert(starpu_rbtree_check_alignment(parent)); assert(starpu_rbtree_check_index(index)); return (uintptr_t)parent | index; } /** * Extract the parent address from a slot. */ static inline struct starpu_rbtree_node * starpu_rbtree_slot_parent(uintptr_t slot) { return (struct starpu_rbtree_node *)(slot & STARPU_RBTREE_SLOT_PARENT_MASK); } /** * Extract the index from a slot. */ static inline int starpu_rbtree_slot_index(uintptr_t slot) { return slot & STARPU_RBTREE_SLOT_INDEX_MASK; } /** * Insert a node in a tree, rebalancing it if necessary. * * The index parameter is the index in the children array of the parent where * the new node is to be inserted. It is ignored if the parent is null. * * This function is intended to be used by the starpu_rbtree_insert() macro only. */ void starpu_rbtree_insert_rebalance(struct starpu_rbtree *tree, struct starpu_rbtree_node *parent, int index, struct starpu_rbtree_node *node); /** * Return the previous or next node relative to a location in a tree. * * The parent and index parameters define the location, which can be empty. * The direction parameter is either STARPU_RBTREE_LEFT (to obtain the previous * node) or STARPU_RBTREE_RIGHT (to obtain the next one). */ struct starpu_rbtree_node * starpu_rbtree_nearest(struct starpu_rbtree_node *parent, int index, int direction); /** * Return the first or last node of a tree. * * The direction parameter is either STARPU_RBTREE_LEFT (to obtain the first node) * or STARPU_RBTREE_RIGHT (to obtain the last one). */ struct starpu_rbtree_node * starpu_rbtree_firstlast(const struct starpu_rbtree *tree, int direction); /** * Return the node next to, or previous to the given node. * * The direction parameter is either STARPU_RBTREE_LEFT (to obtain the previous node) * or STARPU_RBTREE_RIGHT (to obtain the next one). */ struct starpu_rbtree_node * starpu_rbtree_walk(struct starpu_rbtree_node *node, int direction); /** * Return the left-most deepest node of a tree, which is the starting point of * the postorder traversal performed by starpu_rbtree_for_each_remove(). */ struct starpu_rbtree_node * starpu_rbtree_postwalk_deepest(const struct starpu_rbtree *tree); /** * Unlink a node from its tree and return the next (right) node in postorder. */ struct starpu_rbtree_node * starpu_rbtree_postwalk_unlink(struct starpu_rbtree_node *node); #endif /* _KERN_RBTREE_I_H */ starpu-1.4.9+dfsg/src/common/rwlock.c000066400000000000000000000061701507764646700175400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /** * A dummy implementation of a rw_lock using spinlocks ... */ #include "rwlock.h" static void _starpu_take_busy_lock(struct _starpu_rw_lock *lock) { uint32_t prev; do { prev = STARPU_TEST_AND_SET(&lock->busy, 1); } while (prev); } static void _starpu_release_busy_lock(struct _starpu_rw_lock *lock) { STARPU_RELEASE(&lock->busy); } void _starpu_init_rw_lock(struct _starpu_rw_lock *lock) { STARPU_ASSERT(lock); lock->writer = 0; lock->readercnt = 0; lock->busy = 0; } int _starpu_take_rw_lock_write_try(struct _starpu_rw_lock *lock) { _starpu_take_busy_lock(lock); if (lock->readercnt > 0 || lock->writer) { /* fail to take the lock */ _starpu_release_busy_lock(lock); return -1; } else { STARPU_ASSERT(lock->readercnt == 0); STARPU_ASSERT(lock->writer == 0); /* no one was either writing nor reading */ lock->writer = 1; _starpu_release_busy_lock(lock); return 0; } } int _starpu_take_rw_lock_read_try(struct _starpu_rw_lock *lock) { _starpu_take_busy_lock(lock); if (lock->writer) { /* there is a writer ... */ _starpu_release_busy_lock(lock); return -1; } else { STARPU_ASSERT(lock->writer == 0); /* no one is writing */ /* XXX check wrap arounds ... */ lock->readercnt++; _starpu_release_busy_lock(lock); return 0; } } void _starpu_take_rw_lock_write(struct _starpu_rw_lock *lock) { do { _starpu_take_busy_lock(lock); if (lock->readercnt > 0 || lock->writer) { /* fail to take the lock */ _starpu_release_busy_lock(lock); } else { STARPU_ASSERT(lock->readercnt == 0); STARPU_ASSERT(lock->writer == 0); /* no one was either writing nor reading */ lock->writer = 1; _starpu_release_busy_lock(lock); return; } } while (1); } void _starpu_take_rw_lock_read(struct _starpu_rw_lock *lock) { do { _starpu_take_busy_lock(lock); if (lock->writer) { /* there is a writer ... */ _starpu_release_busy_lock(lock); } else { STARPU_ASSERT(lock->writer == 0); /* no one is writing */ /* XXX check wrap arounds ... */ lock->readercnt++; _starpu_release_busy_lock(lock); return; } } while (1); } void _starpu_release_rw_lock(struct _starpu_rw_lock *lock) { _starpu_take_busy_lock(lock); /* either writer or reader (exactly one !) */ if (lock->writer) { STARPU_ASSERT(lock->readercnt == 0); lock->writer = 0; } else { /* reading mode */ STARPU_ASSERT(lock->writer == 0); lock->readercnt--; } _starpu_release_busy_lock(lock); } starpu-1.4.9+dfsg/src/common/rwlock.h000066400000000000000000000032511507764646700175420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __RWLOCKS_H__ #define __RWLOCKS_H__ #include #include #pragma GCC visibility push(hidden) /** @file */ /** Dummy implementation of a RW-lock using a spinlock. */ struct _starpu_rw_lock { uint32_t busy; uint8_t writer; uint16_t readercnt; }; /** Initialize the RW-lock */ void _starpu_init_rw_lock(struct _starpu_rw_lock *lock); /** Grab the RW-lock in a write mode */ void _starpu_take_rw_lock_write(struct _starpu_rw_lock *lock); /** Grab the RW-lock in a read mode */ void _starpu_take_rw_lock_read(struct _starpu_rw_lock *lock); /** Try to grab the RW-lock in a write mode. Returns 0 in case of success, -1 * otherwise. */ int _starpu_take_rw_lock_write_try(struct _starpu_rw_lock *lock); /** Try to grab the RW-lock in a read mode. Returns 0 in case of success, -1 * otherwise. */ int _starpu_take_rw_lock_read_try(struct _starpu_rw_lock *lock); /** Unlock the RW-lock. */ void _starpu_release_rw_lock(struct _starpu_rw_lock *lock); #pragma GCC visibility pop #endif starpu-1.4.9+dfsg/src/common/starpu_spinlock.c000066400000000000000000000026701507764646700214600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #if defined(STARPU_SPINLOCK_CHECK) int _starpu_spin_init(struct _starpu_spinlock *lock) { starpu_pthread_mutexattr_t errcheck_attr; int ret; ret = starpu_pthread_mutexattr_init(&errcheck_attr); STARPU_CHECK_RETURN_VALUE(ret, "starpu_pthread_mutexattr_init"); ret = starpu_pthread_mutexattr_settype(&errcheck_attr, PTHREAD_MUTEX_ERRORCHECK); STARPU_ASSERT(!ret); ret = starpu_pthread_mutex_init(&lock->errcheck_lock, &errcheck_attr); starpu_pthread_mutexattr_destroy(&errcheck_attr); return ret; } int _starpu_spin_destroy(struct _starpu_spinlock *lock) { return starpu_pthread_mutex_destroy(&lock->errcheck_lock); } #endif starpu-1.4.9+dfsg/src/common/starpu_spinlock.h000066400000000000000000000107501507764646700214630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_SPINLOCK_H__ #define __STARPU_SPINLOCK_H__ /** @file */ #include #include #include #include #include #include #ifdef STARPU_SPINLOCK_CHECK /* We don't care about performance */ struct _starpu_spinlock { starpu_pthread_mutex_t errcheck_lock; const char *last_taker; }; int _starpu_spin_init(struct _starpu_spinlock *lock); int _starpu_spin_destroy(struct _starpu_spinlock *lock); static inline int __starpu_spin_lock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) { _STARPU_TRACE_LOCKING_SPINLOCK(file, line); int ret = starpu_pthread_mutex_lock(&lock->errcheck_lock); STARPU_ASSERT(!ret); lock->last_taker = func; _STARPU_TRACE_SPINLOCK_LOCKED(file, line); return ret; } static inline void _starpu_spin_checklocked(struct _starpu_spinlock *lock STARPU_ATTRIBUTE_UNUSED) { STARPU_ASSERT(starpu_pthread_mutex_trylock(&lock->errcheck_lock) != 0); } static inline int __starpu_spin_trylock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) { _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line); int ret = starpu_pthread_mutex_trylock(&lock->errcheck_lock); STARPU_ASSERT(!ret || (ret == EBUSY)); if (STARPU_LIKELY(!ret)) { lock->last_taker = func; _STARPU_TRACE_SPINLOCK_LOCKED(file, line); } return ret; } static inline int __starpu_spin_unlock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) { _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line); int ret = starpu_pthread_mutex_unlock(&lock->errcheck_lock); STARPU_ASSERT(!ret); _STARPU_TRACE_SPINLOCK_UNLOCKED(file, line); return ret; } #else /* We do care about performance, inline as much as possible */ struct _starpu_spinlock { starpu_pthread_spinlock_t lock; }; static inline int _starpu_spin_init(struct _starpu_spinlock *lock) { int ret = starpu_pthread_spin_init(&lock->lock, 0); STARPU_ASSERT(!ret); return ret; } #define _starpu_spin_destroy(_lock) starpu_pthread_spin_destroy(&(_lock)->lock) static inline int __starpu_spin_lock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) { _STARPU_TRACE_LOCKING_SPINLOCK(file, line); int ret = starpu_pthread_spin_lock(&lock->lock); STARPU_ASSERT(!ret); _STARPU_TRACE_SPINLOCK_LOCKED(file, line); return ret; } #define _starpu_spin_checklocked(_lock) _starpu_pthread_spin_checklocked(&(_lock)->lock) static inline int __starpu_spin_trylock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) { _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line); int ret = starpu_pthread_spin_trylock(&lock->lock); STARPU_ASSERT(!ret || (ret == EBUSY)); if (STARPU_LIKELY(!ret)) _STARPU_TRACE_SPINLOCK_LOCKED(file, line); return ret; } static inline int __starpu_spin_unlock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) { _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line); int ret = starpu_pthread_spin_unlock(&lock->lock); STARPU_ASSERT(!ret); _STARPU_TRACE_SPINLOCK_UNLOCKED(file, line); return ret; } #endif #define _starpu_spin_lock(lock) \ __starpu_spin_lock(lock, __FILE__, __LINE__, __starpu_func__) #define _starpu_spin_trylock(lock) \ __starpu_spin_trylock(lock, __FILE__, __LINE__, __starpu_func__) #define _starpu_spin_unlock(lock) \ __starpu_spin_unlock(lock, __FILE__, __LINE__, __starpu_func__) #define STARPU_SPIN_MAXTRY 10 #endif // __STARPU_SPINLOCK_H__ starpu-1.4.9+dfsg/src/common/thread.c000066400000000000000000000714751507764646700175200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #ifdef STARPU_DEBUG #include #endif #include #include #include #include #include #ifdef STARPU_SIMGRID #ifdef STARPU_HAVE_SIMGRID_MUTEX_H #include #include #elif defined(STARPU_HAVE_XBT_SYNCHRO_H) #include #else #include #endif #include #if !defined(HAVE_SG_ACTOR_GET_DATA) && !defined(HAVE_SG_ACTOR_DATA) && \ (defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data)) && \ !(defined(HAVE_MSG_PROCESS_SELF_NAME) || defined(MSG_process_self_name)) #include #endif #else #if defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) #include #include /* Private futexes are not so old, cope with old kernels. */ #ifdef FUTEX_WAIT_PRIVATE static int _starpu_futex_wait = FUTEX_WAIT_PRIVATE; static int _starpu_futex_wake = FUTEX_WAKE_PRIVATE; #else static int _starpu_futex_wait = FUTEX_WAIT; static int _starpu_futex_wake = FUTEX_WAKE; #endif #endif #endif /* !STARPU_SIMGRID */ #ifdef STARPU_SIMGRID int starpu_pthread_equal(starpu_pthread_t t1, starpu_pthread_t t2) { return t1 == t2; } starpu_pthread_t starpu_pthread_self(void) { #ifdef HAVE_SG_ACTOR_SELF return sg_actor_self(); #else return MSG_process_self(); #endif } int starpu_pthread_create_on(const char *name, starpu_pthread_t *thread, const starpu_pthread_attr_t *attr STARPU_ATTRIBUTE_UNUSED, void *(*start_routine) (void *), void *arg, starpu_sg_host_t host) { char **_args; int ret; _STARPU_MALLOC(_args, 3*sizeof(char*)); ret = asprintf(&_args[0], "%p", start_routine); STARPU_ASSERT(ret); ret = asprintf(&_args[1], "%p", arg); STARPU_ASSERT(ret); _args[2] = NULL; if (!host) host = _starpu_simgrid_get_host_by_name("MAIN"); void *tsd; _STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*)); #ifndef HAVE_SG_ACTOR_SET_STACKSIZE if (attr && attr->stacksize) _starpu_simgrid_set_stack_size(attr->stacksize); #endif #ifdef HAVE_SG_ACTOR_INIT *thread= sg_actor_init(name, host); #ifdef HAVE_SG_ACTOR_SET_STACKSIZE if (attr && attr->stacksize) sg_actor_set_stacksize(*thread, attr->stacksize); #endif #ifdef HAVE_SG_ACTOR_SET_DATA sg_actor_set_data(*thread, tsd); #else sg_actor_data_set(*thread, tsd); #endif sg_actor_start(*thread, _starpu_simgrid_thread_start, 2, _args); #else *thread = MSG_process_create_with_arguments(name, _starpu_simgrid_thread_start, tsd, host, 2, _args); #if defined(HAVE_SG_ACTOR_DATA) || defined(HAVE_SG_ACTOR_GET_DATA) #ifdef HAVE_SG_ACTOR_SET_DATA sg_actor_set_data(*thread, tsd); #else sg_actor_data_set(*thread, tsd); #endif #endif #endif #ifndef HAVE_SG_ACTOR_SET_STACKSIZE if (attr && attr->stacksize) _starpu_simgrid_set_stack_size(_starpu_default_stack_size); #endif #if SIMGRID_VERSION >= 31500 && SIMGRID_VERSION != 31559 # ifdef HAVE_SG_ACTOR_REF sg_actor_ref(*thread); # else MSG_process_ref(*thread); # endif #endif return 0; } int starpu_pthread_create(starpu_pthread_t *thread, const starpu_pthread_attr_t *attr, void *(*start_routine) (void *), void *arg) { return starpu_pthread_create_on("", thread, attr, start_routine, arg, NULL); } int starpu_pthread_join(starpu_pthread_t thread STARPU_ATTRIBUTE_UNUSED, void **retval STARPU_ATTRIBUTE_UNUSED) { #if SIMGRID_VERSION >= 31400 # ifdef STARPU_HAVE_SIMGRID_ACTOR_H sg_actor_join(thread, 1000000); # else MSG_process_join(thread, 1000000); # endif #if SIMGRID_VERSION >= 31500 && SIMGRID_VERSION != 31559 # ifdef HAVE_SG_ACTOR_REF sg_actor_unref(thread); # else MSG_process_unref(thread); # endif #endif #else starpu_sleep(1); #endif return 0; } int starpu_pthread_detach(starpu_pthread_t thread STARPU_ATTRIBUTE_UNUSED) { #if SIMGRID_VERSION >= 31500 && SIMGRID_VERSION != 31559 # ifdef HAVE_SG_ACTOR_REF sg_actor_unref(thread); # else MSG_process_unref(thread); # endif #endif return 0; } int starpu_pthread_exit(void *retval STARPU_ATTRIBUTE_UNUSED) { #ifdef HAVE_SG_ACTOR_SELF sg_actor_kill(sg_actor_self()); #else MSG_process_kill(MSG_process_self()); #endif STARPU_ABORT_MSG("MSG_process_kill(MSG_process_self()) returned?!"); } int starpu_pthread_attr_init(starpu_pthread_attr_t *attr) { attr->stacksize = 0; return 0; } int starpu_pthread_attr_destroy(starpu_pthread_attr_t *attr STARPU_ATTRIBUTE_UNUSED) { return 0; } int starpu_pthread_attr_setstacksize(starpu_pthread_attr_t *attr, size_t stacksize) { attr->stacksize = stacksize; return 0; } int starpu_pthread_attr_setdetachstate(starpu_pthread_attr_t *attr STARPU_ATTRIBUTE_UNUSED, int detachstate STARPU_ATTRIBUTE_UNUSED) { return 0; } int starpu_pthread_mutex_init(starpu_pthread_mutex_t *mutex, const starpu_pthread_mutexattr_t *mutexattr STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_HAVE_SIMGRID_MUTEX_H *mutex = sg_mutex_init(); #else *mutex = xbt_mutex_init(); #endif return 0; } int starpu_pthread_mutex_destroy(starpu_pthread_mutex_t *mutex) { if (*mutex) #ifdef STARPU_HAVE_SIMGRID_MUTEX_H sg_mutex_destroy(*mutex); #else xbt_mutex_destroy(*mutex); #endif return 0; } int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex) { _STARPU_TRACE_LOCKING_MUTEX(); /* Note: this is actually safe, because simgrid only preempts within * simgrid functions */ if (!*mutex) { /* Here we may get preempted */ #ifdef STARPU_HAVE_SIMGRID_MUTEX_H sg_mutex_t new_mutex = sg_mutex_init(); #else xbt_mutex_t new_mutex = xbt_mutex_init(); #endif if (!*mutex) *mutex = new_mutex; else /* Somebody already initialized it while we were * calling sg_mutex_init, this one is now useless */ #ifdef STARPU_HAVE_SIMGRID_MUTEX_H sg_mutex_destroy(new_mutex); #else xbt_mutex_destroy(new_mutex); #endif } #ifdef STARPU_HAVE_SIMGRID_MUTEX_H sg_mutex_lock(*mutex); #else xbt_mutex_acquire(*mutex); #endif _STARPU_TRACE_MUTEX_LOCKED(); return 0; } int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex) { _STARPU_TRACE_UNLOCKING_MUTEX(); #ifdef STARPU_HAVE_SIMGRID_MUTEX_H sg_mutex_unlock(*mutex); #else xbt_mutex_release(*mutex); #endif _STARPU_TRACE_MUTEX_UNLOCKED(); return 0; } int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex) { int ret; _STARPU_TRACE_TRYLOCK_MUTEX(); #ifdef STARPU_HAVE_SIMGRID_MUTEX_H ret = sg_mutex_try_lock(*mutex); #elif defined(HAVE_XBT_MUTEX_TRY_ACQUIRE) || defined(xbt_mutex_try_acquire) ret = xbt_mutex_try_acquire(*mutex); #else ret = simcall_mutex_trylock((smx_mutex_t)*mutex); #endif ret = ret ? 0 : EBUSY; _STARPU_TRACE_MUTEX_LOCKED(); return ret; } int starpu_pthread_mutexattr_gettype(const starpu_pthread_mutexattr_t *attr STARPU_ATTRIBUTE_UNUSED, int *type STARPU_ATTRIBUTE_UNUSED) { return 0; } int starpu_pthread_mutexattr_settype(starpu_pthread_mutexattr_t *attr STARPU_ATTRIBUTE_UNUSED, int type STARPU_ATTRIBUTE_UNUSED) { return 0; } int starpu_pthread_mutexattr_destroy(starpu_pthread_mutexattr_t *attr STARPU_ATTRIBUTE_UNUSED) { return 0; } int starpu_pthread_mutexattr_init(starpu_pthread_mutexattr_t *attr STARPU_ATTRIBUTE_UNUSED) { return 0; } /* Indexed by key-1 */ static int used_key[MAX_TSD]; int starpu_pthread_key_create(starpu_pthread_key_t *key, void (*destr_function) (void *) STARPU_ATTRIBUTE_UNUSED) { unsigned i; /* Note: no synchronization here, we are actually monothreaded anyway. */ for (i = 0; i < MAX_TSD; i++) { if (!used_key[i]) { used_key[i] = 1; break; } } STARPU_ASSERT(i < MAX_TSD); /* key 0 is for process pointer argument */ *key = i+1; return 0; } int starpu_pthread_key_delete(starpu_pthread_key_t key) { used_key[key-1] = 0; return 0; } /* We need it only when using smpi */ #pragma weak smpi_process_get_user_data #if !HAVE_DECL_SMPI_PROCESS_SET_USER_DATA && !defined(smpi_process_get_user_data) extern void *smpi_process_get_user_data(); #endif int starpu_pthread_setspecific(starpu_pthread_key_t key, const void *pointer) { void **array; #ifdef HAVE_SG_ACTOR_GET_DATA array = sg_actor_get_data(sg_actor_self()); #elif defined(HAVE_SG_ACTOR_DATA) array = sg_actor_data(sg_actor_self()); #else #if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data) #if defined(HAVE_MSG_PROCESS_SELF_NAME) || defined(MSG_process_self_name) const char *process_name = MSG_process_self_name(); #else const char *process_name = SIMIX_process_self_get_name(); #endif char *end; /* Test whether it is an MPI rank */ strtol(process_name, &end, 10); if (!*end || !strcmp(process_name, "wait for mpi transfer") || (!strcmp(process_name, "main") && _starpu_simgrid_running_smpi())) /* Special-case the SMPI process */ array = smpi_process_get_user_data(); else #endif array = MSG_process_get_data(MSG_process_self()); #endif array[key] = (void*) pointer; return 0; } void* starpu_pthread_getspecific(starpu_pthread_key_t key) { void **array; #ifdef HAVE_SG_ACTOR_GET_DATA array = sg_actor_get_data(sg_actor_self()); #elif defined(HAVE_SG_ACTOR_DATA) array = sg_actor_data(sg_actor_self()); #else #if defined(HAVE_SMPI_PROCESS_SET_USER_DATA) || defined(smpi_process_get_user_data) #if defined(HAVE_MSG_PROCESS_SELF_NAME) || defined(MSG_process_self_name) const char *process_name = MSG_process_self_name(); #else const char *process_name = SIMIX_process_self_get_name(); #endif char *end; /* Test whether it is an MPI rank */ strtol(process_name, &end, 10); if (!*end || !strcmp(process_name, "wait for mpi transfer") || (!strcmp(process_name, "main") && _starpu_simgrid_running_smpi())) /* Special-case the SMPI processes */ array = smpi_process_get_user_data(); else #endif array = MSG_process_get_data(MSG_process_self()); #endif if (!array) return NULL; return array[key]; } int starpu_pthread_cond_init(starpu_pthread_cond_t *cond, starpu_pthread_condattr_t *cond_attr STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_HAVE_SIMGRID_COND_H *cond = sg_cond_init(); #else *cond = xbt_cond_init(); #endif return 0; } static void _starpu_pthread_cond_auto_init(starpu_pthread_cond_t *cond) { /* Note: this is actually safe, because simgrid only preempts within * simgrid functions */ if (!*cond) { /* Here we may get preempted */ #ifdef STARPU_HAVE_SIMGRID_COND_H sg_cond_t new_cond = sg_cond_init(); #else xbt_cond_t new_cond = xbt_cond_init(); #endif if (!*cond) *cond = new_cond; else /* Somebody already initialized it while we were * calling xbt_cond_init, this one is now useless */ #ifdef STARPU_HAVE_SIMGRID_COND_H sg_cond_destroy(new_cond); #else xbt_cond_destroy(new_cond); #endif } } int starpu_pthread_cond_signal(starpu_pthread_cond_t *cond) { _starpu_pthread_cond_auto_init(cond); #ifdef STARPU_HAVE_SIMGRID_COND_H sg_cond_notify_one(*cond); #else xbt_cond_signal(*cond); #endif return 0; } int starpu_pthread_cond_broadcast(starpu_pthread_cond_t *cond) { _starpu_pthread_cond_auto_init(cond); #ifdef STARPU_HAVE_SIMGRID_COND_H sg_cond_notify_all(*cond); #else xbt_cond_broadcast(*cond); #endif return 0; } int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex) { _STARPU_TRACE_COND_WAIT_BEGIN(); _starpu_pthread_cond_auto_init(cond); #ifdef STARPU_HAVE_SIMGRID_COND_H sg_cond_wait(*cond, *mutex); #else xbt_cond_wait(*cond, *mutex); #endif _STARPU_TRACE_COND_WAIT_END(); return 0; } int starpu_pthread_cond_timedwait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex, const struct timespec *abstime) { #if SIMGRID_VERSION >= 31800 struct timespec now, delta; double delay; int ret = 0; _starpu_clock_gettime(&now); delta.tv_sec = abstime->tv_sec - now.tv_sec; delta.tv_nsec = abstime->tv_nsec - now.tv_nsec; delay = (double) delta.tv_sec + (double) delta.tv_nsec / 1000000000.; _STARPU_TRACE_COND_WAIT_BEGIN(); _starpu_pthread_cond_auto_init(cond); #ifdef STARPU_HAVE_SIMGRID_COND_H ret = sg_cond_wait_for(*cond, *mutex, delay) ? ETIMEDOUT : 0; #else ret = xbt_cond_timedwait(*cond, *mutex, delay) ? ETIMEDOUT : 0; #endif _STARPU_TRACE_COND_WAIT_END(); return ret; #else STARPU_ASSERT_MSG(0, "simgrid version is too old for this"); #endif } int starpu_pthread_cond_destroy(starpu_pthread_cond_t *cond) { if (*cond) #ifdef STARPU_HAVE_SIMGRID_COND_H sg_cond_destroy(*cond); #else xbt_cond_destroy(*cond); #endif return 0; } /* TODO: use rwlocks * https://framagit.org/simgrid/simgrid/-/issues/92 */ int starpu_pthread_rwlock_init(starpu_pthread_rwlock_t *restrict rwlock, const starpu_pthread_rwlockattr_t *restrict attr STARPU_ATTRIBUTE_UNUSED) { return starpu_pthread_mutex_init(rwlock, NULL); } int starpu_pthread_rwlock_destroy(starpu_pthread_rwlock_t *rwlock) { return starpu_pthread_mutex_destroy(rwlock); } int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock) { _STARPU_TRACE_RDLOCKING_RWLOCK(); int p_ret = starpu_pthread_mutex_lock(rwlock); _STARPU_TRACE_RWLOCK_RDLOCKED(); return p_ret; } int starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock) { int p_ret = starpu_pthread_mutex_trylock(rwlock); if (!p_ret) _STARPU_TRACE_RWLOCK_RDLOCKED(); return p_ret; } int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock) { _STARPU_TRACE_WRLOCKING_RWLOCK(); int p_ret = starpu_pthread_mutex_lock(rwlock); _STARPU_TRACE_RWLOCK_WRLOCKED(); return p_ret; } int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock) { int p_ret = starpu_pthread_mutex_trylock(rwlock); if (!p_ret) _STARPU_TRACE_RWLOCK_RDLOCKED(); return p_ret; } int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock) { _STARPU_TRACE_UNLOCKING_RWLOCK(); int p_ret = starpu_pthread_mutex_unlock(rwlock); _STARPU_TRACE_RWLOCK_UNLOCKED(); return p_ret; } #ifdef STARPU_HAVE_SIMGRID_BARRIER_H int starpu_pthread_barrier_init(starpu_pthread_barrier_t *restrict barrier, const starpu_pthread_barrierattr_t *restrict attr STARPU_ATTRIBUTE_UNUSED, unsigned count) { *barrier = sg_barrier_init(count); return 0; } int starpu_pthread_barrier_destroy(starpu_pthread_barrier_t *barrier) { if (*barrier) sg_barrier_destroy(*barrier); return 0; } int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) { int ret; _STARPU_TRACE_BARRIER_WAIT_BEGIN(); ret = sg_barrier_wait(*barrier); _STARPU_TRACE_BARRIER_WAIT_END(); return ret; } #elif defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT) || defined(xbt_barrier_init) int starpu_pthread_barrier_init(starpu_pthread_barrier_t *restrict barrier, const starpu_pthread_barrierattr_t *restrict attr STARPU_ATTRIBUTE_UNUSED, unsigned count) { *barrier = xbt_barrier_init(count); return 0; } int starpu_pthread_barrier_destroy(starpu_pthread_barrier_t *barrier) { if (*barrier) xbt_barrier_destroy(*barrier); return 0; } int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) { int ret; _STARPU_TRACE_BARRIER_WAIT_BEGIN(); ret = xbt_barrier_wait(*barrier); _STARPU_TRACE_BARRIER_WAIT_END(); return ret; } #endif /* defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT) */ int starpu_pthread_queue_init(starpu_pthread_queue_t *q) { STARPU_PTHREAD_MUTEX_INIT(&q->mutex, NULL); q->queue = NULL; q->allocqueue = 0; q->nqueue = 0; return 0; } int starpu_pthread_wait_init(starpu_pthread_wait_t *w) { STARPU_PTHREAD_MUTEX_INIT(&w->mutex, NULL); STARPU_PTHREAD_COND_INIT(&w->cond, NULL); w->block = 1; return 0; } int starpu_pthread_queue_register(starpu_pthread_wait_t *w, starpu_pthread_queue_t *q) { STARPU_PTHREAD_MUTEX_LOCK(&q->mutex); if (q->nqueue == q->allocqueue) { /* Make room for the new waiter */ unsigned newalloc; newalloc = q->allocqueue * 2; if (!newalloc) newalloc = 1; _STARPU_REALLOC(q->queue, newalloc * sizeof(*(q->queue))); q->allocqueue = newalloc; } q->queue[q->nqueue++] = w; STARPU_PTHREAD_MUTEX_UNLOCK(&q->mutex); return 0; } int starpu_pthread_queue_unregister(starpu_pthread_wait_t *w, starpu_pthread_queue_t *q) { unsigned i; STARPU_PTHREAD_MUTEX_LOCK(&q->mutex); for (i = 0; i < q->nqueue; i++) { if (q->queue[i] == w) { memmove(&q->queue[i], &q->queue[i+1], (q->nqueue - i - 1) * sizeof(*(q->queue))); break; } } STARPU_ASSERT(i < q->nqueue); q->nqueue--; STARPU_PTHREAD_MUTEX_UNLOCK(&q->mutex); return 0; } int starpu_pthread_wait_reset(starpu_pthread_wait_t *w) { STARPU_PTHREAD_MUTEX_LOCK(&w->mutex); w->block = 1; STARPU_PTHREAD_MUTEX_UNLOCK(&w->mutex); return 0; } int starpu_pthread_wait_wait(starpu_pthread_wait_t *w) { STARPU_PTHREAD_MUTEX_LOCK(&w->mutex); while (w->block == 1) STARPU_PTHREAD_COND_WAIT(&w->cond, &w->mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&w->mutex); return 0; } /* pthread_cond_timedwait not yet available on windows, but we don't run simgrid there anyway */ #ifdef STARPU_SIMGRID int starpu_pthread_wait_timedwait(starpu_pthread_wait_t *w, const struct timespec *abstime) { STARPU_PTHREAD_MUTEX_LOCK(&w->mutex); while (w->block == 1) STARPU_PTHREAD_COND_TIMEDWAIT(&w->cond, &w->mutex, abstime); STARPU_PTHREAD_MUTEX_UNLOCK(&w->mutex); return 0; } #endif int starpu_pthread_queue_signal(starpu_pthread_queue_t *q) { starpu_pthread_wait_t *w; STARPU_PTHREAD_MUTEX_LOCK(&q->mutex); if (q->nqueue) { /* TODO: better try to wake a sleeping one if possible */ w = q->queue[0]; STARPU_PTHREAD_MUTEX_LOCK(&w->mutex); w->block = 0; STARPU_PTHREAD_COND_SIGNAL(&w->cond); STARPU_PTHREAD_MUTEX_UNLOCK(&w->mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&q->mutex); return 0; } int starpu_pthread_queue_broadcast(starpu_pthread_queue_t *q) { unsigned i; starpu_pthread_wait_t *w; STARPU_PTHREAD_MUTEX_LOCK(&q->mutex); for (i = 0; i < q->nqueue; i++) { w = q->queue[i]; STARPU_PTHREAD_MUTEX_LOCK(&w->mutex); w->block = 0; STARPU_PTHREAD_COND_SIGNAL(&w->cond); STARPU_PTHREAD_MUTEX_UNLOCK(&w->mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&q->mutex); return 0; } int starpu_pthread_wait_destroy(starpu_pthread_wait_t *w) { STARPU_PTHREAD_MUTEX_LOCK(&w->mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&w->mutex); STARPU_PTHREAD_MUTEX_DESTROY(&w->mutex); STARPU_PTHREAD_COND_DESTROY(&w->cond); return 0; } int starpu_pthread_queue_destroy(starpu_pthread_queue_t *q) { STARPU_ASSERT(!q->nqueue); STARPU_PTHREAD_MUTEX_LOCK(&q->mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&q->mutex); STARPU_PTHREAD_MUTEX_DESTROY(&q->mutex); free(q->queue); return 0; } #endif /* STARPU_SIMGRID */ #if (defined(STARPU_SIMGRID) && !defined(STARPU_HAVE_SIMGRID_BARRIER_H) && !defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT) && !defined(xbt_barrier_init)) || (!defined(STARPU_SIMGRID) && !defined(STARPU_HAVE_PTHREAD_BARRIER)) int starpu_pthread_barrier_init(starpu_pthread_barrier_t *restrict barrier, const starpu_pthread_barrierattr_t *restrict attr STARPU_ATTRIBUTE_UNUSED, unsigned count) { int ret = starpu_pthread_mutex_init(&barrier->mutex, NULL); if (!ret) ret = starpu_pthread_cond_init(&barrier->cond, NULL); if (!ret) ret = starpu_pthread_cond_init(&barrier->cond_destroy, NULL); barrier->count = count; barrier->done = 0; barrier->busy = 0; return ret; } int starpu_pthread_barrier_destroy(starpu_pthread_barrier_t *barrier) { starpu_pthread_mutex_lock(&barrier->mutex); while (barrier->busy) { starpu_pthread_cond_wait(&barrier->cond_destroy, &barrier->mutex); } starpu_pthread_mutex_unlock(&barrier->mutex); int ret = starpu_pthread_mutex_destroy(&barrier->mutex); if (!ret) ret = starpu_pthread_cond_destroy(&barrier->cond); if (!ret) ret = starpu_pthread_cond_destroy(&barrier->cond_destroy); return ret; } int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) { int ret = 0; _STARPU_TRACE_BARRIER_WAIT_BEGIN(); starpu_pthread_mutex_lock(&barrier->mutex); barrier->done++; if (barrier->done == barrier->count) { barrier->done = 0; starpu_pthread_cond_broadcast(&barrier->cond); ret = STARPU_PTHREAD_BARRIER_SERIAL_THREAD; } else { barrier->busy++; starpu_pthread_cond_wait(&barrier->cond, &barrier->mutex); barrier->busy--; starpu_pthread_cond_broadcast(&barrier->cond_destroy); } starpu_pthread_mutex_unlock(&barrier->mutex); _STARPU_TRACE_BARRIER_WAIT_END(); return ret; } #endif /* defined(STARPU_SIMGRID) || !defined(STARPU_HAVE_PTHREAD_BARRIER) */ #ifdef STARPU_FXT_LOCK_TRACES #if !defined(STARPU_SIMGRID) && !defined(_MSC_VER) /* !STARPU_SIMGRID */ int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex) { _STARPU_TRACE_LOCKING_MUTEX(); int p_ret = pthread_mutex_lock(mutex); _STARPU_TRACE_MUTEX_LOCKED(); return p_ret; } int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex) { _STARPU_TRACE_UNLOCKING_MUTEX(); int p_ret = pthread_mutex_unlock(mutex); _STARPU_TRACE_MUTEX_UNLOCKED(); return p_ret; } int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex) { int ret; _STARPU_TRACE_TRYLOCK_MUTEX(); ret = pthread_mutex_trylock(mutex); if (!ret) _STARPU_TRACE_MUTEX_LOCKED(); return ret; } int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex) { _STARPU_TRACE_COND_WAIT_BEGIN(); int p_ret = pthread_cond_wait(cond, mutex); _STARPU_TRACE_COND_WAIT_END(); return p_ret; } int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock) { _STARPU_TRACE_RDLOCKING_RWLOCK(); int p_ret = pthread_rwlock_rdlock(rwlock); _STARPU_TRACE_RWLOCK_RDLOCKED(); return p_ret; } int starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock) { _STARPU_TRACE_RDLOCKING_RWLOCK(); int p_ret = pthread_rwlock_tryrdlock(rwlock); if (!p_ret) _STARPU_TRACE_RWLOCK_RDLOCKED(); return p_ret; } int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock) { _STARPU_TRACE_WRLOCKING_RWLOCK(); int p_ret = pthread_rwlock_wrlock(rwlock); _STARPU_TRACE_RWLOCK_WRLOCKED(); return p_ret; } int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock) { _STARPU_TRACE_WRLOCKING_RWLOCK(); int p_ret = pthread_rwlock_trywrlock(rwlock); if (!p_ret) _STARPU_TRACE_RWLOCK_WRLOCKED(); return p_ret; } int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock) { _STARPU_TRACE_UNLOCKING_RWLOCK(); int p_ret = pthread_rwlock_unlock(rwlock); _STARPU_TRACE_RWLOCK_UNLOCKED(); return p_ret; } #endif /* !defined(STARPU_SIMGRID) && !defined(_MSC_VER) */ #if !defined(STARPU_SIMGRID) && !defined(_MSC_VER) && defined(STARPU_HAVE_PTHREAD_BARRIER) int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) { int ret; _STARPU_TRACE_BARRIER_WAIT_BEGIN(); ret = pthread_barrier_wait(barrier); _STARPU_TRACE_BARRIER_WAIT_END(); return ret; } #endif /* STARPU_SIMGRID, _MSC_VER, STARPU_HAVE_PTHREAD_BARRIER */ #endif /* STARPU_FXT_LOCK_TRACES */ /* "sched" variants, to be used (through the STARPU_PTHREAD_MUTEX_*LOCK_SCHED * macros of course) which record when the mutex is held or not */ int starpu_pthread_mutex_lock_sched(starpu_pthread_mutex_t *mutex) { return starpu_pthread_mutex_lock(mutex); } int starpu_pthread_mutex_unlock_sched(starpu_pthread_mutex_t *mutex) { return starpu_pthread_mutex_unlock(mutex); } int starpu_pthread_mutex_trylock_sched(starpu_pthread_mutex_t *mutex) { return starpu_pthread_mutex_trylock(mutex); } #ifdef STARPU_DEBUG void starpu_pthread_mutex_check_sched(starpu_pthread_mutex_t *mutex, char *file, int line) { int workerid = starpu_worker_get_id(); STARPU_ASSERT_MSG(workerid == -1 || !_starpu_worker_mutex_is_sched_mutex(workerid, mutex), "%s:%d is locking/unlocking a sched mutex but not using STARPU_PTHREAD_MUTEX_LOCK_SCHED", file, line); } #endif #if defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(HAVE_PTHREAD_SPIN_LOCK) #undef starpu_pthread_spin_init int starpu_pthread_spin_init(starpu_pthread_spinlock_t *lock, int pshared) { return _starpu_pthread_spin_init(lock, pshared); } #undef starpu_pthread_spin_destroy int starpu_pthread_spin_destroy(starpu_pthread_spinlock_t *lock STARPU_ATTRIBUTE_UNUSED) { return _starpu_pthread_spin_destroy(lock); } #undef starpu_pthread_spin_lock int starpu_pthread_spin_lock(starpu_pthread_spinlock_t *lock) { return _starpu_pthread_spin_lock(lock); } #endif #if defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK) #if !defined(STARPU_SIMGRID) && defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) int _starpu_pthread_spin_do_lock(starpu_pthread_spinlock_t *lock) { if (STARPU_VAL_COMPARE_AND_SWAP(&lock->taken, 0, 1) == 0) /* Got it on first try! */ return 0; /* Busy, spin a bit. */ unsigned i; for (i = 0; i < 128; i++) { /* Pause a bit before retrying */ STARPU_UYIELD(); /* And synchronize with other threads */ STARPU_SYNCHRONIZE(); if (!lock->taken) /* Holder released it, try again */ if (STARPU_VAL_COMPARE_AND_SWAP(&lock->taken, 0, 1) == 0) /* Got it! */ return 0; } /* We have spent enough time with spinning, let's block */ /* This avoids typical 10ms pauses when the application thread tries to submit tasks. */ while (1) { /* Tell releaser to wake us */ unsigned prev = STARPU_VAL_EXCHANGE(&lock->taken, 2); if (prev == 0) /* Ah, it just got released and we actually acquired * it! * Note: the sad thing is that we have just written 2, * so will spuriously try to wake a thread on unlock, * but we can not avoid it since we do not know whether * there are other threads sleeping or not. */ return 0; /* Now start sleeping (unless it was released in between) * We are sure to get woken because either * - some thread has not released the lock yet, and lock->taken * is 2, so it will wake us. * - some other thread started blocking, and will set * lock->taken back to 2 */ if (syscall(SYS_futex, &lock->taken, _starpu_futex_wait, 2, NULL, NULL, 0)) if (errno == ENOSYS) _starpu_futex_wait = FUTEX_WAIT; } } #endif #undef starpu_pthread_spin_trylock int starpu_pthread_spin_trylock(starpu_pthread_spinlock_t *lock) { return _starpu_pthread_spin_trylock(lock); } #undef starpu_pthread_spin_unlock int starpu_pthread_spin_unlock(starpu_pthread_spinlock_t *lock) { return _starpu_pthread_spin_unlock(lock); } #if !defined(STARPU_SIMGRID) && defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) void _starpu_pthread_spin_do_unlock(starpu_pthread_spinlock_t *lock) { /* * Somebody to wake. Clear 'taken' and wake him. * Note that he may not be sleeping yet, but if he is not, we won't * since the value of 'taken' will have changed. */ lock->taken = 0; STARPU_SYNCHRONIZE(); if (syscall(SYS_futex, &lock->taken, _starpu_futex_wake, 1, NULL, NULL, 0) == -1) switch (errno) { case ENOSYS: _starpu_futex_wake = FUTEX_WAKE; if (syscall(SYS_futex, &lock->taken, _starpu_futex_wake, 1, NULL, NULL, 0) == -1) STARPU_ASSERT_MSG(0, "futex(wake) returned %d!", errno); break; case 0: break; default: STARPU_ASSERT_MSG(0, "futex returned %d!", errno); break; } } #endif #endif /* defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK) */ #ifdef STARPU_SIMGRID int starpu_sem_destroy(starpu_sem_t *sem) { #ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H sg_sem_destroy(*sem); #else MSG_sem_destroy(*sem); #endif return 0; } int starpu_sem_init(starpu_sem_t *sem, int pshared, unsigned value) { STARPU_ASSERT_MSG(pshared == 0, "pshared semaphores not supported under simgrid"); #ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H *sem = sg_sem_init(value); #else *sem = MSG_sem_init(value); #endif return 0; } int starpu_sem_post(starpu_sem_t *sem) { #ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H sg_sem_release(*sem); #else MSG_sem_release(*sem); #endif return 0; } int starpu_sem_wait(starpu_sem_t *sem) { #ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H sg_sem_acquire(*sem); #else MSG_sem_acquire(*sem); #endif return 0; } int starpu_sem_trywait(starpu_sem_t *sem) { #ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H if (sg_sem_would_block(*sem)) #else if (MSG_sem_would_block(*sem)) #endif return EAGAIN; starpu_sem_wait(sem); return 0; } int starpu_sem_getvalue(starpu_sem_t *sem, int *sval) { #if SIMGRID_VERSION > 31300 # ifdef STARPU_HAVE_SIMGRID_SEMAPHORE_H *sval = sg_sem_get_capacity(*sem); # else *sval = MSG_sem_get_capacity(*sem); # endif return 0; #else (void) sem; (void) sval; STARPU_ABORT_MSG("sigmrid up to 3.13 did not have working MSG_sem_get_capacity"); #endif } #elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */ int starpu_sem_wait(starpu_sem_t *sem) { int ret; while((ret = sem_wait(sem)) == -1 && errno == EINTR) ; return ret; } int starpu_sem_trywait(starpu_sem_t *sem) { int ret; while((ret = sem_trywait(sem)) == -1 && errno == EINTR) ; return ret; } #endif starpu-1.4.9+dfsg/src/common/thread.h000066400000000000000000000120721507764646700175110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __COMMON_THREAD_H__ #define __COMMON_THREAD_H__ /** @file */ #include #include #pragma GCC visibility push(hidden) #if defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) int _starpu_pthread_spin_do_lock(starpu_pthread_spinlock_t *lock) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; #endif #if defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK) static inline int _starpu_pthread_spin_init(starpu_pthread_spinlock_t *lock, int pshared STARPU_ATTRIBUTE_UNUSED) { lock->taken = 0; return 0; } #define starpu_pthread_spin_init _starpu_pthread_spin_init static inline int _starpu_pthread_spin_destroy(starpu_pthread_spinlock_t *lock STARPU_ATTRIBUTE_UNUSED) { /* we don't do anything */ return 0; } #define starpu_pthread_spin_destroy _starpu_pthread_spin_destroy static inline int _starpu_pthread_spin_lock(starpu_pthread_spinlock_t *lock) { #ifdef STARPU_SIMGRID if (STARPU_LIKELY(!lock->taken)) { lock->taken = 1; return 0; } #ifdef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB /* There is contention, count that a bit */ starpu_sleep(0.000001); /* And try again */ if (STARPU_LIKELY(!lock->taken)) { lock->taken = 1; return 0; } /* Really no luck, really wait for it */ STARPU_PTHREAD_MUTEX_LOCK(&_starpu_simgrid_time_advance_mutex); #endif while (lock->taken) { #ifdef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB STARPU_PTHREAD_COND_WAIT(&_starpu_simgrid_time_advance_cond, &_starpu_simgrid_time_advance_mutex); #else /* Give hand to another thread, hopefully the one which has the * spinlock and probably just has also a short-lived mutex. */ starpu_sleep(0.000001); #endif STARPU_UYIELD(); } lock->taken = 1; #ifdef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_simgrid_time_advance_mutex); #endif return 0; #elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) if (STARPU_LIKELY(STARPU_VAL_COMPARE_AND_SWAP(&lock->taken, 0, 1) == 0)) /* Got it on first try! */ return 0; return _starpu_pthread_spin_do_lock(lock); #else /* !SIMGRID && !LINUX */ uint32_t prev; do { prev = STARPU_TEST_AND_SET(&lock->taken, 1); if (STARPU_UNLIKELY(prev)) STARPU_UYIELD(); } while (STARPU_UNLIKELY(prev)); return 0; #endif } #define starpu_pthread_spin_lock _starpu_pthread_spin_lock static inline void _starpu_pthread_spin_checklocked(starpu_pthread_spinlock_t *lock STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_SIMGRID STARPU_ASSERT(lock->taken); #elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) STARPU_ASSERT(lock->taken == 1 || lock->taken == 2); #else STARPU_ASSERT(lock->taken); #endif } static inline int _starpu_pthread_spin_trylock(starpu_pthread_spinlock_t *lock) { #ifdef STARPU_SIMGRID if (STARPU_UNLIKELY(lock->taken)) return EBUSY; lock->taken = 1; return 0; #elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) unsigned prev; prev = STARPU_VAL_COMPARE_AND_SWAP(&lock->taken, 0, 1); return (prev == 0)?0:EBUSY; #else /* !SIMGRID && !LINUX */ uint32_t prev; prev = STARPU_TEST_AND_SET(&lock->taken, 1); return (prev == 0)?0:EBUSY; #endif } #define starpu_pthread_spin_trylock _starpu_pthread_spin_trylock #if defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) void _starpu_pthread_spin_do_unlock(starpu_pthread_spinlock_t *lock) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; #endif static inline int _starpu_pthread_spin_unlock(starpu_pthread_spinlock_t *lock) { #ifdef STARPU_SIMGRID lock->taken = 0; #elif defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG) STARPU_ASSERT(lock->taken != 0); STARPU_SYNCHRONIZE(); unsigned next = STARPU_ATOMIC_ADD(&lock->taken, -1); if (STARPU_LIKELY(next == 0)) /* Nobody to wake, we are done */ return 0; _starpu_pthread_spin_do_unlock(lock); #else /* !SIMGRID && !LINUX */ STARPU_RELEASE(&lock->taken); #endif return 0; } #define starpu_pthread_spin_unlock _starpu_pthread_spin_unlock #else /* defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK) */ static inline void _starpu_pthread_spin_checklocked(starpu_pthread_spinlock_t *lock STARPU_ATTRIBUTE_UNUSED) { STARPU_ASSERT(pthread_spin_trylock((pthread_spinlock_t *)lock) != 0); } #endif /* defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK) */ #pragma GCC visibility pop #endif /* __COMMON_THREAD_H__ */ starpu-1.4.9+dfsg/src/common/timing.c000066400000000000000000000143401507764646700175240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #ifdef STARPU_SIMGRID #include #ifdef HAVE_SIMGRID_ENGINE_H #include #endif #endif #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) #include #endif #ifdef STARPU_SIMGRID void _starpu_timing_init(void) { } void _starpu_clock_gettime(struct timespec *ts) { #ifdef HAVE_SIMGRID_GET_CLOCK double now = simgrid_get_clock(); #else double now = MSG_get_clock(); #endif ts->tv_sec = floor(now); ts->tv_nsec = floor((now - ts->tv_sec) * 1000000000); } #elif defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC) #include #ifndef _POSIX_C_SOURCE /* for clock_gettime */ #define _POSIX_C_SOURCE 199309L #endif #ifdef __linux__ #ifndef CLOCK_MONOTONIC_RAW #define CLOCK_MONOTONIC_RAW 4 #endif #endif static struct timespec _starpu_reference_start_time_ts; /* Modern CPUs' clocks are usually not synchronized so we use a monotonic clock * to have consistent timing measurements. */ static void _starpu_clock_readtime(struct timespec *ts) { #if 0 /* def CLOCK_MONOTONIC_RAW */ /* The CLOCK_MONOTONIC_RAW clock is not * subject to NTP adjustments, but is not available on all systems (in that * case we use the CLOCK_MONOTONIC clock instead). */ /* In the distributed case, we *do* want NTP adjustments, to get * somehow-coherent traces, so this is disabled */ static int raw_supported = 0; switch (raw_supported) { case -1: break; case 1: clock_gettime(CLOCK_MONOTONIC_RAW, ts); return; case 0: if (clock_gettime(CLOCK_MONOTONIC_RAW, ts)) { raw_supported = -1; break; } else { raw_supported = 1; return; } } #endif clock_gettime(CLOCK_MONOTONIC, ts); } void _starpu_timing_init(void) { _starpu_clock_gettime(&_starpu_reference_start_time_ts); } void _starpu_clock_gettime(struct timespec *ts) { struct timespec absolute_ts; /* Read the current time */ _starpu_clock_readtime(&absolute_ts); /* Compute the relative time since initialization */ starpu_timespec_sub(&absolute_ts, &_starpu_reference_start_time_ts, ts); } #else // !HAVE_CLOCK_GETTIME #if defined(__i386__) || defined(__pentium__) || defined(__pentiumpro__) || defined(__i586__) || defined(__i686__) || defined(__k6__) || defined(__k7__) || defined(__x86_64__) union starpu_u_tick { uint64_t tick; struct { uint32_t low; uint32_t high; } sub; }; #define STARPU_GET_TICK(t) __asm__ volatile("rdtsc" : "=a" ((t).sub.low), "=d" ((t).sub.high)) #define STARPU_TICK_RAW_DIFF(t1, t2) ((t2).tick - (t1).tick) #define STARPU_TICK_DIFF(t1, t2) (STARPU_TICK_RAW_DIFF(t1, t2) - _starpu_residual) static union starpu_u_tick _starpu_reference_start_tick; static double _starpu_scale = 0.0; static unsigned long long _starpu_residual = 0; static int _starpu_inited = 0; #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) static int mygettimeofday(struct timeval *tv, void *tz) { if (tv) { FILETIME ft; unsigned long long res; GetSystemTimeAsFileTime(&ft); /* 100-nanosecond intervals since January 1, 1601 */ res = ft.dwHighDateTime; res <<= 32; res |= ft.dwLowDateTime; res /= 10; /* Now we have microseconds */ res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; /* Now we are based on epoch */ tv->tv_sec = res / 1000000ULL; tv->tv_usec = res % 1000000ULL; } } #else #define mygettimeofday(tv,tz) gettimeofday(tv,tz) #endif void _starpu_timing_init(void) { static union starpu_u_tick t1, t2; int i; if (_starpu_inited) return; _starpu_residual = (unsigned long long)1 << 63; for(i = 0; i < 20; i++) { STARPU_GET_TICK(t1); STARPU_GET_TICK(t2); _starpu_residual = STARPU_MIN(_starpu_residual, STARPU_TICK_RAW_DIFF(t1, t2)); } { struct timeval tv1,tv2; STARPU_GET_TICK(t1); mygettimeofday(&tv1,0); starpu_sleep(0.5); STARPU_GET_TICK(t2); mygettimeofday(&tv2,0); _starpu_scale = ((tv2.tv_sec*1e6 + tv2.tv_usec) - (tv1.tv_sec*1e6 + tv1.tv_usec)) / (double)(STARPU_TICK_DIFF(t1, t2)); } STARPU_GET_TICK(_starpu_reference_start_tick); _starpu_inited = 1; } void _starpu_clock_gettime(struct timespec *ts) { union starpu_u_tick tick_now; STARPU_GET_TICK(tick_now); uint64_t elapsed_ticks = STARPU_TICK_DIFF(_starpu_reference_start_tick, tick_now); /* We convert this number into nano-seconds so that we can fill the * timespec structure. */ uint64_t elapsed_ns = (uint64_t)(((double)elapsed_ticks)*(_starpu_scale*1000.0)); long tv_nsec = (elapsed_ns % 1000000000); time_t tv_sec = (elapsed_ns / 1000000000); ts->tv_sec = tv_sec; ts->tv_nsec = tv_nsec; } #else // !HAVE_CLOCK_GETTIME & no rdtsc #warning StarPU could not find a timer, clock will always return 0 void _starpu_timing_init(void) { } void _starpu_clock_gettime(struct timespec *ts) { ts->tv_sec = 0; ts->tv_nsec = 0; } #endif #endif // HAVE_CLOCK_GETTIME /* Returns the time elapsed between start and end in microseconds */ double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end) { struct timespec diff; starpu_timespec_sub(end, start, &diff); double us = (diff.tv_sec*1e6) + (diff.tv_nsec*1e-3); return us; } double starpu_timing_timespec_to_us(struct timespec *ts) { return (1000000.0*ts->tv_sec) + (0.001*ts->tv_nsec); } double starpu_timing_now(void) { #ifdef STARPU_SIMGRID # ifdef HAVE_SIMGRID_GET_CLOCK return simgrid_get_clock()*1000000; # else return MSG_get_clock()*1000000; # endif #else struct timespec now; _starpu_clock_gettime(&now); return starpu_timing_timespec_to_us(&now); #endif } starpu-1.4.9+dfsg/src/common/timing.h000066400000000000000000000022331507764646700175270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef TIMING_H #define TIMING_H /** @file */ #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #pragma GCC visibility push(hidden) /** * _starpu_timing_init must be called prior to using any of these timing * functions. */ void _starpu_timing_init(void); void _starpu_clock_gettime(struct timespec *ts) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; #pragma GCC visibility pop #endif /* TIMING_H */ starpu-1.4.9+dfsg/src/common/uthash.h000066400000000000000000002067461507764646700175530ustar00rootroot00000000000000/* Copyright (c) 2003-2010, Troy D. Hanson http://uthash.sourceforge.net All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef UTHASH_H #define UTHASH_H /** @file */ #include /* memcmp,strlen */ #include /* ptrdiff_t */ /* These macros use decltype or the earlier __typeof GNU extension. As decltype is only available in newer compilers (VS2010 or gcc 4.3+ when compiling c++ source) this code uses whatever method is needed or, for VS2008 where neither is available, uses casting workarounds. */ #ifdef _MSC_VER /* MS compiler */ #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ #define DECLTYPE(x) (decltype(x)) #else /* VS2008 or older (or VS2010 in C mode) */ #define NO_DECLTYPE #define DECLTYPE(x) #endif #else /* GNU, Sun and other compilers */ #define DECLTYPE(x) (__typeof(x)) #endif #ifdef NO_DECLTYPE #define DECLTYPE_ASSIGN(dst,src) \ do { \ char **_da_dst = (char**)(&(dst)); \ *_da_dst = (char*)(src); \ } while(0) #else #define DECLTYPE_ASSIGN(dst,src) \ do { \ (dst) = DECLTYPE(dst)(src); \ } while(0) #endif /* a number of the hash function use uint32_t which isn't defined on win32 */ #ifdef _MSC_VER typedef unsigned int uint32_t; #else #include /* uint32_t */ #endif #pragma GCC visibility push(hidden) #define UTHASH_VERSION 1.9.3 #define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ #define uthash_malloc(sz) malloc(sz) /* malloc fcn */ #define uthash_free(ptr,sz) free(ptr) /* free fcn */ #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ #define uthash_expand_fyi(tbl) /* can be defined to log expands */ /* initial number of buckets */ #define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */ #define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */ #define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */ /* calculate the element whose hash handle address is hhe */ #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) #define HASH_FIND(hh,head,keyptr,keylen,out) \ do { \ unsigned _hf_bkt=0,_hf_hashv=0; \ out=NULL; \ if (head) { \ HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \ HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \ keyptr,keylen,out); \ } \ } \ } while (0) #ifdef HASH_BLOOM #define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM) #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0) #define HASH_BLOOM_MAKE(tbl) \ do { \ (tbl)->bloom_nbits = HASH_BLOOM; \ (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ } while (0) #define HASH_BLOOM_FREE(tbl) \ do { \ uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ } while (0) #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8))) #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8))) #define HASH_BLOOM_ADD(tbl,hashv) \ HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) #define HASH_BLOOM_TEST(tbl,hashv) \ HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) #else #define HASH_BLOOM_MAKE(tbl) #define HASH_BLOOM_FREE(tbl) #define HASH_BLOOM_ADD(tbl,hashv) #define HASH_BLOOM_TEST(tbl,hashv) (1) #endif #define HASH_MAKE_TABLE(hh,head) \ do { \ (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ sizeof(UT_hash_table)); \ if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ (head)->hh.tbl->tail = &((head)->hh); \ (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ memset((head)->hh.tbl->buckets, 0, \ HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ HASH_BLOOM_MAKE((head)->hh.tbl); \ (head)->hh.tbl->signature = HASH_SIGNATURE; \ } while(0) #define HASH_ADD(hh,head,fieldname,keylen_in,add) \ HASH_ADD_KEYPTR(hh,head,&add->fieldname,keylen_in,add) #ifdef STARPU_DEBUG /* Check that we don't insert the same key several times */ #define HASH_CHECK_KEY(hh,head,keyptr,keylen,out) \ do { \ __typeof__(out) _out; \ HASH_FIND(hh,head,keyptr,keylen,_out); \ STARPU_ASSERT_MSG(!_out,"Cannot insert the same key twice"); \ } while(0) #else #define HASH_CHECK_KEY(hh,head,keyptr,keylen,out) #endif #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ do { \ unsigned _ha_bkt=0; \ HASH_CHECK_KEY(hh,head,keyptr,keylen_in,add); \ (add)->hh.next = NULL; \ (add)->hh.key = (char*)keyptr; \ (add)->hh.keylen = keylen_in; \ if (!(head)) { \ head = (add); \ (head)->hh.prev = NULL; \ HASH_MAKE_TABLE(hh,head); \ } else { \ (head)->hh.tbl->tail->next = (add); \ (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ (head)->hh.tbl->tail = &((add)->hh); \ } \ (head)->hh.tbl->num_items++; \ (add)->hh.tbl = (head)->hh.tbl; \ HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ (add)->hh.hashv, _ha_bkt); \ HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ HASH_FSCK(hh,head); \ } while(0) #define HASH_TO_BKT( hashv, num_bkts, bkt ) \ do { \ bkt = ((hashv) & ((num_bkts) - 1)); \ } while(0) /* delete "delptr" from the hash table. * "the usual" patch-up process for the app-order doubly-linked-list. * The use of _hd_hh_del below deserves special explanation. * These used to be expressed using (delptr) but that led to a bug * if someone used the same symbol for the head and deletee, like * HASH_DELETE(hh,users,users); * We want that to work, but by changing the head (users) below * we were forfeiting our ability to further refer to the deletee (users) * in the patch-up process. Solution: use scratch space to * copy the deletee pointer, then the latter references are via that * scratch pointer rather than through the repointed (users) symbol. */ #define HASH_DELETE(hh,head,delptr) \ do { \ unsigned _hd_bkt; \ struct UT_hash_handle *_hd_hh_del; \ if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ uthash_free((head)->hh.tbl->buckets, \ (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ HASH_BLOOM_FREE((head)->hh.tbl); \ uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ head = NULL; \ } else { \ _hd_hh_del = &((delptr)->hh); \ if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ (head)->hh.tbl->tail = \ (UT_hash_handle*)((char*)((delptr)->hh.prev) + \ (head)->hh.tbl->hho); \ } \ if ((delptr)->hh.prev) { \ ((UT_hash_handle*)((char*)((delptr)->hh.prev) + \ (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ } else { \ DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ } \ if (_hd_hh_del->next) { \ ((UT_hash_handle*)((char*)_hd_hh_del->next + \ (head)->hh.tbl->hho))->prev = \ _hd_hh_del->prev; \ } \ HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ (head)->hh.tbl->num_items--; \ } \ HASH_FSCK(hh,head); \ } while (0) /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ #define HASH_FIND_STR(head,findstr,out) \ HASH_FIND(hh,head,findstr,strlen(findstr),out) #define HASH_ADD_STR(head,strfield,add) \ HASH_ADD(hh,head,strfield[0],strlen(add->strfield),add) #define HASH_FIND_INT(head,findint,out) \ HASH_FIND(hh,head,findint,sizeof(int),out) #define HASH_ADD_INT(head,intfield,add) \ HASH_ADD(hh,head,intfield,sizeof(int),add) #define HASH_FIND_PTR(head,findptr,out) \ HASH_FIND(hh,head,findptr,sizeof(void *),out) #define HASH_ADD_PTR(head,ptrfield,add) \ HASH_ADD(hh,head,ptrfield,sizeof(void *),add) #define HASH_DEL(head,delptr) \ HASH_DELETE(hh,head,delptr) /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. */ #ifdef HASH_DEBUG #define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) #define HASH_FSCK(hh,head) \ do { \ unsigned _bkt_i; \ unsigned _count, _bkt_count; \ char *_prev; \ struct UT_hash_handle *_thh; \ if (head) { \ _count = 0; \ for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ _bkt_count = 0; \ _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ _prev = NULL; \ while (_thh) { \ if (_prev != (char*)(_thh->hh_prev)) { \ HASH_OOPS("invalid hh_prev %p, actual %p\n", \ _thh->hh_prev, _prev ); \ } \ _bkt_count++; \ _prev = (char*)(_thh); \ _thh = _thh->hh_next; \ } \ _count += _bkt_count; \ if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ HASH_OOPS("invalid bucket count %u, actual %u\n", \ (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ } \ } \ if (_count != (head)->hh.tbl->num_items) { \ HASH_OOPS("invalid hh item count %u, actual %u\n", \ (head)->hh.tbl->num_items, _count ); \ } \ /* traverse hh in app order; check next/prev integrity, count */ \ _count = 0; \ _prev = NULL; \ _thh = &(head)->hh; \ while (_thh) { \ _count++; \ if (_prev !=(char*)(_thh->prev)) { \ HASH_OOPS("invalid prev %p, actual %p\n", \ _thh->prev, _prev ); \ } \ _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ (head)->hh.tbl->hho) : NULL ); \ } \ if (_count != (head)->hh.tbl->num_items) { \ HASH_OOPS("invalid app item count %u, actual %u\n", \ (head)->hh.tbl->num_items, _count ); \ } \ } \ } while (0) #else #define HASH_FSCK(hh,head) #endif /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to * the descriptor to which this macro is defined for tuning the hash function. * The app can #include to get the prototype for write(2). */ #ifdef HASH_EMIT_KEYS #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ do { \ unsigned _klen = fieldlen; \ write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ write(HASH_EMIT_KEYS, keyptr, fieldlen); \ } while (0) #else #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) #endif /* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ #ifdef HASH_FUNCTION #define HASH_FCN HASH_FUNCTION #else #define HASH_FCN HASH_JEN #endif /* The Bernstein hash function, used in Perl prior to v5.6 */ #define HASH_BER(key,keylen,num_bkts,hashv,bkt) \ do { \ unsigned _hb_keylen=keylen; \ char *_hb_key=(char*)(key); \ (hashv) = 0; \ while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \ bkt = (hashv) & (num_bkts-1); \ } while (0) /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ #define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \ do { \ unsigned _sx_i; \ char *_hs_key=(char*)(key); \ hashv = 0; \ for(_sx_i=0; _sx_i < keylen; _sx_i++) \ hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ bkt = hashv & (num_bkts-1); \ } while (0) #define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \ do { \ unsigned _fn_i; \ char *_hf_key=(char*)(key); \ hashv = 2166136261UL; \ for(_fn_i=0; _fn_i < keylen; _fn_i++) \ hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \ bkt = hashv & (num_bkts-1); \ } while(0) #define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \ do { \ unsigned _ho_i; \ char *_ho_key=(char*)(key); \ hashv = 0; \ for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ hashv += _ho_key[_ho_i]; \ hashv += (hashv << 10); \ hashv ^= (hashv >> 6); \ } \ hashv += (hashv << 3); \ hashv ^= (hashv >> 11); \ hashv += (hashv << 15); \ bkt = hashv & (num_bkts-1); \ } while(0) #define HASH_JEN_MIX(a,b,c) \ do { \ a -= b; a -= c; a ^= ( c >> 13 ); \ b -= c; b -= a; b ^= ( a << 8 ); \ c -= a; c -= b; c ^= ( b >> 13 ); \ a -= b; a -= c; a ^= ( c >> 12 ); \ b -= c; b -= a; b ^= ( a << 16 ); \ c -= a; c -= b; c ^= ( b >> 5 ); \ a -= b; a -= c; a ^= ( c >> 3 ); \ b -= c; b -= a; b ^= ( a << 10 ); \ c -= a; c -= b; c ^= ( b >> 15 ); \ } while (0) #define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \ do { \ unsigned _hj_i,_hj_j,_hj_k; \ char *_hj_key=(char*)(key); \ hashv = 0xfeedbeef; \ _hj_i = _hj_j = 0x9e3779b9; \ _hj_k = keylen; \ while (_hj_k >= 12) { \ _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ + ( (unsigned)_hj_key[2] << 16 ) \ + ( (unsigned)_hj_key[3] << 24 ) ); \ _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ + ( (unsigned)_hj_key[6] << 16 ) \ + ( (unsigned)_hj_key[7] << 24 ) ); \ hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ + ( (unsigned)_hj_key[10] << 16 ) \ + ( (unsigned)_hj_key[11] << 24 ) ); \ \ HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ \ _hj_key += 12; \ _hj_k -= 12; \ } \ hashv += keylen; \ switch ( _hj_k ) { \ case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \ /* FALLTHRU */ \ case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \ /* FALLTHRU */ \ case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \ /* FALLTHRU */ \ case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \ /* FALLTHRU */ \ case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \ /* FALLTHRU */ \ case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \ /* FALLTHRU */ \ case 5: _hj_j += _hj_key[4]; \ /* FALLTHRU */ \ case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \ /* FALLTHRU */ \ case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \ /* FALLTHRU */ \ case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \ /* FALLTHRU */ \ case 1: _hj_i += _hj_key[0]; \ /* FALLTHRU */ \ default: break; \ } \ HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ bkt = hashv & (num_bkts-1); \ } while(0) /* The Paul Hsieh hash function */ #undef get16bits #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) #define get16bits(d) (*((const uint16_t *) (d))) #endif #if !defined (get16bits) #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ +(uint32_t)(((const uint8_t *)(d))[0]) ) #endif #define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \ do { \ char *_sfh_key=(char*)(key); \ uint32_t _sfh_tmp, _sfh_len = keylen; \ \ int _sfh_rem = _sfh_len & 3; \ _sfh_len >>= 2; \ hashv = 0xcafebabe; \ \ /* Main loop */ \ for (;_sfh_len > 0; _sfh_len--) { \ hashv += get16bits (_sfh_key); \ _sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ hashv; \ hashv = (hashv << 16) ^ _sfh_tmp; \ _sfh_key += 2*sizeof (uint16_t); \ hashv += hashv >> 11; \ } \ \ /* Handle end cases */ \ switch (_sfh_rem) { \ case 3: hashv += get16bits (_sfh_key); \ hashv ^= hashv << 16; \ hashv ^= _sfh_key[sizeof (uint16_t)] << 18; \ hashv += hashv >> 11; \ break; \ case 2: hashv += get16bits (_sfh_key); \ hashv ^= hashv << 11; \ hashv += hashv >> 17; \ break; \ case 1: hashv += *_sfh_key; \ hashv ^= hashv << 10; \ hashv += hashv >> 1; \ break; \ default: break; \ } \ \ /* Force "avalanching" of final 127 bits */ \ hashv ^= hashv << 3; \ hashv += hashv >> 5; \ hashv ^= hashv << 4; \ hashv += hashv >> 17; \ hashv ^= hashv << 25; \ hashv += hashv >> 6; \ bkt = hashv & (num_bkts-1); \ } while(0) #ifdef HASH_USING_NO_STRICT_ALIASING /* The MurmurHash exploits some CPU's (e.g. x86) tolerance for unaligned reads. * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. * So MurmurHash comes in two versions, the faster unaligned one and the slower * aligned one. We only use the faster one on CPU's where we know it's safe. * * Note the preprocessor built-in defines can be emitted using: * * gcc -m64 -dM -E - < /dev/null (on gcc) * cc -## a.c (where a.c is a simple test file) (Sun Studio) */ #if (defined(__i386__) || defined(__x86_64__)) #define HASH_MUR HASH_MUR_UNALIGNED #else #define HASH_MUR HASH_MUR_ALIGNED #endif /* Appleby's MurmurHash fast version for unaligned-tolerant archs like i386 */ #define HASH_MUR_UNALIGNED(key,keylen,num_bkts,hashv,bkt) \ do { \ const unsigned int _mur_m = 0x5bd1e995; \ const int _mur_r = 24; \ hashv = 0xcafebabe ^ keylen; \ char *_mur_key = (char *)(key); \ uint32_t _mur_tmp, _mur_len = keylen; \ \ for (;_mur_len >= 4; _mur_len-=4) { \ _mur_tmp = *(uint32_t *)_mur_key; \ _mur_tmp *= _mur_m; \ _mur_tmp ^= _mur_tmp >> _mur_r; \ _mur_tmp *= _mur_m; \ hashv *= _mur_m; \ hashv ^= _mur_tmp; \ _mur_key += 4; \ } \ \ switch(_mur_len) \ { \ case 3: hashv ^= _mur_key[2] << 16; \ /* FALLTHRU */ \ case 2: hashv ^= _mur_key[1] << 8; \ /* FALLTHRU */ \ case 1: hashv ^= _mur_key[0]; \ hashv *= _mur_m; \ /* FALLTHRU */ \ default: break; \ }; \ \ hashv ^= hashv >> 13; \ hashv *= _mur_m; \ hashv ^= hashv >> 15; \ \ bkt = hashv & (num_bkts-1); \ } while(0) /* Appleby's MurmurHash version for alignment-sensitive archs like Sparc */ #define HASH_MUR_ALIGNED(key,keylen,num_bkts,hashv,bkt) \ do { \ const unsigned int _mur_m = 0x5bd1e995; \ const int _mur_r = 24; \ hashv = 0xcafebabe ^ (keylen); \ char *_mur_key = (char *)(key); \ uint32_t _mur_len = keylen; \ int _mur_align = (int)_mur_key & 3; \ \ if (_mur_align && (_mur_len >= 4)) { \ unsigned _mur_t = 0, _mur_d = 0; \ switch(_mur_align) { \ case 1: _mur_t |= _mur_key[2] << 16; \ /* FALLTHRU */ \ case 2: _mur_t |= _mur_key[1] << 8; \ /* FALLTHRU */ \ case 3: _mur_t |= _mur_key[0]; \ /* FALLTHRU */ \ default: break; \ } \ _mur_t <<= (8 * _mur_align); \ _mur_key += 4-_mur_align; \ _mur_len -= 4-_mur_align; \ int _mur_sl = 8 * (4-_mur_align); \ int _mur_sr = 8 * _mur_align; \ \ for (;_mur_len >= 4; _mur_len-=4) { \ _mur_d = *(unsigned *)_mur_key; \ _mur_t = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \ unsigned _mur_k = _mur_t; \ _mur_k *= _mur_m; \ _mur_k ^= _mur_k >> _mur_r; \ _mur_k *= _mur_m; \ hashv *= _mur_m; \ hashv ^= _mur_k; \ _mur_t = _mur_d; \ _mur_key += 4; \ } \ _mur_d = 0; \ if(_mur_len >= _mur_align) { \ switch(_mur_align) { \ case 3: _mur_d |= _mur_key[2] << 16; \ /* FALLTHRU */ \ case 2: _mur_d |= _mur_key[1] << 8; \ /* FALLTHRU */ \ case 1: _mur_d |= _mur_key[0]; \ /* FALLTHRU */ \ default: break; \ } \ unsigned _mur_k = (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \ _mur_k *= _mur_m; \ _mur_k ^= _mur_k >> _mur_r; \ _mur_k *= _mur_m; \ hashv *= _mur_m; \ hashv ^= _mur_k; \ _mur_k += _mur_align; \ _mur_len -= _mur_align; \ \ switch(_mur_len) \ { \ case 3: hashv ^= _mur_key[2] << 16; \ /* FALLTHRU */ \ case 2: hashv ^= _mur_key[1] << 8; \ /* FALLTHRU */ \ case 1: hashv ^= _mur_key[0]; \ hashv *= _mur_m; \ /* FALLTHRU */ \ default: break; \ } \ } else { \ switch(_mur_len) \ { \ case 3: _mur_d ^= _mur_key[2] << 16; \ /* FALLTHRU */ \ case 2: _mur_d ^= _mur_key[1] << 8; \ /* FALLTHRU */ \ case 1: _mur_d ^= _mur_key[0]; \ /* FALLTHRU */ \ case 0: hashv ^= (_mur_t >> _mur_sr) | (_mur_d << _mur_sl); \ hashv *= _mur_m; \ /* FALLTHRU */ \ default: break; \ } \ } \ \ hashv ^= hashv >> 13; \ hashv *= _mur_m; \ hashv ^= hashv >> 15; \ } else { \ for (;_mur_len >= 4; _mur_len-=4) { \ unsigned _mur_k = *(unsigned*)_mur_key; \ _mur_k *= _mur_m; \ _mur_k ^= _mur_k >> _mur_r; \ _mur_k *= _mur_m; \ hashv *= _mur_m; \ hashv ^= _mur_k; \ _mur_key += 4; \ } \ switch(_mur_len) \ { \ case 3: hashv ^= _mur_key[2] << 16; \ /* FALLTHRU */ \ case 2: hashv ^= _mur_key[1] << 8; \ /* FALLTHRU */ \ case 1: hashv ^= _mur_key[0]; \ hashv *= _mur_m; \ /* FALLTHRU */ \ default: break; \ } \ \ hashv ^= hashv >> 13; \ hashv *= _mur_m; \ hashv ^= hashv >> 15; \ } \ bkt = hashv & (num_bkts-1); \ } while(0) #endif /* HASH_USING_NO_STRICT_ALIASING */ /* key comparison function; return 0 if keys equal */ #define HASH_KEYCMP(a,b,len) memcmp(a,b,len) /* iterate over items in a known bucket to find desired item */ #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \ do { \ if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \ else out=NULL; \ while (out) { \ if (out->hh.keylen == keylen_in) { \ if ((HASH_KEYCMP(out->hh.key,keyptr,keylen_in)) == 0) break; \ } \ if (out->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,out->hh.hh_next)); \ else out = NULL; \ } \ } while(0) /* add an item to a bucket */ #define HASH_ADD_TO_BKT(head,addhh) \ do { \ head.count++; \ (addhh)->hh_next = head.hh_head; \ (addhh)->hh_prev = NULL; \ if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \ (head).hh_head=addhh; \ if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \ && (addhh)->tbl->noexpand != 1) { \ HASH_EXPAND_BUCKETS((addhh)->tbl); \ } \ } while(0) /* remove an item from a given bucket */ #define HASH_DEL_IN_BKT(hh,head,hh_del) \ (head).count--; \ if ((head).hh_head == hh_del) { \ (head).hh_head = hh_del->hh_next; \ } \ if (hh_del->hh_prev) { \ hh_del->hh_prev->hh_next = hh_del->hh_next; \ } \ if (hh_del->hh_next) { \ hh_del->hh_next->hh_prev = hh_del->hh_prev; \ } /* Bucket expansion has the effect of doubling the number of buckets * and redistributing the items into the new buckets. Ideally the * items will distribute more or less evenly into the new buckets * (the extent to which this is true is a measure of the quality of * the hash function as it applies to the key domain). * * With the items distributed into more buckets, the chain length * (item count) in each bucket is reduced. Thus by expanding buckets * the hash keeps a bound on the chain length. This bounded chain * length is the essence of how a hash provides constant time lookup. * * The calculation of tbl->ideal_chain_maxlen below deserves some * explanation. First, keep in mind that we're calculating the ideal * maximum chain length based on the *new* (doubled) bucket count. * In fractions this is just n/b (n=number of items,b=new num buckets). * Since the ideal chain length is an integer, we want to calculate * ceil(n/b). We don't depend on floating point arithmetic in this * hash, so to calculate ceil(n/b) with integers we could write * * ceil(n/b) = (n/b) + ((n%b)?1:0) * * and in fact a previous version of this hash did just that. * But now we have improved things a bit by recognizing that b is * always a power of two. We keep its base 2 log handy (call it lb), * so now we can write this with a bit shift and logical AND: * * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) * */ #define HASH_EXPAND_BUCKETS(tbl) \ do { \ unsigned _he_bkt; \ unsigned _he_bkt_i; \ struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ memset(_he_new_buckets, 0, \ 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ tbl->ideal_chain_maxlen = \ (tbl->num_items >> (tbl->log2_num_buckets+1)) + \ ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \ tbl->nonideal_items = 0; \ for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ { \ _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ while (_he_thh) { \ _he_hh_nxt = _he_thh->hh_next; \ HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \ _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ tbl->nonideal_items++; \ _he_newbkt->expand_mult = _he_newbkt->count / \ tbl->ideal_chain_maxlen; \ } \ _he_thh->hh_prev = NULL; \ _he_thh->hh_next = _he_newbkt->hh_head; \ if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \ _he_thh; \ _he_newbkt->hh_head = _he_thh; \ _he_thh = _he_hh_nxt; \ } \ } \ uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ tbl->num_buckets *= 2; \ tbl->log2_num_buckets++; \ tbl->buckets = _he_new_buckets; \ tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ (tbl->ineff_expands+1) : 0; \ if (tbl->ineff_expands > 1) { \ tbl->noexpand=1; \ uthash_noexpand_fyi(tbl); \ } \ uthash_expand_fyi(tbl); \ } while(0) /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ /* Note that HASH_SORT assumes the hash handle name to be hh. * HASH_SRT was added to allow the hash handle name to be passed in. */ #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) #define HASH_SRT(hh,head,cmpfcn) \ do { \ unsigned _hs_i; \ unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ if (head) { \ _hs_insize = 1; \ _hs_looping = 1; \ _hs_list = &((head)->hh); \ while (_hs_looping) { \ _hs_p = _hs_list; \ _hs_list = NULL; \ _hs_tail = NULL; \ _hs_nmerges = 0; \ while (_hs_p) { \ _hs_nmerges++; \ _hs_q = _hs_p; \ _hs_psize = 0; \ for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ _hs_psize++; \ _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ ((void*)((char*)(_hs_q->next) + \ (head)->hh.tbl->hho)) : NULL); \ if (! (_hs_q) ) break; \ } \ _hs_qsize = _hs_insize; \ while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \ if (_hs_psize == 0) { \ _hs_e = _hs_q; \ _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ ((void*)((char*)(_hs_q->next) + \ (head)->hh.tbl->hho)) : NULL); \ _hs_qsize--; \ } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \ _hs_e = _hs_p; \ _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ ((void*)((char*)(_hs_p->next) + \ (head)->hh.tbl->hho)) : NULL); \ _hs_psize--; \ } else if (( \ cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ ) <= 0) { \ _hs_e = _hs_p; \ _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ ((void*)((char*)(_hs_p->next) + \ (head)->hh.tbl->hho)) : NULL); \ _hs_psize--; \ } else { \ _hs_e = _hs_q; \ _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ ((void*)((char*)(_hs_q->next) + \ (head)->hh.tbl->hho)) : NULL); \ _hs_qsize--; \ } \ if ( _hs_tail ) { \ _hs_tail->next = ((_hs_e) ? \ ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ } else { \ _hs_list = _hs_e; \ } \ _hs_e->prev = ((_hs_tail) ? \ ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ _hs_tail = _hs_e; \ } \ _hs_p = _hs_q; \ } \ _hs_tail->next = NULL; \ if ( _hs_nmerges <= 1 ) { \ _hs_looping=0; \ (head)->hh.tbl->tail = _hs_tail; \ DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ } \ _hs_insize *= 2; \ } \ HASH_FSCK(hh,head); \ } \ } while (0) /* This function selects items from one hash into another hash. * The end result is that the selected items have dual presence * in both hashes. There is no copy of the items made; rather * they are added into the new hash through a secondary hash * hash handle that must be present in the structure. */ #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ do { \ unsigned _src_bkt, _dst_bkt; \ void *_last_elt=NULL, *_elt; \ UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ if (src) { \ for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ _src_hh; \ _src_hh = _src_hh->hh_next) { \ _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ if (cond(_elt)) { \ _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ _dst_hh->key = _src_hh->key; \ _dst_hh->keylen = _src_hh->keylen; \ _dst_hh->hashv = _src_hh->hashv; \ _dst_hh->prev = _last_elt; \ _dst_hh->next = NULL; \ if (_last_elt_hh) { _last_elt_hh->next = _elt; } \ if (!dst) { \ DECLTYPE_ASSIGN(dst,_elt); \ HASH_MAKE_TABLE(hh_dst,dst); \ } else { \ _dst_hh->tbl = (dst)->hh_dst.tbl; \ } \ HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ (dst)->hh_dst.tbl->num_items++; \ _last_elt = _elt; \ _last_elt_hh = _dst_hh; \ } \ } \ } \ } \ HASH_FSCK(hh_dst,dst); \ } while (0) #define HASH_CLEAR(hh,head) \ do { \ if (head) { \ uthash_free((head)->hh.tbl->buckets, \ (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ (head)=NULL; \ } \ } while(0) #ifdef NO_DECLTYPE #define HASH_ITER(hh,head,el,tmp) \ for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \ el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) #else #define HASH_ITER(hh,head,el,tmp) \ for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \ el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL)) #endif /* obtain a count of items in the hash */ #define HASH_COUNT(head) HASH_CNT(hh,head) #define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0) typedef struct UT_hash_bucket { struct UT_hash_handle *hh_head; unsigned count; /* expand_mult is normally set to 0. In this situation, the max chain length * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If * the bucket's chain exceeds this length, bucket expansion is triggered). * However, setting expand_mult to a non-zero value delays bucket expansion * (that would be triggered by additions to this particular bucket) * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. * (The multiplier is simply expand_mult+1). The whole idea of this * multiplier is to reduce bucket expansions, since they are expensive, in * situations where we know that a particular bucket tends to be overused. * It is better to let its chain length grow to a longer yet-still-bounded * value, than to do an O(n) bucket expansion too often. */ unsigned expand_mult; } UT_hash_bucket; /* random signature used only to find hash tables in external analysis */ #define HASH_SIGNATURE 0xa0111fe1 #define HASH_BLOOM_SIGNATURE 0xb12220f2 typedef struct UT_hash_table { UT_hash_bucket *buckets; unsigned num_buckets, log2_num_buckets; unsigned num_items; struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ /* in an ideal situation (all buckets used equally), no bucket would have * more than ceil(#items/#buckets) items. that's the ideal chain length. */ unsigned ideal_chain_maxlen; /* nonideal_items is the number of items in the hash whose chain position * exceeds the ideal chain maxlen. these items pay the penalty for an uneven * hash distribution; reaching them in a chain traversal takes >ideal steps */ unsigned nonideal_items; /* ineffective expands occur when a bucket doubling was performed, but * afterward, more than half the items in the hash had nonideal chain * positions. If this happens on two consecutive expansions we inhibit any * further expansion, as it's not helping; this happens when the hash * function isn't a good fit for the key domain. When expansion is inhibited * the hash will still work, albeit no longer in constant time. */ unsigned ineff_expands, noexpand; uint32_t signature; /* used only to find hash tables in external analysis */ #ifdef HASH_BLOOM uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ uint8_t *bloom_bv; char bloom_nbits; #endif } UT_hash_table; typedef struct UT_hash_handle { struct UT_hash_table *tbl; void *prev; /* prev element in app order */ void *next; /* next element in app order */ struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ struct UT_hash_handle *hh_next; /* next hh in bucket order */ void *key; /* ptr to enclosing struct's key */ unsigned keylen; /* enclosing struct's key len */ unsigned hashv; /* result of hash-fcn(key) */ } UT_hash_handle; #pragma GCC visibility pop #endif /* UTHASH_H */ starpu-1.4.9+dfsg/src/common/utils.c000066400000000000000000000374271507764646700174100ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #if defined(_WIN32) && !defined(__CYGWIN__) #include #include #define mkdir(path, mode) mkdir(path) #if !defined(__MINGW32__) #define ftruncate(fd, length) _chsize(fd, length) #endif #endif #ifndef O_BINARY #define O_BINARY 0 #endif #if !defined(O_DIRECT) && defined(F_NOCACHE) #define O_DIRECT F_NOCACHE #endif #ifndef O_DIRECT #define O_DIRECT 0 #endif int _starpu_silent; void _starpu_util_init(void) { _starpu_silent = starpu_getenv_number_default("STARPU_SILENT", 0); STARPU_HG_DISABLE_CHECKING(_starpu_silent); } #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__MINGW32__) #include static char * dirname(char * path) { char drive[_MAX_DRIVE]; char dir[_MAX_DIR]; /* Remove trailing slash */ while (strlen(path) > 0 && (*(path+strlen(path)-1) == '/' || *(path+strlen(path)-1) == '\\')) *(path+strlen(path)-1) = '\0'; _splitpath(path, drive, dir, NULL, NULL); _makepath(path, drive, dir, NULL, NULL); return path; } #else #include #endif /* Function with behaviour like `mkdir -p'. This function was adapted from * http://niallohiggins.com/2009/01/08/mkpath-mkdir-p-alike-in-c-for-unix/ */ int _starpu_mkpath(const char *s, mode_t mode) { int olderrno; char *q, *r = NULL, *path = NULL, *up = NULL; int rv = -1; while (s[0] == '/' && s[1] == '/') s++; if (strcmp(s, ".") == 0 || strcmp(s, "/") == 0 #if defined(_WIN32) /* C:/ or C:\ */ || (s[0] && s[1] == ':' && (s[2] == '/' || s[2] == '\\') && !s[3]) #endif ) return 0; if ((path = strdup(s)) == NULL) STARPU_ABORT(); if ((q = strdup(s)) == NULL) STARPU_ABORT(); if ((r = dirname(q)) == NULL) goto out; if ((up = strdup(r)) == NULL) STARPU_ABORT(); if ((_starpu_mkpath(up, mode) == -1) && (errno != EEXIST)) goto out; struct stat sb; if (stat(path, &sb) == 0) { if (!S_ISDIR(sb.st_mode)) { _STARPU_MSG("Error: %s already exists and is not a directory:\n", path); STARPU_ABORT(); } /* It already exists and is a directory. */ rv = 0; } else { if ((mkdir(path, mode) == -1) && (errno != EEXIST)) rv = -1; else rv = 0; } out: olderrno = errno; if (up) free(up); free(q); free(path); errno = olderrno; return rv; } void _starpu_mkpath_and_check(const char *path, mode_t mode) { int ret; ret = _starpu_mkpath(path, mode); if (ret == -1 && errno != EEXIST) { _STARPU_MSG("Error making StarPU directory %s:\n", path); perror("mkdir"); STARPU_ABORT(); } } char *_starpu_mkdtemp_internal(char *tmpl) { int len = (int)strlen(tmpl); int i; int count = 1; int ret; int first_letter = (int)'a'; int nb_letters = 25; int len_template = 6; // Initialize template for(i=len-len_template ; i0 && (host = strtok(NULL, " "))); if(rank>=0) { _STARPU_MSG("Missing hostnames in STARPU_MPI_HOSTNAMES\n"); STARPU_ABORT(); } } snprintf(hostname, size-1, "%s", host); free(srv_hosts); hostname[size-1] = 0; } else if (forced_hostname && forced_hostname[0]) { snprintf(hostname, size-1, "%s", forced_hostname); hostname[size-1] = 0; } else { char *c; gethostname(hostname, size-1); hostname[size-1] = 0; c = strchr(hostname, '.'); if (c) *c = 0; } } void starpu_sleep(float nb_sec) { #ifdef STARPU_SIMGRID # ifdef HAVE_SG_ACTOR_SLEEP_FOR sg_actor_sleep_for(nb_sec); # else MSG_process_sleep(nb_sec); # endif #elif defined(STARPU_HAVE_WINDOWS) Sleep(nb_sec * 1000); #else struct timespec req, rem; req.tv_sec = nb_sec; req.tv_nsec = (nb_sec - (float) req.tv_sec) * 1000000000; while (nanosleep(&req, &rem)) req = rem; #endif } void starpu_usleep(float nb_micro_sec) { #ifdef STARPU_SIMGRID # ifdef HAVE_SG_ACTOR_SLEEP_FOR sg_actor_sleep_for(nb_micro_sec / 1000000); # else MSG_process_sleep(nb_micro_sec / 1000000); # endif #elif defined(STARPU_HAVE_WINDOWS) Sleep(nb_micro_sec / 1000); #elif HAVE_UNISTD_H usleep(nb_micro_sec); #else #error no implementation of usleep #endif } char *starpu_getenv(const char *str) { #ifndef STARPU_SIMGRID #if defined(STARPU_DEVEL) || defined(STARPU_DEBUG) struct _starpu_worker * worker; worker = _starpu_get_local_worker_key(); if (worker && worker->worker_is_initialized) _STARPU_DISP("getenv should not be called from running workers, only for main() or worker initialization, since it is not reentrant\n"); #endif #endif return getenv(str); } static int _strings_ncmp(const char *strings[], const char *str) { int pos = 0; while (strings[pos]) { if ((strlen(str) == strlen(strings[pos]) && strncasecmp(str, strings[pos], strlen(strings[pos])) == 0)) break; pos++; } if (strings[pos] == NULL) return -1; return pos; } int starpu_get_env_string_var_default(const char *str, const char *strings[], int defvalue) { int val; char *strval; strval = starpu_getenv(str); if (!strval) { val = defvalue; } else { val = _strings_ncmp(strings, strval); if (val < 0) { int i; _STARPU_MSG("\n"); _STARPU_MSG("Invalid value '%s' for environment variable '%s'\n", strval, str); _STARPU_MSG("Valid values are:\n"); for(i=0;strings[i]!=NULL;i++) _STARPU_MSG("\t%s\n",strings[i]); _STARPU_MSG("\n"); STARPU_ABORT(); } } return val; } static void remove_spaces(char *str) { int i = 0; int j = 0; while (str[j] != '\0') { if (isspace(str[j])) { j++; continue; } if (j > i) { str[i] = str[j]; } i++; j++; } if (j > i) { str[i] = str[j]; } } int starpu_get_env_size_default(const char *str, int defval) { int val; char *strval; strval = starpu_getenv(str); if (!strval) { val = defval; } else { char *value = strdup(strval); if (value == NULL) _STARPU_ERROR("memory allocation failed\n"); remove_spaces(value); if (value[0] == '\0') { free(value); val = defval; } else { char *endptr = NULL; int mult = 1024; errno = 0; int v = (int)strtol(value, &endptr, 10); if (errno != 0) _STARPU_ERROR("could not parse environment variable '%s' with value '%s', strtol failed with error %s\n", str, value, strerror(errno)); if (*endptr != '\0') { switch (*endptr) { case 'b': case 'B': mult = 1; break; case 'k': case 'K': mult = 1024; break; case 'm': case 'M': mult = 1024*1024; break; case 'g': case 'G': mult = 1024*1024*1024; break; default: _STARPU_ERROR("could not parse environment variable '%s' with value '%s' size suffix invalid\n", str, value); } } val = v*mult; free(value); } } return val; } void starpu_display_bindings(void) { #if defined(STARPU_HAVE_HWLOC) && !defined(STARPU_SIMGRID) int value = starpu_getenv_number_default("STARPU_DISPLAY_BINDINGS", 0); int ret = 0; if (value == 2) ret = system("lstopo --ps -"); else ret = system("hwloc-ps -a -t -c"); if (ret) { _STARPU_DISP("%s returned %d\n", value==2?"lstopo":"hwloc-ps", ret); fflush(stderr); } fflush(stdout); #else _STARPU_DISP("hwloc not available to display bindings.\n"); #endif } starpu-1.4.9+dfsg/src/common/utils.h000066400000000000000000000214241507764646700174030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __COMMON_UTILS_H__ #define __COMMON_UTILS_H__ /** @file */ #include #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #ifdef STARPU_HAVE_SCHED_YIELD #include #endif #include #ifdef STARPU_HAVE_HELGRIND_H #include #endif #pragma GCC visibility push(hidden) #define _STARPU_STRINGIFY_(x) #x #define _STARPU_STRINGIFY(x) _STARPU_STRINGIFY_(x) #ifndef DO_CREQ_v_WW #define DO_CREQ_v_WW(_creqF, _ty1F, _arg1F, _ty2F, _arg2F) ((void)0) #endif #ifndef DO_CREQ_v_W #define DO_CREQ_v_W(_creqF, _ty1F, _arg1F) ((void)0) #endif #ifndef ANNOTATE_HAPPENS_BEFORE #define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0) #endif #ifndef ANNOTATE_HAPPENS_BEFORE_FORGET_ALL #define ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(obj) ((void)0) #endif #ifndef ANNOTATE_HAPPENS_AFTER #define ANNOTATE_HAPPENS_AFTER(obj) ((void)0) #endif #ifndef VALGRIND_HG_DISABLE_CHECKING #define VALGRIND_HG_DISABLE_CHECKING(start, len) ((void)0) #endif #ifndef VALGRIND_HG_ENABLE_CHECKING #define VALGRIND_HG_ENABLE_CHECKING(start, len) ((void)0) #endif #ifndef VALGRIND_STACK_REGISTER #define VALGRIND_STACK_REGISTER(stackbottom, stacktop) 0 #endif #ifndef VALGRIND_STACK_DEREGISTER #define VALGRIND_STACK_DEREGISTER(id) ((void)0) #endif #ifndef RUNNING_ON_VALGRIND #define RUNNING_ON_VALGRIND 0 #endif #ifdef STARPU_SANITIZE_THREAD #define STARPU_RUNNING_ON_VALGRIND 1 #else #define STARPU_RUNNING_ON_VALGRIND RUNNING_ON_VALGRIND #endif #define STARPU_HG_DISABLE_CHECKING(variable) VALGRIND_HG_DISABLE_CHECKING(&(variable), sizeof(variable)) #define STARPU_HG_ENABLE_CHECKING(variable) VALGRIND_HG_ENABLE_CHECKING(&(variable), sizeof(variable)) #define STARPU_DEBUG_PREFIX "[starpu]" /* This is needed in some places to make valgrind yield to another thread to be * able to progress. */ #if defined(__i386__) || defined(__x86_64__) #define _STARPU_UYIELD() __asm__ __volatile("rep; nop") #else #define _STARPU_UYIELD() ((void)0) #endif #if defined(STARPU_HAVE_SCHED_YIELD) && defined(STARPU_HAVE_HELGRIND_H) #define STARPU_VALGRIND_YIELD() do { if (STARPU_RUNNING_ON_VALGRIND) sched_yield(); } while (0) #define STARPU_UYIELD() do { if (STARPU_RUNNING_ON_VALGRIND) sched_yield(); else _STARPU_UYIELD(); } while (0) #else #define STARPU_VALGRIND_YIELD() do { } while (0) #define STARPU_UYIELD() _STARPU_UYIELD() #endif #ifdef STARPU_VERBOSE # define _STARPU_DEBUG(fmt, ...) do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%s] " fmt ,__starpu_func__ ,## __VA_ARGS__); fflush(stderr); }} while(0) # define _STARPU_DEBUG_NO_HEADER(fmt, ...) do { if (!_starpu_silent) {fprintf(stderr, fmt , ## __VA_ARGS__); fflush(stderr); }} while(0) #else # define _STARPU_DEBUG(fmt, ...) do { } while (0) # define _STARPU_DEBUG_NO_HEADER(fmt, ...) do { } while (0) #endif #ifdef STARPU_EXTRA_VERBOSE # define _STARPU_EXTRA_DEBUG(fmt, ...) do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%s] " fmt ,__starpu_func__ ,## __VA_ARGS__); fflush(stderr); }} while(0) #else # define _STARPU_EXTRA_DEBUG(fmt, ...) do { } while (0) #endif #ifdef STARPU_EXTRA_VERBOSE # define _STARPU_LOG_IN() do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%ld][%s:%s@%d] -->\n", starpu_pthread_self(), __starpu_func__,__FILE__, __LINE__); }} while(0) # define _STARPU_LOG_OUT() do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%ld][%s:%s@%d] <--\n", starpu_pthread_self(), __starpu_func__, __FILE__, __LINE__); }} while(0) # define _STARPU_LOG_OUT_TAG(outtag) do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%ld][%s:%s@%d] <-- (%s)\n", starpu_pthread_self(), __starpu_func__, __FILE__, __LINE__, outtag); }} while(0) #else # define _STARPU_LOG_IN() # define _STARPU_LOG_OUT() # define _STARPU_LOG_OUT_TAG(outtag) #endif /* TODO: cache */ #if defined(STARPU_USE_MPI) #if !defined HOST_NAME_MAX #define HOST_NAME_MAX 256 #endif #define _STARPU_MSG(fmt, ...) do { char _msghost[HOST_NAME_MAX]; gethostname(_msghost, HOST_NAME_MAX); fprintf(stderr, STARPU_DEBUG_PREFIX"[%s][%s] " fmt, _msghost, __starpu_func__, ## __VA_ARGS__); } while(0) #define _STARPU_DISP(fmt, ...) do { if (!_starpu_silent) { char _disphost[HOST_NAME_MAX]; gethostname(_disphost, HOST_NAME_MAX); fprintf(stderr, STARPU_DEBUG_PREFIX"[%s][%s] " fmt, _disphost, __starpu_func__, ## __VA_ARGS__); }} while(0) #define _STARPU_ERROR(fmt, ...) \ do { \ char _errorhost[HOST_NAME_MAX]; \ gethostname(_errorhost, HOST_NAME_MAX); \ fprintf(stderr, "\n\n[starpu][%s][%s] Error: " fmt, _errorhost, __starpu_func__, ## __VA_ARGS__); \ fprintf(stderr, "\n\n"); \ STARPU_ABORT(); \ } while (0) #else /* STARPU_USE_MPI */ #define _STARPU_MSG(fmt, ...) do { fprintf(stderr, STARPU_DEBUG_PREFIX"[%s] " fmt ,__starpu_func__ ,## __VA_ARGS__); } while(0) #define _STARPU_DISP(fmt, ...) do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%s] " fmt ,__starpu_func__ ,## __VA_ARGS__); }} while(0) #define _STARPU_ERROR(fmt, ...) \ do { \ fprintf(stderr, "\n\n[starpu][%s] Error: " fmt ,__starpu_func__ ,## __VA_ARGS__); \ fprintf(stderr, "\n\n"); \ STARPU_ABORT(); \ } while (0) #endif /* STARPU_USE_MPI */ #ifdef _MSC_VER # if defined(__cplusplus) # define _STARPU_DECLTYPE(x) (decltype(x)) # else # define _STARPU_DECLTYPE(x) # endif #else # define _STARPU_DECLTYPE(x) (__typeof(x)) #endif #define _STARPU_MALLOC(ptr, size) do { ptr = _STARPU_DECLTYPE(ptr) malloc(size); STARPU_ASSERT_MSG(ptr != NULL || size == 0, "Cannot allocate %ld bytes\n", (long) (size)); } while (0) #define _STARPU_CALLOC(ptr, nmemb, size) do { ptr = _STARPU_DECLTYPE(ptr) calloc(nmemb, size); STARPU_ASSERT_MSG(ptr != NULL || size == 0, "Cannot allocate %ld bytes\n", (long) (nmemb*size)); } while (0) #define _STARPU_REALLOC(ptr, size) do { void *_new_ptr = realloc(ptr, size); STARPU_ASSERT_MSG(_new_ptr != NULL || size == 0, "Cannot reallocate %ld bytes\n", (long) (size)); ptr = _STARPU_DECLTYPE(ptr) _new_ptr;} while (0) #ifdef _MSC_VER #define _STARPU_IS_ZERO(a) (a == 0.0) #else #define _STARPU_IS_ZERO(a) (fpclassify(a) == FP_ZERO) #endif char *_starpu_mkdtemp_internal(char *tmpl) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; char *_starpu_mkdtemp(char *tmpl) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; int _starpu_mkpath(const char *s, mode_t mode); void _starpu_mkpath_and_check(const char *s, mode_t mode) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; char *_starpu_mktemp(const char *directory, int flags, int *fd); /** This version creates a hierarchy of n temporary directories, useful when * creating a lot of temporary files to be stored in the same place */ char *_starpu_mktemp_many(const char *directory, int depth, int flags, int *fd); void _starpu_rmtemp_many(char *path, int depth); void _starpu_rmdir_many(char *path, int depth); int _starpu_fftruncate(FILE *file, size_t length); int _starpu_ftruncate(int fd, size_t length); int _starpu_frdlock(FILE *file); int _starpu_frdunlock(FILE *file); int _starpu_fwrlock(FILE *file); int _starpu_fwrunlock(FILE *file); char *_starpu_get_home_path(void); void _starpu_gethostname(char *hostname, size_t size) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; /** If FILE is currently on a comment line, eat it. */ void _starpu_drop_comments(FILE *f); struct _starpu_job; /** Returns the symbol associated to that job if any. */ const char *_starpu_job_get_model_name(struct _starpu_job *j); /** Returns the name associated to that job if any. */ const char *_starpu_job_get_task_name(struct _starpu_job *j); struct starpu_codelet; /** Returns the symbol associated to that job if any. */ const char *_starpu_codelet_get_model_name(struct starpu_codelet *cl); /** Returns the name of a codelet, or fallback to the name of the perfmodel. */ const char *_starpu_codelet_get_name(struct starpu_codelet *cl); int _starpu_check_mutex_deadlock(starpu_pthread_mutex_t *mutex); void _starpu_util_init(void); enum initialization { UNINITIALIZED = 0, CHANGING, INITIALIZED }; #pragma GCC visibility pop #endif // __COMMON_UTILS_H__ starpu-1.4.9+dfsg/src/core/000077500000000000000000000000001507764646700155275ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/core/combined_workers.c000066400000000000000000000134361507764646700212360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include // for qsort #include #include #include #ifdef __GLIBC__ #include #endif #if defined(_WIN32) && !defined(__CYGWIN__) #include #endif static int compar_int(const void *pa, const void *pb) { int a = *((int *)pa); int b = *((int *)pb); return a - b; } static void sort_workerid_array(int nworkers, int workerid_array[]) { qsort(workerid_array, nworkers, sizeof(int), compar_int); } /* Create a new worker id for a combination of workers. This method should * typically be called at the initialization of the scheduling policy. This * worker should be the combination of the list of id's contained in the * workerid_array array which has nworkers entries. This function returns * the identifier of the combined worker in case of success, a negative value * is returned otherwise. */ int starpu_combined_worker_assign_workerid(int nworkers, int workerid_array[]) { int new_workerid; /* Return the number of actual workers. */ struct _starpu_machine_config *config = _starpu_get_machine_config(); int basic_worker_count = (int)config->topology.nworkers; int combined_worker_id = (int)config->topology.ncombinedworkers; /* We sort the ids */ sort_workerid_array(nworkers, workerid_array); /* Test that all workers are not combined workers already. */ int i; for (i = 0; i < nworkers; i++) { int id = workerid_array[i]; /* We only combine valid "basic" workers */ if ((id < 0) || (id >= basic_worker_count)) return -EINVAL; /* We only combine CPUs */ STARPU_ASSERT(config->workers[id].arch == STARPU_CPU_WORKER); STARPU_ASSERT(config->workers[id].worker_mask == STARPU_CPU); } /* Get an id for that combined worker. Note that this is not thread * safe because this method should only be called when the scheduler * is being initialized. */ new_workerid = basic_worker_count + combined_worker_id; STARPU_ASSERT_MSG_ALWAYS(new_workerid < STARPU_NMAXWORKERS, "Too many combined workers (%d) for parallel task execution. Please use configure option --enable-maxcpus to increase it beyond the current value %d", new_workerid, STARPU_MAXCPUS); config->topology.ncombinedworkers++; // fprintf(stderr, "COMBINED WORKERS "); // for (i = 0; i < nworkers; i++) // { // fprintf(stderr, "%d ", workerid_array[i]); // } // fprintf(stderr, "into worker %d\n", new_workerid); for(i = 0; i < nworkers; i++) _starpu_get_worker_struct(workerid_array[i])->combined_workerid = new_workerid; struct _starpu_combined_worker *combined_worker = &config->combined_workers[combined_worker_id]; combined_worker->worker_size = nworkers; _STARPU_MALLOC(combined_worker->perf_arch.devices, sizeof(struct starpu_perfmodel_device)); combined_worker->perf_arch.ndevices = 1; combined_worker->perf_arch.devices[0].type = config->workers[workerid_array[0]].perf_arch.devices[0].type; combined_worker->perf_arch.devices[0].devid = config->workers[workerid_array[0]].perf_arch.devices[0].devid; combined_worker->perf_arch.devices[0].ncores = nworkers; combined_worker->worker_mask = config->workers[workerid_array[0]].worker_mask; #ifdef STARPU_USE_MP combined_worker->count = nworkers -1; STARPU_PTHREAD_MUTEX_INIT(&combined_worker->count_mutex,NULL); #endif /* We assume that the memory node should either be that of the first * entry, and it is very likely that every worker in the combination * should be on the same memory node.*/ int first_id = workerid_array[0]; combined_worker->memory_node = config->workers[first_id].memory_node; /* Save the list of combined workers */ memcpy(&combined_worker->combined_workerid, workerid_array, nworkers*sizeof(int)); /* Note that we maintain both the cpu_set and the hwloc_cpu_set so that * the application is not forced to use hwloc when it is available. */ #ifdef __GLIBC__ CPU_ZERO(&combined_worker->cpu_set); #endif /* __GLIBC__ */ #ifdef STARPU_HAVE_HWLOC combined_worker->hwloc_cpu_set = hwloc_bitmap_alloc(); #endif for (i = 0; i < nworkers; i++) { #if defined(__GLIBC__) || defined(STARPU_HAVE_HWLOC) int id = workerid_array[i]; #ifdef __GLIBC__ #ifdef CPU_OR CPU_OR(&combined_worker->cpu_set, &combined_worker->cpu_set, &config->workers[id].cpu_set); #else int j; for (j = 0; j < CPU_SETSIZE; j++) { if (CPU_ISSET(j, &config->workers[id].cpu_set)) CPU_SET(j, &combined_worker->cpu_set); } #endif #endif /* __GLIBC__ */ #ifdef STARPU_HAVE_HWLOC hwloc_bitmap_or(combined_worker->hwloc_cpu_set, combined_worker->hwloc_cpu_set, config->workers[id].hwloc_cpu_set); #endif #endif } starpu_sched_ctx_add_combined_workers(&new_workerid, 1, STARPU_GLOBAL_SCHED_CTX); return new_workerid; } int starpu_combined_worker_get_description(int workerid, int *worker_size, int **combined_workerid) { /* Check that this is the id of a combined worker */ struct _starpu_combined_worker *worker; worker = _starpu_get_combined_worker_struct(workerid); STARPU_ASSERT(worker); if (worker_size) *worker_size = worker->worker_size; if (combined_workerid) *combined_workerid = worker->combined_workerid; return 0; } starpu-1.4.9+dfsg/src/core/combined_workers.h000066400000000000000000000016161507764646700212400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __COMBINED_WORKERS_H__ #define __COMBINED_WORKERS_H__ /** @file */ #include #include #pragma GCC visibility push(hidden) #pragma GCC visibility pop #endif // __COMBINED_WORKERS_H__ starpu-1.4.9+dfsg/src/core/debug.c000066400000000000000000000064501507764646700167660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #ifdef STARPU_VERBOSE /* we want a single writer at the same time to have a log that is readable */ static starpu_pthread_mutex_t logfile_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static FILE *logfile = NULL; #endif int _starpu_debug #ifdef STARPU_DEBUG = 1 #else = 0 #endif ; /* Tell gdb whether FXT is compiled in or not */ int _starpu_use_fxt #ifdef STARPU_USE_FXT = 1 #endif ; void _starpu_open_debug_logfile(void) { #ifdef STARPU_VERBOSE /* what is the name of the file ? default = "starpu.log" */ char *logfile_name; logfile_name = starpu_getenv("STARPU_LOGFILENAME"); if (!logfile_name) { logfile_name = "starpu.log"; } logfile = fopen(logfile_name, "w+"); STARPU_ASSERT_MSG(logfile, "Could not open file %s for verbose logs (%s). You can specify another file destination with the STARPU_LOGFILENAME environment variable", logfile_name, strerror(errno)); #endif } void _starpu_close_debug_logfile(void) { #ifdef STARPU_VERBOSE if (logfile) { fclose(logfile); logfile = NULL; } #endif } void _starpu_print_to_logfile(const char *format STARPU_ATTRIBUTE_UNUSED, ...) { #ifdef STARPU_VERBOSE va_list args; va_start(args, format); STARPU_PTHREAD_MUTEX_LOCK(&logfile_mutex); vfprintf(logfile, format, args); STARPU_PTHREAD_MUTEX_UNLOCK(&logfile_mutex); va_end(args); #endif } /* Record codelet to give ayudame nice function ids starting from 0. */ #if defined(STARPU_USE_AYUDAME1) static struct ayudame_codelet { char *name; struct starpu_codelet *cl; } *codelets; static unsigned ncodelets, ncodelets_alloc; static starpu_pthread_mutex_t ayudame_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; int64_t _starpu_ayudame_get_func_id(struct starpu_codelet *cl) { unsigned i; const char *name; if (!cl) return 0; name = _starpu_codelet_get_model_name(cl); STARPU_PTHREAD_MUTEX_LOCK(&ayudame_mutex); for (i=0; i < ncodelets; i++) { if (codelets[i].cl == cl && ((!name && !codelets[i].name) || ((name && codelets[i].name) && !strcmp(codelets[i].name, name)))) { STARPU_PTHREAD_MUTEX_UNLOCK(&ayudame_mutex); return i + 1; } } if (ncodelets == ncodelets_alloc) { if (!ncodelets_alloc) ncodelets_alloc = 16; else ncodelets_alloc *= 2; _STARPU_REALLOC(codelets, ncodelets_alloc * sizeof(*codelets)); } codelets[ncodelets].cl = cl; if (name) /* codelet might be freed by user */ codelets[ncodelets].name = strdup(name); else codelets[ncodelets].name = NULL; i = ncodelets++; if (name) AYU_event(AYU_REGISTERFUNCTION, i+1, (void*) name); STARPU_PTHREAD_MUTEX_UNLOCK(&ayudame_mutex); return i + 1; } #endif /* AYUDAME1 */ starpu-1.4.9+dfsg/src/core/debug.h000066400000000000000000000222451507764646700167730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DEBUG_H__ #define __DEBUG_H__ /** @file */ #include #include #include #include #include #if defined(STARPU_USE_AYUDAME1) /* Ayudame 1 API */ # include # ifndef AYU_RT_STARPU # define AYU_RT_STARPU 4 # endif # define STARPU_AYU_EVENT AYU_event # define STARPU_AYU_PREINIT() \ if (AYU_event) \ { \ enum ayu_runtime_t ayu_rt = AYU_RT_STARPU; \ AYU_event(AYU_PREINIT, 0, (void*) &ayu_rt); \ } # define STARPU_AYU_INIT() \ if (AYU_event) \ { \ AYU_event(AYU_INIT, 0, NULL); \ } # define STARPU_AYU_FINISH() \ if (AYU_event) \ { \ AYU_event(AYU_FINISH, 0, NULL); \ } # define STARPU_AYU_ADDDEPENDENCY(previous, handle, job_id) \ if (AYU_event) \ { \ uintptr_t __AYU_data[3] = { (previous), (uintptr_t) (handle), (uintptr_t) (handle) }; \ AYU_event(AYU_ADDDEPENDENCY, (job_id), __AYU_data); \ } # define STARPU_AYU_REMOVETASK(job_id) \ if (AYU_event) \ { \ AYU_event(AYU_REMOVETASK, (job_id), NULL); \ } # define STARPU_AYU_ADDTASK(job_id, task) \ if (AYU_event) \ { \ int64_t __AYU_data[2] = { \ ((struct starpu_task *)(task))!=NULL?_starpu_ayudame_get_func_id(((struct starpu_task *)(task))->cl):0, \ ((struct starpu_task *)(task))!=NULL?((struct starpu_task *)(task))->priority-STARPU_MIN_PRIO:0 \ }; \ AYU_event(AYU_ADDTASK, (job_id), __AYU_data); \ } # define STARPU_AYU_PRERUNTASK(job_id, workerid) \ if (AYU_event) \ { \ intptr_t __id = (workerid); \ AYU_event(AYU_PRERUNTASK, (job_id), &__id); \ } # define STARPU_AYU_RUNTASK(job_id) \ if (AYU_event) \ { \ AYU_event(AYU_RUNTASK, (job_id), NULL); \ } # define STARPU_AYU_POSTRUNTASK(job_id) \ if (AYU_event) \ { \ AYU_event(AYU_POSTRUNTASK, (job_id), NULL); \ } # define STARPU_AYU_ADDTOTASKQUEUE(job_id, worker_id) \ if (AYU_event) \ { \ intptr_t __id = (worker_id); \ AYU_event(AYU_ADDTASKTOQUEUE, (job_id), &__id); \ } # define STARPU_AYU_BARRIER() \ if (AYU_event) \ { \ AYU_event(AYU_BARRIER, 0, NULL); \ } #elif defined(STARPU_USE_AYUDAME2) /* Ayudame 2 API */ # include # define STARPU_AYU_EVENT ayu_event # define STARPU_AYU_PREINIT() # define STARPU_AYU_INIT() # define STARPU_AYU_FINISH() \ if (ayu_event){ \ ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ ayu_event_data_t __data; \ __data.common.client_id = __cli_id; \ ayu_event(AYU_FINISH, __data); \ } # define STARPU_AYU_ADDDEPENDENCY(previous, handle, job_id) \ if (ayu_event) \ { \ ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ ayu_event_data_t __data; \ uint64_t __dep_id=0; \ __dep_id |= (previous) << 0; \ __dep_id |= (job_id) << 24; \ __dep_id |= (uintptr_t) (handle) << 48; \ __data.common.client_id = __cli_id; \ __data.add_dependency.dependency_id = __dep_id; \ __data.add_dependency.from_id=(previous); \ __data.add_dependency.to_id=(job_id); \ __data.add_dependency.dependency_label = "dep"; \ ayu_event(AYU_ADDDEPENDENCY, __data); \ ayu_wipe_data(&__data); \ \ char __buf[32]; \ snprintf(__buf, sizeof(__buf), "%llu", (unsigned long long)(uintptr_t) (handle)); \ __data.common.client_id = __cli_id; \ __data.set_property.property_owner_id = __dep_id; \ __data.set_property.key = "dep_address_value"; \ __data.set_property.value = __buf; \ ayu_event(AYU_SETPROPERTY, __data); \ ayu_wipe_data(&__data); \ } # define STARPU_AYU_REMOVETASK(job_id) \ if (ayu_event) \ { \ ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ ayu_event_data_t __data; \ __data.common.client_id = __cli_id; \ __data.set_property.property_owner_id = (job_id); \ __data.set_property.key = "state"; \ __data.set_property.value = "finished"; \ ayu_event(AYU_SETPROPERTY, __data); \ ayu_wipe_data(&__data); \ } # define STARPU_AYU_ADDTASK(job_id, task) \ if (ayu_event) \ { \ ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ ayu_event_data_t __data; \ __data.common.client_id = __cli_id; \ __data.add_task.task_id = (job_id); \ __data.add_task.scope_id = 0; \ __data.add_task.task_label = "task"; \ ayu_event(AYU_ADDTASK, __data); \ ayu_wipe_data(&__data); \ \ if ((task) != NULL) \ { \ char __buf[32]; \ snprintf(__buf, sizeof(__buf), "%d", ((struct starpu_task *)(task))->priority); \ __data.common.client_id = __cli_id; \ __data.set_property.property_owner_id = (job_id); \ __data.set_property.key = "priority"; \ __data.set_property.value = __buf; \ ayu_event(AYU_SETPROPERTY, __data); \ ayu_wipe_data(&__data); \ \ const char *__name = ((struct starpu_task *)(task))->name != NULL?((struct starpu_task *)(task))->name: \ ((struct starpu_task *)(task))->cl->name != NULL?((struct starpu_task *)(task))->cl->name:""; \ __data.common.client_id = __cli_id; \ __data.set_property.property_owner_id = (job_id); \ __data.set_property.key = "function_name"; \ __data.set_property.value = __name; \ ayu_event(AYU_SETPROPERTY, __data); \ ayu_wipe_data(&__data); \ } \ } # define STARPU_AYU_PRERUNTASK(job_id, workerid) \ if (ayu_event) \ { \ ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ ayu_event_data_t __data; \ __data.common.client_id = __cli_id; \ __data.set_property.property_owner_id = (job_id); \ __data.set_property.key = "state"; \ __data.set_property.value = "running"; \ ayu_event(AYU_SETPROPERTY, __data); \ ayu_wipe_data(&__data); \ \ char __buf[32]; \ snprintf(__buf, sizeof(__buf), "%d", (workerid)); \ __data.common.client_id = __cli_id; \ __data.set_property.property_owner_id = (job_id); \ __data.set_property.key = "worker"; \ __data.set_property.value = __buf; \ ayu_event(AYU_SETPROPERTY, __data); \ ayu_wipe_data(&__data); \ } # define STARPU_AYU_RUNTASK(job_id) \ if (ayu_event) { \ ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ ayu_event_data_t __data; \ __data.common.client_id = __cli_id; \ __data.set_property.property_owner_id = (job_id); \ __data.set_property.key = "state"; \ __data.set_property.value = "running"; \ ayu_event(AYU_SETPROPERTY, __data); \ ayu_wipe_data(&__data); \ } # define STARPU_AYU_POSTRUNTASK(job_id) \ if (ayu_event) \ { \ /* TODO ADD thread id core id etc */ \ ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ ayu_event_data_t __data; \ __data.common.client_id = __cli_id; \ __data.set_property.property_owner_id = (job_id); \ __data.set_property.key = "state"; \ __data.set_property.value = "finished"; \ ayu_event(AYU_SETPROPERTY, __data); \ ayu_wipe_data(&__data); \ } # define STARPU_AYU_ADDTOTASKQUEUE(job_id, worker_id) \ if (ayu_event) \ { \ ayu_client_id_t __cli_id = get_client_id(AYU_CLIENT_STARPU); \ ayu_event_data_t __data; \ __data.common.client_id = __cli_id; \ __data.set_property.property_owner_id = (job_id); \ __data.set_property.key = "state"; \ __data.set_property.value = "queued"; \ ayu_event(AYU_SETPROPERTY, __data); \ ayu_wipe_data(&__data); \ \ char __buf[32]; \ snprintf(__buf, sizeof(__buf), "%d", (int)(worker_id)); \ __data.common.client_id = __cli_id; \ __data.set_property.property_owner_id = (job_id); \ __data.set_property.key = "worker"; \ __data.set_property.value = __buf; \ ayu_event(AYU_SETPROPERTY, __data); \ ayu_wipe_data(&__data); \ } # define STARPU_AYU_BARRIER() \ if (ayu_event) \ { \ /* How to generate a barrier event with Ayudame 2? */ \ } #else # define STARPU_AYU_EVENT (0) # define STARPU_AYU_PREINIT() # define STARPU_AYU_INIT() # define STARPU_AYU_FINISH() # define STARPU_AYU_ADDDEPENDENCY(previous, handle, next_job) # define STARPU_AYU_REMOVETASK(job_id) # define STARPU_AYU_ADDTASK(job_id, task) # define STARPU_AYU_PRERUNTASK(job_id, workerid) # define STARPU_AYU_RUNTASK(job_id) # define STARPU_AYU_POSTRUNTASK(job_id) # define STARPU_AYU_ADDTOTASKQUEUE(job_id, worker_id) # define STARPU_AYU_BARRIER() #endif #pragma GCC visibility push(hidden) /** Create a file that will contain StarPU's log */ void _starpu_open_debug_logfile(void); /** Close StarPU's log file */ void _starpu_close_debug_logfile(void); /** Write into StarPU's log file */ void _starpu_print_to_logfile(const char *format, ...) STARPU_ATTRIBUTE_FORMAT(printf, 1, 2); /** Tell gdb whether FXT is compiled in or not */ extern int _starpu_use_fxt; #if defined(STARPU_USE_AYUDAME1) /** Get an Ayudame id for CL */ int64_t _starpu_ayudame_get_func_id(struct starpu_codelet *cl); #endif void _starpu_watchdog_init(void); void _starpu_watchdog_shutdown(void); #pragma GCC visibility pop #endif // __DEBUG_H__ starpu-1.4.9+dfsg/src/core/dependencies/000077500000000000000000000000001507764646700201555ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/core/dependencies/cg.c000066400000000000000000000257241507764646700207240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include void _starpu_cg_list_init0(struct _starpu_cg_list *list) { _starpu_spin_init(&list->lock); //list->ndeps = 0; //list->ndeps_completed = 0; #ifdef STARPU_DEBUG //list->deps = NULL; //list->done = NULL; #endif //list->terminated = 0; //list->nsuccs = 0; #ifdef STARPU_DYNAMIC_DEPS_SIZE /* this is a small initial default value ... may be changed */ //list->succ_list_size = 0; //list->succ = NULL; #endif } void _starpu_cg_list_deinit(struct _starpu_cg_list *list) { unsigned id; for (id = 0; id < list->nsuccs; id++) { struct _starpu_cg *cg = list->succ[id]; /* We remove the reference on the completion group, and free it * if there is no more reference. */ unsigned ntags = STARPU_ATOMIC_ADD(&cg->ntags, -1); if (ntags == 0) { #ifdef STARPU_DEBUG free(list->succ[id]->deps); free(list->succ[id]->done); #endif free(list->succ[id]); } } #ifdef STARPU_DYNAMIC_DEPS_SIZE free(list->succ); #endif #ifdef STARPU_DEBUG free(list->deps); free(list->done); #endif _starpu_spin_destroy(&list->lock); } /* Returns whether the completion was already terminated, and caller should * thus immediately proceed. */ int _starpu_add_successor_to_cg_list(struct _starpu_cg_list *successors, struct _starpu_cg *cg) { int ret; STARPU_ASSERT(cg); _starpu_spin_lock(&successors->lock); ret = successors->terminated; /* where should that cg should be put in the array ? */ unsigned index = successors->nsuccs++; #ifdef STARPU_DYNAMIC_DEPS_SIZE if (index >= successors->succ_list_size) { /* the successor list is too small */ if (successors->succ_list_size > 0) successors->succ_list_size *= 2; else successors->succ_list_size = 4; _STARPU_REALLOC(successors->succ, successors->succ_list_size*sizeof(struct _starpu_cg *)); } #else STARPU_ASSERT(index < STARPU_NMAXDEPS); #endif successors->succ[index] = cg; _starpu_spin_unlock(&successors->lock); return ret; } int _starpu_list_task_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, struct starpu_task *task_array[]) { unsigned i; unsigned n = 0; _starpu_spin_lock(&successors->lock); for (i = 0; i < successors->nsuccs; i++) { struct _starpu_cg *cg = successors->succ[i]; if (cg->cg_type != STARPU_CG_TASK) continue; if (n < ndeps) { task_array[n] = cg->succ.job->task; n++; } } _starpu_spin_unlock(&successors->lock); return n; } int _starpu_list_task_scheduled_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, struct starpu_task *task_array[]) { unsigned i; unsigned n = 0; _starpu_spin_lock(&successors->lock); for (i = 0; i < successors->nsuccs; i++) { struct _starpu_cg *cg = successors->succ[i]; if (cg->cg_type != STARPU_CG_TASK) continue; if (n < ndeps) { struct starpu_task *task = cg->succ.job->task; if (task->cl == NULL || task->where == STARPU_NOWHERE || task->execute_on_a_specific_worker) /* will not be scheduled */ continue; task_array[n] = task; n++; } } _starpu_spin_unlock(&successors->lock); return n; } int _starpu_list_tag_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, starpu_tag_t tag_array[]) { unsigned i; unsigned n = 0; _starpu_spin_lock(&successors->lock); for (i = 0; i < successors->nsuccs; i++) { struct _starpu_cg *cg = successors->succ[i]; if (cg->cg_type != STARPU_CG_TAG) continue; if (n < ndeps) { tag_array[n] = cg->succ.tag->id; n++; } } _starpu_spin_unlock(&successors->lock); return n; } void _starpu_notify_cg(void *pred STARPU_ATTRIBUTE_UNUSED, struct _starpu_cg *cg) { STARPU_ASSERT(cg); unsigned remaining = STARPU_ATOMIC_ADD(&cg->remaining, -1); ANNOTATE_HAPPENS_BEFORE(&cg->remaining); if (remaining == 0) { ANNOTATE_HAPPENS_AFTER(&cg->remaining); /* Note: This looks racy to helgrind when the tasks are not * autoregenerated, since they then unsubcribe from the * completion group in parallel, thus decreasing ntags. This is * however not a problem since it means we will not reuse this * cg, and remaining will not be used, so a bogus value won't * hurt. */ cg->remaining = cg->ntags; /* the group is now completed */ switch (cg->cg_type) { case STARPU_CG_APPS: { /* this is a cg for an application waiting on a set of * tags, wake the thread */ STARPU_PTHREAD_MUTEX_LOCK(&cg->succ.succ_apps.cg_mutex); cg->succ.succ_apps.completed = 1; STARPU_PTHREAD_COND_SIGNAL(&cg->succ.succ_apps.cg_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&cg->succ.succ_apps.cg_mutex); break; } case STARPU_CG_TAG: { struct _starpu_cg_list *tag_successors; struct _starpu_tag *tag; tag = cg->succ.tag; _starpu_spin_lock(&tag->lock); tag_successors = &tag->tag_successors; tag_successors->ndeps_completed++; /* Note: the tag is already locked by the * caller. */ if ((tag->state == STARPU_BLOCKED) && (tag_successors->ndeps == tag_successors->ndeps_completed)) { /* reset the counter so that we can reuse the completion group */ tag_successors->ndeps_completed = 0; /* This releases the lock */ _starpu_tag_set_ready(tag); } else _starpu_spin_unlock(&tag->lock); break; } case STARPU_CG_TASK: { struct _starpu_cg_list *job_successors; struct _starpu_job *j; j = cg->succ.job; STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); job_successors = &j->job_successors; #ifdef STARPU_DEBUG if (!j->task->regenerate) { unsigned i; /* Remove backward cg pointers for easier debugging */ if (job_successors->deps) { for (i = 0; i < job_successors->ndeps; i++) if (job_successors->deps[i] == cg) break; STARPU_ASSERT(i < job_successors->ndeps); job_successors->done[i] = 1; } if (cg->deps) { for (i = 0; i < cg->ndeps; i++) if (cg->deps[i] == pred) break; STARPU_ASSERT(i < cg->ndeps); cg->done[i] = 1; } } #endif unsigned ndeps_completed = STARPU_ATOMIC_ADD(&job_successors->ndeps_completed, 1); STARPU_ASSERT(job_successors->ndeps >= ndeps_completed); /* Need to atomically test submitted and check * dependencies, since this is concurrent with * _starpu_submit_job */ if (j->submitted && job_successors->ndeps == ndeps_completed && j->task->status == STARPU_TASK_BLOCKED_ON_TASK) { /* That task has already passed tag checks, * do not do them again since the tag has been cleared! */ _starpu_enforce_deps_starting_from_task(j); } else STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); break; } default: STARPU_ABORT(); } } } /* Called when a job has just started, so we can notify tasks which were waiting * only for this one when they can expect to start */ /* Note: in case of a tag, it must be already locked */ void _starpu_notify_job_ready_soon_cg(void *pred STARPU_ATTRIBUTE_UNUSED, struct _starpu_cg *cg, _starpu_notify_job_start_data *data) { STARPU_ASSERT(cg); if (cg->remaining == 1) { /* the group is to be completed */ switch (cg->cg_type) { case STARPU_CG_APPS: /* Not a task */ break; case STARPU_CG_TAG: { struct _starpu_cg_list *tag_successors; struct _starpu_tag *tag; tag = cg->succ.tag; tag_successors = &tag->tag_successors; /* Note: the tag is already locked by the * caller. */ if ((tag->state == STARPU_BLOCKED) && (tag_successors->ndeps == tag_successors->ndeps_completed + 1)) { /* This is to be ready */ _starpu_enforce_deps_notify_job_ready_soon(tag->job, data, 1); } break; } case STARPU_CG_TASK: { struct _starpu_cg_list *job_successors; struct _starpu_job *j; j = cg->succ.job; job_successors = &j->job_successors; if (job_successors->ndeps == job_successors->ndeps_completed + 1 && j->task->status == STARPU_TASK_BLOCKED_ON_TASK) { /* This is to be ready */ _starpu_enforce_deps_notify_job_ready_soon(j, data, 0); } break; } default: STARPU_ABORT(); } } } /* Caller just has to promise that the list will not disappear. * _starpu_notify_cg_list protects the list itself. * No job lock should be held, since we might want to immediately call the callback of an empty task. */ void _starpu_notify_cg_list(void *pred, struct _starpu_cg_list *successors) { unsigned succ; _starpu_spin_lock(&successors->lock); successors->terminated = 1; /* Note: some thread might be concurrently adding other items */ for (succ = 0; succ < successors->nsuccs; succ++) { struct _starpu_cg *cg = successors->succ[succ]; STARPU_ASSERT(cg); unsigned cg_type = cg->cg_type; if (cg_type == STARPU_CG_APPS) { /* Remove the temporary ref to the cg */ memmove(&successors->succ[succ], &successors->succ[succ+1], (successors->nsuccs-(succ+1)) * sizeof(successors->succ[succ])); succ--; successors->nsuccs--; } _starpu_spin_unlock(&successors->lock); _starpu_notify_cg(pred, cg); _starpu_spin_lock(&successors->lock); } _starpu_spin_unlock(&successors->lock); } /* Called when a job has just started, so we can notify tasks which were waiting * only for this one when they can expect to start */ /* Caller just has to promise that the list will not disappear. * _starpu_notify_cg_list protects the list itself. * No job lock should be held, since we might want to immediately call the callback of an empty task. */ void _starpu_notify_job_start_cg_list(void *pred, struct _starpu_cg_list *successors, _starpu_notify_job_start_data *data) { unsigned succ; _starpu_spin_lock(&successors->lock); /* Note: some thread might be concurrently adding other items */ for (succ = 0; succ < successors->nsuccs; succ++) { struct _starpu_cg *cg = successors->succ[succ]; _starpu_spin_unlock(&successors->lock); STARPU_ASSERT(cg); unsigned cg_type = cg->cg_type; struct _starpu_tag *cgtag = NULL; if (cg_type == STARPU_CG_TAG) { cgtag = cg->succ.tag; STARPU_ASSERT(cgtag); _starpu_spin_lock(&cgtag->lock); } _starpu_notify_job_ready_soon_cg(pred, cg, data); if (cg_type == STARPU_CG_TAG) _starpu_spin_unlock(&cgtag->lock); _starpu_spin_lock(&successors->lock); } _starpu_spin_unlock(&successors->lock); } starpu-1.4.9+dfsg/src/core/dependencies/cg.h000066400000000000000000000105301507764646700207160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __CG_H__ #define __CG_H__ /** @file */ #include #include #pragma GCC visibility push(hidden) /** * we do not necessarily want to allocate room for 256 dependencies, but we * want to handle the few situation where there are a lot of dependencies as * well */ #define STARPU_DYNAMIC_DEPS_SIZE 1 /* randomly chosen ! */ #ifndef STARPU_DYNAMIC_DEPS_SIZE #define STARPU_NMAXDEPS 256 #endif struct _starpu_job; /** Completion Group list, records both the number of expected notifications * before the completion can start, and the list of successors when the * completion is finished. */ struct _starpu_cg_list { /** Protects atomicity of the list and the terminated flag */ struct _starpu_spinlock lock; /** Number of notifications to be waited for */ unsigned ndeps; /* how many deps ? */ unsigned ndeps_completed; /* how many deps are done ? */ #ifdef STARPU_DEBUG /** Array of the notifications, size ndeps */ struct _starpu_cg **deps; /** Which ones have notified, size ndeps */ char *done; #endif /** Whether the completion is finished. * For restartable/restarted tasks, only the first iteration is taken into account here. */ unsigned terminated; /** List of successors */ unsigned nsuccs; /* how many successors ? */ #ifdef STARPU_DYNAMIC_DEPS_SIZE /** How many allocated items in succ */ unsigned succ_list_size; struct _starpu_cg **succ; #else struct _starpu_cg *succ[STARPU_NMAXDEPS]; #endif }; enum _starpu_cg_type { STARPU_CG_APPS=(1<<0), STARPU_CG_TAG=(1<<1), STARPU_CG_TASK=(1<<2) }; /** Completion Group */ struct _starpu_cg { /** number of tags depended on */ unsigned ntags; /** number of remaining tags */ unsigned remaining; #ifdef STARPU_DEBUG unsigned ndeps; /** array of predecessors, size ndeps */ void **deps; /** which ones have notified, size ndeps */ char *done; #endif enum _starpu_cg_type cg_type; union { /** STARPU_CG_TAG */ struct _starpu_tag *tag; /** STARPU_CG_TASK */ struct _starpu_job *job; /** STARPU_CG_APPS * in case this completion group is related to an application, * we have to explicitly wake the waiting thread instead of * reschedule the corresponding task */ struct { unsigned completed; starpu_pthread_mutex_t cg_mutex; starpu_pthread_cond_t cg_cond; } succ_apps; } succ; }; typedef struct _starpu_notify_job_start_data _starpu_notify_job_start_data; void _starpu_notify_dependencies(struct _starpu_job *j); void _starpu_job_notify_start(struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch); void _starpu_job_notify_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data); void _starpu_cg_list_init0(struct _starpu_cg_list *list); void _starpu_cg_list_deinit(struct _starpu_cg_list *list); int _starpu_add_successor_to_cg_list(struct _starpu_cg_list *successors, struct _starpu_cg *cg); int _starpu_list_task_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, struct starpu_task *task_array[]); int _starpu_list_task_scheduled_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, struct starpu_task *task_array[]); int _starpu_list_tag_successors_in_cg_list(struct _starpu_cg_list *successors, unsigned ndeps, starpu_tag_t tag_array[]); void _starpu_notify_cg(void *pred, struct _starpu_cg *cg); void _starpu_notify_cg_list(void *pred, struct _starpu_cg_list *successors); void _starpu_notify_job_start_cg_list(void *pred, struct _starpu_cg_list *successors, _starpu_notify_job_start_data *data); void _starpu_notify_task_dependencies(struct _starpu_job *j); void _starpu_notify_job_start_tasks(struct _starpu_job *j, _starpu_notify_job_start_data *data); #pragma GCC visibility pop #endif // __CG_H__ starpu-1.4.9+dfsg/src/core/dependencies/data_arbiter_concurrency.c000066400000000000000000000653721507764646700253710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include /* TODO factorize with data_concurrency.c and btw support redux */ //#define LOCK_OR_DELEGATE /* * This implements a solution for the dining philosophers problem (see * data_concurrency.c for the rationale) based on a centralized arbiter. This * allows to get a more parallel solution than the Dijkstra solution, by * avoiding strictly serialized executions, and instead opportunistically find * which tasks can take data. * * These are the algorithms implemented below: * * * at termination of task T: * * - for each handle h of T: * - mutex_lock(&arbiter) * - release reference on h * - call _starpu_notify_arbitered_dependencies which does the following * - for each task Tc waiting for h: * - for each data Tc_h it is waiting for: * - if Tc_h is busy, goto fail * // Ok, now really take them * - For each data Tc_h it is waiting: * - lock(Tc_h) * - take reference on h (it should be still available since we hold the arbiter) * - unlock(Tc_h) * // Ok, we managed to find somebody, we're finished! * _starpu_push_task(Tc); * break; * fail: * - unrecord T as waiting on h * - record T as waiting on Tc_h * // No luck, let's try another task * continue; * // Release the arbiter mutex a bit from time to time * - mutex_unlock(&arbiter) * * * at submission of task T (_starpu_submit_job_enforce_arbitered_deps): * * - mutex_lock(&arbiter) * - for each handle h of T: * - lock(h) * - try to take a reference on h, goto fail on failure * - unlock(h) * // Success! * - mutex_unlock(&arbiter); * - return 0; * * fail: * // couldn't take everything, record task T and abort * - record T as waiting on h * // drop spurious references * - for each handle h of T already taken: * - lock(h) * - release reference on h * - unlock(h) * - mutex_unlock(&arbiter) * - return 1; * * at acquire (_starpu_attempt_to_submit_arbitered_data_request): * - mutex_lock(&arbiter) * - try to take a reference on h * - on failure, record as waiting on h * - mutex_unlock(&arbiter); * - return 0 if succeeded, 1 if failed; */ static int _starpu_arbiter_filter_modes(int mode) { /* Do not care about some flags */ mode &= ~STARPU_COMMUTE; mode &= ~STARPU_SSEND; mode &= ~STARPU_LOCALITY; mode &= ~STARPU_NOFOOTPRINT; if (mode == STARPU_RW) mode = STARPU_W; return mode; } struct starpu_arbiter { #ifdef LOCK_OR_DELEGATE /* The list of task to perform */ struct LockOrDelegateListNode* dlTaskListHead; /* To protect the list of tasks */ struct _starpu_spinlock dlListLock; /* Whether somebody is working on the list */ int working; #else /* LOCK_OR_DELEGATE */ starpu_pthread_mutex_t mutex; #endif /* LOCK_OR_DELEGATE */ }; #ifdef LOCK_OR_DELEGATE /* In case of congestion, we don't want to needlessly wait for the arbiter lock * while we can just delegate the work to the worker already managing some * dependencies. * * So we push work on the dlTastListHead queue and only one worker will process * the list. */ /* A LockOrDelegate task list */ struct LockOrDelegateListNode { void (*func)(void*); void* data; struct LockOrDelegateListNode* next; }; /* Post a task to perform if possible, otherwise put it in the list * If we can perform this task, we may also perform all the tasks in the list * This function return 1 if the task (and maybe some others) has been done * by the calling thread and 0 otherwise (if the task has just been put in the list) */ static int _starpu_LockOrDelegatePostOrPerform(starpu_arbiter_t arbiter, void (*func)(void*), void* data) { struct LockOrDelegateListNode *newNode, *iter, *next; int did = 0; _STARPU_MALLOC(newNode, sizeof(*newNode)); newNode->data = data; newNode->func = func; _starpu_spin_lock(&arbiter->dlListLock); if (arbiter->working) { /* Somebody working on it, insert the node */ newNode->next = arbiter->dlTaskListHead; arbiter->dlTaskListHead = newNode; } else { /* Nobody working on the list, we'll work */ arbiter->working = 1; /* work on what was pushed so far first */ iter = arbiter->dlTaskListHead; arbiter->dlTaskListHead = NULL; _starpu_spin_unlock(&arbiter->dlListLock); while (iter != NULL) { (*iter->func)(iter->data); next = iter->next; free(iter); iter = next; } /* And then do our job */ (*func)(data); free(newNode); did = 1; _starpu_spin_lock(&arbiter->dlListLock); /* And finish working on anything that could have been pushed * in the meanwhile */ while (arbiter->dlTaskListHead != 0) { iter = arbiter->dlTaskListHead; arbiter->dlTaskListHead = arbiter->dlTaskListHead->next; _starpu_spin_unlock(&arbiter->dlListLock); (*iter->func)(iter->data); free(iter); _starpu_spin_lock(&arbiter->dlListLock); } arbiter->working = 0; } _starpu_spin_unlock(&arbiter->dlListLock); return did; } #endif /* Try to submit just one data request, in case the request can be processed * immediately, return 0, if there is still a dependency that is not compatible * with the current mode, the request is put in the per-handle list of * "requesters", and this function returns 1. */ #ifdef LOCK_OR_DELEGATE struct starpu_submit_arbitered_args { unsigned request_from_codelet; starpu_data_handle_t handle; enum starpu_data_access_mode mode; void (*callback)(void *); void *argcb; struct _starpu_job *j; unsigned buffer_index; }; static unsigned ___starpu_attempt_to_submit_arbitered_data_request(unsigned request_from_codelet, starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *argcb, struct _starpu_job *j, unsigned buffer_index); static void __starpu_attempt_to_submit_arbitered_data_request(void *inData) { struct starpu_submit_arbitered_args* args = inData; unsigned request_from_codelet = args->request_from_codelet; starpu_data_handle_t handle = args->handle; enum starpu_data_access_mode mode = args->mode; void (*callback)(void*) = args->callback; void *argcb = args->argcb; struct _starpu_job *j = args->j; unsigned buffer_index = args->buffer_index; free(args); if (!___starpu_attempt_to_submit_arbitered_data_request(request_from_codelet, handle, mode, callback, argcb, j, buffer_index)) /* Success, but we have no way to report it to original caller, * so call callback ourself */ callback(argcb); } unsigned _starpu_attempt_to_submit_arbitered_data_request(unsigned request_from_codelet, starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *argcb, struct _starpu_job *j, unsigned buffer_index) { struct starpu_submit_arbitered_args* args; _STARPU_MALLOC(args, sizeof(*args)); args->request_from_codelet = request_from_codelet; args->handle = handle; args->mode = mode; args->callback = callback; args->argcb = argcb; args->j = j; args->buffer_index = buffer_index; /* The function will delete args */ _starpu_LockOrDelegatePostOrPerform(handle->arbiter, &__starpu_attempt_to_submit_arbitered_data_request, args); return 1; } unsigned ___starpu_attempt_to_submit_arbitered_data_request(unsigned request_from_codelet, starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *argcb, struct _starpu_job *j, unsigned buffer_index) { STARPU_ASSERT(handle->arbiter); #else // LOCK_OR_DELEGATE unsigned _starpu_attempt_to_submit_arbitered_data_request(unsigned request_from_codelet, starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *argcb, struct _starpu_job *j, unsigned buffer_index) { starpu_arbiter_t arbiter = handle->arbiter; STARPU_PTHREAD_MUTEX_LOCK(&arbiter->mutex); #endif // LOCK_OR_DELEGATE mode = _starpu_arbiter_filter_modes(mode); STARPU_ASSERT_MSG(!(mode & STARPU_REDUX), "REDUX with arbiter is not implemented\n"); /* Take the lock protecting the header. We try to do some progression * in case this is called from a worker, otherwise we just wait for the * lock to be available. */ if (request_from_codelet) { int cpt = 0; while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock)) { cpt++; _starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_NOT_ALLOC); } if (cpt == STARPU_SPIN_MAXTRY) _starpu_spin_lock(&handle->header_lock); } else { _starpu_spin_lock(&handle->header_lock); } /* If there is currently nobody accessing the piece of data, or it's * not another writer and if this is the same type of access as the * current one, we can proceed. */ unsigned put_in_list = 1; if ((handle->refcnt == 0) || (!(mode == STARPU_W) && (handle->current_mode == mode))) { /* TODO: Detect whether this is the end of a reduction phase etc. like in data_concurrency.c */ if (0) { } else { put_in_list = 0; } } if (put_in_list) { /* there cannot be multiple writers or a new writer * while the data is in read mode */ handle->busy_count++; /* enqueue the request */ struct _starpu_data_requester *r = _starpu_data_requester_new(); r->mode = mode; r->is_requested_by_codelet = request_from_codelet; r->j = j; r->buffer_index = buffer_index; r->prio = j ? j->task->priority : 0; r->ready_data_callback = callback; r->argcb = argcb; _starpu_data_requester_prio_list_push_back(&handle->arbitered_req_list, r); /* failed */ put_in_list = 1; } else { handle->refcnt++; handle->busy_count++; /* Do not write to handle->current_mode if it is already * R. This avoids a spurious warning from helgrind when * the following happens: * acquire(R) in thread A * acquire(R) in thread B * release_data_on_node() in thread A * helgrind would shout that the latter reads current_mode * unsafely. * * This actually basically explains helgrind that it is a * shared R acquisition. */ if (mode != STARPU_R || handle->current_mode != mode) handle->current_mode = mode; /* success */ put_in_list = 0; } _starpu_spin_unlock(&handle->header_lock); #ifndef LOCK_OR_DELEGATE STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); #endif // LOCK_OR_DELEGATE return put_in_list; } #ifdef LOCK_OR_DELEGATE /* These are the arguments passed to _submit_job_enforce_arbitered_deps */ struct starpu_enforce_arbitered_args { struct _starpu_job *j; unsigned buf; unsigned nbuffers; }; static void ___starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, unsigned buf, unsigned nbuffers); static void __starpu_submit_job_enforce_arbitered_deps(void* inData) { struct starpu_enforce_arbitered_args* args = inData; struct _starpu_job *j = args->j; unsigned buf = args->buf; unsigned nbuffers = args->nbuffers; /* we are in charge of freeing the args */ free(args); ___starpu_submit_job_enforce_arbitered_deps(j, buf, nbuffers); } void _starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, unsigned buf, unsigned nbuffers) { struct starpu_enforce_arbitered_args* args; _STARPU_MALLOC(args, sizeof(*args)); starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf); args->j = j; args->buf = buf; args->nbuffers = nbuffers; /* The function will delete args */ _starpu_LockOrDelegatePostOrPerform(handle->arbiter, &__starpu_submit_job_enforce_arbitered_deps, args); } static void ___starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, unsigned buf, unsigned nbuffers) { starpu_arbiter_t arbiter = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf)->arbiter; #else // LOCK_OR_DELEGATE void _starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, unsigned buf, unsigned nbuffers) { struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); starpu_arbiter_t arbiter = descrs[buf].handle->arbiter; STARPU_PTHREAD_MUTEX_LOCK(&arbiter->mutex); #endif STARPU_ASSERT(arbiter); const unsigned start_buf_arbiter = buf; unsigned idx_buf_arbiter; int idx_buf_arbiterdup; unsigned all_arbiter_available = 1; starpu_data_handle_t handle; enum starpu_data_access_mode mode; int node; for (idx_buf_arbiter = start_buf_arbiter; idx_buf_arbiter < nbuffers; idx_buf_arbiter++) { handle = descrs[idx_buf_arbiter].handle; mode = descrs[idx_buf_arbiter].mode & ~STARPU_COMMUTE; node = descrs[idx_buf_arbiter].orig_node; mode = _starpu_arbiter_filter_modes(mode); STARPU_ASSERT_MSG(!(mode & STARPU_REDUX), "REDUX with arbiter is not implemented\n"); for (idx_buf_arbiterdup = (int) idx_buf_arbiter-1; idx_buf_arbiterdup >= 0; idx_buf_arbiterdup--) { starpu_data_handle_t handle_dup = descrs[idx_buf_arbiterdup].handle; int node_dup = descrs[idx_buf_arbiterdup].orig_node; if (handle_dup == handle && node_dup == node) /* We have already requested this data, skip it. This * depends on ordering putting writes before reads, see * _starpu_compar_handles. */ goto next; if (!_starpu_handles_same_root(handle_dup, handle)) /* We are not checking within the same parent any more, no need to continue checking other handles */ break; } if (handle->arbiter != arbiter) { /* another arbiter */ break; } /* Try to take handle */ _starpu_spin_lock(&handle->header_lock); if ((handle->refcnt == 0) || (!(mode == STARPU_W) && (handle->current_mode == mode))) { /* Got it */ handle->refcnt++; handle->busy_count++; if (mode != STARPU_R || handle->current_mode != mode) handle->current_mode = mode; _starpu_spin_unlock(&handle->header_lock); } else { /* a handle does not have a refcnt == 0, stop */ _starpu_spin_unlock(&handle->header_lock); all_arbiter_available = 0; break; } next: ; } if (all_arbiter_available == 0) { /* Oups, record ourself as waiting for this data */ struct _starpu_data_requester *r = _starpu_data_requester_new(); r->mode = mode; r->is_requested_by_codelet = 1; r->j = j; r->buffer_index = start_buf_arbiter; r->prio = j->task->priority; r->ready_data_callback = NULL; r->argcb = NULL; /* store node in list */ _starpu_data_requester_prio_list_push_front(&handle->arbitered_req_list, r); _starpu_spin_lock(&handle->header_lock); handle->busy_count++; _starpu_spin_unlock(&handle->header_lock); /* and cancel all taken */ unsigned idx_buf_cancel; unsigned idx_buf_canceldup; for (idx_buf_cancel = start_buf_arbiter; idx_buf_cancel < idx_buf_arbiter ; idx_buf_cancel++) { starpu_data_handle_t cancel_handle = descrs[idx_buf_cancel].handle; int cancel_node = descrs[idx_buf_cancel].orig_node; if (cancel_handle->arbiter != arbiter) /* Will have to process another arbiter, will do that later */ break; for (idx_buf_canceldup = idx_buf_cancel+1; idx_buf_canceldup < idx_buf_arbiter; idx_buf_canceldup++) { starpu_data_handle_t handle_dup = descrs[idx_buf_canceldup].handle; int node_dup = descrs[idx_buf_canceldup].orig_node; if (handle_dup == cancel_handle && node_dup == cancel_node) goto next2; if (!_starpu_handles_same_root(handle_dup, cancel_handle)) /* We are not checking within the same parent any more, no need to continue checking other handles */ break; } _starpu_spin_lock(&cancel_handle->header_lock); /* reset the counter because finally we do not take the data */ STARPU_ASSERT(cancel_handle->refcnt >= 1); cancel_handle->refcnt--; STARPU_ASSERT(cancel_handle->busy_count > 0); cancel_handle->busy_count--; if (!_starpu_data_check_not_busy(cancel_handle)) _starpu_spin_unlock(&cancel_handle->header_lock); next2: ; } #ifndef LOCK_OR_DELEGATE STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); #endif return; } #ifndef LOCK_OR_DELEGATE STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); #endif // all_arbiter_available is true if (idx_buf_arbiter < nbuffers) /* Other arbitered data, process them */ _starpu_submit_job_enforce_arbitered_deps(j, idx_buf_arbiter, nbuffers); else /* Finished with all data, can eventually push! */ _starpu_push_task(j); } #ifdef LOCK_OR_DELEGATE void ___starpu_notify_arbitered_dependencies(starpu_data_handle_t handle); void __starpu_notify_arbitered_dependencies(void* inData) { starpu_data_handle_t handle = inData; ___starpu_notify_arbitered_dependencies(handle); } void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle) { _starpu_LockOrDelegatePostOrPerform(handle->arbiter, &__starpu_notify_arbitered_dependencies, handle); } void ___starpu_notify_arbitered_dependencies(starpu_data_handle_t handle) #else // LOCK_OR_DELEGATE void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode) #endif { starpu_arbiter_t arbiter = handle->arbiter; #ifndef LOCK_OR_DELEGATE STARPU_PTHREAD_MUTEX_LOCK(&arbiter->mutex); #endif /* Since the request has been posted the handle may have been proceed and released */ if (_starpu_data_requester_prio_list_empty(&handle->arbitered_req_list)) { /* No waiter, just remove our reference */ _starpu_spin_lock(&handle->header_lock); if (down_to_mode == STARPU_NONE) { STARPU_ASSERT(handle->refcnt > 0); handle->refcnt--; STARPU_ASSERT(handle->busy_count > 0); handle->busy_count--; } else { /* Downgrade from W or RW down to R, keeping the same reference, * but thus allowing other readers without allowing writers. */ STARPU_ASSERT(down_to_mode == STARPU_R && handle->current_mode == STARPU_W); handle->current_mode = down_to_mode; } #ifndef LOCK_OR_DELEGATE STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); #endif if (_starpu_data_check_not_busy(handle)) /* Handle was even destroyed, don't unlock it. */ return; _starpu_spin_unlock(&handle->header_lock); return; } /* There is a waiter, remove our reference */ _starpu_spin_lock(&handle->header_lock); if (down_to_mode == STARPU_NONE) { STARPU_ASSERT(handle->refcnt > 0); handle->refcnt--; STARPU_ASSERT(handle->busy_count > 0); handle->busy_count--; } else { /* Downgrade from W or RW down to R, keeping the same reference, * but thus allowing other readers without allowing writers. */ STARPU_ASSERT(down_to_mode == STARPU_R && handle->current_mode == STARPU_W); handle->current_mode = down_to_mode; } /* There should be at least one busy_count reference for the waiter * (thus we don't risk to see the handle disappear below) */ STARPU_ASSERT(handle->busy_count > 0); _starpu_spin_unlock(&handle->header_lock); /* Note: we may be putting back our own requests, so avoid looping by * extracting the list */ struct _starpu_data_requester_prio_list l = handle->arbitered_req_list; _starpu_data_requester_prio_list_init(&handle->arbitered_req_list); while (!_starpu_data_requester_prio_list_empty(&l)) { struct _starpu_data_requester *r = _starpu_data_requester_prio_list_pop_front_highest(&l); if (!r->is_requested_by_codelet) { /* data_acquire_cb, process it */ enum starpu_data_access_mode r_mode = r->mode; int put_in_list = 1; r_mode = _starpu_arbiter_filter_modes(r_mode); _starpu_spin_lock(&handle->header_lock); handle->busy_count++; if ((handle->refcnt == 0) || (!(r_mode == STARPU_W) && (handle->current_mode == r_mode))) { handle->refcnt++; handle->current_mode = r_mode; put_in_list = 0; } _starpu_spin_unlock(&handle->header_lock); if (put_in_list) _starpu_data_requester_prio_list_push_front(&l, r); /* Put back remaining requests */ _starpu_data_requester_prio_list_push_prio_list_back(&handle->arbitered_req_list, &l); #ifndef LOCK_OR_DELEGATE STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); #endif if (!put_in_list) { r->ready_data_callback(r->argcb); _starpu_data_requester_delete(r); } _starpu_spin_lock(&handle->header_lock); STARPU_ASSERT(handle->busy_count > 0); handle->busy_count--; if (!_starpu_data_check_not_busy(handle)) _starpu_spin_unlock(&handle->header_lock); return; } /* A task waiting for a set of data, try to acquire them */ struct _starpu_job* j = r->j; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); unsigned idx_buf_arbiter; int idx_buf_arbiterdup; unsigned all_arbiter_available = 1; starpu_data_handle_t handle_arbiter; enum starpu_data_access_mode mode; int node_arbiter; unsigned start_buf_arbiter = r->buffer_index; struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); for (idx_buf_arbiter = start_buf_arbiter; idx_buf_arbiter < nbuffers; idx_buf_arbiter++) { handle_arbiter = descrs[idx_buf_arbiter].handle; node_arbiter = descrs[idx_buf_arbiter].orig_node; if (handle_arbiter->arbiter != arbiter) /* Will have to process another arbiter, will do that later */ break; for (idx_buf_arbiterdup = (int) idx_buf_arbiter-1; idx_buf_arbiterdup >= 0; idx_buf_arbiterdup--) { starpu_data_handle_t handle_dup = descrs[idx_buf_arbiterdup].handle; int node_dup = descrs[idx_buf_arbiterdup].orig_node; if (handle_dup == handle_arbiter && node_dup == node_arbiter) /* We have already requested this data, skip it. This * depends on ordering putting writes before reads, see * _starpu_compar_handles. */ goto next; if (!_starpu_handles_same_root(handle_dup, handle_arbiter)) /* We are not checking within the same parent any more, no need to continue checking other handles */ break; } mode = descrs[idx_buf_arbiter].mode; mode = _starpu_arbiter_filter_modes(mode); /* we post all arbiter */ _starpu_spin_lock(&handle_arbiter->header_lock); if (!((handle_arbiter->refcnt == 0) || (!(mode == STARPU_W) && (handle_arbiter->current_mode == mode)))) { /* handle is not available, record ourself */ _starpu_spin_unlock(&handle_arbiter->header_lock); all_arbiter_available = 0; break; } /* mark the handle as taken */ handle_arbiter->refcnt++; handle_arbiter->busy_count++; handle_arbiter->current_mode = mode; _starpu_spin_unlock(&handle_arbiter->header_lock); next: ; } if (all_arbiter_available) { /* Success! Drop request */ _starpu_data_requester_delete(r); _starpu_spin_lock(&handle->header_lock); STARPU_ASSERT(handle->busy_count > 0); handle->busy_count--; if (!_starpu_data_check_not_busy(handle)) _starpu_spin_unlock(&handle->header_lock); /* Put back remaining requests */ _starpu_data_requester_prio_list_push_prio_list_back(&handle->arbitered_req_list, &l); #ifndef LOCK_OR_DELEGATE STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); #endif if (idx_buf_arbiter < nbuffers) /* Other arbitered data, process them */ _starpu_submit_job_enforce_arbitered_deps(j, idx_buf_arbiter, nbuffers); else /* Finished with all data, can eventually push! */ _starpu_push_task(j); return; } else { /* all handles are not available - record that task on the first unavailable handle */ /* store node in list */ r->mode = mode; _starpu_data_requester_prio_list_push_front(&handle_arbiter->arbitered_req_list, r); /* Move check_busy reference too */ _starpu_spin_lock(&handle->header_lock); STARPU_ASSERT(handle->busy_count > 0); handle->busy_count--; if (!_starpu_data_check_not_busy(handle)) _starpu_spin_unlock(&handle->header_lock); _starpu_spin_lock(&handle_arbiter->header_lock); handle_arbiter->busy_count++; _starpu_spin_unlock(&handle_arbiter->header_lock); /* and revert the mark */ unsigned idx_buf_cancel; unsigned idx_buf_canceldup; for (idx_buf_cancel = start_buf_arbiter; idx_buf_cancel < idx_buf_arbiter ; idx_buf_cancel++) { starpu_data_handle_t cancel_handle = descrs[idx_buf_cancel].handle; int cancel_node = descrs[idx_buf_cancel].orig_node; if (cancel_handle->arbiter != arbiter) break; for (idx_buf_canceldup = idx_buf_cancel+1; idx_buf_canceldup < idx_buf_arbiter; idx_buf_canceldup++) { starpu_data_handle_t handle_dup = descrs[idx_buf_canceldup].handle; int node_dup = descrs[idx_buf_canceldup].orig_node; if (handle_dup == cancel_handle && node_dup == cancel_node) goto next2; if (!_starpu_handles_same_root(handle_dup, cancel_handle)) /* We are not checking within the same parent any more, no need to continue checking other handles */ break; } _starpu_spin_lock(&cancel_handle->header_lock); STARPU_ASSERT(cancel_handle->refcnt >= 1); cancel_handle->refcnt--; STARPU_ASSERT(cancel_handle->busy_count > 0); cancel_handle->busy_count--; if (!_starpu_data_check_not_busy(cancel_handle)) _starpu_spin_unlock(&cancel_handle->header_lock); next2: ; } } } /* no task has been pushed */ #ifndef LOCK_OR_DELEGATE STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); #endif return; } starpu_arbiter_t starpu_arbiter_create(void) { starpu_arbiter_t res; _STARPU_MALLOC(res, sizeof(*res)); #ifdef LOCK_OR_DELEGATE res->dlTaskListHead = NULL; _starpu_spin_init(&res->dlListLock); res->working = 0; #else /* LOCK_OR_DELEGATE */ STARPU_PTHREAD_MUTEX_INIT(&res->mutex, NULL); #endif /* LOCK_OR_DELEGATE */ return res; } void starpu_data_assign_arbiter(starpu_data_handle_t handle, starpu_arbiter_t arbiter) { if (handle->arbiter && handle->arbiter == _starpu_global_arbiter) /* Just for testing purpose */ return; STARPU_ASSERT_MSG(!handle->arbiter, "handle can only be assigned one arbiter"); STARPU_ASSERT_MSG(!handle->refcnt, "arbiter can be assigned to handle only right after initialization"); STARPU_ASSERT_MSG(!handle->busy_count, "arbiter can be assigned to handle only right after initialization"); handle->arbiter = arbiter; } void starpu_arbiter_destroy(starpu_arbiter_t arbiter) { #ifdef LOCK_OR_DELEGATE _starpu_spin_lock(&arbiter->dlListLock); STARPU_ASSERT(!arbiter->dlTaskListHead); STARPU_ASSERT(!arbiter->working); _starpu_spin_unlock(&arbiter->dlListLock); _starpu_spin_destroy(&arbiter->dlListLock); #else /* LOCK_OR_DELEGATE */ STARPU_PTHREAD_MUTEX_LOCK(&arbiter->mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&arbiter->mutex); STARPU_PTHREAD_MUTEX_DESTROY(&arbiter->mutex); #endif /* LOCK_OR_DELEGATE */ free(arbiter); } starpu-1.4.9+dfsg/src/core/dependencies/data_concurrency.c000066400000000000000000000546161507764646700236600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include /* * We have a kind of dining philosophers problem: various tasks are accessing * various data concurrently in different modes: STARPU_R, STARPU_RW, STARPU_W, * STARPU_SCRATCH and STARPU_REDUX. STARPU_RW is managed as a STARPU_W access. * We have the following constraints: * * - A single STARPU_W access is allowed at a time. * - Concurrent STARPU_R accesses are allowed. * - Concurrent STARPU_SCRATCH accesses are allowed. * - Concurrent STARPU_REDUX accesses are allowed. * * What we do here is implementing the Dijkstra solutions: handles are sorted * by pointer value order, and tasks call * _starpu_attempt_to_submit_data_request for each requested data in that order * (see _starpu_sort_task_handles call in _starpu_concurrent_data_access). * * _starpu_attempt_to_submit_data_request will either: * - obtain access to the data, and thus the task can proceed with acquiring * other data (see _submit_job_access_data) * - queue a request on the data handle * * When a task finishes, it calls _starpu_notify_data_dependencies for each * data, to free its acquisitions. This will look whether the first queued * request can be fulfilled, and in such case make the task try to acquire its * next data. * * The same mechanism is used for application data acquisition * (starpu_data_acquire). * * For data with an arbiter, we have a second step, performed after this first * step, implemented in data_arbiter_concurrency.c */ /* * Check to see whether the first queued request can proceed, and return it in * such case. */ /* the handle header lock must be taken by the caller */ static struct _starpu_data_requester *may_unlock_data_req_list_head(starpu_data_handle_t handle) { struct _starpu_data_requester_prio_list *req_list; if (handle->reduction_refcnt > 0) { req_list = &handle->reduction_req_list; } else { if (_starpu_data_requester_prio_list_empty(&handle->reduction_req_list)) req_list = &handle->req_list; else req_list = &handle->reduction_req_list; } /* if there is no one to unlock ... */ if (_starpu_data_requester_prio_list_empty(req_list)) return NULL; /* if there is no reference to the data anymore, we can use it */ if (handle->refcnt == 0) return _starpu_data_requester_prio_list_pop_front_highest(req_list); /* Already writing to it, do not let another write access through */ if (handle->current_mode == STARPU_W) return NULL; /* data->current_mode == STARPU_R, so we can process more readers */ struct _starpu_data_requester *r = _starpu_data_requester_prio_list_front_highest(req_list); enum starpu_data_access_mode r_mode = r->mode; if (r_mode == STARPU_RW) r_mode = STARPU_W; /* If this is a STARPU_R, STARPU_SCRATCH or STARPU_REDUX type of * access, we only proceed if the current mode is the same as the * requested mode. */ if (r_mode == handle->current_mode) return _starpu_data_requester_prio_list_pop_front_highest(req_list); else return NULL; } /* Try to submit a data request, in case the request can be processed * immediately, return 0, if there is still a dependency that is not compatible * with the current mode, the request is put in the per-handle list of * "requesters", and this function returns 1. */ /* No lock is held, this acquires and releases the handle header lock */ static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_codelet, starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *argcb, struct _starpu_job *j, unsigned buffer_index) { if (handle->arbiter) return _starpu_attempt_to_submit_arbitered_data_request(request_from_codelet, handle, mode, callback, argcb, j, buffer_index); /* Do not care about some flags */ mode &= ~STARPU_COMMUTE; mode &= ~STARPU_SSEND; mode &= ~STARPU_LOCALITY; mode &= ~STARPU_NOFOOTPRINT; if (mode == STARPU_RW) mode = STARPU_W; /* Take the lock protecting the header. We try to do some progression * in case this is called from a worker, otherwise we just wait for the * lock to be available. */ if (request_from_codelet) { int cpt = 0; while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock)) { cpt++; _starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_NOT_ALLOC); } if (cpt == STARPU_SPIN_MAXTRY) _starpu_spin_lock(&handle->header_lock); } else { _starpu_spin_lock(&handle->header_lock); } /* If we have a request that is not used for the reduction, and that a * reduction is pending, we put it at the end of normal list, and we * use the reduction_req_list instead */ unsigned pending_reduction = (handle->reduction_refcnt > 0); unsigned frozen = 0; /* If we are currently performing a reduction, we freeze any request * that is not explicitly a reduction task. */ unsigned is_a_reduction_task = (request_from_codelet && j && j->reduction_task); if (pending_reduction && !is_a_reduction_task) frozen = 1; /* If there is currently nobody accessing the piece of data, or it's * not another writer and if this is the same type of access as the * current one, we can proceed. */ unsigned put_in_list = 1; enum starpu_data_access_mode previous_mode = handle->current_mode; if (!frozen && ((handle->refcnt == 0) || (!(mode == STARPU_W) && (handle->current_mode == mode)))) { /* Detect whether this is the end of a reduction phase */ /* We don't want to start multiple reductions of the * same handle at the same time ! */ if ((handle->reduction_refcnt == 0) && (previous_mode == STARPU_REDUX) && (mode != STARPU_REDUX)) { _starpu_data_end_reduction_mode(handle, j?j->task->priority:STARPU_DEFAULT_PRIO); /* Since we need to perform a mode change, we freeze * the request if needed. */ put_in_list = (handle->reduction_refcnt > 0); } else { put_in_list = 0; } } if (put_in_list) { /* there cannot be multiple writers or a new writer * while the data is in read mode */ handle->busy_count++; /* enqueue the request */ struct _starpu_data_requester *r = _starpu_data_requester_new(); r->mode = mode; r->is_requested_by_codelet = request_from_codelet; r->j = j; r->buffer_index = buffer_index; r->prio = j ? j->task->priority : 0; r->ready_data_callback = callback; r->argcb = argcb; /* We put the requester in a specific list if this is a reduction task */ struct _starpu_data_requester_prio_list *req_list = is_a_reduction_task?&handle->reduction_req_list:&handle->req_list; _starpu_data_requester_prio_list_push_back(req_list, r); /* failed */ put_in_list = 1; } else { handle->refcnt++; handle->busy_count++; /* Do not write to handle->current_mode if it is already * R. This avoids a spurious warning from helgrind when * the following happens: * acquire(R) in thread A * acquire(R) in thread B * release_data_on_node() in thread A * helgrind would shout that the latter reads current_mode * unsafely. * * This actually basically explains helgrind that it is a * shared R acquisition. */ if (mode != STARPU_R || handle->current_mode != mode) handle->current_mode = mode; if ((mode == STARPU_REDUX) && (previous_mode != STARPU_REDUX)) _starpu_data_start_reduction_mode(handle); /* success */ put_in_list = 0; } _starpu_spin_unlock(&handle->header_lock); return put_in_list; } /* Take a data, without waiting for it to be available (it is assumed to be). * This is typical used for nodeps tasks, for which a previous task has already * waited for the proper conditions, and we just need to take another reference * for overall reference coherency. * No lock is held, this acquires and releases the handle header lock */ static void _starpu_take_data(unsigned request_from_codelet, starpu_data_handle_t handle, enum starpu_data_access_mode mode, struct _starpu_job *j) { STARPU_ASSERT_MSG(!handle->arbiter, "TODO"); /* Do not care about some flags */ mode &= ~STARPU_COMMUTE; mode &= ~STARPU_SSEND; mode &= ~STARPU_LOCALITY; mode &= ~STARPU_NOFOOTPRINT; if (mode == STARPU_RW) mode = STARPU_W; /* Take the lock protecting the header. We try to do some progression * in case this is called from a worker, otherwise we just wait for the * lock to be available. */ if (request_from_codelet) { int cpt = 0; while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock)) { cpt++; _starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_NOT_ALLOC); } if (cpt == STARPU_SPIN_MAXTRY) _starpu_spin_lock(&handle->header_lock); } else { _starpu_spin_lock(&handle->header_lock); } /* If we are currently performing a reduction, we freeze any request * that is not explicitly a reduction task. */ unsigned is_a_reduction_task = (request_from_codelet && j && j->reduction_task); STARPU_ASSERT_MSG(!is_a_reduction_task, "TODO"); enum starpu_data_access_mode previous_mode = handle->current_mode; STARPU_ASSERT_MSG(mode == previous_mode, "mode was %d, but requested %d", previous_mode, mode); handle->refcnt++; handle->busy_count++; _starpu_spin_unlock(&handle->header_lock); } /* No lock is held */ unsigned _starpu_attempt_to_submit_data_request_from_apps(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *argcb) { return _starpu_attempt_to_submit_data_request(0, handle, mode, callback, argcb, NULL, 0); } /* No lock is held */ static unsigned attempt_to_submit_data_request_from_job(struct _starpu_job *j, unsigned buffer_index) { /* Note that we do not access j->task->handles, but j->ordered_buffers * which is a sorted copy of it. */ struct _starpu_data_descr *buffer = &(_STARPU_JOB_GET_ORDERED_BUFFERS(j)[buffer_index]); starpu_data_handle_t handle = buffer->handle; enum starpu_data_access_mode mode = buffer->mode & ~STARPU_COMMUTE; return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index); } /* Try to acquire all data of the given job, one by one in handle pointer value order */ /* No lock is held */ static unsigned _submit_job_access_data(struct _starpu_job *j, unsigned start_buffer_index) { unsigned buf; int bufdup; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); for (buf = start_buffer_index; buf < nbuffers; buf++) { starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf); int node = _STARPU_JOB_GET_ORDERED_BUFFER_ORIG_NODE(j, buf); for (bufdup = (int) buf-1; bufdup >= 0; bufdup--) { starpu_data_handle_t handle_dup = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, bufdup); int node_dup = _STARPU_JOB_GET_ORDERED_BUFFER_ORIG_NODE(j, bufdup); if (handle_dup == handle && node_dup == node) /* We have already requested this data, skip it. This * depends on ordering putting writes before reads, see * _starpu_compar_handles. */ goto next; if (!_starpu_handles_same_root(handle_dup, handle)) /* We are not checking within the same parent any more, no need to continue checking other handles */ break; } STARPU_ASSERT(j->task->status == STARPU_TASK_BLOCKED || j->task->status == STARPU_TASK_BLOCKED_ON_TAG || j->task->status == STARPU_TASK_BLOCKED_ON_TASK || j->task->status == STARPU_TASK_BLOCKED_ON_DATA); j->task->status = STARPU_TASK_BLOCKED_ON_DATA; if(handle->arbiter) { /* We arrived on an arbitered data, we stop and proceed * with the arbiter second step. */ _starpu_submit_job_enforce_arbitered_deps(j, buf, nbuffers); return 1; } if (attempt_to_submit_data_request_from_job(j, buf)) { return 1; } next: ; } return 0; } static void take_data_from_job(struct _starpu_job *j, unsigned buffer_index) { /* Note that we do not access j->task->handles, but j->ordered_buffers * which is a sorted copy of it. */ struct _starpu_data_descr *buffer = &(_STARPU_JOB_GET_ORDERED_BUFFERS(j)[buffer_index]); starpu_data_handle_t handle = buffer->handle; enum starpu_data_access_mode mode = buffer->mode & ~STARPU_COMMUTE; _starpu_take_data(1, handle, mode, j); } /* Immediately acquire all data of the given job, one by one in handle pointer value order */ /* No lock is held */ static void _submit_job_take_data_deps(struct _starpu_job *j, unsigned start_buffer_index) { unsigned buf; int bufdup; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); for (buf = start_buffer_index; buf < nbuffers; buf++) { starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buf); int node = _STARPU_JOB_GET_ORDERED_BUFFER_ORIG_NODE(j, buf); for (bufdup = (int) buf-1; bufdup >= 0; bufdup--) { starpu_data_handle_t handle_dup = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, bufdup); int node_dup = _STARPU_JOB_GET_ORDERED_BUFFER_ORIG_NODE(j, bufdup); if (handle_dup == handle && node_dup == node) /* We have already requested this data, skip it. This * depends on ordering putting writes before reads, see * _starpu_compar_handles. */ goto next; if (!_starpu_handles_same_root(handle_dup, handle)) /* We are not checking within the same parent any more, no need to continue checking other handles */ break; } if(handle->arbiter) { /* We arrived on an arbitered data, we stop and proceed * with the arbiter second step. */ STARPU_ASSERT_MSG(0, "TODO"); //_starpu_submit_job_take_arbitered_deps(j, buf, nbuffers); } take_data_from_job(j, buf); next: ; } } /* This is called when the tag+task dependencies are to be finished releasing. */ void _starpu_enforce_data_deps_notify_job_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data) { unsigned buf; if (j->task->cl) { unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); for (buf = 0; buf < nbuffers; buf++) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, buf); if (handle->arbiter) /* Oops, it's the arbiter's decision */ return; } /* We need to check data availability only if sequential consistency * dependencies have not been used */ if (!j->sequential_consistency) { for (buf = 0; buf < nbuffers; buf++) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, buf); enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(j->task, buf) & ~STARPU_COMMUTE; if (handle->reduction_refcnt) /* Reduction pending, don't bother trying */ return; if (handle->refcnt != 0 && (mode == STARPU_W || handle->current_mode != mode)) /* Incompatible modes, not ready immediately */ return; } } } /* Ok, it really looks like this job will be ready soon */ _starpu_job_notify_ready_soon(j, data); } void _starpu_job_set_ordered_buffers(struct _starpu_job *j) { /* Compute an ordered list of the different pieces of data so that we * grab then according to a total order, thus avoiding a deadlock * condition */ unsigned i; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); struct starpu_task *task = j->task; struct _starpu_data_descr *buffers = _STARPU_JOB_GET_ORDERED_BUFFERS(j); for (i=0 ; icl ? STARPU_CODELET_GET_NODE(task->cl, i) : STARPU_SPECIFIC_NODE_NONE; buffers[i].node = -1; } _starpu_sort_task_handles(buffers, nbuffers); for (i=0 ; icl && task->cl->specific_nodes) for (i=1 ; icl, buffers[i].index) != STARPU_CODELET_GET_NODE(task->cl, buffers[i-1].index)) { STARPU_ASSERT_MSG(!(buffers[i].mode & STARPU_W) && !(buffers[i-1].mode & STARPU_W), "Cannot request the same data on different nodes with write mode"); STARPU_ASSERT_MSG(!(buffers[i].mode & STARPU_REDUX) && !(buffers[i-1].mode & STARPU_REDUX), "Cannot request the same data on different nodes with redux mode"); } } } /* Sort the data used by the given job by handle pointer value order, and * try to acquire them in that order */ /* No lock is held */ unsigned _starpu_concurrent_data_access(struct _starpu_job *j) { struct starpu_codelet *cl = j->task->cl; if ((cl == NULL) || (STARPU_TASK_GET_NBUFFERS(j->task) == 0)) return 0; return _submit_job_access_data(j, 0); } /* This request got fulfilled, continue with the other requests of the * corresponding job */ /* No lock is held */ static unsigned unlock_one_requester(struct _starpu_data_requester *r) { struct _starpu_job *j = r->j; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); unsigned buffer_index = r->buffer_index; if (buffer_index + 1 < nbuffers) /* not all buffers are protected yet */ return _submit_job_access_data(j, buffer_index + 1); else return 0; } /* Sort the data used by the given job by handle pointer value order, and * immediately acquire them in that order */ /* No lock is held */ void _starpu_submit_job_take_data_deps(struct _starpu_job *j) { struct starpu_codelet *cl = j->task->cl; if ((cl == NULL) || (STARPU_TASK_GET_NBUFFERS(j->task) == 0)) return; _submit_job_take_data_deps(j, 0); } /* This is called when a task is finished with a piece of data * (or on starpu_data_release) * * The header lock must already be taken by the caller. * This may free the handle if it was lazily unregistered (1 is returned in * that case). The handle pointer thus becomes invalid for the caller. */ int _starpu_notify_data_dependencies(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode) { _starpu_spin_checklocked(&handle->header_lock); if (down_to_mode != STARPU_NONE && handle->current_mode == down_to_mode) { /* No change, nothing to do */ return 0; } if (handle->arbiter) { /* Keep our reference for now, _starpu_notify_arbitered_dependencies * will drop it when it needs to */ STARPU_ASSERT(_starpu_data_requester_prio_list_empty(&handle->req_list)); STARPU_ASSERT(_starpu_data_requester_prio_list_empty(&handle->reduction_req_list)); _starpu_spin_unlock(&handle->header_lock); /* _starpu_notify_arbitered_dependencies will handle its own locking */ _starpu_notify_arbitered_dependencies(handle, down_to_mode); /* We have already unlocked */ return 1; } STARPU_ASSERT(_starpu_data_requester_prio_list_empty(&handle->arbitered_req_list)); if (down_to_mode == STARPU_NONE) { /* A data access has finished so we remove a reference. */ STARPU_ASSERT(handle->refcnt > 0); handle->refcnt--; STARPU_ASSERT(handle->busy_count > 0); handle->busy_count--; if (_starpu_data_check_not_busy(handle)) /* Handle was destroyed, nothing left to do. */ return 1; } else { /* Downgrade from W or RW down to R, keeping the same reference, * but thus allowing other readers without allowing writers. */ STARPU_ASSERT(down_to_mode == STARPU_R && (handle->current_mode == STARPU_RW || handle->current_mode == STARPU_W)); handle->current_mode = down_to_mode; } /* In case there is a pending reduction, and that this is the last * requester, we may go back to a "normal" coherency model. */ if (handle->reduction_refcnt > 0) { //fprintf(stderr, "NOTIFY REDUCTION TASK RED REFCNT %d\n", handle->reduction_refcnt); handle->reduction_refcnt--; if (handle->reduction_refcnt == 0) _starpu_data_end_reduction_mode_terminate(handle); } if (handle->unlocking_reqs) /* * Our caller is already running the unlock loop below (we were * most probably called from the ready_data_callback call * below). Avoid looping again (which would potentially mean * unbounded recursion), our caller will continue doing the * unlock work for us. */ return 0; handle->unlocking_reqs = 1; struct _starpu_data_requester *r; while ((r = may_unlock_data_req_list_head(handle))) { /* STARPU_RW accesses are treated as STARPU_W */ enum starpu_data_access_mode r_mode = r->mode; if (r_mode == STARPU_RW) r_mode = STARPU_W; int put_in_list = 1; if ((handle->reduction_refcnt == 0) && (handle->current_mode == STARPU_REDUX) && (r_mode != STARPU_REDUX)) { _starpu_data_end_reduction_mode(handle, r->prio); /* Since we need to perform a mode change, we freeze * the request if needed. */ put_in_list = (handle->reduction_refcnt > 0); } else { put_in_list = 0; } if (put_in_list) { /* We need to put the request back because we must * perform a reduction before. */ _starpu_data_requester_prio_list_push_front(&handle->req_list, r); } else { /* The data is now attributed to that request so we put a * reference on it. */ handle->refcnt++; handle->busy_count++; enum starpu_data_access_mode previous_mode = handle->current_mode; handle->current_mode = r_mode; /* In case we enter in a reduction mode, we invalidate all per * worker replicates. Note that the "per_node" replicates are * kept intact because we'll reduce a valid copy of the * "per-node replicate" with the per-worker replicates .*/ if ((r_mode == STARPU_REDUX) && (previous_mode != STARPU_REDUX)) _starpu_data_start_reduction_mode(handle); _starpu_spin_unlock(&handle->header_lock); if (r->is_requested_by_codelet) { if (!unlock_one_requester(r)) _starpu_push_task(r->j); } else { STARPU_ASSERT(r->ready_data_callback); /* execute the callback associated with the data requester */ r->ready_data_callback(r->argcb); } _starpu_data_requester_delete(r); _starpu_spin_lock(&handle->header_lock); STARPU_ASSERT(handle->busy_count > 0); handle->busy_count--; if (_starpu_data_check_not_busy(handle)) return 1; } } handle->unlocking_reqs = 0; return 0; } starpu-1.4.9+dfsg/src/core/dependencies/data_concurrency.h000066400000000000000000000036761507764646700236650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DATA_CONCURRENCY_H__ #define __DATA_CONCURRENCY_H__ /** @file */ #include #pragma GCC visibility push(hidden) void _starpu_job_set_ordered_buffers(struct _starpu_job *j); unsigned _starpu_concurrent_data_access(struct _starpu_job *j); void _starpu_submit_job_enforce_arbitered_deps(struct _starpu_job *j, unsigned buf, unsigned nbuffers); void _starpu_submit_job_take_data_deps(struct _starpu_job *j); void _starpu_enforce_data_deps_notify_job_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data); int _starpu_notify_data_dependencies(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode); void _starpu_notify_arbitered_dependencies(starpu_data_handle_t handle, enum starpu_data_access_mode down_to_mode); unsigned _starpu_attempt_to_submit_data_request_from_apps(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *argcb); unsigned _starpu_attempt_to_submit_arbitered_data_request(unsigned request_from_codelet, starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *argcb, struct _starpu_job *j, unsigned buffer_index); #pragma GCC visibility pop #endif // __DATA_CONCURRENCY_H__ starpu-1.4.9+dfsg/src/core/dependencies/dependencies.c000066400000000000000000000065041507764646700227540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include /* We assume that the job will not disappear under our hands */ void _starpu_notify_dependencies(struct _starpu_job *j) { STARPU_ASSERT(j); STARPU_ASSERT(j->task); /* unlock tasks depending on that task */ _starpu_notify_task_dependencies(j); /* unlock tags depending on that task */ if (j->task->use_tag) _starpu_notify_tag_dependencies(j->tag); } /* TODO: make this a hashtable indexed by func+data and pass that through data. */ static starpu_notify_ready_soon_func notify_ready_soon_func; static void *notify_ready_soon_func_data; struct _starpu_notify_job_start_data { double delay; }; void starpu_task_notify_ready_soon_register(starpu_notify_ready_soon_func f, void *data) { STARPU_ASSERT(!notify_ready_soon_func); notify_ready_soon_func = f; notify_ready_soon_func_data = data; } /* Called when a job has just started, so we can notify tasks which were waiting * only for this one when they can expect to start */ static void __starpu_job_notify_start(struct _starpu_job *j, double delay); void _starpu_job_notify_start(struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch) { double delay; if (!notify_ready_soon_func) return; delay = starpu_task_expected_length(j->task, perf_arch, j->nimpl); if (isnan(delay) || _STARPU_IS_ZERO(delay)) return; __starpu_job_notify_start(j, delay); } static void __starpu_job_notify_start(struct _starpu_job *j, double delay) { _starpu_notify_job_start_data data = { .delay = delay }; _starpu_notify_job_start_tasks(j, &data); if (j->task->use_tag) _starpu_notify_job_start_tag_dependencies(j->tag, &data); /* TODO: check data notification */ } /* Called when the last dependency of this job has just started, so we know that * this job will be released after the given delay. */ void _starpu_job_notify_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data) { struct starpu_task *task = j->task; /* Notify that this task will start after the given delay */ notify_ready_soon_func(notify_ready_soon_func_data, task, data->delay); /* Notify some known transitions as well */ if (!task->cl || task->cl->where == STARPU_NOWHERE || task->where == STARPU_NOWHERE) /* This task will immediately terminate, so transition this */ __starpu_job_notify_start(_starpu_get_job_associated_to_task(task), data->delay); if (j->quick_next) /* This job is actually a pre_sync job with a post_sync job to be released right after */ _starpu_job_notify_ready_soon(j->quick_next, data); } starpu-1.4.9+dfsg/src/core/dependencies/implicit_data_deps.c000066400000000000000000000644601507764646700241510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #if 0 # define _STARPU_DEP_DEBUG(fmt, ...) fprintf(stderr, fmt, ## __VA_ARGS__); #else # define _STARPU_DEP_DEBUG(fmt, ...) #endif static void (*write_hook)(starpu_data_handle_t); void _starpu_implicit_data_deps_write_hook(void (*func)(starpu_data_handle_t)) { STARPU_ASSERT_MSG(!write_hook || write_hook == func, "only one implicit data deps hook at a time\n"); write_hook = func; } static void _starpu_add_ghost_dependency(starpu_data_handle_t handle, unsigned long previous, struct starpu_task *next) { struct _starpu_job *next_job = _starpu_get_job_associated_to_task(next); _starpu_bound_job_id_dep(handle, next_job, previous); STARPU_AYU_ADDDEPENDENCY(previous, handle, next_job->job_id); } static void _starpu_add_dependency(starpu_data_handle_t handle, struct starpu_task *previous, struct starpu_task *next) { _starpu_add_ghost_dependency(handle, _starpu_get_job_associated_to_task(previous)->job_id, next); } /* Add post_sync_task as new accessor among the existing ones, making pre_sync_task depend on the last synchronization task if any. */ static void _starpu_add_accessor(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, int *submit_pre_sync, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot) { /* Add this task to the list of readers */ STARPU_ASSERT(!post_sync_task_dependency_slot->prev); STARPU_ASSERT(!post_sync_task_dependency_slot->next); post_sync_task_dependency_slot->task = post_sync_task; post_sync_task_dependency_slot->next = handle->last_submitted_accessors.next; post_sync_task_dependency_slot->prev = &handle->last_submitted_accessors; post_sync_task_dependency_slot->next->prev = post_sync_task_dependency_slot; handle->last_submitted_accessors.next = post_sync_task_dependency_slot; /* This task depends on the previous synchronization task if any */ if (handle->last_sync_task && handle->last_sync_task != post_sync_task) { *submit_pre_sync= 1; struct starpu_task *task_array[1] = {handle->last_sync_task}; _starpu_task_declare_deps_array(pre_sync_task, 1, task_array, 0); _starpu_add_dependency(handle, handle->last_sync_task, pre_sync_task); _STARPU_DEP_DEBUG("dep %p -> %p\n", handle->last_sync_task, pre_sync_task); } else { _STARPU_DEP_DEBUG("No dep\n"); } /* There was perhaps no last submitted writer but a * ghost one, we should report that here, and keep the * ghost writer valid */ if ( ( #ifdef STARPU_USE_FXT 1 #else _starpu_bound_recording #endif || STARPU_AYU_EVENT ) && handle->last_submitted_ghost_sync_id_is_valid) { _STARPU_TRACE_GHOST_TASK_DEPS(handle->last_submitted_ghost_sync_id, _starpu_get_job_associated_to_task(pre_sync_task)); _starpu_add_ghost_dependency(handle, handle->last_submitted_ghost_sync_id, pre_sync_task); _STARPU_DEP_DEBUG("dep ID%lu -> %p\n", handle->last_submitted_ghost_sync_id, pre_sync_task); } if (*submit_pre_sync && !pre_sync_task->cl) { /* Add a reference to be released in _starpu_handle_job_termination */ _starpu_spin_lock(&handle->header_lock); handle->busy_count++; _starpu_spin_unlock(&handle->header_lock); _starpu_get_job_associated_to_task(pre_sync_task)->implicit_dep_handle = handle; } } /* This adds a new synchronization task which depends on all the previous accessors */ static void _starpu_add_sync_task(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task, struct starpu_task *ignored_task) { /* Count the existing accessors */ unsigned naccessors = 0; struct _starpu_task_wrapper_dlist *l; l = handle->last_submitted_accessors.next; while (l != &handle->last_submitted_accessors) { if (l->task == ignored_task) { /* Don't make pre_sync_task depend on post_sync_task! * but still drop from the list. * This happens notably when a task accesses several * times to the same data. */ struct _starpu_task_wrapper_dlist *next; l->prev->next = l->next; l->next->prev = l->prev; l->task = NULL; l->prev = NULL; next = l->next; l->next = NULL; l = next; } else { naccessors++; l = l->next; } } _STARPU_DEP_DEBUG("%d accessors\n", naccessors); if (naccessors > 0) { /* Put all tasks in the list into task_array */ struct starpu_task *task_array[naccessors]; unsigned i = 0; l = handle->last_submitted_accessors.next; while (l != &handle->last_submitted_accessors) { STARPU_ASSERT(l->task); STARPU_ASSERT(l->task != ignored_task); task_array[i++] = l->task; _starpu_add_dependency(handle, l->task, pre_sync_task); _STARPU_DEP_DEBUG("dep %p -> %p\n", l->task, pre_sync_task); struct _starpu_task_wrapper_dlist *prev = l; l = l->next; prev->task = NULL; prev->next = NULL; prev->prev = NULL; } _starpu_task_declare_deps_array(pre_sync_task, naccessors, task_array, 0); } #ifndef STARPU_USE_FXT if (_starpu_bound_recording) #endif { /* Declare all dependencies with ghost accessors */ struct _starpu_jobid_list *ghost_accessors_id = handle->last_submitted_ghost_accessors_id; while (ghost_accessors_id) { unsigned long id = ghost_accessors_id->id; _STARPU_TRACE_GHOST_TASK_DEPS(id, _starpu_get_job_associated_to_task(pre_sync_task)); _starpu_add_ghost_dependency(handle, id, pre_sync_task); _STARPU_DEP_DEBUG("dep ID%lu -> %p\n", id, pre_sync_task); struct _starpu_jobid_list *prev = ghost_accessors_id; ghost_accessors_id = ghost_accessors_id->next; free(prev); } handle->last_submitted_ghost_accessors_id = NULL; } handle->last_submitted_accessors.next = &handle->last_submitted_accessors; handle->last_submitted_accessors.prev = &handle->last_submitted_accessors; handle->last_sync_task = post_sync_task; if (!post_sync_task->cl) { /* Add a reference to be released in _starpu_handle_job_termination */ _starpu_spin_lock(&handle->header_lock); handle->busy_count++; _starpu_spin_unlock(&handle->header_lock); _starpu_get_job_associated_to_task(post_sync_task)->implicit_dep_handle = handle; } } /* This function adds the implicit task dependencies introduced by data * sequential consistency. Two tasks are provided: pre_sync and post_sync which * respectively indicates which task is going to depend on the previous deps * and on which task future deps should wait. In the case of a dependency * introduced by a task submission, both tasks are just the submitted task, but * in the case of user interactions with the DSM, these may be different tasks. * */ /* NB : handle->sequential_consistency_mutex must be hold by the caller; * returns a task, to be submitted after releasing that mutex. */ /* *submit_pre_sync is whether the pre_sync_task will be submitted or not. The * caller should set it to 1 if it intends to submit it anyway, or to 0 * if it may not submit it (because it has no other use for the task than * synchronization). In the latter case, * _starpu_detect_implicit_data_deps_with_handle will set it to 1 in case the * task really needs to be submitted, or leave it to 0 if there is nothing to be * waited for anyway. */ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, int *submit_pre_sync, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot, starpu_data_handle_t handle, enum starpu_data_access_mode mode, unsigned task_handle_sequential_consistency) { struct starpu_task *task = NULL; /* Do not care about some flags */ mode &= ~ STARPU_SSEND; mode &= ~ STARPU_LOCALITY; mode &= ~ STARPU_NOFOOTPRINT; STARPU_ASSERT(!(mode & STARPU_SCRATCH)); _STARPU_LOG_IN(); if (handle->sequential_consistency && task_handle_sequential_consistency) { struct _starpu_job *pre_sync_job = _starpu_get_job_associated_to_task(pre_sync_task); struct _starpu_job *post_sync_job = _starpu_get_job_associated_to_task(post_sync_task); if (mode & STARPU_R && !handle->initialized) { STARPU_ASSERT_MSG(handle->init_cl, "Handle %p is not initialized, it cannot be read", handle); /* The task will initialize it with init_cl */ handle->initialized = 1; } if (mode & STARPU_W || mode == STARPU_REDUX) { STARPU_ASSERT_MSG(!handle->readonly, "Read-only handle %p can not be written to", handle); handle->initialized = 1; /* We will change our value, disconnect from our readonly duplicates */ if (handle->readonly_dup) { STARPU_ASSERT(handle->readonly_dup->readonly_dup_of == handle); handle->readonly_dup->readonly_dup_of = NULL; handle->readonly_dup = NULL; } if (write_hook) write_hook(handle); } /* Skip tasks that are associated to a reduction phase so that * they do not interfere with the application. */ if (pre_sync_job->reduction_task) { *submit_pre_sync = 1; return NULL; } if (post_sync_job->reduction_task) { *submit_pre_sync = 0; return NULL; } /* In case we are generating the DAG, we add an implicit * dependency between the pre and the post sync tasks in case * they are not the same. */ if (pre_sync_task != post_sync_task #ifndef STARPU_USE_FXT && _starpu_bound_recording #endif ) { _STARPU_TRACE_GHOST_TASK_DEPS(pre_sync_job->job_id, post_sync_job); _starpu_bound_task_dep(post_sync_job, pre_sync_job); } enum starpu_data_access_mode previous_mode = handle->last_submitted_mode; _STARPU_DEP_DEBUG("Handle %p Tasks %p %p %x->%x\n", handle, pre_sync_task, post_sync_task, previous_mode, mode); /* * Tasks can access the data concurrently only if they have the * same access mode, which can only be either: * - write with STARPU_COMMUTE * - read * - redux * * In other cases, the tasks have to depend on each other. */ if ((mode & STARPU_W && mode & STARPU_COMMUTE && previous_mode & STARPU_W && previous_mode & STARPU_COMMUTE) || (mode == STARPU_R && previous_mode == STARPU_R) || (mode == STARPU_REDUX && previous_mode == STARPU_REDUX)) { _STARPU_DEP_DEBUG("concurrently\n"); /* Can access concurrently with current tasks */ if (handle->last_sync_task != NULL) *submit_pre_sync = 1; _starpu_add_accessor(handle, pre_sync_task, submit_pre_sync, post_sync_task, post_sync_task_dependency_slot); } else { /* Can not access concurrently, have to wait for existing accessors */ struct _starpu_task_wrapper_dlist *l = handle->last_submitted_accessors.next; _STARPU_DEP_DEBUG("dependency\n"); if ((l != &handle->last_submitted_accessors && l->next != &handle->last_submitted_accessors) || (handle->last_submitted_ghost_accessors_id && handle->last_submitted_ghost_accessors_id->next) || (l != &handle->last_submitted_accessors && handle->last_submitted_ghost_accessors_id)) { /* Several previous accessors */ *submit_pre_sync = 1; if (mode == STARPU_W) { _STARPU_DEP_DEBUG("several predecessors, and this is a W-only task, thus can serve directly as a synchronization task.\n"); /* Optimization: this task can not * combine with others anyway, use it * as synchronization task by making it * wait for the previous ones. */ _starpu_add_sync_task(handle, pre_sync_task, post_sync_task, post_sync_task); } else { _STARPU_DEP_DEBUG("several predecessors, adding sync task\n"); /* insert an empty synchronization task * which waits for the whole set, * instead of creating a quadratic * number of dependencies. */ struct starpu_task *sync_task = starpu_task_create(); STARPU_ASSERT(sync_task); if (previous_mode == STARPU_REDUX) sync_task->name = "_starpu_sync_task_redux"; else if (mode == STARPU_COMMUTE || previous_mode == STARPU_COMMUTE) sync_task->name = "_starpu_sync_task_commute"; else sync_task->name = "_starpu_sync_task"; sync_task->cl = NULL; sync_task->type = post_sync_task->type; sync_task->priority = post_sync_task->priority; /* Make this task wait for the previous ones */ _starpu_add_sync_task(handle, sync_task, sync_task, post_sync_task); /* And the requested task wait for this one */ _starpu_add_accessor(handle, pre_sync_task, submit_pre_sync, post_sync_task, post_sync_task_dependency_slot); task = sync_task; } } else { struct _starpu_jobid_list *ghost_accessors_id = handle->last_submitted_ghost_accessors_id; /* At most one previous accessor or one ghost */ if (l != &handle->last_submitted_accessors) { /* One accessor, make it the sync task, * and start depending on it. */ *submit_pre_sync = 1; _STARPU_DEP_DEBUG("One previous accessor, depending on it\n"); handle->last_sync_task = l->task; l->next = NULL; l->prev = NULL; handle->last_submitted_accessors.next = &handle->last_submitted_accessors; handle->last_submitted_accessors.prev = &handle->last_submitted_accessors; handle->last_submitted_ghost_sync_id_is_valid = 0; } else if (ghost_accessors_id) { /* One ghost, just remember its id */ _STARPU_DEP_DEBUG("No more currently running accessor, but a ghost id, taking it.\n"); handle->last_submitted_ghost_sync_id = ghost_accessors_id->id; handle->last_submitted_ghost_sync_id_is_valid = 1; STARPU_ASSERT(!ghost_accessors_id->next); handle->last_submitted_ghost_accessors_id = NULL; free(ghost_accessors_id); } else { _STARPU_DEP_DEBUG("No previous accessor, no dependency\n"); } _starpu_add_accessor(handle, pre_sync_task, submit_pre_sync, post_sync_task, post_sync_task_dependency_slot); } } handle->last_submitted_mode = mode; } else { *submit_pre_sync = 0; } _STARPU_LOG_OUT(); return task; } int _starpu_test_implicit_data_deps_with_handle(starpu_data_handle_t handle, enum starpu_data_access_mode mode) { /* Do not care about some flags */ mode &= ~ STARPU_SSEND; mode &= ~ STARPU_LOCALITY; mode &= ~ STARPU_NOFOOTPRINT; STARPU_ASSERT(!(mode & STARPU_SCRATCH)); if (handle->sequential_consistency) { if (handle->last_sync_task) return -EAGAIN; if (handle->last_submitted_accessors.next != &handle->last_submitted_accessors) return -EAGAIN; if (mode & STARPU_W || mode == STARPU_REDUX) handle->initialized = 1; handle->last_submitted_mode = mode; } return 0; } /* Create the implicit dependencies for a newly submitted task */ void _starpu_detect_implicit_data_deps(struct starpu_task *task) { STARPU_ASSERT(task->cl); _STARPU_LOG_IN(); if (!task->sequential_consistency) return; /* We don't want to enforce a sequential consistency for tasks that are * not visible to the application. */ struct _starpu_job *j = _starpu_get_job_associated_to_task(task); if (j->reduction_task) return; j->sequential_consistency = 1; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); struct _starpu_task_wrapper_dlist *dep_slots = _STARPU_JOB_GET_DEP_SLOTS(j); unsigned buffer; int bufferdup; for (buffer = 0; buffer < nbuffers; buffer++) { starpu_data_handle_t handle = descrs[buffer].handle; enum starpu_data_access_mode mode = descrs[buffer].mode; struct starpu_task *new_task; /* Scratch memory does not introduce any deps */ if (mode & STARPU_SCRATCH) continue; for (bufferdup = (int) buffer-1; bufferdup >= 0; bufferdup--) { starpu_data_handle_t handle_dup = descrs[bufferdup].handle; enum starpu_data_access_mode mode_dup = descrs[bufferdup].mode; if (handle_dup == handle && mode_dup == mode) /* We have already added dependencies for this * data, skip it. This reduces the number of * dependencies, and allows notify_soon to work * when a task uses the same data several times * (otherwise it will not be able to find out that the two * dependencies will be over at the same time) */ goto next; if (!_starpu_handles_same_root(handle_dup, handle)) /* We are not checking within the same parent any more, no need to continue checking other handles */ break; } STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); unsigned index = descrs[buffer].index; unsigned task_handle_sequential_consistency = task->handles_sequential_consistency ? task->handles_sequential_consistency[index] : handle->sequential_consistency; int submit_pre_sync = 1; if (!task_handle_sequential_consistency) j->sequential_consistency = 0; new_task = _starpu_detect_implicit_data_deps_with_handle(task, &submit_pre_sync, task, &dep_slots[buffer], handle, mode, task_handle_sequential_consistency); STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); if (new_task) { int ret = _starpu_task_submit_internally(new_task); STARPU_ASSERT(!ret); } next: ; } _STARPU_LOG_OUT(); } /* This function is called when a task has been executed so that we don't * create dependencies to task that do not exist anymore. */ /* NB: We maintain a list of "ghost deps" in case FXT is enabled. Ghost * dependencies are the dependencies that are implicitly enforced by StarPU * even if they do not imply a real dependency. For instance in the following * sequence, f(Ar) g(Ar) h(Aw), we expect to have h depend on both f and g, but * if h is submitted after the termination of f or g, StarPU will not create a * dependency as this is not needed anymore. */ /* the sequential_consistency_mutex of the handle has to be already held */ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *task, struct _starpu_task_wrapper_dlist *task_dependency_slot, starpu_data_handle_t handle) { STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); if (handle->sequential_consistency) { /* If this is the last writer, there is no point in adding * extra deps to that tasks that does not exists anymore */ if (task == handle->last_sync_task) { handle->last_sync_task = NULL; #ifndef STARPU_USE_FXT if (_starpu_bound_recording) #endif { /* Save the previous writer as the ghost last writer */ handle->last_submitted_ghost_sync_id_is_valid = 1; struct _starpu_job *ghost_job = _starpu_get_job_associated_to_task(task); handle->last_submitted_ghost_sync_id = ghost_job->job_id; } } /* Same if this is one of the readers: we go through the list * of readers and remove the task if it is found. */ if (task_dependency_slot && task_dependency_slot->next) { #ifdef STARPU_DEBUG /* Make sure we are removing ourself from the proper handle */ struct _starpu_task_wrapper_dlist *l; for (l = task_dependency_slot->prev; l->task; l = l->prev) ; STARPU_ASSERT(l == &handle->last_submitted_accessors); for (l = task_dependency_slot->next; l->task; l = l->next) ; STARPU_ASSERT(l == &handle->last_submitted_accessors); #endif STARPU_ASSERT(task_dependency_slot->task == task); task_dependency_slot->next->prev = task_dependency_slot->prev; task_dependency_slot->prev->next = task_dependency_slot->next; task_dependency_slot->task = NULL; task_dependency_slot->next = NULL; task_dependency_slot->prev = NULL; #ifndef STARPU_USE_FXT if (_starpu_bound_recording) #endif { /* Save the job id of the reader task in the ghost reader linked list list */ struct _starpu_job *ghost_reader_job = _starpu_get_job_associated_to_task(task); struct _starpu_jobid_list *link; _STARPU_MALLOC(link, sizeof(struct _starpu_jobid_list)); link->next = handle->last_submitted_ghost_accessors_id; link->id = ghost_reader_job->job_id; handle->last_submitted_ghost_accessors_id = link; } } } STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); } /* This is the same as _starpu_release_data_enforce_sequential_consistency, but * for all data of a task */ void _starpu_release_task_enforce_sequential_consistency(struct _starpu_job *j) { struct starpu_task *task = j->task; if (!task->cl) return; struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); struct _starpu_task_wrapper_dlist *slots = _STARPU_JOB_GET_DEP_SLOTS(j); unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); unsigned index; int indexdup; /* Release all implicit dependencies */ for (index = 0; index < nbuffers; index++) { starpu_data_handle_t handle = descrs[index].handle; enum starpu_data_access_mode mode = descrs[index].mode; for (indexdup = (int) index-1; indexdup >= 0; indexdup--) { starpu_data_handle_t handle_dup = descrs[indexdup].handle; enum starpu_data_access_mode mode_dup = descrs[indexdup].mode; if (handle_dup == handle && mode_dup == mode) /* See _starpu_detect_implicit_data_deps */ goto next; if (!_starpu_handles_same_root(handle_dup, handle)) /* We are not checking within the same parent any more, no need to continue checking other handles */ break; } _starpu_release_data_enforce_sequential_consistency(task, &slots[index], handle); next: ; } for (index = 0; index < nbuffers; index++) { starpu_data_handle_t handle = descrs[index].handle; int node = descrs[index].orig_node; for (indexdup = index+1; indexdup < (int) nbuffers; indexdup++) { starpu_data_handle_t handle_dup = descrs[indexdup].handle; int node_dup = descrs[indexdup].orig_node; if (handle_dup == handle && node_dup == node) /* We will release this data, skip it for now. This * depends on ordering putting writes before reads, see * _starpu_compar_handles */ goto next2; if (!_starpu_handles_same_root(handle_dup, handle)) /* We are not checking within the same parent any more, no need to continue checking other handles */ break; } /* Release the reference acquired in _starpu_push_task_output */ _starpu_spin_lock(&handle->header_lock); STARPU_ASSERT(handle->busy_count > 0); handle->busy_count--; if (!_starpu_data_check_not_busy(handle)) _starpu_spin_unlock(&handle->header_lock); next2: ; } } void _starpu_add_post_sync_tasks(struct starpu_task *post_sync_task, starpu_data_handle_t handle) { _STARPU_LOG_IN(); STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); if (handle->sequential_consistency) { handle->post_sync_tasks_cnt++; struct _starpu_task_wrapper_list *link; _STARPU_MALLOC(link, sizeof(struct _starpu_task_wrapper_list)); link->task = post_sync_task; link->next = handle->post_sync_tasks; handle->post_sync_tasks = link; } STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); _STARPU_LOG_OUT(); } void _starpu_unlock_post_sync_tasks(starpu_data_handle_t handle, enum starpu_data_access_mode mode) { struct _starpu_task_wrapper_list *post_sync_tasks = NULL; unsigned do_submit_tasks = 0; unsigned last_cnt; /* Here helgrind would shout that this is an unprotected access, but * count can only be zero if we don't have to care about * post_sync_tasks_cnt at all. */ if (handle->post_sync_tasks_cnt) { STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); last_cnt = handle->post_sync_tasks_cnt; if (mode == STARPU_NONE) /* Last release from us */ handle->post_sync_tasks_cnt--; if (last_cnt == 1) { /* unlock all tasks : we need not hold the lock while unlocking all these tasks */ do_submit_tasks = 1; post_sync_tasks = handle->post_sync_tasks; handle->post_sync_tasks = NULL; } STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); } if (do_submit_tasks) { struct _starpu_task_wrapper_list *link = post_sync_tasks; while (link) { /* There is no need to depend on that task now, since it was already unlocked */ _starpu_release_data_enforce_sequential_consistency(link->task, &_starpu_get_job_associated_to_task(link->task)->implicit_dep_slot, handle); int ret = _starpu_task_submit_internally(link->task); STARPU_ASSERT(!ret); struct _starpu_task_wrapper_list *tmp = link; link = link->next; free(tmp); } } } /* If sequential consistency mode is enabled, this function blocks until the * handle is available in the requested access mode. */ int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_data_access_mode mode, const char *sync_name) { /* If sequential consistency is enabled, wait until data is available */ STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); int sequential_consistency = handle->sequential_consistency; if (sequential_consistency) { struct starpu_task *sync_task, *new_task; int submit_pre_sync = 0; sync_task = starpu_task_create(); sync_task->name = sync_name; sync_task->detach = 0; sync_task->destroy = 1; sync_task->type = STARPU_TASK_TYPE_INTERNAL; /* It is not really a RW access, but we want to make sure that * all previous accesses are done */ new_task = _starpu_detect_implicit_data_deps_with_handle(sync_task, &submit_pre_sync, sync_task, &_starpu_get_job_associated_to_task(sync_task)->implicit_dep_slot, handle, mode, sequential_consistency); STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); if (new_task) { int ret = _starpu_task_submit_internally(new_task); STARPU_ASSERT(!ret); } if (submit_pre_sync) { int ret = _starpu_task_submit_internally(sync_task); STARPU_ASSERT(!ret); ret = starpu_task_wait(sync_task); STARPU_ASSERT(ret == 0); } else { starpu_task_destroy(sync_task); } } else { STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); } return 0; } /* This data is about to be freed, clean our stuff */ void _starpu_data_clear_implicit(starpu_data_handle_t handle) { struct _starpu_jobid_list *list; STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); list = handle->last_submitted_ghost_accessors_id; while (list) { struct _starpu_jobid_list *next = list->next; free(list); list = next; } STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); } starpu-1.4.9+dfsg/src/core/dependencies/implicit_data_deps.h000066400000000000000000000044511507764646700241500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __IMPLICIT_DATA_DEPS_H__ #define __IMPLICIT_DATA_DEPS_H__ /** @file */ #include #include #pragma GCC visibility push(hidden) struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, int *submit_pre_sync, struct starpu_task *post_sync_task, struct _starpu_task_wrapper_dlist *post_sync_task_dependency_slot, starpu_data_handle_t handle, enum starpu_data_access_mode mode, unsigned task_handle_sequential_consistency); int _starpu_test_implicit_data_deps_with_handle(starpu_data_handle_t handle, enum starpu_data_access_mode mode); void _starpu_detect_implicit_data_deps(struct starpu_task *task); void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *task, struct _starpu_task_wrapper_dlist *task_dependency_slot, starpu_data_handle_t handle); void _starpu_release_task_enforce_sequential_consistency(struct _starpu_job *j); void _starpu_add_post_sync_tasks(struct starpu_task *post_sync_task, starpu_data_handle_t handle); void _starpu_unlock_post_sync_tasks(starpu_data_handle_t handle, enum starpu_data_access_mode mode); /** Register a hook to be called when a write is submitted */ void _starpu_implicit_data_deps_write_hook(void (*func)(starpu_data_handle_t)) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; /** This function blocks until the handle is available in the requested mode */ int _starpu_data_wait_until_available(starpu_data_handle_t handle, enum starpu_data_access_mode mode, const char *sync_name); void _starpu_data_clear_implicit(starpu_data_handle_t handle); #pragma GCC visibility pop #endif // __IMPLICIT_DATA_DEPS_H__ starpu-1.4.9+dfsg/src/core/dependencies/tags.c000066400000000000000000000341571507764646700212710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #define STARPU_AYUDAME_OFFSET 4000000000000000000ULL struct _starpu_tag_table { UT_hash_handle hh; starpu_tag_t id; struct _starpu_tag *tag; }; #define HASH_ADD_UINT64_T(head,field,add) HASH_ADD(hh,head,field,sizeof(uint64_t),add) #define HASH_FIND_UINT64_T(head,find,out) HASH_FIND(hh,head,find,sizeof(uint64_t),out) static struct _starpu_tag_table *tag_htbl = NULL; static starpu_pthread_rwlock_t tag_global_rwlock; static struct _starpu_cg *create_cg_apps(unsigned ntags) { struct _starpu_cg *cg; _STARPU_MALLOC(cg, sizeof(struct _starpu_cg)); cg->ntags = ntags; cg->remaining = ntags; cg->cg_type = STARPU_CG_APPS; cg->succ.succ_apps.completed = 0; STARPU_PTHREAD_MUTEX_INIT(&cg->succ.succ_apps.cg_mutex, NULL); STARPU_PTHREAD_COND_INIT(&cg->succ.succ_apps.cg_cond, NULL); return cg; } static struct _starpu_cg *create_cg_tag(unsigned ntags, struct _starpu_tag *tag) { struct _starpu_cg *cg; _STARPU_MALLOC(cg, sizeof(struct _starpu_cg)); cg->ntags = ntags; cg->remaining = ntags; #ifdef STARPU_DEBUG cg->ndeps = ntags; cg->deps = NULL; cg->done = NULL; #endif cg->cg_type = STARPU_CG_TAG; cg->succ.tag = tag; tag->tag_successors.ndeps++; #ifdef STARPU_DEBUG _STARPU_REALLOC(tag->tag_successors.deps, tag->tag_successors.ndeps * sizeof(tag->tag_successors.deps[0])); _STARPU_REALLOC(tag->tag_successors.done, tag->tag_successors.ndeps * sizeof(tag->tag_successors.done[0])); tag->tag_successors.deps[tag->tag_successors.ndeps-1] = cg; tag->tag_successors.done[tag->tag_successors.ndeps-1] = 0; #endif return cg; } static struct _starpu_tag *_starpu_tag_init(starpu_tag_t id) { struct _starpu_tag *tag; _STARPU_CALLOC(tag, 1, sizeof(struct _starpu_tag)); //tag->job = NULL; //tag->is_assigned = 0; //tag->is_submitted = 0; tag->id = id; tag->state = STARPU_INVALID_STATE; _starpu_cg_list_init0(&tag->tag_successors); _starpu_spin_init(&tag->lock); return tag; } static void _starpu_tag_free(void *_tag) { struct _starpu_tag *tag = (struct _starpu_tag *) _tag; if (tag) { _starpu_spin_lock(&tag->lock); unsigned nsuccs = tag->tag_successors.nsuccs; unsigned succ; for (succ = 0; succ < nsuccs; succ++) { struct _starpu_cg *cg = tag->tag_successors.succ[succ]; unsigned ntags = STARPU_ATOMIC_ADD(&cg->ntags, -1); unsigned STARPU_ATTRIBUTE_UNUSED remaining = STARPU_ATOMIC_ADD(&cg->remaining, -1); if (!ntags && (cg->cg_type == STARPU_CG_TAG)) { /* Last tag this cg depends on, cg becomes unreferenced */ #ifdef STARPU_DEBUG free(cg->deps); free(cg->done); #endif free(cg); } } #ifdef STARPU_DYNAMIC_DEPS_SIZE free(tag->tag_successors.succ); #endif #ifdef STARPU_DEBUG free(tag->tag_successors.deps); free(tag->tag_successors.done); #endif _starpu_spin_unlock(&tag->lock); _starpu_spin_destroy(&tag->lock); free(tag); } } /* * Statically initializing tag_global_rwlock seems to lead to weird errors * on Darwin, so we do it dynamically. */ void _starpu_init_tags(void) { STARPU_PTHREAD_RWLOCK_INIT(&tag_global_rwlock, NULL); } void starpu_tag_remove(starpu_tag_t id) { struct _starpu_tag_table *entry; STARPU_ASSERT(!STARPU_AYU_EVENT || id < STARPU_AYUDAME_OFFSET); STARPU_AYU_REMOVETASK(id + STARPU_AYUDAME_OFFSET); STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock); HASH_FIND_UINT64_T(tag_htbl, &id, entry); if (entry) HASH_DEL(tag_htbl, entry); STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock); if (entry) { _starpu_tag_free(entry->tag); free(entry); } } void starpu_tag_clear(void) { STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock); /* XXX: _starpu_tag_free takes the tag spinlocks while we are keeping * the global rwlock. This contradicts the lock order of * starpu_tag_wait_array. Should not be a problem in practice since * starpu_tag_clear is called at shutdown only. */ struct _starpu_tag_table *entry=NULL, *tmp=NULL; HASH_ITER(hh, tag_htbl, entry, tmp) { HASH_DEL(tag_htbl, entry); _starpu_tag_free(entry->tag); free(entry); } STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock); } static struct _starpu_tag *_gettag_struct(starpu_tag_t id) { /* search if the tag is already declared or not */ struct _starpu_tag_table *entry; struct _starpu_tag *tag; HASH_FIND_UINT64_T(tag_htbl, &id, entry); if (entry != NULL) tag = entry->tag; else { /* the tag does not exist yet : create an entry */ tag = _starpu_tag_init(id); struct _starpu_tag_table *entry2; _STARPU_MALLOC(entry2, sizeof(*entry2)); entry2->id = id; entry2->tag = tag; HASH_ADD_UINT64_T(tag_htbl, id, entry2); STARPU_ASSERT(!STARPU_AYU_EVENT || id < STARPU_AYUDAME_OFFSET); STARPU_AYU_ADDTASK(id + STARPU_AYUDAME_OFFSET, NULL); } return tag; } static struct _starpu_tag *gettag_struct(starpu_tag_t id) { struct _starpu_tag *tag; STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock); tag = _gettag_struct(id); STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock); return tag; } /* lock should be taken, and this releases it */ void _starpu_tag_set_ready(struct _starpu_tag *tag) { /* mark this tag as ready to run */ tag->state = STARPU_READY; /* declare it to the scheduler ! */ struct _starpu_job *j = tag->job; STARPU_ASSERT(!STARPU_AYU_EVENT || tag->id < STARPU_AYUDAME_OFFSET); STARPU_AYU_PRERUNTASK(tag->id + STARPU_AYUDAME_OFFSET, -1); STARPU_AYU_POSTRUNTASK(tag->id + STARPU_AYUDAME_OFFSET); /* In case the task job is going to be scheduled immediately, and if * the task is "empty", calling _starpu_push_task would directly try to enforce * the dependencies of the task, and therefore it would try to grab the * lock again, resulting in a deadlock. */ _starpu_spin_unlock(&tag->lock); /* enforce data dependencies */ STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); _starpu_enforce_deps_starting_from_task(j); } /* the lock of the tag must already be taken ! */ static void _starpu_tag_add_succ(struct _starpu_tag *tag, struct _starpu_cg *cg) { STARPU_ASSERT(tag); _starpu_add_successor_to_cg_list(&tag->tag_successors, cg); if (tag->state == STARPU_DONE) { /* the tag was already completed sooner */ _starpu_notify_cg(tag, cg); } } void _starpu_notify_tag_dependencies(struct _starpu_tag *tag) { _starpu_spin_lock(&tag->lock); if (tag->state == STARPU_DONE) { _starpu_spin_unlock(&tag->lock); return; } tag->state = STARPU_DONE; _STARPU_TRACE_TAG_DONE(tag); _starpu_notify_cg_list(tag, &tag->tag_successors); _starpu_spin_unlock(&tag->lock); } /* Called when a job has just started, so we can notify tasks which were waiting * only for this one when they can expect to start */ void _starpu_notify_job_start_tag_dependencies(struct _starpu_tag *tag, _starpu_notify_job_start_data *data) { _starpu_notify_job_start_cg_list(tag, &tag->tag_successors, data); } void starpu_tag_restart(starpu_tag_t id) { struct _starpu_tag *tag = gettag_struct(id); _starpu_spin_lock(&tag->lock); STARPU_ASSERT_MSG(tag->state == STARPU_DONE || tag->state == STARPU_INVALID_STATE || tag->state == STARPU_ASSOCIATED || tag->state == STARPU_BLOCKED, "Only completed tags can be restarted (%llu was %d)", (unsigned long long) id, tag->state); tag->state = STARPU_BLOCKED; _starpu_spin_unlock(&tag->lock); } void starpu_tag_notify_from_apps(starpu_tag_t id) { struct _starpu_tag *tag = gettag_struct(id); _starpu_notify_tag_dependencies(tag); } void _starpu_notify_restart_tag_dependencies(struct _starpu_tag *tag) { _starpu_spin_lock(&tag->lock); if (tag->state == STARPU_DONE) { tag->state = STARPU_BLOCKED; _starpu_spin_unlock(&tag->lock); return; } _STARPU_TRACE_TAG_DONE(tag); tag->state = STARPU_BLOCKED; _starpu_notify_cg_list(tag, &tag->tag_successors); _starpu_spin_unlock(&tag->lock); } void starpu_tag_notify_restart_from_apps(starpu_tag_t id) { struct _starpu_tag *tag = gettag_struct(id); _starpu_notify_restart_tag_dependencies(tag); } void _starpu_tag_declare(starpu_tag_t id, struct _starpu_job *job) { _STARPU_TRACE_TAG(id, job); job->task->use_tag = 1; struct _starpu_tag *tag= gettag_struct(id); _starpu_spin_lock(&tag->lock); /* Note: a tag can be shared by several tasks, when it is used to * detect when either of them are finished. We however don't allow * several tasks to share a tag when it is used to wake them by * dependency */ if (tag->job != job) tag->is_assigned++; tag->job = job; job->tag = tag; /* the tag is now associated to a job */ /* When the same tag may be signaled several times by different tasks, * and it's already done, we should not reset the "done" state. * When the tag is simply used by the same task several times, we have * to do so. */ if (job->task->regenerate || job->submitted == 2 || tag->state != STARPU_DONE) tag->state = STARPU_ASSOCIATED; STARPU_ASSERT(!STARPU_AYU_EVENT || id < STARPU_AYUDAME_OFFSET); STARPU_AYU_ADDDEPENDENCY(id+STARPU_AYUDAME_OFFSET, 0, job->job_id); STARPU_AYU_ADDDEPENDENCY(job->job_id, 0, id+STARPU_AYUDAME_OFFSET); _starpu_spin_unlock(&tag->lock); } void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t *array) { if (!ndeps) return; unsigned i; /* create the associated completion group */ struct _starpu_tag *tag_child = gettag_struct(id); _starpu_spin_lock(&tag_child->lock); struct _starpu_cg *cg = create_cg_tag(ndeps, tag_child); _starpu_spin_unlock(&tag_child->lock); #ifdef STARPU_DEBUG _STARPU_MALLOC(cg->deps, ndeps * sizeof(cg->deps[0])); _STARPU_MALLOC(cg->done, ndeps * sizeof(cg->done[0])); #endif for (i = 0; i < ndeps; i++) { starpu_tag_t dep_id = array[i]; #ifdef STARPU_DEBUG cg->deps[i] = (void*) (uintptr_t) dep_id; cg->done[i] = 0; #endif /* id depends on dep_id * so cg should be among dep_id's successors*/ _STARPU_TRACE_TAG_DEPS(id, dep_id); _starpu_bound_tag_dep(id, dep_id); struct _starpu_tag *tag_dep = gettag_struct(dep_id); STARPU_ASSERT(tag_dep != tag_child); _starpu_spin_lock(&tag_dep->lock); _starpu_tag_add_succ(tag_dep, cg); STARPU_ASSERT(!STARPU_AYU_EVENT || dep_id < STARPU_AYUDAME_OFFSET); STARPU_ASSERT(!STARPU_AYU_EVENT || id < STARPU_AYUDAME_OFFSET); STARPU_AYU_ADDDEPENDENCY(dep_id+STARPU_AYUDAME_OFFSET, 0, id+STARPU_AYUDAME_OFFSET); _starpu_spin_unlock(&tag_dep->lock); } } void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...) { if (!ndeps) return; unsigned i; /* create the associated completion group */ struct _starpu_tag *tag_child = gettag_struct(id); _starpu_spin_lock(&tag_child->lock); struct _starpu_cg *cg = create_cg_tag(ndeps, tag_child); _starpu_spin_unlock(&tag_child->lock); va_list pa; va_start(pa, ndeps); for (i = 0; i < ndeps; i++) { starpu_tag_t dep_id; dep_id = va_arg(pa, starpu_tag_t); /* id depends on dep_id * so cg should be among dep_id's successors*/ _STARPU_TRACE_TAG_DEPS(id, dep_id); _starpu_bound_tag_dep(id, dep_id); struct _starpu_tag *tag_dep = gettag_struct(dep_id); STARPU_ASSERT(tag_dep != tag_child); _starpu_spin_lock(&tag_dep->lock); _starpu_tag_add_succ(tag_dep, cg); STARPU_ASSERT(!STARPU_AYU_EVENT || dep_id < STARPU_AYUDAME_OFFSET); STARPU_ASSERT(!STARPU_AYU_EVENT || id < STARPU_AYUDAME_OFFSET); STARPU_AYU_ADDDEPENDENCY(dep_id+STARPU_AYUDAME_OFFSET, 0, id+STARPU_AYUDAME_OFFSET); _starpu_spin_unlock(&tag_dep->lock); } va_end(pa); } /* this function may be called by the application (outside callbacks !) */ int starpu_tag_wait_array(unsigned ntags, starpu_tag_t *id) { unsigned i; unsigned current; struct _starpu_tag *tag_array[ntags]; _STARPU_LOG_IN(); /* It is forbidden to block within callbacks or codelets */ STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_tag_wait must not be called from a task or callback"); starpu_do_schedule(); STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock); /* only wait the tags that are not done yet */ for (i = 0, current = 0; i < ntags; i++) { struct _starpu_tag *tag = _gettag_struct(id[i]); _starpu_spin_lock(&tag->lock); if (tag->state == STARPU_DONE) { /* that tag is done already */ _starpu_spin_unlock(&tag->lock); } else { tag_array[current] = tag; current++; } } STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock); if (current == 0) { /* all deps are already fulfilled */ _STARPU_LOG_OUT_TAG("all deps are already fulfilled"); return 0; } /* there is at least one task that is not finished */ struct _starpu_cg *cg = create_cg_apps(current); for (i = 0; i < current; i++) { _starpu_tag_add_succ(tag_array[i], cg); _starpu_spin_unlock(&tag_array[i]->lock); } STARPU_PTHREAD_MUTEX_LOCK(&cg->succ.succ_apps.cg_mutex); while (!cg->succ.succ_apps.completed) STARPU_PTHREAD_COND_WAIT(&cg->succ.succ_apps.cg_cond, &cg->succ.succ_apps.cg_mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&cg->succ.succ_apps.cg_mutex); STARPU_PTHREAD_MUTEX_DESTROY(&cg->succ.succ_apps.cg_mutex); STARPU_PTHREAD_COND_DESTROY(&cg->succ.succ_apps.cg_cond); free(cg); _STARPU_LOG_OUT(); return 0; } int starpu_tag_wait(starpu_tag_t id) { return starpu_tag_wait_array(1, &id); } struct starpu_task *starpu_tag_get_task(starpu_tag_t id) { struct _starpu_tag_table *entry; struct _starpu_tag *tag; STARPU_PTHREAD_RWLOCK_WRLOCK(&tag_global_rwlock); HASH_FIND_UINT64_T(tag_htbl, &id, entry); STARPU_PTHREAD_RWLOCK_UNLOCK(&tag_global_rwlock); if (!entry) return NULL; tag = entry->tag; if (!tag->job) return NULL; return tag->job->task; } starpu-1.4.9+dfsg/src/core/dependencies/tags.h000066400000000000000000000044541507764646700212730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __TAGS_H__ #define __TAGS_H__ /** @file */ #include #include #include #include #pragma GCC visibility push(hidden) #define _STARPU_TAG_SIZE (sizeof(starpu_tag_t)*8) enum _starpu_tag_state { /** this tag is not declared by any task */ STARPU_INVALID_STATE, /** _starpu_tag_declare was called to associate the tag to a task */ STARPU_ASSOCIATED, /** some task dependencies are not fulfilled yet */ STARPU_BLOCKED, /** the task can be (or has been) submitted to the scheduler (all deps fulfilled) */ STARPU_READY, // useless ... // /** the task has been submitted to the scheduler */ // STARPU_SCHEDULED, /** the task has been performed */ STARPU_DONE }; struct _starpu_job; struct _starpu_tag { /** Lock for this structure. Locking order is in dependency order: a tag * must not be locked before locking a tag it depends on */ struct _starpu_spinlock lock; /** an identifier for the task */ starpu_tag_t id; enum _starpu_tag_state state; struct _starpu_cg_list tag_successors; /** which job is associated to the tag if any ? */ struct _starpu_job *job; unsigned is_assigned; unsigned is_submitted; }; void _starpu_init_tags(void); void _starpu_notify_tag_dependencies(struct _starpu_tag *tag); void _starpu_notify_job_start_tag_dependencies(struct _starpu_tag *tag, _starpu_notify_job_start_data *data); void _starpu_tag_declare(starpu_tag_t id, struct _starpu_job *job); /** lock should be taken, and this releases it */ void _starpu_tag_set_ready(struct _starpu_tag *tag); #pragma GCC visibility pop #endif // __TAGS_H__ starpu-1.4.9+dfsg/src/core/dependencies/task_deps.c000066400000000000000000000175361507764646700223120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include static struct _starpu_cg *create_cg_task(unsigned ntags, struct _starpu_job *j) { struct _starpu_cg *cg; _STARPU_MALLOC(cg, sizeof(struct _starpu_cg)); cg->ntags = ntags; cg->remaining = ntags; #ifdef STARPU_DEBUG cg->ndeps = ntags; cg->deps = NULL; cg->done = NULL; #endif cg->cg_type = STARPU_CG_TASK; cg->succ.job = j; j->job_successors.ndeps++; #ifdef STARPU_DEBUG _STARPU_REALLOC(j->job_successors.deps, j->job_successors.ndeps * sizeof(j->job_successors.deps[0])); _STARPU_REALLOC(j->job_successors.done, j->job_successors.ndeps * sizeof(j->job_successors.done[0])); j->job_successors.deps[j->job_successors.ndeps-1] = cg; j->job_successors.done[j->job_successors.ndeps-1] = 0; #endif return cg; } static void _starpu_task_add_succ(struct _starpu_job *j, struct _starpu_cg *cg) { STARPU_ASSERT(j); if (_starpu_add_successor_to_cg_list(&j->job_successors, cg)) /* the task was already completed sooner */ _starpu_notify_cg(j, cg); } void _starpu_notify_task_dependencies(struct _starpu_job *j) { _starpu_notify_cg_list(j, &j->job_successors); } /* Called when a job has just started, so we can notify tasks which were waiting * only for this one when they can expect to start */ void _starpu_notify_job_start_tasks(struct _starpu_job *j, _starpu_notify_job_start_data *data) { _starpu_notify_job_start_cg_list(j, &j->job_successors, data); } /* task depends on the tasks in task array */ void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[], int check) { if (ndeps == 0) return; struct _starpu_job *job; job = _starpu_get_job_associated_to_task(task); STARPU_PTHREAD_MUTEX_LOCK(&job->sync_mutex); if (check) { int ret = !job->submitted || !task->destroy || task->detach; #ifdef STARPU_OPENMP ret = ret || job->continuation; #endif STARPU_ASSERT_MSG(ret, "Task dependencies have to be set before submission (submitted %u destroy %u detach %u)", job->submitted, task->destroy, task->detach); } else STARPU_ASSERT_MSG(job->terminated <= 1, "Task dependencies have to be set before termination (terminated %u)", job->terminated); struct _starpu_cg *cg = create_cg_task(ndeps, job); STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex); #ifdef STARPU_DEBUG _STARPU_MALLOC(cg->deps, ndeps * sizeof(cg->deps[0])); _STARPU_MALLOC(cg->done, ndeps * sizeof(cg->done[0])); #endif unsigned i; for (i = 0; i < ndeps; i++) { struct starpu_task *dep_task = task_array[i]; struct _starpu_job *dep_job; struct _starpu_cg *back_cg = NULL; dep_job = _starpu_get_job_associated_to_task(dep_task); STARPU_ASSERT_MSG(dep_task != task, "A task cannot be made to depend on itself"); #ifdef STARPU_DEBUG cg->deps[i] = dep_job; cg->done[i] = 0; #endif STARPU_ASSERT_MSG(dep_job != job, "A task must not depend on itself."); STARPU_PTHREAD_MUTEX_LOCK(&dep_job->sync_mutex); if (check) { STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->destroy || !dep_job->task->detach || starpu_task_get_current() == dep_task, "Unless it is not to be destroyed automatically, task dependencies have to be set before submission"); STARPU_ASSERT_MSG(dep_job->submitted != 2, "For resubmited tasks, dependencies have to be set before first re-submission"); STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->regenerate, "For regenerated tasks, dependencies have to be set before first submission"); } else STARPU_ASSERT_MSG(dep_job->terminated <= 1, "Task dependencies have to be set before termination (terminated %u)", dep_job->terminated); if (dep_job->task->regenerate) { /* Make sure we don't regenerate the dependency before this task is finished */ back_cg = create_cg_task(1, dep_job); /* Just do not take that dependency into account for the first submission */ dep_job->job_successors.ndeps_completed++; } STARPU_PTHREAD_MUTEX_UNLOCK(&dep_job->sync_mutex); _STARPU_TRACE_TASK_DEPS(dep_job, job); _starpu_bound_task_dep(job, dep_job); if (check) { STARPU_AYU_ADDDEPENDENCY(dep_job->job_id, 0, job->job_id); } if (_starpu_graph_record) _starpu_graph_add_job_dep(job, dep_job); _starpu_task_add_succ(dep_job, cg); if (dep_job->task->regenerate) _starpu_task_add_succ(job, back_cg); } } void starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]) { _starpu_task_declare_deps_array(task, ndeps, task_array, 1); } void starpu_task_declare_deps(struct starpu_task *task, unsigned ndeps, ...) { if (ndeps == 0) return; struct starpu_task *tasks[ndeps]; unsigned i; va_list pa; va_start(pa, ndeps); for (i = 0; i < ndeps; i++) { tasks[i] = va_arg(pa, struct starpu_task *); } va_end(pa); starpu_task_declare_deps_array(task, ndeps, tasks); } void starpu_task_declare_end_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]) { unsigned i; starpu_task_end_dep_add(task, ndeps); for (i = 0; i < ndeps; i++) { struct starpu_task *dep_task = task_array[i]; struct _starpu_job *dep_job = _starpu_get_job_associated_to_task(dep_task); int done = 0; STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->destroy || !dep_job->task->detach || starpu_task_get_current() == dep_task, "Unless it is not to be destroyed automatically, task end dependencies have to be set before submission"); STARPU_ASSERT_MSG(dep_job->submitted != 2, "For resubmited tasks, dependencies have to be set before first re-submission"); STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->regenerate, "For regenerated tasks, dependencies have to be set before first submission"); STARPU_ASSERT_MSG(!dep_job->end_rdep, "multiple end dependencies are not supported yet"); STARPU_ASSERT_MSG(!dep_job->task->regenerate, "end dependencies are not supported yet for regenerated tasks"); STARPU_PTHREAD_MUTEX_LOCK(&dep_job->sync_mutex); dep_job->end_rdep = task; if (dep_job->terminated) /* It's actually already over */ done = 1; STARPU_PTHREAD_MUTEX_UNLOCK(&dep_job->sync_mutex); if (done) starpu_task_end_dep_release(task); } } void starpu_task_declare_end_deps(struct starpu_task *task, unsigned ndeps, ...) { if (ndeps == 0) return; struct starpu_task *tasks[ndeps]; unsigned i; va_list pa; va_start(pa, ndeps); for (i = 0; i < ndeps; i++) { tasks[i] = va_arg(pa, struct starpu_task *); } va_end(pa); starpu_task_declare_end_deps_array(task, ndeps, tasks); } int starpu_task_get_task_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]) { struct _starpu_job *j = _starpu_get_job_associated_to_task(task); return _starpu_list_task_successors_in_cg_list(&j->job_successors, ndeps, task_array); } int starpu_task_get_task_scheduled_succs(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[]) { struct _starpu_job *j = _starpu_get_job_associated_to_task(task); return _starpu_list_task_scheduled_successors_in_cg_list(&j->job_successors, ndeps, task_array); } starpu-1.4.9+dfsg/src/core/detect_combined_workers.c000066400000000000000000000240511507764646700225610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include int _starpu_initialized_combined_workers; #ifdef STARPU_HAVE_HWLOC #include static void find_workers(hwloc_obj_t obj, int cpu_workers[STARPU_NMAXWORKERS], unsigned *n) { struct _starpu_hwloc_userdata *data = obj->userdata; if (!data->worker_list) /* Not something we run something on, don't care */ return; if (data->worker_list == (void*) -1) { /* Intra node, recurse */ unsigned i; for (i = 0; i < obj->arity; i++) find_workers(obj->children[i], cpu_workers, n); return; } /* Got to a PU leaf */ struct _starpu_worker_list *workers = data->worker_list; struct _starpu_worker *worker; for(worker = _starpu_worker_list_begin(workers); worker != _starpu_worker_list_end(workers); worker = _starpu_worker_list_next(worker)) { /* is it a CPU worker? */ if (worker->perf_arch.devices[0].type == STARPU_CPU_WORKER && worker->perf_arch.devices[0].ncores == 1) { _STARPU_DEBUG("worker %d is part of it\n", worker->workerid); /* Add it to the combined worker */ cpu_workers[(*n)++] = worker->workerid; } } } static void synthesize_intermediate_workers(hwloc_obj_t *children, unsigned min, unsigned max, unsigned arity, unsigned n, unsigned synthesize_arity) { unsigned nworkers, i, j; unsigned chunk_size = (n + synthesize_arity-1) / synthesize_arity; unsigned chunk_start; int cpu_workers[STARPU_NMAXWORKERS]; int ret; if (n <= synthesize_arity) /* Not too many children, do not synthesize */ return; _STARPU_DEBUG("%u children > %u, synthesizing intermediate combined workers of size %u\n", n, synthesize_arity, chunk_size); n = 0; j = 0; nworkers = 0; chunk_start = 0; for (i = 0 ; i < arity; i++) { if (((struct _starpu_hwloc_userdata*)children[i]->userdata)->worker_list) { n++; _STARPU_DEBUG("child %u\n", i); find_workers(children[i], cpu_workers, &nworkers); j++; } /* Completed a chunk, or last bit (but not if it's just 1 subobject) */ if (j == chunk_size || (i == arity-1 && j > 1)) { if (nworkers >= min && nworkers <= max) { unsigned sched_ctx_id = starpu_sched_ctx_get_context(); if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS) sched_ctx_id = 0; struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); _STARPU_DEBUG("Adding it\n"); ret = starpu_combined_worker_assign_workerid(nworkers, cpu_workers); STARPU_ASSERT(ret >= 0); workers->add(workers,ret); } /* Recurse there */ synthesize_intermediate_workers(children+chunk_start, min, max, i - chunk_start, n, synthesize_arity); /* And restart another one */ n = 0; j = 0; nworkers = 0; chunk_start = i+1; } } } static void find_and_assign_combinations(hwloc_obj_t obj, unsigned min, unsigned max, unsigned synthesize_arity) { char name[64]; unsigned i, n, nworkers; int cpu_workers[STARPU_NMAXWORKERS]; #if HWLOC_API_VERSION >= 0x10000 hwloc_obj_attr_snprintf(name, sizeof(name), obj, "#", 0); #else hwloc_obj_snprintf(name, sizeof(name), _starpu_get_machine_config()->topology.hwtopology, obj, "#", 0); #endif _STARPU_DEBUG("Looking at %s\n", name); for (n = 0, i = 0; i < obj->arity; i++) if (((struct _starpu_hwloc_userdata *)obj->children[i]->userdata)->worker_list) /* it has a CPU worker */ n++; if (n == 1) { /* If there is only one child, we go to the next level right away */ find_and_assign_combinations(obj->children[0], min, max, synthesize_arity); return; } /* Add this object */ nworkers = 0; find_workers(obj, cpu_workers, &nworkers); if (nworkers >= min && nworkers <= max) { _STARPU_DEBUG("Adding it\n"); unsigned sched_ctx_id = starpu_sched_ctx_get_context(); if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS) sched_ctx_id = 0; struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); int newworkerid = starpu_combined_worker_assign_workerid(nworkers, cpu_workers); STARPU_ASSERT(newworkerid >= 0); workers->add(workers,newworkerid); } /* Add artificial intermediate objects recursively */ synthesize_intermediate_workers(obj->children, min, max, obj->arity, n, synthesize_arity); /* And recurse */ for (i = 0; i < obj->arity; i++) if (((struct _starpu_hwloc_userdata*) obj->children[i]->userdata)->worker_list == (void*) -1) find_and_assign_combinations(obj->children[i], min, max, synthesize_arity); } static void find_and_assign_combinations_with_hwloc(int *workerids, int nworkers) { struct _starpu_machine_config *config = _starpu_get_machine_config(); struct _starpu_machine_topology *topology = &config->topology; int synthesize_arity = starpu_getenv_number("STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER"); int min = starpu_getenv_number("STARPU_MIN_WORKERSIZE"); if (min < 2) min = 2; int max = starpu_getenv_number("STARPU_MAX_WORKERSIZE"); if (max == -1) max = INT_MAX; if (synthesize_arity == -1) synthesize_arity = 2; STARPU_ASSERT_MSG(synthesize_arity > 0, "STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER must be greater than 0"); /* First, mark nodes which contain CPU workers, simply by setting their userdata field */ int i; for (i = 0; i < nworkers; i++) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[i]); if (worker->perf_arch.devices[0].type == STARPU_CPU_WORKER && worker->perf_arch.devices[0].ncores == 1) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, config->pu_depth, worker->bindid); obj = obj->parent; while (obj) { ((struct _starpu_hwloc_userdata*) obj->userdata)->worker_list = (void*) -1; obj = obj->parent; } } } find_and_assign_combinations(hwloc_get_root_obj(topology->hwtopology), min, max, synthesize_arity); } #else /* STARPU_HAVE_HWLOC */ static void assign_combinations_without_hwloc(struct starpu_worker_collection* worker_collection, int* workers, unsigned n, int min, int max) { int size,i; //if the maximum number of worker is already reached if(worker_collection->nworkers >= STARPU_NMAXWORKERS - 1) return; for (size = min; size <= max; size *= 2) { unsigned first; for (first = 0; first < n; first += size) { if (first + size <= n) { int found_workerids[size]; for (i = 0; i < size; i++) found_workerids[i] = workers[first + i]; /* We register this combination */ int newworkerid; newworkerid = starpu_combined_worker_assign_workerid(size, found_workerids); STARPU_ASSERT(newworkerid >= 0); worker_collection->add(worker_collection, newworkerid); //if the maximum number of worker is reached, then return if(worker_collection->nworkers >= STARPU_NMAXWORKERS - 1) return; } } } } static void find_and_assign_combinations_without_hwloc(int *workerids, int nworkers) { int i; unsigned sched_ctx_id = starpu_sched_ctx_get_context(); if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS) sched_ctx_id = 0; int min, max; struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); /* We put the id of all CPU workers in this array */ int cpu_workers[STARPU_NMAXWORKERS]; unsigned ncpus = 0; for (i = 0; i < nworkers; i++) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[i]); if (worker->arch == STARPU_CPU_WORKER) cpu_workers[ncpus++] = i; } min = starpu_getenv_number("STARPU_MIN_WORKERSIZE"); if (min < 2) min = 2; max = starpu_getenv_number("STARPU_MAX_WORKERSIZE"); if (max == -1 || max > (int) ncpus) max = ncpus; assign_combinations_without_hwloc(workers,cpu_workers,ncpus,min,max); } #endif /* STARPU_HAVE_HWLOC */ static void combine_all_cpu_workers(int *workerids, int nworkers) { unsigned sched_ctx_id = starpu_sched_ctx_get_context(); if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS) sched_ctx_id = 0; struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); int cpu_workers[STARPU_NMAXWORKERS]; int ncpus = 0; int i; int min; int max; for (i = 0; i < nworkers; i++) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[i]); if (worker->arch == STARPU_CPU_WORKER) cpu_workers[ncpus++] = workerids[i]; } min = starpu_getenv_number("STARPU_MIN_WORKERSIZE"); if (min < 1) min = 1; max = starpu_getenv_number("STARPU_MAX_WORKERSIZE"); if (max == -1 || max > ncpus) max = ncpus; for (i = min; i <= max; i++) { int newworkerid = starpu_combined_worker_assign_workerid(i, cpu_workers); STARPU_ASSERT(newworkerid >= 0); workers->add(workers, newworkerid); } } void _starpu_sched_find_worker_combinations(int *workerids, int nworkers) { /* FIXME: this seems to be lacking shutdown support? */ if (_starpu_initialized_combined_workers) return; _starpu_initialized_combined_workers = 1; struct _starpu_machine_config *config = _starpu_get_machine_config(); if (config->conf.single_combined_worker > 0) combine_all_cpu_workers(workerids, nworkers); else { #ifdef STARPU_HAVE_HWLOC find_and_assign_combinations_with_hwloc(workerids, nworkers); #else find_and_assign_combinations_without_hwloc(workerids, nworkers); #endif } } void starpu_sched_find_all_worker_combinations(void) { const unsigned nbasic_workers = starpu_worker_get_count(); int basic_workerids[nbasic_workers]; unsigned i; for(i = 0; i < nbasic_workers; i++) { basic_workerids[i] = i; } _starpu_sched_find_worker_combinations(basic_workerids, nbasic_workers); } starpu-1.4.9+dfsg/src/core/detect_combined_workers.h000066400000000000000000000016641507764646700225730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #pragma GCC visibility push(hidden) /** @file */ /** Initialize combined workers */ void _starpu_sched_find_worker_combinations(int *workerids, int nworkers); extern int _starpu_initialized_combined_workers; #pragma GCC visibility pop starpu-1.4.9+dfsg/src/core/devices.c000066400000000000000000000054611507764646700173230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include struct _starpu_device_entry { UT_hash_handle hh; unsigned deviceid; }; static struct _starpu_device_entry *gpu_devices_already_used; void _starpu_devices_gpu_set_used(int devid) { struct _starpu_device_entry *entry; HASH_FIND_INT(gpu_devices_already_used, &devid, entry); if (!entry) { _STARPU_MALLOC(entry, sizeof(*entry)); entry->deviceid = devid; HASH_ADD_INT(gpu_devices_already_used, deviceid, entry); } } void _starpu_devices_gpu_clear(struct _starpu_machine_config *config, enum starpu_worker_archtype type) { struct _starpu_machine_topology *topology = &config->topology; unsigned tmp[STARPU_NMAXWORKERS]; unsigned nb=0; int i; for(i=0 ; iworkers_devid[type][i]; HASH_FIND_INT(gpu_devices_already_used, &devid, entry); if (entry == NULL) { tmp[nb] = devid; nb++; } } for (i=nb ; iworkers_devid[type], tmp, sizeof(unsigned)*STARPU_NMAXWORKERS); } void _starpu_devices_drop_duplicate(unsigned ids[STARPU_NMAXWORKERS]) { struct _starpu_device_entry *devices_already_used = NULL; unsigned tmp[STARPU_NMAXWORKERS]; unsigned nb=0; int i; for(i=0 ; ideviceid = devid; HASH_ADD_INT(devices_already_used, deviceid, entry2); tmp[nb] = devid; nb ++; } } struct _starpu_device_entry *entry=NULL, *tempo=NULL; HASH_ITER(hh, devices_already_used, entry, tempo) { HASH_DEL(devices_already_used, entry); free(entry); } for (i=nb ; i #include #include #pragma GCC visibility push(hidden) /** Drop duplicate values from \p ids. */ void _starpu_devices_drop_duplicate(unsigned ids[STARPU_NMAXWORKERS]); /** Set gpu \p devid as already used. */ void _starpu_devices_gpu_set_used(int devid); /** Drop from the topology information the gpus which are already used. */ void _starpu_devices_gpu_clear(struct _starpu_machine_config *config, enum starpu_worker_archtype type); /** Clean the list of gpus which are already used. */ void _starpu_devices_gpu_clean(); #pragma GCC visibility pop #endif // __DEVICES_H__ starpu-1.4.9+dfsg/src/core/disk.c000066400000000000000000000410751507764646700166340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct disk_register { void *base; struct starpu_disk_ops *functions; /* disk condition (1 = all authorizations, */ int flag; }; static int add_disk_in_list(unsigned node, struct starpu_disk_ops *func, void *base); static struct disk_register *disk_register_list[STARPU_MAXNODES]; static int disk_number = 0; int starpu_disk_swap_node = -1; static void add_async_event(struct _starpu_async_channel * channel, void * event) { if (!event) return; struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&channel->event); if (disk_event->requests == NULL) { disk_event->requests = _starpu_disk_backend_event_list_new(); } struct _starpu_disk_backend_event * backend_event = _starpu_disk_backend_event_new(); backend_event->backend_event = event; /* Store event at the end of the list */ _starpu_disk_backend_event_list_push_back(disk_event->requests, backend_event); } int starpu_disk_register(struct starpu_disk_ops *func, void *parameter, starpu_ssize_t size) { STARPU_ASSERT_MSG(size < 0 || size >= STARPU_DISK_SIZE_MIN, "Minimum disk size is %d Bytes ! (Here %d) \n", (int) STARPU_DISK_SIZE_MIN, (int) size); /* register disk */ unsigned disk_memnode = _starpu_memory_node_register(STARPU_DISK_RAM, 0); /* Connect the disk memory node to all numa memory nodes */ int nb_numa_nodes = starpu_memory_nodes_get_numa_count(); int numa_node; for (numa_node = 0; numa_node < nb_numa_nodes; numa_node++) { _starpu_register_bus(disk_memnode, numa_node); _starpu_register_bus(numa_node, disk_memnode); } /* Any worker can manage disk memnode */ struct _starpu_machine_config *config = _starpu_get_machine_config(); unsigned worker; for (worker = 0; worker < starpu_worker_get_count(); worker++) { /* But prefer to use only CPU workers if possible */ if (starpu_worker_get_type(worker) == STARPU_CPU_WORKER) { struct _starpu_worker *workerarg = &config->workers[worker]; _starpu_memory_node_add_nworkers(disk_memnode); _starpu_worker_drives_memory_node(workerarg, disk_memnode); } } if (!_starpu_memory_node_get_nworkers(disk_memnode)) { /* Bleh, no CPU worker to drive the disk, use non-CPU workers too */ for (worker = 0; worker < starpu_worker_get_count(); worker++) { if (starpu_worker_get_type(worker) != STARPU_CPU_WORKER) { struct _starpu_worker *workerarg = &config->workers[worker]; _starpu_memory_node_add_nworkers(disk_memnode); _starpu_worker_drives_memory_node(workerarg, disk_memnode); } } } //Add bus for disk <-> disk copy if (func->copy != NULL) { int disk; for (disk = 0; disk < STARPU_MAXNODES; disk++) if (disk_register_list[disk] != NULL && disk_register_list[disk]->functions->copy != NULL && disk_register_list[disk]->functions->copy == func->copy) { _starpu_register_bus(disk_memnode, disk); _starpu_register_bus(disk, disk_memnode); } } /* connect disk */ void *base = func->plug(parameter, size); /* remember it */ int n STARPU_ATTRIBUTE_UNUSED = add_disk_in_list(disk_memnode, func, base); #ifdef STARPU_SIMGRID char name[16]; snprintf(name, sizeof(name), "DISK%d", n); starpu_sg_host_t host = _starpu_simgrid_get_host_by_name(name); STARPU_ASSERT_MSG(host, "Could not find disk %s in platform file", name); _starpu_simgrid_memory_node_set_host(disk_memnode, host); #endif int ret = func->bandwidth(disk_memnode, base); /* have a problem with the disk */ if (ret == 0) return -ENOENT; if (size >= 0) _starpu_memory_manager_set_global_memory_size(disk_memnode, size); _starpu_mem_chunk_disk_register(disk_memnode); return disk_memnode; } void _starpu_disk_unregister(void) { int i; /* search disk and delete it */ for (i = 0; i < STARPU_MAXNODES; ++i) { if (disk_register_list[i] == NULL) continue; _starpu_set_disk_flag(i, STARPU_DISK_NO_RECLAIM); _starpu_free_all_automatically_allocated_buffers(i); /* don't forget to unplug */ disk_register_list[i]->functions->unplug(disk_register_list[i]->base); free(disk_register_list[i]); disk_register_list[i] = NULL; disk_number--; } /* no disk in the list -> delete the list */ STARPU_ASSERT_MSG(disk_number == 0, "Some disks are not unregistered !"); } /* interface between user and disk memory */ void *_starpu_disk_alloc(unsigned node, size_t size) { return disk_register_list[node]->functions->alloc(disk_register_list[node]->base, size); } void _starpu_disk_free(unsigned node, void *obj, size_t size) { disk_register_list[node]->functions->free(disk_register_list[node]->base, obj, size); } /* src_node == disk node and dst_node == STARPU_MAIN_RAM */ int _starpu_disk_read(unsigned src_node, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *obj, void *buf, off_t offset, size_t size, struct _starpu_async_channel *channel) { void *event = NULL; if (channel != NULL) { if (disk_register_list[src_node]->functions->async_read == NULL) channel = NULL; else { double start; _starpu_disk_get_event(&channel->event)->memory_node = src_node; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); event = disk_register_list[src_node]->functions->async_read(disk_register_list[src_node]->base, obj, buf, offset, size); starpu_interface_end_driver_copy_async(src_node, dst_node, start); add_async_event(channel, event); } } /* asynchronous request failed or synchronous request is asked */ if (channel == NULL || !event) { disk_register_list[src_node]->functions->read(disk_register_list[src_node]->base, obj, buf, offset, size); return 0; } return -EAGAIN; } /* src_node == STARPU_MAIN_RAM and dst_node == disk node */ int _starpu_disk_write(unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node, void *obj, void *buf, off_t offset, size_t size, struct _starpu_async_channel *channel) { void *event = NULL; if (channel != NULL) { if (disk_register_list[dst_node]->functions->async_write == NULL) channel = NULL; else { double start; _starpu_disk_get_event(&channel->event)->memory_node = dst_node; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); event = disk_register_list[dst_node]->functions->async_write(disk_register_list[dst_node]->base, obj, buf, offset, size); starpu_interface_end_driver_copy_async(src_node, dst_node, start); add_async_event(channel, event); } } /* asynchronous request failed or synchronous request is asked */ if (channel == NULL || !event) { disk_register_list[dst_node]->functions->write(disk_register_list[dst_node]->base, obj, buf, offset, size); return 0; } return -EAGAIN; } int _starpu_disk_copy(unsigned node_src, void *obj_src, off_t offset_src, unsigned node_dst, void *obj_dst, off_t offset_dst, size_t size, struct _starpu_async_channel *channel) { /* both nodes have same copy function */ void * event = NULL; if (channel) { _starpu_disk_get_event(&channel->event)->memory_node = node_src; event = disk_register_list[node_src]->functions->copy(disk_register_list[node_src]->base, obj_src, offset_src, disk_register_list[node_dst]->base, obj_dst, offset_dst, size); add_async_event(channel, event); } /* Something goes wrong with copy disk to disk... */ if (!event) { if (channel || starpu_asynchronous_copy_disabled()) disk_register_list[node_src]->functions->copy = NULL; /* perform a read, and after a write... */ void * ptr; int ret = _starpu_malloc_flags_on_node(STARPU_MAIN_RAM, &ptr, size, 0); STARPU_ASSERT_MSG(ret == 0, "Cannot allocate %zu bytes to perform disk to disk operation", size); ret = _starpu_disk_read(node_src, STARPU_MAIN_RAM, obj_src, ptr, offset_src, size, NULL); STARPU_ASSERT_MSG(ret == 0, "Cannot read %zu bytes to perform disk to disk copy", size); ret = _starpu_disk_write(STARPU_MAIN_RAM, node_dst, obj_dst, ptr, offset_dst, size, NULL); STARPU_ASSERT_MSG(ret == 0, "Cannot write %zu bytes to perform disk to disk copy", size); _starpu_free_flags_on_node(STARPU_MAIN_RAM, ptr, size, 0); return 0; } STARPU_ASSERT(event); return -EAGAIN; } int _starpu_disk_full_read(unsigned src_node, unsigned dst_node, void *obj, void **ptr, size_t *size, struct _starpu_async_channel *channel) { void *event = NULL; if (channel != NULL) { if (disk_register_list[src_node]->functions->async_full_read == NULL) channel = NULL; else { double start; _starpu_disk_get_event(&channel->event)->memory_node = src_node; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); event = disk_register_list[src_node]->functions->async_full_read(disk_register_list[src_node]->base, obj, ptr, size, dst_node); starpu_interface_end_driver_copy_async(src_node, dst_node, start); add_async_event(channel, event); } } /* asynchronous request failed or synchronous request is asked */ if (channel == NULL || !event) { disk_register_list[src_node]->functions->full_read(disk_register_list[src_node]->base, obj, ptr, size, dst_node); return 0; } return -EAGAIN; } int _starpu_disk_full_write(unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node, void *obj, void *ptr, size_t size, struct _starpu_async_channel *channel) { void *event = NULL; if (channel != NULL) { if (disk_register_list[dst_node]->functions->async_full_write == NULL) channel = NULL; else { double start; _starpu_disk_get_event(&channel->event)->memory_node = dst_node; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); event = disk_register_list[dst_node]->functions->async_full_write(disk_register_list[dst_node]->base, obj, ptr, size); starpu_interface_end_driver_copy_async(src_node, dst_node, start); add_async_event(channel, event); } } /* asynchronous request failed or synchronous request is asked */ if (channel == NULL || !event) { disk_register_list[dst_node]->functions->full_write(disk_register_list[dst_node]->base, obj, ptr, size); return 0; } return -EAGAIN; } void *starpu_disk_open(unsigned node, void *pos, size_t size) { return disk_register_list[node]->functions->open(disk_register_list[node]->base, pos, size); } void starpu_disk_close(unsigned node, void *obj, size_t size) { disk_register_list[node]->functions->close(disk_register_list[node]->base, obj, size); } void starpu_disk_wait_request(struct _starpu_async_channel *async_channel) { struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&async_channel->event); unsigned node = disk_event->memory_node; if (disk_event->requests != NULL && !_starpu_disk_backend_event_list_empty(disk_event->requests)) { struct _starpu_disk_backend_event * event = _starpu_disk_backend_event_list_begin(disk_event->requests); struct _starpu_disk_backend_event * next; /* Wait all events in the list and remove them */ while (event != _starpu_disk_backend_event_list_end(disk_event->requests)) { next = _starpu_disk_backend_event_list_next(event); disk_register_list[node]->functions->wait_request(event->backend_event); disk_register_list[node]->functions->free_request(event->backend_event); _starpu_disk_backend_event_list_erase(disk_event->requests, event); _starpu_disk_backend_event_delete(event); event = next; } /* Remove the list because it doesn't contain any event */ _starpu_disk_backend_event_list_delete(disk_event->requests); disk_event->requests = NULL; } } int starpu_disk_test_request(struct _starpu_async_channel *async_channel) { struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&async_channel->event); unsigned node = disk_event->memory_node; if (disk_event->requests != NULL && !_starpu_disk_backend_event_list_empty(disk_event->requests)) { struct _starpu_disk_backend_event * event = _starpu_disk_backend_event_list_begin(disk_event->requests); struct _starpu_disk_backend_event * next; /* Wait all events in the list and remove them */ while (event != _starpu_disk_backend_event_list_end(disk_event->requests)) { next = _starpu_disk_backend_event_list_next(event); int res = disk_register_list[node]->functions->test_request(event->backend_event); if (res) { disk_register_list[node]->functions->free_request(event->backend_event); _starpu_disk_backend_event_list_erase(disk_event->requests, event); _starpu_disk_backend_event_delete(event); } event = next; } /* Remove the list because it doesn't contain any event */ if (_starpu_disk_backend_event_list_empty(disk_event->requests)) { _starpu_disk_backend_event_list_delete(disk_event->requests); disk_event->requests = NULL; } } return disk_event->requests == NULL; } void starpu_disk_free_request(struct _starpu_async_channel *async_channe STARPU_ATTRIBUTE_UNUSED) { /* It does not have any sense to use this function currently because requests are freed in test of wait functions */ STARPU_ABORT(); /* struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&async_channel->event); int position = get_location_with_node(disk_event->memory_node); if (disk_event->backend_event) disk_register_list[position]->functions->free_request(disk_event->backend_event); */ } static int add_disk_in_list(unsigned node, struct starpu_disk_ops *func, void *base) { int n; struct disk_register *dr; _STARPU_MALLOC(dr, sizeof(struct disk_register)); dr->base = base; dr->flag = STARPU_DISK_ALL; dr->functions = func; n = disk_number++; disk_register_list[node] = dr; return n; } int _starpu_disk_can_copy(unsigned node1, unsigned node2) { STARPU_ASSERT(starpu_node_get_kind(node1) == STARPU_DISK_RAM && starpu_node_get_kind(node2) == STARPU_DISK_RAM); if (disk_register_list[node1]->functions == disk_register_list[node2]->functions) /* they must have a copy function */ if (disk_register_list[node1]->functions->copy != NULL) return 1; return 0; } void _starpu_set_disk_flag(unsigned node, int flag) { disk_register_list[node]->flag = flag; } int _starpu_get_disk_flag(unsigned node) { return disk_register_list[node]->flag; } void _starpu_swap_init(void) { char *backend; char *path; starpu_ssize_t size; struct starpu_disk_ops *ops; path = starpu_getenv("STARPU_DISK_SWAP"); if (!path) return; backend = starpu_getenv("STARPU_DISK_SWAP_BACKEND"); if (!backend) { ops = &starpu_disk_unistd_ops; } else if (!strcmp(backend, "stdio")) { ops = &starpu_disk_stdio_ops; } else if (!strcmp(backend, "unistd")) { ops = &starpu_disk_unistd_ops; } else if (!strcmp(backend, "unistd_o_direct")) { #ifdef STARPU_LINUX_SYS ops = &starpu_disk_unistd_o_direct_ops; #else _STARPU_DISP("Warning: o_direct support is not compiled in, could not enable disk swap\n"); return; #endif } else if (!strcmp(backend, "leveldb")) { #ifdef STARPU_HAVE_LEVELDB ops = &starpu_disk_leveldb_ops; #else _STARPU_DISP("Warning: leveldb support is not compiled in, could not enable disk swap\n"); return; #endif } else if (!strcmp(backend, "hdf5")) { #ifdef STARPU_HAVE_HDF5 ops = &starpu_disk_hdf5_ops; #else _STARPU_DISP("Warning: hdf5 support is not compiled in, could not enable disk swap\n"); return; #endif } else { _STARPU_DISP("Warning: unknown disk swap backend %s, could not enable disk swap\n", backend); return; } size = starpu_getenv_number_default("STARPU_DISK_SWAP_SIZE", -1); starpu_disk_swap_node = starpu_disk_register(ops, path, ((size_t) size) << 20); if (starpu_disk_swap_node < 0) { _STARPU_DISP("Warning: could not enable disk swap %s on %s with size %ld, could not enable disk swap\n", backend, path, (long) size); return; } } starpu-1.4.9+dfsg/src/core/disk.h000066400000000000000000000060651507764646700166410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DISK_H__ #define __DISK_H__ /** @file */ #define STARPU_DISK_ALL 1 #define STARPU_DISK_NO_RECLAIM 2 #ifdef __cplusplus extern "C" { #endif #include #include #pragma GCC visibility push(hidden) /** interface to manipulate memory disk */ void * _starpu_disk_alloc (unsigned node, size_t size) STARPU_ATTRIBUTE_MALLOC; void _starpu_disk_free (unsigned node, void *obj, size_t size); /** src_node is a disk node, dst_node is for the moment the STARPU_MAIN_RAM */ int _starpu_disk_read(unsigned src_node, unsigned dst_node, void *obj, void *buf, off_t offset, size_t size, struct _starpu_async_channel * async_channel); /** src_node is for the moment the STARU_MAIN_RAM, dst_node is a disk node */ int _starpu_disk_write(unsigned src_node, unsigned dst_node, void *obj, void *buf, off_t offset, size_t size, struct _starpu_async_channel * async_channel); int _starpu_disk_full_read(unsigned src_node, unsigned dst_node, void * obj, void ** ptr, size_t * size, struct _starpu_async_channel * async_channel); int _starpu_disk_full_write(unsigned src_node, unsigned dst_node, void * obj, void * ptr, size_t size, struct _starpu_async_channel * async_channel); int _starpu_disk_copy(unsigned node_src, void* obj_src, off_t offset_src, unsigned node_dst, void* obj_dst, off_t offset_dst, size_t size, struct _starpu_async_channel * async_channel); /** force the request to compute */ void starpu_disk_wait_request(struct _starpu_async_channel *async_channel); /** return 1 if the request is finished, 0 if not finished */ int starpu_disk_test_request(struct _starpu_async_channel *async_channel); void starpu_disk_free_request(struct _starpu_async_channel *async_channel); /** interface to compare memory disk */ int _starpu_disk_can_copy(unsigned node1, unsigned node2); /** change disk flag */ void _starpu_set_disk_flag(unsigned node, int flag); int _starpu_get_disk_flag(unsigned node); /** unregister disk */ void _starpu_disk_unregister(void); void _starpu_swap_init(void); static inline struct _starpu_disk_event *_starpu_disk_get_event(union _starpu_async_channel_event *_event) { struct _starpu_disk_event *event; STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); event = (struct _starpu_disk_event *) _event; return event; } #ifdef __cplusplus } #endif #pragma GCC visibility pop #endif /* __DISK_H__ */ starpu-1.4.9+dfsg/src/core/disk_ops/000077500000000000000000000000001507764646700173425ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/core/disk_ops/disk_hdf5.c000066400000000000000000000712331507764646700213540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #include #include #include #ifndef O_BINARY #define O_BINARY 0 #endif #define NITER _starpu_calibration_minimum #define STARPU_CHUNK_DIM 4096 /* ------------------- use HDF5 to write on disk ------------------- */ #ifndef H5_HAVE_THREADSAFE static int nb_disk_open = 0; static volatile int init_finished = 0; static starpu_pthread_t global_thread; /* This thread will perform each write/read because we don't have asynchronous functions */ static volatile int global_run; /* Ask to the thread if he can continue */ static starpu_pthread_mutex_t global_mutex; /* Mutex is used to protect work_list and if HDF5 library is not safe */ static starpu_pthread_cond_t global_cond; static struct _starpu_hdf5_work_list global_work_list; /* This list contains the work for the hdf5 thread */ #endif #ifdef H5_HAVE_THREADSAFE #define HDF5_VAR_THREAD fileBase->thread #define HDF5_VAR_RUN fileBase->run #define HDF5_VAR_MUTEX fileBase->mutex #define HDF5_VAR_COND fileBase->cond #define HDF5_VAR_WORK_LIST fileBase->work_list #else #define HDF5_VAR_THREAD global_thread #define HDF5_VAR_RUN global_run #define HDF5_VAR_MUTEX global_mutex #define HDF5_VAR_COND global_cond #define HDF5_VAR_WORK_LIST global_work_list #endif enum hdf5_work_type { READ, WRITE, FULL_READ, FULL_WRITE, COPY }; LIST_TYPE(_starpu_hdf5_work, enum hdf5_work_type type; struct starpu_hdf5_base * base_src; struct starpu_hdf5_obj * obj_src; off_t offset_src; struct starpu_hdf5_base * base_dst; struct starpu_hdf5_obj * obj_dst; off_t offset_dst; void * ptr; size_t size; void * event; ); struct starpu_hdf5_base { hid_t fileID; char * path; unsigned created; /* StarPU creates the HDF5 file */ unsigned next_dataset_id; starpu_pthread_t thread; /* This thread will perform each write/read because we don't have asynchronous functions */ int run; /* Ask to the thread if he can continue */ starpu_pthread_mutex_t mutex; /* Mutex is used to protect work_list and if HDF5 library is not safe */ starpu_pthread_cond_t cond; struct _starpu_hdf5_work_list work_list; /* This list contains the work for the hdf5 thread */ }; struct starpu_hdf5_obj { hid_t dataset; /* describe this object in HDF5 file */ char * path; /* path where data are stored in HDF5 file */ size_t size; }; static inline void _starpu_hdf5_protect_start(void * base STARPU_ATTRIBUTE_UNUSED) { #ifndef H5_HAVE_THREADSAFE if (base != NULL) STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX); #endif } static inline void _starpu_hdf5_protect_stop(void * base STARPU_ATTRIBUTE_UNUSED) { #ifndef H5_HAVE_THREADSAFE if (base != NULL) STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX); #endif } /* ------------------ Functions for internal thread -------------------- */ /* TODO : Dataspace may not be NATIVE_CHAR for opened data */ static void starpu_hdf5_full_read_internal(struct _starpu_hdf5_work * work) { herr_t status; status = H5Dread(work->obj_src->dataset, H5T_NATIVE_CHAR, H5S_ALL, H5S_ALL, H5P_DEFAULT, work->ptr); STARPU_ASSERT_MSG(status >= 0, "Can not read data associed to this dataset (%s)\n", work->obj_src->path); } /* TODO : Dataspace may not be NATIVE_CHAR for opened data */ static void starpu_hdf5_full_write_internal(struct _starpu_hdf5_work * work) { herr_t status; /* Update size of dataspace */ if (work->size > work->obj_dst->size) { /* Get official datatype */ hid_t datatype = H5Dget_type(work->obj_dst->dataset); hsize_t sizeDatatype = H5Tget_size(datatype); /* Count in number of elements */ hsize_t extendsdim[1] = {work->size/sizeDatatype}; status = H5Dset_extent (work->obj_dst->dataset, extendsdim); STARPU_ASSERT_MSG(status >= 0, "Error when extending HDF5 dataspace !\n"); work->obj_dst->size = work->size; } /* Write ALL the dataspace */ status = H5Dwrite(work->obj_dst->dataset, H5T_NATIVE_CHAR, H5S_ALL, H5S_ALL, H5P_DEFAULT, work->ptr); STARPU_ASSERT_MSG(status >= 0, "Can not write data to this dataset (%s)\n", work->obj_dst->path); } static void starpu_hdf5_read_internal(struct _starpu_hdf5_work * work) { herr_t status; /* Get official datatype */ hid_t datatype = H5Dget_type(work->obj_src->dataset); hsize_t sizeDatatype = H5Tget_size(datatype); /* count in element, not in byte */ work->offset_src /= sizeDatatype; work->size /= sizeDatatype; /* duplicate the dataspace in the dataset */ hid_t dataspace_select = H5Dget_space(work->obj_src->dataset); STARPU_ASSERT_MSG(dataspace_select >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path); /* Select what we want of the duplicated dataspace (it's called an hyperslab). This operation is done on place */ hsize_t offsets[1] = {work->offset_src}; hsize_t count[1] = {work->size}; /* stride and block size are NULL which is equivalent of a shift of 1 */ status = H5Sselect_hyperslab(dataspace_select, H5S_SELECT_SET, offsets, NULL, count, NULL); STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path); /* create the dataspace for the received data which describes ptr */ hsize_t dims_receive[1] = {work->size}; hid_t dataspace_receive = H5Screate_simple(1, dims_receive, NULL); STARPU_ASSERT_MSG(dataspace_receive >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path); /* Receiver has to be an hyperslabs */ offsets[0] = 0; count[0] = work->size; H5Sselect_hyperslab(dataspace_receive, H5S_SELECT_SET, offsets, NULL, count, NULL); STARPU_ASSERT_MSG(dataspace_receive >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path); status = H5Dread(work->obj_src->dataset, datatype, dataspace_receive, dataspace_select, H5P_DEFAULT, work->ptr); STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path); /* don't need these dataspaces */ status = H5Sclose(dataspace_select); STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path); status = H5Sclose(dataspace_receive); STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path); } static void starpu_hdf5_write_internal(struct _starpu_hdf5_work * work) { herr_t status; /* Get official datatype */ hid_t datatype = H5Dget_type(work->obj_dst->dataset); hsize_t sizeDatatype = H5Tget_size(datatype); /* Update size of dataspace */ if (work->size + work->offset_dst > work->obj_dst->size) { /* Count in number of elements */ hsize_t extendsdim[1] = {(work->offset_dst + work->size)/sizeDatatype}; status = H5Dset_extent (work->obj_dst->dataset, extendsdim); STARPU_ASSERT_MSG(status >= 0, "Error when extending HDF5 dataspace !\n"); work->obj_dst->size = work->offset_dst + work->size; } /* count in element, not in byte */ work->offset_dst /= sizeDatatype; work->size /= sizeDatatype; /* duplicate the dataspace in the dataset */ hid_t dataspace_select = H5Dget_space(work->obj_dst->dataset); STARPU_ASSERT_MSG(dataspace_select >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path); /* Select what we want of the duplicated dataspace (it's called an hyperslab). This operation is done on place */ hsize_t offsets[1] = {work->offset_dst}; hsize_t count[1] = {work->size}; /* stride and block size are NULL which is equivalent of a shift of 1 */ status = H5Sselect_hyperslab(dataspace_select, H5S_SELECT_SET, offsets, NULL, count, NULL); STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path); /* create the dataspace for the received data which describes ptr */ hsize_t dims_send[1] = {work->size}; hid_t dataspace_send = H5Screate_simple(1, dims_send, NULL); STARPU_ASSERT_MSG(dataspace_send >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path); /* Receiver has to be an hyperslabs */ offsets[0] = 0; count[0] = work->size; H5Sselect_hyperslab(dataspace_send, H5S_SELECT_SET, offsets, NULL, count, NULL); STARPU_ASSERT_MSG(dataspace_send >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path); status = H5Dwrite(work->obj_dst->dataset, datatype, dataspace_send, dataspace_select, H5P_DEFAULT, work->ptr); STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path); /* don't need these dataspaces */ status = H5Sclose(dataspace_select); STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path); status = H5Sclose(dataspace_send); STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path); } static unsigned warned = 0; static void starpu_hdf5_copy_internal(struct _starpu_hdf5_work * work) { herr_t status; /* HDF5 H50copy supports only same size in both areas and copies the entire object */ if (work->offset_src == 0 && work->offset_dst == 0 && work->size == work->obj_src->size && work->size == work->obj_dst->size) { H5Dclose(work->obj_dst->dataset); /* Dirty : Delete dataspace because H5Ocopy only works if destination does not exist */ H5Ldelete(work->base_dst->fileID, work->obj_dst->path, H5P_DEFAULT); status = H5Ocopy(work->base_src->fileID, work->obj_src->path, work->base_dst->fileID, work->obj_dst->path, H5P_DEFAULT, H5P_DEFAULT); STARPU_ASSERT_MSG(status >= 0, "Can not copy data (%s) associed to this disk (%s) to the data (%s) on this disk (%s)\n", work->obj_src->path, work->base_src->path, work->obj_dst->path, work->base_dst->path); work->obj_dst->dataset = H5Dopen2(work->base_dst->fileID, work->obj_dst->path, H5P_DEFAULT); } else { if (!warned) { _STARPU_DISP("Direct disk to disk copy is not supported for a piece of data. Data will be transferred to RAM memory and then, be pushed on disk \n"); warned = 1; } void * ptr; int ret = _starpu_malloc_flags_on_node(STARPU_MAIN_RAM, &ptr, work->size, 0); STARPU_ASSERT_MSG(ret == 0, "Cannot allocate %lu bytes to perform disk to disk operation", (unsigned long)work->size); /* buffer is only used internally to store intermediate data */ work->ptr = ptr; starpu_hdf5_read_internal(work); starpu_hdf5_write_internal(work); _starpu_free_flags_on_node(STARPU_MAIN_RAM, ptr, work->size, 0); } } static void * _starpu_hdf5_internal_thread(void * arg) { #ifdef H5_HAVE_THREADSAFE struct starpu_hdf5_base * fileBase = (struct starpu_hdf5_base *) arg; #endif while (HDF5_VAR_RUN || !_starpu_hdf5_work_list_empty(&HDF5_VAR_WORK_LIST)) { STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX); if (_starpu_hdf5_work_list_empty(&HDF5_VAR_WORK_LIST) && HDF5_VAR_RUN) STARPU_PTHREAD_COND_WAIT(&HDF5_VAR_COND, &HDF5_VAR_MUTEX); STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX); /* We are the only consumer here, don't need to protect here */ if (!_starpu_hdf5_work_list_empty(&HDF5_VAR_WORK_LIST)) { STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX); struct _starpu_hdf5_work * work = _starpu_hdf5_work_list_pop_back(&HDF5_VAR_WORK_LIST); STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX); if (work->base_src < work->base_dst) { _starpu_hdf5_protect_start(work->base_src); #ifdef H5_HAVE_THREADSAFE _starpu_hdf5_protect_start(work->base_dst); #endif } else { _starpu_hdf5_protect_start(work->base_dst); #ifdef H5_HAVE_THREADSAFE if (work->base_src != work->base_dst) _starpu_hdf5_protect_start(work->base_src); #endif } switch(work->type) { case READ: starpu_hdf5_read_internal(work); break; case WRITE: starpu_hdf5_write_internal(work); break; case FULL_READ: starpu_hdf5_full_read_internal(work); break; case FULL_WRITE: starpu_hdf5_full_write_internal(work); break; case COPY: starpu_hdf5_copy_internal(work); break; default: STARPU_ABORT(); } if (work->base_src < work->base_dst) { _starpu_hdf5_protect_stop(work->base_src); #ifdef H5_HAVE_THREADSAFE _starpu_hdf5_protect_stop(work->base_dst); #endif } else { _starpu_hdf5_protect_stop(work->base_dst); #ifdef H5_HAVE_THREADSAFE if (work->base_src != work->base_dst) _starpu_hdf5_protect_stop(work->base_src); #endif } /* Update event to tell it's finished */ starpu_sem_post((starpu_sem_t *) work->event); free(work); } } return NULL; } static void _starpu_hdf5_create_thread(struct starpu_hdf5_base * fileBase) { _starpu_hdf5_work_list_init(&HDF5_VAR_WORK_LIST); HDF5_VAR_RUN = 1; STARPU_PTHREAD_COND_INIT(&HDF5_VAR_COND, NULL); STARPU_PTHREAD_CREATE(&HDF5_VAR_THREAD, NULL, _starpu_hdf5_internal_thread, (void *) fileBase); } /* returns the size in BYTES */ static hsize_t _starpu_get_size_obj(struct starpu_hdf5_obj * obj) { herr_t status; hid_t dataspace = H5Dget_space(obj->dataset); STARPU_ASSERT_MSG(dataspace >= 0, "Can not get the size of this HDF5 dataset (%s)\n", obj->path); hsize_t dims[1]; status = H5Sget_simple_extent_dims(dataspace, dims, NULL); STARPU_ASSERT_MSG(status >= 0, "Can not get the size of this HDF5 dataset (%s)\n", obj->path); hid_t datatype = H5Dget_type(obj->dataset); STARPU_ASSERT_MSG(datatype >= 0, "Can not get the size of this HDF5 dataset (%s)\n", obj->path); hsize_t sizeDatatype = H5Tget_size(datatype); STARPU_ASSERT_MSG(sizeDatatype > 0, "Can not get the size of this HDF5 dataset (%s)\n", obj->path); H5Sclose(dataspace); H5Tclose(datatype); return dims[0]*sizeDatatype; } static void starpu_hdf5_send_work(void *base_src, void *obj_src, off_t offset_src, void *base_dst, void *obj_dst, off_t offset_dst, void *buf, size_t size, void * event, enum hdf5_work_type type) { struct starpu_hdf5_obj * dataObj_src = (struct starpu_hdf5_obj *) obj_src; struct starpu_hdf5_obj * dataObj_dst = (struct starpu_hdf5_obj *) obj_dst; struct starpu_hdf5_base * fileBase_src = (struct starpu_hdf5_base *) base_src; struct starpu_hdf5_base * fileBase_dst = (struct starpu_hdf5_base *) base_dst; struct _starpu_hdf5_work * work; _STARPU_MALLOC(work, sizeof(*work)); work->type = type; work->base_src = fileBase_src; work->obj_src = dataObj_src; work->offset_src = offset_src; work->base_dst = fileBase_dst; work->obj_dst = dataObj_dst; work->offset_dst = offset_dst; work->ptr = buf; work->size = size; work->event = event; #ifdef H5_HAVE_THREADSAFE struct starpu_hdf5_base * fileBase; if (fileBase_src != NULL) fileBase = fileBase_src; else fileBase = fileBase_dst; #endif STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX); _starpu_hdf5_work_list_push_front(&HDF5_VAR_WORK_LIST, work); /* Wake up internal thread */ STARPU_PTHREAD_COND_BROADCAST(&HDF5_VAR_COND); STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX); } static struct starpu_hdf5_obj * _starpu_hdf5_data_alloc(struct starpu_hdf5_base * fileBase, char * name, size_t size) { struct starpu_hdf5_obj * obj; _STARPU_MALLOC(obj, sizeof(*obj)); _starpu_hdf5_protect_start((void *) fileBase); /* create a dataspace with one dimension of size elements */ hsize_t dim[1] = {size}; hsize_t maxdim[1] = {H5S_UNLIMITED}; hid_t dataspace = H5Screate_simple(1, dim, maxdim); if (dataspace < 0) { free(obj); return NULL; } hsize_t chunkdim[1] = {STARPU_CHUNK_DIM}; hid_t prop = H5Pcreate (H5P_DATASET_CREATE); herr_t status = H5Pset_chunk (prop, 1, chunkdim); STARPU_ASSERT_MSG(status >= 0, "Error when setting HDF5 property \n"); /* create a dataset at location name, with data described by the dataspace. * Each element are like char in C (expected one byte) */ obj->dataset = H5Dcreate2(fileBase->fileID, name, H5T_NATIVE_CHAR, dataspace, H5P_DEFAULT, prop, H5P_DEFAULT); H5Sclose(dataspace); H5Pclose(prop); if (obj->dataset < 0) { free(obj); return NULL; } obj->path = name; obj->size = size; _starpu_hdf5_protect_stop((void *) fileBase); return obj; } static struct starpu_hdf5_obj * _starpu_hdf5_data_open(struct starpu_hdf5_base * fileBase, char * name, size_t size) { struct starpu_hdf5_obj * obj; _STARPU_MALLOC(obj, sizeof(*obj)); _starpu_hdf5_protect_start((void *) fileBase); /* create a dataset at location name, with data described by the dataspace. * Each element are like char in C (expected one byte) */ obj->dataset = H5Dopen2(fileBase->fileID, name, H5P_DEFAULT); _starpu_hdf5_protect_stop((void *) fileBase); if (obj->dataset < 0) { free(obj); return NULL; } obj->path = name; obj->size = size; return obj; } static void *starpu_hdf5_plug(void *parameter, starpu_ssize_t size STARPU_ATTRIBUTE_UNUSED) { struct starpu_hdf5_base * fileBase; _STARPU_MALLOC(fileBase, sizeof(struct starpu_hdf5_base)); #ifndef H5_HAVE_THREADSAFE int actual_nb_disk = STARPU_ATOMIC_ADD(&nb_disk_open, 1); if (actual_nb_disk == 1) { #endif STARPU_PTHREAD_MUTEX_INIT(&HDF5_VAR_MUTEX, NULL); #ifndef H5_HAVE_THREADSAFE } else { while (!init_finished) STARPU_UYIELD(); } #endif _starpu_hdf5_protect_start(fileBase); struct stat buf; if (stat(parameter, &buf) != 0 || !S_ISREG(buf.st_mode)) { /* The file doesn't exist or the directory exists => create the datafile */ int id; _starpu_mkpath(parameter, S_IRWXU); fileBase->path = _starpu_mktemp(parameter, O_RDWR | O_BINARY, &id); if (!fileBase->path) { free(fileBase); _STARPU_ERROR("Can not create the HDF5 file (%s)", (char *) parameter); return NULL; } /* just use _starpu_mktemp_many to create a file, close the file descriptor */ close(id); /* Truncate it */ fileBase->fileID = H5Fcreate((char *)fileBase->path, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); if (fileBase->fileID < 0) { free(fileBase); _STARPU_ERROR("Can not create the HDF5 file (%s)", (char *) parameter); return NULL; } fileBase->created = 1; } else { /* Well, open it ! */ char *path = strdup((char *)parameter); STARPU_ASSERT(path); fileBase->fileID = H5Fopen((char *)parameter, H5F_ACC_RDWR, H5P_DEFAULT); if (fileBase->fileID < 0) { free(fileBase); free(path); _STARPU_ERROR("Can not open the HDF5 file (%s)", (char *) parameter); return NULL; } fileBase->created = 0; fileBase->path = path; } #ifndef H5_HAVE_THREADSAFE if (actual_nb_disk == 1) { #endif _starpu_hdf5_create_thread(fileBase); #ifndef H5_HAVE_THREADSAFE init_finished = 1; } #endif #if H5_VERS_MAJOR > 1 || (H5_VERS_MAJOR == 1 && H5_VERS_MINOR > 10) || (H5_VERS_MAJOR == 1 && H5_VERS_MINOR == 10 && H5_VERS_RELEASE > 0) H5Pset_file_space_strategy(fileBase->fileID, H5F_FSPACE_STRATEGY_FSM_AGGR, 0, 0); #endif _starpu_hdf5_protect_stop(fileBase); fileBase->next_dataset_id = 0; return (void *) fileBase; } /* free memory allocated for the base */ static void starpu_hdf5_unplug(void *base) { #ifndef H5_HAVE_THREADSAFE int actual_nb_disk = STARPU_ATOMIC_ADD(&nb_disk_open, -1); #endif struct starpu_hdf5_base * fileBase = (struct starpu_hdf5_base *) base; herr_t status; STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX); #ifndef H5_HAVE_THREADSAFE if (actual_nb_disk == 0) { #endif HDF5_VAR_RUN = 0; STARPU_PTHREAD_COND_BROADCAST(&HDF5_VAR_COND); STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX); STARPU_PTHREAD_JOIN(HDF5_VAR_THREAD, NULL); STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX); STARPU_PTHREAD_COND_DESTROY(&HDF5_VAR_COND); STARPU_ASSERT(_starpu_hdf5_work_list_empty(&HDF5_VAR_WORK_LIST)); /* the internal thread is deleted */ #ifndef H5_HAVE_THREADSAFE } #endif status = H5Fclose(fileBase->fileID); STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX); #ifndef H5_HAVE_THREADSAFE if (actual_nb_disk == 0) { #endif STARPU_PTHREAD_MUTEX_DESTROY(&HDF5_VAR_MUTEX); #ifndef H5_HAVE_THREADSAFE init_finished = 0; } #endif STARPU_ASSERT_MSG(status >= 0, "Can not unplug this HDF5 disk (%s)\n", fileBase->path); if (fileBase->created) { unlink(fileBase->path); } else { /* Warn user about repack, because unlink dataset doesn't delete data in file */ _STARPU_DISP("This disk (%s) was used to store temporary data. You may use the h5repack command to reduce the size of the file... \n", fileBase->path); } free(fileBase->path); free(fileBase); } static void *starpu_hdf5_alloc(void *base, size_t size) { struct starpu_hdf5_base * fileBase = (struct starpu_hdf5_base *) base; struct starpu_hdf5_obj * obj; char * name; char * prefix = "STARPU_"; char name_id[16]; /* Save the name of the dataset */ STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX); snprintf(name_id, sizeof(name_id), "%u", fileBase->next_dataset_id); fileBase->next_dataset_id++; STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX); /* name in HDF5 is like a path */ _STARPU_MALLOC(name, 1+strlen(prefix)+strlen(name_id)+1); snprintf(name, 1+strlen(prefix)+strlen(name_id)+1, "/%s%s", prefix, name_id); obj = _starpu_hdf5_data_alloc(fileBase, name, size); if (!obj) { free(name); } return (void *) obj; } static void starpu_hdf5_free(void *base, void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) { struct starpu_hdf5_base * fileBase = (struct starpu_hdf5_base *) base; struct starpu_hdf5_obj * dataObj = (struct starpu_hdf5_obj *) obj; herr_t status; _starpu_hdf5_protect_start(base); status = H5Dclose(dataObj->dataset); STARPU_ASSERT_MSG(status >= 0, "Can not free this HDF5 dataset (%s)\n", dataObj->path); /* remove the dataset link in the HDF5 * But it doesn't delete the space in the file */ status = H5Ldelete(fileBase->fileID, dataObj->path, H5P_DEFAULT); STARPU_ASSERT_MSG(status >= 0, "Can not delete the link associed to this dataset (%s)\n", dataObj->path); _starpu_hdf5_protect_stop(base); free(dataObj->path); free(dataObj); } static void *starpu_hdf5_open(void *base, void *pos, size_t size) { struct starpu_hdf5_base * fileBase = (struct starpu_hdf5_base *) base; struct starpu_hdf5_obj * obj; char *name; name = strdup((char *)pos); STARPU_ASSERT(name); obj = _starpu_hdf5_data_open(fileBase, name, size); if (!obj) { free(name); } return (void *) obj; } static void starpu_hdf5_close(void *base, void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) { struct starpu_hdf5_obj * dataObj = (struct starpu_hdf5_obj *) obj; herr_t status; _starpu_hdf5_protect_start(base); status = H5Dclose(dataObj->dataset); STARPU_ASSERT_MSG(status >= 0, "Can not close this HDF5 dataset (%s)\n", dataObj->path); _starpu_hdf5_protect_stop(base); free(dataObj->path); free(dataObj); } static void starpu_hdf5_wait(void * event) { starpu_sem_t * finished = (starpu_sem_t *) event; starpu_sem_wait(finished); } static int starpu_hdf5_test(void * event) { starpu_sem_t * finished = (starpu_sem_t *) event; return starpu_sem_trywait(finished) == 0; } static int starpu_hdf5_full_read(void *base, void *obj, void **ptr, size_t *size, unsigned dst_node) { struct starpu_hdf5_obj * dataObj = (struct starpu_hdf5_obj *) obj; starpu_sem_t finished; starpu_sem_init(&finished, 0, 0); _starpu_hdf5_protect_start(base); *size = _starpu_get_size_obj(dataObj); _starpu_hdf5_protect_stop(base); _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); starpu_hdf5_send_work(base, obj, 0, NULL, NULL, 0, *ptr, *size, (void*) &finished, FULL_READ); starpu_hdf5_wait(&finished); starpu_sem_destroy(&finished); return 0; } static int starpu_hdf5_full_write(void *base, void *obj, void *ptr, size_t size) { starpu_sem_t finished; starpu_sem_init(&finished, 0, 0); starpu_hdf5_send_work(NULL, NULL, 0, base, obj, 0, ptr, size, (void*) &finished, FULL_WRITE); starpu_hdf5_wait(&finished); starpu_sem_destroy(&finished); return 0; } static int starpu_hdf5_read(void *base, void *obj, void *buf, off_t offset, size_t size) { starpu_sem_t finished; starpu_sem_init(&finished, 0, 0); starpu_hdf5_send_work(base, obj, offset, NULL, NULL, 0, buf, size, (void*) &finished, READ); starpu_hdf5_wait(&finished); starpu_sem_destroy(&finished); return 0; } static int starpu_hdf5_write(void *base, void *obj, const void *buf, off_t offset, size_t size) { starpu_sem_t finished; starpu_sem_init(&finished, 0, 0); starpu_hdf5_send_work(NULL, NULL, 0, base, obj, offset, (void *) buf, size, (void*) &finished, WRITE); starpu_hdf5_wait(&finished); starpu_sem_destroy(&finished); return 0; } static void * starpu_hdf5_async_read(void *base, void *obj, void *buf, off_t offset, size_t size) { starpu_sem_t * finished; _STARPU_MALLOC(finished, sizeof(*finished)); starpu_sem_init(finished, 0, 0); starpu_hdf5_send_work(base, obj, offset, NULL, NULL, 0, buf, size, (void*) finished, READ); return finished; } static void * starpu_hdf5_async_write(void *base, void *obj, void *buf, off_t offset, size_t size) { starpu_sem_t * finished; _STARPU_MALLOC(finished, sizeof(*finished)); starpu_sem_init(finished, 0, 0); starpu_hdf5_send_work(NULL, NULL, 0, base, obj, offset, (void *) buf, size, (void*) finished, WRITE); return finished; } void * starpu_hdf5_async_full_read (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node) { struct starpu_hdf5_obj * dataObj = (struct starpu_hdf5_obj *) obj; starpu_sem_t * finished; _STARPU_MALLOC(finished, sizeof(*finished)); starpu_sem_init(finished, 0, 0); _starpu_hdf5_protect_start(base); *size = _starpu_get_size_obj(dataObj); _starpu_hdf5_protect_stop(base); _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); starpu_hdf5_send_work(base, obj, 0, NULL, NULL, 0, *ptr, *size, (void*) finished, FULL_READ); return finished; } void * starpu_hdf5_async_full_write (void * base, void * obj, void * ptr, size_t size) { starpu_sem_t * finished; _STARPU_MALLOC(finished, sizeof(*finished)); starpu_sem_init(finished, 0, 0); starpu_hdf5_send_work(NULL, NULL, 0, base, obj, 0, ptr, size, (void*) finished, FULL_WRITE); return finished; } void * starpu_hdf5_copy(void *base_src, void* obj_src, off_t offset_src, void *base_dst, void* obj_dst, off_t offset_dst, size_t size) { starpu_sem_t * finished; _STARPU_MALLOC(finished, sizeof(*finished)); starpu_sem_init(finished, 0, 0); starpu_hdf5_send_work(base_src, obj_src, offset_src, base_dst, obj_dst, offset_dst, NULL, size, (void*) finished, COPY); return finished; } static void starpu_hdf5_free_request(void * event) { starpu_sem_destroy(event); free(event); } static int get_hdf5_bandwidth_between_disk_and_main_ram(unsigned node, void *base) { unsigned iter; double timing_slowness, timing_latency; double start; double end; char *buf; struct starpu_hdf5_base * fileBase = (struct starpu_hdf5_base *) base; srand(time(NULL)); starpu_malloc_flags((void **) &buf, STARPU_DISK_SIZE_MIN, 0); STARPU_ASSERT(buf != NULL); /* allocate memory */ void *mem = _starpu_disk_alloc(node, STARPU_DISK_SIZE_MIN); /* fail to alloc */ if (mem == NULL) return 0; memset(buf, 0, STARPU_DISK_SIZE_MIN); /* Measure upload slowness */ start = starpu_timing_now(); for (iter = 0; iter < NITER; ++iter) { _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, 0, STARPU_DISK_SIZE_MIN, NULL); } end = starpu_timing_now(); timing_slowness = end - start; /* free memory */ starpu_free_flags(buf, STARPU_DISK_SIZE_MIN, 0); starpu_malloc_flags((void**) &buf, sizeof(char), 0); STARPU_ASSERT(buf != NULL); *buf = 0; /* Measure latency */ start = starpu_timing_now(); for (iter = 0; iter < NITER; ++iter) { _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, rand() % (STARPU_DISK_SIZE_MIN -1) , 1, NULL); } end = starpu_timing_now(); timing_latency = end - start; _starpu_disk_free(node, mem, STARPU_DISK_SIZE_MIN); starpu_free_flags(buf, sizeof(char), 0); _starpu_save_bandwidth_and_latency_disk((NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, (NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, timing_latency/NITER, timing_latency/NITER, node, fileBase->path); return 1; } struct starpu_disk_ops starpu_disk_hdf5_ops = { .alloc = starpu_hdf5_alloc, .free = starpu_hdf5_free, .open = starpu_hdf5_open, .close = starpu_hdf5_close, .read = starpu_hdf5_read, .write = starpu_hdf5_write, .plug = starpu_hdf5_plug, .unplug = starpu_hdf5_unplug, .copy = starpu_hdf5_copy, .bandwidth = get_hdf5_bandwidth_between_disk_and_main_ram, .full_read = starpu_hdf5_full_read, .full_write = starpu_hdf5_full_write, .async_read = starpu_hdf5_async_read, .async_write = starpu_hdf5_async_write, .async_full_read = starpu_hdf5_async_full_read, .async_full_write = starpu_hdf5_async_full_write, .wait_request = starpu_hdf5_wait, .test_request = starpu_hdf5_test, .free_request = starpu_hdf5_free_request }; starpu-1.4.9+dfsg/src/core/disk_ops/disk_leveldb.cpp000066400000000000000000000240101507764646700224720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #define NITER _starpu_calibration_minimum /* ------------------- use leveldb to write on disk ------------------- */ struct starpu_leveldb_obj { char * key; size_t size; starpu_pthread_mutex_t mutex; }; struct starpu_leveldb_base { char *path; leveldb::DB* db; /* if StarPU creates the leveldb */ bool created; }; /* allocation memory on disk */ static void *starpu_leveldb_alloc(void *base, size_t size STARPU_ATTRIBUTE_UNUSED) { struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; struct starpu_leveldb_obj *obj = (struct starpu_leveldb_obj *)malloc(sizeof(struct starpu_leveldb_obj)); STARPU_ASSERT(obj); STARPU_PTHREAD_MUTEX_INIT(&obj->mutex, NULL); size_t len = 6 + 1 + 2+sizeof(void*)*2 + 1; char *key = (char *)malloc(len*sizeof(char)); STARPU_ASSERT(key); snprintf(key, len, "STARPU-%p", obj); /* create and add a key with a small memory */ leveldb::Status s = base_tmp->db->Put(leveldb::WriteOptions(), key, "a"); STARPU_ASSERT(s.ok()); /* obj->size is the real size in the disk */ obj->key = key; obj->size = sizeof(char); return (void *) obj; } /* free memory on disk */ static void starpu_leveldb_free(void *base , void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) { struct starpu_leveldb_obj *tmp = (struct starpu_leveldb_obj *) obj; struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; base_tmp->db->Delete(leveldb::WriteOptions(), tmp->key); STARPU_PTHREAD_MUTEX_DESTROY(&tmp->mutex); free(tmp->key); free(tmp); } /* open an existing memory on disk */ static void *starpu_leveldb_open(void *base STARPU_ATTRIBUTE_UNUSED, void *pos, size_t size) { struct starpu_leveldb_obj *obj = (struct starpu_leveldb_obj *)malloc(sizeof(struct starpu_leveldb_obj)); STARPU_ASSERT(obj); STARPU_PTHREAD_MUTEX_INIT(&obj->mutex, NULL); obj->key = strdup((char*) pos); obj->size = size; return (void *) obj; } /* free memory without delete it */ static void starpu_leveldb_close(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) { struct starpu_leveldb_obj *tmp = (struct starpu_leveldb_obj *) obj; STARPU_PTHREAD_MUTEX_DESTROY(&tmp->mutex); free(tmp->key); free(tmp); } /* in the leveldb, we are obliged to read and to write the entire data * so, we have to use buffers to have offset and size options */ static int starpu_leveldb_read(void *base, void *obj, void *buf, off_t offset, size_t size) { struct starpu_leveldb_obj *tmp = (struct starpu_leveldb_obj *) obj; struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); /* leveldb need a string to store data */ std::string value; leveldb::Status s = base_tmp->db->Get(leveldb::ReadOptions(), tmp->key, &value); uintptr_t value_read = (uintptr_t)(value.c_str()); /* use buffer */ if(s.ok()) memcpy(buf, (void *) (value_read+offset), size); else STARPU_ASSERT(s.ok()); STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); return 0; } static int starpu_leveldb_full_read(void *base, void *obj, void **ptr, size_t *size, unsigned dst_node) { struct starpu_leveldb_obj *tmp = (struct starpu_leveldb_obj *) obj; struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); /* leveldb need a string to store data */ std::string value; leveldb::Status s = base_tmp->db->Get(leveldb::ReadOptions(), tmp->key, &value); STARPU_ASSERT(s.ok()); *size = value.length(); _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); STARPU_ASSERT(*ptr); /* use buffer */ memcpy(*ptr, value.c_str(), *size); STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); return 0; } /* write on the memory disk */ static int starpu_leveldb_write(void *base, void *obj, const void *buf, off_t offset, size_t size) { struct starpu_leveldb_obj *tmp = (struct starpu_leveldb_obj *) obj; struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; void *buffer; leveldb::Status s; STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); if (offset == 0 && size >= tmp->size) { /* We overwrite everything, no need to get the old value */ buffer = (void*) buf; } else { uintptr_t buf_tmp = (uintptr_t) buf; buffer = malloc((tmp->size > (offset + size)) ? tmp->size : (offset + size)); STARPU_ASSERT(buffer); /* we read the data */ std::string value; s = base_tmp->db->Get(leveldb::ReadOptions(), tmp->key, &value); uintptr_t value_read = (uintptr_t)(value.c_str()); STARPU_ASSERT(s.ok()); memcpy(buffer, (void *) value_read, tmp->size); /* put the new data on their new place */ memcpy((void *) ((uintptr_t) buffer + offset), (void *) buf_tmp, size); } /* and write them */ s = base_tmp->db->Put(leveldb::WriteOptions(), tmp->key, (char *)buffer); STARPU_ASSERT(s.ok()); /* if the new size is higher than the old, we update it - first write after the alloc */ tmp->size = (tmp->size > size) ? tmp->size : size; if (buffer != buf) free(buffer); STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); return 0; } static int starpu_leveldb_full_write(void *base, void *obj, void *ptr, size_t size) { struct starpu_leveldb_obj *tmp = (struct starpu_leveldb_obj *) obj; struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; /* update file size to achieve correct writes */ tmp->size = size; leveldb::WriteOptions write_options; write_options.sync = true; leveldb::Status s = base_tmp->db->Put(write_options, tmp->key, (char *)ptr); STARPU_ASSERT(s.ok()); return 0; } /* create a new copy of parameter == base */ static void *starpu_leveldb_plug(void *parameter, starpu_ssize_t size STARPU_ATTRIBUTE_UNUSED) { struct starpu_leveldb_base *tmp = (struct starpu_leveldb_base *)malloc(sizeof(struct starpu_leveldb_base)); STARPU_ASSERT(tmp); leveldb::Status status; leveldb::DB *db; leveldb::Options options; options.create_if_missing = true; /* try to create the database */ options.error_if_exists = true; status = leveldb::DB::Open(options, (char *) parameter, &db); tmp->created = true; /* if it has already been created before */ if (!status.ok()) { options.error_if_exists = false; status = leveldb::DB::Open(options, (char *) parameter, &db); STARPU_ASSERT_MSG(status.ok(), "StarPU leveldb plug failed !"); tmp->created = false; } tmp->db = db; tmp->path = strdup((const char*) parameter); STARPU_ASSERT(status.ok()); return (void *) tmp; } /* free memory allocated for the base */ static void starpu_leveldb_unplug(void *base) { struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; if(base_tmp->created) delete base_tmp->db; free(base_tmp->path); free(base); } static int get_leveldb_bandwidth_between_disk_and_main_ram(unsigned node, void *base) { unsigned iter; double timing_slowness, timing_latency; double start; double end; struct starpu_leveldb_base *base_tmp = (struct starpu_leveldb_base *) base; srand(time (NULL)); char *buf = (char *)malloc(STARPU_DISK_SIZE_MIN*sizeof(char)); STARPU_ASSERT(buf); /* allocate memory */ void *mem = _starpu_disk_alloc(node, STARPU_DISK_SIZE_MIN); /* fail to alloc */ if (mem == NULL) { free(buf); return 0; } /* Measure upload slowness */ start = starpu_timing_now(); for (iter = 0; iter < NITER; ++iter) { _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, 0, STARPU_DISK_SIZE_MIN, NULL); } end = starpu_timing_now(); timing_slowness = end - start; /* free memory */ free(buf); buf = (char *)malloc(sizeof(char)); STARPU_ASSERT(buf); /* Measure latency */ start = starpu_timing_now(); for (iter = 0; iter < NITER; ++iter) { _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, rand() % (STARPU_DISK_SIZE_MIN -1) , 1, NULL); } end = starpu_timing_now(); timing_latency = end - start; _starpu_disk_free(node, mem, STARPU_DISK_SIZE_MIN); free(buf); _starpu_save_bandwidth_and_latency_disk((NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, (NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, timing_latency/NITER, timing_latency/NITER, node, base_tmp->path); return 1; } #if __cplusplus >= 201103L struct starpu_disk_ops starpu_disk_leveldb_ops = { .plug = starpu_leveldb_plug, .unplug = starpu_leveldb_unplug, .bandwidth = get_leveldb_bandwidth_between_disk_and_main_ram, .alloc = starpu_leveldb_alloc, .free = starpu_leveldb_free, .open = starpu_leveldb_open, .close = starpu_leveldb_close, .read = starpu_leveldb_read, .write = starpu_leveldb_write, .full_read = starpu_leveldb_full_read, .full_write = starpu_leveldb_full_write, .async_write = NULL, .async_read = NULL, .async_full_read = NULL, .async_full_write = NULL, .copy = NULL, .wait_request = NULL, .test_request = NULL, .free_request = NULL }; #else struct starpu_disk_ops starpu_disk_leveldb_ops = { starpu_leveldb_plug, starpu_leveldb_unplug, get_leveldb_bandwidth_between_disk_and_main_ram, starpu_leveldb_alloc, starpu_leveldb_free, starpu_leveldb_open, starpu_leveldb_close, starpu_leveldb_read, starpu_leveldb_write, starpu_leveldb_full_read, starpu_leveldb_full_write, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }; #endif starpu-1.4.9+dfsg/src/core/disk_ops/disk_stdio.c000066400000000000000000000254301507764646700216460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #ifdef STARPU_HAVE_WINDOWS # include #endif #define NITER _starpu_calibration_minimum #ifndef O_BINARY #define O_BINARY 0 #endif #define MAX_OPEN_FILES 64 #define TEMP_HIERARCHY_DEPTH 2 /* ------------------- use STDIO to write on disk ------------------- */ static unsigned starpu_stdio_opened_files; struct starpu_stdio_obj { int descriptor; FILE * file; char * path; size_t size; starpu_pthread_mutex_t mutex; }; struct starpu_stdio_base { char * path; int created; }; static struct starpu_stdio_obj *_starpu_stdio_init(int descriptor, char *path, size_t size) { struct starpu_stdio_obj *obj; _STARPU_MALLOC(obj, sizeof(struct starpu_stdio_obj)); FILE *f = fdopen(descriptor,"rb+"); if (f == NULL) { free(obj); return NULL; } STARPU_HG_DISABLE_CHECKING(starpu_stdio_opened_files); if (starpu_stdio_opened_files >= MAX_OPEN_FILES) { /* Too many opened files, avoid keeping this one opened */ fclose(f); f = NULL; descriptor = -1; } else (void) STARPU_ATOMIC_ADD(&starpu_stdio_opened_files, 1); STARPU_PTHREAD_MUTEX_INIT(&obj->mutex, NULL); obj->descriptor = descriptor; obj->file = f; obj->path = path; obj->size = size; return (void *) obj; } static FILE *_starpu_stdio_reopen(struct starpu_stdio_obj *obj) { int id = open(obj->path, O_RDWR); STARPU_ASSERT(id >= 0); FILE *f = fdopen(id,"rb+"); STARPU_ASSERT(f); return f; } static void _starpu_stdio_reclose(FILE *f) { fclose(f); } static void _starpu_stdio_close(struct starpu_stdio_obj *obj) { if (obj->descriptor < 0) return; if (starpu_stdio_opened_files < MAX_OPEN_FILES) (void) STARPU_ATOMIC_ADD(&starpu_stdio_opened_files, -1); fclose(obj->file); } static void _starpu_stdio_fini(struct starpu_stdio_obj *obj) { STARPU_PTHREAD_MUTEX_DESTROY(&obj->mutex); free(obj->path); free(obj); } /* allocation memory on disk */ static void *starpu_stdio_alloc(void *base, size_t size) { struct starpu_stdio_obj *obj; struct starpu_stdio_base * fileBase = (struct starpu_stdio_base *) base; int id; char *baseCpy = _starpu_mktemp_many(fileBase->path, TEMP_HIERARCHY_DEPTH, O_RDWR | O_BINARY, &id); /* fail */ if (!baseCpy) return NULL; int val = _starpu_ftruncate(id,size); /* fail */ if (val < 0) { _STARPU_DISP("Could not truncate file, ftruncate failed with error '%s'\n", strerror(errno)); close(id); unlink(baseCpy); free(baseCpy); return NULL; } obj = _starpu_stdio_init(id, baseCpy, size); if (!obj) { close(id); unlink(baseCpy); free(baseCpy); } return obj; } /* free memory on disk */ static void starpu_stdio_free(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) { struct starpu_stdio_obj *tmp = (struct starpu_stdio_obj *) obj; _starpu_stdio_close(tmp); unlink(tmp->path); _starpu_rmtemp_many(tmp->path, TEMP_HIERARCHY_DEPTH); _starpu_stdio_fini(tmp); } /* open an existing memory on disk */ static void *starpu_stdio_open(void *base, void *pos, size_t size) { struct starpu_stdio_base * fileBase = (struct starpu_stdio_base *) base; struct starpu_stdio_obj *obj; /* create template */ char *baseCpy; _STARPU_MALLOC(baseCpy, strlen(fileBase->path)+1+strlen(pos)+1); snprintf(baseCpy, strlen(fileBase->path)+1+strlen(pos)+1, "%s/%s", fileBase->path, (char *)pos); int id = open(baseCpy, O_RDWR); if (id < 0) { free(baseCpy); return NULL; } obj = _starpu_stdio_init(id, baseCpy, size); if (!obj) free(baseCpy); return obj; } /* free memory without delete it */ static void starpu_stdio_close(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) { struct starpu_stdio_obj *tmp = (struct starpu_stdio_obj *) obj; _starpu_stdio_close(tmp); _starpu_stdio_fini(tmp); } /* read the memory disk */ static int starpu_stdio_read(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void *buf, off_t offset, size_t size) { struct starpu_stdio_obj *tmp = (struct starpu_stdio_obj *) obj; FILE *f = tmp->file; if (f) STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); else f = _starpu_stdio_reopen(obj); int res = fseek(f, offset, SEEK_SET); STARPU_ASSERT_MSG(res == 0, "Stdio read failed"); starpu_ssize_t nb = fread(buf, 1, size, f); STARPU_ASSERT_MSG(nb >= 0, "Stdio read failed"); if (tmp->file) STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); else _starpu_stdio_reclose(f); return 0; } static int starpu_stdio_full_read(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void **ptr, size_t *size, unsigned dst_node) { struct starpu_stdio_obj *tmp = (struct starpu_stdio_obj *) obj; FILE *f = tmp->file; starpu_ssize_t ssize; if (f) STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); else f = _starpu_stdio_reopen(obj); int res = fseek(f, 0, SEEK_END); STARPU_ASSERT_MSG(res == 0, "Stdio write failed"); ssize = ftell(f); STARPU_ASSERT_MSG(ssize >= 0, "Stdio write failed"); *size = ssize; if (tmp->file) STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); /* Alloc aligned buffer */ _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); if (tmp->file) STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); res = fseek(f, 0, SEEK_SET); STARPU_ASSERT_MSG(res == 0, "Stdio read failed"); starpu_ssize_t nb = fread(*ptr, 1, *size, f); STARPU_ASSERT_MSG(nb >= 0, "Stdio read failed"); if (tmp->file) STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); else _starpu_stdio_reclose(f); return 0; } /* write on the memory disk */ static int starpu_stdio_write(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, const void *buf, off_t offset, size_t size) { struct starpu_stdio_obj *tmp = (struct starpu_stdio_obj *) obj; FILE *f = tmp->file; if (f) STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); else f = _starpu_stdio_reopen(obj); int res = fseek(f, offset, SEEK_SET); STARPU_ASSERT_MSG(res == 0, "Stdio write failed"); fwrite(buf, 1, size, f); if (tmp->file) STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); else _starpu_stdio_reclose(f); return 0; } static int starpu_stdio_full_write(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void *ptr, size_t size) { struct starpu_stdio_obj *tmp = (struct starpu_stdio_obj *) obj; FILE *f = tmp->file; if (!f) f = _starpu_stdio_reopen(obj); /* update file size to realise the next good full_read */ if(size != tmp->size) { int val = _starpu_fftruncate(f,size); STARPU_ASSERT(val == 0); tmp->size = size; } int res = fseek(f, 0, SEEK_SET); STARPU_ASSERT_MSG(res == 0, "Stdio write failed"); fwrite(ptr, 1, size, f); if (!tmp->file) _starpu_stdio_reclose(f); return 0; } static void *starpu_stdio_plug(void *parameter, starpu_ssize_t size STARPU_ATTRIBUTE_UNUSED) { struct starpu_stdio_base * base; struct stat buf; _STARPU_MALLOC(base, sizeof(*base)); base->created = 0; base->path = strdup((char *) parameter); STARPU_ASSERT(base->path); if (!(stat(base->path, &buf) == 0 && S_ISDIR(buf.st_mode))) { _starpu_mkpath(base->path, S_IRWXU); base->created = 1; } return (void *) base; } /* free memory allocated for the base */ static void starpu_stdio_unplug(void *base) { struct starpu_stdio_base * fileBase = (struct starpu_stdio_base *) base; if (fileBase->created) rmdir(fileBase->path); free(fileBase->path); free(fileBase); } static int get_stdio_bandwidth_between_disk_and_main_ram(unsigned node, void *base) { unsigned iter; double timing_slowness, timing_latency; double start; double end; char *buf; struct starpu_stdio_base * fileBase = (struct starpu_stdio_base *) base; srand(time(NULL)); starpu_malloc_flags((void **) &buf, STARPU_DISK_SIZE_MIN, 0); STARPU_ASSERT(buf != NULL); /* allocate memory */ void *mem = _starpu_disk_alloc(node, STARPU_DISK_SIZE_MIN); /* fail to alloc */ if (mem == NULL) return 0; struct starpu_stdio_obj *tmp = (struct starpu_stdio_obj *) mem; memset(buf, 0, STARPU_DISK_SIZE_MIN); /* Measure upload slowness */ start = starpu_timing_now(); for (iter = 0; iter < NITER; ++iter) { FILE *f = tmp->file; _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, 0, STARPU_DISK_SIZE_MIN, NULL); if (!f) f = _starpu_stdio_reopen(tmp); /* clean cache memory */ int res = fflush(f); STARPU_ASSERT_MSG(res == 0, "Slowness computation failed \n"); #ifdef STARPU_HAVE_WINDOWS res = _commit(fileno(f)); #else res = fsync(fileno(f)); #endif STARPU_ASSERT_MSG(res == 0, "Slowness computation failed \n"); if (!tmp->file) _starpu_stdio_reclose(f); } end = starpu_timing_now(); timing_slowness = end - start; /* free memory */ starpu_free_flags(buf, STARPU_DISK_SIZE_MIN, 0); starpu_malloc_flags((void**) &buf, sizeof(char), 0); STARPU_ASSERT(buf != NULL); *buf = 0; /* Measure latency */ start = starpu_timing_now(); for (iter = 0; iter < NITER; ++iter) { FILE *f = tmp->file; _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, rand() % (STARPU_DISK_SIZE_MIN -1) , 1, NULL); if (!f) f = _starpu_stdio_reopen(tmp); int res = fflush(f); STARPU_ASSERT_MSG(res == 0, "Latency computation failed"); #ifdef STARPU_HAVE_WINDOWS res = _commit(fileno(f)); #else res = fsync(fileno(f)); #endif STARPU_ASSERT_MSG(res == 0, "Latency computation failed"); if (!tmp->file) _starpu_stdio_reclose(f); } end = starpu_timing_now(); timing_latency = end - start; _starpu_disk_free(node, mem, STARPU_DISK_SIZE_MIN); starpu_free_flags(buf, sizeof(char), 0); _starpu_save_bandwidth_and_latency_disk((NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, (NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, timing_latency/NITER, timing_latency/NITER, node, fileBase->path); return 1; } struct starpu_disk_ops starpu_disk_stdio_ops = { .alloc = starpu_stdio_alloc, .free = starpu_stdio_free, .open = starpu_stdio_open, .close = starpu_stdio_close, .read = starpu_stdio_read, .write = starpu_stdio_write, .plug = starpu_stdio_plug, .unplug = starpu_stdio_unplug, .copy = NULL, .bandwidth = get_stdio_bandwidth_between_disk_and_main_ram, .full_read = starpu_stdio_full_read, .full_write = starpu_stdio_full_write }; starpu-1.4.9+dfsg/src/core/disk_ops/disk_unistd.c000066400000000000000000000053651507764646700220370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #include #include /* ------------------- use UNISTD to write on disk ------------------- */ /* allocation memory on disk */ static void *starpu_unistd_alloc(void *base, size_t size) { struct starpu_unistd_global_obj *obj; _STARPU_MALLOC(obj, sizeof(struct starpu_unistd_global_obj)); /* only flags change between unistd and unistd_o_direct */ obj->flags = O_RDWR | O_BINARY; return starpu_unistd_global_alloc(obj, base, size); } /* open an existing memory on disk */ static void *starpu_unistd_open(void *base, void *pos, size_t size) { struct starpu_unistd_global_obj *obj; _STARPU_MALLOC(obj, sizeof(struct starpu_unistd_global_obj)); /* only flags change between unistd and unistd_o_direct */ obj->flags = O_RDWR | O_BINARY; return starpu_unistd_global_open(obj, base, pos, size); } struct starpu_disk_ops starpu_disk_unistd_ops = { .alloc = starpu_unistd_alloc, .free = starpu_unistd_global_free, .open = starpu_unistd_open, .close = starpu_unistd_global_close, .read = starpu_unistd_global_read, .write = starpu_unistd_global_write, .plug = starpu_unistd_global_plug, .unplug = starpu_unistd_global_unplug, #ifdef STARPU_UNISTD_USE_COPY .copy = starpu_unistd_global_copy, #else .copy = NULL, #endif .bandwidth = _starpu_get_unistd_global_bandwidth_between_disk_and_main_ram, #ifdef HAVE_AIO_H .async_read = starpu_unistd_global_async_read, .async_write = starpu_unistd_global_async_write, .async_full_read = starpu_unistd_global_async_full_read, .async_full_write = starpu_unistd_global_async_full_write, .wait_request = starpu_unistd_global_wait_request, .test_request = starpu_unistd_global_test_request, .free_request = starpu_unistd_global_free_request, #endif .full_read = starpu_unistd_global_full_read, .full_write = starpu_unistd_global_full_write }; starpu-1.4.9+dfsg/src/core/disk_ops/disk_unistd_o_direct.c000066400000000000000000000151511507764646700237010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #include #include /* ------------------- use UNISTD to write on disk ------------------- */ /* allocation memory on disk */ static void *starpu_unistd_o_direct_alloc(void *base, size_t size) { struct starpu_unistd_global_obj *obj; _STARPU_MALLOC(obj, sizeof(struct starpu_unistd_global_obj)); /* only flags change between unistd and unistd_o_direct */ obj->flags = O_RDWR | O_DIRECT | O_BINARY; return starpu_unistd_global_alloc(obj, base, size); } /* open an existing memory on disk */ static void *starpu_unistd_o_direct_open(void *base, void *pos, size_t size) { struct starpu_unistd_global_obj *obj; _STARPU_MALLOC(obj, sizeof(struct starpu_unistd_global_obj)); /* only flags change between unistd and unistd_o_direct */ obj->flags = O_RDWR | O_DIRECT | O_BINARY; return starpu_unistd_global_open(obj, base, pos, size); } /* read the memory disk */ static int starpu_unistd_o_direct_read(void *base, void *obj, void *buf, off_t offset, size_t size) { STARPU_ASSERT_MSG((size % getpagesize()) == 0, "You can only read a multiple of page size %u Bytes (Here %d)", getpagesize(), (int) size); STARPU_ASSERT_MSG((((uintptr_t) buf) % getpagesize()) == 0, "You have to use starpu_malloc function to get aligned buffers for the unistd_o_direct variant"); return starpu_unistd_global_read(base, obj, buf, offset, size); } /* write on the memory disk */ static int starpu_unistd_o_direct_write(void *base, void *obj, const void *buf, off_t offset, size_t size) { STARPU_ASSERT_MSG((size % getpagesize()) == 0, "You can only write a multiple of page size %u Bytes (Here %d)", getpagesize(), (int) size); STARPU_ASSERT_MSG((((uintptr_t)buf) % getpagesize()) == 0, "You have to use starpu_malloc function to get aligned buffers for the unistd_o_direct variant"); return starpu_unistd_global_write(base, obj, buf, offset, size); } /* create a new copy of parameter == base */ static void *starpu_unistd_o_direct_plug(void *parameter, starpu_ssize_t size) { starpu_malloc_set_align(getpagesize()); return starpu_unistd_global_plug(parameter, size); } #if defined(HAVE_AIO_H) || defined(HAVE_LIBAIO_H) void *starpu_unistd_o_direct_global_async_read(void *base, void *obj, void *buf, off_t offset, size_t size) { STARPU_ASSERT_MSG((size % getpagesize()) == 0, "The unistd_o_direct variant can only read a multiple of page size %lu Bytes (Here %lu). Use the non-o_direct unistd variant if your data is not a multiple of %lu", (unsigned long) getpagesize(), (unsigned long) size, (unsigned long) getpagesize()); STARPU_ASSERT_MSG((((uintptr_t) buf) % getpagesize()) == 0, "You have to use starpu_malloc function to get aligned buffers for the unistd_o_direct variant"); return starpu_unistd_global_async_read(base, obj, buf, offset, size); } void *starpu_unistd_o_direct_global_async_write(void *base, void *obj, void *buf, off_t offset, size_t size) { STARPU_ASSERT_MSG((size % getpagesize()) == 0, "The unistd_o_direct variant can only write a multiple of page size %lu Bytes (Here %lu). Use the non-o_direct unistd variant if your data is not a multiple of %lu", (unsigned long) getpagesize(), (unsigned long) size, (unsigned long) getpagesize()); STARPU_ASSERT_MSG((((uintptr_t)buf) % getpagesize()) == 0, "You have to use starpu_malloc function to get aligned buffers for the unistd_o_direct variant"); return starpu_unistd_global_async_write(base, obj, buf, offset, size); } #endif #ifdef STARPU_UNISTD_USE_COPY void * starpu_unistd_o_direct_global_copy(void *base_src, void* obj_src, off_t offset_src, void *base_dst, void* obj_dst, off_t offset_dst, size_t size) { STARPU_ASSERT_MSG((size % getpagesize()) == 0, "The unistd_o_direct variant can only write a multiple of page size %lu Bytes (Here %lu). Use the non-o_direct unistd variant if your data is not a multiple of %lu", (unsigned long) getpagesize(), (unsigned long) size, (unsigned long) getpagesize()); return starpu_unistd_global_copy(base_src, obj_src, offset_src, base_dst, obj_dst, offset_dst, size); } #endif int starpu_unistd_o_direct_global_full_write(void *base, void *obj, void *ptr, size_t size) { STARPU_ASSERT_MSG((size % getpagesize()) == 0, "The unistd_o_direct variant can only write a multiple of page size %lu Bytes (Here %lu). Use the non-o_direct unistd variant if your data is not a multiple of %lu", (unsigned long) getpagesize(), (unsigned long) size, (unsigned long) getpagesize()); STARPU_ASSERT_MSG((((uintptr_t)ptr) % getpagesize()) == 0, "You have to use starpu_malloc function to get aligned buffers for the unistd_o_direct variant"); return starpu_unistd_global_full_write(base, obj, ptr, size); } struct starpu_disk_ops starpu_disk_unistd_o_direct_ops = { .alloc = starpu_unistd_o_direct_alloc, .free = starpu_unistd_global_free, .open = starpu_unistd_o_direct_open, .close = starpu_unistd_global_close, .read = starpu_unistd_o_direct_read, .write = starpu_unistd_o_direct_write, .plug = starpu_unistd_o_direct_plug, .unplug = starpu_unistd_global_unplug, #ifdef STARPU_UNISTD_USE_COPY .copy = starpu_unistd_o_direct_global_copy, #else .copy = NULL, #endif .bandwidth = _starpu_get_unistd_global_bandwidth_between_disk_and_main_ram, #if defined(HAVE_AIO_H) || defined(HAVE_LIBAIO_H) .async_read = starpu_unistd_o_direct_global_async_read, .async_write = starpu_unistd_o_direct_global_async_write, .wait_request = starpu_unistd_global_wait_request, .test_request = starpu_unistd_global_test_request, .free_request = starpu_unistd_global_free_request, .async_full_read = starpu_unistd_global_async_full_read, .async_full_write = starpu_unistd_global_async_full_write, #endif .full_read = starpu_unistd_global_full_read, .full_write = starpu_unistd_o_direct_global_full_write }; starpu-1.4.9+dfsg/src/core/disk_ops/unistd/000077500000000000000000000000001507764646700206505ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/core/disk_ops/unistd/disk_unistd_global.c000066400000000000000000000761231507764646700246650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #if defined(HAVE_LIBAIO_H) #include #elif defined(HAVE_AIO_H) #include #endif #ifdef HAVE_UNISTD_H # include #endif #include #include #include #include #include #include #include #include #ifdef STARPU_HAVE_WINDOWS # include #endif #define NITER _starpu_calibration_minimum #ifdef O_DIRECT # define MEM_SIZE getpagesize() #else # define MEM_SIZE 1 #endif #define MAX_OPEN_FILES 64 #define TEMP_HIERARCHY_DEPTH 2 #if !defined(HAVE_COPY_FILE_RANGE) && defined(__linux__) && defined(__NR_copy_file_range) static starpu_ssize_t copy_file_range(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags) { return syscall(__NR_copy_file_range, fd_in, off_in, fd_out, off_out, len, flags); } #endif static unsigned starpu_unistd_opened_files; #ifdef STARPU_UNISTD_USE_COPY LIST_TYPE(starpu_unistd_work_copy, int fd_src; int fd_dst; starpu_loff_t off_src; starpu_loff_t off_dst; struct starpu_unistd_global_obj * obj_src; struct starpu_unistd_global_obj * obj_dst; size_t len; unsigned flags; starpu_sem_t finished; ); struct starpu_unistd_copy_thread { int run; starpu_pthread_t thread; starpu_pthread_cond_t cond; starpu_pthread_mutex_t mutex; struct starpu_unistd_work_copy_list list; }; static struct starpu_unistd_copy_thread copy_thread[STARPU_MAXNODES][STARPU_MAXNODES]; static unsigned starpu_unistd_nb_disk_opened = 0; /* copy_file_range syscall can return ENOSYS. Use global var to catch * and prevent StarPU using direct disk to disk copy */ static int starpu_unistd_copy_works = 1; #endif struct starpu_unistd_base { char * path; int created; /* To know which thread handles the copy function */ #ifdef STARPU_UNISTD_USE_COPY unsigned disk_index; #endif #if defined(HAVE_LIBAIO_H) io_context_t ctx; struct starpu_unistd_aiocb_link * hashtable; starpu_pthread_mutex_t mutex; #endif }; #if defined(HAVE_LIBAIO_H) struct starpu_unistd_aiocb_link { UT_hash_handle hh; void * starpu_aiocb; void * aiocb; }; struct starpu_unistd_aiocb { int finished; struct iocb iocb; struct starpu_unistd_global_obj *obj; struct starpu_unistd_base *base; size_t len; }; #elif defined(HAVE_AIO_H) struct starpu_unistd_aiocb { struct aiocb aiocb; struct starpu_unistd_global_obj *obj; }; #endif enum starpu_unistd_wait_type { STARPU_UNISTD_AIOCB, STARPU_UNISTD_COPY }; union starpu_unistd_wait_event { struct starpu_unistd_work_copy * event_copy; #if defined(HAVE_LIBAIO_H) || defined(HAVE_AIO_H) struct starpu_unistd_aiocb event_aiocb; #endif }; struct starpu_unistd_wait { enum starpu_unistd_wait_type type; union starpu_unistd_wait_event event; }; /* ------------------- use UNISTD to write on disk ------------------- */ static void _starpu_unistd_init(struct starpu_unistd_global_obj *obj, int descriptor, char *path, size_t size) { STARPU_HG_DISABLE_CHECKING(starpu_unistd_opened_files); #ifdef STARPU_UNISTD_USE_COPY STARPU_HG_DISABLE_CHECKING(starpu_unistd_copy_works); #endif if (starpu_unistd_opened_files >= MAX_OPEN_FILES) { /* Too many opened files, avoid keeping this one opened */ close(descriptor); descriptor = -1; } else (void) STARPU_ATOMIC_ADD(&starpu_unistd_opened_files, 1); STARPU_PTHREAD_MUTEX_INIT(&obj->mutex, NULL); obj->descriptor = descriptor; obj->path = path; obj->size = size; } static int _starpu_unistd_reopen(struct starpu_unistd_global_obj *obj) { int id = open(obj->path, obj->flags); STARPU_ASSERT_MSG(id >= 0, "Reopening file %s failed: errno %d", obj->path, errno); return id; } static void _starpu_unistd_reclose(int id) { close(id); } static void _starpu_unistd_close(struct starpu_unistd_global_obj *obj) { if (obj->descriptor < 0) return; if (starpu_unistd_opened_files < MAX_OPEN_FILES) (void) STARPU_ATOMIC_ADD(&starpu_unistd_opened_files, -1); close(obj->descriptor); } static void _starpu_unistd_fini(struct starpu_unistd_global_obj *obj) { STARPU_PTHREAD_MUTEX_DESTROY(&obj->mutex); free(obj->path); obj->path = NULL; free(obj); } /* allocation memory on disk */ void *starpu_unistd_global_alloc(struct starpu_unistd_global_obj *obj, void *base, size_t size) { int id; struct starpu_unistd_base * fileBase = (struct starpu_unistd_base *) base; char *baseCpy = _starpu_mktemp_many(fileBase->path, TEMP_HIERARCHY_DEPTH, obj->flags, &id); /* fail */ if (!baseCpy) { free(obj); return NULL; } int val = _starpu_ftruncate(id,size); /* fail */ if (val < 0) { _STARPU_DISP("Could not truncate file, ftruncate failed with error '%s'\n", strerror(errno)); close(id); unlink(baseCpy); free(baseCpy); free(obj); return NULL; } _starpu_unistd_init(obj, id, baseCpy, size); return obj; } /* free memory on disk */ void starpu_unistd_global_free(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) { struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; _starpu_unistd_close(tmp); unlink(tmp->path); _starpu_rmtemp_many(tmp->path, TEMP_HIERARCHY_DEPTH); _starpu_unistd_fini(tmp); } /* open an existing memory on disk */ void *starpu_unistd_global_open(struct starpu_unistd_global_obj *obj, void *base, void *pos, size_t size) { struct starpu_unistd_base *fileBase = (struct starpu_unistd_base *) base; /* create template */ char *baseCpy; _STARPU_MALLOC(baseCpy, strlen(fileBase->path)+1+strlen(pos)+1); snprintf(baseCpy, strlen(fileBase->path)+1+strlen(pos)+1, "%s/%s", fileBase->path, (char *)pos); int id = open(baseCpy, obj->flags); if (id < 0) { free(obj); free(baseCpy); return NULL; } _starpu_unistd_init(obj, id, baseCpy, size); return obj; } /* free memory without delete it */ void starpu_unistd_global_close(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, size_t size STARPU_ATTRIBUTE_UNUSED) { struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; _starpu_unistd_close(tmp); _starpu_unistd_fini(tmp); } /* read the memory disk */ int starpu_unistd_global_read(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void *buf, off_t offset, size_t size) { struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; starpu_ssize_t nb; int fd = tmp->descriptor; starpu_ssize_t bytes_to_write = size; #ifdef HAVE_PREAD if (fd >= 0) { while (bytes_to_write > 0) { nb = pread(fd, buf, bytes_to_write, offset); STARPU_ASSERT_MSG(nb >= 0, "Starpu Disk unistd pread failed: size %lu got errno %d", (unsigned long) size, errno); bytes_to_write -= nb; buf = (char*) buf + nb; offset += nb; } } else #endif { if (tmp->descriptor >= 0) STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); else fd = _starpu_unistd_reopen(obj); int res = lseek(fd, offset, SEEK_SET); STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd lseek for read failed: offset %lu got errno %d", (unsigned long) offset, errno); while (bytes_to_write > 0) { nb = read(fd, buf, bytes_to_write); STARPU_ASSERT_MSG(nb >= 0, "Starpu Disk unistd read failed: offset %lu got errno %d", (unsigned long) offset, errno); bytes_to_write -= nb; buf = (char*) buf + nb; offset += nb; } if (tmp->descriptor >= 0) STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); else _starpu_unistd_reclose(fd); } return 0; } #if defined(HAVE_LIBAIO_H) void *starpu_unistd_global_async_read(void *base, void *obj, void *buf, off_t offset, size_t size) { struct starpu_unistd_base * fileBase = (struct starpu_unistd_base *) base; struct starpu_unistd_global_obj *tmp = obj; struct starpu_unistd_wait * event; _STARPU_CALLOC(event, 1,sizeof(*event)); event->type = STARPU_UNISTD_AIOCB; struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; struct iocb *iocb = &starpu_aiocb->iocb; starpu_aiocb->obj = obj; int fd = tmp->descriptor; int err; if (fd < 0) fd = _starpu_unistd_reopen(obj); starpu_aiocb->len = size; starpu_aiocb->finished = 0; starpu_aiocb->base = fileBase; io_prep_pread(iocb, fd, buf, size, offset); if ((err = io_submit(fileBase->ctx, 1, &iocb)) < 0) { _STARPU_DISP("Warning: io_submit returned %d (%s)\n", err, strerror(err)); if (tmp->descriptor < 0) _starpu_unistd_reclose(fd); iocb = NULL; } struct starpu_unistd_aiocb_link *l; _STARPU_MALLOC(l, sizeof(*l)); l->aiocb = iocb; l->starpu_aiocb = starpu_aiocb; STARPU_PTHREAD_MUTEX_LOCK(&fileBase->mutex); HASH_ADD_PTR(fileBase->hashtable, aiocb, l); STARPU_PTHREAD_MUTEX_UNLOCK(&fileBase->mutex); return event; } #elif defined(HAVE_AIO_H) void *starpu_unistd_global_async_read(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void *buf, off_t offset, size_t size) { struct starpu_unistd_global_obj *tmp = obj; struct starpu_unistd_wait * event; _STARPU_CALLOC(event, 1,sizeof(*event)); event->type = STARPU_UNISTD_AIOCB; struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; struct aiocb *aiocb = &starpu_aiocb->aiocb; starpu_aiocb->obj = obj; int fd = tmp->descriptor; if (fd < 0) fd = _starpu_unistd_reopen(obj); aiocb->aio_fildes = fd; aiocb->aio_offset = offset; aiocb->aio_nbytes = size; aiocb->aio_buf = buf; aiocb->aio_reqprio = 0; aiocb->aio_lio_opcode = LIO_NOP; if (aio_read(aiocb) < 0) { _STARPU_DISP("Warning: aio_read returned %d (%s)\n", errno, strerror(errno)); if (tmp->descriptor < 0) _starpu_unistd_reclose(fd); } return event; } #endif int starpu_unistd_global_full_read(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void **ptr, size_t *size, unsigned dst_node) { struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; int fd = tmp->descriptor; if (fd < 0) fd = _starpu_unistd_reopen(obj); #ifdef STARPU_HAVE_WINDOWS *size = _filelength(fd); #else struct stat st; int ret = fstat(fd, &st); STARPU_ASSERT(ret==0); *size = st.st_size; #endif if (tmp->descriptor < 0) _starpu_unistd_reclose(fd); /* Allocated aligned buffer */ _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); return starpu_unistd_global_read(base, obj, *ptr, 0, *size); } /* write on the memory disk */ int starpu_unistd_global_write(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, const void *buf, off_t offset, size_t size) { struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; starpu_ssize_t res; int fd = tmp->descriptor; starpu_ssize_t bytes_to_write = size; #ifdef HAVE_PWRITE if (fd >= 0) { while (bytes_to_write > 0) { res = pwrite(fd, buf, bytes_to_write, offset); STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd pwrite failed: offset %lu got errno %d", (unsigned long) offset, errno); bytes_to_write -= res; buf = (char*) buf + res; offset += res; } } else #endif { if (tmp->descriptor >= 0) STARPU_PTHREAD_MUTEX_LOCK(&tmp->mutex); else fd = _starpu_unistd_reopen(obj); res = lseek(fd, offset, SEEK_SET); STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd lseek for write failed: offset %lu got errno %d", (unsigned long) offset, errno); while (bytes_to_write > 0) { res = write(fd, buf, bytes_to_write); STARPU_ASSERT_MSG(res >= 0, "Starpu Disk unistd write failed: offset %lu got errno %d", (unsigned long) offset, errno); bytes_to_write -= res; buf = (char*) buf + res; offset += res; } if (tmp->descriptor >= 0) STARPU_PTHREAD_MUTEX_UNLOCK(&tmp->mutex); else _starpu_unistd_reclose(fd); } return 0; } #if defined(HAVE_LIBAIO_H) void *starpu_unistd_global_async_write(void *base, void *obj, void *buf, off_t offset, size_t size) { struct starpu_unistd_base * fileBase = (struct starpu_unistd_base *) base; struct starpu_unistd_global_obj *tmp = obj; struct starpu_unistd_wait * event; _STARPU_CALLOC(event, 1,sizeof(*event)); event->type = STARPU_UNISTD_AIOCB; struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; struct iocb *iocb = &starpu_aiocb->iocb; starpu_aiocb->obj = obj; int fd = tmp->descriptor; int err; if (fd < 0) fd = _starpu_unistd_reopen(obj); starpu_aiocb->len = size; starpu_aiocb->finished = 0; starpu_aiocb->base = fileBase; io_prep_pwrite(iocb, fd, buf, size, offset); if ((err = io_submit(fileBase->ctx, 1, &iocb)) < 0) { _STARPU_DISP("Warning: io_submit returned %d (%s)\n", err, strerror(err)); if (tmp->descriptor < 0) _starpu_unistd_reclose(fd); iocb = NULL; } struct starpu_unistd_aiocb_link *l; _STARPU_MALLOC(l, sizeof(*l)); l->aiocb = iocb; l->starpu_aiocb = starpu_aiocb; STARPU_PTHREAD_MUTEX_LOCK(&fileBase->mutex); HASH_ADD_PTR(fileBase->hashtable, aiocb, l); STARPU_PTHREAD_MUTEX_UNLOCK(&fileBase->mutex); return event; } #elif defined(HAVE_AIO_H) void *starpu_unistd_global_async_write(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void *buf, off_t offset, size_t size) { struct starpu_unistd_global_obj *tmp = obj; struct starpu_unistd_wait * event; _STARPU_CALLOC(event, 1,sizeof(*event)); event->type = STARPU_UNISTD_AIOCB; struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; struct aiocb *aiocb = &starpu_aiocb->aiocb; starpu_aiocb->obj = obj; int fd = tmp->descriptor; if (fd < 0) fd = _starpu_unistd_reopen(obj); aiocb->aio_fildes = fd; aiocb->aio_offset = offset; aiocb->aio_nbytes = size; aiocb->aio_buf = buf; aiocb->aio_reqprio = 0; aiocb->aio_lio_opcode = LIO_NOP; if (aio_write(aiocb) < 0) { _STARPU_DISP("Warning: aio_write returned %d (%s)\n", errno, strerror(errno)); if (tmp->descriptor < 0) _starpu_unistd_reclose(fd); aiocb = NULL; } return event; } #endif int starpu_unistd_global_full_write(void *base STARPU_ATTRIBUTE_UNUSED, void *obj, void *ptr, size_t size) { struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; /* update file size to realise the next good full_read */ if(size != tmp->size) { int fd = tmp->descriptor; if (fd < 0) fd = _starpu_unistd_reopen(obj); int val = _starpu_ftruncate(fd,size); if (tmp->descriptor < 0) _starpu_unistd_reclose(fd); STARPU_ASSERT(val == 0); tmp->size = size; } return starpu_unistd_global_write(base, obj, ptr, 0, size); } #if defined(HAVE_AIO_H) void * starpu_unistd_global_async_full_read (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node) { struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; int fd = tmp->descriptor; if (fd < 0) fd = _starpu_unistd_reopen(obj); #ifdef STARPU_HAVE_WINDOWS *size = _filelength(fd); #else struct stat st; int ret = fstat(fd, &st); STARPU_ASSERT(ret==0); *size = st.st_size; #endif #ifdef STARPU_LINUX_SYS /* on Linux, read() (and similar system calls) will transfer at most 0x7ffff000 bytes, see read(2) */ /* FIXME: make starpu_unistd_global_test_request and starpu_unistd_global_wait_request * resubmit an updated request whenever the request completion is truncated */ if (*size > 0x7ffff000) return NULL; #endif if (tmp->descriptor < 0) _starpu_unistd_reclose(fd); /* Allocated aligned buffer */ _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); return starpu_unistd_global_async_read(base, obj, *ptr, 0, *size); } void * starpu_unistd_global_async_full_write (void * base, void * obj, void * ptr, size_t size) { struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj; #ifdef STARPU_LINUX_SYS /* on Linux, write() (and similar system calls) will transfer at most 0x7ffff000 bytes, see write(2) */ /* FIXME: make starpu_unistd_global_test_request and starpu_unistd_global_wait_request * resubmit an updated request whenever the request completion is truncated */ if (size > 0x7ffff000) return NULL; #endif /* update file size to realise the next good full_read */ if(size != tmp->size) { int fd = tmp->descriptor; if (fd < 0) fd = _starpu_unistd_reopen(obj); int val = _starpu_ftruncate(fd,size); if (tmp->descriptor < 0) _starpu_unistd_reclose(fd); STARPU_ASSERT(val == 0); tmp->size = size; } return starpu_unistd_global_async_write(base, obj, ptr, 0, size); } #endif #ifdef STARPU_UNISTD_USE_COPY static void * starpu_unistd_internal_thread(void * arg) { struct starpu_unistd_copy_thread * internal_copy_thread = (struct starpu_unistd_copy_thread *) arg; while (internal_copy_thread->run || !starpu_unistd_work_copy_list_empty(&internal_copy_thread->list)) { STARPU_PTHREAD_MUTEX_LOCK(&internal_copy_thread->mutex); if (internal_copy_thread->run && starpu_unistd_work_copy_list_empty(&internal_copy_thread->list)) STARPU_PTHREAD_COND_WAIT(&internal_copy_thread->cond, &internal_copy_thread->mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&internal_copy_thread->mutex); if (!starpu_unistd_work_copy_list_empty(&internal_copy_thread->list)) { STARPU_PTHREAD_MUTEX_LOCK(&internal_copy_thread->mutex); struct starpu_unistd_work_copy * work = starpu_unistd_work_copy_list_pop_back(&internal_copy_thread->list); STARPU_PTHREAD_MUTEX_UNLOCK(&internal_copy_thread->mutex); starpu_ssize_t ret = copy_file_range(work->fd_src, &work->off_src, work->fd_dst, &work->off_dst, work->len, work->flags); if (ret == -1 && (errno == ENOSYS || errno == EINVAL)) { void *buf; /* System call not supported, or glibc * compatibility layer does not work (e.g. * because we use O_DIRECT and glibc doesn't * align the buffer), avoid submitting more * copies. */ starpu_unistd_copy_works = 0; /* And do the copy by hand for this time */ starpu_malloc(&buf, work->len); ret = pread(work->fd_src, buf, work->len, work->off_src); STARPU_ASSERT_MSG(ret >= 0, "Reading failed (errno %d)", errno); STARPU_ASSERT_MSG((size_t) ret == work->len, "Reading failed (value %ld instead of %ld)", (long)ret, (long)work->len); ret = pwrite(work->fd_dst, buf, work->len, work->off_dst); STARPU_ASSERT_MSG(ret >= 0, "Writing failed (errno %d)", errno); STARPU_ASSERT_MSG((size_t) ret == work->len, "Writing failed (value %ld instead of %ld)", (long)ret, (long)work->len); starpu_free_noflag(buf, work->len); } else { STARPU_ASSERT_MSG(ret >= 0, "Copy_file_range failed (errno %d)", errno); STARPU_ASSERT_MSG((size_t) ret == work->len, "Copy_file_range failed (value %ld instead of %ld)", (long)ret, (long)work->len); } starpu_sem_post(&work->finished); /* Don't free work, it's done when tested/waited are completed */ } } return NULL; } static void initialize_working_thread(struct starpu_unistd_copy_thread *internal_copy_thread) { STARPU_PTHREAD_MUTEX_INIT(&internal_copy_thread->mutex, NULL); STARPU_PTHREAD_COND_INIT(&internal_copy_thread->cond, NULL); internal_copy_thread->run = 1; starpu_unistd_work_copy_list_init(&internal_copy_thread->list); STARPU_PTHREAD_CREATE(&internal_copy_thread->thread, NULL, starpu_unistd_internal_thread, internal_copy_thread); } #endif /* create a new copy of parameter == base */ void *starpu_unistd_global_plug(void *parameter, starpu_ssize_t size STARPU_ATTRIBUTE_UNUSED) { struct starpu_unistd_base * base; struct stat buf; _STARPU_MALLOC(base, sizeof(*base)); base->created = 0; base->path = strdup((char *) parameter); STARPU_ASSERT(base->path); if (!(stat(base->path, &buf) == 0 && S_ISDIR(buf.st_mode))) { _starpu_mkpath(base->path, S_IRWXU); base->created = 1; } #if defined(HAVE_LIBAIO_H) STARPU_PTHREAD_MUTEX_INIT(&base->mutex, NULL); base->hashtable = NULL; unsigned nb_event = MAX_PENDING_REQUESTS_PER_NODE + MAX_PENDING_PREFETCH_REQUESTS_PER_NODE + MAX_PENDING_IDLE_REQUESTS_PER_NODE; memset(&base->ctx, 0, sizeof(base->ctx)); int ret = io_setup(nb_event, &base->ctx); STARPU_ASSERT(ret == 0); #endif #ifdef STARPU_UNISTD_USE_COPY base->disk_index = starpu_unistd_nb_disk_opened; starpu_unistd_nb_disk_opened++; unsigned i; for (i = 0; i < starpu_unistd_nb_disk_opened; i++) { initialize_working_thread(©_thread[i][base->disk_index]); /* don't initialize twice this case */ if (i != base->disk_index) initialize_working_thread(©_thread[base->disk_index][i]); } #endif return (void *) base; } #ifdef STARPU_UNISTD_USE_COPY static void ending_working_thread(struct starpu_unistd_copy_thread *internal_copy_thread) { STARPU_PTHREAD_MUTEX_LOCK(&internal_copy_thread->mutex); internal_copy_thread->run = 0; STARPU_PTHREAD_COND_BROADCAST(&internal_copy_thread->cond); STARPU_PTHREAD_MUTEX_UNLOCK(&internal_copy_thread->mutex); STARPU_PTHREAD_JOIN(internal_copy_thread->thread, NULL); STARPU_PTHREAD_MUTEX_DESTROY(&internal_copy_thread->mutex); STARPU_PTHREAD_COND_DESTROY(&internal_copy_thread->cond); } #endif /* free memory allocated for the base */ void starpu_unistd_global_unplug(void *base) { struct starpu_unistd_base * fileBase = (struct starpu_unistd_base *) base; #if defined(HAVE_LIBAIO_H) STARPU_PTHREAD_MUTEX_DESTROY(&fileBase->mutex); io_destroy(fileBase->ctx); #endif if (fileBase->created) rmdir(fileBase->path); #ifdef STARPU_UNISTD_USE_COPY unsigned i; for (i = 0; i < fileBase->disk_index+1; i++) { ending_working_thread(©_thread[i][fileBase->disk_index]); /* don't uninitialize twice this case */ if (i != fileBase->disk_index) ending_working_thread(©_thread[fileBase->disk_index][i]); } starpu_unistd_nb_disk_opened--; #endif free(fileBase->path); free(fileBase); } int _starpu_get_unistd_global_bandwidth_between_disk_and_main_ram(unsigned node, void *base) { int res; unsigned iter; double timing_slowness, timing_latency; double start; double end; struct starpu_unistd_base * fileBase = (struct starpu_unistd_base *) base; srand(time(NULL)); char *buf; starpu_malloc_flags((void *) &buf, STARPU_DISK_SIZE_MIN, 0); STARPU_ASSERT(buf != NULL); memset(buf, 0, STARPU_DISK_SIZE_MIN); /* allocate memory */ void *mem = _starpu_disk_alloc(node, STARPU_DISK_SIZE_MIN); /* fail to alloc */ if (mem == NULL) return 0; struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) mem; /* Measure upload slowness */ start = starpu_timing_now(); for (iter = 0; iter < NITER; ++iter) { int fd = tmp->descriptor; _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, 0, STARPU_DISK_SIZE_MIN, NULL); if (fd < 0) fd = _starpu_unistd_reopen(tmp); #ifdef STARPU_HAVE_WINDOWS res = _commit(fd); #else res = fsync(fd); #endif if (tmp->descriptor < 0) _starpu_unistd_reclose(fd); STARPU_ASSERT_MSG(res == 0, "bandwidth computation failed"); } end = starpu_timing_now(); timing_slowness = end - start; /* free memory */ starpu_free_flags(buf, STARPU_DISK_SIZE_MIN, 0); starpu_malloc_flags((void *) &buf, MEM_SIZE, 0); STARPU_ASSERT(buf != NULL); memset(buf, 0, MEM_SIZE); /* Measure latency */ start = starpu_timing_now(); for (iter = 0; iter < NITER; ++iter) { int fd = tmp->descriptor; _starpu_disk_write(STARPU_MAIN_RAM, node, mem, buf, (rand() % (STARPU_DISK_SIZE_MIN/MEM_SIZE)) * MEM_SIZE, MEM_SIZE, NULL); if (fd < 0) fd = _starpu_unistd_reopen(tmp); #ifdef STARPU_HAVE_WINDOWS res = _commit(fd); #else res = fsync(fd); #endif if (tmp->descriptor < 0) _starpu_unistd_reclose(fd); STARPU_ASSERT_MSG(res == 0, "Latency computation failed"); } end = starpu_timing_now(); timing_latency = end - start; _starpu_disk_free(node, mem, STARPU_DISK_SIZE_MIN); starpu_free_flags(buf, MEM_SIZE, 0); _starpu_save_bandwidth_and_latency_disk((NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, (NITER/timing_slowness)*STARPU_DISK_SIZE_MIN, timing_latency/NITER, timing_latency/NITER, node, fileBase->path); return 1; } void starpu_unistd_global_wait_request(void *async_channel) { struct starpu_unistd_wait * event = async_channel; switch (event->type) { case STARPU_UNISTD_AIOCB : { #if defined(HAVE_LIBAIO_H) struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; struct io_event ev; int values = -1; int myerrno = EAGAIN; while(!starpu_aiocb->finished || (values <= 0 && (myerrno == EAGAIN || myerrno == EINTR))) { /* Wait the answer of the request timeout IS NULL */ values = io_getevents(starpu_aiocb->base->ctx, 1, 1, &ev, NULL); if (values < 0) myerrno = -values; if (values > 0) { //we may catch an other request... STARPU_PTHREAD_MUTEX_LOCK(&starpu_aiocb->base->mutex); struct starpu_unistd_aiocb_link *l = NULL; HASH_FIND_PTR(starpu_aiocb->base->hashtable, &ev.obj, l); STARPU_ASSERT(l != NULL); HASH_DEL(starpu_aiocb->base->hashtable, l); STARPU_PTHREAD_MUTEX_UNLOCK(&starpu_aiocb->base->mutex); struct starpu_unistd_aiocb *aiocb = l->starpu_aiocb; STARPU_ASSERT_MSG(ev.res == aiocb->len, "Aio request was truncated"); aiocb->finished = 1; free(l); } } #elif defined(HAVE_AIO_H) struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; struct aiocb *aiocb = &starpu_aiocb->aiocb; int values = -1; int ret, myerrno = EAGAIN; starpu_ssize_t size; while(values < 0 && (myerrno == EAGAIN || myerrno == EINTR)) { /* Wait the answer of the request TIMESTAMP IS NULL */ values = aio_suspend((const struct aiocb **) &aiocb, 1, NULL); myerrno = errno; } ret = aio_error(aiocb); STARPU_ASSERT_MSG(!ret, "aio_error returned %d", ret); size = aio_return(aiocb); STARPU_ASSERT(size == (starpu_ssize_t) aiocb->aio_nbytes); #endif break; } #ifdef STARPU_UNISTD_USE_COPY case STARPU_UNISTD_COPY : { starpu_sem_wait(&event->event.event_copy->finished); break; } #endif default : STARPU_ABORT_MSG(); break; } } int starpu_unistd_global_test_request(void *async_channel) { struct starpu_unistd_wait * event = async_channel; switch (event->type) { case STARPU_UNISTD_AIOCB : { #if defined(HAVE_LIBAIO_H) struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; struct io_event ev; struct timespec ts; int ret; if (starpu_aiocb->finished) return 1; memset(&ts, 0, sizeof(ts)); /* Test the answer of the request */ ret = io_getevents(starpu_aiocb->base->ctx, 0, 1, &ev, &ts); if (ret == 1) { //we may catch an other request... STARPU_PTHREAD_MUTEX_LOCK(&starpu_aiocb->base->mutex); struct starpu_unistd_aiocb_link *l = NULL; HASH_FIND_PTR(starpu_aiocb->base->hashtable, &ev.obj, l); STARPU_ASSERT(l != NULL); HASH_DEL(starpu_aiocb->base->hashtable, l); STARPU_PTHREAD_MUTEX_UNLOCK(&starpu_aiocb->base->mutex); struct starpu_unistd_aiocb *aiocb = l->starpu_aiocb; STARPU_ASSERT_MSG(ev.res == aiocb->len, "Aio request was truncated"); aiocb->finished = 1; free(l); if (starpu_aiocb->finished) return 1; } return 0; #elif defined(HAVE_AIO_H) struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; struct aiocb *aiocb = &starpu_aiocb->aiocb; int ret; #if defined(__GLIBC__) && (__GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 22)) /* glibc's aio_error was not threadsafe before glibc 2.22 */ struct timespec ts = { .tv_sec = 0, .tv_nsec = 0 }; ret = aio_suspend((const struct aiocb **) &aiocb, 1, &ts); if (ret < 0 && (errno == EAGAIN || errno == EINTR)) return 0; STARPU_ASSERT_MSG(!ret, "aio_suspend returned %d %d\n", ret, errno); #endif starpu_ssize_t size; /* Test the answer of the request */ ret = aio_error(aiocb); if (ret == 0) { /* request is finished */ size = aio_return(aiocb); STARPU_ASSERT_MSG(size == (starpu_ssize_t) aiocb->aio_nbytes, "AIO op got %ld bytes instead of %ld bytes\n", (long) size, (long) aiocb->aio_nbytes); return 1; } if (ret == EINTR || ret == EINPROGRESS || ret == EAGAIN) return 0; /* an error occurred */ STARPU_ABORT_MSG("aio_error returned %d", ret); #endif break; } #ifdef STARPU_UNISTD_USE_COPY case STARPU_UNISTD_COPY : { return starpu_sem_trywait(&event->event.event_copy->finished) == 0; } #endif default : STARPU_ABORT_MSG(); break; } return 0; } void starpu_unistd_global_free_request(void *async_channel) { struct starpu_unistd_wait * event = async_channel; switch (event->type) { case STARPU_UNISTD_AIOCB : { #if defined(HAVE_LIBAIO_H) struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; struct iocb *iocb = &starpu_aiocb->iocb; if (starpu_aiocb->obj->descriptor < 0) _starpu_unistd_reclose(iocb->aio_fildes); free(event); #elif defined(HAVE_AIO_H) struct starpu_unistd_aiocb *starpu_aiocb = &event->event.event_aiocb; struct aiocb *aiocb = &starpu_aiocb->aiocb; if (starpu_aiocb->obj->descriptor < 0) _starpu_unistd_reclose(aiocb->aio_fildes); free(event); #endif break; } #ifdef STARPU_UNISTD_USE_COPY case STARPU_UNISTD_COPY : { starpu_sem_destroy(&event->event.event_copy->finished); int fd_src = event->event.event_copy->obj_src->descriptor; if (fd_src < 0) _starpu_unistd_reclose(event->event.event_copy->fd_src); int fd_dst = event->event.event_copy->obj_dst->descriptor; if (fd_dst < 0) _starpu_unistd_reclose(event->event.event_copy->fd_dst); starpu_unistd_work_copy_delete(event->event.event_copy); free(event); break; } #endif default : STARPU_ABORT_MSG(); break; } } #ifdef STARPU_UNISTD_USE_COPY void * starpu_unistd_global_copy(void *base_src, void* obj_src, off_t offset_src, void *base_dst, void* obj_dst, off_t offset_dst, size_t size) { struct starpu_unistd_global_obj * unistd_obj_src = obj_src; struct starpu_unistd_global_obj * unistd_obj_dst = obj_dst; struct starpu_unistd_base * unistd_base_src = base_src; struct starpu_unistd_base * unistd_base_dst = base_dst; if (starpu_unistd_copy_works == 0) /* It didn't work previously, don't bother submitting more. */ return NULL; struct starpu_unistd_wait * event; _STARPU_CALLOC(event, 1,sizeof(*event)); event->type = STARPU_UNISTD_COPY; int fd_src = unistd_obj_src->descriptor; if (fd_src < 0) fd_src = _starpu_unistd_reopen(obj_src); int fd_dst = unistd_obj_dst->descriptor; if (fd_dst < 0) fd_dst = _starpu_unistd_reopen(obj_dst); struct starpu_unistd_work_copy * work = starpu_unistd_work_copy_new(); work->fd_src = fd_src; work->fd_dst = fd_dst; work->obj_src = unistd_obj_src; work->obj_dst = unistd_obj_dst; work->off_src = offset_src; work->off_dst = offset_dst; work->len = size; /* currently not used by copy_file_range */ work->flags = 0; starpu_sem_init(&work->finished, 0, 0); event->event.event_copy = work; struct starpu_unistd_copy_thread * thread = ©_thread[unistd_base_src->disk_index][unistd_base_dst->disk_index]; STARPU_PTHREAD_MUTEX_LOCK(&thread->mutex); starpu_unistd_work_copy_list_push_front(&thread->list, work); STARPU_PTHREAD_COND_BROADCAST(&thread->cond); STARPU_PTHREAD_MUTEX_UNLOCK(&thread->mutex); return event; } #endif starpu-1.4.9+dfsg/src/core/disk_ops/unistd/disk_unistd_global.h000066400000000000000000000060251507764646700246640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DISK_UNISTD_GLOBAL_H__ #define __DISK_UNISTD_GLOBAL_H__ /** @file */ #include #ifdef __linux__ #include #endif #pragma GCC visibility push(hidden) #ifndef O_BINARY #define O_BINARY 0 #endif #define STARPU_UNISTD_USE_COPY 1 #if !defined(HAVE_COPY_FILE_RANGE) && !defined(__NR_copy_file_range) #undef STARPU_UNISTD_USE_COPY #endif #ifdef __linux__ typedef loff_t starpu_loff_t; #else typedef off_t starpu_loff_t; #endif struct starpu_unistd_global_obj { int descriptor; char * path; size_t size; int flags; starpu_pthread_mutex_t mutex; }; void * starpu_unistd_global_alloc (struct starpu_unistd_global_obj * obj, void *base, size_t size); void starpu_unistd_global_free (void *base, void *obj, size_t size); void * starpu_unistd_global_open (struct starpu_unistd_global_obj * obj, void *base, void *pos, size_t size); void starpu_unistd_global_close (void *base, void *obj, size_t size); int starpu_unistd_global_read (void *base, void *obj, void *buf, off_t offset, size_t size); int starpu_unistd_global_write (void *base, void *obj, const void *buf, off_t offset, size_t size); void * starpu_unistd_global_plug (void *parameter, starpu_ssize_t size); void starpu_unistd_global_unplug (void *base); int _starpu_get_unistd_global_bandwidth_between_disk_and_main_ram(unsigned node, void *base); void* starpu_unistd_global_async_read (void *base, void *obj, void *buf, off_t offset, size_t size); void* starpu_unistd_global_async_write (void *base, void *obj, void *buf, off_t offset, size_t size); void * starpu_unistd_global_async_full_write (void * base, void * obj, void * ptr, size_t size); void * starpu_unistd_global_async_full_read (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node); void starpu_unistd_global_wait_request(void * async_channel); int starpu_unistd_global_test_request(void * async_channel); void starpu_unistd_global_free_request(void * async_channel); int starpu_unistd_global_full_read(void *base, void * obj, void ** ptr, size_t * size, unsigned dst_node); int starpu_unistd_global_full_write (void * base, void * obj, void * ptr, size_t size); #ifdef STARPU_UNISTD_USE_COPY void * starpu_unistd_global_copy(void *base_src, void* obj_src, off_t offset_src, void *base_dst, void* obj_dst, off_t offset_dst, size_t size); #endif #pragma GCC visibility pop #endif starpu-1.4.9+dfsg/src/core/drivers.c000066400000000000000000000035541507764646700173600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include int starpu_driver_init(struct starpu_driver *d) { STARPU_ASSERT(d); struct _starpu_worker *worker = _starpu_get_worker_from_driver(d); if (worker->driver_ops == NULL) return -EINVAL; else return worker->driver_ops->init(worker); } int starpu_driver_run(struct starpu_driver *d) { if (!d) { _STARPU_DEBUG("Invalid argument\n"); return -EINVAL; } struct _starpu_worker *worker = _starpu_get_worker_from_driver(d); if (worker->driver_ops == NULL) return -EINVAL; else return worker->driver_ops->run(worker); } int starpu_driver_run_once(struct starpu_driver *d) { STARPU_ASSERT(d); struct _starpu_worker *worker = _starpu_get_worker_from_driver(d); if (worker->driver_ops == NULL) return -EINVAL; else return worker->driver_ops->run_once(worker); } int starpu_driver_deinit(struct starpu_driver *d) { STARPU_ASSERT(d); struct _starpu_worker *worker = _starpu_get_worker_from_driver(d); if (worker->driver_ops == NULL) return -EINVAL; else return worker->driver_ops->deinit(worker); } starpu-1.4.9+dfsg/src/core/drivers.h000066400000000000000000000030441507764646700173570ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DRIVERS_H__ #define __DRIVERS_H__ #pragma GCC visibility push(hidden) /** @file */ struct _starpu_driver_ops { int (*init)(struct _starpu_worker *worker); /**< Initialize the thread for running the worker */ int (*run)(struct _starpu_worker *worker); /**< Actually run the worker */ int (*run_once)(struct _starpu_worker *worker); /**< Run just one loop of the worker */ int (*deinit)(struct _starpu_worker *worker); /**< Deinitialize the thread after running a worker */ int (*set_devid)(struct starpu_driver *driver, struct _starpu_worker *worker); /**< Sets into \p driver the id for worker \p worker */ int (*is_devid)(struct starpu_driver *driver, struct _starpu_worker *worker); /**< Tests whether \p driver has the id for worker \p worker */ }; #pragma GCC visibility pop #endif // __DRIVERS_H__ starpu-1.4.9+dfsg/src/core/errorcheck.c000066400000000000000000000072271507764646700200320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include void _starpu_add_worker_status(struct _starpu_worker *worker, enum _starpu_worker_status_index st, struct timespec *time) { starpu_pthread_mutex_t *sched_mutex; starpu_pthread_cond_t *sched_cond; starpu_worker_get_sched_condition(worker->workerid, &sched_mutex, &sched_cond); STARPU_PTHREAD_MUTEX_LOCK_SCHED(sched_mutex); STARPU_ASSERT(!(worker->status & (1 << st))); if (starpu_profiling_status_get()) _starpu_worker_start_state(worker->workerid, st, time); worker->status |= (1 << st); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(sched_mutex); } void _starpu_add_local_worker_status(enum _starpu_worker_status_index st, struct timespec *time) { struct _starpu_worker *worker = _starpu_get_local_worker_key(); /* It is possible that we call this function from the application (and * thereforce outside a worker), for instance if we are executing the * callback function of a task with a "NULL" codelet. */ if (worker) _starpu_add_worker_status(worker, st, time); } void _starpu_clear_worker_status(struct _starpu_worker *worker, enum _starpu_worker_status_index st, struct timespec *time) { starpu_pthread_mutex_t *sched_mutex; starpu_pthread_cond_t *sched_cond; starpu_worker_get_sched_condition(worker->workerid, &sched_mutex, &sched_cond); STARPU_PTHREAD_MUTEX_LOCK_SCHED(sched_mutex); STARPU_ASSERT((worker->status & (1 << st))); if (starpu_profiling_status_get()) _starpu_worker_stop_state(worker->workerid, st, time); worker->status &= ~(1 << st); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(sched_mutex); } void _starpu_clear_local_worker_status(enum _starpu_worker_status_index st, struct timespec *time) { struct _starpu_worker *worker = _starpu_get_local_worker_key(); /* It is possible that we call this function from the application (and * thereforce outside a worker), for instance if we are executing the * callback function of a task with a "NULL" codelet. */ if (worker) _starpu_clear_worker_status(worker, st, time); } enum _starpu_worker_status _starpu_get_local_worker_status(void) { struct _starpu_worker *worker = _starpu_get_local_worker_key(); if (STARPU_UNLIKELY(!worker)) return STATUS_INVALID; return worker->status; } /* It is forbidden to call blocking operations with Callback and during the * execution of a task. */ unsigned _starpu_worker_may_perform_blocking_calls(void) { enum _starpu_worker_status st = _starpu_get_local_worker_status(); #ifdef STARPU_OPENMP /* When the current task is an OpenMP task, we may need to block, * especially when unregistering data used by child tasks. However, * we don't want to blindly disable the check for non OpenMP tasks. */ const struct starpu_task * const task = starpu_task_get_current(); const int blocking_call_check_override = task && task->omp_task; #else /* STARPU_OPENMP */ const int blocking_call_check_override = 0; #endif /* STARPU_OPENMP */ return blocking_call_check_override || (st == STATUS_INVALID) || (!(st & STATUS_CALLBACK) && !(st & STATUS_EXECUTING)); } starpu-1.4.9+dfsg/src/core/errorcheck.h000066400000000000000000000063521507764646700200350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __ERRORCHECK_H__ #define __ERRORCHECK_H__ /** @file */ #include #pragma GCC visibility push(hidden) /** This type enumerates the actions that can be done by a worker. * Some can be happening during others, that is why * enum _starpu_worker_status * is a bitset indexed by the values of enum _starpu_worker_status_index. */ enum _starpu_worker_status_index { STATUS_INDEX_INITIALIZING = 0, STATUS_INDEX_EXECUTING, STATUS_INDEX_CALLBACK, STATUS_INDEX_WAITING, STATUS_INDEX_SLEEPING, STATUS_INDEX_SCHEDULING, STATUS_INDEX_NR, }; /** This type describes in which state a worker may be. */ enum _starpu_worker_status { /** invalid status (for instance if we request the status of some thread * that is not controlled by StarPU */ STATUS_INVALID = -1, /** Nothing particular, thus just overhead */ STATUS_UNKNOWN = 0, /** during the initialization */ STATUS_INITIALIZING = 1 << STATUS_INDEX_INITIALIZING, /** during the execution of a codelet */ STATUS_EXECUTING = 1 << STATUS_INDEX_EXECUTING, /** during the execution of the callback */ STATUS_CALLBACK = 1 << STATUS_INDEX_CALLBACK, /** while waiting for a data transfer */ STATUS_WAITING = 1 << STATUS_INDEX_WAITING, /** while sleeping because there is no task to do */ STATUS_SLEEPING = 1 << STATUS_INDEX_SLEEPING, /** while executing the scheduler code */ STATUS_SCHEDULING = 1 << STATUS_INDEX_SCHEDULING, }; struct _starpu_worker; /** Specify what the local worker is currently doing (eg. executing a callback). * This permits to detect if this is legal to do a blocking call for instance. */ void _starpu_add_worker_status(struct _starpu_worker *worker, enum _starpu_worker_status_index st, struct timespec *time); void _starpu_add_local_worker_status(enum _starpu_worker_status_index st, struct timespec *time); /** Clear the fact that the local worker was currently doing something(eg. executing a callback). */ void _starpu_clear_worker_status(struct _starpu_worker *worker, enum _starpu_worker_status_index st, struct timespec *time); void _starpu_clear_local_worker_status(enum _starpu_worker_status_index st, struct timespec *time); /** Indicate what type of operation the worker is currently doing. */ enum _starpu_worker_status _starpu_get_local_worker_status(void); /** It is forbidden to do blocking calls during some operations such as callback * or during the execution of a task. This function indicates whether it is * legal to call a blocking operation in the current context. */ unsigned _starpu_worker_may_perform_blocking_calls(void); #pragma GCC visibility pop #endif // __ERRORCHECK_H__ starpu-1.4.9+dfsg/src/core/idle_hook.c000066400000000000000000000055421507764646700176360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #define NMAXHOOKS 16 struct progression_hook { unsigned (*func)(void *arg); void *arg; unsigned active; }; /* protect the hook table */ static starpu_pthread_rwlock_t idle_hook_rwlock; static struct progression_hook idle_hooks[NMAXHOOKS] = {{NULL, NULL, 0}}; static int active_idle_hook_cnt = 0; /* * Statically initializing idle_hook_rwlock seems to lead to weird errors * on Darwin, so we do it dynamically. */ void _starpu_init_idle_hooks(void) { STARPU_PTHREAD_RWLOCK_INIT(&idle_hook_rwlock, NULL); STARPU_HG_DISABLE_CHECKING(active_idle_hook_cnt); } int starpu_idle_hook_register(unsigned (*func)(void *arg), void *arg) { int hook; STARPU_PTHREAD_RWLOCK_WRLOCK(&idle_hook_rwlock); for (hook = 0; hook < NMAXHOOKS; hook++) { if (!idle_hooks[hook].active) { /* We found an empty slot */ idle_hooks[hook].func = func; idle_hooks[hook].arg = arg; idle_hooks[hook].active = 1; active_idle_hook_cnt++; STARPU_PTHREAD_RWLOCK_UNLOCK(&idle_hook_rwlock); return hook; } } STARPU_PTHREAD_RWLOCK_UNLOCK(&idle_hook_rwlock); starpu_wake_all_blocked_workers(); /* We could not find an empty slot */ return -1; } void starpu_idle_hook_deregister(int hook_id) { STARPU_PTHREAD_RWLOCK_WRLOCK(&idle_hook_rwlock); if (idle_hooks[hook_id].active) active_idle_hook_cnt--; idle_hooks[hook_id].active = 0; STARPU_PTHREAD_RWLOCK_UNLOCK(&idle_hook_rwlock); } unsigned _starpu_execute_registered_idle_hooks(void) { if (active_idle_hook_cnt == 0) return 1; /* By default, it is possible to block, but if some idle hooks * requires that it's not blocking, we disable blocking. */ unsigned may_block = 1; unsigned hook; for (hook = 0; hook < NMAXHOOKS; hook++) { unsigned active; STARPU_PTHREAD_RWLOCK_RDLOCK(&idle_hook_rwlock); active = idle_hooks[hook].active; STARPU_PTHREAD_RWLOCK_UNLOCK(&idle_hook_rwlock); unsigned may_block_hook = 1; if (active) may_block_hook = idle_hooks[hook].func(idle_hooks[hook].arg); /* As soon as one hook tells that the driver cannot be * blocking, we don't allow it. */ if (!may_block_hook) may_block = 0; } return may_block; } starpu-1.4.9+dfsg/src/core/idle_hook.h000066400000000000000000000016511507764646700176400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __IDLE_HOOK_H__ #define __IDLE_HOOK_H__ #pragma GCC visibility push(hidden) /** @file */ void _starpu_init_idle_hooks(void); unsigned _starpu_execute_registered_idle_hooks(void); #pragma GCC visibility pop #endif /* !__IDLE_HOOK_H__ */ starpu-1.4.9+dfsg/src/core/jobs.c000066400000000000000000001011631507764646700166320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int max_memory_use; static int task_progress; static unsigned long njobs_finished; static unsigned long njobs, maxnjobs; #ifdef STARPU_DEBUG /* List of all jobs, for debugging */ static struct _starpu_job_multilist_all_submitted all_jobs_list; static starpu_pthread_mutex_t all_jobs_list_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; #endif void _starpu_job_crash(); void _starpu_job_init(void) { max_memory_use = starpu_getenv_number_default("STARPU_MAX_MEMORY_USE", 0); task_progress = starpu_getenv_number_default("STARPU_TASK_PROGRESS", 0); #ifdef STARPU_DEBUG _starpu_job_multilist_head_init_all_submitted(&all_jobs_list); #endif _starpu_crash_add_hook(&_starpu_job_crash); } void _starpu_job_memory_use(int check) { if (max_memory_use) { _STARPU_DISP("Memory used for %lu tasks: %lu MiB\n", maxnjobs, (unsigned long) (maxnjobs * (sizeof(struct starpu_task) + sizeof(struct _starpu_job))) >> 20); if (check) STARPU_ASSERT_MSG(njobs == 0, "Some tasks have not been cleaned, did you forget to call starpu_task_destroy or starpu_task_clean?"); } } void _starpu_job_crash() { _starpu_job_memory_use(0); } void _starpu_job_fini(void) { _starpu_job_memory_use(1); } void _starpu_exclude_task_from_dag(struct starpu_task *task) { struct _starpu_job *j = _starpu_get_job_associated_to_task(task); j->exclude_from_dag = 1; _STARPU_TRACE_TASK_EXCLUDE_FROM_DAG(j); } /* create an internal struct _starpu_job structure to encapsulate the task */ struct _starpu_job* STARPU_ATTRIBUTE_MALLOC _starpu_job_create(struct starpu_task *task) { struct _starpu_job *job; _STARPU_LOG_IN(); /* As most of the fields must be initialized at NULL, let's put 0 * everywhere */ _STARPU_CALLOC(job, 1, sizeof(*job)); if (task->dyn_handles) { _STARPU_MALLOC(job->dyn_ordered_buffers, STARPU_TASK_GET_NBUFFERS(task) * sizeof(job->dyn_ordered_buffers[0])); _STARPU_CALLOC(job->dyn_dep_slots, STARPU_TASK_GET_NBUFFERS(task), sizeof(job->dyn_dep_slots[0])); } job->task = task; if ( #if defined(STARPU_DEBUG) 1 #elif defined(STARPU_USE_FXT) fut_active #else _starpu_bound_recording || _starpu_task_break_on_push != -1 || _starpu_task_break_on_sched != -1 || _starpu_task_break_on_pop != -1 || _starpu_task_break_on_exec != -1 || STARPU_AYU_EVENT #endif ) { job->job_id = _starpu_fxt_get_job_id(); STARPU_AYU_ADDTASK(job->job_id, task); STARPU_ASSERT(job->job_id != ULONG_MAX); } if (max_memory_use) { unsigned long jobs = STARPU_ATOMIC_ADDL(&njobs, 1); if (jobs > maxnjobs) maxnjobs = jobs; } _starpu_cg_list_init0(&job->job_successors); STARPU_PTHREAD_MUTEX_INIT0(&job->sync_mutex, NULL); STARPU_PTHREAD_COND_INIT0(&job->sync_cond, NULL); /* By default we have sequential tasks */ job->task_size = 1; job->workerid = -1; if (task->use_tag) _starpu_tag_declare(task->tag_id, job); if (_starpu_graph_record) _starpu_graph_add_job(job); _STARPU_LOG_OUT(); return job; } struct _starpu_job* _starpu_get_job_associated_to_task_slow(struct starpu_task *task, struct _starpu_job *job) { if (job == _STARPU_JOB_UNSET) { job = STARPU_VAL_COMPARE_AND_SWAP_PTR(&task->starpu_private, _STARPU_JOB_UNSET, _STARPU_JOB_SETTING); if (job != _STARPU_JOB_UNSET && job != _STARPU_JOB_SETTING) { /* Actually available in the meanwhile */ STARPU_RMB(); return job; } if (job == _STARPU_JOB_UNSET) { /* Ok, we have to do it */ job = _starpu_job_create(task); STARPU_WMB(); task->starpu_private = job; return job; } } /* Saw _STARPU_JOB_SETTING, somebody is doing it, wait for it. * This is rare enough that busy-reading is fine enough. */ while ((job = *(struct _starpu_job *volatile*) &task->starpu_private) == _STARPU_JOB_SETTING) { STARPU_UYIELD(); STARPU_SYNCHRONIZE(); } STARPU_RMB(); return job; } void _starpu_job_destroy(struct _starpu_job *j) { /* Wait for any code that was still working on the job (and was * probably our waker) */ STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); STARPU_PTHREAD_COND_DESTROY(&j->sync_cond); STARPU_PTHREAD_MUTEX_DESTROY(&j->sync_mutex); if (j->task_size > 1) { STARPU_PTHREAD_BARRIER_DESTROY(&j->before_work_barrier); STARPU_PTHREAD_BARRIER_DESTROY(&j->after_work_barrier); STARPU_ASSERT(j->after_work_busy_barrier == 0); } _starpu_cg_list_deinit(&j->job_successors); if (j->dyn_ordered_buffers) { free(j->dyn_ordered_buffers); j->dyn_ordered_buffers = NULL; } if (j->dyn_dep_slots) { free(j->dyn_dep_slots); j->dyn_dep_slots = NULL; } if (_starpu_graph_record && j->graph_node) _starpu_graph_drop_job(j); if (max_memory_use) (void) STARPU_ATOMIC_ADDL(&njobs, -1); free(j); } int _starpu_job_finished(struct _starpu_job *j) { int ret; STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); ret = j->terminated == 2; STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); return ret; } void _starpu_wait_job(struct _starpu_job *j) { STARPU_ASSERT(j->task); STARPU_ASSERT(!j->task->detach); _STARPU_LOG_IN(); STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); /* We wait for the flag to have a value of 2 which means that both the * codelet's implementation and its callback have been executed. That * way, _starpu_wait_job won't return until the entire task was really * executed (so that we cannot destroy the task while it is still being * manipulated by the driver). */ while (j->terminated != 2) { STARPU_PTHREAD_COND_WAIT(&j->sync_cond, &j->sync_mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); _STARPU_LOG_OUT(); } #ifdef STARPU_OPENMP int _starpu_test_job_termination(struct _starpu_job *j) { STARPU_ASSERT(j->task); STARPU_ASSERT(!j->task->detach); /* Disable Helgrind race complaint, since we really just want to poll j->terminated */ if (STARPU_RUNNING_ON_VALGRIND) { int v = STARPU_PTHREAD_MUTEX_TRYLOCK(&j->sync_mutex); if (v != EBUSY) { STARPU_ASSERT(v == 0); int ret = (j->terminated == 2); STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); return ret; } else { return 0; } } else { STARPU_SYNCHRONIZE(); return j->terminated == 2; } } void _starpu_job_prepare_for_continuation_ext(struct _starpu_job *j, unsigned continuation_resubmit, void (*continuation_callback_on_sleep)(void *arg), void *continuation_callback_on_sleep_arg) { STARPU_ASSERT(!j->continuation); /* continuation are not supported for parallel tasks for now */ STARPU_ASSERT(j->task_size == 1); j->continuation = 1; j->continuation_resubmit = continuation_resubmit; j->continuation_callback_on_sleep = continuation_callback_on_sleep; j->continuation_callback_on_sleep_arg = continuation_callback_on_sleep_arg; j->job_successors.ndeps = 0; j->job_successors.ndeps_completed = 0; } /* Prepare a currently running job for accepting a new set of * dependencies in anticipation of becoming a continuation. */ void _starpu_job_prepare_for_continuation(struct _starpu_job *j) { _starpu_job_prepare_for_continuation_ext(j, 1, NULL, NULL); } void _starpu_job_set_omp_cleanup_callback(struct _starpu_job *j, void (*omp_cleanup_callback)(void *arg), void *omp_cleanup_callback_arg) { j->omp_cleanup_callback = omp_cleanup_callback; j->omp_cleanup_callback_arg = omp_cleanup_callback_arg; } #endif void _starpu_handle_job_submission(struct _starpu_job *j) { /* Need to atomically set submitted to 1 and check dependencies, since * this is concucrent with _starpu_notify_cg */ j->terminated = 0; if (!j->submitted) j->submitted = 1; else j->submitted = 2; #ifdef STARPU_DEBUG STARPU_PTHREAD_MUTEX_LOCK(&all_jobs_list_mutex); _starpu_job_multilist_push_back_all_submitted(&all_jobs_list, j); STARPU_PTHREAD_MUTEX_UNLOCK(&all_jobs_list_mutex); #endif } void starpu_task_end_dep_release(struct starpu_task *t) { struct _starpu_job *j = _starpu_get_job_associated_to_task(t); #ifdef STARPU_USE_FXT struct starpu_task *current = starpu_task_get_current(); if (current) { struct _starpu_job *jcurrent = _starpu_get_job_associated_to_task(current); _STARPU_TRACE_TASK_END_DEP(jcurrent, j); } #endif _starpu_handle_job_termination(j); } void starpu_task_end_dep_add(struct starpu_task *t, int nb_deps) { struct _starpu_job *j = _starpu_get_job_associated_to_task(t); STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); t->nb_termination_call_required += nb_deps; STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); } void _starpu_handle_job_termination(struct _starpu_job *j) { if (j->task->nb_termination_call_required != 0) { STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); int nb = j->task->nb_termination_call_required; j->task->nb_termination_call_required -= 1; STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); if (nb != 0) return; } if (task_progress) { unsigned long jobs = STARPU_ATOMIC_ADDL(&njobs_finished, 1); fprintf(stderr,"\r%lu tasks finished (last %lu %p on %d)...", jobs, j->job_id, j->task, starpu_worker_get_id()); } struct starpu_task *task = j->task; struct starpu_task *end_rdep = NULL; unsigned sched_ctx = task->sched_ctx; double flops = task->flops; const unsigned continuation = #ifdef STARPU_OPENMP j->continuation #else 0 #endif ; if (!continuation) { void (*epilogue_callback)(void *) = task->epilogue_callback_func; /* the epilogue callback is executed before the dependencies release*/ if (epilogue_callback) { enum _starpu_worker_status old_status = _starpu_get_local_worker_status(); /* so that we can check whether we are doing blocking calls * within the callback */ if (!(old_status & STATUS_CALLBACK)) _starpu_add_local_worker_status(STATUS_INDEX_CALLBACK, NULL); /* Perhaps we have nested callbacks (eg. with chains of empty * tasks). So we store the current task and we will restore it * later. */ struct starpu_task *current_task = starpu_task_get_current(); _starpu_set_current_task(task); _STARPU_TRACE_START_CALLBACK(j); epilogue_callback(task->epilogue_callback_arg); _STARPU_TRACE_END_CALLBACK(j); _starpu_set_current_task(current_task); if (!(old_status & STATUS_CALLBACK)) _starpu_clear_local_worker_status(STATUS_INDEX_CALLBACK, NULL); } } #ifdef STARPU_DEBUG STARPU_PTHREAD_MUTEX_LOCK(&all_jobs_list_mutex); _starpu_job_multilist_erase_all_submitted(&all_jobs_list, j); STARPU_PTHREAD_MUTEX_UNLOCK(&all_jobs_list_mutex); #endif STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); STARPU_ASSERT(task->status == STARPU_TASK_RUNNING); #ifdef STARPU_OPENMP if (continuation) { task->status = STARPU_TASK_STOPPED; } else #endif { task->status = STARPU_TASK_FINISHED; /* already prepare for next run */ struct _starpu_cg_list *job_successors = &j->job_successors; job_successors->ndeps_completed = 0; /* We must have set the j->terminated flag early, so that it is * possible to express task dependencies within the callback * function. A value of 1 means that the codelet was executed but that * the callback is not done yet. */ j->terminated = 1; end_rdep = j->end_rdep; } STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); #ifdef STARPU_USE_SC_HYPERVISOR size_t data_size = 0; #endif //STARPU_USE_SC_HYPERVISOR /* We release handle reference count */ if (task->cl && !continuation #ifdef STARPU_BUBBLE && !j->is_bubble #endif ) { unsigned i; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); #ifdef STARPU_USE_SC_HYPERVISOR for(i = 0; i < nbuffers; i++) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); if (handle != NULL) data_size += _starpu_data_get_size(handle); } #endif //STARPU_USE_SC_HYPERVISOR for (i = 0; i < nbuffers; i++) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); _starpu_spin_lock(&handle->header_lock); handle->busy_count--; if (!_starpu_data_check_not_busy(handle)) _starpu_spin_unlock(&handle->header_lock); } } /* Check nowhere before releasing the sequential consistency (which may * unregister the handle and free its switch_cl, and thus task->cl here. */ unsigned nowhere = !task->cl || task->cl->where == STARPU_NOWHERE || task->where == STARPU_NOWHERE; /* If the job was executed on a combined worker there is no need for the * scheduler to process it : the task structure doesn't contain any valuable * data as it's not linked to an actual worker */ /* control task should not execute post_exec_hook */ if(j->task_size == 1 && !nowhere && !j->internal #ifdef STARPU_OPENMP /* If this is a continuation, we do not execute the post_exec_hook. The * post_exec_hook will be run only when the continued task fully * completes. * * Note: If needed, a specific hook could be added to handle stopped * tasks */ && !continuation #endif ) { _starpu_sched_post_exec_hook(task); #ifdef STARPU_USE_SC_HYPERVISOR int workerid = starpu_worker_get_id(); _starpu_sched_ctx_post_exec_task_cb(workerid, task, data_size, j->footprint); #endif //STARPU_USE_SC_HYPERVISOR } /* Remove ourself from the graph before notifying dependencies */ if (_starpu_graph_record) _starpu_graph_drop_job(j); /* Get callback pointer for codelet before notifying dependencies, in case dependencies free the codelet (see starpu_data_unregister for instance) */ void (*callback)(void *) = task->callback_func; if (!callback && task->cl) callback = task->cl->callback_func; /* If this is a continuation, we do not release task dependencies now. * Task dependencies will be released only when the continued task * fully completes */ if (!continuation) { /* Tell other tasks that we don't exist any more, thus no need for * implicit dependencies any more. */ _starpu_release_task_enforce_sequential_consistency(j); } /* Task does not have a cl, but has explicit data dependencies, we need * to tell them that we will not exist any more before notifying the * tasks waiting for us * * For continuations, implicit dependency handles are only released * when the task fully completes */ if (j->implicit_dep_handle && !continuation) { starpu_data_handle_t handle = j->implicit_dep_handle; _starpu_release_data_enforce_sequential_consistency(j->task, &j->implicit_dep_slot, handle); /* Release reference taken while setting implicit_dep_handle */ _starpu_spin_lock(&handle->header_lock); handle->busy_count--; if (!_starpu_data_check_not_busy(handle)) _starpu_spin_unlock(&handle->header_lock); } if (!continuation) { /* If this is a continuation, we do not notify task/tag dependencies * now. Task/tag dependencies will be notified only when the continued * task fully completes */ /* in case there are dependencies, wake up the proper tasks */ if (end_rdep) starpu_task_end_dep_release(end_rdep); _starpu_notify_dependencies(j); /* If this is a continuation, we do not execute the callback * now. The callback will be executed only when the continued * task fully completes */ /* the callback is executed after the dependencies so that we may remove the tag * of the task itself */ if (callback) { struct timespec *time = NULL; int profiling = starpu_profiling_status_get(); if (profiling && task->profiling_info) { time = &task->profiling_info->callback_start_time; _starpu_clock_gettime(time); } enum _starpu_worker_status old_status = _starpu_get_local_worker_status(); /* so that we can check whether we are doing blocking calls * within the callback */ if (!(old_status & STATUS_CALLBACK)) _starpu_add_local_worker_status(STATUS_INDEX_CALLBACK, time); /* Perhaps we have nested callbacks (eg. with chains of empty * tasks). So we store the current task and we will restore it * later. */ struct starpu_task *current_task = starpu_task_get_current(); _starpu_set_current_task(task); _STARPU_TRACE_START_CALLBACK(j); callback(task->callback_arg); _STARPU_TRACE_END_CALLBACK(j); _starpu_set_current_task(current_task); if (profiling && task->profiling_info) { time = &task->profiling_info->callback_end_time; _starpu_clock_gettime(time); } if (!(old_status & STATUS_CALLBACK)) _starpu_clear_local_worker_status(STATUS_INDEX_CALLBACK, time); } } /* Note: For now, we keep the TASK_DONE trace event for continuation, * however we could add a specific event for stopped tasks if needed. */ _STARPU_TRACE_TASK_DONE(j); STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); /* NB: we do not save those values before the callback, in case the * application changes some parameters eventually (eg. a task may not * be generated if the application is terminated). */ unsigned destroy = task->destroy; unsigned detach = task->detach; unsigned regenerate = task->regenerate; unsigned synchronous = task->synchronous; if (!continuation) { #ifdef STARPU_OPENMP if (j->omp_cleanup_callback) { j->omp_cleanup_callback(j->omp_cleanup_callback_arg); j->omp_cleanup_callback = NULL; j->omp_cleanup_callback_arg = NULL; } #endif /* A value of 2 is put to specify that not only the codelet but * also the callback were executed. */ j->terminated = 2; } task->prefetched = 0; STARPU_PTHREAD_COND_BROADCAST(&j->sync_cond); STARPU_AYU_REMOVETASK(j->job_id); STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); /* we do not deallocate the job structure if some is going to * wait after the task */ if (detach && !continuation) { /* no one is going to synchronize with that task so we release * the data structures now. In case the job was already locked * by the caller, it is its responsibility to destroy the task. * */ if (destroy) _starpu_task_destroy(task); } /* A continuation is not much different from a regenerated task. */ if (regenerate || continuation) { STARPU_ASSERT_MSG((detach && !destroy && !synchronous) || continuation , "Regenerated task must be detached (was %u), and not have destroy=1 (was %u) or synchronous=1 (was %u)", detach, destroy, synchronous); STARPU_AYU_ADDTASK(j->job_id, j->exclude_from_dag?NULL:task); { #ifdef STARPU_OPENMP unsigned continuation_resubmit = j->continuation_resubmit; void (*continuation_callback_on_sleep)(void *arg) = j->continuation_callback_on_sleep; void *continuation_callback_on_sleep_arg = j->continuation_callback_on_sleep_arg; j->continuation_resubmit = 1; j->continuation_callback_on_sleep = NULL; j->continuation_callback_on_sleep_arg = NULL; if (!continuation || continuation_resubmit) #endif { /* We reuse the same job structure */ task->status = STARPU_TASK_BLOCKED; int ret = _starpu_submit_job(j, 0); STARPU_ASSERT(!ret); } #ifdef STARPU_OPENMP if (continuation && continuation_callback_on_sleep != NULL) { continuation_callback_on_sleep(continuation_callback_on_sleep_arg); } #endif } } _starpu_decrement_nready_tasks_of_sched_ctx(sched_ctx, flops); _starpu_decrement_nsubmitted_tasks_of_sched_ctx(sched_ctx); struct _starpu_worker *worker; worker = _starpu_get_local_worker_key(); if (worker) { STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); if(worker->removed_from_ctx[sched_ctx] == 1 && worker->shares_tasks_lists[sched_ctx] == 1) { _starpu_worker_gets_out_of_ctx(sched_ctx, worker); worker->removed_from_ctx[sched_ctx] = 0; } STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); } } /* This function is called when a new task is submitted to StarPU * it returns 1 if the tag deps are not fulfilled, 0 otherwise */ static unsigned _starpu_not_all_tag_deps_are_fulfilled(struct _starpu_job *j) { unsigned ret; if (!j->task->use_tag) { /* this task does not use tags, so we can go on */ return 0; } struct _starpu_tag *tag = j->tag; struct _starpu_cg_list *tag_successors = &tag->tag_successors; _starpu_spin_lock(&tag->lock); STARPU_ASSERT_MSG(tag->is_assigned == 1 || !tag_successors->ndeps, "a tag can be assigned only one task to wake (%llu had %u assigned tasks, and %u successors)", (unsigned long long) tag->id, tag->is_assigned, tag_successors->ndeps); if (tag_successors->ndeps != tag_successors->ndeps_completed) { tag->state = STARPU_BLOCKED; j->task->status = STARPU_TASK_BLOCKED_ON_TAG; ret = 1; } else { /* existing deps (if any) are fulfilled */ /* If the same tag is being signaled by several tasks, do not * clear a DONE state. If it's the same job submitted several * times with the same tag, we have to do it */ if (j->submitted == 2 || tag->state != STARPU_DONE) tag->state = STARPU_READY; /* already prepare for next run */ tag_successors->ndeps_completed = 0; ret = 0; } _starpu_spin_unlock(&tag->lock); return ret; } static unsigned _starpu_not_all_task_deps_are_fulfilled(struct _starpu_job *j) { unsigned ret; struct _starpu_cg_list *job_successors = &j->job_successors; if (!j->submitted || (job_successors->ndeps != job_successors->ndeps_completed)) { STARPU_ASSERT(j->task->status == STARPU_TASK_BLOCKED || j->task->status == STARPU_TASK_BLOCKED_ON_TAG); j->task->status = STARPU_TASK_BLOCKED_ON_TASK; ret = 1; } else { /* existing deps (if any) are fulfilled */ ret = 0; } return ret; } #ifdef STARPU_BUBBLE int _starpu_bubble_unpartition_data_if_needed(struct _starpu_job *j) { //_STARPU_DEBUG("[%s(%p)]\n", starpu_task_get_name(j->task), j->task); int unpartition_needed = 0; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); unsigned nhandle = 0; unsigned i; struct starpu_task *control_task = NULL; for (i = 0; i < nbuffers; i++) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(j->task, i); enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(j->task, i); STARPU_PTHREAD_MUTEX_LOCK(&handle->unpartition_mutex); /** * Version A * * We create a control task with the required data * dependencies that will be automatically/magically * handled by _starpu_data_partition_access_submit() * called in _starpu_task_submit_head(). */ if (handle->nplans > 0) { if (unpartition_needed == 0) { control_task = starpu_task_create(); control_task->name = "ucontrol"; _starpu_task_declare_deps_array(j->task, 1, &control_task, 0); unpartition_needed = 1; } //STARPU_TASK_SET_HANDLE(control_task, handle, nhandle); control_task->handles[nhandle] = handle; //STARPU_TASK_SET_MODE(control_task, mode, nhandle); control_task->modes[nhandle] = mode; nhandle ++; } /** * Version B * * We find a way to call directly * _starpu_data_partition_access_submit() here, and we * (re-)plug the current task onto the last task * generated by * _starpu_data_partition_access_submit(). */ else { //_starpu_data_partition_access_submit(handle, (mode & STARPU_W) != 0); // + replug on the current task } STARPU_PTHREAD_MUTEX_UNLOCK(&handle->unpartition_mutex); } // No data has been partitioned, let's keep going if (unpartition_needed == 0) { return 0; } // Add the dependency on the unpartition tasks STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); j->task->status = STARPU_TASK_BLOCKED_ON_TASK; STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); STARPU_ASSERT(control_task); int ret = starpu_task_submit(control_task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit(control_task)"); return 1; } static int _starpu_turn_task_into_bubble(struct _starpu_job *j) { if (j->already_turned_into_bubble) { /* * We have first checked all dependencies of the bubble, * and secondly checked in a second stage the additional * partition/unpartition dependencies */ STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); return 0; } j->already_turned_into_bubble = 1; //_STARPU_DEBUG("[%s(%p)]\n", starpu_task_get_name(j->task), j->task); if (j->is_bubble == 1) { STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); return 0; } else if (j->task->cl == NULL) { STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); return 0; } else { STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); return _starpu_bubble_unpartition_data_if_needed(j); } } void _starpu_bubble_execute(struct _starpu_job *j) { _STARPU_TRACE_BUBBLE(j); _STARPU_TRACE_TASK_NAME_LINE_COLOR(j); _STARPU_TRACE_START_CODELET_BODY(j, 0, NULL, 0, 0); STARPU_ASSERT_MSG(j->task->bubble_gen_dag_func!=NULL || (j->task->cl && j->task->cl->bubble_gen_dag_func!=NULL), "task->bubble_gen_dag_func MUST be defined\n"); #ifdef STARPU_VERBOSE struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); unsigned long long timestamp = 1000000000ULL*tp.tv_sec + tp.tv_nsec; _STARPU_DEBUG("{%llu} [%s(%p)] Running bubble\n", timestamp, starpu_task_get_name(j->task), j->task); #endif if (j->task->bubble_gen_dag_func) j->task->bubble_gen_dag_func(j->task, j->task->bubble_gen_dag_func_arg); else j->task->cl->bubble_gen_dag_func(j->task, j->task->bubble_gen_dag_func_arg); j->task->where = STARPU_NOWHERE; _STARPU_TRACE_END_CODELET_BODY(j, 0, NULL, 0, 0); } #endif /* * In order, we enforce tag, task and data dependencies. The task is * passed to the scheduler only once all these constraints are fulfilled. * * The job mutex has to be taken for atomicity with task submission, and * is released here. */ unsigned _starpu_enforce_deps_and_schedule(struct _starpu_job *j) { unsigned ret; _STARPU_LOG_IN(); /* enforce tag dependencies */ if (_starpu_not_all_tag_deps_are_fulfilled(j)) { STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); _STARPU_LOG_OUT_TAG("not_all_tag_deps_are_fulfilled"); return 0; } /* enforce task dependencies */ if (_starpu_not_all_task_deps_are_fulfilled(j)) { STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); _STARPU_LOG_OUT_TAG("not_all_task_deps_are_fulfilled"); return 0; } #ifdef STARPU_BUBBLE /* Wait for all dependencies at the correct level to be * fulfilled before adding missing partition/unpartition * * If partition/unpartition are submitted we will enter the if * case and come back later when these new dependencies are * fulfilled */ if (_starpu_turn_task_into_bubble(j)) { _STARPU_LOG_OUT_TAG("bubble"); return 0; } #else STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); #endif #ifdef STARPU_BUBBLE if (j->is_bubble == 1) { _starpu_bubble_execute(j); } else #endif { /* respect data concurrent access */ if (_starpu_concurrent_data_access(j)) { _STARPU_LOG_OUT_TAG("concurrent_data_access"); return 0; } } #ifdef STARPU_BUBBLE if (j->task->bubble_parent != 0) _STARPU_TRACE_BUBBLE_TASK_DEPS(j->task->bubble_parent, j); #endif ret = _starpu_push_task(j); _STARPU_LOG_OUT(); return ret; } /* Tag deps are already fulfilled */ unsigned _starpu_enforce_deps_starting_from_task(struct _starpu_job *j) { unsigned ret; /* enforce task dependencies */ if (_starpu_not_all_task_deps_are_fulfilled(j)) { STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); return 0; } #ifdef STARPU_BUBBLE if (_starpu_turn_task_into_bubble(j)) { _STARPU_LOG_OUT_TAG("bubble"); return 0; } #else STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); #endif #ifdef STARPU_BUBBLE if (j->is_bubble == 1) { _starpu_bubble_execute(j); } else #endif { /* respect data concurrent access */ if (_starpu_concurrent_data_access(j)) return 0; } #ifdef STARPU_BUBBLE if (j->task->bubble_parent != 0) _STARPU_TRACE_BUBBLE_TASK_DEPS(j->task->bubble_parent, j); #endif ret = _starpu_push_task(j); return ret; } #ifdef STARPU_OPENMP /* When waking up a continuation, we only enforce new task dependencies */ unsigned _starpu_reenforce_task_deps_and_schedule(struct _starpu_job *j) { unsigned ret; _STARPU_LOG_IN(); STARPU_ASSERT(j->discontinuous); /* enforce task dependencies */ if (_starpu_not_all_task_deps_are_fulfilled(j)) { STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); _STARPU_LOG_OUT_TAG("not_all_task_deps_are_fulfilled"); return 0; } STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); ret = _starpu_push_task(j); _STARPU_LOG_OUT(); return ret; } #endif unsigned _starpu_take_deps_and_schedule(struct _starpu_job *j) { unsigned ret; STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); /* Take references */ _starpu_submit_job_take_data_deps(j); #ifdef STARPU_BUBBLE if (j->task->bubble_parent != 0) _STARPU_TRACE_BUBBLE_TASK_DEPS(j->task->bubble_parent, j); #endif /* And immediately push task */ ret = _starpu_push_task(j); return ret; } /* This is called when a tag or task dependency is to be released. */ void _starpu_enforce_deps_notify_job_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data, int tag) { if (!j->submitted) /* It's not even submitted actually */ return; struct _starpu_cg_list *job_successors = &j->job_successors; /* tag is 1 when we got woken up by a tag dependency about to be * released, and thus we have to check the exact numbner of * dependencies. Otherwise it's a task dependency which is about to be * released. */ if (job_successors->ndeps != job_successors->ndeps_completed + 1 - tag) /* There are still other dependencies */ return; _starpu_enforce_data_deps_notify_job_ready_soon(j, data); } /* Ordered tasks are simply recorded as they arrive in the local_ordered_tasks * ring buffer, indexed by order, and pulled from its head. */ /* TODO: replace with perhaps a heap */ /* This function must be called with worker->sched_mutex taken */ struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker) { struct starpu_task *task = NULL; if (worker->local_ordered_tasks_size) { task = worker->local_ordered_tasks[worker->current_ordered_task]; if (task) { worker->local_ordered_tasks[worker->current_ordered_task] = NULL; STARPU_ASSERT(task->workerorder == worker->current_ordered_task_order); /* Next ordered task is there, return it */ worker->current_ordered_task = (worker->current_ordered_task + 1) % worker->local_ordered_tasks_size; worker->current_ordered_task_order++; _starpu_pop_task_end(task); return task; } } if (!starpu_task_prio_list_empty(&worker->local_tasks)) task = starpu_task_prio_list_pop_front_highest(&worker->local_tasks); _starpu_pop_task_end(task); return task; } int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task) { /* Check that the worker is able to execute the task ! */ STARPU_ASSERT(task && task->cl); if (STARPU_UNLIKELY(!(worker->worker_mask & task->where))) return -ENODEV; starpu_worker_lock(worker->workerid); if (task->execute_on_a_specific_worker && task->workerorder) { STARPU_ASSERT_MSG(task->workerorder >= worker->current_ordered_task_order, "worker order values must not have duplicates (%u pushed to worker %d, but %u already passed)", task->workerorder, worker->workerid, worker->current_ordered_task_order); /* Put it in the ordered task ring */ unsigned needed = task->workerorder - worker->current_ordered_task_order + 1; if (worker->local_ordered_tasks_size < needed) { /* Increase the size */ unsigned alloc = worker->local_ordered_tasks_size; struct starpu_task **new; if (!alloc) alloc = 1; while (alloc < needed) alloc *= 2; _STARPU_MALLOC(new, alloc * sizeof(*new)); if (worker->local_ordered_tasks_size) { /* Put existing tasks at the beginning of the new ring */ unsigned copied = worker->local_ordered_tasks_size - worker->current_ordered_task; memcpy(new, &worker->local_ordered_tasks[worker->current_ordered_task], copied * sizeof(*new)); memcpy(new + copied, worker->local_ordered_tasks, (worker->local_ordered_tasks_size - copied) * sizeof(*new)); } memset(new + worker->local_ordered_tasks_size, 0, (alloc - worker->local_ordered_tasks_size) * sizeof(*new)); free(worker->local_ordered_tasks); worker->local_ordered_tasks = new; worker->local_ordered_tasks_size = alloc; worker->current_ordered_task = 0; } worker->local_ordered_tasks[(worker->current_ordered_task + task->workerorder - worker->current_ordered_task_order) % worker->local_ordered_tasks_size] = task; } else { starpu_task_prio_list_push_back(&worker->local_tasks, task); } starpu_wake_worker_locked(worker->workerid); starpu_push_task_end(task); starpu_worker_unlock(worker->workerid); return 0; } starpu-1.4.9+dfsg/src/core/jobs.h000066400000000000000000000256531507764646700166500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __JOBS_H__ #define __JOBS_H__ /** @file */ #include #include #include #include #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #include #include #include #include #include #include #include #include #pragma GCC visibility push(hidden) struct _starpu_worker; /** codelet function */ typedef void (*_starpu_cl_func_t)(void **, void *); #define _STARPU_MAY_PERFORM(j, arch) ((j)->task->where & STARPU_##arch) struct _starpu_data_descr { starpu_data_handle_t handle; enum starpu_data_access_mode mode; int orig_node; /** This is the original node in the codelet */ int node; /** This is the value actually chosen, only set by _starpu_fetch_task_input for coherency with _starpu_fetch_task_input_tail and __starpu_push_task_output */ int index; int orderedindex; /** For this field the array is actually indexed by parameter order, and this provides the ordered index */ }; #ifdef STARPU_DEBUG MULTILIST_CREATE_TYPE(_starpu_job, all_submitted) #endif /** A job is the internal representation of a task. */ struct _starpu_job { /** Each job is attributed a unique id. This however only defined when recording traces or using jobid-based task breakpoints */ unsigned long job_id; /** The task associated to that job */ struct starpu_task *task; /** A task that this will unlock quickly, e.g. we are the pre_sync part * of a data acquisition, and the caller promised that data release will * happen immediately, so that the post_sync task will be started * immediately after. */ struct _starpu_job *quick_next; /** These synchronization structures are used to wait for the job to be * available or terminated for instance. */ starpu_pthread_mutex_t sync_mutex; starpu_pthread_cond_t sync_cond; /** To avoid deadlocks, we reorder the different buffers accessed to by * the task so that we always grab the rw-lock associated to the * handles in the same order. */ struct _starpu_data_descr ordered_buffers[STARPU_NMAXBUFS]; struct _starpu_task_wrapper_dlist dep_slots[STARPU_NMAXBUFS]; struct _starpu_data_descr *dyn_ordered_buffers; struct _starpu_task_wrapper_dlist *dyn_dep_slots; /** If a tag is associated to the job, this points to the internal data * structure that describes the tag status. */ struct _starpu_tag *tag; /** Maintain a list of all the completion groups that depend on the job. * */ struct _starpu_cg_list job_successors; /** Task whose termination depends on this task */ struct starpu_task *end_rdep; /** For tasks with cl==NULL but submitted with explicit data dependency, * the handle for this dependency, so as to remove the task from the * last_writer/readers */ starpu_data_handle_t implicit_dep_handle; struct _starpu_task_wrapper_dlist implicit_dep_slot; /** Indicates whether the task associated to that job has already been * submitted to StarPU (1) or not (0) (using starpu_task_submit). * Becomes and stays 2 when the task is submitted several times. * * Protected by j->sync_mutex. */ unsigned submitted:2; /** Indicates whether the task associated to this job is terminated or * not. * * Protected by j->sync_mutex. */ unsigned terminated:2; #ifdef STARPU_OPENMP /** Job is a continuation or a regular task. */ unsigned continuation; /** If 0, the prepared continuation is not resubmitted automatically * when going to sleep, if 1, the prepared continuation is immediately * resubmitted when going to sleep. */ unsigned continuation_resubmit; /** Callback function called when: * - The continuation starpu task is ready to be submitted again if * continuation_resubmit = 0; * - The continuation starpu task has just been re-submitted if * continuation_resubmit = 1. */ void (*continuation_callback_on_sleep)(void *arg); void *continuation_callback_on_sleep_arg; void (*omp_cleanup_callback)(void *arg); void *omp_cleanup_callback_arg; /** Job has been stopped at least once. */ unsigned discontinuous; /** Cumulated execution time for discontinuous jobs */ struct timespec cumulated_ts; /** Cumulated energy consumption for discontinuous jobs */ double cumulated_energy_consumed; #endif /** The value of the footprint that identifies the job may be stored in * this structure. */ uint32_t footprint; unsigned footprint_is_computed:1; /** Should that task appear in the debug tools ? (eg. the DAG generated * with dot) */ unsigned exclude_from_dag:1; /** Is that task internal to StarPU? */ unsigned internal:1; /** Did that task use sequential consistency for its data? */ unsigned sequential_consistency:1; /** During the reduction of a handle, StarPU may have to submit tasks to * perform the reduction itself: those task should not be stalled while * other tasks are blocked until the handle has been properly reduced, * so we need a flag to differentiate them from "normal" tasks. */ unsigned reduction_task:1; /** The implementation associated to the job */ unsigned nimpl; /** Number of workers executing that task (>1 if the task is parallel) * */ int task_size; /** The worker the task is running on (or -1 when not running yet) */ int workerid; /** In case we have assigned this job to a combined workerid */ int combined_workerid; /** How many workers are currently running an alias of that job (for * parallel tasks only). */ int active_task_alias_count; struct bound_task *bound_task; /** Parallel workers may have to synchronize before/after the execution of a parallel task. */ starpu_pthread_barrier_t before_work_barrier; starpu_pthread_barrier_t after_work_barrier; unsigned after_work_busy_barrier; struct _starpu_graph_node *graph_node; #ifdef STARPU_DEBUG /** Linked-list of all jobs, for debugging */ struct _starpu_job_multilist_all_submitted all_submitted; #endif #ifdef STARPU_BUBBLE int already_turned_into_bubble; unsigned is_bubble:1; #endif }; #ifdef STARPU_DEBUG MULTILIST_CREATE_INLINES(struct _starpu_job, _starpu_job, all_submitted) #endif void _starpu_job_init(void); void _starpu_job_fini(void); /** Create an internal struct _starpu_job *structure to encapsulate the task. */ struct _starpu_job* _starpu_job_create(struct starpu_task *task) STARPU_ATTRIBUTE_MALLOC; /** Destroy the data structure associated to the job structure */ void _starpu_job_destroy(struct _starpu_job *j); /** Test for the termination of the job */ int _starpu_job_finished(struct _starpu_job *j); /** Wait for the termination of the job */ void _starpu_wait_job(struct _starpu_job *j); #ifdef STARPU_OPENMP /** Test for the termination of the job */ int _starpu_test_job_termination(struct _starpu_job *j); /** Prepare the job for accepting new dependencies before becoming a continuation. */ void _starpu_job_prepare_for_continuation_ext(struct _starpu_job *j, unsigned continuation_resubmit, void (*continuation_callback_on_sleep)(void *arg), void *continuation_callback_on_sleep_arg); void _starpu_job_prepare_for_continuation(struct _starpu_job *j); void _starpu_job_set_omp_cleanup_callback(struct _starpu_job *j, void (*omp_cleanup_callback)(void *arg), void *omp_cleanup_callback_arg); #endif /** Specify that the task should not appear in the DAG generated by debug tools. */ void _starpu_exclude_task_from_dag(struct starpu_task *task); /** try to submit job j, enqueue it if it's not schedulable yet. The job's sync mutex is supposed to be held already */ unsigned _starpu_enforce_deps_and_schedule(struct _starpu_job *j); unsigned _starpu_enforce_deps_starting_from_task(struct _starpu_job *j); #ifdef STARPU_OPENMP /** When waking up a continuation, we only enforce new task dependencies */ unsigned _starpu_reenforce_task_deps_and_schedule(struct _starpu_job *j); #endif unsigned _starpu_take_deps_and_schedule(struct _starpu_job *j); void _starpu_enforce_deps_notify_job_ready_soon(struct _starpu_job *j, _starpu_notify_job_start_data *data, int tag); /** Called at the submission of the job */ void _starpu_handle_job_submission(struct _starpu_job *j); /** This function must be called after the execution of a job, this triggers all * job's dependencies and perform the callback function if any. */ void _starpu_handle_job_termination(struct _starpu_job *j); /** Get the sum of the size of the data accessed by the job. */ size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned nimpl, struct _starpu_job *j); /** Get a task from the local pool of tasks that were explicitly attributed to * that worker. */ struct starpu_task *_starpu_pop_local_task(struct _starpu_worker *worker); /** Put a task into the pool of tasks that are explicitly attributed to the * specified worker. */ int _starpu_push_local_task(struct _starpu_worker *worker, struct starpu_task *task); #define _STARPU_JOB_GET_ORDERED_BUFFER_INDEX(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].index : job->ordered_buffers[i].index) #define _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].handle : job->ordered_buffers[i].handle) #define _STARPU_JOB_GET_ORDERED_BUFFER_MODE(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].mode : job->ordered_buffers[i].mode) #define _STARPU_JOB_GET_ORDERED_BUFFER_NODE(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].node : job->ordered_buffers[i].node) #define _STARPU_JOB_GET_ORDERED_BUFFER_ORIG_NODE(job, i) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers[i].orig_node : job->ordered_buffers[i].orig_node) #define _STARPU_JOB_SET_ORDERED_BUFFER(job, buffer, i) do { if (job->dyn_ordered_buffers) job->dyn_ordered_buffers[i] = buffer; else job->ordered_buffers[i] = buffer;} while(0) #define _STARPU_JOB_GET_ORDERED_BUFFERS(job) ((job->dyn_ordered_buffers) ? job->dyn_ordered_buffers : &job->ordered_buffers[0]) #define _STARPU_JOB_GET_DEP_SLOTS(job) (((job)->dyn_dep_slots) ? (job)->dyn_dep_slots : (job)->dep_slots) #pragma GCC visibility pop #endif // __JOBS_H__ starpu-1.4.9+dfsg/src/core/parallel_task.c000066400000000000000000000041621507764646700205140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include struct starpu_task *starpu_task_dup(struct starpu_task *task) { struct starpu_task *task_dup; _STARPU_MALLOC(task_dup, sizeof(struct starpu_task)); /* TODO perhaps this is a bit too much overhead and we should only copy * part of the structure ? */ *task_dup = *task; return task_dup; } void starpu_parallel_task_barrier_init_n(struct starpu_task* task, int worker_size) { struct _starpu_job *j = _starpu_get_job_associated_to_task(task); j->task_size = worker_size; j->combined_workerid = -1; j->active_task_alias_count = 0; //fprintf(stderr, "POP -> size %d best_size %d\n", worker_size, best_size); STARPU_PTHREAD_BARRIER_INIT(&j->before_work_barrier, NULL, worker_size); STARPU_PTHREAD_BARRIER_INIT(&j->after_work_barrier, NULL, worker_size); j->after_work_busy_barrier = worker_size; return; } void starpu_parallel_task_barrier_init(struct starpu_task* task, int workerid) { /* The master needs to dispatch the task between the * different combined workers */ struct _starpu_combined_worker *combined_worker = _starpu_get_combined_worker_struct(workerid); int worker_size = combined_worker->worker_size; struct _starpu_job *j = _starpu_get_job_associated_to_task(task); starpu_parallel_task_barrier_init_n(task, worker_size); j->combined_workerid = workerid; } starpu-1.4.9+dfsg/src/core/perfmodel/000077500000000000000000000000001507764646700175045ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/core/perfmodel/energy_model.c000066400000000000000000000203101507764646700223150ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef STARPU_PAPI #include #endif #ifdef STARPU_HAVE_HWLOC #include #endif #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #include #include #include #include /* Driver porters: adding your driver here is optional, only needed for the support of energy profiling. */ #ifdef STARPU_USE_CUDA #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION #include #include #include #endif #endif #define ERROR_RETURN(retval, function) do { PAPI_perror(function); fprintf(stderr, "Error %d %s:line %d\n", retval,__FILE__,__LINE__); return(retval); } while (0) #if 0 #define debug(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define debug(fmt, ...) #endif #ifdef STARPU_PAPI #ifdef STARPU_HAVE_HWLOC static const int N_EVTS = 2; static int n_recorded; static int nsockets; static const char* event_names[] = { "rapl::RAPL_ENERGY_PKG:cpu=%d", "rapl::RAPL_ENERGY_DRAM:cpu=%d" }; static int add_event(int EventSet, int socket); /* PAPI variables*/ /*must be initialized to PAPI_NULL before calling PAPI_create_event*/ static int EventSet = PAPI_NULL; #endif #endif static double t1; #ifdef STARPU_USE_CUDA #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION static unsigned long long energy_begin, energy_end; static nvmlDevice_t device; #endif #endif int starpu_energy_start(int workerid STARPU_ATTRIBUTE_UNUSED, enum starpu_worker_archtype archi) { t1 = starpu_timing_now(); /* Driver porters: adding your driver here is optional, only needed for the support of energy measurement. */ switch (archi) { #ifdef STARPU_PAPI #ifdef STARPU_HAVE_HWLOC case STARPU_CPU_WORKER: { STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n"); int retval, number; struct _starpu_machine_config *config = _starpu_get_machine_config(); hwloc_topology_t topology = config->topology.hwtopology; nsockets = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE); if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) ERROR_RETURN(retval, "PAPI_library_init"); /* Creating the eventset */ if ((retval = PAPI_create_eventset(&EventSet)) != PAPI_OK) ERROR_RETURN(retval, "PAPI_create_eventset"); int i; for (i = 0 ; i < nsockets ; i ++) { /* return the index of socket */ hwloc_obj_t obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PACKAGE, i); STARPU_ASSERT(obj); if ((retval = add_event(EventSet, obj->os_index)) != PAPI_OK) { if (retval == PAPI_EPERM) _STARPU_DISP("PAPI could not access counters due to permissions errors. Perhaps your system requires to run measurements as root?\n"); else if (retval == PAPI_ENOEVNT) _STARPU_DISP("PAPI could not access counters. Perhaps your system requires to run measurements as root?\n"); ERROR_RETURN(retval, "PAPI_add_named_event"); } } /* get the number of events in the event set */ number = 0; if ((retval = PAPI_list_events(EventSet, NULL, &number)) != PAPI_OK) ERROR_RETURN(retval, "PAPI_list_events"); debug("There are %d events in the event set\n", number); /* Start counting */ if ((retval = PAPI_start(EventSet)) != PAPI_OK) ERROR_RETURN(retval, "PAPI_start"); return retval; } #endif #endif #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION case STARPU_CUDA_WORKER: { if (!_starpu_nvmlDeviceGetHandleByIndex || !_starpu_nvmlDeviceGetTotalEnergyConsumption) return -1; STARPU_ASSERT_MSG(workerid != -1, "For CUDA GPUs we measure each GPU separately, please specify a worker\n"); int devid = starpu_worker_get_devid(workerid); int ret = _starpu_nvmlDeviceGetHandleByIndex(devid, &device); if (ret != NVML_SUCCESS) { _STARPU_DISP("Could not get CUDA device %d from nvml\n", devid); return -1; } ret = _starpu_nvmlDeviceGetTotalEnergyConsumption(device, &energy_begin); if (ret != NVML_SUCCESS) { _STARPU_DISP("Could not measure energy used by CUDA device %d\n", devid); return -1; } return 0; } break; #endif default: printf("Error: worker is not supported ! \n"); return -1; } } int starpu_energy_stop(struct starpu_perfmodel *model, struct starpu_task *task, unsigned nimpl, unsigned ntasks, int workerid, enum starpu_worker_archtype archi) { double energy = 0.; int retval = 0; unsigned cpuid = 0; double t2 = starpu_timing_now(); double t STARPU_ATTRIBUTE_UNUSED = t2 - t1; switch (archi) { #ifdef STARPU_PAPI #ifdef STARPU_HAVE_HWLOC case STARPU_CPU_WORKER: { STARPU_ASSERT_MSG(workerid == -1, "For CPUs we cannot measure each worker separately, use where = STARPU_CPU and leave workerid as -1\n"); /*This is where we store the values we read from the eventset */ long long values[nsockets*n_recorded]; /* Stop counting and store the values into the array */ if ((retval = PAPI_stop(EventSet, values)) != PAPI_OK) ERROR_RETURN(retval, "PAPI_stop"); int k,s; for(s = 0 ; s < nsockets ; s ++) { for(k = 0 ; k < n_recorded; k++) { double delta = values[s * n_recorded + k]*0.23/1.0e9; energy += delta; debug("%-40s%12.6f J\t(for %f us, Average Power %.1fW)\n", event_names[k], delta, t, delta/(t*1.0E-6)); } } /*removes all events from a PAPI event set */ if ((retval = PAPI_cleanup_eventset(EventSet)) != PAPI_OK) ERROR_RETURN(retval, "PAPI_cleanup_eventset"); /*deallocates the memory associated with an empty PAPI EventSet*/ if ((retval = PAPI_destroy_eventset(&EventSet)) != PAPI_OK) ERROR_RETURN(retval, "PAPI_destroy_eventset"); break; } #endif #endif #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION case STARPU_CUDA_WORKER: { if (!_starpu_nvmlDeviceGetTotalEnergyConsumption) return -1; STARPU_ASSERT_MSG(workerid != -1, "For CUDA GPUs we measure each GPU separately, please specify a worker\n"); int ret = _starpu_nvmlDeviceGetTotalEnergyConsumption(device, &energy_end); if (ret != NVML_SUCCESS) return -1; energy = (energy_end - energy_begin) / 1000.; debug("energy consumption on device %d is %f mJ (for %f us, Average power %0.1fW)\n", 0, energy * 1000., t, energy / (t*1.0E-6)); break; } #endif default: { printf("Error: worker type %d is not supported! \n", archi); return -1; } } struct starpu_perfmodel_arch *arch; if (workerid == -1) /* Just take one of them */ workerid = starpu_worker_get_by_type(archi, 0); arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS); starpu_perfmodel_update_history_n(model, task, arch, cpuid, nimpl, energy / ntasks, ntasks); return retval; } #ifdef STARPU_PAPI #ifdef STARPU_HAVE_HWLOC static int add_event(int eventSet, int socket) { int retval, i; for (i = 0; i < N_EVTS; i++) { char buf[255]; snprintf(buf, sizeof(buf), event_names[i], socket); /* printf("Activating multiplex\n"); */ /* retval = PAPI_set_multiplex(eventSet); */ /* if(retval != PAPI_OK) { */ /* _STARPU_DISP("cannot set multiplex\n"); */ /* return retval; */ /* } */ retval = PAPI_add_named_event(eventSet, buf); if (retval != PAPI_OK) { if (!strcmp(event_names[i], "rapl::RAPL_ENERGY_DRAM:cpu=%d")) { /* Ok, too bad */ _STARPU_DISP("Note: DRAM energy measurement not available\n"); n_recorded = i; return PAPI_OK; } _STARPU_DISP("cannot add event '%s': %d\n", buf, retval); return retval; } } n_recorded = i; return(PAPI_OK); } #endif #endif starpu-1.4.9+dfsg/src/core/perfmodel/multiple_regression.c000066400000000000000000000265011507764646700237470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2018-2018 Umeà University * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Code for computing multiple linear regression */ #include typedef long int integer; typedef double doublereal; #ifdef STARPU_MLR_MODEL #ifdef STARPU_BUILT_IN_MIN_DGELS int _starpu_dgels_(char *trans, integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info); #else int dgels_(char *trans, integer *m, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info); #endif #endif //STARPU_MLR_MODEL static unsigned long count_file_lines(FILE *f) { unsigned long lines=0; while(!feof(f)) { int ch = fgetc(f); if(ch == '\n') { lines++; } } rewind(f); return lines; } static void dump_multiple_regression_list(double *mpar, double *my, int start, unsigned nparameters, struct starpu_perfmodel_history_list *list_history) { struct starpu_perfmodel_history_list *ptr = list_history; int i = start; unsigned j; while (ptr) { my[i] = ptr->entry->duration; for(j=0; jentry->parameters[j]; ptr = ptr->next; i++; } } static void load_old_calibration(double *mx, double *my, unsigned nparameters, char *filepath) { char buffer[1024]; char *line; int i=0; FILE *f = fopen(filepath, "a+"); STARPU_ASSERT_MSG(f, "Could not load performance model from file %s\n", filepath); line = fgets(buffer,sizeof(buffer),f);//skipping first line STARPU_ASSERT(line); while((line=fgets(buffer,sizeof(buffer),f))!=NULL) { char *record = strtok(line,","); STARPU_ASSERT_MSG(record, "Could not load performance model from file %s\n", filepath); my[i] = atof(record); record = strtok(NULL,","); int j=0; while(record != NULL) { mx[i*nparameters+j] = atof(record) ; ++j; record = strtok(NULL,","); } ++i ; } fclose(f); } static unsigned long find_long_list_size(struct starpu_perfmodel_history_list *list_history) { long cnt = 0; struct starpu_perfmodel_history_list *ptr = list_history; while (ptr) { cnt++; ptr = ptr->next; } return cnt; } #ifdef STARPU_MLR_MODEL int dgels_multiple_reg_coeff(double *mpar, double *my, unsigned long nn, unsigned ncoeff, unsigned nparameters, double *coeff, unsigned **combinations) { /* Arguments */ /* ========= */ /* TRANS (input) CHARACTER*1 */ /* = 'N': the linear system involves A; */ /* = 'T': the linear system involves A**T. */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of */ /* columns of the matrices B and X. NRHS >=0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, */ /* if M >= N, A is overwritten by details of its QR */ /* factorization as returned by DGEQRF; */ /* if M < N, A is overwritten by details of its LQ */ /* factorization as returned by DGELQF. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the matrix B of right hand side vectors, stored */ /* columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ /* if TRANS = 'T'. */ /* On exit, if INFO = 0, B is overwritten by the solution */ /* vectors, stored columnwise: */ /* if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ /* squares solution vectors; the residual sum of squares for the */ /* solution in each column is given by the sum of squares of */ /* elements N+1 to M in that column; */ /* if TRANS = 'N' and m < n, rows 1 to N of B contain the */ /* minimum norm solution vectors; */ /* if TRANS = 'T' and m >= n, rows 1 to M of B contain the */ /* minimum norm solution vectors; */ /* if TRANS = 'T' and m < n, rows 1 to M of B contain the */ /* least squares solution vectors; the residual sum of squares */ /* for the solution in each column is given by the sum of */ /* squares of elements M+1 to N in that column. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= MAX(1,M,N). */ /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* LWORK >= max(1, MN + max(MN, NRHS)). */ /* For optimal performance, */ /* LWORK >= max(1, MN + max(MN, NRHS) * NB). */ /* where MN = min(M,N) and NB is the optimum block size. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the i-th diagonal element of the */ /* triangular factor of A is zero, so that A does not have */ /* full rank; the least squares solution could not be */ /* computed. */ /* ===================================================================== */ if(nn <= ncoeff) { _STARPU_DISP("Warning: This function is not intended for the use when number of parameters is larger than the number of observations. Check how your matrices A and B were allocated or simply add more benchmarks.\n Multiple linear regression model will not be written into perfmodel file.\n"); return 1; } char trans = 'N'; integer m = nn; integer n = ncoeff; integer nrhs = 1; // number of columns of B and X (which are vectors therefore nrhs=1) doublereal *X; _STARPU_MALLOC(X, sizeof(double)*n*m); // (/!\ modified at the output) contain the model and the different values of pararmters doublereal *Y; _STARPU_MALLOC(Y, sizeof(double)*m); double coefficient; int i, j; unsigned k; for (i=0; i < m; i++) { Y[i] = my[i]; X[i] = 1.; for (j=1; j < n; j++) { coefficient = 1.; for(k=0; k < nparameters; k++) { coefficient *= pow(mpar[i*nparameters+k],combinations[j-1][k]); } X[i+j*m] = coefficient; } } integer lda = m; integer ldb = m; // integer info = 0; integer lwork = n*2; doublereal *work; // (output) _STARPU_MALLOC(work, sizeof(double)*lwork); /* // Running LAPACK dgels_ */ #ifdef STARPU_BUILT_IN_MIN_DGELS _starpu_dgels_(&trans, &m, &n, &nrhs, X, &lda, Y, &ldb, work, &lwork, &info); #else dgels_(&trans, &m, &n, &nrhs, X, &lda, Y, &ldb, work, &lwork, &info); #endif /* Check for the full rank */ if(info != 0) { _STARPU_DISP("Warning: Problems when executing dgels_ function. It seems like the diagonal element %ld is zero.\n Multiple linear regression model will not be written into perfmodel file.\n", info); free(X); free(Y); free(work); return 1; } /* Copy computed coefficients */ for(i=0; i<(int) ncoeff; i++) coeff[i] = Y[i]; free(X); free(Y); free(work); return 0; } #endif //STARPU_MLR_MODEL /* * Validating the accuracy of the coefficients. * For the the validation is extremely basic, but it should be improved. */ void starpu_validate_mlr(double *coeff, unsigned ncoeff, const char *codelet_name) { unsigned i; if (coeff[0] < 0) _STARPU_DISP("Warning: Constant computed by least square method is negative (%f). The model %s is likely to be inaccurate.\n", coeff[0], codelet_name); for(i=1; i 0) load_old_calibration(mpar, my, nparameters, filepath); /* Filling X and Y matrices with measured values */ dump_multiple_regression_list(mpar, my, old_lines, nparameters, ptr); if (ncoeff!=0 && combinations!=NULL) { #ifdef STARPU_MLR_MODEL /* Computing coefficients using multiple linear regression */ if(dgels_multiple_reg_coeff(mpar, my, n, ncoeff, nparameters, coeff, combinations)) { free(mpar); free(my); return 1; } /* Basic validation of the model accuracy */ starpu_validate_mlr(coeff, ncoeff, codelet_name); #else _STARPU_DISP("Warning: StarPU was compiled without '--enable-mlr' option, thus multiple linear regression model will not be computed.\n"); for(i=0; i 0) { f = fopen(filepath, "a+"); STARPU_ASSERT_MSG(f, "Could not save performance model into the file %s\n", filepath); } else { f = fopen(filepath, "w+"); STARPU_ASSERT_MSG(f, "Could not save performance model into the file %s\n", filepath); fprintf(f, "Duration"); for(j=0; j #include #include #include #include #pragma GCC visibility push(hidden) int _starpu_multiple_regression(struct starpu_perfmodel_history_list *ptr, double *coeff, unsigned ncoeff, unsigned nparameters, const char **parameters_names, unsigned **combinations, const char *codelet_name); #pragma GCC visibility pop #endif // __MULTIPLE_REGRESSION_H__ starpu-1.4.9+dfsg/src/core/perfmodel/perfmodel.c000066400000000000000000000700531507764646700216320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #include #include #include #include #include #include #ifdef STARPU_HAVE_WINDOWS #include #endif static int _starpu_expected_transfer_time_writeback; void _starpu_init_perfmodel(void) { _starpu_expected_transfer_time_writeback = starpu_getenv_number_default("STARPU_EXPECTED_TRANSFER_TIME_WRITEBACK", 0); } /* This flag indicates whether performance models should be calibrated or not. * 0: models need not be calibrated * 1: models must be calibrated * 2: models must be calibrated, existing models are overwritten. */ static unsigned calibrate_flag = 0; void _starpu_set_calibrate_flag(unsigned val) { calibrate_flag = val; } unsigned _starpu_get_calibrate_flag(void) { return calibrate_flag; } struct starpu_perfmodel_arch* starpu_worker_get_perf_archtype(int workerid, unsigned sched_ctx_id) { STARPU_ASSERT(workerid>=0); if(sched_ctx_id != STARPU_NMAX_SCHED_CTXS) { unsigned child_sched_ctx = starpu_sched_ctx_worker_is_master_for_child_ctx(workerid, sched_ctx_id); if(child_sched_ctx != STARPU_NMAX_SCHED_CTXS) return _starpu_sched_ctx_get_perf_archtype(child_sched_ctx); struct _starpu_sched_ctx *stream_ctx = _starpu_worker_get_ctx_stream(workerid); if(stream_ctx != NULL) return _starpu_sched_ctx_get_perf_archtype(stream_ctx->id); } struct _starpu_machine_config *config = _starpu_get_machine_config(); /* This workerid may either be a basic worker or a combined worker */ unsigned nworkers = config->topology.nworkers; if (workerid < (int)config->topology.nworkers) return &config->workers[workerid].perf_arch; /* We have a combined worker */ unsigned ncombinedworkers = config->topology.ncombinedworkers; STARPU_ASSERT(workerid < (int)(ncombinedworkers + nworkers)); return &config->combined_workers[workerid - nworkers].perf_arch; } /* * PER WORKER model */ static double per_worker_task_expected_perf(struct starpu_perfmodel *model, unsigned workerid, struct starpu_task *task, unsigned nimpl) { double (*worker_cost_function)(struct starpu_task *task, unsigned workerid, unsigned nimpl); worker_cost_function = model->worker_cost_function; STARPU_ASSERT_MSG(worker_cost_function, "STARPU_PER_WORKER needs worker_cost_function to be defined"); return worker_cost_function(task, workerid, nimpl); } /* * PER ARCH model */ static double per_arch_task_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch, struct starpu_task *task, unsigned nimpl) { int comb; double (*per_arch_cost_function)(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl); if (model->arch_cost_function) return model->arch_cost_function(task, arch, nimpl); comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); STARPU_ASSERT_MSG(comb != -1, "Didn't find the proper arch combination\n"); STARPU_ASSERT_MSG(model->state->per_arch[comb] != NULL, "STARPU_PER_ARCH needs per-arch cost_function to be defined"); per_arch_cost_function = model->state->per_arch[comb][nimpl].cost_function; STARPU_ASSERT_MSG(per_arch_cost_function, "STARPU_PER_ARCH needs per-arch cost_function to be defined"); return per_arch_cost_function(task, arch, nimpl); } /* * Common model */ double starpu_worker_get_relative_speedup(struct starpu_perfmodel_arch* perf_arch) { double speedup = 0; int dev; for(dev = 0; dev < perf_arch->ndevices; dev++) { enum starpu_worker_archtype archtype = perf_arch->devices[dev].type; double coef = starpu_driver_info[archtype].alpha; speedup += coef * (perf_arch->devices[dev].ncores); } return speedup; } static double common_task_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct starpu_task *task, unsigned nimpl) { double exp; double alpha; STARPU_ASSERT_MSG(model->cost_function, "STARPU_COMMON requires common cost_function to be defined"); exp = model->cost_function(task, nimpl); alpha = starpu_worker_get_relative_speedup(arch); STARPU_ASSERT(!_STARPU_IS_ZERO(alpha)); return exp/alpha; } void _starpu_init_and_load_perfmodel(struct starpu_perfmodel *model) { if (!model || model->is_loaded) return; starpu_perfmodel_init(model); if (model->is_loaded) return; switch (model->type) { case STARPU_PER_WORKER: case STARPU_PER_ARCH: case STARPU_COMMON: /* Nothing more to do than init */ break; case STARPU_HISTORY_BASED: case STARPU_NL_REGRESSION_BASED: _starpu_load_history_based_model(model, 1); break; case STARPU_REGRESSION_BASED: case STARPU_MULTIPLE_REGRESSION_BASED: _starpu_load_history_based_model(model, 0); break; default: STARPU_ABORT(); } model->is_loaded = 1; } static double starpu_model_expected_perf(struct starpu_task *task, struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned nimpl) { double exp_perf = 0.0; if (model) { _starpu_init_and_load_perfmodel(model); struct _starpu_job *j = _starpu_get_job_associated_to_task(task); switch (model->type) { case STARPU_PER_ARCH: exp_perf = per_arch_task_expected_perf(model, arch, task, nimpl); STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf); break; case STARPU_COMMON: exp_perf = common_task_expected_perf(model, arch, task, nimpl); STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf); break; case STARPU_HISTORY_BASED: exp_perf = _starpu_history_based_job_expected_perf(model, arch, j, nimpl); STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf); break; case STARPU_REGRESSION_BASED: exp_perf = _starpu_regression_based_job_expected_perf(model, arch, j, nimpl); STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf); break; case STARPU_NL_REGRESSION_BASED: exp_perf = _starpu_non_linear_regression_based_job_expected_perf(model, arch, j,nimpl); STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf); break; case STARPU_MULTIPLE_REGRESSION_BASED: exp_perf = _starpu_multiple_regression_based_job_expected_perf(model, arch, j, nimpl); STARPU_ASSERT_MSG(isnan(exp_perf)||exp_perf>=0,"exp_perf=%lf\n",exp_perf); break; default: STARPU_ABORT(); } } /* no model was found */ return exp_perf; } static double starpu_model_worker_expected_perf(struct starpu_task *task, struct starpu_perfmodel *model, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl) { if (!model) return 0.0; if (model->type == STARPU_PER_WORKER) return per_worker_task_expected_perf(model, workerid, task, nimpl); else { struct starpu_perfmodel_arch *per_arch = starpu_worker_get_perf_archtype(workerid, sched_ctx_id); return starpu_model_expected_perf(task, model, per_arch, nimpl); } } double starpu_task_expected_length(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { if (!task->cl) /* Tasks without codelet don't actually take time */ return 0.0; return starpu_model_expected_perf(task, task->cl->model, arch, nimpl); } double starpu_task_worker_expected_length(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl) { if (!task->cl) /* Tasks without codelet don't actually take time */ return 0.0; return starpu_model_worker_expected_perf(task, task->cl->model, workerid, sched_ctx_id, nimpl); } double starpu_task_expected_length_average(struct starpu_task *task, unsigned sched_ctx_id) { if (!task->cl) /* Tasks without codelet don't actually take time */ return 0.0; struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); double harmsum = 0.0; unsigned n = 0; struct starpu_sched_ctx_iterator it; workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { unsigned nimpl; unsigned impl_mask; unsigned workerid = workers->get_next(workers, &it); if (!starpu_worker_can_execute_task_impl(workerid, task, &impl_mask)) continue; double best_expected = DBL_MAX; for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { if (!(impl_mask & (1U << nimpl))) { /* no one on that queue may execute this task */ continue; } double expected = starpu_task_worker_expected_length(task, workerid, sched_ctx_id, nimpl); if (expected < best_expected) best_expected = expected; } harmsum += 1. / best_expected; n++; } return n/harmsum; } double starpu_task_expected_energy(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { if (!task->cl) /* Tasks without codelet don't actually take time */ return 0.0; return starpu_model_expected_perf(task, task->cl->energy_model, arch, nimpl); } double starpu_task_worker_expected_energy(struct starpu_task *task, unsigned workerid, unsigned sched_ctx_id, unsigned nimpl) { if (!task->cl) /* Tasks without codelet don't actually take time */ return 0.0; return starpu_model_worker_expected_perf(task, task->cl->energy_model, workerid, sched_ctx_id, nimpl); } double starpu_task_expected_energy_average(struct starpu_task *task, unsigned sched_ctx_id) { if (!task->cl) /* Tasks without codelet don't actually take time */ return 0.0; struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); double harmsum = 0.0; unsigned n = 0; struct starpu_sched_ctx_iterator it; workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { unsigned nimpl; unsigned impl_mask; unsigned workerid = workers->get_next(workers, &it); if (!starpu_worker_can_execute_task_impl(workerid, task, &impl_mask)) continue; double best_expected = DBL_MAX; for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { if (!(impl_mask & (1U << nimpl))) { /* no one on that queue may execute this task */ continue; } double expected = starpu_task_worker_expected_energy(task, workerid, sched_ctx_id, nimpl); if (expected < best_expected) best_expected = expected; } harmsum += 1. / best_expected; n++; } return n/harmsum; } double starpu_task_expected_conversion_time(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { unsigned i; double sum = 0.0; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); #ifdef STARPU_DEVEL #warning TODO: conversion time with combined arch perfmodel #endif STARPU_ASSERT_MSG(arch->ndevices == 1, "TODO"); for (i = 0; i < nbuffers; i++) { starpu_data_handle_t handle; struct starpu_task *conversion_task; enum starpu_node_kind node_kind; handle = STARPU_TASK_GET_HANDLE(task, i); if (!_starpu_data_is_multiformat_handle(handle)) continue; node_kind = starpu_worker_get_memory_node_kind(arch->devices[0].type); if (!_starpu_handle_needs_conversion_task_for_arch(handle, node_kind)) continue; conversion_task = _starpu_create_conversion_task_for_arch(handle, node_kind); sum += starpu_task_expected_length(conversion_task, arch, nimpl); _starpu_spin_lock(&handle->header_lock); handle->refcnt--; handle->busy_count--; if (!_starpu_data_check_not_busy(handle)) _starpu_spin_unlock(&handle->header_lock); starpu_task_clean(conversion_task); free(conversion_task); } return sum; } /* Predict the transfer time (in µs) to move a handle between memory nodes */ static double _starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node, enum starpu_data_access_mode mode, size_t size) { double duration = 0.; #define MAX_REQUESTS 4 unsigned src_nodes[MAX_REQUESTS]; unsigned dst_nodes[MAX_REQUESTS]; unsigned handling_nodes[MAX_REQUESTS]; int nhops = _starpu_determine_request_path(handle, src_node, dst_node, mode, MAX_REQUESTS, src_nodes, dst_nodes, handling_nodes, 0); int i; for (i = 0; i < nhops; i++) duration += starpu_transfer_predict(src_nodes[i], dst_nodes[i], size); return duration; } /* Predict the transfer time (in µs) to move a handle to a memory node */ double starpu_data_expected_transfer_time(starpu_data_handle_t handle, unsigned memory_node, enum starpu_data_access_mode mode) { /* FIXME: Fix write-only mode with _starpu_expected_transfer_time_writeback */ /* FIXME: count time_writeback only if the data is not dirty. Once it is dirty, we shouldn't * count the writeback penalty again. */ /* If we don't need to read the content of the handle */ if (!(mode & STARPU_R)) return 0.0; if (starpu_data_is_on_node(handle, memory_node)) return 0.0; size_t size = _starpu_data_get_size(handle); /* XXX in case we have an abstract piece of data (eg. with the * void interface, this does not introduce any overhead, and we * don't even want to consider the latency that is not * relevant). */ if (size == 0) return 0.0; double duration = 0.; _starpu_spin_lock(&handle->header_lock); int src_node = _starpu_select_src_node(handle, memory_node); _starpu_spin_unlock(&handle->header_lock); if (src_node >= 0) { duration += _starpu_data_expected_transfer_time(handle, src_node, memory_node, mode, size); } /* Else, will just create it in place. Ideally we should take the * time to create it into account */ if (_starpu_expected_transfer_time_writeback && (mode & STARPU_W) && handle->home_node >= 0) { /* Will have to write back the produced data, artificially count * the time to bring it back to its home node */ duration += _starpu_data_expected_transfer_time(handle, memory_node, handle->home_node, STARPU_R, size); } return duration; } /* Data transfer performance modeling */ double starpu_task_expected_data_transfer_time(unsigned memory_node, struct starpu_task *task) { unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); unsigned buffer; double penalty = 0.0; for (buffer = 0; buffer < nbuffers; buffer++) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer); enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, buffer); int node = _starpu_task_data_get_node_on_node(task, buffer, memory_node); if (node >= 0) penalty += starpu_data_expected_transfer_time(handle, node, mode); } return penalty; } /* Data transfer performance modeling */ double starpu_task_expected_data_transfer_time_for(struct starpu_task *task, unsigned worker) { unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); unsigned buffer; double penalty = 0.0; for (buffer = 0; buffer < nbuffers; buffer++) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer); enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, buffer); int node = _starpu_task_data_get_node_on_worker(task, buffer, worker); if (node >= 0) penalty += starpu_data_expected_transfer_time(handle, node, mode); } return penalty; } /* Return the expected duration of the entire task bundle in µs */ double starpu_task_bundle_expected_length(starpu_task_bundle_t bundle, struct starpu_perfmodel_arch* arch, unsigned nimpl) { double expected_length = 0.0; /* We expect the length of the bundle the be the sum of the different tasks length. */ STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex); struct _starpu_task_bundle_entry *entry; entry = bundle->list; while (entry) { if(!entry->task->scheduled) { double task_length = starpu_task_expected_length(entry->task, arch, nimpl); /* In case the task is not calibrated, we consider the task * ends immediately. */ if (task_length > 0.0) expected_length += task_length; } entry = entry->next; } STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); return expected_length; } /* Return the expected energy consumption of the entire task bundle in J */ double starpu_task_bundle_expected_energy(starpu_task_bundle_t bundle, struct starpu_perfmodel_arch* arch, unsigned nimpl) { double expected_energy = 0.0; /* We expect total consumption of the bundle the be the sum of the different tasks consumption. */ STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex); struct _starpu_task_bundle_entry *entry; entry = bundle->list; while (entry) { double task_energy = starpu_task_expected_energy(entry->task, arch, nimpl); /* In case the task is not calibrated, we consider the task * ends immediately. */ if (task_energy > 0.0) expected_energy += task_energy; entry = entry->next; } STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); return expected_energy; } /* Return the time (in µs) expected to transfer all data used within the bundle */ double starpu_task_bundle_expected_data_transfer_time(starpu_task_bundle_t bundle, unsigned memory_node) { STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex); struct _starpu_handle_list *handles = NULL; /* We list all the handle that are accessed within the bundle. */ /* For each task in the bundle */ struct _starpu_task_bundle_entry *entry = bundle->list; while (entry) { struct starpu_task *task = entry->task; if (task->cl) { unsigned b; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); for (b = 0; b < nbuffers; b++) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, b); enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, b); if (!(mode & STARPU_R)) continue; /* Insert the handle in the sorted list in case * it's not already in that list. */ _starpu_insertion_handle_sorted(&handles, handle, mode); } } entry = entry->next; } STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); /* Compute the sum of data transfer time, and destroy the list */ double total_exp = 0.0; while (handles) { struct _starpu_handle_list *current = handles; handles = handles->next; double exp; exp = starpu_data_expected_transfer_time(current->handle, memory_node, current->mode); total_exp += exp; free(current); } return total_exp; } #define _PERF_MODEL_DIR_MAXLEN 256 #define _PERF_MODEL_DIR_MAXNB 20 static char *_perf_model_paths[_PERF_MODEL_DIR_MAXNB]; static int _perf_model_paths_nb=0; static int _perf_model_bus_location = -1; static int _perf_model_bus_directory_existence_was_tested[_PERF_MODEL_DIR_MAXNB]; static char *_perf_model_dir_bus = NULL; static char *_perf_model_dirs_codelet[_PERF_MODEL_DIR_MAXNB]; static int _perf_model_codelet_directory_existence_was_tested[_PERF_MODEL_DIR_MAXNB]; static void _starpu_set_perf_model_dirs(); void _starpu_find_perf_model_codelet(const char *symbol, const char *hostname, char *path, size_t maxlen) { const char *dot = strrchr(symbol, '.'); int i=0; _starpu_set_perf_model_dirs(); for(i=0 ; _perf_model_paths[i]!=NULL ; i++) { snprintf(path, maxlen, "%scodelets/%d/%s%s%s", _perf_model_paths[i], _STARPU_PERFMODEL_VERSION, symbol, dot?"":".", dot?"":hostname); //_STARPU_MSG("checking file %s\n", path); int res = access(path, F_OK); if (res == 0) { return; } } // The file was not found path[0] = '\0'; } void _starpu_find_perf_model_codelet_debug(const char *symbol, const char *hostname, const char *arch, char *path, size_t maxlen) { const char *dot = strrchr(symbol, '.'); int i=0; _starpu_set_perf_model_dirs(); for(i=0 ; _perf_model_paths[i]!=NULL ; i++) { snprintf(path, maxlen, "%scodelets/%d/%s%s%s", _perf_model_paths[i], _STARPU_PERFMODEL_VERSION, symbol, dot?"":".", dot?"":hostname); //_STARPU_MSG("checking file %s\n", path); int res = access(path, F_OK); if (res == 0) { snprintf(path, maxlen, "%sdebug/%s%s%s%s", _perf_model_paths[i], symbol, dot?"":".", dot?"":hostname, arch); return; } } // The file was not found path[0] = '\0'; } void _starpu_set_default_perf_model_codelet(const char *symbol, const char *hostname, char *path, size_t maxlen) { _starpu_create_codelet_sampling_directory_if_needed(0); const char *dot = strrchr(symbol, '.'); snprintf(path, maxlen, "%scodelets/%d/%s%s%s", _perf_model_paths[0], _STARPU_PERFMODEL_VERSION, symbol, dot?"":".", dot?"":hostname); } char *_starpu_get_perf_model_dir_default() { _starpu_create_codelet_sampling_directory_if_needed(0); return _perf_model_paths[0]; } char *_starpu_get_perf_model_dir_bus() { int loc = _starpu_create_bus_sampling_directory_if_needed(-1); if (loc == -ENOENT) return NULL; if (_perf_model_dir_bus == NULL) { _STARPU_MALLOC(_perf_model_dir_bus, _PERF_MODEL_DIR_MAXLEN); snprintf(_perf_model_dir_bus, _PERF_MODEL_DIR_MAXLEN, "%sbus/", _perf_model_paths[_perf_model_bus_location]); } return _perf_model_dir_bus; } char **_starpu_get_perf_model_dirs_codelet() { if (_perf_model_dirs_codelet[0] == NULL) { int i; for(i=0 ; i<_perf_model_paths_nb ; i++) { _STARPU_MALLOC(_perf_model_dirs_codelet[i], _PERF_MODEL_DIR_MAXLEN); snprintf(_perf_model_dirs_codelet[i], _PERF_MODEL_DIR_MAXLEN, "%scodelets/%d/", _perf_model_paths[i], _STARPU_PERFMODEL_VERSION); _starpu_create_codelet_sampling_directory_if_needed(i); } } return _perf_model_dirs_codelet; } static void _perf_model_add_dir(char *dir, int only_is_valid, char *var) { STARPU_ASSERT_MSG(_perf_model_paths_nb < _PERF_MODEL_DIR_MAXNB, "Maximum number of performance models directory"); if (dir == NULL || strlen(dir) == 0) { _STARPU_MSG("Warning: directory <%s> as set %s is empty\n", dir, var); return; } int add=1; if (only_is_valid) { DIR *ddir = opendir(dir); if (ddir == NULL) { add = 0; _STARPU_MSG("Warning: directory <%s> as set %s does not exist\n", dir, var); } else closedir(ddir); } if (add == 1) { _STARPU_DEBUG("Adding directory <%s> as set %s at location %d\n", dir, var, _perf_model_paths_nb); _STARPU_MALLOC(_perf_model_paths[_perf_model_paths_nb], _PERF_MODEL_DIR_MAXLEN); snprintf(_perf_model_paths[_perf_model_paths_nb], _PERF_MODEL_DIR_MAXLEN, "%s/", dir); _perf_model_bus_directory_existence_was_tested[_perf_model_paths_nb] = 0; _perf_model_codelet_directory_existence_was_tested[_perf_model_paths_nb] = 0; _perf_model_paths_nb ++; _perf_model_paths[_perf_model_paths_nb] = NULL; } } void _starpu_set_perf_model_dirs() { if (_perf_model_paths_nb != 0) return; char *env = starpu_getenv("STARPU_PERF_MODEL_DIR"); if (env) { _perf_model_add_dir(env, 0, "by variable STARPU_PERF_MODEL_DIR"); } #ifdef STARPU_PERF_MODEL_DIR _perf_model_add_dir((char *)STARPU_PERF_MODEL_DIR, 0, "by configure parameter"); #else char home[_PERF_MODEL_DIR_MAXLEN]; snprintf(home, _PERF_MODEL_DIR_MAXLEN, "%s/.starpu/sampling", _starpu_get_home_path()); _perf_model_add_dir(home, 0, "by STARPU_HOME directory"); #endif env = starpu_getenv("STARPU_PERF_MODEL_PATH"); if (env) { char *saveptr, *token; token = strtok_r(env, ":", &saveptr); for (; token != NULL; token = strtok_r(NULL, ",", &saveptr)) { _perf_model_add_dir(token, 1, "by variable STARPU_PERF_MODEL_PATH"); } } _perf_model_add_dir(STARPU_SAMPLING_DIR, 1, "by installation directory"); } int _starpu_set_default_perf_model_bus() { assert(_perf_model_bus_location < 0); _perf_model_bus_location = 0; return _perf_model_bus_location; } int _starpu_get_perf_model_bus() { if (_perf_model_bus_location != -1) return _perf_model_bus_location; char hostname[65]; int i=0; _starpu_set_perf_model_dirs(); _starpu_gethostname(hostname, sizeof(hostname)); while(_perf_model_paths[i]) { char path[PATH_LENGTH]; snprintf(path, PATH_LENGTH, "%sbus/%s.config", _perf_model_paths[i], hostname); _STARPU_DEBUG("checking path %s\n", path); int res = access(path, F_OK); if (res == 0) { _perf_model_bus_location = i; return _perf_model_bus_location; } i++; } return -ENOENT; } int _starpu_create_bus_sampling_directory_if_needed(int location) { if (location < 0) location = _starpu_get_perf_model_bus(); if (location == -ENOENT) return -ENOENT; STARPU_ASSERT_MSG(location < _perf_model_paths_nb, "Location %d for performance models file is invalid", location); if (!_perf_model_bus_directory_existence_was_tested[location]) { char *dir = _perf_model_paths[location]; _STARPU_DEBUG("creating directories at <%s>\n", dir); /* The performance of the codelets are stored in * $STARPU_PERF_MODEL_DIR/codelets/ while those of the bus are stored in * $STARPU_PERF_MODEL_DIR/bus/ so that we don't have name collisions */ _starpu_mkpath_and_check(dir, S_IRWXU); /* Performance of the memory subsystem */ char bus[_PERF_MODEL_DIR_MAXLEN]; snprintf(bus, _PERF_MODEL_DIR_MAXLEN, "%s/bus/", dir); _starpu_mkpath_and_check(bus, S_IRWXU); _perf_model_bus_directory_existence_was_tested[location] = 1; } return 0; } void _starpu_create_codelet_sampling_directory_if_needed(int location) { STARPU_ASSERT_MSG(location < _perf_model_paths_nb, "Location %d for performance models file is invalid", location); if (!_perf_model_codelet_directory_existence_was_tested[location]) { char *dir = _perf_model_paths[location]; if (dir) { _STARPU_DEBUG("creating directories at <%s>\n", dir); /* Per-task performance models */ char codelet[_PERF_MODEL_DIR_MAXLEN]; snprintf(codelet, _PERF_MODEL_DIR_MAXLEN, "%scodelets/%d/", dir, _STARPU_PERFMODEL_VERSION); _starpu_mkpath_and_check(codelet, S_IRWXU); /* Performance debug measurements */ char debug[_PERF_MODEL_DIR_MAXLEN]; snprintf(debug, _PERF_MODEL_DIR_MAXLEN, "%sdebug/", dir); _starpu_mkpath(debug, S_IRWXU); _perf_model_codelet_directory_existence_was_tested[location] = 1; } } } void starpu_perfmodel_free_sampling(void) { int i; for(i=0 ; i<_perf_model_paths_nb ; i++) { free(_perf_model_paths[i]); _perf_model_paths[i] = NULL; _perf_model_bus_directory_existence_was_tested[i] = 0; _perf_model_codelet_directory_existence_was_tested[i] = 0; free(_perf_model_dirs_codelet[i]); _perf_model_dirs_codelet[i] = NULL; } _perf_model_paths_nb = 0; _perf_model_bus_location = -1; free(_perf_model_dir_bus); _perf_model_dir_bus = NULL; _starpu_free_arch_combs(); } static double nop_cost_function(struct starpu_task *t STARPU_ATTRIBUTE_UNUSED, struct starpu_perfmodel_arch *a STARPU_ATTRIBUTE_UNUSED, unsigned i STARPU_ATTRIBUTE_UNUSED) { return 0.000001; } struct starpu_perfmodel starpu_perfmodel_nop = { .type = STARPU_PER_ARCH, .arch_cost_function = nop_cost_function, }; /* This function is intended to be used by external tools that should read * the performance model files */ int starpu_perfmodel_list(FILE *output) { #ifdef HAVE_SCANDIR struct dirent **list; int i=0; _starpu_set_perf_model_dirs(); for(i=0 ; _perf_model_paths[i]!=NULL ; i++) { char pcodelet[_PERF_MODEL_DIR_MAXLEN]; int n; snprintf(pcodelet, _PERF_MODEL_DIR_MAXLEN, "%scodelets/%d/", _perf_model_paths[i], _STARPU_PERFMODEL_VERSION); n = scandir(pcodelet, &list, NULL, alphasort); if (n < 0) { _STARPU_DISP("Could not open the perfmodel directory <%s>: %s\n", pcodelet, strerror(errno)); } else { int j; fprintf(output, "codelet directory: <%s>\n", pcodelet); for (j = 0; j < n; j++) { if (strcmp(list[j]->d_name, ".") && strcmp(list[j]->d_name, "..")) fprintf(output, "file: <%s>\n", list[j]->d_name); free(list[j]); } free(list); } } return 0; #else (void)output; _STARPU_MSG("Listing perfmodels is not implemented on pure Windows yet\n"); return 1; #endif } void starpu_perfmodel_directory(FILE *output) { int i; _starpu_set_perf_model_dirs(); for(i=0 ; _perf_model_paths[i]!=NULL ; i++) { char pcodelet[_PERF_MODEL_DIR_MAXLEN]; snprintf(pcodelet, _PERF_MODEL_DIR_MAXLEN, "%scodelets/%d/", _perf_model_paths[i], _STARPU_PERFMODEL_VERSION); fprintf(output, "directory: <%s>\n", pcodelet); } } starpu-1.4.9+dfsg/src/core/perfmodel/perfmodel.h000066400000000000000000000125171507764646700216400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __PERFMODEL_H__ #define __PERFMODEL_H__ /** @file */ #include #include #include #include #pragma GCC visibility push(hidden) #ifdef __cplusplus extern "C" { #endif void _starpu_init_perfmodel(void); /** * Performance models files are stored in a directory whose name * include the version of the performance model format. The version * number is also written in the file itself. * When updating the format, the variable _STARPU_PERFMODEL_VERSION * should be updated. It is then possible to switch easily between * different versions of StarPU having different performance model * formats. */ #define _STARPU_PERFMODEL_VERSION 45 #define PATH_LENGTH 256 #define STR_SHORT_LENGTH 32 #define STR_LONG_LENGTH 256 #define STR_VERY_LONG_LENGTH 1024 struct _starpu_perfmodel_state { struct starpu_perfmodel_per_arch** per_arch; /*STARPU_MAXIMPLEMENTATIONS*/ int** per_arch_is_set; /*STARPU_MAXIMPLEMENTATIONS*/ starpu_pthread_rwlock_t model_rwlock; int *nimpls; int *nimpls_set; /** The number of combinations currently used by the model */ int ncombs; /** The number of combinations allocated in the array nimpls and ncombs */ int ncombs_set; int *combs; }; struct starpu_data_descr; struct _starpu_job; struct starpu_perfmodel_arch; extern unsigned _starpu_calibration_minimum; void _starpu_find_perf_model_codelet(const char *symbol, const char *hostname, char *path, size_t maxlen); void _starpu_find_perf_model_codelet_debug(const char *symbol, const char *hostname, const char *arch, char *path, size_t maxlen); void _starpu_set_default_perf_model_codelet(const char *symbol, const char *hostname, char *path, size_t maxlen); char *_starpu_get_perf_model_dir_default(); char **_starpu_get_perf_model_dirs_codelet() STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; char *_starpu_get_perf_model_dir_bus(); double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl); double _starpu_history_based_job_expected_deviation(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl); void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned scan_history); void _starpu_init_and_load_perfmodel(struct starpu_perfmodel *model); void _starpu_initialize_registered_performance_models(void); void _starpu_deinitialize_registered_performance_models(void); void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model); double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl); double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl); double _starpu_multiple_regression_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl); void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch, unsigned cpuid, double measured, unsigned nimpl, unsigned number); int _starpu_perfmodel_create_comb_if_needed(struct starpu_perfmodel_arch* arch); int _starpu_create_bus_sampling_directory_if_needed(int location); void _starpu_create_codelet_sampling_directory_if_needed(int location); void _starpu_load_bus_performance_files(void); void _starpu_init_bus_performance(void); int _starpu_get_perf_model_bus(); int _starpu_set_default_perf_model_bus(); void _starpu_set_calibrate_flag(unsigned val); unsigned _starpu_get_calibrate_flag(void); #if defined(STARPU_USE_CUDA) unsigned *_starpu_get_cuda_affinity_vector(unsigned gpuid); #endif #if defined(STARPU_USE_OPENCL) unsigned *_starpu_get_opencl_affinity_vector(unsigned gpuid); #endif void _starpu_save_bandwidth_and_latency_disk(double bandwidth_write, double bandwidth_read, double latency_write, double latency_read, unsigned node, const char *name); void _starpu_write_double(FILE *f, const char *format, double val) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; int _starpu_read_double(FILE *f, char *format, double *val) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; void _starpu_simgrid_get_platform_path(int version, char *path, size_t maxlen); void _starpu_perfmodel_realloc(struct starpu_perfmodel *model, int nb); void _starpu_free_arch_combs(void); #if defined(STARPU_HAVE_HWLOC) hwloc_topology_t _starpu_perfmodel_get_hwtopology(); #endif #ifdef __cplusplus } #endif #pragma GCC visibility pop #endif // __PERFMODEL_H__ starpu-1.4.9+dfsg/src/core/perfmodel/perfmodel_bus.c000066400000000000000000002653511507764646700225120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifdef STARPU_USE_CUDA #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif #include #endif #include #include #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #include #include #include #include #include #include #ifdef STARPU_USE_OPENCL #include #endif #ifdef STARPU_HAVE_WINDOWS #include #endif #ifdef STARPU_HAVE_HWLOC #include #ifdef STARPU_HAVE_NVML_H #undef nvmlDeviceGetPciInfo #define nvmlDeviceGetPciInfo _starpu_nvmlDeviceGetPciInfo #undef nvmlDeviceGetUUID #define nvmlDeviceGetUUID _starpu_nvmlDeviceGetUUID #include #endif #ifndef HWLOC_API_VERSION #define HWLOC_OBJ_PU HWLOC_OBJ_PROC #endif #if HWLOC_API_VERSION < 0x00010b00 #define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE #endif #endif #if HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX #include #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE #include #endif #define SIZE (32*1024*1024*sizeof(char)) #define NITER 32 #ifndef STARPU_SIMGRID static void _starpu_bus_force_sampling(int location); #endif /* timing is in µs per byte (i.e. slowness, inverse of bandwidth) */ struct dev_timing { int numa_id; int numa_distance; double timing_htod; double latency_htod; double timing_dtoh; double latency_dtoh; }; static double raw_bandwidth_matrix[STARPU_MAXNODES][STARPU_MAXNODES]; /* MB/s, indexed by device ids */ static double bandwidth_matrix[STARPU_MAXNODES][STARPU_MAXNODES]; /* MB/s, indexed by memory nodes */ static double raw_latency_matrix[STARPU_MAXNODES][STARPU_MAXNODES]; /* µs, indexed by devices ids */ static double latency_matrix[STARPU_MAXNODES][STARPU_MAXNODES]; /* µs, indexed by memory nodes */ static unsigned was_benchmarked = 0; #ifndef STARPU_SIMGRID static unsigned ncpus = 0; #endif static unsigned nmem[STARPU_NRAM]; #define nnumas (nmem[STARPU_CPU_RAM]) #define ncuda (nmem[STARPU_CUDA_RAM]) #define nopencl (nmem[STARPU_OPENCL_RAM]) #define nmpims (nmem[STARPU_MPI_MS_RAM]) #define ntcpip_ms (nmem[STARPU_TCPIP_MS_RAM]) #ifndef STARPU_SIMGRID /* Benchmarking the performance of the bus */ static double numa_latency[STARPU_MAXNUMANODES][STARPU_MAXNUMANODES]; static double numa_timing[STARPU_MAXNUMANODES][STARPU_MAXNUMANODES]; static uint64_t cuda_size[STARPU_MAXCUDADEVS]; static char cuda_devname[STARPU_MAXCUDADEVS][256]; #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) static int gpu_numa[STARPU_NRAM][STARPU_NMAXDEVS]; /* hwloc NUMA logical ID */ #endif #endif /* preference order of NUMA nodes (logical indexes) */ static unsigned affinity_matrix[STARPU_NRAM][STARPU_NMAXDEVS][STARPU_MAXNUMANODES]; #ifndef STARPU_SIMGRID static double timing_dtod[STARPU_NRAM][STARPU_NMAXDEVS][STARPU_NMAXDEVS]; static double latency_dtod[STARPU_NRAM][STARPU_NMAXDEVS][STARPU_NMAXDEVS]; static struct dev_timing timing_per_numa[STARPU_NRAM][STARPU_NMAXDEVS][STARPU_MAXNUMANODES]; #ifdef STARPU_USE_CUDA static char cudadev_direct[STARPU_MAXNODES][STARPU_MAXNODES]; #endif static uint64_t opencl_size[STARPU_MAXOPENCLDEVS]; static char opencl_devname[STARPU_MAXOPENCLDEVS][64]; #endif #ifdef STARPU_HAVE_HWLOC static hwloc_topology_t hwtopology; #if HAVE_DECL_HWLOC_DISTANCES_OBJ_PAIR_VALUES static struct hwloc_distances_s *numa_distances; #endif hwloc_topology_t _starpu_perfmodel_get_hwtopology() { return hwtopology; } static int find_cpu_from_numa_node(unsigned numa_id) { hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, numa_id); if (obj) { #if HWLOC_API_VERSION >= 0x00020000 /* From hwloc 2.0, NUMAnode objects do not contain CPUs, they * are contained in a group which contain the CPUs. */ obj = obj->parent; #endif } else { /* No such NUMA node, probably hwloc 1.x with no NUMA * node, just take one CPU from the whole system */ obj = hwloc_get_root_obj(hwtopology); } STARPU_ASSERT(obj); hwloc_obj_t current = obj; while (current->type != HWLOC_OBJ_PU) { current = current->first_child; /* If we don't find a "PU" obj before the leave, perhaps we are * just not allowed to use it. */ if (!current) return -1; } STARPU_ASSERT(current->type == HWLOC_OBJ_PU); return current->logical_index; } #endif #if (defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)) && !defined(STARPU_SIMGRID) static void set_numa_distance(int dev, unsigned numa, enum starpu_node_kind arch, struct dev_timing *dev_timing_per_cpu) { /* A priori we don't know the distance */ dev_timing_per_cpu->numa_distance = -1; #ifdef STARPU_HAVE_HWLOC if (nnumas <= 1) return; if (!starpu_driver_info[starpu_memory_node_get_worker_archtype(arch)].get_hwloc_obj) return; hwloc_obj_t obj = starpu_driver_info[starpu_memory_node_get_worker_archtype(arch)].get_hwloc_obj(hwtopology, dev); if (!obj) return; hwloc_obj_t numa_obj = _starpu_numa_get_obj(obj); if (!numa_obj) return; if (numa_obj->logical_index == numa) { _STARPU_DEBUG("GPU is on NUMA %d, distance zero\n", numa); dev_timing_per_cpu->numa_distance = 0; return; } #if HAVE_DECL_HWLOC_DISTANCES_OBJ_PAIR_VALUES if (!numa_distances) return; hwloc_obj_t drive_numa_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, numa); hwloc_uint64_t gpu2drive, drive2gpu; if (!drive_numa_obj) return; _STARPU_DEBUG("GPU is on NUMA %d vs %d\n", numa_obj->logical_index, numa); if (hwloc_distances_obj_pair_values(numa_distances, numa_obj, drive_numa_obj, &gpu2drive, &drive2gpu) == 0) { _STARPU_DEBUG("got distance G2H %lu H2G %lu\n", (unsigned long) gpu2drive, (unsigned long) drive2gpu); dev_timing_per_cpu->numa_distance = (gpu2drive + drive2gpu) / 2; } #endif #endif } /* TODO: factorize by using starpu_malloc, and the driver's malloc/free_on_node, copy_data_from/to. * Will probably need to introduce a method for cudaDeviceReset, * for MPI_Barrier, and for determining which combinations should be measured. */ #ifdef STARPU_USE_CUDA static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, unsigned numa, int cpu, struct dev_timing *dev_timing_per_cpu) { _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); size_t size = SIZE; /* Initialize CUDA context on the device */ /* We do not need to enable OpenGL interoperability at this point, * since we cleanly shutdown CUDA before returning. */ cudaSetDevice(dev); /* hack to avoid third party libs to rebind threads */ _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); /* hack to force the initialization */ cudaFree(0); /* hack to avoid third party libs to rebind threads */ _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); /* Get the maximum size which can be allocated on the device */ struct cudaDeviceProp prop; cudaError_t cures; cures = cudaGetDeviceProperties(&prop, dev); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); cuda_size[dev] = prop.totalGlobalMem; strncpy(cuda_devname[dev], prop.name, sizeof(cuda_devname[dev])); cuda_devname[dev][sizeof(cuda_devname[dev])-1] = 0; if (size > prop.totalGlobalMem/4) size = prop.totalGlobalMem/4; /* Allocate a buffer on the device */ unsigned char *d_buffer; cures = cudaMalloc((void **)&d_buffer, size); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); /* hack to avoid third party libs to rebind threads */ _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); /* Allocate a buffer on the host */ unsigned char *h_buffer; #if defined(STARPU_HAVE_HWLOC) if (nnumas > 1) { /* different NUMA nodes available */ hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, numa); STARPU_ASSERT(obj); #if HWLOC_API_VERSION >= 0x00020000 h_buffer = hwloc_alloc_membind(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET); #else h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0); #endif } else #endif { /* we use STARPU_MAIN_RAM */ _STARPU_MALLOC(h_buffer, size); } cudaHostRegister((void *)h_buffer, size, 0); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); /* hack to avoid third party libs to rebind threads */ _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); /* Fill them */ memset(h_buffer, 0, size); cudaMemset(d_buffer, 0, size); cudaDeviceSynchronize(); /* hack to avoid third party libs to rebind threads */ _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); unsigned iter; double timing; double start; double end; /* Measure upload bandwidth */ start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { cudaMemcpy(d_buffer, h_buffer, size, cudaMemcpyHostToDevice); cudaDeviceSynchronize(); } end = starpu_timing_now(); timing = end - start; dev_timing_per_cpu->timing_htod = timing/NITER/size; /* Measure download bandwidth */ start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { cudaMemcpy(h_buffer, d_buffer, size, cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); } end = starpu_timing_now(); timing = end - start; dev_timing_per_cpu->timing_dtoh = timing/NITER/size; /* Measure upload latency */ start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { cudaMemcpy(d_buffer, h_buffer, 1, cudaMemcpyHostToDevice); cudaDeviceSynchronize(); } end = starpu_timing_now(); timing = end - start; dev_timing_per_cpu->latency_htod = timing/NITER; /* Measure download latency */ start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { cudaMemcpy(h_buffer, d_buffer, 1, cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); } end = starpu_timing_now(); timing = end - start; dev_timing_per_cpu->latency_dtoh = timing/NITER; /* Free buffers */ cudaHostUnregister(h_buffer); #if defined(STARPU_HAVE_HWLOC) if (nnumas > 1) { /* different NUMA nodes available */ hwloc_free(hwtopology, h_buffer, size); } else #endif { free(h_buffer); } cudaFree(d_buffer); #if CUDART_VERSION >= 4000 cudaDeviceReset(); #else cudaThreadExit(); #endif } #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER static void measure_bandwidth_between_dev_and_dev_cuda(int src, int dst, double *timingr, double *latencyr) { size_t size = SIZE; int can; /* Get the maximum size which can be allocated on the device */ struct cudaDeviceProp prop; cudaError_t cures; cures = cudaGetDeviceProperties(&prop, src); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); if (size > prop.totalGlobalMem/4) size = prop.totalGlobalMem/4; cures = cudaGetDeviceProperties(&prop, dst); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); if (size > prop.totalGlobalMem/4) size = prop.totalGlobalMem/4; /* Initialize CUDA context on the source */ /* We do not need to enable OpenGL interoperability at this point, * since we cleanly shutdown CUDA before returning. */ cudaSetDevice(src); if (starpu_getenv_number("STARPU_ENABLE_CUDA_GPU_GPU_DIRECT") != 0) { cures = cudaDeviceCanAccessPeer(&can, src, dst); (void) cudaGetLastError(); if (!cures && can) { cures = cudaDeviceEnablePeerAccess(dst, 0); (void) cudaGetLastError(); if (!cures) { _STARPU_DISP("GPU-Direct %d -> %d\n", dst, src); cudadev_direct[src][dst] = 1; } } } /* Allocate a buffer on the device */ unsigned char *s_buffer; cures = cudaMalloc((void **)&s_buffer, size); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); cudaMemset(s_buffer, 0, size); cudaDeviceSynchronize(); /* Initialize CUDA context on the destination */ /* We do not need to enable OpenGL interoperability at this point, * since we cleanly shutdown CUDA before returning. */ cudaSetDevice(dst); if (starpu_getenv_number("STARPU_ENABLE_CUDA_GPU_GPU_DIRECT") != 0) { cures = cudaDeviceCanAccessPeer(&can, dst, src); (void) cudaGetLastError(); if (!cures && can) { cures = cudaDeviceEnablePeerAccess(src, 0); (void) cudaGetLastError(); if (!cures) { _STARPU_DISP("GPU-Direct %d -> %d\n", src, dst); cudadev_direct[dst][src] = 1; } } } /* Allocate a buffer on the device */ unsigned char *d_buffer; cures = cudaMalloc((void **)&d_buffer, size); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); cudaMemset(d_buffer, 0, size); cudaDeviceSynchronize(); unsigned iter; double timing; double start; double end; /* Measure upload bandwidth */ start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { cudaMemcpyPeer(d_buffer, dst, s_buffer, src, size); cudaDeviceSynchronize(); } end = starpu_timing_now(); timing = end - start; *timingr = timing/NITER/size; /* Measure upload latency */ start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { cudaMemcpyPeer(d_buffer, dst, s_buffer, src, 1); cudaDeviceSynchronize(); } end = starpu_timing_now(); timing = end - start; *latencyr = timing/NITER; /* Free buffers */ cudaFree(d_buffer); cudaSetDevice(src); cudaFree(s_buffer); #if CUDART_VERSION >= 4000 cudaDeviceReset(); #else cudaThreadExit(); #endif } #endif #endif #ifdef STARPU_USE_OPENCL static void measure_bandwidth_between_host_and_dev_on_numa_with_opencl(int dev, unsigned numa, int cpu, struct dev_timing *dev_timing_per_cpu) { cl_context context; cl_command_queue queue; cl_int err=0; size_t size = SIZE; int not_initialized; _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); /* Is the context already initialised ? */ starpu_opencl_get_context(dev, &context); not_initialized = (context == NULL); if (not_initialized == 1) _starpu_opencl_init_context(dev); /* Get context and queue */ starpu_opencl_get_context(dev, &context); starpu_opencl_get_queue(dev, &queue); /* Get the maximum size which can be allocated on the device */ cl_device_id device; cl_ulong maxMemAllocSize, totalGlobalMem; starpu_opencl_get_device(dev, &device); err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(maxMemAllocSize), &maxMemAllocSize, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); if (size > (size_t)maxMemAllocSize/4) size = maxMemAllocSize/4; err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE , sizeof(totalGlobalMem), &totalGlobalMem, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); opencl_size[dev] = totalGlobalMem; err = clGetDeviceInfo(device, CL_DEVICE_NAME , sizeof(opencl_devname[dev]), &opencl_devname[dev], NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); if (_starpu_opencl_get_device_type(dev) == CL_DEVICE_TYPE_CPU) { /* Let's not use too much RAM when running OpenCL on a CPU: it * would make the OS swap like crazy. */ size /= 2; } /* hack to avoid third party libs to rebind threads */ _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); /* Allocate a buffer on the device */ cl_mem d_buffer; d_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &err); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); /* hack to avoid third party libs to rebind threads */ _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); /* Allocate a buffer on the host */ unsigned char *h_buffer; #if defined(STARPU_HAVE_HWLOC) if (nnumas > 1) { /* different NUMA nodes available */ hwloc_obj_t obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, numa); STARPU_ASSERT(obj); #if HWLOC_API_VERSION >= 0x00020000 h_buffer = hwloc_alloc_membind(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET); #else h_buffer = hwloc_alloc_membind_nodeset(hwtopology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0); #endif } else #endif { /* we use STARPU_MAIN_RAM */ _STARPU_MALLOC(h_buffer, size); } /* hack to avoid third party libs to rebind threads */ _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); /* Fill them */ memset(h_buffer, 0, size); err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, size, h_buffer, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); /* hack to avoid third party libs to rebind threads */ _starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL); unsigned iter; double timing; double start; double end; /* Measure upload bandwidth */ start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, size, h_buffer, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); } end = starpu_timing_now(); timing = end - start; dev_timing_per_cpu->timing_htod = timing/NITER/size; /* Measure download bandwidth */ start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { err = clEnqueueReadBuffer(queue, d_buffer, CL_TRUE, 0, size, h_buffer, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); } end = starpu_timing_now(); timing = end - start; dev_timing_per_cpu->timing_dtoh = timing/NITER/size; /* Measure upload latency */ start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { err = clEnqueueWriteBuffer(queue, d_buffer, CL_TRUE, 0, 1, h_buffer, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); } end = starpu_timing_now(); timing = end - start; dev_timing_per_cpu->latency_htod = timing/NITER; /* Measure download latency */ start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { err = clEnqueueReadBuffer(queue, d_buffer, CL_TRUE, 0, 1, h_buffer, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); } end = starpu_timing_now(); timing = end - start; dev_timing_per_cpu->latency_dtoh = timing/NITER; /* Free buffers */ err = clReleaseMemObject(d_buffer); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); #if defined(STARPU_HAVE_HWLOC) if (nnumas > 1) { /* different NUMA nodes available */ hwloc_free(hwtopology, h_buffer, size); } else #endif { free(h_buffer); } /* Uninitiliaze OpenCL context on the device */ if (not_initialized == 1) _starpu_opencl_deinit_context(dev); } #endif static void measure_bandwidth_between_host_and_dev(int dev, struct dev_timing dev_timing_per_numa[STARPU_NMAXDEVS][STARPU_MAXNUMANODES], enum starpu_node_kind type) { /* We measure the bandwidth between each GPU and each NUMA node */ unsigned numa_id; for (numa_id = 0; numa_id < nnumas; numa_id++) { /* Store STARPU_memnode for later */ dev_timing_per_numa[dev][numa_id].numa_id = numa_id; /* Chose one CPU connected to this NUMA node */ int cpu_id = 0; #ifdef STARPU_HAVE_HWLOC cpu_id = find_cpu_from_numa_node(numa_id); #endif if (cpu_id < 0) continue; _STARPU_DISP("with NUMA %d...\n", numa_id); /* Check hwloc location of GPU */ set_numa_distance(dev, numa_id, type, &dev_timing_per_numa[dev][numa_id]); #ifdef STARPU_USE_CUDA if (type == STARPU_CUDA_RAM) measure_bandwidth_between_host_and_dev_on_numa_with_cuda(dev, numa_id, cpu_id, &dev_timing_per_numa[dev][numa_id]); #endif #ifdef STARPU_USE_OPENCL if (type == STARPU_OPENCL_RAM) measure_bandwidth_between_host_and_dev_on_numa_with_opencl(dev, numa_id, cpu_id, &dev_timing_per_numa[dev][numa_id]); #endif } /* TODO: also measure the available aggregated bandwidth on a NUMA node, and through the interconnect */ #if defined(STARPU_HAVE_HWLOC) hwloc_obj_t obj = NULL; if (starpu_driver_info[starpu_memory_node_get_worker_archtype(type)].get_hwloc_obj) obj = starpu_driver_info[starpu_memory_node_get_worker_archtype(type)].get_hwloc_obj(hwtopology, dev); if (obj) obj = _starpu_numa_get_obj(obj); if (obj) gpu_numa[type][dev] = obj->logical_index; else #endif gpu_numa[type][dev] = -1; #ifdef STARPU_VERBOSE for (numa_id = 0; numa_id < nnumas; numa_id++) { double bandwidth_dtoh = dev_timing_per_numa[dev][numa_id].timing_dtoh; double bandwidth_htod = dev_timing_per_numa[dev][numa_id].timing_htod; double bandwidth_sum2 = bandwidth_dtoh*bandwidth_dtoh + bandwidth_htod*bandwidth_htod; _STARPU_DISP("(%10s) BANDWIDTH GPU %d NUMA %u - htod %.0fMB/s - dtoh %.0fMB/s - %.0fMB/s\n", starpu_memory_driver_info[type].name_upper, dev, numa_id, 1/bandwidth_htod, 1/bandwidth_dtoh, 1/sqrt(bandwidth_sum2)); } #endif } #endif /* defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) */ #if !defined(STARPU_SIMGRID) static void measure_bandwidth_latency_between_numa(int numa_src, int numa_dst, double *timing_nton, double *latency_nton) { #if defined(STARPU_HAVE_HWLOC) if (nnumas > 1) { /* different NUMA nodes available */ double start, end, timing; unsigned iter; /* Chose one CPU connected to this NUMA node */ int cpu_id = 0; cpu_id = find_cpu_from_numa_node(numa_src); if (cpu_id < 0) /* We didn't find a CPU attached to the numa_src NUMA nodes */ goto no_calibration; _starpu_bind_thread_on_cpu(cpu_id, STARPU_NOWORKERID, NULL); unsigned char *h_buffer; hwloc_obj_t obj_src = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, numa_src); STARPU_ASSERT(obj_src); #if HWLOC_API_VERSION >= 0x00020000 h_buffer = hwloc_alloc_membind(hwtopology, SIZE, obj_src->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET); #else h_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_src->nodeset, HWLOC_MEMBIND_BIND, 0); #endif unsigned char *d_buffer; hwloc_obj_t obj_dst = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, numa_dst); STARPU_ASSERT(obj_dst); #if HWLOC_API_VERSION >= 0x00020000 d_buffer = hwloc_alloc_membind(hwtopology, SIZE, obj_dst->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET); #else d_buffer = hwloc_alloc_membind_nodeset(hwtopology, SIZE, obj_dst->nodeset, HWLOC_MEMBIND_BIND, 0); #endif memset(h_buffer, 0, SIZE); start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { memcpy(d_buffer, h_buffer, SIZE); } end = starpu_timing_now(); timing = end - start; *timing_nton = timing/NITER/SIZE; start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { memcpy(d_buffer, h_buffer, 1); } end = starpu_timing_now(); timing = end - start; *latency_nton = timing/NITER; hwloc_free(hwtopology, h_buffer, SIZE); hwloc_free(hwtopology, d_buffer, SIZE); } else no_calibration: #endif { /* Cannot make a real calibration */ numa_timing[numa_src][numa_dst] = 0.01; numa_latency[numa_src][numa_dst] = 0; } } #endif static void benchmark_all_memory_nodes(void) { #ifdef STARPU_SIMGRID _STARPU_DISP("Can not measure bus in simgrid mode, please run starpu_calibrate_bus in non-simgrid mode to make sure the bus performance model was calibrated\n"); STARPU_ABORT(); #else /* !SIMGRID */ unsigned i, j; _STARPU_DEBUG("Benchmarking the speed of the bus\n"); #ifdef STARPU_DEVEL #warning FIXME: when running several StarPU processes on the same node (MPI rank per numa), we need to use a lock to avoid concurrent benchmarking. #endif #ifdef STARPU_HAVE_HWLOC int ret; ret = hwloc_topology_init(&hwtopology); STARPU_ASSERT_MSG(ret == 0, "Could not initialize Hwloc topology (%s)\n", strerror(errno)); _starpu_topology_filter(hwtopology); ret = hwloc_topology_load(hwtopology); STARPU_ASSERT_MSG(ret == 0, "Could not load Hwloc topology (%s)\n", strerror(errno)); #if HAVE_DECL_HWLOC_DISTANCES_OBJ_PAIR_VALUES unsigned n = 1; hwloc_distances_get_by_name(hwtopology, "NUMALatency", &n, &numa_distances, 0); if (!n) numa_distances = NULL; #endif #endif #ifdef STARPU_HAVE_HWLOC hwloc_bitmap_t former_cpuset = hwloc_bitmap_alloc(); hwloc_get_cpubind(hwtopology, former_cpuset, HWLOC_CPUBIND_THREAD); #elif defined(__linux__) /* Save the current cpu binding */ cpu_set_t former_process_affinity; int ret; ret = sched_getaffinity(0, sizeof(former_process_affinity), &former_process_affinity); if (ret) { perror("sched_getaffinity"); STARPU_ABORT(); } #else #warning Missing binding support, StarPU will not be able to properly benchmark NUMA topology #endif for (i = 0; i < nnumas; i++) for (j = 0; j < nnumas; j++) if (i != j) { _STARPU_DISP("NUMA %d -> %d...\n", i, j); measure_bandwidth_latency_between_numa(i, j, &numa_timing[i][j], &numa_latency[i][j]); } #ifdef STARPU_USE_CUDA ncuda = _starpu_get_cuda_device_count(); for (i = 0; i < ncuda; i++) { _STARPU_DISP("CUDA %u...\n", i); /* measure bandwidth between Host and Device i */ measure_bandwidth_between_host_and_dev(i, timing_per_numa[STARPU_CUDA_RAM], STARPU_CUDA_RAM); } #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER for (i = 0; i < ncuda; i++) { for (j = 0; j < ncuda; j++) if (i != j) { _STARPU_DISP("CUDA %u -> %u...\n", i, j); /* measure bandwidth between Host and Device i */ measure_bandwidth_between_dev_and_dev_cuda(i, j, &timing_dtod[STARPU_CUDA_RAM][i][j], &latency_dtod[STARPU_CUDA_RAM][i][j]); } } #endif #endif #ifdef STARPU_USE_OPENCL nopencl = _starpu_opencl_get_device_count(); for (i = 0; i < nopencl; i++) { _STARPU_DISP("OpenCL %u...\n", i); /* measure bandwidth between Host and Device i */ measure_bandwidth_between_host_and_dev(i, timing_per_numa[STARPU_OPENCL_RAM], STARPU_OPENCL_RAM); } #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE double mpi_time_device_to_device[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS] = {{0.0}}; double mpi_latency_device_to_device[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS] = {{0.0}}; /* FIXME: rather make _starpu_mpi_common_measure_bandwidth_latency directly fill timing_per_numa */ _starpu_mpi_common_measure_bandwidth_latency(mpi_time_device_to_device, mpi_latency_device_to_device); for (i = 0; i < nmpims; i++) { for (j = 0; j < nnumas; j++) { timing_per_numa[STARPU_MPI_MS_RAM][i][j].numa_id = j; timing_per_numa[STARPU_MPI_MS_RAM][i][j].numa_distance = -1; timing_per_numa[STARPU_MPI_MS_RAM][i][j].timing_htod = mpi_time_device_to_device[0][i+1]; timing_per_numa[STARPU_MPI_MS_RAM][i][j].latency_htod = mpi_latency_device_to_device[0][i+1]; timing_per_numa[STARPU_MPI_MS_RAM][i][j].timing_dtoh = mpi_time_device_to_device[i+1][0]; timing_per_numa[STARPU_MPI_MS_RAM][i][j].latency_dtoh = mpi_latency_device_to_device[i+1][0]; } for (j = 0; j < nmpims; j++) { timing_dtod[STARPU_MPI_MS_RAM][i][j] = mpi_time_device_to_device[i+1][j+1]; } } #endif /* STARPU_USE_MPI_MASTER_SLAVE */ #ifdef STARPU_USE_TCPIP_MASTER_SLAVE double tcpip_time_device_to_device[STARPU_MAXTCPIPDEVS][STARPU_MAXTCPIPDEVS] = {{0.0}}; double tcpip_latency_device_to_device[STARPU_MAXTCPIPDEVS][STARPU_MAXTCPIPDEVS] = {{0.0}}; /* FIXME: rather make _starpu_mpi_common_measure_bandwidth_latency directly fill timing_per_numa */ _starpu_tcpip_common_measure_bandwidth_latency(tcpip_time_device_to_device, tcpip_latency_device_to_device); for (i = 0; i < ntcpip_ms; i++) { for (j = 0; j < nnumas; j++) { timing_per_numa[STARPU_TCPIP_MS_RAM][i][j].numa_id = j; timing_per_numa[STARPU_TCPIP_MS_RAM][i][j].numa_distance = -1; timing_per_numa[STARPU_TCPIP_MS_RAM][i][j].timing_htod = tcpip_time_device_to_device[0][i+1]; timing_per_numa[STARPU_TCPIP_MS_RAM][i][j].latency_htod = tcpip_latency_device_to_device[0][i+1]; timing_per_numa[STARPU_TCPIP_MS_RAM][i][j].timing_dtoh = tcpip_time_device_to_device[i+1][0]; timing_per_numa[STARPU_TCPIP_MS_RAM][i][j].latency_dtoh = tcpip_latency_device_to_device[i+1][0]; } for (j = 0; j < ntcpip_ms; j++) { timing_dtod[STARPU_TCPIP_MS_RAM][i][j] = tcpip_time_device_to_device[i+1][j+1]; } } #endif /* STARPU_USE_TCPIP_MASTER_SLAVE */ #ifdef STARPU_HAVE_HWLOC hwloc_set_cpubind(hwtopology, former_cpuset, HWLOC_CPUBIND_THREAD); hwloc_bitmap_free(former_cpuset); #elif defined(__linux__) /* Restore the former affinity */ ret = sched_setaffinity(0, sizeof(former_process_affinity), &former_process_affinity); if (ret) { perror("sched_setaffinity"); STARPU_ABORT(); } #endif #ifdef STARPU_HAVE_HWLOC #if HAVE_DECL_HWLOC_DISTANCES_OBJ_PAIR_VALUES if (numa_distances) hwloc_distances_release(hwtopology, numa_distances); numa_distances = NULL; #endif hwloc_topology_destroy(hwtopology); #endif _STARPU_DEBUG("Benchmarking the speed of the bus is done.\n"); was_benchmarked = 1; #endif /* !SIMGRID */ } static void get_bus_path(const char *type, char *path, size_t maxlen) { char hostname[65]; char *bus; bus = _starpu_get_perf_model_dir_bus(); _starpu_gethostname(hostname, sizeof(hostname)); snprintf(path, maxlen, "%s%s.%s", bus?_starpu_get_perf_model_dir_bus():"INVALID_LOCATION/", hostname, type); } /* * Affinity */ static void get_affinity_path(char *path, size_t maxlen) { get_bus_path("affinity", path, maxlen); } #ifndef STARPU_SIMGRID static void load_bus_affinity_file_content(void) { FILE *f; int locked; char path[PATH_LENGTH]; get_affinity_path(path, sizeof(path)); _STARPU_DEBUG("loading affinities from %s\n", path); f = fopen(path, "r"); STARPU_ASSERT_MSG(f, "Error when reading from file '%s'", path); locked = _starpu_frdlock(f) == 0; unsigned gpu; enum starpu_node_kind type; unsigned ok = 1; for (type = STARPU_CUDA_RAM; ok && type < STARPU_NRAM; type++) { for (gpu = 0; ok && gpu < nmem[type]; gpu++) { int ret; unsigned dummy; _starpu_drop_comments(f); ret = fscanf(f, "%u\t", &dummy); if (ret != 1) { /* Old perfmodel file, ignore rest */ ok = 0; break; } STARPU_ASSERT(dummy == gpu); unsigned numa; for (numa = 0; numa < nnumas; numa++) { ret = fscanf(f, "%u\t", &affinity_matrix[type][gpu][numa]); STARPU_ASSERT_MSG(ret == 1, "Error when reading from file '%s'", path); } ret = fscanf(f, "\n"); STARPU_ASSERT_MSG(ret == 0, "Error when reading from file '%s'", path); } } if (locked) _starpu_frdunlock(f); fclose(f); } /* NB: we want to sort the bandwidth by DECREASING order */ static int compar_dev_timing(const void *left_dev_timing, const void *right_dev_timing) { const struct dev_timing *left = (const struct dev_timing *)left_dev_timing; const struct dev_timing *right = (const struct dev_timing *)right_dev_timing; if (left->numa_distance == 0 && right->numa_distance != 0) /* We prefer left */ return -1; if (right->numa_distance == 0 && left->numa_distance != 0) /* We prefer right */ return 1; if (left->numa_distance >= 0 && right->numa_distance >= 0) { return left->numa_distance > right->numa_distance ? 1 : left->numa_distance < right->numa_distance ? -1 : 0; } double left_dtoh = left->timing_dtoh; double left_htod = left->timing_htod; double right_dtoh = right->timing_dtoh; double right_htod = right->timing_htod; double timing_sum2_left = left_dtoh*left_dtoh + left_htod*left_htod; double timing_sum2_right = right_dtoh*right_dtoh + right_htod*right_htod; /* it's for a decreasing sorting */ return timing_sum2_left > timing_sum2_right ? 1 : timing_sum2_left < timing_sum2_right ? -1 : 0; } static void write_bus_affinity_file_content(void) { STARPU_ASSERT(was_benchmarked); FILE *f; char path[PATH_LENGTH]; int locked; get_affinity_path(path, sizeof(path)); _STARPU_DEBUG("writing affinities to %s\n", path); f = fopen(path, "a+"); if (!f) { perror("fopen write_buf_affinity_file_content"); _STARPU_DISP("path '%s'\n", path); fflush(stderr); STARPU_ABORT(); } locked = _starpu_fwrlock(f) == 0; fseek(f, 0, SEEK_SET); _starpu_fftruncate(f, 0); unsigned numa; unsigned gpu; enum starpu_node_kind type; fprintf(f, "# GPU\t"); for (numa = 0; numa < nnumas; numa++) fprintf(f, "NUMA%u\t", numa); fprintf(f, "\n"); for (type = STARPU_CUDA_RAM; type < STARPU_NRAM; type++) { /* Use an other array to sort bandwidth */ struct dev_timing timing_per_numa_sorted[STARPU_NMAXDEVS][STARPU_MAXNUMANODES]; memcpy(timing_per_numa_sorted, timing_per_numa[type], sizeof(timing_per_numa[type])); for (gpu = 0; gpu < nmem[type]; gpu++) { fprintf(f, "%u\t", gpu); qsort(timing_per_numa_sorted[gpu], nnumas, sizeof(struct dev_timing), compar_dev_timing); for (numa = 0; numa < nnumas; numa++) { fprintf(f, "%d\t", timing_per_numa_sorted[gpu][numa].numa_id); } fprintf(f, "\n"); } } if (locked) _starpu_fwrunlock(f); fclose(f); } static void generate_bus_affinity_file(void) { if (!was_benchmarked) benchmark_all_memory_nodes(); write_bus_affinity_file_content(); } static int check_bus_affinity_file(void) { int ret = 1; FILE *f; int locked; unsigned dummy; char path[PATH_LENGTH]; get_affinity_path(path, sizeof(path)); _STARPU_DEBUG("loading affinities from %s\n", path); f = fopen(path, "r"); STARPU_ASSERT_MSG(f, "Error when reading from file '%s'", path); locked = _starpu_frdlock(f) == 0; ret = fscanf(f, "# GPU\t"); STARPU_ASSERT_MSG(ret == 0, "Error when reading from file '%s'", path); ret = fscanf(f, "NUMA%u\t", &dummy); if (locked) _starpu_frdunlock(f); fclose(f); return ret == 1; } static void load_bus_affinity_file(void) { int exist, check = 1; char path[PATH_LENGTH]; get_affinity_path(path, sizeof(path)); /* access return 0 if file exists */ exist = access(path, F_OK); if (exist == 0) /* return 0 if it's not good */ check = check_bus_affinity_file(); if (check == 0) _STARPU_DISP("Affinity File is too old for this version of StarPU ! Rebuilding it...\n"); if (check == 0 || exist != 0) { /* File does not exist yet */ generate_bus_affinity_file(); } load_bus_affinity_file_content(); } unsigned *_starpu_get_cuda_affinity_vector(unsigned gpuid) { return affinity_matrix[STARPU_CUDA_RAM][gpuid]; } unsigned *_starpu_get_opencl_affinity_vector(unsigned gpuid) { return affinity_matrix[STARPU_OPENCL_RAM][gpuid]; } void starpu_bus_print_affinity(FILE *f) { enum starpu_node_kind type; fprintf(f, "# GPU\tNUMA in preference order (logical index)\n"); for (type = STARPU_CUDA_RAM; type < STARPU_NRAM; type++) { unsigned gpu; if (!nmem[type]) continue; fprintf(f, "# %s\n", starpu_memory_driver_info[type].name_upper); for(gpu = 0 ; gpu. Expected a number. Did you change the maximum number of GPUs at ./configure time?\n", path); fclose(f); return 0; } n = getc(f); if (n == '\n') break; if (n != '\t') { _STARPU_DISP("bogus character '%c' (%d) in latency file %s\n", n, n, path); fclose(f); return 0; } raw_latency_matrix[src][dst] = latency; /* Look out for \t\n */ n = getc(f); if (n == '\n') break; ungetc(n, f); n = '\t'; } /* No more values, take NAN */ for (; dst < STARPU_MAXNODES; dst++) raw_latency_matrix[src][dst] = NAN; while (n == '\t') { /* Look out for \t\n */ n = getc(f); if (n == '\n') break; ungetc(n, f); n = _starpu_read_double(f, "%le", &latency); if (n && !isnan(latency)) { _STARPU_DISP("Too many nodes in latency file %s for this configuration (%d). Did you change the maximum number of GPUs at ./configure time?\n", path, STARPU_MAXNODES); fclose(f); return 0; } n = getc(f); } if (n != '\n') { _STARPU_DISP("Bogus character '%c' (%d) in latency file %s\n", n, n, path); fclose(f); return 0; } /* Look out for EOF */ n = getc(f); if (n == EOF) break; ungetc(n, f); } if (locked) _starpu_frdunlock(f); fclose(f); /* No more values, take NAN */ for (; src < STARPU_MAXNODES; src++) for (dst = 0; dst < STARPU_MAXNODES; dst++) raw_latency_matrix[src][dst] = NAN; return 1; } #if !defined(STARPU_SIMGRID) static double search_bus_best_latency(int src, enum starpu_node_kind type, int htod) { /* Search the best latency for this node */ double best = 0.0; double actual = 0.0; unsigned check = 0; unsigned numa; for (numa = 0; numa < nnumas; numa++) { if (htod) actual = timing_per_numa[type][src][numa].latency_htod; else actual = timing_per_numa[type][src][numa].latency_dtoh; if (!check || actual < best) { best = actual; check = 1; } } return best; } static void write_bus_latency_file_content(void) { enum starpu_node_kind type; unsigned src, dst, maxnode; /* Boundaries to check if src or dst are inside the interval */ unsigned b_low, b_up; FILE *f; int locked; STARPU_ASSERT(was_benchmarked); char path[PATH_LENGTH]; get_latency_path(path, sizeof(path)); _STARPU_DEBUG("writing latencies to %s\n", path); f = fopen(path, "a+"); if (!f) { perror("fopen write_bus_latency_file_content"); _STARPU_DISP("path '%s'\n", path); fflush(stderr); STARPU_ABORT(); } locked = _starpu_fwrlock(f) == 0; fseek(f, 0, SEEK_SET); _starpu_fftruncate(f, 0); fprintf(f, "# "); for (type = STARPU_CPU_RAM; type < STARPU_NRAM; type++) { for (dst = 0; dst < nmem[type]; dst++) { fprintf(f, "to %s %d\t", _starpu_node_get_prefix(type), dst); } } fprintf(f, "\n"); maxnode = 0; for (type = STARPU_CPU_RAM; type < STARPU_NRAM; type++) maxnode += nmem[type]; for (src = 0; src < STARPU_MAXNODES; src++) { for (dst = 0; dst < STARPU_MAXNODES; dst++) { /* µs */ double latency = 0.0; if ((src >= maxnode) || (dst >= maxnode)) { /* convention */ latency = NAN; } else if (src == dst) { latency = 0.0; } else { b_low = b_up = 0; /* ---- Begin NUMA ---- */ b_up += nnumas; if (src >= b_low && src < b_up && dst >= b_low && dst < b_up) latency += numa_latency[src-b_low][dst-b_low]; /* copy interval to check numa index later */ unsigned numa_low = b_low; unsigned numa_up = b_up; b_low += nnumas; /* ---- End NUMA ---- */ for (type = STARPU_CUDA_RAM; type < STARPU_NRAM; type++) { b_up += nmem[type]; /* Check if it's direct GPU-GPU transfer */ if (src >= b_low && src < b_up && dst >= b_low && dst < b_up && timing_dtod[type][src-b_low][dst-b_low]) latency += latency_dtod[type][src-b_low][dst-b_low]; else { /* Check if it's GPU <-> NUMA link */ if (src >=b_low && src < b_up && dst >= numa_low && dst < numa_up) latency += timing_per_numa[type][(src-b_low)][dst-numa_low].latency_dtoh; if (dst >= b_low && dst < b_up && src >= numa_low && dst < numa_up) latency += timing_per_numa[type][(dst-b_low)][src-numa_low].latency_htod; /* To other devices, take the best latency */ if (src >= b_low && src < b_up && !(dst >= numa_low && dst < numa_up)) latency += search_bus_best_latency(src-b_low, type, 0); if (dst >= b_low && dst < b_up && !(src >= numa_low && dst < numa_up)) latency += search_bus_best_latency(dst-b_low, type, 1); } b_low += nmem[type]; } } if (dst > 0) fputc('\t', f); _starpu_write_double(f, "%e", latency); } fprintf(f, "\n"); } if (locked) _starpu_fwrunlock(f); fclose(f); } #endif static void generate_bus_latency_file(void) { if (!was_benchmarked) benchmark_all_memory_nodes(); #ifndef STARPU_SIMGRID write_bus_latency_file_content(); #endif } static void load_bus_latency_file(void) { int res; char path[PATH_LENGTH]; get_latency_path(path, sizeof(path)); res = access(path, F_OK); if (res || !load_bus_latency_file_content()) { /* File does not exist yet or is bogus */ generate_bus_latency_file(); res = load_bus_latency_file_content(); STARPU_ASSERT(res); } } /* * Bandwidth */ static void get_bandwidth_path(char *path, size_t maxlen) { get_bus_path("bandwidth", path, maxlen); } static int load_bus_bandwidth_file_content(void) { int n; unsigned src, dst; FILE *f; double bandwidth; int locked; char path[PATH_LENGTH]; get_bandwidth_path(path, sizeof(path)); _STARPU_DEBUG("loading bandwidth from %s\n", path); f = fopen(path, "r"); if (!f) { perror("fopen load_bus_bandwidth_file_content"); _STARPU_DISP("path '%s'\n", path); fflush(stderr); STARPU_ABORT(); } locked = _starpu_frdlock(f) == 0; for (src = 0; src < STARPU_MAXNODES; src++) { _starpu_drop_comments(f); for (dst = 0; dst < STARPU_MAXNODES; dst++) { n = _starpu_read_double(f, "%le", &bandwidth); if (n != 1) { _STARPU_DISP("Error while reading bandwidth file <%s>. Expected a number\n", path); fclose(f); return 0; } n = getc(f); if (n == '\n') break; if (n != '\t') { _STARPU_DISP("bogus character '%c' (%d) in bandwidth file %s\n", n, n, path); fclose(f); return 0; } int limit_bandwidth = starpu_getenv_number("STARPU_LIMIT_BANDWIDTH"); if (limit_bandwidth >= 0) { #ifndef STARPU_SIMGRID _STARPU_DISP("Warning: STARPU_LIMIT_BANDWIDTH set to %d but simgrid not enabled, thus ignored\n", limit_bandwidth); #else #ifdef HAVE_SG_LINK_BANDWIDTH_SET bandwidth = limit_bandwidth; #else _STARPU_DISP("Warning: STARPU_LIMIT_BANDWIDTH set to %d but this requires simgrid 3.26\n", limit_bandwidth); #endif #endif } raw_bandwidth_matrix[src][dst] = bandwidth; /* Look out for \t\n */ n = getc(f); if (n == '\n') break; ungetc(n, f); n = '\t'; } /* No more values, take NAN */ for (; dst < STARPU_MAXNODES; dst++) raw_bandwidth_matrix[src][dst] = NAN; while (n == '\t') { /* Look out for \t\n */ n = getc(f); if (n == '\n') break; ungetc(n, f); n = _starpu_read_double(f, "%le", &bandwidth); if (n && !isnan(bandwidth)) { _STARPU_DISP("Too many nodes in bandwidth file %s for this configuration (%d)\n", path, STARPU_MAXNODES); fclose(f); return 0; } n = getc(f); } if (n != '\n') { _STARPU_DISP("Bogus character '%c' (%d) in bandwidth file %s\n", n, n, path); fclose(f); return 0; } /* Look out for EOF */ n = getc(f); if (n == EOF) break; ungetc(n, f); } if (locked) _starpu_frdunlock(f); fclose(f); /* No more values, take NAN */ for (; src < STARPU_MAXNODES; src++) for (dst = 0; dst < STARPU_MAXNODES; dst++) raw_bandwidth_matrix[src][dst] = NAN; return 1; } #if !defined(STARPU_SIMGRID) static double search_bus_best_timing(int src, enum starpu_node_kind type, int htod) { /* Search the best latency for this node */ double best = 0.0; double actual = 0.0; unsigned check = 0; unsigned numa; for (numa = 0; numa < nnumas; numa++) { if (htod) actual = timing_per_numa[type][src][numa].timing_htod; else actual = timing_per_numa[type][src][numa].timing_dtoh; if (!check || actual < best) { best = actual; check = 1; } } return best; } static void write_bus_bandwidth_file_content(void) { enum starpu_node_kind type; unsigned src, dst, maxnode; unsigned b_low, b_up; FILE *f; int locked; STARPU_ASSERT(was_benchmarked); char path[PATH_LENGTH]; get_bandwidth_path(path, sizeof(path)); _STARPU_DEBUG("writing bandwidth to %s\n", path); f = fopen(path, "a+"); STARPU_ASSERT_MSG(f, "Error when opening file (writing) '%s'", path); locked = _starpu_fwrlock(f) == 0; fseek(f, 0, SEEK_SET); _starpu_fftruncate(f, 0); fprintf(f, "# "); for (type = STARPU_CPU_RAM; type < STARPU_NRAM; type++) { for (dst = 0; dst < nmem[type]; dst++) { fprintf(f, "to %s %d\t", _starpu_node_get_prefix(type), dst); } } fprintf(f, "\n"); maxnode = 0; for (type = STARPU_CPU_RAM; type < STARPU_NRAM; type++) maxnode += nmem[type]; for (src = 0; src < STARPU_MAXNODES; src++) { for (dst = 0; dst < STARPU_MAXNODES; dst++) { double bandwidth; if ((src >= maxnode) || (dst >= maxnode)) { bandwidth = NAN; } else if (src != dst) { double slowness = 0.0; /* Total bandwidth is the harmonic mean of bandwidths */ b_low = b_up = 0; /* Begin NUMA */ b_up += nnumas; if (src >= b_low && src < b_up && dst >= b_low && dst < b_up) slowness += numa_timing[src-b_low][dst-b_low]; /* copy interval to check numa index later */ unsigned numa_low = b_low; unsigned numa_up = b_up; b_low += nnumas; /* End NUMA */ for (type = STARPU_CUDA_RAM; type < STARPU_NRAM; type++) { b_up += nmem[type]; /* Check if it's direct GPU-GPU transfer */ if (src >= b_low && src < b_up && dst >= b_low && dst < b_up && timing_dtod[type][src-b_low][dst-b_low]) slowness += timing_dtod[type][src-b_low][dst-b_low]; else { /* Check if it's GPU <-> NUMA link */ if (src >= b_low && src < b_up && dst >= numa_low && dst < numa_up) slowness += timing_per_numa[type][(src-b_low)][dst-numa_low].timing_dtoh; if (dst >= b_low && dst < b_up && src >= numa_low && src < numa_up) slowness += timing_per_numa[type][(dst-b_low)][src-numa_low].timing_htod; /* To other devices, take the best slowness */ if (src >= b_low && src < b_up && !(dst >= numa_low && dst < numa_up)) slowness += search_bus_best_timing(src-b_low, type, 0); if (dst >= b_low && dst < b_up && !(src >= numa_low && src < numa_up)) slowness += search_bus_best_timing(dst-b_low, type, 1); } b_low += nmem[type]; } bandwidth = 1.0/slowness; } else { /* convention */ bandwidth = 0.0; } if (dst) fputc('\t', f); _starpu_write_double(f, "%e", bandwidth); } fprintf(f, "\n"); } if (locked) _starpu_fwrunlock(f); fclose(f); } #endif /* STARPU_SIMGRID */ void starpu_bus_print_filenames(FILE *output) { char bandwidth_path[PATH_LENGTH]; char affinity_path[PATH_LENGTH]; char latency_path[PATH_LENGTH]; get_bandwidth_path(bandwidth_path, sizeof(bandwidth_path)); get_affinity_path(affinity_path, sizeof(affinity_path)); get_latency_path(latency_path, sizeof(latency_path)); fprintf(output, "bandwidth: <%s>\n", bandwidth_path); fprintf(output, " affinity: <%s>\n", affinity_path); fprintf(output, " latency: <%s>\n", latency_path); } void starpu_bus_print_bandwidth(FILE *f) { unsigned src, dst, maxnode = starpu_memory_nodes_get_count(); fprintf(f, "from/to\t"); for (dst = 0; dst < maxnode; dst++) { char name[128]; starpu_memory_node_get_name(dst, name, sizeof(name)); fprintf(f, "%s\t", name); } fprintf(f, "\n"); for (src = 0; src < maxnode; src++) { char name[128]; starpu_memory_node_get_name(src, name, sizeof(name)); fprintf(f, "%s\t", name); for (dst = 0; dst < maxnode; dst++) fprintf(f, "%.0f\t", bandwidth_matrix[src][dst]); fprintf(f, "\n"); } fprintf(f, "\n"); for (src = 0; src < maxnode; src++) { char name[128]; starpu_memory_node_get_name(src, name, sizeof(name)); fprintf(f, "%s\t", name); for (dst = 0; dst < maxnode; dst++) fprintf(f, "%.0f\t", latency_matrix[src][dst]); fprintf(f, "\n"); } #ifndef STARPU_SIMGRID #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) if (ncuda != 0 || nopencl != 0) fprintf(f, "\nGPU\tNUMA in preference order (logical index), host-to-device, device-to-host\n"); for (src = 0; src < ncuda + nopencl; src++) { struct dev_timing *timing; struct _starpu_machine_config * config = _starpu_get_machine_config(); unsigned nhwnumas = _starpu_topology_get_nhwnumanodes(config); unsigned numa; #ifdef STARPU_USE_CUDA if (src < ncuda) { fprintf(f, "CUDA_%u\t", src); for (numa = 0; numa < nhwnumas; numa++) { timing = &timing_per_numa[STARPU_CUDA_RAM][src][numa]; if (timing->timing_htod) fprintf(f, "%2d %.0f %.0f\t", timing->numa_id, 1/timing->timing_htod, 1/timing->timing_dtoh); else fprintf(f, "%2u\t", affinity_matrix[STARPU_CUDA_RAM][src][numa]); } } #ifdef STARPU_USE_OPENCL else #endif #endif #ifdef STARPU_USE_OPENCL { fprintf(f, "OpenCL%u\t", src-ncuda); for (numa = 0; numa < nhwnumas; numa++) { timing = &timing_per_numa[STARPU_OPENCL_RAM][(src-ncuda)][numa]; if (timing->timing_htod) fprintf(f, "%2d %.0f %.0f\t", timing->numa_id, 1/timing->timing_htod, 1/timing->timing_dtoh); else fprintf(f, "%2u\t", affinity_matrix[STARPU_OPENCL_RAM][src-ncuda][numa]); } } #endif fprintf(f, "\n"); } #endif #endif } static void generate_bus_bandwidth_file(void) { if (!was_benchmarked) benchmark_all_memory_nodes(); #ifndef STARPU_SIMGRID write_bus_bandwidth_file_content(); #endif } static void load_bus_bandwidth_file(void) { int res; char path[PATH_LENGTH]; get_bandwidth_path(path, sizeof(path)); res = access(path, F_OK); if (res || !load_bus_bandwidth_file_content()) { /* File does not exist yet or is bogus */ generate_bus_bandwidth_file(); res = load_bus_bandwidth_file_content(); STARPU_ASSERT(res); } } #ifndef STARPU_SIMGRID /* * Config */ static void get_config_path(char *path, size_t maxlen) { get_bus_path("config", path, maxlen); } #if defined(STARPU_USE_MPI_MASTER_SLAVE) /* check if the master or one slave has to recalibrate */ static int mpi_check_recalibrate(int my_recalibrate) { int nb_mpi = _starpu_mpi_src_get_device_count() + 1; int mpi_recalibrate[nb_mpi]; int i; MPI_Allgather(&my_recalibrate, 1, MPI_INT, mpi_recalibrate, 1, MPI_INT, MPI_COMM_WORLD); for (i = 0; i < nb_mpi; i++) { if (mpi_recalibrate[i]) { return 1; } } return 0; } #endif static void compare_value_and_recalibrate(enum starpu_node_kind type, const char * msg, unsigned val_file, unsigned val_detected) { int recalibrate = 0; if (val_file != val_detected && !((type == STARPU_MPI_MS_RAM || type == STARPU_TCPIP_MS_RAM) && !val_detected)) recalibrate = 1; #ifdef STARPU_USE_MPI_MASTER_SLAVE //Send to each other to know if we had to recalibrate because someone cannot have the correct value in the config file if (_starpu_config.conf.nmpi_ms != 0) recalibrate = mpi_check_recalibrate(recalibrate); #endif if (recalibrate) { #ifdef STARPU_USE_MPI_MASTER_SLAVE /* Only the master prints the message */ if (_starpu_mpi_common_is_src_node()) #endif _STARPU_DISP("Current configuration does not match the bus performance model (%s: (stored) %d != (current) %d), recalibrating...\n", msg, val_file, val_detected); int location = _starpu_get_perf_model_bus(); _starpu_bus_force_sampling(location); #ifdef STARPU_USE_MPI_MASTER_SLAVE if (_starpu_mpi_common_is_src_node()) #endif _STARPU_DISP("... done\n"); } } static void check_bus_config_file(void) { struct _starpu_machine_config *config = _starpu_get_machine_config(); int recalibrate = 0; char path[PATH_LENGTH]; int location = _starpu_get_perf_model_bus(); if (location < 0 || config->conf.bus_calibrate > 0) recalibrate = 1; #if defined(STARPU_USE_MPI_MASTER_SLAVE) if (_starpu_config.conf.nmpi_ms != 0) //Send to each other to know if we had to recalibrate because someone cannot have the config file recalibrate = mpi_check_recalibrate(recalibrate); #endif if (recalibrate) { if (location < 0) _STARPU_DISP("No performance model for the bus, calibrating...\n"); _starpu_bus_force_sampling(location); if (location < 0) _STARPU_DISP("... done\n"); } else { FILE *f; int ret; enum starpu_node_kind type; unsigned read_cpus = -1; unsigned n_read[STARPU_NRAM]; int locked; unsigned ok; get_config_path(path, sizeof(path)); // Loading configuration from file f = fopen(path, "r"); STARPU_ASSERT_MSG(f, "Error when reading from file '%s'", path); locked = _starpu_frdlock(f) == 0; _starpu_drop_comments(f); ret = fscanf(f, "%u\t", &read_cpus); STARPU_ASSERT_MSG(ret == 1, "Error when reading from file '%s'", path); _starpu_drop_comments(f); for (type = STARPU_CPU_RAM; type < STARPU_NRAM; type++) n_read[type] = -1; ok = 1; for (type = STARPU_CPU_RAM; ok && type < STARPU_NRAM; type++) { if (ok) ret = fscanf(f, "%u\t", &n_read[type]); if (!ok || ret != 1) { ok = 0; n_read[type] = 0; } _starpu_drop_comments(f); } if (locked) _starpu_frdunlock(f); fclose(f); // Loading current configuration ncpus = _starpu_topology_get_nhwcpu(config); /* TODO: factorize these calls */ nnumas = _starpu_topology_get_nhwnumanodes(config); #ifdef STARPU_USE_CUDA ncuda = _starpu_get_cuda_device_count(); #endif #ifdef STARPU_USE_OPENCL nopencl = _starpu_opencl_get_device_count(); #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE nmpims = _starpu_mpi_src_get_device_count(); #endif /* STARPU_USE_MPI_MASTER_SLAVE */ #ifdef STARPU_USE_TCPIP_MASTER_SLAVE ntcpip_ms = _starpu_tcpip_src_get_device_count(); #endif /* STARPU_USE_TCPIP_MASTER_SLAVE */ // Checking if both configurations match compare_value_and_recalibrate(STARPU_CPU_RAM, "CPUS", read_cpus, ncpus); for (type = STARPU_CPU_RAM; type < STARPU_NRAM; type++) { compare_value_and_recalibrate(type, starpu_memory_driver_info[type].name_upper, n_read[type], nmem[type]); } } } static void write_bus_config_file_content(void) { FILE *f; char path[PATH_LENGTH]; int locked; enum starpu_node_kind type; STARPU_ASSERT(was_benchmarked); get_config_path(path, sizeof(path)); _STARPU_DEBUG("writing config to %s\n", path); f = fopen(path, "a+"); STARPU_ASSERT_MSG(f, "Error when opening file (writing) '%s'", path); locked = _starpu_fwrlock(f) == 0; fseek(f, 0, SEEK_SET); _starpu_fftruncate(f, 0); fprintf(f, "# Current configuration\n"); fprintf(f, "%u # Number of CPUs\n", ncpus); for (type = STARPU_CPU_RAM; type < STARPU_NRAM; type++) fprintf(f, "%u # Number of %s nodes\n", nmem[type], starpu_memory_driver_info[type].name_upper); if (locked) _starpu_fwrunlock(f); fclose(f); } static void generate_bus_config_file(void) { if (!was_benchmarked) benchmark_all_memory_nodes(); write_bus_config_file_content(); } #endif /* !SIMGRID */ void _starpu_simgrid_get_platform_path(int version, char *path, size_t maxlen) { if (version == 3) get_bus_path("platform.xml", path, maxlen); else get_bus_path("platform.v4.xml", path, maxlen); } #ifndef STARPU_SIMGRID /* * Compute the precise PCI tree bandwidth and link shares * * We only have measurements from one leaf to another. We assume that the * available bandwidth is greater at lower levels, and thus measurements from * increasingly far GPUs provide the PCI bridges bandwidths at each level. * * The bandwidth of a PCI bridge is thus computed as the maximum of the speed * of the various transfers that we have achieved through it. We thus browse * the PCI tree three times: * * - first through all CUDA-CUDA possible transfers to compute the maximum * measured bandwidth on each PCI link and hub used for that. * - then through the whole tree to emit links for each PCI link and hub. * - then through all CUDA-CUDA possible transfers again to emit routes. */ #if defined(STARPU_USE_CUDA) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX && defined(STARPU_HAVE_CUDA_MEMCPY_PEER) /* Records, for each PCI link and hub, the maximum bandwidth seen through it */ struct pci_userdata { /* Uplink max measurement */ double bw_up; double bw_down; /* Hub max measurement */ double bw; }; /* Allocate a pci_userdata structure for the given object */ static void allocate_userdata(hwloc_obj_t obj) { struct pci_userdata *data; if (obj->userdata) return; _STARPU_MALLOC(obj->userdata, sizeof(*data)); data = obj->userdata; data->bw_up = 0.0; data->bw_down = 0.0; data->bw = 0.0; } /* Update the maximum bandwidth seen going to upstream */ static void update_bandwidth_up(hwloc_obj_t obj, double bandwidth) { struct pci_userdata *data; if (obj->type != HWLOC_OBJ_BRIDGE && obj->type != HWLOC_OBJ_PCI_DEVICE) return; allocate_userdata(obj); data = obj->userdata; if (data->bw_up < bandwidth) data->bw_up = bandwidth; } /* Update the maximum bandwidth seen going from upstream */ static void update_bandwidth_down(hwloc_obj_t obj, double bandwidth) { struct pci_userdata *data; if (obj->type != HWLOC_OBJ_BRIDGE && obj->type != HWLOC_OBJ_PCI_DEVICE) return; allocate_userdata(obj); data = obj->userdata; if (data->bw_down < bandwidth) data->bw_down = bandwidth; } /* Update the maximum bandwidth seen going through this Hub */ static void update_bandwidth_through(hwloc_obj_t obj, double bandwidth) { struct pci_userdata *data; allocate_userdata(obj); data = obj->userdata; if (data->bw < bandwidth) data->bw = bandwidth; } /* find_* functions perform the first step: computing maximum bandwidths */ /* Our traffic had to go through the host, go back from target up to the host, * updating uplink downstream bandwidth along the way */ static void find_platform_backward_path(hwloc_obj_t obj, double bandwidth) { if (!obj) /* Oops, we should have seen a host bridge. Well, too bad. */ return; /* Update uplink bandwidth of PCI Hub */ update_bandwidth_down(obj, bandwidth); /* Update internal bandwidth of PCI Hub */ update_bandwidth_through(obj, bandwidth); if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) /* Finished */ return; /* Continue up */ find_platform_backward_path(obj->parent, bandwidth); } /* Same, but update uplink upstream bandwidth */ static void find_platform_forward_path(hwloc_obj_t obj, double bandwidth) { if (!obj) /* Oops, we should have seen a host bridge. Well, too bad. */ return; /* Update uplink bandwidth of PCI Hub */ update_bandwidth_up(obj, bandwidth); /* Update internal bandwidth of PCI Hub */ update_bandwidth_through(obj, bandwidth); if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) /* Finished */ return; /* Continue up */ find_platform_forward_path(obj->parent, bandwidth); } /* Find the path from obj1 through parent down to obj2 (without ever going up), * and update the maximum bandwidth along the path */ static int find_platform_path_down(hwloc_obj_t parent, hwloc_obj_t obj1, hwloc_obj_t obj2, double bandwidth) { unsigned i; /* Base case, path is empty */ if (parent == obj2) return 1; /* Try to go down from parent */ for (i = 0; i < parent->arity; i++) if (parent->children[i] != obj1 && find_platform_path_down(parent->children[i], NULL, obj2, bandwidth)) { /* Found it down there, update bandwidth of parent */ update_bandwidth_down(parent->children[i], bandwidth); update_bandwidth_through(parent, bandwidth); return 1; } #if HWLOC_API_VERSION >= 0x00020000 hwloc_obj_t io; for (io = parent->io_first_child; io; io = io->next_sibling) if (io != obj1 && find_platform_path_down(io, NULL, obj2, bandwidth)) { /* Found it down there, update bandwidth of parent */ update_bandwidth_down(io, bandwidth); update_bandwidth_through(parent, bandwidth); return 1; } #endif return 0; } /* Find the path from obj1 to obj2, and update the maximum bandwidth along the * path */ static int find_platform_path_up(hwloc_obj_t obj1, hwloc_obj_t obj2, double bandwidth) { int ret; hwloc_obj_t parent = obj1->parent; if (!parent) { /* Oops, we should have seen a host bridge. Act as if we had seen it. */ find_platform_backward_path(obj2, bandwidth); return 1; } if (find_platform_path_down(parent, obj1, obj2, bandwidth)) /* obj2 was a mere (sub)child of our parent */ return 1; /* obj2 is not a (sub)child of our parent, we have to go up through the parent */ if (parent->type == HWLOC_OBJ_BRIDGE && parent->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) { /* We have to go up to the Interconnect, so obj2 is not in the same PCI * tree, so we're for for obj1 to Interconnect, and just find the path * from obj2 to Interconnect too. */ find_platform_backward_path(obj2, bandwidth); update_bandwidth_up(parent, bandwidth); update_bandwidth_through(parent, bandwidth); return 1; } /* Not at host yet, just go up */ ret = find_platform_path_up(parent, obj2, bandwidth); update_bandwidth_up(parent, bandwidth); update_bandwidth_through(parent, bandwidth); return ret; } static hwloc_obj_t get_hwloc_cuda_obj(hwloc_topology_t topology, unsigned devid) { hwloc_obj_t res; struct cudaDeviceProp props; cudaError_t cures; res = hwloc_cuda_get_device_osdev_by_index(topology, devid); if (res) return res; cures = cudaGetDeviceProperties(&props, devid); if (cures == cudaSuccess) { res = hwloc_get_pcidev_by_busid(topology, props.pciDomainID, props.pciBusID, props.pciDeviceID, 0); if (res) return res; #if defined(STARPU_HAVE_NVML_H) && !defined(STARPU_USE_CUDA0) && !defined(STARPU_USE_CUDA1) nvmlDevice_t nvmldev = _starpu_cuda_get_nvmldev(&props); if (nvmldev && _starpu_nvmlDeviceGetIndex && _starpu_nvmlDeviceGetPciInfo && _starpu_nvmlDeviceGetUUID) { unsigned int index; if (_starpu_nvmlDeviceGetIndex(nvmldev, &index) == NVML_SUCCESS) { res = hwloc_nvml_get_device_osdev_by_index(topology, index); if (res) return res; } res = hwloc_nvml_get_device_osdev(topology, nvmldev); if (res) return res; } #endif } return NULL; } /* find the path between cuda i and cuda j, and update the maximum bandwidth along the path */ static int find_platform_cuda_path(hwloc_topology_t topology, unsigned i, unsigned j, double bandwidth) { hwloc_obj_t cudai, cudaj; cudai = get_hwloc_cuda_obj(topology, i); cudaj = get_hwloc_cuda_obj(topology, j); if (!cudai || !cudaj) return 0; return find_platform_path_up(cudai, cudaj, bandwidth); } /* emit_topology_bandwidths performs the second step: emitting link names */ /* Emit the link name of the object */ static void emit_pci_hub(FILE *f, hwloc_obj_t obj) { STARPU_ASSERT(obj->type == HWLOC_OBJ_BRIDGE); fprintf(f, "PCI:%04x:[%02x-%02x]", obj->attr->bridge.downstream.pci.domain, obj->attr->bridge.downstream.pci.secondary_bus, obj->attr->bridge.downstream.pci.subordinate_bus); } static void emit_pci_dev(FILE *f, struct hwloc_pcidev_attr_s *pcidev) { fprintf(f, "PCI:%04x:%02x:%02x.%1x", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); } /* Emit the links of the object */ static void emit_topology_bandwidths(FILE *f, hwloc_obj_t obj, const char *Bps, const char *s) { unsigned i; if (obj->userdata) { struct pci_userdata *data = obj->userdata; if (obj->type == HWLOC_OBJ_BRIDGE) { /* Uplink */ fprintf(f, " \n", data->bw_up, Bps, s); fprintf(f, " \n", data->bw_down, Bps, s); /* PCI Switches are assumed to have infinite internal bandwidth */ if (!obj->name || !strstr(obj->name, "Switch")) { /* We assume that PCI Hubs have double bandwidth in * order to support full duplex but not more */ fprintf(f, " \n", data->bw * 2, Bps, s); } } else if (obj->type == HWLOC_OBJ_PCI_DEVICE) { fprintf(f, " attr->pcidev); fprintf(f, " up\" bandwidth=\"%f%s\" latency=\"0.000000%s\"/>\n", data->bw_up, Bps, s); fprintf(f, " attr->pcidev); fprintf(f, " down\" bandwidth=\"%f%s\" latency=\"0.000000%s\"/>\n", data->bw_down, Bps, s); } } for (i = 0; i < obj->arity; i++) emit_topology_bandwidths(f, obj->children[i], Bps, s); #if HWLOC_API_VERSION >= 0x00020000 hwloc_obj_t io; for (io = obj->io_first_child; io; io = io->next_sibling) emit_topology_bandwidths(f, io, Bps, s); #endif } /* emit_pci_link_* functions perform the third step: emitting the routes */ static void emit_pci_link(FILE *f, hwloc_obj_t obj, const char *suffix) { if (obj->type == HWLOC_OBJ_BRIDGE) { fprintf(f, " \n", suffix); } else if (obj->type == HWLOC_OBJ_PCI_DEVICE) { fprintf(f, " attr->pcidev); fprintf(f, " %s\"/>\n", suffix); } } /* Go to upstream */ static void emit_pci_link_up(FILE *f, hwloc_obj_t obj) { emit_pci_link(f, obj, "up"); } /* Go from upstream */ static void emit_pci_link_down(FILE *f, hwloc_obj_t obj) { emit_pci_link(f, obj, "down"); } /* Go through PCI hub */ static void emit_pci_link_through(FILE *f, hwloc_obj_t obj) { /* We don't care about traffic going through PCI switches */ if (obj->type == HWLOC_OBJ_BRIDGE) { if (!obj->name || !strstr(obj->name, "Switch")) emit_pci_link(f, obj, "through"); else { fprintf(f, " \n"); } } } /* Our traffic has to go through the host, go back from target up to the host, * using uplink downstream along the way */ static void emit_platform_backward_path(FILE *f, hwloc_obj_t obj) { if (!obj) /* Oops, we should have seen a host bridge. Well, too bad. */ return; /* Go through PCI Hub */ emit_pci_link_through(f, obj); /* Go through uplink */ emit_pci_link_down(f, obj); if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) { /* Finished, go through NUMA */ hwloc_obj_t numa = _starpu_numa_get_obj(obj); if (numa) fprintf(f, " \n", numa->logical_index); else fprintf(f, " \n"); return; } /* Continue up */ emit_platform_backward_path(f, obj->parent); } /* Same, but use upstream link */ static void emit_platform_forward_path(FILE *f, hwloc_obj_t obj) { if (!obj) /* Oops, we should have seen a host bridge. Well, too bad. */ return; /* Go through PCI Hub */ emit_pci_link_through(f, obj); /* Go through uplink */ emit_pci_link_up(f, obj); if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) { /* Finished, go through NUMA */ hwloc_obj_t numa = _starpu_numa_get_obj(obj); if (numa) fprintf(f, " \n", numa->logical_index); else fprintf(f, " \n"); return; } /* Continue up */ emit_platform_forward_path(f, obj->parent); } /* Find the path from obj1 through parent down to obj2 (without ever going up), * and use the links along the path */ static int emit_platform_path_down(FILE *f, hwloc_obj_t parent, hwloc_obj_t obj1, hwloc_obj_t obj2) { unsigned i; /* Base case, path is empty */ if (parent == obj2) return 1; /* Try to go down from parent */ for (i = 0; i < parent->arity; i++) if (parent->children[i] != obj1 && emit_platform_path_down(f, parent->children[i], NULL, obj2)) { /* Found it down there, path goes through this hub */ emit_pci_link_down(f, parent->children[i]); emit_pci_link_through(f, parent); return 1; } #if HWLOC_API_VERSION >= 0x00020000 hwloc_obj_t io; for (io = parent->io_first_child; io; io = io->next_sibling) if (io != obj1 && emit_platform_path_down(f, io, NULL, obj2)) { /* Found it down there, path goes through this hub */ emit_pci_link_down(f, io); emit_pci_link_through(f, parent); return 1; } #endif return 0; } /* Find the path from obj1 to obj2, and use the links along the path */ static int emit_platform_path_up(FILE *f, hwloc_obj_t obj1, hwloc_obj_t obj2) { int ret; hwloc_obj_t parent = obj1->parent; if (!parent) { /* Oops, we should have seen a host bridge. Act as if we had seen it. */ emit_platform_backward_path(f, obj2); return 1; } if (emit_platform_path_down(f, parent, obj1, obj2)) /* obj2 was a mere (sub)child of our parent */ return 1; /* obj2 is not a (sub)child of our parent, we have to go up through the parent */ if (parent->type == HWLOC_OBJ_BRIDGE && parent->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) { /* We have to go up to the Interconnect, so obj2 is not in the same PCI * tree, so we're for for obj1 to Interconnect, and just find the path * from obj2 to Interconnect too. */ emit_platform_backward_path(f, obj2); hwloc_obj_t numa2 = _starpu_numa_get_obj(obj2); hwloc_obj_t numa1 = _starpu_numa_get_obj(obj1); if (!numa1 || !numa2 || numa1 != numa2) { fprintf(f, " \n"); if (numa1) fprintf(f, " \n", numa1->logical_index); } emit_pci_link_up(f, parent); emit_pci_link_through(f, parent); return 1; } /* Not at host yet, just go up */ ret = emit_platform_path_up(f, parent, obj2); emit_pci_link_up(f, parent); emit_pci_link_through(f, parent); return ret; } /* Clean our mess in the topology before destroying it */ static void clean_topology(hwloc_obj_t obj) { unsigned i; if (obj->userdata) { free(obj->userdata); obj->userdata = NULL; } for (i = 0; i < obj->arity; i++) clean_topology(obj->children[i]); #if HWLOC_API_VERSION >= 0x00020000 hwloc_obj_t io; for (io = obj->io_first_child; io; io = io->next_sibling) clean_topology(io); #endif } #endif static void write_bus_platform_file_content(int version) { FILE *f; char path[PATH_LENGTH]; unsigned i; const char *speed, *flops, *Bps, *s; char dash; int locked; if (version == 3) { speed = "power"; flops = ""; Bps = ""; s = ""; dash = '_'; } else { speed = "speed"; flops = "f"; Bps = "Bps"; s = "s"; dash = '-'; } STARPU_ASSERT(was_benchmarked); _starpu_simgrid_get_platform_path(version, path, sizeof(path)); _STARPU_DEBUG("writing platform to %s\n", path); f = fopen(path, "a+"); if (!f) { perror("fopen write_bus_platform_file_content"); _STARPU_DISP("path '%s'\n", path); fflush(stderr); STARPU_ABORT(); } locked = _starpu_fwrlock(f) == 0; fseek(f, 0, SEEK_SET); _starpu_fftruncate(f, 0); fprintf(f, "\n" "\n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n", version == 3 ? "http://simgrid.gforge.inria.fr/simgrid.dtd" : "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd", version, dash, dash, dash, dash, speed, flops); for (i = 0; i < ncpus; i++) /* TODO: host memory for out-of-core simulation */ fprintf(f, " \n", i, speed, flops); for (i = 0; i < ncuda; i++) { fprintf(f, " \n", i, speed, flops); fprintf(f, " \n", cuda_devname[i]); fprintf(f, " \n", (unsigned long long) cuda_size[i]); #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER fprintf(f, " \n"); #endif /* TODO: record cudadev_direct instead of assuming it's NUMA nodes */ fprintf(f, " \n"); } for (i = 0; i < nopencl; i++) { fprintf(f, " \n", i, speed, flops); fprintf(f, " \n", opencl_devname[i]); fprintf(f, " \n", (unsigned long long) opencl_size[i]); fprintf(f, " \n"); } fprintf(f, "\n \n", speed, flops); /* * Compute maximum bandwidth, taken as host bandwidth */ double max_bandwidth = 0; double max_bandwidth_numa[nnumas]; unsigned numa; for (numa = 0; numa < nnumas; numa++) max_bandwidth_numa[numa] = 0.; #ifdef STARPU_USE_CUDA for (i = 0; i < ncuda; i++) { for (numa = 0; numa < nnumas; numa++) { double down_bw = 1.0 / timing_per_numa[STARPU_CUDA_RAM][i][numa].timing_dtoh; double up_bw = 1.0 / timing_per_numa[STARPU_CUDA_RAM][i][numa].timing_htod; if (max_bandwidth < down_bw) max_bandwidth = down_bw; if (max_bandwidth_numa[numa] < down_bw) max_bandwidth_numa[numa] = down_bw; if (max_bandwidth < up_bw) max_bandwidth = up_bw; if (max_bandwidth_numa[numa] < up_bw) max_bandwidth_numa[numa] = up_bw; } } #endif #ifdef STARPU_USE_OPENCL for (i = 0; i < nopencl; i++) { for (numa = 0; numa < nnumas; numa++) { double down_bw = 1.0 / timing_per_numa[STARPU_OPENCL_RAM][i][numa].timing_dtoh; double up_bw = 1.0 / timing_per_numa[STARPU_OPENCL_RAM][i][numa].timing_htod; if (max_bandwidth < down_bw) max_bandwidth = down_bw; if (max_bandwidth_numa[numa] < down_bw) max_bandwidth_numa[numa] = down_bw; if (max_bandwidth < up_bw) max_bandwidth = up_bw; if (max_bandwidth_numa[numa] < up_bw) max_bandwidth_numa[numa] = up_bw; } } #endif for (numa = 0; numa < nnumas; numa++) fprintf(f, " \n", numa, max_bandwidth_numa[numa]*1000000, Bps, s); fprintf(f, " \n\n", max_bandwidth*1000000, Bps, s); /* * OpenCL links */ #ifdef STARPU_USE_OPENCL for (i = 0; i < nopencl; i++) { char i_name[17]; snprintf(i_name, sizeof(i_name), "OpenCL%u", i); fprintf(f, " \n", i_name, 1000000 / search_bus_best_timing(i, STARPU_OPENCL_RAM, 1), Bps, search_bus_best_latency(i, STARPU_OPENCL_RAM, 1)/1000000., s); fprintf(f, " \n", i_name, 1000000 / search_bus_best_timing(i, STARPU_OPENCL_RAM, 0), Bps, search_bus_best_latency(i, STARPU_OPENCL_RAM, 0)/1000000., s); } fprintf(f, "\n"); #endif /* * CUDA links and routes */ #ifdef STARPU_USE_CUDA /* Write RAM/CUDA bandwidths and latencies */ for (i = 0; i < ncuda; i++) { char i_name[16]; snprintf(i_name, sizeof(i_name), "CUDA%u", i); fprintf(f, " \n", i_name, 1000000. / search_bus_best_timing(i, STARPU_CUDA_RAM, 1), Bps, search_bus_best_latency(i, STARPU_CUDA_RAM, 1)/1000000., s); fprintf(f, " \n", i_name, 1000000. / search_bus_best_timing(i, STARPU_CUDA_RAM, 0), Bps, search_bus_best_latency(i, STARPU_CUDA_RAM, 0)/1000000., s); } fprintf(f, "\n"); #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER /* Write CUDA/CUDA bandwidths and latencies */ for (i = 0; i < ncuda; i++) { unsigned j; char i_name[16]; snprintf(i_name, sizeof(i_name), "CUDA%u", i); for (j = 0; j < ncuda; j++) { char j_name[16]; if (j == i) continue; snprintf(j_name, sizeof(j_name), "CUDA%u", j); fprintf(f, " \n", i_name, j_name, 1000000. / timing_dtod[STARPU_CUDA_RAM][i][j], Bps, latency_dtod[STARPU_CUDA_RAM][i][j]/1000000., s); } } #endif #if HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX && defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_CUDA_MEMCPY_PEER) /* If we have enough hwloc information, write PCI bandwidths and routes */ if (!starpu_getenv_number_default("STARPU_PCI_FLAT", 0) && ncuda > 0) { int ret; hwloc_topology_t topology; ret = hwloc_topology_init(&topology); STARPU_ASSERT_MSG(ret == 0, "Could not initialize Hwloc topology (%s)\n", strerror(errno)); _starpu_topology_filter(topology); ret = hwloc_topology_load(topology); STARPU_ASSERT_MSG(ret == 0, "Could not load Hwloc topology (%s)\n", strerror(errno)); char nvlink[ncuda][ncuda]; char nvlinkhost[ncuda]; char nvswitch[ncuda]; memset(nvlink, 0, sizeof(nvlink)); memset(nvlinkhost, 0, sizeof(nvlinkhost)); memset(nvswitch, 0, sizeof(nvswitch)); /* TODO: move to drivers */ #if defined(STARPU_HAVE_NVML_H) && !defined(STARPU_USE_CUDA0) && !defined(STARPU_USE_CUDA1) /* First find NVLinks */ struct cudaDeviceProp props[ncuda]; for (i = 0; i < ncuda; i++) { cudaError_t cures = cudaGetDeviceProperties(&props[i], i); if (cures != cudaSuccess) props[i].name[0] = 0; } if (_starpu_nvmlDeviceGetNvLinkState && _starpu_nvmlDeviceGetNvLinkRemotePciInfo) for (i = 0; i < ncuda; i++) { unsigned j; if (!props[i].name[0]) continue; nvmlDevice_t nvmldev; nvmldev = _starpu_cuda_get_nvmldev(&props[i]); if (!nvmldev) continue; for (j = 0; j < NVML_NVLINK_MAX_LINKS; j++) { nvmlEnableState_t active; nvmlReturn_t nvmlret; nvmlPciInfo_t pci; unsigned k; nvmlret = _starpu_nvmlDeviceGetNvLinkState(nvmldev, j, &active); if (nvmlret != NVML_SUCCESS) continue; if (active != NVML_FEATURE_ENABLED) continue; nvmlret = _starpu_nvmlDeviceGetNvLinkRemotePciInfo(nvmldev, j, &pci); if (nvmlret != NVML_SUCCESS) continue; hwloc_obj_t obj = hwloc_get_pcidev_by_busid(topology, pci.domain, pci.bus, pci.device, 0); if (obj && obj->type == HWLOC_OBJ_PCI_DEVICE && (obj->attr->pcidev.class_id >> 8 == 0x06)) { /* This is a PCI bridge */ switch (obj->attr->pcidev.vendor_id) { case 0x1014: /* IBM OpenCAPI port, direct CPU-GPU NVLink */ /* TODO: NUMA affinity */ nvlinkhost[i] = 1; continue; case 0x10de: nvswitch[i] = 1; continue; } } /* Otherwise, link to another GPU? */ for (k = 0; k < ncuda; k++) { if ((int) pci.domain == props[k].pciDomainID && (int) pci.bus == props[k].pciBusID && (int) pci.device == props[k].pciDeviceID) { nvlink[i][k] = 1; nvlink[k][i] = 1; break; } } if (k < ncuda) /* Yes it was another GPU */ continue; /* No idea what this is */ _STARPU_DISP("Warning: NVLink to unknown PCI card %04x:%02x:%02x: %04x\n", pci.domain, pci.bus, pci.device, pci.pciDeviceId); } } for (i = 0; i < ncuda; i++) { unsigned j; for (j = i+1; j < ncuda; j++) { if (nvswitch[i] && nvswitch[j]) { static int warned = 0; if (!warned) { warned = 1; /* TODO: follow answers to https://forums.developer.nvidia.com/t/how-to-distinguish-different-nvswitch/241983 */ _STARPU_DISP("Warning: NVSwitch not tested yet with several switches, assuming there is only one NVSwitch in the system\n"); } nvlink[i][j] = 1; nvlink[j][i] = 1; } } } #endif /* Find paths and record measured bandwidth along the path */ for (i = 0; i < ncuda; i++) { unsigned j; for (j = 0; j < ncuda; j++) if (i != j && !nvlink[i][j] && !nvlinkhost[i] && !nvlinkhost[j]) if (!find_platform_cuda_path(topology, i, j, 1000000. / timing_dtod[STARPU_CUDA_RAM][i][j])) { _STARPU_DISP("Warning: could not get CUDA location from hwloc\n"); clean_topology(hwloc_get_root_obj(topology)); hwloc_topology_destroy(topology); goto flat_cuda; } /* Record RAM/CUDA bandwidths */ if (!nvlinkhost[i]) { find_platform_forward_path(get_hwloc_cuda_obj(topology, i), 1000000. / search_bus_best_timing(i, STARPU_CUDA_RAM, 0)); find_platform_backward_path(get_hwloc_cuda_obj(topology, i), 1000000. / search_bus_best_timing(i, STARPU_CUDA_RAM, 1)); } } /* Ok, found path in all cases, can emit advanced platform routes */ fprintf(f, "\n"); emit_topology_bandwidths(f, hwloc_get_root_obj(topology), Bps, s); fprintf(f, "\n"); for (i = 0; i < ncuda; i++) { unsigned j; for (j = 0; j < ncuda; j++) if (i != j) { fprintf(f, " \n", i, j); fprintf(f, " \n", i, j); if (!nvlink[i][j]) { if (nvlinkhost[i] && nvlinkhost[j]) { /* FIXME: if they are directly connected through PCI, is NVLink host preferred? */ if (gpu_numa[STARPU_CUDA_RAM][i] >= 0) fprintf(f, " \n", gpu_numa[STARPU_CUDA_RAM][i]); fprintf(f, " \n"); if (gpu_numa[STARPU_CUDA_RAM][j] >= 0) fprintf(f, " \n", gpu_numa[STARPU_CUDA_RAM][j]); } else emit_platform_path_up(f, get_hwloc_cuda_obj(topology, i), get_hwloc_cuda_obj(topology, j)); } fprintf(f, " \n"); } fprintf(f, " \n", i); fprintf(f, " \n", i); if (nvlinkhost[i]) { if (gpu_numa[STARPU_CUDA_RAM][i] >= 0) fprintf(f, " \n", gpu_numa[STARPU_CUDA_RAM][i]); } else emit_platform_forward_path(f, get_hwloc_cuda_obj(topology, i)); fprintf(f, " \n"); fprintf(f, " \n", i); fprintf(f, " \n", i); if (nvlinkhost[i]) { if (gpu_numa[STARPU_CUDA_RAM][i] >= 0) fprintf(f, " \n", gpu_numa[STARPU_CUDA_RAM][i]); } else emit_platform_backward_path(f, get_hwloc_cuda_obj(topology, i)); fprintf(f, " \n"); } clean_topology(hwloc_get_root_obj(topology)); hwloc_topology_destroy(topology); } else { flat_cuda: #else { #endif /* If we don't have enough hwloc information, write trivial routes always through host */ for (i = 0; i < ncuda; i++) { char i_name[16]; snprintf(i_name, sizeof(i_name), "CUDA%u", i); fprintf(f, " \n", i_name); fprintf(f, " \n", i_name); fprintf(f, " \n"); fprintf(f, " \n"); fprintf(f, " \n", i_name); fprintf(f, " \n", i_name); fprintf(f, " \n"); fprintf(f, " \n"); } #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER for (i = 0; i < ncuda; i++) { unsigned j; char i_name[16]; snprintf(i_name, sizeof(i_name), "CUDA%u", i); for (j = 0; j < ncuda; j++) { char j_name[16]; if (j == i) continue; snprintf(j_name, sizeof(j_name), "CUDA%u", j); fprintf(f, " \n", i_name, j_name); fprintf(f, " \n", i_name, j_name); fprintf(f, " \n"); fprintf(f, " \n"); } } #endif } /* defined(STARPU_HAVE_HWLOC) && defined(STARPU_HAVE_CUDA_MEMCPY_PEER) */ fprintf(f, "\n"); #endif /* STARPU_USE_CUDA */ /* * OpenCL routes */ #ifdef STARPU_USE_OPENCL for (i = 0; i < nopencl; i++) { char i_name[17]; snprintf(i_name, sizeof(i_name), "OpenCL%u", i); fprintf(f, " \n", i_name); fprintf(f, " \n", i_name); fprintf(f, " \n"); fprintf(f, " \n"); fprintf(f, " \n", i_name); fprintf(f, " \n", i_name); fprintf(f, " \n"); fprintf(f, " \n"); } #endif fprintf(f, " \n" " \n" ); if (locked) _starpu_fwrunlock(f); fclose(f); } static void generate_bus_platform_file(void) { if (!was_benchmarked) benchmark_all_memory_nodes(); write_bus_platform_file_content(3); write_bus_platform_file_content(4); } static void check_bus_platform_file(void) { int res; char path[PATH_LENGTH]; _starpu_simgrid_get_platform_path(4, path, sizeof(path)); res = access(path, F_OK); if (!res) { _starpu_simgrid_get_platform_path(3, path, sizeof(path)); res = access(path, F_OK); } if (res) { /* File does not exist yet */ generate_bus_platform_file(); } } /* * Generic */ static void _starpu_bus_force_sampling(int location) { _STARPU_DEBUG("Force bus sampling ...\n"); if (location < 0) { location = _starpu_set_default_perf_model_bus(); } _starpu_create_bus_sampling_directory_if_needed(location); generate_bus_affinity_file(); generate_bus_latency_file(); generate_bus_bandwidth_file(); generate_bus_config_file(); generate_bus_platform_file(); } #endif /* !SIMGRID */ void _starpu_load_bus_performance_files(void) { _starpu_create_bus_sampling_directory_if_needed(-1); struct _starpu_machine_config * config = _starpu_get_machine_config(); nnumas = _starpu_topology_get_nhwnumanodes(config); #ifndef STARPU_SIMGRID ncpus = _starpu_topology_get_nhwcpu(config); #endif /* TODO: factorize these calls */ #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) ncuda = _starpu_get_cuda_device_count(); #endif #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) nopencl = _starpu_opencl_get_device_count(); #endif #if defined(STARPU_USE_MPI_MASTER_SLAVE) nmpims = _starpu_mpi_src_get_device_count(); #endif #if defined(STARPU_USE_TCPIP_MASTER_SLAVE) ntcpip_ms = _starpu_tcpip_src_get_device_count(); #endif #ifndef STARPU_SIMGRID check_bus_config_file(); #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE /* be sure that master wrote the perf files */ if (_starpu_config.conf.nmpi_ms != 0) _starpu_mpi_common_barrier(); #endif #ifndef STARPU_SIMGRID load_bus_affinity_file(); #endif load_bus_latency_file(); load_bus_bandwidth_file(); #ifndef STARPU_SIMGRID check_bus_platform_file(); #endif } static unsigned _get_raw_memory_node_index(unsigned node) { enum starpu_node_kind type = starpu_node_get_kind(node), cur; int devid = starpu_memory_node_get_devid(node); unsigned base; base = 0; for (cur = STARPU_CPU_RAM; cur < type; cur++) base += nmem[cur]; return base + devid; } void _starpu_init_bus_performance(void) { unsigned src, dst, raw_src, raw_dst; for (src = 0; src < STARPU_MAXNODES; src++) { for (dst = 0; dst < STARPU_MAXNODES; dst++) { raw_src = _get_raw_memory_node_index(src); raw_dst = _get_raw_memory_node_index(dst); bandwidth_matrix[src][dst] = raw_bandwidth_matrix[raw_src][raw_dst]; latency_matrix[src][dst] = raw_latency_matrix[raw_src][raw_dst]; } } } /* (in MB/s) */ double starpu_transfer_bandwidth(unsigned src_node, unsigned dst_node) { return bandwidth_matrix[src_node][dst_node]; } /* (in µs) */ double starpu_transfer_latency(unsigned src_node, unsigned dst_node) { return latency_matrix[src_node][dst_node]; } /* (in µs) */ double starpu_transfer_predict(unsigned src_node, unsigned dst_node, size_t size) { if (src_node == dst_node) return 0; double bandwidth = bandwidth_matrix[src_node][dst_node]; double latency = latency_matrix[src_node][dst_node]; struct _starpu_machine_topology *topology = &_starpu_get_machine_config()->topology; int busid = starpu_bus_get_id(src_node, dst_node); #if 0 int direct = starpu_bus_get_direct(busid); #endif float ngpus = starpu_bus_get_ngpus(busid); if (ngpus != 1) ngpus = topology->ndevices[STARPU_CUDA_WORKER]+topology->ndevices[STARPU_OPENCL_WORKER]; #ifdef STARPU_DEVEL #warning FIXME: ngpus should not be used e.g. for slow disk transfers... #endif #if 0 /* Ideally we should take into account that some GPUs are directly * connected through a PCI switch, which has less contention that the * Host bridge, but doing that seems to *decrease* performance... */ if (direct) { float neighbours = starpu_bus_get_ngpus(busid); /* Count transfers of these GPUs, and count transfers between * other GPUs and these GPUs */ ngpus = neighbours + (ngpus - neighbours) * neighbours / ngpus; } #endif if (isnan(latency) || isnan(bandwidth)) { static int warned = 0; if (!warned) { _STARPU_DISP("Warning: no bus performance model was calibrated between nodes %d and %d, ignoring transfer time\n", src_node, dst_node); warned = 1; } return 0; } return latency + (size/bandwidth)*2*ngpus; } /* calculate save bandwidth and latency */ /* bandwidth in MB/s - latency in µs */ void _starpu_save_bandwidth_and_latency_disk(double bandwidth_write, double bandwidth_read, double latency_write, double latency_read, unsigned node, const char *name) { unsigned int i, j; double slowness_disk_between_main_ram, slowness_main_ram_between_node; int print_stats = starpu_getenv_number_default("STARPU_BUS_STATS", 0); if (print_stats) { fprintf(stderr, "\n#---------------------\n"); fprintf(stderr, "Data transfer speed for %s (node %u):\n", name, node); } /* save bandwidth */ for(i = 0; i < STARPU_MAXNODES; ++i) { for(j = 0; j < STARPU_MAXNODES; ++j) { if (i == j && j == node) /* source == destination == node */ { bandwidth_matrix[i][j] = 0; } else if (i == node) /* source == disk */ { /* convert in slowness */ if(bandwidth_read != 0) slowness_disk_between_main_ram = 1/bandwidth_read; else slowness_disk_between_main_ram = 0; if(bandwidth_matrix[STARPU_MAIN_RAM][j] != 0) slowness_main_ram_between_node = 1/bandwidth_matrix[STARPU_MAIN_RAM][j]; else slowness_main_ram_between_node = 0; bandwidth_matrix[i][j] = 1/(slowness_disk_between_main_ram+slowness_main_ram_between_node); if (!isnan(bandwidth_matrix[i][j]) && print_stats) fprintf(stderr,"%u -> %u: %.0f MB/s\n", i, j, bandwidth_matrix[i][j]); } else if (j == node) /* destination == disk */ { /* convert in slowness */ if(bandwidth_write != 0) slowness_disk_between_main_ram = 1/bandwidth_write; else slowness_disk_between_main_ram = 0; if(bandwidth_matrix[i][STARPU_MAIN_RAM] != 0) slowness_main_ram_between_node = 1/bandwidth_matrix[i][STARPU_MAIN_RAM]; else slowness_main_ram_between_node = 0; bandwidth_matrix[i][j] = 1/(slowness_disk_between_main_ram+slowness_main_ram_between_node); if (!isnan(bandwidth_matrix[i][j]) && print_stats) fprintf(stderr,"%u -> %u: %.0f MB/s\n", i, j, bandwidth_matrix[i][j]); } else if (j > node || i > node) /* not affected by the node */ { bandwidth_matrix[i][j] = NAN; } } } /* save latency */ for(i = 0; i < STARPU_MAXNODES; ++i) { for(j = 0; j < STARPU_MAXNODES; ++j) { if (i == j && j == node) /* source == destination == node */ { latency_matrix[i][j] = 0; } else if (i == node) /* source == disk */ { latency_matrix[i][j] = (latency_write+latency_matrix[STARPU_MAIN_RAM][j]); if (!isnan(latency_matrix[i][j]) && print_stats) fprintf(stderr,"%u -> %u: %.0f us\n", i, j, latency_matrix[i][j]); } else if (j == node) /* destination == disk */ { latency_matrix[i][j] = (latency_read+latency_matrix[i][STARPU_MAIN_RAM]); if (!isnan(latency_matrix[i][j]) && print_stats) fprintf(stderr,"%u -> %u: %.0f us\n", i, j, latency_matrix[i][j]); } else if (j > node || i > node) /* not affected by the node */ { latency_matrix[i][j] = NAN; } } } if (print_stats) fprintf(stderr, "\n#---------------------\n"); } starpu-1.4.9+dfsg/src/core/perfmodel/perfmodel_history.c000066400000000000000000002156651507764646700234250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom SudParis * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #if !defined(_WIN32) || defined(__MINGW32__) || defined(__CYGWIN__) #include #include #endif #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #ifdef STARPU_HAVE_WINDOWS #include #endif #define HASH_ADD_UINT32_T(head,field,add) HASH_ADD(hh,head,field,sizeof(uint32_t),add) #define HASH_FIND_UINT32_T(head,find,out) HASH_FIND(hh,head,find,sizeof(uint32_t),out) static struct starpu_perfmodel_arch **arch_combs; static int current_arch_comb; static int nb_arch_combs; static starpu_pthread_rwlock_t arch_combs_mutex = STARPU_PTHREAD_RWLOCK_INITIALIZER; static int historymaxerror; static char ignore_devid[STARPU_NARCH]; /* How many executions a codelet will have to be measured before we * consider that calibration will provide a value good enough for scheduling */ unsigned _starpu_calibration_minimum; struct starpu_perfmodel_history_table { UT_hash_handle hh; uint32_t footprint; struct starpu_perfmodel_history_entry *history_entry; }; /* We want more than 10% variance on X to trust regression */ #define VALID_REGRESSION(reg_model) \ ((reg_model)->minx < (9*(reg_model)->maxx)/10 && (reg_model)->nsample >= _starpu_calibration_minimum) static starpu_pthread_rwlock_t registered_models_rwlock; LIST_TYPE(_starpu_perfmodel, struct starpu_perfmodel *model; ) static struct _starpu_perfmodel_list registered_models; static char _starpu_perfmodel_hostname[STR_LONG_LENGTH]; void starpu_perfmodel_initialize(void) { /* make sure performance model directories exist (or create them) */ _starpu_create_bus_sampling_directory_if_needed(-1); _starpu_perfmodel_list_init(®istered_models); STARPU_PTHREAD_RWLOCK_INIT(®istered_models_rwlock, NULL); STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL); _starpu_gethostname(_starpu_perfmodel_hostname, sizeof(_starpu_perfmodel_hostname)); } void _starpu_initialize_registered_performance_models(void) { starpu_perfmodel_initialize(); struct _starpu_machine_config *conf = _starpu_get_machine_config(); /* FIXME: just iterate over all archs */ unsigned ncores = conf->topology.nhwdevices[STARPU_CPU_WORKER]; unsigned ncuda = conf->topology.nhwdevices[STARPU_CUDA_WORKER]; unsigned nopencl = conf->topology.nhwdevices[STARPU_OPENCL_WORKER]; enum starpu_worker_archtype archtype; #if STARPU_MAXMPIDEVS > 0 || STARPU_MAXTCPIPDEVS > 0 unsigned i; #endif unsigned nmpi = 0; unsigned ntcpip = 0; #if STARPU_MAXMPIDEVS > 0 STARPU_ASSERT(conf->topology.nhwdevices[STARPU_MPI_MS_WORKER] < STARPU_NMAXDEVS); for(i = 0; i < conf->topology.nhwdevices[STARPU_MPI_MS_WORKER]; i++) nmpi += conf->topology.nhwworker[STARPU_MPI_MS_WORKER][i]; #endif #if STARPU_MAXTCPIPDEVS > 0 for(i = 0; i < conf->topology.nhwdevices[STARPU_TCPIP_MS_WORKER]; i++) { ntcpip += conf->topology.nhwworker[STARPU_TCPIP_MS_WORKER][i]; } #endif // We used to allocate 2**(ncores + ncuda + nopencl + nmpi + ntcpip), this is too big // We now allocate only 2*(ncores + ncuda + nopencl + nmpi + ntcpip), and reallocate when necessary in starpu_perfmodel_arch_comb_add nb_arch_combs = 2 * (ncores + ncuda + nopencl + nmpi + ntcpip); _STARPU_MALLOC(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*)); current_arch_comb = 0; historymaxerror = starpu_getenv_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR); _starpu_calibration_minimum = starpu_getenv_number_default("STARPU_CALIBRATE_MINIMUM", 10); for (archtype = 0; archtype < STARPU_NARCH; archtype++) { char name[128]; const char *arch = starpu_worker_get_type_as_env_var(archtype); int def = archtype == STARPU_CPU_WORKER ? 1 : 0; snprintf(name, sizeof(name), "STARPU_PERF_MODEL_HOMOGENEOUS_%s", arch); ignore_devid[archtype] = starpu_getenv_number_default(name, def); } } void _starpu_perfmodel_malloc_per_arch(struct starpu_perfmodel *model, int comb, int nb_impl) { int i; _STARPU_MALLOC(model->state->per_arch[comb], nb_impl*sizeof(struct starpu_perfmodel_per_arch)); for(i = 0; i < nb_impl; i++) { memset(&model->state->per_arch[comb][i], 0, sizeof(struct starpu_perfmodel_per_arch)); } model->state->nimpls_set[comb] = nb_impl; } void _starpu_perfmodel_malloc_per_arch_is_set(struct starpu_perfmodel *model, int comb, int nb_impl) { int i; _STARPU_MALLOC(model->state->per_arch_is_set[comb], nb_impl*sizeof(int)); for(i = 0; i < nb_impl; i++) { model->state->per_arch_is_set[comb][i] = 0; } } int _starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device *devices) { int comb, ncomb; ncomb = current_arch_comb; for(comb = 0; comb < ncomb; comb++) { int found = 0; if(arch_combs[comb]->ndevices == ndevices) { int dev1, dev2; int nfounded = 0; for(dev1 = 0; dev1 < arch_combs[comb]->ndevices; dev1++) { for(dev2 = 0; dev2 < ndevices; dev2++) { if(arch_combs[comb]->devices[dev1].type == devices[dev2].type && (ignore_devid[devices[dev2].type] || arch_combs[comb]->devices[dev1].devid == devices[dev2].devid) && arch_combs[comb]->devices[dev1].ncores == devices[dev2].ncores) nfounded++; } } if(nfounded == ndevices) found = 1; } if (found) return comb; } return -1; } int starpu_perfmodel_arch_comb_get(int ndevices, struct starpu_perfmodel_device *devices) { int ret; STARPU_PTHREAD_RWLOCK_RDLOCK(&arch_combs_mutex); ret = _starpu_perfmodel_arch_comb_get(ndevices, devices); STARPU_PTHREAD_RWLOCK_UNLOCK(&arch_combs_mutex); return ret; } int starpu_perfmodel_arch_comb_add(int ndevices, struct starpu_perfmodel_device* devices) { STARPU_PTHREAD_RWLOCK_WRLOCK(&arch_combs_mutex); int comb = _starpu_perfmodel_arch_comb_get(ndevices, devices); if (comb != -1) { /* Somebody else added it in between */ STARPU_PTHREAD_RWLOCK_UNLOCK(&arch_combs_mutex); return comb; } if (current_arch_comb >= nb_arch_combs) { // We need to allocate more arch_combs nb_arch_combs = current_arch_comb+10; _STARPU_REALLOC(arch_combs, nb_arch_combs*sizeof(struct starpu_perfmodel_arch*)); } _STARPU_MALLOC(arch_combs[current_arch_comb], sizeof(struct starpu_perfmodel_arch)); _STARPU_MALLOC(arch_combs[current_arch_comb]->devices, ndevices*sizeof(struct starpu_perfmodel_device)); arch_combs[current_arch_comb]->ndevices = ndevices; int dev; for(dev = 0; dev < ndevices; dev++) { arch_combs[current_arch_comb]->devices[dev].type = devices[dev].type; arch_combs[current_arch_comb]->devices[dev].devid = devices[dev].devid; arch_combs[current_arch_comb]->devices[dev].ncores = devices[dev].ncores; } comb = current_arch_comb++; STARPU_PTHREAD_RWLOCK_UNLOCK(&arch_combs_mutex); return comb; } void _starpu_free_arch_combs(void) { int i; STARPU_PTHREAD_RWLOCK_WRLOCK(&arch_combs_mutex); for(i = 0; i < current_arch_comb; i++) { free(arch_combs[i]->devices); free(arch_combs[i]); } current_arch_comb = 0; free(arch_combs); arch_combs = NULL; STARPU_PTHREAD_RWLOCK_UNLOCK(&arch_combs_mutex); STARPU_PTHREAD_RWLOCK_DESTROY(&arch_combs_mutex); STARPU_PTHREAD_RWLOCK_INIT(&arch_combs_mutex, NULL); } int starpu_perfmodel_get_narch_combs() { return current_arch_comb; } struct starpu_perfmodel_arch *starpu_perfmodel_arch_comb_fetch(int comb) { return arch_combs[comb]; } static size_t __starpu_job_get_data_size(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned impl, struct _starpu_job *j) { struct starpu_task *task = j->task; int comb = arch == NULL ? -1 : starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); if (model && model->state->per_arch && comb != -1 && comb < model->state->ncombs_set && model->state->per_arch[comb] && model->state->per_arch[comb][impl].size_base) { return model->state->per_arch[comb][impl].size_base(task, arch, impl); } else if (model && model->size_base) { return model->size_base(task, impl); } else { unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); size_t size = 0; unsigned buffer; for (buffer = 0; buffer < nbuffers; buffer++) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer); enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, buffer); if (mode & STARPU_NOFOOTPRINT) continue; size += _starpu_data_get_size(handle); } return size; } } size_t _starpu_job_get_data_size(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned impl, struct _starpu_job *j) { size_t ret; if (model) STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock); ret = __starpu_job_get_data_size(model, arch, impl, j); if (model) STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); return ret; } /* * History based model */ static void insert_history_entry(struct starpu_perfmodel_history_entry *entry, struct starpu_perfmodel_history_list **list, struct starpu_perfmodel_history_table **history_ptr) { struct starpu_perfmodel_history_list *link; struct starpu_perfmodel_history_table *table; _STARPU_MALLOC(link, sizeof(struct starpu_perfmodel_history_list)); link->next = *list; link->entry = entry; *list = link; /* detect concurrency issue */ //HASH_FIND_UINT32_T(*history_ptr, &entry->footprint, table); //STARPU_ASSERT(table == NULL); _STARPU_MALLOC(table, sizeof(*table)); table->footprint = entry->footprint; table->history_entry = entry; HASH_ADD_UINT32_T(*history_ptr, footprint, table); } #ifndef STARPU_SIMGRID static void check_reg_model(struct starpu_perfmodel *model, int comb, int impl) { struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][impl]; struct starpu_perfmodel_regression_model *reg_model = &per_arch_model->regression; /* * Linear Regression model */ /* Unless we have enough measurements, we put NaN in the file to indicate the model is invalid */ double alpha = nan(""), beta = nan(""); if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED) { if (reg_model->nsample > 1) { alpha = reg_model->alpha; beta = reg_model->beta; } } /* TODO: check: * reg_model->sumlnx * reg_model->sumlnx2 * reg_model->sumlny * reg_model->sumlnxlny * alpha * beta * reg_model->minx * reg_model->maxx */ (void)alpha; (void)beta; /* * Non-Linear Regression model */ double a = nan(""), b = nan(""), c = nan(""); if (model->type == STARPU_NL_REGRESSION_BASED) _starpu_regression_non_linear_power(per_arch_model->list, &a, &b, &c); /* TODO: check: * a * b * c */ /* * Multiple Regression Model */ if (model->type == STARPU_MULTIPLE_REGRESSION_BASED) { /* TODO: check: */ } } static void dump_reg_model(FILE *f, struct starpu_perfmodel *model, int comb, int impl) { struct starpu_perfmodel_per_arch *per_arch_model; per_arch_model = &model->state->per_arch[comb][impl]; struct starpu_perfmodel_regression_model *reg_model; reg_model = &per_arch_model->regression; /* * Linear Regression model */ /* Unless we have enough measurements, we put NaN in the file to indicate the model is invalid */ double alpha = nan(""), beta = nan(""); if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED) { if (reg_model->nsample > 1) { alpha = reg_model->alpha; beta = reg_model->beta; } } fprintf(f, "# sumlnx\tsumlnx2\t\tsumlny\t\tsumlnxlny\talpha\t\tbeta\t\tn\tminx\t\tmaxx\n"); fprintf(f, "%-15e\t%-15e\t%-15e\t%-15e\t", reg_model->sumlnx, reg_model->sumlnx2, reg_model->sumlny, reg_model->sumlnxlny); _starpu_write_double(f, "%-15e", alpha); fprintf(f, "\t"); _starpu_write_double(f, "%-15e", beta); fprintf(f, "\t%u\t%-15lu\t%-15lu\n", reg_model->nsample, reg_model->minx, reg_model->maxx); /* * Non-Linear Regression model */ double a = nan(""), b = nan(""), c = nan(""); if (model->type == STARPU_NL_REGRESSION_BASED) { if (_starpu_regression_non_linear_power(per_arch_model->list, &a, &b, &c) != 0) _STARPU_DISP("Warning: could not compute a non-linear regression for model %s\n", model->symbol); } fprintf(f, "# a\t\tb\t\tc\n"); _starpu_write_double(f, "%-15e", a); fprintf(f, "\t"); _starpu_write_double(f, "%-15e", b); fprintf(f, "\t"); _starpu_write_double(f, "%-15e", c); fprintf(f, "\n"); /* * Multiple Regression Model */ if (model->type != STARPU_MULTIPLE_REGRESSION_BASED) { fprintf(f, "# not multiple-regression-base\n"); fprintf(f, "0\n"); } else { if (reg_model->ncoeff==0 && model->ncombinations!=0 && model->combinations!=NULL) { reg_model->ncoeff = model->ncombinations + 1; } _STARPU_MALLOC(reg_model->coeff, reg_model->ncoeff*sizeof(double)); _starpu_multiple_regression(per_arch_model->list, reg_model->coeff, reg_model->ncoeff, model->nparameters, model->parameters_names, model->combinations, model->symbol); fprintf(f, "# n\tintercept\t"); if (reg_model->ncoeff==0 || model->ncombinations==0 || model->combinations==NULL) fprintf(f, "\n1\tnan"); else { unsigned i; for (i=0; i < model->ncombinations; i++) { if (model->parameters_names == NULL) fprintf(f, "c%u", i+1); else { unsigned j; int first=1; for(j=0; j < model->nparameters; j++) { if (model->combinations[i][j] > 0) { if (first) first=0; else fprintf(f, "*"); if(model->parameters_names[j] != NULL) fprintf(f, "%s", model->parameters_names[j]); else fprintf(f, "P%u", j); if (model->combinations[i][j] > 1) fprintf(f, "^%d", model->combinations[i][j]); } } } fprintf(f, "\t\t"); } fprintf(f, "\n%u", reg_model->ncoeff); for (i=0; i < reg_model->ncoeff; i++) fprintf(f, "\t%-15e", reg_model->coeff[i]); } } } #endif static void scan_reg_model(FILE *f, const char *path, struct starpu_perfmodel_regression_model *reg_model) { int res; /* * Linear Regression model */ _starpu_drop_comments(f); res = fscanf(f, "%le\t%le\t%le\t%le\t", ®_model->sumlnx, ®_model->sumlnx2, ®_model->sumlny, ®_model->sumlnxlny); STARPU_ASSERT_MSG(res == 4, "Incorrect performance model file %s", path); res = _starpu_read_double(f, "%le", ®_model->alpha); STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); res = _starpu_read_double(f, "\t%le", ®_model->beta); STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); res = fscanf(f, "\t%u\t%lu\t%lu\n", ®_model->nsample, ®_model->minx, ®_model->maxx); STARPU_ASSERT_MSG(res == 3, "Incorrect performance model file %s", path); /* If any of the parameters describing the linear regression model is NaN, the model is invalid */ unsigned invalid = (isnan(reg_model->alpha)||isnan(reg_model->beta)); reg_model->valid = !invalid && VALID_REGRESSION(reg_model); /* * Non-Linear Regression model */ _starpu_drop_comments(f); res = _starpu_read_double(f, "%le", ®_model->a); STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); res = _starpu_read_double(f, "\t%le", ®_model->b); STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); res = _starpu_read_double(f, "%le", ®_model->c); STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); res = fscanf(f, "\n"); STARPU_ASSERT_MSG(res == 0, "Incorrect performance model file %s", path); /* If any of the parameters describing the non-linear regression model is NaN, the model is invalid */ unsigned nl_invalid = (isnan(reg_model->a)||isnan(reg_model->b)||isnan(reg_model->c)); reg_model->nl_valid = !nl_invalid && VALID_REGRESSION(reg_model); _starpu_drop_comments(f); // Read how many coefficients is there res = fscanf(f, "%u", ®_model->ncoeff); STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); /* * Multiple Regression Model */ if (reg_model->ncoeff != 0) { _STARPU_MALLOC(reg_model->coeff, reg_model->ncoeff*sizeof(double)); unsigned multi_invalid = 0; unsigned i; for (i=0; i < reg_model->ncoeff; i++) { res = _starpu_read_double(f, "%le", ®_model->coeff[i]); STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); multi_invalid = (multi_invalid||isnan(reg_model->coeff[i])); } reg_model->multi_valid = !multi_invalid; } res = fscanf(f, "\n"); STARPU_ASSERT_MSG(res == 0, "Incorrect performance model file %s", path); } #ifndef STARPU_SIMGRID static void check_history_entry(struct starpu_perfmodel_history_entry *entry) { STARPU_ASSERT_MSG(entry->deviation >= 0, "entry=%p, entry->deviation=%lf\n", entry, entry->deviation); STARPU_ASSERT_MSG(entry->sum >= 0, "entry=%p, entry->sum=%lf\n", entry, entry->sum); STARPU_ASSERT_MSG(entry->sum2 >= 0, "entry=%p, entry->sum2=%lf\n", entry, entry->sum2); STARPU_ASSERT_MSG(entry->mean >= 0, "entry=%p, entry->mean=%lf\n", entry, entry->mean); STARPU_ASSERT_MSG(isnan(entry->flops)||entry->flops >= 0, "entry=%p, entry->flops=%lf\n", entry, entry->flops); STARPU_ASSERT_MSG(entry->duration >= 0, "entry=%p, entry->duration=%lf\n", entry, entry->duration); } static void dump_history_entry(FILE *f, struct starpu_perfmodel_history_entry *entry) { fprintf(f, "%08x\t%-15lu\t%-15e\t%-15e\t%-15e\t%-15e\t%-15e\t%u\n", entry->footprint, (unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample); } #endif static void scan_history_entry(FILE *f, const char *path, struct starpu_perfmodel_history_entry *entry) { int res; _starpu_drop_comments(f); /* In case entry is NULL, we just drop these values */ unsigned nsample; uint32_t footprint; unsigned long size; /* in bytes */ double flops; double mean; double deviation; double sum; double sum2; char line[STR_LONG_LENGTH]; char *ret; ret = fgets(line, sizeof(line), f); STARPU_ASSERT(ret); STARPU_ASSERT(strchr(line, '\n')); /* Read the values from the file */ res = sscanf(line, "%x\t%lu\t%le\t%le\t%le\t%le\t%le\t%u", &footprint, &size, &flops, &mean, &deviation, &sum, &sum2, &nsample); if (res != 8) { flops = 0.; /* Read the values from the file */ res = sscanf(line, "%x\t%lu\t%le\t%le\t%le\t%le\t%u", &footprint, &size, &mean, &deviation, &sum, &sum2, &nsample); STARPU_ASSERT_MSG(res == 7, "Incorrect performance model file %s", path); } if (entry) { STARPU_ASSERT_MSG(isnan(flops) || flops >=0, "Negative flops %lf in performance model file %s", flops, path); STARPU_ASSERT_MSG(mean >=0, "Negative mean %lf in performance model file %s", mean, path); STARPU_ASSERT_MSG(deviation >=0, "Negative deviation %lf in performance model file %s", deviation, path); STARPU_ASSERT_MSG(sum >=0, "Negative sum %lf in performance model file %s", sum, path); STARPU_ASSERT_MSG(sum2 >=0, "Negative sum2 %lf in performance model file %s", sum2, path); entry->footprint = footprint; entry->size = size; entry->flops = flops; entry->mean = mean; entry->deviation = deviation; entry->sum = sum; entry->sum2 = sum2; entry->nsample = nsample; } } static void parse_per_arch_model_file(FILE *f, const char *path, struct starpu_perfmodel_per_arch *per_arch_model, unsigned scan_history, struct starpu_perfmodel *model) { unsigned nentries; struct starpu_perfmodel_regression_model *reg_model = &per_arch_model->regression; _starpu_drop_comments(f); int res = fscanf(f, "%u\n", &nentries); STARPU_ASSERT_MSG(res == 1, "Incorrect performance model file %s", path); scan_reg_model(f, path, reg_model); /* parse entries */ unsigned i; for (i = 0; i < nentries; i++) { struct starpu_perfmodel_history_entry *entry = NULL; if (scan_history) { _STARPU_CALLOC(entry, 1, sizeof(struct starpu_perfmodel_history_entry)); /* Tell helgrind that we do not care about * racing access to the sampling, we only want a * good-enough estimation */ STARPU_HG_DISABLE_CHECKING(entry->nsample); STARPU_HG_DISABLE_CHECKING(entry->mean); //entry->nerror = 0; } scan_history_entry(f, path, entry); /* insert the entry in the hashtable and the list structures */ /* TODO: Insert it at the end of the list, to avoid reversing * the order... But efficiently! We may have a lot of entries */ if (scan_history) insert_history_entry(entry, &per_arch_model->list, &per_arch_model->history); } if (model && model->type == STARPU_PERFMODEL_INVALID) { /* Tool loading a perfmodel without having the corresponding codelet */ if (reg_model->ncoeff != 0) model->type = STARPU_MULTIPLE_REGRESSION_BASED; else if (!isnan(reg_model->a) && !isnan(reg_model->b) && !isnan(reg_model->c)) model->type = STARPU_NL_REGRESSION_BASED; else if (!isnan(reg_model->alpha) && !isnan(reg_model->beta)) model->type = STARPU_REGRESSION_BASED; else if (nentries) model->type = STARPU_HISTORY_BASED; /* else unknown, leave invalid */ } } static void parse_arch(FILE *f, const char *path, struct starpu_perfmodel *model, unsigned scan_history, int comb) { struct starpu_perfmodel_per_arch dummy; unsigned nimpls, impl, i, ret; /* Parsing number of implementation */ _starpu_drop_comments(f); ret = fscanf(f, "%u\n", &nimpls); STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file %s", path); if(model != NULL) { /* Parsing each implementation */ unsigned implmax = STARPU_MIN(nimpls, STARPU_MAXIMPLEMENTATIONS); model->state->nimpls[comb] = implmax; if (!model->state->per_arch[comb]) { _starpu_perfmodel_malloc_per_arch(model, comb, STARPU_MAXIMPLEMENTATIONS); } if (!model->state->per_arch_is_set[comb]) { _starpu_perfmodel_malloc_per_arch_is_set(model, comb, STARPU_MAXIMPLEMENTATIONS); } for (impl = 0; impl < implmax; impl++) { struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][impl]; model->state->per_arch_is_set[comb][impl] = 1; parse_per_arch_model_file(f, path, per_arch_model, scan_history, model); } } else { impl = 0; } /* if the number of implementation is greater than STARPU_MAXIMPLEMENTATIONS * we skip the last implementation */ for (i = impl; i < nimpls; i++) parse_per_arch_model_file(f, path, &dummy, 0, NULL); } static void parse_comb(FILE *f, const char *path, struct starpu_perfmodel *model, unsigned scan_history, int comb) { int ndevices = 0; _starpu_drop_comments(f); int ret = fscanf(f, "%d\n", &ndevices); STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file %s", path); struct starpu_perfmodel_device devices[ndevices]; int dev; for(dev = 0; dev < ndevices; dev++) { _starpu_drop_comments(f); int type; ret = fscanf(f, "%d\n", &type); STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file %s", path); int dev_id; _starpu_drop_comments(f); ret = fscanf(f, "%d\n", &dev_id); STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file %s", path); int ncores; _starpu_drop_comments(f); ret = fscanf(f, "%d\n", &ncores); STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file %s", path); devices[dev].type = type; devices[dev].devid = dev_id; devices[dev].ncores = ncores; } int id_comb = starpu_perfmodel_arch_comb_get(ndevices, devices); if(id_comb == -1) id_comb = starpu_perfmodel_arch_comb_add(ndevices, devices); if (id_comb >= model->state->ncombs_set) _starpu_perfmodel_realloc(model, id_comb+1); model->state->combs[comb] = id_comb; parse_arch(f, path, model, scan_history, id_comb); } static int parse_model_file(FILE *f, const char *path, struct starpu_perfmodel *model, unsigned scan_history) { int ret, version=0; /* First check that it's not empty (very common corruption result, for which there is no solution) */ fseek(f, 0, SEEK_END); long pos = ftell(f); if (pos == 0) { _STARPU_DISP("Performance model file %s is empty, ignoring it\n", path); return 1; } rewind(f); /* Parsing performance model version */ _starpu_drop_comments(f); ret = fscanf(f, "%d\n", &version); STARPU_ASSERT_MSG(version == _STARPU_PERFMODEL_VERSION, "Incorrect performance model file %s with a model version %d not being the current model version (%d)\n", path, version, _STARPU_PERFMODEL_VERSION); STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file %s", path); int ncombs = 0; _starpu_drop_comments(f); ret = fscanf(f, "%d\n", &ncombs); STARPU_ASSERT_MSG(ret == 1, "Incorrect performance model file %s", path); if(ncombs > 0) { model->state->ncombs = ncombs; } if (ncombs > model->state->ncombs_set) { // The model has more combs than the original number of arch_combs, we need to reallocate _starpu_perfmodel_realloc(model, ncombs); } int comb; for(comb = 0; comb < ncombs; comb++) parse_comb(f, path, model, scan_history, comb); return 0; } #ifndef STARPU_SIMGRID static void check_per_arch_model(struct starpu_perfmodel *model, int comb, unsigned impl) { struct starpu_perfmodel_per_arch *per_arch_model; per_arch_model = &model->state->per_arch[comb][impl]; /* count the number of elements in the lists */ struct starpu_perfmodel_history_list *ptr = NULL; unsigned nentries = 0; if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED) { /* Dump the list of all entries in the history */ ptr = per_arch_model->list; while(ptr) { nentries++; ptr = ptr->next; } } /* header */ char archname[STR_SHORT_LENGTH]; starpu_perfmodel_get_arch_name(arch_combs[comb], archname, sizeof(archname), impl); STARPU_ASSERT(strlen(archname)>0); check_reg_model(model, comb, impl); /* Dump the history into the model file in case it is necessary */ if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED) { ptr = per_arch_model->list; while (ptr) { check_history_entry(ptr->entry); ptr = ptr->next; } } } static void dump_per_arch_model_file(FILE *f, struct starpu_perfmodel *model, int comb, unsigned impl) { struct starpu_perfmodel_per_arch *per_arch_model; per_arch_model = &model->state->per_arch[comb][impl]; /* count the number of elements in the lists */ struct starpu_perfmodel_history_list *ptr = NULL; unsigned nentries = 0; if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED) { /* Dump the list of all entries in the history */ ptr = per_arch_model->list; while(ptr) { nentries++; ptr = ptr->next; } } /* header */ char archname[STR_SHORT_LENGTH]; starpu_perfmodel_get_arch_name(arch_combs[comb], archname, sizeof(archname), impl); fprintf(f, "#####\n"); fprintf(f, "# Model for %s\n", archname); fprintf(f, "# number of entries\n%u\n", nentries); dump_reg_model(f, model, comb, impl); /* Dump the history into the model file in case it is necessary */ if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED) { fprintf(f, "# hash\t\tsize\t\tflops\t\tmean (us or J)\tdev (us or J)\tsum\t\tsum2\t\tn\n"); ptr = per_arch_model->list; while (ptr) { dump_history_entry(f, ptr->entry); ptr = ptr->next; } } fprintf(f, "\n"); } static void check_model(struct starpu_perfmodel *model) { int ncombs = model->state->ncombs; STARPU_ASSERT(ncombs >= 0); int i, impl, dev; for(i = 0; i < ncombs; i++) { int comb = model->state->combs[i]; STARPU_ASSERT(comb >= 0); int ndevices = arch_combs[comb]->ndevices; STARPU_ASSERT(ndevices >= 1); for(dev = 0; dev < ndevices; dev++) { STARPU_ASSERT(arch_combs[comb]->devices[dev].type < STARPU_NARCH); STARPU_ASSERT(arch_combs[comb]->devices[dev].devid >= 0); STARPU_ASSERT(arch_combs[comb]->devices[dev].ncores >= 0); } int nimpls = model->state->nimpls[comb]; STARPU_ASSERT(nimpls >= 1); for (impl = 0; impl < nimpls; impl++) { check_per_arch_model(model, comb, impl); } } } /* Driver porters: adding your driver here is optional, only needed for performance models. */ static void dump_model_file(FILE *f, struct starpu_perfmodel *model) { fprintf(f, "##################\n"); fprintf(f, "# Performance Model Version\n"); fprintf(f, "%d\n\n", _STARPU_PERFMODEL_VERSION); int ncombs = model->state->ncombs; fprintf(f, "####################\n"); fprintf(f, "# COMBs\n"); fprintf(f, "# number of combinations\n"); fprintf(f, "%d\n", ncombs); int i, impl, dev; for(i = 0; i < ncombs; i++) { int comb = model->state->combs[i]; int ndevices = arch_combs[comb]->ndevices; fprintf(f, "####################\n"); fprintf(f, "# COMB_%d\n", comb); fprintf(f, "# number of types devices\n"); fprintf(f, "%d\n", ndevices); for(dev = 0; dev < ndevices; dev++) { fprintf(f, "####################\n"); fprintf(f, "# DEV_%d\n", dev); fprintf(f, "# device type (CPU - %d, CUDA - %d, OPENCL - %d, MPI_MS - %d, TCPIP_MS - %d)\n", STARPU_CPU_WORKER, STARPU_CUDA_WORKER, STARPU_OPENCL_WORKER, STARPU_MPI_MS_WORKER, STARPU_TCPIP_MS_WORKER); fprintf(f, "%u\n", arch_combs[comb]->devices[dev].type); fprintf(f, "####################\n"); fprintf(f, "# DEV_%d\n", dev); fprintf(f, "# device id \n"); fprintf(f, "%u\n", arch_combs[comb]->devices[dev].devid); fprintf(f, "####################\n"); fprintf(f, "# DEV_%d\n", dev); fprintf(f, "# number of cores \n"); fprintf(f, "%u\n", arch_combs[comb]->devices[dev].ncores); } int nimpls = model->state->nimpls[comb]; fprintf(f, "##########\n"); fprintf(f, "# number of implementations\n"); fprintf(f, "%d\n", nimpls); for (impl = 0; impl < nimpls; impl++) { dump_per_arch_model_file(f, model, comb, impl); } } } #endif static void dump_history_entry_xml(FILE *f, struct starpu_perfmodel_history_entry *entry) { fprintf(f, " \n", entry->footprint, (unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->sum, entry->sum2, entry->nsample); } static void dump_reg_model_xml(FILE *f, struct starpu_perfmodel *model, int comb, int impl) { struct starpu_perfmodel_per_arch *per_arch_model; per_arch_model = &model->state->per_arch[comb][impl]; struct starpu_perfmodel_regression_model *reg_model = &per_arch_model->regression; /* * Linear Regression model */ if (model->type == STARPU_REGRESSION_BASED) { fprintf(f, " \n"); fprintf(f, " sumlnx, reg_model->sumlnx2, reg_model->sumlny, reg_model->sumlnxlny); fprintf(f, " alpha=\""); _starpu_write_double(f, "%e", reg_model->alpha); fprintf(f, "\" beta=\""); _starpu_write_double(f, "%e", reg_model->beta); fprintf(f, "\" nsample=\"%u\" minx=\"%lu\" maxx=\"%lu\"/>\n", reg_model->nsample, reg_model->minx, reg_model->maxx); } /* * Non-Linear Regression model */ else if (model->type == STARPU_NL_REGRESSION_BASED) { fprintf(f, " \n"); fprintf(f, " a); fprintf(f, "\" b=\""); _starpu_write_double(f, "%e", reg_model->b); fprintf(f, "\" c=\""); _starpu_write_double(f, "%e", reg_model->c); fprintf(f, "\"/>\n"); } else if (model->type == STARPU_MULTIPLE_REGRESSION_BASED) { if (reg_model->ncoeff==0 || model->ncombinations==0 || model->combinations==NULL) fprintf(f, " \n"); else { unsigned i; fprintf(f, " \n", reg_model->coeff[0]); for (i=0; i < model->ncombinations; i++) { fprintf(f, " parameters_names == NULL) fprintf(f, "c%u", i+1); else { unsigned j; int first=1; for(j=0; j < model->nparameters; j++) { if (model->combinations[i][j] > 0) { if (first) first=0; else fprintf(f, "*"); if(model->parameters_names[j] != NULL) fprintf(f, "%s", model->parameters_names[j]); else fprintf(f, "P%u", j); if (model->combinations[i][j] > 1) fprintf(f, "^%d", model->combinations[i][j]); } } } fprintf(f, "\" coef=\"%e\"/>\n", reg_model->coeff[i+1]); } fprintf(f, " \n"); } } } static void dump_per_arch_model_xml(FILE *f, struct starpu_perfmodel *model, int comb, unsigned impl) { struct starpu_perfmodel_per_arch *per_arch_model; per_arch_model = &model->state->per_arch[comb][impl]; /* count the number of elements in the lists */ struct starpu_perfmodel_history_list *ptr; dump_reg_model_xml(f, model, comb, impl); /* Dump the history into the model file in case it is necessary */ ptr = per_arch_model->list; while (ptr) { dump_history_entry_xml(f, ptr->entry); ptr = ptr->next; } } void starpu_perfmodel_dump_xml(FILE *f, struct starpu_perfmodel *model) { _starpu_init_and_load_perfmodel(model); fprintf(f, "\n"); fprintf(f, "\n"); fprintf(f, "\n", model->symbol); fprintf(f, "\n"); fprintf(f, "\n", _STARPU_PERFMODEL_VERSION); STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock); int ncombs = model->state->ncombs; int i, impl, dev; for(i = 0; i < ncombs; i++) { int comb = model->state->combs[i]; int ndevices = arch_combs[comb]->ndevices; fprintf(f, " \n"); for(dev = 0; dev < ndevices; dev++) { enum starpu_worker_archtype archtype = arch_combs[comb]->devices[dev].type; const char *type = starpu_driver_info[archtype].name_upper; STARPU_ASSERT(type); fprintf(f, " devices[dev].devid); if (arch_combs[comb]->devices[dev].type == STARPU_CPU_WORKER) fprintf(f, " ncores=\"%d\"", arch_combs[comb]->devices[dev].ncores); fprintf(f, "/>\n"); } int nimpls = model->state->nimpls[comb]; for (impl = 0; impl < nimpls; impl++) { fprintf(f, " \n", impl); char archname[STR_SHORT_LENGTH]; starpu_perfmodel_get_arch_name(arch_combs[comb], archname, sizeof(archname), impl); fprintf(f, " \n", archname); dump_per_arch_model_xml(f, model, comb, impl); fprintf(f, " \n"); } fprintf(f, " \n"); } STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); fprintf(f, "\n"); } void _starpu_perfmodel_realloc(struct starpu_perfmodel *model, int nb) { int i; STARPU_ASSERT(nb > model->state->ncombs_set); #ifdef SSIZE_MAX STARPU_ASSERT((size_t) nb < SSIZE_MAX / sizeof(struct starpu_perfmodel_per_arch*)); #endif _STARPU_REALLOC(model->state->per_arch, nb*sizeof(struct starpu_perfmodel_per_arch*)); _STARPU_REALLOC(model->state->per_arch_is_set, nb*sizeof(int*)); _STARPU_REALLOC(model->state->nimpls, nb*sizeof(int)); _STARPU_REALLOC(model->state->nimpls_set, nb*sizeof(int)); _STARPU_REALLOC(model->state->combs, nb*sizeof(int)); for(i = model->state->ncombs_set; i < nb; i++) { model->state->per_arch[i] = NULL; model->state->per_arch_is_set[i] = NULL; model->state->nimpls[i] = 0; model->state->nimpls_set[i] = 0; } model->state->ncombs_set = nb; } void starpu_perfmodel_init(struct starpu_perfmodel *model) { int already_init; int ncombs; STARPU_ASSERT(model); STARPU_PTHREAD_RWLOCK_RDLOCK(®istered_models_rwlock); already_init = model->is_init; STARPU_PTHREAD_RWLOCK_UNLOCK(®istered_models_rwlock); if (already_init) return; /* The model is still not loaded so we grab the lock in write mode, and * if it's not loaded once we have the lock, we do load it. */ STARPU_PTHREAD_RWLOCK_WRLOCK(®istered_models_rwlock); /* Was the model initialized since the previous test ? */ if (model->is_init) { STARPU_PTHREAD_RWLOCK_UNLOCK(®istered_models_rwlock); return; } model->path = NULL; _STARPU_MALLOC(model->state, sizeof(struct _starpu_perfmodel_state)); STARPU_PTHREAD_RWLOCK_INIT(&model->state->model_rwlock, NULL); STARPU_PTHREAD_RWLOCK_RDLOCK(&arch_combs_mutex); model->state->ncombs_set = ncombs = nb_arch_combs; STARPU_PTHREAD_RWLOCK_UNLOCK(&arch_combs_mutex); _STARPU_CALLOC(model->state->per_arch, ncombs, sizeof(struct starpu_perfmodel_per_arch*)); _STARPU_CALLOC(model->state->per_arch_is_set, ncombs, sizeof(int*)); _STARPU_CALLOC(model->state->nimpls, ncombs, sizeof(int)); _STARPU_CALLOC(model->state->nimpls_set, ncombs, sizeof(int)); _STARPU_MALLOC(model->state->combs, ncombs*sizeof(int)); model->state->ncombs = 0; /* add the model to a linked list */ struct _starpu_perfmodel *node = _starpu_perfmodel_new(); node->model = model; //model->debug_modelid = debug_modelid++; /* put this model at the beginning of the list */ _starpu_perfmodel_list_push_front(®istered_models, node); model->is_init = 1; STARPU_PTHREAD_RWLOCK_UNLOCK(®istered_models_rwlock); } static void get_model_debug_path(struct starpu_perfmodel *model, const char *arch, char *path, size_t maxlen) { STARPU_ASSERT(path); _starpu_find_perf_model_codelet_debug(model->symbol, _starpu_perfmodel_hostname, arch, path, maxlen); } void starpu_perfmodel_get_model_path(const char *symbol, char *path, size_t maxlen) { _starpu_find_perf_model_codelet(symbol, _starpu_perfmodel_hostname, path, maxlen); } void starpu_perfmodel_get_model_path_default_location(const char *symbol, char *path, size_t maxlen) { _starpu_set_default_perf_model_codelet(symbol, _starpu_perfmodel_hostname, path, maxlen); } #ifndef STARPU_SIMGRID void starpu_save_history_based_model(struct starpu_perfmodel *model) { STARPU_ASSERT(model); STARPU_ASSERT(model->symbol); int locked; /* TODO checks */ /* filename = $STARPU_PERF_MODEL_DIR/codelets/symbol.hostname */ char path[STR_LONG_LENGTH]; starpu_perfmodel_get_model_path(model->symbol, path, sizeof(path)); if (path[0] == '\0') starpu_perfmodel_get_model_path_default_location(model->symbol, path, sizeof(path)); free(model->path); model->path = strdup(path); _STARPU_DEBUG("Opening performance model file <%s> for model <%s>\n", path, model->symbol); /* overwrite existing file, or create it */ FILE *f; f = fopen(path, "a+"); STARPU_ASSERT_MSG(f, "Could not save performance model %s\n", path); locked = _starpu_fwrlock(f) == 0; check_model(model); fseek(f, 0, SEEK_SET); _starpu_fftruncate(f, 0); dump_model_file(f, model); if (locked) _starpu_fwrunlock(f); fclose(f); } #endif static void _starpu_dump_registered_models(void) { #ifndef STARPU_SIMGRID STARPU_PTHREAD_RWLOCK_WRLOCK(®istered_models_rwlock); struct _starpu_perfmodel *node; _STARPU_DEBUG("DUMP MODELS !\n"); for (node = _starpu_perfmodel_list_begin(®istered_models); node != _starpu_perfmodel_list_end(®istered_models); node = _starpu_perfmodel_list_next(node)) { if (node->model->is_init && (node->model->type != STARPU_PER_WORKER && node->model->type != STARPU_PER_ARCH && node->model->type != STARPU_COMMON)) starpu_save_history_based_model(node->model); } STARPU_PTHREAD_RWLOCK_UNLOCK(®istered_models_rwlock); #endif } void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model) { if(model->is_init && model->state && model->state->per_arch != NULL) { int i; for(i=0 ; istate->ncombs_set ; i++) { if (model->state->per_arch[i]) { int impl; for(impl=0 ; implstate->nimpls_set[i] ; impl++) { struct starpu_perfmodel_per_arch *archmodel = &model->state->per_arch[i][impl]; if (archmodel->history) { struct starpu_perfmodel_history_list *list; struct starpu_perfmodel_history_table *entry=NULL, *tmp=NULL; HASH_ITER(hh, archmodel->history, entry, tmp) { HASH_DEL(archmodel->history, entry); free(entry); } archmodel->history = NULL; list = archmodel->list; while (list) { struct starpu_perfmodel_history_list *plist; free(list->entry); plist = list; list = list->next; free(plist); } archmodel->list = NULL; } } free(model->state->per_arch[i]); model->state->per_arch[i] = NULL; free(model->state->per_arch_is_set[i]); model->state->per_arch_is_set[i] = NULL; } } free(model->state->per_arch); model->state->per_arch = NULL; free(model->state->per_arch_is_set); model->state->per_arch_is_set = NULL; free(model->state->nimpls); model->state->nimpls = NULL; free(model->state->nimpls_set); model->state->nimpls_set = NULL; free(model->state->combs); model->state->combs = NULL; model->state->ncombs = 0; } model->is_init = 0; model->is_loaded = 0; } void _starpu_deinitialize_registered_performance_models(void) { if (_starpu_get_calibrate_flag()) _starpu_dump_registered_models(); STARPU_PTHREAD_RWLOCK_WRLOCK(®istered_models_rwlock); struct _starpu_perfmodel *node, *nnode; _STARPU_DEBUG("FREE MODELS !\n"); for (node = _starpu_perfmodel_list_begin(®istered_models); node != _starpu_perfmodel_list_end(®istered_models); node = nnode) { struct starpu_perfmodel *model = node->model; nnode = _starpu_perfmodel_list_next(node); STARPU_PTHREAD_RWLOCK_WRLOCK(&model->state->model_rwlock); _starpu_deinitialize_performance_model(model); STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); free(node->model->path); node->model->path = NULL; free(node->model->state); node->model->state = NULL; _starpu_perfmodel_list_erase(®istered_models, node); _starpu_perfmodel_delete(node); } STARPU_PTHREAD_RWLOCK_UNLOCK(®istered_models_rwlock); STARPU_PTHREAD_RWLOCK_DESTROY(®istered_models_rwlock); starpu_perfmodel_free_sampling(); } /* We first try to grab the global lock in read mode to check whether the model * was loaded or not (this is very likely to have been already loaded). If the * model was not loaded yet, we take the lock in write mode, and if the model * is still not loaded once we have the lock, we do load it. */ void _starpu_load_history_based_model(struct starpu_perfmodel *model, unsigned scan_history) { STARPU_PTHREAD_RWLOCK_WRLOCK(&model->state->model_rwlock); if(!model->is_loaded) { char path[STR_LONG_LENGTH]; // Check if a symbol is defined before trying to load the model from a file STARPU_ASSERT_MSG(model->symbol, "history-based performance models must have a symbol"); starpu_perfmodel_get_model_path(model->symbol, path, sizeof(path)); unsigned calibrate_flag = _starpu_get_calibrate_flag(); model->benchmarking = calibrate_flag; model->is_loaded = 1; if (path[0] == '\0') { _STARPU_DEBUG("No performance model file for model %s ...\n", model->symbol); STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); return; } free(model->path); model->path = strdup(path); _STARPU_DEBUG("Opening performance model file %s for model %s ...\n", path, model->symbol); if (calibrate_flag == 2) { /* The user specified that the performance model should * be overwritten, so we don't load the existing file ! * */ _STARPU_DEBUG("Overwrite existing file\n"); } else { /* We try to load the file */ FILE *f; f = fopen(path, "r"); if (f) { int locked; locked = _starpu_frdlock(f) == 0; parse_model_file(f, path, model, scan_history); if (locked) _starpu_frdunlock(f); fclose(f); _STARPU_DEBUG("Performance model file %s for model %s is loaded\n", path, model->symbol); } else { _STARPU_DEBUG("Performance model file %s does not exist or is not readable: %s\n", path, strerror(errno)); } } } STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); } /* This function is intended to be used by external tools that should read the * performance model files */ /* TODO: write an clear function, to free symbol and history */ int starpu_perfmodel_load_symbol(const char *symbol, struct starpu_perfmodel *model) { char path[STR_LONG_LENGTH]; model->symbol = strdup(symbol); starpu_perfmodel_get_model_path(model->symbol, path, sizeof(path)); _STARPU_DEBUG("get_model_path -> %s\n", path); if (path[0] != '\0') { return starpu_perfmodel_load_file(path, model); } else { const char *dot = strrchr(symbol, '.'); if (dot) { char *symbol2 = strdup(symbol); symbol2[dot-symbol] = '\0'; int ret; _STARPU_DISP("note: loading history from %s instead of %s\n", symbol2, symbol); ret = starpu_perfmodel_load_symbol(symbol2, model); free(symbol2); return ret; } else { _STARPU_DISP("There is no performance model for symbol %s\n", symbol); return 1; } } } int starpu_perfmodel_load_file(const char *filename, struct starpu_perfmodel *model) { int res, ret = 0; FILE *f = fopen(filename, "r"); int locked; STARPU_ASSERT(f); starpu_perfmodel_init(model); model->path = strdup(filename); locked = _starpu_frdlock(f) == 0; ret = parse_model_file(f, filename, model, 1); if (locked) _starpu_frdunlock(f); res = fclose(f); STARPU_ASSERT(res == 0); if (ret) starpu_perfmodel_unload_model(model); else model->is_loaded = 1; return ret; } int starpu_perfmodel_unload_model(struct starpu_perfmodel *model) { if (model->symbol) { free((char *)model->symbol); model->symbol = NULL; } starpu_perfmodel_deinit(model); return 0; } int starpu_perfmodel_deinit(struct starpu_perfmodel *model) { _starpu_deinitialize_performance_model(model); free(model->path); free(model->state); model->state = NULL; STARPU_PTHREAD_RWLOCK_WRLOCK(®istered_models_rwlock); struct _starpu_perfmodel *node; for (node = _starpu_perfmodel_list_begin(®istered_models); node != _starpu_perfmodel_list_end(®istered_models); node = _starpu_perfmodel_list_next(node)) { if (node->model == model) { _starpu_perfmodel_list_erase(®istered_models, node); _starpu_perfmodel_delete(node); break; } } STARPU_PTHREAD_RWLOCK_UNLOCK(®istered_models_rwlock); return 0; } const char* starpu_perfmodel_get_archtype_name(enum starpu_worker_archtype archtype) { const char *name = starpu_driver_info[archtype].name_lower; STARPU_ASSERT(name); return name; } void starpu_perfmodel_get_arch_name(struct starpu_perfmodel_arch* arch, char *archname, size_t maxlen,unsigned impl) { int i; int comb = _starpu_perfmodel_create_comb_if_needed(arch); STARPU_ASSERT(comb != -1); char devices[STR_VERY_LONG_LENGTH]; int written = 0; devices[0] = '\0'; for(i=0 ; indevices ; i++) { written += snprintf(devices + written, sizeof(devices)-written, "%s%d%s", starpu_perfmodel_get_archtype_name(arch->devices[i].type), arch->devices[i].devid, i != arch->ndevices-1 ? "_":""); } snprintf(archname, maxlen, "%s_impl%u (Comb%d)", devices, impl, comb); } void starpu_perfmodel_debugfilepath(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, char *path, size_t maxlen, unsigned nimpl) { int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); STARPU_ASSERT(comb != -1); char archname[STR_SHORT_LENGTH]; starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl); STARPU_ASSERT(path); get_model_debug_path(model, archname, path, maxlen); } double _starpu_regression_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl) { int comb; double exp = NAN; size_t size = 0; struct starpu_perfmodel_regression_model *regmodel = NULL; comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); if (comb == -1) goto docal; STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock); size = __starpu_job_get_data_size(model, arch, nimpl, j); if (comb >= model->state->ncombs_set || model->state->per_arch[comb] == NULL) { // The model has not been executed on this combination STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); goto docal; } regmodel = &model->state->per_arch[comb][nimpl].regression; if (regmodel->valid && size >= regmodel->minx * 0.9 && size <= regmodel->maxx * 1.1) exp = regmodel->alpha*pow((double)size, regmodel->beta); STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); docal: STARPU_HG_DISABLE_CHECKING(model->benchmarking); if (isnan(exp) && !model->benchmarking) { char archname[STR_SHORT_LENGTH]; starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl); _STARPU_DISP("Warning: model %s is not calibrated enough for %s size %lu (only %u measurements from size %lu to %lu), forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this. You probably need to run again to continue calibrating the model, until this warning disappears.\n", model->symbol, archname, (unsigned long) size, regmodel?regmodel->nsample:0, regmodel?regmodel->minx:0, regmodel?regmodel->maxx:0); _starpu_set_calibrate_flag(1); model->benchmarking = 1; } return exp; } double _starpu_non_linear_regression_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j,unsigned nimpl) { int comb; double exp = NAN; size_t size = 0; struct starpu_perfmodel_regression_model *regmodel; struct starpu_perfmodel_history_table *entry = NULL; comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); if (comb == -1) goto docal; STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock); size = __starpu_job_get_data_size(model, arch, nimpl, j); if (comb >= model->state->ncombs_set || model->state->per_arch[comb] == NULL) { // The model has not been executed on this combination STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); goto docal; } regmodel = &model->state->per_arch[comb][nimpl].regression; if (regmodel->nl_valid && size >= regmodel->minx * 0.9 && size <= regmodel->maxx * 1.1) { exp = regmodel->a*pow((double)size, regmodel->b) + regmodel->c; STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); } else { uint32_t key = _starpu_compute_buffers_footprint(model, arch, nimpl, j); struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][nimpl]; struct starpu_perfmodel_history_table *history; history = per_arch_model->history; HASH_FIND_UINT32_T(history, &key, entry); STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); /* Here helgrind would shout that this is unprotected access. * We do not care about racing access to the mean, we only want * a good-enough estimation */ if (entry && entry->history_entry && entry->history_entry->nsample >= _starpu_calibration_minimum) exp = entry->history_entry->mean; docal: STARPU_HG_DISABLE_CHECKING(model->benchmarking); if (isnan(exp) && !model->benchmarking) { char archname[STR_SHORT_LENGTH]; starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl); _STARPU_DISP("Warning: model %s is not calibrated enough for %s size %lu (only %u measurements), forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this. You probably need to run again to continue calibrating the model, until this warning disappears.\n", model->symbol, archname, (unsigned long) size, entry && entry->history_entry ? entry->history_entry->nsample : 0); _starpu_set_calibrate_flag(1); model->benchmarking = 1; } } return exp; } double _starpu_multiple_regression_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j, unsigned nimpl) { int comb; double expected_duration=NAN; struct starpu_perfmodel_regression_model *reg_model = NULL; comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); if(comb == -1) goto docal; STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock); if (comb >= model->state->ncombs_set || model->state->per_arch[comb] == NULL) { // The model has not been executed on this combination STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); goto docal; } reg_model = &model->state->per_arch[comb][nimpl].regression; STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); if (reg_model->coeff == NULL) goto docal; double *parameters; _STARPU_MALLOC(parameters, model->nparameters*sizeof(double)); model->parameters(j->task, parameters); expected_duration=reg_model->coeff[0]; unsigned i; for (i=0; i < model->ncombinations; i++) { double parameter_value=1.; unsigned k; for (k=0; k < model->nparameters; k++) parameter_value *= pow(parameters[k],model->combinations[i][k]); expected_duration += reg_model->coeff[i+1]*parameter_value; } docal: STARPU_HG_DISABLE_CHECKING(model->benchmarking); if (isnan(expected_duration) && !model->benchmarking) { char archname[STR_SHORT_LENGTH]; starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl); _STARPU_DISP("Warning: model %s is not calibrated enough for %s, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this. You probably need to run again to continue calibrating the model, until this warning disappears.\n", model->symbol, archname); _starpu_set_calibrate_flag(1); model->benchmarking = 1; } // In the unlikely event that predicted duration is negative // in case multiple linear regression is not so accurate if (expected_duration < 0) expected_duration = 0.00001; //Make sure that the injected time is in milliseconds return expected_duration; } double __starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j,unsigned nimpl,size_t offset) { int comb; double exp = NAN; struct starpu_perfmodel_per_arch *per_arch_model; struct starpu_perfmodel_history_entry *entry = NULL; struct starpu_perfmodel_history_table *history, *elt; uint32_t key; double *data; comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); key = _starpu_compute_buffers_footprint(model, arch, nimpl, j); if(comb == -1) goto docal; STARPU_PTHREAD_RWLOCK_RDLOCK(&model->state->model_rwlock); if (comb >= model->state->ncombs_set || model->state->per_arch[comb] == NULL) { // The model has not been executed on this combination STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); goto docal; } per_arch_model = &model->state->per_arch[comb][nimpl]; history = per_arch_model->history; HASH_FIND_UINT32_T(history, &key, elt); entry = (elt == NULL) ? NULL : elt->history_entry; if (entry) data = (double*) ((char*) entry + offset); STARPU_ASSERT_MSG(!entry || *data >= 0, "entry=%p, entry data=%lf\n", entry, entry?*data:NAN); STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); /* Here helgrind would shout that this is unprotected access. * We do not care about racing access to the mean/deviation, we only want * a good-enough estimation */ if (entry && entry->nsample) { #ifdef STARPU_SIMGRID if (entry->nsample < _starpu_calibration_minimum) { char archname[STR_SHORT_LENGTH]; starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl); _STARPU_DISP("Warning: model %s is not calibrated enough for %s size %ld footprint %x (only %u measurements). Using it anyway for the simulation\n", model->symbol, archname, j->task?(long int)_starpu_job_get_data_size(model, arch, nimpl, j):-1, key, entry->nsample); } #else if (entry->nsample >= _starpu_calibration_minimum) #endif { STARPU_ASSERT_MSG(*data >= 0, "entry data=%lf\n", *data); /* TODO: report differently if we've scheduled really enough * of that task and the scheduler should perhaps put it aside */ /* Calibrated enough */ exp = *data; } } docal: #ifdef STARPU_SIMGRID if (isnan(exp)) { char archname[STR_SHORT_LENGTH]; starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl); _STARPU_DISP("Warning: model %s is not calibrated at all for %s size %ld footprint %x. Assuming it can not work there\n", model->symbol, archname, j->task?(long int)_starpu_job_get_data_size(model, arch, nimpl, j):-1, key); exp = 0.; } #else STARPU_HG_DISABLE_CHECKING(model->benchmarking); if (isnan(exp) && !model->benchmarking) { char archname[STR_SHORT_LENGTH]; starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl); _STARPU_DISP("Warning: model %s is not calibrated enough for %s size %ld footprint %x (only %u measurements), forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this. You probably need to run again to continue calibrating the model, until this warning disappears.\n", model->symbol, archname, j->task?(long int)_starpu_job_get_data_size(model, arch, nimpl, j):-1, key, entry ? entry->nsample : 0); _starpu_set_calibrate_flag(1); model->benchmarking = 1; } #endif STARPU_ASSERT_MSG(isnan(exp)||exp >= 0, "exp=%lf\n", exp); return exp; } double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j,unsigned nimpl) { return __starpu_history_based_job_expected_perf(model, arch, j, nimpl, offsetof(struct starpu_perfmodel_history_entry, mean)); } double _starpu_history_based_job_expected_deviation(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, struct _starpu_job *j,unsigned nimpl) { return __starpu_history_based_job_expected_perf(model, arch, j, nimpl, offsetof(struct starpu_perfmodel_history_entry, deviation)); } double starpu_perfmodel_history_based_expected_perf(struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch, uint32_t footprint) { struct _starpu_job j = { .footprint = footprint, .footprint_is_computed = 1, }; return _starpu_history_based_job_expected_perf(model, arch, &j, j.nimpl); } int _starpu_perfmodel_create_comb_if_needed(struct starpu_perfmodel_arch* arch) { int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); if(comb == -1) comb = starpu_perfmodel_arch_comb_add(arch->ndevices, arch->devices); return comb; } void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned cpuid STARPU_ATTRIBUTE_UNUSED, double measured, unsigned impl, unsigned number) { STARPU_ASSERT_MSG(measured >= 0, "measured=%lf\n", measured); if (model) { int c; unsigned found = 0; int comb = _starpu_perfmodel_create_comb_if_needed(arch); STARPU_PTHREAD_RWLOCK_WRLOCK(&model->state->model_rwlock); for(c = 0; c < model->state->ncombs; c++) { if(model->state->combs[c] == comb) { found = 1; break; } } if(!found) { if (model->state->ncombs + 1 >= model->state->ncombs_set) { // The number of combinations is bigger than the one which was initially allocated, we need to reallocate, // do not only reallocate 1 extra comb, rather reallocate 5 to avoid too frequent calls to _starpu_perfmodel_realloc _starpu_perfmodel_realloc(model, model->state->ncombs_set+5); } model->state->combs[model->state->ncombs++] = comb; } if(!model->state->per_arch[comb]) { _starpu_perfmodel_malloc_per_arch(model, comb, STARPU_MAXIMPLEMENTATIONS); _starpu_perfmodel_malloc_per_arch_is_set(model, comb, STARPU_MAXIMPLEMENTATIONS); model->state->nimpls[comb] = 0; } struct starpu_perfmodel_per_arch *per_arch_model = &model->state->per_arch[comb][impl]; if (model->state->per_arch_is_set[comb][impl] == 0) { // We are adding a new implementation for the given comb and the given impl model->state->nimpls[comb]++; model->state->per_arch_is_set[comb][impl] = 1; } if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED || model->type == STARPU_REGRESSION_BASED) { struct starpu_perfmodel_history_entry *entry; struct starpu_perfmodel_history_table *elt; struct starpu_perfmodel_history_list **list; uint32_t key = _starpu_compute_buffers_footprint(model, arch, impl, j); list = &per_arch_model->list; HASH_FIND_UINT32_T(per_arch_model->history, &key, elt); entry = (elt == NULL) ? NULL : elt->history_entry; if (!entry) { /* this is the first entry with such a footprint */ _STARPU_CALLOC(entry, 1, sizeof(struct starpu_perfmodel_history_entry)); /* Tell helgrind that we do not care about * racing access to the sampling, we only want a * good-enough estimation */ STARPU_HG_DISABLE_CHECKING(entry->nsample); STARPU_HG_DISABLE_CHECKING(entry->mean); /* For history-based, do not take the first measurement into account, it is very often quite bogus */ /* TODO: it'd be good to use a better estimation heuristic, like the median, or latest n values, etc. */ if (number != 1 || model->type != STARPU_HISTORY_BASED) { entry->sum = measured * number; entry->sum2 = measured*measured * number; entry->nsample = number; entry->mean = measured; } entry->size = __starpu_job_get_data_size(model, arch, impl, j); entry->flops = j->task->flops; entry->footprint = key; insert_history_entry(entry, list, &per_arch_model->history); } else { /* There is already an entry with the same footprint */ double local_deviation = measured/entry->mean; if (entry->nsample && (100 * local_deviation > (100 + historymaxerror) || (100 / local_deviation > (100 + historymaxerror)))) { entry->nerror+=number; /* More errors than measurements, we're most probably completely wrong, we flush out all the entries */ if (entry->nerror >= entry->nsample) { char archname[STR_SHORT_LENGTH]; starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), impl); _STARPU_DISP("Too big deviation for model %s on %s: %fus vs average %fus, %u such errors against %u samples (%+f%%), flushing the performance model. Use the STARPU_HISTORY_MAX_ERROR environment variable to control the threshold (currently %d%%)\n", model->symbol, archname, measured, entry->mean, entry->nerror, entry->nsample, measured * 100. / entry->mean - 100, historymaxerror); entry->sum = 0.0; entry->sum2 = 0.0; entry->nsample = 0; entry->nerror = 0; entry->mean = 0.0; entry->deviation = 0.0; } } else { entry->sum += measured * number; entry->sum2 += measured*measured * number; entry->nsample += number; unsigned n = entry->nsample; entry->mean = entry->sum / n; entry->deviation = sqrt((fabs(entry->sum2 - (entry->sum*entry->sum)/n))/n); } if (j->task->flops != 0. && !isnan(entry->flops)) { if (entry->flops == 0.) entry->flops = j->task->flops; else if ((fabs(entry->flops - j->task->flops) / entry->flops) > 0.00001) { /* Incoherent flops! forget about trying to record flops */ _STARPU_DISP("Incoherent flops in model %s: %f vs previous %f, stopping recording flops\n", model->symbol, j->task->flops, entry->flops); entry->flops = NAN; } } } STARPU_ASSERT(entry); } if (model->type == STARPU_REGRESSION_BASED || model->type == STARPU_NL_REGRESSION_BASED) { struct starpu_perfmodel_regression_model *reg_model; reg_model = &per_arch_model->regression; /* update the regression model */ size_t job_size = __starpu_job_get_data_size(model, arch, impl, j); double logy, logx; logx = log((double)job_size); logy = log(measured); reg_model->sumlnx += logx; reg_model->sumlnx2 += logx*logx; reg_model->sumlny += logy; reg_model->sumlnxlny += logx*logy; if (reg_model->minx == 0 || job_size < reg_model->minx) reg_model->minx = job_size; if (reg_model->maxx == 0 || job_size > reg_model->maxx) reg_model->maxx = job_size; reg_model->nsample++; if (VALID_REGRESSION(reg_model)) { unsigned n = reg_model->nsample; double num = (n*reg_model->sumlnxlny - reg_model->sumlnx*reg_model->sumlny); double denom = (n*reg_model->sumlnx2 - reg_model->sumlnx*reg_model->sumlnx); reg_model->beta = num/denom; reg_model->alpha = exp((reg_model->sumlny - reg_model->beta*reg_model->sumlnx)/n); reg_model->valid = 1; } } if (model->type == STARPU_MULTIPLE_REGRESSION_BASED) { struct starpu_perfmodel_history_entry *entry; struct starpu_perfmodel_history_list **list; list = &per_arch_model->list; _STARPU_CALLOC(entry, 1, sizeof(struct starpu_perfmodel_history_entry)); _STARPU_MALLOC(entry->parameters, model->nparameters*sizeof(double)); model->parameters(j->task, entry->parameters); entry->tag = j->task->tag_id; STARPU_ASSERT(measured >= 0); entry->duration = measured; struct starpu_perfmodel_history_list *link; _STARPU_MALLOC(link, sizeof(struct starpu_perfmodel_history_list)); link->next = *list; link->entry = entry; *list = link; } #ifdef STARPU_MODEL_DEBUG struct starpu_task *task = j->task; starpu_perfmodel_debugfilepath(model, arch_combs[comb], per_arch_model->debug_path, STR_LONG_LENGTH, impl); FILE *f = fopen(per_arch_model->debug_path, "a+"); int locked; if (f == NULL) { _STARPU_DISP("Error <%s> when opening file <%s>\n", strerror(errno), per_arch_model->debug_path); STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); return; } locked = _starpu_fwrlock(f) == 0; if (!j->footprint_is_computed) (void) _starpu_compute_buffers_footprint(model, arch, impl, j); STARPU_ASSERT(j->footprint_is_computed); fprintf(f, "0x%x\t%lu\t%f\t%f\t%f\t%u\t\t", j->footprint, (unsigned long) __starpu_job_get_data_size(model, arch, impl, j), measured, task->predicted, task->predicted_transfer, cpuid); unsigned i; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); for (i = 0; i < nbuffers; i++) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); STARPU_ASSERT(handle->ops); STARPU_ASSERT(handle->ops->display); handle->ops->display(handle, f); } fprintf(f, "\n"); if (locked) _starpu_fwrunlock(f); fclose(f); #endif STARPU_PTHREAD_RWLOCK_UNLOCK(&model->state->model_rwlock); } } void starpu_perfmodel_update_history_n(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch * arch, unsigned cpuid, unsigned nimpl, double measured, unsigned number) { struct _starpu_job *job = _starpu_get_job_associated_to_task(task); #ifdef STARPU_SIMGRID STARPU_ASSERT_MSG(0, "We are not supposed to update history when simulating execution"); #endif _starpu_init_and_load_perfmodel(model); /* Record measurement */ _starpu_update_perfmodel_history(job, model, arch, cpuid, measured, nimpl, number); /* and save perfmodel on termination */ _starpu_set_calibrate_flag(1); } void starpu_perfmodel_update_history(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch * arch, unsigned cpuid, unsigned nimpl, double measured) { starpu_perfmodel_update_history_n(model, task, arch, cpuid, nimpl, measured, 1); } int starpu_perfmodel_list_combs(FILE *output, struct starpu_perfmodel *model) { int comb; fprintf(output, "Model <%s>\n", model->symbol); for(comb = 0; comb < model->state->ncombs; comb++) { struct starpu_perfmodel_arch *arch; int device; arch = starpu_perfmodel_arch_comb_fetch(model->state->combs[comb]); fprintf(output, "\tComb %d: %d device%s\n", model->state->combs[comb], arch->ndevices, arch->ndevices>1?"s":""); for(device=0 ; devicendevices ; device++) { const char *name = starpu_perfmodel_get_archtype_name(arch->devices[device].type); fprintf(output, "\t\tDevice %d: type: %s - devid: %d - ncores: %d\n", device, name, arch->devices[device].devid, arch->devices[device].ncores); } } return 0; } struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_arch(struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned impl) { int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); if (comb == -1) return NULL; if (comb >= model->state->ncombs_set || !model->state->per_arch[comb]) return NULL; return &model->state->per_arch[comb][impl]; } static struct starpu_perfmodel_per_arch *_starpu_perfmodel_get_model_per_devices(struct starpu_perfmodel *model, int impl, va_list varg_list) { struct starpu_perfmodel_arch arch; va_list varg_list_copy; int i, arg_type; int is_cpu_set = 0; // We first count the number of devices arch.ndevices = 0; va_copy(varg_list_copy, varg_list); while ((arg_type = va_arg(varg_list_copy, int)) != -1) { int devid = va_arg(varg_list_copy, int); int ncores = va_arg(varg_list_copy, int); arch.ndevices ++; if (arg_type == STARPU_CPU_WORKER) { STARPU_ASSERT_MSG(is_cpu_set == 0, "STARPU_CPU_WORKER can only be specified once\n"); STARPU_ASSERT_MSG(devid==0, "STARPU_CPU_WORKER must be followed by a value 0 for the device id"); is_cpu_set = 1; } else { STARPU_ASSERT_MSG(ncores==1, "%s must be followed by a value 1 for ncores", starpu_worker_get_type_as_string(arg_type)); } } va_end(varg_list_copy); // We set the devices _STARPU_MALLOC(arch.devices, arch.ndevices * sizeof(struct starpu_perfmodel_device)); va_copy(varg_list_copy, varg_list); for(i=0 ; i= model->state->ncombs_set) _starpu_perfmodel_realloc(model, comb+1); // Get the per_arch object if (model->state->per_arch[comb] == NULL) { _starpu_perfmodel_malloc_per_arch(model, comb, STARPU_MAXIMPLEMENTATIONS); _starpu_perfmodel_malloc_per_arch_is_set(model, comb, STARPU_MAXIMPLEMENTATIONS); model->state->nimpls[comb] = 0; } model->state->per_arch_is_set[comb][impl] = 1; model->state->nimpls[comb] ++; return &model->state->per_arch[comb][impl]; } struct starpu_perfmodel_per_arch *starpu_perfmodel_get_model_per_devices(struct starpu_perfmodel *model, int impl, ...) { va_list varg_list; struct starpu_perfmodel_per_arch *per_arch; va_start(varg_list, impl); per_arch = _starpu_perfmodel_get_model_per_devices(model, impl, varg_list); va_end(varg_list); return per_arch; } int starpu_perfmodel_set_per_devices_cost_function(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_cost_function func, ...) { va_list varg_list; struct starpu_perfmodel_per_arch *per_arch; va_start(varg_list, func); per_arch = _starpu_perfmodel_get_model_per_devices(model, impl, varg_list); per_arch->cost_function = func; va_end(varg_list); return 0; } int starpu_perfmodel_set_per_devices_size_base(struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_size_base func, ...) { va_list varg_list; struct starpu_perfmodel_per_arch *per_arch; va_start(varg_list, func); per_arch = _starpu_perfmodel_get_model_per_devices(model, impl, varg_list); per_arch->size_base = func; va_end(varg_list); return 0; } starpu-1.4.9+dfsg/src/core/perfmodel/perfmodel_nan.c000066400000000000000000000040531507764646700224630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include /** Some systems cannot read NAN values, yes, it is really bad ... */ #if defined(STARPU_HAVE_WINDOWS) || defined(STARPU_OPENBSD_SYS) # define _STARPU_OWN_NAN 1 #else # define _STARPU_OWN_NAN 0 #endif #if _STARPU_OWN_NAN == 1 static void _starpu_read_spaces(FILE *f) { int c = getc(f); if (isspace(c)) { while (isspace(c)) c = getc(f); ungetc(c, f); } else { ungetc(c, f); } } #endif /* _STARPU_OWN_NAN */ void _starpu_write_double(FILE *f, const char *format, double val) { #if _STARPU_OWN_NAN == 1 if (isnan(val)) { fprintf(f, "NaN"); } else { fprintf(f, format, val); } #else fprintf(f, format, val); #endif } int _starpu_read_double(FILE *f, char *format, double *val) { #if _STARPU_OWN_NAN == 1 _starpu_read_spaces(f); int x1 = getc(f); if (x1 == 'N') { int x2 = getc(f); int x3 = getc(f); if (x2 == 'a' && x3 == 'N') { #ifdef _MSC_VER unsigned long long _mynan = 0x7fffffffffffffffull; double mynan = *(double*)&_mynan; #else double mynan = NAN; #endif *val = mynan; return 1; } else { return 0; } } else { ungetc(x1, f); return fscanf(f, format, val); } #else return fscanf(f, format, val); #endif } starpu-1.4.9+dfsg/src/core/perfmodel/perfmodel_print.c000066400000000000000000000231041507764646700230410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "perfmodel.h" static void _starpu_perfmodel_print_history_based(struct starpu_perfmodel_per_arch *per_arch_model, char *parameter, uint32_t *footprint, FILE *output) { struct starpu_perfmodel_history_list *ptr; ptr = per_arch_model->list; if (!parameter && ptr) fprintf(output, "# hash\t\tsize\t\tflops\t\tmean (us or J)\tstddev (us or J)\t\tn\n"); while (ptr) { struct starpu_perfmodel_history_entry *entry = ptr->entry; if (!footprint || entry->footprint == *footprint) { if (!parameter) { /* There isn't a parameter that is explicitly requested, so we display all parameters */ fprintf(output, "%08x\t%-15lu\t%-15e\t%-15e\t%-15e\t%u\n", entry->footprint, (unsigned long) entry->size, entry->flops, entry->mean, entry->deviation, entry->nsample); } else { /* only display the parameter that was specifically requested */ if (strcmp(parameter, "mean") == 0) { fprintf(output, "%-15e\n", entry->mean); } if (strcmp(parameter, "stddev") == 0) { fprintf(output, "%-15e\n", entry->deviation); return; } } } ptr = ptr->next; } } void starpu_perfmodel_print(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output) { int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); STARPU_ASSERT(comb != -1); struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][nimpl]; if (arch_model->regression.nsample || arch_model->regression.valid || arch_model->regression.nl_valid || arch_model->list) { char archname[32]; starpu_perfmodel_get_arch_name(arch, archname, 32, nimpl); fprintf(output, "# performance model for %s\n", archname); } if (parameter == NULL) { /* no specific parameter was requested, so we display everything */ if (arch_model->regression.nsample) { fprintf(output, "\tRegression : #sample = %u\n", arch_model->regression.nsample); } /* Only display the regression model if we could actually build a model */ if (arch_model->regression.valid) { fprintf(output, "\tLinear: y = alpha size ^ beta\n"); fprintf(output, "\t\talpha = %e\n", arch_model->regression.alpha); fprintf(output, "\t\tbeta = %e\n", arch_model->regression.beta); } else { //fprintf(output, "\tLinear model is INVALID\n"); } if (arch_model->regression.nl_valid) { fprintf(output, "\tNon-Linear: y = a size ^b + c\n"); fprintf(output, "\t\ta = %e\n", arch_model->regression.a); fprintf(output, "\t\tb = %e\n", arch_model->regression.b); fprintf(output, "\t\tc = %e\n", arch_model->regression.c); } else { //fprintf(output, "\tNon-Linear model is INVALID\n"); } _starpu_perfmodel_print_history_based(arch_model, parameter, footprint, output); #if 0 char debugname[1024]; starpu_perfmodel_debugfilepath(model, arch, debugname, 1024, nimpl); _STARPU_MSG("\t debug file path : %s\n", debugname); #endif } else { /* only display the parameter that was specifically requested */ if (strcmp(parameter, "a") == 0) { fprintf(output, "%e\n", arch_model->regression.a); return; } if (strcmp(parameter, "b") == 0) { fprintf(output, "%e\n", arch_model->regression.b); return; } if (strcmp(parameter, "c") == 0) { fprintf(output, "%e\n", arch_model->regression.c); return; } if (strcmp(parameter, "alpha") == 0) { fprintf(output, "%e\n", arch_model->regression.alpha); return; } if (strcmp(parameter, "beta") == 0) { fprintf(output, "%e\n", arch_model->regression.beta); return; } if (strcmp(parameter, "path-file-debug") == 0) { char debugname[256]; starpu_perfmodel_debugfilepath(model, arch, debugname, 256, nimpl); fprintf(output, "%s\n", debugname); return; } if ((strcmp(parameter, "mean") == 0) || (strcmp(parameter, "stddev") == 0)) { _starpu_perfmodel_print_history_based(arch_model, parameter, footprint, output); return; } /* TODO display if it's valid ? */ _STARPU_ERROR("Unknown parameter requested, aborting.\n"); } } /* FIXME: Generalize to any arch */ int starpu_perfmodel_print_all(struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output) { _starpu_init_and_load_perfmodel(model); if (arch == NULL) { int comb, impl; for(comb = 0; comb < starpu_perfmodel_get_narch_combs(); comb++) { struct starpu_perfmodel_arch *arch_comb = starpu_perfmodel_arch_comb_fetch(comb); int nimpls = model->state ? model->state->nimpls[comb] : 0; for(impl = 0; impl < nimpls; impl++) starpu_perfmodel_print(model, arch_comb, impl, parameter, footprint, output); } } else { if (strcmp(arch, "cpu") == 0) { int implid; struct starpu_perfmodel_arch perf_arch; perf_arch.ndevices = 1; _STARPU_MALLOC(perf_arch.devices, sizeof(struct starpu_perfmodel_device)); perf_arch.devices[0].type = STARPU_CPU_WORKER; perf_arch.devices[0].devid = 0; perf_arch.devices[0].ncores = 1; int comb = starpu_perfmodel_arch_comb_get(perf_arch.ndevices, perf_arch.devices); STARPU_ASSERT(comb != -1); int nimpls = model->state->nimpls[comb]; for (implid = 0; implid < nimpls; implid++) starpu_perfmodel_print(model, &perf_arch,implid, parameter, footprint, output); /* Display all codelets on cpu */ free(perf_arch.devices); return 0; } int k; if (sscanf(arch, "cpu:%d", &k) == 1) { /* For combined CPU workers */ if ((k < 1) || (k > STARPU_MAXCPUS)) { _STARPU_ERROR("Invalid CPU size\n"); } int implid; struct starpu_perfmodel_arch perf_arch; perf_arch.ndevices = 1; _STARPU_MALLOC(perf_arch.devices, sizeof(struct starpu_perfmodel_device)); perf_arch.devices[0].type = STARPU_CPU_WORKER; perf_arch.devices[0].devid = 0; perf_arch.devices[0].ncores = k; int comb = starpu_perfmodel_arch_comb_get(perf_arch.ndevices, perf_arch.devices); STARPU_ASSERT(comb != -1); int nimpls = model->state->nimpls[comb]; for (implid = 0; implid < nimpls; implid++) starpu_perfmodel_print(model, &perf_arch, implid, parameter, footprint, output); free(perf_arch.devices); return 0; } if (strcmp(arch, "cuda") == 0) { int implid; struct starpu_perfmodel_arch perf_arch; perf_arch.ndevices = 1; _STARPU_MALLOC(perf_arch.devices, sizeof(struct starpu_perfmodel_device)); perf_arch.devices[0].type = STARPU_CUDA_WORKER; perf_arch.devices[0].ncores = 1; int comb; for(comb = 0; comb < starpu_perfmodel_get_narch_combs(); comb++) { struct starpu_perfmodel_arch *arch_comb = starpu_perfmodel_arch_comb_fetch(comb); if(arch_comb->ndevices == 1 && arch_comb->devices[0].type == STARPU_CUDA_WORKER) { perf_arch.devices[0].devid = arch_comb->devices[0].devid; int nimpls = model->state->nimpls[comb]; for (implid = 0; implid < nimpls; implid++) starpu_perfmodel_print(model, &perf_arch, implid, parameter, footprint, output); } } free(perf_arch.devices); return 0; } /* TODO: There must be a cleaner way ! */ int gpuid; int nmatched; nmatched = sscanf(arch, "cuda_%d", &gpuid); if (nmatched == 0) nmatched = sscanf(arch, "cuda%d", &gpuid); if (nmatched == 1) { struct starpu_perfmodel_arch perf_arch; perf_arch.ndevices = 1; _STARPU_MALLOC(perf_arch.devices, sizeof(struct starpu_perfmodel_device)); perf_arch.devices[0].type = STARPU_CUDA_WORKER; perf_arch.devices[0].devid = gpuid; perf_arch.devices[0].ncores = 1; int comb = starpu_perfmodel_arch_comb_get(perf_arch.ndevices, perf_arch.devices); STARPU_ASSERT(comb != -1); int nimpls = model->state->nimpls[comb]; int implid; for (implid = 0; implid < nimpls; implid++) starpu_perfmodel_print(model, &perf_arch, implid, parameter, footprint, output); return 0; } _STARPU_MSG("Unknown architecture requested\n"); return -1; } return 0; } int starpu_perfmodel_print_estimations(struct starpu_perfmodel *model, uint32_t footprint, FILE *output) { unsigned workerid; for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) { struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS); int comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); struct starpu_perfmodel_per_arch *arch_model; struct starpu_perfmodel_history_list *ptr = NULL; if (comb >= 0 && model->state->per_arch[comb]) { arch_model = &model->state->per_arch[comb][0]; for (ptr = arch_model->list; ptr; ptr = ptr->next) { struct starpu_perfmodel_history_entry *entry = ptr->entry; if (entry->footprint == footprint) { fprintf(output, "%s%e", workerid?" ":"", entry->mean); break; } } } if (!ptr) { /* Didn't find any entry :/ */ fprintf(output, "%sinf", workerid?" ":""); } } return 0; } starpu-1.4.9+dfsg/src/core/perfmodel/regression.c000066400000000000000000000135401507764646700220330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #define MAXREGITER 1000 #define EPS 1.0e-10 /* * For measurements close to C, we do not want to try to fit, since we are * fitting the distance to C, which won't actually really get smaller */ #define C_RADIUS 1 /* * smoothly ramp from 0 to 1 between 0 and 1 * <= 0: stay 0 * >= 1: stay 1 */ static double level(double x) { if (x <= 0.) return 0.; if (x >= 1.) return 1.; if (x < 0.5) return -2*x*x+4*x-1; return 2*x*x; } static double fixpop(unsigned pop, double c, double y) { double distance = (y-c)/c; return pop * level((distance - C_RADIUS) / C_RADIUS); } static double compute_b(double c, unsigned n, size_t *x, double *y, unsigned *pop) { double b; /* X = log (x) , Y = log (y - c) */ double sumxy = 0.0; double sumx = 0.0; double sumx2 = 0.0; double sumy = 0.0; double nn = 0; unsigned i; for (i = 0; i < n; i++) { double xi = log(x[i]); double yi = log(y[i]-c); double popi = fixpop(pop[i], c, y[i]); if (popi <= 0) continue; sumxy += xi*yi*popi; sumx += xi*popi; sumx2 += xi*xi*popi; sumy += yi*popi; nn += popi; } b = (nn * sumxy - sumx * sumy) / (nn*sumx2 - sumx*sumx); return b; } static double compute_a(double c, double b, unsigned n, size_t *x, double *y, unsigned *pop) { double a; /* X = log (x) , Y = log (y - c) */ double sumx = 0.0; double sumy = 0.0; double nn = 0; unsigned i; for (i = 0; i < n; i++) { double xi = log(x[i]); double yi = log(y[i]-c); double popi = fixpop(pop[i], c, y[i]); if (popi <= 0) continue; sumx += xi*popi; sumy += yi*popi; nn += popi; } a = (sumy - b*sumx) / nn; return a; } /* returns r */ static double test_r(double c, unsigned n, size_t *x, double *y, unsigned *pop) { double r; // printf("test c = %e\n", c); /* X = log (x) , Y = log (y - c) */ double sumxy = 0.0; double sumx = 0.0; double sumx2 = 0.0; double sumy = 0.0; double sumy2 = 0.0; double nn = 0; unsigned i; for (i = 0; i < n; i++) { double xi = log(x[i]); double yi = log(y[i]-c); double popi = fixpop(pop[i], c, y[i]); if (popi <= 0) continue; // printf("Xi = %e, Yi = %e\n", xi, yi); sumxy += xi*yi*popi; sumx += xi*popi; sumx2 += xi*xi*popi; sumy += yi*popi; sumy2 += yi*yi*popi; nn += popi; } //printf("sumxy %e\n", sumxy); //printf("sumx %e\n", sumx); //printf("sumx2 %e\n", sumx2); //printf("sumy %e\n", sumy); //printf("sumy2 %e\n", sumy2); r = (nn * sumxy - sumx * sumy) / sqrt((nn* sumx2 - sumx*sumx) * (nn*sumy2 - sumy*sumy)); return r; } static unsigned find_list_size(struct starpu_perfmodel_history_list *list_history) { unsigned cnt = 0; struct starpu_perfmodel_history_list *ptr = list_history; while (ptr) { if (ptr->entry->nsample) cnt++; ptr = ptr->next; } return cnt; } static int compar(const void *_a, const void *_b) { double a = *(double*) _a; double b = *(double*) _b; if (a < b) return -1; if (a > b) return 1; return 0; } static double get_list_fourth(double *y, unsigned n) { double sorted[n]; memcpy(sorted, y, n * sizeof(*sorted)); qsort(sorted, n, sizeof(*sorted), compar); return sorted[n/3]; } static void dump_list(size_t *x, double *y, unsigned *pop, struct starpu_perfmodel_history_list *list_history) { struct starpu_perfmodel_history_list *ptr = list_history; unsigned i = 0; while (ptr) { if (ptr->entry->nsample) { x[i] = ptr->entry->size; y[i] = ptr->entry->mean; pop[i] = ptr->entry->nsample; i++; } ptr = ptr->next; } } /* y = ax^b + c * return 0 if success, -1 otherwise * if success, a, b and c are modified * */ /* See in Cedric Augonnet's PhD thesis's Appendix B for the rationale * Scheduling Tasks over Multicore machines enhanced with Accelerators: a * Runtime System’s Perspective */ int _starpu_regression_non_linear_power(struct starpu_perfmodel_history_list *ptr, double *a, double *b, double *c) { unsigned n = find_list_size(ptr); if (!n) return -1; size_t *x; _STARPU_MALLOC(x, n*sizeof(size_t)); double *y; _STARPU_MALLOC(y, n*sizeof(double)); STARPU_ASSERT(y); unsigned *pop; _STARPU_MALLOC(pop, n*sizeof(unsigned)); STARPU_ASSERT(y); dump_list(x, y, pop, ptr); double cmin = 0.0; double cmax = get_list_fourth(y, n); unsigned iter; double err = 100000.0; /* unsigned i; for (i = 0; i < 100; i++) { double ci = cmin + (cmax-cmin)*i/100.; fprintf(stderr,"%f: %f\n", ci, 1.0 - test_r(ci, n, x, y, pop)); } */ /* Use dichotomy to find c that gives the best matching */ for (iter = 0; iter < MAXREGITER; iter++) { double c1, c2; double r1, r2; c1 = cmin + (0.33)*(cmax - cmin); c2 = cmin + (0.67)*(cmax - cmin); r1 = test_r(c1, n, x, y, pop); r2 = test_r(c2, n, x, y, pop); double err1, err2; err1 = fabs(1.0 - r1); err2 = fabs(1.0 - r2); //fprintf(stderr,"%f - %f: %f - %f: %f - %f\n", cmin, c1, err1, c2, err2, cmax); if (err1 < err2) { /* 1 is better */ cmax = c2; } else { /* 2 is better */ cmin = c1; } if (fabs(err - STARPU_MIN(err1, err2)) < EPS) break; err = STARPU_MIN(err1, err2); } *c = (cmin + cmax)/2; *b = compute_b(*c, n, x, y, pop); *a = exp(compute_a(*c, *b, n, x, y, pop)); free(x); free(y); free(pop); return 0; } starpu-1.4.9+dfsg/src/core/perfmodel/regression.h000066400000000000000000000020661507764646700220410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __REGRESSION_H__ #define __REGRESSION_H__ /** @file */ #include #include #include #include #include #pragma GCC visibility push(hidden) int _starpu_regression_non_linear_power(struct starpu_perfmodel_history_list *ptr, double *a, double *b, double *c); #pragma GCC visibility pop #endif // __REGRESSION_H__ starpu-1.4.9+dfsg/src/core/perfmodel/starpu-perfmodel.dtd000066400000000000000000000041771507764646700235030ustar00rootroot00000000000000 starpu-1.4.9+dfsg/src/core/progress_hook.c000066400000000000000000000056041507764646700205640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #define NMAXHOOKS 16 struct progression_hook { unsigned (*func)(void *arg); void *arg; unsigned active; }; /* protect the hook table */ static starpu_pthread_rwlock_t progression_hook_rwlock; static struct progression_hook hooks[NMAXHOOKS] = {{NULL, NULL, 0}}; static int active_hook_cnt = 0; /* * Statically initializing progression_hook_rwlock seems to lead to weird errors * on Darwin, so we do it dynamically. */ void _starpu_init_progression_hooks(void) { STARPU_PTHREAD_RWLOCK_INIT(&progression_hook_rwlock, NULL); STARPU_HG_DISABLE_CHECKING(active_hook_cnt); } int starpu_progression_hook_register(unsigned (*func)(void *arg), void *arg) { int hook; STARPU_PTHREAD_RWLOCK_WRLOCK(&progression_hook_rwlock); for (hook = 0; hook < NMAXHOOKS; hook++) { if (!hooks[hook].active) { /* We found an empty slot */ hooks[hook].func = func; hooks[hook].arg = arg; hooks[hook].active = 1; active_hook_cnt++; STARPU_PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock); return hook; } } STARPU_PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock); starpu_wake_all_blocked_workers(); /* We could not find an empty slot */ return -1; } void starpu_progression_hook_deregister(int hook_id) { STARPU_PTHREAD_RWLOCK_WRLOCK(&progression_hook_rwlock); if (hooks[hook_id].active) active_hook_cnt--; hooks[hook_id].active = 0; STARPU_PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock); } unsigned _starpu_execute_registered_progression_hooks(void) { if (active_hook_cnt == 0) return 1; /* By default, it is possible to block, but if some progression hooks * requires that it's not blocking, we disable blocking. */ unsigned may_block = 1; unsigned hook; for (hook = 0; hook < NMAXHOOKS; hook++) { unsigned active; STARPU_PTHREAD_RWLOCK_RDLOCK(&progression_hook_rwlock); active = hooks[hook].active; STARPU_PTHREAD_RWLOCK_UNLOCK(&progression_hook_rwlock); unsigned may_block_hook = 1; if (active) may_block_hook = hooks[hook].func(hooks[hook].arg); /* As soon as one hook tells that the driver cannot be * blocking, we don't allow it. */ if (!may_block_hook) may_block = 0; } return may_block; } starpu-1.4.9+dfsg/src/core/progress_hook.h000066400000000000000000000017031507764646700205650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __PROGRESS_HOOK_H__ #define __PROGRESS_HOOK_H__ #pragma GCC visibility push(hidden) /** @file */ void _starpu_init_progression_hooks(void); unsigned _starpu_execute_registered_progression_hooks(void); #pragma GCC visibility pop #endif /* !__PROGRESS_HOOK_H__ */ starpu-1.4.9+dfsg/src/core/sched_ctx.c000066400000000000000000002705471507764646700176560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2017-2017 Arthur Chevalier * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include enum _starpu_ctx_change_op { ctx_change_invalid = 0, ctx_change_add = 1, ctx_change_remove = 2 }; static starpu_pthread_mutex_t sched_ctx_manag = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_mutex_t finished_submit_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static struct starpu_task stop_submission_task = STARPU_TASK_INITIALIZER; static starpu_pthread_key_t sched_ctx_key; static unsigned with_hypervisor = 0; static double hyp_start_sample[STARPU_NMAX_SCHED_CTXS]; static double hyp_start_allow_sample[STARPU_NMAX_SCHED_CTXS]; static double flops[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS]; static size_t data_size[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS]; static double hyp_actual_start_sample[STARPU_NMAX_SCHED_CTXS]; static double window_size; static int nobind; static int occupied_sms = 0; static unsigned _starpu_get_first_free_sched_ctx(struct _starpu_machine_config *config); static void _starpu_sched_ctx_put_new_master(unsigned sched_ctx_id); static void _starpu_sched_ctx_block_workers_in_parallel(unsigned sched_ctx_id, unsigned all); static void _starpu_sched_ctx_unblock_workers_in_parallel(unsigned sched_ctx_id, unsigned all); static void _starpu_sched_ctx_update_parallel_workers_with(unsigned sched_ctx_id); static void _starpu_sched_ctx_update_parallel_workers_without(unsigned sched_ctx_id); static void set_priority_on_notified_workers(int *workers, int nworkers, unsigned sched_ctx_id, unsigned priority); static void set_priority_hierarchically_on_notified_workers(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx, unsigned priority); static void fetch_tasks_from_empty_ctx_list(struct _starpu_sched_ctx *sched_ctx); static void add_notified_workers(int *workers_to_add, int nworkers_to_add, unsigned sched_ctx_id); /* reused from combined_workers.c */ static int compar_int(const void *pa, const void *pb) { int a = *((int *)pa); int b = *((int *)pb); return a - b; } /* reused from combined_workers.c */ static void sort_workerid_array(int nworkers, int workerid_array[]) { qsort(workerid_array, nworkers, sizeof(int), compar_int); } /* notify workers that a ctx change operation is about to proceed. * * workerids must be sorted by ascending id * * Once this function returns, the notified workers must not start a new * scheduling operation until they are notified that the ctx change op is * done. */ static void notify_workers_about_changing_ctx_pending(const unsigned nworkers, const int * const workerids) { STARPU_ASSERT(!_starpu_worker_sched_op_pending()); const int cur_workerid = _starpu_worker_get_id(); unsigned i; for (i=0; i workerids[i-1])); if (starpu_worker_is_combined_worker(workerids[i])) continue; if (workerids[i] == cur_workerid) continue; struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[i]); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); _starpu_worker_enter_changing_ctx_op(worker); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); } } /* notify workers that a ctx change operation is complete. * * workerids must be sorted by ascending id * * Once this function returns, the workers may proceed with scheduling operations again. */ static void notify_workers_about_changing_ctx_done(const unsigned nworkers, const int * const workerids) { STARPU_ASSERT(!_starpu_worker_sched_op_pending()); const int cur_workerid = _starpu_worker_get_id(); unsigned i; for (i=0; i workerids[i-1])); if (starpu_worker_is_combined_worker(workerids[i])) continue; if (workerids[i] == cur_workerid) continue; struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[i]); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); _starpu_worker_leave_changing_ctx_op(worker); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); } } static void _starpu_worker_gets_into_ctx(unsigned sched_ctx_id, struct _starpu_worker *worker) { unsigned ret_sched_ctx = _starpu_sched_ctx_elt_exists(worker->sched_ctx_list, sched_ctx_id); /* the worker was planning to go away in another ctx but finally he changed his mind & he's staying */ if (!ret_sched_ctx) { /* add context to worker */ _starpu_sched_ctx_list_add(&worker->sched_ctx_list, sched_ctx_id); worker->nsched_ctxs++; } worker->removed_from_ctx[sched_ctx_id] = 0; if(worker->tmp_sched_ctx == (int) sched_ctx_id) worker->tmp_sched_ctx = -1; return; } void _starpu_worker_gets_out_of_ctx(unsigned sched_ctx_id, struct _starpu_worker *worker) { unsigned ret_sched_ctx = _starpu_sched_ctx_elt_exists(worker->sched_ctx_list, sched_ctx_id); /* remove context from worker */ if(ret_sched_ctx) { /* don't remove scheduling data here, there might be tasks running and when post_exec executes scheduling data is not there any more, do it when deleting context, then we really won't need it anymore */ /* struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); */ /* if(sched_ctx && sched_ctx->sched_policy && sched_ctx->sched_policy->remove_workers) */ /* { */ /* _STARPU_SCHED_BEGIN; */ /* sched_ctx->sched_policy->remove_workers(sched_ctx_id, &worker->workerid, 1); */ /* _STARPU_SCHED_END; */ /* } */ if (!_starpu_sched_ctx_list_remove(&worker->sched_ctx_list, sched_ctx_id)) worker->nsched_ctxs--; } return; } #if 0 static void _starpu_update_workers_with_ctx(int *workerids, int nworkers, int sched_ctx_id) { int i; struct _starpu_worker *worker = NULL; for(i = 0; i < nworkers; i++) { worker = _starpu_get_worker_struct(workerids[i]); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); _starpu_worker_gets_into_ctx(sched_ctx_id, worker); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); } return; } #endif static void _starpu_update_notified_workers_with_ctx(int *workerids, int nworkers, int sched_ctx_id) { int i; for(i = 0; i < nworkers; i++) { struct _starpu_worker *worker; worker = _starpu_get_worker_struct(workerids[i]); _starpu_worker_gets_into_ctx(sched_ctx_id, worker); } return; } #if 0 static void _starpu_update_workers_without_ctx(int *workerids, int nworkers, int sched_ctx_id, unsigned now) { int i; struct _starpu_worker *worker = NULL; for(i = 0; i < nworkers; i++) { worker = _starpu_get_worker_struct(workerids[i]); if(now) { STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); _starpu_worker_gets_out_of_ctx(sched_ctx_id, worker); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); } else { STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); worker->removed_from_ctx[sched_ctx_id] = 1; STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); } } return; } #endif static void _starpu_update_notified_workers_without_ctx(int *workerids, int nworkers, int sched_ctx_id, unsigned now) { int i; for(i = 0; i < nworkers; i++) { struct _starpu_worker *worker; worker = _starpu_get_worker_struct(workerids[i]); if(now) { _starpu_worker_gets_out_of_ctx(sched_ctx_id, worker); } else { worker->removed_from_ctx[sched_ctx_id] = 1; } } return; } void starpu_sched_ctx_stop_task_submission() { _starpu_exclude_task_from_dag(&stop_submission_task); int ret = _starpu_task_submit_internally(&stop_submission_task); STARPU_ASSERT(!ret); } /* must be called with sched_mutex locked */ void starpu_sched_ctx_worker_shares_tasks_lists(int workerid, int sched_ctx_id) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); worker->shares_tasks_lists[sched_ctx_id] = 1; } static void _do_add_notified_workers(struct _starpu_sched_ctx *sched_ctx, int *workerids, int nworkers) { int ndevices = 0; struct starpu_perfmodel_device devices[nworkers]; int i = 0; for(i = 0; i < nworkers; i++) { int workerid = workerids[i]; if (workerid >= (int) starpu_worker_get_count()) /* Combined worker, don't care */ continue; struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); int dev1, dev2; unsigned found = 0; for(dev1 = 0; dev1 < worker->perf_arch.ndevices; dev1++) { for(dev2 = 0; dev2 < ndevices; dev2++) { if(devices[dev2].type == worker->perf_arch.devices[dev1].type && devices[dev2].devid == worker->perf_arch.devices[dev1].devid) { devices[dev2].ncores += worker->perf_arch.devices[dev1].ncores; found = 1; break; } } if(!found) { devices[ndevices].type = worker->perf_arch.devices[dev1].type; devices[ndevices].devid = worker->perf_arch.devices[dev1].devid; devices[ndevices].ncores = worker->perf_arch.devices[dev1].ncores; ndevices++; } else found = 0; } } if(ndevices > 0) { if(sched_ctx->perf_arch.devices == NULL) { _STARPU_MALLOC(sched_ctx->perf_arch.devices, ndevices*sizeof(struct starpu_perfmodel_device)); } else { int nfinal_devices = 0; int dev1, dev2; unsigned found = 0; for(dev1 = 0; dev1 < ndevices; dev1++) { for(dev2 = 0; dev2 < sched_ctx->perf_arch.ndevices; dev2++) { if(sched_ctx->perf_arch.devices[dev2].type == devices[dev1].type && sched_ctx->perf_arch.devices[dev2].devid == devices[dev1].devid) found = 1; } if(!found) { nfinal_devices++; } else found = 0; } int nsize = (sched_ctx->perf_arch.ndevices+nfinal_devices); _STARPU_REALLOC(sched_ctx->perf_arch.devices, nsize*sizeof(struct starpu_perfmodel_device)); } int dev1, dev2; unsigned found = 0; for(dev1 = 0; dev1 < ndevices; dev1++) { for(dev2 = 0; dev2 < sched_ctx->perf_arch.ndevices; dev2++) { if(sched_ctx->perf_arch.devices[dev2].type == devices[dev1].type && sched_ctx->perf_arch.devices[dev2].devid == devices[dev1].devid) { if(sched_ctx->perf_arch.devices[dev2].type == STARPU_CPU_WORKER) sched_ctx->perf_arch.devices[dev2].ncores += devices[dev1].ncores; found = 1; } } if(!found) { sched_ctx->perf_arch.devices[sched_ctx->perf_arch.ndevices].type = devices[dev1].type; sched_ctx->perf_arch.devices[sched_ctx->perf_arch.ndevices].devid = devices[dev1].devid; if (sched_ctx->stream_worker != -1) sched_ctx->perf_arch.devices[sched_ctx->perf_arch.ndevices].ncores = sched_ctx->nsms; else sched_ctx->perf_arch.devices[sched_ctx->perf_arch.ndevices].ncores = devices[dev1].ncores; sched_ctx->perf_arch.ndevices++; } else found = 0; } } _starpu_sched_ctx_update_parallel_workers_with(sched_ctx->id); } static void _starpu_add_workers_to_new_sched_ctx(struct _starpu_sched_ctx *sched_ctx, int *workerids, int nworkers) { struct starpu_worker_collection *workers = sched_ctx->workers; struct _starpu_machine_config *config = _starpu_get_machine_config(); if (nworkers == -1) nworkers = config->topology.nworkers; if (!nworkers) return; int _workerids[nworkers]; int i; if (workerids == NULL) { for(i = 0; i < nworkers; i++) _workerids[i] = i; workerids = _workerids; } for(i = 0; i < nworkers; i++) { int workerid = workerids[i]; { workers->add(workers, workerid); } struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); worker->tmp_sched_ctx = (int)sched_ctx->id; STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); } sort_workerid_array(nworkers, workerids); notify_workers_about_changing_ctx_pending(nworkers, workerids); _do_add_notified_workers(sched_ctx, workerids, nworkers); if(sched_ctx->sched_policy && sched_ctx->sched_policy->add_workers) { _STARPU_SCHED_BEGIN; sched_ctx->sched_policy->add_workers(sched_ctx->id, workerids, nworkers); _STARPU_SCHED_END; } notify_workers_about_changing_ctx_done(nworkers, workerids); } static void _starpu_remove_workers_from_sched_ctx(struct _starpu_sched_ctx *sched_ctx, int *workerids, int nworkers, int *removed_workers, int *n_removed_workers) { struct starpu_worker_collection *workers = sched_ctx->workers; struct starpu_perfmodel_device devices[workers->nworkers]; int ndevices = 0; int i = 0; for(i = 0; i < nworkers; i++) { if(workers->nworkers > 0) { if(_starpu_worker_belongs_to_a_sched_ctx(workerids[i], sched_ctx->id)) { int worker = workers->remove(workers, workerids[i]); if(worker >= 0) removed_workers[(*n_removed_workers)++] = worker; } } } unsigned found = 0; int dev; struct starpu_sched_ctx_iterator it; if(workers->init_iterator) workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { int worker = workers->get_next(workers, &it); struct _starpu_worker *str_worker = _starpu_get_worker_struct(worker); for(dev = 0; dev < str_worker->perf_arch.ndevices; dev++) { int dev2; for(dev2 = 0; dev2 < ndevices; dev2++) { if(devices[dev2].type == str_worker->perf_arch.devices[dev].type && devices[dev2].devid == str_worker->perf_arch.devices[dev].devid) { if(devices[dev2].type == STARPU_CPU_WORKER) devices[dev2].ncores += str_worker->perf_arch.devices[dev].ncores; } found = 1; } if(!found) { devices[ndevices].type = str_worker->perf_arch.devices[dev].type; devices[ndevices].devid = str_worker->perf_arch.devices[dev].devid; devices[ndevices].ncores = str_worker->perf_arch.devices[dev].ncores; ndevices++; } else found = 0; } found = 0; } sched_ctx->perf_arch.ndevices = ndevices; for(dev = 0; dev < ndevices; dev++) { sched_ctx->perf_arch.devices[dev].type = devices[dev].type; sched_ctx->perf_arch.devices[dev].devid = devices[dev].devid; sched_ctx->perf_arch.devices[dev].ncores = devices[dev].ncores; } _starpu_sched_ctx_update_parallel_workers_without(sched_ctx->id); return; } static void _starpu_sched_ctx_free_scheduling_data(struct _starpu_sched_ctx *sched_ctx) { if(sched_ctx->sched_policy && sched_ctx->sched_policy->remove_workers) { int *workerids = NULL; unsigned nworkers_ctx = starpu_sched_ctx_get_workers_list(sched_ctx->id, &workerids); if(nworkers_ctx > 0) { _STARPU_SCHED_BEGIN; sched_ctx->sched_policy->remove_workers(sched_ctx->id, workerids, nworkers_ctx); _STARPU_SCHED_END; } free(workerids); } return; } #ifdef STARPU_HAVE_HWLOC static void _starpu_sched_ctx_create_hwloc_tree(struct _starpu_sched_ctx *sched_ctx) { sched_ctx->hwloc_workers_set = hwloc_bitmap_alloc(); struct starpu_worker_collection *workers = sched_ctx->workers; struct _starpu_worker *worker; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { unsigned workerid = workers->get_next(workers, &it); if(!starpu_worker_is_combined_worker(workerid)) { worker = _starpu_get_worker_struct(workerid); hwloc_bitmap_or(sched_ctx->hwloc_workers_set, sched_ctx->hwloc_workers_set, worker->hwloc_cpu_set); } } return; } #endif /* Must be called with sched_ctx_manag mutex held */ struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *policy, int *workerids, int nworkers_ctx, unsigned is_initial_sched, const char *sched_ctx_name, int min_prio_set, int min_prio, int max_prio_set, int max_prio, unsigned awake_workers, void (*sched_policy_callback)(unsigned), void * user_data, int nsub_ctxs, int *sub_ctxs, int nsms) { struct _starpu_machine_config *config = _starpu_get_machine_config(); STARPU_ASSERT_MSG_ALWAYS(config->topology.nsched_ctxs < STARPU_NMAX_SCHED_CTXS, "There is too many sched_ctx %d, only %d are configured", config->topology.nsched_ctxs, STARPU_NMAX_SCHED_CTXS); unsigned id = _starpu_get_first_free_sched_ctx(config); struct _starpu_sched_ctx *sched_ctx = &config->sched_ctxs[id]; STARPU_ASSERT(sched_ctx->do_schedule == 0); sched_ctx->id = id; int nworkers = config->topology.nworkers; int i; STARPU_ASSERT(nworkers_ctx <= nworkers); starpu_task_list_init(&sched_ctx->empty_ctx_tasks); starpu_task_list_init(&sched_ctx->waiting_tasks); if (policy) { _STARPU_MALLOC(sched_ctx->sched_policy, sizeof(struct starpu_sched_policy)); } else { sched_ctx->sched_policy = NULL; } sched_ctx->is_initial_sched = is_initial_sched; sched_ctx->name = sched_ctx_name; sched_ctx->inheritor = STARPU_GLOBAL_SCHED_CTX; sched_ctx->finished_submit = 0; sched_ctx->min_priority_is_set = min_prio_set; if (sched_ctx->min_priority_is_set) sched_ctx->min_priority = min_prio; else sched_ctx->min_priority = 0; sched_ctx->max_priority_is_set = max_prio_set; if (sched_ctx->max_priority_is_set) sched_ctx->max_priority = max_prio; else sched_ctx->max_priority = 0; _starpu_barrier_counter_init(&sched_ctx->tasks_barrier, 0); _starpu_barrier_counter_init(&sched_ctx->ready_tasks_barrier, 0); sched_ctx->ready_flops = 0.0; for (i = 0; i < (int) (sizeof(sched_ctx->iterations)/sizeof(sched_ctx->iterations[0])); i++) sched_ctx->iterations[i] = -1; sched_ctx->iteration_level = 0; sched_ctx->main_master = -1; sched_ctx->perf_arch.devices = NULL; sched_ctx->perf_arch.ndevices = 0; sched_ctx->callback_sched = sched_policy_callback; sched_ctx->user_data = user_data; sched_ctx->sms_start_idx = 0; sched_ctx->sms_end_idx = STARPU_NMAXSMS; sched_ctx->nsms = nsms; sched_ctx->stream_worker = -1; memset(&sched_ctx->lock_write_owner, 0, sizeof(sched_ctx->lock_write_owner)); STARPU_PTHREAD_RWLOCK_INIT(&sched_ctx->rwlock, NULL); if(nsms > 0) { STARPU_ASSERT_MSG(workerids, "workerids is needed when setting nsms"); sched_ctx->sms_start_idx = occupied_sms; sched_ctx->sms_end_idx = occupied_sms+nsms; occupied_sms += nsms; _STARPU_DEBUG("ctx %u: stream worker %d nsms %d occupied sms %d\n", sched_ctx->id, workerids[0], nsms, occupied_sms); STARPU_ASSERT_MSG_ALWAYS(occupied_sms <= STARPU_NMAXSMS , "STARPU:requested more sms than available"); _starpu_worker_set_stream_ctx(workerids[0], sched_ctx); sched_ctx->stream_worker = workerids[0]; } sched_ctx->nesting_sched_ctx = STARPU_NMAX_SCHED_CTXS; sched_ctx->nsub_ctxs = 0; sched_ctx->parallel_view = 0; /*init the strategy structs and the worker_collection of the resources of the context */ if(policy) { _starpu_init_sched_policy(config, sched_ctx, policy); sched_ctx->awake_workers = 1; } else { sched_ctx->awake_workers = awake_workers; starpu_sched_ctx_create_worker_collection(sched_ctx->id, STARPU_WORKER_LIST); } /*add sub_ctxs before add workers, in order to be able to associate them if necessary */ if(nsub_ctxs != 0) { for(i = 0; i < nsub_ctxs; i++) sched_ctx->sub_ctxs[i] = sub_ctxs[i]; sched_ctx->nsub_ctxs = nsub_ctxs; } /* starpu_do_schedule() starts to consider the new sched_ctx for scheduling * once 'sched_cts->do_schedule == 1' becomes visible. * Make sure the sched_ctx struct and the policy struct initialization are complete at this time. */ STARPU_WMB(); sched_ctx->do_schedule = 1; _starpu_add_workers_to_new_sched_ctx(sched_ctx, workerids, nworkers_ctx); #ifdef STARPU_HAVE_HWLOC /* build hwloc tree of the context */ _starpu_sched_ctx_create_hwloc_tree(sched_ctx); #endif //STARPU_HAVE_HWLOC /* if we create the initial big sched ctx we can update workers' status here because they haven't been launched yet */ if(is_initial_sched) { for(i = 0; i < nworkers; i++) { struct _starpu_worker *worker = _starpu_get_worker_struct(i); if(!_starpu_sched_ctx_list_add(&worker->sched_ctx_list, sched_ctx->id)) worker->nsched_ctxs++; } } (void)STARPU_ATOMIC_ADD(&config->topology.nsched_ctxs,1); return sched_ctx; } int starpu_sched_ctx_get_nsms(unsigned sched_ctx) { struct _starpu_sched_ctx *sc = _starpu_get_sched_ctx_struct(sched_ctx); return sc->nsms; } void starpu_sched_ctx_get_sms_interval(int stream_workerid, int *start, int *end) { struct _starpu_sched_ctx *sc = _starpu_worker_get_ctx_stream(stream_workerid); *start = sc->sms_start_idx; *end = sc->sms_end_idx; } int starpu_sched_ctx_get_sub_ctxs(unsigned sched_ctx, int *ctxs) { struct _starpu_sched_ctx *sc = _starpu_get_sched_ctx_struct(sched_ctx); int i; for(i = 0; i < sc->nsub_ctxs; i++) ctxs[i] = sc->sub_ctxs[i]; return sc->nsub_ctxs; } int starpu_sched_ctx_get_stream_worker(unsigned sub_ctx) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sub_ctx); struct starpu_worker_collection *workers = sched_ctx->workers; struct starpu_sched_ctx_iterator it; int worker = -1; workers->init_iterator(workers, &it); if(workers->has_next(workers, &it)) { worker = workers->get_next(workers, &it); } return worker; } unsigned starpu_sched_ctx_create(int *workerids, int nworkers, const char *sched_ctx_name, ...) { STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag); va_list varg_list; int arg_type; int min_prio_set = 0; int max_prio_set = 0; int min_prio = 0; int max_prio = 0; int nsms = 0; int *sub_ctxs = NULL; int nsub_ctxs = 0; void *user_data = NULL; struct starpu_sched_policy *sched_policy = NULL; unsigned hierarchy_level = 0; unsigned nesting_sched_ctx = STARPU_NMAX_SCHED_CTXS; unsigned awake_workers = 0; void (*init_sched)(unsigned) = NULL; va_start(varg_list, sched_ctx_name); while ((arg_type = va_arg(varg_list, int)) != 0) { if (arg_type == STARPU_SCHED_CTX_POLICY_NAME) { char *policy_name = va_arg(varg_list, char *); struct _starpu_machine_config *config = _starpu_get_machine_config(); sched_policy = _starpu_select_sched_policy(config, policy_name); } else if (arg_type == STARPU_SCHED_CTX_POLICY_STRUCT) { sched_policy = va_arg(varg_list, struct starpu_sched_policy *); } else if (arg_type == STARPU_SCHED_CTX_POLICY_MIN_PRIO) { min_prio = va_arg(varg_list, int); min_prio_set = 1; } else if (arg_type == STARPU_SCHED_CTX_POLICY_MAX_PRIO) { max_prio = va_arg(varg_list, int); max_prio_set = 1; } else if (arg_type == STARPU_SCHED_CTX_HIERARCHY_LEVEL) { hierarchy_level = va_arg(varg_list, unsigned); } else if (arg_type == STARPU_SCHED_CTX_NESTED) { nesting_sched_ctx = va_arg(varg_list, unsigned); } else if (arg_type == STARPU_SCHED_CTX_AWAKE_WORKERS) { awake_workers = 1; } else if (arg_type == STARPU_SCHED_CTX_POLICY_INIT) { #ifdef __NVCOMPILER init_sched = (void(*)(unsigned))va_arg(varg_list, void *); #else init_sched = va_arg(varg_list, void(*)(unsigned)); #endif } else if (arg_type == STARPU_SCHED_CTX_USER_DATA) { user_data = va_arg(varg_list, void *); } else if (arg_type == STARPU_SCHED_CTX_SUB_CTXS) { sub_ctxs = va_arg(varg_list, int*); nsub_ctxs = va_arg(varg_list, int); } else if (arg_type == STARPU_SCHED_CTX_CUDA_NSMS) { nsms = va_arg(varg_list, int); } else { STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type); } } va_end(varg_list); /* Make sure the user doesn't use invalid worker IDs. */ int num_workers = starpu_worker_get_count(); int i; for (i = 0; i < nworkers; i++) { if (workerids[i] < 0 || workerids[i] >= num_workers) { _STARPU_ERROR("Invalid worker ID (%d) specified!\n", workerids[i]); STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag); return STARPU_NMAX_SCHED_CTXS; } } struct _starpu_sched_ctx *sched_ctx; sched_ctx = _starpu_create_sched_ctx(sched_policy, workerids, nworkers, 0, sched_ctx_name, min_prio_set, min_prio, max_prio_set, max_prio, awake_workers, init_sched, user_data, nsub_ctxs, sub_ctxs, nsms); sched_ctx->hierarchy_level = hierarchy_level; sched_ctx->nesting_sched_ctx = nesting_sched_ctx; int *added_workerids; unsigned nw_ctx = starpu_sched_ctx_get_workers_list(sched_ctx->id, &added_workerids); sort_workerid_array(nw_ctx, added_workerids); notify_workers_about_changing_ctx_pending(nw_ctx, added_workerids); _starpu_sched_ctx_lock_write(sched_ctx->id); _starpu_update_notified_workers_with_ctx(added_workerids, nw_ctx, sched_ctx->id); notify_workers_about_changing_ctx_done(nw_ctx, added_workerids); _starpu_sched_ctx_unlock_write(sched_ctx->id); free(added_workerids); #ifdef STARPU_USE_SC_HYPERVISOR sched_ctx->perf_counters = NULL; #endif STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag); return sched_ctx->id; } int fstarpu_sched_ctx_create(int *workerids, int nworkers, const char *sched_ctx_name, void **arglist) { STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag); int arg_i = 0; int min_prio_set = 0; int max_prio_set = 0; int min_prio = 0; int max_prio = 0; int nsms = 0; int *sub_ctxs = NULL; int nsub_ctxs = 0; void *user_data = NULL; struct starpu_sched_policy *sched_policy = NULL; unsigned hierarchy_level = 0; unsigned nesting_sched_ctx = STARPU_NMAX_SCHED_CTXS; unsigned awake_workers = 0; void (*init_sched)(unsigned) = NULL; while (arglist[arg_i] != NULL) { const int arg_type = (int)(intptr_t)arglist[arg_i]; if (arg_type == STARPU_SCHED_CTX_POLICY_NAME) { arg_i++; char *policy_name = arglist[arg_i]; struct _starpu_machine_config *config = _starpu_get_machine_config(); sched_policy = _starpu_select_sched_policy(config, policy_name); } else if (arg_type == STARPU_SCHED_CTX_POLICY_STRUCT) { arg_i++; sched_policy = arglist[arg_i]; } else if (arg_type == STARPU_SCHED_CTX_POLICY_MIN_PRIO) { arg_i++; min_prio = *(int *)arglist[arg_i]; min_prio_set = 1; } else if (arg_type == STARPU_SCHED_CTX_POLICY_MAX_PRIO) { arg_i++; max_prio = *(int *)arglist[arg_i]; max_prio_set = 1; } else if (arg_type == STARPU_SCHED_CTX_HIERARCHY_LEVEL) { arg_i++; int val = *(int *)arglist[arg_i]; STARPU_ASSERT(val >= 0); hierarchy_level = (unsigned)val; } else if (arg_type == STARPU_SCHED_CTX_NESTED) { arg_i++; int val = *(int *)arglist[arg_i]; STARPU_ASSERT(val >= 0); nesting_sched_ctx = (unsigned)val; } else if (arg_type == STARPU_SCHED_CTX_AWAKE_WORKERS) { awake_workers = 1; } else if (arg_type == STARPU_SCHED_CTX_POLICY_INIT) { arg_i++; init_sched = arglist[arg_i]; } else if (arg_type == STARPU_SCHED_CTX_USER_DATA) { arg_i++; user_data = arglist[arg_i]; } else if (arg_type == STARPU_SCHED_CTX_SUB_CTXS) { arg_i++; sub_ctxs = (int*)arglist[arg_i]; arg_i++; nsub_ctxs = *(int*)arglist[arg_i]; } else if (arg_type == STARPU_SCHED_CTX_CUDA_NSMS) { arg_i++; nsms = *(int*)arglist[arg_i]; } else { STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type); } arg_i++; } if (workerids && nworkers != -1) { /* Make sure the user doesn't use invalid worker IDs. */ int num_workers = starpu_worker_get_count(); int i; for (i = 0; i < nworkers; i++) { if (workerids[i] < 0 || workerids[i] >= num_workers) { _STARPU_ERROR("Invalid worker ID (%d) specified!\n", workerids[i]); STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag); return STARPU_NMAX_SCHED_CTXS; } } } struct _starpu_sched_ctx *sched_ctx; sched_ctx = _starpu_create_sched_ctx(sched_policy, workerids, nworkers, 0, sched_ctx_name, min_prio_set, min_prio, max_prio_set, max_prio, awake_workers, init_sched, user_data, nsub_ctxs, sub_ctxs, nsms); sched_ctx->hierarchy_level = hierarchy_level; sched_ctx->nesting_sched_ctx = nesting_sched_ctx; int *added_workerids; unsigned nw_ctx = starpu_sched_ctx_get_workers_list(sched_ctx->id, &added_workerids); sort_workerid_array(nw_ctx, added_workerids); notify_workers_about_changing_ctx_pending(nw_ctx, added_workerids); _starpu_sched_ctx_lock_write(sched_ctx->id); _starpu_update_notified_workers_with_ctx(added_workerids, nw_ctx, sched_ctx->id); notify_workers_about_changing_ctx_done(nw_ctx, added_workerids); _starpu_sched_ctx_unlock_write(sched_ctx->id); free(added_workerids); #ifdef STARPU_USE_SC_HYPERVISOR sched_ctx->perf_counters = NULL; #endif STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag); return (int)sched_ctx->id; } void starpu_sched_ctx_register_close_callback(unsigned sched_ctx_id, void (*close_callback)(unsigned sched_ctx_id, void* args), void *args) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); sched_ctx->close_callback = close_callback; sched_ctx->close_args = args; return; } #ifdef STARPU_USE_SC_HYPERVISOR void starpu_sched_ctx_set_perf_counters(unsigned sched_ctx_id, void* perf_counters) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); sched_ctx->perf_counters = (struct starpu_sched_ctx_performance_counters *)perf_counters; return; } #endif /* * free all structures for the context * Must be called with sched_ctx_manag mutex held */ static void _starpu_delete_sched_ctx(struct _starpu_sched_ctx *sched_ctx) { STARPU_ASSERT(sched_ctx->id != STARPU_NMAX_SCHED_CTXS); STARPU_ASSERT(sched_ctx->do_schedule == 1); sched_ctx->do_schedule = 0; struct _starpu_machine_config *config = _starpu_get_machine_config(); if(sched_ctx->sched_policy) { _starpu_deinit_sched_policy(sched_ctx); free(sched_ctx->sched_policy); sched_ctx->sched_policy = NULL; } else { starpu_sched_ctx_delete_worker_collection(sched_ctx->id); } if (sched_ctx->perf_arch.devices) { free(sched_ctx->perf_arch.devices); sched_ctx->perf_arch.devices = NULL; } sched_ctx->min_priority_is_set = 0; sched_ctx->max_priority_is_set = 0; sched_ctx->id = STARPU_NMAX_SCHED_CTXS; #ifdef STARPU_HAVE_HWLOC hwloc_bitmap_free(sched_ctx->hwloc_workers_set); #endif //STARPU_HAVE_HWLOC config->topology.nsched_ctxs--; } void starpu_sched_ctx_delete(unsigned sched_ctx_id) { STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); STARPU_ASSERT(sched_ctx); #ifdef STARPU_USE_SC_HYPERVISOR if (sched_ctx_id != 0 && sched_ctx_id != STARPU_NMAX_SCHED_CTXS && sched_ctx->perf_counters != NULL) { _STARPU_TRACE_HYPERVISOR_BEGIN(); sched_ctx->perf_counters->notify_delete_context(sched_ctx_id); _STARPU_TRACE_HYPERVISOR_END(); } #endif //STARPU_USE_SC_HYPERVISOR _starpu_sched_ctx_lock_write(sched_ctx_id); unsigned inheritor_sched_ctx_id = sched_ctx->inheritor; struct _starpu_sched_ctx *inheritor_sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx->inheritor); _starpu_sched_ctx_lock_write(inheritor_sched_ctx_id); STARPU_ASSERT(sched_ctx->id != STARPU_NMAX_SCHED_CTXS); int i; for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) { struct _starpu_sched_ctx *psched_ctx = _starpu_get_sched_ctx_struct(i); if (psched_ctx->inheritor == sched_ctx_id) { _starpu_sched_ctx_lock_write(i); psched_ctx->inheritor = inheritor_sched_ctx_id; _starpu_sched_ctx_unlock_write(i); } } int *workerids; unsigned nworkers_ctx = starpu_sched_ctx_get_workers_list(sched_ctx->id, &workerids); int backup_workerids[nworkers_ctx]; memcpy(backup_workerids, workerids, nworkers_ctx*sizeof(backup_workerids[0])); sort_workerid_array(nworkers_ctx, backup_workerids); notify_workers_about_changing_ctx_pending(nworkers_ctx, backup_workerids); /*if both of them have all the resources is pointless*/ /*trying to transfer resources from one ctx to the other*/ struct _starpu_machine_config *config = _starpu_get_machine_config(); unsigned nworkers = config->topology.nworkers; if(nworkers_ctx > 0 && inheritor_sched_ctx && inheritor_sched_ctx->id != STARPU_NMAX_SCHED_CTXS && !(nworkers_ctx == nworkers && nworkers_ctx == inheritor_sched_ctx->workers->nworkers)) { add_notified_workers(workerids, nworkers_ctx, inheritor_sched_ctx_id); } notify_workers_about_changing_ctx_done(nworkers_ctx, backup_workerids); _starpu_sched_ctx_unlock_write(sched_ctx_id); int wait_status = _starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx_id); _starpu_sched_ctx_lock_write(sched_ctx_id); notify_workers_about_changing_ctx_pending(nworkers_ctx, backup_workerids); if(!wait_status) { if(!sched_ctx->sched_policy) _starpu_sched_ctx_unblock_workers_in_parallel(sched_ctx_id, 0); /*if btw the mutex release & the mutex lock the context has changed take care to free all scheduling data before deleting the context */ /* announce upcoming context changes, then wait for sched_op operations to * complete before altering the sched_ctx under sched_mutex protection */ _starpu_update_notified_workers_without_ctx(workerids, nworkers_ctx, sched_ctx_id, 1); _starpu_sched_ctx_free_scheduling_data(sched_ctx); notify_workers_about_changing_ctx_done(nworkers_ctx, backup_workerids); occupied_sms -= sched_ctx->nsms; _starpu_sched_ctx_unlock_write(sched_ctx_id); _starpu_sched_ctx_unlock_write(inheritor_sched_ctx_id); STARPU_PTHREAD_RWLOCK_DESTROY(&sched_ctx->rwlock); STARPU_PTHREAD_RWLOCK_INIT(&sched_ctx->rwlock, NULL); _starpu_delete_sched_ctx(sched_ctx); } else { notify_workers_about_changing_ctx_done(nworkers_ctx, backup_workerids); occupied_sms -= sched_ctx->nsms; _starpu_sched_ctx_unlock_write(sched_ctx_id); _starpu_sched_ctx_unlock_write(inheritor_sched_ctx_id); } /* workerids is malloc-ed in starpu_sched_ctx_get_workers_list, don't forget to free it when you don't use it anymore */ free(workerids); STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag); } /* called after the workers are terminated so we don't have anything else to do but free the memory*/ void _starpu_delete_all_sched_ctxs() { STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag); unsigned i; for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(i); if(sched_ctx->id != STARPU_NMAX_SCHED_CTXS) { _starpu_sched_ctx_lock_write(i); _starpu_sched_ctx_free_scheduling_data(sched_ctx); _starpu_barrier_counter_destroy(&sched_ctx->tasks_barrier); _starpu_barrier_counter_destroy(&sched_ctx->ready_tasks_barrier); _starpu_sched_ctx_unlock_write(i); STARPU_PTHREAD_RWLOCK_DESTROY(&sched_ctx->rwlock); _starpu_delete_sched_ctx(sched_ctx); } } STARPU_PTHREAD_KEY_DELETE(sched_ctx_key); STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag); } static void _starpu_check_workers(int *workerids, int nworkers) { struct _starpu_machine_config *config = _starpu_get_machine_config(); int nworkers_conf = config->topology.nworkers; int i; for(i = 0; i < nworkers; i++) { /* take care the user does not ask for a resource that does not exist */ STARPU_ASSERT_MSG(workerids[i] >= 0 && workerids[i] <= nworkers_conf, "requested to add workerid = %d, but that is beyond the range 0 to %d", workerids[i], nworkers_conf); } } /* ctx_mutex must be held when calling this function */ static void fetch_tasks_from_empty_ctx_list(struct _starpu_sched_ctx *sched_ctx) { struct starpu_task_list list; starpu_task_list_move(&list, &sched_ctx->empty_ctx_tasks); _starpu_sched_ctx_unlock_write(sched_ctx->id); while(!starpu_task_list_empty(&list)) { struct starpu_task *old_task = starpu_task_list_pop_back(&list); if(old_task == &stop_submission_task) break; /* if no workers are able to execute the task, it will be put * in the empty_ctx_tasks list forever again */ unsigned able = _starpu_workers_able_to_execute_task(old_task, sched_ctx); STARPU_ASSERT(able); int ret = _starpu_push_task_to_workers(old_task); /* if we should stop poping from empty ctx tasks */ if (ret == -EAGAIN) break; } _starpu_sched_ctx_lock_write(sched_ctx->id); } unsigned _starpu_can_push_task(struct _starpu_sched_ctx *sched_ctx, struct starpu_task *task) { if(sched_ctx->sched_policy && sched_ctx->sched_policy->simulate_push_task) { if (window_size == 0.0) return 1; _starpu_sched_ctx_lock_read(sched_ctx->id); double expected_end = sched_ctx->sched_policy->simulate_push_task(task); _starpu_sched_ctx_unlock_read(sched_ctx->id); double expected_len = 0.0; if(hyp_actual_start_sample[sched_ctx->id] != 0.0) { expected_len = expected_end - hyp_actual_start_sample[sched_ctx->id] ; } else { _STARPU_MSG("%u: sc start is 0.0\n", sched_ctx->id); expected_len = expected_end - starpu_timing_now(); } if(expected_len < 0.0) _STARPU_MSG("exp len negative %lf \n", expected_len); expected_len /= 1000000.0; // _STARPU_MSG("exp_end %lf start %lf expected_len %lf \n", expected_end, hyp_actual_start_sample[sched_ctx->id], expected_len); if(expected_len > (window_size + 0.2*window_size)) return 0; } return 1; } void _starpu_fetch_task_from_waiting_list(struct _starpu_sched_ctx *sched_ctx) { if(starpu_task_list_empty(&sched_ctx->waiting_tasks)) return; struct starpu_task *old_task = starpu_task_list_back(&sched_ctx->waiting_tasks); if(_starpu_can_push_task(sched_ctx, old_task)) { old_task = starpu_task_list_pop_back(&sched_ctx->waiting_tasks); _starpu_push_task_to_workers(old_task); } return; } void _starpu_push_task_to_waiting_list(struct _starpu_sched_ctx *sched_ctx, struct starpu_task *task) { starpu_task_list_push_front(&sched_ctx->waiting_tasks, task); return; } static void set_priority_hierarchically_on_notified_workers(int* workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx, unsigned priority) { if(starpu_sched_ctx_get_hierarchy_level(sched_ctx) > 0) { unsigned father = starpu_sched_ctx_get_inheritor(sched_ctx); set_priority_on_notified_workers(workers_to_add, nworkers_to_add, father, priority); set_priority_hierarchically_on_notified_workers(workers_to_add, nworkers_to_add, father, priority); } return; } static void add_notified_workers(int *workerids, int nworkers, unsigned sched_ctx_id) { if (!nworkers) return; struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); /* if the context has not already been deleted */ if(sched_ctx->id == STARPU_NMAX_SCHED_CTXS) return; int added_workers[nworkers]; int n_added_workers = 0; { struct starpu_worker_collection *workers = sched_ctx->workers; int i = 0; for(i = 0; i < nworkers; i++) { if (workerids[i] >= (int) starpu_worker_get_count()) /* Combined worker, don't care */ continue; int workerid = workers->add(workers, workerids[i]); if(workerid >= 0) { added_workers[n_added_workers] = workerid; n_added_workers++; } else { struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[i]); worker->removed_from_ctx[sched_ctx->id] = 0; } } } _do_add_notified_workers(sched_ctx, workerids, nworkers); if(n_added_workers > 0) { if(sched_ctx->sched_policy && sched_ctx->sched_policy->add_workers) { _STARPU_SCHED_BEGIN; sched_ctx->sched_policy->add_workers(sched_ctx->id, added_workers, n_added_workers); _STARPU_SCHED_END; } _starpu_update_notified_workers_with_ctx(added_workers, n_added_workers, sched_ctx->id); } set_priority_on_notified_workers(workerids, nworkers, sched_ctx_id, 1); set_priority_hierarchically_on_notified_workers(workerids, nworkers, sched_ctx_id, 0); fetch_tasks_from_empty_ctx_list(sched_ctx); } /* Queue a new ctx change operation in the list of deferred ctx changes of the current worker. * * The set of workers to notify should contain all workers directly or * indirectly affected by the change. In particular, all workers of * sched_ctx_id should be notified even if they are not part of the change */ static void _defer_ctx_change(int sched_ctx_id, enum _starpu_ctx_change_op op, int nworkers_to_notify, int *workerids_to_notify, int nworkers_to_change, int *workerids_to_change) { STARPU_ASSERT(_starpu_worker_sched_op_pending()); if (nworkers_to_change == 0) return; int workerid = starpu_worker_get_id_check(); struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); struct _starpu_ctx_change_list *l = &worker->ctx_change_list; struct _starpu_ctx_change *chg = _starpu_ctx_change_new(); chg->sched_ctx_id = sched_ctx_id; STARPU_ASSERT(op == ctx_change_add || op == ctx_change_remove); chg->op = op; STARPU_ASSERT(workerids_to_change != NULL); chg->nworkers_to_change = nworkers_to_change; _STARPU_MALLOC(chg->workerids_to_change, nworkers_to_change * sizeof(chg->workerids_to_change[0])); memcpy(chg->workerids_to_change, workerids_to_change, nworkers_to_change * sizeof(chg->workerids_to_change[0])); if (nworkers_to_notify != 0) { STARPU_ASSERT(workerids_to_notify != NULL); chg->nworkers_to_notify = nworkers_to_notify; _STARPU_MALLOC(chg->workerids_to_notify, nworkers_to_notify * sizeof(chg->workerids_to_notify[0])); memcpy(chg->workerids_to_notify, workerids_to_notify, nworkers_to_notify * sizeof(chg->workerids_to_notify[0])); } else { STARPU_ASSERT(workerids_to_notify == NULL); chg->nworkers_to_notify = 0; chg->workerids_to_notify = 0; } _starpu_ctx_change_list_push_back(l, chg); } void starpu_sched_ctx_add_workers(int *workers_to_add, unsigned nworkers_to_add, unsigned sched_ctx_id) { STARPU_ASSERT(workers_to_add != NULL && nworkers_to_add > 0); _starpu_check_workers(workers_to_add, nworkers_to_add); int *ctx_workerids = NULL; _starpu_sched_ctx_lock_read(sched_ctx_id); unsigned ctx_nworkers = starpu_sched_ctx_get_workers_list_raw(sched_ctx_id, &ctx_workerids); _starpu_sched_ctx_unlock_read(sched_ctx_id); int cumulated_workerids[ctx_nworkers + nworkers_to_add]; memcpy(cumulated_workerids, ctx_workerids, ctx_nworkers*sizeof(cumulated_workerids[0])); unsigned cumulated_nworkers = ctx_nworkers; { unsigned i; for (i=0; iworkers->nworkers]; int n_removed_workers = 0; _starpu_remove_workers_from_sched_ctx(sched_ctx, workerids, nworkers, removed_workers, &n_removed_workers); if(n_removed_workers > 0) { _starpu_update_notified_workers_without_ctx(removed_workers, n_removed_workers, sched_ctx_id, 0); set_priority_on_notified_workers(removed_workers, n_removed_workers, sched_ctx_id, 1); } } void starpu_sched_ctx_remove_workers(int *workers_to_remove, unsigned nworkers_to_remove, unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); _starpu_check_workers(workers_to_remove, nworkers_to_remove); int *ctx_workerids = NULL; _starpu_sched_ctx_lock_read(sched_ctx_id); unsigned ctx_nworkers = starpu_sched_ctx_get_workers_list_raw(sched_ctx_id, &ctx_workerids); _starpu_sched_ctx_unlock_read(sched_ctx_id); int cumulated_workerids[ctx_nworkers + nworkers_to_remove]; memcpy(cumulated_workerids, ctx_workerids, ctx_nworkers*sizeof(cumulated_workerids[0])); unsigned cumulated_nworkers = ctx_nworkers; { unsigned i; for (i=0; iid != STARPU_NMAX_SCHED_CTXS) { if (_starpu_worker_sched_op_pending()) { _defer_ctx_change(sched_ctx_id, ctx_change_remove, cumulated_nworkers, cumulated_workerids, nworkers_to_remove, workers_to_remove); } else { sort_workerid_array(cumulated_nworkers, cumulated_workerids); notify_workers_about_changing_ctx_pending(cumulated_nworkers, cumulated_workerids); _starpu_sched_ctx_lock_write(sched_ctx_id); remove_notified_workers(workers_to_remove, nworkers_to_remove, sched_ctx_id); notify_workers_about_changing_ctx_done(cumulated_nworkers, cumulated_workerids); _starpu_sched_ctx_unlock_write(sched_ctx_id); } } } int _starpu_workers_able_to_execute_task(struct starpu_task *task, struct _starpu_sched_ctx *sched_ctx) { unsigned able = 0; _starpu_sched_ctx_lock_read(sched_ctx->id); struct starpu_worker_collection *workers = sched_ctx->workers; struct starpu_sched_ctx_iterator it; workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); STARPU_ASSERT_MSG(worker < STARPU_NMAXWORKERS, "worker id %u", worker); if (starpu_worker_can_execute_task_first_impl(worker, task, NULL)) { able++; break; } } _starpu_sched_ctx_unlock_read(sched_ctx->id); return able; } /* unused sched_ctx have the id STARPU_NMAX_SCHED_CTXS */ void _starpu_init_all_sched_ctxs(struct _starpu_machine_config *config) { STARPU_PTHREAD_KEY_CREATE(&sched_ctx_key, NULL); window_size = starpu_getenv_float_default("STARPU_WINDOW_TIME_SIZE", 0.0); nobind = starpu_getenv_number("STARPU_WORKERS_NOBIND"); unsigned i; for(i = 0; i <= STARPU_NMAX_SCHED_CTXS; i++) { config->sched_ctxs[i].do_schedule = 0; config->sched_ctxs[i].id = STARPU_NMAX_SCHED_CTXS; STARPU_PTHREAD_RWLOCK_INIT0(&config->sched_ctxs[i].rwlock, NULL); } return; } /* sched_ctx aren't necessary one next to another */ /* for eg when we remove one its place is free */ /* when we add new one we reuse its place */ static unsigned _starpu_get_first_free_sched_ctx(struct _starpu_machine_config *config) { unsigned i; for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) if(config->sched_ctxs[i].id == STARPU_NMAX_SCHED_CTXS) return i; STARPU_ASSERT(0); return STARPU_NMAX_SCHED_CTXS; } int _starpu_wait_for_all_tasks_of_sched_ctx(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait_for_all must not be called from a task or callback"); _starpu_barrier_counter_wait_for_empty_counter(&sched_ctx->tasks_barrier); return 0; } int _starpu_wait_for_n_submitted_tasks_of_sched_ctx(unsigned sched_ctx_id, unsigned n) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait_for_n_submitted_tasks must not be called from a task or callback"); return _starpu_barrier_counter_wait_until_counter_reaches_down_to_n(&sched_ctx->tasks_barrier, n); } void _starpu_decrement_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id) { struct _starpu_machine_config *config = _starpu_get_machine_config(); #ifndef STARPU_SANITIZE_THREAD if (!config->watchdog_ok) config->watchdog_ok = 1; #endif struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); int reached = _starpu_barrier_counter_get_reached_exit(&sched_ctx->tasks_barrier); int finished = reached == 1; /* when finished decrementing the tasks if the user signaled he will not submit tasks anymore we can move all its workers to the inheritor context */ if(finished && sched_ctx->inheritor != STARPU_NMAX_SCHED_CTXS) { STARPU_PTHREAD_MUTEX_LOCK(&finished_submit_mutex); if(sched_ctx->finished_submit) { STARPU_PTHREAD_MUTEX_UNLOCK(&finished_submit_mutex); if(sched_ctx->id != STARPU_NMAX_SCHED_CTXS) { if(sched_ctx->close_callback) sched_ctx->close_callback(sched_ctx->id, sched_ctx->close_args); int *workerids = NULL; unsigned nworkers = starpu_sched_ctx_get_workers_list(sched_ctx->id, &workerids); if(nworkers > 0) { starpu_sched_ctx_add_workers(workerids, nworkers, sched_ctx->inheritor); free(workerids); } } _starpu_barrier_counter_decrement_until_empty_counter(&sched_ctx->tasks_barrier, 0.0); return; } STARPU_PTHREAD_MUTEX_UNLOCK(&finished_submit_mutex); } /* We also need to check for config->submitting = 0 (i.e. the * user called starpu_drivers_request_termination()), in which * case we need to set config->running to 0 and wake workers, * so they can terminate, just like * starpu_drivers_request_termination() does. */ STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex); if(config->submitting == 0) { if(sched_ctx->id != STARPU_NMAX_SCHED_CTXS) { if(sched_ctx->close_callback) sched_ctx->close_callback(sched_ctx->id, sched_ctx->close_args); } ANNOTATE_HAPPENS_AFTER(&config->running); config->running = 0; ANNOTATE_HAPPENS_BEFORE(&config->running); int s; for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) { if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS) { _starpu_check_nsubmitted_tasks_of_sched_ctx(config->sched_ctxs[s].id); } } } STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex); _starpu_barrier_counter_decrement_until_empty_counter(&sched_ctx->tasks_barrier, 0.0); return; } void _starpu_increment_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); _starpu_barrier_counter_increment(&sched_ctx->tasks_barrier, 0.0); } int _starpu_get_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); return _starpu_barrier_counter_get_reached_start(&sched_ctx->tasks_barrier); } int _starpu_check_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); return _starpu_barrier_counter_check(&sched_ctx->tasks_barrier); } unsigned _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops, struct starpu_task *task) { unsigned ret = 1; struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); if(!sched_ctx->is_initial_sched) { _starpu_sched_ctx_lock_write(sched_ctx->id); } _starpu_barrier_counter_increment(&sched_ctx->ready_tasks_barrier, ready_flops); if(!sched_ctx->is_initial_sched) { if(!_starpu_can_push_task(sched_ctx, task)) { _starpu_push_task_to_waiting_list(sched_ctx, task); ret = 0; } _starpu_sched_ctx_unlock_write(sched_ctx->id); } return ret; } void _starpu_decrement_nready_tasks_of_sched_ctx_locked(unsigned sched_ctx_id, double ready_flops) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); _starpu_barrier_counter_decrement_until_empty_counter(&sched_ctx->ready_tasks_barrier, ready_flops); } void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); if(!sched_ctx->is_initial_sched) { _starpu_sched_ctx_lock_write(sched_ctx->id); } _starpu_barrier_counter_decrement_until_empty_counter(&sched_ctx->ready_tasks_barrier, ready_flops); if(!sched_ctx->is_initial_sched) { _starpu_fetch_task_from_waiting_list(sched_ctx); _starpu_sched_ctx_unlock_write(sched_ctx->id); } } int starpu_sched_ctx_get_nready_tasks(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); return _starpu_barrier_counter_get_reached_start(&sched_ctx->ready_tasks_barrier); } double starpu_sched_ctx_get_nready_flops(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); return _starpu_barrier_counter_get_reached_flops(&sched_ctx->ready_tasks_barrier); } int _starpu_wait_for_no_ready_of_sched_ctx(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); _starpu_barrier_counter_wait_for_empty_counter(&sched_ctx->ready_tasks_barrier); return 0; } /* * FIXME: This should rather be * void starpu_sched_ctx_set_context(unsigned sched_ctx) */ void starpu_sched_ctx_set_context(unsigned *sched_ctx) { if (sched_ctx) STARPU_PTHREAD_SETSPECIFIC(sched_ctx_key, (void*)(uintptr_t)(*sched_ctx + 1)); else STARPU_PTHREAD_SETSPECIFIC(sched_ctx_key, (void*)(uintptr_t) 0); } unsigned starpu_sched_ctx_get_context() { unsigned id = (unsigned)(uintptr_t)STARPU_PTHREAD_GETSPECIFIC(sched_ctx_key); if (id == 0) return STARPU_NMAX_SCHED_CTXS; else return id - 1; } unsigned _starpu_sched_ctx_get_current_context() { unsigned sched_ctx = starpu_sched_ctx_get_context(); if (sched_ctx == STARPU_NMAX_SCHED_CTXS) return _starpu_get_initial_sched_ctx()->id; else return sched_ctx; } void starpu_sched_ctx_notify_hypervisor_exists() { with_hypervisor = 1; int i, j; for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) { hyp_start_sample[i] = starpu_timing_now(); hyp_start_allow_sample[i] = 0.0; for(j = 0; j < STARPU_NMAXWORKERS; j++) { flops[i][j] = 0.0; data_size[i][j] = 0; } hyp_actual_start_sample[i] = 0.0; } } unsigned starpu_sched_ctx_check_if_hypervisor_exists() { return with_hypervisor; } void starpu_sched_ctx_update_start_resizing_sample(unsigned sched_ctx_id, double start_sample) { hyp_actual_start_sample[sched_ctx_id] = start_sample; } unsigned _starpu_sched_ctx_allow_hypervisor(unsigned sched_ctx_id) { (void) sched_ctx_id; return 1; #if 0 double now = starpu_timing_now(); if(hyp_start_allow_sample[sched_ctx_id] > 0.0) { double allow_sample = (now - hyp_start_allow_sample[sched_ctx_id]) / 1000000.0; if(allow_sample < 0.001) return 1; else { hyp_start_allow_sample[sched_ctx_id] = 0.0; hyp_start_sample[sched_ctx_id] = starpu_timing_now(); return 0; } } double forbid_sample = (now - hyp_start_sample[sched_ctx_id]) / 1000000.0; if(forbid_sample > 0.01) { // hyp_start_sample[sched_ctx_id] = starpu_timing_now(); hyp_start_allow_sample[sched_ctx_id] = starpu_timing_now(); return 1; } return 0; #endif } void starpu_sched_ctx_set_policy_data(unsigned sched_ctx_id, void* policy_data) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); sched_ctx->policy_data = policy_data; } void* starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); return sched_ctx->policy_data; } struct starpu_sched_policy *starpu_sched_ctx_get_sched_policy(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); return sched_ctx->sched_policy; } struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type worker_collection_type) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); _STARPU_MALLOC(sched_ctx->workers, sizeof(struct starpu_worker_collection)); switch(worker_collection_type) { #ifdef STARPU_HAVE_HWLOC case STARPU_WORKER_TREE: sched_ctx->workers->has_next = starpu_worker_tree.has_next; sched_ctx->workers->get_next = starpu_worker_tree.get_next; sched_ctx->workers->add = starpu_worker_tree.add; sched_ctx->workers->remove = starpu_worker_tree.remove; sched_ctx->workers->init = starpu_worker_tree.init; sched_ctx->workers->deinit = starpu_worker_tree.deinit; sched_ctx->workers->init_iterator = starpu_worker_tree.init_iterator; sched_ctx->workers->init_iterator_for_parallel_tasks = starpu_worker_tree.init_iterator_for_parallel_tasks; sched_ctx->workers->type = STARPU_WORKER_TREE; break; #endif // case STARPU_WORKER_LIST: default: sched_ctx->workers->has_next = starpu_worker_list.has_next; sched_ctx->workers->get_next = starpu_worker_list.get_next; sched_ctx->workers->add = starpu_worker_list.add; sched_ctx->workers->remove = starpu_worker_list.remove; sched_ctx->workers->init = starpu_worker_list.init; sched_ctx->workers->deinit = starpu_worker_list.deinit; sched_ctx->workers->init_iterator = starpu_worker_list.init_iterator; sched_ctx->workers->init_iterator_for_parallel_tasks = starpu_worker_list.init_iterator_for_parallel_tasks; sched_ctx->workers->type = STARPU_WORKER_LIST; break; } /* construct the collection of workers(list/tree/etc.) */ sched_ctx->workers->init(sched_ctx->workers); return sched_ctx->workers; } void starpu_sched_ctx_display_workers(unsigned sched_ctx_id, FILE *f) { int *workerids = NULL; unsigned nworkers; unsigned i; nworkers = starpu_sched_ctx_get_workers_list(sched_ctx_id, &workerids); fprintf(f, "[sched_ctx %u]: %u worker%s\n", sched_ctx_id, nworkers, nworkers>1?"s":""); for (i = 0; i < nworkers; i++) { char name[256]; starpu_worker_get_name(workerids[i], name, 256); fprintf(f, "\t\t%s\n", name); } free(workerids); } unsigned starpu_sched_ctx_get_workers_list_raw(unsigned sched_ctx_id, int **workerids) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); *workerids = sched_ctx->workers->workerids; return sched_ctx->workers->nworkers; } unsigned starpu_sched_ctx_get_workers_list(unsigned sched_ctx_id, int **workerids) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); struct starpu_worker_collection *workers = sched_ctx->workers; unsigned nworkers = 0; struct starpu_sched_ctx_iterator it; if(!workers) return 0; _STARPU_MALLOC(*workerids, workers->nworkers*sizeof(int)); workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { int worker = workers->get_next(workers, &it); (*workerids)[nworkers++] = worker; } return nworkers; } void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); sched_ctx->workers->deinit(sched_ctx->workers); free(sched_ctx->workers); sched_ctx->workers = NULL; } struct starpu_worker_collection* starpu_sched_ctx_get_worker_collection(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); return sched_ctx->workers; } int _starpu_get_workers_of_sched_ctx(unsigned sched_ctx_id, int *pus, enum starpu_worker_archtype arch) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); struct starpu_worker_collection *workers = sched_ctx->workers; int npus = 0; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { int worker = workers->get_next(workers, &it); enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker); if(curr_arch == arch || arch == STARPU_ANY_WORKER) pus[npus++] = worker; } return npus; } unsigned starpu_sched_ctx_get_nworkers(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); if(sched_ctx != NULL) return sched_ctx->workers->nworkers; else return 0; } unsigned starpu_sched_ctx_get_nshared_workers(unsigned sched_ctx_id, unsigned sched_ctx_id2) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); struct _starpu_sched_ctx *sched_ctx2 = _starpu_get_sched_ctx_struct(sched_ctx_id2); struct starpu_worker_collection *workers = sched_ctx->workers; struct starpu_worker_collection *workers2 = sched_ctx2->workers; int shared_workers = 0; struct starpu_sched_ctx_iterator it1, it2; workers->init_iterator(workers, &it1); workers2->init_iterator(workers2, &it2); while(workers->has_next(workers, &it1)) { int worker = workers->get_next(workers, &it1); while(workers2->has_next(workers2, &it2)) { int worker2 = workers2->get_next(workers2, &it2); if(worker == worker2) shared_workers++; } } return shared_workers; } unsigned starpu_sched_ctx_contains_worker(int workerid, unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); struct starpu_worker_collection *workers = sched_ctx->workers; if(workers) { unsigned i; for (i = 0; i < workers->nworkers; i++) if (workerid == workers->workerids[i]) return 1; } return 0; } unsigned starpu_sched_ctx_contains_type_of_worker(enum starpu_worker_archtype arch, unsigned sched_ctx_id) { struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); unsigned i; for (i = 0; i < workers->nworkers; i++) { int worker = workers->workerids[i]; enum starpu_worker_archtype curr_arch = starpu_worker_get_type(worker); if(curr_arch == arch) return 1; } return 0; } unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_id) { struct _starpu_machine_config *config = _starpu_get_machine_config(); int i; for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) { struct _starpu_sched_ctx *sched_ctx = &config->sched_ctxs[i]; if(sched_ctx && sched_ctx->id != STARPU_NMAX_SCHED_CTXS && sched_ctx->id != sched_ctx_id) if(starpu_sched_ctx_contains_worker(workerid, sched_ctx->id)) return 1; } return 0; } unsigned starpu_sched_ctx_worker_get_id(unsigned sched_ctx_id) { int workerid = starpu_worker_get_id(); if(workerid != -1) if(starpu_sched_ctx_contains_worker(workerid, sched_ctx_id)) return workerid; return -1; } unsigned starpu_sched_ctx_get_ctx_for_task(struct starpu_task *task) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx); unsigned ret_sched_ctx = task->sched_ctx; if (task->possibly_parallel && !sched_ctx->sched_policy && sched_ctx->nesting_sched_ctx != STARPU_NMAX_SCHED_CTXS) ret_sched_ctx = sched_ctx->nesting_sched_ctx; return ret_sched_ctx; } unsigned starpu_sched_ctx_overlapping_ctxs_on_worker(int workerid) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); return worker->nsched_ctxs > 1; } void starpu_sched_ctx_set_inheritor(unsigned sched_ctx_id, unsigned inheritor) { STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); STARPU_ASSERT(inheritor < STARPU_NMAX_SCHED_CTXS); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); sched_ctx->inheritor = inheritor; return; } unsigned starpu_sched_ctx_get_inheritor(unsigned sched_ctx_id) { STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); return sched_ctx->inheritor; } unsigned starpu_sched_ctx_get_hierarchy_level(unsigned sched_ctx_id) { STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); return sched_ctx->hierarchy_level; } void starpu_sched_ctx_finished_submit(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); STARPU_PTHREAD_MUTEX_LOCK(&finished_submit_mutex); sched_ctx->finished_submit = 1; STARPU_PTHREAD_MUTEX_UNLOCK(&finished_submit_mutex); return; } #ifdef STARPU_USE_SC_HYPERVISOR void _starpu_sched_ctx_post_exec_task_cb(int workerid, struct starpu_task *task, size_t data_size2, uint32_t footprint) { if (workerid < 0) return; struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx); if(sched_ctx != NULL && task->sched_ctx != _starpu_get_initial_sched_ctx()->id && task->sched_ctx != STARPU_NMAX_SCHED_CTXS && sched_ctx->perf_counters != NULL) { flops[task->sched_ctx][workerid] += task->flops; data_size[task->sched_ctx][workerid] += data_size2; if(_starpu_sched_ctx_allow_hypervisor(sched_ctx->id) || task->hypervisor_tag > 0) { _STARPU_TRACE_HYPERVISOR_BEGIN(); sched_ctx->perf_counters->notify_post_exec_task(task, data_size[task->sched_ctx][workerid], footprint, task->hypervisor_tag, flops[task->sched_ctx][workerid]); _STARPU_TRACE_HYPERVISOR_END(); flops[task->sched_ctx][workerid] = 0.0; data_size[task->sched_ctx][workerid] = 0; } } } void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); if(sched_ctx != NULL && sched_ctx_id != _starpu_get_initial_sched_ctx()->id && sched_ctx_id != STARPU_NMAX_SCHED_CTXS && sched_ctx->perf_counters != NULL && _starpu_sched_ctx_allow_hypervisor(sched_ctx_id)) { _STARPU_TRACE_HYPERVISOR_BEGIN(); sched_ctx->perf_counters->notify_pushed_task(sched_ctx_id, workerid); _STARPU_TRACE_HYPERVISOR_END(); } } #endif //STARPU_USE_SC_HYPERVISOR int starpu_sched_get_min_priority(void) { return starpu_sched_ctx_get_min_priority(_starpu_sched_ctx_get_current_context()); } int starpu_sched_get_max_priority(void) { return starpu_sched_ctx_get_max_priority(_starpu_sched_ctx_get_current_context()); } int starpu_sched_set_min_priority(int min_prio) { return starpu_sched_ctx_set_min_priority(_starpu_sched_ctx_get_current_context(), min_prio); } int starpu_sched_set_max_priority(int max_prio) { return starpu_sched_ctx_set_max_priority(_starpu_sched_ctx_get_current_context(), max_prio); } int starpu_sched_ctx_get_min_priority(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); return sched_ctx->min_priority; } int starpu_sched_ctx_get_max_priority(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); return sched_ctx->max_priority; } int starpu_sched_ctx_set_min_priority(unsigned sched_ctx_id, int min_prio) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); sched_ctx->min_priority = min_prio; return 0; } int starpu_sched_ctx_set_max_priority(unsigned sched_ctx_id, int max_prio) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); sched_ctx->max_priority = max_prio; return 0; } int starpu_sched_ctx_min_priority_is_set(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); return sched_ctx->min_priority_is_set; } int starpu_sched_ctx_max_priority_is_set(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); return sched_ctx->max_priority_is_set; } static void set_priority_on_notified_workers(int *workers, int nworkers, unsigned sched_ctx_id, unsigned priority) { if(nworkers != -1) { int w; struct _starpu_worker *worker = NULL; for(w = 0; w < nworkers; w++) { if (workers[w] >= (int) starpu_worker_get_count()) /* Combined worker, don't care */ continue; worker = _starpu_get_worker_struct(workers[w]); _starpu_sched_ctx_list_move(&worker->sched_ctx_list, sched_ctx_id, priority); } } } void starpu_sched_ctx_set_priority(int *workerids, int nworkers, unsigned sched_ctx_id, unsigned priority) { if(nworkers != -1) { notify_workers_about_changing_ctx_pending(nworkers, workerids); _starpu_sched_ctx_lock_write(sched_ctx_id); int w; for(w = 0; w < nworkers; w++) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[w]); _starpu_sched_ctx_list_move(&worker->sched_ctx_list, sched_ctx_id, priority); } notify_workers_about_changing_ctx_done(nworkers, workerids); _starpu_sched_ctx_unlock_write(sched_ctx_id); } } unsigned starpu_sched_ctx_get_priority(int workerid, unsigned sched_ctx_id) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); return _starpu_sched_ctx_elt_get_priority(worker->sched_ctx_list, sched_ctx_id); } unsigned _starpu_sched_ctx_last_worker_awake(struct _starpu_worker *worker) { /* The worker being checked must have its status set to sleeping during * the check, to allow for an other worker being checked concurrently * to make the safe, pessimistic assumption that it is the last worker * awake. In the worst case, both workers will follow this pessimistic * path and perform one more scheduling loop */ STARPU_HG_DISABLE_CHECKING(_starpu_config.workers[worker->workerid].status); STARPU_ASSERT(_starpu_config.workers[worker->workerid].status & STATUS_SLEEPING); STARPU_HG_ENABLE_CHECKING(_starpu_config.workers[worker->workerid].status); struct _starpu_sched_ctx_list_iterator list_it; _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) { struct _starpu_sched_ctx_elt *e = _starpu_sched_ctx_list_iterator_get_next(&list_it); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(e->sched_ctx); unsigned last_worker_awake = 1; struct starpu_worker_collection *workers = sched_ctx->workers; /* workers can be NULL in some corner cases, since we do not lock sched_ctx here */ if (workers != NULL) { struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { int workerid = workers->get_next(workers, &it); if(workerid != worker->workerid) { if(starpu_worker_is_combined_worker(workerid)) { continue; } /* The worker status is intendedly checked * without taking locks. If multiple workers * are concurrently assessing whether they are * the last worker awake, they will follow the * pessimistic path and assume that they are * the last worker awake */ STARPU_HG_DISABLE_CHECKING(_starpu_config.workers[workerid].status); const int cond = !(_starpu_config.workers[workerid].status & STATUS_SLEEPING); STARPU_HG_ENABLE_CHECKING(_starpu_config.workers[workerid].status); if (cond) { last_worker_awake = 0; break; } } } } if(last_worker_awake) return 1; } return 0; } void starpu_sched_ctx_bind_current_thread_to_cpuid(unsigned cpuid) { _starpu_bind_thread_on_cpu(cpuid, STARPU_NOWORKERID, NULL); } unsigned starpu_sched_ctx_worker_is_master_for_child_ctx(int workerid, unsigned sched_ctx_id) { if (_starpu_get_nsched_ctxs() <= 1) return STARPU_NMAX_SCHED_CTXS; struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); struct _starpu_sched_ctx_list_iterator list_it; _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) { struct _starpu_sched_ctx_elt *e = _starpu_sched_ctx_list_iterator_get_next(&list_it); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(e->sched_ctx); if(sched_ctx-> main_master == workerid && sched_ctx->nesting_sched_ctx == sched_ctx_id) return sched_ctx->id; } return STARPU_NMAX_SCHED_CTXS; } unsigned starpu_sched_ctx_master_get_context(int masterid) { struct _starpu_worker *worker = _starpu_get_worker_struct(masterid); struct _starpu_sched_ctx_list_iterator list_it; _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) { struct _starpu_sched_ctx_elt *e = _starpu_sched_ctx_list_iterator_get_next(&list_it); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(e->sched_ctx); if(sched_ctx->main_master == masterid) return sched_ctx->id; } return STARPU_NMAX_SCHED_CTXS; } struct _starpu_sched_ctx *__starpu_sched_ctx_get_sched_ctx_for_worker_and_job(struct _starpu_worker *worker, struct _starpu_job *j) { struct _starpu_sched_ctx_list_iterator list_it; struct _starpu_sched_ctx *ret = NULL; starpu_worker_lock(worker->workerid); _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) { struct _starpu_sched_ctx_elt *e = _starpu_sched_ctx_list_iterator_get_next(&list_it); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(e->sched_ctx); if (j->task->sched_ctx == sched_ctx->id) { ret = sched_ctx; break; } } starpu_worker_unlock(worker->workerid); return ret; } void starpu_sched_ctx_revert_task_counters_ctx_locked(unsigned sched_ctx_id, double ready_flops) { _starpu_decrement_nsubmitted_tasks_of_sched_ctx(sched_ctx_id); _starpu_decrement_nready_tasks_of_sched_ctx_locked(sched_ctx_id, ready_flops); } void starpu_sched_ctx_revert_task_counters(unsigned sched_ctx_id, double ready_flops) { _starpu_decrement_nsubmitted_tasks_of_sched_ctx(sched_ctx_id); _starpu_decrement_nready_tasks_of_sched_ctx(sched_ctx_id, ready_flops); } void starpu_sched_ctx_move_task_to_ctx_locked(struct starpu_task *task, unsigned sched_ctx, unsigned with_repush) { /* Restore state just like out of dependency layers */ STARPU_ASSERT(task->status == STARPU_TASK_READY); task->status = STARPU_TASK_BLOCKED; /* TODO: make something cleaner which differentiates between calls from push or pop (have mutex or not) and from another worker or not */ task->sched_ctx = sched_ctx; struct _starpu_job *j = _starpu_get_job_associated_to_task(task); _starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx); if(with_repush) _starpu_repush_task(j); else _starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops, j->task); } #if 0 void starpu_sched_ctx_move_task_to_ctx(struct starpu_task *task, unsigned sched_ctx, unsigned manage_mutex, unsigned with_repush) { /* TODO: make something cleaner which differentiates between calls from push or pop (have mutex or not) and from another worker or not */ int workerid = starpu_worker_get_id(); struct _starpu_worker *worker = NULL; if(workerid != -1 && manage_mutex) { worker = _starpu_get_worker_struct(workerid); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); } task->sched_ctx = sched_ctx; struct _starpu_job *j = _starpu_get_job_associated_to_task(task); _starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx); if(with_repush) _starpu_repush_task(j); else _starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops, j->task); if(workerid != -1 && manage_mutex) STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); } #endif void starpu_sched_ctx_list_task_counters_increment(unsigned sched_ctx_id, int workerid) { /* Note : often we don't have any sched_mutex taken here but we should, so take it */ struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); /* FIXME: why do we push events only when the worker belongs to more than one ctx? */ if (worker->nsched_ctxs > 1) { starpu_worker_lock(workerid); _starpu_sched_ctx_list_push_event(worker->sched_ctx_list, sched_ctx_id); starpu_worker_unlock(workerid); } } void starpu_sched_ctx_list_task_counters_decrement(unsigned sched_ctx_id, int workerid) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); if (worker->nsched_ctxs > 1) _starpu_sched_ctx_list_pop_event(worker->sched_ctx_list, sched_ctx_id); } void starpu_sched_ctx_list_task_counters_reset(unsigned sched_ctx_id, int workerid) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); if (worker->nsched_ctxs > 1) _starpu_sched_ctx_list_pop_all_event(worker->sched_ctx_list, sched_ctx_id); } void starpu_sched_ctx_list_task_counters_increment_all_ctx_locked(struct starpu_task *task, unsigned sched_ctx_id) { /* TODO: add proper, but light-enough locking to sched_ctx counters */ /* Note that with 1 ctx we will default to the global context, hence our counters are useless */ if (_starpu_get_nsched_ctxs() > 1) { struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { int worker = workers->get_next(workers, &it); starpu_sched_ctx_list_task_counters_increment(sched_ctx_id, worker); } } } void starpu_sched_ctx_list_task_counters_increment_all(struct starpu_task *task, unsigned sched_ctx_id) { /* TODO: add proper, but light-enough locking to sched_ctx counters */ /* Note that with 1 ctx we will default to the global context, hence our counters are useless */ if (_starpu_get_nsched_ctxs() > 1) { struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; _starpu_sched_ctx_lock_write(sched_ctx_id); workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { int worker = workers->get_next(workers, &it); starpu_sched_ctx_list_task_counters_increment(sched_ctx_id, worker); } _starpu_sched_ctx_unlock_write(sched_ctx_id); } } void starpu_sched_ctx_list_task_counters_decrement_all_ctx_locked(struct starpu_task *task, unsigned sched_ctx_id) { if (_starpu_get_nsched_ctxs() > 1) { struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { int workerid = workers->get_next(workers, &it); struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); if (worker->nsched_ctxs > 1) { starpu_worker_lock(workerid); starpu_sched_ctx_list_task_counters_decrement(sched_ctx_id, workerid); starpu_worker_unlock(workerid); } } } } void starpu_sched_ctx_list_task_counters_decrement_all(struct starpu_task *task, unsigned sched_ctx_id) { if (_starpu_get_nsched_ctxs() > 1) { struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; _starpu_sched_ctx_lock_write(sched_ctx_id); workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { int workerid = workers->get_next(workers, &it); struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); if (worker->nsched_ctxs > 1) { starpu_worker_lock(workerid); starpu_sched_ctx_list_task_counters_decrement(sched_ctx_id, workerid); starpu_worker_unlock(workerid); } } _starpu_sched_ctx_unlock_write(sched_ctx_id); } } void starpu_sched_ctx_list_task_counters_reset_all(struct starpu_task *task, unsigned sched_ctx_id) { if (_starpu_get_nsched_ctxs() > 1) { _starpu_sched_ctx_lock_write(sched_ctx_id); struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { int workerid = workers->get_next(workers, &it); struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); if (worker->nsched_ctxs > 1) { starpu_worker_lock(workerid); starpu_sched_ctx_list_task_counters_reset(sched_ctx_id, workerid); starpu_worker_unlock(workerid); } } _starpu_sched_ctx_unlock_write(sched_ctx_id); } } static void _starpu_sched_ctx_block_workers_in_parallel(unsigned sched_ctx_id, unsigned all) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); int current_worker_id = starpu_worker_get_id(); int master, temp_master = 0; struct starpu_worker_collection *workers = sched_ctx->workers; struct starpu_sched_ctx_iterator it; /* temporarily put a master if needed */ if (sched_ctx->main_master == -1) { _starpu_sched_ctx_put_new_master(sched_ctx_id); temp_master = 1; } master = sched_ctx->main_master; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { int workerid = workers->get_next(workers, &it); if(starpu_worker_get_type(workerid) == STARPU_CPU_WORKER && (workerid != master || all) && (current_worker_id == -1 || workerid != current_worker_id)) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); _starpu_worker_request_blocking_in_parallel(worker); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); } } if (temp_master) sched_ctx->main_master = -1; } static void _starpu_sched_ctx_unblock_workers_in_parallel(unsigned sched_ctx_id, unsigned all) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); int current_worker_id = starpu_worker_get_id(); int master, temp_master = 0; struct starpu_worker_collection *workers = sched_ctx->workers; struct starpu_sched_ctx_iterator it; /* temporarily put a master if needed */ if (sched_ctx->main_master == -1) { _starpu_sched_ctx_put_new_master(sched_ctx_id); temp_master = 1; } master = sched_ctx->main_master; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { int workerid = workers->get_next(workers, &it); if(starpu_worker_get_type(workerid) == STARPU_CPU_WORKER && (workerid != master || all)) { if (current_worker_id == -1 || workerid != current_worker_id) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); STARPU_PTHREAD_MUTEX_LOCK(&worker->sched_mutex); _starpu_worker_request_unblocking_in_parallel(worker); STARPU_PTHREAD_MUTEX_UNLOCK(&worker->sched_mutex); } } } if (temp_master) sched_ctx->main_master = -1; return; } void* starpu_sched_ctx_exec_parallel_code(void* (*func)(void*), void* param, unsigned sched_ctx_id) { _starpu_sched_ctx_block_workers_in_parallel(sched_ctx_id, 1); /* execute parallel code */ void* ret = func(param); /* wake up starpu workers */ _starpu_sched_ctx_unblock_workers_in_parallel(sched_ctx_id, 1); return ret; } static void _starpu_sched_ctx_update_parallel_workers_with(unsigned sched_ctx_id) { struct _starpu_sched_ctx * sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); if(sched_ctx->sched_policy) return; _starpu_sched_ctx_put_new_master(sched_ctx_id); if(!sched_ctx->awake_workers) { _starpu_sched_ctx_block_workers_in_parallel(sched_ctx_id, 0); } } static void _starpu_sched_ctx_update_parallel_workers_without(unsigned sched_ctx_id) { struct _starpu_sched_ctx * sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); if(sched_ctx->sched_policy) return; _starpu_sched_ctx_put_new_master(sched_ctx_id); if(!sched_ctx->awake_workers) { _starpu_sched_ctx_unblock_workers_in_parallel(sched_ctx_id, 0); } } void starpu_sched_ctx_get_available_cpuids(unsigned sched_ctx_id, int **cpuids, int *ncpuids) { int current_worker_id = starpu_worker_get_id(); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); struct starpu_worker_collection *workers = sched_ctx->workers; _STARPU_MALLOC((*cpuids), workers->nworkers*sizeof(int)); int w = 0; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { int workerid = workers->get_next(workers, &it); int master = sched_ctx->main_master; if(master == current_worker_id || workerid == current_worker_id || current_worker_id == -1) { (*cpuids)[w++] = starpu_worker_get_bindid(workerid); } } *ncpuids = w; return; } static void _starpu_sched_ctx_put_new_master(unsigned sched_ctx_id) { int *workerids; struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); unsigned nworkers = starpu_sched_ctx_get_workers_list_raw(sched_ctx_id, &workerids); unsigned i; for (i=0; imain_master = workerids[i]; break; } } STARPU_ASSERT_MSG(iperf_arch; } int starpu_sched_ctx_get_worker_rank(unsigned sched_ctx_id) { int idx = 0; int curr_workerid = starpu_worker_get_id(); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); if(sched_ctx->sched_policy || !sched_ctx->awake_workers) return -1; struct starpu_worker_collection *workers = sched_ctx->workers; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { int worker = workers->get_next(workers, &it); if(worker == curr_workerid) return idx; idx++; } return -1; } void (*starpu_sched_ctx_get_sched_policy_callback(unsigned sched_ctx_id))(unsigned) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); return sched_ctx->callback_sched; } unsigned starpu_sched_ctx_has_starpu_scheduler(unsigned sched_ctx_id, unsigned *awake_workers) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); *awake_workers = sched_ctx->awake_workers; return sched_ctx->sched_policy != NULL; } void *starpu_sched_ctx_get_user_data(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); STARPU_ASSERT(sched_ctx != NULL); return sched_ctx->user_data; } void starpu_sched_ctx_set_user_data(unsigned sched_ctx_id, void* user_data) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); STARPU_ASSERT(sched_ctx != NULL); sched_ctx->user_data = user_data; } void _starpu_worker_apply_deferred_ctx_changes(void) { int workerid = starpu_worker_get_id_check(); struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); struct _starpu_ctx_change_list *l = &worker->ctx_change_list; STARPU_ASSERT(!_starpu_worker_sched_op_pending()); while (!_starpu_ctx_change_list_empty(l)) { struct _starpu_ctx_change *chg = _starpu_ctx_change_list_pop_front(l); STARPU_ASSERT(chg->workerids_to_change != NULL); if (chg->nworkers_to_notify) { STARPU_ASSERT(chg->workerids_to_notify != NULL); notify_workers_about_changing_ctx_pending(chg->nworkers_to_notify, chg->workerids_to_notify); } else { STARPU_ASSERT(chg->workerids_to_notify == NULL); notify_workers_about_changing_ctx_pending(chg->nworkers_to_change, chg->workerids_to_change); } _starpu_sched_ctx_lock_write(chg->sched_ctx_id); switch (chg->op) { case ctx_change_add: { add_notified_workers(chg->workerids_to_change, chg->nworkers_to_change, chg->sched_ctx_id); } break; case ctx_change_remove: { remove_notified_workers(chg->workerids_to_change, chg->nworkers_to_change, chg->sched_ctx_id); { int i; for (i = 0; i < chg->nworkers_to_change; i++) { struct _starpu_worker *w = _starpu_get_worker_struct(chg->workerids_to_change[i]); if(w->removed_from_ctx[chg->sched_ctx_id] == 1 && w->shares_tasks_lists[chg->sched_ctx_id] == 1) { _starpu_worker_gets_out_of_ctx(chg->sched_ctx_id, w); w->removed_from_ctx[chg->sched_ctx_id] = 0; } } } } break; default: STARPU_ASSERT_MSG(0, "invalid ctx change opcode\n"); } if (chg->nworkers_to_notify) { notify_workers_about_changing_ctx_done(chg->nworkers_to_notify, chg->workerids_to_notify); } else { notify_workers_about_changing_ctx_done(chg->nworkers_to_change, chg->workerids_to_change); } _starpu_sched_ctx_unlock_write(chg->sched_ctx_id); free(chg->workerids_to_notify); free(chg->workerids_to_change); _starpu_ctx_change_delete(chg); } } /* * TODO: verify starpu_sched_ctx_create_inside_interval correctness before re-enabling the functions below */ #if 0 static void _get_workers(int min, int max, int *workers, int *nw, enum starpu_worker_archtype arch, unsigned allow_overlap) { int pus[max]; int npus = 0; int i; struct _starpu_machine_config *config = _starpu_get_machine_config(); if(config->topology.nsched_ctxs == 1) { /*we have all available resources */ npus = _starpu_worker_get_nids_by_type(arch, pus, max); /*TODO: hierarchical ctxs: get max good workers: close one to another */ for(i = 0; i < npus; i++) workers[(*nw)++] = pus[i]; } else { unsigned enough_resources = 0; npus = _starpu_worker_get_nids_ctx_free_by_type(arch, pus, max); for(i = 0; i < npus; i++) workers[(*nw)++] = pus[i]; if(npus == max) /*we have enough available resources */ enough_resources = 1; if(!enough_resources && npus >= min) /*we have enough available resources */ enough_resources = 1; if(!enough_resources) { /* try to get resources from ctx who have more than the min of workers they need */ int s; for(s = 1; s < STARPU_NMAX_SCHED_CTXS; s++) { if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS) { int _npus = 0; int _pus[STARPU_NMAXWORKERS]; _npus = _starpu_get_workers_of_sched_ctx(config->sched_ctxs[s].id, _pus, arch); int ctx_min = arch == STARPU_CPU_WORKER ? config->sched_ctxs[s].min_ncpus : config->sched_ctxs[s].min_ngpus; if(_npus > ctx_min) { int n=0; if(npus < min) { n = (_npus - ctx_min) > (min - npus) ? min - npus : (_npus - ctx_min); npus += n; } /*TODO: hierarchical ctxs: get n good workers: close to the other ones I already assigned to the ctx */ for(i = 0; i < n; i++) workers[(*nw)++] = _pus[i]; starpu_sched_ctx_remove_workers(_pus, n, config->sched_ctxs[s].id); } } } if(npus >= min) enough_resources = 1; } if(!enough_resources) { /* if there is no available workers to satisfy the minimum required give them workers proportional to their requirements*/ int global_npus = starpu_worker_get_count_by_type(arch); int req_npus = 0; int s; for(s = 1; s < STARPU_NMAX_SCHED_CTXS; s++) if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS) req_npus += arch == STARPU_CPU_WORKER ? config->sched_ctxs[s].min_ncpus : config->sched_ctxs[s].min_ngpus; req_npus += min; for(s = 1; s < STARPU_NMAX_SCHED_CTXS; s++) { if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS) { int ctx_min = arch == STARPU_CPU_WORKER ? config->sched_ctxs[s].min_ncpus : config->sched_ctxs[s].min_ngpus; double needed_npus = ((double)ctx_min * (double)global_npus) / (double)req_npus; int _npus = 0; int _pus[STARPU_NMAXWORKERS]; _npus = _starpu_get_workers_of_sched_ctx(config->sched_ctxs[s].id, _pus, arch); if(needed_npus < (double)_npus) { double npus_to_rem = (double)_npus - needed_npus; int x = floor(npus_to_rem); double x_double = (double)x; double diff = npus_to_rem - x_double; int npus_to_remove = diff >= 0.5 ? x+1 : x; int pus_to_remove[npus_to_remove]; int c = 0; /*TODO: hierarchical ctxs: get npus_to_remove good workers: close to the other ones I already assigned to the ctx */ for(i = _npus-1; i >= (_npus - npus_to_remove); i--) { workers[(*nw)++] = _pus[i]; pus_to_remove[c++] = _pus[i]; } if(!allow_overlap) starpu_sched_ctx_remove_workers(pus_to_remove, npus_to_remove, config->sched_ctxs[s].id); } } } } } } unsigned starpu_sched_ctx_create_inside_interval(const char *policy_name, const char *sched_ctx_name, int min_ncpus, int max_ncpus, int min_ngpus, int max_ngpus, unsigned allow_overlap) { struct _starpu_machine_config *config = _starpu_get_machine_config(); struct starpu_sched_policy *selected_policy = _starpu_select_sched_policy(config, policy_name); struct _starpu_sched_ctx *sched_ctx = NULL; int workers[max_ncpus + max_ngpus]; int nw = 0; STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx_manag); _get_workers(min_ncpus, max_ncpus, workers, &nw, STARPU_CPU_WORKER, allow_overlap); _get_workers(min_ngpus, max_ngpus, workers, &nw, STARPU_CUDA_WORKER, allow_overlap); STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx_manag); int i; _STARPU_DEBUG("%d: ", nw); for(i = 0; i < nw; i++) _STARPU_DEBUG_NO_HEADER("%d ", workers[i]); _STARPU_DEBUG_NO_HEADER("\n"); sched_ctx = _starpu_create_sched_ctx(selected_policy, workers, nw, 0, sched_ctx_name, 0, 0, 0, 0, 1, NULL, NULL,0, NULL, 0); sched_ctx->min_ncpus = min_ncpus; sched_ctx->max_ncpus = max_ncpus; sched_ctx->min_ngpus = min_ngpus; sched_ctx->max_ngpus = max_ngpus; int *added_workerids; unsigned nw_ctx = starpu_sched_ctx_get_workers_list(sched_ctx->id, &added_workerids); #warning TODO: verify call below, shouldn t it be _starpu_update_workers_with_ctx? _starpu_update_workers_without_ctx(added_workerids, nw_ctx, sched_ctx->id, 0); free(added_workerids); #ifdef STARPU_USE_SC_HYPERVISOR sched_ctx->perf_counters = NULL; #endif return sched_ctx->id; } #endif starpu-1.4.9+dfsg/src/core/sched_ctx.h000066400000000000000000000275571507764646700176640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __SCHED_CONTEXT_H__ #define __SCHED_CONTEXT_H__ /** @file */ #include #include #include #include #include #include #include #include #include #include #include "sched_ctx_list.h" #ifdef STARPU_HAVE_HWLOC #include #endif #pragma GCC visibility push(hidden) #define NO_RESIZE -1 #define REQ_RESIZE 0 #define DO_RESIZE 1 #define STARPU_GLOBAL_SCHED_CTX 0 #define STARPU_NMAXSMS 13 struct _starpu_sched_ctx { /** id of the context used in user mode*/ unsigned id; /** boolean indicating whether the scheduling_ctx will be considered for scheduling (1) or not (0)*/ unsigned do_schedule; /** name of context */ const char *name; /** policy of the context */ struct starpu_sched_policy *sched_policy; /** data necessary for the policy */ void *policy_data; /** pointer for application use */ void *user_data; struct starpu_worker_collection *workers; /** we keep an initial sched which we never delete */ unsigned is_initial_sched; /** wait for the tasks submitted to the context to be executed */ struct _starpu_barrier_counter tasks_barrier; /** wait for the tasks ready of the context to be executed */ struct _starpu_barrier_counter ready_tasks_barrier; /** amount of ready flops in a context */ double ready_flops; /** Iteration number, as advertised by application */ long iterations[2]; int iteration_level; /*ready tasks that couldn't be pushed because the ctx has no workers*/ struct starpu_task_list empty_ctx_tasks; /*ready tasks that couldn't be pushed because the the window of tasks was already full*/ struct starpu_task_list waiting_tasks; /** min CPUs to execute*/ int min_ncpus; /** max CPUs to execute*/ int max_ncpus; /** min GPUs to execute*/ int min_ngpus; /** max GPUs to execute*/ int max_ngpus; /** in case we delete the context leave resources to the inheritor*/ unsigned inheritor; /** indicates whether the application finished submitting tasks to this context*/ unsigned finished_submit; /** By default we have a binary type of priority: either a task is a priority * task (level 1) or it is not (level 0). */ int min_priority; int max_priority; int min_priority_is_set; int max_priority_is_set; /** hwloc tree structure of workers */ #ifdef STARPU_HAVE_HWLOC hwloc_bitmap_t hwloc_workers_set; #endif #ifdef STARPU_USE_SC_HYPERVISOR /** a structure containing a series of performance counters determining the resize procedure */ struct starpu_sched_ctx_performance_counters *perf_counters; #endif //STARPU_USE_SC_HYPERVISOR /** callback called when the context finished executed its submitted tasks */ void (*close_callback)(unsigned sched_ctx_id, void* args); void *close_args; /** value placing the contexts in their hierarchy */ unsigned hierarchy_level; /** if we execute non-StarPU code inside the context we have a single master worker that stays awake, if not master is -1 */ int main_master; /** ctx nesting the current ctx */ unsigned nesting_sched_ctx; /** perf model for the device comb of the ctx */ struct starpu_perfmodel_arch perf_arch; /** For parallel workers, say whether it is viewed as sequential or not. This is a helper for the prologue code. */ unsigned parallel_view; /** for ctxs without policy: flag to indicate that we want to get the threads to sleep in order to replace them with other threads or leave them awake & use them in the parallel code*/ unsigned awake_workers; /** callback function called when initializing the scheduler */ void (*callback_sched)(unsigned); int sub_ctxs[STARPU_NMAXWORKERS]; int nsub_ctxs; /** nr of SMs assigned to this ctx if we partition gpus*/ int nsms; int sms_start_idx; int sms_end_idx; int stream_worker; starpu_pthread_rwlock_t rwlock; starpu_pthread_t lock_write_owner; }; /** per-worker list of deferred ctx_change ops */ LIST_TYPE(_starpu_ctx_change, int sched_ctx_id; int op; int nworkers_to_notify; int *workerids_to_notify; int nworkers_to_change; int *workerids_to_change; ); struct _starpu_machine_config; /** init sched_ctx_id of all contextes*/ void _starpu_init_all_sched_ctxs(struct _starpu_machine_config *config); /** allocate all structures belonging to a context */ struct _starpu_sched_ctx* _starpu_create_sched_ctx(struct starpu_sched_policy *policy, int *workerid, int nworkerids, unsigned is_init_sched, const char *sched_name, int min_prio_set, int min_prio, int max_prio_set, int max_prio, unsigned awake_workers, void (*sched_policy_callback)(unsigned), void *user_data, int nsub_ctxs, int *sub_ctxs, int nsms); /** delete all sched_ctx */ void _starpu_delete_all_sched_ctxs(); /** This function waits until all the tasks that were already submitted to a specific * context have been executed. */ int _starpu_wait_for_all_tasks_of_sched_ctx(unsigned sched_ctx_id); /** This function waits until at most n tasks are still submitted. */ int _starpu_wait_for_n_submitted_tasks_of_sched_ctx(unsigned sched_ctx_id, unsigned n); /** In order to implement starpu_wait_for_all_tasks_of_ctx, we keep track of the number of * task currently submitted to the context */ void _starpu_decrement_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id); void _starpu_increment_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id); int _starpu_get_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id); int _starpu_check_nsubmitted_tasks_of_sched_ctx(unsigned sched_ctx_id); void _starpu_decrement_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops); unsigned _starpu_increment_nready_tasks_of_sched_ctx(unsigned sched_ctx_id, double ready_flops, struct starpu_task *task); int _starpu_wait_for_no_ready_of_sched_ctx(unsigned sched_ctx_id); /** Get workers belonging to a certain context, it returns the number * of workers take care: no mutex taken, the list of workers might not * be updated */ int _starpu_get_workers_of_sched_ctx(unsigned sched_ctx_id, int *pus, enum starpu_worker_archtype arch); /** Let the worker know it does not belong to the context and that it * should stop poping from it */ void _starpu_worker_gets_out_of_ctx(unsigned sched_ctx_id, struct _starpu_worker *worker); /** Check if the worker belongs to another sched_ctx */ unsigned _starpu_worker_belongs_to_a_sched_ctx(int workerid, unsigned sched_ctx_id); /** indicates whether this worker should go to sleep or not (if it is * the last one awake in a context he should better keep awake) */ unsigned _starpu_sched_ctx_last_worker_awake(struct _starpu_worker *worker); /** If starpu_sched_ctx_set_context() has been called, returns the context * id set by its last call, or the id of the initial context */ unsigned _starpu_sched_ctx_get_current_context() STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; /** verify that some worker can execute a certain task */ int _starpu_workers_able_to_execute_task(struct starpu_task *task, struct _starpu_sched_ctx *sched_ctx); unsigned _starpu_sched_ctx_allow_hypervisor(unsigned sched_ctx_id); struct starpu_perfmodel_arch * _starpu_sched_ctx_get_perf_archtype(unsigned sched_ctx); #ifdef STARPU_USE_SC_HYPERVISOR /** Notifies the hypervisor that a tasks was poped from the workers' list */ void _starpu_sched_ctx_post_exec_task_cb(int workerid, struct starpu_task *task, size_t data_size, uint32_t footprint); #endif //STARPU_USE_SC_HYPERVISOR void starpu_sched_ctx_add_combined_workers(int *combined_workers_to_add, unsigned n_combined_workers_to_add, unsigned sched_ctx_id); /** if the worker is the master of a parallel context, and the job is meant to be executed on this parallel context, return a pointer to the context */ struct _starpu_sched_ctx *__starpu_sched_ctx_get_sched_ctx_for_worker_and_job(struct _starpu_worker *worker, struct _starpu_job *j); #define _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(w,j) \ (_starpu_get_nsched_ctxs() <= 1 ? _starpu_get_sched_ctx_struct(0) : __starpu_sched_ctx_get_sched_ctx_for_worker_and_job((w),(j))) static inline struct _starpu_sched_ctx *_starpu_get_sched_ctx_struct(unsigned id); static inline int _starpu_sched_ctx_check_write_locked(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); return starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self()); } #define STARPU_SCHED_CTX_CHECK_LOCK(sched_ctx_id) STARPU_ASSERT(_starpu_sched_ctx_check_write_locked((sched_ctx_id))) static inline void _starpu_sched_ctx_lock_write(unsigned sched_ctx_id) { STARPU_ASSERT(sched_ctx_id <= STARPU_NMAX_SCHED_CTXS); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner); STARPU_ASSERT(!starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self())); STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner); STARPU_PTHREAD_RWLOCK_WRLOCK(&sched_ctx->rwlock); sched_ctx->lock_write_owner = starpu_pthread_self(); } static inline void _starpu_sched_ctx_unlock_write(unsigned sched_ctx_id) { STARPU_ASSERT(sched_ctx_id <= STARPU_NMAX_SCHED_CTXS); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner); STARPU_ASSERT(starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self())); memset(&sched_ctx->lock_write_owner, 0, sizeof(sched_ctx->lock_write_owner)); STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner); STARPU_PTHREAD_RWLOCK_UNLOCK(&sched_ctx->rwlock); } static inline void _starpu_sched_ctx_lock_read(unsigned sched_ctx_id) { STARPU_ASSERT(sched_ctx_id <= STARPU_NMAX_SCHED_CTXS); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner); STARPU_ASSERT(!starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self())); STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner); STARPU_PTHREAD_RWLOCK_RDLOCK(&sched_ctx->rwlock); } static inline void _starpu_sched_ctx_unlock_read(unsigned sched_ctx_id) { STARPU_ASSERT(sched_ctx_id <= STARPU_NMAX_SCHED_CTXS); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner); STARPU_ASSERT(!starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self())); STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner); STARPU_PTHREAD_RWLOCK_UNLOCK(&sched_ctx->rwlock); } static inline unsigned _starpu_sched_ctx_worker_is_master_for_child_ctx(unsigned sched_ctx_id, unsigned workerid, struct starpu_task *task) { unsigned child_sched_ctx = starpu_sched_ctx_worker_is_master_for_child_ctx(workerid, sched_ctx_id); if(child_sched_ctx != STARPU_NMAX_SCHED_CTXS) { starpu_sched_ctx_move_task_to_ctx_locked(task, child_sched_ctx, 1); starpu_sched_ctx_revert_task_counters_ctx_locked(sched_ctx_id, task->flops); return 1; } return 0; } /** Go through the list of deferred ctx changes of the current worker and apply * any ctx change operation found until the list is empty */ void _starpu_worker_apply_deferred_ctx_changes(void); #pragma GCC visibility pop #endif // __SCHED_CONTEXT_H__ starpu-1.4.9+dfsg/src/core/sched_ctx_list.c000066400000000000000000000233271507764646700207010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "sched_ctx_list.h" struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_find(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) { struct _starpu_sched_ctx_list *l = NULL; struct _starpu_sched_ctx_elt *e = NULL; unsigned found = 0; for (l = list; l && !found; l=l->next) { e=l->head; //Go in a circle once before stopping do { if (e->sched_ctx == sched_ctx) { found = 1; break; } e = e->next; } while (e != l->head); } return found ? e : NULL; } void _starpu_sched_ctx_elt_init(struct _starpu_sched_ctx_elt *elt, unsigned sched_ctx) { elt->sched_ctx = sched_ctx; elt->task_number = 0; elt->last_poped = 0; elt->parent = NULL; elt->next = NULL; elt->prev = NULL; } void _starpu_sched_ctx_elt_ensure_consistency(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) { struct _starpu_sched_ctx_elt *elt = _starpu_sched_ctx_elt_find(list, sched_ctx); if (elt && elt->task_number>0) elt->task_number = 0; } /* Adds a new element after the head of the given list. */ struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add_after(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) { struct _starpu_sched_ctx_elt *head, *next; struct _starpu_sched_ctx_elt *elt; _STARPU_MALLOC(elt, sizeof(struct _starpu_sched_ctx_elt)); _starpu_sched_ctx_elt_init(elt, sched_ctx); elt->parent = list; head = list->head; if (head != NULL) { next = head->next; head->next = elt; elt->prev = head; /** We know next != NULL since it is at least head **/ elt->next = next; next->prev = elt; } else { elt->next = elt; elt->prev = elt; list->head = elt; } return elt; } /* Adds a new element before the head of the given list. */ struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add_before(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) { struct _starpu_sched_ctx_elt *head, *prev; struct _starpu_sched_ctx_elt *elt; _STARPU_MALLOC(elt, sizeof(struct _starpu_sched_ctx_elt)); _starpu_sched_ctx_elt_init(elt, sched_ctx); elt->parent = list; head = list->head; if (head != NULL) { prev = head->prev; head->prev = elt; elt->next = head; elt->prev = prev; prev->next = elt; } else { elt->next = elt; elt->prev = elt; list->head = elt; } return elt; } struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) { return _starpu_sched_ctx_elt_add_after(list, sched_ctx); } /* Remove elt from list */ void _starpu_sched_ctx_elt_remove(struct _starpu_sched_ctx_list *list, struct _starpu_sched_ctx_elt *elt) { elt->prev->next = elt->next; elt->next->prev = elt->prev; if (elt->next == elt) //singleton list->head = NULL; else if (elt->next != elt && list->head == elt) list->head = elt->next; free(elt); return; } int _starpu_sched_ctx_elt_exists(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) { struct _starpu_sched_ctx_elt *e; e = _starpu_sched_ctx_elt_find(list, sched_ctx); return (e == NULL) ? 0 : 1; } int _starpu_sched_ctx_elt_get_priority(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) { struct _starpu_sched_ctx_elt *e; e = _starpu_sched_ctx_elt_find(list, sched_ctx); return (e == NULL) ? 0 : e->parent->priority; } struct _starpu_sched_ctx_list* _starpu_sched_ctx_list_find(struct _starpu_sched_ctx_list *list, unsigned prio) { struct _starpu_sched_ctx_list *l = NULL; for (l = list; l != NULL ; l=l->next) { if (l->priority == prio) break; } return l; } /* Adds sched_ctx in a priority list. We consider that we don't add two times * the same sched_ctx. Returns head of list. */ struct _starpu_sched_ctx_elt* _starpu_sched_ctx_list_add_prio(struct _starpu_sched_ctx_list **list, unsigned prio, unsigned sched_ctx) { struct _starpu_sched_ctx_list *parent_list = NULL, *prev = NULL, *last = NULL; struct _starpu_sched_ctx_list *l; for (l = *list; l != NULL; l=l->next) { if (l->priority <= prio) break; last = l; } if (l != NULL && l->priority == prio) { parent_list = l; } else //l's priority is inferior or inexistent, add before { _STARPU_MALLOC(parent_list, sizeof(struct _starpu_sched_ctx_list)); parent_list->priority = prio; parent_list->next = l; parent_list->head = NULL; parent_list->prev = NULL; if (l != NULL) { prev = l->prev; l->prev = parent_list; if (prev != NULL) { prev->next = parent_list; parent_list->prev = prev; } else { *list = parent_list; } } else { if (last == NULL) { *list = parent_list; } else { last->next = parent_list; parent_list->prev = last; } } } return _starpu_sched_ctx_elt_add(parent_list, sched_ctx); } int _starpu_sched_ctx_list_add(struct _starpu_sched_ctx_list **list, unsigned sched_ctx) { return _starpu_sched_ctx_list_add_prio(list, 0, sched_ctx) != NULL ? 0 : -1; } void _starpu_sched_ctx_list_remove_elt(struct _starpu_sched_ctx_list **list, struct _starpu_sched_ctx_elt *rm) { struct _starpu_sched_ctx_list *parent; parent = rm->parent; _starpu_sched_ctx_elt_remove(parent, rm); /* Automatically clean up useless prio list */ if (parent->head == NULL) { if (parent->prev == NULL) { *list = parent->next; if (parent->next != NULL) parent->next->prev = NULL; } else { parent->prev->next = parent->next; if (parent->next != NULL) parent->next->prev = parent->prev; } free(parent); parent = NULL; } return; } /* Searches for a context and remove it */ int _starpu_sched_ctx_list_remove(struct _starpu_sched_ctx_list **list, unsigned sched_ctx) { struct _starpu_sched_ctx_elt *rm; rm = _starpu_sched_ctx_elt_find(*list, sched_ctx); if (rm == NULL) return -1; _starpu_sched_ctx_list_remove_elt(list, rm); return 0; } int _starpu_sched_ctx_list_move(struct _starpu_sched_ctx_list **list, unsigned sched_ctx, unsigned prio_to) { struct _starpu_sched_ctx_elt *elt = _starpu_sched_ctx_elt_find(*list, sched_ctx); long task_number = 0; if (elt == NULL) return -1; task_number = elt->task_number; _starpu_sched_ctx_list_remove_elt(list, elt); elt = _starpu_sched_ctx_list_add_prio(list, prio_to, sched_ctx); elt->task_number = task_number; return 0; } int _starpu_sched_ctx_list_exists(struct _starpu_sched_ctx_list *list, unsigned prio) { struct _starpu_sched_ctx_list *l; l = _starpu_sched_ctx_list_find(list, prio); return ((l == NULL && list->priority == prio) || l != NULL) ? 1 : 0; } void _starpu_sched_ctx_list_remove_all(struct _starpu_sched_ctx_list *list) { while (list->head != NULL) _starpu_sched_ctx_elt_remove(list, list->head); free(list); } void _starpu_sched_ctx_list_delete(struct _starpu_sched_ctx_list **list) { while(*list) { struct _starpu_sched_ctx_list *next = (*list)->next; _starpu_sched_ctx_list_remove_all(*list); *list = NULL; if(next) *list = next; } } int _starpu_sched_ctx_list_iterator_init(struct _starpu_sched_ctx_list *list, struct _starpu_sched_ctx_list_iterator *it) { it->list_head = list; it->cursor = NULL; return 0; } int _starpu_sched_ctx_list_iterator_has_next(struct _starpu_sched_ctx_list_iterator *it) { if (it->cursor == NULL) { if (it->list_head != NULL) return it->list_head->head != NULL; else return 0; } else { struct _starpu_sched_ctx_list *parent = it->cursor->parent; if (it->cursor->next == parent->head) return parent->next != NULL; } return 1; } struct _starpu_sched_ctx_elt* _starpu_sched_ctx_list_iterator_get_next(struct _starpu_sched_ctx_list_iterator *it) { struct _starpu_sched_ctx_elt *ret=NULL, *current; struct _starpu_sched_ctx_list *parent; current = it->cursor; if (current != NULL) { parent = it->cursor->parent; if (current->next == parent->head) { if (parent->next != NULL) { it->cursor = parent->next->head; ret = it->cursor; } else { /* if everything fails (e.g. worker removed from ctx since related has_next call) just return head, it'll save us a synchro */ it->cursor = NULL; ret = it->list_head->head; } } else { it->cursor = current->next; ret = it->cursor; } } else { it->cursor = it->list_head->head; ret = it->cursor; } return ret; } int _starpu_sched_ctx_list_push_event(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) { struct _starpu_sched_ctx_elt *elt = _starpu_sched_ctx_elt_find(list, sched_ctx); if (elt == NULL) return -1; elt->task_number++; return 0; } int _starpu_sched_ctx_list_pop_event(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) { struct _starpu_sched_ctx_elt *elt = _starpu_sched_ctx_elt_find(list, sched_ctx); if (elt == NULL) return -1; elt->task_number--; /** Balance circular lists **/ elt->parent->head = elt->next; return 0; } int _starpu_sched_ctx_list_pop_all_event(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) { struct _starpu_sched_ctx_elt *elt = _starpu_sched_ctx_elt_find(list, sched_ctx); if (elt == NULL) return -1; elt->task_number = 0; /** Balance circular lists **/ elt->parent->head = elt->next; return 0; } starpu-1.4.9+dfsg/src/core/sched_ctx_list.h000066400000000000000000000114551507764646700207050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __SCHED_CONTEXT_LIST_H__ #define __SCHED_CONTEXT_LIST_H__ #pragma GCC visibility push(hidden) /** @file */ /** Represents a non circular list of priorities and contains a list of sched context */ struct _starpu_sched_ctx_elt; struct _starpu_sched_ctx_list { struct _starpu_sched_ctx_list *prev; struct _starpu_sched_ctx_list *next; struct _starpu_sched_ctx_elt *head; unsigned priority; }; /** Represents a circular list of sched context. */ struct _starpu_sched_ctx_elt { struct _starpu_sched_ctx_elt *prev; struct _starpu_sched_ctx_elt *next; struct _starpu_sched_ctx_list *parent; unsigned sched_ctx; long task_number; unsigned last_poped; }; struct _starpu_sched_ctx_list_iterator { struct _starpu_sched_ctx_list *list_head; struct _starpu_sched_ctx_elt *cursor; }; /** Element (sched_ctx) level operations */ struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_find(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; void _starpu_sched_ctx_elt_ensure_consistency(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; void _starpu_sched_ctx_elt_init(struct _starpu_sched_ctx_elt *elt, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add_after(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add_before(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; struct _starpu_sched_ctx_elt* _starpu_sched_ctx_elt_add(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; void _starpu_sched_ctx_elt_remove(struct _starpu_sched_ctx_list *list, struct _starpu_sched_ctx_elt *elt) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; int _starpu_sched_ctx_elt_exists(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; int _starpu_sched_ctx_elt_get_priority(struct _starpu_sched_ctx_list *list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; /** List (priority) level operations */ struct _starpu_sched_ctx_list* _starpu_sched_ctx_list_find(struct _starpu_sched_ctx_list *list, unsigned prio) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; struct _starpu_sched_ctx_elt* _starpu_sched_ctx_list_add_prio(struct _starpu_sched_ctx_list **list, unsigned prio, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; int _starpu_sched_ctx_list_add(struct _starpu_sched_ctx_list **list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; void _starpu_sched_ctx_list_remove_elt(struct _starpu_sched_ctx_list **list, struct _starpu_sched_ctx_elt *rm) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; int _starpu_sched_ctx_list_remove(struct _starpu_sched_ctx_list **list, unsigned sched_ctx) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; int _starpu_sched_ctx_list_move(struct _starpu_sched_ctx_list **list, unsigned sched_ctx, unsigned prio_to) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; int _starpu_sched_ctx_list_exists(struct _starpu_sched_ctx_list *list, unsigned prio) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; void _starpu_sched_ctx_list_remove_all(struct _starpu_sched_ctx_list *list) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; void _starpu_sched_ctx_list_delete(struct _starpu_sched_ctx_list **list) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; /** Task number management */ int _starpu_sched_ctx_list_push_event(struct _starpu_sched_ctx_list *list, unsigned sched_ctx); int _starpu_sched_ctx_list_pop_event(struct _starpu_sched_ctx_list *list, unsigned sched_ctx); int _starpu_sched_ctx_list_pop_all_event(struct _starpu_sched_ctx_list *list, unsigned sched_ctx); /** Iterator operations */ int _starpu_sched_ctx_list_iterator_init(struct _starpu_sched_ctx_list *list, struct _starpu_sched_ctx_list_iterator *it) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; int _starpu_sched_ctx_list_iterator_has_next(struct _starpu_sched_ctx_list_iterator *it) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; struct _starpu_sched_ctx_elt* _starpu_sched_ctx_list_iterator_get_next(struct _starpu_sched_ctx_list_iterator *it) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; #pragma GCC visibility pop #endif // __SCHED_CONTEXT_H__ starpu-1.4.9+dfsg/src/core/sched_policy.c000066400000000000000000001151201507764646700203400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #ifdef HAVE_DLOPEN #include #endif static int use_prefetch = 0; static double idle[STARPU_NMAXWORKERS]; static double idle_start[STARPU_NMAXWORKERS]; long _starpu_task_break_on_push = -1; long _starpu_task_break_on_sched = -1; long _starpu_task_break_on_pop = -1; long _starpu_task_break_on_exec = -1; static const char *starpu_idle_file; static void *dl_sched_handle = NULL; static const char *sched_lib = NULL; void _starpu_sched_init(void) { _starpu_task_break_on_push = starpu_getenv_number_default("STARPU_TASK_BREAK_ON_PUSH", -1); _starpu_task_break_on_sched = starpu_getenv_number_default("STARPU_TASK_BREAK_ON_SCHED", -1); _starpu_task_break_on_pop = starpu_getenv_number_default("STARPU_TASK_BREAK_ON_POP", -1); _starpu_task_break_on_exec = starpu_getenv_number_default("STARPU_TASK_BREAK_ON_EXEC", -1); starpu_idle_file = starpu_getenv("STARPU_IDLE_FILE"); } int starpu_get_prefetch_flag(void) { return use_prefetch; } static struct starpu_sched_policy *predefined_policies[] = { &_starpu_sched_modular_eager_policy, &_starpu_sched_modular_eager_prefetching_policy, &_starpu_sched_modular_eager_prio_policy, &_starpu_sched_modular_gemm_policy, &_starpu_sched_modular_prio_policy, &_starpu_sched_modular_prio_prefetching_policy, &_starpu_sched_modular_random_policy, &_starpu_sched_modular_random_prio_policy, &_starpu_sched_modular_random_prefetching_policy, &_starpu_sched_modular_random_prio_prefetching_policy, &_starpu_sched_modular_parallel_random_policy, &_starpu_sched_modular_parallel_random_prio_policy, &_starpu_sched_modular_ws_policy, &_starpu_sched_modular_dmda_policy, &_starpu_sched_modular_dmdap_policy, &_starpu_sched_modular_dmdar_policy, &_starpu_sched_modular_dmdas_policy, &_starpu_sched_modular_heft_policy, &_starpu_sched_modular_heft_prio_policy, &_starpu_sched_modular_heft2_policy, &_starpu_sched_modular_heteroprio_policy, &_starpu_sched_modular_heteroprio_heft_policy, &_starpu_sched_modular_parallel_heft_policy, &_starpu_sched_eager_policy, &_starpu_sched_prio_policy, &_starpu_sched_random_policy, &_starpu_sched_lws_policy, &_starpu_sched_ws_policy, &_starpu_sched_dm_policy, &_starpu_sched_dmda_policy, &_starpu_sched_dmda_prio_policy, &_starpu_sched_dmda_ready_policy, &_starpu_sched_dmda_sorted_policy, &_starpu_sched_dmda_sorted_decision_policy, &_starpu_sched_parallel_heft_policy, &_starpu_sched_peager_policy, &_starpu_sched_heteroprio_policy, &_starpu_sched_graph_test_policy, #ifdef STARPU_HAVE_HWLOC //&_starpu_sched_tree_heft_hierarchical_policy, #endif NULL }; struct starpu_sched_policy **starpu_sched_get_predefined_policies() { return predefined_policies; } struct starpu_sched_policy *_starpu_get_sched_policy(struct _starpu_sched_ctx *sched_ctx) { return sched_ctx->sched_policy; } struct starpu_sched_policy *starpu_sched_get_sched_policy_in_ctx(unsigned sched_ctx_id) { struct _starpu_machine_config *config = _starpu_get_machine_config(); struct _starpu_sched_ctx *sched_ctx = &config->sched_ctxs[sched_ctx_id]; return sched_ctx->sched_policy; } struct starpu_sched_policy *starpu_sched_get_sched_policy(void) { unsigned nsched_ctxs = _starpu_get_nsched_ctxs(); unsigned sched_ctx_id = nsched_ctxs == 1 ? 0 : starpu_sched_ctx_get_context(); return starpu_sched_get_sched_policy_in_ctx(sched_ctx_id); } /* * Methods to initialize the scheduling policy */ static void load_sched_policy(struct starpu_sched_policy *sched_policy, struct _starpu_sched_ctx *sched_ctx) { STARPU_ASSERT(sched_policy); #ifdef STARPU_VERBOSE if (sched_policy->policy_name) { if (sched_policy->policy_description) _STARPU_DEBUG("Use %s scheduler (%s)\n", sched_policy->policy_name, sched_policy->policy_description); else _STARPU_DEBUG("Use %s scheduler \n", sched_policy->policy_name); } #endif *(sched_ctx->sched_policy) = *sched_policy; } static void load_sched_lib() { /* check if the requested policy can be loaded dynamically */ sched_lib = starpu_getenv("STARPU_SCHED_LIB"); if (sched_lib) { #ifdef HAVE_DLOPEN if (dl_sched_handle) { dlclose(dl_sched_handle); dl_sched_handle = NULL; } dl_sched_handle = dlopen(sched_lib, RTLD_NOW); if (!dl_sched_handle) _STARPU_MSG("Warning: scheduling dynamic library '%s' can not be loaded\n", sched_lib); #else _STARPU_MSG("Environment variable 'STARPU_SCHED_LIB' defined but the dlopen functionality is unavailable on the system\n"); #endif } } static struct starpu_sched_policy *find_sched_policy_from_name(const char *policy_name) { if (!policy_name) return NULL; if (strcmp(policy_name, "") == 0) return NULL; /* check if the requested policy can be loaded dynamically */ load_sched_lib(); #ifdef HAVE_DLOPEN if (dl_sched_handle) { struct starpu_sched_policy *(*func_sched)(const char *); *(void**)(&func_sched) = dlsym(dl_sched_handle, "starpu_get_sched_lib_policy"); if (!func_sched) { /* no such symbol */ _STARPU_MSG("Warning: the library '%s' does not define the function 'starpu_get_sched_lib_policy' (error '%s')\n", sched_lib, dlerror()); dlclose(dl_sched_handle); dl_sched_handle = NULL; } else { struct starpu_sched_policy *dl_sched_policy = func_sched(policy_name); if (dl_sched_policy) return dl_sched_policy; else { dlclose(dl_sched_handle); dl_sched_handle = NULL; } } } #endif if (strncmp(policy_name, "heft", 4) == 0) { _STARPU_MSG("Warning: heft is now called \"dmda\".\n"); return &_starpu_sched_dmda_policy; } struct starpu_sched_policy **policy; for(policy=predefined_policies ; *policy!=NULL ; policy++) { struct starpu_sched_policy *p = *policy; if (p->policy_name) { if (strcmp(policy_name, p->policy_name) == 0) { /* we found a policy with the requested name */ return p; } } } if (strcmp(policy_name, "help") == 0) return NULL; _STARPU_MSG("Warning: scheduling policy '%s' was not found, try 'help' to get a list\n", policy_name); /* nothing was found */ return NULL; } static void display_sched_help_message(FILE *stream) { const char *sched_env = starpu_getenv("STARPU_SCHED"); if (sched_env && (strcmp(sched_env, "help") == 0)) { /* display the description of all predefined policies */ struct starpu_sched_policy **policy; fprintf(stream, "\nThe variable STARPU_SCHED can be set to one of the following strings:\n"); for(policy=predefined_policies ; *policy!=NULL ; policy++) { struct starpu_sched_policy *p = *policy; fprintf(stream, "%-30s\t-> %s\n", p->policy_name, p->policy_description); } fprintf(stream, "\n"); load_sched_lib(); #ifdef HAVE_DLOPEN if (dl_sched_handle) { struct starpu_sched_policy **(*func_scheds)(void); *(void**)(&func_scheds) = dlsym(dl_sched_handle, "starpu_get_sched_lib_policies"); if (func_scheds) { fprintf(stream, "(dynamically available policies)\n"); struct starpu_sched_policy **dl_sched_policies = func_scheds(); for(policy=dl_sched_policies ; *policy!=NULL ; policy++) { struct starpu_sched_policy *p = *policy; fprintf(stream, "%-30s\t-> %s\n", p->policy_name, p->policy_description); } fprintf(stream, "\n"); } } #endif } } struct starpu_sched_policy *_starpu_select_sched_policy(struct _starpu_machine_config *config, const char *required_policy) { struct starpu_sched_policy *selected_policy = NULL; struct starpu_conf *user_conf = &config->conf; if(required_policy) selected_policy = find_sched_policy_from_name(required_policy); /* If there is a policy that matches the required name, return it */ if (selected_policy) return selected_policy; /* First, we check whether the application explicitly gave a scheduling policy or not */ if (user_conf && (user_conf->sched_policy)) return user_conf->sched_policy; /* Otherwise, we look if the application specified the name of a policy to load */ const char *sched_pol_name; sched_pol_name = starpu_getenv("STARPU_SCHED"); if (sched_pol_name == NULL && user_conf && user_conf->sched_policy_name) sched_pol_name = user_conf->sched_policy_name; if (sched_pol_name) selected_policy = find_sched_policy_from_name(sched_pol_name); /* If there is a policy that matches the name, return it */ if (selected_policy) return selected_policy; /* If no policy was specified, we use the lws policy by default */ return &_starpu_sched_lws_policy; } void _starpu_init_sched_policy(struct _starpu_machine_config *config, struct _starpu_sched_ctx *sched_ctx, struct starpu_sched_policy *selected_policy) { /* Perhaps we have to display some help */ display_sched_help_message(stderr); /* Prefetch is activated by default */ use_prefetch = starpu_getenv_number("STARPU_PREFETCH"); if (use_prefetch == -1) use_prefetch = 1; /* Set calibrate flag */ _starpu_set_calibrate_flag(config->conf.calibrate); load_sched_policy(selected_policy, sched_ctx); if (starpu_getenv_number_default("STARPU_WORKER_TREE", 0)) { #ifdef STARPU_HAVE_HWLOC sched_ctx->sched_policy->worker_type = STARPU_WORKER_TREE; #else _STARPU_DISP("STARPU_WORKER_TREE ignored, please rebuild StarPU with hwloc support to enable it.\n"); #endif } starpu_sched_ctx_create_worker_collection(sched_ctx->id, sched_ctx->sched_policy->worker_type); _STARPU_SCHED_BEGIN; sched_ctx->sched_policy->init_sched(sched_ctx->id); _STARPU_SCHED_END; } void _starpu_deinit_sched_policy(struct _starpu_sched_ctx *sched_ctx) { struct starpu_sched_policy *policy = sched_ctx->sched_policy; if (policy->deinit_sched) { _STARPU_SCHED_BEGIN; policy->deinit_sched(sched_ctx->id); _STARPU_SCHED_END; } starpu_sched_ctx_delete_worker_collection(sched_ctx->id); #ifdef HAVE_DLOPEN if (dl_sched_handle) { dlclose(dl_sched_handle); dl_sched_handle = NULL; } #endif } void _starpu_sched_task_submit(struct starpu_task *task) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx); if (!sched_ctx->sched_policy) return; if (!sched_ctx->sched_policy->submit_hook) return; _STARPU_SCHED_BEGIN; sched_ctx->sched_policy->submit_hook(task); _STARPU_SCHED_END; } void _starpu_sched_do_schedule(unsigned sched_ctx_id) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); if (!sched_ctx->sched_policy) return; if (!sched_ctx->sched_policy->do_schedule) return; _STARPU_SCHED_BEGIN; sched_ctx->sched_policy->do_schedule(sched_ctx_id); _STARPU_SCHED_END; } static void _starpu_push_task_on_specific_worker_notify_sched(struct starpu_task *task, struct _starpu_worker *worker, int workerid, int perf_workerid) { /* if we push a task on a specific worker, notify all the sched_ctxs the worker belongs to */ struct _starpu_sched_ctx_list_iterator list_it; _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) { struct _starpu_sched_ctx_elt *e = _starpu_sched_ctx_list_iterator_get_next(&list_it); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(e->sched_ctx); if (sched_ctx->sched_policy != NULL && sched_ctx->sched_policy->push_task_notify) { _STARPU_SCHED_BEGIN; sched_ctx->sched_policy->push_task_notify(task, workerid, perf_workerid, sched_ctx->id); _STARPU_SCHED_END; } } } /* Enqueue a task into the list of tasks explicitly attached to a worker. In * case workerid identifies a combined worker, a task will be enqueued into * each worker of the combination. */ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int workerid) { int nbasic_workers = (int)starpu_worker_get_count(); /* Is this a basic worker or a combined worker ? */ int is_basic_worker = (workerid < nbasic_workers); struct _starpu_worker *worker = NULL; struct _starpu_combined_worker *combined_worker = NULL; if (is_basic_worker) { worker = _starpu_get_worker_struct(workerid); } else { combined_worker = _starpu_get_combined_worker_struct(workerid); } if (use_prefetch) starpu_prefetch_task_input_for(task, workerid); if (is_basic_worker) _starpu_push_task_on_specific_worker_notify_sched(task, worker, workerid, workerid); else { /* Notify all workers of the combined worker */ int worker_size = combined_worker->worker_size; int *combined_workerid = combined_worker->combined_workerid; int j; for (j = 0; j < worker_size; j++) { int subworkerid = combined_workerid[j]; _starpu_push_task_on_specific_worker_notify_sched(task, _starpu_get_worker_struct(subworkerid), subworkerid, workerid); } } #ifdef STARPU_USE_SC_HYPERVISOR starpu_sched_ctx_call_pushed_task_cb(workerid, task->sched_ctx); #endif //STARPU_USE_SC_HYPERVISOR if (is_basic_worker) { unsigned node = starpu_worker_get_memory_node(workerid); if (_starpu_task_uses_multiformat_handles(task)) { unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); unsigned i; for (i = 0; i < nbuffers; i++) { struct starpu_task *conversion_task; starpu_data_handle_t handle; handle = STARPU_TASK_GET_HANDLE(task, i); if (!_starpu_handle_needs_conversion_task(handle, node)) continue; conversion_task = _starpu_create_conversion_task(handle, node); conversion_task->mf_skip = 1; conversion_task->execute_on_a_specific_worker = 1; conversion_task->workerid = workerid; _starpu_task_submit_conversion_task(conversion_task, workerid); //_STARPU_DEBUG("Pushing a conversion task\n"); } for (i = 0; i < nbuffers; i++) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); handle->mf_node = node; } } // if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id) return _starpu_push_local_task(worker, task); } else { /* This is a combined worker so we create task aliases */ int worker_size = combined_worker->worker_size; int *combined_workerid = combined_worker->combined_workerid; int ret = 0; struct _starpu_job *job = _starpu_get_job_associated_to_task(task); job->task_size = worker_size; job->combined_workerid = workerid; job->active_task_alias_count = 0; STARPU_PTHREAD_BARRIER_INIT(&job->before_work_barrier, NULL, worker_size); STARPU_PTHREAD_BARRIER_INIT(&job->after_work_barrier, NULL, worker_size); job->after_work_busy_barrier = worker_size; /* Note: we have to call that early, or else the task may have * disappeared already */ starpu_push_task_end(task); int j; for (j = 0; j < worker_size; j++) { struct starpu_task *alias = starpu_task_dup(task); alias->destroy = 1; _STARPU_TRACE_JOB_PUSH(alias, alias->priority); worker = _starpu_get_worker_struct(combined_workerid[j]); ret |= _starpu_push_local_task(worker, alias); } return ret; } } /* the generic interface that call the proper underlying implementation */ int _starpu_push_task(struct _starpu_job *j) { #ifdef STARPU_SIMGRID if (_starpu_simgrid_task_push_cost()) starpu_sleep(0.000001); #endif if(j->task->prologue_callback_func) { _starpu_set_current_task(j->task); j->task->prologue_callback_func(j->task->prologue_callback_arg); _starpu_set_current_task(NULL); } if (j->task->transaction) { /* If task is part of a transaction and its epoch is cancelled, switch its * 'where' field to STARPU_NOWHERE to skip its execution */ struct starpu_transaction *p_trs = j->task->transaction; STARPU_ASSERT(j->task->transaction->state == _starpu_trs_initialized); _starpu_spin_lock(&p_trs->lock); STARPU_ASSERT(!_starpu_trs_epoch_list_empty(&p_trs->epoch_list)); struct _starpu_trs_epoch *p_epoch = _starpu_trs_epoch_list_front(&p_trs->epoch_list); STARPU_ASSERT(p_epoch == j->task->trs_epoch); STARPU_ASSERT(p_epoch->state == _starpu_trs_epoch_confirmed || p_epoch->state == _starpu_trs_epoch_cancelled); if (p_epoch->state == _starpu_trs_epoch_cancelled) { j->task->where = STARPU_NOWHERE; } _starpu_spin_unlock(&p_trs->lock); } return _starpu_repush_task(j); } int _starpu_repush_task(struct _starpu_job *j) { struct starpu_task *task = j->task; struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx); int ret; _STARPU_LOG_IN(); unsigned can_push = _starpu_increment_nready_tasks_of_sched_ctx(task->sched_ctx, task->flops, task); STARPU_ASSERT(task->status == STARPU_TASK_BLOCKED || task->status == STARPU_TASK_BLOCKED_ON_TAG || task->status == STARPU_TASK_BLOCKED_ON_TASK || task->status == STARPU_TASK_BLOCKED_ON_DATA); task->status = STARPU_TASK_READY; const unsigned continuation = #ifdef STARPU_OPENMP j->continuation #else 0 #endif ; if (!_starpu_perf_counter_paused() && !j->internal && !continuation) { (void) STARPU_PERF_COUNTER_ADD64(& _starpu_task__g_current_submitted__value, -1); int64_t value = STARPU_PERF_COUNTER_ADD64(& _starpu_task__g_current_ready__value, 1); _starpu_perf_counter_update_max_int64(&_starpu_task__g_peak_ready__value, value); if (task->cl && task->cl->perf_counter_values) { struct starpu_perf_counter_sample_cl_values * const pcv = task->cl->perf_counter_values; (void)STARPU_PERF_COUNTER_ADD64(&pcv->task.current_submitted, -1); value = STARPU_PERF_COUNTER_ADD64(&pcv->task.current_ready, 1); _starpu_perf_counter_update_max_int64(&pcv->task.peak_ready, value); } } STARPU_AYU_ADDTOTASKQUEUE(j->job_id, -1); /* if the context does not have any workers save the tasks in a temp list */ if ((task->cl != NULL && task->where != STARPU_NOWHERE) && (!sched_ctx->is_initial_sched)) { /*if there are workers in the ctx that are not able to execute tasks we consider the ctx empty */ unsigned able = _starpu_workers_able_to_execute_task(task, sched_ctx); if(!able) { _starpu_sched_ctx_lock_write(sched_ctx->id); starpu_task_list_push_front(&sched_ctx->empty_ctx_tasks, task); _starpu_sched_ctx_unlock_write(sched_ctx->id); #ifdef STARPU_USE_SC_HYPERVISOR if(sched_ctx->id != 0 && sched_ctx->perf_counters != NULL && sched_ctx->perf_counters->notify_empty_ctx) { _STARPU_TRACE_HYPERVISOR_BEGIN(); sched_ctx->perf_counters->notify_empty_ctx(sched_ctx->id, task); _STARPU_TRACE_HYPERVISOR_END(); } #endif return 0; } } if(!can_push) return 0; /* in case there is no codelet associated to the task (that's a control * task), we directly execute its callback and enforce the * corresponding dependencies */ if (task->cl == NULL || task->where == STARPU_NOWHERE) { _STARPU_TRACE_TASK_NAME_LINE_COLOR(j); if (!_starpu_perf_counter_paused() && !j->internal) { (void)STARPU_PERF_COUNTER_ADD64(& _starpu_task__g_current_ready__value, -1); if (task->cl && task->cl->perf_counter_values) { struct starpu_perf_counter_sample_cl_values * const pcv = task->cl->perf_counter_values; (void)STARPU_PERF_COUNTER_ADD64(&pcv->task.current_ready, -1); } } task->status = STARPU_TASK_RUNNING; if (task->prologue_callback_pop_func) { _starpu_set_current_task(task); task->prologue_callback_pop_func(task->prologue_callback_pop_arg); _starpu_set_current_task(NULL); } if (task->cl && task->cl->specific_nodes) { /* Nothing to do, but we are asked to fetch data on some memory nodes */ _starpu_fetch_nowhere_task_input(j); } else { if (task->cl #ifdef STARPU_BUBBLE && !j->is_bubble #endif ) __starpu_push_task_output(j); _starpu_handle_job_termination(j); _STARPU_LOG_OUT_TAG("handle_job_termination"); } return 0; } ret = _starpu_push_task_to_workers(task); if (ret == -EAGAIN) /* pushed to empty context, that's fine */ ret = 0; return ret; } int _starpu_push_task_to_workers(struct starpu_task *task) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx); _STARPU_TRACE_JOB_PUSH(task, task->priority); /* if the contexts still does not have workers put the task back to its place in the empty ctx list */ if(!sched_ctx->is_initial_sched) { /*if there are workers in the ctx that are not able to execute tasks we consider the ctx empty */ unsigned able = _starpu_workers_able_to_execute_task(task, sched_ctx); if (!able) { _starpu_sched_ctx_lock_write(sched_ctx->id); starpu_task_list_push_back(&sched_ctx->empty_ctx_tasks, task); _starpu_sched_ctx_unlock_write(sched_ctx->id); #ifdef STARPU_USE_SC_HYPERVISOR if(sched_ctx->id != 0 && sched_ctx->perf_counters != NULL && sched_ctx->perf_counters->notify_empty_ctx) { _STARPU_TRACE_HYPERVISOR_BEGIN(); sched_ctx->perf_counters->notify_empty_ctx(sched_ctx->id, task); _STARPU_TRACE_HYPERVISOR_END(); } #endif return -EAGAIN; } } _starpu_profiling_set_task_push_start_time(task); int ret = 0; if (STARPU_UNLIKELY(task->execute_on_a_specific_worker)) { ret = _starpu_push_task_on_specific_worker(task, task->workerid); } else { struct _starpu_machine_config *config = _starpu_get_machine_config(); if(!sched_ctx->sched_policy) { /* Note: we have to call that early, or else the task may have * disappeared already */ starpu_push_task_end(task); if(!sched_ctx->awake_workers) ret = _starpu_push_task_on_specific_worker(task, sched_ctx->main_master); else { struct starpu_worker_collection *workers = sched_ctx->workers; struct _starpu_job *job = _starpu_get_job_associated_to_task(task); job->task_size = workers->nworkers; job->combined_workerid = -1; // workerid; its a ctx not combined worker job->active_task_alias_count = 0; STARPU_PTHREAD_BARRIER_INIT(&job->before_work_barrier, NULL, workers->nworkers); STARPU_PTHREAD_BARRIER_INIT(&job->after_work_barrier, NULL, workers->nworkers); job->after_work_busy_barrier = workers->nworkers; struct starpu_sched_ctx_iterator it; if(workers->init_iterator) workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { unsigned workerid = workers->get_next(workers, &it); struct starpu_task *alias; if (job->task_size > 1) { alias = starpu_task_dup(task); _STARPU_TRACE_JOB_PUSH(alias, alias->priority); alias->destroy = 1; } else alias = task; ret |= _starpu_push_task_on_specific_worker(alias, workerid); } } } else { /* When a task can only be executed on a given arch and we have * only one memory node for that arch, we can systematically * prefetch before the scheduling decision. */ if (!sched_ctx->sched_policy->prefetches && starpu_get_prefetch_flag() && starpu_memory_nodes_get_count() > 1) { enum starpu_worker_archtype type; for (type = 0; type < STARPU_NARCH; type++) { if (task->where == (int32_t) STARPU_WORKER_TO_MASK(type)) { if (config->arch_nodeid[type] >= 0) starpu_prefetch_task_input_on_node(task, config->arch_nodeid[type]); break; } } } STARPU_ASSERT(sched_ctx->sched_policy->push_task); /* check out if there are any workers in the context */ unsigned nworkers = starpu_sched_ctx_get_nworkers(sched_ctx->id); if (nworkers == 0) ret = -1; else { struct _starpu_worker *worker = _starpu_get_local_worker_key(); if (worker) { STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); _starpu_worker_enter_sched_op(worker); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); } _STARPU_TASK_BREAK_ON(task, push); _STARPU_SCHED_BEGIN; ret = sched_ctx->sched_policy->push_task(task); _STARPU_SCHED_END; if (worker) { STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); _starpu_worker_leave_sched_op(worker); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); } } } if(ret == -1) { _STARPU_MSG("repush task \n"); _STARPU_TRACE_JOB_POP(task, task->priority); ret = _starpu_push_task_to_workers(task); } } /* Note: from here, the task might have been destroyed already! */ _STARPU_LOG_OUT(); return ret; } /* This is called right after the scheduler has pushed a task to a queue * but just before releasing mutexes: we need the task to still be alive! */ int starpu_push_task_end(struct starpu_task *task) { _starpu_profiling_set_task_push_end_time(task); task->scheduled = 1; return 0; } /* This is called right after the scheduler has pushed a task to a queue * but just before releasing mutexes: we need the task to still be alive! */ int _starpu_pop_task_end(struct starpu_task *task) { if (!task) return 0; _STARPU_TRACE_JOB_POP(task, task->priority); return 0; } /* * Given a handle that needs to be converted in order to be used on the given * node, returns a task that takes care of the conversion. */ struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle, unsigned int node) { return _starpu_create_conversion_task_for_arch(handle, starpu_node_get_kind(node)); } struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t handle, enum starpu_node_kind node_kind) { struct starpu_task *conversion_task; /* Driver porters: adding your driver here is optional, only needed for the support of multiple formats. */ #if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) struct starpu_multiformat_interface *format_interface; #endif conversion_task = starpu_task_create(); conversion_task->name = "conversion_task"; conversion_task->synchronous = 0; STARPU_TASK_SET_HANDLE(conversion_task, handle, 0); #if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) /* The node does not really matter here */ format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #endif _starpu_spin_lock(&handle->header_lock); handle->refcnt++; handle->busy_count++; _starpu_spin_unlock(&handle->header_lock); /* Driver porters: adding your driver here is optional, only needed for the support of multiple formats. */ switch(node_kind) { case STARPU_CPU_RAM: switch (starpu_node_get_kind(handle->mf_node)) { case STARPU_CPU_RAM: STARPU_ABORT(); #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) case STARPU_CUDA_RAM: { struct starpu_multiformat_data_interface_ops *mf_ops; mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface); conversion_task->cl = mf_ops->cuda_to_cpu_cl; break; } #endif #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) case STARPU_OPENCL_RAM: { struct starpu_multiformat_data_interface_ops *mf_ops; mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface); conversion_task->cl = mf_ops->opencl_to_cpu_cl; break; } #endif default: _STARPU_ERROR("Oops : %u\n", handle->mf_node); } break; #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) case STARPU_CUDA_RAM: { struct starpu_multiformat_data_interface_ops *mf_ops; mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface); conversion_task->cl = mf_ops->cpu_to_cuda_cl; break; } #endif #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) case STARPU_OPENCL_RAM: { struct starpu_multiformat_data_interface_ops *mf_ops; mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface); conversion_task->cl = mf_ops->cpu_to_opencl_cl; break; } #endif default: STARPU_ABORT(); } _starpu_codelet_check_deprecated_fields(conversion_task->cl); STARPU_TASK_SET_MODE(conversion_task, STARPU_RW, 0); return conversion_task; } static struct _starpu_sched_ctx* _get_next_sched_ctx_to_pop_into(struct _starpu_worker *worker) { struct _starpu_sched_ctx_elt *e = NULL; struct _starpu_sched_ctx_list_iterator list_it; int found = 0; _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) { e = _starpu_sched_ctx_list_iterator_get_next(&list_it); if (e->task_number > 0) return _starpu_get_sched_ctx_struct(e->sched_ctx); } _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) { e = _starpu_sched_ctx_list_iterator_get_next(&list_it); if (e->last_poped) { e->last_poped = 0; if (_starpu_sched_ctx_list_iterator_has_next(&list_it)) { e = _starpu_sched_ctx_list_iterator_get_next(&list_it); found = 1; } break; } } if (!found) e = worker->sched_ctx_list->head; e->last_poped = 1; return _starpu_get_sched_ctx_struct(e->sched_ctx); } struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker) { struct starpu_task *task; int worker_id; unsigned node; /* We can't tell in advance which task will be picked up, so we measure * a timestamp, and will attribute it afterwards to the task. */ int profiling = starpu_profiling_status_get(); struct timespec pop_start_time; if (profiling) _starpu_clock_gettime(&pop_start_time); pick: /* perhaps there is some local task to be executed first */ task = _starpu_pop_local_task(worker); if (task) _STARPU_TASK_BREAK_ON(task, pop); /* get tasks from the stacks of the strategy */ if(!task) { struct _starpu_sched_ctx *sched_ctx ; #ifndef STARPU_NON_BLOCKING_DRIVERS int been_here[STARPU_NMAX_SCHED_CTXS]; int i; for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) been_here[i] = 0; while(!task) #endif { if(worker->nsched_ctxs == 1) sched_ctx = _starpu_get_initial_sched_ctx(); else { while(1) { /** Caution * If you use multiple contexts your scheduler *needs* * to update the variable task_number of the ctx list. * In order to get the best performances. * This is done using functions : * starpu_sched_ctx_list_task_counters_increment...(...) * starpu_sched_ctx_list_task_counters_decrement...(...) **/ sched_ctx = _get_next_sched_ctx_to_pop_into(worker); if(worker->removed_from_ctx[sched_ctx->id] == 1 && worker->shares_tasks_lists[sched_ctx->id] == 1) { _starpu_worker_gets_out_of_ctx(sched_ctx->id, worker); worker->removed_from_ctx[sched_ctx->id] = 0; sched_ctx = NULL; } else break; } } if(sched_ctx && sched_ctx->id != STARPU_NMAX_SCHED_CTXS) { if (sched_ctx->sched_policy && sched_ctx->sched_policy->pop_task) { /* Note: we do not push the scheduling state here, because * otherwise when a worker is idle, we'd keep * pushing/popping a scheduling state here, while what we * want to see in the trace is a permanent idle state. */ task = sched_ctx->sched_policy->pop_task(sched_ctx->id); if (task) _STARPU_TASK_BREAK_ON(task, pop); _starpu_pop_task_end(task); } } if(!task) { /* it doesn't matter if it shares tasks list or not in the scheduler, if it does not have any task to pop just get it out of here */ /* however if it shares a task list it will be removed as soon as he finishes this job (in handle_job_termination) */ if(worker->removed_from_ctx[sched_ctx->id]) { _starpu_worker_gets_out_of_ctx(sched_ctx->id, worker); worker->removed_from_ctx[sched_ctx->id] = 0; } #ifdef STARPU_USE_SC_HYPERVISOR if(worker->pop_ctx_priority) { struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters; if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_idle_cycle && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id)) { // _STARPU_TRACE_HYPERVISOR_BEGIN(); perf_counters->notify_idle_cycle(sched_ctx->id, worker->workerid, 1.0); // _STARPU_TRACE_HYPERVISOR_END(); } } #endif //STARPU_USE_SC_HYPERVISOR #ifndef STARPU_NON_BLOCKING_DRIVERS if(been_here[sched_ctx->id] || worker->nsched_ctxs == 1) break; been_here[sched_ctx->id] = 1; #endif } } } if (!task) { if (starpu_idle_file) idle_start[worker->workerid] = starpu_timing_now(); return NULL; } if(starpu_idle_file && idle_start[worker->workerid] != 0.0) { double idle_end = starpu_timing_now(); idle[worker->workerid] += (idle_end - idle_start[worker->workerid]); idle_start[worker->workerid] = 0.0; } #ifdef STARPU_USE_SC_HYPERVISOR struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx); struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters; if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_poped_task && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id)) { // _STARPU_TRACE_HYPERVISOR_BEGIN(); perf_counters->notify_poped_task(task->sched_ctx, worker->workerid); // _STARPU_TRACE_HYPERVISOR_END(); } #endif //STARPU_USE_SC_HYPERVISOR /* Make sure we do not bother with all the multiformat-specific code if * it is not necessary. */ if (!_starpu_task_uses_multiformat_handles(task)) goto profiling; /* This is either a conversion task, or a regular task for which the * conversion tasks have already been created and submitted */ if (task->mf_skip) goto profiling; /* * This worker may not be able to execute this task. In this case, we * should return the task anyway. It will be pushed back almost immediately. * This way, we avoid computing and executing the conversions tasks. * Here, we do not care about what implementation is used. */ worker_id = starpu_worker_get_id_check(); if (!starpu_worker_can_execute_task_first_impl(worker_id, task, NULL)) return task; node = starpu_worker_get_memory_node(worker_id); /* * We do have a task that uses multiformat handles. Let's create the * required conversion tasks. */ unsigned i; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); for (i = 0; i < nbuffers; i++) { struct starpu_task *conversion_task; starpu_data_handle_t handle; handle = STARPU_TASK_GET_HANDLE(task, i); if (!_starpu_handle_needs_conversion_task(handle, node)) continue; conversion_task = _starpu_create_conversion_task(handle, node); conversion_task->mf_skip = 1; conversion_task->execute_on_a_specific_worker = 1; conversion_task->workerid = worker_id; /* * Next tasks will need to know where these handles have gone. */ handle->mf_node = node; _starpu_task_submit_conversion_task(conversion_task, worker_id); } task->mf_skip = 1; starpu_task_prio_list_push_back(&worker->local_tasks, task); goto pick; profiling: if (profiling) { struct starpu_profiling_task_info *profiling_info; profiling_info = task->profiling_info; /* The task may have been created before profiling was enabled, * so we check if the profiling_info structure is available * even though we already tested if profiling is enabled. */ if (profiling_info) { profiling_info->pop_start_time = pop_start_time; _starpu_clock_gettime(&profiling_info->pop_end_time); } } if(task->prologue_callback_pop_func) { _starpu_set_current_task(task); task->prologue_callback_pop_func(task->prologue_callback_pop_arg); _starpu_set_current_task(NULL); } return task; } void _starpu_sched_pre_exec_hook(struct starpu_task *task) { unsigned sched_ctx_id = starpu_sched_ctx_get_ctx_for_task(task); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); if (sched_ctx->sched_policy && sched_ctx->sched_policy->pre_exec_hook) { _STARPU_SCHED_BEGIN; sched_ctx->sched_policy->pre_exec_hook(task, sched_ctx_id); _STARPU_SCHED_END; } if(!sched_ctx->sched_policy) { int workerid = starpu_worker_get_id(); struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); struct _starpu_sched_ctx_list_iterator list_it; _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) { struct _starpu_sched_ctx *other_sched_ctx; struct _starpu_sched_ctx_elt *e; e = _starpu_sched_ctx_list_iterator_get_next(&list_it); other_sched_ctx = _starpu_get_sched_ctx_struct(e->sched_ctx); if (other_sched_ctx != sched_ctx && other_sched_ctx->sched_policy != NULL && other_sched_ctx->sched_policy->pre_exec_hook) { _STARPU_SCHED_BEGIN; other_sched_ctx->sched_policy->pre_exec_hook(task, other_sched_ctx->id); _STARPU_SCHED_END; } } } } void _starpu_sched_post_exec_hook(struct starpu_task *task) { STARPU_ASSERT(task->cl != NULL && task->cl->where != STARPU_NOWHERE); unsigned sched_ctx_id = starpu_sched_ctx_get_ctx_for_task(task); struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); if (sched_ctx->sched_policy && sched_ctx->sched_policy->post_exec_hook) { _STARPU_SCHED_BEGIN; sched_ctx->sched_policy->post_exec_hook(task, sched_ctx_id); _STARPU_SCHED_END; } if(!sched_ctx->sched_policy) { int workerid = starpu_worker_get_id(); struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); struct _starpu_sched_ctx_list_iterator list_it; _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) { struct _starpu_sched_ctx *other_sched_ctx; struct _starpu_sched_ctx_elt *e; e = _starpu_sched_ctx_list_iterator_get_next(&list_it); other_sched_ctx = _starpu_get_sched_ctx_struct(e->sched_ctx); if (other_sched_ctx != sched_ctx && other_sched_ctx->sched_policy != NULL && other_sched_ctx->sched_policy->post_exec_hook) { _STARPU_SCHED_BEGIN; other_sched_ctx->sched_policy->post_exec_hook(task, other_sched_ctx->id); _STARPU_SCHED_END; } } } } int starpu_push_local_task(int workerid, struct starpu_task *task, int back STARPU_ATTRIBUTE_UNUSED) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); return _starpu_push_local_task(worker, task); } void _starpu_print_idle_time() { if(!starpu_idle_file) return; double all_idle = 0.0; int i = 0; for(i = 0; i < STARPU_NMAXWORKERS; i++) all_idle += idle[i]; FILE *f; f = fopen(starpu_idle_file, "a"); if (!f) { _STARPU_MSG("couldn't open %s: %s\n", starpu_idle_file, strerror(errno)); } else { fprintf(f, "%lf \n", all_idle); fclose(f); } } void starpu_sched_task_break(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED) { _STARPU_TASK_BREAK_ON(task, sched); } starpu-1.4.9+dfsg/src/core/sched_policy.h000066400000000000000000000136301507764646700203500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __SCHED_POLICY_H__ #define __SCHED_POLICY_H__ /** @file */ #include #include #include #include #include #include #pragma GCC visibility push(hidden) #define _STARPU_SCHED_BEGIN \ _STARPU_TRACE_WORKER_SCHEDULING_PUSH; \ _SIMGRID_TIMER_BEGIN(_starpu_simgrid_sched_cost()) #define _STARPU_SCHED_END \ _SIMGRID_TIMER_END; \ _STARPU_TRACE_WORKER_SCHEDULING_POP void _starpu_sched_init(void); struct starpu_machine_config; struct starpu_sched_policy *_starpu_get_sched_policy(struct _starpu_sched_ctx *sched_ctx); void _starpu_init_sched_policy(struct _starpu_machine_config *config, struct _starpu_sched_ctx *sched_ctx, struct starpu_sched_policy *policy); void _starpu_deinit_sched_policy(struct _starpu_sched_ctx *sched_ctx); struct starpu_sched_policy *_starpu_select_sched_policy(struct _starpu_machine_config *config, const char *required_policy); void _starpu_sched_task_submit(struct starpu_task *task); void _starpu_sched_do_schedule(unsigned sched_ctx_id); int _starpu_push_task(struct _starpu_job *task); int _starpu_repush_task(struct _starpu_job *task); /** actually pushes the tasks to the specific worker or to the scheduler */ int _starpu_push_task_to_workers(struct starpu_task *task); /** pop a task that can be executed on the worker */ struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker); void _starpu_sched_post_exec_hook(struct starpu_task *task); int _starpu_pop_task_end(struct starpu_task *task); struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle, unsigned int node) STARPU_ATTRIBUTE_MALLOC; struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t handle, enum starpu_node_kind node_kind) STARPU_ATTRIBUTE_MALLOC; void _starpu_sched_pre_exec_hook(struct starpu_task *task); void _starpu_print_idle_time(); /* * Predefined policies */ extern struct starpu_sched_policy _starpu_sched_lws_policy; extern struct starpu_sched_policy _starpu_sched_ws_policy; extern struct starpu_sched_policy _starpu_sched_prio_policy; extern struct starpu_sched_policy _starpu_sched_random_policy; extern struct starpu_sched_policy _starpu_sched_dm_policy; extern struct starpu_sched_policy _starpu_sched_dmda_policy STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; extern struct starpu_sched_policy _starpu_sched_dmda_prio_policy; extern struct starpu_sched_policy _starpu_sched_dmda_ready_policy; extern struct starpu_sched_policy _starpu_sched_dmda_sorted_policy; extern struct starpu_sched_policy _starpu_sched_dmda_sorted_decision_policy; extern struct starpu_sched_policy _starpu_sched_eager_policy; extern struct starpu_sched_policy _starpu_sched_parallel_heft_policy STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; extern struct starpu_sched_policy _starpu_sched_peager_policy; extern struct starpu_sched_policy _starpu_sched_heteroprio_policy; extern struct starpu_sched_policy _starpu_sched_modular_eager_policy; extern struct starpu_sched_policy _starpu_sched_modular_eager_prefetching_policy; extern struct starpu_sched_policy _starpu_sched_modular_eager_prio_policy; extern struct starpu_sched_policy _starpu_sched_modular_gemm_policy; extern struct starpu_sched_policy _starpu_sched_modular_prio_policy; extern struct starpu_sched_policy _starpu_sched_modular_prio_prefetching_policy; extern struct starpu_sched_policy _starpu_sched_modular_random_policy; extern struct starpu_sched_policy _starpu_sched_modular_random_prio_policy; extern struct starpu_sched_policy _starpu_sched_modular_random_prefetching_policy; extern struct starpu_sched_policy _starpu_sched_modular_random_prio_prefetching_policy; extern struct starpu_sched_policy _starpu_sched_modular_parallel_random_policy; extern struct starpu_sched_policy _starpu_sched_modular_parallel_random_prio_policy; extern struct starpu_sched_policy _starpu_sched_modular_ws_policy; extern struct starpu_sched_policy _starpu_sched_modular_dmda_policy; extern struct starpu_sched_policy _starpu_sched_modular_dmdap_policy; extern struct starpu_sched_policy _starpu_sched_modular_dmdar_policy; extern struct starpu_sched_policy _starpu_sched_modular_dmdas_policy; extern struct starpu_sched_policy _starpu_sched_modular_heft_policy; extern struct starpu_sched_policy _starpu_sched_modular_heft_prio_policy; extern struct starpu_sched_policy _starpu_sched_modular_heft2_policy; extern struct starpu_sched_policy _starpu_sched_modular_heteroprio_policy; extern struct starpu_sched_policy _starpu_sched_modular_heteroprio_heft_policy; extern struct starpu_sched_policy _starpu_sched_modular_parallel_heft_policy; extern struct starpu_sched_policy _starpu_sched_graph_test_policy; extern struct starpu_sched_policy _starpu_sched_tree_heft_hierarchical_policy; extern long _starpu_task_break_on_push; extern long _starpu_task_break_on_sched; extern long _starpu_task_break_on_pop; extern long _starpu_task_break_on_exec; #ifdef SIGTRAP #define _STARPU_TASK_BREAK_ON(task, what) do { \ if (_starpu_get_job_associated_to_task(task)->job_id == (unsigned long) _starpu_task_break_on_##what) \ raise(SIGTRAP); \ } while(0) #else #define _STARPU_TASK_BREAK_ON(task, what) ((void) 0) #endif #pragma GCC visibility pop #endif // __SCHED_POLICY_H__ starpu-1.4.9+dfsg/src/core/simgrid.c000066400000000000000000001310511507764646700173320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #include #if defined(HAVE_SIMGRID_SIMDAG_H) && (SIMGRID_VERSION >= 31300) #include #endif #ifdef STARPU_SIMGRID #ifdef HAVE_GETRLIMIT #include #endif #if (defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach) || defined(HAVE_SG_ACTOR_ATTACH)) \ && !defined(HAVE_SIMGRID_SET_MAESTRO) #include #endif #ifdef STARPU_HAVE_SIMGRID_HOST_H #include #endif #ifdef STARPU_HAVE_SIMGRID_LINK_H #include #endif #ifdef STARPU_HAVE_SIMGRID_ENGINE_H #include #endif #ifdef STARPU_HAVE_XBT_CONFIG_H #include #endif #include #include #pragma weak starpu_main extern int starpu_main(int argc, char *argv[]); #if SIMGRID_VERSION < 31600 #pragma weak smpi_main extern int smpi_main(int (*realmain) (int argc, char *argv[]), int argc, char *argv[]); #endif #pragma weak _starpu_mpi_simgrid_init extern int _starpu_mpi_simgrid_init(int argc, char *argv[]); #pragma weak smpi_process_set_user_data #if !HAVE_DECL_SMPI_PROCESS_SET_USER_DATA && !defined(smpi_process_set_user_data) extern void smpi_process_set_user_data(void *); #endif static double _starpu_simgrid_dynamic_energy = 0.0; /* 1 when MSG_init was done, 2 when initialized through redirected main, 3 when * initialized through MSG_process_attach */ static int simgrid_started; static int simgrid_transfer_cost = 1; static int runners_running; starpu_pthread_queue_t _starpu_simgrid_transfer_queue[STARPU_MAXNODES]; static struct transfer_runner { struct transfer *first_transfer, *last_transfer; starpu_sem_t sem; starpu_pthread_t runner; } transfer_runner[STARPU_MAXNODES][STARPU_MAXNODES]; static void *transfer_execute(void *arg); starpu_pthread_queue_t _starpu_simgrid_task_queue[STARPU_NMAXWORKERS]; static struct worker_runner { struct task *first_task, *last_task; starpu_sem_t sem; starpu_pthread_t runner; } worker_runner[STARPU_NMAXWORKERS]; static void *task_execute(void *arg); struct _starpu_simgrid_event { unsigned finished; starpu_pthread_queue_t *queue; }; static inline struct _starpu_simgrid_event *_starpu_simgrid_event(union _starpu_async_channel_event *_event) { struct _starpu_simgrid_event *event; STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); event = (void *) _event; return event; } size_t _starpu_default_stack_size = 8192; void _starpu_simgrid_set_stack_size(size_t stack_size) { #ifdef HAVE_SG_CFG_SET_INT sg_cfg_set_int("contexts/stack-size", stack_size); #elif SIMGRID_VERSION >= 31300 xbt_cfg_set_int("contexts/stack-size", stack_size); #else extern xbt_cfg_t _sg_cfg_set; xbt_cfg_set_int(_sg_cfg_set, "contexts/stack_size", stack_size); #endif } #ifdef HAVE_SG_ACTOR_ON_EXIT static void on_exit_backtrace(int failed, void *data STARPU_ATTRIBUTE_UNUSED) { if (failed) xbt_backtrace_display_current(); } #endif void _starpu_simgrid_actor_setup(void) { #ifdef HAVE_SG_ACTOR_ON_EXIT sg_actor_on_exit(on_exit_backtrace, NULL); #endif } #if defined(HAVE_SG_ZONE_GET_BY_NAME) || defined(sg_zone_get_by_name) #define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME sg_netzone_t _starpu_simgrid_get_as_by_name(const char *name) { return sg_zone_get_by_name(name); } #elif defined(HAVE_MSG_ZONE_GET_BY_NAME) || defined(MSG_zone_get_by_name) #define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME msg_as_t _starpu_simgrid_get_as_by_name(const char *name) { return MSG_zone_get_by_name(name); } #elif defined(HAVE_MSG_GET_AS_BY_NAME) || defined(MSG_get_as_by_name) #define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME msg_as_t _starpu_simgrid_get_as_by_name(const char *name) { return MSG_get_as_by_name(name); } #elif defined(HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT) || defined(MSG_environment_as_get_routing_sons) #define HAVE_STARPU_SIMGRID_GET_AS_BY_NAME static msg_as_t __starpu_simgrid_get_as_by_name(msg_as_t root, const char *name) { xbt_dict_t dict; xbt_dict_cursor_t cursor; const char *key; msg_as_t as, ret; dict = MSG_environment_as_get_routing_sons(root); xbt_dict_foreach(dict, cursor, key, as) { if (!strcmp(MSG_environment_as_get_name(as), name)) return as; ret = __starpu_simgrid_get_as_by_name(as, name); if (ret) return ret; } return NULL; } msg_as_t _starpu_simgrid_get_as_by_name(const char *name) { return __starpu_simgrid_get_as_by_name(MSG_environment_get_routing_root(), name); } #endif /* HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT */ int _starpu_simgrid_get_nbhosts(const char *prefix) { #ifdef HAVE_STARPU_SIMGRID_GET_AS_BY_NAME char new_prefix[32+strlen(prefix)]; #endif int ret; #ifdef HAVE_SG_HOST_LIST sg_host_t *hosts_list = NULL; #endif #if defined(HAVE_SG_ZONE_GET_ALL_HOSTS) const_sg_host_t *hosts = NULL; #else xbt_dynar_t hosts = NULL; #endif int i; int nb = 0; unsigned len = strlen(prefix); if (_starpu_simgrid_running_smpi()) { #ifdef HAVE_STARPU_SIMGRID_GET_AS_BY_NAME char name[32]; STARPU_ASSERT(starpu_mpi_world_rank); snprintf(name, sizeof(name), STARPU_MPI_AS_PREFIX"%d", starpu_mpi_world_rank()); #if defined(HAVE_SG_ZONE_GET_ALL_HOSTS) hosts = sg_zone_get_all_hosts(_starpu_simgrid_get_as_by_name(name), &nb); #elif defined(HAVE_MSG_ZONE_GET_HOSTS) || defined(HAVE_SG_ZONE_GET_HOSTS) || defined(MSG_zone_get_hosts) || defined(sg_zone_get_hosts) hosts = xbt_dynar_new(sizeof(sg_host_t), NULL); # if defined(HAVE_SG_ZONE_GET_HOSTS) || defined(sg_zone_get_hosts) sg_zone_get_hosts(_starpu_simgrid_get_as_by_name(name), hosts); # else MSG_zone_get_hosts(_starpu_simgrid_get_as_by_name(name), hosts); # endif #else hosts = MSG_environment_as_get_hosts(_starpu_simgrid_get_as_by_name(name)); #endif snprintf(new_prefix, sizeof(new_prefix), "%s-%s", name, prefix); prefix = new_prefix; len = strlen(prefix); #else STARPU_ABORT_MSG("can not continue without an implementation for _starpu_simgrid_get_as_by_name"); #endif /* HAVE_STARPU_SIMGRID_GET_AS_BY_NAME */ } else { #ifdef HAVE_SG_HOST_LIST hosts_list = sg_host_list(); nb = sg_host_count(); #elif defined(STARPU_HAVE_SIMGRID_HOST_H) hosts = sg_hosts_as_dynar(); #else hosts = MSG_hosts_as_dynar(); #endif } #if !defined(HAVE_SG_ZONE_GET_ALL_HOSTS) if (hosts) nb = xbt_dynar_length(hosts); #endif ret = 0; for (i = 0; i < nb; i++) { const char *name; #ifdef HAVE_SG_HOST_LIST if (hosts_list) name = sg_host_get_name(hosts_list[i]); else #endif #if defined(HAVE_SG_ZONE_GET_ALL_HOSTS) name = sg_host_get_name(hosts[i]); #elif defined(STARPU_HAVE_SIMGRID_HOST_H) name = sg_host_get_name(xbt_dynar_get_as(hosts, i, sg_host_t)); #else name = MSG_host_get_name(xbt_dynar_get_as(hosts, i, msg_host_t)); #endif if (!strncmp(name, prefix, len)) ret++; } #if !defined(HAVE_SG_ZONE_GET_ALL_HOSTS) if (hosts) xbt_dynar_free(&hosts); #endif return ret; } static starpu_sg_host_t _starpu_simgrid_get_host(const char *prefix, unsigned devid) { char name[32]; snprintf(name, sizeof(name), "%s%u", prefix, devid); return _starpu_simgrid_get_host_by_name(name); } unsigned long long _starpu_simgrid_get_memsize(const char *prefix, unsigned devid) { starpu_sg_host_t host; const char *memsize; host = _starpu_simgrid_get_host(prefix, devid); if (!host) return 0; #if defined(HAVE_SG_HOST_GET_PROPERTY_NAMES) if (!sg_host_get_property_names(host, NULL)) #elif defined(HAVE_SG_HOST_GET_PROPERTIES) if (!sg_host_get_properties(host)) #else if (!MSG_host_get_properties(host)) #endif return 0; #ifdef HAVE_SG_HOST_GET_PROPERTIES memsize = sg_host_get_property_value(host, "memsize"); #else memsize = MSG_host_get_property_value(host, "memsize"); #endif if (!memsize) return 0; return atoll(memsize); } const char *_starpu_simgrid_get_devname(const char *prefix, unsigned devid) { starpu_sg_host_t host; host = _starpu_simgrid_get_host(prefix, devid); if (!host) return 0; #if defined(HAVE_SG_HOST_GET_PROPERTY_NAMES) if (!sg_host_get_property_names(host, NULL)) #elif defined(HAVE_SG_HOST_GET_PROPERTIES) if (!sg_host_get_properties(host)) #else if (!MSG_host_get_properties(host)) #endif return 0; #ifdef HAVE_SG_HOST_GET_PROPERTIES return sg_host_get_property_value(host, "model"); #else return MSG_host_get_property_value(host, "model"); #endif } starpu_sg_host_t _starpu_simgrid_get_host_by_name(const char *name) { if (_starpu_simgrid_running_smpi()) { char mpiname[32]; STARPU_ASSERT(starpu_mpi_world_rank); snprintf(mpiname, sizeof(mpiname), STARPU_MPI_AS_PREFIX"%d-%s", starpu_mpi_world_rank(), name); #ifdef STARPU_HAVE_SIMGRID_HOST_H return sg_host_by_name(mpiname); #else return MSG_get_host_by_name(mpiname); #endif } else #ifdef STARPU_HAVE_SIMGRID_HOST_H return sg_host_by_name(name); #else return MSG_get_host_by_name(name); #endif } starpu_sg_host_t _starpu_simgrid_get_host_by_worker(struct _starpu_worker *worker) { const char *prefix; char name[16]; starpu_sg_host_t host; prefix = starpu_driver_info[worker->arch].name_upper; STARPU_ASSERT(prefix); snprintf(name, sizeof(name), "%s%u", prefix, worker->devid); host = _starpu_simgrid_get_host_by_name(name); STARPU_ASSERT_MSG(host, "Could not find host %s!", name); return host; } #ifdef STARPU_USE_MPI /* Simgrid up to 3.15 would rename main into smpi_simulated_main_, and call that * from SMPI initialization * In case the MPI application didn't use smpicc to build the file containing * main(), but included our #define main starpu_main, try to cope by calling * starpu_main */ int _starpu_smpi_simulated_main_(int argc, char *argv[]) { if (!starpu_main) { _STARPU_ERROR("In simgrid mode, the file containing the main() function of this application needs to be compiled with starpu.h or starpu_simgrid_wrap.h included, to properly rename it into starpu_main\n"); } return starpu_main(argc, argv); } int smpi_simulated_main_(int argc, char *argv[]) __attribute__((weak, alias("_starpu_smpi_simulated_main_"))); #endif /* This is used to start a non-MPI simgrid environment */ void _starpu_start_simgrid(int *argc, char **argv) { char path[256]; if (simgrid_started) return; simgrid_started = 1; #if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT) simgrid_init(argc, argv); #else MSG_init(argc, argv); #endif /* Simgrid uses tiny stacks by default. This comes unexpected to our users. */ #ifdef HAVE_GETRLIMIT struct rlimit rlim; if (getrlimit(RLIMIT_STACK, &rlim) == 0 && rlim.rlim_cur != 0 && rlim.rlim_cur != RLIM_INFINITY) _starpu_default_stack_size = rlim.rlim_cur / 1024; #endif _starpu_simgrid_set_stack_size(_starpu_default_stack_size); /* Load XML platform */ #if SIMGRID_VERSION < 31300 _starpu_simgrid_get_platform_path(3, path, sizeof(path)); #else _starpu_simgrid_get_platform_path(4, path, sizeof(path)); #endif if (access(path, R_OK) != 0) { fprintf(stderr, "Machine performance file <%s> does not exist, please re-run in non-simgrid mode to calibrate it, or fix the STARPU_HOSTNAME and STARPU_PERF_MODEL_DIR environment variables\n", path); _exit(EXIT_FAILURE); } #if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT) simgrid_load_platform(path); #else MSG_create_environment(path); #endif int limit_bandwidth = starpu_getenv_number("STARPU_LIMIT_BANDWIDTH"); if (limit_bandwidth >= 0) { #if defined(HAVE_SG_LINK_BANDWIDTH_SET) || defined(HAVE_SG_LINK_SET_BANDWIDTH) sg_link_t *links = sg_link_list(); int count = sg_link_count(), i; for (i = 0; i < count; i++) { #ifdef HAVE_SG_LINK_SET_BANDWIDTH sg_link_set_bandwidth(links[i], limit_bandwidth * 1000000.); #else sg_link_bandwidth_set(links[i], limit_bandwidth * 1000000.); #endif } #else _STARPU_DISP("Warning: STARPU_LIMIT_BANDWIDTH set to %d but this requires simgrid 3.26, thus ignored\n", limit_bandwidth); #endif } simgrid_transfer_cost = starpu_getenv_number_default("STARPU_SIMGRID_TRANSFER_COST", 1); } static int run_starpu_main(int argc, char *argv[]) { /* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */ starpu_sleep(0.000001); _starpu_simgrid_actor_setup(); if (!starpu_main) { _STARPU_ERROR("In simgrid mode, the file containing the main() function of this application needs to be compiled with starpu.h or starpu_simgrid_wrap.h included, to properly rename it into starpu_main\n"); } return starpu_main(argc, argv); } static int main_ret; static _starpu_simgrid_main_ret do_starpu_main(int argc, char *argv[]) { main_ret = run_starpu_main(argc, argv); _STARPU_SIMGRID_MAIN_RETURN; } /* We need it only when using smpi */ #pragma weak smpi_process_get_user_data extern void *smpi_process_get_user_data(); /* This is hopefully called before the application and simgrid */ #undef main #pragma weak main int main(int argc, char **argv) { #ifdef HAVE_SG_CONFIG_CONTINUE_AFTER_HELP sg_config_continue_after_help(); #endif if (_starpu_simgrid_running_smpi()) { if (!smpi_process_get_user_data) { _STARPU_ERROR("Your version of simgrid does not provide smpi_process_get_user_data, we can not continue without it\n"); } #if SIMGRID_VERSION >= 31600 /* Recent versions of simgrid dlopen() us, so we don't need to * do circumvolutions, just init MPI early and run the application's main */ return _starpu_mpi_simgrid_init(argc, argv); #else /* Oops, we are running old SMPI, let it start Simgrid, and we'll * take back hand in _starpu_simgrid_init from starpu_init() */ return smpi_main(_starpu_mpi_simgrid_init, argc, argv); #endif } /* Already initialized? It probably has been done through a * constructor and MSG_process_attach, directly jump to real main */ if (simgrid_started == 3) { return run_starpu_main(argc, argv); } /* Managed to catch application's main, initialize simgrid first */ _starpu_start_simgrid(&argc, argv); simgrid_started = 2; /* Create a simgrid process for main */ char **argv_cpy; _STARPU_MALLOC(argv_cpy, argc * sizeof(char*)); int i; for (i = 0; i < argc; i++) argv_cpy[i] = strdup(argv[i]); /* Run the application in a separate thread */ _starpu_simgrid_actor_create("main", &do_starpu_main, _starpu_simgrid_get_host_by_name("MAIN"), argc, argv_cpy); /* And run maestro in the main thread */ #if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT) simgrid_run(); #else MSG_main(); #endif return main_ret; } #if defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach) || defined(HAVE_SG_ACTOR_ATTACH) static void maestro(void *data STARPU_ATTRIBUTE_UNUSED) { #if defined(STARPU_SIMGRID_HAVE_SIMGRID_INIT) && defined(HAVE_SG_ACTOR_INIT) simgrid_run(); #else MSG_main(); #endif } #endif /* This is called early from starpu_init, so thread functions etc. can work */ void _starpu_simgrid_init_early(int *argc STARPU_ATTRIBUTE_UNUSED, char ***argv STARPU_ATTRIBUTE_UNUSED) { #ifdef HAVE_SG_CONFIG_CONTINUE_AFTER_HELP sg_config_continue_after_help(); #endif #if defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach) || defined(HAVE_SG_ACTOR_ATTACH) if (simgrid_started < 2 && !_starpu_simgrid_running_smpi()) { /* "Cannot create_maestro with this ContextFactory. * Try using --cfg=contexts/factory:thread instead." * See https://github.com/simgrid/simgrid/issues/141 */ _STARPU_DISP("Warning: In simgrid mode, the file containing the main() function of this application should to be compiled with starpu.h or starpu_simgrid_wrap.h included, to properly rename it into starpu_main to avoid having to use --cfg=contexts/factory:thread which reduces performance\n"); #if SIMGRID_VERSION >= 31400 /* Only recent versions of simgrid support setting sg_cfg_set_string before starting simgrid */ # ifdef HAVE_SG_CFG_SET_INT sg_cfg_set_string("contexts/factory", "thread"); # else xbt_cfg_set_string("contexts/factory", "thread"); # endif #endif /* We didn't catch application's main. */ /* Start maestro as a separate thread */ #ifdef HAVE_SIMGRID_SET_MAESTRO simgrid_set_maestro(maestro, NULL); #else SIMIX_set_maestro(maestro, NULL); #endif /* Initialize simgrid */ int no_argc = 1; char *starpu = "starpu", *no_argv [] = { starpu, NULL }; _starpu_start_simgrid(argc ? argc : &no_argc, argv ? *argv : no_argv); /* And attach the main thread to the main simgrid process */ void **tsd; _STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*)); #if (defined(HAVE_SG_ACTOR_ATTACH_PTHREAD) || defined(HAVE_SG_ACTOR_ATTACH)) && (defined (HAVE_SG_ACTOR_DATA) || defined(HAVE_SG_ACTOR_GET_DATA)) #ifdef HAVE_SG_ACTOR_ATTACH_PTHREAD sg_actor_t actor = sg_actor_attach_pthread("main", NULL, _starpu_simgrid_get_host_by_name("MAIN")); #else sg_actor_t actor = sg_actor_attach("main", NULL, _starpu_simgrid_get_host_by_name("MAIN"), NULL); #endif #ifdef HAVE_SG_ACTOR_SET_DATA sg_actor_set_data(actor, tsd); #else sg_actor_data_set(actor, tsd); #endif #else MSG_process_attach("main", tsd, _starpu_simgrid_get_host_by_name("MAIN"), NULL); #endif /* We initialized through MSG_process_attach */ simgrid_started = 3; } #endif if (!simgrid_started && !starpu_main && !_starpu_simgrid_running_smpi()) { /* Oops, we don't have MSG_process_attach and didn't catch the * 'main' symbol, there is no way for us */ _STARPU_ERROR("In simgrid mode, the file containing the main() function of this application needs to be compiled with starpu.h or starpu_simgrid_wrap.h included, to properly rename it into starpu_main\n"); } if (_starpu_simgrid_running_smpi()) { #ifndef STARPU_STATIC_ONLY _STARPU_ERROR("Simgrid currently does not support privatization for dynamically-linked libraries in SMPI. Please reconfigure and build StarPU with --disable-shared"); #endif #if defined(HAVE_MSG_PROCESS_USERDATA_INIT) && !(defined(HAVE_SG_ACTOR_DATA) || defined(HAVE_SG_ACTOR_GET_DATA)) MSG_process_userdata_init(); #endif void **tsd; _STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*)); #ifdef HAVE_SG_ACTOR_DATA #ifdef HAVE_SG_ACTOR_SET_DATA sg_actor_set_data(sg_actor_self(), tsd); #else sg_actor_data_set(sg_actor_self(), tsd); #endif #else smpi_process_set_user_data(tsd); #endif } unsigned i; for (i = 0; i < STARPU_MAXNODES; i++) starpu_pthread_queue_init(&_starpu_simgrid_transfer_queue[i]); for (i = 0; i < STARPU_NMAXWORKERS; i++) starpu_pthread_queue_init(&_starpu_simgrid_task_queue[i]); _starpu_simgrid_cpp_init(); } /* This is called late from starpu_init, to start task executors */ void _starpu_simgrid_init(void) { unsigned i; runners_running = 1; for (i = 0; i < starpu_worker_get_count(); i++) { char s[32]; snprintf(s, sizeof(s), "worker %u runner", i); starpu_sem_init(&worker_runner[i].sem, 0, 0); starpu_pthread_create_on(s, &worker_runner[i].runner, NULL, task_execute, (void*)(uintptr_t) i, _starpu_simgrid_get_host_by_worker(_starpu_get_worker_struct(i))); } } void _starpu_simgrid_deinit_late(void) { #if defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach) || defined(HAVE_SG_ACTOR_ATTACH) if (simgrid_started == 3) { /* Started with MSG_process_attach, now detach */ #ifdef HAVE_SG_ACTOR_ATTACH sg_actor_detach(); #else MSG_process_detach(); #endif simgrid_started = 0; } #endif } void _starpu_simgrid_deinit(void) { unsigned i, j; runners_running = 0; for (i = 0; i < STARPU_MAXNODES; i++) { for (j = 0; j < STARPU_MAXNODES; j++) { struct transfer_runner *t = &transfer_runner[i][j]; if (t->runner) { starpu_sem_post(&t->sem); #ifdef STARPU_HAVE_SIMGRID_ACTOR_H sg_actor_join(t->runner, 1000000); #elif SIMGRID_VERSION >= 31400 MSG_process_join(t->runner, 1000000); #else starpu_sleep(1); #endif STARPU_ASSERT(t->first_transfer == NULL); STARPU_ASSERT(t->last_transfer == NULL); starpu_sem_destroy(&t->sem); } } /* FIXME: queue not empty at this point, needs proper unregistration */ /* starpu_pthread_queue_destroy(&_starpu_simgrid_transfer_queue[i]); */ } for (i = 0; i < starpu_worker_get_count(); i++) { struct worker_runner *w = &worker_runner[i]; starpu_sem_post(&w->sem); #ifdef STARPU_HAVE_SIMGRID_ACTOR_H sg_actor_join(w->runner, 1000000); #elif SIMGRID_VERSION >= 31400 MSG_process_join(w->runner, 1000000); #else starpu_sleep(1); #endif STARPU_ASSERT(w->first_task == NULL); STARPU_ASSERT(w->last_task == NULL); starpu_sem_destroy(&w->sem); starpu_pthread_queue_destroy(&_starpu_simgrid_task_queue[i]); } #if SIMGRID_VERSION >= 31300 /* clean-atexit introduced in simgrid 3.13 */ # ifdef HAVE_SG_CFG_SET_INT if (sg_cfg_get_boolean("debug/clean-atexit")) # elif SIMGRID_VERSION >= 32300 if (xbt_cfg_get_boolean("debug/clean-atexit")) # else if (xbt_cfg_get_boolean("clean-atexit")) # endif { _starpu_simgrid_deinit_late(); } #endif } /* * Tasks */ struct task { #if defined(HAVE_SG_ACTOR_SELF_EXECUTE) || defined(HAVE_SG_ACTOR_EXECUTE) double flops; #else msg_task_t task; #endif double energy; /* communication termination signalization */ unsigned *finished; /* Next task on this worker */ struct task *next; }; /* Actually execute the task. */ static void *task_execute(void *arg) { unsigned workerid = (uintptr_t) arg; struct worker_runner *w = &worker_runner[workerid]; _STARPU_DEBUG("worker runner %u started\n", workerid); while (1) { struct task *task; starpu_sem_wait(&w->sem); if (!runners_running) break; task = w->first_task; w->first_task = task->next; if (w->last_task == task) w->last_task = NULL; _STARPU_DEBUG("task %p started\n", task); #ifdef HAVE_SG_ACTOR_EXECUTE sg_actor_execute(task->flops); #elif defined(HAVE_SG_ACTOR_SELF_EXECUTE) sg_actor_self_execute(task->flops); #else MSG_task_execute(task->task); MSG_task_destroy(task->task); #endif starpu_energy_use(task->energy); _STARPU_DEBUG("task %p finished\n", task); *task->finished = 1; /* The worker which started this task may be sleeping out of tasks, wake it */ _starpu_wake_worker_relax(workerid); free(task); } _STARPU_DEBUG("worker %u stopped\n", workerid); return 0; } /* Wait for completion of all asynchronous tasks for this worker */ void _starpu_simgrid_wait_tasks(int workerid) { struct task *task = worker_runner[workerid].last_task; if (!task) return; unsigned *finished = task->finished; starpu_pthread_wait_t wait; starpu_pthread_wait_init(&wait); starpu_pthread_queue_register(&wait, &_starpu_simgrid_task_queue[workerid]); while(1) { starpu_pthread_wait_reset(&wait); if (*finished) break; starpu_pthread_wait_wait(&wait); } starpu_pthread_queue_unregister(&wait, &_starpu_simgrid_task_queue[workerid]); starpu_pthread_wait_destroy(&wait); } /* Task execution submitted by StarPU */ void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch STARPU_ATTRIBUTE_UNUSED, double length, double energy, unsigned *finished) { struct starpu_task *starpu_task = j->task; double flops; #if !(defined(HAVE_SG_ACTOR_SELF_EXECUTE) || defined(HAVE_SG_ACTOR_EXECUTE)) msg_task_t simgrid_task; #endif if (j->internal) /* This is not useful to include in simulation (and probably * doesn't have a perfmodel anyway) */ return; if (isnan(length)) { length = starpu_task_worker_expected_length(starpu_task, workerid, sched_ctx_id, j->nimpl); if (STARPU_UNLIKELY(_STARPU_IS_ZERO(length) || isnan(length))) { fprintf(stderr, "Codelet %s does not have a perfmodel, or is not calibrated enough, please re-run in non-simgrid mode until it is calibrated, or fix the STARPU_HOSTNAME and STARPU_PERF_MODEL_DIR environment variables\n", _starpu_job_get_model_name(j)); _exit(EXIT_FAILURE); } /* TODO: option to add variance according to performance model, * to be able to easily check scheduling robustness */ } if (isnan(energy)) { energy = starpu_task_worker_expected_energy(starpu_task, workerid, sched_ctx_id, j->nimpl); /* TODO: option to add variance according to performance model, * to be able to easily check scheduling robustness */ } #ifdef HAVE_SG_HOST_GET_SPEED flops = length/1000000.0*sg_host_get_speed(sg_host_self()); #else #if defined(HAVE_SG_HOST_SPEED) || defined(sg_host_speed) # if defined(HAVE_SG_HOST_SELF) || defined(sg_host_self) flops = length/1000000.0*sg_host_speed(sg_host_self()); # else flops = length/1000000.0*sg_host_speed(MSG_host_self()); # endif #elif defined HAVE_MSG_HOST_GET_SPEED || defined(MSG_host_get_speed) flops = length/1000000.0*MSG_host_get_speed(MSG_host_self()); #else flops = length/1000000.0*MSG_get_host_speed(MSG_host_self()); #endif #endif #if !(defined(HAVE_SG_ACTOR_SELF_EXECUTE) || defined(HAVE_SG_ACTOR_EXECUTE)) simgrid_task = MSG_task_create(_starpu_job_get_task_name(j), flops, 0, NULL); #endif if (finished == NULL) { /* Synchronous execution */ /* First wait for previous tasks */ _starpu_simgrid_wait_tasks(workerid); #ifdef HAVE_SG_ACTOR_EXECUTE sg_actor_execute(flops); #elif defined(HAVE_SG_ACTOR_SELF_EXECUTE) sg_actor_self_execute(flops); #else MSG_task_execute(simgrid_task); MSG_task_destroy(simgrid_task); #endif starpu_energy_use(energy); } else { /* Asynchronous execution */ struct task *task; struct worker_runner *w = &worker_runner[workerid]; _STARPU_MALLOC(task, sizeof(*task)); #if defined(HAVE_SG_ACTOR_SELF_EXECUTE) || defined(HAVE_SG_ACTOR_EXECUTE) task->flops = flops; #else task->task = simgrid_task; #endif task->energy = energy; task->finished = finished; *finished = 0; task->next = NULL; /* Sleep 10µs for the GPU task queueing */ if (_starpu_simgrid_cuda_queue_cost()) starpu_sleep(0.000010); if (w->last_task) { /* Already running a task, queue */ w->last_task->next = task; w->last_task = task; } else { STARPU_ASSERT(!w->first_task); w->first_task = task; w->last_task = task; } starpu_sem_post(&w->sem); } } /* * Transfers */ /* Note: simgrid is not parallel, so there is no need to hold locks for management of transfers. */ LIST_TYPE(transfer, #if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO) size_t size; #else msg_task_t task; #endif int src_node; int dst_node; int run_node; /* communication termination signalization */ unsigned *finished; /* transfers which wait for this transfer */ struct transfer **wake; unsigned nwake; /* Number of transfers that this transfer waits for */ unsigned nwait; /* Next transfer on this stream */ struct transfer *next; ) static struct transfer_list pending; /* Tell for two transfers whether they should be handled in sequence */ static int transfers_are_sequential(struct transfer *new_transfer, struct transfer *old_transfer) { int new_is_cuda STARPU_ATTRIBUTE_UNUSED, old_is_cuda STARPU_ATTRIBUTE_UNUSED; int new_is_opencl STARPU_ATTRIBUTE_UNUSED, old_is_opencl STARPU_ATTRIBUTE_UNUSED; int new_is_gpu_gpu, old_is_gpu_gpu; new_is_cuda = starpu_node_get_kind(new_transfer->src_node) == STARPU_CUDA_RAM; new_is_cuda |= starpu_node_get_kind(new_transfer->dst_node) == STARPU_CUDA_RAM; old_is_cuda = starpu_node_get_kind(old_transfer->src_node) == STARPU_CUDA_RAM; old_is_cuda |= starpu_node_get_kind(old_transfer->dst_node) == STARPU_CUDA_RAM; new_is_opencl = starpu_node_get_kind(new_transfer->src_node) == STARPU_OPENCL_RAM; new_is_opencl |= starpu_node_get_kind(new_transfer->dst_node) == STARPU_OPENCL_RAM; old_is_opencl = starpu_node_get_kind(old_transfer->src_node) == STARPU_OPENCL_RAM; old_is_opencl |= starpu_node_get_kind(old_transfer->dst_node) == STARPU_OPENCL_RAM; new_is_gpu_gpu = new_transfer->src_node && new_transfer->dst_node; old_is_gpu_gpu = old_transfer->src_node && old_transfer->dst_node; /* We ignore cuda-opencl transfers, they can not happen */ STARPU_ASSERT(!((new_is_cuda && old_is_opencl) || (old_is_cuda && new_is_opencl))); /* The following constraints have been observed with CUDA alone */ /* Same source/destination, sequential */ if (new_transfer->src_node == old_transfer->src_node && new_transfer->dst_node == old_transfer->dst_node) return 1; /* Crossed GPU-GPU, sequential */ if (new_is_gpu_gpu && new_transfer->src_node == old_transfer->dst_node && old_transfer->src_node == new_transfer->dst_node) return 1; /* GPU-GPU transfers are sequential with any RAM->GPU transfer */ if (new_is_gpu_gpu && (old_transfer->dst_node == new_transfer->src_node || old_transfer->dst_node == new_transfer->dst_node)) return 1; if (old_is_gpu_gpu && (new_transfer->dst_node == old_transfer->src_node || new_transfer->dst_node == old_transfer->dst_node)) return 1; /* StarPU's constraint on CUDA transfers is using one stream per * source/destination pair, which is already handled above */ return 0; } static void transfer_queue(struct transfer *transfer) { unsigned src = transfer->src_node; unsigned dst = transfer->dst_node; struct transfer_runner *t = &transfer_runner[src][dst]; if (!t->runner) { /* No runner yet, start it */ static starpu_pthread_mutex_t mutex; /* process_create may yield */ STARPU_PTHREAD_MUTEX_LOCK(&mutex); if (!t->runner) { char s[64]; snprintf(s, sizeof(s), "transfer %u-%u runner", src, dst); starpu_pthread_create_on(s, &t->runner, NULL, transfer_execute, (void*)(uintptr_t)((src<<16) + dst), _starpu_simgrid_get_memnode_host(src)); starpu_sem_init(&t->sem, 0, 0); } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } if (t->last_transfer) { /* Already running a transfer, queue */ t->last_transfer->next = transfer; t->last_transfer = transfer; } else { STARPU_ASSERT(!t->first_transfer); t->first_transfer = transfer; t->last_transfer = transfer; } starpu_sem_post(&t->sem); } /* Actually execute the transfer, and then start transfers waiting for this one. */ static void *transfer_execute(void *arg) { unsigned src_dst = (uintptr_t) arg; unsigned src = src_dst >> 16; unsigned dst = src_dst & 0xffff; struct transfer_runner *t = &transfer_runner[src][dst]; _STARPU_DEBUG("transfer runner %u-%u started\n", src, dst); while (1) { struct transfer *transfer; starpu_sem_wait(&t->sem); if (!runners_running) break; transfer = t->first_transfer; t->first_transfer = transfer->next; if (t->last_transfer == transfer) t->last_transfer = NULL; #if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO) if (transfer->size) #else if (transfer->task) #endif { _STARPU_DEBUG("transfer %p started\n", transfer); #if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO) #ifdef HAVE_SG_HOST_SENDTO sg_host_sendto #else sg_host_send_to #endif (_starpu_simgrid_memory_node_get_host(transfer->src_node), _starpu_simgrid_memory_node_get_host(transfer->dst_node), transfer->size); #else MSG_task_execute(transfer->task); MSG_task_destroy(transfer->task); #endif _STARPU_DEBUG("transfer %p finished\n", transfer); } *transfer->finished = 1; transfer_list_erase(&pending, transfer); /* The workers which started this request may be sleeping out of tasks, wake it */ _starpu_wake_all_blocked_workers_on_node(transfer->run_node); unsigned i; /* Wake transfers waiting for my termination */ /* Note: due to possible preemption inside process_create, the array * may grow while doing this */ for (i = 0; i < transfer->nwake; i++) { struct transfer *wake = transfer->wake[i]; STARPU_ASSERT(wake->nwait > 0); wake->nwait--; if (!wake->nwait) { _STARPU_DEBUG("triggering transfer %p\n", wake); transfer_queue(wake); } } free(transfer->wake); free(transfer); } return 0; } /* Look for sequentialization between this transfer and pending transfers, and submit this one */ static void transfer_submit(struct transfer *transfer) { struct transfer *old; for (old = transfer_list_begin(&pending); old != transfer_list_end(&pending); old = transfer_list_next(old)) { if (transfers_are_sequential(transfer, old)) { _STARPU_DEBUG("transfer %p(%d->%d) waits for %p(%d->%d)\n", transfer, transfer->src_node, transfer->dst_node, old, old->src_node, old->dst_node); /* Make new wait for the old */ transfer->nwait++; /* Make old wake the new */ _STARPU_REALLOC(old->wake, (old->nwake + 1) * sizeof(old->wake)); old->wake[old->nwake] = transfer; old->nwake++; } } transfer_list_push_front(&pending, transfer); if (!transfer->nwait) { _STARPU_DEBUG("transfer %p waits for nobody, starting\n", transfer); transfer_queue(transfer); } } int _starpu_simgrid_wait_transfer_event(void *_event) { struct _starpu_simgrid_event *event = _event; /* this is not associated to a request so it's synchronous */ starpu_pthread_wait_t wait; starpu_pthread_wait_init(&wait); starpu_pthread_queue_register(&wait, event->queue); while(1) { starpu_pthread_wait_reset(&wait); if (event->finished) break; starpu_pthread_wait_wait(&wait); } starpu_pthread_queue_unregister(&wait, event->queue); starpu_pthread_wait_destroy(&wait); return 0; } int _starpu_simgrid_test_transfer_event(void *_event) { struct _starpu_simgrid_event *event = _event; return event->finished; } /* Wait for completion of all transfers */ static void _starpu_simgrid_wait_transfers(void) { unsigned finished = 0; struct transfer *sync = transfer_new(); struct transfer *cur; #if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO) sync->size = 0; #else sync->task = NULL; #endif sync->finished = &finished; sync->src_node = STARPU_MAIN_RAM; sync->dst_node = STARPU_MAIN_RAM; sync->run_node = STARPU_MAIN_RAM; sync->wake = NULL; sync->nwake = 0; sync->nwait = 0; sync->next = NULL; for (cur = transfer_list_begin(&pending); cur != transfer_list_end(&pending); cur = transfer_list_next(cur)) { sync->nwait++; _STARPU_REALLOC(cur->wake, (cur->nwake + 1) * sizeof(cur->wake)); cur->wake[cur->nwake] = sync; cur->nwake++; } if (sync->nwait == 0) { /* No transfer to wait for */ free(sync); return; } /* Push synchronization pseudo-transfer */ transfer_list_push_front(&pending, sync); /* And wait for it */ starpu_pthread_wait_t wait; starpu_pthread_wait_init(&wait); starpu_pthread_queue_register(&wait, &_starpu_simgrid_transfer_queue[STARPU_MAIN_RAM]); while(1) { starpu_pthread_wait_reset(&wait); if (finished) break; starpu_pthread_wait_wait(&wait); } starpu_pthread_queue_unregister(&wait, &_starpu_simgrid_transfer_queue[STARPU_MAIN_RAM]); starpu_pthread_wait_destroy(&wait); } /* Data transfer issued by StarPU */ int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node, struct _starpu_data_request *req) { /* Simgrid does not like 0-bytes transfers */ if (!size) return 0; /* Explicitly disabled by user? */ if (!simgrid_transfer_cost) return 0; struct _starpu_simgrid_event *event, myevent; double start = 0.; struct transfer *transfer = transfer_new(); _STARPU_DEBUG("creating transfer %p for %lu bytes\n", transfer, (unsigned long) size); #if defined(HAVE_SG_HOST_SEND_TO) || defined(HAVE_SG_HOST_SENDTO) transfer->size = size; #else msg_task_t task; starpu_sg_host_t *hosts; double *computation; double *communication; _STARPU_CALLOC(hosts, 2, sizeof(*hosts)); _STARPU_CALLOC(computation, 2, sizeof(*computation)); _STARPU_CALLOC(communication, 4, sizeof(*communication)); hosts[0] = _starpu_simgrid_memory_node_get_host(src_node); hosts[1] = _starpu_simgrid_memory_node_get_host(dst_node); STARPU_ASSERT(hosts[0] != hosts[1]); communication[1] = size; task = MSG_parallel_task_create("copy", 2, hosts, computation, communication, NULL); transfer->task = task; #endif transfer->src_node = src_node; transfer->dst_node = dst_node; transfer->run_node = starpu_worker_get_local_memory_node(); if (req) event = _starpu_simgrid_event(&req->async_channel.event); else event = &myevent; event->finished = 0; transfer->finished = &event->finished; event->queue = &_starpu_simgrid_transfer_queue[transfer->run_node]; transfer->wake = NULL; transfer->nwake = 0; transfer->nwait = 0; transfer->next = NULL; if (req) starpu_interface_start_driver_copy_async(src_node, dst_node, &start); /* Sleep 10µs for the GPU transfer queueing */ if (_starpu_simgrid_cuda_queue_cost()) starpu_sleep(0.000010); transfer_submit(transfer); /* Note: from here, transfer might be already freed */ if (req) { starpu_interface_end_driver_copy_async(src_node, dst_node, start); starpu_interface_data_copy(src_node, dst_node, size); return -EAGAIN; } else { /* this is not associated to a request so it's synchronous */ _starpu_simgrid_wait_transfer_event(event); return 0; } } /* Sync all GPUs (used on CUDA Free, typically) */ void _starpu_simgrid_sync_gpus(void) { _starpu_simgrid_wait_transfers(); } _starpu_simgrid_main_ret _starpu_simgrid_thread_start(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[]) { void *(*f)(void*) = (void*) (uintptr_t) strtol(argv[0], NULL, 16); void *arg = (void*) (uintptr_t) strtol(argv[1], NULL, 16); /* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */ starpu_sleep(0.000001); _starpu_simgrid_actor_setup(); /* _args is freed with process context */ f(arg); _STARPU_SIMGRID_MAIN_RETURN; } starpu_pthread_t _starpu_simgrid_actor_create(const char *name, xbt_main_func_t code, starpu_sg_host_t host, int argc, char *argv[]) { void **tsd; starpu_pthread_t actor; _STARPU_CALLOC(tsd, MAX_TSD+1, sizeof(void*)); #ifdef HAVE_SG_ACTOR_INIT actor = sg_actor_init(name, host); #ifdef HAVE_SG_ACTOR_SET_DATA sg_actor_set_data(actor, tsd); #else sg_actor_data_set(actor, tsd); #endif sg_actor_start(actor, code, argc, argv); #else actor = MSG_process_create_with_arguments(name, code, tsd, host, argc, argv); #ifdef HAVE_SG_ACTOR_DATA #ifdef HAVE_SG_ACTOR_SET_DATA sg_actor_set_data(actor, tsd); #else sg_actor_data_set(actor, tsd); #endif #endif #endif return actor; } starpu_sg_host_t _starpu_simgrid_get_memnode_host(unsigned node) { const char *fmt; char name[16]; switch (starpu_node_get_kind(node)) { case STARPU_CPU_RAM: /* We do not specify %u as NUMA effects are not taken into account */ fmt = "RAM"; break; case STARPU_CUDA_RAM: fmt = "CUDA%u"; break; case STARPU_OPENCL_RAM: fmt = "OpenCL%u"; break; case STARPU_DISK_RAM: fmt = "DISK%u"; break; default: STARPU_ABORT(); break; } snprintf(name, sizeof(name), fmt, starpu_memory_node_get_devid(node)); return _starpu_simgrid_get_host_by_name(name); } void _starpu_simgrid_count_ngpus(void) { #if (defined(HAVE_SG_LINK_GET_NAME) || defined(HAVE_SG_LINK_NAME) || defined sg_link_name) && (SIMGRID_VERSION >= 31300) unsigned src, dst; starpu_sg_host_t ramhost = _starpu_simgrid_get_host_by_name("RAM"); /* For each pair of memory nodes, get the route */ for (src = 1; src < STARPU_MAXNODES; src++) for (dst = 1; dst < STARPU_MAXNODES; dst++) { int busid; starpu_sg_host_t srchost, dsthost; #if defined(HAVE_SG_HOST_GET_ROUTE_LINKS) const_sg_link_t *routes; const_sg_link_t link; #else xbt_dynar_t route_dynar = xbt_dynar_new(sizeof(starpu_sg_link_t), NULL); starpu_sg_link_t link; #endif int i, routesize; int through; unsigned src2; unsigned ngpus; const char *name; if (dst == src) continue; busid = starpu_bus_get_id(src, dst); if (busid == -1) continue; srchost = _starpu_simgrid_get_memnode_host(src); dsthost = _starpu_simgrid_get_memnode_host(dst); #if defined(HAVE_SG_HOST_GET_ROUTE_LINKS) || defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route) #if defined(HAVE_SG_HOST_GET_ROUTE_LINKS) routes = sg_host_get_route_links(srchost, dsthost, &routesize); #elif defined(HAVE_SG_HOST_GET_ROUTE) sg_host_get_route(srchost, dsthost, route_dynar); routesize = xbt_dynar_length(route_dynar); #else sg_host_route(srchost, dsthost, route_dynar); routesize = xbt_dynar_length(route_dynar); #endif #else const starpu_sg_link_t *route = SD_route_get_list(srchost, dsthost); routesize = SD_route_get_size(srchost, dsthost); for (i = 0; i < routesize; i++) xbt_dynar_push(route_dynar, &route[i]); free(route); #endif if (routesize == 1) { /* Direct link! no need to count anything */ starpu_bus_set_ngpus(busid, 1); continue; } /* If it goes through "Host", do not care, there is no * direct transfer support */ for (i = 0; i < routesize; i++) { #ifdef HAVE_SG_HOST_GET_ROUTE_LINKS link = routes[i]; #else xbt_dynar_get_cpy(route_dynar, i, &link); #endif if ( #ifdef HAVE_SG_LINK_GET_NAME !strcmp(sg_link_get_name(link), "Host") #else !strcmp(sg_link_name(link), "Host") #endif ) break; } if (i < routesize) continue; /* Get the PCI bridge between down and up links */ through = -1; for (i = 0; i < routesize; i++) { #ifdef HAVE_SG_HOST_GET_ROUTE_LINKS link = routes[i]; #else xbt_dynar_get_cpy(route_dynar, i, &link); #endif #ifdef HAVE_SG_LINK_GET_NAME name = sg_link_get_name(link); #else name = sg_link_name(link); #endif size_t len = strlen(name); if (!strcmp(" through", name+len-8)) through = i; else if (!strcmp(" up", name+len-3)) break; } /* Didn't find it ?! */ if (through == -1) { _STARPU_DISP("Didn't find through-link for %d->%d\n", src, dst); continue; } #ifdef HAVE_SG_HOST_GET_ROUTE_LINKS link = routes[through]; #else xbt_dynar_get_cpy(route_dynar, through, &link); #endif #ifdef HAVE_SG_LINK_GET_NAME name = sg_link_get_name(link); #else name = sg_link_name(link); #endif /* * count how many direct routes go through it between * GPUs and RAM */ ngpus = 0; for (src2 = 1; src2 < STARPU_MAXNODES; src2++) { int numa; int nnumas = starpu_memory_nodes_get_numa_count(); int found = 0; for (numa = 0; numa < nnumas; numa++) if (starpu_bus_get_id(src2, numa) != -1) { found = 1; break; } if (!found) continue; starpu_sg_host_t srchost2 = _starpu_simgrid_get_memnode_host(src2); int routesize2; #if defined(HAVE_SG_HOST_GET_ROUTE_LINKS) const_sg_link_t *routes2; #else xbt_dynar_t route_dynar2 = xbt_dynar_new(sizeof(starpu_sg_link_t), NULL); #endif #if defined(HAVE_SG_HOST_GET_ROUTE_LINKS) || defined(HAVE_SG_HOST_GET_ROUTE) || defined(HAVE_SG_HOST_ROUTE) || defined(sg_host_route) #if defined(HAVE_SG_HOST_GET_ROUTE_LINKS) routes2 = sg_host_get_route_links(srchost2, ramhost, &routesize2); #elif defined(HAVE_SG_HOST_GET_ROUTE) sg_host_get_route(srchost2, ramhost, route_dynar2); routesize2 = xbt_dynar_length(route_dynar2); #else sg_host_route(srchost2, ramhost, route_dynar2); routesize2 = xbt_dynar_length(route_dynar2); #endif #else const starpu_sg_link_t *route2 = SD_route_get_list(srchost2, ramhost); routesize2 = SD_route_get_size(srchost2, ramhost); for (i = 0; i < routesize2; i++) xbt_dynar_push(route_dynar2, &route2[i]); free(route2); #endif for (i = 0; i < routesize2; i++) { #ifdef HAVE_SG_HOST_GET_ROUTE_LINKS link = routes2[i]; #else xbt_dynar_get_cpy(route_dynar2, i, &link); #endif if ( #ifdef HAVE_SG_LINK_GET_NAME !strcmp(name, sg_link_get_name(link)) #else !strcmp(name, sg_link_name(link)) #endif ) { /* This GPU goes through this PCI bridge to access RAM */ ngpus++; break; } } } _STARPU_DEBUG("%d->%d through %s, %u GPUs\n", src, dst, name, ngpus); starpu_bus_set_ngpus(busid, ngpus); } #endif } #if 0 static size_t used; void _starpu_simgrid_data_new(size_t size) { // Note: this is just declarative //_STARPU_DISP("data new: %zd, now %zd\n", size, used); } void _starpu_simgrid_data_increase(size_t size) { used += size; _STARPU_DISP("data increase: %zd, now %zd\n", size, used); } void _starpu_simgrid_data_alloc(size_t size) { used += size; _STARPU_DISP("data alloc: %zd, now %zd\n", size, used); } void _starpu_simgrid_data_free(size_t size) { used -= size; _STARPU_DISP("data free: %zd, now %zd\n", size, used); } void _starpu_simgrid_data_transfer(size_t size, unsigned src_node, unsigned dst_node) { _STARPU_DISP("data transfer %zd from %u to %u\n", size, src_node, dst_node); } #endif void starpu_energy_use(float joules) { _starpu_simgrid_dynamic_energy += joules; } double starpu_energy_used(void) { float idle_power = starpu_getenv_float_default("STARPU_IDLE_POWER", 0.0); return _starpu_simgrid_dynamic_energy + idle_power * starpu_timing_now() / 1000000; } #endif starpu-1.4.9+dfsg/src/core/simgrid.h000066400000000000000000000146571507764646700173530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __SIMGRID_H__ #define __SIMGRID_H__ /** @file */ /* Note: when changing something here, update the include list in configure.ac * in the part that tries to enable stdc++11 */ #ifdef STARPU_SIMGRID #ifdef STARPU_HAVE_SIMGRID_MSG_H #include #elif defined(STARPU_HAVE_MSG_MSG_H) #include #endif #ifdef STARPU_HAVE_XBT_BASE_H #include #endif #ifdef STARPU_HAVE_SIMGRID_VERSION_H #include #endif #ifdef STARPU_HAVE_SIMGRID_ZONE_H #include #endif #ifdef STARPU_HAVE_SIMGRID_HOST_H #include #endif #if defined(HAVE_SIMGRID_SIMDAG_H) && (SIMGRID_VERSION >= 31300) #include #endif #include #endif #ifdef __cplusplus extern "C" { #endif #ifdef STARPU_SIMGRID #pragma GCC visibility push(hidden) struct _starpu_pthread_args { void *(*f)(void*); void *arg; }; #if (SIMGRID_VERSION >= 32600) typedef void _starpu_simgrid_main_ret; #define _STARPU_SIMGRID_MAIN_RETURN do { } while (0) #else typedef int _starpu_simgrid_main_ret; #define _STARPU_SIMGRID_MAIN_RETURN return 0 #endif #if (SIMGRID_VERSION >= 31500) && (SIMGRID_VERSION != 31559) typedef sg_link_t starpu_sg_link_t; #else typedef SD_link_t starpu_sg_link_t; #endif _starpu_simgrid_main_ret _starpu_simgrid_thread_start(int argc, char *argv[]); #define MAX_TSD 16 #define STARPU_MPI_AS_PREFIX "StarPU-MPI" #define _starpu_simgrid_running_smpi() (getenv("SMPI_GLOBAL_SIZE") != NULL) void _starpu_start_simgrid(int *argc, char **argv); void _starpu_simgrid_init_early(int *argc, char ***argv); void _starpu_simgrid_init(void); void _starpu_simgrid_cpp_init(void); void _starpu_simgrid_deinit(void); void _starpu_simgrid_deinit_late(void); void _starpu_simgrid_actor_setup(void); void _starpu_simgrid_wait_tasks(int workerid); struct _starpu_job; void _starpu_simgrid_submit_job(int workerid, int sched_ctx_id, struct _starpu_job *job, struct starpu_perfmodel_arch* perf_arch, double length, double energy, unsigned *finished); struct _starpu_data_request; int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node, struct _starpu_data_request *req); int _starpu_simgrid_wait_transfer_event(void *event); int _starpu_simgrid_test_transfer_event(void *event); void _starpu_simgrid_sync_gpus(void); /** Return the number of hosts prefixed by PREFIX */ int _starpu_simgrid_get_nbhosts(const char *prefix); unsigned long long _starpu_simgrid_get_memsize(const char *prefix, unsigned devid); const char *_starpu_simgrid_get_devname(const char *prefix, unsigned devid); starpu_sg_host_t _starpu_simgrid_get_host_by_name(const char *name); starpu_sg_host_t _starpu_simgrid_get_memnode_host(unsigned node); struct _starpu_worker; starpu_sg_host_t _starpu_simgrid_get_host_by_worker(struct _starpu_worker *worker); void _starpu_simgrid_get_platform_path(int version, char *path, size_t maxlen); #if defined(HAVE_SG_ZONE_GET_BY_NAME) || defined(sg_zone_get_by_name) sg_netzone_t _starpu_simgrid_get_as_by_name(const char *name); #else msg_as_t _starpu_simgrid_get_as_by_name(const char *name); #endif #pragma weak starpu_mpi_world_rank extern int starpu_mpi_world_rank(void); #pragma weak _starpu_mpi_simgrid_init int _starpu_mpi_simgrid_init(int argc, char *argv[]); extern starpu_pthread_queue_t _starpu_simgrid_transfer_queue[STARPU_MAXNODES]; extern starpu_pthread_queue_t _starpu_simgrid_task_queue[STARPU_NMAXWORKERS]; #ifdef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB extern starpu_pthread_mutex_t _starpu_simgrid_time_advance_mutex; extern starpu_pthread_cond_t _starpu_simgrid_time_advance_cond; #endif #define _starpu_simgrid_cuda_malloc_cost() starpu_getenv_number_default("STARPU_SIMGRID_CUDA_MALLOC_COST", 1) #define _starpu_simgrid_cuda_queue_cost() starpu_getenv_number_default("STARPU_SIMGRID_CUDA_QUEUE_COST", 1) #define _starpu_simgrid_task_submit_cost() starpu_getenv_number_default("STARPU_SIMGRID_TASK_SUBMIT_COST", 1) #define _starpu_simgrid_task_push_cost() starpu_getenv_number_default("STARPU_SIMGRID_TASK_PUSH_COST", 1) #define _starpu_simgrid_fetching_input_cost() starpu_getenv_number_default("STARPU_SIMGRID_FETCHING_INPUT_COST", 1) #define _starpu_simgrid_sched_cost() starpu_getenv_number_default("STARPU_SIMGRID_SCHED_COST", 0) /** Called at initialization to count how many GPUs are interfering with each * bus */ void _starpu_simgrid_count_ngpus(void); extern size_t _starpu_default_stack_size; void _starpu_simgrid_set_stack_size(size_t stack_size); void _starpu_simgrid_xbt_thread_create(const char *name, starpu_pthread_attr_t *attr, void_f_pvoid_t code, void *param); #define _SIMGRID_TIMER_BEGIN(cond) \ { \ xbt_os_timer_t __timer = NULL; \ if (cond) { \ __timer = xbt_os_timer_new(); \ xbt_os_threadtimer_start(__timer); \ } #define _SIMGRID_TIMER_END \ if (__timer) { \ xbt_os_threadtimer_stop(__timer); \ starpu_sleep(xbt_os_timer_elapsed(__timer));\ xbt_os_timer_free(__timer); \ } \ } #pragma GCC visibility pop #else // !STARPU_SIMGRID #define _SIMGRID_TIMER_BEGIN(cond) { #define _SIMGRID_TIMER_END } #endif /** Experimental functions for OOC stochastic analysis */ /* disk <-> MAIN_RAM only */ #if defined(STARPU_SIMGRID) && 0 void _starpu_simgrid_data_new(size_t size); void _starpu_simgrid_data_increase(size_t size); void _starpu_simgrid_data_alloc(size_t size); void _starpu_simgrid_data_free(size_t size); void _starpu_simgrid_data_transfer(size_t size, unsigned src_node, unsigned dst_node); #else #define _starpu_simgrid_data_new(size) (void)0 #define _starpu_simgrid_data_increase(size) (void)0 #define _starpu_simgrid_data_alloc(size) (void)0 #define _starpu_simgrid_data_free(size) (void)0 #define _starpu_simgrid_data_transfer(size, src_node, dst_node) (void)0 #endif #ifdef __cplusplus } #endif #endif // __SIMGRID_H__ starpu-1.4.9+dfsg/src/core/simgrid_cpp.cpp000066400000000000000000000117311507764646700205360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef STARPU_SIMGRID #include #include #if SIMGRID_VERSION >= 32190 #include #else #include #endif #include #ifdef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB #include #endif #ifdef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB starpu_pthread_mutex_t _starpu_simgrid_time_advance_mutex; starpu_pthread_cond_t _starpu_simgrid_time_advance_cond; #endif void _starpu_simgrid_cpp_init(void) { #ifdef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB STARPU_PTHREAD_MUTEX_INIT(&_starpu_simgrid_time_advance_mutex, NULL); STARPU_PTHREAD_COND_INIT(&_starpu_simgrid_time_advance_cond, NULL); simgrid::s4u::Engine::on_time_advance_cb([](double) { STARPU_PTHREAD_COND_BROADCAST(&_starpu_simgrid_time_advance_cond); }); #endif } /* thread_create function which implements inheritance of MPI privatization */ /* See https://github.com/simgrid/simgrid/issues/139 */ typedef struct { void_f_pvoid_t code; void *userparam; #if SIMGRID_VERSION < 32501 void *father_data; #endif } thread_data_t; #if SIMGRID_VERSION >= 32501 static void *_starpu_simgrid_xbt_thread_create_wrapper(void *arg) { thread_data_t *t = (thread_data_t *) arg; /* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */ starpu_sleep(0.000001); #ifdef HAVE_SMPI_THREAD_CREATE /* Make this actor inherit SMPI data from father actor */ SMPI_thread_create(); #endif t->code(t->userparam); free(t); return NULL; } #else #if SIMGRID_VERSION >= 32190 static void _starpu_simgrid_xbt_thread_create_wrapper(void) #else static int _starpu_simgrid_xbt_thread_create_wrapper(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED) #endif { /* FIXME: Ugly work-around for bug in simgrid: the MPI context is not properly set at MSG process startup */ starpu_sleep(0.000001); #ifdef HAVE_SMX_ACTOR_T smx_actor_t #else smx_process_t #endif self = SIMIX_process_self(); #if SIMGRID_VERSION < 31300 thread_data_t *t = (thread_data_t *) SIMIX_process_self_get_data(self); #else thread_data_t *t = (thread_data_t *) SIMIX_process_self_get_data(); #endif simcall_process_set_data(self, t->father_data); t->code(t->userparam); simcall_process_set_data(self, NULL); free(t); #if SIMGRID_VERSION < 32190 return 0; #endif } #endif void _starpu_simgrid_xbt_thread_create(const char *name, starpu_pthread_attr_t *attr, void_f_pvoid_t code, void *param) { #if SIMGRID_VERSION >= 32501 starpu_pthread_t t; thread_data_t *res = (thread_data_t *) malloc(sizeof(thread_data_t)); res->userparam = param; res->code = code; starpu_pthread_create_on(name, &t, attr, _starpu_simgrid_xbt_thread_create_wrapper, res, sg_host_self()); #else if (attr && attr->stacksize) _starpu_simgrid_set_stack_size(attr->stacksize); #if SIMGRID_VERSION >= 32190 || defined(HAVE_SIMCALL_PROCESS_CREATE) || defined(simcall_process_create) #ifdef HAVE_SMX_ACTOR_T smx_actor_t process STARPU_ATTRIBUTE_UNUSED; #else smx_process_t process STARPU_ATTRIBUTE_UNUSED; #endif thread_data_t *res = (thread_data_t *) malloc(sizeof(thread_data_t)); res->userparam = param; res->code = code; #if SIMGRID_VERSION < 31300 res->father_data = SIMIX_process_self_get_data(SIMIX_process_self()); #else res->father_data = SIMIX_process_self_get_data(); #endif #if SIMGRID_VERSION < 31200 simcall_process_create(&process, #else process = simcall_process_create( #endif name, _starpu_simgrid_xbt_thread_create_wrapper, res, #if SIMGRID_VERSION < 31400 SIMIX_host_self_get_name(), #else # if defined(HAVE_SG_HOST_SELF) || defined(sg_host_self) sg_host_self(), # else SIMIX_host_self(), # endif #endif #if SIMGRID_VERSION < 31500 || SIMGRID_VERSION == 31559 -1.0, #endif #if SIMGRID_VERSION < 32190 0, NULL, #endif /*props */ NULL #if SIMGRID_VERSION < 31500 || SIMGRID_VERSION == 31559 , 0 #endif ); #else STARPU_ABORT_MSG("Can't run StarPU-Simgrid-MPI with a Simgrid version which does not provide simcall_process_create and does not fix https://github.com/simgrid/simgrid/issues/139 , sorry."); #endif if (attr && attr->stacksize) _starpu_simgrid_set_stack_size(_starpu_default_stack_size); #endif } #endif starpu-1.4.9+dfsg/src/core/task.c000066400000000000000000002030361507764646700166410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2011 Télécom Sud Paris * Copyright (C) 2013 Thibaut Lambert * Copyright (C) 2016 Uppsala University * Copyright (C) 2017 Erwan Leria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef STARPU_HAVE_WINDOWS #include #endif /* global counters */ static int __g_total_submitted; static int __g_peak_submitted; static int __g_peak_ready; /* global counter variables */ starpu_perf_counter_int64_t _starpu_task__g_total_submitted__value; starpu_perf_counter_int64_t _starpu_task__g_peak_submitted__value; starpu_perf_counter_int64_t _starpu_task__g_current_submitted__value; starpu_perf_counter_int64_t _starpu_task__g_peak_ready__value; starpu_perf_counter_int64_t _starpu_task__g_current_ready__value; /* per-worker counters */ static int __w_total_executed; static int __w_cumul_execution_time; /* per-codelet counters */ static int __c_total_submitted; static int __c_peak_submitted; static int __c_peak_ready; static int __c_total_executed; static int __c_cumul_execution_time; /* - */ /* per-scheduler knobs */ static int __s_max_priority_cap_knob; static int __s_min_priority_cap_knob; /* knob variables */ static int __s_max_priority_cap__value; static int __s_min_priority_cap__value; static struct starpu_perf_knob_group * __kg_starpu_task__per_scheduler; /* - */ static void global_sample_updater(struct starpu_perf_counter_sample *sample, void *context) { STARPU_ASSERT(context == NULL); /* no context for the global updater */ (void)context; _starpu_perf_counter_sample_set_int64_value(sample, __g_total_submitted, _starpu_task__g_total_submitted__value); _starpu_perf_counter_sample_set_int64_value(sample, __g_peak_submitted, _starpu_task__g_peak_submitted__value); _starpu_perf_counter_sample_set_int64_value(sample, __g_peak_ready, _starpu_task__g_peak_ready__value); } static void per_worker_sample_updater(struct starpu_perf_counter_sample *sample, void *context) { STARPU_ASSERT(context != NULL); struct _starpu_worker *worker = context; _starpu_perf_counter_sample_set_int64_value(sample, __w_total_executed, worker->__w_total_executed__value); _starpu_perf_counter_sample_set_double_value(sample, __w_cumul_execution_time, worker->__w_cumul_execution_time__value); } static void per_codelet_sample_updater(struct starpu_perf_counter_sample *sample, void *context) { STARPU_ASSERT(sample->listener != NULL && sample->listener->set != NULL); struct starpu_perf_counter_set *set = sample->listener->set; STARPU_ASSERT(set->scope == starpu_perf_counter_scope_per_codelet); STARPU_ASSERT(context != NULL); struct starpu_codelet *cl = context; _starpu_perf_counter_sample_set_int64_value(sample, __c_total_submitted, cl->perf_counter_values->task.total_submitted); _starpu_perf_counter_sample_set_int64_value(sample, __c_peak_submitted, cl->perf_counter_values->task.peak_submitted); _starpu_perf_counter_sample_set_int64_value(sample, __c_peak_ready, cl->perf_counter_values->task.peak_ready); _starpu_perf_counter_sample_set_int64_value(sample, __c_total_executed, cl->perf_counter_values->task.total_executed); _starpu_perf_counter_sample_set_double_value(sample, __c_cumul_execution_time, cl->perf_counter_values->task.cumul_execution_time); } void _starpu__task_c__register_counters(void) { { const enum starpu_perf_counter_scope scope = starpu_perf_counter_scope_global; __STARPU_PERF_COUNTER_REG("starpu.task", scope, g_total_submitted, int64, "number of tasks submitted globally (since StarPU initialization)"); __STARPU_PERF_COUNTER_REG("starpu.task", scope, g_peak_submitted, int64, "maximum simultaneous number of tasks submitted and not yet ready, globally (since StarPU initialization)"); __STARPU_PERF_COUNTER_REG("starpu.task", scope, g_peak_ready, int64, "maximum simultaneous number of tasks ready and not yet executing, globally (since StarPU initialization)"); _starpu_perf_counter_register_updater(scope, global_sample_updater); } { const enum starpu_perf_counter_scope scope = starpu_perf_counter_scope_per_worker; __STARPU_PERF_COUNTER_REG("starpu.task", scope, w_total_executed, int64, "number of tasks executed on this worker (since StarPU initialization)"); __STARPU_PERF_COUNTER_REG("starpu.task", scope, w_cumul_execution_time, double, "cumulated execution time of tasks executed on this worker (microseconds, since StarPU initialization)"); _starpu_perf_counter_register_updater(scope, per_worker_sample_updater); } { const enum starpu_perf_counter_scope scope = starpu_perf_counter_scope_per_codelet; __STARPU_PERF_COUNTER_REG("starpu.task", scope, c_total_submitted, int64, "number of codelet's task instances submitted using this codelet (since enabled)"); __STARPU_PERF_COUNTER_REG("starpu.task", scope, c_peak_submitted, int64, "maximum simultaneous number of codelet's task instances submitted and not yet ready (since enabled)"); __STARPU_PERF_COUNTER_REG("starpu.task", scope, c_peak_ready, int64, "maximum simultaneous number of codelet's task instances ready and not yet executing (since enabled)"); __STARPU_PERF_COUNTER_REG("starpu.task", scope, c_total_executed, int64, "number of codelet's task instances executed using this codelet (since enabled)"); __STARPU_PERF_COUNTER_REG("starpu.task", scope, c_cumul_execution_time, double, "cumulated execution time of codelet's task instances (since enabled)"); _starpu_perf_counter_register_updater(scope, per_codelet_sample_updater); } } /* - */ static void sched_knobs__set(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value) { const char * const sched_policy_name = *(const char **)context; (void) sched_policy_name; if (knob->id == __s_max_priority_cap_knob) { STARPU_ASSERT(value->val_int32_t <= STARPU_MAX_PRIO); STARPU_ASSERT(value->val_int32_t >= STARPU_MIN_PRIO); STARPU_ASSERT(value->val_int32_t >= __s_min_priority_cap__value); __s_max_priority_cap__value = value->val_int32_t; } else if (knob->id == __s_min_priority_cap_knob) { STARPU_ASSERT(value->val_int32_t <= STARPU_MAX_PRIO); STARPU_ASSERT(value->val_int32_t >= STARPU_MIN_PRIO); STARPU_ASSERT(value->val_int32_t <= __s_max_priority_cap__value); __s_min_priority_cap__value = value->val_int32_t; } else { STARPU_ASSERT(0); abort(); } } static void sched_knobs__get(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value) { const char * const sched_policy_name = *(const char **)context; (void) sched_policy_name; if (knob->id == __s_max_priority_cap_knob) { value->val_int32_t = __s_max_priority_cap__value; } else if (knob->id == __s_min_priority_cap_knob) { value->val_int32_t = __s_min_priority_cap__value; } else { STARPU_ASSERT(0); abort(); } } void _starpu__task_c__register_knobs(void) { #if 0 { const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_global; __kg_starpu_global = _starpu_perf_knob_group_register(scope, global_knobs__set, global_knobs__get); } #endif #if 0 { const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_per_worker; __kg_starpu_worker__per_worker = _starpu_perf_knob_group_register(scope, worker_knobs__set, worker_knobs__get); } #endif { const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_per_scheduler; __kg_starpu_task__per_scheduler = _starpu_perf_knob_group_register(scope, sched_knobs__set, sched_knobs__get); /* TODO: priority capping knobs actually work globally for now, the sched policy name is ignored */ __STARPU_PERF_KNOB_REG("starpu.task", __kg_starpu_task__per_scheduler, s_max_priority_cap_knob, int32, "force task priority to this value or below (priority value)"); __s_max_priority_cap__value = STARPU_MAX_PRIO; __STARPU_PERF_KNOB_REG("starpu.task", __kg_starpu_task__per_scheduler, s_min_priority_cap_knob, int32, "force task priority to this value or above (priority value)"); __s_min_priority_cap__value = STARPU_MIN_PRIO; } } void _starpu__task_c__unregister_knobs(void) { _starpu_perf_knob_group_unregister(__kg_starpu_task__per_scheduler); __kg_starpu_task__per_scheduler = NULL; } /* - */ /* XXX this should be reinitialized when StarPU is shutdown (or we should make * sure that no task remains !) */ /* TODO we could make this hierarchical to avoid contention ? */ //static starpu_pthread_cond_t submitted_cond = STARPU_PTHREAD_COND_INITIALIZER; /* This key stores the task currently handled by the thread, note that we * cannot use the worker structure to store that information because it is * possible that we have a task with a NULL codelet, which means its callback * could be executed by a user thread as well. */ static starpu_pthread_key_t current_task_key; static int limit_min_submitted_tasks; static int limit_max_submitted_tasks; static int watchdog_crash; static int watchdog_delay; /* * Function to call when watchdog detects that no task has finished for more than STARPU_WATCHDOG_TIMEOUT seconds */ static void (*watchdog_hook)(void *) = NULL; static void * watchdog_hook_arg = NULL; #define _STARPU_TASK_MAGIC 42 /* Called once at starpu_init */ void _starpu_task_init(void) { STARPU_PTHREAD_KEY_CREATE(¤t_task_key, NULL); limit_min_submitted_tasks = starpu_getenv_number("STARPU_LIMIT_MIN_SUBMITTED_TASKS"); limit_max_submitted_tasks = starpu_getenv_number("STARPU_LIMIT_MAX_SUBMITTED_TASKS"); watchdog_crash = starpu_getenv_number_default("STARPU_WATCHDOG_CRASH", 0); watchdog_delay = starpu_getenv_number_default("STARPU_WATCHDOG_DELAY", 0); } void _starpu_task_deinit(void) { STARPU_PTHREAD_KEY_DELETE(current_task_key); } void starpu_set_limit_min_submitted_tasks(int limit_min) { limit_min_submitted_tasks = limit_min; } void starpu_set_limit_max_submitted_tasks(int limit_max) { limit_max_submitted_tasks = limit_max; } void starpu_task_init(struct starpu_task *task) { /* TODO: memcpy from a template instead? benchmark it */ STARPU_ASSERT(task); /* As most of the fields must be initialised at NULL, let's put 0 * everywhere */ memset(task, 0, sizeof(struct starpu_task)); task->sequential_consistency = 1; task->where = -1; /* Now we can initialise fields which recquire custom value */ /* Note: remember to update STARPU_TASK_INITIALIZER as well */ #if STARPU_DEFAULT_PRIO != 0 task->priority = STARPU_DEFAULT_PRIO; #endif task->detach = 1; #if STARPU_TASK_INIT != 0 task->status = STARPU_TASK_INIT; #endif task->predicted = NAN; task->predicted_transfer = NAN; task->predicted_start = NAN; task->magic = _STARPU_TASK_MAGIC; task->sched_ctx = STARPU_NMAX_SCHED_CTXS; task->flops = 0.0; } /* Free all the resources allocated for a task, without deallocating the task * structure itself (this is required for statically allocated tasks). * All values previously set by the user, like codelet and handles, remain * unchanged */ void starpu_task_clean(struct starpu_task *task) { STARPU_ASSERT(task); task->magic = 0; /* If a buffer was allocated to store the profiling info, we free it. */ if (task->profiling_info) { free(task->profiling_info); task->profiling_info = NULL; } /* If case the task is (still) part of a bundle */ starpu_task_bundle_t bundle = task->bundle; if (bundle) starpu_task_bundle_remove(bundle, task); if (task->dyn_handles) { free(task->dyn_handles); task->dyn_handles = NULL; free(task->dyn_interfaces); task->dyn_interfaces = NULL; } if (task->dyn_modes) { free(task->dyn_modes); task->dyn_modes = NULL; } struct _starpu_job *j = (struct _starpu_job *)task->starpu_private; if (j) { _starpu_job_destroy(j); task->starpu_private = NULL; } } struct starpu_task * STARPU_ATTRIBUTE_MALLOC starpu_task_create(void) { struct starpu_task *task; _STARPU_MALLOC(task, sizeof(struct starpu_task)); starpu_task_init(task); /* Dynamically allocated tasks are destroyed by default */ task->destroy = 1; return task; } static struct starpu_codelet _starpu_data_sync_cl = { .where = STARPU_NOWHERE, .nbuffers = STARPU_VARIABLE_NBUFFERS }; struct starpu_task * STARPU_ATTRIBUTE_MALLOC starpu_task_create_sync(starpu_data_handle_t handle, enum starpu_data_access_mode mode) { struct starpu_task *task = starpu_task_create(); task->cl = &_starpu_data_sync_cl; STARPU_TASK_SET_HANDLE(task, handle, 0); STARPU_TASK_SET_MODE(task, mode, 0); task->nbuffers = 1; return task; } /* Free the resource allocated during starpu_task_create. This function can be * called automatically after the execution of a task by setting the "destroy" * flag of the starpu_task structure (default behaviour). Calling this function * on a statically allocated task results in an undefined behaviour. */ void _starpu_task_destroy(struct starpu_task *task) { /* If starpu_task_destroy is called in a callback, we just set the destroy flag. The task will be destroyed after the callback returns */ if (task == starpu_task_get_current() && _starpu_get_local_worker_status() & STATUS_CALLBACK) { task->destroy = 1; } else { starpu_task_clean(task); /* TODO handle the case of task with detach = 1 and destroy = 1 */ /* TODO handle the case of non terminated tasks -> assertion failure, it's too dangerous to be doing something like this */ /* Does user want StarPU release cl_arg ? */ if (task->cl_arg_free) free(task->cl_arg); /* Does user want StarPU release cl_ret ? */ if (task->cl_ret_free) free(task->cl_ret); /* Does user want StarPU release callback_arg ? */ if (task->callback_arg_free) free(task->callback_arg); /* Does user want StarPU release epilogue callback_arg ? */ if (task->epilogue_callback_arg_free) free(task->epilogue_callback_arg); /* Does user want StarPU release prologue_callback_arg ? */ if (task->prologue_callback_arg_free) free(task->prologue_callback_arg); /* Does user want StarPU release prologue_pop_arg ? */ if (task->prologue_callback_pop_arg_free) free(task->prologue_callback_pop_arg); free(task); } } void starpu_task_destroy(struct starpu_task *task) { STARPU_ASSERT(task); STARPU_ASSERT_MSG(!task->destroy || !task->detach, "starpu_task_destroy must not be called for task with destroy = 1 and detach = 1"); _starpu_task_destroy(task); } void starpu_task_set_destroy(struct starpu_task *task) { STARPU_ASSERT(task); struct _starpu_job *j = _starpu_get_job_associated_to_task(task); STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); STARPU_ASSERT_MSG(!task->destroy, "starpu_task_set_destroy must not be called for task with destroy = 1"); if (j->terminated == 2) { STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); /* It's already over, _starpu_handle_job_termination will not * destroy it, do it ourself */ _starpu_task_destroy(task); } else { /* Let _starpu_handle_job_termination destroy it */ task->destroy = 1; STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); } } int starpu_task_finished(struct starpu_task *task) { STARPU_ASSERT(task); STARPU_ASSERT_MSG(!task->detach, "starpu_task_finished can only be called on tasks with detach = 0"); return _starpu_job_finished(_starpu_get_job_associated_to_task(task)); } int starpu_task_wait(struct starpu_task *task) { _STARPU_LOG_IN(); STARPU_ASSERT(task); STARPU_ASSERT_MSG(!task->detach, "starpu_task_wait can only be called on tasks with detach = 0"); if (task->detach || task->synchronous) { _STARPU_DEBUG("Task is detached or synchronous. Waiting returns immediately\n"); _STARPU_LOG_OUT_TAG("einval"); return -EINVAL; } STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait must not be called from a task or callback"); struct _starpu_job *j = _starpu_get_job_associated_to_task(task); _STARPU_TRACE_TASK_WAIT_START(j); starpu_do_schedule(); _starpu_wait_job(j); /* as this is a synchronous task, the liberation of the job structure was deferred */ if (task->destroy) _starpu_task_destroy(task); _starpu_perf_counter_update_global_sample(); _STARPU_TRACE_TASK_WAIT_END(); _STARPU_LOG_OUT(); return 0; } int starpu_task_wait_array(struct starpu_task **tasks, unsigned nb_tasks) { unsigned i; for (i = 0; i < nb_tasks; i++) { int ret = starpu_task_wait(tasks[i]); if (ret) return ret; } return 0; } #ifdef STARPU_OPENMP int _starpu_task_test_termination(struct starpu_task *task) { STARPU_ASSERT(task); STARPU_ASSERT_MSG(!task->detach, "starpu_task_wait can only be called on tasks with detach = 0"); if (task->detach || task->synchronous) { _STARPU_DEBUG("Task is detached or synchronous\n"); _STARPU_LOG_OUT_TAG("einval"); return -EINVAL; } struct _starpu_job *j = (struct _starpu_job *)task->starpu_private; int ret = _starpu_test_job_termination(j); if (ret) { if (task->destroy) _starpu_task_destroy(task); } return ret; } #endif /* NB in case we have a regenerable task, it is possible that the job was * already counted. */ int _starpu_submit_job(struct _starpu_job *j, int nodeps) { struct starpu_task *task = j->task; int ret; #ifdef STARPU_OPENMP const unsigned continuation = j->continuation; #else const unsigned continuation = 0; #endif _STARPU_LOG_IN(); /* notify bound computation of a new task */ _starpu_bound_record(j); _starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx); _starpu_sched_task_submit(task); #ifdef STARPU_USE_SC_HYPERVISOR struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(j->task->sched_ctx); if(sched_ctx != NULL && j->task->sched_ctx != _starpu_get_initial_sched_ctx()->id && j->task->sched_ctx != STARPU_NMAX_SCHED_CTXS && sched_ctx->perf_counters != NULL) { struct starpu_perfmodel_arch arch; _STARPU_MALLOC(arch.devices, sizeof(struct starpu_perfmodel_device)); arch.ndevices = 1; arch.devices[0].type = STARPU_CPU_WORKER; arch.devices[0].devid = 0; arch.devices[0].ncores = 1; _starpu_compute_buffers_footprint(j->task->cl->model, &arch, 0, j); free(arch.devices); size_t data_size = 0; if (j->task->cl) { unsigned i, nbuffers = STARPU_TASK_GET_NBUFFERS(j->task); for(i = 0; i < nbuffers; i++) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); if (handle != NULL) data_size += _starpu_data_get_size(handle); } } _STARPU_TRACE_HYPERVISOR_BEGIN(); sched_ctx->perf_counters->notify_submitted_job(j->task, j->footprint, data_size); _STARPU_TRACE_HYPERVISOR_END(); } #endif//STARPU_USE_SC_HYPERVISOR /* We retain handle reference count */ if (task->cl && !continuation) { unsigned i; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); for (i=0; iheader_lock); handle->busy_count++; _starpu_spin_unlock(&handle->header_lock); } } STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); _starpu_handle_job_submission(j); #ifdef STARPU_OPENMP if (continuation) { j->discontinuous = 1; j->continuation = 0; } #endif if (nodeps) { ret = _starpu_take_deps_and_schedule(j); } else { #ifdef STARPU_OPENMP if (continuation) { ret = _starpu_reenforce_task_deps_and_schedule(j); } else #endif { ret = _starpu_enforce_deps_and_schedule(j); } } _STARPU_LOG_OUT(); return ret; } /* Note: this is racy, so valgrind would complain. But since we'll always put * the same values, this is not a problem. */ void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl) { if (!cl) return; if (cl->checked) { STARPU_RMB(); return; } uint32_t where = cl->where; int is_where_unset = where == 0; unsigned i, some_impl; /* Check deprecated and unset fields (where, _func, * _funcs) */ #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID) /* CPU */ if (cl->cpu_func && cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS && cl->cpu_funcs[0]) { _STARPU_DISP("[warning] [struct starpu_codelet] both cpu_func and cpu_funcs are set. Ignoring cpu_func.\n"); cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS; } if (cl->cpu_func && cl->cpu_func != STARPU_MULTIPLE_CPU_IMPLEMENTATIONS) { cl->cpu_funcs[0] = cl->cpu_func; cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS; } some_impl = 0; for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) if (cl->cpu_funcs[i]) { some_impl = 1; break; } if (some_impl && cl->cpu_func == 0) { cl->cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS; } if (some_impl && is_where_unset) { where |= STARPU_CPU; } #endif #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) /* CUDA */ if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS && cl->cuda_funcs[0]) { _STARPU_DISP("[warning] [struct starpu_codelet] both cuda_func and cuda_funcs are set. Ignoring cuda_func.\n"); cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS; } if (cl->cuda_func && cl->cuda_func != STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS) { cl->cuda_funcs[0] = cl->cuda_func; cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS; } some_impl = 0; for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) if (cl->cuda_funcs[i]) { some_impl = 1; break; } if (some_impl && cl->cuda_func == 0) { cl->cuda_func = STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS; } if (some_impl && is_where_unset) { where |= STARPU_CUDA; } #endif #if defined(STARPU_USE_HIP) some_impl = 0; for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) if (cl->hip_funcs[i]) { some_impl = 1; break; } if (some_impl && is_where_unset) { where |= STARPU_HIP; } #endif #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) /* OpenCL */ if (cl->opencl_func && cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS && cl->opencl_funcs[0]) { _STARPU_DISP("[warning] [struct starpu_codelet] both opencl_func and opencl_funcs are set. Ignoring opencl_func.\n"); cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS; } if (cl->opencl_func && cl->opencl_func != STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS) { cl->opencl_funcs[0] = cl->opencl_func; cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS; } some_impl = 0; for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) if (cl->opencl_funcs[i]) { some_impl = 1; break; } if (some_impl && cl->opencl_func == 0) { cl->opencl_func = STARPU_MULTIPLE_OPENCL_IMPLEMENTATIONS; } if (some_impl && is_where_unset) { where |= STARPU_OPENCL; } #endif #ifdef STARPU_USE_MAX_FPGA /* FPGA */ some_impl = 0; for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) if (cl->max_fpga_funcs[i]) { some_impl = 1; break; } if (some_impl && is_where_unset) { where |= STARPU_MAX_FPGA; } #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE some_impl = 0; for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) if (cl->cpu_funcs_name[i]) { some_impl = 1; break; } if (some_impl && is_where_unset) { where |= STARPU_MPI_MS; } #endif #ifdef STARPU_USE_TCPIP_MASTER_SLAVE some_impl = 0; for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) if (cl->cpu_funcs_name[i]) { some_impl = 1; break; } if (some_impl && is_where_unset) { where |= STARPU_TCPIP_MS; } #endif cl->where = where; STARPU_WMB(); cl->checked = 1; } void _starpu_task_check_deprecated_fields(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED) { /* None any more */ } static int _starpu_task_submit_head(struct starpu_task *task) { unsigned is_sync = task->synchronous; struct _starpu_job *j = _starpu_get_job_associated_to_task(task); if (task->status == STARPU_TASK_STOPPED || task->status == STARPU_TASK_FINISHED) task->status = STARPU_TASK_INIT; else STARPU_ASSERT(task->status == STARPU_TASK_INIT); #ifdef STARPU_BUBBLE if ((j->task->bubble_func && j->task->bubble_func(j->task, j->task->bubble_func_arg)) || (j->task->cl && j->task->cl->bubble_func && j->task->cl->bubble_func(j->task, j->task->bubble_func_arg))) j->is_bubble = 1; else j->is_bubble = 0; #endif if (j->internal) { // Internal tasks are submitted to initial context task->sched_ctx = _starpu_get_initial_sched_ctx()->id; // And we don't want them to interfere with submit order ids task->no_submitorder = 1; } else if (task->sched_ctx == STARPU_NMAX_SCHED_CTXS) { // If the task has not specified a context, we set the current context task->sched_ctx = _starpu_sched_ctx_get_current_context(); } if (is_sync) { /* Perhaps it is not possible to submit a synchronous * (blocking) task */ STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "submitting a synchronous task must not be done from a task or a callback"); task->detach = 0; } #ifdef STARPU_DEBUG if (task->workerids) { unsigned i; for (i = 0; i < task->workerids_len; i++) if (task->workerids[i] != 0) break; STARPU_ASSERT_MSG(i < task->workerids_len, "The workerids array can't contain only zeros, it would not be executable at all."); } #endif _starpu_task_check_deprecated_fields(task); _starpu_codelet_check_deprecated_fields(task->cl); if (task->where== -1 && task->cl) task->where = task->cl->where; if (task->cl) { unsigned i; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); _STARPU_TRACE_UPDATE_TASK_CNT(0); /* Check buffers */ if (task->dyn_handles == NULL) STARPU_ASSERT_MSG_ALWAYS(STARPU_TASK_GET_NBUFFERS(task) <= STARPU_NMAXBUFS, "Codelet %p has too many buffers (%d vs max %d). Either use --enable-maxbuffers configure option to increase the max, or use dyn_handles instead of handles.", task->cl, STARPU_TASK_GET_NBUFFERS(task), STARPU_NMAXBUFS); if (STARPU_UNLIKELY(task->dyn_handles)) { _STARPU_MALLOC(task->dyn_interfaces, nbuffers * sizeof(void *)); } struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); for (i = 0; i < nbuffers; i++) { starpu_data_handle_t handle = descrs[i].handle; enum starpu_data_access_mode mode = descrs[i].mode; int node = task->cl->specific_nodes ? STARPU_CODELET_GET_NODE(task->cl, i) : -1; /* Make sure handles are valid */ STARPU_ASSERT_MSG(handle->magic == _STARPU_TASK_MAGIC, "data %p is invalid (was it already unregistered?)", handle); /* Make sure handles are not partitioned */ STARPU_ASSERT_MSG(handle->nchildren == 0, "only unpartitioned data (or the pieces of a partitioned data) can be used in a task"); /* Make sure the specified node exists */ STARPU_ASSERT_MSG(node == STARPU_SPECIFIC_NODE_LOCAL || node == STARPU_SPECIFIC_NODE_CPU || node == STARPU_SPECIFIC_NODE_SLOW || node == STARPU_SPECIFIC_NODE_LOCAL_OR_CPU || node == STARPU_SPECIFIC_NODE_NONE || (node >= 0 && node < (int) starpu_memory_nodes_get_count()), "The codelet-specified memory node does not exist"); /* Provide the home interface for now if any, * for can_execute hooks */ if (handle->home_node != -1) _STARPU_TASK_SET_INTERFACE(task, starpu_data_get_interface_on_node(handle, handle->home_node), i); if (!(task->cl->flags & STARPU_CODELET_NOPLANS) && ((handle->nplans && !handle->nchildren) || handle->siblings) #ifdef STARPU_BUBBLE && !j->is_bubble /* * => require to set the is_bubble a soon as possible and not in the turn_task_into_bubble. */ #endif && !(mode & STARPU_NOPLAN)) /* This handle is involved with asynchronous * partitioning as a parent or a child, make * sure the right plan is active, submit * appropriate partitioning / unpartitioning if * not */ _starpu_data_partition_access_submit(handle, (mode & (STARPU_W|STARPU_REDUX)) != 0); } /* Check the type of worker(s) required by the task exist */ if (STARPU_UNLIKELY(!_starpu_worker_exists(task))) { _STARPU_LOG_OUT_TAG("ENODEV"); return -ENODEV; } /* In case we require that a task should be explicitly * executed on a specific worker, we make sure that the worker * is able to execute this task. */ if (STARPU_UNLIKELY(task->execute_on_a_specific_worker && !starpu_combined_worker_can_execute_task(task->workerid, task, 0))) { _STARPU_LOG_OUT_TAG("ENODEV"); return -ENODEV; } if (task->cl->model) _starpu_init_and_load_perfmodel(task->cl->model); if (task->cl->energy_model) _starpu_init_and_load_perfmodel(task->cl->energy_model); } return 0; } /* application should submit new tasks to StarPU through this function */ int _starpu_task_submit(struct starpu_task *task, int nodeps) { _STARPU_LOG_IN(); STARPU_ASSERT(task); STARPU_ASSERT_MSG(task->magic == _STARPU_TASK_MAGIC, "Tasks must be created with starpu_task_create, or initialized with starpu_task_init."); STARPU_ASSERT_MSG(starpu_is_initialized(), "starpu_init must be called (and return no error) before submitting tasks."); int ret; { /* task knobs */ if (task->priority > __s_max_priority_cap__value) task->priority = __s_max_priority_cap__value; if (task->priority < __s_min_priority_cap__value) task->priority = __s_min_priority_cap__value; } if (task->transaction != NULL) { /* If task is part of a transaction, add its handle to the task * handle list with a STARPU_R access mode to allow concurrency among the epoch * tasks while serializing it with epoch and transactions operations */ STARPU_ASSERT(task->cl->nbuffers == STARPU_VARIABLE_NBUFFERS); STARPU_ASSERT(!_starpu_trs_epoch_list_empty(&task->transaction->epoch_list)); task->trs_epoch = _starpu_trs_epoch_list_back(&task->transaction->epoch_list); int nbuffers = task->nbuffers; int allocated_nbuffers = (task->dyn_handles != NULL)?nbuffers:0; task->nbuffers++; starpu_task_insert_data_process_arg(task->cl, task, &allocated_nbuffers, &nbuffers, STARPU_R, task->transaction->handle); } unsigned is_sync = task->synchronous; starpu_task_bundle_t bundle = task->bundle; STARPU_ASSERT_MSG(!(nodeps && bundle), "not supported\n"); /* internally, StarPU manipulates a struct _starpu_job * which is a wrapper around a * task structure, it is possible that this job structure was already * allocated. */ struct _starpu_job *j = _starpu_get_job_associated_to_task(task); const unsigned continuation = #ifdef STARPU_OPENMP j->continuation #else 0 #endif ; if (!_starpu_perf_counter_paused() && !j->internal && !continuation) { (void) STARPU_PERF_COUNTER_ADD64(&_starpu_task__g_total_submitted__value, 1); int64_t value = STARPU_PERF_COUNTER_ADD64(&_starpu_task__g_current_submitted__value, 1); _starpu_perf_counter_update_max_int64(&_starpu_task__g_peak_submitted__value, value); _starpu_perf_counter_update_global_sample(); if (task->cl && task->cl->perf_counter_values) { struct starpu_perf_counter_sample_cl_values * const pcv = task->cl->perf_counter_values; (void) STARPU_PERF_COUNTER_ADD64(&pcv->task.total_submitted, 1); value = STARPU_PERF_COUNTER_ADD64(&pcv->task.current_submitted, 1); _starpu_perf_counter_update_max_int64(&pcv->task.peak_submitted, value); _starpu_perf_counter_update_per_codelet_sample(task->cl); } } STARPU_ASSERT_MSG(!(nodeps && continuation), "not supported\n"); if (!j->internal && limit_max_submitted_tasks >= 0 && limit_min_submitted_tasks >= 0) { int nsubmitted_tasks = starpu_task_nsubmitted(); if (limit_max_submitted_tasks < nsubmitted_tasks && limit_min_submitted_tasks < nsubmitted_tasks) { starpu_do_schedule(); _STARPU_TRACE_TASK_THROTTLE_START(); starpu_task_wait_for_n_submitted(limit_min_submitted_tasks); _STARPU_TRACE_TASK_THROTTLE_END(); } } _STARPU_TRACE_TASK_SUBMIT_START(); if (task->cl && !continuation) { _starpu_job_set_ordered_buffers(j); } ret = _starpu_task_submit_head(task); if (ret) { _STARPU_TRACE_TASK_SUBMIT_END(); return ret; } if (!continuation) { #ifndef STARPU_NO_ASSERT STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); STARPU_ASSERT_MSG(!j->submitted || j->terminated >= 1, "Tasks can not be submitted a second time before being terminated. Please use different task structures, or use the regenerate flag to let the task resubmit itself automatically."); STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); #endif _STARPU_TRACE_TASK_SUBMIT(j, _starpu_get_sched_ctx_struct(task->sched_ctx)->iterations[0], _starpu_get_sched_ctx_struct(task->sched_ctx)->iterations[1]); } /* If this is a continuation, we don't modify the implicit data dependencies detected earlier. */ if (task->cl && !continuation && !nodeps #ifdef STARPU_BUBBLE && !j->is_bubble #endif ) { _starpu_detect_implicit_data_deps(task); } if (STARPU_UNLIKELY(bundle)) { /* We need to make sure that models for other tasks of the * bundle are also loaded, so the scheduler can estimate the * duration of the whole bundle */ STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex); struct _starpu_task_bundle_entry *entry; entry = bundle->list; while (entry) { if (entry->task->cl->model) _starpu_init_and_load_perfmodel(entry->task->cl->model); if (entry->task->cl->energy_model) _starpu_init_and_load_perfmodel(entry->task->cl->energy_model); entry = entry->next; } STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); } /* If profiling is activated, we allocate a structure to store the * appropriate info. */ struct starpu_profiling_task_info *info = task->profiling_info; int profiling = starpu_profiling_status_get(); if (!info) { info = _starpu_allocate_profiling_info_if_needed(task); task->profiling_info = info; } /* The task is considered as block until we are sure there remains not * dependency. */ task->status = STARPU_TASK_BLOCKED; if (STARPU_UNLIKELY(profiling)) _starpu_clock_gettime(&info->submit_time); ret = _starpu_submit_job(j, nodeps); #ifdef STARPU_SIMGRID if (_starpu_simgrid_task_submit_cost()) starpu_sleep(0.000001); #endif if (is_sync) { if (starpu_is_paused()) { static int warned; if (!warned) { warned = 1; _STARPU_DISP("[warning]: A task with synchronous=1 was submitted after calling starpu_pause(). We will thus hang until starpu_resume() gets called.\n"); } } _starpu_sched_do_schedule(task->sched_ctx); _starpu_wait_job(j); if (task->destroy) _starpu_task_destroy(task); } _STARPU_TRACE_TASK_SUBMIT_END(); _STARPU_LOG_OUT(); return ret; } #undef starpu_task_submit int starpu_task_submit(struct starpu_task *task) { #ifdef STARPU_BUBBLE_VERBOSE struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); unsigned long long timestamp = 1000000000ULL*tp.tv_sec + tp.tv_nsec; _STARPU_DEBUG("{%llu} [%s(%p)] Submission | id %lu\n", timestamp, starpu_task_get_name(task), task, starpu_task_get_job_id(task)); #endif return _starpu_task_submit(task, 0); } int _starpu_task_submit_internally(struct starpu_task *task) { struct _starpu_job *j = _starpu_get_job_associated_to_task(task); j->internal = 1; return starpu_task_submit(task); } /* application should submit new tasks to StarPU through this function */ int starpu_task_submit_to_ctx(struct starpu_task *task, unsigned sched_ctx_id) { task->sched_ctx = sched_ctx_id; return starpu_task_submit(task); } /* The StarPU core can submit tasks directly to the scheduler or a worker, * skipping dependencies completely (when it knows what it is doing). */ int starpu_task_submit_nodeps(struct starpu_task *task) { return _starpu_task_submit(task, 1); } /* * worker->sched_mutex must be locked when calling this function. */ int _starpu_task_submit_conversion_task(struct starpu_task *task, unsigned int workerid) { int ret; STARPU_ASSERT(task->cl); STARPU_ASSERT(task->execute_on_a_specific_worker); struct _starpu_job *j = _starpu_get_job_associated_to_task(task); _starpu_job_set_ordered_buffers(j); ret = _starpu_task_submit_head(task); STARPU_ASSERT(ret == 0); /* We retain handle reference count that would have been acquired by data dependencies. */ unsigned i; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); for (i=0; iheader_lock); handle->busy_count++; _starpu_spin_unlock(&handle->header_lock); } _starpu_increment_nsubmitted_tasks_of_sched_ctx(j->task->sched_ctx); _starpu_sched_task_submit(task); STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); _starpu_handle_job_submission(j); _starpu_increment_nready_tasks_of_sched_ctx(j->task->sched_ctx, j->task->flops, j->task); _starpu_job_set_ordered_buffers(j); STARPU_ASSERT(task->status == STARPU_TASK_INIT); task->status = STARPU_TASK_READY; _starpu_profiling_set_task_push_start_time(task); unsigned node = starpu_worker_get_memory_node(workerid); if (starpu_get_prefetch_flag()) starpu_prefetch_task_input_on_node(task, node); struct _starpu_worker *worker; worker = _starpu_get_worker_struct(workerid); starpu_task_prio_list_push_back(&worker->local_tasks, task); starpu_wake_worker_locked(worker->workerid); _starpu_profiling_set_task_push_end_time(task); STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); return 0; } void starpu_codelet_init(struct starpu_codelet *cl) { memset(cl, 0, sizeof(struct starpu_codelet)); } #define _STARPU_CODELET_WORKER_NAME_LEN 32 void starpu_codelet_display_stats(struct starpu_codelet *cl) { unsigned worker; unsigned nworkers = starpu_worker_get_count(); if (cl->name) fprintf(stderr, "Statistics for codelet %s\n", cl->name); else if (cl->model && cl->model->symbol) fprintf(stderr, "Statistics for codelet %s\n", cl->model->symbol); unsigned long total = 0; for (worker = 0; worker < nworkers; worker++) total += cl->per_worker_stats[worker]; for (worker = 0; worker < nworkers; worker++) { char name[_STARPU_CODELET_WORKER_NAME_LEN]; starpu_worker_get_name(worker, name, _STARPU_CODELET_WORKER_NAME_LEN); fprintf(stderr, "\t%s -> %lu / %lu (%2.2f %%)\n", name, cl->per_worker_stats[worker], total, (100.0f*cl->per_worker_stats[worker])/total); } } /* * We wait for all tasks that have been submitted to the scheduling context and its nested contexts */ void _starpu_do_schedule_in_nested_ctx(unsigned sched_ctx_id) { struct _starpu_machine_config *config = _starpu_get_machine_config(); unsigned s; for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) { if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS && config->sched_ctxs[s].do_schedule == 1 && config->sched_ctxs[s].nesting_sched_ctx == sched_ctx_id && s != sched_ctx_id) { _starpu_do_schedule_in_nested_ctx(s); } } _starpu_sched_do_schedule(sched_ctx_id); } int _starpu_task_wait_for_all_in_nested_ctx_and_return_nb_waited_tasks(unsigned sched_ctx_id) { struct _starpu_machine_config *config = _starpu_get_machine_config(); unsigned nb_waited_tasks = 0; unsigned s; for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) { if(config->sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS && config->sched_ctxs[s].nesting_sched_ctx == sched_ctx_id && s != sched_ctx_id) { _STARPU_DEBUG("Recursively waiting for tasks submitted to sub context %u of %u\n", s, sched_ctx_id); nb_waited_tasks += _starpu_task_wait_for_all_in_nested_ctx_and_return_nb_waited_tasks(s); } } nb_waited_tasks += _starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(sched_ctx_id); return nb_waited_tasks; } /* * We wait for all the tasks that have already been submitted. Note that a * regenerable is not considered finished until it was explicitly set as * non-regenerale anymore (eg. from a callback). */ int _starpu_task_wait_for_all_and_return_nb_waited_tasks(void) { unsigned nsched_ctxs = _starpu_get_nsched_ctxs(); unsigned sched_ctx_id = nsched_ctxs == 1 ? 0 : starpu_sched_ctx_get_context(); /* if there is no indication about which context to wait, we wait for all tasks submitted to starpu */ if (sched_ctx_id == STARPU_NMAX_SCHED_CTXS) { _STARPU_DEBUG("Waiting for all tasks\n"); STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait_for_all must not be called from a task or callback"); STARPU_AYU_BARRIER(); struct _starpu_machine_config *config = _starpu_get_machine_config(); if(config->topology.nsched_ctxs == 1) { _starpu_sched_do_schedule(0); return _starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(0); } else { int s; for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) { if(config->sched_ctxs[s].do_schedule == 1) { _starpu_sched_do_schedule(config->sched_ctxs[s].id); } } for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) { if(config->sched_ctxs[s].do_schedule == 1) { starpu_task_wait_for_all_in_ctx(config->sched_ctxs[s].id); } } return 0; } } else { // _starpu_sched_do_schedule(sched_ctx_id); // _STARPU_DEBUG("Waiting for tasks submitted to context %u\n", sched_ctx_id); // return _starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(sched_ctx_id); _starpu_do_schedule_in_nested_ctx(sched_ctx_id); _STARPU_DEBUG("Waiting for tasks submitted to context %u\n", sched_ctx_id); return _starpu_task_wait_for_all_in_nested_ctx_and_return_nb_waited_tasks(sched_ctx_id); } } int starpu_task_wait_for_all(void) { _starpu_task_wait_for_all_and_return_nb_waited_tasks(); if (!_starpu_perf_counter_paused()) _starpu_perf_counter_update_global_sample(); return 0; } int _starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(unsigned sched_ctx) { _STARPU_TRACE_TASK_WAIT_FOR_ALL_START(); int ret = _starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx); _STARPU_TRACE_TASK_WAIT_FOR_ALL_END(); /* TODO: improve Temanejo into knowing about contexts ... */ STARPU_AYU_BARRIER(); return ret; } int starpu_task_wait_for_all_in_ctx(unsigned sched_ctx) { _starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(sched_ctx); if (!_starpu_perf_counter_paused()) _starpu_perf_counter_update_global_sample(); return 0; } /* * We wait until there's a certain number of the tasks that have already been * submitted left. Note that a regenerable is not considered finished until it * was explicitly set as non-regenerale anymore (eg. from a callback). */ int starpu_task_wait_for_n_submitted(unsigned n) { unsigned nsched_ctxs = _starpu_get_nsched_ctxs(); unsigned sched_ctx_id = nsched_ctxs == 1 ? 0 : starpu_sched_ctx_get_context(); /* if there is no indication about which context to wait, we wait for all tasks submitted to starpu */ if (sched_ctx_id == STARPU_NMAX_SCHED_CTXS) { _STARPU_DEBUG("Waiting for all tasks\n"); STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait_for_n_submitted must not be called from a task or callback"); struct _starpu_machine_config *config = _starpu_get_machine_config(); if(config->topology.nsched_ctxs == 1) _starpu_wait_for_n_submitted_tasks_of_sched_ctx(0, n); else { int s; for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) { if(config->sched_ctxs[s].do_schedule == 1) { _starpu_wait_for_n_submitted_tasks_of_sched_ctx(config->sched_ctxs[s].id, n); } } } } else { _STARPU_DEBUG("Waiting for tasks submitted to context %u\n", sched_ctx_id); _starpu_wait_for_n_submitted_tasks_of_sched_ctx(sched_ctx_id, n); } if (!_starpu_perf_counter_paused()) _starpu_perf_counter_update_global_sample(); return 0; } int starpu_task_wait_for_n_submitted_in_ctx(unsigned sched_ctx, unsigned n) { _starpu_wait_for_n_submitted_tasks_of_sched_ctx(sched_ctx, n); if (!_starpu_perf_counter_paused()) _starpu_perf_counter_update_global_sample(); return 0; } /* * We wait until there is no ready task any more (i.e. StarPU will not be able * to progress any more). */ int starpu_task_wait_for_no_ready(void) { STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_task_wait_for_no_ready must not be called from a task or callback"); struct _starpu_machine_config *config = _starpu_get_machine_config(); if(config->topology.nsched_ctxs == 1) { _starpu_sched_do_schedule(0); _starpu_wait_for_no_ready_of_sched_ctx(0); } else { int s; for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) { if(config->sched_ctxs[s].do_schedule == 1) { _starpu_sched_do_schedule(config->sched_ctxs[s].id); } } for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) { if(config->sched_ctxs[s].do_schedule == 1) { _starpu_wait_for_no_ready_of_sched_ctx(config->sched_ctxs[s].id); } } } if (!_starpu_perf_counter_paused()) _starpu_perf_counter_update_global_sample(); return 0; } void starpu_iteration_push(unsigned long iteration) { unsigned id = _starpu_sched_ctx_get_current_context(); STARPU_ASSERT(id <= STARPU_NMAX_SCHED_CTXS); struct _starpu_sched_ctx *ctx = _starpu_get_sched_ctx_struct(id); unsigned level = ctx->iteration_level++; if (level < sizeof(ctx->iterations)/sizeof(ctx->iterations[0])) ctx->iterations[level] = iteration; } void starpu_iteration_pop(void) { unsigned id = _starpu_sched_ctx_get_current_context(); STARPU_ASSERT(id <= STARPU_NMAX_SCHED_CTXS); struct _starpu_sched_ctx *ctx = _starpu_get_sched_ctx_struct(id); STARPU_ASSERT_MSG(ctx->iteration_level > 0, "calls to starpu_iteration_pop must match starpu_iteration_push calls"); unsigned level = ctx->iteration_level--; if (level < sizeof(ctx->iterations)/sizeof(ctx->iterations[0])) ctx->iterations[level] = -1; } void starpu_do_schedule(void) { struct _starpu_machine_config *config = _starpu_get_machine_config(); if(config->topology.nsched_ctxs == 1) _starpu_sched_do_schedule(0); else { int s; for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) { if(config->sched_ctxs[s].do_schedule == 1) { _starpu_sched_do_schedule(config->sched_ctxs[s].id); } } } } void starpu_drivers_request_termination(void) { struct _starpu_machine_config *config = _starpu_get_machine_config(); STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex); int nsubmitted = starpu_task_nsubmitted(); config->submitting = 0; if (nsubmitted == 0) { ANNOTATE_HAPPENS_AFTER(&config->running); config->running = 0; ANNOTATE_HAPPENS_BEFORE(&config->running); STARPU_WMB(); int s; for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) { if(config->sched_ctxs[s].do_schedule == 1) { _starpu_check_nsubmitted_tasks_of_sched_ctx(config->sched_ctxs[s].id); } } } STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex); } int starpu_task_nsubmitted(void) { int nsubmitted = 0; struct _starpu_machine_config *config = _starpu_get_machine_config(); if(config->topology.nsched_ctxs == 1) nsubmitted = _starpu_get_nsubmitted_tasks_of_sched_ctx(0); else { int s; for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) { if(config->sched_ctxs[s].do_schedule == 1) { nsubmitted += _starpu_get_nsubmitted_tasks_of_sched_ctx(config->sched_ctxs[s].id); } } } return nsubmitted; } int starpu_task_nready(void) { int nready = 0; struct _starpu_machine_config *config = _starpu_get_machine_config(); if(config->topology.nsched_ctxs == 1) nready = starpu_sched_ctx_get_nready_tasks(0); else { int s; for(s = 0; s < STARPU_NMAX_SCHED_CTXS; s++) { if(config->sched_ctxs[s].do_schedule == 1) { nready += starpu_sched_ctx_get_nready_tasks(config->sched_ctxs[s].id); } } } return nready; } /* Return the task currently executed by the worker, or NULL if this is called * either from a thread that is not a task or simply because there is no task * being executed at the moment. */ struct starpu_task *starpu_task_get_current(void) { return (struct starpu_task *) STARPU_PTHREAD_GETSPECIFIC(current_task_key); } void _starpu_set_current_task(struct starpu_task *task) { STARPU_PTHREAD_SETSPECIFIC(current_task_key, task); } struct starpu_task *starpu_worker_get_current_task(unsigned workerid) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); if (worker->pipeline_length) return worker->current_tasks[worker->first_task]; else return worker->current_task; } int starpu_task_get_current_data_node(unsigned i) { struct starpu_task *task = starpu_task_get_current(); if (!task) return -1; struct _starpu_job *j = _starpu_get_job_associated_to_task(task); struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); unsigned orderedindex = descrs[i].orderedindex; return descrs[orderedindex].node; } #ifdef STARPU_OPENMP /* Prepare the fields of the current task for accepting a new set of * dependencies in anticipation of becoming a continuation. * * When the task becomes 'continued', it will only be queued again when the new * set of dependencies is fulfilled. */ void _starpu_task_prepare_for_continuation(void) { _starpu_job_prepare_for_continuation(_starpu_get_job_associated_to_task(starpu_task_get_current())); } void _starpu_task_prepare_for_continuation_ext(unsigned continuation_resubmit, void (*continuation_callback_on_sleep)(void *arg), void *continuation_callback_on_sleep_arg) { _starpu_job_prepare_for_continuation_ext(_starpu_get_job_associated_to_task(starpu_task_get_current()), continuation_resubmit, continuation_callback_on_sleep, continuation_callback_on_sleep_arg); } void _starpu_task_set_omp_cleanup_callback(struct starpu_task *task, void (*omp_cleanup_callback)(void *arg), void *omp_cleanup_callback_arg) { _starpu_job_set_omp_cleanup_callback(_starpu_get_job_associated_to_task(task), omp_cleanup_callback, omp_cleanup_callback_arg); } #endif /* * Returns 0 if tasks does not use any multiformat handle, 1 otherwise. */ int _starpu_task_uses_multiformat_handles(struct starpu_task *task) { unsigned i; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); for (i = 0; i < nbuffers; i++) { if (_starpu_data_is_multiformat_handle(STARPU_TASK_GET_HANDLE(task, i))) return 1; } return 0; } /* * Checks whether the given handle needs to be converted in order to be used on * the node given as the second argument. */ int _starpu_handle_needs_conversion_task(starpu_data_handle_t handle, unsigned int node) { return _starpu_handle_needs_conversion_task_for_arch(handle, starpu_node_get_kind(node)); } int _starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle, enum starpu_node_kind node_kind) { /* * Here, we assume that CUDA devices and OpenCL devices use the * same data structure. A conversion is only needed when moving * data from a CPU to a GPU, or the other way around. */ switch (node_kind) { case STARPU_CPU_RAM: case STARPU_MPI_MS_RAM: case STARPU_TCPIP_MS_RAM: switch(starpu_node_get_kind(handle->mf_node)) { case STARPU_CPU_RAM: case STARPU_MPI_MS_RAM: case STARPU_TCPIP_MS_RAM: return 0; default: return 1; } break; default: switch(starpu_node_get_kind(handle->mf_node)) { case STARPU_CPU_RAM: case STARPU_MPI_MS_RAM: case STARPU_TCPIP_MS_RAM: return 1; default: return 0; } break; } /* that instruction should never be reached */ return -EINVAL; } void starpu_task_set_implementation(struct starpu_task *task, unsigned impl) { _starpu_get_job_associated_to_task(task)->nimpl = impl; } unsigned starpu_task_get_implementation(struct starpu_task *task) { return _starpu_get_job_associated_to_task(task)->nimpl; } unsigned long starpu_task_get_job_id(struct starpu_task *task) { return _starpu_get_job_associated_to_task(task)->job_id; } static starpu_pthread_t watchdog_thread; static int sleep_some(float timeout) { /* If we do a sleep(timeout), we might have to wait too long at the end of the computation. */ /* To avoid that, we do several sleep() of 1s (and check after each if starpu is still running) */ float t; for (t = timeout ; t > 1.; t--) { starpu_sleep(1.); if (!_starpu_machine_is_running()) /* Application finished, don't bother finishing the sleep */ return 0; } /* and one final sleep (of less than 1 s) with the rest (if needed) */ if (t > 0.) starpu_sleep(t); _starpu_crash_call_hooks(); return 1; } /* Check from times to times that StarPU does finish some tasks */ static void *watchdog_func(void *arg) { char *timeout_env = arg; float timeout, delay; #ifdef _MSC_VER timeout = ((float) _atoi64(timeout_env)) / 1000000; #else timeout = ((float) atoll(timeout_env)) / 1000000; #endif delay = ((float) watchdog_delay) / 1000000; struct _starpu_machine_config *config = _starpu_get_machine_config(); starpu_pthread_setname("watchdog"); if (!sleep_some(delay)) return NULL; STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex); while (_starpu_machine_is_running()) { int last_nsubmitted = starpu_task_nsubmitted(); config->watchdog_ok = 0; STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex); if (!sleep_some(timeout)) return NULL; STARPU_PTHREAD_MUTEX_LOCK(&config->submitted_mutex); if (!config->watchdog_ok && last_nsubmitted && last_nsubmitted == starpu_task_nsubmitted()) { if (watchdog_hook == NULL) _STARPU_MSG("The StarPU watchdog detected that no task finished for %fs (can be configured through STARPU_WATCHDOG_TIMEOUT)\n", timeout); else watchdog_hook(watchdog_hook_arg); if (watchdog_crash) { _STARPU_MSG("Crashing the process\n"); raise(SIGABRT); } else if (watchdog_hook == NULL) _STARPU_MSG("Set the STARPU_WATCHDOG_CRASH environment variable if you want to abort the process in such a case\n"); } /* Only shout again after another period */ config->watchdog_ok = 1; } STARPU_PTHREAD_MUTEX_UNLOCK(&config->submitted_mutex); return NULL; } void starpu_task_watchdog_set_hook(void (*hook)(void *), void *hook_arg) { watchdog_hook = hook; watchdog_hook_arg = hook_arg; } void _starpu_watchdog_init() { struct _starpu_machine_config *config = _starpu_get_machine_config(); char *timeout_env = starpu_getenv("STARPU_WATCHDOG_TIMEOUT"); STARPU_PTHREAD_MUTEX_INIT(&config->submitted_mutex, NULL); if (!timeout_env) return; STARPU_PTHREAD_CREATE(&watchdog_thread, NULL, watchdog_func, timeout_env); } void _starpu_watchdog_shutdown(void) { char *timeout_env = starpu_getenv("STARPU_WATCHDOG_TIMEOUT"); if (!timeout_env) return; STARPU_PTHREAD_JOIN(watchdog_thread, NULL); } /* Transaction clean up callback called when the transaction trs_end * task completes. */ static void _starpu_transaction_callback(void *_p_trs) { struct starpu_transaction *p_trs = _p_trs; _starpu_spin_destroy(&p_trs->lock); starpu_data_unregister_submit(p_trs->handle); starpu_free(p_trs); } /* Task function for the trs_begin and trs_begin_no_sync codelets. */ static void _starpu_transaction_begin(void *buffers[], void *cl_args) { struct starpu_transaction *p_trs = cl_args; STARPU_ASSERT(p_trs->state == _starpu_trs_initialized); _starpu_spin_lock(&p_trs->lock); STARPU_ASSERT(!_starpu_trs_epoch_list_empty(&p_trs->epoch_list)); struct _starpu_trs_epoch *p_epoch = _starpu_trs_epoch_list_front(&p_trs->epoch_list); STARPU_ASSERT(p_epoch->state == _starpu_trs_epoch_inactive); _starpu_spin_unlock(&p_trs->lock); int epoch_confirmed = 1; /* If the transaction has a user 'do_start_func', we call it to * decide whether the new epoch is confirmed or cancelled. */ if (p_trs->do_start_func != NULL) { void * sync_buf = p_epoch->do_sync ? buffers[1] : NULL; epoch_confirmed = p_trs->do_start_func(sync_buf, p_epoch->do_start_arg); } if (epoch_confirmed) { p_epoch->state = _starpu_trs_epoch_confirmed; } else { p_epoch->state = _starpu_trs_epoch_cancelled; } STARPU_WMB(); } /* Task function for the trs_end codelet, in charge of cleaning the last epoch. */ static void _starpu_transaction_end(void *buffers[], void *cl_args) { (void)buffers; struct starpu_transaction *p_trs = cl_args; _starpu_spin_lock(&p_trs->lock); STARPU_ASSERT(!_starpu_trs_epoch_list_empty(&p_trs->epoch_list)); struct _starpu_trs_epoch *p_epoch = _starpu_trs_epoch_list_pop_front(&p_trs->epoch_list); STARPU_ASSERT(p_epoch->state == _starpu_trs_epoch_confirmed || p_epoch->state == _starpu_trs_epoch_cancelled); _starpu_spin_unlock(&p_trs->lock); p_epoch->state = _starpu_trs_epoch_terminated; _starpu_trs_epoch_delete(p_epoch); p_epoch = NULL; /* TODO: transition to end */ STARPU_ASSERT(_starpu_trs_epoch_list_empty(&p_trs->epoch_list)); } /* Task function for the trs_next_epoch codelet, in charge of transitioning from a * an epoch to the next. */ static void _starpu_transaction_next_epoch(void *buffers[], void *cl_args) { struct starpu_transaction *p_trs = cl_args; _starpu_spin_lock(&p_trs->lock); STARPU_ASSERT(!_starpu_trs_epoch_list_empty(&p_trs->epoch_list)); struct _starpu_trs_epoch *p_previous_epoch = _starpu_trs_epoch_list_pop_front(&p_trs->epoch_list); STARPU_ASSERT((p_previous_epoch->state == _starpu_trs_epoch_confirmed) || (p_previous_epoch->state == _starpu_trs_epoch_cancelled)); STARPU_ASSERT(!_starpu_trs_epoch_list_empty(&p_trs->epoch_list)); struct _starpu_trs_epoch *p_next_epoch = _starpu_trs_epoch_list_front(&p_trs->epoch_list); STARPU_ASSERT(p_next_epoch->state == _starpu_trs_epoch_inactive); _starpu_spin_unlock(&p_trs->lock); p_previous_epoch->state = _starpu_trs_epoch_terminated; _starpu_trs_epoch_delete(p_previous_epoch); /* TODO: transition to next epoch */ int epoch_confirmed = 1; if (p_trs->do_start_func != NULL) { void * sync_buf = p_next_epoch->do_sync ? buffers[1] : NULL; epoch_confirmed = p_trs->do_start_func(sync_buf, p_next_epoch->do_start_arg); } if (epoch_confirmed) { p_next_epoch->state = _starpu_trs_epoch_confirmed; } else { p_next_epoch->state = _starpu_trs_epoch_cancelled; } STARPU_WMB(); } /* Transaction begin codelet, without implicit sync on a previously * accessed data. */ struct starpu_codelet _starpu_codelet_trs_begin_no_sync = { .cpu_funcs = {_starpu_transaction_begin}, .modes = {STARPU_W}, .nbuffers = 1, .model = &starpu_perfmodel_nop, .name = "starpu_transaction_begin_no_sync" }; /* Transaction begin codelet, with an implicit sync on a previously * accessed data. */ struct starpu_codelet _starpu_codelet_trs_begin = { .cpu_funcs = {_starpu_transaction_begin}, .modes = {STARPU_W, STARPU_RW}, .nbuffers = 2, .model = &starpu_perfmodel_nop, .name = "starpu_transaction_begin" }; /* Transaction end codelet. */ struct starpu_codelet _starpu_codelet_trs_end = { .cpu_funcs = {_starpu_transaction_end}, .modes = {STARPU_RW}, .nbuffers = 1, .model = &starpu_perfmodel_nop, .name = "starpu_transaction_end" }; /* Epoch transition codelet. */ struct starpu_codelet _starpu_codelet_trs_next_epoch = { .cpu_funcs = {_starpu_transaction_next_epoch}, .modes = {STARPU_RW}, .nbuffers = 1, .model = &starpu_perfmodel_nop, .name = "starpu_transaction_next_epoch" }; /* Main entry point for creating and activating a transaction object. * * . do_start_func: a boolean function to decide whether each new epoch start should * be confirmed or not. * . do_start_sync_handle: a starpu data handle on which the transaction * start should depend on, or NULL if no sync is required. The handle is * passed to do_start_func() * . do_start_arg: an argument passed to do_start_func().*/ static struct starpu_transaction *_do_starpu_transaction_open(int(*do_start_func)(void *buffer, void *arg), starpu_data_handle_t do_start_sync_handle, void *do_start_arg) { struct starpu_transaction *p_trs = NULL; int ret = starpu_malloc((void **)&p_trs, sizeof(*p_trs)); STARPU_ASSERT(ret == 0); _starpu_spin_init(&p_trs->lock); _starpu_trs_epoch_list_init(&p_trs->epoch_list); p_trs->do_start_func = do_start_func; p_trs->dummy_data = 0; starpu_variable_data_register(&p_trs->handle, STARPU_MAIN_RAM, (uintptr_t)&p_trs->dummy_data, sizeof(p_trs->dummy_data)); struct _starpu_trs_epoch *p_epoch = _starpu_trs_epoch_new(); struct starpu_task *task = starpu_task_create(); task->callback_func = NULL; task->cl_arg = p_trs; task->handles[0] = p_trs->handle; if (do_start_sync_handle != NULL) { p_epoch->do_sync = 1; task->cl = &_starpu_codelet_trs_begin; task->handles[1] = do_start_sync_handle; } else { p_epoch->do_sync = 0; task->cl = &_starpu_codelet_trs_begin_no_sync; } p_epoch->is_begin = 1; p_epoch->state = _starpu_trs_epoch_inactive; p_epoch->do_start_arg = do_start_arg; _starpu_trs_epoch_list_push_back(&p_trs->epoch_list, p_epoch); p_trs->state = _starpu_trs_initialized; ret = starpu_task_submit(task); if (ret == -ENODEV) { starpu_data_unregister(p_trs->handle); starpu_free(p_trs); return NULL; } STARPU_ASSERT(ret == 0); return p_trs; } struct starpu_transaction *starpu_transaction_open(int(*do_start_func)(void *buffer, void *arg), void *do_start_arg) { return _do_starpu_transaction_open(do_start_func, NULL, do_start_arg); } void starpu_transaction_close(struct starpu_transaction *p_trs) { STARPU_ASSERT(p_trs->state == _starpu_trs_initialized); struct starpu_task *task = starpu_task_create(); task->cl = &_starpu_codelet_trs_end; task->callback_func = _starpu_transaction_callback; task->callback_arg = p_trs; task->handles[0] = p_trs->handle; task->cl_arg = p_trs; _starpu_spin_lock(&p_trs->lock); STARPU_ASSERT(!_starpu_trs_epoch_list_empty(&p_trs->epoch_list)); struct _starpu_trs_epoch *p_epoch = _starpu_trs_epoch_list_back(&p_trs->epoch_list); _starpu_spin_unlock(&p_trs->lock); p_epoch->is_end = 1; int ret = starpu_task_submit(task); STARPU_ASSERT(ret == 0); } void starpu_transaction_next_epoch(struct starpu_transaction *p_trs, void *do_start_arg) { STARPU_ASSERT(p_trs->state == _starpu_trs_initialized); struct _starpu_trs_epoch *p_epoch = _starpu_trs_epoch_new(); struct starpu_task *task = starpu_task_create(); task->cl = &_starpu_codelet_trs_next_epoch; task->handles[0] = p_trs->handle; task->cl_arg = p_trs; p_epoch->do_sync = 0; p_epoch->do_start_arg = do_start_arg; p_epoch->state = _starpu_trs_epoch_inactive; _starpu_spin_lock(&p_trs->lock); _starpu_trs_epoch_list_push_back(&p_trs->epoch_list, p_epoch); _starpu_spin_unlock(&p_trs->lock); int ret = starpu_task_submit(task); STARPU_ASSERT(ret == 0); } static void _starpu_ft_check_support(const struct starpu_task *task) { unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); unsigned i; for (i = 0; i < nbuffers; i++) { enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, i); STARPU_ASSERT_MSG (mode == STARPU_R || mode == STARPU_W, "starpu_task_failed is only supported for tasks with access modes STARPU_R and STARPU_W"); } } struct starpu_task *starpu_task_ft_create_retry (const struct starpu_task *meta_task, const struct starpu_task *template_task, void (*check_ft)(void *)) { /* Create a new task to actually perform the result */ struct starpu_task *new_task = starpu_task_create(); *new_task = *template_task; new_task->prologue_callback_func = NULL; /* XXX: cl_arg needs to be duplicated */ STARPU_ASSERT_MSG(!meta_task->cl_arg_free || !meta_task->cl_arg, "not supported yet"); STARPU_ASSERT_MSG(!meta_task->callback_func, "not supported"); new_task->callback_func = check_ft; new_task->callback_arg = (void*) meta_task; new_task->callback_arg_free = 0; new_task->prologue_callback_arg_free = 0; STARPU_ASSERT_MSG(!new_task->prologue_callback_pop_arg_free, "not supported"); new_task->use_tag = 0; new_task->synchronous = 0; new_task->destroy = 1; new_task->regenerate = 0; new_task->no_submitorder = 1; new_task->failed = 0; new_task->scheduled = 0; new_task->prefetched = 0; new_task->status = STARPU_TASK_INIT; new_task->profiling_info = NULL; new_task->prev = NULL; new_task->next = NULL; new_task->starpu_private = NULL; new_task->omp_task = NULL; return new_task; } static void _starpu_default_check_ft(void *arg) { struct starpu_task *meta_task = arg; struct starpu_task *current_task = starpu_task_get_current(); struct starpu_task *new_task; int ret; if (!current_task->failed) { starpu_task_ft_success(meta_task); return; } new_task = starpu_task_ft_create_retry (meta_task, current_task, _starpu_default_check_ft); ret = starpu_task_submit_nodeps(new_task); STARPU_ASSERT(!ret); } void starpu_task_ft_prologue(void *arg) { struct starpu_task *meta_task = starpu_task_get_current(); struct starpu_task *new_task; void (*check_ft)(void*) = arg; int ret; if (!check_ft) check_ft = _starpu_default_check_ft; /* Create a task which will do the actual computation */ new_task = starpu_task_ft_create_retry (meta_task, meta_task, check_ft); ret = starpu_task_submit_nodeps(new_task); STARPU_ASSERT(!ret); /* Make the parent task wait for the result getting correct */ starpu_task_end_dep_add(meta_task, 1); meta_task->where = STARPU_NOWHERE; } void starpu_task_ft_failed(struct starpu_task *task) { _starpu_ft_check_support(task); task->failed = 1; } void starpu_task_ft_success(struct starpu_task *meta_task) { starpu_task_end_dep_release(meta_task); } char *starpu_task_status_get_as_string(enum starpu_task_status status) { switch(status) { case(STARPU_TASK_INIT) : return "STARPU_TASK_INIT"; case(STARPU_TASK_BLOCKED): return "STARPU_TASK_BLOCKED"; case(STARPU_TASK_READY): return "STARPU_TASK_READY"; case(STARPU_TASK_RUNNING): return "STARPU_TASK_RUNNING"; case(STARPU_TASK_FINISHED): return "STARPU_TASK_FINISHED"; case(STARPU_TASK_BLOCKED_ON_TAG): return "STARPU_TASK_BLOCKED_ON_TAG"; case(STARPU_TASK_BLOCKED_ON_TASK): return "STARPU_TASK_BLOCKED_ON_TASK"; case(STARPU_TASK_BLOCKED_ON_DATA): return "STARPU_TASK_BLOCKED_ON_DATA"; case(STARPU_TASK_STOPPED): return "STARPU_TASK_STOPPED"; default: return "STARPU_TASK_unknown_status"; } } void starpu_codelet_nop_func(void *descr[], void *arg) { (void)descr; (void)arg; } struct starpu_codelet starpu_codelet_nop = { .cpu_funcs = {starpu_codelet_nop_func}, .cuda_funcs = {starpu_codelet_nop_func}, .hip_funcs = {starpu_codelet_nop_func}, .opencl_funcs = {starpu_codelet_nop_func}, .cpu_funcs_name = {"starpu_codelet_nop_func"}, .model = NULL, .nbuffers = 0 }; starpu-1.4.9+dfsg/src/core/task.h000066400000000000000000000155611507764646700166520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __CORE_TASK_H__ #define __CORE_TASK_H__ /** @file */ #include #include #include #include #pragma GCC visibility push(hidden) /** Internal version of starpu_task_destroy: don't check task->destroy flag */ void _starpu_task_destroy(struct starpu_task *task); #ifdef STARPU_OPENMP /** Test for the termination of the task. * Call starpu_task_destroy if required and the task is terminated. */ int _starpu_task_test_termination(struct starpu_task *task); #endif /** A pthread key is used to store the task currently executed on the thread. * _starpu_task_init initializes this pthread key and * _starpu_set_current_task updates its current value. */ void _starpu_task_init(void); void _starpu_task_deinit(void); void _starpu_set_current_task(struct starpu_task *task); int _starpu_submit_job(struct _starpu_job *j, int nodeps); void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[], int check); #define _STARPU_JOB_UNSET ((struct _starpu_job *) NULL) #define _STARPU_JOB_SETTING ((struct _starpu_job *) 1) /** Returns the job structure (which is the internal data structure associated * to a task). */ struct _starpu_job *_starpu_get_job_associated_to_task_slow(struct starpu_task *task, struct _starpu_job *job); static inline struct _starpu_job *_starpu_get_job_associated_to_task(struct starpu_task *task) { STARPU_ASSERT(task); struct _starpu_job *job = *(struct _starpu_job * volatile *) &task->starpu_private; if (STARPU_LIKELY(job != _STARPU_JOB_UNSET && job != _STARPU_JOB_SETTING)) { /* Already available */ STARPU_RMB(); return job; } return _starpu_get_job_associated_to_task_slow(task, job); } /** Submits starpu internal tasks to the initial context */ int _starpu_task_submit_internally(struct starpu_task *task); int _starpu_handle_needs_conversion_task(starpu_data_handle_t handle, unsigned int node); int _starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle, enum starpu_node_kind node_kind); #ifdef STARPU_OPENMP /** Prepare the current task for accepting new dependencies before becoming a continuation. */ void _starpu_task_prepare_for_continuation_ext(unsigned continuation_resubmit, void (*continuation_callback_on_sleep)(void *arg), void *continuation_callback_on_sleep_arg); void _starpu_task_prepare_for_continuation(void); void _starpu_task_set_omp_cleanup_callback(struct starpu_task *task, void (*omp_cleanup_callback)(void *arg), void *omp_cleanup_callback_arg); #endif int _starpu_task_uses_multiformat_handles(struct starpu_task *task); int _starpu_task_submit_conversion_task(struct starpu_task *task, unsigned int workerid); void _starpu_task_check_deprecated_fields(struct starpu_task *task); void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl); static inline starpu_cpu_func_t _starpu_task_get_cpu_nth_implementation(struct starpu_codelet *cl, unsigned nimpl) { return cl->cpu_funcs[nimpl]; } static inline starpu_cuda_func_t _starpu_task_get_cuda_nth_implementation(struct starpu_codelet *cl, unsigned nimpl) { return cl->cuda_funcs[nimpl]; } static inline starpu_hip_func_t _starpu_task_get_hip_nth_implementation(struct starpu_codelet *cl, unsigned nimpl) { return cl->hip_funcs[nimpl]; } static inline starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl) { return cl->opencl_funcs[nimpl]; } static inline starpu_max_fpga_func_t _starpu_task_get_fpga_nth_implementation(struct starpu_codelet *cl, unsigned nimpl) { return cl->max_fpga_funcs[nimpl]; } static inline const char *_starpu_task_get_cpu_name_nth_implementation(struct starpu_codelet *cl, unsigned nimpl) { return cl->cpu_funcs_name[nimpl]; } #define _STARPU_TASK_SET_INTERFACE(task, interface, i) do { if (task->dyn_handles) task->dyn_interfaces[i] = interface; else task->interfaces[i] = interface;} while(0) #define _STARPU_TASK_GET_INTERFACES(task) ((task->dyn_handles) ? task->dyn_interfaces : task->interfaces) void _starpu_watchdog_init(void); void _starpu_watchdog_shutdown(void); int _starpu_task_wait_for_all_and_return_nb_waited_tasks(void); int _starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(unsigned sched_ctx); #pragma GCC visibility pop #ifdef BUILDING_STARPU LIST_CREATE_TYPE_NOSTRUCT(starpu_task, prev, next); PRIO_LIST_CREATE_TYPE(starpu_task, priority); #endif /** transaction states */ enum _starpu_trs_state { _starpu_trs_uninitialized = 0, _starpu_trs_initialized = 1, }; /** transaction epoch states */ enum _starpu_trs_epoch_state { _starpu_trs_epoch_uninitialized = 0, /** epoch is initialized but its entry task has not yet been executed to decide whether to confirm of cancel its execution */ _starpu_trs_epoch_inactive = 1, /** epoch has been confirmed for execution, its tasks will be actually executed */ _starpu_trs_epoch_confirmed = 2, /** epoch has been cancelled, its task will be skipped */ _starpu_trs_epoch_cancelled = 3, /** the exit task of the epoch has been executed */ _starpu_trs_epoch_terminated = 4, }; LIST_TYPE(_starpu_trs_epoch, enum _starpu_trs_epoch_state state; /** if 1, the epoch entry task will wait on some user-supplied handle * TODO: only used for first epoch on transaction opening for now, add for next epoch */ int do_sync; /** if 1, the epoch is the first of the transaction */ int is_begin; /** if 1, the epoch will be the last, and the transaction will be closed after its execution */ int is_end; /** inline argument supplied by the user and passed to the user function deciding whether to start * or cancel the epoch execution */ void *do_start_arg; ); struct starpu_transaction { /** epoch list lock */ struct _starpu_spinlock lock; struct _starpu_trs_epoch_list epoch_list; /** handle of the transaction object */ starpu_data_handle_t handle; /** dummy data area referenced by the handle */ int dummy_data; /** user function to decide whether to start or cancel an epoch execution, buffer[0] will * optionally refer to an user suppled handle's object */ int (*do_start_func)(void *buffer, void* arg); enum _starpu_trs_state state; /** flags, unused for now */ int flags; }; #endif // __CORE_TASK_H__ starpu-1.4.9+dfsg/src/core/task_bundle.c000066400000000000000000000137101507764646700201700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include /* Initialize a task bundle */ void starpu_task_bundle_create(starpu_task_bundle_t *bundle) { _STARPU_CALLOC(*bundle, 1, sizeof(struct _starpu_task_bundle)); STARPU_PTHREAD_MUTEX_INIT0(&(*bundle)->mutex, NULL); /* Of course at the beginning a bundle is open, * user can insert and remove tasks from it */ //(*bundle)->closed = 0; /* Start with an empty list */ //(*bundle)->list = NULL; } int starpu_task_bundle_insert(starpu_task_bundle_t bundle, struct starpu_task *task) { STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex); if (bundle->closed) { /* The bundle is closed, we cannot add task anymore */ STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); return -EPERM; } if (task->status != STARPU_TASK_INIT) { /* The task has already been submitted, it's too late to put it * into a bundle now. */ STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); return -EINVAL; } /* Insert a task at the end of the bundle */ struct _starpu_task_bundle_entry *entry; _STARPU_MALLOC(entry, sizeof(struct _starpu_task_bundle_entry)); entry->task = task; entry->next = NULL; if (!bundle->list) { bundle->list = entry; } else { struct _starpu_task_bundle_entry *item; item = bundle->list; while (item->next) item = item->next; item->next = entry; } /* Mark the task as belonging the bundle */ task->bundle = bundle; STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); return 0; } int starpu_task_bundle_remove(starpu_task_bundle_t bundle, struct starpu_task *task) { struct _starpu_task_bundle_entry *item; STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex); item = bundle->list; /* List is empty, there is no way the task * belong to it */ if (!item) { STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); return -ENOENT; } STARPU_ASSERT_MSG(task->bundle == bundle, "Task %p was not in bundle %p, but in bundle %p", task, bundle, task->bundle); task->bundle = NULL; if (item->task == task) { /* Remove the first element */ bundle->list = item->next; free(item); /* If the list is now empty, deinitialize the bundle */ if (bundle->closed && bundle->list == NULL) { STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); _starpu_task_bundle_destroy(bundle); return 0; } STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); return 0; } /* Go through the list until we find the right task, * then we delete it */ while (item->next) { struct _starpu_task_bundle_entry *next; next = item->next; if (next->task == task) { /* Remove the next element */ item->next = next->next; STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); free(next); return 0; } item = next; } STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); /* We could not find the task in the bundle */ return -ENOENT; } /* Close a bundle. No task can be added to a closed bundle. Tasks can still be * removed from a closed bundle. A closed bundle automatically gets * deinitialized when it becomes empty. A closed bundle cannot be reopened. */ void starpu_task_bundle_close(starpu_task_bundle_t bundle) { STARPU_PTHREAD_MUTEX_LOCK(&bundle->mutex); /* If the bundle is already empty, we deinitialize it now as the * user closed it and thus don't intend to insert new tasks in it. */ if (bundle->list == NULL) { STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); _starpu_task_bundle_destroy(bundle); return; } /* Mark the bundle as closed */ bundle->closed = 1; STARPU_PTHREAD_MUTEX_UNLOCK(&bundle->mutex); } void _starpu_task_bundle_destroy(starpu_task_bundle_t bundle) { /* Remove all entries from the bundle (which is likely to be empty) */ while (bundle->list) { struct _starpu_task_bundle_entry *entry = bundle->list; bundle->list = bundle->list->next; free(entry); } STARPU_PTHREAD_MUTEX_DESTROY(&bundle->mutex); free(bundle); } void _starpu_insertion_handle_sorted(struct _starpu_handle_list **listp, starpu_data_handle_t handle, enum starpu_data_access_mode mode) { STARPU_ASSERT(listp); struct _starpu_handle_list *list = *listp; /* If the list is empty or the handle's address the smallest among the * list, we insert it as first element */ if (!list || list->handle > handle) { struct _starpu_handle_list *link; _STARPU_MALLOC(link, sizeof(struct _starpu_handle_list)); link->handle = handle; link->mode = mode; link->next = list; *listp = link; return; } struct _starpu_handle_list *prev = list; /* Look for the same handle if already present in the list. * Else place it right before the smallest following handle */ while (list && (handle >= list->handle)) { prev = list; list = list->next; } if (prev->handle == handle) { /* The handle is already in the list, the merge both the access modes */ prev->mode = (enum starpu_data_access_mode) ((int) prev->mode | (int) mode); } else { /* The handle was not in the list, we insert it after 'prev', thus right before * 'list' which is the smallest following handle */ struct _starpu_handle_list *link; _STARPU_MALLOC(link, sizeof(struct _starpu_handle_list)); link->handle = handle; link->mode = mode; link->next = prev->next; prev->next = link; } } starpu-1.4.9+dfsg/src/core/task_bundle.h000066400000000000000000000074261507764646700202040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __CORE_TASK_BUNDLE_H__ #define __CORE_TASK_BUNDLE_H__ /** @file */ #include #pragma GCC visibility push(hidden) /** struct _starpu_task_bundle_entry * ================================ * Purpose * ======= * Structure used to describe a linked list containing tasks in _starpu_task_bundle. * * Fields * ====== * task Pointer to the task structure. * * next Pointer to the next element in the linked list. */ struct _starpu_task_bundle_entry { struct starpu_task *task; struct _starpu_task_bundle_entry *next; }; /** struct _starpu_task_bundle * ========================== * Purpose * ======= * Structure describing a list of tasks that should be scheduled on the same * worker whenever it's possible. * It must be considered as a hint given to the scheduler as there is no guarantee that * they will be executed on the same worker. * * Fields * ====== * mutex Mutex protecting the structure. * * list Array of tasks included in the bundle. * * closed Used to know if the user is still willing to * add/remove some tasks in the bundle. Especially useful for * the runtime to know whether it is safe to destroy a bundle. */ struct _starpu_task_bundle { /** Mutex protecting the bundle */ starpu_pthread_mutex_t mutex; struct _starpu_task_bundle_entry *list; int closed; }; /** struct _starpu_handle_list * ========================== * Purpose * ======= * Structure describing a list of handles sorted by address to speed-up * when looking for an element. * The list cannot contain duplicate handles. * * Fields * ====== * handle Pointer to the handle structure. * * access_mode Total access mode over the whole bundle. * * next Pointer to the next element in the linked list. */ struct _starpu_handle_list { starpu_data_handle_t handle; enum starpu_data_access_mode mode; struct _starpu_handle_list *next; }; /** _starpu_task_bundle_destroy * ========================== * Purpose * ======= * Destroy and deinitialize a bundle, * memory previously allocated is freed. * * Arguments * ========= * bundle (input) * Bundle to destroy. */ void _starpu_task_bundle_destroy(starpu_task_bundle_t bundle); /** _starpu_insertion_handle_sorted * ======================== * Purpose * ======= * Insert an handle in a _starpu_handle_list, elements are sorted * in increasing order, considering their physical address. * As the list doesn't accept duplicate elements, a handle with the * same address as an handle contained in the list is not inserted, but * its mode access is merged with the one of the latter. * * Arguments * ========= * listp (input, output) * Pointer to the first element of the list. * In the case of an empty list or an inserted handle with small address, * it should have changed when the call returns. * * handle (input) * Handle to insert in the list. * * mode (input) * Access mode of the handle. */ void _starpu_insertion_handle_sorted(struct _starpu_handle_list **listp, starpu_data_handle_t handle, enum starpu_data_access_mode mode); #pragma GCC visibility pop #endif // __CORE_TASK_BUNDLE_H__ starpu-1.4.9+dfsg/src/core/topology.c000066400000000000000000002167761507764646700175720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef STARPU_HAVE_HWLOC #include #ifndef HWLOC_API_VERSION #define HWLOC_OBJ_PU HWLOC_OBJ_PROC #endif #if HWLOC_API_VERSION < 0x00010b00 #define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE #endif #endif #ifdef STARPU_HAVE_WINDOWS #include #endif #ifdef STARPU_SIMGRID #include #endif static int main_thread_cpuid = -1; static unsigned topology_is_initialized = 0; static int nobind; static int numa_enabled = -1; /* For checking whether two workers share the same PU, indexed by PU number */ static int cpu_worker[STARPU_MAXCPUS]; static char * cpu_name[STARPU_MAXCPUS]; static unsigned nb_numa_nodes = 0; static int numa_memory_nodes_to_hwloclogid[STARPU_MAXNUMANODES]; /* indexed by StarPU numa node to convert in hwloc logid */ static int numa_memory_nodes_to_physicalid[STARPU_MAXNUMANODES]; /* indexed by StarPU numa node to convert in physical id */ static unsigned numa_bus_id[STARPU_MAXNUMANODES*STARPU_MAXNUMANODES]; #define STARPU_NUMA_UNINITIALIZED (-2) #define STARPU_NUMA_MAIN_RAM (-1) unsigned _starpu_may_bind_automatically[STARPU_NARCH] = { 0 }; unsigned starpu_memory_nodes_get_numa_count(void) { return nb_numa_nodes; } #if defined(STARPU_HAVE_HWLOC) hwloc_obj_t _starpu_numa_get_obj(hwloc_obj_t obj) { #if HWLOC_API_VERSION >= 0x00020000 while (obj && obj->memory_first_child == NULL) obj = obj->parent; if (!obj) return NULL; return obj->memory_first_child; #else while (obj && obj->type != HWLOC_OBJ_NUMANODE) obj = obj->parent; /* Note: If we don't find a "node" obj before the root, this means * hwloc does not know whether there are numa nodes or not, so * we should not use a per-node sampling in that case. */ return obj; #endif } static int numa_get_logical_id(hwloc_obj_t obj) { STARPU_ASSERT(obj); obj = _starpu_numa_get_obj(obj); if (!obj) return 0; return obj->logical_index; } static int numa_get_physical_id(hwloc_obj_t obj) { STARPU_ASSERT(obj); obj = _starpu_numa_get_obj(obj); if (!obj) return 0; return obj->os_index; } #endif int _starpu_get_logical_numa_node_worker(unsigned workerid) { #if defined(STARPU_HAVE_HWLOC) STARPU_ASSERT(numa_enabled != -1); if (numa_enabled) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config(); struct _starpu_machine_topology *topology = &config->topology; switch(worker->arch) { case STARPU_CPU_WORKER: { hwloc_obj_t obj; obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid); return numa_get_logical_id(obj); } default: STARPU_ABORT(); } } else #endif { (void) workerid; /* unused */ return STARPU_NUMA_MAIN_RAM; } } /* This returns the exact NUMA node next to a worker */ static int _starpu_get_physical_numa_node_worker(unsigned workerid) { #if defined(STARPU_HAVE_HWLOC) STARPU_ASSERT(numa_enabled != -1); if (numa_enabled) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config(); struct _starpu_machine_topology *topology = &config->topology; switch(worker->arch) { case STARPU_CPU_WORKER: { hwloc_obj_t obj; obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid); return numa_get_physical_id(obj); } default: STARPU_ABORT(); } } else #endif { (void) workerid; /* unused */ return STARPU_NUMA_MAIN_RAM; } } /* This returns the CPU NUMA memory close to a worker */ static int _starpu_get_logical_close_numa_node_worker(unsigned workerid) { #if defined(STARPU_HAVE_HWLOC) STARPU_ASSERT(numa_enabled != -1); if (numa_enabled) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); struct _starpu_machine_config *config = (struct _starpu_machine_config *)_starpu_get_machine_config(); struct _starpu_machine_topology *topology = &config->topology; hwloc_obj_t obj = NULL; if (starpu_driver_info[worker->arch].get_hwloc_obj) obj = starpu_driver_info[worker->arch].get_hwloc_obj(topology->hwtopology, worker->devid); if (!obj) obj = hwloc_get_obj_by_type(topology->hwtopology, HWLOC_OBJ_PU, worker->bindid); return numa_get_logical_id(obj); } else #endif { (void) workerid; /* unused */ return STARPU_NUMA_MAIN_RAM; } } //TODO change this in an array int starpu_memory_nodes_numa_hwloclogid_to_id(int logid) { unsigned n; for (n = 0; n < nb_numa_nodes; n++) if (numa_memory_nodes_to_hwloclogid[n] == logid) return n; return -1; } int starpu_memory_nodes_numa_id_to_hwloclogid(unsigned id) { STARPU_ASSERT(id < STARPU_MAXNUMANODES); return numa_memory_nodes_to_hwloclogid[id]; } int starpu_memory_nodes_numa_devid_to_id(unsigned id) { STARPU_ASSERT(id < STARPU_MAXNUMANODES); return numa_memory_nodes_to_physicalid[id]; } //TODO change this in an array int starpu_memory_nodes_numa_id_to_devid(int osid) { unsigned n; for (n = 0; n < nb_numa_nodes; n++) if (numa_memory_nodes_to_physicalid[n] == osid) return n; return -1; } // TODO: cache the values instead of looking in hwloc each time /* Avoid using this one, prefer _starpu_task_data_get_node_on_worker */ int _starpu_task_data_get_node_on_node(struct starpu_task *task, unsigned index, unsigned local_node) { int node = STARPU_SPECIFIC_NODE_LOCAL; if (task->cl->specific_nodes) node = STARPU_CODELET_GET_NODE(task->cl, index); switch (node) { case STARPU_SPECIFIC_NODE_LOCAL: // TODO: rather find MCDRAM node = local_node; break; case STARPU_SPECIFIC_NODE_CPU: switch (starpu_node_get_kind(local_node)) { case STARPU_CPU_RAM: node = local_node; break; default: // TODO: rather take close NUMA node node = STARPU_MAIN_RAM; break; } break; case STARPU_SPECIFIC_NODE_SLOW: // TODO: rather leave in DDR node = local_node; break; case STARPU_SPECIFIC_NODE_LOCAL_OR_CPU: { enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, index); if (mode & STARPU_R) { if (mode & STARPU_R && task->handles[index]->per_node[local_node].state != STARPU_INVALID) { /* It is here already, rather access it from here */ node = local_node; } else { /* It is not here already, do not bother moving it */ node = STARPU_MAIN_RAM; } } else { /* Nothing to read, consider where to write */ starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index); if (handle->wt_mask & (1 << STARPU_MAIN_RAM)) /* Write through, better simply write to the main memory */ node = STARPU_MAIN_RAM; else /* Better keep temporary data on the accelerator to save PCI bandwidth */ node = local_node; } break; } case STARPU_SPECIFIC_NODE_NONE: return -1; } return node; } int _starpu_task_data_get_node_on_worker(struct starpu_task *task, unsigned index, unsigned worker) { unsigned local_node = starpu_worker_get_memory_node(worker); int node = STARPU_SPECIFIC_NODE_LOCAL; if (task->cl->specific_nodes) node = STARPU_CODELET_GET_NODE(task->cl, index); switch (node) { case STARPU_SPECIFIC_NODE_LOCAL: // TODO: rather find MCDRAM node = local_node; break; case STARPU_SPECIFIC_NODE_CPU: node = starpu_memory_nodes_numa_hwloclogid_to_id(_starpu_get_logical_close_numa_node_worker(worker)); if (node == -1) node = STARPU_MAIN_RAM; break; case STARPU_SPECIFIC_NODE_SLOW: // TODO: rather leave in DDR node = local_node; break; case STARPU_SPECIFIC_NODE_LOCAL_OR_CPU: { enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, index); if (mode & STARPU_R) { if (task->handles[index]->per_node[local_node].state != STARPU_INVALID) { /* It is here already, rather access it from here */ node = local_node; } else { /* It is not here already, do not bother moving it */ node = STARPU_MAIN_RAM; } } else { /* Nothing to read, consider where to write */ starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index); if (handle->wt_mask & (1 << STARPU_MAIN_RAM)) /* Write through, better simply write to the main memory */ node = STARPU_MAIN_RAM; else /* Better keep temporary data on the accelerator to save PCI bandwidth */ node = local_node; } break; } case STARPU_SPECIFIC_NODE_NONE: return -1; } return node; } struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d) { unsigned nworkers = starpu_worker_get_count(); unsigned workerid; for (workerid = 0; workerid < nworkers; workerid++) { if (starpu_worker_get_type(workerid) == d->type) { struct _starpu_worker *worker; worker = _starpu_get_worker_struct(workerid); STARPU_ASSERT(worker->driver_ops); STARPU_ASSERT_MSG(worker->driver_ops->is_devid, "The driver operation 'is_devid' is not defined"); if (worker->driver_ops->is_devid(d, worker)) return worker; } } return NULL; } void _starpu_initialize_workers_deviceid(int *explicit_workers_gpuid, int *current, int *workers_gpuid, const char *varname, unsigned nhwgpus, enum starpu_worker_archtype type) { char *strval; unsigned i; *current = 0; /* conf->workers_gpuid indicates the successive GPU identifier that * should be used to bind the workers. It should be either filled * according to the user's explicit parameters (from starpu_conf) or * according to the varname env. variable. Otherwise, a * round-robin policy is used to distributed the workers over the * cores. */ /* what do we use, explicit value, env. variable, or round-robin ? */ strval = starpu_getenv(varname); if (strval) { /* varname certainly contains less entries than * STARPU_NMAXWORKERS, so we reuse its entries in a round * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1 * 2". */ unsigned wrap = 0; unsigned number_of_entries = 0; char *endptr; /* we use the content of the varname * env. variable */ for (i = 0; i < STARPU_NMAXWORKERS; i++) { if (!wrap) { long int val; val = strtol(strval, &endptr, 10); if (endptr != strval) { workers_gpuid[i] = (unsigned)val; strval = endptr; } else { /* there must be at least one entry */ STARPU_ASSERT(i != 0); number_of_entries = i; /* there is no more values in the * string */ wrap = 1; workers_gpuid[i] = workers_gpuid[0]; } } else { workers_gpuid[i] = workers_gpuid[i % number_of_entries]; } } } else if (explicit_workers_gpuid) { /* we use the explicit value from the user */ memcpy(workers_gpuid, explicit_workers_gpuid, STARPU_NMAXWORKERS*sizeof(unsigned)); } else { /* by default, we take a round robin policy */ if (nhwgpus > 0) for (i = 0; i < STARPU_NMAXWORKERS; i++) workers_gpuid[i] = (unsigned)(i % nhwgpus); /* StarPU can use sampling techniques to bind threads * correctly */ _starpu_may_bind_automatically[type] = 1; } } int _starpu_get_next_devid(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config, enum starpu_worker_archtype arch) { if (topology->nworkers == STARPU_NMAXWORKERS) // Already full! return -1; unsigned i = ((config->current_devid[arch]++) % config->topology.ndevices[arch]); return (int)config->topology.workers_devid[arch][i]; } #ifndef STARPU_SIMGRID #ifdef STARPU_HAVE_HWLOC static void _starpu_allocate_topology_userdata(hwloc_obj_t obj) { unsigned i; _STARPU_CALLOC(obj->userdata, 1, sizeof(struct _starpu_hwloc_userdata)); for (i = 0; i < obj->arity; i++) _starpu_allocate_topology_userdata(obj->children[i]); #if HWLOC_API_VERSION >= 0x00020000 hwloc_obj_t child; for (child = obj->io_first_child; child; child = child->next_sibling) _starpu_allocate_topology_userdata(child); #endif } static void _starpu_deallocate_topology_userdata(hwloc_obj_t obj) { unsigned i; struct _starpu_hwloc_userdata *data = obj->userdata; STARPU_ASSERT(!data->worker_list || data->worker_list == (void*)-1); free(data); for (i = 0; i < obj->arity; i++) _starpu_deallocate_topology_userdata(obj->children[i]); #if HWLOC_API_VERSION >= 0x00020000 hwloc_obj_t child; for (child = obj->io_first_child; child; child = child->next_sibling) _starpu_deallocate_topology_userdata(child); #endif } #endif #endif static void _starpu_init_topology(struct _starpu_machine_config *config) { /* Discover the topology, meaning finding all the available PUs for the compiled drivers. These drivers MUST have been initialized before calling this function. The discovered topology is filled in CONFIG. */ struct _starpu_machine_topology *topology = &config->topology; if (topology_is_initialized) return; #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) if (config->conf.nopencl != 0) _starpu_opencl_init(); #endif #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) if (config->conf.ncuda != 0) _starpu_init_cuda(); #endif #if defined(STARPU_USE_HIP) if (config->conf.nhip != 0) _starpu_init_hip(); #endif #if defined(STARPU_USE_MAX_FPGA) if (config->conf.nmax_fpga != 0) _starpu_init_max_fpga(); #endif nobind = starpu_getenv_number("STARPU_WORKERS_NOBIND"); topology->nhwdevices[STARPU_CPU_WORKER] = 1; topology->nhwworker[STARPU_CPU_WORKER][0] = 0; topology->nhwpus = 0; topology->nusedpus = 0; topology->firstusedpu = 0; #ifndef STARPU_SIMGRID #ifdef STARPU_HAVE_HWLOC int err; err = hwloc_topology_init(&topology->hwtopology); STARPU_ASSERT_MSG(err == 0, "Could not initialize Hwloc topology (%s)\n", strerror(errno)); char *hwloc_input = starpu_getenv("STARPU_HWLOC_INPUT"); if (hwloc_input && hwloc_input[0]) { err = hwloc_topology_set_xml(topology->hwtopology, hwloc_input); if (err < 0) _STARPU_DISP("Could not load hwloc input %s\n", hwloc_input); } _starpu_topology_filter(topology->hwtopology); err = hwloc_topology_load(topology->hwtopology); STARPU_ASSERT_MSG(err == 0, "Could not load Hwloc topology (%s)%s%s%s\n", strerror(errno), hwloc_input ? " (input " : "", hwloc_input ? hwloc_input : "", hwloc_input ? ")" : ""); #ifdef HAVE_HWLOC_CPUKINDS_GET_NR int nr_kinds = hwloc_cpukinds_get_nr(topology->hwtopology, 0); if (nr_kinds > 1) _STARPU_DISP("Warning: there are several kinds of CPU on this system. For now StarPU assumes all CPU are equal\n"); #endif _starpu_allocate_topology_userdata(hwloc_get_root_obj(topology->hwtopology)); #endif #endif #ifdef STARPU_SIMGRID config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = config->topology.nusedpus = _starpu_simgrid_get_nbhosts("CPU"); #elif defined(STARPU_HAVE_HWLOC) /* Discover the CPUs relying on the hwloc interface and fills CONFIG * accordingly. */ config->cpu_depth = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_CORE); config->pu_depth = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_PU); /* Would be very odd */ STARPU_ASSERT(config->cpu_depth != HWLOC_TYPE_DEPTH_MULTIPLE); if (config->cpu_depth == HWLOC_TYPE_DEPTH_UNKNOWN) { /* unknown, using logical processors as fallback */ _STARPU_DISP("Warning: The OS did not report CPU cores. Assuming there is only one hardware thread per core.\n"); config->cpu_depth = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_PU); } topology->nhwworker[STARPU_CPU_WORKER][0] = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->cpu_depth); topology->nhwpus = topology->nusedpus = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->pu_depth); if (starpu_getenv_number_default("STARPU_WORKERS_GETBIND", 1)) { /* Respect the existing binding */ hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_t log_cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_t check_cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_t log_coreset = hwloc_bitmap_alloc(); unsigned n, i, j, first, last, weight; int ret; #ifdef STARPU_VERBOSE char *str; #endif do { /* Get the process binding (e.g. provided by the job scheduler) */ ret = hwloc_get_cpubind(topology->hwtopology, cpuset, HWLOC_CPUBIND_THREAD); if (ret) { _STARPU_DISP("Warning: could not get current CPU binding: %s\n", strerror(errno)); break; } #ifdef STARPU_VERBOSE hwloc_bitmap_asprintf(&str, cpuset); _STARPU_DEBUG("Got cpu physical binding: %s\n", str); free(str); #endif /* Compute logical sets */ n = hwloc_get_nbobjs_by_depth(topology->hwtopology, config->pu_depth); for (i = 0; i < n; i++) { hwloc_obj_t pu = hwloc_get_obj_by_depth(topology->hwtopology, config->pu_depth, i), core; if (!hwloc_bitmap_isset(cpuset, pu->os_index)) continue; hwloc_bitmap_set(log_cpuset, i); core = pu; if (config->cpu_depth != config->pu_depth) { while (core && core->type != HWLOC_OBJ_CORE) core = core->parent; if (!core) { _STARPU_DISP("Warning: hwloc did not report a core above PU %d\n", i); break; } } /* Include all PUs from the core to make the set contiguous, we will pick up just one from it by default */ for (j = 0; j < core->arity; j++) hwloc_bitmap_set(check_cpuset, core->children[j]->logical_index); hwloc_bitmap_set(log_coreset, core->logical_index); } #ifdef STARPU_VERBOSE hwloc_bitmap_asprintf(&str, log_cpuset); _STARPU_DEBUG("This maps to logical binding: %s\n", str); free(str); hwloc_bitmap_asprintf(&str, check_cpuset); _STARPU_DEBUG("Which we extend to: %s\n", str); free(str); hwloc_bitmap_asprintf(&str, log_coreset); _STARPU_DEBUG("The logical core binding is thus: %s\n", str); free(str); #endif /* Check that PU numbers are consecutive */ first = hwloc_bitmap_first(check_cpuset); last = hwloc_bitmap_last(check_cpuset); weight = hwloc_bitmap_weight(check_cpuset); if (last - first + 1 != weight) { _STARPU_DISP("Warning: hwloc reported non-consecutive binding (first %u last %d weight %u, this is not supported yet, sorry, please use STARPU_WORKERS_CPUID or STARPU_WORKERS_COREID to set this by hand\n", first, last, weight); break; } if (hwloc_bitmap_weight(log_cpuset) == 1 || hwloc_bitmap_weight(log_coreset) == 1) { const char *omp_bind = starpu_getenv("OMP_PROC_BIND"); _STARPU_DISP("Warning: the current CPU binding set contains only one CPU.\n"); if (omp_bind && strcasecmp(omp_bind, "false")) _STARPU_DISP("The OMP_PROC_BIND environment variable is set to %s.\n", omp_bind); else _STARPU_DISP("Maybe you need to tell your job scheduler to bind on all allocated cores (e.g. --exclusive --ntasks-per-node=1 or --cpus-per-task for Slurm, or --bind-to board for openmpi).\n"); _STARPU_DISP("You can use STARPU_WORKERS_GETBIND=0 to bypass it, but make sure you are not oversubscribing the machine.\n"); } topology->nusedpus = weight; topology->firstusedpu = hwloc_bitmap_first(log_cpuset);; } while(0); hwloc_bitmap_free(cpuset); hwloc_bitmap_free(check_cpuset); topology->log_cpuset = log_cpuset; topology->log_coreset = log_coreset; } #elif defined(HAVE_SYSCONF) /* Discover the CPUs relying on the sysconf(3) function and fills * CONFIG accordingly. */ config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = config->topology.nusedpus = sysconf(_SC_NPROCESSORS_ONLN); #elif defined(_WIN32) /* Discover the CPUs on Cygwin and MinGW systems. */ SYSTEM_INFO sysinfo; GetSystemInfo(&sysinfo); config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = config->topology.nusedpus = sysinfo.dwNumberOfProcessors; #else #warning no way to know number of cores, assuming 1 config->topology.nhwworker[STARPU_CPU_WORKER][0] = config->topology.nhwpus = config->topology.nusedpus = 1; #endif if (!starpu_getenv_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1)) config->topology.nhwdevices[STARPU_CPU_WORKER] = config->topology.nhwworker[STARPU_CPU_WORKER][0]; if (config->conf.ncuda != 0) _starpu_cuda_discover_devices(config); if (config->conf.nhip != 0) _starpu_hip_discover_devices(config); if (config->conf.nopencl != 0) _starpu_opencl_discover_devices(config); if (config->conf.nmax_fpga != 0) _starpu_max_fpga_discover_devices(config); #ifdef STARPU_USE_MPI_MASTER_SLAVE config->topology.nhwdevices[STARPU_MPI_MS_WORKER] = _starpu_mpi_src_get_device_count(); #endif #ifdef STARPU_USE_TCPIP_MASTER_SLAVE config->topology.nhwdevices[STARPU_TCPIP_MS_WORKER] = _starpu_tcpip_src_get_device_count(); #endif topology_is_initialized = 1; } /* * Bind workers on the different processors */ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *config) { char *strval; unsigned i; struct _starpu_machine_topology *topology = &config->topology; STARPU_ASSERT_MSG(topology->nhwworker[STARPU_CPU_WORKER][0], "Unexpected value for topology->nhwworker[STARPU_CPU_WORKER][0] %u", topology->nhwworker[STARPU_CPU_WORKER][0]); int nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0]; int scale = 1; config->current_bindid = 0; if (starpu_getenv("STARPU_WORKERS_CPUID") && starpu_getenv("STARPU_WORKERS_COREID")) { _STARPU_DISP("Warning: STARPU_WORKERS_CPUID and STARPU_WORKERS_COREID cannot be set at the same time. STARPU_WORKERS_CPUID will be used.\n"); } if (topology->nhwpus % topology->nhwworker[STARPU_CPU_WORKER][0]) { _STARPU_DISP("Warning: hwloc reported %d logical CPUs for %d cores, this is not homogeneous, will assume %d logical CPUs per core\n", topology->nhwpus, topology->nhwworker[STARPU_CPU_WORKER][0], nhyperthreads); } /* conf->workers_bindid indicates the successive logical PU identifier that * should be used to bind the workers. It should be either filled * according to the user's explicit parameters (from starpu_conf) or * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a * round-robin policy is used to distributed the workers over the * cores. */ /* what do we use, explicit value, env. variable, or round-robin ? */ strval = starpu_getenv("STARPU_WORKERS_CPUID"); if (strval == NULL) { strval = starpu_getenv("STARPU_WORKERS_COREID"); if (strval) scale = nhyperthreads; } if (strval) { /* STARPU_WORKERS_CPUID certainly contains less entries than * STARPU_NMAXWORKERS, so we reuse its entries in a round * robin fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1 * 2". */ unsigned wrap = 0; unsigned number_of_entries = 0; char *endptr; /* we use the content of the STARPU_WORKERS_CPUID * env. variable */ for (i = 0; i < STARPU_NMAXWORKERS; i++) { if (!wrap) { long int val; val = strtol(strval, &endptr, 10); if (endptr != strval) { if (scale > 1) { #if defined(STARPU_HAVE_HWLOC) if (config->topology.log_coreset && !hwloc_bitmap_isset(config->topology.log_coreset, val)) _STARPU_DISP("Warning: logical core id %ld is not in the CPU binding provided by the OS\n", val); #endif if (val * scale >= topology->nhwpus) _STARPU_DISP("Warning: logical core id %ld is beyond the number of cores (%d), will wrap around it\n", val, topology->nhwpus / scale); } else { #if defined(STARPU_HAVE_HWLOC) if (config->topology.log_cpuset && !hwloc_bitmap_isset(config->topology.log_cpuset, val)) _STARPU_DISP("Warning: logical CPU id %ld is not in the CPU binding provided by the OS\n", val); #endif if (val >= topology->nhwpus) _STARPU_DISP("Warning: logical CPU id %ld is beyond the number of CPUs (%d), will wrap around it\n", val, topology->nhwpus); } topology->workers_bindid[i] = (unsigned)((val * scale) % topology->nhwpus); strval = endptr; if (*strval == '-') { /* range of values */ long int endval; strval++; if (*strval && *strval != ' ' && *strval != ',') { endval = strtol(strval, &endptr, 10); strval = endptr; } else { endval = topology->nhwpus / scale - 1; if (*strval) strval++; } for (val++; val <= endval && i < STARPU_NMAXWORKERS-1; val++) { i++; topology->workers_bindid[i] = (unsigned)((val * scale) % topology->nhwpus); } } number_of_entries = i+1; if (*strval == ',') strval++; } else { /* there must be at least one entry */ STARPU_ASSERT(i != 0); number_of_entries = i; /* there is no more values in the * string */ wrap = 1; topology->workers_bindid[i] = topology->workers_bindid[0]; } } else { topology->workers_bindid[i] = topology->workers_bindid[i % number_of_entries]; } } topology->workers_nbindid = number_of_entries; } else if (config->conf.use_explicit_workers_bindid) { /* we use the explicit value from the user */ memcpy(topology->workers_bindid, config->conf.workers_bindid, STARPU_NMAXWORKERS*sizeof(unsigned)); topology->workers_nbindid = STARPU_NMAXWORKERS; } else { int nth_per_core = starpu_getenv_number_default("STARPU_NTHREADS_PER_CORE", 1); int k; int nbindids=0; STARPU_ASSERT_MSG(nth_per_core > 0 && nth_per_core <= nhyperthreads , "Incorrect number of hyperthreads"); i = 0; /* PU number currently assigned */ k = 0; /* Number of threads already put on the current core */ while(i < topology->nusedpus) { if (k >= nth_per_core) { /* We have already put enough workers on this * core, skip remaining PUs from this core, and * proceed with next core */ i += nhyperthreads-nth_per_core; k = 0; continue; } /* Add a worker to this core, by using this logical PU */ unsigned allocated = topology->firstusedpu + (unsigned)i; #if defined(STARPU_HAVE_HWLOC) if (config->topology.log_cpuset && !hwloc_bitmap_isset(config->topology.log_cpuset, allocated)) _STARPU_DISP("Warning: logical CPU id %u is not in the CPU binding provided by the OS, did you specify an STARPU_NTHREADS_PER_CORE value that is not covered by the OS-provided CPU binding?\n", allocated); #endif topology->workers_bindid[nbindids++] = allocated; k++; i++; } topology->workers_nbindid = nbindids; } for (i = 0; i < STARPU_MAXCPUS;i++) cpu_worker[i] = STARPU_NOWORKERID; /* no binding yet */ memset(&config->currently_bound, 0, sizeof(config->currently_bound)); memset(&config->currently_shared, 0, sizeof(config->currently_shared)); } static void _starpu_deinitialize_workers_bindid(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED) { unsigned i; for (i = 0; i < STARPU_MAXCPUS;i++) { if (cpu_name[i]) { free(cpu_name[i]); cpu_name[i] = NULL; } } } unsigned _starpu_get_next_bindid(struct _starpu_machine_config *config, unsigned flags, unsigned *preferred_binding, unsigned npreferred) { struct _starpu_machine_topology *topology = &config->topology; STARPU_ASSERT_MSG(topology_is_initialized, "The StarPU core is not initialized yet, have you called starpu_init?"); unsigned current_preferred; unsigned nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0]; unsigned workers_nbindid = topology->workers_nbindid; unsigned i; if (npreferred) { STARPU_ASSERT_MSG(preferred_binding, "Passing NULL pointer for parameter preferred_binding with a non-0 value of parameter npreferred"); } /* loop over the preference list */ for (current_preferred = 0; current_preferred < npreferred; current_preferred++) { /* can we bind the worker on the preferred core ? */ unsigned requested_core = preferred_binding[current_preferred]; unsigned requested_bindid = requested_core * nhyperthreads; /* Look at the remaining PUs to be bound to */ for (i = 0; i < workers_nbindid; i++) { if (topology->workers_bindid[i] == requested_bindid) { if ((!config->currently_bound[i] || (config->currently_shared[i] && !(flags & STARPU_THREAD_ACTIVE)))) { /* the PU is available, or shareable with us, we use it ! */ _STARPU_DEBUG("PU %d is %sbound and %sshared and we %sshare, use it\n", requested_bindid, config->currently_bound[i] ? "" : "not ", config->currently_shared[i] ? "" : "not ", flags & STARPU_THREAD_ACTIVE ? "don't ": ""); config->currently_bound[i] = 1; if (!(flags & STARPU_THREAD_ACTIVE)) config->currently_shared[i] = 1; return requested_bindid; } break; } } } if (!(flags & STARPU_THREAD_ACTIVE)) { /* Try to find a shareable PU */ for (i = 0; i < workers_nbindid; i++) if (config->currently_shared[i]) { _STARPU_DEBUG("PU %d is available for sharing\n", topology->workers_bindid[i]); return topology->workers_bindid[i]; } } /* Try to find an available PU from last used PU */ for (i = config->current_bindid; i < workers_nbindid; i++) if (!config->currently_bound[i]) /* Found a cpu ready for use, use it! */ break; if (i == workers_nbindid) { _STARPU_DEBUG("Looped over %d cpus, restarting from 0\n", workers_nbindid); /* Finished binding on all cpus, restart from start in * case the user really wants overloading */ memset(&config->currently_bound, 0, sizeof(config->currently_bound)); i = 0; } STARPU_ASSERT(i < workers_nbindid); unsigned bindid = topology->workers_bindid[i]; _STARPU_DEBUG("binding on PU %d\n", bindid); config->currently_bound[i] = 1; if (!(flags & STARPU_THREAD_ACTIVE)) config->currently_shared[i] = 1; config->current_bindid = i; return bindid; } unsigned starpu_get_next_bindid(unsigned flags, unsigned *preferred, unsigned npreferred) { return _starpu_get_next_bindid(_starpu_get_machine_config(), flags, preferred, npreferred); } unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config) { _starpu_init_topology(config); return config->topology.nhwworker[STARPU_CPU_WORKER][0]; } unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config) { _starpu_init_topology(config); return config->topology.nhwpus; } unsigned _starpu_topology_get_nhwnumanodes(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED) { #if defined(STARPU_HAVE_HWLOC) _starpu_init_topology(config); struct _starpu_machine_topology *topology = &config->topology; int nnumanodes = hwloc_get_nbobjs_by_type(topology->hwtopology, HWLOC_OBJ_NUMANODE); unsigned res = nnumanodes > 0 ? nnumanodes : 1; if (res > STARPU_MAXNUMANODES) { _STARPU_DISP("Warning: Number of NUMA nodes discovered %d is higher than configured %d, reducing to that. Use configure option --enable-maxnumanodes=xxx to increase the maximum value of supported NUMA nodes.\n", res, STARPU_MAXNUMANODES); res = STARPU_MAXNUMANODES; } return res; #else return 1; #endif } unsigned _starpu_topology_get_nnumanodes(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED) { unsigned res; #if defined(STARPU_HAVE_HWLOC) if (numa_enabled == -1) numa_enabled = starpu_getenv_number_default("STARPU_USE_NUMA", 0); if (numa_enabled) res = _starpu_topology_get_nhwnumanodes(config); else #endif res = 1; return res; } #if defined(STARPU_HAVE_HWLOC) /* Record the logical numbers of the cores within this obj */ static unsigned _starpu_topology_get_core_binding(unsigned *binding, unsigned nbinding, hwloc_obj_t obj) { unsigned found = 0; unsigned n; if (nbinding && obj->type == HWLOC_OBJ_CORE) { *binding = obj->logical_index; found++; } for (n = 0; n < obj->arity; n++) { found += _starpu_topology_get_core_binding(binding + found, nbinding - found, obj->children[n]); } return found; } #endif /* Record the logical numbers of the cores within these numa nodes */ unsigned _starpu_topology_get_numa_core_binding(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED, const unsigned *numa_binding STARPU_ATTRIBUTE_UNUSED, unsigned nnuma STARPU_ATTRIBUTE_UNUSED, unsigned *binding STARPU_ATTRIBUTE_UNUSED, unsigned nbinding STARPU_ATTRIBUTE_UNUSED) { #if defined(STARPU_HAVE_HWLOC) unsigned n; unsigned cur = 0; for (n = 0; n < nnuma; n++) { hwloc_obj_t obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, numa_binding[n]); if (!obj) { /* NUMA nodes not available, fall back to the whole machine */ return _starpu_topology_get_core_binding(binding, nbinding, hwloc_get_root_obj(config->topology.hwtopology)); } #if HWLOC_API_VERSION >= 0x00020000 /* Get the actual topology object */ obj = obj->parent; #endif cur += _starpu_topology_get_core_binding(binding + cur, nbinding - cur, obj); if (cur == nbinding) break; } return cur; #else return 0; #endif } #ifdef STARPU_HAVE_HWLOC void _starpu_topology_filter(hwloc_topology_t topology) { #if HWLOC_API_VERSION >= 0x20000 hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL); hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM); #else hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_WHOLE_IO); #endif #ifdef HAVE_HWLOC_TOPOLOGY_SET_COMPONENTS /* Driver porters: adding your driver here is optional, it is just to avoid hwloc components which take a lot of time to start. */ # ifndef STARPU_USE_CUDA hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "cuda"); hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "nvml"); # endif # ifndef STARPU_USE_HIP hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "hip"); hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "rsmi"); # endif # ifndef STARPU_USE_OPENCL hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "opencl"); # endif #endif } #endif void _starpu_topology_check_ndevices(int *ndevices, unsigned nhwdevices, int overflow, unsigned max, int reserved, const char *nname, const char *dname, const char *configurename) { if (!*ndevices) return; STARPU_ASSERT_MSG(*ndevices >= -1, "%s can not be negative and different from -1 (is is %d)", nname, *ndevices); if (*ndevices == -1) { /* Nothing was specified, so let's choose ! */ if (reserved > 0) { if (nhwdevices < (unsigned) reserved) { _STARPU_DISP("Warning: %u %s devices were requested to be reserved, but only %d were available,\n", reserved, dname, nhwdevices); nhwdevices = 0; } else { nhwdevices -= reserved; } } if (nhwdevices > max) { _STARPU_MSG("# Warning: %u %s devices available. Only %u enabled. Use configure option --enable-%s=xxx to update the maximum value of supported %s devices.\n", nhwdevices, dname, max, configurename, dname); nhwdevices = max; } *ndevices = nhwdevices; } else { if (!overflow && *ndevices > (int) nhwdevices) { /* The user requires more devices than there is available */ _STARPU_DISP("Warning: %d %s devices requested. Only %d available.\n", *ndevices, dname, nhwdevices); *ndevices = nhwdevices; } if (reserved > 0) { if (*ndevices < (int) reserved) { _STARPU_DISP("Warning: %u %s devices were requested to be reserved, but only %d were configured,\n", reserved, dname, *ndevices); *ndevices = 0; } else *ndevices -= reserved; } /* Let's make sure this value is OK. */ if (*ndevices > (int) max) { _STARPU_DISP("Warning: %d %s devices requested. Only %d enabled. Use configure option --enable-%s=xxx to update the maximum value of supported %s devices.\n", *ndevices, dname, max, configurename, dname); *ndevices = max; } } } void _starpu_topology_configure_workers(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config, enum starpu_worker_archtype type, int devnum, int devid, int homogeneous, int worker_devid, unsigned nworker_per_device, unsigned ncores, struct _starpu_worker_set *worker_set, struct _starpu_worker_set *driver_worker_set) { topology->nworker[type][devnum] = nworker_per_device; topology->devid[type][devnum] = devid; unsigned i; for (i = 0; i < nworker_per_device; i++) { if (topology->nworkers == STARPU_NMAXWORKERS) // We are full break; int worker_idx = topology->nworkers++; if (worker_set == ALLOC_WORKER_SET) { /* Just one worker in the set */ _STARPU_CALLOC(config->workers[worker_idx].set, 1, sizeof(struct _starpu_worker_set)); config->workers[worker_idx].set->workers = &config->workers[worker_idx]; config->workers[worker_idx].set->nworkers = 1; if (type != STARPU_CPU_WORKER) _starpu_cpu_busy_cpu(1); } else { config->workers[worker_idx].set = worker_set; if ((!worker_set || worker_set->workers == &config->workers[worker_idx]) && (!driver_worker_set || driver_worker_set == worker_set) && type != STARPU_CPU_WORKER) _starpu_cpu_busy_cpu(1); } config->workers[worker_idx].driver_worker_set = driver_worker_set; config->workers[worker_idx].arch = type; _STARPU_MALLOC(config->workers[worker_idx].perf_arch.devices, sizeof(struct starpu_perfmodel_device)); config->workers[worker_idx].perf_arch.ndevices = 1; config->workers[worker_idx].perf_arch.devices[0].type = type; config->workers[worker_idx].perf_arch.devices[0].devid = homogeneous ? 0 : worker_devid ? (int) i : devid; config->workers[worker_idx].perf_arch.devices[0].ncores = ncores; config->workers[worker_idx].devid = worker_devid ? (int) i : devid; config->workers[worker_idx].devnum = worker_devid ? (int) i : devnum; config->workers[worker_idx].subworkerid = worker_devid ? 0 : i; config->workers[worker_idx].worker_mask = STARPU_WORKER_TO_MASK(type); config->worker_mask |= STARPU_WORKER_TO_MASK(type); } } #ifdef STARPU_HAVE_HWLOC static unsigned _starpu_topology_count_ngpus(hwloc_obj_t obj) { struct _starpu_hwloc_userdata *data = obj->userdata; unsigned n = data->ngpus; unsigned i; for (i = 0; i < obj->arity; i++) n += _starpu_topology_count_ngpus(obj->children[i]); #if HWLOC_API_VERSION >= 0x00020000 hwloc_obj_t child; for (child = obj->io_first_child; child; child = child->next_sibling) n += _starpu_topology_count_ngpus(child); #endif data->ngpus = n; //#ifdef STARPU_VERBOSE // { // char name[64]; // hwloc_obj_type_snprintf(name, sizeof(name), obj, 0); // _STARPU_DEBUG("hwloc obj %s has %u GPUs below\n", name, n); // } //#endif return n; } #endif static int _starpu_init_machine_config(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED) { int i; for (i = 0; i < STARPU_NMAXWORKERS; i++) { config->workers[i].workerid = i; config->workers[i].set = NULL; } struct _starpu_machine_topology *topology = &config->topology; topology->nworkers = 0; topology->ncombinedworkers = 0; topology->nsched_ctxs = 0; _starpu_init_topology(config); _starpu_initialize_workers_bindid(config); /* Reserve thread for main() */ main_thread_cpuid = starpu_getenv_number_default("STARPU_MAIN_THREAD_CPUID", -1); int main_thread_coreid = starpu_getenv_number_default("STARPU_MAIN_THREAD_COREID", -1); if (main_thread_cpuid >= 0 && main_thread_coreid >= 0) { _STARPU_DISP("Warning: STARPU_MAIN_THREAD_CPUID and STARPU_MAIN_THREAD_COREID cannot be set at the same time. STARPU_MAIN_THREAD_CPUID will be used.\n"); } if (main_thread_cpuid == -1 && main_thread_coreid >= 0) main_thread_cpuid = main_thread_coreid * _starpu_get_nhyperthreads(); if (main_thread_coreid == -1 && main_thread_cpuid >= 0) main_thread_coreid = main_thread_cpuid / _starpu_get_nhyperthreads(); int main_thread_bind = starpu_getenv_number_default("STARPU_MAIN_THREAD_BIND", 0); int main_thread_activity = STARPU_NONACTIVETHREAD; if (main_thread_bind) { main_thread_activity = STARPU_ACTIVETHREAD; if (main_thread_cpuid == -1) main_thread_cpuid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, NULL, 0); else { unsigned coreid = main_thread_coreid; unsigned got_cpuid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, &coreid, 1); if (got_cpuid != (unsigned) main_thread_cpuid) _STARPU_DISP("Warning: Could not reserve requested logical core %d (logical cpu %d) for main, got %d instead\n", main_thread_coreid, main_thread_cpuid, got_cpuid); } } if (main_thread_cpuid >= 0) _starpu_bind_thread_on_cpu(main_thread_cpuid, main_thread_activity, "main"); /* Reserve thread for MPI */ int mpi_thread_cpuid = starpu_getenv_number_default("STARPU_MPI_THREAD_CPUID", -1); int mpi_thread_coreid = starpu_getenv_number_default("STARPU_MPI_THREAD_COREID", -1); if (mpi_thread_coreid == -1 && mpi_thread_cpuid >= 0) mpi_thread_coreid = mpi_thread_cpuid / _starpu_get_nhyperthreads(); if (mpi_thread_cpuid == -1 && mpi_thread_coreid >= 0) mpi_thread_cpuid = mpi_thread_coreid * _starpu_get_nhyperthreads(); if (mpi_thread_coreid >= 0) { unsigned coreid = mpi_thread_coreid; unsigned got_cpuid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, &coreid, 1); if (got_cpuid != (unsigned) mpi_thread_cpuid) _STARPU_DISP("Warning: Could not reserve requested logical core %d (logical cpu %d) for MPI, got %d instead\n", mpi_thread_coreid, mpi_thread_cpuid, got_cpuid); } #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) _starpu_init_cuda_config(topology, config); #endif #if defined(STARPU_USE_HIP) _starpu_init_hip_config(topology, config); #endif /* We put the OpenCL section after the CUDA section: we rather use NVidia GPUs in CUDA mode than in OpenCL mode */ #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) _starpu_init_opencl_config(topology, config); #endif #ifdef STARPU_USE_MAX_FPGA _starpu_init_max_fpga_config(topology, config); #endif #if defined(STARPU_USE_MPI_MASTER_SLAVE) _starpu_init_mpi_config(topology, config, &config->conf, no_mp_config); #endif #if defined(STARPU_USE_TCPIP_MASTER_SLAVE) _starpu_init_tcpip_config(topology, config, &config->conf, no_mp_config); #endif /* we put the CPU section after the accelerator : in case there was an * accelerator found, we devote one cpu */ #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID) _starpu_init_cpu_config(topology, config); #endif if (topology->nworkers == 0) { _STARPU_DEBUG("No worker found, aborting ...\n"); return -ENODEV; } return 0; } void _starpu_destroy_machine_config(struct _starpu_machine_config *config, int no_mp_config) { _starpu_close_debug_logfile(); unsigned worker; if (!no_mp_config) for (worker = 0; worker < config->topology.nworkers; worker++) { struct _starpu_worker *workerarg = &config->workers[worker]; int bindid = workerarg->bindid; free(workerarg->perf_arch.devices); #ifdef STARPU_HAVE_HWLOC hwloc_bitmap_free(workerarg->hwloc_cpu_set); if (bindid != -1) { hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology, config->pu_depth, bindid); struct _starpu_hwloc_userdata *data = worker_obj->userdata; if (data->worker_list) { _starpu_worker_list_delete(data->worker_list); data->worker_list = NULL; } } #endif if (bindid != -1) { free(config->bindid_workers[bindid].workerids); config->bindid_workers[bindid].workerids = NULL; } } free(config->bindid_workers); config->bindid_workers = NULL; config->nbindid = 0; unsigned combined_worker_id; for(combined_worker_id=0 ; combined_worker_id < config->topology.ncombinedworkers ; combined_worker_id++) { struct _starpu_combined_worker *combined_worker = &config->combined_workers[combined_worker_id]; #ifdef STARPU_HAVE_HWLOC hwloc_bitmap_free(combined_worker->hwloc_cpu_set); #endif free(combined_worker->perf_arch.devices); } #ifdef STARPU_HAVE_HWLOC _starpu_deallocate_topology_userdata(hwloc_get_root_obj(config->topology.hwtopology)); hwloc_bitmap_free(config->topology.log_cpuset); hwloc_bitmap_free(config->topology.log_coreset); hwloc_topology_destroy(config->topology.hwtopology); #endif topology_is_initialized = 0; _starpu_devices_gpu_clean(); int i; for (i=0; itopology.hwtopology, config->pu_depth, cpuid); hwloc_bitmap_t set = obj->cpuset; return hwloc_bitmap_first(set); #else return cpuid; #endif } void _starpu_do_bind_thread_on_cpu(int cpuid STARPU_ATTRIBUTE_UNUSED) { #ifndef STARPU_SIMGRID if (nobind > 0) return; if (cpuid < 0) return; #ifdef STARPU_HAVE_HWLOC struct _starpu_machine_config *config = _starpu_get_machine_config(); const struct hwloc_topology_support *support = hwloc_topology_get_support(config->topology.hwtopology); if (support->cpubind->set_thisthread_cpubind) { hwloc_obj_t obj = hwloc_get_obj_by_depth(config->topology.hwtopology, config->pu_depth, cpuid); hwloc_bitmap_t set = obj->cpuset; int res; hwloc_bitmap_singlify(set); res = hwloc_set_cpubind(config->topology.hwtopology, set, HWLOC_CPUBIND_THREAD); if (res) { perror("hwloc_set_cpubind"); STARPU_ABORT(); } } #elif defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(__linux__) int res; /* fix the thread on the correct cpu */ cpu_set_t aff_mask; CPU_ZERO(&aff_mask); CPU_SET(cpuid, &aff_mask); starpu_pthread_t self = starpu_pthread_self(); res = pthread_setaffinity_np(self, sizeof(aff_mask), &aff_mask); if (res) { const char *msg = strerror(res); _STARPU_MSG("pthread_setaffinity_np: %s\n", msg); STARPU_ABORT(); } #elif defined(_WIN32) DWORD mask = 1 << cpuid; if (!SetThreadAffinityMask(GetCurrentThread(), mask)) { _STARPU_ERROR("SetThreadMaskAffinity(%lx) failed\n", mask); } #else #warning no CPU binding support #endif #endif } int _starpu_bind_thread_on_cpu(int cpuid STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED, const char *name STARPU_ATTRIBUTE_UNUSED) { int ret = 0; #ifndef STARPU_SIMGRID if (nobind > 0) return ret; if (cpuid < 0) return ret; #ifdef STARPU_HAVE_HWLOC struct _starpu_machine_config *config = _starpu_get_machine_config(); _starpu_init_topology(config); if (workerid != STARPU_NOWORKERID && cpuid < STARPU_MAXCPUS) { /* TODO: mutex... */ int previous = cpu_worker[cpuid]; /* We would like the PU to be available, or we are perhaps fine to share it */ if (!(previous == STARPU_NOWORKERID || (previous == STARPU_NONACTIVETHREAD && workerid == STARPU_NONACTIVETHREAD) || (previous >= 0 && previous == workerid) || (name && cpu_name[cpuid] && !strcmp(name, cpu_name[cpuid])))) { char hostname[65]; gethostname(hostname, sizeof(hostname)); if (previous == STARPU_ACTIVETHREAD) _STARPU_DISP("[%s] Warning: active thread %s was already bound to PU %d\n", hostname, cpu_name[cpuid], cpuid); else if (previous == STARPU_NONACTIVETHREAD) _STARPU_DISP("[%s] Warning: non-active thread %s was already bound to PU %d\n", hostname, cpu_name[cpuid], cpuid); else _STARPU_DISP("[%s] Warning: worker %d was already bound to PU %d\n", hostname, previous, cpuid); if (workerid == STARPU_ACTIVETHREAD) _STARPU_DISP("and we were told to also bind active thread %s to it.\n", name); else if (workerid == STARPU_NONACTIVETHREAD) _STARPU_DISP("and we were told to also bind non-active thread %s to it.\n", name); else _STARPU_DISP("and we were told to also bind worker %d to it.\n", workerid); _STARPU_DISP("This will strongly degrade performance.\n"); if (workerid >= 0) /* This shouldn't happen for workers */ _STARPU_DISP("[%s] Maybe check starpu_machine_display's output to determine what wrong binding happened. Hwloc reported a total of %d cores and %d threads, and to use %d threads from logical %d, perhaps there is misdetection between hwloc, the kernel and the BIOS, or an administrative allocation issue from e.g. the job scheduler? You may want to try to use export STARPU_WORKERS_GETBIND=0 to ignore the job scheduler binding\n", hostname, config->topology.nhwworker[STARPU_CPU_WORKER][0], config->topology.nhwpus, config->topology.nusedpus, config->topology.firstusedpu); ret = -1; } else { cpu_worker[cpuid] = workerid; if (name) { if (cpu_name[cpuid]) free(cpu_name[cpuid]); cpu_name[cpuid] = strdup(name); } } } #endif _starpu_do_bind_thread_on_cpu(cpuid); #endif return ret; } int starpu_bind_thread_on(int cpuid, unsigned flags, const char *name) { int workerid; STARPU_ASSERT_MSG(name, "starpu_bind_thread_on must be provided with a name"); starpu_pthread_setname(name); if (flags & STARPU_THREAD_ACTIVE) workerid = STARPU_ACTIVETHREAD; else workerid = STARPU_NONACTIVETHREAD; return _starpu_bind_thread_on_cpu(cpuid, workerid, name); } void _starpu_bind_thread_on_cpus(struct _starpu_combined_worker *combined_worker STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_SIMGRID return; #endif #ifdef STARPU_HAVE_HWLOC const struct hwloc_topology_support *support; struct _starpu_machine_config *config = _starpu_get_machine_config(); _starpu_init_topology(config); support = hwloc_topology_get_support(config->topology.hwtopology); if (support->cpubind->set_thisthread_cpubind) { hwloc_bitmap_t set = combined_worker->hwloc_cpu_set; int ret; ret = hwloc_set_cpubind(config->topology.hwtopology, set, HWLOC_CPUBIND_THREAD); if (ret) { perror("binding thread"); STARPU_ABORT(); } } #else #ifdef __GLIBC__ sched_setaffinity(0,sizeof(combined_worker->cpu_set),&combined_worker->cpu_set); #else # warning no parallel worker CPU binding support #endif #endif } void starpu_bind_thread_on_main(void) { _starpu_do_bind_thread_on_cpu(main_thread_cpuid); } void starpu_bind_thread_on_cpu(int cpuid) { _starpu_do_bind_thread_on_cpu(cpuid); } void starpu_bind_thread_on_worker(unsigned workerid) { unsigned basic_worker_count = starpu_worker_get_count(); if (workerid < basic_worker_count) _starpu_do_bind_thread_on_cpu(starpu_worker_get_bindid(workerid)); else _starpu_bind_thread_on_cpus(_starpu_get_combined_worker_struct(workerid)); } static size_t _starpu_cpu_get_global_mem_size(int nodeid, struct _starpu_machine_config *config) { size_t global_mem; starpu_ssize_t limit = -1; #if defined(STARPU_HAVE_HWLOC) struct _starpu_machine_topology *topology = &config->topology; STARPU_ASSERT(numa_enabled != -1); if (numa_enabled) { int depth_node = hwloc_get_type_depth(topology->hwtopology, HWLOC_OBJ_NUMANODE); if (depth_node == HWLOC_TYPE_DEPTH_UNKNOWN) { #if HWLOC_API_VERSION >= 0x00020000 global_mem = hwloc_get_root_obj(topology->hwtopology)->total_memory; #else global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory; #endif } else { char name[32]; hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, depth_node, nodeid); #if HWLOC_API_VERSION >= 0x00020000 global_mem = obj->attr->numanode.local_memory; #else global_mem = obj->memory.local_memory; #endif snprintf(name, sizeof(name), "STARPU_LIMIT_CPU_NUMA_%d_MEM", obj->os_index); limit = starpu_getenv_number(name); } } else { /* Do not limit ourself to a single NUMA node */ #if HWLOC_API_VERSION >= 0x00020000 global_mem = hwloc_get_root_obj(topology->hwtopology)->total_memory; #else global_mem = hwloc_get_root_obj(topology->hwtopology)->memory.total_memory; #endif } #else /* STARPU_HAVE_HWLOC */ #ifdef STARPU_DEVEL # warning TODO: use sysinfo when available to get global size #endif global_mem = 0; #endif if (limit == -1) limit = starpu_getenv_number("STARPU_LIMIT_CPU_NUMA_MEM"); if (limit == -1) { limit = starpu_getenv_number("STARPU_LIMIT_CPU_MEM"); if (limit != -1 && numa_enabled) { _STARPU_DISP("NUMA is enabled and STARPU_LIMIT_CPU_MEM is set to %luMB. Assuming that it should be distributed over the %d NUMA node(s). You probably want to use STARPU_LIMIT_CPU_NUMA_MEM instead.\n", (long) limit, _starpu_topology_get_nnumanodes(config)); limit /= _starpu_topology_get_nnumanodes(config); } } /* Don't eat all memory for ourself */ global_mem *= 0.9; if (limit < 0) // No limit is defined, we return the global memory size return global_mem; else if (global_mem && (size_t)limit * 1024*1024 > global_mem) { if (numa_enabled) _STARPU_DISP("The requested limit %ldMB for NUMA node %d is higher that available memory %luMB, using the latter\n", (unsigned long) limit, nodeid, (unsigned long) global_mem / (1024*1024)); else _STARPU_DISP("The requested limit %ldMB is higher that available memory %luMB, using the latter\n", (long) limit, (unsigned long) global_mem / (1024*1024)); return global_mem; } else // We limit the memory return limit*1024*1024; } //TODO : Check SIMGRID static void _starpu_init_numa_node(struct _starpu_machine_config *config) { nb_numa_nodes = 0; unsigned i; for (i = 0; i < STARPU_MAXNUMANODES; i++) { numa_memory_nodes_to_hwloclogid[i] = STARPU_NUMA_UNINITIALIZED; numa_memory_nodes_to_physicalid[i] = STARPU_NUMA_UNINITIALIZED; } #ifdef STARPU_SIMGRID char name[16]; starpu_sg_host_t host; #endif numa_enabled = starpu_getenv_number_default("STARPU_USE_NUMA", 0); /* NUMA mode activated */ if (numa_enabled) { /* Take all NUMA nodes used by CPU workers */ unsigned worker; for (worker = 0; worker < config->topology.nworkers; worker++) { struct _starpu_worker *workerarg = &config->workers[worker]; if (workerarg->arch == STARPU_CPU_WORKER) { int numa_logical_id = _starpu_get_logical_numa_node_worker(worker); /* Convert logical id to StarPU id to check if this NUMA node is already saved or not */ int numa_starpu_id = starpu_memory_nodes_numa_hwloclogid_to_id(numa_logical_id); /* This shouldn't happen */ if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES) { _STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES); STARPU_ABORT(); } if (numa_starpu_id == -1) { int devid = numa_logical_id == STARPU_NUMA_MAIN_RAM ? 0 : numa_logical_id; int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, devid); _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cpu_get_global_mem_size(devid, config)); STARPU_ASSERT_MSG_ALWAYS(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES); _starpu_memory_node_set_mapped(memnode); numa_memory_nodes_to_hwloclogid[memnode] = numa_logical_id; int numa_physical_id = _starpu_get_physical_numa_node_worker(worker); numa_memory_nodes_to_physicalid[memnode] = numa_physical_id; nb_numa_nodes++; #ifdef STARPU_SIMGRID snprintf(name, sizeof(name), "RAM%d", memnode); host = _starpu_simgrid_get_host_by_name(name); STARPU_ASSERT(host); _starpu_simgrid_memory_node_set_host(memnode, host); #endif } } } /* If we found NUMA nodes from CPU workers, it's good */ if (nb_numa_nodes != 0) return; _STARPU_DISP("No NUMA nodes found when checking CPU workers...\n"); #ifdef STARPU_HAVE_HWLOC _STARPU_DISP("Take NUMA nodes attached to GPU devices...\n"); for (i = 0; i < STARPU_NARCH; i++) { if (!starpu_driver_info[i].get_hwloc_obj) continue; unsigned j; for (j = 0; j < config->topology.ndevices[i]; j++) { hwloc_obj_t obj = starpu_driver_info[i].get_hwloc_obj(config->topology.hwtopology, config->topology.devid[i][j]); if (obj) obj = _starpu_numa_get_obj(obj); /* Hwloc cannot recognize some devices */ if (!obj) continue; int numa_starpu_id = starpu_memory_nodes_numa_hwloclogid_to_id(obj->logical_index); /* This shouldn't happen */ if (numa_starpu_id == -1 && nb_numa_nodes == STARPU_MAXNUMANODES) { _STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES); STARPU_ABORT(); } if (numa_starpu_id == -1) { int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, obj->logical_index); _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cpu_get_global_mem_size(obj->logical_index, config)); STARPU_ASSERT_MSG_ALWAYS(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available)", memnode, STARPU_MAXNUMANODES); _starpu_memory_node_set_mapped(memnode); numa_memory_nodes_to_hwloclogid[memnode] = obj->logical_index; numa_memory_nodes_to_physicalid[memnode] = obj->os_index; nb_numa_nodes++; #ifdef STARPU_SIMGRID snprintf(name, sizeof(name), "RAM%d", memnode); host = _starpu_simgrid_get_host_by_name(name); STARPU_ASSERT(host); _starpu_simgrid_memory_node_set_host(memnode, host); #endif } } } #endif } #ifdef STARPU_HAVE_HWLOC //Found NUMA nodes from CUDA nodes if (nb_numa_nodes != 0) return; /* In case, we do not find any NUMA nodes when checking NUMA nodes attached to GPUs, we take all of them */ if (numa_enabled) _STARPU_DISP("No NUMA nodes found when checking GPUs devices...\n"); #endif if (numa_enabled) _STARPU_DISP("Finally, take all NUMA nodes available... \n"); unsigned nnuma = _starpu_topology_get_nnumanodes(config); if (nnuma > STARPU_MAXNUMANODES) { _STARPU_MSG("Warning: %u NUMA nodes available. Only %u enabled. Use configure option --enable-maxnumanodes=xxx to update the maximum value of supported NUMA nodes.\n", _starpu_topology_get_nnumanodes(config), STARPU_MAXNUMANODES); nnuma = STARPU_MAXNUMANODES; } unsigned numa; for (numa = 0; numa < nnuma; numa++) { unsigned numa_logical_id; unsigned numa_physical_id; #if defined(STARPU_HAVE_HWLOC) hwloc_obj_t obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, numa); if (obj) { numa_logical_id = obj->logical_index; numa_physical_id = obj->os_index; } else #endif { numa_logical_id = 0; numa_physical_id = 0; } int memnode = _starpu_memory_node_register(STARPU_CPU_RAM, numa_logical_id); STARPU_ASSERT(memnode < STARPU_MAXNUMANODES); _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cpu_get_global_mem_size(numa_logical_id, config)); _starpu_memory_node_set_mapped(memnode); numa_memory_nodes_to_hwloclogid[memnode] = numa_logical_id; numa_memory_nodes_to_physicalid[memnode] = numa_physical_id; nb_numa_nodes++; if (numa == 0) STARPU_ASSERT_MSG(memnode == STARPU_MAIN_RAM, "Wrong Memory Node : %d (expected %d) \n", memnode, STARPU_MAIN_RAM); STARPU_ASSERT_MSG_ALWAYS(memnode < STARPU_MAXNUMANODES, "Wrong Memory Node : %d (only %d available) \n", memnode, STARPU_MAXNUMANODES); #ifdef STARPU_SIMGRID if (nnuma > 1) { snprintf(name, sizeof(name), "RAM%d", memnode); host = _starpu_simgrid_get_host_by_name(name); } else { /* In this case, nnuma has only one node */ host = _starpu_simgrid_get_host_by_name("RAM"); } STARPU_ASSERT(host); _starpu_simgrid_memory_node_set_host(memnode, host); #endif } STARPU_ASSERT_MSG(nb_numa_nodes > 0, "No NUMA node found... We need at least one memory node !\n"); } static void _starpu_init_numa_bus() { unsigned i, j; for (i = 0; i < nb_numa_nodes; i++) for (j = 0; j < nb_numa_nodes; j++) if (i != j) numa_bus_id[i*nb_numa_nodes+j] = _starpu_register_bus(i, j); } #if defined(STARPU_HAVE_HWLOC) && !defined(STARPU_SIMGRID) static int _starpu_find_pu_driving_numa_from(hwloc_obj_t root, unsigned node) { unsigned i; int found = 0; if (!root->arity) { if (root->type == HWLOC_OBJ_PU) { struct _starpu_hwloc_userdata *userdata = root->userdata; if (userdata->pu_worker) { /* Cool, found a worker! */ _STARPU_DEBUG("found PU %d to drive memory node %d\n", userdata->pu_worker->bindid, node); _starpu_worker_drives_memory_node(userdata->pu_worker, node); found = 1; } } } for (i = 0; i < root->arity; i++) { if (_starpu_find_pu_driving_numa_from(root->children[i], node)) found = 1; } return found; } /* Look upward to find a level containing the given NUMA node and workers to drive it */ static int _starpu_find_pu_driving_numa_up(hwloc_obj_t root, unsigned node) { if (_starpu_find_pu_driving_numa_from(root, node)) /* Ok, we already managed to find drivers */ return 1; if (!root->parent) /* And no parent!? nobody can drive this... */ return 0; /* Try from parent */ return _starpu_find_pu_driving_numa_up(root->parent, node); } #endif static void _starpu_init_workers_binding_and_memory(struct _starpu_machine_config *config, int no_mp_config) { /* We will store all the busid of the different (src, dst) * combinations in a matrix which we initialize here. */ _starpu_initialize_busid_matrix(); unsigned bindid; for (bindid = 0; bindid < config->nbindid; bindid++) { free(config->bindid_workers[bindid].workerids); config->bindid_workers[bindid].workerids = NULL; config->bindid_workers[bindid].nworkers = 0; } /* First determine the CPU binding */ unsigned worker; if (!no_mp_config) for (worker = 0; worker < config->topology.nworkers; worker++) { struct _starpu_worker *workerarg = &config->workers[worker]; unsigned devid STARPU_ATTRIBUTE_UNUSED = workerarg->devid; /* select the worker binding */ starpu_driver_info[workerarg->arch].init_worker_binding(config, no_mp_config, workerarg); _STARPU_DEBUG("worker %u type %d devid %u bound to cpu %d\n", worker, workerarg->arch, devid, workerarg->bindid); #ifdef __GLIBC__ if (workerarg->bindid != -1) { /* Save the initial cpuset */ CPU_ZERO(&workerarg->cpu_set); CPU_SET(workerarg->bindid, &workerarg->cpu_set); } #endif /* __GLIBC__ */ #ifdef STARPU_HAVE_HWLOC if (workerarg->bindid == -1) { workerarg->hwloc_cpu_set = hwloc_bitmap_alloc(); workerarg->hwloc_obj = NULL; } else { /* Put the worker descriptor in the userdata field of the * hwloc object describing the CPU */ hwloc_obj_t worker_obj = hwloc_get_obj_by_depth(config->topology.hwtopology, config->pu_depth, workerarg->bindid); struct _starpu_hwloc_userdata *data = worker_obj->userdata; if (data->worker_list == NULL) data->worker_list = _starpu_worker_list_new(); _starpu_worker_list_push_front(data->worker_list, workerarg); /* Clear the cpu set and set the cpu */ workerarg->hwloc_cpu_set = hwloc_bitmap_dup(worker_obj->cpuset); workerarg->hwloc_obj = worker_obj; } #endif if (workerarg->bindid != -1) { bindid = workerarg->bindid; unsigned old_nbindid = config->nbindid; if (bindid >= old_nbindid) { /* More room needed */ if (!old_nbindid) config->nbindid = STARPU_NMAXWORKERS; else config->nbindid = 2 * old_nbindid; if (bindid >= config->nbindid) { config->nbindid = bindid+1; } _STARPU_REALLOC(config->bindid_workers, config->nbindid * sizeof(config->bindid_workers[0])); memset(&config->bindid_workers[old_nbindid], 0, (config->nbindid - old_nbindid) * sizeof(config->bindid_workers[0])); } /* Add slot for this worker */ /* Don't care about amortizing the cost, there are usually very few workers sharing the same bindid */ config->bindid_workers[bindid].nworkers++; _STARPU_REALLOC(config->bindid_workers[bindid].workerids, config->bindid_workers[bindid].nworkers * sizeof(config->bindid_workers[bindid].workerids[0])); config->bindid_workers[bindid].workerids[config->bindid_workers[bindid].nworkers-1] = worker; } } /* Then initialize NUMA nodes accordingly */ _starpu_init_numa_node(config); _starpu_init_numa_bus(); #ifdef STARPU_SIMGRID _starpu_simgrid_count_ngpus(); #else #ifdef STARPU_HAVE_HWLOC _starpu_topology_count_ngpus(hwloc_get_root_obj(config->topology.hwtopology)); #endif #endif /* Eventually initialize accelerators memory nodes */ if (!no_mp_config) for (worker = 0; worker < config->topology.nworkers; worker++) { struct _starpu_worker *workerarg = &config->workers[worker]; unsigned devid STARPU_ATTRIBUTE_UNUSED = workerarg->devid; /* select the memory node that contains worker's memory */ starpu_driver_info[workerarg->arch].init_worker_memory(config, no_mp_config, workerarg); _STARPU_DEBUG("worker %u type %d devid %u STARPU memory node %u\n", worker, workerarg->arch, devid, workerarg->memory_node); } #if defined(STARPU_HAVE_HWLOC) && !defined(STARPU_SIMGRID) /* If some NUMA nodes don't have drivers, attribute some */ unsigned node, nnodes = starpu_memory_nodes_get_count();; if (!no_mp_config) for (node = 0; node < nnodes; node++) { if (starpu_node_get_kind(node) != STARPU_CPU_RAM) /* Only RAM nodes can be processed by any CPU */ continue; for (worker = 0; worker < config->topology.nworkers; worker++) { if (_starpu_worker_drives_memory[worker][node]) break; } if (worker < config->topology.nworkers) /* Already somebody driving it */ continue; /* Nobody driving this node! Attribute some */ _STARPU_DEBUG("nobody drives memory node %d\n", node); hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_NUMANODE, starpu_memory_nodes_numa_id_to_hwloclogid(node)); int ret = _starpu_find_pu_driving_numa_up(numa_node_obj, node); STARPU_ASSERT_MSG(ret, "oops, didn't find any worker to drive memory node %d!?", node); } #endif } int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_config) { int ret; unsigned i; enum starpu_worker_archtype type; /* First determine which devices we will use */ ret = _starpu_init_machine_config(config, no_mp_config); if (ret) return ret; /* for the data management library */ _starpu_memory_nodes_init(); _starpu_datastats_init(); /* Now determine CPU binding and memory nodes */ _starpu_init_workers_binding_and_memory(config, no_mp_config); _starpu_mem_chunk_init_last(); for (type = 0; type < STARPU_NARCH; type++) config->arch_nodeid[type] = -1; for (i = 0; i < starpu_worker_get_count(); i++) { type = starpu_worker_get_type(i); if (config->arch_nodeid[type] == -1) config->arch_nodeid[type] = starpu_worker_get_memory_node(i); else if (config->arch_nodeid[type] != (int) starpu_worker_get_memory_node(i)) config->arch_nodeid[type] = -2; } _starpu_init_bus_performance(); return 0; } void _starpu_destroy_topology(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED) { #if defined(STARPU_USE_MPI_MASTER_SLAVE) _starpu_deinit_mpi_config(config); #endif #if defined(STARPU_USE_TCPIP_MASTER_SLAVE) _starpu_deinit_tcpip_config(config); #endif /* cleanup StarPU internal data structures */ _starpu_memory_nodes_deinit(); _starpu_destroy_machine_config(config, 0); _starpu_deinitialize_workers_bindid(config); } void starpu_topology_print(FILE *output) { struct _starpu_machine_config *config = _starpu_get_machine_config(); struct _starpu_machine_topology *topology = &config->topology; unsigned pu; unsigned worker; unsigned nworkers = starpu_worker_get_count(); unsigned ncombinedworkers = topology->ncombinedworkers; unsigned nthreads_per_core = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0]; #ifdef STARPU_HAVE_HWLOC hwloc_topology_t topo = topology->hwtopology; hwloc_obj_t pu_obj; hwloc_obj_t last_numa_obj = (void*) -1, numa_obj; hwloc_obj_t last_package_obj = (void*) -1, package_obj; #endif for (pu = 0; pu < topology->nhwpus; pu++) { #ifdef STARPU_HAVE_HWLOC pu_obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, pu); numa_obj = _starpu_numa_get_obj(pu_obj); if (numa_obj != last_numa_obj) { if (numa_obj) fprintf(output, "numa %2u", numa_obj->logical_index); else fprintf(output, "No numa"); last_numa_obj = numa_obj; } fprintf(output, "\t"); package_obj = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_SOCKET, pu_obj); if (package_obj != last_package_obj) { if (package_obj) fprintf(output, "pack %2u", package_obj->logical_index); else fprintf(output, "no pack"); last_package_obj = package_obj; } fprintf(output, "\t"); #endif if ((pu % nthreads_per_core) == 0) fprintf(output, "core %-5u ", pu / nthreads_per_core); else fprintf(output, " "); fprintf(output, "PU %-5u ", pu); for (worker = 0; worker < nworkers + ncombinedworkers; worker++) { if (worker < nworkers) { struct _starpu_worker *workerarg = &config->workers[worker]; if (workerarg->bindid == (int) pu) { char name[256]; starpu_worker_get_name(worker, name, sizeof(name)); fprintf(output, "%-10s ", name); } } else { int worker_size, i; int *combined_workerid; starpu_combined_worker_get_description(worker, &worker_size, &combined_workerid); for (i = 0; i < worker_size; i++) { if (topology->workers_bindid[combined_workerid[i]] == pu) fprintf(output, "comb %-5u ", worker-nworkers); } } } fprintf(output, "\n"); } } int starpu_get_pu_os_index(unsigned logical_index) { #ifdef STARPU_HAVE_HWLOC struct _starpu_machine_config *config = _starpu_get_machine_config(); struct _starpu_machine_topology *topology = &config->topology; hwloc_topology_t topo = topology->hwtopology; hwloc_obj_t obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, logical_index); STARPU_ASSERT(obj); return obj->os_index; #else return logical_index; #endif } #ifdef STARPU_HAVE_HWLOC hwloc_topology_t starpu_get_hwloc_topology(void) { struct _starpu_machine_config *config = _starpu_get_machine_config(); return config->topology.hwtopology; } #endif unsigned _starpu_get_nhyperthreads() { struct _starpu_machine_config *config = _starpu_get_machine_config(); return config->topology.nhwpus / config->topology.nhwworker[STARPU_CPU_WORKER][0]; } long starpu_get_memory_location_bitmap(void* ptr, size_t size) { if (ptr == NULL || size == 0) { return -1; } #ifdef HAVE_HWLOC_GET_AREA_MEMLOCATION // implies STARPU_HAVE_HWLOC struct _starpu_machine_config *config = _starpu_get_machine_config(); struct _starpu_machine_topology *topology = &config->topology; hwloc_bitmap_t set = hwloc_bitmap_alloc(); int ret = hwloc_get_area_memlocation(topology->hwtopology, ptr, size, set, HWLOC_MEMBIND_BYNODESET); if (ret != 0) { hwloc_bitmap_free(set); return -1; } if (hwloc_bitmap_iszero(set) || hwloc_bitmap_isfull(set)) { // If the page isn't allocated yet, the bitmap is empty: hwloc_bitmap_free(set); return -1; } /* We could maybe use starpu_bitmap, but that seems a little bit * overkill and it would make recording it in traces harder. */ long ret_bitmap = 0; unsigned i = 0; hwloc_bitmap_foreach_begin(i, set) { hwloc_obj_t numa_node = hwloc_get_numanode_obj_by_os_index(topology->hwtopology, i); if (numa_node) { ret_bitmap |= (1 << numa_node->logical_index); } else { // We can't find a matching NUMA node, this can happen on machine without NUMA node hwloc_bitmap_free(set); return -1; } } hwloc_bitmap_foreach_end(); hwloc_bitmap_free(set); return ret_bitmap; #else /* we could use move_pages(), but please, rather use hwloc (version >= 1.11.3)! */ return -1; #endif } starpu-1.4.9+dfsg/src/core/topology.h000066400000000000000000000167601507764646700175660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __TOPOLOGY_H__ #define __TOPOLOGY_H__ /** @file */ #include #include #include #include #include #pragma GCC visibility push(hidden) struct _starpu_machine_config; #ifndef STARPU_SIMGRID #ifdef STARPU_HAVE_HWLOC /** This is allocated for each hwloc object */ struct _starpu_hwloc_userdata { /** List of workers running on this obj */ struct _starpu_worker_list *worker_list; /** Number of GPUs sharing this PCI link */ unsigned ngpus; /** Worker running this PU */ struct _starpu_worker *pu_worker; }; #endif #endif struct _starpu_worker_set; struct _starpu_machine_topology; /** Detect the number of memory nodes and where to bind the different workers. */ int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_config); /** * Initialize a series of workers. * * - If \p explicit_workers_gpuid is non-null, it will be used as the list of device * IDs of the actual hardware devices to be used. * - If \p current is non-null, it points to the next device ID to be used * - \p workers_gpuid is filled with the set of device IDs actually used in the end * - \p varname is the name of the environment variable that users can use to * override the set of device IDs to be used. * - \p nhwgpus is the number of actual devices available on the system. * - \p type is the type of devices. */ void _starpu_initialize_workers_deviceid(int *explicit_workers_gpuid, int *current, int *workers_gpuid, const char *varname, unsigned nhwgpus, enum starpu_worker_archtype type); /** Get the next devid for architecture \p type */ int _starpu_get_next_devid(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config, enum starpu_worker_archtype arch); /** Check that \p *ndevices is not larger than \p nhwdevices (unless \p overflow is 1), and is not larger than \p max. * Cap it otherwise, and advise using the \p configurename ./configure option in the \p max case. */ void _starpu_topology_check_ndevices(int *ndevices, unsigned nhwdevices, int overflow, unsigned max, int reserved, const char *nname, const char *dname, const char *configurename); /** Configures the topology according to the desired worker distribution on the device. * - homogeneous tells to use devid 0 for the perfmodel (all devices have the same performance) * - worker_devid tells to set a devid per worker, and subworkerid to 0, rather * than sharing the devid and giving a different subworkerid to each worker. */ /** Request to allocate a worker set for each worker */ #define ALLOC_WORKER_SET ((struct _starpu_worker_set*) -1) /** Request to set a different perfmodel devid per worker */ #define DEVID_PER_WORKER -2 void _starpu_topology_configure_workers(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config, enum starpu_worker_archtype type, int devnum, int devid, int homogeneous, int worker_devid, unsigned nworker_per_device, unsigned ncores, struct _starpu_worker_set *worker_set, struct _starpu_worker_set *driver_worker_set); extern unsigned _starpu_may_bind_automatically[STARPU_NARCH]; /** This function gets the identifier of the next core on which to bind a * worker. In case a list of preferred cores was specified (logical indexes), * we look for a an available core among the list if possible, otherwise a * round-robin policy is used. */ unsigned _starpu_get_next_bindid(struct _starpu_machine_config *config, unsigned flags, unsigned *preferred_binding, unsigned npreferred); /** Should be called instead of _starpu_destroy_topology when _starpu_build_topology returns a non zero value. */ void _starpu_destroy_machine_config(struct _starpu_machine_config *config, int no_mp_config); /** Destroy all resources used to store the topology of the machine. */ void _starpu_destroy_topology(struct _starpu_machine_config *config); #ifdef STARPU_HAVE_HWLOC /** Return the hwloc object of the NUMA node corresponding to the given hwloc object */ hwloc_obj_t _starpu_numa_get_obj(hwloc_obj_t obj); #endif /** returns the number of physical cpus */ unsigned _starpu_topology_get_nhwcpu(struct _starpu_machine_config *config); /** returns the number of logical cpus */ unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config); /** returns the number of logical cpus */ unsigned _starpu_topology_get_nhwpu(struct _starpu_machine_config *config); /** returns the number of hardware NUMA nodes */ unsigned _starpu_topology_get_nhwnumanodes(struct _starpu_machine_config *config); /** returns the number of NUMA nodes to be exposed by StarPU as memory nodes, can be just 1 when STARPU_USE_NUMA is 0 */ unsigned _starpu_topology_get_nnumanodes(struct _starpu_machine_config *config); /** given a list of numa nodes (logical indexes) \p numa_binding, fill \p binding with the corresponding cores (logical indexes) */ unsigned _starpu_topology_get_numa_core_binding(struct _starpu_machine_config *config, const unsigned *numa_binding, unsigned nnuma, unsigned *binding, unsigned nbinding); int starpu_memory_nodes_numa_hwloclogid_to_id(int logid); /* This returns the exact NUMA node next to a worker */ int _starpu_get_logical_numa_node_worker(unsigned workerid); /** returns the number of hyperthreads per core */ unsigned _starpu_get_nhyperthreads() STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; #ifdef STARPU_HAVE_HWLOC /** Small convenient function to filter hwloc topology depending on HWLOC API version */ void _starpu_topology_filter(hwloc_topology_t topology); #endif #define STARPU_NOWORKERID -1 #define STARPU_ACTIVETHREAD -2 #define STARPU_NONACTIVETHREAD -2 /** Bind the current thread on the CPU logically identified by "cpuid". The * logical ordering of the processors is either that of hwloc (if available), * or the ordering exposed by the OS. */ int _starpu_bind_thread_on_cpu(int cpuid, int workerid, const char *name); struct _starpu_combined_worker; /** Bind the current thread on the set of CPUs for the given combined worker. */ void _starpu_bind_thread_on_cpus(struct _starpu_combined_worker *combined_worker); struct _starpu_worker *_starpu_get_worker_from_driver(struct starpu_driver *d); unsigned starpu_memory_nodes_get_numa_count(void) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; int starpu_memory_nodes_numa_id_to_hwloclogid(unsigned id); /** Get the memory node for data number i when task is to be executed on memory node \p target_node. Returns -1 if the data does not need to be loaded. */ int _starpu_task_data_get_node_on_node(struct starpu_task *task, unsigned index, unsigned target_node); /** Get the memory node for data number i when task is to be executed on worker \p worker. Returns -1 if the data does not need to be loaded. */ int _starpu_task_data_get_node_on_worker(struct starpu_task *task, unsigned index, unsigned worker); #pragma GCC visibility pop #endif // __TOPOLOGY_H__ starpu-1.4.9+dfsg/src/core/tree.c000066400000000000000000000074221507764646700166370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "starpu_tree.h" #include "workers.h" void starpu_tree_reset_visited(struct starpu_tree *tree, char *visited) { if(tree->arity == 0) { int *workerids; int nworkers = starpu_bindid_get_workerids(tree->id, &workerids); int w; for(w = 0; w < nworkers; w++) { visited[workerids[w]] = 0; } } int i; for(i = 0; i < tree->arity; i++) starpu_tree_reset_visited(&tree->nodes[i], visited); } void starpu_tree_prepare_children(unsigned arity, struct starpu_tree *father) { _STARPU_MALLOC(father->nodes, arity*sizeof(struct starpu_tree)); father->arity = arity; } void starpu_tree_insert(struct starpu_tree *tree, int id, int level, int is_pu, int arity, struct starpu_tree *father) { tree->level = level; tree->arity = arity; tree->nodes = NULL; tree->id = is_pu ? id : level; tree->is_pu = is_pu; tree->father = father; } struct starpu_tree* starpu_tree_get(struct starpu_tree *tree, int id) { if(tree->arity == 0) { if(tree->is_pu && tree->id == id) return tree; else return NULL; } int i; for(i = 0; i < tree->arity; i++) { struct starpu_tree *found_tree = starpu_tree_get(&tree->nodes[i], id); if(found_tree) return found_tree; } return NULL; } static struct starpu_tree* _get_down_to_leaves(struct starpu_tree *node, char *visited, char *present) { struct starpu_tree *found_tree = NULL; int i; for(i = 0; i < node->arity; i++) { if(node->nodes[i].arity == 0) { if(node->nodes[i].is_pu) { int *workerids; int nworkers = starpu_bindid_get_workerids(node->nodes[i].id, &workerids); int w; for(w = 0; w < nworkers; w++) { if(!visited[workerids[w]] && present[workerids[w]]) return &node->nodes[i]; } } } else { found_tree =_get_down_to_leaves(&node->nodes[i], visited, present); if(found_tree) return found_tree; } } return NULL; } struct starpu_tree* starpu_tree_get_neighbour(struct starpu_tree *tree, struct starpu_tree *node, char *visited, char *present) { struct starpu_tree *father = node == NULL ? tree : node->father; int st, n; if (father == NULL) return NULL; if (father == tree && father->arity == 0) return tree; for(st = 0; st < father->arity; st++) { if(&father->nodes[st] == node) break; } for(n = 0; n < father->arity; n++) { int i = (st+n)%father->arity; if(&father->nodes[i] != node) { if(father->nodes[i].arity == 0) { if(father->nodes[i].is_pu) { int *workerids; int nworkers = starpu_bindid_get_workerids(father->nodes[i].id, &workerids); int w; for(w = 0; w < nworkers; w++) { if(!visited[workerids[w]] && present[workerids[w]]) return &father->nodes[i]; } } } else { struct starpu_tree *leaf = _get_down_to_leaves(&father->nodes[i], visited, present); if(leaf) return leaf; } } } if(tree == father) return NULL; return starpu_tree_get_neighbour(tree, father, visited, present); } void starpu_tree_free(struct starpu_tree *tree) { int i; for(i = 0; i < tree->arity; i++) starpu_tree_free(&tree->nodes[i]); free(tree->nodes); tree->nodes = NULL; tree->arity = 0; } starpu-1.4.9+dfsg/src/core/workers.c000066400000000000000000002663511507764646700174040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #ifdef __linux__ #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef STARPU_SIMGRID #include #endif #if defined(_WIN32) && !defined(__CYGWIN__) #include #endif #include #if defined(_WIN32) #ifdef __GNUC__ #define ffs(arg) __builtin_ffs(arg) #else #define ffs(arg) _bit_scan_forward(arg) #endif #endif static int asynchronous_copy_disabled[STARPU_MAX_RAM+1]; /* global knobs */ static int __g_calibrate_knob; static int __g_enable_catch_signal_knob; /* per-worker knobs */ static int __w_bind_to_pu_knob; static int __w_enable_worker_knob; static struct starpu_perf_knob_group * __kg_starpu_global; static struct starpu_perf_knob_group * __kg_starpu_worker__per_worker; static void global_knobs__set(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value) { /* context is not used for global knobs */ STARPU_ASSERT(context == NULL); (void)context; if (knob->id == __g_calibrate_knob) { _starpu_set_calibrate_flag((unsigned)value->val_int32_t); } else if (knob->id == __g_enable_catch_signal_knob) { _starpu_set_catch_signals(!!value->val_int32_t); } else { STARPU_ASSERT(0); abort(); } } static void global_knobs__get(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value) { /* context is not used for global knobs */ STARPU_ASSERT(context == NULL); (void)context; if (knob->id == __g_calibrate_knob) { value->val_int32_t = (int32_t)_starpu_get_calibrate_flag(); } else if (knob->id == __g_enable_catch_signal_knob) { value->val_int32_t = _starpu_get_catch_signals(); } else { STARPU_ASSERT(0); abort(); } } static void worker_knobs__set(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value) { const unsigned workerid = *(unsigned *)context; struct _starpu_worker * const worker = _starpu_get_worker_struct(workerid); if (knob->id == __w_bind_to_pu_knob) { STARPU_ASSERT(value->val_int32_t >= 0); worker->bindid_requested = value->val_int32_t; } else if (knob->id == __w_enable_worker_knob) { worker->enable_knob = !!value->val_int32_t; } else { STARPU_ASSERT(0); abort(); } } static void worker_knobs__get(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value) { const unsigned workerid = *(unsigned *)context; struct _starpu_worker * const worker = _starpu_get_worker_struct(workerid); if (knob->id == __w_bind_to_pu_knob) { value->val_int32_t = worker->bindid; } else if (knob->id == __w_enable_worker_knob) { value->val_int32_t = worker->enable_knob; } else { STARPU_ASSERT(0); abort(); } } void _starpu__workers_c__register_knobs(void) { { const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_global; __kg_starpu_global = _starpu_perf_knob_group_register(scope, global_knobs__set, global_knobs__get); __STARPU_PERF_KNOB_REG("starpu.global", __kg_starpu_global, g_calibrate_knob, int32, "enable or disable performance models calibration (override STARPU_CALIBRATE env var)"); __STARPU_PERF_KNOB_REG("starpu.global", __kg_starpu_global, g_enable_catch_signal_knob, int32, "enable or disable signal catching (override STARPU_CATCH_SIGNALS env var)"); } { const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_per_worker; __kg_starpu_worker__per_worker = _starpu_perf_knob_group_register(scope, worker_knobs__set, worker_knobs__get); __STARPU_PERF_KNOB_REG("starpu.worker", __kg_starpu_worker__per_worker, w_bind_to_pu_knob, int32, "bind worker to PU (PU logical number, override StarPU binding env vars)"); __STARPU_PERF_KNOB_REG("starpu.worker", __kg_starpu_worker__per_worker, w_enable_worker_knob, int32, "enable assigning task to that worker (1:Enabled | [0:Disabled])"); } #if 0 { const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_per_scheduler; __kg_starpu_worker__per_scheduler = _starpu_perf_knob_group_register(scope, sched_knobs__set, sched_knobs__get); } #endif } void _starpu__workers_c__unregister_knobs(void) { _starpu_perf_knob_group_unregister(__kg_starpu_global); _starpu_perf_knob_group_unregister(__kg_starpu_worker__per_worker); __kg_starpu_global = NULL; __kg_starpu_worker__per_worker = NULL; } /* acquire/release semantic for concurrent initialization/de-initialization */ static starpu_pthread_mutex_t init_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t init_cond = STARPU_PTHREAD_COND_INITIALIZER; static int init_count = 0; static enum initialization initialized = UNINITIALIZED; int _starpu_keys_initialized; starpu_pthread_key_t _starpu_worker_key; starpu_pthread_key_t _starpu_worker_set_key; struct _starpu_machine_config _starpu_config; static int check_entire_platform; int _starpu_worker_parallel_blocks; /* Pointers to argc and argv */ static int *my_argc = 0; static char ***my_argv = NULL; void _starpu__workers_c__register_kobs(void) { /* TODO */ } struct _starpu_driver_info starpu_driver_info[STARPU_NARCH]; void _starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct _starpu_driver_info *info) { STARPU_ASSERT(archtype < STARPU_NARCH); starpu_driver_info[archtype] = *info; } struct _starpu_memory_driver_info starpu_memory_driver_info[STARPU_MAX_RAM+1]; void _starpu_memory_driver_info_register(enum starpu_node_kind kind, const struct _starpu_memory_driver_info *info) { starpu_memory_driver_info[kind] = *info; } /* Initialize value of static argc and argv, called when the process begins */ void _starpu_set_argc_argv(int *argc_param, char ***argv_param) { my_argc = argc_param; my_argv = argv_param; } int *_starpu_get_argc() { return my_argc; } char ***_starpu_get_argv() { return my_argv; } int starpu_is_initialized(void) { return initialized != UNINITIALIZED; } void starpu_wait_initialized(void) { STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); while (initialized != INITIALIZED) STARPU_PTHREAD_COND_WAIT(&init_cond, &init_mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); } /* Makes sure that at least one of the workers of type can execute * , for at least one of its implementations. */ static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task, enum starpu_worker_archtype arch) { _starpu_codelet_check_deprecated_fields(task->cl); /* make sure there is a worker on the machine able to execute the task, independent of the sched_ctx, this latter may receive latter on the necessary worker - the user or the hypervisor should take care this happens */ if (check_entire_platform && !task->cl->can_execute) { if (!_starpu_get_machine_config()->topology.ndevices[arch]) return 0; unsigned impl; for (impl = 0; impl < STARPU_MAXIMPLEMENTATIONS; impl++) { switch (arch) { case STARPU_CPU_WORKER: if (task->cl->cpu_funcs[impl] != NULL) return 1; break; case STARPU_CUDA_WORKER: if (task->cl->cuda_funcs[impl] != NULL) return 1; break; case STARPU_HIP_WORKER: if (task->cl->hip_funcs[impl] != NULL) return 1; break; case STARPU_OPENCL_WORKER: if (task->cl->opencl_funcs[impl] != NULL) return 1; break; case STARPU_MAX_FPGA_WORKER: if (task->cl->max_fpga_funcs[impl] != NULL) return 1; break; case STARPU_MPI_MS_WORKER: case STARPU_TCPIP_MS_WORKER: if (task->cl->cpu_funcs_name[impl] != NULL) return 1; break; default: STARPU_ABORT(); } } return 0; } struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx); struct starpu_worker_collection *workers = sched_ctx->workers; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { int i = workers->get_next(workers, &it); if (starpu_worker_get_type(i) != arch) continue; unsigned impl; for (impl = 0; impl < STARPU_MAXIMPLEMENTATIONS; impl++) { /* We could call task->cl->can_execute(i, task, impl) here, it would definitely work. It is probably cheaper to check whether it is necessary in order to avoid a useless function call, though. */ unsigned test_implementation = 0; switch (arch) { case STARPU_CPU_WORKER: if (task->cl->cpu_funcs[impl] != NULL) test_implementation = 1; break; case STARPU_CUDA_WORKER: if (task->cl->cuda_funcs[impl] != NULL) test_implementation = 1; break; case STARPU_HIP_WORKER: if (task->cl->hip_funcs[impl] != NULL) test_implementation = 1; break; case STARPU_OPENCL_WORKER: if (task->cl->opencl_funcs[impl] != NULL) test_implementation = 1; break; case STARPU_MAX_FPGA_WORKER: if (task->cl->max_fpga_funcs[impl] != NULL) test_implementation = 1; break; case STARPU_MPI_MS_WORKER: case STARPU_TCPIP_MS_WORKER: if (task->cl->cpu_funcs_name[impl] != NULL) test_implementation = 1; break; default: STARPU_ABORT(); } if (!test_implementation) /* No implementation here, cannot execute */ continue; if (task->cl->can_execute && !task->cl->can_execute(i, task, impl)) /* The implementation cannot be executed here */ continue; return 1; } } return 0; } /* * in case a task is submitted, we may check whether there exists a worker * that may execute the task or not */ uint32_t _starpu_worker_exists(struct starpu_task *task) { _starpu_codelet_check_deprecated_fields(task->cl); if (task->where == STARPU_NOWHERE) return 1; /* if the task belongs to the init context we can check out all the worker mask of the machine if not we should iterate on the workers of the ctx and verify if it exists a worker able to exec the task */ if(task->sched_ctx == 0) { if (!(task->where & _starpu_config.worker_mask)) return 0; if (!task->cl->can_execute) return 1; } #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID) if ((task->where & STARPU_CPU) && _starpu_worker_exists_and_can_execute(task, STARPU_CPU_WORKER)) return 1; #endif #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) if ((task->where & STARPU_CUDA) && _starpu_worker_exists_and_can_execute(task, STARPU_CUDA_WORKER)) return 1; #endif #if defined(STARPU_USE_HIP) if ((task->where & STARPU_HIP) && _starpu_worker_exists_and_can_execute(task, STARPU_HIP_WORKER)) return 1; #endif #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) if ((task->where & STARPU_OPENCL) && _starpu_worker_exists_and_can_execute(task, STARPU_OPENCL_WORKER)) return 1; #endif #if defined(STARPU_USE_MAX_FPGA) if ((task->where & STARPU_MAX_FPGA) && _starpu_worker_exists_and_can_execute(task, STARPU_MAX_FPGA_WORKER)) return 1; #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE if ((task->where & STARPU_MPI_MS) && _starpu_worker_exists_and_can_execute(task, STARPU_MPI_MS_WORKER)) return 1; #endif #ifdef STARPU_USE_TCPIP_MASTER_SLAVE if ((task->where & STARPU_TCPIP_MS) && _starpu_worker_exists_and_can_execute(task, STARPU_TCPIP_MS_WORKER)) return 1; #endif return 0; } uint32_t _starpu_can_submit_ms_task(void) { return (STARPU_MPI_MS & _starpu_config.worker_mask) || (STARPU_TCPIP_MS & _starpu_config.worker_mask) ; } uint32_t _starpu_can_submit_cuda_task(void) { return STARPU_CUDA & _starpu_config.worker_mask; } uint32_t _starpu_can_submit_hip_task(void) { return STARPU_HIP & _starpu_config.worker_mask; } uint32_t _starpu_can_submit_cpu_task(void) { return STARPU_CPU & _starpu_config.worker_mask; } uint32_t _starpu_can_submit_opencl_task(void) { return STARPU_OPENCL & _starpu_config.worker_mask; } static inline int _starpu_can_use_nth_implementation(enum starpu_worker_archtype arch, struct starpu_codelet *cl, unsigned nimpl) { switch(arch) { case STARPU_ANY_WORKER: { int cpu_func_enabled=1, cuda_func_enabled=1, hip_func_enabled=1, opencl_func_enabled=1; #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID) starpu_cpu_func_t cpu_func = _starpu_task_get_cpu_nth_implementation(cl, nimpl); cpu_func_enabled = cpu_func != NULL && starpu_cpu_worker_get_count(); #endif #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) starpu_cuda_func_t cuda_func = _starpu_task_get_cuda_nth_implementation(cl, nimpl); cuda_func_enabled = cuda_func != NULL && starpu_cuda_worker_get_count(); #endif #if defined(STARPU_USE_HIP) starpu_hip_func_t hip_func = _starpu_task_get_hip_nth_implementation(cl, nimpl); hip_func_enabled = hip_func != NULL && starpu_hip_worker_get_count(); #endif #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) starpu_opencl_func_t opencl_func = _starpu_task_get_opencl_nth_implementation(cl, nimpl); opencl_func_enabled = opencl_func != NULL && starpu_opencl_worker_get_count(); #endif return cpu_func_enabled && cuda_func_enabled && opencl_func_enabled && hip_func_enabled; } case STARPU_CPU_WORKER: { starpu_cpu_func_t func = _starpu_task_get_cpu_nth_implementation(cl, nimpl); return func != NULL; } case STARPU_CUDA_WORKER: { starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, nimpl); return func != NULL; } case STARPU_HIP_WORKER: { starpu_hip_func_t func = _starpu_task_get_hip_nth_implementation(cl, nimpl); return func != NULL; } case STARPU_OPENCL_WORKER: { starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl); return func != NULL; } case STARPU_MAX_FPGA_WORKER: { starpu_max_fpga_func_t func = _starpu_task_get_fpga_nth_implementation(cl, nimpl); return func != NULL; } case STARPU_MPI_MS_WORKER: case STARPU_TCPIP_MS_WORKER: { const char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl); return func_name != NULL; } default: STARPU_ASSERT_MSG(0, "Unknown arch type %d", arch); } return 0; } int _starpu_enforce_locality(unsigned workerid, struct starpu_task *task) { if (!_starpu_config.conf.data_locality_enforce) return 1; unsigned i, requested_node = starpu_worker_get_memory_node(workerid); int owner = -1, shared=-1; for (i = 0; i < STARPU_TASK_GET_NBUFFERS(task); i++) if (STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); unsigned nnodes = starpu_memory_nodes_get_count(); unsigned node; for (node = 0; node < nnodes; node++) { enum _starpu_cache_state state = handle->per_node[node].state; if (state == STARPU_OWNER) owner = node; if (state == STARPU_SHARED) shared = node; } if (owner != -1 && owner != (int)requested_node) return 0; if (shared != -1 && handle->per_node[requested_node].state != STARPU_SHARED) return 0; } return 1; } /* Test if this task can be processed on this worker, regardless of the implementation */ /* must be called with sched_mutex locked to protect state_blocked */ static inline int _starpu_can_execute_task_any_impl(unsigned workerid, struct starpu_task *task) { if (!_starpu_enforce_locality(workerid, task)) return 0; if (!_starpu_config.workers[workerid].enable_knob) return 0; if (task->workerids_len) { size_t div = sizeof(*task->workerids) * 8; if (workerid / div >= task->workerids_len || ! (task->workerids[workerid / div] & (1UL << workerid % div))) return 0; } /* if the worker is blocked in a parallel ctx don't submit tasks on it */ #ifdef STARPU_DEVEL #warning FIXME: this is very expensive, while can_execute is supposed to be not very costly so schedulers can call it a lot #endif if(starpu_worker_is_blocked_in_parallel(workerid)) return 0; if (!(task->where & _starpu_config.workers[workerid].worker_mask)) return 0; return 1; } /* must be called with sched_mutex locked to protect state_blocked_in_parallel */ int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl) { /* TODO: check that the task operand sizes will fit on that device */ return _starpu_can_execute_task_any_impl(workerid, task) && _starpu_can_use_nth_implementation(_starpu_config.workers[workerid].arch, task->cl, nimpl) && (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl)); } /* must be called with sched_mutex locked to protect state_blocked_in_parallel */ int starpu_worker_can_execute_task_impl(unsigned workerid, struct starpu_task *task, unsigned *impl_mask) { if (!_starpu_can_execute_task_any_impl(workerid, task)) return 0; unsigned mask; int i; enum starpu_worker_archtype arch; struct starpu_codelet *cl; /* TODO: check that the task operand sizes will fit on that device */ cl = task->cl; mask = 0; arch = _starpu_config.workers[workerid].arch; if (!task->cl->can_execute) { for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) if (_starpu_can_use_nth_implementation(arch, cl, i)) { mask |= 1U << i; if (!impl_mask) break; } } else { for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) if (_starpu_can_use_nth_implementation(arch, cl, i) && (!task->cl->can_execute || task->cl->can_execute(workerid, task, i))) { mask |= 1U << i; if (!impl_mask) break; } } if (impl_mask) *impl_mask = mask; return mask != 0; } /* must be called with sched_mutex locked to protect state_blocked */ int starpu_worker_can_execute_task_first_impl(unsigned workerid, struct starpu_task *task, unsigned *nimpl) { if (!_starpu_can_execute_task_any_impl(workerid, task)) return 0; int i; enum starpu_worker_archtype arch; struct starpu_codelet *cl; /* TODO: check that the task operand sizes will fit on that device */ cl = task->cl; arch = _starpu_config.workers[workerid].arch; if (!task->cl->can_execute) { for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) if (_starpu_can_use_nth_implementation(arch, cl, i)) { if (nimpl) *nimpl = i; return 1; } } else { for (i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) if (_starpu_can_use_nth_implementation(arch, cl, i) && (task->cl->can_execute(workerid, task, i))) { if (nimpl) *nimpl = i; return 1; } } return 0; } int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl) { /* TODO: check that the task operand sizes will fit on that device */ struct starpu_codelet *cl = task->cl; unsigned nworkers = _starpu_config.topology.nworkers; /* Is this a parallel worker ? */ if (workerid < nworkers) { if (!_starpu_can_execute_task_any_impl(workerid, task)) return 0; return !!((task->where & _starpu_config.workers[workerid].worker_mask) && _starpu_can_use_nth_implementation(_starpu_config.workers[workerid].arch, task->cl, nimpl) && (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl))); } else { if (!_starpu_enforce_locality(workerid, task)) return 0; if (task->workerids_len) { size_t div = sizeof(*task->workerids) * 8; if (workerid / div >= task->workerids_len || ! (task->workerids[workerid / div] & (1UL << workerid % div))) return 0; } if (cl->type == STARPU_SPMD #ifdef STARPU_HAVE_HWLOC || cl->type == STARPU_FORKJOIN #else #ifdef __GLIBC__ || cl->type == STARPU_FORKJOIN #endif #endif ) { /* TODO we should add other types of constraints */ /* Is the worker larger than requested ? */ int worker_size = (int)_starpu_config.combined_workers[workerid - nworkers].worker_size; int worker0 = _starpu_config.combined_workers[workerid - nworkers].combined_workerid[0]; return !!((worker_size <= task->cl->max_parallelism) && _starpu_can_use_nth_implementation(_starpu_config.workers[worker0].arch, task->cl, nimpl) && (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl))); } else { /* We have a sequential task but a parallel worker */ return 0; } } } /* * Runtime initialization methods */ static void _starpu_init_worker_queue(struct _starpu_worker *worker) { _starpu_memory_node_register_condition(worker, &worker->sched_cond, worker->memory_node); } /* * Returns 0 if the given driver is one of the drivers that must be launched by * the application itself, and not by StarPU, 1 otherwise. */ static unsigned _starpu_may_launch_driver(struct starpu_conf *conf, struct starpu_driver *d) { if (conf->n_not_launched_drivers == 0 || conf->not_launched_drivers == NULL) return 1; /* Is in conf->not_launched_drivers ? */ unsigned i; for (i = 0; i < conf->n_not_launched_drivers; i++) { if (d->type != conf->not_launched_drivers[i].type) continue; /* Driver porters: adding your driver here is optional, only * needed for supporting running the driver in a thread provided by * the application. */ switch (d->type) { case STARPU_CPU_WORKER: if (d->id.cpu_id == conf->not_launched_drivers[i].id.cpu_id) return 0; break; case STARPU_CUDA_WORKER: if (d->id.cuda_id == conf->not_launched_drivers[i].id.cuda_id) return 0; break; case STARPU_HIP_WORKER: if (d->id.hip_id == conf->not_launched_drivers[i].id.hip_id) return 0; break; #ifdef STARPU_USE_OPENCL case STARPU_OPENCL_WORKER: if (d->id.opencl_id == conf->not_launched_drivers[i].id.opencl_id) return 0; break; #endif default: STARPU_ABORT(); } } return 1; } #ifdef STARPU_PERF_DEBUG struct itimerval prof_itimer; #endif void _starpu_worker_init(struct _starpu_worker *workerarg, struct _starpu_machine_config *pconfig) { workerarg->config = pconfig; STARPU_PTHREAD_MUTEX_INIT(&workerarg->mutex, NULL); /* arch initialized by topology.c */ /* worker_mask initialized by topology.c */ /* perf_arch initialized by topology.c */ /* worker_thread initialized by _starpu_launch_drivers */ /* devid initialized by topology.c */ /* subworkerid initialized by topology.c */ /* bindid initialized by topology.c */ /* workerid initialized by topology.c */ workerarg->combined_workerid = workerarg->workerid; workerarg->current_rank = 0; workerarg->worker_size = 1; STARPU_PTHREAD_COND_INIT(&workerarg->started_cond, NULL); STARPU_PTHREAD_COND_INIT(&workerarg->ready_cond, NULL); /* memory_node initialized by topology.c */ STARPU_PTHREAD_COND_INIT(&workerarg->sched_cond, NULL); STARPU_PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL); starpu_task_prio_list_init(&workerarg->local_tasks); _starpu_ctx_change_list_init(&workerarg->ctx_change_list); workerarg->local_ordered_tasks = NULL; workerarg->local_ordered_tasks_size = 0; workerarg->current_ordered_task = 0; workerarg->current_ordered_task_order = 1; workerarg->current_task = NULL; #ifdef STARPU_SIMGRID starpu_pthread_wait_init(&workerarg->wait); starpu_pthread_queue_register(&workerarg->wait, &_starpu_simgrid_task_queue[workerarg->workerid]); #endif workerarg->task_transferring = NULL; workerarg->nb_buffers_transferred = 0; STARPU_HG_DISABLE_CHECKING(workerarg->nb_buffers_transferred); workerarg->nb_buffers_totransfer = 0; workerarg->first_task = 0; workerarg->ntasks = 0; /* set initialized by topology.c */ workerarg->pipeline_length = 0; workerarg->pipeline_stuck = 0; workerarg->worker_is_running = 0; workerarg->worker_is_initialized = 0; workerarg->wait_for_worker_initialization = 0; workerarg->status = STATUS_INITIALIZING; workerarg->state_keep_awake = 0; /* name initialized by driver */ /* short_name initialized by driver */ workerarg->run_by_starpu = 1; workerarg->driver_ops = NULL; workerarg->sched_ctx_list = NULL; workerarg->tmp_sched_ctx = -1; workerarg->nsched_ctxs = 0; _starpu_barrier_counter_init(&workerarg->tasks_barrier, 0); workerarg->has_prev_init = 0; int ctx; for(ctx = 0; ctx < STARPU_NMAX_SCHED_CTXS; ctx++) workerarg->removed_from_ctx[ctx] = 0; workerarg->spinning_backoff = 1; for(ctx = 0; ctx < STARPU_NMAX_SCHED_CTXS; ctx++) { workerarg->shares_tasks_lists[ctx] = 0; workerarg->poped_in_ctx[ctx] = 0; } workerarg->reverse_phase[0] = 0; workerarg->reverse_phase[1] = 0; workerarg->pop_ctx_priority = 1; workerarg->is_slave_somewhere = 0; workerarg->state_relax_refcnt = 1; #ifdef STARPU_SPINLOCK_CHECK workerarg->relax_on_file = __FILE__; workerarg->relax_on_line = __LINE__; workerarg->relax_on_func = __starpu_func__; workerarg->relax_off_file = NULL; workerarg->relax_off_line = 0; workerarg->relax_off_func = NULL; #endif workerarg->state_sched_op_pending = 0; workerarg->state_changing_ctx_waiting = 0; workerarg->state_changing_ctx_notice = 0; workerarg->state_blocked_in_parallel_observed = 0; workerarg->state_blocked_in_parallel = 0; workerarg->state_block_in_parallel_req = 0; workerarg->state_block_in_parallel_ack = 0; workerarg->state_unblock_in_parallel_req = 0; workerarg->state_unblock_in_parallel_ack = 0; workerarg->block_in_parallel_ref_count = 0; _starpu_perf_counter_sample_init(&workerarg->perf_counter_sample, starpu_perf_counter_scope_per_worker); workerarg->enable_knob = 1; workerarg->bindid_requested = -1; /* cpu_set/hwloc_cpu_set/hwloc_obj initialized in topology.c */ } static void _starpu_worker_deinit(struct _starpu_worker *workerarg) { (void) workerarg; #ifdef STARPU_SIMGRID starpu_pthread_queue_unregister(&workerarg->wait, &_starpu_simgrid_task_queue[workerarg->workerid]); starpu_pthread_wait_destroy(&workerarg->wait); #endif _starpu_perf_counter_sample_exit(&workerarg->perf_counter_sample); } #ifdef STARPU_USE_FXT void _starpu_worker_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync) { unsigned devid = worker->devid; unsigned memnode = worker->memory_node; _STARPU_TRACE_WORKER_INIT_START(archtype, worker->workerid, devid, memnode, worker->bindid, sync); } #endif void _starpu_driver_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync STARPU_ATTRIBUTE_UNUSED) { (void) archtype; int devid = worker->devid; (void) devid; #ifdef STARPU_USE_FXT _STARPU_TRACE_REGISTER_THREAD(worker->bindid); _starpu_worker_start(worker, archtype, sync); #endif _starpu_set_local_worker_key(worker); STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex); worker->worker_is_running = 1; STARPU_PTHREAD_COND_SIGNAL(&worker->started_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex); _starpu_bind_thread_on_cpu(worker->bindid, worker->workerid, NULL); #if defined(STARPU_PERF_DEBUG) && !defined(STARPU_SIMGRID) setitimer(ITIMER_PROF, &prof_itimer, NULL); #endif _STARPU_DEBUG("worker %p %d for dev %d is ready on logical cpu %d\n", worker, worker->workerid, devid, worker->bindid); #ifdef STARPU_HAVE_HWLOC _STARPU_DEBUG("worker %p %d cpuset start at %d\n", worker, worker->workerid, hwloc_bitmap_first(worker->hwloc_cpu_set)); #endif } static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig) { pconfig->running = 1; pconfig->pause_depth = 0; pconfig->submitting = 1; STARPU_HG_DISABLE_CHECKING(pconfig->watchdog_ok); unsigned nworkers = pconfig->topology.nworkers; unsigned worker; #if defined(STARPU_PERF_DEBUG) && !defined(STARPU_SIMGRID) /* Get itimer of the main thread, to set it for the worker threads */ getitimer(ITIMER_PROF, &prof_itimer); #endif STARPU_AYU_INIT(); /* Launch workers asynchronously */ for (worker = 0; worker < nworkers; worker++) { struct _starpu_worker *workerarg = &pconfig->workers[worker]; workerarg->wait_for_worker_initialization = 0; _STARPU_DEBUG("initialising worker %u/%u\n", worker, nworkers); _starpu_init_worker_queue(workerarg); struct _starpu_worker_set *worker_set = workerarg->set; struct _starpu_worker_set *driver_worker_set = workerarg->driver_worker_set; /* For worker sets, we only start a thread for the first worker. */ if (!worker_set || worker_set->workers == workerarg) { struct starpu_driver driver; if (worker_set) { worker_set->set_is_initialized = 0; worker_set->wait_for_set_initialization = !driver_worker_set || driver_worker_set == worker_set; } workerarg->driver_ops = starpu_driver_info[workerarg->arch].driver_ops; workerarg->wait_for_worker_initialization = starpu_driver_info[workerarg->arch].wait_for_worker_initialization; if (workerarg->driver_ops) { driver.type = workerarg->arch; if (workerarg->driver_ops->set_devid) workerarg->driver_ops->set_devid(&driver, workerarg); } if ((driver_worker_set && driver_worker_set != worker_set) || (workerarg->driver_ops && !_starpu_may_launch_driver(&pconfig->conf, &driver))) workerarg->run_by_starpu = 0; } else workerarg->run_by_starpu = 0; } for (worker = 0; worker < nworkers; worker++) { struct _starpu_worker *workerarg = &pconfig->workers[worker]; struct _starpu_worker_set *worker_set = workerarg->set; /* For worker sets, we only start a thread for the first worker. */ if (workerarg->run_by_starpu) { starpu_pthread_t *worker_thread; if (worker_set) worker_thread = &worker_set->worker_thread; else worker_thread = &workerarg->worker_thread; /* For driver worker sets, we only start a thread for the first worker set. */ STARPU_PTHREAD_CREATE_ON( starpu_driver_info[workerarg->arch].name_upper, worker_thread, NULL, starpu_driver_info[workerarg->arch].run_worker, workerarg, _starpu_simgrid_get_host_by_worker(workerarg)); } #ifdef STARPU_USE_FXT /* In tracing mode, make sure the thread is really started * before starting another one, to make sure they appear in * order in the trace. */ if (fut_active && workerarg->run_by_starpu) { STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex); while (!workerarg->worker_is_running) STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex); } #endif } for (worker = 0; worker < nworkers; worker++) { struct _starpu_worker *workerarg = &pconfig->workers[worker]; _STARPU_DEBUG("waiting for worker %u initialization\n", worker); if (!workerarg->run_by_starpu) continue; struct _starpu_worker_set *worker_set = workerarg->set; if (worker_set && worker_set->wait_for_set_initialization == 1) { STARPU_PTHREAD_MUTEX_LOCK(&worker_set->mutex); while (!worker_set->set_is_initialized) STARPU_PTHREAD_COND_WAIT(&worker_set->ready_cond, &worker_set->mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set->mutex); worker_set->started = 1; worker_set->wait_for_set_initialization = 0; } else if (workerarg->wait_for_worker_initialization == 1) { STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex); while (!workerarg->worker_is_initialized) STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex); workerarg->wait_for_worker_initialization = 0; } } _STARPU_DEBUG("finished launching drivers\n"); } void starpu_worker_wait_for_initialisation() { unsigned nworkers = starpu_worker_get_count(); unsigned workerid; for (workerid = 0; workerid < nworkers; workerid++) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); _STARPU_DEBUG("waiting for worker %u initialization\n", workerid); if (!worker->run_by_starpu) break; struct _starpu_worker_set *worker_set = worker->set; if (worker_set) { STARPU_PTHREAD_MUTEX_LOCK(&worker_set->mutex); while (!worker_set->set_is_initialized) STARPU_PTHREAD_COND_WAIT(&worker_set->ready_cond, &worker_set->mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set->mutex); } else { STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex); while (!worker->worker_is_initialized) STARPU_PTHREAD_COND_WAIT(&worker->ready_cond, &worker->mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex); } } } /* Initialize the starpu_conf with default values */ int starpu_conf_init(struct starpu_conf *conf) { if (!conf) return -EINVAL; memset(conf, 0, sizeof(*conf)); conf->magic = 42; conf->will_use_mpi = 0; conf->sched_policy_name = starpu_getenv("STARPU_SCHED"); conf->sched_policy = NULL; conf->global_sched_ctx_min_priority = starpu_getenv_number("STARPU_MIN_PRIO"); conf->global_sched_ctx_max_priority = starpu_getenv_number("STARPU_MAX_PRIO"); conf->catch_signals = starpu_getenv_number_default("STARPU_CATCH_SIGNALS", 1); /* Note that starpu_getenv_number returns -1 in case the variable is * not defined */ /* Backward compatibility: check the value of STARPU_NCPUS if * STARPU_NCPU is not set. */ conf->ncpus = starpu_getenv_number("STARPU_NCPU"); if (conf->ncpus == -1) conf->ncpus = starpu_getenv_number("STARPU_NCPUS"); conf->reserve_ncpus = starpu_getenv_number("STARPU_RESERVE_NCPU"); conf->ncuda = starpu_getenv_number("STARPU_NCUDA"); conf->nhip = starpu_getenv_number("STARPU_NHIP"); conf->nopencl = starpu_getenv_number("STARPU_NOPENCL"); conf->nmax_fpga = starpu_getenv_number("STARPU_NMAX_FPGA"); conf->nmpi_ms = starpu_getenv_number("STARPU_NMPI_MS"); conf->ntcpip_ms = starpu_getenv_number("STARPU_NTCPIP_MS"); conf->calibrate = starpu_getenv_number("STARPU_CALIBRATE"); conf->bus_calibrate = starpu_getenv_number("STARPU_BUS_CALIBRATE"); if (conf->calibrate == -1) conf->calibrate = 0; if (conf->bus_calibrate == -1) conf->bus_calibrate = 0; conf->use_explicit_workers_bindid = 0; /* TODO */ conf->use_explicit_workers_cuda_gpuid = 0; /* TODO */ conf->use_explicit_workers_hip_gpuid = 0; /* TODO */ conf->use_explicit_workers_opencl_gpuid = 0; /* TODO */ conf->use_explicit_workers_max_fpga_deviceid = 0; /* TODO */ conf->use_explicit_workers_mpi_ms_deviceid = 0; /* TODO */ conf->single_combined_worker = starpu_getenv_number("STARPU_SINGLE_COMBINED_WORKER"); if (conf->single_combined_worker == -1) conf->single_combined_worker = 0; #if defined(STARPU_DATA_LOCALITY_ENFORCE) conf->data_locality_enforce = 1; #else conf->data_locality_enforce = starpu_getenv_number("STARPU_DATA_LOCALITY_ENFORCE"); if (conf->data_locality_enforce == -1) conf->data_locality_enforce = 0; #endif #if defined(STARPU_DISABLE_ASYNCHRONOUS_COPY) conf->disable_asynchronous_copy = 1; #else conf->disable_asynchronous_copy = starpu_getenv_number("STARPU_DISABLE_ASYNCHRONOUS_COPY"); if (conf->disable_asynchronous_copy == -1) conf->disable_asynchronous_copy = 0; #endif #if defined(STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY) conf->disable_asynchronous_cuda_copy = 1; #else conf->disable_asynchronous_cuda_copy = starpu_getenv_number("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY"); if (conf->disable_asynchronous_cuda_copy == -1) conf->disable_asynchronous_cuda_copy = 0; #endif #if defined(STARPU_DISABLE_ASYNCHRONOUS_HIP_COPY) conf->disable_asynchronous_hip_copy = 1; #else conf->disable_asynchronous_hip_copy = starpu_getenv_number("STARPU_DISABLE_ASYNCHRONOUS_HIP_COPY"); if (conf->disable_asynchronous_hip_copy == -1) conf->disable_asynchronous_hip_copy = 0; #endif #if defined(STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY) conf->disable_asynchronous_opencl_copy = 1; #else conf->disable_asynchronous_opencl_copy = starpu_getenv_number("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY"); if (conf->disable_asynchronous_opencl_copy == -1) conf->disable_asynchronous_opencl_copy = 0; #endif #if defined(STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY) conf->disable_asynchronous_max_fpga_copy = 1; #else conf->disable_asynchronous_max_fpga_copy = starpu_getenv_number("STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY"); if (conf->disable_asynchronous_max_fpga_copy == -1) conf->disable_asynchronous_max_fpga_copy = 0; #endif #if defined(STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY) conf->disable_asynchronous_mpi_ms_copy = 1; #else conf->disable_asynchronous_mpi_ms_copy = starpu_getenv_number("STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY"); if(conf->disable_asynchronous_mpi_ms_copy == -1) conf->disable_asynchronous_mpi_ms_copy = 0; #endif #if defined(STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY) conf->disable_asynchronous_tcpip_ms_copy = 1; #else conf->disable_asynchronous_tcpip_ms_copy = starpu_getenv_number("STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY"); if(conf->disable_asynchronous_tcpip_ms_copy == -1) conf->disable_asynchronous_tcpip_ms_copy = 0; #endif conf->enable_map = starpu_getenv_number("STARPU_ENABLE_MAP"); if (conf->enable_map == -1) conf->enable_map = 0; /* 64MiB by default */ conf->trace_buffer_size = ((uint64_t) starpu_getenv_number_default("STARPU_TRACE_BUFFER_SIZE", 64)) << 20; conf->driver_spinning_backoff_min = (unsigned) starpu_getenv_number_default("STARPU_BACKOFF_MIN", 1); conf->driver_spinning_backoff_max = (unsigned) starpu_getenv_number_default("STARPU_BACKOFF_MAX", 32); /* Do not start performance counter collection by default */ conf->start_perf_counter_collection = 0; conf->cuda_only_fast_alloc_other_memnodes = starpu_getenv_number_default("STARPU_CUDA_ONLY_FAST_ALLOC_OTHER_MEMNODES", 0); return 0; } int starpu_conf_noworker(struct starpu_conf *conf) { conf->ncpus = 0; conf->ncuda = 0; conf->nhip = 0; conf->nopencl = 0; conf->nmax_fpga = 0; conf->nmpi_ms = 0; conf->ntcpip_ms = 0; return 0; } static void _starpu_conf_set_value_against_environment(char *name, int *value, int precedence_over_env) { if (precedence_over_env == 0) { int number; number = starpu_getenv_number(name); if (number != -1) { *value = number; } } } void _starpu_conf_check_environment(struct starpu_conf *conf) { char *sched = starpu_getenv("STARPU_SCHED"); if (sched) { conf->sched_policy_name = sched; } _starpu_conf_set_value_against_environment("STARPU_NCPUS", &conf->ncpus, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_NCPU", &conf->ncpus, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_RESERVE_NCPU", &conf->reserve_ncpus, conf->precedence_over_environment_variables); int main_thread_bind = starpu_getenv_number_default("STARPU_MAIN_THREAD_BIND", 0); if (main_thread_bind) { /* Reserve a core for main */ if (conf->reserve_ncpus == -1) conf->reserve_ncpus = 1; else conf->reserve_ncpus++; } _starpu_conf_set_value_against_environment("STARPU_NCUDA", &conf->ncuda, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_NHIP", &conf->nhip, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_NOPENCL", &conf->nopencl, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_NMAX_FPGA", &conf->nmax_fpga, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_NMPI_MS", &conf->nmpi_ms, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_NTCPIP_MS", &conf->ntcpip_ms, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_CALIBRATE", &conf->calibrate, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_BUS_CALIBRATE", &conf->bus_calibrate, conf->precedence_over_environment_variables); #ifdef STARPU_SIMGRID if (conf->calibrate == 2) { _STARPU_DISP("Warning: History will be cleared due to calibrate or STARPU_CALIBRATE being set to 2. This will prevent simgrid from having task simulation times!\n"); } if (conf->bus_calibrate) { _STARPU_DISP("Warning: Bus calibration will be cleared due to bus_calibrate or STARPU_BUS_CALIBRATE being set. This will prevent simgrid from having data transfer simulation times!\n"); } #endif _starpu_conf_set_value_against_environment("STARPU_SINGLE_COMBINED_WORKER", &conf->single_combined_worker, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY", &conf->disable_asynchronous_cuda_copy, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_HIP_COPY", &conf->disable_asynchronous_hip_copy, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY", &conf->disable_asynchronous_opencl_copy, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY", &conf->disable_asynchronous_max_fpga_copy, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY", &conf->disable_asynchronous_mpi_ms_copy, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY", &conf->disable_asynchronous_tcpip_ms_copy, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_ENABLE_MAP", &conf->enable_map, conf->precedence_over_environment_variables); asynchronous_copy_disabled[STARPU_CPU_RAM] = 0; asynchronous_copy_disabled[STARPU_CUDA_RAM] = conf->disable_asynchronous_cuda_copy; asynchronous_copy_disabled[STARPU_HIP_RAM] = conf->disable_asynchronous_hip_copy; asynchronous_copy_disabled[STARPU_OPENCL_RAM] = conf->disable_asynchronous_opencl_copy; asynchronous_copy_disabled[STARPU_MAX_FPGA_RAM] = conf->disable_asynchronous_max_fpga_copy; asynchronous_copy_disabled[STARPU_DISK_RAM] = 0; asynchronous_copy_disabled[STARPU_MPI_MS_RAM] = conf->disable_asynchronous_mpi_ms_copy; asynchronous_copy_disabled[STARPU_TCPIP_MS_RAM] = conf->disable_asynchronous_tcpip_ms_copy; _starpu_conf_set_value_against_environment("STARPU_MIN_PRIO", &conf->global_sched_ctx_min_priority, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_MAX_PRIO", &conf->global_sched_ctx_max_priority, conf->precedence_over_environment_variables); _starpu_conf_set_value_against_environment("STARPU_CATCH_SIGNALS", &conf->catch_signals, conf->precedence_over_environment_variables); } struct starpu_tree* starpu_workers_get_tree(void) { return _starpu_config.topology.tree; } #ifdef STARPU_HAVE_HWLOC #if HWLOC_API_VERSION >= 0x20000 #define NORMAL_CHILD(obj) 1 #else #define NORMAL_CHILD(obj) ((obj)->type < HWLOC_OBJ_BRIDGE) #endif static void _fill_tree(struct starpu_tree *tree, hwloc_obj_t curr_obj, unsigned depth, hwloc_topology_t topology, struct starpu_tree *father) { unsigned i, j; unsigned arity; #if HWLOC_API_VERSION >= 0x20000 arity = curr_obj->arity; #else arity = 0; for(i = 0; i < curr_obj->arity; i++) { if (!NORMAL_CHILD(curr_obj->children[i])) /* I/O stuff, stop caring */ break; arity++; } #endif if (arity == 1) { /* Nothing interestin here, skip level */ _fill_tree(tree, curr_obj->children[0], depth+1, topology, father); return; } starpu_tree_insert(tree, curr_obj->logical_index, depth, curr_obj->type == HWLOC_OBJ_PU, arity, father); starpu_tree_prepare_children(arity, tree); j = 0; for(i = 0; i < arity; i++) { hwloc_obj_t child = curr_obj->children[i]; if (!NORMAL_CHILD(child)) /* I/O stuff, stop caring (shouldn't happen, though) */ break; #if 0 char string[128]; hwloc_obj_snprintf(string, sizeof(string), topology, child, "#", 0); printf("%*s%s %d is_pu %d \n", 0, "", string, child->logical_index, child->type == HWLOC_OBJ_PU); #endif _fill_tree(&tree->nodes[j], child, depth+1, topology, tree); j++; } } #endif static void _starpu_build_tree(void) { #ifdef STARPU_HAVE_HWLOC struct starpu_tree *tree; _STARPU_MALLOC(tree, sizeof(struct starpu_tree)); _starpu_config.topology.tree = tree; hwloc_obj_t root = hwloc_get_root_obj(_starpu_config.topology.hwtopology); #if 0 char string[128]; hwloc_obj_snprintf(string, sizeof(string), topology, root, "#", 0); printf("%*s%s %d is_pu = %d \n", 0, "", string, root->logical_index, root->type == HWLOC_OBJ_PU); #endif /* level, is_pu, is in the tree (it will be true only after add) */ _fill_tree(tree, root, 0, _starpu_config.topology.hwtopology, NULL); #endif } typedef void (*hook_func_t)(void); static hook_func_t _hook_funcs[10]; static int _hook_func_nb=0; void _starpu_crash_add_hook(void (*hook_func)(void)) { STARPU_ASSERT_MSG(_hook_func_nb < 10, "The number of crash funcs has exceeded the limit\n"); _hook_funcs[_hook_func_nb] = hook_func; _hook_func_nb++; } void _starpu_crash_call_hooks() { int i; /*_STARPU_DISP("Time: %f\n", starpu_timing_now());*/ for(i=0 ; i<_hook_func_nb; i++) _hook_funcs[i](); } static starpu_pthread_mutex_t sig_handlers_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static void (*act_sigint)(int); static void (*act_sigsegv)(int); static void (*act_sigabrt)(int); #ifdef SIGTRAP static void (*act_sigtrap)(int); #endif void _starpu_handler(int sig) { #ifdef STARPU_VERBOSE _STARPU_MSG("Catching signal '%d'\n", sig); #endif #ifdef STARPU_USE_FXT _starpu_fxt_dump_file(); #endif if (sig == SIGINT) { void (*sig_act)(int) = act_sigint; if (sig_act == NULL) sig_act = SIG_DFL; signal(SIGINT, sig_act); } if (sig == SIGSEGV) { void (*sig_act)(int) = act_sigsegv; if (sig_act == NULL) sig_act = SIG_DFL; signal(SIGSEGV, sig_act); } if (sig == SIGABRT) { void (*sig_act)(int) = act_sigabrt; if (sig_act == NULL) sig_act = SIG_DFL; signal(SIGABRT, sig_act); } #ifdef SIGTRAP if (sig == SIGTRAP) { void (*sig_act)(int) = act_sigtrap; if (sig_act == NULL) sig_act = SIG_DFL; signal(SIGTRAP, sig_act); } #endif _starpu_crash_call_hooks(); #ifdef STARPU_VERBOSE _STARPU_MSG("Rearming signal '%d'\n", sig); #endif raise(sig); } void _starpu_catch_signals(void) { if (_starpu_config.conf.catch_signals == 1) { static void (*old_sig_act)(int); old_sig_act = signal(SIGINT, _starpu_handler); if (old_sig_act != _starpu_handler) act_sigint = old_sig_act; old_sig_act = signal(SIGSEGV, _starpu_handler); if (old_sig_act != _starpu_handler) act_sigsegv = old_sig_act; old_sig_act = signal(SIGABRT, _starpu_handler); if (old_sig_act != _starpu_handler) act_sigabrt = old_sig_act; #ifdef SIGTRAP old_sig_act = signal(SIGTRAP, _starpu_handler); if (old_sig_act != _starpu_handler) act_sigtrap = old_sig_act; #endif } else { if (act_sigint != NULL) { signal(SIGINT, act_sigint); act_sigint = NULL; } if (act_sigsegv != NULL) { signal(SIGSEGV, act_sigsegv); act_sigsegv = NULL; } if (act_sigabrt != NULL) { signal(SIGABRT, act_sigsegv); act_sigabrt = NULL; } #ifdef SIGTRAP if (act_sigtrap != NULL) { signal(SIGTRAP, act_sigtrap); act_sigtrap = NULL; } #endif } } void _starpu_set_catch_signals(int do_catch_signal) { STARPU_PTHREAD_MUTEX_LOCK(&sig_handlers_mutex); _starpu_config.conf.catch_signals = do_catch_signal; _starpu_catch_signals(); STARPU_PTHREAD_MUTEX_UNLOCK(&sig_handlers_mutex); } int _starpu_get_catch_signals(void) { return _starpu_config.conf.catch_signals; } void starpu_drivers_preinit(void) { _starpu_cpu_preinit(); _starpu_cuda_preinit(); _starpu_hip_preinit(); _starpu_opencl_preinit(); _starpu_max_fpga_preinit(); _starpu_mpi_ms_preinit(); _starpu_tcpip_ms_preinit(); _starpu_disk_preinit(); } int starpu_init(struct starpu_conf *user_conf) { return starpu_initialize(user_conf, NULL, NULL); } int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv) { int is_a_sink = 0; /* Always defined. If the MP infrastructure is not * used, we cannot be a sink. */ unsigned worker; #if !defined(STARPU_SIMGRID) && !defined(STARPU_USE_MP) (void)argc; (void)argv; #endif STARPU_HG_DISABLE_CHECKING(_starpu_worker_parallel_blocks); #ifdef STARPU_SIMGRID /* This initializes the simgrid thread library, thus needs to be early */ _starpu_simgrid_init_early(argc, argv); #endif STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); while (initialized == CHANGING) /* Wait for the other one changing it */ STARPU_PTHREAD_COND_WAIT(&init_cond, &init_mutex); init_count++; if (initialized == INITIALIZED) { /* He initialized it, don't do it again, and let the others get the mutex */ STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); return 0; } /* initialized == UNINITIALIZED */ initialized = CHANGING; STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); /* This initializes _starpu_silent, thus needs to be early */ _starpu_util_init(); int rc = _starpu_prof_tool_try_load(); (void) rc; /* unused for now */ #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_init_begin, 0, 0, starpu_prof_tool_driver_cpu, -1, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_init(&pi, NULL, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_init_begin(&pi, NULL, NULL); #endif #ifdef STARPU_USE_MP _starpu_set_argc_argv(argc, argv); #ifdef STARPU_USE_MPI_MASTER_SLAVE if (_starpu_mpi_common_mp_init() == -ENODEV) { STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); init_count--; initialized = UNINITIALIZED; /* Let somebody else try to do it */ STARPU_PTHREAD_COND_SIGNAL(&init_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); return -ENODEV; } #endif #ifdef STARPU_USE_TCPIP_MASTER_SLAVE if (_starpu_tcpip_common_mp_init() == -ENODEV) { STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); init_count--; initialized = UNINITIALIZED; /* Let somebody else try to do it */ STARPU_PTHREAD_COND_SIGNAL(&init_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); return -ENODEV; } #endif /* If StarPU was configured to use MP sinks, we have to control the * kind on node we are running on : host or sink ? */ if (starpu_getenv("STARPU_SINK")) is_a_sink = 1; #endif /* STARPU_USE_MP */ int ret; #ifdef STARPU_OPENMP _starpu_omp_dummy_init(); #endif #ifdef STARPU_SIMGRID /* Warn when the lots of stacks malloc()-ated by simgrid for transfer * processes will take a long time to get initialized */ char *perturb = starpu_getenv("MALLOC_PERTURB_"); if (perturb && perturb[0] && atoi(perturb) != 0) _STARPU_DISP("Warning: MALLOC_PERTURB_ is set to non-zero, this makes simgrid run very slow\n"); #else #ifdef __GNUC__ #ifndef __OPTIMIZE__ _STARPU_DISP("Warning: StarPU was configured with --enable-debug (-O0), and is thus not optimized\n"); #endif #endif #ifdef STARPU_SPINLOCK_CHECK _STARPU_DISP("Warning: StarPU was configured with --enable-spinlock-check, which slows down a bit\n"); #endif #if 0 #ifndef STARPU_NO_ASSERT _STARPU_DISP("Warning: StarPU was configured without --enable-fast\n"); #endif #endif #ifdef STARPU_MEMORY_STATS _STARPU_DISP("Warning: StarPU was configured with --enable-memory-stats, which slows down a bit\n"); #endif #ifdef STARPU_VERBOSE _STARPU_DISP("Warning: StarPU was configured with --enable-verbose, which slows down a bit\n"); #endif #ifdef STARPU_USE_FXT if (starpu_fxt_is_enabled()) _STARPU_DISP("Warning: FxT is enabled, which slows down a bit, limits scalability and makes worker initialization sequential\n"); #else if (starpu_getenv_number("STARPU_FXT_TRACE") > 0) _STARPU_DISP("Warning: FxT trace is requested but StarPU was configured without FxT support\n"); #endif #ifdef STARPU_FXT_LOCK_TRACES _STARPU_DISP("Warning: StarPU was configured with --enable-fxt-lock, which slows down things a huge lot, and is really only meant for StarPU insides debugging. Did you really want to enable that?\n"); #endif #ifdef STARPU_PERF_DEBUG _STARPU_DISP("Warning: StarPU was configured with --enable-perf-debug, which slows down a bit\n"); #endif #ifdef STARPU_MODEL_DEBUG _STARPU_DISP("Warning: StarPU was configured with --enable-model-debug, which slows down a bit\n"); #endif #ifdef __linux__ { struct utsname buf; if (uname(&buf) == 0 && (!strncmp(buf.release, "4.7.", 4) || !strncmp(buf.release, "4.8.", 4))) _STARPU_DISP("Warning: This system is running a 4.7 or 4.8 kernel. These have a severe scheduling performance regression issue, please upgrade to at least 4.9.\n"); } #endif #endif if (starpu_getenv("STARPU_ENABLE_STATS")) { _STARPU_DISP("Warning: STARPU_ENABLE_STATS is enabled, which slows down a bit\n"); } #ifndef STARPU_SIMGRID if (starpu_getenv_number_default("STARPU_SIMGRID", 0)) { _STARPU_DISP("Simulation mode requested, but this libstarpu was built without simgrid support, please recompile\n"); STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); init_count--; initialized = UNINITIALIZED; /* Let somebody else try to do it */ STARPU_PTHREAD_COND_SIGNAL(&init_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); return -EINVAL; } #endif #if defined(_WIN32) && !defined(__CYGWIN__) WSADATA wsadata; WSAStartup(MAKEWORD(1,0), &wsadata); #endif STARPU_AYU_PREINIT(); /* store the pointer to the user explicit configuration during the * initialization */ if (user_conf == NULL) starpu_conf_init(&_starpu_config.conf); else { if (user_conf->magic != 42) { _STARPU_DISP("starpu_conf structure needs to be initialized with starpu_conf_init\n"); STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); init_count--; initialized = UNINITIALIZED; /* Let somebody else try to do it */ STARPU_PTHREAD_COND_SIGNAL(&init_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); return -EINVAL; } _starpu_config.conf = *user_conf; } _starpu_conf_check_environment(&_starpu_config.conf); if (is_a_sink && _starpu_config.conf.nmpi_ms == 0 && _starpu_config.conf.ntcpip_ms == 0) { /* MS was explicitly disabled, abort sinks and leave source alone */ #ifdef STARPU_USE_MPI_MASTER_SLAVE if (_starpu_mpi_common_is_mp_initialized()) _starpu_mpi_common_mp_deinit(); #endif #ifdef STARPU_USE_TCPIP_MASTER_SLAVE if (_starpu_tcpip_common_is_mp_initialized()) _starpu_tcpip_common_mp_deinit(); #endif STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); init_count--; initialized = UNINITIALIZED; /* Let somebody else try to do it */ STARPU_PTHREAD_COND_SIGNAL(&init_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); return -ENODEV; } /* Make a copy of arrays */ if (_starpu_config.conf.sched_policy_name) _starpu_config.conf.sched_policy_name = strdup(_starpu_config.conf.sched_policy_name); if (_starpu_config.conf.n_cuda_opengl_interoperability) { size_t size = _starpu_config.conf.n_cuda_opengl_interoperability * sizeof(*_starpu_config.conf.cuda_opengl_interoperability); unsigned *copy; _STARPU_MALLOC(copy, size); memcpy(copy, _starpu_config.conf.cuda_opengl_interoperability, size); _starpu_config.conf.cuda_opengl_interoperability = copy; } if (_starpu_config.conf.n_not_launched_drivers) { size_t size = _starpu_config.conf.n_not_launched_drivers * sizeof(*_starpu_config.conf.not_launched_drivers); struct starpu_driver *copy; _STARPU_MALLOC(copy, size); memcpy(copy, _starpu_config.conf.not_launched_drivers, size); _starpu_config.conf.not_launched_drivers = copy; } _hook_func_nb = 0; /* Let drivers register themselves */ starpu_drivers_preinit(); _starpu_sched_init(); _starpu_job_init(); _starpu_graph_init(); _starpu_init_all_sched_ctxs(&_starpu_config); _starpu_init_progression_hooks(); _starpu_init_idle_hooks(); _starpu_init_tags(); _starpu_init_perfmodel(); #ifdef STARPU_USE_FXT _starpu_fxt_init_profiling(_starpu_config.conf.trace_buffer_size); #endif _starpu_open_debug_logfile(); _starpu_data_interface_init(); _starpu_timing_init(); _starpu_load_bus_performance_files(); /* Note: nothing before here should be allocating anything, in case we * actually return ENODEV here */ /* Depending on whether we are a MP sink or not, we must build the * topology with MP nodes or not. */ ret = _starpu_build_topology(&_starpu_config, is_a_sink); /* sink doesn't exit even if no worker discovered */ if (ret && !is_a_sink) { starpu_perfmodel_free_sampling(); STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); init_count--; _starpu_destroy_machine_config(&_starpu_config, is_a_sink); free((char*) _starpu_config.conf.sched_policy_name); if (_starpu_config.conf.n_cuda_opengl_interoperability) free(_starpu_config.conf.cuda_opengl_interoperability); if (_starpu_config.conf.n_not_launched_drivers) free(_starpu_config.conf.not_launched_drivers); #ifdef STARPU_USE_MPI_MASTER_SLAVE if (_starpu_mpi_common_is_mp_initialized()) _starpu_mpi_common_mp_deinit(); #endif #ifdef STARPU_USE_TCPIP_MASTER_SLAVE if (_starpu_tcpip_common_is_mp_initialized()) _starpu_tcpip_common_mp_deinit(); #endif initialized = UNINITIALIZED; /* Let somebody else try to do it */ STARPU_PTHREAD_COND_SIGNAL(&init_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); #ifdef STARPU_USE_FXT _starpu_stop_fxt_profiling(); #endif return ret; } _starpu_profiling_init(); _starpu_task_init(); for (worker = 0; worker < _starpu_config.topology.nworkers; worker++) _starpu_worker_init(&_starpu_config.workers[worker], &_starpu_config); //FIXME: find out if the variable STARPU_CHECK_ENTIRE_PLATFORM is really needed, for now, just set 1 as a default value check_entire_platform = 1;//starpu_getenv_number("STARPU_CHECK_ENTIRE_PLATFORM"); _starpu_config.disable_kernels = starpu_getenv_number("STARPU_DISABLE_KERNELS"); STARPU_PTHREAD_KEY_CREATE(&_starpu_worker_key, NULL); STARPU_PTHREAD_KEY_CREATE(&_starpu_worker_set_key, NULL); _starpu_keys_initialized = 1; STARPU_WMB(); if (!is_a_sink) { _starpu_build_tree(); struct starpu_sched_policy *selected_policy = _starpu_select_sched_policy(&_starpu_config, _starpu_config.conf.sched_policy_name); _starpu_create_sched_ctx(selected_policy, NULL, -1, 1, "init", (_starpu_config.conf.global_sched_ctx_min_priority != -1), _starpu_config.conf.global_sched_ctx_min_priority, (_starpu_config.conf.global_sched_ctx_max_priority != -1), _starpu_config.conf.global_sched_ctx_max_priority, 1, _starpu_config.conf.sched_policy_callback, NULL, 0, NULL, 0); } _starpu_initialize_registered_performance_models(); _starpu_perf_counter_init(&_starpu_config); _starpu_perf_knob_init(); #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) _starpu_cuda_init(); #endif #if defined(STARPU_USE_HIP) _starpu_hip_init(); #endif #ifdef STARPU_SIMGRID _starpu_simgrid_init(); #endif if (!is_a_sink) { /* Launch "basic" workers (ie. non-combined workers) */ _starpu_launch_drivers(&_starpu_config); /* Allocate swap, if any */ _starpu_swap_init(); } _starpu_watchdog_init(); _starpu_profiling_start(); STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); initialized = INITIALIZED; /* Tell everybody that we initialized */ STARPU_PTHREAD_COND_BROADCAST(&init_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); _STARPU_DEBUG("Initialisation finished\n"); #ifdef STARPU_USE_MP /* Finally, if we are a MP sink, we never leave this function. Else, * we enter an infinite event loop which listen for MP commands from * the source. */ if (is_a_sink) { _starpu_sink_common_worker(); /* We should normally never leave the loop as we don't want to * really initialize STARPU */ STARPU_ASSERT(0); } #endif _starpu_catch_signals(); /* if MPI is enabled, binding display will be done later, after MPI initialization */ if (!_starpu_config.conf.will_use_mpi && starpu_getenv_number_default("STARPU_DISPLAY_BINDINGS", 0)) { fprintf(stdout, "== Binding ==\n"); starpu_display_bindings(); fprintf(stdout, "== End of binding ==\n"); fflush(stdout); } #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info_init(starpu_prof_tool_event_init_end, 0, starpu_prof_tool_driver_cpu, &(_starpu_config.conf)); pi.conf = &_starpu_config.conf; starpu_prof_tool_callbacks.starpu_prof_tool_event_init_end(&pi, NULL, NULL); #endif return 0; } /* * Handle runtime termination */ static void _starpu_terminate_workers(struct _starpu_machine_config *pconfig) { int status = 0; unsigned workerid; unsigned n; starpu_wake_all_blocked_workers(); for (workerid = 0; workerid < pconfig->topology.nworkers; workerid++) { _STARPU_DEBUG("wait for worker %u\n", workerid); struct _starpu_worker_set *set = pconfig->workers[workerid].set; struct _starpu_worker *worker = &pconfig->workers[workerid]; /* in case StarPU termination code is called from a callback, * we have to check if starpu_pthread_self() is the worker itself */ if (set && set->nworkers > 0) { if (set->started) { if (!starpu_pthread_equal(starpu_pthread_self(), set->worker_thread)) status = starpu_pthread_join(set->worker_thread, NULL); if (status) { #ifdef STARPU_VERBOSE _STARPU_DEBUG("starpu_pthread_join -> %d\n", status); #endif } set->started = 0; } } else { if (!worker->run_by_starpu) goto out; if (!starpu_pthread_equal(starpu_pthread_self(), worker->worker_thread)) status = starpu_pthread_join(worker->worker_thread, NULL); if (status) { #ifdef STARPU_VERBOSE _STARPU_DEBUG("starpu_pthread_join -> %d\n", status); #endif } } out: STARPU_ASSERT(starpu_task_prio_list_empty(&worker->local_tasks)); for (n = 0; n < worker->local_ordered_tasks_size; n++) STARPU_ASSERT(worker->local_ordered_tasks[n] == NULL); _starpu_sched_ctx_list_delete(&worker->sched_ctx_list); free(worker->local_ordered_tasks); STARPU_ASSERT(_starpu_ctx_change_list_empty(&worker->ctx_change_list)); } #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info_init(starpu_prof_tool_event_terminate, 0, starpu_prof_tool_driver_cpu, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_terminate(&pi, NULL, NULL); #endif } /* Condition variable and mutex used to pause/resume. */ static starpu_pthread_cond_t pause_cond = STARPU_PTHREAD_COND_INITIALIZER; static starpu_pthread_mutex_t pause_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; void _starpu_may_pause(void) { /* pause_depth is just protected by a memory barrier */ STARPU_RMB(); if (STARPU_UNLIKELY(_starpu_config.pause_depth > 0)) { STARPU_PTHREAD_MUTEX_LOCK(&pause_mutex); if (_starpu_config.pause_depth > 0) { STARPU_PTHREAD_COND_WAIT(&pause_cond, &pause_mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&pause_mutex); } } void starpu_pause() { STARPU_HG_DISABLE_CHECKING(_starpu_config.pause_depth); _starpu_config.pause_depth += 1; starpu_fxt_trace_user_event_string("starpu_pause"); } void starpu_resume() { STARPU_PTHREAD_MUTEX_LOCK(&pause_mutex); _starpu_config.pause_depth -= 1; if (!_starpu_config.pause_depth) { STARPU_PTHREAD_COND_BROADCAST(&pause_cond); } STARPU_PTHREAD_MUTEX_UNLOCK(&pause_mutex); starpu_fxt_trace_user_event_string("starpu_resume"); } int starpu_is_paused() { STARPU_RMB(); return _starpu_config.pause_depth > 0; } unsigned _starpu_worker_can_block(unsigned memnode STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *worker STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_NON_BLOCKING_DRIVERS return 0; #else /* do not block if a sched_ctx change operation is pending */ if (worker->state_changing_ctx_notice) return 0; unsigned can_block = 1; struct starpu_driver driver; driver.type = worker->arch; switch (driver.type) { case STARPU_CPU_WORKER: driver.id.cpu_id = worker->devid; break; case STARPU_CUDA_WORKER: driver.id.cuda_id = worker->devid; break; case STARPU_HIP_WORKER: driver.id.hip_id = worker->devid; break; #ifdef STARPU_USE_OPENCL case STARPU_OPENCL_WORKER: starpu_opencl_get_device(worker->devid, &driver.id.opencl_id); break; #endif default: goto always_launch; } if (!_starpu_may_launch_driver(&_starpu_config.conf, &driver)) return 0; always_launch: #ifndef STARPU_SIMGRID if (!_starpu_check_that_no_data_request_exists(memnode)) can_block = 0; #endif if (!_starpu_machine_is_running()) can_block = 0; if (!_starpu_execute_registered_progression_hooks()) can_block = 0; return can_block; #endif } static void _starpu_kill_all_workers(struct _starpu_machine_config *pconfig) { /* set the flag which will tell workers to stop */ ANNOTATE_HAPPENS_AFTER(&_starpu_config.running); pconfig->running = 0; /* running is just protected by a memory barrier */ ANNOTATE_HAPPENS_BEFORE(&_starpu_config.running); STARPU_WMB(); starpu_wake_all_blocked_workers(); } void starpu_display_stats() { starpu_profiling_bus_helper_display_summary(); starpu_profiling_worker_helper_display_summary(); } void starpu_shutdown(void) { unsigned worker; STARPU_PTHREAD_MUTEX_LOCK(&init_mutex); init_count--; STARPU_ASSERT_MSG(init_count >= 0, "Number of calls to starpu_shutdown() can not be higher than the number of calls to starpu_init()\n"); if (init_count) { _STARPU_DEBUG("Still somebody needing StarPU, don't deinitialize\n"); STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); return; } /* We're last */ initialized = CHANGING; STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); /* If the workers are frozen, no progress can be made. */ STARPU_ASSERT_MSG(_starpu_config.pause_depth <= 0, "Did you forget to call starpu_resume before starpu_shutdown?"); starpu_task_wait_for_no_ready(); starpu_worker_wait_for_initialisation(); /* tell all workers to shutdown */ _starpu_kill_all_workers(&_starpu_config); unsigned i; unsigned nb_numa_nodes = starpu_memory_nodes_get_numa_count(); for (i=0; i= 0 && type < STARPU_NARCH) && (starpu_driver_info[type].name_upper != NULL); } enum starpu_worker_archtype starpu_arch_mask_to_worker_archtype(unsigned mask) { STARPU_ASSERT(mask && !(mask & (mask-1))); // ensures that only one bit of the mask is set enum starpu_worker_archtype worker_type = ffs(mask)-2; // ffs(mask) is the indice of the lesser bit STARPU_ASSERT(worker_type < STARPU_NARCH); // worker_type is positive and lesser than arch number STARPU_ASSERT(starpu_worker_archtype_is_valid(worker_type)); // worker_type is a valid worker architecture return worker_type; } #undef starpu_worker_get_count unsigned starpu_worker_get_count(void) { return _starpu_config.topology.nworkers; } void starpu_worker_get_current_task_exp_end(unsigned workerid, struct timespec *date) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); *date = worker->cl_expend; } unsigned starpu_worker_is_blocked_in_parallel(int workerid) { if (!_starpu_worker_parallel_blocks) return 0; int relax_own_observation_state = 0; struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); STARPU_ASSERT(worker != NULL); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); struct _starpu_worker *cur_worker = NULL; int cur_workerid = starpu_worker_get_id(); if (workerid != cur_workerid) { /* in order to observe the 'blocked' state of a worker from * another worker, we must avoid race conditions between * 'blocked' state changes and state observations. This is the * purpose of this 'if' block. */ cur_worker = cur_workerid >= 0 ? _starpu_get_worker_struct(cur_workerid) : NULL; relax_own_observation_state = (cur_worker != NULL) && (cur_worker->state_relax_refcnt == 0); if (relax_own_observation_state && !worker->state_relax_refcnt) { /* moreover, when a worker (cur_worker != NULL) * observes another worker, we need to take special * care to avoid live locks, thus the observing worker * must enter the relaxed state (if not relaxed * already) before doing the observation in mutual * exclusion */ STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&cur_worker->sched_mutex); cur_worker->state_relax_refcnt = 1; STARPU_PTHREAD_COND_BROADCAST(&cur_worker->sched_cond); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&cur_worker->sched_mutex); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); } /* the observer waits for a safe window to observe the state, * and also waits for any pending blocking state change * requests to be processed, in order to not obtain an * ephemeral information */ while (!worker->state_relax_refcnt || worker->state_block_in_parallel_req || worker->state_unblock_in_parallel_req) { STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); } } unsigned ret = _starpu_config.workers[workerid].state_blocked_in_parallel; /* once a worker state has been observed, the worker is 'tainted' for the next one full sched_op, * to avoid changing the observed worker state - on which the observer * made a scheduling decision - after the fact. */ worker->state_blocked_in_parallel_observed = 1; STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); if (relax_own_observation_state) { STARPU_PTHREAD_MUTEX_LOCK_SCHED(&cur_worker->sched_mutex); cur_worker->state_relax_refcnt = 0; STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&cur_worker->sched_mutex); } return ret; } unsigned starpu_worker_is_slave_somewhere(int workerid) { starpu_worker_lock(workerid); unsigned ret = _starpu_config.workers[workerid].is_slave_somewhere; starpu_worker_unlock(workerid); return ret; } int starpu_worker_get_count_by_type(enum starpu_worker_archtype type) { unsigned n = 0; if (type != STARPU_ANY_WORKER) { if (type >= STARPU_NARCH) return -EINVAL; unsigned i; for (i = 0; i < _starpu_config.topology.ndevices[type]; i++) n += _starpu_config.topology.nworker[type][i]; return n; } for (type = 0; type < STARPU_NARCH; type++) n += starpu_worker_get_count_by_type(type); return n; } unsigned starpu_combined_worker_get_count(void) { return _starpu_config.topology.ncombinedworkers; } unsigned starpu_cpu_worker_get_count(void) { return starpu_worker_get_count_by_type(STARPU_CPU_WORKER); } unsigned starpu_cuda_worker_get_count(void) { return starpu_worker_get_count_by_type(STARPU_CUDA_WORKER); } unsigned starpu_hip_worker_get_count(void) { return starpu_worker_get_count_by_type(STARPU_HIP_WORKER); } unsigned starpu_opencl_worker_get_count(void) { return starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); } int starpu_asynchronous_copy_disabled(void) { return _starpu_config.conf.disable_asynchronous_copy; } int starpu_asynchronous_cuda_copy_disabled(void) { return _starpu_config.conf.disable_asynchronous_cuda_copy; } int starpu_asynchronous_hip_copy_disabled(void) { return _starpu_config.conf.disable_asynchronous_hip_copy; } int starpu_asynchronous_opencl_copy_disabled(void) { return _starpu_config.conf.disable_asynchronous_opencl_copy; } int starpu_asynchronous_max_fpga_copy_disabled(void) { return _starpu_config.conf.disable_asynchronous_max_fpga_copy; } int starpu_asynchronous_mpi_ms_copy_disabled(void) { return _starpu_config.conf.disable_asynchronous_mpi_ms_copy; } int starpu_asynchronous_tcpip_ms_copy_disabled(void) { return _starpu_config.conf.disable_asynchronous_tcpip_ms_copy; } /* Return whether memory mapping is disabled (0) or enabled (1) */ int starpu_map_enabled(void) { return _starpu_config.conf.enable_map; } int starpu_asynchronous_copy_disabled_for(enum starpu_node_kind kind) { return asynchronous_copy_disabled[kind]; } unsigned starpu_mpi_ms_worker_get_count(void) { return starpu_worker_get_count_by_type(STARPU_MPI_MS_WORKER); } unsigned starpu_tcpip_ms_worker_get_count(void) { return starpu_worker_get_count_by_type(STARPU_TCPIP_MS_WORKER); } /* When analyzing performance, it is useful to see what is the processing unit * that actually performed the task. This function returns the id of the * processing unit actually executing it, therefore it makes no sense to use it * within the callbacks of SPU functions for instance. If called by some thread * that is not controlled by StarPU, starpu_worker_get_id returns -1. */ #undef starpu_worker_get_id int starpu_worker_get_id(void) { struct _starpu_worker * worker; worker = _starpu_get_local_worker_key(); if (worker) { return worker->workerid; } else { /* there is no worker associated to that thread, perhaps it is * a thread from the application or this is some SPU worker */ return -1; } } #define starpu_worker_get_id _starpu_worker_get_id #undef _starpu_worker_get_id_check unsigned _starpu_worker_get_id_check(const char *f, int l) { (void) f; (void) l; int id = _starpu_worker_get_id(); STARPU_ASSERT_MSG(id>=0, "%s:%d Cannot be called from outside a worker\n", f, l); return id; } int starpu_combined_worker_get_id(void) { struct _starpu_worker *worker; worker = _starpu_get_local_worker_key(); if (worker) { return worker->combined_workerid; } else { /* there is no worker associated to that thread, perhaps it is * a thread from the application or this is some SPU worker */ return -1; } } int starpu_combined_worker_get_size(void) { struct _starpu_worker *worker; worker = _starpu_get_local_worker_key(); if (worker) { return worker->worker_size; } else { /* there is no worker associated to that thread, perhaps it is * a thread from the application or this is some SPU worker */ return -1; } } int starpu_combined_worker_get_rank(void) { struct _starpu_worker *worker; worker = _starpu_get_local_worker_key(); if (worker) { return worker->current_rank; } else { /* there is no worker associated to that thread, perhaps it is * a thread from the application or this is some SPU worker */ return -1; } } int starpu_worker_get_subworkerid(int id) { return _starpu_config.workers[id].subworkerid; } int starpu_worker_get_devid(int id) { return _starpu_config.workers[id].devid; } int starpu_worker_get_devnum(int id) { return _starpu_config.workers[id].devnum; } unsigned starpu_worker_is_combined_worker(int id) { return id >= (int)_starpu_config.topology.nworkers; } struct _starpu_combined_worker *_starpu_get_combined_worker_struct(unsigned id) { unsigned basic_worker_count = starpu_worker_get_count(); //_STARPU_DEBUG("basic_worker_count:%d\n",basic_worker_count); STARPU_ASSERT(id >= basic_worker_count); return &_starpu_config.combined_workers[id - basic_worker_count]; } enum starpu_worker_archtype starpu_worker_get_type(int id) { enum starpu_worker_archtype type = _starpu_config.workers[id].arch; STARPU_ASSERT(type < STARPU_NARCH); return type; } unsigned starpu_worker_get_ids_by_type(enum starpu_worker_archtype type, int *workerids, unsigned maxsize) { unsigned nworkers = starpu_worker_get_count(); unsigned cnt = 0; unsigned id; for (id = 0; id < nworkers; id++) { if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type) { /* Perhaps the array is too small ? */ if (cnt >= maxsize) return -ERANGE; workerids[cnt++] = id; } } return cnt; } int starpu_worker_get_by_type(enum starpu_worker_archtype type, int num) { unsigned nworkers = starpu_worker_get_count(); int cnt = 0; unsigned id; for (id = 0; id < nworkers; id++) { if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type) { if (num == cnt) return id; cnt++; } } /* Not found */ return -1; } int starpu_worker_get_by_devid(enum starpu_worker_archtype type, int devid) { unsigned nworkers = starpu_worker_get_count(); unsigned id; for (id = 0; id < nworkers; id++) if (starpu_worker_get_type(id) == type && starpu_worker_get_devid(id) == devid) return id; /* Not found */ return -1; } int starpu_worker_get_devids(enum starpu_worker_archtype type, int *devids, int num) { unsigned nworkers = starpu_worker_get_count(); int workerids[nworkers]; unsigned ndevice_workers = starpu_worker_get_ids_by_type(type, workerids, nworkers); unsigned ndevids = 0; if(ndevice_workers > 0) { unsigned id, devid; int cnt = 0; unsigned found = 0; for(id = 0; id < ndevice_workers; id++) { int curr_devid; curr_devid = _starpu_config.workers[workerids[id]].devid; for(devid = 0; devid < ndevids; devid++) { if(curr_devid == devids[devid]) { found = 1; break; } } if(!found) { devids[ndevids++] = curr_devid; cnt++; } else found = 0; if(cnt == num) break; } } return ndevids; } unsigned starpu_worker_type_can_execute_task(enum starpu_worker_archtype worker_type, const struct starpu_task *task) { return (STARPU_WORKER_TO_MASK(worker_type) & task->where) != 0; } void starpu_worker_get_name(int id, char *dst, size_t maxlen) { char *name = _starpu_config.workers[id].name; snprintf(dst, maxlen, "%s", name); } int starpu_worker_get_bindid(int workerid) { return _starpu_config.workers[workerid].bindid; } int starpu_bindid_get_workerids(int bindid, int **workerids) { if (bindid >= (int) _starpu_config.nbindid) return 0; *workerids = _starpu_config.bindid_workers[bindid].workerids; return _starpu_config.bindid_workers[bindid].nworkers; } int starpu_worker_get_stream_workerids(unsigned devid, int *workerids, enum starpu_worker_archtype type) { unsigned nworkers = starpu_worker_get_count(); int nw = 0; unsigned id; for (id = 0; id < nworkers; id++) { if (_starpu_config.workers[id].devid == devid && (type == STARPU_ANY_WORKER || _starpu_config.workers[id].arch == type)) workerids[nw++] = id; } return nw; } void starpu_worker_get_sched_condition(int workerid, starpu_pthread_mutex_t **sched_mutex, starpu_pthread_cond_t **sched_cond) { STARPU_ASSERT(workerid >= 0 && workerid < STARPU_NMAXWORKERS); *sched_cond = &_starpu_config.workers[workerid].sched_cond; *sched_mutex = &_starpu_config.workers[workerid].sched_mutex; } /* returns 1 if the call results in initiating a transition of worker WORKERID * from sleeping state to awake * returns 0 if worker WORKERID is not sleeping or the wake-up transition * already has been initiated */ static int starpu_wakeup_worker_locked(int workerid, starpu_pthread_cond_t *sched_cond, starpu_pthread_mutex_t *mutex STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_SIMGRID starpu_pthread_queue_broadcast(&_starpu_simgrid_task_queue[workerid]); #endif if (_starpu_config.workers[workerid].status & STATUS_SLEEPING) { int ret = 0; if (_starpu_config.workers[workerid].state_keep_awake != 1) { _starpu_config.workers[workerid].state_keep_awake = 1; ret = 1; } /* cond_broadcast is required over cond_signal since * the condition is share for multiple purpose */ STARPU_PTHREAD_COND_BROADCAST(sched_cond); return ret; } else if (_starpu_config.workers[workerid].status & STATUS_SCHEDULING) { _starpu_config.workers[workerid].state_keep_awake = 1; return 0; } return 0; } static int starpu_wakeup_worker_no_relax(int workerid, starpu_pthread_cond_t *sched_cond, starpu_pthread_mutex_t *sched_mutex) { int success; STARPU_PTHREAD_MUTEX_LOCK_SCHED(sched_mutex); success = starpu_wakeup_worker_locked(workerid, sched_cond, sched_mutex); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(sched_mutex); return success; } int starpu_wake_worker_locked(int workerid) { starpu_pthread_mutex_t *sched_mutex; starpu_pthread_cond_t *sched_cond; starpu_worker_get_sched_condition(workerid, &sched_mutex, &sched_cond); return starpu_wakeup_worker_locked(workerid, sched_cond, sched_mutex); } int starpu_wake_worker_no_relax(int workerid) { starpu_pthread_mutex_t *sched_mutex; starpu_pthread_cond_t *sched_cond; starpu_worker_get_sched_condition(workerid, &sched_mutex, &sched_cond); return starpu_wakeup_worker_no_relax(workerid, sched_cond, sched_mutex); } int _starpu_worker_get_nids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize) { unsigned nworkers = starpu_worker_get_count(); int cnt = 0; unsigned id; for (id = 0; id < nworkers; id++) { if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type) { /* Perhaps the array is too small ? */ if (cnt >= maxsize) return cnt; workerids[cnt++] = id; } } return cnt; } int _starpu_worker_get_nids_ctx_free_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize) { unsigned nworkers = starpu_worker_get_count(); int cnt = 0; unsigned id; for (id = 0; id < nworkers; id++) { if (type == STARPU_ANY_WORKER || starpu_worker_get_type(id) == type) { /* Perhaps the array is too small ? */ if (cnt >= maxsize) return cnt; unsigned found = 0; int s; for(s = 1; s < STARPU_NMAX_SCHED_CTXS; s++) { if(_starpu_config.sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS) { struct starpu_worker_collection *workers = _starpu_config.sched_ctxs[s].workers; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); if(worker == id) { found = 1; break; } } if(found) break; } } if(!found) workerids[cnt++] = id; } } return cnt; } void starpu_get_version(int *major, int *minor, int *release) { *major = STARPU_MAJOR_VERSION; *minor = STARPU_MINOR_VERSION; *release = STARPU_RELEASE_VERSION; } unsigned starpu_worker_get_sched_ctx_list(int workerid, unsigned **sched_ctxs) { unsigned s = 0; unsigned nsched_ctxs = _starpu_worker_get_nsched_ctxs(workerid); _STARPU_MALLOC(*sched_ctxs, nsched_ctxs*sizeof(unsigned)); struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); struct _starpu_sched_ctx_elt *e = NULL; struct _starpu_sched_ctx_list_iterator list_it; _starpu_sched_ctx_list_iterator_init(worker->sched_ctx_list, &list_it); while (_starpu_sched_ctx_list_iterator_has_next(&list_it)) { e = _starpu_sched_ctx_list_iterator_get_next(&list_it); (*sched_ctxs)[s++] = e->sched_ctx; } return nsched_ctxs; } const char *starpu_worker_get_type_as_string(enum starpu_worker_archtype type) { STARPU_ASSERT(type < STARPU_NARCH); const char *ret = starpu_driver_info[type].name_upper; if (!ret) ret = "unknown"; return ret; } enum starpu_worker_archtype starpu_worker_get_type_from_string(const char *name) { enum starpu_worker_archtype type; for (type = 0; type < STARPU_NARCH; type++) { if (!strcmp(name, starpu_driver_info[type].name_upper)) return type; } return STARPU_UNKNOWN_WORKER; } const char *starpu_worker_get_type_as_env_var(enum starpu_worker_archtype type) { STARPU_ASSERT(type < STARPU_NARCH); const char *ret = starpu_driver_info[type].name_var; if (!ret) ret = "UNKNOWN"; return ret; } void _starpu_worker_set_stream_ctx(unsigned workerid, struct _starpu_sched_ctx *sched_ctx) { STARPU_ASSERT(workerid < starpu_worker_get_count()); struct _starpu_worker *w = _starpu_get_worker_struct(workerid); w->stream_ctx = sched_ctx; } struct _starpu_sched_ctx* _starpu_worker_get_ctx_stream(unsigned stream_workerid) { if (stream_workerid >= starpu_worker_get_count()) return NULL; struct _starpu_worker *w = _starpu_get_worker_struct(stream_workerid); return w->stream_ctx; } unsigned starpu_worker_get_sched_ctx_id_stream(unsigned stream_workerid) { if (stream_workerid >= starpu_worker_get_count()) return STARPU_NMAX_SCHED_CTXS; struct _starpu_worker *w = _starpu_get_worker_struct(stream_workerid); return w->stream_ctx != NULL ? w->stream_ctx->id : STARPU_NMAX_SCHED_CTXS; } void starpu_worker_display_count(FILE *output, enum starpu_worker_archtype type) { int nworkers = starpu_worker_get_count_by_type(type); if (nworkers <= 0) { fprintf(output, "No %s worker\n", starpu_worker_get_type_as_string(type)); } else { int ids[nworkers]; starpu_worker_get_ids_by_type(type, ids, nworkers); fprintf(output, "%d %s worker%s\n", nworkers, starpu_worker_get_type_as_string(type), nworkers==1?"":"s"); } } void starpu_worker_display_names(FILE *output, enum starpu_worker_archtype type) { int nworkers; if (!starpu_driver_info[type].name_upper) return; nworkers = starpu_worker_get_count_by_type(type); if (nworkers <= 0) { fprintf(output, "No %s worker\n", starpu_worker_get_type_as_string(type)); } else { int i, ids[nworkers]; starpu_worker_get_ids_by_type(type, ids, nworkers); fprintf(output, "%d %s worker%s:\n", nworkers, starpu_worker_get_type_as_string(type), nworkers==1?"":"s"); for(i = 0; i < nworkers; i++) { char name[256]; starpu_worker_get_name(ids[i], name, 256); fprintf(output, "\t%s\n", name); } } } void starpu_worker_display_all(FILE *output) { enum starpu_worker_archtype type; for (type = 0; type < STARPU_NARCH; type++) starpu_worker_display_names(output, type); } void _starpu_worker_refuse_task(struct _starpu_worker *worker, struct starpu_task *task) { if (worker->pipeline_length || worker->arch == STARPU_OPENCL_WORKER) { int j; for (j = 0; j < worker->ntasks; j++) { const int j_mod = (j+worker->first_task)%STARPU_MAX_PIPELINE; if (task == worker->current_tasks[j_mod]) { worker->current_tasks[j_mod] = NULL; if (j == 0) { worker->first_task = (worker->first_task + 1) % STARPU_MAX_PIPELINE; worker->current_task = NULL; _starpu_set_current_task(NULL); } break; } } STARPU_ASSERT(jntasks); } else { worker->current_task = NULL; _starpu_set_current_task(NULL); } worker->ntasks--; task->prefetched = 0; int res = _starpu_push_task_to_workers(task); STARPU_ASSERT_MSG(res == 0, "_starpu_push_task_to_workers() unexpectedly returned = %d\n", res); } int starpu_worker_sched_op_pending(void) { return _starpu_worker_sched_op_pending(); } #undef starpu_worker_relax_on void starpu_worker_relax_on(void) { _starpu_worker_relax_on(); } #undef starpu_worker_relax_off void starpu_worker_relax_off(void) { _starpu_worker_relax_off(); } #undef starpu_worker_get_relax_state int starpu_worker_get_relax_state(void) { return _starpu_worker_get_relax_state(); } #undef starpu_worker_lock void starpu_worker_lock(int workerid) { _starpu_worker_lock(workerid); } #undef starpu_worker_trylock int starpu_worker_trylock(int workerid) { return _starpu_worker_trylock(workerid); } #undef starpu_worker_unlock void starpu_worker_unlock(int workerid) { _starpu_worker_unlock(workerid); } #undef starpu_worker_lock_self void starpu_worker_lock_self(void) { _starpu_worker_lock_self(); } #undef starpu_worker_unlock_self void starpu_worker_unlock_self(void) { _starpu_worker_unlock_self(); } #undef starpu_wake_worker_relax int starpu_wake_worker_relax(int workerid) { return _starpu_wake_worker_relax(workerid); } #ifdef STARPU_HAVE_HWLOC hwloc_cpuset_t starpu_worker_get_hwloc_cpuset(int workerid) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); return hwloc_bitmap_dup(worker->hwloc_cpu_set); } hwloc_obj_t starpu_worker_get_hwloc_obj(int workerid) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); return worker->hwloc_obj; } #endif /* Light version of _starpu_wake_worker_relax, which, when possible, * speculatively sets keep_awake on the target worker without waiting that * worker to enter the relaxed state. */ int starpu_wake_worker_relax_light(int workerid) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); STARPU_ASSERT(worker != NULL); int cur_workerid = starpu_worker_get_id(); if (workerid != cur_workerid) { starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); while (!worker->state_relax_refcnt) { /* Attempt a fast path if the worker is not really asleep */ if (_starpu_config.workers[workerid].status & STATUS_SCHEDULING) { _starpu_config.workers[workerid].state_keep_awake = 1; STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); starpu_worker_relax_off(); return 1; } STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); } } else { STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); } int ret = starpu_wake_worker_locked(workerid); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); if (workerid != cur_workerid) { starpu_worker_relax_off(); } return ret; } #ifdef STARPU_WORKER_CALLBACKS void starpu_worker_set_going_to_sleep_callback(void (*callback)(unsigned workerid)) { STARPU_ASSERT(_starpu_config.conf.callback_worker_going_to_sleep); _starpu_config.conf.callback_worker_going_to_sleep = callback; } void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid)) { STARPU_ASSERT(_starpu_config.conf.callback_worker_waking_up); _starpu_config.conf.callback_worker_waking_up = callback; } #endif enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type) { STARPU_ASSERT(type < STARPU_NARCH); enum starpu_node_kind kind = starpu_driver_info[type].memory_kind; STARPU_ASSERT_MSG(kind != (enum starpu_node_kind) -1, "no memory for archtype %d", type); return kind; } starpu-1.4.9+dfsg/src/core/workers.h000066400000000000000000001401451507764646700174010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __WORKERS_H__ #define __WORKERS_H__ /** \addtogroup workers */ /* @{ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef STARPU_HAVE_HWLOC #include #endif #include #include #include #include #include #ifdef STARPU_USE_MPI_MASTER_SLAVE #include #endif #ifdef STARPU_USE_TCPIP_MASTER_SLAVE #include #endif #include #include #include #pragma GCC visibility push(hidden) #define STARPU_MAX_PIPELINE 4 struct mc_cache_entry; struct _starpu_node { /* * used by memalloc.c */ /** This per-node RW-locks protect mc_list and memchunk_cache entries */ /* Note: handle header lock is always taken before this (normal add/remove case) */ struct _starpu_spinlock mc_lock; /** Potentially in use memory chunks. The beginning of the list is clean (home * node has a copy of the data, or the data is being transferred there), the * remainder of the list may not be clean. */ struct _starpu_mem_chunk_list mc_list; /** This is a shortcut inside the mc_list to the first potentially dirty MC. All * MC before this are clean, MC before this only *may* be clean. */ struct _starpu_mem_chunk *mc_dirty_head; /* TODO: introduce head of data to be evicted */ /** Number of elements in mc_list, number of elements in the clean part of * mc_list plus the non-automatically allocated elements (which are thus always * considered as clean) */ unsigned mc_nb, mc_clean_nb; struct mc_cache_entry *mc_cache; int mc_cache_nb; starpu_ssize_t mc_cache_size; /** Whether some thread is currently tidying this node */ unsigned tidying; /** Whether some thread is currently reclaiming memory for this node */ unsigned reclaiming; /** This records that we tried to prefetch data but went out of memory, so will * probably fail again to prefetch data, thus not trace each and every * attempt. */ volatile int prefetch_out_of_memory; /** Whether this memory node can evict data to another node */ unsigned evictable; /* * used by data_request.c */ /** requests that have not been treated at all */ struct _starpu_data_request_prio_list data_requests[STARPU_MAXNODES][2]; struct _starpu_data_request_prio_list prefetch_requests[STARPU_MAXNODES][2]; /* Contains both task_prefetch and prefetch */ struct _starpu_data_request_prio_list idle_requests[STARPU_MAXNODES][2]; starpu_pthread_mutex_t data_requests_list_mutex[STARPU_MAXNODES][2]; /** requests that are not terminated (eg. async transfers) */ struct _starpu_data_request_prio_list data_requests_pending[STARPU_MAXNODES][2]; unsigned data_requests_npending[STARPU_MAXNODES][2]; starpu_pthread_mutex_t data_requests_pending_list_mutex[STARPU_MAXNODES][2]; /* * used by malloc.c */ int malloc_on_node_default_flags; /** One list of chunks per node */ struct _starpu_chunk_list chunks; /** Number of completely free chunks */ int nfreechunks; /** This protects chunks and nfreechunks */ starpu_pthread_mutex_t chunk_mutex; /* * used by memory_manager.c */ size_t global_size; size_t used_size; /* This is used as an optimization to avoid to wake up allocating threads for * each and every deallocation, only to find that there is still not enough * room. */ /* Minimum amount being waited for */ size_t waiting_size; starpu_pthread_mutex_t lock_nodes; starpu_pthread_cond_t cond_nodes; /** Keep this last, to make sure to separate node data in separate cache lines. */ char padding[STARPU_CACHELINE_SIZE]; }; struct _starpu_ctx_change_list; /** This is initialized by _starpu_worker_init() */ LIST_TYPE(_starpu_worker, struct _starpu_machine_config *config; starpu_pthread_mutex_t mutex; enum starpu_worker_archtype arch; /**< what is the type of worker ? */ uint32_t worker_mask; /**< what is the type of worker ? */ struct starpu_perfmodel_arch perf_arch; /**< in case there are different models of the same arch */ starpu_pthread_t worker_thread; /**< the thread which runs the worker */ unsigned devid; /**< which cpu/gpu/etc is controlled by the worker ? */ unsigned devnum; /**< number of the device controlled by the worker, i.e. ranked from 0 and contiguous */ unsigned subworkerid; /**< which sub-worker this one is for the cpu/gpu */ int bindid; /**< which cpu is the driver bound to ? (logical index) */ int workerid; /**< uniquely identify the worker among all processing units types */ int combined_workerid; /**< combined worker currently using this worker */ int current_rank; /**< current rank in case the worker is used in a parallel fashion */ int worker_size; /**< size of the worker in case we use a combined worker */ starpu_pthread_cond_t started_cond; /**< indicate when the worker is ready */ starpu_pthread_cond_t ready_cond; /**< indicate when the worker is ready */ unsigned memory_node; /**< which memory node is the worker associated with ? */ unsigned numa_memory_node; /**< which numa memory node is the worker associated with? (logical index) */ /** * condition variable used for passive waiting operations on worker * STARPU_PTHREAD_COND_BROADCAST must be used instead of STARPU_PTHREAD_COND_SIGNAL, * since the condition is shared for multiple purpose */ starpu_pthread_cond_t sched_cond; starpu_pthread_mutex_t sched_mutex; /**< mutex protecting sched_cond */ unsigned state_relax_refcnt; /**< mark scheduling sections where other workers can safely access the worker state */ #ifdef STARPU_SPINLOCK_CHECK const char *relax_on_file; int relax_on_line; const char *relax_on_func; const char *relax_off_file; int relax_off_line; const char *relax_off_func; #endif unsigned state_sched_op_pending; /**< a task pop is ongoing even though sched_mutex may temporarily be unlocked */ unsigned state_changing_ctx_waiting; /**< a thread is waiting for operations such as pop to complete before acquiring sched_mutex and modifying the worker ctx*/ unsigned state_changing_ctx_notice; /**< the worker ctx is about to change or being changed, wait for flag to be cleared before starting new scheduling operations */ unsigned state_blocked_in_parallel; /**< worker is currently blocked on a parallel section */ unsigned state_blocked_in_parallel_observed; /**< the blocked state of the worker has been observed by another worker during a relaxed section */ unsigned state_block_in_parallel_req; /**< a request for state transition from unblocked to blocked is pending */ unsigned state_block_in_parallel_ack; /**< a block request has been honored */ unsigned state_unblock_in_parallel_req; /**< a request for state transition from blocked to unblocked is pending */ unsigned state_unblock_in_parallel_ack; /**< an unblock request has been honored */ /** * cumulative blocking depth * - =0 worker unblocked * - >0 worker blocked * - transition from 0 to 1 triggers a block_req * - transition from 1 to 0 triggers a unblock_req */ unsigned block_in_parallel_ref_count; starpu_pthread_t thread_changing_ctx; /**< thread currently changing a sched_ctx containing the worker */ /** list of deferred context changes * * when the current thread is a worker, _and_ this worker is in a * scheduling operation, new ctx changes are queued to this list for * subsequent processing once worker completes the ongoing scheduling * operation */ struct _starpu_ctx_change_list ctx_change_list; struct starpu_task_prio_list local_tasks; /**< this queue contains tasks that have been explicitly submitted to that queue */ struct starpu_task **local_ordered_tasks; /**< this queue contains tasks that have been explicitly submitted to that queue with an explicit order */ unsigned local_ordered_tasks_size; /**< this records the size of local_ordered_tasks */ unsigned current_ordered_task; /**< this records the index (within local_ordered_tasks) of the next ordered task to be executed */ unsigned current_ordered_task_order; /**< this records the order of the next ordered task to be executed */ struct starpu_task *current_task; /**< task currently executed by this worker (non-pipelined version) */ struct starpu_task *current_tasks[STARPU_MAX_PIPELINE]; /**< tasks currently executed by this worker (pipelined version) */ #ifdef STARPU_SIMGRID starpu_pthread_wait_t wait; #endif struct timespec cl_start; /**< Codelet start time of the task currently running */ struct timespec cl_expend; /**< Codelet expected end time of the task currently running */ struct timespec cl_end; /**< Codelet end time of the last task running */ unsigned char first_task; /**< Index of first task in the pipeline */ unsigned char ntasks; /**< number of tasks in the pipeline */ unsigned char pipeline_length; /**< number of tasks to be put in the pipeline */ unsigned char pipeline_stuck; /**< whether a task prevents us from pipelining */ struct _starpu_worker_set *set; /**< in case this worker belongs to a worker set */ struct _starpu_worker_set *driver_worker_set; /**< in case this worker belongs to a driver worker set */ unsigned worker_is_running; unsigned worker_is_initialized; unsigned wait_for_worker_initialization; enum _starpu_worker_status status; /**< what is the worker doing now ? (eg. CALLBACK) */ unsigned state_keep_awake; /**< !0 if a task has been pushed to the worker and the task has not yet been seen by the worker, the worker should no go to sleep before processing this task*/ char name[128]; char short_name[32]; unsigned run_by_starpu; /**< Is this run by StarPU or directly by the application ? */ const struct _starpu_driver_ops *driver_ops; struct _starpu_sched_ctx_list *sched_ctx_list; int tmp_sched_ctx; unsigned nsched_ctxs; /**< the no of contexts a worker belongs to*/ struct _starpu_barrier_counter tasks_barrier; /**< wait for the tasks submitted */ unsigned has_prev_init; /**< had already been inited in another ctx */ unsigned removed_from_ctx[STARPU_NMAX_SCHED_CTXS+1]; unsigned spinning_backoff ; /**< number of cycles to pause when spinning */ unsigned nb_buffers_transferred; /**< number of piece of data already send to worker */ unsigned nb_buffers_totransfer; /**< number of piece of data already send to worker */ struct starpu_task *task_transferring; /**< The buffers of this task are being sent */ /** * indicate whether the workers shares tasks lists with other workers * in this case when removing him from a context it disappears instantly */ unsigned shares_tasks_lists[STARPU_NMAX_SCHED_CTXS+1]; unsigned poped_in_ctx[STARPU_NMAX_SCHED_CTXS+1]; /**< boolean to chose the next ctx a worker will pop into */ /** * boolean indicating at which moment we checked all ctxs and change phase for the booleab poped_in_ctx * one for each of the 2 priorities */ unsigned reverse_phase[2]; unsigned pop_ctx_priority; /**< indicate which priority of ctx is currently active: the values are 0 or 1*/ unsigned is_slave_somewhere; /**< bool to indicate if the worker is slave in a ctx */ struct _starpu_sched_ctx *stream_ctx; #ifdef __GLIBC__ cpu_set_t cpu_set; #endif /* __GLIBC__ */ #ifdef STARPU_HAVE_HWLOC hwloc_bitmap_t hwloc_cpu_set; hwloc_obj_t hwloc_obj; #endif struct starpu_profiling_worker_info profiling_info; /* TODO: rather use rwlock? */ starpu_pthread_mutex_t profiling_info_mutex; /* In case the worker is still sleeping when the user request profiling info, * we need to account for the time elapsed while sleeping. */ unsigned profiling_registered_start[STATUS_INDEX_NR]; struct timespec profiling_registered_start_date[STATUS_INDEX_NR]; enum _starpu_worker_status profiling_status; struct timespec profiling_status_start_date; struct starpu_perf_counter_sample perf_counter_sample; int64_t __w_total_executed__value; double __w_cumul_execution_time__value; int enable_knob; int bindid_requested; /** Keep this last, to make sure to separate worker data in separate cache lines. */ char padding[STARPU_CACHELINE_SIZE]; ); struct _starpu_combined_worker { struct starpu_perfmodel_arch perf_arch; /**< in case there are different models of the same arch */ uint32_t worker_mask; /**< what is the type of workers ? */ int worker_size; unsigned memory_node; /**< which memory node is associated that worker to ? */ int combined_workerid[STARPU_NMAXWORKERS]; #ifdef STARPU_USE_MP int count; starpu_pthread_mutex_t count_mutex; #endif #ifdef __GLIBC__ cpu_set_t cpu_set; #endif /* __GLIBC__ */ #ifdef STARPU_HAVE_HWLOC hwloc_bitmap_t hwloc_cpu_set; #endif /** Keep this last, to make sure to separate worker data in separate cache lines. */ char padding[STARPU_CACHELINE_SIZE]; }; /** * in case a single CPU worker may control multiple * accelerators */ struct _starpu_worker_set { starpu_pthread_mutex_t mutex; starpu_pthread_t worker_thread; /**< the thread which runs the worker */ unsigned nworkers; unsigned started; /**< Only one thread for the whole set */ void *retval; struct _starpu_worker *workers; starpu_pthread_cond_t ready_cond; /**< indicate when the set is ready */ unsigned set_is_initialized; unsigned wait_for_set_initialization; }; struct _starpu_machine_topology { /** Total number of workers. */ unsigned nworkers; /** Total number of combined workers. */ unsigned ncombinedworkers; unsigned nsched_ctxs; #ifdef STARPU_HAVE_HWLOC /** Topology as detected by hwloc. */ hwloc_topology_t hwtopology; hwloc_bitmap_t log_cpuset; hwloc_bitmap_t log_coreset; #endif /** custom hwloc tree*/ struct starpu_tree *tree; /** Total number of PUs (i.e. threads), as detected by the topology code. May * be different from the actual number of CPU workers. */ unsigned nhwpus; /** First PU to be used. May be different from 0 for administrative reasons * (e.g. from job scheduler). */ unsigned firstusedpu; /** Number of PUs (i.e. threads) to be used. May be different from nhwpus for * administrative reasons (e.g. from job scheduler). */ unsigned nusedpus; /** Total number of devices, as detected. May be different from the * actual number of devices run by StarPU. */ unsigned nhwdevices[STARPU_NARCH]; /** Total number of worker for each device, as detected. May be different from the * actual number of workers run by StarPU. */ unsigned nhwworker[STARPU_NARCH][STARPU_NMAXDEVS]; /** Actual number of devices used by StarPU. */ unsigned ndevices[STARPU_NARCH]; /** Number of worker per device */ unsigned nworker[STARPU_NARCH][STARPU_NMAXDEVS]; /** Device ids actually used */ int devid[STARPU_NARCH][STARPU_NMAXDEVS]; /** Whether we should have one thread per stream */ int cuda_th_per_stream; /** Whether we should have one thread per device */ int cuda_th_per_dev; /** Whether we should have one thread per stream (for hip) */ int hip_th_per_stream; /** Whether we should have one thread per device (for hip) */ int hip_th_per_dev; /** Indicates the successive logical PU identifier that should be used * to bind the workers. It is either filled according to the * user's explicit parameters (from starpu_conf) or according * to the STARPU_WORKERS_CPUID env. variable. Otherwise, a * round-robin policy is used to distributed the workers over * the cores. */ unsigned workers_bindid[STARPU_NMAXWORKERS]; /** Indicates how many different values there are in * _starpu_machine_topology::workers_bindid, i.e. the length of the * cycle of the values there. */ unsigned workers_nbindid; /** Indicates the successive device identifiers that should be * used by the driver. It is either filled according to * the user's explicit parameters (from starpu_conf) or * according to the corresponding env. variable. * Otherwise, they are taken in ID order. */ unsigned workers_devid[STARPU_NARCH][STARPU_NMAXWORKERS]; }; struct _starpu_machine_config { struct _starpu_machine_topology topology; #ifdef STARPU_HAVE_HWLOC int cpu_depth; int pu_depth; #endif /** Where to bind next worker ? */ int current_bindid; char currently_bound[STARPU_NMAXWORKERS]; char currently_shared[STARPU_NMAXWORKERS]; /** Which next device will we use for each arch? */ int current_devid[STARPU_NARCH]; /** Which TCPIP do we use? */ int current_tcpip_deviceid; /** Memory node for different worker types, if only one */ int arch_nodeid [STARPU_NARCH]; /** Separate out previous variables from per-worker data. */ char padding1[STARPU_CACHELINE_SIZE]; /** Basic workers : each of this worker is running its own driver and * can be combined with other basic workers. */ struct _starpu_worker workers[STARPU_NMAXWORKERS]; /** Memory nodes */ struct _starpu_node nodes[STARPU_MAXNODES]; /** Combined workers: these worker are a combination of basic workers * that can run parallel tasks together. */ struct _starpu_combined_worker combined_workers[STARPU_NMAX_COMBINEDWORKERS]; starpu_pthread_mutex_t submitted_mutex; /** Separate out previous mutex from the rest of the data. */ char padding2[STARPU_CACHELINE_SIZE]; /** Translation table from bindid to worker IDs */ struct { int *workerids; unsigned nworkers; /**< size of workerids */ } *bindid_workers; unsigned nbindid; /**< size of bindid_workers */ /** This bitmask indicates which kinds of worker are available. For * instance it is possible to test if there is a CUDA worker with * the result of (worker_mask & STARPU_CUDA). */ uint32_t worker_mask; /** either the user given configuration passed to starpu_init or a default configuration */ struct starpu_conf conf; /** this flag is set until the runtime is stopped */ unsigned running; int disable_kernels; /** Number of calls to starpu_pause() - calls to starpu_resume(). When >0, * StarPU should pause. */ int pause_depth; /** all the sched ctx of the current instance of starpu */ struct _starpu_sched_ctx sched_ctxs[STARPU_NMAX_SCHED_CTXS+1]; /** this flag is set until the application is finished submitting tasks */ unsigned submitting; int watchdog_ok; /** When >0, StarPU should stop performance counters collection. */ int perf_counter_pause_depth; }; struct _starpu_machine_topology; /** Provides information for a device driver */ struct _starpu_driver_info { const char *name_upper; /**< Name of worker type in upper case */ const char *name_var; /**< Name of worker type for environment variables */ const char *name_lower; /**< Name of worker type in lower case */ enum starpu_node_kind memory_kind; /**< Kind of memory in device */ double alpha; /**< Typical relative speed compared to a CPU core */ unsigned wait_for_worker_initialization; /**< Whether we should make the core wait for worker initialization before starting other workers initialization */ const struct _starpu_driver_ops *driver_ops; /**< optional: Driver operations */ void *(*run_worker)(void *); /**< Actually run the worker */ void (*init_worker_binding)(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); /**< Setup worker CPU binding */ void (*init_worker_memory)(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); /**< Setup worker memory node */ #ifdef STARPU_HAVE_HWLOC hwloc_obj_t (*get_hwloc_obj)(hwloc_topology_t topology, int devid); /**< optional: Return the hwloc object corresponding to this device */ #endif }; /** Device driver information, indexed by enum starpu_worker_archtype */ extern struct _starpu_driver_info starpu_driver_info[STARPU_NARCH]; void _starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct _starpu_driver_info *info); /** Provides information for a memory node driver */ struct _starpu_memory_driver_info { const char *name_upper; /**< Name of memory in upper case */ enum starpu_worker_archtype worker_archtype; /**< Kind of device */ const struct _starpu_node_ops *ops; /**< Memory node operations */ }; /** Memory driver information, indexed by enum starpu_node_kind */ extern struct _starpu_memory_driver_info starpu_memory_driver_info[STARPU_MAX_RAM+1]; void _starpu_memory_driver_info_register(enum starpu_node_kind kind, const struct _starpu_memory_driver_info *info); extern int _starpu_worker_parallel_blocks; extern struct _starpu_machine_config _starpu_config; extern int _starpu_keys_initialized; extern starpu_pthread_key_t _starpu_worker_key; extern starpu_pthread_key_t _starpu_worker_set_key; void _starpu_set_catch_signals(int do_catch_signal); /** Three functions to manage argv, argc */ void _starpu_set_argc_argv(int *argc, char ***argv); int *_starpu_get_argc(); char ***_starpu_get_argv(); /** Fill conf with environment variables */ void _starpu_conf_check_environment(struct starpu_conf *conf); /** Called by the driver when it is ready to pause */ void _starpu_may_pause(void); /** Has starpu_shutdown already been called ? */ static inline unsigned _starpu_machine_is_running(void) { unsigned ret; /* running is just protected by a memory barrier */ STARPU_RMB(); ANNOTATE_HAPPENS_AFTER(&_starpu_config.running); ret = _starpu_config.running; ANNOTATE_HAPPENS_BEFORE(&_starpu_config.running); return ret; } /** initialise a worker */ void _starpu_worker_init(struct _starpu_worker *workerarg, struct _starpu_machine_config *pconfig); /** Check if there is a worker that may execute the task. */ uint32_t _starpu_worker_exists(struct starpu_task *) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; /** Is there a worker that can execute MS code ? */ uint32_t _starpu_can_submit_ms_task(void); /** Is there a worker that can execute CUDA code ? */ uint32_t _starpu_can_submit_cuda_task(void); /** Is there a worker that can execute HIP code ? */ uint32_t _starpu_can_submit_hip_task(void); /** Is there a worker that can execute CPU code ? */ uint32_t _starpu_can_submit_cpu_task(void); /** Is there a worker that can execute OpenCL code ? */ uint32_t _starpu_can_submit_opencl_task(void); /** Check whether there is anything that the worker should do instead of * sleeping (waiting on something to happen). */ unsigned _starpu_worker_can_block(unsigned memnode, struct _starpu_worker *worker); /** This function initializes the current driver for the given worker */ void _starpu_driver_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync); /** This function initializes the current thread for the given worker */ void _starpu_worker_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync); static inline unsigned _starpu_worker_get_count(void) { return _starpu_config.topology.nworkers; } #define starpu_worker_get_count _starpu_worker_get_count /** The _starpu_worker structure describes all the state of a StarPU worker. * This function sets the pthread key which stores a pointer to this structure. * */ static inline void _starpu_set_local_worker_key(struct _starpu_worker *worker) { STARPU_ASSERT(_starpu_keys_initialized); STARPU_PTHREAD_SETSPECIFIC(_starpu_worker_key, worker); } /** Returns the _starpu_worker structure that describes the state of the * current worker. */ static inline struct _starpu_worker *_starpu_get_local_worker_key(void) { if (!_starpu_keys_initialized) return NULL; return (struct _starpu_worker *) STARPU_PTHREAD_GETSPECIFIC(_starpu_worker_key); } /** The _starpu_worker_set structure describes all the state of a StarPU worker_set. * This function sets the pthread key which stores a pointer to this structure. * */ static inline void _starpu_set_local_worker_set_key(struct _starpu_worker_set *worker) { STARPU_ASSERT(_starpu_keys_initialized); STARPU_PTHREAD_SETSPECIFIC(_starpu_worker_set_key, worker); } /** Returns the _starpu_worker_set structure that describes the state of the * current worker_set. */ static inline struct _starpu_worker_set *_starpu_get_local_worker_set_key(void) { if (!_starpu_keys_initialized) return NULL; return (struct _starpu_worker_set *) STARPU_PTHREAD_GETSPECIFIC(_starpu_worker_set_key); } /** Returns the _starpu_worker structure that describes the state of the * specified worker. */ static inline struct _starpu_worker *_starpu_get_worker_struct(unsigned id) { STARPU_ASSERT(id < STARPU_NMAXWORKERS); return &_starpu_config.workers[id]; } /** Returns the _starpu_node structure that describes the state of the * specified node. */ static inline struct _starpu_node *_starpu_get_node_struct(unsigned id) { STARPU_ASSERT(id < STARPU_MAXNODES); return &_starpu_config.nodes[id]; } /** Returns the starpu_sched_ctx structure that describes the state of the * specified ctx */ static inline struct _starpu_sched_ctx *_starpu_get_sched_ctx_struct(unsigned id) { return (id > STARPU_NMAX_SCHED_CTXS) ? NULL : &_starpu_config.sched_ctxs[id]; } struct _starpu_combined_worker *_starpu_get_combined_worker_struct(unsigned id); /** Returns the structure that describes the overall machine configuration (eg. * all workers and topology). */ static inline struct _starpu_machine_config *_starpu_get_machine_config(void) { return &_starpu_config; } /** Return whether kernels should be run (<=0) or not (>0) */ static inline int _starpu_get_disable_kernels(void) { return _starpu_config.disable_kernels; } /** Retrieve the status which indicates what the worker is currently doing. */ static inline enum _starpu_worker_status _starpu_worker_get_status(int workerid) { return _starpu_config.workers[workerid].status; } /** Change the status of the worker which indicates what the worker is currently * doing (eg. executing a callback). */ static inline void _starpu_worker_add_status(int workerid, enum _starpu_worker_status_index status) { STARPU_ASSERT(!(_starpu_config.workers[workerid].status & (1 << status))); if (starpu_profiling_status_get()) _starpu_worker_start_state(workerid, status, NULL); _starpu_config.workers[workerid].status |= (1 << status); } /** Change the status of the worker which indicates what the worker is currently * doing (eg. executing a callback). */ static inline void _starpu_worker_clear_status(int workerid, enum _starpu_worker_status_index status) { STARPU_ASSERT((_starpu_config.workers[workerid].status & (1 << status))); if (starpu_profiling_status_get()) _starpu_worker_stop_state(workerid, status, NULL); _starpu_config.workers[workerid].status &= ~(1 << status); } /** We keep an initial sched ctx which might be used in case no other ctx is available */ static inline struct _starpu_sched_ctx* _starpu_get_initial_sched_ctx(void) { return &_starpu_config.sched_ctxs[STARPU_GLOBAL_SCHED_CTX]; } int _starpu_worker_get_nids_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize); /** * returns workers not belonging to any context, be careful no mutex is used, * the list might not be updated */ int _starpu_worker_get_nids_ctx_free_by_type(enum starpu_worker_archtype type, int *workerids, int maxsize); static inline unsigned _starpu_worker_mutex_is_sched_mutex(int workerid, starpu_pthread_mutex_t *mutex) { struct _starpu_worker *w = _starpu_get_worker_struct(workerid); return &w->sched_mutex == mutex; } static inline int _starpu_worker_get_nsched_ctxs(int workerid) { return _starpu_config.workers[workerid].nsched_ctxs; } /** Get the total number of sched_ctxs created till now */ static inline unsigned _starpu_get_nsched_ctxs(void) { /* topology.nsched_ctxs may be increased asynchronously in sched_ctx_create */ STARPU_RMB(); return _starpu_config.topology.nsched_ctxs; } /** Inlined version when building the core. */ static inline int _starpu_worker_get_id(void) { struct _starpu_worker * worker; worker = _starpu_get_local_worker_key(); if (worker) { return worker->workerid; } else { /* there is no worker associated to that thread, perhaps it is * a thread from the application or this is some SPU worker */ return -1; } } #define starpu_worker_get_id _starpu_worker_get_id /** Similar behaviour to starpu_worker_get_id() but fails when called from outside a worker */ /** This returns an unsigned object on purpose, so that the caller is sure to get a positive value */ static inline unsigned __starpu_worker_get_id_check(const char *f, int l) { (void) l; (void) f; int id = starpu_worker_get_id(); STARPU_ASSERT_MSG(id>=0, "%s:%d Cannot be called from outside a worker\n", f, l); return id; } #define _starpu_worker_get_id_check(f,l) __starpu_worker_get_id_check(f,l) void _starpu_worker_set_stream_ctx(unsigned workerid, struct _starpu_sched_ctx *sched_ctx); struct _starpu_sched_ctx* _starpu_worker_get_ctx_stream(unsigned stream_workerid); /** Send a request to the worker to block, before a parallel task is about to * begin. * * Must be called with worker's sched_mutex held. */ static inline void _starpu_worker_request_blocking_in_parallel(struct _starpu_worker * const worker) { _starpu_worker_parallel_blocks = 1; /* flush pending requests to start on a fresh transaction epoch */ while (worker->state_unblock_in_parallel_req) STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); /* announce blocking intent */ STARPU_ASSERT(worker->block_in_parallel_ref_count < UINT_MAX); worker->block_in_parallel_ref_count++; if (worker->block_in_parallel_ref_count == 1) { /* only the transition from 0 to 1 triggers the block_in_parallel_req */ STARPU_ASSERT(!worker->state_blocked_in_parallel); STARPU_ASSERT(!worker->state_block_in_parallel_req); STARPU_ASSERT(!worker->state_block_in_parallel_ack); STARPU_ASSERT(!worker->state_unblock_in_parallel_req); STARPU_ASSERT(!worker->state_unblock_in_parallel_ack); /* trigger the block_in_parallel_req */ worker->state_block_in_parallel_req = 1; STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond); #ifdef STARPU_SIMGRID starpu_pthread_queue_broadcast(&_starpu_simgrid_task_queue[worker->workerid]); #endif /* wait for block_in_parallel_req to be processed */ while (!worker->state_block_in_parallel_ack) STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); STARPU_ASSERT(worker->block_in_parallel_ref_count >= 1); STARPU_ASSERT(worker->state_block_in_parallel_req); STARPU_ASSERT(worker->state_blocked_in_parallel); /* reset block_in_parallel_req state flags */ worker->state_block_in_parallel_req = 0; worker->state_block_in_parallel_ack = 0; /* broadcast block_in_parallel_req state flags reset */ STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond); } } /** Send a request to the worker to unblock, after a parallel task is complete. * * Must be called with worker's sched_mutex held. */ static inline void _starpu_worker_request_unblocking_in_parallel(struct _starpu_worker * const worker) { /* flush pending requests to start on a fresh transaction epoch */ while (worker->state_block_in_parallel_req) STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); /* unblocking may be requested unconditionally * thus, check is unblocking is really needed */ if (worker->state_blocked_in_parallel) { if (worker->block_in_parallel_ref_count == 1) { /* only the transition from 1 to 0 triggers the unblock_in_parallel_req */ STARPU_ASSERT(!worker->state_block_in_parallel_req); STARPU_ASSERT(!worker->state_block_in_parallel_ack); STARPU_ASSERT(!worker->state_unblock_in_parallel_req); STARPU_ASSERT(!worker->state_unblock_in_parallel_ack); /* trigger the unblock_in_parallel_req */ worker->state_unblock_in_parallel_req = 1; STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond); /* wait for the unblock_in_parallel_req to be processed */ while (!worker->state_unblock_in_parallel_ack) STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); STARPU_ASSERT(worker->state_unblock_in_parallel_req); STARPU_ASSERT(!worker->state_blocked_in_parallel); /* reset unblock_in_parallel_req state flags */ worker->state_unblock_in_parallel_req = 0; worker->state_unblock_in_parallel_ack = 0; /* broadcast unblock_in_parallel_req state flags reset */ STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond); } /* announce unblocking complete */ STARPU_ASSERT(worker->block_in_parallel_ref_count > 0); worker->block_in_parallel_ref_count--; } } /** Called by the the worker to process incoming requests to block or unblock on * parallel task boundaries. * * Must be called with worker's sched_mutex held. */ static inline void _starpu_worker_process_block_in_parallel_requests(struct _starpu_worker * const worker) { while (worker->state_block_in_parallel_req) { STARPU_ASSERT(!worker->state_blocked_in_parallel); STARPU_ASSERT(!worker->state_block_in_parallel_ack); STARPU_ASSERT(!worker->state_unblock_in_parallel_req); STARPU_ASSERT(!worker->state_unblock_in_parallel_ack); STARPU_ASSERT(worker->block_in_parallel_ref_count > 0); /* enter effective blocked state */ worker->state_blocked_in_parallel = 1; /* notify block_in_parallel_req processing */ worker->state_block_in_parallel_ack = 1; STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond); /* block */ while (!worker->state_unblock_in_parallel_req) STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); STARPU_ASSERT(worker->state_blocked_in_parallel); STARPU_ASSERT(!worker->state_block_in_parallel_req); STARPU_ASSERT(!worker->state_block_in_parallel_ack); STARPU_ASSERT(!worker->state_unblock_in_parallel_ack); STARPU_ASSERT(worker->block_in_parallel_ref_count > 0); /* leave effective blocked state */ worker->state_blocked_in_parallel = 0; /* notify unblock_in_parallel_req processing */ worker->state_unblock_in_parallel_ack = 1; STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond); } } #ifdef STARPU_SPINLOCK_CHECK #define _starpu_worker_enter_sched_op(worker) __starpu_worker_enter_sched_op((worker), __FILE__, __LINE__, __starpu_func__) static inline void __starpu_worker_enter_sched_op(struct _starpu_worker * const worker, const char*file, int line, const char* func) #else /** Mark the beginning of a scheduling operation by the worker. No worker * blocking operations on parallel tasks and no scheduling context change * operations must be performed on contexts containing the worker, on * contexts about to add the worker and on contexts about to remove the * worker, while the scheduling operation is in process. The sched mutex * of the worker may only be acquired permanently by another thread when * no scheduling operation is in process, or when a scheduling operation * is in process _and_ worker->state_relax_refcnt!=0. If a * scheduling operation is in process _and_ * worker->state_relax_refcnt==0, a thread other than the worker * must wait on condition worker->sched_cond for * worker->state_relax_refcnt!=0 to become true, before acquiring * the worker sched mutex permanently. * * Must be called with worker's sched_mutex held. */ static inline void _starpu_worker_enter_sched_op(struct _starpu_worker * const worker) #endif { STARPU_ASSERT(!worker->state_sched_op_pending); if (!worker->state_blocked_in_parallel_observed) { /* process pending block requests before entering a sched_op region */ _starpu_worker_process_block_in_parallel_requests(worker); while (worker->state_changing_ctx_notice) { STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); /* new block requests may have been triggered during the wait, * need to check again */ _starpu_worker_process_block_in_parallel_requests(worker); } } else { /* if someone observed the worker state since the last call, postpone block request * processing for one sched_op turn more, because the observer will not have seen * new block requests between its observation and now. * * however, the worker still has to wait for context change operations to complete * before entering sched_op again*/ while (worker->state_changing_ctx_notice) { STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); } } /* no block request and no ctx change ahead, * enter sched_op */ worker->state_sched_op_pending = 1; worker->state_blocked_in_parallel_observed = 0; worker->state_relax_refcnt = 0; #ifdef STARPU_SPINLOCK_CHECK worker->relax_on_file = file; worker->relax_on_line = line; worker->relax_on_func = func; #endif } void _starpu_worker_apply_deferred_ctx_changes(void); #ifdef STARPU_SPINLOCK_CHECK #define _starpu_worker_leave_sched_op(worker) __starpu_worker_leave_sched_op((worker), __FILE__, __LINE__, __starpu_func__) static inline void __starpu_worker_leave_sched_op(struct _starpu_worker * const worker, const char*file, int line, const char* func) #else /** Mark the end of a scheduling operation by the worker. * * Must be called with worker's sched_mutex held. */ static inline void _starpu_worker_leave_sched_op(struct _starpu_worker * const worker) #endif { STARPU_ASSERT(worker->state_sched_op_pending); worker->state_relax_refcnt = 1; #ifdef STARPU_SPINLOCK_CHECK worker->relax_off_file = file; worker->relax_off_line = line; worker->relax_off_func = func; #endif worker->state_sched_op_pending = 0; STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond); _starpu_worker_apply_deferred_ctx_changes(); } static inline int _starpu_worker_sched_op_pending(void) { int workerid = starpu_worker_get_id(); if (workerid == -1) return 0; struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); STARPU_ASSERT(worker != NULL); return worker->state_sched_op_pending; } /** Must be called before altering a context related to the worker * whether about adding the worker to a context, removing it from a * context or modifying the set of workers of a context of which the * worker is a member, to mark the beginning of a context change * operation. The sched mutex of the worker must be held before calling * this function. * * Must be called with worker's sched_mutex held. */ static inline void _starpu_worker_enter_changing_ctx_op(struct _starpu_worker * const worker) { STARPU_ASSERT(!starpu_pthread_equal(worker->thread_changing_ctx, starpu_pthread_self())); /* flush pending requests to start on a fresh transaction epoch */ while (worker->state_changing_ctx_notice) STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); /* announce changing_ctx intent * * - an already started sched_op is allowed to complete * - no new sched_op may be started */ worker->state_changing_ctx_notice = 1; worker->thread_changing_ctx = starpu_pthread_self(); /* allow for an already started sched_op to complete */ if (worker->state_sched_op_pending) { /* request sched_op to broadcast when way is cleared */ worker->state_changing_ctx_waiting = 1; /* wait for sched_op completion */ STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond); #ifdef STARPU_SIMGRID starpu_pthread_queue_broadcast(&_starpu_simgrid_task_queue[worker->workerid]); #endif do { STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); } while (worker->state_sched_op_pending); /* reset flag so other sched_ops won't have to broadcast state */ worker->state_changing_ctx_waiting = 0; } } /** Mark the end of a context change operation. * * Must be called with worker's sched_mutex held. */ static inline void _starpu_worker_leave_changing_ctx_op(struct _starpu_worker * const worker) { worker->thread_changing_ctx = (starpu_pthread_t)0; worker->state_changing_ctx_notice = 0; STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond); } #ifdef STARPU_SPINLOCK_CHECK #define _starpu_worker_relax_on() __starpu_worker_relax_on(__FILE__, __LINE__, __starpu_func__) static inline void __starpu_worker_relax_on(const char*file, int line, const char* func) #else /** Temporarily allow other worker to access current worker state, when still scheduling, * but the scheduling has not yet been made or is already done */ static inline void _starpu_worker_relax_on(void) #endif { struct _starpu_worker *worker = _starpu_get_local_worker_key(); if (worker == NULL) return; if (!worker->state_sched_op_pending) return; STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); #ifdef STARPU_SPINLOCK_CHECK STARPU_ASSERT_MSG(worker->state_relax_refcntrelax_on_func, worker->relax_on_file, worker->relax_on_line); #else STARPU_ASSERT(worker->state_relax_refcntstate_relax_refcnt++; #ifdef STARPU_SPINLOCK_CHECK worker->relax_on_file = file; worker->relax_on_line = line; worker->relax_on_func = func; #endif STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); } #define starpu_worker_relax_on _starpu_worker_relax_on #ifdef STARPU_SPINLOCK_CHECK #define _starpu_worker_relax_on_locked(worker) __starpu_worker_relax_on_locked(worker,__FILE__, __LINE__, __starpu_func__) static inline void __starpu_worker_relax_on_locked(struct _starpu_worker *worker, const char*file, int line, const char* func) #else /** Same, but with current worker mutex already held */ static inline void _starpu_worker_relax_on_locked(struct _starpu_worker *worker) #endif { if (!worker->state_sched_op_pending) return; #ifdef STARPU_SPINLOCK_CHECK STARPU_ASSERT_MSG(worker->state_relax_refcntrelax_on_func, worker->relax_on_file, worker->relax_on_line); #else STARPU_ASSERT(worker->state_relax_refcntstate_relax_refcnt++; #ifdef STARPU_SPINLOCK_CHECK worker->relax_on_file = file; worker->relax_on_line = line; worker->relax_on_func = func; #endif STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond); } #ifdef STARPU_SPINLOCK_CHECK #define _starpu_worker_relax_off() __starpu_worker_relax_off(__FILE__, __LINE__, __starpu_func__) static inline void __starpu_worker_relax_off(const char*file, int line, const char* func) #else static inline void _starpu_worker_relax_off(void) #endif { int workerid = starpu_worker_get_id(); if (workerid == -1) return; struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); STARPU_ASSERT(worker != NULL); if (!worker->state_sched_op_pending) return; STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); #ifdef STARPU_SPINLOCK_CHECK STARPU_ASSERT_MSG(worker->state_relax_refcnt>0, "relax last turn off in %s (%s:%d)\n", worker->relax_on_func, worker->relax_on_file, worker->relax_on_line); #else STARPU_ASSERT(worker->state_relax_refcnt>0); #endif worker->state_relax_refcnt--; #ifdef STARPU_SPINLOCK_CHECK worker->relax_off_file = file; worker->relax_off_line = line; worker->relax_off_func = func; #endif STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); } #define starpu_worker_relax_off _starpu_worker_relax_off #ifdef STARPU_SPINLOCK_CHECK #define _starpu_worker_relax_off_locked() __starpu_worker_relax_off_locked(__FILE__, __LINE__, __starpu_func__) static inline void __starpu_worker_relax_off_locked(const char*file, int line, const char* func) #else static inline void _starpu_worker_relax_off_locked(void) #endif { int workerid = starpu_worker_get_id(); if (workerid == -1) return; struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); STARPU_ASSERT(worker != NULL); if (!worker->state_sched_op_pending) return; #ifdef STARPU_SPINLOCK_CHECK STARPU_ASSERT_MSG(worker->state_relax_refcnt>0, "relax last turn off in %s (%s:%d)\n", worker->relax_on_func, worker->relax_on_file, worker->relax_on_line); #else STARPU_ASSERT(worker->state_relax_refcnt>0); #endif worker->state_relax_refcnt--; #ifdef STARPU_SPINLOCK_CHECK worker->relax_off_file = file; worker->relax_off_line = line; worker->relax_off_func = func; #endif } static inline int _starpu_worker_get_relax_state(void) { int workerid = starpu_worker_get_id(); if (workerid < 0) return 1; struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); STARPU_ASSERT(worker != NULL); return worker->state_relax_refcnt != 0; } #define starpu_worker_get_relax_state _starpu_worker_get_relax_state /** lock a worker for observing contents * * notes: * - if the observed worker is not in state_relax_refcnt, the function block until the state is reached */ static inline void _starpu_worker_lock(int workerid) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); STARPU_ASSERT(worker != NULL); int cur_workerid = starpu_worker_get_id(); if (workerid != cur_workerid) { starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); while (!worker->state_relax_refcnt) { STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); } } else { STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); } } #define starpu_worker_lock _starpu_worker_lock static inline int _starpu_worker_trylock(int workerid) { struct _starpu_worker *cur_worker = _starpu_get_local_worker_key(); STARPU_ASSERT(cur_worker != NULL); int cur_workerid = cur_worker->workerid; struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); STARPU_ASSERT(worker != NULL); /* Start with ourself */ int ret = STARPU_PTHREAD_MUTEX_TRYLOCK_SCHED(&cur_worker->sched_mutex); if (ret) return ret; if (workerid == cur_workerid) /* We only needed to lock ourself */ return 0; /* Now try to lock the other worker */ ret = STARPU_PTHREAD_MUTEX_TRYLOCK_SCHED(&worker->sched_mutex); if (!ret) { /* Good, check that it is relaxed */ ret = !worker->state_relax_refcnt; if (ret) STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); } if (!ret) _starpu_worker_relax_on_locked(cur_worker); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&cur_worker->sched_mutex); return ret; } #define starpu_worker_trylock _starpu_worker_trylock static inline void _starpu_worker_unlock(int workerid) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); STARPU_ASSERT(worker != NULL); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); int cur_workerid = starpu_worker_get_id(); if (workerid != cur_workerid) { starpu_worker_relax_off(); } } #define starpu_worker_unlock _starpu_worker_unlock static inline void _starpu_worker_lock_self(void) { int workerid = starpu_worker_get_id_check(); struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); STARPU_ASSERT(worker != NULL); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); } #define starpu_worker_lock_self _starpu_worker_lock_self static inline void _starpu_worker_unlock_self(void) { int workerid = starpu_worker_get_id_check(); struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); STARPU_ASSERT(worker != NULL); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); } #define starpu_worker_unlock_self _starpu_worker_unlock_self static inline int _starpu_wake_worker_relax(int workerid) { _starpu_worker_lock(workerid); int ret = starpu_wake_worker_locked(workerid); _starpu_worker_unlock(workerid); return ret; } #define starpu_wake_worker_relax _starpu_wake_worker_relax int starpu_wake_worker_relax_light(int workerid) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; /** * Allow a worker pulling a task it cannot execute to properly refuse it and * send it back to the scheduler. */ void _starpu_worker_refuse_task(struct _starpu_worker *worker, struct starpu_task *task); void _starpu_set_catch_signals(int do_catch_signal); int _starpu_get_catch_signals(void); /** Performance Monitoring */ static inline int _starpu_perf_counter_paused(void) { STARPU_RMB(); return STARPU_UNLIKELY(_starpu_config.perf_counter_pause_depth > 0); } void _starpu_crash_add_hook(void (*hook_func)(void)); void _starpu_crash_call_hooks(); uint32_t _starpu_worker_exists(struct starpu_task *task); /* @}*/ #pragma GCC visibility pop #endif // __WORKERS_H__ starpu-1.4.9+dfsg/src/datawizard/000077500000000000000000000000001507764646700167315ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/datawizard/coherency.c000066400000000000000000001427211507764646700210630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2018,2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef STARPU_SIMGRID #include #endif static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node, unsigned *handling_node); int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination) { int src_node = -1; unsigned i; unsigned nnodes = starpu_memory_nodes_get_count(); /* first find a valid copy, either a STARPU_OWNER or a STARPU_SHARED */ unsigned node; size_t size = _starpu_data_get_size(handle); double cost = INFINITY; unsigned src_node_mask = 0; for (node = 0; node < nnodes; node++) { if (handle->per_node[node].state != STARPU_INVALID) { /* we found a copy ! */ src_node_mask |= (1<init_cl) { /* No copy yet, but applicationg told us how to build it. */ return -1; } /* we should have found at least one copy ! */ STARPU_ASSERT_MSG(src_node_mask != 0, "The data for the handle %p is requested, but the handle does not have a valid value. Perhaps some initialization task is missing?", handle); /* Without knowing the size, we won't know the cost */ if (!size) cost = 0; /* Check whether we have transfer cost for all nodes, if so, take the minimum */ if (cost) for (i = 0; i < nnodes; i++) { if (src_node_mask & (1<per_node[src_node].allocated || handle->per_node[src_node].mapped != STARPU_UNMAPPED); STARPU_ASSERT(handle->per_node[src_node].initialized); return src_node; } int i_ram = -1; int i_gpu = -1; int i_disk = -1; /* Revert to dumb strategy: take RAM unless only a GPU has it */ for (i = 0; i < nnodes; i++) { if (src_node_mask & (1<ops->copy_methods->can_copy; /* Avoid transfers which the interface does not want */ if (can_copy) { void *src_interface = handle->per_node[i].data_interface; void *dst_interface = handle->per_node[destination].data_interface; unsigned handling_node; if (!link_supports_direct_transfers(handle, i, destination, &handling_node)) { /* Avoid through RAM if the interface does not want it */ void *ram_interface = handle->per_node[STARPU_MAIN_RAM].data_interface; if ((!can_copy(src_interface, i, ram_interface, STARPU_MAIN_RAM, i) && !can_copy(src_interface, i, ram_interface, STARPU_MAIN_RAM, STARPU_MAIN_RAM)) || (!can_copy(ram_interface, STARPU_MAIN_RAM, dst_interface, destination, STARPU_MAIN_RAM) && !can_copy(ram_interface, STARPU_MAIN_RAM, dst_interface, destination, destination))) continue; } } /* however GPU are expensive sources, really ! * Unless peer transfer is supported (and it would then have been selected above). * Other should be ok */ if (starpu_node_get_kind(i) == STARPU_CPU_RAM || starpu_node_get_kind(i) == STARPU_MPI_MS_RAM) i_ram = i; else if (starpu_node_get_kind(i) == STARPU_DISK_RAM) i_disk = i; else i_gpu = i; } } /* we have to use cpu_ram in first */ if (i_ram != -1) src_node = i_ram; else if (i_gpu != -1) /* otherwise a gpu */ src_node = i_gpu; else /* no luck we have to use the disk memory */ src_node = i_disk; STARPU_ASSERT(src_node != -1); STARPU_ASSERT(handle->per_node[src_node].allocated || handle->per_node[src_node].mapped != STARPU_UNMAPPED); STARPU_ASSERT(handle->per_node[src_node].initialized); return src_node; } /* this may be called once the data is fetched with header and STARPU_RW-lock hold */ void _starpu_update_data_state(starpu_data_handle_t handle, struct _starpu_data_replicate *requesting_replicate, enum starpu_data_access_mode mode) { if (mode == STARPU_UNMAP) { /* Unmap request, invalidate */ requesting_replicate->state = STARPU_INVALID; return; } /* There is nothing to do for relaxed coherency modes (scratch or * reductions) */ if (!(mode & STARPU_RW)) return; unsigned nnodes = starpu_memory_nodes_get_count(); /* the data is present now */ unsigned requesting_node = requesting_replicate->memory_node; if (mode & STARPU_W) { /* the requesting node now has the only valid copy */ unsigned node; for (node = 0; node < nnodes; node++) { if (requesting_replicate->mapped == (int) node && !_starpu_node_needs_map_update(requesting_node)) /* The mapped node will be kept up to date */ continue; if (handle->per_node[node].mapped == (int) requesting_node && !_starpu_node_needs_map_update(node)) /* The mapping node will be kept up to date */ continue; if (handle->per_node[node].state != STARPU_INVALID) _STARPU_TRACE_DATA_STATE_INVALID(handle, node); handle->per_node[node].state = STARPU_INVALID; } if (requesting_replicate->state != STARPU_OWNER) _STARPU_TRACE_DATA_STATE_OWNER(handle, requesting_node); requesting_replicate->state = STARPU_OWNER; if (handle->home_node != -1 && handle->per_node[handle->home_node].state == STARPU_INVALID) /* Notify that this MC is now dirty */ _starpu_memchunk_dirty(requesting_replicate->mc, requesting_replicate->memory_node); } else { /* read only */ if (requesting_replicate->state != STARPU_OWNER) { /* there was at least another copy of the data */ unsigned node; for (node = 0; node < nnodes; node++) { struct _starpu_data_replicate *replicate = &handle->per_node[node]; if (replicate->state != STARPU_INVALID) { if (replicate->state != STARPU_SHARED) _STARPU_TRACE_DATA_STATE_SHARED(handle, node); replicate->state = STARPU_SHARED; } } if (requesting_replicate->state != STARPU_SHARED) _STARPU_TRACE_DATA_STATE_SHARED(handle, requesting_node); requesting_replicate->state = STARPU_SHARED; } } } static int worker_supports_direct_access(unsigned node, unsigned handling_node) { if (node == handling_node) return 1; if (!_starpu_memory_node_get_nworkers(handling_node)) /* No worker to process the request from that node */ return 0; const struct _starpu_node_ops *node_ops = _starpu_memory_node_get_node_ops(node); if (node_ops && node_ops->is_direct_access_supported) return node_ops->is_direct_access_supported(node, handling_node); else return 0; } static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node, unsigned *handling_node) { STARPU_ASSERT_MSG(handle->ops->copy_methods, "The handle %s does not define a copy_methods\n", handle->ops->name); int (*can_copy)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, unsigned handling_node) = handle->ops->copy_methods->can_copy; void *src_interface = handle->per_node[src_node].data_interface; void *dst_interface = handle->per_node[dst_node].data_interface; /* Note: with CUDA, performance seems a bit better when issuing the transfer from the destination (tested without GPUDirect, but GPUDirect probably behave the same) */ if (worker_supports_direct_access(src_node, dst_node) && (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, dst_node))) { *handling_node = dst_node; return 1; } if (worker_supports_direct_access(dst_node, src_node) && (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, src_node))) { *handling_node = src_node; return 1; } return 0; } /* Now, we use slowness/bandwidth to compare numa nodes, is it better to use latency ? */ static unsigned chose_best_numa_between_src_and_dest(int src, int dst) { double timing_best; int best_numa = -1; unsigned numa; const unsigned nb_numa_nodes = starpu_memory_nodes_get_numa_count(); for(numa = 0; numa < nb_numa_nodes; numa++) { double actual = 1.0/starpu_transfer_bandwidth(src, numa) + 1.0/starpu_transfer_bandwidth(numa, dst); /* Compare slowness : take the lowest */ if (best_numa < 0 || actual < timing_best) { best_numa = numa; timing_best = actual; } } STARPU_ASSERT(best_numa >= 0); return best_numa; } /* Determines the path of a request : each hop is defined by (src,dst) and the * node that handles the hop. The returned value indicates the number of hops, * and the max_len is the maximum number of hops (ie. the size of the * src_nodes, dst_nodes and handling_nodes arrays. */ int _starpu_determine_request_path(starpu_data_handle_t handle, int src_node, int dst_node, enum starpu_data_access_mode mode, int max_len, unsigned *src_nodes, unsigned *dst_nodes, unsigned *handling_nodes, unsigned write_invalidation) { if ((mode & STARPU_R) && src_node >= 0 && dst_node >= 0) { struct _starpu_data_replicate *src_replicate = &handle->per_node[src_node]; struct _starpu_data_replicate *dst_replicate = &handle->per_node[dst_node]; if (src_replicate->mapped != STARPU_UNMAPPED) { /* Device -> map */ STARPU_ASSERT(max_len >= 1); *src_nodes++ = src_node; *dst_nodes++ = src_replicate->mapped; *handling_nodes++ = src_node; max_len--; /* map -> Device */ int consumed = _starpu_determine_request_path(handle, src_replicate->mapped, dst_node, mode, max_len, src_nodes, dst_nodes, handling_nodes, write_invalidation); return consumed + 1; } else if (dst_replicate->mapped != STARPU_UNMAPPED) { /* Device -> map */ int consumed = _starpu_determine_request_path(handle, src_node, dst_replicate->mapped, mode, max_len, src_nodes, dst_nodes, handling_nodes, write_invalidation); src_nodes += consumed; dst_nodes += consumed; handling_nodes += consumed; max_len -= consumed; /* map -> Device */ STARPU_ASSERT(max_len >= 1); *src_nodes++ = dst_replicate->mapped; *dst_nodes++ = dst_node; *handling_nodes++ = dst_node; max_len--; return consumed + 1; } } if (src_node == dst_node || !(mode & STARPU_R)) { if (dst_node == -1 || starpu_node_get_kind(dst_node) == STARPU_DISK_RAM) handling_nodes[0] = src_node; else handling_nodes[0] = dst_node; if (write_invalidation) /* The invalidation request will be enough */ return 0; /* The destination node should only allocate the data, no transfer is required */ STARPU_ASSERT(max_len >= 1); src_nodes[0] = dst_node; // ignored dst_nodes[0] = dst_node; return 1; } if (src_node < 0) { /* Will just initialize the destination */ STARPU_ASSERT(max_len >= 1); src_nodes[0] = dst_node; // ignored dst_nodes[0] = dst_node; return 1; } unsigned handling_node; int link_is_valid = link_supports_direct_transfers(handle, src_node, dst_node, &handling_node); if (!link_is_valid) { int (*can_copy)(void *, unsigned, void *, unsigned, unsigned) = handle->ops->copy_methods->can_copy; void *src_interface = handle->per_node[src_node].data_interface; void *dst_interface = handle->per_node[dst_node].data_interface; /* We need an intermediate hop to implement data staging * through main memory. */ STARPU_ASSERT(max_len >= 2); STARPU_ASSERT(src_node >= 0); unsigned numa = chose_best_numa_between_src_and_dest(src_node, dst_node); /* GPU -> RAM */ src_nodes[0] = src_node; dst_nodes[0] = numa; if (starpu_node_get_kind(src_node) == STARPU_DISK_RAM) /* Disks don't have their own driver thread */ handling_nodes[0] = dst_node; else if (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, src_node)) { handling_nodes[0] = src_node; } else { STARPU_ASSERT_MSG(can_copy(src_interface, src_node, dst_interface, dst_node, dst_node), "interface %d refuses all kinds of transfers from node %d to node %d\n", handle->ops->interfaceid, src_node, dst_node); handling_nodes[0] = dst_node; } /* RAM -> GPU */ src_nodes[1] = numa; dst_nodes[1] = dst_node; if (starpu_node_get_kind(dst_node) == STARPU_DISK_RAM) /* Disks don't have their own driver thread */ handling_nodes[1] = src_node; else if (!can_copy || can_copy(src_interface, src_node, dst_interface, dst_node, dst_node)) { handling_nodes[1] = dst_node; } else { STARPU_ASSERT_MSG(can_copy(src_interface, src_node, dst_interface, dst_node, src_node), "interface %d refuses all kinds of transfers from node %d to node %d\n", handle->ops->interfaceid, src_node, dst_node); handling_nodes[1] = src_node; } return 2; } else { STARPU_ASSERT(max_len >= 1); src_nodes[0] = src_node; dst_nodes[0] = dst_node; handling_nodes[0] = handling_node; return 1; } } /* handle->lock should be taken. r is returned locked. The node parameter * indicate either the source of the request, or the destination for a * write-only request. */ static struct _starpu_data_request *_starpu_search_existing_data_request(struct _starpu_data_replicate *replicate, unsigned node, enum starpu_data_access_mode mode, struct starpu_task *task, enum starpu_is_prefetch is_prefetch) { struct _starpu_data_request *r; /* Make sure we don't have anything else than R/W */ STARPU_ASSERT(mode != STARPU_UNMAP); for (r = replicate->request[node]; r; r = r->next_same_req) { _starpu_spin_checklocked(&r->handle->header_lock); if (r->canceled) /* Do not reuse a cancelled request */ continue; if (task && r->task && task != r->task) /* Do not collapse requests for different tasks */ continue; _starpu_spin_lock(&r->lock); /* perhaps we need to "upgrade" the request */ if (is_prefetch < r->prefetch) _starpu_update_prefetch_status(r, is_prefetch); /* TODO: abort on unmapping request */ if (mode & STARPU_R) { /* in case the existing request did not imply a memory * transfer yet, we have to take a second refcnt now * for the source, in addition to the refcnt for the * destination * (so that the source remains valid) */ if (!(r->mode & STARPU_R)) { replicate->refcnt++; replicate->handle->busy_count++; } r->mode = (enum starpu_data_access_mode) ((int) r->mode | (int) STARPU_R); } if (mode & STARPU_W) r->mode = (enum starpu_data_access_mode) ((int) r->mode | (int) STARPU_W); /* We collapse with this request */ return r; } return NULL; } /* * This function is called when the data is needed on the local node, this * returns a pointer to the local copy * * R STARPU_W STARPU_RW * Owner OK OK OK * Shared OK 1 1 * Invalid 2 3 4 * * case 1 : shared + (read)write : * no data copy but shared->Invalid/Owner * case 2 : invalid + read : * data copy + invalid->shared + owner->shared (STARPU_ASSERT(there is a valid)) * case 3 : invalid + write : * no data copy + invalid->owner + (owner,shared)->invalid * case 4 : invalid + R/STARPU_W : * data copy + if (STARPU_W) (invalid->owner + owner->invalid) * else (invalid,owner->shared) */ struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_handle_t handle, struct _starpu_data_replicate *dst_replicate, enum starpu_data_access_mode mode, struct starpu_task *task, enum starpu_is_prefetch is_prefetch, unsigned async, void (*callback_func)(void *), void *callback_arg, int prio, const char *origin) { /* We don't care about commuting for data requests, that was handled before. */ mode &= ~STARPU_COMMUTE; /* This function is called with handle's header lock taken */ _starpu_spin_checklocked(&handle->header_lock); /* TODO: If writing copying to RAM, first update maps into RAM, even if RAM is already up to date. */ int requesting_node = dst_replicate ? dst_replicate->memory_node : -1; unsigned nwait = 0; if (mode & STARPU_W) { /* We will write to the buffer. We will have to wait for all * existing requests before the last request which will * invalidate all their results (which were possibly spurious, * e.g. too aggressive eviction). */ unsigned i, j; unsigned nnodes = starpu_memory_nodes_get_count(); for (i = 0; i < nnodes; i++) for (j = 0; j < nnodes; j++) { struct _starpu_data_request *r; for (r = handle->per_node[i].request[j]; r; r = r->next_same_req) nwait++; } /* If the request is not detached (i.e. the caller really wants * proper ownership), no new requests will appear because a * reference will be kept on the dst replicate, which will * notably prevent data reclaiming. */ } if ((!dst_replicate || dst_replicate->state != STARPU_INVALID) && (!nwait || is_prefetch)) { if (dst_replicate) { #ifdef STARPU_MEMORY_STATS enum _starpu_cache_state old_state = dst_replicate->state; #endif /* the data is already available and we don't have to wait for * any request, so we can stop */ _starpu_update_data_state(handle, dst_replicate, mode); _starpu_msi_cache_hit(requesting_node); #ifdef STARPU_MEMORY_STATS _starpu_memory_handle_stats_cache_hit(handle, requesting_node); /* XXX Broken ? */ if (old_state == STARPU_SHARED && dst_replicate->state == STARPU_OWNER) _starpu_memory_handle_stats_shared_to_owner(handle, requesting_node); #endif if (dst_replicate->mc) { if (is_prefetch == STARPU_TASK_PREFETCH) /* Make sure it stays there */ dst_replicate->nb_tasks_prefetch++; _starpu_memchunk_recently_used(dst_replicate->mc, requesting_node); } if (task) { unsigned j; unsigned nnodes = starpu_memory_nodes_get_count(); /* Cancel any existing (prefetch) request */ struct _starpu_data_request *r2; for (j = 0; j < nnodes; j++) { for (r2 = dst_replicate->request[j]; r2; r2 = r2->next_same_req) { if (r2->task && r2->task == task) { r2->canceled = 1; break; } } } } } _starpu_spin_unlock(&handle->header_lock); if (callback_func) callback_func(callback_arg); _STARPU_LOG_OUT_TAG("data available"); return NULL; } if (dst_replicate) _starpu_msi_cache_miss(requesting_node); /* the only remaining situation is that the local copy was invalid */ STARPU_ASSERT((dst_replicate && dst_replicate->state == STARPU_INVALID) || nwait); /* find someone who already has the data */ int src_node = -1; if (dst_replicate && mode & STARPU_R) { if (dst_replicate->state == STARPU_INVALID) src_node = _starpu_select_src_node(handle, requesting_node); else src_node = requesting_node; if (src_node < 0) { /* We will create it, no need to read an existing value */ mode &= ~STARPU_R; } } else if (dst_replicate) { /* if the data is in write only mode (and not SCRATCH or REDUX), there is no need for a source, data will be initialized by the task itself */ if (mode & STARPU_W && is_prefetch <= STARPU_TASK_PREFETCH) dst_replicate->initialized = 1; if (starpu_node_get_kind(requesting_node) == STARPU_CPU_RAM && !nwait && !_starpu_malloc_willpin_on_node(requesting_node)) { /* FIXME: also try to map */ /* And this is the main RAM without pinning, really no need for a * request, just quickly allocate and be done */ if (dst_replicate->mapped != STARPU_UNMAPPED || _starpu_allocate_memory_on_node(handle, dst_replicate, is_prefetch, 0) == 0) { if (is_prefetch <= STARPU_TASK_PREFETCH) _starpu_update_data_state(handle, dst_replicate, mode); if (dst_replicate->mc) { if (is_prefetch == STARPU_TASK_PREFETCH) /* Make sure it stays there */ dst_replicate->nb_tasks_prefetch++; _starpu_memchunk_recently_used(dst_replicate->mc, requesting_node); } _starpu_spin_unlock(&handle->header_lock); if (callback_func) callback_func(callback_arg); _STARPU_LOG_OUT_TAG("data immediately allocated"); return NULL; } } } #define MAX_REQUESTS 4 /* We can safely assume that there won't be more than 2 hops in the * current implementation */ unsigned src_nodes[MAX_REQUESTS], dst_nodes[MAX_REQUESTS], handling_nodes[MAX_REQUESTS]; /* keep one slot for the last W request, if any */ int write_invalidation = (mode & STARPU_W) && nwait && !is_prefetch; int nhops = _starpu_determine_request_path(handle, src_node, requesting_node, mode, MAX_REQUESTS, src_nodes, dst_nodes, handling_nodes, write_invalidation); STARPU_ASSERT(nhops >= 0 && nhops <= MAX_REQUESTS-1); struct _starpu_data_request *requests[nhops + write_invalidation]; /* Did we reuse a request for that hop ? */ int reused_requests[nhops + write_invalidation]; /* Construct an array with a list of requests, possibly reusing existing requests */ int hop; for (hop = 0; hop < nhops; hop++) { struct _starpu_data_request *r; unsigned hop_src_node = src_nodes[hop]; unsigned hop_dst_node = dst_nodes[hop]; unsigned hop_handling_node = handling_nodes[hop]; struct _starpu_data_replicate *hop_src_replicate; struct _starpu_data_replicate *hop_dst_replicate; /* Only the first request is independent */ unsigned ndeps = (hop == 0)?0:1; hop_src_replicate = &handle->per_node[hop_src_node]; hop_dst_replicate = (hop != nhops - 1)?&handle->per_node[hop_dst_node]:dst_replicate; /* Try to reuse a request if possible */ #ifdef STARPU_DEVEL #warning We do not actually want to reuse an existing request when our request is for a task with low priority, that will get executed much later. We don t want to wire down the data in between, at worse that could hog the complete gpu memory... #endif r = _starpu_search_existing_data_request(hop_dst_replicate, (mode & STARPU_R)?hop_src_node:hop_dst_node, mode, task, is_prefetch); reused_requests[hop] = !!r; if (!r) { /* Create a new request if there was no request to reuse */ r = _starpu_create_data_request(handle, hop_src_replicate, hop_dst_replicate, hop_handling_node, mode, ndeps, task, is_prefetch, prio, 0, origin); nwait++; } requests[hop] = r; } /* Chain these requests */ for (hop = 0; hop < nhops; hop++) { struct _starpu_data_request *r; r = requests[hop]; if (hop != nhops - 1) { if (!reused_requests[hop + 1]) { r->next_req[r->next_req_count++] = requests[hop + 1]; STARPU_ASSERT(r->next_req_count <= STARPU_MAXNODES); } } else { if (is_prefetch == STARPU_TASK_PREFETCH) /* Make last request add the prefetch count on the mc to keep the data * there until the task gets to execute. */ r->nb_tasks_prefetch++; if (!write_invalidation) /* The last request will perform the callback after termination */ _starpu_data_request_append_callback(r, callback_func, callback_arg); } if (reused_requests[hop]) _starpu_spin_unlock(&r->lock); } if (write_invalidation) { /* Some requests were still pending, we have to add yet another * request, depending on them, which will invalidate their * result. */ struct _starpu_data_request *r = _starpu_create_data_request(handle, dst_replicate, dst_replicate, requesting_node, STARPU_W, nwait, task, is_prefetch, prio, 1, origin); /* and perform the callback after termination */ _starpu_data_request_append_callback(r, callback_func, callback_arg); /* We will write to the buffer. We will have to wait for all * existing requests before the last request which will * invalidate all their results (which were possibly spurious, * e.g. too aggressive eviction). */ unsigned i, j; unsigned nnodes = starpu_memory_nodes_get_count(); for (i = 0; i < nnodes; i++) for (j = 0; j < nnodes; j++) { struct _starpu_data_request *r2; for (r2 = handle->per_node[i].request[j]; r2; r2 = r2->next_same_req) { _starpu_spin_lock(&r2->lock); if (is_prefetch < r2->prefetch) /* Hasten the request we will have to wait for */ _starpu_update_prefetch_status(r2, is_prefetch); r2->next_req[r2->next_req_count++] = r; STARPU_ASSERT(r2->next_req_count <= STARPU_MAXNODES + 1); _starpu_spin_unlock(&r2->lock); nwait--; } } STARPU_ASSERT(nwait == 0); nhops++; requests[nhops - 1] = r; /* existing requests will post this one */ reused_requests[nhops - 1] = 1; } STARPU_ASSERT(nhops); if (!async) requests[nhops - 1]->refcnt++; /* we only submit the first request, the remaining will be * automatically submitted afterward */ if (!reused_requests[0]) _starpu_post_data_request(requests[0]); return requests[nhops - 1]; } int _starpu_fetch_data_on_node(starpu_data_handle_t handle, int node, struct _starpu_data_replicate *dst_replicate, enum starpu_data_access_mode mode, unsigned detached, struct starpu_task *task, enum starpu_is_prefetch is_prefetch, unsigned async, void (*callback_func)(void *), void *callback_arg, int prio, const char *origin) { _STARPU_LOG_IN(); _starpu_spin_lock(&handle->header_lock); if (mode & STARPU_R && is_prefetch > STARPU_FETCH) { unsigned src_node_mask = 0; unsigned nnodes = starpu_memory_nodes_get_count(); unsigned n; for (n = 0; n < nnodes; n++) { if (handle->per_node[n].state != STARPU_INVALID) { /* we found a copy ! */ src_node_mask |= (1<init_cl, "Could not find a valid copy of the data, and no handle initialization function"); _starpu_spin_unlock(&handle->header_lock); return 0; } } if (!detached) { /* Take references which will be released by _starpu_release_data_on_node */ if (dst_replicate) dst_replicate->refcnt++; else if (node == STARPU_ACQUIRE_NO_NODE_LOCK_ALL) { int i; for (i = 0; i < STARPU_MAXNODES; i++) handle->per_node[i].refcnt++; } handle->busy_count++; } struct _starpu_data_request *r; r = _starpu_create_request_to_fetch_data(handle, dst_replicate, mode, task, is_prefetch, async, callback_func, callback_arg, prio, origin); /* If no request was created, the handle was already up-to-date on the * node. In this case, _starpu_create_request_to_fetch_data has already * unlocked the header. */ if (!r) return 0; _starpu_spin_unlock(&handle->header_lock); int ret = async?0:_starpu_wait_data_request_completion(r, 1); _STARPU_LOG_OUT(); return ret; } static int idle_prefetch_data_on_node(starpu_data_handle_t handle, int node, struct _starpu_data_replicate *replicate, enum starpu_data_access_mode mode, struct starpu_task *task, int prio) { return _starpu_fetch_data_on_node(handle, node, replicate, mode, 1, task, STARPU_IDLEFETCH, 1, NULL, NULL, prio, "idle_prefetch_data_on_node"); } static int task_prefetch_data_on_node(starpu_data_handle_t handle, int node, struct _starpu_data_replicate *replicate, enum starpu_data_access_mode mode, struct starpu_task *task, int prio) { return _starpu_fetch_data_on_node(handle, node, replicate, mode, 1, task, STARPU_TASK_PREFETCH, 1, NULL, NULL, prio, "task_prefetch_data_on_node"); } static int STARPU_ATTRIBUTE_UNUSED prefetch_data_on_node(starpu_data_handle_t handle, int node, struct _starpu_data_replicate *replicate, enum starpu_data_access_mode mode, struct starpu_task *task, int prio) { return _starpu_fetch_data_on_node(handle, node, replicate, mode, 1, task, STARPU_PREFETCH, 1, NULL, NULL, prio, "prefetch_data_on_node"); } static int fetch_data(starpu_data_handle_t handle, int node, struct _starpu_data_replicate *replicate, enum starpu_data_access_mode mode, struct starpu_task *task, int prio) { return _starpu_fetch_data_on_node(handle, node, replicate, mode, 0, task, STARPU_FETCH, 0, NULL, NULL, prio, "fetch_data"); } uint32_t _starpu_get_data_refcnt(starpu_data_handle_t handle, unsigned node) { return handle->per_node[node].refcnt; } size_t _starpu_data_get_size(starpu_data_handle_t handle) { return handle->ops->get_size(handle); } size_t _starpu_data_get_alloc_size(starpu_data_handle_t handle) { if (handle->ops->get_alloc_size) return handle->ops->get_alloc_size(handle); else return handle->ops->get_size(handle); } starpu_ssize_t _starpu_data_get_max_size(starpu_data_handle_t handle) { if (handle->ops->get_max_size) return handle->ops->get_max_size(handle); else return -1; } uint32_t _starpu_data_get_footprint(starpu_data_handle_t handle) { return handle->footprint; } /* in case the data was accessed on a write mode, do not forget to * make it accessible again once it is possible ! */ void _starpu_release_data_on_node(starpu_data_handle_t handle, uint32_t default_wt_mask, enum starpu_data_access_mode down_to_mode, struct _starpu_data_replicate *replicate) { uint32_t wt_mask; size_t max_wt_mask = sizeof(wt_mask) * 8; unsigned wt_count = starpu_memory_nodes_get_count(); if (max_wt_mask > STARPU_MAXNODES) max_wt_mask = STARPU_MAXNODES; if (wt_count > max_wt_mask) wt_count = max_wt_mask; wt_mask = default_wt_mask | handle->wt_mask; wt_mask &= (1ULL<memory_node; if (replicate->state != STARPU_INVALID && handle->current_mode & STARPU_W) if (wt_mask && (memory_node >= max_wt_mask || wt_mask & ~(1<header_lock)) { cpt++; _starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC); } if (cpt == STARPU_SPIN_MAXTRY) _starpu_spin_lock(&handle->header_lock); if (down_to_mode == STARPU_NONE) { /* Release refcnt taken by fetch_data_on_node */ replicate->refcnt--; STARPU_ASSERT_MSG(replicate->refcnt >= 0, "handle %p released too many times", handle); STARPU_ASSERT_MSG(handle->busy_count > 0, "handle %p released too many times", handle); handle->busy_count--; } if (!_starpu_notify_data_dependencies(handle, down_to_mode)) _starpu_spin_unlock(&handle->header_lock); } int _starpu_prefetch_task_input_prio(struct starpu_task *task, int target_node, int worker, int prio, enum starpu_is_prefetch prefetch) { #ifdef STARPU_OPENMP struct _starpu_job *j = _starpu_get_job_associated_to_task(task); /* do not attempt to prefetch task input if this is an OpenMP task resuming after blocking */ if (j->discontinuous != 0) return 0; #endif STARPU_ASSERT_MSG(prefetch != STARPU_PREFETCH || !task->prefetched, "Prefetching was already requested for this task! Did you set 'prefetches' to 1 in the starpu_sched_policy structure?"); unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); unsigned index; for (index = 0; index < nbuffers; index++) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index); enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, index); if (mode & (STARPU_SCRATCH|STARPU_REDUX)) continue; int node; if (target_node >= 0) node = _starpu_task_data_get_node_on_node(task, index, target_node); else node = _starpu_task_data_get_node_on_worker(task, index, worker); if (node < 0) continue; struct _starpu_data_replicate *replicate = &handle->per_node[node]; if (prefetch == STARPU_PREFETCH) task_prefetch_data_on_node(handle, node, replicate, mode, task, prio); else idle_prefetch_data_on_node(handle, node, replicate, mode, task, prio); } if (prefetch == STARPU_PREFETCH) task->prefetched = 1; return 0; } int starpu_prefetch_task_input_prio(struct starpu_task *task, int target_node, int worker, int prio) { return _starpu_prefetch_task_input_prio(task, target_node, worker, prio, STARPU_PREFETCH); } int starpu_prefetch_task_input_on_node_prio(struct starpu_task *task, unsigned target_node, int prio) { return starpu_prefetch_task_input_prio(task, target_node, -1, prio); } int starpu_prefetch_task_input_on_node(struct starpu_task *task, unsigned node) { int prio = task->priority; if (task->workerorder) prio = INT_MAX - task->workerorder; return starpu_prefetch_task_input_on_node_prio(task, node, prio); } int starpu_idle_prefetch_task_input_prio(struct starpu_task *task, int target_node, int worker, int prio) { return _starpu_prefetch_task_input_prio(task, target_node, worker, prio, STARPU_IDLEFETCH); } int starpu_idle_prefetch_task_input_on_node_prio(struct starpu_task *task, unsigned target_node, int prio) { return starpu_idle_prefetch_task_input_prio(task, target_node, -1, prio); } int starpu_idle_prefetch_task_input_on_node(struct starpu_task *task, unsigned node) { int prio = task->priority; if (task->workerorder) prio = INT_MAX - task->workerorder; return starpu_idle_prefetch_task_input_on_node_prio(task, node, prio); } int starpu_prefetch_task_input_for_prio(struct starpu_task *task, unsigned worker, int prio) { return starpu_prefetch_task_input_prio(task, -1, worker, prio); } int starpu_prefetch_task_input_for(struct starpu_task *task, unsigned worker) { int prio = task->priority; if (task->workerorder) prio = INT_MAX - task->workerorder; return starpu_prefetch_task_input_for_prio(task, worker, prio); } int starpu_idle_prefetch_task_input_for_prio(struct starpu_task *task, unsigned worker, int prio) { return starpu_idle_prefetch_task_input_prio(task, -1, worker, prio); } int starpu_idle_prefetch_task_input_for(struct starpu_task *task, unsigned worker) { int prio = task->priority; if (task->workerorder) prio = INT_MAX - task->workerorder; return starpu_idle_prefetch_task_input_for_prio(task, worker, prio); } static struct _starpu_data_replicate *get_replicate(starpu_data_handle_t handle, enum starpu_data_access_mode mode, int workerid, unsigned node) { if (mode & (STARPU_SCRATCH|STARPU_REDUX)) { STARPU_ASSERT(workerid >= 0); if (STARPU_RUNNING_ON_VALGRIND || !handle->per_worker) { _starpu_spin_lock(&handle->header_lock); if (!handle->per_worker) _starpu_data_initialize_per_worker(handle); _starpu_spin_unlock(&handle->header_lock); } return &handle->per_worker[workerid]; } else /* That's a "normal" buffer (R/W) */ return &handle->per_node[node]; } /* Callback used when a buffer is send asynchronously to the sink */ static void _starpu_fetch_task_input_cb(void *arg) { struct _starpu_worker * worker = (struct _starpu_worker *) arg; /* increase the number of buffer received */ STARPU_WMB(); (void)STARPU_ATOMIC_ADD(&worker->nb_buffers_transferred, 1); #ifdef STARPU_SIMGRID starpu_pthread_queue_broadcast(&_starpu_simgrid_transfer_queue[worker->memory_node]); #endif } /* Synchronously or asynchronously fetch data for a given task (if it's not there already) * Returns the number of data acquired here. */ /* _starpu_fetch_task_input must be called before * executing the task. __starpu_push_task_output but be called after the * execution of the task. */ /* The driver can either just call _starpu_fetch_task_input with async==0, * or to improve overlapping, it can call _starpu_fetch_task_input with * async==1, then wait for transfers to complete, then call * _starpu_fetch_task_input_tail to complete the fetch. */ int _starpu_fetch_task_input(struct starpu_task *task, struct _starpu_job *j, int async) { struct _starpu_worker *worker = _starpu_get_local_worker_key(); int workerid = worker->workerid; if (async) { worker->task_transferring = task; worker->nb_buffers_transferred = 0; if (worker->ntasks <= 1) _STARPU_TRACE_WORKER_START_FETCH_INPUT(NULL, workerid); } else _STARPU_TRACE_START_FETCH_INPUT(NULL); int profiling = starpu_profiling_status_get(); if (profiling && task->profiling_info) _starpu_clock_gettime(&task->profiling_info->acquire_data_start_time); struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); unsigned nacquires; unsigned index; int indexdup; nacquires = 0; for (index = 0; index < nbuffers; index++) { int ret; starpu_data_handle_t handle = descrs[index].handle; enum starpu_data_access_mode mode = descrs[index].mode; int orig_node = descrs[index].orig_node; int node = _starpu_task_data_get_node_on_worker(task, descrs[index].index, workerid); /* We set this here for coherency with __starpu_push_task_output */ descrs[index].node = node; if (mode == STARPU_NONE || (mode & ((1<= STARPU_ACCESS_MODE_MAX || (mode >> STARPU_MODE_SHIFT) >= (STARPU_SHIFTED_MODE_MAX >> STARPU_MODE_SHIFT)) STARPU_ASSERT_MSG(0, "mode %d (0x%x) is bogus\n", mode, mode); if (node < 0) continue; struct _starpu_data_replicate *local_replicate; for (indexdup = (int) index-1; indexdup >= 0; indexdup--) { starpu_data_handle_t handle_dup = descrs[indexdup].handle; int node_dup = descrs[indexdup].orig_node; if (handle_dup == handle && node_dup == orig_node) /* We have already taken this data, skip it. This * depends on ordering putting writes before reads, see * _starpu_compar_handles */ goto next; if (!_starpu_handles_same_root(handle_dup, handle)) /* We are not checking within the same parent any more, no need to continue checking other handles */ break; } local_replicate = get_replicate(handle, mode, workerid, node); if (async) { ret = _starpu_fetch_data_on_node(handle, node, local_replicate, mode, 0, task, STARPU_FETCH, 1, _starpu_fetch_task_input_cb, worker, task->priority, "_starpu_fetch_task_input"); #ifdef STARPU_SIMGRID if (_starpu_simgrid_fetching_input_cost()) starpu_sleep(0.000001); #endif if (STARPU_UNLIKELY(ret)) { /* Ooops, not enough memory, make worker wait for these for now, and the synchronous call will finish by forcing eviction*/ worker->nb_buffers_totransfer = nacquires; _starpu_add_worker_status(worker, STATUS_INDEX_WAITING, NULL); return 0; } } else { ret = fetch_data(handle, node, local_replicate, mode, task, task->priority); #ifdef STARPU_SIMGRID if (_starpu_simgrid_fetching_input_cost()) starpu_sleep(0.000001); #endif if (STARPU_UNLIKELY(ret)) goto enomem; } nacquires++; next: ; } _starpu_add_worker_status(worker, STATUS_INDEX_WAITING, NULL); if (async) { worker->nb_buffers_totransfer = nacquires; return 0; } _starpu_fetch_task_input_tail(task, j, worker); return 0; enomem: _STARPU_TRACE_END_FETCH_INPUT(NULL); _STARPU_DISP("something went wrong with buffer %u\n", index); /* try to unreference all the input that were successfully taken */ unsigned index2; for (index2 = 0; index2 < index; index2++) { starpu_data_handle_t handle = descrs[index2].handle; enum starpu_data_access_mode mode = descrs[index2].mode; int orig_node = descrs[index2].orig_node; int node = descrs[index2].node; struct _starpu_data_replicate *local_replicate; for (indexdup = (int) index2+1; indexdup < (int) index; indexdup++) { starpu_data_handle_t handle_dup = descrs[indexdup].handle; int node_dup = descrs[indexdup].orig_node; if (handle_dup == handle && node_dup == orig_node) /* We have already released this data, skip it. This * depends on ordering putting writes before reads, see * _starpu_compar_handles */ goto next2; if (!_starpu_handles_same_root(handle_dup, handle)) /* We are not checking within the same parent any more, no need to continue checking other handles */ break; } local_replicate = get_replicate(handle, mode, workerid, node); _starpu_release_data_on_node(handle, 0, STARPU_NONE, local_replicate); next2: ; } return -1; } /* Now that we have taken the data locks in locking order, fill the codelet interfaces in function order. */ void _starpu_fetch_task_input_tail(struct starpu_task *task, struct _starpu_job *j, struct _starpu_worker *worker) { int workerid = worker->workerid; int profiling = starpu_profiling_status_get(); unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); unsigned index; unsigned long total_size = 0; for (index = 0; index < nbuffers; index++) { starpu_data_handle_t handle = descrs[index].handle; enum starpu_data_access_mode mode = descrs[index].mode; int node = descrs[index].node; if (node < 0) continue; struct _starpu_data_replicate *local_replicate; int needs_init; local_replicate = get_replicate(handle, mode, workerid, node); _starpu_spin_lock(&handle->header_lock); if (local_replicate->mc) { if (task->prefetched && local_replicate->initialized && /* See prefetch conditions in * starpu_prefetch_task_input_on_node_prio and alike */ !(mode & (STARPU_SCRATCH|STARPU_REDUX)) && (mode & STARPU_R)) { /* Allocations or transfer prefetches should have been done by now and marked * this mc as needed for us. * Now that we added a reference for the task, we can relieve that. */ /* Note: the replicate might have been evicted in between, thus not 100% sure * that our prefetch request is still recorded here. */ if (local_replicate->nb_tasks_prefetch > 0) local_replicate->nb_tasks_prefetch--; } } if (!(mode & STARPU_R) && (mode & STARPU_W)) { /* The task will be initializing it. Possibly we have * only prefetched the allocation, and now we have to * record that we'll modify it. */ local_replicate->initialized = 1; _starpu_update_data_state(handle, local_replicate, mode); } needs_init = !local_replicate->initialized; _starpu_spin_unlock(&handle->header_lock); _STARPU_TASK_SET_INTERFACE(task , local_replicate->data_interface, descrs[index].index); /* If the replicate was not initialized yet, we have to do it now */ if (!(mode & STARPU_SCRATCH) && needs_init) _starpu_init_data_replicate(handle, local_replicate, workerid); #ifdef STARPU_USE_FXT if (fut_active) total_size += _starpu_data_get_size(handle); #endif } _STARPU_TRACE_DATA_LOAD(workerid,total_size); if (profiling && task->profiling_info) _starpu_clock_gettime(&task->profiling_info->acquire_data_end_time); _STARPU_TRACE_END_FETCH_INPUT(NULL); _starpu_clear_worker_status(worker, STATUS_INDEX_WAITING, NULL); } /* Release task data dependencies */ void __starpu_push_task_output(struct _starpu_job *j) { #ifdef STARPU_OPENMP STARPU_ASSERT(!j->continuation); #endif int profiling = starpu_profiling_status_get(); struct starpu_task *task = j->task; if (profiling && task->profiling_info) _starpu_clock_gettime(&task->profiling_info->release_data_start_time); struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); int workerid = starpu_worker_get_id(); unsigned index; int indexdup; for (index = 0; index < nbuffers; index++) { starpu_data_handle_t handle = descrs[index].handle; enum starpu_data_access_mode mode = descrs[index].mode; int orig_node = descrs[index].orig_node; int node = descrs[index].node; struct _starpu_data_replicate *local_replicate = NULL; for (indexdup = (int) index-1; indexdup >= 0; indexdup--) { starpu_data_handle_t handle_dup = descrs[indexdup].handle; int node_dup = descrs[indexdup].orig_node; if (handle_dup == handle && node_dup == orig_node) /* We have already released this data, skip it. This * depends on ordering putting writes before reads, see * _starpu_compar_handles */ goto next; if (!_starpu_handles_same_root(handle_dup, handle)) /* We are not checking within the same parent any more, no need to continue checking other handles */ break; } if (node != -1) local_replicate = get_replicate(handle, mode, workerid, node); /* Keep a reference for future * _starpu_release_task_enforce_sequential_consistency call */ _starpu_spin_lock(&handle->header_lock); handle->busy_count++; if (node == -1) { /* NOWHERE case, just notify dependencies */ if (!_starpu_notify_data_dependencies(handle, STARPU_NONE)) _starpu_spin_unlock(&handle->header_lock); } else { _starpu_spin_unlock(&handle->header_lock); _starpu_release_data_on_node(handle, 0, STARPU_NONE, local_replicate); } next: ; } if (profiling && task->profiling_info) _starpu_clock_gettime(&task->profiling_info->release_data_end_time); } /* Version for a driver running on a worker: we show the driver state in the trace */ void _starpu_push_task_output(struct _starpu_job *j) { _STARPU_TRACE_START_PUSH_OUTPUT(NULL); __starpu_push_task_output(j); _STARPU_TRACE_END_PUSH_OUTPUT(NULL); } struct fetch_nowhere_wrapper { struct _starpu_job *j; unsigned pending; }; static void _starpu_fetch_nowhere_task_input_cb(void *arg); /* Asynchronously fetch data for a task which will have no content */ void _starpu_fetch_nowhere_task_input(struct _starpu_job *j) { int profiling = starpu_profiling_status_get(); struct starpu_task *task = j->task; if (profiling && task->profiling_info) _starpu_clock_gettime(&task->profiling_info->acquire_data_start_time); struct _starpu_data_descr *descrs = _STARPU_JOB_GET_ORDERED_BUFFERS(j); unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); unsigned nfetchbuffers = 0; struct fetch_nowhere_wrapper *wrapper; unsigned index; for (index = 0; index < nbuffers; index++) { /* Note here we just follow what was requested, and not use _starpu_task_data_get_node* */ int node = -1; if (task->cl->specific_nodes) node = STARPU_CODELET_GET_NODE(task->cl, descrs[index].index); descrs[index].node = node; if (node != -1) nfetchbuffers++; } if (!nfetchbuffers) { /* Nothing to fetch actually, already finished! */ __starpu_push_task_output(j); _starpu_handle_job_termination(j); _STARPU_LOG_OUT_TAG("handle_job_termination"); return; } _STARPU_MALLOC(wrapper, (sizeof(*wrapper))); wrapper->j = j; /* +1 for the call below */ wrapper->pending = nfetchbuffers + 1; for (index = 0; index < nbuffers; index++) { starpu_data_handle_t handle = descrs[index].handle; enum starpu_data_access_mode mode = descrs[index].mode; int node = descrs[index].node; if (node == -1) continue; if (mode == STARPU_NONE || (mode & ((1<= STARPU_ACCESS_MODE_MAX || (mode >> STARPU_MODE_SHIFT) >= (STARPU_SHIFTED_MODE_MAX >> STARPU_MODE_SHIFT)) STARPU_ASSERT_MSG(0, "mode %d (0x%x) is bogus\n", mode, mode); STARPU_ASSERT(mode != STARPU_SCRATCH && mode != STARPU_REDUX); struct _starpu_data_replicate *local_replicate; local_replicate = get_replicate(handle, mode, -1, node); _starpu_fetch_data_on_node(handle, node, local_replicate, mode, 0, task, STARPU_FETCH, 1, _starpu_fetch_nowhere_task_input_cb, wrapper, 0, "_starpu_fetch_nowhere_task_input"); } if (profiling && task->profiling_info) _starpu_clock_gettime(&task->profiling_info->acquire_data_end_time); /* Finished working with the task, release our reference */ _starpu_fetch_nowhere_task_input_cb(wrapper); } static void _starpu_fetch_nowhere_task_input_cb(void *arg) { /* One more transfer finished */ struct fetch_nowhere_wrapper *wrapper = arg; unsigned pending = STARPU_ATOMIC_ADD(&wrapper->pending, -1); ANNOTATE_HAPPENS_BEFORE(&wrapper->pending); if (pending == 0) { ANNOTATE_HAPPENS_AFTER(&wrapper->pending); /* Finished transferring, task is over */ struct _starpu_job *j = wrapper->j; free(wrapper); __starpu_push_task_output(j); _starpu_handle_job_termination(j); _STARPU_LOG_OUT_TAG("handle_job_termination"); } } /* NB : this value can only be an indication of the status of a data at some point, but there is no strong guarantee ! */ unsigned starpu_data_is_on_node(starpu_data_handle_t handle, unsigned node) { unsigned ret = 0; // XXX : this is just a hint, so we don't take the lock ... // STARPU_PTHREAD_SPIN_LOCK(&handle->header_lock); if (handle->per_node[node].state != STARPU_INVALID) { ret = 1; } else { unsigned i; unsigned nnodes = starpu_memory_nodes_get_count(); for (i = 0; i < nnodes; i++) { if (handle->per_node[node].request[i]) { ret = 1; break; } } } // STARPU_PTHREAD_SPIN_UNLOCK(&handle->header_lock); return ret; } /* Unmap the data from this node, e.g. before partitioning or unregistering */ void _starpu_data_unmap(starpu_data_handle_t handle, unsigned node) { struct _starpu_data_request *r = NULL; STARPU_ASSERT(handle); _starpu_spin_lock(&handle->header_lock); if (handle->per_node[node].mapped != STARPU_UNMAPPED) { r = _starpu_create_data_request(handle, &handle->per_node[handle->per_node[node].mapped], &handle->per_node[node], node, STARPU_UNMAP, 0, NULL, STARPU_FETCH, 0, 0, __func__); r->refcnt++; _starpu_post_data_request(r); } _starpu_spin_unlock(&handle->header_lock); if (r) _starpu_wait_data_request_completion(r, 1); } starpu-1.4.9+dfsg/src/datawizard/coherency.h000066400000000000000000000376431507764646700210760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __COHERENCY__H__ #define __COHERENCY__H__ /** @file */ #include #include #include #include #include #include #include #include #include #include #include #pragma GCC visibility push(hidden) enum _starpu_cache_state { STARPU_OWNER, STARPU_SHARED, STARPU_INVALID }; /** this should contain the information relative to a given data replicate */ struct _starpu_data_replicate { starpu_data_handle_t handle; /** describe the actual data layout, as manipulated by data interfaces in *_interface.c */ void *data_interface; /** How many requests or tasks are currently working with this replicate */ int refcnt; char memory_node; /** describes the state of the local data in term of coherency */ enum _starpu_cache_state state: 2; /** A buffer that is used for SCRATCH or reduction cannot be used with * filters. */ unsigned relaxed_coherency:2; /** We may need to initialize the replicate with some value before using it. */ unsigned initialized:1; /** is the data locally allocated ? */ unsigned allocated:1; /** was it automatically allocated ? (else it's the application-provided * buffer, don't ever try to free it!) */ /** perhaps the allocation was perform higher in the hierarchy * for now this is just translated into !automatically_allocated * */ unsigned automatically_allocated:1; /** is the write side enabled on the mapping? * This is important for drivers which may actually make a copy instead * of a map. * * Only meaningful when mapped != STARPU_UNMAPPED */ unsigned map_write:1; #define STARPU_UNMAPPED -1 /** >= 0 when the data just a mapping of a replicate from that memory node, * otherwise STARPU_UNMAPPED */ int mapped; /** To help the scheduling policies to make some decision, we may keep a track of the tasks that are likely to request this data on the current node. It is the responsibility of the scheduling _policy_ to set that flag when it assigns a task to a queue, policies which do not use this hint can simply ignore it. */ uint32_t requested; /** This tracks the list of requests to provide the value */ struct _starpu_data_request *request[STARPU_MAXNODES]; /** This points to the last entry of request, to easily append to the list */ struct _starpu_data_request *last_request[STARPU_MAXNODES]; /* Which request is loading data here */ struct _starpu_data_request *load_request; /** The number of prefetches that we made for this replicate for various tasks * This is also the number of tasks that we will wait to see use the mc before * we attempt to evict it. */ unsigned nb_tasks_prefetch; /** Pointer to memchunk for LRU strategy */ struct _starpu_mem_chunk * mc; }; struct _starpu_data_requester_prio_list; struct _starpu_jobid_list { unsigned long id; struct _starpu_jobid_list *next; }; /** This structure describes a simply-linked list of task */ struct _starpu_task_wrapper_list { struct starpu_task *task; struct _starpu_task_wrapper_list *next; }; /** This structure describes a doubly-linked list of task */ struct _starpu_task_wrapper_dlist { struct starpu_task *task; struct _starpu_task_wrapper_dlist *next; struct _starpu_task_wrapper_dlist *prev; }; extern int _starpu_has_not_important_data; typedef void (*_starpu_data_handle_unregister_hook)(starpu_data_handle_t); /** This is initialized in both _starpu_register_new_data and _starpu_data_partition */ struct _starpu_data_state { int magic; struct _starpu_data_requester_prio_list req_list; /** the number of requests currently in the scheduling engine (not in * the req_list anymore), i.e. the number of holders of the * current_mode rwlock */ unsigned refcnt; /** whether we are already unlocking data requests */ unsigned unlocking_reqs; /** Current access mode. Is always either STARPU_R, STARPU_W, * STARPU_SCRATCH or STARPU_REDUX, but never a combination such as * STARPU_RW. */ enum starpu_data_access_mode current_mode; /** protect meta data */ struct _starpu_spinlock header_lock; /** Condition to make application wait for all transfers before freeing handle */ /** busy_count is the number of handle->refcnt, handle->per_node[*]->refcnt, number of starpu_data_requesters, and number of tasks that have released it but are still registered on the implicit data dependency lists. */ /** Core code which releases busy_count has to call * _starpu_data_check_not_busy to let starpu_data_unregister proceed */ unsigned busy_count; /** Is starpu_data_unregister waiting for busy_count? */ unsigned busy_waiting; starpu_pthread_mutex_t busy_mutex; starpu_pthread_cond_t busy_cond; /** In case we user filters, the handle may describe a sub-data */ struct _starpu_data_state *root_handle; /** root of the tree */ struct _starpu_data_state *father_handle; /** father of the node, NULL if the current node is the root */ starpu_data_handle_t *active_children; /** The currently active set of read-write children */ unsigned active_nchildren; starpu_data_handle_t **active_readonly_children; /** The currently active set of read-only children */ unsigned *active_readonly_nchildren; /** Size of active_readonly_children[i] array */ unsigned nactive_readonly_children; /** Size of active_readonly_children and active_readonly_nchildren arrays. Actual use is given by 'partitioned' */ /** Our siblings in the father partitioning */ unsigned nsiblings; /** How many siblings */ starpu_data_handle_t *siblings; unsigned sibling_index; /** indicate which child this node is from the father's perspective (if any) */ unsigned depth; /** what's the depth of the tree ? */ #ifdef STARPU_BUBBLE starpu_pthread_mutex_t unpartition_mutex; #endif /** Synchronous partitioning */ starpu_data_handle_t children; unsigned nchildren; /** How many partition plans this handle has */ unsigned nplans; /** Switch codelet for asynchronous partitioning */ struct starpu_codelet *switch_cl; /** size of dyn_nodes recorded in switch_cl */ unsigned switch_cl_nparts; /** Whether a partition plan is currently submitted and the * corresponding unpartition has not been yet * * Or the number of partition plans currently submitted in readonly * mode. */ unsigned partitioned; /** Whether a partition plan is currently submitted in readonly mode */ unsigned part_readonly:1; /** Whether our father is currently partitioned into ourself */ unsigned active:1; unsigned active_ro:1; /** describe the state of the data in term of coherency * This is execution-time state. */ struct _starpu_data_replicate per_node[STARPU_MAXNODES]; struct _starpu_data_replicate *per_worker; struct starpu_data_interface_ops *ops; /** Footprint which identifies data layout */ uint32_t footprint; /* The following bitfields are set from the application initialization */ /** in some case, the application may explicitly tell StarPU that a * piece of data is not likely to be used soon again */ unsigned is_not_important:1; /** Can the data be pushed to the disk? */ unsigned ooc:1; /** Does StarPU have to enforce some implicit data-dependencies ? */ unsigned sequential_consistency:1; /** Whether we shall not ever write to this handle, thus allowing various optimizations */ unsigned readonly:1; /** where is the data home, i.e. which node it was registered from ? -1 if none yet */ int home_node; /** what is the default write-through mask for that data ? */ uint32_t wt_mask; /** for a readonly handle, the number of times that we have returned again the same handle and thus the number of times we have to ignore unregistration requests */ unsigned aliases; /** for a non-readonly handle, a readonly-only duplicate, that we can return from starpu_data_dup_ro */ starpu_data_handle_t readonly_dup; /** for a readonly handle, the non-readonly handle that is referencing is in its readonly_dup field. */ starpu_data_handle_t readonly_dup_of; /* The following bitfields are set from the application submission thread */ /** Is the data initialized, or a task is already submitted to initialize it * This is submission-time initialization state. */ unsigned initialized:1; #ifdef STARPU_OPENMP unsigned removed_from_context_hash:1; #endif /* The following field is set by StarPU at execution time */ /** Whether lazy unregistration was requested through starpu_data_unregister_submit */ unsigned char lazy_unregister; /** This lock should protect any operation to enforce * sequential_consistency */ starpu_pthread_mutex_t sequential_consistency_mutex; /** The last submitted task (or application data request) that declared * it would modify the piece of data ? Any task accessing the data in a * read-only mode should depend on that task implicitly if the * sequential_consistency flag is enabled. */ enum starpu_data_access_mode last_submitted_mode; struct starpu_task *last_sync_task; struct _starpu_task_wrapper_dlist last_submitted_accessors; /** If FxT is enabled, we keep track of "ghost dependencies": that is to * say the dependencies that are not needed anymore, but that should * appear in the post-mortem DAG. For instance if we have the sequence * f(Aw) g(Aw), and that g is submitted after the termination of f, we * want to have f->g appear in the DAG even if StarPU does not need to * enforce this dependency anymore.*/ unsigned last_submitted_ghost_sync_id_is_valid; unsigned long last_submitted_ghost_sync_id; struct _starpu_jobid_list *last_submitted_ghost_accessors_id; /** protected by sequential_consistency_mutex */ struct _starpu_task_wrapper_list *post_sync_tasks; unsigned post_sync_tasks_cnt; /* * Reductions */ /** During reduction we need some specific methods: redux_func performs * the reduction of an interface into another one (eg. "+="), and init_func * initializes the data interface to a default value that is stable by * reduction (eg. 0 for +=). */ struct starpu_codelet *redux_cl; struct starpu_codelet *init_cl; void *redux_cl_arg; void *init_cl_arg; /** Are we currently performing a reduction on that handle ? If so the * reduction_refcnt should be non null until there are pending tasks * that are performing the reduction. */ unsigned reduction_refcnt; /** List of requesters that are specific to the pending reduction. This * list is used when the requests in the req_list list are frozen until * the end of the reduction. */ struct _starpu_data_requester_prio_list reduction_req_list; starpu_data_handle_t *reduction_tmp_handles; /** Final request for write invalidation */ struct _starpu_data_request *write_invalidation_req; /** Used for MPI */ void *mpi_data; _starpu_memory_stats_t memory_stats; unsigned int mf_node; //XXX /** hook to be called when unregistering the data */ _starpu_data_handle_unregister_hook unregister_hook; struct starpu_arbiter *arbiter; /** This is protected by the arbiter mutex */ struct _starpu_data_requester_prio_list arbitered_req_list; /** Data maintained by schedulers themselves */ /** Last worker that took this data in locality mode, or -1 if nobody * took it yet */ int last_locality; /** Application-provided coordinates. The maximum dimension (5) is * relatively arbitrary. */ unsigned dimensions; int coordinates[5]; /** A generic pointer to data in the user land (could be anything and this * is not manage by StarPU) */ void *user_data; /** A generic pointer to data in the scheduler (could be anything and this * is managed by the scheduler) */ void *sched_data; }; /** This does not take a reference on the handle, the caller has to do it, * e.g. through _starpu_attempt_to_submit_data_request_from_apps() * detached means that the core is allowed to drop the request. The caller * should thus *not* take a reference since it can not know whether the request will complete * async means that _starpu_fetch_data_on_node will wait for completion of the request */ int _starpu_fetch_data_on_node(starpu_data_handle_t handle, int node, struct _starpu_data_replicate *replicate, enum starpu_data_access_mode mode, unsigned detached, struct starpu_task *task, enum starpu_is_prefetch is_prefetch, unsigned async, void (*callback_func)(void *), void *callback_arg, int prio, const char *origin); /** This releases a reference on the handle */ void _starpu_release_data_on_node(struct _starpu_data_state *state, uint32_t default_wt_mask, enum starpu_data_access_mode down_to_mode, struct _starpu_data_replicate *replicate); void _starpu_update_data_state(starpu_data_handle_t handle, struct _starpu_data_replicate *requesting_replicate, enum starpu_data_access_mode mode); uint32_t _starpu_get_data_refcnt(struct _starpu_data_state *state, unsigned node); size_t _starpu_data_get_size(starpu_data_handle_t handle); size_t _starpu_data_get_alloc_size(starpu_data_handle_t handle); starpu_ssize_t _starpu_data_get_max_size(starpu_data_handle_t handle); uint32_t _starpu_data_get_footprint(starpu_data_handle_t handle); void __starpu_push_task_output(struct _starpu_job *j); /** Version with driver trace */ void _starpu_push_task_output(struct _starpu_job *j); struct _starpu_worker; STARPU_ATTRIBUTE_WARN_UNUSED_RESULT /** Fetch the data parameters for task \p task * Setting \p async to 1 allows to only start the fetches, and call * \p _starpu_fetch_task_input_tail later when the transfers are finished */ int _starpu_fetch_task_input(struct starpu_task *task, struct _starpu_job *j, int async); void _starpu_fetch_task_input_tail(struct starpu_task *task, struct _starpu_job *j, struct _starpu_worker *worker); void _starpu_fetch_nowhere_task_input(struct _starpu_job *j); int _starpu_select_src_node(struct _starpu_data_state *state, unsigned destination); int _starpu_determine_request_path(starpu_data_handle_t handle, int src_node, int dst_node, enum starpu_data_access_mode mode, int max_len, unsigned *src_nodes, unsigned *dst_nodes, unsigned *handling_nodes, unsigned write_invalidation); /** is_prefetch is whether the DSM may drop the request (when there is not enough memory for instance * async is whether the caller wants a reference on the last request, to be * able to wait for it (which will release that reference). */ struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_handle_t handle, struct _starpu_data_replicate *dst_replicate, enum starpu_data_access_mode mode, struct starpu_task *task, enum starpu_is_prefetch is_prefetch, unsigned async, void (*callback_func)(void *), void *callback_arg, int prio, const char *origin); void _starpu_init_data_replicate(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, int workerid); void _starpu_data_start_reduction_mode(starpu_data_handle_t handle); void _starpu_data_end_reduction_mode(starpu_data_handle_t handle, int priority); void _starpu_data_end_reduction_mode_terminate(starpu_data_handle_t handle); void _starpu_data_unmap(starpu_data_handle_t handle, unsigned node); void _starpu_data_set_unregister_hook(starpu_data_handle_t handle, _starpu_data_handle_unregister_hook func) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; #pragma GCC visibility pop #endif // __COHERENCY__H__ starpu-1.4.9+dfsg/src/datawizard/copy_driver.c000066400000000000000000000674401507764646700214350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef STARPU_SIMGRID #include #endif void _starpu_wake_all_blocked_workers_on_node(unsigned nodeid) { /* wake up all workers on that memory node */ struct _starpu_memory_node_descr * const descr = _starpu_memory_node_get_description(); const int cur_workerid = starpu_worker_get_id(); struct _starpu_worker *cur_worker = cur_workerid>=0?_starpu_get_worker_struct(cur_workerid):NULL; STARPU_PTHREAD_RWLOCK_RDLOCK(&descr->conditions_rwlock); unsigned nconds = descr->condition_count[nodeid]; unsigned cond_id; for (cond_id = 0; cond_id < nconds; cond_id++) { struct _starpu_cond_and_worker *condition; condition = &descr->conditions_attached_to_node[nodeid][cond_id]; if (condition->worker == cur_worker) { if (condition->cond == &condition->worker->sched_cond) { condition->worker->state_keep_awake = 1; } /* No need to wake myself, and I might be called from * the scheduler with mutex locked, through * starpu_prefetch_task_input_on_node */ continue; } /* wake anybody waiting on that condition */ STARPU_PTHREAD_MUTEX_LOCK_SCHED(&condition->worker->sched_mutex); if (condition->cond == &condition->worker->sched_cond) { condition->worker->state_keep_awake = 1; } STARPU_PTHREAD_COND_BROADCAST(condition->cond); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&condition->worker->sched_mutex); } STARPU_PTHREAD_RWLOCK_UNLOCK(&descr->conditions_rwlock); #ifdef STARPU_SIMGRID starpu_pthread_queue_broadcast(&_starpu_simgrid_transfer_queue[nodeid]); #endif } void starpu_wake_all_blocked_workers(void) { /* workers may be blocked on the various queues' conditions */ struct _starpu_memory_node_descr * const descr = _starpu_memory_node_get_description(); const int cur_workerid = starpu_worker_get_id(); struct _starpu_worker *cur_worker = cur_workerid>=0?_starpu_get_worker_struct(cur_workerid):NULL; STARPU_PTHREAD_RWLOCK_RDLOCK(&descr->conditions_rwlock); unsigned nconds = descr->total_condition_count; unsigned cond_id; for (cond_id = 0; cond_id < nconds; cond_id++) { struct _starpu_cond_and_worker *condition; condition = &descr->conditions_all[cond_id]; if (condition->worker == cur_worker) { if (condition->cond == &condition->worker->sched_cond) { condition->worker->state_keep_awake = 1; } /* No need to wake myself, and I might be called from * the scheduler with mutex locked, through * starpu_prefetch_task_input_on_node */ continue; } /* wake anybody waiting on that condition */ STARPU_PTHREAD_MUTEX_LOCK_SCHED(&condition->worker->sched_mutex); if (condition->cond == &condition->worker->sched_cond) { condition->worker->state_keep_awake = 1; } STARPU_PTHREAD_COND_BROADCAST(condition->cond); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&condition->worker->sched_mutex); } STARPU_PTHREAD_RWLOCK_UNLOCK(&descr->conditions_rwlock); #ifdef STARPU_SIMGRID unsigned workerid, nodeid; for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) starpu_pthread_queue_broadcast(&_starpu_simgrid_task_queue[workerid]); for (nodeid = 0; nodeid < starpu_memory_nodes_get_count(); nodeid++) starpu_pthread_queue_broadcast(&_starpu_simgrid_transfer_queue[nodeid]); #endif } #ifdef STARPU_USE_FXT /* we need to identify each communication so that we can match the beginning * and the end of a communication in the trace, so we use a unique identifier * per communication */ static unsigned long communication_cnt = 0; #endif int _starpu_copy_interface_any_to_any(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); int ret = 0; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_copy_disabled_for(src_kind) || starpu_asynchronous_copy_disabled_for(dst_kind) || !copy_methods->any_to_any) { /* this is not associated to a request so it's synchronous */ STARPU_ASSERT_MSG(copy_methods->any_to_any, "Interface <%s> does not define copy_methods->any_to_any", handle->ops->name); copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } else { if (dst_kind == STARPU_CPU_RAM) req->async_channel.node_ops = starpu_memory_driver_info[src_kind].ops; else req->async_channel.node_ops = starpu_memory_driver_info[dst_kind].ops; STARPU_ASSERT(copy_methods->any_to_any); ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); } return ret; } static int copy_data_1_to_1_generic(starpu_data_handle_t handle, struct _starpu_data_replicate *src_replicate, struct _starpu_data_replicate *dst_replicate, struct _starpu_data_request *req) { unsigned src_node = (unsigned)src_replicate->memory_node; unsigned dst_node = (unsigned)dst_replicate->memory_node; STARPU_ASSERT(src_replicate->refcnt); STARPU_ASSERT(dst_replicate->refcnt); STARPU_ASSERT(src_replicate->allocated); STARPU_ASSERT(dst_replicate->allocated); #ifdef STARPU_SIMGRID if (src_node == STARPU_MAIN_RAM || dst_node == STARPU_MAIN_RAM) _starpu_simgrid_data_transfer(handle->ops->get_size(handle), src_node, dst_node); return _starpu_simgrid_transfer(handle->ops->get_size(handle), src_node, dst_node, req); #else /* !SIMGRID */ enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); void *src_interface = src_replicate->data_interface; void *dst_interface = dst_replicate->data_interface; const struct _starpu_node_ops *src_node_ops = _starpu_memory_node_get_node_ops(src_node); const struct _starpu_node_ops *dst_node_ops = _starpu_memory_node_get_node_ops(dst_node); if (src_node_ops && src_node_ops->copy_interface_to[dst_kind]) { return src_node_ops->copy_interface_to[dst_kind](handle, src_interface, src_node, dst_interface, dst_node, req); } else if (dst_node_ops && dst_node_ops->copy_interface_from[src_kind]) { return dst_node_ops->copy_interface_from[src_kind](handle, src_interface, src_node, dst_interface, dst_node, req); } else { STARPU_ABORT_MSG("No copy_interface_to function defined from node %s to node %s\n", _starpu_node_get_prefix(starpu_node_get_kind(src_node)), _starpu_node_get_prefix(starpu_node_get_kind(dst_node))); } #endif /* !SIMGRID */ } static int update_map_generic(starpu_data_handle_t handle, struct _starpu_data_replicate *src_replicate, struct _starpu_data_replicate *dst_replicate, struct _starpu_data_request *req STARPU_ATTRIBUTE_UNUSED) { int src_node = src_replicate->memory_node; int dst_node = dst_replicate->memory_node; STARPU_ASSERT(src_replicate->refcnt); STARPU_ASSERT(dst_replicate->refcnt); STARPU_ASSERT((src_replicate->mapped == dst_node && dst_replicate->allocated) ||(src_replicate->allocated && dst_replicate->mapped == src_node)); void *src_interface = src_replicate->data_interface; void *dst_interface = dst_replicate->data_interface; handle->ops->update_map(src_interface, src_node, dst_interface, dst_node); return 0; } int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT _starpu_driver_copy_data_1_to_1(starpu_data_handle_t handle, struct _starpu_data_replicate *src_replicate, struct _starpu_data_replicate *dst_replicate, unsigned donotread, struct _starpu_data_request *req, enum _starpu_may_alloc may_alloc, enum starpu_is_prefetch prefetch STARPU_ATTRIBUTE_UNUSED) { if (!donotread) { STARPU_ASSERT(src_replicate->allocated || src_replicate->mapped != STARPU_UNMAPPED); STARPU_ASSERT(src_replicate->refcnt); } unsigned src_node = src_replicate->memory_node; unsigned dst_node = dst_replicate->memory_node; if (!dst_replicate->allocated && dst_replicate->mapped == STARPU_UNMAPPED && dst_node != src_node && handle->ops->map_data && (_starpu_memory_node_get_mapped(dst_replicate->memory_node) /* || handle wants it */)) { /* Memory node which can just map the main memory, try to map. */ if (!handle->ops->map_data( src_replicate->data_interface, src_replicate->memory_node, dst_replicate->data_interface, dst_replicate->memory_node)) { dst_replicate->mapped = src_node; if (_starpu_node_needs_map_update(dst_node)) { /* Driver porters: adding your driver here is optional, it is only needed when implementing support for memory mapping */ switch (starpu_node_get_kind(dst_node)) { case STARPU_OPENCL_RAM: /* OpenCL mappings write access defaults to the device */ dst_replicate->map_write = 1; break; case STARPU_CUDA_RAM: dst_replicate->map_write = 0; break; case STARPU_CPU_RAM: default: /* Should not happen */ STARPU_ABORT(); break; } } } } /* first make sure the destination has an allocated buffer */ if (!dst_replicate->allocated && dst_replicate->mapped == STARPU_UNMAPPED) { if (may_alloc==_STARPU_DATAWIZARD_DO_NOT_ALLOC || _starpu_is_reclaiming(dst_node)) /* We're not supposed to allocate there at the moment */ return -ENOMEM; int ret_alloc = _starpu_allocate_memory_on_node(handle, dst_replicate, prefetch, may_alloc==_STARPU_DATAWIZARD_ONLY_FAST_ALLOC); if (ret_alloc) return -ENOMEM; } STARPU_ASSERT(dst_replicate->allocated || dst_replicate->mapped != STARPU_UNMAPPED); STARPU_ASSERT(dst_replicate->refcnt); /* In the case of a mapped data, we are here requested either * - because the destination will write to it, and thus needs write * access. * - because the source was modified, and the destination needs to get * updated. * All in all, any data change will actually trigger both. */ if (!donotread && dst_replicate->mapped != STARPU_UNMAPPED) { STARPU_ASSERT(src_replicate->memory_node == dst_replicate->mapped); if (_starpu_node_needs_map_update(dst_node)) { /* We need to flush from RAM to the device */ if (!dst_replicate->map_write) { update_map_generic(handle, src_replicate, dst_replicate, req); dst_replicate->map_write = 1; } } dst_replicate->initialized = 1; } else if (!donotread && src_replicate->mapped != STARPU_UNMAPPED) { STARPU_ASSERT(dst_replicate->memory_node == src_replicate->mapped); if (_starpu_node_needs_map_update(src_node)) { /* We need to flush from the device to the RAM */ if (src_replicate->map_write) { update_map_generic(handle, src_replicate, dst_replicate, req); src_replicate->map_write = 0; } } dst_replicate->initialized = 1; } /* if there is no need to actually read the data, * we do not perform any transfer */ else if (!donotread) { unsigned long STARPU_ATTRIBUTE_UNUSED com_id = 0; size_t size = _starpu_data_get_size(handle); _starpu_bus_update_profiling_info((int)src_node, (int)dst_node, size); #ifdef STARPU_USE_FXT if (fut_active) { com_id = STARPU_ATOMIC_ADDL(&communication_cnt, 1); if (req) req->com_id = com_id; } #endif dst_replicate->initialized = 1; _STARPU_TRACE_START_DRIVER_COPY(src_node, dst_node, size, com_id, prefetch, handle); int ret_copy = copy_data_1_to_1_generic(handle, src_replicate, dst_replicate, req); if (!req) /* Synchronous, this is already finished */ _STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, com_id, prefetch); return ret_copy; } return 0; } void starpu_interface_data_copy(unsigned src_node, unsigned dst_node, size_t size) { _STARPU_TRACE_DATA_COPY(src_node, dst_node, size); } void starpu_interface_start_driver_copy_async(unsigned src_node, unsigned dst_node, double *start) { *start = starpu_timing_now(); _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node); } void starpu_interface_end_driver_copy_async(unsigned src_node, unsigned dst_node, double start) { double end = starpu_timing_now(); double elapsed = end - start; if (elapsed > 300) { static int warned = 0; STARPU_HG_DISABLE_CHECKING(warned); if (!warned) { char src_name[16], dst_name[16]; warned = 1; starpu_memory_node_get_name(src_node, src_name, sizeof(src_name)); starpu_memory_node_get_name(dst_node, dst_name, sizeof(dst_name)); _STARPU_DISP("Warning: the submission of asynchronous transfer from %s to %s took a very long time (%f ms)\nFor proper asynchronous transfer overlapping, data registered to StarPU must be allocated with starpu_malloc() or pinned with starpu_memory_pin()\n", src_name, dst_name, elapsed / 1000.); } } _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node); } /* This can be used by interfaces to easily transfer a piece of data without * caring about the particular transfer methods. */ /* This should either return 0 if the transfer is complete, or -EAGAIN if the * transfer is still pending, and will have to be waited for by * _starpu_driver_test_request_completion/_starpu_driver_wait_request_completion */ int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, void *async_data) { struct _starpu_async_channel *async_channel = async_data; enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); const struct _starpu_node_ops *src_node_ops = _starpu_memory_node_get_node_ops(src_node); const struct _starpu_node_ops *dst_node_ops = _starpu_memory_node_get_node_ops(dst_node); if (src_node_ops && src_node_ops->copy_data_to[dst_kind]) { return src_node_ops->copy_data_to[dst_kind](src, src_offset, src_node, dst, dst_offset, dst_node, size, async_channel); } else if (dst_node_ops && dst_node_ops->copy_data_from[src_kind]) { return dst_node_ops->copy_data_from[src_kind](src, src_offset, src_node, dst, dst_offset, dst_node, size, async_channel); } else { STARPU_ABORT_MSG("No copy_data_to function defined from node %s to node %s\n", _starpu_node_get_prefix(starpu_node_get_kind(src_node)), _starpu_node_get_prefix(starpu_node_get_kind(dst_node))); return -1; } } int starpu_interface_copy2d(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, void *async_data) { int ret = 0; unsigned i; struct _starpu_async_channel *async_channel = async_data; enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); const struct _starpu_node_ops *src_node_ops = _starpu_memory_node_get_node_ops(src_node); const struct _starpu_node_ops *dst_node_ops = _starpu_memory_node_get_node_ops(dst_node); STARPU_ASSERT_MSG(ld_src >= blocksize, "block size %lu is bigger than ld %lu in source", (unsigned long) blocksize, (unsigned long) ld_src); STARPU_ASSERT_MSG(ld_dst >= blocksize, "block size %lu is bigger than ld %lu in destination", (unsigned long) blocksize, (unsigned long) ld_dst); if (ld_src == blocksize && ld_dst == blocksize) /* Optimize contiguous case */ return starpu_interface_copy(src, src_offset, src_node, dst, dst_offset, dst_node, blocksize * numblocks, async_data); if (src_node_ops && src_node_ops->copy2d_data_to[dst_kind]) /* Hardware-optimized non-contiguous case */ return src_node_ops->copy2d_data_to[dst_kind](src, src_offset, src_node, dst, dst_offset, dst_node, blocksize, numblocks, ld_src, ld_dst, async_channel); if (dst_node_ops && dst_node_ops->copy2d_data_from[src_kind]) /* Hardware-optimized non-contiguous case */ return dst_node_ops->copy2d_data_from[src_kind](src, src_offset, src_node, dst, dst_offset, dst_node, blocksize, numblocks, ld_src, ld_dst, async_channel); for (i = 0; i < numblocks; i++) { if (starpu_interface_copy(src, src_offset + i*ld_src, src_node, dst, dst_offset + i*ld_dst, dst_node, blocksize, async_data)) ret = -EAGAIN; } return ret; } int starpu_interface_copy3d(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks_1, size_t ld1_src, size_t ld1_dst, size_t numblocks_2, size_t ld2_src, size_t ld2_dst, void *async_data) { int ret = 0; unsigned i; struct _starpu_async_channel *async_channel = async_data; enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); const struct _starpu_node_ops *src_node_ops = _starpu_memory_node_get_node_ops(src_node); const struct _starpu_node_ops *dst_node_ops = _starpu_memory_node_get_node_ops(dst_node); STARPU_ASSERT_MSG(ld1_src >= blocksize, "block size %lu is bigger than ld %lu in source", (unsigned long) blocksize, (unsigned long) ld1_src); STARPU_ASSERT_MSG(ld1_dst >= blocksize, "block size %lu is bigger than ld %lu in destination", (unsigned long) blocksize, (unsigned long) ld1_dst); STARPU_ASSERT_MSG(ld2_src >= numblocks_1 * ld1_src, "block group size %lu is bigger than group ld %lu in source", (unsigned long) (numblocks_1 * ld1_src), (unsigned long) ld2_src); STARPU_ASSERT_MSG(ld2_dst >= numblocks_1 * ld1_dst, "block group size %lu is bigger than group ld %lu in destination", (unsigned long) (numblocks_1 * ld1_dst), (unsigned long) ld2_dst); if (ld2_src == blocksize * numblocks_1 && ld2_dst == blocksize * numblocks_1) /* Optimize contiguous case */ return starpu_interface_copy(src, src_offset, src_node, dst, dst_offset, dst_node, blocksize * numblocks_1 * numblocks_2, async_data); if (src_node_ops && src_node_ops->copy3d_data_to[dst_kind]) /* Hardware-optimized non-contiguous case */ return src_node_ops->copy3d_data_to[dst_kind](src, src_offset, src_node, dst, dst_offset, dst_node, blocksize, numblocks_1, ld1_src, ld1_dst, numblocks_2, ld2_src, ld2_dst, async_channel); if (dst_node_ops && dst_node_ops->copy3d_data_from[src_kind]) /* Hardware-optimized non-contiguous case */ return dst_node_ops->copy3d_data_from[src_kind](src, src_offset, src_node, dst, dst_offset, dst_node, blocksize, numblocks_1, ld1_src, ld1_dst, numblocks_2, ld2_src, ld2_dst, async_channel); for (i = 0; i < numblocks_2; i++) { if (starpu_interface_copy2d(src, src_offset + i*ld2_src, src_node, dst, dst_offset + i*ld2_dst, dst_node, blocksize, numblocks_1, ld1_src, ld1_dst, async_data)) ret = -EAGAIN; } return ret; } int starpu_interface_copy4d(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks_1, size_t ld1_src, size_t ld1_dst, size_t numblocks_2, size_t ld2_src, size_t ld2_dst, size_t numblocks_3, size_t ld3_src, size_t ld3_dst, void *async_data) { int ret = 0; unsigned i; STARPU_ASSERT_MSG(ld1_src >= blocksize, "block size %lu is bigger than ld %lu in source", (unsigned long) blocksize, (unsigned long) ld1_src); STARPU_ASSERT_MSG(ld1_dst >= blocksize, "block size %lu is bigger than ld %lu in destination", (unsigned long) blocksize, (unsigned long) ld1_dst); STARPU_ASSERT_MSG(ld2_src >= numblocks_1 * ld1_src, "block group size %lu is bigger than group ld %lu in source", (unsigned long) (numblocks_1 * ld1_src), (unsigned long) ld2_src); STARPU_ASSERT_MSG(ld2_dst >= numblocks_1 * ld1_dst, "block group size %lu is bigger than group ld %lu in destination", (unsigned long) (numblocks_1 * ld1_dst), (unsigned long) ld2_dst); STARPU_ASSERT_MSG(ld3_src >= numblocks_2 * ld2_src, "block group group size %lu is bigger than group group ld %lu in source", (unsigned long) (numblocks_2 * ld2_src), (unsigned long) ld3_src); STARPU_ASSERT_MSG(ld3_dst >= numblocks_2 * ld2_dst, "block group group size %lu is bigger than group group ld %lu in destination", (unsigned long) (numblocks_2 * ld2_dst), (unsigned long) ld3_dst); if (ld3_src == blocksize * numblocks_1 * numblocks_2 && ld3_dst == blocksize * numblocks_1 * numblocks_2) /* Optimize contiguous case */ return starpu_interface_copy(src, src_offset, src_node, dst, dst_offset, dst_node, blocksize * numblocks_1 * numblocks_2 * numblocks_3, async_data); /* Probably won't ever have a 4D interface in drivers :) */ for (i = 0; i < numblocks_3; i++) { if (starpu_interface_copy3d(src, src_offset + i*ld3_src, src_node, dst, dst_offset + i*ld3_dst, dst_node, blocksize, numblocks_1, ld1_src, ld1_dst, numblocks_2, ld2_src, ld2_dst, async_data)) ret = -EAGAIN; } return ret; } uintptr_t starpu_interface_map(uintptr_t src, size_t src_offset, unsigned src_node, unsigned dst_node, size_t size, int *ret) { enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); const struct _starpu_node_ops *node_ops = _starpu_memory_node_get_node_ops(dst_node); if (node_ops && node_ops->map[src_kind]) { return node_ops->map[src_kind](src, src_offset, src_node, dst_node, size, ret); } else { *ret = -EIO; return 0; } } int starpu_interface_unmap(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, unsigned dst_node, size_t size) { enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); const struct _starpu_node_ops *node_ops = _starpu_memory_node_get_node_ops(dst_node); if (node_ops && node_ops->unmap[src_kind]) { return node_ops->unmap[src_kind](src, src_offset, src_node, dst, dst_node, size); } else { STARPU_ABORT_MSG("No unmap function defined from node %s to node %s\n", _starpu_node_get_prefix(starpu_node_get_kind(src_node)), _starpu_node_get_prefix(starpu_node_get_kind(dst_node))); return -1; } } int starpu_interface_update_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size) { enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); const struct _starpu_node_ops *src_node_ops = _starpu_memory_node_get_node_ops(src_node); const struct _starpu_node_ops *dst_node_ops = _starpu_memory_node_get_node_ops(dst_node); if (src_node_ops && src_node_ops->update_map[dst_kind]) { return src_node_ops->update_map[dst_kind](src, src_offset, src_node, dst, dst_offset, dst_node, size); } else if (dst_node_ops && dst_node_ops->update_map[src_kind]) { return dst_node_ops->update_map[src_kind](src, src_offset, src_node, dst, dst_offset, dst_node, size); } else { STARPU_ABORT_MSG("No unmap function defined from node %s to node %s\n", _starpu_node_get_prefix(starpu_node_get_kind(src_node)), _starpu_node_get_prefix(starpu_node_get_kind(dst_node))); return -1; } } static size_t _get_size(uint32_t* nn, size_t ndim) { size_t size = 1; unsigned i; for (i=0; i 0) { for (i = 0; i < ndim-1; i++) { STARPU_ASSERT_MSG(ldn_src[i+1] >= nn[i] * ldn_src[i], "block size %lu is bigger than ld %lu in source", (unsigned long) nn[i] * ldn_src[i], (unsigned long) ldn_src[i+1]); STARPU_ASSERT_MSG(ldn_dst[i+1] >= nn[i] * ldn_dst[i], "block size %lu is bigger than ld %lu in destination", (unsigned long) nn[i] * ldn_dst[i], (unsigned long) ldn_dst[i+1]); } if (ldn_src[ndim-1] == _get_size(nn, ndim-1) && ldn_dst[ndim-1] == _get_size(nn, ndim-1)) /* Optimize contiguous case */ return starpu_interface_copy(src, src_offset, src_node, dst, dst_offset, dst_node, _get_size(nn, ndim) * elemsize, async_data); } if(ndim > 4) { for (i = 0; i < nn[ndim-1]; i++) { if (starpu_interface_copynd(src, src_offset + i*ldn_src[ndim-1]*elemsize, src_node, dst, dst_offset + i*ldn_dst[ndim-1]*elemsize, dst_node, elemsize, ndim-1, nn, ldn_src, ldn_dst, async_data)) ret = -EAGAIN; } } else if(ndim == 4) { return starpu_interface_copy4d(src, src_offset, src_node, dst, dst_offset, dst_node, nn[0] * elemsize, nn[1], ldn_src[1] * elemsize, ldn_dst[1] * elemsize, nn[2], ldn_src[2] * elemsize, ldn_dst[2] * elemsize, nn[3], ldn_src[3] * elemsize, ldn_dst[3] * elemsize, async_data); } else if(ndim == 3) { return starpu_interface_copy3d(src, src_offset, src_node, dst, dst_offset, dst_node, nn[0] * elemsize, nn[1], ldn_src[1] * elemsize, ldn_dst[1] * elemsize, nn[2], ldn_src[2] * elemsize, ldn_dst[2] * elemsize, async_data); } else if(ndim == 2) { return starpu_interface_copy2d(src, src_offset, src_node, dst, dst_offset, dst_node, nn[0] * elemsize, nn[1], ldn_src[1] * elemsize, ldn_dst[1] * elemsize, async_data); } else if (ndim == 1) { return starpu_interface_copy(src, src_offset, src_node, dst, dst_offset, dst_node, nn[0] * elemsize, async_data); } else if (ndim == 0) { return starpu_interface_copy(src, 0, src_node, dst, 0, dst_node, elemsize, async_data); } return ret; } /* Only used at starpu_shutdown */ void _starpu_driver_wait_request_completion(struct _starpu_async_channel *async_channel) { #ifdef STARPU_SIMGRID _starpu_simgrid_wait_transfer_event(&async_channel->event); #else /* !SIMGRID */ const struct _starpu_node_ops *node_ops = async_channel->node_ops; if (node_ops && node_ops->wait_request_completion != NULL) { node_ops->wait_request_completion(async_channel); } else { STARPU_ABORT_MSG("No wait_request_completion function defined for node %s\n", node_ops?node_ops->name:"unknown"); } #endif /* !SIMGRID */ } unsigned _starpu_driver_test_request_completion(struct _starpu_async_channel *async_channel) { #ifdef STARPU_SIMGRID return _starpu_simgrid_test_transfer_event(&async_channel->event); #else /* !SIMGRID */ const struct _starpu_node_ops *node_ops = async_channel->node_ops; if (node_ops && node_ops->test_request_completion != NULL) { return node_ops->test_request_completion(async_channel); } else { STARPU_ABORT_MSG("No test_request_completion function defined for node %s\n", node_ops?node_ops->name:"unknown"); } #endif /* !SIMGRID */ } starpu-1.4.9+dfsg/src/datawizard/copy_driver.h000066400000000000000000000056551507764646700214420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __COPY_DRIVER_H__ #define __COPY_DRIVER_H__ /** @file */ #ifdef HAVE_AIO_H #include #endif #include #include #pragma GCC visibility push(hidden) #ifdef __cplusplus extern "C" { #endif struct _starpu_data_request; struct _starpu_data_replicate; enum _starpu_may_alloc { _STARPU_DATAWIZARD_DO_NOT_ALLOC, _STARPU_DATAWIZARD_DO_ALLOC, _STARPU_DATAWIZARD_ONLY_FAST_ALLOC }; LIST_TYPE(_starpu_disk_backend_event, void *backend_event; ); struct _starpu_disk_event { unsigned memory_node; unsigned node; struct _starpu_disk_backend_event_list * requests; void * ptr; size_t size; starpu_data_handle_t handle; }; /** this is a structure that can be queried to see whether an asynchronous * transfer has terminated or not */ union _starpu_async_channel_event { char data[40]; }; struct _starpu_async_channel { union _starpu_async_channel_event event; const struct _starpu_node_ops *node_ops; /** Which node to polling when needing ACK msg */ struct _starpu_mp_node *polling_node_sender; struct _starpu_mp_node *polling_node_receiver; /** Used to know if the acknowlegdment msg is arrived from sinks */ volatile int starpu_mp_common_finished_sender; volatile int starpu_mp_common_finished_receiver; }; void _starpu_wake_all_blocked_workers_on_node(unsigned nodeid); int _starpu_driver_copy_data_1_to_1(starpu_data_handle_t handle, struct _starpu_data_replicate *src_replicate, struct _starpu_data_replicate *dst_replicate, unsigned donotread, struct _starpu_data_request *req, enum _starpu_may_alloc may_alloc, enum starpu_is_prefetch prefetch); int _starpu_copy_interface_any_to_any(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req); /* Just test for request completion */ unsigned _starpu_driver_test_request_completion(struct _starpu_async_channel *async_channel); /* Wait for request completion. Only used at starpu_shutdown */ void _starpu_driver_wait_request_completion(struct _starpu_async_channel *async_channel); #ifdef __cplusplus } #endif #pragma GCC visibility pop #endif // __COPY_DRIVER_H__ starpu-1.4.9+dfsg/src/datawizard/data_request.c000066400000000000000000001022571507764646700215650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2018,2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include void _starpu_init_data_request_lists(void) { unsigned i, j; enum _starpu_data_request_inout k; for (i = 0; i < STARPU_MAXNODES; i++) { struct _starpu_node *node = _starpu_get_node_struct(i); for (j = 0; j < STARPU_MAXNODES; j++) { for (k = _STARPU_DATA_REQUEST_IN; k <= _STARPU_DATA_REQUEST_OUT; k++) { _starpu_data_request_prio_list_init(&node->data_requests[j][k]); _starpu_data_request_prio_list_init(&node->prefetch_requests[j][k]); _starpu_data_request_prio_list_init(&node->idle_requests[j][k]); #ifndef STARPU_DEBUG /* Tell helgrind that we are fine with checking for list_empty * in _starpu_handle_node_data_requests, we will call it * periodically anyway */ STARPU_HG_DISABLE_CHECKING(node->data_requests[j][k].tree.root); STARPU_HG_DISABLE_CHECKING(node->prefetch_requests[j][k].tree.root); STARPU_HG_DISABLE_CHECKING(node->idle_requests[j][k].tree.root); #endif _starpu_data_request_prio_list_init(&node->data_requests_pending[j][k]); node->data_requests_npending[j][k] = 0; STARPU_PTHREAD_MUTEX_INIT(&node->data_requests_list_mutex[j][k], NULL); STARPU_PTHREAD_MUTEX_INIT(&node->data_requests_pending_list_mutex[j][k], NULL); } } STARPU_HG_DISABLE_CHECKING(node->data_requests_npending); } } void _starpu_deinit_data_request_lists(void) { unsigned i, j; enum _starpu_data_request_inout k; for (i = 0; i < STARPU_MAXNODES; i++) { struct _starpu_node *node = _starpu_get_node_struct(i); for (j = 0; j < STARPU_MAXNODES; j++) { for (k = _STARPU_DATA_REQUEST_IN; k <= _STARPU_DATA_REQUEST_OUT; k++) { _starpu_data_request_prio_list_deinit(&node->data_requests[j][k]); _starpu_data_request_prio_list_deinit(&node->prefetch_requests[j][k]); _starpu_data_request_prio_list_deinit(&node->idle_requests[j][k]); _starpu_data_request_prio_list_deinit(&node->data_requests_pending[j][k]); STARPU_PTHREAD_MUTEX_DESTROY(&node->data_requests_pending_list_mutex[j][k]); STARPU_PTHREAD_MUTEX_DESTROY(&node->data_requests_list_mutex[j][k]); } } } } /* Unlink the request from the handle. New requests can then be made. */ /* this should be called with the lock r->handle->header_lock taken */ static void _starpu_data_request_unlink(struct _starpu_data_request *r) { _starpu_spin_checklocked(&r->handle->header_lock); /* If this is a write invalidation request, we store it in the handle */ if (r->handle->write_invalidation_req == r) { STARPU_ASSERT(r->mode == STARPU_W); r->handle->write_invalidation_req = NULL; } else { unsigned node; struct _starpu_data_request **prevp, *prev; if (r->mode & STARPU_R) /* If this is a read request, we store the pending requests * between src and dst. */ node = r->src_replicate->memory_node; else /* If this is a write only request, then there is no source and * we use the destination node to cache the request. */ node = r->dst_replicate->memory_node; /* Look for ourself in the list, we should be not very far. */ for (prevp = &r->dst_replicate->request[node], prev = NULL; *prevp && *prevp != r; prev = *prevp, prevp = &prev->next_same_req) ; STARPU_ASSERT(*prevp == r); *prevp = r->next_same_req; if (!r->next_same_req) { /* I was last */ STARPU_ASSERT(r->dst_replicate->last_request[node] == r); if (prev) r->dst_replicate->last_request[node] = prev; else r->dst_replicate->last_request[node] = NULL; } } } static void _starpu_data_request_destroy(struct _starpu_data_request *r) { //fprintf(stderr, "DESTROY REQ %p (%d) refcnt %d\n", r, node, r->refcnt); _starpu_data_request_delete(r); } /* handle->lock should already be taken ! */ struct _starpu_data_request *_starpu_create_data_request(starpu_data_handle_t handle, struct _starpu_data_replicate *src_replicate, struct _starpu_data_replicate *dst_replicate, int handling_node, enum starpu_data_access_mode mode, unsigned ndeps, struct starpu_task *task, enum starpu_is_prefetch is_prefetch, int prio, unsigned is_write_invalidation, const char *origin) { struct _starpu_data_request *r = _starpu_data_request_new(); _starpu_spin_checklocked(&handle->header_lock); _starpu_spin_init(&r->lock); _STARPU_TRACE_DATA_REQUEST_CREATED(handle, src_replicate?src_replicate->memory_node:-1, dst_replicate?dst_replicate->memory_node:-1, prio, is_prefetch, r); r->origin = origin; r->handle = handle; r->src_replicate = src_replicate; r->dst_replicate = dst_replicate; r->mode = mode; r->async_channel.node_ops = NULL; r->async_channel.starpu_mp_common_finished_sender = 0; r->async_channel.starpu_mp_common_finished_receiver = 0; r->async_channel.polling_node_sender = NULL; r->async_channel.polling_node_receiver = NULL; memset(&r->async_channel.event, 0, sizeof(r->async_channel.event)); if (handling_node == -1) handling_node = STARPU_MAIN_RAM; r->handling_node = handling_node; if (is_write_invalidation) { r->peer_node = handling_node; r->inout = _STARPU_DATA_REQUEST_IN; } else if (dst_replicate->memory_node == handling_node) { if (src_replicate) r->peer_node = src_replicate->memory_node; else r->peer_node = handling_node; r->inout = _STARPU_DATA_REQUEST_IN; } else { r->peer_node = dst_replicate->memory_node; r->inout = _STARPU_DATA_REQUEST_OUT; } STARPU_ASSERT(starpu_node_get_kind(handling_node) == STARPU_CPU_RAM || _starpu_memory_node_get_nworkers(handling_node)); r->completed = 0; r->added_ref = 0; r->canceled = 0; r->prefetch = is_prefetch; r->task = task; r->nb_tasks_prefetch = 0; r->prio = prio; r->retval = -1; r->ndeps = ndeps; r->next_same_req = NULL; r->next_req_count = 0; r->callbacks = NULL; r->com_id = 0; _starpu_spin_lock(&r->lock); /* For a fetch, take a reference as soon as now on the target, to avoid * replicate eviction */ if (is_prefetch == STARPU_FETCH && dst_replicate) { r->added_ref = 1; dst_replicate->refcnt++; } handle->busy_count++; if (is_write_invalidation) { STARPU_ASSERT(!handle->write_invalidation_req); handle->write_invalidation_req = r; } else { unsigned node; if (mode & STARPU_R) node = src_replicate->memory_node; else node = dst_replicate->memory_node; if (!dst_replicate->request[node]) dst_replicate->request[node] = r; else dst_replicate->last_request[node]->next_same_req = r; dst_replicate->last_request[node] = r; if (mode & STARPU_R) { /* Take a reference on the source for the request to be * able to read it */ src_replicate->refcnt++; handle->busy_count++; } } r->refcnt = 1; _starpu_spin_unlock(&r->lock); return r; } int _starpu_wait_data_request_completion(struct _starpu_data_request *r, enum _starpu_may_alloc may_alloc) { int retval; int do_delete = 0; int completed; #ifdef STARPU_SIMGRID unsigned local_node = starpu_worker_get_local_memory_node(); starpu_pthread_wait_t wait; starpu_pthread_wait_init(&wait); /* We need to get woken both when requests finish on our node, and on * the target node of the request we are waiting for */ starpu_pthread_queue_register(&wait, &_starpu_simgrid_transfer_queue[local_node]); starpu_pthread_queue_register(&wait, &_starpu_simgrid_transfer_queue[(unsigned) r->dst_replicate->memory_node]); #endif struct _starpu_worker *worker = _starpu_get_local_worker_key(); enum _starpu_worker_status old_status = STATUS_UNKNOWN; if (worker) { old_status = worker->status; if (!(old_status & STATUS_WAITING)) _starpu_add_worker_status(worker, STATUS_INDEX_WAITING, NULL); } do { #ifdef STARPU_SIMGRID starpu_pthread_wait_reset(&wait); #endif STARPU_SYNCHRONIZE(); if (STARPU_RUNNING_ON_VALGRIND) completed = 1; else completed = r->completed; if (completed) { _starpu_spin_lock(&r->lock); if (r->completed) break; _starpu_spin_unlock(&r->lock); } #ifndef STARPU_SIMGRID #ifndef STARPU_NON_BLOCKING_DRIVERS /* XXX: shouldn't be needed, and doesn't work with chained requests anyway */ _starpu_wake_all_blocked_workers_on_node(r->handling_node); #endif #endif _starpu_datawizard_progress(may_alloc); #ifdef STARPU_SIMGRID starpu_pthread_wait_wait(&wait); #endif } while (1); if (worker) { if (!(old_status & STATUS_WAITING)) _starpu_clear_worker_status(worker, STATUS_INDEX_WAITING, NULL); } #ifdef STARPU_SIMGRID starpu_pthread_queue_unregister(&wait, &_starpu_simgrid_transfer_queue[local_node]); starpu_pthread_queue_unregister(&wait, &_starpu_simgrid_transfer_queue[(unsigned) r->dst_replicate->memory_node]); starpu_pthread_wait_destroy(&wait); #endif retval = r->retval; if (retval) _STARPU_DISP("REQUEST %p completed with retval %d!\n", r, r->retval); r->refcnt--; /* if nobody is waiting on that request, we can get rid of it */ if (r->refcnt == 0) do_delete = 1; _starpu_spin_unlock(&r->lock); if (do_delete) _starpu_data_request_destroy(r); return retval; } /* this is non blocking */ void _starpu_post_data_request(struct _starpu_data_request *r) { unsigned handling_node = r->handling_node; STARPU_ASSERT(starpu_node_get_kind(handling_node) == STARPU_CPU_RAM || _starpu_memory_node_get_nworkers(handling_node)); // _STARPU_DEBUG("POST REQUEST\n"); /* If some dependencies are not fulfilled yet, we don't actually post the request */ if (r->ndeps > 0) return; struct _starpu_node *node_struct = _starpu_get_node_struct(handling_node); if (r->mode & STARPU_R) { STARPU_ASSERT(r->src_replicate->allocated || r->src_replicate->mapped != STARPU_UNMAPPED); STARPU_ASSERT(r->src_replicate->refcnt); } /* insert the request in the proper list */ STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_list_mutex[r->peer_node][r->inout]); if (r->prefetch >= STARPU_IDLEFETCH) _starpu_data_request_prio_list_push_back(&node_struct->idle_requests[r->peer_node][r->inout], r); else if (r->prefetch > STARPU_FETCH) _starpu_data_request_prio_list_push_back(&node_struct->prefetch_requests[r->peer_node][r->inout], r); else _starpu_data_request_prio_list_push_back(&node_struct->data_requests[r->peer_node][r->inout], r); STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_list_mutex[r->peer_node][r->inout]); #ifndef STARPU_NON_BLOCKING_DRIVERS _starpu_wake_all_blocked_workers_on_node(handling_node); #endif } /* We assume that r->lock is taken by the caller */ void _starpu_data_request_append_callback(struct _starpu_data_request *r, void (*callback_func)(void *), void *callback_arg) { STARPU_ASSERT(r); if (callback_func) { struct _starpu_callback_list *link; _STARPU_MALLOC(link, sizeof(struct _starpu_callback_list)); link->callback_func = callback_func; link->callback_arg = callback_arg; link->next = r->callbacks; r->callbacks = link; } } /* This method is called with handle's header_lock taken, and unlocks it */ static void starpu_handle_data_request_completion(struct _starpu_data_request *r) { unsigned do_delete = 0; starpu_data_handle_t handle = r->handle; enum starpu_data_access_mode mode = r->mode; struct _starpu_data_replicate *src_replicate = r->src_replicate; struct _starpu_data_replicate *dst_replicate = r->dst_replicate; if (r->canceled < 2 && dst_replicate) { #ifdef STARPU_MEMORY_STATS enum _starpu_cache_state old_src_replicate_state = src_replicate->state; #endif _starpu_spin_checklocked(&handle->header_lock); _starpu_update_data_state(handle, r->dst_replicate, mode); dst_replicate->load_request = NULL; #ifdef STARPU_MEMORY_STATS if (src_replicate->state == STARPU_INVALID) { if (old_src_replicate_state == STARPU_OWNER) _starpu_memory_handle_stats_invalidated(handle, src_replicate->memory_node); else { /* XXX Currently only ex-OWNER are tagged as invalidated */ /* XXX Have to check all old state of every node in case a SHARED data become OWNED by the dst_replicate */ } } if (dst_replicate->state == STARPU_SHARED) _starpu_memory_handle_stats_loaded_shared(handle, dst_replicate->memory_node); else if (dst_replicate->state == STARPU_OWNER) { _starpu_memory_handle_stats_loaded_owner(handle, dst_replicate->memory_node); } #endif } #ifdef STARPU_USE_FXT if (fut_active && r->canceled < 2 && r->com_id > 0) { unsigned src_node = src_replicate->memory_node; unsigned dst_node = dst_replicate->memory_node; size_t size = _starpu_data_get_size(handle); _STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, r->com_id, r->prefetch); } #endif /* Once the request has been fulfilled, we may submit the requests that * were chained to that request. */ unsigned chained_req; for (chained_req = 0; chained_req < r->next_req_count; chained_req++) { struct _starpu_data_request *next_req = r->next_req[chained_req]; STARPU_ASSERT(next_req->ndeps > 0); next_req->ndeps--; _starpu_post_data_request(next_req); } r->completed = 1; #ifdef STARPU_SIMGRID /* Wake potential worker which was waiting for it */ if (dst_replicate) _starpu_wake_all_blocked_workers_on_node(dst_replicate->memory_node); #endif /* Remove a reference on the destination replicate for the request */ if (dst_replicate) { if (r->canceled < 2 && dst_replicate->mc) /* Make sure it stays there for the task. */ dst_replicate->nb_tasks_prefetch += r->nb_tasks_prefetch; if (r->added_ref) { STARPU_ASSERT(dst_replicate->refcnt > 0); dst_replicate->refcnt--; } } STARPU_ASSERT(handle->busy_count > 0); handle->busy_count--; /* In case the source was "locked" by the request too */ if (mode & STARPU_R) { STARPU_ASSERT(src_replicate->refcnt > 0); src_replicate->refcnt--; STARPU_ASSERT(handle->busy_count > 0); handle->busy_count--; } _starpu_data_request_unlink(r); unsigned destroyed = _starpu_data_check_not_busy(handle); r->refcnt--; /* if nobody is waiting on that request, we can get rid of it */ if (r->refcnt == 0) do_delete = 1; r->retval = 0; /* In case there are one or multiple callbacks, we execute them now. */ struct _starpu_callback_list *callbacks = r->callbacks; _starpu_spin_unlock(&r->lock); if (do_delete) _starpu_data_request_destroy(r); if (!destroyed) _starpu_spin_unlock(&handle->header_lock); /* We do the callback once the lock is released so that they can do * blocking operations with the handle (eg. release it) */ while (callbacks) { callbacks->callback_func(callbacks->callback_arg); struct _starpu_callback_list *next = callbacks->next; free(callbacks); callbacks = next; } } void _starpu_data_request_complete_wait(void *arg) { struct _starpu_data_request *r = arg; _starpu_spin_lock(&r->handle->header_lock); _starpu_spin_lock(&r->lock); starpu_handle_data_request_completion(r); } /* TODO : accounting to see how much time was spent working for other people ... */ static int starpu_handle_data_request(struct _starpu_data_request *r, enum _starpu_may_alloc may_alloc) { starpu_data_handle_t handle = r->handle; #ifndef STARPU_SIMGRID if (_starpu_spin_trylock(&handle->header_lock)) return -EBUSY; if (_starpu_spin_trylock(&r->lock)) { _starpu_spin_unlock(&handle->header_lock); return -EBUSY; } #else /* Have to wait for the handle, whatever it takes, in simgrid, * since we can not afford going to sleep, since nobody would wake us * up. */ _starpu_spin_lock(&handle->header_lock); _starpu_spin_lock(&r->lock); #endif struct _starpu_data_replicate *src_replicate = r->src_replicate; struct _starpu_data_replicate *dst_replicate = r->dst_replicate; if (r->canceled) { /* Ok, canceled before starting copies etc. */ r->canceled = 2; /* Nothing left to do */ starpu_handle_data_request_completion(r); return 0; } if (dst_replicate) { struct _starpu_data_request *r2 = dst_replicate->load_request; if (r2 && r2 != r) { /* Oh, some other transfer is already loading the value. Just wait for it */ r->canceled = 2; _starpu_spin_unlock(&r->lock); _starpu_spin_lock(&r2->lock); if (r->prefetch < r2->prefetch) /* Upgrade the existing request */ _starpu_update_prefetch_status(r2, r->prefetch); _starpu_data_request_append_callback(r2, _starpu_data_request_complete_wait, r); _starpu_spin_unlock(&r2->lock); _starpu_spin_unlock(&handle->header_lock); return 0; } /* We are loading this replicate. * Note: we might fail to allocate memory, but we will keep on and others will wait for us. */ dst_replicate->load_request = r; } enum starpu_data_access_mode r_mode = r->mode; STARPU_ASSERT(!(r_mode & STARPU_R) || src_replicate); STARPU_ASSERT(!(r_mode & STARPU_R) || src_replicate->allocated || src_replicate->mapped != STARPU_UNMAPPED); STARPU_ASSERT(!(r_mode & STARPU_R) || src_replicate->refcnt); /* For prefetches, we take a reference on the destination only now that * we will really try to fetch the data (instead of in * _starpu_create_data_request) */ if (dst_replicate && r->prefetch > STARPU_FETCH) { r->added_ref = 1; /* Note: we might get upgraded while trying to allocate */ dst_replicate->refcnt++; } _starpu_spin_unlock(&r->lock); if (r_mode == STARPU_UNMAP) { /* Unmap request, simply do it */ STARPU_ASSERT(dst_replicate->mapped == src_replicate->memory_node); STARPU_ASSERT(handle->ops->unmap_data); handle->ops->unmap_data(src_replicate->data_interface, src_replicate->memory_node, dst_replicate->data_interface, dst_replicate->memory_node); dst_replicate->mapped = STARPU_UNMAPPED; r->retval = 0; } /* FIXME: the request may get upgraded from here to freeing it... */ /* perform the transfer */ /* the header of the data must be locked by the worker that submitted the request */ if (dst_replicate && dst_replicate->state == STARPU_INVALID) r->retval = _starpu_driver_copy_data_1_to_1(handle, src_replicate, dst_replicate, !(r_mode & STARPU_R), r, may_alloc, r->prefetch); else /* Already valid actually, no need to transfer anything */ r->retval = 0; if (r->retval == -ENOMEM) { /* If there was not enough memory, we will try to redo the * request later. */ if (r->prefetch > STARPU_FETCH) { STARPU_ASSERT(r->added_ref); /* Drop ref until next try */ r->added_ref = 0; dst_replicate->refcnt--; } _starpu_spin_unlock(&handle->header_lock); return -ENOMEM; } if (r->retval == -EAGAIN) { /* The request was successful, but could not be terminated * immediately. We will handle the completion of the request * asynchronously. The request is put in the list of "pending" * requests in the meantime. */ _starpu_spin_unlock(&handle->header_lock); struct _starpu_node *node_struct = _starpu_get_node_struct(r->handling_node); STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_pending_list_mutex[r->peer_node][r->inout]); _starpu_data_request_prio_list_push_back(&node_struct->data_requests_pending[r->peer_node][r->inout], r); node_struct->data_requests_npending[r->peer_node][r->inout]++; STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_pending_list_mutex[r->peer_node][r->inout]); return -EAGAIN; } /* the request has been handled */ _starpu_spin_lock(&r->lock); starpu_handle_data_request_completion(r); return 0; } static int __starpu_handle_node_data_requests(struct _starpu_data_request_prio_list reqlist[STARPU_MAXNODES][2], unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout, enum _starpu_may_alloc may_alloc, unsigned n, unsigned *pushed, enum starpu_is_prefetch prefetch) { struct _starpu_data_request *r; unsigned i; int ret = 0; *pushed = 0; #ifdef STARPU_NON_BLOCKING_DRIVERS /* This is racy, but not posing problems actually, since we know we * will come back here to probe again regularly anyway. * Thus, do not expose this optimization to helgrind */ if (!STARPU_RUNNING_ON_VALGRIND && _starpu_data_request_prio_list_empty(&reqlist[peer_node][inout])) return 0; #endif struct _starpu_node *node_struct = _starpu_get_node_struct(handling_node); /* We create a new list to pickup some requests from the main list, and * we handle the request(s) one by one from it, without concurrency issues. */ struct _starpu_data_request_list local_list, remain_list; _starpu_data_request_list_init(&local_list); #ifdef STARPU_NON_BLOCKING_DRIVERS /* take all the entries from the request list */ if (STARPU_PTHREAD_MUTEX_TRYLOCK(&node_struct->data_requests_list_mutex[peer_node][inout])) { /* List is busy, do not bother with it */ return -EBUSY; } #else STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_list_mutex[peer_node][inout]); #endif for (i = node_struct->data_requests_npending[peer_node][inout]; i < n && ! _starpu_data_request_prio_list_empty(&reqlist[peer_node][inout]); i++) { r = _starpu_data_request_prio_list_pop_front_highest(&reqlist[peer_node][inout]); _starpu_data_request_list_push_back(&local_list, r); } if (!_starpu_data_request_prio_list_empty(&reqlist[peer_node][inout])) /* We have left some requests */ ret = -EBUSY; STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_list_mutex[peer_node][inout]); if (_starpu_data_request_list_empty(&local_list)) /* there is no request */ return 0; /* This will contain the remaining requests */ _starpu_data_request_list_init(&remain_list); double start = starpu_timing_now(); /* for all entries of the list */ while (!_starpu_data_request_list_empty(&local_list)) { int res; if (node_struct->data_requests_npending[peer_node][inout] >= n) { /* Too many requests at the same time, skip pushing * more for now */ ret = -EBUSY; break; } r = _starpu_data_request_list_pop_front(&local_list); res = starpu_handle_data_request(r, may_alloc); if (res != 0 && res != -EAGAIN) { /* handle is busy, or not enough memory, postpone for now */ ret = res; /* Prefetch requests might have gotten promoted while in tmp list */ _starpu_data_request_list_push_back(&remain_list, r); if (prefetch > STARPU_FETCH) /* Prefetching more there would make the situation even worse */ break; } else (*pushed)++; if (starpu_timing_now() - start >= MAX_PUSH_TIME) { /* We have spent a lot of time doing requests, skip pushing more for now */ ret = -EBUSY; break; } } /* Gather remainder */ _starpu_data_request_list_push_list_back(&remain_list, &local_list); if (!_starpu_data_request_list_empty(&remain_list)) { STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_list_mutex[peer_node][inout]); while (!_starpu_data_request_list_empty(&remain_list)) { r = _starpu_data_request_list_pop_back(&remain_list); if (r->prefetch >= STARPU_IDLEFETCH) _starpu_data_request_prio_list_push_front(&node_struct->idle_requests[r->peer_node][r->inout], r); else if (r->prefetch > STARPU_FETCH) _starpu_data_request_prio_list_push_front(&node_struct->prefetch_requests[r->peer_node][r->inout], r); else _starpu_data_request_prio_list_push_front(&node_struct->data_requests[r->peer_node][r->inout], r); } STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_list_mutex[peer_node][inout]); #ifdef STARPU_SIMGRID if (*pushed) { /* We couldn't process the request due to missing * space. Advance the clock a bit to let eviction have * the time to make some room for us. Ideally we should * rather have the caller block, and explicitly wait * for eviction to happen. */ starpu_sleep(0.000001); _starpu_wake_all_blocked_workers_on_node(handling_node); } #elif !defined(STARPU_NON_BLOCKING_DRIVERS) _starpu_wake_all_blocked_workers_on_node(handling_node); #endif } return ret; } int _starpu_handle_node_data_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout, enum _starpu_may_alloc may_alloc, unsigned *pushed) { return __starpu_handle_node_data_requests(_starpu_get_node_struct(handling_node)->data_requests, handling_node, peer_node, inout, may_alloc, MAX_PENDING_REQUESTS_PER_NODE, pushed, STARPU_FETCH); } int _starpu_handle_node_prefetch_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout, enum _starpu_may_alloc may_alloc, unsigned *pushed) { return __starpu_handle_node_data_requests(_starpu_get_node_struct(handling_node)->prefetch_requests, handling_node, peer_node, inout, may_alloc, MAX_PENDING_PREFETCH_REQUESTS_PER_NODE, pushed, STARPU_PREFETCH); } int _starpu_handle_node_idle_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout, enum _starpu_may_alloc may_alloc, unsigned *pushed) { return __starpu_handle_node_data_requests(_starpu_get_node_struct(handling_node)->idle_requests, handling_node, peer_node, inout, may_alloc, MAX_PENDING_IDLE_REQUESTS_PER_NODE, pushed, STARPU_IDLEFETCH); } static int _handle_pending_node_data_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout, unsigned force) { // _STARPU_DEBUG("_starpu_handle_pending_node_data_requests ...\n"); // struct _starpu_data_request_prio_list new_data_requests_pending; unsigned taken, kept; struct _starpu_node *node_struct = _starpu_get_node_struct(handling_node); #ifdef STARPU_NON_BLOCKING_DRIVERS /* Here helgrind would should that this is an un protected access. * We however don't care about missing an entry, we will get called * again sooner or later. */ if (!STARPU_RUNNING_ON_VALGRIND && _starpu_data_request_prio_list_empty(&node_struct->data_requests_pending[peer_node][inout])) return 0; #endif #ifdef STARPU_NON_BLOCKING_DRIVERS if (!force) { if (STARPU_PTHREAD_MUTEX_TRYLOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout])) { /* List is busy, do not bother with it */ return 0; } } else #endif /* We really want to handle requests */ STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout]); if (_starpu_data_request_prio_list_empty(&node_struct->data_requests_pending[peer_node][inout])) { /* there is no request */ STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout]); return 0; } /* for all entries of the list */ struct _starpu_data_request_prio_list local_list = node_struct->data_requests_pending[peer_node][inout]; _starpu_data_request_prio_list_init(&node_struct->data_requests_pending[peer_node][inout]); STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout]); _starpu_data_request_prio_list_init(&new_data_requests_pending); taken = 0; kept = 0; while (!_starpu_data_request_prio_list_empty(&local_list)) { struct _starpu_data_request *r; r = _starpu_data_request_prio_list_pop_front_highest(&local_list); taken++; starpu_data_handle_t handle = r->handle; #ifndef STARPU_SIMGRID if (force) /* Have to wait for the handle, whatever it takes */ #endif /* Or when running in simgrid, in which case we can not * afford going to sleep, since nobody would wake us * up. */ _starpu_spin_lock(&handle->header_lock); #ifndef STARPU_SIMGRID else if (_starpu_spin_trylock(&handle->header_lock)) { /* Handle is busy, retry this later */ _starpu_data_request_prio_list_push_back(&new_data_requests_pending, r); kept++; continue; } #endif /* This shouldn't be too hard to acquire */ _starpu_spin_lock(&r->lock); /* wait until the transfer is terminated */ if (force) { /* We are doing starpu_shutdown */ _starpu_driver_wait_request_completion(&r->async_channel); starpu_handle_data_request_completion(r); } else { if (_starpu_driver_test_request_completion(&r->async_channel)) { /* The request was completed */ starpu_handle_data_request_completion(r); } else { /* The request was not completed, so we put it * back again on the list of pending requests * so that it can be handled later on. */ _starpu_spin_unlock(&r->lock); _starpu_spin_unlock(&handle->header_lock); _starpu_data_request_prio_list_push_back(&new_data_requests_pending, r); kept++; } } } _starpu_data_request_prio_list_deinit(&local_list); STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout]); node_struct->data_requests_npending[peer_node][inout] -= taken - kept; if (kept) _starpu_data_request_prio_list_push_prio_list_back(&node_struct->data_requests_pending[peer_node][inout], &new_data_requests_pending); STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout]); return taken - kept; } int _starpu_handle_pending_node_data_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout) { return _handle_pending_node_data_requests(handling_node, peer_node, inout, 0); } /* Only used at starpu_shutdown */ int _starpu_handle_all_pending_node_data_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout) { return _handle_pending_node_data_requests(handling_node, peer_node, inout, 1); } /* Note: the returned value will be outdated since the locks are not taken at * entry/exit */ static int __starpu_check_that_no_data_request_exists(unsigned node, unsigned peer_node, enum _starpu_data_request_inout inout) { int no_request; int no_pending; struct _starpu_node *node_struct = _starpu_get_node_struct(node); STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_list_mutex[peer_node][inout]); no_request = _starpu_data_request_prio_list_empty(&node_struct->data_requests[peer_node][inout]) && _starpu_data_request_prio_list_empty(&node_struct->prefetch_requests[peer_node][inout]) && _starpu_data_request_prio_list_empty(&node_struct->idle_requests[peer_node][inout]); STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_list_mutex[peer_node][inout]); STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout]); no_pending = !node_struct->data_requests_npending[peer_node][inout]; STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_pending_list_mutex[peer_node][inout]); return no_request && no_pending; } int _starpu_check_that_no_data_request_exists(unsigned node) { unsigned peer_node, nnodes = starpu_memory_nodes_get_count(); for (peer_node = 0; peer_node < nnodes; peer_node++) if (!__starpu_check_that_no_data_request_exists(node, peer_node, _STARPU_DATA_REQUEST_IN) || !__starpu_check_that_no_data_request_exists(node, peer_node, _STARPU_DATA_REQUEST_OUT)) return 0; return 1; } /* Note: the returned value will be outdated since the locks are not taken at * entry/exit */ int _starpu_check_that_no_data_request_is_pending(unsigned node, unsigned peer_node, enum _starpu_data_request_inout inout) { return !_starpu_get_node_struct(node)->data_requests_npending[peer_node][inout]; } void _starpu_update_prefetch_status(struct _starpu_data_request *r, enum starpu_is_prefetch prefetch) { struct _starpu_node *node_struct = _starpu_get_node_struct(r->handling_node); _starpu_spin_checklocked(&r->handle->header_lock); STARPU_ASSERT(r->prefetch > prefetch); if (prefetch == STARPU_FETCH && !r->added_ref) { /* That would have been done by _starpu_create_data_request */ r->added_ref = 1; r->dst_replicate->refcnt++; } r->prefetch=prefetch; if (prefetch >= STARPU_IDLEFETCH) /* No possible actual change */ return; /* We have to promote chained_request too! */ unsigned chained_req; for (chained_req = 0; chained_req < r->next_req_count; chained_req++) { struct _starpu_data_request *next_req = r->next_req[chained_req]; if (next_req->prefetch > prefetch) _starpu_update_prefetch_status(next_req, prefetch); } STARPU_PTHREAD_MUTEX_LOCK(&node_struct->data_requests_list_mutex[r->peer_node][r->inout]); int found = 1; /* The request can be in a different list (handling request or the temp list) * we have to check that it is really in the prefetch or idle list. */ if (_starpu_data_request_prio_list_ismember(&node_struct->prefetch_requests[r->peer_node][r->inout], r)) _starpu_data_request_prio_list_erase(&node_struct->prefetch_requests[r->peer_node][r->inout], r); else if (_starpu_data_request_prio_list_ismember(&node_struct->idle_requests[r->peer_node][r->inout], r)) _starpu_data_request_prio_list_erase(&node_struct->idle_requests[r->peer_node][r->inout], r); else found = 0; if (found) { if (prefetch > STARPU_FETCH) _starpu_data_request_prio_list_push_back(&node_struct->prefetch_requests[r->peer_node][r->inout],r); else _starpu_data_request_prio_list_push_back(&node_struct->data_requests[r->peer_node][r->inout],r); } STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->data_requests_list_mutex[r->peer_node][r->inout]); #ifndef STARPU_NON_BLOCKING_DRIVERS _starpu_wake_all_blocked_workers_on_node(r->handling_node); #endif } starpu-1.4.9+dfsg/src/datawizard/data_request.h000066400000000000000000000157461507764646700216000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /** @file */ /* This one includes us, so make sure to include it first */ #include #ifndef __DATA_REQUEST_H__ #define __DATA_REQUEST_H__ #include #include #include #include #include #pragma GCC visibility push(hidden) /* TODO: This should be tuned according to driver capabilities * Data interfaces should also have to declare how many asynchronous requests * they have actually started (think of e.g. csr). */ #define MAX_PENDING_REQUESTS_PER_NODE 5 #define MAX_PENDING_PREFETCH_REQUESTS_PER_NODE 2 #define MAX_PENDING_IDLE_REQUESTS_PER_NODE 1 /** Maximum time in us that we can afford pushing requests before going back to the driver loop, e.g. for checking GPU task termination */ #define MAX_PUSH_TIME 1000 struct _starpu_data_replicate; struct _starpu_callback_list { void (*callback_func)(void *); void *callback_arg; struct _starpu_callback_list *next; }; enum _starpu_data_request_inout { _STARPU_DATA_REQUEST_IN, _STARPU_DATA_REQUEST_OUT }; /** This represents a data request, i.e. we want some data to get transferred * from a source to a destination. */ LIST_TYPE(_starpu_data_request, struct _starpu_spinlock lock; unsigned refcnt; const char *origin; /** Name of the function that triggered the request */ starpu_data_handle_t handle; struct _starpu_data_replicate *src_replicate; struct _starpu_data_replicate *dst_replicate; /** Which memory node will actually perform the transfer. * This is important in the CUDA/OpenCL case, where only the worker for * the node can make the CUDA/OpenCL calls. */ unsigned handling_node; unsigned peer_node; enum _starpu_data_request_inout inout; /* * What the destination node wants to do with the data: write to it, * read it, or read and write to it. Only in the two latter cases we * need an actual transfer, the first only needs an allocation. * * With mapped buffers, an additional case is mode = 0, which means * unmapping the buffer. */ enum starpu_data_access_mode mode; /** Elements needed to make the transfer asynchronous */ struct _starpu_async_channel async_channel; /** Whether the transfer is completed. */ unsigned completed:1; /** Whether we have already added our reference to the dst replicate. */ unsigned added_ref:1; /** Whether the request was canceled before being handled (because the transfer already happened another way). */ unsigned canceled:2; /** Whether this is just a prefetch request */ enum starpu_is_prefetch prefetch:3; /** Task this request is for */ struct starpu_task *task; /** Number of tasks which used this as a prefetch */ unsigned nb_tasks_prefetch; /** Priority of the request. Default is 0 */ int prio; /** The value returned by the transfer function */ int retval; /** The request will not actually be submitted until there remains * dependencies. */ unsigned ndeps; /** Some further tasks may have requested prefetches for the same data * much later on, link with them */ struct _starpu_data_request *next_same_req; /** in case we have a chain of request (eg. for nvidia multi-GPU), this * is the list of requests which are waiting for this one. */ struct _starpu_data_request *next_req[STARPU_MAXNODES+1]; /** The number of requests in next_req */ unsigned next_req_count; struct _starpu_callback_list *callbacks; unsigned long com_id; ) PRIO_LIST_TYPE(_starpu_data_request, prio) /** Everyone that wants to access some piece of data will post a request. * Not only StarPU internals, but also the application may put such requests */ LIST_TYPE(_starpu_data_requester, /** what kind of access is requested ? */ enum starpu_data_access_mode mode; /** applications may also directly manipulate data */ unsigned is_requested_by_codelet; /** in case this is a codelet that will do the access */ struct _starpu_job *j; unsigned buffer_index; int prio; /** if this is more complicated ... (eg. application request) * NB: this callback is not called with the lock taken ! */ void (*ready_data_callback)(void *argcb); void *argcb; ) PRIO_LIST_TYPE(_starpu_data_requester, prio) void _starpu_init_data_request_lists(void); void _starpu_deinit_data_request_lists(void); void _starpu_post_data_request(struct _starpu_data_request *r); /** returns 0 if we have pushed all requests, -EBUSY or -ENOMEM otherwise */ int _starpu_handle_node_data_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout, enum _starpu_may_alloc may_alloc, unsigned *pushed); int _starpu_handle_node_prefetch_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout, enum _starpu_may_alloc may_alloc, unsigned *pushed); int _starpu_handle_node_idle_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout, enum _starpu_may_alloc may_alloc, unsigned *pushed); int _starpu_handle_pending_node_data_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout); /* Only used at starpu_shutdown */ int _starpu_handle_all_pending_node_data_requests(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout); int _starpu_check_that_no_data_request_exists(unsigned handling_node); int _starpu_check_that_no_data_request_is_pending(unsigned handling_node, unsigned peer_node, enum _starpu_data_request_inout inout); struct _starpu_data_request *_starpu_create_data_request(starpu_data_handle_t handle, struct _starpu_data_replicate *src_replicate, struct _starpu_data_replicate *dst_replicate, int handling_node, enum starpu_data_access_mode mode, unsigned ndeps, struct starpu_task *task, enum starpu_is_prefetch is_prefetch, int prio, unsigned is_write_invalidation, const char *origin) STARPU_ATTRIBUTE_MALLOC; int _starpu_wait_data_request_completion(struct _starpu_data_request *r, enum _starpu_may_alloc may_alloc); void _starpu_data_request_append_callback(struct _starpu_data_request *r, void (*callback_func)(void *), void *callback_arg); void _starpu_update_prefetch_status(struct _starpu_data_request *r, enum starpu_is_prefetch prefetch); #pragma GCC visibility pop #endif // __DATA_REQUEST_H__ starpu-1.4.9+dfsg/src/datawizard/datastats.c000066400000000000000000000066731507764646700211010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include int _starpu_enable_stats = 0; void _starpu_datastats_init() { _starpu_enable_stats = !!starpu_getenv("STARPU_ENABLE_STATS"); } /* measure the cache hit ratio for each node */ static unsigned hit_cnt[STARPU_MAXNODES]; static unsigned miss_cnt[STARPU_MAXNODES]; void __starpu_msi_cache_hit(unsigned node) { STARPU_HG_DISABLE_CHECKING(hit_cnt[node]); hit_cnt[node]++; } void __starpu_msi_cache_miss(unsigned node) { STARPU_HG_DISABLE_CHECKING(miss_cnt[node]); miss_cnt[node]++; } void _starpu_display_msi_stats(FILE *stream) { if (!starpu_enable_stats()) return; unsigned node; unsigned total_hit_cnt = 0; unsigned total_miss_cnt = 0; fprintf(stream, "\n#---------------------\n"); fprintf(stream, "MSI cache stats :\n"); for (node = 0; node < STARPU_MAXNODES; node++) { total_hit_cnt += hit_cnt[node]; total_miss_cnt += miss_cnt[node]; } fprintf(stream, "TOTAL MSI stats\thit %u (%2.2f %%)\tmiss %u (%2.2f %%)\n", total_hit_cnt, (100.0f*total_hit_cnt)/(total_hit_cnt+total_miss_cnt), total_miss_cnt, (100.0f*total_miss_cnt)/(total_hit_cnt+total_miss_cnt)); for (node = 0; node < STARPU_MAXNODES; node++) { if (hit_cnt[node]+miss_cnt[node]) { char name[128]; starpu_memory_node_get_name(node, name, sizeof(name)); fprintf(stream, "memory node %s\n", name); fprintf(stream, "\thit : %u (%2.2f %%)\n", hit_cnt[node], (100.0f*hit_cnt[node])/(hit_cnt[node]+miss_cnt[node])); fprintf(stream, "\tmiss : %u (%2.2f %%)\n", miss_cnt[node], (100.0f*miss_cnt[node])/(hit_cnt[node]+miss_cnt[node])); } } fprintf(stream, "#---------------------\n"); } /* measure the efficiency of our allocation cache */ static unsigned alloc_cnt[STARPU_MAXNODES]; static unsigned alloc_cache_hit_cnt[STARPU_MAXNODES]; void __starpu_allocation_cache_hit(unsigned node) { STARPU_HG_DISABLE_CHECKING(alloc_cache_hit_cnt[node]); alloc_cache_hit_cnt[node]++; } void __starpu_data_allocation_inc_stats(unsigned node) { STARPU_HG_DISABLE_CHECKING(alloc_cnt[node]); alloc_cnt[node]++; } void _starpu_display_alloc_cache_stats(FILE *stream) { if (!starpu_enable_stats()) return; fprintf(stream, "\n#---------------------\n"); fprintf(stream, "Allocation cache stats:\n"); unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { if (alloc_cnt[node]) { char name[128]; starpu_memory_node_get_name(node, name, sizeof(name)); fprintf(stream, "memory node %s\n", name); fprintf(stream, "\ttotal alloc : %u\n", alloc_cnt[node]); fprintf(stream, "\tcached alloc: %u (%2.2f %%)\n", alloc_cache_hit_cnt[node], (100.0f*alloc_cache_hit_cnt[node])/(alloc_cnt[node])); } } fprintf(stream, "#---------------------\n"); } starpu-1.4.9+dfsg/src/datawizard/datastats.h000066400000000000000000000036261507764646700211010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DATASTATS_H__ #define __DATASTATS_H__ /** @file */ #include #include #include #include #pragma GCC visibility push(hidden) extern int _starpu_enable_stats; void _starpu_datastats_init(); static inline int starpu_enable_stats(void) { return _starpu_enable_stats; } void __starpu_msi_cache_hit(unsigned node); void __starpu_msi_cache_miss(unsigned node); #define _starpu_msi_cache_hit(node) do { \ if (starpu_enable_stats()) \ __starpu_msi_cache_hit(node); \ } while (0) #define _starpu_msi_cache_miss(node) do { \ if (starpu_enable_stats()) \ __starpu_msi_cache_miss(node); \ } while (0) void _starpu_display_msi_stats(FILE *stream); void __starpu_allocation_cache_hit(unsigned node STARPU_ATTRIBUTE_UNUSED); void __starpu_data_allocation_inc_stats(unsigned node STARPU_ATTRIBUTE_UNUSED); #define _starpu_allocation_cache_hit(node) do { \ if (starpu_enable_stats()) \ __starpu_allocation_cache_hit(node); \ } while (0) #define _starpu_data_allocation_inc_stats(node) do { \ if (starpu_enable_stats()) \ __starpu_data_allocation_inc_stats(node); \ } while (0) void _starpu_display_alloc_cache_stats(FILE *stream); #pragma GCC visibility pop #endif // __DATASTATS_H__ starpu-1.4.9+dfsg/src/datawizard/datawizard.c000066400000000000000000000125071507764646700212340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #ifdef STARPU_SIMGRID #include #endif static int ____starpu_datawizard_progress(unsigned memory_node, unsigned peer_start, unsigned peer_end, enum _starpu_data_request_inout inout, enum _starpu_may_alloc may_alloc, unsigned push_requests) { int ret = 0; unsigned peer_node; /* in case some other driver requested data */ for (peer_node = peer_start; peer_node < peer_end; peer_node++) { if (_starpu_handle_pending_node_data_requests(memory_node, peer_node, inout)) ret = 1; } starpu_memchunk_tidy(memory_node); if (ret || push_requests) { /* Some transfers have finished, or the driver requests to really push more */ unsigned pushed; unsigned ok = 1; for (peer_node = peer_start; ok && peer_node < peer_end; peer_node++) { if (_starpu_handle_node_data_requests(memory_node, peer_node, inout, may_alloc, &pushed) == -ENOMEM) ok = 0; if (pushed) ret = 1; } if (ok) { unsigned doidle = 1; /* We pushed all pending requests, we can afford pushing * prefetch requests */ for (peer_node = peer_start; ok && peer_node < peer_end; peer_node++) { if (_starpu_handle_node_prefetch_requests(memory_node, peer_node, inout, may_alloc, &pushed) == -ENOMEM) ok = 0; if (pushed) ret = 1; if (!_starpu_check_that_no_data_request_is_pending(memory_node, peer_node, inout)) doidle = 0; } if (doidle) /* No pending transfer, push some idle transfer */ for (peer_node = peer_start; ok && peer_node < peer_end; peer_node++) { if (_starpu_handle_node_idle_requests(memory_node, peer_node, inout, may_alloc, &pushed) == -ENOMEM) ok = 0; if (pushed) ret = 1; } } } return ret; } static int ___starpu_datawizard_progress(unsigned memory_node, unsigned nnodes, enum _starpu_may_alloc may_alloc, unsigned push_requests) { int ret = 0; unsigned peer_node; #ifdef STARPU_SIMGRID starpu_sleep(0.000001); #endif STARPU_UYIELD(); /* First handle all incoming transfers */ ret |= ____starpu_datawizard_progress(memory_node, 0, nnodes, _STARPU_DATA_REQUEST_IN, may_alloc, push_requests); /* Then handle outgoing transfers */ for (peer_node = 0; peer_node < nnodes; peer_node++) ret |= ____starpu_datawizard_progress(memory_node, peer_node, peer_node+1, _STARPU_DATA_REQUEST_OUT, may_alloc, push_requests); return ret; } int __starpu_datawizard_progress(enum _starpu_may_alloc may_alloc, unsigned push_requests) { struct _starpu_worker *worker = _starpu_get_local_worker_key(); unsigned memnode; if (!worker) { /* Call from main application, only make RAM requests progress */ int ret = 0; int nnumas = starpu_memory_nodes_get_numa_count(); int numa; for (numa = 0; numa < nnumas; numa++) ret |= ___starpu_datawizard_progress(numa, nnumas, may_alloc, push_requests); _starpu_execute_registered_progression_hooks(); return ret; } /* processing requests may release some tasks, we cannot be already * scheduling a task. */ if (worker->state_sched_op_pending) return 0; if (worker->set) /* Running one of the workers of a worker set. The reference for * driving memory is its worker 0 (see registrations in topology.c) */ worker = &worker->set->workers[0]; unsigned current_worker_id = worker->workerid; int ret = 0; unsigned nnodes = starpu_memory_nodes_get_count(); for (memnode = 0; memnode < nnodes; memnode++) { if (_starpu_worker_drives_memory[current_worker_id][memnode] == 1) { if(_starpu_config.conf.cuda_only_fast_alloc_other_memnodes && worker->arch == STARPU_CUDA_WORKER && worker->memory_node != memnode) ret |= ___starpu_datawizard_progress(memnode, nnodes, _STARPU_DATAWIZARD_ONLY_FAST_ALLOC, push_requests); else ret |= ___starpu_datawizard_progress(memnode, nnodes, may_alloc, push_requests); } } _starpu_execute_registered_progression_hooks(); return ret; } void _starpu_datawizard_progress(enum _starpu_may_alloc may_alloc) { __starpu_datawizard_progress(may_alloc, 1); } /* Only used at starpu_shutdown */ void _starpu_datawizard_handle_all_pending_node_data_requests(unsigned memnode) { unsigned nnodes = starpu_memory_nodes_get_count(); unsigned memnode2; for (memnode2 = 0; memnode2 < nnodes; memnode2++) { _starpu_handle_all_pending_node_data_requests(memnode, memnode2, _STARPU_DATA_REQUEST_IN); _starpu_handle_all_pending_node_data_requests(memnode, memnode2, _STARPU_DATA_REQUEST_OUT); } } starpu-1.4.9+dfsg/src/datawizard/datawizard.h000066400000000000000000000037151507764646700212420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DATAWIZARD_H__ #define __DATAWIZARD_H__ /** @file */ #include #include #include #include #include #include #include #include #include #include #pragma GCC visibility push(hidden) /** Make data transfers progress on all memory nodes driven by the current worker. * * If \p push_requests is 1, it can start new transfers * * If \p may_alloc is _STARPU_DATAWIZARD_DO_ALLOC, it can allocate destination data for transfers * (this is not possible e.g. when spinning for a handle lock) */ int __starpu_datawizard_progress(enum _starpu_may_alloc may_alloc, unsigned push_requests); /** Call __starpu_datawizard_progress with push_requests = 1 */ void _starpu_datawizard_progress(enum _starpu_may_alloc may_alloc); /* Only used at starpu_shutdown */ /** Check for all pending data request progress on node \p memory_node */ void _starpu_datawizard_handle_all_pending_node_data_requests(unsigned memnode); #pragma GCC visibility pop #endif // __DATAWIZARD_H__ starpu-1.4.9+dfsg/src/datawizard/filters.c000066400000000000000000001262551507764646700205600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ //#define STARPU_VERBOSE #include #include #include #include #include /* * This function applies a data filter on all the elements of a partition */ static void map_filter(starpu_data_handle_t root_handle, struct starpu_data_filter *f) { /* we need to apply the data filter on all leaf of the tree */ if (root_handle->nchildren == 0) { /* this is a leaf */ starpu_data_partition(root_handle, f); } else { /* try to apply the data filter recursively */ unsigned child; for (child = 0; child < root_handle->nchildren; child++) { starpu_data_handle_t handle_child = starpu_data_get_child(root_handle, child); map_filter(handle_child, f); } } } void starpu_data_vmap_filters(starpu_data_handle_t root_handle, unsigned nfilters, va_list pa) { unsigned i; for (i = 0; i < nfilters; i++) { struct starpu_data_filter *next_filter; next_filter = va_arg(pa, struct starpu_data_filter *); STARPU_ASSERT(next_filter); map_filter(root_handle, next_filter); } } void starpu_data_map_filters(starpu_data_handle_t root_handle, unsigned nfilters, ...) { va_list pa; va_start(pa, nfilters); starpu_data_vmap_filters(root_handle, nfilters, pa); va_end(pa); } void starpu_data_map_filters_parray(starpu_data_handle_t root_handle, int nfilters, struct starpu_data_filter **filters) { int i; STARPU_ASSERT(nfilters >= 0); for (i = 0; i < nfilters; i++) { struct starpu_data_filter *next_filter = filters[i]; STARPU_ASSERT(next_filter); map_filter(root_handle, next_filter); } } void starpu_data_map_filters_array(starpu_data_handle_t root_handle, int nfilters, struct starpu_data_filter *filters) { int i; STARPU_ASSERT(nfilters >= 0); for (i = 0; i < nfilters; i++) { map_filter(root_handle, &filters[i]); } } void fstarpu_data_map_filters(starpu_data_handle_t root_handle, int nfilters, struct starpu_data_filter **filters) { starpu_data_map_filters_parray(root_handle, nfilters, filters); } int starpu_data_get_nb_children(starpu_data_handle_t handle) { return handle->nchildren; } starpu_data_handle_t starpu_data_get_child(starpu_data_handle_t handle, unsigned i) { STARPU_ASSERT_MSG(handle->nchildren != 0, "Data %p has to be partitioned before accessing children", handle); STARPU_ASSERT_MSG(i < handle->nchildren, "Invalid child index %u in handle %p, maximum %u", i, handle, handle->nchildren); return &handle->children[i]; } /* * example starpu_data_get_sub_data(starpu_data_handle_t root_handle, 3, 42, 0, 1); */ starpu_data_handle_t starpu_data_get_sub_data(starpu_data_handle_t root_handle, unsigned depth, ...) { va_list pa; va_start(pa, depth); starpu_data_handle_t handle = starpu_data_vget_sub_data(root_handle, depth, pa); va_end(pa); return handle; } starpu_data_handle_t starpu_data_vget_sub_data(starpu_data_handle_t root_handle, unsigned depth, va_list pa) { STARPU_ASSERT(root_handle); starpu_data_handle_t current_handle = root_handle; /* the variable number of argument must correlate the depth in the tree */ unsigned i; for (i = 0; i < depth; i++) { unsigned next_child; next_child = va_arg(pa, unsigned); STARPU_ASSERT_MSG(current_handle->nchildren != 0, "Data %p has to be partitioned before accessing children", current_handle); STARPU_ASSERT_MSG(next_child < current_handle->nchildren, "Bogus child number %u, data %p only has %u children", next_child, current_handle, current_handle->nchildren); current_handle = ¤t_handle->children[next_child]; } return current_handle; } starpu_data_handle_t fstarpu_data_get_sub_data(starpu_data_handle_t root_handle, int depth, int *indices) { STARPU_ASSERT(root_handle); starpu_data_handle_t current_handle = root_handle; STARPU_ASSERT(depth >= 0); /* the variable number of argument must correlate the depth in the tree */ int i; for (i = 0; i < depth; i++) { int next_child; next_child = indices[i]; STARPU_ASSERT(next_child >= 0); STARPU_ASSERT_MSG(current_handle->nchildren != 0, "Data %p has to be partitioned before accessing children", current_handle); STARPU_ASSERT_MSG((unsigned) next_child < current_handle->nchildren, "Bogus child number %d, data %p only has %u children", next_child, current_handle, current_handle->nchildren); current_handle = ¤t_handle->children[next_child]; } return current_handle; } static unsigned _starpu_data_partition_nparts(starpu_data_handle_t initial_handle, struct starpu_data_filter *f) { /* how many parts ? */ if (f->get_nchildren) return f->get_nchildren(f, initial_handle); else return f->nchildren; } static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_data_handle_t *childrenp, unsigned nparts, struct starpu_data_filter *f, int inherit_state) { unsigned i; unsigned node; unsigned found = STARPU_MAXNODES; for (node = 0; node < STARPU_MAXNODES; node++) _starpu_data_unmap(initial_handle, node); /* first take care to properly lock the data header */ _starpu_spin_lock(&initial_handle->header_lock); initial_handle->nplans++; STARPU_ASSERT_MSG(nparts > 0, "Partitioning data %p in 0 piece does not make sense", initial_handle); /* allocate the children */ if (inherit_state) { _STARPU_CALLOC(initial_handle->children, nparts, sizeof(struct _starpu_data_state)); /* this handle now has children */ initial_handle->nchildren = nparts; } for (node = 0; node < STARPU_MAXNODES; node++) { if (initial_handle->per_node[node].state != STARPU_INVALID) found = node; STARPU_ASSERT(initial_handle->per_node[node].mapped == STARPU_UNMAPPED); } if (found == STARPU_MAXNODES) { /* This is lazy allocation, allocate it now in main RAM, so as * to have somewhere to gather pieces later */ /* FIXME: mark as unevictable! */ int home_node = initial_handle->home_node; if (home_node < 0 || (starpu_node_get_kind(home_node) != STARPU_CPU_RAM)) home_node = STARPU_MAIN_RAM; int ret = _starpu_allocate_memory_on_node(initial_handle, &initial_handle->per_node[home_node], STARPU_FETCH, 0); #ifdef STARPU_DEVEL #warning we should reclaim memory if allocation failed #endif STARPU_ASSERT(!ret); } if (nparts && !inherit_state) { STARPU_ASSERT_MSG(childrenp, "Passing NULL pointer for parameter childrenp while parameter inherit_state is 0"); } for (i = 0; i < nparts; i++) { starpu_data_handle_t child; if (inherit_state) child = &initial_handle->children[i]; else child = childrenp[i]; STARPU_ASSERT(child); struct starpu_data_interface_ops *ops; /* each child may have his own interface type */ /* what's this child's interface ? */ if (f->get_child_ops) ops = f->get_child_ops(f, i); else ops = initial_handle->ops; /* As most of the fields must be initialized at NULL, let's put * 0 everywhere */ memset(child, 0, sizeof(*child)); _starpu_data_handle_init(child, ops, initial_handle->mf_node); child->root_handle = initial_handle->root_handle; child->father_handle = initial_handle; child->nsiblings = nparts; if (inherit_state) { //child->siblings = NULL; } else child->siblings = childrenp; child->sibling_index = i; child->depth = initial_handle->depth + 1; child->active = inherit_state; child->home_node = initial_handle->home_node; child->wt_mask = initial_handle->wt_mask; child->aliases = initial_handle->aliases; //child->readonly_dup = NULL; //child->readonly_dup_of = NULL; child->is_not_important = initial_handle->is_not_important; child->sequential_consistency = initial_handle->sequential_consistency; child->initialized = initial_handle->initialized; child->readonly = initial_handle->readonly; child->ooc = initial_handle->ooc; /* The methods used for reduction are propagated to the * children. */ child->redux_cl = initial_handle->redux_cl; child->init_cl = initial_handle->init_cl; for (node = 0; node < STARPU_MAXNODES; node++) { struct _starpu_data_replicate *initial_replicate; struct _starpu_data_replicate *child_replicate; initial_replicate = &initial_handle->per_node[node]; child_replicate = &child->per_node[node]; if (inherit_state) child_replicate->state = initial_replicate->state; else child_replicate->state = STARPU_INVALID; if (inherit_state || !initial_replicate->automatically_allocated) child_replicate->allocated = initial_replicate->allocated; else { //child_replicate->allocated = 0; } /* Do not allow memory reclaiming within the child for parent bits */ //child_replicate->automatically_allocated = 0; //child_replicate->refcnt = 0; child_replicate->memory_node = node; //child_replicate->relaxed_coherency = 0; child_replicate->mapped = STARPU_UNMAPPED; if (inherit_state) child_replicate->initialized = initial_replicate->initialized; else { //child_replicate->initialized = 0; } //child_replicate->nb_tasks_prefetch = 0; /* update the interface */ void *initial_interface = starpu_data_get_interface_on_node(initial_handle, node); void *child_interface = starpu_data_get_interface_on_node(child, node); STARPU_ASSERT_MSG(!(!inherit_state && child_replicate->automatically_allocated && child_replicate->allocated), "partition planning is currently not supported when handle has some automatically allocated buffers"); f->filter_func(initial_interface, child_interface, f, i, nparts); } /* We compute the size and the footprint of the child once and * store it in the handle */ child->footprint = _starpu_compute_data_footprint(child); _STARPU_TRACE_HANDLE_DATA_REGISTER(child); } /* now let the header */ _starpu_spin_unlock(&initial_handle->header_lock); } static void _starpu_empty_codelet_function(void *buffers[], void *args) { (void) buffers; // unused; (void) args; // unused; } void starpu_data_unpartition(starpu_data_handle_t root_handle, unsigned gathering_node) { unsigned child; unsigned worker; unsigned nworkers = starpu_worker_get_count(); unsigned node; unsigned sizes[root_handle->nchildren]; _STARPU_TRACE_START_UNPARTITION(root_handle, gathering_node); _starpu_spin_lock(&root_handle->header_lock); STARPU_ASSERT_MSG(root_handle->nchildren != 0, "data %p is not partitioned, can not unpartition it", root_handle); /* first take all the children lock (in order !) */ for (child = 0; child < root_handle->nchildren; child++) { starpu_data_handle_t child_handle = starpu_data_get_child(root_handle, child); /* make sure the intermediate children is unpartitionned as well */ if (child_handle->nchildren > 0) starpu_data_unpartition(child_handle, gathering_node); /* If this is a multiformat handle, we must convert the data now */ #ifdef STARPU_DEVEL #warning TODO: _starpu_fetch_data_on_node should be doing it #endif if (_starpu_data_is_multiformat_handle(child_handle) && starpu_node_get_kind(child_handle->mf_node) != STARPU_CPU_RAM) { struct starpu_codelet cl = { .where = STARPU_CPU, .cpu_funcs = { _starpu_empty_codelet_function }, .modes = { STARPU_RW }, .nbuffers = 1 }; struct starpu_task *task = starpu_task_create(); task->name = "convert_data"; STARPU_TASK_SET_HANDLE(task, child_handle, 0); task->cl = &cl; task->synchronous = 1; if (_starpu_task_submit_internally(task) != 0) _STARPU_ERROR("Could not submit the conversion task while unpartitionning\n"); } int ret; /* for now we pretend that the RAM is almost unlimited and that gathering * data should be possible from the node that does the unpartionning ... we * don't want to have the programming deal with memory shortage at that time, * really */ /* Acquire the child data on the gathering node. This will trigger collapsing any reduction */ ret = starpu_data_acquire_on_node(child_handle, gathering_node, STARPU_RW); STARPU_ASSERT(ret == 0); starpu_data_release_on_node(child_handle, gathering_node); _starpu_spin_lock(&child_handle->header_lock); child_handle->busy_waiting = 1; _starpu_spin_unlock(&child_handle->header_lock); /* Make sure it is not mapped */ for (node = 0; node < STARPU_MAXNODES; node++) _starpu_data_unmap(child_handle, node); /* Wait for all requests to finish (notably WT and UNMAP requests) */ STARPU_PTHREAD_MUTEX_LOCK(&child_handle->busy_mutex); while (1) { /* Here helgrind would shout that this an unprotected access, * but this is actually fine: all threads who do busy_count-- * are supposed to call _starpu_data_check_not_busy, which will * wake us up through the busy_mutex/busy_cond. */ if (!child_handle->busy_count) break; /* This is woken by _starpu_data_check_not_busy, always called * after decrementing busy_count */ STARPU_PTHREAD_COND_WAIT(&child_handle->busy_cond, &child_handle->busy_mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&child_handle->busy_mutex); _starpu_spin_lock(&child_handle->header_lock); sizes[child] = _starpu_data_get_alloc_size(child_handle); if (child_handle->unregister_hook) { child_handle->unregister_hook(child_handle); } if (child_handle->per_worker) { for (worker = 0; worker < nworkers; worker++) { struct _starpu_data_replicate *local = &child_handle->per_worker[worker]; STARPU_ASSERT(local->state == STARPU_INVALID); if (local->allocated && local->automatically_allocated) _starpu_request_mem_chunk_removal(child_handle, local, starpu_worker_get_memory_node(worker), sizes[child]); } } _starpu_memory_stats_free(child_handle); } /* the gathering_node should now have a valid copy of all the children. * For all nodes, if the node had all copies and none was locally * allocated then the data is still valid there, else, it's invalidated * for the gathering node, if we have some locally allocated data, we * copy all the children (XXX this should not happen so we just do not * do anything since this is transparent ?) */ unsigned still_valid[STARPU_MAXNODES]; /* we do 2 passes : the first pass determines whether the data is still * valid or not, the second pass is needed to choose between STARPU_SHARED and * STARPU_OWNER */ unsigned nvalids = 0; /* still valid ? */ for (node = 0; node < STARPU_MAXNODES; node++) { struct _starpu_data_replicate *local; /* until an issue is found the data is assumed to be valid */ unsigned isvalid = 1; for (child = 0; child < root_handle->nchildren; child++) { starpu_data_handle_t child_handle = starpu_data_get_child(root_handle, child); local = &child_handle->per_node[node]; if (local->state == STARPU_INVALID || local->automatically_allocated == 1) { /* One of the bits is missing or is not inside the parent */ isvalid = 0; } if (local->mc && local->allocated && local->automatically_allocated) /* free the child data copy in a lazy fashion */ _starpu_request_mem_chunk_removal(child_handle, local, node, sizes[child]); } local = &root_handle->per_node[node]; if (!local->allocated) /* Even if we have all the bits, if we don't have the * whole data, it's not valid */ isvalid = 0; if (!isvalid && local->mc && local->allocated && local->automatically_allocated && !local->refcnt) /* free the data copy in a lazy fashion */ _starpu_request_mem_chunk_removal(root_handle, local, node, _starpu_data_get_alloc_size(root_handle)); /* if there was no invalid copy, the node still has a valid copy */ still_valid[node] = isvalid; if (isvalid) nvalids++; } /* either shared or owned */ STARPU_ASSERT(nvalids > 0); enum _starpu_cache_state newstate = (nvalids == 1)?STARPU_OWNER:STARPU_SHARED; for (node = 0; node < STARPU_MAXNODES; node++) { root_handle->per_node[node].state = still_valid[node]?newstate:STARPU_INVALID; } for (child = 0; child < root_handle->nchildren; child++) { starpu_data_handle_t child_handle = starpu_data_get_child(root_handle, child); _starpu_data_free_interfaces(child_handle); _starpu_spin_unlock(&child_handle->header_lock); _starpu_spin_destroy(&child_handle->header_lock); } /* Set the initialized state */ starpu_data_handle_t first_child = starpu_data_get_child(root_handle, 0); root_handle->initialized = first_child->initialized; for (child = 1; child < root_handle->nchildren; child++) { starpu_data_handle_t child_handle = starpu_data_get_child(root_handle, child); STARPU_ASSERT_MSG(child_handle->initialized == root_handle->initialized, "Inconsistent state between children initialization"); } if (root_handle->initialized) { for (node = 0; node < STARPU_MAXNODES; node++) { struct _starpu_data_replicate *root_replicate; root_replicate = &root_handle->per_node[node]; root_replicate->initialized = still_valid[node]; } } for (child = 0; child < root_handle->nchildren; child++) { starpu_data_handle_t child_handle = starpu_data_get_child(root_handle, child); _starpu_data_clear_implicit(child_handle); free(child_handle->active_readonly_children); free(child_handle->active_readonly_nchildren); STARPU_PTHREAD_MUTEX_DESTROY(&child_handle->busy_mutex); STARPU_PTHREAD_COND_DESTROY(&child_handle->busy_cond); STARPU_PTHREAD_MUTEX_DESTROY(&child_handle->sequential_consistency_mutex); #ifdef STARPU_BUBBLE STARPU_PTHREAD_MUTEX_DESTROY(&child_handle->unpartition_mutex); #endif STARPU_HG_ENABLE_CHECKING(child_handle->post_sync_tasks_cnt); STARPU_HG_ENABLE_CHECKING(child_handle->busy_count); _starpu_data_requester_prio_list_deinit(&child_handle->req_list); _starpu_data_requester_prio_list_deinit(&child_handle->reduction_req_list); if (child_handle->switch_cl) { free(child_handle->switch_cl->dyn_nodes); free(child_handle->switch_cl); } _STARPU_TRACE_HANDLE_DATA_UNREGISTER(child_handle); } /* there is no child anymore */ starpu_data_handle_t children = root_handle->children; root_handle->children = NULL; root_handle->nchildren = 0; root_handle->nplans--; /* now the parent may be used again so we release the lock */ _starpu_spin_unlock(&root_handle->header_lock); free(children); _STARPU_TRACE_END_UNPARTITION(root_handle, gathering_node); } void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f) { unsigned nparts = _starpu_data_partition_nparts(initial_handle, f); STARPU_ASSERT_MSG(initial_handle->nchildren == 0, "there should not be multiple filters applied on the same data %p, further filtering has to be done on children", initial_handle); STARPU_ASSERT_MSG(initial_handle->nplans == 0, "partition planning and synchronous partitioning is not supported"); initial_handle->children = NULL; /* Make sure to wait for previous tasks working on the whole data */ starpu_data_acquire_on_node(initial_handle, STARPU_ACQUIRE_NO_NODE, initial_handle->initialized?STARPU_RW:STARPU_W); starpu_data_release_on_node(initial_handle, STARPU_ACQUIRE_NO_NODE); _starpu_data_partition(initial_handle, NULL, nparts, f, 1); } void starpu_data_partition_plan(starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *childrenp) { unsigned i; unsigned nparts = _starpu_data_partition_nparts(initial_handle, f); STARPU_ASSERT_MSG(initial_handle->nchildren == 0, "partition planning and synchronous partitioning is not supported"); STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency"); struct starpu_codelet *cl = initial_handle->switch_cl; int home_node = initial_handle->home_node; starpu_data_handle_t *children; if (home_node == -1) /* Nothing better for now */ /* TODO: pass -1, and make _starpu_fetch_nowhere_task_input * really call _starpu_fetch_data_on_node, and make that update * the coherency. */ home_node = STARPU_MAIN_RAM; _STARPU_MALLOC(children, nparts * sizeof(*children)); for (i = 0; i < nparts; i++) { _STARPU_CALLOC(children[i], 1, sizeof(struct _starpu_data_state)); childrenp[i] = children[i]; } _starpu_data_partition(initial_handle, children, nparts, f, 0); if (!cl) { /* Create a codelet that will make the coherency on the home node */ _STARPU_CALLOC(initial_handle->switch_cl, 1, sizeof(*initial_handle->switch_cl)); cl = initial_handle->switch_cl; cl->where = STARPU_NOWHERE; cl->nbuffers = STARPU_VARIABLE_NBUFFERS; cl->flags = STARPU_CODELET_NOPLANS; cl->name = "data_partition_switch"; cl->specific_nodes = 1; } if (initial_handle->switch_cl_nparts < nparts) { /* First initialization, or previous initialization was with fewer parts, enlarge it */ _STARPU_REALLOC(cl->dyn_nodes, (nparts+1) * sizeof(*cl->dyn_nodes)); for (i = initial_handle->switch_cl_nparts; i < nparts+1; i++) cl->dyn_nodes[i] = home_node; initial_handle->switch_cl_nparts = nparts; } } void starpu_data_partition_clean_node(starpu_data_handle_t root_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node) { unsigned i; if (children[0]->active) { starpu_data_unpartition_submit(root_handle, nparts, children, gather_node); } free(children[0]->siblings); for (i = 0; i < nparts; i++) { children[i]->siblings = NULL; starpu_data_unregister_submit(children[i]); } _starpu_spin_lock(&root_handle->header_lock); root_handle->nplans--; _starpu_spin_unlock(&root_handle->header_lock); } void starpu_data_partition_clean(starpu_data_handle_t root_handle, unsigned nparts, starpu_data_handle_t *children) { #ifdef STARPU_DEVEL #warning FIXME: better choose gathering node #endif starpu_data_partition_clean_node(root_handle, nparts, children, root_handle->home_node); } static void _starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, unsigned char *handles_sequential_consistency) { unsigned i; STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency"); _starpu_spin_lock(&initial_handle->header_lock); STARPU_ASSERT_MSG(initial_handle->partitioned == 0, "One can't submit several partition plannings at the same time"); STARPU_ASSERT_MSG(initial_handle->part_readonly == 0, "One can't submit a partition planning while a readonly partitioning is active"); STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts"); initial_handle->partitioned++; initial_handle->active_nchildren = children[0]->nsiblings; initial_handle->active_children = children[0]->siblings; _starpu_spin_unlock(&initial_handle->header_lock); for (i = 0; i < nparts; i++) { _starpu_spin_lock(&children[i]->header_lock); children[i]->active = 1; _starpu_spin_unlock(&children[i]->header_lock); } if (!initial_handle->initialized) /* No need for coherency, it is not initialized */ return; struct starpu_data_descr descr[nparts]; for (i = 0; i < nparts; i++) { STARPU_ASSERT_MSG(children[i]->father_handle == initial_handle, "child(%d) %p is partitioned from %p and not from the given parameter %p", i, children[i], children[i]->father_handle, initial_handle); descr[i].handle = children[i]; descr[i].mode = STARPU_W; } /* TODO: assert nparts too */ int ret; if (handles_sequential_consistency) ret = starpu_task_insert(initial_handle->switch_cl, STARPU_RW, initial_handle, STARPU_DATA_MODE_ARRAY, descr, nparts, STARPU_NAME, "partition", STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, handles_sequential_consistency, 0); else ret = starpu_task_insert(initial_handle->switch_cl, STARPU_RW, initial_handle, STARPU_DATA_MODE_ARRAY, descr, nparts, STARPU_NAME, "partition", 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); if (!handles_sequential_consistency || handles_sequential_consistency[0]) _starpu_data_invalidate_submit_noplan(initial_handle); } void starpu_data_partition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency) { unsigned i; unsigned char handles_sequential_consistency[nparts+1]; handles_sequential_consistency[0] = sequential_consistency; for(i=1 ; isequential_consistency; _starpu_data_partition_submit(initial_handle, nparts, children, handles_sequential_consistency); } void starpu_data_partition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children) { _starpu_data_partition_submit(initial_handle, nparts, children, NULL); } void starpu_data_partition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children) { starpu_data_partition_readonly_submit_sequential_consistency(initial_handle, nparts, children, initial_handle->sequential_consistency); } void starpu_data_partition_readonly_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int sequential_consistency) { unsigned i; STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency"); _starpu_spin_lock(&initial_handle->header_lock); STARPU_ASSERT_MSG(initial_handle->partitioned == 0 || initial_handle->part_readonly, "One can't submit a readonly partition planning at the same time as a readwrite partition planning"); STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts"); initial_handle->partitioned++; initial_handle->part_readonly = 1; if (initial_handle->nactive_readonly_children < initial_handle->partitioned) { _STARPU_REALLOC(initial_handle->active_readonly_children, initial_handle->partitioned * sizeof(initial_handle->active_readonly_children[0])); _STARPU_REALLOC(initial_handle->active_readonly_nchildren, initial_handle->partitioned * sizeof(initial_handle->active_readonly_nchildren[0])); initial_handle->nactive_readonly_children = initial_handle->partitioned; } initial_handle->active_readonly_children[initial_handle->partitioned-1] = children[0]->siblings; initial_handle->active_readonly_nchildren[initial_handle->partitioned-1] = children[0]->nsiblings; _starpu_spin_unlock(&initial_handle->header_lock); for (i = 0; i < nparts; i++) { _starpu_spin_lock(&children[i]->header_lock); children[i]->active = 1; children[i]->active_ro = 1; _starpu_spin_unlock(&children[i]->header_lock); } STARPU_ASSERT_MSG(initial_handle->initialized || initial_handle->init_cl, "It is odd to read-only-partition a data which does not have a value yet"); struct starpu_data_descr descr[nparts]; char handles_sequential_consistency[nparts+1]; handles_sequential_consistency[0] = sequential_consistency; for (i = 0; i < nparts; i++) { STARPU_ASSERT_MSG(children[i]->father_handle == initial_handle, "child(%d) %p is partitioned from %p and not from the given parameter %p", i, children[i], children[i]->father_handle, initial_handle); descr[i].handle = children[i]; descr[i].mode = STARPU_W; handles_sequential_consistency[i+1] = (char) children[i]->sequential_consistency; } /* TODO: assert nparts too */ int ret = starpu_task_insert(initial_handle->switch_cl, STARPU_R, initial_handle, STARPU_DATA_MODE_ARRAY, descr, nparts, STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, handles_sequential_consistency, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } void starpu_data_partition_readwrite_upgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children) { STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency"); _starpu_spin_lock(&initial_handle->header_lock); STARPU_ASSERT_MSG(initial_handle->partitioned == 1, "One can't upgrade a readonly partition planning to readwrite while other readonly partition plannings are active"); STARPU_ASSERT_MSG(initial_handle->part_readonly == 1, "One can only upgrade a readonly partition planning"); STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts"); initial_handle->part_readonly = 0; initial_handle->active_nchildren = initial_handle->active_readonly_nchildren[0]; initial_handle->active_children = initial_handle->active_readonly_children[0]; initial_handle->active_readonly_children[0] = NULL; initial_handle->active_readonly_nchildren[0] = 0; _starpu_spin_unlock(&initial_handle->header_lock); unsigned i; struct starpu_data_descr descr[nparts]; for (i = 0; i < nparts; i++) { STARPU_ASSERT_MSG(children[i]->father_handle == initial_handle, "child(%d) %p is partitioned from %p and not from the given parameter %p", i, children[i], children[i]->father_handle, initial_handle); children[i]->active_ro = 0; descr[i].handle = children[i]; descr[i].mode = STARPU_W; } /* TODO: assert nparts too */ int ret = starpu_task_insert(initial_handle->switch_cl, STARPU_RW, initial_handle, STARPU_DATA_MODE_ARRAY, descr, nparts, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); _starpu_data_invalidate_submit_noplan(initial_handle); } void starpu_data_partition_readonly_downgrade_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children) { unsigned i; STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency"); _starpu_spin_lock(&initial_handle->header_lock); STARPU_ASSERT_MSG(initial_handle->partitioned == 1, "One can't downgrade a read-write partition planning to read-only while other partition plannings are active"); STARPU_ASSERT_MSG(initial_handle->part_readonly == 0, "Partition is already read-only"); STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts"); initial_handle->part_readonly = 1; if (initial_handle->nactive_readonly_children < initial_handle->partitioned) { _STARPU_REALLOC(initial_handle->active_readonly_children, initial_handle->partitioned * sizeof(initial_handle->active_readonly_children[0])); _STARPU_REALLOC(initial_handle->active_readonly_nchildren, initial_handle->partitioned * sizeof(initial_handle->active_readonly_nchildren[0])); initial_handle->nactive_readonly_children = initial_handle->partitioned; } initial_handle->active_readonly_children[initial_handle->partitioned-1] = children[0]->siblings; initial_handle->active_readonly_nchildren[initial_handle->partitioned-1] = children[0]->nsiblings; initial_handle->active_children = NULL; initial_handle->active_nchildren = 0; _starpu_spin_unlock(&initial_handle->header_lock); for (i = 0; i < nparts; i++) { _starpu_spin_lock(&children[i]->header_lock); children[i]->active = 1; children[i]->active_ro = 1; _starpu_spin_unlock(&children[i]->header_lock); } struct starpu_data_descr descr[nparts]; unsigned n; for (i = 0, n = 0; i < nparts; i++) { STARPU_ASSERT_MSG(children[i]->father_handle == initial_handle, "child(%d) %p is partitioned from %p and not from the given parameter %p", i, children[i], children[i]->father_handle, initial_handle); if (!children[i]->initialized) /* Dropped value, do not care about coherency for this one */ continue; descr[n].handle = children[i]; descr[n].mode = STARPU_R; n++; } /* TODO: assert nparts too */ int ret = starpu_task_insert(initial_handle->switch_cl, initial_handle->initialized?STARPU_RW:STARPU_W, initial_handle, STARPU_DATA_MODE_ARRAY, descr, n, ///STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, handles_sequential_consistency, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } void _starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node, unsigned char *handles_sequential_consistency, void (*callback_func)(void *), void *callback_arg) { unsigned i; STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency"); STARPU_ASSERT_MSG(gather_node == initial_handle->home_node || gather_node == -1, "gathering node different from home node is currently not supported"); _starpu_spin_lock(&initial_handle->header_lock); STARPU_ASSERT_MSG(initial_handle->partitioned >= 1, "No partition planning is active for handle %p", initial_handle); STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts"); if (initial_handle->part_readonly) { /* Replace this children set with the last set in the list of readonly children sets */ for (i = 0; i < initial_handle->partitioned-1; i++) { if (initial_handle->active_readonly_children[i] == children[0]->siblings) { initial_handle->active_readonly_children[i] = initial_handle->active_readonly_children[initial_handle->partitioned-1]; initial_handle->active_readonly_nchildren[i] = initial_handle->active_readonly_nchildren[initial_handle->partitioned-1]; initial_handle->active_readonly_children[initial_handle->partitioned-1] = NULL; initial_handle->active_readonly_nchildren[initial_handle->partitioned-1] = 0; break; } } } else { initial_handle->active_nchildren = 0; initial_handle->active_children = NULL; } initial_handle->partitioned--; if (!initial_handle->partitioned) initial_handle->part_readonly = 0; initial_handle->active_nchildren = 0; initial_handle->active_children = NULL; _starpu_spin_unlock(&initial_handle->header_lock); for (i = 0; i < nparts; i++) { _starpu_spin_lock(&children[i]->header_lock); children[i]->active = 0; children[i]->active_ro = 0; _starpu_spin_unlock(&children[i]->header_lock); } unsigned n; struct starpu_data_descr descr[nparts]; for (i = 0, n = 0; i < nparts; i++) { STARPU_ASSERT_MSG(children[i]->father_handle == initial_handle, "child(%d) %p is partitioned from %p and not from the given parameter %p", i, children[i], children[i]->father_handle, initial_handle); if (!children[i]->initialized) /* Dropped value, do not care about coherency for this one */ continue; descr[n].handle = children[i]; descr[n].mode = STARPU_RW; n++; } /* TODO: assert nparts too */ int ret; if (handles_sequential_consistency) ret = starpu_task_insert(initial_handle->switch_cl, STARPU_W, initial_handle, STARPU_DATA_MODE_ARRAY, descr, n, STARPU_NAME, "unpartition", STARPU_HANDLES_SEQUENTIAL_CONSISTENCY, handles_sequential_consistency, STARPU_CALLBACK_WITH_ARG_NFREE, callback_func, callback_arg, 0); else ret = starpu_task_insert(initial_handle->switch_cl, STARPU_W, initial_handle, STARPU_DATA_MODE_ARRAY, descr, n, STARPU_NAME, "unpartition", STARPU_CALLBACK_WITH_ARG_NFREE, callback_func, callback_arg, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); for (i = 0; i < nparts; i++) { if (!handles_sequential_consistency || handles_sequential_consistency[i+1]) _starpu_data_invalidate_submit_noplan(children[i]); } } void starpu_data_unpartition_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node) { _starpu_data_unpartition_submit(initial_handle, nparts, children, gather_node, NULL, NULL, NULL); } void starpu_data_unpartition_submit_sequential_consistency_cb(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node, int sequential_consistency, void (*callback_func)(void *), void *callback_arg) { unsigned i; unsigned char handles_sequential_consistency[nparts+1]; handles_sequential_consistency[0] = sequential_consistency; for(i=1 ; isequential_consistency; _starpu_data_unpartition_submit(initial_handle, nparts, children, gather_node, handles_sequential_consistency, callback_func, callback_arg); } void starpu_data_unpartition_submit_sequential_consistency(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node, int sequential_consistency) { unsigned i; unsigned char handles_sequential_consistency[nparts+1]; handles_sequential_consistency[0] = sequential_consistency; for(i=1 ; isequential_consistency; _starpu_data_unpartition_submit(initial_handle, nparts, children, gather_node, handles_sequential_consistency, NULL, NULL); } void starpu_data_unpartition_readonly_submit(starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gather_node) { STARPU_ASSERT_MSG(initial_handle->sequential_consistency, "partition planning is currently only supported for data with sequential consistency"); STARPU_ASSERT_MSG(gather_node == initial_handle->home_node || gather_node == -1, "gathering node different from home node is currently not supported"); _starpu_spin_lock(&initial_handle->header_lock); STARPU_ASSERT_MSG(initial_handle->partitioned >= 1, "No partition planning is active for handle %p", initial_handle); STARPU_ASSERT_MSG(nparts > 0, "One can't partition into 0 parts"); initial_handle->part_readonly = 1; _starpu_spin_unlock(&initial_handle->header_lock); unsigned i, n; struct starpu_data_descr descr[nparts]; for (i = 0, n = 0; i < nparts; i++) { STARPU_ASSERT_MSG(children[i]->father_handle == initial_handle, "child(%d) %p is partitioned from %p and not from the given parameter %p", i, children[i], children[i]->father_handle, initial_handle); if (!children[i]->initialized) /* Dropped value, do not care about coherency for this one */ continue; descr[n].handle = children[i]; descr[n].mode = STARPU_R; n++; } /* TODO: assert nparts too */ int ret = starpu_task_insert(initial_handle->switch_cl, STARPU_W, initial_handle, STARPU_DATA_MODE_ARRAY, descr, n, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert") } /* Unpartition everything below ancestor */ static void starpu_data_unpartition_submit_r(starpu_data_handle_t ancestor, int gathering_node) { unsigned i, j, nsiblings; if (!ancestor->partitioned) /* It's already unpartitioned */ return; _STARPU_DEBUG("ancestor %p needs unpartitioning\n", ancestor); if (ancestor->part_readonly) { unsigned n = ancestor->partitioned; /* Uh, has to go through all read-only partitions */ for (i = 0; i < n; i++) { /* Note: active_readonly_children is emptied by starpu_data_unpartition_submit_r calls below, that's why we always refer to [0] here */ starpu_data_handle_t *children = ancestor->active_readonly_children[0]; _STARPU_DEBUG("unpartition readonly children %p etc.\n", children[0]); nsiblings = children[0]->nsiblings; for (j = 0; j < nsiblings; j++) { /* Make sure our children are unpartitioned */ starpu_data_unpartition_submit_r(children[j], gathering_node); } /* And unpartition them */ starpu_data_unpartition_submit(ancestor, nsiblings, children, gathering_node); } } else { _STARPU_DEBUG("unpartition children %p\n", ancestor->active_children); /* Only one partition */ nsiblings = ancestor->active_children[0]->nsiblings; for (i = 0; i < nsiblings; i++) starpu_data_unpartition_submit_r(ancestor->active_children[i], gathering_node); /* And unpartition ourself */ starpu_data_unpartition_submit(ancestor, nsiblings, ancestor->active_children, gathering_node); } } /* Make ancestor partition itself properly for target */ static void _starpu_data_partition_access_look_up(starpu_data_handle_t ancestor, starpu_data_handle_t target, int write) { /* First make sure ancestor has proper state, if not, ask father */ if (!ancestor->active || (write && ancestor->active_ro)) { /* (The root is always active-rw) */ STARPU_ASSERT(ancestor->father_handle); _STARPU_DEBUG("ancestor %p is not ready: %s, asking father %p\n", ancestor, ancestor->active ? ancestor->active_ro ? "RO" : "RW" : "NONE", ancestor->father_handle); _starpu_data_partition_access_look_up(ancestor->father_handle, ancestor, write); _STARPU_DEBUG("ancestor %p is now ready\n", ancestor); } else _STARPU_DEBUG("ancestor %p was ready\n", ancestor); /* We shouldn't be called for nothing */ STARPU_ASSERT(!ancestor->partitioned || !target || ancestor->active_children != target->siblings || (ancestor->part_readonly && write)); /* Then unpartition ancestor if needed */ if (ancestor->partitioned && /* Not the right children, unpartition ourself */ ((target && write && ancestor->active_children != target->siblings) || (target && !write && !ancestor->part_readonly) || /* We are partitioned and we want to write or some child * is writing and we want to read, unpartition ourself*/ (!target && (write || !ancestor->part_readonly)))) { #ifdef STARPU_DEVEL #warning FIXME: better choose gathering node #endif starpu_data_unpartition_submit_r(ancestor, ancestor->home_node); } if (!target) { _STARPU_DEBUG("ancestor %p is done\n", ancestor); /* No child target, nothing more to do actually. */ return; } /* Then partition ancestor towards target, if needed */ if (ancestor->partitioned) { /* That must be readonly, otherwise we would have unpartitioned it */ STARPU_ASSERT(ancestor->part_readonly); if (write) { _STARPU_DEBUG("ancestor %p is already partitioned RO, turn RW\n", ancestor); /* Already partitioned, normally it's already for the target */ STARPU_ASSERT(ancestor->active_children == target->siblings); /* And we are here just because we haven't partitioned rw */ STARPU_ASSERT(ancestor->part_readonly && write); /* So we just need to upgrade ro to rw */ starpu_data_partition_readwrite_upgrade_submit(ancestor, target->nsiblings, target->siblings); } else { _STARPU_DEBUG("ancestor %p is already partitioned RO, but not to target, partition towards target too\n", ancestor); /* So we just need to upgrade ro to rw */ starpu_data_partition_readonly_submit(ancestor, target->nsiblings, target->siblings); } } else { /* Just need to partition properly for the child */ if (write) { _STARPU_DEBUG("partition ancestor %p RW\n", ancestor); starpu_data_partition_submit(ancestor, target->nsiblings, target->siblings); } else { _STARPU_DEBUG("partition ancestor %p RO\n", ancestor); starpu_data_partition_readonly_submit(ancestor, target->nsiblings, target->siblings); } } } void _starpu_data_partition_access_submit(starpu_data_handle_t target, int write) { _STARPU_DEBUG("accessing %p %s\n", target, write ? "RW" : "RO"); _starpu_data_partition_access_look_up(target, NULL, write); } void starpu_filter_nparts_compute_chunk_size_and_offset(unsigned n, unsigned nparts, size_t elemsize, unsigned id, unsigned blocksize, unsigned *chunk_size, size_t *offset) { *chunk_size = n/nparts; unsigned remainder = n % nparts; if (id < remainder) (*chunk_size)++; /* * Computing the total offset. The formula may not be really clear, but * it really just is: * * total = 0; * for (i = 0; i < id; i++) * { * total += n/nparts; * if (i < n%nparts) * total++; * } * offset = total * elemsize * blocksize; */ if (offset != NULL) *offset = (id *(n/nparts) + STARPU_MIN(remainder, id)) * (size_t) blocksize * elemsize; } starpu-1.4.9+dfsg/src/datawizard/filters.h000066400000000000000000000021651507764646700205560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __FILTERS_H__ #define __FILTERS_H__ /** @file */ #include #include #include #include #include #pragma GCC visibility push(hidden) /** submit asynchronous unpartitioning / partitioning to make target active read-only or read-write */ void _starpu_data_partition_access_submit(starpu_data_handle_t target, int write); #pragma GCC visibility pop #endif starpu-1.4.9+dfsg/src/datawizard/footprint.c000066400000000000000000000070231507764646700211230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include uint32_t starpu_task_data_footprint(struct starpu_task *task) { uint32_t footprint = 0; unsigned buffer; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); for (buffer = 0; buffer < nbuffers; buffer++) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, buffer); enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, buffer); if (mode & STARPU_NOFOOTPRINT) continue; uint32_t handle_footprint = _starpu_data_get_footprint(handle); footprint = starpu_hash_crc32c_be(handle_footprint, footprint); } return footprint; } uint32_t _starpu_compute_buffers_footprint(struct starpu_perfmodel *model, struct starpu_perfmodel_arch* arch, unsigned nimpl, struct _starpu_job *j) { if (j->footprint_is_computed) return j->footprint; uint32_t footprint = 0; struct starpu_task *task = j->task; if (model) { if (model->footprint) { footprint = model->footprint(task); } else { struct starpu_perfmodel_per_arch *per_arch; if (arch) per_arch = starpu_perfmodel_get_model_per_arch(model, arch, nimpl); if (arch && per_arch != NULL && per_arch->size_base) { size_t size = per_arch->size_base(task, arch, nimpl); footprint = starpu_hash_crc32c_be_n(&size, sizeof(size), footprint); } else if (model->size_base) { size_t size = model->size_base(task, nimpl); footprint = starpu_hash_crc32c_be_n(&size, sizeof(size), footprint); } else { footprint = starpu_task_data_footprint(task); } } } else { footprint = starpu_task_data_footprint(task); } j->footprint = footprint; j->footprint_is_computed = 1; return footprint; } uint32_t _starpu_compute_data_footprint(starpu_data_handle_t handle) { uint32_t interfaceid = (uint32_t)starpu_data_get_interface_id(handle); uint32_t init = interfaceid < STARPU_MAX_INTERFACE_ID ? interfaceid : 0; STARPU_ASSERT(handle->ops->footprint); uint32_t handle_footprint = handle->ops->footprint(handle); return starpu_hash_crc32c_be(handle_footprint, init); } uint32_t _starpu_compute_data_alloc_footprint(starpu_data_handle_t handle) { uint32_t interfaceid = (uint32_t)starpu_data_get_interface_id(handle); uint32_t init = interfaceid < STARPU_MAX_INTERFACE_ID ? interfaceid : 0; uint32_t handle_footprint; if (handle->ops->alloc_footprint) handle_footprint = handle->ops->alloc_footprint(handle); else handle_footprint = handle->ops->footprint(handle); return starpu_hash_crc32c_be(handle_footprint, init); } uint32_t starpu_task_footprint(struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { struct _starpu_job *j = _starpu_get_job_associated_to_task(task); return _starpu_compute_buffers_footprint(model, arch, nimpl, j); } starpu-1.4.9+dfsg/src/datawizard/footprint.h000066400000000000000000000027531507764646700211350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __FOOTPRINT_H__ #define __FOOTPRINT_H__ /** @file */ #include #include #include #pragma GCC visibility push(hidden) /** Compute the footprint that characterizes the job and cache it into the job * structure. */ uint32_t _starpu_compute_buffers_footprint(struct starpu_perfmodel *model, struct starpu_perfmodel_arch * arch, unsigned nimpl, struct _starpu_job *j); /** Compute the footprint that characterizes the layout of the data handle. */ uint32_t _starpu_compute_data_footprint(starpu_data_handle_t handle); /** Compute the footprint that characterizes the allocation of the data handle. */ uint32_t _starpu_compute_data_alloc_footprint(starpu_data_handle_t handle); #pragma GCC visibility pop #endif // __FOOTPRINT_H__ starpu-1.4.9+dfsg/src/datawizard/interfaces/000077500000000000000000000000001507764646700210545ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/datawizard/interfaces/bcsr_filters.c000066400000000000000000000100121507764646700236730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include void starpu_bcsr_filter_vertical_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nparts) { struct starpu_bcsr_interface *bcsr_father = (struct starpu_bcsr_interface *) father_interface; struct starpu_bcsr_interface *bcsr_child = (struct starpu_bcsr_interface *) child_interface; size_t elemsize = bcsr_father->elemsize; uint32_t firstentry = bcsr_father->firstentry; uint32_t r = bcsr_father->r; uint32_t c = bcsr_father->c; uint32_t *ram_rowptr = bcsr_father->ram_rowptr; uint32_t *rowptr = bcsr_father->rowptr; unsigned child_nrow; size_t child_rowoffset; STARPU_ASSERT_MSG(bcsr_father->id == STARPU_BCSR_INTERFACE_ID, "%s can only be applied on a bcsr data", __func__); bcsr_child->id = bcsr_father->id; starpu_filter_nparts_compute_chunk_size_and_offset(bcsr_father->nrow, nparts, 1, id, 1, &child_nrow, &child_rowoffset); /* child blocks indexes between these (0-based) */ uint32_t start_block = ram_rowptr[child_rowoffset] - firstentry; uint32_t end_block = ram_rowptr[child_rowoffset + child_nrow] - firstentry; bcsr_child->nnz = end_block - start_block; bcsr_child->nrow = child_nrow; bcsr_child->firstentry = firstentry + start_block; bcsr_child->r = bcsr_father->r; bcsr_child->c = bcsr_father->c; bcsr_child->elemsize = elemsize; bcsr_child->ram_colind = bcsr_father->ram_colind + start_block; bcsr_child->ram_rowptr = ram_rowptr + child_rowoffset; if (bcsr_father->nzval) { bcsr_child->nzval = bcsr_father->nzval + start_block * r*c * elemsize; bcsr_child->colind = bcsr_father->colind + start_block; bcsr_child->rowptr = rowptr + child_rowoffset; } } void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nparts) { struct starpu_bcsr_interface *bcsr_father = (struct starpu_bcsr_interface *) father_interface; /* each chunk becomes a small dense matrix */ struct starpu_matrix_interface *matrix_child = (struct starpu_matrix_interface *) child_interface; size_t elemsize = bcsr_father->elemsize; uint32_t firstentry = bcsr_father->firstentry; /* size of the tiles */ uint32_t r = bcsr_father->r; uint32_t c = bcsr_father->c; uint32_t ptr_offset = c*r*id*elemsize; STARPU_ASSERT_MSG(bcsr_father->id == STARPU_BCSR_INTERFACE_ID, "%s can only be applied on a bcsr data", __func__); matrix_child->id = STARPU_MATRIX_INTERFACE_ID; matrix_child->nx = c; matrix_child->ny = r; matrix_child->ld = c; matrix_child->elemsize = elemsize; matrix_child->allocsize = c*r*elemsize; if (bcsr_father->nzval) { uint8_t *nzval = (uint8_t *)(bcsr_father->nzval); matrix_child->dev_handle = matrix_child->ptr = (uintptr_t)&nzval[firstentry + ptr_offset]; matrix_child->offset = 0; } } unsigned starpu_bcsr_filter_canonical_block_get_nchildren(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, starpu_data_handle_t handle) { return (unsigned)starpu_bcsr_get_nnz(handle); } struct starpu_data_interface_ops *starpu_bcsr_filter_canonical_block_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_matrix_ops; } starpu-1.4.9+dfsg/src/datawizard/interfaces/bcsr_interface.c000066400000000000000000000373571507764646700242100ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef BUILDING_STARPU #include #endif /* * BCSR : blocked CSR, we use blocks of size (r x c) */ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); static const struct starpu_data_copy_methods bcsr_copy_data_methods_s = { .any_to_any = copy_any_to_any, }; static void register_bcsr_handle(starpu_data_handle_t handle, int home_node, void *data_interface); static void *bcsr_to_pointer(void *data_interface, unsigned node); static starpu_ssize_t allocate_bcsr_buffer_on_node(void *data_interface, unsigned dst_node); static void free_bcsr_buffer_on_node(void *data_interface, unsigned node); static size_t bcsr_interface_get_size(starpu_data_handle_t handle); static int bcsr_compare(void *data_interface_a, void *data_interface_b); static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle_t handle); static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); static int pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); static int peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static int unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); struct starpu_data_interface_ops starpu_interface_bcsr_ops = { .register_data_handle = register_bcsr_handle, .allocate_data_on_node = allocate_bcsr_buffer_on_node, .free_data_on_node = free_bcsr_buffer_on_node, .copy_methods = &bcsr_copy_data_methods_s, .get_size = bcsr_interface_get_size, .interfaceid = STARPU_BCSR_INTERFACE_ID, .interface_size = sizeof(struct starpu_bcsr_interface), .footprint = footprint_bcsr_interface_crc32, .compare = bcsr_compare, .describe = describe, .to_pointer = bcsr_to_pointer, .name = "STARPU_BCSR_INTERFACE", .pack_data = pack_data, .peek_data = peek_data, .unpack_data = unpack_data, .pack_meta = NULL, .unpack_meta = NULL, .free_meta = NULL }; static void *bcsr_to_pointer(void *data_interface, unsigned node) { (void) node; struct starpu_bcsr_interface *bcsr_interface = data_interface; return (void*) bcsr_interface->nzval; } static void register_bcsr_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_bcsr_interface *bcsr_interface = (struct starpu_bcsr_interface *) data_interface; int node; uint32_t *ram_colind = NULL; uint32_t *ram_rowptr = NULL; if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) { ram_colind = bcsr_interface->colind; ram_rowptr = bcsr_interface->rowptr; } for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_bcsr_interface *local_interface = (struct starpu_bcsr_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->nzval = bcsr_interface->nzval; local_interface->colind = bcsr_interface->colind; local_interface->rowptr = bcsr_interface->rowptr; } else { local_interface->nzval = 0; local_interface->colind = NULL; local_interface->rowptr = NULL; } local_interface->ram_colind = ram_colind; local_interface->ram_rowptr = ram_rowptr; local_interface->id = bcsr_interface->id; local_interface->nnz = bcsr_interface->nnz; local_interface->nrow = bcsr_interface->nrow; local_interface->firstentry = bcsr_interface->firstentry; local_interface->r = bcsr_interface->r; local_interface->c = bcsr_interface->c; local_interface->elemsize = bcsr_interface->elemsize; } } void starpu_bcsr_data_register(starpu_data_handle_t *handleptr, int home_node, uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, uint32_t r, uint32_t c, size_t elemsize) { struct starpu_bcsr_interface bcsr_interface = { .id = STARPU_BCSR_INTERFACE_ID, .nzval = nzval, .colind = colind, .rowptr = rowptr, .nnz = nnz, .nrow = nrow, .firstentry = firstentry, .r = r, .c = c, .elemsize = elemsize }; #ifndef STARPU_SIMGRID if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) { if (nnz) { if (r && c && elemsize) { STARPU_ASSERT_ACCESSIBLE(nzval); STARPU_ASSERT_ACCESSIBLE(nzval + nnz*elemsize*r*c - 1); } STARPU_ASSERT_ACCESSIBLE(colind); STARPU_ASSERT_ACCESSIBLE((uintptr_t) colind + nnz*sizeof(uint32_t) - 1); } STARPU_ASSERT_ACCESSIBLE(rowptr); STARPU_ASSERT_ACCESSIBLE((uintptr_t) rowptr + (nrow+1)*sizeof(uint32_t) - 1); } #endif starpu_data_register(handleptr, home_node, &bcsr_interface, &starpu_interface_bcsr_ops); } static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle_t handle) { uint32_t hash; hash = starpu_hash_crc32c_be(starpu_bcsr_get_nnz(handle), 0); hash = starpu_hash_crc32c_be(starpu_bcsr_get_c(handle), hash); hash = starpu_hash_crc32c_be(starpu_bcsr_get_r(handle), hash); return hash; } static int bcsr_compare(void *data_interface_a, void *data_interface_b) { struct starpu_bcsr_interface *bcsr_a = (struct starpu_bcsr_interface *) data_interface_a; struct starpu_bcsr_interface *bcsr_b = (struct starpu_bcsr_interface *) data_interface_b; /* Two matrices are considered compatible if they have the same size */ return (bcsr_a->nnz == bcsr_b->nnz) && (bcsr_a->nrow == bcsr_b->nrow) && (bcsr_a->r == bcsr_b->r) && (bcsr_a->c == bcsr_b->c) && (bcsr_a->elemsize == bcsr_b->elemsize); } /* offer an access to the data parameters */ uint32_t starpu_bcsr_get_nnz(starpu_data_handle_t handle) { struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. The given data is not a bcsr."); #endif return data_interface->nnz; } uint32_t starpu_bcsr_get_nrow(starpu_data_handle_t handle) { struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. The given data is not a bcsr."); #endif return data_interface->nrow; } uint32_t starpu_bcsr_get_firstentry(starpu_data_handle_t handle) { struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. The given data is not a bcsr."); #endif return data_interface->firstentry; } uint32_t starpu_bcsr_get_r(starpu_data_handle_t handle) { struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. The given data is not a bcsr."); #endif return data_interface->r; } uint32_t starpu_bcsr_get_c(starpu_data_handle_t handle) { struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. The given data is not a bcsr."); #endif return data_interface->c; } size_t starpu_bcsr_get_elemsize(starpu_data_handle_t handle) { struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. The given data is not a bcsr."); #endif return data_interface->elemsize; } uintptr_t starpu_bcsr_get_local_nzval(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. The given data is not a bcsr."); #endif return data_interface->nzval; } uint32_t *starpu_bcsr_get_local_colind(starpu_data_handle_t handle) { int node; node = starpu_worker_get_local_memory_node(); /* XXX 0 */ struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. The given data is not a bcsr."); #endif return data_interface->colind; } uint32_t *starpu_bcsr_get_local_rowptr(starpu_data_handle_t handle) { int node; node = starpu_worker_get_local_memory_node(); /* XXX 0 */ struct starpu_bcsr_interface *data_interface = (struct starpu_bcsr_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(data_interface->id == STARPU_BCSR_INTERFACE_ID, "Error. The given data is not a bcsr."); #endif return data_interface->rowptr; } static size_t bcsr_interface_get_size(starpu_data_handle_t handle) { size_t size; uint32_t nnz = starpu_bcsr_get_nnz(handle); uint32_t nrow = starpu_bcsr_get_nrow(handle); uint32_t r = starpu_bcsr_get_r(handle); uint32_t c = starpu_bcsr_get_c(handle); size_t elemsize = starpu_bcsr_get_elemsize(handle); size = nnz*r*c*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t); return size; } /* memory allocation/deallocation primitives for the BLAS interface */ /* returns the size of the allocated area */ static starpu_ssize_t allocate_bcsr_buffer_on_node(void *data_interface_, unsigned dst_node) { uintptr_t addr_nzval, addr_colind, addr_rowptr; starpu_ssize_t allocated_memory; /* we need the 3 arrays to be allocated */ struct starpu_bcsr_interface *bcsr_interface = (struct starpu_bcsr_interface *) data_interface_; uint32_t nnz = bcsr_interface->nnz; uint32_t nrow = bcsr_interface->nrow; size_t elemsize = bcsr_interface->elemsize; uint32_t r = bcsr_interface->r; uint32_t c = bcsr_interface->c; STARPU_ASSERT_MSG(r && c, "partitioning bcsr with several memory nodes is not supported yet"); if (nnz) { addr_nzval = starpu_malloc_on_node(dst_node, nnz*r*c*elemsize); if (!addr_nzval) goto fail_nzval; addr_colind = starpu_malloc_on_node(dst_node, nnz*sizeof(uint32_t)); if (!addr_colind) goto fail_colind; } else { addr_nzval = addr_colind = 0; } addr_rowptr = starpu_malloc_on_node(dst_node, (nrow+1)*sizeof(uint32_t)); if (!addr_rowptr) goto fail_rowptr; /* allocation succeeded */ allocated_memory = nnz*r*c*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t); /* update the data properly in consequence */ bcsr_interface->nzval = addr_nzval; bcsr_interface->colind = (uint32_t*) addr_colind; bcsr_interface->rowptr = (uint32_t*) addr_rowptr; return allocated_memory; fail_rowptr: if (nnz) starpu_free_on_node(dst_node, addr_colind, nnz*sizeof(uint32_t)); fail_colind: if (nnz) starpu_free_on_node(dst_node, addr_nzval, nnz*r*c*elemsize); fail_nzval: /* allocation failed */ return -ENOMEM; } static void free_bcsr_buffer_on_node(void *data_interface, unsigned node) { struct starpu_bcsr_interface *bcsr_interface = (struct starpu_bcsr_interface *) data_interface; uint32_t nnz = bcsr_interface->nnz; uint32_t nrow = bcsr_interface->nrow; size_t elemsize = bcsr_interface->elemsize; uint32_t r = bcsr_interface->r; uint32_t c = bcsr_interface->c; if (nnz) { starpu_free_on_node(node, bcsr_interface->nzval, nnz*r*c*elemsize); bcsr_interface->nzval = 0; starpu_free_on_node(node, (uintptr_t) bcsr_interface->colind, nnz*sizeof(uint32_t)); bcsr_interface->colind = NULL; } starpu_free_on_node(node, (uintptr_t) bcsr_interface->rowptr, (nrow+1)*sizeof(uint32_t)); bcsr_interface->rowptr = NULL; } static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct starpu_bcsr_interface *src_bcsr = (struct starpu_bcsr_interface *) src_interface; struct starpu_bcsr_interface *dst_bcsr = (struct starpu_bcsr_interface *) dst_interface; uint32_t nnz = src_bcsr->nnz; uint32_t nrow = src_bcsr->nrow; size_t elemsize = src_bcsr->elemsize; uint32_t r = src_bcsr->r; uint32_t c = src_bcsr->c; int ret = 0; if (nnz) { if (starpu_interface_copy(src_bcsr->nzval, 0, src_node, dst_bcsr->nzval, 0, dst_node, nnz*elemsize*r*c, async_data)) ret = -EAGAIN; if (starpu_interface_copy((uintptr_t)src_bcsr->colind, 0, src_node, (uintptr_t)dst_bcsr->colind, 0, dst_node, nnz*sizeof(uint32_t), async_data)) ret = -EAGAIN; } if (starpu_interface_copy((uintptr_t)src_bcsr->rowptr, 0, src_node, (uintptr_t)dst_bcsr->rowptr, 0, dst_node, (nrow+1)*sizeof(uint32_t), async_data)) ret = -EAGAIN; starpu_interface_data_copy(src_node, dst_node, nnz*elemsize*r*c + (nnz+nrow+1)*sizeof(uint32_t)); return ret; } static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) { struct starpu_bcsr_interface *bcsr = (struct starpu_bcsr_interface *) data_interface; return snprintf(buf, size, "b%ux%ux%ux%ux%u", (unsigned) bcsr->nnz, (unsigned) bcsr->nrow, (unsigned) bcsr->r, (unsigned) bcsr->c, (unsigned) bcsr->elemsize); } static int pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_bcsr_interface *bcsr = (struct starpu_bcsr_interface *) starpu_data_get_interface_on_node(handle, node); // We first pack colind *count = bcsr->nnz * sizeof(bcsr->colind[0]); // Then rowptr *count += (bcsr->nrow + 1) * sizeof(bcsr->rowptr[0]); // Then nnzval *count += bcsr->r * bcsr->c * bcsr->nnz * bcsr->elemsize; if (ptr != NULL) { *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); char *tmp = *ptr; if (bcsr->nnz) { memcpy(tmp, (void*)bcsr->colind, bcsr->nnz * sizeof(bcsr->colind[0])); tmp += bcsr->nnz * sizeof(bcsr->colind[0]); memcpy(tmp, (void*)bcsr->rowptr, (bcsr->nrow + 1) * sizeof(bcsr->rowptr[0])); tmp += (bcsr->nrow + 1) * sizeof(bcsr->rowptr[0]); } memcpy(tmp, (void*)bcsr->nzval, bcsr->r * bcsr->c * bcsr->nnz * bcsr->elemsize); } return 0; } static int peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_bcsr_interface *bcsr = (struct starpu_bcsr_interface *) starpu_data_get_interface_on_node(handle, node); STARPU_ASSERT(count == (bcsr->nnz * sizeof(bcsr->colind[0]))+((bcsr->nrow + 1) * sizeof(bcsr->rowptr[0]))+(bcsr->r * bcsr->c * bcsr->nnz * bcsr->elemsize)); char *tmp = ptr; if (bcsr->nnz) { memcpy((void*)bcsr->colind, tmp, bcsr->nnz * sizeof(bcsr->colind[0])); tmp += bcsr->nnz * sizeof(bcsr->colind[0]); memcpy((void*)bcsr->rowptr, tmp, (bcsr->nrow + 1) * sizeof(bcsr->rowptr[0])); tmp += (bcsr->nrow + 1) * sizeof(bcsr->rowptr[0]); } memcpy((void*)bcsr->nzval, tmp, bcsr->r * bcsr->c * bcsr->nnz * bcsr->elemsize); return 0; } static int unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { peek_data(handle, node, ptr, count); starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); return 0; } starpu-1.4.9+dfsg/src/datawizard/interfaces/block_filters.c000066400000000000000000000227221507764646700240470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include static void _starpu_block_filter_block(int dim, void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts, uintptr_t shadow_size) { struct starpu_block_interface *block_father = (struct starpu_block_interface *) father_interface; struct starpu_block_interface *block_child = (struct starpu_block_interface *) child_interface; unsigned blocksize; /* the element will be split, in case horizontal, it's nx, in case vertical, it's ny, in case depth, it's nz*/ uint32_t nn; uint32_t nx; uint32_t ny; uint32_t nz; switch(dim) { /* horizontal*/ case 1: /* actual number of elements */ nx = block_father->nx - 2 * shadow_size; ny = block_father->ny; nz = block_father->nz; nn = nx; blocksize = 1; break; /* vertical*/ case 2: nx = block_father->nx; /* actual number of elements */ ny = block_father->ny - 2 * shadow_size; nz = block_father->nz; nn = ny; blocksize = block_father->ldy; break; /* depth*/ case 3: nx = block_father->nx; ny = block_father->ny; /* actual number of elements */ nz = block_father->nz - 2 * shadow_size; nn = nz; blocksize = block_father->ldz; break; default: STARPU_ASSERT_MSG(0, "Unknown value for dim"); } size_t elemsize = block_father->elemsize; STARPU_ASSERT_MSG(nparts <= nn, "cannot split %u elements in %u parts", nn, nparts); uint32_t child_nn; size_t offset; starpu_filter_nparts_compute_chunk_size_and_offset(nn, nparts, elemsize, id, blocksize, &child_nn, &offset); child_nn += 2 * shadow_size; STARPU_ASSERT_MSG(block_father->id == STARPU_BLOCK_INTERFACE_ID, "%s can only be applied on a block data", __func__); block_child->id = block_father->id; switch(dim) { case 1: block_child->nx = child_nn; block_child->ny = ny; block_child->nz = nz; break; case 2: block_child->nx = nx; block_child->ny = child_nn; block_child->nz = nz; break; case 3: block_child->nx = nx; block_child->ny = ny; block_child->nz = child_nn; break; } block_child->elemsize = elemsize; if (block_father->dev_handle) { if (block_father->ptr) block_child->ptr = block_father->ptr + offset; block_child->ldy = block_father->ldy; block_child->ldz = block_father->ldz; block_child->dev_handle = block_father->dev_handle; block_child->offset = block_father->offset + offset; } } void starpu_block_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { _starpu_block_filter_block(1, father_interface, child_interface, f, id, nparts, 0); } void starpu_block_filter_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; _starpu_block_filter_block(1, father_interface, child_interface, f, id, nparts, shadow_size); } void starpu_block_filter_vertical_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { _starpu_block_filter_block(2, father_interface, child_interface, f, id, nparts, 0); } void starpu_block_filter_vertical_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; _starpu_block_filter_block(2, father_interface, child_interface, f, id, nparts, shadow_size); } void starpu_block_filter_depth_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { _starpu_block_filter_block(3, father_interface, child_interface, f, id, nparts, 0); } void starpu_block_filter_depth_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; _starpu_block_filter_block(3, father_interface, child_interface, f, id, nparts, shadow_size); } static void _starpu_block_filter_pick_matrix(int dim, void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_block_interface *block_father = (struct starpu_block_interface *) father_interface; struct starpu_matrix_interface *matrix_child = (struct starpu_matrix_interface *) child_interface; unsigned blocksize; uint32_t nn; uint32_t nx = block_father->nx; uint32_t ny = block_father->ny; uint32_t nz = block_father->nz; switch(dim) { /* along y-axis */ case 1: nn = ny; blocksize = block_father->ldy; break; /* along z-axis */ case 2: nn = nz; blocksize = block_father->ldz; break; default: STARPU_ASSERT_MSG(0, "Unknown value for dim"); } size_t elemsize = block_father->elemsize; size_t chunk_pos = (size_t)f->filter_arg_ptr; STARPU_ASSERT_MSG(nparts <= nn, "cannot get %u matrix", nparts); STARPU_ASSERT_MSG((chunk_pos + id) < nn, "the chosen matrix should be in the block"); size_t offset = (chunk_pos + id) * blocksize * elemsize; STARPU_ASSERT_MSG(block_father->id == STARPU_BLOCK_INTERFACE_ID, "%s can only be applied on a block data", __func__); matrix_child->id = STARPU_MATRIX_INTERFACE_ID; switch(dim) { /* along y-axis */ case 1: matrix_child->nx = nx; matrix_child->ny = nz; break; /* along z-axis */ case 2: matrix_child->nx = nx; matrix_child->ny = ny; break; default: STARPU_ASSERT_MSG(0, "Unknown value for dim"); } matrix_child->elemsize = elemsize; matrix_child->allocsize = matrix_child->nx * matrix_child->ny * elemsize; if (block_father->dev_handle) { if (block_father->ptr) matrix_child->ptr = block_father->ptr + offset; switch(dim) { /* along y-axis */ case 1: matrix_child->ld = block_father->ldz; break; /* along z-axis */ case 2: matrix_child->ld = block_father->ldy; break; default: STARPU_ASSERT_MSG(0, "Unknown value for dim"); } matrix_child->dev_handle = block_father->dev_handle; matrix_child->offset = block_father->offset + offset; } } void starpu_block_filter_pick_matrix_z(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { _starpu_block_filter_pick_matrix(2, father_interface, child_interface, f, id, nparts); } void starpu_block_filter_pick_matrix_y(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { _starpu_block_filter_pick_matrix(1, father_interface, child_interface, f, id, nparts); } struct starpu_data_interface_ops *starpu_block_filter_pick_matrix_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_matrix_ops; } void starpu_block_filter_pick_variable(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks) { struct starpu_block_interface *block_father = (struct starpu_block_interface *) father_interface; /* each chunk becomes a variable */ struct starpu_variable_interface *variable_child = (struct starpu_variable_interface *) child_interface; uint32_t nx = block_father->nx; uint32_t ny = block_father->ny; uint32_t nz = block_father->nz; unsigned ldy = block_father->ldy; unsigned ldz = block_father->ldz; size_t elemsize = block_father->elemsize; uint32_t* chunk_pos = (uint32_t*)f->filter_arg_ptr; // int i; // for(i=0; i<3; i++) // { // printf("pos is %d\n", chunk_pos[i]); // } STARPU_ASSERT_MSG((chunk_pos[0] < nx)&&(chunk_pos[1] < ny)&&(chunk_pos[2] < nz), "the chosen variable should be in the block"); size_t offset = (chunk_pos[2] * ldz + chunk_pos[1] * ldy + chunk_pos[0]) * elemsize; STARPU_ASSERT_MSG(block_father->id == STARPU_BLOCK_INTERFACE_ID, "%s can only be applied on a block data", __func__); /* update the child's interface */ variable_child->id = STARPU_VARIABLE_INTERFACE_ID; variable_child->elemsize = elemsize; /* is the information on this node valid ? */ if (block_father->dev_handle) { if (block_father->ptr) variable_child->ptr = block_father->ptr + offset; variable_child->dev_handle = block_father->dev_handle; variable_child->offset = block_father->offset + offset; } } struct starpu_data_interface_ops *starpu_block_filter_pick_variable_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_variable_ops; } starpu-1.4.9+dfsg/src/datawizard/interfaces/block_interface.c000066400000000000000000000432111507764646700243330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef BUILDING_STARPU #include #endif static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); static int map_block(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int unmap_block(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int update_map_block(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static const struct starpu_data_copy_methods block_copy_data_methods_s = { .any_to_any = copy_any_to_any, }; static void register_block_handle(starpu_data_handle_t handle, int home_node, void *data_interface); static void *block_to_pointer(void *data_interface, unsigned node); static starpu_ssize_t allocate_block_buffer_on_node(void *data_interface_, unsigned dst_node); static void free_block_buffer_on_node(void *data_interface, unsigned node); static size_t block_interface_get_size(starpu_data_handle_t handle); static uint32_t footprint_block_interface_crc32(starpu_data_handle_t handle); static int block_compare(void *data_interface_a, void *data_interface_b); static void display_block_interface(starpu_data_handle_t handle, FILE *f); static int pack_block_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); static int peek_block_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static int unpack_block_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); struct starpu_data_interface_ops starpu_interface_block_ops = { .register_data_handle = register_block_handle, .allocate_data_on_node = allocate_block_buffer_on_node, .to_pointer = block_to_pointer, .free_data_on_node = free_block_buffer_on_node, .map_data = map_block, .unmap_data = unmap_block, .update_map = update_map_block, .copy_methods = &block_copy_data_methods_s, .get_size = block_interface_get_size, .footprint = footprint_block_interface_crc32, .compare = block_compare, .interfaceid = STARPU_BLOCK_INTERFACE_ID, .interface_size = sizeof(struct starpu_block_interface), .display = display_block_interface, .pack_data = pack_block_handle, .peek_data = peek_block_handle, .unpack_data = unpack_block_handle, .describe = describe, .name = "STARPU_BLOCK_INTERFACE", .pack_meta = NULL, .unpack_meta = NULL, .free_meta = NULL }; static void *block_to_pointer(void *data_interface, unsigned node) { (void) node; struct starpu_block_interface *block_interface = data_interface; return (void*) block_interface->ptr; } static void register_block_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_block_interface *block_interface = (struct starpu_block_interface *) data_interface; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_block_interface *local_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->ptr = block_interface->ptr; local_interface->dev_handle = block_interface->dev_handle; local_interface->offset = block_interface->offset; local_interface->ldy = block_interface->ldy; local_interface->ldz = block_interface->ldz; } else { local_interface->ptr = 0; local_interface->dev_handle = 0; local_interface->offset = 0; local_interface->ldy = 0; local_interface->ldz = 0; } local_interface->id = block_interface->id; local_interface->nx = block_interface->nx; local_interface->ny = block_interface->ny; local_interface->nz = block_interface->nz; local_interface->elemsize = block_interface->elemsize; } } /* declare a new data with the BLAS interface */ void starpu_block_data_register(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t nx, uint32_t ny, uint32_t nz, size_t elemsize) { STARPU_ASSERT_MSG(ldy >= nx, "ldy = %u should not be less than nx = %u.", ldy, nx); STARPU_ASSERT_MSG(ldz/ldy >= ny, "ldz/ldy = %u/%u = %u should not be less than ny = %u.", ldz, ldy, ldz/ldy, ny); struct starpu_block_interface block_interface = { .id = STARPU_BLOCK_INTERFACE_ID, .ptr = ptr, .dev_handle = ptr, .offset = 0, .ldy = ldy, .ldz = ldz, .nx = nx, .ny = ny, .nz = nz, .elemsize = elemsize }; #ifndef STARPU_SIMGRID if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) { if (nx && ny && nz && elemsize) { STARPU_ASSERT_ACCESSIBLE(ptr); STARPU_ASSERT_ACCESSIBLE(ptr + (nz-1)*ldz*elemsize + (ny-1)*ldy*elemsize + nx*elemsize - 1); } } #endif starpu_data_register(handleptr, home_node, &block_interface, &starpu_interface_block_ops); } void starpu_block_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz) { struct starpu_block_interface *block_interface = starpu_data_get_interface_on_node(handle, node); starpu_data_ptr_register(handle, node); block_interface->ptr = ptr; block_interface->dev_handle = dev_handle; block_interface->offset = offset; block_interface->ldy = ldy; block_interface->ldz = ldz; } static uint32_t footprint_block_interface_crc32(starpu_data_handle_t handle) { uint32_t hash; hash = starpu_hash_crc32c_be(starpu_block_get_nx(handle), 0); hash = starpu_hash_crc32c_be(starpu_block_get_ny(handle), hash); hash = starpu_hash_crc32c_be(starpu_block_get_nz(handle), hash); return hash; } static int block_compare(void *data_interface_a, void *data_interface_b) { struct starpu_block_interface *block_a = (struct starpu_block_interface *) data_interface_a; struct starpu_block_interface *block_b = (struct starpu_block_interface *) data_interface_b; /* Two blocks are considered compatible if they have the same size */ return (block_a->nx == block_b->nx) && (block_a->ny == block_b->ny) && (block_a->nz == block_b->nz) && (block_a->elemsize == block_b->elemsize); } static void display_block_interface(starpu_data_handle_t handle, FILE *f) { struct starpu_block_interface *block_interface; block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); fprintf(f, "%u\t%u\t%u\t", block_interface->nx, block_interface->ny, block_interface->nz); } #define IS_CONTIGUOUS_MATRIX(nx, ny, ldy) ((nx) == (ldy)) #define IS_CONTIGUOUS_BLOCK(nx, ny, nz, ldy, ldz) ((nx) * (ny) == (ldz)) static int pack_block_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_block_interface *block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, node); uint32_t ldy = block_interface->ldy; uint32_t ldz = block_interface->ldz; uint32_t nx = block_interface->nx; uint32_t ny = block_interface->ny; uint32_t nz = block_interface->nz; size_t elemsize = block_interface->elemsize; *count = nx*ny*nz*elemsize; if (ptr != NULL) { uint32_t z, y; char *block = (void *)block_interface->ptr; *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); char *cur = *ptr; if (IS_CONTIGUOUS_BLOCK(nx, ny, nz, ldy, ldz)) memcpy(cur, block, nx * ny * nz * elemsize); else { char *block_z = block; for(z=0 ; zldy; uint32_t ldz = block_interface->ldz; uint32_t nx = block_interface->nx; uint32_t ny = block_interface->ny; uint32_t nz = block_interface->nz; size_t elemsize = block_interface->elemsize; STARPU_ASSERT(count == elemsize * nx * ny * nz); uint32_t z, y; char *cur = ptr; char *block = (void *)block_interface->ptr; if (IS_CONTIGUOUS_BLOCK(nx, ny, nz, ldy, ldz)) memcpy(block, cur, nx * ny * nz * elemsize); else { char *block_z = block; for(z=0 ; zid == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block."); #endif size = block_interface->nx*block_interface->ny*block_interface->nz*block_interface->elemsize; return size; } /* offer an access to the data parameters */ uint32_t starpu_block_get_nx(starpu_data_handle_t handle) { struct starpu_block_interface *block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(block_interface->id == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block."); #endif return block_interface->nx; } uint32_t starpu_block_get_ny(starpu_data_handle_t handle) { struct starpu_block_interface *block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(block_interface->id == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block."); #endif return block_interface->ny; } uint32_t starpu_block_get_nz(starpu_data_handle_t handle) { struct starpu_block_interface *block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(block_interface->id == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block."); #endif return block_interface->nz; } uint32_t starpu_block_get_local_ldy(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_block_interface *block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(block_interface->id == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block."); #endif return block_interface->ldy; } uint32_t starpu_block_get_local_ldz(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_block_interface *block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(block_interface->id == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block."); #endif return block_interface->ldz; } uintptr_t starpu_block_get_local_ptr(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_block_interface *block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(block_interface->id == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block."); #endif return block_interface->ptr; } size_t starpu_block_get_elemsize(starpu_data_handle_t handle) { struct starpu_block_interface *block_interface = (struct starpu_block_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(block_interface->id == STARPU_BLOCK_INTERFACE_ID, "Error. The given data is not a block."); #endif return block_interface->elemsize; } /* memory allocation/deallocation primitives for the BLOCK interface */ /* returns the size of the allocated area */ static starpu_ssize_t allocate_block_buffer_on_node(void *data_interface_, unsigned dst_node) { uintptr_t addr = 0, handle; struct starpu_block_interface *dst_block = (struct starpu_block_interface *) data_interface_; uint32_t nx = dst_block->nx; uint32_t ny = dst_block->ny; uint32_t nz = dst_block->nz; size_t elemsize = dst_block->elemsize; starpu_ssize_t allocated_memory; handle = starpu_malloc_on_node(dst_node, nx*ny*nz*elemsize); if (!handle) return -ENOMEM; if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) addr = handle; allocated_memory = nx*ny*nz*elemsize; /* update the data properly in consequence */ dst_block->ptr = addr; dst_block->dev_handle = handle; dst_block->offset = 0; dst_block->ldy = nx; dst_block->ldz = nx*ny; return allocated_memory; } static void free_block_buffer_on_node(void *data_interface, unsigned node) { struct starpu_block_interface *block_interface = (struct starpu_block_interface *) data_interface; uint32_t nx = block_interface->nx; uint32_t ny = block_interface->ny; uint32_t nz = block_interface->nz; size_t elemsize = block_interface->elemsize; starpu_free_on_node(node, block_interface->dev_handle, nx*ny*nz*elemsize); block_interface->ptr = 0; block_interface->dev_handle = 0; } static int map_block(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_block_interface *src_block = src_interface; struct starpu_block_interface *dst_block = dst_interface; int ret; uintptr_t mapped; /* map area ldz*(nz-1)+ldy*(ny-1)+nx */ mapped = starpu_interface_map(src_block->dev_handle, src_block->offset, src_node, dst_node, (src_block->ldz*(src_block->nz-1)+src_block->ldy*(src_block->ny-1)+src_block->nx)*src_block->elemsize, &ret); if (mapped) { dst_block->dev_handle = mapped; dst_block->offset = 0; if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) dst_block->ptr = mapped; dst_block->ldy = src_block->ldy; dst_block->ldz = src_block->ldz; return 0; } return ret; } static int unmap_block(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_block_interface *src_block = src_interface; struct starpu_block_interface *dst_block = dst_interface; int ret = starpu_interface_unmap(src_block->dev_handle, src_block->offset, src_node, dst_block->dev_handle, dst_node, (src_block->ldz*(src_block->nz-1)+src_block->ldy*(src_block->ny-1)+src_block->nx)*src_block->elemsize); dst_block->dev_handle = 0; return ret; } static int update_map_block(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_block_interface *src_block = src_interface; struct starpu_block_interface *dst_block = dst_interface; return starpu_interface_update_map(src_block->dev_handle, src_block->offset, src_node, dst_block->dev_handle, dst_block->offset, dst_node, (src_block->ldz*(src_block->nz-1)+src_block->ldy*(src_block->ny-1)+src_block->nx)*src_block->elemsize); } static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct starpu_block_interface *src_block = (struct starpu_block_interface *) src_interface; struct starpu_block_interface *dst_block = (struct starpu_block_interface *) dst_interface; int ret = 0; uint32_t nx = dst_block->nx; uint32_t ny = dst_block->ny; uint32_t nz = dst_block->nz; size_t elemsize = dst_block->elemsize; uint32_t ldy_src = src_block->ldy; uint32_t ldz_src = src_block->ldz; uint32_t ldy_dst = dst_block->ldy; uint32_t ldz_dst = dst_block->ldz; if (starpu_interface_copy3d(src_block->dev_handle, src_block->offset, src_node, dst_block->dev_handle, dst_block->offset, dst_node, nx * elemsize, ny, ldy_src * elemsize, ldy_dst * elemsize, nz, ldz_src * elemsize, ldz_dst * elemsize, async_data)) ret = -EAGAIN; starpu_interface_data_copy(src_node, dst_node, nx*ny*nz*elemsize); return ret; } static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) { struct starpu_block_interface *block = (struct starpu_block_interface *) data_interface; return snprintf(buf, size, "B%ux%ux%ux%u", (unsigned) block->nx, (unsigned) block->ny, (unsigned) block->nz, (unsigned) block->elemsize); } starpu-1.4.9+dfsg/src/datawizard/interfaces/coo_interface.c000066400000000000000000000176221507764646700240300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef BUILDING_STARPU #include #endif static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { size_t size = 0; struct starpu_coo_interface *src_coo, *dst_coo; int ret = 0; src_coo = (struct starpu_coo_interface *) src_interface; dst_coo = (struct starpu_coo_interface *) dst_interface; size = src_coo->n_values * sizeof(src_coo->columns[0]); if (starpu_interface_copy( (uintptr_t) src_coo->columns, 0, src_node, (uintptr_t) dst_coo->columns, 0, dst_node, size, async_data)) ret = -EAGAIN; /* sizeof(src_coo->columns[0]) == sizeof(src_coo->rows[0]) */ if (starpu_interface_copy( (uintptr_t) src_coo->rows, 0, src_node, (uintptr_t) dst_coo->rows, 0, dst_node, size, async_data)) ret = -EAGAIN; size = src_coo->n_values * src_coo->elemsize; if (starpu_interface_copy( src_coo->values, 0, src_node, dst_coo->values, 0, dst_node, size, async_data)) ret = -EAGAIN; starpu_interface_data_copy(src_node, dst_node, src_coo->n_values * (2 * sizeof(src_coo->rows[0]) + src_coo->elemsize)); return ret; } static const struct starpu_data_copy_methods coo_copy_data_methods = { .any_to_any = copy_any_to_any, }; static void register_coo_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_coo_interface *coo_interface = (struct starpu_coo_interface *) data_interface; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_coo_interface *local_interface; local_interface = (struct starpu_coo_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->values = coo_interface->values; local_interface->columns = coo_interface->columns; local_interface->rows = coo_interface->rows; } else { local_interface->values = 0; local_interface->columns = 0; local_interface->rows = 0; } local_interface->id = coo_interface->id; local_interface->nx = coo_interface->nx; local_interface->ny = coo_interface->ny; local_interface->n_values = coo_interface->n_values; local_interface->elemsize = coo_interface->elemsize; } } static starpu_ssize_t allocate_coo_buffer_on_node(void *data_interface, unsigned dst_node) { uint32_t *addr_columns; uint32_t *addr_rows; uintptr_t addr_values; struct starpu_coo_interface *coo_interface = (struct starpu_coo_interface *) data_interface; uint32_t n_values = coo_interface->n_values; size_t elemsize = coo_interface->elemsize; addr_columns = (void*) starpu_malloc_on_node(dst_node, n_values * sizeof(coo_interface->columns[0])); if (STARPU_UNLIKELY(addr_columns == NULL)) goto fail_columns; addr_rows = (void*) starpu_malloc_on_node(dst_node, n_values * sizeof(coo_interface->rows[0])); if (STARPU_UNLIKELY(addr_rows == NULL)) goto fail_rows; addr_values = starpu_malloc_on_node(dst_node, n_values * elemsize); if (STARPU_UNLIKELY(addr_values == (uintptr_t) NULL)) goto fail_values; coo_interface->columns = addr_columns; coo_interface->rows = addr_rows; coo_interface->values = addr_values; return n_values * (sizeof(coo_interface->columns[0]) + sizeof(coo_interface->rows[0]) + elemsize); fail_values: starpu_free_on_node(dst_node, (uintptr_t) addr_rows, n_values * sizeof(coo_interface->rows[0])); fail_rows: starpu_free_on_node(dst_node, (uintptr_t) addr_columns, n_values * sizeof(coo_interface->columns[0])); fail_columns: return -ENOMEM; } static void free_coo_buffer_on_node(void *data_interface, unsigned node) { struct starpu_coo_interface *coo_interface = (struct starpu_coo_interface *) data_interface; uint32_t n_values = coo_interface->n_values; size_t elemsize = coo_interface->elemsize; starpu_free_on_node(node, (uintptr_t) coo_interface->columns, n_values * sizeof(coo_interface->columns[0])); coo_interface->columns = NULL; starpu_free_on_node(node, (uintptr_t) coo_interface->rows, n_values * sizeof(coo_interface->rows[0])); coo_interface->rows = NULL; starpu_free_on_node(node, coo_interface->values, n_values * elemsize); coo_interface->values = 0; } static size_t coo_interface_get_size(starpu_data_handle_t handle) { struct starpu_coo_interface *coo_interface; coo_interface = (struct starpu_coo_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return coo_interface->nx * coo_interface->ny * coo_interface->elemsize; } static uint32_t coo_interface_footprint(starpu_data_handle_t handle) { struct starpu_coo_interface *coo_interface; coo_interface = (struct starpu_coo_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return starpu_hash_crc32c_be(coo_interface->nx * coo_interface->ny, 0); } static int coo_compare(void *a, void *b) { struct starpu_coo_interface *coo_a, *coo_b; coo_a = (struct starpu_coo_interface *) a; coo_b = (struct starpu_coo_interface *) b; return coo_a->nx == coo_b->nx && coo_a->ny == coo_b->ny && coo_a->n_values == coo_b->n_values && coo_a->elemsize == coo_b->elemsize; } static void display_coo_interface(starpu_data_handle_t handle, FILE *f) { struct starpu_coo_interface *coo_interface; coo_interface = (struct starpu_coo_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); fprintf(f, "%u\t%u", coo_interface->nx, coo_interface->ny); } static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) { struct starpu_coo_interface *coo = (struct starpu_coo_interface *) data_interface; return snprintf(buf, size, "M%ux%ux%ux%u", (unsigned) coo->nx, (unsigned) coo->ny, (unsigned) coo->n_values, (unsigned) coo->elemsize); } struct starpu_data_interface_ops starpu_interface_coo_ops = { .register_data_handle = register_coo_handle, .allocate_data_on_node = allocate_coo_buffer_on_node, .to_pointer = NULL, .free_data_on_node = free_coo_buffer_on_node, .copy_methods = &coo_copy_data_methods, .get_size = coo_interface_get_size, .footprint = coo_interface_footprint, .compare = coo_compare, .interfaceid = STARPU_COO_INTERFACE_ID, .interface_size = sizeof(struct starpu_coo_interface), .display = display_coo_interface, .describe = describe, .name = "STARPU_COO_INTERFACE" }; void starpu_coo_data_register(starpu_data_handle_t *handleptr, int home_node, uint32_t nx, uint32_t ny, uint32_t n_values, uint32_t *columns, uint32_t *rows, uintptr_t values, size_t elemsize) { struct starpu_coo_interface coo_interface = { .id = STARPU_COO_INTERFACE_ID, .values = values, .columns = columns, .rows = rows, .nx = nx, .ny = ny, .n_values = n_values, .elemsize = elemsize, }; #ifndef STARPU_SIMGRID if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) { if (n_values) { STARPU_ASSERT_ACCESSIBLE(columns); STARPU_ASSERT_ACCESSIBLE((uintptr_t) columns + n_values*sizeof(uint32_t) - 1); STARPU_ASSERT_ACCESSIBLE(rows); STARPU_ASSERT_ACCESSIBLE((uintptr_t) rows + n_values*sizeof(uint32_t) - 1); } STARPU_ASSERT_ACCESSIBLE(values); STARPU_ASSERT_ACCESSIBLE(values + n_values*elemsize - 1); } #endif starpu_data_register(handleptr, home_node, &coo_interface, &starpu_interface_coo_ops); } starpu-1.4.9+dfsg/src/datawizard/interfaces/csr_filters.c000066400000000000000000000044131507764646700235410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include void starpu_csr_filter_vertical_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) { struct starpu_csr_interface *csr_father = (struct starpu_csr_interface *) father_interface; struct starpu_csr_interface *csr_child = (struct starpu_csr_interface *) child_interface; uint32_t nrow = csr_father->nrow; size_t elemsize = csr_father->elemsize; uint32_t firstentry = csr_father->firstentry; uint32_t *ram_rowptr = csr_father->ram_rowptr; size_t first_index; unsigned child_nrow; starpu_filter_nparts_compute_chunk_size_and_offset(nrow, nchunks, 1, id, 1, &child_nrow, &first_index); uint32_t local_firstentry = ram_rowptr[first_index] - firstentry; uint32_t local_lastentry = ram_rowptr[first_index + child_nrow] - firstentry; uint32_t local_nnz = local_lastentry - local_firstentry; STARPU_ASSERT_MSG(csr_father->id == STARPU_CSR_INTERFACE_ID, "%s can only be applied on a csr data", __func__); csr_child->id = csr_father->id; csr_child->nnz = local_nnz; csr_child->nrow = child_nrow; csr_child->firstentry = local_firstentry; csr_child->elemsize = elemsize; csr_child->ram_colind = &csr_father->ram_colind[local_firstentry]; csr_child->ram_rowptr = &ram_rowptr[first_index]; if (csr_father->nzval) { csr_child->rowptr = &csr_father->rowptr[first_index]; csr_child->colind = &csr_father->colind[local_firstentry]; csr_child->nzval = csr_father->nzval + local_firstentry * elemsize; } } starpu-1.4.9+dfsg/src/datawizard/interfaces/csr_interface.c000066400000000000000000000335701507764646700240370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef BUILDING_STARPU #include #endif static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); static const struct starpu_data_copy_methods csr_copy_data_methods_s = { .any_to_any = copy_any_to_any, }; static void register_csr_handle(starpu_data_handle_t handle, int home_node, void *data_interface); static starpu_ssize_t allocate_csr_buffer_on_node(void *data_interface_, unsigned dst_node); static void free_csr_buffer_on_node(void *data_interface, unsigned node); static size_t csr_interface_get_size(starpu_data_handle_t handle); static int csr_compare(void *data_interface_a, void *data_interface_b); static uint32_t footprint_csr_interface_crc32(starpu_data_handle_t handle); static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); static int pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); static int peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static int unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); struct starpu_data_interface_ops starpu_interface_csr_ops = { .register_data_handle = register_csr_handle, .allocate_data_on_node = allocate_csr_buffer_on_node, .free_data_on_node = free_csr_buffer_on_node, .copy_methods = &csr_copy_data_methods_s, .get_size = csr_interface_get_size, .interfaceid = STARPU_CSR_INTERFACE_ID, .interface_size = sizeof(struct starpu_csr_interface), .footprint = footprint_csr_interface_crc32, .compare = csr_compare, .describe = describe, .name = "STARPU_CSR_INTERFACE", .pack_data = pack_data, .peek_data = peek_data, .unpack_data = unpack_data, .pack_meta = NULL, .unpack_meta = NULL, .free_meta = NULL }; static void register_csr_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) data_interface; uint32_t *ram_colind = NULL; uint32_t *ram_rowptr = NULL; if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) { ram_colind = csr_interface->colind; ram_rowptr = csr_interface->rowptr; } int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_csr_interface *local_interface = (struct starpu_csr_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->nzval = csr_interface->nzval; local_interface->colind = csr_interface->colind; } else { local_interface->nzval = 0; local_interface->colind = NULL; } local_interface->ram_colind = ram_colind; local_interface->ram_rowptr = ram_rowptr; local_interface->id = csr_interface->id; local_interface->rowptr = csr_interface->rowptr; local_interface->nnz = csr_interface->nnz; local_interface->nrow = csr_interface->nrow; local_interface->firstentry = csr_interface->firstentry; local_interface->elemsize = csr_interface->elemsize; } } /* declare a new data with the BLAS interface */ void starpu_csr_data_register(starpu_data_handle_t *handleptr, int home_node, uint32_t nnz, uint32_t nrow, uintptr_t nzval, uint32_t *colind, uint32_t *rowptr, uint32_t firstentry, size_t elemsize) { struct starpu_csr_interface csr_interface = { .id = STARPU_CSR_INTERFACE_ID, .nnz = nnz, .nrow = nrow, .nzval = nzval, .colind = colind, .rowptr = rowptr, .firstentry = firstentry, .elemsize = elemsize }; #ifndef STARPU_SIMGRID if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) { if (nnz) { if (elemsize) { STARPU_ASSERT_ACCESSIBLE(nzval); STARPU_ASSERT_ACCESSIBLE(nzval + nnz*elemsize - 1); } STARPU_ASSERT_ACCESSIBLE(colind); STARPU_ASSERT_ACCESSIBLE((uintptr_t) colind + nnz*sizeof(uint32_t) - 1); } STARPU_ASSERT_ACCESSIBLE(rowptr); STARPU_ASSERT_ACCESSIBLE((uintptr_t) rowptr + (nrow+1)*sizeof(uint32_t) - 1); } #endif starpu_data_register(handleptr, home_node, &csr_interface, &starpu_interface_csr_ops); } static uint32_t footprint_csr_interface_crc32(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(starpu_csr_get_nnz(handle), 0); } static int csr_compare(void *data_interface_a, void *data_interface_b) { struct starpu_csr_interface *csr_a = (struct starpu_csr_interface *) data_interface_a; struct starpu_csr_interface *csr_b = (struct starpu_csr_interface *) data_interface_b; /* Two matrices are considered compatible if they have the same size */ return (csr_a->nnz == csr_b->nnz) && (csr_a->nrow == csr_b->nrow) && (csr_a->elemsize == csr_b->elemsize); } /* offer an access to the data parameters */ uint32_t starpu_csr_get_nnz(starpu_data_handle_t handle) { struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(csr_interface->id == STARPU_CSR_INTERFACE_ID, "Error. The given data is not a csr."); #endif return csr_interface->nnz; } uint32_t starpu_csr_get_nrow(starpu_data_handle_t handle) { struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(csr_interface->id == STARPU_CSR_INTERFACE_ID, "Error. The given data is not a csr."); #endif return csr_interface->nrow; } uint32_t starpu_csr_get_firstentry(starpu_data_handle_t handle) { struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(csr_interface->id == STARPU_CSR_INTERFACE_ID, "Error. The given data is not a csr."); #endif return csr_interface->firstentry; } size_t starpu_csr_get_elemsize(starpu_data_handle_t handle) { struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(csr_interface->id == STARPU_CSR_INTERFACE_ID, "Error. The given data is not a csr."); #endif return csr_interface->elemsize; } uintptr_t starpu_csr_get_local_nzval(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(csr_interface->id == STARPU_CSR_INTERFACE_ID, "Error. The given data is not a csr."); #endif return csr_interface->nzval; } uint32_t *starpu_csr_get_local_colind(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(csr_interface->id == STARPU_CSR_INTERFACE_ID, "Error. The given data is not a csr."); #endif return csr_interface->colind; } uint32_t *starpu_csr_get_local_rowptr(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(csr_interface->id == STARPU_CSR_INTERFACE_ID, "Error. The given data is not a csr."); #endif return csr_interface->rowptr; } static size_t csr_interface_get_size(starpu_data_handle_t handle) { size_t size; uint32_t nnz = starpu_csr_get_nnz(handle); uint32_t nrow = starpu_csr_get_nrow(handle); size_t elemsize = starpu_csr_get_elemsize(handle); size = nnz*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t); return size; } /* memory allocation/deallocation primitives for the BLAS interface */ /* returns the size of the allocated area */ static starpu_ssize_t allocate_csr_buffer_on_node(void *data_interface_, unsigned dst_node) { uintptr_t addr_nzval = 0; uint32_t *addr_colind = NULL, *addr_rowptr = NULL; starpu_ssize_t allocated_memory; /* we need the 3 arrays to be allocated */ struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) data_interface_; uint32_t nnz = csr_interface->nnz; uint32_t nrow = csr_interface->nrow; size_t elemsize = csr_interface->elemsize; if (nnz) { addr_nzval = starpu_malloc_on_node(dst_node, nnz*elemsize); if (!addr_nzval) goto fail_nzval; addr_colind = (uint32_t*) starpu_malloc_on_node(dst_node, nnz*sizeof(uint32_t)); if (!addr_colind) goto fail_colind; } else { addr_nzval = 0; addr_colind = NULL; } addr_rowptr = (uint32_t*) starpu_malloc_on_node(dst_node, (nrow+1)*sizeof(uint32_t)); if (!addr_rowptr) goto fail_rowptr; /* allocation succeeded */ allocated_memory = nnz*elemsize + nnz*sizeof(uint32_t) + (nrow+1)*sizeof(uint32_t); /* update the data properly in consequence */ csr_interface->nzval = addr_nzval; csr_interface->colind = addr_colind; csr_interface->rowptr = addr_rowptr; return allocated_memory; fail_rowptr: if (nnz) starpu_free_on_node(dst_node, (uintptr_t) addr_colind, nnz*sizeof(uint32_t)); fail_colind: if (nnz) starpu_free_on_node(dst_node, addr_nzval, nnz*elemsize); fail_nzval: /* allocation failed */ return -ENOMEM; } static void free_csr_buffer_on_node(void *data_interface, unsigned node) { struct starpu_csr_interface *csr_interface = (struct starpu_csr_interface *) data_interface; uint32_t nnz = csr_interface->nnz; uint32_t nrow = csr_interface->nrow; size_t elemsize = csr_interface->elemsize; if (nnz) { starpu_free_on_node(node, csr_interface->nzval, nnz*elemsize); csr_interface->nzval = 0; starpu_free_on_node(node, (uintptr_t) csr_interface->colind, nnz*sizeof(uint32_t)); csr_interface->colind = NULL; } starpu_free_on_node(node, (uintptr_t) csr_interface->rowptr, (nrow+1)*sizeof(uint32_t)); csr_interface->rowptr = NULL; } /* as not all platform easily have a BLAS lib installed ... */ static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct starpu_csr_interface *src_csr = (struct starpu_csr_interface *) src_interface; struct starpu_csr_interface *dst_csr = (struct starpu_csr_interface *) dst_interface; uint32_t nnz = src_csr->nnz; uint32_t nrow = src_csr->nrow; size_t elemsize = src_csr->elemsize; int ret = 0; if (nnz) { if (starpu_interface_copy(src_csr->nzval, 0, src_node, dst_csr->nzval, 0, dst_node, nnz*elemsize, async_data)) ret = -EAGAIN; if (starpu_interface_copy((uintptr_t)src_csr->colind, 0, src_node, (uintptr_t)dst_csr->colind, 0, dst_node, nnz*sizeof(uint32_t), async_data)) ret = -EAGAIN; } if (starpu_interface_copy((uintptr_t)src_csr->rowptr, 0, src_node, (uintptr_t)dst_csr->rowptr, 0, dst_node, (nrow+1)*sizeof(uint32_t), async_data)) ret = -EAGAIN; starpu_interface_data_copy(src_node, dst_node, nnz*elemsize + (nnz+nrow+1)*sizeof(uint32_t)); return ret; } static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) { struct starpu_csr_interface *csr = (struct starpu_csr_interface *) data_interface; return snprintf(buf, size, "C%ux%ux%u", (unsigned) csr->nnz, (unsigned) csr->nrow, (unsigned) csr->elemsize); } static int pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_csr_interface *csr = (struct starpu_csr_interface *) starpu_data_get_interface_on_node(handle, node); // We first pack colind *count = csr->nnz * sizeof(csr->colind[0]); // Then rowptr *count += (csr->nrow + 1) * sizeof(csr->rowptr[0]); // Then nnzval *count += csr->nnz * csr->elemsize; if (ptr != NULL) { *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); char *tmp = *ptr; if (csr->nnz) { memcpy(tmp, (void*)csr->colind, csr->nnz * sizeof(csr->colind[0])); tmp += csr->nnz * sizeof(csr->colind[0]); memcpy(tmp, (void*)csr->rowptr, (csr->nrow + 1) * sizeof(csr->rowptr[0])); tmp += (csr->nrow + 1) * sizeof(csr->rowptr[0]); } memcpy(tmp, (void*)csr->nzval, csr->nnz * csr->elemsize); } return 0; } static int peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_csr_interface *csr = (struct starpu_csr_interface *) starpu_data_get_interface_on_node(handle, node); STARPU_ASSERT(count == (csr->nnz * sizeof(csr->colind[0]))+((csr->nrow + 1) * sizeof(csr->rowptr[0]))+(csr->nnz * csr->elemsize)); char *tmp = ptr; if (csr->nnz) { memcpy((void*)csr->colind, tmp, csr->nnz * sizeof(csr->colind[0])); tmp += csr->nnz * sizeof(csr->colind[0]); memcpy((void*)csr->rowptr, tmp, (csr->nrow + 1) * sizeof(csr->rowptr[0])); tmp += (csr->nrow + 1) * sizeof(csr->rowptr[0]); } memcpy((void*)csr->nzval, tmp, csr->nnz * csr->elemsize); return 0; } static int unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { peek_data(handle, node, ptr, count); starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); return 0; } starpu-1.4.9+dfsg/src/datawizard/interfaces/data_interface.c000066400000000000000000001142461507764646700241610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #ifdef STARPU_OPENMP #include #endif static struct starpu_data_interface_ops **_id_to_ops_array; static unsigned _id_to_ops_array_size; /* Hash table mapping host pointers to data handles. */ static int32_t nregistered, maxnregistered; static int _data_interface_number = STARPU_MAX_INTERFACE_ID; starpu_arbiter_t _starpu_global_arbiter; static int max_memory_use; static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned coherent, unsigned nowait); void _starpu_data_interface_fini(void); void _starpu_data_interface_init(void) { max_memory_use = starpu_getenv_number_default("STARPU_MAX_MEMORY_USE", 0); /* Just for testing purpose */ if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) _starpu_global_arbiter = starpu_arbiter_create(); _starpu_crash_add_hook(&_starpu_data_interface_fini); } void _starpu_data_interface_fini(void) { if (max_memory_use) _STARPU_DISP("Memory used for %d data handles: %lu MiB\n", maxnregistered, (unsigned long) (maxnregistered * sizeof(struct _starpu_data_state)) >> 20); } void _starpu_data_interface_shutdown() { free(_id_to_ops_array); _id_to_ops_array = NULL; _id_to_ops_array_size = 0; _starpu_data_interface_fini(); } struct starpu_data_interface_ops *_starpu_data_interface_get_ops(unsigned interface_id) { switch (interface_id) { case STARPU_MATRIX_INTERFACE_ID: return &starpu_interface_matrix_ops; case STARPU_BLOCK_INTERFACE_ID: return &starpu_interface_block_ops; case STARPU_VECTOR_INTERFACE_ID: return &starpu_interface_vector_ops; case STARPU_CSR_INTERFACE_ID: return &starpu_interface_csr_ops; case STARPU_BCSR_INTERFACE_ID: return &starpu_interface_bcsr_ops; case STARPU_VARIABLE_INTERFACE_ID: return &starpu_interface_variable_ops; case STARPU_VOID_INTERFACE_ID: return &starpu_interface_void_ops; case STARPU_MULTIFORMAT_INTERFACE_ID: return &starpu_interface_multiformat_ops; case STARPU_COO_INTERFACE_ID: return &starpu_interface_coo_ops; case STARPU_TENSOR_INTERFACE_ID: return &starpu_interface_tensor_ops; case STARPU_NDIM_INTERFACE_ID: return &starpu_interface_ndim_ops; default: { if (interface_id-STARPU_MAX_INTERFACE_ID > _id_to_ops_array_size || _id_to_ops_array == NULL || _id_to_ops_array[interface_id-STARPU_MAX_INTERFACE_ID]==NULL) { _STARPU_MSG("There is no 'struct starpu_data_interface_ops' registered for interface %d\n", interface_id); STARPU_ABORT(); return NULL; } else return _id_to_ops_array[interface_id-STARPU_MAX_INTERFACE_ID]; } } } /* * Start monitoring a piece of data */ static void _starpu_register_new_data(starpu_data_handle_t handle, int home_node, uint32_t wt_mask) { STARPU_ASSERT(handle); /* first take care to properly lock the data */ _starpu_spin_lock(&handle->header_lock); handle->root_handle = handle; //handle->father_handle = NULL; //handle->nsiblings = 0; //handle->siblings = NULL; //handle->sibling_index = 0; /* could be anything for the root */ handle->depth = 1; /* the tree is just a node yet */ handle->active = 1; /* Store some values directly in the handle not to recompute them all * the time. */ handle->footprint = _starpu_compute_data_footprint(handle); handle->home_node = home_node; handle->wt_mask = wt_mask; //handle->aliases = 0; //handle->readonly_dup = NULL; //handle->readonly_dup_of = NULL; //handle->is_not_important = 0; handle->sequential_consistency = starpu_data_get_default_sequential_consistency_flag(); handle->initialized = home_node != -1; //handle->readonly = 0; handle->ooc = 1; /* By default, there are no methods available to perform a reduction */ //handle->redux_cl = NULL; //handle->init_cl = NULL; /* that new data is invalid from all nodes perpective except for the * home node */ unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { struct _starpu_data_replicate *replicate; replicate = &handle->per_node[node]; replicate->memory_node = node; //replicate->relaxed_coherency = 0; //replicate->refcnt = 0; //replicate->nb_tasks_prefetch = 0; if ((int) node == home_node) { /* this is the home node with the only valid copy */ replicate->state = STARPU_OWNER; replicate->allocated = 1; //replicate->automatically_allocated = 0; replicate->initialized = 1; } else { /* the value is not available here yet */ replicate->state = STARPU_INVALID; //replicate->allocated = 0; //replicate->initialized = 0; } replicate->mapped = STARPU_UNMAPPED; } /* now the data is available ! */ _starpu_spin_unlock(&handle->header_lock); (void)STARPU_ATOMIC_ADD(&nregistered, 1); _starpu_perf_counter_update_max_int32(&maxnregistered, nregistered); } void _starpu_data_initialize_per_worker(starpu_data_handle_t handle) { unsigned worker; unsigned nworkers = starpu_worker_get_count(); _starpu_spin_checklocked(&handle->header_lock); _STARPU_CALLOC(handle->per_worker, nworkers, sizeof(*handle->per_worker)); size_t interfacesize = handle->ops->interface_size; for (worker = 0; worker < nworkers; worker++) { struct _starpu_data_replicate *replicate; //unsigned node; replicate = &handle->per_worker[worker]; //replicate->allocated = 0; //replicate->automatically_allocated = 0; replicate->state = STARPU_INVALID; //replicate->refcnt = 0; replicate->handle = handle; //replicate->nb_tasks_prefetch = 0; //for (node = 0; node < STARPU_MAXNODES; node++) //{ // replicate->request[node] = NULL; // replicate->last_request[node] = NULL; //} //replicate->load_request = NULL; /* Assuming being used for SCRATCH for now, patched when entering REDUX mode */ replicate->relaxed_coherency = 1; //replicate->initialized = 0; replicate->memory_node = starpu_worker_get_memory_node(worker); replicate->mapped = STARPU_UNMAPPED; _STARPU_CALLOC(replicate->data_interface, 1, interfacesize); /* duplicate the content of the interface on node 0 */ memcpy(replicate->data_interface, handle->per_node[STARPU_MAIN_RAM].data_interface, interfacesize); } } void starpu_data_ptr_register(starpu_data_handle_t handle, unsigned node) { struct _starpu_data_replicate *replicate = &handle->per_node[node]; _starpu_spin_lock(&handle->header_lock); STARPU_ASSERT_MSG(replicate->allocated == 0, "starpu_data_ptr_register must be called right after starpu_data_register"); replicate->allocated = 1; replicate->automatically_allocated = 0; _starpu_spin_unlock(&handle->header_lock); } int _starpu_data_handle_init(starpu_data_handle_t handle, struct starpu_data_interface_ops *interface_ops, unsigned int mf_node) { unsigned node; /* Tell helgrind that our access to busy_count in * starpu_data_unregister is actually safe */ STARPU_HG_DISABLE_CHECKING(handle->busy_count); handle->magic = 42; /* When not specified, the fields are initialized in _starpu_register_new_data and _starpu_data_partition */ _starpu_data_requester_prio_list_init0(&handle->req_list); //handle->refcnt = 0; //handle->unlocking_reqs = 0; //handle->current_mode = STARPU_NONE; _starpu_spin_init(&handle->header_lock); //handle->busy_count = 0; //handle->busy_waiting = 0; STARPU_PTHREAD_MUTEX_INIT0(&handle->busy_mutex, NULL); STARPU_PTHREAD_COND_INIT0(&handle->busy_cond, NULL); #ifdef STARPU_BUBBLE STARPU_PTHREAD_MUTEX_INIT0(&handle->unpartition_mutex, NULL); #endif //handle->root_handle //handle->father_handle //handle->active_children = NULL; //handle->active_nchildren = 0; //handle->active_readonly_children = NULL; //handle->active_readonly_nchildren = NULL; //handle->nactive_readonly_children = 0; //handle->nsiblings //handle->siblings //handle->sibling_index //handle->depth /* there is no hierarchy yet */ //handle->children = NULL; //handle->nchildren = 0; //handle->nplans = 0; //handle->switch_cl = NULL; //handle->switch_cl_nparts = 0; //handle->partitioned = 0; //handle->part_readonly = 0; //handle->active //handle->active_ro = 0; //handle->per_node below handle->ops = interface_ops; size_t interfacesize = interface_ops->interface_size; for (node = 0; node < STARPU_MAXNODES; node++) { _starpu_memory_stats_init_per_node(handle, node); struct _starpu_data_replicate *replicate; replicate = &handle->per_node[node]; /* relaxed_coherency = 0 */ replicate->handle = handle; _STARPU_CALLOC(replicate->data_interface, 1, interfacesize); if (handle->ops->init) handle->ops->init(replicate->data_interface); } //handle->per_worker = NULL; //handle->ops above //handle->footprint //handle->home_node //handle->wt_mask //handle->aliases = 0; //handle->is_not_important //handle->sequential_consistency //handle->initialized //handle->readonly //handle->ooc //handle->lazy_unregister = 0; //handle->removed_from_context_hash = 0; STARPU_PTHREAD_MUTEX_INIT0(&handle->sequential_consistency_mutex, NULL); handle->last_submitted_mode = STARPU_R; //handle->last_sync_task = NULL; //handle->last_submitted_accessors.task = NULL; handle->last_submitted_accessors.next = &handle->last_submitted_accessors; handle->last_submitted_accessors.prev = &handle->last_submitted_accessors; #ifdef STARPU_USE_FXT //handle->last_submitted_ghost_sync_id_is_valid = 0; //handle->last_submitted_ghost_sync_id = 0; //handle->last_submitted_ghost_accessors_id = NULL; #endif //handle->post_sync_tasks = NULL; /* Tell helgrind that the race in _starpu_unlock_post_sync_tasks is fine */ STARPU_HG_DISABLE_CHECKING(handle->post_sync_tasks_cnt); //handle->post_sync_tasks_cnt = 0; //handle->redux_cl //handle->init_cl //handle->reduction_refcnt = 0; _starpu_data_requester_prio_list_init0(&handle->reduction_req_list); //handle->reduction_tmp_handles = NULL; //handle->write_invalidation_req = NULL; //handle->mpi_data = NULL; /* invalid until set */ _starpu_memory_stats_init(handle); handle->mf_node = mf_node; //handle->unregister_hook = NULL; if (_starpu_global_arbiter) /* Just for testing purpose */ starpu_data_assign_arbiter(handle, _starpu_global_arbiter); else { //handle->arbiter = NULL; } _starpu_data_requester_prio_list_init0(&handle->arbitered_req_list); handle->last_locality = -1; //handle->dimensions = 0; //handle->coordinates = {}; //handle->user_data = NULL; //handle->sched_data = NULL; return 0; } static starpu_data_handle_t _starpu_data_handle_allocate(struct starpu_data_interface_ops *interface_ops, unsigned int mf_node) { starpu_data_handle_t handle; _STARPU_CALLOC(handle, 1, sizeof(struct _starpu_data_state)); _starpu_data_handle_init(handle, interface_ops, mf_node); return handle; } void _starpu_data_register_ops(struct starpu_data_interface_ops *ops) { /* check the interfaceid is set */ STARPU_ASSERT(ops->interfaceid != STARPU_UNKNOWN_INTERFACE_ID); if ((unsigned)ops->interfaceid >= STARPU_MAX_INTERFACE_ID) { if ((unsigned)ops->interfaceid > _id_to_ops_array_size) { if (!_id_to_ops_array_size) { _id_to_ops_array_size = 16; } else { _id_to_ops_array_size *= 2; } _STARPU_REALLOC(_id_to_ops_array, _id_to_ops_array_size * sizeof(struct starpu_data_interface_ops *)); } _id_to_ops_array[ops->interfaceid-STARPU_MAX_INTERFACE_ID] = ops; } } void starpu_data_register_ops(struct starpu_data_interface_ops *ops) { if (ops->interfaceid == STARPU_UNKNOWN_INTERFACE_ID) { ops->interfaceid = starpu_data_interface_get_next_id(); } _starpu_data_register_ops(ops); } void starpu_data_register(starpu_data_handle_t *handleptr, int home_node, void *data_interface, struct starpu_data_interface_ops *ops) { STARPU_ASSERT_MSG(home_node >= -1 && home_node < (int)starpu_memory_nodes_get_count(), "Invalid memory node number"); starpu_data_handle_t handle = _starpu_data_handle_allocate(ops, home_node); STARPU_ASSERT(handleptr); *handleptr = handle; if (ops->interfaceid == STARPU_UNKNOWN_INTERFACE_ID) { ops->interfaceid = starpu_data_interface_get_next_id(); } /* fill the interface fields with the appropriate method */ STARPU_ASSERT(ops->register_data_handle); ops->register_data_handle(handle, home_node, data_interface); _starpu_data_register_ops(ops); _starpu_register_new_data(handle, home_node, 0); _STARPU_TRACE_HANDLE_DATA_REGISTER(handle); } void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc) { void *local_interface = starpu_data_get_interface_on_node(handlesrc, STARPU_MAIN_RAM); starpu_data_register(handledst, -1, local_interface, handlesrc->ops); } void *starpu_data_handle_to_pointer(starpu_data_handle_t handle, unsigned node) { /* Check whether the operation is supported and the node has actually * been allocated. */ if (!starpu_data_test_if_allocated_on_node(handle, node)) return NULL; if (handle->ops->to_pointer) { return handle->ops->to_pointer(starpu_data_get_interface_on_node(handle, node), node); } /* Deprecated */ if (handle->ops->handle_to_pointer) { return handle->ops->handle_to_pointer(handle, node); } return NULL; } void *starpu_data_get_local_ptr(starpu_data_handle_t handle) { return starpu_data_handle_to_pointer(handle, starpu_worker_get_local_memory_node()); } struct starpu_data_interface_ops* starpu_data_get_interface_ops(starpu_data_handle_t handle) { return handle->ops; } void _starpu_data_free_interfaces(starpu_data_handle_t handle) { unsigned node; unsigned nworkers = starpu_worker_get_count(); if (handle->ops->unregister_data_handle) handle->ops->unregister_data_handle(handle); for (node = 0; node < STARPU_MAXNODES; node++) free(handle->per_node[node].data_interface); if (handle->per_worker) { unsigned worker; for (worker = 0; worker < nworkers; worker++) free(handle->per_worker[worker].data_interface); free(handle->per_worker); } } struct _starpu_unregister_callback_arg { unsigned memory_node; starpu_data_handle_t handle; unsigned terminated; starpu_pthread_mutex_t mutex; starpu_pthread_cond_t cond; }; /* Check whether we should tell starpu_data_unregister that the data handle is * not busy any more. * The header is supposed to be locked. * This may free the handle, if it was lazily unregistered (1 is returned in * that case). The handle pointer thus becomes invalid for the caller. * * Note: we inline some of the tests in the _starpu_data_check_not_busy macro. */ int __starpu_data_check_not_busy(starpu_data_handle_t handle) { if (STARPU_LIKELY(handle->busy_count)) return 0; /* Not busy any more, perhaps have to unregister etc. */ if (STARPU_UNLIKELY(handle->busy_waiting)) { STARPU_PTHREAD_MUTEX_LOCK(&handle->busy_mutex); STARPU_PTHREAD_COND_BROADCAST(&handle->busy_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex); } /* The handle has been destroyed in between (eg. this was a temporary * handle created for a reduction.) */ if (STARPU_UNLIKELY(handle->lazy_unregister)) { handle->lazy_unregister = 0; _starpu_spin_unlock(&handle->header_lock); _starpu_data_unregister(handle, 0, 1); /* Warning: in case we unregister the handle, we must be sure * that the caller will not try to unlock the header after * !*/ return 1; } return 0; } static void _starpu_check_if_valid_and_fetch_data_on_node(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, const char *origin) { unsigned node; unsigned nnodes = starpu_memory_nodes_get_count(); int valid = 0; _starpu_spin_lock(&handle->header_lock); for (node = 0; node < nnodes; node++) { if (handle->per_node[node].state != STARPU_INVALID) { /* we found a copy ! */ valid = 1; break; } } _starpu_spin_unlock(&handle->header_lock); if (valid) { int ret = _starpu_fetch_data_on_node(handle, handle->home_node, replicate, STARPU_R, 0, NULL, STARPU_FETCH, 0, NULL, NULL, 0, origin); STARPU_ASSERT(!ret); _starpu_release_data_on_node(handle, 0, STARPU_NONE, replicate); } else { _starpu_spin_lock(&handle->header_lock); if (!_starpu_notify_data_dependencies(handle, STARPU_NONE)) _starpu_spin_unlock(&handle->header_lock); } } static void _starpu_data_unregister_fetch_data_callback(void *_arg) { struct _starpu_unregister_callback_arg *arg = (struct _starpu_unregister_callback_arg *) _arg; starpu_data_handle_t handle = arg->handle; STARPU_ASSERT(handle); struct _starpu_data_replicate *replicate = &handle->per_node[arg->memory_node]; _starpu_check_if_valid_and_fetch_data_on_node(handle, replicate, "_starpu_data_unregister_fetch_data_callback"); /* unlock the caller */ STARPU_PTHREAD_MUTEX_LOCK(&arg->mutex); arg->terminated = 1; STARPU_PTHREAD_COND_SIGNAL(&arg->cond); STARPU_PTHREAD_MUTEX_UNLOCK(&arg->mutex); } void _starpu_data_set_unregister_hook(starpu_data_handle_t handle, _starpu_data_handle_unregister_hook func) { STARPU_ASSERT(handle->unregister_hook == NULL); handle->unregister_hook = func; } /* * We are about to unregister this R/O data. There might be still other aliases, * in which case this returns 0. If not, users are not supposed to see it * any more, so detach it from their sight and return 1 to let unregistration happen. */ static int _starpu_ro_data_detach(starpu_data_handle_t handle) { _starpu_spin_lock(&handle->header_lock); if (handle->aliases) { handle->aliases--; _starpu_spin_unlock(&handle->header_lock); return 0; } if (handle->readonly_dup) { STARPU_ASSERT(handle->readonly_dup->readonly_dup_of == handle); handle->readonly_dup->readonly_dup_of = NULL; handle->readonly_dup = NULL; } if (handle->readonly_dup_of) { STARPU_ASSERT(handle->readonly_dup_of->readonly_dup == handle); handle->readonly_dup_of->readonly_dup = NULL; handle->readonly_dup_of = NULL; } /* So that unregistration can use write dependencies to wait for * anything to finish */ handle->readonly = 0; _starpu_spin_unlock(&handle->header_lock); return 1; } /* Unregister the data handle, perhaps we don't need to update the home_node * (in that case coherent is set to 0) * nowait is for internal use when we already know for sure that we won't have to wait. */ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned coherent, unsigned nowait) { STARPU_ASSERT(handle); STARPU_ASSERT_MSG(handle->nchildren == 0, "data %p needs to be unpartitioned before unregistration", handle); STARPU_ASSERT_MSG(handle->nplans == 0, "data %p needs its partition plans to be cleaned before unregistration", handle); STARPU_ASSERT_MSG(handle->partitioned == 0, "data %p needs its partitioned plans to be unpartitioned before unregistration", handle); /* TODO: also check that it has the latest coherency */ STARPU_ASSERT(!(nowait && handle->busy_count != 0)); if (!_starpu_ro_data_detach(handle)) return; int sequential_consistency = handle->sequential_consistency; if (sequential_consistency && !nowait) { /* We will acquire it in write mode to catch all dependencies, * but possibly it's not actually initialized. Fake it to avoid getting caught doing it */ handle->initialized = 1; STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_data_unregister must not be called from a task or callback, perhaps you can use starpu_data_unregister_submit instead"); /* If sequential consistency is enabled, wait until data is available */ if ((handle->nplans && !handle->nchildren) || handle->siblings) _starpu_data_partition_access_submit(handle, !handle->readonly); _starpu_data_wait_until_available(handle, handle->readonly?STARPU_R:STARPU_RW, "starpu_data_unregister"); } if (coherent && !nowait) { STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "starpu_data_unregister must not be called from a task or callback, perhaps you can use starpu_data_unregister_submit instead"); /* Fetch data in the home of the data to ensure we have a valid copy * where we registered it */ int home_node = handle->home_node; if (home_node >= 0) { struct _starpu_unregister_callback_arg arg = { 0 }; arg.handle = handle; arg.memory_node = (unsigned)home_node; arg.terminated = 0; STARPU_PTHREAD_MUTEX_INIT0(&arg.mutex, NULL); STARPU_PTHREAD_COND_INIT0(&arg.cond, NULL); if (!_starpu_attempt_to_submit_data_request_from_apps(handle, STARPU_R, _starpu_data_unregister_fetch_data_callback, &arg)) { /* no one has locked this data yet, so we proceed immediately */ struct _starpu_data_replicate *home_replicate = &handle->per_node[home_node]; _starpu_check_if_valid_and_fetch_data_on_node(handle, home_replicate, "_starpu_data_unregister"); } else { STARPU_PTHREAD_MUTEX_LOCK(&arg.mutex); while (!arg.terminated) STARPU_PTHREAD_COND_WAIT(&arg.cond, &arg.mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&arg.mutex); } STARPU_PTHREAD_MUTEX_DESTROY(&arg.mutex); STARPU_PTHREAD_COND_DESTROY(&arg.cond); } /* Driver porters: adding your driver here is optional, only needed for the support of multiple formats. */ /* If this handle uses a multiformat interface, we may have to convert * this piece of data back into the CPU format. * XXX : This is quite hacky, could we submit a task instead ? */ if (_starpu_data_is_multiformat_handle(handle) && (starpu_node_get_kind(handle->mf_node) != STARPU_CPU_RAM)) { _STARPU_DEBUG("Conversion needed\n"); void *buffers[1]; struct starpu_multiformat_interface *format_interface; home_node = handle->home_node; if (home_node < 0 || (starpu_node_get_kind(home_node) != STARPU_CPU_RAM)) home_node = STARPU_MAIN_RAM; format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, home_node); struct starpu_codelet *cl = NULL; enum starpu_node_kind node_kind = starpu_node_get_kind(handle->mf_node); switch (node_kind) { #ifdef STARPU_USE_CUDA case STARPU_CUDA_RAM: { struct starpu_multiformat_data_interface_ops *mf_ops; mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface); cl = mf_ops->cuda_to_cpu_cl; break; } #endif #ifdef STARPU_USE_OPENCL case STARPU_OPENCL_RAM: { struct starpu_multiformat_data_interface_ops *mf_ops; mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface); cl = mf_ops->opencl_to_cpu_cl; break; } #endif case STARPU_CPU_RAM: /* Impossible ! */ default: STARPU_ABORT(); } buffers[0] = format_interface; _starpu_cl_func_t func = _starpu_task_get_cpu_nth_implementation(cl, 0); STARPU_ASSERT(func); func(buffers, NULL); } } /* Prevent any further unregistration */ handle->magic = 0; _starpu_spin_lock(&handle->header_lock); if (!coherent) { /* Should we postpone the unregister operation ? */ if (handle->lazy_unregister) { if (handle->busy_count > 0) { _starpu_spin_unlock(&handle->header_lock); return; } handle->lazy_unregister = 0; } } /* Tell holders of references that we're starting waiting */ handle->busy_waiting = 1; _starpu_spin_unlock(&handle->header_lock); /* Request unmapping of any mapped data */ unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) _starpu_data_unmap(handle, node); retry_busy: /* Wait for all requests to finish (notably WT requests) */ STARPU_PTHREAD_MUTEX_LOCK(&handle->busy_mutex); while (1) { /* Here helgrind would shout that this an unprotected access, * but this is actually fine: all threads who do busy_count-- * are supposed to call _starpu_data_check_not_busy, which will * wake us up through the busy_mutex/busy_cond. */ if (!handle->busy_count) break; /* This is woken by _starpu_data_check_not_busy, always called * after decrementing busy_count */ STARPU_PTHREAD_COND_WAIT(&handle->busy_cond, &handle->busy_mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex); /* Unregister MPI things after having waited for MPI reqs etc. to settle down */ if (handle->unregister_hook) { handle->unregister_hook(handle); handle->unregister_hook = NULL; } /* Wait for finished requests to release the handle */ _starpu_spin_lock(&handle->header_lock); if (handle->busy_count) { /* Bad luck: some request went in in between, wait again... */ _starpu_spin_unlock(&handle->header_lock); goto retry_busy; } size_t size = _starpu_data_get_alloc_size(handle); /* Destroy the data now */ for (node = 0; node < STARPU_MAXNODES; node++) { struct _starpu_data_replicate *local = &handle->per_node[node]; STARPU_ASSERT(!local->refcnt); if (local->allocated) { /* free the data copy in a lazy fashion */ if (local->automatically_allocated) _starpu_request_mem_chunk_removal(handle, local, node, size); } } if (handle->per_worker) { unsigned worker; unsigned nworkers = starpu_worker_get_count(); for (worker = 0; worker < nworkers; worker++) { struct _starpu_data_replicate *local = &handle->per_worker[worker]; STARPU_ASSERT(!local->refcnt); /* free the data copy in a lazy fashion */ if (local->allocated && local->automatically_allocated) _starpu_request_mem_chunk_removal(handle, local, starpu_worker_get_memory_node(worker), size); } } _starpu_data_free_interfaces(handle); _starpu_memory_stats_free(handle); _starpu_spin_unlock(&handle->header_lock); _starpu_spin_destroy(&handle->header_lock); _starpu_data_clear_implicit(handle); free(handle->active_readonly_children); free(handle->active_readonly_nchildren); STARPU_PTHREAD_MUTEX_DESTROY(&handle->busy_mutex); STARPU_PTHREAD_COND_DESTROY(&handle->busy_cond); STARPU_PTHREAD_MUTEX_DESTROY(&handle->sequential_consistency_mutex); #ifdef STARPU_BUBBLE STARPU_PTHREAD_MUTEX_DESTROY(&handle->unpartition_mutex); #endif STARPU_HG_ENABLE_CHECKING(handle->post_sync_tasks_cnt); STARPU_HG_ENABLE_CHECKING(handle->busy_count); _starpu_data_requester_prio_list_deinit(&handle->req_list); _starpu_data_requester_prio_list_deinit(&handle->reduction_req_list); if (handle->switch_cl) { free(handle->switch_cl->dyn_nodes); free(handle->switch_cl); } _STARPU_TRACE_HANDLE_DATA_UNREGISTER(handle); free(handle); (void)STARPU_ATOMIC_ADD(&nregistered, -1); } void starpu_data_unregister(starpu_data_handle_t handle) { STARPU_ASSERT_MSG(handle->magic == 42, "data %p is invalid (was it already registered?)", handle); STARPU_ASSERT_MSG(!handle->lazy_unregister, "data %p can not be unregistered twice", handle); _starpu_data_unregister(handle, 1, 0); } void starpu_data_unregister_no_coherency(starpu_data_handle_t handle) { STARPU_ASSERT_MSG(handle->magic == 42, "data %p is invalid (was it already registered?)", handle); _starpu_data_unregister(handle, 0, 0); } static void _starpu_data_unregister_submit_cb(void *arg) { starpu_data_handle_t handle = arg; _starpu_spin_lock(&handle->header_lock); handle->lazy_unregister = 1; /* The handle should be busy since we are working on it. * when we releases the handle below, it will be destroyed by * _starpu_data_check_not_busy */ STARPU_ASSERT(handle->busy_count); _starpu_spin_unlock(&handle->header_lock); starpu_data_release_on_node(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL); } void starpu_data_unregister_submit(starpu_data_handle_t handle) { STARPU_ASSERT_MSG(handle->magic == 42, "data %p is invalid (was it already registered?)", handle); STARPU_ASSERT_MSG(!handle->lazy_unregister, "data %p can not be unregistered twice", handle); if (!_starpu_ro_data_detach(handle)) return; /* Wait for all task dependencies on this handle before putting it for free */ starpu_data_acquire_on_node_cb(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, handle->initialized?STARPU_RW:STARPU_W, _starpu_data_unregister_submit_cb, handle); } static void __starpu_data_deinitialize(starpu_data_handle_t handle) { #ifdef STARPU_DEBUG { /* There shouldn't be any pending request since we acquired the data in W mode */ unsigned i, j, nnodes = starpu_memory_nodes_get_count(); for (i = 0; i < nnodes; i++) for (j = 0; j < nnodes; j++) STARPU_ASSERT_MSG(!handle->per_node[i].request[j], "request for handle %p pending from %u to %u while invalidating data!", handle, j, i); } #endif unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { struct _starpu_data_replicate *local = &handle->per_node[node]; if (local->state != STARPU_INVALID) _STARPU_TRACE_DATA_STATE_INVALID(handle, node); local->state = STARPU_INVALID; local->initialized = 0; } if (handle->per_worker) { unsigned worker; unsigned nworkers = starpu_worker_get_count(); for (worker = 0; worker < nworkers; worker++) { struct _starpu_data_replicate *local = &handle->per_worker[worker]; local->state = STARPU_INVALID; } } } static void _starpu_data_invalidate(void *data) { starpu_data_handle_t handle = data; size_t size = _starpu_data_get_alloc_size(handle); _starpu_spin_lock(&handle->header_lock); //_STARPU_DEBUG("Really invalidating data %p\n", data); __starpu_data_deinitialize(handle); unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { struct _starpu_data_replicate *local = &handle->per_node[node]; if (local->refcnt > 1) /* Avoid freeing chunk still in use by others than this function */ continue; if (local->mc && local->allocated && local->automatically_allocated) { unsigned mapping; for (mapping = 0; mapping < STARPU_MAXNODES; mapping++) if (handle->per_node[mapping].mapped == (int) node) break; if (mapping == STARPU_MAXNODES) { /* free the data copy in a lazy fashion */ _starpu_request_mem_chunk_removal(handle, local, node, size); } } } if (handle->per_worker) { unsigned worker; unsigned nworkers = starpu_worker_get_count(); for (worker = 0; worker < nworkers; worker++) { struct _starpu_data_replicate *local = &handle->per_worker[worker]; if (local->mc && local->allocated && local->automatically_allocated && local->refcnt <= 1) /* free the data copy in a lazy fashion */ _starpu_request_mem_chunk_removal(handle, local, starpu_worker_get_memory_node(worker), size); } } _starpu_spin_unlock(&handle->header_lock); starpu_data_release_on_node(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL); } static void _starpu_data_deinitialize(void *data) { starpu_data_handle_t handle = data; _starpu_spin_lock(&handle->header_lock); //_STARPU_DEBUG("Really deinitializing data %p\n", data); __starpu_data_deinitialize(handle); unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { struct _starpu_data_replicate *local = &handle->per_node[node]; if (local->mc && local->allocated && local->automatically_allocated) /* note that the data is now clean */ _starpu_memchunk_clean(local->mc, node); } if (handle->per_worker) { unsigned worker; unsigned nworkers = starpu_worker_get_count(); for (worker = 0; worker < nworkers; worker++) { struct _starpu_data_replicate *local = &handle->per_worker[worker]; if (local->mc && local->allocated && local->automatically_allocated) /* note that the data is now clean */ _starpu_memchunk_clean(local->mc, starpu_worker_get_memory_node(worker)); } } _starpu_spin_unlock(&handle->header_lock); starpu_data_release_on_node(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL); } void starpu_data_deinitialize(starpu_data_handle_t handle) { STARPU_ASSERT(handle); starpu_data_acquire_on_node(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_W); _starpu_data_deinitialize(handle); handle->initialized = 0; } void starpu_data_deinitialize_submit(starpu_data_handle_t handle) { STARPU_ASSERT(handle); starpu_data_acquire_on_node_cb(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_W, _starpu_data_deinitialize, handle); handle->initialized = 0; } void _starpu_data_deinitialize_submit_noplan(starpu_data_handle_t handle) { STARPU_ASSERT(handle); starpu_data_acquire_on_node_cb(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_W | STARPU_NOPLAN, _starpu_data_deinitialize, handle); handle->initialized = 0; } void starpu_data_invalidate(starpu_data_handle_t handle) { STARPU_ASSERT(handle); starpu_data_acquire_on_node(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_W); _starpu_data_invalidate(handle); handle->initialized = 0; } void starpu_data_invalidate_submit(starpu_data_handle_t handle) { STARPU_ASSERT(handle); starpu_data_acquire_on_node_cb(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_W, _starpu_data_invalidate, handle); handle->initialized = 0; } void _starpu_data_invalidate_submit_noplan(starpu_data_handle_t handle) { STARPU_ASSERT(handle); starpu_data_acquire_on_node_cb(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_W | STARPU_NOPLAN, _starpu_data_invalidate, handle); handle->initialized = 0; } enum starpu_data_interface_id starpu_data_get_interface_id(starpu_data_handle_t handle) { return handle->ops->interfaceid; } void *starpu_data_get_interface_on_node(starpu_data_handle_t handle, unsigned memory_node) { return handle->per_node[memory_node].data_interface; } int starpu_data_interface_get_next_id(void) { _data_interface_number += 1; return _data_interface_number-1; } int starpu_data_pack_node(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT_MSG(handle->ops->pack_data, "The datatype interface %s (%d) does not have a pack operation", handle->ops->name, handle->ops->interfaceid); return handle->ops->pack_data(handle, node, ptr, count); } int starpu_data_pack(starpu_data_handle_t handle, void **ptr, starpu_ssize_t *count) { return starpu_data_pack_node(handle, starpu_worker_get_local_memory_node(), ptr, count); } int starpu_data_peek_node(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { STARPU_ASSERT_MSG(handle->ops->peek_data, "The datatype interface %s (%d) does not have a peek operation", handle->ops->name, handle->ops->interfaceid); int ret; ret = handle->ops->peek_data(handle, node, ptr, count); return ret; } int starpu_data_peek(starpu_data_handle_t handle, void *ptr, size_t count) { return starpu_data_peek_node(handle, starpu_worker_get_local_memory_node(), ptr, count); } int starpu_data_unpack_node(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { STARPU_ASSERT_MSG(handle->ops->unpack_data, "The datatype interface %s (%d) does not have an unpack operation", handle->ops->name, handle->ops->interfaceid); int ret; ret = handle->ops->unpack_data(handle, node, ptr, count); return ret; } int starpu_data_unpack(starpu_data_handle_t handle, void *ptr, size_t count) { return starpu_data_unpack_node(handle, starpu_worker_get_local_memory_node(), ptr, count); } size_t starpu_data_get_size(starpu_data_handle_t handle) { return handle->ops->get_size(handle); } size_t starpu_data_get_alloc_size(starpu_data_handle_t handle) { if (handle->ops->get_alloc_size) return handle->ops->get_alloc_size(handle); else return handle->ops->get_size(handle); } void starpu_data_set_name(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED, const char *name STARPU_ATTRIBUTE_UNUSED) { _STARPU_TRACE_DATA_NAME(handle, name); } int starpu_data_get_home_node(starpu_data_handle_t handle) { return handle->home_node; } void starpu_data_set_coordinates_array(starpu_data_handle_t handle, unsigned dimensions, int dims[]) { unsigned i; unsigned max_dimensions = sizeof(handle->coordinates)/sizeof(handle->coordinates[0]); if (dimensions > max_dimensions) dimensions = max_dimensions; handle->dimensions = dimensions; for (i = 0; i < dimensions; i++) handle->coordinates[i] = dims[i]; _STARPU_TRACE_DATA_COORDINATES(handle, dimensions, dims); } void starpu_data_set_coordinates(starpu_data_handle_t handle, unsigned dimensions, ...) { int dims[dimensions]; unsigned i; va_list varg_list; va_start(varg_list, dimensions); for (i = 0; i < dimensions; i++) dims[i] = va_arg(varg_list, int); va_end(varg_list); starpu_data_set_coordinates_array(handle, dimensions, dims); } unsigned starpu_data_get_coordinates_array(starpu_data_handle_t handle, unsigned dimensions, int dims[]) { unsigned i; if (dimensions > handle->dimensions) dimensions = handle->dimensions; for (i = 0; i < dimensions; i++) dims[i] = handle->coordinates[i]; return dimensions; } void starpu_data_print(starpu_data_handle_t handle, unsigned node, FILE *stream) { if (handle->ops == NULL) fprintf(stream, "Undefined"); else { switch (handle->ops->interfaceid) { case(STARPU_MATRIX_INTERFACE_ID): fprintf(stream, "Matrix"); break; case(STARPU_BLOCK_INTERFACE_ID): fprintf(stream, "Block"); break; case(STARPU_VECTOR_INTERFACE_ID): fprintf(stream, "Vector"); break; case(STARPU_CSR_INTERFACE_ID): fprintf(stream, "CSR"); break; case(STARPU_BCSR_INTERFACE_ID): fprintf(stream, "BCSR"); break; case(STARPU_VARIABLE_INTERFACE_ID): fprintf(stream, "Variable"); break; case(STARPU_VOID_INTERFACE_ID): fprintf(stream, "Void"); break; case(STARPU_MULTIFORMAT_INTERFACE_ID): fprintf(stream, "Multfiformat"); break; case(STARPU_COO_INTERFACE_ID): fprintf(stream, "COO"); break; case(STARPU_TENSOR_INTERFACE_ID): fprintf(stream, "Tensor"); break; case(STARPU_UNKNOWN_INTERFACE_ID): fprintf(stream, "UNKNOWN"); break; default: fprintf(stream, "User interface with id %d", handle->ops->interfaceid); break; } } void *data_interface = NULL; if (starpu_data_test_if_allocated_on_node(handle, node)) data_interface = starpu_data_get_interface_on_node(handle, node); if (starpu_data_test_if_allocated_on_node(handle, handle->home_node)) data_interface = starpu_data_get_interface_on_node(handle, handle->home_node); if (handle->ops && handle->ops->describe && data_interface) { char buffer[1024]; handle->ops->describe(data_interface, buffer, sizeof(buffer)); fprintf(stream, " %s\n", buffer); } else fprintf(stream, "\n"); } starpu-1.4.9+dfsg/src/datawizard/interfaces/data_interface.h000066400000000000000000000053401507764646700241600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DATA_INTERFACE_H__ #define __DATA_INTERFACE_H__ /** @file */ #include #include #include #ifdef STARPU_OPENMP #include #endif #pragma GCC visibility push(hidden) /** Generic type representing an interface, for now it's only used before * execution on message-passing devices but it can be useful in other cases. */ union _starpu_interface { /* struct starpu_void_interface void; void doesn't have any data */ struct starpu_variable_interface variable; struct starpu_vector_interface vector; struct starpu_matrix_interface matrix; struct starpu_block_interface block; struct starpu_tensor_interface tensor; struct starpu_csr_interface csr; struct starpu_bcsr_interface bcsr; struct starpu_coo_interface coo; }; /** Some data interfaces or filters use this interface internally */ extern struct starpu_data_interface_ops starpu_interface_multiformat_ops; void _starpu_data_free_interfaces(starpu_data_handle_t handle); extern int _starpu_data_handle_init(starpu_data_handle_t handle, struct starpu_data_interface_ops *interface_ops, unsigned int mf_node); void _starpu_data_initialize_per_worker(starpu_data_handle_t handle); extern struct starpu_arbiter *_starpu_global_arbiter; extern void _starpu_data_interface_init(void); extern int __starpu_data_check_not_busy(starpu_data_handle_t handle) STARPU_ATTRIBUTE_WARN_UNUSED_RESULT; #define _starpu_data_check_not_busy(handle) \ (STARPU_UNLIKELY(!handle->busy_count && \ (handle->busy_waiting || handle->lazy_unregister)) ? \ __starpu_data_check_not_busy(handle) : 0) extern void _starpu_data_interface_shutdown(void); struct starpu_data_interface_ops *_starpu_data_interface_get_ops(unsigned interface_id) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; #define _starpu_data_is_multiformat_handle(handle) handle->ops->is_multiformat void _starpu_data_deinitialize_submit_noplan(starpu_data_handle_t handle); void _starpu_data_invalidate_submit_noplan(starpu_data_handle_t handle); #pragma GCC visibility pop #endif // __DATA_INTERFACE_H__ starpu-1.4.9+dfsg/src/datawizard/interfaces/matrix_filters.c000066400000000000000000000173241507764646700242630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include /* * an example of a dummy partition function : blocks ... */ static void _starpu_matrix_filter_block(int dim, void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks, uintptr_t shadow_size) { struct starpu_matrix_interface *matrix_father = (struct starpu_matrix_interface *) father_interface; struct starpu_matrix_interface *matrix_child = (struct starpu_matrix_interface *) child_interface; unsigned blocksize; /* the element will be split, in case horizontal, it's nx, in case vertical, it's ny*/ uint32_t nn; uint32_t nx; uint32_t ny; switch(dim) { /* horizontal*/ case 1: /* actual number of elements */ nx = matrix_father->nx - 2 * shadow_size; ny = matrix_father->ny; nn = nx; blocksize = 1; break; /* vertical*/ case 2: nx = matrix_father->nx; /* actual number of elements */ ny = matrix_father->ny - 2 * shadow_size; nn = ny; blocksize = matrix_father->ld; break; default: STARPU_ASSERT_MSG(0, "Unknown value for dim"); } size_t elemsize = matrix_father->elemsize; STARPU_ASSERT_MSG(nchunks <= nn, "cannot split %u elements in %u parts", nn, nchunks); uint32_t child_nn; size_t offset; starpu_filter_nparts_compute_chunk_size_and_offset(nn, nchunks, elemsize, id, blocksize, &child_nn, &offset); child_nn += 2 * shadow_size; STARPU_ASSERT_MSG(matrix_father->id == STARPU_MATRIX_INTERFACE_ID, "%s can only be applied on a matrix data", __func__); /* update the child's interface */ matrix_child->id = matrix_father->id; switch(dim) { case 1: matrix_child->nx = child_nn; matrix_child->ny = ny; break; case 2: matrix_child->nx = nx; matrix_child->ny = child_nn; break; default: STARPU_ASSERT_MSG(0, "Unknown value for dim"); } matrix_child->elemsize = elemsize; /* is the information on this node valid ? */ if (matrix_father->dev_handle) { if (matrix_father->ptr) matrix_child->ptr = matrix_father->ptr + offset; matrix_child->ld = matrix_father->ld; matrix_child->dev_handle = matrix_father->dev_handle; matrix_child->offset = matrix_father->offset + offset; matrix_child->allocsize = matrix_child->ld * matrix_child->ny * elemsize; } else matrix_child->allocsize = matrix_child->nx * matrix_child->ny * elemsize; } void starpu_matrix_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) { _starpu_matrix_filter_block(1, father_interface, child_interface, f, id, nchunks, 0); } /* * an example of a dummy partition function : blocks ... */ void starpu_matrix_filter_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) { uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; _starpu_matrix_filter_block(1, father_interface, child_interface, f, id, nchunks, shadow_size); } void starpu_matrix_filter_vertical_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) { _starpu_matrix_filter_block(2, father_interface, child_interface, f, id, nchunks, 0); } void starpu_matrix_filter_vertical_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) { uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; _starpu_matrix_filter_block(2, father_interface, child_interface, f, id, nchunks, shadow_size); } void starpu_matrix_filter_pick_vector_y(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) { struct starpu_matrix_interface *matrix_father = (struct starpu_matrix_interface *) father_interface; /* each chunk becomes a vector */ struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface; unsigned blocksize; uint32_t nx; uint32_t ny; /* actual number of elements */ nx = matrix_father->nx; ny = matrix_father->ny; blocksize = nx; size_t elemsize = matrix_father->elemsize; uintptr_t chunk_pos = (uintptr_t)f->filter_arg_ptr; STARPU_ASSERT_MSG((chunk_pos + nchunks) <= ny, "cannot get %u vectors", nchunks); STARPU_ASSERT_MSG((chunk_pos + id) < ny, "the chosen vector should be in the matrix"); size_t offset = (chunk_pos + id) * blocksize * elemsize; STARPU_ASSERT_MSG(matrix_father->id == STARPU_MATRIX_INTERFACE_ID, "%s can only be applied on a matrix data", __func__); /* update the child's interface */ vector_child->id = STARPU_VECTOR_INTERFACE_ID; vector_child->nx = nx; vector_child->elemsize = elemsize; vector_child->allocsize = vector_child->nx * elemsize; /* is the information on this node valid ? */ if (matrix_father->dev_handle) { if (matrix_father->ptr) vector_child->ptr = matrix_father->ptr + offset; vector_child->dev_handle = matrix_father->dev_handle; vector_child->offset = matrix_father->offset + offset; } } struct starpu_data_interface_ops *starpu_matrix_filter_pick_vector_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_vector_ops; } void starpu_matrix_filter_pick_variable(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks) { struct starpu_matrix_interface *matrix_father = (struct starpu_matrix_interface *) father_interface; /* each chunk becomes a variable */ struct starpu_variable_interface *variable_child = (struct starpu_variable_interface *) child_interface; unsigned blocksize; uint32_t nx; uint32_t ld; uint32_t ny; /* actual number of elements */ nx = matrix_father->nx; ld = matrix_father->ld; ny = matrix_father->ny; blocksize = ld; size_t elemsize = matrix_father->elemsize; uint32_t* chunk_pos = (uint32_t*)f->filter_arg_ptr; // int i; // for(i=0; i<2; i++) // { // printf("pos is %d\n", chunk_pos[i]); // } STARPU_ASSERT_MSG((chunk_pos[0] < nx)&&(chunk_pos[1] < ny), "the chosen variable should be in the matrix"); size_t offset = (((chunk_pos[1]) * blocksize) + chunk_pos[0]) * elemsize; STARPU_ASSERT_MSG(matrix_father->id == STARPU_MATRIX_INTERFACE_ID, "%s can only be applied on a matrix data", __func__); /* update the child's interface */ variable_child->id = STARPU_VARIABLE_INTERFACE_ID; variable_child->elemsize = elemsize; /* is the information on this node valid ? */ if (matrix_father->dev_handle) { if (matrix_father->ptr) variable_child->ptr = matrix_father->ptr + offset; variable_child->dev_handle = matrix_father->dev_handle; variable_child->offset = matrix_father->offset + offset; } } struct starpu_data_interface_ops *starpu_matrix_filter_pick_variable_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_variable_ops; } starpu-1.4.9+dfsg/src/datawizard/interfaces/matrix_interface.c000066400000000000000000000512141507764646700245470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef BUILDING_STARPU #include #endif static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); static int map_matrix(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int unmap_matrix(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int update_map_matrix(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static const struct starpu_data_copy_methods matrix_copy_data_methods_s = { .any_to_any = copy_any_to_any, }; static void matrix_init(void *data_interface); static void register_matrix_handle(starpu_data_handle_t handle, int home_node, void *data_interface); static void *matrix_to_pointer(void *data_interface, unsigned node); static starpu_ssize_t allocate_matrix_buffer_on_node(void *data_interface_, unsigned dst_node); static void free_matrix_buffer_on_node(void *data_interface, unsigned node); static void cache_matrix_buffer_on_node(void *cached_interface, void *src_data_interface, unsigned node); static void reuse_matrix_buffer_on_node(void *dst_data_interface, const void *cached_interface, unsigned node); static size_t matrix_interface_get_size(starpu_data_handle_t handle); static size_t matrix_interface_get_alloc_size(starpu_data_handle_t handle); static uint32_t footprint_matrix_interface_crc32(starpu_data_handle_t handle); static uint32_t alloc_footprint_matrix_interface_crc32(starpu_data_handle_t handle); static int matrix_compare(void *data_interface_a, void *data_interface_b); static int matrix_alloc_compare(void *data_interface_a, void *data_interface_b); static void display_matrix_interface(starpu_data_handle_t handle, FILE *f); static int pack_matrix_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); static int peek_matrix_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static int unpack_matrix_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); struct starpu_data_interface_ops starpu_interface_matrix_ops = { .init = matrix_init, .register_data_handle = register_matrix_handle, .allocate_data_on_node = allocate_matrix_buffer_on_node, .to_pointer = matrix_to_pointer, .free_data_on_node = free_matrix_buffer_on_node, .cache_data_on_node = cache_matrix_buffer_on_node, .reuse_data_on_node = reuse_matrix_buffer_on_node, .map_data = map_matrix, .unmap_data = unmap_matrix, .update_map = update_map_matrix, .copy_methods = &matrix_copy_data_methods_s, .get_size = matrix_interface_get_size, .get_alloc_size = matrix_interface_get_alloc_size, .footprint = footprint_matrix_interface_crc32, .alloc_footprint = alloc_footprint_matrix_interface_crc32, .compare = matrix_compare, .alloc_compare = matrix_alloc_compare, .interfaceid = STARPU_MATRIX_INTERFACE_ID, .interface_size = sizeof(struct starpu_matrix_interface), .display = display_matrix_interface, .pack_data = pack_matrix_handle, .peek_data = peek_matrix_handle, .unpack_data = unpack_matrix_handle, .describe = describe, .name = "STARPU_MATRIX_INTERFACE", .pack_meta = NULL, .unpack_meta = NULL, .free_meta = NULL }; static void matrix_init(void *data_interface) { struct starpu_matrix_interface *matrix_interface = data_interface; matrix_interface->allocsize = -1; } static void register_matrix_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) data_interface; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_matrix_interface *local_interface = (struct starpu_matrix_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->ptr = matrix_interface->ptr; local_interface->dev_handle = matrix_interface->dev_handle; local_interface->offset = matrix_interface->offset; local_interface->ld = matrix_interface->ld; } else { local_interface->ptr = 0; local_interface->dev_handle = 0; local_interface->offset = 0; local_interface->ld = 0; } local_interface->id = matrix_interface->id; local_interface->nx = matrix_interface->nx; local_interface->ny = matrix_interface->ny; local_interface->elemsize = matrix_interface->elemsize; local_interface->allocsize = matrix_interface->allocsize; } } static void *matrix_to_pointer(void *data_interface, unsigned node) { (void) node; struct starpu_matrix_interface *matrix_interface = data_interface; return (void*) matrix_interface->ptr; } /* declare a new data with the matrix interface */ void starpu_matrix_data_register_allocsize(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize, size_t allocsize) { STARPU_ASSERT_MSG(ld >= nx, "ld = %u should not be less than nx = %u.", ld, nx); struct starpu_matrix_interface matrix_interface = { .id = STARPU_MATRIX_INTERFACE_ID, .ptr = ptr, .ld = ld, .nx = nx, .ny = ny, .elemsize = elemsize, .dev_handle = ptr, .offset = 0, .allocsize = allocsize, }; #ifndef STARPU_SIMGRID if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) { if (nx && ny && elemsize) { STARPU_ASSERT_ACCESSIBLE(ptr); STARPU_ASSERT_ACCESSIBLE(ptr + (ny-1)*ld*elemsize + nx*elemsize - 1); } } #endif starpu_data_register(handleptr, home_node, &matrix_interface, &starpu_interface_matrix_ops); } void starpu_matrix_data_register(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, uint32_t ld, uint32_t nx, uint32_t ny, size_t elemsize) { starpu_matrix_data_register_allocsize(handleptr, home_node, ptr, ld, nx, ny, elemsize, nx * ny * elemsize); } void starpu_matrix_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ld) { struct starpu_matrix_interface *matrix_interface = starpu_data_get_interface_on_node(handle, node); starpu_data_ptr_register(handle, node); matrix_interface->ptr = ptr; matrix_interface->dev_handle = dev_handle; matrix_interface->offset = offset; matrix_interface->ld = ld; } static uint32_t footprint_matrix_interface_crc32(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(starpu_matrix_get_nx(handle), starpu_matrix_get_ny(handle)); } static uint32_t alloc_footprint_matrix_interface_crc32(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(starpu_matrix_get_allocsize(handle), 0); } static int matrix_compare(void *data_interface_a, void *data_interface_b) { struct starpu_matrix_interface *matrix_a = (struct starpu_matrix_interface *) data_interface_a; struct starpu_matrix_interface *matrix_b = (struct starpu_matrix_interface *) data_interface_b; /* Two matrices are considered compatible if they have the same size */ return (matrix_a->nx == matrix_b->nx) && (matrix_a->ny == matrix_b->ny) && (matrix_a->elemsize == matrix_b->elemsize); } static int matrix_alloc_compare(void *data_interface_a, void *data_interface_b) { struct starpu_matrix_interface *matrix_a = (struct starpu_matrix_interface *) data_interface_a; struct starpu_matrix_interface *matrix_b = (struct starpu_matrix_interface *) data_interface_b; /* Two matrices are considered allocation-compatible if they have the same size */ return (matrix_a->allocsize == matrix_b->allocsize); } static void display_matrix_interface(starpu_data_handle_t handle, FILE *f) { struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); fprintf(f, "%u\t%u\t", matrix_interface->nx, matrix_interface->ny); } #define IS_CONTIGUOUS_MATRIX(nx, ny, ld) ((nx) == (ld)) //#define DYNAMIC_MATRICES struct pack_matrix_header { #ifdef DYNAMIC_MATRICES /* Receiving matrices with different sizes from MPI */ /* FIXME: that would break alignment for O_DIRECT disk access... * while in the disk case, we do know the matrix size anyway */ /* FIXME: rather make MPI pack the data interface in the envelope for us? */ uint32_t nx; uint32_t ny; size_t elemsize; #endif }; static int pack_matrix_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) starpu_data_get_interface_on_node(handle, node); uint32_t ld = matrix_interface->ld; uint32_t nx = matrix_interface->nx; uint32_t ny = matrix_interface->ny; size_t elemsize = matrix_interface->elemsize; *count = nx*ny*elemsize + sizeof(struct pack_matrix_header); if (ptr != NULL) { char *matrix = (void *)matrix_interface->ptr; *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); struct pack_matrix_header *header = *ptr; #ifdef DYNAMIC_MATRICES header->nx = nx; header->ny = ny; header->elemsize = elemsize; #endif char *cur = (char*) *ptr + sizeof(*header); if (IS_CONTIGUOUS_MATRIX(nx, ny, ld)) memcpy(cur, matrix, nx*ny*elemsize); else { uint32_t y; for(y=0 ; yld; uint32_t nx = matrix_interface->nx; uint32_t ny = matrix_interface->ny; size_t elemsize = matrix_interface->elemsize; struct pack_matrix_header *header = ptr; #ifdef DYNAMIC_MATRICES STARPU_ASSERT(count >= sizeof(*header)); if (IS_CONTIGUOUS_MATRIX(nx, ny, ld)) { /* We can store whatever can fit */ STARPU_ASSERT_MSG(header->elemsize == elemsize, "Data element size %u needs to be same as the received data element size %u", (unsigned) elemsize, (unsigned) header->elemsize); STARPU_ASSERT_MSG(header->nx * header->ny * header->elemsize <= matrix_interface->allocsize, "Initial size of data %lu needs to be big enough for received data %ux%ux%u", (unsigned long) matrix_interface->allocsize, (unsigned) header->nx, (unsigned) header->ny, (unsigned) header->elemsize); /* Better keep it contiguous */ matrix_interface->ld = ld = header->nx; } else { STARPU_ASSERT_MSG(header->nx <= nx, "Initial nx %u of data needs to be big enough for received data nx %u\n", nx, header->nx); STARPU_ASSERT_MSG(header->ny <= ny, "Initial ny %u of data needs to be big enough for received data ny %u\n", ny, header->ny); } matrix_interface->nx = nx = header->nx; matrix_interface->ny = ny = header->ny; #endif char *cur = (char*) ptr + sizeof(*header); STARPU_ASSERT(count == sizeof(*header) + elemsize * nx * ny); char *matrix = (void *)matrix_interface->ptr; if (IS_CONTIGUOUS_MATRIX(nx, ny, ld)) memcpy(matrix, ptr, nx*ny*elemsize); else { uint32_t y; for(y=0 ; yid == STARPU_MATRIX_INTERFACE_ID, "Error. The given data is not a matrix."); #endif return matrix_interface->nx * matrix_interface->ny * matrix_interface->elemsize; } static size_t matrix_interface_get_alloc_size(starpu_data_handle_t handle) { struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(matrix_interface->id == STARPU_MATRIX_INTERFACE_ID, "Error. The given data is not a matrix."); #endif STARPU_ASSERT_MSG(matrix_interface->allocsize != (size_t)-1, "The matrix allocation size needs to be defined"); return matrix_interface->allocsize; } /* offer an access to the data parameters */ uint32_t starpu_matrix_get_nx(starpu_data_handle_t handle) { struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(matrix_interface->id == STARPU_MATRIX_INTERFACE_ID, "Error. The given data is not a matrix."); #endif return matrix_interface->nx; } uint32_t starpu_matrix_get_ny(starpu_data_handle_t handle) { struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(matrix_interface->id == STARPU_MATRIX_INTERFACE_ID, "Error. The given data is not a matrix."); #endif return matrix_interface->ny; } uint32_t starpu_matrix_get_local_ld(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(matrix_interface->id == STARPU_MATRIX_INTERFACE_ID, "Error. The given data is not a matrix."); #endif return matrix_interface->ld; } uintptr_t starpu_matrix_get_local_ptr(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(matrix_interface->id == STARPU_MATRIX_INTERFACE_ID, "Error. The given data is not a matrix."); #endif return matrix_interface->ptr; } size_t starpu_matrix_get_elemsize(starpu_data_handle_t handle) { struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(matrix_interface->id == STARPU_MATRIX_INTERFACE_ID, "Error. The given data is not a matrix."); #endif return matrix_interface->elemsize; } size_t starpu_matrix_get_allocsize(starpu_data_handle_t handle) { struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(matrix_interface->id == STARPU_MATRIX_INTERFACE_ID, "Error. The given data is not a matrix."); #endif return matrix_interface->allocsize; } /* memory allocation/deallocation primitives for the matrix interface */ /* returns the size of the allocated area */ static starpu_ssize_t allocate_matrix_buffer_on_node(void *data_interface_, unsigned dst_node) { uintptr_t addr = 0, handle; struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) data_interface_; uint32_t ld = matrix_interface->nx; // by default starpu_ssize_t allocated_memory = matrix_interface->allocsize; handle = starpu_malloc_on_node(dst_node, allocated_memory); if (!handle) return -ENOMEM; if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) addr = handle; /* update the data properly in consequence */ matrix_interface->ptr = addr; matrix_interface->dev_handle = handle; matrix_interface->offset = 0; matrix_interface->ld = ld; return allocated_memory; } static void free_matrix_buffer_on_node(void *data_interface, unsigned node) { struct starpu_matrix_interface *matrix_interface = (struct starpu_matrix_interface *) data_interface; starpu_free_on_node(node, matrix_interface->dev_handle, matrix_interface->allocsize); matrix_interface->ptr = 0; matrix_interface->dev_handle = 0; } static void cache_matrix_buffer_on_node(void *cached_interface, void *src_data_interface, unsigned node STARPU_ATTRIBUTE_UNUSED) { struct starpu_matrix_interface *cached_matrix_interface = cached_interface; struct starpu_matrix_interface *src_matrix_interface = src_data_interface; cached_matrix_interface->ptr = src_matrix_interface->ptr; src_matrix_interface->ptr = 0; cached_matrix_interface->dev_handle = src_matrix_interface->dev_handle; src_matrix_interface->dev_handle = 0; cached_matrix_interface->allocsize = src_matrix_interface->allocsize; STARPU_ASSERT(src_matrix_interface->offset == 0); } static void reuse_matrix_buffer_on_node(void *dst_data_interface, const void *cached_interface, unsigned node STARPU_ATTRIBUTE_UNUSED) { struct starpu_matrix_interface *dst_matrix_interface = dst_data_interface; const struct starpu_matrix_interface *cached_matrix_interface = cached_interface; dst_matrix_interface->ptr = cached_matrix_interface->ptr; dst_matrix_interface->dev_handle = cached_matrix_interface->dev_handle; dst_matrix_interface->offset = 0; dst_matrix_interface->ld = dst_matrix_interface->nx; // by default } static int map_matrix(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_matrix_interface *src_matrix = src_interface; struct starpu_matrix_interface *dst_matrix = dst_interface; int ret; uintptr_t mapped; /* map area ld*(ny-1)+nx */ mapped = starpu_interface_map(src_matrix->dev_handle, src_matrix->offset, src_node, dst_node, (src_matrix->ld*(src_matrix->ny-1)+src_matrix->nx)*src_matrix->elemsize, &ret); if (mapped) { dst_matrix->dev_handle = mapped; dst_matrix->offset = 0; if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) dst_matrix->ptr = mapped; dst_matrix->ld = src_matrix->ld; return 0; } return ret; } static int unmap_matrix(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_matrix_interface *src_matrix = src_interface; struct starpu_matrix_interface *dst_matrix = dst_interface; int ret = starpu_interface_unmap(src_matrix->dev_handle, src_matrix->offset, src_node, dst_matrix->dev_handle, dst_node, (src_matrix->ld*(src_matrix->ny-1)+src_matrix->nx)*src_matrix->elemsize); dst_matrix->dev_handle = 0; return ret; } static int update_map_matrix(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_matrix_interface *src_matrix = src_interface; struct starpu_matrix_interface *dst_matrix = dst_interface; return starpu_interface_update_map(src_matrix->dev_handle, src_matrix->offset, src_node, dst_matrix->dev_handle, dst_matrix->offset, dst_node, (src_matrix->ld*(src_matrix->ny-1)+src_matrix->nx)*src_matrix->elemsize); } static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct starpu_matrix_interface *src_matrix = (struct starpu_matrix_interface *) src_interface; struct starpu_matrix_interface *dst_matrix = (struct starpu_matrix_interface *) dst_interface; int ret = 0; uint32_t nx = dst_matrix->nx; uint32_t ny = dst_matrix->ny; size_t elemsize = dst_matrix->elemsize; uint32_t ld_src = src_matrix->ld; uint32_t ld_dst = dst_matrix->ld; if (starpu_interface_copy2d(src_matrix->dev_handle, src_matrix->offset, src_node, dst_matrix->dev_handle, dst_matrix->offset, dst_node, nx * elemsize, ny, ld_src * elemsize, ld_dst * elemsize, async_data)) ret = -EAGAIN; starpu_interface_data_copy(src_node, dst_node, (size_t)nx*ny*elemsize); return ret; } static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) { struct starpu_matrix_interface *matrix = (struct starpu_matrix_interface *) data_interface; return snprintf(buf, size, "M%ux%ux%u", (unsigned) matrix->nx, (unsigned) matrix->ny, (unsigned) matrix->elemsize); } starpu-1.4.9+dfsg/src/datawizard/interfaces/multiformat_interface.c000066400000000000000000000554161507764646700256160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Driver porters: adding your driver here is optional, only needed for the support of multiple formats. */ #include #ifdef BUILDING_STARPU #include #endif /* Driver porters: adding your driver here is optional, only needed for the support of multiple formats. */ static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node); #ifdef STARPU_USE_CUDA static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node); static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node); static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream); static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream); static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED); static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream); #endif #ifdef STARPU_USE_OPENCL static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node); static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node); static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node); static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event); static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event); #endif static const struct starpu_data_copy_methods multiformat_copy_data_methods_s = { .ram_to_ram = copy_ram_to_ram, #ifdef STARPU_USE_CUDA .ram_to_cuda = copy_ram_to_cuda, .cuda_to_ram = copy_cuda_to_ram, .ram_to_cuda_async = copy_ram_to_cuda_async, .cuda_to_ram_async = copy_cuda_to_ram_async, .cuda_to_cuda = copy_cuda_to_cuda, .cuda_to_cuda_async = copy_cuda_to_cuda_async, #else #ifdef STARPU_SIMGRID /* Enable GPU-GPU transfers in simgrid */ .cuda_to_cuda_async = (void *)1, #endif #endif #ifdef STARPU_USE_OPENCL .ram_to_opencl = copy_ram_to_opencl, .opencl_to_ram = copy_opencl_to_ram, .opencl_to_opencl = copy_opencl_to_opencl, .ram_to_opencl_async = copy_ram_to_opencl_async, .opencl_to_ram_async = copy_opencl_to_ram_async, #endif }; static void register_multiformat_handle(starpu_data_handle_t handle, int home_node, void *data_interface); static starpu_ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, unsigned dst_node); static void *multiformat_to_pointer(void *data_interface, unsigned node); static void free_multiformat_buffer_on_node(void *data_interface, unsigned node); static size_t multiformat_interface_get_size(starpu_data_handle_t handle); static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle); static int multiformat_compare(void *data_interface_a, void *data_interface_b); static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f); static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle); static struct starpu_multiformat_data_interface_ops* get_mf_ops(void *data_interface) { struct starpu_multiformat_interface *mf; mf = (struct starpu_multiformat_interface *) data_interface; return mf->ops; } struct starpu_data_interface_ops starpu_interface_multiformat_ops = { .register_data_handle = register_multiformat_handle, .allocate_data_on_node = allocate_multiformat_buffer_on_node, .to_pointer = multiformat_to_pointer, .free_data_on_node = free_multiformat_buffer_on_node, .copy_methods = &multiformat_copy_data_methods_s, .get_size = multiformat_interface_get_size, .footprint = footprint_multiformat_interface_crc32, .compare = multiformat_compare, .interfaceid = STARPU_MULTIFORMAT_INTERFACE_ID, .interface_size = sizeof(struct starpu_multiformat_interface), .display = display_multiformat_interface, .is_multiformat = 1, .get_mf_ops = get_mf_ops }; static void *multiformat_to_pointer(void *data_interface, unsigned node) { struct starpu_multiformat_interface *multiformat_interface = data_interface; switch(starpu_node_get_kind(node)) { case STARPU_CPU_RAM: return multiformat_interface->cpu_ptr; #ifdef STARPU_USE_CUDA case STARPU_CUDA_RAM: return multiformat_interface->cuda_ptr; #endif #ifdef STARPU_USE_OPENCL case STARPU_OPENCL_RAM: return multiformat_interface->opencl_ptr; #endif default: STARPU_ABORT(); } return NULL; } static void register_multiformat_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_multiformat_interface *multiformat_interface; multiformat_interface = (struct starpu_multiformat_interface *) data_interface; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_multiformat_interface *local_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->cpu_ptr = multiformat_interface->cpu_ptr; #ifdef STARPU_USE_CUDA local_interface->cuda_ptr = multiformat_interface->cuda_ptr; #endif #ifdef STARPU_USE_OPENCL local_interface->opencl_ptr = multiformat_interface->opencl_ptr; #endif } else { local_interface->cpu_ptr = NULL; #ifdef STARPU_USE_CUDA local_interface->cuda_ptr = NULL; #endif #ifdef STARPU_USE_OPENCL local_interface->opencl_ptr = NULL; #endif } local_interface->id = multiformat_interface->id; local_interface->nx = multiformat_interface->nx; local_interface->ops = multiformat_interface->ops; } } void starpu_multiformat_data_register(starpu_data_handle_t *handleptr, int home_node, void *ptr, uint32_t nobjects, struct starpu_multiformat_data_interface_ops *format_ops) { struct starpu_multiformat_interface multiformat = { .id = STARPU_MULTIFORMAT_INTERFACE_ID, .cpu_ptr = ptr, .cuda_ptr = NULL, .opencl_ptr = NULL, .nx = nobjects, .ops = format_ops }; starpu_data_register(handleptr, home_node, &multiformat, &starpu_interface_multiformat_ops); } static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(starpu_multiformat_get_nx(handle), 0); } static int multiformat_compare(void *data_interface_a, void *data_interface_b) { struct starpu_multiformat_interface *multiformat_a = (struct starpu_multiformat_interface *) data_interface_a; struct starpu_multiformat_interface *multiformat_b = (struct starpu_multiformat_interface *) data_interface_b; return (multiformat_a->nx == multiformat_b->nx) && (multiformat_a->ops->cpu_elemsize == multiformat_b->ops->cpu_elemsize) #ifdef STARPU_USE_CUDA && (multiformat_a->ops->cuda_elemsize == multiformat_b->ops->cuda_elemsize) #endif #ifdef STARPU_USE_OPENCL && (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize) #endif ; } static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f) { struct starpu_multiformat_interface *multiformat_interface; multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); fprintf(f, "%u\t", multiformat_interface->nx); } /* XXX : returns CPU size */ static size_t multiformat_interface_get_size(starpu_data_handle_t handle) { size_t size; struct starpu_multiformat_interface *multiformat_interface; multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize; return size; } uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle) { struct starpu_multiformat_interface *multiformat_interface; multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return multiformat_interface->nx; } static starpu_ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, unsigned dst_node) { struct starpu_multiformat_interface *multiformat_interface; multiformat_interface = (struct starpu_multiformat_interface *) data_interface_; uintptr_t addr = 0; starpu_ssize_t allocated_memory = 0; size_t size; size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize; allocated_memory += size; addr = starpu_malloc_on_node(dst_node, size); if (!addr) goto fail_cpu; multiformat_interface->cpu_ptr = (void *) addr; #ifdef STARPU_USE_CUDA size = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize; allocated_memory += size; addr = starpu_malloc_on_node(dst_node, size); if (!addr) goto fail_cuda; multiformat_interface->cuda_ptr = (void *) addr; #endif #ifdef STARPU_USE_OPENCL size = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize; allocated_memory += size; addr = starpu_malloc_on_node(dst_node, size); if (!addr) goto fail_opencl; multiformat_interface->opencl_ptr = (void *) addr; #endif return allocated_memory; #ifdef STARPU_USE_OPENCL starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->opencl_ptr, multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize); fail_opencl: #endif #ifdef STARPU_USE_CUDA starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->cuda_ptr, multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize); fail_cuda: #endif starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->cpu_ptr, multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize); fail_cpu: return -ENOMEM; } static void free_multiformat_buffer_on_node(void *data_interface, unsigned node) { struct starpu_multiformat_interface *multiformat_interface; multiformat_interface = (struct starpu_multiformat_interface *) data_interface; starpu_free_on_node(node, (uintptr_t) multiformat_interface->cpu_ptr, multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize); multiformat_interface->cpu_ptr = NULL; #ifdef STARPU_USE_CUDA starpu_free_on_node(node, (uintptr_t) multiformat_interface->cuda_ptr, multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize); multiformat_interface->cuda_ptr = NULL; #endif #ifdef STARPU_USE_OPENCL starpu_free_on_node(node, (uintptr_t) multiformat_interface->opencl_ptr, multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize); multiformat_interface->opencl_ptr = NULL; #endif } /* * Copy methods */ static int copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_multiformat_interface *src_multiformat; struct starpu_multiformat_interface *dst_multiformat; src_multiformat = (struct starpu_multiformat_interface *) src_interface; dst_multiformat = (struct starpu_multiformat_interface *) dst_interface; STARPU_ASSERT(src_multiformat != NULL); STARPU_ASSERT(dst_multiformat != NULL); STARPU_ASSERT(dst_multiformat->ops != NULL); size_t size = dst_multiformat->nx * dst_multiformat->ops->cpu_elemsize; memcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size); starpu_interface_data_copy(src_node, dst_node, size); return 0; } #ifdef STARPU_USE_CUDA static int copy_cuda_common(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, enum cudaMemcpyKind kind) { struct starpu_multiformat_interface *src_multiformat; struct starpu_multiformat_interface *dst_multiformat; src_multiformat = (struct starpu_multiformat_interface *) src_interface; dst_multiformat = (struct starpu_multiformat_interface *) dst_interface; size_t size; cudaError_t status; switch (kind) { case cudaMemcpyHostToDevice: { size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize; if (src_multiformat->cuda_ptr == NULL) { src_multiformat->cuda_ptr = malloc(size); if (src_multiformat->cuda_ptr == NULL) return -ENOMEM; } status = cudaMemcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind); if (!status) status = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(status)) STARPU_CUDA_REPORT_ERROR(status); break; } case cudaMemcpyDeviceToHost: { size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize; status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind); if (!status) status = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(status)) STARPU_CUDA_REPORT_ERROR(status); break; } case cudaMemcpyDeviceToDevice: { size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize; status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind); if (!status) status = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(status)) STARPU_CUDA_REPORT_ERROR(status); break; } default: STARPU_ABORT(); } starpu_interface_data_copy(src_node, dst_node, size); return 0; } static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node) { return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice); } static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node) { return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost); } static int copy_cuda_common_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cudaStream_t stream, enum cudaMemcpyKind kind) { struct starpu_multiformat_interface *src_multiformat; struct starpu_multiformat_interface *dst_multiformat; src_multiformat = (struct starpu_multiformat_interface *) src_interface; dst_multiformat = (struct starpu_multiformat_interface *) dst_interface; size_t size; cudaError_t status; double start; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); switch (kind) { case cudaMemcpyHostToDevice: { size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize; if (src_multiformat->cuda_ptr == NULL) { src_multiformat->cuda_ptr = malloc(size); if (src_multiformat->cuda_ptr == NULL) return -ENOMEM; } status = cudaMemcpyAsync(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind, stream); if (STARPU_UNLIKELY(status)) { STARPU_CUDA_REPORT_ERROR(status); } break; } case cudaMemcpyDeviceToHost: { size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize; status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream); if (!status) status = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(status)) STARPU_CUDA_REPORT_ERROR(status); break; } case cudaMemcpyDeviceToDevice: { size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize; status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream); if (STARPU_UNLIKELY(status)) STARPU_CUDA_REPORT_ERROR(status); break; } default: STARPU_ABORT(); } starpu_interface_end_driver_copy_async(src_node, dst_node, start); return 0; } static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream) { return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice); } static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream) { return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost); } #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER static int copy_cuda_peer_common(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream) { struct starpu_multiformat_interface *src_multiformat; struct starpu_multiformat_interface *dst_multiformat; src_multiformat = (struct starpu_multiformat_interface *) src_interface; dst_multiformat = (struct starpu_multiformat_interface *) dst_interface; STARPU_ASSERT(src_multiformat != NULL); STARPU_ASSERT(dst_multiformat != NULL); STARPU_ASSERT(src_multiformat->ops != NULL); cudaError_t status; int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize; int src_dev = starpu_memory_node_get_devid(src_node); int dst_dev = starpu_memory_node_get_devid(dst_node); if (stream) { double start; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); status = cudaMemcpyPeerAsync(dst_multiformat->cuda_ptr, dst_dev, src_multiformat->cuda_ptr, src_dev, size, stream); starpu_interface_end_driver_copy_async(src_node, dst_node, start); /* All good ! Still, returning -EAGAIN, because we will need to check the transfer completion later */ if (status == cudaSuccess) return -EAGAIN; } /* Either a synchronous transfer was requested, or the asynchronous one failed. */ status = cudaMemcpyPeer(dst_multiformat->cuda_ptr, dst_dev, src_multiformat->cuda_ptr, src_dev, size); if (!status) status = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(status != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(status); starpu_interface_data_copy(src_node, dst_node, size); return 0; } #endif static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED) { if (src_node == dst_node) { return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice); } else { #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER return copy_cuda_peer_common(src_interface, src_node, dst_interface, dst_node, NULL); #else STARPU_ABORT(); #endif } } static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream) { if (src_node == dst_node) { return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToDevice); } else { #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER return copy_cuda_peer_common(src_interface, src_node, dst_interface, dst_node, stream); #else STARPU_ABORT(); #endif } } #endif /* STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event) { int err, ret; size_t size; struct starpu_multiformat_interface *src_multiformat; struct starpu_multiformat_interface *dst_multiformat; src_multiformat = (struct starpu_multiformat_interface *) src_interface; dst_multiformat = (struct starpu_multiformat_interface *) dst_interface; STARPU_ASSERT(src_multiformat != NULL); STARPU_ASSERT(dst_multiformat != NULL); STARPU_ASSERT(src_multiformat->ops != NULL); size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize; err = starpu_opencl_copy_ram_to_opencl(src_multiformat->cpu_ptr, src_node, (cl_mem) dst_multiformat->cpu_ptr, dst_node, size, 0, event, &ret); if (STARPU_UNLIKELY(err)) STARPU_OPENCL_REPORT_ERROR(err); if (!event) starpu_interface_data_copy(src_node, dst_node, size); return ret; } static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event) { int err, ret; size_t size; struct starpu_multiformat_interface *src_multiformat; struct starpu_multiformat_interface *dst_multiformat; src_multiformat = (struct starpu_multiformat_interface *) src_interface; dst_multiformat = (struct starpu_multiformat_interface *) dst_interface; STARPU_ASSERT(src_multiformat != NULL); STARPU_ASSERT(dst_multiformat != NULL); STARPU_ASSERT(src_multiformat->ops != NULL); STARPU_ASSERT(dst_multiformat->ops != NULL); size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize; if (dst_multiformat->opencl_ptr == NULL) { /* XXX : it is weird that we might have to allocate memory here... */ dst_multiformat->opencl_ptr = malloc(dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize); STARPU_ASSERT_MSG(dst_multiformat->opencl_ptr != NULL || dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize == 0, "Cannot allocate %ld bytes\n", (long) (dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize)); } err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_multiformat->opencl_ptr, src_node, dst_multiformat->opencl_ptr, dst_node, size, 0, event, &ret); if (STARPU_UNLIKELY(err)) STARPU_OPENCL_REPORT_ERROR(err); if (!event) starpu_interface_data_copy(src_node, dst_node, size); return ret; } static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED) { return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL); } static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED) { return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL); } static int copy_opencl_to_opencl(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { (void) src_interface; (void) dst_interface; (void) src_node; (void) dst_node; STARPU_ASSERT_MSG(0, "XXX multiformat copy OpenCL-OpenCL not supported yet (TODO)"); return 0; } #endif starpu-1.4.9+dfsg/src/datawizard/interfaces/ndim_filters.c000066400000000000000000000502511507764646700237020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include static void _interface_assignment_ndim_to_tensor(void *ndim_interface, void *child_interface); static void _interface_assignment_ndim_to_block(void *ndim_interface, void *child_interface); static void _interface_assignment_ndim_to_matrix(void *ndim_interface, void *child_interface); static void _interface_assignment_ndim_to_vector(void *ndim_interface, void *child_interface); static void _interface_assignment_ndim_to_variable(void *ndim_interface, void *child_interface); static void _interface_deallocate(void * ndim_interface); static void _starpu_ndim_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts, uintptr_t shadow_size) { struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; struct starpu_ndim_interface *ndim_child = (struct starpu_ndim_interface *) child_interface; STARPU_ASSERT_MSG(ndim_father->id == STARPU_NDIM_INTERFACE_ID, "%s can only be applied on a ndim array data", __func__); size_t ndim = ndim_father->ndim; STARPU_ASSERT_MSG(ndim > 0, "ndim %u must be greater than 0!\n", (unsigned) ndim); unsigned dim = 0; if (ndim > 1) dim = f->filter_arg; STARPU_ASSERT_MSG(dim < ndim, "dim %u must be less than %u!\n", dim, (unsigned) ndim); uint32_t father_nn = 0; uint32_t ni[ndim]; unsigned i; for (i=0; inn[i] - 2 * shadow_size; father_nn = ni[i]; } else { ni[i] = ndim_father->nn[i]; } } STARPU_ASSERT_MSG(nparts <= father_nn, "cannot split %u elements in %u parts", father_nn, nparts); unsigned blocksize = ndim_father->ldn[dim]; size_t elemsize = ndim_father->elemsize; uint32_t child_nn; size_t offset; starpu_filter_nparts_compute_chunk_size_and_offset(father_nn, nparts, elemsize, id, blocksize, &child_nn, &offset); child_nn += 2 * shadow_size; ndim_child->id = ndim_father->id; _STARPU_MALLOC(ndim_child->nn, ndim*sizeof(uint32_t)); for (i=0; inn[i] = ni[i]; } else { ndim_child->nn[i] = child_nn; } } _STARPU_MALLOC(ndim_child->ldn, ndim*sizeof(uint32_t)); ndim_child->ndim = ndim; ndim_child->elemsize = elemsize; ndim_child->allocsize = elemsize; if (ndim_father->dev_handle) { if (ndim_father->ptr) ndim_child->ptr = ndim_father->ptr + offset; for (i=0; ildn[i] = ndim_father->ldn[i]; } if (ndim >= 1) ndim_child->allocsize *= ndim_child->ldn[ndim-1] * ndim_child->nn[ndim-1]; ndim_child->dev_handle = ndim_father->dev_handle; ndim_child->offset = ndim_father->offset + offset; } else { for (i=0; iallocsize *= ndim_child->nn[i]; } } void starpu_ndim_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { _starpu_ndim_filter_block(father_interface, child_interface, f, id, nparts, 0); } void starpu_ndim_filter_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; _starpu_ndim_filter_block(father_interface, child_interface, f, id, nparts, shadow_size); } void starpu_ndim_filter_to_tensor(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; STARPU_ASSERT_MSG(ndim_father->ndim == 4, "can only be applied on a 4-dim array"); if (ndim_father->dev_handle) STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot transfer to a tensor if ldn[0] does not equal to 1"); struct starpu_ndim_interface ndim_child; memset(&ndim_child, 0, sizeof(ndim_child)); _starpu_ndim_filter_block(father_interface, &ndim_child, f, id, nparts, 0); _interface_assignment_ndim_to_tensor(&ndim_child, child_interface); _interface_deallocate(&ndim_child); } void starpu_ndim_filter_to_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; STARPU_ASSERT_MSG(ndim_father->ndim == 3, "can only be applied on a 3-dim array"); if (ndim_father->dev_handle) STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot transfer to a block if ldn[0] does not equal to 1"); struct starpu_ndim_interface ndim_child; memset(&ndim_child, 0, sizeof(ndim_child)); _starpu_ndim_filter_block(father_interface, &ndim_child, f, id, nparts, 0); _interface_assignment_ndim_to_block(&ndim_child, child_interface); _interface_deallocate(&ndim_child); } void starpu_ndim_filter_to_matrix(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; STARPU_ASSERT_MSG(ndim_father->ndim == 2, "can only be applied on a 2-dim array"); if (ndim_father->dev_handle) STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot transfer to a matrix if ldn[0] does not equal to 1"); struct starpu_ndim_interface ndim_child; memset(&ndim_child, 0, sizeof(ndim_child)); _starpu_ndim_filter_block(father_interface, &ndim_child, f, id, nparts, 0); _interface_assignment_ndim_to_matrix(&ndim_child, child_interface); _interface_deallocate(&ndim_child); } void starpu_ndim_filter_to_vector(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; STARPU_ASSERT_MSG(ndim_father->ndim == 1, "can only be applied on a 1-dim array"); if (ndim_father->dev_handle) STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot transfer to a vector if ldn[0] does not equal to 1"); struct starpu_ndim_interface ndim_child; memset(&ndim_child, 0, sizeof(ndim_child)); _starpu_ndim_filter_block(father_interface, &ndim_child, f, id, nparts, 0); _interface_assignment_ndim_to_vector(&ndim_child, child_interface); _interface_deallocate(&ndim_child); } void starpu_ndim_filter_to_variable(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; STARPU_ASSERT_MSG(ndim_father->ndim == 0, "can only be applied on a 0-dim array (a variable)"); STARPU_ASSERT_MSG(id == 0 && nparts == 1, "cannot split a variable"); _interface_assignment_ndim_to_variable(father_interface, child_interface); } void starpu_ndim_filter_pick_ndim(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; struct starpu_ndim_interface *ndim_child = (struct starpu_ndim_interface *) child_interface; STARPU_ASSERT_MSG(ndim_father->id == STARPU_NDIM_INTERFACE_ID, "%s can only be applied on a ndim array data", __func__); ndim_child->id = STARPU_NDIM_INTERFACE_ID; size_t ndim = ndim_father->ndim; STARPU_ASSERT_MSG(ndim > 0, "ndim %u must be greater than 0!\n", (unsigned) ndim); unsigned dim = 0; if (ndim > 1) dim = f->filter_arg; STARPU_ASSERT_MSG(dim < ndim, "dim %u must be less than %u!\n", dim, (unsigned) ndim); uint32_t father_nn = 0; uint32_t ni[ndim]; unsigned i; for (i=0; inn[i]; if(i==dim) father_nn = ni[i]; } STARPU_ASSERT_MSG(nparts <= father_nn, "cannot split %u elements in %u parts", father_nn, nparts); unsigned blocksize = ndim_father->ldn[dim]; size_t elemsize = ndim_father->elemsize; size_t chunk_pos = (size_t)f->filter_arg_ptr; STARPU_ASSERT_MSG((chunk_pos + id) < father_nn, "the chosen sub (n-1)dim array should be in the ndim array"); size_t offset = (chunk_pos + id) * blocksize * elemsize; int j; _STARPU_MALLOC(ndim_child->nn, (ndim-1)*sizeof(uint32_t)); if (ndim > 1) { j = 0; for (i=0; inn[j] = ni[i]; j++; } } } _STARPU_MALLOC(ndim_child->ldn, (ndim-1)*sizeof(uint32_t)); ndim_child->ndim = ndim-1; ndim_child->elemsize = elemsize; ndim_child->allocsize = elemsize; if (ndim_father->dev_handle) { if (ndim_father->ptr) ndim_child->ptr = ndim_father->ptr + offset; if (ndim > 1) { j = 0; for (i=0; ildn[j] = ndim_father->ldn[i]; j++; } } ndim_child->allocsize *= ndim_child->ldn[ndim-2] * ndim_child->nn[ndim-2]; } ndim_child->dev_handle = ndim_father->dev_handle; ndim_child->offset = ndim_father->offset + offset; } else { for (i=0; iallocsize *= ndim_child->nn[i]; } } void starpu_ndim_filter_5d_pick_tensor(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; STARPU_ASSERT_MSG(ndim_father->ndim == 5, "can only be applied on a 5-dim array"); if (ndim_father->dev_handle) STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot pick a tensor if ldn[0] does not equal to 1"); struct starpu_ndim_interface ndim_child; memset(&ndim_child, 0, sizeof(ndim_child)); starpu_ndim_filter_pick_ndim(father_interface, &ndim_child, f, id, nparts); _interface_assignment_ndim_to_tensor(&ndim_child, child_interface); _interface_deallocate(&ndim_child); } void starpu_ndim_filter_4d_pick_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; STARPU_ASSERT_MSG(ndim_father->ndim == 4, "can only be applied on a 4-dim array"); if (ndim_father->dev_handle) STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot pick a block if ldn[0] does not equal to 1"); struct starpu_ndim_interface ndim_child; memset(&ndim_child, 0, sizeof(ndim_child)); starpu_ndim_filter_pick_ndim(father_interface, &ndim_child, f, id, nparts); _interface_assignment_ndim_to_block(&ndim_child, child_interface); _interface_deallocate(&ndim_child); } void starpu_ndim_filter_3d_pick_matrix(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; STARPU_ASSERT_MSG(ndim_father->ndim == 3, "can only be applied on a 3-dim array"); if (ndim_father->dev_handle) STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot pick a matrix if ldn[0] does not equal to 1"); struct starpu_ndim_interface ndim_child; memset(&ndim_child, 0, sizeof(ndim_child)); starpu_ndim_filter_pick_ndim(father_interface, &ndim_child, f, id, nparts); _interface_assignment_ndim_to_matrix(&ndim_child, child_interface); _interface_deallocate(&ndim_child); } void starpu_ndim_filter_2d_pick_vector(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; STARPU_ASSERT_MSG(ndim_father->ndim == 2, "can only be applied on a 2-dim array"); if (ndim_father->dev_handle) STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot pick a vector if ldn[0] does not equal to 1"); struct starpu_ndim_interface ndim_child; memset(&ndim_child, 0, sizeof(ndim_child)); starpu_ndim_filter_pick_ndim(father_interface, &ndim_child, f, id, nparts); _interface_assignment_ndim_to_vector(&ndim_child, child_interface); _interface_deallocate(&ndim_child); } void starpu_ndim_filter_1d_pick_variable(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; STARPU_ASSERT_MSG(ndim_father->ndim == 1, "can only be applied on a 1-dim array"); if (ndim_father->dev_handle) STARPU_ASSERT_MSG(ndim_father->ldn[0]==1, "cannot pick a variable if ldn[0] does not equal to 1"); struct starpu_ndim_interface ndim_child; memset(&ndim_child, 0, sizeof(ndim_child)); starpu_ndim_filter_pick_ndim(father_interface, &ndim_child, f, id, nparts); _interface_assignment_ndim_to_variable(&ndim_child, child_interface); _interface_deallocate(&ndim_child); } static void _interface_deallocate(void *ndim_interface) { struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) ndim_interface; free(ndarr->nn); free(ndarr->ldn); } struct starpu_data_interface_ops *starpu_ndim_filter_pick_tensor_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_tensor_ops; } struct starpu_data_interface_ops *starpu_ndim_filter_pick_block_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_block_ops; } struct starpu_data_interface_ops *starpu_ndim_filter_pick_matrix_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_matrix_ops; } struct starpu_data_interface_ops *starpu_ndim_filter_pick_vector_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_vector_ops; } struct starpu_data_interface_ops *starpu_ndim_filter_pick_variable_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_variable_ops; } struct starpu_data_interface_ops *starpu_ndim_filter_to_tensor_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_tensor_ops; } struct starpu_data_interface_ops *starpu_ndim_filter_to_block_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_block_ops; } struct starpu_data_interface_ops *starpu_ndim_filter_to_matrix_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_matrix_ops; } struct starpu_data_interface_ops *starpu_ndim_filter_to_vector_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_vector_ops; } struct starpu_data_interface_ops *starpu_ndim_filter_to_variable_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_variable_ops; } static void _interface_assignment_ndim_to_tensor(void *ndim_interface, void *child_interface) { struct starpu_tensor_interface *tensor = (struct starpu_tensor_interface *) child_interface; struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) ndim_interface; tensor->id = STARPU_TENSOR_INTERFACE_ID; tensor->nx = ndarr->nn[0]; tensor->ny = ndarr->nn[1]; tensor->nz = ndarr->nn[2]; tensor->nt = ndarr->nn[3]; tensor->elemsize = ndarr->elemsize; tensor->ptr = ndarr->ptr; tensor->ldy = ndarr->ldn[1]; tensor->ldz = ndarr->ldn[2]; tensor->ldt = ndarr->ldn[3]; tensor->dev_handle = ndarr->dev_handle; tensor->offset = ndarr->offset; } static void _interface_assignment_ndim_to_block(void *ndim_interface, void *child_interface) { struct starpu_block_interface *block = (struct starpu_block_interface *) child_interface; struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) ndim_interface; block->id = STARPU_BLOCK_INTERFACE_ID; block->nx = ndarr->nn[0]; block->ny = ndarr->nn[1]; block->nz = ndarr->nn[2]; block->elemsize = ndarr->elemsize; block->ptr = ndarr->ptr; block->ldy = ndarr->ldn[1]; block->ldz = ndarr->ldn[2]; block->dev_handle = ndarr->dev_handle; block->offset = ndarr->offset; } static void _interface_assignment_ndim_to_matrix(void *ndim_interface, void *child_interface) { struct starpu_matrix_interface *matrix = (struct starpu_matrix_interface *) child_interface; struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) ndim_interface; matrix->id = STARPU_MATRIX_INTERFACE_ID; matrix->nx = ndarr->nn[0]; matrix->ny = ndarr->nn[1]; matrix->elemsize = ndarr->elemsize; matrix->ptr = ndarr->ptr; matrix->ld = ndarr->ldn[1]; if (matrix->ptr) matrix->allocsize = matrix->ld * matrix->ny * matrix->elemsize; else matrix->allocsize = matrix->nx * matrix->ny * matrix->elemsize; matrix->dev_handle = ndarr->dev_handle; matrix->offset = ndarr->offset; } static void _interface_assignment_ndim_to_vector(void *ndim_interface, void *child_interface) { struct starpu_vector_interface *vector = (struct starpu_vector_interface *) child_interface; struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) ndim_interface; vector->id = STARPU_VECTOR_INTERFACE_ID; vector->nx = ndarr->nn[0]; vector->elemsize = ndarr->elemsize; vector->allocsize = vector->nx * vector->elemsize; vector->ptr = ndarr->ptr; vector->dev_handle = ndarr->dev_handle; vector->offset = ndarr->offset; } static void _interface_assignment_ndim_to_variable(void *ndim_interface, void *child_interface) { struct starpu_variable_interface *variable = (struct starpu_variable_interface *) child_interface; struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) ndim_interface; variable->id = STARPU_VARIABLE_INTERFACE_ID; variable->elemsize = ndarr->elemsize; variable->ptr = ndarr->ptr; variable->dev_handle = ndarr->dev_handle; variable->offset = ndarr->offset; } void starpu_ndim_filter_pick_variable(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks) { struct starpu_ndim_interface *ndim_father = (struct starpu_ndim_interface *) father_interface; struct starpu_variable_interface *variable_child = (struct starpu_variable_interface *) child_interface; STARPU_ASSERT_MSG(ndim_father->id == STARPU_NDIM_INTERFACE_ID, "%s can only be applied on a ndim array data", __func__); size_t ndim = ndim_father->ndim; STARPU_ASSERT_MSG(ndim > 0, "ndim %u must be greater than 0!\n", (unsigned) ndim); uint32_t nn[ndim]; unsigned ldn[ndim]; unsigned i; for (i=0; inn[i]; ldn[i] = ndim_father->ldn[i]; } size_t elemsize = ndim_father->elemsize; uint32_t* chunk_pos = (uint32_t*)f->filter_arg_ptr; int b = 1; size_t offset = 0; for (i = 0; i < ndim; i++) { if(chunk_pos[i] >= nn[i]) { b = 0; break; } offset += chunk_pos[i]*ldn[i]*elemsize; } STARPU_ASSERT_MSG(b == 1, "the chosen variable should be in the ndim array"); /* update the child's interface */ variable_child->id = STARPU_VARIABLE_INTERFACE_ID; variable_child->elemsize = elemsize; /* is the information on this node valid ? */ if (ndim_father->dev_handle) { if (ndim_father->ptr) variable_child->ptr = ndim_father->ptr + offset; variable_child->dev_handle = ndim_father->dev_handle; variable_child->offset = ndim_father->offset + offset; } } starpu-1.4.9+dfsg/src/datawizard/interfaces/ndim_interface.c000066400000000000000000000602341507764646700241740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef BUILDING_STARPU #include #endif #include static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); static int map_ndim(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int unmap_ndim(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int update_map_ndim(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static size_t _get_size(uint32_t* nn, size_t ndim, size_t elemsize); static const struct starpu_data_copy_methods ndim_copy_data_methods_s = { .any_to_any = copy_any_to_any, }; static void register_ndim_handle(starpu_data_handle_t handle, int home_node, void *data_interface); static void unregister_ndim_handle(starpu_data_handle_t handle); static void *ndim_to_pointer(void *data_interface, unsigned node); static starpu_ssize_t allocate_ndim_buffer_on_node(void *data_interface_, unsigned dst_node); static void free_ndim_buffer_on_node(void *data_interface, unsigned node); static void cache_ndim_buffer_on_node(void *cached_interface, void *src_data_interface, unsigned node); static void reuse_ndim_buffer_on_node(void *dst_data_interface, const void *cached_interface, unsigned node); static size_t ndim_interface_get_size(starpu_data_handle_t handle); static uint32_t footprint_ndim_interface_crc32(starpu_data_handle_t handle); static int ndim_compare(void *data_interface_a, void *data_interface_b); static int ndim_alloc_compare(void *data_interface_a, void *data_interface_b); static void display_ndim_interface(starpu_data_handle_t handle, FILE *f); static int pack_ndim_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); static int peek_ndim_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static int unpack_ndim_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); static int pack_meta_ndim_handle(void *data_interface, void **ptr, starpu_ssize_t *count); static int unpack_meta_ndim_handle(void **data_interface, void *ptr, starpu_ssize_t *count); static int free_meta_ndim_handle(void *data_interface); struct starpu_data_interface_ops starpu_interface_ndim_ops = { .register_data_handle = register_ndim_handle, .unregister_data_handle = unregister_ndim_handle, .allocate_data_on_node = allocate_ndim_buffer_on_node, .to_pointer = ndim_to_pointer, .free_data_on_node = free_ndim_buffer_on_node, .cache_data_on_node = cache_ndim_buffer_on_node, .reuse_data_on_node = reuse_ndim_buffer_on_node, .map_data = map_ndim, .unmap_data = unmap_ndim, .update_map = update_map_ndim, .copy_methods = &ndim_copy_data_methods_s, .get_size = ndim_interface_get_size, .footprint = footprint_ndim_interface_crc32, .compare = ndim_compare, .alloc_compare = ndim_alloc_compare, .interfaceid = STARPU_NDIM_INTERFACE_ID, .interface_size = sizeof(struct starpu_ndim_interface), .display = display_ndim_interface, .pack_data = pack_ndim_handle, .peek_data = peek_ndim_handle, .unpack_data = unpack_ndim_handle, .pack_meta = pack_meta_ndim_handle, .unpack_meta = unpack_meta_ndim_handle, .free_meta = free_meta_ndim_handle, .describe = describe, .name = "STARPU_NDIM_INTERFACE", .dontcache = 0 }; static void *ndim_to_pointer(void *data_interface, unsigned node) { (void) node; struct starpu_ndim_interface *ndim_interface = data_interface; return (void*) ndim_interface->ptr; } static void register_ndim_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) data_interface; size_t ndim = ndim_interface->ndim; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_ndim_interface *local_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->ptr = ndim_interface->ptr; local_interface->dev_handle = ndim_interface->dev_handle; local_interface->offset = ndim_interface->offset; uint32_t* ldn_org = ndim_interface->ldn; uint32_t* ldn_cpy; _STARPU_MALLOC(ldn_cpy, ndim*sizeof(uint32_t)); if (ndim) memcpy(ldn_cpy, ldn_org, ndim*sizeof(uint32_t)); local_interface->ldn = ldn_cpy; } else { local_interface->ptr = 0; local_interface->dev_handle = 0; local_interface->offset = 0; uint32_t* ldn_zero; _STARPU_CALLOC(ldn_zero, ndim, sizeof(uint32_t)); local_interface->ldn = ldn_zero; } local_interface->id = ndim_interface->id; uint32_t* nn_org = ndim_interface->nn; uint32_t* nn_cpy; _STARPU_MALLOC(nn_cpy, ndim*sizeof(uint32_t)); if (ndim) memcpy(nn_cpy, nn_org, ndim*sizeof(uint32_t)); local_interface->nn = nn_cpy; local_interface->ndim = ndim_interface->ndim; local_interface->elemsize = ndim_interface->elemsize; local_interface->allocsize = ndim_interface->allocsize; } } static void unregister_ndim_handle(starpu_data_handle_t handle) { unsigned home_node = starpu_data_get_home_node(handle); unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_ndim_interface *local_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->ptr = 0; local_interface->dev_handle = 0; } else { STARPU_ASSERT(local_interface->ptr == 0); STARPU_ASSERT(local_interface->dev_handle == 0); } free(local_interface->nn); local_interface->nn = NULL; free(local_interface->ldn); local_interface->ldn = NULL; } } /* declare a new data with the BLAS interface */ void starpu_ndim_data_register(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, uint32_t* ldn, uint32_t* nn, size_t ndim, size_t elemsize) { unsigned i; size_t allocsize = _get_size(nn, ndim, elemsize); for (i=1; i= nn[i-1], "ldn[%u]/ldn[%u] = %u/%u = %u should not be less than nn[%u] = %u.", i, i-1, ldn[i], ldn[i-1], ldn[i]/ldn[i-1], i-1, nn[i-1]); } struct starpu_ndim_interface ndim_interface = { .id = STARPU_NDIM_INTERFACE_ID, .ptr = ptr, .dev_handle = ptr, .offset = 0, .ldn = ldn, .nn = nn, .ndim = ndim, .elemsize = elemsize, .allocsize = allocsize, }; #ifndef STARPU_SIMGRID if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) { uint32_t nn0 = ndim?nn[0]:1; int b = 1; size_t buffersize = 0; for (i = 1; i < ndim; i++) { if (nn[i]) { buffersize += (nn[i]-1)*ldn[i]*elemsize; } else { b = 0; break; } } buffersize += nn0*elemsize; if (b && elemsize) { STARPU_ASSERT_ACCESSIBLE(ptr); STARPU_ASSERT_ACCESSIBLE(ptr + buffersize - 1); } } #endif starpu_data_register(handleptr, home_node, &ndim_interface, &starpu_interface_ndim_ops); } void starpu_ndim_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t* ldn) { struct starpu_ndim_interface *ndim_interface = starpu_data_get_interface_on_node(handle, node); starpu_data_ptr_register(handle, node); ndim_interface->ptr = ptr; ndim_interface->dev_handle = dev_handle; ndim_interface->offset = offset; if (ndim_interface->ndim) memcpy(ndim_interface->ldn, ldn, ndim_interface->ndim*sizeof(uint32_t)); } static uint32_t footprint_ndim_interface_crc32(starpu_data_handle_t handle) { uint32_t hash; hash = starpu_hash_crc32c_be(starpu_ndim_get_elemsize(handle), 0); unsigned i; for (i=0; indim != ndim_b->ndim) return 0; if (ndim_a->elemsize != ndim_b->elemsize) return 0; unsigned i; /* Two matrices are considered compatible if they have the same size */ for (i=0; indim; i++) { if (ndim_a->nn[i] != ndim_b->nn[i]) return 0; } return 1; } static int ndim_alloc_compare(void *data_interface_a, void *data_interface_b) { struct starpu_ndim_interface *ndim_a = (struct starpu_ndim_interface *) data_interface_a; struct starpu_ndim_interface *ndim_b = (struct starpu_ndim_interface *) data_interface_b; return ndim_a->allocsize == ndim_b->allocsize; } static void display_ndim_interface(starpu_data_handle_t handle, FILE *f) { struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); unsigned i; for (i=0; indim; i++) fprintf(f, "%u\t", ndim_interface->nn[i]); if (ndim_interface->ndim == 0) { fprintf(f, "%lu\t", (unsigned long)ndim_interface->elemsize); } } static int _is_contiguous_ndim(uint32_t* nn, uint32_t* ldn, size_t ndim) { if (ndim == 0) return 1; unsigned i; uint32_t ldi = 1; for (i = 0; ildn; uint32_t* nn = ndim_interface->nn; size_t ndim = ndim_interface->ndim; size_t elemsize = ndim_interface->elemsize; *count = _get_size(nn, ndim, elemsize); if (ptr != NULL) { char *ndptr = (void *)ndim_interface->ptr; *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); char *cur = *ptr; _pack_cpy_ndim_ptr(cur, ndptr, nn, ldn, ndim, elemsize); } return 0; } static int peek_ndim_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, node); uint32_t* ldn = ndim_interface->ldn; uint32_t* nn = ndim_interface->nn; size_t ndim = ndim_interface->ndim; size_t elemsize = ndim_interface->elemsize; STARPU_ASSERT(count == _get_size(nn, ndim, elemsize)); char *cur = ptr; char *ndptr = (void *)ndim_interface->ptr; _peek_cpy_ndim_ptr(ndptr, cur, nn, ldn, ndim, elemsize); return 0; } static int unpack_ndim_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { peek_ndim_handle(handle, node, ptr, count); starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); return 0; } static size_t ndim_interface_get_size(starpu_data_handle_t handle) { struct starpu_ndim_interface *ndim_interface; ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); #endif return _get_size(ndim_interface->nn, ndim_interface->ndim, ndim_interface->elemsize); } /* offer an access to the data parameters */ uint32_t* starpu_ndim_get_nn(starpu_data_handle_t handle) { struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); #endif return ndim_interface->nn; } uint32_t starpu_ndim_get_ni(starpu_data_handle_t handle, size_t i) { struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); STARPU_ASSERT_MSG(ndim_interface->ndim > 0, "The function can only be called when array dimension is greater than 0."); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); #endif return ndim_interface->nn[i]; } uint32_t* starpu_ndim_get_local_ldn(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); #endif return ndim_interface->ldn; } uint32_t starpu_ndim_get_local_ldi(starpu_data_handle_t handle, size_t i) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, node); STARPU_ASSERT_MSG(ndim_interface->ndim > 0, "The function can only be called when array dimension is greater than 0."); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); #endif return ndim_interface->ldn[i]; } uintptr_t starpu_ndim_get_local_ptr(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); #endif return ndim_interface->ptr; } size_t starpu_ndim_get_ndim(starpu_data_handle_t handle) { struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); #endif return ndim_interface->ndim; } size_t starpu_ndim_get_elemsize(starpu_data_handle_t handle) { struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(ndim_interface->id == STARPU_NDIM_INTERFACE_ID, "Error. The given data is not a ndim array."); #endif return ndim_interface->elemsize; } /* memory allocation/deallocation primitives for the NDIM interface */ /* For a newly-allocated interface, the ld values are trivial */ static void set_trivial_ndim_ld(struct starpu_ndim_interface *dst_ndarr) { size_t ndim = dst_ndarr->ndim; uint32_t* nn = dst_ndarr->nn; if (ndim > 0) { uint32_t ntmp = 1; dst_ndarr->ldn[0] = 1; size_t i; for (i=1; ildn[i] = ntmp; } } } /* returns the size of the allocated area */ static starpu_ssize_t allocate_ndim_buffer_on_node(void *data_interface_, unsigned dst_node) { uintptr_t addr = 0, handle; struct starpu_ndim_interface *dst_ndarr = (struct starpu_ndim_interface *) data_interface_; size_t arrsize = dst_ndarr->allocsize; handle = starpu_malloc_on_node(dst_node, arrsize); if (!handle) return -ENOMEM; if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) addr = handle; /* update the data properly in consequence */ dst_ndarr->ptr = addr; dst_ndarr->dev_handle = handle; dst_ndarr->offset = 0; set_trivial_ndim_ld(dst_ndarr); return arrsize; } static void free_ndim_buffer_on_node(void *data_interface, unsigned node) { struct starpu_ndim_interface *ndim_interface = (struct starpu_ndim_interface *) data_interface; starpu_free_on_node(node, ndim_interface->dev_handle, ndim_interface->allocsize); ndim_interface->ptr = 0; ndim_interface->dev_handle = 0; } static void cache_ndim_buffer_on_node(void *cached_interface, void *src_data_interface, unsigned node STARPU_ATTRIBUTE_UNUSED) { struct starpu_ndim_interface *cached_ndarr = (struct starpu_ndim_interface *) cached_interface; struct starpu_ndim_interface *src_ndarr = (struct starpu_ndim_interface *) src_data_interface; cached_ndarr->ptr = src_ndarr->ptr; src_ndarr->ptr = 0; cached_ndarr->dev_handle = src_ndarr->dev_handle; src_ndarr->dev_handle = 0; cached_ndarr->allocsize = src_ndarr->allocsize; STARPU_ASSERT(src_ndarr->offset == 0); } static void reuse_ndim_buffer_on_node(void *dst_data_interface, const void *cached_interface, unsigned node STARPU_ATTRIBUTE_UNUSED) { struct starpu_ndim_interface *dst_ndarr = (struct starpu_ndim_interface *) dst_data_interface; const struct starpu_ndim_interface *cached_ndarr = (const struct starpu_ndim_interface *) cached_interface; dst_ndarr->ptr = cached_ndarr->ptr; dst_ndarr->dev_handle = cached_ndarr->dev_handle; dst_ndarr->offset = 0; set_trivial_ndim_ld(dst_ndarr); } static size_t _get_mapsize(uint32_t* nn, uint32_t* ldn, size_t ndim, size_t elemsize) { uint32_t nn0 = ndim?nn[0]:1; size_t buffersize = 0; unsigned i; for (i = 1; i < ndim; i++) { buffersize += ldn[i]*(nn[i]-1)*elemsize; } buffersize += nn0*elemsize; return buffersize; } static int map_ndim(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_ndim_interface *src_ndarr = src_interface; struct starpu_ndim_interface *dst_ndarr = dst_interface; int ret; uintptr_t mapped; size_t ndim = src_ndarr->ndim; /* map area ldn[ndim-1]*(nn[ndim-1]-1) + ldn[ndim-2]*(nn[ndim-2]-1) + ... + ldn[1]*(nn[1]-1) + nn0*/ mapped = starpu_interface_map(src_ndarr->dev_handle, src_ndarr->offset, src_node, dst_node, _get_mapsize(src_ndarr->nn, src_ndarr->ldn, ndim, src_ndarr->elemsize), &ret); if (mapped) { dst_ndarr->dev_handle = mapped; dst_ndarr->offset = 0; if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) dst_ndarr->ptr = mapped; size_t i; for (i=0; ildn[i] = src_ndarr->ldn[i]; } return 0; } return ret; } static int unmap_ndim(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_ndim_interface *src_ndarr = src_interface; struct starpu_ndim_interface *dst_ndarr = dst_interface; size_t ndim = src_ndarr->ndim; int ret = starpu_interface_unmap(src_ndarr->dev_handle, src_ndarr->offset, src_node, dst_ndarr->dev_handle, dst_node, _get_mapsize(src_ndarr->nn, src_ndarr->ldn, ndim, src_ndarr->elemsize)); dst_ndarr->dev_handle = 0; return ret; } static int update_map_ndim(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_ndim_interface *src_ndarr = src_interface; struct starpu_ndim_interface *dst_ndarr = dst_interface; size_t ndim = src_ndarr->ndim; return starpu_interface_update_map(src_ndarr->dev_handle, src_ndarr->offset, src_node, dst_ndarr->dev_handle, dst_ndarr->offset, dst_node, _get_mapsize(src_ndarr->nn, src_ndarr->ldn, ndim, src_ndarr->elemsize)); } static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct starpu_ndim_interface *src_ndarr = (struct starpu_ndim_interface *) src_interface; struct starpu_ndim_interface *dst_ndarr = (struct starpu_ndim_interface *) dst_interface; int ret = 0; uint32_t* nn = dst_ndarr->nn; size_t ndim = dst_ndarr->ndim; size_t elemsize = dst_ndarr->elemsize; uint32_t* ldn_src = src_ndarr->ldn; uint32_t* ldn_dst = dst_ndarr->ldn; if (starpu_interface_copynd(src_ndarr->dev_handle, src_ndarr->offset, src_node, dst_ndarr->dev_handle, dst_ndarr->offset, dst_node, elemsize, ndim, nn, ldn_src, ldn_dst, async_data)) ret = -EAGAIN; starpu_interface_data_copy(src_node, dst_node, _get_size(nn, ndim, elemsize)); return ret; } static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) { struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) data_interface; size_t ndim = ndarr->ndim; int n = 0; size_t ret; unsigned i; for (i=0; ielemsize:ndarr->nn[i])); n += ret; if(size > ret) size -= ret; else size = 0; } return n; } static starpu_ssize_t size_meta_ndim_handle(struct starpu_ndim_interface *ndarr) { starpu_ssize_t count; count = sizeof(ndarr->ndim) + sizeof(ndarr->offset) + sizeof(ndarr->allocsize) + sizeof(ndarr->elemsize); count += ndarr->ndim * (sizeof(ndarr->ldn[0]) + sizeof(ndarr->nn[0])) + sizeof(ndarr->ptr) + sizeof(ndarr->dev_handle); return count; } #define _pack(dst, src) do { memcpy(dst, &src, sizeof(src)); dst += sizeof(src); } while (0) static int pack_meta_ndim_handle(void *data_interface, void **ptr, starpu_ssize_t *count) { struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) data_interface; *count = size_meta_ndim_handle(ndarr); _STARPU_CALLOC(*ptr, *count, 1); char *cur = *ptr; _pack(cur, ndarr->ndim); _pack(cur, ndarr->offset); _pack(cur, ndarr->allocsize); _pack(cur, ndarr->elemsize); _pack(cur, ndarr->ptr); _pack(cur, ndarr->dev_handle); memcpy(cur, ndarr->ldn, ndarr->ndim*sizeof(ndarr->ldn[0])); cur += ndarr->ndim*sizeof(ndarr->ldn[0]); memcpy(cur, ndarr->nn, ndarr->ndim*sizeof(ndarr->nn[0])); return 0; } #define _unpack(dst, src) do { memcpy(&dst, src, sizeof(dst)); src += sizeof(dst); } while(0) static int unpack_meta_ndim_handle(void **data_interface, void *ptr, starpu_ssize_t *count) { _STARPU_CALLOC(*data_interface, 1, sizeof(struct starpu_ndim_interface)); struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *)(*data_interface); char *cur = ptr; ndarr->id = STARPU_NDIM_INTERFACE_ID; _unpack(ndarr->ndim, cur); _unpack(ndarr->offset, cur); _unpack(ndarr->allocsize, cur); _unpack(ndarr->elemsize, cur); _unpack(ndarr->ptr, cur); _unpack(ndarr->dev_handle, cur); _STARPU_MALLOC(ndarr->ldn, ndarr->ndim*sizeof(ndarr->ldn[0])); memcpy(ndarr->ldn, cur, ndarr->ndim*sizeof(ndarr->ldn[0])); cur += ndarr->ndim*sizeof(ndarr->ldn[0]); _STARPU_MALLOC(ndarr->nn, ndarr->ndim*sizeof(ndarr->nn[0])); memcpy(ndarr->nn, cur, ndarr->ndim*sizeof(ndarr->nn[0])); *count = size_meta_ndim_handle(ndarr); return 0; } static int free_meta_ndim_handle(void *data_interface) { struct starpu_ndim_interface *ndarr = (struct starpu_ndim_interface *) data_interface; free(ndarr->ldn); ndarr->ldn = NULL; free(ndarr->nn); ndarr->nn = NULL; return 0; } starpu-1.4.9+dfsg/src/datawizard/interfaces/tensor_filters.c000066400000000000000000000270021507764646700242630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include static void _starpu_tensor_filter_block(int dim, void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts, uintptr_t shadow_size) { struct starpu_tensor_interface *tensor_father = (struct starpu_tensor_interface *) father_interface; struct starpu_tensor_interface *tensor_child = (struct starpu_tensor_interface *) child_interface; unsigned blocksize; /* the element will be split, in case horizontal, it's nx, in case vertical, it's ny, in case depth, it's nz, in case time, it's nt*/ uint32_t nn; uint32_t nx; uint32_t ny; uint32_t nz; uint32_t nt; switch(dim) { case 1: /* horizontal*/ /* actual number of elements */ nx = tensor_father->nx - 2 * shadow_size; ny = tensor_father->ny; nz = tensor_father->nz; nt = tensor_father->nt; nn = nx; blocksize = 1; break; case 2: /* vertical*/ nx = tensor_father->nx; /* actual number of elements */ ny = tensor_father->ny - 2 * shadow_size; nz = tensor_father->nz; nt = tensor_father->nt; nn = ny; blocksize = tensor_father->ldy; break; case 3: /* depth*/ nx = tensor_father->nx; ny = tensor_father->ny; /* actual number of elements */ nz = tensor_father->nz - 2 * shadow_size; nt = tensor_father->nt; nn = nz; blocksize = tensor_father->ldz; break; case 4: /* time*/ nx = tensor_father->nx; ny = tensor_father->ny; nz = tensor_father->nz; /* actual number of elements */ nt = tensor_father->nt - 2 * shadow_size; nn = nt; blocksize = tensor_father->ldt; break; default: STARPU_ASSERT_MSG(0, "Unknown value for dim"); } size_t elemsize = tensor_father->elemsize; STARPU_ASSERT_MSG(nparts <= nn, "cannot split %u elements in %u parts", nn, nparts); uint32_t child_nn; size_t offset; starpu_filter_nparts_compute_chunk_size_and_offset(nn, nparts, elemsize, id, blocksize, &child_nn, &offset); child_nn += 2 * shadow_size; STARPU_ASSERT_MSG(tensor_father->id == STARPU_TENSOR_INTERFACE_ID, "%s can only be applied on a tensor data", __func__); tensor_child->id = tensor_father->id; switch(dim) { case 1: tensor_child->nx = child_nn; tensor_child->ny = ny; tensor_child->nz = nz; tensor_child->nt = nt; break; case 2: tensor_child->nx = nx; tensor_child->ny = child_nn; tensor_child->nz = nz; tensor_child->nt = nt; break; case 3: tensor_child->nx = nx; tensor_child->ny = ny; tensor_child->nz = child_nn; tensor_child->nt = nt; break; case 4: tensor_child->nx = nx; tensor_child->ny = ny; tensor_child->nz = nz; tensor_child->nt = child_nn; break; default: STARPU_ASSERT_MSG(0, "Unknown value for dim"); } tensor_child->elemsize = elemsize; if (tensor_father->dev_handle) { if (tensor_father->ptr) tensor_child->ptr = tensor_father->ptr + offset; tensor_child->ldy = tensor_father->ldy; tensor_child->ldz = tensor_father->ldz; tensor_child->ldt = tensor_father->ldt; tensor_child->dev_handle = tensor_father->dev_handle; tensor_child->offset = tensor_father->offset + offset; } } void starpu_tensor_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { _starpu_tensor_filter_block(1, father_interface, child_interface, f, id, nparts, 0); } void starpu_tensor_filter_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; _starpu_tensor_filter_block(1, father_interface, child_interface, f, id, nparts, shadow_size); } void starpu_tensor_filter_vertical_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { _starpu_tensor_filter_block(2, father_interface, child_interface, f, id, nparts, 0); } void starpu_tensor_filter_vertical_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; _starpu_tensor_filter_block(2, father_interface, child_interface, f, id, nparts, shadow_size); } void starpu_tensor_filter_depth_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { _starpu_tensor_filter_block(3, father_interface, child_interface, f, id, nparts, 0); } void starpu_tensor_filter_depth_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; _starpu_tensor_filter_block(3, father_interface, child_interface, f, id, nparts, shadow_size); } void starpu_tensor_filter_time_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { _starpu_tensor_filter_block(4, father_interface, child_interface, f, id, nparts, 0); } void starpu_tensor_filter_time_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; _starpu_tensor_filter_block(4, father_interface, child_interface, f, id, nparts, shadow_size); } static void _starpu_tensor_filter_pick_block(int dim, void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { struct starpu_tensor_interface *tensor_father = (struct starpu_tensor_interface *) father_interface; struct starpu_block_interface *block_child = (struct starpu_block_interface *) child_interface; unsigned blocksize; uint32_t nn; uint32_t nx = tensor_father->nx; uint32_t ny = tensor_father->ny; uint32_t nz = tensor_father->nz; uint32_t nt = tensor_father->nt; switch(dim) { /* along y-axis */ case 1: nn = ny; blocksize = tensor_father->ldy; break; /* along z-axis */ case 2: nn = nz; blocksize = tensor_father->ldz; break; /* along t-axis */ case 3: nn = nt; blocksize = tensor_father->ldt; break; default: STARPU_ASSERT_MSG(0, "Unknown value for dim"); } size_t elemsize = tensor_father->elemsize; size_t chunk_pos = (size_t)f->filter_arg_ptr; STARPU_ASSERT_MSG(nparts <= nn, "cannot get %u blocks", nparts); STARPU_ASSERT_MSG((chunk_pos + id) < nn, "the chosen block should be in the tensor"); size_t offset = (chunk_pos + id) * blocksize * elemsize; STARPU_ASSERT_MSG(tensor_father->id == STARPU_TENSOR_INTERFACE_ID, "%s can only be applied on a tensor data", __func__); block_child->id = STARPU_BLOCK_INTERFACE_ID; switch(dim) { /* along y-axis */ case 1: block_child->nx = nx; block_child->ny = nz; block_child->nz = nt; break; /* along z-axis */ case 2: block_child->nx = nx; block_child->ny = ny; block_child->nz = nt; break; /* along t-axis */ case 3: block_child->nx = nx; block_child->ny = ny; block_child->nz = nz; break; default: STARPU_ASSERT_MSG(0, "Unknown value for dim"); } block_child->elemsize = elemsize; if (tensor_father->dev_handle) { if (tensor_father->ptr) block_child->ptr = tensor_father->ptr + offset; switch(dim) { /* along y-axis */ case 1: block_child->ldy = tensor_father->ldz; block_child->ldz = tensor_father->ldt; break; /* along z-axis */ case 2: block_child->ldy = tensor_father->ldy; block_child->ldz = tensor_father->ldt; break; /* along t-axis */ case 3: block_child->ldy = tensor_father->ldy; block_child->ldz = tensor_father->ldz; break; default: STARPU_ASSERT_MSG(0, "Unknown value for dim"); } block_child->dev_handle = tensor_father->dev_handle; block_child->offset = tensor_father->offset + offset; } } void starpu_tensor_filter_pick_block_t(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { _starpu_tensor_filter_pick_block(3, father_interface, child_interface, f, id, nparts); } void starpu_tensor_filter_pick_block_z(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { _starpu_tensor_filter_pick_block(2, father_interface, child_interface, f, id, nparts); } void starpu_tensor_filter_pick_block_y(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nparts) { _starpu_tensor_filter_pick_block(1, father_interface, child_interface, f, id, nparts); } struct starpu_data_interface_ops *starpu_tensor_filter_pick_block_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_block_ops; } void starpu_tensor_filter_pick_variable(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks) { struct starpu_tensor_interface *tensor_father = (struct starpu_tensor_interface *) father_interface; /* each chunk becomes a variable */ struct starpu_variable_interface *variable_child = (struct starpu_variable_interface *) child_interface; uint32_t nx = tensor_father->nx; uint32_t ny = tensor_father->ny; uint32_t nz = tensor_father->nz; uint32_t nt = tensor_father->nt; unsigned ldy = tensor_father->ldy; unsigned ldz = tensor_father->ldz; unsigned ldt = tensor_father->ldt; size_t elemsize = tensor_father->elemsize; uint32_t* chunk_pos = (uint32_t*)f->filter_arg_ptr; // int i; // for(i=0; i<4; i++) // { // printf("pos is %d\n", chunk_pos[i]); // } STARPU_ASSERT_MSG((chunk_pos[0] < nx)&&(chunk_pos[1] < ny)&&(chunk_pos[2] < nz)&&(chunk_pos[3] < nt), "the chosen variable should be in the tensor"); size_t offset = (chunk_pos[3] * ldt + chunk_pos[2] * ldz + chunk_pos[1] * ldy + chunk_pos[0]) * elemsize; STARPU_ASSERT_MSG(tensor_father->id == STARPU_TENSOR_INTERFACE_ID, "%s can only be applied on a tensor data", __func__); /* update the child's interface */ variable_child->id = STARPU_VARIABLE_INTERFACE_ID; variable_child->elemsize = elemsize; /* is the information on this node valid ? */ if (tensor_father->dev_handle) { if (tensor_father->ptr) variable_child->ptr = tensor_father->ptr + offset; variable_child->dev_handle = tensor_father->dev_handle; variable_child->offset = tensor_father->offset + offset; } } struct starpu_data_interface_ops *starpu_tensor_filter_pick_variable_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_variable_ops; } starpu-1.4.9+dfsg/src/datawizard/interfaces/tensor_interface.c000066400000000000000000000510561507764646700245610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef BUILDING_STARPU #include #endif static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); static int map_tensor(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int unmap_tensor(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int update_map_tensor(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static const struct starpu_data_copy_methods tensor_copy_data_methods_s = { .any_to_any = copy_any_to_any, }; static void register_tensor_handle(starpu_data_handle_t handle, int home_node, void *data_interface); static void *tensor_to_pointer(void *data_interface, unsigned node); static starpu_ssize_t allocate_tensor_buffer_on_node(void *data_interface_, unsigned dst_node); static void free_tensor_buffer_on_node(void *data_interface, unsigned node); static size_t tensor_interface_get_size(starpu_data_handle_t handle); static uint32_t footprint_tensor_interface_crc32(starpu_data_handle_t handle); static int tensor_compare(void *data_interface_a, void *data_interface_b); static void display_tensor_interface(starpu_data_handle_t handle, FILE *f); static int pack_tensor_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); static int peek_tensor_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static int unpack_tensor_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); struct starpu_data_interface_ops starpu_interface_tensor_ops = { .register_data_handle = register_tensor_handle, .allocate_data_on_node = allocate_tensor_buffer_on_node, .to_pointer = tensor_to_pointer, .free_data_on_node = free_tensor_buffer_on_node, .map_data = map_tensor, .unmap_data = unmap_tensor, .update_map = update_map_tensor, .copy_methods = &tensor_copy_data_methods_s, .get_size = tensor_interface_get_size, .footprint = footprint_tensor_interface_crc32, .compare = tensor_compare, .interfaceid = STARPU_TENSOR_INTERFACE_ID, .interface_size = sizeof(struct starpu_tensor_interface), .display = display_tensor_interface, .pack_data = pack_tensor_handle, .peek_data = peek_tensor_handle, .unpack_data = unpack_tensor_handle, .describe = describe, .name = "STARPU_TENSOR_INTERFACE", .pack_meta = NULL, .unpack_meta = NULL, .free_meta = NULL }; static void *tensor_to_pointer(void *data_interface, unsigned node) { (void) node; struct starpu_tensor_interface *tensor_interface = data_interface; return (void*) tensor_interface->ptr; } static void register_tensor_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) data_interface; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_tensor_interface *local_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->ptr = tensor_interface->ptr; local_interface->dev_handle = tensor_interface->dev_handle; local_interface->offset = tensor_interface->offset; local_interface->ldy = tensor_interface->ldy; local_interface->ldz = tensor_interface->ldz; local_interface->ldt = tensor_interface->ldt; } else { local_interface->ptr = 0; local_interface->dev_handle = 0; local_interface->offset = 0; local_interface->ldy = 0; local_interface->ldz = 0; local_interface->ldt = 0; } local_interface->id = tensor_interface->id; local_interface->nx = tensor_interface->nx; local_interface->ny = tensor_interface->ny; local_interface->nz = tensor_interface->nz; local_interface->nt = tensor_interface->nt; local_interface->elemsize = tensor_interface->elemsize; } } /* declare a new data with the BLAS interface */ void starpu_tensor_data_register(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, uint32_t ldy, uint32_t ldz, uint32_t ldt, uint32_t nx, uint32_t ny, uint32_t nz, uint32_t nt, size_t elemsize) { STARPU_ASSERT_MSG(ldy >= nx, "ldy = %u should not be less than nx = %u.", ldy, nx); STARPU_ASSERT_MSG(ldz/ldy >= ny, "ldz/ldy = %u/%u = %u should not be less than ny = %u.", ldz, ldy, ldz/ldy, ny); STARPU_ASSERT_MSG(ldt/ldz >= nz, "ldt/ldz = %u/%u = %u should not be less than nz = %u.", ldt, ldz, ldt/ldz, nz); struct starpu_tensor_interface tensor_interface = { .id = STARPU_TENSOR_INTERFACE_ID, .ptr = ptr, .dev_handle = ptr, .offset = 0, .ldy = ldy, .ldz = ldz, .ldt = ldt, .nx = nx, .ny = ny, .nz = nz, .nt = nt, .elemsize = elemsize }; #ifndef STARPU_SIMGRID if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) { if (nx && ny && nz && nt && elemsize) { STARPU_ASSERT_ACCESSIBLE(ptr); STARPU_ASSERT_ACCESSIBLE(ptr + (nt-1)*ldt*elemsize + (nz-1)*ldz*elemsize + (ny-1)*ldy*elemsize + nx*elemsize - 1); } } #endif starpu_data_register(handleptr, home_node, &tensor_interface, &starpu_interface_tensor_ops); } void starpu_tensor_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset, uint32_t ldy, uint32_t ldz, uint32_t ldt) { struct starpu_tensor_interface *tensor_interface = starpu_data_get_interface_on_node(handle, node); starpu_data_ptr_register(handle, node); tensor_interface->ptr = ptr; tensor_interface->dev_handle = dev_handle; tensor_interface->offset = offset; tensor_interface->ldy = ldy; tensor_interface->ldz = ldz; tensor_interface->ldt = ldt; } static uint32_t footprint_tensor_interface_crc32(starpu_data_handle_t handle) { uint32_t hash; hash = starpu_hash_crc32c_be(starpu_tensor_get_nx(handle), 0); hash = starpu_hash_crc32c_be(starpu_tensor_get_ny(handle), hash); hash = starpu_hash_crc32c_be(starpu_tensor_get_nz(handle), hash); hash = starpu_hash_crc32c_be(starpu_tensor_get_nt(handle), hash); return hash; } static int tensor_compare(void *data_interface_a, void *data_interface_b) { struct starpu_tensor_interface *tensor_a = (struct starpu_tensor_interface *) data_interface_a; struct starpu_tensor_interface *tensor_b = (struct starpu_tensor_interface *) data_interface_b; /* Two tensors are considered compatible if they have the same size */ return (tensor_a->nx == tensor_b->nx) && (tensor_a->ny == tensor_b->ny) && (tensor_a->nz == tensor_b->nz) && (tensor_a->nt == tensor_b->nt) && (tensor_a->elemsize == tensor_b->elemsize); } static void display_tensor_interface(starpu_data_handle_t handle, FILE *f) { struct starpu_tensor_interface *tensor_interface; tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); fprintf(f, "%u\t%u\t%u\t%u\t", tensor_interface->nx, tensor_interface->ny, tensor_interface->nz, tensor_interface->nt); } #define IS_CONTIGUOUS_MATRIX(nx, ny, ldy) ((nx) == (ldy)) #define IS_CONTIGUOUS_BLOCK(nx, ny, nz, ldy, ldz) ((nx) * (ny) == (ldz)) #define IS_CONTIGUOUS_TENSOR(nx, ny, nz, nt, ldy, ldz, ldt) ((nx) * (ny) * (nz) == (ldt)) static int pack_tensor_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, node); uint32_t ldy = tensor_interface->ldy; uint32_t ldz = tensor_interface->ldz; uint32_t ldt = tensor_interface->ldt; uint32_t nx = tensor_interface->nx; uint32_t ny = tensor_interface->ny; uint32_t nz = tensor_interface->nz; uint32_t nt = tensor_interface->nt; size_t elemsize = tensor_interface->elemsize; *count = nx*ny*nz*nt*elemsize; if (ptr != NULL) { uint32_t t, z, y; char *block = (void *)tensor_interface->ptr; *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); char *cur = *ptr; if (IS_CONTIGUOUS_TENSOR(nx, ny, nz, nt, ldy, ldz, ldt)) memcpy(cur, block, nx * ny * nz * nt * elemsize); else { char *block_t = block; for(t=0 ; tldy; uint32_t ldz = tensor_interface->ldz; uint32_t ldt = tensor_interface->ldt; uint32_t nx = tensor_interface->nx; uint32_t ny = tensor_interface->ny; uint32_t nz = tensor_interface->nz; uint32_t nt = tensor_interface->nt; size_t elemsize = tensor_interface->elemsize; STARPU_ASSERT(count == elemsize * nx * ny * nz * nt); uint32_t t, z, y; char *cur = ptr; char *block = (void *)tensor_interface->ptr; if (IS_CONTIGUOUS_TENSOR(nx, ny, nz, nt, ldy, ldz, ldt)) memcpy(block, cur, nx * ny * nz * nt * elemsize); else { char *block_t = block; for(t=0 ; tid == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a block."); #endif size = tensor_interface->nx*tensor_interface->ny*tensor_interface->nz*tensor_interface->nt*tensor_interface->elemsize; return size; } /* offer an access to the data parameters */ uint32_t starpu_tensor_get_nx(starpu_data_handle_t handle) { struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a block."); #endif return tensor_interface->nx; } uint32_t starpu_tensor_get_ny(starpu_data_handle_t handle) { struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a block."); #endif return tensor_interface->ny; } uint32_t starpu_tensor_get_nz(starpu_data_handle_t handle) { struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a block."); #endif return tensor_interface->nz; } uint32_t starpu_tensor_get_nt(starpu_data_handle_t handle) { struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a block."); #endif return tensor_interface->nt; } uint32_t starpu_tensor_get_local_ldy(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a block."); #endif return tensor_interface->ldy; } uint32_t starpu_tensor_get_local_ldz(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a block."); #endif return tensor_interface->ldz; } uint32_t starpu_tensor_get_local_ldt(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a block."); #endif return tensor_interface->ldt; } uintptr_t starpu_tensor_get_local_ptr(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a block."); #endif return tensor_interface->ptr; } size_t starpu_tensor_get_elemsize(starpu_data_handle_t handle) { struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(tensor_interface->id == STARPU_TENSOR_INTERFACE_ID, "Error. The given data is not a block."); #endif return tensor_interface->elemsize; } /* memory allocation/deallocation primitives for the BLOCK interface */ /* returns the size of the allocated area */ static starpu_ssize_t allocate_tensor_buffer_on_node(void *data_interface_, unsigned dst_node) { uintptr_t addr = 0, handle; struct starpu_tensor_interface *dst_block = (struct starpu_tensor_interface *) data_interface_; uint32_t nx = dst_block->nx; uint32_t ny = dst_block->ny; uint32_t nz = dst_block->nz; uint32_t nt = dst_block->nt; size_t elemsize = dst_block->elemsize; starpu_ssize_t allocated_memory; handle = starpu_malloc_on_node(dst_node, nx*ny*nz*nt*elemsize); if (!handle) return -ENOMEM; if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) addr = handle; allocated_memory = nx*ny*nz*nt*elemsize; /* update the data properly in consequence */ dst_block->ptr = addr; dst_block->dev_handle = handle; dst_block->offset = 0; dst_block->ldy = nx; dst_block->ldz = nx*ny; dst_block->ldt = nx*ny*nz; return allocated_memory; } static void free_tensor_buffer_on_node(void *data_interface, unsigned node) { struct starpu_tensor_interface *tensor_interface = (struct starpu_tensor_interface *) data_interface; uint32_t nx = tensor_interface->nx; uint32_t ny = tensor_interface->ny; uint32_t nz = tensor_interface->nz; uint32_t nt = tensor_interface->nt; size_t elemsize = tensor_interface->elemsize; starpu_free_on_node(node, tensor_interface->dev_handle, nx*ny*nz*nt*elemsize); tensor_interface->ptr = 0; tensor_interface->dev_handle = 0; } static int map_tensor(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_tensor_interface *src_tensor = src_interface; struct starpu_tensor_interface *dst_tensor = dst_interface; int ret; uintptr_t mapped; /* map area ldt*(nt-1) + ldz*(nz-1) + ldy*(ny-1) + nx*/ mapped = starpu_interface_map(src_tensor->dev_handle, src_tensor->offset, src_node, dst_node, (src_tensor->ldt*(src_tensor->nt-1)+src_tensor->ldz*(src_tensor->nz-1)+src_tensor->ldy*(src_tensor->ny-1)+src_tensor->nx)*src_tensor->elemsize, &ret); if (mapped) { dst_tensor->dev_handle = mapped; dst_tensor->offset = 0; if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) dst_tensor->ptr = mapped; dst_tensor->ldy = src_tensor->ldy; dst_tensor->ldz = src_tensor->ldz; dst_tensor->ldt = src_tensor->ldt; return 0; } return ret; } static int unmap_tensor(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_tensor_interface *src_tensor = src_interface; struct starpu_tensor_interface *dst_tensor = dst_interface; int ret = starpu_interface_unmap(src_tensor->dev_handle, src_tensor->offset, src_node, dst_tensor->dev_handle, dst_node, (src_tensor->ldt*(src_tensor->nt-1)+src_tensor->ldz*(src_tensor->nz-1)+src_tensor->ldy*(src_tensor->ny-1)+src_tensor->nx)*src_tensor->elemsize); dst_tensor->dev_handle = 0; return ret; } static int update_map_tensor(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_tensor_interface *src_tensor = src_interface; struct starpu_tensor_interface *dst_tensor = dst_interface; return starpu_interface_update_map(src_tensor->dev_handle, src_tensor->offset, src_node, dst_tensor->dev_handle, dst_tensor->offset, dst_node, (src_tensor->ldt*(src_tensor->nt-1)+src_tensor->ldz*(src_tensor->nz-1)+src_tensor->ldy*(src_tensor->ny-1)+src_tensor->nx)*src_tensor->elemsize); } static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct starpu_tensor_interface *src_block = (struct starpu_tensor_interface *) src_interface; struct starpu_tensor_interface *dst_block = (struct starpu_tensor_interface *) dst_interface; int ret = 0; uint32_t nx = dst_block->nx; uint32_t ny = dst_block->ny; uint32_t nz = dst_block->nz; uint32_t nt = dst_block->nt; size_t elemsize = dst_block->elemsize; uint32_t ldy_src = src_block->ldy; uint32_t ldz_src = src_block->ldz; uint32_t ldt_src = src_block->ldt; uint32_t ldy_dst = dst_block->ldy; uint32_t ldz_dst = dst_block->ldz; uint32_t ldt_dst = dst_block->ldt; if (starpu_interface_copy4d(src_block->dev_handle, src_block->offset, src_node, dst_block->dev_handle, dst_block->offset, dst_node, nx * elemsize, ny, ldy_src * elemsize, ldy_dst * elemsize, nz, ldz_src * elemsize, ldz_dst * elemsize, nt, ldt_src * elemsize, ldt_dst * elemsize, async_data)) ret = -EAGAIN; starpu_interface_data_copy(src_node, dst_node, nx*ny*nz*nt*elemsize); return ret; } static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) { struct starpu_tensor_interface *block = (struct starpu_tensor_interface *) data_interface; return snprintf(buf, size, "T%ux%ux%ux%ux%u", (unsigned) block->nx, (unsigned) block->ny, (unsigned) block->nz, (unsigned) block->nt, (unsigned) block->elemsize); } starpu-1.4.9+dfsg/src/datawizard/interfaces/variable_interface.c000066400000000000000000000267041507764646700250360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef BUILDING_STARPU #include #endif static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); static int map_variable(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int unmap_variable(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int update_map_variable(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static const struct starpu_data_copy_methods variable_copy_data_methods_s = { .any_to_any = copy_any_to_any, }; static void register_variable_handle(starpu_data_handle_t handle, int home_node, void *data_interface); static starpu_ssize_t allocate_variable_buffer_on_node(void *data_interface_, unsigned dst_node); static void *variable_to_pointer(void *data_interface, unsigned node); static void free_variable_buffer_on_node(void *data_interface, unsigned node); static size_t variable_interface_get_size(starpu_data_handle_t handle); static uint32_t footprint_variable_interface_crc32(starpu_data_handle_t handle); static int variable_compare(void *data_interface_a, void *data_interface_b); static void display_variable_interface(starpu_data_handle_t handle, FILE *f); static int pack_variable_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); static int peek_variable_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static int unpack_variable_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); struct starpu_data_interface_ops starpu_interface_variable_ops = { .register_data_handle = register_variable_handle, .allocate_data_on_node = allocate_variable_buffer_on_node, .to_pointer = variable_to_pointer, .free_data_on_node = free_variable_buffer_on_node, .map_data = map_variable, .unmap_data = unmap_variable, .update_map = update_map_variable, .copy_methods = &variable_copy_data_methods_s, .get_size = variable_interface_get_size, .footprint = footprint_variable_interface_crc32, .compare = variable_compare, .interfaceid = STARPU_VARIABLE_INTERFACE_ID, .interface_size = sizeof(struct starpu_variable_interface), .display = display_variable_interface, .pack_data = pack_variable_handle, .peek_data = peek_variable_handle, .unpack_data = unpack_variable_handle, .describe = describe, .name = "STARPU_VARIABLE_INTERFACE", .pack_meta = NULL, .unpack_meta = NULL, .free_meta = NULL }; static void *variable_to_pointer(void *data_interface, unsigned node) { (void) node; return (void*) STARPU_VARIABLE_GET_PTR(data_interface); } static void register_variable_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *)data_interface; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_variable_interface *local_interface = (struct starpu_variable_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->ptr = variable_interface->ptr; local_interface->dev_handle = variable_interface->dev_handle; local_interface->offset = variable_interface->offset; } else { local_interface->ptr = 0; local_interface->dev_handle = 0; local_interface->offset = 0; } local_interface->id = variable_interface->id; local_interface->elemsize = variable_interface->elemsize; } } /* declare a new data with the variable interface */ void starpu_variable_data_register(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, size_t elemsize) { struct starpu_variable_interface variable = { .id = STARPU_VARIABLE_INTERFACE_ID, .ptr = ptr, .dev_handle = ptr, .offset = 0, .elemsize = elemsize }; #ifndef STARPU_SIMGRID if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) { if (elemsize) { STARPU_ASSERT_ACCESSIBLE(ptr); STARPU_ASSERT_ACCESSIBLE(ptr + elemsize - 1); } } #endif starpu_data_register(handleptr, home_node, &variable, &starpu_interface_variable_ops); } void starpu_variable_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset) { struct starpu_variable_interface *variable_interface = starpu_data_get_interface_on_node(handle, node); starpu_data_ptr_register(handle, node); variable_interface->ptr = ptr; variable_interface->dev_handle = dev_handle; variable_interface->offset = offset; } static uint32_t footprint_variable_interface_crc32(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(starpu_variable_get_elemsize(handle), 0); } static int variable_compare(void *data_interface_a, void *data_interface_b) { struct starpu_variable_interface *variable_a = (struct starpu_variable_interface *) data_interface_a; struct starpu_variable_interface *variable_b = (struct starpu_variable_interface *) data_interface_b; /* Two variables are considered compatible if they have the same size */ return variable_a->elemsize == variable_b->elemsize; } static void display_variable_interface(starpu_data_handle_t handle, FILE *f) { struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); fprintf(f, "%ld\t", (long)variable_interface->elemsize); } static int pack_variable_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *) starpu_data_get_interface_on_node(handle, node); *count = variable_interface->elemsize; if (ptr != NULL) { *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); memcpy(*ptr, (void*)variable_interface->ptr, variable_interface->elemsize); } return 0; } static int peek_variable_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *) starpu_data_get_interface_on_node(handle, node); STARPU_ASSERT(count == variable_interface->elemsize); memcpy((void*)variable_interface->ptr, ptr, variable_interface->elemsize); return 0; } static int unpack_variable_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { peek_variable_handle(handle, node, ptr, count); starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); return 0; } static size_t variable_interface_get_size(starpu_data_handle_t handle) { struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(variable_interface->id == STARPU_VARIABLE_INTERFACE_ID, "Error. The given data is not a variable."); #endif return variable_interface->elemsize; } uintptr_t starpu_variable_get_local_ptr(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); return STARPU_VARIABLE_GET_PTR(starpu_data_get_interface_on_node(handle, node)); } size_t starpu_variable_get_elemsize(starpu_data_handle_t handle) { return STARPU_VARIABLE_GET_ELEMSIZE(starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM)); } /* memory allocation/deallocation primitives for the variable interface */ /* returns the size of the allocated area */ static starpu_ssize_t allocate_variable_buffer_on_node(void *data_interface_, unsigned dst_node) { struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *) data_interface_; size_t elemsize = variable_interface->elemsize; uintptr_t addr = starpu_malloc_on_node(dst_node, elemsize); if (!addr) return -ENOMEM; /* update the data properly in consequence */ variable_interface->ptr = addr; variable_interface->dev_handle = addr; variable_interface->offset = 0; return elemsize; } static void free_variable_buffer_on_node(void *data_interface, unsigned node) { struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *) data_interface; starpu_free_on_node(node, variable_interface->dev_handle, variable_interface->elemsize); variable_interface->ptr = 0; variable_interface->dev_handle = 0; } static int map_variable(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_variable_interface *src_variable = src_interface; struct starpu_variable_interface *dst_variable = dst_interface; int ret; uintptr_t mapped; mapped = starpu_interface_map(src_variable->dev_handle, src_variable->offset, src_node, dst_node, src_variable->elemsize, &ret); if (mapped) { dst_variable->dev_handle = mapped; dst_variable->offset = 0; if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) dst_variable->ptr = mapped; return 0; } return ret; } static int unmap_variable(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_variable_interface *src_variable = src_interface; struct starpu_variable_interface *dst_variable = dst_interface; int ret = starpu_interface_unmap(src_variable->dev_handle, src_variable->offset, src_node, dst_variable->dev_handle, dst_node, src_variable->elemsize); dst_variable->dev_handle = 0; return ret; } static int update_map_variable(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_variable_interface *src_variable = src_interface; struct starpu_variable_interface *dst_variable = dst_interface; return starpu_interface_update_map(src_variable->dev_handle, src_variable->offset, src_node, dst_variable->dev_handle, dst_variable->offset, dst_node, src_variable->elemsize); } static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct starpu_variable_interface *src_variable = (struct starpu_variable_interface *) src_interface; struct starpu_variable_interface *dst_variable = (struct starpu_variable_interface *) dst_interface; size_t elemsize = dst_variable->elemsize; uintptr_t ptr_src = src_variable->ptr; uintptr_t ptr_dst = dst_variable->ptr; int ret; ret = starpu_interface_copy(ptr_src, 0, src_node, ptr_dst, 0, dst_node, elemsize, async_data); starpu_interface_data_copy(src_node, dst_node, elemsize); return ret; } static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) { struct starpu_variable_interface *variable = (struct starpu_variable_interface *) data_interface; return snprintf(buf, size, "v%u", (unsigned) variable->elemsize); } starpu-1.4.9+dfsg/src/datawizard/interfaces/vector_filters.c000066400000000000000000000200321507764646700242470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include static void _starpu_vector_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks, uintptr_t shadow_size) { struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface; struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface; /* actual number of elements */ uint32_t nx = vector_father->nx - 2 * shadow_size; size_t elemsize = vector_father->elemsize; STARPU_ASSERT_MSG(nchunks <= nx, "cannot split %u elements in %u parts", nx, nchunks); uint32_t child_nx; size_t offset; starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1, &child_nx, &offset); child_nx += 2*shadow_size; STARPU_ASSERT_MSG(vector_father->id == STARPU_VECTOR_INTERFACE_ID, "%s can only be applied on a vector data", __func__); vector_child->id = vector_father->id; vector_child->nx = child_nx; vector_child->elemsize = elemsize; vector_child->allocsize = vector_child->nx * elemsize; if (vector_father->dev_handle) { if (vector_father->ptr) vector_child->ptr = vector_father->ptr + offset; vector_child->dev_handle = vector_father->dev_handle; vector_child->offset = vector_father->offset + offset; } } void starpu_vector_filter_block(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) { _starpu_vector_filter_block(father_interface, child_interface, f, id, nchunks, 0); } void starpu_vector_filter_block_shadow(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) { uintptr_t shadow_size = (uintptr_t) f->filter_arg_ptr; _starpu_vector_filter_block(father_interface, child_interface, f, id, nchunks, shadow_size); } void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks) { /* there cannot be more than 2 chunks */ STARPU_ASSERT_MSG(id < 2, "Only %u parts", id); struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface; struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface; uint32_t length_first = f->filter_arg; uint32_t nx = vector_father->nx; size_t elemsize = vector_father->elemsize; STARPU_ASSERT_MSG(length_first < nx, "First part is too long: %u vs %u", length_first, nx); STARPU_ASSERT_MSG(vector_father->id == STARPU_VECTOR_INTERFACE_ID, "%s can only be applied on a vector data", __func__); vector_child->id = vector_father->id; /* this is the first child */ if (id == 0) { vector_child->nx = length_first; vector_child->elemsize = elemsize; vector_child->allocsize = vector_child->nx * elemsize; if (vector_father->dev_handle) { if (vector_father->ptr) vector_child->ptr = vector_father->ptr; vector_child->offset = vector_father->offset; vector_child->dev_handle = vector_father->dev_handle; } } else /* the second child */ { vector_child->nx = nx - length_first; vector_child->elemsize = elemsize; vector_child->allocsize = vector_child->nx * elemsize; if (vector_father->dev_handle) { if (vector_father->ptr) vector_child->ptr = vector_father->ptr + length_first*elemsize; vector_child->offset = vector_father->offset + length_first*elemsize; vector_child->dev_handle = vector_father->dev_handle; } } } void starpu_vector_filter_list_long(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks) { struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface; struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface; long *length_tab = (long *) f->filter_arg_ptr; size_t elemsize = vector_father->elemsize; long chunk_size = length_tab[id]; STARPU_ASSERT_MSG(vector_father->id == STARPU_VECTOR_INTERFACE_ID, "%s can only be applied on a vector data", __func__); vector_child->id = vector_father->id; vector_child->nx = chunk_size; vector_child->elemsize = elemsize; vector_child->allocsize = vector_child->nx * elemsize; if (vector_father->dev_handle) { /* compute the current position */ unsigned current_pos = 0; unsigned i; for (i = 0; i < id; i++) current_pos += length_tab[i]; if (vector_father->ptr) vector_child->ptr = vector_father->ptr + current_pos*elemsize; vector_child->offset = vector_father->offset + current_pos*elemsize; vector_child->dev_handle = vector_father->dev_handle; } } void starpu_vector_filter_list(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, STARPU_ATTRIBUTE_UNUSED unsigned nchunks) { struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface; struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface; uint32_t *length_tab = (uint32_t *) f->filter_arg_ptr; size_t elemsize = vector_father->elemsize; uint32_t chunk_size = length_tab[id]; STARPU_ASSERT_MSG(vector_father->id == STARPU_VECTOR_INTERFACE_ID, "%s can only be applied on a vector data", __func__); vector_child->id = vector_father->id; vector_child->nx = chunk_size; vector_child->elemsize = elemsize; vector_child->allocsize = vector_child->nx * elemsize; if (vector_father->dev_handle) { /* compute the current position */ unsigned current_pos = 0; unsigned i; for (i = 0; i < id; i++) current_pos += length_tab[i]; if (vector_father->ptr) vector_child->ptr = vector_father->ptr + current_pos*elemsize; vector_child->offset = vector_father->offset + current_pos*elemsize; vector_child->dev_handle = vector_father->dev_handle; } } void starpu_vector_filter_pick_variable(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) { struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface; /* each chunk becomes a variable */ struct starpu_variable_interface *variable_child = (struct starpu_variable_interface *) child_interface; /* actual number of elements */ uint32_t nx = vector_father->nx; size_t elemsize = vector_father->elemsize; size_t chunk_pos = (size_t)f->filter_arg_ptr; STARPU_ASSERT_MSG(nchunks <= nx, "cannot get %u variables", nchunks); STARPU_ASSERT_MSG((chunk_pos + id) < nx, "the chosen variable should be in the vector"); size_t offset = (chunk_pos + id) * elemsize; STARPU_ASSERT_MSG(vector_father->id == STARPU_VECTOR_INTERFACE_ID, "%s can only be applied on a vector data", __func__); variable_child->id = STARPU_VARIABLE_INTERFACE_ID; variable_child->elemsize = elemsize; if (vector_father->dev_handle) { if (vector_father->ptr) variable_child->ptr = vector_father->ptr + offset; variable_child->dev_handle = vector_father->dev_handle; variable_child->offset = vector_father->offset + offset; } } struct starpu_data_interface_ops *starpu_vector_filter_pick_variable_child_ops(STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, STARPU_ATTRIBUTE_UNUSED unsigned child) { return &starpu_interface_variable_ops; } starpu-1.4.9+dfsg/src/datawizard/interfaces/vector_interface.c000066400000000000000000000403001507764646700245370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef BUILDING_STARPU #include #endif static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); static int map_vector(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int unmap_vector(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int update_map(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static const struct starpu_data_copy_methods vector_copy_data_methods_s = { .any_to_any = copy_any_to_any, }; static void vector_init(void *data_interface); static void register_vector_handle(starpu_data_handle_t handle, int home_node, void *data_interface); static starpu_ssize_t allocate_vector_buffer_on_node(void *data_interface_, unsigned dst_node); static void *vector_to_pointer(void *data_interface, unsigned node); static void free_vector_buffer_on_node(void *data_interface, unsigned node); static void cache_vector_buffer_on_node(void *new_data_interface, void *data_interface, unsigned node); static void reuse_vector_buffer_on_node(void *data_interface, const void *new_data_interface, unsigned node); static size_t vector_interface_get_size(starpu_data_handle_t handle); static size_t vector_interface_get_alloc_size(starpu_data_handle_t handle); static uint32_t footprint_vector_interface_crc32(starpu_data_handle_t handle); static uint32_t alloc_footprint_vector_interface_crc32(starpu_data_handle_t handle); static int vector_compare(void *data_interface_a, void *data_interface_b); static int vector_alloc_compare(void *data_interface_a, void *data_interface_b); static void display_vector_interface(starpu_data_handle_t handle, FILE *f); static int pack_vector_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); static int peek_vector_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static int unpack_vector_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); struct starpu_data_interface_ops starpu_interface_vector_ops = { .init = vector_init, .register_data_handle = register_vector_handle, .allocate_data_on_node = allocate_vector_buffer_on_node, .to_pointer = vector_to_pointer, .free_data_on_node = free_vector_buffer_on_node, .cache_data_on_node = cache_vector_buffer_on_node, .reuse_data_on_node = reuse_vector_buffer_on_node, .map_data = map_vector, .unmap_data = unmap_vector, .update_map = update_map, .copy_methods = &vector_copy_data_methods_s, .get_size = vector_interface_get_size, .get_alloc_size = vector_interface_get_alloc_size, .footprint = footprint_vector_interface_crc32, .alloc_footprint = alloc_footprint_vector_interface_crc32, .compare = vector_compare, .alloc_compare = vector_alloc_compare, .interfaceid = STARPU_VECTOR_INTERFACE_ID, .interface_size = sizeof(struct starpu_vector_interface), .display = display_vector_interface, .pack_data = pack_vector_handle, .peek_data = peek_vector_handle, .unpack_data = unpack_vector_handle, .describe = describe, .name = "STARPU_VECTOR_INTERFACE", .pack_meta = NULL, .unpack_meta = NULL, .free_meta = NULL }; static void vector_init(void *data_interface) { struct starpu_vector_interface *vector_interface = data_interface; vector_interface->allocsize = -1; } static void *vector_to_pointer(void *data_interface, unsigned node) { (void) node; struct starpu_vector_interface *vector_interface = data_interface; return (void*) vector_interface->ptr; } static void register_vector_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) data_interface; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_vector_interface *local_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { local_interface->ptr = vector_interface->ptr; local_interface->dev_handle = vector_interface->dev_handle; local_interface->offset = vector_interface->offset; } else { local_interface->ptr = 0; local_interface->dev_handle = 0; local_interface->offset = 0; } local_interface->id = vector_interface->id; local_interface->nx = vector_interface->nx; local_interface->elemsize = vector_interface->elemsize; local_interface->allocsize = vector_interface->allocsize; local_interface->slice_base = vector_interface->slice_base; } } /* declare a new data with the vector interface */ void starpu_vector_data_register_allocsize(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, uint32_t nx, size_t elemsize, size_t allocsize) { struct starpu_vector_interface vector = { .id = STARPU_VECTOR_INTERFACE_ID, .ptr = ptr, .nx = nx, .elemsize = elemsize, .dev_handle = ptr, .slice_base = 0, .offset = 0, .allocsize = allocsize, }; #if (!defined(STARPU_SIMGRID) && !defined(STARPU_OPENMP)) if (home_node >= 0 && starpu_node_get_kind(home_node) == STARPU_CPU_RAM) { if (nx && elemsize) { STARPU_ASSERT_ACCESSIBLE(ptr); STARPU_ASSERT_ACCESSIBLE(ptr + nx*elemsize - 1); } } #endif starpu_data_register(handleptr, home_node, &vector, &starpu_interface_vector_ops); } void starpu_vector_data_register(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, uint32_t nx, size_t elemsize) { starpu_vector_data_register_allocsize(handleptr, home_node, ptr, nx, elemsize, nx * elemsize); } void starpu_vector_ptr_register(starpu_data_handle_t handle, unsigned node, uintptr_t ptr, uintptr_t dev_handle, size_t offset) { struct starpu_vector_interface *vector_interface = starpu_data_get_interface_on_node(handle, node); starpu_data_ptr_register(handle, node); vector_interface->ptr = ptr; vector_interface->dev_handle = dev_handle; vector_interface->offset = offset; } static uint32_t footprint_vector_interface_crc32(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(starpu_vector_get_nx(handle), 0); } static uint32_t alloc_footprint_vector_interface_crc32(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(starpu_vector_get_allocsize(handle), 0); } static int vector_compare(void *data_interface_a, void *data_interface_b) { struct starpu_vector_interface *vector_a = (struct starpu_vector_interface *) data_interface_a; struct starpu_vector_interface *vector_b = (struct starpu_vector_interface *) data_interface_b; /* Two vectors are considered compatible if they have the same size */ return (vector_a->nx == vector_b->nx) && (vector_a->elemsize == vector_b->elemsize); } static int vector_alloc_compare(void *data_interface_a, void *data_interface_b) { struct starpu_vector_interface *vector_a = (struct starpu_vector_interface *) data_interface_a; struct starpu_vector_interface *vector_b = (struct starpu_vector_interface *) data_interface_b; /* Two vectors are considered allocation-compatible if they have the same size */ return (vector_a->allocsize == vector_b->allocsize); } static void display_vector_interface(starpu_data_handle_t handle, FILE *f) { struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); fprintf(f, "%u\t", vector_interface->nx); } static int pack_vector_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, node); *count = vector_interface->nx*vector_interface->elemsize; if (ptr != NULL) { *ptr = (void *)starpu_malloc_on_node_flags(node, *count, 0); memcpy(*ptr, (void*)vector_interface->ptr, vector_interface->elemsize*vector_interface->nx); } return 0; } static int peek_vector_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, node); STARPU_ASSERT(count == vector_interface->elemsize * vector_interface->nx); memcpy((void*)vector_interface->ptr, ptr, count); return 0; } static int unpack_vector_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { peek_vector_handle(handle, node, ptr, count); starpu_free_on_node_flags(node, (uintptr_t)ptr, count, 0); return 0; } static size_t vector_interface_get_size(starpu_data_handle_t handle) { size_t size; struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. The given data is not a vector."); #endif size = vector_interface->nx * vector_interface->elemsize; return size; } static size_t vector_interface_get_alloc_size(starpu_data_handle_t handle) { size_t size; struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. The given data is not a vector."); #endif size = vector_interface->allocsize; STARPU_ASSERT_MSG(size != (size_t)-1, "The vector allocation size needs to be defined"); return size; } /* offer an access to the data parameters */ uint32_t starpu_vector_get_nx(starpu_data_handle_t handle) { struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. The given data is not a vector."); #endif return vector_interface->nx; } uintptr_t starpu_vector_get_local_ptr(starpu_data_handle_t handle) { unsigned node; node = starpu_worker_get_local_memory_node(); STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node)); struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, node); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. The given data is not a vector."); #endif return vector_interface->ptr; } size_t starpu_vector_get_elemsize(starpu_data_handle_t handle) { struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. The given data is not a vector."); #endif return vector_interface->elemsize; } size_t starpu_vector_get_allocsize(starpu_data_handle_t handle) { struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); #ifdef STARPU_DEBUG STARPU_ASSERT_MSG(vector_interface->id == STARPU_VECTOR_INTERFACE_ID, "Error. The given data is not a vector."); #endif return vector_interface->allocsize; } /* memory allocation/deallocation primitives for the vector interface */ /* returns the size of the allocated area */ static starpu_ssize_t allocate_vector_buffer_on_node(void *data_interface_, unsigned dst_node) { uintptr_t addr = 0, handle; struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) data_interface_; starpu_ssize_t allocated_memory = vector_interface->allocsize; handle = starpu_malloc_on_node(dst_node, allocated_memory); if (!handle) return -ENOMEM; if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) addr = handle; /* update the data properly in consequence */ vector_interface->ptr = addr; vector_interface->dev_handle = handle; vector_interface->offset = 0; return allocated_memory; } static void free_vector_buffer_on_node(void *data_interface, unsigned node) { struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) data_interface; starpu_free_on_node(node, vector_interface->dev_handle, vector_interface->allocsize); vector_interface->ptr = 0; vector_interface->dev_handle = 0; } static void cache_vector_buffer_on_node(void *new_data_interface, void *data_interface, unsigned node STARPU_ATTRIBUTE_UNUSED) { struct starpu_vector_interface *new_vector_interface = new_data_interface; struct starpu_vector_interface *vector_interface = data_interface; new_vector_interface->ptr = vector_interface->ptr; vector_interface->ptr = 0; new_vector_interface->dev_handle = vector_interface->dev_handle; vector_interface->dev_handle = 0; new_vector_interface->allocsize = vector_interface->allocsize; STARPU_ASSERT(vector_interface->offset == 0); } static void reuse_vector_buffer_on_node(void *data_interface, const void *new_data_interface, unsigned node STARPU_ATTRIBUTE_UNUSED) { struct starpu_vector_interface *vector_interface = data_interface; const struct starpu_vector_interface *new_vector_interface = new_data_interface; vector_interface->ptr = new_vector_interface->ptr; vector_interface->dev_handle = new_vector_interface->dev_handle; vector_interface->offset = 0; } static int map_vector(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_vector_interface *src_vector = src_interface; struct starpu_vector_interface *dst_vector = dst_interface; int ret; uintptr_t mapped; mapped = starpu_interface_map(src_vector->dev_handle, src_vector->offset, src_node, dst_node, src_vector->nx*src_vector->elemsize, &ret); if (mapped) { dst_vector->dev_handle = mapped; dst_vector->offset = 0; if (starpu_node_get_kind(dst_node) != STARPU_OPENCL_RAM) dst_vector->ptr = mapped; return 0; } return ret; } static int unmap_vector(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_vector_interface *src_vector = src_interface; struct starpu_vector_interface *dst_vector = dst_interface; int ret = starpu_interface_unmap(src_vector->dev_handle, src_vector->offset, src_node, dst_vector->dev_handle, dst_node, src_vector->nx*src_vector->elemsize); dst_vector->dev_handle = 0; return ret; } static int update_map(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpu_vector_interface *src_vector = src_interface; struct starpu_vector_interface *dst_vector = dst_interface; return starpu_interface_update_map(src_vector->dev_handle, src_vector->offset, src_node, dst_vector->dev_handle, dst_vector->offset, dst_node, src_vector->nx*src_vector->elemsize); } static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct starpu_vector_interface *src_vector = src_interface; struct starpu_vector_interface *dst_vector = dst_interface; int ret; ret = starpu_interface_copy(src_vector->dev_handle, src_vector->offset, src_node, dst_vector->dev_handle, dst_vector->offset, dst_node, src_vector->nx*src_vector->elemsize, async_data); starpu_interface_data_copy(src_node, dst_node, src_vector->nx*src_vector->elemsize); return ret; } static starpu_ssize_t describe(void *data_interface, char *buf, size_t size) { struct starpu_vector_interface *vector = (struct starpu_vector_interface *) data_interface; return snprintf(buf, size, "V%ux%u", (unsigned) vector->nx, (unsigned) vector->elemsize); } starpu-1.4.9+dfsg/src/datawizard/interfaces/void_interface.c000066400000000000000000000143201507764646700242010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef BUILDING_STARPU #include #endif static int dummy_copy(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); static int map_void(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int unmap_void(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static int update_map_void(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node); static const struct starpu_data_copy_methods void_copy_data_methods_s = { .any_to_any = dummy_copy, }; static void register_void_handle(starpu_data_handle_t handle, int home_node, void *data_interface); static starpu_ssize_t allocate_void_buffer_on_node(void *data_interface_, unsigned dst_node); static void free_void_buffer_on_node(void *data_interface, unsigned node); static size_t void_interface_get_size(starpu_data_handle_t handle); static uint32_t footprint_void_interface_crc32(starpu_data_handle_t handle); static int void_compare(void *data_interface_a, void *data_interface_b); static void display_void_interface(starpu_data_handle_t handle, FILE *f); static int pack_void_handle(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count); static int peek_void_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static int unpack_void_handle(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count); static starpu_ssize_t describe(void *data_interface, char *buf, size_t size); struct starpu_data_interface_ops starpu_interface_void_ops = { .register_data_handle = register_void_handle, .allocate_data_on_node = allocate_void_buffer_on_node, .free_data_on_node = free_void_buffer_on_node, .map_data = map_void, .unmap_data = unmap_void, .update_map = update_map_void, .copy_methods = &void_copy_data_methods_s, .get_size = void_interface_get_size, .footprint = footprint_void_interface_crc32, .compare = void_compare, .interfaceid = STARPU_VOID_INTERFACE_ID, .interface_size = 0, .display = display_void_interface, .pack_data = pack_void_handle, .peek_data = peek_void_handle, .unpack_data = unpack_void_handle, .describe = describe, .name = "STARPU_VOID_INTERFACE", .pack_meta = NULL, .unpack_meta = NULL, .free_meta = NULL }; static void register_void_handle(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED, int home_node STARPU_ATTRIBUTE_UNUSED, void *data_interface STARPU_ATTRIBUTE_UNUSED) { /* Since there is no real data to register, we don't do anything */ } /* declare a new data with the void interface */ void starpu_void_data_register(starpu_data_handle_t *handleptr) { starpu_data_register(handleptr, STARPU_MAIN_RAM, NULL, &starpu_interface_void_ops); } static uint32_t footprint_void_interface_crc32(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED) { return 0; } static int void_compare(void *data_interface_a STARPU_ATTRIBUTE_UNUSED, void *data_interface_b STARPU_ATTRIBUTE_UNUSED) { /* There is no allocation required, and therefore nothing to cache * anyway. */ return 1; } static void display_void_interface(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED, FILE *f) { fprintf(f, "void\t"); } static int pack_void_handle(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED, unsigned node STARPU_ATTRIBUTE_UNUSED, void **ptr, starpu_ssize_t *count) { *count = 0; *ptr = NULL; return 0; } static int peek_void_handle(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED, unsigned node STARPU_ATTRIBUTE_UNUSED, void *ptr STARPU_ATTRIBUTE_UNUSED, size_t count STARPU_ATTRIBUTE_UNUSED) { return 0; } static int unpack_void_handle(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED, unsigned node STARPU_ATTRIBUTE_UNUSED, void *ptr STARPU_ATTRIBUTE_UNUSED, size_t count STARPU_ATTRIBUTE_UNUSED) { return 0; } static size_t void_interface_get_size(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED) { return 0; } /* memory allocation/deallocation primitives for the void interface */ /* returns the size of the allocated area */ static starpu_ssize_t allocate_void_buffer_on_node(void *data_interface STARPU_ATTRIBUTE_UNUSED, unsigned dst_node STARPU_ATTRIBUTE_UNUSED) { /* Successfully allocated 0 bytes */ return 0; } static void free_void_buffer_on_node(void *data_interface STARPU_ATTRIBUTE_UNUSED , unsigned node STARPU_ATTRIBUTE_UNUSED) { /* There is no buffer actually */ } static int map_void(void *src_interface STARPU_ATTRIBUTE_UNUSED, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface STARPU_ATTRIBUTE_UNUSED, unsigned dst_node STARPU_ATTRIBUTE_UNUSED) { return 0; } static int unmap_void(void *src_interface STARPU_ATTRIBUTE_UNUSED, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface STARPU_ATTRIBUTE_UNUSED, unsigned dst_node STARPU_ATTRIBUTE_UNUSED) { return 0; } static int update_map_void(void *src_interface STARPU_ATTRIBUTE_UNUSED, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface STARPU_ATTRIBUTE_UNUSED, unsigned dst_node STARPU_ATTRIBUTE_UNUSED) { return 0; } static int dummy_copy(void *src_interface STARPU_ATTRIBUTE_UNUSED, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface STARPU_ATTRIBUTE_UNUSED, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *async_data STARPU_ATTRIBUTE_UNUSED) { return 0; } static starpu_ssize_t describe(void *data_interface STARPU_ATTRIBUTE_UNUSED, char *buf, size_t size) { return snprintf(buf, size, "0"); } starpu-1.4.9+dfsg/src/datawizard/malloc.c000066400000000000000000001012221507764646700203420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2018,2022 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef STARPU_SIMGRID #include #include #include #endif #ifdef STARPU_HAVE_HWLOC #include #ifndef HWLOC_API_VERSION #define HWLOC_OBJ_PU HWLOC_OBJ_PROC #endif #if HWLOC_API_VERSION < 0x00010b00 #define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE #endif #endif #ifndef O_BINARY #define O_BINARY 0 #endif #ifndef MAP_POPULATE #define MAP_POPULATE 0 #endif static size_t _malloc_align = sizeof(void*); static int disable_pinning; static int enable_suballocator; /* This file is used for implementing "folded" allocation */ #ifdef STARPU_SIMGRID static int bogusfile = -1; static unsigned long _starpu_malloc_simulation_fold; /* Table to control unique simulation mallocs */ #include struct unique_shared_alloc { size_t id; int count; void* addr; UT_hash_handle hh; }; static struct unique_shared_alloc* unique_shared_alloc_table = NULL; #endif static starpu_malloc_hook malloc_hook; static starpu_free_hook free_hook; void starpu_malloc_set_hooks(starpu_malloc_hook _malloc_hook, starpu_free_hook _free_hook) { malloc_hook = _malloc_hook; free_hook = _free_hook; } void starpu_malloc_set_align(size_t align) { STARPU_ASSERT_MSG(!(align & (align - 1)), "Alignment given to starpu_malloc_set_align (%lu) must be a power of two", (unsigned long) align); if (_malloc_align < align) _malloc_align = align; } /* Driver porters: adding your driver here is optional, only needed for pinning host memory. */ #if (defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER))// || defined(STARPU_USE_OPENCL) struct malloc_pinned_codelet_struct { void **ptr; size_t dim; }; #endif /* Would be difficult to do it this way, we need to remember the cl_mem to be able to free it later... */ //#ifdef STARPU_USE_OPENCL //static void malloc_pinned_opencl_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg) //{ // struct malloc_pinned_codelet_struct *s = arg; // // _STARPU_MALLOC(*(s->ptr), s->dim); // starpu_opencl_allocate_memory(devid, (void **)(s->ptr), s->dim, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR); //} //#endif #if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) static void malloc_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg) { struct malloc_pinned_codelet_struct *s = arg; cudaError_t cures = cudaErrorMemoryAllocation; #if 0 //defined(STARPU_USE_CUDA_MAP) && defined(STARPU_HAVE_CUDA_MNGMEM) /* FIXME: check if devices actually support cudaMallocManaged or fallback to cudaHostAlloc() */ cures = cudaMallocManaged((void **)(s->ptr), s->dim, cudaMemAttachGlobal); #endif #if defined(STARPU_USE_CUDA_MAP) && defined(STARPU_HAVE_CUDA_CANMAPHOST) if (cures != cudaSuccess) cures = cudaHostAlloc((void **)(s->ptr), s->dim, cudaHostAllocPortable|cudaHostAllocMapped); #endif if (cures != cudaSuccess) cures = cudaHostAlloc((void **)(s->ptr), s->dim, cudaHostAllocPortable); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); } #endif #if (defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER)) && !defined(STARPU_SIMGRID)// || defined(STARPU_USE_OPENCL) static struct starpu_perfmodel malloc_pinned_model = { .type = STARPU_HISTORY_BASED, .symbol = "malloc_pinned" }; static struct starpu_codelet malloc_pinned_cl = { .cuda_funcs = {malloc_pinned_cuda_codelet}, //#ifdef STARPU_USE_OPENCL // .opencl_funcs = {malloc_pinned_opencl_codelet}, //#endif .nbuffers = 0, .model = &malloc_pinned_model }; #endif /* Allocation in CPU RAM */ int starpu_malloc_flags(void **A, size_t dim, int flags) { return _starpu_malloc_flags_on_node(STARPU_MAIN_RAM, A, dim, flags); } /* Return whether we should pin the allocated data */ static int _starpu_malloc_should_pin(int flags) { if (flags & STARPU_MALLOC_PINNED && disable_pinning <= 0) { if (_starpu_can_submit_cuda_task()) { return 1; } if (_starpu_can_submit_hip_task()) { return 1; } // if (_starpu_can_submit_opencl_task()) // return 1; } return 0; } int _starpu_malloc_willpin_on_node(unsigned dst_node) { int flags = _starpu_get_node_struct(dst_node)->malloc_on_node_default_flags; return (_starpu_malloc_should_pin(flags) && STARPU_RUNNING_ON_VALGRIND == 0 && (_starpu_can_submit_cuda_task() || _starpu_can_submit_hip_task() /* || _starpu_can_submit_opencl_task() */ )); } int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int flags) { int ret=0; STARPU_ASSERT_MSG(A, "starpu_malloc needs to be passed the address of the pointer to be filled"); if (!starpu_is_initialized()) _STARPU_DISP("Warning: starpu_malloc needs to be called after starpu is initialized, to be able to pin memory for CUDA\n"); if (dim == 0) /* Make sure we succeed */ dim = 1; if (flags & STARPU_MALLOC_COUNT) { if (!(flags & STARPU_MALLOC_NORECLAIM)) while (starpu_memory_allocate(dst_node, dim, flags) != 0) { size_t freed; size_t reclaim = 2 * dim; _STARPU_DEBUG("There is not enough memory left, we are going to reclaim %ld\n", (long)reclaim); _STARPU_TRACE_START_MEMRECLAIM(dst_node,0); freed = _starpu_memory_reclaim_generic(dst_node, 0, reclaim, STARPU_FETCH); _STARPU_TRACE_END_MEMRECLAIM(dst_node,0); if (freed < dim && !(flags & STARPU_MEMORY_WAIT)) { // We could not reclaim enough memory *A = NULL; return -ENOMEM; } } else if (flags & STARPU_MEMORY_WAIT) starpu_memory_allocate(dst_node, dim, flags); else starpu_memory_allocate(dst_node, dim, flags | STARPU_MEMORY_OVERFLOW); } if (malloc_hook) { ret = malloc_hook(dst_node, A, dim, flags); goto end; } /* Note: synchronize this test with _starpu_malloc_willpin_on_node */ if (_starpu_malloc_should_pin(flags) && STARPU_RUNNING_ON_VALGRIND == 0) { if (_starpu_can_submit_cuda_task()) { #ifdef STARPU_SIMGRID /* FIXME: CUDA seems to be taking 650µs every 1MiB. * Ideally we would simulate this batching in 1MiB requests * instead of computing an average value. */ if (_starpu_simgrid_cuda_malloc_cost()) starpu_sleep((float) dim * 0.000650 / 1048576.); #else /* STARPU_SIMGRID */ #ifdef STARPU_USE_CUDA #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER cudaError_t cures = cudaErrorMemoryAllocation; #if 0 //defined(STARPU_USE_CUDA_MAP) && defined(STARPU_HAVE_CUDA_MNGMEM) /* FIXME: check if devices actually support cudaMallocManaged or fallback to cudaHostAlloc() */ cures = cudaMallocManaged(A, dim, cudaMemAttachGlobal); #endif #if defined(STARPU_USE_CUDA_MAP) && defined(STARPU_HAVE_CUDA_CANMAPHOST) if (cures != cudaSuccess) cures = cudaHostAlloc(A, dim, cudaHostAllocPortable|cudaHostAllocMapped); #endif if (cures != cudaSuccess) cures = cudaHostAlloc(A, dim, cudaHostAllocPortable); if (STARPU_UNLIKELY(cures)) { STARPU_CUDA_REPORT_ERROR(cures); ret = -ENOMEM; } goto end; #else int push_res; /* Old versions of CUDA are not thread-safe, we have to * run cudaHostAlloc from CUDA workers */ STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "without CUDA peer allocation support, pinned allocation must not be done from task or callback"); struct malloc_pinned_codelet_struct s = { .ptr = A, .dim = dim }; malloc_pinned_cl.where = STARPU_CUDA; struct starpu_task *task = starpu_task_create(); task->name = "cuda_malloc_pinned"; task->callback_func = NULL; task->cl = &malloc_pinned_cl; task->cl_arg = &s; task->type = STARPU_TASK_TYPE_INTERNAL; task->synchronous = 1; _starpu_exclude_task_from_dag(task); push_res = _starpu_task_submit_internally(task); STARPU_ASSERT(push_res != -ENODEV); goto end; #endif /* STARPU_HAVE_CUDA_MEMCPY_PEER */ #endif /* STARPU_USE_CUDA */ } if (_starpu_can_submit_hip_task()) { #ifdef STARPU_USE_HIP hipError_t hipres = hipErrorMemoryAllocation; #if 0 //defined(STARPU_USE_HIP_MAP) && defined(STARPU_HAVE_HIP_MNGMEM) /* FIXME: check if devices actually support hipMallocManaged or fallback to hipHostAlloc() */ hipres = hipMallocManaged(A, dim, hipMemAttachGlobal); #endif #if defined(STARPU_USE_HIP_MAP) && defined(STARPU_HAVE_HIP_CANMAPHOST) if (hipres != hipSuccess) hipres = hipHostMalloc(A, dim, hipHostMallocPortable|hipHostMallocMapped); #endif if (hipres != hipSuccess) { hipres = hipHostMalloc(A, dim, hipHostMallocPortable); } if (STARPU_UNLIKELY(hipres != hipSuccess)) { STARPU_HIP_REPORT_ERROR(hipres); ret = -ENOMEM; } goto end; #endif /* STARPU_USE_HIP */ // } // else if (_starpu_can_submit_opencl_task()) // { //#ifdef STARPU_USE_OPENCL // int push_res; // // STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "pinned OpenCL allocation must not be done from task or callback"); // // struct malloc_pinned_codelet_struct s = // { // .ptr = A, // .dim = dim // }; // // malloc_pinned_cl.where = STARPU_OPENCL; // struct starpu_task *task = starpu_task_create(); // task->name = "opencl_malloc_pinned"; // task->callback_func = NULL; // task->cl = &malloc_pinned_cl; // task->cl_arg = &s; // task->synchronous = 1; // task->type = STARPU_TASK_TYPE_INTERNAL; // // _starpu_exclude_task_from_dag(task); // // push_res = _starpu_task_submit_internally(task); // STARPU_ASSERT(push_res != -ENODEV); // goto end; //#endif /* STARPU_USE_OPENCL */ #endif /* STARPU_SIMGRID */ } } #ifdef STARPU_SIMGRID if (flags & STARPU_MALLOC_SIMULATION_FOLDED) { #if SIMGRID_VERSION >= 31500 && SIMGRID_VERSION != 31559 if(_starpu_simgrid_running_smpi()) { if(flags & STARPU_MALLOC_SIMULATION_UNIQUE) { struct unique_shared_alloc *block; HASH_FIND(hh, unique_shared_alloc_table, &dim, sizeof(dim), block); if(block==NULL) { block = (struct unique_shared_alloc*)malloc(sizeof(struct unique_shared_alloc)); block->addr = SMPI_SHARED_MALLOC(dim); block->count = 1; block->id = dim; HASH_ADD(hh, unique_shared_alloc_table, id, sizeof(dim), block); } else { block->count++; } *A = block->addr; } else { *A = SMPI_SHARED_MALLOC(dim); } } else #endif { /* Use "folded" allocation: the same file is mapped several * times contiguously, to get a memory area one can read/write, * without consuming memory */ /* First reserve memory area */ void *buf = mmap (NULL, dim, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); unsigned i; if (buf == MAP_FAILED) { _STARPU_DISP("Warning: could not allocate %luMiB of memory, you need to run \"sysctl vm.overcommit_memory=1\" as root to allow so big allocations\n", (unsigned long) (dim >> 20)); ret = -ENOMEM; *A = NULL; } else { if (bogusfile == -1) { char *path = starpu_getenv("TMPDIR"); if (!path) path = starpu_getenv("TEMP"); if (!path) path = starpu_getenv("TMP"); if (!path) path = "/tmp"; /* Create bogus file if not done already */ char *name = _starpu_mktemp(path, O_RDWR | O_BINARY, &bogusfile); char *dumb; if (!name) { ret = errno; munmap(buf, dim); *A = NULL; goto end; } unlink(name); free(name); _STARPU_CALLOC(dumb, 1,_starpu_malloc_simulation_fold); write(bogusfile, dumb, _starpu_malloc_simulation_fold); free(dumb); } /* Map the bogus file in place of the anonymous memory */ for (i = 0; i < dim / _starpu_malloc_simulation_fold; i++) { void *pos = (void*) ((unsigned long) buf + i * _starpu_malloc_simulation_fold); void *res = mmap(pos, _starpu_malloc_simulation_fold, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED|MAP_POPULATE, bogusfile, 0); STARPU_ASSERT_MSG(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?", strerror(errno)); } if (dim % _starpu_malloc_simulation_fold) { void *pos = (void*) ((unsigned long) buf + i * _starpu_malloc_simulation_fold); void *res = mmap(pos, dim % _starpu_malloc_simulation_fold, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED|MAP_POPULATE, bogusfile, 0); STARPU_ASSERT_MSG(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?", strerror(errno)); } *A = buf; } } } #endif #ifdef HAVE_MMAP #ifdef STARPU_USE_MP if(_starpu_can_submit_ms_task()) { *A = _starpu_map_allocate(dim, dst_node); if (!*A) ret = -ENOMEM; else { #ifdef STARPU_HAVE_HWLOC struct _starpu_machine_config *config = _starpu_get_machine_config(); hwloc_topology_t hwtopology = config->topology.hwtopology; hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, starpu_memory_nodes_numa_id_to_hwloclogid(dst_node)); if (numa_node_obj) { hwloc_bitmap_t nodeset = numa_node_obj->nodeset; #if HWLOC_API_VERSION >= 0x00020000 hwloc_set_area_membind(hwtopology, *A, dim, nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET | HWLOC_MEMBIND_NOCPUBIND); #else hwloc_set_area_membind_nodeset(hwtopology, *A, dim, nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_NOCPUBIND); #endif } #endif } } else #endif #endif #ifdef STARPU_HAVE_HWLOC if (starpu_memory_nodes_get_numa_count() > 1) { struct _starpu_machine_config *config = _starpu_get_machine_config(); hwloc_topology_t hwtopology = config->topology.hwtopology; hwloc_obj_t numa_node_obj = hwloc_get_obj_by_type(hwtopology, HWLOC_OBJ_NUMANODE, starpu_memory_nodes_numa_id_to_hwloclogid(dst_node)); hwloc_bitmap_t nodeset = numa_node_obj->nodeset; #if HWLOC_API_VERSION >= 0x00020000 *A = hwloc_alloc_membind(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET | HWLOC_MEMBIND_NOCPUBIND); #else *A = hwloc_alloc_membind_nodeset(hwtopology, dim, nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_NOCPUBIND); #endif //fprintf(stderr, "Allocation %lu bytes on NUMA node %d [%p]\n", (unsigned long) dim, starpu_memnode_get_numaphysid(dst_node), *A); if (!*A) ret = -ENOMEM; } #endif /* STARPU_HAVE_HWLOC */ else #ifdef STARPU_HAVE_POSIX_MEMALIGN if (_malloc_align != sizeof(void*)) { if (posix_memalign(A, _malloc_align, dim)) { ret = -ENOMEM; *A = NULL; } } else #elif defined(STARPU_HAVE_MEMALIGN) if (_malloc_align != sizeof(void*)) { *A = memalign(_malloc_align, dim); if (!*A) ret = -ENOMEM; } else #endif /* STARPU_HAVE_POSIX_MEMALIGN */ { *A = malloc(dim); if (!*A) ret = -ENOMEM; } end: if (ret == 0) { STARPU_ASSERT_MSG(*A, "Failed to allocated memory of size %lu b\n", (unsigned long)dim); } else if (flags & STARPU_MALLOC_COUNT) { starpu_memory_deallocate(dst_node, dim); } return ret; } int starpu_malloc(void **A, size_t dim) { return starpu_malloc_flags(A, dim, STARPU_MALLOC_PINNED); } #if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) static void free_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg) { cudaError_t cures; #if 0 //defined(STARPU_USE_CUDA_MAP) && defined(STARPU_HAVE_CUDA_MNGMEM) /* FIXME: check if devices actually support cudaMallocManaged or fallback to cudaHostAlloc() */ cures = cudaFree(arg); #else cures = cudaFreeHost(arg); #endif if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); } #endif //#ifdef STARPU_USE_OPENCL //static void free_pinned_opencl_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg) //{ // // free(arg); // int err = clReleaseMemObject(arg); // if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); //} //#endif #if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) // || defined(STARPU_USE_OPENCL) static struct starpu_perfmodel free_pinned_model = { .type = STARPU_HISTORY_BASED, .symbol = "free_pinned" }; static struct starpu_codelet free_pinned_cl = { .cuda_funcs = {free_pinned_cuda_codelet}, //#ifdef STARPU_USE_OPENCL // .opencl_funcs = {free_pinned_opencl_codelet}, //#endif .nbuffers = 0, .model = &free_pinned_model }; #endif int starpu_free_flags(void *A, size_t dim, int flags) { return _starpu_free_flags_on_node(STARPU_MAIN_RAM, A, dim, flags); } int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags) { if (!A) return 0; if (dim == 0) dim = 1; if (free_hook) { free_hook(dst_node, A, dim, flags); goto out; } if (_starpu_malloc_should_pin(flags) && STARPU_RUNNING_ON_VALGRIND == 0) { if (_starpu_can_submit_cuda_task()) { #ifdef STARPU_SIMGRID /* TODO: simulate CUDA barrier */ #else /* !STARPU_SIMGRID */ #ifdef STARPU_USE_CUDA #ifndef STARPU_HAVE_CUDA_MEMCPY_PEER if (!starpu_is_initialized()) { #endif /* This is especially useful when starpu_free is called even * though starpu_shutdown has already * been called, so we will not be able to submit a task. */ cudaError_t cures; #if 0 //defined(STARPU_USE_CUDA_MAP) && defined(STARPU_HAVE_CUDA_MNGMEM) /* FIXME: check if devices actually support cudaMallocManaged or fallback to cudaHostAlloc() */ cures = cudaFree(A); #else cures = cudaFreeHost(A); #endif if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); goto out; #ifndef STARPU_HAVE_CUDA_MEMCPY_PEER } else { int push_res; STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "without CUDA peer allocation support, pinned deallocation must not be done from task or callback"); free_pinned_cl.where = STARPU_CUDA; struct starpu_task *task = starpu_task_create(); task->name = "cuda_free_pinned"; task->callback_func = NULL; task->cl = &free_pinned_cl; task->cl_arg = A; task->synchronous = 1; task->type = STARPU_TASK_TYPE_INTERNAL; _starpu_exclude_task_from_dag(task); push_res = _starpu_task_submit_internally(task); STARPU_ASSERT(push_res != -ENODEV); goto out; } #endif /* STARPU_HAVE_CUDA_MEMCPY_PEER */ #endif /* STARPU_USE_CUDA */ } if (_starpu_can_submit_hip_task()) { #ifdef STARPU_USE_HIP /* TODO: submit task */ /* This is especially useful when starpu_free is called even * though starpu_shutdown has already * been called, so we will not be able to submit a task. */ hipError_t hipres; hipres = hipHostFree(A); if (STARPU_UNLIKELY(hipres)) STARPU_HIP_REPORT_ERROR(hipres); goto out; #endif /* STARPU_USE_HIP */ #endif /* STARPU_SIMGRID */ } // else if (_starpu_can_submit_opencl_task()) // { //#ifdef STARPU_USE_OPENCL // int push_res; // // STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "pinned OpenCL deallocation must not be done from task or callback"); // // free_pinned_cl.where = STARPU_OPENCL; // struct starpu_task *task = starpu_task_create(); // task->name = "opencl_free_pinned"; // task->callback_func = NULL; // task->cl = &free_pinned_cl; // task->cl_arg = A; // task->synchronous = 1; // task->type = STARPU_TASK_TYPE_INTERNAL; // // _starpu_exclude_task_from_dag(task); // // push_res = starpu_task_submit(task); // STARPU_ASSERT(push_res != -ENODEV); // goto out; // } //#endif } #ifdef STARPU_SIMGRID if (flags & STARPU_MALLOC_SIMULATION_FOLDED) { #if SIMGRID_VERSION >= 31500 && SIMGRID_VERSION != 31559 if(_starpu_simgrid_running_smpi()) { if(flags & STARPU_MALLOC_SIMULATION_UNIQUE) { struct unique_shared_alloc *block; HASH_FIND(hh, unique_shared_alloc_table, &dim, sizeof(dim), block); STARPU_ASSERT(block != NULL); block->count--; if(block->count == 0) { SMPI_SHARED_FREE(block->addr); HASH_DEL(unique_shared_alloc_table, block); free(block); } } else { SMPI_SHARED_FREE(A); } } else #endif munmap(A, dim); } #endif #ifdef HAVE_MMAP #ifdef STARPU_USE_MP else if(_starpu_can_submit_ms_task()) { _starpu_map_deallocate(A, dim); } #endif #endif #ifdef STARPU_HAVE_HWLOC else if (starpu_memory_nodes_get_numa_count() > 1) { struct _starpu_machine_config *config = _starpu_get_machine_config(); hwloc_topology_t hwtopology = config->topology.hwtopology; hwloc_free(hwtopology, A, dim); } #endif /* STARPU_HAVE_HWLOC */ else free(A); out: if (flags & STARPU_MALLOC_COUNT) { starpu_memory_deallocate(dst_node, dim); } return 0; } int starpu_free(void *A) { return starpu_free_flags(A, 0, STARPU_MALLOC_PINNED); } int starpu_free_noflag(void *A, size_t dim) { return starpu_free_flags(A, dim, STARPU_MALLOC_PINNED); } static uintptr_t _starpu_malloc_on_node(unsigned dst_node, size_t size, int flags) { uintptr_t addr = 0; if (size == 0) size = 1; /* Handle count first */ if (flags & STARPU_MALLOC_COUNT) { if (starpu_memory_allocate(dst_node, size, flags) != 0) return 0; /* And prevent double-count in starpu_malloc_flags */ flags &= ~STARPU_MALLOC_COUNT; } const struct _starpu_node_ops *node_ops = _starpu_memory_node_get_node_ops(dst_node); if (node_ops && node_ops->malloc_on_node) addr = node_ops->malloc_on_node(dst_node, size, flags & ~STARPU_MALLOC_COUNT); else STARPU_ABORT_MSG("No malloc_on_node function defined for node %s\n", _starpu_node_get_prefix(starpu_node_get_kind(dst_node))); if (addr == 0) { // Allocation failed, gives the memory back to the memory manager _STARPU_TRACE_MEMORY_FULL(size); if (flags & STARPU_MALLOC_COUNT) starpu_memory_deallocate(dst_node, size); } return addr; } void _starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags) { int count = flags & STARPU_MALLOC_COUNT; flags &= ~STARPU_MALLOC_COUNT; if (size == 0) size = 1; const struct _starpu_node_ops *node_ops = _starpu_memory_node_get_node_ops(dst_node); if (node_ops && node_ops->free_on_node) node_ops->free_on_node(dst_node, addr, size, flags); else STARPU_ABORT_MSG("No free_on_node function defined for node %s\n", _starpu_node_get_prefix(starpu_node_get_kind(dst_node))); if (count) starpu_memory_deallocate(dst_node, size); } int starpu_memory_pin(void *addr STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED) { if (STARPU_MALLOC_PINNED && disable_pinning <= 0 && STARPU_RUNNING_ON_VALGRIND == 0) { #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_CUDA_MEMCPY_PEER) if (cudaHostRegister(addr, size, cudaHostRegisterPortable) != cudaSuccess) return -1; #endif #if defined(STARPU_USE_HIP) if (hipHostRegister(addr, size, hipHostRegisterPortable) != hipSuccess) return -1; #endif } return 0; } int starpu_memory_unpin(void *addr STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED) { if (STARPU_MALLOC_PINNED && disable_pinning <= 0 && STARPU_RUNNING_ON_VALGRIND == 0) { #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_CUDA_MEMCPY_PEER) if (cudaHostUnregister(addr) != cudaSuccess) return -1; #endif #if defined(STARPU_USE_HIP) if (hipHostUnregister(addr) != hipSuccess) return -1; #endif } return 0; } void _starpu_malloc_init(unsigned dst_node) { struct _starpu_node *node_struct = _starpu_get_node_struct(dst_node); _starpu_chunk_list_init(&node_struct->chunks); node_struct->nfreechunks = 0; STARPU_PTHREAD_MUTEX_INIT(&node_struct->chunk_mutex, NULL); disable_pinning = starpu_getenv_number("STARPU_DISABLE_PINNING"); enable_suballocator = starpu_getenv_number_default("STARPU_SUBALLOCATOR", 1); node_struct->malloc_on_node_default_flags = STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT; #ifdef STARPU_SIMGRID /* Reasonably "costless" */ _starpu_malloc_simulation_fold = starpu_getenv_number_default("STARPU_MALLOC_SIMULATION_FOLD", 1) << 20; #endif } void _starpu_malloc_shutdown(unsigned dst_node) { struct _starpu_node *node_struct = _starpu_get_node_struct(dst_node); struct _starpu_chunk *chunk, *next_chunk; STARPU_PTHREAD_MUTEX_LOCK(&node_struct->chunk_mutex); for (chunk = _starpu_chunk_list_begin(&node_struct->chunks); chunk != _starpu_chunk_list_end(&node_struct->chunks); chunk = next_chunk) { next_chunk = _starpu_chunk_list_next(chunk); _starpu_free_on_node_flags(dst_node, chunk->base, CHUNK_SIZE, node_struct->malloc_on_node_default_flags); _starpu_chunk_list_erase(&node_struct->chunks, chunk); free(chunk); } STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->chunk_mutex); STARPU_PTHREAD_MUTEX_DESTROY(&node_struct->chunk_mutex); } /* Create a new chunk */ static struct _starpu_chunk *_starpu_new_chunk(unsigned dst_node, int flags) { struct _starpu_chunk *chunk; uintptr_t base = _starpu_malloc_on_node(dst_node, CHUNK_SIZE, flags); if (!base) return NULL; /* Create a new chunk */ chunk = _starpu_chunk_new(); chunk->base = base; /* First block is just a fake block pointing to the free segments list */ chunk->bitmap[0].length = 0; chunk->bitmap[0].next = 1; /* At first we have only one big segment for the whole chunk */ chunk->bitmap[1].length = CHUNK_NBLOCKS; chunk->bitmap[1].next = -1; chunk->available_max = CHUNK_NBLOCKS; chunk->available = CHUNK_NBLOCKS; return chunk; } /* Return whether we should use our suballocator */ static int _starpu_malloc_should_suballoc(unsigned dst_node, size_t size, int flags) { return (enable_suballocator && (size <= CHUNK_ALLOC_MAX && (starpu_node_get_kind(dst_node) == STARPU_CUDA_RAM || (starpu_node_get_kind(dst_node) == STARPU_CPU_RAM && _starpu_malloc_should_pin(flags)) ))) || starpu_node_get_kind(dst_node) == STARPU_MAX_FPGA_RAM; } uintptr_t starpu_malloc_on_node_flags(unsigned dst_node, size_t size, int flags) { /* Big allocation, allocate normally */ if (!_starpu_malloc_should_suballoc(dst_node, size, flags)) return _starpu_malloc_on_node(dst_node, size, flags); struct _starpu_node *node_struct = _starpu_get_node_struct(dst_node); /* Round up allocation to block size */ int nblocks = (size + CHUNK_ALLOC_MIN - 1) / CHUNK_ALLOC_MIN; if (!nblocks) nblocks = 1; struct _starpu_chunk *chunk; int prevblock, block; int available_max; struct block *bitmap; STARPU_PTHREAD_MUTEX_LOCK(&node_struct->chunk_mutex); /* Try to find a big enough segment among the chunks */ for (chunk = _starpu_chunk_list_begin(&node_struct->chunks); chunk != _starpu_chunk_list_end(&node_struct->chunks); chunk = _starpu_chunk_list_next(chunk)) { if (chunk->available_max < nblocks) continue; bitmap = chunk->bitmap; available_max = 0; for (prevblock = block = 0; block != -1; prevblock = block, block = bitmap[prevblock].next) { STARPU_ASSERT(block >= 0 && block <= CHUNK_NBLOCKS); int length = bitmap[block].length; if (length >= nblocks) { if (length >= 2*nblocks) { /* This one this has quite some room, * put it front, to make finding it * easier next time. */ _starpu_chunk_list_erase(&node_struct->chunks, chunk); _starpu_chunk_list_push_front(&node_struct->chunks, chunk); } if (chunk->available == CHUNK_NBLOCKS) /* This one was empty, it's not empty any more */ node_struct->nfreechunks--; goto found; } if (length > available_max) available_max = length; } /* Didn't find a big enough segment in this chunk, its * available_max is out of date */ chunk->available_max = available_max; } /* Didn't find a big enough segment, create another chunk. */ chunk = _starpu_new_chunk(dst_node, flags); if (!chunk) { /* Really no memory any more, fail */ STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->chunk_mutex); errno = ENOMEM; return 0; } /* And make it easy to find. */ _starpu_chunk_list_push_front(&node_struct->chunks, chunk); bitmap = chunk->bitmap; prevblock = 0; block = 1; found: chunk->available -= nblocks; STARPU_ASSERT(bitmap[block].length >= nblocks); STARPU_ASSERT(block <= CHUNK_NBLOCKS); if (bitmap[block].length == nblocks) { /* Fits exactly, drop this segment from the skip list */ bitmap[prevblock].next = bitmap[block].next; } else { /* Still some room */ STARPU_ASSERT(block + nblocks <= CHUNK_NBLOCKS); bitmap[prevblock].next = block + nblocks; bitmap[block + nblocks].length = bitmap[block].length - nblocks; bitmap[block + nblocks].next = bitmap[block].next; } STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->chunk_mutex); return chunk->base + (block-1) * CHUNK_ALLOC_MIN; } void starpu_free_on_node_flags(unsigned dst_node, uintptr_t addr, size_t size, int flags) { /* Big allocation, deallocate normally */ if (!_starpu_malloc_should_suballoc(dst_node, size, flags)) { _starpu_free_on_node_flags(dst_node, addr, size, flags); return; } struct _starpu_node *node_struct = _starpu_get_node_struct(dst_node); struct _starpu_chunk *chunk; /* Round up allocation to block size */ int nblocks = (size + CHUNK_ALLOC_MIN - 1) / CHUNK_ALLOC_MIN; if (!nblocks) nblocks = 1; STARPU_PTHREAD_MUTEX_LOCK(&node_struct->chunk_mutex); for (chunk = _starpu_chunk_list_begin(&node_struct->chunks); chunk != _starpu_chunk_list_end(&node_struct->chunks); chunk = _starpu_chunk_list_next(chunk)) if (addr >= chunk->base && addr < chunk->base + CHUNK_SIZE) break; STARPU_ASSERT(chunk != _starpu_chunk_list_end(&node_struct->chunks)); struct block *bitmap = chunk->bitmap; int block = ((addr - chunk->base) / CHUNK_ALLOC_MIN) + 1, prevblock, nextblock; /* Look for free segment just before this one */ for (prevblock = 0; prevblock != -1; prevblock = nextblock) { STARPU_ASSERT(prevblock >= 0 && prevblock <= CHUNK_NBLOCKS); nextblock = bitmap[prevblock].next; STARPU_ASSERT_MSG(nextblock != block, "It seems data 0x%lx (size %u) on node %u is being freed a second time\n", (unsigned long) addr, (unsigned) size, dst_node); if (nextblock > block || nextblock == -1) break; } STARPU_ASSERT(prevblock != -1); chunk->available += nblocks; /* Insert in free segments list */ bitmap[block].next = nextblock; bitmap[prevblock].next = block; bitmap[block].length = nblocks; STARPU_ASSERT(nextblock >= -1 && nextblock <= CHUNK_NBLOCKS); if (nextblock == block + nblocks) { /* This freed segment is just before a free segment, merge them */ bitmap[block].next = bitmap[nextblock].next; bitmap[block].length += bitmap[nextblock].length; if (bitmap[block].length > chunk->available_max) chunk->available_max = bitmap[block].length; } if (prevblock > 0 && prevblock + bitmap[prevblock].length == block) { /* This free segment is just after a free segment, merge them */ bitmap[prevblock].next = bitmap[block].next; bitmap[prevblock].length += bitmap[block].length; if (bitmap[prevblock].length > chunk->available_max) chunk->available_max = bitmap[prevblock].length; block = prevblock; } if (chunk->available == CHUNK_NBLOCKS) { /* This chunk is now empty, but avoid chunk free/alloc * ping-pong by keeping some of these. */ if (node_struct->nfreechunks >= CHUNKS_NFREE && starpu_node_get_kind(dst_node) != STARPU_MAX_FPGA_RAM) { /* We already have free chunks, release this one */ _starpu_free_on_node_flags(dst_node, chunk->base, CHUNK_SIZE, flags); _starpu_chunk_list_erase(&node_struct->chunks, chunk); free(chunk); } else node_struct->nfreechunks++; } else { /* Freed some room, put this first in chunks list */ _starpu_chunk_list_erase(&node_struct->chunks, chunk); _starpu_chunk_list_push_front(&node_struct->chunks, chunk); } STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->chunk_mutex); } void starpu_malloc_on_node_set_default_flags(unsigned node, int flags) { STARPU_ASSERT_MSG(node < STARPU_MAXNODES, "bogus node value %u given to starpu_malloc_on_node_set_default_flags\n", node); _starpu_get_node_struct(node)->malloc_on_node_default_flags = flags; } uintptr_t starpu_malloc_on_node(unsigned dst_node, size_t size) { return starpu_malloc_on_node_flags(dst_node, size, _starpu_get_node_struct(dst_node)->malloc_on_node_default_flags); } void starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size) { starpu_free_on_node_flags(dst_node, addr, size, _starpu_get_node_struct(dst_node)->malloc_on_node_default_flags); } starpu-1.4.9+dfsg/src/datawizard/malloc.h000066400000000000000000000061521507764646700203550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2022-2022 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __ALLOC_H__ #define __ALLOC_H__ #include #include #pragma GCC visibility push(hidden) /** @file */ void _starpu_malloc_init(unsigned dst_node); void _starpu_malloc_shutdown(unsigned dst_node); int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int flags); int _starpu_free_flags_on_node(unsigned dst_node, void *A, size_t dim, int flags); /** * Returns whether when allocating data on \p dst_node, we will do pinning, i.e. * the allocation will be very expensive, and should thus be moved out from the * critical path */ int _starpu_malloc_willpin_on_node(unsigned dst_node) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; /** * On CUDA which has very expensive malloc, for small sizes, allocate big * chunks divided in blocks, and we actually allocate segments of consecutive * blocks. * * We try to keep the list of chunks with increasing occupancy, so we can * quickly find free segments to allocate. */ #ifdef STARPU_USE_MAX_FPGA // FIXME: Maxeler FPGAs want 192 byte alignment #define CHUNK_SIZE (128*1024*192) #define CHUNK_ALLOC_MAX (CHUNK_SIZE / 8) #define CHUNK_ALLOC_MIN (128*192) #else /* Size of each chunk, 32MiB granularity brings 128 chunks to be allocated in * order to fill a 4GiB GPU. */ #define CHUNK_SIZE (32*1024*1024) /* Maximum segment size we will allocate in chunks */ #define CHUNK_ALLOC_MAX (CHUNK_SIZE / 8) /* Granularity of allocation, i.e. block size, StarPU will never allocate less * than this. * 16KiB (i.e. 64x64 float) granularity eats 2MiB RAM for managing a 4GiB GPU. */ #define CHUNK_ALLOC_MIN (16*1024) #endif /* Don't really deallocate chunks unless we have more than this many chunks * which are completely free. */ #define CHUNKS_NFREE 4 /* Number of blocks */ #define CHUNK_NBLOCKS (CHUNK_SIZE/CHUNK_ALLOC_MIN) /* Linked list for available segments */ struct block { int length; /* Number of consecutive free blocks */ int next; /* next free segment */ }; /* One chunk */ LIST_TYPE(_starpu_chunk, uintptr_t base; /* Available number of blocks, for debugging */ int available; /* Overestimation of the maximum size of available segments in this chunk */ int available_max; /* Bitmap describing availability of the block */ /* Block 0 is always empty, and is just the head of the free segments list */ struct block bitmap[CHUNK_NBLOCKS+1]; ) #pragma GCC visibility pop #endif starpu-1.4.9+dfsg/src/datawizard/memalloc.c000066400000000000000000001643531507764646700207020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2018,2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include /* When reclaiming memory to allocate, we reclaim data_size_coefficient*data_size */ const unsigned starpu_memstrategy_data_size_coefficient=2; /* Minimum percentage of available memory in each node */ static unsigned minimum_p; static unsigned target_p; /* Minimum percentage of number of clean buffer in each node */ static unsigned minimum_clean_p; static unsigned target_clean_p; /* Whether CPU memory has been explicitly limited by user */ static int limit_cpu_mem; /* TODO: no home doesn't mean always clean, should push to larger memory nodes */ #define MC_LIST_PUSH_BACK(node_struct, mc) do { \ _starpu_mem_chunk_list_push_back(&node_struct->mc_list, mc); \ if ((mc)->clean || (mc)->home) \ /* This is clean */ \ node_struct->mc_clean_nb++; \ else if (!node_struct->mc_dirty_head) \ /* This is the only dirty element for now */ \ node_struct->mc_dirty_head = mc; \ node_struct->mc_nb++; \ } while(0) /* Put new clean mc at the end of the clean part of mc_list, i.e. just before mc_dirty_head (if any) */ #define MC_LIST_PUSH_CLEAN(node_struct, mc) do { \ if (node_struct->mc_dirty_head) \ _starpu_mem_chunk_list_insert_before(&node_struct->mc_list, mc, node_struct->mc_dirty_head); \ else \ _starpu_mem_chunk_list_push_back(&node_struct->mc_list, mc); \ /* This is clean */ \ node_struct->mc_clean_nb++; \ node_struct->mc_nb++; \ } while (0) #define MC_LIST_ERASE(node_struct, mc) do { \ if ((mc)->clean || (mc)->home) \ node_struct->mc_clean_nb--; /* One clean element less */ \ if ((mc) == node_struct->mc_dirty_head) \ /* This was the dirty head */ \ node_struct->mc_dirty_head = _starpu_mem_chunk_list_next((mc)); \ /* One element less */ \ node_struct->mc_nb--; \ /* Remove element */ \ _starpu_mem_chunk_list_erase(&node_struct->mc_list, (mc)); \ /* Notify whoever asked for it */ \ if ((mc)->remove_notify) \ { \ *((mc)->remove_notify) = NULL; \ (mc)->remove_notify = NULL; \ } \ } while (0) /* Explicitly caches memory chunks that can be reused */ struct mc_cache_entry { UT_hash_handle hh; struct _starpu_mem_chunk_list list; uint32_t footprint; }; int _starpu_is_reclaiming(unsigned node) { struct _starpu_node *node_struct = _starpu_get_node_struct(node); return node_struct->tidying || node_struct->reclaiming; } static int can_evict(unsigned node) { return _starpu_get_node_struct(node)->evictable; } /* Called after initializing the set of memory nodes */ /* We use an accelerator -> CPU RAM -> disk storage hierarchy */ void _starpu_mem_chunk_init_last(void) { unsigned disk = 0; unsigned nnodes = starpu_memory_nodes_get_count(), i; for (i = 0; i < nnodes; i++) { enum starpu_node_kind kind = starpu_node_get_kind(i); struct _starpu_node *node_struct = _starpu_get_node_struct(i); if (kind == STARPU_DISK_RAM) /* Some disk, will be able to evict RAM */ /* TODO: disk hierarchy */ disk = 1; else if (kind != STARPU_CPU_RAM) /* This is an accelerator, we can evict to main RAM */ node_struct->evictable = 1; } if (disk) for (i = 0; i < nnodes; i++) { enum starpu_node_kind kind = starpu_node_get_kind(i); if (kind == STARPU_CPU_RAM) _starpu_get_node_struct(i)->evictable = 1; } } /* A disk was registered, RAM is now evictable */ void _starpu_mem_chunk_disk_register(unsigned disk_memnode) { (void) disk_memnode; unsigned nnodes = starpu_memory_nodes_get_count(), i; for (i = 0; i < nnodes; i++) { enum starpu_node_kind kind = starpu_node_get_kind(i); if (kind == STARPU_CPU_RAM) { struct _starpu_node *node_struct = _starpu_get_node_struct(i); STARPU_HG_DISABLE_CHECKING(node_struct->evictable); node_struct->evictable = 1; } } } static int get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node); static int choose_target(starpu_data_handle_t handle, unsigned node); void _starpu_init_mem_chunk_lists(void) { unsigned i; for (i = 0; i < STARPU_MAXNODES; i++) { struct _starpu_node *node = _starpu_get_node_struct(i); _starpu_spin_init(&node->mc_lock); _starpu_mem_chunk_list_init(&node->mc_list); STARPU_HG_DISABLE_CHECKING(node->mc_cache_size); STARPU_HG_DISABLE_CHECKING(node->mc_nb); STARPU_HG_DISABLE_CHECKING(node->mc_clean_nb); STARPU_HG_DISABLE_CHECKING(node->prefetch_out_of_memory); } /* We do not enable forcing available memory by default, since this makes StarPU spuriously free data when prefetching fills the memory. Clean buffers should be enough to be able to allocate data easily anyway. */ minimum_p = starpu_getenv_number_default("STARPU_MINIMUM_AVAILABLE_MEM", 0); target_p = starpu_getenv_number_default("STARPU_TARGET_AVAILABLE_MEM", 0); minimum_clean_p = starpu_getenv_number_default("STARPU_MINIMUM_CLEAN_BUFFERS", 5); target_clean_p = starpu_getenv_number_default("STARPU_TARGET_CLEAN_BUFFERS", 10); limit_cpu_mem = starpu_getenv_number("STARPU_LIMIT_CPU_MEM"); } void _starpu_deinit_mem_chunk_lists(void) { unsigned i; for (i = 0; i < STARPU_MAXNODES; i++) { struct _starpu_node *node = _starpu_get_node_struct(i); struct mc_cache_entry *entry=NULL, *tmp=NULL; STARPU_ASSERT(node->mc_nb == 0); STARPU_ASSERT(node->mc_clean_nb == 0); STARPU_ASSERT(node->mc_dirty_head == NULL); HASH_ITER(hh, node->mc_cache, entry, tmp) { STARPU_ASSERT(_starpu_mem_chunk_list_empty(&entry->list)); HASH_DEL(node->mc_cache, entry); free(entry); } STARPU_ASSERT(node->mc_cache_nb == 0); STARPU_ASSERT(node->mc_cache_size == 0); _starpu_spin_destroy(&node->mc_lock); } } /* * Manipulate subtrees */ static void unlock_all_subtree(starpu_data_handle_t handle) { /* lock all sub-subtrees children * Note that this is done in the reverse order of the * lock_all_subtree so that we avoid deadlock */ unsigned i; for (i =0; i < handle->nchildren; i++) { unsigned child = handle->nchildren - 1 - i; starpu_data_handle_t child_handle = starpu_data_get_child(handle, child); unlock_all_subtree(child_handle); } _starpu_spin_unlock(&handle->header_lock); } static int lock_all_subtree(starpu_data_handle_t handle) { int child; /* lock parent */ if (_starpu_spin_trylock(&handle->header_lock)) /* the handle is busy, abort */ return 0; /* lock all sub-subtrees children */ for (child = 0; child < (int) handle->nchildren; child++) { if (!lock_all_subtree(starpu_data_get_child(handle, child))) { /* Some child is busy, abort */ while (--child >= 0) /* Unlock what we have already uselessly locked */ unlock_all_subtree(starpu_data_get_child(handle, child)); return 0; } } return 1; } static unsigned may_free_handle(starpu_data_handle_t handle, unsigned node) { STARPU_ASSERT(handle->per_node[node].mapped == STARPU_UNMAPPED); /* we only free if no one refers to the leaf */ uint32_t refcnt = _starpu_get_data_refcnt(handle, node); if (refcnt) return 0; if (handle->current_mode == STARPU_W) { if (handle->write_invalidation_req) /* Some request is invalidating it anyway */ return 0; unsigned n; for (n = 0; n < STARPU_MAXNODES; n++) if (_starpu_get_data_refcnt(handle, n)) /* Some task is writing to the handle somewhere */ return 0; } /* no problem was found */ return 1; } static unsigned may_free_subtree(starpu_data_handle_t handle, unsigned node) { if (!may_free_handle(handle, node)) return 0; /* look into all sub-subtrees children */ unsigned child; for (child = 0; child < handle->nchildren; child++) { unsigned res; starpu_data_handle_t child_handle = starpu_data_get_child(handle, child); res = may_free_subtree(child_handle, node); if (!res) return 0; } /* no problem was found */ return 1; } /* Warn: this releases the header lock of the handle during the transfer * The handle may thus unexpectedly disappear. This returns 1 in that case. */ static int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_node, unsigned dst_node) { STARPU_ASSERT(dst_node != src_node); if (handle->nchildren == 0) { struct _starpu_data_replicate *src_replicate = &handle->per_node[src_node]; struct _starpu_data_replicate *dst_replicate = &handle->per_node[dst_node]; STARPU_ASSERT(src_replicate->mapped == STARPU_UNMAPPED); STARPU_ASSERT(dst_replicate->mapped == STARPU_UNMAPPED); /* this is a leaf */ while (src_replicate->state == STARPU_OWNER) { /* This is the only copy, push it to destination */ struct _starpu_data_request *r; r = _starpu_create_request_to_fetch_data(handle, dst_replicate, STARPU_R, NULL, STARPU_FETCH, 0, NULL, NULL, 0, "transfer_subtree_to_node"); /* There is no way we don't need a request, since * source is OWNER, destination can't be having it */ STARPU_ASSERT(r); /* Keep the handle alive while we are working on it */ handle->busy_count++; _starpu_spin_unlock(&handle->header_lock); _starpu_wait_data_request_completion(r, 1); _starpu_spin_lock(&handle->header_lock); handle->busy_count--; if (_starpu_data_check_not_busy(handle)) /* Actually disappeared, abort completely */ return -1; if (!may_free_subtree(handle, src_node)) /* Oops, while we released the header lock, a * task got in, abort. */ return 0; } STARPU_ASSERT(may_free_subtree(handle, src_node)); if (src_replicate->state == STARPU_SHARED) { unsigned i; unsigned last = 0; unsigned cnt = 0; /* some other node may have the copy */ if (src_replicate->state != STARPU_INVALID) _STARPU_TRACE_DATA_STATE_INVALID(handle, src_node); src_replicate->state = STARPU_INVALID; /* count the number of copies */ for (i = 0; i < STARPU_MAXNODES; i++) { if (handle->per_node[i].state == STARPU_SHARED) { cnt++; last = i; } } STARPU_ASSERT(cnt > 0); if (cnt == 1) { if (handle->per_node[last].state != STARPU_OWNER) _STARPU_TRACE_DATA_STATE_OWNER(handle, last); handle->per_node[last].state = STARPU_OWNER; } } else STARPU_ASSERT(src_replicate->state == STARPU_INVALID); /* Already dropped by somebody, in which case there is nothing to be done */ } else { /* transfer all sub-subtrees children */ unsigned child; for (child = 0; child < handle->nchildren; child++) { starpu_data_handle_t child_handle = starpu_data_get_child(handle, child); int res = transfer_subtree_to_node(child_handle, src_node, dst_node); if (res == 0) return 0; /* There is no way children have disappeared since we * keep the parent lock held */ STARPU_ASSERT(res != -1); } } /* Success! */ return 1; } static void notify_handle_children(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node) { unsigned child; replicate->allocated = 0; /* XXX why do we need that ? */ replicate->automatically_allocated = 0; for (child = 0; child < handle->nchildren; child++) { /* Notify children that their buffer has been deallocated too */ starpu_data_handle_t child_handle = starpu_data_get_child(handle, child); notify_handle_children(child_handle, &child_handle->per_node[node], node); } } static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node) { size_t freed = 0; STARPU_ASSERT(mc->ops); STARPU_ASSERT(mc->ops->free_data_on_node); starpu_data_handle_t handle = mc->data; struct _starpu_data_replicate *replicate = mc->replicate; if (handle) _starpu_spin_checklocked(&handle->header_lock); if (mc->automatically_allocated && (!handle || replicate->refcnt == 0)) { void *data_interface; if (handle) { STARPU_ASSERT(replicate->allocated); STARPU_ASSERT(replicate->mapped == STARPU_UNMAPPED); } if (handle) data_interface = replicate->data_interface; else data_interface = mc->chunk_interface; STARPU_ASSERT(data_interface); _STARPU_TRACE_START_FREE(node, mc->size, handle); mc->ops->free_data_on_node(data_interface, node); _STARPU_TRACE_END_FREE(node, handle); if (handle) notify_handle_children(handle, replicate, node); freed = mc->size; if (handle) STARPU_ASSERT(replicate->refcnt == 0); } return freed; } /* mc_lock is held */ static size_t do_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node) { size_t size; starpu_data_handle_t handle = mc->data; if (handle) { _starpu_spin_checklocked(&handle->header_lock); mc->size = _starpu_data_get_alloc_size(handle); mc->replicate->mc=NULL; } /* free the actual buffer */ size = free_memory_on_node(mc, node); /* remove the mem_chunk from the list */ MC_LIST_ERASE(_starpu_get_node_struct(node), mc); _starpu_mem_chunk_delete(mc); #ifdef STARPU_SIMGRID starpu_pthread_queue_broadcast(&_starpu_simgrid_transfer_queue[node]); #endif return size; } /* We assume that node->mc_lock is taken. is_already_in_mc_list indicates * that the mc is already in the list of buffers that are possibly used, and * therefore not in the cache. */ static void reuse_mem_chunk(unsigned node, struct _starpu_data_replicate *new_replicate, struct _starpu_mem_chunk *mc, unsigned is_already_in_mc_list) { void *data_interface; /* we found an appropriate mem chunk: so we get it out * of the "to free" list, and reassign it to the new * piece of data */ struct _starpu_data_replicate *old_replicate = mc->replicate; if (old_replicate) { old_replicate->mc = NULL; old_replicate->allocated = 0; old_replicate->automatically_allocated = 0; old_replicate->initialized = 0; data_interface = old_replicate->data_interface; } else data_interface = mc->chunk_interface; STARPU_ASSERT(new_replicate->data_interface); STARPU_ASSERT(data_interface); if (mc->ops->reuse_data_on_node) mc->ops->reuse_data_on_node(new_replicate->data_interface, data_interface, node); else memcpy(new_replicate->data_interface, data_interface, mc->size_interface); if (!old_replicate) { /* Free the copy that we made */ free(mc->chunk_interface); mc->chunk_interface = NULL; } /* XXX: We do not actually reuse the mc at the moment, only the interface */ /* mc->data = new_replicate->handle; */ /* mc->footprint, mc->ops, mc->size_interface, * mc->automatically_allocated should be unchanged ! */ /* remove the mem chunk from the list of active memory chunks, register_mem_chunk will put it back later */ if (is_already_in_mc_list) MC_LIST_ERASE(_starpu_get_node_struct(node), mc); free(mc); } int starpu_data_can_evict(starpu_data_handle_t handle, unsigned node, enum starpu_is_prefetch is_prefetch) { STARPU_ASSERT(node < STARPU_MAXNODES); /* This data should be written through to this node, avoid dropping it! */ if (node < sizeof(handle->wt_mask) * 8 && handle->wt_mask & (1<home_node) return 0; unsigned mapnode; for (mapnode = 0; mapnode < STARPU_MAXNODES; mapnode++) if (handle->per_node[mapnode].mapped == (int) node) /* This is mapped, we can't evict it */ /* TODO: rather check if that can be evicted as well, and if so unmap it before evicting this */ return 0; /* This data cannot be pushed outside CPU memory */ if (!handle->ooc && handle->home_node == -1 && starpu_node_get_kind(node) == STARPU_CPU_RAM && starpu_memory_nodes_get_numa_count() == 1) return 0; if (is_prefetch >= STARPU_TASK_PREFETCH && handle->per_node[node].nb_tasks_prefetch) /* We have not finished executing the tasks this was prefetched for */ return 0; if (!may_free_handle(handle, node)) /* Somebody refers to it */ return 0; return 1; } /* This function is called for memory chunks that are possibly in used (ie. not * in the cache). They should therefore still be associated to a handle. */ /* mc_lock is held and may be temporarily released! */ static size_t try_to_throw_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node, struct _starpu_data_replicate *replicate, unsigned is_already_in_mc_list, enum starpu_is_prefetch is_prefetch) { size_t freed = 0; starpu_data_handle_t handle; handle = mc->data; STARPU_ASSERT(handle); if (!starpu_data_can_evict(handle, node, is_prefetch)) return 0; /* REDUX memchunk */ if (mc->relaxed_coherency == 2) { /* TODO: reduce it back to e.g. main memory */ } else /* Either it's a "relaxed coherency" memchunk (SCRATCH), or it's a * memchunk that could be used with filters. */ if (mc->relaxed_coherency == 1) { if (_starpu_spin_trylock(&handle->header_lock)) /* Handle is busy, abort */ return 0; if (!mc->replicate) { /* _starpu_request_mem_chunk_removal removed it before us */ _starpu_spin_unlock(&handle->header_lock); return 0; } if (mc->replicate->refcnt == 0) { /* Note that there is no need to transfer any data or * to update the status in terms of MSI protocol * because this memchunk is associated to a replicate * in "relaxed coherency" mode. */ if (replicate) { /* Reuse for this replicate */ reuse_mem_chunk(node, replicate, mc, is_already_in_mc_list); freed = 1; } else { /* Free */ freed = do_free_mem_chunk(mc, node); } } _starpu_spin_unlock(&handle->header_lock); } else if (lock_all_subtree(handle)) /* try to lock all the subtree */ { if (!(replicate && handle->per_node[node].state == STARPU_OWNER)) { /* check if they are all "free" */ if (may_free_subtree(handle, node)) { int target = -1; /* XXX Considering only owner to invalidate */ STARPU_ASSERT(handle->per_node[node].refcnt == 0); /* in case there was nobody using that buffer, throw it * away after writing it back to main memory */ /* choose the best target */ target = choose_target(handle, node); if (target != -1 && /* Only reuse memchunks which are easy to throw * away (which is likely thanks to periodic tidying). * If there are none, we prefer to let generic eviction * perhaps find other kinds of memchunks which will be * earlier in LRU, and easier to throw away. */ !(replicate && handle->per_node[node].state == STARPU_OWNER)) { int res; /* Should have been avoided in our caller */ STARPU_ASSERT(!mc->remove_notify); mc->remove_notify = &mc; _starpu_spin_unlock(&_starpu_get_node_struct(node)->mc_lock); #ifdef STARPU_MEMORY_STATS if (handle->per_node[node].state == STARPU_OWNER) _starpu_memory_handle_stats_invalidated(handle, node); #endif _STARPU_TRACE_START_WRITEBACK(node, handle); /* Note: this may need to allocate data etc. * and thus release the header lock, take * mc_lock, etc. */ res = transfer_subtree_to_node(handle, node, target); _STARPU_TRACE_END_WRITEBACK(node, handle); #ifdef STARPU_MEMORY_STATS _starpu_memory_handle_stats_loaded_owner(handle, target); #endif _starpu_spin_lock(&_starpu_get_node_struct(node)->mc_lock); if (!mc) { if (res == -1) { /* handle disappeared, abort without unlocking it */ return 0; } } else { STARPU_ASSERT(mc->remove_notify == &mc); mc->remove_notify = NULL; if (res == -1) { /* handle disappeared, abort without unlocking it */ return 0; } if (res == 1) { /* mc is still associated with the old * handle, now free it. */ if (handle->per_node[node].refcnt == 0) { /* And still nobody on it, now the actual buffer may be reused or freed */ if (replicate) { /* Reuse for this replicate */ reuse_mem_chunk(node, replicate, mc, is_already_in_mc_list); freed = 1; } else { /* Free */ freed = do_free_mem_chunk(mc, node); } } } } } } } /* unlock the tree */ unlock_all_subtree(handle); } return freed; } static int _starpu_data_interface_compare(void *data_interface_a, struct starpu_data_interface_ops *ops_a, void *data_interface_b, struct starpu_data_interface_ops *ops_b) { if (ops_a->interfaceid != ops_b->interfaceid) return -1; if (ops_a->dontcache || ops_b->dontcache) return -1; int ret; if (ops_a->alloc_compare) ret = ops_a->alloc_compare(data_interface_a, data_interface_b); else { STARPU_ASSERT_MSG(ops_a->compare, "the interface '%s' does define neither alloc_compare nor compare method", ops_a->name); ret = ops_a->compare(data_interface_a, data_interface_b); } return ret; } #ifdef STARPU_USE_ALLOCATION_CACHE /* This function must be called with node->mc_lock taken */ static struct _starpu_mem_chunk *_starpu_memchunk_cache_lookup_locked(unsigned node, starpu_data_handle_t handle, uint32_t footprint) { /* go through all buffers in the cache */ struct mc_cache_entry *entry; struct _starpu_node *node_struct = _starpu_get_node_struct(node); HASH_FIND(hh, node_struct->mc_cache, &footprint, sizeof(footprint), entry); if (!entry) /* No data with that footprint */ return NULL; struct _starpu_mem_chunk *mc; for (mc = _starpu_mem_chunk_list_begin(&entry->list); mc != _starpu_mem_chunk_list_end(&entry->list); mc = _starpu_mem_chunk_list_next(mc)) { /* Is that a false hit ? (this is _very_ unlikely) */ if (_starpu_data_interface_compare(handle->per_node[node].data_interface, handle->ops, mc->chunk_interface, mc->ops) != 1) continue; /* Cache hit */ /* Remove from the cache */ _starpu_mem_chunk_list_erase(&entry->list, mc); node_struct->mc_cache_nb--; STARPU_ASSERT_MSG(node_struct->mc_cache_nb >= 0, "allocation cache for node %u has %d objects??", node, node_struct->mc_cache_nb); node_struct->mc_cache_size -= mc->size; STARPU_ASSERT_MSG(node_struct->mc_cache_size >= 0, "allocation cache for node %u has %ld bytes??", node, (long) node_struct->mc_cache_size); return mc; } /* This is a cache miss */ return NULL; } /* this function looks for a memory chunk that matches a given footprint in the * list of mem chunk that need to be freed. */ static int try_to_find_reusable_mc(unsigned node, starpu_data_handle_t data, struct _starpu_data_replicate *replicate, uint32_t footprint) { struct _starpu_mem_chunk *mc; int success = 0; _starpu_spin_lock(&_starpu_get_node_struct(node)->mc_lock); /* go through all buffers in the cache */ mc = _starpu_memchunk_cache_lookup_locked(node, data, footprint); if (mc) { /* We found an entry in the cache so we can reuse it */ reuse_mem_chunk(node, replicate, mc, 0); success = 1; } _starpu_spin_unlock(&_starpu_get_node_struct(node)->mc_lock); return success; } #endif /* this function looks for a memory chunk that matches a given footprint in the * list of mem chunk that are not important */ static int try_to_reuse_not_important_mc(unsigned node, starpu_data_handle_t data, struct _starpu_data_replicate *replicate, uint32_t footprint, enum starpu_is_prefetch is_prefetch) { struct _starpu_mem_chunk *mc, *orig_next_mc, *next_mc; int success = 0; struct _starpu_node *node_struct = _starpu_get_node_struct(node); _starpu_spin_lock(&node_struct->mc_lock); restart: /* now look for some non essential data in the active list */ for (mc = _starpu_mem_chunk_list_begin(&node_struct->mc_list); mc != _starpu_mem_chunk_list_end(&node_struct->mc_list) && !success; mc = next_mc) { /* there is a risk that the memory chunk is freed before next * iteration starts: so we compute the next element of the list * now */ orig_next_mc = next_mc = _starpu_mem_chunk_list_next(mc); if (mc->remove_notify) /* Somebody already working here, skip */ continue; if (!mc->data->is_not_important) /* Important data, skip */ continue; if (mc->footprint != footprint || _starpu_data_interface_compare(data->per_node[node].data_interface, data->ops, mc->data->per_node[node].data_interface, mc->ops) != 1) /* Not the right type of interface, skip */ continue; if (next_mc) { if (next_mc->remove_notify) /* Somebody already working here, skip */ continue; next_mc->remove_notify = &next_mc; } /* Note: this may unlock mc_list! */ success = try_to_throw_mem_chunk(mc, node, replicate, 1, is_prefetch); if (orig_next_mc) { if (!next_mc) /* Oops, somebody dropped the next item while we were * not keeping the mc_lock. Restart from the beginning * of the list */ goto restart; else { STARPU_ASSERT(next_mc->remove_notify == &next_mc); next_mc->remove_notify = NULL; } } } _starpu_spin_unlock(&node_struct->mc_lock); return success; } /* * Try to find a buffer currently in use on the memory node which has the given * footprint. */ static int try_to_reuse_potentially_in_use_mc(unsigned node, starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, uint32_t footprint, enum starpu_is_prefetch is_prefetch) { struct _starpu_mem_chunk *mc, *next_mc, *orig_next_mc; int success = 0; struct _starpu_node *node_struct = _starpu_get_node_struct(node); if (is_prefetch >= STARPU_IDLEFETCH) /* Do not evict a MC just for an idle fetch */ return 0; /* * We have to unlock mc_lock before locking header_lock, so we have * to be careful with the list. We try to do just one pass, by * remembering the next mc to be tried. If it gets dropped, we restart * from zero. So we continue until we go through the whole list without * finding anything to free. */ _starpu_spin_lock(&node_struct->mc_lock); restart: for (mc = _starpu_mem_chunk_list_begin(&node_struct->mc_list); mc != _starpu_mem_chunk_list_end(&node_struct->mc_list) && !success; mc = next_mc) { /* mc hopefully gets out of the list, we thus need to prefetch * the next element */ orig_next_mc = next_mc = _starpu_mem_chunk_list_next(mc); if (mc->remove_notify) /* Somebody already working here, skip */ continue; if (mc->footprint != footprint || _starpu_data_interface_compare(handle->per_node[node].data_interface, handle->ops, mc->data->per_node[node].data_interface, mc->ops) != 1) /* Not the right type of interface, skip */ continue; if (next_mc) { if (next_mc->remove_notify) /* Somebody already working here, skip */ continue; next_mc->remove_notify = &next_mc; } /* Note: this may unlock mc_list! */ success = try_to_throw_mem_chunk(mc, node, replicate, 1, is_prefetch); if (orig_next_mc) { if (!next_mc) /* Oops, somebody dropped the next item while we were * not keeping the mc_lock. Restart from the beginning * of the list */ goto restart; else { STARPU_ASSERT(next_mc->remove_notify == &next_mc); next_mc->remove_notify = NULL; } } } _starpu_spin_unlock(&node_struct->mc_lock); return success; } /* * Free the memory chunks that are explicitly tagged to be freed. */ static size_t flush_memchunk_cache(unsigned node, size_t reclaim) { struct _starpu_mem_chunk *mc; struct mc_cache_entry *entry=NULL, *tmp=NULL; struct _starpu_node *node_struct = _starpu_get_node_struct(node); size_t freed = 0; restart: _starpu_spin_lock(&node_struct->mc_lock); HASH_ITER(hh, node_struct->mc_cache, entry, tmp) { if (!_starpu_mem_chunk_list_empty(&entry->list)) { mc = _starpu_mem_chunk_list_pop_front(&entry->list); STARPU_ASSERT(!mc->data); STARPU_ASSERT(!mc->replicate); node_struct->mc_cache_nb--; STARPU_ASSERT(node_struct->mc_cache_nb >= 0); node_struct->mc_cache_size -= mc->size; STARPU_ASSERT(node_struct->mc_cache_size >= 0); _starpu_spin_unlock(&node_struct->mc_lock); freed += free_memory_on_node(mc, node); free(mc->chunk_interface); _starpu_mem_chunk_delete(mc); if (reclaim && freed >= reclaim) goto out; goto restart; } if (reclaim && freed >= reclaim) break; } _starpu_spin_unlock(&node_struct->mc_lock); out: return freed; } /* * Try to free the buffers currently in use on the memory node. If the force * flag is set, the memory is freed regardless of coherency concerns (this * should only be used at the termination of StarPU for instance). */ static size_t free_potentially_in_use_mc(unsigned node, unsigned force, size_t reclaim, enum starpu_is_prefetch is_prefetch STARPU_ATTRIBUTE_UNUSED) { size_t freed = 0; struct _starpu_node *node_struct = _starpu_get_node_struct(node); struct _starpu_mem_chunk *mc, *next_mc; /* * We have to unlock mc_lock before locking header_lock, so we have * to be careful with the list. We try to do just one pass, by * remembering the next mc to be tried. If it gets dropped, we restart * from zero. So we continue until we go through the whole list without * finding anything to free. */ restart: _starpu_spin_lock(&node_struct->mc_lock); restart2: for (mc = _starpu_mem_chunk_list_begin(&node_struct->mc_list); mc != _starpu_mem_chunk_list_end(&node_struct->mc_list) && (!reclaim || freed < reclaim); mc = next_mc) { /* mc hopefully gets out of the list, we thus need to prefetch * the next element */ next_mc = _starpu_mem_chunk_list_next(mc); if (!force) { struct _starpu_mem_chunk *orig_next_mc = next_mc; if (mc->remove_notify) /* Somebody already working here, skip */ continue; if (next_mc) { if (next_mc->remove_notify) /* Somebody already working here, skip */ continue; next_mc->remove_notify = &next_mc; } /* Note: this may unlock mc_list! */ freed += try_to_throw_mem_chunk(mc, node, NULL, 0, is_prefetch); if (orig_next_mc) { if (!next_mc) /* Oops, somebody dropped the next item while we were * not keeping the mc_lock. Restart from the beginning * of the list */ goto restart2; else { STARPU_ASSERT(next_mc->remove_notify == &next_mc); next_mc->remove_notify = NULL; } } } else { /* Shutting down, really free */ starpu_data_handle_t handle = mc->data; if (_starpu_spin_trylock(&handle->header_lock)) { /* Ergl. We are shutting down, but somebody is * still locking the handle. That's not * supposed to happen, but better be safe by * letting it go through. */ _starpu_spin_unlock(&node_struct->mc_lock); goto restart; } /* We must free the memory now, because we are * terminating the drivers: note that data coherency is * not maintained in that case ! */ freed += do_free_mem_chunk(mc, node); _starpu_spin_unlock(&handle->header_lock); } } _starpu_spin_unlock(&node_struct->mc_lock); return freed; } size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t reclaim, enum starpu_is_prefetch is_prefetch) { size_t freed = 0; STARPU_ASSERT(node < STARPU_MAXNODES); if (reclaim && !force) { static unsigned warned; STARPU_HG_DISABLE_CHECKING(warned); if (!warned) { if (STARPU_ATOMIC_ADD(&warned, 1) == 1) { char name[32]; starpu_memory_node_get_name(node, name, sizeof(name)); _STARPU_DISP("Not enough memory left on node %s. Your application data set seems too huge to fit on the device, StarPU will cope by trying to purge %lu MiB out. This message will not be printed again for further purges. You may want to tune the STARPU_MINIMUM_CLEAN_BUFFERS and STARPU_TARGET_CLEAN_BUFFERS environment variables up a bit to make StarPU maintain more clean memory available, to avoid ending up in this situation.\n", name, (unsigned long) ((reclaim+1048575) / 1048576)); } } } /* remove all buffers for which there was a removal request */ freed += flush_memchunk_cache(node, reclaim); /* try to free all allocated data potentially in use */ if (force || (reclaim && freedper_worker) replicate = &handle->per_worker[node]; else replicate = &handle->per_node[node]; _starpu_spin_lock(&handle->header_lock); struct _starpu_mem_chunk *mc = replicate->mc; int ret = -1; if (!mc) { _starpu_spin_unlock(&handle->header_lock); /* Nothing there */ goto out; } _starpu_spin_lock(&_starpu_get_node_struct(node)->mc_lock); /* Now we got the mc, we can unlock the header to let * try_to_throw_mem_chunk reacquire it */ _starpu_spin_unlock(&handle->header_lock); if (mc->remove_notify) /* Somebody already working here */ goto out_mc; if (try_to_throw_mem_chunk(mc, node, NULL, 0, STARPU_FETCH) == 0) goto out_mc; ret = 0; out_mc: _starpu_spin_unlock(&_starpu_get_node_struct(node)->mc_lock); out: return ret; } /* Periodic tidy of available memory */ void starpu_memchunk_tidy(unsigned node) { starpu_ssize_t total; starpu_ssize_t available; size_t target, amount; struct _starpu_node *node_struct = _starpu_get_node_struct(node); STARPU_ASSERT(node < STARPU_MAXNODES); if (!can_evict(node)) return; if (node_struct->mc_clean_nb < (node_struct->mc_nb * minimum_clean_p) / 100) { struct _starpu_mem_chunk *mc, *orig_next_mc, *next_mc; int skipped = 0; /* Whether we skipped a dirty MC, and we should thus stop updating mc_dirty_head. */ /* _STARPU_DEBUG("%d not clean: %d %d\n", node, node_struct->mc_clean_nb, node_struct->mc_nb); */ _STARPU_TRACE_START_WRITEBACK_ASYNC(node); _starpu_spin_lock(&node_struct->mc_lock); for (mc = node_struct->mc_dirty_head; mc && node_struct->mc_clean_nb < (node_struct->mc_nb * target_clean_p) / 100; mc = next_mc, mc && skipped ? 0 : (node_struct->mc_dirty_head = mc)) { starpu_data_handle_t handle; /* mc may get out of the list, we thus need to prefetch * the next element */ next_mc = _starpu_mem_chunk_list_next(mc); if (mc->home) /* Home node, it's always clean */ continue; if (mc->clean) /* already clean */ continue; if (next_mc && next_mc->remove_notify) { /* Somebody already working here, skip */ skipped = 1; continue; } handle = mc->data; STARPU_ASSERT(handle); /* This data cannot be pushed outside CPU memory */ if (!handle->ooc && handle->home_node == -1 && starpu_node_get_kind(node) == STARPU_CPU_RAM && starpu_memory_nodes_get_numa_count() == 1) continue; if (_starpu_spin_trylock(&handle->header_lock)) { /* the handle is busy, abort */ skipped = 1; continue; } if (handle->current_mode == STARPU_W) { if (handle->write_invalidation_req) { /* Some request is invalidating it anyway */ _starpu_spin_unlock(&handle->header_lock); continue; } unsigned n; for (n = 0; n < STARPU_MAXNODES; n++) if (_starpu_get_data_refcnt(handle, n)) break; if (n < STARPU_MAXNODES) { /* Some task is writing to the handle somewhere */ _starpu_spin_unlock(&handle->header_lock); skipped = 1; continue; } } if ( /* This data should be written through to this node, avoid * dropping it! */ (node < sizeof(handle->wt_mask) * 8 && handle->wt_mask & (1<nchildren /* REDUX, can't do anything with it, skip it */ || mc->relaxed_coherency == 2 ) { _starpu_spin_unlock(&handle->header_lock); continue; } if (handle->home_node != -1 && (handle->per_node[handle->home_node].state != STARPU_INVALID || mc->relaxed_coherency == 1)) { /* It's available in the home node, this should have been marked as clean already */ mc->clean = 1; node_struct->mc_clean_nb++; _starpu_spin_unlock(&handle->header_lock); continue; } int target_node; if (handle->home_node == -1) target_node = choose_target(handle, node); else target_node = handle->home_node; if (target_node == -1) { /* Nowhere to put it, can't do much */ _starpu_spin_unlock(&handle->header_lock); continue; } STARPU_ASSERT(target_node != (int) node); /* MC is dirty and nobody working on it, submit writeback */ /* MC will be clean, consider it as such */ mc->clean = 1; node_struct->mc_clean_nb++; orig_next_mc = next_mc; if (next_mc) { STARPU_ASSERT(!next_mc->remove_notify); next_mc->remove_notify = &next_mc; } _starpu_spin_unlock(&node_struct->mc_lock); if (!_starpu_create_request_to_fetch_data(handle, &handle->per_node[target_node], STARPU_R, NULL, STARPU_IDLEFETCH, 1, NULL, NULL, 0, "starpu_memchunk_tidy")) { /* No request was actually needed?? * Odd, but cope with it. */ handle = NULL; } _starpu_spin_lock(&node_struct->mc_lock); if (orig_next_mc) { if (!next_mc) /* Oops, somebody dropped the next item while we were * not keeping the mc_lock. Give up for now, and we'll * see the rest later */ ; else { STARPU_ASSERT(next_mc->remove_notify == &next_mc); next_mc->remove_notify = NULL; } } if (handle) _starpu_spin_unlock(&handle->header_lock); } _starpu_spin_unlock(&node_struct->mc_lock); _STARPU_TRACE_END_WRITEBACK_ASYNC(node); } total = starpu_memory_get_total(node); if (total <= 0) return; available = starpu_memory_get_available(node); /* Count cached allocation as being available */ available += node_struct->mc_cache_size; if (available >= (starpu_ssize_t) (total * minimum_p) / 100) /* Enough available space, do not trigger reclaiming */ return; /* Not enough available space, reclaim until we reach the target. */ target = (total * target_p) / 100; amount = target - available; if (!STARPU_RUNNING_ON_VALGRIND && node_struct->tidying) /* Some thread is already tidying this node, let it do it */ return; if (STARPU_ATOMIC_ADD(&node_struct->tidying, 1) > 1) /* Some thread got it before us, let it do it */ goto out; static unsigned warned; STARPU_HG_DISABLE_CHECKING(warned); if (!warned) { if (STARPU_ATOMIC_ADD(&warned, 1) == 1) { char name[32]; starpu_memory_node_get_name(node, name, sizeof(name)); _STARPU_DISP("Low memory left on node %s (%ldMiB over %luMiB). Your application data set seems too huge to fit on the device, StarPU will cope by trying to purge %lu MiB out. This message will not be printed again for further purges. The thresholds can be tuned using the STARPU_MINIMUM_AVAILABLE_MEM and STARPU_TARGET_AVAILABLE_MEM environment variables.\n", name, (long) (available / 1048576), (unsigned long) (total / 1048576), (unsigned long) ((amount+1048575) / 1048576)); } } _STARPU_TRACE_START_MEMRECLAIM(node,2); free_potentially_in_use_mc(node, 0, amount, STARPU_PREFETCH); _STARPU_TRACE_END_MEMRECLAIM(node,2); out: (void) STARPU_ATOMIC_ADD(&node_struct->tidying, -1); } static struct _starpu_mem_chunk *_starpu_memchunk_init(struct _starpu_data_replicate *replicate, size_t interface_size, unsigned home, unsigned automatically_allocated) { struct _starpu_mem_chunk *mc = _starpu_mem_chunk_new(); starpu_data_handle_t handle = replicate->handle; STARPU_ASSERT(handle); STARPU_ASSERT(handle->ops); mc->data = handle; mc->footprint = _starpu_compute_data_alloc_footprint(handle); mc->ops = handle->ops; mc->automatically_allocated = automatically_allocated; mc->relaxed_coherency = replicate->relaxed_coherency; mc->home = home; mc->clean = 1; mc->replicate = replicate; mc->replicate->mc = mc; mc->chunk_interface = NULL; mc->size_interface = interface_size; mc->remove_notify = NULL; mc->wontuse = 0; return mc; } static void register_mem_chunk(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned automatically_allocated) { unsigned dst_node = replicate->memory_node; struct _starpu_node *node_struct = _starpu_get_node_struct(dst_node); struct _starpu_mem_chunk *mc; /* the interface was already filled by ops->allocate_data_on_node */ size_t interface_size = replicate->handle->ops->interface_size; /* Put this memchunk in the list of memchunk in use */ mc = _starpu_memchunk_init(replicate, interface_size, (int) dst_node == handle->home_node, automatically_allocated); _starpu_spin_lock(&node_struct->mc_lock); MC_LIST_PUSH_BACK(node_struct, mc); _starpu_spin_unlock(&node_struct->mc_lock); } /* This function is called when the handle is destroyed (eg. when calling * unregister or unpartition). It puts all the memchunks that refer to the * specified handle into the cache. */ void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node, size_t size) { STARPU_ASSERT(replicate->mapped == STARPU_UNMAPPED); struct _starpu_mem_chunk *mc = replicate->mc; struct _starpu_node *node_struct = _starpu_get_node_struct(node); STARPU_ASSERT(mc->data == handle); _starpu_spin_checklocked(&handle->header_lock); STARPU_ASSERT(node < STARPU_MAXNODES); /* Record the allocated size, so that later in memory * reclaiming we can estimate how much memory we free * by freeing this. */ mc->size = size; /* This memchunk doesn't have to do with the data any more. */ replicate->mc = NULL; mc->replicate = NULL; replicate->allocated = 0; replicate->automatically_allocated = 0; replicate->initialized = 0; _starpu_spin_lock(&node_struct->mc_lock); mc->data = NULL; /* remove it from the main list */ MC_LIST_ERASE(node_struct, mc); _starpu_spin_unlock(&node_struct->mc_lock); /* * Unless we have a memory limitation, we would fill * memory with cached data and then eventually swap. */ /* * This is particularly important when * STARPU_USE_ALLOCATION_CACHE is not enabled, as we * wouldn't even ever re-use these allocations! */ if (handle->ops->dontcache || (starpu_node_get_kind(node) == STARPU_CPU_RAM && !_starpu_malloc_willpin_on_node(node)) #ifndef STARPU_USE_ALLOCATION_CACHE || !_starpu_memory_manager_get_global_memory_size(node) #endif ) { /* Free data immediately */ mc->chunk_interface = replicate->data_interface; free_memory_on_node(mc, node); _starpu_mem_chunk_delete(mc); } else { /* Keep the interface parameters and pointers, for later reuse * while detached, or freed */ _STARPU_MALLOC(mc->chunk_interface, mc->size_interface); if (mc->ops->cache_data_on_node) mc->ops->cache_data_on_node(mc->chunk_interface, replicate->data_interface, node); else memcpy(mc->chunk_interface, replicate->data_interface, mc->size_interface); /* put it in the list of buffers to be removed */ uint32_t footprint = mc->footprint; struct mc_cache_entry *entry; _starpu_spin_lock(&node_struct->mc_lock); HASH_FIND(hh, node_struct->mc_cache, &footprint, sizeof(footprint), entry); if (!entry) { _STARPU_MALLOC(entry, sizeof(*entry)); _starpu_mem_chunk_list_init(&entry->list); entry->footprint = footprint; HASH_ADD(hh, node_struct->mc_cache, footprint, sizeof(entry->footprint), entry); } node_struct->mc_cache_nb++; node_struct->mc_cache_size += mc->size; _starpu_mem_chunk_list_push_front(&entry->list, mc); _starpu_spin_unlock(&node_struct->mc_lock); } } /* * In order to allocate a piece of data, we try to reuse existing buffers if * its possible. * 1 - we try to reuse a memchunk that is explicitly unused. * 2 - we go through the list of memory chunks and find one that is not * referenced and that has the same footprint to reuse it. * 3 - we call the usual driver's alloc method * 4 - we go through the list of memory chunks and release those that are * not referenced (or part of those). * */ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned dst_node, enum starpu_is_prefetch is_prefetch, int only_fast_alloc) { unsigned attempts = 0; starpu_ssize_t allocated_memory; int ret; starpu_ssize_t data_size = _starpu_data_get_alloc_size(handle); int told_reclaiming = 0; int reused = 0; struct _starpu_node *node_struct = _starpu_get_node_struct(dst_node); _starpu_spin_checklocked(&handle->header_lock); _starpu_data_allocation_inc_stats(dst_node); /* perhaps we can directly reuse a buffer in the free-list */ uint32_t footprint = _starpu_compute_data_alloc_footprint(handle); int prefetch_oom = is_prefetch && node_struct->prefetch_out_of_memory; #ifdef STARPU_USE_ALLOCATION_CACHE if (!prefetch_oom) _STARPU_TRACE_START_ALLOC_REUSE(dst_node, data_size, handle, is_prefetch); if (try_to_find_reusable_mc(dst_node, handle, replicate, footprint)) { _starpu_allocation_cache_hit(dst_node); if (!prefetch_oom) _STARPU_TRACE_END_ALLOC_REUSE(dst_node, handle, 1); return data_size; } if (!prefetch_oom) _STARPU_TRACE_END_ALLOC_REUSE(dst_node, handle, 0); #endif /* If this is RAM and pinned this will be slow In case we only want fast allocations return here */ if (only_fast_alloc && (starpu_node_get_kind(dst_node) != STARPU_CPU_RAM || _starpu_malloc_willpin_on_node(dst_node))) return -ENOMEM; STARPU_ASSERT(handle->ops); STARPU_ASSERT(handle->ops->allocate_data_on_node); STARPU_ASSERT(replicate->data_interface); size_t size = handle->ops->interface_size; if (!size) /* nul-size VLA is undefined... */ size = 1; char data_interface[size]; memcpy(data_interface, replicate->data_interface, handle->ops->interface_size); /* Take temporary reference on the replicate */ replicate->refcnt++; handle->busy_count++; _starpu_spin_unlock(&handle->header_lock); do { if (!prefetch_oom) _STARPU_TRACE_START_ALLOC(dst_node, data_size, handle, is_prefetch); allocated_memory = handle->ops->allocate_data_on_node(data_interface, dst_node); if (!prefetch_oom) _STARPU_TRACE_END_ALLOC(dst_node, handle, allocated_memory); if (allocated_memory == -ENOMEM) { size_t handle_size = _starpu_data_get_alloc_size(handle); size_t reclaim = starpu_memstrategy_data_size_coefficient*handle_size; /* First try to flush data explicitly marked for freeing */ size_t freed = flush_memchunk_cache(dst_node, reclaim); if (freed >= reclaim) { /* That freed enough data, retry allocating */ node_struct->prefetch_out_of_memory = 0; continue; } reclaim -= freed; if (is_prefetch >= STARPU_IDLEFETCH) { /* It's just idle fetch, don't bother existing allocations */ /* And don't bother tracing allocation attempts */ node_struct->prefetch_out_of_memory = 1; /* TODO: ideally we should not even try to allocate when we know we have not freed anything */ continue; } /* Try to reuse an allocated data with the same interface (to avoid spurious free/alloc) */ if (_starpu_has_not_important_data && try_to_reuse_not_important_mc(dst_node, handle, replicate, footprint, is_prefetch)) break; if (try_to_reuse_potentially_in_use_mc(dst_node, handle, replicate, footprint, is_prefetch)) { reused = 1; allocated_memory = data_size; break; } if (!told_reclaiming) { /* Prevent prefetches and such from happening */ (void) STARPU_ATOMIC_ADD(&node_struct->reclaiming, 1); told_reclaiming = 1; } /* That was not enough, we have to really reclaim */ _STARPU_TRACE_START_MEMRECLAIM(dst_node,is_prefetch); freed = _starpu_memory_reclaim_generic(dst_node, 0, reclaim, is_prefetch); _STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch); if (!freed && is_prefetch >= STARPU_FETCH) { /* It's just prefetch, don't bother tracing allocation attempts */ node_struct->prefetch_out_of_memory = 1; /* TODO: ideally we should not even try to allocate when we know we have not freed anything */ continue; } node_struct->prefetch_out_of_memory = 0; } else node_struct->prefetch_out_of_memory = 0; } while((allocated_memory == -ENOMEM) && attempts++ < 2); int cpt = 0; while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock)) { cpt++; _starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_NOT_ALLOC); } if (cpt == STARPU_SPIN_MAXTRY) _starpu_spin_lock(&handle->header_lock); replicate->refcnt--; STARPU_ASSERT(replicate->refcnt >= 0); STARPU_ASSERT(handle->busy_count > 0); handle->busy_count--; ret = _starpu_data_check_not_busy(handle); STARPU_ASSERT(ret == 0); if (told_reclaiming) /* We've finished with reclaiming memory, let prefetches start again */ (void) STARPU_ATOMIC_ADD(&node_struct->reclaiming, -1); if (allocated_memory == -ENOMEM) { if (replicate->allocated) /* Didn't manage to allocate, but somebody else did */ allocated_memory = 0; goto out; } if (reused) { /* We just reused an allocation, nothing more to do */ } else if (replicate->allocated) { /* Argl, somebody allocated it in between already, drop this one */ _STARPU_TRACE_START_FREE(dst_node, data_size, handle); handle->ops->free_data_on_node(data_interface, dst_node); _STARPU_TRACE_END_FREE(dst_node, handle); allocated_memory = 0; } else /* Install newly-allocated interface */ memcpy(replicate->data_interface, data_interface, handle->ops->interface_size); out: return allocated_memory; } int _starpu_allocate_memory_on_node(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, enum starpu_is_prefetch is_prefetch, int only_fast_alloc) { starpu_ssize_t allocated_memory; unsigned dst_node = replicate->memory_node; STARPU_ASSERT(dst_node < STARPU_MAXNODES); STARPU_ASSERT(handle); _starpu_spin_checklocked(&handle->header_lock); /* A buffer is already allocated on the node */ if (replicate->allocated) return 0; STARPU_ASSERT(replicate->mapped == STARPU_UNMAPPED); STARPU_ASSERT(replicate->data_interface); allocated_memory = _starpu_allocate_interface(handle, replicate, dst_node, is_prefetch, only_fast_alloc); /* perhaps we could really not handle that capacity misses */ if (allocated_memory == -ENOMEM) return -ENOMEM; if (replicate->allocated) /* Somebody allocated it in between already */ return 0; register_mem_chunk(handle, replicate, 1); replicate->allocated = 1; replicate->automatically_allocated = 1; return 0; } unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node) { return handle->per_node[memory_node].allocated || handle->per_node[memory_node].mapped != STARPU_UNMAPPED; } unsigned starpu_data_test_if_mapped_on_node(starpu_data_handle_t handle, unsigned memory_node) { STARPU_ASSERT(memory_node < STARPU_MAXNODES); return handle->per_node[memory_node].allocated; } /* This memchunk has been recently used, put it last on the mc_list, so we will * try to evict it as late as possible */ void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node) { if (!mc) /* user-allocated memory */ return; STARPU_ASSERT(node < STARPU_MAXNODES); if (!can_evict(node)) /* Don't bother */ return; struct _starpu_node *node_struct = _starpu_get_node_struct(node); _starpu_spin_lock(&node_struct->mc_lock); MC_LIST_ERASE(node_struct, mc); mc->wontuse = 0; MC_LIST_PUSH_BACK(node_struct, mc); _starpu_spin_unlock(&node_struct->mc_lock); } /* This memchunk will not be used in the close future, put it on the clean * list, so we will to evict it first */ void _starpu_memchunk_wont_use(struct _starpu_mem_chunk *mc, unsigned node) { if (!mc) /* user-allocated memory */ return; STARPU_ASSERT(node < STARPU_MAXNODES); if (!can_evict(node)) /* Don't bother */ return; struct _starpu_node *node_struct = _starpu_get_node_struct(node); _starpu_spin_lock(&node_struct->mc_lock); mc->wontuse = 1; if (mc->data && mc->data->home_node != -1) { MC_LIST_ERASE(node_struct, mc); /* Caller will schedule a clean transfer */ mc->clean = 1; MC_LIST_PUSH_CLEAN(node_struct, mc); } /* TODO: else push to head of data to be evicted */ _starpu_spin_unlock(&node_struct->mc_lock); } /* This memchunk content was dropped, and thus becomes clean */ void _starpu_memchunk_clean(struct _starpu_mem_chunk *mc, unsigned node) { if (!mc) /* user-allocated memory */ return; if (mc->home) /* Home is always clean */ return; STARPU_ASSERT(node < STARPU_MAXNODES); if (!can_evict(node)) /* Don't bother */ return; struct _starpu_node *node_struct = _starpu_get_node_struct(node); _starpu_spin_lock(&node_struct->mc_lock); if (!mc->clean) { node_struct->mc_clean_nb++; mc->clean = 1; } _starpu_spin_unlock(&node_struct->mc_lock); } /* This memchunk is being written to, and thus becomes dirty */ void _starpu_memchunk_dirty(struct _starpu_mem_chunk *mc, unsigned node) { if (!mc) /* user-allocated memory */ return; if (mc->home) /* Home is always clean */ return; STARPU_ASSERT(node < STARPU_MAXNODES); if (!can_evict(node)) /* Don't bother */ return; struct _starpu_node *node_struct = _starpu_get_node_struct(node); _starpu_spin_lock(&node_struct->mc_lock); if (mc->relaxed_coherency == 1) { /* SCRATCH, make it clean if not already*/ if (!mc->clean) { node_struct->mc_clean_nb++; mc->clean = 1; } } else { if (mc->clean) { node_struct->mc_clean_nb--; mc->clean = 0; } } _starpu_spin_unlock(&node_struct->mc_lock); } #ifdef STARPU_MEMORY_STATS void _starpu_memory_display_stats_by_node(FILE *stream, int node) { struct _starpu_node *node_struct = _starpu_get_node_struct(node); _starpu_spin_lock(&node_struct->mc_lock); if (!_starpu_mem_chunk_list_empty(&node_struct->mc_list)) { struct _starpu_mem_chunk *mc; fprintf(stream, "#-------\n"); fprintf(stream, "Data on Node #%d\n",node); for (mc = _starpu_mem_chunk_list_begin(&node_struct->mc_list); mc != _starpu_mem_chunk_list_end(&node_struct->mc_list); mc = _starpu_mem_chunk_list_next(mc)) { _starpu_memory_display_handle_stats(stream, mc->data); } } _starpu_spin_unlock(&node_struct->mc_lock); } void _starpu_data_display_memory_stats(FILE *stream) { unsigned node; fprintf(stream, "\n#---------------------\n"); fprintf(stream, "Memory stats :\n"); for (node = 0; node < STARPU_MAXNODES; node++) { _starpu_memory_display_stats_by_node(stream, node); } fprintf(stream, "\n#---------------------\n"); } #endif void starpu_data_display_memory_stats(void) { #ifdef STARPU_MEMORY_STATS _starpu_data_display_memory_stats(stderr); #endif } static int get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node) { int target = -1; unsigned nnodes = starpu_memory_nodes_get_count(); unsigned int i; double time_disk = 0.0; for (i = 0; i < nnodes; i++) { if (starpu_node_get_kind(i) == STARPU_DISK_RAM && i != node && (handle->per_node[i].allocated || _starpu_memory_manager_test_allocate_size(i, _starpu_data_get_alloc_size(handle)) == 1)) { /* if we can write on the disk */ if ((_starpu_get_disk_flag(i) & STARPU_DISK_NO_RECLAIM) == 0) { unsigned numa; unsigned nnumas = starpu_memory_nodes_get_numa_count(); for (numa = 0; numa < nnumas; numa++) { /* TODO : check if starpu_transfer_predict(node, i,...) is the same */ double time_tmp = starpu_transfer_predict(node, numa, _starpu_data_get_alloc_size(handle)) + starpu_transfer_predict(i, numa, _starpu_data_get_alloc_size(handle)); if (target == -1 || time_disk > time_tmp) { target = i; time_disk = time_tmp; } } } } } return target; } #ifdef STARPU_DEVEL # warning TODO: better choose NUMA node #endif /* Choose a target memory node to put the value of the handle, because the current location (node) is getting tight */ static int choose_target(starpu_data_handle_t handle, unsigned node) { int target = -1; size_t size_handle = _starpu_data_get_alloc_size(handle); if (handle->home_node != -1) /* try to push on RAM if we can before to push on disk */ if(starpu_node_get_kind(handle->home_node) == STARPU_DISK_RAM && (starpu_node_get_kind(node) != STARPU_CPU_RAM)) { unsigned i; unsigned nb_numa_nodes = starpu_memory_nodes_get_numa_count(); for (i=0; iper_node[i].allocated || (starpu_ssize_t) size_handle < node_struct->mc_cache_size || _starpu_memory_manager_test_allocate_size(i, size_handle - node_struct->mc_cache_size) == 1) { target = i; break; } } if (target == -1) { target = get_better_disk_can_accept_size(handle, node); } } /* others memory nodes */ else { target = handle->home_node; } else { /* handle->home_node == -1 */ /* no place for data in RAM, we push on disk */ if (starpu_node_get_kind(node) == STARPU_CPU_RAM) { target = get_better_disk_can_accept_size(handle, node); } else { /* node != 0 */ /* try to push data to RAM if we can before to push on disk*/ unsigned i; unsigned nb_numa_nodes = starpu_memory_nodes_get_numa_count(); for (i=0; iper_node[i].allocated || (starpu_ssize_t) size_handle < node_struct->mc_cache_size || _starpu_memory_manager_test_allocate_size(i, size_handle - node_struct->mc_cache_size) == 1) { target = i; break; } } /* no place in RAM */ if (target == -1) { target = get_better_disk_can_accept_size(handle, node); } } } /* we haven't the right to write on the disk */ if (target != -1 && starpu_node_get_kind(target) == STARPU_DISK_RAM && (_starpu_get_disk_flag(target) & STARPU_DISK_NO_RECLAIM)) target = -1; return target; } void starpu_data_set_user_data(starpu_data_handle_t handle, void* user_data) { handle->user_data = user_data; } void *starpu_data_get_user_data(starpu_data_handle_t handle) { return handle->user_data; } void starpu_data_set_sched_data(starpu_data_handle_t handle, void* sched_data) { handle->sched_data = sched_data; } void *starpu_data_get_sched_data(starpu_data_handle_t handle) { return handle->sched_data; } starpu-1.4.9+dfsg/src/datawizard/memalloc.h000066400000000000000000000077011507764646700207000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __MEMALLOC_H__ #define __MEMALLOC_H__ /** @file */ #include #include #include #include #include #include #include #pragma GCC visibility push(hidden) struct _starpu_data_replicate; /** While associated with a handle, the content is protected by the handle lock, except a few fields */ LIST_TYPE(_starpu_mem_chunk, /** protected by the mc_lock */ starpu_data_handle_t data; uint32_t footprint; /* * When re-using a memchunk, the footprint of the data is not * sufficient to determine whether two pieces of data have the same * layout (there could be collision in the hash function ...) so we * still keep a copy of the actual layout (ie. the data interface) to * stay on the safe side while the memchunk is detached from an actual * data. */ struct starpu_data_interface_ops *ops; void *chunk_interface; size_t size_interface; /** Whether StarPU automatically allocated this memory, or the application did */ unsigned automatically_allocated:1; /** A buffer that is used for SCRATCH or reduction cannot be used with * filters. */ unsigned relaxed_coherency:2; /** Whether this is the home chunk, or there is no home chunk (and it is thus always clean) */ unsigned home:1; /** Whether the memchunk is in the clean part of the mc_list */ unsigned clean:1; /** Was this chunk marked as "won't use"? */ unsigned wontuse:1; /** the size of the data is only set when calling _starpu_request_mem_chunk_removal(), * it is needed to estimate how much memory is in mc_cache, and by * free_memory_on_node() which is called when the handle is no longer * valid. * It should not be used otherwise. */ size_t size; struct _starpu_data_replicate *replicate; /** This is set when one keeps a pointer to this mc obtained from the * mc_list without mc_lock held. We need to clear the pointer if we * remove this entry from the mc_list, so we know we have to restart * from zero. This is protected by the corresponding mc_lock. */ struct _starpu_mem_chunk **remove_notify; ) void _starpu_init_mem_chunk_lists(void); void _starpu_deinit_mem_chunk_lists(void); void _starpu_mem_chunk_init_last(void); void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node, size_t size); int _starpu_allocate_memory_on_node(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, enum starpu_is_prefetch is_prefetch, int only_fast_alloc); size_t _starpu_free_all_automatically_allocated_buffers(unsigned node); void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node); void _starpu_memchunk_wont_use(struct _starpu_mem_chunk *m, unsigned nodec); void _starpu_memchunk_clean(struct _starpu_mem_chunk *mc, unsigned node); void _starpu_memchunk_dirty(struct _starpu_mem_chunk *mc, unsigned node); size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t reclaim, enum starpu_is_prefetch is_prefetch); int _starpu_is_reclaiming(unsigned node); void _starpu_mem_chunk_disk_register(unsigned disk_memnode); #pragma GCC visibility pop #endif starpu-1.4.9+dfsg/src/datawizard/memory_manager.c000066400000000000000000000143651507764646700221100ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include int _starpu_memory_manager_init() { int i; for(i=0 ; iglobal_size = 0; node->used_size = 0; /* This is accessed for statistics outside the lock, don't care * about that */ STARPU_HG_DISABLE_CHECKING(node->used_size); STARPU_HG_DISABLE_CHECKING(node->global_size); node->waiting_size = 0; STARPU_PTHREAD_MUTEX_INIT(&node->lock_nodes, NULL); STARPU_PTHREAD_COND_INIT(&node->cond_nodes, NULL); } return 0; } void _starpu_memory_manager_set_global_memory_size(unsigned node, size_t size) { struct _starpu_node *node_struct = _starpu_get_node_struct(node); STARPU_PTHREAD_MUTEX_LOCK(&node_struct->lock_nodes); if (!node_struct->global_size) { node_struct->global_size = size; _STARPU_DEBUG("Global size for node %u is %ld\n", node, (long)node_struct->global_size); } else { STARPU_ASSERT(node_struct->global_size == size); } STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->lock_nodes); } size_t _starpu_memory_manager_get_global_memory_size(unsigned node) { return _starpu_get_node_struct(node)->global_size; } int starpu_memory_allocate(unsigned node, size_t size, int flags) { struct _starpu_node *node_struct = _starpu_get_node_struct(node); int ret; STARPU_PTHREAD_MUTEX_LOCK(&node_struct->lock_nodes); if (flags & STARPU_MEMORY_WAIT) { struct _starpu_worker *worker = _starpu_get_local_worker_key(); enum _starpu_worker_status old_status = STATUS_UNKNOWN; if (worker) { old_status = worker->status; if (!(old_status & STATUS_WAITING)) _starpu_add_worker_status(worker, STATUS_INDEX_WAITING, NULL); } while (node_struct->used_size + size > node_struct->global_size) { /* Tell deallocators we need this amount */ if (!node_struct->waiting_size || size < node_struct->waiting_size) node_struct->waiting_size = size; /* Wait for it */ STARPU_PTHREAD_COND_WAIT(&node_struct->cond_nodes, &node_struct->lock_nodes); } if (worker) { if (!(old_status & STATUS_WAITING)) _starpu_clear_worker_status(worker, STATUS_INDEX_WAITING, NULL); } /* And take it */ node_struct->used_size += size; _STARPU_TRACE_USED_MEM(node, node_struct->used_size); ret = 0; } else if (flags & STARPU_MEMORY_OVERFLOW || node_struct->global_size == 0 || node_struct->used_size + size <= node_struct->global_size) { node_struct->used_size += size; _STARPU_TRACE_USED_MEM(node, node_struct->used_size); ret = 0; } else { ret = -ENOMEM; } STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->lock_nodes); return ret; } void starpu_memory_deallocate(unsigned node, size_t size) { struct _starpu_node *node_struct = _starpu_get_node_struct(node); STARPU_PTHREAD_MUTEX_LOCK(&node_struct->lock_nodes); node_struct->used_size -= size; _STARPU_TRACE_USED_MEM(node, node_struct->used_size); /* If there's now room for waiters, wake them */ if (node_struct->waiting_size && node_struct->global_size - node_struct->used_size >= node_struct->waiting_size) { /* And have those not happy enough tell us the size again */ node_struct->waiting_size = 0; STARPU_PTHREAD_COND_BROADCAST(&node_struct->cond_nodes); } STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->lock_nodes); } starpu_ssize_t starpu_memory_get_total(unsigned node) { size_t size = _starpu_get_node_struct(node)->global_size; if (size == 0) return -1; else return size; } starpu_ssize_t starpu_memory_get_total_all_nodes() { unsigned memnodes, i; memnodes = starpu_memory_nodes_get_count(); starpu_ssize_t total = 0; for(i=0 ; iglobal_size; if (size == 0) return -1; ret = size - _starpu_get_node_struct(node)->used_size; return ret; } starpu_ssize_t starpu_memory_get_available_all_nodes() { unsigned memnodes, i; memnodes = starpu_memory_nodes_get_count(); starpu_ssize_t avail = 0; for(i=0 ; iused_size; } size_t starpu_memory_get_used_all_nodes() { unsigned memnodes, i; memnodes = starpu_memory_nodes_get_count(); size_t used = 0; for(i=0 ; ilock_nodes); while (node_struct->used_size + size > node_struct->global_size) { /* Tell deallocators we need this amount */ if (!node_struct->waiting_size || size < node_struct->waiting_size) node_struct->waiting_size = size; /* Wait for it */ STARPU_PTHREAD_COND_WAIT(&node_struct->cond_nodes, &node_struct->lock_nodes); } STARPU_PTHREAD_MUTEX_UNLOCK(&node_struct->lock_nodes); } int _starpu_memory_manager_test_allocate_size(unsigned node, size_t size) { struct _starpu_node *node_struct = _starpu_get_node_struct(node); int ret; if (node_struct->global_size == 0) ret = 1; else if (node_struct->used_size + size <= node_struct->global_size) ret = 1; else ret = 0; return ret; } starpu-1.4.9+dfsg/src/datawizard/memory_manager.h000066400000000000000000000025471507764646700221140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __MEMORY_MANAGER_H__ #define __MEMORY_MANAGER_H__ /** @file */ #include #pragma GCC visibility push(hidden) #ifdef __cplusplus extern "C" { #endif /** * Initialises the memory manager */ int _starpu_memory_manager_init(); /** * Initialises the global memory size for the given node * */ void _starpu_memory_manager_set_global_memory_size(unsigned node, size_t size); /** * Gets the global memory size for the given node * */ size_t _starpu_memory_manager_get_global_memory_size(unsigned node); int _starpu_memory_manager_test_allocate_size(unsigned node, size_t size); #ifdef __cplusplus } #endif #pragma GCC visibility pop #endif /* __MEMORY_MANAGER_H__ */ starpu-1.4.9+dfsg/src/datawizard/memory_nodes.c000066400000000000000000000156261507764646700216070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include char _starpu_worker_drives_memory[STARPU_NMAXWORKERS][STARPU_MAXNODES]; struct _starpu_memory_node_descr _starpu_descr; void _starpu_memory_nodes_init(void) { /* there is no node yet, subsequent nodes will be * added using _starpu_memory_node_register */ _starpu_descr.nnodes = 0; unsigned i; for (i = 0; i < STARPU_MAXNODES; i++) { _starpu_descr.nodes[i] = STARPU_UNUSED; _starpu_descr.nworkers[i] = 0; } memset(&_starpu_worker_drives_memory, 0, sizeof(_starpu_worker_drives_memory)); STARPU_HG_DISABLE_CHECKING(_starpu_worker_drives_memory); _starpu_init_mem_chunk_lists(); _starpu_init_data_request_lists(); _starpu_memory_manager_init(); STARPU_PTHREAD_RWLOCK_INIT(&_starpu_descr.conditions_rwlock, NULL); _starpu_descr.total_condition_count = 0; } void _starpu_memory_nodes_deinit(void) { _starpu_deinit_data_request_lists(); _starpu_deinit_mem_chunk_lists(); STARPU_PTHREAD_RWLOCK_DESTROY(&_starpu_descr.conditions_rwlock); } #undef starpu_node_get_kind enum starpu_node_kind starpu_node_get_kind(unsigned node) { return _starpu_node_get_kind(node); } #undef starpu_memory_nodes_get_count unsigned starpu_memory_nodes_get_count(void) { return _starpu_memory_nodes_get_count(); } unsigned starpu_memory_nodes_get_count_by_kind(enum starpu_node_kind kind) { unsigned nnodes = _starpu_memory_nodes_get_count(); unsigned id, cnt = 0; for (id = 0; id < nnodes; id++) if (_starpu_node_get_kind(id) == kind) cnt++; return cnt; } unsigned starpu_memory_node_get_ids_by_type(enum starpu_node_kind kind, unsigned *memory_nodes_ids, unsigned maxsize) { unsigned nnodes = _starpu_memory_nodes_get_count(); unsigned cnt = 0; unsigned id; for (id = 0; id < nnodes; id++) { if (_starpu_node_get_kind(id) == kind) { /* Perhaps the array is too small ? */ if (cnt >= maxsize) return -ERANGE; memory_nodes_ids[cnt++] = id; } } return cnt; } int starpu_memory_node_get_name(unsigned node, char *name, size_t size) { const char *prefix = _starpu_node_get_prefix(_starpu_descr.nodes[node]); return snprintf(name, size, "%s %d", prefix, _starpu_descr.devid[node]); } unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid) { const struct _starpu_node_ops *node_ops = starpu_memory_driver_info[kind].ops; unsigned node; /* ATOMIC_ADD returns the new value ... */ node = STARPU_ATOMIC_ADD(&_starpu_descr.nnodes, 1) - 1; STARPU_ASSERT_MSG_ALWAYS(node < STARPU_MAXNODES,"Too many nodes (%u) for maximum %d. Use configure option --enable-maxnodes=xxx to update the maximum number of nodes.", node + 1, STARPU_MAXNODES); _starpu_descr.nodes[node] = kind; _STARPU_TRACE_NEW_MEM_NODE(node); _starpu_descr.devid[node] = devid; _starpu_descr.node_ops[node] = node_ops; /* for now, there is no condition associated to that newly created node */ _starpu_descr.condition_count[node] = 0; _starpu_malloc_init(node); return node; } /* TODO move in a more appropriate file !! */ void _starpu_memory_node_register_condition(struct _starpu_worker *worker, starpu_pthread_cond_t *cond, unsigned nodeid) { unsigned cond_id; unsigned nconds_total, nconds; STARPU_PTHREAD_RWLOCK_WRLOCK(&_starpu_descr.conditions_rwlock); /* we only insert the queue if it's not already in the list */ nconds = _starpu_descr.condition_count[nodeid]; for (cond_id = 0; cond_id < nconds; cond_id++) { if (_starpu_descr.conditions_attached_to_node[nodeid][cond_id].cond == cond) { STARPU_ASSERT(_starpu_descr.conditions_attached_to_node[nodeid][cond_id].worker == worker); /* the condition is already in the list */ STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_descr.conditions_rwlock); return; } } /* it was not found locally */ _starpu_descr.conditions_attached_to_node[nodeid][cond_id].cond = cond; _starpu_descr.conditions_attached_to_node[nodeid][cond_id].worker = worker; _starpu_descr.condition_count[nodeid]++; /* do we have to add it in the global list as well ? */ nconds_total = _starpu_descr.total_condition_count; for (cond_id = 0; cond_id < nconds_total; cond_id++) { if (_starpu_descr.conditions_all[cond_id].cond == cond) { /* the queue is already in the global list */ STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_descr.conditions_rwlock); return; } } /* it was not in the global list either */ _starpu_descr.conditions_all[nconds_total].cond = cond; _starpu_descr.conditions_all[nconds_total].worker = worker; _starpu_descr.total_condition_count++; STARPU_PTHREAD_RWLOCK_UNLOCK(&_starpu_descr.conditions_rwlock); } void _starpu_memory_node_set_mapped(unsigned node) { if (starpu_map_enabled() == 1) _starpu_descr.mapped[node] = 1; #ifdef STARPU_VERBOSE else _STARPU_DISP("Warning: set_mapped requested on node %u, while map support is disabled\n", node); #endif } unsigned _starpu_memory_node_get_mapped(unsigned node) { return _starpu_descr.mapped[node]; } #undef starpu_worker_get_memory_node unsigned starpu_worker_get_memory_node(unsigned workerid) { (void) workerid; return _starpu_worker_get_memory_node(workerid); } void _starpu_worker_drives_memory_node(struct _starpu_worker *worker, unsigned memnode) { if (! _starpu_worker_drives_memory[worker->workerid][memnode]) { _starpu_worker_drives_memory[worker->workerid][memnode] = 1; #ifdef STARPU_SIMGRID starpu_pthread_queue_register(&worker->wait, &_starpu_simgrid_transfer_queue[memnode]); #endif _starpu_memory_node_register_condition(worker, &worker->sched_cond, memnode); } } #undef starpu_worker_get_local_memory_node unsigned starpu_worker_get_local_memory_node(void) { return _starpu_worker_get_local_memory_node(); } int starpu_memory_node_get_devid(unsigned node) { return _starpu_descr.devid[node]; } enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind) { enum starpu_worker_archtype archtype = starpu_memory_driver_info[node_kind].worker_archtype; STARPU_ASSERT_MSG(archtype != (enum starpu_worker_archtype) -1, "ambiguous memory node kind %d", node_kind); return archtype; } starpu-1.4.9+dfsg/src/datawizard/memory_nodes.h000066400000000000000000000142731507764646700216110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __MEMORY_NODES_H__ #define __MEMORY_NODES_H__ /** @file */ #include #include #include #include #include #include #include #ifdef STARPU_SIMGRID #include #endif #pragma GCC visibility push(hidden) extern char _starpu_worker_drives_memory[STARPU_NMAXWORKERS][STARPU_MAXNODES]; struct _starpu_cond_and_worker { starpu_pthread_cond_t *cond; struct _starpu_worker *worker; }; // TODO: split out all these arrays into struct _starpu_node struct _starpu_memory_node_descr { unsigned nnodes; enum starpu_node_kind nodes[STARPU_MAXNODES]; const struct _starpu_node_ops *node_ops[STARPU_MAXNODES]; /** Get the device id associated to this node, or -1 if not applicable */ int devid[STARPU_MAXNODES]; unsigned nworkers[STARPU_MAXNODES]; #ifdef STARPU_SIMGRID starpu_sg_host_t host[STARPU_MAXNODES]; #endif // TODO move this 2 lists outside struct _starpu_memory_node_descr /** Every worker is associated to a condition variable on which the * worker waits when there is task available. It is possible that * multiple worker share the same condition variable, so we maintain a * list of all these condition variables so that we can wake up all * worker attached to a memory node that are waiting on a task. */ starpu_pthread_rwlock_t conditions_rwlock; struct _starpu_cond_and_worker conditions_attached_to_node[STARPU_MAXNODES][STARPU_NMAXWORKERS]; struct _starpu_cond_and_worker conditions_all[STARPU_MAXNODES*STARPU_NMAXWORKERS]; /** the number of queues attached to each node */ unsigned total_condition_count; unsigned condition_count[STARPU_MAXNODES]; unsigned mapped[STARPU_MAXNODES]; }; extern struct _starpu_memory_node_descr _starpu_descr; void _starpu_memory_nodes_init(void); void _starpu_memory_nodes_deinit(void); /** Record that there is an additional worker that uses this memory node */ static inline void _starpu_memory_node_add_nworkers(unsigned node) { _starpu_descr.nworkers[node]++; } /** Record that this worker will driver data transfers for this memory node. */ void _starpu_worker_drives_memory_node(struct _starpu_worker *worker, unsigned memnode); static inline const struct _starpu_node_ops *_starpu_memory_node_get_node_ops(unsigned node) { return _starpu_descr.node_ops[node]; } /** Get the number of workers that use this memory node */ static inline unsigned _starpu_memory_node_get_nworkers(unsigned node) { return _starpu_descr.nworkers[node]; } #ifdef STARPU_SIMGRID static inline void _starpu_simgrid_memory_node_set_host(unsigned node, starpu_sg_host_t host) { _starpu_descr.host[node] = host; } static inline starpu_sg_host_t _starpu_simgrid_memory_node_get_host(unsigned node) { return _starpu_descr.host[node]; } #endif /** Note that this memory node can map CPU data */ void _starpu_memory_node_set_mapped(unsigned node); /** Returns whether this memory node can map CPU data */ unsigned _starpu_memory_node_get_mapped(unsigned node); /** Registers a memory node. Returns the memory node number */ unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid); //void _starpu_memory_node_attach_queue(struct starpu_jobq_s *q, unsigned nodeid); /** Register a condition variable associated to worker which is associated to a * memory node itself. */ void _starpu_memory_node_register_condition(struct _starpu_worker *worker, starpu_pthread_cond_t *cond, unsigned nodeid); /** See starpu_memory_node_get_description() */ static inline struct _starpu_memory_node_descr *_starpu_memory_node_get_description(void) { return &_starpu_descr; } #define _starpu_node_needs_map_update(node) \ (starpu_node_get_kind(node) == STARPU_OPENCL_RAM) /** See starpu_node_get_kind() */ static inline enum starpu_node_kind _starpu_node_get_kind(unsigned node) { return _starpu_descr.nodes[node]; } #define starpu_node_get_kind _starpu_node_get_kind #if STARPU_MAXNODES == 1 #define _starpu_memory_nodes_get_count() 1 #else /** See starpu_memory_nodes_get_count() */ static inline unsigned _starpu_memory_nodes_get_count(void) { return _starpu_descr.nnodes; } #endif #define starpu_memory_nodes_get_count _starpu_memory_nodes_get_count #if STARPU_MAXNODES == 1 #define _starpu_worker_get_memory_node(workerid) 0 #else /** See starpu_worker_get_memory_node() */ static inline unsigned _starpu_worker_get_memory_node(unsigned workerid) { struct _starpu_machine_config *config = _starpu_get_machine_config(); /** This workerid may either be a basic worker or a combined worker */ unsigned nworkers = config->topology.nworkers; if (workerid < config->topology.nworkers) return config->workers[workerid].memory_node; /** We have a combined worker */ unsigned ncombinedworkers STARPU_ATTRIBUTE_UNUSED = config->topology.ncombinedworkers; STARPU_ASSERT_MSG(workerid < ncombinedworkers + nworkers, "Bad workerid %u, maximum %u", workerid, ncombinedworkers + nworkers); return config->combined_workers[workerid - nworkers].memory_node; } #endif #define starpu_worker_get_memory_node _starpu_worker_get_memory_node #if STARPU_MAXNODES == 1 #define _starpu_worker_get_local_memory_node() 0 #else /** See starpu_worker_get_local_memory_node */ static inline unsigned _starpu_worker_get_local_memory_node(void) { struct _starpu_worker *worker = _starpu_get_local_worker_key(); if (!worker) return STARPU_MAIN_RAM; return worker->memory_node; } #endif #define starpu_worker_get_local_memory_node _starpu_worker_get_local_memory_node #pragma GCC visibility pop #endif // __MEMORY_NODES_H__ starpu-1.4.9+dfsg/src/datawizard/memstats.c000066400000000000000000000067271507764646700207460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include void _starpu_memory_stats_init(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_MEMORY_STATS _STARPU_CALLOC(handle->memory_stats, 1, sizeof(struct _starpu_memory_stats)); #endif } void _starpu_memory_stats_init_per_node(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED, unsigned node STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_MEMORY_STATS /* Stats initialization */ //handle->memory_stats->direct_access[node]=0; //handle->memory_stats->loaded_shared[node]=0; //handle->memory_stats->shared_to_owner[node]=0; //handle->memory_stats->loaded_owner[node]=0; //handle->memory_stats->invalidated[node]=0; #endif } void _starpu_memory_stats_free(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_MEMORY_STATS free(handle->memory_stats); #endif } #ifdef STARPU_MEMORY_STATS void _starpu_memory_display_handle_stats(FILE *stream, starpu_data_handle_t handle) { unsigned node; fprintf(stream, "#-----\n"); fprintf(stream, "Data : %p\n", handle); fprintf(stream, "Size : %d\n", (int)handle->ops->get_size(handle)); fprintf(stream, "\n"); fprintf(stream, "#--\n"); fprintf(stream, "Data access stats\n"); fprintf(stream, "/!\\ Work Underway\n"); for (node = 0; node < STARPU_MAXNODES; node++) { if (handle->memory_stats->direct_access[node]+handle->memory_stats->loaded_shared[node] +handle->memory_stats->invalidated[node]+handle->memory_stats->loaded_owner[node]) { fprintf(stream, "Node #%u\n", node); fprintf(stream, "\tDirect access : %u\n", handle->memory_stats->direct_access[node]); /* XXX Not Working yet. */ if (handle->memory_stats->shared_to_owner[node]) fprintf(stream, "\t\tShared to Owner : %u\n", handle->memory_stats->shared_to_owner[node]); fprintf(stream, "\tLoaded (Owner) : %u\n", handle->memory_stats->loaded_owner[node]); fprintf(stream, "\tLoaded (Shared) : %u\n", handle->memory_stats->loaded_shared[node]); fprintf(stream, "\tInvalidated (was Owner) : %u\n\n", handle->memory_stats->invalidated[node]); } } } void _starpu_memory_handle_stats_cache_hit(starpu_data_handle_t handle, unsigned node) { handle->memory_stats->direct_access[node]++; } void _starpu_memory_handle_stats_loaded_shared(starpu_data_handle_t handle, unsigned node) { handle->memory_stats->loaded_shared[node]++; } void _starpu_memory_handle_stats_loaded_owner(starpu_data_handle_t handle, unsigned node) { handle->memory_stats->loaded_owner[node]++; } void _starpu_memory_handle_stats_shared_to_owner(starpu_data_handle_t handle, unsigned node) { handle->memory_stats->shared_to_owner[node]++; } void _starpu_memory_handle_stats_invalidated(starpu_data_handle_t handle, unsigned node) { handle->memory_stats->invalidated[node]++; } #endif starpu-1.4.9+dfsg/src/datawizard/memstats.h000066400000000000000000000040161507764646700207400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __MEMSTATS_H__ #define __MEMSTATS_H__ /** @file */ #include #include #pragma GCC visibility push(hidden) #ifdef STARPU_MEMORY_STATS struct _starpu_memory_stats { /** Handle access stats per node */ unsigned direct_access[STARPU_MAXNODES]; unsigned loaded_shared[STARPU_MAXNODES]; unsigned loaded_owner[STARPU_MAXNODES]; unsigned shared_to_owner[STARPU_MAXNODES]; unsigned invalidated[STARPU_MAXNODES]; }; typedef struct _starpu_memory_stats * _starpu_memory_stats_t; #else typedef void * _starpu_memory_stats_t; #endif void _starpu_memory_stats_init(starpu_data_handle_t handle); void _starpu_memory_stats_init_per_node(starpu_data_handle_t handle, unsigned node); void _starpu_memory_stats_free(starpu_data_handle_t handle); void _starpu_memory_display_handle_stats(FILE *stream, starpu_data_handle_t handle); void _starpu_memory_handle_stats_cache_hit(starpu_data_handle_t handle, unsigned node); void _starpu_memory_handle_stats_loaded_shared(starpu_data_handle_t handle, unsigned node); void _starpu_memory_handle_stats_loaded_owner(starpu_data_handle_t handle, unsigned node); void _starpu_memory_handle_stats_shared_to_owner(starpu_data_handle_t handle, unsigned node); void _starpu_memory_handle_stats_invalidated(starpu_data_handle_t handle, unsigned node); #pragma GCC visibility pop #endif /* __MEMSTATS_H__ */ starpu-1.4.9+dfsg/src/datawizard/node_ops.c000066400000000000000000000022261507764646700207050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include const char* _starpu_node_get_prefix(enum starpu_node_kind kind) { const char *ret = starpu_memory_driver_info[kind].name_upper; STARPU_ASSERT(ret); return ret; } starpu-1.4.9+dfsg/src/datawizard/node_ops.h000066400000000000000000000200271507764646700207110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __NODE_OPS_H__ #define __NODE_OPS_H__ /** @file */ #include #include #include #pragma GCC visibility push(hidden) /** Request copying some data interface for handle \p handle: from interface \p * src_interface that exists on node \p src_node to interface \p dst_interface * that exists on node \p dst_node. * * If \p req is non-NULL, this can be used to start an asynchronous copy, in * which case -EAGAIN should be returned. Otherwise, 0 should be returned. * * _starpu_copy_interface_any_to_any can be used as a generic version, that * assumes that the data_interface implements the any_to_any method, and * copy_data_t will be used to queue the actual transfers. */ typedef int (*copy_interface_func_t)(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req); /** Request copying \p ssize bytes of data from \p src_ptr (plus offset \p src_offset) * in node \p src_node to \p dst_ptr (plus offset \p dst_offset) in node \p dst_node. * * If \p async_channel is non-NULL, this can be used to start an asynchronous copy, in * which case -EAGAIN should be returned. Otherwise, 0 should be returned. */ typedef int (*copy_data_t)(uintptr_t src_ptr, size_t src_offset, unsigned src_node, uintptr_t dst_ptr, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel); /** This is like copy_data_t, except that there are \p numblocks blocks of size * \p blocksize bytes to be transferred. On the source, their respective starts are \p * ld_src bytes apart, and on the destination their respective starts have to be * \p ld_dst bytes apart. (leading dimension) */ typedef int (*copy2d_data_t)(uintptr_t src_ptr, size_t src_offset, unsigned src_node, uintptr_t dst_ptr, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel); /** This is like copy_data_t, except that there are \p numblocks_2 metablocks to * be transferred. On the source, their respective starts are \p ld2_src bytes * apart, and on the destination their respective starts have to be \p ld2_dst * bytes apart. * * The metablocks are composed of \p numblocks_1 blocks of size \p blocksize * bytes. On the source, their respective starts are \p ld1_src bytes apart, and * on the destination their respective starts have to be \p ld1_dst bytes apart. */ typedef int (*copy3d_data_t)(uintptr_t src_ptr, size_t src_offset, unsigned src_node, uintptr_t dst_ptr, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks_1, size_t ld1_src, size_t ld1_dst, size_t numblocks_2, size_t ld2_src, size_t ld2_dst, struct _starpu_async_channel *async_channel); /** Map \p size bytes of data from \p src (plus offset \p src_offset) in node \p src_node * on node \p dst_node. If successful, return the resulting pointer, otherwise fill *ret */ typedef uintptr_t (*map_t)(uintptr_t src, size_t src_offset, unsigned src_node, unsigned dst_node, size_t size, int *ret); /** Unmap \p size bytes of data from \p src (plus offset \p src_offset) in node \p src_node * on node \p dst_node. */ typedef int (*unmap_t)(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, unsigned dst_node, size_t size); /** Update cache coherency for the mapping of \p size bytes of data from \p src (plus offset * \p src_offset) in node \p src_node on node \p dst_node. */ typedef int (*update_map_t)(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size); /** Reference all the methods for copying data from this kind of device to * another kind of device. */ struct _starpu_node_ops { /** Request copying a data interface from this type of node to another type of node. * As a first start, you can just use the generic _starpu_copy_interface_any_to_any. */ copy_interface_func_t copy_interface_to[STARPU_MAX_RAM+1]; /** Request copying a data interface to this type of node from another type of node. * As a first start, you can just use the generic _starpu_copy_interface_any_to_any. */ copy_interface_func_t copy_interface_from[STARPU_MAX_RAM+1]; /** Request copying a piece of data from this type of node to another type of node. * This method is required at least for STARPU_CPU_RAM. */ copy_data_t copy_data_to[STARPU_MAX_RAM+1]; /** Request copying a piece of data to this type of node from another type of node. * This method is required at least for STARPU_CPU_RAM. */ copy_data_t copy_data_from[STARPU_MAX_RAM+1]; /** Request copying a 2D piece of data (i.e. matrix tile with an ld) * from this type of node to another type of node. * This method is optional. */ copy2d_data_t copy2d_data_to[STARPU_MAX_RAM+1]; /** Request copying a 2D piece of data (i.e. matrix tile with an ld) * to this type of node from another type of node. * This method is optional. */ copy2d_data_t copy2d_data_from[STARPU_MAX_RAM+1]; /** Request copying a 3D piece of data (i.e. block piece with ldy and ldz) * from this type of node to another type of node. * This method is optional. */ copy3d_data_t copy3d_data_to[STARPU_MAX_RAM+1]; /** Request copying a 3D piece of data (i.e. block piece with ldy and ldz) * to this type of node from another type of node. * This method is optional. */ copy3d_data_t copy3d_data_from[STARPU_MAX_RAM+1]; /** Wait for the completion of asynchronous request \p async_channel. * Only used at starpu_shutdown. */ void (*wait_request_completion)(struct _starpu_async_channel *async_channel); /** Test whether asynchronous request \p async_channel has completed. */ unsigned (*test_request_completion)(struct _starpu_async_channel *async_channel); /** Return whether inter-device transfers are possible between \p node and \p handling_node. * If this returns 0, copy_interface_to will always be called with * CPU RAM as either source or destination. If this returns 1, * copy_interface_to may be called with both source and destination in * device memory. * * \p handling_node is the node that will initiate the transfer. This * allows to prefer starting from the driver itself. */ int (*is_direct_access_supported)(unsigned node, unsigned handling_node); /** Allocate \p size bytes of data on node \p dst_node. * \p flags can contain STARPU_MALLOC_* flags, only useful for CPU memory */ uintptr_t (*malloc_on_node)(unsigned dst_node, size_t size, int flags); /** Free data \p addr, which was a previous allocation of \p size bytes * of data on node \p dst_node with flags \p flags*/ void (*free_on_node)(unsigned dst_node, uintptr_t addr, size_t size, int flags); /** Map data a piece of data to this type of node from another type of node. * This method is optional */ map_t map[STARPU_MAX_RAM+1]; /** Unmap data a piece of data to this type of node from another type of node. * This method is optional */ unmap_t unmap[STARPU_MAX_RAM+1]; /** Update cache coherency for the mapping of a piece of data to this type of * node from another type of node. * This method is optional */ update_map_t update_map[STARPU_MAX_RAM+1]; /** Name of the type of memory, for debugging */ char *name; }; const char* _starpu_node_get_prefix(enum starpu_node_kind kind); #pragma GCC visibility pop #endif // __NODE_OPS_H__ starpu-1.4.9+dfsg/src/datawizard/reduction.c000066400000000000000000000361721507764646700211020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include void starpu_data_set_reduction_methods(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, struct starpu_codelet *init_cl) { starpu_data_set_reduction_methods_with_args(handle, redux_cl, NULL, init_cl, NULL); } void starpu_data_set_reduction_methods_with_args(starpu_data_handle_t handle, struct starpu_codelet *redux_cl, void *redux_cl_arg, struct starpu_codelet *init_cl, void *init_cl_arg) { _starpu_spin_lock(&handle->header_lock); if (init_cl) { STARPU_ASSERT_MSG(init_cl->nbuffers == 1, "The initialization method has to take one STARPU_W parameter"); STARPU_ASSERT_MSG(init_cl->modes[0] == STARPU_W, "The initialization method has to take one STARPU_W parameter"); } if (redux_cl) { STARPU_ASSERT_MSG(redux_cl->nbuffers == 2, "The reduction method has to take one STARPU_RW|STARPU_COMMUTE parameter and one STARPU_R parameter"); if (!(redux_cl->modes[0] & STARPU_COMMUTE)) { static int _warned = 0; STARPU_HG_DISABLE_CHECKING(_warned); if (!_warned) { _STARPU_DISP("Warning: The reduction method should use STARPU_COMMUTE for its first parameter\n"); _warned = 1; } redux_cl->modes[0] |= STARPU_COMMUTE; } STARPU_ASSERT_MSG(redux_cl->modes[0] == (STARPU_RW | STARPU_COMMUTE), "The first parameter of the reduction method has to use STARPU_RW|STARPU_COMMUTE"); STARPU_ASSERT_MSG(redux_cl->modes[1] == STARPU_R, "The second parameter of the reduction method has to use STARPU_R"); } _starpu_codelet_check_deprecated_fields(redux_cl); _starpu_codelet_check_deprecated_fields(init_cl); unsigned child; for (child = 0; child < handle->nchildren; child++) { /* make sure that the flags are applied to the children as well */ starpu_data_handle_t child_handle = starpu_data_get_child(handle, child); if (child_handle->nchildren > 0) starpu_data_set_reduction_methods_with_args(child_handle, redux_cl, redux_cl_arg, init_cl, init_cl_arg); } handle->redux_cl = redux_cl; handle->init_cl = init_cl; handle->redux_cl_arg = redux_cl_arg; handle->init_cl_arg = init_cl_arg; _starpu_spin_unlock(&handle->header_lock); } void _starpu_init_data_replicate(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, int workerid) { STARPU_ASSERT(replicate); STARPU_ASSERT(replicate->allocated || replicate->mapped != STARPU_UNMAPPED); struct starpu_codelet *init_cl = handle->init_cl; STARPU_ASSERT_MSG(init_cl, "There is no initialisation codelet for the reduction of the handle %p. Maybe you forget to call starpu_data_set_reduction_methods() ?", handle->root_handle); _starpu_cl_func_t init_func = NULL; /* TODO Check that worker may execute the codelet */ switch (starpu_worker_get_type(workerid)) { case STARPU_CPU_WORKER: init_func = _starpu_task_get_cpu_nth_implementation(init_cl, 0); break; case STARPU_CUDA_WORKER: init_func = _starpu_task_get_cuda_nth_implementation(init_cl, 0); #if defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) /* We make sure we do manipulate the proper device */ starpu_cuda_set_device(starpu_worker_get_devid(workerid)); #endif break; case STARPU_HIP_WORKER: init_func = _starpu_task_get_hip_nth_implementation(init_cl, 0); #if defined(STARPU_HAVE_HIP_MEMCPY_PEER) && !defined(STARPU_SIMGRID) /* We make sure we do manipulate the proper device */ starpu_hip_set_device(starpu_worker_get_devid(workerid)); #endif break; case STARPU_OPENCL_WORKER: init_func = _starpu_task_get_opencl_nth_implementation(init_cl, 0); break; #ifdef STARPU_USE_MPI_MASTER_SLAVE case STARPU_MPI_MS_WORKER: init_func = _starpu_src_common_get_cpu_func_from_codelet(init_cl, 0); break; #endif #ifdef STARPU_USE_TCPIP_MASTER_SLAVE case STARPU_TCPIP_MS_WORKER: init_func = _starpu_src_common_get_cpu_func_from_codelet(init_cl, 0); break; #endif default: STARPU_ABORT(); break; } STARPU_ASSERT(init_func); switch (starpu_worker_get_type(workerid)) { #ifdef STARPU_USE_MPI_MASTER_SLAVE case STARPU_MPI_MS_WORKER: { struct _starpu_mp_node *node = _starpu_mpi_ms_src_get_actual_thread_mp_node(); int subworkerid = _starpu_get_worker_struct(workerid)->subworkerid; void * arg; int arg_size; _starpu_src_common_execute_kernel(node, (void(*)(void))init_func, subworkerid, STARPU_SEQ, 0, 0, &handle, &(replicate->data_interface), 1, NULL, 0 , 1); _starpu_src_common_wait_completed_execution(node,subworkerid,&arg,&arg_size); break; } #endif #ifdef STARPU_USE_TCPIP_MASTER_SLAVE case STARPU_TCPIP_MS_WORKER: { struct _starpu_mp_node *node = _starpu_tcpip_ms_src_get_actual_thread_mp_node(); int subworkerid = _starpu_get_worker_struct(workerid)->subworkerid; void * arg; int arg_size; _starpu_src_common_execute_kernel(node, (void(*)(void))init_func, subworkerid, STARPU_SEQ, 0, 0, &handle, &(replicate->data_interface), 1, NULL, 0 , 1); _starpu_src_common_wait_completed_execution(node,subworkerid,&arg,&arg_size); break; } #endif default: init_func(&replicate->data_interface, NULL); break; } replicate->initialized = 1; } /* Enable reduction mode. This function must be called with the header lock * taken. */ void _starpu_data_start_reduction_mode(starpu_data_handle_t handle) { STARPU_ASSERT(handle->reduction_refcnt == 0); if (!handle->per_worker) _starpu_data_initialize_per_worker(handle); unsigned worker; unsigned nworkers = starpu_worker_get_count(); for (worker = 0; worker < nworkers; worker++) { struct _starpu_data_replicate *replicate; replicate = &handle->per_worker[worker]; replicate->initialized = 0; replicate->relaxed_coherency = 2; if (replicate->mc) replicate->mc->relaxed_coherency = 2; } } //#define NO_TREE_REDUCTION /* Force reduction. The lock should already have been taken. */ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle, int priority) { unsigned worker; unsigned node; unsigned empty; /* Whether the handle is initially unallocated */ /* Put every valid replicate in the same array */ unsigned replicate_count = 0; starpu_data_handle_t replicate_array[1 + STARPU_NMAXWORKERS]; _starpu_spin_checklocked(&handle->header_lock); for (node = 0; node < STARPU_MAXNODES; node++) { if (handle->per_node[node].state != STARPU_INVALID) break; } empty = node == STARPU_MAXNODES; #ifndef NO_TREE_REDUCTION if (!empty) /* Include the initial value into the reduction tree */ replicate_array[replicate_count++] = handle; #endif /* Register all valid per-worker replicates */ unsigned nworkers = starpu_worker_get_count(); STARPU_ASSERT(!handle->reduction_tmp_handles); _STARPU_MALLOC(handle->reduction_tmp_handles, nworkers*sizeof(handle->reduction_tmp_handles[0])); for (worker = 0; worker < nworkers; worker++) { if (handle->per_worker[worker].initialized) { /* Make sure the replicate is not removed */ handle->per_worker[worker].refcnt++; unsigned home_node = starpu_worker_get_memory_node(worker); starpu_data_register(&handle->reduction_tmp_handles[worker], home_node, handle->per_worker[worker].data_interface, handle->ops); starpu_data_set_sequential_consistency_flag(handle->reduction_tmp_handles[worker], 0); replicate_array[replicate_count++] = handle->reduction_tmp_handles[worker]; } else { handle->reduction_tmp_handles[worker] = NULL; } } #ifndef NO_TREE_REDUCTION if (empty) { /* Only the final copy will touch the actual handle */ handle->reduction_refcnt = 1; } else { unsigned step = 1; handle->reduction_refcnt = 0; while (step < replicate_count) { /* Each stage will touch the actual handle */ handle->reduction_refcnt++; step *= 2; } } #else /* We know that in this reduction algorithm there is exactly one task per valid replicate. */ handle->reduction_refcnt = replicate_count + empty; #endif // fprintf(stderr, "REDUX REFCNT = %d\n", handle->reduction_refcnt); if (replicate_count > #ifndef NO_TREE_REDUCTION !empty #else 0 #endif ) { /* Temporarily unlock the handle */ _starpu_spin_unlock(&handle->header_lock); #ifndef NO_TREE_REDUCTION /* We will store a pointer to the last task which should modify the * replicate */ struct starpu_task *last_replicate_deps[replicate_count]; memset(last_replicate_deps, 0, replicate_count*sizeof(struct starpu_task *)); struct starpu_task *redux_tasks[replicate_count]; /* Redux step-by-step for step from 1 to replicate_count/2, i.e. * 1-by-1, then 2-by-2, then 4-by-4, etc. */ unsigned step; unsigned redux_task_idx = 0; for (step = 1; step < replicate_count; step *=2) { unsigned i; for (i = 0; i < replicate_count; i+=2*step) { if (i + step < replicate_count) { /* Perform the reduction between replicates i * and i+step and put the result in replicate i */ struct starpu_task *redux_task = starpu_task_create(); redux_task->name = "redux_task_between_replicates"; redux_task->priority = priority; /* Mark these tasks so that StarPU does not block them * when they try to access the handle (normal tasks are * data requests to that handle are frozen until the * data is coherent again). */ struct _starpu_job *j = _starpu_get_job_associated_to_task(redux_task); j->reduction_task = 1; redux_task->cl = handle->redux_cl; redux_task->cl_arg = handle->redux_cl_arg; STARPU_ASSERT(redux_task->cl); if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0))) STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_RW|STARPU_COMMUTE, 0); if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 1))) STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_R, 1); if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0) & STARPU_COMMUTE)) { static int warned; STARPU_HG_DISABLE_CHECKING(warned); if (!warned) { warned = 1; _STARPU_DISP("Warning: for reductions, codelet %p should have STARPU_COMMUTE along STARPU_RW\n", redux_task->cl); } } STARPU_TASK_SET_HANDLE(redux_task, replicate_array[i], 0); STARPU_TASK_SET_HANDLE(redux_task, replicate_array[i+step], 1); int ndeps = 0; struct starpu_task *task_deps[2]; if (last_replicate_deps[i]) task_deps[ndeps++] = last_replicate_deps[i]; if (last_replicate_deps[i+step]) task_deps[ndeps++] = last_replicate_deps[i+step]; /* i depends on this task */ last_replicate_deps[i] = redux_task; /* we don't perform the reduction until both replicates are ready */ starpu_task_declare_deps_array(redux_task, ndeps, task_deps); /* We cannot submit tasks here : we do * not want to depend on tasks that have * been completed, so we juste store * this task : it will be submitted * later. */ redux_tasks[redux_task_idx++] = redux_task; } } } if (empty) /* The handle was empty, we just need to copy the reduced value. */ _starpu_data_cpy(handle, replicate_array[0], 1, NULL, 0, 1, last_replicate_deps[0], priority); /* Let's submit all the reduction tasks. */ unsigned i; for (i = 0; i < redux_task_idx; i++) { int ret = _starpu_task_submit_internally(redux_tasks[i]); STARPU_ASSERT(ret == 0); } #else if (empty) { struct starpu_task *redux_task = starpu_task_create(); redux_task->name = "redux_task_empty"; redux_task->priority = priority; /* Mark these tasks so that StarPU does not block them * when they try to access the handle (normal tasks are * data requests to that handle are frozen until the * data is coherent again). */ struct _starpu_job *j = _starpu_get_job_associated_to_task(redux_task); j->reduction_task = 1; redux_task->cl = handle->init_cl; redux_task->cl_arg = handle->init_cl_arg; STARPU_ASSERT(redux_task->cl); if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0))) STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_W, 0); STARPU_TASK_SET_HANDLE(redux_task, handle, 0); int ret = _starpu_task_submit_internally(redux_task); STARPU_ASSERT(!ret); } /* Create a set of tasks to perform the reduction */ unsigned replicate; for (replicate = 0; replicate < replicate_count; replicate++) { struct starpu_task *redux_task = starpu_task_create(); redux_task->name = "redux_task_reduction"; redux_task->priority = priority; /* Mark these tasks so that StarPU does not block them * when they try to access the handle (normal tasks are * data requests to that handle are frozen until the * data is coherent again). */ struct _starpu_job *j = _starpu_get_job_associated_to_task(redux_task); j->reduction_task = 1; redux_task->cl = handle->redux_cl; STARPU_ASSERT(redux_task->cl); if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0))) STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_RW, 0); if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 1))) STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_R, 1); STARPU_TASK_SET_HANDLE(redux_task, handle, 0); STARPU_TASK_SET_HANDLE(redux_task, replicate_array[replicate], 1); int ret = _starpu_task_submit_internally(redux_task); STARPU_ASSERT(!ret); } #endif /* Get the header lock back */ _starpu_spin_lock(&handle->header_lock); } for (worker = 0; worker < nworkers; worker++) { struct _starpu_data_replicate *replicate; replicate = &handle->per_worker[worker]; replicate->relaxed_coherency = 1; if (replicate->mc) replicate->mc->relaxed_coherency = 1; } } void _starpu_data_end_reduction_mode_terminate(starpu_data_handle_t handle) { unsigned nworkers = starpu_worker_get_count(); // fprintf(stderr, "_starpu_data_end_reduction_mode_terminate\n"); unsigned worker; _starpu_spin_checklocked(&handle->header_lock); for (worker = 0; worker < nworkers; worker++) { struct _starpu_data_replicate *replicate; replicate = &handle->per_worker[worker]; replicate->initialized = 0; if (handle->reduction_tmp_handles[worker]) { // fprintf(stderr, "unregister handle %p\n", handle); _starpu_spin_lock(&handle->reduction_tmp_handles[worker]->header_lock); handle->reduction_tmp_handles[worker]->lazy_unregister = 1; _starpu_spin_unlock(&handle->reduction_tmp_handles[worker]->header_lock); starpu_data_unregister_no_coherency(handle->reduction_tmp_handles[worker]); handle->per_worker[worker].refcnt--; /* TODO put in cache */ } } free(handle->reduction_tmp_handles); handle->reduction_tmp_handles = NULL; } starpu-1.4.9+dfsg/src/datawizard/sort_data_handles.c000066400000000000000000000111101507764646700225450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include /* To avoid deadlocks in case we have multiple tasks accessing the same piece * of data (eg. task T1 needs A and B, and T2 needs B and A), we need to lock * them in order, so that we need a total order over data. We must also not * lock a child before its parent. */ static void find_data_path(struct _starpu_data_state *data, unsigned path[]) { unsigned depth = data->depth; struct _starpu_data_state *current = data; /* Compute the path from the root to the data */ unsigned level; /* level is the distance between the node and the current node */ for (level = 0; level < depth; level++) { path[depth - level - 1] = current->sibling_index; current = current->father_handle; } } static int _compar_data_paths(const unsigned pathA[], unsigned depthA, const unsigned pathB[], unsigned depthB) { unsigned level; unsigned depth = STARPU_MIN(depthA, depthB); for (level = 0; level < depth; level++) { if (pathA[level] != pathB[level]) return (pathA[level] < pathB[level])?-1:1; } /* If this is the same path */ if (depthA == depthB) return 0; /* A is a subdata of B or B is a subdata of A, so the smallest one is * the father of the other (we take this convention). */ return (depthA < depthB)?-1:1; } /* A comparison function between two handles makes it possible to use qsort to * sort a list of handles */ static int _starpu_compar_handles(const struct _starpu_data_descr *descrA, const struct _starpu_data_descr *descrB) { starpu_data_handle_t dataA = descrA->handle; starpu_data_handle_t dataB = descrB->handle; /* Perhaps we have the same piece of data */ if (dataA->root_handle == dataB->root_handle) { int Awrites = descrA->mode & STARPU_W; int Bwrites = descrB->mode & STARPU_W; int Areads = descrA->mode & STARPU_R; int Breads = descrB->mode & STARPU_R; /* Process write requests first, this is needed for proper * locking, see _submit_job_access_data, * _starpu_fetch_task_input, and _starpu_push_task_output */ if (Awrites && !Bwrites) /* Only A writes, take it first */ return -1; if (!Awrites && Bwrites) /* Only B writes, take it first */ return 1; /* Both A and B write */ if (Areads && !Breads) /* Only A reads, take it first */ return -1; if (!Areads && Breads) /* Only B reads, take it first */ return 1; /* Both A and B read and write */ /* Things get more complicated: we need to find the location of dataA * and dataB within the tree. */ unsigned dataA_path[dataA->depth]; unsigned dataB_path[dataB->depth]; find_data_path(dataA, dataA_path); find_data_path(dataB, dataB_path); return _compar_data_paths(dataA_path, dataA->depth, dataB_path, dataB->depth); } /* Put arbitered accesses after non-arbitered */ if (dataA->arbiter && !(dataB->arbiter)) return 1; if (dataB->arbiter && !(dataA->arbiter)) return -1; if (dataA->arbiter != dataB->arbiter) /* Both are arbitered, sort by arbiter pointer order */ return (dataA->arbiter < dataB->arbiter)?-1:1; /* If both are arbitered by the same arbiter (or they are both not * arbitered), we'll sort them by handle */ return (dataA->root_handle < dataB->root_handle)?-1:1; } int _starpu_handles_same_root(starpu_data_handle_t dataA, starpu_data_handle_t dataB) { return dataA->root_handle == dataB->root_handle; } static int _starpu_compar_buffer_descr(const void *_descrA, const void *_descrB) { const struct _starpu_data_descr *descrA = (const struct _starpu_data_descr *) _descrA; const struct _starpu_data_descr *descrB = (const struct _starpu_data_descr *) _descrB; return _starpu_compar_handles(descrA, descrB); } /* The descr array will be overwritten, so this must be a copy ! */ void _starpu_sort_task_handles(struct _starpu_data_descr descr[], unsigned nbuffers) { qsort(descr, nbuffers, sizeof(descr[0]), _starpu_compar_buffer_descr); } starpu-1.4.9+dfsg/src/datawizard/sort_data_handles.h000066400000000000000000000031001507764646700225520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __SORT_DATA_HANDLES_H__ #define __SORT_DATA_HANDLES_H__ /** @file */ #include #include #include #include #include #include #include #pragma GCC visibility push(hidden) /** To avoid deadlocks, we reorder the different buffers accessed to by the task * so that we always grab the rw-lock associated to the handles in the same * order. */ void _starpu_sort_task_handles(struct _starpu_data_descr descr[], unsigned nbuffers); /** The reordering however puts alongside some different handles, just because * they have the same root. When avoiding to lock/acquire/load the same handle * several times, we need to keep looking among those. */ int _starpu_handles_same_root(starpu_data_handle_t dataA, starpu_data_handle_t dataB); #pragma GCC visibility pop #endif // SORT_DATA_HANDLES starpu-1.4.9+dfsg/src/datawizard/user_interactions.c000066400000000000000000000710161507764646700226420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include static void _starpu_data_check_initialized(starpu_data_handle_t handle, enum starpu_data_access_mode mode) { if (((handle->nplans && !handle->nchildren) || handle->siblings) && !(mode & STARPU_NOPLAN)) { _starpu_data_partition_access_submit(handle, (mode & STARPU_W) != 0); } if (!(mode & STARPU_R)) return; if (!handle->initialized && handle->init_cl) { int ret = starpu_task_insert(handle->init_cl, STARPU_W, handle, 0); STARPU_ASSERT(ret == 0); } STARPU_ASSERT_MSG(handle->initialized, "handle %p is not initialized while trying to read it\n", handle); } /* Explicitly ask StarPU to allocate room for a piece of data on the specified * memory node. */ int starpu_data_request_allocation(starpu_data_handle_t handle, unsigned node) { struct _starpu_data_request *r; STARPU_ASSERT(handle); _starpu_spin_lock(&handle->header_lock); r = _starpu_create_data_request(handle, NULL, &handle->per_node[node], node, STARPU_NONE, 0, NULL, STARPU_PREFETCH, 0, 0, "starpu_data_request_allocation"); /* we do not increase the refcnt associated to the request since we are * not waiting for its termination */ _starpu_post_data_request(r); _starpu_spin_unlock(&handle->header_lock); return 0; } struct user_interaction_wrapper { starpu_data_handle_t handle; enum starpu_data_access_mode mode; int node; starpu_pthread_cond_t cond; starpu_pthread_mutex_t lock; unsigned finished; unsigned detached; enum starpu_is_prefetch prefetch; unsigned async; int prio; void (*callback_acquired)(void *, int *node, enum starpu_data_access_mode mode); void (*callback)(void *); void *callback_arg; struct starpu_task *pre_sync_task; struct starpu_task *post_sync_task; }; static inline void _starpu_data_acquire_wrapper_init(struct user_interaction_wrapper *wrapper, starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode) { memset(wrapper, 0, sizeof(*wrapper)); wrapper->handle = handle; wrapper->node = node; wrapper->mode = mode; //wrapper->finished = 0; STARPU_PTHREAD_COND_INIT0(&wrapper->cond, NULL); STARPU_PTHREAD_MUTEX_INIT0(&wrapper->lock, NULL); } /* Called to signal completion of asynchronous data acquisition */ static inline void _starpu_data_acquire_wrapper_finished(struct user_interaction_wrapper *wrapper) { STARPU_PTHREAD_MUTEX_LOCK(&wrapper->lock); wrapper->finished = 1; STARPU_PTHREAD_COND_SIGNAL(&wrapper->cond); STARPU_PTHREAD_MUTEX_UNLOCK(&wrapper->lock); } /* Called to wait for completion of asynchronous data acquisition */ static inline void _starpu_data_acquire_wrapper_wait(struct user_interaction_wrapper *wrapper) { STARPU_PTHREAD_MUTEX_LOCK(&wrapper->lock); while (!wrapper->finished) STARPU_PTHREAD_COND_WAIT(&wrapper->cond, &wrapper->lock); STARPU_PTHREAD_MUTEX_UNLOCK(&wrapper->lock); } static inline void _starpu_data_acquire_wrapper_fini(struct user_interaction_wrapper *wrapper) { STARPU_PTHREAD_COND_DESTROY(&wrapper->cond); STARPU_PTHREAD_MUTEX_DESTROY(&wrapper->lock); } /* Called when the data acquisition is done, to launch the fetch into target memory */ static inline void _starpu_data_acquire_launch_fetch(struct user_interaction_wrapper *wrapper, int async, void (*callback)(void *), void *callback_arg) { int node = wrapper->node; starpu_data_handle_t handle = wrapper->handle; struct _starpu_data_replicate *replicate = node >= 0 ? &handle->per_node[node] : NULL; int ret = _starpu_fetch_data_on_node(handle, node, replicate, wrapper->mode, wrapper->detached, NULL, wrapper->prefetch, async, callback, callback_arg, wrapper->prio, "_starpu_data_acquire_launch_fetch"); STARPU_ASSERT(!ret); } /* * Non Blocking data request from application */ /* Called when fetch is done, call the callback */ static void _starpu_data_acquire_fetch_data_callback(void *arg) { struct user_interaction_wrapper *wrapper = (struct user_interaction_wrapper *) arg; starpu_data_handle_t handle = wrapper->handle; /* At that moment, the caller holds a reference to the piece of data. * We enqueue the "post" sync task in the list associated to the handle * so that it is submitted by the starpu_data_release * function. */ if (wrapper->post_sync_task) _starpu_add_post_sync_tasks(wrapper->post_sync_task, handle); wrapper->callback(wrapper->callback_arg); _starpu_data_acquire_wrapper_fini(wrapper); free(wrapper); } /* Called when the data acquisition is done, launch the fetch into target memory */ static void _starpu_data_acquire_continuation_non_blocking(void *arg) { struct user_interaction_wrapper *wrapper = (struct user_interaction_wrapper *) arg; if (wrapper->callback_acquired) /* This can change the node at will according to the current data situation */ wrapper->callback_acquired(wrapper->callback_arg, &wrapper->node, wrapper->mode); _starpu_data_acquire_launch_fetch(arg, 1, _starpu_data_acquire_fetch_data_callback, arg); } /* Called when the implicit data dependencies are done, launch the data acquisition */ static void starpu_data_acquire_cb_pre_sync_callback(void *arg) { struct user_interaction_wrapper *wrapper = (struct user_interaction_wrapper *) arg; /* * we try to get the data, if we do not succeed immediately, * we set a callback function that will be executed * automatically when the data is available again, otherwise we * fetch the data directly */ if (!_starpu_attempt_to_submit_data_request_from_apps(wrapper->handle, wrapper->mode, _starpu_data_acquire_continuation_non_blocking, wrapper)) { /* no one has locked this data yet, so we proceed immediately */ _starpu_data_acquire_continuation_non_blocking(wrapper); } } /* The data must be released by calling starpu_data_release later on */ int starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback_acquired)(void *arg, int *node, enum starpu_data_access_mode mode), void (*callback)(void *arg), void *arg, int sequential_consistency, int quick, long *pre_sync_jobid, long *post_sync_jobid, int prio) { STARPU_ASSERT(handle); STARPU_ASSERT_MSG(handle->nchildren == 0, "Acquiring a partitioned data (%p) is not possible", handle); _STARPU_LOG_IN(); /* Check that previous tasks have set a value if needed */ _starpu_data_check_initialized(handle, mode); struct user_interaction_wrapper *wrapper; _STARPU_MALLOC(wrapper, sizeof(struct user_interaction_wrapper)); _starpu_data_acquire_wrapper_init(wrapper, handle, node, mode); wrapper->async = 1; wrapper->callback_acquired = callback_acquired; wrapper->callback = callback; wrapper->callback_arg = arg; wrapper->pre_sync_task = NULL; wrapper->post_sync_task = NULL; wrapper->prio = prio; STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); int handle_sequential_consistency = handle->sequential_consistency; if (handle_sequential_consistency && sequential_consistency) { struct starpu_task *new_task; struct _starpu_job *pre_sync_job, *post_sync_job; int submit_pre_sync = 0; wrapper->pre_sync_task = starpu_task_create(); wrapper->pre_sync_task->name = "_starpu_data_acquire_cb_pre"; wrapper->pre_sync_task->detach = 1; wrapper->pre_sync_task->callback_func = starpu_data_acquire_cb_pre_sync_callback; wrapper->pre_sync_task->callback_arg = wrapper; wrapper->pre_sync_task->type = STARPU_TASK_TYPE_DATA_ACQUIRE; wrapper->pre_sync_task->priority = prio; pre_sync_job = _starpu_get_job_associated_to_task(wrapper->pre_sync_task); if (pre_sync_jobid) *pre_sync_jobid = pre_sync_job->job_id; wrapper->post_sync_task = starpu_task_create(); wrapper->post_sync_task->name = "_starpu_data_acquire_cb_release"; wrapper->post_sync_task->detach = 1; wrapper->post_sync_task->type = STARPU_TASK_TYPE_DATA_ACQUIRE; wrapper->post_sync_task->priority = prio; post_sync_job = _starpu_get_job_associated_to_task(wrapper->post_sync_task); if (post_sync_jobid) *post_sync_jobid = post_sync_job->job_id; if (quick) pre_sync_job->quick_next = post_sync_job; new_task = _starpu_detect_implicit_data_deps_with_handle(wrapper->pre_sync_task, &submit_pre_sync, wrapper->post_sync_task, &_starpu_get_job_associated_to_task(wrapper->post_sync_task)->implicit_dep_slot, handle, mode, sequential_consistency); STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); if (STARPU_UNLIKELY(new_task)) { int ret = _starpu_task_submit_internally(new_task); STARPU_ASSERT(!ret); } if (submit_pre_sync) { int ret = _starpu_task_submit_internally(wrapper->pre_sync_task); STARPU_ASSERT(!ret); } else { wrapper->pre_sync_task->detach = 0; starpu_task_destroy(wrapper->pre_sync_task); starpu_data_acquire_cb_pre_sync_callback(wrapper); } } else { if (pre_sync_jobid) *pre_sync_jobid = -1; if (post_sync_jobid) *post_sync_jobid = -1; STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); starpu_data_acquire_cb_pre_sync_callback(wrapper); } _STARPU_LOG_OUT(); return 0; } static int starpu_data_acquire_on_node_cb_sequential_consistency_quick(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency, int quick) { return starpu_data_acquire_on_node_cb_sequential_consistency_sync_jobids(handle, node, mode, NULL, callback, arg, sequential_consistency, quick, NULL, NULL, STARPU_DEFAULT_PRIO); } int starpu_data_acquire_on_node_cb_sequential_consistency(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency) { return starpu_data_acquire_on_node_cb_sequential_consistency_quick(handle, node, mode, callback, arg, sequential_consistency, 0); } int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg) { return starpu_data_acquire_on_node_cb_sequential_consistency(handle, node, mode, callback, arg, 1); } int starpu_data_acquire_cb(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg) { int home_node = handle->home_node; if (home_node < 0) home_node = STARPU_MAIN_RAM; return starpu_data_acquire_on_node_cb(handle, home_node, mode, callback, arg); } int starpu_data_acquire_cb_sequential_consistency(starpu_data_handle_t handle, enum starpu_data_access_mode mode, void (*callback)(void *), void *arg, int sequential_consistency) { int home_node = handle->home_node; if (home_node < 0) home_node = STARPU_MAIN_RAM; return starpu_data_acquire_on_node_cb_sequential_consistency(handle, home_node, mode, callback, arg, sequential_consistency); } /* * Blocking data request from application */ static inline void _starpu_data_acquire_continuation(void *arg) { struct user_interaction_wrapper *wrapper = (struct user_interaction_wrapper *) arg; starpu_data_handle_t handle = wrapper->handle; STARPU_ASSERT(handle); _starpu_data_acquire_launch_fetch(wrapper, 0, NULL, NULL); _starpu_data_acquire_wrapper_finished(wrapper); } /* The data must be released by calling starpu_data_release later on */ int starpu_data_acquire_on_node(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode) { STARPU_ASSERT(handle); STARPU_ASSERT_MSG(handle->nchildren == 0, "Acquiring a partitioned data is not possible"); _STARPU_LOG_IN(); /* unless asynchronous, it is forbidden to call this function from a callback or a codelet */ STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "Acquiring a data synchronously is not possible from a codelet or from a task callback, use starpu_data_acquire_cb instead."); /* Check that previous tasks have set a value if needed */ _starpu_data_check_initialized(handle, mode); if (node >= 0 && _starpu_data_is_multiformat_handle(handle) && _starpu_handle_needs_conversion_task(handle, node)) { struct starpu_task *task = _starpu_create_conversion_task(handle, node); int ret; _starpu_spin_lock(&handle->header_lock); handle->refcnt--; handle->busy_count--; handle->mf_node = node; _starpu_spin_unlock(&handle->header_lock); task->synchronous = 1; ret = _starpu_task_submit_internally(task); STARPU_ASSERT(!ret); } struct user_interaction_wrapper wrapper; _starpu_data_acquire_wrapper_init(&wrapper, handle, node, mode); // _STARPU_DEBUG("TAKE sequential_consistency_mutex starpu_data_acquire\n"); STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); int sequential_consistency = handle->sequential_consistency; if (sequential_consistency) { struct starpu_task *new_task; int submit_pre_sync = 0; wrapper.pre_sync_task = starpu_task_create(); wrapper.pre_sync_task->name = "_starpu_data_acquire_pre"; wrapper.pre_sync_task->detach = 0; wrapper.pre_sync_task->type = STARPU_TASK_TYPE_DATA_ACQUIRE; wrapper.post_sync_task = starpu_task_create(); wrapper.post_sync_task->name = "_starpu_data_acquire_post"; wrapper.post_sync_task->detach = 1; wrapper.post_sync_task->type = STARPU_TASK_TYPE_DATA_ACQUIRE; new_task = _starpu_detect_implicit_data_deps_with_handle(wrapper.pre_sync_task, &submit_pre_sync, wrapper.post_sync_task, &_starpu_get_job_associated_to_task(wrapper.post_sync_task)->implicit_dep_slot, handle, mode, sequential_consistency); STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); if (STARPU_UNLIKELY(new_task)) { int ret = _starpu_task_submit_internally(new_task); STARPU_ASSERT(!ret); } if (submit_pre_sync) { wrapper.pre_sync_task->synchronous = 1; int ret = _starpu_task_submit_internally(wrapper.pre_sync_task); STARPU_ASSERT(!ret); } else { wrapper.pre_sync_task->detach = 0; starpu_task_destroy(wrapper.pre_sync_task); } } else { STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); } /* * we try to get the data, if we do not succeed immediately, * we set a callback function that will be executed * automatically when the data is available again, otherwise we * fetch the data directly */ if (!_starpu_attempt_to_submit_data_request_from_apps(handle, mode, _starpu_data_acquire_continuation, &wrapper)) { /* no one has locked this data yet, so we proceed immediately */ _starpu_data_acquire_launch_fetch(&wrapper, 0, NULL, NULL); } else { _starpu_data_acquire_wrapper_wait(&wrapper); } _starpu_data_acquire_wrapper_fini(&wrapper); /* At that moment, the caller holds a reference to the piece of data. * We enqueue the "post" sync task in the list associated to the handle * so that it is submitted by the starpu_data_release * function. */ if (sequential_consistency) _starpu_add_post_sync_tasks(wrapper.post_sync_task, handle); _STARPU_LOG_OUT(); return 0; } int starpu_data_acquire(starpu_data_handle_t handle, enum starpu_data_access_mode mode) { int home_node = handle->home_node; if (home_node < 0) home_node = STARPU_MAIN_RAM; return starpu_data_acquire_on_node(handle, home_node, mode); } int starpu_data_acquire_on_node_try(starpu_data_handle_t handle, int node, enum starpu_data_access_mode mode) { STARPU_ASSERT(handle); STARPU_ASSERT_MSG(handle->nchildren == 0, "Acquiring a partitioned data is not possible"); /* it is forbidden to call this function from a callback or a codelet */ STARPU_ASSERT_MSG(_starpu_worker_may_perform_blocking_calls(), "Acquiring a data synchronously is not possible from a codelet or from a task callback, use starpu_data_acquire_cb instead."); /* Check that previous tasks have set a value if needed */ _starpu_data_check_initialized(handle, mode); int ret; STARPU_ASSERT_MSG(!_starpu_data_is_multiformat_handle(handle), "not supported yet"); STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); ret = _starpu_test_implicit_data_deps_with_handle(handle, mode); STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); if (ret) return ret; struct user_interaction_wrapper wrapper; _starpu_data_acquire_wrapper_init(&wrapper, handle, node, mode); /* * we try to get the data, if we do not succeed immediately, * we set a callback function that will be executed * automatically when the data is available again, otherwise we * fetch the data directly */ if (!_starpu_attempt_to_submit_data_request_from_apps(handle, mode, _starpu_data_acquire_continuation, &wrapper)) { /* no one has locked this data yet, so we proceed immediately */ _starpu_data_acquire_launch_fetch(&wrapper, 0, NULL, NULL); } else { _starpu_data_acquire_wrapper_wait(&wrapper); } _starpu_data_acquire_wrapper_fini(&wrapper); return 0; } int starpu_data_acquire_try(starpu_data_handle_t handle, enum starpu_data_access_mode mode) { return starpu_data_acquire_on_node_try(handle, STARPU_MAIN_RAM, mode); } /* This function must be called after starpu_data_acquire so that the * application release the data */ void starpu_data_release_to_on_node(starpu_data_handle_t handle, enum starpu_data_access_mode mode, int node) { STARPU_ASSERT(handle); if (mode == STARPU_RW) /* They are equivalent here, and current_mode is never STARPU_RW */ mode = STARPU_W; STARPU_ASSERT_MSG(mode == STARPU_NONE || mode == handle->current_mode || (mode == STARPU_R && handle->current_mode == STARPU_W), "We only support releasing from W to R"); /* In case there are some implicit dependencies, unlock the "post sync" tasks */ _starpu_unlock_post_sync_tasks(handle, mode); /* The application can now release the rw-lock */ if (node >= 0) _starpu_release_data_on_node(handle, 0, mode, &handle->per_node[node]); else { _starpu_spin_lock(&handle->header_lock); if (node == STARPU_ACQUIRE_NO_NODE_LOCK_ALL) { int i; for (i = 0; i < STARPU_MAXNODES; i++) handle->per_node[i].refcnt--; } handle->busy_count--; if (!_starpu_notify_data_dependencies(handle, mode)) _starpu_spin_unlock(&handle->header_lock); } } void starpu_data_release_on_node(starpu_data_handle_t handle, int node) { starpu_data_release_to_on_node(handle, STARPU_NONE, node); } void starpu_data_release_to(starpu_data_handle_t handle, enum starpu_data_access_mode mode) { int home_node = handle->home_node; if (home_node < 0) home_node = STARPU_MAIN_RAM; starpu_data_release_to_on_node(handle, mode, home_node); } void starpu_data_release(starpu_data_handle_t handle) { starpu_data_release_to(handle, STARPU_NONE); } static void _prefetch_data_on_node(void *arg) { struct user_interaction_wrapper *wrapper = (struct user_interaction_wrapper *) arg; starpu_data_handle_t handle = wrapper->handle; _starpu_data_acquire_launch_fetch(wrapper, wrapper->async, NULL, NULL); if (wrapper->async) free(wrapper); else _starpu_data_acquire_wrapper_finished(wrapper); _starpu_spin_lock(&handle->header_lock); if (!_starpu_notify_data_dependencies(handle, STARPU_NONE)) _starpu_spin_unlock(&handle->header_lock); } /* Prefetch data. This is the execution-time part */ static int __starpu_prefetch_data_on_node_with_mode(starpu_data_handle_t handle, unsigned node, unsigned async, enum starpu_data_access_mode mode, enum starpu_is_prefetch prefetch, int prio) { STARPU_ASSERT(handle); /* it is forbidden to call this function from a callback or a codelet */ STARPU_ASSERT_MSG(async || _starpu_worker_may_perform_blocking_calls(), "Synchronous prefetch is not possible from a task or a callback"); struct user_interaction_wrapper *wrapper; _STARPU_MALLOC(wrapper, sizeof(*wrapper)); _starpu_data_acquire_wrapper_init(wrapper, handle, node, STARPU_R); wrapper->detached = async; wrapper->prefetch = prefetch; wrapper->async = async; wrapper->prio = prio; if (!_starpu_attempt_to_submit_data_request_from_apps(handle, mode, _prefetch_data_on_node, wrapper)) { /* we can immediately proceed */ struct _starpu_data_replicate *replicate = &handle->per_node[node]; _starpu_data_acquire_launch_fetch(wrapper, async, NULL, NULL); _starpu_data_acquire_wrapper_fini(wrapper); free(wrapper); /* remove the "lock"/reference */ _starpu_spin_lock(&handle->header_lock); if (!async) { /* Release our refcnt, like _starpu_release_data_on_node would do */ replicate->refcnt--; STARPU_ASSERT(replicate->refcnt >= 0); STARPU_ASSERT(handle->busy_count > 0); handle->busy_count--; } /* In case there was a temporary handle (eg. used for reduction), this * handle may have requested to be destroyed when the data is released * */ if (!_starpu_notify_data_dependencies(handle, STARPU_NONE)) _starpu_spin_unlock(&handle->header_lock); } else if (!async) { _starpu_data_acquire_wrapper_wait(wrapper); _starpu_data_acquire_wrapper_fini(wrapper); free(wrapper); } return 0; } /* Prefetch data. This is the submission-time part */ static int _starpu_prefetch_data_on_node_with_mode(starpu_data_handle_t handle, unsigned node, unsigned async, enum starpu_data_access_mode mode, enum starpu_is_prefetch prefetch, int prio) { /* Check that previous tasks have set a value if needed */ /* Only valid at submission time, not execution time */ _starpu_data_check_initialized(handle, mode); return __starpu_prefetch_data_on_node_with_mode(handle, node, async, mode, prefetch, prio); } int starpu_data_fetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async) { return _starpu_prefetch_data_on_node_with_mode(handle, node, async, STARPU_R, STARPU_FETCH, STARPU_DEFAULT_PRIO); } int starpu_data_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio) { return _starpu_prefetch_data_on_node_with_mode(handle, node, async, STARPU_R, STARPU_PREFETCH, prio); } int starpu_data_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async) { return starpu_data_prefetch_on_node_prio(handle, node, async, STARPU_DEFAULT_PRIO); } int starpu_data_idle_prefetch_on_node_prio(starpu_data_handle_t handle, unsigned node, unsigned async, int prio) { return _starpu_prefetch_data_on_node_with_mode(handle, node, async, STARPU_R, STARPU_IDLEFETCH, prio); } int starpu_data_idle_prefetch_on_node(starpu_data_handle_t handle, unsigned node, unsigned async) { return starpu_data_idle_prefetch_on_node_prio(handle, node, async, STARPU_DEFAULT_PRIO); } /* Execution-time part */ static void _starpu_data_wont_use(void *data) { unsigned node; starpu_data_handle_t handle = data; _STARPU_TRACE_DATA_DOING_WONT_USE(handle); _starpu_spin_lock(&handle->header_lock); for (node = 0; node < STARPU_MAXNODES; node++) { struct _starpu_data_replicate *local = &handle->per_node[node]; if (local->allocated && local->automatically_allocated) _starpu_memchunk_wont_use(local->mc, node); } if (handle->per_worker) { unsigned nworkers = starpu_worker_get_count(); unsigned worker; for (worker = 0; worker < nworkers; worker++) { struct _starpu_data_replicate *local = &handle->per_worker[worker]; if (local->allocated && local->automatically_allocated) _starpu_memchunk_wont_use(local->mc, starpu_worker_get_memory_node(worker)); } } _starpu_spin_unlock(&handle->header_lock); starpu_data_release_on_node(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL); if (handle->home_node != -1) __starpu_prefetch_data_on_node_with_mode(handle, handle->home_node, 1, STARPU_R, STARPU_IDLEFETCH, STARPU_DEFAULT_PRIO); else { if (handle->ooc) { /* Try to push it to some disk */ unsigned i; unsigned nnodes = starpu_memory_nodes_get_count(); for (i = 0; i < nnodes; i++) { if (starpu_node_get_kind(i) == STARPU_DISK_RAM) __starpu_prefetch_data_on_node_with_mode(handle, i, 1, STARPU_R, STARPU_IDLEFETCH, STARPU_DEFAULT_PRIO); } } } } void starpu_data_wont_use(starpu_data_handle_t handle) { if (!handle->initialized) /* No value atm actually */ return; if (starpu_data_get_nb_children(handle) != 0) { int i; for(i=0 ; ipartitioned != 0) { unsigned i; for(i=0 ; ipartitioned; i++) { unsigned j; for(j=0 ; jactive_readonly_nchildren[i] ; j++) starpu_data_wont_use(handle->active_readonly_children[i][j]); } } if (handle->active_nchildren != 0) { unsigned j; for(j=0 ; jactive_nchildren ; j++) starpu_data_wont_use(handle->active_children[j]); return; } _STARPU_TRACE_DATA_WONT_USE(handle); starpu_data_acquire_on_node_cb_sequential_consistency_quick(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_R, _starpu_data_wont_use, handle, 1, 1); } /* * It is possible to specify that a piece of data can be discarded without * impacting the application. */ int _starpu_has_not_important_data; void starpu_data_advise_as_important(starpu_data_handle_t handle, unsigned is_important) { if (!is_important) _starpu_has_not_important_data = 1; _starpu_spin_lock(&handle->header_lock); /* first take all the children lock (in order !) */ unsigned child; for (child = 0; child < handle->nchildren; child++) { /* make sure the intermediate children is advised as well */ starpu_data_handle_t child_handle = starpu_data_get_child(handle, child); if (child_handle->nchildren > 0) starpu_data_advise_as_important(child_handle, is_important); } handle->is_not_important = !is_important; /* now the parent may be used again so we release the lock */ _starpu_spin_unlock(&handle->header_lock); } void starpu_data_set_sequential_consistency_flag(starpu_data_handle_t handle, unsigned flag) { _starpu_spin_lock(&handle->header_lock); unsigned child; for (child = 0; child < handle->nchildren; child++) { /* make sure that the flags are applied to the children as well */ starpu_data_handle_t child_handle = starpu_data_get_child(handle, child); if (child_handle->nchildren > 0) starpu_data_set_sequential_consistency_flag(child_handle, flag); } STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex); handle->sequential_consistency = flag; STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex); _starpu_spin_unlock(&handle->header_lock); } unsigned starpu_data_get_sequential_consistency_flag(starpu_data_handle_t handle) { return handle->sequential_consistency; } void starpu_data_set_ooc_flag(starpu_data_handle_t handle, unsigned flag) { handle->ooc = flag; } unsigned starpu_data_get_ooc_flag(starpu_data_handle_t handle) { return handle->ooc; } /* By default, sequential consistency is enabled */ static unsigned default_sequential_consistency_flag = 1; unsigned starpu_data_get_default_sequential_consistency_flag(void) { return default_sequential_consistency_flag; } void starpu_data_set_default_sequential_consistency_flag(unsigned flag) { default_sequential_consistency_flag = flag; } /* Query the status of the handle on the specified memory node. */ void starpu_data_query_status2(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_loading, int *is_requested) { // XXX : this is just a hint, so we don't take the lock ... // _starpu_spin_lock(&handle->header_lock); if (is_allocated) *is_allocated = handle->per_node[memory_node].allocated || handle->per_node[memory_node].mapped != STARPU_UNMAPPED; if (is_valid) *is_valid = (handle->per_node[memory_node].state != STARPU_INVALID); if (is_loading) *is_loading = handle->per_node[memory_node].load_request != NULL; if (is_requested) { int requested = 0; unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { if (handle->per_node[memory_node].request[node]) { requested = 1; break; } } *is_requested = requested; } // _starpu_spin_unlock(&handle->header_lock); } void starpu_data_query_status(starpu_data_handle_t handle, int memory_node, int *is_allocated, int *is_valid, int *is_requested) { return starpu_data_query_status2(handle, memory_node, is_allocated, is_valid, NULL, is_requested); } starpu-1.4.9+dfsg/src/datawizard/write_back.c000066400000000000000000000060071507764646700212120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include static void wt_callback(void *arg) { starpu_data_handle_t handle = (starpu_data_handle_t) arg; _starpu_spin_lock(&handle->header_lock); if (!_starpu_notify_data_dependencies(handle, STARPU_NONE)) _starpu_spin_unlock(&handle->header_lock); } void _starpu_write_through_data(starpu_data_handle_t handle, unsigned requesting_node, uint32_t write_through_mask) { if ((write_through_mask & ~(1<header_lock)) { cpt++; __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, 1); } if (cpt == STARPU_SPIN_MAXTRY) _starpu_spin_lock(&handle->header_lock); /* We need to keep a Read lock to avoid letting writers corrupt our copy. */ STARPU_ASSERT(handle->current_mode != STARPU_REDUX); STARPU_ASSERT(handle->current_mode != STARPU_SCRATCH); handle->refcnt++; handle->busy_count++; handle->current_mode = STARPU_R; struct _starpu_data_request *r; r = _starpu_create_request_to_fetch_data(handle, &handle->per_node[node], STARPU_R, NULL, STARPU_IDLEFETCH, 1, wt_callback, handle, 0, "_starpu_write_through_data"); /* If no request was created, the handle was already up-to-date on the * node */ if (r) _starpu_spin_unlock(&handle->header_lock); } } } } void starpu_data_set_wt_mask(starpu_data_handle_t handle, uint32_t wt_mask) { handle->wt_mask = wt_mask; /* in case the data has some children, set their wt_mask as well */ if (handle->nchildren > 0) { unsigned child; for (child = 0; child < handle->nchildren; child++) { starpu_data_handle_t handle_child = starpu_data_get_child(handle, child); starpu_data_set_wt_mask(handle_child, wt_mask); } } } starpu-1.4.9+dfsg/src/datawizard/write_back.h000066400000000000000000000022771507764646700212240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DW_WRITE_BACK_H__ #define __DW_WRITE_BACK_H__ /** @file */ #include #include #pragma GCC visibility push(hidden) /** If a write-through mask is associated to that data handle, this propagates * the the current value of the data onto the different memory nodes in the * write_through_mask. */ void _starpu_write_through_data(starpu_data_handle_t handle, unsigned requesting_node, uint32_t write_through_mask); #pragma GCC visibility pop #endif // __DW_WRITE_BACK_H__ starpu-1.4.9+dfsg/src/debug/000077500000000000000000000000001507764646700156655ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/debug/latency.c000066400000000000000000000036261507764646700174770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include void _starpu_benchmark_ping_pong(starpu_data_handle_t handle, unsigned node0, unsigned node1, unsigned niter) { /* We assume that no one is using that handle !! */ unsigned iter; for (iter = 0; iter < niter; iter++) { int ret; _starpu_spin_lock(&handle->header_lock); handle->refcnt++; handle->busy_count++; _starpu_spin_unlock(&handle->header_lock); struct _starpu_data_replicate *replicate_0 = &handle->per_node[node0]; ret = _starpu_fetch_data_on_node(handle, node0, replicate_0, STARPU_RW, 0, NULL, STARPU_FETCH, 0, NULL, NULL, 0, "_starpu_benchmark_ping_pong"); STARPU_ASSERT(!ret); _starpu_release_data_on_node(handle, 0, STARPU_NONE, replicate_0); _starpu_spin_lock(&handle->header_lock); handle->refcnt++; handle->busy_count++; _starpu_spin_unlock(&handle->header_lock); struct _starpu_data_replicate *replicate_1 = &handle->per_node[node1]; ret = _starpu_fetch_data_on_node(handle, node1, replicate_1, STARPU_RW, 0, NULL, STARPU_FETCH, 0, NULL, NULL, 0, "_starpu_benchmark_ping_pong"); STARPU_ASSERT(!ret); _starpu_release_data_on_node(handle, 0, STARPU_NONE, replicate_1); } } starpu-1.4.9+dfsg/src/debug/starpu_debug_helpers.h000066400000000000000000000025301507764646700222440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_DEBUG_HELPERS_H__ #define __STARPU_DEBUG_HELPERS_H__ /** @file */ #include #include #include #pragma GCC visibility push(hidden) #ifdef __cplusplus extern "C" { #endif /** Perform a ping pong between the two memory nodes */ void _starpu_benchmark_ping_pong(starpu_data_handle_t handle, unsigned node0, unsigned node1, unsigned niter) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; /** Display the size of different data structures */ void _starpu_debug_display_structures_size(FILE *stream) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; #ifdef __cplusplus } #endif #pragma GCC visibility pop #endif // __STARPU_DEBUG_HELPERS_H__ starpu-1.4.9+dfsg/src/debug/structures_size.c000066400000000000000000000034001507764646700213030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include void _starpu_debug_display_structures_size(FILE *stream) { fprintf(stream, "struct starpu_task\t\t%u bytes\t(%x)\n", (unsigned) sizeof(struct starpu_task), (unsigned) sizeof(struct starpu_task)); fprintf(stream, "struct _starpu_job\t\t%u bytes\t(%x)\n", (unsigned) sizeof(struct _starpu_job), (unsigned) sizeof(struct _starpu_job)); fprintf(stream, "struct _starpu_data_state\t%u bytes\t(%x)\n", (unsigned) sizeof(struct _starpu_data_state), (unsigned) sizeof(struct _starpu_data_state)); fprintf(stream, "struct _starpu_tag\t\t%u bytes\t(%x)\n", (unsigned) sizeof(struct _starpu_tag), (unsigned) sizeof(struct _starpu_tag)); fprintf(stream, "struct _starpu_cg\t\t%u bytes\t(%x)\n", (unsigned) sizeof(struct _starpu_cg), (unsigned) sizeof(struct _starpu_cg)); fprintf(stream, "struct _starpu_worker\t\t%u bytes\t(%x)\n", (unsigned) sizeof(struct _starpu_worker), (unsigned) sizeof(struct _starpu_worker)); } starpu-1.4.9+dfsg/src/debug/traces/000077500000000000000000000000001507764646700171465ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/debug/traces/anim.c000066400000000000000000000473511507764646700202500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2015-2015 Anthony Simonet * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "starpu_fxt.h" #ifdef STARPU_USE_FXT static struct component { UT_hash_handle hh; char *name; int workerid; uint64_t ptr; unsigned nchildren; struct component **children; struct component *parent; unsigned ntasks; unsigned npriotasks; } *components; static unsigned global_state = 1; static unsigned nsubmitted; static unsigned curq_size; static unsigned nflowing; #define COMPONENT_ADD(head, field, add) HASH_ADD(hh, head, field, sizeof(uint64_t), add); #define COMPONENT_FIND(head, find, out) HASH_FIND(hh, head, &find, sizeof(uint64_t), out); static struct component *fxt_component_root(void) { struct component *comp=NULL, *tmp=NULL; HASH_ITER(hh, components, comp, tmp) { while (comp->parent) comp = comp->parent; return comp; } return NULL; } void _starpu_fxt_component_new(uint64_t component, char *name) { struct component *comp; _STARPU_MALLOC(comp, sizeof(*comp)); if (!strncmp(name, "worker ", 7)) { comp->name = strdup("worker"); comp->workerid = atoi(name+7); } else { comp->name = strdup(name); comp->workerid = -1; } comp->ptr = component; comp->nchildren = 0; comp->children = NULL; comp->parent = NULL; comp->ntasks = 0; comp->npriotasks = 0; COMPONENT_ADD(components, ptr, comp); } void _starpu_fxt_component_deinit(void) { struct component *comp, *tmp; HASH_ITER(hh, components, comp, tmp) { HASH_DEL(components, comp); free(comp->children); free(comp->name); free(comp); } } static void fxt_component_dump(FILE *file, struct component *comp, unsigned depth) { unsigned i; fprintf(file,"%*s%s (%d %"PRIx64", %d tasks %d prio tasks)\n", 2*depth, "", comp->name, depth, comp->ptr, comp->ntasks, comp->npriotasks); for (i = 0; i < comp->nchildren; i++) if (comp->children[i]->parent == comp) fxt_component_dump(file, comp->children[i], depth+1); } void _starpu_fxt_component_dump(FILE *file) { fxt_component_dump(file, fxt_component_root(), 0); } static void fxt_worker_print(FILE *file, struct starpu_fxt_options *options, int workerid, unsigned comp_workerid, unsigned depth) { fprintf(file, "\t\t\t%*s\n", 2*depth, ""); fprintf(file, "\t\t\t%*s
    %s\n", 2*depth, "", (int) comp_workerid == workerid ? "_sched":"", options->worker_names[comp_workerid]); if (_starpu_last_codelet_symbol[comp_workerid][0]) fprintf(file, "\t\t\t%*s
    %s
    \n", 2*(depth+1), "", _starpu_last_codelet_symbol[comp_workerid]); else fprintf(file, "\t\t\t%*s
    \n", 2*(depth+1), ""); fprintf(file, "\t\t\t%*s
    ", 2*depth, ""); } static void fxt_component_print(FILE *file, struct starpu_fxt_options *options, int workerid, struct component *from, struct component *to, struct component *comp, unsigned depth) { unsigned i, n; unsigned ntasks = comp->ntasks + comp->npriotasks; if (from == comp) /* Additionally show now-empty slot */ ntasks++; for (i = 0, n = 0; i < comp->nchildren; i++) if (comp->children[i]->parent == comp) n++; fprintf(file, "\t\t\t%*s\n", 2*depth, ""); if (comp->nchildren > 0) { fprintf(file, "\t\t\t%*s\n", 2*depth, ""); for (i = 0; i < comp->nchildren; i++) if (comp->children[i]->parent == comp) { fprintf(file, "\t\t\t%*s\n", 2*depth, ""); } fprintf(file, "\t\t\t%*s\n", 2*depth, ""); } if (!strcmp(comp->name, "worker")) { fprintf(file, "\t\t\t%*s\n", 2*depth, ""); fprintf(file, "\t\t\t%*s\n", 2*depth, ""); fprintf(file, "\t\t\t%*s\n", 2*depth, ""); } fprintf(file, "\t\t\t%*s
    %s\n", 2*depth, "", n, comp->name); if (!strcmp(comp->name,"prio") || !strcmp(comp->name,"fifo") || !strcmp(comp->name,"heft") || !strcmp(comp->name,"work_stealing")) { /* Show task queue */ #define N 3 n = ntasks; if (n > N) n = N; for (i = 0; i < N-n; i++) fprintf(file, "\t\t\t%*s
    \n", 2*depth, ""); if (ntasks) { if (ntasks > N) fprintf(file, "\t\t\t%*s
    %u
    \n", 2*depth, "", from == comp ? (comp->npriotasks >= N ? "last_task_full_prio" : "last_task_full") : (comp->npriotasks >= N ? "task_prio" : "task"), comp->ntasks + comp->npriotasks); else fprintf(file, "\t\t\t%*s
    \n", 2*depth, "", from == comp ? "last_task_empty" : (comp->ntasks ? "task" : "task_prio")); for (i = 1; i < n; i++) fprintf(file, "\t\t\t%*s
    \n", 2*depth, "", n - i > comp->npriotasks ? "task" : "task_prio"); } } else { if (ntasks == 0) fprintf(file, "\t\t\t%*s
    \n", 2*depth, ""); else if (ntasks == 1) fprintf(file, "\t\t\t%*s
    \n", 2*depth, "", from == comp ? "last_task_empty" : (comp->npriotasks ? "task_prio" : "task")); else fprintf(file, "\t\t\t%*s
    %u
    \n", 2*depth, "", from == comp ? (comp->npriotasks ? "last_task_full_prio" : "last_task_full") : (comp->npriotasks ? "task_prio" : "task"), comp->ntasks + comp->npriotasks); } fprintf(file, "\t\t\t%*s
    \n", 2*depth, ""); fxt_component_print(file, options, workerid, from, to, comp->children[i], depth+1); fprintf(file, "\t\t\t%*s
    \n", 2*depth, ""); fxt_worker_print(file, options, workerid, comp->workerid, depth+1); fprintf(file, "\t\t\t%*s
    ", 2*depth, ""); } void _starpu_fxt_component_print(FILE *file, struct starpu_fxt_options *options, int workerid, struct component *from, struct component *to) { fprintf(file, "
    \n"); fxt_component_print(file, options, workerid, from, to, fxt_component_root(), 0); fprintf(file, "
    \n"); } void _starpu_fxt_component_print_header(FILE *file) { /* CSS and Javascript code from Anthony Simonet */ fprintf(file, "\n"); fprintf(file, "\n"); fprintf(file, "\t\n"); fprintf(file, "\t\t\n"); fprintf(file, "\t\t\n"); fprintf(file, "\t\t\n"); fprintf(file, "\t\t\n"); //fprintf(file, "\t\t\n"); //fprintf(file, "\t\t\n"); fprintf(file, "\t\t\n"); fprintf(file, "\t\t\n"); fprintf(file, "\t\t\n"); fprintf(file, "\t\n"); fprintf(file, "\t\n"); } static void fxt_component_print_step(FILE *file, struct starpu_fxt_options *options, double timestamp, int workerid, unsigned push, struct component *from, struct component *to) { fprintf(file, "\t\t
    \n", global_state, global_state > 1 ? "none":"block", global_state); fprintf(file, "\t\t

    Time %f, %u submitted %u ready, %s

    \n", timestamp, nsubmitted, curq_size-nflowing, push?"push":"pull"); //fprintf(file, "\t\t\t
    \n");
    	//_starpu_fxt_component_dump(file);
    	//fprintf(file, "\t\t\t
    \n"); _starpu_fxt_component_print(file, options, workerid, from, to); fprintf(file,"\t\t
    "); global_state++; } void _starpu_fxt_component_connect(uint64_t parent, uint64_t child) { struct component *parent_p, *child_p; unsigned n; COMPONENT_FIND(components, parent, parent_p); COMPONENT_FIND(components, child, child_p); STARPU_ASSERT(parent_p); STARPU_ASSERT(child_p); n = ++parent_p->nchildren; _STARPU_REALLOC(parent_p->children, n * sizeof(*parent_p->children)); parent_p->children[n-1] = child_p; if (!child_p->parent) child_p->parent = parent_p; } void _starpu_fxt_component_update_ntasks(unsigned _nsubmitted, unsigned _curq_size) { nsubmitted = _nsubmitted; curq_size = _curq_size; } void _starpu_fxt_component_push(FILE *output, struct starpu_fxt_options *options, double timestamp, int workerid, uint64_t from, uint64_t to, uint64_t task STARPU_ATTRIBUTE_UNUSED, unsigned prio) { struct component *from_p = NULL, *to_p = NULL; if (to == from) return; if (from) { COMPONENT_FIND(components, from, from_p); STARPU_ASSERT(from_p); } if (to) { COMPONENT_FIND(components, to, to_p); STARPU_ASSERT(to_p); } if (from_p) { if (prio) from_p->npriotasks--; else from_p->ntasks--; } else nflowing++; if (to_p) { if (prio) to_p->npriotasks++; else to_p->ntasks++; } // fprintf(stderr,"push from %s to %s\n", from_p?from_p->name:"none", to_p?to_p->name:"none"); fxt_component_print_step(output, options, timestamp, workerid, 1, from_p, to_p); } void _starpu_fxt_component_pull(FILE *output, struct starpu_fxt_options *options, double timestamp, int workerid, uint64_t from, uint64_t to, uint64_t task STARPU_ATTRIBUTE_UNUSED, unsigned prio) { struct component *from_p = NULL, *to_p = NULL; if (to == from) return; if (from) { COMPONENT_FIND(components, from, from_p); STARPU_ASSERT(from_p); } if (to) { COMPONENT_FIND(components, to, to_p); STARPU_ASSERT(to_p); } if (from_p) { if (prio) from_p->npriotasks--; else from_p->ntasks--; } if (to_p) { if (prio) to_p->npriotasks++; else to_p->ntasks++; } else nflowing--; // fprintf(stderr,"pull from %s to %s\n", from_p?from_p->name:"none", to_p?to_p->name:"none"); fxt_component_print_step(output, options, timestamp, workerid, 0, from_p, to_p); } void _starpu_fxt_component_finish(FILE *file) { /* Javascript code from Anthony Simonet */ fprintf(file, "\t\t\n"); fprintf(file, "\t\t
    \n"); fprintf(file, "\t\t
    \n"); fprintf(file, "\t\t\t

    \n"); fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t

    \n"); fprintf(file, "\t\t\t\tAuto speed (state/s): 4\n"); fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t

    \n"); fprintf(file, "\t\t\t

    \n"); fprintf(file, "\t\t\t
    \n"); fprintf(file, "\t\t\t\tGo to state\n"); fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t\t\n"); /* Dummy input preventing the page from being refreshed when enter is pressed. */ fprintf(file, "\t\t\t\t\n"); fprintf(file, "\t\t\t
    \n"); fprintf(file, "\t\t\t
    \n"); fprintf(file, "\t\t
    \n"); fprintf(file, "\t\n"); fprintf(file, "\n"); } #endif starpu-1.4.9+dfsg/src/debug/traces/starpu_fxt.c000066400000000000000000005013241507764646700215160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2017-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #ifdef STARPU_PAPI #include #endif #ifdef STARPU_USE_FXT #include "starpu_fxt.h" #include #include #define CPUS_WORKER_COLORS_NB 8 #define ACCEL_WORKER_COLORS_NB 9 /* How many times longer an idle period has to be before the smoothing * heuristics avoids averaging codelet gflops */ #define IDLE_FACTOR 2 static char *cpus_worker_colors[CPUS_WORKER_COLORS_NB] = {"/greens9/7", "/greens9/6", "/greens9/5", "/greens9/4", "/greens9/9", "/greens9/3", "/greens9/2", "/greens9/1" }; static char *accel_worker_colors[ACCEL_WORKER_COLORS_NB] = {"/ylorrd9/9", "/ylorrd9/6", "/ylorrd9/3", "/ylorrd9/1", "/ylorrd9/8", "/ylorrd9/7", "/ylorrd9/4", "/ylorrd9/2", "/ylorrd9/1"}; static char *worker_colors[STARPU_NMAXWORKERS]; static unsigned cpus_index = 0; static unsigned accel_index = 0; static uint64_t* number_events = NULL; static unsigned long fut_keymask; /* Get pointer to string starting at nth parameter */ static char *get_fxt_string(struct fxt_ev_64 *ev, int n) { char *s = (char *)&ev->param[n]; s[(FXT_MAX_PARAMS-n)*sizeof(unsigned long) - 1] = 0; return s; } /* * Paje trace file tools */ static FILE *out_paje_file; static FILE *distrib_time; static FILE *activity_file; static FILE *anim_file; static FILE *tasks_file; static FILE *data_file; #ifdef STARPU_PAPI static FILE *papi_file; #endif static FILE *trace_file; static FILE *comms_file; static FILE *sched_tasks_file; static FILE *number_events_file; struct data_parameter_info { unsigned long handle; unsigned long size; int mode; long numa_nodes_bitmap; }; struct task_info { UT_hash_handle hh; char *model_name; char *name; char *file; int line; int exclude_from_dag; int show; unsigned type; unsigned long job_id; unsigned long submit_order; long priority; int color; uint64_t tag; int workerid; int node; double submit_time; double start_time; double end_time; unsigned long footprint; unsigned long kflops; long iterations[2]; char *parameters; unsigned int ndeps; unsigned long *dependencies; unsigned int nend_deps; unsigned long *end_dependencies; char **dep_labels; unsigned long ndata; struct data_parameter_info *data; int mpi_rank; #ifdef STARPU_BUBBLE unsigned is_bubble; unsigned long bubble_parent; #endif }; static struct task_info *tasks_info; static struct task_info *get_task(unsigned long job_id, int mpi_rank) { struct task_info *task; HASH_FIND(hh, tasks_info, &job_id, sizeof(job_id), task); if (!task) { unsigned i; _STARPU_MALLOC(task, sizeof(*task)); task->model_name = NULL; task->name = NULL; task->file = NULL; task->line = -1; task->exclude_from_dag = 0; task->show = 0; task->type = 0; task->job_id = job_id; task->submit_order = 0; task->priority = 0; task->color = 0; task->tag = 0; task->workerid = -1; task->node = -1; task->submit_time = 0.; task->start_time = 0.; task->end_time = 0.; task->footprint = 0; task->kflops = 0.; for (i = 0; i < sizeof(task->iterations)/sizeof(task->iterations[0]); i++) task->iterations[i] = -1; task->parameters = NULL; task->ndeps = 0; task->dependencies = NULL; task->nend_deps = 0; task->end_dependencies = NULL; task->dep_labels = NULL; task->ndata = 0; task->data = NULL; task->mpi_rank = mpi_rank; #ifdef STARPU_BUBBLE task->is_bubble = 0; task->bubble_parent = 0; #endif HASH_ADD(hh, tasks_info, job_id, sizeof(task->job_id), task); } else STARPU_ASSERT(task->mpi_rank == mpi_rank); return task; } /* Return whether to show this task in the DAG or not */ static int show_task(struct task_info *task, struct starpu_fxt_options *options) { if (task->show) return 1; if (task->type & STARPU_TASK_TYPE_INTERNAL && !options->internal) return 0; if (task->type & STARPU_TASK_TYPE_DATA_ACQUIRE && options->no_acquire) return 0; return 1; } void _starpu_convert_numa_nodes_bitmap_to_str(long bitmap, char* str) { if (bitmap < 0) { sprintf(str, "%ld", bitmap); } else { long i = 0; int first = 1; for (; i < (long) (sizeof(bitmap)*8)-1; i++) { if (bitmap & ((long) 1 << i)) { if (first) { sprintf(str, "%ld", i); first = 0; } else { strcat(str, ","); char number[4]; sprintf(number, "%ld", i); strcat(str, number); } } } } } static void task_dump(struct task_info *task, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; unsigned i; if (task->exclude_from_dag) goto out; if (!tasks_file) goto out; if (task->name) fprintf(tasks_file, "Name: %s\n", task->name); if (task->model_name) fprintf(tasks_file, "Model: %s\n", task->model_name); if (task->file) { fprintf(tasks_file, "File: %s\n", task->file); fprintf(tasks_file, "Line: %d\n", task->line); } fprintf(tasks_file, "JobId: %s%lu\n", prefix, task->job_id); if (task->submit_order) fprintf(tasks_file, "SubmitOrder: %lu\n", task->submit_order); fprintf(tasks_file, "Priority: %ld\n", task->priority); if (task->dependencies) { fprintf(tasks_file, "DependsOn:"); for (i = 0; i < task->ndeps; i++) fprintf(tasks_file, " %s%lu", prefix, task->dependencies[i]); fprintf(tasks_file, "\n"); } if (task->dep_labels) { fprintf(tasks_file, "DepLabels:"); for (i = 0; i < task->ndeps; i++) fprintf(tasks_file, " %s", task->dep_labels[i]); fprintf(tasks_file, "\n"); } fprintf(tasks_file, "Tag: %"PRIx64"\n", task->tag); if (task->workerid >= 0) fprintf(tasks_file, "WorkerId: %d\n", task->workerid); if (task->node >= 0) fprintf(tasks_file, "MemoryNode: %d\n", task->node); if (task->submit_time != 0.) fprintf(tasks_file, "SubmitTime: %f\n", task->submit_time); if (task->start_time != 0.) fprintf(tasks_file, "StartTime: %f\n", task->start_time); if (task->end_time != 0.) fprintf(tasks_file, "EndTime: %f\n", task->end_time); fprintf(tasks_file, "Footprint: %lx\n", task->footprint); if (task->kflops != 0) fprintf(tasks_file, "GFlop: %f\n", ((double) task->kflops) / 1000000); if (task->iterations[0] != -1) { fprintf(tasks_file, "Iteration:"); for (i = 0; i < sizeof(task->iterations)/sizeof(task->iterations[0]); i++) { if (task->iterations[i] == -1) break; fprintf(tasks_file, " %ld", task->iterations[i]); } fprintf(tasks_file, "\n"); } if (task->parameters) fprintf(tasks_file, "Parameters: %s\n", task->parameters); if (task->data) { fprintf(tasks_file, "Handles:"); for (i = 0; i < task->ndata; i++) fprintf(tasks_file, " %lx", task->data[i].handle); fprintf(tasks_file, "\n"); fprintf(tasks_file, "Modes:"); for (i = 0; i < task->ndata; i++) fprintf(tasks_file, " %s%s%s%s%s%s", (task->data[i].mode & STARPU_R)?"R":"", (task->data[i].mode & STARPU_W)?"W":"", (task->data[i].mode & STARPU_SCRATCH)?"S":"", (task->data[i].mode & STARPU_REDUX)?"X":"", (task->data[i].mode & STARPU_MPI_REDUX)?"X-mpi":"", (task->data[i].mode & STARPU_COMMUTE)?"C":""); fprintf(tasks_file, "\n"); fprintf(tasks_file, "Sizes:"); for (i = 0; i < task->ndata; i++) fprintf(tasks_file, " %lu", task->data[i].size); fprintf(tasks_file, "\n"); fprintf(tasks_file, "NumaNodes:"); for (i = 0; i < task->ndata; i++) { char str[STARPU_TRACE_STR_LEN] = ""; _starpu_convert_numa_nodes_bitmap_to_str(task->data[i].numa_nodes_bitmap, str); fprintf(tasks_file, " %s", str); } fprintf(tasks_file, "\n"); } fprintf(tasks_file, "MPIRank: %d\n", task->mpi_rank); #ifdef STARPU_BUBBLE fprintf(tasks_file, "Bubble: %u\n", task->is_bubble); fprintf(tasks_file, "ParentBubble: %lu\n", task->bubble_parent); #endif if (task->nend_deps) { fprintf(tasks_file, "EndDependencies: "); unsigned int j=0; for(j=0 ; jnend_deps-1 ; j++) fprintf(tasks_file, "%lu, ", task->end_dependencies[j]); fprintf(tasks_file, "%lu ", task->end_dependencies[task->nend_deps-1]); } fprintf(tasks_file, "\n"); out: free(task->name); free(task->model_name); free(task->file); free(task->dependencies); if (task->dep_labels) { for (i = 0; i < task->ndeps; i++) free(task->dep_labels[i]); free(task->dep_labels); } free(task->parameters); free(task->data); HASH_DEL(tasks_info, task); free(task); } struct data_info { UT_hash_handle hh; unsigned long handle; char *name; size_t size; starpu_ssize_t max_size; char *description; unsigned dimensions; unsigned long *dims; int home_node; int mpi_rank; int mpi_owner; long mpi_tag; }; static struct data_info *data_info; static struct data_info *get_data(unsigned long handle, int mpi_rank) { struct data_info *data; HASH_FIND(hh, data_info, &handle, sizeof(handle), data); if (!data) { _STARPU_MALLOC(data, sizeof(*data)); data->handle = handle; data->name = NULL; data->size = 0; data->max_size = -1; data->description = 0; data->dimensions = 0; data->dims = NULL; data->home_node = STARPU_MAIN_RAM; data->mpi_rank = mpi_rank; data->mpi_owner = mpi_rank; data->mpi_tag = -1; HASH_ADD(hh, data_info, handle, sizeof(handle), data); } else STARPU_ASSERT(data->mpi_rank == mpi_rank); return data; } unsigned _starpu_fxt_data_get_coord(unsigned long handle, int mpi_rank, unsigned dim) { struct data_info *data = get_data(handle, mpi_rank); return data->dimensions >= dim+1 ? data->dims[dim] : 0; } const char *_starpu_fxt_data_get_name(unsigned long handle, int mpi_rank) { struct data_info *data = get_data(handle, mpi_rank); return data->name; } static void handle_papi_event(struct fxt_ev_64 *ev STARPU_ATTRIBUTE_UNUSED, struct starpu_fxt_options *options STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_PAPI int event_code = ev->param[0]; unsigned long task = ev->param[1]; long long int value = ev->param[2]; //char *prefix = options->file_prefix; if (papi_file) { char event_str[PAPI_MAX_STR_LEN]; PAPI_event_code_to_name(event_code, event_str); fprintf(papi_file, "JobId: %lu\n", task); fprintf(papi_file, "Event: %s\n", event_str); fprintf(papi_file, "Value: %lld\n", value); fprintf(papi_file, "\n"); } #endif } static void data_dump(struct data_info *data) { if (!data_file) goto out; fprintf(data_file, "Handle: %lx\n", data->handle); fprintf(data_file, "HomeNode: %d\n", data->home_node); if (data->mpi_rank >= 0) fprintf(data_file, "MPIRank: %d\n", data->mpi_rank); if (data->name) fprintf(data_file, "Name: %s\n", data->name); fprintf(data_file, "Size: %lu\n", (unsigned long) data->size); if (data->max_size != -1) fprintf(data_file, "MaxSize: %lu\n", (unsigned long) data->max_size); if (data->description) fprintf(data_file, "Description: %s\n", data->description); if (data->dimensions) { unsigned i; fprintf(data_file, "Coordinates:"); for (i = 0; i < data->dimensions; i++) fprintf(data_file, " %lu", data->dims[i]); fprintf(data_file, "\n"); } if (data->mpi_owner >= 0) fprintf(data_file, "MPIOwner: %d\n", data->mpi_owner); if (data->mpi_tag >= 0) fprintf(data_file, "MPITag: %ld\n", data->mpi_tag); fprintf(data_file, "\n"); out: free(data->dims); free(data->description); free(data->name); HASH_DEL(data_info, data); free(data); } static void set_next_cpu_worker_color(int workerid) { if (workerid >= STARPU_NMAXWORKERS) return; worker_colors[workerid] = cpus_worker_colors[cpus_index++]; if (cpus_index == CPUS_WORKER_COLORS_NB) cpus_index = 0; } static void set_next_accel_worker_color(int workerid) { if (workerid >= STARPU_NMAXWORKERS) return; worker_colors[workerid] = accel_worker_colors[accel_index++]; if (accel_index == ACCEL_WORKER_COLORS_NB) accel_index = 0; } static const char *get_worker_color(int workerid) { if (workerid >= STARPU_NMAXWORKERS) workerid = STARPU_NMAXWORKERS - 1; return worker_colors[workerid]; } static unsigned get_color_symbol_red(char *name) { /* choose some color ... that's disguting yes */ uint32_t hash_symbol = starpu_hash_crc32c_string(name, 0); return (unsigned)starpu_hash_crc32c_string("red", hash_symbol) % 1024; } static unsigned get_color_symbol_green(char *name) { /* choose some color ... that's disguting yes */ uint32_t hash_symbol = starpu_hash_crc32c_string(name, 0); return (unsigned)starpu_hash_crc32c_string("green", hash_symbol) % 1024; } static unsigned get_color_symbol_blue(char *name) { /* choose some color ... that's disguting yes */ uint32_t hash_symbol = starpu_hash_crc32c_string(name, 0); return (unsigned)starpu_hash_crc32c_string("blue", hash_symbol) % 1024; } /* Start time of last codelet for this worker */ static double last_codelet_start[STARPU_NMAXWORKERS]; /* End time of last codelet for this worker */ static double last_codelet_end[STARPU_NMAXWORKERS]; /* _STARPU_FUT_DO_PROBE5STR records only 3 longs */ char _starpu_last_codelet_symbol[STARPU_NMAXWORKERS][(FXT_MAX_PARAMS-5)*sizeof(unsigned long)]; static int last_codelet_parameter[STARPU_NMAXWORKERS]; #define MAX_PARAMETERS 8 static char last_codelet_parameter_description[STARPU_NMAXWORKERS][MAX_PARAMETERS][FXT_MAX_PARAMS*sizeof(unsigned long)]; /* If more than a period of time has elapsed, we flush the profiling info, * otherwise they are accumulated every time there is a new relevant event. */ #define ACTIVITY_PERIOD 75.0 static double last_activity_flush_timestamp[STARPU_NMAXWORKERS]; static double accumulated_sleep_time[STARPU_NMAXWORKERS]; static double accumulated_exec_time[STARPU_NMAXWORKERS]; static unsigned steal_number = 0; LIST_TYPE(_starpu_symbol_name, char *name; ) static struct _starpu_symbol_name_list symbol_list; /* List of on-going communications */ LIST_TYPE(_starpu_communication, unsigned comid; double comm_start; double bandwidth; unsigned src_node; unsigned dst_node; unsigned long size; const char *type; unsigned long handle; struct _starpu_communication *peer; ) static struct _starpu_communication_list communication_list; static double current_bandwidth_in_per_node[STARPU_MAXNODES] = {0.0}; static double current_bandwidth_out_per_node[STARPU_MAXNODES] = {0.0}; /* List of on-going computations */ LIST_TYPE(_starpu_computation, double comp_start; double gflops; struct _starpu_computation *peer; ) /* List of ongoing computations */ static struct _starpu_computation_list computation_list; /* Last computation for each worker */ static struct _starpu_computation *ongoing_computation[STARPU_NMAXWORKERS]; /* Current total GFlops */ static double current_computation; /* Time of last update of current total GFlops */ static double current_computation_time; /* * Generic tools */ #define WORKER_STATE (1 << 0) #define THREAD_STATE (1 << 1) #define COMM_THREAD_STATE (1 << 2) #define USER_THREAD_STATE (1 << 3) static struct { const char *short_name; const char *long_name; uint8_t flags; } states_list[] = { { "Fi", "FetchingInput", WORKER_STATE | THREAD_STATE }, { "Po", "PushingOutput", WORKER_STATE | THREAD_STATE }, { "P", "Progressing", WORKER_STATE | THREAD_STATE }, { "U", "Unpartitioning", WORKER_STATE | THREAD_STATE }, { "B", "Overhead", WORKER_STATE | THREAD_STATE }, { "Ps", "Parallel sync", WORKER_STATE | THREAD_STATE }, { "In", "Initializing", WORKER_STATE | THREAD_STATE }, { "D", "Deinitializing", WORKER_STATE | THREAD_STATE }, { "E", "Executing", WORKER_STATE | THREAD_STATE }, { "C", "Callback", WORKER_STATE | THREAD_STATE | USER_THREAD_STATE }, { "H", "Hypervisor", WORKER_STATE | THREAD_STATE }, { "Sc", "Scheduling", WORKER_STATE | THREAD_STATE | USER_THREAD_STATE }, { "I", "Idle", WORKER_STATE | THREAD_STATE }, { "Sl", "Sleeping", WORKER_STATE | THREAD_STATE | COMM_THREAD_STATE }, { "Bu", "Building task", THREAD_STATE | COMM_THREAD_STATE | USER_THREAD_STATE }, { "Su", "Submitting task", THREAD_STATE | COMM_THREAD_STATE | USER_THREAD_STATE }, { "Th", "Throttling task submission", THREAD_STATE | COMM_THREAD_STATE | USER_THREAD_STATE }, { "MD", "Decoding task for MPI", THREAD_STATE | USER_THREAD_STATE }, { "MPr", "Preparing task for MPI", THREAD_STATE | USER_THREAD_STATE }, { "MPo", "Post-processing task for MPI", THREAD_STATE | USER_THREAD_STATE }, { "P", "Processing", COMM_THREAD_STATE }, { "UT", "UserTesting", COMM_THREAD_STATE }, { "UW", "UserWaiting", COMM_THREAD_STATE }, { "SdS", "SendSubmitted", COMM_THREAD_STATE }, { "RvS", "ReceiveSubmitted", COMM_THREAD_STATE }, { "SdC", "SendCompleted", COMM_THREAD_STATE }, { "RvC", "ReceiveCompleted", COMM_THREAD_STATE }, { "W", "Waiting task", THREAD_STATE | USER_THREAD_STATE }, { "WA", "Waiting all tasks", THREAD_STATE | USER_THREAD_STATE }, { "No", "Nothing", THREAD_STATE | USER_THREAD_STATE }, }; static const char *get_state_name(const char *short_name, uint32_t states) { unsigned i; for (i = 0; i < sizeof(states_list) / sizeof(states_list[0]); i++) if ((states_list[i].flags & states) && !strcmp(states_list[i].short_name, short_name)) return states_list[i].long_name; return short_name; } static double compute_time_stamp(double ev_time, struct starpu_fxt_options *options) { double offset = 0; if (options->file_offset.nb_barriers < 2) { offset = (double) options->file_offset.offset_start; } else { /* Since a clock drift can happen during the execution, the offset to * apply at the beginning of the trace can be different from the one * to apply at the end of the trace. Thus, we make an interpolation to * know what is the offset at the considerated time. */ double xA = (double) options->file_offset.local_time_start; double xB = (double) options->file_offset.local_time_end; double yA = (double) options->file_offset.offset_start; double yB = (double) options->file_offset.offset_end; /* We interpolate offset only for times between the two synchronization * barriers, because outside of this interval, applying the * interpolated offset can lead to negative times... Moreover, * timestamps of events outside of this interval don't need to be * precise (events describing the machine, StarPU's initialization...) * */ if (ev_time <= xA) { offset = yA; } else if (ev_time >= xB) { offset = yB; } else { offset = ((yB-yA) / (xB-xA)) * (ev_time-xA) + yA; #ifndef STARPU_NO_ASSERT // Check that the offset is correctly inside the interval: if (yB > yA) { STARPU_ASSERT(offset >= yA && offset <= yB); } else { STARPU_ASSERT(offset <= yA && offset >= yB); } #endif } } STARPU_ASSERT((ev_time + offset) >= 0); return (ev_time + offset) / 1000000.0; } static double get_event_time_stamp(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double ev_time = (double) ev->time; return compute_time_stamp(ev_time, options); } /* * Auxiliary functions for poti handling names */ #ifdef STARPU_HAVE_POTI static char *memnode_container_alias(char *output, int len, const char *prefix, long unsigned int memnodeid) { snprintf(output, len, "%smn%lu", prefix, memnodeid); return output; } static char *memmanager_container_alias(char *output, int len, const char *prefix, long unsigned int memnodeid) { snprintf(output, len, "%smm%lu", prefix, memnodeid); return output; } static char *thread_container_alias(char *output, int len, const char *prefix, long unsigned int threadid) { snprintf(output, len, "%st%lu", prefix, threadid); return output; } static char *worker_container_alias(char *output, int len, const char *prefix, long unsigned int workerid) { snprintf(output, len, "%sw%lu", prefix, workerid); return output; } static char *mpicommthread_container_alias(char *output, int len, const char *prefix) { snprintf(output, len, "%smpict", prefix); return output; } static char *program_container_alias(char *output, int len, const char *prefix) { snprintf(output, len, "%sp", prefix); return output; } static char *scheduler_container_alias(char *output, int len, const char *prefix) { snprintf(output, len, "%ssched", prefix); return output; } #endif static int nworkers = 0; static struct worker_entry { UT_hash_handle hh; unsigned long tid; int workerid; int sync; /* Set only for workers which are part of the same set, i.e. on thread drivers several workers */ } *worker_ids; static int register_thread(unsigned long nodeid, unsigned long tid, int workerid, int sync) { struct worker_entry *entry = NULL; tid = nodeid*tid+tid; HASH_FIND(hh, worker_ids, &tid, sizeof(tid), entry); /* only register a thread once */ if (entry) return 0; _STARPU_MALLOC(entry, sizeof(*entry)); entry->tid = tid; entry->workerid = workerid; entry->sync = sync; HASH_ADD(hh, worker_ids, tid, sizeof(tid), entry); return 1; } static void free_worker_ids(void) { struct worker_entry *entry, *tmp; HASH_ITER(hh, worker_ids, entry, tmp) { HASH_DEL(worker_ids, entry); free(entry); } } static int register_worker_id(unsigned long nodeid, unsigned long tid, int workerid, int sync) { nworkers++; STARPU_ASSERT_MSG_ALWAYS(workerid < STARPU_NMAXWORKERS, "Too many workers in this trace, please increase in ./configure invocation the maximum number of CPUs and GPUs to the same value as was used for execution"); return register_thread(nodeid, tid, workerid, sync); } static int prefixTOnodeid (const char *prefix) { //if we are a single-node trace, prefix is empty, so return 0 if (strcmp(prefix, "")==0) return 0; char *str = strdup(prefix); str[strlen(prefix)-1] = '\0'; unsigned long nodeid = atoi(str); free(str); return nodeid; } /* Register user threads if not done already */ static void register_user_thread(double timestamp, unsigned long tid, const char *prefix) { if (register_thread(prefixTOnodeid(prefix), tid, -1, 0) && out_paje_file) { #ifdef STARPU_HAVE_POTI char program_container[STARPU_POTI_STR_LEN]; program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix); char new_thread_container_alias[STARPU_POTI_STR_LEN]; thread_container_alias(new_thread_container_alias, STARPU_POTI_STR_LEN, prefix, tid); char new_thread_container_name[STARPU_POTI_STR_LEN]; snprintf(new_thread_container_name, sizeof(new_thread_container_name), "%sUserThread%lu", prefix, tid); poti_CreateContainer(timestamp, new_thread_container_alias, "UT", program_container, new_thread_container_alias); #else fprintf(out_paje_file, "7 %.9f %st%lu UT %sp %sUserThread%lu\n", timestamp, prefix, tid, prefix, prefix, tid); #endif } } static void register_mpi_thread(unsigned long nodeid, unsigned long tid) { register_thread(nodeid, tid, -2, 0); } static int find_worker_id(unsigned long nodeid, unsigned long tid) { struct worker_entry *entry; tid = nodeid*tid+tid; HASH_FIND(hh, worker_ids, &tid, sizeof(tid), entry); if (!entry) return -1; return entry->workerid; } /* check whether this thread manages several workers */ static int find_sync(unsigned long nodeid, unsigned long tid) { struct worker_entry *entry; tid = nodeid*tid+tid; HASH_FIND(hh, worker_ids, &tid, sizeof(tid), entry); if (!entry) return 0; return entry->sync; } static void update_accumulated_time(int worker, double sleep_time, double exec_time, double current_timestamp, int forceflush) { accumulated_sleep_time[worker] += sleep_time; accumulated_exec_time[worker] += exec_time; /* If sufficient time has elapsed since the last flush, we have a new * point in our graph */ double elapsed = current_timestamp - last_activity_flush_timestamp[worker]; if (forceflush || (elapsed > ACTIVITY_PERIOD)) { if (activity_file) fprintf(activity_file, "%d\t%.9f\t%.9f\t%.9f\t%.9f\n", worker, current_timestamp, elapsed, accumulated_exec_time[worker], accumulated_sleep_time[worker]); /* reset the accumulated times */ last_activity_flush_timestamp[worker] = current_timestamp; accumulated_sleep_time[worker] = 0.0; accumulated_exec_time[worker] = 0.0; } } static void memnode_set_state(double time, const char *prefix, unsigned int memnodeid, const char *name) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; memmanager_container_alias(container, STARPU_POTI_STR_LEN, prefix, memnodeid); poti_SetState(time, container, "MS", name); #else fprintf(out_paje_file, "10 %.9f %smm%u MS %s\n", time, prefix, memnodeid, name); #endif } static void memnode_push_state(double time, const char *prefix, unsigned int memnodeid, const char *name) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; memmanager_container_alias(container, STARPU_POTI_STR_LEN, prefix, memnodeid); poti_PushState(time, container, "MS", name); #else fprintf(out_paje_file, "11 %.9f %smm%u MS %s\n", time, prefix, memnodeid, name); #endif } static void memnode_pop_state(double time, const char *prefix, unsigned int memnodeid) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; memmanager_container_alias(container, STARPU_POTI_STR_LEN, prefix, memnodeid); poti_PopState(time, container, "MS"); #else fprintf(out_paje_file, "12 %.9f %smm%u MS\n", time, prefix, memnodeid); #endif } static void memnode_event(double time, const char *prefix, unsigned int memnodeid, const char *name, unsigned long handle, unsigned long value, unsigned long info, long size_prio, unsigned int dest, struct starpu_fxt_options *options) { if (!options->memory_states) return; // If there is not a valid memory node, we cannot associate it if((int)memnodeid < 0) return; #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; char p_handle[STARPU_POTI_STR_LEN]; char p_value[STARPU_POTI_STR_LEN]; memmanager_container_alias(container, STARPU_POTI_STR_LEN, prefix, memnodeid); snprintf(p_handle, sizeof(p_handle), "%lx", handle); snprintf(p_value, sizeof(p_value), "%lx", value); #ifdef HAVE_POTI_USER_NEWEVENT char p_dest[STARPU_POTI_STR_LEN]; char p_info[STARPU_POTI_STR_LEN]; char p_size[STARPU_POTI_STR_LEN]; memmanager_container_alias(p_dest, STARPU_POTI_STR_LEN, prefix, dest); snprintf(p_info, sizeof(p_info), "%lu", info); snprintf(p_size, sizeof(p_size), "%ld", size_prio); poti_user_NewEvent(_starpu_poti_MemoryEvent, time, container, name, p_value, 4, p_handle, p_info, p_size, p_dest); #else poti_NewEvent(time, container, name, p_handle); #endif #else fprintf(out_paje_file, "22 %.9f %s %smm%u %lx %lx %lu %ld %smm%u\n", time, name, prefix, memnodeid, value, handle, info, size_prio, prefix, dest); #endif } static void worker_set_state(double time, const char *prefix, long unsigned int workerid, const char *name) { if (fut_keymask == FUT_KEYMASK0) return; if (!out_paje_file) return; #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, workerid); poti_SetState(time, container, "WS", name); #else fprintf(out_paje_file, "10 %.9f %sw%lu WS \"%s\"\n", time, prefix, workerid, name); #endif } static void worker_push_state(double time, const char *prefix, long unsigned int workerid, const char *name) { if (fut_keymask == FUT_KEYMASK0) return; if (!out_paje_file) return; #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, workerid); poti_PushState(time, container, "WS", name); #else fprintf(out_paje_file, "11 %.9f %sw%lu WS %s\n", time, prefix, workerid, name); #endif } static void worker_pop_state(double time, const char *prefix, long unsigned int workerid) { if (fut_keymask == FUT_KEYMASK0) return; if (!out_paje_file) return; #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, workerid); poti_PopState(time, container, "WS"); #else fprintf(out_paje_file, "12 %.9f %sw%lu WS\n", time, prefix, workerid); #endif } static void thread_set_state(double time, const char *prefix, long unsigned int threadid, const char *name, long job_id) { if (find_sync(prefixTOnodeid(prefix), threadid)) /* Unless using worker sets, collapse thread and worker */ return worker_set_state(time, prefix, find_worker_id(prefixTOnodeid(prefix), threadid), name); if (!out_paje_file) return; #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid); if (job_id >= 0) { char jobid_str[STARPU_POTI_STR_LEN]; snprintf(jobid_str, sizeof(jobid_str), "%s%lu", prefix, job_id); poti_user_SetState(_starpu_poti_JobState, time, container, "S", name, 1, jobid_str); } else poti_SetState(time, container, "S", name); #else if (job_id >= 0) fprintf(out_paje_file, "26 %.9f %st%lu S %s %ld\n", time, prefix, threadid, name, job_id); else fprintf(out_paje_file, "10 %.9f %st%lu S %s\n", time, prefix, threadid, name); #endif } #if 0 /* currently unused */ static void user_thread_set_state(double time, const char *prefix, long unsigned int threadid, const char *name) { register_user_thread(time, threadid, prefix); if (!out_paje_file) return; #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid); poti_SetState(time, container, "US", name); #else fprintf(out_paje_file, "10 %.9f %st%lu US %s\n", time, prefix, threadid, name); #endif } #endif static void user_thread_push_state(double time, const char *prefix, long unsigned int threadid, const char *name) { register_user_thread(time, threadid, prefix); if (out_paje_file) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid); poti_PushState(time, container, "US", name); #else fprintf(out_paje_file, "11 %.9f %st%lu US %s\n", time, prefix, threadid, name); #endif } } static void user_thread_pop_state(double time, const char *prefix, long unsigned int threadid) { register_user_thread(time, threadid, prefix); if (out_paje_file) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid); poti_PopState(time, container, "US"); #else fprintf(out_paje_file, "12 %.9f %st%lu US\n", time, prefix, threadid); #endif } } static void thread_push_state(double time, const char *prefix, long unsigned int threadid, const char *name) { if (find_sync(prefixTOnodeid(prefix), threadid)) /* Unless using worker sets, collapse thread and worker */ return worker_push_state(time, prefix, find_worker_id(prefixTOnodeid(prefix), threadid), name); if (!out_paje_file) return; #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid); poti_PushState(time, container, "S", name); #else fprintf(out_paje_file, "11 %.9f %st%lu S %s\n", time, prefix, threadid, name); #endif } static void thread_pop_state(double time, const char *prefix, long unsigned int threadid) { if (find_sync(prefixTOnodeid(prefix), threadid)) /* Unless using worker sets, collapse thread and worker */ return worker_pop_state(time, prefix, find_worker_id(prefixTOnodeid(prefix), threadid)); if (!out_paje_file) return; #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, threadid); poti_PopState(time, container, "S"); #else fprintf(out_paje_file, "12 %.9f %st%lu S\n", time, prefix, threadid); #endif } static void worker_set_detailed_state(double time, const char *prefix, long unsigned int workerid, const char *name, unsigned long size, const char *parameters, unsigned long footprint, unsigned long long tag, unsigned long job_id, double gflop, unsigned X, unsigned Y, unsigned Z STARPU_ATTRIBUTE_UNUSED, long iteration, long subiteration, const char* numa_nodes, struct starpu_fxt_options *options) { struct task_info *task = get_task(job_id, options->file_rank); #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, workerid); char size_str[STARPU_POTI_STR_LEN]; char parameters_str[STARPU_POTI_STR_LEN]; char footprint_str[STARPU_POTI_STR_LEN]; char tag_str[STARPU_POTI_STR_LEN]; char jobid_str[STARPU_POTI_STR_LEN]; char submitorder_str[STARPU_POTI_STR_LEN]; char priority_str[STARPU_POTI_STR_LEN]; char gflop_str[STARPU_POTI_STR_LEN]; char X_str[STARPU_POTI_STR_LEN], Y_str[STARPU_POTI_STR_LEN], Z_str[STARPU_POTI_STR_LEN]; char iteration_str[STARPU_POTI_STR_LEN], subiteration_str[STARPU_POTI_STR_LEN]; snprintf(size_str, sizeof(size_str), "%lu", size); snprintf(parameters_str, sizeof(parameters_str), "%s", parameters); snprintf(footprint_str, sizeof(footprint_str), "%08lx", footprint); snprintf(tag_str, sizeof(tag_str), "%016llx", tag); snprintf(jobid_str, sizeof(jobid_str), "%s%lu", prefix, job_id); snprintf(submitorder_str, sizeof(submitorder_str), "%s%lu", prefix, task->submit_order); snprintf(priority_str, sizeof(priority_str), "%lu", task->priority); snprintf(gflop_str, sizeof(gflop_str), "%f", gflop); snprintf(X_str, sizeof(X_str), "%u", X); snprintf(Y_str, sizeof(Y_str), "%u", Y); snprintf(Z_str, sizeof(Z_str), "%u", Z); snprintf(iteration_str, sizeof(iteration_str), "%ld", iteration); snprintf(subiteration_str, sizeof(subiteration_str), "%ld", subiteration); #ifdef HAVE_POTI_INIT_CUSTOM poti_user_SetState(_starpu_poti_extendedSetState, time, container, "WS", name, 13, size_str, parameters_str, footprint_str, tag_str, jobid_str, submitorder_str, priority_str, gflop_str, X_str, Y_str, /* Z_str, */ iteration_str, subiteration_str, numa_nodes); #else poti_SetState(time, container, "WS", name); #endif #else fprintf(out_paje_file, "20 %.9f %sw%lu WS \"%s\" %lu \"%s\" %08lx %016llx %s%lu %s%lu %lu %f %u %u "/*"%u "*/"%ld %ld \"%s\"\n", time, prefix, workerid, name, size, parameters, footprint, tag, prefix, job_id, prefix, task->submit_order, task->priority, gflop, X, Y, /*Z,*/ iteration, subiteration, numa_nodes); #endif } static void mpicommthread_set_state(double time, const char *prefix, const char *name) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; mpicommthread_container_alias(container, STARPU_POTI_STR_LEN, prefix); poti_SetState(time, container, "CtS", name); #else fprintf(out_paje_file, "10 %.9f %smpict CtS %s\n", time, prefix, name); #endif } static void mpicommthread_push_state(double time, const char *prefix, const char *name) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; mpicommthread_container_alias(container, STARPU_POTI_STR_LEN, prefix); poti_PushState(time, container, "CtS", name); #else fprintf(out_paje_file, "11 %.9f %smpict CtS %s\n", time, prefix, name); #endif } static void mpicommthread_pop_state(double time, const char *prefix) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; mpicommthread_container_alias(container, STARPU_POTI_STR_LEN, prefix); poti_PopState(time, container, "CtS"); #else fprintf(out_paje_file, "12 %.9f %smpict CtS\n", time, prefix); #endif } static void recfmt_dump_state(double time, const char *event, int workerid, long int threadid, const char *name, const char *type) { fprintf(trace_file, "E: %s\n", event); if (name) fprintf(trace_file, "N: %s\n", name); if (type) fprintf(trace_file, "C: %s\n", type); fprintf(trace_file, "W: %d\n", workerid); if (threadid == -1) fprintf(trace_file, "T: -1\n"); else fprintf(trace_file, "T: %ld\n", threadid); fprintf(trace_file, "S: %f\n", time); fprintf(trace_file, "\n"); } static void recfmt_set_state(double time, int workerid, long int threadid, const char *name, const char *type) { recfmt_dump_state(time, "SetState", workerid, threadid, name, type); } static void recfmt_push_state(double time, int workerid, long unsigned int threadid, const char *name, const char *type) { recfmt_dump_state(time, "PushState", workerid, threadid, name, type); } static void recfmt_pop_state(double time, int workerid, long unsigned int threadid) { recfmt_dump_state(time, "PopState", workerid, threadid, NULL, NULL); } static void recfmt_worker_set_state(double time, int workerid, const char *name, const char *type) { const char *state_name; /* Special case for task events. */ if (!strcmp(type, "Task")) state_name = name; else state_name = get_state_name(name, WORKER_STATE); recfmt_set_state(time, workerid, -1, state_name, type); } static void recfmt_thread_set_state(double time, unsigned long nodeid, long unsigned int threadid, const char *name, const char *type) { const char *state_name; /* Special case for the end event which is somehow a fake. */ if (!strcmp(name, "End") && !type) state_name = name; else state_name = get_state_name(name, THREAD_STATE); recfmt_set_state(time, find_worker_id(nodeid, threadid), threadid, state_name, type); } static void recfmt_thread_push_state(double time, unsigned long nodeid, long unsigned int threadid, const char *name, const char *type) { const char *state_name = get_state_name(name, THREAD_STATE); recfmt_push_state(time, find_worker_id(nodeid, threadid), threadid, state_name, type); } static void recfmt_thread_pop_state(double time, unsigned long nodeid, long unsigned int threadid) { recfmt_pop_state(time, find_worker_id(nodeid, threadid), threadid); } static void recfmt_mpicommthread_set_state(double time, const char *name) { const char *state_name = get_state_name(name, COMM_THREAD_STATE); recfmt_set_state(time, -1, 0, state_name, "MPI"); /* XXX */ } static void recfmt_mpicommthread_push_state(double time, const char *name) { const char *state_name = get_state_name(name, COMM_THREAD_STATE); recfmt_push_state(time, -1, 0, state_name, "MPI"); /* XXX */ } static void recfmt_mpicommthread_pop_state(double time) { recfmt_pop_state(time, -1, 0); } static void recfmt_user_thread_push_state(double time, long unsigned threadid, const char *name, const char *type) { const char *state_name = get_state_name(name, USER_THREAD_STATE); recfmt_push_state(time, -1, threadid, state_name, type); } static void recfmt_user_thread_pop_state(double time, long unsigned threadid) { recfmt_pop_state(time, -1, threadid); } /* * Fill both paje file and trace file */ static void do_worker_set_state(double time, const char *prefix, int workerid, const char *name, const char *type) { if (out_paje_file) worker_set_state(time, prefix, workerid, name); if (trace_file) recfmt_worker_set_state(time, workerid, name, type); } static void do_thread_set_state(double time, const char *prefix, long unsigned int threadid, const char *name, const char *type, long job_id) { if (out_paje_file) thread_set_state(time, prefix, threadid, name, job_id); if (trace_file) recfmt_thread_set_state(time, prefixTOnodeid(prefix), threadid, name, type); } static void do_thread_push_state(double time, const char *prefix, long unsigned int threadid, const char *name, const char *type) { if (out_paje_file) thread_push_state(time, prefix, threadid, name); if (trace_file) recfmt_thread_push_state(time, prefixTOnodeid(prefix), threadid, name, type); } static void do_thread_pop_state(double time, const char *prefix, long unsigned int threadid) { if (out_paje_file) thread_pop_state(time, prefix, threadid); if (trace_file) recfmt_thread_pop_state(time, prefixTOnodeid(prefix), threadid); } static void do_mpicommthread_set_state(double time, const char *prefix, const char *name) { if (out_paje_file) mpicommthread_set_state(time, prefix, name); if (trace_file) recfmt_mpicommthread_set_state(time, name); } static void do_mpicommthread_push_state(double time, const char *prefix, const char *name) { if (out_paje_file) mpicommthread_push_state(time, prefix, name); if (trace_file) recfmt_mpicommthread_push_state(time, name); } static void do_mpicommthread_pop_state(double time, const char *prefix) { if (out_paje_file) mpicommthread_pop_state(time, prefix); if (trace_file) recfmt_mpicommthread_pop_state(time); } static void do_user_thread_push_state(double time, const char *prefix, long unsigned threadid, const char *name, const char *type) { if (out_paje_file) user_thread_push_state(time, prefix, threadid, name); if (trace_file) recfmt_user_thread_push_state(time, threadid, name, type); } static void do_user_thread_pop_state(double time, const char *prefix, long unsigned threadid) { if (out_paje_file) user_thread_pop_state(time, prefix, threadid); if (trace_file) recfmt_user_thread_pop_state(time, threadid); } /* * Multiplex between thread types */ static void do_thread_push_state_worker(double time, const char *prefix, long unsigned int threadid, const char *name, const char *type, const char *user_type) { int worker = find_worker_id(prefixTOnodeid(prefix), threadid); if (worker >= 0) { do_thread_push_state(time, prefix, threadid, name, type); } else if (worker == -2) { /* MPI thread */ do_mpicommthread_push_state(time, prefix, name); } else { do_user_thread_push_state(time, prefix, threadid, name, user_type); } } static void do_thread_pop_state_worker(double time, const char *prefix, long unsigned int threadid) { int worker = find_worker_id(prefixTOnodeid(prefix), threadid); if (worker >= 0) { do_thread_pop_state(time, prefix, threadid); } else if (worker == -2) { /* MPI thread */ do_mpicommthread_pop_state(time, prefix); } else { do_user_thread_pop_state(time, prefix, threadid); } } /* * Initialization */ static void handle_new_mem_node(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; if (out_paje_file) { double now = get_event_time_stamp(ev, options); #ifdef STARPU_HAVE_POTI char program_container[STARPU_POTI_STR_LEN]; program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix); char new_memnode_container_alias[STARPU_POTI_STR_LEN], new_memnode_container_name[STARPU_POTI_STR_LEN]; char new_memmanager_container_alias[STARPU_POTI_STR_LEN], new_memmanager_container_name[STARPU_POTI_STR_LEN]; memnode_container_alias(new_memnode_container_alias, STARPU_POTI_STR_LEN, prefix, ev->param[0]); /* TODO: ramkind */ snprintf(new_memnode_container_name, sizeof(new_memnode_container_name), "%sMEMNODE%"PRIu64"", prefix, ev->param[0]); poti_CreateContainer(now, new_memnode_container_alias, "Mn", program_container, new_memnode_container_name); memmanager_container_alias(new_memmanager_container_alias, STARPU_POTI_STR_LEN, prefix, ev->param[0]); /* TODO: ramkind */ snprintf(new_memmanager_container_name, sizeof(new_memmanager_container_name), "%sMEMMANAGER%"PRIu64"", prefix, ev->param[0]); poti_CreateContainer(now, new_memmanager_container_alias, "Mm", new_memnode_container_alias, new_memmanager_container_name); #else fprintf(out_paje_file, "7 %.9f %smn%"PRIu64" Mn %sp %sMEMNODE%"PRIu64"\n", now, prefix, ev->param[0], prefix, options->file_prefix, ev->param[0]); fprintf(out_paje_file, "7 %.9f %smm%"PRIu64" Mm %smn%"PRIu64" %sMEMMANAGER%"PRIu64"\n", now, prefix, ev->param[0], prefix, ev->param[0], options->file_prefix, ev->param[0]); #endif if (!options->no_bus) { #ifdef STARPU_HAVE_POTI poti_SetVariable(now, new_memmanager_container_alias, "use", 0.0); poti_SetVariable(now, new_memmanager_container_alias, "bwi_mm", 0.0); poti_SetVariable(now, new_memmanager_container_alias, "bwo_mm", 0.0); #else fprintf(out_paje_file, "13 %.9f %smm%"PRIu64" use 0.0\n", now, prefix, ev->param[0]); fprintf(out_paje_file, "13 %.9f %smm%"PRIu64" bwi_mm 0.0\n", now, prefix, ev->param[0]); fprintf(out_paje_file, "13 %.9f %smm%"PRIu64" bwo_mm 0.0\n", now, prefix, ev->param[0]); #endif } } } /* * Function that creates a synthetic stream id based on the order they appear from the trace */ static int create_ordered_stream_id(int nodeid, int devid) { static int stable[STARPU_FXT_MAX_FILES][STARPU_MAXCUDADEVS]; STARPU_ASSERT(nodeid < STARPU_FXT_MAX_FILES); STARPU_ASSERT(devid < STARPU_MAXCUDADEVS); return stable[nodeid][devid]++; } static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { /* arg0 : type of worker (cuda, cpu ..) arg1 : memory node arg2 : thread id */ char *prefix = options->file_prefix; int devid = ev->param[2]; int workerid = ev->param[1]; int nodeid = ev->param[3]; int bindid = ev->param[4]; int set = ev->param[5]; long unsigned int threadid = ev->param[6]; int new_thread; new_thread = register_worker_id(prefixTOnodeid(prefix), threadid, workerid, set); const char *kindstr; struct starpu_perfmodel_arch arch; arch.ndevices = 1; _STARPU_MALLOC(arch.devices, sizeof(struct starpu_perfmodel_device)); enum starpu_worker_archtype archtype = _STARPU_FUT_KEY_WORKER(ev->param[0]); STARPU_ASSERT(archtype < STARPU_NARCH); kindstr = starpu_worker_get_type_as_string(archtype); arch.devices[0].type = archtype; arch.devices[0].devid = 0; arch.devices[0].ncores = 1; if (archtype == STARPU_CPU_WORKER) set_next_cpu_worker_color(workerid); else set_next_accel_worker_color(workerid); double now = get_event_time_stamp(ev, options); if (out_paje_file) { char new_worker_container_name[STARPU_TRACE_STR_LEN]; if (arch.devices[0].type == STARPU_CUDA_WORKER) { // If CUDA, workers might be streams, so create an unique name for each of them int streamid = create_ordered_stream_id(prefixTOnodeid(prefix), devid); snprintf(new_worker_container_name, sizeof(new_worker_container_name), "%s%s%d_%d", prefix, kindstr, devid, streamid); } else { // If not CUDA, we suppose worker name is the prefix, the kindstr, and the devid snprintf(new_worker_container_name, sizeof(new_worker_container_name), "%s%s%d", prefix, kindstr, devid); } #ifdef STARPU_HAVE_POTI char new_thread_container_alias[STARPU_POTI_STR_LEN]; thread_container_alias(new_thread_container_alias, STARPU_POTI_STR_LEN, prefix, threadid); char new_worker_container_alias[STARPU_POTI_STR_LEN]; worker_container_alias(new_worker_container_alias, STARPU_POTI_STR_LEN, prefix, workerid); char memnode_container[STARPU_POTI_STR_LEN]; memnode_container_alias(memnode_container, STARPU_POTI_STR_LEN, prefix, nodeid); char new_thread_container_name[STARPU_POTI_STR_LEN]; snprintf(new_thread_container_name, sizeof(new_thread_container_name), "%sT%d", prefix, bindid); if (new_thread) poti_CreateContainer(now, new_thread_container_alias, "T", memnode_container, new_thread_container_name); poti_CreateContainer(now, new_worker_container_alias, "W", new_thread_container_alias, new_worker_container_name); if (!options->no_flops) poti_SetVariable(now, new_worker_container_alias, "gf", 0.0); #else if (new_thread) fprintf(out_paje_file, "7 %.9f %st%lu T %smn%d %sT%d\n", now, prefix, threadid, prefix, nodeid, prefix, bindid); fprintf(out_paje_file, "7 %.9f %sw%d W %st%lu %s\n", now, prefix, workerid, prefix, threadid, new_worker_container_name); if (!options->no_flops) fprintf(out_paje_file, "13 %.9f %sw%d gf 0.0\n", now, prefix, workerid); #endif } /* start initialization */ do_thread_set_state(now, prefix, threadid, "In", "Runtime", -1); if (activity_file) fprintf(activity_file, "name\t%d\t%s %d\n", workerid, kindstr, devid); snprintf(options->worker_names[workerid], sizeof(options->worker_names[workerid])-1, "%s %d", kindstr, devid); options->worker_names[workerid][sizeof(options->worker_names[workerid])-1] = 0; options->worker_archtypes[workerid] = arch; } static void handle_worker_init_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; int worker; if (ev->nb_params < 2) { worker = find_worker_id(prefixTOnodeid(prefix), ev->param[0]); STARPU_ASSERT(worker >= 0); } else worker = ev->param[1]; do_thread_set_state(get_event_time_stamp(ev, options), prefix, ev->param[0], "B", "Runtime", -1); do_worker_set_state(get_event_time_stamp(ev, options), prefix, worker, "I", "Other"); /* Initialize the accumulated time counters */ last_activity_flush_timestamp[worker] = get_event_time_stamp(ev, options); accumulated_sleep_time[worker] = 0.0; accumulated_exec_time[worker] = 0.0; } static void handle_worker_deinit_start(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; long unsigned int threadid = ev->param[0]; do_thread_set_state(get_event_time_stamp(ev, options), prefix, threadid, "D", "Runtime", -1); } static void handle_worker_deinit_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; if (out_paje_file) { #ifdef STARPU_HAVE_POTI char worker_container[STARPU_POTI_STR_LEN]; thread_container_alias(worker_container, STARPU_POTI_STR_LEN, prefix, ev->param[1]); poti_DestroyContainer(get_event_time_stamp(ev, options), "T", worker_container); #else fprintf(out_paje_file, "8 %.9f %st%"PRIu64" T\n", get_event_time_stamp(ev, options), prefix, ev->param[1]); #endif } if (trace_file) recfmt_thread_set_state(get_event_time_stamp(ev, options), prefixTOnodeid(prefix), ev->param[1], "End", NULL); } #ifdef STARPU_HAVE_POTI static void create_paje_state_color(char *name, char *type, int ctx, float red, float green, float blue) { char color[STARPU_POTI_STR_LEN]; char alias[STARPU_POTI_STR_LEN]; snprintf(color, sizeof(color), "%f %f %f", red, green, blue); if (ctx) { snprintf(alias, sizeof(alias), "%s_%d", name, ctx); } else { snprintf(alias, sizeof(alias), "%s", name); } poti_DefineEntityValue(alias, type, name, color); } #endif static void create_paje_state_if_not_found(char *name, unsigned color, struct starpu_fxt_options *options) { struct _starpu_symbol_name *itor; for (itor = _starpu_symbol_name_list_begin(&symbol_list); itor != _starpu_symbol_name_list_end(&symbol_list); itor = _starpu_symbol_name_list_next(itor)) { if (!strcmp(name, itor->name)) { /* we found an entry */ return; } } /* it's the first time ... */ struct _starpu_symbol_name *entry = _starpu_symbol_name_new(); entry->name = strdup(name); STARPU_ASSERT(entry->name); _starpu_symbol_name_list_push_front(&symbol_list, entry); float red, green, blue; if (color != 0) { red = color / 0x100 / 0x100; green = (color / 0x100) & 0xff; blue = color & 0xff; } else if (options->per_task_colour) { /* choose some color ... that's disguting yes */ unsigned hash_symbol_red = get_color_symbol_red(name); unsigned hash_symbol_green = get_color_symbol_green(name); unsigned hash_symbol_blue = get_color_symbol_blue(name); uint32_t hash_sum = hash_symbol_red + hash_symbol_green + hash_symbol_blue; red = (1.0f * hash_symbol_red) / hash_sum; green = (1.0f * hash_symbol_green) / hash_sum; blue = (1.0f * hash_symbol_blue) / hash_sum; } else { /* Use the hardcoded value for execution mode */ red = 0.0f; green = 0.6f; blue = 0.4f; } /* create the Paje state */ if (out_paje_file) { #ifdef STARPU_HAVE_POTI create_paje_state_color(name, "WS", 0, red, green, blue); int i; for(i = 1; i < STARPU_NMAX_SCHED_CTXS; i++) { char ctx[10]; snprintf(ctx, sizeof(ctx), "Ctx%d", i); if (options->use_task_color) { create_paje_state_color(name, ctx, i, red, green, blue); } else { if(i%10 == 1) create_paje_state_color(name, ctx, i, 1.0, 0.39, 1.0); if(i%10 == 2) create_paje_state_color(name, ctx, i, .0, 1.0, 0.0); if(i%10 == 3) create_paje_state_color(name, ctx, i, 1.0, 1.0, .0); if(i%10 == 4) create_paje_state_color(name, ctx, i, .0, 0.95, 1.0); if(i%10 == 5) create_paje_state_color(name, ctx, i, .0, .0, .0); if(i%10 == 6) create_paje_state_color(name, ctx, i, .0, .0, 0.5); if(i%10 == 7) create_paje_state_color(name, ctx, i, 0.41, 0.41, 0.41); if(i%10 == 8) create_paje_state_color(name, ctx, i, 1.0, .0, 1.0); if(i%10 == 9) create_paje_state_color(name, ctx, i, .0, .0, 1.0); if(i%10 == 0) create_paje_state_color(name, ctx, i, 0.6, 0.80, 50.0); } } #else fprintf(out_paje_file, "6 %s WS %s \"%f %f %f\" \n", name, name, red, green, blue); int i; for(i = 1; i < STARPU_NMAX_SCHED_CTXS; i++) { if (options->use_task_color) { fprintf(out_paje_file, "6 %s_%d Ctx%d %s \"%f %f %f\" \n", name, i, i, name, red, green, blue); } else { if(i%10 == 1) fprintf(out_paje_file, "6 %s_%d Ctx%d %s \"1.0 0.39 1.0\" \n", name, i, i, name); if(i%10 == 2) fprintf(out_paje_file, "6 %s_%d Ctx%d %s \".0 1.0 .0\" \n", name, i, i, name); if(i%10 == 3) fprintf(out_paje_file, "6 %s_%d Ctx%d %s \"0.87 0.87 .0\" \n", name, i, i, name); if(i%10 == 4) fprintf(out_paje_file, "6 %s_%d Ctx%d %s \".0 0.95 1.0\" \n", name, i, i, name); if(i%10 == 5) fprintf(out_paje_file, "6 %s_%d Ctx%d %s \".0 .0 .0\" \n", name, i, i, name); if(i%10 == 6) fprintf(out_paje_file, "6 %s_%d Ctx%d %s \".0 .0 0.5\" \n", name, i, i, name); if(i%10 == 7) fprintf(out_paje_file, "6 %s_%d Ctx%d %s \"0.41 0.41 0.41\" \n", name, i, i, name); if(i%10 == 8) fprintf(out_paje_file, "6 %s_%d Ctx%d %s \"1.0 .0 1.0\" \n", name, i, i, name); if(i%10 == 9) fprintf(out_paje_file, "6 %s_%d Ctx%d %s \".0 .0 1.0\" \n", name, i, i, name); if(i%10 == 0) fprintf(out_paje_file, "6 %s_%d Ctx%d %s \"0.6 0.80 0.19\" \n", name, i, i, name); } } #endif } } static void handle_start_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { int worker = ev->param[2]; int node = ev->param[3]; if (worker < 0) return; struct task_info *task = get_task(ev->param[0], options->file_rank); char *name = task->name; create_paje_state_if_not_found(name, task->color, options); snprintf(_starpu_last_codelet_symbol[worker], sizeof(_starpu_last_codelet_symbol[worker]), "%.*s", (int) sizeof(_starpu_last_codelet_symbol[worker])-1, name); _starpu_last_codelet_symbol[worker][sizeof(_starpu_last_codelet_symbol[worker])-1] = 0; last_codelet_parameter[worker] = 0; double start_codelet_time = get_event_time_stamp(ev, options); double last_start_codelet_time = last_codelet_start[worker]; last_codelet_start[worker] = start_codelet_time; char *prefix = options->file_prefix; task->start_time = start_codelet_time; task->workerid = worker; task->node = node; do_worker_set_state(start_codelet_time, prefix, ev->param[2], name, "Task"); if (out_paje_file) { unsigned sched_ctx = ev->param[1]; if (sched_ctx != 0) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; char ctx[6]; snprintf(ctx, sizeof(ctx), "Ctx%u", sched_ctx); worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, ev->param[2]); poti_SetState(start_codelet_time, container, ctx, name); #else fprintf(out_paje_file, "10 %.9f %sw%"PRIu64" Ctx%d \"%s\"\n", start_codelet_time, prefix, ev->param[2], sched_ctx, name); #endif } } struct _starpu_computation *comp = ongoing_computation[worker]; if (!comp) { /* First task for this worker */ comp = ongoing_computation[worker] = _starpu_computation_new(); comp->peer = NULL; comp->comp_start = start_codelet_time; if (!options->no_flops) _starpu_computation_list_push_back(&computation_list, comp); } else if (options->no_smooth || (start_codelet_time - last_codelet_end[worker]) >= IDLE_FACTOR * (last_codelet_end[worker] - last_start_codelet_time)) { /* Long idle period, move previously-allocated comp to now */ comp->comp_start = start_codelet_time; if (!options->no_flops) { _starpu_computation_list_erase(&computation_list, comp); _starpu_computation_list_push_back(&computation_list, comp); } } } static void handle_model_name(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { struct task_info *task = get_task(ev->param[0], options->file_rank); char *name = get_fxt_string(ev, 2); task->model_name = strdup(name); } static void handle_codelet_data(struct fxt_ev_64 *ev STARPU_ATTRIBUTE_UNUSED, struct starpu_fxt_options *options STARPU_ATTRIBUTE_UNUSED) { int worker = ev->param[0]; if (worker < 0) return; int num = last_codelet_parameter[worker]++; if (num >= MAX_PARAMETERS) return; char *name = get_fxt_string(ev, 1); snprintf(last_codelet_parameter_description[worker][num], sizeof(last_codelet_parameter_description[worker][num]), "%.*s", (int) sizeof(last_codelet_parameter_description[worker][num])-1, name); last_codelet_parameter_description[worker][num][sizeof(last_codelet_parameter_description[worker][num])-1] = 0; } static void handle_codelet_data_handle(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { struct task_info *task = get_task(ev->param[0], options->file_rank); unsigned alloc = 0; if (task->ndata == 0) /* Start with 8=2^3, should be plenty in most cases */ alloc = 8; else if (task->ndata >= 8) { /* Allocate dependencies array by powers of two */ if (! ((task->ndata - 1) & task->ndata)) /* Is task->ndata a power of two? */ { /* We have filled the previous power of two, get another one */ alloc = task->ndata * 2; } } if (alloc) { _STARPU_REALLOC(task->data, sizeof(*task->data) * alloc); } task->data[task->ndata].handle = ev->param[1]; task->data[task->ndata].size = ev->param[2]; task->data[task->ndata].mode = ev->param[3]; task->data[task->ndata].numa_nodes_bitmap = -1; task->ndata++; } static void handle_codelet_data_handle_numa_access(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { struct task_info *task = get_task(ev->param[0], options->file_rank); unsigned i = (unsigned) ev->param[1]; STARPU_ASSERT(i < task->ndata); task->data[i].numa_nodes_bitmap = ev->param[2]; } static void handle_codelet_details(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { int worker = ev->param[5]; unsigned long job_id = ev->param[6]; if (worker < 0) return; char parameters[256]; size_t eaten = 0; if (!last_codelet_parameter[worker]) snprintf(parameters, sizeof(parameters) - 1, "nodata"); else { int i; for (i = 0; i < last_codelet_parameter[worker] && i < MAX_PARAMETERS; i++) { eaten += snprintf(parameters + eaten, sizeof(parameters) - eaten - 1, "%s%s", i?" ":"", last_codelet_parameter_description[worker][i]); } } parameters[sizeof(parameters)-1] = 0; struct task_info *task = get_task(job_id, options->file_rank); task->parameters = strdup(parameters); task->footprint = ev->param[2]; task->kflops = ev->param[3]; task->tag = ev->param[4]; unsigned i, X = 0, Y = 0, Z = 0; for (i = 0; i < task->ndata; i++) { if (task->data[i].mode & STARPU_W) { struct data_info *data = get_data(task->data[i].handle, options->file_rank); if (data->dimensions >= 1) X = data->dims[0]; if (data->dimensions >= 2) Y = data->dims[1]; if (data->dimensions >= 3) Z = data->dims[2]; break; } } char numa_nodes_str[STARPU_TRACE_STR_LEN] = ""; eaten = 0; for (i = 0; i < task->ndata; i++) { char str[STARPU_TRACE_STR_LEN] = ""; _starpu_convert_numa_nodes_bitmap_to_str(task->data[i].numa_nodes_bitmap, str); eaten += snprintf(numa_nodes_str + eaten, sizeof(numa_nodes_str) - eaten - 1, "%s%s", i ? "_" : "", str); } numa_nodes_str[sizeof(numa_nodes_str)-1] = 0; if (out_paje_file) { char *prefix = options->file_prefix; unsigned sched_ctx = ev->param[0]; /* Paje won't like spaces or tabs, replace with underscores */ char *c; for (c = parameters; *c; c++) if ((*c == ' ') || (*c == '\t')) *c = '_'; worker_set_detailed_state(last_codelet_start[worker], prefix, worker, _starpu_last_codelet_symbol[worker], ev->param[1], parameters, ev->param[2], ev->param[4], job_id, ((double) task->kflops) / 1000000, X, Y, Z, task->iterations[0], task->iterations[1], numa_nodes_str, options); if (sched_ctx != 0) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; char typectx[STARPU_POTI_STR_LEN]; snprintf(typectx, sizeof(typectx), "Ctx%u", sched_ctx); worker_container_alias(container, sizeof(container), prefix, worker); poti_SetState(last_codelet_start[worker], container, typectx, _starpu_last_codelet_symbol[worker]); char name[STARPU_POTI_STR_LEN]; snprintf(name, sizeof(name), "%s", _starpu_last_codelet_symbol[worker]); char size_str[STARPU_POTI_STR_LEN]; char parameters_str[STARPU_POTI_STR_LEN]; char footprint_str[STARPU_POTI_STR_LEN]; char tag_str[STARPU_POTI_STR_LEN]; char jobid_str[STARPU_POTI_STR_LEN]; char submitorder_str[STARPU_POTI_STR_LEN]; snprintf(size_str, sizeof(size_str), "%ld", ev->param[1]); snprintf(parameters_str, sizeof(parameters_str), "%s", parameters); snprintf(footprint_str, sizeof(footprint_str), "%08lx", ev->param[2]); snprintf(tag_str, sizeof(tag_str), "%016lx", ev->param[4]); snprintf(jobid_str, sizeof(jobid_str), "%s%lu", prefix, job_id); snprintf(submitorder_str, sizeof(submitorder_str), "%s%lu", prefix, task->submit_order); #ifdef HAVE_POTI_INIT_CUSTOM poti_user_SetState(_starpu_poti_semiExtendedSetState, last_codelet_start[worker], container, typectx, name, 6, size_str, parameters_str, footprint_str, tag_str, jobid_str, submitorder_str); #else poti_SetState(last_codelet_start[worker], container, typectx, name); #endif #else fprintf(out_paje_file, "21 %.9f %sw%d Ctx%u \"%s\" %ld %s %08lx %016lx %s%lu %s%lu\n", last_codelet_start[worker], prefix, worker, sched_ctx, _starpu_last_codelet_symbol[worker], ev->param[1], parameters, ev->param[2], ev->param[4], prefix, job_id, prefix, task->submit_order); #endif } } } static long dumped_codelets_count; static struct starpu_fxt_codelet_event *dumped_codelets; static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long job_id = ev->param[0]; size_t codelet_size = ev->param[1]; uint32_t codelet_hash = ev->param[2]; int worker = ev->param[3]; long unsigned int threadid = ev->param[4]; char *name = get_fxt_string(ev, 5); if (worker < 0) return; char *prefix = options->file_prefix; double end_codelet_time = get_event_time_stamp(ev, options); double last_end_codelet_time = last_codelet_end[worker]; last_codelet_end[worker] = end_codelet_time; const char *state = "I"; if (find_sync(prefixTOnodeid(prefix), threadid)) state = "B"; do_worker_set_state(end_codelet_time, prefix, worker, state, "Other"); struct task_info *task = get_task(job_id, options->file_rank); task->end_time = end_codelet_time; update_accumulated_time(worker, 0.0, end_codelet_time - task->start_time, end_codelet_time, 0); struct _starpu_computation *peer = ongoing_computation[worker]; double gflops_start = peer->comp_start; double codelet_length; double gflops; struct _starpu_computation *comp; codelet_length = end_codelet_time - gflops_start; gflops = (((double)task->kflops) / 1000000) / (codelet_length / 1000); if (options->no_flops) { _starpu_computation_delete(peer); } else { if (out_paje_file) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, worker); if (gflops_start != last_end_codelet_time) { if (last_end_codelet_time != 0) { poti_SetVariable(last_end_codelet_time, container, "gf", 0.); } } poti_SetVariable(gflops_start, container, "gf", gflops); #else if (gflops_start != last_end_codelet_time) { if (last_end_codelet_time != 0) { fprintf(out_paje_file, "13 %.9f %sw%d gf %f\n", last_end_codelet_time, prefix, worker, 0.); } } fprintf(out_paje_file, "13 %.9f %sw%d gf %f\n", gflops_start, prefix, worker, gflops); #endif } comp = _starpu_computation_new(); comp->comp_start = end_codelet_time; comp->gflops = -gflops; peer->gflops = +gflops; comp->peer = peer; peer->peer = comp; _starpu_computation_list_push_back(&computation_list, comp); } /* Prepare comp for next codelet */ comp = _starpu_computation_new(); comp->comp_start = end_codelet_time; comp->peer = NULL; if (!options->no_flops) _starpu_computation_list_push_back(&computation_list, comp); ongoing_computation[worker] = comp; if (distrib_time) fprintf(distrib_time, "%s\t%s%d\t%ld\t%"PRIx32"\t%.9f\n", _starpu_last_codelet_symbol[worker], prefix, worker, (unsigned long) codelet_size, codelet_hash, codelet_length); if (options->dumped_codelets) { dumped_codelets_count++; _STARPU_REALLOC(dumped_codelets, dumped_codelets_count*sizeof(struct starpu_fxt_codelet_event)); snprintf(dumped_codelets[dumped_codelets_count - 1].symbol, sizeof(dumped_codelets[dumped_codelets_count - 1].symbol)-1, "%s", _starpu_last_codelet_symbol[worker]); dumped_codelets[dumped_codelets_count - 1].symbol[sizeof(dumped_codelets[dumped_codelets_count - 1].symbol)-1] = 0; dumped_codelets[dumped_codelets_count - 1].workerid = worker; snprintf(dumped_codelets[dumped_codelets_count - 1].perfmodel_archname, sizeof(dumped_codelets[dumped_codelets_count - 1].perfmodel_archname), "%.*s", (int) sizeof(dumped_codelets[dumped_codelets_count - 1].perfmodel_archname)-1, name); dumped_codelets[dumped_codelets_count - 1].perfmodel_archname[sizeof(dumped_codelets[dumped_codelets_count - 1].perfmodel_archname)-1] = 0; dumped_codelets[dumped_codelets_count - 1].size = codelet_size; dumped_codelets[dumped_codelets_count - 1].hash = codelet_hash; dumped_codelets[dumped_codelets_count - 1].time = codelet_length; } _starpu_last_codelet_symbol[worker][0] = 0; } static void handle_start_executing(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; long unsigned int threadid = ev->param[0]; long job_id = ev->param[1]; if (!find_sync(prefixTOnodeid(prefix), threadid)) do_thread_set_state(get_event_time_stamp(ev, options), prefix, threadid, "E", "Runtime", job_id); } static void handle_end_executing(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; long unsigned int threadid = ev->param[0]; if (!find_sync(prefixTOnodeid(prefix), threadid)) do_thread_set_state(get_event_time_stamp(ev, options), prefix, threadid, "B", "Runtime", -1); } static void handle_start_parallel_sync(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; long unsigned int threadid = ev->param[0]; thread_push_state(get_event_time_stamp(ev, options), prefix, threadid, "Ps"); } static void handle_end_parallel_sync(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; long unsigned int threadid = ev->param[0]; thread_pop_state(get_event_time_stamp(ev, options), prefix, threadid); } static void handle_user_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { int worker; unsigned long code = ev->param[0]; #ifdef STARPU_HAVE_POTI char paje_value[STARPU_POTI_STR_LEN], container[STARPU_POTI_STR_LEN]; snprintf(paje_value, sizeof(paje_value), "%lu", code); #endif char *prefix = options->file_prefix; double now = get_event_time_stamp(ev, options); worker = find_worker_id(prefixTOnodeid(prefix), ev->param[1]); if (worker < 0) { if (out_paje_file) #ifdef STARPU_HAVE_POTI program_container_alias(container, STARPU_POTI_STR_LEN, prefix); #else fprintf(out_paje_file, "9 %.9f user_user_event %sp %lu\n", now, prefix, code); #endif } else { if (out_paje_file) #ifdef STARPU_HAVE_POTI thread_container_alias(container, STARPU_POTI_STR_LEN, prefix, ev->param[1]); #else fprintf(out_paje_file, "9 %.9f user_event %st%"PRIu64" %lu\n", now, prefix, ev->param[1], code); #endif } #ifdef STARPU_HAVE_POTI if (out_paje_file) poti_NewEvent(now, container, "user_event", paje_value); #endif } static void handle_start_callback(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { do_thread_push_state_worker(get_event_time_stamp(ev, options), options->file_prefix, ev->param[1], "C", "Runtime", "UNK"); /* XXX */ } static void handle_end_callback(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { do_thread_pop_state_worker(get_event_time_stamp(ev, options), options->file_prefix, ev->param[1]); } static void handle_hypervisor_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { do_thread_push_state_worker(get_event_time_stamp(ev, options), options->file_prefix, ev->param[0], "H", "Runtime", "UNK"); /* XXX */ } static void handle_hypervisor_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { do_thread_pop_state_worker(get_event_time_stamp(ev, options), options->file_prefix, ev->param[0]); } static void handle_worker_status_on_tid(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *newstatus) { char *prefix = options->file_prefix; if (find_worker_id(prefixTOnodeid(prefix), ev->param[1]) < 0) return; do_thread_set_state(get_event_time_stamp(ev, options), prefix, ev->param[1], newstatus, "Runtime", -1); } static void handle_worker_status(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *newstatus) { int worker; worker = ev->param[1]; if (worker < 0) return; do_worker_set_state(get_event_time_stamp(ev, options), options->file_prefix, ev->param[1], newstatus, "Runtime"); } static double last_sleep_start[STARPU_NMAXWORKERS]; static void handle_worker_scheduling_start(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; if (find_worker_id(prefixTOnodeid(prefix), ev->param[0]) < 0) return; do_thread_set_state(get_event_time_stamp(ev, options), prefix, ev->param[0], "Sc", "Runtime", -1); } static void handle_worker_scheduling_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; if (find_worker_id(prefixTOnodeid(prefix), ev->param[0]) < 0) return; do_thread_set_state(get_event_time_stamp(ev, options), prefix, ev->param[0], "B", "Runtime", -1); } static void handle_worker_scheduling_push(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { do_thread_push_state_worker(get_event_time_stamp(ev, options), options->file_prefix, ev->param[0], "Sc", "Runtime", "User"); } static void handle_worker_scheduling_pop(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { do_thread_pop_state_worker(get_event_time_stamp(ev, options), options->file_prefix, ev->param[0]); } static void handle_worker_sleep_start(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; int worker = find_worker_id(prefixTOnodeid(prefix), ev->param[0]); if (worker < 0) return; double start_sleep_time = get_event_time_stamp(ev, options); last_sleep_start[worker] = start_sleep_time; do_thread_set_state(get_event_time_stamp(ev, options), prefix, ev->param[0], "Sl", "Other", -1); } static void handle_worker_sleep_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; int worker = find_worker_id(prefixTOnodeid(prefix), ev->param[0]); if (worker < 0) return; double end_sleep_timestamp = get_event_time_stamp(ev, options); do_thread_set_state(end_sleep_timestamp, prefix, ev->param[0], "B", "Runtime", -1); double sleep_length = end_sleep_timestamp - last_sleep_start[worker]; update_accumulated_time(worker, sleep_length, 0.0, end_sleep_timestamp, 0); } static void handle_data_register(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long handle = ev->param[0]; char *prefix = options->file_prefix; struct data_info *data = get_data(handle, options->file_rank); char *description = get_fxt_string(ev, 4); data->size = ev->param[1]; data->max_size = ev->param[2]; data->home_node = ev->param[3]; if (description[0]) data->description = strdup(description); if (out_paje_file && !options->no_events) { #ifdef STARPU_HAVE_POTI char paje_value[STARPU_POTI_STR_LEN], container[STARPU_POTI_STR_LEN]; snprintf(paje_value, sizeof(paje_value), "%lx", handle); program_container_alias(container, STARPU_POTI_STR_LEN, prefix); poti_NewEvent(get_event_time_stamp(ev, options), container, "register", paje_value); #else fprintf(out_paje_file, "9 %.9f register %sp %lx\n", get_event_time_stamp(ev, options), prefix, handle); #endif } } static void handle_data_unregister(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long handle = ev->param[0]; char *prefix = options->file_prefix; struct data_info *data = get_data(handle, options->file_rank); if (out_paje_file && !options->no_events) { #ifdef STARPU_HAVE_POTI char paje_value[STARPU_POTI_STR_LEN], container[STARPU_POTI_STR_LEN]; snprintf(paje_value, sizeof(paje_value), "%lx", handle); program_container_alias(container, STARPU_POTI_STR_LEN, prefix); poti_NewEvent(get_event_time_stamp(ev, options), container, "unregister", paje_value); #else fprintf(out_paje_file, "9 %.9f unregister %sp %lx\n", get_event_time_stamp(ev, options), prefix, handle); #endif } data_dump(data); } static void handle_data_state(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *state) { unsigned long handle = ev->param[0]; unsigned node = ev->param[1]; char *prefix = options->file_prefix; if (out_paje_file) { #ifdef STARPU_HAVE_POTI char paje_value[STARPU_POTI_STR_LEN], memnode_container[STARPU_POTI_STR_LEN]; memmanager_container_alias(memnode_container, STARPU_POTI_STR_LEN, prefix, node); snprintf(paje_value, sizeof(paje_value), "%lx", handle); poti_NewEvent(get_event_time_stamp(ev, options), memnode_container, state, paje_value); #else fprintf(out_paje_file, "9 %.9f %s %smm%u %lx\n", get_event_time_stamp(ev, options), state, prefix, node, handle); #endif } } static void handle_data_copy(void) { } static void handle_data_name(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long handle = ev->param[0]; char *name = get_fxt_string(ev, 1); struct data_info *data = get_data(handle, options->file_rank); data->name = strdup(name); } static void handle_data_coordinates(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long handle = ev->param[0]; unsigned dimensions = ev->param[1]; struct data_info *data = get_data(handle, options->file_rank); unsigned i; data->dimensions = dimensions; _STARPU_MALLOC(data->dims, dimensions * sizeof(*data->dims)); for (i = 0; i < dimensions; i++) data->dims[i] = ev->param[i+2]; } static void handle_data_wont_use(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long handle = ev->param[0]; unsigned long submit_order = ev->param[1]; unsigned long job_id = ev->param[2]; fprintf(tasks_file, "Control: WontUse\n"); fprintf(tasks_file, "JobId: %lu\n", job_id); fprintf(tasks_file, "SubmitOrder: %lu\n", submit_order); fprintf(tasks_file, "SubmitTime: %f\n", get_event_time_stamp(ev, options)); fprintf(tasks_file, "Handles: %lx\n", handle); fprintf(tasks_file, "MPIRank: %d\n", options->file_rank); fprintf(tasks_file, "\n"); } static void handle_data_doing_wont_use(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long handle = ev->param[0]; char *prefix = options->file_prefix; unsigned node = STARPU_MAIN_RAM; const char *event = "WU"; if (out_paje_file) { #ifdef STARPU_HAVE_POTI char paje_value[STARPU_POTI_STR_LEN], memnode_container[STARPU_POTI_STR_LEN]; memmanager_container_alias(memnode_container, STARPU_POTI_STR_LEN, prefix, node); snprintf(paje_value, sizeof(paje_value), "%lx", handle); poti_NewEvent(get_event_time_stamp(ev, options), memnode_container, event, paje_value); #else fprintf(out_paje_file, "9 %.9f %s %smm%u %lx\n", get_event_time_stamp(ev, options), event, prefix, node, handle); #endif } } static void handle_mpi_data_set_rank(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long handle = ev->param[0]; unsigned long rank = ev->param[1]; struct data_info *data = get_data(handle, options->file_rank); data->mpi_owner = rank; } static void handle_mpi_data_set_tag(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long handle = ev->param[0]; long tag = ev->param[1]; struct data_info *data = get_data(handle, options->file_rank); data->mpi_tag = tag; } static const char *copy_link_type(enum starpu_is_prefetch prefetch) { switch (prefetch) { case STARPU_FETCH: return "F"; case STARPU_TASK_PREFETCH: return "TF"; case STARPU_PREFETCH: return "PF"; case STARPU_IDLEFETCH: return "IF"; default: STARPU_ASSERT(0); } } static void handle_checkpoint_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { /* Add an event in the trace */ if (out_paje_file) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; snprintf(container, sizeof(container), "%sp", options->file_prefix); poti_user_NewEvent(_starpu_poti_checkPointState, get_event_time_stamp(ev, options), container, "prog_event", "checkpoint_begin", 2, ev->param[0], ev->param[1]); #else fprintf(out_paje_file, "25 %.9f checkpoint_begin %sp 0 %lu %lu\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[0], ev->param[1]); #endif } } static void handle_checkpoint_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { /* Add an event in the trace */ if (out_paje_file) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; snprintf(container, sizeof(container), "%sp", options->file_prefix); poti_user_NewEvent(_starpu_poti_checkPointState, get_event_time_stamp(ev, options), container, "prog_event", "checkpoint_end", 2, ev->param[0], ev->param[1]); #else fprintf(out_paje_file, "25 %.9f checkpoint_end %sp 0 %lu %lu\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[0], ev->param[1]); #endif } } static void handle_start_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned src = ev->param[0]; unsigned dst = ev->param[1]; unsigned size = ev->param[2]; unsigned comid = ev->param[3]; enum starpu_is_prefetch prefetch = ev->param[4]; unsigned long handle = ev->param[5]; const char *link_type = copy_link_type(prefetch); char *prefix = options->file_prefix; if (!options->no_bus) { if (out_paje_file) { double time = get_event_time_stamp(ev, options); memnode_push_state(time, prefix, dst, "Co"); memnode_event(get_event_time_stamp(ev, options), options->file_prefix, dst, "DCo", handle, 0, comid, size, src, options); unsigned X = _starpu_fxt_data_get_coord(handle, options->file_rank, 0); unsigned Y = _starpu_fxt_data_get_coord(handle, options->file_rank, 1); const char *name = _starpu_fxt_data_get_name(handle, options->file_rank); if (!name) name = ""; #ifdef STARPU_HAVE_POTI char paje_value[STARPU_POTI_STR_LEN], paje_key[STARPU_POTI_STR_LEN], src_memnode_container[STARPU_POTI_STR_LEN]; char program_container[STARPU_POTI_STR_LEN]; snprintf(paje_value, sizeof(paje_value), "%u", size); snprintf(paje_key, sizeof(paje_key), "com_%u", comid); program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix); memmanager_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, src); char str_handle[STARPU_POTI_STR_LEN]; snprintf(str_handle, sizeof(str_handle), "%lx", handle); char X_str[STARPU_POTI_STR_LEN]; snprintf(X_str, sizeof(X_str), "%u", X); char Y_str[STARPU_POTI_STR_LEN]; snprintf(Y_str, sizeof(Y_str), "%u", Y); poti_user_StartLink(_starpu_poti_CommLinkStart, time, program_container, link_type, src_memnode_container, paje_value, paje_key, 4, str_handle, name, X_str, Y_str); #else fprintf(out_paje_file, "24 %.9f %s %sp %u %smm%u com_%u %lx \"%s\" %u %u\n", time, link_type, prefix, size, prefix, src, comid, handle, name, X, Y); #endif } /* create a structure to store the start of the communication, this will be matched later */ struct _starpu_communication *com = _starpu_communication_new(); com->comid = comid; com->comm_start = get_event_time_stamp(ev, options); com->size = size; com->bandwidth = 0; com->src_node = src; com->dst_node = dst; com->type = link_type; com->peer = NULL; com->handle = handle; _starpu_communication_list_push_back(&communication_list, com); } } static void handle_work_stealing(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { if (out_paje_file) { unsigned dst = ev->param[0]; unsigned src = ev->param[1]; char *prefix = options->file_prefix; unsigned size = 0; double time = get_event_time_stamp(ev, options); #ifdef STARPU_HAVE_POTI char paje_value[STARPU_POTI_STR_LEN], paje_key[STARPU_POTI_STR_LEN], src_worker_container[STARPU_POTI_STR_LEN], dst_worker_container[STARPU_POTI_STR_LEN]; char program_container[STARPU_POTI_STR_LEN]; snprintf(paje_value, sizeof(paje_value), "%u", size); snprintf(paje_key, sizeof(paje_key), "steal_%u", steal_number); program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix); worker_container_alias(src_worker_container, STARPU_POTI_STR_LEN, prefix, src); worker_container_alias(dst_worker_container, STARPU_POTI_STR_LEN, prefix, dst); poti_StartLink(time, program_container, "WSL", src_worker_container, paje_value, paje_key); poti_EndLink(time+0.000000001, program_container, "WSL", dst_worker_container, paje_value, paje_key); #else fprintf(out_paje_file, "18 %.9f WSL %sp %u %sw%u steal_%u\n", time, prefix, size, prefix, src, steal_number); fprintf(out_paje_file, "19 %.9f WSL %sp %u %sw%u steal_%u\n", time+0.000000001, prefix, size, prefix, dst, steal_number); #endif } steal_number++; } static void handle_end_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { int src = -1; unsigned long handle = 0; unsigned dst = ev->param[1]; unsigned long size = ev->param[2]; unsigned comid = ev->param[3]; enum starpu_is_prefetch prefetch = ev->param[4]; const char *link_type = copy_link_type(prefetch); char *prefix = options->file_prefix; if (!options->no_bus) { /* look for a data transfer to match */ #ifdef STARPU_DEVEL #warning FIXME: use hash table instead #endif struct _starpu_communication *itor; for (itor = _starpu_communication_list_begin(&communication_list); itor != _starpu_communication_list_end(&communication_list); itor = _starpu_communication_list_next(itor)) { if (itor->comid == comid) { double comm_end = get_event_time_stamp(ev, options); double bandwidth = (double)((0.001*size)/(comm_end - itor->comm_start)); itor->bandwidth = bandwidth; struct _starpu_communication *com = _starpu_communication_new(); com->comid = comid; com->comm_start = get_event_time_stamp(ev, options); com->bandwidth = -bandwidth; com->size = size; src = com->src_node = itor->src_node; com->dst_node = itor->dst_node; com->type = itor->type; link_type = itor->type; handle = itor->handle; com->peer = itor; itor->peer = com; _starpu_communication_list_push_back(&communication_list, com); break; } } if (out_paje_file) { double time = get_event_time_stamp(ev, options); memnode_pop_state(time, prefix, dst); memnode_event(get_event_time_stamp(ev, options), options->file_prefix, dst, "DCoE", handle, 0, comid, size, src, options); #ifdef STARPU_HAVE_POTI char paje_value[STARPU_POTI_STR_LEN], paje_key[STARPU_POTI_STR_LEN]; char dst_memnode_container[STARPU_POTI_STR_LEN], program_container[STARPU_POTI_STR_LEN]; snprintf(paje_value, sizeof(paje_value), "%lu", size); snprintf(paje_key, sizeof(paje_key), "com_%u", comid); program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix); memmanager_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, dst); poti_EndLink(time, program_container, link_type, dst_memnode_container, paje_value, paje_key); #else fprintf(out_paje_file, "19 %.9f %s %sp %lu %smm%u com_%u\n", time, link_type, prefix, size, prefix, dst, comid); #endif } } } static void handle_start_driver_copy_async(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned src = ev->param[0]; unsigned dst = ev->param[1]; char *prefix = options->file_prefix; if (!options->no_bus) if (out_paje_file) { memnode_push_state(get_event_time_stamp(ev, options), prefix, dst, "CoA"); memnode_event(get_event_time_stamp(ev, options), options->file_prefix, dst, "DCoA", 0, 0, 0, 0, src, options); } } static void handle_end_driver_copy_async(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned src = ev->param[0]; unsigned dst = ev->param[1]; char *prefix = options->file_prefix; if (!options->no_bus) if (out_paje_file) { memnode_pop_state(get_event_time_stamp(ev, options), prefix, dst); memnode_event(get_event_time_stamp(ev, options), options->file_prefix, dst, "DCoAE", 0, 0, 0, 0, src, options); } } /* Currently unused */ STARPU_ATTRIBUTE_UNUSED static void handle_memnode_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr) { unsigned memnode = ev->param[0]; if (out_paje_file) memnode_set_state(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr); } static void handle_data_request(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr) { unsigned memnode = ev->param[0]; unsigned dest = ev->param[1]; unsigned prio = ev->param[2]; unsigned long handle = ev->param[3]; unsigned prefe = ev->param[4]; unsigned long request = ev->param[5]; memnode_event(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr, handle, request, prefe, prio, dest, options); } static void handle_memnode_event_start_3(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr) { unsigned memnode = ev->param[0]; unsigned size = ev->param[2]; unsigned long handle = ev->param[3]; memnode_event(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr, handle, 0, 0, size, memnode, options); } static void handle_memnode_event_start_4(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr) { unsigned memnode = ev->param[0]; //unsigned dest = ev->param[1]; // Not used unsigned size = ev->param[2]; unsigned long handle = ev->param[3]; unsigned prefe = ev->param[4]; memnode_event(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr, handle, 0, prefe, size, memnode, options); } static void handle_memnode_event_end_3(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr) { unsigned memnode = ev->param[0]; unsigned long handle = ev->param[2]; unsigned info = ev->param[3]; memnode_event(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr, handle, 0, info, 0, memnode, options); } static void handle_memnode_event_start_2(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr) { unsigned memnode = ev->param[0]; unsigned long handle = ev->param[2]; memnode_event(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr, handle, 0, 0, 0, memnode, options); } static void handle_memnode_event_end_2(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr) { unsigned memnode = ev->param[0]; unsigned long handle = ev->param[2]; memnode_event(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr, handle, 0, 0, 0, memnode, options); } static void handle_push_memnode_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr) { unsigned memnode = ev->param[0]; if (out_paje_file) memnode_push_state(get_event_time_stamp(ev, options), options->file_prefix, memnode, eventstr); } static void handle_pop_memnode_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned memnode = ev->param[0]; if (out_paje_file) memnode_pop_state(get_event_time_stamp(ev, options), options->file_prefix, memnode); } static void handle_used_mem(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned memnode = ev->param[0]; if (out_paje_file) { #ifdef STARPU_HAVE_POTI char memnode_container[STARPU_POTI_STR_LEN]; memmanager_container_alias(memnode_container, STARPU_POTI_STR_LEN, options->file_prefix, memnode); poti_SetVariable(get_event_time_stamp(ev, options), memnode_container, "use", (double)ev->param[1] / (1<<20)); #else fprintf(out_paje_file, "13 %.9f %smm%u use %f\n", get_event_time_stamp(ev, options), options->file_prefix, memnode, (double)ev->param[1] / (1<<20)); #endif } } static void handle_task_submit_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, unsigned long tid, const char *eventstr) { char *prefix = options->file_prefix; double timestamp = get_event_time_stamp(ev, options); if (eventstr) do_thread_push_state_worker(timestamp, prefix, tid, eventstr, "Runtime", "User"); else do_thread_pop_state_worker(timestamp, prefix, tid); } /* * Number of task submitted to the scheduler */ static int curq_size = 0; static int nsubmitted = 0; static void handle_job_push(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double current_timestamp = get_event_time_stamp(ev, options); unsigned task = ev->param[0]; int priority = ev->param[1]; curq_size++; _starpu_fxt_component_update_ntasks(nsubmitted, curq_size); if (!options->no_counter && out_paje_file) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; scheduler_container_alias(container, STARPU_POTI_STR_LEN, options->file_prefix); poti_SetVariable(current_timestamp, container, "nready", (double)curq_size); char paje_value[STARPU_POTI_STR_LEN]; snprintf(paje_value, sizeof(paje_value), "%u", task); snprintf(container, sizeof(container), "%sp", options->file_prefix); if (!options->no_events) poti_NewEvent(get_event_time_stamp(ev, options), container, "pu", paje_value); #else fprintf(out_paje_file, "13 %.9f %ssched nready %f\n", current_timestamp, options->file_prefix, (float)curq_size); if (!options->no_events) fprintf(out_paje_file, "9 %.9f %s %sp %u\n", get_event_time_stamp(ev, options), "pu", options->file_prefix, task); #endif } if (activity_file) fprintf(activity_file, "cnt_ready\t%.9f\t%d\n", current_timestamp, curq_size); if (sched_tasks_file) { fprintf(sched_tasks_file, "Type: push\n"); fprintf(sched_tasks_file, "Time: %.9f\n", current_timestamp); fprintf(sched_tasks_file, "Priority: %d\n", priority); if (options->file_rank < 0) fprintf(sched_tasks_file, "JobId: %u\n", task); else fprintf(sched_tasks_file, "JobId: %d_%u\n", options->file_rank, task); fprintf(sched_tasks_file, "\n"); } } static void handle_job_pop(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double current_timestamp = get_event_time_stamp(ev, options); unsigned task = ev->param[0]; int priority = ev->param[1]; curq_size--; nsubmitted--; _starpu_fxt_component_update_ntasks(nsubmitted, curq_size); if (!options->no_counter && out_paje_file) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; scheduler_container_alias(container, STARPU_POTI_STR_LEN, options->file_prefix); poti_SetVariable(current_timestamp, container, "nready", (double)curq_size); poti_SetVariable(current_timestamp, container, "nsubmitted", (double)nsubmitted); char paje_value[STARPU_POTI_STR_LEN]; snprintf(paje_value, sizeof(paje_value), "%u", task); snprintf(container, sizeof(container), "%sp", options->file_prefix); if (!options->no_events) poti_NewEvent(get_event_time_stamp(ev, options), container, "po", paje_value); #else fprintf(out_paje_file, "13 %.9f %ssched nready %f\n", current_timestamp, options->file_prefix, (float)curq_size); fprintf(out_paje_file, "13 %.9f %ssched nsubmitted %f\n", current_timestamp, options->file_prefix, (float)nsubmitted); if (!options->no_events) fprintf(out_paje_file, "9 %.9f %s %sp %u\n", get_event_time_stamp(ev, options), "po", options->file_prefix, task); #endif } if (activity_file) { fprintf(activity_file, "cnt_ready\t%.9f\t%d\n", current_timestamp, curq_size); fprintf(activity_file, "cnt_submitted\t%.9f\t%d\n", current_timestamp, nsubmitted); } if (sched_tasks_file) { fprintf(sched_tasks_file, "Type: pop\n"); fprintf(sched_tasks_file, "Time: %.9f\n", current_timestamp); fprintf(sched_tasks_file, "Priority: %d\n", priority); if (options->file_rank < 0) fprintf(sched_tasks_file, "JobId: %u\n", task); else fprintf(sched_tasks_file, "JobId: %d_%u\n", options->file_rank, task); fprintf(sched_tasks_file, "\n"); } } static void handle_component_new(struct fxt_ev_64 *ev, struct starpu_fxt_options *options STARPU_ATTRIBUTE_UNUSED) { _starpu_fxt_component_new(ev->param[0], get_fxt_string(ev, 1)); } static void handle_component_connect(struct fxt_ev_64 *ev, struct starpu_fxt_options *options STARPU_ATTRIBUTE_UNUSED) { _starpu_fxt_component_connect(ev->param[0], ev->param[1]); } static void handle_component_push(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; double current_timestamp = get_event_time_stamp(ev, options); int workerid = find_worker_id(prefixTOnodeid(prefix), ev->param[0]); _starpu_fxt_component_push(anim_file, options, current_timestamp, workerid, ev->param[1], ev->param[2], ev->param[3], ev->param[4]); } static void handle_component_pull(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; double current_timestamp = get_event_time_stamp(ev, options); int workerid = find_worker_id(prefixTOnodeid(prefix), ev->param[0]); _starpu_fxt_component_pull(anim_file, options, current_timestamp, workerid, ev->param[1], ev->param[2], ev->param[3], ev->param[4]); } static void handle_update_task_cnt(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double current_timestamp = get_event_time_stamp(ev, options); nsubmitted++; _starpu_fxt_component_update_ntasks(nsubmitted, curq_size); if (!options->no_counter && out_paje_file) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; scheduler_container_alias(container, STARPU_POTI_STR_LEN, options->file_prefix); poti_SetVariable(current_timestamp, container, "nsubmitted", (double)nsubmitted); #else fprintf(out_paje_file, "13 %.9f %ssched nsubmitted %f\n", current_timestamp, options->file_prefix, (float)nsubmitted); #endif } if (activity_file) fprintf(activity_file, "cnt_submitted\t%.9f\t%d\n", current_timestamp, nsubmitted); } static void handle_tag(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { uint64_t tag; unsigned long job; tag = ev->param[0]; job = ev->param[1]; if (options->label_deps) _starpu_fxt_dag_add_tag(options->file_prefix, tag, job, "tag"); else _starpu_fxt_dag_add_tag(options->file_prefix, tag, job, NULL); } static void handle_tag_deps(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { uint64_t child; uint64_t father; child = ev->param[0]; father = ev->param[1]; if (options->label_deps) _starpu_fxt_dag_add_tag_deps(options->file_prefix, child, father, "tag"); else _starpu_fxt_dag_add_tag_deps(options->file_prefix, child, father, NULL); } static void handle_task_deps(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long dep_prev = ev->param[0]; unsigned long dep_succ = ev->param[1]; unsigned dep_succ_type = ev->param[2]; char *name = get_fxt_string(ev,4); struct task_info *task = get_task(dep_succ, options->file_rank); struct task_info *prev_task = get_task(dep_prev, options->file_rank); unsigned alloc = 0; task->type = dep_succ_type; if (task->ndeps == 0) /* Start with 8=2^3, should be plenty in most cases */ alloc = 8; else if (task->ndeps >= 8) { /* Allocate dependencies array by powers of two */ if (! ((task->ndeps - 1) & task->ndeps)) /* Is task->ndeps a power of two? */ { /* We have filled the previous power of two, get another one */ alloc = task->ndeps * 2; } } if (alloc) { _STARPU_REALLOC(task->dependencies, sizeof(*task->dependencies) * alloc); _STARPU_REALLOC(task->dep_labels, sizeof(*task->dep_labels) * alloc); } task->dependencies[task->ndeps] = dep_prev; task->dep_labels[task->ndeps] = strdup(name); task->ndeps++; /* There is a dependency between both job id : dep_prev -> dep_succ */ if (show_task(task, options) && show_task(prev_task, options)) { if (!options->label_deps) name = NULL; /* We should show the name of the predecessor, then. */ prev_task->show = 1; _starpu_fxt_dag_add_task_deps(options->file_prefix, dep_prev, dep_succ, name); } } static void handle_task_end_dep(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long dep_prev = ev->param[0]; unsigned long dep_succ = ev->param[1]; struct task_info *task = get_task(dep_succ, options->file_rank); unsigned alloc = 0; if (task->nend_deps == 0) /* Start with 8=2^3, should be plenty in most cases */ alloc = 8; else if (task->nend_deps >= 8) { /* Allocate dependencies array by powers of two */ if (! ((task->nend_deps - 1) & task->nend_deps)) /* Is task->ndeps a power of two? */ { /* We have filled the previous power of two, get another one */ alloc = task->nend_deps * 2; } } if (alloc) { _STARPU_REALLOC(task->end_dependencies, sizeof(*task->end_dependencies) * alloc); } task->end_dependencies[task->nend_deps++] = dep_prev; if (!task->exclude_from_dag && show_task(task, options)) _starpu_fxt_dag_add_task_end_dep(options->file_prefix, dep_succ, dep_prev); } static void handle_task_submit(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long job_id = ev->param[0]; unsigned long iteration = ev->param[1]; unsigned long subiteration = ev->param[2]; unsigned long submit_order = ev->param[3]; long priority = (long) ev->param[4]; unsigned type = ev->param[5]; struct task_info *task = get_task(job_id, options->file_rank); task->submit_time = get_event_time_stamp(ev, options); task->submit_order = submit_order; task->priority = priority; task->iterations[0] = iteration; task->iterations[1] = subiteration; task->type = type; } static void handle_task_color(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long job_id = ev->param[0]; struct task_info *task = get_task(job_id, options->file_rank); int color = (long) ev->param[1]; task->color = color; } static void handle_task_exclude_from_dag(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long job_id = ev->param[0]; unsigned exclude_from_dag = ev->param[1]; struct task_info *task = get_task(job_id, options->file_rank); task->exclude_from_dag = exclude_from_dag; } static void handle_task_name(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long job_id = ev->param[0]; char *name = get_fxt_string(ev,2); char *prefix = options->file_prefix; struct task_info *task = get_task(job_id, options->file_rank); int worker = find_worker_id(prefixTOnodeid(prefix), ev->param[1]); const char *color; char buffer[32]; int code; if (task->color != 0) { snprintf(buffer, sizeof(buffer), "#%06x", task->color); color = &buffer[0]; code = ((task->color & 0xff) + ((task->color >> 8) & 0xff) + ((task->color >> 16) & 0xff)) / 256; } else if (options->per_task_colour) { unsigned red = get_color_symbol_red(name)/4; unsigned green = get_color_symbol_green(name)/4; unsigned blue = get_color_symbol_blue(name)/4; snprintf(buffer, sizeof(buffer), "#%s%x%s%x%s%x", red < 16 ? "0" : "", red, green < 16 ? "0" : "", green, blue < 16 ? "0" : "", blue); color = &buffer[0]; code = (red + green + blue) / 256; } else { color= (worker < 0)?"#aaaaaa":get_worker_color(worker); code = 0; } if (!task->name) task->name = strdup(name); char *fontcolor = code <= 1 ? "white" : "black"; if (!task->exclude_from_dag && show_task(task, options)) _starpu_fxt_dag_set_task_name(options->file_prefix, job_id, task->name, color, fontcolor); } #ifdef STARPU_BUBBLE static void handle_task_bubble(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long job_id = ev->param[0]; int is_bubble = (int)ev->param[1]; unsigned long bubble_parent = ev->param[2]; struct task_info *task = get_task(job_id, options->file_rank); task->is_bubble = is_bubble; task->bubble_parent = bubble_parent; if (!task->exclude_from_dag && show_task(task, options)) _starpu_fxt_dag_set_task_bubble(options->file_prefix, job_id, task->is_bubble, task->bubble_parent); } #endif static void handle_task_line(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned long job_id = ev->param[0]; int line = ev->param[1]; char *file = get_fxt_string(ev,2); struct task_info *task = get_task(job_id, options->file_rank); task->file = strdup(file); task->line = line; if (!task->exclude_from_dag && show_task(task, options)) _starpu_fxt_dag_set_task_line(options->file_prefix, job_id, task->file, line); } static void handle_task_done(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { /* Ideally, we would be able to dump tasks as they terminate, to save * memory. * We however may have to change their state later, e.g. the show field, * due to dependencies added way later. */ #if 0 unsigned long job_id; job_id = ev->param[0]; struct task_info *task = get_task(job_id, options->file_rank); task_dump(task, options); #else (void) ev; (void) options; #endif } static void handle_tag_done(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *prefix = options->file_prefix; uint64_t tag_id; tag_id = ev->param[0]; unsigned long has_name = ev->param[2]; char *name = has_name?get_fxt_string(ev,3):"unknown"; int worker = find_worker_id(prefixTOnodeid(prefix), ev->param[1]); const char *color; char buffer[32]; int code; if (options->per_task_colour) { unsigned red = get_color_symbol_red(name)/4; unsigned green = get_color_symbol_green(name)/4; unsigned blue = get_color_symbol_blue(name)/4; snprintf(buffer, sizeof(buffer), "#%s%x%s%x%s%x", red < 16 ? "0" : "", red, green < 16 ? "0" : "", green, blue < 16 ? "0" : "", blue); color = &buffer[0]; code = (red + green + blue) / 256; } else { color= (worker < 0)?"white":get_worker_color(worker); code = 1; } char *fontcolor = code <= 1 ? "white" : "black"; _starpu_fxt_dag_set_tag_done(options->file_prefix, tag_id, color, fontcolor); } static void handle_mpi_barrier(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { int rank = ev->param[0]; double sync_time = ev->param[3]; STARPU_ASSERT(rank == options->file_rank || options->file_rank == -1); /* Add an event in the trace */ if (out_paje_file) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN], paje_value[STARPU_POTI_STR_LEN]; snprintf(container, sizeof(container), "%sp", options->file_prefix); if (sync_time != 0) { snprintf(paje_value, sizeof(paje_value), "\"end of mpi_sync_clocks_barrier, rank %d\"", rank); poti_NewEvent(compute_time_stamp(sync_time, options), container, "prog_event", paje_value); } else { snprintf(paje_value, sizeof(paje_value), "\"end of MPI_Barrier, rank %d\"", rank); poti_NewEvent(get_event_time_stamp(ev, options), container, "prog_event", paje_value); } #else if (sync_time != 0) { fprintf(out_paje_file, "9 %.9f prog_event %sp \"end of mpi_sync_clocks_barrier, rank %d\"\n", compute_time_stamp(sync_time, options), options->file_prefix, rank); } else { fprintf(out_paje_file, "9 %.9f prog_event %sp \"end of MPI_Barrier, rank %d\"\n", get_event_time_stamp(ev, options), options->file_prefix, rank); } #endif } } static void show_mpi_thread(struct starpu_fxt_options *options) { char *prefix = options->file_prefix; if (out_paje_file) { double date = 0.; #ifdef STARPU_HAVE_POTI char program_container[STARPU_POTI_STR_LEN]; program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix); char new_mpicommthread_container_alias[STARPU_POTI_STR_LEN]; mpicommthread_container_alias(new_mpicommthread_container_alias, STARPU_POTI_STR_LEN, prefix); snprintf(new_mpicommthread_container_alias, STARPU_POTI_STR_LEN, "%smpict", prefix); poti_CreateContainer(date, new_mpicommthread_container_alias, "MPICt", program_container, new_mpicommthread_container_alias); //set bandwidth variables to zero when they start poti_SetVariable(date, new_mpicommthread_container_alias, "bwi_mpi", 0.); poti_SetVariable(date, new_mpicommthread_container_alias, "bwo_mpi", 0.); #else fprintf(out_paje_file, "7 %.9f %smpict MPICt %sp %smpict\n", date, prefix, prefix, prefix); //set bandwidth variables to zero when they start fprintf(out_paje_file, "13 %.9f %smpict bwi_mpi 0.0\n", date, prefix); fprintf(out_paje_file, "13 %.9f %smpict bwo_mpi 0.0\n", date, prefix); #endif } } static void handle_mpi_start(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); char *prefix = options->file_prefix; register_mpi_thread(prefixTOnodeid(prefix), ev->param[2]); if (!(options->ninputfiles == 2 && options->file_rank == 1)) show_mpi_thread(options); do_mpicommthread_set_state(date, prefix, "Sl"); } static void handle_mpi_stop(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); char *prefix = options->file_prefix; if (out_paje_file) { #ifdef STARPU_HAVE_POTI char mpicommthread_container[STARPU_POTI_STR_LEN]; mpicommthread_container_alias(mpicommthread_container, STARPU_POTI_STR_LEN, prefix); poti_DestroyContainer(date, "MPICt", mpicommthread_container); #else fprintf(out_paje_file, "8 %.9f %smpict MPICt\n", date, prefix); #endif } } static void handle_mpi_isend_submit_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "SdS"); } static int mpi_warned; static void handle_mpi_isend_submit_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { unsigned type = ev->param[0]; int dest = ev->param[1]; int mpi_tag = ev->param[2]; size_t size = ev->param[3]; long jobid = ev->param[4]; unsigned long handle = ev->param[5]; int prio = ev->param[6]; double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "P"); if (options->file_rank < 0) { if (!mpi_warned) { _STARPU_MSG("Warning : Only one trace file is given. MPI transfers will not be displayed. Add all trace files to show them ! \n"); mpi_warned = 1; } } else _starpu_fxt_mpi_add_send_transfer(options->file_rank, dest, mpi_tag, size, date, jobid, handle, type, prio); } static void handle_mpi_isend_numa_node(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { int dest = ev->param[0]; long jobid = ev->param[1]; long numa_nodes_bitmap = ev->param[2]; if (options->file_rank >= 0) _starpu_fxt_mpi_send_transfer_set_numa_node(options->file_rank, dest, jobid, numa_nodes_bitmap); } static void handle_mpi_irecv_submit_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "RvS"); } static void handle_mpi_irecv_submit_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "P"); } static void handle_mpi_isend_complete_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "SdC"); } static void handle_mpi_isend_complete_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "P"); } static void handle_mpi_irecv_complete_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "RvC"); } static void handle_mpi_irecv_complete_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "P"); } static void handle_mpi_irecv_terminated(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { int src = ev->param[0]; int mpi_tag = ev->param[1]; long jobid = ev->param[2]; unsigned long handle = ev->param[4]; double date = get_event_time_stamp(ev, options); if (options->file_rank < 0) { if (!mpi_warned) { _STARPU_MSG("Warning : Only one trace file is given. MPI transfers will not be displayed. Add all trace files to show them ! \n"); mpi_warned = 1; } } else _starpu_fxt_mpi_add_recv_transfer(src, options->file_rank, mpi_tag, date, jobid, handle); } static void handle_mpi_irecv_numa_node(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { int src = ev->param[0]; long jobid = ev->param[1]; long numa_nodes_bitmap = ev->param[2]; if (options->file_rank >= 0) _starpu_fxt_mpi_recv_transfer_set_numa_node(src, options->file_rank, jobid, numa_nodes_bitmap); } static void handle_mpi_sleep_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "Sl"); } static void handle_mpi_sleep_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "Pl"); } static void handle_mpi_dtesting_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "DT"); } static void handle_mpi_dtesting_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "P"); } static void handle_mpi_utesting_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "UT"); } static void handle_mpi_utesting_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "P"); } static void handle_mpi_uwait_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "UW"); } static void handle_mpi_uwait_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_set_state(date, options->file_prefix, "P"); } static void handle_mpi_testing_detached_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_push_state(date, options->file_prefix, "TD"); } static void handle_mpi_testing_detached_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_pop_state(date, options->file_prefix); } static void handle_mpi_test_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_push_state(date, options->file_prefix, "MT"); } static void handle_mpi_test_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); do_mpicommthread_pop_state(date, options->file_prefix); } static void handle_mpi_polling_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); if (out_paje_file) mpicommthread_set_state(date, options->file_prefix, "Pl"); } static void handle_mpi_polling_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); if (out_paje_file) mpicommthread_set_state(date, options->file_prefix, "P"); } static void handle_mpi_driver_run_begin(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); if (out_paje_file) mpicommthread_set_state(date, options->file_prefix, "Dr"); } static void handle_mpi_driver_run_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { double date = get_event_time_stamp(ev, options); if (out_paje_file) mpicommthread_set_state(date, options->file_prefix, "Pl"); } static void handle_set_profiling(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { int status = ev->param[0]; if (activity_file) fprintf(activity_file, "set_profiling\t%.9f\t%d\n", get_event_time_stamp(ev, options), status); } static void handle_task_wait_for_all(void) { _starpu_fxt_dag_add_sync_point(); } static void handle_string_event(struct fxt_ev_64 *ev, const char *event, struct starpu_fxt_options *options) { /* Add an event in the trace */ if (out_paje_file) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; snprintf(container, sizeof(container), "%sp", options->file_prefix); poti_NewEvent(get_event_time_stamp(ev, options), container, "prog_event", event); #else fprintf(out_paje_file, "9 %.9f prog_event %sp \"%s\"\n", get_event_time_stamp(ev, options), options->file_prefix, event); #endif } if (trace_file) recfmt_dump_state(get_event_time_stamp(ev, options), "ProgEvent", -1, 0, event, "Program"); } static void handle_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { char *event = get_fxt_string(ev, 0); handle_string_event(ev, event, options); } static void handle_thread_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options) { /* Add an event in the trace */ if (out_paje_file) { char *event = get_fxt_string(ev, 1); #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; thread_container_alias(container, STARPU_POTI_STR_LEN, options->file_prefix, ev->param[0]); poti_NewEvent(get_event_time_stamp(ev, options), container, "thread_event", event); #else fprintf(out_paje_file, "9 %.9f thread_event %st%"PRIu64" %s\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[0], event); #endif } } static void _starpu_fxt_process_bandwidth(struct starpu_fxt_options *options) { char *prefix = options->file_prefix; struct _starpu_communication *itor, *next; /* Loop through completed communications */ for (itor = _starpu_communication_list_begin(&communication_list); itor != _starpu_communication_list_end(&communication_list); itor = next) { next = _starpu_communication_list_next(itor); if (!itor->peer) break; /* This communication is complete */ _starpu_communication_list_erase(&communication_list, itor); current_bandwidth_out_per_node[itor->src_node] += itor->bandwidth; if (out_paje_file) { #ifdef STARPU_HAVE_POTI char src_memnode_container[STARPU_POTI_STR_LEN]; memmanager_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->src_node); poti_SetVariable(itor->comm_start, src_memnode_container, "bwo_mm", current_bandwidth_out_per_node[itor->src_node]); #else fprintf(out_paje_file, "13 %.9f %smm%u bwo_mm %f\n", itor->comm_start, prefix, itor->src_node, current_bandwidth_out_per_node[itor->src_node]); #endif } current_bandwidth_in_per_node[itor->dst_node] += itor->bandwidth; if (out_paje_file) { #ifdef STARPU_HAVE_POTI char dst_memnode_container[STARPU_POTI_STR_LEN]; memmanager_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, itor->dst_node); poti_SetVariable(itor->comm_start, dst_memnode_container, "bwi_mm", current_bandwidth_in_per_node[itor->dst_node]); #else fprintf(out_paje_file, "13 %.9f %smm%u bwi_mm %f\n", itor->comm_start, prefix, itor->dst_node, current_bandwidth_in_per_node[itor->dst_node]); #endif } _starpu_communication_delete(itor); } } static void _starpu_fxt_process_computations(struct starpu_fxt_options *options) { char *prefix = options->file_prefix; /* Loop through completed computations */ struct _starpu_computation*itor; while (!_starpu_computation_list_empty(&computation_list) && _starpu_computation_list_begin(&computation_list)->peer) { /* This computation is complete */ itor = _starpu_computation_list_pop_front(&computation_list); if (out_paje_file && itor->comp_start != current_computation_time) { /* flush last value */ #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; scheduler_container_alias(container, STARPU_POTI_STR_LEN, prefix); poti_SetVariable(current_computation_time, container, "gft", (double)current_computation); #else fprintf(out_paje_file, "13 %.9f %ssched gft %f\n", current_computation_time, prefix, (float)current_computation); #endif } current_computation += itor->gflops; current_computation_time = itor->comp_start; _starpu_computation_delete(itor); } } static void _starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *options) { /* Open the trace file */ int fd_in; fd_in = open(filename_in, O_RDONLY); if (fd_in < 0) { STARPU_ABORT_MSG("Failed to open '%s' (err %s)", filename_in, strerror(errno)); } static fxt_t fut; fut = fxt_fdopen(fd_in); if (!fut) { perror("fxt_fdopen :"); _exit(EXIT_FAILURE); } fxt_blockev_t block; block = fxt_blockev_enter(fut); char *prefix = options->file_prefix; /* TODO starttime ...*/ /* create the "program" container */ current_computation = 0.0; current_computation_time = 0.0; if (out_paje_file) { #ifdef STARPU_HAVE_POTI char new_program_container_alias[STARPU_POTI_STR_LEN], new_program_container_name[STARPU_POTI_STR_LEN]; program_container_alias(new_program_container_alias, STARPU_POTI_STR_LEN, prefix); snprintf(new_program_container_name, sizeof(new_program_container_name), "program %s", prefix); poti_CreateContainer (0, new_program_container_alias, "P", "MPIroot", new_program_container_name); char new_scheduler_container_alias[STARPU_POTI_STR_LEN], new_scheduler_container_name[STARPU_POTI_STR_LEN]; scheduler_container_alias(new_scheduler_container_alias, STARPU_POTI_STR_LEN, prefix); snprintf(new_scheduler_container_name, sizeof(new_scheduler_container_name), "%sscheduler", prefix); if (!options->no_counter || !options->no_flops) { poti_CreateContainer(0.0, new_scheduler_container_alias, "Sc", new_program_container_alias, new_scheduler_container_name); } if (!options->no_counter) { poti_SetVariable(0.0, new_scheduler_container_alias, "nsubmitted", 0.0); poti_SetVariable(0.0, new_scheduler_container_alias, "nready", 0.0); } if (!options->no_flops) { poti_SetVariable(0.0, new_scheduler_container_alias, "gft", 0.0); } #else fprintf(out_paje_file, "7 0.0 %sp P MPIroot %sprogram \n", prefix, prefix); if (!options->no_counter || !options->no_flops) { fprintf(out_paje_file, "7 0.0 %ssched Sc %sp %sscheduler\n", prefix, prefix, prefix); } if (!options->no_counter) { /* create a variable with the number of tasks */ fprintf(out_paje_file, "13 0.0 %ssched nsubmitted 0.0\n", prefix); fprintf(out_paje_file, "13 0.0 %ssched nready 0.0\n", prefix); } if (!options->no_flops) { fprintf(out_paje_file, "13 0.0 %ssched gft 0.0\n", prefix); } #endif } if ((options->ninputfiles == 2 && options->file_rank == 1)) /* put the mpi thread at the top, so MPI communications nicely show up in the middle */ show_mpi_thread(options); struct fxt_ev_64 ev; while(1) { unsigned i; int ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev); for (i = ev.nb_params; i < FXT_MAX_PARAMS; i++) ev.param[i] = 0; if (ret != FXT_EV_OK) { break; } if (number_events_file != NULL) { assert(number_events != NULL); assert(ev.code <= FUT_SETUP_CODE); number_events[ev.code]++; } switch (ev.code) { case _STARPU_FUT_WORKER_INIT_START: handle_worker_init_start(&ev, options); break; case _STARPU_FUT_WORKER_INIT_END: handle_worker_init_end(&ev, options); break; case _STARPU_FUT_NEW_MEM_NODE: handle_new_mem_node(&ev, options); break; /* detect when the workers were idling or not */ case _STARPU_FUT_START_CODELET_BODY: handle_start_codelet_body(&ev, options); break; case _STARPU_FUT_MODEL_NAME: handle_model_name(&ev, options); break; case _STARPU_FUT_CODELET_DATA: handle_codelet_data(&ev, options); break; case _STARPU_FUT_CODELET_DATA_HANDLE: handle_codelet_data_handle(&ev, options); break; case _STARPU_FUT_CODELET_DATA_HANDLE_NUMA_ACCESS: handle_codelet_data_handle_numa_access(&ev, options); break; case _STARPU_FUT_CODELET_DETAILS: handle_codelet_details(&ev, options); break; case _STARPU_FUT_END_CODELET_BODY: handle_end_codelet_body(&ev, options); break; case _STARPU_FUT_START_EXECUTING: handle_start_executing(&ev, options); break; case _STARPU_FUT_END_EXECUTING: handle_end_executing(&ev, options); break; case _STARPU_FUT_START_PARALLEL_SYNC: handle_start_parallel_sync(&ev, options); break; case _STARPU_FUT_END_PARALLEL_SYNC: handle_end_parallel_sync(&ev, options); break; case _STARPU_FUT_START_CALLBACK: handle_start_callback(&ev, options); break; case _STARPU_FUT_END_CALLBACK: handle_end_callback(&ev, options); break; case _STARPU_FUT_UPDATE_TASK_CNT: handle_update_task_cnt(&ev, options); break; /* monitor stack size and generate sched_tasks.rec */ case _STARPU_FUT_JOB_PUSH: handle_job_push(&ev, options); break; case _STARPU_FUT_JOB_POP: handle_job_pop(&ev, options); break; case _STARPU_FUT_SCHED_COMPONENT_NEW: handle_component_new(&ev, options); break; case _STARPU_FUT_SCHED_COMPONENT_CONNECT: handle_component_connect(&ev, options); break; case _STARPU_FUT_SCHED_COMPONENT_PUSH: handle_component_push(&ev, options); break; case _STARPU_FUT_SCHED_COMPONENT_PULL: handle_component_pull(&ev, options); break; /* check the memory transfer overhead */ case _STARPU_FUT_START_FETCH_INPUT_ON_TID: handle_worker_status_on_tid(&ev, options, "Fi"); break; case _STARPU_FUT_START_PUSH_OUTPUT_ON_TID: handle_worker_status_on_tid(&ev, options, "Po"); break; case _STARPU_FUT_START_PROGRESS_ON_TID: handle_worker_status_on_tid(&ev, options, "P"); break; case _STARPU_FUT_START_UNPARTITION_ON_TID: handle_worker_status_on_tid(&ev, options, "U"); break; case _STARPU_FUT_END_FETCH_INPUT_ON_TID: case _STARPU_FUT_END_PROGRESS_ON_TID: case _STARPU_FUT_END_PUSH_OUTPUT_ON_TID: case _STARPU_FUT_END_UNPARTITION_ON_TID: handle_worker_status_on_tid(&ev, options, "B"); break; case _STARPU_FUT_START_FETCH_INPUT: handle_worker_status(&ev, options, "Fi"); break; case _STARPU_FUT_END_FETCH_INPUT: handle_worker_status(&ev, options, "B"); break; case _STARPU_FUT_WORKER_SCHEDULING_START: handle_worker_scheduling_start(&ev, options); break; case _STARPU_FUT_WORKER_SCHEDULING_END: handle_worker_scheduling_end(&ev, options); break; case _STARPU_FUT_WORKER_SCHEDULING_PUSH: handle_worker_scheduling_push(&ev, options); break; case _STARPU_FUT_WORKER_SCHEDULING_POP: handle_worker_scheduling_pop(&ev, options); break; case _STARPU_FUT_WORKER_SLEEP_START: handle_worker_sleep_start(&ev, options); break; case _STARPU_FUT_WORKER_SLEEP_END: handle_worker_sleep_end(&ev, options); break; case _STARPU_FUT_TAG: handle_tag(&ev, options); break; case _STARPU_FUT_TAG_DEPS: handle_tag_deps(&ev, options); break; case _STARPU_FUT_TASK_DEPS: handle_task_deps(&ev, options); break; case _STARPU_FUT_TASK_END_DEP: handle_task_end_dep(&ev, options); break; case _STARPU_FUT_TASK_SUBMIT: handle_task_submit(&ev, options); break; case _STARPU_FUT_TASK_BUILD_START: handle_task_submit_event(&ev, options, ev.param[0], "Bu"); break; case _STARPU_FUT_TASK_SUBMIT_START: handle_task_submit_event(&ev, options, ev.param[0], "Su"); break; case _STARPU_FUT_TASK_THROTTLE_START: handle_task_submit_event(&ev, options, ev.param[0], "Th"); break; case _STARPU_FUT_TASK_MPI_DECODE_START: handle_task_submit_event(&ev, options, ev.param[0], "MD"); break; case _STARPU_FUT_TASK_MPI_PRE_START: handle_task_submit_event(&ev, options, ev.param[0], "MPr"); break; case _STARPU_FUT_TASK_MPI_POST_START: handle_task_submit_event(&ev, options, ev.param[0], "MPo"); break; case _STARPU_FUT_TASK_WAIT_START: handle_task_submit_event(&ev, options, ev.param[1], "W"); break; case _STARPU_FUT_TASK_WAIT_FOR_ALL_START: handle_task_submit_event(&ev, options, ev.param[0], "WA"); break; case _STARPU_FUT_TASK_BUILD_END: case _STARPU_FUT_TASK_SUBMIT_END: case _STARPU_FUT_TASK_THROTTLE_END: case _STARPU_FUT_TASK_MPI_DECODE_END: case _STARPU_FUT_TASK_MPI_PRE_END: case _STARPU_FUT_TASK_MPI_POST_END: case _STARPU_FUT_TASK_WAIT_FOR_ALL_END: handle_task_submit_event(&ev, options, ev.param[0], NULL); break; case _STARPU_FUT_TASK_WAIT_END: handle_task_submit_event(&ev, options, ev.param[0], NULL); break; case _STARPU_FUT_TASK_EXCLUDE_FROM_DAG: handle_task_exclude_from_dag(&ev, options); break; case _STARPU_FUT_TASK_NAME: handle_task_name(&ev, options); break; #ifdef STARPU_BUBBLE case _STARPU_FUT_TASK_BUBBLE: handle_task_bubble(&ev, options); break; #endif case _STARPU_FUT_TASK_LINE: handle_task_line(&ev, options); break; case _STARPU_FUT_TASK_COLOR: handle_task_color(&ev, options); break; case _STARPU_FUT_TASK_DONE: handle_task_done(&ev, options); break; case _STARPU_FUT_TAG_DONE: handle_tag_done(&ev, options); break; case _STARPU_FUT_HANDLE_DATA_REGISTER: handle_data_register(&ev, options); break; case _STARPU_FUT_HANDLE_DATA_UNREGISTER: handle_data_unregister(&ev, options); break; case _STARPU_FUT_DATA_STATE_INVALID: if (options->memory_states) handle_data_state(&ev, options, "SI"); break; case _STARPU_FUT_DATA_STATE_OWNER: if (options->memory_states) handle_data_state(&ev, options, "SO"); break; case _STARPU_FUT_DATA_STATE_SHARED: if (options->memory_states) handle_data_state(&ev, options, "SS"); break; case _STARPU_FUT_DATA_REQUEST_CREATED: if (!options->no_bus && options->memory_states) { handle_data_request(&ev, options, "rc"); } break; case _STARPU_FUT_PAPI_TASK_EVENT_VALUE: handle_papi_event(&ev, options); break; case _STARPU_FUT_DATA_COPY: if (!options->no_bus) handle_data_copy(); break; case _STARPU_FUT_DATA_LOAD: break; case _STARPU_FUT_DATA_NAME: handle_data_name(&ev, options); break; case _STARPU_FUT_DATA_COORDINATES: handle_data_coordinates(&ev, options); break; case _STARPU_FUT_DATA_WONT_USE: handle_data_wont_use(&ev, options); break; case _STARPU_FUT_DATA_DOING_WONT_USE: if (options->memory_states) handle_data_doing_wont_use(&ev, options); break; case _STARPU_FUT_START_DRIVER_COPY: if (!options->no_bus) handle_start_driver_copy(&ev, options); break; case _STARPU_FUT_END_DRIVER_COPY: if (!options->no_bus) handle_end_driver_copy(&ev, options); break; case _STARPU_FUT_START_DRIVER_COPY_ASYNC: if (!options->no_bus) handle_start_driver_copy_async(&ev, options); break; case _STARPU_FUT_END_DRIVER_COPY_ASYNC: if (!options->no_bus) handle_end_driver_copy_async(&ev, options); break; case _STARPU_FUT_WORK_STEALING: handle_work_stealing(&ev, options); break; case _STARPU_FUT_WORKER_DEINIT_START: handle_worker_deinit_start(&ev, options); break; case _STARPU_FUT_WORKER_DEINIT_END: handle_worker_deinit_end(&ev, options); break; case _STARPU_FUT_START_ALLOC: if (!options->no_bus) { handle_push_memnode_event(&ev, options, "A"); handle_memnode_event_start_4(&ev, options, "Al"); } break; case _STARPU_FUT_START_ALLOC_REUSE: if (!options->no_bus) { handle_push_memnode_event(&ev, options, "Ar"); handle_memnode_event_start_4(&ev, options, "Alr"); } break; case _STARPU_FUT_END_ALLOC: if (!options->no_bus) { handle_pop_memnode_event(&ev, options); handle_memnode_event_end_3(&ev, options, "AlE"); } break; case _STARPU_FUT_END_ALLOC_REUSE: if (!options->no_bus) { handle_pop_memnode_event(&ev, options); handle_memnode_event_end_3(&ev, options, "AlrE"); } break; case _STARPU_FUT_START_FREE: if (!options->no_bus) { handle_push_memnode_event(&ev, options, "F"); handle_memnode_event_start_3(&ev, options, "Fe"); } break; case _STARPU_FUT_END_FREE: if (!options->no_bus) { handle_pop_memnode_event(&ev, options); handle_memnode_event_end_2(&ev, options, "FeE"); } break; case _STARPU_FUT_START_WRITEBACK: if (!options->no_bus) { handle_push_memnode_event(&ev, options, "W"); handle_memnode_event_start_2(&ev, options, "Wb"); } break; case _STARPU_FUT_END_WRITEBACK: if (!options->no_bus) { handle_pop_memnode_event(&ev, options); handle_memnode_event_start_2(&ev, options, "WbE"); } break; case _STARPU_FUT_START_WRITEBACK_ASYNC: if (!options->no_bus) handle_push_memnode_event(&ev, options, "Wa"); break; case _STARPU_FUT_END_WRITEBACK_ASYNC: if (!options->no_bus) handle_pop_memnode_event(&ev, options); break; case _STARPU_FUT_START_MEMRECLAIM: if (!options->no_bus) handle_push_memnode_event(&ev, options, "R"); break; case _STARPU_FUT_END_MEMRECLAIM: if (!options->no_bus) handle_pop_memnode_event(&ev, options); break; case _STARPU_FUT_USED_MEM: handle_used_mem(&ev, options); break; case _STARPU_FUT_USER_EVENT: if (!options->no_events) handle_user_event(&ev, options); break; case _STARPU_MPI_FUT_START: handle_mpi_start(&ev, options); break; case _STARPU_MPI_FUT_STOP: handle_mpi_stop(&ev, options); break; case _STARPU_MPI_FUT_BARRIER: handle_mpi_barrier(&ev, options); break; case _STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN: handle_mpi_isend_submit_begin(&ev, options); break; case _STARPU_MPI_FUT_ISEND_SUBMIT_END: handle_mpi_isend_submit_end(&ev, options); break; case _STARPU_MPI_FUT_ISEND_NUMA_NODE: handle_mpi_isend_numa_node(&ev, options); break; case _STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN: handle_mpi_irecv_submit_begin(&ev, options); break; case _STARPU_MPI_FUT_IRECV_SUBMIT_END: handle_mpi_irecv_submit_end(&ev, options); break; case _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN: handle_mpi_isend_complete_begin(&ev, options); break; case _STARPU_MPI_FUT_ISEND_COMPLETE_END: handle_mpi_isend_complete_end(&ev, options); break; case _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN: handle_mpi_irecv_complete_begin(&ev, options); break; case _STARPU_MPI_FUT_IRECV_COMPLETE_END: handle_mpi_irecv_complete_end(&ev, options); break; case _STARPU_MPI_FUT_ISEND_TERMINATED: break; case _STARPU_MPI_FUT_IRECV_TERMINATED: handle_mpi_irecv_terminated(&ev, options); break; case _STARPU_MPI_FUT_IRECV_NUMA_NODE: handle_mpi_irecv_numa_node(&ev, options); break; case _STARPU_MPI_FUT_SLEEP_BEGIN: handle_mpi_sleep_begin(&ev, options); break; case _STARPU_MPI_FUT_SLEEP_END: handle_mpi_sleep_end(&ev, options); break; case _STARPU_MPI_FUT_DTESTING_BEGIN: handle_mpi_dtesting_begin(&ev, options); break; case _STARPU_MPI_FUT_DTESTING_END: handle_mpi_dtesting_end(&ev, options); break; case _STARPU_MPI_FUT_UTESTING_BEGIN: handle_mpi_utesting_begin(&ev, options); break; case _STARPU_MPI_FUT_UTESTING_END: handle_mpi_utesting_end(&ev, options); break; case _STARPU_MPI_FUT_UWAIT_BEGIN: handle_mpi_uwait_begin(&ev, options); break; case _STARPU_MPI_FUT_UWAIT_END: handle_mpi_uwait_end(&ev, options); break; case _STARPU_MPI_FUT_DATA_SET_RANK: handle_mpi_data_set_rank(&ev, options); break; case _STARPU_MPI_FUT_DATA_SET_TAG: handle_mpi_data_set_tag(&ev, options); break; case _STARPU_MPI_FUT_TESTING_DETACHED_BEGIN: handle_mpi_testing_detached_begin(&ev, options); break; case _STARPU_MPI_FUT_TESTING_DETACHED_END: handle_mpi_testing_detached_end(&ev, options); break; case _STARPU_MPI_FUT_TEST_BEGIN: handle_mpi_test_begin(&ev, options); break; case _STARPU_MPI_FUT_TEST_END: handle_mpi_test_end(&ev, options); break; case _STARPU_MPI_FUT_POLLING_BEGIN: handle_mpi_polling_begin(&ev, options); break; case _STARPU_MPI_FUT_POLLING_END: handle_mpi_polling_end(&ev, options); break; case _STARPU_MPI_FUT_DRIVER_RUN_BEGIN: handle_mpi_driver_run_begin(&ev, options); break; case _STARPU_MPI_FUT_DRIVER_RUN_END: handle_mpi_driver_run_end(&ev, options); break; case _STARPU_MPI_FUT_CHECKPOINT_BEGIN: handle_checkpoint_begin(&ev, options); break; case _STARPU_MPI_FUT_CHECKPOINT_END: handle_checkpoint_end(&ev, options); break; case _STARPU_FUT_SET_PROFILING: handle_set_profiling(&ev, options); break; case _STARPU_FUT_TASK_WAIT_FOR_ALL: handle_task_wait_for_all(); break; case _STARPU_FUT_EVENT: if (!options->no_events) handle_event(&ev, options); break; case _STARPU_FUT_THREAD_EVENT: if (!options->no_events) handle_thread_event(&ev, options); break; case _STARPU_FUT_LOCKING_MUTEX: break; case _STARPU_FUT_MUTEX_LOCKED: break; case _STARPU_FUT_UNLOCKING_MUTEX: break; case _STARPU_FUT_MUTEX_UNLOCKED: break; case _STARPU_FUT_TRYLOCK_MUTEX: break; case _STARPU_FUT_RDLOCKING_RWLOCK: break; case _STARPU_FUT_RWLOCK_RDLOCKED: break; case _STARPU_FUT_WRLOCKING_RWLOCK: break; case _STARPU_FUT_RWLOCK_WRLOCKED: break; case _STARPU_FUT_UNLOCKING_RWLOCK: break; case _STARPU_FUT_RWLOCK_UNLOCKED: break; case _STARPU_FUT_LOCKING_SPINLOCK: break; case _STARPU_FUT_SPINLOCK_LOCKED: break; case _STARPU_FUT_UNLOCKING_SPINLOCK: break; case _STARPU_FUT_SPINLOCK_UNLOCKED: break; case _STARPU_FUT_TRYLOCK_SPINLOCK: break; case _STARPU_FUT_COND_WAIT_BEGIN: break; case _STARPU_FUT_COND_WAIT_END: break; case _STARPU_FUT_BARRIER_WAIT_BEGIN: break; case _STARPU_FUT_BARRIER_WAIT_END: break; case _STARPU_FUT_MEMORY_FULL: break; case _STARPU_FUT_SCHED_COMPONENT_POP_PRIO: break; case _STARPU_FUT_SCHED_COMPONENT_PUSH_PRIO: break; case _STARPU_FUT_HYPERVISOR_BEGIN: handle_hypervisor_begin(&ev, options); break; case _STARPU_FUT_HYPERVISOR_END: handle_hypervisor_end(&ev, options); break; case FUT_SETUP_CODE: fut_keymask = ev.param[0]; break; case FUT_KEYCHANGE_CODE: fut_keymask = ev.param[0]; break; case FUT_START_FLUSH_CODE: handle_string_event(&ev, "fxt_start_flush", options); break; case FUT_STOP_FLUSH_CODE: handle_string_event(&ev, "fxt_stop_flush", options); break; /* We can safely ignore FUT internal events */ case FUT_CALIBRATE0_CODE: case FUT_CALIBRATE1_CODE: case FUT_CALIBRATE2_CODE: case FUT_NEW_LWP_CODE: case FUT_GCC_INSTRUMENT_ENTRY_CODE: break; default: #ifdef STARPU_VERBOSE _STARPU_MSG("unknown event.. %x at time %llx WITH OFFSET %llx\n", (unsigned)ev.code, (long long unsigned)ev.time, (long long unsigned)(ev.time-options->file_offset.offset_start)); #endif break; } _starpu_fxt_process_bandwidth(options); if (!options->no_flops) _starpu_fxt_process_computations(options); } unsigned i; if (!options->no_flops) { /* computations are supposed to be over, unref any pending comp */ for (i = 0; i < STARPU_NMAXWORKERS; i++) { struct _starpu_computation *comp = ongoing_computation[i]; if (comp) { STARPU_ASSERT(!comp->peer); _starpu_computation_list_erase(&computation_list, comp); } } /* And flush completed computations */ _starpu_fxt_process_computations(options); } for (i = 0; i < STARPU_NMAXWORKERS; i++) { struct _starpu_computation *comp = ongoing_computation[i]; if (comp) { STARPU_ASSERT(!comp->peer); _starpu_computation_delete(comp); ongoing_computation[i] = 0; } } if (!options->no_bus) { while (!_starpu_communication_list_empty(&communication_list)) { struct _starpu_communication*itor; itor = _starpu_communication_list_pop_front(&communication_list); if (out_paje_file && !itor->peer) { /* Trace finished with this communication uncompleted, fake its termination */ unsigned comid = itor->comid; unsigned long size = itor->size; double time = current_computation_time; const char *link_type = itor->type; #ifdef STARPU_HAVE_POTI char paje_value[STARPU_POTI_STR_LEN], paje_key[STARPU_POTI_STR_LEN]; snprintf(paje_value, sizeof(paje_value), "%lu", size); snprintf(paje_key, sizeof(paje_key), "com_%u", comid); char program_container[STARPU_POTI_STR_LEN]; program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix); #endif if (itor->bandwidth > 0) { unsigned dst = itor->dst_node; /* Fake termination of communication at end of time */ #ifdef STARPU_HAVE_POTI char dst_memnode_container[STARPU_POTI_STR_LEN]; memmanager_container_alias(dst_memnode_container, STARPU_POTI_STR_LEN, prefix, dst); poti_EndLink(time, program_container, link_type, dst_memnode_container, paje_value, paje_key); #else fprintf(out_paje_file, "19 %.9f %s %sp %lu %smm%u com_%u\n", time, link_type, prefix, size, prefix, dst, comid); #endif } else { /* Fake start of communication at start of time */ unsigned src = itor->src_node; unsigned X = _starpu_fxt_data_get_coord(itor->handle, options->file_rank, 0); unsigned Y = _starpu_fxt_data_get_coord(itor->handle, options->file_rank, 1); const char *name = _starpu_fxt_data_get_name(itor->handle, options->file_rank); if (!name) name = ""; #ifdef STARPU_HAVE_POTI char str_handle[STARPU_POTI_STR_LEN]; snprintf(str_handle, sizeof(str_handle), "%lx", itor->handle); char X_str[STARPU_POTI_STR_LEN]; snprintf(X_str, sizeof(X_str), "%u", X); char Y_str[STARPU_POTI_STR_LEN]; snprintf(Y_str, sizeof(Y_str), "%u", Y); char src_memnode_container[STARPU_POTI_STR_LEN]; memmanager_container_alias(src_memnode_container, STARPU_POTI_STR_LEN, prefix, src); poti_user_StartLink(_starpu_poti_CommLinkStart, 0., program_container, link_type, src_memnode_container, paje_value, paje_key, 4, str_handle, name, X_str, Y_str); #else fprintf(out_paje_file, "24 %.9f %s %sp %lu %smm%u com_%u %lx \"%s\" %u %u\n", 0., link_type, prefix, size, prefix, src, comid, itor->handle, name, X, Y); #endif } } _starpu_communication_delete(itor); } } if (out_paje_file && !options->no_flops) { for (i = 0; i < STARPU_NMAXWORKERS; i++) { if (last_codelet_end[i] != 0.0) { #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; worker_container_alias(container, STARPU_POTI_STR_LEN, prefix, i); poti_SetVariable(last_codelet_end[i], container, "gf", 0.); #else fprintf(out_paje_file, "13 %.9f %sw%u gf %f\n", last_codelet_end[i], prefix, i, 0.); #endif last_codelet_end[i] = 0.0; } } /* flush last value */ #ifdef STARPU_HAVE_POTI char container[STARPU_POTI_STR_LEN]; scheduler_container_alias(container, STARPU_POTI_STR_LEN, prefix); poti_SetVariable(current_computation_time, container, "gft", (double)current_computation); #else fprintf(out_paje_file, "13 %.9f %ssched gft %f\n", current_computation_time, prefix, (float)current_computation); #endif } { struct data_info *data=NULL, *tmp=NULL; HASH_ITER(hh, data_info, data, tmp) { data_dump(data); } } { struct task_info *task=NULL, *tmp=NULL; HASH_ITER(hh, tasks_info, task, tmp) { task_dump(task, options); } } for (i = 0; i < STARPU_NMAXWORKERS; i++) { free(options->worker_archtypes[i].devices); options->worker_archtypes[i].devices = NULL; } _starpu_fxt_component_deinit(); free_worker_ids(); #ifdef HAVE_FXT_BLOCKEV_LEAVE fxt_blockev_leave(block); #endif /* Close the trace file */ #ifdef HAVE_FXT_CLOSE fxt_close(fut); #else if (close(fd_in)) { perror("close failed :"); _exit(EXIT_FAILURE); } #endif } /* Initialize FxT options to default values */ void starpu_fxt_options_init(struct starpu_fxt_options *options) { memset(options, 0, sizeof(struct starpu_fxt_options)); options->out_paje_path = strdup("paje.trace"); options->dag_path = strdup("dag.dot"); options->tasks_path = strdup("tasks.rec"); options->comms_path = strdup("comms.rec"); options->data_path = strdup("data.rec"); options->papi_path = strdup("papi.rec"); options->anim_path = strdup("trace.html"); options->states_path = strdup("trace.rec"); options->distrib_time_path = strdup("distrib.data"); options->activity_path = strdup("activity.data"); options->sched_tasks_path = strdup("sched_tasks.rec"); } static void _set_dir(char *dir, char **option) { if (*option) { char *tmp = strdup(*option); free(*option); _STARPU_MALLOC(*option, 256); snprintf(*option, 256, "%s/%s", dir, tmp); free(tmp); } } static void _starpu_fxt_options_set_dir(struct starpu_fxt_options *options) { if (!options->dir) return; _starpu_mkpath_and_check(options->dir, S_IRWXU); _set_dir(options->dir, &options->out_paje_path); _set_dir(options->dir, &options->dag_path); _set_dir(options->dir, &options->tasks_path); _set_dir(options->dir, &options->comms_path); _set_dir(options->dir, &options->number_events_path); _set_dir(options->dir, &options->data_path); _set_dir(options->dir, &options->papi_path); _set_dir(options->dir, &options->anim_path); _set_dir(options->dir, &options->states_path); _set_dir(options->dir, &options->distrib_time_path); _set_dir(options->dir, &options->activity_path); _set_dir(options->dir, &options->sched_tasks_path); } void starpu_fxt_options_shutdown(struct starpu_fxt_options *options) { free(options->out_paje_path); free(options->dag_path); free(options->tasks_path); free(options->comms_path); free(options->number_events_path); free(options->data_path); free(options->papi_path); free(options->anim_path); free(options->states_path); free(options->distrib_time_path); free(options->activity_path); free(options->sched_tasks_path); } static void _starpu_fxt_distrib_file_init(struct starpu_fxt_options *options) { dumped_codelets_count = 0; dumped_codelets = NULL; if (options->distrib_time_path) { distrib_time = fopen(options->distrib_time_path, "w+"); if (distrib_time == NULL) STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->distrib_time_path, strerror(errno)); } else { distrib_time = NULL; } } static void _starpu_fxt_distrib_file_close(struct starpu_fxt_options *options) { if (distrib_time) fclose(distrib_time); if (options->dumped_codelets) { *options->dumped_codelets = dumped_codelets; options->dumped_codelets_count = dumped_codelets_count; } } static void _starpu_fxt_activity_file_init(struct starpu_fxt_options *options) { if (options->activity_path) { activity_file = fopen(options->activity_path, "w+"); if (activity_file == NULL) STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->activity_path, strerror(errno)); } else activity_file = NULL; } static void _starpu_fxt_sched_tasks_file_init(struct starpu_fxt_options *options) { if (options->sched_tasks_path) { sched_tasks_file = fopen(options->sched_tasks_path, "w+"); if (sched_tasks_file == NULL) STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->sched_tasks_path, strerror(errno)); } else sched_tasks_file = NULL; } static void _starpu_fxt_anim_file_init(struct starpu_fxt_options *options) { if (options->anim_path) { anim_file = fopen(options->anim_path, "w+"); if (anim_file == NULL) STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->anim_path, strerror(errno)); _starpu_fxt_component_print_header(anim_file); } else anim_file = NULL; } static void _starpu_fxt_tasks_file_init(struct starpu_fxt_options *options) { if (options->tasks_path) { tasks_file = fopen(options->tasks_path, "w+"); if (tasks_file == NULL) STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->tasks_path, strerror(errno)); } else tasks_file = NULL; } static void _starpu_fxt_data_file_init(struct starpu_fxt_options *options) { if (options->data_path) { data_file = fopen(options->data_path, "w+"); if (data_file == NULL) STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->data_path, strerror(errno)); } else data_file = NULL; } static void _starpu_fxt_comms_file_init(struct starpu_fxt_options *options) { if (options->comms_path) { comms_file = fopen(options->comms_path, "w+"); if (comms_file == NULL) STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->comms_path, strerror(errno)); } else comms_file = NULL; } static void _starpu_fxt_number_events_file_init(struct starpu_fxt_options *options) { if (options->number_events_path) { number_events_file = fopen(options->number_events_path, "w+"); if (number_events_file == NULL) STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->number_events_path, strerror(errno)); /* FUT_SETUP_CODE is the event with the maximal value */ _STARPU_CALLOC(number_events, FUT_SETUP_CODE+1, sizeof(uint64_t)); } else number_events_file = NULL; } static void _starpu_fxt_papi_file_init(struct starpu_fxt_options *options) { #ifdef STARPU_PAPI if (options->papi_path) { papi_file = fopen(options->papi_path, "w+"); if (papi_file == NULL) STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->papi_path, strerror(errno)); } else papi_file = NULL; #else (void) options; // avoid warning about unused variable #endif } static void _starpu_fxt_write_trace_header(FILE *f) { fprintf(f, "#\n"); fprintf(f, "# E: Event type\n"); fprintf(f, "# N: Event name\n"); fprintf(f, "# C: Event category\n"); fprintf(f, "# W: Worker ID\n"); fprintf(f, "# T: Thread ID\n"); fprintf(f, "# S: Start time\n"); fprintf(f, "#\n"); fprintf(f, "\n"); } static void _starpu_fxt_trace_file_init(struct starpu_fxt_options *options) { if (options->states_path) { trace_file = fopen(options->states_path, "w+"); if (trace_file == NULL) STARPU_ABORT_MSG("Failed to open '%s' (err %s)", options->states_path, strerror(errno)); } else trace_file = NULL; if (trace_file) _starpu_fxt_write_trace_header(trace_file); } static void _starpu_fxt_activity_file_close(void) { if (activity_file) fclose(activity_file); } static void _starpu_fxt_sched_tasks_file_close(void) { if (sched_tasks_file) fclose(sched_tasks_file); } static void _starpu_fxt_anim_file_close(void) { //_starpu_fxt_component_dump(stderr); if (anim_file) { _starpu_fxt_component_finish(anim_file); fclose(anim_file); } } static void _starpu_fxt_tasks_file_close(void) { if (tasks_file) fclose(tasks_file); } static void _starpu_fxt_comms_file_close(void) { if (comms_file) fclose(comms_file); } static void _starpu_fxt_number_events_file_close(void) { if (number_events_file) { int i; assert(number_events != NULL); fprintf(number_events_file, "# Use starpu_fxt_number_events_to_names.py to convert event keys to event names.\n"); for (i = 0; i <= FUT_SETUP_CODE; i++) { if (number_events[i] > 0) fprintf(number_events_file, "0x%x\t%"PRIu64"\n", i, number_events[i]); } free(number_events); number_events = NULL; fclose(number_events_file); } } static void _starpu_fxt_data_file_close(void) { if (data_file) fclose(data_file); } static void _starpu_fxt_papi_file_close(void) { #ifdef STARPU_PAPI if (papi_file) fclose(papi_file); #endif } static void _starpu_fxt_trace_file_close(void) { if (trace_file) fclose(trace_file); } static void _starpu_fxt_paje_file_init(struct starpu_fxt_options *options) { /* create a new file */ if (options->out_paje_path) { out_paje_file = fopen(options->out_paje_path, "w+"); if (!out_paje_file) { _STARPU_MSG("error while opening %s\n", options->out_paje_path); perror("fopen"); _exit(EXIT_FAILURE); } #ifdef STARPU_HAVE_POTI #ifdef HAVE_POTI_INIT_CUSTOM fclose(out_paje_file); poti_init_custom(options->out_paje_path, 0, //if false, allow extended events 1, //if true, an old header (pj_dump -n) 0, //if false, the trace has no comments 1, //if true, events have aliases 1);//if true, relative timestamps #else poti_init(out_paje_file); #endif #endif _starpu_fxt_write_paje_header(out_paje_file, options); } else { out_paje_file = NULL; } /* create lists for symbols (kernel states) and communications */ _starpu_symbol_name_list_init(&symbol_list); _starpu_communication_list_init(&communication_list); if (!options->no_flops) _starpu_computation_list_init(&computation_list); } static void _starpu_fxt_paje_file_close(void) { struct _starpu_symbol_name *itor, *next; for (itor = _starpu_symbol_name_list_begin(&symbol_list); itor != _starpu_symbol_name_list_end(&symbol_list); itor = next) { next = _starpu_symbol_name_list_next(itor); _starpu_symbol_name_list_erase(&symbol_list, itor); free(itor->name); _starpu_symbol_name_delete(itor); } if (out_paje_file) fclose(out_paje_file); } static uint64_t _starpu_fxt_find_start_time(char *filename_in) { /* Open the trace file */ int fd_in; fd_in = open(filename_in, O_RDONLY); if (fd_in < 0) { STARPU_ABORT_MSG("Failed to open '%s' (err %s)", filename_in, strerror(errno)); } static fxt_t fut; fut = fxt_fdopen(fd_in); if (!fut) { perror("fxt_fdopen :"); _exit(EXIT_FAILURE); } fxt_blockev_t block; block = fxt_blockev_enter(fut); struct fxt_ev_64 ev; int ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev); STARPU_ASSERT(ret == FXT_EV_OK); #ifdef HAVE_FXT_BLOCKEV_LEAVE fxt_blockev_leave(block); #endif /* Close the trace file */ #ifdef HAVE_FXT_CLOSE fxt_close(fut); #else if (close(fd_in)) { perror("close failed :"); _exit(EXIT_FAILURE); } #endif return (ev.time); } struct inputrank { int input; int rank; }; static int inputrank_compar(const void *_a, const void *_b) { const struct inputrank *a = _a; const struct inputrank *b = _b; return a->rank - b->rank; } void starpu_fxt_generate_trace(struct starpu_fxt_options *options) { starpu_drivers_preinit(); _starpu_fxt_options_set_dir(options); _starpu_fxt_dag_init(options->dag_path); _starpu_fxt_distrib_file_init(options); _starpu_fxt_activity_file_init(options); _starpu_fxt_sched_tasks_file_init(options); _starpu_fxt_anim_file_init(options); _starpu_fxt_tasks_file_init(options); _starpu_fxt_data_file_init(options); _starpu_fxt_papi_file_init(options); _starpu_fxt_comms_file_init(options); _starpu_fxt_number_events_file_init(options); _starpu_fxt_trace_file_init(options); _starpu_fxt_paje_file_init(options); if (options->ninputfiles == 0) { return; } else if (options->ninputfiles == 1) { /* we usually only have a single trace */ uint64_t file_start_time = _starpu_fxt_find_start_time(options->filenames[0]); options->file_prefix = strdup(""); options->file_offset.nb_barriers = 0; options->file_offset.offset_start = -file_start_time; options->file_rank = -1; _starpu_fxt_parse_new_file(options->filenames[0], options); } else { unsigned inputfile, i; /* * Find the trace offsets: * - If there is no sync point * psi_k(x) = x - start_k * - If there is one sync point sync_k * psi_k(x) = x - sync_k + M * where M = max { sync_i - start_i | there exists sync_i} * - If there are two sync points: * Two offsets are computed, and then offset is interpolated * and applied in get_event_timestamp() for each timestamp. * More generally: * - psi_k(x) = x - offset_k */ int unique_keys[options->ninputfiles]; int rank_k[options->ninputfiles]; uint64_t start_k[options->ninputfiles]; struct starpu_fxt_mpi_offset sync_barriers[options->ninputfiles]; uint64_t M_start = 0; uint64_t M_end = 0; int key = -1; unsigned display_mpi = 0; /* Get all trace starts */ for (inputfile = 0; inputfile < options->ninputfiles; inputfile++) { uint64_t file_start = _starpu_fxt_find_start_time(options->filenames[inputfile]); start_k[inputfile] = file_start; } /* Look for all synchronization points, if they exist */ for (inputfile = 0; inputfile < options->ninputfiles; inputfile++) { sync_barriers[inputfile] = _starpu_fxt_mpi_find_sync_points(options->filenames[inputfile], &unique_keys[inputfile], &rank_k[inputfile]); if (sync_barriers[inputfile].nb_barriers > 0) { /* Let's start by making sure all trace files come from the same execution: */ if (key == -1) { key = unique_keys[inputfile]; // key is in [0, RAND_MAX] display_mpi = 1; } else if (key != unique_keys[inputfile]) { _STARPU_MSG("Warning: traces are coming from different run so we will not try to display MPI communications.\n"); display_mpi = 0; } /* Find what is the most important duration between start of the trace and sync point. * (see below why we need this information) */ STARPU_ASSERT(sync_barriers[inputfile].local_time_start >= start_k[inputfile]); uint64_t diff = sync_barriers[inputfile].local_time_start - start_k[inputfile]; if (diff > M_start) { M_start = diff; } if (sync_barriers[inputfile].nb_barriers == 2) { STARPU_ASSERT(sync_barriers[inputfile].local_time_end >= sync_barriers[inputfile].local_time_start); diff = sync_barriers[inputfile].local_time_end - start_k[inputfile]; if (diff > M_end) { M_end = diff; } } } } /* Compute the offset for each trace file. * Note: offsets will be applied with the following formula: * t_corrected = t + offset * The offset represents two steps: * 1. It changes the time origin of timestamps to the local sync * point time (since we are sure the sync point occurred at the same * global time on each node, it is a valid reference point), hence: * offset[k] = -sync_point[k] * 2. This will make timestamp of events before the sync point * happening before 0. We correct this by adding to the offset the * largest time difference between trace start and sync point among * all trace files (after step 1., it is the start time which is the * most in the past, so by taking this value, we are sure all events * in all processes will have a positive timestamp), hence: * offset[k] += M */ for (inputfile = 0; inputfile < options->ninputfiles; inputfile++) { if (sync_barriers[inputfile].nb_barriers) { sync_barriers[inputfile].offset_start = -sync_barriers[inputfile].local_time_start + M_start; if (sync_barriers[inputfile].nb_barriers == 2) { sync_barriers[inputfile].offset_end = -sync_barriers[inputfile].local_time_end + M_end; } } else { sync_barriers[inputfile].offset_start = -start_k[inputfile]; } } /* Sort input files by rank */ struct inputrank inputrank[options->ninputfiles]; for (inputfile = 0; inputfile < options->ninputfiles; inputfile++) { inputrank[inputfile].input = inputfile; inputrank[inputfile].rank = rank_k[inputfile]; } qsort(inputrank, options->ninputfiles, sizeof(inputrank[0]), inputrank_compar); int maxrank = inputrank[options->ninputfiles-1].rank; int logn; if (maxrank == 0) logn = 1; else logn = log10(maxrank)+1; /* generate the Paje trace for the different files */ for (i = 0; i < options->ninputfiles; i++) { inputfile = inputrank[i].input; int filerank = rank_k[inputfile]; STARPU_ASSERT(filerank == inputrank[i].rank); _STARPU_DISP("Parsing file %s (rank %0*d)\n", options->filenames[inputfile], logn, filerank); char file_prefix[32]; snprintf(file_prefix, sizeof(file_prefix), "%0*d_", logn, filerank); free(options->file_prefix); options->file_prefix = strdup(file_prefix); options->file_offset = sync_barriers[inputfile]; options->file_rank = filerank; _starpu_fxt_parse_new_file(options->filenames[inputfile], options); } /* display the MPI transfers if possible */ if (display_mpi) _starpu_fxt_display_mpi_transfers(options, rank_k, out_paje_file, comms_file); } /* close the different files */ _starpu_fxt_paje_file_close(); _starpu_fxt_activity_file_close(); _starpu_fxt_sched_tasks_file_close(); _starpu_fxt_distrib_file_close(options); _starpu_fxt_anim_file_close(); _starpu_fxt_tasks_file_close(); _starpu_fxt_data_file_close(); _starpu_fxt_papi_file_close(); _starpu_fxt_comms_file_close(); _starpu_fxt_number_events_file_close(); _starpu_fxt_trace_file_close(); _starpu_fxt_dag_terminate(); options->nworkers = nworkers; free(options->file_prefix); } #define DATA_STR_MAX_SIZE 15 struct parse_task { unsigned exec_time; unsigned data_total; unsigned workerid; char *codelet_name; }; static struct parse_task tasks[STARPU_NMAXWORKERS]; static struct starpu_data_trace_kernel { UT_hash_handle hh; char *name; FILE *file; } *kernels; static struct starpu_data_trace_kernel_job { UT_hash_handle hh; int jobid; char *name; } *kernel_jobs; static void record_kernel_job_name(int jobid, char *name) { struct starpu_data_trace_kernel_job *kernel_job; HASH_FIND_INT(kernel_jobs, &jobid, kernel_job); if (kernel_job == NULL) { _STARPU_MALLOC(kernel_job, sizeof(*kernel_job)); kernel_job->jobid = jobid; HASH_ADD_INT(kernel_jobs, jobid, kernel_job); } else { free(kernel_job->name); } kernel_job->name = strdup(name); } static char *extract_kernel_job_name(int jobid) { char *name = NULL; struct starpu_data_trace_kernel_job *kernel_job; HASH_FIND_INT(kernel_jobs, &jobid, kernel_job); if (kernel_job != NULL) { name = kernel_job->name; HASH_DEL(kernel_jobs, kernel_job); free(kernel_job); } return name; } #define NANO_SEC_TO_MILI_SEC 0.000001 static FILE *codelet_list; static void write_task(char *dir, struct parse_task *pt) { struct starpu_data_trace_kernel *kernel; char *codelet_name = pt->codelet_name; HASH_FIND_STR(kernels, codelet_name, kernel); //fprintf(stderr, "%p %p %s\n", kernel, kernels, codelet_name); if(kernel == NULL) { _STARPU_MALLOC(kernel, sizeof(*kernel)); kernel->name = strdup(codelet_name); char filename[256]; snprintf(filename, sizeof(filename), "%s/%s", dir, kernel->name); //fprintf(stderr, "%s\n", kernel->name); kernel->file = fopen(filename, "w+"); if(!kernel->file) { STARPU_ABORT_MSG("Failed to open '%s' (err %s)", filename, strerror(errno)); } HASH_ADD_STR(kernels, name, kernel); fprintf(codelet_list, "%s\n", codelet_name); } double time = pt->exec_time * NANO_SEC_TO_MILI_SEC; fprintf(kernel->file, "%lf %u %u\n", time, pt->data_total, pt->workerid); } void starpu_fxt_write_data_trace_in_dir(char *filename_in, char *dir) { int fd_in; fd_in = open(filename_in, O_RDONLY); if (fd_in < 0) { STARPU_ABORT_MSG("Failed to open '%s' (err %s)", filename_in, strerror(errno)); } static fxt_t fut; fut = fxt_fdopen(fd_in); if (!fut) { perror("fxt_fdopen :"); _exit(EXIT_FAILURE); } char filename_out[512]; snprintf(filename_out, sizeof(filename_out), "%s/codelet_list", dir); codelet_list = fopen(filename_out, "w+"); if(!codelet_list) { STARPU_ABORT_MSG("Failed to open '%s' (err %s)", filename_out, strerror(errno)); } fxt_blockev_t block; block = fxt_blockev_enter(fut); while(1) { unsigned i; struct fxt_ev_64 ev; int ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev); for (i = ev.nb_params; i < FXT_MAX_PARAMS; i++) ev.param[i] = 0; if (ret != FXT_EV_OK) { break; } switch (ev.code) { case _STARPU_FUT_WORKER_INIT_START: register_worker_id(0 /* TODO: Add nodeid here instead */, ev.param[6], ev.param[1], ev.param[5]); break; case _STARPU_FUT_TASK_NAME: { int jobid = (int)ev.param[0]; char *name = get_fxt_string(&ev,2); record_kernel_job_name(jobid, name); } break; case _STARPU_FUT_START_CODELET_BODY: { int workerid = ev.param[2]; tasks[workerid].workerid = (unsigned)workerid; tasks[workerid].exec_time = ev.time; } break; case _STARPU_FUT_END_CODELET_BODY: { int jobid = (int)ev.param[0]; int workerid = ev.param[3]; assert(workerid != -1); tasks[workerid].exec_time = ev.time - tasks[workerid].exec_time; char *name = extract_kernel_job_name(jobid); if (name == NULL) { name = strdup("unknown"); } tasks[workerid].codelet_name = name; write_task(dir, &tasks[workerid]); /* codelet_name is copied in write_task() when needed */ tasks[workerid].codelet_name = NULL; free(name); } break; case _STARPU_FUT_DATA_LOAD: { int workerid = ev.param[0]; tasks[workerid].data_total = ev.param[1]; } break; default: #ifdef STARPU_VERBOSE _STARPU_MSG("unknown event.. %x at time %llx WITH OFFSET %llx\n", (unsigned)ev.code, (long long unsigned)ev.time, (long long unsigned)(ev.time)); #endif break; } } #ifdef HAVE_FXT_BLOCKEV_LEAVE fxt_blockev_leave(block); #endif #ifdef HAVE_FXT_CLOSE fxt_close(fut); #else if (close(fd_in)) { perror("close failed :"); _exit(EXIT_FAILURE); } #endif if(fclose(codelet_list)) { perror("close failed :"); _exit(EXIT_FAILURE); } unsigned i; for (i = 0; i < STARPU_NMAXWORKERS; i++) free(tasks[i].codelet_name); free_worker_ids(); { struct starpu_data_trace_kernel *kernel=NULL, *tmp=NULL; HASH_ITER(hh, kernels, kernel, tmp) { if(fclose(kernel->file)) { perror("close failed :"); _exit(EXIT_FAILURE); } HASH_DEL(kernels, kernel); free(kernel->name); free(kernel); } } { struct starpu_data_trace_kernel_job *kernel_job=NULL, *tmp=NULL; HASH_ITER(hh, kernel_jobs, kernel_job, tmp) { HASH_DEL(kernel_jobs, kernel_job); free(kernel_job->name); free(kernel_job); } } } void starpu_fxt_write_data_trace(char *filename_in) { starpu_fxt_write_data_trace_in_dir(filename_in, "."); } #endif // STARPU_USE_FXT starpu-1.4.9+dfsg/src/debug/traces/starpu_fxt.h000066400000000000000000000115201507764646700215150ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2018-2020 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU__FXT_H__ #define __STARPU__FXT_H__ /** @file */ #include #include #include #ifdef STARPU_USE_FXT #include #include #include #include #include #include #include #include #include #include "../mpi/src/starpu_mpi_fxt.h" #include #include "../../../include/starpu_fxt.h" #ifdef STARPU_HAVE_POTI #include #define STARPU_POTI_STR_LEN 200 #endif #define STARPU_TRACE_STR_LEN 200 #pragma GCC visibility push(hidden) extern char _starpu_last_codelet_symbol[STARPU_NMAXWORKERS][(FXT_MAX_PARAMS-5)*sizeof(unsigned long)]; void _starpu_fxt_dag_init(char *dag_filename); void _starpu_fxt_dag_terminate(void); void _starpu_fxt_dag_add_tag(const char *prefix, uint64_t tag, unsigned long job_id, const char *label); void _starpu_fxt_dag_add_tag_deps(const char *prefix, uint64_t child, uint64_t father, const char *label); void _starpu_fxt_dag_set_tag_done(const char *prefix, uint64_t tag, const char *color, const char *fontcolor); void _starpu_fxt_dag_add_task_deps(const char *prefix, unsigned long dep_prev, unsigned long dep_succ, const char *label); void _starpu_fxt_dag_add_task_end_dep(const char *prefix, unsigned long prev, unsigned long succ); void _starpu_fxt_dag_set_task_name(const char *prefix, unsigned long job_id, const char *label, const char *color, const char *fontcolor); #ifdef STARPU_BUBBLE void _starpu_fxt_dag_set_task_bubble(const char *prefix, unsigned long job_id, int is_bubble, unsigned long bubble_parent); #endif void _starpu_fxt_dag_set_task_line(const char *prefix, unsigned long job_id, const char *file, int line); void _starpu_fxt_dag_add_send(int src, unsigned long dep_prev, unsigned long tag, unsigned long id); void _starpu_fxt_dag_add_receive(int dst, unsigned long dep_prev, unsigned long tag, unsigned long id); void _starpu_fxt_dag_add_sync_point(void); unsigned _starpu_fxt_data_get_coord(unsigned long handle, int mpi_rank, unsigned dim); const char * _starpu_fxt_data_get_name(unsigned long handle, int mpi_rank); void _starpu_convert_numa_nodes_bitmap_to_str(long bitmap, char str[]); /* * MPI */ struct starpu_fxt_mpi_offset _starpu_fxt_mpi_find_sync_points(char *filename_in, int *key, int *rank); void _starpu_fxt_mpi_add_send_transfer(int src, int dst, long mpi_tag, size_t size, float date, long jobid, unsigned long handle, unsigned type, int prio); void _starpu_fxt_mpi_send_transfer_set_numa_node(int src, int dest, long jobid, long numa_nodes_bitmap); void _starpu_fxt_mpi_add_recv_transfer(int src, int dst, long mpi_tag, float date, long jobid, unsigned long handle); void _starpu_fxt_mpi_recv_transfer_set_numa_node(int src, int dst, long jobid, long numa_nodes_bitmap); void _starpu_fxt_display_mpi_transfers(struct starpu_fxt_options *options, int *ranks, FILE *out_paje_file, FILE* out_comms_file); void _starpu_fxt_write_paje_header(FILE *file, struct starpu_fxt_options *options); extern int _starpu_poti_extendedSetState; extern int _starpu_poti_semiExtendedSetState; extern int _starpu_poti_MemoryEvent; extern int _starpu_poti_CommLinkStart; extern int _starpu_poti_MpiLinkStart; extern int _starpu_poti_checkPointState; extern int _starpu_poti_JobState; /* * Animation */ void _starpu_fxt_component_print_header(FILE *output); void _starpu_fxt_component_new(uint64_t component, char *name); void _starpu_fxt_component_connect(uint64_t parent, uint64_t child); void _starpu_fxt_component_update_ntasks(unsigned nsubmitted, unsigned curq_size); void _starpu_fxt_component_push(FILE *output, struct starpu_fxt_options *options, double timestamp, int workerid, uint64_t from, uint64_t to, uint64_t task, unsigned prio); void _starpu_fxt_component_pull(FILE *output, struct starpu_fxt_options *options, double timestamp, int workerid, uint64_t from, uint64_t to, uint64_t task, unsigned prio); void _starpu_fxt_component_dump(FILE *output); void _starpu_fxt_component_finish(FILE *output); void _starpu_fxt_component_deinit(void); #pragma GCC visibility pop #endif // STARPU_USE_FXT #endif // __STARPU__FXT_H__ starpu-1.4.9+dfsg/src/debug/traces/starpu_fxt_dag.c000066400000000000000000000117151507764646700223310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #ifdef STARPU_USE_FXT #include "starpu_fxt.h" static FILE *out_file; static unsigned cluster_cnt; void _starpu_fxt_dag_init(char *out_path) { if (!out_path) { out_file = NULL; return; } /* create a new file */ out_file = fopen(out_path, "w+"); if (!out_file) { _STARPU_MSG("error while opening %s\n", out_path); perror("fopen"); _exit(EXIT_FAILURE); } cluster_cnt = 0; fprintf(out_file, "digraph G {\n"); fprintf(out_file, "\tcolor=white\n"); fprintf(out_file, "\trankdir=LR;\n"); /* Create a new cluster */ fprintf(out_file, "subgraph cluster_%u {\n", cluster_cnt); fprintf(out_file, "\tcolor=black;\n"); } void _starpu_fxt_dag_terminate(void) { if (!out_file) return; /* Close the last cluster */ fprintf(out_file, "}\n"); /* Close the graph */ fprintf(out_file, "}\n"); fclose(out_file); } void _starpu_fxt_dag_add_tag(const char *prefix, uint64_t tag, unsigned long job_id, const char *label) { if (out_file) { if (label) fprintf(out_file, "\t \"tag_%s%llx\"->\"task_%s%lu\"->\"tag_%s%llx\" [style=dashed] [label=\"%s\"]\n", prefix, (unsigned long long)tag, prefix, (unsigned long)job_id, prefix, (unsigned long long) tag, label); else fprintf(out_file, "\t \"tag_%s%llx\"->\"task_%s%lu\"->\"tag_%s%llx\" [style=dashed]\n", prefix, (unsigned long long)tag, prefix, (unsigned long)job_id, prefix, (unsigned long long) tag); } } void _starpu_fxt_dag_add_tag_deps(const char *prefix, uint64_t child, uint64_t father, const char *label) { if (out_file) { if (label) fprintf(out_file, "\t \"tag_%s%llx\"->\"tag_%s%llx\" [label=\"%s\"]\n", prefix, (unsigned long long)father, prefix, (unsigned long long)child, label); else fprintf(out_file, "\t \"tag_%s%llx\"->\"tag_%s%llx\"\n", prefix, (unsigned long long)father, prefix, (unsigned long long)child); } } void _starpu_fxt_dag_add_task_deps(const char *prefix, unsigned long dep_prev, unsigned long dep_succ, const char *label) { if (out_file) { if (label) fprintf(out_file, "\t \"task_%s%lu\"->\"task_%s%lu\" [label=\"%s\"]\n", prefix, dep_prev, prefix, dep_succ, label); else fprintf(out_file, "\t \"task_%s%lu\"->\"task_%s%lu\"\n", prefix, dep_prev, prefix, dep_succ); } } void _starpu_fxt_dag_set_tag_done(const char *prefix, uint64_t tag, const char *color, const char *fontcolor) { if (out_file) fprintf(out_file, "\t \"tag_%s%llx\" [ style=filled, fillcolor=\"%s\", fontcolor=\"%s\"]\n", prefix, (unsigned long long)tag, color, fontcolor); } void _starpu_fxt_dag_add_task_end_dep(const char *prefix, unsigned long prev, unsigned long succ) { if (out_file) fprintf(out_file, "\t \"task_%s%lu\" [ end_dep=\"%lu\"]\n", prefix, prev, succ); } void _starpu_fxt_dag_set_task_name(const char *prefix, unsigned long job_id, const char *label, const char *color, const char *fontcolor) { if (out_file) fprintf(out_file, "\t \"task_%s%lu\" [ style=filled, label=\"%s\", fillcolor=\"%s\", fontcolor=\"%s\"]\n", prefix, job_id, label, color, fontcolor); } #ifdef STARPU_BUBBLE void _starpu_fxt_dag_set_task_bubble(const char *prefix, unsigned long job_id, int is_bubble, unsigned long bubble_parent) { if (out_file) { fprintf(out_file, "\t \"task_%s%lu\" [ bubble=\"%d\" ", prefix, job_id, is_bubble); if (bubble_parent) fprintf(out_file, ", bubble_parent=\"%lu\"", bubble_parent); fprintf(out_file, "]\n"); } } #endif void _starpu_fxt_dag_set_task_line(const char *prefix, unsigned long job_id, const char *file, int line) { if (out_file) fprintf(out_file, "\t \"task_%s%lu\" [ href=\"%s#%d\" ]\n", prefix, job_id, file, line); } void _starpu_fxt_dag_add_send(int src, unsigned long dep_prev, unsigned long tag, unsigned long id) { if (out_file) fprintf(out_file, "\t \"task_%d_%lu\"->\"mpi_%lu_%lu\"\n", src, dep_prev, tag, id); } void _starpu_fxt_dag_add_receive(int dst, unsigned long dep_prev, unsigned long tag, unsigned long id) { if (out_file) fprintf(out_file, "\t \"mpi_%lu_%lu\"->\"task_%d_%lu\"\n", tag, id, dst, dep_prev); } void _starpu_fxt_dag_add_sync_point(void) { if (!out_file) return; /* Close the previous cluster */ fprintf(out_file, "}\n"); cluster_cnt++; /* Create a new cluster */ fprintf(out_file, "subgraph cluster_%u {\n", cluster_cnt); fprintf(out_file, "\tcolor=black;\n"); } #endif /* STARPU_USE_FXT */ starpu-1.4.9+dfsg/src/debug/traces/starpu_fxt_mpi.c000066400000000000000000000370451507764646700223670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2017-2020 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #ifdef STARPU_USE_FXT #include "starpu_fxt.h" LIST_TYPE(mpi_transfer, unsigned matched; int src; int dst; long mpi_tag; size_t size; float date; long jobid; double bandwidth; unsigned long handle; char *name; unsigned X; unsigned Y; unsigned type; int prio; long numa_nodes_bitmap; ); struct starpu_fxt_mpi_offset _starpu_fxt_mpi_find_sync_points(char *filename_in, int *key, int *rank) { struct starpu_fxt_mpi_offset offset; offset.nb_barriers = 0; offset.local_time_start = 0; offset.local_time_end = 0; offset.offset_start = 0; offset.offset_end = 0; /* Open the trace file */ int fd_in; fd_in = open(filename_in, O_RDONLY); if (fd_in < 0) { perror("open failed :"); _exit(EXIT_FAILURE); } static fxt_t fut; fut = fxt_fdopen(fd_in); if (!fut) { perror("fxt_fdopen :"); _exit(EXIT_FAILURE); } fxt_blockev_t block; block = fxt_blockev_enter(fut); struct fxt_ev_64 ev; int ret; uint64_t local_sync_time; while (offset.nb_barriers < 2 && (ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev)) == FXT_EV_OK) { if (ev.code == _STARPU_MPI_FUT_BARRIER) { /* We found a sync point */ *rank = ev.param[0]; *key = ev.param[2]; local_sync_time = (uint64_t) ((double) ev.param[3]); // It is stored as a double in the trace if (local_sync_time == 0) { /* This clock synchronization was made with an * MPI_Barrier, consider the event timestamp as * a local synchronized barrier time: */ local_sync_time = ev.time; } if (offset.nb_barriers == 0) { offset.local_time_start = local_sync_time; } else { offset.local_time_end = local_sync_time; } offset.nb_barriers++; } } /* Close the trace file */ if (close(fd_in)) { perror("close failed :"); _exit(EXIT_FAILURE); } return offset; } /* * Deal with the actual MPI transfers performed with the MPI lib */ /* the list of MPI transfers found in the different traces */ static struct mpi_transfer *mpi_sends[STARPU_FXT_MAX_FILES] = {NULL}; static struct mpi_transfer *mpi_recvs[STARPU_FXT_MAX_FILES] = {NULL}; /* number of available slots in the lists */ static unsigned mpi_sends_list_size[STARPU_FXT_MAX_FILES] = {0}; static unsigned mpi_recvs_list_size[STARPU_FXT_MAX_FILES] = {0}; /* number of slots actually used in the list */ static unsigned mpi_sends_used[STARPU_FXT_MAX_FILES] = {0}; static unsigned mpi_recvs_used[STARPU_FXT_MAX_FILES] = {0}; /* number of slots already matched at the beginning of the list. This permits * going through the lists from the beginning to match each and every * transfer, thus avoiding a quadratic complexity. */ static unsigned mpi_recvs_matched[STARPU_FXT_MAX_FILES][STARPU_FXT_MAX_FILES] = { {0} }; void _starpu_fxt_mpi_add_send_transfer(int src, int dst, long mpi_tag, size_t size, float date, long jobid, unsigned long handle, unsigned type, int prio) { STARPU_ASSERT(src >= 0); if (src >= STARPU_FXT_MAX_FILES) return; unsigned slot = mpi_sends_used[src]++; if (mpi_sends_used[src] > mpi_sends_list_size[src]) { if (mpi_sends_list_size[src] > 0) { mpi_sends_list_size[src] *= 2; } else { mpi_sends_list_size[src] = 1; } _STARPU_REALLOC(mpi_sends[src], mpi_sends_list_size[src]*sizeof(struct mpi_transfer)); } mpi_sends[src][slot].matched = 0; mpi_sends[src][slot].src = src; mpi_sends[src][slot].dst = dst; mpi_sends[src][slot].mpi_tag = mpi_tag; mpi_sends[src][slot].size = size; mpi_sends[src][slot].date = date; mpi_sends[src][slot].jobid = jobid; mpi_sends[src][slot].handle = handle; mpi_sends[src][slot].X = _starpu_fxt_data_get_coord(handle, src, 0); mpi_sends[src][slot].Y = _starpu_fxt_data_get_coord(handle, src, 1); const char *name = _starpu_fxt_data_get_name(handle, src); if (!name) name = ""; mpi_sends[src][slot].name = strdup(name); mpi_sends[src][slot].type = type; mpi_sends[src][slot].prio = prio; mpi_sends[src][slot].numa_nodes_bitmap = -1; } void _starpu_fxt_mpi_send_transfer_set_numa_node(int src, int dest, long jobid, long numa_nodes_bitmap) { STARPU_ASSERT(src >= 0); if (src >= STARPU_FXT_MAX_FILES || jobid == -1) return; unsigned i, slot; for (i = 0; i < mpi_sends_used[src]; i++) { /* The probe is just after the one handled by * _starpu_fxt_mpi_add_send_transfer, so the send transfer should have been * added recently: */ slot = mpi_sends_used[src] - i - 1; if (mpi_sends[src][slot].dst == dest && mpi_sends[src][slot].jobid == jobid) { mpi_sends[src][slot].numa_nodes_bitmap = numa_nodes_bitmap; return; } } _STARPU_MSG("Warning: did not find the send transfer from %d to %d with jobid %ld\n", src, dest, jobid); } void _starpu_fxt_mpi_add_recv_transfer(int src, int dst, long mpi_tag, float date, long jobid, unsigned long handle) { if (dst >= STARPU_FXT_MAX_FILES) return; unsigned slot = mpi_recvs_used[dst]++; if (mpi_recvs_used[dst] > mpi_recvs_list_size[dst]) { if (mpi_recvs_list_size[dst] > 0) { mpi_recvs_list_size[dst] *= 2; } else { mpi_recvs_list_size[dst] = 1; } _STARPU_REALLOC(mpi_recvs[dst], mpi_recvs_list_size[dst]*sizeof(struct mpi_transfer)); } mpi_recvs[dst][slot].matched = 0; mpi_recvs[dst][slot].src = src; mpi_recvs[dst][slot].dst = dst; mpi_recvs[dst][slot].mpi_tag = mpi_tag; mpi_recvs[dst][slot].date = date; mpi_recvs[dst][slot].jobid = jobid; mpi_recvs[dst][slot].handle = handle; mpi_recvs[dst][slot].numa_nodes_bitmap = -1; } void _starpu_fxt_mpi_recv_transfer_set_numa_node(int src, int dst, long jobid, long numa_nodes_bitmap) { STARPU_ASSERT(src >= 0); if (src >= STARPU_FXT_MAX_FILES || jobid == -1) return; unsigned i, slot; for (i = 0; i < mpi_recvs_used[dst]; i++) { /* The probe is just after the one handled by * _starpu_fxt_mpi_add_send_transfer, so the send transfer should have been * added recently: */ slot = mpi_recvs_used[dst] - i - 1; if (mpi_recvs[dst][slot].src == src && mpi_recvs[dst][slot].jobid == jobid) { mpi_recvs[dst][slot].numa_nodes_bitmap = numa_nodes_bitmap; return; } } _STARPU_MSG("Warning: did not find the recv transfer from %d to %d with jobid %ld\n", src, dst, jobid); } static struct mpi_transfer *try_to_match_send_transfer(int src, int dst, long mpi_tag) { unsigned slot; unsigned firstslot = mpi_recvs_matched[src][dst]; unsigned all_previous_were_matched = 1; for (slot = firstslot; slot < mpi_recvs_used[dst]; slot++) { if (!mpi_recvs[dst][slot].matched) { if (mpi_recvs[dst][slot].mpi_tag == mpi_tag) { /* we found a match ! */ mpi_recvs[dst][slot].matched = 1; return &mpi_recvs[dst][slot]; } all_previous_were_matched = 0; } else { if (all_previous_were_matched) { /* All previous transfers are already matched, * we need not consider them anymore */ mpi_recvs_matched[src][dst] = slot; } } } /* If we reached that point, we could not find a match */ return NULL; } static unsigned long mpi_com_id = 0; static const char* get_mpi_type_str(unsigned mpi_type) { switch (mpi_type) { case _STARPU_MPI_FUT_POINT_TO_POINT_SEND: return "PointToPoint"; case _STARPU_MPI_FUT_COLLECTIVE_SEND: return "Collective"; default: return "Unknown"; } } static void display_all_transfers_from_trace(FILE *out_paje_file, FILE *out_comms_file, unsigned n) { unsigned slot[STARPU_FXT_MAX_FILES] = { 0 }, node; unsigned nb_wrong_comm_timing = 0; struct mpi_transfer_list pending_receives; /* Sorted list of matches which have not happened yet */ double current_out_bandwidth[STARPU_FXT_MAX_FILES] = { 0. }; double current_in_bandwidth[STARPU_FXT_MAX_FILES] = { 0. }; #ifdef STARPU_HAVE_POTI char mpi_container[STARPU_POTI_STR_LEN]; #endif //bwi_mpi and bwo_mpi are set to zero when MPI thread containers are created mpi_transfer_list_init(&pending_receives); while (1) { float start_date; struct mpi_transfer *cur, *match; int src; /* Find out which event comes first: a pending receive, or a new send */ if (mpi_transfer_list_empty(&pending_receives)) start_date = INFINITY; else start_date = mpi_transfer_list_front(&pending_receives)->date; src = STARPU_FXT_MAX_FILES; for (node = 0; node < n; node++) { if (slot[node] < mpi_sends_used[node] && mpi_sends[node][slot[node]].date < start_date) { /* next send for node is earlier than others */ src = node; start_date = mpi_sends[src][slot[src]].date; } } if (start_date == INFINITY) /* No event any more, we're finished! */ break; if (src == STARPU_FXT_MAX_FILES) { /* Pending match is earlier than all new sends, finish its communication */ match = mpi_transfer_list_pop_front(&pending_receives); current_out_bandwidth[match->src] -= match->bandwidth; current_in_bandwidth[match->dst] -= match->bandwidth; #ifdef STARPU_HAVE_POTI snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", match->src); poti_SetVariable(match->date, mpi_container, "bwo_mpi", current_out_bandwidth[match->src]); snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", match->dst); poti_SetVariable(match->date, mpi_container, "bwi_mpi", current_in_bandwidth[match->dst]); #else fprintf(out_paje_file, "13 %.9f %d_mpict bwo_mpi %f\n", match->date, match->src, current_out_bandwidth[match->src]); fprintf(out_paje_file, "13 %.9f %d_mpict bwi_mpi %f\n", match->date, match->dst, current_in_bandwidth[match->dst]); #endif continue; } cur = &mpi_sends[src][slot[src]]; int dst = cur->dst; long mpi_tag = cur->mpi_tag; size_t size = cur->size; unsigned long send_handle = cur->handle; long send_numa_nodes_bitmap = cur->numa_nodes_bitmap; if (dst < STARPU_FXT_MAX_FILES) match = try_to_match_send_transfer(src, dst, mpi_tag); else match = NULL; if (match) { float end_date = match->date; unsigned long recv_handle = match->handle; long recv_numa_nodes_bitmap = match->numa_nodes_bitmap; struct mpi_transfer *prev; if (end_date <= start_date) nb_wrong_comm_timing++; match->bandwidth = (0.001*size)/(end_date - start_date); current_out_bandwidth[src] += match->bandwidth; current_in_bandwidth[dst] += match->bandwidth; /* Insert in sorted list, most probably at the end so let's use a mere insertion sort */ for (prev = mpi_transfer_list_last(&pending_receives); prev != mpi_transfer_list_alpha(&pending_receives); prev = mpi_transfer_list_prev(prev)) if (prev->date <= end_date) { /* Found its place */ mpi_transfer_list_insert_after(&pending_receives, match, prev); break; } if (prev == mpi_transfer_list_alpha(&pending_receives)) { /* No element earlier than this one, put it at the head */ mpi_transfer_list_push_front(&pending_receives, match); } unsigned long id = mpi_com_id++; if (cur->jobid != -1) _starpu_fxt_dag_add_send(src, cur->jobid, mpi_tag, id); if (match->jobid != -1) _starpu_fxt_dag_add_receive(dst, match->jobid, mpi_tag, id); #ifdef STARPU_HAVE_POTI char paje_value[STARPU_POTI_STR_LEN], paje_key[STARPU_POTI_STR_LEN]; snprintf(paje_value, sizeof(paje_value), "%lu", (long unsigned) size); snprintf(paje_key, sizeof(paje_key), "mpicom_%lu", id); snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", src); char str_mpi_tag[STARPU_POTI_STR_LEN]; snprintf(str_mpi_tag, sizeof(str_mpi_tag), "%ld", mpi_tag); char str_priority[STARPU_POTI_STR_LEN]; snprintf(str_priority, sizeof(str_priority), "%d", cur->prio); char str_handle[STARPU_POTI_STR_LEN]; snprintf(str_handle, sizeof(str_handle), "%lx", send_handle); char X_str[STARPU_POTI_STR_LEN]; snprintf(X_str, sizeof(X_str), "%u", cur->X); char Y_str[STARPU_POTI_STR_LEN]; snprintf(Y_str, sizeof(Y_str), "%u", cur->Y); poti_user_StartLink(_starpu_poti_MpiLinkStart, start_date, "MPIroot", "MPIL", mpi_container, paje_value, paje_key, 7, str_mpi_tag, get_mpi_type_str(cur->type), str_priority, str_handle, cur->name, X_str, Y_str); poti_SetVariable(start_date, mpi_container, "bwo_mpi", current_out_bandwidth[src]); snprintf(mpi_container, sizeof(mpi_container), "%d_mpict", dst); poti_EndLink(end_date, "MPIroot", "MPIL", mpi_container, paje_value, paje_key); poti_SetVariable(start_date, mpi_container, "bwo_mpi", current_in_bandwidth[dst]); #else fprintf(out_paje_file, "13 %.9f %d_mpict bwo_mpi %f\n", start_date, src, current_out_bandwidth[src]); fprintf(out_paje_file, "13 %.9f %d_mpict bwi_mpi %f\n", start_date, dst, current_in_bandwidth[dst]); fprintf(out_paje_file, "23 %.9f MPIL MPIroot %lu %d_mpict mpicom_%lu %ld %s %d %lx \"%s\" %u %u\n", start_date, (unsigned long)size, src, id, mpi_tag, get_mpi_type_str(cur->type), cur->prio, send_handle, cur->name, cur->X, cur->Y); fprintf(out_paje_file, "19 %.9f MPIL MPIroot %lu %d_mpict mpicom_%lu\n", end_date, (unsigned long)size, dst, id); #endif if (out_comms_file != NULL) { fprintf(out_comms_file, "Src: %d\n", src); fprintf(out_comms_file, "Dst: %d\n", dst); fprintf(out_comms_file, "Tag: %ld\n", mpi_tag); fprintf(out_comms_file, "SendTime: %.9f\n", start_date); fprintf(out_comms_file, "RecvTime: %.9f\n", end_date); fprintf(out_comms_file, "SendHandle: %lx\n", send_handle); fprintf(out_comms_file, "RecvHandle: %lx\n", recv_handle); if (cur->jobid != -1) fprintf(out_comms_file, "SendJobId: %d_%ld\n", src, cur->jobid); if (match->jobid != -1) fprintf(out_comms_file, "RecvJobId: %d_%ld\n", dst, match->jobid); fprintf(out_comms_file, "Size: %lu\n", (unsigned long)size); fprintf(out_comms_file, "Priority: %d\n", cur->prio); fprintf(out_comms_file, "Type: %s\n", get_mpi_type_str(cur->type)); char str[STARPU_TRACE_STR_LEN] = ""; _starpu_convert_numa_nodes_bitmap_to_str(send_numa_nodes_bitmap, str); fprintf(out_comms_file, "SendNumaNodes: %s\n", str); _starpu_convert_numa_nodes_bitmap_to_str(recv_numa_nodes_bitmap, str); fprintf(out_comms_file, "RecvNumaNodes: %s\n", str); fprintf(out_comms_file, "\n"); } free(cur->name); } else { _STARPU_DISP("Warning, could not match MPI transfer from %d to %d (tag %lx) starting at %f\n", src, dst, mpi_tag, start_date); } slot[src]++; } if (nb_wrong_comm_timing == 1) _STARPU_MSG("Warning: a communication finished before it started !\n"); else if (nb_wrong_comm_timing > 1) _STARPU_MSG("Warning: %u communications finished before they started !\n", nb_wrong_comm_timing); } void _starpu_fxt_display_mpi_transfers(struct starpu_fxt_options *options, int *ranks STARPU_ATTRIBUTE_UNUSED, FILE *out_paje_file, FILE* out_comms_file) { if (options->ninputfiles > STARPU_FXT_MAX_FILES) { _STARPU_DISP("Warning: %u files given, maximum %u supported, truncating to %u\n", options->ninputfiles, STARPU_FXT_MAX_FILES, STARPU_FXT_MAX_FILES); options->ninputfiles = STARPU_FXT_MAX_FILES; } /* display the MPI transfers if possible */ if (out_paje_file) display_all_transfers_from_trace(out_paje_file, out_comms_file, options->ninputfiles); } #endif // STARPU_USE_FXT starpu-1.4.9+dfsg/src/debug/traces/starpu_paje.c000066400000000000000000000732061507764646700216370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2017-2019 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "starpu_fxt.h" #include #ifdef STARPU_HAVE_POTI #include #endif #ifdef STARPU_USE_FXT #ifdef STARPU_HAVE_POTI int _starpu_poti_JobState; int _starpu_poti_checkPointState; #ifdef HAVE_POTI_INIT_CUSTOM int _starpu_poti_extendedSetState = -1; int _starpu_poti_semiExtendedSetState = -1; int _starpu_poti_MemoryEvent = -1; int _starpu_poti_CommLinkStart = -1; int _starpu_poti_MpiLinkStart = -1; #endif #endif void _starpu_fxt_write_paje_header(FILE *file STARPU_ATTRIBUTE_UNUSED, struct starpu_fxt_options *options) { unsigned i; #ifdef STARPU_HAVE_POTI #ifdef HAVE_POTI_INIT_CUSTOM poti_header(); /* see poti_init_custom to customize the header */ _starpu_poti_extendedSetState = poti_header_DeclareEvent (PAJE_SetState, 13, "Size string", "Params string", "Footprint string", "Tag string", "JobId string", "SubmitOrder string", "Priority string", "GFlop string", "X string", "Y string", /* "Z string", */ "Iteration string", "Subiteration string", "NumaNodes string"); _starpu_poti_semiExtendedSetState = poti_header_DeclareEvent (PAJE_SetState, 6, "Size string", "Params string", "Footprint string", "Tag string", "JobId string", "SubmitOrder string" ); #ifdef HAVE_POTI_USER_NEWEVENT _starpu_poti_CommLinkStart = poti_header_DeclareEvent(PAJE_StartLink, 4, "Handle string", "HName string", "X string", "Y string"); if (options->memory_states) { _starpu_poti_MemoryEvent = poti_header_DeclareEvent (PAJE_NewEvent, 4, "Handle string", "Info string", "Size string", "Dest string"); } _starpu_poti_MpiLinkStart = poti_header_DeclareEvent(PAJE_StartLink, 5, "MPITAG string", "MPIType string", "Priority string", "Handle string", "HName string", "X string", "Y string"); _starpu_poti_checkPointState = poti_header_DeclareEvent(PAJE_NewEvent, 2, "CheckpointInstance string", "CheckpointDomain string"); _starpu_poti_JobState = poti_header_DeclareEvent(PAJE_SetState, 1, "JobId string"); #endif #else poti_header(1,1); #endif #else fprintf(file, "%%EventDef PajeDefineContainerType 1\n"); fprintf(file, "%% Alias string\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Name string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeDefineEventType 2\n"); fprintf(file, "%% Alias string\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Name string\n"); fprintf(file, "%% Color color\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeDefineStateType 3\n"); fprintf(file, "%% Alias string\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Name string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeDefineVariableType 4\n"); fprintf(file, "%% Alias string\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Name string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeDefineLinkType 5\n"); fprintf(file, "%% Alias string\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% StartContainerType string\n"); fprintf(file, "%% EndContainerType string\n"); fprintf(file, "%% Name string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeDefineEntityValue 6\n"); fprintf(file, "%% Alias string\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Name string\n"); fprintf(file, "%% Color color\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeCreateContainer 7\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Alias string\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Name string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeDestroyContainer 8\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Name string\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeNewEvent 9\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Value string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeSetState 10\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Value string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajePushState 11\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Value string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajePopState 12\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeSetVariable 13\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Value double\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeAddVariable 14\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Value double\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeSubVariable 15\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Value double\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeStartLink 18\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Value string\n"); fprintf(file, "%% StartContainer string\n"); fprintf(file, "%% Key string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeEndLink 19\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Value string\n"); fprintf(file, "%% EndContainer string\n"); fprintf(file, "%% Key string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeSetState 20\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Value string\n"); fprintf(file, "%% Size string\n"); fprintf(file, "%% Params string\n"); fprintf(file, "%% Footprint string\n"); fprintf(file, "%% Tag string\n"); fprintf(file, "%% JobId string\n"); fprintf(file, "%% SubmitOrder string\n"); fprintf(file, "%% Priority string\n"); fprintf(file, "%% GFlop string\n"); fprintf(file, "%% X string\n"); fprintf(file, "%% Y string\n"); /* fprintf(file, "%% Z string\n"); */ fprintf(file, "%% Iteration string\n"); fprintf(file, "%% Subiteration string\n"); fprintf(file, "%% NumaNodes string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeSetState 21\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Value string\n"); fprintf(file, "%% Size string\n"); fprintf(file, "%% Params string\n"); fprintf(file, "%% Footprint string\n"); fprintf(file, "%% Tag string\n"); fprintf(file, "%% JobId string\n"); fprintf(file, "%% SubmitOrder string\n"); fprintf(file, "%%EndEventDef\n"); if (options->memory_states) { fprintf(file, "%%EventDef PajeNewEvent 22\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Value string\n"); fprintf(file, "%% Handle string\n"); fprintf(file, "%% Info string\n"); fprintf(file, "%% Size string\n"); fprintf(file, "%% Tid string\n"); fprintf(file, "%%EndEventDef\n"); } fprintf(file, "%%EventDef PajeStartLink 23\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Value string\n"); fprintf(file, "%% StartContainer string\n"); fprintf(file, "%% Key string\n"); fprintf(file, "%% MPITAG string\n"); fprintf(file, "%% MPIType string\n"); fprintf(file, "%% Priority string\n"); fprintf(file, "%% Handle string\n"); fprintf(file, "%% HName string\n"); fprintf(file, "%% X string\n"); fprintf(file, "%% Y string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeStartLink 24\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Value string\n"); fprintf(file, "%% StartContainer string\n"); fprintf(file, "%% Key string\n"); fprintf(file, "%% Handle string\n"); fprintf(file, "%% HName string\n"); fprintf(file, "%% X string\n"); fprintf(file, "%% Y string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeNewEvent 25\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Value string\n"); fprintf(file, "%% CheckpointInstance string\n"); fprintf(file, "%% CheckpointDomain string\n"); fprintf(file, "%%EndEventDef\n"); fprintf(file, "%%EventDef PajeSetState 26\n"); fprintf(file, "%% Time date\n"); fprintf(file, "%% Container string\n"); fprintf(file, "%% Type string\n"); fprintf(file, "%% Value string\n"); fprintf(file, "%% JobId string\n"); fprintf(file, "%%EndEventDef\n"); #endif #ifdef STARPU_HAVE_POTI poti_DefineContainerType("MPIP", "0", "MPI Program"); poti_DefineContainerType("P", "MPIP", "Program"); poti_DefineContainerType("Mn", "P", "Memory Node"); poti_DefineContainerType("T", "Mn", "Thread"); poti_DefineContainerType("UT", "P", "User Thread"); poti_DefineContainerType("Mm", "Mn", "Memory Manager"); poti_DefineContainerType("W", "T", "Worker"); poti_DefineContainerType("MPICt", "P", "MPI Communication Thread"); poti_DefineContainerType("Sc", "P", "Scheduler"); poti_DefineEventType("prog_event", "P", "program event type"); poti_DefineEventType("pu", "P", "task push"); poti_DefineEventType("po", "P", "task pop"); poti_DefineEventType("register", "P", "data registration"); poti_DefineEventType("unregister", "P", "data unregistration"); /* Types for the memory node */ poti_DefineEventType("SI", "Mm", "data state invalid"); poti_DefineEventType("SS", "Mm", "data state shared"); poti_DefineEventType("SO", "Mm", "data state owner"); poti_DefineEventType("WU", "Mm", "data wont use"); poti_DefineEventType("Al", "Mm", "Allocating Start"); poti_DefineEventType("rc", "Mm", "Request Created"); poti_DefineEventType("AlE", "Mm", "Allocating End"); poti_DefineEventType("Alr", "Mm", "Allocating Async Start"); poti_DefineEventType("AlrE", "Mm", "Allocating Async End"); poti_DefineEventType("Fe", "Mm", "Free Start"); poti_DefineEventType("FeE", "Mm", "Free End"); poti_DefineEventType("Wb", "Mm", "WritingBack Start"); poti_DefineEventType("WbE", "Mm", "WritingBack End"); poti_DefineEventType("DCo", "Mm", "DriverCopy Start"); poti_DefineEventType("DCoE", "Mm", "DriverCopy End"); poti_DefineEventType("DCoA", "Mm", "DriverCopyAsync Start"); poti_DefineEventType("DCoAE", "Mm", "DriverCopyAsync End"); poti_DefineVariableType("use", "Mm", "Used (MB)", "0 0 0"); poti_DefineVariableType("bwi_mm", "Mm", "Bandwidth In (MB/s)", "0 0 0"); poti_DefineVariableType("bwo_mm", "Mm", "Bandwidth Out (MB/s)", "0 0 0"); poti_DefineStateType("MS", "Mm", "Memory Node State"); poti_DefineEntityValue("A", "MS", "Allocating", ".4 .1 .0"); poti_DefineEntityValue("Ar", "MS", "AllocatingReuse", ".1 .1 .8"); poti_DefineEntityValue("F", "MS", "Freeing", ".6 .3 .0"); poti_DefineEntityValue("W", "MS", "WritingBack", ".0 .0 .5"); poti_DefineEntityValue("Wa", "MS", "WritingBackAsync", ".0 .0 .4"); poti_DefineEntityValue("R", "MS", "Reclaiming", ".0 .1 .6"); poti_DefineEntityValue("Co", "MS", "DriverCopy", ".3 .5 .1"); poti_DefineEntityValue("CoA", "MS", "DriverCopyAsync", ".1 .3 .1"); poti_DefineEntityValue("No", "MS", "Nothing", ".0 .0 .0"); /* Types for the Worker of the Memory Node */ poti_DefineEventType("user_event", "P", "user event type"); poti_DefineEventType("thread_event", "T", "thread event type"); poti_DefineVariableType("gf", "W", "GFlop/s", "0 0 0"); poti_DefineStateType("S", "T", "Thread State"); poti_DefineEntityValue("I", "S", "Idle", ".9 .1 0"); poti_DefineEntityValue("In", "S", "Initializing", "0.0 .7 1.0"); poti_DefineEntityValue("D", "S", "Deinitializing", "0.0 .1 .7"); poti_DefineEntityValue("Fi", "S", "FetchingInput", "1.0 .1 1.0"); poti_DefineEntityValue("Po", "S", "PushingOutput", "0.1 1.0 1.0"); poti_DefineEntityValue("C", "S", "Callback", ".0 .3 .8"); poti_DefineEntityValue("B", "S", "Overhead", ".5 .18 .0"); poti_DefineEntityValue("Ps", "S", "Parallel sync", ".5 .18 1.0"); poti_DefineEntityValue("E", "S", "Executing", ".0 .6 .5"); poti_DefineEntityValue("Sc", "S", "Scheduling", ".7 .36 .0"); poti_DefineEntityValue("Sl", "S", "Sleeping", ".9 .1 .0"); poti_DefineEntityValue("P", "S", "Progressing", ".1 .3 .1"); poti_DefineEntityValue("U", "S", "Unpartitioning", ".0 .0 1.0"); poti_DefineEntityValue("H", "S", "Hypervisor", ".5 .18 .0"); poti_DefineEntityValue("Bu", "S", "Building task", ".5 .18 .0"); poti_DefineEntityValue("Su", "S", "Submitting task", ".3 .09 .0"); poti_DefineEntityValue("Th", "S", "Throttling task submission", ".8 .6 .6"); poti_DefineEntityValue("MD", "S", "Decoding task for MPI", ".5 .18 .2"); poti_DefineEntityValue("MPr", "S", "Preparing task for MPI", ".4 .14 .2"); poti_DefineEntityValue("MPo", "S", "Post-processing task for MPI", ".3 .09 .2"); poti_DefineStateType("WS", "W", "Worker State"); poti_DefineEntityValue("I", "WS", "Idle", ".9 .1 .0"); poti_DefineEntityValue("In", "WS", "Initializing", "0.0 .7 1.0"); poti_DefineEntityValue("D", "WS", "Deinitializing", "0.0 .1 .7"); poti_DefineEntityValue("Fi", "WS", "FetchingInput", "1.0 .1 1.0"); poti_DefineEntityValue("Po", "WS", "PushingOutput", "0.1 1.0 1.0"); poti_DefineEntityValue("C", "WS", "Callback", ".0 .3 .8"); poti_DefineEntityValue("B", "WS", "Overhead", ".5 .18 .0"); poti_DefineEntityValue("Ps", "WS", "Parallel sync", ".5 .18 1.0"); poti_DefineEntityValue("E", "WS", "Executing", ".0 .6 .5"); poti_DefineEntityValue("Sc", "WS", "Scheduling", ".7 .36 .0"); poti_DefineEntityValue("Sl", "WS", "Sleeping", ".9 .1 .0"); poti_DefineEntityValue("P", "WS", "Progressing", ".1 .3 .1"); poti_DefineEntityValue("U", "WS", "Unpartitioning", ".0 .0 1.0"); poti_DefineEntityValue("H", "WS", "Hypervisor", ".5 .18 .0"); poti_DefineEntityValue("Bu", "WS", "Building task", ".5 .18 .0"); poti_DefineEntityValue("Su", "WS", "Submitting task", ".3 .09 .0"); poti_DefineEntityValue("Th", "WS", "Throttling task submission", ".8 .6 .6"); /* Types for the MPI Communication Thread of the Memory Node */ poti_DefineEventType("MPIev", "MPICt", "MPI event type"); poti_DefineVariableType("bwi_mpi", "MPICt", "Bandwidth In (MB/s)", "0 0 0"); poti_DefineVariableType("bwo_mpi", "MPICt", "Bandwidth Out (MB/s)", "0 0 0"); poti_DefineStateType("CtS", "MPICt", "Communication Thread State"); poti_DefineEntityValue("P", "CtS", "Processing", "0 0 0"); poti_DefineEntityValue("Pl", "CtS", "Polling", "1.0 .5 0"); poti_DefineEntityValue("Dr", "CtS", "DriverRun", ".1 .1 1.0"); poti_DefineEntityValue("Sl", "CtS", "Sleeping", ".9 .1 .0"); poti_DefineEntityValue("UT", "CtS", "UserTesting", ".2 .1 .6"); poti_DefineEntityValue("UW", "CtS", "UserWaiting", ".4 .1 .3"); poti_DefineEntityValue("SdS", "CtS", "SendSubmitted", "1.0 .1 1.0"); poti_DefineEntityValue("RvS", "CtS", "ReceiveSubmitted", "0.1 1.0 1.0"); poti_DefineEntityValue("SdC", "CtS", "SendCompleted", "1.0 .5 1.0"); poti_DefineEntityValue("RvC", "CtS", "ReceiveCompleted", "0.5 1.0 1.0"); poti_DefineEntityValue("TD", "CtS", "Testing Detached", ".0 .0 .6"); poti_DefineEntityValue("MT", "CtS", "MPI Test", ".0 .0 .8"); poti_DefineEntityValue("Bu", "CtS", "Building task", ".5 .18 .0"); poti_DefineEntityValue("Su", "CtS", "Submitting task", ".3 .09 .0"); poti_DefineEntityValue("Th", "CtS", "Throttling task submission", ".8 .6 .6"); poti_DefineEntityValue("C", "CtS", "Callback", ".0 .3 .8"); /* Type for other threads */ poti_DefineEventType("user_user_event", "UT", "user event type"); poti_DefineEventType("user_thread_event", "UT", "thread event type"); poti_DefineStateType("US", "UT", "User Thread State"); poti_DefineEntityValue("Bu", "US", "Building task", ".5 .18 .0"); poti_DefineEntityValue("Su", "US", "Submitting task", ".3 .09 .0"); poti_DefineEntityValue("C", "US", "Callback", ".0 .3 .8"); poti_DefineEntityValue("Sc", "US", "Scheduling", ".7 .36 .0"); poti_DefineEntityValue("Th", "US", "Throttling task submission", ".8 .6 .6"); poti_DefineEntityValue("MD", "US", "Decoding task for MPI", ".5 .18 .2"); poti_DefineEntityValue("MPr", "US", "Preparing task for MPI", ".4 .14 .2"); poti_DefineEntityValue("MPo", "US", "Post-processing task for MPI", ".3 .09 .2"); poti_DefineEntityValue("W", "US", "Waiting task", ".9 .1 .0"); poti_DefineEntityValue("WA", "US", "Waiting all tasks", ".9 .1 .0"); poti_DefineEntityValue("No", "US", "Nothing", ".0 .0 .0"); for (i=1; i #include #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) #include #endif int main(int argc, char *argv[]) { char *prog, *arch, *def, *effective_version, *version, *lib; char s[1024]; char name[64]; int current, age, revision; if (argc != 7) { fprintf(stderr, "[dolib] bad number of arguments, expected %d, got %d\n", 7, argc); exit(EXIT_FAILURE); } prog = argv[1]; arch = argv[2]; def = argv[3]; effective_version = argv[4]; version = argv[5]; lib = argv[6]; if (sscanf(version, "%d:%d:%d", ¤t, &revision, &age) != 3) { fprintf(stderr, "version not formatted as current:revision:age (%s)\n", version); exit(EXIT_FAILURE); } _snprintf(name, sizeof(name), "libstarpu-%s-%d", effective_version, current - age); name[sizeof(name) - 1] = '\0'; fprintf(stdout, "[dolib] using soname '%s'\n", name); _snprintf(s, sizeof(s), "\"%s\" /machine:%s /def:%s /name:%s /out:%s", prog, arch, def, name, lib); s[sizeof(s) - 1] = '\0'; if (system(s)) { fprintf(stderr, "%s failed\n", s); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); } starpu-1.4.9+dfsg/src/drivers/000077500000000000000000000000001507764646700162555ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/drivers/cpu/000077500000000000000000000000001507764646700170445ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/drivers/cpu/driver_cpu.c000066400000000000000000000620731507764646700213620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2022,2023 École de Technologie Supérieure (ETS, Montréal) * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * Copyright (C) 2010-2010 Mehdi Juhoor * Copyright (C) 2020,2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef STARPU_HAVE_HWLOC #include #ifndef HWLOC_API_VERSION #define HWLOC_OBJ_PU HWLOC_OBJ_PROC #endif #if HWLOC_API_VERSION < 0x00010b00 #define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE #endif #endif #ifdef STARPU_HAVE_WINDOWS #include #endif static unsigned already_busy_cpus; static void _starpu_cpu_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); static void _starpu_cpu_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); static void *_starpu_cpu_worker(void *); static struct _starpu_driver_info driver_info = { .name_upper = "CPU", .name_var = "CPU", .name_lower = "cpu", .memory_kind = STARPU_CPU_RAM, .alpha = 0.5f, .wait_for_worker_initialization = 1, #ifdef STARPU_USE_CPU .driver_ops = &_starpu_driver_cpu_ops, .run_worker = _starpu_cpu_worker, #endif .init_worker_binding = _starpu_cpu_init_worker_binding, .init_worker_memory = _starpu_cpu_init_worker_memory, }; static struct _starpu_node_ops _starpu_driver_cpu_node_ops; static struct _starpu_memory_driver_info memory_driver_info = { .name_upper = "NUMA", .worker_archtype = STARPU_CPU_WORKER, .ops = &_starpu_driver_cpu_node_ops, }; /* Early library initialization, before anything else, just initialize data */ void _starpu_cpu_preinit(void) { _starpu_driver_info_register(STARPU_CPU_WORKER, &driver_info); _starpu_memory_driver_info_register(STARPU_CPU_RAM, &memory_driver_info); already_busy_cpus = 0; } void _starpu_cpu_busy_cpu(unsigned num) { already_busy_cpus += num; } #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID) /* Determine which devices we will use */ void _starpu_init_cpu_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config) { int ncpu = config->conf.ncpus; if (ncpu != 0) { STARPU_ASSERT_MSG(ncpu >= -1, "ncpus can not be negative and different from -1 (is is %d)", ncpu); int nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0]; long avail_cpus = (long) (topology->nusedpus / nhyperthreads) - (long) already_busy_cpus; if (avail_cpus < 0) avail_cpus = 0; int nth_per_core = starpu_getenv_number_default("STARPU_NTHREADS_PER_CORE", 1); avail_cpus *= nth_per_core; _starpu_topology_check_ndevices(&ncpu, avail_cpus, 1, STARPU_MAXCPUS, config->conf.reserve_ncpus, "ncpus", "CPU cores", "maxcpus"); } topology->ndevices[STARPU_CPU_WORKER] = 1; unsigned homogeneous = starpu_getenv_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1); _starpu_topology_configure_workers(topology, config, STARPU_CPU_WORKER, 0, 0, homogeneous, 1, ncpu, 1, NULL, NULL); } #endif /* Bind the driver on a CPU core */ static void _starpu_cpu_init_worker_binding(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { /* Dedicate a cpu core to that worker */ workerarg->bindid = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, NULL, 0);; } /* Set up memory and buses */ static void _starpu_cpu_init_worker_memory(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { unsigned memory_node = -1; int numa_logical_id = _starpu_get_logical_numa_node_worker(workerarg->workerid); int numa_starpu_id = starpu_memory_nodes_numa_hwloclogid_to_id(numa_logical_id); if (numa_starpu_id < 0 || numa_starpu_id >= STARPU_MAXNUMANODES) numa_starpu_id = STARPU_MAIN_RAM; #if defined(STARPU_HAVE_HWLOC) && !defined(STARPU_SIMGRID) hwloc_obj_t pu_obj = hwloc_get_obj_by_type(config->topology.hwtopology, HWLOC_OBJ_PU, workerarg->bindid); struct _starpu_hwloc_userdata *userdata = pu_obj->userdata; userdata->pu_worker = workerarg; #endif workerarg->numa_memory_node = memory_node = numa_starpu_id; _starpu_memory_node_add_nworkers(memory_node); _starpu_worker_drives_memory_node(workerarg, numa_starpu_id); workerarg->memory_node = memory_node; } #ifdef STARPU_USE_CPU /* This is run from the driver thread to initialize the driver CUDA context */ static int _starpu_cpu_driver_init(struct _starpu_worker *cpu_worker) { int devid = cpu_worker->devid; #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init, devid, cpu_worker->workerid, starpu_prof_tool_driver_cpu, -1, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init(&pi, NULL, NULL); pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_start, devid, cpu_worker->workerid, starpu_prof_tool_driver_cpu, -1, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start(&pi, NULL, NULL); #endif _starpu_driver_start(cpu_worker, STARPU_CPU_WORKER, 1); snprintf(cpu_worker->name, sizeof(cpu_worker->name), "CPU %d", devid); snprintf(cpu_worker->short_name, sizeof(cpu_worker->short_name), "CPU %d", devid); starpu_pthread_setname(cpu_worker->short_name); _STARPU_TRACE_WORKER_INIT_END(cpu_worker->workerid); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&cpu_worker->sched_mutex); cpu_worker->status = STATUS_UNKNOWN; STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&cpu_worker->sched_mutex); /* tell the main thread that we are ready */ STARPU_PTHREAD_MUTEX_LOCK(&cpu_worker->mutex); cpu_worker->worker_is_initialized = 1; STARPU_PTHREAD_COND_SIGNAL(&cpu_worker->ready_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&cpu_worker->mutex); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_end, devid, cpu_worker->workerid, starpu_prof_tool_driver_cpu, -1, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end(&pi, NULL, NULL); #endif return 0; } static int _starpu_cpu_driver_deinit(struct _starpu_worker *cpu_worker) { _STARPU_TRACE_WORKER_DEINIT_START; unsigned memnode = cpu_worker->memory_node; _starpu_datawizard_handle_all_pending_node_data_requests(memnode); /* In case there remains some memory that was automatically * allocated by StarPU, we release it now. Note that data * coherency is not maintained anymore at that point ! */ _starpu_free_all_automatically_allocated_buffers(memnode); cpu_worker->worker_is_initialized = 0; _STARPU_TRACE_WORKER_DEINIT_END(STARPU_CPU_WORKER); #ifdef STARPU_PROF_TOOL int workerid = cpu_worker->workerid; struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_deinit, workerid, workerid, starpu_prof_tool_driver_cpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit(&pi, NULL, NULL); #endif return 0; } #endif /* STARPU_USE_CPU */ static uintptr_t _starpu_cpu_malloc_on_node(unsigned dst_node, size_t size, int flags) { uintptr_t addr = 0; _starpu_malloc_flags_on_node(dst_node, (void**) &addr, size, #if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) /* without memcpy_peer, we can not * allocated pinned memory, since it * requires waiting for a task, and we * may be called with a spinlock held */ flags & ~STARPU_MALLOC_PINNED #else flags #endif ); return addr; } static void _starpu_cpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags) { _starpu_free_flags_on_node(dst_node, (void*)addr, size, #if defined(STARPU_USE_CUDA) && !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) flags & ~STARPU_MALLOC_PINNED #else flags #endif ); } static int _starpu_cpu_copy_interface(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CPU_RAM); int ret = 0; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; if (copy_methods->ram_to_ram) copy_methods->ram_to_ram(src_interface, src_node, dst_interface, dst_node); else { STARPU_ASSERT_MSG(copy_methods->any_to_any, "the interface '%s' does define neither ram_to_ram nor any_to_any copy method", handle->ops->name); copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, req ? &req->async_channel : NULL); } return ret; } static int _starpu_cpu_copy_data(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CPU_RAM); (void) async_channel; memcpy((void *) (dst + dst_offset), (void *) (src + src_offset), size); return 0; } static int _starpu_cpu_is_direct_access_supported(unsigned node, unsigned handling_node) { (void) node; (void) handling_node; return 1; } static uintptr_t _starpu_cpu_map(uintptr_t src, size_t src_offset, unsigned src_node, unsigned dst_node, size_t size, int *ret) { (void) src_node; (void) dst_node; (void) size; *ret = 0; return src + src_offset; } static int _starpu_cpu_unmap(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, unsigned dst_node, size_t size) { (void) src; (void) src_offset; (void) src_node; (void) dst; (void) dst_node; (void) size; return 0; } static int _starpu_cpu_update_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size) { (void) src; (void) src_offset; (void) src_node; (void) dst; (void) dst_offset; (void) dst_node; (void) size; /* Memory mappings are cache-coherent */ return 0; } #ifdef STARPU_USE_CPU /* Actually launch the job on a cpu worker. * Handle binding CPUs on cores. * In the case of a combined worker WORKER_TASK != J->TASK */ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_task, struct _starpu_worker *cpu_args, int rank, struct starpu_perfmodel_arch* perf_arch) { int is_parallel_task = (j->task_size > 1); int profiling = starpu_profiling_status_get(); struct starpu_task *task = j->task; struct starpu_codelet *cl = task->cl; #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi; int devid = cpu_args->devid; #endif STARPU_ASSERT(cl); if (is_parallel_task) { STARPU_PTHREAD_BARRIER_WAIT(&j->before_work_barrier); /* In the case of a combined worker, the scheduler needs to know * when each actual worker begins the execution */ _starpu_sched_pre_exec_hook(worker_task); } /* Give profiling variable */ _starpu_driver_start_job(cpu_args, j, perf_arch, rank, profiling); _starpu_cl_func_t func = _starpu_task_get_cpu_nth_implementation(cl, j->nimpl); /* In case this is a Fork-join parallel task, the worker does not * execute the kernel at all. */ if ((rank == 0) || (cl->type != STARPU_FORKJOIN)) { if (is_parallel_task && cl->type == STARPU_FORKJOIN) /* bind to parallel worker */ _starpu_bind_thread_on_cpus(_starpu_get_combined_worker_struct(j->combined_workerid)); STARPU_ASSERT_MSG(func, "when STARPU_CPU is defined in 'where', cpu_func or cpu_funcs has to be defined"); if (_starpu_get_disable_kernels() <= 0) { _STARPU_TRACE_START_EXECUTING(j); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_cpu_exec, devid, worker_task->workerid, starpu_prof_tool_driver_cpu, -1, (void*)func); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_cpu_exec(&pi, NULL, NULL); #endif #ifdef STARPU_SIMGRID if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE) func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); else if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT) { _SIMGRID_TIMER_BEGIN(1); func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); _SIMGRID_TIMER_END; } else { struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(cpu_args, j); _starpu_simgrid_submit_job(cpu_args->workerid, sched_ctx->id, j, perf_arch, NAN, NAN, NULL); } #else #ifdef STARPU_PAPI if (rank == 0) _starpu_profiling_papi_task_start_counters(task); #endif func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); #ifdef STARPU_PAPI if (rank == 0) _starpu_profiling_papi_task_stop_counters(task); #endif #endif #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_cpu_exec, devid, worker_task->workerid, starpu_prof_tool_driver_cpu, -1, (void*)func); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_cpu_exec(&pi, NULL, NULL); #endif _STARPU_TRACE_END_EXECUTING(j); } if (is_parallel_task && cl->type == STARPU_FORKJOIN) /* rebind to single CPU */ _starpu_bind_thread_on_cpu(cpu_args->bindid, cpu_args->workerid, NULL); } else { #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_cpu_exec, devid, worker_task->workerid, starpu_prof_tool_driver_cpu, -1, (void*)func); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_cpu_exec(&pi, NULL, NULL); #endif _STARPU_TRACE_START_EXECUTING(j); } if (is_parallel_task) { _STARPU_TRACE_START_PARALLEL_SYNC(j); STARPU_PTHREAD_BARRIER_WAIT(&j->after_work_barrier); _STARPU_TRACE_END_PARALLEL_SYNC(j); if (rank != 0) { #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_cpu_exec, devid, worker_task->workerid, starpu_prof_tool_driver_cpu, -1, (void*)func); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_cpu_exec(&pi, NULL, NULL); #endif _STARPU_TRACE_END_EXECUTING(j); } } _starpu_driver_end_job(cpu_args, j, perf_arch, rank, profiling); if (is_parallel_task) { #ifdef STARPU_SIMGRID if (rank == 0) { /* Wait for other threads to exit barrier_wait so we * can safely drop the job structure */ starpu_sleep(0.0000001); j->after_work_busy_barrier = 0; } #else ANNOTATE_HAPPENS_BEFORE(&j->after_work_busy_barrier); (void) STARPU_ATOMIC_ADD(&j->after_work_busy_barrier, -1); if (rank == 0) { /* Wait with a busy barrier for other workers to have * finished with the blocking barrier before we can * safely drop the job structure */ while (j->after_work_busy_barrier > 0) { STARPU_UYIELD(); STARPU_SYNCHRONIZE(); } ANNOTATE_HAPPENS_AFTER(&j->after_work_busy_barrier); } #endif } if (rank == 0) { _starpu_driver_update_job_feedback(j, cpu_args, perf_arch, profiling); #ifdef STARPU_OPENMP if (!j->continuation) #endif { _starpu_push_task_output(j); } } return 0; } static int _starpu_cpu_driver_execute_task(struct _starpu_worker *cpu_worker, struct starpu_task *task, struct _starpu_job *j) { int res; int rank; int is_parallel_task = (j->task_size > 1); struct starpu_perfmodel_arch* perf_arch; rank = cpu_worker->current_rank; /* Get the rank in case it is a parallel task */ if (is_parallel_task) { if(j->combined_workerid != -1) { struct _starpu_combined_worker *combined_worker; combined_worker = _starpu_get_combined_worker_struct(j->combined_workerid); cpu_worker->combined_workerid = j->combined_workerid; cpu_worker->worker_size = combined_worker->worker_size; perf_arch = &combined_worker->perf_arch; } else { struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(cpu_worker, j); STARPU_ASSERT_MSG(sched_ctx != NULL, "there should be a worker %d in the ctx of this job \n", cpu_worker->workerid); perf_arch = &sched_ctx->perf_arch; } } else { cpu_worker->combined_workerid = cpu_worker->workerid; cpu_worker->worker_size = 1; struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(cpu_worker, j); if (sched_ctx && !sched_ctx->sched_policy && !sched_ctx->awake_workers && sched_ctx->main_master == cpu_worker->workerid) perf_arch = &sched_ctx->perf_arch; else perf_arch = &cpu_worker->perf_arch; } _starpu_set_current_task(j->task); cpu_worker->current_task = j->task; j->workerid = cpu_worker->workerid; #ifdef STARPU_BUBBLE_VERBOSE struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); unsigned long long timestamp = 1000000000ULL*tp.tv_sec + tp.tv_nsec; _STARPU_DEBUG("{%llu} [%s(%p)]\n", timestamp, starpu_task_get_name(task), task); #endif res = execute_job_on_cpu(j, task, cpu_worker, rank, perf_arch); _starpu_set_current_task(NULL); cpu_worker->current_task = NULL; if (res) { switch (res) { case -EAGAIN: _starpu_push_task_to_workers(task); return 0; default: STARPU_ABORT(); } } /* In the case of combined workers, we need to inform the * scheduler each worker's execution is over. * Then we free the workers' task alias */ if (is_parallel_task) { _starpu_sched_post_exec_hook(task); free(task); } if (rank == 0) _starpu_handle_job_termination(j); return 0; } /* One iteration of the main driver loop */ static int _starpu_cpu_driver_run_once(struct _starpu_worker *cpu_worker) { unsigned memnode = cpu_worker->memory_node; int workerid = cpu_worker->workerid; #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi; #endif int res; struct _starpu_job *j; struct starpu_task *task = NULL, *pending_task; int rank = 0; #ifdef STARPU_SIMGRID starpu_pthread_wait_reset(&cpu_worker->wait); #endif /* Test if async transfers are completed */ pending_task = cpu_worker->task_transferring; if (pending_task != NULL && cpu_worker->nb_buffers_transferred == cpu_worker->nb_buffers_totransfer) { int ret; STARPU_RMB(); _STARPU_TRACE_END_PROGRESS(memnode); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_cpu, memnode, cpu_worker->nb_buffers_totransfer, cpu_worker->nb_buffers_transferred); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif j = _starpu_get_job_associated_to_task(pending_task); _starpu_fetch_task_input_tail(pending_task, j, cpu_worker); /* Reset it */ cpu_worker->task_transferring = NULL; ret = _starpu_cpu_driver_execute_task(cpu_worker, pending_task, j); _STARPU_TRACE_START_PROGRESS(memnode); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_start_transfer, workerid, workerid, starpu_prof_tool_driver_cpu, memnode, cpu_worker->nb_buffers_totransfer, cpu_worker->nb_buffers_transferred); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); #endif return ret; } res = __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, 1); if (!pending_task) task = _starpu_get_worker_task(cpu_worker, workerid, memnode); #ifdef STARPU_SIMGRID #ifndef STARPU_OPENMP if (!res && !task) /* No progress, wait */ starpu_pthread_wait_wait(&cpu_worker->wait); #else #if SIMGRID_VERSION >= 31800 if (!res && !task) { /* No progress, wait (but at most 1s for OpenMP support) */ /* TODO: ideally, make OpenMP wake worker when run_once should return */ struct timespec abstime; _starpu_clock_gettime(&abstime); abstime.tv_sec++; starpu_pthread_wait_timedwait(&cpu_worker->wait, &abstime); } #else /* Previous simgrid versions don't really permit to use wait_timedwait in C */ starpu_sleep(0.001); #endif #endif #endif if (!task) { /* No task or task still pending transfers */ _starpu_execute_registered_idle_hooks(); return 0; } j = _starpu_get_job_associated_to_task(task); /* NOTE: j->task is != task for parallel tasks, which share the same * job. */ /* can a cpu perform that task ? */ if (!_STARPU_MAY_PERFORM(j, CPU)) { /* put it and the end of the queue ... XXX */ _starpu_push_task_to_workers(task); return 0; } #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_cpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif _STARPU_TRACE_END_PROGRESS(memnode); /* Get the rank in case it is a parallel task */ if (j->task_size > 1) { STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); rank = j->active_task_alias_count++; STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); } else { rank = 0; } cpu_worker->current_rank = rank; #ifdef STARPU_OPENMP /* At this point, j->continuation as been cleared as the task is being * woken up, thus we use j->discontinuous instead for the check */ const unsigned continuation_wake_up = j->discontinuous; #else const unsigned continuation_wake_up = 0; #endif if (rank == 0 && !continuation_wake_up) { res = _starpu_fetch_task_input(task, j, 1); STARPU_ASSERT(res == 0); } else { int ret = _starpu_cpu_driver_execute_task(cpu_worker, task, j); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, cpu_worker->workerid, starpu_prof_tool_driver_cpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif _STARPU_TRACE_END_PROGRESS(memnode); return ret; } #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, cpu_worker->workerid, starpu_prof_tool_driver_cpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif _STARPU_TRACE_END_PROGRESS(memnode); return 0; } static void *_starpu_cpu_worker(void *arg) { struct _starpu_worker *worker = arg; _starpu_cpu_driver_init(worker); _STARPU_TRACE_START_PROGRESS(worker->memory_node); #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi; pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_cpu, worker->memory_node, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); #endif while (_starpu_machine_is_running()) { _starpu_may_pause(); _starpu_cpu_driver_run_once(worker); } _STARPU_TRACE_END_PROGRESS(worker->memory_node); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_cpu, worker->memory_node, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif _starpu_cpu_driver_deinit(worker); return NULL; } static int _starpu_cpu_driver_run(struct _starpu_worker *worker) { _starpu_cpu_worker(worker); return 0; } static int _starpu_cpu_driver_set_devid(struct starpu_driver *driver, struct _starpu_worker *worker) { driver->id.cpu_id = worker->devid; return 0; } static int _starpu_cpu_driver_is_devid(struct starpu_driver *driver, struct _starpu_worker *worker) { return driver->id.cpu_id == worker->devid; } struct _starpu_driver_ops _starpu_driver_cpu_ops = { .init = _starpu_cpu_driver_init, .run = _starpu_cpu_driver_run, .run_once = _starpu_cpu_driver_run_once, .deinit = _starpu_cpu_driver_deinit, .set_devid = _starpu_cpu_driver_set_devid, .is_devid = _starpu_cpu_driver_is_devid, }; #endif /* STARPU_USE_CPU */ static struct _starpu_node_ops _starpu_driver_cpu_node_ops = { .name = "cpu driver", .malloc_on_node = _starpu_cpu_malloc_on_node, .free_on_node = _starpu_cpu_free_on_node, .is_direct_access_supported = _starpu_cpu_is_direct_access_supported, .copy_interface_to[STARPU_CPU_RAM] = _starpu_cpu_copy_interface, .copy_data_to[STARPU_CPU_RAM] = _starpu_cpu_copy_data, .map[STARPU_CPU_RAM] = _starpu_cpu_map, .unmap[STARPU_CPU_RAM] = _starpu_cpu_unmap, .update_map[STARPU_CPU_RAM] = _starpu_cpu_update_map, }; starpu-1.4.9+dfsg/src/drivers/cpu/driver_cpu.h000066400000000000000000000022711507764646700213610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DRIVER_CPU_H__ #define __DRIVER_CPU_H__ /** @file */ #include #include #pragma GCC visibility push(hidden) void _starpu_cpu_preinit(void); extern struct _starpu_driver_ops _starpu_driver_cpu_ops; /* Reserve one CPU core as busy for starting a driver thread */ void _starpu_cpu_busy_cpu(unsigned num); void _starpu_init_cpu_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config); #pragma GCC visibility pop #endif // __DRIVER_CPU_H__ starpu-1.4.9+dfsg/src/drivers/cuda/000077500000000000000000000000001507764646700171715ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/drivers/cuda/driver_cuda.c000066400000000000000000002513041507764646700216310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2022,2023 École de Technologie Supérieure (ETS, Montréal) * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * Copyright (C) 2010-2010 Mehdi Juhoor * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_CUDA_GL_INTEROP_H #include #endif #ifdef STARPU_HAVE_NVML_H #include #endif #ifdef HAVE_DLOPEN #include #endif #ifdef STARPU_USE_CUDA #include #include #endif #include #include #include #include #include #include #include #ifdef STARPU_SIMGRID #include #endif #if HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX #include #endif #ifdef STARPU_USE_CUDA #if CUDART_VERSION >= 5000 /* Avoid letting our streams spuriously synchronize with the NULL stream */ #define starpu_cudaStreamCreate(stream) cudaStreamCreateWithFlags(stream, cudaStreamNonBlocking) #else #define starpu_cudaStreamCreate(stream) cudaStreamCreate(stream) #endif /* At least CUDA 4.2 still didn't have working memcpy3D */ #if CUDART_VERSION < 5000 #define BUGGED_MEMCPY3D #endif #endif /* Consider a rough 10% overhead cost */ #define FREE_MARGIN 0.9 static size_t global_mem[STARPU_MAXCUDADEVS]; #ifdef STARPU_HAVE_NVML_H static nvmlDevice_t nvmlDev[STARPU_MAXCUDADEVS]; __typeof__(nvmlInit) *_starpu_nvmlInit; __typeof__(nvmlDeviceGetNvLinkState) *_starpu_nvmlDeviceGetNvLinkState; __typeof__(nvmlDeviceGetNvLinkRemotePciInfo) *_starpu_nvmlDeviceGetNvLinkRemotePciInfo; __typeof__(nvmlDeviceGetHandleByIndex) *_starpu_nvmlDeviceGetHandleByIndex; __typeof__(nvmlDeviceGetHandleByPciBusId) *_starpu_nvmlDeviceGetHandleByPciBusId; __typeof__(nvmlDeviceGetIndex) *_starpu_nvmlDeviceGetIndex; __typeof__(nvmlDeviceGetPciInfo) *_starpu_nvmlDeviceGetPciInfo; __typeof__(nvmlDeviceGetUUID) *_starpu_nvmlDeviceGetUUID; #if HAVE_DECL_NVMLDEVICEGETTOTALENERGYCONSUMPTION __typeof__(nvmlDeviceGetTotalEnergyConsumption) *_starpu_nvmlDeviceGetTotalEnergyConsumption; #endif #endif int _starpu_cuda_bus_ids[STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES][STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES]; #ifdef STARPU_USE_CUDA static cudaStream_t streams[STARPU_NMAXWORKERS]; static char used_stream[STARPU_NMAXWORKERS]; /* TODO: ideally we'd have different streams for idle, prefetch and fetch, but apparently CUDA doesn't take priorities into account for transfers anyway? */ static cudaStream_t out_transfer_streams[STARPU_MAXCUDADEVS]; static cudaStream_t in_transfer_streams[STARPU_MAXCUDADEVS]; /* Note: streams are not thread-safe, so we define them for each CUDA worker * emitting a GPU-GPU transfer */ static cudaStream_t in_peer_transfer_streams[STARPU_MAXCUDADEVS][STARPU_MAXCUDADEVS]; static struct cudaDeviceProp props[STARPU_MAXCUDADEVS]; #ifndef STARPU_SIMGRID static cudaEvent_t task_events[STARPU_NMAXWORKERS][STARPU_MAX_PIPELINE]; #endif #endif /* STARPU_USE_CUDA */ #ifdef STARPU_SIMGRID static unsigned task_finished[STARPU_NMAXWORKERS][STARPU_MAX_PIPELINE]; static starpu_pthread_mutex_t cuda_alloc_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; #endif /* STARPU_SIMGRID */ static enum initialization cuda_device_init[STARPU_MAXCUDADEVS]; static int cuda_device_users[STARPU_MAXCUDADEVS]; static starpu_pthread_mutex_t cuda_device_init_mutex[STARPU_MAXCUDADEVS]; static starpu_pthread_cond_t cuda_device_init_cond[STARPU_MAXCUDADEVS]; #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) static struct _starpu_worker_set cuda_worker_set[STARPU_MAXCUDADEVS]; static unsigned cuda_bindid_init[STARPU_MAXCUDADEVS]; static unsigned cuda_bindid[STARPU_MAXCUDADEVS]; static unsigned cuda_memory_init[STARPU_MAXCUDADEVS]; static unsigned cuda_memory_nodes[STARPU_MAXCUDADEVS]; static int cuda_globalbindid; #endif static int _starpu_cuda_peer_access(int devid, int peer_devid); int _starpu_nworker_per_cuda; static size_t _starpu_cuda_get_global_mem_size(unsigned devid) { return global_mem[devid]; } #ifdef STARPU_USE_CUDA static cudaStream_t starpu_cuda_get_in_transfer_stream(unsigned dst_node) { int dst_devid = starpu_memory_node_get_devid(dst_node); cudaStream_t stream; stream = in_transfer_streams[dst_devid]; STARPU_ASSERT(stream); return stream; } static cudaStream_t starpu_cuda_get_out_transfer_stream(unsigned src_node) { int src_devid = starpu_memory_node_get_devid(src_node); cudaStream_t stream; stream = out_transfer_streams[src_devid]; STARPU_ASSERT(stream); return stream; } static cudaStream_t starpu_cuda_get_peer_transfer_stream(unsigned src_node, unsigned dst_node) { int src_devid = starpu_memory_node_get_devid(src_node); int dst_devid = starpu_memory_node_get_devid(dst_node); cudaStream_t stream; stream = in_peer_transfer_streams[src_devid][dst_devid]; STARPU_ASSERT(stream); return stream; } cudaStream_t starpu_cuda_get_local_stream(void) { int worker = starpu_worker_get_id_check(); used_stream[worker] = 1; return streams[worker]; } const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid) { struct _starpu_machine_config *config = _starpu_get_machine_config(); unsigned devid = config->workers[workerid].devid; return &props[devid]; } #endif /* STARPU_USE_CUDA */ /* Early library initialization, before anything else, just initialize data */ void _starpu_cuda_init(void) { unsigned i; for (i = 0; i < STARPU_MAXCUDADEVS; i++) { STARPU_PTHREAD_MUTEX_INIT(&cuda_device_init_mutex[i], NULL); STARPU_PTHREAD_COND_INIT(&cuda_device_init_cond[i], NULL); } memset(&cuda_bindid_init, 0, sizeof(cuda_bindid_init)); memset(&cuda_memory_init, 0, sizeof(cuda_memory_init)); cuda_globalbindid = -1; } /* Return the number of devices usable in the system. * The value returned cannot be greater than MAXCUDADEVS */ unsigned _starpu_get_cuda_device_count(void) { int cnt; #ifdef STARPU_SIMGRID cnt = _starpu_simgrid_get_nbhosts("CUDA"); #else cudaError_t cures; cures = cudaGetDeviceCount(&cnt); if (STARPU_UNLIKELY(cures)) return 0; #endif if (cnt > STARPU_MAXCUDADEVS) { _STARPU_MSG("# Warning: %d CUDA devices available. Only %d enabled. Use configure option --enable-maxcudadev=xxx to update the maximum value of supported CUDA devices.\n", cnt, STARPU_MAXCUDADEVS); cnt = STARPU_MAXCUDADEVS; } return (unsigned)cnt; } /* This is run from initialize to determine the number of CUDA devices */ void _starpu_init_cuda(void) { } /* This is called to really discover the hardware */ void _starpu_cuda_discover_devices(struct _starpu_machine_config *config) { /* Discover the number of CUDA devices. Fill the result in CONFIG. */ #ifdef STARPU_SIMGRID config->topology.nhwdevices[STARPU_CUDA_WORKER] = _starpu_simgrid_get_nbhosts("CUDA"); #else int cnt; cudaError_t cures; cures = cudaGetDeviceCount(&cnt); if (STARPU_UNLIKELY(cures != cudaSuccess)) cnt = 0; config->topology.nhwdevices[STARPU_CUDA_WORKER] = cnt; #ifdef STARPU_HAVE_NVML_H void *nvml = dlopen("libnvidia-ml.so.1", RTLD_LAZY); if (nvml) { _starpu_nvmlInit = dlsym(nvml, "nvmlInit_v2"); if (!_starpu_nvmlInit) _starpu_nvmlInit = dlsym(nvml, "nvmlInit"); } if (_starpu_nvmlInit) { _starpu_nvmlDeviceGetNvLinkState = dlsym(nvml, "nvmlDeviceGetNvLinkState"); _starpu_nvmlDeviceGetHandleByIndex = dlsym(nvml, "nvmlDeviceGetHandleByIndex_v2"); if (!_starpu_nvmlDeviceGetHandleByIndex) _starpu_nvmlDeviceGetHandleByIndex = dlsym(nvml, "nvmlDeviceGetHandleByIndex"); _starpu_nvmlDeviceGetNvLinkRemotePciInfo = dlsym(nvml, "nvmlDeviceGetNvLinkRemotePciInfo_v2"); if (!_starpu_nvmlDeviceGetNvLinkRemotePciInfo) _starpu_nvmlDeviceGetNvLinkRemotePciInfo = dlsym(nvml, "nvmlDeviceGetNvLinkRemotePciInfo"); _starpu_nvmlDeviceGetHandleByPciBusId = dlsym(nvml, "nvmlDeviceGetHandleByPciBusId_v2"); if (!_starpu_nvmlDeviceGetHandleByPciBusId) _starpu_nvmlDeviceGetHandleByPciBusId = dlsym(nvml, "nvmlDeviceGetHandleByPciBusId"); _starpu_nvmlDeviceGetIndex = dlsym(nvml, "nvmlDeviceGetIndex"); _starpu_nvmlDeviceGetPciInfo = dlsym(nvml, "nvmlDeviceGetPciInfo_v3"); if (!_starpu_nvmlDeviceGetPciInfo) _starpu_nvmlDeviceGetPciInfo = dlsym(nvml, "nvmlDeviceGetPciInfo_v2"); if (!_starpu_nvmlDeviceGetPciInfo) _starpu_nvmlDeviceGetPciInfo = dlsym(nvml, "nvmlDeviceGetPciInfo"); _starpu_nvmlDeviceGetUUID = dlsym(nvml, "nvmlDeviceGetUUID"); #if HAVE_DECL_NVMLDEVICEGETTOTALENERGYCONSUMPTION _starpu_nvmlDeviceGetTotalEnergyConsumption = dlsym(nvml, "nvmlDeviceGetTotalEnergyConsumption"); #endif _starpu_nvmlInit(); } #endif #endif } #ifdef STARPU_HAVE_HWLOC #ifdef STARPU_HAVE_NVML_H static int _starpu_cuda_direct_link(struct _starpu_machine_config *config, unsigned devid1, unsigned devid2) { unsigned i; struct cudaDeviceProp props_dev1; struct cudaDeviceProp props_dev2; cudaError_t cures; int nvswitch = 0; if (!_starpu_nvmlDeviceGetNvLinkState || !_starpu_nvmlDeviceGetNvLinkRemotePciInfo) return 0; cures = cudaGetDeviceProperties(&props_dev1, devid1); if (cures != cudaSuccess) return 0; cures = cudaGetDeviceProperties(&props_dev2, devid2); if (cures != cudaSuccess) return 0; nvmlDevice_t nvml_dev1 = _starpu_cuda_get_nvmldev(&props_dev1); if (!nvml_dev1) return 0; for (i = 0; i < NVML_NVLINK_MAX_LINKS; i++) { nvmlEnableState_t active; nvmlReturn_t ret; ret = _starpu_nvmlDeviceGetNvLinkState(nvml_dev1, i, &active); if (ret == NVML_ERROR_NOT_SUPPORTED) continue; if (active != NVML_FEATURE_ENABLED) continue; nvmlPciInfo_t pci; _starpu_nvmlDeviceGetNvLinkRemotePciInfo(nvml_dev1, i, &pci); hwloc_obj_t obj = hwloc_get_pcidev_by_busid(config->topology.hwtopology, pci.domain, pci.bus, pci.device, 0); if (obj && obj->type == HWLOC_OBJ_PCI_DEVICE && (obj->attr->pcidev.class_id >> 8 == 0x06) && (obj->attr->pcidev.vendor_id == 0x10de)) { /* This is an NVIDIA PCI bridge, i.e. an NVSwitch */ /* NVSwitch */ nvswitch = 1; break; } if ((int) pci.domain == props_dev2.pciDomainID && (int) pci.bus == props_dev2.pciBusID && (int) pci.device == props_dev2.pciDeviceID) /* We have a direct NVLink! */ return 1; } if (!nvswitch) { /* No direct NVLink or NVSwitch found for dev1 */ return 0; } nvmlDevice_t nvml_dev2 = _starpu_cuda_get_nvmldev(&props_dev2); if (!nvml_dev2) return 0; for (i = 0; i < NVML_NVLINK_MAX_LINKS; i++) { nvmlEnableState_t active; nvmlReturn_t ret; ret = _starpu_nvmlDeviceGetNvLinkState(nvml_dev2, i, &active); if (ret == NVML_ERROR_NOT_SUPPORTED) continue; if (active != NVML_FEATURE_ENABLED) continue; nvmlPciInfo_t pci; _starpu_nvmlDeviceGetNvLinkRemotePciInfo(nvml_dev2, i, &pci); hwloc_obj_t obj = hwloc_get_pcidev_by_busid(config->topology.hwtopology, pci.domain, pci.bus, pci.device, 0); if (obj && obj->type == HWLOC_OBJ_PCI_DEVICE && (obj->attr->pcidev.class_id >> 8 == 0x06) && (obj->attr->pcidev.vendor_id == 0x10de)) { /* This is an NVIDIA PCI bridge, i.e. an NVSwitch */ /* NVSwitch */ /* TODO: follow answers to https://forums.developer.nvidia.com/t/how-to-distinguish-different-nvswitch/241983 */ return 1; } } /* No NVSwitch found for dev2 */ return 0; } #endif #endif static void _starpu_initialize_workers_cuda_gpuid(struct _starpu_machine_config *config) { struct _starpu_machine_topology *topology = &config->topology; struct starpu_conf *uconf = &config->conf; _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_cuda_gpuid == 0 ? NULL : (int *)uconf->workers_cuda_gpuid, &(config->current_devid[STARPU_CUDA_WORKER]), (int *)topology->workers_devid[STARPU_CUDA_WORKER], "STARPU_WORKERS_CUDAID", topology->nhwdevices[STARPU_CUDA_WORKER], STARPU_CUDA_WORKER); _starpu_devices_drop_duplicate(topology->workers_devid[STARPU_CUDA_WORKER]); } /* Determine which devices we will use */ void _starpu_init_cuda_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config) { int i; for (i = 0; i < (int) (sizeof(cuda_worker_set)/sizeof(cuda_worker_set[0])); i++) cuda_worker_set[i].workers = NULL; int ncuda = config->conf.ncuda; if (ncuda != 0) { /* The user did not disable CUDA. We need to * initialize CUDA early to count the number of * devices */ _starpu_init_cuda(); int nb_devices = _starpu_get_cuda_device_count(); _starpu_topology_check_ndevices(&ncuda, nb_devices, 0, STARPU_MAXCUDADEVS, 0, "ncuda", "CUDA", "maxcudadev"); } int nworker_per_cuda = starpu_getenv_number_default("STARPU_NWORKER_PER_CUDA", 1); STARPU_ASSERT_MSG(nworker_per_cuda > 0, "STARPU_NWORKER_PER_CUDA has to be > 0"); STARPU_ASSERT_MSG_ALWAYS(nworker_per_cuda < STARPU_NMAXWORKERS, "STARPU_NWORKER_PER_CUDA (%d) cannot be higher than STARPU_NMAXWORKERS (%d)\n", nworker_per_cuda, STARPU_NMAXWORKERS); #ifndef STARPU_NON_BLOCKING_DRIVERS if (nworker_per_cuda > 1) { _STARPU_DISP("Warning: reducing STARPU_NWORKER_PER_CUDA to 1 because blocking drivers are enabled\n"); nworker_per_cuda = 1; } _starpu_nworker_per_cuda = nworker_per_cuda; #endif /* Now we know how many CUDA devices will be used */ topology->ndevices[STARPU_CUDA_WORKER] = ncuda; _starpu_initialize_workers_cuda_gpuid(config); /* allow having one worker per stream */ topology->cuda_th_per_stream = starpu_getenv_number_default("STARPU_CUDA_THREAD_PER_WORKER", -1); topology->cuda_th_per_dev = starpu_getenv_number_default("STARPU_CUDA_THREAD_PER_DEV", -1); STARPU_ASSERT_MSG(!(topology->cuda_th_per_stream == 1 && topology->cuda_th_per_dev != -1), "It does not make sense to set both STARPU_CUDA_THREAD_PER_WORKER to 1 and to set STARPU_CUDA_THREAD_PER_DEV, please choose either per worker or per device or none"); /* per device by default */ if (topology->cuda_th_per_dev == -1) { if (topology->cuda_th_per_stream == 1) topology->cuda_th_per_dev = 0; else topology->cuda_th_per_dev = 1; } /* Not per stream by default */ if (topology->cuda_th_per_stream == -1) { topology->cuda_th_per_stream = 0; } if (!topology->cuda_th_per_dev) { cuda_worker_set[0].workers = &config->workers[topology->nworkers]; cuda_worker_set[0].nworkers = ncuda * nworker_per_cuda; } unsigned cudagpu; for (cudagpu = 0; (int) cudagpu < ncuda; cudagpu++) { int devid = _starpu_get_next_devid(topology, config, STARPU_CUDA_WORKER); if (devid == -1) { // There is no more devices left topology->ndevices[STARPU_CUDA_WORKER] = cudagpu; break; } struct _starpu_worker_set *worker_set; if(topology->cuda_th_per_stream) { worker_set = ALLOC_WORKER_SET; } else if (topology->cuda_th_per_dev) { worker_set = &cuda_worker_set[devid]; worker_set->workers = &config->workers[topology->nworkers]; worker_set->nworkers = nworker_per_cuda; } else { /* Same worker set for all devices */ worker_set = &cuda_worker_set[0]; } _starpu_topology_configure_workers(topology, config, STARPU_CUDA_WORKER, cudagpu, devid, 0, 0, nworker_per_cuda, // TODO: fix perfmodels etc. // nworker_per_cuda - 1, 1, worker_set, NULL); _starpu_devices_gpu_set_used(devid); /* TODO: move this to generic place */ #ifdef STARPU_HAVE_HWLOC { hwloc_obj_t obj = NULL; if (starpu_driver_info[STARPU_CUDA_WORKER].get_hwloc_obj) obj = starpu_driver_info[STARPU_CUDA_WORKER].get_hwloc_obj(topology->hwtopology, devid); if (obj) { struct _starpu_hwloc_userdata *data = obj->userdata; data->ngpus++; } else { _STARPU_DISP("Warning: could not find location of CUDA%u, do you have the hwloc CUDA plugin installed?\n", devid); } } #endif } } /* Bind the driver on a CPU core */ void _starpu_cuda_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { /* Perhaps the worker has some "favourite" bindings (logical core) */ unsigned preferred_binding[STARPU_NMAXWORKERS]; unsigned npreferred = 0; unsigned devid = workerarg->devid; #ifndef STARPU_SIMGRID if (_starpu_may_bind_automatically[STARPU_CUDA_WORKER]) { /* StarPU is allowed to bind threads automatically */ unsigned *preferred_numa_binding = _starpu_get_cuda_affinity_vector(devid); unsigned npreferred_numa = _starpu_topology_get_nhwnumanodes(config); npreferred = _starpu_topology_get_numa_core_binding(config, preferred_numa_binding, npreferred_numa, preferred_binding, STARPU_NMAXWORKERS); } #endif /* SIMGRID */ if (cuda_bindid_init[devid]) { if (config->topology.cuda_th_per_stream == 0) workerarg->bindid = cuda_bindid[devid]; else workerarg->bindid = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); } else { cuda_bindid_init[devid] = 1; if (config->topology.cuda_th_per_dev == 0 && config->topology.cuda_th_per_stream == 0) { if (cuda_globalbindid == -1) cuda_globalbindid = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); workerarg->bindid = cuda_bindid[devid] = cuda_globalbindid; } else workerarg->bindid = cuda_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); } } /* Set up memory and buses */ void _starpu_cuda_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { unsigned memory_node = -1; unsigned devid = workerarg->devid; unsigned numa; if (cuda_memory_init[devid]) { memory_node = cuda_memory_nodes[devid]; } else { cuda_memory_init[devid] = 1; memory_node = cuda_memory_nodes[devid] = _starpu_memory_node_register(STARPU_CUDA_RAM, devid); #ifdef STARPU_USE_CUDA_MAP /* TODO: check node capabilities */ _starpu_memory_node_set_mapped(memory_node); #endif for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) { _starpu_cuda_bus_ids[numa][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(numa, memory_node); _starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][numa] = _starpu_register_bus(memory_node, numa); } #ifdef STARPU_SIMGRID const char* cuda_memcpy_peer; char name[16]; snprintf(name, sizeof(name), "CUDA%u", devid); starpu_sg_host_t host = _starpu_simgrid_get_host_by_name(name); STARPU_ASSERT(host); _starpu_simgrid_memory_node_set_host(memory_node, host); # ifdef STARPU_HAVE_SIMGRID_ACTOR_H cuda_memcpy_peer = sg_host_get_property_value(host, "memcpy_peer"); # else cuda_memcpy_peer = MSG_host_get_property_value(host, "memcpy_peer"); # endif #endif /* SIMGRID */ if ( #ifdef STARPU_SIMGRID cuda_memcpy_peer && atoll(cuda_memcpy_peer) #elif defined(STARPU_HAVE_CUDA_MEMCPY_PEER) 1 #else /* MEMCPY_PEER */ 0 #endif /* MEMCPY_PEER */ ) { int worker2; for (worker2 = 0; worker2 < workerarg->workerid; worker2++) { struct _starpu_worker *workerarg2 = &config->workers[worker2]; int devid2 = workerarg2->devid; if (workerarg2->arch == STARPU_CUDA_WORKER) { unsigned memory_node2 = starpu_worker_get_memory_node(worker2); int bus21 = _starpu_register_bus(memory_node2, memory_node); int bus12 = _starpu_register_bus(memory_node, memory_node2); if (bus21 < 0 || bus12 < 0) /* Already registered because of e.g. several workers per CUDA */ continue; _starpu_cuda_bus_ids[devid2+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES] = bus21; _starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][devid2+STARPU_MAXNUMANODES] = bus12; #ifndef STARPU_SIMGRID #ifdef STARPU_HAVE_HWLOC #ifdef STARPU_HAVE_NVML_H if (_starpu_cuda_direct_link(config, devid, devid2)) { starpu_bus_set_ngpus(bus21, 1); starpu_bus_set_ngpus(bus12, 1); } else #endif #endif { #if HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX hwloc_obj_t obj, obj2, ancestor; obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid); obj2 = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid2); ancestor = hwloc_get_common_ancestor_obj(config->topology.hwtopology, obj, obj2); if (ancestor) { struct _starpu_hwloc_userdata *data = ancestor->userdata; #ifdef STARPU_VERBOSE { char name[64]; hwloc_obj_type_snprintf(name, sizeof(name), ancestor, 0); _STARPU_DEBUG("CUDA%u and CUDA%u are linked through %s, along %u GPUs\n", devid, devid2, name, data->ngpus); } #endif starpu_bus_set_ngpus(bus21, data->ngpus); starpu_bus_set_ngpus(bus12, data->ngpus); } #endif } #endif } } } } _starpu_memory_node_add_nworkers(memory_node); //This worker can also manage transfers on NUMA nodes for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) _starpu_worker_drives_memory_node(&workerarg->set->workers[0], numa); _starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node); workerarg->memory_node = memory_node; } /* Set the current CUDA device */ void starpu_cuda_set_device(unsigned devid STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_SIMGRID STARPU_ABORT(); #else cudaError_t cures; struct starpu_conf *conf = &_starpu_get_machine_config()->conf; #if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && defined(HAVE_CUDA_GL_INTEROP_H) unsigned i; #endif #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER if (conf->n_cuda_opengl_interoperability) { _STARPU_MSG("OpenGL interoperability was requested, but StarPU was built with multithread GPU control support, please reconfigure with --disable-cuda-memcpy-peer but that will disable the memcpy-peer optimizations\n"); STARPU_ABORT(); } #elif !defined(HAVE_CUDA_GL_INTEROP_H) if (conf->n_cuda_opengl_interoperability) { _STARPU_MSG("OpenGL interoperability was requested, but cuda_gl_interop.h could not be compiled, please make sure that OpenGL headers were available before ./configure run.\n"); STARPU_ABORT(); } #else for (i = 0; i < conf->n_cuda_opengl_interoperability; i++) { if (conf->cuda_opengl_interoperability[i] == devid) { cures = cudaGLSetGLDevice(devid); goto done; } } #endif cures = cudaSetDevice(devid); #if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && defined(HAVE_CUDA_GL_INTEROP_H) done: #endif #ifdef STARPU_OPENMP /* When StarPU is used as Open Runtime support, * starpu_omp_shutdown() will usually be called from a * destructor, in which case cudaThreadExit() reports a * cudaErrorCudartUnloading here. There should not * be any remaining tasks running at this point so * we can probably ignore it without much consequences. */ if (STARPU_UNLIKELY(cures && cures != cudaErrorCudartUnloading)) STARPU_CUDA_REPORT_ERROR(cures); #else if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); #endif /* STARPU_OPENMP */ #endif } /* In case we want to cap the amount of memory available on the GPUs by the * mean of the STARPU_LIMIT_CUDA_MEM, we decrease the value of * global_mem[devid] which is the value returned by * _starpu_cuda_get_global_mem_size() to indicate how much memory can * be allocated on the device */ static void _starpu_cuda_limit_gpu_mem_if_needed(unsigned devid) { starpu_ssize_t limit; size_t STARPU_ATTRIBUTE_UNUSED totalGlobalMem = 0; size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0; #ifdef STARPU_SIMGRID totalGlobalMem = _starpu_simgrid_get_memsize("CUDA", devid); #elif defined(STARPU_USE_CUDA) /* Find the size of the memory on the device */ totalGlobalMem = props[devid].totalGlobalMem; #endif limit = starpu_getenv_number("STARPU_LIMIT_CUDA_MEM"); if (limit == -1) { char name[30]; snprintf(name, sizeof(name), "STARPU_LIMIT_CUDA_%u_MEM", devid); limit = starpu_getenv_number(name); } #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) if (limit == -1) { limit = totalGlobalMem / (1024*1024) * FREE_MARGIN; } #endif global_mem[devid] = limit * 1024*1024; #ifdef STARPU_USE_CUDA /* How much memory to waste ? */ to_waste = totalGlobalMem - global_mem[devid]; props[devid].totalGlobalMem -= to_waste; #endif /* STARPU_USE_CUDA */ _STARPU_DEBUG("CUDA device %u: Wasting %ld MB / Limit %ld MB / Total %ld MB / Remains %ld MB\n", devid, (long) to_waste/(1024*1024), (long) limit, (long) totalGlobalMem/(1024*1024), (long) (totalGlobalMem - to_waste)/(1024*1024)); } /* Really initialize one device */ static void init_device_context(unsigned devid, unsigned memnode) { STARPU_ASSERT(devid < STARPU_MAXCUDADEVS); #ifndef STARPU_SIMGRID cudaError_t cures; /* TODO: cudaSetDeviceFlag(cudaDeviceMapHost) */ starpu_cuda_set_device(devid); #endif /* !STARPU_SIMGRID */ STARPU_PTHREAD_MUTEX_LOCK(&cuda_device_init_mutex[devid]); cuda_device_users[devid]++; if (cuda_device_init[devid] == UNINITIALIZED) /* Nobody started initialization yet, do it */ cuda_device_init[devid] = CHANGING; else { /* Somebody else is doing initialization, wait for it */ while (cuda_device_init[devid] != INITIALIZED) STARPU_PTHREAD_COND_WAIT(&cuda_device_init_cond[devid], &cuda_device_init_mutex[devid]); STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_device_init_mutex[devid]); return; } STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_device_init_mutex[devid]); #ifndef STARPU_SIMGRID #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER if (starpu_getenv_number("STARPU_ENABLE_CUDA_GPU_GPU_DIRECT") != 0) { int nworkers = starpu_worker_get_count(); int workerid; for (workerid = 0; workerid < nworkers; workerid++) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); if (worker->arch == STARPU_CUDA_WORKER && worker->subworkerid == 0 && worker->devid != devid) { int can; cures = cudaDeviceCanAccessPeer(&can, devid, worker->devid); (void) cudaGetLastError(); if (!cures && can) { cures = cudaDeviceEnablePeerAccess(worker->devid, 0); (void) cudaGetLastError(); if (!cures) { _STARPU_DEBUG("Enabled GPU-Direct %d -> %d\n", worker->devid, devid); /* direct copies are made from the destination, see link_supports_direct_transfers */ starpu_bus_set_direct(_starpu_cuda_bus_ids[worker->devid+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES], 1); } } } } } #endif /* force CUDA to initialize the context for real */ cures = cudaFree(0); if (STARPU_UNLIKELY(cures)) { if (cures == cudaErrorDevicesUnavailable) { _STARPU_MSG("All CUDA-capable devices are busy or unavailable\n"); exit(77); } STARPU_CUDA_REPORT_ERROR(cures); } cures = cudaGetDeviceProperties(&props[devid], devid); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER if (props[devid].computeMode == cudaComputeModeExclusive) { _STARPU_MSG("CUDA is in EXCLUSIVE-THREAD mode, but StarPU was built with multithread GPU control support, please either ask your administrator to use EXCLUSIVE-PROCESS mode (which should really be fine), or reconfigure with --disable-cuda-memcpy-peer but that will disable the memcpy-peer optimizations\n"); STARPU_ABORT(); } #endif cures = starpu_cudaStreamCreate(&in_transfer_streams[devid]); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); cures = starpu_cudaStreamCreate(&out_transfer_streams[devid]); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); int nworkers = starpu_worker_get_count(); int workerid; for (workerid = 0; workerid < nworkers; workerid++) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); if (worker->arch == STARPU_CUDA_WORKER && worker->subworkerid == 0) { cures = starpu_cudaStreamCreate(&in_peer_transfer_streams[worker->devid][devid]); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); } } #endif /* !STARPU_SIMGRID */ STARPU_PTHREAD_MUTEX_LOCK(&cuda_device_init_mutex[devid]); cuda_device_init[devid] = INITIALIZED; STARPU_PTHREAD_COND_BROADCAST(&cuda_device_init_cond[devid]); STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_device_init_mutex[devid]); _starpu_cuda_limit_gpu_mem_if_needed(devid); _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cuda_get_global_mem_size(devid)); } /* De-initialize one device */ static void deinit_device_context(unsigned devid STARPU_ATTRIBUTE_UNUSED) { #ifndef STARPU_SIMGRID starpu_cuda_set_device(devid); cudaStreamDestroy(in_transfer_streams[devid]); cudaStreamDestroy(out_transfer_streams[devid]); int nworkers = starpu_worker_get_count(); int workerid; for (workerid = 0; workerid < nworkers; workerid++) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); if (worker->arch == STARPU_CUDA_WORKER && worker->subworkerid == 0) { cudaStreamDestroy(in_peer_transfer_streams[worker->devid][devid]); } } #endif /* !STARPU_SIMGRID */ } static void init_worker_context(unsigned workerid, unsigned devid STARPU_ATTRIBUTE_UNUSED) { int j; #ifdef STARPU_SIMGRID for (j = 0; j < STARPU_MAX_PIPELINE; j++) task_finished[workerid][j] = 0; #else /* !STARPU_SIMGRID */ cudaError_t cures; starpu_cuda_set_device(devid); for (j = 0; j < STARPU_MAX_PIPELINE; j++) { cures = cudaEventCreateWithFlags(&task_events[workerid][j], cudaEventDisableTiming); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); } cures = starpu_cudaStreamCreate(&streams[workerid]); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); #endif /* !STARPU_SIMGRID */ } static void deinit_worker_context(unsigned workerid, unsigned devid STARPU_ATTRIBUTE_UNUSED) { unsigned j; #ifdef STARPU_SIMGRID for (j = 0; j < STARPU_MAX_PIPELINE; j++) task_finished[workerid][j] = 0; #else /* STARPU_SIMGRID */ starpu_cuda_set_device(devid); for (j = 0; j < STARPU_MAX_PIPELINE; j++) cudaEventDestroy(task_events[workerid][j]); cudaStreamDestroy(streams[workerid]); #endif /* STARPU_SIMGRID */ } #ifdef STARPU_HAVE_NVML_H nvmlDevice_t _starpu_cuda_get_nvmldev(struct cudaDeviceProp *dev_props) { char busid[13]; nvmlDevice_t ret; snprintf(busid, sizeof(busid), "%04x:%02x:%02x.0", dev_props->pciDomainID, dev_props->pciBusID, dev_props->pciDeviceID); if (!_starpu_nvmlDeviceGetHandleByPciBusId || _starpu_nvmlDeviceGetHandleByPciBusId(busid, &ret) != NVML_SUCCESS) ret = NULL; return ret; } nvmlDevice_t starpu_cuda_get_nvmldev(unsigned devid) { return nvmlDev[devid]; } #endif /* This is run from the driver thread to initialize the driver CUDA context */ static int _starpu_cuda_driver_init(struct _starpu_worker *worker) { struct _starpu_worker_set *worker_set = worker->set; struct _starpu_worker *worker0 = &worker_set->workers[0]; int lastdevid = -1; unsigned i; #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi; #endif _starpu_driver_start(worker0, STARPU_CUDA_WORKER, 0); _starpu_set_local_worker_set_key(worker_set); #ifdef STARPU_USE_FXT for (i = 1; i < worker_set->nworkers; i++) _starpu_worker_start(&worker_set->workers[i], STARPU_CUDA_WORKER, 0); #endif for (i = 0; i < worker_set->nworkers; i++) { worker = &worker_set->workers[i]; unsigned devid = worker->devid; unsigned memnode = worker->memory_node; #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init, devid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init(&pi, NULL, NULL); pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_start, devid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start(&pi, NULL, NULL); #endif if ((int) devid == lastdevid) { #ifdef STARPU_SIMGRID STARPU_ASSERT_MSG(0, "Simgrid mode does not support concurrent kernel execution yet\n"); #endif /* !STARPU_SIMGRID */ /* Already initialized */ continue; } lastdevid = devid; init_device_context(devid, memnode); #ifndef STARPU_SIMGRID if (worker->config->topology.nworker[STARPU_CUDA_WORKER][devid] > 1 && props[devid].concurrentKernels == 0) _STARPU_DISP("Warning: STARPU_NWORKER_PER_CUDA is %u, but CUDA device %u does not support concurrent kernel execution!\n", worker_set->nworkers, devid); #endif /* !STARPU_SIMGRID */ } /* one more time to avoid hacks from third party lib :) */ _starpu_bind_thread_on_cpu(worker0->bindid, worker0->workerid, NULL); for (i = 0; i < worker_set->nworkers; i++) { worker = &worker_set->workers[i]; unsigned devid = worker->devid; unsigned workerid = worker->workerid; unsigned subdev = worker->subworkerid; float size = (float) global_mem[devid] / (1<<30); #ifdef STARPU_SIMGRID const char *devname = _starpu_simgrid_get_devname("CUDA", devid); if (!devname) devname = "Simgrid"; #else /* get the device's name */ char devname[64]; strncpy(devname, props[devid].name, 63); devname[63] = 0; #endif #if defined(STARPU_HAVE_BUSID) && !defined(STARPU_SIMGRID) #if defined(STARPU_HAVE_DOMAINID) && !defined(STARPU_SIMGRID) #ifdef STARPU_HAVE_NVML_H nvmlDev[devid] = _starpu_cuda_get_nvmldev(&props[devid]); #endif if (props[devid].pciDomainID) snprintf(worker->name, sizeof(worker->name), "CUDA %u.%u (%s %.1f GiB %04x:%02x:%02x.0)", devid, subdev, devname, size, props[devid].pciDomainID, props[devid].pciBusID, props[devid].pciDeviceID); else #endif snprintf(worker->name, sizeof(worker->name), "CUDA %u.%u (%s %.1f GiB %02x:%02x.0)", devid, subdev, devname, size, props[devid].pciBusID, props[devid].pciDeviceID); #else snprintf(worker->name, sizeof(worker->name), "CUDA %u.%u (%s %.1f GiB)", devid, subdev, devname, size); #endif snprintf(worker->short_name, sizeof(worker->short_name), "CUDA %u.%u", devid, subdev); _STARPU_DEBUG("cuda (%s) dev id %u worker %u thread is ready to run on CPU %d !\n", devname, devid, subdev, worker->bindid); worker->pipeline_length = starpu_getenv_number_default("STARPU_CUDA_PIPELINE", 2); if (worker->pipeline_length > STARPU_MAX_PIPELINE) { _STARPU_DISP("Warning: STARPU_CUDA_PIPELINE is %u, but STARPU_MAX_PIPELINE is only %u\n", worker->pipeline_length, STARPU_MAX_PIPELINE); worker->pipeline_length = STARPU_MAX_PIPELINE; } #if !defined(STARPU_SIMGRID) && !defined(STARPU_NON_BLOCKING_DRIVERS) if (worker->pipeline_length >= 1) { /* We need non-blocking drivers, to poll for CUDA task * termination */ _STARPU_DISP("Warning: reducing STARPU_CUDA_PIPELINE to 0 because blocking drivers are enabled (and simgrid is not enabled)\n"); worker->pipeline_length = 0; } #endif init_worker_context(workerid, worker->devid); _STARPU_TRACE_WORKER_INIT_END(workerid); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_end, devid, worker->workerid, starpu_prof_tool_driver_gpu, 0, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end(&pi, NULL, NULL); #endif } { char thread_name[16]; snprintf(thread_name, sizeof(thread_name), "CUDA %u", worker0->devid); starpu_pthread_setname(thread_name); } /* tell the main thread that this one is ready */ STARPU_PTHREAD_MUTEX_LOCK(&worker0->mutex); worker0->status = STATUS_UNKNOWN; worker0->worker_is_initialized = 1; STARPU_PTHREAD_COND_SIGNAL(&worker0->ready_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&worker0->mutex); /* tell the main thread that this one is ready */ STARPU_PTHREAD_MUTEX_LOCK(&worker_set->mutex); worker_set->set_is_initialized = 1; STARPU_PTHREAD_COND_SIGNAL(&worker_set->ready_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set->mutex); return 0; } static int _starpu_cuda_driver_deinit(struct _starpu_worker *worker) { struct _starpu_worker_set *worker_set = worker->set; int lastdevid = -1; unsigned i; _STARPU_TRACE_WORKER_DEINIT_START; for (i = 0; i < worker_set->nworkers; i++) { worker = &worker_set->workers[i]; unsigned devid = worker->devid; unsigned memnode = worker->memory_node; unsigned usersleft; if ((int) devid == lastdevid) /* Already initialized */ continue; lastdevid = devid; STARPU_PTHREAD_MUTEX_LOCK(&cuda_device_init_mutex[devid]); usersleft = --cuda_device_users[devid]; STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_device_init_mutex[devid]); if (!usersleft) { /* I'm last, deinitialize device */ _starpu_datawizard_handle_all_pending_node_data_requests(memnode); /* In case there remains some memory that was automatically * allocated by StarPU, we release it now. Note that data * coherency is not maintained anymore at that point ! */ _starpu_free_all_automatically_allocated_buffers(memnode); _starpu_malloc_shutdown(memnode); deinit_device_context(devid); } STARPU_PTHREAD_MUTEX_LOCK(&cuda_device_init_mutex[devid]); cuda_device_init[devid] = UNINITIALIZED; STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_device_init_mutex[devid]); } for (i = 0; i < worker_set->nworkers; i++) { worker = &worker_set->workers[i]; unsigned workerid = worker->workerid; unsigned memnode = worker->memory_node; deinit_worker_context(workerid, worker->devid); #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_deinit, workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit(&pi, NULL, NULL); #endif } worker_set->workers[0].worker_is_initialized = 0; _STARPU_TRACE_WORKER_DEINIT_END(STARPU_CUDA_WORKER); return 0; } static uintptr_t _starpu_cuda_malloc_on_node(unsigned dst_node, size_t size, int flags) { uintptr_t addr = 0; (void) flags; #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) #ifdef STARPU_SIMGRID static uintptr_t last[STARPU_MAXNODES]; #ifdef STARPU_DEVEL #warning TODO: record used memory, using a simgrid property to know the available memory #endif /* Sleep for the allocation */ STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex); if (_starpu_simgrid_cuda_malloc_cost()) starpu_sleep(0.000175); if (!last[dst_node]) last[dst_node] = 1<<10; addr = last[dst_node]; last[dst_node]+=size; STARPU_ASSERT(last[dst_node] >= addr); STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex); #else unsigned devid = starpu_memory_node_get_devid(dst_node); #if defined(STARPU_HAVE_CUDA_MEMCPY_PEER) starpu_cuda_set_device(devid); #else struct _starpu_worker *worker = _starpu_get_local_worker_key(); if (!worker || worker->arch != STARPU_CUDA_WORKER || worker->devid != devid) STARPU_ASSERT_MSG(0, "CUDA peer access is not available with this version of CUDA"); #endif /* Check if there is free memory */ size_t cuda_mem_free, cuda_mem_total; cudaError_t status; status = cudaMemGetInfo(&cuda_mem_free, &cuda_mem_total); if (status == cudaSuccess && cuda_mem_free * FREE_MARGIN < size) { addr = 0; } else { status = cudaMalloc((void **)&addr, size); if (!addr || (status != cudaSuccess)) { if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation)) STARPU_CUDA_REPORT_ERROR(status); addr = 0; } } #endif #endif return addr; } static void _starpu_cuda_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags) { (void) dst_node; (void) addr; (void) size; (void) flags; #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) #ifdef STARPU_SIMGRID STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex); /* Sleep for the free */ if (_starpu_simgrid_cuda_malloc_cost()) starpu_sleep(0.000750); STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex); /* CUDA also synchronizes roughly everything on cudaFree */ _starpu_simgrid_sync_gpus(); #else cudaError_t err; unsigned devid = starpu_memory_node_get_devid(dst_node); #if defined(STARPU_HAVE_CUDA_MEMCPY_PEER) starpu_cuda_set_device(devid); #else struct _starpu_worker *worker = _starpu_get_local_worker_key(); if (!worker || worker->arch != STARPU_CUDA_WORKER || worker->devid != devid) STARPU_ASSERT_MSG(0, "CUDA peer access is not available with this version of CUDA"); #endif /* STARPU_HAVE_CUDA_MEMCPY_PEER */ err = cudaFree((void*)addr); #ifdef STARPU_OPENMP /* When StarPU is used as Open Runtime support, * starpu_omp_shutdown() will usually be called from a * destructor, in which case cudaThreadExit() reports a * cudaErrorCudartUnloading here. There should not * be any remaining tasks running at this point so * we can probably ignore it without much consequences. */ if (STARPU_UNLIKELY(err != cudaSuccess && err != cudaErrorCudartUnloading)) STARPU_CUDA_REPORT_ERROR(err); #else if (STARPU_UNLIKELY(err != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(err); #endif /* STARPU_OPENMP */ #endif /* STARPU_SIMGRID */ #endif } #ifdef STARPU_USE_CUDA int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind) { #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER int peer_copy = 0; int src_dev = -1, dst_dev = -1; #endif cudaError_t cures = 0; if (kind == cudaMemcpyDeviceToDevice && src_node != dst_node) { #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER peer_copy = 1; src_dev = starpu_memory_node_get_devid(src_node); dst_dev = starpu_memory_node_get_devid(dst_node); #else STARPU_ABORT(); #endif } if (stream) { double start; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER if (peer_copy) { cures = cudaMemcpyPeerAsync((char *) dst_ptr, dst_dev, (char *) src_ptr, src_dev, ssize, stream); } else #endif { cures = cudaMemcpyAsync((char *)dst_ptr, (char *)src_ptr, ssize, kind, stream); } (void) cudaGetLastError(); starpu_interface_end_driver_copy_async(src_node, dst_node, start); } /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ if (stream == NULL || cures) { /* do it in a synchronous fashion */ #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER if (peer_copy) { cures = cudaMemcpyPeer((char *) dst_ptr, dst_dev, (char *) src_ptr, src_dev, ssize); } else #endif { cures = cudaMemcpy((char *)dst_ptr, (char *)src_ptr, ssize, kind); } (void) cudaGetLastError(); if (!cures) cures = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); return 0; } return -EAGAIN; } int starpu_cuda_copy2d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, cudaStream_t stream, enum cudaMemcpyKind kind) { #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER int peer_copy = 0; int src_dev = -1, dst_dev = -1; #endif cudaError_t cures = 0; if (kind == cudaMemcpyDeviceToDevice && src_node != dst_node) { #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER # ifdef BUGGED_MEMCPY3D STARPU_ABORT_MSG("CUDA memcpy 3D peer buggy, but core triggered one?!"); # endif peer_copy = 1; src_dev = starpu_memory_node_get_devid(src_node); dst_dev = starpu_memory_node_get_devid(dst_node); #else STARPU_ABORT_MSG("CUDA memcpy 3D peer not available, but core triggered one ?!"); #endif } #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER if (peer_copy) { struct cudaMemcpy3DPeerParms p; memset(&p, 0, sizeof(p)); p.srcDevice = src_dev; p.dstDevice = dst_dev; p.srcPtr = make_cudaPitchedPtr((char *)src_ptr, ld_src, blocksize, numblocks); p.dstPtr = make_cudaPitchedPtr((char *)dst_ptr, ld_dst, blocksize, numblocks); p.extent = make_cudaExtent(blocksize, numblocks, 1); if (stream) { double start; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); cures = cudaMemcpy3DPeerAsync(&p, stream); (void) cudaGetLastError(); } /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ if (stream == NULL || cures) { cures = cudaMemcpy3DPeer(&p); (void) cudaGetLastError(); if (!cures) cures = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); return 0; } } else #endif { if (stream) { double start; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); cures = cudaMemcpy2DAsync((char *)dst_ptr, ld_dst, (char *)src_ptr, ld_src, blocksize, numblocks, kind, stream); starpu_interface_end_driver_copy_async(src_node, dst_node, start); } /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ if (stream == NULL || cures) { cures = cudaMemcpy2D((char *)dst_ptr, ld_dst, (char *)src_ptr, ld_src, blocksize, numblocks, kind); if (!cures) cures = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); return 0; } } return -EAGAIN; } #if 0 /* CUDA doesn't seem to be providing a way to set ld2?? */ int starpu_cuda_copy3d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t blocksize, size_t numblocks_1, size_t ld1_src, size_t ld1_dst, size_t numblocks_2, size_t ld2_src, size_t ld2_dst, cudaStream_t stream, enum cudaMemcpyKind kind) { #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER int peer_copy = 0; int src_dev = -1, dst_dev = -1; #endif cudaError_t cures = 0; if (kind == cudaMemcpyDeviceToDevice && src_node != dst_node) { #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER peer_copy = 1; src_dev = starpu_memory_node_get_devid(src_node); dst_dev = starpu_memory_node_get_devid(dst_node); #else STARPU_ABORT_MSG("CUDA memcpy 3D peer not available, but core triggered one ?!"); #endif } #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER if (peer_copy) { struct cudaMemcpy3DPeerParms p; memset(&p, 0, sizeof(p)); p.srcDevice = src_dev; p.dstDevice = dst_dev; p.srcPtr = make_cudaPitchedPtr((char *)src_ptr, ld1_src, blocksize, numblocks); p.dstPtr = make_cudaPitchedPtr((char *)dst_ptr, ld1_dst, blocksize, numblocks); // FIXME: how to pass ld2_src / ld2_dst ?? p.extent = make_cudaExtent(blocksize, numblocks_1, numblocks_2); if (stream) { double start; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); cures = cudaMemcpy3DPeerAsync(&p, stream); } /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ if (stream == NULL || cures) { cures = cudaMemcpy3DPeer(&p); (void) cudaGetLastError(); if (!cures) cures = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); return 0; } } else #endif { struct cudaMemcpy3DParms p; memset(&p, 0, sizeof(p)); p.srcPtr = make_cudaPitchedPtr((char *)src_ptr, ld1_src, blocksize, numblocks); p.dstPtr = make_cudaPitchedPtr((char *)dst_ptr, ld1_dst, blocksize, numblocks); // FIXME: how to pass ld2_src / ld2_dst ?? p.extent = make_cudaExtent(blocksize, numblocks, 1); p.kind = kind; if (stream) { double start; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); cures = cudaMemcpy3DAsync(&p, stream); starpu_interface_end_driver_copy_async(src_node, dst_node, start); } /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ if (stream == NULL || cures) { cures = cudaMemcpy3D(&p); if (!cures) cures = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); return 0; } } return -EAGAIN; } #endif static inline cudaEvent_t *_starpu_cuda_event(union _starpu_async_channel_event *_event) { cudaEvent_t *event; STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); event = (void *) _event; return event; } static unsigned _starpu_cuda_test_request_completion(struct _starpu_async_channel *async_channel) { cudaEvent_t event; cudaError_t cures; unsigned success; event = *_starpu_cuda_event(&async_channel->event); cures = cudaEventQuery(event); success = (cures == cudaSuccess); if (success) cudaEventDestroy(event); else if (cures != cudaErrorNotReady) STARPU_CUDA_REPORT_ERROR(cures); return success; } /* Only used at starpu_shutdown */ static void _starpu_cuda_wait_request_completion(struct _starpu_async_channel *async_channel) { cudaEvent_t event; cudaError_t cures; event = *_starpu_cuda_event(&async_channel->event); cures = cudaEventSynchronize(event); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); cures = cudaEventDestroy(event); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); } #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER static void starpu_cuda_set_copy_device(unsigned src_node, unsigned dst_node) { enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); unsigned devid; if ((src_kind == STARPU_CUDA_RAM) && (dst_kind == STARPU_CUDA_RAM)) { /* GPU-GPU transfer, issue it from the destination */ devid = starpu_memory_node_get_devid(dst_node); } else { unsigned node = (dst_kind == STARPU_CUDA_RAM)?dst_node:src_node; devid = starpu_memory_node_get_devid(node); } starpu_cuda_set_device(devid); } #endif static int _starpu_cuda_copy_interface_from_cuda_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER starpu_cuda_set_copy_device(src_node, dst_node); #else STARPU_ASSERT(src_node == dst_node); #endif int ret = 1; cudaError_t cures; cudaStream_t stream; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; /* CUDA - CUDA transfer */ if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() || !(copy_methods->cuda_to_cuda_async || copy_methods->any_to_any)) { STARPU_ASSERT(copy_methods->cuda_to_cuda || copy_methods->any_to_any); /* this is not associated to a request so it's synchronous */ if (copy_methods->cuda_to_cuda) copy_methods->cuda_to_cuda(src_interface, src_node, dst_interface, dst_node); else copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } else { req->async_channel.node_ops = &_starpu_driver_cuda_node_ops; cures = cudaEventCreateWithFlags(_starpu_cuda_event(&req->async_channel.event), cudaEventDisableTiming); if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); stream = starpu_cuda_get_peer_transfer_stream(src_node, dst_node); if (copy_methods->cuda_to_cuda_async) ret = copy_methods->cuda_to_cuda_async(src_interface, src_node, dst_interface, dst_node, stream); else { STARPU_ASSERT(copy_methods->any_to_any); ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); } cures = cudaEventRecord(*_starpu_cuda_event(&req->async_channel.event), stream); if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); } return ret; } static int _starpu_cuda_copy_interface_from_cuda_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM); #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER starpu_cuda_set_copy_device(src_node, dst_node); #endif int ret = 1; cudaError_t cures; cudaStream_t stream; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; /* only the proper CUBLAS thread can initiate this directly ! */ #if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) STARPU_ASSERT(starpu_worker_get_local_memory_node() == src_node); #endif if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() || !(copy_methods->cuda_to_ram_async || copy_methods->any_to_any)) { /* this is not associated to a request so it's synchronous */ STARPU_ASSERT(copy_methods->cuda_to_ram || copy_methods->any_to_any); if (copy_methods->cuda_to_ram) copy_methods->cuda_to_ram(src_interface, src_node, dst_interface, dst_node); else copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } else { req->async_channel.node_ops = &_starpu_driver_cuda_node_ops; cures = cudaEventCreateWithFlags(_starpu_cuda_event(&req->async_channel.event), cudaEventDisableTiming); if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); stream = starpu_cuda_get_out_transfer_stream(src_node); if (copy_methods->cuda_to_ram_async) ret = copy_methods->cuda_to_ram_async(src_interface, src_node, dst_interface, dst_node, stream); else { STARPU_ASSERT(copy_methods->any_to_any); ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); } cures = cudaEventRecord(*_starpu_cuda_event(&req->async_channel.event), stream); if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); } return ret; } static int _starpu_cuda_copy_interface_from_cpu_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER starpu_cuda_set_copy_device(src_node, dst_node); #endif int ret = 1; cudaError_t cures; cudaStream_t stream; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; /* STARPU_CPU_RAM -> CUBLAS_RAM */ /* only the proper CUBLAS thread can initiate this ! */ #if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node); #endif if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() || !(copy_methods->ram_to_cuda_async || copy_methods->any_to_any)) { /* this is not associated to a request so it's synchronous */ STARPU_ASSERT(copy_methods->ram_to_cuda || copy_methods->any_to_any); if (copy_methods->ram_to_cuda) copy_methods->ram_to_cuda(src_interface, src_node, dst_interface, dst_node); else copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } else { req->async_channel.node_ops = &_starpu_driver_cuda_node_ops; cures = cudaEventCreateWithFlags(_starpu_cuda_event(&req->async_channel.event), cudaEventDisableTiming); if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); stream = starpu_cuda_get_in_transfer_stream(dst_node); if (copy_methods->ram_to_cuda_async) ret = copy_methods->ram_to_cuda_async(src_interface, src_node, dst_interface, dst_node, stream); else { STARPU_ASSERT(copy_methods->any_to_any); ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); } cures = cudaEventRecord(*_starpu_cuda_event(&req->async_channel.event), stream); if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); } return ret; } static int _starpu_cuda_copy_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM); return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, size, async_channel?starpu_cuda_get_out_transfer_stream(src_node):NULL, cudaMemcpyDeviceToHost); } static int _starpu_cuda_copy_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); #ifndef STARPU_HAVE_CUDA_MEMCPY_PEER STARPU_ASSERT(src_node == dst_node); #endif return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, size, async_channel?starpu_cuda_get_peer_transfer_stream(src_node, dst_node):NULL, cudaMemcpyDeviceToDevice); } static int _starpu_cuda_copy_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, size, async_channel?starpu_cuda_get_in_transfer_stream(dst_node):NULL, cudaMemcpyHostToDevice); } static int _starpu_cuda_copy2d_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM); return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, blocksize, numblocks, ld_src, ld_dst, async_channel?starpu_cuda_get_out_transfer_stream(src_node):NULL, cudaMemcpyDeviceToHost); } static int _starpu_cuda_copy2d_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); #ifndef STARPU_HAVE_CUDA_MEMCPY_PEER STARPU_ASSERT(src_node == dst_node); #endif return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, blocksize, numblocks, ld_src, ld_dst, async_channel?starpu_cuda_get_peer_transfer_stream(src_node, dst_node):NULL, cudaMemcpyDeviceToDevice); } static int _starpu_cuda_copy2d_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, blocksize, numblocks, ld_src, ld_dst, async_channel?starpu_cuda_get_in_transfer_stream(dst_node):NULL, cudaMemcpyHostToDevice); } #ifdef STARPU_USE_CUDA_MAP static uintptr_t _starpu_cuda_map_ram(uintptr_t src_ptr STARPU_ATTRIBUTE_UNUSED, size_t src_offset, unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED, int *ret STARPU_ATTRIBUTE_UNUSED) { /* TODO */ /* * Old interface: * * cudaHostAllocMapped and cudaHostGetDevicePointer * cudaSetDeviceFlags() must have been called with the cudaDeviceMapHost flag in order * for the cudaHostAllocMapped flag to have any effect. * * * * New interface: Unified Addressing * * Whether or not a device supports unified addressing may be queried * by calling cudaGetDeviceProperties() with the device property * cudaDeviceProp::unifiedAddressing. * Unified addressing is automatically enabled in 64-bit processes. * * Upon enabling direct access from a device that supports unified * addressing to another peer device that supports unified addressing * using cudaDeviceEnablePeerAccess() all memory allocated in the peer * device using cudaMalloc() and cudaMallocPitch() will immediately be * accessible by the current device. */ *ret = -EIO; if (starpu_node_get_kind(src_node) != STARPU_CPU_RAM) return 0; /* * mapping relevant cudaDeviceProps fields: * - .canMapHostMemory: "Can map host memory with cudaHostAlloc/cudaHostGetDevicePointer" * - .unifiedAddressing: "Device shares a unified address space with the host" * - .managedMemory: "Device supports allocating memory that will be automatically managed by the Unified Memory system" * - .pageableMemoryAccess: "Device supports coherently accessing pageable memory without calling cudaHostRegister on it" * - .concurrentManagedAccess: "Device can coherently access managed memory concurrently with the CPU" */ struct _starpu_worker *worker = _starpu_get_local_worker_key(); #ifdef STARPU_HAVE_CUDA_CANMAPHOST const int cuda_canMapHostMemory = props[worker->devid].canMapHostMemory; #else const int cuda_canMapHostMemory = 0; #endif #ifdef STARPU_HAVE_CUDA_UNIFIEDADDR const int cuda_unifiedAddressing = props[worker->devid].unifiedAddressing; #else const int cuda_unifiedAddressing = 0; #endif #ifdef STARPU_HAVE_CUDA_MNGMEM const int cuda_managedMemory = props[worker->devid].managedMemory; #else const int cuda_managedMemory = 0; #endif #ifdef STARPU_HAVE_CUDA_PAGEABLEMEM const int cuda_pageableMemoryAccess = props[worker->devid].pageableMemoryAccess; #else const int cuda_pageableMemoryAccess = 0; #endif uintptr_t dst_addr; if (cuda_pageableMemoryAccess) { dst_addr = (uintptr_t)(src_ptr+src_offset); *ret = 0; } else if (cuda_unifiedAddressing || cuda_managedMemory) { struct cudaPointerAttributes cuda_ptrattr; cudaError_t cures; cures = cudaPointerGetAttributes(&cuda_ptrattr, (void *)(src_ptr+src_offset)); if (STARPU_UNLIKELY(cures != cudaSuccess)) { if (cures == cudaErrorInvalidValue) { cudaGetLastError(); /* pointer does not support mapping */ return (uintptr_t)NULL; } STARPU_CUDA_REPORT_ERROR(cures); } #ifdef STARPU_HAVE_CUDA_POINTER_TYPE if (!(cuda_ptrattr.type == cudaMemoryTypeHost || cuda_ptrattr.type == cudaMemoryTypeManaged)) return 0; #else if (!(cuda_ptrattr.memoryType == cudaMemoryTypeHost #if CUDART_VERSION >= 10000 || cuda_ptrattr.memoryType == cudaMemoryTypeManaged #endif )) return 0; #endif dst_addr = (uintptr_t)cuda_ptrattr.devicePointer; *ret = 0; } else if (cuda_canMapHostMemory) { cudaError_t cures; void *pDevice; cures = cudaHostGetDevicePointer(&pDevice, (void*)(src_ptr+src_offset), 0); if (STARPU_UNLIKELY(cures != cudaSuccess)) { STARPU_CUDA_REPORT_ERROR(cures); } dst_addr = (uintptr_t)pDevice; *ret = 0; } else { dst_addr = (uintptr_t)NULL; } return dst_addr; } static int _starpu_cuda_unmap_ram(uintptr_t src_ptr STARPU_ATTRIBUTE_UNUSED, size_t src_offset STARPU_ATTRIBUTE_UNUSED, unsigned src_node STARPU_ATTRIBUTE_UNUSED, uintptr_t dst_ptr STARPU_ATTRIBUTE_UNUSED, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED) { #if defined(STARPU_HAVE_CUDA_CANMAPHOST) || defined(STARPU_HAVE_CUDA_UNIFIEDADDR) || defined(STARPU_HAVE_CUDA_MNGMEM) /* TODO */ return 0; #else return -EIO; #endif } static int _starpu_cuda_update_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size) { (void) src; (void) src_offset; (void) src_node; (void) dst; (void) dst_offset; (void) dst_node; (void) size; /* CUDA mappings are coherent */ /* FIXME: not necessarily, depends on board capabilities */ return 0; } #endif /* STARPU_USE_CUDA_MAP */ #endif /* STARPU_USE_CUDA */ static int _starpu_cuda_is_direct_access_supported(unsigned node, unsigned handling_node) { /* GPUs not always allow direct remote access: if CUDA4 * is enabled, we allow two CUDA devices to communicate. */ #ifdef STARPU_SIMGRID (void) node; if (starpu_node_get_kind(handling_node) == STARPU_CUDA_RAM) { starpu_sg_host_t host = _starpu_simgrid_get_memnode_host(handling_node); # ifdef STARPU_HAVE_SIMGRID_ACTOR_H const char* cuda_memcpy_peer = sg_host_get_property_value(host, "memcpy_peer"); # else const char* cuda_memcpy_peer = MSG_host_get_property_value(host, "memcpy_peer"); # endif return cuda_memcpy_peer && atoll(cuda_memcpy_peer); } else return 0; #elif defined(STARPU_HAVE_CUDA_MEMCPY_PEER) (void) node; enum starpu_node_kind kind = starpu_node_get_kind(handling_node); return kind == STARPU_CUDA_RAM; #else /* STARPU_HAVE_CUDA_MEMCPY_PEER */ /* Direct GPU-GPU transfers are not allowed in general */ (void) node; (void) handling_node; return 0; #endif /* STARPU_HAVE_CUDA_MEMCPY_PEER */ } static void start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker, unsigned char pipeline_idx STARPU_ATTRIBUTE_UNUSED) { STARPU_ASSERT(j); struct starpu_task *task = j->task; int profiling = starpu_profiling_status_get(); #if !defined(STARPU_SIMGRID) && defined(STARPU_PROF_TOOL) struct starpu_prof_tool_info pi; #endif STARPU_ASSERT(task); struct starpu_codelet *cl = task->cl; STARPU_ASSERT(cl); _starpu_set_local_worker_key(worker); _starpu_set_current_task(task); j->workerid = worker->workerid; if (worker->ntasks == 1) { /* We are alone in the pipeline, the kernel will start now, record it */ _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, profiling); } #if defined(STARPU_HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) /* We make sure we do manipulate the proper device */ starpu_cuda_set_device(worker->devid); #endif starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, j->nimpl); STARPU_ASSERT_MSG(func, "when STARPU_CUDA is defined in 'where', cuda_func or cuda_funcs has to be defined"); if (_starpu_get_disable_kernels() <= 0) { _STARPU_TRACE_START_EXECUTING(j); #ifdef STARPU_SIMGRID int async = task->cl->cuda_flags[j->nimpl] & STARPU_CUDA_ASYNC; unsigned workerid = worker->workerid; if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE && !async) func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); else if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT && !async) { _SIMGRID_TIMER_BEGIN(1); func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); _SIMGRID_TIMER_END; } else { struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(worker, j); _starpu_simgrid_submit_job(workerid, sched_ctx->id, j, &worker->perf_arch, NAN, NAN, async ? &task_finished[workerid][pipeline_idx] : NULL); } #else #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION unsigned long long energy_start = 0; nvmlReturn_t nvmlRet = -1; if (profiling && _starpu_energy_profiling && task->profiling_info && _starpu_nvmlDeviceGetTotalEnergyConsumption) { nvmlRet = _starpu_nvmlDeviceGetTotalEnergyConsumption(nvmlDev[worker->devid], &energy_start); if (nvmlRet == NVML_SUCCESS) task->profiling_info->energy_consumed = energy_start / 1000.; } #endif #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_gpu, -1, (void*)func); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec(&pi, NULL, NULL); #endif func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_gpu, -1, (void*)func); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec(&pi, NULL, NULL); #endif #endif _STARPU_TRACE_END_EXECUTING(j); } } static void finish_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker); /* Execute a job, up to completion for synchronous jobs */ static void execute_job_on_cuda(struct starpu_task *task, struct _starpu_worker *worker) { int workerid = worker->workerid; struct _starpu_job *j = _starpu_get_job_associated_to_task(task); unsigned char pipeline_idx = (worker->first_task + worker->ntasks - 1)%STARPU_MAX_PIPELINE; start_job_on_cuda(j, worker, pipeline_idx); #ifndef STARPU_SIMGRID if (!used_stream[workerid]) { used_stream[workerid] = 1; _STARPU_DISP("Warning: starpu_cuda_get_local_stream() was not used to submit kernel to CUDA on worker %d. CUDA will thus introduce a lot of useless synchronizations, which will prevent proper overlapping of data transfers and kernel execution. See the CUDA-specific part of the 'Check List When Performance Are Not There' of the StarPU handbook\n", workerid); } #endif if (task->cl->cuda_flags[j->nimpl] & STARPU_CUDA_ASYNC) { if (worker->pipeline_length == 0) { #ifdef STARPU_SIMGRID _starpu_simgrid_wait_tasks(workerid); #else /* Forced synchronous execution */ cudaStreamSynchronize(starpu_cuda_get_local_stream()); #endif finish_job_on_cuda(j, worker); } else { #ifndef STARPU_SIMGRID /* Record event to synchronize with task termination later */ cudaError_t cures = cudaEventRecord(task_events[workerid][pipeline_idx], starpu_cuda_get_local_stream()); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); #endif } } else /* Synchronous execution */ { #if !defined(STARPU_SIMGRID) STARPU_ASSERT_MSG(cudaStreamQuery(starpu_cuda_get_local_stream()) == cudaSuccess, "Unless when using the STARPU_CUDA_ASYNC flag, CUDA codelets have to wait for termination of their kernels on the starpu_cuda_get_local_stream() stream"); #endif finish_job_on_cuda(j, worker); } } static void finish_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker) { int profiling = starpu_profiling_status_get(); #ifdef HAVE_NVMLDEVICEGETTOTALENERGYCONSUMPTION if (profiling && _starpu_energy_profiling && j->task->profiling_info && j->task->profiling_info->energy_consumed && _starpu_nvmlDeviceGetTotalEnergyConsumption) { unsigned long long energy_end; nvmlReturn_t nvmlRet; nvmlRet = _starpu_nvmlDeviceGetTotalEnergyConsumption(nvmlDev[worker->devid], &energy_end); #ifdef STARPU_DEVEL #warning TODO: measure idle consumption to subtract it #endif if (nvmlRet == NVML_SUCCESS) j->task->profiling_info->energy_consumed = (energy_end / 1000. - j->task->profiling_info->energy_consumed); } #endif if (worker->pipeline_length) worker->current_tasks[worker->first_task] = NULL; else worker->current_task = NULL; worker->first_task = (worker->first_task + 1) % STARPU_MAX_PIPELINE; worker->ntasks--; _starpu_driver_end_job(worker, j, &worker->perf_arch, 0, profiling); struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(worker, j); if(!sched_ctx) sched_ctx = _starpu_get_sched_ctx_struct(j->task->sched_ctx); if(!sched_ctx->sched_policy) _starpu_driver_update_job_feedback(j, worker, &sched_ctx->perf_arch, profiling); else _starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, profiling); _starpu_push_task_output(j); _starpu_set_current_task(NULL); _starpu_handle_job_termination(j); } /* One iteration of the main driver loop */ static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker) { struct _starpu_worker_set *worker_set = worker->set; struct _starpu_worker *worker0 = &worker_set->workers[0]; struct starpu_task *tasks[worker_set->nworkers], *task; struct _starpu_job *j; #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi; #endif int i, res; int idle_tasks, idle_transfers; #ifdef STARPU_SIMGRID starpu_pthread_wait_reset(&worker0->wait); #endif _starpu_set_local_worker_key(worker0); /* First poll for completed jobs */ idle_tasks = 0; idle_transfers = 0; for (i = 0; i < (int) worker_set->nworkers; i++) { worker = &worker_set->workers[i]; int workerid = worker->workerid; unsigned memnode = worker->memory_node; if (!worker->ntasks) idle_tasks++; if (!worker->task_transferring) idle_transfers++; if (!worker->ntasks && !worker->task_transferring) { /* Even nothing to test */ continue; } /* First test for transfers pending for next task */ task = worker->task_transferring; if (task && worker->nb_buffers_transferred == worker->nb_buffers_totransfer) { STARPU_RMB(); _STARPU_TRACE_END_PROGRESS(memnode); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_gpu, memnode, worker->nb_buffers_totransfer, worker->nb_buffers_transferred); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif j = _starpu_get_job_associated_to_task(task); _starpu_set_local_worker_key(worker); _starpu_fetch_task_input_tail(task, j, worker); /* Reset it */ worker->task_transferring = NULL; if (worker->ntasks > 1 && !(task->cl->cuda_flags[j->nimpl] & STARPU_CUDA_ASYNC)) { /* We have to execute a non-asynchronous task but we * still have tasks in the pipeline... Record it to * prevent more tasks from coming, and do it later */ worker->pipeline_stuck = 1; } else { execute_job_on_cuda(task, worker); } _STARPU_TRACE_START_PROGRESS(memnode); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_start_transfer, worker->workerid, workerid, starpu_prof_tool_driver_gpu, memnode, worker->nb_buffers_totransfer, worker->nb_buffers_transferred); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); #endif } /* Then test for termination of queued tasks */ if (!worker->ntasks) /* No queued task */ continue; if (worker->pipeline_length) task = worker->current_tasks[worker->first_task]; else task = worker->current_task; if (task == worker->task_transferring) /* Next task is still pending transfer */ continue; /* On-going asynchronous task, check for its termination first */ #ifdef STARPU_SIMGRID if (task_finished[workerid][worker->first_task]) #else /* !STARPU_SIMGRID */ cudaError_t cures = cudaEventQuery(task_events[workerid][worker->first_task]); if (cures != cudaSuccess) { STARPU_ASSERT_MSG(cures == cudaErrorNotReady, "CUDA error on task %p, codelet %p (%s): %s (%d)", task, task->cl, _starpu_codelet_get_model_name(task->cl), cudaGetErrorString(cures), cures); } else #endif /* !STARPU_SIMGRID */ { #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_gpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif _STARPU_TRACE_END_PROGRESS(memnode); /* Asynchronous task completed! */ _starpu_set_local_worker_key(worker); finish_job_on_cuda(_starpu_get_job_associated_to_task(task), worker); /* See next task if any */ if (worker->ntasks) { if (worker->current_tasks[worker->first_task] != worker->task_transferring) { task = worker->current_tasks[worker->first_task]; j = _starpu_get_job_associated_to_task(task); if (task->cl->cuda_flags[j->nimpl] & STARPU_CUDA_ASYNC) { /* An asynchronous task, it was already * queued, it's now running, record its start time. */ _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, starpu_profiling_status_get()); } else { /* A synchronous task, we have finished * flushing the pipeline, we can now at * last execute it. */ _STARPU_TRACE_EVENT("sync_task"); execute_job_on_cuda(task, worker); _STARPU_TRACE_EVENT("end_sync_task"); worker->pipeline_stuck = 0; } } else /* Data for next task didn't have time to finish transferring :/ */ _STARPU_TRACE_WORKER_START_FETCH_INPUT(NULL, workerid); } _STARPU_TRACE_START_PROGRESS(memnode); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); #endif } if (!worker->pipeline_length || worker->ntasks < worker->pipeline_length) idle_tasks++; } #if defined(STARPU_NON_BLOCKING_DRIVERS) && !defined(STARPU_SIMGRID) if (!idle_tasks) { /* No task ready yet, no better thing to do than waiting */ __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, !idle_transfers); return 0; } #endif /* Something done, make some progress */ res = __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, 1); /* And pull tasks */ res |= _starpu_get_multi_worker_task(worker_set->workers, tasks, worker_set->nworkers, worker0->memory_node); #ifdef STARPU_SIMGRID if (!res) starpu_pthread_wait_wait(&worker0->wait); #endif for (i = 0; i < (int) worker_set->nworkers; i++) { worker = &worker_set->workers[i]; unsigned memnode STARPU_ATTRIBUTE_UNUSED = worker->memory_node; task = tasks[i]; if (!task) continue; j = _starpu_get_job_associated_to_task(task); /* can CUDA do that task ? */ if (!_STARPU_MAY_PERFORM(j, CUDA)) { /* this is neither a cuda or a cublas task */ _starpu_worker_refuse_task(worker, task); continue; } /* Fetch data asynchronously */ #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif _STARPU_TRACE_END_PROGRESS(memnode); _starpu_set_local_worker_key(worker); res = _starpu_fetch_task_input(task, j, 1); STARPU_ASSERT(res == 0); _STARPU_TRACE_START_PROGRESS(memnode); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); #endif // _STARPU_TRACE_END_PROGRESS(memnode); } return 0; } void *_starpu_cuda_worker(void *_arg) { struct _starpu_worker *worker = _arg; struct _starpu_worker_set* worker_set = worker->set; #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi; #endif unsigned i; _starpu_cuda_driver_init(worker); for (i = 0; i < worker_set->nworkers; i++) { #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker_set->workers[i].workerid, worker_set->workers[i].workerid, starpu_prof_tool_driver_gpu, worker_set->workers[i].memory_node, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); #endif _STARPU_TRACE_START_PROGRESS(worker_set->workers[i].memory_node); } while (_starpu_machine_is_running()) { _starpu_may_pause(); _starpu_cuda_driver_run_once(worker); } for (i = 0; i < worker_set->nworkers; i++) { _STARPU_TRACE_END_PROGRESS(worker_set->workers[i].memory_node); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker_set->workers[i].workerid, worker_set->workers[i].workerid, starpu_prof_tool_driver_gpu, worker_set->workers[i].memory_node, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif } _starpu_cuda_driver_deinit(worker); return NULL; } #ifdef STARPU_HAVE_HWLOC hwloc_obj_t _starpu_cuda_get_hwloc_obj(hwloc_topology_t topology, int devid) { #if !defined(STARPU_SIMGRID) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX return hwloc_cuda_get_device_osdev_by_index(topology, devid); #else return NULL; #endif } #endif #ifdef STARPU_USE_CUDA void starpu_cublas_report_error(const char *func, const char *file, int line, int status) { char *errormsg; switch (status) { case CUBLAS_STATUS_SUCCESS: errormsg = "success"; break; case CUBLAS_STATUS_NOT_INITIALIZED: errormsg = "not initialized"; break; case CUBLAS_STATUS_ALLOC_FAILED: errormsg = "alloc failed"; break; case CUBLAS_STATUS_INVALID_VALUE: errormsg = "invalid value"; break; case CUBLAS_STATUS_ARCH_MISMATCH: errormsg = "arch mismatch"; break; case CUBLAS_STATUS_EXECUTION_FAILED: errormsg = "execution failed"; break; case CUBLAS_STATUS_INTERNAL_ERROR: errormsg = "internal error"; break; default: errormsg = "unknown error"; break; } _STARPU_MSG("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg); STARPU_ABORT(); } void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status) { const char *errormsg = cudaGetErrorString(status); _STARPU_ERROR("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg); } #ifdef STARPU_HAVE_LIBCUSOLVER void starpu_cusolver_report_error(const char *func, const char *file, int line, cusolverStatus_t status) { #define REPORT(error) case error: errormsg = #error; break; char *errormsg; switch (status) { REPORT(CUSOLVER_STATUS_SUCCESS); REPORT(CUSOLVER_STATUS_NOT_INITIALIZED); REPORT(CUSOLVER_STATUS_ALLOC_FAILED); REPORT(CUSOLVER_STATUS_INVALID_VALUE); REPORT(CUSOLVER_STATUS_ARCH_MISMATCH); REPORT(CUSOLVER_STATUS_MAPPING_ERROR); REPORT(CUSOLVER_STATUS_EXECUTION_FAILED); REPORT(CUSOLVER_STATUS_INTERNAL_ERROR); REPORT(CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED); REPORT(CUSOLVER_STATUS_NOT_SUPPORTED); REPORT(CUSOLVER_STATUS_ZERO_PIVOT); REPORT(CUSOLVER_STATUS_INVALID_LICENSE); #if defined(CUSOLVER_VER_MAJOR) && (CUSOLVER_VER_MAJOR >= 11) REPORT(CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED); REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID); REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC); REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE); REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER); REPORT(CUSOLVER_STATUS_IRS_INTERNAL_ERROR); REPORT(CUSOLVER_STATUS_IRS_NOT_SUPPORTED); REPORT(CUSOLVER_STATUS_IRS_OUT_OF_RANGE); REPORT(CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES); REPORT(CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED); REPORT(CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED); REPORT(CUSOLVER_STATUS_IRS_MATRIX_SINGULAR); REPORT(CUSOLVER_STATUS_INVALID_WORKSPACE); #endif default: errormsg = "unknown error"; break; } _STARPU_MSG("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg); STARPU_ABORT(); } #endif #endif /* STARPU_USE_CUDA */ static int _starpu_cuda_run_from_worker(struct _starpu_worker *worker) { /* Let's go ! */ _starpu_cuda_worker(worker); return 0; } static int _starpu_cuda_driver_set_devid(struct starpu_driver *driver, struct _starpu_worker *worker) { driver->id.cuda_id = worker->devid; return 0; } static int _starpu_cuda_driver_is_devid(struct starpu_driver *driver, struct _starpu_worker *worker) { return driver->id.cuda_id == worker->devid; } struct _starpu_driver_ops _starpu_driver_cuda_ops = { .init = _starpu_cuda_driver_init, .run = _starpu_cuda_run_from_worker, .run_once = _starpu_cuda_driver_run_once, .deinit = _starpu_cuda_driver_deinit, .set_devid = _starpu_cuda_driver_set_devid, .is_devid = _starpu_cuda_driver_is_devid, }; struct _starpu_node_ops _starpu_driver_cuda_node_ops = { .name = "cuda driver", .malloc_on_node = _starpu_cuda_malloc_on_node, .free_on_node = _starpu_cuda_free_on_node, .is_direct_access_supported = _starpu_cuda_is_direct_access_supported, #ifndef STARPU_SIMGRID .copy_interface_to[STARPU_CPU_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cpu, .copy_interface_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cuda, .copy_interface_from[STARPU_CPU_RAM] = _starpu_cuda_copy_interface_from_cpu_to_cuda, .copy_interface_from[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cuda, .copy_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cuda_to_cpu, .copy_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda, .copy_data_from[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cpu_to_cuda, .copy_data_from[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda, .copy2d_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cpu, .copy2d_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cuda, .copy2d_data_from[STARPU_CPU_RAM] = _starpu_cuda_copy2d_data_from_cpu_to_cuda, .copy2d_data_from[STARPU_CUDA_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cuda, #ifdef STARPU_USE_CUDA_MAP .map[STARPU_CPU_RAM] = _starpu_cuda_map_ram, .unmap[STARPU_CPU_RAM] = _starpu_cuda_unmap_ram, .update_map[STARPU_CPU_RAM] = _starpu_cuda_update_map, #endif .wait_request_completion = _starpu_cuda_wait_request_completion, .test_request_completion = _starpu_cuda_test_request_completion, #endif }; starpu-1.4.9+dfsg/src/drivers/cuda/driver_cuda.h000066400000000000000000000063511507764646700216360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2015-2015 Mathieu Lirzin * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DRIVER_CUDA_H__ #define __DRIVER_CUDA_H__ /** @file */ #include void _starpu_cuda_preinit(void); #ifdef STARPU_USE_CUDA #include #include #ifdef STARPU_HAVE_NVML_H #include #endif #endif #include #include #include #pragma GCC visibility push(hidden) extern struct _starpu_driver_ops _starpu_driver_cuda_ops; extern struct _starpu_node_ops _starpu_driver_cuda_node_ops; extern int _starpu_nworker_per_cuda; void _starpu_cuda_init(void); unsigned _starpu_get_cuda_device_count(void); #ifdef STARPU_HAVE_HWLOC struct _starpu_machine_topology; hwloc_obj_t _starpu_cuda_get_hwloc_obj(hwloc_topology_t topology, int devid); #endif extern int _starpu_cuda_bus_ids[STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES][STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES]; #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) void _starpu_cuda_discover_devices (struct _starpu_machine_config *); void _starpu_init_cuda_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *); void _starpu_cuda_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config, struct _starpu_worker *workerarg); void _starpu_cuda_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config, struct _starpu_worker *workerarg); void _starpu_init_cuda(void); void _starpu_init_cublas_v2_func(void); void _starpu_shutdown_cublas_v2_func(void); void _starpu_cublas_v2_init(void); void _starpu_cublas_v2_shutdown(void); void *_starpu_cuda_worker(void *); #ifdef STARPU_HAVE_NVML_H nvmlDevice_t _starpu_cuda_get_nvmldev(struct cudaDeviceProp *props); extern __typeof__(nvmlInit) *_starpu_nvmlInit; extern __typeof__(nvmlDeviceGetNvLinkState) *_starpu_nvmlDeviceGetNvLinkState; extern __typeof__(nvmlDeviceGetNvLinkRemotePciInfo) *_starpu_nvmlDeviceGetNvLinkRemotePciInfo; extern __typeof__(nvmlDeviceGetHandleByIndex) *_starpu_nvmlDeviceGetHandleByIndex; extern __typeof__(nvmlDeviceGetHandleByPciBusId) *_starpu_nvmlDeviceGetHandleByPciBusId; extern __typeof__(nvmlDeviceGetIndex) *_starpu_nvmlDeviceGetIndex; extern __typeof__(nvmlDeviceGetPciInfo) *_starpu_nvmlDeviceGetPciInfo; extern __typeof__(nvmlDeviceGetUUID) *_starpu_nvmlDeviceGetUUID; #if HAVE_DECL_NVMLDEVICEGETTOTALENERGYCONSUMPTION extern __typeof__(nvmlDeviceGetTotalEnergyConsumption) *_starpu_nvmlDeviceGetTotalEnergyConsumption; #endif #endif #else # define _starpu_cuda_discover_devices(config) ((void) config) #endif #pragma GCC visibility pop #endif // __DRIVER_CUDA_H__ starpu-1.4.9+dfsg/src/drivers/cuda/driver_cuda0.c000066400000000000000000000625031507764646700217120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This is a version of the CUDA driver with very minimal features: * - synchronous kernel execution * - synchronous data transfers * * This is not meant to be actually used :) * * It is only meant as a basic driver sample, easy to get inspired from for * writing other drivers. */ #include #include #include #include #include #include #include #include #include #include "driver_cuda.h" #include #include #include #include #include #include #include #ifdef STARPU_USE_CUDA #include #endif /* Consider a rough 10% overhead cost */ #define FREE_MARGIN 0.9 static size_t global_mem[STARPU_MAXCUDADEVS]; int _starpu_cuda_bus_ids[STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES][STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES]; /* Note: streams are not thread-safe, so we define them for each CUDA worker * emitting a GPU-GPU transfer */ static struct cudaDeviceProp props[STARPU_MAXCUDADEVS]; static unsigned cuda_bindid_init[STARPU_MAXCUDADEVS]; static unsigned cuda_bindid[STARPU_MAXCUDADEVS]; static unsigned cuda_memory_init[STARPU_MAXCUDADEVS]; static unsigned cuda_memory_nodes[STARPU_MAXCUDADEVS]; int _starpu_nworker_per_cuda = 1; static size_t _starpu_cuda_get_global_mem_size(unsigned devid) { return global_mem[devid]; } cudaStream_t starpu_cuda_get_local_stream(void) { return NULL; } const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid) { struct _starpu_machine_config *config = _starpu_get_machine_config(); unsigned devid = config->workers[workerid].devid; return &props[devid]; } /* Early library initialization, before anything else, just initialize data */ void _starpu_cuda_init(void) { memset(&cuda_bindid_init, 0, sizeof(cuda_bindid_init)); memset(&cuda_memory_init, 0, sizeof(cuda_memory_init)); } /* Return the number of devices usable in the system. * The value returned cannot be greater than MAXCUDADEVS */ static unsigned _starpu_get_cuda_device_count(void) { int cnt; cudaError_t cures; cures = cudaGetDeviceCount(&cnt); if (STARPU_UNLIKELY(cures)) return 0; if (cnt > STARPU_MAXCUDADEVS) { _STARPU_MSG("# Warning: %d CUDA devices available. Only %d enabled. Use configure option --enable-maxcudadev=xxx to update the maximum value of supported CUDA devices.\n", cnt, STARPU_MAXCUDADEVS); cnt = STARPU_MAXCUDADEVS; } return (unsigned)cnt; } /* This is run from initialize to determine the number of CUDA devices */ void _starpu_init_cuda(void) { } /* This is called to return the real (non-clamped) number of devices */ void _starpu_cuda_discover_devices(struct _starpu_machine_config *config) { /* Discover the number of CUDA devices. Fill the result in CONFIG. */ int cnt; cudaError_t cures; cures = cudaGetDeviceCount(&cnt); if (STARPU_UNLIKELY(cures != cudaSuccess)) cnt = 0; config->topology.nhwdevices[STARPU_CUDA_WORKER] = cnt; } static void _starpu_initialize_workers_cuda_gpuid(struct _starpu_machine_config *config) { struct _starpu_machine_topology *topology = &config->topology; struct starpu_conf *uconf = &config->conf; _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_cuda_gpuid == 0 ? NULL : (int *)uconf->workers_cuda_gpuid, &(config->current_devid[STARPU_CUDA_WORKER]), (int *)topology->workers_devid[STARPU_CUDA_WORKER], "STARPU_WORKERS_CUDAID", topology->nhwdevices[STARPU_CUDA_WORKER], STARPU_CUDA_WORKER); _starpu_devices_drop_duplicate(topology->workers_devid[STARPU_CUDA_WORKER]); } /* Determine which devices we will use */ void _starpu_init_cuda_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config) { int ncuda = config->conf.ncuda; if (ncuda != 0) { /* The user did not disable CUDA. We need to * initialize CUDA early to count the number of * devices */ _starpu_init_cuda(); int nb_devices = _starpu_get_cuda_device_count(); _starpu_topology_check_ndevices(&ncuda, nb_devices, 0, STARPU_MAXCUDADEVS, 0, "ncuda", "CUDA", "maxcudadev"); } /* Now we know how many CUDA devices will be used */ topology->ndevices[STARPU_CUDA_WORKER] = ncuda; _starpu_initialize_workers_cuda_gpuid(config); unsigned cudagpu; for (cudagpu = 0; (int) cudagpu < ncuda; cudagpu++) { int devid = _starpu_get_next_devid(topology, config, STARPU_CUDA_WORKER); if (devid == -1) { // There is no more devices left topology->ndevices[STARPU_CUDA_WORKER] = cudagpu; break; } _starpu_topology_configure_workers(topology, config, STARPU_CUDA_WORKER, cudagpu, devid, 0, 0, 1, 1, NULL, NULL); } /* Don't copy this, just here for other code to work fine */ topology->cuda_th_per_stream = 0; topology->cuda_th_per_dev = 1; } /* Bind the driver on a CPU core */ void _starpu_cuda_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { /* Perhaps the worker has some "favourite" bindings */ unsigned *preferred_binding = NULL; unsigned npreferred = 0; unsigned devid = workerarg->devid; if (cuda_bindid_init[devid]) { workerarg->bindid = cuda_bindid[devid]; } else { cuda_bindid_init[devid] = 1; workerarg->bindid = cuda_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); } } /* Set up memory and buses */ void _starpu_cuda_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { unsigned memory_node = -1; unsigned devid = workerarg->devid; unsigned numa; if (cuda_memory_init[devid]) { memory_node = cuda_memory_nodes[devid]; } else { cuda_memory_init[devid] = 1; memory_node = cuda_memory_nodes[devid] = _starpu_memory_node_register(STARPU_CUDA_RAM, devid); for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) { _starpu_cuda_bus_ids[numa][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(numa, memory_node); _starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][numa] = _starpu_register_bus(memory_node, numa); } } _starpu_memory_node_add_nworkers(memory_node); //This worker can also manage transfers on NUMA nodes for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) _starpu_worker_drives_memory_node(workerarg, numa); _starpu_worker_drives_memory_node(workerarg, memory_node); workerarg->memory_node = memory_node; } /* Set the current CUDA device */ void starpu_cuda_set_device(unsigned devid STARPU_ATTRIBUTE_UNUSED) { cudaError_t cures; cures = cudaSetDevice(devid); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); } static void _starpu_cuda_limit_gpu_mem_if_needed(unsigned devid) { starpu_ssize_t limit; size_t STARPU_ATTRIBUTE_UNUSED totalGlobalMem = 0; size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0; /* Find the size of the memory on the device */ totalGlobalMem = props[devid].totalGlobalMem; limit = totalGlobalMem / (1024*1024) * FREE_MARGIN; global_mem[devid] = limit * 1024*1024; } /* Really initialize one device */ static void init_device_context(unsigned devid, unsigned memnode) { STARPU_ASSERT(devid < STARPU_MAXCUDADEVS); cudaError_t cures; starpu_cuda_set_device(devid); /* force CUDA to initialize the context for real */ cures = cudaFree(0); if (STARPU_UNLIKELY(cures)) { if (cures == cudaErrorDevicesUnavailable) { _STARPU_MSG("All CUDA-capable devices are busy or unavailable\n"); exit(77); } STARPU_CUDA_REPORT_ERROR(cures); } cures = cudaGetDeviceProperties(&props[devid], devid); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); _starpu_cuda_limit_gpu_mem_if_needed(devid); _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cuda_get_global_mem_size(devid)); } /* De-initialize one device */ static void deinit_device_context(unsigned devid STARPU_ATTRIBUTE_UNUSED) { } /* This is run from the driver thread to initialize the driver CUDA context */ static int _starpu_cuda_driver_init(struct _starpu_worker *worker) { _starpu_driver_start(worker, STARPU_CUDA_WORKER, 0); _starpu_set_local_worker_key(worker); unsigned devid = worker->devid; unsigned memnode = worker->memory_node; init_device_context(devid, memnode); unsigned workerid = worker->workerid; float size = (float) global_mem[devid] / (1<<30); /* get the device's name */ char devname[64]; strncpy(devname, props[devid].name, 63); devname[63] = 0; snprintf(worker->name, sizeof(worker->name), "CUDA0 %u (%s %.1f GiB)", devid, devname, size); snprintf(worker->short_name, sizeof(worker->short_name), "CUDA %u", devid); _STARPU_DEBUG("cuda (%s) dev id %u thread is ready to run on CPU %d !\n", devname, devid, worker->bindid); _STARPU_TRACE_WORKER_INIT_END(workerid); { char thread_name[16]; snprintf(thread_name, sizeof(thread_name), "CUDA0 %u", worker->devid); starpu_pthread_setname(thread_name); } /* tell the main thread that this one is ready */ STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex); worker->status = STATUS_UNKNOWN; worker->worker_is_initialized = 1; STARPU_PTHREAD_COND_SIGNAL(&worker->ready_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex); return 0; } static int _starpu_cuda_driver_deinit(struct _starpu_worker *worker) { _STARPU_TRACE_WORKER_DEINIT_START; unsigned devid = worker->devid; unsigned memnode = worker->memory_node; /* I'm last, deinitialize device */ _starpu_datawizard_handle_all_pending_node_data_requests(memnode); /* In case there remains some memory that was automatically * allocated by StarPU, we release it now. Note that data * coherency is not maintained anymore at that point ! */ _starpu_free_all_automatically_allocated_buffers(memnode); _starpu_malloc_shutdown(memnode); deinit_device_context(devid); worker->worker_is_initialized = 0; _STARPU_TRACE_WORKER_DEINIT_END(STARPU_CUDA_WORKER); return 0; } static uintptr_t _starpu_cuda_malloc_on_node(unsigned dst_node, size_t size, int flags) { uintptr_t addr = 0; (void) flags; unsigned devid = starpu_memory_node_get_devid(dst_node); starpu_cuda_set_device(devid); /* Check if there is free memory */ size_t cuda_mem_free, cuda_mem_total; cudaError_t status; status = cudaMemGetInfo(&cuda_mem_free, &cuda_mem_total); if (status == cudaSuccess && cuda_mem_free * FREE_MARGIN < size) { addr = 0; } else { status = cudaMalloc((void **)&addr, size); if (!addr || (status != cudaSuccess)) { if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation)) STARPU_CUDA_REPORT_ERROR(status); addr = 0; } } return addr; } static void _starpu_cuda_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags) { (void) dst_node; (void) addr; (void) size; (void) flags; cudaError_t err; unsigned devid = starpu_memory_node_get_devid(dst_node); starpu_cuda_set_device(devid); err = cudaFree((void*)addr); if (STARPU_UNLIKELY(err != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(err); } int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream STARPU_ATTRIBUTE_UNUSED, enum cudaMemcpyKind kind) { cudaError_t cures = 0; if (kind == cudaMemcpyDeviceToDevice && src_node != dst_node) { STARPU_ABORT(); } /* do it in a synchronous fashion */ cures = cudaMemcpy((char *)dst_ptr, (char *)src_ptr, ssize, kind); (void) cudaGetLastError(); if (!cures) cures = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); return 0; } /* Driver porters: this is optional but really recommended */ int starpu_cuda_copy2d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, cudaStream_t stream STARPU_ATTRIBUTE_UNUSED, enum cudaMemcpyKind kind) { cudaError_t cures = 0; if (kind == cudaMemcpyDeviceToDevice && src_node != dst_node) { STARPU_ABORT_MSG("CUDA memcpy 3D peer not available, but core triggered one ?!"); } cures = cudaMemcpy2D((char *)dst_ptr, ld_dst, (char *)src_ptr, ld_src, blocksize, numblocks, kind); if (!cures) cures = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); return 0; } static int _starpu_cuda_copy_interface(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { (void) req; int ret = 1; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; STARPU_ASSERT(copy_methods->any_to_any); copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); return ret; } static int _starpu_cuda_copy_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel STARPU_ATTRIBUTE_UNUSED) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM); return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, size, NULL, cudaMemcpyDeviceToHost); } static int _starpu_cuda_copy_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel STARPU_ATTRIBUTE_UNUSED) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); #ifndef STARPU_HAVE_CUDA_MEMCPY_PEER STARPU_ASSERT(src_node == dst_node); #endif return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, size, NULL, cudaMemcpyDeviceToDevice); } static int _starpu_cuda_copy_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel STARPU_ATTRIBUTE_UNUSED) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, size, NULL, cudaMemcpyHostToDevice); } /* Driver porters: these are optional but really recommended */ static int _starpu_cuda_copy2d_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel STARPU_ATTRIBUTE_UNUSED) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM); return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, blocksize, numblocks, ld_src, ld_dst, NULL, cudaMemcpyDeviceToHost); } static int _starpu_cuda_copy2d_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel STARPU_ATTRIBUTE_UNUSED) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); #ifndef STARPU_HAVE_CUDA_MEMCPY_PEER STARPU_ASSERT(src_node == dst_node); #endif return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, blocksize, numblocks, ld_src, ld_dst, NULL, cudaMemcpyDeviceToDevice); } static int _starpu_cuda_copy2d_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel STARPU_ATTRIBUTE_UNUSED) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, blocksize, numblocks, ld_src, ld_dst, NULL, cudaMemcpyHostToDevice); } static int _starpu_cuda_is_direct_access_supported(unsigned node, unsigned handling_node) { /* Direct GPU-GPU transfers are not allowed in general */ (void) node; (void) handling_node; return 0; } static int start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker) { STARPU_ASSERT(j); struct starpu_task *task = j->task; int profiling = starpu_profiling_status_get(); STARPU_ASSERT(task); struct starpu_codelet *cl = task->cl; STARPU_ASSERT(cl); _starpu_set_current_task(task); j->workerid = worker->workerid; /* Fetch data input synchronously */ int ret = _starpu_fetch_task_input(task, j, 0); if (ret != 0) { /* there was not enough memory so the codelet cannot be executed right now ... */ /* push the codelet back and try another one ... */ return -EAGAIN; } _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, profiling); starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, j->nimpl); STARPU_ASSERT_MSG(func, "when STARPU_CUDA is defined in 'where', cuda_func or cuda_funcs has to be defined"); if (_starpu_get_disable_kernels() <= 0) { _STARPU_TRACE_START_EXECUTING(j); func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); _STARPU_TRACE_END_EXECUTING(j); } return 0; } static void finish_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker); /* Execute a job, up to completion for synchronous jobs */ static int execute_job_on_cuda(struct starpu_task *task, struct _starpu_worker *worker) { int res; struct _starpu_job *j = _starpu_get_job_associated_to_task(task); res = start_job_on_cuda(j, worker); if (res) { switch (res) { case -EAGAIN: _STARPU_DISP("ouch, CUDA could not actually run task %p, putting it back...\n", task); _starpu_push_task_to_workers(task); return -EAGAIN; default: STARPU_ABORT(); } } finish_job_on_cuda(j, worker); return 0; } static void finish_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker) { int profiling = starpu_profiling_status_get(); worker->current_task = NULL; _starpu_driver_end_job(worker, j, &worker->perf_arch, 0, profiling); _starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, profiling); _starpu_push_task_output(j); _starpu_set_current_task(NULL); _starpu_handle_job_termination(j); } /* One iteration of the main driver loop */ static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker) { struct starpu_task *task; struct _starpu_job *j; int res; unsigned memnode = worker->memory_node; /* Make some progress */ _starpu_datawizard_progress(1); if (memnode != STARPU_MAIN_RAM) { _starpu_datawizard_progress(1); } /* And pull a task */ task = _starpu_get_worker_task(worker, worker->workerid, worker->memory_node); if (!task) return 0; j = _starpu_get_job_associated_to_task(task); /* can CUDA do that task ? */ if (!_STARPU_MAY_PERFORM(j, CUDA)) { /* this is neither a cuda or a cublas task */ _starpu_worker_refuse_task(worker, task); return 0; } worker->current_task = task; res = execute_job_on_cuda(task, worker); if (res) { switch (res) { case -EAGAIN: _starpu_push_task_to_workers(task); return 0; default: STARPU_ABORT(); } } return 0; } void *_starpu_cuda_worker(void *_arg) { struct _starpu_worker *worker = _arg; _starpu_cuda_driver_init(worker); while (_starpu_machine_is_running()) { _starpu_may_pause(); _starpu_cuda_driver_run_once(worker); } _starpu_cuda_driver_deinit(worker); return NULL; } void starpu_cublas_report_error(const char *func, const char *file, int line, int status) { char *errormsg; switch (status) { case CUBLAS_STATUS_SUCCESS: errormsg = "success"; break; case CUBLAS_STATUS_NOT_INITIALIZED: errormsg = "not initialized"; break; case CUBLAS_STATUS_ALLOC_FAILED: errormsg = "alloc failed"; break; case CUBLAS_STATUS_INVALID_VALUE: errormsg = "invalid value"; break; case CUBLAS_STATUS_ARCH_MISMATCH: errormsg = "arch mismatch"; break; case CUBLAS_STATUS_EXECUTION_FAILED: errormsg = "execution failed"; break; case CUBLAS_STATUS_INTERNAL_ERROR: errormsg = "internal error"; break; default: errormsg = "unknown error"; break; } _STARPU_MSG("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg); STARPU_ABORT(); } void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status) { const char *errormsg = cudaGetErrorString(status); _STARPU_ERROR("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg); } #ifdef STARPU_HAVE_LIBCUSOLVER void starpu_cusolver_report_error(const char *func, const char *file, int line, cusolverStatus_t status) { #define REPORT(error) case error: errormsg = #error; break; char *errormsg; switch (status) { REPORT(CUSOLVER_STATUS_SUCCESS); REPORT(CUSOLVER_STATUS_NOT_INITIALIZED); REPORT(CUSOLVER_STATUS_ALLOC_FAILED); REPORT(CUSOLVER_STATUS_INVALID_VALUE); REPORT(CUSOLVER_STATUS_ARCH_MISMATCH); REPORT(CUSOLVER_STATUS_MAPPING_ERROR); REPORT(CUSOLVER_STATUS_EXECUTION_FAILED); REPORT(CUSOLVER_STATUS_INTERNAL_ERROR); REPORT(CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED); REPORT(CUSOLVER_STATUS_NOT_SUPPORTED); REPORT(CUSOLVER_STATUS_ZERO_PIVOT); REPORT(CUSOLVER_STATUS_INVALID_LICENSE); #if defined(CUSOLVER_VER_MAJOR) && (CUSOLVER_VER_MAJOR >= 11) REPORT(CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED); REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID); REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC); REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE); REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER); REPORT(CUSOLVER_STATUS_IRS_INTERNAL_ERROR); REPORT(CUSOLVER_STATUS_IRS_NOT_SUPPORTED); REPORT(CUSOLVER_STATUS_IRS_OUT_OF_RANGE); REPORT(CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES); REPORT(CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED); REPORT(CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED); REPORT(CUSOLVER_STATUS_IRS_MATRIX_SINGULAR); REPORT(CUSOLVER_STATUS_INVALID_WORKSPACE); #endif default: errormsg = "unknown error"; break; } _STARPU_MSG("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg); STARPU_ABORT(); } #endif static int _starpu_cuda_run_from_worker(struct _starpu_worker *worker) { /* Let's go ! */ _starpu_cuda_worker(worker); return 0; } struct _starpu_driver_ops _starpu_driver_cuda_ops = { .init = _starpu_cuda_driver_init, .run = _starpu_cuda_run_from_worker, .run_once = _starpu_cuda_driver_run_once, .deinit = _starpu_cuda_driver_deinit, }; struct _starpu_node_ops _starpu_driver_cuda_node_ops = { .name = "cuda0 driver", .malloc_on_node = _starpu_cuda_malloc_on_node, .free_on_node = _starpu_cuda_free_on_node, .is_direct_access_supported = _starpu_cuda_is_direct_access_supported, .copy_interface_to[STARPU_CPU_RAM] = _starpu_cuda_copy_interface, .copy_interface_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface, .copy_interface_from[STARPU_CPU_RAM] = _starpu_cuda_copy_interface, .copy_interface_from[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface, .copy_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cuda_to_cpu, .copy_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda, .copy_data_from[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cpu_to_cuda, .copy_data_from[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda, .copy2d_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cpu, .copy2d_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cuda, .copy2d_data_from[STARPU_CPU_RAM] = _starpu_cuda_copy2d_data_from_cpu_to_cuda, .copy2d_data_from[STARPU_CUDA_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cuda, }; starpu-1.4.9+dfsg/src/drivers/cuda/driver_cuda1.c000066400000000000000000001275561507764646700217250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This is a version of the CUDA driver with reduced features: * - asynchronous kernel execution * - asynchronous data transfers * - peer2peer transfers * * This is not meant to be actually used :) * * It is only meant as a basic driver sample, easy to get inspired from for * writing other drivers. */ #include #include #include #include #include #include #include #include #include #include "driver_cuda.h" #include #include #include #include #include #include #include #if HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX #include #endif #ifdef STARPU_USE_CUDA #include #endif #if CUDART_VERSION >= 5000 /* Avoid letting our streams spuriously synchronize with the NULL stream */ #define starpu_cudaStreamCreate(stream) cudaStreamCreateWithFlags(stream, cudaStreamNonBlocking) #else #define starpu_cudaStreamCreate(stream) cudaStreamCreate(stream) #endif /* Consider a rough 10% overhead cost */ #define FREE_MARGIN 0.9 static size_t global_mem[STARPU_MAXCUDADEVS]; int _starpu_cuda_bus_ids[STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES][STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES]; static cudaStream_t streams[STARPU_NMAXWORKERS]; static char used_stream[STARPU_NMAXWORKERS]; static cudaStream_t out_transfer_streams[STARPU_MAXCUDADEVS]; static cudaStream_t in_transfer_streams[STARPU_MAXCUDADEVS]; /* Note: streams are not thread-safe, so we define them for each CUDA worker * emitting a GPU-GPU transfer */ static cudaStream_t in_peer_transfer_streams[STARPU_MAXCUDADEVS][STARPU_MAXCUDADEVS]; static struct cudaDeviceProp props[STARPU_MAXCUDADEVS]; static cudaEvent_t task_events[STARPU_NMAXWORKERS]; static unsigned cuda_bindid_init[STARPU_MAXCUDADEVS]; static unsigned cuda_bindid[STARPU_MAXCUDADEVS]; static unsigned cuda_memory_init[STARPU_MAXCUDADEVS]; static unsigned cuda_memory_nodes[STARPU_MAXCUDADEVS]; int _starpu_nworker_per_cuda = 1; static size_t _starpu_cuda_get_global_mem_size(unsigned devid) { return global_mem[devid]; } cudaStream_t starpu_cuda_get_local_in_transfer_stream() { int worker = starpu_worker_get_id_check(); int devid = starpu_worker_get_devid(worker); cudaStream_t stream; stream = in_transfer_streams[devid]; STARPU_ASSERT(stream); return stream; } cudaStream_t starpu_cuda_get_in_transfer_stream(unsigned dst_node) { int dst_devid = starpu_memory_node_get_devid(dst_node); cudaStream_t stream; stream = in_transfer_streams[dst_devid]; STARPU_ASSERT(stream); return stream; } cudaStream_t starpu_cuda_get_local_out_transfer_stream() { int worker = starpu_worker_get_id_check(); int devid = starpu_worker_get_devid(worker); cudaStream_t stream; stream = out_transfer_streams[devid]; STARPU_ASSERT(stream); return stream; } cudaStream_t starpu_cuda_get_out_transfer_stream(unsigned src_node) { int src_devid = starpu_memory_node_get_devid(src_node); cudaStream_t stream; stream = out_transfer_streams[src_devid]; STARPU_ASSERT(stream); return stream; } cudaStream_t starpu_cuda_get_peer_transfer_stream(unsigned src_node, unsigned dst_node) { int src_devid = starpu_memory_node_get_devid(src_node); int dst_devid = starpu_memory_node_get_devid(dst_node); cudaStream_t stream; stream = in_peer_transfer_streams[src_devid][dst_devid]; STARPU_ASSERT(stream); return stream; } cudaStream_t starpu_cuda_get_local_stream(void) { int worker = starpu_worker_get_id_check(); used_stream[worker] = 1; return streams[worker]; } const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid) { struct _starpu_machine_config *config = _starpu_get_machine_config(); unsigned devid = config->workers[workerid].devid; return &props[devid]; } /* Early library initialization, before anything else, just initialize data */ void _starpu_cuda_init(void) { memset(&cuda_bindid_init, 0, sizeof(cuda_bindid_init)); memset(&cuda_memory_init, 0, sizeof(cuda_memory_init)); } /* Return the number of devices usable in the system. * The value returned cannot be greater than MAXCUDADEVS */ static unsigned _starpu_get_cuda_device_count(void) { int cnt; cudaError_t cures; cures = cudaGetDeviceCount(&cnt); if (STARPU_UNLIKELY(cures)) return 0; if (cnt > STARPU_MAXCUDADEVS) { _STARPU_MSG("# Warning: %d CUDA devices available. Only %d enabled. Use configure option --enable-maxcudadev=xxx to update the maximum value of supported CUDA devices.\n", cnt, STARPU_MAXCUDADEVS); cnt = STARPU_MAXCUDADEVS; } return (unsigned)cnt; } /* This is run from initialize to determine the number of CUDA devices */ void _starpu_init_cuda(void) { } /* This is called to really discover the hardware */ void _starpu_cuda_discover_devices(struct _starpu_machine_config *config) { /* Discover the number of CUDA devices. Fill the result in CONFIG. */ int cnt; cudaError_t cures; cures = cudaGetDeviceCount(&cnt); if (STARPU_UNLIKELY(cures != cudaSuccess)) cnt = 0; config->topology.nhwdevices[STARPU_CUDA_WORKER] = cnt; } static void _starpu_initialize_workers_cuda_gpuid(struct _starpu_machine_config *config) { struct _starpu_machine_topology *topology = &config->topology; struct starpu_conf *uconf = &config->conf; _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_cuda_gpuid == 0 ? NULL : (int *)uconf->workers_cuda_gpuid, &(config->current_devid[STARPU_CUDA_WORKER]), (int *)topology->workers_devid[STARPU_CUDA_WORKER], "STARPU_WORKERS_CUDAID", topology->nhwdevices[STARPU_CUDA_WORKER], STARPU_CUDA_WORKER); _starpu_devices_drop_duplicate(topology->workers_devid[STARPU_CUDA_WORKER]); } /* Determine which devices we will use */ void _starpu_init_cuda_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config) { int ncuda = config->conf.ncuda; if (ncuda != 0) { /* The user did not disable CUDA. We need to * initialize CUDA early to count the number of * devices */ _starpu_init_cuda(); int nb_devices = _starpu_get_cuda_device_count(); _starpu_topology_check_ndevices(&ncuda, nb_devices, 0, STARPU_MAXCUDADEVS, 0, "ncuda", "CUDA", "maxcudadev"); } /* Now we know how many CUDA devices will be used */ topology->ndevices[STARPU_CUDA_WORKER] = ncuda; _starpu_initialize_workers_cuda_gpuid(config); unsigned cudagpu; for (cudagpu = 0; (int) cudagpu < ncuda; cudagpu++) { int devid = _starpu_get_next_devid(topology, config, STARPU_CUDA_WORKER); if (devid == -1) { // There is no more devices left topology->ndevices[STARPU_CUDA_WORKER] = cudagpu; break; } _starpu_topology_configure_workers(topology, config, STARPU_CUDA_WORKER, cudagpu, devid, 0, 0, 1, 1, NULL, NULL); } /* Don't copy this, just here for other code to work fine */ topology->cuda_th_per_stream = 0; topology->cuda_th_per_dev = 1; } /* Bind the driver on a CPU core */ void _starpu_cuda_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { /* Perhaps the worker has some "favourite" bindings */ unsigned *preferred_binding = NULL; unsigned npreferred = 0; unsigned devid = workerarg->devid; if (cuda_bindid_init[devid]) { workerarg->bindid = cuda_bindid[devid]; } else { cuda_bindid_init[devid] = 1; workerarg->bindid = cuda_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); } } /* Set up memory and buses */ void _starpu_cuda_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { unsigned memory_node = -1; unsigned devid = workerarg->devid; unsigned numa; if (cuda_memory_init[devid]) { memory_node = cuda_memory_nodes[devid]; } else { cuda_memory_init[devid] = 1; memory_node = cuda_memory_nodes[devid] = _starpu_memory_node_register(STARPU_CUDA_RAM, devid); for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) { _starpu_cuda_bus_ids[numa][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(numa, memory_node); _starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][numa] = _starpu_register_bus(memory_node, numa); } int worker2; for (worker2 = 0; worker2 < workerarg->workerid; worker2++) { struct _starpu_worker *workerarg2 = &config->workers[worker2]; int devid2 = workerarg2->devid; if (workerarg2->arch == STARPU_CUDA_WORKER) { unsigned memory_node2 = starpu_worker_get_memory_node(worker2); _starpu_cuda_bus_ids[devid2+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(memory_node2, memory_node); _starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][devid2+STARPU_MAXNUMANODES] = _starpu_register_bus(memory_node, memory_node2); #if HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX { hwloc_obj_t obj, obj2, ancestor; obj = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid); obj2 = hwloc_cuda_get_device_osdev_by_index(config->topology.hwtopology, devid2); ancestor = hwloc_get_common_ancestor_obj(config->topology.hwtopology, obj, obj2); if (ancestor) { struct _starpu_hwloc_userdata *data = ancestor->userdata; #ifdef STARPU_VERBOSE { char name[64]; hwloc_obj_type_snprintf(name, sizeof(name), ancestor, 0); _STARPU_DEBUG("CUDA%u and CUDA%u are linked through %s, along %u GPUs\n", devid, devid2, name, data->ngpus); } #endif starpu_bus_set_ngpus(_starpu_cuda_bus_ids[devid2+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES], data->ngpus); starpu_bus_set_ngpus(_starpu_cuda_bus_ids[devid+STARPU_MAXNUMANODES][devid2+STARPU_MAXNUMANODES], data->ngpus); } } #endif } } } _starpu_memory_node_add_nworkers(memory_node); //This worker can also manage transfers on NUMA nodes for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) _starpu_worker_drives_memory_node(workerarg, numa); _starpu_worker_drives_memory_node(workerarg, memory_node); workerarg->memory_node = memory_node; } /* Set the current CUDA device */ void starpu_cuda_set_device(unsigned devid STARPU_ATTRIBUTE_UNUSED) { cudaError_t cures; cures = cudaSetDevice(devid); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); } static void _starpu_cuda_limit_gpu_mem_if_needed(unsigned devid) { starpu_ssize_t limit; size_t STARPU_ATTRIBUTE_UNUSED totalGlobalMem = 0; size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0; /* Find the size of the memory on the device */ totalGlobalMem = props[devid].totalGlobalMem; limit = totalGlobalMem / (1024*1024) * FREE_MARGIN; global_mem[devid] = limit * 1024*1024; } /* Really initialize one device */ static void init_device_context(unsigned devid, unsigned memnode) { STARPU_ASSERT(devid < STARPU_MAXCUDADEVS); cudaError_t cures; starpu_cuda_set_device(devid); /* force CUDA to initialize the context for real */ cures = cudaFree(0); if (STARPU_UNLIKELY(cures)) { if (cures == cudaErrorDevicesUnavailable) { _STARPU_MSG("All CUDA-capable devices are busy or unavailable\n"); exit(77); } STARPU_CUDA_REPORT_ERROR(cures); } cures = cudaGetDeviceProperties(&props[devid], devid); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER if (props[devid].computeMode == cudaComputeModeExclusive) { _STARPU_MSG("CUDA is in EXCLUSIVE-THREAD mode, but StarPU was built with multithread GPU control support, please either ask your administrator to use EXCLUSIVE-PROCESS mode (which should really be fine), or reconfigure with --disable-cuda-memcpy-peer but that will disable the memcpy-peer optimizations\n"); STARPU_ABORT(); } #endif cures = starpu_cudaStreamCreate(&in_transfer_streams[devid]); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); cures = starpu_cudaStreamCreate(&out_transfer_streams[devid]); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); int nworkers = starpu_worker_get_count(); int workerid; for (workerid = 0; workerid < nworkers; workerid++) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); if (worker->arch == STARPU_CUDA_WORKER && worker->subworkerid == 0) { cures = starpu_cudaStreamCreate(&in_peer_transfer_streams[worker->devid][devid]); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); } } _starpu_cuda_limit_gpu_mem_if_needed(devid); _starpu_memory_manager_set_global_memory_size(memnode, _starpu_cuda_get_global_mem_size(devid)); } /* De-initialize one device */ static void deinit_device_context(unsigned devid STARPU_ATTRIBUTE_UNUSED) { starpu_cuda_set_device(devid); cudaStreamDestroy(in_transfer_streams[devid]); cudaStreamDestroy(out_transfer_streams[devid]); int nworkers = starpu_worker_get_count(); int workerid; for (workerid = 0; workerid < nworkers; workerid++) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); if (worker->arch == STARPU_CUDA_WORKER && worker->subworkerid == 0) { cudaStreamDestroy(in_peer_transfer_streams[worker->devid][devid]); } } } static void init_worker_context(unsigned workerid, unsigned devid) { cudaError_t cures; starpu_cuda_set_device(devid); cures = cudaEventCreateWithFlags(&task_events[workerid], cudaEventDisableTiming); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); cures = starpu_cudaStreamCreate(&streams[workerid]); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); } static void deinit_worker_context(unsigned workerid, unsigned devid) { starpu_cuda_set_device(devid); cudaEventDestroy(task_events[workerid]); cudaStreamDestroy(streams[workerid]); } /* This is run from the driver thread to initialize the driver CUDA context */ static int _starpu_cuda_driver_init(struct _starpu_worker *worker) { _starpu_driver_start(worker, STARPU_CUDA_WORKER, 0); _starpu_set_local_worker_key(worker); unsigned devid = worker->devid; unsigned memnode = worker->memory_node; init_device_context(devid, memnode); unsigned workerid = worker->workerid; float size = (float) global_mem[devid] / (1<<30); /* get the device's name */ char devname[64]; strncpy(devname, props[devid].name, 63); devname[63] = 0; snprintf(worker->name, sizeof(worker->name), "CUDA1 %u (%s %.1f GiB)", devid, devname, size); snprintf(worker->short_name, sizeof(worker->short_name), "CUDA %u", devid); _STARPU_DEBUG("cuda (%s) dev id %u thread is ready to run on CPU %d !\n", devname, devid, worker->bindid); init_worker_context(workerid, worker->devid); _STARPU_TRACE_WORKER_INIT_END(workerid); { char thread_name[16]; snprintf(thread_name, sizeof(thread_name), "CUDA1 %u", worker->devid); starpu_pthread_setname(thread_name); } /* tell the main thread that this one is ready */ STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex); worker->status = STATUS_UNKNOWN; worker->worker_is_initialized = 1; STARPU_PTHREAD_COND_SIGNAL(&worker->ready_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex); return 0; } static int _starpu_cuda_driver_deinit(struct _starpu_worker *worker) { _STARPU_TRACE_WORKER_DEINIT_START; unsigned devid = worker->devid; unsigned memnode = worker->memory_node; /* I'm last, deinitialize device */ _starpu_datawizard_handle_all_pending_node_data_requests(memnode); /* In case there remains some memory that was automatically * allocated by StarPU, we release it now. Note that data * coherency is not maintained anymore at that point ! */ _starpu_free_all_automatically_allocated_buffers(memnode); _starpu_malloc_shutdown(memnode); deinit_device_context(devid); unsigned workerid = worker->workerid; deinit_worker_context(workerid, worker->devid); worker->worker_is_initialized = 0; _STARPU_TRACE_WORKER_DEINIT_END(STARPU_CUDA_WORKER); return 0; } static uintptr_t _starpu_cuda_malloc_on_node(unsigned dst_node, size_t size, int flags) { uintptr_t addr = 0; (void) flags; unsigned devid = starpu_memory_node_get_devid(dst_node); starpu_cuda_set_device(devid); /* Check if there is free memory */ size_t cuda_mem_free, cuda_mem_total; cudaError_t status; status = cudaMemGetInfo(&cuda_mem_free, &cuda_mem_total); if (status == cudaSuccess && cuda_mem_free * FREE_MARGIN < size) { addr = 0; } else { status = cudaMalloc((void **)&addr, size); if (!addr || (status != cudaSuccess)) { if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation)) STARPU_CUDA_REPORT_ERROR(status); addr = 0; } } return addr; } static void _starpu_cuda_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags) { (void) dst_node; (void) addr; (void) size; (void) flags; cudaError_t err; unsigned devid = starpu_memory_node_get_devid(dst_node); starpu_cuda_set_device(devid); err = cudaFree((void*)addr); if (STARPU_UNLIKELY(err != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(err); } int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind) { #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER int peer_copy = 0; int src_dev = -1, dst_dev = -1; #endif cudaError_t cures = 0; if (kind == cudaMemcpyDeviceToDevice && src_node != dst_node) { #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER peer_copy = 1; src_dev = starpu_memory_node_get_devid(src_node); dst_dev = starpu_memory_node_get_devid(dst_node); #else STARPU_ABORT(); #endif } if (stream) { double start; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER if (peer_copy) { cures = cudaMemcpyPeerAsync((char *) dst_ptr, dst_dev, (char *) src_ptr, src_dev, ssize, stream); } else #endif { cures = cudaMemcpyAsync((char *)dst_ptr, (char *)src_ptr, ssize, kind, stream); } (void) cudaGetLastError(); starpu_interface_end_driver_copy_async(src_node, dst_node, start); } /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ if (stream == NULL || cures) { /* do it in a synchronous fashion */ #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER if (peer_copy) { cures = cudaMemcpyPeer((char *) dst_ptr, dst_dev, (char *) src_ptr, src_dev, ssize); } else #endif { cures = cudaMemcpy((char *)dst_ptr, (char *)src_ptr, ssize, kind); } (void) cudaGetLastError(); if (!cures) cures = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); return 0; } return -EAGAIN; } /* Driver porters: this is optional but really recommended */ int starpu_cuda_copy2d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, cudaStream_t stream, enum cudaMemcpyKind kind) { #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER int peer_copy = 0; int src_dev = -1, dst_dev = -1; #endif cudaError_t cures = 0; if (kind == cudaMemcpyDeviceToDevice && src_node != dst_node) { #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER # ifdef BUGGED_MEMCPY3D STARPU_ABORT_MSG("CUDA memcpy 3D peer buggy, but core triggered one?!"); # endif peer_copy = 1; src_dev = starpu_memory_node_get_devid(src_node); dst_dev = starpu_memory_node_get_devid(dst_node); #else STARPU_ABORT_MSG("CUDA memcpy 3D peer not available, but core triggered one ?!"); #endif } #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER if (peer_copy) { struct cudaMemcpy3DPeerParms p; memset(&p, 0, sizeof(p)); p.srcDevice = src_dev; p.dstDevice = dst_dev; p.srcPtr = make_cudaPitchedPtr((char *)src_ptr, ld_src, blocksize, numblocks); p.dstPtr = make_cudaPitchedPtr((char *)dst_ptr, ld_dst, blocksize, numblocks); p.extent = make_cudaExtent(blocksize, numblocks, 1); if (stream) { double start; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); cures = cudaMemcpy3DPeerAsync(&p, stream); (void) cudaGetLastError(); } /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ if (stream == NULL || cures) { cures = cudaMemcpy3DPeer(&p); (void) cudaGetLastError(); if (!cures) cures = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); return 0; } } else #endif { if (stream) { double start; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); cures = cudaMemcpy2DAsync((char *)dst_ptr, ld_dst, (char *)src_ptr, ld_src, blocksize, numblocks, kind, stream); starpu_interface_end_driver_copy_async(src_node, dst_node, start); } /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ if (stream == NULL || cures) { cures = cudaMemcpy2D((char *)dst_ptr, ld_dst, (char *)src_ptr, ld_src, blocksize, numblocks, kind); if (!cures) cures = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); return 0; } } return -EAGAIN; } static inline cudaEvent_t *_starpu_cuda_event(union _starpu_async_channel_event *_event) { cudaEvent_t *event; STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); event = (void *) _event; return event; } static unsigned _starpu_cuda_test_request_completion(struct _starpu_async_channel *async_channel) { cudaEvent_t event; cudaError_t cures; unsigned success; event = *_starpu_cuda_event(&async_channel->event); cures = cudaEventQuery(event); success = (cures == cudaSuccess); if (success) cudaEventDestroy(event); else if (cures != cudaErrorNotReady) STARPU_CUDA_REPORT_ERROR(cures); return success; } /* Only used at starpu_shutdown */ static void _starpu_cuda_wait_request_completion(struct _starpu_async_channel *async_channel) { cudaEvent_t event; cudaError_t cures; event = *_starpu_cuda_event(&async_channel->event); cures = cudaEventSynchronize(event); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); cures = cudaEventDestroy(event); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); } #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER static void starpu_cuda_set_copy_device(unsigned src_node, unsigned dst_node) { enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); unsigned devid; if ((src_kind == STARPU_CUDA_RAM) && (dst_kind == STARPU_CUDA_RAM)) { /* GPU-GPU transfer, issue it from the destination */ devid = starpu_memory_node_get_devid(dst_node); } else { unsigned node = (dst_kind == STARPU_CUDA_RAM)?dst_node:src_node; devid = starpu_memory_node_get_devid(node); } starpu_cuda_set_device(devid); } #endif static int _starpu_cuda_copy_interface_from_cuda_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER starpu_cuda_set_copy_device(src_node, dst_node); #else STARPU_ASSERT(src_node == dst_node); #endif int ret = 1; cudaError_t cures; cudaStream_t stream; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; /* CUDA - CUDA transfer */ if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() || !copy_methods->any_to_any) { STARPU_ASSERT(copy_methods->any_to_any); /* this is not associated to a request so it's synchronous */ copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } else { req->async_channel.node_ops = &_starpu_driver_cuda_node_ops; cures = cudaEventCreateWithFlags(_starpu_cuda_event(&req->async_channel.event), cudaEventDisableTiming); if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); stream = starpu_cuda_get_peer_transfer_stream(src_node, dst_node); STARPU_ASSERT(copy_methods->any_to_any); ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); cures = cudaEventRecord(*_starpu_cuda_event(&req->async_channel.event), stream); if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); } return ret; } static int _starpu_cuda_copy_interface_from_cuda_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM); #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER starpu_cuda_set_copy_device(src_node, dst_node); #endif int ret = 1; cudaError_t cures; cudaStream_t stream; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; /* only the proper CUBLAS thread can initiate this directly ! */ #if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) STARPU_ASSERT(starpu_worker_get_local_memory_node() == src_node); #endif if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() || !copy_methods->any_to_any) { /* this is not associated to a request so it's synchronous */ STARPU_ASSERT(copy_methods->any_to_any); copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } else { req->async_channel.node_ops = &_starpu_driver_cuda_node_ops; cures = cudaEventCreateWithFlags(_starpu_cuda_event(&req->async_channel.event), cudaEventDisableTiming); if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); stream = starpu_cuda_get_out_transfer_stream(src_node); STARPU_ASSERT(copy_methods->any_to_any); ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); cures = cudaEventRecord(*_starpu_cuda_event(&req->async_channel.event), stream); if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); } return ret; } static int _starpu_cuda_copy_interface_from_cpu_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER starpu_cuda_set_copy_device(src_node, dst_node); #endif int ret = 1; cudaError_t cures; cudaStream_t stream; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; /* STARPU_CPU_RAM -> CUBLAS_RAM */ /* only the proper CUBLAS thread can initiate this ! */ #if !defined(STARPU_HAVE_CUDA_MEMCPY_PEER) STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node); #endif if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_cuda_copy_disabled() || !copy_methods->any_to_any) { /* this is not associated to a request so it's synchronous */ STARPU_ASSERT(copy_methods->any_to_any); copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } else { req->async_channel.node_ops = &_starpu_driver_cuda_node_ops; cures = cudaEventCreateWithFlags(_starpu_cuda_event(&req->async_channel.event), cudaEventDisableTiming); if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); stream = starpu_cuda_get_in_transfer_stream(dst_node); STARPU_ASSERT(copy_methods->any_to_any); ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); cures = cudaEventRecord(*_starpu_cuda_event(&req->async_channel.event), stream); if (STARPU_UNLIKELY(cures != cudaSuccess)) STARPU_CUDA_REPORT_ERROR(cures); } return ret; } static int _starpu_cuda_copy_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM); return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, size, async_channel?starpu_cuda_get_out_transfer_stream(src_node):NULL, cudaMemcpyDeviceToHost); } static int _starpu_cuda_copy_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); #ifndef STARPU_HAVE_CUDA_MEMCPY_PEER STARPU_ASSERT(src_node == dst_node); #endif return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, size, async_channel?starpu_cuda_get_peer_transfer_stream(src_node, dst_node):NULL, cudaMemcpyDeviceToDevice); } static int _starpu_cuda_copy_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); return starpu_cuda_copy_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, size, async_channel?starpu_cuda_get_in_transfer_stream(dst_node):NULL, cudaMemcpyHostToDevice); } static int _starpu_cuda_copy2d_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CPU_RAM); return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, blocksize, numblocks, ld_src, ld_dst, async_channel?starpu_cuda_get_out_transfer_stream(src_node):NULL, cudaMemcpyDeviceToHost); } static int _starpu_cuda_copy2d_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CUDA_RAM && dst_kind == STARPU_CUDA_RAM); #ifndef STARPU_HAVE_CUDA_MEMCPY_PEER STARPU_ASSERT(src_node == dst_node); #endif return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, blocksize, numblocks, ld_src, ld_dst, async_channel?starpu_cuda_get_peer_transfer_stream(src_node, dst_node):NULL, cudaMemcpyDeviceToDevice); } static int _starpu_cuda_copy2d_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_CUDA_RAM); return starpu_cuda_copy2d_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, blocksize, numblocks, ld_src, ld_dst, async_channel?starpu_cuda_get_in_transfer_stream(dst_node):NULL, cudaMemcpyHostToDevice); } static int _starpu_cuda_is_direct_access_supported(unsigned node, unsigned handling_node) { #if defined(STARPU_HAVE_CUDA_MEMCPY_PEER) (void) node; enum starpu_node_kind kind = starpu_node_get_kind(handling_node); return kind == STARPU_CUDA_RAM; #else /* STARPU_HAVE_CUDA_MEMCPY_PEER */ /* Direct GPU-GPU transfers are not allowed in general */ (void) node; (void) handling_node; return 0; #endif /* STARPU_HAVE_CUDA_MEMCPY_PEER */ } static void start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker) { STARPU_ASSERT(j); struct starpu_task *task = j->task; int profiling = starpu_profiling_status_get(); STARPU_ASSERT(task); struct starpu_codelet *cl = task->cl; STARPU_ASSERT(cl); _starpu_set_current_task(task); j->workerid = worker->workerid; _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, profiling); #if defined(STARPU_HAVE_CUDA_MEMCPY_PEER) /* We make sure we do manipulate the proper device */ starpu_cuda_set_device(worker->devid); #endif starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, j->nimpl); STARPU_ASSERT_MSG(func, "when STARPU_CUDA is defined in 'where', cuda_func or cuda_funcs has to be defined"); if (_starpu_get_disable_kernels() <= 0) { _STARPU_TRACE_START_EXECUTING(j); func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); _STARPU_TRACE_END_EXECUTING(j); } } static void finish_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker); /* Execute a job, up to completion for synchronous jobs */ static void execute_job_on_cuda(struct starpu_task *task, struct _starpu_worker *worker) { int workerid = worker->workerid; struct _starpu_job *j = _starpu_get_job_associated_to_task(task); start_job_on_cuda(j, worker); if (!used_stream[workerid]) { used_stream[workerid] = 1; _STARPU_DISP("Warning: starpu_cuda_get_local_stream() was not used to submit kernel to CUDA on worker %d. CUDA will thus introduce a lot of useless synchronizations, which will prevent proper overlapping of data transfers and kernel execution. See the CUDA-specific part of the 'Check List When Performance Are Not There' of the StarPU handbook\n", workerid); } if (task->cl->cuda_flags[j->nimpl] & STARPU_CUDA_ASYNC) { /* Record event to synchronize with task termination later */ cudaError_t cures = cudaEventRecord(task_events[workerid], starpu_cuda_get_local_stream()); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); } else /* Synchronous execution */ { #if !defined(STARPU_SIMGRID) STARPU_ASSERT_MSG(cudaStreamQuery(starpu_cuda_get_local_stream()) == cudaSuccess, "Unless when using the STARPU_CUDA_ASYNC flag, CUDA codelets have to wait for termination of their kernels on the starpu_cuda_get_local_stream() stream"); #endif finish_job_on_cuda(j, worker); } } static void finish_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worker) { int profiling = starpu_profiling_status_get(); worker->current_task = NULL; worker->ntasks--; _starpu_driver_end_job(worker, j, &worker->perf_arch, 0, profiling); _starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, profiling); _starpu_push_task_output(j); _starpu_set_current_task(NULL); _starpu_handle_job_termination(j); } /* One iteration of the main driver loop */ static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker) { struct starpu_task *task; struct _starpu_job *j; int res; int idle_tasks, idle_transfers; /* First poll for completed jobs */ idle_tasks = 0; idle_transfers = 0; int workerid = worker->workerid; unsigned memnode = worker->memory_node; do /* This do {} while (0) is only to match the cuda driver worker for look */ { if (!worker->ntasks) idle_tasks++; if (!worker->task_transferring) idle_transfers++; if (!worker->ntasks && !worker->task_transferring) { /* Even nothing to test */ continue; } /* First test for transfers pending for next task */ task = worker->task_transferring; if (task && worker->nb_buffers_transferred == worker->nb_buffers_totransfer) { STARPU_RMB(); _STARPU_TRACE_END_PROGRESS(memnode); j = _starpu_get_job_associated_to_task(task); _starpu_fetch_task_input_tail(task, j, worker); /* Reset it */ worker->task_transferring = NULL; execute_job_on_cuda(task, worker); _STARPU_TRACE_START_PROGRESS(memnode); } /* Then test for termination of queued tasks */ if (!worker->ntasks) /* No queued task */ continue; task = worker->current_task; if (task == worker->task_transferring) /* Next task is still pending transfer */ continue; /* On-going asynchronous task, check for its termination first */ cudaError_t cures = cudaEventQuery(task_events[workerid]); if (cures != cudaSuccess) { STARPU_ASSERT_MSG(cures == cudaErrorNotReady, "CUDA error on task %p, codelet %p (%s): %s (%d)", task, task->cl, _starpu_codelet_get_model_name(task->cl), cudaGetErrorString(cures), cures); } else { _STARPU_TRACE_END_PROGRESS(memnode); /* Asynchronous task completed! */ finish_job_on_cuda(_starpu_get_job_associated_to_task(task), worker); _STARPU_TRACE_START_PROGRESS(memnode); } if (worker->ntasks < 1) idle_tasks++; } while(0); #if defined(STARPU_NON_BLOCKING_DRIVERS) if (!idle_tasks) { /* No task ready yet, no better thing to do than waiting */ __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, !idle_transfers); return 0; } #endif /* Something done, make some progress */ res = __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, 1); if (worker->ntasks >= 1) return 0; /* And pull a task */ task = _starpu_get_worker_task(worker, worker->workerid, worker->memory_node); if (!task) return 0; worker->ntasks++; j = _starpu_get_job_associated_to_task(task); /* can CUDA do that task ? */ if (!_STARPU_MAY_PERFORM(j, CUDA)) { /* this is neither a cuda or a cublas task */ _starpu_worker_refuse_task(worker, task); return 0; } worker->current_task = task; /* Fetch data asynchronously */ _STARPU_TRACE_END_PROGRESS(memnode); _starpu_set_local_worker_key(worker); res = _starpu_fetch_task_input(task, j, 1); STARPU_ASSERT(res == 0); _STARPU_TRACE_START_PROGRESS(memnode); return 0; } void *_starpu_cuda_worker(void *_arg) { struct _starpu_worker *worker = _arg; _starpu_cuda_driver_init(worker); _STARPU_TRACE_START_PROGRESS(worker->memory_node); while (_starpu_machine_is_running()) { _starpu_may_pause(); _starpu_cuda_driver_run_once(worker); } _STARPU_TRACE_END_PROGRESS(worker->memory_node); _starpu_cuda_driver_deinit(worker); return NULL; } #ifdef STARPU_HAVE_HWLOC hwloc_obj_t _starpu_cuda_get_hwloc_obj(hwloc_topology_t topology, int devid) { #if !defined(STARPU_SIMGRID) && HAVE_DECL_HWLOC_CUDA_GET_DEVICE_OSDEV_BY_INDEX return hwloc_cuda_get_device_osdev_by_index(topology, devid); #else return NULL; #endif } #endif void starpu_cublas_report_error(const char *func, const char *file, int line, int status) { char *errormsg; switch (status) { case CUBLAS_STATUS_SUCCESS: errormsg = "success"; break; case CUBLAS_STATUS_NOT_INITIALIZED: errormsg = "not initialized"; break; case CUBLAS_STATUS_ALLOC_FAILED: errormsg = "alloc failed"; break; case CUBLAS_STATUS_INVALID_VALUE: errormsg = "invalid value"; break; case CUBLAS_STATUS_ARCH_MISMATCH: errormsg = "arch mismatch"; break; case CUBLAS_STATUS_EXECUTION_FAILED: errormsg = "execution failed"; break; case CUBLAS_STATUS_INTERNAL_ERROR: errormsg = "internal error"; break; default: errormsg = "unknown error"; break; } _STARPU_MSG("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg); STARPU_ABORT(); } void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status) { const char *errormsg = cudaGetErrorString(status); _STARPU_ERROR("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg); } #ifdef STARPU_HAVE_LIBCUSOLVER void starpu_cusolver_report_error(const char *func, const char *file, int line, cusolverStatus_t status) { #define REPORT(error) case error: errormsg = #error; break; char *errormsg; switch (status) { REPORT(CUSOLVER_STATUS_SUCCESS); REPORT(CUSOLVER_STATUS_NOT_INITIALIZED); REPORT(CUSOLVER_STATUS_ALLOC_FAILED); REPORT(CUSOLVER_STATUS_INVALID_VALUE); REPORT(CUSOLVER_STATUS_ARCH_MISMATCH); REPORT(CUSOLVER_STATUS_MAPPING_ERROR); REPORT(CUSOLVER_STATUS_EXECUTION_FAILED); REPORT(CUSOLVER_STATUS_INTERNAL_ERROR); REPORT(CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED); REPORT(CUSOLVER_STATUS_NOT_SUPPORTED); REPORT(CUSOLVER_STATUS_ZERO_PIVOT); REPORT(CUSOLVER_STATUS_INVALID_LICENSE); #if defined(CUSOLVER_VER_MAJOR) && (CUSOLVER_VER_MAJOR >= 11) REPORT(CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED); REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID); REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC); REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE); REPORT(CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER); REPORT(CUSOLVER_STATUS_IRS_INTERNAL_ERROR); REPORT(CUSOLVER_STATUS_IRS_NOT_SUPPORTED); REPORT(CUSOLVER_STATUS_IRS_OUT_OF_RANGE); REPORT(CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES); REPORT(CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED); REPORT(CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED); REPORT(CUSOLVER_STATUS_IRS_MATRIX_SINGULAR); REPORT(CUSOLVER_STATUS_INVALID_WORKSPACE); #endif default: errormsg = "unknown error"; break; } _STARPU_MSG("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg); STARPU_ABORT(); } #endif static int _starpu_cuda_run_from_worker(struct _starpu_worker *worker) { /* Let's go ! */ _starpu_cuda_worker(worker); return 0; } struct _starpu_driver_ops _starpu_driver_cuda_ops = { .init = _starpu_cuda_driver_init, .run = _starpu_cuda_run_from_worker, .run_once = _starpu_cuda_driver_run_once, .deinit = _starpu_cuda_driver_deinit, }; struct _starpu_node_ops _starpu_driver_cuda_node_ops = { .name = "cuda1 driver", .malloc_on_node = _starpu_cuda_malloc_on_node, .free_on_node = _starpu_cuda_free_on_node, .is_direct_access_supported = _starpu_cuda_is_direct_access_supported, .copy_interface_to[STARPU_CPU_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cpu, .copy_interface_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cuda, .copy_interface_from[STARPU_CPU_RAM] = _starpu_cuda_copy_interface_from_cpu_to_cuda, .copy_interface_from[STARPU_CUDA_RAM] = _starpu_cuda_copy_interface_from_cuda_to_cuda, .copy_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cuda_to_cpu, .copy_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda, .copy_data_from[STARPU_CPU_RAM] = _starpu_cuda_copy_data_from_cpu_to_cuda, .copy_data_from[STARPU_CUDA_RAM] = _starpu_cuda_copy_data_from_cuda_to_cuda, .copy2d_data_to[STARPU_CPU_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cpu, .copy2d_data_to[STARPU_CUDA_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cuda, .copy2d_data_from[STARPU_CPU_RAM] = _starpu_cuda_copy2d_data_from_cpu_to_cuda, .copy2d_data_from[STARPU_CUDA_RAM] = _starpu_cuda_copy2d_data_from_cuda_to_cuda, .wait_request_completion = _starpu_cuda_wait_request_completion, .test_request_completion = _starpu_cuda_test_request_completion, }; starpu-1.4.9+dfsg/src/drivers/cuda/driver_cuda_init.c000066400000000000000000000032461507764646700226540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include static struct _starpu_driver_info driver_info = { .name_upper = "CUDA", .name_var = "CUDA", .name_lower = "cuda", .memory_kind = STARPU_CUDA_RAM, .alpha = 13.33f, #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) .driver_ops = &_starpu_driver_cuda_ops, .run_worker = _starpu_cuda_worker, #if defined(STARPU_HAVE_HWLOC) && !defined(STARPU_USE_CUDA0) .get_hwloc_obj = _starpu_cuda_get_hwloc_obj, #endif .init_worker_binding = _starpu_cuda_init_worker_binding, .init_worker_memory = _starpu_cuda_init_worker_memory, #endif }; static struct _starpu_memory_driver_info memory_driver_info = { .name_upper = "CUDA", .worker_archtype = STARPU_CUDA_WORKER, #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID) .ops = &_starpu_driver_cuda_node_ops, #endif }; void _starpu_cuda_preinit(void) { _starpu_driver_info_register(STARPU_CUDA_WORKER, &driver_info); _starpu_memory_driver_info_register(STARPU_CUDA_RAM, &memory_driver_info); } starpu-1.4.9+dfsg/src/drivers/cuda/starpu_cublas.c000066400000000000000000000066171507764646700222160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #ifdef STARPU_USE_CUDA #include #include //#ifdef CUBLAS_V2_H_ //#error oops //#endif static int cublas_initialized[STARPU_NMAXWORKERS]; static starpu_pthread_mutex_t mutex[STARPU_MAXCUDADEVS]; static unsigned get_idx(void) { unsigned workerid = starpu_worker_get_id_check(); unsigned th_per_dev = _starpu_get_machine_config()->topology.cuda_th_per_dev; unsigned th_per_stream = _starpu_get_machine_config()->topology.cuda_th_per_stream; if (th_per_dev) return starpu_worker_get_devid(workerid); else if (th_per_stream) return workerid; else /* same thread for all devices */ return 0; } static void init_cublas_func(void *args STARPU_ATTRIBUTE_UNUSED) { unsigned idx = get_idx(); unsigned devid = starpu_worker_get_devid(starpu_worker_get_id_check()); STARPU_PTHREAD_MUTEX_LOCK(&mutex[devid]); if (!(cublas_initialized[idx]++)) { cublasStatus_t cublasst = cublasInit(); if (STARPU_UNLIKELY(cublasst)) STARPU_CUBLAS_REPORT_ERROR(cublasst); } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex[devid]); _starpu_init_cublas_v2_func(); } static void set_cublas_stream_func(void *args STARPU_ATTRIBUTE_UNUSED) { cublasSetKernelStream(starpu_cuda_get_local_stream()); } static void shutdown_cublas_func(void *args STARPU_ATTRIBUTE_UNUSED) { unsigned idx = get_idx(); unsigned devid = starpu_worker_get_devid(starpu_worker_get_id_check()); STARPU_PTHREAD_MUTEX_LOCK(&mutex[devid]); if (!--cublas_initialized[idx]) cublasShutdown(); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex[devid]); _starpu_shutdown_cublas_v2_func(); } #endif void starpu_cublas_init(void) { #ifdef STARPU_USE_CUDA if (!starpu_cuda_worker_get_count()) return; unsigned i; for (i = 0; i < STARPU_MAXCUDADEVS; i++) STARPU_PTHREAD_MUTEX_INIT0(&mutex[i], NULL); starpu_execute_on_each_worker(init_cublas_func, NULL, STARPU_CUDA); starpu_execute_on_each_worker(set_cublas_stream_func, NULL, STARPU_CUDA); _starpu_cublas_v2_init(); #endif } void starpu_cublas_shutdown(void) { #ifdef STARPU_USE_CUDA if (!starpu_cuda_worker_get_count()) return; starpu_execute_on_each_worker(shutdown_cublas_func, NULL, STARPU_CUDA); _starpu_cublas_v2_shutdown(); #endif } void starpu_cublas_set_stream(void) { #ifdef STARPU_USE_CUDA if (!starpu_cuda_worker_get_count()) return; unsigned workerid = starpu_worker_get_id_check(); int devnum = starpu_worker_get_devnum(workerid); if (!_starpu_get_machine_config()->topology.cuda_th_per_dev || (!_starpu_get_machine_config()->topology.cuda_th_per_stream && _starpu_get_machine_config()->topology.nworker[STARPU_CUDA_WORKER][devnum] > 1)) cublasSetKernelStream(starpu_cuda_get_local_stream()); #endif } starpu-1.4.9+dfsg/src/drivers/cuda/starpu_cublasLt.c000066400000000000000000000042001507764646700225000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #ifdef STARPU_HAVE_LIBCUBLASLT #include static cublasLtHandle_t cublasLt_handles[STARPU_NMAXWORKERS]; static cublasLtHandle_t main_handle; static void init_cublasLt_func(void *args STARPU_ATTRIBUTE_UNUSED) { cublasLtCreate(&cublasLt_handles[starpu_worker_get_id_check()]); // No need for setting streams, because the cublasLt handles are not bundled with streams } static void shutdown_cublasLt_func(void *args STARPU_ATTRIBUTE_UNUSED) { cublasLtDestroy(cublasLt_handles[starpu_worker_get_id_check()]); } #endif void starpu_cublasLt_init(void) { #ifdef STARPU_HAVE_LIBCUBLASLT if (!starpu_cuda_worker_get_count()) return; starpu_execute_on_each_worker_ex(init_cublasLt_func, NULL, STARPU_CUDA, "init_cublasLt"); if (cublasLtCreate(&main_handle) != CUBLAS_STATUS_SUCCESS) main_handle = NULL; #endif } void starpu_cublasLt_shutdown(void) { #ifdef STARPU_HAVE_LIBCUBLASLT if (!starpu_cuda_worker_get_count()) return; starpu_execute_on_each_worker_ex(shutdown_cublasLt_func, NULL, STARPU_CUDA, "shutdown_cublasLt"); if (main_handle) cublasLtDestroy(main_handle); #endif } #ifdef STARPU_HAVE_LIBCUBLASLT cublasLtHandle_t starpu_cublasLt_get_local_handle(void) { if (!starpu_cuda_worker_get_count()) return NULL; int workerid = starpu_worker_get_id(); if (workerid >= 0) return cublasLt_handles[workerid]; else return main_handle; } #endif starpu-1.4.9+dfsg/src/drivers/cuda/starpu_cublas_v2.c000066400000000000000000000034071507764646700226170ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #ifdef STARPU_USE_CUDA #include #include #include //#ifdef CUBLAS_H_ //#error oops //#endif static cublasHandle_t cublas_handles[STARPU_NMAXWORKERS]; static cublasHandle_t main_handle; void _starpu_init_cublas_v2_func(void) { cublasCreate(&cublas_handles[starpu_worker_get_id_check()]); cublasSetStream(cublas_handles[starpu_worker_get_id_check()], starpu_cuda_get_local_stream()); } void _starpu_shutdown_cublas_v2_func(void) { cublasDestroy(cublas_handles[starpu_worker_get_id_check()]); } void _starpu_cublas_v2_init(void) { if (cublasCreate(&main_handle) != CUBLAS_STATUS_SUCCESS) main_handle = NULL; } void _starpu_cublas_v2_shutdown(void) { if (main_handle) cublasDestroy(main_handle); } cublasHandle_t starpu_cublas_get_local_handle(void) { if (!starpu_cuda_worker_get_count()) return NULL; int workerid = starpu_worker_get_id(); if (workerid >= 0) return cublas_handles[workerid]; else return main_handle; } #endif starpu-1.4.9+dfsg/src/drivers/cuda/starpu_cusolver.c000066400000000000000000000072641507764646700226060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #ifdef STARPU_HAVE_LIBCUSOLVER #include #include #include static cusolverDnHandle_t cusolverDn_handles[STARPU_NMAXWORKERS]; static cusolverSpHandle_t cusolverSp_handles[STARPU_NMAXWORKERS]; static cusolverRfHandle_t cusolverRf_handles[STARPU_NMAXWORKERS]; static cusolverDnHandle_t mainDn_handle; static cusolverSpHandle_t mainSp_handle; static cusolverRfHandle_t mainRf_handle; static void init_cusolver_func(void *args STARPU_ATTRIBUTE_UNUSED) { cusolverDnCreate(&cusolverDn_handles[starpu_worker_get_id_check()]); cusolverDnSetStream(cusolverDn_handles[starpu_worker_get_id_check()], starpu_cuda_get_local_stream()); cusolverSpCreate(&cusolverSp_handles[starpu_worker_get_id_check()]); cusolverSpSetStream(cusolverSp_handles[starpu_worker_get_id_check()], starpu_cuda_get_local_stream()); cusolverRfCreate(&cusolverRf_handles[starpu_worker_get_id_check()]); // Not available? //cusolverRfSetStream(cusolverRf_handles[starpu_worker_get_id_check()], starpu_cuda_get_local_stream()); } static void shutdown_cusolver_func(void *args STARPU_ATTRIBUTE_UNUSED) { cusolverDnDestroy(cusolverDn_handles[starpu_worker_get_id_check()]); cusolverSpDestroy(cusolverSp_handles[starpu_worker_get_id_check()]); cusolverRfDestroy(cusolverRf_handles[starpu_worker_get_id_check()]); } #endif void starpu_cusolver_init(void) { #ifdef STARPU_HAVE_LIBCUSOLVER if (!starpu_cuda_worker_get_count()) return; starpu_execute_on_each_worker(init_cusolver_func, NULL, STARPU_CUDA); if (cusolverDnCreate(&mainDn_handle) != CUSOLVER_STATUS_SUCCESS) mainDn_handle = NULL; if (cusolverSpCreate(&mainSp_handle) != CUSOLVER_STATUS_SUCCESS) mainSp_handle = NULL; if (cusolverRfCreate(&mainRf_handle) != CUSOLVER_STATUS_SUCCESS) mainRf_handle = NULL; #endif } void starpu_cusolver_shutdown(void) { #ifdef STARPU_HAVE_LIBCUSOLVER if (!starpu_cuda_worker_get_count()) return; starpu_execute_on_each_worker(shutdown_cusolver_func, NULL, STARPU_CUDA); if (mainDn_handle) cusolverDnDestroy(mainDn_handle); if (mainSp_handle) cusolverSpDestroy(mainSp_handle); if (mainRf_handle) cusolverRfDestroy(mainRf_handle); #endif } #ifdef STARPU_HAVE_LIBCUSOLVER cusolverDnHandle_t starpu_cusolverDn_get_local_handle(void) { if (!starpu_cuda_worker_get_count()) return NULL; int workerid = starpu_worker_get_id(); if (workerid >= 0) return cusolverDn_handles[workerid]; else return mainDn_handle; } cusolverSpHandle_t starpu_cusolverSp_get_local_handle(void) { if (!starpu_cuda_worker_get_count()) return NULL; int workerid = starpu_worker_get_id(); if (workerid >= 0) return cusolverSp_handles[workerid]; else return mainSp_handle; } cusolverRfHandle_t starpu_cusolverRf_get_local_handle(void) { if (!starpu_cuda_worker_get_count()) return NULL; int workerid = starpu_worker_get_id(); if (workerid >= 0) return cusolverRf_handles[workerid]; else return mainRf_handle; } #endif starpu-1.4.9+dfsg/src/drivers/cuda/starpu_cusparse.c000066400000000000000000000043571507764646700225710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #ifdef STARPU_HAVE_LIBCUSPARSE #include static cusparseHandle_t cusparse_handles[STARPU_NMAXWORKERS]; static cusparseHandle_t main_handle; static void init_cusparse_func(void *args STARPU_ATTRIBUTE_UNUSED) { cusparseCreate(&cusparse_handles[starpu_worker_get_id_check()]); #if HAVE_DECL_CUSPARSESETSTREAM cusparseSetStream(cusparse_handles[starpu_worker_get_id_check()], starpu_cuda_get_local_stream()); #else cusparseSetKernelStream(cusparse_handles[starpu_worker_get_id_check()], starpu_cuda_get_local_stream()); #endif } static void shutdown_cusparse_func(void *args STARPU_ATTRIBUTE_UNUSED) { cusparseDestroy(cusparse_handles[starpu_worker_get_id_check()]); } #endif void starpu_cusparse_init(void) { #ifdef STARPU_HAVE_LIBCUSPARSE if (!starpu_cuda_worker_get_count()) return; starpu_execute_on_each_worker(init_cusparse_func, NULL, STARPU_CUDA); if (cusparseCreate(&main_handle) != CUSPARSE_STATUS_SUCCESS) main_handle = NULL; #endif } void starpu_cusparse_shutdown(void) { #ifdef STARPU_HAVE_LIBCUSPARSE if (!starpu_cuda_worker_get_count()) return; starpu_execute_on_each_worker(shutdown_cusparse_func, NULL, STARPU_CUDA); if (main_handle) cusparseDestroy(main_handle); #endif } #ifdef STARPU_HAVE_LIBCUSPARSE cusparseHandle_t starpu_cusparse_get_local_handle(void) { if (!starpu_cuda_worker_get_count()) return NULL; int workerid = starpu_worker_get_id(); if (workerid >= 0) return cusparse_handles[workerid]; else return main_handle; } #endif starpu-1.4.9+dfsg/src/drivers/disk/000077500000000000000000000000001507764646700172075ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/drivers/disk/driver_disk.c000066400000000000000000000260451507764646700216670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include static struct _starpu_node_ops _starpu_driver_disk_node_ops; static struct _starpu_memory_driver_info memory_driver_info = { .name_upper = "Disk", .worker_archtype = (enum starpu_worker_archtype) -1, .ops = &_starpu_driver_disk_node_ops, }; void _starpu_disk_preinit(void) { _starpu_memory_driver_info_register(STARPU_DISK_RAM, &memory_driver_info); } static uintptr_t _starpu_disk_malloc_on_node(unsigned dst_node, size_t size, int flags) { (void) flags; uintptr_t addr = 0; addr = (uintptr_t) _starpu_disk_alloc(dst_node, size); return addr; } static void _starpu_disk_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags) { (void) flags; _starpu_disk_free(dst_node, (void *) addr , size); } static int _starpu_disk_copy_src_to_disk(void * src, unsigned src_node, void * dst, size_t dst_offset, unsigned dst_node, size_t size, void * async_channel) { STARPU_ASSERT(starpu_node_get_kind(src_node) == STARPU_CPU_RAM); return _starpu_disk_write(src_node, dst_node, dst, src, dst_offset, size, async_channel); } static int _starpu_disk_copy_disk_to_src(void * src, size_t src_offset, unsigned src_node, void * dst, unsigned dst_node, size_t size, void * async_channel) { STARPU_ASSERT(starpu_node_get_kind(dst_node) == STARPU_CPU_RAM); return _starpu_disk_read(src_node, dst_node, src, dst, src_offset, size, async_channel); } static int _starpu_disk_copy_disk_to_disk(void * src, size_t src_offset, unsigned src_node, void * dst, size_t dst_offset, unsigned dst_node, size_t size, void * async_channel) { STARPU_ASSERT(starpu_node_get_kind(src_node) == STARPU_DISK_RAM && starpu_node_get_kind(dst_node) == STARPU_DISK_RAM); return _starpu_disk_copy(src_node, src, src_offset, dst_node, dst, dst_offset, size, async_channel); } static unsigned _starpu_disk_test_request_completion(struct _starpu_async_channel *async_channel) { struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&async_channel->event); unsigned success = starpu_disk_test_request(async_channel); if (disk_event->ptr != NULL && success) { if (disk_event->handle != NULL) { /* read is finished, we can already unpack */ disk_event->handle->ops->unpack_data(disk_event->handle, disk_event->node, disk_event->ptr, disk_event->size); } else { /* write is finished, ptr was allocated in pack_data */ _starpu_free_flags_on_node(disk_event->node, disk_event->ptr, disk_event->size, 0); } } return success; } /* Only used at starpu_shutdown */ static void _starpu_disk_wait_request_completion(struct _starpu_async_channel *async_channel) { struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&async_channel->event); starpu_disk_wait_request(async_channel); if (disk_event->ptr != NULL) { if (disk_event->handle != NULL) { /* read is finished, we can already unpack */ disk_event->handle->ops->unpack_data(disk_event->handle, disk_event->node, disk_event->ptr, disk_event->size); } else { /* write is finished, ptr was allocated in pack_data */ _starpu_free_flags_on_node(disk_event->node, disk_event->ptr, disk_event->size, 0); } } } static int _starpu_disk_copy_interface_from_disk_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_DISK_RAM && dst_kind == STARPU_CPU_RAM); int ret = 0; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&req->async_channel.event); if (req && !starpu_asynchronous_copy_disabled()) { req->async_channel.node_ops = &_starpu_driver_disk_node_ops; disk_event->requests = NULL; disk_event->ptr = NULL; disk_event->handle = NULL; } if(copy_methods->any_to_any) ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, req && !starpu_asynchronous_copy_disabled() ? &req->async_channel : NULL); else { void *obj = starpu_data_handle_to_pointer(handle, src_node); void * ptr = NULL; size_t size = 0; ret = _starpu_disk_full_read(src_node, dst_node, obj, &ptr, &size, req && !starpu_asynchronous_copy_disabled() ? &req->async_channel : NULL); if (ret == 0) { /* read is already finished, we can already unpack */ handle->ops->unpack_data(handle, dst_node, ptr, size); } else if (ret == -EAGAIN) { STARPU_ASSERT(req); disk_event->ptr = ptr; disk_event->node = dst_node; disk_event->size = size; disk_event->handle = handle; } STARPU_ASSERT(ret == 0 || ret == -EAGAIN); } return ret; } static int _starpu_disk_copy_interface_from_disk_to_disk(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_DISK_RAM && dst_kind == STARPU_DISK_RAM); int ret = 0; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; if (req && !starpu_asynchronous_copy_disabled()) { struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&req->async_channel.event); req->async_channel.node_ops = &_starpu_driver_disk_node_ops; disk_event->requests = NULL; disk_event->ptr = NULL; disk_event->handle = NULL; } ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, req && !starpu_asynchronous_copy_disabled() ? &req->async_channel : NULL); return ret; } static int _starpu_disk_copy_interface_from_cpu_to_disk(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_DISK_RAM); int ret = 0; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; struct _starpu_disk_event *disk_event = _starpu_disk_get_event(&req->async_channel.event); if (req && !starpu_asynchronous_copy_disabled()) { req->async_channel.node_ops = &_starpu_driver_disk_node_ops; disk_event->requests = NULL; disk_event->ptr = NULL; disk_event->handle = NULL; } if(copy_methods->any_to_any) ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, req && !starpu_asynchronous_copy_disabled() ? &req->async_channel : NULL); else { void *obj = starpu_data_handle_to_pointer(handle, dst_node); void * ptr = NULL; starpu_ssize_t size = 0; handle->ops->pack_data(handle, src_node, &ptr, &size); ret = _starpu_disk_full_write(src_node, dst_node, obj, ptr, size, req && !starpu_asynchronous_copy_disabled() ? &req->async_channel : NULL); if (ret == 0) { /* write is already finished, ptr was allocated in pack_data */ _starpu_free_flags_on_node(src_node, ptr, size, 0); } else if (ret == -EAGAIN) { STARPU_ASSERT(req); disk_event->ptr = ptr; disk_event->node = src_node; disk_event->size = size; } STARPU_ASSERT(ret == 0 || ret == -EAGAIN); } return ret; } static int _starpu_disk_copy_data_from_disk_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_DISK_RAM && dst_kind == STARPU_CPU_RAM); return _starpu_disk_copy_disk_to_src((void*) src, src_offset, src_node, (void*) (dst + dst_offset), dst_node, size, async_channel); } static int _starpu_disk_copy_data_from_disk_to_disk(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_DISK_RAM && dst_kind == STARPU_DISK_RAM); return _starpu_disk_copy_disk_to_disk((void*) src, src_offset, src_node, (void*) dst, dst_offset, dst_node, size, async_channel); } static int _starpu_disk_copy_data_from_cpu_to_disk(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_DISK_RAM); return _starpu_disk_copy_src_to_disk((void*) (src + src_offset), src_node, (void*) dst, dst_offset, dst_node, size, async_channel); } static int _starpu_disk_is_direct_access_supported(unsigned node, unsigned handling_node) { /* Each worker can manage disks but disk <-> disk is not always allowed */ switch (starpu_node_get_kind(handling_node)) { case STARPU_CPU_RAM: return 1; case STARPU_DISK_RAM: return _starpu_disk_can_copy(node, handling_node); default: return 0; } } static struct _starpu_node_ops _starpu_driver_disk_node_ops = { .name = "disk driver", .malloc_on_node = _starpu_disk_malloc_on_node, .free_on_node = _starpu_disk_free_on_node, .is_direct_access_supported = _starpu_disk_is_direct_access_supported, .copy_interface_to[STARPU_CPU_RAM] = _starpu_disk_copy_interface_from_disk_to_cpu, .copy_interface_to[STARPU_DISK_RAM] = _starpu_disk_copy_interface_from_disk_to_disk, .copy_interface_from[STARPU_CPU_RAM] = _starpu_disk_copy_interface_from_cpu_to_disk, .copy_interface_from[STARPU_DISK_RAM] = _starpu_disk_copy_interface_from_disk_to_disk, .copy_data_to[STARPU_CPU_RAM] = _starpu_disk_copy_data_from_disk_to_cpu, .copy_data_to[STARPU_DISK_RAM] = _starpu_disk_copy_data_from_disk_to_disk, .copy_data_from[STARPU_CPU_RAM] = _starpu_disk_copy_data_from_cpu_to_disk, .copy_data_from[STARPU_DISK_RAM] = _starpu_disk_copy_data_from_disk_to_disk, /* TODO: copy2D/3D? */ .wait_request_completion = _starpu_disk_wait_request_completion, .test_request_completion = _starpu_disk_test_request_completion, }; starpu-1.4.9+dfsg/src/drivers/disk/driver_disk.h000066400000000000000000000016121507764646700216650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DRIVER_DISK_H__ #define __DRIVER_DISK_H__ /** @file */ #pragma GCC visibility push(hidden) void _starpu_disk_preinit(void); #pragma GCC visibility pop #endif starpu-1.4.9+dfsg/src/drivers/driver_common/000077500000000000000000000000001507764646700211205ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/drivers/driver_common/driver_common.c000066400000000000000000000675771507764646700241550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_MMAP #include #endif #include #include #include void _starpu_driver_start_job(struct _starpu_worker *worker, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch, int rank, int profiling) { struct starpu_task *task = j->task; struct starpu_codelet *cl = task->cl; int workerid = worker->workerid; unsigned calibrate_model = 0; if (worker->bindid_requested != -1) { typedef unsigned __attribute__((__may_alias__)) alias_unsigned; typedef int __attribute__((__may_alias__)) alias_int; unsigned raw_bindid_requested = STARPU_VAL_EXCHANGE((alias_unsigned *)&worker->bindid_requested, -1); int bindid_requested = *(alias_int *)&raw_bindid_requested; if (bindid_requested != -1) { worker->bindid = bindid_requested; _starpu_bind_thread_on_cpu(worker->bindid, worker->workerid, NULL); } } if (cl->model && cl->model->benchmarking) calibrate_model = 1; /* If the job is executed on a combined worker there is no need for the * scheduler to process on non-master : it doesn't contain any valuable data * as it's not linked to an actual worker */ if (j->task_size == 1 && rank == 0) _starpu_sched_pre_exec_hook(task); struct timespec start; struct starpu_profiling_task_info *profiling_info = task->profiling_info; if ((profiling && profiling_info) || (rank == 0 && (calibrate_model || !_starpu_perf_counter_paused()))) _starpu_clock_gettime(&start); _starpu_add_worker_status(worker, STATUS_INDEX_EXECUTING, &start); if (rank == 0) { STARPU_ASSERT(task->status == STARPU_TASK_READY); if (!_starpu_perf_counter_paused() && !j->internal) { (void)STARPU_PERF_COUNTER_ADD64(& _starpu_task__g_current_ready__value, -1); if (task->cl && task->cl->perf_counter_values) { struct starpu_perf_counter_sample_cl_values * const pcv = task->cl->perf_counter_values; (void)STARPU_PERF_COUNTER_ADD64(&pcv->task.current_ready, -1); } } task->status = STARPU_TASK_RUNNING; STARPU_AYU_RUNTASK(j->job_id); if (_starpu_codelet_profiling) cl->per_worker_stats[workerid]++; if ((profiling && profiling_info) || calibrate_model || !_starpu_perf_counter_paused()) { worker->cl_start = start; if (profiling && profiling_info) { profiling_info->start_time = start; profiling_info->workerid = workerid; } if (task->predicted && !isnan(task->predicted)) { struct timespec exp_end = start; exp_end.tv_sec += task->predicted / 1000000; exp_end.tv_nsec += fmod(task->predicted, 1000000.) * 1000; if (exp_end.tv_nsec >= 1000000000) { exp_end.tv_sec++; exp_end.tv_nsec -= 1000000000; } worker->cl_expend = exp_end; } else { worker->cl_expend.tv_sec = 0; worker->cl_expend.tv_nsec = 0; } } else { worker->cl_start.tv_sec = 0; worker->cl_start.tv_nsec = 0; } _starpu_job_notify_start(j, perf_arch); } // Find out if the worker is the master of a parallel context struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(worker, j); if(!sched_ctx) sched_ctx = _starpu_get_sched_ctx_struct(j->task->sched_ctx); _starpu_sched_ctx_lock_read(sched_ctx->id); if(!sched_ctx->sched_policy) { if(!sched_ctx->awake_workers && sched_ctx->main_master == worker->workerid) { struct starpu_worker_collection *workers = sched_ctx->workers; struct starpu_sched_ctx_iterator it; int new_rank = 0; if (workers->init_iterator) workers->init_iterator(workers, &it); while (workers->has_next(workers, &it)) { int _workerid = workers->get_next(workers, &it); if (_workerid != workerid) { new_rank++; struct _starpu_worker *_worker = _starpu_get_worker_struct(_workerid); _starpu_driver_start_job(_worker, j, &_worker->perf_arch, new_rank, profiling); } } } _STARPU_TRACE_TASK_NAME_LINE_COLOR(j); _STARPU_TRACE_START_CODELET_BODY(j, j->nimpl, &sched_ctx->perf_arch, workerid, rank); } else { _STARPU_TRACE_TASK_NAME_LINE_COLOR(j); _STARPU_TRACE_START_CODELET_BODY(j, j->nimpl, perf_arch, workerid, rank); } _starpu_sched_ctx_unlock_read(sched_ctx->id); _STARPU_TASK_BREAK_ON(task, exec); } void _starpu_driver_end_job(struct _starpu_worker *worker, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch STARPU_ATTRIBUTE_UNUSED, int rank, int profiling) { struct starpu_task *task = j->task; struct starpu_codelet *cl = task->cl; int workerid = worker->workerid; unsigned calibrate_model = 0; // Find out if the worker is the master of a parallel context struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(worker, j); if(!sched_ctx) sched_ctx = _starpu_get_sched_ctx_struct(j->task->sched_ctx); if (!sched_ctx->sched_policy) { _starpu_perfmodel_create_comb_if_needed(&(sched_ctx->perf_arch)); _STARPU_TRACE_END_CODELET_BODY(j, j->nimpl, &(sched_ctx->perf_arch), workerid, rank); } else { _starpu_perfmodel_create_comb_if_needed(perf_arch); _STARPU_TRACE_END_CODELET_BODY(j, j->nimpl, perf_arch, workerid, rank); } if (cl && cl->model && cl->model->benchmarking) calibrate_model = 1; struct timespec end; struct starpu_profiling_task_info *profiling_info = task->profiling_info; if ((profiling && profiling_info) || (rank == 0 && (calibrate_model || !_starpu_perf_counter_paused() || worker->cl_start.tv_sec || worker->cl_start.tv_nsec))) _starpu_clock_gettime(&end); _starpu_clear_worker_status(worker, STATUS_INDEX_EXECUTING, &end); if (rank == 0) { if (worker->cl_start.tv_sec || worker->cl_start.tv_nsec) worker->cl_end = end; STARPU_AYU_POSTRUNTASK(j->job_id); } if(!sched_ctx->sched_policy && !sched_ctx->awake_workers && sched_ctx->main_master == worker->workerid) { struct starpu_worker_collection *workers = sched_ctx->workers; struct starpu_sched_ctx_iterator it; int new_rank = 0; if (workers->init_iterator) workers->init_iterator(workers, &it); while (workers->has_next(workers, &it)) { int _workerid = workers->get_next(workers, &it); if (_workerid != workerid) { new_rank++; struct _starpu_worker *_worker = _starpu_get_worker_struct(_workerid); _starpu_driver_end_job(_worker, j, &_worker->perf_arch, new_rank, profiling); } } } } void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker, struct starpu_perfmodel_arch* perf_arch, int profiling) { struct starpu_profiling_task_info *profiling_info = j->task->profiling_info; struct timespec measured_ts; int workerid = worker->workerid; struct starpu_codelet *cl = j->task->cl; int calibrate_model = 0; int updated = 0; _starpu_perfmodel_create_comb_if_needed(perf_arch); #ifndef STARPU_SIMGRID if (cl->model && cl->model->benchmarking) calibrate_model = 1; #endif if (worker->cl_start.tv_sec || worker->cl_start.tv_nsec) { starpu_timespec_sub(&worker->cl_end, &worker->cl_start, &measured_ts); double measured = starpu_timing_timespec_to_us(&measured_ts); STARPU_ASSERT_MSG(measured >= 0, "measured=%lf\n", measured); if (!_starpu_perf_counter_paused()) { worker->__w_total_executed__value++; worker->__w_cumul_execution_time__value += measured; _starpu_perf_counter_update_per_worker_sample(worker->workerid); if (cl->perf_counter_values) { struct starpu_perf_counter_sample_cl_values * const pcv = cl->perf_counter_values; (void)STARPU_PERF_COUNTER_ADD64(&pcv->task.total_executed, 1); _starpu_perf_counter_update_acc_double(&pcv->task.cumul_execution_time, measured); _starpu_perf_counter_update_per_codelet_sample(cl); } } if (profiling && profiling_info) { profiling_info->end_time = worker->cl_end; _starpu_worker_update_profiling_info_executing(workerid, 1, profiling_info->used_cycles, profiling_info->stall_cycles, profiling_info->energy_consumed, j->task->flops); updated = 1; } if (calibrate_model) { #ifdef STARPU_OPENMP double time_consumed = measured; unsigned do_update_time_model; if (j->continuation) { /* The job is only paused, thus we accumulate * its timing, but we don't update its * perfmodel now. */ starpu_timespec_accumulate(&j->cumulated_ts, &measured_ts); do_update_time_model = 0; } else { if (j->discontinuous) { /* The job was paused at least once but is now * really completing. We need to take into * account its past execution time in its * perfmodel. */ starpu_timespec_accumulate(&measured_ts, &j->cumulated_ts); time_consumed = starpu_timing_timespec_to_us(&measured_ts); } do_update_time_model = 1; } #else unsigned do_update_time_model = 1; const double time_consumed = measured; #endif if (j->task->failed) /* Do not record perfmodel for failed tasks, they may terminate earlier */ do_update_time_model = 0; if (do_update_time_model) { _starpu_update_perfmodel_history(j, j->task->cl->model, perf_arch, worker->devid, time_consumed, j->nimpl, 1); } } } if (!updated) _starpu_worker_update_profiling_info_executing(workerid, 1, 0, 0, 0, 0); if (profiling_info && profiling_info->energy_consumed && cl->energy_model && cl->energy_model->benchmarking) { #ifdef STARPU_OPENMP double energy_consumed = profiling_info->energy_consumed; unsigned do_update_energy_model; if (j->continuation) { j->cumulated_energy_consumed += energy_consumed; do_update_energy_model = 0; } else { if (j->discontinuous) { energy_consumed += j->cumulated_energy_consumed; } do_update_energy_model = 1; } #else const double energy_consumed = profiling_info->energy_consumed; unsigned do_update_energy_model = 1; #endif if (j->task->failed) /* Do not record perfmodel for failed tasks, they may terminate earlier */ do_update_energy_model = 0; if (do_update_energy_model) { _starpu_update_perfmodel_history(j, j->task->cl->energy_model, perf_arch, worker->devid, energy_consumed, j->nimpl, 1); } } } static void _starpu_worker_set_status_scheduling(int workerid) { if (!(_starpu_worker_get_status(workerid) & STATUS_SCHEDULING)) { if (!(_starpu_worker_get_status(workerid) & STATUS_SLEEPING)) _STARPU_TRACE_WORKER_SCHEDULING_START; _starpu_worker_add_status(workerid, STATUS_INDEX_SCHEDULING); } } static void _starpu_worker_set_status_scheduling_done(int workerid) { STARPU_ASSERT(_starpu_worker_get_status(workerid) & STATUS_SCHEDULING); if (!(_starpu_worker_get_status(workerid) & STATUS_SLEEPING)) _STARPU_TRACE_WORKER_SCHEDULING_END; _starpu_worker_clear_status(workerid, STATUS_INDEX_SCHEDULING); } static void _starpu_worker_set_status_sleeping(int workerid) { if (!(_starpu_worker_get_status(workerid) & STATUS_SLEEPING)) { _STARPU_TRACE_WORKER_SLEEP_START; _starpu_worker_add_status(workerid, STATUS_INDEX_SLEEPING); } } static void _starpu_worker_set_status_wakeup(int workerid) { if ((_starpu_worker_get_status(workerid) & STATUS_SLEEPING)) { _STARPU_TRACE_WORKER_SLEEP_END; _starpu_worker_clear_status(workerid, STATUS_INDEX_SLEEPING); } } #if !defined(STARPU_SIMGRID) static void _starpu_exponential_backoff(struct _starpu_worker *worker) { int delay = worker->spinning_backoff; if (worker->spinning_backoff < worker->config->conf.driver_spinning_backoff_max) worker->spinning_backoff<<=1; while(delay--) STARPU_UYIELD(); } #endif /* Workers may block when there is no work to do at all. */ struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *worker, int workerid, unsigned memnode STARPU_ATTRIBUTE_UNUSED) { struct starpu_task *task; #if !defined(STARPU_SIMGRID) unsigned keep_awake = 0; #endif STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); _starpu_worker_enter_sched_op(worker); _starpu_worker_set_status_scheduling(workerid); #if !defined(STARPU_SIMGRID) if ((worker->pipeline_length == 0 && worker->current_task) || (worker->pipeline_length != 0 && worker->ntasks)) /* This worker is executing something */ keep_awake = 1; #endif /*if the worker is already executing a task then */ if (worker->pipeline_length && (worker->ntasks == worker->pipeline_length || worker->pipeline_stuck)) task = NULL; /* don't push a task if we are already transferring one */ else if (worker->task_transferring != NULL) task = NULL; /*else try to pop a task*/ else { STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); task = _starpu_pop_task(worker); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&worker->sched_mutex); #if !defined(STARPU_SIMGRID) if (worker->state_keep_awake) { keep_awake = 1; worker->state_keep_awake = 0; } #endif } #if !defined(STARPU_SIMGRID) if (task == NULL && !keep_awake) { /* Didn't get a task to run and none are running, go to sleep */ /* Note: we need to keep the sched condition mutex all along the path * from popping a task from the scheduler to blocking. Otherwise the * driver may go block just after the scheduler got a new task to be * executed, and thus hanging. */ _starpu_worker_set_status_sleeping(workerid); _starpu_worker_leave_sched_op(worker); STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond); #ifndef STARPU_NON_BLOCKING_DRIVERS if (_starpu_worker_can_block(memnode, worker) && !worker->state_block_in_parallel_req && !worker->state_unblock_in_parallel_req && !_starpu_sched_ctx_last_worker_awake(worker)) { #ifdef STARPU_WORKER_CALLBACKS if (_starpu_config.conf.callback_worker_going_to_sleep != NULL) { _starpu_config.conf.callback_worker_going_to_sleep(workerid); } #endif do { STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); if (!worker->state_keep_awake && _starpu_worker_can_block(memnode, worker) && !worker->state_block_in_parallel_req && !worker->state_unblock_in_parallel_req) { _starpu_worker_set_status_sleeping(workerid); if (_starpu_sched_ctx_last_worker_awake(worker)) { break; } } else { break; } } while (1); worker->state_keep_awake = 0; _starpu_worker_set_status_scheduling_done(workerid); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); #ifdef STARPU_WORKER_CALLBACKS if (_starpu_config.conf.callback_worker_waking_up != NULL) { /* the wake up callback should be called once the sched_mutex has been unlocked, * so that an external resource manager can potentially defer the wake-up momentarily if * the corresponding computing unit is still in use by another runtime system */ _starpu_config.conf.callback_worker_waking_up(workerid); } #endif } else #endif { _starpu_worker_set_status_scheduling_done(workerid); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); if (_starpu_machine_is_running()) _starpu_exponential_backoff(worker); } return NULL; } #endif if (task) { _starpu_worker_set_status_scheduling_done(workerid); _starpu_worker_set_status_wakeup(workerid); } else { _starpu_worker_set_status_sleeping(workerid); } worker->spinning_backoff = worker->config->conf.driver_spinning_backoff_min; _starpu_worker_leave_sched_op(worker); STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); STARPU_AYU_PRERUNTASK(_starpu_get_job_associated_to_task(task)->job_id, workerid); return task; } int _starpu_get_multi_worker_task(struct _starpu_worker *workers, struct starpu_task ** tasks, int nworkers, unsigned memnode STARPU_ATTRIBUTE_UNUSED) { int i, count = 0; struct _starpu_job * j; int is_parallel_task; struct _starpu_combined_worker *combined_worker; #if !defined(STARPU_NON_BLOCKING_DRIVERS) && !defined(STARPU_SIMGRID) int executing = 0; #endif /*for each worker*/ #ifndef STARPU_NON_BLOCKING_DRIVERS /* This assumes only 1 worker */ STARPU_ASSERT_MSG(nworkers == 1, "Multiple workers is not yet possible in blocking drivers mode\n"); _starpu_set_local_worker_key(&workers[0]); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&workers[0].sched_mutex); _starpu_worker_enter_sched_op(&workers[0]); #endif for (i = 0; i < nworkers; i++) { unsigned keep_awake = 0; #if !defined(STARPU_NON_BLOCKING_DRIVERS) && !defined(STARPU_SIMGRID) if ((workers[i].pipeline_length == 0 && workers[i].current_task) || (workers[i].pipeline_length != 0 && workers[i].ntasks)) /* At least this worker is executing something */ executing = 1; #endif /*if the worker is already executing a task then */ if((workers[i].pipeline_length == 0 && workers[i].current_task) || (workers[i].pipeline_length != 0 && (workers[i].ntasks == workers[i].pipeline_length || workers[i].pipeline_stuck))) { tasks[i] = NULL; } /* don't push a task if we are already transferring one */ else if (workers[i].task_transferring != NULL) { tasks[i] = NULL; } /*else try to pop a task*/ else { #ifdef STARPU_NON_BLOCKING_DRIVERS _starpu_set_local_worker_key(&workers[i]); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&workers[i].sched_mutex); _starpu_worker_enter_sched_op(&workers[i]); #endif _starpu_worker_set_status_scheduling(workers[i].workerid); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&workers[i].sched_mutex); tasks[i] = _starpu_pop_task(&workers[i]); STARPU_PTHREAD_MUTEX_LOCK_SCHED(&workers[i].sched_mutex); if (workers[i].state_keep_awake) { keep_awake = workers[i].state_keep_awake; workers[i].state_keep_awake = 0; } if(tasks[i] != NULL || keep_awake) { _starpu_worker_set_status_scheduling_done(workers[i].workerid); _starpu_worker_set_status_wakeup(workers[i].workerid); STARPU_PTHREAD_COND_BROADCAST(&workers[i].sched_cond); #ifdef STARPU_NON_BLOCKING_DRIVERS _starpu_worker_leave_sched_op(&workers[i]); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&workers[i].sched_mutex); #endif count ++; if (tasks[i] == NULL) /* no task, but keep_awake */ continue; j = _starpu_get_job_associated_to_task(tasks[i]); is_parallel_task = (j->task_size > 1); if (workers[i].pipeline_length) workers[i].current_tasks[(workers[i].first_task + workers[i].ntasks)%STARPU_MAX_PIPELINE] = tasks[i]; else workers[i].current_task = j->task; workers[i].ntasks++; /* Get the rank in case it is a parallel task */ if (is_parallel_task) { STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); workers[i].current_rank = j->active_task_alias_count++; STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); if(j->combined_workerid != -1) { combined_worker = _starpu_get_combined_worker_struct(j->combined_workerid); workers[i].combined_workerid = j->combined_workerid; workers[i].worker_size = combined_worker->worker_size; } } else { workers[i].combined_workerid = workers[i].workerid; workers[i].worker_size = 1; workers[i].current_rank = 0; } STARPU_AYU_PRERUNTASK(_starpu_get_job_associated_to_task(tasks[i])->job_id, workers[i].workerid); } else { _starpu_worker_set_status_sleeping(workers[i].workerid); #ifdef STARPU_NON_BLOCKING_DRIVERS _starpu_worker_leave_sched_op(&workers[i]); #endif STARPU_PTHREAD_COND_BROADCAST(&workers[i].sched_cond); #ifdef STARPU_NON_BLOCKING_DRIVERS STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&workers[i].sched_mutex); #endif } } } #if !defined(STARPU_NON_BLOCKING_DRIVERS) #if !defined(STARPU_SIMGRID) /* Block the assumed-to-be-only worker */ struct _starpu_worker *worker = &workers[0]; unsigned workerid = workers[0].workerid; if (!count && !executing) { /* Didn't get a task to run and none are running, go to sleep */ /* Note: we need to keep the sched condition mutex all along the path * from popping a task from the scheduler to blocking. Otherwise the * driver may go block just after the scheduler got a new task to be * executed, and thus hanging. */ _starpu_worker_set_status_sleeping(workerid); _starpu_worker_leave_sched_op(worker); if (_starpu_worker_can_block(memnode, worker) && !worker->state_block_in_parallel_req && !worker->state_unblock_in_parallel_req && !_starpu_sched_ctx_last_worker_awake(worker)) { #ifdef STARPU_WORKER_CALLBACKS if (_starpu_config.conf.callback_worker_going_to_sleep != NULL) { _starpu_config.conf.callback_worker_going_to_sleep(workerid); } #endif do { STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex); if (!worker->state_keep_awake && _starpu_worker_can_block(memnode, worker) && !worker->state_block_in_parallel_req && !worker->state_unblock_in_parallel_req) { _starpu_worker_set_status_sleeping(workerid); if (_starpu_sched_ctx_last_worker_awake(worker)) { break; } } else { break; } } while (1); worker->state_keep_awake = 0; _starpu_worker_set_status_scheduling_done(workerid); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); #ifdef STARPU_WORKER_CALLBACKS if (_starpu_config.conf.callback_worker_waking_up != NULL) { /* the wake up callback should be called once the sched_mutex has been unlocked, * so that an external resource manager can potentially defer the wake-up momentarily if * the corresponding computing unit is still in use by another runtime system */ _starpu_config.conf.callback_worker_waking_up(workerid); } #endif } else { _starpu_worker_set_status_scheduling_done(workerid); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&worker->sched_mutex); if (_starpu_machine_is_running()) _starpu_exponential_backoff(worker); } return 0; } _starpu_worker_set_status_wakeup(workerid); worker->spinning_backoff = worker->config->conf.driver_spinning_backoff_min; #endif /* !STARPU_SIMGRID */ _starpu_worker_leave_sched_op(&workers[0]); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&workers[0].sched_mutex); #endif /* !STARPU_NON_BLOCKING_DRIVERS */ return count; } #ifdef HAVE_MMAP /*generate and initialize rbtree map_tree*/ static struct starpu_rbtree map_tree = STARPU_RBTREE_INITIALIZER; static starpu_pthread_mutex_t map_tree_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; struct map_allocate_info { struct starpu_rbtree_node map_node; void* map_addr; size_t length; char name[]; }; /* the cmp_fn arg for rb_tree_insert() */ static unsigned int map_addr_cmp_insert(struct starpu_rbtree_node * left_elm, struct starpu_rbtree_node * right_elm) { unsigned int addr_left = (uintptr_t)((struct map_allocate_info *) left_elm)->map_addr; unsigned int addr_right = (uintptr_t)((struct map_allocate_info *) right_elm)->map_addr; return addr_left - addr_right; } /* the cmp_fn arg for starpu_rbtree_lookup() */ static unsigned int map_addr_cmp_lookup(uintptr_t addr_left, struct starpu_rbtree_node * right_elm) { unsigned int addr_right = (uintptr_t)((struct map_allocate_info *) right_elm)->map_addr; return addr_left - addr_right; } void *_starpu_map_allocate(size_t length, unsigned node) { /*file*/ int fd; char fd_name[32]; snprintf(fd_name,sizeof(fd_name), "starpu-%u-XXXXXX", node); while(1) { mktemp(fd_name); fd = shm_open(fd_name, O_RDWR|O_CREAT|O_EXCL, 0600); if(fd >= 0) break; /* if name is already existed, recreate one*/ else if (errno == EEXIST) continue; else { perror("fail to open file"); return NULL; } } /*fix the length of file*/ int ret = ftruncate(fd, length); if (ret < 0) { perror("fail to allocate room for mapping"); close(fd); return NULL; } void* map_addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); close(fd); if (map_addr == MAP_FAILED) { perror("fail to map"); return NULL; } struct map_allocate_info *map_info; _STARPU_MALLOC(map_info, sizeof(struct map_allocate_info)+strlen(fd_name)+1); map_info->map_addr = map_addr; map_info->length = length; memcpy(map_info->name, fd_name, strlen(fd_name)+1); starpu_rbtree_node_init(&map_info->map_node); STARPU_PTHREAD_MUTEX_LOCK(&map_tree_mutex); starpu_rbtree_insert(&map_tree, &map_info->map_node, map_addr_cmp_insert); STARPU_PTHREAD_MUTEX_UNLOCK(&map_tree_mutex); return map_addr; } int _starpu_map_deallocate(void* map_addr, size_t length) { STARPU_PTHREAD_MUTEX_LOCK(&map_tree_mutex); struct starpu_rbtree_node * currentNode = starpu_rbtree_lookup(&map_tree, (uintptr_t)map_addr, map_addr_cmp_lookup); STARPU_PTHREAD_MUTEX_UNLOCK(&map_tree_mutex); if (currentNode != NULL) { struct map_allocate_info * map_info = (struct map_allocate_info *) currentNode; if ((uintptr_t)map_addr == (uintptr_t)map_info->map_addr && ((uintptr_t)map_addr + length) == ((uintptr_t)map_info->map_addr + map_info->length)) { /*unlink the map fd name*/ if (shm_unlink(map_info->name) != 0) { _STARPU_DISP("warning: cannot unlink file %s: %s\n", map_info->name, strerror(errno)); } STARPU_PTHREAD_MUTEX_LOCK(&map_tree_mutex); starpu_rbtree_remove(&map_tree, &map_info->map_node); STARPU_PTHREAD_MUTEX_UNLOCK(&map_tree_mutex); free(map_info); } else { return -1; } } else { _STARPU_DISP("could not find mapped address %p\n", map_addr); } int res = munmap(map_addr, length); if (res < 0) { perror("fail to unmap"); return -1; } return 0; } /*lookup name from map_addr*/ char* _starpu_get_fdname_from_mapaddr(uintptr_t map_addr, size_t *offset, size_t length) { char* map_name = NULL; STARPU_PTHREAD_MUTEX_LOCK(&map_tree_mutex); struct starpu_rbtree_node * currentNode = starpu_rbtree_lookup_nearest(&map_tree, map_addr, map_addr_cmp_lookup, STARPU_RBTREE_LEFT); STARPU_PTHREAD_MUTEX_UNLOCK(&map_tree_mutex); if (currentNode != NULL) { struct map_allocate_info * map_info = (struct map_allocate_info *) currentNode; if ((map_addr >= (uintptr_t)map_info->map_addr) && map_addr + length <= ((uintptr_t)map_info->map_addr + map_info->length)) { map_name = strdup(map_info->name); *offset = map_addr - (uintptr_t)map_info->map_addr; } } return map_name; } /*map with giving file name*/ void *_starpu_sink_map(char *fd_name, size_t offset, size_t length) { /*file*/ int fd; fd = shm_open(fd_name, O_RDWR, 0600); if(fd < 0) { perror("fail to open file"); return NULL; } /* offset for mmap() must be page aligned */ off_t pa_offset = offset & ~(sysconf(_SC_PAGE_SIZE) - 1); void *map_sink_addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, pa_offset); close(fd); if (map_sink_addr == MAP_FAILED) { perror("fail to map"); return NULL; } return (void*)((uintptr_t)map_sink_addr + (offset - pa_offset)); } int _starpu_sink_unmap(uintptr_t map_addr, size_t length) { uintptr_t pa_addr = map_addr & ~(sysconf(_SC_PAGE_SIZE) - 1); size_t offset = map_addr-pa_addr; int res = munmap((void*)pa_addr, length + offset); if (res < 0) { perror("fail to unmap"); return -1; } return 0; } #else char* _starpu_get_fdname_from_mapaddr(uintptr_t map_addr, size_t *offset, size_t length) { (void)map_addr; (void)offset; (void)length; return NULL; } void *_starpu_sink_map(char *fd_name, size_t offset, size_t length) { (void)fd_name; (void)offset; (void)length; return NULL; } int _starpu_sink_unmap(uintptr_t map_addr, size_t length) { (void)map_addr; (void)length; return -1; } #endif starpu-1.4.9+dfsg/src/drivers/driver_common/driver_common.h000066400000000000000000000046501507764646700241410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DRIVER_COMMON_H__ #define __DRIVER_COMMON_H__ /** @file */ #include #include #include #include /** The task job is about to start (or has already started when kernels are * queued in a pipeline), record profiling and trace information. */ void _starpu_driver_start_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch, int rank, int profiling); /** The task job has ended, record profiling and trace information. */ void _starpu_driver_end_job(struct _starpu_worker *args, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch, int rank, int profiling); /** Feed performance model with the terminated job statistics */ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_worker *worker_args, struct starpu_perfmodel_arch* perf_arch, int profiling); #pragma GCC visibility push(hidden) /** Get from the scheduler a task to be executed on the worker \p workerid */ struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *args, int workerid, unsigned memnode); /** Get from the scheduler tasks to be executed on the workers \p workers */ int _starpu_get_multi_worker_task(struct _starpu_worker *workers, struct starpu_task ** tasks, int nworker, unsigned memnode); void *_starpu_map_allocate(size_t length, unsigned node); int _starpu_map_deallocate(void* map_addr, size_t length); char* _starpu_get_fdname_from_mapaddr(uintptr_t map_addr, size_t *offset, size_t length); void *_starpu_sink_map(char *fd_name, size_t offset, size_t length); int _starpu_sink_unmap(uintptr_t map_addr, size_t length); #pragma GCC visibility pop #endif // __DRIVER_COMMON_H__ starpu-1.4.9+dfsg/src/drivers/hip/000077500000000000000000000000001507764646700170355ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/drivers/hip/driver_hip.c000066400000000000000000001601051507764646700213370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2022-2022 École de Technologie Supérieure (ETS, Montréal) * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if HAVE_DECL_HWLOC_HIP_GET_DEVICE_OSDEV_BY_INDEX #include #endif #define starpu_hipStreamCreate(stream) hipStreamCreateWithFlags(stream, hipStreamNonBlocking) /* Consider a rough 10% overhead cost */ #define FREE_MARGIN 0.9 static size_t global_mem[STARPU_MAXHIPDEVS]; int _starpu_hip_bus_ids[STARPU_MAXHIPDEVS+STARPU_MAXNUMANODES][STARPU_MAXHIPDEVS+STARPU_MAXNUMANODES]; static hipStream_t streams[STARPU_NMAXWORKERS]; static char used_stream[STARPU_NMAXWORKERS]; static hipStream_t out_transfer_streams[STARPU_MAXHIPDEVS]; static hipStream_t in_transfer_streams[STARPU_MAXHIPDEVS]; /* Note: streams are not thread-safe, so we define them for each HIP worker * emitting a GPU-GPU transfer */ static hipStream_t in_peer_transfer_streams[STARPU_MAXHIPDEVS][STARPU_MAXHIPDEVS]; static struct hipDeviceProp_t props[STARPU_MAXHIPDEVS]; static hipEvent_t task_events[STARPU_NMAXWORKERS][STARPU_MAX_PIPELINE]; static unsigned hip_bindid_init[STARPU_MAXHIPDEVS]; static unsigned hip_bindid[STARPU_MAXHIPDEVS]; static unsigned hip_memory_init[STARPU_MAXHIPDEVS]; static unsigned hip_memory_nodes[STARPU_MAXHIPDEVS]; static struct _starpu_worker_set hip_worker_set[STARPU_MAXHIPDEVS]; static enum initialization hip_device_init[STARPU_MAXHIPDEVS]; static int hip_device_users[STARPU_MAXHIPDEVS]; static starpu_pthread_mutex_t hip_device_init_mutex[STARPU_MAXHIPDEVS]; static starpu_pthread_cond_t hip_device_init_cond[STARPU_MAXHIPDEVS]; static int hip_globalbindid; static int _starpu_hip_peer_access(int devid, int peer_devid); int _starpu_nworker_per_hip; static size_t _starpu_hip_get_global_mem_size(unsigned devid) { return global_mem[devid]; } static hipStream_t starpu_hip_get_in_transfer_stream(unsigned dst_node) { int dst_devid = starpu_memory_node_get_devid(dst_node); hipStream_t stream; stream = in_transfer_streams[dst_devid]; STARPU_ASSERT(stream); return stream; } static hipStream_t starpu_hip_get_out_transfer_stream(unsigned src_node) { int src_devid = starpu_memory_node_get_devid(src_node); hipStream_t stream; stream = out_transfer_streams[src_devid]; STARPU_ASSERT(stream); return stream; } static hipStream_t starpu_hip_get_peer_transfer_stream(unsigned src_node, unsigned dst_node) { int src_devid = starpu_memory_node_get_devid(src_node); int dst_devid = starpu_memory_node_get_devid(dst_node); hipStream_t stream; stream = in_peer_transfer_streams[src_devid][dst_devid]; STARPU_ASSERT(stream); return stream; } hipStream_t starpu_hip_get_local_stream(void) { int worker = starpu_worker_get_id_check(); used_stream[worker] = 1; return streams[worker]; } const struct hipDeviceProp_t *starpu_hip_get_device_properties(unsigned workerid) { struct _starpu_machine_config *config = _starpu_get_machine_config(); unsigned devid = config->workers[workerid].devid; return &props[devid]; } /* Early library initialization, before anything else, just initialize data */ void _starpu_hip_init(void) { int i; for (i = 0; i < STARPU_MAXHIPDEVS; i++) { STARPU_PTHREAD_MUTEX_INIT(&hip_device_init_mutex[i], NULL); STARPU_PTHREAD_COND_INIT(&hip_device_init_cond[i], NULL); } memset(&hip_bindid_init, 0, sizeof(hip_bindid_init)); memset(&hip_memory_init, 0, sizeof(hip_memory_init)); hip_globalbindid = -1; } /* Return the number of devices usable in the system. * The value returned cannot be greater than MAXHIPDEVS */ static unsigned _starpu_get_hip_device_count(void) { int cnt; hipError_t hipres; hipres = hipGetDeviceCount(&cnt); if (STARPU_UNLIKELY(hipres)) return 0; if (cnt > STARPU_MAXHIPDEVS) { _STARPU_MSG("# Warning: %d HIP devices available. Only %d enabled. Use configure option --enable-maxhipdev=xxx to update the maximum value of supported HIP devices.\n", cnt, STARPU_MAXHIPDEVS); cnt = STARPU_MAXHIPDEVS; } return (unsigned)cnt; } /* This is run from initialize to determine the number of HIP devices */ void _starpu_init_hip(void) { } /* This is called to really discover the hardware */ void _starpu_hip_discover_devices(struct _starpu_machine_config *config) { /* Discover the number of HIP devices. Fill the result in CONFIG. */ int cnt; hipError_t hipres; hipres = hipGetDeviceCount(&cnt); if (STARPU_UNLIKELY(hipres != hipSuccess)) cnt = 0; config->topology.nhwdevices[STARPU_HIP_WORKER] = cnt; } static void _starpu_initialize_workers_hip_gpuid(struct _starpu_machine_config *config) { struct _starpu_machine_topology *topology = &config->topology; struct starpu_conf *uconf = &config->conf; _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_hip_gpuid == 0 ? NULL : (int *)uconf->workers_hip_gpuid, &(config->current_devid[STARPU_HIP_WORKER]), (int *)topology->workers_devid[STARPU_HIP_WORKER], "STARPU_WORKERS_HIPID", topology->nhwdevices[STARPU_HIP_WORKER], STARPU_HIP_WORKER); _starpu_devices_gpu_clear(config, STARPU_HIP_WORKER); _starpu_devices_drop_duplicate(topology->workers_devid[STARPU_HIP_WORKER]); } /* Determine which devices we will use */ void _starpu_init_hip_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config) { int i; for (i = 0; i < (int) (sizeof(hip_worker_set)/sizeof(hip_worker_set[0])); i++) hip_worker_set[i].workers = NULL; int nhip = config->conf.nhip; if (nhip != 0) { /* The user did not disable HIP. We need to initialize HIP * early to count the number of devices */ _starpu_init_hip(); int nb_devices = _starpu_get_hip_device_count(); _starpu_topology_check_ndevices(&nhip, nb_devices, 0, STARPU_MAXHIPDEVS, 0, "nhip", "HIP", "maxhipdev"); } int nworker_per_hip = starpu_get_env_number_default("STARPU_NWORKER_PER_HIP", 1); STARPU_ASSERT_MSG(nworker_per_hip > 0, "STARPU_NWORKER_PER_HIP has to be > 0"); STARPU_ASSERT_MSG_ALWAYS(nworker_per_hip < STARPU_NMAXWORKERS, "STARPU_NWORKER_PER_HIP (%d) cannot be higher than STARPU_NMAXWORKERS (%d)\n", nworker_per_hip, STARPU_NMAXWORKERS); #ifndef STARPU_NON_BLOCKING_DRIVERS if (nworker_per_hip > 1) { _STARPU_DISP("Warning: reducing STARPU_NWORKER_PER_HIP to 1 because blocking drivers are enabled\n"); nworker_per_hip = 1; } _starpu_nworker_per_hip = nworker_per_hip; #endif /* Now we know how many HIP devices will be used */ topology->ndevices[STARPU_HIP_WORKER] = nhip; _starpu_initialize_workers_hip_gpuid(config); /* allow having one worker per stream */ topology->hip_th_per_stream = starpu_get_env_number_default("STARPU_HIP_THREAD_PER_WORKER", -1); topology->hip_th_per_dev = starpu_get_env_number_default("STARPU_HIP_THREAD_PER_DEV", -1); STARPU_ASSERT_MSG(!(topology->hip_th_per_stream == 1 && topology->hip_th_per_dev != -1), "It does not make sense to set both STARPU_HIP_THREAD_PER_WORKER to 1 and to set STARPU_HIP_THREAD_PER_DEV, please choose either per worker or per device or none"); /* per device by default */ if (topology->hip_th_per_dev == -1) { if (topology->hip_th_per_stream == 1) topology->hip_th_per_dev = 0; else topology->hip_th_per_dev = 1; } /* Not per stream by default */ if (topology->hip_th_per_stream == -1) { topology->hip_th_per_stream = 0; } if (!topology->hip_th_per_dev) { hip_worker_set[0].workers = &config->workers[topology->nworkers]; hip_worker_set[0].nworkers = nhip * nworker_per_hip; } unsigned hipgpu; for (hipgpu = 0; (int) hipgpu < nhip; hipgpu++) { int devid = _starpu_get_next_devid(topology, config, STARPU_HIP_WORKER); if (devid == -1) { // There is no more devices left topology->ndevices[STARPU_HIP_WORKER] = hipgpu; break; } struct _starpu_worker_set *worker_set; if(topology->hip_th_per_stream) { worker_set = ALLOC_WORKER_SET; } else if (topology->hip_th_per_dev) { worker_set = &hip_worker_set[devid]; worker_set->workers = &config->workers[topology->nworkers]; worker_set->nworkers = nworker_per_hip; } else { /* Same worker set for all devices */ worker_set = &hip_worker_set[0]; } _starpu_topology_configure_workers(topology, config, STARPU_HIP_WORKER, hipgpu, devid, 0, 0, nworker_per_hip, // TODO: fix perfmodels etc. // nworker_per_hip - 1, 1, worker_set, NULL); _starpu_devices_gpu_set_used(devid); /* TODO: move this to generic place */ #ifdef STARPU_HAVE_HWLOC { hwloc_obj_t obj = NULL; if (starpu_driver_info[STARPU_HIP_WORKER].get_hwloc_obj) obj = starpu_driver_info[STARPU_HIP_WORKER].get_hwloc_obj(topology->hwtopology, devid); if (obj) { struct _starpu_hwloc_userdata *data = obj->userdata; data->ngpus++; } else { _STARPU_DEBUG("Warning: could not find location of HIP%u, do you have the hwloc HIP plugin installed?\n", devid); } } #endif } } /* Bind the driver on a CPU core */ void _starpu_hip_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { /* Perhaps the worker has some "favourite" bindings */ unsigned *preferred_binding = NULL; unsigned npreferred = 0; unsigned devid = workerarg->devid; if (hip_bindid_init[devid]) { if (config->topology.hip_th_per_stream == 0) workerarg->bindid = hip_bindid[devid]; else workerarg->bindid = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); } else { hip_bindid_init[devid] = 1; if (config->topology.hip_th_per_dev == 0 && config->topology.hip_th_per_stream == 0) { if (hip_globalbindid == -1) hip_globalbindid = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); workerarg->bindid = hip_bindid[devid] = hip_globalbindid; } else { workerarg->bindid = hip_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); } } } /* Set up memory and buses */ void _starpu_hip_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { unsigned memory_node = -1; unsigned devid = workerarg->devid; unsigned numa; if (hip_memory_init[devid]) { memory_node = hip_memory_nodes[devid]; } else { hip_memory_init[devid] = 1; memory_node = hip_memory_nodes[devid] = _starpu_memory_node_register(STARPU_HIP_RAM, devid); #ifdef STARPU_USE_HIP_MAP /* TODO: check node capabilities */ _starpu_memory_node_set_mapped(memory_node); #endif for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) { _starpu_hip_bus_ids[numa][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(numa, memory_node); _starpu_hip_bus_ids[devid+STARPU_MAXNUMANODES][numa] = _starpu_register_bus(memory_node, numa); } if ( #if defined(STARPU_HAVE_HIP_MEMCPY_PEER) 1 #else /* MEMCPY_PEER */ 0 #endif /* MEMCPY_PEER */ ) { int worker2; for (worker2 = 0; worker2 < workerarg->workerid; worker2++) { struct _starpu_worker *workerarg2 = &config->workers[worker2]; int devid2 = workerarg2->devid; if (workerarg2->arch == STARPU_HIP_WORKER) { unsigned memory_node2 = starpu_worker_get_memory_node(worker2); _starpu_hip_bus_ids[devid2+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES] = _starpu_register_bus(memory_node2, memory_node); _starpu_hip_bus_ids[devid+STARPU_MAXNUMANODES][devid2+STARPU_MAXNUMANODES] = _starpu_register_bus(memory_node, memory_node2); #if HAVE_DECL_HWLOC_HIP_GET_DEVICE_OSDEV_BY_INDEX { hwloc_obj_t obj, obj2, ancestor; obj = hwloc_hip_get_device_osdev_by_index(config->topology.hwtopology, devid); obj2 = hwloc_hip_get_device_osdev_by_index(config->topology.hwtopology, devid2); ancestor = hwloc_get_common_ancestor_obj(config->topology.hwtopology, obj, obj2); if (ancestor) { struct _starpu_hwloc_userdata *data = ancestor->userdata; #ifdef STARPU_VERBOSE { char name[64]; hwloc_obj_type_snprintf(name, sizeof(name), ancestor, 0); _STARPU_DEBUG("HIP%u and HIP%u are linked through %s, along %u GPUs\n", devid, devid2, name, data->ngpus); } #endif starpu_bus_set_ngpus(_starpu_hip_bus_ids[devid2+STARPU_MAXNUMANODES][devid+STARPU_MAXNUMANODES], data->ngpus); starpu_bus_set_ngpus(_starpu_hip_bus_ids[devid+STARPU_MAXNUMANODES][devid2+STARPU_MAXNUMANODES], data->ngpus); } } #endif } } } } _starpu_memory_node_add_nworkers(memory_node); //This worker can also manage transfers on NUMA nodes for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) _starpu_worker_drives_memory_node(&workerarg->set->workers[0], numa); _starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node); workerarg->memory_node = memory_node; } /* Set the current HIP device */ void starpu_hip_set_device(int devid STARPU_ATTRIBUTE_UNUSED) { hipError_t hipres; int attempts = 0; hipres = hipSetDevice(devid); while (hipres == hipErrorDeinitialized && ++attempts < 10) { usleep(100000); hipres = hipSetDevice(devid); } if (STARPU_UNLIKELY(hipres)) STARPU_HIP_REPORT_ERROR(hipres); } static void _starpu_hip_limit_gpu_mem_if_needed(unsigned devid) { starpu_ssize_t limit; size_t STARPU_ATTRIBUTE_UNUSED totalGlobalMem = 0; size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0; /* Find the size of the memory on the device */ totalGlobalMem = props[devid].totalGlobalMem; limit = starpu_getenv_number("STARPU_LIMIT_HIP_MEM"); if (limit == -1) { char name[30]; snprintf(name, sizeof(name), "STARPU_LIMIT_HIP_%u_MEM", devid); limit = starpu_getenv_number(name); } #if defined(STARPU_USE_HIP) if (limit == -1) { limit = totalGlobalMem / (1024*1024) * FREE_MARGIN; } #endif global_mem[devid] = limit * 1024*1024; } /* Really initialize one device */ static void init_device_context(unsigned devid, unsigned memnode) { STARPU_ASSERT(devid < STARPU_MAXHIPDEVS); hipError_t hipres; int attempts = 0; starpu_hip_set_device(devid); STARPU_PTHREAD_MUTEX_LOCK(&hip_device_init_mutex[devid]); hip_device_users[devid]++; if (hip_device_init[devid] == UNINITIALIZED) /* Nobody started initialization yet, do it */ hip_device_init[devid] = CHANGING; else { /* Somebody else is doing initialization, wait for it */ while (hip_device_init[devid] != INITIALIZED) STARPU_PTHREAD_COND_WAIT(&hip_device_init_cond[devid], &hip_device_init_mutex[devid]); STARPU_PTHREAD_MUTEX_UNLOCK(&hip_device_init_mutex[devid]); return; } STARPU_PTHREAD_MUTEX_UNLOCK(&hip_device_init_mutex[devid]); /* force HIP to initialize the context for real */ hipres = hipInit(0); while (hipres == hipErrorDeinitialized && ++attempts < 100) { usleep(100000); hipres = hipInit(0); } if (STARPU_UNLIKELY(hipres)) { if (hipres != hipSuccess) { _STARPU_MSG("Failed to initialize HIP runtime\n"); exit(77); } STARPU_HIP_REPORT_ERROR(hipres); } hipres = hipGetDeviceProperties(&props[devid], devid); if (STARPU_UNLIKELY(hipres)) STARPU_HIP_REPORT_ERROR(hipres); #ifdef STARPU_HAVE_HIP_MEMCPY_PEER if (props[devid].computeMode == hipComputeModeExclusive) { _STARPU_MSG("HIP is in EXCLUSIVE-THREAD mode, but StarPU was built with multithread GPU control support, please either ask your administrator to use EXCLUSIVE-PROCESS mode (which should really be fine), or reconfigure with --disable-hip-memcpy-peer but that will disable the memcpy-peer optimizations\n"); STARPU_ABORT(); } #endif hipres = starpu_hipStreamCreate(&in_transfer_streams[devid]); if (STARPU_UNLIKELY(hipres)) STARPU_HIP_REPORT_ERROR(hipres); hipres = starpu_hipStreamCreate(&out_transfer_streams[devid]); if (STARPU_UNLIKELY(hipres)) STARPU_HIP_REPORT_ERROR(hipres); int nworkers = starpu_worker_get_count(); int workerid; for (workerid = 0; workerid < nworkers; workerid++) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); if (worker->arch == STARPU_HIP_WORKER && worker->subworkerid == 0) { hipres = starpu_hipStreamCreate(&in_peer_transfer_streams[worker->devid][devid]); if (STARPU_UNLIKELY(hipres)) STARPU_HIP_REPORT_ERROR(hipres); } } STARPU_PTHREAD_MUTEX_LOCK(&hip_device_init_mutex[devid]); hip_device_init[devid] = INITIALIZED; STARPU_PTHREAD_COND_BROADCAST(&hip_device_init_cond[devid]); STARPU_PTHREAD_MUTEX_UNLOCK(&hip_device_init_mutex[devid]); _starpu_hip_limit_gpu_mem_if_needed(devid); _starpu_memory_manager_set_global_memory_size(memnode, _starpu_hip_get_global_mem_size(devid)); } /* De-initialize one device */ static void deinit_device_context(unsigned devid STARPU_ATTRIBUTE_UNUSED) { starpu_hip_set_device(devid); hipStreamDestroy(in_transfer_streams[devid]); hipStreamDestroy(out_transfer_streams[devid]); int nworkers = starpu_worker_get_count(); int workerid; for (workerid = 0; workerid < nworkers; workerid++) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); if (worker->arch == STARPU_HIP_WORKER && worker->subworkerid == 0) { hipStreamDestroy(in_peer_transfer_streams[worker->devid][devid]); } } } static void init_worker_context(unsigned workerid, unsigned devid) { int j; hipError_t hipres; starpu_hip_set_device(devid); for (j = 0; j < STARPU_MAX_PIPELINE; j++) { hipres = hipEventCreateWithFlags(&task_events[workerid][j], hipEventDisableTiming); if (STARPU_UNLIKELY(hipres)) STARPU_HIP_REPORT_ERROR(hipres); } hipres = starpu_hipStreamCreate(&streams[workerid]); if (STARPU_UNLIKELY(hipres)) STARPU_HIP_REPORT_ERROR(hipres); } static void deinit_worker_context(unsigned workerid, unsigned devid STARPU_ATTRIBUTE_UNUSED) { unsigned j; starpu_hip_set_device(devid); for (j = 0; j < STARPU_MAX_PIPELINE; j++) hipEventDestroy(task_events[workerid][j]); hipStreamDestroy(streams[workerid]); } /* This is run from the driver thread to initialize the driver HIP context */ int _starpu_hip_driver_init(struct _starpu_worker *worker) { struct _starpu_worker_set *worker_set = worker->set; struct _starpu_worker *worker0 = &worker_set->workers[0]; int lastdevid = -1; unsigned i; _starpu_driver_start(worker0, STARPU_HIP_WORKER, 0); _starpu_set_local_worker_key(worker); #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi; #endif #ifdef STARPU_USE_FXT for (i = 1; i < worker_set->nworkers; i++) _starpu_worker_start(&worker_set->workers[i], STARPU_HIP_WORKER, 0); #endif for (i = 0; i < worker_set->nworkers; i++) { worker = &worker_set->workers[i]; unsigned devid = worker->devid; unsigned memnode = worker->memory_node; #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init, devid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init(&pi, NULL, NULL); pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_start, devid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start(&pi, NULL, NULL); #endif if ((int) devid == lastdevid) { /* Already initialized */ continue; } lastdevid = devid; init_device_context(devid, memnode); if (worker->config->topology.nworker[STARPU_HIP_WORKER][devid] > 1 && props[devid].concurrentKernels == 0) _STARPU_DISP("Warning: STARPU_NWORKER_PER_HIP is %u, but HIP device %u does not support concurrent kernel execution!\n", worker_set->nworkers, devid); } /* one more time to avoid hacks from third party lib :) */ _starpu_bind_thread_on_cpu(worker0->bindid, worker0->workerid, NULL); for (i = 0; i < worker_set->nworkers; i++) { worker = &worker_set->workers[i]; unsigned devid = worker->devid; unsigned workerid = worker->workerid; unsigned subdev = worker->subworkerid; float size = (float) global_mem[devid] / (1<<30); /* get the device's name */ char devname[64]; strncpy(devname, props[devid].name, 63); devname[63] = 0; #if defined(STARPU_HAVE_BUSID) #if defined(STARPU_HAVE_DOMAINID) if (props[devid].pciDomainID) snprintf(worker->name, sizeof(worker->name), "HIP %u.%u (%s %.1f GiB %04x:%02x:%02x.0)", devid, subdev, devname, size, props[devid].pciDomainID, props[devid].pciBusID, props[devid].pciDeviceID); else #endif snprintf(worker->name, sizeof(worker->name), "HIP %u.%u (%s %.1f GiB %02x:%02x.0)", devid, subdev, devname, size, props[devid].pciBusID, props[devid].pciDeviceID); #else snprintf(worker->name, sizeof(worker->name), "HIP %u.%u (%s %.1f GiB)", devid, subdev, devname, size); #endif snprintf(worker->short_name, sizeof(worker->short_name), "HIP %u.%u", devid, subdev); _STARPU_DEBUG("hip (%s) dev id %u worker %u thread is ready to run on CPU %d !\n", devname, devid, subdev, worker->bindid); worker->pipeline_length = starpu_get_env_number_default("STARPU_HIP_PIPELINE", 2); if (worker->pipeline_length > STARPU_MAX_PIPELINE) { _STARPU_DISP("Warning: STARPU_HIP_PIPELINE is %u, but STARPU_MAX_PIPELINE is only %u\n", worker->pipeline_length, STARPU_MAX_PIPELINE); worker->pipeline_length = STARPU_MAX_PIPELINE; } #if !defined(STARPU_NON_BLOCKING_DRIVERS) if (worker->pipeline_length >= 1) { /* We need non-blocking drivers, to poll for HIP task * termination */ _STARPU_DISP("Warning: reducing STARPU_HIP_PIPELINE to 0 because blocking drivers are enabled (and simgrid is not supported with this driver)\n"); worker->pipeline_length = 0; } #endif init_worker_context(workerid, worker->devid); _STARPU_TRACE_WORKER_INIT_END(workerid); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_end, devid, worker->workerid, starpu_prof_tool_driver_gpu, 0, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end(&pi, NULL, NULL); #endif } { char thread_name[16]; snprintf(thread_name, sizeof(thread_name), "HIP %u", worker->devid); starpu_pthread_setname(thread_name); } /* tell the main thread that this one is ready */ STARPU_PTHREAD_MUTEX_LOCK(&worker0->mutex); worker0->status = STATUS_UNKNOWN; worker0->worker_is_initialized = 1; STARPU_PTHREAD_COND_SIGNAL(&worker0->ready_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&worker0->mutex); /* tell the main thread that this one is also ready */ STARPU_PTHREAD_MUTEX_LOCK(&worker_set->mutex); worker_set->set_is_initialized = 1; STARPU_PTHREAD_COND_SIGNAL(&worker_set->ready_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set->mutex); return 0; } int _starpu_hip_driver_deinit(struct _starpu_worker *worker) { struct _starpu_worker_set *worker_set = worker->set; int lastdevid = -1; unsigned i; _STARPU_TRACE_WORKER_DEINIT_START; for (i = 0; i < worker_set->nworkers; i++) { worker = &worker_set->workers[i]; unsigned devid = worker->devid; unsigned memnode = worker->memory_node; unsigned usersleft; if ((int) devid == lastdevid) /* Already initialized */ continue; lastdevid = devid; STARPU_PTHREAD_MUTEX_LOCK(&hip_device_init_mutex[devid]); usersleft = --hip_device_users[devid]; STARPU_PTHREAD_MUTEX_UNLOCK(&hip_device_init_mutex[devid]); if (!usersleft) { /* I'm last, deinitialize device */ _starpu_datawizard_handle_all_pending_node_data_requests(memnode); /* In case there remains some memory that was automatically * allocated by StarPU, we release it now. Note that data * coherency is not maintained anymore at that point ! */ _starpu_free_all_automatically_allocated_buffers(memnode); _starpu_malloc_shutdown(memnode); deinit_device_context(devid); } STARPU_PTHREAD_MUTEX_LOCK(&hip_device_init_mutex[devid]); hip_device_init[devid] = UNINITIALIZED; STARPU_PTHREAD_MUTEX_UNLOCK(&hip_device_init_mutex[devid]); } for (i = 0; i < worker_set->nworkers; i++) { worker = &worker_set->workers[i]; unsigned workerid = worker->workerid; unsigned memnode = worker->memory_node; deinit_worker_context(workerid, worker->devid); #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_deinit, workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit(&pi, NULL, NULL); #endif } worker_set->workers[0].worker_is_initialized = 0; _STARPU_TRACE_WORKER_DEINIT_END(STARPU_HIP_WORKER); return 0; } static uintptr_t _starpu_hip_malloc_on_node(unsigned dst_node, size_t size, int flags) { uintptr_t addr = 0; (void) flags; unsigned devid = starpu_memory_node_get_devid(dst_node); starpu_hip_set_device(devid); /* Check if there is free memory */ size_t hip_mem_free, hip_mem_total; hipError_t status; status = hipMemGetInfo(&hip_mem_free, &hip_mem_total); if (status == hipSuccess && hip_mem_free * FREE_MARGIN < size) { addr = 0; } else { status = hipMalloc((void **)&addr, size); if (!addr || (status != hipSuccess)) { if (STARPU_UNLIKELY(status != hipErrorOutOfMemory)) STARPU_HIP_REPORT_ERROR(status); addr = 0; } } return addr; } static void _starpu_hip_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags) { (void) size; (void) flags; hipError_t err; unsigned devid = starpu_memory_node_get_devid(dst_node); starpu_hip_set_device(devid); err = hipFree((void*)addr); if (STARPU_UNLIKELY(err != hipSuccess)) STARPU_HIP_REPORT_ERROR(err); } int starpu_hip_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, hipStream_t stream, hipMemcpyKind kind) { #ifdef STARPU_HAVE_HIP_MEMCPY_PEER int peer_copy = 0; int src_dev = -1, dst_dev = -1; #endif hipError_t hipres = hipSuccess; if (kind == hipMemcpyDeviceToDevice && src_node != dst_node) { #ifdef STARPU_HAVE_HIP_MEMCPY_PEER peer_copy = 1; src_dev = starpu_memory_node_get_devid(src_node); dst_dev = starpu_memory_node_get_devid(dst_node); #else STARPU_ABORT(); #endif } if (stream) { double start; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); #ifdef STARPU_HAVE_HIP_MEMCPY_PEER if (peer_copy) { hipres = hipMemcpyPeerAsync((char *) dst_ptr, dst_dev, (char *) src_ptr, src_dev, ssize, stream); } else #endif { hipres = hipMemcpyAsync((char *)dst_ptr, (char *)src_ptr, ssize, kind, stream); } (void) hipGetLastError(); starpu_interface_end_driver_copy_async(src_node, dst_node, start); } /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ if (stream == NULL || hipres) { /* do it in a synchronous fashion */ #ifdef STARPU_HAVE_HIP_MEMCPY_PEER if (peer_copy) { hipres = hipMemcpyPeer((char *) dst_ptr, dst_dev, (char *) src_ptr, src_dev, ssize); } else #endif { hipres = hipMemcpy((char *)dst_ptr, (char *)src_ptr, ssize, kind); } (void) hipGetLastError(); if (!hipres) hipres = hipDeviceSynchronize(); if (STARPU_UNLIKELY(hipres)) STARPU_HIP_REPORT_ERROR(hipres); return 0; } return -EAGAIN; } /* Driver porters: this is optional but really recommended */ int starpu_hip_copy2d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, hipStream_t stream, hipMemcpyKind kind) { hipError_t hipres = hipSuccess; if (kind == hipMemcpyDeviceToDevice && src_node != dst_node) { #ifdef STARPU_HAVE_HIP_MEMCPY_PEER # ifdef BUGGED_MEMCPY3D STARPU_ABORT_MSG("HIP memcpy 3D peer buggy, but core triggered one?!"); # endif #else STARPU_ABORT_MSG("HIP memcpy 3D peer not available, but core triggered one ?!"); #endif } if (stream) { double start; starpu_interface_start_driver_copy_async(src_node, dst_node, &start); hipres = hipMemcpy2DAsync((char *)dst_ptr, ld_dst, (char *)src_ptr, ld_src, blocksize, numblocks, kind, stream); starpu_interface_end_driver_copy_async(src_node, dst_node, start); } /* Test if the asynchronous copy has failed or if the caller only asked for a synchronous copy */ if (stream == NULL || hipres) { hipres = hipMemcpy2D((char *)dst_ptr, ld_dst, (char *)src_ptr, ld_src, blocksize, numblocks, kind); if (!hipres) hipres = hipDeviceSynchronize(); if (STARPU_UNLIKELY(hipres)) STARPU_HIP_REPORT_ERROR(hipres); return 0; } return -EAGAIN; } static inline hipEvent_t *_starpu_hip_event(union _starpu_async_channel_event *_event) { hipEvent_t *event; STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); event = (void *) _event; return event; } static unsigned _starpu_hip_test_request_completion(struct _starpu_async_channel *async_channel) { hipEvent_t event; hipError_t hipres; unsigned success; event = *_starpu_hip_event(&async_channel->event); hipres = hipEventQuery(event); success = (hipres == hipSuccess); if (success) hipEventDestroy(event); else if (hipres != hipErrorNotReady) STARPU_HIP_REPORT_ERROR(hipres); return success; } /* Only used at starpu_shutdown */ static void _starpu_hip_wait_request_completion(struct _starpu_async_channel *async_channel) { hipEvent_t event; hipError_t hipres; event = *_starpu_hip_event(&async_channel->event); hipres = hipEventSynchronize(event); if (STARPU_UNLIKELY(hipres)) STARPU_HIP_REPORT_ERROR(hipres); hipres = hipEventDestroy(event); if (STARPU_UNLIKELY(hipres)) STARPU_HIP_REPORT_ERROR(hipres); } #ifdef STARPU_HAVE_HIP_MEMCPY_PEER static void starpu_hip_set_copy_device(unsigned src_node, unsigned dst_node) { enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); unsigned devid; if ((src_kind == STARPU_HIP_RAM) && (dst_kind == STARPU_HIP_RAM)) { /* GPU-GPU transfer, issue it from the destination */ devid = starpu_memory_node_get_devid(dst_node); } else { unsigned node = (dst_kind == STARPU_HIP_RAM)?dst_node:src_node; devid = starpu_memory_node_get_devid(node); } starpu_hip_set_device(devid); } #endif static int _starpu_hip_copy_interface_from_hip_to_hip(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_HIP_RAM && dst_kind == STARPU_HIP_RAM); #ifdef STARPU_HAVE_HIP_MEMCPY_PEER starpu_hip_set_copy_device(src_node, dst_node); #else STARPU_ASSERT(src_node == dst_node); #endif int ret = 1; hipError_t hipres; hipStream_t stream; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; /* HIP - HIP transfer */ if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_hip_copy_disabled() || !copy_methods->any_to_any) { STARPU_ASSERT(copy_methods->any_to_any); /* this is not associated to a request so it's synchronous */ copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } else { req->async_channel.node_ops = &_starpu_driver_hip_node_ops; hipres = hipEventCreateWithFlags(_starpu_hip_event(&req->async_channel.event), hipEventDisableTiming); if (STARPU_UNLIKELY(hipres != hipSuccess)) STARPU_HIP_REPORT_ERROR(hipres); stream = starpu_hip_get_peer_transfer_stream(src_node, dst_node); STARPU_ASSERT(copy_methods->any_to_any); ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); hipres = hipEventRecord(*_starpu_hip_event(&req->async_channel.event), stream); if (STARPU_UNLIKELY(hipres != hipSuccess)) STARPU_HIP_REPORT_ERROR(hipres); } return ret; } static int _starpu_hip_copy_interface_from_hip_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_HIP_RAM && dst_kind == STARPU_CPU_RAM); #ifdef STARPU_HAVE_HIP_MEMCPY_PEER starpu_hip_set_copy_device(src_node, dst_node); #endif int ret = 1; hipError_t hipres; hipStream_t stream; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; /* only the proper CUBLAS thread can initiate this directly ! */ #if !defined(STARPU_HAVE_HIP_MEMCPY_PEER) STARPU_ASSERT(starpu_worker_get_local_memory_node() == src_node); #endif if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_hip_copy_disabled() || !copy_methods->any_to_any) { /* this is not associated to a request so it's synchronous */ STARPU_ASSERT(copy_methods->any_to_any); copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } else { req->async_channel.node_ops = &_starpu_driver_hip_node_ops; hipres = hipEventCreateWithFlags(_starpu_hip_event(&req->async_channel.event), hipEventDisableTiming); if (STARPU_UNLIKELY(hipres != hipSuccess)) STARPU_HIP_REPORT_ERROR(hipres); stream = starpu_hip_get_out_transfer_stream(src_node); STARPU_ASSERT(copy_methods->any_to_any); ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); hipres = hipEventRecord(*_starpu_hip_event(&req->async_channel.event), stream); if (STARPU_UNLIKELY(hipres != hipSuccess)) STARPU_HIP_REPORT_ERROR(hipres); } return ret; } static int _starpu_hip_copy_interface_from_cpu_to_hip(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_HIP_RAM); #ifdef STARPU_HAVE_HIP_MEMCPY_PEER starpu_hip_set_copy_device(src_node, dst_node); #endif int ret = 1; hipError_t hipres; hipStream_t stream; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; /* STARPU_CPU_RAM -> CUBLAS_RAM */ /* only the proper CUBLAS thread can initiate this ! */ #if !defined(STARPU_HAVE_HIP_MEMCPY_PEER) STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node); #endif if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_hip_copy_disabled() || !copy_methods->any_to_any) { /* this is not associated to a request so it's synchronous */ STARPU_ASSERT(copy_methods->any_to_any); copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } else { req->async_channel.node_ops = &_starpu_driver_hip_node_ops; hipres = hipEventCreateWithFlags(_starpu_hip_event(&req->async_channel.event), hipEventDisableTiming); if (STARPU_UNLIKELY(hipres != hipSuccess)) STARPU_HIP_REPORT_ERROR(hipres); stream = starpu_hip_get_in_transfer_stream(dst_node); STARPU_ASSERT(copy_methods->any_to_any); ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); hipres = hipEventRecord(*_starpu_hip_event(&req->async_channel.event), stream); if (STARPU_UNLIKELY(hipres != hipSuccess)) STARPU_HIP_REPORT_ERROR(hipres); } return ret; } static int _starpu_hip_copy_data_from_hip_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_HIP_RAM && dst_kind == STARPU_CPU_RAM); return starpu_hip_copy_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, size, async_channel?starpu_hip_get_out_transfer_stream(src_node):NULL, hipMemcpyDeviceToHost); } static int _starpu_hip_copy_data_from_hip_to_hip(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_HIP_RAM && dst_kind == STARPU_HIP_RAM); #ifndef STARPU_HAVE_HIP_MEMCPY_PEER STARPU_ASSERT(src_node == dst_node); #endif return starpu_hip_copy_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, size, async_channel?starpu_hip_get_peer_transfer_stream(src_node, dst_node):NULL, hipMemcpyDeviceToDevice); } static int _starpu_hip_copy_data_from_cpu_to_hip(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_HIP_RAM); return starpu_hip_copy_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, size, async_channel?starpu_hip_get_in_transfer_stream(dst_node):NULL, hipMemcpyHostToDevice); } static int _starpu_hip_copy2d_data_from_hip_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_HIP_RAM && dst_kind == STARPU_CPU_RAM); return starpu_hip_copy2d_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, blocksize, numblocks, ld_src, ld_dst, async_channel?starpu_hip_get_out_transfer_stream(src_node):NULL, hipMemcpyDeviceToHost); } static int _starpu_hip_copy2d_data_from_hip_to_hip(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_HIP_RAM && dst_kind == STARPU_HIP_RAM); #ifndef STARPU_HAVE_HIP_MEMCPY_PEER STARPU_ASSERT(src_node == dst_node); #endif return starpu_hip_copy2d_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, blocksize, numblocks, ld_src, ld_dst, async_channel?starpu_hip_get_peer_transfer_stream(src_node, dst_node):NULL, hipMemcpyDeviceToDevice); } static int _starpu_hip_copy2d_data_from_cpu_to_hip(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_HIP_RAM); return starpu_hip_copy2d_async_sync((void*) (src + src_offset), src_node, (void*) (dst + dst_offset), dst_node, blocksize, numblocks, ld_src, ld_dst, async_channel?starpu_hip_get_in_transfer_stream(dst_node):NULL, hipMemcpyHostToDevice); } static int _starpu_hip_is_direct_access_supported(unsigned node, unsigned handling_node) { #if defined(STARPU_HAVE_HIP_MEMCPY_PEER) (void) node; enum starpu_node_kind kind = starpu_node_get_kind(handling_node); return kind == STARPU_HIP_RAM; #else /* STARPU_HAVE_HIP_MEMCPY_PEER */ /* Direct GPU-GPU transfers are not allowed in general */ (void) node; (void) handling_node; return 0; #endif /* STARPU_HAVE_HIP_MEMCPY_PEER */ } static void start_job_on_hip(struct _starpu_job *j, struct _starpu_worker *worker, unsigned char pipeline_idx STARPU_ATTRIBUTE_UNUSED) { STARPU_ASSERT(j); struct starpu_task *task = j->task; int profiling = starpu_profiling_status_get(); #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi; #endif STARPU_ASSERT(task); struct starpu_codelet *cl = task->cl; STARPU_ASSERT(cl); _starpu_set_local_worker_key(worker); _starpu_set_current_task(task); j->workerid = worker->workerid; if (worker->ntasks == 1) { /* We are alone in the pipeline, the kernel will start now, record it */ _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, profiling); } #if defined(STARPU_HAVE_HIP_MEMCPY_PEER) /* We make sure we do manipulate the proper device */ starpu_hip_set_device(worker->devid); #endif starpu_hip_func_t func = _starpu_task_get_hip_nth_implementation(cl, j->nimpl); STARPU_ASSERT_MSG(func, "when STARPU_HIP is defined in 'where', hip_func or hip_funcs has to be defined"); if (_starpu_get_disable_kernels() <= 0) { _STARPU_TRACE_START_EXECUTING(j); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_gpu, -1, (void*)func); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec(&pi, NULL, NULL); #endif func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_gpu, -1, (void*)func); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec(&pi, NULL, NULL); #endif _STARPU_TRACE_END_EXECUTING(j); } } static void finish_job_on_hip(struct _starpu_job *j, struct _starpu_worker *worker); /* Execute a job, up to completion for synchronous jobs */ static void execute_job_on_hip(struct starpu_task *task, struct _starpu_worker *worker) { int workerid = worker->workerid; struct _starpu_job *j = _starpu_get_job_associated_to_task(task); unsigned char pipeline_idx = (worker->first_task + worker->ntasks - 1)%STARPU_MAX_PIPELINE; start_job_on_hip(j, worker, pipeline_idx); if (!used_stream[workerid]) { used_stream[workerid] = 1; _STARPU_DISP("Warning: starpu_hip_get_local_stream() was not used to submit kernel to HIP on worker %d. HIP will thus introduce a lot of useless synchronizations, which will prevent proper overlapping of data transfers and kernel execution. See the HIP-specific part of the 'Check List When Performance Are Not There' of the StarPU handbook\n", workerid); } if (task->cl->hip_flags[j->nimpl] & STARPU_HIP_ASYNC) { if (worker->pipeline_length == 0) { /* Forced synchronous execution */ hipStreamSynchronize(starpu_hip_get_local_stream()); finish_job_on_hip(j, worker); } else { /* Record event to synchronize with task termination later */ hipError_t hipres = hipEventRecord(task_events[workerid][pipeline_idx], starpu_hip_get_local_stream()); if (STARPU_UNLIKELY(hipres)) STARPU_HIP_REPORT_ERROR(hipres); } } else /* Synchronous execution */ { STARPU_ASSERT_MSG(hipStreamQuery(starpu_hip_get_local_stream()) == hipSuccess, "Unless when using the STARPU_HIP_ASYNC flag, HIP codelets have to wait for termination of their kernels on the starpu_hip_get_local_stream() stream"); finish_job_on_hip(j, worker); } } static void finish_job_on_hip(struct _starpu_job *j, struct _starpu_worker *worker) { int profiling = starpu_profiling_status_get(); if (worker->pipeline_length) worker->current_tasks[worker->first_task] = NULL; else worker->current_task = NULL; worker->first_task = (worker->first_task + 1) % STARPU_MAX_PIPELINE; worker->ntasks--; _starpu_driver_end_job(worker, j, &worker->perf_arch, 0, profiling); _starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, profiling); _starpu_push_task_output(j); _starpu_set_current_task(NULL); _starpu_handle_job_termination(j); } /* One iteration of the main driver loop */ int _starpu_hip_driver_run_once(struct _starpu_worker *worker) { struct _starpu_worker_set *worker_set = worker->set; struct _starpu_worker *worker0 = &worker_set->workers[0]; struct starpu_task *tasks[worker_set->nworkers]; struct starpu_task *task; struct _starpu_job *j; #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi; #endif int i, res; int idle_tasks, idle_transfers; _starpu_set_local_worker_key(worker0); /* First poll for completed jobs */ idle_tasks = 0; idle_transfers = 0; for (i = 0; i < (int) worker_set->nworkers; i++) { worker = &worker_set->workers[i]; int workerid = worker->workerid; unsigned memnode = worker->memory_node; if (!worker->ntasks) idle_tasks++; if (!worker->task_transferring) idle_transfers++; if (!worker->ntasks && !worker->task_transferring) { /* Even nothing to test */ continue; } /* First test for transfers pending for next task */ task = worker->task_transferring; if (task && worker->nb_buffers_transferred == worker->nb_buffers_totransfer) { STARPU_RMB(); _STARPU_TRACE_END_PROGRESS(memnode); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_gpu, memnode, worker->nb_buffers_totransfer, worker->nb_buffers_transferred); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif j = _starpu_get_job_associated_to_task(task); _starpu_set_local_worker_key(worker); _starpu_fetch_task_input_tail(task, j, worker); /* Reset it */ worker->task_transferring = NULL; if (worker->ntasks > 1 && !(task->cl->hip_flags[j->nimpl] & STARPU_HIP_ASYNC)) { /* We have to execute a non-asynchronous task but we * still have tasks in the pipeline... Record it to * prevent more tasks from coming, and do it later */ worker->pipeline_stuck = 1; } else { execute_job_on_hip(task, worker); } _STARPU_TRACE_START_PROGRESS(memnode); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_start_transfer, worker->workerid, workerid, starpu_prof_tool_driver_gpu, memnode, worker->nb_buffers_totransfer, worker->nb_buffers_transferred); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); #endif } /* Then test for termination of queued tasks */ if (!worker->ntasks) /* No queued task */ continue; if (worker->pipeline_length) task = worker->current_tasks[worker->first_task]; else task = worker->current_task; if (task == worker->task_transferring) /* Next task is still pending transfer */ continue; /* On-going asynchronous task, check for its termination first */ hipError_t hipres = hipEventQuery(task_events[workerid][worker->first_task]); if (hipres != hipSuccess) { STARPU_ASSERT_MSG(hipres == hipErrorNotReady, "HIP error on task %p, codelet %p (%s): %s (%d)", task, task->cl, _starpu_codelet_get_model_name(task->cl), hipGetErrorString(hipres), hipres); } else { _STARPU_TRACE_END_PROGRESS(memnode); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_gpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif /* Asynchronous task completed! */ _starpu_set_local_worker_key(worker); finish_job_on_hip(_starpu_get_job_associated_to_task(task), worker); /* See next task if any */ if (worker->ntasks) { if (worker->current_tasks[worker->first_task] != worker->task_transferring) { task = worker->current_tasks[worker->first_task]; j = _starpu_get_job_associated_to_task(task); if (task->cl->hip_flags[j->nimpl] & STARPU_HIP_ASYNC) { /* An asynchronous task, it was already * queued, it's now running, record its start time. */ _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, starpu_profiling_status_get()); } else { /* A synchronous task, we have finished * flushing the pipeline, we can now at * last execute it. */ _STARPU_TRACE_EVENT("sync_task"); execute_job_on_hip(task, worker); _STARPU_TRACE_EVENT("end_sync_task"); worker->pipeline_stuck = 0; } } else /* Data for next task didn't have time to finish transferring :/ */ _STARPU_TRACE_WORKER_START_FETCH_INPUT(NULL, workerid); } _STARPU_TRACE_START_PROGRESS(memnode); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); #endif } if (!worker->pipeline_length || worker->ntasks < worker->pipeline_length) idle_tasks++; } #if defined(STARPU_NON_BLOCKING_DRIVERS) if (!idle_tasks) { /* No task ready yet, no better thing to do than waiting */ __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, !idle_transfers); return 0; } #endif /* Something done, make some progress */ res = __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, 1); /* And pull tasks */ res |= _starpu_get_multi_worker_task(worker_set->workers, tasks, worker_set->nworkers, worker0->memory_node); for (i = 0; i < (int) worker_set->nworkers; i++) { worker = &worker_set->workers[i]; unsigned memnode STARPU_ATTRIBUTE_UNUSED = worker->memory_node; task = tasks[i]; if (!task) continue; j = _starpu_get_job_associated_to_task(task); /* can HIP do that task ? */ if (!_STARPU_MAY_PERFORM(j, HIP)) { /* this is neither a cuda or a cublas task */ _starpu_worker_refuse_task(worker, task); continue; } /* Fetch data asynchronously */ #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif _STARPU_TRACE_END_PROGRESS(memnode); _starpu_set_local_worker_key(worker); res = _starpu_fetch_task_input(task, j, 1); STARPU_ASSERT(res == 0); _STARPU_TRACE_START_PROGRESS(memnode); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); #endif } return 0; } void *_starpu_hip_worker(void *_arg) { struct _starpu_worker *worker = _arg; struct _starpu_worker_set* worker_set = worker->set; #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi; #endif unsigned i; _starpu_hip_driver_init(worker); for (i = 0; i < worker_set->nworkers; i++) { #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker_set->workers[i].workerid, worker_set->workers[i].workerid, starpu_prof_tool_driver_gpu, worker_set->workers[i].memory_node, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); #endif _STARPU_TRACE_START_PROGRESS(worker_set->workers[i].memory_node); } while (_starpu_machine_is_running()) { _starpu_may_pause(); _starpu_hip_driver_run_once(worker); } for (i = 0; i < worker_set->nworkers; i++) { _STARPU_TRACE_END_PROGRESS(worker_set->workers[i].memory_node); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker_set->workers[i].workerid, worker_set->workers[i].workerid, starpu_prof_tool_driver_gpu, worker_set->workers[i].memory_node, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif } _starpu_hip_driver_deinit(worker); return NULL; } #ifdef STARPU_HAVE_HWLOC hwloc_obj_t _starpu_hip_get_hwloc_obj(hwloc_topology_t topology, int devid) { #if HAVE_DECL_HWLOC_HIP_GET_DEVICE_OSDEV_BY_INDEX return hwloc_hip_get_device_osdev_by_index(topology, devid); #else (void)topology; (void)devid; return NULL; #endif } #endif void starpu_hipblas_report_error(const char *func, const char *file, int line, int status) { #ifdef STARPU_USE_HIPBLAS char *errormsg; switch (status) { case HIPBLAS_STATUS_SUCCESS: errormsg = "success"; break; case HIPBLAS_STATUS_NOT_INITIALIZED: errormsg = "not initialized"; break; case HIPBLAS_STATUS_ALLOC_FAILED: errormsg = "alloc failed"; break; case HIPBLAS_STATUS_INVALID_VALUE: errormsg = "invalid value"; break; case HIPBLAS_STATUS_ARCH_MISMATCH: errormsg = "arch mismatch"; break; case HIPBLAS_STATUS_EXECUTION_FAILED: errormsg = "execution failed"; break; case HIPBLAS_STATUS_INTERNAL_ERROR: errormsg = "internal error"; break; default: errormsg = "unknown error"; break; } _STARPU_MSG("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg); #endif STARPU_ABORT(); } void starpu_hip_report_error(const char *func, const char *file, int line, hipError_t status) { const char *errormsg = hipGetErrorString(status); _STARPU_ERROR("oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg); } int _starpu_hip_run_from_worker(struct _starpu_worker *worker) { /* Let's go ! */ _starpu_hip_worker(worker); return 0; } int _starpu_hip_driver_set_devid(struct starpu_driver *driver, struct _starpu_worker *worker) { driver->id.hip_id = worker->devid; return 0; } int _starpu_hip_driver_is_devid(struct starpu_driver *driver, struct _starpu_worker *worker) { return driver->id.hip_id == worker->devid; } struct _starpu_driver_ops _starpu_driver_hip_ops = { .init = _starpu_hip_driver_init, .run = _starpu_hip_run_from_worker, .run_once = _starpu_hip_driver_run_once, .deinit = _starpu_hip_driver_deinit, .set_devid = _starpu_hip_driver_set_devid, .is_devid = _starpu_hip_driver_is_devid, }; struct _starpu_node_ops _starpu_driver_hip_node_ops = { .name = "hip driver", .malloc_on_node = _starpu_hip_malloc_on_node, .free_on_node = _starpu_hip_free_on_node, .is_direct_access_supported = _starpu_hip_is_direct_access_supported, .copy_interface_to[STARPU_CPU_RAM] = _starpu_hip_copy_interface_from_hip_to_cpu, .copy_interface_to[STARPU_HIP_RAM] = _starpu_hip_copy_interface_from_hip_to_hip, .copy_interface_from[STARPU_CPU_RAM] = _starpu_hip_copy_interface_from_cpu_to_hip, .copy_interface_from[STARPU_HIP_RAM] = _starpu_hip_copy_interface_from_hip_to_hip, .copy_data_to[STARPU_CPU_RAM] = _starpu_hip_copy_data_from_hip_to_cpu, .copy_data_to[STARPU_HIP_RAM] = _starpu_hip_copy_data_from_hip_to_hip, .copy_data_from[STARPU_CPU_RAM] = _starpu_hip_copy_data_from_cpu_to_hip, .copy_data_from[STARPU_HIP_RAM] = _starpu_hip_copy_data_from_hip_to_hip, .copy2d_data_to[STARPU_CPU_RAM] = _starpu_hip_copy2d_data_from_hip_to_cpu, .copy2d_data_to[STARPU_HIP_RAM] = _starpu_hip_copy2d_data_from_hip_to_hip, .copy2d_data_from[STARPU_CPU_RAM] = _starpu_hip_copy2d_data_from_cpu_to_hip, .copy2d_data_from[STARPU_HIP_RAM] = _starpu_hip_copy2d_data_from_hip_to_hip, .wait_request_completion = _starpu_hip_wait_request_completion, .test_request_completion = _starpu_hip_test_request_completion, }; starpu-1.4.9+dfsg/src/drivers/hip/driver_hip.h000066400000000000000000000046611507764646700213500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DRIVER_HIP_H__ #define __DRIVER_HIP_H__ /** @file */ #include void _starpu_hip_preinit(void); #ifdef STARPU_USE_HIP #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wundef" #pragma GCC diagnostic ignored "-Wunused-result" #pragma GCC diagnostic ignored "-Wdeprecated-declarations" #ifndef __cplusplus #pragma GCC diagnostic ignored "-Wimplicit-int" #endif #pragma GCC diagnostic ignored "-Wreturn-type" #include #include #pragma GCC diagnostic pop // not needed yet #include #endif #include #include #include #pragma GCC visibility push(hidden) extern struct _starpu_driver_ops _starpu_driver_hip_ops; extern struct _starpu_node_ops _starpu_driver_hip_node_ops; extern int _starpu_nworker_per_hip; void _starpu_hip_init(void); #ifdef STARPU_HAVE_HWLOC struct _starpu_machine_topology; hwloc_obj_t _starpu_hip_get_hwloc_obj(hwloc_topology_t topology, int devid); #endif extern int _starpu_hip_bus_ids[STARPU_MAXHIPDEVS+STARPU_MAXNUMANODES][STARPU_MAXHIPDEVS+STARPU_MAXNUMANODES]; #if defined(STARPU_USE_HIP) void _starpu_hip_discover_devices(struct _starpu_machine_config *); void _starpu_init_hip_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *); void _starpu_hip_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config, struct _starpu_worker *workerarg); void _starpu_hip_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config, struct _starpu_worker *workerarg); void _starpu_init_hip(void); void *_starpu_hip_worker(void *); #else # define _starpu_hip_discover_devices(config) ((void) config) #endif #pragma GCC visibility pop #endif // __DRIVER_HIP_H__ starpu-1.4.9+dfsg/src/drivers/hip/driver_hip_init.c000066400000000000000000000030631507764646700223610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "driver_hip.h" static struct _starpu_driver_info driver_info = { .name_upper = "HIP", .name_var = "HIP", .name_lower = "hip", .memory_kind = STARPU_HIP_RAM, .alpha = 13.33f, #if defined(STARPU_USE_HIP) .driver_ops = &_starpu_driver_hip_ops, .run_worker = _starpu_hip_worker, #if defined(STARPU_HAVE_HWLOC) .get_hwloc_obj = _starpu_hip_get_hwloc_obj, #endif .init_worker_binding = _starpu_hip_init_worker_binding, .init_worker_memory = _starpu_hip_init_worker_memory, #endif }; static struct _starpu_memory_driver_info memory_driver_info = { .name_upper = "HIP", .worker_archtype = STARPU_HIP_WORKER, #if defined(STARPU_USE_HIP) .ops = &_starpu_driver_hip_node_ops, #endif }; void _starpu_hip_preinit(void) { _starpu_driver_info_register(STARPU_HIP_WORKER, &driver_info); _starpu_memory_driver_info_register(STARPU_HIP_RAM, &memory_driver_info); } starpu-1.4.9+dfsg/src/drivers/hip/starpu_hipblas.c000066400000000000000000000076271507764646700222350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #ifdef STARPU_USE_HIP #ifdef STARPU_USE_HIPBLAS #include #include #ifdef STARPU_HIP_PLATFORM_AMD #include #endif #ifdef STARPU_HIP_PLATFORM_NVIDIA #include #endif static int hipblas_initialized[STARPU_NMAXWORKERS]; static hipblasHandle_t hipblas_handles[STARPU_NMAXWORKERS]; static hipblasHandle_t main_handle; static starpu_pthread_mutex_t mutex; static unsigned get_idx(void) { unsigned workerid = starpu_worker_get_id_check(); unsigned th_per_dev = _starpu_get_machine_config()->topology.hip_th_per_dev; unsigned th_per_stream = _starpu_get_machine_config()->topology.hip_th_per_stream; if (th_per_dev) return starpu_worker_get_devid(workerid); else if (th_per_stream) return workerid; else /* same thread for all devices */ return 0; } static void init_hipblas_func(void *args STARPU_ATTRIBUTE_UNUSED) { unsigned idx = get_idx(); hipblasStatus_t status = hipblasCreate(&hipblas_handles[starpu_worker_get_id_check()]); if (status != HIPBLAS_STATUS_SUCCESS) STARPU_HIPBLAS_REPORT_ERROR(status); status=hipblasSetStream(hipblas_handles[starpu_worker_get_id_check()], starpu_hip_get_local_stream()); if (status != HIPBLAS_STATUS_SUCCESS) STARPU_HIPBLAS_REPORT_ERROR(status); STARPU_PTHREAD_MUTEX_LOCK(&mutex); if (!(hipblas_initialized[idx]++)) { #ifdef STARPU_HIP_PLATFORM_NVIDIA cublasInit(); #elif defined(STARPU_HIP_PLATFORM_AMD) rocblas_initialize(); #endif } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } static void shutdown_hipblas_func(void *args STARPU_ATTRIBUTE_UNUSED) { unsigned idx = get_idx(); STARPU_PTHREAD_MUTEX_LOCK(&mutex); if (!--hipblas_initialized[idx]) { #ifdef STARPU_HIP_PLATFORM_NVIDIA cublasShutdown(); #elif defined(STARPU_HIP_PLATFORM_AMD) // no equivalent #endif } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); hipblasDestroy(hipblas_handles[starpu_worker_get_id_check()]); } #endif /* STARPU_USE_HIPBLAS */ #endif /* STARPU_USE_HIP */ void starpu_hipblas_init(void) { #ifdef STARPU_USE_HIP #ifdef STARPU_USE_HIPBLAS starpu_execute_on_each_worker(init_hipblas_func, NULL, STARPU_HIP); if (hipblasCreate(&main_handle) != HIPBLAS_STATUS_SUCCESS) main_handle = NULL; #endif #endif } void starpu_hipblas_shutdown(void) { #ifdef STARPU_USE_HIP #ifdef STARPU_USE_HIPBLAS starpu_execute_on_each_worker(shutdown_hipblas_func, NULL, STARPU_HIP); if (main_handle) hipblasDestroy(main_handle); #endif #endif } void starpu_hipblas_set_stream(void) { #ifdef STARPU_USE_HIP #ifdef STARPU_USE_HIPBLAS unsigned workerid = starpu_worker_get_id_check(); int devnum = starpu_worker_get_devnum(workerid); if (!_starpu_get_machine_config()->topology.hip_th_per_dev || (!_starpu_get_machine_config()->topology.hip_th_per_stream && _starpu_get_machine_config()->topology.nworker[STARPU_HIP_WORKER][devnum] > 1)) hipblasSetStream(hipblas_handles[starpu_worker_get_id_check()], starpu_hip_get_local_stream()); #endif #endif } #ifdef STARPU_USE_HIP #ifdef STARPU_USE_HIPBLAS hipblasHandle_t starpu_hipblas_get_local_handle(void) { int workerid = starpu_worker_get_id(); if (workerid >= 0) return hipblas_handles[workerid]; else return main_handle; } #endif #endif starpu-1.4.9+dfsg/src/drivers/max/000077500000000000000000000000001507764646700170425ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/drivers/max/driver_max_fpga.c000066400000000000000000000556561507764646700223640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include "driver_max_fpga.h" #include #include #include #include /* the number of FPGA devices */ static unsigned nmax_fpga; static size_t max_fpga_mem[STARPU_MAXMAXFPGADEVS]; static max_engine_t *engines[STARPU_MAXMAXFPGADEVS]; static fpga_mem current_address[STARPU_MAXMAXFPGADEVS]; static unsigned max_fpga_bindid_init[STARPU_MAXMAXFPGADEVS]; static unsigned max_fpga_bindid[STARPU_MAXMAXFPGADEVS]; static unsigned max_fpga_memory_init[STARPU_MAXMAXFPGADEVS]; static unsigned max_fpga_memory_nodes[STARPU_MAXMAXFPGADEVS]; static void _starpu_max_fpga_limit_max_fpga_mem(unsigned); static size_t _starpu_max_fpga_get_max_fpga_mem_size(unsigned devid); static size_t _starpu_max_fpga_get_max_fpga_mem_size(unsigned devid) { return max_fpga_mem[devid]; } max_engine_t *starpu_max_fpga_get_local_engine(void) { int worker = starpu_worker_get_id_check(); int devid = starpu_worker_get_devid(worker); STARPU_ASSERT_MSG(engines[devid], "engine for fpga %d on worker %d is NULL!?", devid, worker); return engines[devid]; } /* This is called to initialize FPGA and discover devices */ void _starpu_init_max_fpga() { memset(&max_fpga_bindid_init, 0, sizeof(max_fpga_bindid_init)); memset(&max_fpga_memory_init, 0, sizeof(max_fpga_memory_init)); } static void _starpu_initialize_workers_max_fpga_deviceid(struct _starpu_machine_config *config) { struct _starpu_machine_topology *topology = &config->topology; struct starpu_conf *uconf = &config->conf; _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_max_fpga_deviceid == 0 ? NULL : (int *)uconf->workers_max_fpga_deviceid, &(config->current_devid[STARPU_MAX_FPGA_WORKER]), (int *)topology->workers_max_fpga_deviceid, "STARPU_WORKERS_MAX_FPGAID", topology->nhwdevices[STARPU_MAX_FPGA_WORKER], STARPU_MAX_FPGA_WORKER); _starpu_devices_drop_duplicate(topology->workers_max_fpga_deviceid); } static unsigned _starpu_max_fpga_get_device_count(void) { return nmax_fpga; } /* This is called to really discover the hardware */ void _starpu_max_fpga_discover_devices (struct _starpu_machine_config *config) { //TODO: This is statically assigned, in the next round of integration // I will have to read from the struct fpga in fpga struct starpu_max_load *load = _starpu_config.conf.max_fpga_load; const char *sim_socket = max_config_get_string(MAX_CONFIG_USE_SIMULATION); int n; if (!load) { /* Nothing specified, single-FPGA execution with basic static * interface, file will be auto-loaded by SLiC. */ n = 1; } else { struct starpu_max_load *cur, *star = NULL; size_t nstar = 0; /* First check if we have a star, we will want to subtract non-star loads from it */ for (cur = load; cur->engine_id_pattern; cur++) if (!strcmp(cur->engine_id_pattern, "*") || strstr(cur->engine_id_pattern, ":*")) { STARPU_ASSERT_MSG(!cur[1].file, "in starpu_max_load array, * pattern must be last"); star = cur; if (sim_socket) /* not specified, assume 1 */ nstar = 1; else nstar = max_count_engines_free(cur->file, star->engine_id_pattern); break; } n = 0; /* Now check the non-star loads */ for (cur = load; cur != star && cur->engine_id_pattern; cur++) { size_t size; size = max_count_engines_free(load->file, load->engine_id_pattern); STARPU_ASSERT_MSG(size > 0, "cannot load starpu_max_load element %u on %s", (unsigned) (cur - load), load->engine_id_pattern); /* One FPGA more to be used */ n++; if (star) { size = max_count_engines_free(load->file, star->engine_id_pattern); if (size > 1) /* One of the star devices will be used to load this file */ nstar--; } } n += nstar; } //LMemInterface addLMemInterface() //// pour récupérer l'accès à la LMem if (n > STARPU_MAXMAXFPGADEVS) { _STARPU_DISP("Warning: %d Maxeler FPGA devices available. Only %d enabled. Use configure option --enable-maxmaxfpgadev=xxx to update the maximum value of supported Maxeler FPGA devices.\n", n, STARPU_MAXMAXFPGADEVS); n = STARPU_MAXMAXFPGADEVS; } config->topology.nhwdevices[STARPU_MAX_FPGA_WORKER] = nmax_fpga = n; } /* Determine which devices we will use */ void _starpu_init_max_fpga_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *) { int nmax_fpga = config->conf.nmax_fpga; if (nmax_fpga != 0) { /* The user did not disable FPGA. We need to initialize * FPGA early to count the number of devices */ _starpu_init_max_fpga(); int nb_devices = _starpu_max_fpga_get_device_count(); _starpu_topology_check_ndevices(&nmax_fpga, nb_devices, 0, STARPU_MAXMAXFPGADEVS, 0, "nmax_fpga", "Maxeler FPGA", "maxmaxfpgadev"); } /* Now we know how many MAX FPGA devices will be used */ topology->ndevices[STARPU_MAX_FPGA_WORKER] = nmax_fpga; _starpu_initialize_workers_max_fpga_deviceid(config); unsigned max_fpga; for (max_fpga = 0; (int) max_fpga < nmax_fpga; max_fpga++) { int devid = _starpu_get_next_devid(topology, config, STARPU_MAX_FPGA_WORKER); if (devid == -1) { // There is no more devices left topology->ndevices[STARPU_MAX_FPGA_WORKER] = max_fpga; break; } _starpu_topology_configure_workers(topology, config, STARPU_MAX_FPGA_WORKER, max_fpga, devid, 0, 0, 1, 1, NULL, NULL); } } /* Bind the driver on a CPU core */ void _starpu_max_fpga_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { unsigned *preferred_binding = NULL; unsigned npreferred = 0; if (max_fpga_bindid_init[devid]) { workerarg->bindid = max_fpga_bindid[devid]; } else { max_fpga_bindid_init[devid] = 1; workerarg->bindid = max_fpga_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); } } /* Set up memory and buses */ void _starpu_max_fpga_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { unsigned memory_node = -1; /* Perhaps the worker has some "favourite" bindings */ unsigned devid = workerarg->devid; unsigned numa; if (max_fpga_memory_init[devid]) { memory_node = max_fpga_memory_nodes[devid]; } else { max_fpga_memory_init[devid] = 1; memory_node = max_fpga_memory_nodes[devid] = _starpu_memory_node_register(STARPU_MAX_FPGA_RAM, devid); _starpu_register_bus(STARPU_MAIN_RAM, memory_node); _starpu_register_bus(memory_node, STARPU_MAIN_RAM); } _starpu_memory_node_add_nworkers(memory_node); //This worker can manage transfers on NUMA nodes for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) _starpu_worker_drives_memory_node(workerarg, numa); _starpu_worker_drives_memory_node(workerarg, memory_node); workerarg->memory_node = memory_node; } static void _starpu_max_fpga_limit_max_fpga_mem(unsigned devid) { starpu_ssize_t limit=-1; //TODO limit = starpu_getenv_number("STARPU_LIMIT_MAX_FPGA_MEM"); if(limit != -1) max_fpga_mem[devid] = limit*1024*1024; } static void init_device_context(unsigned devid) { struct starpu_max_load *load = _starpu_config.conf.max_fpga_load; /* 0 would be seen as NULL, i.e. allocation failed... */ // FIXME: Maxeler FPGAs want 192-byte alignment // TODO: use int max_get_burst_size (max_file_t *maxfile, const char *name) current_address[devid] = (fpga_mem) (8192*192); max_fpga_mem[devid] = 128ULL*1024*1024*1024; _starpu_max_fpga_limit_max_fpga_mem(devid); if (!load) { /* Nothing specified, single-FPGA execution with basic static * interface, file will be auto-loaded by SLiC. */ return; } else { unsigned n; /* Which load we shall use */ for (n = 0; load->file; n++, load++) { if (!strcmp(load->engine_id_pattern, "*") || strstr(load->engine_id_pattern, ":*")) break; if (n == devid) break; } STARPU_ASSERT(load->file); if (!strcmp(load->engine_id_pattern, "*") || strstr(load->engine_id_pattern, ":*")) { char s[strlen(load->engine_id_pattern) + 32]; if (!strcmp(load->engine_id_pattern, "*")) snprintf(s, sizeof(s), "*:%u", (unsigned) devid); else { char *colon = strstr(load->engine_id_pattern, ":*"); snprintf(s, sizeof(s), "%.*s:%u", (int) (colon - load->engine_id_pattern), load->engine_id_pattern, (unsigned) devid); } /* FIXME: this assumes that the loads are in-order. * Ideally we'd detect which ones had an explicit load */ engines[devid] = max_load(load->file, s); STARPU_ASSERT_MSG(engines[devid], "engine %u (part of *) could not be loaded\n", n); } else { engines[n] = max_load(load->file, load->engine_id_pattern); STARPU_ASSERT_MSG(engines[n], "engine %u could not be loaded\n", n); } } } static int _starpu_max_fpga_driver_init(struct _starpu_worker *worker) { int devid = worker->devid; _starpu_driver_start(worker, STARPU_MAX_FPGA_WORKER, 1); /* FIXME: when we have NUMA support, properly turn node number into NUMA node number */ // TODO: drop test when we allocated a memory node for fpga if (worker->memory_node != STARPU_MAIN_RAM) _starpu_memory_manager_set_global_memory_size(worker->memory_node, _starpu_max_fpga_get_max_fpga_mem_size(worker->devid)); // TODO: multiple fpga in same thread init_device_context(devid); snprintf(worker->name, sizeof(worker->name), "FPGA %d", devid); snprintf(worker->short_name, sizeof(worker->short_name), "FPGA %d", devid); starpu_pthread_setname(worker->short_name); _STARPU_TRACE_WORKER_INIT_END(worker->workerid); /* tell the main thread that we are ready */ STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex); worker->status = STATUS_UNKNOWN; worker->worker_is_initialized = 1; STARPU_PTHREAD_COND_SIGNAL(&worker->ready_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex); return 0; } static int _starpu_max_fpga_driver_deinit(struct _starpu_worker *fpga_worker) { _STARPU_TRACE_WORKER_DEINIT_START; unsigned memnode = fpga_worker->memory_node; _starpu_datawizard_handle_all_pending_node_data_requests(memnode); /* In case there remains some memory that was automatically * allocated by StarPU, we release it now. Note that data * coherency is not maintained anymore at that point ! */ _starpu_free_all_automatically_allocated_buffers(memnode); fpga_worker->worker_is_initialized = 0; _STARPU_TRACE_WORKER_DEINIT_END(STARPU_MAX_FPGA_WORKER); return 0; } static uintptr_t _starpu_max_fpga_allocate_memory(unsigned dst_node, size_t size, int flags) { (void) flags; unsigned devid = starpu_memory_node_get_devid(dst_node); fpga_mem addr, next_addr; addr = current_address[devid]; next_addr = current_address[devid] + size; if (next_addr >= (fpga_mem) max_fpga_mem[devid]) { printf("Memory overflow on %u\n", devid); return 0; } current_address[devid] = next_addr; printf("fpga mem returned from allocation @: %p - %p\n",addr, addr + size); return (uintptr_t) addr; } static int _starpu_max_fpga_copy_ram_to_max_fpga(void *src, void *dst, size_t size) { printf("ram to fpga, fpga @= %p\n",dst); memcpy(dst,src,size); return 0; // LMemLoopback_writeLMem(dst, size, src); } /** Transfer SIZE bytes from the address pointed by SRC in the SRC_NODE memory node to the address pointed by DST in the DST_NODE memory node */ void copy_ram_to_max_fpga(void *src, void *dst, size_t size) { (void) src; (void) dst; (void) size; printf("ram to fpga, fpga @= %p\n",dst); // LMemLoopback_writeLMem(size, dst, src); } void copy_max_fpga_to_ram(void *src, void *dst, size_t size) { (void) src; (void) dst; (void) size; printf("ram to fpga, fpga @= %p\n",src); //LMemLoopback_readLMem(size, src, dst); } /** Transfer SIZE bytes from the address pointed by SRC in the SRC_NODE memory node to the address pointed by DST in the DST_NODE memory node */ static int _starpu_max_fpga_copy_max_fpga_to_ram(void *src, void *dst, size_t size) { printf("fpga to ram, fpga @= %p\n",src); memcpy(dst,src,size); return 0; //LMemLoopback_readLMem(src, size, dst); } /** Transfer SIZE bytes from the address pointed by SRC in the SRC_NODE memory node to the address pointed by DST in the DST_NODE memory node */ int _starpu_max_fpga_copy_fpga_to_fpga(void *src, void *dst, size_t size) { printf("fpga to ram, fpga @= %p\n",src); memcpy(dst,src,size); return 0; //LMemLoopback_XXXLMem(src, size, dst); } /* Asynchronous transfers */ static int _starpu_max_fpga_copy_ram_to_max_fpga_async(void *src, void *dst, size_t size) { printf("ram to fpga, fpga @= %p\n",dst); memcpy(dst,src,size); return 0; // Trouver dans la doc une version asynchrone de LMemLoopback_writeLMem(); } static int _starpu_max_fpga_copy_max_fpga_to_ram_async(void *src, void *dst, size_t size) { printf("fpga to ram, fpga @= %p\n",src); memcpy(dst,src,size); return 0; } static int _starpu_run_fpga(struct _starpu_worker *workerarg) { /* Let's go ! */ _starpu_max_fpga_worker(workerarg); return 0; } int _starpu_max_fpga_copy_data_from_cpu_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel) { return _starpu_max_fpga_copy_ram_to_max_fpga((char*) src + src_offset, (char*) dst + dst_offset, ssize); } int _starpu_max_fpga_copy_data_from_fpga_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel) { return _starpu_max_fpga_copy_max_fpga_to_ram((char*) src + src_offset, (char*) dst + dst_offset, ssize); } int _starpu_max_fpga_copy_data_from_fpga_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel) { return _starpu_max_fpga_copy_fpga_to_fpga((char*) src + src_offset, (char*) dst + dst_offset, ssize); } int _starpu_max_fpga_copy_interface_from_fpga_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_MAX_FPGA_RAM && dst_kind == STARPU_CPU_RAM); int ret = 1; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_max_fpga_copy_disabled() || !(copy_methods->max_fpga_to_ram_async || copy_methods->any_to_any)) { /* this is not associated to a request so it's synchronous */ STARPU_ASSERT(copy_methods->max_fpga_to_ram || copy_methods->any_to_any); if (copy_methods->max_fpga_to_ram) copy_methods->max_fpga_to_ram(src_interface, src_node, dst_interface, dst_node); else copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } else { //req->async_channel.type = STARPU_MAX_FPGA_RAM; if (copy_methods->max_fpga_to_ram_async) ret = copy_methods->max_fpga_to_ram_async(src_interface, src_node, dst_interface, dst_node); else { STARPU_ASSERT(copy_methods->any_to_any); ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); } //_starpu_max_fpga_init_event(&(req->async_channel.event.fpga_event), src_node); } return ret; } int _starpu_max_fpga_copy_interface_from_cpu_to_fpga(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_MAX_FPGA_RAM); const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_max_fpga_copy_disabled() || !(copy_methods->ram_to_max_fpga_async || copy_methods->any_to_any)) { /* this is not associated to a request so it's synchronous */ STARPU_ASSERT(copy_methods->ram_to_max_fpga || copy_methods->any_to_any); if (copy_methods->ram_to_max_fpga) copy_methods->ram_to_max_fpga(src_interface, src_node, dst_interface, dst_node); else copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } else { //req->async_channel.type = STARPU_MAX_FPGA_RAM; if (copy_methods->ram_to_max_fpga_async) copy_methods->ram_to_max_fpga_async(src_interface, src_node, dst_interface, dst_node); else { STARPU_ASSERT(copy_methods->any_to_any); copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); } //_starpu_max_fpga_init_event(&(req->async_channel.event.fpga_event), dst_node); } return 0; } static int execute_job_on_fpga(struct _starpu_job *j, struct starpu_task *worker_task, struct _starpu_worker *fpga_args, int rank, struct starpu_perfmodel_arch* perf_arch) { int ret; int profiling = starpu_profiling_status_get(); struct starpu_task *task = worker_task; struct starpu_codelet *cl = task->cl; STARPU_ASSERT(cl); /* TODO: use asynchronous */ ret = _starpu_fetch_task_input(task, j, 0); if (ret != 0) { /* there was not enough memory so the codelet cannot be executed right now ... */ /* push the codelet back and try another one ... */ return -EAGAIN; } /* Give profiling variable */ _starpu_driver_start_job(fpga_args, j, perf_arch, rank, profiling); /* In case this is a Fork-join parallel task, the worker does not * execute the kernel at all. */ if ((rank == 0) || (cl->type != STARPU_FORKJOIN)) { _starpu_cl_func_t func = _starpu_task_get_fpga_nth_implementation(cl, j->nimpl); STARPU_ASSERT_MSG(func, "when STARPU_MAX_FPGA is defined in 'where', fpga_func or max_fpga_funcs has to be defined"); if (_starpu_get_disable_kernels() <= 0) { _STARPU_TRACE_START_EXECUTING(j); func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); _STARPU_TRACE_END_EXECUTING(j); } } _starpu_driver_end_job(fpga_args, j, perf_arch, rank, profiling); _starpu_driver_update_job_feedback(j, fpga_args, perf_arch, profiling); _starpu_push_task_output(j); return 0; } int _starpu_max_fpga_driver_run_once(struct _starpu_worker *fpga_worker) { unsigned memnode = fpga_worker->memory_node; int workerid = fpga_worker->workerid; _STARPU_TRACE_START_PROGRESS(memnode); _starpu_datawizard_progress(1); if (memnode != STARPU_MAIN_RAM) { _starpu_datawizard_progress(1); } _STARPU_TRACE_END_PROGRESS(memnode); struct _starpu_job *j; struct starpu_task *task; int res; task = _starpu_get_worker_task(fpga_worker, workerid, memnode); if (!task) return 0; j = _starpu_get_job_associated_to_task(task); /* can a cpu perform that task ? */ if (!_STARPU_MAY_PERFORM(j, MAX_FPGA)) { /* put it at the end of the queue ... XXX */ _starpu_push_task_to_workers(task); return 0; } int rank = 0; int is_parallel_task = (j->task_size > 1); struct starpu_perfmodel_arch* perf_arch; if (is_parallel_task) { STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); rank = j->active_task_alias_count++; STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex); if(j->combined_workerid != -1) { struct _starpu_combined_worker *combined_worker; combined_worker = _starpu_get_combined_worker_struct(j->combined_workerid); fpga_worker->combined_workerid = j->combined_workerid; fpga_worker->worker_size = combined_worker->worker_size; fpga_worker->current_rank = rank; perf_arch = &combined_worker->perf_arch; } else { struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(fpga_worker, j); STARPU_ASSERT_MSG(sched_ctx != NULL, "there should be a worker %d in the ctx of this job \n", fpga_worker->workerid); perf_arch = &sched_ctx->perf_arch; } } else { fpga_worker->combined_workerid = fpga_worker->workerid; fpga_worker->worker_size = 1; fpga_worker->current_rank = 0; perf_arch = &fpga_worker->perf_arch; } _starpu_set_current_task(j->task); fpga_worker->current_task = j->task; j->workerid = fpga_worker->workerid; res = execute_job_on_fpga(j, task, fpga_worker, rank, perf_arch); _starpu_set_current_task(NULL); fpga_worker->current_task = NULL; if (res) { switch (res) { case -EAGAIN: _starpu_push_task_to_workers(task); return 0; default: STARPU_ABORT(); } } /* In the case of combined workers, we need to inform the * scheduler each worker's execution is over. * Then we free the workers' task alias */ if (is_parallel_task) { _starpu_sched_post_exec_hook(task); free(task); } if (rank == 0) _starpu_handle_job_termination(j); return 0; } void *_starpu_max_fpga_worker(void *_arg) { struct _starpu_worker* worker = _arg; unsigned memnode = worker->memory_node; _starpu_max_fpga_driver_init(worker); _STARPU_TRACE_START_PROGRESS(memnode); while (_starpu_machine_is_running()) { _starpu_may_pause(); _starpu_max_fpga_driver_run_once(worker); } _STARPU_TRACE_END_PROGRESS(memnode); _starpu_max_fpga_driver_deinit(worker); return NULL; } struct _starpu_driver_ops _starpu_driver_max_fpga_ops = { .init = _starpu_max_fpga_driver_init, .run = _starpu_run_fpga, .run_once = _starpu_max_fpga_driver_run_once, .deinit = _starpu_max_fpga_driver_deinit }; // TODO: transfers struct _starpu_node_ops _starpu_driver_max_fpga_node_ops = { .name = "fpga driver", .malloc_on_node = _starpu_max_fpga_allocate_memory, .free_on_node = NULL, .is_direct_access_supported = NULL, //.copy_data_to[STARPU_CPU_RAM] = _starpu_max_fpga_copy_data_from_fpga_to_cpu, //.copy_data_to[STARPU_MAX_FPGA_RAM] = _starpu_max_fpga_copy_data_from_fpga_to_fpga, //.copy_data_from[STARPU_CPU_RAM] = _starpu_max_fpga_copy_data_from_cpu_to_fpga, //.copy_data_from[STARPU_MAX_FPGA_RAM] = _starpu_max_fpga_copy_data_from_fpga_to_fpga, //.copy_interface_to[STARPU_CPU_RAM] = _starpu_max_fpga_copy_interface_from_fpga_to_cpu, //.copy_interface_to[STARPU_MAX_FPGA_RAM] = _starpu_max_fpga_copy_interface_from_fpga_to_fpga, //.copy_interface_from[STARPU_CPU_RAM] = _starpu_max_fpga_copy_interface_from_cpu_to_fpga, //.copy_interface_from[STARPU_MAX_FPGA_RAM] = _starpu_max_fpga_copy_interface_from_fpga_to_fpga, .wait_request_completion = NULL, .test_request_completion = NULL, }; starpu-1.4.9+dfsg/src/drivers/max/driver_max_fpga.h000066400000000000000000000036241507764646700223550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DRIVER_FPGA_H__ #define __DRIVER_FPGA_H__ //#ifdef NOT_DEFINED #ifdef STARPU_USE_MAX_FPGA #include #endif //#endif #include #include #include #include #include #include #include void _starpu_max_fpga_preinit(void); #ifdef STARPU_USE_MAX_FPGA typedef unsigned * fpga_mem; extern struct _starpu_driver_ops _starpu_driver_max_fpga_ops; extern struct _starpu_node_ops _starpu_driver_max_fpga_node_ops; void _starpu_init_max_fpga(void); void _starpu_init_max_fpga_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *); void _starpu_max_fpga_discover_devices (struct _starpu_machine_config *config); void _starpu_max_fpga_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); void _starpu_max_fpga_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); void *_starpu_max_fpga_worker(void *); #else #define _starpu_max_fpga_discover_devices(config) ((void) (config)) #endif #endif // __DRIVER_FPGA_H__ starpu-1.4.9+dfsg/src/drivers/max/driver_max_fpga_init.c000066400000000000000000000031101507764646700233610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include static struct _starpu_driver_info driver_info = { .name_upper = "FPGA", .name_var = "FPGA", .name_lower = "fpga", .memory_kind = STARPU_MAX_FPGA_RAM, .alpha = 0.5, .wait_for_worker_initialization = 1, #ifdef STARPU_USE_MAX_FPGA .driver_ops = &_starpu_driver_max_fpga_ops, .run_worker = _starpu_max_fpga_worker, .init_worker_binding = _starpu_max_fpga_init_worker_binding, .init_worker_memory = _starpu_max_fpga_init_worker_memory, #endif }; static struct _starpu_memory_driver_info memory_driver_info = { .name_upper = "FPGA", .worker_archtype = STARPU_MAX_FPGA_WORKER, #ifdef STARPU_USE_MAX_FPGA .ops = &_starpu_driver_max_fpga_node_ops, #endif }; void _starpu_max_fpga_preinit(void) { _starpu_driver_info_register(STARPU_MAX_FPGA_WORKER, &driver_info); _starpu_memory_driver_info_register(STARPU_MAX_FPGA_RAM, &memory_driver_info); } starpu-1.4.9+dfsg/src/drivers/mp_common/000077500000000000000000000000001507764646700202415ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/drivers/mp_common/mp_common.c000066400000000000000000000427241507764646700224020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include const char *_starpu_mp_common_command_to_string(const enum _starpu_mp_command command) { switch(command) { /* Commands from master to slave */ case STARPU_MP_COMMAND_EXIT: return "EXIT"; case STARPU_MP_COMMAND_EXECUTE: return "EXECUTE"; case STARPU_MP_COMMAND_EXECUTE_DETACHED: return "EXECUTE_DETACHED"; case STARPU_MP_COMMAND_SINK_NBCORES: return "SINK_NBCORES"; case STARPU_MP_COMMAND_LOOKUP: return "LOOKUP"; case STARPU_MP_COMMAND_ALLOCATE: return "ALLOCATE"; case STARPU_MP_COMMAND_FREE: return "FREE"; case STARPU_MP_COMMAND_MAP: return "MAP"; case STARPU_MP_COMMAND_UNMAP: return "UNMAP"; case STARPU_MP_COMMAND_SYNC_WORKERS: return "SYNC_WORKERS"; /* Note: synchronous send */ case STARPU_MP_COMMAND_RECV_FROM_HOST: return "RECV_FROM_HOST"; case STARPU_MP_COMMAND_SEND_TO_HOST: return "SEND_TO_HOST"; case STARPU_MP_COMMAND_RECV_FROM_SINK: return "RECV_FROM_SINK"; case STARPU_MP_COMMAND_SEND_TO_SINK: return "SEND_TO_SINK"; /* Note: Asynchronous send */ case STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC: return "RECV_FROM_HOST_ASYNC"; case STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC: return "SEND_TO_HOST_ASYNC"; case STARPU_MP_COMMAND_RECV_FROM_SINK_ASYNC: return "RECV_FROM_SINK_ASYNC"; case STARPU_MP_COMMAND_SEND_TO_SINK_ASYNC: return "SEND_TO_SINK_ASYNC"; /* Synchronous answers from slave to master */ case STARPU_MP_COMMAND_ERROR_EXECUTE: return "ERROR_EXECUTE"; case STARPU_MP_COMMAND_ERROR_EXECUTE_DETACHED: return "ERROR_EXECUTE_DETACHED"; case STARPU_MP_COMMAND_ANSWER_LOOKUP: return "ANSWER_LOOKUP"; case STARPU_MP_COMMAND_ERROR_LOOKUP: return "ERROR_LOOKUP"; case STARPU_MP_COMMAND_ANSWER_ALLOCATE: return "ANSWER_ALLOCATE"; case STARPU_MP_COMMAND_ERROR_ALLOCATE: return "ERROR_ALLOCATE"; case STARPU_MP_COMMAND_ANSWER_MAP: return "ANSWER_MAP"; case STARPU_MP_COMMAND_ERROR_MAP: return "ERROR_MAP"; case STARPU_MP_COMMAND_ANSWER_TRANSFER_COMPLETE: return "ANSWER_TRANSFER_COMPLETE"; case STARPU_MP_COMMAND_ANSWER_SINK_NBCORES: return "ANSWER_SINK_NBCORES"; case STARPU_MP_COMMAND_ANSWER_EXECUTION_SUBMITTED: return "ANSWER_EXECUTION_SUBMITTED"; case STARPU_MP_COMMAND_ANSWER_EXECUTION_DETACHED_SUBMITTED: return "ANSWER_EXECUTION_DETACHED_SUBMITTED"; /* Asynchronous notifications from slave to master */ case STARPU_MP_COMMAND_NOTIF_RECV_FROM_HOST_ASYNC_COMPLETED: return "NOTIF_RECV_FROM_HOST_ASYNC_COMPLETED"; case STARPU_MP_COMMAND_NOTIF_SEND_TO_HOST_ASYNC_COMPLETED: return "NOTIF_SEND_TO_HOST_ASYNC_COMPLETED"; case STARPU_MP_COMMAND_NOTIF_RECV_FROM_SINK_ASYNC_COMPLETED: return "NOTIF_RECV_FROM_SINK_ASYNC_COMPLETED"; case STARPU_MP_COMMAND_NOTIF_SEND_TO_SINK_ASYNC_COMPLETED: return "NOTIF_SEND_TO_SINK_ASYNC_COMPLETED"; case STARPU_MP_COMMAND_NOTIF_EXECUTION_COMPLETED: return "NOTIF_EXECUTION_COMPLETED"; case STARPU_MP_COMMAND_NOTIF_EXECUTION_DETACHED_COMPLETED: return "NOTIF_EXECUTION_DETACHED_COMPLETED"; case STARPU_MP_COMMAND_NOTIF_PRE_EXECUTION: return "NOTIF_PRE_EXECUTION"; default: return ""; } } const char *_starpu_mp_common_node_kind_to_string(const int kind) { switch(kind) { case STARPU_NODE_MPI_SINK: return "MPI_SINK"; case STARPU_NODE_MPI_SOURCE: return "MPI_SOURCE"; case STARPU_NODE_TCPIP_SINK: return "TCPIP_SINK"; case STARPU_NODE_TCPIP_SOURCE: return "TCPIP_SOURCE"; default: return ""; } } /* Allocate and initialize the sink structure, when the function returns * all the pointer of functions are linked to the right ones. */ struct _starpu_mp_node * STARPU_ATTRIBUTE_MALLOC _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind, int peer_id) { struct _starpu_mp_node *node; _STARPU_MALLOC(node, sizeof(struct _starpu_mp_node)); node->kind = node_kind; node->peer_id = peer_id; switch(node->kind) { #ifdef STARPU_USE_MPI_MASTER_SLAVE case STARPU_NODE_MPI_SOURCE: { /* node->nb_mp_sinks = node->devid = */ node->peer_id = (_starpu_mpi_common_get_src_node() <= peer_id ? peer_id+1 : peer_id); node->mp_connection.mpi_remote_nodeid = node->peer_id; node->init = _starpu_mpi_source_init; node->launch_workers = NULL; node->deinit = _starpu_mpi_source_deinit; /* node->report_error = */ node->mp_recv_is_ready = _starpu_mpi_common_recv_is_ready; node->mp_send = _starpu_mpi_common_mp_send; node->mp_recv = _starpu_mpi_common_mp_recv; node->nt_recv_is_ready = _starpu_mpi_common_notif_recv_is_ready; node->nt_send_is_ready = _starpu_mpi_common_notif_send_is_ready; node->mp_wait = NULL; node->mp_signal = NULL; node->nt_send = _starpu_mpi_common_nt_send; node->nt_recv = _starpu_mpi_common_nt_recv; node->dt_send = _starpu_mpi_common_send; node->dt_recv = _starpu_mpi_common_recv; node->dt_send_to_device = _starpu_mpi_common_send_to_device; node->dt_recv_from_device = _starpu_mpi_common_recv_from_device; node->get_kernel_from_job = _starpu_src_common_get_cpu_func_from_job; node->lookup = NULL; node->bind_thread = NULL; node->execute = NULL; node->allocate = NULL; node->free = NULL; node->map = NULL; node->unmap = NULL; } break; case STARPU_NODE_MPI_SINK: { /* node->nb_mp_sinks = node->devid = */ node->mp_connection.mpi_remote_nodeid = _starpu_mpi_common_get_src_node(); node->init = _starpu_mpi_sink_init; node->launch_workers = _starpu_sink_launch_workers; node->deinit = _starpu_sink_deinit; /* node->report_error = */ node->mp_recv_is_ready = _starpu_mpi_common_recv_is_ready; node->mp_send = _starpu_mpi_common_mp_send; node->mp_recv = _starpu_mpi_common_mp_recv; node->nt_recv_is_ready = _starpu_mpi_common_notif_recv_is_ready; node->nt_send_is_ready = _starpu_mpi_common_notif_send_is_ready; node->mp_wait = NULL; node->mp_signal = NULL; node->nt_send = _starpu_mpi_common_nt_send; node->nt_recv = _starpu_mpi_common_nt_recv; node->dt_send = _starpu_mpi_common_send; node->dt_recv = _starpu_mpi_common_recv; node->dt_send_to_device = _starpu_mpi_common_send_to_device; node->dt_recv_from_device = _starpu_mpi_common_recv_from_device; node->dt_test = _starpu_mpi_common_test_event; node->get_kernel_from_job = NULL; node->lookup = _starpu_sink_common_cpu_lookup; node->bind_thread = _starpu_mpi_sink_bind_thread; node->execute = _starpu_sink_common_execute; node->allocate = _starpu_sink_common_allocate; node->free = _starpu_sink_common_free; node->map = _starpu_sink_common_map; node->unmap = _starpu_sink_common_unmap; } break; #endif /* STARPU_USE_MPI_MASTER_SLAVE */ #ifdef STARPU_USE_TCPIP_MASTER_SLAVE case STARPU_NODE_TCPIP_SOURCE: { /* node->nb_mp_sinks = node->devid = */ node->peer_id = (0 <= peer_id ? peer_id+1 : peer_id); node->mp_connection.tcpip_mp_connection = &tcpip_sock[node->peer_id]; node->init = _starpu_tcpip_source_init; node->launch_workers = NULL; node->deinit = _starpu_tcpip_source_deinit; /* node->report_error = */ node->mp_recv_is_ready = _starpu_tcpip_common_recv_is_ready; node->mp_send = _starpu_tcpip_common_mp_send; node->mp_recv = _starpu_tcpip_common_mp_recv; node->nt_recv_is_ready = _starpu_tcpip_common_notif_recv_is_ready; node->nt_send_is_ready = _starpu_tcpip_common_notif_send_is_ready; node->mp_wait = _starpu_tcpip_common_wait; node->mp_signal = _starpu_tcpip_common_signal; node->nt_send = _starpu_tcpip_common_nt_send; node->nt_recv = _starpu_tcpip_common_nt_recv; node->dt_send = _starpu_tcpip_common_send; node->dt_recv = _starpu_tcpip_common_recv; node->dt_send_to_device = _starpu_tcpip_common_send_to_device; node->dt_recv_from_device = _starpu_tcpip_common_recv_from_device; node->get_kernel_from_job = _starpu_src_common_get_cpu_func_from_job; node->lookup = NULL; node->bind_thread = NULL; node->execute = NULL; node->allocate = NULL; node->free = NULL; node->map = NULL; node->unmap = NULL; } break; case STARPU_NODE_TCPIP_SINK: { /* node->nb_mp_sinks = node->devid = */ node->mp_connection.tcpip_mp_connection = &tcpip_sock[0]; node->init = _starpu_tcpip_sink_init; node->launch_workers = _starpu_sink_launch_workers; node->deinit = _starpu_sink_deinit; /* node->report_error = */ node->mp_recv_is_ready = _starpu_tcpip_common_recv_is_ready; node->mp_send = _starpu_tcpip_common_mp_send; node->mp_recv = _starpu_tcpip_common_mp_recv; node->nt_recv_is_ready = _starpu_tcpip_common_notif_recv_is_ready; node->nt_send_is_ready = _starpu_tcpip_common_notif_send_is_ready; node->mp_wait = _starpu_tcpip_common_wait; node->mp_signal = _starpu_tcpip_common_signal; node->nt_send = _starpu_tcpip_common_nt_send; node->nt_recv = _starpu_tcpip_common_nt_recv; node->dt_send = _starpu_tcpip_common_send; node->dt_recv = _starpu_tcpip_common_recv; node->dt_send_to_device = _starpu_tcpip_common_send_to_device; node->dt_recv_from_device = _starpu_tcpip_common_recv_from_device; node->dt_test = _starpu_tcpip_common_test_event; node->get_kernel_from_job = NULL; node->lookup = _starpu_sink_common_cpu_lookup; node->bind_thread = _starpu_tcpip_sink_bind_thread; node->execute = _starpu_sink_common_execute; node->allocate = _starpu_sink_common_allocate; node->free = _starpu_sink_common_free; node->map = _starpu_sink_common_map; node->unmap = _starpu_sink_common_unmap; } break; #endif /* STARPU_USE_TCPIP_MASTER_SLAVE */ default: STARPU_ASSERT(0); } /* Let's allocate the buffer, we want it to be big enough to contain * a command, an argument and the argument size */ _STARPU_MALLOC(node->buffer, BUFFER_SIZE); if (node->init) node->init(node); mp_message_list_init(&node->message_queue); STARPU_PTHREAD_MUTEX_INIT(&node->message_queue_mutex,NULL); STARPU_PTHREAD_MUTEX_INIT(&node->connection_mutex, NULL); _starpu_mp_event_list_init(&node->event_list); _starpu_mp_event_list_init(&node->event_queue); /* If the node is a sink then we must initialize some field */ if(node->kind == STARPU_NODE_MPI_SINK || node->kind == STARPU_NODE_TCPIP_SINK) { int i; STARPU_HG_DISABLE_CHECKING(node->is_running); node->is_running = 1; _STARPU_MALLOC(node->run_table, sizeof(struct mp_task *)*node->nb_cores); _STARPU_MALLOC(node->run_table_detached, sizeof(struct mp_task *)*node->nb_cores); _STARPU_MALLOC(node->sem_run_table, sizeof(sem_t)*node->nb_cores); for(i=0; inb_cores; i++) { node->run_table[i] = NULL; node->run_table_detached[i] = NULL; sem_init(&node->sem_run_table[i],0,0); } mp_barrier_list_init(&node->barrier_list); STARPU_PTHREAD_MUTEX_INIT(&node->barrier_mutex,NULL); STARPU_PTHREAD_BARRIER_INIT(&node->init_completed_barrier, NULL, node->nb_cores+1); node->launch_workers(node); } return node; } /* Deinitialize the sink structure and release the structure */ void _starpu_mp_common_node_destroy(struct _starpu_mp_node *node) { if (node->deinit) node->deinit(node); STARPU_PTHREAD_MUTEX_DESTROY(&node->message_queue_mutex); /* If the node is a sink then we must destroy some field */ if(node->kind == STARPU_NODE_MPI_SINK || node->kind == STARPU_NODE_TCPIP_SINK) { int i; for(i=0; inb_cores; i++) { sem_destroy(&node->sem_run_table[i]); } free(node->run_table); free(node->run_table_detached); free(node->sem_run_table); STARPU_PTHREAD_MUTEX_DESTROY(&node->barrier_mutex); STARPU_PTHREAD_BARRIER_DESTROY(&node->init_completed_barrier); } free(node->buffer); free(node); } /* Send COMMAND to RECIPIENT, along with ARG if ARG_SIZE is non-zero */ static void __starpu_mp_common_send_command(const struct _starpu_mp_node *node, const enum _starpu_mp_command command, void *arg, int arg_size, int notif) { STARPU_ASSERT_MSG(arg_size <= BUFFER_SIZE, "Too much data (%d) for the static buffer (%d), increase BUFFER_SIZE perhaps?", arg_size, BUFFER_SIZE); //printf("SEND %s: %d/%s - arg_size %d by %lu \n", notif?"NOTIF":"CMD", command, _starpu_mp_common_command_to_string(command), arg_size, starpu_pthread_self()); /* MPI sizes are given through a int */ int command_size = sizeof(enum _starpu_mp_command); int arg_size_size = sizeof(int); /* Let's copy the data into the command line buffer */ memcpy(node->buffer, &command, command_size); memcpy((void*) ((uintptr_t)node->buffer + command_size), &arg_size, arg_size_size); if (!notif) node->mp_send(node, node->buffer, command_size + arg_size_size); else node->nt_send(node, node->buffer, command_size + arg_size_size); if (arg_size) { if (!notif) node->mp_send(node, arg, arg_size); else node->nt_send(node, arg, arg_size); } } /* Send COMMAND to RECIPIENT, along with ARG if ARG_SIZE is non-zero */ void _starpu_mp_common_send_command(const struct _starpu_mp_node *node, const enum _starpu_mp_command command, void *arg, int arg_size) { __starpu_mp_common_send_command(node, command, arg, arg_size, 0); } /* Send NOTIF COMMAND to RECIPIENT, along with ARG if ARG_SIZE is non-zero */ void _starpu_nt_common_send_command(const struct _starpu_mp_node *node, const enum _starpu_mp_command command, void *arg, int arg_size) { __starpu_mp_common_send_command(node, command, arg, arg_size, 1); } /* Return the command received from SENDER. In case SENDER sent an argument * beside the command, an address to a copy of this argument is returns in arg. * There is no need to free this address as it's not allocated at this time. * However, the data pointed by arg shouldn't be relied on after a new call to * STARPU_MP_COMMON_RECV_COMMAND as it might corrupt it. */ static enum _starpu_mp_command __starpu_mp_common_recv_command(const struct _starpu_mp_node *node, void **arg, int *arg_size, int notif) { enum _starpu_mp_command command; /* MPI sizes are given through a int */ int command_size = sizeof(enum _starpu_mp_command); int arg_size_size = sizeof(int); if (!notif) node->mp_recv(node, node->buffer, command_size + arg_size_size); else node->nt_recv(node, node->buffer, command_size + arg_size_size); command = *((enum _starpu_mp_command *) node->buffer); *arg_size = *((int *) ((uintptr_t)node->buffer + command_size)); //printf("RECV %s : %d/%s - arg_size %d by %lu \n", notif?"NOTIF":"CMD", command, _starpu_mp_common_command_to_string(command), *arg_size, starpu_pthread_self()); /* If there is no argument (ie. arg_size == 0), * let's return the command right now */ if (!(*arg_size)) { *arg = NULL; return command; } STARPU_ASSERT(*arg_size <= BUFFER_SIZE); if (!notif) node->mp_recv(node, node->buffer, *arg_size); else node->nt_recv(node, node->buffer, *arg_size); *arg = node->buffer; return command; } /* Return the command received from SENDER*/ enum _starpu_mp_command _starpu_mp_common_recv_command(const struct _starpu_mp_node *node, void **arg, int *arg_size) { return __starpu_mp_common_recv_command(node, arg, arg_size, 0); } /* Return the notif command received from SENDER*/ enum _starpu_mp_command _starpu_nt_common_recv_command(const struct _starpu_mp_node *node, void **arg, int *arg_size) { return __starpu_mp_common_recv_command(node, arg, arg_size, 1); } void _starpu_sink_deinit(struct _starpu_mp_node *node) { int i; node->is_running = 0; for(i=0; inb_cores; i++) { sem_post(&node->sem_run_table[i]); STARPU_PTHREAD_JOIN(((starpu_pthread_t *)node->thread_table)[i],NULL); } free(node->thread_table); } void _starpu_sink_launch_workers(struct _starpu_mp_node *node) { //TODO int i; struct arg_sink_thread * arg; cpu_set_t cpuset; starpu_pthread_attr_t attr; starpu_pthread_t thread; for(i=0; i < node->nb_cores; i++) { int ret; ret = starpu_pthread_attr_init(&attr); STARPU_ASSERT(ret == 0); #if defined(HAVE_PTHREAD_SETAFFINITY_NP) && defined(__linux__) //init the set CPU_ZERO(&cpuset); CPU_SET(i,&cpuset); int nobind = starpu_getenv_number("STARPU_WORKERS_NOBIND"); if (nobind <= 0) { ret = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); STARPU_ASSERT(ret == 0); } #else #warning no CPU binding support #endif /*prepare the argument for the thread*/ _STARPU_MALLOC(arg, sizeof(struct arg_sink_thread)); arg->coreid = i; arg->node = node; STARPU_PTHREAD_CREATE(&thread, &attr, _starpu_sink_thread, arg); starpu_pthread_attr_destroy(&attr); ((starpu_pthread_t *)node->thread_table)[i] = thread; } } starpu-1.4.9+dfsg/src/drivers/mp_common/mp_common.h000066400000000000000000000217541507764646700224070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __MP_COMMON_H__ #define __MP_COMMON_H__ /** @file */ #include #include #include #include #include #include #include #include #pragma GCC visibility push(hidden) #ifdef STARPU_USE_MP #define BUFFER_SIZE 65536 #define STARPU_MP_SRC_NODE 0 #define STARPU_MP_SINK_NODE(a) ((a) + 1) #define STARPU_MP_COMMON_REPORT_ERROR(node, status) \ (node)->report_error(__starpu_func__, __FILE__, __LINE__, (status)) enum _starpu_mp_command { /* Commands from master to slave */ STARPU_MP_COMMAND_EXIT, STARPU_MP_COMMAND_EXECUTE, STARPU_MP_COMMAND_EXECUTE_DETACHED, STARPU_MP_COMMAND_SINK_NBCORES, STARPU_MP_COMMAND_LOOKUP, STARPU_MP_COMMAND_ALLOCATE, STARPU_MP_COMMAND_FREE, STARPU_MP_COMMAND_MAP, STARPU_MP_COMMAND_UNMAP, STARPU_MP_COMMAND_SYNC_WORKERS, /* Note: synchronous send */ STARPU_MP_COMMAND_RECV_FROM_HOST, STARPU_MP_COMMAND_SEND_TO_HOST, STARPU_MP_COMMAND_RECV_FROM_SINK, STARPU_MP_COMMAND_SEND_TO_SINK, /* Note: Asynchronous send */ STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC, STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC, STARPU_MP_COMMAND_RECV_FROM_SINK_ASYNC, STARPU_MP_COMMAND_SEND_TO_SINK_ASYNC, /* Synchronous answers from slave to master */ STARPU_MP_COMMAND_ERROR_EXECUTE, STARPU_MP_COMMAND_ERROR_EXECUTE_DETACHED, STARPU_MP_COMMAND_ANSWER_LOOKUP, STARPU_MP_COMMAND_ERROR_LOOKUP, STARPU_MP_COMMAND_ANSWER_ALLOCATE, STARPU_MP_COMMAND_ERROR_ALLOCATE, STARPU_MP_COMMAND_ANSWER_MAP, STARPU_MP_COMMAND_ERROR_MAP, STARPU_MP_COMMAND_ANSWER_TRANSFER_COMPLETE, STARPU_MP_COMMAND_ANSWER_SINK_NBCORES, STARPU_MP_COMMAND_ANSWER_EXECUTION_SUBMITTED, STARPU_MP_COMMAND_ANSWER_EXECUTION_DETACHED_SUBMITTED, /* Asynchronous notifications from slave to master */ STARPU_MP_COMMAND_NOTIF_RECV_FROM_HOST_ASYNC_COMPLETED, STARPU_MP_COMMAND_NOTIF_SEND_TO_HOST_ASYNC_COMPLETED, STARPU_MP_COMMAND_NOTIF_RECV_FROM_SINK_ASYNC_COMPLETED, STARPU_MP_COMMAND_NOTIF_SEND_TO_SINK_ASYNC_COMPLETED, STARPU_MP_COMMAND_NOTIF_EXECUTION_COMPLETED, STARPU_MP_COMMAND_NOTIF_EXECUTION_DETACHED_COMPLETED, STARPU_MP_COMMAND_NOTIF_PRE_EXECUTION, STARPU_MP_COMMAND_NOTIF_FIRST = STARPU_MP_COMMAND_NOTIF_RECV_FROM_HOST_ASYNC_COMPLETED, STARPU_MP_COMMAND_NOTIF_LAST = STARPU_MP_COMMAND_NOTIF_PRE_EXECUTION, }; const char *_starpu_mp_common_command_to_string(const enum _starpu_mp_command command); enum _starpu_mp_node_kind { STARPU_NODE_MPI_SINK, STARPU_NODE_MPI_SOURCE, STARPU_NODE_TCPIP_SINK, STARPU_NODE_TCPIP_SOURCE, STARPU_NODE_INVALID_KIND }; const char *_starpu_mp_common_node_kind_to_string(const int kind); union _starpu_mp_connection { #ifdef STARPU_USE_MPI_MASTER_SLAVE int mpi_remote_nodeid; #endif #ifdef STARPU_USE_TCPIP_MASTER_SLAVE struct _starpu_tcpip_socket *tcpip_mp_connection; #endif }; struct _starpu_mp_transfer_command { size_t size; void *addr; void *event; }; struct _starpu_mp_transfer_command_to_device { size_t size; void *addr; void *event; int devid; char end[]; /* Keep last to compute non-padded size */ }; struct _starpu_mp_transfer_map_command { size_t offset; size_t size; char fd_name[]; }; struct _starpu_mp_transfer_unmap_command { uintptr_t addr; size_t size; }; LIST_TYPE(mp_barrier, int id; starpu_pthread_barrier_t before_work_barrier; starpu_pthread_barrier_t after_work_barrier; ); LIST_TYPE(mp_message, enum _starpu_mp_command type; char *buffer; int size; ); struct mp_task { void (*kernel)(void **, void *); enum starpu_data_interface_id *ids; void **interfaces; unsigned nb_interfaces; void *cl_arg; unsigned cl_arg_size; void *cl_ret; unsigned cl_ret_size; unsigned coreid; enum starpu_codelet_type type; int is_parallel_task; int combined_workerid; int detached; struct mp_barrier* mp_barrier; }; LIST_TYPE(_starpu_mp_event, struct _starpu_async_channel event; void * remote_event; enum _starpu_mp_command answer_cmd; ); /** Message-passing working node, whether source * or sink */ struct _starpu_mp_node { enum _starpu_mp_node_kind kind; int baseworkerid; /*the number of core on the device * Must be initialized during init function*/ int nb_cores; /*Is starpu running*/ int is_running; /** Buffer used for data transfers, allocated * during node initialization. * Size : BUFFER_SIZE */ void *buffer; /** For sink : -1. * For host : index of the sink = devid. */ int peer_id; /** Connection used for command passing between the host thread and the * sink it controls */ union _starpu_mp_connection mp_connection; /** Mutex to protect the interleaving of communications when using one thread per node, * for instance, when a thread transfers piece of data and an other wants to use * a sink_to_sink communication */ starpu_pthread_mutex_t connection_mutex; /** This list contains events * about asynchronous request */ struct _starpu_mp_event_list event_list; /*list where threads add events to send to the source node */ struct _starpu_mp_event_list event_queue; /** */ starpu_pthread_barrier_t init_completed_barrier; /** table to store pointer of the thread workers*/ void* thread_table; /*list where threads add messages to send to the source node */ struct mp_message_list message_queue; starpu_pthread_mutex_t message_queue_mutex; /*list of barrier for combined worker*/ struct mp_barrier_list barrier_list; starpu_pthread_mutex_t barrier_mutex; /*table where worker comme pick task*/ struct mp_task ** run_table; struct mp_task ** run_table_detached; sem_t * sem_run_table; /** Node general functions */ void (*init) (struct _starpu_mp_node *node); void (*launch_workers) (struct _starpu_mp_node *node); void (*deinit) (struct _starpu_mp_node *node); void (*report_error) (const char *, const char *, const int, const int); /** Message passing */ int (*mp_recv_is_ready) (const struct _starpu_mp_node *); void (*mp_send) (const struct _starpu_mp_node *, void *, int); void (*mp_recv) (const struct _starpu_mp_node *, void *, int); /** Notifications */ int (*nt_recv_is_ready) (const struct _starpu_mp_node *); int (*nt_send_is_ready) (const struct _starpu_mp_node *); void (*nt_send) (const struct _starpu_mp_node *, void *, int); void (*nt_recv) (const struct _starpu_mp_node *, void *, int); /*signal*/ void (*mp_wait) (struct _starpu_mp_node *); void (*mp_signal) (const struct _starpu_mp_node *); /** Data transfers */ void (*dt_send) (const struct _starpu_mp_node *, void *, int, void *); void (*dt_recv) (const struct _starpu_mp_node *, void *, int, void *); void (*dt_send_to_device) (const struct _starpu_mp_node *, int, void *, int, void *); void (*dt_recv_from_device) (const struct _starpu_mp_node *, int, void *, int, void *); /** Test async transfers */ unsigned int (*dt_test) (struct _starpu_async_channel *); void (*(*get_kernel_from_job) (const struct _starpu_mp_node *,struct _starpu_job *))(void); void (*(*lookup) (const struct _starpu_mp_node *, char*))(void); void (*bind_thread) (const struct _starpu_mp_node *, int,int *,int); void (*execute) (struct _starpu_mp_node *, void *, int); void (*allocate) (const struct _starpu_mp_node *, void *, int); void (*free) (const struct _starpu_mp_node *, void *, int); void (*map) (const struct _starpu_mp_node *, void *, int); void (*unmap) (const struct _starpu_mp_node *, void *, int); }; struct _starpu_mp_node * _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind, int peer_devid) STARPU_ATTRIBUTE_MALLOC; void _starpu_mp_common_node_destroy(struct _starpu_mp_node *node); void _starpu_mp_common_send_command(const struct _starpu_mp_node *node, const enum _starpu_mp_command command, void *arg, int arg_size); void _starpu_nt_common_send_command(const struct _starpu_mp_node *node, const enum _starpu_mp_command command, void *arg, int arg_size); enum _starpu_mp_command _starpu_mp_common_recv_command(const struct _starpu_mp_node *node, void **arg, int *arg_size); enum _starpu_mp_command _starpu_nt_common_recv_command(const struct _starpu_mp_node *node, void **arg, int *arg_size); void _starpu_sink_deinit(struct _starpu_mp_node *node); void _starpu_sink_launch_workers(struct _starpu_mp_node *node); #endif /* STARPU_USE_MP */ #pragma GCC visibility pop #endif /* __MP_COMMON_H__ */ starpu-1.4.9+dfsg/src/drivers/mp_common/sink_common.c000066400000000000000000000761411507764646700227320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "sink_common.h" /* Return the sink kind of the running process, based on the value of the * STARPU_SINK environment variable. * If there is no valid value retrieved, return STARPU_INVALID_KIND */ static enum _starpu_mp_node_kind _starpu_sink_common_get_kind(void) { /* Environment variable STARPU_SINK must be defined when running on sink * side : let's use it to get the kind of node we're running on */ char *node_kind = starpu_getenv("STARPU_SINK"); STARPU_ASSERT(node_kind); if (!strcmp(node_kind, "STARPU_MPI_MS")) return STARPU_NODE_MPI_SINK; else if (!strcmp(node_kind, "STARPU_TCPIP_MS")) return STARPU_NODE_TCPIP_SINK; else return STARPU_NODE_INVALID_KIND; } /* Send to host the number of cores of the sink device */ static void _starpu_sink_common_get_nb_cores(struct _starpu_mp_node *node) { // Process packet received from `_starpu_src_common_sink_cores'. _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_ANSWER_SINK_NBCORES, &node->nb_cores, sizeof(int)); } /* Send to host the address of the function given in parameter */ static void _starpu_sink_common_lookup(const struct _starpu_mp_node *node, char *func_name) { void (*func)(void); func = node->lookup(node,func_name); //_STARPU_DEBUG("Looked up %s, got %p\n", func_name, func); /* If we couldn't find the function, let's send an error to the host. * The user probably made a mistake in the name */ if (func) _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_ANSWER_LOOKUP, &func, sizeof(func)); else _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_ERROR_LOOKUP, NULL, 0); } /* CPU version of sink lookup */ void (*_starpu_sink_common_cpu_lookup(const struct _starpu_mp_node * node STARPU_ATTRIBUTE_UNUSED, char* func_name))(void) { #ifdef RTLD_DEFAULT return dlsym(RTLD_DEFAULT, func_name); #else void *dl_handle = dlopen(NULL, RTLD_NOW); return dlsym(dl_handle, func_name); #endif } /* Allocate a memory space and send the address of this space to the host */ void _starpu_sink_common_allocate(const struct _starpu_mp_node *mp_node, void *arg, int arg_size) { STARPU_ASSERT(arg_size == sizeof(size_t)); void *addr; _STARPU_MALLOC(addr, *(size_t *)(arg)); /* If the allocation fail, let's send an error to the host. */ if (addr) _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_ANSWER_ALLOCATE, &addr, sizeof(addr)); else _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_ERROR_ALLOCATE, NULL, 0); } void _starpu_sink_common_free(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, void *arg, int arg_size) { STARPU_ASSERT(arg_size == sizeof(void *)); free(*(void **)(arg)); } /* Map a memory space and send the address of this space to the host */ void _starpu_sink_common_map(const struct _starpu_mp_node *mp_node, void *arg, int arg_size) { STARPU_ASSERT((unsigned int)arg_size >= sizeof(struct _starpu_mp_transfer_map_command)); struct _starpu_mp_transfer_map_command *map_cmd = (struct _starpu_mp_transfer_map_command *)arg; void *map_addr = _starpu_sink_map(map_cmd->fd_name, map_cmd->offset, map_cmd->size); /* If mapping fail, let's send an error to the host. */ if (map_addr) _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_ANSWER_MAP, &map_addr, sizeof(map_addr)); else _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_ERROR_MAP, NULL, 0); } void _starpu_sink_common_unmap(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, void *arg, int arg_size) { STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_unmap_command)); struct _starpu_mp_transfer_unmap_command *unmap_cmd = (struct _starpu_mp_transfer_unmap_command *)arg; _starpu_sink_unmap(unmap_cmd->addr, unmap_cmd->size); } static void _starpu_sink_common_copy_from_host_sync(const struct _starpu_mp_node *mp_node, void *arg, int arg_size) { STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command)); struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg; mp_node->dt_recv(mp_node, cmd->addr, cmd->size, NULL); } static void _starpu_sink_common_copy_from_host_async(struct _starpu_mp_node *mp_node, void *arg, int arg_size) { STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command)); struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg; /* For asynchronous transfers, we store events to test them later when they are finished */ struct _starpu_mp_event * sink_event = _starpu_mp_event_new(); /* Save the command to send */ sink_event->answer_cmd = STARPU_MP_COMMAND_NOTIF_RECV_FROM_HOST_ASYNC_COMPLETED; sink_event->remote_event = cmd->event; /* Set the sender (host) ready because we don't want to wait its ack */ struct _starpu_async_channel * async_channel = &sink_event->event; async_channel->node_ops = NULL; async_channel->starpu_mp_common_finished_sender = -1; async_channel->starpu_mp_common_finished_receiver = 0; async_channel->polling_node_receiver = NULL; async_channel->polling_node_sender = NULL; mp_node->dt_recv(mp_node, cmd->addr, cmd->size, &sink_event->event); /* Push event on the list */ _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event); } static void _starpu_sink_common_copy_to_host_sync(const struct _starpu_mp_node *mp_node, void *arg, int arg_size) { STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command)); struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg; /* Save values before sending command to prevent the overwriting */ size_t size = cmd->size; void * addr = cmd->addr; _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_SEND_TO_HOST, NULL, 0); mp_node->dt_send(mp_node, addr, size, NULL); } static void _starpu_sink_common_copy_to_host_async(struct _starpu_mp_node *mp_node, void *arg, int arg_size) { STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command)); struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg; /* For asynchronous transfers, we need to say dt_send that we are in async mode * but we don't push event on list because we don't need to know if it's finished */ struct _starpu_mp_event * sink_event = _starpu_mp_event_new(); /* Save the command to send */ sink_event->answer_cmd = STARPU_MP_COMMAND_NOTIF_SEND_TO_HOST_ASYNC_COMPLETED; sink_event->remote_event = cmd->event; /* Set the receiver (host) ready because we don't want to wait its ack */ struct _starpu_async_channel * async_channel = &sink_event->event; async_channel->node_ops = NULL; async_channel->starpu_mp_common_finished_sender = 0; async_channel->starpu_mp_common_finished_receiver = -1; async_channel->polling_node_receiver = NULL; async_channel->polling_node_sender = NULL; mp_node->dt_send(mp_node, cmd->addr, cmd->size, &sink_event->event); /* Push event on the list */ _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event); } static void _starpu_sink_common_copy_from_sink_sync(const struct _starpu_mp_node *mp_node, void *arg, int arg_size) { STARPU_ASSERT(arg_size == offsetof(struct _starpu_mp_transfer_command_to_device, end)); struct _starpu_mp_transfer_command_to_device *cmd = (struct _starpu_mp_transfer_command_to_device *)arg; mp_node->dt_recv_from_device(mp_node, cmd->devid, cmd->addr, cmd->size, NULL); _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_ANSWER_TRANSFER_COMPLETE, NULL, 0); } static void _starpu_sink_common_copy_from_sink_async(struct _starpu_mp_node *mp_node, void *arg, int arg_size) { STARPU_ASSERT(arg_size == offsetof(struct _starpu_mp_transfer_command_to_device, end)); struct _starpu_mp_transfer_command_to_device *cmd = (struct _starpu_mp_transfer_command_to_device *)arg; /* For asynchronous transfers, we store events to test them later when they are finished */ struct _starpu_mp_event * sink_event = _starpu_mp_event_new(); /* Save the command to send */ sink_event->answer_cmd = STARPU_MP_COMMAND_NOTIF_RECV_FROM_SINK_ASYNC_COMPLETED; sink_event->remote_event = cmd->event; /* Set the sender ready because we don't want to wait its ack */ struct _starpu_async_channel * async_channel = &sink_event->event; async_channel->node_ops = NULL; async_channel->starpu_mp_common_finished_sender = -1; async_channel->starpu_mp_common_finished_receiver = 0; async_channel->polling_node_receiver = NULL; async_channel->polling_node_sender = NULL; mp_node->dt_recv_from_device(mp_node, cmd->devid, cmd->addr, cmd->size, &sink_event->event); /* Push event on the list */ _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event); } static void _starpu_sink_common_copy_to_sink_sync(const struct _starpu_mp_node *mp_node, void *arg, int arg_size) { STARPU_ASSERT(arg_size == offsetof(struct _starpu_mp_transfer_command_to_device, end)); struct _starpu_mp_transfer_command_to_device *cmd = (struct _starpu_mp_transfer_command_to_device *)arg; mp_node->dt_send_to_device(mp_node, cmd->devid, cmd->addr, cmd->size, NULL); } static void _starpu_sink_common_copy_to_sink_async(struct _starpu_mp_node *mp_node, void *arg, int arg_size) { STARPU_ASSERT(arg_size == offsetof(struct _starpu_mp_transfer_command_to_device, end)); struct _starpu_mp_transfer_command_to_device *cmd = (struct _starpu_mp_transfer_command_to_device *)arg; /* For asynchronous transfers, we need to say dt_send that we are in async mode * but we don't push event on list because we don't need to know if it's finished */ struct _starpu_mp_event * sink_event = _starpu_mp_event_new(); /* Save the command to send */ sink_event->answer_cmd = STARPU_MP_COMMAND_NOTIF_SEND_TO_SINK_ASYNC_COMPLETED; sink_event->remote_event = cmd->event; /* Set the receiver ready because we don't want to wait its ack */ struct _starpu_async_channel * async_channel = &sink_event->event; async_channel->node_ops = NULL; async_channel->starpu_mp_common_finished_sender = 0; async_channel->starpu_mp_common_finished_receiver = -1; async_channel->polling_node_receiver = NULL; async_channel->polling_node_sender = NULL; mp_node->dt_send_to_device(mp_node, cmd->devid, cmd->addr, cmd->size, &sink_event->event); /* Push event on the list */ _starpu_mp_event_list_push_back(&mp_node->event_list, sink_event); } /* Receive workers and combined workers and store them into the struct config */ static void _starpu_sink_common_recv_workers(struct _starpu_mp_node * node, void *arg, int arg_size) { /* Retrieve information from the message */ STARPU_ASSERT(arg_size == (sizeof(int)*5)); uintptr_t arg_ptr = (uintptr_t) arg; int i; int nworkers = *(int *)arg_ptr; arg_ptr += sizeof(nworkers); int worker_size = *(int *)arg_ptr; arg_ptr += sizeof(worker_size); int combined_worker_size = *(int *)arg_ptr; arg_ptr += sizeof(combined_worker_size); int baseworkerid = *(int *)arg_ptr; arg_ptr += sizeof(baseworkerid); /* Clear data we won't use */ struct _starpu_machine_config *config = _starpu_get_machine_config(); for(i=0; itopology.nworkers; i++) { free(config->workers[i].perf_arch.devices); config->workers[i].perf_arch.devices = NULL; } config->topology.nworkers = *(int *)arg_ptr; /* Retrieve workers */ struct _starpu_worker * workers = &config->workers[baseworkerid]; node->dt_recv(node,workers,worker_size, NULL); /* Update workers to have coherent field */ for(i=0; icombined_workers; node->dt_recv(node, combined_workers, combined_worker_size, NULL); node->baseworkerid = baseworkerid; STARPU_PTHREAD_BARRIER_WAIT(&node->init_completed_barrier); } /* Function looping on the sink, waiting for tasks to execute. * If the caller is the host, don't do anything. */ void _starpu_sink_common_worker(void) { struct _starpu_mp_node *node = NULL; enum _starpu_mp_command command; int arg_size = 0; void *arg = NULL; int exit_starpu = 0; enum _starpu_mp_node_kind node_kind = _starpu_sink_common_get_kind(); if (node_kind == STARPU_NODE_INVALID_KIND) _STARPU_ERROR("No valid sink kind retrieved, use the STARPU_SINK environment variable to specify this\n"); /* Create and initialize the node */ node = _starpu_mp_common_node_create(node_kind, -1); starpu_pthread_key_t worker_key; STARPU_PTHREAD_KEY_CREATE(&worker_key, NULL); while (!exit_starpu) { /* Wait send/recv is ready */ if (node->mp_wait) node->mp_wait(node); /* If we have received a message */ if(node->mp_recv_is_ready(node)) { command = _starpu_mp_common_recv_command(node, &arg, &arg_size); switch(command) { case STARPU_MP_COMMAND_EXIT: exit_starpu = 1; break; case STARPU_MP_COMMAND_EXECUTE_DETACHED: case STARPU_MP_COMMAND_EXECUTE: node->execute(node, arg, arg_size); break; case STARPU_MP_COMMAND_SINK_NBCORES: _starpu_sink_common_get_nb_cores(node); break; case STARPU_MP_COMMAND_LOOKUP: _starpu_sink_common_lookup(node, (char *) arg); break; case STARPU_MP_COMMAND_ALLOCATE: node->allocate(node, arg, arg_size); break; case STARPU_MP_COMMAND_FREE: node->free(node, arg, arg_size); break; case STARPU_MP_COMMAND_MAP: node->map(node, arg, arg_size); break; case STARPU_MP_COMMAND_UNMAP: node->unmap(node, arg, arg_size); break; case STARPU_MP_COMMAND_RECV_FROM_HOST: _starpu_sink_common_copy_from_host_sync(node, arg, arg_size); break; case STARPU_MP_COMMAND_SEND_TO_HOST: _starpu_sink_common_copy_to_host_sync(node, arg, arg_size); break; case STARPU_MP_COMMAND_RECV_FROM_SINK: _starpu_sink_common_copy_from_sink_sync(node, arg, arg_size); break; case STARPU_MP_COMMAND_SEND_TO_SINK: _starpu_sink_common_copy_to_sink_sync(node, arg, arg_size); break; case STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC: _starpu_sink_common_copy_from_host_async(node, arg, arg_size); break; case STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC: _starpu_sink_common_copy_to_host_async(node, arg, arg_size); break; case STARPU_MP_COMMAND_RECV_FROM_SINK_ASYNC: _starpu_sink_common_copy_from_sink_async(node, arg, arg_size); break; case STARPU_MP_COMMAND_SEND_TO_SINK_ASYNC: _starpu_sink_common_copy_to_sink_async(node, arg, arg_size); break; case STARPU_MP_COMMAND_SYNC_WORKERS: _starpu_sink_common_recv_workers(node, arg, arg_size); break; default: _STARPU_MSG("Oops, command %x unrecognized\n", command); } } STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex); /* If the list is not empty and we can send a notification */ while(!mp_message_list_empty(&node->message_queue) && node->nt_send_is_ready(node)) { /* We pop a message and send it to the host */ struct mp_message * message = mp_message_list_pop_back(&node->message_queue); STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex); //_STARPU_DEBUG("telling host that we have finished the task %p sur %d.\n", task->kernel, task->coreid); STARPU_ASSERT(message->type >= STARPU_MP_COMMAND_NOTIF_FIRST && message->type <= STARPU_MP_COMMAND_NOTIF_LAST); _starpu_nt_common_send_command(node, message->type, message->buffer, message->size); free(message->buffer); mp_message_delete(message); STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex); struct _starpu_mp_event * sink_event; struct _starpu_mp_event * sink_event_next; for (sink_event = _starpu_mp_event_list_begin(&node->event_list); sink_event != _starpu_mp_event_list_end(&node->event_list); sink_event = sink_event_next) { sink_event_next = _starpu_mp_event_list_next(sink_event); /*if event is completed move it into event queue*/ if(node->dt_test(&sink_event->event)) { _starpu_mp_event_list_erase(&node->event_list, sink_event); _starpu_mp_event_list_push_front(&node->event_queue, sink_event); } } /*if the list is not empty and we can send a notification*/ while(!_starpu_mp_event_list_empty(&node->event_queue) && node->nt_send_is_ready(node)) { struct _starpu_mp_event * sink_event_completed = _starpu_mp_event_list_pop_back(&node->event_queue); /* send ACK to host */ STARPU_ASSERT(sink_event_completed->answer_cmd >= STARPU_MP_COMMAND_NOTIF_FIRST && sink_event_completed->answer_cmd <= STARPU_MP_COMMAND_NOTIF_LAST); _starpu_nt_common_send_command(node, sink_event_completed->answer_cmd, &sink_event_completed->remote_event, sizeof(sink_event_completed->remote_event)); _starpu_mp_event_delete(sink_event_completed); } } STARPU_PTHREAD_KEY_DELETE(worker_key); /* Deinitialize the node and release it */ _starpu_mp_common_node_destroy(node); starpu_perfmodel_free_sampling(); _starpu_profiling_terminate(); _starpu_perf_knob_exit(); _starpu_perf_counter_exit(); _starpu_destroy_machine_config(&_starpu_config, 1); free((char*) _starpu_config.conf.sched_policy_name); if (_starpu_config.conf.n_cuda_opengl_interoperability) free(_starpu_config.conf.cuda_opengl_interoperability); if (_starpu_config.conf.n_not_launched_drivers) free(_starpu_config.conf.not_launched_drivers); #ifdef STARPU_USE_MPI_MASTER_SLAVE _starpu_mpi_common_mp_deinit(); #endif #ifdef STARPU_USE_TCPIP_MASTER_SLAVE _starpu_tcpip_common_mp_deinit(); #endif exit(0); } /* Search for the mp_barrier correspondind to the specified combined worker * and create it if it doesn't exist */ static struct mp_barrier * _starpu_sink_common_get_barrier(struct _starpu_mp_node * node, int cb_workerid, int cb_workersize) { struct mp_barrier * b = NULL; STARPU_PTHREAD_MUTEX_LOCK(&node->barrier_mutex); /* Search if the barrier already exist */ for(b = mp_barrier_list_begin(&node->barrier_list); b != mp_barrier_list_end(&node->barrier_list) && b->id != cb_workerid; b = mp_barrier_list_next(b)); /* If we found the barrier */ if(b != NULL) { STARPU_PTHREAD_MUTEX_UNLOCK(&node->barrier_mutex); return b; } else { /* Else we create, initialize and add it to the list*/ b = mp_barrier_new(); b->id = cb_workerid; STARPU_PTHREAD_BARRIER_INIT(&b->before_work_barrier,NULL,cb_workersize); STARPU_PTHREAD_BARRIER_INIT(&b->after_work_barrier,NULL,cb_workersize); mp_barrier_list_push_back(&node->barrier_list,b); STARPU_PTHREAD_MUTEX_UNLOCK(&node->barrier_mutex); return b; } } /* Erase for the mp_barrier correspondind to the specified combined worker */ static void _starpu_sink_common_erase_barrier(struct _starpu_mp_node * node, struct mp_barrier *barrier) { STARPU_PTHREAD_MUTEX_LOCK(&node->barrier_mutex); mp_barrier_list_erase(&node->barrier_list,barrier); STARPU_PTHREAD_MUTEX_UNLOCK(&node->barrier_mutex); } /* Append the message given in parameter to the message list */ static void _starpu_sink_common_append_message(struct _starpu_mp_node *node, struct mp_message * message) { STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex); mp_message_list_push_front(&node->message_queue,message); STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex); /* Send the signal that message is in message_queue */ if(node->mp_signal) { node->mp_signal(node); } } /* Append to the message list a "STARPU_PRE_EXECUTION" message */ static void _starpu_sink_common_pre_execution_message(struct _starpu_mp_node *node, struct mp_task *task) { /* Init message to tell the sink that the execution has begun */ struct mp_message * message = mp_message_new(); message->type = STARPU_MP_COMMAND_NOTIF_PRE_EXECUTION; _STARPU_MALLOC(message->buffer, sizeof(int)); *(int *) message->buffer = task->combined_workerid; message->size = sizeof(int); /* Append the message to the queue */ _starpu_sink_common_append_message(node, message); } /* Append to the message list a "STARPU_EXECUTION_COMPLETED" message and cl_ret */ static void _starpu_sink_common_execution_completed_message(struct _starpu_mp_node *node, struct mp_task *task) { /* Init message to tell the sink that the execution is completed */ struct mp_message * message = mp_message_new(); if (task->detached) message->type = STARPU_MP_COMMAND_NOTIF_EXECUTION_DETACHED_COMPLETED; else message->type = STARPU_MP_COMMAND_NOTIF_EXECUTION_COMPLETED; message->size = sizeof(int); /* If the user didn't give any cl_ret, there is no need to send it */ if (task->cl_ret) { STARPU_ASSERT(task->cl_ret_size); message->size += task->cl_ret_size; } _STARPU_MALLOC(message->buffer, message->size); *(int*) message->buffer = task->coreid; if (task->cl_ret) memcpy(message->buffer+sizeof(int), task->cl_ret, task->cl_ret_size); /* Append the message to the queue */ _starpu_sink_common_append_message(node, message); } /* Bind the thread which is running on the specified core to the combined worker */ static void _starpu_sink_common_bind_to_combined_worker(struct _starpu_mp_node *node, int coreid, struct _starpu_combined_worker * combined_worker) { int i; int * bind_set; _STARPU_MALLOC(bind_set, sizeof(int)*combined_worker->worker_size); for(i=0;iworker_size;i++) bind_set[i] = combined_worker->combined_workerid[i] - node->baseworkerid; node->bind_thread(node, coreid, bind_set, combined_worker->worker_size); } /* Get the current rank of the worker in the combined worker */ static int _starpu_sink_common_get_current_rank(int workerid, struct _starpu_combined_worker * combined_worker) { int i; for(i=0; iworker_size; i++) if(workerid == combined_worker->combined_workerid[i]) return i; STARPU_ASSERT(0); return -1; } /* Execute the task */ static void _starpu_sink_common_execute_kernel(struct _starpu_mp_node *node, int coreid, struct _starpu_worker * worker, int detached) { struct _starpu_combined_worker * combined_worker = NULL; struct mp_task* task; if (detached) task = node->run_table_detached[coreid]; else task = node->run_table[coreid]; /* If it's a parallel task */ if(task->is_parallel_task) { combined_worker = _starpu_get_combined_worker_struct(task->combined_workerid); worker->current_rank = _starpu_sink_common_get_current_rank(worker->workerid, combined_worker); worker->combined_workerid = task->combined_workerid; worker->worker_size = combined_worker->worker_size; /* Synchronize with others threads of the combined worker*/ STARPU_PTHREAD_BARRIER_WAIT(&task->mp_barrier->before_work_barrier); /* The first thread of the combined worker */ if(worker->current_rank == 0) { /* tell the sink that the execution has begun */ _starpu_sink_common_pre_execution_message(node,task); /* If the mode is FORKJOIN, * the first thread binds himself * on all core of the combined worker*/ if(task->type == STARPU_FORKJOIN) { _starpu_sink_common_bind_to_combined_worker(node, coreid, combined_worker); } } } else { worker->current_rank = 0; worker->combined_workerid = 0; worker->worker_size = 1; } if(task->type != STARPU_FORKJOIN || worker->current_rank == 0) { if (_starpu_get_disable_kernels() <= 0) { struct starpu_task s_task; starpu_task_init(&s_task); /*copy cl_arg and cl_arg_size from mp_task into starpu_task*/ s_task.cl_arg=task->cl_arg; s_task.cl_arg_size=task->cl_arg_size; _starpu_set_current_task(&s_task); /* execute the task */ task->kernel(task->interfaces,task->cl_arg); _starpu_set_current_task(NULL); /*copy cl_ret and cl_ret_size from starpu_task into mp_task*/ task->cl_ret=s_task.cl_ret; task->cl_ret_size=s_task.cl_ret_size; } } /* If it's a parallel task */ if(task->is_parallel_task) { /* Synchronize with others threads of the combined worker*/ STARPU_PTHREAD_BARRIER_WAIT(&task->mp_barrier->after_work_barrier); /* The first thread of the combined */ if(worker->current_rank == 0) { /* Erase the barrier from the list */ _starpu_sink_common_erase_barrier(node,task->mp_barrier); /* If the mode is FORKJOIN, * the first thread rebinds himself on his own core */ if(task->type == STARPU_FORKJOIN) node->bind_thread(node, coreid, &coreid, 1); } } if (detached) node->run_table_detached[coreid] = NULL; else node->run_table[coreid] = NULL; /* tell the sink that the execution is completed */ _starpu_sink_common_execution_completed_message(node,task); /*free the task*/ unsigned i; for (i = 0; i < task->nb_interfaces; i++) { struct starpu_data_interface_ops *ops = _starpu_data_interface_get_ops(task->ids[i]); if (ops->free_meta) { ops->free_meta(task->interfaces[i]); } free(task->interfaces[i]); } free(task->interfaces); free(task->ids); if (task->cl_arg != NULL) free(task->cl_arg); free(task); } /* The main function executed by the thread * thread_arg is a structure containing the information needed by the thread */ void* _starpu_sink_thread(void * thread_arg) { /* Retrieve the information from the structure */ struct _starpu_mp_node *node = ((struct arg_sink_thread *)thread_arg)->node; int coreid =((struct arg_sink_thread *)thread_arg)->coreid; /* free the structure */ free(thread_arg); STARPU_PTHREAD_BARRIER_WAIT(&node->init_completed_barrier); struct _starpu_worker *worker = &_starpu_get_machine_config()->workers[node->baseworkerid + coreid]; char *s; asprintf(&s, "slave %d core %d", node->baseworkerid, coreid); starpu_pthread_setname(s); free(s); node->bind_thread(node, coreid, &coreid, 1); _starpu_set_local_worker_key(worker); while(node->is_running) { /*Wait there is a task available */ sem_wait(&node->sem_run_table[coreid]); STARPU_ASSERT((node->run_table_detached[coreid]!=NULL) || (node->run_table[coreid]!=NULL) || node->is_running==0); if (node->run_table_detached[coreid] != NULL) _starpu_sink_common_execute_kernel(node, coreid, worker, 1); else if (node->run_table[coreid] != NULL) _starpu_sink_common_execute_kernel(node, coreid, worker, 0); else STARPU_ASSERT(!node->is_running); } starpu_pthread_exit(NULL); } /* Add the task to the specific thread and wake him up */ static void _starpu_sink_common_execute_thread(struct _starpu_mp_node *node, struct mp_task *task) { int detached = task->detached; /* Add the task to the specific thread */ if (detached) { STARPU_ASSERT(!node->run_table_detached[task->coreid]); node->run_table_detached[task->coreid] = task; } else { STARPU_ASSERT(!node->run_table[task->coreid]); node->run_table[task->coreid] = task; } /* Unlock the mutex to wake up the thread which will execute the task */ sem_post(&node->sem_run_table[task->coreid]); } /* Receive paquet from _starpu_src_common_execute_kernel in the form below : * [Function pointer on sink, number of interfaces, interfaces * (union _starpu_interface), cl_arg] * Then call the function given, passing as argument an array containing the * addresses of the received interfaces */ void _starpu_sink_common_execute(struct _starpu_mp_node *node, void *arg, int arg_size) { unsigned i; uintptr_t arg_ptr = (uintptr_t) arg; struct mp_task *task; _STARPU_CALLOC(task, 1, sizeof(struct mp_task)); task->kernel = *(void(**)(void **, void *)) arg_ptr; arg_ptr += sizeof(task->kernel); task->type = *(enum starpu_codelet_type *) arg_ptr; arg_ptr += sizeof(task->type); task->is_parallel_task = *(int *) arg_ptr; arg_ptr += sizeof(task->is_parallel_task); if(task->is_parallel_task) { task->combined_workerid= *(int *) arg_ptr; arg_ptr += sizeof(task->combined_workerid); task->mp_barrier = _starpu_sink_common_get_barrier(node,task->combined_workerid,_starpu_get_combined_worker_struct(task->combined_workerid)->worker_size); } task->coreid = *(unsigned *) arg_ptr; arg_ptr += sizeof(task->coreid); task->nb_interfaces = *(unsigned *) arg_ptr; arg_ptr += sizeof(task->nb_interfaces); task->detached = *(int *) arg_ptr; arg_ptr += sizeof(task->detached); _STARPU_MALLOC(task->interfaces, task->nb_interfaces * sizeof(*task->interfaces)); _STARPU_MALLOC(task->ids, task->nb_interfaces * sizeof(*task->ids)); /* The function needs an array pointing to each interface it needs * during execution. The interface is first identified by its * id, which will indicate if this is a basic interface or if * it needs to be unpacked through unpack_meta */ for (i = 0; i < task->nb_interfaces; i++) { // first extract the interface id memcpy(&(task->ids[i]), (void *)arg_ptr, sizeof(task->ids[i])); arg_ptr += sizeof(task->ids[i]); // and then the interface struct starpu_data_interface_ops *ops = _starpu_data_interface_get_ops(task->ids[i]); if (ops->unpack_meta) { STARPU_ASSERT_MSG(ops->pack_meta, "unpack_meta defined without pack_meta for interface %d", task->ids[i]); starpu_ssize_t count; ops->unpack_meta(&task->interfaces[i], (void*) arg_ptr, &count); arg_ptr += count; } else { union _starpu_interface *interface; _STARPU_MALLOC(interface, sizeof(union _starpu_interface)); memcpy(interface, (void*) arg_ptr, sizeof(union _starpu_interface)); task->interfaces[i] = interface; arg_ptr += sizeof(union _starpu_interface); } } /* Was cl_arg sent ? */ if (arg_size > arg_ptr - (uintptr_t) arg) { /* Copy cl_arg to prevent overwriting by an other task */ unsigned cl_arg_size = arg_size - (arg_ptr - (uintptr_t) arg); _STARPU_MALLOC(task->cl_arg, cl_arg_size); memcpy(task->cl_arg, (void *) arg_ptr, cl_arg_size); task->cl_arg_size=cl_arg_size; } else task->cl_arg = NULL; //_STARPU_DEBUG("telling host that we have submitted the task %p.\n", task->kernel); if (task->detached) _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_ANSWER_EXECUTION_DETACHED_SUBMITTED, NULL, 0); else _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_ANSWER_EXECUTION_SUBMITTED, NULL, 0); //_STARPU_DEBUG("executing the task %p\n", task->kernel); _starpu_sink_common_execute_thread(node, task); } starpu-1.4.9+dfsg/src/drivers/mp_common/sink_common.h000066400000000000000000000037161507764646700227350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __SINK_COMMON_H__ #define __SINK_COMMON_H__ /** @file */ #include #ifdef STARPU_USE_MP #include #pragma GCC visibility push(hidden) /** Represent the topology of sink devices, contains useful information about * their capabilities * XXX: unused. */ struct _starpu_sink_topology { unsigned nb_cpus; }; struct arg_sink_thread { struct _starpu_mp_node *node; int coreid; }; void _starpu_sink_common_worker(void); void (*_starpu_sink_common_cpu_lookup (const struct _starpu_mp_node * node STARPU_ATTRIBUTE_UNUSED, char* func_name))(void); void _starpu_sink_common_execute(struct _starpu_mp_node *node, void *arg, int arg_size); void _starpu_sink_common_allocate(const struct _starpu_mp_node *mp_node, void *arg, int arg_size); void _starpu_sink_common_free(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, void *arg, int arg_size); void _starpu_sink_common_map(const struct _starpu_mp_node *mp_node, void *arg, int arg_size); void _starpu_sink_common_unmap(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, void *arg, int arg_size); void* _starpu_sink_thread(void * thread_arg); #pragma GCC visibility pop #endif /* STARPU_USE_MP */ #endif /* __SINK_COMMON_H__ */ starpu-1.4.9+dfsg/src/drivers/mp_common/source_common.c000066400000000000000000001262301507764646700232610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include struct starpu_save_thread_env { struct starpu_task * current_task; struct _starpu_worker * current_worker; struct _starpu_worker_set * current_worker_set; #ifdef STARPU_OPENMP struct starpu_omp_thread * current_omp_thread; struct starpu_omp_task * current_omp_task; #endif }; #ifdef STARPU_USE_MPI_MASTER_SLAVE struct starpu_save_thread_env save_thread_env[STARPU_MAXMPIDEVS]; struct _starpu_mp_node *_starpu_src_nodes[STARPU_NARCH][STARPU_MAXMPIDEVS]; #endif #ifdef STARPU_USE_TCPIP_MASTER_SLAVE struct starpu_save_thread_env save_thread_env[STARPU_MAXTCPIPDEVS]; struct _starpu_mp_node *_starpu_src_nodes[STARPU_NARCH][STARPU_MAXTCPIPDEVS]; #endif /* Mutex for concurrent access to the table. */ static starpu_pthread_mutex_t htbl_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; /* Structure used by host to store information about a kernel executable on * a MPI MS device : its name, and its address on each device. * If a kernel has been initialized, then a lookup has already been achieved and the * device knows how to call it, else the host still needs to do a lookup. */ static struct _starpu_sink_kernel { UT_hash_handle hh; char *name; starpu_cpu_func_t func[]; } *kernels[STARPU_NARCH]; static unsigned mp_node_memory_node(struct _starpu_mp_node *node) { return starpu_worker_get_memory_node(node->baseworkerid); } void _starpu_src_common_deinit(void) { enum starpu_worker_archtype arch; for (arch = 0; arch < STARPU_NARCH; arch++) { struct _starpu_sink_kernel *entry, *tmp; HASH_ITER(hh, kernels[arch], entry, tmp) { HASH_DEL(kernels[arch], entry); free(entry->name); free(entry); } } } /* Finalize the execution of a task by a worker*/ static int _starpu_src_common_finalize_job(struct _starpu_job *j, struct _starpu_worker *worker) { int profiling = starpu_profiling_status_get(); _starpu_driver_end_job(worker, j, &worker->perf_arch, 0, profiling); int count = worker->current_rank; /* If it's a combined worker, we check if it's the last one of his combined */ if(j->task_size > 1) { struct _starpu_combined_worker * cb_worker = _starpu_get_combined_worker_struct(worker->combined_workerid); (void) STARPU_ATOMIC_ADD(&j->after_work_busy_barrier, -1); STARPU_PTHREAD_MUTEX_LOCK(&cb_worker->count_mutex); count = cb_worker->count--; if(count == 0) cb_worker->count = cb_worker->worker_size - 1; STARPU_PTHREAD_MUTEX_UNLOCK(&cb_worker->count_mutex); } /* Finalize the execution */ if(count == 0) { _starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, profiling); _starpu_push_task_output(j); _starpu_handle_job_termination(j); } return 0; } /* Complete the execution of the job */ static int _starpu_src_common_process_completed_job(struct _starpu_mp_node *node, struct _starpu_worker_set *workerset, void * arg, int arg_size, int stored) { int coreid; uintptr_t arg_ptr = (uintptr_t) arg; coreid = *(int *) arg_ptr; arg_ptr += sizeof(coreid); struct _starpu_worker *worker = &workerset->workers[coreid]; struct _starpu_job *j = _starpu_get_job_associated_to_task(worker->current_task); struct starpu_task *task = j->task; STARPU_ASSERT(task); struct _starpu_worker * old_worker = _starpu_get_local_worker_key(); /* Was cl_ret sent ? */ if (arg_size > arg_ptr - (uintptr_t) arg) { /* Copy cl_ret into the task */ unsigned cl_ret_size = arg_size - (arg_ptr - (uintptr_t) arg); _STARPU_MALLOC(task->cl_ret, cl_ret_size); memcpy(task->cl_ret, (void *) arg_ptr, cl_ret_size); task->cl_ret_size=cl_ret_size; } else task->cl_ret = NULL; /* if arg is not copied we release the mutex */ if (!stored) STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); _starpu_set_local_worker_key(worker); _starpu_src_common_finalize_job(j, worker); _starpu_set_local_worker_key(old_worker); worker->current_task = NULL; return 0; } /* Tell the scheduler when the execution has begun */ static void _starpu_src_common_pre_exec(struct _starpu_mp_node *node, void * arg, int arg_size, int stored) { int cb_workerid, i; STARPU_ASSERT(sizeof(cb_workerid) == arg_size); cb_workerid = *(int *) arg; struct _starpu_combined_worker *combined_worker = _starpu_get_combined_worker_struct(cb_workerid); /* if arg is not copied we release the mutex */ if (!stored) STARPU_PTHREAD_MUTEX_LOCK(&node->connection_mutex); for(i=0; i < combined_worker->worker_size; i++) { struct _starpu_worker * worker = _starpu_get_worker_struct(combined_worker->combined_workerid[i]); _starpu_set_local_worker_key(worker); _starpu_sched_pre_exec_hook(worker->current_task); } } /* recv a message and handle asynchronous message * return 0 if the message has not been handle (it's certainly mean that it's a synchronous message) * return 1 if the message has been handle */ static int _starpu_src_common_handle_async(struct _starpu_mp_node *node, void * arg, int arg_size, enum _starpu_mp_command answer, int stored) { struct _starpu_worker_set * worker_set = NULL; switch(answer) { case STARPU_MP_COMMAND_NOTIF_EXECUTION_COMPLETED: { worker_set = _starpu_get_worker_struct(starpu_worker_get_id())->set; _starpu_src_common_process_completed_job(node, worker_set, arg, arg_size, stored); break; } case STARPU_MP_COMMAND_NOTIF_EXECUTION_DETACHED_COMPLETED: { _STARPU_ERROR("Detached execution completed should not arrive here... \n"); break; } case STARPU_MP_COMMAND_NOTIF_PRE_EXECUTION: { _starpu_src_common_pre_exec(node, arg,arg_size, stored); break; } case STARPU_MP_COMMAND_NOTIF_RECV_FROM_HOST_ASYNC_COMPLETED: case STARPU_MP_COMMAND_NOTIF_RECV_FROM_SINK_ASYNC_COMPLETED: { struct _starpu_async_channel * event = *((struct _starpu_async_channel **) arg); event->starpu_mp_common_finished_receiver--; if (!stored) STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); break; } case STARPU_MP_COMMAND_NOTIF_SEND_TO_HOST_ASYNC_COMPLETED: case STARPU_MP_COMMAND_NOTIF_SEND_TO_SINK_ASYNC_COMPLETED: { struct _starpu_async_channel * event = *((struct _starpu_async_channel **) arg); event->starpu_mp_common_finished_sender--; if (!stored) STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); break; } default: return 0; break; } return 1; } /* Handle all message which have been stored in the message_queue */ static void _starpu_src_common_handle_stored_async(struct _starpu_mp_node *node) { int stopped_progress = 0; STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex); /* while the list is not empty */ while(!mp_message_list_empty(&node->message_queue)) { /* We pop a message and handle it */ struct mp_message * message = mp_message_list_pop_back(&node->message_queue); /* Release mutex during handle */ stopped_progress = 1; _STARPU_TRACE_END_PROGRESS(mp_node_memory_node(node)); STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex); _starpu_src_common_handle_async(node, message->buffer, message->size, message->type, 1); free(message->buffer); mp_message_delete(message); /* Take it again */ STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex); if (stopped_progress) _STARPU_TRACE_START_PROGRESS(mp_node_memory_node(node)); } /* Store a message if is asynchronous * return 1 if the message has been stored * return 0 if the message is unknown or synchrone */ int _starpu_src_common_store_message(struct _starpu_mp_node *node, void * arg, int arg_size, enum _starpu_mp_command answer) { switch(answer) { case STARPU_MP_COMMAND_NOTIF_EXECUTION_COMPLETED: case STARPU_MP_COMMAND_NOTIF_EXECUTION_DETACHED_COMPLETED: case STARPU_MP_COMMAND_NOTIF_PRE_EXECUTION: { struct mp_message *message = mp_message_new(); message->type = answer; _STARPU_MALLOC(message->buffer, arg_size); memcpy(message->buffer, arg, arg_size); message->size = arg_size; STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex); mp_message_list_push_front(&node->message_queue,message); STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex); /* Send the signal that message is in message_queue */ if(node->mp_signal) { node->mp_signal(node); } return 1; } /* For ASYNC commands don't store them, update event */ case STARPU_MP_COMMAND_NOTIF_RECV_FROM_HOST_ASYNC_COMPLETED: case STARPU_MP_COMMAND_NOTIF_RECV_FROM_SINK_ASYNC_COMPLETED: { struct _starpu_async_channel * event = *((struct _starpu_async_channel **) arg); event->starpu_mp_common_finished_receiver--; return 1; } case STARPU_MP_COMMAND_NOTIF_SEND_TO_HOST_ASYNC_COMPLETED: case STARPU_MP_COMMAND_NOTIF_SEND_TO_SINK_ASYNC_COMPLETED: { struct _starpu_async_channel * event = *((struct _starpu_async_channel **) arg); event->starpu_mp_common_finished_sender--; return 1; } default: return 0; } } /* Store all asynchronous messages and return when a synchronous message is received */ static enum _starpu_mp_command _starpu_src_common_wait_command_sync(struct _starpu_mp_node *node, void ** arg, int* arg_size) { enum _starpu_mp_command answer; int is_sync = 0; while(!is_sync) { answer = _starpu_mp_common_recv_command(node, arg, arg_size); if(!_starpu_src_common_store_message(node,*arg,*arg_size,answer)) is_sync=1; } return answer; } /* Handle a asynchrone message and return a error if a synchronous message is received */ static void _starpu_src_common_recv_async(struct _starpu_mp_node * node) { enum _starpu_mp_command answer; void *arg; int arg_size; answer = _starpu_nt_common_recv_command(node, &arg, &arg_size); if(!_starpu_src_common_handle_async(node,arg,arg_size,answer, 0)) { _STARPU_ERROR("incorrect command: unknown command or sync command"); } } /* Handle all asynchrone message while a completed execution message from a specific worker has been receive */ enum _starpu_mp_command _starpu_src_common_wait_completed_execution(struct _starpu_mp_node *node, int devid, void **arg, int * arg_size) { enum _starpu_mp_command answer; int completed = 0; /*While the waited completed execution message has not been receive*/ while(!completed) { answer = _starpu_nt_common_recv_command(node, arg, arg_size); if(answer == STARPU_MP_COMMAND_NOTIF_EXECUTION_DETACHED_COMPLETED) { int coreid; STARPU_ASSERT(sizeof(coreid) == *arg_size); coreid = *(int *) *arg; if(devid == coreid) completed = 1; else if(!_starpu_src_common_store_message(node, *arg, *arg_size, answer)) /* We receive a unknown or asynchronous message */ STARPU_ASSERT(0); } else { if(!_starpu_src_common_store_message(node, *arg, *arg_size, answer)) /* We receive a unknown or asynchronous message */ STARPU_ASSERT(0); } } return answer; } /* Send a request to the sink NODE for the number of cores on it. */ int _starpu_src_common_sink_nbcores(struct _starpu_mp_node *node, int *buf) { enum _starpu_mp_command answer; void *arg; int arg_size = sizeof(int); STARPU_PTHREAD_MUTEX_LOCK(&node->connection_mutex); _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_SINK_NBCORES, NULL, 0); answer = _starpu_mp_common_recv_command(node, &arg, &arg_size); STARPU_ASSERT(answer == STARPU_MP_COMMAND_ANSWER_SINK_NBCORES && arg_size == sizeof(int)); memcpy(buf, arg, arg_size); STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); return 0; } /* Send a request to the sink linked to NODE for the pointer to the * function defined by FUNC_NAME. * In case of success, it returns 0 and FUNC_PTR contains the pointer ; * else it returns -ESPIPE if the function was not found. */ int _starpu_src_common_lookup(struct _starpu_mp_node *node, void (**func_ptr)(void), const char *func_name) { enum _starpu_mp_command answer; void *arg; int arg_size; /* strlen ignore the terminating '\0' */ arg_size = (strlen(func_name) + 1) * sizeof(char); STARPU_PTHREAD_MUTEX_LOCK(&node->connection_mutex); //_STARPU_DEBUG("Looking up %s\n", func_name); _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_LOOKUP, (void *) func_name, arg_size); answer = _starpu_src_common_wait_command_sync(node, (void **) &arg, &arg_size); if (answer == STARPU_MP_COMMAND_ERROR_LOOKUP) { _STARPU_DISP("Error looking up symbol %s\n", func_name); STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); return -ESPIPE; } /* We have to be sure the device answered the right question and the * answer has the right size */ STARPU_ASSERT(answer == STARPU_MP_COMMAND_ANSWER_LOOKUP); STARPU_ASSERT(arg_size == sizeof(*func_ptr)); memcpy(func_ptr, arg, arg_size); STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); //_STARPU_DEBUG("got %p\n", *func_ptr); return 0; } /* Send a message to the sink to execute a kernel. * The message sent has the form below : * [Function pointer on sink, number of interfaces, interfaces * (union _starpu_interface), cl_arg] */ /* Launch the execution of the function KERNEL points to on the sink linked * to NODE. Returns 0 in case of success, -EINVAL if kernel is an invalid * pointer. * Data interfaces in task are send to the sink. */ int _starpu_src_common_execute_kernel(struct _starpu_mp_node *node, void (*kernel)(void), unsigned coreid, enum starpu_codelet_type type, int is_parallel_task, int cb_workerid, starpu_data_handle_t *handles, void **interfaces, unsigned nb_interfaces, void *cl_arg, size_t cl_arg_size, int detached) { void *buffer, *arg =NULL; uintptr_t buffer_ptr; int buffer_size = 0, arg_size =0; unsigned i; starpu_ssize_t interface_size[nb_interfaces ? nb_interfaces : 1]; void *interface_ptr[nb_interfaces ? nb_interfaces : 1]; buffer_size = sizeof(kernel) + sizeof(type) + sizeof(is_parallel_task) + sizeof(coreid) + sizeof(nb_interfaces) + sizeof(detached); /*if the task is parallel*/ if(is_parallel_task) { buffer_size += sizeof(cb_workerid); } for (i = 0; i < nb_interfaces; i++) { buffer_size += sizeof(enum starpu_data_interface_id); starpu_data_handle_t handle = handles[i]; if (handle->ops->pack_meta) { handle->ops->pack_meta(interfaces[i], &interface_ptr[i], &interface_size[i]); buffer_size += interface_size[i]; } else { buffer_size += sizeof(union _starpu_interface); } } /* If the user didn't give any cl_arg, there is no need to send it */ if (cl_arg) { STARPU_ASSERT_MSG(cl_arg_size, "Execution of tasks on master-slave needs cl_arg_size to be set, to transfer the content of cl_arg"); buffer_size += cl_arg_size; } /* We give to send_command a buffer we just allocated, which contains * a pointer to the function (sink-side), core on which execute this * function (sink-side), number of interfaces we send, * an array of generic (union) interfaces and the value of cl_arg */ _STARPU_MALLOC(buffer, buffer_size); buffer_ptr = (uintptr_t) buffer; *(void(**)(void)) buffer = kernel; buffer_ptr += sizeof(kernel); *(enum starpu_codelet_type *) buffer_ptr = type; buffer_ptr += sizeof(type); *(int *) buffer_ptr = is_parallel_task; buffer_ptr += sizeof(is_parallel_task); if(is_parallel_task) { *(int *) buffer_ptr = cb_workerid ; buffer_ptr += sizeof(cb_workerid); } STARPU_ASSERT(coreid < (unsigned)node->nb_cores); *(unsigned *) buffer_ptr = coreid; buffer_ptr += sizeof(coreid); *(unsigned *) buffer_ptr = nb_interfaces; buffer_ptr += sizeof(nb_interfaces); *(int *) buffer_ptr = detached; buffer_ptr += sizeof(detached); /* Message-passing execution is a particular case as the codelet is * executed on a sink with a different memory, whereas a codelet is * executed on the host part for the other accelerators. * Thus we need to send a copy of each interface on the MP device */ for (i = 0; i < nb_interfaces; i++) { starpu_data_handle_t handle = handles[i]; enum starpu_data_interface_id id = starpu_data_get_interface_id(handle); memcpy((void*) buffer_ptr, &id, sizeof(id)); buffer_ptr += sizeof(id); if (handle->ops->pack_meta) { STARPU_ASSERT_MSG(handle->ops->unpack_meta, "pack_meta defined without unpack_meta for interface %d", id); memcpy((void *) buffer_ptr, interface_ptr[i], interface_size[i]); free(interface_ptr[i]); buffer_ptr += interface_size[i]; } else { /* Check that the interface exists in _starpu_interface */ STARPU_ASSERT_MSG(id == STARPU_VOID_INTERFACE_ID || id == STARPU_VARIABLE_INTERFACE_ID || id == STARPU_VECTOR_INTERFACE_ID || id == STARPU_MATRIX_INTERFACE_ID || id == STARPU_BLOCK_INTERFACE_ID || id == STARPU_TENSOR_INTERFACE_ID || id == STARPU_CSR_INTERFACE_ID || id == STARPU_BCSR_INTERFACE_ID || id == STARPU_COO_INTERFACE_ID, "Master-Slave currently cannot work with interface type %d (%s)", id, handle->ops->name); memcpy((void*) buffer_ptr, interfaces[i], handle->ops->interface_size); STARPU_ASSERT(handle->ops->interface_size <= sizeof(union _starpu_interface)); memset((char*) buffer_ptr + handle->ops->interface_size, 0, sizeof(union _starpu_interface) - handle->ops->interface_size); /* The sink side has no mean to get the type of each * interface, we use a union to make it generic and permit the * sink to go through the array */ buffer_ptr += sizeof(union _starpu_interface); } } if (cl_arg) memcpy((void*) buffer_ptr, cl_arg, cl_arg_size); STARPU_PTHREAD_MUTEX_LOCK(&node->connection_mutex); if (detached) _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_EXECUTE_DETACHED, buffer, buffer_size); else _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_EXECUTE, buffer, buffer_size); enum _starpu_mp_command answer = _starpu_src_common_wait_command_sync(node, &arg, &arg_size); if (answer == STARPU_MP_COMMAND_ERROR_EXECUTE_DETACHED) { STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); return -EINVAL; } if (detached) STARPU_ASSERT(answer == STARPU_MP_COMMAND_ANSWER_EXECUTION_DETACHED_SUBMITTED); else STARPU_ASSERT(answer == STARPU_MP_COMMAND_ANSWER_EXECUTION_SUBMITTED); STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); free(buffer); return 0; } /* Get the information and call the function to send to the sink a message to execute the task*/ static int _starpu_src_common_execute(struct _starpu_job *j, struct _starpu_worker *worker, struct _starpu_mp_node * node) { STARPU_ASSERT(j); struct starpu_task *task = j->task; int profiling = starpu_profiling_status_get(); STARPU_ASSERT(task); void (*kernel)(void) = node->get_kernel_from_job(node,j); _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, profiling); //_STARPU_DEBUG("\nworkerid:%d, subworkerid:%d, rank:%d, type:%d, cb_workerid:%d, task_size:%d\n\n",worker->devid, worker->subworkerid, worker->current_rank,task->cl->type,j->combined_workerid,j->task_size); _starpu_src_common_execute_kernel(node, kernel, worker->subworkerid, task->cl->type, (j->task_size > 1), j->combined_workerid, STARPU_TASK_GET_HANDLES(task), _STARPU_TASK_GET_INTERFACES(task), STARPU_TASK_GET_NBUFFERS(task), task->cl_arg, task->cl_arg_size, 0); return 0; } static struct _starpu_sink_kernel *starpu_src_common_register_kernel(const char *func_name) { STARPU_PTHREAD_MUTEX_LOCK(&htbl_mutex); struct _starpu_sink_kernel *kernel; unsigned workerid = starpu_worker_get_id_check(); enum starpu_worker_archtype archtype = starpu_worker_get_type(workerid); HASH_FIND_STR(kernels[archtype], func_name, kernel); if (kernel != NULL) { STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex); // Function already in the table. return kernel; } unsigned int nb_devices = _starpu_get_machine_config()->topology.ndevices[archtype]; _STARPU_MALLOC(kernel, sizeof(*kernel) + nb_devices * sizeof(starpu_cpu_func_t)); kernel->name = strdup(func_name); HASH_ADD_STR(kernels[archtype], name, kernel); unsigned int i; for (i = 0; i < nb_devices; ++i) kernel->func[i] = NULL; STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex); return kernel; } static starpu_cpu_func_t starpu_src_common_get_kernel(const char *func_name) { /* This function has to be called in the codelet only, by the thread * which will handle the task */ int workerid = starpu_worker_get_id_check(); int devid = starpu_worker_get_devid(workerid); enum starpu_worker_archtype archtype = starpu_worker_get_type(workerid); struct _starpu_sink_kernel *kernel = starpu_src_common_register_kernel(func_name); if (kernel->func[devid] == NULL) { struct _starpu_mp_node *node = _starpu_src_nodes[archtype][devid]; int ret = _starpu_src_common_lookup(node, (void (**)(void))&kernel->func[devid], kernel->name); if (ret) { _STARPU_DISP("Could not resolve function %s on slave %d\n", kernel->name, devid); return NULL; } } return kernel->func[devid]; } starpu_cpu_func_t _starpu_src_common_get_cpu_func_from_codelet(struct starpu_codelet *cl, unsigned nimpl) { /* Try to use cpu_func_name. */ const char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl); STARPU_ASSERT_MSG(func_name, "when master-slave is used, cpu_funcs_name has to be defined and the function be non-static"); starpu_cpu_func_t kernel = starpu_src_common_get_kernel(func_name); STARPU_ASSERT_MSG(kernel, "when master-slave is used, cpu_funcs_name has to be defined and the function be non-static"); return kernel; } void(* _starpu_src_common_get_cpu_func_from_job(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *j))(void) { /* Try to use cpu_func_name. */ const char *func_name = _starpu_task_get_cpu_name_nth_implementation(j->task->cl, j->nimpl); STARPU_ASSERT_MSG(func_name, "when master-slave is used, cpu_funcs_name has to be defined and the function be non-static"); starpu_cpu_func_t kernel = starpu_src_common_get_kernel(func_name); STARPU_ASSERT_MSG(kernel, "when master-slave is used, cpu_funcs_name has to be defined and the function be non-static"); return (void (*)(void))kernel; } struct _starpu_mp_node *_starpu_src_common_get_mp_node_from_memory_node(int memory_node) { int devid = starpu_memory_node_get_devid(memory_node); enum starpu_worker_archtype archtype = starpu_memory_node_get_worker_archtype(starpu_node_get_kind(memory_node)); #ifdef STARPU_USE_MPI_MASTER_SLAVE STARPU_ASSERT_MSG_ALWAYS(devid >= 0 && devid < STARPU_MAXMPIDEVS, "bogus devid %d for memory node %d\n", devid, memory_node); #endif #ifdef STARPU_USE_TCPIP_MASTER_SLAVE STARPU_ASSERT_MSG_ALWAYS(devid >= 0 && devid < STARPU_MAXTCPIPDEVS, "bogus devid %d for memory node %d\n", devid, memory_node); #endif return _starpu_src_nodes[archtype][devid]; } /* Send a request to the sink linked to the MP_NODE to allocate SIZE bytes on * the sink. * In case of success, it returns 0 and *ADDR contains the address of the * allocated area ; * else it returns 1 if the allocation fail. */ uintptr_t _starpu_src_common_allocate(unsigned dst_node, size_t size, int flags) { (void) flags; struct _starpu_mp_node *mp_node = _starpu_src_common_get_mp_node_from_memory_node(dst_node); enum _starpu_mp_command answer; void *arg; int arg_size; uintptr_t addr; STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_ALLOCATE, &size, sizeof(size)); answer = _starpu_src_common_wait_command_sync(mp_node, &arg, &arg_size); if (answer == STARPU_MP_COMMAND_ERROR_ALLOCATE) { STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); return 0; } STARPU_ASSERT(answer == STARPU_MP_COMMAND_ANSWER_ALLOCATE && arg_size == sizeof(addr)); memcpy(&addr, arg, arg_size); STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); return addr; } /* Send a request to the sink linked to the MP_NODE to deallocate the memory * area pointed by ADDR. */ void _starpu_src_common_free(unsigned dst_node, uintptr_t addr, size_t size, int flags) { (void) flags; (void) size; struct _starpu_mp_node *mp_node = _starpu_src_common_get_mp_node_from_memory_node(dst_node); STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_FREE, &addr, sizeof(addr)); STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); } /* Send a request to the sink linked to the MP_NODE to map SIZE bytes on ADDR as mapped area * on the sink. * In case of success, it returns map_addr contains the address of the * mapped area * else it returns NULL if the map fail. */ uintptr_t _starpu_src_common_map(unsigned dst_node, uintptr_t addr, size_t size) { struct _starpu_mp_node *mp_node = _starpu_src_common_get_mp_node_from_memory_node(dst_node); enum _starpu_mp_command answer; void *arg; int arg_size; uintptr_t map_addr; size_t map_offset; char* map_name = _starpu_get_fdname_from_mapaddr(addr, &map_offset, size); if(map_name == NULL) { return 0; } int map_cmd_size = sizeof(struct _starpu_mp_transfer_map_command)+strlen(map_name)+1; struct _starpu_mp_transfer_map_command *map_cmd; _STARPU_MALLOC(map_cmd, map_cmd_size); memcpy(map_cmd->fd_name, map_name, strlen(map_name)+1); free(map_name); map_cmd->offset = map_offset; map_cmd->size = size; STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_MAP, map_cmd, map_cmd_size); answer = _starpu_src_common_wait_command_sync(mp_node, &arg, &arg_size); if (answer == STARPU_MP_COMMAND_ERROR_MAP) { STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); return 0; } STARPU_ASSERT(answer == STARPU_MP_COMMAND_ANSWER_MAP && arg_size == sizeof(map_addr)); memcpy(&map_addr, arg, arg_size); STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); free(map_cmd); return map_addr; } /* Send a request to the sink linked to the MP_NODE to unmap the memory * area pointed by ADDR. */ void _starpu_src_common_unmap(unsigned dst_node, uintptr_t addr, size_t size) { (void) size; struct _starpu_mp_node *mp_node = _starpu_src_common_get_mp_node_from_memory_node(dst_node); struct _starpu_mp_transfer_unmap_command unmap_cmd = {.addr = addr, .size = size}; STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_UNMAP, &unmap_cmd, sizeof(unmap_cmd)); STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); } /* Send SIZE bytes pointed by SRC to DST on the sink linked to the MP_NODE with a * synchronous mode. */ int _starpu_src_common_copy_host_to_sink_sync(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size) { struct _starpu_mp_transfer_command cmd = {.size = size, .addr = dst, .event = NULL}; STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_RECV_FROM_HOST, &cmd, sizeof(cmd)); mp_node->dt_send(mp_node, src, size, NULL); STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); return 0; } /* Send SIZE bytes pointed by SRC to DST on the sink linked to the MP_NODE with an * asynchronous mode. */ int _starpu_src_common_copy_host_to_sink_async(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size, void * event) { struct _starpu_mp_transfer_command cmd = {.size = size, .addr = dst, .event = event}; STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); /* For asynchronous transfers, we save information * to test is they are finished */ struct _starpu_async_channel * async_channel = event; async_channel->polling_node_receiver = mp_node; _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC, &cmd, sizeof(cmd)); mp_node->dt_send(mp_node, src, size, event); STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); return -EAGAIN; } int _starpu_src_common_copy_data_host_to_sink(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { (void) src_node; struct _starpu_mp_node *mp_node = _starpu_src_common_get_mp_node_from_memory_node(dst_node); if (async_channel) return _starpu_src_common_copy_host_to_sink_async(mp_node, (void*) (src + src_offset), (void*) (dst + dst_offset), size, async_channel); else return _starpu_src_common_copy_host_to_sink_sync(mp_node, (void*) (src + src_offset), (void*) (dst + dst_offset), size); } /* Receive SIZE bytes pointed by SRC on the sink linked to the MP_NODE and store them in DST * with a synchronous mode. */ int _starpu_src_common_copy_sink_to_host_sync(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size) { enum _starpu_mp_command answer; void *arg; int arg_size; struct _starpu_mp_transfer_command cmd = {.size = size, .addr = src, .event = NULL}; STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_SEND_TO_HOST, &cmd, sizeof(cmd)); answer = _starpu_src_common_wait_command_sync(mp_node, &arg, &arg_size); STARPU_ASSERT(answer == STARPU_MP_COMMAND_SEND_TO_HOST); mp_node->dt_recv(mp_node, dst, size, NULL); STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); return 0; } /* Receive SIZE bytes pointed by SRC on the sink linked to the MP_NODE and store them in DST * with an asynchronous mode. */ int _starpu_src_common_copy_sink_to_host_async(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size, void * event) { struct _starpu_mp_transfer_command cmd = {.size = size, .addr = src, .event = event}; STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); /* For asynchronous transfers, we save information * to test is they are finished */ struct _starpu_async_channel * async_channel = event; async_channel->polling_node_sender = mp_node; _starpu_mp_common_send_command(mp_node, STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC, &cmd, sizeof(cmd)); mp_node->dt_recv(mp_node, dst, size, event); STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); return -EAGAIN; } int _starpu_src_common_copy_data_sink_to_host(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { (void) dst_node; struct _starpu_mp_node *mp_node = _starpu_src_common_get_mp_node_from_memory_node(src_node); if (async_channel) return _starpu_src_common_copy_sink_to_host_async(mp_node, (void*) (src + src_offset), (void*) (dst + dst_offset), size, async_channel); else return _starpu_src_common_copy_sink_to_host_sync(mp_node, (void*) (src + src_offset), (void*) (dst + dst_offset), size); } /* Tell the sink linked to SRC_NODE to send SIZE bytes of data pointed by SRC * to the sink linked to DST_NODE. The latter store them in DST with a synchronous * mode. */ int _starpu_src_common_copy_sink_to_sink_sync(struct _starpu_mp_node *src_node, struct _starpu_mp_node *dst_node, void *src, void *dst, size_t size) { enum _starpu_mp_command answer; void *arg; int arg_size; struct _starpu_mp_transfer_command_to_device cmd = {.devid = dst_node->peer_id, .size = size, .addr = src, .event = NULL}; /* lock the node with the little peer_id first to prevent deadlock */ if (src_node->peer_id > dst_node->peer_id) { STARPU_PTHREAD_MUTEX_LOCK(&dst_node->connection_mutex); STARPU_PTHREAD_MUTEX_LOCK(&src_node->connection_mutex); } else { STARPU_PTHREAD_MUTEX_LOCK(&src_node->connection_mutex); STARPU_PTHREAD_MUTEX_LOCK(&dst_node->connection_mutex); } /* Tell source to send data to dest. */ _starpu_mp_common_send_command(src_node, STARPU_MP_COMMAND_SEND_TO_SINK, &cmd, offsetof(struct _starpu_mp_transfer_command_to_device, end)); /* Release the source as fast as possible */ STARPU_PTHREAD_MUTEX_UNLOCK(&src_node->connection_mutex); cmd.devid = src_node->peer_id; cmd.size = size; cmd.addr = dst; /* Tell dest to receive data from source. */ _starpu_mp_common_send_command(dst_node, STARPU_MP_COMMAND_RECV_FROM_SINK, &cmd, offsetof(struct _starpu_mp_transfer_command_to_device, end)); /* Wait for answer from dest to know whether transfer is finished. */ answer = _starpu_src_common_wait_command_sync(dst_node, &arg, &arg_size); STARPU_ASSERT(answer == STARPU_MP_COMMAND_ANSWER_TRANSFER_COMPLETE); /* Release the receiver when we received the acknowledgment */ STARPU_PTHREAD_MUTEX_UNLOCK(&dst_node->connection_mutex); return 0; } /* Tell the sink linked to SRC_NODE to send SIZE bytes of data pointed by SRC * to the sink linked to DST_NODE. The latter store them in DST with an asynchronous * mode. */ int _starpu_src_common_copy_sink_to_sink_async(struct _starpu_mp_node *src_node, struct _starpu_mp_node *dst_node, void *src, void *dst, size_t size, void * event) { struct _starpu_mp_transfer_command_to_device cmd = {.devid = dst_node->peer_id, .size = size, .addr = src, .event = event}; /* lock the node with the little peer_id first to prevent deadlock */ if (src_node->peer_id > dst_node->peer_id) { STARPU_PTHREAD_MUTEX_LOCK(&dst_node->connection_mutex); STARPU_PTHREAD_MUTEX_LOCK(&src_node->connection_mutex); } else { STARPU_PTHREAD_MUTEX_LOCK(&src_node->connection_mutex); STARPU_PTHREAD_MUTEX_LOCK(&dst_node->connection_mutex); } /* For asynchronous transfers, we save information * to test is they are finished */ struct _starpu_async_channel * async_channel = event; async_channel->polling_node_sender = src_node; async_channel->polling_node_receiver = dst_node; /* Increase number of ack waited */ async_channel->starpu_mp_common_finished_receiver++; async_channel->starpu_mp_common_finished_sender++; /* Tell source to send data to dest. */ _starpu_mp_common_send_command(src_node, STARPU_MP_COMMAND_SEND_TO_SINK_ASYNC, &cmd, offsetof(struct _starpu_mp_transfer_command_to_device, end)); STARPU_PTHREAD_MUTEX_UNLOCK(&src_node->connection_mutex); cmd.devid = src_node->peer_id; cmd.size = size; cmd.addr = dst; /* Tell dest to receive data from source. */ _starpu_mp_common_send_command(dst_node, STARPU_MP_COMMAND_RECV_FROM_SINK_ASYNC, &cmd, offsetof(struct _starpu_mp_transfer_command_to_device, end)); STARPU_PTHREAD_MUTEX_UNLOCK(&dst_node->connection_mutex); return -EAGAIN; } int _starpu_src_common_copy_data_sink_to_sink(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { if (async_channel) return _starpu_src_common_copy_sink_to_sink_async( _starpu_src_common_get_mp_node_from_memory_node(src_node), _starpu_src_common_get_mp_node_from_memory_node(dst_node), (void*) (src + src_offset), (void*) (dst + dst_offset), size, async_channel); else return _starpu_src_common_copy_sink_to_sink_sync( _starpu_src_common_get_mp_node_from_memory_node(src_node), _starpu_src_common_get_mp_node_from_memory_node(dst_node), (void*) (src + src_offset), (void*) (dst + dst_offset), size); } void _starpu_src_common_init_switch_env(unsigned this) { save_thread_env[this].current_task = starpu_task_get_current(); save_thread_env[this].current_worker = STARPU_PTHREAD_GETSPECIFIC(_starpu_worker_key); save_thread_env[this].current_worker_set = STARPU_PTHREAD_GETSPECIFIC(_starpu_worker_set_key); #ifdef STARPU_OPENMP save_thread_env[this].current_omp_thread = STARPU_PTHREAD_GETSPECIFIC(_starpu_omp_thread_key); save_thread_env[this].current_omp_task = STARPU_PTHREAD_GETSPECIFIC(_starpu_omp_task_key); #endif } static void _starpu_src_common_switch_env(unsigned old, unsigned new) { save_thread_env[old].current_task = starpu_task_get_current(); save_thread_env[old].current_worker = STARPU_PTHREAD_GETSPECIFIC(_starpu_worker_key); save_thread_env[old].current_worker_set = STARPU_PTHREAD_GETSPECIFIC(_starpu_worker_set_key); #ifdef STARPU_OPENMP save_thread_env[old].current_omp_thread = STARPU_PTHREAD_GETSPECIFIC(_starpu_omp_thread_key); save_thread_env[old].current_omp_task = STARPU_PTHREAD_GETSPECIFIC(_starpu_omp_task_key); #endif _starpu_set_current_task(save_thread_env[new].current_task); STARPU_PTHREAD_SETSPECIFIC(_starpu_worker_key, save_thread_env[new].current_worker); STARPU_PTHREAD_SETSPECIFIC(_starpu_worker_set_key, save_thread_env[new].current_worker_set); #ifdef STARPU_OPENMP STARPU_PTHREAD_SETSPECIFIC(_starpu_omp_thread_key, save_thread_env[new].current_omp_thread); STARPU_PTHREAD_SETSPECIFIC(_starpu_omp_task_key, save_thread_env[new].current_omp_task); #endif } /* Send workers to the sink node */ static void _starpu_src_common_send_workers(struct _starpu_mp_node * node, int baseworkerid, int nworkers) { struct _starpu_machine_config *config = _starpu_get_machine_config(); int worker_size = sizeof(struct _starpu_worker)*nworkers; int combined_worker_size = STARPU_NMAX_COMBINEDWORKERS*sizeof(struct _starpu_combined_worker); int msg[5]; msg[0] = nworkers; msg[1] = worker_size; msg[2] = combined_worker_size; msg[3] = baseworkerid; msg[4] = starpu_worker_get_count(); STARPU_PTHREAD_MUTEX_LOCK(&node->connection_mutex); /* tell the sink node that we will send him all workers */ _starpu_mp_common_send_command(node, STARPU_MP_COMMAND_SYNC_WORKERS, &msg, sizeof(msg)); /* Send all worker to the sink node */ node->dt_send(node,&config->workers[baseworkerid],worker_size, NULL); /* Send all combined workers to the sink node */ node->dt_send(node, &config->combined_workers,combined_worker_size, NULL); STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); } static void _starpu_src_common_worker_internal_work(struct _starpu_worker_set * worker_set, struct _starpu_mp_node * mp_node, unsigned memnode) { int res = 0; unsigned i; struct starpu_task *tasks[worker_set->nworkers]; _starpu_may_pause(); #ifdef STARPU_SIMGRID starpu_pthread_wait_reset(&worker_set->workers[0].wait); #endif /* Test if async transfers are completed */ for (i = 0; i < worker_set->nworkers; i++) { struct starpu_task *task = worker_set->workers[i].task_transferring; /* We send all buffers to execute the task */ if (task != NULL && worker_set->workers[i].nb_buffers_transferred == worker_set->workers[i].nb_buffers_totransfer) { STARPU_RMB(); struct _starpu_job * j = _starpu_get_job_associated_to_task(task); _STARPU_TRACE_END_PROGRESS(memnode); _starpu_set_local_worker_key(&worker_set->workers[i]); _starpu_fetch_task_input_tail(task, j, &worker_set->workers[i]); /* Reset it */ worker_set->workers[i].task_transferring = NULL; j->workerid = worker_set->workers[i].workerid; /* Execute the task */ res = _starpu_src_common_execute(j, &worker_set->workers[i], mp_node); switch (res) { case 0: /* The task task has been launched with no error */ break; case -EAGAIN: _STARPU_DISP("ouch, this MP worker could not actually run task %p, putting it back...\n", tasks[i]); _starpu_push_task_to_workers(worker_set->workers[i].task_transferring); STARPU_ABORT(); continue; break; default: STARPU_ASSERT(0); } _STARPU_TRACE_START_PROGRESS(memnode); } } res |= __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, 1); /* Handle message which have been store */ _starpu_src_common_handle_stored_async(mp_node); STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); unsigned stopped_progress = 0; /* poll the device for completed jobs.*/ while(mp_node->nt_recv_is_ready(mp_node)) { stopped_progress = 1; _STARPU_TRACE_END_PROGRESS(mp_node_memory_node(mp_node)); _starpu_src_common_recv_async(mp_node); /* Mutex is unlock in _starpu_src_common_recv_async */ STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); } if (stopped_progress) _STARPU_TRACE_START_PROGRESS(mp_node_memory_node(mp_node)); STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); /* get task for each worker*/ res |= _starpu_get_multi_worker_task(worker_set->workers, tasks, worker_set->nworkers, memnode); #ifdef STARPU_SIMGRID if (!res) starpu_pthread_wait_wait(&worker_set->workers[0].wait); #endif /*if at least one worker have pop a task*/ if(res != 0) { for(i=0; inworkers; i++) { if(tasks[i] != NULL) { struct _starpu_worker *worker = &worker_set->workers[i]; _STARPU_TRACE_END_PROGRESS(worker->memory_node); _starpu_set_local_worker_key(worker); int ret = _starpu_fetch_task_input(tasks[i], _starpu_get_job_associated_to_task(tasks[i]), 1); STARPU_ASSERT(!ret); _STARPU_TRACE_START_PROGRESS(worker->memory_node); } } /* Handle message which have been store */ _starpu_src_common_handle_stored_async(mp_node); } } /* Function looping on the source node */ void _starpu_src_common_workers_set(struct _starpu_worker_set * worker_set, int ndevices, struct _starpu_mp_node ** mp_node) { unsigned memnode[ndevices]; int device; for (device = 0; device < ndevices; device++) memnode[device] = worker_set[device].workers[0].memory_node; for (device = 0; device < ndevices; device++) { struct _starpu_worker_set * device_worker_set = &worker_set[device]; struct _starpu_worker *baseworker = &device_worker_set->workers[0]; struct _starpu_machine_config *config = baseworker->config; unsigned baseworkerid = baseworker - config->workers; _starpu_src_common_send_workers(mp_node[device], baseworkerid, worker_set[device].nworkers); } for (device = 0; device < ndevices; device++) { struct _starpu_worker_set * device_worker_set = &worker_set[device]; struct _starpu_worker *worker0 = &device_worker_set->workers[0]; STARPU_PTHREAD_MUTEX_LOCK(&worker0->mutex); worker0->status = STATUS_UNKNOWN; STARPU_PTHREAD_MUTEX_UNLOCK(&worker0->mutex); } for (device = 0; device < ndevices; device++) { struct _starpu_worker_set * device_worker_set = &worker_set[device]; /* tell the main thread that this one is ready */ STARPU_PTHREAD_MUTEX_LOCK(&device_worker_set->mutex); device_worker_set->set_is_initialized = 1; STARPU_PTHREAD_COND_SIGNAL(&device_worker_set->ready_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&device_worker_set->mutex); _STARPU_TRACE_START_PROGRESS(memnode[device]); } /*main loop*/ while (_starpu_machine_is_running()) { for (device = 0; device < ndevices ; device++) { if (ndevices > 1) _starpu_src_common_switch_env(((device-1)+ndevices)%ndevices, device); _starpu_src_common_worker_internal_work(&worker_set[device], mp_node[device], memnode[device]); } } for (device = 0; device < ndevices; device++) { _STARPU_TRACE_END_PROGRESS(memnode[device]); _starpu_datawizard_handle_all_pending_node_data_requests(memnode[device]); } /* In case there remains some memory that was automatically * allocated by StarPU, we release it now. Note that data * coherency is not maintained anymore at that point ! */ for (device = 0; device < ndevices; device++) _starpu_free_all_automatically_allocated_buffers(memnode[device]); } starpu-1.4.9+dfsg/src/drivers/mp_common/source_common.h000066400000000000000000000107371507764646700232720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __SOURCE_COMMON_H__ #define __SOURCE_COMMON_H__ /** @file */ #ifdef STARPU_USE_MP #include #include #include #pragma GCC visibility push(hidden) /* Array of structures containing all the information useful to send * and receive information with devices */ #ifdef STARPU_USE_MPI_MASTER_SLAVE extern struct _starpu_mp_node *_starpu_src_nodes[STARPU_NARCH][STARPU_MAXMPIDEVS]; #endif #ifdef STARPU_USE_TCPIP_MASTER_SLAVE extern struct _starpu_mp_node *_starpu_src_nodes[STARPU_NARCH][STARPU_MAXTCPIPDEVS]; #endif int _starpu_src_common_store_message(struct _starpu_mp_node *node, void * arg, int arg_size, enum _starpu_mp_command answer); enum _starpu_mp_command _starpu_src_common_wait_completed_execution(struct _starpu_mp_node *node, int devid, void **arg, int * arg_size); int _starpu_src_common_sink_nbcores(struct _starpu_mp_node *node, int *buf); int _starpu_src_common_lookup(struct _starpu_mp_node *node, void (**func_ptr)(void), const char *func_name); starpu_cpu_func_t _starpu_src_common_get_cpu_func_from_codelet(struct starpu_codelet *cl, unsigned nimpl); void(* _starpu_src_common_get_cpu_func_from_job(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *j))(void); struct _starpu_mp_node *_starpu_src_common_get_mp_node_from_memory_node(int memory_node); uintptr_t _starpu_src_common_allocate(unsigned dst_node, size_t size, int flags); void _starpu_src_common_free(unsigned dst_node, uintptr_t addr, size_t size, int flags); uintptr_t _starpu_src_common_map(unsigned dst_node, uintptr_t addr, size_t size); void _starpu_src_common_unmap(unsigned dst_node, uintptr_t addr, size_t size); int _starpu_src_common_execute_kernel(struct _starpu_mp_node *node, void (*kernel)(void), unsigned coreid, enum starpu_codelet_type type, int is_parallel_task, int cb_workerid, starpu_data_handle_t *handles, void **interfaces, unsigned nb_interfaces, void *cl_arg, size_t cl_arg_size, int detached); int _starpu_src_common_copy_host_to_sink_sync(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size); int _starpu_src_common_copy_sink_to_host_sync(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size); int _starpu_src_common_copy_sink_to_sink_sync(struct _starpu_mp_node *src_node, struct _starpu_mp_node *dst_node, void *src, void *dst, size_t size); int _starpu_src_common_copy_host_to_sink_async(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size, void *event); int _starpu_src_common_copy_sink_to_host_async(struct _starpu_mp_node *mp_node, void *src, void *dst, size_t size, void *event); int _starpu_src_common_copy_sink_to_sink_async(struct _starpu_mp_node *src_node, struct _starpu_mp_node *dst_node, void *src, void *dst, size_t size, void *event); int _starpu_src_common_copy_data_host_to_sink(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel); int _starpu_src_common_copy_data_sink_to_host(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel); int _starpu_src_common_copy_data_sink_to_sink(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel); void _starpu_src_common_init_switch_env(unsigned this); void _starpu_src_common_workers_set(struct _starpu_worker_set * worker_set, int ndevices, struct _starpu_mp_node ** mp_node); void _starpu_src_common_deinit(void); #pragma GCC visibility pop #endif /* STARPU_USE_MP */ #endif /* __SOURCE_COMMON_H__ */ starpu-1.4.9+dfsg/src/drivers/mpi/000077500000000000000000000000001507764646700170425ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/drivers/mpi/driver_mpi_common.c000066400000000000000000000452441507764646700227270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #define NITER 32 #define SIZE_BANDWIDTH (1024*1024) #define DRIVER_MPI_MASTER_NODE_DEFAULT 0 static int mpi_initialized = 0; static int extern_initialized = 0; static int src_node_id; int _starpu_mpi_common_multiple_thread; /* (For a given datawizard we may have several starpu_interface_copy calls) */ LIST_TYPE(_starpu_mpi_ms_event_request, MPI_Request request; ); struct _starpu_mpi_ms_async_event { int is_sender; struct _starpu_mpi_ms_event_request_list * requests; }; static inline struct _starpu_mpi_ms_async_event *_starpu_mpi_ms_async_event(union _starpu_async_channel_event *_event) { struct _starpu_mpi_ms_async_event *event; STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); event = (void *) _event; return event; } /* This lets the user decide which MPI rank is to be the master. Usually it's just rank 0 */ static void _starpu_mpi_set_src_node_id() { int node_id = starpu_getenv_number("STARPU_MPI_MASTER_NODE"); if (node_id != -1) { int nb_proc, id_proc; MPI_Comm_size(MPI_COMM_WORLD, &nb_proc); MPI_Comm_rank(MPI_COMM_WORLD, &id_proc); if (node_id < nb_proc) { src_node_id = node_id; return; } else if (id_proc == DRIVER_MPI_MASTER_NODE_DEFAULT) { /* Only one node prints the error message. */ _STARPU_MSG("The node (%d) you specify to be the master is " "greater than the total number of nodes (%d). " "StarPU will use node %d.\n", node_id, nb_proc, DRIVER_MPI_MASTER_NODE_DEFAULT); } } /* Node by default. */ src_node_id = DRIVER_MPI_MASTER_NODE_DEFAULT; } int _starpu_mpi_common_mp_init() { //Here we supposed the programmer called two times starpu_init. if (mpi_initialized) return -ENODEV; mpi_initialized = 1; _starpu_mpi_common_multiple_thread = starpu_getenv_number_default("STARPU_MPI_MS_MULTIPLE_THREAD", 0); if (MPI_Initialized(&extern_initialized) != MPI_SUCCESS) STARPU_ABORT_MSG("Cannot check if MPI is initialized or not !"); //Here MPI_Init or MPI_Init_thread is already called if (!extern_initialized) { int required = _starpu_mpi_common_multiple_thread ? MPI_THREAD_MULTIPLE : MPI_THREAD_FUNNELED; int thread_support; if (MPI_Init_thread(_starpu_get_argc(), _starpu_get_argv(), required, &thread_support) != MPI_SUCCESS) { STARPU_ABORT_MSG("Cannot Initialize MPI !"); } if (thread_support != required) { if (required == MPI_THREAD_MULTIPLE) _STARPU_DISP("MPI doesn't support MPI_THREAD_MULTIPLE option. MPI Master-Slave can have problems if multiple slaves are launched. \n"); if (required == MPI_THREAD_FUNNELED) _STARPU_DISP("MPI doesn't support MPI_THREAD_FUNNELED option. Many errors can occur. \n"); } } /* Find which node is the master */ _starpu_mpi_set_src_node_id(); /* In MPI case we look at the rank to know if we are a sink */ if (!_starpu_mpi_common_is_src_node()) setenv("STARPU_SINK", "STARPU_MPI_MS", 1); return 1; } void _starpu_mpi_common_mp_deinit() { if (!extern_initialized) MPI_Finalize(); } int _starpu_mpi_common_is_src_node() { int id_proc; MPI_Comm_rank(MPI_COMM_WORLD, &id_proc); return id_proc == src_node_id; } int _starpu_mpi_common_get_src_node() { return src_node_id; } int _starpu_mpi_common_is_mp_initialized() { return mpi_initialized; } /* common parts to initialize a source or a sink node */ void _starpu_mpi_common_mp_initialize_src_sink(struct _starpu_mp_node *node) { struct _starpu_machine_topology *topology = &_starpu_get_machine_config()->topology; int nmpicores = starpu_getenv_number("STARPU_NMPIMSTHREADS"); if (nmpicores == -1) { int nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0]; node->nb_cores = topology->nusedpus / nhyperthreads; } else node->nb_cores = nmpicores; } int _starpu_mpi_common_recv_is_ready(const struct _starpu_mp_node *mp_node) { int res, source; int flag = 0; int id_proc; MPI_Comm_rank(MPI_COMM_WORLD, &id_proc); if (id_proc == src_node_id) { /* Source has mp_node defined */ source = mp_node->mp_connection.mpi_remote_nodeid; } else { /* Sink can have sink to sink message */ source = MPI_ANY_SOURCE; } res = MPI_Iprobe(source, SYNC_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE); STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot test if we received a message !"); return flag; } int _starpu_mpi_common_notif_recv_is_ready(const struct _starpu_mp_node *mp_node) { int res, source; int flag = 0; int id_proc; MPI_Comm_rank(MPI_COMM_WORLD, &id_proc); if (id_proc == src_node_id) { /* Source has mp_node defined */ source = mp_node->mp_connection.mpi_remote_nodeid; } else { /* Sink can have sink to sink message */ source = MPI_ANY_SOURCE; } res = MPI_Iprobe(source, NOTIF_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE); STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot test if we received a message !"); return flag; } int _starpu_mpi_common_notif_send_is_ready(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED) { return 1; } static void __starpu_mpi_common_send_to_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int dst_devid, void *msg, int len, void * event, int notif); static void __starpu_mpi_common_recv_from_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int src_devid, void *msg, int len, void * event, int notif); /* SEND to source node */ static void __starpu_mpi_common_send(const struct _starpu_mp_node *node, void *msg, int len, void * event, int notif) { //_STARPU_MSG("envoi %d B to %d\n", len, node->mp_connection.mpi_remote_nodeid); __starpu_mpi_common_send_to_device(node, node->mp_connection.mpi_remote_nodeid, msg, len, event, notif); } void _starpu_mpi_common_send(const struct _starpu_mp_node *node, void *msg, int len, void * event) { __starpu_mpi_common_send(node, msg, len, event, 0); } void _starpu_mpi_common_mp_send(const struct _starpu_mp_node *node, void *msg, int len) { __starpu_mpi_common_send(node, msg, len, NULL, 0); } void _starpu_mpi_common_nt_send(const struct _starpu_mp_node *node, void *msg, int len) { __starpu_mpi_common_send(node, msg, len, NULL, 1); } /* RECV to source node */ static void __starpu_mpi_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event, int notif) { //_STARPU_MSG("recv %d B from %d in %p\n", len, node->mp_connection.mpi_remote_nodeid, msg); __starpu_mpi_common_recv_from_device(node, node->mp_connection.mpi_remote_nodeid, msg, len, event, notif); } void _starpu_mpi_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event) { __starpu_mpi_common_recv(node, msg, len, event, 0); } void _starpu_mpi_common_mp_recv(const struct _starpu_mp_node *node, void *msg, int len) { __starpu_mpi_common_recv(node, msg, len, NULL, 0); } void _starpu_mpi_common_nt_recv(const struct _starpu_mp_node *node, void *msg, int len) { __starpu_mpi_common_recv(node, msg, len, NULL, 1); } /* SEND to any node */ static void __starpu_mpi_common_send_to_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int dst_devid, void *msg, int len, void * event, int notif) { int res; //_STARPU_MSG("S_to_D send %d bytes from %d from %p\n", len, dst_devid, msg); if (event) { /* Asynchronous send */ struct _starpu_async_channel * channel = event; struct _starpu_mpi_ms_async_event *mpi_ms_event = _starpu_mpi_ms_async_event(&channel->event); mpi_ms_event->is_sender = 1; /* call by sink, we need to initialize some parts, for host it's done in data_request.c */ if (channel->node_ops == NULL) mpi_ms_event->requests = NULL; /* Initialize the list */ if (mpi_ms_event->requests == NULL) mpi_ms_event->requests = _starpu_mpi_ms_event_request_list_new(); struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_new(); res = MPI_Isend(msg, len, MPI_BYTE, dst_devid, ASYNC_TAG, MPI_COMM_WORLD, &req->request); channel->starpu_mp_common_finished_receiver++; channel->starpu_mp_common_finished_sender++; _starpu_mpi_ms_event_request_list_push_back(mpi_ms_event->requests, req); } else { /* Synchronous send */ /* Send commands */ if (!notif) res = MPI_Send(msg, len, MPI_BYTE, dst_devid, SYNC_TAG, MPI_COMM_WORLD); /* Send notifications */ else res = MPI_Send(msg, len, MPI_BYTE, dst_devid, NOTIF_TAG, MPI_COMM_WORLD); } STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot receive a msg with a size of %d Bytes !", len); } void _starpu_mpi_common_send_to_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int dst_devid, void *msg, int len, void * event) { __starpu_mpi_common_send_to_device(node, dst_devid, msg, len, event, 0); } /* RECV to any node */ static void __starpu_mpi_common_recv_from_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int src_devid, void *msg, int len, void * event, int notif) { int res; //_STARPU_MSG("R_to_D nop recv %d bytes from %d\n", len, src_devid); if (event) { /* Asynchronous recv */ struct _starpu_async_channel * channel = event; struct _starpu_mpi_ms_async_event *mpi_ms_event = _starpu_mpi_ms_async_event(&channel->event); mpi_ms_event->is_sender = 0; /* call by sink, we need to initialize some parts, for host it's done in data_request.c */ if (channel->node_ops == NULL) mpi_ms_event->requests = NULL; /* Initialize the list */ if (mpi_ms_event->requests == NULL) mpi_ms_event->requests = _starpu_mpi_ms_event_request_list_new(); struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_new(); res = MPI_Irecv(msg, len, MPI_BYTE, src_devid, ASYNC_TAG, MPI_COMM_WORLD, &req->request); STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot Ireceive a msg with a size of %d Bytes !", len); channel->starpu_mp_common_finished_receiver++; channel->starpu_mp_common_finished_sender++; _starpu_mpi_ms_event_request_list_push_back(mpi_ms_event->requests, req); } else { /* Synchronous recv */ MPI_Status s; /* Send commands */ if (!notif) res = MPI_Recv(msg, len, MPI_BYTE, src_devid, SYNC_TAG, MPI_COMM_WORLD, &s); else res = MPI_Recv(msg, len, MPI_BYTE, src_devid, NOTIF_TAG, MPI_COMM_WORLD, &s); int num_expected; MPI_Get_count(&s, MPI_BYTE, &num_expected); STARPU_ASSERT_MSG(num_expected == len, "MPI Master/Slave received a msg with a size of %d Bytes (expected %d Bytes) !", num_expected, len); STARPU_ASSERT_MSG(res == MPI_SUCCESS, "MPI Master/Slave cannot receive a msg with a size of %d Bytes !", len); } } void _starpu_mpi_common_recv_from_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int src_devid, void *msg, int len, void * event) { __starpu_mpi_common_recv_from_device(node, src_devid, msg, len, event, 0); } static void _starpu_mpi_common_polling_node(struct _starpu_mp_node * node) { /* poll the asynchronous messages.*/ if (node != NULL) { STARPU_PTHREAD_MUTEX_LOCK(&node->connection_mutex); while(node->nt_recv_is_ready(node)) { enum _starpu_mp_command answer; void *arg; int arg_size; answer = _starpu_nt_common_recv_command(node, &arg, &arg_size); if(!_starpu_src_common_store_message(node,arg,arg_size,answer)) { _STARPU_ERROR("incorrect command: unknown command or sync command"); } } STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); } } /* - In device to device communications, the first ack received by host * is considered as the sender (but it cannot be, in fact, the sender) */ unsigned int _starpu_mpi_common_test_event(struct _starpu_async_channel * event) { struct _starpu_mpi_ms_async_event *mpi_ms_event = _starpu_mpi_ms_async_event(&event->event); if (mpi_ms_event->requests != NULL && !_starpu_mpi_ms_event_request_list_empty(mpi_ms_event->requests)) { struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_list_begin(mpi_ms_event->requests); struct _starpu_mpi_ms_event_request * req_next; while (req != _starpu_mpi_ms_event_request_list_end(mpi_ms_event->requests)) { req_next = _starpu_mpi_ms_event_request_list_next(req); int flag = 0; MPI_Test(&req->request, &flag, MPI_STATUS_IGNORE); if (flag) { _starpu_mpi_ms_event_request_list_erase(mpi_ms_event->requests, req); _starpu_mpi_ms_event_request_delete(req); if (mpi_ms_event->is_sender) event->starpu_mp_common_finished_sender--; else event->starpu_mp_common_finished_receiver--; } req = req_next; } /* When the list is empty, we finished to wait each request */ if (_starpu_mpi_ms_event_request_list_empty(mpi_ms_event->requests)) { /* Destroy the list */ _starpu_mpi_ms_event_request_list_delete(mpi_ms_event->requests); mpi_ms_event->requests = NULL; } } _starpu_mpi_common_polling_node(event->polling_node_sender); _starpu_mpi_common_polling_node(event->polling_node_receiver); return !event->starpu_mp_common_finished_sender && !event->starpu_mp_common_finished_receiver; } /* - In device to device communications, the first ack received by host * is considered as the sender (but it cannot be, in fact, the sender) */ /* Only used at starpu_shutdown */ void _starpu_mpi_common_wait_request_completion(struct _starpu_async_channel * event) { struct _starpu_mpi_ms_async_event *mpi_ms_event = _starpu_mpi_ms_async_event(&event->event); if (mpi_ms_event->requests != NULL && !_starpu_mpi_ms_event_request_list_empty(mpi_ms_event->requests)) { struct _starpu_mpi_ms_event_request * req = _starpu_mpi_ms_event_request_list_begin(mpi_ms_event->requests); struct _starpu_mpi_ms_event_request * req_next; while (req != _starpu_mpi_ms_event_request_list_end(mpi_ms_event->requests)) { req_next = _starpu_mpi_ms_event_request_list_next(req); MPI_Wait(&req->request, MPI_STATUS_IGNORE); _starpu_mpi_ms_event_request_list_erase(mpi_ms_event->requests, req); _starpu_mpi_ms_event_request_delete(req); req = req_next; if (mpi_ms_event->is_sender) event->starpu_mp_common_finished_sender--; else event->starpu_mp_common_finished_receiver--; } STARPU_ASSERT_MSG(_starpu_mpi_ms_event_request_list_empty(mpi_ms_event->requests), "MPI Request list is not empty after a wait_event !"); /* Destroy the list */ _starpu_mpi_ms_event_request_list_delete(mpi_ms_event->requests); mpi_ms_event->requests = NULL; } //incoming ack from devices while(event->starpu_mp_common_finished_sender > 0 || event->starpu_mp_common_finished_receiver > 0) { _starpu_mpi_common_polling_node(event->polling_node_sender); _starpu_mpi_common_polling_node(event->polling_node_receiver); } } void _starpu_mpi_common_barrier(void) { int ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Barrier failed"); } /* Compute bandwidth and latency between source and sink nodes * Source node has to have the entire set of times at the end */ void _starpu_mpi_common_measure_bandwidth_latency(double timing_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS], double latency_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS]) { int ret; unsigned iter; int nb_proc, id_proc; MPI_Comm_rank(MPI_COMM_WORLD, &id_proc); MPI_Comm_size(MPI_COMM_WORLD, &nb_proc); char * buf; _STARPU_MALLOC(buf, SIZE_BANDWIDTH); memset(buf, 0, SIZE_BANDWIDTH); int sender, receiver; for(sender = 0; sender < nb_proc; sender++) { for(receiver = 0; receiver < nb_proc; receiver++) { //Node can't be a sender and a receiver if(sender == receiver) continue; if (src_node_id == id_proc) _STARPU_DISP("measuring from %d to %d\n", sender, receiver); ret = MPI_Barrier(MPI_COMM_WORLD); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Barrier failed"); if(id_proc == sender) { double start, end; /* measure bandwidth sender to receiver */ start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { ret = MPI_Send(buf, SIZE_BANDWIDTH, MPI_BYTE, receiver, 42, MPI_COMM_WORLD); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !"); } end = starpu_timing_now(); timing_dtod[sender][receiver] = (end - start)/NITER/SIZE_BANDWIDTH; /* measure latency sender to receiver */ start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { ret = MPI_Send(buf, 1, MPI_BYTE, receiver, 42, MPI_COMM_WORLD); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Latency of MPI Master/Slave cannot be measured !"); } end = starpu_timing_now(); latency_dtod[sender][receiver] = (end - start)/NITER; } if (id_proc == receiver) { /* measure bandwidth sender to receiver*/ for (iter = 0; iter < NITER; iter++) { ret = MPI_Recv(buf, SIZE_BANDWIDTH, MPI_BYTE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !"); } /* measure latency sender to receiver */ for (iter = 0; iter < NITER; iter++) { ret = MPI_Recv(buf, 1, MPI_BYTE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE); STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "Bandwidth of MPI Master/Slave cannot be measured !"); } } } /* When a sender finished its work, it has to send its results to the master */ /* Sender doesn't need to send to itself its data */ if (sender == src_node_id) goto print; /* if we are the sender, we send the data */ if (sender == id_proc) { MPI_Send(timing_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, src_node_id, 42, MPI_COMM_WORLD); MPI_Send(latency_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, src_node_id, 42, MPI_COMM_WORLD); } /* the master node receives the data */ if (src_node_id == id_proc) { MPI_Recv(timing_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Recv(latency_dtod[sender], STARPU_MAXMPIDEVS, MPI_DOUBLE, sender, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } print: if (src_node_id == id_proc) { for(receiver = 0; receiver < nb_proc; receiver++) { if(sender == receiver) continue; _STARPU_DISP("BANDWIDTH %d -> %d %.0fMB/s %.2fus\n", sender, receiver, 1/timing_dtod[sender][receiver], latency_dtod[sender][receiver]); } } } free(buf); } starpu-1.4.9+dfsg/src/drivers/mpi/driver_mpi_common.h000066400000000000000000000054461507764646700227340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DRIVER_MPI_COMMON_H__ #define __DRIVER_MPI_COMMON_H__ /** @file */ #include #include #pragma GCC visibility push(hidden) #ifdef STARPU_USE_MPI_MASTER_SLAVE #define SYNC_TAG 44 #define ASYNC_TAG 45 #define NOTIF_TAG 46 extern int _starpu_mpi_common_multiple_thread; int _starpu_mpi_common_mp_init(); void _starpu_mpi_common_mp_deinit(); int _starpu_mpi_common_is_src_node(); int _starpu_mpi_common_get_src_node(); int _starpu_mpi_common_is_mp_initialized(); int _starpu_mpi_common_recv_is_ready(const struct _starpu_mp_node *mp_node); int _starpu_mpi_common_notif_recv_is_ready(const struct _starpu_mp_node *mp_node); int _starpu_mpi_common_notif_send_is_ready(const struct _starpu_mp_node *mp_node); void _starpu_mpi_common_mp_initialize_src_sink(struct _starpu_mp_node *node); void _starpu_mpi_common_send(const struct _starpu_mp_node *node, void *msg, int len, void * event); void _starpu_mpi_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event); void _starpu_mpi_common_mp_send(const struct _starpu_mp_node *node, void *msg, int len); void _starpu_mpi_common_mp_recv(const struct _starpu_mp_node *node, void *msg, int len); void _starpu_mpi_common_nt_send(const struct _starpu_mp_node *node, void *msg, int len); void _starpu_mpi_common_nt_recv(const struct _starpu_mp_node *node, void *msg, int len); void _starpu_mpi_common_recv_from_device(const struct _starpu_mp_node *node, int src_devid, void *msg, int len, void * event); void _starpu_mpi_common_send_to_device(const struct _starpu_mp_node *node, int dst_devid, void *msg, int len, void * event); unsigned int _starpu_mpi_common_test_event(struct _starpu_async_channel * event); void _starpu_mpi_common_wait_request_completion(struct _starpu_async_channel * event); void _starpu_mpi_common_barrier(void); void _starpu_mpi_common_measure_bandwidth_latency(double bandwidth_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS], double latency_dtod[STARPU_MAXMPIDEVS][STARPU_MAXMPIDEVS]); #endif /* STARPU_USE_MPI_MASTER_SLAVE */ #pragma GCC visibility pop #endif /* __DRIVER_MPI_COMMON_H__ */ starpu-1.4.9+dfsg/src/drivers/mpi/driver_mpi_init.c000066400000000000000000000027711507764646700224000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include static struct _starpu_driver_info driver_info = { .name_upper = "MPI_MS", .name_var = "MPI_MS", .name_lower = "mpi_ms", .memory_kind = STARPU_MPI_MS_RAM, .alpha = 1.0f, #ifdef STARPU_USE_MPI_MASTER_SLAVE .run_worker = _starpu_mpi_src_worker, .init_worker_binding = _starpu_mpi_init_worker_binding, .init_worker_memory = _starpu_mpi_init_worker_memory, #endif }; static struct _starpu_memory_driver_info memory_driver_info = { .name_upper = "MPI_MS", .worker_archtype = STARPU_MPI_MS_WORKER, #ifdef STARPU_USE_MPI_MASTER_SLAVE .ops = &_starpu_driver_mpi_ms_node_ops, #endif }; void _starpu_mpi_ms_preinit(void) { _starpu_driver_info_register(STARPU_MPI_MS_WORKER, &driver_info); _starpu_memory_driver_info_register(STARPU_MPI_MS_RAM, &memory_driver_info); } starpu-1.4.9+dfsg/src/drivers/mpi/driver_mpi_sink.c000066400000000000000000000022451507764646700223750ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "driver_mpi_sink.h" #include "driver_mpi_source.h" #include "driver_mpi_common.h" void _starpu_mpi_sink_init(struct _starpu_mp_node *node) { _starpu_mpi_common_mp_initialize_src_sink(node); _STARPU_MALLOC(node->thread_table, sizeof(starpu_pthread_t)*node->nb_cores); //TODO } void _starpu_mpi_sink_bind_thread(const struct _starpu_mp_node *mp_node, int coreid, int *core_table, int nb_core) { //TODO (void)mp_node; (void)coreid; (void)core_table; (void)nb_core; } starpu-1.4.9+dfsg/src/drivers/mpi/driver_mpi_sink.h000066400000000000000000000022411507764646700223760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DRIVER_MPI_SINK_H__ #define __DRIVER_MPI_SINK_H__ /** @file */ #include #pragma GCC visibility push(hidden) #ifdef STARPU_USE_MPI_MASTER_SLAVE void _starpu_mpi_sink_init(struct _starpu_mp_node *node); void _starpu_mpi_sink_bind_thread(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, int coreid, int * core_table, int nb_core); #endif /* STARPU_USE_MPI_MASTER_SLAVE */ #pragma GCC visibility pop #endif /* __DRIVER_MPI_SINK_H__ */ starpu-1.4.9+dfsg/src/drivers/mpi/driver_mpi_source.c000066400000000000000000000305071507764646700227330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #ifdef STARPU_USE_MPI_MASTER_SLAVE static unsigned mpi_bindid_init[STARPU_MAXMPIDEVS] = { }; static unsigned mpi_bindid[STARPU_MAXMPIDEVS]; static unsigned mpi_memory_init[STARPU_MAXMPIDEVS] = { }; static unsigned mpi_memory_nodes[STARPU_MAXMPIDEVS]; static struct _starpu_worker_set mpi_worker_set[STARPU_MAXMPIDEVS]; #endif struct _starpu_mp_node *_starpu_mpi_ms_src_get_actual_thread_mp_node() { struct _starpu_worker *actual_worker = _starpu_get_local_worker_key(); STARPU_ASSERT(actual_worker); int devid = actual_worker->devid; STARPU_ASSERT(devid >= 0 && devid < STARPU_MAXMPIDEVS); return _starpu_src_nodes[STARPU_MPI_MS_WORKER][devid]; } /* Configure one MPI slaves for run */ static void __starpu_init_mpi_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config, unsigned mpi_idx) { int nhwcores; _starpu_src_common_sink_nbcores(_starpu_src_nodes[STARPU_MPI_MS_WORKER][mpi_idx], &nhwcores); STARPU_ASSERT(mpi_idx < STARPU_NMAXDEVS); topology->nhwworker[STARPU_MPI_MS_WORKER][mpi_idx] = nhwcores; int nmpicores; nmpicores = starpu_getenv_number("STARPU_NMPIMSTHREADS"); _starpu_topology_check_ndevices(&nmpicores, nhwcores, 0, INT_MAX, 0, "STARPU_NMPIMSTHREADS", "MPI cores", ""); mpi_worker_set[mpi_idx].workers = &config->workers[topology->nworkers]; mpi_worker_set[mpi_idx].nworkers = nmpicores; _starpu_src_nodes[STARPU_MPI_MS_WORKER][mpi_idx]->baseworkerid = topology->nworkers; _starpu_topology_configure_workers(topology, config, STARPU_MPI_MS_WORKER, mpi_idx, mpi_idx, 0, 0, nmpicores, 1, &mpi_worker_set[mpi_idx], _starpu_mpi_common_multiple_thread ? NULL : mpi_worker_set); } /* Determine which devices we will use */ void _starpu_init_mpi_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config, struct starpu_conf *user_conf, int no_mp_config) { int i; /* Discover and configure the mp topology. That means: * - discover the number of mp nodes; * - initialize each discovered node; * - discover the local topology (number of PUs/devices) of each node; * - configure the workers accordingly. */ for (i = 0; i < (int) (sizeof(mpi_worker_set)/sizeof(mpi_worker_set[0])); i++) mpi_worker_set[i].workers = NULL; int nmpims = user_conf->nmpi_ms; if (nmpims != 0) { /* Discover and initialize the number of MPI nodes through the mp * infrastructure. */ unsigned nhwmpidevices = _starpu_mpi_src_get_device_count(); if (nmpims == -1) /* Nothing was specified, so let's use the number of * detected mpi devices. ! */ nmpims = nhwmpidevices; else { if ((unsigned) nmpims > nhwmpidevices) { /* The user requires more MPI devices than there is available */ _STARPU_MSG("# Warning: %d MPI Master-Slave devices requested. Only %u available.\n", nmpims, nhwmpidevices); nmpims = nhwmpidevices; } /* Let's make sure this value is OK. */ if (nmpims > STARPU_MAXMPIDEVS) { _STARPU_DISP("Warning: %d MPI MS devices requested. Only %d enabled. Use configure option --enable-maxmpidev=xxx to update the maximum value of supported MPI MS devices.\n", nmpims, STARPU_MAXMPIDEVS); nmpims = STARPU_MAXMPIDEVS; } } } topology->ndevices[STARPU_MPI_MS_WORKER] = nmpims; /* if user don't want to use MPI slaves, we close the slave processes */ if (no_mp_config && topology->ndevices[STARPU_MPI_MS_WORKER] == 0) { _starpu_mpi_common_mp_deinit(); exit(0); } if (!no_mp_config) { for (i = 0; i < nmpims; i++) _starpu_src_nodes[STARPU_MPI_MS_WORKER][i] = _starpu_mp_common_node_create(STARPU_NODE_MPI_SOURCE, i); for (i = 0; i < nmpims; i++) __starpu_init_mpi_config(topology, config, i); } } /* Bind the driver on a CPU core */ void _starpu_mpi_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { /* Perhaps the worker has some "favourite" bindings */ unsigned *preferred_binding = NULL; unsigned npreferred = 0; unsigned devid = workerarg->devid; if (mpi_bindid_init[devid]) { } else { mpi_bindid_init[devid] = 1; if (_starpu_mpi_common_multiple_thread || devid == 0) mpi_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); else mpi_bindid[devid] = mpi_bindid[0]; } } /* Set up memory and buses */ void _starpu_mpi_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { unsigned memory_node = -1; unsigned devid = workerarg->devid; unsigned numa, devid2; if (mpi_memory_init[devid]) { memory_node = mpi_memory_nodes[devid]; } else { mpi_memory_init[devid] = 1; memory_node = mpi_memory_nodes[devid] = _starpu_memory_node_register(STARPU_MPI_MS_RAM, devid); _starpu_memory_node_set_mapped(memory_node); for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) { _starpu_register_bus(numa, memory_node); _starpu_register_bus(memory_node, numa); } for (devid2 = 0; devid2 < STARPU_MAXMPIDEVS; devid2++) { if (mpi_memory_init[devid2]) { _starpu_register_bus(mpi_memory_nodes[devid], mpi_memory_nodes[devid2]); _starpu_register_bus(mpi_memory_nodes[devid2], mpi_memory_nodes[devid]); } } } //This worker can manage transfers on NUMA nodes for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) _starpu_worker_drives_memory_node(&workerarg->set->workers[0], numa); _starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node); if (!_starpu_mpi_common_multiple_thread) { /* MPI driver thread can manage all slave memories if we disable the MPI multiple thread */ int findworker; for (findworker = 0; findworker < workerarg->workerid; findworker++) { struct _starpu_worker *findworkerarg = &config->workers[findworker]; if (findworkerarg->arch == STARPU_MPI_MS_WORKER) { _starpu_worker_drives_memory_node(workerarg, findworkerarg->memory_node); _starpu_worker_drives_memory_node(findworkerarg, memory_node); } } } workerarg->bindid = mpi_bindid[devid]; _starpu_memory_node_add_nworkers(memory_node); workerarg->memory_node = memory_node; } static void _starpu_deinit_mpi_node(int devid) { _starpu_mp_common_send_command(_starpu_src_nodes[STARPU_MPI_MS_WORKER][devid], STARPU_MP_COMMAND_EXIT, NULL, 0); _starpu_mp_common_node_destroy(_starpu_src_nodes[STARPU_MPI_MS_WORKER][devid]); } void _starpu_deinit_mpi_config(struct _starpu_machine_config *config) { struct _starpu_machine_topology *topology = &config->topology; unsigned i; for (i = 0; i < topology->ndevices[STARPU_MPI_MS_WORKER]; i++) _starpu_deinit_mpi_node(i); } void _starpu_mpi_source_init(struct _starpu_mp_node *node) { _starpu_mpi_common_mp_initialize_src_sink(node); //TODO } void _starpu_mpi_source_deinit(struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED) { } unsigned _starpu_mpi_src_get_device_count() { int nb_mpi_devices; if (!_starpu_mpi_common_is_mp_initialized()) return 0; MPI_Comm_size(MPI_COMM_WORLD, &nb_mpi_devices); //Remove one for master nb_mpi_devices = nb_mpi_devices - 1; return nb_mpi_devices; } void *_starpu_mpi_src_worker(void *arg) { struct _starpu_worker *worker0 = arg; struct _starpu_worker_set *set = worker0->set; struct _starpu_worker_set *worker_set_mpi = set; int nbsinknodes = _starpu_mpi_common_multiple_thread ? 1 : _starpu_mpi_src_get_device_count(); int workersetnum; for (workersetnum = 0; workersetnum < nbsinknodes; workersetnum++) { struct _starpu_worker_set * worker_set = &worker_set_mpi[workersetnum]; /* As all workers of a set share common data, we just use the first * one for initializing the following stuffs. */ struct _starpu_worker *baseworker = &worker_set->workers[0]; struct _starpu_machine_config *config = baseworker->config; unsigned baseworkerid = baseworker - config->workers; unsigned devid = baseworker->devid; unsigned i; /* unsigned memnode = baseworker->memory_node; */ _starpu_driver_start(baseworker, STARPU_CPU_WORKER, 0); #ifdef STARPU_USE_FXT for (i = 1; i < worker_set->nworkers; i++) _starpu_worker_start(&worker_set->workers[i], STARPU_MPI_MS_WORKER, 0); #endif // Current task for a thread managing a worker set has no sense. _starpu_set_current_task(NULL); for (i = 0; i < config->topology.nworker[STARPU_MPI_MS_WORKER][devid]; i++) { struct _starpu_worker *worker = &config->workers[baseworkerid+i]; snprintf(worker->name, sizeof(worker->name), "MPI_MS %u core %u", devid, i); snprintf(worker->short_name, sizeof(worker->short_name), "MPI_MS %u.%u", devid, i); } { char thread_name[16]; if (_starpu_mpi_common_multiple_thread) snprintf(thread_name, sizeof(thread_name), "MPI_MS %u", devid); else snprintf(thread_name, sizeof(thread_name), "MPI_MS"); starpu_pthread_setname(thread_name); } for (i = 0; i < worker_set->nworkers; i++) { struct _starpu_worker *worker = &worker_set->workers[i]; _STARPU_TRACE_WORKER_INIT_END(worker->workerid); } _starpu_src_common_init_switch_env(workersetnum); } /* for */ _starpu_src_common_workers_set(worker_set_mpi, nbsinknodes, &_starpu_src_nodes[STARPU_MPI_MS_WORKER][worker_set_mpi->workers[0].devid]); return NULL; } static int _starpu_mpi_is_direct_access_supported(unsigned node, unsigned handling_node) { (void) node; enum starpu_node_kind kind = starpu_node_get_kind(handling_node); return (kind == STARPU_MPI_MS_RAM); } static uintptr_t _starpu_mpi_map(uintptr_t src, size_t src_offset, unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node, size_t size, int *ret) { uintptr_t map_addr = _starpu_src_common_map(dst_node, src+src_offset, size); if(map_addr == 0) { *ret=-ENOMEM; } else { *ret = 0; } return map_addr; } static int _starpu_mpi_unmap(uintptr_t src STARPU_ATTRIBUTE_UNUSED, size_t src_offset STARPU_ATTRIBUTE_UNUSED, unsigned src_node STARPU_ATTRIBUTE_UNUSED, uintptr_t dst, unsigned dst_node, size_t size) { _starpu_src_common_unmap(dst_node, dst, size); return 0; } static int _starpu_mpi_update_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size) { (void) src; (void) src_offset; (void) src_node; (void) dst; (void) dst_offset; (void) dst_node; (void) size; /* Memory mappings are cache-coherent */ return 0; } struct _starpu_node_ops _starpu_driver_mpi_ms_node_ops = { .name = "mpi driver", .malloc_on_node = _starpu_src_common_allocate, .free_on_node = _starpu_src_common_free, .is_direct_access_supported = _starpu_mpi_is_direct_access_supported, .copy_interface_to[STARPU_CPU_RAM] = _starpu_copy_interface_any_to_any, .copy_interface_to[STARPU_MPI_MS_RAM] = _starpu_copy_interface_any_to_any, .copy_interface_from[STARPU_CPU_RAM] = _starpu_copy_interface_any_to_any, .copy_interface_from[STARPU_MPI_MS_RAM] = _starpu_copy_interface_any_to_any, .copy_data_to[STARPU_CPU_RAM] = _starpu_src_common_copy_data_sink_to_host, .copy_data_to[STARPU_MPI_MS_RAM] = _starpu_src_common_copy_data_sink_to_sink, .copy_data_from[STARPU_CPU_RAM] = _starpu_src_common_copy_data_host_to_sink, .copy_data_from[STARPU_MPI_MS_RAM] = _starpu_src_common_copy_data_sink_to_sink, /* TODO: copy2D/3D? */ .wait_request_completion = _starpu_mpi_common_wait_request_completion, .test_request_completion = _starpu_mpi_common_test_event, .map[STARPU_CPU_RAM] = _starpu_mpi_map, .unmap[STARPU_CPU_RAM] = _starpu_mpi_unmap, .update_map[STARPU_CPU_RAM] = _starpu_mpi_update_map, }; starpu-1.4.9+dfsg/src/drivers/mpi/driver_mpi_source.h000066400000000000000000000040471507764646700227400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DRIVER_MPI_SOURCE_H__ #define __DRIVER_MPI_SOURCE_H__ /** @file */ #include #include #include #pragma GCC visibility push(hidden) void _starpu_mpi_ms_preinit(void); #ifdef STARPU_USE_MPI_MASTER_SLAVE extern struct _starpu_node_ops _starpu_driver_mpi_ms_node_ops; /** Array of structures containing all the information useful to send * and receive information with devices */ struct _starpu_mp_node *_starpu_mpi_ms_src_get_actual_thread_mp_node(); unsigned _starpu_mpi_src_get_device_count(); void *_starpu_mpi_src_worker(void *arg); void _starpu_init_mpi_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config, struct starpu_conf *user_conf, int no_mp_config); void _starpu_mpi_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); void _starpu_mpi_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); void _starpu_deinit_mpi_config(struct _starpu_machine_config *config); void _starpu_mpi_source_init(struct _starpu_mp_node *node); void _starpu_mpi_source_deinit(struct _starpu_mp_node *node); #endif /* STARPU_USE_MPI_MASTER_SLAVE */ #pragma GCC visibility pop #endif /* __DRIVER_MPI_SOURCE_H__ */ starpu-1.4.9+dfsg/src/drivers/opencl/000077500000000000000000000000001507764646700175355ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/drivers/opencl/driver_opencl.c000066400000000000000000001617671507764646700225560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2022,2023 École de Technologie Supérieure (ETS, Montréal) * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * Copyright (C) 2010-2010 Mehdi Juhoor * Copyright (C) 2021-2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_OPENCL) #include #endif #ifdef STARPU_SIMGRID #include #endif static int nb_devices = -1; static int init_done = 0; static starpu_pthread_mutex_t big_lock = STARPU_PTHREAD_MUTEX_INITIALIZER; static size_t global_mem[STARPU_MAXOPENCLDEVS]; #ifdef STARPU_USE_OPENCL static cl_context contexts[STARPU_MAXOPENCLDEVS]; static cl_device_id devices[STARPU_MAXOPENCLDEVS]; static cl_command_queue queues[STARPU_MAXOPENCLDEVS]; static cl_command_queue map_queues[STARPU_MAXOPENCLDEVS]; static cl_device_type type[STARPU_MAXOPENCLDEVS]; static cl_command_queue in_transfer_queues[STARPU_MAXOPENCLDEVS]; static cl_command_queue out_transfer_queues[STARPU_MAXOPENCLDEVS]; static cl_command_queue peer_transfer_queues[STARPU_MAXOPENCLDEVS]; #ifndef STARPU_SIMGRID static cl_command_queue alloc_queues[STARPU_MAXOPENCLDEVS]; static cl_event task_events[STARPU_MAXOPENCLDEVS][STARPU_MAX_PIPELINE]; #endif /* !STARPU_SIMGRID */ #endif #ifdef STARPU_SIMGRID static unsigned task_finished[STARPU_MAXOPENCLDEVS][STARPU_MAX_PIPELINE]; static starpu_pthread_mutex_t opencl_alloc_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; #endif /* STARPU_SIMGRID */ #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) static unsigned opencl_bindid_init[STARPU_MAXOPENCLDEVS]; static unsigned opencl_bindid[STARPU_MAXOPENCLDEVS]; static unsigned opencl_memory_init[STARPU_MAXOPENCLDEVS]; static unsigned opencl_memory_nodes[STARPU_MAXOPENCLDEVS]; #endif #define _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err) do { if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); } while(0) static size_t _starpu_opencl_get_global_mem_size(int devid) { return global_mem[devid]; } #ifdef STARPU_USE_OPENCL void starpu_opencl_get_context(int devid, cl_context *context) { *context = contexts[devid]; } void starpu_opencl_get_device(int devid, cl_device_id *device) { *device = devices[devid]; } void starpu_opencl_get_queue(int devid, cl_command_queue *queue) { *queue = queues[devid]; } void starpu_opencl_get_current_queue(cl_command_queue *queue) { struct _starpu_worker *worker = _starpu_get_local_worker_key(); STARPU_ASSERT(queue); *queue = queues[worker->devid]; } void starpu_opencl_get_current_context(cl_context *context) { struct _starpu_worker *worker = _starpu_get_local_worker_key(); STARPU_ASSERT(context); *context = contexts[worker->devid]; } #endif /* STARPU_USE_OPENCL */ /* This is called to initialize opencl and discover devices */ void _starpu_opencl_init(void) { memset(&opencl_bindid_init, 0, sizeof(opencl_bindid_init)); memset(&opencl_memory_init, 0, sizeof(opencl_memory_init)); STARPU_PTHREAD_MUTEX_LOCK(&big_lock); if (!init_done) { #ifdef STARPU_SIMGRID nb_devices = _starpu_simgrid_get_nbhosts("OpenCL"); #else /* STARPU_USE_OPENCL */ cl_platform_id platform_id[_STARPU_OPENCL_PLATFORM_MAX]; cl_uint nb_platforms; cl_int err; int i; cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR; _STARPU_DEBUG("Initialising OpenCL\n"); // Get Platforms if (starpu_getenv_number("STARPU_OPENCL_ON_CPUS") > 0) device_type |= CL_DEVICE_TYPE_CPU; if (starpu_getenv_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0) device_type = CL_DEVICE_TYPE_CPU; err = clGetPlatformIDs(_STARPU_OPENCL_PLATFORM_MAX, platform_id, &nb_platforms); if (STARPU_UNLIKELY(err != CL_SUCCESS)) nb_platforms=0; _STARPU_DEBUG("Platforms detected: %u\n", nb_platforms); _STARPU_DEBUG("CPU device type: %s\n", (device_type&CL_DEVICE_TYPE_CPU)?"requested":"not requested"); _STARPU_DEBUG("GPU device type: %s\n", (device_type&CL_DEVICE_TYPE_GPU)?"requested":"not requested"); _STARPU_DEBUG("Accelerator device type: %s\n", (device_type&CL_DEVICE_TYPE_ACCELERATOR)?"requested":"not requested"); // Get devices nb_devices = 0; { unsigned j; for (j=0; j STARPU_MAXOPENCLDEVS) { _STARPU_DISP("# Warning: %u OpenCL devices available. Only %d enabled. Use configure option --enable-maxopencldev=xxx to update the maximum value of supported OpenCL devices?\n", nb_devices, STARPU_MAXOPENCLDEVS); nb_devices = STARPU_MAXOPENCLDEVS; } // initialise internal structures for(i=0 ; itopology.nhwdevices[STARPU_OPENCL_WORKER] = nb_devices; } static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_config*config) { struct _starpu_machine_topology *topology = &config->topology; struct starpu_conf *uconf = &config->conf; _starpu_initialize_workers_deviceid(uconf->use_explicit_workers_opencl_gpuid == 0 ? NULL : (int *)uconf->workers_opencl_gpuid, &(config->current_devid[STARPU_OPENCL_WORKER]), (int *)topology->workers_devid[STARPU_OPENCL_WORKER], "STARPU_WORKERS_OPENCLID", topology->nhwdevices[STARPU_OPENCL_WORKER], STARPU_OPENCL_WORKER); _starpu_devices_gpu_clear(config, STARPU_OPENCL_WORKER); _starpu_devices_drop_duplicate(topology->workers_devid[STARPU_OPENCL_WORKER]); } /* Determine which devices we will use */ void _starpu_init_opencl_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config) { int nopencl = config->conf.nopencl; if (nopencl != 0) { /* The user did not disable OPENCL. We need to initialize * OpenCL early to count the number of devices */ _starpu_opencl_init(); int n = _starpu_opencl_get_device_count(); _starpu_topology_check_ndevices(&nopencl, n, 0, STARPU_MAXOPENCLDEVS, 0, "nopencl", "OpenCL", "maxopencldev"); } topology->ndevices[STARPU_OPENCL_WORKER] = nopencl; _starpu_initialize_workers_opencl_gpuid(config); unsigned openclgpu; for (openclgpu = 0; (int) openclgpu < nopencl; openclgpu++) { int devid = _starpu_get_next_devid(topology, config, STARPU_OPENCL_WORKER); if (devid == -1) { // There is no more devices left topology->ndevices[STARPU_OPENCL_WORKER] = openclgpu; break; } _starpu_topology_configure_workers(topology, config, STARPU_OPENCL_WORKER, openclgpu, devid, 0, 0, 1, 1, NULL, NULL); } } /* Bind the driver on a CPU core */ void _starpu_opencl_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { /* Perhaps the worker has some "favourite" bindings */ unsigned preferred_binding[STARPU_NMAXWORKERS]; unsigned npreferred = 0; unsigned devid = workerarg->devid; #ifndef STARPU_SIMGRID if (_starpu_may_bind_automatically[STARPU_OPENCL_WORKER]) { /* StarPU is allowed to bind threads automatically */ unsigned *preferred_numa_binding = _starpu_get_opencl_affinity_vector(devid); unsigned npreferred_numa = _starpu_topology_get_nhwnumanodes(config); npreferred = _starpu_topology_get_numa_core_binding(config, preferred_numa_binding, npreferred_numa, preferred_binding, STARPU_NMAXWORKERS); } #endif /* SIMGRID */ if (opencl_bindid_init[devid]) { #ifndef STARPU_SIMGRID workerarg->bindid = opencl_bindid[devid]; #endif /* SIMGRID */ } else { opencl_bindid_init[devid] = 1; workerarg->bindid = opencl_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); } } /* Set up memory and buses */ void _starpu_opencl_init_worker_memory(struct _starpu_machine_config *config STARPU_ATTRIBUTE_UNUSED, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { unsigned memory_node = -1; unsigned devid = workerarg->devid; unsigned numa; if (opencl_memory_init[devid]) { memory_node = opencl_memory_nodes[devid]; } else { opencl_memory_init[devid] = 1; memory_node = opencl_memory_nodes[devid] = _starpu_memory_node_register(STARPU_OPENCL_RAM, devid); for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) { _starpu_register_bus(numa, memory_node); _starpu_register_bus(memory_node, numa); } #ifdef STARPU_SIMGRID char name[16]; snprintf(name, sizeof(name), "OpenCL%u", devid); starpu_sg_host_t host = _starpu_simgrid_get_host_by_name(name); STARPU_ASSERT(host); _starpu_simgrid_memory_node_set_host(memory_node, host); #else if (_starpu_opencl_get_device_type(workerarg->devid) == CL_DEVICE_TYPE_CPU) _starpu_memory_node_set_mapped(memory_node); #endif /* SIMGRID */ } _starpu_memory_node_add_nworkers(memory_node); //This worker can manage transfers on NUMA nodes for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) _starpu_worker_drives_memory_node(workerarg, numa); _starpu_worker_drives_memory_node(workerarg, memory_node); workerarg->memory_node = memory_node; } /* Really initialize one device */ int _starpu_opencl_init_context(int devid) { #ifdef STARPU_SIMGRID int j; for (j = 0; j < STARPU_MAX_PIPELINE; j++) task_finished[devid][j] = 0; #else /* !STARPU_SIMGRID */ cl_int err; cl_uint uint; STARPU_PTHREAD_MUTEX_LOCK(&big_lock); _STARPU_DEBUG("Initialising context for dev %d\n", devid); // Create a compute context err = 0; contexts[devid] = clCreateContext(NULL, 1, &devices[devid], NULL, NULL, &err); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); err = clGetDeviceInfo(devices[devid], CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(uint), &uint, NULL); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); starpu_malloc_set_align(uint/8); // Create execution queue for the given device queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], 0, &err); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); // Create transfer queue for the given device cl_command_queue_properties props; err = clGetDeviceInfo(devices[devid], CL_DEVICE_QUEUE_PROPERTIES, sizeof(props), &props, NULL); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); props &= ~CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; in_transfer_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], props, &err); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); out_transfer_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], props, &err); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); peer_transfer_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], props, &err); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); alloc_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], 0, &err); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); map_queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], 0, &err); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); STARPU_PTHREAD_MUTEX_UNLOCK(&big_lock); #endif /* !STARPU_SIMGRID */ return 0; } /* De-initialize one device */ int _starpu_opencl_deinit_context(int devid) { #ifdef STARPU_SIMGRID int j; for (j = 0; j < STARPU_MAX_PIPELINE; j++) task_finished[devid][j] = 0; #else /* !STARPU_SIMGRID */ cl_int err; STARPU_PTHREAD_MUTEX_LOCK(&big_lock); _STARPU_DEBUG("De-initialising context for dev %d\n", devid); err = clFinish(queues[devid]); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); err = clReleaseCommandQueue(queues[devid]); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); err = clFinish(in_transfer_queues[devid]); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); err = clReleaseCommandQueue(in_transfer_queues[devid]); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); err = clFinish(out_transfer_queues[devid]); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); err = clReleaseCommandQueue(out_transfer_queues[devid]); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); err = clFinish(peer_transfer_queues[devid]); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); err = clReleaseCommandQueue(peer_transfer_queues[devid]); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); err = clFinish(alloc_queues[devid]); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); err = clReleaseCommandQueue(alloc_queues[devid]); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); err = clReleaseCommandQueue(map_queues[devid]); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); err = clReleaseContext(contexts[devid]); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); contexts[devid] = NULL; STARPU_PTHREAD_MUTEX_UNLOCK(&big_lock); #endif return 0; } #ifdef STARPU_USE_OPENCL #ifndef STARPU_SIMGRID static unsigned _starpu_opencl_get_device_name(int dev, char *name, int lname) { int err; if (!init_done) { _starpu_opencl_init(); } // Get device name err = clGetDeviceInfo(devices[dev], CL_DEVICE_NAME, lname, name, NULL); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); _STARPU_DEBUG("Device %d : [%s]\n", dev, name); return EXIT_SUCCESS; } #endif #endif static void _starpu_opencl_limit_gpu_mem_if_needed(unsigned devid) { starpu_ssize_t limit; size_t STARPU_ATTRIBUTE_UNUSED totalGlobalMem = 0; size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0; #ifdef STARPU_SIMGRID totalGlobalMem = _starpu_simgrid_get_memsize("OpenCL", devid); #elif defined(STARPU_USE_OPENCL) /* Request the size of the current device's memory */ cl_int err; cl_ulong size; err = clGetDeviceInfo(devices[devid], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size), &size, NULL); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); totalGlobalMem = size; #endif limit = starpu_getenv_number("STARPU_LIMIT_OPENCL_MEM"); if (limit == -1) { char name[30]; snprintf(name, sizeof(name), "STARPU_LIMIT_OPENCL_%u_MEM", devid); limit = starpu_getenv_number(name); } #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) if (limit == -1) { /* Use 90% of the available memory by default. */ limit = totalGlobalMem / (1024*1024) * 0.9; } #endif global_mem[devid] = limit * 1024*1024; #ifdef STARPU_USE_OPENCL /* How much memory to waste ? */ to_waste = totalGlobalMem - global_mem[devid]; #endif _STARPU_DEBUG("OpenCL device %u: Wasting %ld MB / Limit %ld MB / Total %ld MB / Remains %ld MB\n", devid, (long)to_waste/(1024*1024), (long) limit, (long)totalGlobalMem/(1024*1024), (long)(totalGlobalMem - to_waste)/(1024*1024)); } /* This is run from the driver thread to initialize the driver OpenCL context */ static int _starpu_opencl_driver_init(struct _starpu_worker *worker) { int devid = worker->devid; #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi; pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init, devid, worker->workerid, starpu_prof_tool_driver_ocl, worker->memory_node, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init(&pi, NULL, NULL); pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_start, devid, worker->workerid, starpu_prof_tool_driver_ocl, worker->memory_node, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start(&pi, NULL, NULL); #endif _starpu_driver_start(worker, STARPU_OPENCL_WORKER, 0); _starpu_opencl_init_context(devid); /* one more time to avoid hacks from third party lib :) */ _starpu_bind_thread_on_cpu(worker->bindid, worker->workerid, NULL); _starpu_opencl_limit_gpu_mem_if_needed(devid); _starpu_memory_manager_set_global_memory_size(worker->memory_node, _starpu_opencl_get_global_mem_size(devid)); float size = (float) global_mem[devid] / (1<<30); #ifdef STARPU_SIMGRID const char *devname = _starpu_simgrid_get_devname("OpenCL", devid); if (!devname) devname = "Simgrid"; #else /* get the device's name */ char devname[64]; _starpu_opencl_get_device_name(devid, devname, 64); #endif snprintf(worker->name, sizeof(worker->name), "OpenCL %d (%s %.1f GiB)", devid, devname, size); snprintf(worker->short_name, sizeof(worker->short_name), "OpenCL %d", devid); starpu_pthread_setname(worker->short_name); worker->pipeline_length = starpu_getenv_number_default("STARPU_OPENCL_PIPELINE", 2); if (worker->pipeline_length > STARPU_MAX_PIPELINE) { _STARPU_DISP("Warning: STARPU_OPENCL_PIPELINE is %u, but STARPU_MAX_PIPELINE is only %u\n", worker->pipeline_length, STARPU_MAX_PIPELINE); worker->pipeline_length = STARPU_MAX_PIPELINE; } #if !defined(STARPU_SIMGRID) && !defined(STARPU_NON_BLOCKING_DRIVERS) if (worker->pipeline_length >= 1) { /* We need non-blocking drivers, to poll for OPENCL task * termination */ _STARPU_DISP("Warning: reducing STARPU_OPENCL_PIPELINE to 0 because blocking drivers are enabled (and simgrid is not enabled)\n"); worker->pipeline_length = 0; } #endif _STARPU_DEBUG("OpenCL (%s) dev id %d thread is ready to run on CPU %d !\n", devname, devid, worker->bindid); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_end, devid, worker->workerid, starpu_prof_tool_driver_ocl, 0, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end(&pi, NULL, NULL); #endif _STARPU_TRACE_WORKER_INIT_END(worker->workerid); /* tell the main thread that this one is ready */ STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex); worker->status = STATUS_UNKNOWN; worker->worker_is_initialized = 1; STARPU_PTHREAD_COND_SIGNAL(&worker->ready_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex); return 0; } static int _starpu_opencl_driver_deinit(struct _starpu_worker *worker) { _STARPU_TRACE_WORKER_DEINIT_START; unsigned memnode = worker->memory_node; _starpu_datawizard_handle_all_pending_node_data_requests(memnode); /* In case there remains some memory that was automatically * allocated by StarPU, we release it now. Note that data * coherency is not maintained anymore at that point ! */ _starpu_free_all_automatically_allocated_buffers(memnode); _starpu_malloc_shutdown(memnode); unsigned devid = worker->devid; _starpu_opencl_deinit_context(devid); worker->worker_is_initialized = 0; _STARPU_TRACE_WORKER_DEINIT_END(STARPU_OPENCL_WORKER); #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_deinit, worker->workerid, worker->workerid, starpu_prof_tool_driver_ocl, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit(&pi, NULL, NULL); #endif return 0; } #ifdef STARPU_USE_OPENCL cl_int starpu_opencl_allocate_memory(int devid STARPU_ATTRIBUTE_UNUSED, cl_mem *mem STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED, cl_mem_flags flags STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_SIMGRID STARPU_ABORT(); #else cl_int err; cl_mem memory; memory = clCreateBuffer(contexts[devid], flags, size, NULL, &err); if (err == CL_OUT_OF_HOST_MEMORY) return err; if (err == CL_MEM_OBJECT_ALLOCATION_FAILURE) return err; _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); /* * OpenCL uses lazy memory allocation: we will only know if the * allocation failed when trying to copy data onto the device. But we * want to know this __now__, so we just perform a dummy copy. */ char dummy = 0; cl_event ev; err = clEnqueueWriteBuffer(alloc_queues[devid], memory, CL_TRUE, 0, sizeof(dummy), &dummy, 0, NULL, &ev); if (err == CL_MEM_OBJECT_ALLOCATION_FAILURE) return err; if (err == CL_OUT_OF_RESOURCES) return err; _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); clWaitForEvents(1, &ev); clReleaseEvent(ev); *mem = memory; return CL_SUCCESS; #endif } #endif static uintptr_t _starpu_opencl_malloc_on_node(unsigned dst_node, size_t size, int flags) { (void)flags; uintptr_t addr = 0; #ifdef STARPU_SIMGRID static uintptr_t last[STARPU_MAXNODES]; /* Sleep for the allocation */ STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex); if (_starpu_simgrid_cuda_malloc_cost()) starpu_sleep(0.000175); if (!last[dst_node]) last[dst_node] = 1<<10; addr = last[dst_node]; last[dst_node]+=size; STARPU_ASSERT(last[dst_node] >= addr); STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex); #else int ret; cl_mem ptr; ret = starpu_opencl_allocate_memory(starpu_memory_node_get_devid(dst_node), &ptr, size, CL_MEM_READ_WRITE); if (ret) { addr = 0; } else { addr = (uintptr_t)ptr; } #endif return addr; } static void _starpu_opencl_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags) { (void)dst_node; (void)addr; (void)size; (void)flags; #ifdef STARPU_SIMGRID STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex); /* Sleep for the free */ if (_starpu_simgrid_cuda_malloc_cost()) starpu_sleep(0.000750); STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex); #else cl_int err; err = clReleaseMemObject((void*)addr); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); #endif } #ifdef STARPU_USE_OPENCL cl_int starpu_opencl_copy_ram_to_opencl(void *ptr, unsigned src_node STARPU_ATTRIBUTE_UNUSED, cl_mem buffer, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret) { cl_int err; struct _starpu_worker *worker = _starpu_get_local_worker_key(); double start = 0.; if (event) starpu_interface_start_driver_copy_async(src_node, dst_node, &start); cl_event ev; err = clEnqueueWriteBuffer(in_transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, &ev); if (event) starpu_interface_end_driver_copy_async(src_node, dst_node, start); if (STARPU_LIKELY(err == CL_SUCCESS)) { if (event == NULL) { /* We want a synchronous copy, let's synchronise the queue */ err = clWaitForEvents(1, &ev); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); err = clReleaseEvent(ev); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); } else { clFlush(in_transfer_queues[worker->devid]); *event = ev; } if (ret) { *ret = (event == NULL) ? 0 : -EAGAIN; } } return err; } cl_int starpu_opencl_copy_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, size_t offset, cl_event *event, int *ret) { cl_int err; struct _starpu_worker *worker = _starpu_get_local_worker_key(); double start = 0.; if (event) starpu_interface_start_driver_copy_async(src_node, dst_node, &start); cl_event ev; err = clEnqueueReadBuffer(out_transfer_queues[worker->devid], buffer, CL_FALSE, offset, size, ptr, 0, NULL, &ev); if (event) starpu_interface_end_driver_copy_async(src_node, dst_node, start); if (STARPU_LIKELY(err == CL_SUCCESS)) { if (event == NULL) { /* We want a synchronous copy, let's synchronise the queue */ err = clWaitForEvents(1, &ev); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); err = clReleaseEvent(ev); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); } else { clFlush(out_transfer_queues[worker->devid]); *event = ev; } if (ret) { *ret = (event == NULL) ? 0 : -EAGAIN; } } return err; } cl_int starpu_opencl_copy_opencl_to_opencl(cl_mem src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, size_t src_offset, cl_mem dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t dst_offset, size_t size, cl_event *event, int *ret) { cl_int err; struct _starpu_worker *worker = _starpu_get_local_worker_key(); double start = 0.; if (event) starpu_interface_start_driver_copy_async(src_node, dst_node, &start); cl_event ev; err = clEnqueueCopyBuffer(peer_transfer_queues[worker->devid], src, dst, src_offset, dst_offset, size, 0, NULL, &ev); if (event) starpu_interface_end_driver_copy_async(src_node, dst_node, start); if (STARPU_LIKELY(err == CL_SUCCESS)) { if (event == NULL) { /* We want a synchronous copy, let's synchronise the queue */ err = clWaitForEvents(1, &ev); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); err = clReleaseEvent(ev); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); } else { clFlush(peer_transfer_queues[worker->devid]); *event = ev; } if (ret) { *ret = (event == NULL) ? 0 : -EAGAIN; } } return err; } cl_int starpu_opencl_copy_async_sync(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, cl_event *event) { enum starpu_node_kind src_kind = starpu_node_get_kind(src_node); enum starpu_node_kind dst_kind = starpu_node_get_kind(dst_node); cl_int err; int ret; if (src_kind == STARPU_OPENCL_RAM && dst_kind == STARPU_CPU_RAM) { err = starpu_opencl_copy_opencl_to_ram((cl_mem) src, src_node, (void*) (dst + dst_offset), dst_node, size, src_offset, event, &ret); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); return ret; } if (src_kind == STARPU_CPU_RAM && dst_kind == STARPU_OPENCL_RAM) { err = starpu_opencl_copy_ram_to_opencl((void*) (src + src_offset), src_node, (cl_mem) dst, dst_node, size, dst_offset, event, &ret); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); return ret; } if (src_kind == STARPU_OPENCL_RAM && (dst_kind == STARPU_CPU_RAM || dst_kind == STARPU_OPENCL_RAM)) { err = starpu_opencl_copy_opencl_to_opencl((cl_mem) src, src_node, src_offset, (cl_mem) dst, dst_node, dst_offset, size, event, &ret); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); return ret; } STARPU_ABORT(); } static inline cl_event *_starpu_opencl_event(union _starpu_async_channel_event *_event) { cl_event *event; STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); event = (void *) _event; return event; } static int _starpu_opencl_copy_data_from_opencl_to_opencl(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_OPENCL_RAM && dst_kind == STARPU_OPENCL_RAM); return starpu_opencl_copy_async_sync(src, src_offset, src_node, dst, dst_offset, dst_node, size, _starpu_opencl_event(&async_channel->event)); } static int _starpu_opencl_copy_data_from_opencl_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_OPENCL_RAM && dst_kind == STARPU_CPU_RAM); return starpu_opencl_copy_async_sync(src, src_offset, src_node, dst, dst_offset, dst_node, size, _starpu_opencl_event(&async_channel->event)); } static int _starpu_opencl_copy_data_from_cpu_to_opencl(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_OPENCL_RAM); return starpu_opencl_copy_async_sync(src, src_offset, src_node, dst, dst_offset, dst_node, size, _starpu_opencl_event(&async_channel->event)); } #if 0 static cl_int _starpu_opencl_copy_rect_opencl_to_ram(cl_mem buffer, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, const size_t buffer_origin[3], const size_t host_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, cl_event *event) { cl_int err; struct _starpu_worker *worker = _starpu_get_local_worker_key(); cl_bool blocking; double start = 0.; blocking = (event == NULL) ? CL_TRUE : CL_FALSE; if (event) starpu_interface_start_driver_copy_async(src_node, dst_node, &start); err = clEnqueueReadBufferRect(out_transfer_queues[worker->devid], buffer, blocking, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, 0, NULL, event); clFlush(out_transfer_queues[worker->devid]); if (event) starpu_interface_end_driver_copy_async(src_node, dst_node, start); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); return CL_SUCCESS; } static cl_int _starpu_opencl_copy_rect_ram_to_opencl(void *ptr, unsigned src_node STARPU_ATTRIBUTE_UNUSED, cl_mem buffer, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, const size_t buffer_origin[3], const size_t host_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, cl_event *event) { cl_int err; struct _starpu_worker *worker = _starpu_get_local_worker_key(); cl_bool blocking; double start = 0.; blocking = (event == NULL) ? CL_TRUE : CL_FALSE; if (event) starpu_interface_start_driver_copy_async(src_node, dst_node, &start); err = clEnqueueWriteBufferRect(in_transfer_queues[worker->devid], buffer, blocking, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, 0, NULL, event); clFlush(in_transfer_queues[worker->devid]); if (event) starpu_interface_end_driver_copy_async(src_node, dst_node, start); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); return CL_SUCCESS; } #endif static unsigned _starpu_opencl_test_request_completion(struct _starpu_async_channel *async_channel) { cl_int event_status; cl_event opencl_event = *_starpu_opencl_event(&async_channel->event); if (opencl_event == NULL) STARPU_ABORT(); cl_int err = clGetEventInfo(opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); if (event_status < 0) STARPU_OPENCL_REPORT_ERROR(event_status); if (event_status == CL_COMPLETE) { err = clReleaseEvent(opencl_event); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); } return (event_status == CL_COMPLETE); } /* Only used at starpu_shutdown */ static void _starpu_opencl_wait_request_completion(struct _starpu_async_channel *async_channel) { cl_int err; if (*_starpu_opencl_event(&async_channel->event) == NULL) STARPU_ABORT(); err = clWaitForEvents(1, _starpu_opencl_event(&async_channel->event)); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); err = clReleaseEvent(*_starpu_opencl_event(&async_channel->event)); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); } static int _starpu_opencl_copy_interface_from_opencl_to_opencl(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_OPENCL_RAM && dst_kind == STARPU_OPENCL_RAM); int ret = 1; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; /* STARPU_OPENCL_RAM -> STARPU_OPENCL_RAM */ STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node || starpu_worker_get_local_memory_node() == src_node); if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_opencl_copy_disabled() || !(copy_methods->opencl_to_opencl_async || copy_methods->any_to_any)) { STARPU_ASSERT(copy_methods->opencl_to_opencl || copy_methods->any_to_any); /* this is not associated to a request so it's synchronous */ if (copy_methods->opencl_to_opencl) copy_methods->opencl_to_opencl(src_interface, src_node, dst_interface, dst_node); else copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } else { req->async_channel.node_ops = &_starpu_driver_opencl_node_ops; if (copy_methods->opencl_to_opencl_async) ret = copy_methods->opencl_to_opencl_async(src_interface, src_node, dst_interface, dst_node, _starpu_opencl_event(&req->async_channel.event)); else { STARPU_ASSERT(copy_methods->any_to_any); ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); } } return ret; } static int _starpu_opencl_copy_interface_from_opencl_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_OPENCL_RAM && dst_kind == STARPU_CPU_RAM); int ret = 1; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; /* OpenCL -> RAM */ STARPU_ASSERT(starpu_worker_get_local_memory_node() == src_node); if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_opencl_copy_disabled() || !(copy_methods->opencl_to_ram_async || copy_methods->any_to_any)) { STARPU_ASSERT(copy_methods->opencl_to_ram || copy_methods->any_to_any); /* this is not associated to a request so it's synchronous */ if (copy_methods->opencl_to_ram) copy_methods->opencl_to_ram(src_interface, src_node, dst_interface, dst_node); else copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } else { req->async_channel.node_ops = &_starpu_driver_opencl_node_ops; if (copy_methods->opencl_to_ram_async) ret = copy_methods->opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, _starpu_opencl_event(&req->async_channel.event)); else { STARPU_ASSERT(copy_methods->any_to_any); ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); } } return ret; } static int _starpu_opencl_copy_interface_from_cpu_to_opencl(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req) { int src_kind = starpu_node_get_kind(src_node); int dst_kind = starpu_node_get_kind(dst_node); STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_OPENCL_RAM); int ret = 0; const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods; /* STARPU_CPU_RAM -> STARPU_OPENCL_RAM */ STARPU_ASSERT(starpu_worker_get_local_memory_node() == dst_node); if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_opencl_copy_disabled() || !(copy_methods->ram_to_opencl_async || copy_methods->any_to_any)) { STARPU_ASSERT(copy_methods->ram_to_opencl || copy_methods->any_to_any); /* this is not associated to a request so it's synchronous */ if (copy_methods->ram_to_opencl) copy_methods->ram_to_opencl(src_interface, src_node, dst_interface, dst_node); else copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } else { req->async_channel.node_ops = &_starpu_driver_opencl_node_ops; if (copy_methods->ram_to_opencl_async) ret = copy_methods->ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, _starpu_opencl_event(&req->async_channel.event)); else { STARPU_ASSERT(copy_methods->any_to_any); ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel); } } return ret; } static uintptr_t _starpu_opencl_map_ram(uintptr_t src, size_t src_offset, unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size, int *ret) { cl_int err; cl_mem memory; struct _starpu_worker *worker = _starpu_get_local_worker_key(); *ret = -EIO; if (starpu_node_get_kind(src_node) != STARPU_CPU_RAM) return 0; STARPU_ASSERT(dst_node == worker->memory_node); memory = clCreateBuffer(contexts[worker->devid], CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, size, (void*)(src + src_offset), &err); if (err == CL_OUT_OF_HOST_MEMORY) return 0; if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); return (uintptr_t)memory; } static int _starpu_opencl_unmap_ram(uintptr_t src STARPU_ATTRIBUTE_UNUSED, size_t src_offset STARPU_ATTRIBUTE_UNUSED, unsigned src_node STARPU_ATTRIBUTE_UNUSED, uintptr_t dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED) { cl_int err; struct _starpu_worker *worker = _starpu_get_local_worker_key(); STARPU_ASSERT(dst_node == worker->memory_node); err = clReleaseMemObject((cl_mem) dst); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); return 0; } static int _starpu_opencl_update_opencl_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size) { (void) size; (void) src_node; cl_int err; struct _starpu_worker *worker = _starpu_get_local_worker_key(); STARPU_ASSERT(dst_offset == 0); STARPU_ASSERT(dst_node == worker->memory_node); cl_event ev; err = clEnqueueUnmapMemObject(map_queues[worker->devid], (cl_mem) (dst + dst_offset), (void*) (src + src_offset), 0, NULL, &ev); if (STARPU_UNLIKELY(err)) STARPU_OPENCL_REPORT_ERROR(err); /* We want a synchronous update, let's synchronise the queue */ err = clWaitForEvents(1, &ev); if (STARPU_UNLIKELY(err)) STARPU_OPENCL_REPORT_ERROR(err); err = clReleaseEvent(ev); if (STARPU_UNLIKELY(err)) STARPU_OPENCL_REPORT_ERROR(err); return 0; } static int _starpu_opencl_update_cpu_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size) { (void) size; (void) dst_node; cl_int err; struct _starpu_worker *worker = _starpu_get_local_worker_key(); STARPU_ASSERT(src_offset == 0); STARPU_ASSERT(src_node == worker->memory_node); cl_event ev; void *ptr = clEnqueueMapBuffer(map_queues[worker->devid], (cl_mem) (src + src_offset), CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, &ev, &err); if (STARPU_UNLIKELY(!ptr)) STARPU_OPENCL_REPORT_ERROR(err); /* We want a synchronous update, let's synchronise the queue */ err = clWaitForEvents(1, &ev); if (STARPU_UNLIKELY(err)) STARPU_OPENCL_REPORT_ERROR(err); err = clReleaseEvent(ev); if (STARPU_UNLIKELY(err)) STARPU_OPENCL_REPORT_ERROR(err); STARPU_ASSERT((uintptr_t) ptr == (dst + dst_offset)); return 0; } #endif /* STARPU_USE_OPENCL */ static int _starpu_opencl_is_direct_access_supported(unsigned node, unsigned handling_node) { (void)node; (void)handling_node; return 0; } static int _starpu_opencl_start_job(struct _starpu_job *j, struct _starpu_worker *worker, unsigned char pipeline_idx STARPU_ATTRIBUTE_UNUSED) { STARPU_ASSERT(j); struct starpu_task *task = j->task; int profiling = starpu_profiling_status_get(); STARPU_ASSERT(task); struct starpu_codelet *cl = task->cl; STARPU_ASSERT(cl); _starpu_set_current_task(task); j->workerid = worker->workerid; if (worker->ntasks == 1) { /* We are alone in the pipeline, the kernel will start now, record it */ _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, profiling); } starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, j->nimpl); STARPU_ASSERT_MSG(func, "when STARPU_OPENCL is defined in 'where', opencl_func or opencl_funcs has to be defined"); if (_starpu_get_disable_kernels() <= 0) { _STARPU_TRACE_START_EXECUTING(j); #ifdef STARPU_SIMGRID double length = NAN; double energy = NAN; int async = task->cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC; int simulate = 1; if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE && !async) { /* Actually execute function */ simulate = 0; func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); #ifdef STARPU_OPENCL_SIMULATOR #ifndef CL_PROFILING_CLOCK_CYCLE_COUNT #ifdef CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT #define CL_PROFILING_CLOCK_CYCLE_COUNT CL_PROFILING_COMMAND_SHAVE_CYCLE_COUNT #else #error The OpenCL simulator must provide CL_PROFILING_CLOCK_CYCLE_COUNT #endif #endif struct starpu_profiling_task_info *profiling_info = task->profiling_info; STARPU_ASSERT_MSG(profiling_info->used_cycles, "Application kernel must call starpu_opencl_collect_stats to collect simulated time"); #if defined(HAVE_SG_HOST_SPEED) || defined(sg_host_speed) # if defined(HAVE_SG_HOST_SELF) || defined(sg_host_self) length = ((double) profiling_info->used_cycles)/sg_host_speed(sg_host_self()); # else length = ((double) profiling_info->used_cycles)/sg_host_speed(MSG_host_self()); # endif #elif defined HAVE_MSG_HOST_GET_SPEED || defined(MSG_host_get_speed) length = ((double) profiling_info->used_cycles)/MSG_host_get_speed(MSG_host_self()); #else length = ((double) profiling_info->used_cycles)/MSG_get_host_speed(MSG_host_self()); #endif energy = info->energy_consumed; /* And give the simulated time to simgrid */ simulate = 1; #endif } else if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT && !async) { _SIMGRID_TIMER_BEGIN(1); func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); _SIMGRID_TIMER_END; simulate=0; } if (simulate) { struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(worker, j); _starpu_simgrid_submit_job(sched_ctx->id, worker->workerid, j, &worker->perf_arch, length, energy, async ? &task_finished[worker->devid][pipeline_idx] : NULL); } #else #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_ocl, -1, (void*)func); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec(&pi, NULL, NULL); #endif func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_ocl, -1, (void*)func); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec(&pi, NULL, NULL); #endif cl_command_queue queue; starpu_opencl_get_queue(worker->devid, &queue); #endif _STARPU_TRACE_END_EXECUTING(j); } return 0; } static void _starpu_opencl_stop_job(struct _starpu_job *j, struct _starpu_worker *worker); static void _starpu_opencl_execute_job(struct starpu_task *task, struct _starpu_worker *worker) { int res; struct _starpu_job *j = _starpu_get_job_associated_to_task(task); unsigned char pipeline_idx = (worker->first_task + worker->ntasks - 1)%STARPU_MAX_PIPELINE; res = _starpu_opencl_start_job(j, worker, pipeline_idx); if (res) { switch (res) { case -EAGAIN: _STARPU_DISP("ouch, OpenCL could not actually run task %p, putting it back...\n", task); _starpu_push_task_to_workers(task); STARPU_ABORT(); default: STARPU_ABORT(); } } if (task->cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC) { /* Record event to synchronize with task termination later */ #ifndef STARPU_SIMGRID cl_command_queue queue; starpu_opencl_get_queue(worker->devid, &queue); #endif if (worker->pipeline_length == 0) { #ifdef STARPU_SIMGRID _starpu_simgrid_wait_tasks(worker->workerid); #else starpu_opencl_get_queue(worker->devid, &queue); clFinish(queue); #endif _starpu_opencl_stop_job(j, worker); } else { #ifndef STARPU_SIMGRID int err; /* the function clEnqueueMarker is deprecated from * OpenCL version 1.2. We would like to use the new * function clEnqueueMarkerWithWaitList. We could do * it by checking its availability through our own * configure macro HAVE_CLENQUEUEMARKERWITHWAITLIST * and the OpenCL macro CL_VERSION_1_2. However these * 2 macros detect the function availability in the * ICD and not in the device implementation. */ err = clEnqueueMarker(queue, &task_events[worker->devid][pipeline_idx]); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); clFlush(queue); #endif } } else /* Synchronous execution */ { _starpu_opencl_stop_job(j, worker); } } static void _starpu_opencl_stop_job(struct _starpu_job *j, struct _starpu_worker *worker) { int profiling = starpu_profiling_status_get(); _starpu_set_current_task(NULL); if (worker->pipeline_length) worker->current_tasks[worker->first_task] = NULL; else worker->current_task = NULL; worker->first_task = (worker->first_task + 1) % STARPU_MAX_PIPELINE; worker->ntasks--; _starpu_driver_end_job(worker, j, &worker->perf_arch, 0, profiling); struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(worker, j); STARPU_ASSERT_MSG(sched_ctx != NULL, "there should be a worker %d in the ctx of this job \n", worker->workerid); if(!sched_ctx->sched_policy) _starpu_driver_update_job_feedback(j, worker, &sched_ctx->perf_arch, profiling); else _starpu_driver_update_job_feedback(j, worker, &worker->perf_arch, profiling); _starpu_push_task_output(j); _starpu_handle_job_termination(j); } static int _starpu_opencl_driver_run_once(struct _starpu_worker *worker) { int workerid = worker->workerid; unsigned memnode = worker->memory_node; struct _starpu_job *j; struct starpu_task *task; int res; #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi; #endif int idle_tasks, idle_transfers; #ifdef STARPU_SIMGRID starpu_pthread_wait_reset(&worker->wait); #endif idle_tasks = 0; idle_transfers = 0; /* First test for transfers pending for next task */ task = worker->task_transferring; if (!task) idle_transfers++; if (task && worker->nb_buffers_transferred == worker->nb_buffers_totransfer) { STARPU_RMB(); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_ocl, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif _STARPU_TRACE_END_PROGRESS(memnode); j = _starpu_get_job_associated_to_task(task); _starpu_fetch_task_input_tail(task, j, worker); /* Reset it */ worker->task_transferring = NULL; if (worker->ntasks > 1 && !(task->cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC)) { /* We have to execute a non-asynchronous task but we * still have tasks in the pipeline... Record it to * prevent more tasks from coming, and do it later */ worker->pipeline_stuck = 1; return 0; } _starpu_opencl_execute_job(task, worker); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_ocl, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif _STARPU_TRACE_START_PROGRESS(memnode); } /* Then poll for completed jobs */ if (worker->pipeline_length) task = worker->current_tasks[worker->first_task]; else task = worker->current_task; if (worker->ntasks && task != worker->task_transferring) { #ifndef STARPU_SIMGRID size_t size; int err; #endif /* On-going asynchronous task, check for its termination first */ #ifdef STARPU_SIMGRID if (!task_finished[worker->devid][worker->first_task]) #else /* !STARPU_SIMGRID */ cl_int status; err = clGetEventInfo(task_events[worker->devid][worker->first_task], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, &size); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); STARPU_ASSERT(size == sizeof(cl_int)); if (status != CL_COMPLETE) #endif /* !STARPU_SIMGRID */ { } else { _STARPU_TRACE_END_PROGRESS(memnode); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_ocl, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif #ifndef STARPU_SIMGRID err = clReleaseEvent(task_events[worker->devid][worker->first_task]); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); task_events[worker->devid][worker->first_task] = 0; #endif /* Asynchronous task completed! */ _starpu_opencl_stop_job(_starpu_get_job_associated_to_task(task), worker); /* See next task if any */ if (worker->ntasks && worker->current_tasks[worker->first_task] != worker->task_transferring) { task = worker->current_tasks[worker->first_task]; j = _starpu_get_job_associated_to_task(task); if (task->cl->opencl_flags[j->nimpl] & STARPU_OPENCL_ASYNC) { /* An asynchronous task, it was already queued, * it's now running, record its start time. */ _starpu_driver_start_job(worker, j, &worker->perf_arch, 0, starpu_profiling_status_get()); } else { /* A synchronous task, we have finished flushing the pipeline, we can now at last execute it. */ _STARPU_TRACE_EVENT("sync_task"); _starpu_opencl_execute_job(task, worker); _STARPU_TRACE_EVENT("end_sync_task"); worker->pipeline_stuck = 0; } } _STARPU_TRACE_START_PROGRESS(memnode); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_ocl, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); #endif } } if (!worker->pipeline_length || worker->ntasks < worker->pipeline_length) idle_tasks++; #if defined(STARPU_NON_BLOCKING_DRIVERS) && !defined(STARPU_SIMGRID) if (!idle_tasks) { /* No task ready yet, no better thing to do than waiting */ __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, !idle_transfers); return 0; } #endif res = __starpu_datawizard_progress(_STARPU_DATAWIZARD_DO_ALLOC, 1); task = _starpu_get_worker_task(worker, workerid, memnode); #ifdef STARPU_SIMGRID if (!res && !task) starpu_pthread_wait_wait(&worker->wait); #endif if (task == NULL) return 0; j = _starpu_get_job_associated_to_task(task); if (worker->pipeline_length) worker->current_tasks[(worker->first_task + worker->ntasks)%STARPU_MAX_PIPELINE] = task; else worker->current_task = task; worker->ntasks++; /* can OpenCL do that task ? */ if (!_STARPU_MAY_PERFORM(j, OPENCL)) { /* this is not a OpenCL task */ _starpu_worker_refuse_task(worker, task); return 0; } _STARPU_TRACE_END_PROGRESS(memnode); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_ocl, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif /* Fetch data asynchronously */ res = _starpu_fetch_task_input(task, j, 1); STARPU_ASSERT(res == 0); _STARPU_TRACE_START_PROGRESS(memnode); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_ocl, memnode, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); #endif return 0; } void *_starpu_opencl_worker(void *_arg) { struct _starpu_worker* worker = _arg; _starpu_opencl_driver_init(worker); _STARPU_TRACE_START_PROGRESS(worker->memory_node); #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_ocl, worker->memory_node, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); #endif while (_starpu_machine_is_running()) { _starpu_may_pause(); _starpu_opencl_driver_run_once(worker); } _starpu_opencl_driver_deinit(worker); _STARPU_TRACE_END_PROGRESS(worker->memory_node); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_ocl, worker->memory_node, NULL); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); #endif return NULL; } #ifdef STARPU_USE_OPENCL static int _starpu_run_opencl(struct _starpu_worker *workerarg) { _STARPU_DEBUG("Running OpenCL %u from the application\n", workerarg->devid); /* Let's go ! */ _starpu_opencl_worker(workerarg); return 0; } static int _starpu_opencl_driver_set_devid(struct starpu_driver *driver, struct _starpu_worker *worker) { starpu_opencl_get_device(worker->devid, &driver->id.opencl_id); return 0; } static int _starpu_opencl_driver_is_devid(struct starpu_driver *driver, struct _starpu_worker *worker) { cl_device_id device; starpu_opencl_get_device(worker->devid, &device); return device == driver->id.opencl_id; } struct _starpu_driver_ops _starpu_driver_opencl_ops = { .init = _starpu_opencl_driver_init, .run = _starpu_run_opencl, .run_once = _starpu_opencl_driver_run_once, .deinit = _starpu_opencl_driver_deinit, .set_devid = _starpu_opencl_driver_set_devid, .is_devid = _starpu_opencl_driver_is_devid, }; #endif #ifdef STARPU_USE_OPENCL cl_device_type _starpu_opencl_get_device_type(int devid) { if (!init_done) _starpu_opencl_init(); return type[devid]; } #endif /* STARPU_USE_OPENCL */ #ifdef STARPU_HAVE_HWLOC hwloc_obj_t _starpu_opencl_get_hwloc_obj(hwloc_topology_t topology, int devid) { #if !defined(STARPU_SIMGRID) cl_device_id device; starpu_opencl_get_device(devid, &device); return hwloc_opencl_get_device_osdev(topology, device); #else return NULL; #endif } #endif struct _starpu_node_ops _starpu_driver_opencl_node_ops = { .name = "opencl driver", .malloc_on_node = _starpu_opencl_malloc_on_node, .free_on_node = _starpu_opencl_free_on_node, .is_direct_access_supported = _starpu_opencl_is_direct_access_supported, #ifndef STARPU_SIMGRID .copy_interface_to[STARPU_CPU_RAM] = _starpu_opencl_copy_interface_from_opencl_to_cpu, .copy_interface_to[STARPU_OPENCL_RAM] = _starpu_opencl_copy_interface_from_opencl_to_opencl, .copy_interface_from[STARPU_CPU_RAM] = _starpu_opencl_copy_interface_from_cpu_to_opencl, .copy_interface_from[STARPU_OPENCL_RAM] = _starpu_opencl_copy_interface_from_opencl_to_opencl, .copy_data_to[STARPU_CPU_RAM] = _starpu_opencl_copy_data_from_opencl_to_cpu, .copy_data_to[STARPU_OPENCL_RAM] = _starpu_opencl_copy_data_from_opencl_to_opencl, .copy_data_from[STARPU_CPU_RAM] = _starpu_opencl_copy_data_from_cpu_to_opencl, .copy_data_from[STARPU_OPENCL_RAM] = _starpu_opencl_copy_data_from_opencl_to_opencl, /* TODO: copy2D/3D? */ .map[STARPU_CPU_RAM] = _starpu_opencl_map_ram, .unmap[STARPU_CPU_RAM] = _starpu_opencl_unmap_ram, .update_map[STARPU_CPU_RAM] = _starpu_opencl_update_cpu_map, .wait_request_completion = _starpu_opencl_wait_request_completion, .test_request_completion = _starpu_opencl_test_request_completion, #endif }; starpu-1.4.9+dfsg/src/drivers/opencl/driver_opencl.h000066400000000000000000000046641507764646700225530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DRIVER_OPENCL_H__ #define __DRIVER_OPENCL_H__ /** @file */ #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif #ifdef STARPU_USE_OPENCL #define CL_TARGET_OPENCL_VERSION 100 #ifdef __APPLE__ #include #else #include #endif #endif #include #include #pragma GCC visibility push(hidden) void _starpu_opencl_preinit(void); #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) struct _starpu_machine_config; void _starpu_opencl_discover_devices(struct _starpu_machine_config *config); void _starpu_opencl_init(void); int _starpu_opencl_init_context(int devid); int _starpu_opencl_deinit_context(int devid); unsigned _starpu_opencl_get_device_count(void); #ifdef STARPU_HAVE_HWLOC struct _starpu_machine_topology; hwloc_obj_t _starpu_opencl_get_hwloc_obj(hwloc_topology_t topology, int devid); #endif void _starpu_init_opencl_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *); void _starpu_opencl_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); void _starpu_opencl_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); void *_starpu_opencl_worker(void *); extern struct _starpu_node_ops _starpu_driver_opencl_node_ops; #else #define _starpu_opencl_discover_devices(config) ((void) (config)) #endif #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) extern struct _starpu_driver_ops _starpu_driver_opencl_ops; #endif #ifdef STARPU_USE_OPENCL extern char *_starpu_opencl_program_dir; cl_device_type _starpu_opencl_get_device_type(int devid); #endif #pragma GCC visibility pop #endif // __DRIVER_OPENCL_H__ starpu-1.4.9+dfsg/src/drivers/opencl/driver_opencl_init.c000066400000000000000000000033641507764646700235650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include static struct _starpu_driver_info driver_info = { .name_upper = "OpenCL", .name_var = "OPENCL", .name_lower = "opencl", .memory_kind = STARPU_OPENCL_RAM, .alpha = 12.22f, .wait_for_worker_initialization = 1, #if defined(STARPU_USE_OPENCL) .driver_ops = &_starpu_driver_opencl_ops, #ifdef STARPU_HAVE_HWLOC .get_hwloc_obj = _starpu_opencl_get_hwloc_obj, #endif #endif #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) .run_worker = _starpu_opencl_worker, .init_worker_binding = _starpu_opencl_init_worker_binding, .init_worker_memory = _starpu_opencl_init_worker_memory, #endif }; static struct _starpu_memory_driver_info memory_driver_info = { .name_upper = "OpenCL", .worker_archtype = STARPU_OPENCL_WORKER, #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) .ops = &_starpu_driver_opencl_node_ops, #endif }; void _starpu_opencl_preinit(void) { _starpu_driver_info_register(STARPU_OPENCL_WORKER, &driver_info); _starpu_memory_driver_info_register(STARPU_OPENCL_RAM, &memory_driver_info); } starpu-1.4.9+dfsg/src/drivers/opencl/driver_opencl_utils.c000066400000000000000000000550671507764646700237710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #include #include #include "driver_opencl_utils.h" #include "driver_opencl.h" #ifdef STARPU_DEVEL #include #endif #ifdef HAVE_CL_CL_EXT_H #include #endif char *_starpu_opencl_program_dir; static int _starpu_opencl_locate_file(const char *source_file_name, char **located_file_name, char **located_dir_name) { int ret = EXIT_FAILURE; *located_file_name = NULL; *located_dir_name = NULL; _STARPU_DEBUG("Trying to locate <%s>\n", source_file_name); if (access(source_file_name, R_OK) == 0) { _STARPU_CALLOC(*located_file_name, 1, strlen(source_file_name)+1); snprintf(*located_file_name, strlen(source_file_name)+1, "%s", source_file_name); ret = EXIT_SUCCESS; } if (ret == EXIT_FAILURE && _starpu_opencl_program_dir) { _STARPU_CALLOC(*located_file_name, 1, strlen(_starpu_opencl_program_dir)+1+strlen(source_file_name)+1); snprintf(*located_file_name, strlen(_starpu_opencl_program_dir)+1+strlen(source_file_name)+1, "%s/%s", _starpu_opencl_program_dir, source_file_name); _STARPU_DEBUG("Trying to locate with _starpu_opencl_program_dir <%s>\n", *located_file_name); if (access(*located_file_name, R_OK) == 0) ret = EXIT_SUCCESS; } #ifdef STARPU_DEVEL if (ret == EXIT_FAILURE) { _STARPU_CALLOC(*located_file_name, 1, strlen(STARPU_SRC_DIR)+1+strlen(source_file_name)+1); snprintf(*located_file_name, strlen(STARPU_SRC_DIR)+1+strlen(source_file_name)+1, "%s/%s", STARPU_SRC_DIR, source_file_name); _STARPU_DEBUG("Trying to locate with STARPU_SRC_DIR <%s>\n", *located_file_name); if (access(*located_file_name, R_OK) == 0) ret = EXIT_SUCCESS; } #endif if (ret == EXIT_FAILURE) { _STARPU_CALLOC(*located_file_name, 1, strlen(STARPU_OPENCL_DATADIR)+1+strlen(source_file_name)+1); snprintf(*located_file_name, strlen(STARPU_OPENCL_DATADIR)+1+strlen(source_file_name)+1, "%s/%s", STARPU_OPENCL_DATADIR, source_file_name); _STARPU_DEBUG("Trying to locate with STARPU_OPENCL_DATADIR <%s>\n", *located_file_name); if (access(*located_file_name, R_OK) == 0) ret = EXIT_SUCCESS; } if (ret == EXIT_FAILURE) { _STARPU_ERROR("Cannot locate file <%s>\n", source_file_name); } else { char *last = strrchr(*located_file_name, '/'); if (!last) { _STARPU_CALLOC(*located_dir_name, 2, sizeof(char)); snprintf(*located_dir_name, 2, "%s", ""); } else { _STARPU_CALLOC(*located_dir_name, 1, 1+strlen(*located_file_name)); snprintf(*located_dir_name, 1+strlen(*located_file_name), "%s", *located_file_name); (*located_dir_name)[strlen(*located_file_name)-strlen(last)+1] = '\0'; } } return ret; } cl_int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, struct starpu_opencl_program *opencl_programs, const char *kernel_name, int devid) { cl_int err; cl_device_id device; cl_program program; starpu_opencl_get_device(devid, &device); starpu_opencl_get_queue(devid, queue); program = opencl_programs->programs[devid]; if (!program) { _STARPU_DISP("Program not available for device <%d>\n", devid); return CL_INVALID_PROGRAM; } // Create the compute kernel in the program we wish to run *kernel = clCreateKernel(program, kernel_name, &err); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); return CL_SUCCESS; } cl_int starpu_opencl_release_kernel(cl_kernel kernel) { cl_int err; err = clReleaseKernel(kernel); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); return CL_SUCCESS; } static char *_starpu_opencl_load_program_source(const char *filename) { struct stat statbuf; FILE *fh; char *source; int x; int c; int err; fh = fopen(filename, "r"); if (!fh) return NULL; err = stat(filename, &statbuf); STARPU_ASSERT_MSG(err == 0, "could not open file %s\n", filename); _STARPU_MALLOC(source, statbuf.st_size + 1); for(c=fgetc(fh), x=0 ; c != EOF ; c =fgetc(fh), x++) { source[x] = (char)c; } source[x] = '\0'; _STARPU_EXTRA_DEBUG("OpenCL kernel <%s>\n", source); fclose(fh); return source; } static char *_starpu_opencl_load_program_binary(const char *filename, size_t *len) { struct stat statbuf; FILE *fh; char *binary; int err; fh = fopen(filename, "r"); if (fh == 0) return NULL; err = stat(filename, &statbuf); STARPU_ASSERT_MSG(err == 0, "could not open file %s\n", filename); binary = (char *) malloc(statbuf.st_size); if (!binary) { fclose(fh); return binary; } err = fread(binary, statbuf.st_size, 1, fh); STARPU_ASSERT_MSG(err == 1, "could not read from file %s\n", filename); fclose(fh); *len = statbuf.st_size; return binary; } static void _starpu_opencl_create_binary_directory(char *path, size_t maxlen) { static int _directory_created = 0; snprintf(path, maxlen, "%s/.starpu/opencl/", _starpu_get_home_path()); if (_directory_created == 0) { _STARPU_DEBUG("Creating directory %s\n", path); _starpu_mkpath_and_check(path, S_IRWXU); _directory_created = 1; } } char *_starpu_opencl_get_device_type_as_string(int id) { cl_device_type type; type = _starpu_opencl_get_device_type(id); switch (type) { case CL_DEVICE_TYPE_GPU: return "gpu"; case CL_DEVICE_TYPE_ACCELERATOR: return "acc"; case CL_DEVICE_TYPE_CPU: return "cpu"; default: return "unk"; } } static int _starpu_opencl_get_binary_name(char *binary_file_name, size_t maxlen, const char *source_file_name, int dev, cl_device_id device) { char binary_directory[1024]; char *p; cl_int err; cl_uint vendor_id; _starpu_opencl_create_binary_directory(binary_directory, sizeof(binary_directory)); p = strrchr(source_file_name, '/'); snprintf(binary_file_name, maxlen, "%s/%s", binary_directory, p?p:source_file_name); p = strstr(binary_file_name, ".cl"); if (p == NULL) p=binary_file_name + strlen(binary_file_name); err = clGetDeviceInfo(device, CL_DEVICE_VENDOR_ID, sizeof(vendor_id), &vendor_id, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); sprintf(p, ".%s.vendor_id_%d_device_id_%d", _starpu_opencl_get_device_type_as_string(dev), (int)vendor_id, dev); return CL_SUCCESS; } static int _starpu_opencl_compile_or_load_opencl_from_string(const char *opencl_program_source, const char* build_options, struct starpu_opencl_program *opencl_programs, const char* source_file_name) { unsigned int dev; unsigned int nb_devices; nb_devices = _starpu_opencl_get_device_count(); // Iterate over each device for(dev = 0; dev < nb_devices; dev ++) { cl_device_id device; cl_context context; cl_program program; cl_int err; if (opencl_programs) { opencl_programs->programs[dev] = NULL; } starpu_opencl_get_device(dev, &device); starpu_opencl_get_context(dev, &context); if (context == NULL) { _STARPU_DEBUG("[%u] is not a valid OpenCL context\n", dev); continue; } // Create the compute program from the source buffer program = clCreateProgramWithSource(context, 1, (const char **) &opencl_program_source, NULL, &err); if (!program || err != CL_SUCCESS) { _STARPU_DISP("Error: Failed to load program source with options %s!\n", build_options); return EXIT_FAILURE; } // Build the program executable err = clBuildProgram(program, 1, &device, build_options, NULL, NULL); // Get the status { cl_build_status status; size_t len; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &len); if (len > 2) { char *buffer; _STARPU_MALLOC(buffer, len); clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, len, buffer, &len); _STARPU_DISP("Compilation output\n%s\n", buffer); free(buffer); } clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, NULL); if (err != CL_SUCCESS || status != CL_BUILD_SUCCESS) { _STARPU_DISP("Error: Failed to build program executable!\n"); _STARPU_DISP("clBuildProgram: %d - clGetProgramBuildInfo: %d\n", err, status); return EXIT_FAILURE; } } // Store program if (opencl_programs) { opencl_programs->programs[dev] = program; } else { char binary_file_name[2048]; char *binary; size_t binary_len; FILE *fh; err = _starpu_opencl_get_binary_name(binary_file_name, sizeof(binary_file_name), source_file_name, dev, device); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binary_len, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); _STARPU_MALLOC(binary, binary_len); err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(binary), &binary, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); fh = fopen(binary_file_name, "w"); if (fh == NULL) { _STARPU_DISP("Error: Failed to open file <%s>\n", binary_file_name); perror("fopen"); return EXIT_FAILURE; } fwrite(binary, binary_len, 1, fh); fclose(fh); free(binary); _STARPU_DEBUG("File <%s> created\n", binary_file_name); err = clReleaseProgram(program); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); } } return EXIT_SUCCESS; } void starpu_opencl_load_program_source_malloc(const char *source_file_name, char **located_file_name, char **located_dir_name, char **opencl_program_source) { // Locate source file _starpu_opencl_locate_file(source_file_name, located_file_name, located_dir_name); _STARPU_DEBUG("Source file name : <%s>\n", *located_file_name); _STARPU_DEBUG("Source directory name : <%s>\n", *located_dir_name); // Load the compute program from disk into a char * char *source = _starpu_opencl_load_program_source(*located_file_name); if(!source) _STARPU_ERROR("Failed to load compute program from file <%s>!\n", *located_file_name); _STARPU_MALLOC(*opencl_program_source, strlen(source)+1); snprintf(*opencl_program_source, strlen(source)+1, "%s", source); free(source); } void starpu_opencl_load_program_source(const char *source_file_name, char *located_file_name, char *located_dir_name, char *opencl_program_source) { char *_located_file_name; char *_located_dir_name; // Locate source file _starpu_opencl_locate_file(source_file_name, &_located_file_name, &_located_dir_name); _STARPU_DEBUG("Source file name : <%s>\n", _located_file_name); _STARPU_DEBUG("Source directory name : <%s>\n", _located_dir_name); // Load the compute program from disk into a char * char *source = _starpu_opencl_load_program_source(_located_file_name); if(!source) _STARPU_ERROR("Failed to load compute program from file <%s>!\n", _located_file_name); sprintf(located_file_name, "%s", _located_file_name); free(_located_file_name); sprintf(located_dir_name, "%s", _located_dir_name); free(_located_dir_name); sprintf(opencl_program_source, "%s", source); free(source); } static int _starpu_opencl_compile_or_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, const char* build_options) { int nb_devices; int ret; char *located_file_name; char *located_dir_name; char new_build_options[1024]; char *opencl_program_source; // Do not try to load and compile the file if there is no devices nb_devices = starpu_opencl_worker_get_count(); if (nb_devices == 0) return EXIT_SUCCESS; starpu_opencl_load_program_source_malloc(source_file_name, &located_file_name, &located_dir_name, &opencl_program_source); if (!build_options) build_options = ""; if (!strcmp(located_dir_name, "")) { snprintf(new_build_options, sizeof(new_build_options), "%s", build_options); } else { snprintf(new_build_options, sizeof(new_build_options), "-I %s %s", located_dir_name, build_options); } _STARPU_DEBUG("Build options: <%s>\n", new_build_options); ret = _starpu_opencl_compile_or_load_opencl_from_string(opencl_program_source, new_build_options, opencl_programs, source_file_name); _STARPU_DEBUG("located_file_name : <%s>\n", located_file_name); _STARPU_DEBUG("located_dir_name : <%s>\n", located_dir_name); free(located_file_name); free(located_dir_name); free(opencl_program_source); return ret; } int starpu_opencl_compile_opencl_from_file(const char *source_file_name, const char* build_options) { return _starpu_opencl_compile_or_load_opencl_from_file(source_file_name, NULL, build_options); } int starpu_opencl_compile_opencl_from_string(const char *opencl_program_source, const char *file_name, const char* build_options) { return _starpu_opencl_compile_or_load_opencl_from_string(opencl_program_source, build_options, NULL, file_name); } int starpu_opencl_load_opencl_from_string(const char *opencl_program_source, struct starpu_opencl_program *opencl_programs, const char* build_options) { return _starpu_opencl_compile_or_load_opencl_from_string(opencl_program_source, build_options, opencl_programs, NULL); } int starpu_opencl_load_opencl_from_file(const char *source_file_name, struct starpu_opencl_program *opencl_programs, const char* build_options) { return _starpu_opencl_compile_or_load_opencl_from_file(source_file_name, opencl_programs, build_options); } int starpu_opencl_load_binary_opencl(const char *kernel_id, struct starpu_opencl_program *opencl_programs) { unsigned int dev; unsigned int nb_devices; nb_devices = _starpu_opencl_get_device_count(); // Iterate over each device for(dev = 0; dev < nb_devices; dev ++) { cl_device_id device; cl_context context; cl_program program; cl_int err; char *binary; char binary_file_name[1024]; size_t length; cl_int binary_status; opencl_programs->programs[dev] = NULL; starpu_opencl_get_device(dev, &device); starpu_opencl_get_context(dev, &context); if (context == NULL) { _STARPU_DEBUG("[%u] is not a valid OpenCL context\n", dev); continue; } // Load the binary buffer err = _starpu_opencl_get_binary_name(binary_file_name, sizeof(binary_file_name), kernel_id, dev, device); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); binary = _starpu_opencl_load_program_binary(binary_file_name, &length); // Create the compute program from the binary buffer program = clCreateProgramWithBinary(context, 1, &device, &length, (const unsigned char **) &binary, &binary_status, &err); if (!program || err != CL_SUCCESS) { _STARPU_DISP("Error: Failed to load program binary!\n"); return EXIT_FAILURE; } // Build the program executable err = clBuildProgram(program, 1, &device, NULL, NULL, NULL); // Get the status { cl_build_status status; size_t len; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &len); if (len > 2) { char *buffer; _STARPU_MALLOC(buffer, len); clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, len, buffer, &len); _STARPU_DISP("Compilation output\n%s\n", buffer); free(buffer); } clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, NULL); if (err != CL_SUCCESS || status != CL_BUILD_SUCCESS) { _STARPU_DISP("Error: Failed to build program executable!\n"); _STARPU_DISP("clBuildProgram: %d - clGetProgramBuildInfo: %d\n", err, status); return EXIT_FAILURE; } } // Store program opencl_programs->programs[dev] = program; free(binary); } return 0; } int starpu_opencl_unload_opencl(struct starpu_opencl_program *opencl_programs) { unsigned int dev; unsigned int nb_devices; if (!starpu_opencl_worker_get_count()) return 0; nb_devices = _starpu_opencl_get_device_count(); // Iterate over each device for(dev = 0; dev < nb_devices; dev ++) { if (opencl_programs->programs[dev]) { cl_int err; err = clReleaseProgram(opencl_programs->programs[dev]); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); } } return 0; } int starpu_opencl_collect_stats(cl_event event STARPU_ATTRIBUTE_UNUSED) { #if defined(CL_PROFILING_CLOCK_CYCLE_COUNT)||defined(CL_PROFILING_STALL_CYCLE_COUNT)||defined(CL_PROFILING_POWER_CONSUMED) struct starpu_task *task = starpu_task_get_current(); struct starpu_profiling_task_info *info = task->profiling_info; #endif #ifdef CL_PROFILING_CLOCK_CYCLE_COUNT if (starpu_profiling_status_get() && info) { cl_int err; unsigned int clock_cycle_count; size_t size; err = clGetEventProfilingInfo(event, CL_PROFILING_CLOCK_CYCLE_COUNT, sizeof(clock_cycle_count), &clock_cycle_count, &size); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); STARPU_ASSERT(size == sizeof(clock_cycle_count)); info->used_cycles += clock_cycle_count; } #endif #ifdef CL_PROFILING_STALL_CYCLE_COUNT if (starpu_profiling_status_get() && info) { cl_int err; unsigned int stall_cycle_count; size_t size; err = clGetEventProfilingInfo(event, CL_PROFILING_STALL_CYCLE_COUNT, sizeof(stall_cycle_count), &stall_cycle_count, &size); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); STARPU_ASSERT(size == sizeof(stall_cycle_count)); info->stall_cycles += stall_cycle_count; } #endif #ifdef CL_PROFILING_POWER_CONSUMED if (info && (starpu_profiling_status_get() || (task->cl && task->cl->energy_model && task->cl->energy_model->benchmarking))) { cl_int err; double energy_consumed; size_t size; err = clGetEventProfilingInfo(event, CL_PROFILING_POWER_CONSUMED, sizeof(energy_consumed), &energy_consumed, &size); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); STARPU_ASSERT(size == sizeof(energy_consumed)); info->energy_consumed += energy_consumed; } #endif return 0; } const char *starpu_opencl_error_string(cl_int status) { const char *errormsg; switch (status) { case CL_SUCCESS: errormsg = "Success"; break; case CL_DEVICE_NOT_FOUND: errormsg = "Device not found"; break; case CL_DEVICE_NOT_AVAILABLE: errormsg = "Device not available"; break; case CL_COMPILER_NOT_AVAILABLE: errormsg = "Compiler not available"; break; case CL_MEM_OBJECT_ALLOCATION_FAILURE: errormsg = "Memory object allocation failure"; break; case CL_OUT_OF_RESOURCES: errormsg = "Out of resources"; break; case CL_OUT_OF_HOST_MEMORY: errormsg = "Out of host memory"; break; case CL_PROFILING_INFO_NOT_AVAILABLE: errormsg = "Profiling info not available"; break; case CL_MEM_COPY_OVERLAP: errormsg = "Memory copy overlap"; break; case CL_IMAGE_FORMAT_MISMATCH: errormsg = "Image format mismatch"; break; case CL_IMAGE_FORMAT_NOT_SUPPORTED: errormsg = "Image format not supported"; break; case CL_BUILD_PROGRAM_FAILURE: errormsg = "Build program failure"; break; case CL_MAP_FAILURE: errormsg = "Map failure"; break; case CL_INVALID_VALUE: errormsg = "Invalid value"; break; case CL_INVALID_DEVICE_TYPE: errormsg = "Invalid device type"; break; case CL_INVALID_PLATFORM: errormsg = "Invalid platform"; break; case CL_INVALID_DEVICE: errormsg = "Invalid device"; break; case CL_INVALID_CONTEXT: errormsg = "Invalid context"; break; case CL_INVALID_QUEUE_PROPERTIES: errormsg = "Invalid queue properties"; break; case CL_INVALID_COMMAND_QUEUE: errormsg = "Invalid command queue"; break; case CL_INVALID_HOST_PTR: errormsg = "Invalid host pointer"; break; case CL_INVALID_MEM_OBJECT: errormsg = "Invalid memory object"; break; case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: errormsg = "Invalid image format descriptor"; break; case CL_INVALID_IMAGE_SIZE: errormsg = "Invalid image size"; break; case CL_INVALID_SAMPLER: errormsg = "Invalid sampler"; break; case CL_INVALID_BINARY: errormsg = "Invalid binary"; break; case CL_INVALID_BUILD_OPTIONS: errormsg = "Invalid build options"; break; case CL_INVALID_PROGRAM: errormsg = "Invalid program"; break; case CL_INVALID_PROGRAM_EXECUTABLE: errormsg = "Invalid program executable"; break; case CL_INVALID_KERNEL_NAME: errormsg = "Invalid kernel name"; break; case CL_INVALID_KERNEL_DEFINITION: errormsg = "Invalid kernel definition"; break; case CL_INVALID_KERNEL: errormsg = "Invalid kernel"; break; case CL_INVALID_ARG_INDEX: errormsg = "Invalid argument index"; break; case CL_INVALID_ARG_VALUE: errormsg = "Invalid argument value"; break; case CL_INVALID_ARG_SIZE: errormsg = "Invalid argument size"; break; case CL_INVALID_KERNEL_ARGS: errormsg = "Invalid kernel arguments"; break; case CL_INVALID_WORK_DIMENSION: errormsg = "Invalid work dimension"; break; case CL_INVALID_WORK_GROUP_SIZE: errormsg = "Invalid work group size"; break; case CL_INVALID_WORK_ITEM_SIZE: errormsg = "Invalid work item size"; break; case CL_INVALID_GLOBAL_OFFSET: errormsg = "Invalid global offset"; break; case CL_INVALID_EVENT_WAIT_LIST: errormsg = "Invalid event wait list"; break; case CL_INVALID_EVENT: errormsg = "Invalid event"; break; case CL_INVALID_OPERATION: errormsg = "Invalid operation"; break; case CL_INVALID_GL_OBJECT: errormsg = "Invalid GL object"; break; case CL_INVALID_BUFFER_SIZE: errormsg = "Invalid buffer size"; break; case CL_INVALID_MIP_LEVEL: errormsg = "Invalid MIP level"; break; #ifdef CL_PLATFORM_NOT_FOUND_KHR case CL_PLATFORM_NOT_FOUND_KHR: errormsg = "Platform not found"; break; #endif default: errormsg = "unknown OpenCL error"; break; } return errormsg; } void starpu_opencl_display_error(const char *func, const char *file, int line, const char* msg, cl_int status) { _STARPU_MSG("oops in %s (%s:%d) (%s) ... <%s> (%d) \n", func, file, line, msg, starpu_opencl_error_string (status), status); } int starpu_opencl_set_kernel_args(cl_int *error, cl_kernel *kernel, ...) { int i; va_list ap; va_start(ap, kernel); for (i = 0; ; i++) { int size = va_arg(ap, int); if (size == 0) break; cl_mem *ptr = va_arg(ap, cl_mem *); int err = clSetKernelArg(*kernel, i, size, ptr); if (STARPU_UNLIKELY(err != CL_SUCCESS)) { *error = err; break; } } va_end(ap); return i; } starpu-1.4.9+dfsg/src/drivers/opencl/driver_opencl_utils.h000066400000000000000000000017121507764646700237620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_OPENCL_UTILS_H__ #define __STARPU_OPENCL_UTILS_H__ #pragma GCC visibility push(hidden) /** @file */ char *_starpu_opencl_get_device_type_as_string(int id); #define _STARPU_OPENCL_PLATFORM_MAX 4 #pragma GCC visibility pop #endif /* __STARPU_OPENCL_UTILS_H__ */ starpu-1.4.9+dfsg/src/drivers/tcpip/000077500000000000000000000000001507764646700173745ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/drivers/tcpip/driver_tcpip_common.c000066400000000000000000001314401507764646700236050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #include #ifdef MSG_ERRQUEUE #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #define NITER 32 #define SIZE_BANDWIDTH (1024*1024) #define _SELECT_DEBUG 0 #if _SELECT_DEBUG # define _SELECT_PRINT(...) printf(__VA_ARGS__) #else # define _SELECT_PRINT(...) #endif #define _ZC_DEBUG 0 #if _ZC_DEBUG # define _ZC_PRINT(...) printf(__VA_ARGS__) #else # define _ZC_PRINT(...) #endif typedef starpu_ssize_t(*what_t)(int fd, void *buf, size_t count); static int tcpip_initialized = 0; //static int src_node_id = 0; static int nb_sink; static char* host_port; static int index_sink = 0; int _starpu_tcpip_common_multiple_thread; static int is_running; static struct _starpu_spinlock ListLock; static starpu_pthread_t thread_pending; static int thread_pipe[2]; static pthread_t master_thread; struct _starpu_tcpip_socket *tcpip_sock; /* a flag to note whether the socket is local socket*/ static int *local_flag; int _starpu_tcpip_mp_has_local() { for (int i=1; i<=nb_sink; i++) { if(local_flag[i] == 1) return 1; } return 0; } MULTILIST_CREATE_TYPE(_starpu_tcpip_ms_request, event); /*_starpu_tcpip_ms_request_multilist_event*/ MULTILIST_CREATE_TYPE(_starpu_tcpip_ms_request, thread); /*_starpu_tcpip_ms_request_multilist_thread*/ MULTILIST_CREATE_TYPE(_starpu_tcpip_ms_request, pending); /*_starpu_tcpip_ms_request_multilist_pending*/ struct _starpu_tcpip_ms_request { /*member of list of event*/ struct _starpu_tcpip_ms_request_multilist_event event; /*member of list of thread for async send/receive*/ struct _starpu_tcpip_ms_request_multilist_thread thread; /*member of list of pending for except in select*/ struct _starpu_tcpip_ms_request_multilist_pending pending; /*the struct of remote socket to send/receive message*/ struct _starpu_tcpip_socket *remote_sock; /*the message to send/receive*/ char* buf; /*the length of message*/ int len; /*a flag to detect whether the operation is completed*/ int flag_completed; /*a semaphore to detect whether the request is completed*/ starpu_sem_t sem_wait_request; /*a flag to detect send or receive*/ int is_sender; /*the length of message that has been sent/wrote*/ int offset; /*active the flag MSG_ZEROCOPY*/ int zerocopy; /*record the count at the end of send*/ uint32_t send_end; }; MULTILIST_CREATE_INLINES(struct _starpu_tcpip_ms_request, _starpu_tcpip_ms_request, event); MULTILIST_CREATE_INLINES(struct _starpu_tcpip_ms_request, _starpu_tcpip_ms_request, thread); MULTILIST_CREATE_INLINES(struct _starpu_tcpip_ms_request, _starpu_tcpip_ms_request, pending); static struct _starpu_tcpip_ms_request_multilist_thread thread_list; struct _starpu_tcpip_ms_async_event { int is_sender; struct _starpu_tcpip_ms_request_multilist_event *requests; }; static inline struct _starpu_tcpip_ms_async_event *_starpu_tcpip_ms_async_event(union _starpu_async_channel_event *_event) { struct _starpu_tcpip_ms_async_event *event; STARPU_STATIC_ASSERT(sizeof(*event) <= sizeof(*_event)); event = (void *) _event; return event; } /*hash table struct*/ struct _starpu_tcpip_req_pending { int remote_sock; struct _starpu_tcpip_ms_request_multilist_thread send_list; struct _starpu_tcpip_ms_request_multilist_thread recv_list; struct _starpu_tcpip_ms_request_multilist_pending pending_list; UT_hash_handle hh; }; //function thread static void * _starpu_tcpip_thread_pending(void *foo STARPU_ATTRIBUTE_UNUSED) { fd_set reads; fd_set writes; int fdmax=0; struct _starpu_tcpip_req_pending *pending_tables = NULL; struct _starpu_tcpip_req_pending *table, *tmp; FD_ZERO(&reads); FD_ZERO(&writes); FD_SET(thread_pipe[0], &reads); fd_set reads2; fd_set writes2; fdmax = thread_pipe[0]; while(is_running) { _SELECT_PRINT("in while\n"); reads2 = reads; writes2 = writes; int ret; ret=select(fdmax+1, &reads2, &writes2, NULL, NULL); STARPU_ASSERT(ret>=0); if(FD_ISSET(thread_pipe[0], &reads2)) { char buf[16]; int n=read(thread_pipe[0], buf, sizeof(buf)); STARPU_ASSERT(n>=0); if(!is_running) break; int i; for(i=0; iremote_sock->async_sock; int is_sender = req_thread->is_sender; HASH_FIND_INT(pending_tables, &remote_sock, table); if(table == NULL) { _STARPU_MALLOC(table, sizeof(*table)); table->remote_sock = remote_sock; _starpu_tcpip_ms_request_multilist_head_init_thread(&table->send_list); _starpu_tcpip_ms_request_multilist_head_init_thread(&table->recv_list); _starpu_tcpip_ms_request_multilist_head_init_pending(&table->pending_list); HASH_ADD_INT(pending_tables, remote_sock, table); } if(is_sender) { _starpu_tcpip_ms_request_multilist_push_back_thread(&table->send_list, req_thread); FD_SET(remote_sock, &writes); } else { _starpu_tcpip_ms_request_multilist_push_back_thread(&table->recv_list, req_thread); FD_SET(remote_sock, &reads); } if(remote_sock > fdmax) fdmax=remote_sock; } } HASH_ITER(hh, pending_tables, table, tmp) { int remote_sock = table->remote_sock; _SELECT_PRINT("remote_sock in loop is %d\n", remote_sock); void socket_action(what_t what, const char * whatstr, struct _starpu_tcpip_ms_request_multilist_thread *list, fd_set * fdset) { struct _starpu_tcpip_ms_request * req = _starpu_tcpip_ms_request_multilist_begin_thread(list); char* msg = req->buf; int len = req->len; int res = 0; res = what(remote_sock, msg+req->offset, len-req->offset); _SELECT_PRINT("%s res is %d\n", whatstr, res); STARPU_ASSERT_MSG(res > 0, "TCP/IP Master/Slave cannot %s a msg asynchronous with a size of %d Bytes!, the result of %s is %d, the error is %s ", whatstr, len, whatstr, res, strerror(errno)); req->offset+=res; _SELECT_PRINT("offset after %s is %d\n", whatstr, req->offset); if(req->offset == len) { _starpu_tcpip_ms_request_multilist_erase_thread(list, req); if(_starpu_tcpip_ms_request_multilist_empty_thread(list)) FD_CLR(remote_sock, fdset); req->flag_completed = 1; starpu_sem_post(&req->sem_wait_request); /*send the signal that message is ready */ struct _starpu_mp_node *node = NULL; _starpu_tcpip_common_signal(node); } } if(FD_ISSET(remote_sock, &writes2)) { #ifdef SO_ZEROCOPY struct pollfd pfd; pfd.fd = remote_sock; pfd.events = POLLERR|POLLOUT; pfd.revents = 0; if(poll(&pfd, 1, -1) <= 0) error(1, errno, "poll"); if(pfd.revents & POLLERR) { struct _starpu_tcpip_ms_request * req_pending = _starpu_tcpip_ms_request_multilist_begin_pending(&table->pending_list); _ZC_PRINT("nbsend is %d\n", req_pending->remote_sock->nbsend); struct sock_extended_err *serr; struct msghdr mg = {}; struct cmsghdr *cm; uint32_t hi, lo; char control[100]; mg.msg_control = control; mg.msg_controllen = sizeof(control); _ZC_PRINT("before recvmsg\n"); int r = recvmsg(remote_sock, &mg, MSG_ERRQUEUE); // if (r == -1 && errno == EAGAIN) // continue; if (r == -1) error(1, errno, "recvmsg notification"); if (mg.msg_flags & MSG_CTRUNC) error(1, errno, "recvmsg notification: truncated"); cm = CMSG_FIRSTHDR(&mg); if (!cm) error(1, 0, "cmsg: no cmsg"); serr = (void *) CMSG_DATA(cm); if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) error(1, 0, "serr: wrong origin: %u", serr->ee_origin); if (serr->ee_errno != 0) error(1, 0, "serr: wrong error code: %u", serr->ee_errno); if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) req_pending->zerocopy = 0; hi = serr->ee_data; lo = serr->ee_info; _ZC_PRINT("h=%u l=%u\n", hi, lo); STARPU_ASSERT(lo == req_pending->remote_sock->nback); STARPU_ASSERT(hi < req_pending->remote_sock->nbsend); req_pending->remote_sock->nback = hi+1; _ZC_PRINT("send end is %d\n", req_pending->send_end); while(!_starpu_tcpip_ms_request_multilist_empty_pending(&table->pending_list)) { struct _starpu_tcpip_ms_request * req_tmp = _starpu_tcpip_ms_request_multilist_begin_pending(&table->pending_list); if(hi+1 >= req_tmp->send_end) { _starpu_tcpip_ms_request_multilist_erase_pending(&table->pending_list, req_tmp); if(_starpu_tcpip_ms_request_multilist_empty_thread(&table->send_list)&&_starpu_tcpip_ms_request_multilist_empty_pending(&table->pending_list)) FD_CLR(remote_sock, &writes); req_tmp->flag_completed = 1; starpu_sem_post(&req_tmp->sem_wait_request); /*send the signal that message is ready*/ struct _starpu_mp_node *node = NULL; _starpu_tcpip_common_signal(node); } else break; } } else { if(!(_starpu_tcpip_ms_request_multilist_empty_thread(&table->send_list))) { struct _starpu_tcpip_ms_request * req = _starpu_tcpip_ms_request_multilist_begin_thread(&table->send_list); char* msg = req->buf; int len = req->len; if(req->remote_sock->zerocopy) { _ZC_PRINT("msg len is %d\n", len); _ZC_PRINT("offset before send is %d\n", req->offset); if(req->offset == 0) { _starpu_tcpip_ms_request_multilist_push_back_pending(&table->pending_list, req); } int res = send(remote_sock, msg+req->offset, len-req->offset, MSG_ZEROCOPY); _ZC_PRINT("send return %d\n", res); STARPU_ASSERT_MSG(res > 0, "TCP/IP Master/Slave cannot send a msg asynchronous with a size of %d Bytes!, the result of send is %d, the error is %s ", len, res, strerror(errno)); req->remote_sock->nbsend++; req->offset+=res; _ZC_PRINT("offset after send is %d\n", req->offset); if(req->offset == len) { req->send_end = req->remote_sock->nbsend; _ZC_PRINT("send end after send is %d\n", req->send_end); _starpu_tcpip_ms_request_multilist_erase_thread(&table->send_list, req); //if(_starpu_tcpip_ms_request_multilist_empty_thread(&table->send_list)) //we need this to check whether the msg are all sent, we would have to remove POLLOUT from poll.events //FD_CLR(remote_sock, &writes); } } else #endif { socket_action((what_t)write, "write", &table->send_list, &writes); } #ifdef SO_ZEROCOPY } } #endif } if(FD_ISSET(remote_sock, &reads2)) { socket_action(read, "read", &table->recv_list, &reads); } /*if the recv/send_list is empty, delete and free hash table*/ if(_starpu_tcpip_ms_request_multilist_empty_thread(&table->send_list)&&_starpu_tcpip_ms_request_multilist_empty_thread(&table->recv_list)&&_starpu_tcpip_ms_request_multilist_empty_pending(&table->pending_list)) { HASH_DEL(pending_tables, table); free(table); } } } /*all hash tables should be deleted*/ STARPU_ASSERT(pending_tables == NULL); return 0; } static void handler(int num STARPU_ATTRIBUTE_UNUSED){} int _starpu_tcpip_common_mp_init() { //Here we supposed the programmer called two times starpu_init. if (tcpip_initialized) return -ENODEV; /*get the slave number*/ nb_sink = starpu_getenv_number("STARPU_TCPIP_MS_SLAVES"); //_TCPIP_PRINT("the slave number is %d\n", nb_sink); if (nb_sink <= 0) /* No slave */ return 0; tcpip_initialized = 1; _starpu_tcpip_common_multiple_thread = starpu_getenv_number_default("STARPU_TCPIP_MS_MULTIPLE_THREAD", 0); master_thread = pthread_self(); signal(SIGUSR1, handler); /*initialize the pipe*/ int r=pipe(thread_pipe); STARPU_ASSERT(r==0); _starpu_spin_init(&ListLock); /*initialize the thread*/ _starpu_tcpip_ms_request_multilist_head_init_thread(&thread_list); STARPU_HG_DISABLE_CHECKING(is_running); is_running = 1; STARPU_PTHREAD_CREATE(&thread_pending, NULL, _starpu_tcpip_thread_pending, NULL); /*get host info*/ host_port = starpu_getenv("STARPU_TCPIP_MS_MASTER"); _STARPU_CALLOC(tcpip_sock, nb_sink + 1, sizeof(struct _starpu_tcpip_socket)); _STARPU_MALLOC(local_flag, (nb_sink + 1)*sizeof(int)); struct sockaddr_in* sink_addr_list; _STARPU_MALLOC(sink_addr_list, (nb_sink + 1)*sizeof(struct sockaddr_in)); #if _TCPIP_DEBUG char clnt_ip[20]; #endif /*master part*/ if(!host_port) { int source_sock_init = 0; int local_sock = 0; struct sockaddr_un name; struct sockaddr_in source_addr_init; socklen_t source_addr_init_size = sizeof(source_addr_init); unsigned short port = starpu_getenv_number_default("STARPU_TCPIP_MS_PORT", 1234); int init_res = master_init(1, &source_sock_init, &local_sock, &source_addr_init, &source_addr_init_size, &name, htonl(INADDR_ANY), htons(port), 3*nb_sink); if(init_res != 0) return -1; _TCPIP_PRINT("source_sock_init is %d\n", source_sock_init); _TCPIP_PRINT("local_sock is %d\n", local_sock); tcpip_sock[0].sync_sock = -1; tcpip_sock[0].async_sock = -1; tcpip_sock[0].notif_sock = -1; tcpip_sock[0].zerocopy = -1; /*source socket is not local socket*/ if(local_sock == 0) local_flag[0] = 0; /*source socket is local socket*/ else local_flag[0] = 1; int i; /*connect each slave, generate sync socket*/ for (i=1; i<=nb_sink; i++) { int sink_sock; int local_sock_flag; int accept_res = master_accept(&sink_sock, source_sock_init, local_sock, NULL, &local_sock_flag); if(accept_res != 0) return -1; _TCPIP_PRINT("sink_sock is %d\n", sink_sock); tcpip_sock[i].sync_sock = sink_sock; local_flag[i] = local_sock_flag; } for (i=1; i<=nb_sink; i++) { /*write the id to slave*/ int id_sink = i; WRITE(tcpip_sock[i].sync_sock, &id_sink, sizeof(id_sink)); _TCPIP_PRINT("write to slave %d its index\n", id_sink); /*receive the slave address with the random allocated port number connect to other slaves*/ struct sockaddr_in buf_addr; READ(tcpip_sock[i].sync_sock, &buf_addr, sizeof(buf_addr)); sink_addr_list[i] = buf_addr; _TCPIP_PRINT("Message from slave (slave address) is , ip : %s, port : %d.\n", inet_ntop(AF_INET, &sink_addr_list[i].sin_addr, clnt_ip, sizeof(clnt_ip)), ntohs(sink_addr_list[i].sin_port)); } /*connect each slave, generate async socket and notif socket*/ for (i=1; i<=2*nb_sink; i++) { int sink_sock2; int zerocopy; int accept_res = master_accept(&sink_sock2, source_sock_init, local_sock, &zerocopy, NULL); if(accept_res != 0) return -1; int i_sink; /*get slave index*/ READ(sink_sock2, &i_sink, sizeof(i_sink)); _TCPIP_PRINT("the index received is %d, the index in loop is %d\n", i_sink, i); _TCPIP_PRINT("sink_sock2 is %d\n", sink_sock2); if(tcpip_sock[i_sink].async_sock == 0) { tcpip_sock[i_sink].async_sock = sink_sock2; tcpip_sock[i_sink].zerocopy = zerocopy; } else { STARPU_ASSERT(tcpip_sock[i_sink].notif_sock == 0); tcpip_sock[i_sink].notif_sock = sink_sock2; } } close(source_sock_init); if (starpu_getenv_number_default("STARPU_TCPIP_USE_LOCAL_SOCKET", 1) != 0) { close(local_sock); unlink(name.sun_path); } for(i=0; i<=nb_sink; i++) { _TCPIP_PRINT("sock_list[%d] in master part is %d\n", i, tcpip_sock[i].sync_sock); } for(i=0; i<=nb_sink; i++) { _TCPIP_PRINT("async_sock_list[%d] in master part is %d\n", i, tcpip_sock[i].async_sock); } for(i=0; i<=nb_sink; i++) { _TCPIP_PRINT("notif_sock_list[%d] in master part is %d\n", i, tcpip_sock[i].notif_sock); } /*write the address of one slave to another*/ int j; for (i=1; i<=nb_sink; i++) { for(j=1; jai_next) { int local_sock_flag; int connect_res; int try = 0; while(1) { connect_res = slave_connect(&source_sock, cur, &sink_addr, NULL, NULL, &local_sock_flag); if (connect_res == 0) break; if (errno != ECONNREFUSED || try++ >= 10) break; sleep(1); } if(connect_res == 1) continue; else if(connect_res < 0) return -1; _TCPIP_PRINT("source_sock is %d\n", source_sock); tcpip_sock[0].sync_sock = source_sock; local_flag[0] = local_sock_flag; break; } freeaddrinfo(res); if (!cur) { fprintf(stderr, "could not connect\n"); return -1; } /*****************************connection between slaves********************************/ /*get slave index in master sock_list*/ READ(source_sock, &index_sink, sizeof(index_sink)); tcpip_sock[index_sink].sync_sock = -1; tcpip_sock[index_sink].async_sock = -1; tcpip_sock[index_sink].notif_sock = -1; tcpip_sock[index_sink].zerocopy = -1; _TCPIP_PRINT("index_sink read from master is %d\n", index_sink); int sink_serv_sock = 0; int sink_local_sock = 0; struct sockaddr_un sink_name; struct sockaddr_in sink_serv_addr; socklen_t sink_serv_addr_size = sizeof(sink_serv_addr); int init_res = master_init(0, &sink_serv_sock, &sink_local_sock, &sink_serv_addr, &sink_serv_addr_size, &sink_name, sink_addr.sin_addr.s_addr, 0, 3*(nb_sink-index_sink)); if(init_res != 0) return -1; _TCPIP_PRINT("sink_serv_sock is %d\n", sink_serv_sock); _TCPIP_PRINT("sink_local_sock is %d\n", sink_local_sock); /*sink serv socket is not local socket*/ if(sink_local_sock == 0) local_flag[index_sink] = 0; /*sink serv socket is local socket*/ else local_flag[index_sink] = 1; /*send slave address to master*/ WRITE(source_sock, &sink_serv_addr, sink_serv_addr_size); /*async and notif communication*/ int source_async_sock; int source_notif_sock; struct addrinfo *res1,*cur1; struct addrinfo hints1; memset(&hints1, 0, sizeof(hints1)); hints1.ai_socktype = SOCK_STREAM; int gaierrno1 = getaddrinfo(host, port, &hints1, &res1); if (gaierrno1) { fprintf(stderr,"getaddrinfo: %s\n", gai_strerror(gaierrno1)); return -1; } for(cur1 = res1; cur1; cur1 = cur1->ai_next) { /*async connect*/ int zerocopy; int connect_res = slave_connect(&source_async_sock, cur1, NULL, NULL, &zerocopy, NULL); if(connect_res == 1) continue; else if(connect_res < 0) return -1; _TCPIP_PRINT("source_async_sock is %d\n", source_async_sock); tcpip_sock[0].async_sock = source_async_sock; tcpip_sock[0].zerocopy = zerocopy; /*notif connect*/ int connect_notif_res = slave_connect(&source_notif_sock, cur1, NULL, NULL, NULL, NULL); if(connect_notif_res == 1) continue; else if(connect_notif_res < 0) { close(source_async_sock); return -1; } _TCPIP_PRINT("source_notif_sock is %d\n", source_notif_sock); tcpip_sock[0].notif_sock = source_notif_sock; break; } freeaddrinfo(res1); if (!cur1) { fprintf(stderr, "could not connect async\n"); return -1; } /*send slave index to master async socket*/ WRITE(source_async_sock, &index_sink, sizeof(index_sink)); /*send slave index to master notif socket*/ WRITE(source_notif_sock, &index_sink, sizeof(index_sink)); /*communication between slaves*/ int j; /*the active part*/ for (j=1; jtopology; int ntcpipcores = starpu_getenv_number("STARPU_NTCPIPMSTHREADS"); if (ntcpipcores == -1) { int nhyperthreads = topology->nhwpus / topology->nhwworker[STARPU_CPU_WORKER][0]; node->nb_cores = topology->nusedpus / nhyperthreads; } else node->nb_cores = ntcpipcores; } int _starpu_tcpip_common_recv_is_ready(const struct _starpu_mp_node *mp_node) { fd_set set; int fd = mp_node->mp_connection.tcpip_mp_connection->sync_sock; int res; struct timeval tv = { .tv_sec = 0, .tv_usec = 0 }; FD_ZERO(&set); FD_SET(fd, &set); while((res = select(fd+1, &set, NULL, NULL, &tv)) == -1 && errno == EINTR); STARPU_ASSERT_MSG(res >= 0, "There is an error when doing socket select %s %d\n", strerror(errno), errno); return res; } int _starpu_tcpip_common_notif_recv_is_ready(const struct _starpu_mp_node *mp_node) { fd_set set; int fd = mp_node->mp_connection.tcpip_mp_connection->notif_sock; int res; struct timeval tv = { .tv_sec = 0, .tv_usec = 0 }; FD_ZERO(&set); FD_SET(fd, &set); while((res = select(fd+1, &set, NULL, NULL, &tv)) == -1 && errno == EINTR); STARPU_ASSERT_MSG(res >= 0, "There is an error when doing socket select %s %d\n", strerror(errno), errno); return res; } int _starpu_tcpip_common_notif_send_is_ready(const struct _starpu_mp_node *mp_node) { fd_set set; int fd = mp_node->mp_connection.tcpip_mp_connection->notif_sock; int res; struct timeval tv = { .tv_sec = 0, .tv_usec = 0 }; FD_ZERO(&set); FD_SET(fd, &set); while((res = select(fd+1, NULL, &set, NULL, &tv)) == -1 && errno == EINTR); STARPU_ASSERT_MSG(res >= 0, "There is an error when doing socket select %s %d\n", strerror(errno), errno); return res; } void _starpu_tcpip_common_wait(struct _starpu_mp_node *mp_node) { fd_set reads; fd_set writes; int fd_sync = mp_node->mp_connection.tcpip_mp_connection->sync_sock; int fd_notif = mp_node->mp_connection.tcpip_mp_connection->notif_sock; int fd_max = 0; int res; FD_ZERO(&reads); FD_ZERO(&writes); FD_SET(fd_sync, &reads); if(fd_sync > fd_max) fd_max = fd_sync; sigset_t sigmask; sigemptyset(&sigmask); STARPU_PTHREAD_MUTEX_LOCK(&mp_node->message_queue_mutex); if(!mp_message_list_empty(&mp_node->message_queue) || !_starpu_mp_event_list_empty(&mp_node->event_queue)) { FD_SET(fd_notif, &writes); if(fd_notif > fd_max) fd_max = fd_notif; } STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->message_queue_mutex); res = pselect(fd_max+1, &reads, &writes, NULL, NULL, &sigmask); if(res < 0) STARPU_ASSERT_MSG(errno == EINTR, "There is an error when doing socket pselect %s %d\n", strerror(errno), errno); } void _starpu_tcpip_common_signal(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED) { int res; res = pthread_kill(master_thread, SIGUSR1); STARPU_ASSERT(res == 0); } static void __starpu_tcpip_common_send(const struct _starpu_mp_node *node, void *msg, int len, void * event, int notif); static void __starpu_tcpip_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event, int notif); static void _starpu_tcpip_common_action_socket(what_t what, const char * whatstr, int is_sender, const struct _starpu_mp_node *node, struct _starpu_tcpip_socket *remote_sock, void *msg, int len, void * event, int notif); static void _starpu_tcpip_common_send_to_socket(const struct _starpu_mp_node *node, struct _starpu_tcpip_socket *dst_sock, void *msg, int len, void * event, int notif); static void _starpu_tcpip_common_recv_from_socket(const struct _starpu_mp_node *node, struct _starpu_tcpip_socket *src_sock, void *msg, int len, void * event, int notif); /* SEND */ void _starpu_tcpip_common_mp_send(const struct _starpu_mp_node *node, void *msg, int len) { __starpu_tcpip_common_send(node, msg, len, NULL, 0); } void _starpu_tcpip_common_nt_send(const struct _starpu_mp_node *node, void *msg, int len) { __starpu_tcpip_common_send(node, msg, len, NULL, 1); } /* SEND to source node */ void _starpu_tcpip_common_send(const struct _starpu_mp_node *node, void *msg, int len, void * event) { __starpu_tcpip_common_send(node, msg, len, event, 0); } static void __starpu_tcpip_common_send(const struct _starpu_mp_node *node, void *msg, int len, void * event, int notif) { _starpu_tcpip_common_send_to_socket(node, node->mp_connection.tcpip_mp_connection, msg, len, event, notif); } /* SEND to any node */ void _starpu_tcpip_common_send_to_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int devid, void *msg, int len, void * event) { struct _starpu_tcpip_socket *dst_sock = &tcpip_sock[devid]; _starpu_tcpip_common_send_to_socket(node, dst_sock, msg, len, event, 0); } static void _starpu_tcpip_common_send_to_socket(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, struct _starpu_tcpip_socket *dst_sock, void *msg, int len, void * event, int notif) { _starpu_tcpip_common_action_socket((what_t)write, "send", 1, node, dst_sock, msg, len, event, notif); } /* RECV */ void _starpu_tcpip_common_mp_recv(const struct _starpu_mp_node *node, void *msg, int len) { __starpu_tcpip_common_recv(node, msg, len, NULL, 0); } void _starpu_tcpip_common_nt_recv(const struct _starpu_mp_node *node, void *msg, int len) { __starpu_tcpip_common_recv(node, msg, len, NULL, 1); } void _starpu_tcpip_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event) { __starpu_tcpip_common_recv(node, msg, len, event, 0); } /* RECV from source node */ static void __starpu_tcpip_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event, int notif) { _starpu_tcpip_common_recv_from_socket(node, node->mp_connection.tcpip_mp_connection, msg, len, event, notif); } /* RECV from any node */ void _starpu_tcpip_common_recv_from_device(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, int devid, void *msg, int len, void * event) { struct _starpu_tcpip_socket *src_sock = &tcpip_sock[devid]; _starpu_tcpip_common_recv_from_socket(node, src_sock, msg, len, event, 0); } static void _starpu_tcpip_common_recv_from_socket(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, struct _starpu_tcpip_socket *src_sock, void *msg, int len, void * event, int notif) { _starpu_tcpip_common_action_socket(read, "recv", 0, node, src_sock, msg, len, event, notif); } /*do refactor for SEND to and RECV from socket */ static void _starpu_tcpip_common_action_socket(what_t what, const char * whatstr, int is_sender, const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, struct _starpu_tcpip_socket *remote_sock, void *msg, int len, void * event, int notif) { if (event) { _TCPIP_PRINT("async %s\n", whatstr); _TCPIP_PRINT("%s %d bytes to %d message %x\n", whatstr, len, remote_sock->async_sock, *((int *) (uintptr_t)msg)); /* Asynchronous*/ struct _starpu_async_channel * channel = event; struct _starpu_tcpip_ms_async_event *tcpip_ms_event = _starpu_tcpip_ms_async_event(&channel->event); tcpip_ms_event->is_sender = is_sender; /* call by sink, we need to initialize some parts, for host it's done in data_request.c */ if (channel->node_ops == NULL) tcpip_ms_event->requests = NULL; /* Initialize the list */ if (tcpip_ms_event->requests == NULL) { _STARPU_MALLOC(tcpip_ms_event->requests, sizeof(*tcpip_ms_event->requests)); _starpu_tcpip_ms_request_multilist_head_init_event(tcpip_ms_event->requests); } struct _starpu_tcpip_ms_request *req; _STARPU_MALLOC(req, sizeof(*req)); _starpu_tcpip_ms_request_multilist_init_thread(req); _starpu_tcpip_ms_request_multilist_init_event(req); _starpu_tcpip_ms_request_multilist_init_pending(req); #ifdef STARPU_SANITIZE_ADDRESS /* Poke data immediately, to get a good backtrace where bogus * pointers come from */ if (is_sender) { char *c = malloc(len); memcpy(c, msg, len); free(c); } else memset(msg, 0, len); #endif /*complete the fields*/ req->remote_sock = remote_sock; req->len = len; req->buf = msg; req->flag_completed = 0; STARPU_HG_DISABLE_CHECKING(req->flag_completed); starpu_sem_init(&req->sem_wait_request, 0, 0); req->is_sender = is_sender; req->offset = 0; req->send_end = 0; _SELECT_PRINT("%s push back\n", whatstr); _starpu_spin_lock(&ListLock); _starpu_tcpip_ms_request_multilist_push_back_thread(&thread_list, req); _starpu_spin_unlock(&ListLock); char buf = 0; int res; while((res = write(thread_pipe[1], &buf, 1)) == -1 && errno == EINTR) ; channel->starpu_mp_common_finished_receiver++; channel->starpu_mp_common_finished_sender++; _starpu_tcpip_ms_request_multilist_push_back_event(tcpip_ms_event->requests, req); } else { _TCPIP_PRINT("sync %s\n", whatstr); /* Synchronous send */ if(!notif) { _TCPIP_PRINT("dst_sock is %d\n", remote_sock->sync_sock); int res, offset = 0; while(offset < len) { while((res = what(remote_sock->sync_sock, (char*)msg+offset, len-offset)) == -1 && errno == EINTR) ; _TCPIP_PRINT("msg after write is %x, res is %d\n", *((int *) (uintptr_t)msg), res); STARPU_ASSERT_MSG(res != 0 && !(res == -1 && errno == ECONNRESET), "TCP/IP Master/Slave noticed that %s (peer %d) has exited unexpectedly", node->kind == STARPU_NODE_TCPIP_SOURCE ? "the master" : "some slave", node->peer_id); STARPU_ASSERT_MSG(res > 0, "TCP/IP Master/Slave cannot %s a msg synchronous with a size of %d Bytes!, the result of %s is %d, the error is %s ", whatstr, len, whatstr, res, strerror(errno)); offset+=res; } } else { _TCPIP_PRINT("dst_sock is %d\n", remote_sock->notif_sock); int res, offset = 0; while(offset < len) { while((res = what(remote_sock->notif_sock, (char*)msg+offset, len-offset)) == -1 && errno == EINTR) ; _TCPIP_PRINT("msg after write is %x, res is %d\n", *((int *) (uintptr_t)msg), res); STARPU_ASSERT_MSG(res != 0 && !(res == -1 && errno == ECONNRESET), "TCP/IP Master/Slave noticed that %s (peer %d) has exited unexpectedly", node->kind == STARPU_NODE_TCPIP_SOURCE ? "the master" : "some slave", node->peer_id); STARPU_ASSERT_MSG(res > 0, "TCP/IP Master/Slave cannot %s a msg notification with a size of %d Bytes!, the result of %s is %d, the error is %s ", whatstr, len, whatstr, res, strerror(errno)); offset+=res; } } _TCPIP_PRINT("finish sync send\n"); } } static void _starpu_tcpip_common_polling_node(struct _starpu_mp_node * node) { /* poll the asynchronous messages.*/ if (node != NULL) { STARPU_PTHREAD_MUTEX_LOCK(&node->connection_mutex); while(node->nt_recv_is_ready(node)) { enum _starpu_mp_command answer; void *arg; int arg_size; //_TCPIP_PRINT("polling_node\n"); answer = _starpu_nt_common_recv_command(node, &arg, &arg_size); if(!_starpu_src_common_store_message(node,arg,arg_size,answer)) { _STARPU_ERROR("incorrect command '%s'", _starpu_mp_common_command_to_string(answer)); } } STARPU_PTHREAD_MUTEX_UNLOCK(&node->connection_mutex); } } /*do refactor for test event and wait request completion */ static unsigned int _starpu_tcpip_common_action_completion(int wait, struct _starpu_async_channel * event) { struct _starpu_tcpip_ms_async_event *tcpip_ms_event = _starpu_tcpip_ms_async_event(&event->event); if (tcpip_ms_event->requests != NULL) { struct _starpu_tcpip_ms_request * req; struct _starpu_tcpip_ms_request * req_next; //_TCPIP_PRINT("event requests is %p\n", req); for (req = _starpu_tcpip_ms_request_multilist_begin_event(tcpip_ms_event->requests); req != _starpu_tcpip_ms_request_multilist_end_event(tcpip_ms_event->requests); req = req_next) { req_next = _starpu_tcpip_ms_request_multilist_next_event(req); int flag = 0; if(!wait) flag = req->flag_completed; //_TCPIP_PRINT("the operation is finished? %d\n", flag); /*operation completed*/ if (flag || wait) { starpu_sem_wait(&req->sem_wait_request); _starpu_tcpip_ms_request_multilist_erase_event(tcpip_ms_event->requests, req); STARPU_HG_ENABLE_CHECKING(req->flag_completed); free(req); if (tcpip_ms_event->is_sender) event->starpu_mp_common_finished_sender--; else event->starpu_mp_common_finished_receiver--; //_TCPIP_PRINT("common finished sender is %d\n", event->starpu_mp_common_finished_sender); //_TCPIP_PRINT("common finished receiver is %d\n", event->starpu_mp_common_finished_receiver); } } /* When the list is empty, we finished to wait each request */ if (_starpu_tcpip_ms_request_multilist_empty_event(tcpip_ms_event->requests)) { /* Destroy the list */ free(tcpip_ms_event->requests); tcpip_ms_event->requests = NULL; } } //incoming ack from devices int i = 0; while((!wait && i++ == 0)||(wait && event->starpu_mp_common_finished_sender > 0) || (wait && event->starpu_mp_common_finished_receiver > 0)) { _starpu_tcpip_common_polling_node(event->polling_node_sender); _starpu_tcpip_common_polling_node(event->polling_node_receiver); } if(!wait) return !event->starpu_mp_common_finished_sender && !event->starpu_mp_common_finished_receiver; else return 0; } /* - In device to device communications, the first ack received by host * is considered as the sender (but it cannot be, in fact, the sender) */ unsigned int _starpu_tcpip_common_test_event(struct _starpu_async_channel * event) { return _starpu_tcpip_common_action_completion(0, event); } /* - In device to device communications, the first ack received by host * is considered as the sender (but it cannot be, in fact, the sender) */ /* Only used at starpu_shutdown */ void _starpu_tcpip_common_wait_request_completion(struct _starpu_async_channel * event) { _starpu_tcpip_common_action_completion(1, event); } void _starpu_tcpip_common_barrier(void) { char buf = 0; //_TCPIP_PRINT("index_sink (in common barrier) is %d\n", index_sink); int ret; /*master part*/ if(index_sink == 0) { int i; for(i=1; i 0, "Cannot read from slave!"); } for(i=1; i 0, "Cannot write to slave!"); } } /*slave part*/ else { //_TCPIP_PRINT("master socket in sock list is %d\n", sock_list[0]); ret=write(tcpip_sock[0].sync_sock, &buf, 1); //printf("ret1 is %d\n", ret); STARPU_ASSERT_MSG(ret > 0, "Cannot write to master!"); ret=read(tcpip_sock[0].sync_sock, &buf, 1); //printf("ret4 is %d\n", ret); STARPU_ASSERT_MSG(ret > 0, "Cannot read from master!"); } _TCPIP_PRINT("finish common barrier\n"); } /* Compute bandwidth and latency between source and sink nodes * Source node has to have the entire set of times at the end */ void _starpu_tcpip_common_measure_bandwidth_latency(double timing_dtod[STARPU_MAXTCPIPDEVS][STARPU_MAXTCPIPDEVS], double latency_dtod[STARPU_MAXTCPIPDEVS][STARPU_MAXTCPIPDEVS]) { int ret; unsigned iter; //_TCPIP_PRINT("index_sink is %d\n", index_sink); char * buf; _STARPU_MALLOC(buf, SIZE_BANDWIDTH); memset(buf, 0, SIZE_BANDWIDTH); _starpu_tcpip_common_mp_init(); int sender, receiver; for(sender = 0; sender < nb_sink+1; sender++) { for(receiver = 0; receiver < nb_sink+1; receiver++) { //Node can't be a sender and a receiver if(sender == receiver) continue; if (!index_sink) _STARPU_DISP("measuring from %d to %d\n", sender, receiver); _starpu_tcpip_common_barrier(); // _TCPIP_PRINT("sender id is %d\n", sender); // _TCPIP_PRINT("index_sink is %d\n", index_sink); if(index_sink == sender) { //_TCPIP_PRINT("sender id is %d\n", sender); double start, end; /* measure bandwidth sender to receiver */ start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { ret = write(tcpip_sock[receiver].sync_sock, buf, SIZE_BANDWIDTH); STARPU_ASSERT_MSG(ret == SIZE_BANDWIDTH, "short write!"); STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); ret = read(tcpip_sock[receiver].sync_sock, buf, 1); STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); } end = starpu_timing_now(); timing_dtod[sender][receiver] = (end - start)/NITER/SIZE_BANDWIDTH; /* measure latency sender to receiver */ start = starpu_timing_now(); for (iter = 0; iter < NITER; iter++) { ret = write(tcpip_sock[receiver].sync_sock, buf, 1); STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); ret = read(tcpip_sock[receiver].sync_sock, buf, 1); STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); } end = starpu_timing_now(); latency_dtod[sender][receiver] = (end - start)/NITER/2; } // _TCPIP_PRINT("receiver id is %d\n", receiver); // _TCPIP_PRINT("index_sink is %d\n", index_sink); if (index_sink == receiver) { //_TCPIP_PRINT("receiver id is %d\n", receiver); /* measure bandwidth sender to receiver*/ for (iter = 0; iter < NITER; iter++) { size_t pending = SIZE_BANDWIDTH; while (pending) { ret = read(tcpip_sock[sender].sync_sock, buf, SIZE_BANDWIDTH); STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); pending -= ret; } ret = write(tcpip_sock[sender].sync_sock, buf, 1); STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); } /* measure latency sender to receiver */ for (iter = 0; iter < NITER; iter++) { ret = read(tcpip_sock[sender].sync_sock, buf, 1); STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); ret = write(tcpip_sock[sender].sync_sock, buf, 1); STARPU_ASSERT_MSG(ret > 0, "Bandwidth of TCP/IP Master/Slave cannot be measured !"); } } } /* When a sender finished its work, it has to send its results to the master */ /* Master doesn't need to send to itself its data */ if (sender == 0) goto print; /* if we are the sender, we send the data */ if (sender == index_sink) { write(tcpip_sock[0].sync_sock, timing_dtod[sender], sizeof(timing_dtod[sender])); write(tcpip_sock[0].sync_sock, latency_dtod[sender], sizeof(latency_dtod[sender])); } /* the master node receives the data */ if (index_sink == 0) { read(tcpip_sock[sender].sync_sock, timing_dtod[sender], sizeof(timing_dtod[sender])); read(tcpip_sock[sender].sync_sock, latency_dtod[sender], sizeof(latency_dtod[sender])); } print: if (index_sink == 0) { for(receiver = 0; receiver < nb_sink+1; receiver++) { if(sender == receiver) continue; _STARPU_DISP("BANDWIDTH %d -> %d %.0fMB/s %.2fus\n", sender, receiver, 1/timing_dtod[sender][receiver], latency_dtod[sender][receiver]); } } } free(buf); } starpu-1.4.9+dfsg/src/drivers/tcpip/driver_tcpip_common.h000066400000000000000000000066301507764646700236140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DRIVER_TCPIP_COMMON_H__ #define __DRIVER_TCPIP_COMMON_H__ /** @file */ #include #include #pragma GCC visibility push(hidden) #ifdef STARPU_USE_TCPIP_MASTER_SLAVE extern int _starpu_tcpip_common_multiple_thread; struct _starpu_tcpip_socket { /* socket used for synchronous communications*/ int sync_sock; /* socket used for asynchronous communications*/ int async_sock; /* socket used for notification communications*/ int notif_sock; /* a flag to detect whether the socket can be used for MSG_ZEROCOPY */ int zerocopy; /* how many times is this message split up to send */ unsigned nbsend; unsigned nback; }; extern struct _starpu_tcpip_socket *tcpip_sock; int _starpu_tcpip_mp_has_local(); int _starpu_tcpip_common_mp_init(); void _starpu_tcpip_common_mp_deinit(); int _starpu_tcpip_common_is_src_node(); int _starpu_tcpip_common_get_src_node(); int _starpu_tcpip_common_is_mp_initialized(); int _starpu_tcpip_common_recv_is_ready(const struct _starpu_mp_node *mp_node); int _starpu_tcpip_common_notif_recv_is_ready(const struct _starpu_mp_node *mp_node); int _starpu_tcpip_common_notif_send_is_ready(const struct _starpu_mp_node *mp_node); void _starpu_tcpip_common_wait(struct _starpu_mp_node *mp_node); void _starpu_tcpip_common_signal(const struct _starpu_mp_node *mp_node); void _starpu_tcpip_common_mp_initialize_src_sink(struct _starpu_mp_node *node); void _starpu_tcpip_common_send(const struct _starpu_mp_node *node, void *msg, int len, void * event); void _starpu_tcpip_common_recv(const struct _starpu_mp_node *node, void *msg, int len, void * event); void _starpu_tcpip_common_mp_send(const struct _starpu_mp_node *node, void *msg, int len); void _starpu_tcpip_common_mp_recv(const struct _starpu_mp_node *node, void *msg, int len); void _starpu_tcpip_common_nt_send(const struct _starpu_mp_node *node, void *msg, int len); void _starpu_tcpip_common_nt_recv(const struct _starpu_mp_node *node, void *msg, int len); void _starpu_tcpip_common_recv_from_device(const struct _starpu_mp_node *node, int devid, void *msg, int len, void * event); void _starpu_tcpip_common_send_to_device(const struct _starpu_mp_node *node, int devid, void *msg, int len, void * event); unsigned int _starpu_tcpip_common_test_event(struct _starpu_async_channel * event); void _starpu_tcpip_common_wait_request_completion(struct _starpu_async_channel * event); void _starpu_tcpip_common_barrier(void); void _starpu_tcpip_common_measure_bandwidth_latency(double bandwidth_dtod[STARPU_MAXTCPIPDEVS][STARPU_MAXTCPIPDEVS], double latency_dtod[STARPU_MAXTCPIPDEVS][STARPU_MAXTCPIPDEVS]); #endif /* STARPU_USE_TCPIP_MASTER_SLAVE */ #pragma GCC visibility pop #endif /* __DRIVER_TCPIP_COMMON_H__ */ starpu-1.4.9+dfsg/src/drivers/tcpip/driver_tcpip_common_func.h000066400000000000000000000234621507764646700246310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define _TCPIP_DEBUG 0 #if _TCPIP_DEBUG # define _TCPIP_PRINT(...) printf(__VA_ARGS__) #else # define _TCPIP_PRINT(...) #endif #ifdef __linux__ #include #ifndef ENOTSUPP #define ENOTSUPP 524 #endif #endif enum errcase {SOCK_INIT, SOCK_GETADDRINFO, SOCK_GETADDRINFO_LOCAL}; #define SOCKET(domain, type, protocol, errcase) ({\ int sock = 0; \ sock = socket(domain, type, protocol); \ if(sock < 0) \ { \ if(errcase == SOCK_GETADDRINFO) \ { \ if (errno != EAFNOSUPPORT) /* do not raise exception if ipv6 is not available */ \ perror("fail to create socket"); \ return 1; \ } \ else if(errcase == SOCK_GETADDRINFO_LOCAL) \ { \ if (errno != EAFNOSUPPORT) /* do not raise exception if ipv6 is not available */ \ perror("fail to create socket"); \ return -1; \ } \ else \ { \ perror("fail to create socket"); \ return -1; \ } \ } \ sock; \ }) #define BIND(sockfd, addr, addrlen) ({ \ if(bind(sockfd, addr, addrlen) != 0) \ { \ perror("socket fails to bind"); \ return -1; \ } \ }) #define LISTEN(sockfd, backlog)({ \ if(listen(sockfd, backlog) != 0) \ { \ perror("socket fails to listen"); \ return -1; \ } \ }) #define ADDR_INIT(source_addr, source_port) ({ \ struct sockaddr_in sockaddr_init; \ memset(&sockaddr_init, 0, sizeof(sockaddr_init)); \ sockaddr_init.sin_family = AF_INET; \ sockaddr_init.sin_addr.s_addr = source_addr; \ sockaddr_init.sin_port = source_port; \ sockaddr_init; \ }) #define LOCAL_ADDR_INIT(source_addr_init) ({ \ struct sockaddr_un name; \ memset(&name, 0, sizeof(name)); \ name.sun_family = AF_UNIX; \ snprintf(name.sun_path, sizeof(name.sun_path) - 1, "/tmp/starpu-%d.socket", ntohs(source_addr_init.sin_port)); \ name; \ }) #define GETSOCKNAME(sockfd, addr, addrlen) ({ \ if(getsockname(sockfd, addr, addrlen) != 0) \ { \ perror("getsockname fail"); \ return -1; \ } \ }) #define GETPEERNAME(sockfd, addr, addrlen) ({ \ if(getpeername(sockfd, addr, addrlen) != 0) \ { \ perror("getpeername fail"); \ return -1; \ } \ }) #define ACCEPT(sockfd, addr, addrlen) ({ \ int sock; \ sock = accept(sockfd, addr, addrlen); \ if(sock < 0) \ { \ perror("fail to receive the request of slave"); \ return -1; \ } \ sock; \ }) #define CONNECT(sockfd, addr, addrlen, cur) ({ \ if (connect(sockfd, addr, addrlen) < 0) \ { \ int err = errno; \ perror("fail to connect socket"); \ close(sockfd); \ errno = err; \ if(cur) \ return 1; \ else \ return -1; \ } \ }) #define WRITE(fd, buf, count) ({ \ if(write(fd, buf, count) < 0) \ { \ perror("fail to send"); \ return -1; \ } \ }) #define READ(fd, buf, count) ({ \ if(read(fd, buf, count) < 0) \ { \ perror("fail to receive"); \ return -1; \ } \ }) #define SETSOCKOPT_ZEROCOPY(sockfd, optname) ({ \ int zc; \ int one = 1; \ int ret = setsockopt(sockfd, SOL_SOCKET, optname, &one, sizeof(one)); \ if (ret!=0) \ { \ if (errno != EOPNOTSUPP && errno != ENOPROTOOPT && errno != ENOTSUPP) \ perror("setsockopt zerocopy"); \ zc = 0; \ } \ else \ zc = 1; \ zc; \ }) /* This function contains all steps to initialize a socket before connect and accept steps. * When we call this function, we need to indicate that it is for master-slave (master = 1) * or slave-slave (master = 0). We also need to provide the information sin_addr "source_addr" * and sin_port "source_port" that we want to set to initialize the binding address and * the argument "backlog" for listen. It can generate a TCP/IP socket "ss" or a local socket "ls", * and the bound address "source_addr_init" with its size "source_addr_init_size". * For local socket, it also generates the bound address "local_name" linking a local path. */ static inline int master_init(int master, int *ss, int *ls, struct sockaddr_in *source_addr_init, socklen_t *source_addr_init_size, struct sockaddr_un *local_name, unsigned long source_addr, unsigned short source_port, int backlog) { /*TCPIP*/ *ss = SOCKET(AF_INET, SOCK_STREAM, 0, SOCK_INIT); struct sockaddr_in addr_init = ADDR_INIT(source_addr, source_port); socklen_t addr_init_size = sizeof(addr_init); if(master) { int one = 1; setsockopt(*ss, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); } BIND(*ss, (struct sockaddr*) &addr_init, addr_init_size); if(!master) { GETSOCKNAME(*ss, (struct sockaddr*) &addr_init, &addr_init_size); } LISTEN(*ss, backlog); *source_addr_init = addr_init; *source_addr_init_size = addr_init_size; /*local socket*/ if (starpu_getenv_number_default("STARPU_TCPIP_USE_LOCAL_SOCKET", 1) != 0) { *ls = SOCKET(AF_UNIX, SOCK_STREAM, 0, 0); *local_name = LOCAL_ADDR_INIT(addr_init); _TCPIP_PRINT("local socket name is %s\n", local_name->sun_path); unlink(local_name->sun_path); BIND(*ls, (const struct sockaddr *) &(*local_name), sizeof(*local_name)); LISTEN(*ls, backlog); } return 0; } /* Accept step. We provide the TCP/IP socket "source_sock" or local socket "local_sock" * which is ready to accept the connection request from the other side. It will generate * the socket of the other side "sink_sock". It will also show whether the zerocopy setting * is successful (zerocopy = 1) or not (zerocopy = 0). This setting is only for async communication. */ static inline int master_accept(int *sink_sock, int source_sock, int local_sock, int *zerocopy, int * local_sock_flag) { struct sockaddr_in sink_addr; socklen_t sink_addr_size = sizeof(sink_addr); *sink_sock = ACCEPT(source_sock, (struct sockaddr*)&sink_addr, &sink_addr_size); if (zerocopy != NULL) { #ifdef SO_ZEROCOPY *zerocopy = SETSOCKOPT_ZEROCOPY(*sink_sock, SO_ZEROCOPY); #else *zerocopy = 0; #endif } if (local_sock_flag != NULL) *local_sock_flag = 0; /*local socket*/ if (starpu_getenv_number_default("STARPU_TCPIP_USE_LOCAL_SOCKET", 1) != 0) { struct sockaddr_in boundAddr; socklen_t boundAddr_size = sizeof(boundAddr); GETSOCKNAME(*sink_sock, (struct sockaddr*) &boundAddr, &boundAddr_size); /*master and slave sides use the same ip address*/ if(boundAddr.sin_addr.s_addr == sink_addr.sin_addr.s_addr) { close(*sink_sock); *sink_sock = ACCEPT(local_sock, NULL, NULL); if (local_sock_flag != NULL) *local_sock_flag = 1; } if (zerocopy != NULL) { #ifdef SO_ZEROCOPY *zerocopy = SETSOCKOPT_ZEROCOPY(*sink_sock, SO_ZEROCOPY); #else *zerocopy = 0; #endif } } return 0; } /* Connect step. We provide the connection address for TCP/IP socket, either it is addrinfo "cur" got from * function getaddrinfo in master-salve mode, or it is "source_addr" in slave-slave mode. It will generate * the socket of the other side "source_sock", In the case that slave connects to master, we need to get * the address "source_addr" to which "source_sock" is bound. It will also show whether the zerocopy setting * is successful (zerocopy = 1) or not (zerocopy = 0). This setting is only for async communication. */ static inline int slave_connect(int *source_sock, struct addrinfo *cur, struct sockaddr_in *bound_addr, struct sockaddr_in *source_addr, int *zerocopy, int * local_sock_flag) { if(cur != NULL) { *source_sock = SOCKET(cur->ai_family, cur->ai_socktype, cur->ai_protocol, SOCK_GETADDRINFO); CONNECT(*source_sock, cur->ai_addr, cur->ai_addrlen, 1); } else { *source_sock = SOCKET(AF_INET, SOCK_STREAM, 0, SOCK_INIT); CONNECT(*source_sock, (struct sockaddr*)&(*source_addr), sizeof(*source_addr), 0); } if (zerocopy != NULL) { #ifdef SO_ZEROCOPY *zerocopy = SETSOCKOPT_ZEROCOPY(*source_sock, SO_ZEROCOPY); #else *zerocopy = 0; #endif } if (local_sock_flag != NULL) *local_sock_flag = 0; struct sockaddr_in boundAddr, peerAddr; socklen_t boundAddr_size = sizeof(boundAddr); socklen_t peerAddr_size = sizeof(peerAddr); GETSOCKNAME(*source_sock, (struct sockaddr*) &boundAddr, &boundAddr_size); GETPEERNAME(*source_sock, (struct sockaddr*) &peerAddr, &peerAddr_size); if(bound_addr != NULL) *bound_addr = boundAddr; /*local socket*/ if (starpu_getenv_number_default("STARPU_TCPIP_USE_LOCAL_SOCKET", 1) != 0) { /*master and slave sides use the same ip address*/ if(boundAddr.sin_addr.s_addr == peerAddr.sin_addr.s_addr) { close(*source_sock); if(cur != NULL) *source_sock = SOCKET(AF_UNIX, SOCK_STREAM, 0, SOCK_GETADDRINFO_LOCAL); else *source_sock = SOCKET(AF_UNIX, SOCK_STREAM, 0, SOCK_INIT); struct sockaddr_un local_name = LOCAL_ADDR_INIT(peerAddr); _TCPIP_PRINT("local socket name %s is got for sync connect\n", local_name.sun_path); CONNECT(*source_sock, (const struct sockaddr *) &local_name, sizeof(local_name), 0); if (local_sock_flag != NULL) *local_sock_flag = 1; } if (zerocopy != NULL) { #ifdef SO_ZEROCOPY *zerocopy = SETSOCKOPT_ZEROCOPY(*source_sock, SO_ZEROCOPY); #else *zerocopy = 0; #endif } } return 0; } starpu-1.4.9+dfsg/src/drivers/tcpip/driver_tcpip_init.c000066400000000000000000000030331507764646700232540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include static struct _starpu_driver_info driver_info = { .name_upper = "TCPIP_MS", .name_var = "TCPIP_MS", .name_lower = "tcpip_ms", .memory_kind = STARPU_TCPIP_MS_RAM, .alpha = 1.0f, #ifdef STARPU_USE_TCPIP_MASTER_SLAVE .run_worker = _starpu_tcpip_src_worker, .init_worker_binding = _starpu_tcpip_init_worker_binding, .init_worker_memory = _starpu_tcpip_init_worker_memory, #endif }; static struct _starpu_memory_driver_info memory_driver_info = { .name_upper = "TCPIP_MS", .worker_archtype = STARPU_TCPIP_MS_WORKER, #ifdef STARPU_USE_TCPIP_MASTER_SLAVE .ops = &_starpu_driver_tcpip_ms_node_ops, #endif }; void _starpu_tcpip_ms_preinit(void) { _starpu_driver_info_register(STARPU_TCPIP_MS_WORKER, &driver_info); _starpu_memory_driver_info_register(STARPU_TCPIP_MS_RAM, &memory_driver_info); } starpu-1.4.9+dfsg/src/drivers/tcpip/driver_tcpip_sink.c000066400000000000000000000024611507764646700232610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "driver_tcpip_sink.h" #include "driver_tcpip_source.h" #include "driver_tcpip_common.h" void _starpu_tcpip_sink_init(struct _starpu_mp_node *node) { _starpu_tcpip_common_mp_initialize_src_sink(node); _STARPU_MALLOC(node->thread_table, sizeof(starpu_pthread_t)*node->nb_cores); sigset_t set; sigemptyset(&set); sigaddset(&set, SIGUSR1); pthread_sigmask(SIG_BLOCK, &set, NULL); //TODO } void _starpu_tcpip_sink_bind_thread(const struct _starpu_mp_node *mp_node, int coreid, int *core_table, int nb_core) { //TODO (void)mp_node; (void)coreid; (void)core_table; (void)nb_core; } starpu-1.4.9+dfsg/src/drivers/tcpip/driver_tcpip_sink.h000066400000000000000000000022571507764646700232710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DRIVER_TCPIP_SINK_H__ #define __DRIVER_TCPIP_SINK_H__ /** @file */ #include #pragma GCC visibility push(hidden) #ifdef STARPU_USE_TCPIP_MASTER_SLAVE void _starpu_tcpip_sink_init(struct _starpu_mp_node *node); void _starpu_tcpip_sink_bind_thread(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, int coreid, int * core_table, int nb_core); #endif /* STARPU_USE_TCPIP_MASTER_SLAVE */ #pragma GCC visibility pop #endif /* __DRIVER_TCPIP_SINK_H__ */ starpu-1.4.9+dfsg/src/drivers/tcpip/driver_tcpip_source.c000066400000000000000000000310311507764646700236100ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #ifdef STARPU_USE_TCPIP_MASTER_SLAVE static unsigned tcpip_bindid_init[STARPU_MAXTCPIPDEVS] = { }; static unsigned tcpip_bindid[STARPU_MAXTCPIPDEVS]; static unsigned tcpip_memory_init[STARPU_MAXTCPIPDEVS] = { }; static unsigned tcpip_memory_nodes[STARPU_MAXTCPIPDEVS]; static struct _starpu_worker_set tcpip_worker_set[STARPU_MAXTCPIPDEVS]; #endif struct _starpu_mp_node *_starpu_tcpip_ms_src_get_actual_thread_mp_node() { struct _starpu_worker *actual_worker = _starpu_get_local_worker_key(); STARPU_ASSERT(actual_worker); int devid = actual_worker->devid; STARPU_ASSERT(devid >= 0 && devid < STARPU_MAXTCPIPDEVS); return _starpu_src_nodes[STARPU_TCPIP_MS_WORKER][devid]; } static void __starpu_init_tcpip_config(struct _starpu_machine_topology * topology, struct _starpu_machine_config *config, unsigned tcpip_idx) { int nbcores; _starpu_src_common_sink_nbcores(_starpu_src_nodes[STARPU_TCPIP_MS_WORKER][tcpip_idx], &nbcores); STARPU_ASSERT(tcpip_idx < STARPU_NMAXDEVS); topology->nhwworker[STARPU_TCPIP_MS_WORKER][tcpip_idx] = nbcores; int ntcpipcores; ntcpipcores = starpu_getenv_number("STARPU_NTCPIPMSTHREADS"); _starpu_topology_check_ndevices(&ntcpipcores, nbcores, 0, INT_MAX, 0, "STARPU_NTCPIPMSTHREADS", "TCPIP cores", ""); tcpip_worker_set[tcpip_idx].workers = &config->workers[topology->nworkers]; tcpip_worker_set[tcpip_idx].nworkers = ntcpipcores; _starpu_src_nodes[STARPU_TCPIP_MS_WORKER][tcpip_idx]->baseworkerid = topology->nworkers; _starpu_topology_configure_workers(topology, config, STARPU_TCPIP_MS_WORKER, tcpip_idx, tcpip_idx, 0, 0, ntcpipcores, 1, &tcpip_worker_set[tcpip_idx], _starpu_tcpip_common_multiple_thread ? NULL : tcpip_worker_set); } /* Determine which devices we will use */ void _starpu_init_tcpip_config(struct _starpu_machine_topology *topology, struct _starpu_machine_config *config, struct starpu_conf *user_conf, int no_mp_config) { int i; /* Discover and configure the mp topology. That means: * - discover the number of mp nodes; * - initialize each discovered node; * - discover the local topology (number of PUs/devices) of each node; * - configure the workers accordingly. */ for (i = 0; i < (int) (sizeof(tcpip_worker_set)/sizeof(tcpip_worker_set[0])); i++) tcpip_worker_set[i].workers = NULL; int ntcpipms = user_conf->ntcpip_ms; if(ntcpipms != 0) { /* Discover and initialize the number of TCPIP nodes through the mp * infrastructure. */ unsigned nhwtcpipdevices = _starpu_tcpip_src_get_device_count(); if (ntcpipms == -1) /* Nothing was specified, so let's use the number of * detected tcpip devices. ! */ ntcpipms = nhwtcpipdevices; else { if ((unsigned) ntcpipms > nhwtcpipdevices) { /* The user requires more TCPIP devices than there is available */ _STARPU_MSG("# Warning: %d TCPIP Master-Slave devices requested. Only %u available.\n", ntcpipms, nhwtcpipdevices); ntcpipms = nhwtcpipdevices; } /*Let's make sure this value is OK.*/ if(ntcpipms > STARPU_MAXTCPIPDEVS) { _STARPU_DISP("# Warning: %d TCPIP Master-Slave devices requested. Only %u enabled. Use configure options --enable-maxtcpipdev=xxx to update the maximum value of supported TCPIP MS devices.\n", ntcpipms, STARPU_MAXTCPIPDEVS); ntcpipms = STARPU_MAXTCPIPDEVS; } } } topology->ndevices[STARPU_TCPIP_MS_WORKER] = ntcpipms; /* if user don't want to use TCPIP slaves, we close the slave processes */ if (no_mp_config && topology->ndevices[STARPU_TCPIP_MS_WORKER] == 0) { _starpu_tcpip_common_mp_deinit(); exit(0); } if (!no_mp_config) { for (i = 0; i < ntcpipms; i++) _starpu_src_nodes[STARPU_TCPIP_MS_WORKER][i] = _starpu_mp_common_node_create(STARPU_NODE_TCPIP_SOURCE, i); for (i = 0; i < ntcpipms; i++) __starpu_init_tcpip_config(topology, config, i); } } /*Bind the driver on a CPU core*/ void _starpu_tcpip_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { /* Perhaps the worker has some "favourite" bindings */ unsigned *preferred_binding = NULL; unsigned npreferred = 0; unsigned devid = workerarg->devid; if (tcpip_bindid_init[devid]) { } else { tcpip_bindid_init[devid] = 1; if (_starpu_tcpip_common_multiple_thread || devid == 0) tcpip_bindid[devid] = _starpu_get_next_bindid(config, STARPU_THREAD_ACTIVE, preferred_binding, npreferred); else tcpip_bindid[devid] = tcpip_bindid[0]; } workerarg->bindid = tcpip_bindid[devid]; } /*Set up memory and buses*/ void _starpu_tcpip_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg) { unsigned memory_node = -1; unsigned devid = workerarg->devid; unsigned numa, devid2; if (tcpip_memory_init[devid]) { memory_node = tcpip_memory_nodes[devid]; } else { tcpip_memory_init[devid] = 1; memory_node = tcpip_memory_nodes[devid] = _starpu_memory_node_register(STARPU_TCPIP_MS_RAM, devid); _starpu_memory_node_set_mapped(memory_node); for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) { _starpu_register_bus(numa, memory_node); _starpu_register_bus(memory_node, numa); } for (devid2 = 0; devid2 < STARPU_MAXTCPIPDEVS; devid2++) { if (tcpip_memory_init[devid2]) { _starpu_register_bus(tcpip_memory_nodes[devid], tcpip_memory_nodes[devid2]); _starpu_register_bus(tcpip_memory_nodes[devid2], tcpip_memory_nodes[devid]); } } } //This worker can manage transfers on NUMA nodes for (numa = 0; numa < starpu_memory_nodes_get_numa_count(); numa++) _starpu_worker_drives_memory_node(&workerarg->set->workers[0], numa); _starpu_worker_drives_memory_node(&workerarg->set->workers[0], memory_node); if (!_starpu_tcpip_common_multiple_thread) { /* TCP/IP driver thread can manage all slave memories if we disable the TCP/IP multiple thread */ int findworker; for (findworker = 0; findworker < workerarg->workerid; findworker++) { struct _starpu_worker *findworkerarg = &config->workers[findworker]; if (findworkerarg->arch == STARPU_TCPIP_MS_WORKER) { _starpu_worker_drives_memory_node(workerarg, findworkerarg->memory_node); _starpu_worker_drives_memory_node(findworkerarg, memory_node); } } } _starpu_memory_node_add_nworkers(memory_node); workerarg->memory_node = memory_node; } static void _starpu_deinit_tcpip_node(int devid) { _starpu_mp_common_send_command(_starpu_src_nodes[STARPU_TCPIP_MS_WORKER][devid], STARPU_MP_COMMAND_EXIT, NULL, 0); _starpu_mp_common_node_destroy(_starpu_src_nodes[STARPU_TCPIP_MS_WORKER][devid]); } void _starpu_deinit_tcpip_config(struct _starpu_machine_config *config) { struct _starpu_machine_topology *topology = &config->topology; unsigned i; for (i = 0; i < topology->ndevices[STARPU_TCPIP_MS_WORKER]; i++) _starpu_deinit_tcpip_node(i); } void _starpu_tcpip_source_init(struct _starpu_mp_node *node) { _starpu_tcpip_common_mp_initialize_src_sink(node); //TODO } void _starpu_tcpip_source_deinit(struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED) { } unsigned _starpu_tcpip_src_get_device_count() { int nmpims = starpu_getenv_number("STARPU_TCPIP_MS_SLAVES"); if (nmpims == -1) /* No slave */ nmpims = 0; return nmpims; } void *_starpu_tcpip_src_worker(void *arg) { struct _starpu_worker *worker0 = arg; struct _starpu_worker_set *set = worker0->set; struct _starpu_worker_set *worker_set_tcpip = set; int nbsinknodes = _starpu_tcpip_common_multiple_thread ? 1 : _starpu_tcpip_src_get_device_count(); int workersetnum; for (workersetnum = 0; workersetnum < nbsinknodes; workersetnum++) { struct _starpu_worker_set * worker_set = &worker_set_tcpip[workersetnum]; /* As all workers of a set share common data, we just use the first * one for initializing the following stuffs. */ struct _starpu_worker *baseworker = &worker_set->workers[0]; struct _starpu_machine_config *config = baseworker->config; unsigned baseworkerid = baseworker - config->workers; unsigned devid = baseworker->devid; unsigned i; /* unsigned memnode = baseworker->memory_node; */ _starpu_driver_start(baseworker, STARPU_CPU_WORKER, 0); #ifdef STARPU_USE_FXT for (i = 1; i < worker_set->nworkers; i++) _starpu_worker_start(&worker_set->workers[i], STARPU_TCPIP_MS_WORKER, 0); #endif // Current task for a thread managing a worker set has no sense. _starpu_set_current_task(NULL); for (i = 0; i < config->topology.nworker[STARPU_TCPIP_MS_WORKER][devid]; i++) { struct _starpu_worker *worker = &config->workers[baseworkerid+i]; snprintf(worker->name, sizeof(worker->name), "TCPIP_MS %u core %u", devid, i); snprintf(worker->short_name, sizeof(worker->short_name), "TCPIP_MS %u.%u", devid, i); } char thread_name[16]; if (_starpu_tcpip_common_multiple_thread) snprintf(thread_name, sizeof(thread_name), "TCPIP_MS %u", devid); else snprintf(thread_name, sizeof(thread_name), "TCPIP_MS"); starpu_pthread_setname(thread_name); for (i = 0; i < worker_set->nworkers; i++) { struct _starpu_worker *worker = &worker_set->workers[i]; _STARPU_TRACE_WORKER_INIT_END(worker->workerid); } _starpu_src_common_init_switch_env(workersetnum); } /* for */ _starpu_src_common_workers_set(worker_set_tcpip, nbsinknodes, &_starpu_src_nodes[STARPU_TCPIP_MS_WORKER][worker_set_tcpip->workers[0].devid]); return NULL; } static int _starpu_tcpip_is_direct_access_supported(unsigned node, unsigned handling_node) { (void) node; enum starpu_node_kind kind = starpu_node_get_kind(handling_node); return (kind == STARPU_TCPIP_MS_RAM); } static uintptr_t _starpu_tcpip_map(uintptr_t src, size_t src_offset, unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node, size_t size, int *ret) { if(!_starpu_tcpip_mp_has_local()) { *ret=-EXDEV; return 0; } uintptr_t map_addr = _starpu_src_common_map(dst_node, src+src_offset, size); if(map_addr == 0) { *ret=-ENOMEM; } else { *ret = 0; } return map_addr; } static int _starpu_tcpip_unmap(uintptr_t src STARPU_ATTRIBUTE_UNUSED, size_t src_offset STARPU_ATTRIBUTE_UNUSED, unsigned src_node STARPU_ATTRIBUTE_UNUSED, uintptr_t dst, unsigned dst_node, size_t size) { _starpu_src_common_unmap(dst_node, dst, size); return 0; } static int _starpu_tcpip_update_map(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size) { (void) src; (void) src_offset; (void) src_node; (void) dst; (void) dst_offset; (void) dst_node; (void) size; /* Memory mappings are cache-coherent */ return 0; } struct _starpu_node_ops _starpu_driver_tcpip_ms_node_ops = { .name = "tcpip driver", .malloc_on_node = _starpu_src_common_allocate, .free_on_node = _starpu_src_common_free, .is_direct_access_supported = _starpu_tcpip_is_direct_access_supported, .copy_interface_to[STARPU_CPU_RAM] = _starpu_copy_interface_any_to_any, .copy_interface_to[STARPU_TCPIP_MS_RAM] = _starpu_copy_interface_any_to_any, .copy_interface_from[STARPU_CPU_RAM] = _starpu_copy_interface_any_to_any, .copy_interface_from[STARPU_TCPIP_MS_RAM] = _starpu_copy_interface_any_to_any, .copy_data_to[STARPU_CPU_RAM] = _starpu_src_common_copy_data_sink_to_host, .copy_data_to[STARPU_TCPIP_MS_RAM] = _starpu_src_common_copy_data_sink_to_sink, .copy_data_from[STARPU_CPU_RAM] = _starpu_src_common_copy_data_host_to_sink, .copy_data_from[STARPU_TCPIP_MS_RAM] = _starpu_src_common_copy_data_sink_to_sink, .wait_request_completion = _starpu_tcpip_common_wait_request_completion, .test_request_completion = _starpu_tcpip_common_test_event, .map[STARPU_CPU_RAM] = _starpu_tcpip_map, .unmap[STARPU_CPU_RAM] = _starpu_tcpip_unmap, .update_map[STARPU_CPU_RAM] = _starpu_tcpip_update_map, }; starpu-1.4.9+dfsg/src/drivers/tcpip/driver_tcpip_source.h000066400000000000000000000041001507764646700236120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __DRIVER_TCPIP_SOURCE_H__ #define __DRIVER_TCPIP_SOURCE_H__ /** @file */ #include #include #include #pragma GCC visibility push(hidden) void _starpu_tcpip_ms_preinit(void); #ifdef STARPU_USE_TCPIP_MASTER_SLAVE extern struct _starpu_node_ops _starpu_driver_tcpip_ms_node_ops; /** Array of structures containing all the information useful to send * and receive information with devices */ struct _starpu_mp_node *_starpu_tcpip_ms_src_get_actual_thread_mp_node(); unsigned _starpu_tcpip_src_get_device_count(); void _starpu_init_tcpip_config(struct _starpu_machine_topology * topology, struct _starpu_machine_config *config, struct starpu_conf *user_conf, int no_mp_config); void _starpu_tcpip_init_worker_binding(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); void _starpu_tcpip_init_worker_memory(struct _starpu_machine_config *config, int no_mp_config STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *workerarg); void _starpu_deinit_tcpip_config(struct _starpu_machine_config *config); void *_starpu_tcpip_src_worker(void *arg); void _starpu_tcpip_source_init(struct _starpu_mp_node *node); void _starpu_tcpip_source_deinit(struct _starpu_mp_node *node); #endif /* STARPU_USE_TCPIP_MASTER_SLAVE */ #pragma GCC visibility pop #endif /* __DRIVER_TCPIP_SOURCE_H__ */ starpu-1.4.9+dfsg/src/parallel_worker/000077500000000000000000000000001507764646700177645ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/parallel_worker/starpu_parallel_worker_create.c000066400000000000000000000657151507764646700262540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This file creates an interface to manage resources within parallel * workers and make use of parallel tasks. It entirely depends on the * hwloc software. */ #include #ifdef STARPU_PARALLEL_WORKER starpu_binding_function _starpu_parallel_worker_type_get_func(enum starpu_parallel_worker_types type) { starpu_binding_function prologue_func; switch (type) { case STARPU_PARALLEL_WORKER_OPENMP: prologue_func = &starpu_parallel_worker_openmp_prologue; break; case STARPU_PARALLEL_WORKER_INTEL_OPENMP_MKL: prologue_func = &starpu_parallel_worker_intel_openmp_mkl_prologue; break; case STARPU_PARALLEL_WORKER_GNU_OPENMP_MKL: #ifdef STARPU_MKL prologue_func = &starpu_parallel_worker_gnu_openmp_mkl_prologue; #else _STARPU_MSG("Warning: MKL support is not available, using STARPU_PARALLEL_WORKER_INTEL_OPENMP_MKL instead\n"); prologue_func = &starpu_parallel_worker_intel_openmp_mkl_prologue; #endif break; default: prologue_func = NULL; } return prologue_func; } void starpu_parallel_worker_openmp_prologue(void *arg) { (void) arg; int workerid = starpu_worker_get_id_check(); if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) { struct starpu_task *task = starpu_task_get_current(); int sched_ctx = task->sched_ctx; struct _starpu_sched_ctx *ctx_struct = _starpu_get_sched_ctx_struct(sched_ctx); /* If the view of the worker doesn't correspond to the view of the task, adapt the thread team */ if (ctx_struct->parallel_view != task->possibly_parallel) { int *cpuids = NULL; int ncpuids = 0; starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids); if (!task->possibly_parallel) ncpuids=1; omp_set_num_threads(ncpuids); #pragma omp parallel { starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]); } free(cpuids); ctx_struct->parallel_view = !ctx_struct->parallel_view; } } return; } #ifdef STARPU_MKL void starpu_parallel_worker_gnu_openmp_mkl_prologue(void *arg) { int workerid = starpu_worker_get_id(); if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) { struct starpu_task *task = starpu_task_get_current(); int sched_ctx = task->sched_ctx; struct _starpu_sched_ctx *ctx_struct = _starpu_get_sched_ctx_struct(sched_ctx); /* If the view of the worker doesn't correspond to the view of the task, adapt the thread team */ if (ctx_struct->parallel_view != task->possibly_parallel) { int *cpuids = NULL; int ncpuids = 0; starpu_sched_ctx_get_available_cpuids(sched_ctx, &cpuids, &ncpuids); if (!task->possibly_parallel) ncpuids=1; omp_set_num_threads(ncpuids); mkl_set_num_threads_local(ncpuids); mkl_set_dynamic(0); #pragma omp parallel { starpu_sched_ctx_bind_current_thread_to_cpuid(cpuids[omp_get_thread_num()]); } free(cpuids); ctx_struct->parallel_view = !ctx_struct->parallel_view; } } return; } #endif /* Main interface function to create a parallel worker view of the machine. * Its job is to capture what the user wants and store it in a standard view. */ struct starpu_parallel_worker_config *_starpu_parallel_worker_init_varg(hwloc_obj_type_t parallel_worker_level, va_list varg_list) { int arg_type; struct starpu_parallel_worker_config *machine; _STARPU_CALLOC(machine, 1, sizeof(struct starpu_parallel_worker_config)); _STARPU_CALLOC(machine->orig_params, 1, sizeof(struct _starpu_parallel_worker_parameters)); machine->params = machine->orig_params; machine->id = STARPU_NMAX_SCHED_CTXS; machine->groups = _starpu_parallel_worker_group_list_new(); machine->nparallel_workers = 0; machine->ngroups = 0; machine->topology = NULL; _starpu_parallel_worker_init_parameters(machine->params); while ((arg_type = va_arg(varg_list, int)) != 0) { if (arg_type == STARPU_PARALLEL_WORKER_MIN_NB) { machine->params->min_nb = va_arg(varg_list, int); if (machine->params->min_nb <= 0) _STARPU_DISP("Caution min number of contexts shouldn't be negative or null\n"); } else if (arg_type == STARPU_PARALLEL_WORKER_MAX_NB) { machine->params->max_nb = va_arg(varg_list, int); if (machine->params->max_nb <= 0) _STARPU_DISP("Caution max number of contexts shouldn't be negative or null\n"); } else if (arg_type == STARPU_PARALLEL_WORKER_NB) { machine->params->nb = va_arg(varg_list, int); if (machine->params->nb <= 0) _STARPU_DISP("Caution number of contexts shouldn't be negative or null\n"); } else if (arg_type == STARPU_PARALLEL_WORKER_POLICY_NAME) { machine->params->sched_policy_name = va_arg(varg_list, char*); } else if (arg_type == STARPU_PARALLEL_WORKER_POLICY_STRUCT) { machine->params->sched_policy_struct = va_arg(varg_list, struct starpu_sched_policy*); } else if (arg_type == STARPU_PARALLEL_WORKER_KEEP_HOMOGENEOUS) { machine->params->keep_homogeneous = va_arg(varg_list, int); /* 0=off, other=on */ } else if (arg_type == STARPU_PARALLEL_WORKER_PREFERE_MIN) { machine->params->prefere_min = va_arg(varg_list, int); /* 0=off, other=on */ } else if (arg_type == STARPU_PARALLEL_WORKER_CREATE_FUNC) { typedef void (*fn)(void*); machine->params->create_func = va_arg(varg_list, fn); } else if (arg_type == STARPU_PARALLEL_WORKER_CREATE_FUNC_ARG) { machine->params->create_func_arg = va_arg(varg_list, void*); } else if (arg_type == STARPU_PARALLEL_WORKER_TYPE) { machine->params->type = va_arg(varg_list, enum starpu_parallel_worker_types); } else if (arg_type == STARPU_PARALLEL_WORKER_AWAKE_WORKERS) { machine->params->awake_workers = va_arg(varg_list, unsigned); } else if (arg_type == STARPU_PARALLEL_WORKER_PARTITION_ONE) { struct _starpu_parallel_worker_group *group = _starpu_parallel_worker_group_new(); _starpu_parallel_worker_group_init(group, machine); _starpu_parallel_worker_group_list_push_back(machine->groups, group); machine->params = group->params; } else if (arg_type == STARPU_PARALLEL_WORKER_NEW) { struct _starpu_parallel_worker *parallel_worker = _starpu_parallel_worker_new(); struct _starpu_parallel_worker_group *group = _starpu_parallel_worker_group_list_back(machine->groups); if (group == NULL) { group = _starpu_parallel_worker_group_new(); _starpu_parallel_worker_group_init(group, machine); _starpu_parallel_worker_group_list_push_back(machine->groups, group); } _starpu_parallel_worker_init(parallel_worker, group); _starpu_parallel_worker_list_push_back(group->parallel_workers, parallel_worker); machine->params = parallel_worker->params; } else if (arg_type == STARPU_PARALLEL_WORKER_NCORES) { struct _starpu_parallel_worker_group *group = _starpu_parallel_worker_group_list_back(machine->groups); if (group == NULL) { group = _starpu_parallel_worker_group_new(); _starpu_parallel_worker_group_init(group, machine); _starpu_parallel_worker_group_list_push_back(machine->groups, group); } struct _starpu_parallel_worker *parallel_worker =_starpu_parallel_worker_list_back(group->parallel_workers); parallel_worker->ncores = va_arg(varg_list, unsigned); } else { STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type); } } va_end(varg_list); switch(parallel_worker_level) { case HWLOC_OBJ_MISC: case HWLOC_OBJ_BRIDGE: case HWLOC_OBJ_PCI_DEVICE: case HWLOC_OBJ_OS_DEVICE: STARPU_ABORT_MSG("Parallel_Worker aggregation isn't supported for level %s\n", hwloc_obj_type_string(parallel_worker_level)); break; default: /* others can pass */ break; } if (_starpu_parallel_worker_config(parallel_worker_level, machine) == -ENODEV) { starpu_parallel_worker_shutdown(machine); machine = NULL; } return machine; } struct starpu_parallel_worker_config *starpu_parallel_worker_init(hwloc_obj_type_t parallel_worker_level, ...) { struct starpu_parallel_worker_config *config; va_list varg_list; va_start(varg_list, parallel_worker_level); config = _starpu_parallel_worker_init_varg(parallel_worker_level, varg_list); va_end(varg_list); return config; } int starpu_parallel_worker_shutdown(struct starpu_parallel_worker_config *machine) { if (machine == NULL) return -1; struct _starpu_parallel_worker_group *g; struct _starpu_parallel_worker_group_list *group_list = machine->groups; if (machine->id != STARPU_NMAX_SCHED_CTXS) starpu_sched_ctx_delete(machine->id); g = _starpu_parallel_worker_group_list_begin(group_list); while (g != _starpu_parallel_worker_group_list_end(group_list)) { struct _starpu_parallel_worker_group *tmp = g; g = _starpu_parallel_worker_group_list_next(g); _starpu_parallel_worker_group_remove(group_list, tmp); } _starpu_parallel_worker_group_list_delete(group_list); if (machine->topology != NULL) hwloc_topology_destroy(machine->topology); free(machine->orig_params); free(machine); starpu_sched_ctx_set_context(0); return 0; } int starpu_parallel_worker_print(struct starpu_parallel_worker_config *parallel_workers) { if (parallel_workers == NULL) return -1; int cnt, w; struct _starpu_parallel_worker_group *group; struct _starpu_parallel_worker *parallel_worker; printf("Number of parallel workers created: %u\n", parallel_workers->nparallel_workers); cnt=0; if (parallel_workers->nparallel_workers) { for (group = _starpu_parallel_worker_group_list_begin(parallel_workers->groups); group != _starpu_parallel_worker_group_list_end(parallel_workers->groups); group = _starpu_parallel_worker_group_list_next(group)) { for (parallel_worker = _starpu_parallel_worker_list_begin(group->parallel_workers); parallel_worker != _starpu_parallel_worker_list_end(group->parallel_workers); parallel_worker = _starpu_parallel_worker_list_next(parallel_worker)) { printf("Parallel worker %d contains the following logical indexes:\n\t", cnt); for (w=0; w < parallel_worker->ncores; w++) printf("%d ", parallel_worker->cores[w]); printf("\n"); cnt++; } } } return 0; } int _starpu_parallel_worker_create(struct _starpu_parallel_worker *parallel_worker) { struct _starpu_machine_config *config = _starpu_get_machine_config(); if (config->topology.nsched_ctxs == STARPU_NMAX_SCHED_CTXS) /* Too many contexts already :/ */ return 0; if (parallel_worker->params->awake_workers) parallel_worker->id = starpu_sched_ctx_create(parallel_worker->workerids, parallel_worker->ncores, "parallel_workers", STARPU_SCHED_CTX_AWAKE_WORKERS, 0); else parallel_worker->id = starpu_sched_ctx_create(parallel_worker->workerids, parallel_worker->ncores, "parallel_workers", 0); /* parallel_worker priority can be the lowest, so let's enforce it */ starpu_sched_ctx_set_priority(parallel_worker->workerids, parallel_worker->ncores, parallel_worker->id, 0); return 1; } int _starpu_parallel_worker_group_create(struct _starpu_parallel_worker_group *group) { struct _starpu_parallel_worker *c; for (c = _starpu_parallel_worker_list_begin(group->parallel_workers) ; c != _starpu_parallel_worker_list_end(group->parallel_workers) ; c = _starpu_parallel_worker_list_next(c)) { if (c->ncores == 0) continue; if (_starpu_parallel_worker_create(c) == 0) return 0; if (!c->params->awake_workers) _starpu_parallel_worker_bind(c); } return 1; } void _starpu_parallel_workers_set_nesting(struct starpu_parallel_worker_config *m) { struct _starpu_parallel_worker_group *g; struct _starpu_parallel_worker *c; for (g = _starpu_parallel_worker_group_list_begin(m->groups) ; g != _starpu_parallel_worker_group_list_end(m->groups) ; g = _starpu_parallel_worker_group_list_next(g)) { for (c = _starpu_parallel_worker_list_begin(g->parallel_workers) ; c != _starpu_parallel_worker_list_end(g->parallel_workers) ; c = _starpu_parallel_worker_list_next(c)) _starpu_get_sched_ctx_struct(c->id)->nesting_sched_ctx = m->id; } } int _starpu_parallel_worker_bind(struct _starpu_parallel_worker *parallel_worker) { starpu_binding_function func; void *func_arg; if (parallel_worker->params->create_func) { func = parallel_worker->params->create_func; func_arg = (void*) parallel_worker->params->create_func_arg; } else { func = _starpu_parallel_worker_type_get_func(parallel_worker->params->type); func_arg = NULL; } return starpu_task_insert(&_starpu_parallel_worker_bind_cl, STARPU_SCHED_CTX, parallel_worker->id, STARPU_POSSIBLY_PARALLEL, 1, STARPU_PROLOGUE_CALLBACK_POP, func, STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE, func_arg, 0); } void _starpu_parallel_worker_group_init(struct _starpu_parallel_worker_group *group, struct starpu_parallel_worker_config *father) { group->id = 0; group->nparallel_workers = 0; group->parallel_workers = _starpu_parallel_worker_list_new(); group->father = father; _STARPU_MALLOC(group->params, sizeof(struct _starpu_parallel_worker_parameters)); _starpu_parallel_worker_copy_parameters(father->params, group->params); return; } void _starpu_parallel_worker_init(struct _starpu_parallel_worker *parallel_worker, struct _starpu_parallel_worker_group *father) { parallel_worker->id = STARPU_NMAX_SCHED_CTXS; parallel_worker->cpuset = hwloc_bitmap_alloc(); parallel_worker->ncores = 0; parallel_worker->cores = NULL; parallel_worker->workerids = NULL; parallel_worker->father = father; _STARPU_MALLOC(parallel_worker->params, sizeof(struct _starpu_parallel_worker_parameters)); _starpu_parallel_worker_copy_parameters(father->params, parallel_worker->params); } int _starpu_parallel_worker_remove(struct _starpu_parallel_worker_list *parallel_worker_list, struct _starpu_parallel_worker *parallel_worker) { if (parallel_worker && parallel_worker->id != STARPU_NMAX_SCHED_CTXS) starpu_sched_ctx_delete(parallel_worker->id); else return -1; if (parallel_worker->cores != NULL) free(parallel_worker->cores); if (parallel_worker->workerids != NULL) free(parallel_worker->workerids); hwloc_bitmap_free(parallel_worker->cpuset); free(parallel_worker->params); _starpu_parallel_worker_list_erase(parallel_worker_list, parallel_worker); _starpu_parallel_worker_delete(parallel_worker); return 0; } int _starpu_parallel_worker_group_remove(struct _starpu_parallel_worker_group_list *group_list, struct _starpu_parallel_worker_group *group) { struct _starpu_parallel_worker_list *parallel_worker_list = group->parallel_workers; struct _starpu_parallel_worker *c = _starpu_parallel_worker_list_begin(parallel_worker_list); while (c != _starpu_parallel_worker_list_end(parallel_worker_list)) { struct _starpu_parallel_worker *tmp = c; c = _starpu_parallel_worker_list_next(c); _starpu_parallel_worker_remove(parallel_worker_list, tmp); } _starpu_parallel_worker_list_delete(parallel_worker_list); free(group->params); _starpu_parallel_worker_group_list_erase(group_list, group); _starpu_parallel_worker_group_delete(group); return 0; } void _starpu_parallel_worker_init_parameters(struct _starpu_parallel_worker_parameters *params) { params->min_nb = 0; params->max_nb = 0; params->nb = 0; params->sched_policy_name = NULL; params->sched_policy_struct = NULL; params->keep_homogeneous = 0; params->prefere_min = 0; params->create_func = NULL; params->create_func_arg = NULL; params->type = STARPU_PARALLEL_WORKER_OPENMP; params->awake_workers = 0; return; } void _starpu_parallel_worker_copy_parameters(struct _starpu_parallel_worker_parameters *src, struct _starpu_parallel_worker_parameters *dst) { dst->min_nb = src->min_nb; dst->max_nb = src->max_nb; dst->nb = src->nb; dst->sched_policy_name = src->sched_policy_name; dst->sched_policy_struct = src->sched_policy_struct; dst->keep_homogeneous = src->keep_homogeneous; dst->prefere_min = src->prefere_min; dst->create_func = src->create_func; dst->create_func_arg = src->create_func_arg; dst->type = src->type; dst->awake_workers = src->awake_workers; return; } /* Considering the resources and parameters, how many parallel_workers should we take? */ int _starpu_parallel_worker_analyze_parameters(struct _starpu_parallel_worker_parameters *params, int npus) { int nb_parallel_workers = 1, j; if (params->nb) { nb_parallel_workers = params->nb <= npus?params->nb : npus; } else if (params->min_nb && params->max_nb) { if (!params->keep_homogeneous) { if (params->prefere_min) nb_parallel_workers = params->min_nb <= npus? params->min_nb : npus; else nb_parallel_workers = params->max_nb <= npus? params->max_nb : npus; } else { int begin = params->prefere_min? params->min_nb:params->max_nb; int end = params->prefere_min? params->max_nb+1:params->min_nb-1; j=begin; int best = 0, second_best = 0, cpu_loss = INT_MAX; while (j != end) { if (npus%j == 0) { best = j; break; } if (npus%j < cpu_loss) { cpu_loss = npus%j; second_best = j; } j = params->prefere_min? j+1:j-1; } if (best) nb_parallel_workers = best; else if (second_best) nb_parallel_workers = second_best; } } return nb_parallel_workers; } int _starpu_parallel_worker_config(hwloc_obj_type_t parallel_worker_level, struct starpu_parallel_worker_config *machine) { struct _starpu_parallel_worker_group *g; int ret; ret = _starpu_parallel_worker_topology(parallel_worker_level, machine); if (ret) return ret; for (g = _starpu_parallel_worker_group_list_begin(machine->groups) ; g != _starpu_parallel_worker_group_list_end(machine->groups) ; g = _starpu_parallel_worker_group_list_next(g)) if (_starpu_parallel_worker_group_create(g) == 0) return -ENODEV; starpu_task_wait_for_all(); struct _starpu_machine_config *config = _starpu_get_machine_config(); if (config->topology.nsched_ctxs == STARPU_NMAX_SCHED_CTXS) /* Too many contexts already :/ */ return -ENODEV; /* Create containing context */ if (machine->params->sched_policy_struct != NULL) { machine->id = starpu_sched_ctx_create(NULL, -1, "main sched ctx", STARPU_SCHED_CTX_POLICY_STRUCT, machine->params->sched_policy_struct, 0); } else if (machine->params->sched_policy_name != NULL) { machine->id = starpu_sched_ctx_create(NULL, -1, "main sched ctx", STARPU_SCHED_CTX_POLICY_NAME, machine->params->sched_policy_name, 0); } else { struct starpu_sched_policy *sched_policy; struct _starpu_sched_ctx *global_ctx =_starpu_get_sched_ctx_struct(STARPU_GLOBAL_SCHED_CTX); sched_policy = _starpu_get_sched_policy(global_ctx); machine->id = starpu_sched_ctx_create(NULL, -1, "main sched ctx", STARPU_SCHED_CTX_POLICY_STRUCT, sched_policy, 0); } _starpu_parallel_workers_set_nesting(machine); starpu_sched_ctx_set_context(&machine->id); return 0; } int _starpu_parallel_worker_topology(hwloc_obj_type_t parallel_worker_level, struct starpu_parallel_worker_config *machine) { int w; hwloc_topology_t topology; hwloc_cpuset_t avail_cpus; int nworkers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); if (nworkers == 0) return -ENODEV; int *workers; _STARPU_MALLOC(workers, sizeof(int) * nworkers); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workers, nworkers); struct _starpu_machine_config *config = _starpu_get_machine_config(); STARPU_ASSERT_MSG(config->topology.hwtopology != NULL, "STARPU_PARALLEL_WORKER: You " "need to call starpu_init() or make sure to activate hwloc."); hwloc_topology_dup(&topology, config->topology.hwtopology); avail_cpus = hwloc_bitmap_alloc(); hwloc_bitmap_zero(avail_cpus); for (w = 0; w < nworkers ; w++) { struct _starpu_worker *worker_str = _starpu_get_worker_struct(workers[w]); hwloc_bitmap_or(avail_cpus, avail_cpus, worker_str->hwloc_cpu_set); } hwloc_topology_restrict(topology, avail_cpus, 0); hwloc_bitmap_free(avail_cpus); free(workers); if (hwloc_get_nbobjs_by_type(topology, parallel_worker_level) <= 0) return -ENODEV; /* Use new topology to fill in the parallel_worker list */ machine->topology = topology; _starpu_parallel_worker_group(parallel_worker_level, machine); return 0; } void _starpu_parallel_worker_group(hwloc_obj_type_t parallel_worker_level, struct starpu_parallel_worker_config *machine) { int nb_objects; int i; struct _starpu_parallel_worker_group *group = NULL; if (machine->groups == NULL) machine->groups = _starpu_parallel_worker_group_list_new(); nb_objects = hwloc_get_nbobjs_by_type(machine->topology, parallel_worker_level); STARPU_ASSERT(nb_objects > 0); group = _starpu_parallel_worker_group_list_begin(machine->groups); for (i = 0 ; i < nb_objects ; i++) { hwloc_obj_t parallel_worker_obj = hwloc_get_obj_by_type(machine->topology, parallel_worker_level, i); if (group == NULL) { group = _starpu_parallel_worker_group_new(); _starpu_parallel_worker_group_init(group, machine); _starpu_parallel_worker_group_list_push_back(machine->groups, group); } group->group_obj = parallel_worker_obj; _starpu_parallel_worker(group); machine->ngroups++; machine->nparallel_workers += group->nparallel_workers; group = _starpu_parallel_worker_group_list_next(group); } return; } void _starpu_parallel_worker(struct _starpu_parallel_worker_group *group) { int i, avail_pus, npus, npreset=0; struct _starpu_parallel_worker *parallel_worker; npus = hwloc_get_nbobjs_inside_cpuset_by_type(group->father->topology, group->group_obj->cpuset, HWLOC_OBJ_PU); /* Preset parallel_workers */ avail_pus = npus; for (parallel_worker=_starpu_parallel_worker_list_begin(group->parallel_workers); parallel_worker!=_starpu_parallel_worker_list_end(group->parallel_workers); parallel_worker=_starpu_parallel_worker_list_next(parallel_worker)) { if (parallel_worker->ncores > avail_pus) parallel_worker->ncores = avail_pus; else if (avail_pus == 0) parallel_worker->ncores = 0; if (parallel_worker->ncores > 0) { _STARPU_MALLOC(parallel_worker->cores, sizeof(int)*parallel_worker->ncores); _STARPU_MALLOC(parallel_worker->workerids, sizeof(int)*parallel_worker->ncores); avail_pus -= parallel_worker->ncores; npreset++; } } /* Automatic parallel_workers */ group->nparallel_workers = _starpu_parallel_worker_analyze_parameters(group->params, avail_pus); for (i=0 ; inparallel_workers && avail_pus>0 ; i++) { if (parallel_worker == NULL) { parallel_worker = _starpu_parallel_worker_new(); _starpu_parallel_worker_init(parallel_worker, group); _starpu_parallel_worker_list_push_back(group->parallel_workers, parallel_worker); } if (parallel_worker->ncores != 0 && parallel_worker->ncores > avail_pus) { parallel_worker->ncores = avail_pus; } else { if (parallel_worker->params->keep_homogeneous) parallel_worker->ncores = avail_pus/(group->nparallel_workers-i); else parallel_worker->ncores = i==group->nparallel_workers-1? avail_pus: avail_pus/(group->nparallel_workers-i); } avail_pus -= parallel_worker->ncores; _STARPU_MALLOC(parallel_worker->cores, sizeof(int)*parallel_worker->ncores); _STARPU_MALLOC(parallel_worker->workerids, sizeof(int)*parallel_worker->ncores); parallel_worker = _starpu_parallel_worker_list_next(parallel_worker); } group->nparallel_workers += npreset; parallel_worker = _starpu_parallel_worker_list_begin(group->parallel_workers); int count = 0; static int starpu_parallel_worker_warned = 0; for (i=0 ; ifather->topology, group->group_obj->cpuset, HWLOC_OBJ_PU, i); /* If we have more than one worker on this resource, let's add them too -- even if it's bad (they'll all be boud on the same PU) */ int size = 0, j; struct _starpu_hwloc_userdata *data = pu->userdata; struct _starpu_worker_list *list = data->worker_list; struct _starpu_worker *worker_str; for (worker_str = _starpu_worker_list_begin(list); worker_str != _starpu_worker_list_end(list); worker_str = _starpu_worker_list_next(worker_str)) { if (worker_str->arch == STARPU_CPU_WORKER) size++; } if (size > 1) { STARPU_HG_DISABLE_CHECKING(starpu_parallel_worker_warned); if (!starpu_parallel_worker_warned) { _STARPU_DISP("STARPU PARALLEL_WORKERS: Caution! It seems that you have" " multiple workers bound to the same PU. If you have" " multithreading on your cores it is greatly advised" " to export STARPU_NTHREADS_PER_CORE=nb.\n"); starpu_parallel_worker_warned = 1; } parallel_worker->ncores += size-1; _STARPU_REALLOC(parallel_worker->cores, sizeof(int)*parallel_worker->ncores); _STARPU_REALLOC(parallel_worker->workerids, sizeof(int)*parallel_worker->ncores); } /* grab workerid list and return first cpu */ worker_str = _starpu_worker_list_begin(list); if (worker_str) hwloc_bitmap_or(parallel_worker->cpuset, parallel_worker->cpuset, worker_str->hwloc_cpu_set); j = 0; while (worker_str != _starpu_worker_list_end(list)) { if (worker_str->arch == STARPU_CPU_WORKER) { parallel_worker->cores[count+j] = worker_str->bindid; parallel_worker->workerids[count+j] = worker_str->workerid; j++; } worker_str = _starpu_worker_list_next(worker_str); } count+=size; if (parallel_worker->ncores == count) { count = 0; parallel_worker = _starpu_parallel_worker_list_next(parallel_worker); } } return; } struct starpu_cluster_machine STARPU_DEPRECATED { unsigned id; hwloc_topology_t topology; unsigned nparallel_workers; unsigned ngroups; struct _starpu_parallel_worker_group_list *groups; struct _starpu_parallel_worker_parameters *params; }; struct starpu_cluster_machine *starpu_cluster_machine(hwloc_obj_type_t cluster_level, ...) { struct starpu_parallel_worker_config *config; va_list varg_list; va_start(varg_list, cluster_level); config = _starpu_parallel_worker_init_varg(cluster_level, varg_list); va_end(varg_list); return (struct starpu_cluster_machine *)config; } int starpu_uncluster_machine(struct starpu_cluster_machine *clusters) { struct starpu_parallel_worker_config *c = (struct starpu_parallel_worker_config *)clusters; return starpu_parallel_worker_shutdown(c); } int starpu_cluster_print(struct starpu_cluster_machine *clusters) { struct starpu_parallel_worker_config *c = (struct starpu_parallel_worker_config *)clusters; return starpu_parallel_worker_print(c); } #endif starpu-1.4.9+dfsg/src/parallel_worker/starpu_parallel_worker_create.h000066400000000000000000000105431507764646700262460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_PARALLEL_WORKERS_CREATE_H__ #define __STARPU_PARALLEL_WORKERS_CREATE_H__ /** @file */ #include #include #include #include #include #ifdef STARPU_MKL #include #endif #ifdef STARPU_PARALLEL_WORKER #ifdef __cplusplus extern #endif #pragma GCC visibility push(hidden) struct starpu_parallel_worker_config { unsigned id; hwloc_topology_t topology; unsigned nparallel_workers; unsigned ngroups; struct _starpu_parallel_worker_group_list *groups; struct _starpu_parallel_worker_parameters *orig_params; struct _starpu_parallel_worker_parameters *params; }; struct _starpu_parallel_worker_parameters { int min_nb; int max_nb; int nb; char *sched_policy_name; struct starpu_sched_policy *sched_policy_struct; unsigned keep_homogeneous; unsigned prefere_min; void (*create_func)(void*); void *create_func_arg; int type; unsigned awake_workers; }; LIST_TYPE(_starpu_parallel_worker_group, unsigned id; hwloc_obj_t group_obj; int nparallel_workers; struct _starpu_parallel_worker_list *parallel_workers; struct starpu_parallel_worker_config *father; struct _starpu_parallel_worker_parameters *params; ) LIST_TYPE(_starpu_parallel_worker, unsigned id; hwloc_cpuset_t cpuset; int ncores; int *cores; int *workerids; struct _starpu_parallel_worker_group *father; struct _starpu_parallel_worker_parameters *params; ) /** Machine discovery and parallel_worker creation main functions */ int _starpu_parallel_worker_config(hwloc_obj_type_t parallel_worker_level, struct starpu_parallel_worker_config *machine); int _starpu_parallel_worker_topology(hwloc_obj_type_t parallel_worker_level, struct starpu_parallel_worker_config *machine); void _starpu_parallel_worker_group(hwloc_obj_type_t parallel_worker_level, struct starpu_parallel_worker_config *machine); void _starpu_parallel_worker(struct _starpu_parallel_worker_group *group); /** Parameter functions */ void _starpu_parallel_worker_init_parameters(struct _starpu_parallel_worker_parameters *globals); void _starpu_parallel_worker_copy_parameters(struct _starpu_parallel_worker_parameters *src, struct _starpu_parallel_worker_parameters *dst); int _starpu_parallel_worker_analyze_parameters(struct _starpu_parallel_worker_parameters *params, int npus); /** Parallel_Worker helper functions */ void _starpu_parallel_worker_init(struct _starpu_parallel_worker *parallel_worker, struct _starpu_parallel_worker_group *father); int _starpu_parallel_worker_create(struct _starpu_parallel_worker *parallel_worker); int _starpu_parallel_worker_bind(struct _starpu_parallel_worker *parallel_worker); int _starpu_parallel_worker_remove(struct _starpu_parallel_worker_list *parallel_worker_list, struct _starpu_parallel_worker *parallel_worker); /** Parallel_Worker group helper function */ void _starpu_parallel_worker_group_init(struct _starpu_parallel_worker_group *group, struct starpu_parallel_worker_config *father); int _starpu_parallel_worker_group_create(struct _starpu_parallel_worker_group *group); int _starpu_parallel_worker_group_remove(struct _starpu_parallel_worker_group_list *group_list, struct _starpu_parallel_worker_group *group); /** Binding helpers */ void _starpu_parallel_worker_noop(void *buffers[], void *cl_arg) { (void) buffers; (void) cl_arg; } static struct starpu_codelet _starpu_parallel_worker_bind_cl= { .cpu_funcs = {_starpu_parallel_worker_noop}, .nbuffers = 0, .name = "parallel_worker_internal_runtime_init" }; typedef void (*starpu_binding_function)(void*); starpu_binding_function _starpu_parallel_worker_type_get_func(enum starpu_parallel_worker_types type); #pragma GCC visibility pop #endif #endif /* __STARPU_PARALLEL_WORKERS_CREATE_H__ */ starpu-1.4.9+dfsg/src/profiling/000077500000000000000000000000001507764646700165705ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/profiling/bound.c000066400000000000000000001013021507764646700200400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * Record which kinds of tasks have been executed, to later on compute an upper * bound of the performance that could have theoretically been achieved */ #include #include #include #include #include #include #ifdef STARPU_HAVE_GLPK_H #include #endif /* STARPU_HAVE_GLPK_H */ /* TODO: output duration between starpu_bound_start and starpu_bound_stop */ /* TODO: compute critical path and introduce it in the LP */ /* * Record without dependencies: just count each kind of task * * The linear programming problem will just have as variables: * - the number of tasks of kind `t' executed by worker `w' * - the total duration * * and the constraints will be: * - the time taken by each worker to complete its assigned tasks is lower than * the total duration. * - the total number of tasks of a given kind is equal to the number run by the * application. */ struct bound_task_pool { /* Which codelet has been executed */ struct starpu_codelet *cl; /* Task footprint key (for history-based perfmodel) */ uint32_t footprint; /* Number of tasks of this kind */ unsigned long n; /* Other task kinds */ struct bound_task_pool *next; }; /* * Record with dependencies: each task is recorded separately * * The linear programming problem will have as variables: * - The start time of each task * - The completion time of each tag * - The total duration * - For each task and for each worker, whether the task is executing on that worker. * - For each pair of task, which task is scheduled first. * * and the constraints will be: * - All task start time plus duration are less than total duration * - Each task is executed on exactly one worker. * - Each task starts after all its task dependencies finish. * - Each task starts after all its tag dependencies finish. * - For each task pair and each worker, if both tasks are executed by that worker, * one is started after the other's completion. */ struct task_dep { /* Task this depends on */ struct bound_task *dep; /* Data transferred between tasks (i.e. implicit data dep size) */ size_t size; }; struct bound_task { /* Unique ID */ unsigned long id; /* Tag ID, if any */ starpu_tag_t tag_id; int use_tag; /* Which codelet has been executed */ struct starpu_codelet *cl; /* Task footprint key */ uint32_t footprint; /* Task priority */ int priority; /* Tasks this one depends on */ struct task_dep *deps; int depsn; /* Estimated duration */ double** duration[STARPU_NARCH]; /* Other tasks */ struct bound_task *next; }; struct bound_tag_dep { starpu_tag_t tag; starpu_tag_t dep_tag; struct bound_tag_dep *next; }; static struct bound_task_pool *task_pools, *last; static struct bound_task *tasks; static struct bound_tag_dep *tag_deps; int _starpu_bound_recording; static int recorddeps; static int recordprio; static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static void _starpu_bound_clear(int record, int deps, int prio) { struct bound_task_pool *tp; struct bound_task *t; struct bound_tag_dep *td; STARPU_PTHREAD_MUTEX_LOCK(&mutex); tp = task_pools; task_pools = NULL; last = NULL; t = tasks; tasks = NULL; td = tag_deps; tag_deps = NULL; _starpu_bound_recording = record; recorddeps = deps; recordprio = prio; STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); while (tp != NULL) { struct bound_task_pool *next = tp->next; free(tp); tp = next; } while (t != NULL) { struct bound_task *next = t->next; unsigned i,j; for (i = 0; i < STARPU_NARCH; i++) { if (t->duration[i]) { for (j = 0; t->duration[i][j]; j++) free(t->duration[i][j]); free(t->duration[i]); } } free(t->deps); free(t); t = next; } while (td != NULL) { struct bound_tag_dep *next = td->next; free(td); td = next; } } void starpu_bound_clear(void) { _starpu_bound_clear(0, 0, 0); } /* Initialization */ void starpu_bound_start(int deps, int prio) { _starpu_bound_clear(1, deps, prio); } /* Whether we will include it in the computation */ static int good_job(struct _starpu_job *j) { /* No codelet, nothing to measure */ if (j->exclude_from_dag) return 0; if (!j->task->cl) return 0; /* No performance model, no time duration estimation */ if (!j->task->cl->model) return 0; /* Only support history based */ if (j->task->cl->model->type != STARPU_HISTORY_BASED && j->task->cl->model->type != STARPU_NL_REGRESSION_BASED) return 0; return 1; } static double** initialize_arch_duration(int maxdevid, unsigned* maxncore_table) { int devid, maxncore; double ** arch_model; _STARPU_MALLOC(arch_model, sizeof(*arch_model)*(maxdevid+1)); arch_model[maxdevid] = NULL; for(devid=0; devidduration[type] = initialize_arch_duration(conf->topology.nhwdevices[type], conf->topology.nworker[type]); } static struct starpu_perfmodel_device device = { .type = STARPU_CPU_WORKER, .devid = 0, .ncores = 1, }; static struct starpu_perfmodel_arch dumb_arch = { .ndevices = 1, .devices = &device, }; /* Create a new task (either because it has just been submitted, or a * dependency was added before submission) */ static void new_task(struct _starpu_job *j) { struct bound_task *t; if (j->bound_task) return; _STARPU_CALLOC(t, 1, sizeof(*t)); t->id = j->job_id; t->tag_id = j->task->tag_id; t->use_tag = j->task->use_tag; t->cl = j->task->cl; t->footprint = _starpu_compute_buffers_footprint(j->task->cl?j->task->cl->model:NULL, &dumb_arch, 0, j); t->priority = j->task->priority; t->deps = NULL; t->depsn = 0; initialize_duration(t); t->next = tasks; j->bound_task = t; tasks = t; } /* A new task was submitted, record it */ void _starpu_bound_record(struct _starpu_job *j) { if (STARPU_LIKELY(!_starpu_bound_recording)) return; if (!good_job(j)) return; STARPU_PTHREAD_MUTEX_LOCK(&mutex); /* Re-check, this time with mutex held */ if (!_starpu_bound_recording) { STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); return; } if (recorddeps) { new_task(j); } else { struct bound_task_pool *tp; _starpu_compute_buffers_footprint(j->task->cl?j->task->cl->model:NULL, NULL, 0, j); if (last && last->cl == j->task->cl && last->footprint == j->footprint) tp = last; else for (tp = task_pools; tp; tp = tp->next) if (tp->cl == j->task->cl && tp->footprint == j->footprint) break; if (!tp) { _STARPU_MALLOC(tp, sizeof(*tp)); tp->cl = j->task->cl; tp->footprint = j->footprint; tp->n = 0; tp->next = task_pools; task_pools = tp; } /* One more task of this kind */ tp->n++; } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } /* A tag dependency was emitted, record it */ void _starpu_bound_tag_dep(starpu_tag_t id, starpu_tag_t dep_id) { struct bound_tag_dep *td; if (!_starpu_bound_recording || !recorddeps) return; STARPU_PTHREAD_MUTEX_LOCK(&mutex); /* Re-check, this time with mutex held */ if (!_starpu_bound_recording || !recorddeps) { STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); return; } _STARPU_MALLOC(td, sizeof(*td)); td->tag = id; td->dep_tag = dep_id; td->next = tag_deps; tag_deps = td; STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } /* A task dependency was emitted, record it */ void _starpu_bound_task_dep(struct _starpu_job *j, struct _starpu_job *dep_j) { struct bound_task *t; int i; if (!_starpu_bound_recording || !recorddeps) return; if (!good_job(j) || !good_job(dep_j)) return; STARPU_PTHREAD_MUTEX_LOCK(&mutex); /* Re-check, this time with mutex held */ if (!_starpu_bound_recording || !recorddeps) { STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); return; } new_task(j); new_task(dep_j); t = j->bound_task; for (i = 0; i < t->depsn; i++) if (t->deps[i].dep == dep_j->bound_task) break; if (i == t->depsn) { /* Not already there, add */ _STARPU_REALLOC(t->deps, ++t->depsn * sizeof(t->deps[0])); t->deps[t->depsn-1].dep = dep_j->bound_task; t->deps[t->depsn-1].size = 0; /* We don't have data information in that case */ } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } /* Look for job with id ID among our tasks */ static struct bound_task *find_job(unsigned long id) { struct bound_task *t; for (t = tasks; t; t = t->next) if (t->id == id) return t; return NULL; } /* Job J depends on previous job of id ID (which is already finished) */ void _starpu_bound_job_id_dep_size(size_t size, struct _starpu_job *j, unsigned long id) { struct bound_task *t, *dep_t; int i; if (!_starpu_bound_recording || !recorddeps) return; if (!good_job(j)) return; STARPU_PTHREAD_MUTEX_LOCK(&mutex); /* Re-check, this time with mutex held */ if (!_starpu_bound_recording || !recorddeps) { STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); return; } new_task(j); dep_t = find_job(id); if (!dep_t) { _STARPU_MSG("dependency %lu not found !\n", id); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); return; } t = j->bound_task; for (i = 0; i < t->depsn; i++) if (t->deps[i].dep == dep_t) { /* Found, just add size */ t->deps[i].size += size; break; } if (i == t->depsn) { /* Not already there, add */ _STARPU_REALLOC(t->deps, ++t->depsn * sizeof(t->deps[0])); t->deps[t->depsn-1].dep = dep_t; t->deps[t->depsn-1].size = size; } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } void _starpu_bound_job_id_dep(starpu_data_handle_t handle, struct _starpu_job *j, unsigned long id) { if (!_starpu_bound_recording || !recorddeps) return; if (!good_job(j)) return; _starpu_bound_job_id_dep_size(_starpu_data_get_size(handle), j, id); } void starpu_bound_stop(void) { STARPU_PTHREAD_MUTEX_LOCK(&mutex); _starpu_bound_recording = 0; STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } /* Compute all tasks times on all workers */ static void _starpu_get_tasks_times(int nw, int nt, double *times) { struct bound_task_pool *tp; int w, t; for (w = 0; w < nw; w++) { for (t = 0, tp = task_pools; tp; t++, tp = tp->next) { struct _starpu_job j = { .footprint = tp->footprint, .footprint_is_computed = 1, }; struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS); double length = _starpu_history_based_job_expected_perf(tp->cl->model, arch, &j, j.nimpl) - _starpu_history_based_job_expected_deviation(tp->cl->model, arch, &j, j.nimpl); if (isnan(length)) times[w*nt+t] = NAN; else times[w*nt+t] = length / 1000.; } } } /* Return whether PARENT is an ancestor of CHILD */ static int ancestor(struct bound_task *child, struct bound_task *parent) { int i; for (i = 0; i < child->depsn; i++) { if (parent == child->deps[i].dep) return 1; if (ancestor(child->deps[i].dep, parent)) return -1; } return 0; } /* Print bound recording in .dot format */ void starpu_bound_print_dot(FILE *output) { struct bound_task *t; struct bound_tag_dep *td; int i; if (!recorddeps) { fprintf(output, "Dependencies were not enabled in the starpu_bound_start call, thus not supported\n"); return; } fprintf(output, "strict digraph bounddeps {\n"); for (t = tasks; t; t = t->next) { fprintf(output, "\"t%lu\" [label=\"%lu: %s\"]\n", t->id, t->id, _starpu_codelet_get_model_name(t->cl)); for (i = 0; i < t->depsn; i++) fprintf(output, "\"t%lu\" -> \"t%lu\"\n", t->deps[i].dep->id, t->id); } for (td = tag_deps; td; td = td->next) fprintf(output, "\"tag%lu\" -> \"tag%lu\";\n", (unsigned long) td->dep_tag, (unsigned long) td->tag); fprintf(output, "}\n"); } /* * Print bound system in lp_solve format * * When dependencies are enabled, you can check the set of tasks and deps that * were recorded by using tools/lp2paje and vite. */ void starpu_bound_print_lp(FILE *output) { int nt; /* Number of different kinds of tasks */ int nw; /* Number of different workers */ int t; int w, w2; /* worker */ unsigned n, n2; STARPU_PTHREAD_MUTEX_LOCK(&mutex); nw = starpu_worker_get_count(); if (!nw) /* Make llvm happy about the VLA below */ return; if (recorddeps) { struct bound_task *t1, *t2; struct bound_tag_dep *td; int i; nt = 0; for (t1 = tasks; t1; t1 = t1->next) { if (t1->cl->model->type != STARPU_HISTORY_BASED && t1->cl->model->type != STARPU_NL_REGRESSION_BASED) /* TODO: */ _STARPU_MSG("Warning: task %s uses a perf model which is neither history nor non-linear regression-based, support for such model is not implemented yet, system will not be solvable.\n", _starpu_codelet_get_model_name(t1->cl)); struct _starpu_job j = { .footprint = t1->footprint, .footprint_is_computed = 1, }; for (w = 0; w < nw; w++) { struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS); if (_STARPU_IS_ZERO(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores])) { double length = _starpu_history_based_job_expected_perf(t1->cl->model, arch, &j,j.nimpl) - _starpu_history_based_job_expected_deviation(t1->cl->model, arch, &j,j.nimpl); if (isnan(length)) /* Avoid problems with binary coding of doubles */ t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores] = NAN; else t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores] = length / 1000.; } } nt++; } if (!nt) return; fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n"); fprintf(output, "/* !! This is a big system, it will be long to solve !! */\n\n"); fprintf(output, "/* We want to minimize total execution time (ms) */\n"); fprintf(output, "min: tmax;\n\n"); fprintf(output, "/* Number of tasks */\n"); fprintf(output, "nt = %d;\n", nt); fprintf(output, "/* Number of workers */\n"); fprintf(output, "nw = %d;\n", nw); fprintf(output, "/* The total execution time is the maximum of all task completion times (ms) */\n"); for (t1 = tasks; t1; t1 = t1->next) fprintf(output, "c%lu <= tmax;\n", t1->id); fprintf(output, "\n/* We have tasks executing on workers, exactly one worker executes each task */\n"); for (t1 = tasks; t1; t1 = t1->next) { for (w = 0; w < nw; w++) { struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS); if (!isnan(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores])) fprintf(output, " +t%luw%d", t1->id, w); } fprintf(output, " = 1;\n"); } fprintf(output, "\n/* Completion time is start time plus computation time */\n"); fprintf(output, "/* According to where the task is indeed executed */\n"); for (t1 = tasks; t1; t1 = t1->next) { fprintf(output, "/* %s %x */\tc%lu = s%lu", _starpu_codelet_get_model_name(t1->cl), (unsigned) t1->footprint, t1->id, t1->id); for (w = 0; w < nw; w++) { struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS); if (!isnan(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores])) fprintf(output, " + %f t%luw%d", t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores], t1->id, w); } fprintf(output, ";\n"); } fprintf(output, "\n/* Each task starts after all its task dependencies finish and data is transferred. */\n"); fprintf(output, "/* Note that the dependency finish time depends on the worker where it's working */\n"); for (t1 = tasks; t1; t1 = t1->next) for (i = 0; i < t1->depsn; i++) { fprintf(output, "/* %lu bytes transferred */\n", (unsigned long) t1->deps[i].size); fprintf(output, "s%lu >= c%lu", t1->id, t1->deps[i].dep->id); /* Transfer time: pick up one source node and a worker on it */ for (n = 0; n < starpu_memory_nodes_get_count(); n++) for (w = 0; w < nw; w++) if (starpu_worker_get_memory_node(w) == n) { /* pick up another destination node and a worker on it */ for (n2 = 0; n2 < starpu_memory_nodes_get_count(); n2++) if (n2 != n) { for (w2 = 0; w2 < nw; w2++) if (starpu_worker_get_memory_node(w2) == n2) { /* If predecessor is on worker w and successor * on worker w2 on different nodes, we need to * transfer the data. */ fprintf(output, " + d_t%luw%dt%luw%d", t1->deps[i].dep->id, w, t1->id, w2); } } } fprintf(output, ";\n"); /* Transfer time: pick up one source node and a worker on it */ for (n = 0; n < starpu_memory_nodes_get_count(); n++) for (w = 0; w < nw; w++) if (starpu_worker_get_memory_node(w) == n) { /* pick up another destination node and a worker on it */ for (n2 = 0; n2 < starpu_memory_nodes_get_count(); n2++) if (n2 != n) { for (w2 = 0; w2 < nw; w2++) if (starpu_worker_get_memory_node(w2) == n2) { /* The data transfer is at least 0ms */ fprintf(output, "d_t%luw%dt%luw%d >= 0;\n", t1->deps[i].dep->id, w, t1->id, w2); /* The data transfer from w to w2 only happens if tasks run there */ fprintf(output, "d_t%luw%dt%luw%d >= %f - 2e5 + 1e5 t%luw%d + 1e5 t%luw%d;\n", t1->deps[i].dep->id, w, t1->id, w2, starpu_transfer_predict(n, n2, t1->deps[i].size)/1000., t1->deps[i].dep->id, w, t1->id, w2); } } } } fprintf(output, "\n/* Each tag finishes when its corresponding task finishes */\n"); for (t1 = tasks; t1; t1 = t1->next) if (t1->use_tag) { for (w = 0; w < nw; w++) fprintf(output, "c%lu = tag%lu;\n", t1->id, (unsigned long) t1->tag_id); } fprintf(output, "\n/* tags start after all their tag dependencies finish. */\n"); for (td = tag_deps; td; td = td->next) fprintf(output, "tag%lu >= tag%lu;\n", (unsigned long) td->tag, (unsigned long) td->dep_tag); /* TODO: factorize ancestor calls */ fprintf(output, "\n/* For each task pair and each worker, if both tasks are executed by the same worker,\n"); fprintf(output, " one is started after the other's completion */\n"); for (t1 = tasks; t1; t1 = t1->next) { for (t2 = t1->next; t2; t2 = t2->next) { if (!ancestor(t1, t2) && !ancestor(t2, t1)) { for (w = 0; w < nw; w++) { struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(w, STARPU_NMAX_SCHED_CTXS); if (!isnan(t1->duration[arch->devices[0].type][arch->devices[0].devid][arch->devices[0].ncores])) { fprintf(output, "s%lu - c%lu >= -3e5 + 1e5 t%luw%d + 1e5 t%luw%d + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, w, t2->id, w, t1->id, t2->id); fprintf(output, "s%lu - c%lu >= -2e5 + 1e5 t%luw%d + 1e5 t%luw%d - 1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, w, t2->id, w, t1->id, t2->id); } } } } } #if 0 /* Doesn't help at all to actually express what "after" means */ for (t1 = tasks; t1; t1 = t1->next) for (t2 = t1->next; t2; t2 = t2->next) if (!ancestor(t1, t2) && !ancestor(t2, t1)) { fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id); fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id); } #endif if (recordprio) { fprintf(output, "\n/* For StarPU, a priority means given schedulable tasks it will consider the\n"); fprintf(output, " * more prioritized first */\n"); for (t1 = tasks; t1; t1 = t1->next) { for (t2 = t1->next; t2; t2 = t2->next) { if (!ancestor(t1, t2) && !ancestor(t2, t1) && t1->priority != t2->priority) { if (t1->priority > t2->priority) { /* Either t2 is scheduled before t1, but then it needs to be scheduled before some t dep finishes */ /* One of the t1 deps to give the maximum start time for t2 */ if (t1->depsn > 1) { for (i = 0; i < t1->depsn; i++) fprintf(output, " + t%lut%lud%d", t2->id, t1->id, i); fprintf(output, " = 1;\n"); } for (i = 0; i < t1->depsn; i++) { fprintf(output, "c%lu - s%lu >= ", t1->deps[i].dep->id, t2->id); if (t1->depsn > 1) /* Only checks this when it's this dependency that is chosen */ fprintf(output, "-2e5 + 1e5 t%lut%lud%d", t2->id, t1->id, i); else fprintf(output, "-1e5"); /* Only check this if t1 is after t2 */ fprintf(output, " + 1e5 t%luafter%lu", t1->id, t2->id); fprintf(output, ";\n"); } /* Or t2 is scheduled after t1 is. */ fprintf(output, "s%lu - s%lu >= -1e5 t%luafter%lu;\n", t2->id, t1->id, t1->id, t2->id); } else { /* Either t1 is scheduled before t2, but then it needs to be scheduled before some t2 dep finishes */ /* One of the t2 deps to give the maximum start time for t1 */ if (t2->depsn > 1) { for (i = 0; i < t2->depsn; i++) fprintf(output, " + t%lut%lud%d", t1->id, t2->id, i); fprintf(output, " = 1;\n"); } for (i = 0; i < t2->depsn; i++) { fprintf(output, "c%lu - s%lu >= ", t2->deps[i].dep->id, t1->id); if (t2->depsn > 1) /* Only checks this when it's this dependency that is chosen */ fprintf(output, "-1e5 + 1e5 t%lut%lud%d", t1->id, t2->id, i); /* Only check this if t2 is after t1 */ fprintf(output, " - 1e5 t%luafter%lu;\n", t1->id, t2->id); } /* Or t1 is scheduled after t2 is. */ fprintf(output, "s%lu - s%lu >= -1e5 + 1e5 t%luafter%lu;\n", t1->id, t2->id, t1->id, t2->id); } } } } } for (t1 = tasks; t1; t1 = t1->next) for (t2 = t1->next; t2; t2 = t2->next) if (!ancestor(t1, t2) && !ancestor(t2, t1)) { fprintf(output, "bin t%luafter%lu;\n", t1->id, t2->id); if (recordprio && t1->priority != t2->priority) { if (t1->priority > t2->priority) { if (t1->depsn > 1) for (i = 0; i < t1->depsn; i++) fprintf(output, "bin t%lut%lud%d;\n", t2->id, t1->id, i); } else { if (t2->depsn > 1) for (i = 0; i < t2->depsn; i++) fprintf(output, "bin t%lut%lud%d;\n", t1->id, t2->id, i); } } } for (t1 = tasks; t1; t1 = t1->next) for (w = 0; w < nw; w++) fprintf(output, "bin t%luw%d;\n", t1->id, w); } else { struct bound_task_pool *tp; nt = 0; for (tp = task_pools; tp; tp = tp->next) nt++; if (!nt) return; { double times[nw*nt]; _starpu_get_tasks_times(nw, nt, times); fprintf(output, "/* StarPU upper bound linear programming problem, to be run in lp_solve. */\n\n"); fprintf(output, "/* We want to minimize total execution time (ms) */\n"); fprintf(output, "min: tmax;\n\n"); fprintf(output, "/* Which is the maximum of all worker execution times (ms) */\n"); for (w = 0; w < nw; w++) { char name[32]; starpu_worker_get_name(w, name, sizeof(name)); fprintf(output, "/* worker %s */\n0", name); for (t = 0, tp = task_pools; tp; t++, tp = tp->next) { if (!isnan(times[w*nt+t])) fprintf(output, "\t%+f * w%dt%dn", (float) times[w*nt+t], w, t); } fprintf(output, " <= tmax;\n"); } fprintf(output, "\n"); fprintf(output, "/* And we have to have computed exactly all tasks */\n"); for (t = 0, tp = task_pools; tp; t++, tp = tp->next) { int got_one = 0; fprintf(output, "/* task %s key %x */\n0", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint); for (w = 0; w < nw; w++) { if (isnan(times[w*nt+t])) _STARPU_MSG("Warning: task %s has no performance measurement for worker %d.\n", _starpu_codelet_get_model_name(tp->cl), w); else { got_one = 1; fprintf(output, "\t+w%dt%dn", w, t); } } fprintf(output, " = %lu;\n", tp->n); if (!got_one) _STARPU_MSG("Warning: task %s has no performance measurement for any worker, system will not be solvable!\n", _starpu_codelet_get_model_name(tp->cl)); /* Show actual values */ fprintf(output, "/*"); for (w = 0; w < nw; w++) fprintf(output, "\t+%lu", tp->cl->per_worker_stats[w]); fprintf(output, "\t*/\n\n"); } fprintf(output, "/* Optionally tell that tasks can not be divided */\n"); fprintf(output, "/* int "); int first = 1; for (w = 0; w < nw; w++) for (t = 0, tp = task_pools; tp; t++, tp = tp->next) { if (!first) fprintf(output, ","); else first = 0; fprintf(output, "w%dt%dn", w, t); } fprintf(output, "; */\n"); } } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } /* * Print bound system in MPS output format */ void starpu_bound_print_mps(FILE *output) { struct bound_task_pool * tp; int nt; /* Number of different kinds of tasks */ int nw; /* Number of different workers */ int t, w; if (recorddeps) { fprintf(output, "Dependencies were enabled in the starpu_bound_start call, thus not supported\n"); return; } nw = starpu_worker_get_count(); if (!nw) /* Make llvm happy about the VLA below */ return; STARPU_PTHREAD_MUTEX_LOCK(&mutex); nt = 0; for (tp = task_pools; tp; tp = tp->next) nt++; if (!nt) { STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); return; } { double times[nw*nt]; _starpu_get_tasks_times(nw, nt, times); fprintf(output, "NAME StarPU theoretical bound\n"); fprintf(output, "*\nROWS\n"); fprintf(output, "* We want to minimize total execution time (ms)\n"); fprintf(output, " N TMAX\n"); fprintf(output, "* Which is the maximum of all worker execution times (ms)\n"); for (w = 0; w < nw; w++) { char name[32]; starpu_worker_get_name(w, name, sizeof(name)); fprintf(output, "* worker %s\n", name); fprintf(output, " L W%d\n", w); } fprintf(output, "*\n* And we have to have computed exactly all tasks\n*\n"); for (t = 0, tp = task_pools; tp; t++, tp = tp->next) { fprintf(output, "* task %s key %x\n", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint); fprintf(output, " E T%d\n", t); } fprintf(output, "*\nCOLUMNS\n*\n"); fprintf(output, "*\n* Execution times and completion of all tasks\n*\n"); for (w = 0; w < nw; w++) for (t = 0, tp = task_pools; tp; t++, tp = tp->next) if (!isnan(times[w*nt+t])) { char name[23]; snprintf(name, sizeof(name), "W%dT%d", w, t); fprintf(output," %-8s W%-7d %12f\n", name, w, times[w*nt+t]); fprintf(output," %-8s T%-7d %12d\n", name, t, 1); } fprintf(output, "*\n* Total execution time\n*\n"); for (w = 0; w < nw; w++) fprintf(output," TMAX W%-2d %12d\n", w, -1); fprintf(output," TMAX TMAX %12d\n", 1); fprintf(output, "*\nRHS\n*\n"); fprintf(output, "*\n* Total number of tasks\n*\n"); for (t = 0, tp = task_pools; tp; t++, tp = tp->next) fprintf(output," NT%-2d T%-7d %12lu\n", t, t, tp->n); fprintf(output, "ENDATA\n"); } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } /* * Solve bound system thanks to GNU Linear Programming Kit backend */ #ifdef STARPU_HAVE_GLPK_H static glp_prob *_starpu_bound_glp_resolve(int integer) { struct bound_task_pool * tp; int nt; /* Number of different kinds of tasks */ int nw; /* Number of different workers */ int t, w; glp_prob *lp; int ret; nw = starpu_worker_get_count(); if (!nw) /* Make llvm happy about the VLA below */ return NULL; nt = 0; for (tp = task_pools; tp; tp = tp->next) nt++; if (!nt) return NULL; lp = glp_create_prob(); glp_set_prob_name(lp, "StarPU theoretical bound"); glp_set_obj_dir(lp, GLP_MIN); glp_set_obj_name(lp, "total execution time"); { double times[nw*nt]; int ne = nw * (nt+1) /* worker execution time */ + nt * nw + 1; /* glp dumbness */ int n = 1; int ia[ne], ja[ne]; double ar[ne]; _starpu_get_tasks_times(nw, nt, times); /* Variables: number of tasks i assigned to worker j, and tmax */ glp_add_cols(lp, nw*nt+1); #define colnum(w, t) ((t)*nw+(w)+1) glp_set_obj_coef(lp, nw*nt+1, 1.); for (w = 0; w < nw; w++) for (t = 0, tp = task_pools; tp; t++, tp = tp->next) { char name[32]; snprintf(name, sizeof(name), "w%dt%dn", w, t); glp_set_col_name(lp, colnum(w, t), name); if (integer) glp_set_col_kind(lp, colnum(w, t), GLP_IV); glp_set_col_bnds(lp, colnum(w, t), GLP_LO, 0., 0.); } glp_set_col_bnds(lp, nw*nt+1, GLP_LO, 0., 0.); /* Total worker execution time */ glp_add_rows(lp, nw); for (t = 0, tp = task_pools; tp; t++, tp = tp->next) { int someone = 0; for (w = 0; w < nw; w++) if (!isnan(times[w*nt+t])) someone = 1; if (!someone) { /* This task does not have any performance model at all, abort */ glp_delete_prob(lp); return NULL; } } for (w = 0; w < nw; w++) { char name[32], title[64]; starpu_worker_get_name(w, name, sizeof(name)); snprintf(title, sizeof(title), "worker %s", name); glp_set_row_name(lp, w+1, title); for (t = 0, tp = task_pools; tp; t++, tp = tp->next) { ia[n] = w+1; ja[n] = colnum(w, t); if (isnan(times[w*nt+t])) ar[n] = 1000000000.; else ar[n] = times[w*nt+t]; n++; } /* tmax */ ia[n] = w+1; ja[n] = nw*nt+1; ar[n] = -1; n++; glp_set_row_bnds(lp, w+1, GLP_UP, 0, 0); } /* Total task completion */ glp_add_rows(lp, nt); for (t = 0, tp = task_pools; tp; t++, tp = tp->next) { char name[32], title[64]; starpu_worker_get_name(w, name, sizeof(name)); snprintf(title, sizeof(title), "task %s key %x", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint); glp_set_row_name(lp, nw+t+1, title); for (w = 0; w < nw; w++) { ia[n] = nw+t+1; ja[n] = colnum(w, t); ar[n] = 1; n++; } glp_set_row_bnds(lp, nw+t+1, GLP_FX, tp->n, tp->n); } STARPU_ASSERT(n == ne); glp_load_matrix(lp, ne-1, ia, ja, ar); } glp_smcp parm; glp_init_smcp(&parm); parm.msg_lev = GLP_MSG_OFF; ret = glp_simplex(lp, &parm); if (ret) { glp_delete_prob(lp); lp = NULL; return NULL; } if (integer) { glp_iocp iocp; glp_init_iocp(&iocp); iocp.msg_lev = GLP_MSG_OFF; glp_intopt(lp, &iocp); } return lp; } #endif /* STARPU_HAVE_GLPK_H */ /* Print the computed bound as well as the optimized distribution of tasks */ void starpu_bound_print(FILE *output, int integer) { #ifdef STARPU_HAVE_GLPK_H if (recorddeps) { fprintf(output, "Dependencies were enabled in the starpu_bound_start call, thus not supported\n"); return; } STARPU_PTHREAD_MUTEX_LOCK(&mutex); glp_prob *lp = _starpu_bound_glp_resolve(integer); if (lp) { struct bound_task_pool * tp; int t, w; int nw; /* Number of different workers */ double tmax; nw = starpu_worker_get_count(); if (integer) tmax = glp_mip_obj_val(lp); else tmax = glp_get_obj_val(lp); fprintf(output, "Theoretical minimum execution time: %f ms\n", tmax); for (t = 0, tp = task_pools; tp; t++, tp = tp->next) { fprintf(output, "%s key %x\n", _starpu_codelet_get_model_name(tp->cl), (unsigned) tp->footprint); for (w = 0; w < nw; w++) if (integer) fprintf(output, "\tw%dt%dn %f", w, t, glp_mip_col_val(lp, colnum(w, t))); else fprintf(output, "\tw%dt%dn %f", w, t, glp_get_col_prim(lp, colnum(w, t))); fprintf(output, "\n"); } glp_delete_prob(lp); } else { _STARPU_MSG("Simplex failed\n"); } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); #else /* STARPU_HAVE_GLPK_H */ (void) integer; fprintf(output, "Please rebuild StarPU with glpk installed.\n"); #endif /* STARPU_HAVE_GLPK_H */ } /* Compute and return the bound */ void starpu_bound_compute(double *res, double *integer_res, int integer) { #ifdef STARPU_HAVE_GLPK_H double ret; if (recorddeps) { *res = 0.; return; } STARPU_PTHREAD_MUTEX_LOCK(&mutex); glp_prob *lp = _starpu_bound_glp_resolve(integer); if (lp) { ret = glp_get_obj_val(lp); if (integer) *integer_res = glp_mip_obj_val(lp); glp_delete_prob(lp); } else ret = 0.; STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); *res = ret; #else /* STARPU_HAVE_GLPK_H */ (void) integer_res; (void) integer; *res = 0.; #endif /* STARPU_HAVE_GLPK_H */ } starpu-1.4.9+dfsg/src/profiling/bound.h000066400000000000000000000030221507764646700200450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __BOUND_H__ #define __BOUND_H__ /** @file */ #include #include #include #pragma GCC visibility push(hidden) /** Are we recording? */ extern int _starpu_bound_recording; /** Record task for bound computation */ extern void _starpu_bound_record(struct _starpu_job *j); /** Record tag dependency: id depends on dep_id */ extern void _starpu_bound_tag_dep(starpu_tag_t id, starpu_tag_t dep_id); /** Record task dependency: j depends on dep_j */ extern void _starpu_bound_task_dep(struct _starpu_job *j, struct _starpu_job *dep_j); /** Record job id dependency: j depends on job_id */ extern void _starpu_bound_job_id_dep(starpu_data_handle_t handle, struct _starpu_job *dep_j, unsigned long job_id); /** Clear recording */ extern void starpu_bound_clear(void); #pragma GCC visibility pop #endif // __BOUND_H__ starpu-1.4.9+dfsg/src/profiling/callbacks.c000066400000000000000000000241371507764646700206620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2022-2022 École de Technologie Supérieure (ETS, Montréal) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #ifdef HAVE_DLOPEN #include #endif #include #include #include #include #include #define STARPU_NB_CALLBACKS 17 struct _starpu_prof_tool_callbacks starpu_prof_tool_callbacks; starpu_prof_tool_cb_func *_starpu_prof_tool_callback_map[STARPU_NB_CALLBACKS]; #ifdef HAVE_DLOPEN static void *lib_handle=NULL; #endif /** Dummy implementations of the callbacks */ static void _starpu_prof_tool_event_dummy_func(struct starpu_prof_tool_info *pti, union starpu_prof_tool_event_info *ptei, struct starpu_prof_tool_api_info *ptai) { } void starpu_profiling_init_lib() { starpu_prof_tool_callbacks.starpu_prof_tool_event_init = &_starpu_prof_tool_event_dummy_func; starpu_prof_tool_callbacks.starpu_prof_tool_event_terminate = &_starpu_prof_tool_event_dummy_func; starpu_prof_tool_callbacks.starpu_prof_tool_event_init_begin = &_starpu_prof_tool_event_dummy_func; starpu_prof_tool_callbacks.starpu_prof_tool_event_init_end = &_starpu_prof_tool_event_dummy_func; starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init = &_starpu_prof_tool_event_dummy_func; starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit = &_starpu_prof_tool_event_dummy_func; starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start = &_starpu_prof_tool_event_dummy_func; starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end = &_starpu_prof_tool_event_dummy_func; starpu_prof_tool_callbacks.starpu_prof_tool_event_start_cpu_exec = &_starpu_prof_tool_event_dummy_func; starpu_prof_tool_callbacks.starpu_prof_tool_event_end_cpu_exec = &_starpu_prof_tool_event_dummy_func; starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec = &_starpu_prof_tool_event_dummy_func; starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec = &_starpu_prof_tool_event_dummy_func; starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer = &_starpu_prof_tool_event_dummy_func; starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer = &_starpu_prof_tool_event_dummy_func; starpu_prof_tool_callbacks.starpu_prof_tool_event_user_start = &_starpu_prof_tool_event_dummy_func; starpu_prof_tool_callbacks.starpu_prof_tool_event_user_end = &_starpu_prof_tool_event_dummy_func; } struct starpu_prof_tool_info _starpu_prof_tool_get_info(enum starpu_prof_tool_event event_type, int device_num, int workerid, enum starpu_prof_tool_driver_type driver, unsigned int memnode, void *fun_ptr) { struct starpu_prof_tool_info ret; ret.event_type = event_type; ret.starpu_version[0] = STARPU_MAJOR_VERSION; ret.starpu_version[1] = STARPU_MINOR_VERSION; ret.starpu_version[2] = STARPU_RELEASE_VERSION; ret.device_number = device_num; ret.driver_type = driver; ret.fun_ptr = fun_ptr; ret.memnode = memnode; ret.thread_id = (int)pthread_self(); ret.worker_id = workerid; /* unused fields */ ret.conf = NULL; ret.bytes_to_transfer = 0; ret.bytes_transfered = 0; return ret; } /** This function is specific for data transfers, in order to keep the prototypes simple */ struct starpu_prof_tool_info _starpu_prof_tool_get_info_d(enum starpu_prof_tool_event event_type, int device_num, int workerid, enum starpu_prof_tool_driver_type driver, unsigned memnode, unsigned to_transfer, unsigned transfered) { struct starpu_prof_tool_info ret; ret.event_type = event_type; ret.starpu_version[0] = STARPU_MAJOR_VERSION; ret.starpu_version[1] = STARPU_MINOR_VERSION; ret.starpu_version[2] = STARPU_RELEASE_VERSION; ret.device_number = device_num; ret.driver_type = driver; ret.memnode = memnode; ret.bytes_to_transfer = to_transfer; ret.bytes_transfered = transfered; ret.fun_ptr = NULL; ret.thread_id = (int)pthread_self(); ret.worker_id = workerid; /* unused fields */ ret.conf = NULL; ret.fun_ptr = NULL; return ret; } struct starpu_prof_tool_info _starpu_prof_tool_get_info_init(enum starpu_prof_tool_event event_type, int device_num, enum starpu_prof_tool_driver_type driver, struct starpu_conf* conf) { struct starpu_prof_tool_info ret; ret.event_type = event_type; ret.starpu_version[0] = STARPU_MAJOR_VERSION; ret.starpu_version[1] = STARPU_MINOR_VERSION; ret.starpu_version[2] = STARPU_RELEASE_VERSION; ret.device_number = device_num; ret.driver_type = driver; ret.conf = conf; ret.thread_id = (int)pthread_self(); ret.worker_id = 0; /* unused fields */ ret.memnode = -1; ret.bytes_to_transfer = 0; ret.bytes_transfered = 0; ret.fun_ptr = NULL; return ret; } // The name of the function below is important so it can be found in a library preloaded with LD_PRELOAD (necessary for TAU and Apex) __attribute__((weak)) void starpu_prof_tool_library_register(starpu_prof_tool_entry_register_func reg, starpu_prof_tool_entry_register_func unreg) { (void) reg; (void) unreg; } /** Register a callback for a given event. TODO use a list in order to link multiple callbacks */ void _starpu_prof_tool_register_cb(enum starpu_prof_tool_event event_type, starpu_prof_tool_cb_func cb, enum starpu_prof_tool_command info) { (void) info; *(_starpu_prof_tool_callback_map[event_type]) = cb; } /** Unregister a callback for a given event. TODO use a list in order to link multiple callbacks */ void _starpu_prof_tool_unregister_cb(enum starpu_prof_tool_event event_type, starpu_prof_tool_cb_func cb, enum starpu_prof_tool_command info) { (void) info; (void) cb; *(_starpu_prof_tool_callback_map[event_type]) = NULL; } #ifdef STARPU_PROF_TOOL static void init_prof_map() { _starpu_prof_tool_callback_map[starpu_prof_tool_event_init] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_init); _starpu_prof_tool_callback_map[starpu_prof_tool_event_terminate] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_terminate); _starpu_prof_tool_callback_map[starpu_prof_tool_event_init_begin] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_init_begin); _starpu_prof_tool_callback_map[starpu_prof_tool_event_init_end] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_init_end); _starpu_prof_tool_callback_map[starpu_prof_tool_event_driver_init] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init); _starpu_prof_tool_callback_map[starpu_prof_tool_event_driver_deinit] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit); _starpu_prof_tool_callback_map[starpu_prof_tool_event_driver_init_start] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start); _starpu_prof_tool_callback_map[starpu_prof_tool_event_driver_init_end] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end); _starpu_prof_tool_callback_map[starpu_prof_tool_event_start_cpu_exec] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_start_cpu_exec); _starpu_prof_tool_callback_map[starpu_prof_tool_event_end_cpu_exec] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_end_cpu_exec); _starpu_prof_tool_callback_map[starpu_prof_tool_event_start_gpu_exec] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec); _starpu_prof_tool_callback_map[starpu_prof_tool_event_end_gpu_exec] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec); _starpu_prof_tool_callback_map[starpu_prof_tool_event_start_transfer] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer); _starpu_prof_tool_callback_map[starpu_prof_tool_event_end_transfer] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer); _starpu_prof_tool_callback_map[starpu_prof_tool_event_user_start] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_user_start); _starpu_prof_tool_callback_map[starpu_prof_tool_event_user_end] = &(starpu_prof_tool_callbacks.starpu_prof_tool_event_user_end); } #endif /** * Looks if there is a profiling tool pointed at by the appropriate * environment variable. * Returns 0 if nothing is loaded, -1 if there was a problem, 1 otherwise. */ int _starpu_prof_tool_try_load() { #ifdef STARPU_PROF_TOOL init_prof_map(); starpu_profiling_init_lib(); const char *tool_libs = starpu_getenv(STARPU_PROF_TOOL_ENV_VAR); if (tool_libs != NULL) { #ifdef HAVE_DLOPEN void *found; _STARPU_DEBUG("Loading profiling tool %s\n", tool_libs); lib_handle = dlopen(tool_libs, RTLD_LAZY); // TODO best flag? if (!lib_handle) { perror("Could not open the requested file"); fprintf(stderr, "%s\n", dlerror()); return -1; } /* load the loading function we find in this library */ found = dlsym(lib_handle, "starpu_prof_tool_library_register"); if (!found) { perror("Could not find the required registration function in the profiling library\n"); return -1; } starpu_prof_tool_entry_func entry_func = (starpu_prof_tool_entry_func)found; entry_func(_starpu_prof_tool_register_cb, _starpu_prof_tool_unregister_cb); return 1; #else _STARPU_MSG("Environment variable '%s' defined but the dlopen functionality is unavailable on the system\n", STARPU_PROF_TOOL_ENV_VAR); #endif } /* This corresponds to something if we LD_PRELOAD a tool */ starpu_prof_tool_library_register(_starpu_prof_tool_register_cb, _starpu_prof_tool_unregister_cb); return 0; #else const char *tool_libs = starpu_getenv(STARPU_PROF_TOOL_ENV_VAR); if (tool_libs != NULL) { _STARPU_MSG("Variable '%s' is defined but StarPU profiling tool is not enabled\n", STARPU_PROF_TOOL_ENV_VAR); } return 1; #endif } void _starpu_prof_tool_unload() { #ifdef HAVE_DLOPEN if (lib_handle) { dlclose(lib_handle); lib_handle = NULL; } #endif } starpu-1.4.9+dfsg/src/profiling/callbacks.h000066400000000000000000000063511507764646700206650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2022-2022 École de Technologie Supérieure (ETS, Montréal) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef _STARPU_CALLBACKS_H_ #define _STARPU_CALLBACKS_H_ #include #include #define STARPU_PROF_TOOL_ENV_VAR "STARPU_PROF_TOOL" #ifdef __cplusplus extern "C" { #endif /** The events themselves. This structure can be built by the preprocessor, but we decided to list the function pointers explicitly for readability purpose. */ struct _starpu_prof_tool_callbacks { starpu_prof_tool_cb_func starpu_prof_tool_event_init; starpu_prof_tool_cb_func starpu_prof_tool_event_terminate; starpu_prof_tool_cb_func starpu_prof_tool_event_init_begin; starpu_prof_tool_cb_func starpu_prof_tool_event_init_end; starpu_prof_tool_cb_func starpu_prof_tool_event_driver_init; starpu_prof_tool_cb_func starpu_prof_tool_event_driver_deinit; starpu_prof_tool_cb_func starpu_prof_tool_event_driver_init_start; starpu_prof_tool_cb_func starpu_prof_tool_event_driver_init_end; starpu_prof_tool_cb_func starpu_prof_tool_event_start_cpu_exec; starpu_prof_tool_cb_func starpu_prof_tool_event_end_cpu_exec; starpu_prof_tool_cb_func starpu_prof_tool_event_start_gpu_exec; starpu_prof_tool_cb_func starpu_prof_tool_event_end_gpu_exec; starpu_prof_tool_cb_func starpu_prof_tool_event_start_transfer; starpu_prof_tool_cb_func starpu_prof_tool_event_end_transfer; starpu_prof_tool_cb_func starpu_prof_tool_event_user_start; starpu_prof_tool_cb_func starpu_prof_tool_event_user_end; }; extern struct _starpu_prof_tool_callbacks starpu_prof_tool_callbacks; /******************************************************************************* * Functions used by the callbacks *******************************************************************************/ struct starpu_prof_tool_info _starpu_prof_tool_get_info(enum starpu_prof_tool_event, int, int, enum starpu_prof_tool_driver_type, unsigned int, /*_starpu_cl_func_t*/ void*); struct starpu_prof_tool_info _starpu_prof_tool_get_info_d(enum starpu_prof_tool_event, int, int, enum starpu_prof_tool_driver_type, unsigned, unsigned, unsigned /* void*: can be added later if necessary */); struct starpu_prof_tool_info _starpu_prof_tool_get_info_init(enum starpu_prof_tool_event, int, enum starpu_prof_tool_driver_type, struct starpu_conf*); /******************************************************************************* * Initialization and cleanup *******************************************************************************/ int _starpu_prof_tool_try_load(); void _starpu_prof_tool_unload(); #ifdef __cplusplus } #endif #endif // _STARPU_CALLBACKS_H_ starpu-1.4.9+dfsg/src/profiling/profiling.c000066400000000000000000000506071507764646700207350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #ifdef STARPU_PAPI #include #endif #ifdef STARPU_PAPI static starpu_pthread_mutex_t papi_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static int papi_events[PAPI_MAX_HWCTRS]; static int papi_nevents = 0; static int warned_component_unavailable = 0; #endif /* Store the busid of the different (src, dst) pairs. busid_matrix[src][dst] * contains the busid of (src, dst) or -1 if the bus was not registered. */ struct node_pair { int src; int dst; struct starpu_profiling_bus_info *bus_info; }; static int busid_matrix[STARPU_MAXNODES][STARPU_MAXNODES]; static struct starpu_profiling_bus_info bus_profiling_info[STARPU_MAXNODES][STARPU_MAXNODES]; static struct node_pair busid_to_node_pair[STARPU_MAXNODES*STARPU_MAXNODES]; static char bus_direct[STARPU_MAXNODES*STARPU_MAXNODES]; static int bus_ngpus[STARPU_MAXNODES*STARPU_MAXNODES]; static unsigned busid_cnt = 0; static void _starpu_bus_reset_profiling_info(struct starpu_profiling_bus_info *bus_info); /* Clear all the profiling info related to the worker. */ static void _starpu_worker_reset_profiling_info_with_lock(int workerid); /* * Global control of profiling */ /* Disabled by default, unless simulating */ int _starpu_profiling = #ifdef STARPU_SIMGRID 1 #else 0 #endif ; int _starpu_codelet_profiling = 1; int _starpu_energy_profiling = 0; void starpu_profiling_init() { _starpu_profiling_init(); } static void _starpu_profiling_reset_counters() { int worker; for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) { _starpu_worker_reset_profiling_info_with_lock(worker); } int busid; int bus_cnt = starpu_bus_get_count(); for (busid = 0; busid < bus_cnt; busid++) { struct starpu_profiling_bus_info *bus_info; bus_info = busid_to_node_pair[busid].bus_info; _starpu_bus_reset_profiling_info(bus_info); } } int starpu_profiling_status_set(int status) { unsigned worker; for (worker = 0; worker < starpu_worker_get_count(); worker++) { struct _starpu_worker *worker_struct = _starpu_get_worker_struct(worker); STARPU_PTHREAD_MUTEX_LOCK(&worker_struct->sched_mutex); } for (worker = 0; worker < starpu_worker_get_count(); worker++) { STARPU_PTHREAD_MUTEX_LOCK(&_starpu_get_worker_struct(worker)->profiling_info_mutex); } ANNOTATE_HAPPENS_AFTER(&_starpu_profiling); int prev_value = _starpu_profiling; _starpu_profiling = status; ANNOTATE_HAPPENS_BEFORE(&_starpu_profiling); _STARPU_TRACE_SET_PROFILING(status); /* If we enable profiling, we reset the counters. */ if (status == STARPU_PROFILING_ENABLE) { _starpu_profiling_reset_counters(); } for (worker = 0; worker < starpu_worker_get_count(); worker++) { struct _starpu_worker *worker_struct = _starpu_get_worker_struct(worker); STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_get_worker_struct(worker)->profiling_info_mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&worker_struct->sched_mutex); } return prev_value; } void _starpu_profiling_init(void) { int workerid; for (workerid = 0; workerid < STARPU_NMAXWORKERS; workerid++) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); enum _starpu_worker_status_index i; memset(&worker->profiling_info, 0, sizeof(worker->profiling_info)); STARPU_PTHREAD_MUTEX_INIT(&worker->profiling_info_mutex, NULL); for (i = 0; i< STATUS_INDEX_NR; i++) worker->profiling_registered_start[i] = 0; worker->profiling_status = STATUS_UNKNOWN; } #ifdef STARPU_PAPI STARPU_PTHREAD_MUTEX_LOCK(&papi_mutex); int retval = PAPI_library_init(PAPI_VER_CURRENT); if (retval != PAPI_VER_CURRENT) { _STARPU_MSG("Failed init PAPI, error: %s.\n", PAPI_strerror(retval)); } retval = PAPI_thread_init(pthread_self); if (retval != PAPI_OK) { _STARPU_MSG("Failed init PAPI thread, error: %s.\n", PAPI_strerror(retval)); } char *conf_papi_events; char *papi_event_name; conf_papi_events = starpu_getenv("STARPU_PROF_PAPI_EVENTS"); papi_nevents = 0; if (conf_papi_events != NULL) { while ((papi_event_name = strtok_r(conf_papi_events, " ,", &conf_papi_events))) { if (papi_nevents == PAPI_MAX_HWCTRS) { _STARPU_MSG("Too many requested papi counters, ignoring %s\n", papi_event_name); continue; } _STARPU_DEBUG("Loading PAPI Event: %s\n", papi_event_name); retval = PAPI_event_name_to_code((char*)papi_event_name, &papi_events[papi_nevents]); if (retval != PAPI_OK) _STARPU_MSG("Failed to codify papi event [%s], error: %s.\n", papi_event_name, PAPI_strerror(retval)); else papi_nevents++; } } STARPU_PTHREAD_MUTEX_UNLOCK(&papi_mutex); #endif } #ifdef STARPU_PAPI void _starpu_profiling_papi_task_start_counters(struct starpu_task *task) { if (!starpu_profiling_status_get()) return; struct starpu_profiling_task_info *profiling_info; profiling_info = task->profiling_info; if (profiling_info && papi_nevents) { int i; profiling_info->papi_event_set = PAPI_NULL; STARPU_PTHREAD_MUTEX_LOCK(&papi_mutex); PAPI_create_eventset(&profiling_info->papi_event_set); for(i=0; ipapi_event_set, papi_events[i]); #ifdef PAPI_ECMP_DISABLED if (ret == PAPI_ECMP_DISABLED && !warned_component_unavailable) { _STARPU_MSG("Error while registering Papi event: Component containing event is disabled. Try running `papi_component_avail` to get more information.\n"); warned_component_unavailable = 1; } #else (void)ret; #endif profiling_info->papi_values[i]=0; } PAPI_reset(profiling_info->papi_event_set); PAPI_start(profiling_info->papi_event_set); STARPU_PTHREAD_MUTEX_UNLOCK(&papi_mutex); } } void _starpu_profiling_papi_task_stop_counters(struct starpu_task *task) { if (!starpu_profiling_status_get()) return; struct starpu_profiling_task_info *profiling_info; profiling_info = task->profiling_info; if (profiling_info && papi_nevents) { int i; STARPU_PTHREAD_MUTEX_LOCK(&papi_mutex); PAPI_stop(profiling_info->papi_event_set, profiling_info->papi_values); for(i=0; ipapi_values[i]); } PAPI_cleanup_eventset(profiling_info->papi_event_set); PAPI_destroy_eventset(&profiling_info->papi_event_set); STARPU_PTHREAD_MUTEX_UNLOCK(&papi_mutex); } } #endif void _starpu_profiling_start(void) { const char *env; if ((env = starpu_getenv("STARPU_PROFILING")) && atoi(env)) { starpu_profiling_status_set(STARPU_PROFILING_ENABLE); } _starpu_codelet_profiling = starpu_get_env_number_default("STARPU_CODELET_PROFILING", 1); _starpu_energy_profiling = starpu_get_env_number_default("STARPU_ENERGY_PROFILING", 0); } void _starpu_profiling_terminate(void) { int worker; for (worker = 0; worker < STARPU_NMAXWORKERS; worker++) { STARPU_PTHREAD_MUTEX_DESTROY(&_starpu_get_worker_struct(worker)->profiling_info_mutex); } #ifdef STARPU_PAPI /* free the resources used by PAPI */ STARPU_PTHREAD_MUTEX_LOCK(&papi_mutex); PAPI_shutdown(); STARPU_PTHREAD_MUTEX_UNLOCK(&papi_mutex); #endif } /* * Task profiling */ struct starpu_profiling_task_info *_starpu_allocate_profiling_info_if_needed(struct starpu_task *task) { struct starpu_profiling_task_info *info = NULL; /* If we are benchmarking, we need room for the energy */ if (starpu_profiling_status_get() || (task->cl && task->cl->energy_model && (task->cl->energy_model->benchmarking || _starpu_get_calibrate_flag()))) { _STARPU_CALLOC(info, 1, sizeof(struct starpu_profiling_task_info)); } return info; } /* * Worker profiling */ static void _starpu_worker_reset_profiling_info_with_lock(int workerid) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); struct starpu_profiling_worker_info *worker_info = &worker->profiling_info; struct timespec now; _starpu_clock_gettime(&now); worker_info->start_time = now; /* This is computed in a lazy fashion when the application queries * profiling info. */ starpu_timespec_clear(&worker_info->total_time); starpu_timespec_clear(&worker_info->executing_time); starpu_timespec_clear(&worker_info->sleeping_time); worker_info->executed_tasks = 0; worker_info->used_cycles = 0; worker_info->stall_cycles = 0; worker_info->energy_consumed = 0; worker_info->flops = 0; /* We detect if the worker is already sleeping or doing some * computation */ enum _starpu_worker_status status = _starpu_worker_get_status(workerid); enum _starpu_worker_status_index i; for (i = 0; i < STATUS_INDEX_NR; i++) { if (status & (1 << i)) { worker->profiling_registered_start[i] = 1; worker->profiling_registered_start_date[i] = now; } else { worker->profiling_registered_start[i] = 0; } worker->profiling_status = status; worker->profiling_status_start_date = now; } } static void _starpu_worker_time_split_accumulate(struct starpu_profiling_worker_info *worker_info, enum _starpu_worker_status status, struct timespec *delta) { /* We here prioritize where we want to attribute the time spent */ if (status & STATUS_EXECUTING) /* Executing task, this is all we want to know */ starpu_timespec_accumulate(&worker_info->executing_time, delta); else if (status & STATUS_CALLBACK) /* Otherwise, callback, that's fine as well */ starpu_timespec_accumulate(&worker_info->callback_time, delta); else if (status & STATUS_WAITING) /* Not doing any task or callback, held on waiting for some data */ starpu_timespec_accumulate(&worker_info->waiting_time, delta); else if (status & STATUS_SLEEPING) /* Not even waiting for some data, but we don't have any task to do anyway */ starpu_timespec_accumulate(&worker_info->sleeping_time, delta); else if (status & STATUS_SCHEDULING) /* We do have tasks to do, but the scheduler takes time */ starpu_timespec_accumulate(&worker_info->scheduling_time, delta); /* And otherwise it's just uncategorized overhead */ } void _starpu_worker_start_state(int workerid, enum _starpu_worker_status_index index, struct timespec *start_time) { if (starpu_profiling_status_get()) { struct timespec state_start_time; if (!start_time) { _starpu_clock_gettime(&state_start_time); start_time = &state_start_time; } struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); STARPU_PTHREAD_MUTEX_LOCK(&worker->profiling_info_mutex); STARPU_ASSERT(worker->profiling_registered_start[index] == 0); worker->profiling_registered_start[index] = 1; worker->profiling_registered_start_date[index] = *start_time; if (worker->profiling_status != STATUS_UNKNOWN) { struct starpu_profiling_worker_info *worker_info = &worker->profiling_info; struct timespec state_time; starpu_timespec_sub(start_time, &worker->profiling_status_start_date, &state_time); _starpu_worker_time_split_accumulate(worker_info, worker->profiling_status, &state_time); } worker->profiling_status = _starpu_worker_get_status(workerid) | (1<profiling_status_start_date = *start_time; STARPU_PTHREAD_MUTEX_UNLOCK(&worker->profiling_info_mutex); } } static void _starpu_worker_time_accumulate(struct starpu_profiling_worker_info *worker_info, enum _starpu_worker_status_index index, struct timespec *delta) { switch (index) { case STATUS_INDEX_EXECUTING: starpu_timespec_accumulate(&worker_info->all_executing_time, delta); break; case STATUS_INDEX_CALLBACK: starpu_timespec_accumulate(&worker_info->all_callback_time, delta); break; case STATUS_INDEX_WAITING: starpu_timespec_accumulate(&worker_info->all_waiting_time, delta); break; case STATUS_INDEX_SLEEPING: starpu_timespec_accumulate(&worker_info->all_sleeping_time, delta); break; case STATUS_INDEX_SCHEDULING: starpu_timespec_accumulate(&worker_info->all_scheduling_time, delta); break; case STATUS_INDEX_INITIALIZING: /* no profiling info for init */ break; case STATUS_INDEX_NR: STARPU_ASSERT(0); } } void _starpu_worker_stop_state(int workerid, enum _starpu_worker_status_index index, struct timespec *stop_time) { if (starpu_profiling_status_get()) { struct timespec *state_start, state_end_time; struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); struct starpu_profiling_worker_info *worker_info = &worker->profiling_info; if (!stop_time) { _starpu_clock_gettime(&state_end_time); stop_time = &state_end_time; } STARPU_PTHREAD_MUTEX_LOCK(&worker->profiling_info_mutex); STARPU_ASSERT (worker->profiling_registered_start[index] == 1); { state_start = &worker->profiling_registered_start_date[index]; /* Perhaps that profiling was enabled while the worker was * already blocked, so we don't measure (end - start), but * (end - max(start,worker_start)) where worker_start is the * date of the previous profiling info reset on the worker */ struct timespec *worker_start = &worker_info->start_time; if (starpu_timespec_cmp(state_start, worker_start, <)) { /* state_start < worker_start */ state_start = worker_start; } struct timespec state_time; starpu_timespec_sub(stop_time, state_start, &state_time); _starpu_worker_time_accumulate(worker_info, index, &state_time); worker->profiling_registered_start[index] = 0; } if (worker->profiling_status != STATUS_UNKNOWN) { struct timespec state_time; starpu_timespec_sub(stop_time, &worker->profiling_status_start_date, &state_time); _starpu_worker_time_split_accumulate(worker_info, worker->profiling_status, &state_time); } worker->profiling_status = _starpu_worker_get_status(workerid) & ~(1<profiling_status_start_date = *stop_time; STARPU_PTHREAD_MUTEX_UNLOCK(&worker->profiling_info_mutex); } } void _starpu_worker_update_profiling_info_executing(int workerid, int executed_tasks, uint64_t used_cycles, uint64_t stall_cycles, double energy_consumed, double flops) { struct starpu_profiling_worker_info *worker_info = &_starpu_get_worker_struct(workerid)->profiling_info; if (starpu_profiling_status_get()) { STARPU_PTHREAD_MUTEX_LOCK(&_starpu_get_worker_struct(workerid)->profiling_info_mutex); worker_info->used_cycles += used_cycles; worker_info->stall_cycles += stall_cycles; worker_info->energy_consumed += energy_consumed; worker_info->executed_tasks += executed_tasks; worker_info->flops += flops; STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_get_worker_struct(workerid)->profiling_info_mutex); } else /* Not thread safe, shouldn't be too much a problem */ worker_info->executed_tasks += executed_tasks; } int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worker_info *info) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); struct starpu_profiling_worker_info *worker_info = &worker->profiling_info; if (!starpu_profiling_status_get()) { /* Not thread safe, shouldn't be too much a problem */ info->executed_tasks = worker_info->executed_tasks; } STARPU_PTHREAD_MUTEX_LOCK_SCHED(&_starpu_get_worker_struct(workerid)->sched_mutex); STARPU_PTHREAD_MUTEX_LOCK(&_starpu_get_worker_struct(workerid)->profiling_info_mutex); if (info) { /* The total time is computed in a lazy fashion */ struct timespec now; _starpu_clock_gettime(&now); enum _starpu_worker_status_index i; for (i = 0; i< STATUS_INDEX_NR; i++) { /* In case some worker is currently doing something, we take into * account the time spent since it registered. */ if (worker->profiling_registered_start[i]) { struct timespec delta; starpu_timespec_sub(&now, &worker->profiling_registered_start_date[i], &delta); _starpu_worker_time_accumulate(worker_info, i, &delta); } } if (worker->profiling_status != STATUS_UNKNOWN) { struct timespec delta; starpu_timespec_sub(&now, &worker->profiling_status_start_date, &delta); _starpu_worker_time_split_accumulate(worker_info, worker->profiling_status, &delta); } /* total_time = now - start_time */ starpu_timespec_sub(&now, &worker_info->start_time, &worker_info->total_time); *info = *worker_info; } _starpu_worker_reset_profiling_info_with_lock(workerid); STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_get_worker_struct(workerid)->profiling_info_mutex); STARPU_PTHREAD_MUTEX_UNLOCK_SCHED(&_starpu_get_worker_struct(workerid)->sched_mutex); return 0; } /* When did the task reach the scheduler ? */ void _starpu_profiling_set_task_push_start_time(struct starpu_task *task) { if (!starpu_profiling_status_get()) return; struct starpu_profiling_task_info *profiling_info; profiling_info = task->profiling_info; if (profiling_info) _starpu_clock_gettime(&profiling_info->push_start_time); } void _starpu_profiling_set_task_push_end_time(struct starpu_task *task) { if (!starpu_profiling_status_get()) return; struct starpu_profiling_task_info *profiling_info; profiling_info = task->profiling_info; if (profiling_info) _starpu_clock_gettime(&profiling_info->push_end_time); } /* * Bus profiling */ void _starpu_initialize_busid_matrix(void) { int i, j; for (j = 0; j < STARPU_MAXNODES; j++) for (i = 0; i < STARPU_MAXNODES; i++) busid_matrix[i][j] = -1; busid_cnt = 0; } static void _starpu_bus_reset_profiling_info(struct starpu_profiling_bus_info *bus_info) { _starpu_clock_gettime(&bus_info->start_time); bus_info->transferred_bytes = 0; bus_info->transfer_count = 0; } int _starpu_register_bus(int src_node, int dst_node) { if (starpu_bus_get_id(src_node, dst_node) != -1) return -EBUSY; int busid = STARPU_ATOMIC_ADD(&busid_cnt, 1) - 1; busid_matrix[src_node][dst_node] = busid; busid_to_node_pair[busid].src = src_node; busid_to_node_pair[busid].dst = dst_node; busid_to_node_pair[busid].bus_info = &bus_profiling_info[src_node][dst_node]; _starpu_bus_reset_profiling_info(&bus_profiling_info[src_node][dst_node]); return busid; } int starpu_bus_get_count(void) { return busid_cnt; } int starpu_bus_get_id(int src, int dst) { return busid_matrix[src][dst]; } int starpu_bus_get_src(int busid) { return busid_to_node_pair[busid].src; } int starpu_bus_get_dst(int busid) { return busid_to_node_pair[busid].dst; } void starpu_bus_set_direct(int busid, int direct) { bus_direct[busid] = direct; } int starpu_bus_get_direct(int busid) { return bus_direct[busid]; } void starpu_bus_set_ngpus(int busid, int ngpus) { bus_ngpus[busid] = ngpus; } int starpu_bus_get_ngpus(int busid) { int ngpus = bus_ngpus[busid]; if (!ngpus) { struct _starpu_machine_topology *topology = &_starpu_get_machine_config()->topology; /* Unknown number of GPUs, assume it's shared by all GPUs */ ngpus = topology->ndevices[STARPU_CUDA_WORKER]+topology->ndevices[STARPU_OPENCL_WORKER]; } return ngpus; } int starpu_bus_get_profiling_info(int busid, struct starpu_profiling_bus_info *bus_info) { int src_node = starpu_bus_get_src(busid); int dst_node = starpu_bus_get_dst(busid); /* XXX protect all this method with a mutex */ if (bus_info) { struct timespec now; _starpu_clock_gettime(&now); /* total_time = now - start_time */ starpu_timespec_sub(&now, &bus_profiling_info[src_node][dst_node].start_time, &bus_profiling_info[src_node][dst_node].total_time); *bus_info = bus_profiling_info[src_node][dst_node]; } _starpu_bus_reset_profiling_info(&bus_profiling_info[src_node][dst_node]); return 0; } void _starpu_bus_update_profiling_info(int src_node, int dst_node, size_t size) { bus_profiling_info[src_node][dst_node].transferred_bytes += size; bus_profiling_info[src_node][dst_node].transfer_count++; // fprintf(stderr, "PROFILE %d -> %d : %d (cnt %d)\n", src_node, dst_node, size, bus_profiling_info[src_node][dst_node].transfer_count); } #undef starpu_profiling_status_get int starpu_profiling_status_get(void) { int ret; ANNOTATE_HAPPENS_AFTER(&_starpu_profiling); ret = _starpu_profiling; ANNOTATE_HAPPENS_BEFORE(&_starpu_profiling); return ret; } starpu-1.4.9+dfsg/src/profiling/profiling.h000066400000000000000000000073241507764646700207400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __PROFILING_H__ #define __PROFILING_H__ /** @file */ #include #include #include #include #pragma GCC visibility push(hidden) enum _starpu_worker_status_index; extern int _starpu_codelet_profiling; extern int _starpu_energy_profiling; /** Create a task profiling info structure (with the proper time stamps) in case * profiling is enabled. */ struct starpu_profiling_task_info *_starpu_allocate_profiling_info_if_needed(struct starpu_task *task); /** Update the per-worker profiling info after a task (or more) was executed. * This tells StarPU how much time was spent doing computation. */ void _starpu_worker_update_profiling_info_executing(int workerid, int executed_tasks, uint64_t used_cycles, uint64_t stall_cycles, double consumed_energy, double flops); /** Record the date when the worker entered this state. This permits to measure * how much time was spent in this state. * start_time is optional, if unspecified, _starpu_worker_start_state will just * take the current time. */ void _starpu_worker_start_state(int workerid, enum _starpu_worker_status_index index, struct timespec *start_time); /* Record the date when the worker left this state. This permits to measure * how much time was spent in this state. * stop_time is optional, if unspecified, _starpu_worker_start_state will just * take the current time. */ void _starpu_worker_stop_state(int workerid, enum _starpu_worker_status_index index, struct timespec *stop_time); /** When StarPU is initialized, a matrix describing all the bus between memory * nodes is created: it indicates whether there is a physical link between two * memory nodes or not. This matrix should contain the identifier of the bus * between two nodes or -1 in case there is no link. */ void _starpu_initialize_busid_matrix(void); /** Tell StarPU that there exists a link between the two memory nodes. This * function returns the identifier associated to the bus which can be used to * retrieve profiling information about the bus activity later on. */ int _starpu_register_bus(int src_node, int dst_node); /** Tell StarPU that "size" bytes were transferred between the two specified * memory nodes. */ void _starpu_bus_update_profiling_info(int src_node, int dst_node, size_t size); void _starpu_profiling_set_task_push_start_time(struct starpu_task *task); void _starpu_profiling_set_task_push_end_time(struct starpu_task *task); #ifdef STARPU_PAPI /** Functions for papi task profilling */ void _starpu_profiling_papi_task_start_counters(struct starpu_task *task); void _starpu_profiling_papi_task_stop_counters(struct starpu_task *task); #endif /** This function needs to be called before other starpu_profile_* functions */ void _starpu_profiling_init(void); /** This function starts profiling if the STARPU_PROFILING environment variable was set */ void _starpu_profiling_start(void); void _starpu_profiling_terminate(void); #pragma GCC visibility pop #endif // __PROFILING_H__ starpu-1.4.9+dfsg/src/profiling/profiling_helpers.c000066400000000000000000000177651507764646700224670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include static double convert_to_GB(float d) { const double divisor = 1024; return d = (((d / divisor) / divisor) / divisor); } void _starpu_profiling_bus_helper_display_summary(FILE *stream) { int long long sum_transferred = 0; fprintf(stream, "\n#---------------------\n"); fprintf(stream, "Data transfer stats:\n"); int busid; int bus_cnt = starpu_bus_get_count(); for (busid = 0; busid < bus_cnt; busid++) { char src_name[128], dst_name[128]; int src, dst; src = starpu_bus_get_src(busid); dst = starpu_bus_get_dst(busid); struct starpu_profiling_bus_info bus_info; starpu_bus_get_profiling_info(busid, &bus_info); int long long transferred = bus_info.transferred_bytes; int long long transfer_cnt = bus_info.transfer_count; double elapsed_time = starpu_timing_timespec_to_us(&bus_info.total_time) / 1e6; double d = convert_to_GB(transferred); starpu_memory_node_get_name(src, src_name, sizeof(src_name)); starpu_memory_node_get_name(dst, dst_name, sizeof(dst_name)); fprintf(stream, "\t%s -> %s", src_name, dst_name); fprintf(stream, "\t%.4lf %s", d, "GB"); fprintf(stream, "\t%.4lf %s/s", (d * 1024) / elapsed_time, "MB"); fprintf(stream, "\t(transfers : %lld - avg %.4lf %s)\n", transfer_cnt, (d * 1024) / transfer_cnt, "MB"); sum_transferred += transferred; } double d = convert_to_GB(sum_transferred); fprintf(stream, "Total transfers: %.4lf %s\n", d, "GB"); fprintf(stream, "#---------------------\n"); } void starpu_profiling_bus_helper_display_summary(void) { const char *stats; if (!((stats = starpu_getenv("STARPU_BUS_STATS")) && atoi(stats))) return; const char *filename = starpu_getenv("STARPU_BUS_STATS_FILE"); if (filename==NULL) _starpu_profiling_bus_helper_display_summary(stderr); else { FILE *sfile = fopen(filename, "w+"); STARPU_ASSERT_MSG(sfile, "Could not open file %s for displaying bus stats (%s). You can specify another file destination with the STARPU_BUS_STATS_FILE environment variable", filename, strerror(errno)); _starpu_profiling_bus_helper_display_summary(sfile); fclose(sfile); } } void _starpu_profiling_worker_helper_display_summary(FILE *stream) { double sum_consumed = 0.; int profiling = starpu_profiling_status_get(); double overall_time = 0; int workerid; int worker_cnt = starpu_worker_get_count(); double tot_total_time = 0.0; double tot_executing_time = 0.0; double tot_callback_time = 0.0; double tot_waiting_time = 0.0; double tot_sleeping_time = 0.0; double tot_scheduling_time = 0.0; fprintf(stream, "\n#---------------------\n"); fprintf(stream, "Worker stats:\n"); for (workerid = 0; workerid < worker_cnt; workerid++) { struct starpu_profiling_worker_info info; int ret = starpu_profiling_worker_get_info(workerid, &info); char name[64]; STARPU_ASSERT(!ret); starpu_worker_get_name(workerid, name, sizeof(name)); fprintf(stream, "%-32s\n", name); fprintf(stream, "\t%d task(s)\n", info.executed_tasks); if (profiling) { double total_time = starpu_timing_timespec_to_us(&info.total_time) / 1000.; double executing_time = starpu_timing_timespec_to_us(&info.executing_time) / 1000.; double callback_time = starpu_timing_timespec_to_us(&info.callback_time) / 1000.; double waiting_time = starpu_timing_timespec_to_us(&info.waiting_time) / 1000.; double sleeping_time = starpu_timing_timespec_to_us(&info.sleeping_time) / 1000.; double scheduling_time = starpu_timing_timespec_to_us(&info.scheduling_time) / 1000.; double overhead_time = total_time - executing_time - callback_time - waiting_time - sleeping_time - scheduling_time; tot_total_time += total_time; tot_executing_time += executing_time; tot_callback_time += callback_time; tot_waiting_time += waiting_time; tot_sleeping_time += sleeping_time; tot_scheduling_time += scheduling_time; double all_executing_time = starpu_timing_timespec_to_us(&info.all_executing_time) / 1000.; double all_callback_time = starpu_timing_timespec_to_us(&info.all_callback_time) / 1000.; double all_waiting_time = starpu_timing_timespec_to_us(&info.all_waiting_time) / 1000.; double all_sleeping_time = starpu_timing_timespec_to_us(&info.all_sleeping_time) / 1000.; double all_scheduling_time = starpu_timing_timespec_to_us(&info.all_scheduling_time) / 1000.; if (total_time > overall_time) overall_time = total_time; fprintf(stream, "\ttime split: total %.2lf ms = " "executing: %.2lf ms + " "callback: %.2lf ms + " "waiting: %.2lf ms + " "sleeping: %.2lf ms + " "scheduling: %.2lf ms + " "overhead %.2lf ms\n" "\tall time: " "executing: %.2lf ms " "callback: %.2lf ms " "waiting: %.2lf ms " "sleeping: %.2lf ms " "scheduling: %.2lf ms\n", total_time, executing_time, callback_time, waiting_time, sleeping_time, scheduling_time, overhead_time, all_executing_time, all_callback_time, all_waiting_time, all_sleeping_time, all_scheduling_time); if (info.used_cycles || info.stall_cycles) fprintf(stream, "\t%llu Mcy %llu Mcy stall\n", (unsigned long long)info.used_cycles/1000000, (unsigned long long)info.stall_cycles/1000000); if (info.energy_consumed) fprintf(stream, "\t%f J consumed\n", info.energy_consumed); if (info.flops) fprintf(stream, "\t%f GFlop/s\n\n", info.flops / total_time / 1000000); } sum_consumed += info.energy_consumed; } if (profiling) { double tot_overhead_time = tot_total_time - tot_executing_time - tot_callback_time - tot_waiting_time - tot_sleeping_time - tot_scheduling_time; fprintf(stream, "\nGlobal time split: total %.2lf ms = " "executing: %.2lf ms (%.2lf%%) + " "callback: %.2lf ms (%.2lf%%) + " "waiting: %.2lf ms (%.2lf%%) + " "sleeping: %.2lf ms (%.2lf%%) + " "scheduling: %.2lf ms (%.2lf%%) + " "overhead %.2lf ms (%.2lf%%)\n", tot_total_time, tot_executing_time, tot_executing_time * 100 / tot_total_time, tot_callback_time, tot_callback_time * 100 / tot_total_time, tot_waiting_time, tot_waiting_time * 100 / tot_total_time, tot_sleeping_time, tot_sleeping_time * 100 / tot_total_time, tot_scheduling_time, tot_scheduling_time * 100 / tot_total_time, tot_overhead_time, tot_overhead_time * 100 / tot_total_time); } if (profiling) { const char *strval_idle_power = starpu_getenv("STARPU_IDLE_POWER"); if (strval_idle_power) { double idle_power = atof(strval_idle_power); /* Watt */ double idle_energy = idle_power * overall_time / 1000.; /* J */ fprintf(stream, "Idle energy: %.2lf J\n", idle_energy); fprintf(stream, "Total energy: %.2lf J\n", sum_consumed + idle_energy); } } fprintf(stream, "#---------------------\n"); } void starpu_profiling_worker_helper_display_summary(void) { const char *stats; if (!((stats = starpu_getenv("STARPU_WORKER_STATS")) && atoi(stats))) return; const char *filename = starpu_getenv("STARPU_WORKER_STATS_FILE"); if (filename==NULL) _starpu_profiling_worker_helper_display_summary(stderr); else { FILE *sfile = fopen(filename, "w+"); STARPU_ASSERT_MSG(sfile, "Could not open file %s for displaying worker stats (%s). You can specify another file destination with the STARPU_WORKER_STATS_FILE environment variable", filename, strerror(errno)); _starpu_profiling_worker_helper_display_summary(sfile); fclose(sfile); } } starpu-1.4.9+dfsg/src/sched_policies/000077500000000000000000000000001507764646700175545ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/sched_policies/component_best_implementation.c000066400000000000000000000076411507764646700260540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #ifdef BUILDING_STARPU #include #endif /* return true if workerid can execute task, and fill task->predicted and task->predicted_transfer * according to best implementation predictions */ static int find_best_impl(unsigned sched_ctx_id, struct starpu_task * task, int workerid) { double len = DBL_MAX; int best_impl = -1; unsigned impl; if (!task->cl->model) { /* No perfmodel, first available will be fine */ int can_execute = starpu_worker_can_execute_task_first_impl(workerid, task, &impl); STARPU_ASSERT(can_execute); best_impl = impl; len = 0.0; } else { for(impl = 0; impl < STARPU_MAXIMPLEMENTATIONS; impl++) { if(starpu_worker_can_execute_task(workerid, task, impl)) { double d = starpu_task_worker_expected_length(task, workerid, sched_ctx_id, impl); if(isnan(d)) { best_impl = impl; len = 0.0; break; } if(d < len) { len = d; best_impl = impl; } } } } if(best_impl == -1) return 0; task->predicted = len; task->predicted_transfer = starpu_task_expected_data_transfer_time_for(task, workerid); starpu_task_set_implementation(task, best_impl); return 1; } /* set implementation, task->predicted and task->predicted_transfer with the first worker of workers that can execute that task * or have to be calibrated */ static void select_best_implementation_and_set_preds(unsigned sched_ctx_id, struct starpu_bitmap * workers, struct starpu_task * task) { int workerid; for(workerid = starpu_bitmap_first(workers); -1 != workerid; workerid = starpu_bitmap_next(workers, workerid)) if(find_best_impl(sched_ctx_id, task, workerid)) break; } static int best_implementation_push_task(struct starpu_sched_component * component, struct starpu_task * task) { STARPU_ASSERT(component->nchildren == 1); select_best_implementation_and_set_preds(component->tree->sched_ctx_id, &component->workers_in_ctx, task); return starpu_sched_component_push_task(component,component->children[0],task); } int starpu_sched_component_is_best_implementation(struct starpu_sched_component * component) { return component->push_task == best_implementation_push_task; } static struct starpu_task * best_implementation_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * from STARPU_ATTRIBUTE_UNUSED) { struct starpu_task * task = NULL; unsigned i; for(i=0; i < component->nparents; i++) { if(component->parents[i] == NULL) continue; else { task = starpu_sched_component_pull_task(component->parents[i], component); if(task) break; } } if(task) /* this worker can execute this task as it was returned by a pop*/ (void)find_best_impl(component->tree->sched_ctx_id, task, starpu_bitmap_first(&component->workers_in_ctx)); return task; } struct starpu_sched_component * starpu_sched_component_best_implementation_create(struct starpu_sched_tree *tree, void *arg) { (void)arg; struct starpu_sched_component * component = starpu_sched_component_create(tree, "best_impl"); component->push_task = best_implementation_push_task; component->pull_task = best_implementation_pull_task; return component; } starpu-1.4.9+dfsg/src/sched_policies/component_composed.c000066400000000000000000000175141507764646700236230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include /* a composed component is parametred by a list of pair * (create_component_function(arg), arg) */ LIST_TYPE(fun_create_component, struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void * arg); void * arg; ); struct starpu_sched_component_composed_recipe { struct fun_create_component_list list; }; struct starpu_sched_component_composed_recipe * starpu_sched_component_composed_recipe_create(void) { struct starpu_sched_component_composed_recipe *recipe; _STARPU_MALLOC(recipe, sizeof(*recipe)); fun_create_component_list_init(&recipe->list); return recipe; } void starpu_sched_component_composed_recipe_add(struct starpu_sched_component_composed_recipe * recipe, struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void * arg), void * arg) { struct fun_create_component * e = fun_create_component_new(); e->create_component = create_component; e->arg = arg; fun_create_component_list_push_back(&recipe->list, e); } struct starpu_sched_component_composed_recipe *starpu_sched_component_composed_recipe_create_singleton(struct starpu_sched_component *(*create_component)(struct starpu_sched_tree *tree, void * arg), void * arg) { struct starpu_sched_component_composed_recipe * r = starpu_sched_component_composed_recipe_create(); starpu_sched_component_composed_recipe_add(r, create_component, arg); return r; } void starpu_sched_component_composed_recipe_destroy(struct starpu_sched_component_composed_recipe * recipe) { if(!recipe) return; while(!fun_create_component_list_empty(&recipe->list)) fun_create_component_delete(fun_create_component_list_pop_back(&recipe->list)); free(recipe); } struct composed_component { struct starpu_sched_component *top,*bottom; }; /* this function actually build the composed component data by changing the list of * (component_create_fun, arg_create_fun) into a tree where all components have 1 children */ static struct composed_component create_composed_component(struct starpu_sched_tree *tree, struct starpu_sched_component_composed_recipe * recipe #ifdef STARPU_HAVE_HWLOC ,hwloc_obj_t obj #endif ) { struct composed_component c; STARPU_ASSERT(recipe); struct fun_create_component_list * list = &recipe->list; struct fun_create_component * i = fun_create_component_list_begin(list); STARPU_ASSERT(i); STARPU_ASSERT(i->create_component); c.top = c.bottom = i->create_component(tree, i->arg); #ifdef STARPU_HAVE_HWLOC c.top->obj = obj; #endif for(i = fun_create_component_list_next(i); i != fun_create_component_list_end(list); i = fun_create_component_list_next(i)) { STARPU_ASSERT(i->create_component); struct starpu_sched_component * component = i->create_component(tree, i->arg); #ifdef STARPU_HAVE_HWLOC component->obj = obj; #endif c.bottom->add_child(c.bottom, component); /* we want to be able to traverse scheduler bottom up for all sched ctxs * when a worker call pop() */ unsigned j; for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++) component->add_parent(component, c.bottom); c.bottom = component; } STARPU_ASSERT(!starpu_sched_component_is_worker(c.bottom)); return c; } static int composed_component_push_task(struct starpu_sched_component * component, struct starpu_task * task) { struct composed_component *c = component->data; return starpu_sched_component_push_task(component,c->top,task); } static struct starpu_task * composed_component_pull_task(struct starpu_sched_component *component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) { struct composed_component *c = component->data; struct starpu_task *task; task = starpu_sched_component_pull_task(c->bottom,component); if(task) return task; unsigned i; for(i=0; i < component->nparents; i++) { if(component->parents[i] == NULL) continue; else { task = starpu_sched_component_pull_task(component->parents[i],component); if(task) break; } } return task; } static double composed_component_estimated_load(struct starpu_sched_component * component) { struct composed_component * c = component->data; return c->top->estimated_load(c->top); } static void composed_component_add_child(struct starpu_sched_component * component, struct starpu_sched_component * child) { struct composed_component * c = component->data; component->add_child(component, child); c->bottom->add_child(c->bottom, child); } static void composed_component_remove_child(struct starpu_sched_component * component, struct starpu_sched_component * child) { struct composed_component * c = component->data; component->remove_child(component, child); c->bottom->remove_child(c->bottom, child); } static void composed_component_notify_change_workers(struct starpu_sched_component * component) { struct composed_component * c = component->data; struct starpu_bitmap * workers = &component->workers; struct starpu_bitmap * workers_in_ctx = &component->workers_in_ctx; struct starpu_sched_component * n; for(n = c->top; ;n = n->children[0]) { starpu_bitmap_unset_all(&n->workers); starpu_bitmap_or(&n->workers, workers); starpu_bitmap_unset_all(&n->workers_in_ctx); starpu_bitmap_or(&n->workers_in_ctx, workers_in_ctx); n->properties = component->properties; if(n == c->bottom) break; } } static void composed_component_deinit_data(struct starpu_sched_component * _component) { struct composed_component *c = _component->data; c->bottom->children = NULL; c->bottom->nchildren = 0; struct starpu_sched_component * component; struct starpu_sched_component * next = c->top; do { component = next; next = component->children ? component->children[0] : NULL; starpu_sched_component_destroy(component); } while(next); free(c); _component->data = NULL; } struct starpu_sched_component * starpu_sched_component_composed_component_create(struct starpu_sched_tree *tree, struct starpu_sched_component_composed_recipe * recipe) { STARPU_ASSERT(!fun_create_component_list_empty(&recipe->list)); struct fun_create_component_list * l = &recipe->list; if(l->_head == l->_tail) return l->_head->create_component(tree, l->_head->arg); struct starpu_sched_component * component = starpu_sched_component_create(tree, "composed"); struct composed_component *c; _STARPU_MALLOC(c, sizeof(struct composed_component)); *c = create_composed_component(tree, recipe #ifdef STARPU_HAVE_HWLOC ,component->obj #endif ); c->bottom->nchildren = component->nchildren; c->bottom->children = component->children; c->bottom->nparents = component->nparents; c->bottom->parents = component->parents; component->data = c; component->deinit_data = composed_component_deinit_data; component->push_task = composed_component_push_task; component->pull_task = composed_component_pull_task; component->estimated_load = composed_component_estimated_load; component->estimated_end = starpu_sched_component_estimated_end_min; component->add_child = composed_component_add_child; component->remove_child = composed_component_remove_child; component->notify_change_workers = composed_component_notify_change_workers; return component; } starpu-1.4.9+dfsg/src/sched_policies/component_eager.c000066400000000000000000000122501507764646700230650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #ifdef BUILDING_STARPU #include #endif struct _starpu_eager_data { struct starpu_sched_component *target; starpu_pthread_mutex_t scheduling_mutex; int ntasks; }; static int eager_push_task(struct starpu_sched_component * component, struct starpu_task * task) { int ret; STARPU_ASSERT(component && task && starpu_sched_component_is_eager(component)); STARPU_ASSERT(starpu_sched_component_can_execute_task(component,task)); struct _starpu_eager_data *d = component->data; struct starpu_sched_component *target; /* FIX atomicity */ if (d->ntasks == 0) /* We have already pushed a task down */ return 1; if (d->ntasks > 0) d->ntasks--; if ((target = d->target)) { /* target told us we could push to it, try to */ int idworker; for(idworker = starpu_bitmap_first(&target->workers); idworker != -1; idworker = starpu_bitmap_next(&target->workers, idworker)) { int nimpl; for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { if(starpu_worker_can_execute_task(idworker,task,nimpl) || starpu_combined_worker_can_execute_task(idworker, task, nimpl)) { ret = starpu_sched_component_push_task(component,target,task); if (!ret) return 0; } } } } /* FIXME: should rather just loop over children before looping over its workers */ int workerid; for(workerid = starpu_bitmap_first(&component->workers_in_ctx); workerid != -1; workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid)) { int nimpl; for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { /* FIXME: use starpu_worker_can_execute_task_first_impl instead */ if(starpu_worker_can_execute_task(workerid,task,nimpl) || starpu_combined_worker_can_execute_task(workerid, task, nimpl)) { unsigned i; for (i = 0; i < component->nchildren; i++) { int idworker; for(idworker = starpu_bitmap_first(&component->children[i]->workers); idworker != -1; idworker = starpu_bitmap_next(&component->children[i]->workers, idworker)) { if (idworker == workerid) { if(starpu_sched_component_is_worker(component->children[i])) { if (component->children[i]->can_pull(component->children[i])) return 1; } else { ret = starpu_sched_component_push_task(component,component->children[i],task); if (!ret) return 0; } } } } } } } return 1; } /* Note: we can't use starpu_sched_component_pump_to ourself because if a fifo below * refuses a task, we have no way to push it back to a fifo above. */ static int eager_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to) { int success; struct _starpu_eager_data *d = component->data; STARPU_COMPONENT_MUTEX_LOCK(&d->scheduling_mutex); /* Target flow of tasks to this child */ d->target = to; /* But make pump above push only one task */ d->ntasks = 1; success = starpu_sched_component_can_push(component, to); d->target = NULL; d->ntasks = -1; STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex); return success; } static struct starpu_task *eager_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to) { /* We can't directly pull (in case the obtained task does not match * the constraints of `to'), but we can try to push, and components * below will cope with it */ eager_can_push(component, to); return NULL; } static void eager_deinit_data(struct starpu_sched_component *component) { STARPU_ASSERT(starpu_sched_component_is_eager(component)); struct _starpu_eager_data *d = component->data; STARPU_PTHREAD_MUTEX_DESTROY(&d->scheduling_mutex); free(d); } int starpu_sched_component_is_eager(struct starpu_sched_component * component) { return component->push_task == eager_push_task; } struct starpu_sched_component * starpu_sched_component_eager_create(struct starpu_sched_tree *tree, void *arg) { (void)arg; struct starpu_sched_component * component = starpu_sched_component_create(tree, "eager"); struct _starpu_eager_data *data; _STARPU_MALLOC(data, sizeof(*data)); data->target = NULL; data->ntasks = -1; STARPU_PTHREAD_MUTEX_INIT(&data->scheduling_mutex, NULL); component->data = data; component->push_task = eager_push_task; component->pull_task = eager_pull_task; component->can_push = eager_can_push; component->can_pull = starpu_sched_component_can_pull_all; component->deinit_data = eager_deinit_data; return component; } starpu-1.4.9+dfsg/src/sched_policies/component_eager_calibration.c000066400000000000000000000051401507764646700254340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include int eager_calibration_push_task(struct starpu_sched_component * component, struct starpu_task * task) { STARPU_ASSERT(component && task); STARPU_ASSERT(starpu_sched_component_can_execute_task(component,task)); starpu_task_bundle_t bundle = task->bundle; int workerid; for(workerid = starpu_bitmap_first(&component->workers_in_ctx); workerid != -1; workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid)) { struct starpu_perfmodel_arch* archtype = starpu_worker_get_perf_archtype(workerid, component->tree->sched_ctx_id); int nimpl; for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { if(starpu_worker_can_execute_task(workerid,task,nimpl) || starpu_combined_worker_can_execute_task(workerid, task, nimpl)) { double d; if(bundle) d = starpu_task_bundle_expected_length(bundle, archtype, nimpl); else d = starpu_task_expected_length(task, archtype, nimpl); if(isnan(d)) { unsigned i; for (i = 0; i < component->nchildren; i++) { int idworker; for(idworker = starpu_bitmap_first(&component->children[i]->workers); idworker != -1; idworker = starpu_bitmap_next(&component->children[i]->workers, idworker)) { if (idworker == workerid) { return starpu_sched_component_push_task(component,component->children[i],task); } } } } } } } return 1; } int starpu_sched_component_is_eager_calibration(struct starpu_sched_component * component) { return component->push_task == eager_calibration_push_task; } struct starpu_sched_component * starpu_sched_component_eager_calibration_create(struct starpu_sched_tree *tree, void *arg) { (void)arg; struct starpu_sched_component * component = starpu_sched_component_create(tree, "eager_calibration"); component->push_task = eager_calibration_push_task; return component; } starpu-1.4.9+dfsg/src/sched_policies/component_eager_prio.c000066400000000000000000000120021507764646700241110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* eager component which has its own priority queue. It can thus eagerly push * tasks to lower queues without having to wait for being pulled from. */ #include #include #include #include #include #include #include struct _starpu_eager_prio_data { struct starpu_st_prio_deque prio; starpu_pthread_mutex_t mutex; }; static int eager_prio_progress_one(struct starpu_sched_component *component) { struct _starpu_eager_prio_data * data = component->data; starpu_pthread_mutex_t * mutex = &data->mutex; struct starpu_st_prio_deque * prio = &data->prio; struct starpu_task *task; int ret; STARPU_COMPONENT_MUTEX_LOCK(mutex); task = starpu_st_prio_deque_pop_task(prio); STARPU_COMPONENT_MUTEX_UNLOCK(mutex); if (!task) { return 1; } /* FIXME: should rather just loop over children before looping over its workers */ int workerid; for(workerid = starpu_bitmap_first(&component->workers_in_ctx); workerid != -1; workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid)) { int nimpl; for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { /* FIXME: use starpu_worker_can_execute_task_first_impl instead */ if(starpu_worker_can_execute_task(workerid,task,nimpl) || starpu_combined_worker_can_execute_task(workerid, task, nimpl)) { unsigned i; for (i = 0; i < component->nchildren; i++) { int idworker; for(idworker = starpu_bitmap_first(&component->children[i]->workers); idworker != -1; idworker = starpu_bitmap_next(&component->children[i]->workers, idworker)) { if (idworker == workerid) { STARPU_ASSERT(!starpu_sched_component_is_worker(component->children[i])); ret = starpu_sched_component_push_task(component,component->children[i],task); if (!ret) return 0; } } } } } } /* Could not push to child actually, push that one back too */ STARPU_COMPONENT_MUTEX_LOCK(mutex); starpu_st_prio_deque_push_front_task(prio, task); STARPU_COMPONENT_MUTEX_UNLOCK(mutex); return 1; } /* Try to push some tasks below */ static void eager_prio_progress(struct starpu_sched_component *component) { STARPU_ASSERT(component && starpu_sched_component_is_eager_prio(component)); while (!eager_prio_progress_one(component)) ; } static int eager_prio_push_task(struct starpu_sched_component * component, struct starpu_task * task) { STARPU_ASSERT(component && task && starpu_sched_component_is_eager_prio(component)); struct _starpu_eager_prio_data * data = component->data; struct starpu_st_prio_deque * prio = &data->prio; starpu_pthread_mutex_t * mutex = &data->mutex; STARPU_COMPONENT_MUTEX_LOCK(mutex); starpu_st_prio_deque_push_back_task(prio,task); STARPU_COMPONENT_MUTEX_UNLOCK(mutex); eager_prio_progress(component); return 0; } static int eager_prio_can_push(struct starpu_sched_component *component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) { eager_prio_progress(component); int ret = 0; unsigned j; for(j=0; j < component->nparents; j++) { if(component->parents[j] == NULL) continue; else { ret = component->parents[j]->can_push(component->parents[j], component); if(ret) break; } } return ret; } static void eager_prio_component_deinit_data(struct starpu_sched_component * component) { STARPU_ASSERT(starpu_sched_component_is_eager_prio(component)); struct _starpu_eager_prio_data * d = component->data; starpu_st_prio_deque_destroy(&d->prio); free(d); } int starpu_sched_component_is_eager_prio(struct starpu_sched_component * component) { return component->push_task == eager_prio_push_task; } struct starpu_sched_component * starpu_sched_component_eager_prio_create(struct starpu_sched_tree *tree, void *arg) { (void)arg; struct starpu_sched_component * component = starpu_sched_component_create(tree, "eager_prio"); struct _starpu_eager_prio_data *data; _STARPU_MALLOC(data, sizeof(*data)); starpu_st_prio_deque_init(&data->prio); STARPU_PTHREAD_MUTEX_INIT(&data->mutex,NULL); component->data = data; component->push_task = eager_prio_push_task; component->can_push = eager_prio_can_push; component->deinit_data = eager_prio_component_deinit_data; return component; } starpu-1.4.9+dfsg/src/sched_policies/component_fifo.c000066400000000000000000000230231507764646700227250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include struct _starpu_fifo_data { struct starpu_st_fifo_taskq fifo; starpu_pthread_mutex_t mutex; unsigned ntasks_threshold; double exp_len_threshold; int ready; int exp; }; static void fifo_component_deinit_data(struct starpu_sched_component * component) { STARPU_ASSERT(component && component->data); struct _starpu_fifo_data * f = component->data; STARPU_PTHREAD_MUTEX_DESTROY(&f->mutex); free(f); } static double fifo_estimated_end(struct starpu_sched_component * component) { STARPU_ASSERT(component && component->data); struct _starpu_fifo_data * data = component->data; struct starpu_st_fifo_taskq * queue = &data->fifo; return starpu_sched_component_estimated_end_min_add(component, queue->exp_len); } static double fifo_estimated_load(struct starpu_sched_component * component) { STARPU_ASSERT(component && component->data); STARPU_ASSERT(starpu_bitmap_cardinal(&component->workers_in_ctx) != 0); struct _starpu_fifo_data * data = component->data; struct starpu_st_fifo_taskq * queue = &data->fifo; starpu_pthread_mutex_t * mutex = &data->mutex; double relative_speedup = 0.0; double load = starpu_sched_component_estimated_load(component); if(STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS(component)) { int first_worker = starpu_bitmap_first(&component->workers_in_ctx); relative_speedup = starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(first_worker, component->tree->sched_ctx_id)); STARPU_COMPONENT_MUTEX_LOCK(mutex); load += queue->ntasks / relative_speedup; STARPU_COMPONENT_MUTEX_UNLOCK(mutex); return load; } else { int i; for(i = starpu_bitmap_first(&component->workers_in_ctx); i != -1; i = starpu_bitmap_next(&component->workers_in_ctx, i)) relative_speedup += starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(i, component->tree->sched_ctx_id)); relative_speedup /= starpu_bitmap_cardinal(&component->workers_in_ctx); STARPU_ASSERT(!_STARPU_IS_ZERO(relative_speedup)); STARPU_COMPONENT_MUTEX_LOCK(mutex); load += queue->ntasks / relative_speedup; STARPU_COMPONENT_MUTEX_UNLOCK(mutex); } return load; } static int fifo_push_local_task(struct starpu_sched_component * component, struct starpu_task * task, unsigned is_pushback) { STARPU_ASSERT(component && component->data && task); STARPU_ASSERT(starpu_sched_component_can_execute_task(component,task)); struct _starpu_fifo_data * data = component->data; struct starpu_st_fifo_taskq * queue = &data->fifo; starpu_pthread_mutex_t * mutex = &data->mutex; int ret = 0; const double now = starpu_timing_now(); STARPU_COMPONENT_MUTEX_LOCK(mutex); if (!is_pushback && data->ntasks_threshold != 0 && queue->ntasks >= data->ntasks_threshold) { ret = 1; STARPU_COMPONENT_MUTEX_UNLOCK(mutex); } else if(data->exp) { double exp_len; if(!isnan(task->predicted)) exp_len = queue->exp_len + task->predicted; else exp_len = queue->exp_len; if (!is_pushback && data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold) { static int warned; STARPU_HG_DISABLE_CHECKING(warned); if(data->exp_len_threshold != 0.0 && task->predicted > data->exp_len_threshold && !warned) { _STARPU_DISP("Warning : a predicted task length (%lf) exceeds the expected length threshold (%lf) of a prio component queue, you should reconsider the value of this threshold. This message will not be printed again for further thresholds exceeding.\n",task->predicted,data->exp_len_threshold); warned = 1; } ret = 1; STARPU_COMPONENT_MUTEX_UNLOCK(mutex); } else { if(!isnan(task->predicted_transfer)) { double end = fifo_estimated_end(component); double tfer_end = now + task->predicted_transfer; /* FIXME: We don't have overlap when running CPU-CPU transfers */ if(tfer_end < end) task->predicted_transfer = 0.0; else task->predicted_transfer = tfer_end - end; exp_len += task->predicted_transfer; } if(!isnan(task->predicted)) { queue->exp_len = exp_len; queue->exp_end = queue->exp_start + queue->exp_len; } STARPU_ASSERT(!isnan(queue->exp_end)); STARPU_ASSERT(!isnan(queue->exp_len)); STARPU_ASSERT(!isnan(queue->exp_start)); } } if(!ret) { if(is_pushback) ret = starpu_st_fifo_taskq_push_back_task(queue,task); else { ret = starpu_st_fifo_taskq_push_task(queue,task); starpu_sched_component_prefetch_on_node(component, task); } STARPU_COMPONENT_MUTEX_UNLOCK(mutex); if(!is_pushback) component->can_pull(component); } return ret; } static int fifo_push_task(struct starpu_sched_component * component, struct starpu_task * task) { return fifo_push_local_task(component, task, 0); } static struct starpu_task * fifo_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to) { STARPU_ASSERT(component && component->data); struct _starpu_fifo_data * data = component->data; struct starpu_st_fifo_taskq * queue = &data->fifo; starpu_pthread_mutex_t * mutex = &data->mutex; const double now = starpu_timing_now(); if (!STARPU_RUNNING_ON_VALGRIND && starpu_st_fifo_taskq_empty(queue)) { starpu_sched_component_send_can_push_to_parents(component); return NULL; } STARPU_COMPONENT_MUTEX_LOCK(mutex); struct starpu_task * task; if (data->ready && to->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE) #ifdef STARPU_DEVEL #warning In eager schedulers, we never write that we want to fill the fifo before picking up a task. Eager is then ineffective since in practice the fifo will not fill #endif task = starpu_st_fifo_taskq_pop_first_ready_task(queue, starpu_bitmap_first(&to->workers_in_ctx), -1); else if (to->properties & STARPU_SCHED_COMPONENT_HOMOGENEOUS) task = starpu_st_fifo_taskq_pop_task(queue, starpu_bitmap_first(&to->workers_in_ctx)); else task = starpu_st_fifo_taskq_pop_task(queue, -1); if(task && data->exp) { if(!isnan(task->predicted)) { const double exp_len = queue->exp_len - task->predicted; queue->exp_start = now + task->predicted; if (exp_len >= 0.0) { queue->exp_len = exp_len; } else { /* exp_len can become negative due to rounding errors */ queue->exp_len = 0.0; } } STARPU_ASSERT_MSG(queue->exp_len>=0, "fifo->exp_len=%lf\n",queue->exp_len); if(!isnan(task->predicted_transfer)) { if (queue->exp_len > task->predicted_transfer) { queue->exp_start += task->predicted_transfer; queue->exp_len -= task->predicted_transfer; } else { queue->exp_start += queue->exp_len; queue->exp_len = 0; } } queue->exp_end = queue->exp_start + queue->exp_len; if(queue->ntasks == 0) queue->exp_len = 0.0; } STARPU_ASSERT(!isnan(queue->exp_end)); STARPU_ASSERT(!isnan(queue->exp_len)); STARPU_ASSERT(!isnan(queue->exp_start)); STARPU_COMPONENT_MUTEX_UNLOCK(mutex); // When a pop is called, a can_push is called for pushing tasks onto // the empty place of the queue left by the popped task. starpu_sched_component_send_can_push_to_parents(component); if(task) return task; return NULL; } /* When a can_push is caught by this function, we try to pop and push * tasks from our local queue as much as possible, until a * push fails, which means that the worker fifo_components are * currently "full". */ static int fifo_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) { STARPU_ASSERT(component && starpu_sched_component_is_fifo(component)); int res = 0; struct starpu_task * task; task = starpu_sched_component_pump_downstream(component, &res); if(task) { int ret = fifo_push_local_task(component,task,1); STARPU_ASSERT(!ret); } return res; } int starpu_sched_component_is_fifo(struct starpu_sched_component * component) { return component->push_task == fifo_push_task; } struct starpu_sched_component * starpu_sched_component_fifo_create(struct starpu_sched_tree *tree, struct starpu_sched_component_fifo_data * params) { struct starpu_sched_component *component = starpu_sched_component_create(tree, "fifo"); struct _starpu_fifo_data *data; _STARPU_MALLOC(data, sizeof(*data)); starpu_st_fifo_taskq_init(&data->fifo); STARPU_PTHREAD_MUTEX_INIT(&data->mutex,NULL); component->data = data; component->estimated_end = fifo_estimated_end; component->estimated_load = fifo_estimated_load; component->push_task = fifo_push_task; component->pull_task = fifo_pull_task; component->can_push = fifo_can_push; component->deinit_data = fifo_component_deinit_data; if(params) { data->ntasks_threshold=params->ntasks_threshold; data->exp_len_threshold=params->exp_len_threshold; data->ready=params->ready; data->exp=params->exp; } else { data->ntasks_threshold=0; data->exp_len_threshold=0.0; data->ready=0; data->exp=0; } return component; } starpu-1.4.9+dfsg/src/sched_policies/component_heft.c000066400000000000000000000171751507764646700227430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * Copyright (C) 2020-2020 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* HEFT variant which tries to schedule a given number of tasks instead of just * the first of its scheduling window, and actually schedule the task for which * the most benefit is achieved. */ #include #include #include #include "helper_mct.h" #include #include #include #include #define NTASKS 5 struct _starpu_heft_data { struct starpu_st_prio_deque prio; starpu_pthread_mutex_t mutex; struct _starpu_mct_data *mct_data; }; static int heft_progress_one(struct starpu_sched_component *component) { struct _starpu_heft_data * data = component->data; starpu_pthread_mutex_t * mutex = &data->mutex; struct starpu_st_prio_deque * prio = &data->prio; struct starpu_task * (tasks[NTASKS]); unsigned ntasks = 0; STARPU_COMPONENT_MUTEX_LOCK(mutex); tasks[0] = starpu_st_prio_deque_pop_task(prio); if (tasks[0]) { int priority = tasks[0]->priority; /* Try to look at NTASKS from the queue */ for (ntasks = 1; ntasks < NTASKS; ntasks++) { tasks[ntasks] = starpu_st_prio_deque_highest_task(prio); if (!tasks[ntasks] || tasks[ntasks]->priority < priority) break; starpu_st_prio_deque_pop_task(prio); } } STARPU_COMPONENT_MUTEX_UNLOCK(mutex); if (!ntasks) { return 1; } { struct _starpu_mct_data * d = data->mct_data; struct starpu_sched_component * best_component; unsigned n; /* Estimated task duration for each child */ double estimated_lengths[component->nchildren * ntasks]; /* Estimated transfer duration for each child */ double estimated_transfer_length[component->nchildren * ntasks]; /* Estimated transfer+task termination for each child */ double estimated_ends_with_task[component->nchildren * ntasks]; /* estimated energy */ double local_energy[component->nchildren * ntasks]; /* Minimum transfer+task termination of the NTASKS tasks over all workers */ double min_exp_end_of_task[ntasks]; /* Maximum termination of the already-scheduled tasks over all workers */ double max_exp_end_of_workers; unsigned suitable_components[component->nchildren * ntasks]; unsigned nsuitable_components[ntasks]; /* Estimate durations */ for (n = 0; n < ntasks; n++) { unsigned offset = component->nchildren * n; nsuitable_components[n] = starpu_mct_compute_execution_times(component, tasks[n], estimated_lengths + offset, estimated_transfer_length + offset, suitable_components + offset); starpu_mct_compute_expected_times(component, tasks[n], estimated_lengths + offset, estimated_transfer_length + offset, estimated_ends_with_task + offset, &min_exp_end_of_task[n], &max_exp_end_of_workers, suitable_components + offset, nsuitable_components[n]); /* Compute the energy, if provided*/ starpu_mct_compute_energy(component, tasks[n], local_energy + offset, suitable_components + offset, nsuitable_components[n]); } /* best_task is the task that will finish first among the ntasks, while best_benefit is its expected execution time*/ int best_task = 0; double best_benefit = min_exp_end_of_task[0]; /* Find the task which provides the most computation time benefit */ for (n = 1; n < ntasks; n++) { if (best_benefit > min_exp_end_of_task[n]) { best_benefit = min_exp_end_of_task[n]; best_task = n; } } STARPU_ASSERT(best_task >= 0); /* Push back the other tasks */ STARPU_COMPONENT_MUTEX_LOCK(mutex); for (n = ntasks - 1; n < ntasks; n--) if ((int) n != best_task) starpu_st_prio_deque_push_front_task(prio, tasks[n]); STARPU_COMPONENT_MUTEX_UNLOCK(mutex); unsigned offset = component->nchildren * best_task; int best_icomponent = starpu_mct_get_best_component(d, tasks[best_task], estimated_lengths + offset, estimated_transfer_length + offset, estimated_ends_with_task + offset, local_energy + offset, min_exp_end_of_task[best_task], max_exp_end_of_workers, suitable_components + offset, nsuitable_components[best_task]); if (best_icomponent == -1) return eager_calibration_push_task(component, tasks[best_task]); best_component = component->children[best_icomponent]; if(starpu_sched_component_is_worker(best_component)) { best_component->can_pull(best_component); return 1; } starpu_sched_task_break(tasks[best_task]); int ret = starpu_sched_component_push_task(component, best_component, tasks[best_task]); if (ret) { /* Could not push to child actually, push that one back too */ STARPU_COMPONENT_MUTEX_LOCK(mutex); starpu_st_prio_deque_push_front_task(prio, tasks[best_task]); STARPU_COMPONENT_MUTEX_UNLOCK(mutex); return 1; } else return 0; } } /* Try to push some tasks below */ static void heft_progress(struct starpu_sched_component *component) { STARPU_ASSERT(component && starpu_sched_component_is_heft(component)); while (!heft_progress_one(component)) ; } static int heft_push_task(struct starpu_sched_component * component, struct starpu_task * task) { STARPU_ASSERT(component && task && starpu_sched_component_is_heft(component)); struct _starpu_heft_data * data = component->data; struct starpu_st_prio_deque * prio = &data->prio; starpu_pthread_mutex_t * mutex = &data->mutex; STARPU_COMPONENT_MUTEX_LOCK(mutex); starpu_st_prio_deque_push_back_task(prio,task); STARPU_COMPONENT_MUTEX_UNLOCK(mutex); heft_progress(component); return 0; } static int heft_can_push(struct starpu_sched_component *component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) { heft_progress(component); int ret = 0; unsigned j; for(j=0; j < component->nparents; j++) { if(component->parents[j] == NULL) continue; else { ret = component->parents[j]->can_push(component->parents[j], component); if(ret) break; } } return ret; } static void heft_component_deinit_data(struct starpu_sched_component * component) { STARPU_ASSERT(starpu_sched_component_is_heft(component)); struct _starpu_heft_data * d = component->data; struct _starpu_mct_data * mct_d = d->mct_data; starpu_st_prio_deque_destroy(&d->prio); free(mct_d); free(d); } int starpu_sched_component_is_heft(struct starpu_sched_component * component) { return component->push_task == heft_push_task; } struct starpu_sched_component * starpu_sched_component_heft_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data * params) { struct starpu_sched_component * component = starpu_sched_component_create(tree, "heft"); struct _starpu_mct_data *mct_data = starpu_mct_init_parameters(params); struct _starpu_heft_data *data; _STARPU_MALLOC(data, sizeof(*data)); starpu_st_prio_deque_init(&data->prio); STARPU_PTHREAD_MUTEX_INIT(&data->mutex,NULL); data->mct_data = mct_data; component->data = data; component->push_task = heft_push_task; component->can_push = heft_can_push; component->deinit_data = heft_component_deinit_data; return component; } starpu-1.4.9+dfsg/src/sched_policies/component_heteroprio.c000066400000000000000000000435241507764646700241720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * Copyright (C) 2020-2020 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Heteroprio, which sorts tasks by acceleration factor into buckets, and makes * GPUs take accelerated tasks first and CPUs take non-accelerated tasks first */ #include #include #include #include "helper_mct.h" #include #include #include #include /* Approximation ratio for acceleration factor bucketing * We will put tasks with +-10% similar acceleration into the same bucket. */ #define APPROX 0.10 struct _starpu_heteroprio_data { /* This is an array of priority queues. * The array is sorted by acceleration factor, most accelerated first */ struct starpu_st_prio_deque **bucket; float *accel; unsigned naccel; /* This contains tasks which are not supported on all archs. */ struct starpu_st_prio_deque no_accel; /* This protects all queues */ starpu_pthread_mutex_t mutex; struct _starpu_mct_data *mct_data; unsigned batch; }; static int heteroprio_progress_accel(struct starpu_sched_component *component, struct _starpu_heteroprio_data *data, enum starpu_worker_archtype archtype, int front) { struct starpu_task *task = NULL; starpu_pthread_mutex_t * mutex = &data->mutex; int j, ret = 1; double acceleration = INFINITY; struct _starpu_mct_data * d = data->mct_data; STARPU_COMPONENT_MUTEX_LOCK(mutex); if (front) /* Pick up accelerated tasks first */ for (j = 0; j < (int) data->naccel; j++) { task = starpu_st_prio_deque_pop_task(data->bucket[j]); if (task) break; } else /* Pick up accelerated tasks last */ for (j = (int) data->naccel-1; j >= 0; j--) { if (data->batch && 0) task = starpu_st_prio_deque_pop_back_task(data->bucket[j]); else task = starpu_st_prio_deque_pop_task(data->bucket[j]); if (task) break; } if (task) { acceleration = data->accel[j]; //fprintf(stderr, "for %s thus %s, found task %p in bucket %d: %f\n", starpu_worker_get_type_as_string(archtype), front?"front":"back", task, j, acceleration); } STARPU_COMPONENT_MUTEX_UNLOCK(mutex); if (!task) return 1; if (data->batch) /* In batch mode the fifos below do not use priorities. Do not * leak a priority for the data prefetches either */ task->priority = INT_MAX; /* TODO: we might want to prefer to pick up a task whose data is already on some GPU */ struct starpu_sched_component * best_component; /* Estimated task duration for each child */ double estimated_lengths[component->nchildren]; /* Estimated transfer duration for each child */ double estimated_transfer_length[component->nchildren]; /* Estimated transfer+task termination for each child */ double estimated_ends_with_task[component->nchildren]; /* provided local energy */ double local_energy[component->nchildren]; /* Minimum transfer+task termination of the task over all workers */ double min_exp_end_of_task; /* Maximum termination of the already-scheduled tasks over all workers */ double max_exp_end_of_workers; unsigned suitable_components[component->nchildren]; unsigned nsuitable_components; nsuitable_components = starpu_mct_compute_execution_times(component, task, estimated_lengths, estimated_transfer_length, suitable_components); if (data->batch && 0) { /* In batch mode, we may want to insist on filling workers with tasks * by ignoring when other workers would finish this. */ unsigned i; for (i = 0; i < component->nchildren; i++) { int idworker; for(idworker = starpu_bitmap_first(&component->children[i]->workers); idworker != -1; idworker = starpu_bitmap_next(&component->children[i]->workers, idworker)) { if (starpu_worker_get_type(idworker) == archtype) break; } if (idworker == -1) { /* Not the targeted arch, avoid it */ /* XXX: INFINITY doesn't seem to be working properly */ estimated_lengths[i] = 1000000000; estimated_transfer_length[i] = 1000000000; } } } /* Entering critical section to make sure no two workers make scheduling decisions at the same time */ STARPU_COMPONENT_MUTEX_LOCK(&d->scheduling_mutex); starpu_mct_compute_expected_times(component, task, estimated_lengths, estimated_transfer_length, estimated_ends_with_task, &min_exp_end_of_task, &max_exp_end_of_workers, suitable_components, nsuitable_components); /* Compute the energy, if provided*/ starpu_mct_compute_energy(component, task, local_energy, suitable_components, nsuitable_components); /* And now find out which worker suits best for this task, * including data transfer */ int best_icomponent = starpu_mct_get_best_component(d, task, estimated_lengths, estimated_transfer_length, estimated_ends_with_task, local_energy, min_exp_end_of_task, max_exp_end_of_workers, suitable_components, nsuitable_components); /* If no best component is found, it means that the perfmodel of * the task had been purged since it has been pushed on the mct component. */ /* FIXME: We should perform a push_back message to its parent so that it will * be able to reschedule the task properly. */ if(best_icomponent == -1) { STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex); return eager_calibration_push_task(component, task); } best_component = component->children[best_icomponent]; int idworker; for(idworker = starpu_bitmap_first(&best_component->workers); idworker != -1; idworker = starpu_bitmap_next(&best_component->workers, idworker)) { if (starpu_worker_get_type(idworker) == archtype) break; } if (idworker == -1) goto out; /* Ok, we do have a worker there of that type, try to push it there. */ STARPU_ASSERT(!starpu_sched_component_is_worker(best_component)); starpu_sched_task_break(task); ret = starpu_sched_component_push_task(component,best_component,task); /* I can now exit the critical section: Pushing the task above ensures that its execution time will be taken into account for subsequent scheduling decisions */ if (!ret) { STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex); //fprintf(stderr, "pushed %p to %d\n", task, best_icomponent); /* Great! */ return 0; } out: STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex); /* No such kind of worker there, or it refused our task, abort */ //fprintf(stderr, "could not push %p to %d actually\n", task, best_icomponent); /* Could not push to child actually, push that one back */ STARPU_COMPONENT_MUTEX_LOCK(mutex); for (j = 0; j < (int) data->naccel; j++) { if (acceleration == data->accel[j]) { starpu_st_prio_deque_push_front_task(data->bucket[j], task); break; } } STARPU_ASSERT(j != (int) data->naccel); STARPU_COMPONENT_MUTEX_UNLOCK(mutex); //fprintf(stderr, "finished pushing to %d\n", archtype); return 1; } static int heteroprio_progress_noaccel(struct starpu_sched_component *component, struct _starpu_heteroprio_data *data, struct starpu_task *task) { struct _starpu_mct_data * d = data->mct_data; int ret; struct starpu_sched_component * best_component; /* Estimated task duration for each child */ double estimated_lengths[component->nchildren]; /* Estimated transfer duration for each child */ double estimated_transfer_length[component->nchildren]; /* Estimated transfer+task termination for each child */ double estimated_ends_with_task[component->nchildren]; /* estimated energy */ double local_energy[component->nchildren]; /* Minimum transfer+task termination of the task over all workers */ double min_exp_end_of_task; /* Maximum termination of the already-scheduled tasks over all workers */ double max_exp_end_of_workers; unsigned suitable_components[component->nchildren]; unsigned nsuitable_components; nsuitable_components = starpu_mct_compute_execution_times(component, task, estimated_lengths, estimated_transfer_length, suitable_components); /* If no suitable components were found, it means that the perfmodel of * the task had been purged since it has been pushed on the mct component. */ /* FIXME: We should perform a push_back message to its parent so that it will * be able to reschedule the task properly. */ if(nsuitable_components == 0) return eager_calibration_push_task(component, task); /* Entering critical section to make sure no two workers make scheduling decisions at the same time */ STARPU_COMPONENT_MUTEX_LOCK(&d->scheduling_mutex); starpu_mct_compute_expected_times(component, task, estimated_lengths, estimated_transfer_length, estimated_ends_with_task, &min_exp_end_of_task, &max_exp_end_of_workers, suitable_components, nsuitable_components); /* Compute the energy, if provided*/ starpu_mct_compute_energy(component, task, local_energy, suitable_components, nsuitable_components); /* And now find out which worker suits best for this task, * including data transfer */ int best_icomponent = starpu_mct_get_best_component(d, task, estimated_lengths, estimated_transfer_length, estimated_ends_with_task, local_energy, min_exp_end_of_task, max_exp_end_of_workers, suitable_components, nsuitable_components); /* If no best component is found, it means that the perfmodel of * the task had been purged since it has been pushed on the mct component. */ /* FIXME: We should perform a push_back message to its parent so that it will * be able to reschedule the task properly. */ if(best_icomponent == -1) { STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex); return eager_calibration_push_task(component, task); } best_component = component->children[best_icomponent]; STARPU_ASSERT(!starpu_sched_component_is_worker(best_component)); ret = starpu_sched_component_push_task(component,best_component,task); STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex); return ret; } static int heteroprio_progress_one(struct starpu_sched_component *component) { struct _starpu_heteroprio_data * data = component->data; starpu_pthread_mutex_t * mutex = &data->mutex; struct starpu_task *task; struct starpu_st_prio_deque * no_accel = &data->no_accel; STARPU_COMPONENT_MUTEX_LOCK(mutex); task = starpu_st_prio_deque_pop_task(no_accel); STARPU_COMPONENT_MUTEX_UNLOCK(mutex); if (task) { if (heteroprio_progress_noaccel(component, data, task)) { /* Could not push to child actually, push that one back */ STARPU_COMPONENT_MUTEX_LOCK(mutex); starpu_st_prio_deque_push_front_task(no_accel, task); STARPU_COMPONENT_MUTEX_UNLOCK(mutex); } } /* Note: this hardcodes acceleration order */ if (!heteroprio_progress_accel(component, data, STARPU_CUDA_WORKER, 1)) return 0; if (!heteroprio_progress_accel(component, data, STARPU_HIP_WORKER, 1)) return 0; if (!heteroprio_progress_accel(component, data, STARPU_OPENCL_WORKER, 1)) return 0; if (!heteroprio_progress_accel(component, data, STARPU_MPI_MS_WORKER, 0)) return 0; if (!heteroprio_progress_accel(component, data, STARPU_CPU_WORKER, 0)) return 0; return 1; } /* Try to push some tasks below */ static void heteroprio_progress(struct starpu_sched_component *component) { STARPU_ASSERT(component && starpu_sched_component_is_heteroprio(component)); while (!heteroprio_progress_one(component)) ; } static int heteroprio_push_task(struct starpu_sched_component * component, struct starpu_task * task) { STARPU_ASSERT(component && task && starpu_sched_component_is_heteroprio(component)); struct _starpu_heteroprio_data * data = component->data; starpu_pthread_mutex_t * mutex = &data->mutex; unsigned nimpl; double min_expected = INFINITY, max_expected = -INFINITY; double acceleration; if (data->batch && 0) /* Batch mode, we may want to ignore priorities completely */ task->priority = INT_MAX; /* Compute acceleration between best-performing arch and least-performing arch */ int workerid; for(workerid = starpu_bitmap_first(&component->workers_in_ctx); workerid != -1; workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid)) { unsigned impl_mask; if (!starpu_worker_can_execute_task_impl(workerid, task, &impl_mask)) break; struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(workerid, task->sched_ctx); double min_arch = INFINITY; for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { if (!(impl_mask & (1U << nimpl))) continue; double expected = starpu_task_expected_length(task, perf_arch, nimpl); if (isnan(expected) || expected == 0.) { min_arch = expected; break; } if (expected < min_arch) min_arch = expected; } if (isnan(min_arch) || min_arch == 0.) /* No known execution time, can't do anything here */ break; STARPU_ASSERT(min_arch != INFINITY); if (min_arch < min_expected) min_expected = min_arch; if (min_arch > max_expected) max_expected = min_arch; } if (workerid == -1) { /* All archs can run it */ STARPU_ASSERT(!isnan(min_expected)); STARPU_ASSERT(!isnan(max_expected)); STARPU_ASSERT(min_expected != INFINITY); STARPU_ASSERT(max_expected != -INFINITY); acceleration = max_expected / min_expected; STARPU_ASSERT(!isnan(acceleration)); //fprintf(stderr,"%s: acceleration %f\n", starpu_task_get_name(task), acceleration); STARPU_COMPONENT_MUTEX_LOCK(mutex); unsigned i, j; /* Try to find a bucket with similar acceleration */ for (i = 0; i < data->naccel; i++) { if (acceleration >= data->accel[i] * (1 - APPROX) && acceleration <= data->accel[i] * (1 + APPROX)) break; } if (i == data->naccel) { /* Didn't find it, add one */ data->naccel++; float *newaccel; _STARPU_MALLOC(newaccel, data->naccel * sizeof(*newaccel)); struct starpu_st_prio_deque **newbuckets; _STARPU_MALLOC(newbuckets, data->naccel * sizeof(*newbuckets)); struct starpu_st_prio_deque *newbucket; _STARPU_MALLOC(newbucket, sizeof(*newbucket)); starpu_st_prio_deque_init(newbucket); int inserted = 0; for (j = 0; j < data->naccel-1; j++) { if (!inserted && acceleration > data->accel[j]) { /* Insert the new bucket here */ i = j; newbuckets[j] = newbucket; newaccel[j] = acceleration; inserted = 1; } newbuckets[j+inserted] = data->bucket[j]; newaccel[j+inserted] = data->accel[j]; } if (!inserted) { /* Insert it last */ newbuckets[data->naccel-1] = newbucket; newaccel[data->naccel-1] = acceleration; } free(data->bucket); free(data->accel); data->bucket = newbuckets; data->accel = newaccel; } #if 0 fprintf(stderr,"buckets:"); for (j = 0; j < data->naccel; j++) { fprintf(stderr, " %f", data->accel[j]); } fprintf(stderr,"\ninserting %p %f to %d\n", task, acceleration, i); #endif starpu_st_prio_deque_push_back_task(data->bucket[i],task); STARPU_COMPONENT_MUTEX_UNLOCK(mutex); } else { /* Not all archs can run it, will resort to HEFT strategy */ acceleration = INFINITY; //fprintf(stderr,"%s: some archs can't do it\n", starpu_task_get_name(task)); struct starpu_st_prio_deque * no_accel = &data->no_accel; STARPU_COMPONENT_MUTEX_LOCK(mutex); starpu_st_prio_deque_push_back_task(no_accel,task); STARPU_COMPONENT_MUTEX_UNLOCK(mutex); } heteroprio_progress(component); return 0; } static int heteroprio_can_push(struct starpu_sched_component *component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) { heteroprio_progress(component); int ret = 0; unsigned j; for(j=0; j < component->nparents; j++) { if(component->parents[j] == NULL) continue; else { ret = component->parents[j]->can_push(component->parents[j], component); if(ret) break; } } return ret; } static void heteroprio_component_deinit_data(struct starpu_sched_component * component) { STARPU_ASSERT(starpu_sched_component_is_heteroprio(component)); struct _starpu_heteroprio_data * d = component->data; struct _starpu_mct_data * mct_d = d->mct_data; unsigned i; for (i = 0; i < d->naccel; i++) { starpu_st_prio_deque_destroy(d->bucket[i]); free(d->bucket[i]); } free(d->bucket); free(d->accel); starpu_st_prio_deque_destroy(&d->no_accel); STARPU_PTHREAD_MUTEX_DESTROY(&d->mutex); STARPU_PTHREAD_MUTEX_DESTROY(&mct_d->scheduling_mutex); free(mct_d); free(d); } int starpu_sched_component_is_heteroprio(struct starpu_sched_component * component) { return component->push_task == heteroprio_push_task; } struct starpu_sched_component * starpu_sched_component_heteroprio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_heteroprio_data * params) { struct starpu_sched_component * component = starpu_sched_component_create(tree, "heteroprio"); struct _starpu_mct_data *mct_data = starpu_mct_init_parameters(params ? params->mct : NULL); struct _starpu_heteroprio_data *data; _STARPU_MALLOC(data, sizeof(*data)); data->bucket = NULL; data->accel = NULL; data->naccel = 0; starpu_st_prio_deque_init(&data->no_accel); STARPU_PTHREAD_MUTEX_INIT(&data->mutex,NULL); data->mct_data = mct_data; STARPU_PTHREAD_MUTEX_INIT(&mct_data->scheduling_mutex,NULL); if (params) data->batch = params->batch; else data->batch = 1; component->data = data; component->push_task = heteroprio_push_task; component->can_push = heteroprio_can_push; component->deinit_data = heteroprio_component_deinit_data; return component; } starpu-1.4.9+dfsg/src/sched_policies/component_mct.c000066400000000000000000000117301507764646700225670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * Copyright (C) 2020-2020 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper_mct.h" #include #include #include static int mct_push_task(struct starpu_sched_component * component, struct starpu_task * task) { STARPU_ASSERT(component && task && starpu_sched_component_is_mct(component)); struct _starpu_mct_data * d = component->data; struct starpu_sched_component * best_component; /* Estimated task duration for each child */ double estimated_lengths[component->nchildren]; /* Estimated transfer duration for each child */ double estimated_transfer_length[component->nchildren]; /* Estimated transfer+task termination for each child */ double estimated_ends_with_task[component->nchildren]; /* estimated energy */ double local_energy[component->nchildren]; /* Minimum transfer+task termination of the task over all workers */ double min_exp_end_of_task; /* Maximum termination of the already-scheduled tasks over all workers */ double max_exp_end_of_workers; unsigned suitable_components[component->nchildren]; unsigned nsuitable_components; nsuitable_components = starpu_mct_compute_execution_times(component, task, estimated_lengths, estimated_transfer_length, suitable_components); /* If no suitable components were found, it means that the perfmodel of * the task had been purged since it has been pushed on the mct component. */ /* FIXME: We should perform a push_back message to its parent so that it will * be able to reschedule the task properly. */ if(nsuitable_components == 0) return eager_calibration_push_task(component, task); /* Entering critical section to make sure no two workers make scheduling decisions at the same time */ STARPU_COMPONENT_MUTEX_LOCK(&d->scheduling_mutex); starpu_mct_compute_expected_times(component, task, estimated_lengths, estimated_transfer_length, estimated_ends_with_task, &min_exp_end_of_task, &max_exp_end_of_workers, suitable_components, nsuitable_components); /* Compute the energy, if provided*/ starpu_mct_compute_energy(component, task, local_energy, suitable_components, nsuitable_components); int best_icomponent = starpu_mct_get_best_component(d, task, estimated_lengths, estimated_transfer_length, estimated_ends_with_task, local_energy, min_exp_end_of_task, max_exp_end_of_workers, suitable_components, nsuitable_components); /* If no best component is found, it means that the perfmodel of * the task had been purged since it has been pushed on the mct component. */ /* FIXME: We should perform a push_back message to its parent so that it will * be able to reschedule the task properly. */ if(best_icomponent == -1) { STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex); return eager_calibration_push_task(component, task); } best_component = component->children[best_icomponent]; if(starpu_sched_component_is_worker(best_component)) { best_component->can_pull(best_component); STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex); return 1; } starpu_sched_task_break(task); int ret = starpu_sched_component_push_task(component, best_component, task); /* I can now exit the critical section: Pushing the task below ensures that its execution time will be taken into account for subsequent scheduling decisions */ STARPU_COMPONENT_MUTEX_UNLOCK(&d->scheduling_mutex); return ret; } static void mct_component_deinit_data(struct starpu_sched_component * component) { STARPU_ASSERT(starpu_sched_component_is_mct(component)); struct _starpu_mct_data * d = component->data; STARPU_PTHREAD_MUTEX_DESTROY(&d->scheduling_mutex); free(d); } int starpu_sched_component_is_mct(struct starpu_sched_component * component) { return component->push_task == mct_push_task; } struct starpu_sched_component * starpu_sched_component_mct_create(struct starpu_sched_tree *tree, struct starpu_sched_component_mct_data * params) { struct starpu_sched_component * component = starpu_sched_component_create(tree, "mct"); struct _starpu_mct_data *data = starpu_mct_init_parameters(params); component->data = data; STARPU_PTHREAD_MUTEX_INIT(&data->scheduling_mutex, NULL); component->push_task = mct_push_task; component->deinit_data = mct_component_deinit_data; return component; } starpu-1.4.9+dfsg/src/sched_policies/component_perfmodel_select.c000066400000000000000000000101521507764646700253150ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include /* The decision component takes care of the scheduling of tasks which are not * calibrated, or tasks which don't have a performance model, because the scheduling * architecture of this scheduler for tasks with no performance model is exactly * the same as the tree-prio scheduler. * Tasks with a perfmodel are pushed to the perfmodel_component, which takes care of the * scheduling of those tasks on the correct worker_component. */ struct _starpu_perfmodel_select_data { struct starpu_sched_component * calibrator_component; struct starpu_sched_component * no_perfmodel_component; struct starpu_sched_component * perfmodel_component; }; static int perfmodel_select_push_task(struct starpu_sched_component * component, struct starpu_task * task) { STARPU_ASSERT(component && component->data && task && starpu_sched_component_is_perfmodel_select(component)); STARPU_ASSERT(starpu_sched_component_can_execute_task(component,task)); struct _starpu_perfmodel_select_data * data = component->data; double length; int can_execute = starpu_sched_component_execute_preds(component,task,&length); if(can_execute) { if(isnan(length)) { static int warned; STARPU_HG_DISABLE_CHECKING(warned); if (!warned) { warned = 1; _STARPU_DISP("Warning: performance model for %s not finished calibrating, using a dumb scheduling heuristic for now\n",starpu_task_get_name(task)); } return starpu_sched_component_push_task(component,data->calibrator_component,task); } if(_STARPU_IS_ZERO(length)) return starpu_sched_component_push_task(component,data->no_perfmodel_component,task); return starpu_sched_component_push_task(component,data->perfmodel_component,task); } else return 1; } static struct starpu_task * perfmodel_select_pull_task(struct starpu_sched_component * component STARPU_ATTRIBUTE_UNUSED, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) { /* We don't want to pull tasks blindly, only let them go through push, so we push to the right component. */ return NULL; } static void perfmodel_select_component_deinit_data(struct starpu_sched_component * component) { STARPU_ASSERT(component && component->data); struct _starpu_perfmodel_select_data * d = component->data; free(d); } int starpu_sched_component_is_perfmodel_select(struct starpu_sched_component * component) { return component->push_task == perfmodel_select_push_task; } struct starpu_sched_component * starpu_sched_component_perfmodel_select_create(struct starpu_sched_tree *tree, struct starpu_sched_component_perfmodel_select_data * params) { STARPU_ASSERT(params); STARPU_ASSERT(params->calibrator_component && params->no_perfmodel_component && params->perfmodel_component); struct starpu_sched_component * component = starpu_sched_component_create(tree, "perfmodel_selector"); struct _starpu_perfmodel_select_data *data; _STARPU_MALLOC(data, sizeof(*data)); data->calibrator_component = params->calibrator_component; data->no_perfmodel_component = params->no_perfmodel_component; data->perfmodel_component = params->perfmodel_component; component->data = data; component->can_pull = starpu_sched_component_send_can_push_to_parents; component->push_task = perfmodel_select_push_task; component->pull_task = perfmodel_select_pull_task; component->deinit_data = perfmodel_select_component_deinit_data; component->estimated_end = starpu_sched_component_estimated_end_min; return component; } starpu-1.4.9+dfsg/src/sched_policies/component_prio.c000066400000000000000000000247661507764646700227720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #ifdef STARPU_USE_FXT #define STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(component,ntasks,exp_len) do { \ if (fut_active) { \ int workerid = STARPU_NMAXWORKERS + 1; \ if((component->nchildren == 1) && starpu_sched_component_is_worker(component->children[0])) \ workerid = starpu_sched_component_worker_get_workerid(component->children[0]); \ _STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(workerid, ntasks, exp_len); \ } \ } while (0) #define STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(component,ntasks,exp_len) do { \ if (fut_active) { \ int workerid = STARPU_NMAXWORKERS + 1; \ if((component->nchildren == 1) && starpu_sched_component_is_worker(component->children[0])) \ workerid = starpu_sched_component_worker_get_workerid(component->children[0]); \ _STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(workerid, ntasks, exp_len); \ } \ } while (0) #else #define STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(component,ntasks,exp_len) do { } while (0) #define STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(component,ntasks,exp_len) do { } while (0) #endif struct _starpu_prio_data { struct starpu_st_prio_deque prio; starpu_pthread_mutex_t mutex; unsigned ntasks_threshold; double exp_len_threshold; int ready; int exp; }; static void prio_component_deinit_data(struct starpu_sched_component * component) { STARPU_ASSERT(component && component->data); struct _starpu_prio_data * f = component->data; starpu_st_prio_deque_destroy(&f->prio); STARPU_PTHREAD_MUTEX_DESTROY(&f->mutex); free(f); } static double prio_estimated_end(struct starpu_sched_component * component) { STARPU_ASSERT(component && component->data); struct _starpu_prio_data * data = component->data; struct starpu_st_prio_deque * queue = &data->prio; return starpu_sched_component_estimated_end_min_add(component, queue->exp_len); } static double prio_estimated_load(struct starpu_sched_component * component) { STARPU_ASSERT(component && component->data); STARPU_ASSERT(starpu_bitmap_cardinal(&component->workers_in_ctx) != 0); struct _starpu_prio_data * data = component->data; struct starpu_st_prio_deque * queue = &data->prio; starpu_pthread_mutex_t * mutex = &data->mutex; double relative_speedup = 0.0; double load = starpu_sched_component_estimated_load(component); if(STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS(component)) { int first_worker = starpu_bitmap_first(&component->workers_in_ctx); relative_speedup = starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(first_worker, component->tree->sched_ctx_id)); STARPU_COMPONENT_MUTEX_LOCK(mutex); load += queue->ntasks / relative_speedup; STARPU_COMPONENT_MUTEX_UNLOCK(mutex); return load; } else { int i; for(i = starpu_bitmap_first(&component->workers_in_ctx); i != -1; i = starpu_bitmap_next(&component->workers_in_ctx, i)) relative_speedup += starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(i, component->tree->sched_ctx_id)); relative_speedup /= starpu_bitmap_cardinal(&component->workers_in_ctx); STARPU_ASSERT(!_STARPU_IS_ZERO(relative_speedup)); STARPU_COMPONENT_MUTEX_LOCK(mutex); load += queue->ntasks / relative_speedup; STARPU_COMPONENT_MUTEX_UNLOCK(mutex); } return load; } static int prio_push_local_task(struct starpu_sched_component * component, struct starpu_task * task, unsigned is_pushback) { STARPU_ASSERT(component && component->data && task); STARPU_ASSERT(starpu_sched_component_can_execute_task(component,task)); struct _starpu_prio_data * data = component->data; struct starpu_st_prio_deque * queue = &data->prio; starpu_pthread_mutex_t * mutex = &data->mutex; int ret = 0; const double now = starpu_timing_now(); STARPU_COMPONENT_MUTEX_LOCK(mutex); double exp_len = NAN; if (!is_pushback && data->ntasks_threshold != 0 && queue->ntasks >= data->ntasks_threshold) { ret = 1; STARPU_COMPONENT_MUTEX_UNLOCK(mutex); } else if(data->exp) { if(!isnan(task->predicted)) exp_len = queue->exp_len + task->predicted; else exp_len = queue->exp_len; if (!is_pushback && data->exp_len_threshold != 0.0 && exp_len >= data->exp_len_threshold) { static int warned; STARPU_HG_DISABLE_CHECKING(warned); if(data->exp_len_threshold != 0.0 && task->predicted > data->exp_len_threshold && !warned) { _STARPU_DISP("Warning : a predicted task length (%lf) exceeds the expected length threshold (%lf) of a prio component queue, you should reconsider the value of this threshold. This message will not be printed again for further thresholds exceeding.\n",task->predicted,data->exp_len_threshold); warned = 1; } ret = 1; STARPU_COMPONENT_MUTEX_UNLOCK(mutex); } else { if(!isnan(task->predicted_transfer)) { double end = prio_estimated_end(component); double tfer_end = now + task->predicted_transfer; /* FIXME: We don't have overlap when running CPU-CPU transfers */ if(tfer_end < end) task->predicted_transfer = 0.0; else task->predicted_transfer = tfer_end - end; exp_len += task->predicted_transfer; } if(!isnan(task->predicted)) { queue->exp_len = exp_len; queue->exp_end = queue->exp_start + queue->exp_len; } STARPU_ASSERT(!isnan(queue->exp_end)); STARPU_ASSERT(!isnan(queue->exp_len)); STARPU_ASSERT(!isnan(queue->exp_start)); } } if(!ret) { if(is_pushback) ret = starpu_st_prio_deque_push_front_task(queue,task); else { ret = starpu_st_prio_deque_push_back_task(queue,task); starpu_sched_component_prefetch_on_node(component, task); STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(component, queue->ntasks, exp_len); } STARPU_COMPONENT_MUTEX_UNLOCK(mutex); if(!is_pushback) component->can_pull(component); } return ret; } static int prio_push_task(struct starpu_sched_component * component, struct starpu_task * task) { int ret = prio_push_local_task(component, task, 0); return ret; } static struct starpu_task * prio_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to) { STARPU_ASSERT(component && component->data); struct _starpu_prio_data * data = component->data; struct starpu_st_prio_deque * queue = &data->prio; starpu_pthread_mutex_t * mutex = &data->mutex; const double now = starpu_timing_now(); if (!STARPU_RUNNING_ON_VALGRIND && starpu_st_prio_deque_is_empty(queue)) { starpu_sched_component_send_can_push_to_parents(component); return NULL; } STARPU_COMPONENT_MUTEX_LOCK(mutex); struct starpu_task * task; if (data->ready && to->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE) task = starpu_st_prio_deque_deque_first_ready_task(queue, starpu_bitmap_first(&to->workers_in_ctx)); else task = starpu_st_prio_deque_pop_task(queue); if(task && data->exp) { if(!isnan(task->predicted)) { const double exp_len = queue->exp_len - task->predicted; queue->exp_start = now + task->predicted; if (exp_len >= 0.0) { queue->exp_len = exp_len; } else { /* exp_len can become negative due to rounding errors */ queue->exp_len = 0.0; } } STARPU_ASSERT_MSG(queue->exp_len>=0, "prio->exp_len=%lf\n",queue->exp_len); if(!isnan(task->predicted_transfer)) { if (queue->exp_len > task->predicted_transfer) { queue->exp_start += task->predicted_transfer; queue->exp_len -= task->predicted_transfer; } else { queue->exp_start += queue->exp_len; queue->exp_len = 0; } } queue->exp_end = queue->exp_start + queue->exp_len; if(queue->ntasks == 0) queue->exp_len = 0.0; } if(task) STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(component, queue->ntasks, queue->exp_len); STARPU_ASSERT(!isnan(queue->exp_end)); STARPU_ASSERT(!isnan(queue->exp_len)); STARPU_ASSERT(!isnan(queue->exp_start)); STARPU_COMPONENT_MUTEX_UNLOCK(mutex); // When a pop is called, a can_push is called for pushing tasks onto // the empty place of the queue left by the popped task. starpu_sched_component_send_can_push_to_parents(component); if(task) return task; return NULL; } /* When a can_push is caught by this function, we try to pop and push * tasks from our local queue as much as possible, until a * push fails, which means that the worker prio_components are * currently "full". */ static int prio_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) { STARPU_ASSERT(component && starpu_sched_component_is_prio(component)); int res = 0; struct starpu_task * task; task = starpu_sched_component_pump_downstream(component, &res); if(task) { int ret = prio_push_local_task(component,task,1); STARPU_ASSERT(!ret); } return res; } int starpu_sched_component_is_prio(struct starpu_sched_component * component) { return component->push_task == prio_push_task; } struct starpu_sched_component * starpu_sched_component_prio_create(struct starpu_sched_tree *tree, struct starpu_sched_component_prio_data * params) { struct starpu_sched_component * component = starpu_sched_component_create(tree, "prio"); struct _starpu_prio_data *data; _STARPU_MALLOC(data, sizeof(*data)); starpu_st_prio_deque_init(&data->prio); STARPU_PTHREAD_MUTEX_INIT(&data->mutex,NULL); component->data = data; component->estimated_end = prio_estimated_end; component->estimated_load = prio_estimated_load; component->push_task = prio_push_task; component->pull_task = prio_pull_task; component->can_push = prio_can_push; component->deinit_data = prio_component_deinit_data; if(params) { data->ntasks_threshold=params->ntasks_threshold; data->exp_len_threshold=params->exp_len_threshold; data->ready=params->ready; data->exp=params->exp; } else { data->ntasks_threshold=0; data->exp_len_threshold=0.0; data->ready=0; data->exp=0; } return component; } starpu-1.4.9+dfsg/src/sched_policies/component_random.c000066400000000000000000000066631507764646700232750ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include static double compute_relative_speedup(struct starpu_sched_component * component) { double sum = 0.0; int id; for(id = starpu_bitmap_first(&component->workers_in_ctx); id != -1; id = starpu_bitmap_next(&component->workers_in_ctx, id)) { struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(id, component->tree->sched_ctx_id); sum += starpu_worker_get_relative_speedup(perf_arch); } STARPU_ASSERT(sum != 0.0); return sum; } static int random_push_task(struct starpu_sched_component * component, struct starpu_task * task) { STARPU_ASSERT(component->nchildren > 0); /* indexes_components and size are used to memoize component that can execute tasks * during the first phase of algorithm, it contain the size indexes of the components * that can execute task. */ int indexes_components[component->nchildren]; unsigned size=0; /* speedup[i] is revelant only if i is in the size firsts elements of * indexes_components */ double speedup[component->nchildren]; double alpha_sum = 0.0; unsigned i; for(i = 0; i < component->nchildren ; i++) { if(starpu_sched_component_can_execute_task(component->children[i],task)) { speedup[size] = compute_relative_speedup(component->children[i]); alpha_sum += speedup[size]; indexes_components[size] = i; size++; } } if(size == 0) return -ENODEV; /* not fully sure that this code is correct * because of bad properties of double arithmetic */ double random = starpu_drand48()*alpha_sum; double alpha = 0.0; struct starpu_sched_component * select = NULL; for(i = 0; i < size ; i++) { int index = indexes_components[i]; if(alpha + speedup[i] >= random) { select = component->children[index]; break; } alpha += speedup[i]; } STARPU_ASSERT(select != NULL); if(starpu_sched_component_is_worker(select)) { select->can_pull(select); return 1; } starpu_sched_task_break(task); int ret_val = starpu_sched_component_push_task(component,select,task); return ret_val; } static struct starpu_task *random_pull_task(struct starpu_sched_component * from, struct starpu_sched_component *to) { starpu_sched_component_can_push(from, to); return NULL; } int starpu_sched_component_is_random(struct starpu_sched_component *component) { return component->push_task == random_push_task; } struct starpu_sched_component * starpu_sched_component_random_create(struct starpu_sched_tree *tree, void *arg) { (void)arg; struct starpu_sched_component * component = starpu_sched_component_create(tree, "random"); component->push_task = random_push_task; component->pull_task = random_pull_task; return component; } starpu-1.4.9+dfsg/src/sched_policies/component_sched.c000066400000000000000000000623121507764646700230740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include "sched_component.h" /****************************************************************************** * Generic Scheduling Components' helper functions * ******************************************************************************/ /* * this function find the best implementation or an implementation * that need to be calibrated for a worker available and set * prediction in *length. nan if a implementation need to be * calibrated, 0.0 if no perf model are available * return false if no worker on the component can execute that task */ int starpu_sched_component_execute_preds(struct starpu_sched_component * component, struct starpu_task * task, double * length) { STARPU_ASSERT(component && task); int can_execute = 0; starpu_task_bundle_t bundle = task->bundle; double len = DBL_MAX; int workerid; for(workerid = starpu_bitmap_first(&component->workers_in_ctx); workerid != -1; workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid)) { int nimpl; for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { if(starpu_worker_can_execute_task(workerid,task,nimpl) || starpu_combined_worker_can_execute_task(workerid, task, nimpl)) { double d; can_execute = 1; if(bundle) { struct starpu_perfmodel_arch* archtype = starpu_worker_get_perf_archtype(workerid, component->tree->sched_ctx_id); d = starpu_task_bundle_expected_length(bundle, archtype, nimpl); } else d = starpu_task_worker_expected_length(task, workerid, component->tree->sched_ctx_id, nimpl); if(isnan(d)) { *length = d; return can_execute; } if(_STARPU_IS_ZERO(d)) { continue; } STARPU_ASSERT_MSG(d >= 0, "workerid=%d, nimpl=%d, bundle=%p, d=%lf\n", workerid, nimpl, bundle, d); if(d < len) { len = d; } } } if(STARPU_SCHED_COMPONENT_IS_HOMOGENEOUS(component)) break; } if(len == DBL_MAX) /* we dont have perf model */ len = 0.0; if(length) *length = len; return can_execute; } /* very similar function that dont compute prediction */ int starpu_sched_component_can_execute_task(struct starpu_sched_component * component, struct starpu_task * task) { STARPU_ASSERT(task); STARPU_ASSERT(component); unsigned nimpl; int worker; for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) for(worker = starpu_bitmap_first(&component->workers_in_ctx); -1 != worker; worker = starpu_bitmap_next(&component->workers_in_ctx, worker)) if (starpu_worker_can_execute_task(worker, task, nimpl) || starpu_combined_worker_can_execute_task(worker, task, nimpl)) { if (starpu_worker_can_execute_task(worker, task, nimpl) == 1) _STARPU_EXTRA_DEBUG("worker %d CAN execute task %s with impl %d\n", worker, starpu_task_get_name(task), nimpl); if (starpu_combined_worker_can_execute_task(worker, task, nimpl) == 1) _STARPU_EXTRA_DEBUG("combined worker %d CAN execute task %s with impl %d\n", worker, starpu_task_get_name(task), nimpl); return 1; } else { if (starpu_worker_can_execute_task(worker, task, nimpl) == 0) _STARPU_EXTRA_DEBUG("worker %d CANNOT execute task %s with impl %d\n", worker, starpu_task_get_name(task), nimpl); if (starpu_combined_worker_can_execute_task(worker, task, nimpl) == 0) _STARPU_EXTRA_DEBUG("combined worker %d CANNOT execute task %s with impl %d\n", worker, starpu_task_get_name(task), nimpl); } return 0; } /* compute the average of transfer length for tasks on all workers * maybe this should be optimised if all workers are under the same numa component */ double starpu_sched_component_transfer_length(struct starpu_sched_component * component, struct starpu_task * task) { STARPU_ASSERT(component && task); int nworkers = starpu_bitmap_cardinal(&component->workers_in_ctx); double sum = 0.0; int worker; if(STARPU_SCHED_COMPONENT_IS_SINGLE_MEMORY_NODE(component)) { unsigned memory_node = starpu_worker_get_memory_node(starpu_bitmap_first(&component->workers_in_ctx)); if(task->bundle) return starpu_task_bundle_expected_data_transfer_time(task->bundle,memory_node); else return starpu_task_expected_data_transfer_time(memory_node, task); } for(worker = starpu_bitmap_first(&component->workers_in_ctx); worker != -1; worker = starpu_bitmap_next(&component->workers_in_ctx, worker)) { unsigned memory_node = starpu_worker_get_memory_node(worker); if(task->bundle) { sum += starpu_task_bundle_expected_data_transfer_time(task->bundle,memory_node); } else { sum += starpu_task_expected_data_transfer_time(memory_node, task); /* sum += starpu_task_expected_conversion_time(task, starpu_worker_get_perf_archtype(worker, component->tree->sched_ctx_id), impl ?) * I dont know what to do as we dont know what implementation would be used here... */ } } return sum / nworkers; } /* This function can be called by components when they think that a prefetching request can be submitted. * For example, it is currently used by the MCT component to begin the prefetching on accelerators * on which it pushed tasks as soon as possible. */ void starpu_sched_component_prefetch_on_node(struct starpu_sched_component * component, struct starpu_task * task) { if (starpu_get_prefetch_flag() && (!task->prefetched) && (component->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE)) { int worker = starpu_bitmap_first(&component->workers_in_ctx); unsigned memory_node = starpu_worker_get_memory_node(worker); starpu_prefetch_task_input_on_node(task, memory_node); } } /* remove all child * for all child of component, if child->parents[x] == component, set child->parents[x] to null * call component->deinit_data */ void starpu_sched_component_destroy(struct starpu_sched_component *component) { STARPU_ASSERT(component); unsigned i,j; for(i = 0; i < component->nchildren; i++) { struct starpu_sched_component * child = component->children[i]; for(j = 0; j < child->nparents; j++) if(child->parents[j] == component) child->remove_parent(child,component); } while(component->nchildren != 0) component->remove_child(component, component->children[0]); for(i = 0; i < component->nparents; i++) { struct starpu_sched_component * parent = component->parents[i]; for(j = 0; j < parent->nchildren; j++) if(parent->children[j] == component) parent->remove_child(parent,component); } while(component->nparents != 0) component->remove_parent(component, component->parents[0]); component->deinit_data(component); free(component->children); free(component->parents); free(component->name); free(component); } void starpu_sched_component_destroy_rec(struct starpu_sched_component * component) { if(component == NULL) return; unsigned i = 0; while(i < component->nchildren) { if (starpu_sched_component_is_worker(component->children[i])) i++; else starpu_sched_component_destroy_rec(component->children[i]); } if (!starpu_sched_component_is_worker(component)) starpu_sched_component_destroy(component); } static void set_properties(struct starpu_sched_component * component) { STARPU_ASSERT(component); component->properties = 0; int worker = starpu_bitmap_first(&component->workers_in_ctx); if (worker == -1) return; if (starpu_worker_is_combined_worker(worker)) return; #ifdef STARPU_DEVEL #warning FIXME: Not all CUDA devices have the same speed #endif uint32_t first_worker = _starpu_get_worker_struct(worker)->worker_mask; unsigned first_memory_node = _starpu_get_worker_struct(worker)->memory_node; int is_homogeneous = 1; int is_all_same_component = 1; for(; worker != -1; worker = starpu_bitmap_next(&component->workers_in_ctx, worker)) { if(starpu_worker_is_combined_worker(worker)) continue; if(first_worker != _starpu_get_worker_struct(worker)->worker_mask) is_homogeneous = 0; if(first_memory_node != _starpu_get_worker_struct(worker)->memory_node) is_all_same_component = 0; } if(is_homogeneous) component->properties |= STARPU_SCHED_COMPONENT_HOMOGENEOUS; if(is_all_same_component) component->properties |= STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE; } /* recursively set the component->workers member of component's subtree */ void _starpu_sched_component_update_workers(struct starpu_sched_component * component) { STARPU_ASSERT(component); if(starpu_sched_component_is_worker(component)) return; starpu_bitmap_unset_all(&component->workers); unsigned i; for(i = 0; i < component->nchildren; i++) { _starpu_sched_component_update_workers(component->children[i]); starpu_bitmap_or(&component->workers, &component->children[i]->workers); } component->notify_change_workers(component); } /* recursively set the component->workers_in_ctx in component's subtree */ void _starpu_sched_component_update_workers_in_ctx(struct starpu_sched_component * component, unsigned sched_ctx_id) { STARPU_ASSERT(component); /* worker components are shared among sched_ctxs, thus we do not apply the sched_ctx worker mask to them. * per-ctx filtering is performed higher in the tree */ if(starpu_sched_component_is_worker(component)) return; struct starpu_bitmap * workers_in_ctx = _starpu_get_worker_mask(sched_ctx_id); starpu_bitmap_unset_and(&component->workers_in_ctx,&component->workers, workers_in_ctx); unsigned i,j; for(i = starpu_worker_get_count(); i < starpu_worker_get_count() + starpu_combined_worker_get_count(); i++) { if (starpu_bitmap_get(&component->workers, i)) { /* Component has this combined worker, check whether the * context has all the corresponding workers */ int worker_size; int *combined_workerid; starpu_combined_worker_get_description(i, &worker_size, &combined_workerid); for (j = 0; j < (unsigned) worker_size; j++) if (!starpu_bitmap_get(workers_in_ctx, combined_workerid[j])) goto nocombined; /* We have all workers, add it */ starpu_bitmap_set(&component->workers_in_ctx, i); } nocombined: (void)0; } for(i = 0; i < component->nchildren; i++) { struct starpu_sched_component * child = component->children[i]; _starpu_sched_component_update_workers_in_ctx(child, sched_ctx_id); } set_properties(component); component->notify_change_workers(component); } /****************************************************************************** * Scheduling Trees' helper functions * ******************************************************************************/ struct starpu_bitmap * _starpu_get_worker_mask(unsigned sched_ctx_id) { STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); struct starpu_sched_tree * t = starpu_sched_ctx_get_policy_data(sched_ctx_id); STARPU_ASSERT(t); return &t->workers; } void starpu_sched_tree_update_workers_in_ctx(struct starpu_sched_tree * t) { STARPU_ASSERT(t); if (t->root) _starpu_sched_component_update_workers_in_ctx(t->root, t->sched_ctx_id); } void starpu_sched_tree_update_workers(struct starpu_sched_tree * t) { STARPU_ASSERT(t); if (t->root) _starpu_sched_component_update_workers(t->root); } /****************************************************************************** * Scheduling Trees' Functions * * Most of them are used to define the starpu_sched_policy interface * ******************************************************************************/ void starpu_sched_component_connect(struct starpu_sched_component *parent, struct starpu_sched_component *child) { parent->add_child(parent, child); child->add_parent(child, parent); _STARPU_TRACE_SCHED_COMPONENT_CONNECT(parent,child); } int starpu_sched_tree_push_task(struct starpu_task * task) { STARPU_ASSERT(task); unsigned sched_ctx_id = task->sched_ctx; struct starpu_sched_tree *tree = starpu_sched_ctx_get_policy_data(sched_ctx_id); int ret_val = starpu_sched_component_push_task(NULL, tree->root,task); /* Modular schedulers are not supposed to refuse tasks */ STARPU_ASSERT(!ret_val); return 0; } int starpu_sched_component_push_task(struct starpu_sched_component *from STARPU_ATTRIBUTE_UNUSED, struct starpu_sched_component *to, struct starpu_task *task) { int pushback; int priority = task->priority; pushback = to->push_task(to, task); if (!pushback) _STARPU_TRACE_SCHED_COMPONENT_PUSH(from, to, task, priority); return pushback; } struct starpu_task * starpu_sched_tree_pop_task(unsigned sched_ctx) { unsigned workerid = starpu_worker_get_id_check(); struct starpu_sched_component * component = starpu_sched_component_worker_get(sched_ctx, workerid); /* _starpu_sched_component_lock_worker(workerid) is called by component->pull_task() */ struct starpu_task * task = starpu_sched_component_pull_task(component,NULL); return task; } struct starpu_task * starpu_sched_component_pull_task(struct starpu_sched_component *from, struct starpu_sched_component *to) { struct starpu_task *task = from->pull_task(from, to); if (task) _STARPU_TRACE_SCHED_COMPONENT_PULL(from, to, task); return task; } /* * Pump mechanic to get the task flow rolling. Takes tasks from * component and send them to the child. * To be used by components with only one child */ struct starpu_task* starpu_sched_component_pump_to(struct starpu_sched_component *component, struct starpu_sched_component *child, int* success) { int ret = 0; struct starpu_task * task; while (1) { task = component->pull_task(component,child); if (!task) break; ret = starpu_sched_component_push_task(component,child,task); if (ret) break; if(success) * success = 1; } if(task && ret) /* Return the task which couldn't actually be pushed */ return task; return NULL; } struct starpu_task* starpu_sched_component_pump_downstream(struct starpu_sched_component *component, int* success) { STARPU_ASSERT(component->nchildren == 1); return starpu_sched_component_pump_to(component, component->children[0], success); } void starpu_sched_tree_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) { STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); STARPU_ASSERT(workerids); struct starpu_sched_tree * t = starpu_sched_ctx_get_policy_data(sched_ctx_id); STARPU_COMPONENT_MUTEX_LOCK(&t->lock); _starpu_sched_component_lock_all_workers(); unsigned i; for(i = 0; i < nworkers; i++) starpu_bitmap_set(&t->workers, workerids[i]); starpu_sched_tree_update_workers_in_ctx(t); _starpu_sched_component_unlock_all_workers(); STARPU_COMPONENT_MUTEX_UNLOCK(&t->lock); } void starpu_sched_tree_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) { STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); STARPU_ASSERT(workerids); struct starpu_sched_tree * t = starpu_sched_ctx_get_policy_data(sched_ctx_id); STARPU_COMPONENT_MUTEX_LOCK(&t->lock); _starpu_sched_component_lock_all_workers(); unsigned i; for(i = 0; i < nworkers; i++) starpu_bitmap_unset(&t->workers, workerids[i]); starpu_sched_tree_update_workers_in_ctx(t); _starpu_sched_component_unlock_all_workers(); STARPU_COMPONENT_MUTEX_UNLOCK(&t->lock); } static void _starpu_sched_tree_do_schedule(struct starpu_sched_component *component) { unsigned i; if (component->do_schedule) component->do_schedule(component); for (i = 0; i < component->nchildren; i++) _starpu_sched_tree_do_schedule(component->children[i]); } void starpu_sched_tree_do_schedule(unsigned sched_ctx_id) { STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); struct starpu_sched_tree * t = starpu_sched_ctx_get_policy_data(sched_ctx_id); if (t->root) _starpu_sched_tree_do_schedule(t->root); } static struct starpu_sched_tree *trees[STARPU_NMAX_SCHED_CTXS]; struct starpu_sched_tree * starpu_sched_tree_create(unsigned sched_ctx_id) { STARPU_ASSERT(sched_ctx_id < STARPU_NMAX_SCHED_CTXS); STARPU_ASSERT(!trees[sched_ctx_id]); struct starpu_sched_tree *t; _STARPU_CALLOC(t, 1, sizeof(*t)); t->sched_ctx_id = sched_ctx_id; starpu_bitmap_init(&t->workers); STARPU_PTHREAD_MUTEX_INIT(&t->lock,NULL); trees[sched_ctx_id] = t; return t; } void starpu_sched_tree_destroy(struct starpu_sched_tree * tree) { STARPU_ASSERT(tree); STARPU_ASSERT(trees[tree->sched_ctx_id] == tree); trees[tree->sched_ctx_id] = NULL; if(tree->root) starpu_sched_component_destroy_rec(tree->root); STARPU_PTHREAD_MUTEX_DESTROY(&tree->lock); free(tree); } struct starpu_sched_tree * starpu_sched_tree_get(unsigned sched_ctx_id) { return trees[sched_ctx_id]; } void starpu_sched_tree_deinitialize(unsigned sched_ctx_id) { struct starpu_sched_tree *t = (struct starpu_sched_tree*)starpu_sched_ctx_get_policy_data(sched_ctx_id); starpu_sched_tree_destroy(t); } /****************************************************************************** * Interface Functions for Generic Scheduling Components * ******************************************************************************/ void starpu_sched_component_add_child(struct starpu_sched_component* component, struct starpu_sched_component * child) { STARPU_ASSERT(component && child); STARPU_ASSERT(!starpu_sched_component_is_simple_worker(component)); unsigned i; for(i = 0; i < component->nchildren; i++) { STARPU_ASSERT(component->children[i] != component); STARPU_ASSERT(component->children[i] != NULL); } _STARPU_REALLOC(component->children, sizeof(struct starpu_sched_component *) * (component->nchildren + 1)); component->children[component->nchildren] = child; component->nchildren++; } static void starpu_sched_component_remove_child(struct starpu_sched_component * component, struct starpu_sched_component * child) { STARPU_ASSERT(component && child); STARPU_ASSERT(!starpu_sched_component_is_simple_worker(component)); unsigned pos; for(pos = 0; pos < component->nchildren; pos++) if(component->children[pos] == child) break; STARPU_ASSERT(pos != component->nchildren); component->children[pos] = component->children[--component->nchildren]; } static void starpu_sched_component_add_parent(struct starpu_sched_component* component, struct starpu_sched_component * parent) { STARPU_ASSERT(component && parent); unsigned i; for(i = 0; i < component->nparents; i++) { STARPU_ASSERT(component->parents[i] != component); STARPU_ASSERT(component->parents[i] != NULL); } _STARPU_REALLOC(component->parents, sizeof(struct starpu_sched_component *) * (component->nparents + 1)); component->parents[component->nparents] = parent; component->nparents++; } static void starpu_sched_component_remove_parent(struct starpu_sched_component * component, struct starpu_sched_component * parent) { STARPU_ASSERT(component && parent); unsigned pos; for(pos = 0; pos < component->nparents; pos++) if(component->parents[pos] == parent) break; STARPU_ASSERT(pos != component->nparents); component->parents[pos] = component->parents[--component->nparents]; } /* default implementation for component->pull_task() * just perform a recursive call on parent */ struct starpu_task * starpu_sched_component_parents_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) { STARPU_ASSERT(component); struct starpu_task * task = NULL; unsigned i; for(i=0; i < component->nparents; i++) { if(component->parents[i] == NULL) continue; else { task = starpu_sched_component_pull_task(component->parents[i], component); if(task) break; } } return task; } /* The default implementation of the can_push function is a recursive call to its parents. * A personally-made can_push in a component (like in prio components) is necessary to catch * this recursive call somewhere, if the user wants to exploit it. */ int starpu_sched_component_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) { STARPU_ASSERT(component); int ret = 0; if(component->nparents > 0) { unsigned i; for(i=0; i < component->nparents; i++) { struct starpu_sched_component * parent = component->parents[i]; if(parent != NULL) ret = parent->can_push(parent, component); if(ret) break; } } return ret; } /* A can_pull call will try to wake up one worker associated to the children of the * component. It is currently called by components which holds a queue (like fifo and prio * components) to signify its children that a task has been pushed on its local queue. */ int starpu_sched_component_can_pull(struct starpu_sched_component * component) { STARPU_ASSERT(component); STARPU_ASSERT(!starpu_sched_component_is_worker(component)); unsigned i; for(i = 0; i < component->nchildren; i++) { if (component->children[i]->can_pull(component->children[i])) return 1; } return 0; } /* A can_pull call will try to wake up one worker associated to the children of the * component. It is currently called by components which holds a queue (like fifo and prio * components) to signify its children that a task has been pushed on its local queue. */ int starpu_sched_component_can_pull_all(struct starpu_sched_component * component) { STARPU_ASSERT(component); STARPU_ASSERT(!starpu_sched_component_is_worker(component)); unsigned i; for(i = 0; i < component->nchildren; i++) component->children[i]->can_pull(component->children[i]); return 0; } /* Alternative can_pull which says that this component does not want * to pull but prefers that you push. It can be used by decision * components, in which decisions are usually taken in their push() * functions */ int starpu_sched_component_send_can_push_to_parents(struct starpu_sched_component * component) { STARPU_ASSERT(component); STARPU_ASSERT(!starpu_sched_component_is_worker(component)); unsigned i; int ret = 0; for(i=0; i < component->nparents; i++) { if(component->parents[i] == NULL) continue; else { ret = component->parents[i]->can_push(component->parents[i], component); if(ret) break; } } return ret != 0; } double starpu_sched_component_estimated_load(struct starpu_sched_component * component) { double sum = 0.0; unsigned i; for(i = 0; i < component->nchildren; i++) { struct starpu_sched_component * c = component->children[i]; sum += c->estimated_load(c); } return sum; } double starpu_sched_component_estimated_end_min_add(struct starpu_sched_component * component, double exp_len) { STARPU_ASSERT(component); double min = DBL_MAX; unsigned i; double ends[component->nchildren]; for(i = 0; i < component->nchildren; i++) { double tmp = ends[i] = component->children[i]->estimated_end(component->children[i]); if(tmp < min) min = tmp; } if (exp_len > 0) { /* We don't know which workers will do this, assume it will be * evenly distributed to existing work */ int card = starpu_bitmap_cardinal(&component->workers_in_ctx); if (card == 0) /* Oops, no resources to compute our tasks. Let's just hope that * we will be given one at some point */ card = 1; for(i = 0; i < component->nchildren; i++) { exp_len += ends[i] - min; } min += exp_len / card; } return min; } double starpu_sched_component_estimated_end_min(struct starpu_sched_component * component) { return starpu_sched_component_estimated_end_min_add(component, 0.); } double starpu_sched_component_estimated_end_average(struct starpu_sched_component * component) { STARPU_ASSERT(component); double sum = 0.0; unsigned i; for(i = 0; i < component->nchildren; i++) sum += component->children[i]->estimated_end(component->children[i]); return sum / component->nchildren; } static void take_component_and_does_nothing(struct starpu_sched_component * component STARPU_ATTRIBUTE_UNUSED) { } struct starpu_sched_component * starpu_sched_component_create(struct starpu_sched_tree *tree, const char *name) { struct starpu_sched_component *component; _STARPU_CALLOC(component, 1, sizeof(*component)); component->tree = tree; starpu_bitmap_init(&component->workers); starpu_bitmap_init(&component->workers_in_ctx); component->add_child = starpu_sched_component_add_child; component->remove_child = starpu_sched_component_remove_child; component->add_parent = starpu_sched_component_add_parent; component->remove_parent = starpu_sched_component_remove_parent; component->pull_task = starpu_sched_component_parents_pull_task; component->can_push = starpu_sched_component_can_push; component->can_pull = starpu_sched_component_can_pull; component->estimated_load = starpu_sched_component_estimated_load; component->estimated_end = starpu_sched_component_estimated_end_min; component->deinit_data = take_component_and_does_nothing; component->notify_change_workers = take_component_and_does_nothing; component->name = strdup(name); _STARPU_TRACE_SCHED_COMPONENT_NEW(component); return component; } starpu-1.4.9+dfsg/src/sched_policies/component_stage.c000066400000000000000000000045341507764646700231130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This component takes tasks from its parents in the parent order. * It can be useful to make scheduling stages, pushing tasks of different stages * to different schedulers, and this component will pick them up in the right * order. */ #include #include static int stage_push_task(struct starpu_sched_component * component, struct starpu_task * task) { _STARPU_DISP("stage component is not supposed to be pushed to...\n"); STARPU_ASSERT(component->nchildren == 1); return starpu_sched_component_push_task(component, component->children[0], task); } static int stage_can_push(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) { _STARPU_DISP("stage component is not supposed to be pushed to...\n"); return starpu_sched_component_can_push(component, to); } static struct starpu_task * stage_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) { struct starpu_task *task; task = starpu_sched_component_parents_pull_task(component, to); return task; } int starpu_sched_component_is_stage(struct starpu_sched_component * component) { return component->push_task == stage_push_task; } struct starpu_sched_component * starpu_sched_component_stage_create(struct starpu_sched_tree *tree, void *args STARPU_ATTRIBUTE_UNUSED) { struct starpu_sched_component *component = starpu_sched_component_create(tree, "stage"); component->push_task = stage_push_task; /* The default implementation happens to be doing staged pull from parents */ component->pull_task = stage_pull_task; component->can_push = stage_can_push; return component; } starpu-1.4.9+dfsg/src/sched_policies/component_userchoice.c000066400000000000000000000043371507764646700241420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This component uses (uintptr_t) tasks->sched_data as the child number it * should push its tasks to. It can thus be used to let the user choose which * scheduler a task should go to. */ #include #include static int userchoice_push_task(struct starpu_sched_component * component, struct starpu_task * task) { unsigned target = (uintptr_t) task->sched_data; STARPU_ASSERT(target < component->nchildren); return starpu_sched_component_push_task(component, component->children[target], task); } static struct starpu_task * userchoice_pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) { _STARPU_DISP("stage component is not supposed to be pull from...\n"); return starpu_sched_component_parents_pull_task(component, to); } static int userchoice_can_pull(struct starpu_sched_component * component) { _STARPU_DISP("stage component is not supposed to be pull from...\n"); return starpu_sched_component_can_pull(component); } int starpu_sched_component_is_userchoice(struct starpu_sched_component * component) { return component->push_task == userchoice_push_task; } struct starpu_sched_component * starpu_sched_component_userchoice_create(struct starpu_sched_tree *tree, void *args STARPU_ATTRIBUTE_UNUSED) { struct starpu_sched_component *component = starpu_sched_component_create(tree, "userchoice"); component->push_task = userchoice_push_task; component->pull_task = userchoice_pull_task; component->can_pull = userchoice_can_pull; return component; } starpu-1.4.9+dfsg/src/sched_policies/component_work_stealing.c000066400000000000000000000304421507764646700246550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #ifdef STARPU_DEVEL #warning TODO: locality work-stealing #endif struct _starpu_component_work_stealing_data_per_worker { struct starpu_st_prio_deque fifo; unsigned last_pop_child; }; struct _starpu_component_work_stealing_data { /* keep track of the work performed from the beginning of the algorithm to make * better decisions about which queue to child when stealing or deferring work */ struct _starpu_component_work_stealing_data_per_worker *per_worker; unsigned performed_total, last_push_child; starpu_pthread_mutex_t ** mutexes; unsigned size; }; /** * steal a task in a round robin way * return NULL if none available */ static struct starpu_task * steal_task_round_robin(struct starpu_sched_component *component, int workerid) { struct _starpu_component_work_stealing_data *wsd = component->data; unsigned i = wsd->per_worker[workerid].last_pop_child; wsd->per_worker[workerid].last_pop_child = (i + 1) % component->nchildren; /* If the worker's queue have no suitable tasks, let's try * the next ones */ struct starpu_task * task = NULL; while (1) { struct starpu_st_prio_deque * fifo = &wsd->per_worker[i].fifo; STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]); task = starpu_st_prio_deque_deque_task_for_worker(fifo, workerid, NULL); if(task && !isnan(task->predicted)) { fifo->exp_len -= task->predicted; fifo->nprocessed--; } STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]); if(task) { starpu_sched_task_break(task); break; } if (i == wsd->per_worker[workerid].last_pop_child) { /* We got back to the first worker, * don't go in infinite loop */ return NULL; } i = (i + 1) % component->nchildren; } return task; } /** * Return a worker to whom add a task. * Selecting a worker is done in a round-robin fashion. */ static unsigned select_worker_round_robin(struct starpu_sched_component * component) { struct _starpu_component_work_stealing_data *ws = (struct _starpu_component_work_stealing_data*)component->data; unsigned i = (ws->last_push_child + 1) % component->nchildren ; ws->last_push_child = i; return i; } /** * Return a worker from which a task can be stolen. * This is a phony function used to call the right * function depending on the value of USE_OVERLOAD. */ static inline struct starpu_task * steal_task(struct starpu_sched_component * component, int workerid) { return steal_task_round_robin(component, workerid); } /** * Return a worker from which a task can be stolen. * This is a phony function used to call the right * function depending on the value of USE_OVERLOAD. */ static inline unsigned select_worker(struct starpu_sched_component * component) { return select_worker_round_robin(component); } static int is_worker_of_component(struct starpu_sched_component * component, int workerid) { return starpu_bitmap_get(&component->workers, workerid); } static struct starpu_task * pull_task(struct starpu_sched_component * component, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) { unsigned workerid = starpu_worker_get_id_check(); unsigned i; for(i = 0; i < component->nchildren; i++) { if(is_worker_of_component(component->children[i], workerid)) break; } STARPU_ASSERT(i < component->nchildren); struct _starpu_component_work_stealing_data * wsd = component->data; const double now = starpu_timing_now(); STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]); struct starpu_task * task = starpu_st_prio_deque_pop_task(&wsd->per_worker[i].fifo); if(task) { if(!isnan(task->predicted)) { wsd->per_worker[i].fifo.exp_len -= task->predicted; wsd->per_worker[i].fifo.exp_start = now + task->predicted; } } else wsd->per_worker[i].fifo.exp_len = 0.0; STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]); if(task) { return task; } task = steal_task(component, workerid); if(task) { STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]); wsd->per_worker[i].fifo.nprocessed++; STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]); return task; } for(i=0; i < component->nparents; i++) { if(component->parents[i] == NULL) continue; else { task = starpu_sched_component_pull_task(component->parents[i],component); if(task) break; } } if(task) return task; else return NULL; } static double _ws_estimated_end(struct starpu_sched_component * component) { STARPU_ASSERT(starpu_sched_component_is_work_stealing(component)); struct _starpu_component_work_stealing_data * wsd = component->data; double sum_len = 0.0; double sum_start = 0.0; unsigned i; const double now = starpu_timing_now(); for(i = 0; i < component->nchildren; i++) { STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]); sum_len += wsd->per_worker[i].fifo.exp_len; wsd->per_worker[i].fifo.exp_start = STARPU_MAX(now, wsd->per_worker[i].fifo.exp_start); sum_start += wsd->per_worker[i].fifo.exp_start; STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]); } int nb_workers = starpu_bitmap_cardinal(&component->workers_in_ctx); return (sum_start + sum_len) / nb_workers; } static double _ws_estimated_load(struct starpu_sched_component * component) { STARPU_ASSERT(starpu_sched_component_is_work_stealing(component)); struct _starpu_component_work_stealing_data * wsd = component->data; int ntasks = 0; unsigned i; for(i = 0; i < component->nchildren; i++) { STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]); ntasks += wsd->per_worker[i].fifo.ntasks; STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]); } double speedup = 0.0; int workerid; for(workerid = starpu_bitmap_first(&component->workers_in_ctx); -1 != workerid; workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid)) { speedup += starpu_worker_get_relative_speedup(starpu_worker_get_perf_archtype(workerid, component->tree->sched_ctx_id)); } return ntasks / speedup; } static int push_task(struct starpu_sched_component * component, struct starpu_task * task) { struct _starpu_component_work_stealing_data * wsd = component->data; int ret; unsigned i = wsd->last_push_child; int found = 0; /* Find a child component that can execute this task */ i = (i+1)%component->nchildren; while(1) { int workerid; for(workerid = starpu_bitmap_first(&component->children[i]->workers_in_ctx); -1 != workerid; workerid = starpu_bitmap_next(&component->children[i]->workers_in_ctx, workerid)) { unsigned impl; int can_execute = starpu_worker_can_execute_task_first_impl(workerid, task, &impl); if (can_execute) { /* Found one, set the implementation by the way */ starpu_task_set_implementation(task, impl); found = 1; break; } } if (found) break; STARPU_ASSERT_MSG(i != wsd->last_push_child, "Could not find child able to execute this task"); i = (i+1)%component->nchildren; } STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]); starpu_sched_task_break(task); ret = starpu_st_prio_deque_push_front_task(&wsd->per_worker[i].fifo, task); STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]); wsd->last_push_child = i; starpu_sched_component_can_pull_all(component); return ret; } //this function is special, when a worker call it, we want to push the task in his fifo int starpu_sched_tree_work_stealing_push_task(struct starpu_task *task) { int workerid = starpu_worker_get_id(); if(workerid == -1) return starpu_sched_tree_push_task(task); /* Check that we can execute it */ unsigned impl; int can_execute = starpu_worker_can_execute_task_first_impl(workerid, task, &impl); if (!can_execute) return starpu_sched_tree_push_task(task); /* Ok, use that implementation */ starpu_task_set_implementation(task, impl); unsigned sched_ctx_id = task->sched_ctx; struct starpu_sched_component * component =starpu_sched_component_worker_get(sched_ctx_id, workerid); while(sched_ctx_id < component->nparents && component->parents[sched_ctx_id] != NULL) { component = component->parents[sched_ctx_id]; if(starpu_sched_component_is_work_stealing(component)) { if(!starpu_sched_component_can_execute_task(component, task)) return starpu_sched_tree_push_task(task); unsigned i; for(i = 0; i < component->nchildren; i++) if(is_worker_of_component(component->children[i], workerid)) break; STARPU_ASSERT(i < component->nchildren); struct _starpu_component_work_stealing_data * wsd = component->data; STARPU_COMPONENT_MUTEX_LOCK(wsd->mutexes[i]); int ret = starpu_st_prio_deque_push_front_task(&wsd->per_worker[i].fifo , task); if(ret == 0 && !isnan(task->predicted)) wsd->per_worker[i].fifo.exp_len += task->predicted; STARPU_COMPONENT_MUTEX_UNLOCK(wsd->mutexes[i]); component->can_pull(component); return ret; } } return starpu_sched_tree_push_task(task); } static void _ws_add_child(struct starpu_sched_component * component, struct starpu_sched_component * child) { struct _starpu_component_work_stealing_data * wsd = component->data; starpu_sched_component_add_child(component, child); if(wsd->size < component->nchildren) { STARPU_ASSERT(wsd->size == component->nchildren - 1); _STARPU_REALLOC(wsd->per_worker, component->nchildren * sizeof(*wsd->per_worker)); _STARPU_REALLOC(wsd->mutexes, component->nchildren * sizeof(*wsd->mutexes)); wsd->size = component->nchildren; } wsd->per_worker[component->nchildren - 1].last_pop_child = 0; starpu_st_prio_deque_init(&wsd->per_worker[component->nchildren - 1].fifo); starpu_pthread_mutex_t *mutex; _STARPU_MALLOC(mutex, sizeof(*mutex)); STARPU_PTHREAD_MUTEX_INIT(mutex,NULL); wsd->mutexes[component->nchildren - 1] = mutex; } static void _ws_remove_child(struct starpu_sched_component * component, struct starpu_sched_component * child) { struct _starpu_component_work_stealing_data * wsd = component->data; STARPU_PTHREAD_MUTEX_DESTROY(wsd->mutexes[component->nchildren - 1]); free(wsd->mutexes[component->nchildren - 1]); unsigned i_component; for(i_component = 0; i_component < component->nchildren; i_component++) { if(component->children[i_component] == child) break; } STARPU_ASSERT(i_component != component->nchildren); struct starpu_st_prio_deque tmp_fifo = wsd->per_worker[i_component].fifo; wsd->per_worker[i_component].fifo = wsd->per_worker[component->nchildren - 1].fifo; component->children[i_component] = component->children[component->nchildren - 1]; component->nchildren--; struct starpu_task * task; while ((task = starpu_st_prio_deque_pop_task(&tmp_fifo))) { starpu_sched_component_push_task(NULL, component, task); } } static void _work_stealing_component_deinit_data(struct starpu_sched_component * component) { struct _starpu_component_work_stealing_data * wsd = component->data; free(wsd->per_worker); free(wsd->mutexes); free(wsd); } int starpu_sched_component_is_work_stealing(struct starpu_sched_component * component) { return component->push_task == push_task; } struct starpu_sched_component * starpu_sched_component_work_stealing_create(struct starpu_sched_tree *tree, void *arg) { (void)arg; struct starpu_sched_component *component = starpu_sched_component_create(tree, "work_stealing"); struct _starpu_component_work_stealing_data *wsd; _STARPU_CALLOC(wsd, 1, sizeof(*wsd)); component->pull_task = pull_task; component->push_task = push_task; component->add_child = _ws_add_child; component->remove_child = _ws_remove_child; component->estimated_end = _ws_estimated_end; component->estimated_load = _ws_estimated_load; component->deinit_data = _work_stealing_component_deinit_data; component->data = wsd; return component; } starpu-1.4.9+dfsg/src/sched_policies/component_worker.c000066400000000000000000000673361507764646700233320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include /* data structure for worker's queue look like this : * W = worker * T = simple task * P = parallel task * * * P--P T * | | \| * P--P T T P T * | | | | | | * T T P--P--P T * | | | | | | * W W W W W W * * * * its possible that a _starpu_task_grid won't have task, because it have been * poped by a worker. * * N = no task * * T T T * | | | * P--N--N * | | | * W W W * * * this API is a little asymmetric : struct _starpu_task_grid are allocated by the caller and freed by the data structure * */ /****************************************************************************** * Worker Components' Data Structures * *****************************************************************************/ struct _starpu_task_grid { /* this member may be NULL if a worker have poped it but its a * parallel task and we dont want mad pointers */ struct starpu_task * task; struct _starpu_task_grid *up, *down, *left, *right; /* this is used to count the number of task to be poped by a worker * the leftist _starpu_task_grid maintain the ntasks counter (ie .left == NULL), * all the others use the pntasks that point to it * * when the counter reach 0, all the left and right member are set to NULL, * that mean that we will free that components. */ union { int ntasks; int * pntasks; }; }; /* list->exp_start, list->exp_len, list-exp_end and list->ntasks * are updated by starpu_sched_component_worker_push_task(component, task) and pre_exec_hook */ struct _starpu_worker_task_list { double exp_start, exp_len, exp_end, pipeline_len; struct _starpu_task_grid *first, *last; unsigned ntasks, pipeline_ntasks; starpu_pthread_mutex_t mutex; }; /* This is called when a transfer request is actually pushed to the worker */ static void _starpu_worker_task_list_transfer_started(struct _starpu_worker_task_list *l, struct starpu_task *task) { double transfer_model = task->predicted_transfer; if (isnan(transfer_model)) return; /* We now start the transfer, move it from predicted to pipelined */ l->exp_len -= transfer_model; l->pipeline_len += transfer_model; l->exp_start = starpu_timing_now() + l->pipeline_len; l->exp_end = l->exp_start + l->exp_len; } #ifdef STARPU_DEVEL #warning FIXME: merge with deque_modeling_policy_data_aware #endif /* This is called when a task is actually pushed to the worker (i.e. the transfer finished */ static void _starpu_worker_task_list_started(struct _starpu_worker_task_list *l, struct starpu_task *task) { double model = task->predicted; double transfer_model = task->predicted_transfer; if(!isnan(transfer_model)) /* The transfer is over, remove it from pipelined */ l->pipeline_len -= transfer_model; if(!isnan(model)) { /* We now start the computation, move it from predicted to pipelined */ l->exp_len -= model; l->pipeline_len += model; l->exp_start = starpu_timing_now() + l->pipeline_len; l->exp_end= l->exp_start + l->exp_len; } } /* This is called when a task is actually finished */ static void _starpu_worker_task_list_finished(struct _starpu_worker_task_list *l, struct starpu_task *task) { if(!isnan(task->predicted)) /* The execution is over, remove it from pipelined */ l->pipeline_len -= task->predicted; if (!l->pipeline_ntasks) _STARPU_DISP("warning: bogus computation of pipeline_ntasks?\n"); else l->pipeline_ntasks--; l->exp_start = STARPU_MAX(starpu_timing_now() + l->pipeline_len, l->exp_start); l->exp_end = l->exp_start + l->exp_len; } struct _starpu_worker_component_data { union { struct _starpu_worker * worker; struct { unsigned worker_size; unsigned workerids[STARPU_NMAXWORKERS]; } parallel_worker; }; struct _starpu_worker_task_list * list; }; /* this array store worker components */ static struct starpu_sched_component * _worker_components[STARPU_NMAX_SCHED_CTXS][STARPU_NMAXWORKERS]; /****************************************************************************** * Worker Components' Task List and Grid Functions * *****************************************************************************/ static struct _starpu_worker_task_list * _starpu_worker_task_list_create(void) { struct _starpu_worker_task_list *l; _STARPU_MALLOC(l, sizeof(*l)); memset(l, 0, sizeof(*l)); l->exp_len = l->pipeline_len = 0.0; l->exp_start = l->exp_end = starpu_timing_now(); /* These are only for statistics */ STARPU_HG_DISABLE_CHECKING(l->exp_end); STARPU_HG_DISABLE_CHECKING(l->exp_start); STARPU_HG_DISABLE_CHECKING(l->exp_len); STARPU_HG_DISABLE_CHECKING(l->pipeline_len); STARPU_PTHREAD_MUTEX_INIT(&l->mutex,NULL); return l; } static struct _starpu_task_grid * _starpu_task_grid_create(void) { struct _starpu_task_grid *t; _STARPU_MALLOC(t, sizeof(*t)); memset(t, 0, sizeof(*t)); return t; } static struct _starpu_worker_task_list * _worker_get_list(unsigned sched_ctx_id) { unsigned workerid = starpu_worker_get_id_check(); STARPU_ASSERT(workerid < starpu_worker_get_count()); struct _starpu_worker_component_data * d = starpu_sched_component_worker_get(sched_ctx_id, workerid)->data; return d->list; } static void _starpu_task_grid_destroy(struct _starpu_task_grid * t) { free(t); } static void _starpu_worker_task_list_destroy(struct _starpu_worker_task_list * l) { if(l) { /* There can be empty task grids, when we picked the last task after the front task grid */ struct _starpu_task_grid *t = l->first, *nextt; while(t) { STARPU_ASSERT(!t->task); nextt = t->up; _starpu_task_grid_destroy(t); t = nextt; } STARPU_PTHREAD_MUTEX_DESTROY(&l->mutex); free(l); } } static inline void _starpu_worker_task_list_add(struct _starpu_worker_task_list * l, struct starpu_task *task) { double predicted = task->predicted; double predicted_transfer = task->predicted_transfer; double end = l->exp_end; const double now = starpu_timing_now(); /* Sometimes workers didn't take the tasks as early as we expected */ l->exp_start = STARPU_MAX(l->exp_start, now); if (now + predicted_transfer < end) { /* We may hope that the transfer will be finished by * the start of the task. */ predicted_transfer = 0.0; } else { /* The transfer will not be finished by then, take the * remainder into account */ predicted_transfer = (now + predicted_transfer) - end; } if(!isnan(predicted_transfer)) l->exp_len += predicted_transfer; if(!isnan(predicted)) l->exp_len += predicted; l->exp_end = l->exp_start + l->exp_len; task->predicted = predicted; task->predicted_transfer = predicted_transfer; l->pipeline_ntasks++; } static inline void _starpu_worker_task_list_push(struct _starpu_worker_task_list * l, struct _starpu_task_grid * t) { /* the task, ntasks, pntasks, left and right members of t are set by the caller */ STARPU_ASSERT(t->task); if(l->first == NULL) l->first = l->last = t; t->down = l->last; l->last->up = t; t->up = NULL; l->last = t; l->ntasks++; _starpu_worker_task_list_add(l, t->task); } /* recursively set left and right pointers to NULL */ static inline void _starpu_task_grid_unset_left_right_member(struct _starpu_task_grid * t) { STARPU_ASSERT(t->task == NULL); struct _starpu_task_grid * t_left = t->left; struct _starpu_task_grid * t_right = t->right; t->left = t->right = NULL; while(t_left) { STARPU_ASSERT(t_left->task == NULL); t = t_left; t_left = t_left->left; t->left = NULL; t->right = NULL; } while(t_right) { STARPU_ASSERT(t_right->task == NULL); t = t_right; t_right = t_right->right; t->left = NULL; t->right = NULL; } } static inline struct starpu_task * _starpu_worker_task_list_pop(struct _starpu_worker_task_list * l) { if(!l->first) { l->exp_len = l->pipeline_len = 0.0; l->exp_start = l->exp_end = starpu_timing_now(); return NULL; } struct _starpu_task_grid * t = l->first; /* if there is no task there is no tasks linked to this, then we can free it */ if(t->task == NULL && t->right == NULL && t->left == NULL) { l->first = t->up; if(l->first) l->first->down = NULL; if(l->last == t) l->last = NULL; _starpu_task_grid_destroy(t); return _starpu_worker_task_list_pop(l); } while(t) { if(t->task) { struct starpu_task * task = t->task; t->task = NULL; /* the leftist thing hold the number of tasks, other have a pointer to it */ int * p = t->left ? t->pntasks : &t->ntasks; /* the worker who pop the last task allow the rope to be freed */ if(STARPU_ATOMIC_ADD(p, -1) == 0) _starpu_task_grid_unset_left_right_member(t); l->ntasks--; return task; } t = t->up; } return NULL; } /****************************************************************************** * Worker Components' Public Helper Functions (Part 1) * *****************************************************************************/ struct _starpu_worker * _starpu_sched_component_worker_get_worker(struct starpu_sched_component * worker_component) { STARPU_ASSERT(starpu_sched_component_is_simple_worker(worker_component)); struct _starpu_worker_component_data * data = worker_component->data; return data->worker; } /****************************************************************************** * Worker Components' Private Helper Functions * *****************************************************************************/ #ifndef STARPU_NO_ASSERT static int _worker_consistant(struct starpu_sched_component * component) { int is_a_worker = 0; int i; for(i = 0; itree->sched_ctx_id][i] == component) is_a_worker = 1; if(!is_a_worker) return 0; struct _starpu_worker_component_data * data = component->data; if(data->worker) { int id = data->worker->workerid; return (_worker_components[component->tree->sched_ctx_id][id] == component) && component->nchildren == 0; } return 1; } #endif /****************************************************************************** * Simple Worker Components' Interface Functions * *****************************************************************************/ static int simple_worker_can_pull(struct starpu_sched_component * worker_component) { struct _starpu_worker * worker = _starpu_sched_component_worker_get_worker(worker_component); int workerid = worker->workerid; return starpu_wake_worker_relax_light(workerid); } static int simple_worker_push_task(struct starpu_sched_component * component, struct starpu_task *task) { STARPU_ASSERT(starpu_sched_component_is_worker(component)); /*this function take the worker's mutex */ struct _starpu_worker_component_data * data = component->data; struct _starpu_task_grid * t = _starpu_task_grid_create(); t->task = task; t->ntasks = 1; task->workerid = starpu_bitmap_first(&component->workers); #if 1 /* dead lock problem? */ if (starpu_get_prefetch_flag() && !task->prefetched) starpu_prefetch_task_input_for(task, task->workerid); #endif struct _starpu_worker_task_list * list = data->list; STARPU_COMPONENT_MUTEX_LOCK(&list->mutex); _starpu_worker_task_list_push(list, t); STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex); simple_worker_can_pull(component); return 0; } static struct starpu_task * simple_worker_pull_task(struct starpu_sched_component *component, struct starpu_sched_component * to) { unsigned workerid = starpu_worker_get_id_check(); struct _starpu_worker *worker = _starpu_get_worker_struct(workerid); struct _starpu_worker_component_data * data = component->data; struct _starpu_worker_task_list * list = data->list; struct starpu_task * task; unsigned i; int n_tries = 0; do { const double now = starpu_timing_now(); /* do not reset state_keep_awake here has it may hide tasks in worker->local_tasks */ n_tries++; STARPU_COMPONENT_MUTEX_LOCK(&list->mutex); /* Take the opportunity to update start time */ data->list->exp_start = STARPU_MAX(now, data->list->exp_start); data->list->exp_end = data->list->exp_start + data->list->exp_len; task = _starpu_worker_task_list_pop(list); if(task) { _starpu_worker_task_list_transfer_started(list, task); starpu_push_task_end(task); STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex); goto ret; } STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex); for(i=0; i < component->nparents; i++) { if(component->parents[i] == NULL) continue; else { task = starpu_sched_component_pull_task(component->parents[i],component); if(task) break; } } } while((!task) && worker->state_keep_awake && n_tries < 2); if(!task) goto ret; if(task->cl->type == STARPU_SPMD) { if(!starpu_worker_is_combined_worker(workerid)) { STARPU_COMPONENT_MUTEX_LOCK(&list->mutex); _starpu_worker_task_list_add(list, task); _starpu_worker_task_list_transfer_started(list, task); starpu_push_task_end(task); STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex); goto ret; } struct starpu_sched_component * combined_worker_component = starpu_sched_component_worker_get(component->tree->sched_ctx_id, workerid); starpu_sched_component_push_task(component, combined_worker_component, task); /* we have pushed a task in queue, so can make a recursive call */ task = simple_worker_pull_task(component, to); goto ret; } if(task) { STARPU_COMPONENT_MUTEX_LOCK(&list->mutex); _starpu_worker_task_list_add(list, task); _starpu_worker_task_list_transfer_started(list, task); starpu_push_task_end(task); STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex); } ret: return task; } static double simple_worker_estimated_end(struct starpu_sched_component * component) { struct _starpu_worker_component_data * data = component->data; double now = starpu_timing_now(); if (now > data->list->exp_start) { data->list->exp_start = now; data->list->exp_end = data->list->exp_start + data->list->exp_len; } return data->list->exp_end; } static double simple_worker_estimated_load(struct starpu_sched_component * component) { struct _starpu_worker * worker = _starpu_sched_component_worker_get_worker(component); int nb_task = 0; STARPU_COMPONENT_MUTEX_LOCK(&worker->mutex); struct starpu_task_prio_list *list = &worker->local_tasks; struct starpu_task * task; for(task = starpu_task_prio_list_begin(list); task != starpu_task_prio_list_end(list); task = starpu_task_prio_list_next(list, task)) nb_task++; STARPU_COMPONENT_MUTEX_UNLOCK(&worker->mutex); struct _starpu_worker_component_data * d = component->data; struct _starpu_worker_task_list * l = d->list; int ntasks_in_fifo = l ? l->ntasks + l->pipeline_ntasks : 0; return (double) (nb_task + ntasks_in_fifo) / starpu_worker_get_relative_speedup( starpu_worker_get_perf_archtype(starpu_bitmap_first(&component->workers), component->tree->sched_ctx_id)); } static void _worker_component_deinit_data(struct starpu_sched_component * component) { struct _starpu_worker_component_data * d = component->data; _starpu_worker_task_list_destroy(d->list); int i, j; for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++) for(i = 0; i < STARPU_NMAXWORKERS; i++) if(_worker_components[j][i] == component) { _worker_components[j][i] = NULL; break; } free(d); } static struct starpu_sched_component * starpu_sched_component_worker_create(struct starpu_sched_tree *tree, int workerid) { STARPU_ASSERT(workerid >= 0 && workerid < (int) starpu_worker_get_count()); if(_worker_components[tree->sched_ctx_id][workerid]) return _worker_components[tree->sched_ctx_id][workerid]; struct _starpu_worker * worker = _starpu_get_worker_struct(workerid); if(worker == NULL) return NULL; char name[32]; snprintf(name, sizeof(name), "worker %d", workerid); struct starpu_sched_component * component = starpu_sched_component_create(tree, name); struct _starpu_worker_component_data *data; _STARPU_MALLOC(data, sizeof(*data)); memset(data, 0, sizeof(*data)); data->worker = worker; data->list = _starpu_worker_task_list_create(); component->data = data; /* FIXME: missing push_task_notify */ component->push_task = simple_worker_push_task; component->pull_task = simple_worker_pull_task; component->can_pull = simple_worker_can_pull; component->estimated_end = simple_worker_estimated_end; component->estimated_load = simple_worker_estimated_load; component->deinit_data = _worker_component_deinit_data; starpu_bitmap_set(&component->workers, workerid); starpu_bitmap_or(&component->workers_in_ctx, &component->workers); _worker_components[tree->sched_ctx_id][workerid] = component; /* #ifdef STARPU_HAVE_HWLOC struct _starpu_machine_config *config = _starpu_get_machine_config(); struct _starpu_machine_topology *topology = &config->topology; hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, config->pu_depth, worker->bindid); STARPU_ASSERT(obj); component->obj = obj; #endif */ return component; } /****************************************************************************** * Combined Worker Components' Interface Functions * *****************************************************************************/ static int combined_worker_can_pull(struct starpu_sched_component * component) { (void) component; STARPU_ASSERT(starpu_sched_component_is_combined_worker(component)); struct _starpu_worker_component_data * data = component->data; int workerid = starpu_worker_get_id(); unsigned i; for(i = 0; i < data->parallel_worker.worker_size; i++) { int target = data->parallel_worker.workerids[i]; if(target == workerid) continue; if (starpu_wake_worker_relax_light(target)) return 1; } return 0; } static int combined_worker_push_task(struct starpu_sched_component * component, struct starpu_task *task) { STARPU_ASSERT(starpu_sched_component_is_combined_worker(component)); struct _starpu_worker_component_data * data = component->data; STARPU_ASSERT(data->parallel_worker.worker_size >= 1); struct _starpu_task_grid * task_alias[data->parallel_worker.worker_size]; starpu_parallel_task_barrier_init(task, starpu_bitmap_first(&component->workers)); task_alias[0] = _starpu_task_grid_create(); task_alias[0]->task = starpu_task_dup(task); task_alias[0]->task->workerid = data->parallel_worker.workerids[0]; task_alias[0]->task->destroy = 1; task_alias[0]->left = NULL; task_alias[0]->ntasks = data->parallel_worker.worker_size; _STARPU_TRACE_JOB_PUSH(task_alias[0]->task, task_alias[0]->task->priority > 0); unsigned i; for(i = 1; i < data->parallel_worker.worker_size; i++) { task_alias[i] = _starpu_task_grid_create(); task_alias[i]->task = starpu_task_dup(task); task_alias[i]->task->destroy = 1; task_alias[i]->task->workerid = data->parallel_worker.workerids[i]; task_alias[i]->left = task_alias[i-1]; task_alias[i - 1]->right = task_alias[i]; task_alias[i]->pntasks = &(task_alias[0]->ntasks); _STARPU_TRACE_JOB_PUSH(task_alias[i]->task, task_alias[i]->task->priority > 0); } starpu_pthread_mutex_t * mutex_to_unlock = NULL; i = 0; do { struct starpu_sched_component * worker_component = starpu_sched_component_worker_get(component->tree->sched_ctx_id, data->parallel_worker.workerids[i]); struct _starpu_worker_component_data * worker_data = worker_component->data; struct _starpu_worker_task_list * list = worker_data->list; STARPU_COMPONENT_MUTEX_LOCK(&list->mutex); if(mutex_to_unlock) STARPU_COMPONENT_MUTEX_UNLOCK(mutex_to_unlock); mutex_to_unlock = &list->mutex; _starpu_worker_task_list_push(list, task_alias[i]); i++; } while(i < data->parallel_worker.worker_size); STARPU_COMPONENT_MUTEX_UNLOCK(mutex_to_unlock); int workerid = starpu_worker_get_id(); if(-1 == workerid) { combined_worker_can_pull(component); } else { /* wake up all other workers of combined worker */ for(i = 0; i < data->parallel_worker.worker_size; i++) { struct starpu_sched_component * worker_component = starpu_sched_component_worker_get(component->tree->sched_ctx_id, data->parallel_worker.workerids[i]); simple_worker_can_pull(worker_component); } combined_worker_can_pull(component); } return 0; } static struct starpu_task *combined_worker_pull_task(struct starpu_sched_component * from STARPU_ATTRIBUTE_UNUSED, struct starpu_sched_component * to STARPU_ATTRIBUTE_UNUSED) { return NULL; } static double combined_worker_estimated_end(struct starpu_sched_component * component) { STARPU_ASSERT(starpu_sched_component_is_combined_worker(component)); struct _starpu_worker_component_data * d = component->data; double max = 0.0; unsigned i; for(i = 0; i < d->parallel_worker.worker_size; i++) { struct _starpu_worker_component_data * data; data = _worker_components[component->tree->sched_ctx_id][d->parallel_worker.workerids[i]]->data; double tmp = data->list->exp_end; max = tmp > max ? tmp : max; } return max; } static double combined_worker_estimated_load(struct starpu_sched_component * component) { struct _starpu_worker_component_data * d = component->data; double load = 0; unsigned i; for(i = 0; i < d->parallel_worker.worker_size; i++) { struct starpu_sched_component * n = starpu_sched_component_worker_get(component->tree->sched_ctx_id, d->parallel_worker.workerids[i]); load += n->estimated_load(n); } return load; } struct starpu_sched_component *starpu_sched_component_parallel_worker_create(struct starpu_sched_tree *tree, unsigned nworkers, unsigned *workers) { struct starpu_sched_component * component = starpu_sched_component_create(tree, "combined_worker"); struct _starpu_worker_component_data *data; _STARPU_MALLOC(data, sizeof(*data)); memset(data, 0, sizeof(*data)); STARPU_ASSERT(nworkers <= STARPU_NMAXWORKERS); STARPU_ASSERT(nworkers <= starpu_worker_get_count()); data->parallel_worker.worker_size = nworkers; memcpy(data->parallel_worker.workerids, workers, nworkers * sizeof(unsigned)); component->data = data; component->push_task = combined_worker_push_task; component->pull_task = combined_worker_pull_task; component->estimated_end = combined_worker_estimated_end; component->estimated_load = combined_worker_estimated_load; component->can_pull = combined_worker_can_pull; component->deinit_data = _worker_component_deinit_data; unsigned i; for (i = 0; i < nworkers; i++) starpu_sched_component_connect(component, starpu_sched_component_worker_get(tree->sched_ctx_id, workers[i])); return component; } static struct starpu_sched_component * starpu_sched_component_combined_worker_create(struct starpu_sched_tree *tree, int workerid) { STARPU_ASSERT(workerid >= 0 && workerid < STARPU_NMAXWORKERS); if(_worker_components[tree->sched_ctx_id][workerid]) return _worker_components[tree->sched_ctx_id][workerid]; struct _starpu_combined_worker * combined_worker = _starpu_get_combined_worker_struct(workerid); if(combined_worker == NULL) return NULL; struct starpu_sched_component *component = starpu_sched_component_parallel_worker_create(tree, combined_worker->worker_size, (unsigned *) combined_worker->combined_workerid); starpu_bitmap_set(&component->workers, workerid); starpu_bitmap_or(&component->workers_in_ctx, &component->workers); _worker_components[tree->sched_ctx_id][workerid] = component; /* #ifdef STARPU_HAVE_HWLOC struct _starpu_worker_component_data * data = component->data; struct _starpu_machine_config *config = _starpu_get_machine_config(); struct _starpu_machine_topology *topology = &config->topology; struct _starpu_worker *worker = _starpu_get_worker_struct(data->parallel_worker.workerids[0]); hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, config->pu_depth, worker->bindid); STARPU_ASSERT(obj); component->obj = obj; #endif */ return component; } /****************************************************************************** * Worker Components' Public Helper Functions (Part 2) * *****************************************************************************/ void _starpu_sched_component_lock_all_workers(void) { unsigned i; for(i = 0; i < starpu_worker_get_count(); i++) starpu_worker_lock(i); } void _starpu_sched_component_unlock_all_workers(void) { unsigned i; for(i = 0; i < starpu_worker_get_count(); i++) starpu_worker_unlock(i); } void _starpu_sched_component_workers_destroy(void) { int i, j; for(j = 0; j < STARPU_NMAX_SCHED_CTXS; j++) for(i = 0; i < STARPU_NMAXWORKERS; i++) if (_worker_components[j][i]) starpu_sched_component_destroy(_worker_components[j][i]); } int starpu_sched_component_worker_get_workerid(struct starpu_sched_component * worker_component) { #ifndef STARPU_NO_ASSERT STARPU_ASSERT(_worker_consistant(worker_component)); #endif STARPU_ASSERT(1 == starpu_bitmap_cardinal(&worker_component->workers)); return starpu_bitmap_first(&worker_component->workers); } void starpu_sched_component_worker_pre_exec_hook(struct starpu_task * task, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED) { struct _starpu_worker_task_list * list = _worker_get_list(sched_ctx_id); const double now = starpu_timing_now(); STARPU_COMPONENT_MUTEX_LOCK(&list->mutex); _starpu_worker_task_list_started(list, task); /* Take the opportunity to update start time */ list->exp_start = STARPU_MAX(now + list->pipeline_len, list->exp_start); STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex); } void starpu_sched_component_worker_post_exec_hook(struct starpu_task * task, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED) { if(task->execute_on_a_specific_worker) return; struct _starpu_worker_task_list * list = _worker_get_list(sched_ctx_id); STARPU_COMPONENT_MUTEX_LOCK(&list->mutex); _starpu_worker_task_list_finished(list, task); STARPU_COMPONENT_MUTEX_UNLOCK(&list->mutex); } int starpu_sched_component_is_simple_worker(struct starpu_sched_component * component) { return component->push_task == simple_worker_push_task; } int starpu_sched_component_is_combined_worker(struct starpu_sched_component * component) { return component->push_task == combined_worker_push_task; } int starpu_sched_component_is_worker(struct starpu_sched_component * component) { return starpu_sched_component_is_simple_worker(component) || starpu_sched_component_is_combined_worker(component); } /* As Worker Components' creating functions are protected, this function allows * the user to get a Worker Component from a worker id */ struct starpu_sched_component * starpu_sched_component_worker_get(unsigned sched_ctx, int workerid) { STARPU_ASSERT(workerid >= 0 && workerid < STARPU_NMAXWORKERS); /* we may need to take a mutex here */ if (!_worker_components[sched_ctx][workerid]) return starpu_sched_component_worker_new(sched_ctx, workerid); return _worker_components[sched_ctx][workerid]; } struct starpu_sched_component * starpu_sched_component_worker_new(unsigned sched_ctx, int workerid) { STARPU_ASSERT(workerid >= 0 && workerid < STARPU_NMAXWORKERS); /* we may need to take a mutex here */ if (_worker_components[sched_ctx][workerid]) return _worker_components[sched_ctx][workerid]; struct starpu_sched_component * component; if(workerid < (int) starpu_worker_get_count()) component = starpu_sched_component_worker_create(starpu_sched_tree_get(sched_ctx), workerid); else component = starpu_sched_component_combined_worker_create(starpu_sched_tree_get(sched_ctx), workerid); _worker_components[sched_ctx][workerid] = component; return component; } starpu-1.4.9+dfsg/src/sched_policies/deque_modeling_policy_data_aware.c000066400000000000000000001134351507764646700264370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2013-2013 Simon Archipoff * Copyright (C) 2013-2013 Joris Pablo * Copyright (C) 2011,2020 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Distributed queues using performance modeling to assign tasks */ #include #include #include #include #include #include #include #include #ifdef BUILDING_STARPU #include #endif #include #include #include /* for fpclassify() checks on knob values */ #ifndef DBL_MIN #define DBL_MIN __DBL_MIN__ #endif #ifndef DBL_MAX #define DBL_MAX __DBL_MAX__ #endif //#define NOTIFY_READY_SOON struct _starpu_dmda_data { double alpha; double beta; double _gamma; double idle_power; struct starpu_st_fifo_taskq queue_array[STARPU_NMAXWORKERS]; long int total_task_cnt; long int ready_task_cnt; long int eager_task_cnt; /* number of tasks scheduled without model */ int num_priorities; }; /* performance steering knobs */ /* . per-scheduler knobs */ static int __s_alpha_knob; static int __s_beta_knob; static int __s_gamma_knob; static int __s_idle_power_knob; /* . knob variables */ static double __s_alpha__value = 1.0; static double __s_beta__value = 1.0; static double __s_gamma__value = 1.0; static double __s_idle_power__value = 1.0; /* . per-scheduler knob group */ static struct starpu_perf_knob_group * __kg_starpu_dmda__per_scheduler; static void sched_knobs__set(const struct starpu_perf_knob * const knob, void *context, const struct starpu_perf_knob_value * const value) { const char * const sched_policy_name = *(const char **)context; (void) sched_policy_name; if (knob->id == __s_alpha_knob) { STARPU_ASSERT(fpclassify(value->val_double) == FP_NORMAL); __s_alpha__value = value->val_double; } else if (knob->id == __s_beta_knob) { STARPU_ASSERT(fpclassify(value->val_double) == FP_NORMAL); __s_beta__value = value->val_double; } else if (knob->id == __s_gamma_knob) { STARPU_ASSERT(fpclassify(value->val_double) == FP_NORMAL); __s_gamma__value = value->val_double; } else if (knob->id == __s_idle_power_knob) { STARPU_ASSERT(fpclassify(value->val_double) == FP_NORMAL); __s_idle_power__value = value->val_double; } else { STARPU_ASSERT(0); abort(); } } static void sched_knobs__get(const struct starpu_perf_knob * const knob, void *context, struct starpu_perf_knob_value * const value) { const char * const sched_policy_name = *(const char **)context; (void) sched_policy_name; if (knob->id == __s_alpha_knob) { value->val_double = __s_alpha__value; } else if (knob->id == __s_beta_knob) { value->val_double = __s_beta__value; } else if (knob->id == __s_gamma_knob) { value->val_double = __s_gamma__value; } else if (knob->id == __s_idle_power_knob) { value->val_double = __s_idle_power__value; } else { STARPU_ASSERT(0); abort(); } } void _starpu__dmda_c__register_knobs(void) { { const enum starpu_perf_knob_scope scope = starpu_perf_knob_scope_per_scheduler; __kg_starpu_dmda__per_scheduler = _starpu_perf_knob_group_register(scope, sched_knobs__set, sched_knobs__get); /* TODO: priority capping knobs actually work globally for now, the sched policy name is ignored */ __STARPU_PERF_KNOB_REG("starpu.dmda", __kg_starpu_dmda__per_scheduler, s_alpha_knob, double, "alpha constant multiplier"); __STARPU_PERF_KNOB_REG("starpu.dmda", __kg_starpu_dmda__per_scheduler, s_beta_knob, double, "beta constant multiplier"); __STARPU_PERF_KNOB_REG("starpu.dmda", __kg_starpu_dmda__per_scheduler, s_gamma_knob, double, "gamma constant multiplier"); __STARPU_PERF_KNOB_REG("starpu.dmda", __kg_starpu_dmda__per_scheduler, s_idle_power_knob, double, "idle_power constant multiplier"); } } void _starpu__dmda_c__unregister_knobs(void) { _starpu_perf_knob_group_unregister(__kg_starpu_dmda__per_scheduler); __kg_starpu_dmda__per_scheduler = NULL; } /* The dmda scheduling policy uses * * alpha * T_computation + beta * T_communication + gamma * Consumption * * Here are the default values of alpha, beta, gamma */ #define _STARPU_SCHED_ALPHA_DEFAULT 1.0 #define _STARPU_SCHED_BETA_DEFAULT 1.0 #define _STARPU_SCHED_GAMMA_DEFAULT 1000.0 /* This is called when a transfer request is actually pushed to the worker */ static void _starpu_fifo_task_transfer_started(struct starpu_st_fifo_taskq *fifo, struct starpu_task *task, int num_priorities) { double transfer_model = task->predicted_transfer; if (isnan(transfer_model)) return; /* We now start the transfer, move it from predicted to pipelined */ fifo->exp_len -= transfer_model; fifo->pipeline_len += transfer_model; fifo->exp_start = starpu_timing_now() + fifo->pipeline_len; fifo->exp_end = fifo->exp_start + fifo->exp_len; if(num_priorities != -1) { int i; int task_prio = starpu_st_normalize_prio(task->priority, num_priorities, task->sched_ctx); for(i = 0; i <= task_prio; i++) fifo->exp_len_per_priority[i] -= transfer_model; } } /* This is called when a task is actually pushed to the worker (i.e. the transfer finished */ static void _starpu_fifo_task_started(struct starpu_st_fifo_taskq *fifo, struct starpu_task *task, int num_priorities) { double model = task->predicted; double transfer_model = task->predicted_transfer; if(!isnan(transfer_model)) /* The transfer is over, remove it from pipelined */ fifo->pipeline_len -= transfer_model; if(!isnan(model)) { /* We now start the computation, move it from predicted to pipelined */ fifo->exp_len -= model; fifo->pipeline_len += model; fifo->exp_start = starpu_timing_now() + fifo->pipeline_len; fifo->exp_end= fifo->exp_start + fifo->exp_len; if(num_priorities != -1) { int i; int task_prio = starpu_st_normalize_prio(task->priority, num_priorities, task->sched_ctx); for(i = 0; i <= task_prio; i++) fifo->exp_len_per_priority[i] -= model; } } } /* This is called when a task is actually finished */ static void _starpu_fifo_task_finished(struct starpu_st_fifo_taskq *fifo, struct starpu_task *task, int num_priorities STARPU_ATTRIBUTE_UNUSED) { if(!isnan(task->predicted)) /* The execution is over, remove it from pipelined */ fifo->pipeline_len -= task->predicted; if (!fifo->pipeline_ntasks) _STARPU_DISP("warning: bogus computation of pipeline_ntasks?\n"); else fifo->pipeline_ntasks--; fifo->exp_start = STARPU_MAX(starpu_timing_now() + fifo->pipeline_len, fifo->exp_start); fifo->exp_end = fifo->exp_start + fifo->exp_len; } static struct starpu_task *_dmda_pop_task(unsigned sched_ctx_id, int ready) { struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); struct starpu_task *task; unsigned workerid = starpu_worker_get_id_check(); struct starpu_st_fifo_taskq *fifo = &dt->queue_array[workerid]; /* Take the opportunity to update start time */ fifo->exp_start = STARPU_MAX(starpu_timing_now(), fifo->exp_start); fifo->exp_end = fifo->exp_start + fifo->exp_len; STARPU_ASSERT_MSG(fifo, "worker %u does not belong to ctx %u anymore.\n", workerid, sched_ctx_id); if (ready) task = starpu_st_fifo_taskq_pop_first_ready_task(fifo, workerid, dt->num_priorities); else task = starpu_st_fifo_taskq_pop_local_task(fifo); if (task) { _starpu_fifo_task_transfer_started(fifo, task, dt->num_priorities); starpu_sched_ctx_list_task_counters_decrement(sched_ctx_id, workerid); #ifdef STARPU_VERBOSE if (task->cl) { int non_ready = starpu_st_non_ready_buffers_count(task, workerid); if (non_ready == 0) dt->ready_task_cnt++; } dt->total_task_cnt++; #endif } return task; } static struct starpu_task *dmda_pop_ready_task(unsigned sched_ctx_id) { return _dmda_pop_task(sched_ctx_id, 1); } static struct starpu_task *dmda_pop_task(unsigned sched_ctx_id) { return _dmda_pop_task(sched_ctx_id, 0); } static int push_task_on_best_worker(struct starpu_task *task, int best_workerid, double predicted, double predicted_transfer, int prio, unsigned sched_ctx_id) { struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); /* make sure someone could execute that task ! */ STARPU_ASSERT(best_workerid != -1); if (_starpu_get_nsched_ctxs() > 1) { starpu_worker_relax_on(); _starpu_sched_ctx_lock_write(sched_ctx_id); starpu_worker_relax_off(); if (_starpu_sched_ctx_worker_is_master_for_child_ctx(sched_ctx_id, best_workerid, task)) task = NULL; _starpu_sched_ctx_unlock_write(sched_ctx_id); if (!task) return 0; } struct starpu_st_fifo_taskq *fifo = &dt->queue_array[best_workerid]; double now = starpu_timing_now(); #ifdef STARPU_USE_SC_HYPERVISOR starpu_sched_ctx_call_pushed_task_cb(best_workerid, sched_ctx_id); #endif //STARPU_USE_SC_HYPERVISOR starpu_worker_lock(best_workerid); fifo->pipeline_ntasks++; /* Sometimes workers didn't take the tasks as early as we expected */ fifo->exp_start = isnan(fifo->exp_start) ? now + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, now); fifo->exp_end = fifo->exp_start + fifo->exp_len; /* FIXME: We don't have overlap when running CPU-CPU transfers */ if ((now + predicted_transfer) < fifo->exp_end) { /* We may hope that the transfer will be finished by * the start of the task. */ predicted_transfer = 0.0; } else { /* The transfer will not be finished by then, take the * remainder into account */ predicted_transfer = (now + predicted_transfer) - fifo->exp_end; } if(!isnan(predicted_transfer)) { fifo->exp_len += predicted_transfer; if(dt->num_priorities != -1) { int i; int task_prio = starpu_st_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx); for(i = 0; i <= task_prio; i++) fifo->exp_len_per_priority[i] += predicted_transfer; } } if(!isnan(predicted)) { fifo->exp_len += predicted; if(dt->num_priorities != -1) { int i; int task_prio = starpu_st_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx); for(i = 0; i <= task_prio; i++) fifo->exp_len_per_priority[i] += predicted; } } fifo->exp_end = fifo->exp_start + fifo->exp_len; starpu_worker_unlock(best_workerid); task->predicted = predicted; task->predicted_transfer = predicted_transfer; if (starpu_get_prefetch_flag()) starpu_prefetch_task_input_for(task, best_workerid); STARPU_AYU_ADDTOTASKQUEUE(starpu_task_get_job_id(task), best_workerid); if (_starpu_get_nsched_ctxs() > 1) { unsigned stream_ctx_id = starpu_worker_get_sched_ctx_id_stream(best_workerid); if(stream_ctx_id != STARPU_NMAX_SCHED_CTXS) { starpu_worker_relax_on(); _starpu_sched_ctx_lock_write(sched_ctx_id); starpu_worker_relax_off(); starpu_sched_ctx_move_task_to_ctx_locked(task, stream_ctx_id, 0); starpu_sched_ctx_revert_task_counters_ctx_locked(sched_ctx_id, task->flops); _starpu_sched_ctx_unlock_write(sched_ctx_id); } } int ret = 0; if (prio) { starpu_worker_lock(best_workerid); ret =starpu_st_fifo_taskq_push_sorted_task(&dt->queue_array[best_workerid], task); if(dt->num_priorities != -1) { int i; int task_prio = starpu_st_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx); for(i = 0; i <= task_prio; i++) dt->queue_array[best_workerid].ntasks_per_priority[i]++; } #if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) starpu_wake_worker_locked(best_workerid); #endif starpu_push_task_end(task); starpu_worker_unlock(best_workerid); } else { starpu_worker_lock(best_workerid); starpu_task_list_push_back (&dt->queue_array[best_workerid].taskq, task); dt->queue_array[best_workerid].ntasks++; dt->queue_array[best_workerid].nprocessed++; #if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) starpu_wake_worker_locked(best_workerid); #endif starpu_push_task_end(task); starpu_worker_unlock(best_workerid); } starpu_sched_ctx_list_task_counters_increment(sched_ctx_id, best_workerid); return ret; } /* TODO: factorise CPU computations, expensive with a lot of cores */ static void compute_all_performance_predictions(struct starpu_task *task, unsigned nworkers, double local_task_length[nworkers][STARPU_MAXIMPLEMENTATIONS], double exp_end[nworkers][STARPU_MAXIMPLEMENTATIONS], double *max_exp_endp_of_workers, double *min_exp_endp_of_task, double local_data_penalty[nworkers][STARPU_MAXIMPLEMENTATIONS], double local_energy[nworkers][STARPU_MAXIMPLEMENTATIONS], int *forced_worker, int *forced_impl, unsigned sched_ctx_id, unsigned sorted_decision) { int calibrating = 0; double max_exp_end_of_workers = DBL_MIN; double best_exp_end_of_task = DBL_MAX; int ntasks_best = -1; int nimpl_best = 0; double ntasks_best_end = 0.0; /* A priori, we know all estimations */ int unknown = 0; unsigned worker_ctx = 0; int task_prio = 0; starpu_task_bundle_t bundle = task->bundle; struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); if(sorted_decision && dt->num_priorities != -1) task_prio = starpu_st_normalize_prio(task->priority, dt->num_priorities, sched_ctx_id); struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); double now = starpu_timing_now(); struct starpu_sched_ctx_iterator it; workers->init_iterator_for_parallel_tasks(workers, &it, task); while(worker_ctxhas_next(workers, &it)) { unsigned nimpl; unsigned impl_mask; unsigned workerid = workers->get_next(workers, &it); struct starpu_st_fifo_taskq *fifo = &dt->queue_array[workerid]; struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(workerid, sched_ctx_id); unsigned memory_node = starpu_worker_get_memory_node(workerid); STARPU_ASSERT_MSG(fifo != NULL, "workerid %u ctx %u\n", workerid, sched_ctx_id); /* Sometimes workers didn't take the tasks as early as we expected */ double exp_start = isnan(fifo->exp_start) ? now + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, now); if (!starpu_worker_can_execute_task_impl(workerid, task, &impl_mask)) continue; for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { if (!(impl_mask & (1U << nimpl))) { /* no one on that queue may execute this task */ continue; } int fifo_ntasks = fifo->ntasks + fifo->pipeline_ntasks; double prev_exp_len = fifo->exp_len; /* consider the priority of the task when deciding on which workerid to schedule, compute the expected_end of the task if it is inserted before other tasks already scheduled */ if(sorted_decision) { if(dt->num_priorities != -1) { prev_exp_len = fifo->exp_len_per_priority[task_prio]; fifo_ntasks = fifo->ntasks_per_priority[task_prio]; } else { starpu_worker_lock(workerid); prev_exp_len = starpu_st_fifo_taskq_get_exp_len_prev_task_list(fifo, task, workerid, nimpl, &fifo_ntasks); starpu_worker_unlock(workerid); } } exp_end[worker_ctx][nimpl] = exp_start + prev_exp_len; if (exp_end[worker_ctx][nimpl] > max_exp_end_of_workers) max_exp_end_of_workers = exp_end[worker_ctx][nimpl]; //_STARPU_DEBUG("Scheduler dmda: task length (%lf) workerid (%u) kernel (%u) \n", local_task_length[workerid][nimpl],workerid,nimpl); if (bundle) { /* TODO : conversion time */ local_task_length[worker_ctx][nimpl] = starpu_task_bundle_expected_length(bundle, perf_arch, nimpl); if (local_data_penalty) local_data_penalty[worker_ctx][nimpl] = starpu_task_bundle_expected_data_transfer_time(bundle, memory_node); if (local_energy) local_energy[worker_ctx][nimpl] = starpu_task_bundle_expected_energy(bundle, perf_arch,nimpl); } else { local_task_length[worker_ctx][nimpl] = starpu_task_worker_expected_length(task, workerid, sched_ctx_id, nimpl); if (local_data_penalty) local_data_penalty[worker_ctx][nimpl] = starpu_task_expected_data_transfer_time_for(task, workerid); if (local_energy) local_energy[worker_ctx][nimpl] = starpu_task_worker_expected_energy(task, workerid, sched_ctx_id,nimpl); double conversion_time = starpu_task_expected_conversion_time(task, perf_arch, nimpl); if (conversion_time > 0.0) local_task_length[worker_ctx][nimpl] += conversion_time; } double ntasks_end = fifo_ntasks / starpu_worker_get_relative_speedup(perf_arch); /* * This implements a default greedy scheduler for the * case of tasks which have no performance model, or * whose performance model is not calibrated yet. * * It simply uses the number of tasks already pushed to * the workers, divided by the relative performance of * a CPU and of a GPU. * * This is always computed, but the ntasks_best * selection is only really used if the task indeed has * no performance model, or is not calibrated yet. */ if (ntasks_best == -1 /* Always compute the greedy decision, at least for * the tasks with no performance model. */ || (!calibrating && ntasks_end < ntasks_best_end) /* The performance model of this task is not * calibrated on this workerid, try to run it there * to calibrate it there. */ || (!calibrating && isnan(local_task_length[worker_ctx][nimpl])) /* the performance model of this task is not * calibrated on this workerid either, rather run it * there if this one is low on scheduled tasks. */ || (calibrating && isnan(local_task_length[worker_ctx][nimpl]) && ntasks_end < ntasks_best_end) ) { ntasks_best_end = ntasks_end; ntasks_best = workerid; nimpl_best = nimpl; } if (isnan(local_task_length[worker_ctx][nimpl])) /* we are calibrating, we want to speed-up calibration time * so we privilege non-calibrated tasks (but still * greedily distribute them to avoid dumb schedules) */ calibrating = 1; if (isnan(local_task_length[worker_ctx][nimpl]) || _STARPU_IS_ZERO(local_task_length[worker_ctx][nimpl])) /* there is no prediction available for that task * with that arch (yet or at all), so switch to a greedy strategy */ unknown = 1; if (unknown) continue; double task_starting_time = exp_start + prev_exp_len; if (local_data_penalty) task_starting_time = STARPU_MAX(task_starting_time, now + local_data_penalty[worker_ctx][nimpl]); exp_end[worker_ctx][nimpl] = task_starting_time + local_task_length[worker_ctx][nimpl]; if (exp_end[worker_ctx][nimpl] < best_exp_end_of_task) { /* a better solution was found */ best_exp_end_of_task = exp_end[worker_ctx][nimpl]; nimpl_best = nimpl; } if (local_energy) if (isnan(local_energy[worker_ctx][nimpl])) local_energy[worker_ctx][nimpl] = 0.; } worker_ctx++; } *forced_worker = unknown?ntasks_best:-1; *forced_impl = unknown?nimpl_best:-1; #ifdef STARPU_VERBOSE if (unknown) { dt->eager_task_cnt++; } #endif *min_exp_endp_of_task = best_exp_end_of_task; *max_exp_endp_of_workers = max_exp_end_of_workers; } static double _dmda_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id, unsigned da, unsigned simulate, unsigned sorted_decision) { /* find the queue */ int best = -1, best_in_ctx = -1; int selected_impl = 0; double model_best = 0.0; double transfer_model_best = 0.0; /* this flag is set if the corresponding worker is selected because there is no performance prediction available yet */ int forced_best = -1; int forced_impl = -1; struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); unsigned nworkers_ctx = workers->nworkers; double local_task_length[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; double local_data_penalty[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; double local_energy[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; /* Expected end of this task on the workers */ double exp_end[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; /* This is the minimum among the exp_end[] matrix */ double min_exp_end_of_task; /* This is the maximum termination time of already-scheduled tasks over all workers */ double max_exp_end_of_workers = 0.0; double fitness[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; compute_all_performance_predictions(task, nworkers_ctx, local_task_length, exp_end, &max_exp_end_of_workers, &min_exp_end_of_task, da ? local_data_penalty : NULL, da ? local_energy : NULL, &forced_best, &forced_impl, sched_ctx_id, sorted_decision); if (forced_best == -1) { double best_fitness = -1; unsigned worker_ctx = 0; struct starpu_sched_ctx_iterator it; workers->init_iterator_for_parallel_tasks(workers, &it, task); while(worker_ctx < nworkers_ctx && workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); unsigned nimpl; unsigned impl_mask; if (!starpu_worker_can_execute_task_impl(worker, task, &impl_mask)) continue; for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { if (!(impl_mask & (1U << nimpl))) { /* no one on that queue may execute this task */ continue; } if (da) fitness[worker_ctx][nimpl] = dt->alpha * __s_alpha__value *(exp_end[worker_ctx][nimpl] - min_exp_end_of_task) + dt->beta * __s_beta__value *(local_data_penalty[worker_ctx][nimpl]) + dt->_gamma * __s_gamma__value *(local_energy[worker_ctx][nimpl]); else fitness[worker_ctx][nimpl] = exp_end[worker_ctx][nimpl] - min_exp_end_of_task; if (da && exp_end[worker_ctx][nimpl] > max_exp_end_of_workers) { /* This placement will make the computation * longer, take into account the idle * consumption of other cpus */ fitness[worker_ctx][nimpl] += dt->_gamma * __s_gamma__value * dt->idle_power * __s_idle_power__value * (exp_end[worker_ctx][nimpl] - max_exp_end_of_workers) / 1000000.0; /* Since gamma is the cost in us of one Joules, then d->idle_power * (exp_end - max_exp_end_of_workers) must be in Joules, thus the / 1000000.0 */ } if (best == -1 || fitness[worker_ctx][nimpl] < best_fitness) { /* we found a better solution */ best_fitness = fitness[worker_ctx][nimpl]; best = worker; best_in_ctx = worker_ctx; selected_impl = nimpl; //_STARPU_DEBUG("best fitness (worker %d) %e = alpha*(%e) + beta(%e) +gamma(%e)\n", worker, best_fitness, exp_end[worker][nimpl] - min_exp_end_of_task, local_data_penalty[worker][nimpl], local_energy[worker][nimpl]); } } worker_ctx++; } } STARPU_ASSERT(forced_best != -1 || best != -1); if (forced_best != -1) { /* there is no prediction available for that task * with that arch we want to speed-up calibration time * so we force this measurement */ best = forced_best; selected_impl = forced_impl; model_best = 0.0; transfer_model_best = 0.0; } else if (task->bundle) { struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(best_in_ctx, sched_ctx_id); unsigned memory_node = starpu_worker_get_memory_node(best); model_best = starpu_task_expected_length(task, perf_arch, selected_impl); if (da) transfer_model_best = starpu_task_expected_data_transfer_time(memory_node, task); } else { model_best = local_task_length[best_in_ctx][selected_impl]; if (da) transfer_model_best = local_data_penalty[best_in_ctx][selected_impl]; } //_STARPU_DEBUG("Scheduler dmda: kernel (%u)\n", selected_impl); starpu_task_set_implementation(task, selected_impl); starpu_sched_task_break(task); if(!simulate) { /* we should now have the best worker in variable "best" */ return push_task_on_best_worker(task, best, model_best, transfer_model_best, prio, sched_ctx_id); } else { return exp_end[best_in_ctx][selected_impl] ; } } static int dmda_push_sorted_decision_task(struct starpu_task *task) { return _dmda_push_task(task, 1, task->sched_ctx, 1, 0, 1); } static int dmda_push_sorted_task(struct starpu_task *task) { #ifdef STARPU_DEVEL #warning TODO: after defining a scheduling window, use that instead of empty_ctx_tasks #endif return _dmda_push_task(task, 1, task->sched_ctx, 1, 0, 0); } static int dm_push_task(struct starpu_task *task) { return _dmda_push_task(task, 0, task->sched_ctx, 0, 0, 0); } static double dm_simulate_push_task(struct starpu_task *task) { return _dmda_push_task(task, 0, task->sched_ctx, 0, 1, 0); } static int dmda_push_task(struct starpu_task *task) { STARPU_ASSERT(task); return _dmda_push_task(task, 0, task->sched_ctx, 1, 0, 0); } static double dmda_simulate_push_task(struct starpu_task *task) { STARPU_ASSERT(task); return _dmda_push_task(task, 0, task->sched_ctx, 1, 1, 0); } static double dmda_simulate_push_sorted_task(struct starpu_task *task) { STARPU_ASSERT(task); return _dmda_push_task(task, 1, task->sched_ctx, 1, 1, 0); } static double dmda_simulate_push_sorted_decision_task(struct starpu_task *task) { STARPU_ASSERT(task); return _dmda_push_task(task, 1, task->sched_ctx, 1, 1, 1); } #ifdef NOTIFY_READY_SOON static void dmda_notify_ready_soon(void *data STARPU_ATTRIBUTE_UNUSED, struct starpu_task *task, double delay) { if (!task->cl) return; /* fprintf(stderr, "task %lu %p %p %s %s will be ready within %f\n", starpu_task_get_job_id(task), task, task->cl, task->cl->name, task->cl->model?task->cl->model->symbol : NULL, delay); */ /* TODO: do something with it */ } #endif static void dmda_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) { struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); unsigned i; for (i = 0; i < nworkers; i++) { struct starpu_st_fifo_taskq *q; int workerid = workerids[i]; /* if the worker has alreadry belonged to this context the queue and the synchronization variables have been already initialized */ q = &dt->queue_array[workerid]; starpu_st_fifo_taskq_init(q); /* These are only stats, they can be read with races */ STARPU_HG_DISABLE_CHECKING(q->exp_start); STARPU_HG_DISABLE_CHECKING(q->exp_len); STARPU_HG_DISABLE_CHECKING(q->exp_end); if(dt->num_priorities != -1) { _STARPU_MALLOC(q->exp_len_per_priority, dt->num_priorities*sizeof(double)); _STARPU_MALLOC(q->ntasks_per_priority, dt->num_priorities*sizeof(unsigned)); int j; for(j = 0; j < dt->num_priorities; j++) { q->exp_len_per_priority[j] = 0.0; q->ntasks_per_priority[j] = 0; } } } } static void dmda_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) { struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); unsigned i; for (i = 0; i < nworkers; i++) { int workerid = workerids[i]; if(dt->num_priorities != -1) { free(dt->queue_array[workerid].exp_len_per_priority); free(dt->queue_array[workerid].ntasks_per_priority); } } } static void initialize_dmda_policy(unsigned sched_ctx_id) { struct _starpu_dmda_data *dt; _STARPU_CALLOC(dt, 1, sizeof(struct _starpu_dmda_data)); starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)dt); dt->alpha = starpu_getenv_float_default("STARPU_SCHED_ALPHA", _STARPU_SCHED_ALPHA_DEFAULT); dt->beta = starpu_getenv_float_default("STARPU_SCHED_BETA", _STARPU_SCHED_BETA_DEFAULT); /* data->_gamma: cost of one Joule in us. If gamma is set to 10^6, then one Joule cost 1s */ #ifdef STARPU_NON_BLOCKING_DRIVERS if (starpu_getenv("STARPU_SCHED_GAMMA")) _STARPU_DISP("Warning: STARPU_SCHED_GAMMA was used, but --enable-blocking-drivers configuration was not set, CPU cores will not actually be sleeping\n"); #endif dt->_gamma = starpu_getenv_float_default("STARPU_SCHED_GAMMA", _STARPU_SCHED_GAMMA_DEFAULT); /* data->idle_power: Idle power of the whole machine in Watt */ dt->idle_power = starpu_getenv_float_default("STARPU_IDLE_POWER", 0.0); if(starpu_sched_ctx_min_priority_is_set(sched_ctx_id) != 0 && starpu_sched_ctx_max_priority_is_set(sched_ctx_id) != 0) dt->num_priorities = starpu_sched_ctx_get_max_priority(sched_ctx_id) - starpu_sched_ctx_get_min_priority(sched_ctx_id) + 1; else dt->num_priorities = -1; #ifdef NOTIFY_READY_SOON starpu_task_notify_ready_soon_register(dmda_notify_ready_soon, dt); #endif } static void initialize_dmda_sorted_policy(unsigned sched_ctx_id) { initialize_dmda_policy(sched_ctx_id); /* The application may use any integer */ if (starpu_sched_ctx_min_priority_is_set(sched_ctx_id) == 0) starpu_sched_ctx_set_min_priority(sched_ctx_id, INT_MIN); if (starpu_sched_ctx_max_priority_is_set(sched_ctx_id) == 0) starpu_sched_ctx_set_max_priority(sched_ctx_id, INT_MAX); } static void deinitialize_dmda_policy(unsigned sched_ctx_id) { struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); #ifdef STARPU_VERBOSE { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id); long int modelled_task_cnt = dt->total_task_cnt - dt->eager_task_cnt; _STARPU_DEBUG("%s sched policy (sched_ctx %u): total_task_cnt %ld ready_task_cnt %ld (%.1f%%), modelled_task_cnt = %ld (%.1f%%)%s\n", sched_ctx->sched_policy?sched_ctx->sched_policy->policy_name:"", sched_ctx_id, dt->total_task_cnt, dt->ready_task_cnt, (100.0f*dt->ready_task_cnt)/dt->total_task_cnt, modelled_task_cnt, (100.0f*modelled_task_cnt)/dt->total_task_cnt, modelled_task_cnt==0?" *** Check if performance models are enabled and converging on a per-codelet basis, or use an non-modeling scheduling policy. ***":""); } #endif free(dt); } /* dmda_pre_exec_hook is called right after the data transfer is done and right * before the computation to begin, it is useful to update more precisely the * value of the expected start, end, length, etc... */ static void dmda_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id) { unsigned workerid = starpu_worker_get_id_check(); struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); struct starpu_st_fifo_taskq *fifo = &dt->queue_array[workerid]; const double now = starpu_timing_now(); /* Once the task is executing, we can update the predicted amount * of work. */ starpu_worker_lock_self(); _starpu_fifo_task_started(fifo, task, dt->num_priorities); /* Take the opportunity to update start time */ fifo->exp_start = STARPU_MAX(now + fifo->pipeline_len, fifo->exp_start); fifo->exp_end = fifo->exp_start + fifo->exp_len; starpu_worker_unlock_self(); } static void _dm_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id, int da) { struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); struct starpu_st_fifo_taskq *fifo = &dt->queue_array[workerid]; /* Compute the expected penalty */ double predicted = starpu_task_worker_expected_length(task, perf_workerid, sched_ctx_id, starpu_task_get_implementation(task)); double predicted_transfer = NAN; if (da) predicted_transfer = starpu_task_expected_data_transfer_time_for(task, workerid); double now = starpu_timing_now(); /* Update the predictions */ starpu_worker_lock(workerid); fifo->pipeline_ntasks++; /* Sometimes workers didn't take the tasks as early as we expected */ fifo->exp_start = isnan(fifo->exp_start) ? now + fifo->pipeline_len : STARPU_MAX(fifo->exp_start, now); fifo->exp_end = fifo->exp_start + fifo->exp_len; if (da) { /* If there is no prediction available, we consider the task has a null length */ if (!isnan(predicted_transfer)) { if (now + predicted_transfer < fifo->exp_end) { /* We may hope that the transfer will be finished by * the start of the task. */ predicted_transfer = 0; } else { /* The transfer will not be finished by then, take the * remainder into account */ predicted_transfer = (now + predicted_transfer) - fifo->exp_end; } task->predicted_transfer = predicted_transfer; fifo->exp_end += predicted_transfer; fifo->exp_len += predicted_transfer; if(dt->num_priorities != -1) { int i; int task_prio = starpu_st_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx); for(i = 0; i <= task_prio; i++) fifo->exp_len_per_priority[i] += predicted_transfer; } } } /* If there is no prediction available, we consider the task has a null length */ if (!isnan(predicted)) { task->predicted = predicted; fifo->exp_end += predicted; fifo->exp_len += predicted; if(dt->num_priorities != -1) { int i; int task_prio = starpu_st_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx); for(i = 0; i <= task_prio; i++) fifo->exp_len_per_priority[i] += predicted; } } if(dt->num_priorities != -1) { int i; int task_prio = starpu_st_normalize_prio(task->priority, dt->num_priorities, task->sched_ctx); for(i = 0; i <= task_prio; i++) fifo->ntasks_per_priority[i]++; } fifo->ntasks++; starpu_worker_unlock(workerid); } static void dm_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id) { _dm_push_task_notify(task, workerid, perf_workerid, sched_ctx_id, 0); } static void dmda_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id) { _dm_push_task_notify(task, workerid, perf_workerid, sched_ctx_id, 1); } static void dmda_post_exec_hook(struct starpu_task * task, unsigned sched_ctx_id) { struct _starpu_dmda_data *dt = (struct _starpu_dmda_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); unsigned workerid = starpu_worker_get_id_check(); struct starpu_st_fifo_taskq *fifo = &dt->queue_array[workerid]; starpu_worker_lock_self(); _starpu_fifo_task_finished(fifo, task, dt->num_priorities); starpu_worker_unlock_self(); } struct starpu_sched_policy _starpu_sched_dm_policy = { .init_sched = initialize_dmda_policy, .deinit_sched = deinitialize_dmda_policy, .add_workers = dmda_add_workers , .remove_workers = dmda_remove_workers, .push_task = dm_push_task, .simulate_push_task = dm_simulate_push_task, .push_task_notify = dm_push_task_notify, .pop_task = dmda_pop_task, .pre_exec_hook = dmda_pre_exec_hook, .post_exec_hook = dmda_post_exec_hook, .policy_name = "dm", .policy_description = "performance model", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; struct starpu_sched_policy _starpu_sched_dmda_policy = { .init_sched = initialize_dmda_policy, .deinit_sched = deinitialize_dmda_policy, .add_workers = dmda_add_workers , .remove_workers = dmda_remove_workers, .push_task = dmda_push_task, .simulate_push_task = dmda_simulate_push_task, .push_task_notify = dmda_push_task_notify, .pop_task = dmda_pop_task, .pre_exec_hook = dmda_pre_exec_hook, .post_exec_hook = dmda_post_exec_hook, .policy_name = "dmda", .policy_description = "data-aware performance model", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; struct starpu_sched_policy _starpu_sched_dmda_prio_policy = { .init_sched = initialize_dmda_sorted_policy, .deinit_sched = deinitialize_dmda_policy, .add_workers = dmda_add_workers , .remove_workers = dmda_remove_workers, .push_task = dmda_push_sorted_task, .simulate_push_task = dmda_simulate_push_sorted_task, .push_task_notify = dmda_push_task_notify, .pop_task = dmda_pop_task, .pre_exec_hook = dmda_pre_exec_hook, .post_exec_hook = dmda_post_exec_hook, .policy_name = "dmdap", .policy_description = "data-aware performance model (priority)", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; struct starpu_sched_policy _starpu_sched_dmda_sorted_policy = { .init_sched = initialize_dmda_sorted_policy, .deinit_sched = deinitialize_dmda_policy, .add_workers = dmda_add_workers , .remove_workers = dmda_remove_workers, .push_task = dmda_push_sorted_task, .simulate_push_task = dmda_simulate_push_sorted_task, .push_task_notify = dmda_push_task_notify, .pop_task = dmda_pop_ready_task, .pre_exec_hook = dmda_pre_exec_hook, .post_exec_hook = dmda_post_exec_hook, .policy_name = "dmdas", .policy_description = "data-aware performance model (sorted)", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; struct starpu_sched_policy _starpu_sched_dmda_sorted_decision_policy = { .init_sched = initialize_dmda_sorted_policy, .deinit_sched = deinitialize_dmda_policy, .add_workers = dmda_add_workers , .remove_workers = dmda_remove_workers, .push_task = dmda_push_sorted_decision_task, .simulate_push_task = dmda_simulate_push_sorted_decision_task, .push_task_notify = dmda_push_task_notify, .pop_task = dmda_pop_ready_task, .pre_exec_hook = dmda_pre_exec_hook, .post_exec_hook = dmda_post_exec_hook, .policy_name = "dmdasd", .policy_description = "data-aware performance model (sorted decision)", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; struct starpu_sched_policy _starpu_sched_dmda_ready_policy = { .init_sched = initialize_dmda_policy, .deinit_sched = deinitialize_dmda_policy, .add_workers = dmda_add_workers , .remove_workers = dmda_remove_workers, .push_task = dmda_push_task, .simulate_push_task = dmda_simulate_push_task, .push_task_notify = dmda_push_task_notify, .pop_task = dmda_pop_ready_task, .pre_exec_hook = dmda_pre_exec_hook, .post_exec_hook = dmda_post_exec_hook, .policy_name = "dmdar", .policy_description = "data-aware performance model (ready)", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/eager_central_policy.c000066400000000000000000000147411507764646700241010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is just the trivial policy where every worker use the same * JOB QUEUE. */ #include #include #include #include #include #include struct _starpu_eager_center_policy_data { struct starpu_st_fifo_taskq fifo; starpu_pthread_mutex_t policy_mutex; struct starpu_bitmap waiters; }; static void initialize_eager_center_policy(unsigned sched_ctx_id) { struct _starpu_eager_center_policy_data *data; _STARPU_MALLOC(data, sizeof(struct _starpu_eager_center_policy_data)); /* there is only a single queue in that trivial design */ starpu_st_fifo_taskq_init(&data->fifo); starpu_bitmap_init(&data->waiters); starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data); STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL); } static void deinitialize_eager_center_policy(unsigned sched_ctx_id) { struct _starpu_eager_center_policy_data *data = (struct _starpu_eager_center_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); struct starpu_st_fifo_taskq *fifo = &data->fifo; STARPU_ASSERT(starpu_task_list_empty(&fifo->taskq)); STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex); free(data); } static int push_task_eager_policy(struct starpu_task *task) { unsigned sched_ctx_id = task->sched_ctx; struct _starpu_eager_center_policy_data *data = (struct _starpu_eager_center_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); starpu_worker_relax_off(); starpu_task_list_push_back(&data->fifo.taskq,task); data->fifo.ntasks++; data->fifo.nprocessed++; if (_starpu_get_nsched_ctxs() > 1) { starpu_worker_relax_on(); _starpu_sched_ctx_lock_write(sched_ctx_id); starpu_worker_relax_off(); starpu_sched_ctx_list_task_counters_increment_all_ctx_locked(task, sched_ctx_id); _starpu_sched_ctx_unlock_write(sched_ctx_id); } starpu_push_task_end(task); /*if there are no tasks block */ /* wake people waiting for a task */ struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; #ifndef STARPU_NON_BLOCKING_DRIVERS char dowake[STARPU_NMAXWORKERS] = { 0 }; #endif workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); #ifdef STARPU_NON_BLOCKING_DRIVERS if (!starpu_bitmap_get(&data->waiters, worker)) /* This worker is not waiting for a task */ continue; #endif if (starpu_worker_can_execute_task_first_impl(worker, task, NULL)) { /* It can execute this one, tell him! */ #ifdef STARPU_NON_BLOCKING_DRIVERS starpu_bitmap_unset(&data->waiters, worker); /* We really woke at least somebody, no need to wake somebody else */ break; #else dowake[worker] = 1; #endif } } /* Let the task free */ STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); #if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) /* Now that we have a list of potential workers, try to wake one */ workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); if (dowake[worker]) if (starpu_wake_worker_relax_light(worker)) break; // wake up a single worker } #endif return 0; } static struct starpu_task *pop_task_eager_policy(unsigned sched_ctx_id) { struct starpu_task *chosen_task = NULL; unsigned workerid = starpu_worker_get_id_check(); struct _starpu_eager_center_policy_data *data = (struct _starpu_eager_center_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); /* Here helgrind would shout that this is unprotected, this is just an * integer access, and we hold the sched mutex, so we can not miss any * wake up. */ if (!STARPU_RUNNING_ON_VALGRIND && starpu_st_fifo_taskq_empty(&data->fifo)) { return NULL; } #ifdef STARPU_NON_BLOCKING_DRIVERS if (!STARPU_RUNNING_ON_VALGRIND && starpu_bitmap_get(&data->waiters, workerid)) /* Nobody woke us, avoid bothering the mutex */ { return NULL; } #endif starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); starpu_worker_relax_off(); chosen_task = starpu_st_fifo_taskq_pop_task(&data->fifo, workerid); if (!chosen_task) /* Tell pushers that we are waiting for tasks for us */ starpu_bitmap_set(&data->waiters, workerid); STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); if(chosen_task &&_starpu_get_nsched_ctxs() > 1) { starpu_worker_relax_on(); _starpu_sched_ctx_lock_write(sched_ctx_id); starpu_worker_relax_off(); starpu_sched_ctx_list_task_counters_decrement_all_ctx_locked(chosen_task, sched_ctx_id); if (_starpu_sched_ctx_worker_is_master_for_child_ctx(sched_ctx_id, workerid, chosen_task)) chosen_task = NULL; _starpu_sched_ctx_unlock_write(sched_ctx_id); } return chosen_task; } static void eager_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) { unsigned i; for (i = 0; i < nworkers; i++) { int workerid = workerids[i]; int curr_workerid = _starpu_worker_get_id(); if(workerid != curr_workerid) starpu_wake_worker_locked(workerid); starpu_sched_ctx_worker_shares_tasks_lists(workerid, sched_ctx_id); } } struct starpu_sched_policy _starpu_sched_eager_policy = { .init_sched = initialize_eager_center_policy, .deinit_sched = deinitialize_eager_center_policy, .add_workers = eager_add_workers, .remove_workers = NULL, .push_task = push_task_eager_policy, .pop_task = pop_task_eager_policy, .pre_exec_hook = NULL, .post_exec_hook = NULL, .policy_name = "eager", .policy_description = "eager policy with a central queue", .worker_type = STARPU_WORKER_LIST, }; starpu-1.4.9+dfsg/src/sched_policies/eager_central_priority_policy.c000066400000000000000000000175431507764646700260450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is policy where every worker use the same JOB QUEUE, but taking * task priorities into account * * TODO: merge with eager, after checking the scalability */ #include #include #include #include #include #include #include #include struct _starpu_eager_central_prio_data { struct starpu_st_prio_deque taskq; starpu_pthread_mutex_t policy_mutex; struct starpu_bitmap waiters; }; /* * Centralized queue with priorities */ static void initialize_eager_center_priority_policy(unsigned sched_ctx_id) { struct _starpu_eager_central_prio_data *data; _STARPU_MALLOC(data, sizeof(struct _starpu_eager_central_prio_data)); /* only a single queue (even though there are several internally) */ starpu_st_prio_deque_init(&data->taskq); starpu_bitmap_init(&data->waiters); /* Tell helgrind that it's fine to check for empty fifo in * _starpu_priority_pop_task without actual mutex (it's just an * integer) */ STARPU_HG_DISABLE_CHECKING(data->taskq.ntasks); starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data); STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL); /* The application may use any integer */ if (starpu_sched_ctx_min_priority_is_set(sched_ctx_id) == 0) starpu_sched_ctx_set_min_priority(sched_ctx_id, INT_MIN); if (starpu_sched_ctx_max_priority_is_set(sched_ctx_id) == 0) starpu_sched_ctx_set_max_priority(sched_ctx_id, INT_MAX); } static void deinitialize_eager_center_priority_policy(unsigned sched_ctx_id) { /* TODO check that there is no task left in the queue */ struct _starpu_eager_central_prio_data *data = (struct _starpu_eager_central_prio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); /* deallocate the job queue */ starpu_st_prio_deque_destroy(&data->taskq); STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex); free(data); } static int _starpu_priority_push_task(struct starpu_task *task) { unsigned sched_ctx_id = task->sched_ctx; struct _starpu_eager_central_prio_data *data = (struct _starpu_eager_central_prio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); struct starpu_st_prio_deque *taskq = &data->taskq; starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); starpu_worker_relax_off(); starpu_st_prio_deque_push_back_task(taskq, task); if (_starpu_get_nsched_ctxs() > 1) { starpu_worker_relax_on(); _starpu_sched_ctx_lock_write(sched_ctx_id); starpu_worker_relax_off(); starpu_sched_ctx_list_task_counters_increment_all_ctx_locked(task, sched_ctx_id); _starpu_sched_ctx_unlock_write(sched_ctx_id); } starpu_push_task_end(task); /*if there are no tasks block */ /* wake people waiting for a task */ struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; #ifndef STARPU_NON_BLOCKING_DRIVERS char dowake[STARPU_NMAXWORKERS] = { 0 }; #endif workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); #ifdef STARPU_NON_BLOCKING_DRIVERS if (!starpu_bitmap_get(&data->waiters, worker)) /* This worker is not waiting for a task */ continue; #endif if (starpu_worker_can_execute_task_first_impl(worker, task, NULL)) { /* It can execute this one, tell him! */ #ifdef STARPU_NON_BLOCKING_DRIVERS starpu_bitmap_unset(&data->waiters, worker); /* We really woke at least somebody, no need to wake somebody else */ break; #else dowake[worker] = 1; #endif } } /* Let the task free */ STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); #if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) /* Now that we have a list of potential workers, try to wake one */ workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); if (dowake[worker]) if (starpu_wake_worker_relax_light(worker)) break; // wake up a single worker } #endif return 0; } static struct starpu_task *_starpu_priority_pop_task(unsigned sched_ctx_id) { struct starpu_task *chosen_task; unsigned workerid = starpu_worker_get_id_check(); struct starpu_task *skipped; struct _starpu_eager_central_prio_data *data = (struct _starpu_eager_central_prio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); struct starpu_st_prio_deque *taskq = &data->taskq; /* Here helgrind would shout that this is unprotected, this is just an * integer access, and we hold the sched mutex, so we can not miss any * wake up. */ if (!STARPU_RUNNING_ON_VALGRIND && starpu_st_prio_deque_is_empty(taskq)) { return NULL; } #ifdef STARPU_NON_BLOCKING_DRIVERS if (!STARPU_RUNNING_ON_VALGRIND && starpu_bitmap_get(&data->waiters, workerid)) /* Nobody woke us, avoid bothering the mutex */ { return NULL; } #endif starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); starpu_worker_relax_off(); chosen_task = starpu_st_prio_deque_pop_task_for_worker(taskq, workerid, &skipped); if (!chosen_task && skipped) { /* Notify another worker to do that task */ struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); if(worker != workerid && starpu_worker_can_execute_task_first_impl(worker, skipped, NULL)) { #ifdef STARPU_NON_BLOCKING_DRIVERS starpu_bitmap_unset(&data->waiters, worker); #else starpu_wake_worker_relax_light(worker); #endif } } } if (!chosen_task) /* Tell pushers that we are waiting for tasks for us */ starpu_bitmap_set(&data->waiters, workerid); STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); if(chosen_task &&_starpu_get_nsched_ctxs() > 1) { starpu_worker_relax_on(); _starpu_sched_ctx_lock_write(sched_ctx_id); starpu_worker_relax_off(); starpu_sched_ctx_list_task_counters_decrement_all_ctx_locked(chosen_task, sched_ctx_id); if (_starpu_sched_ctx_worker_is_master_for_child_ctx(sched_ctx_id, workerid, chosen_task)) chosen_task = NULL; _starpu_sched_ctx_unlock_write(sched_ctx_id); } return chosen_task; } static void eager_center_priority_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) { unsigned i; for (i = 0; i < nworkers; i++) { int workerid = workerids[i]; int curr_workerid = _starpu_worker_get_id(); if(workerid != curr_workerid) starpu_wake_worker_locked(workerid); starpu_sched_ctx_worker_shares_tasks_lists(workerid, sched_ctx_id); } } struct starpu_sched_policy _starpu_sched_prio_policy = { .add_workers = eager_center_priority_add_workers, .init_sched = initialize_eager_center_priority_policy, .deinit_sched = deinitialize_eager_center_priority_policy, /* we always use priorities in that policy */ .push_task = _starpu_priority_push_task, .pop_task = _starpu_priority_pop_task, .pre_exec_hook = NULL, .post_exec_hook = NULL, .policy_name = "prio", .policy_description = "eager (with priorities)", .worker_type = STARPU_WORKER_LIST, }; starpu-1.4.9+dfsg/src/sched_policies/fifo_queues.c000066400000000000000000000306631507764646700222420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * Copyright (C) 2013-2013 Simon Archipoff * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* FIFO queues, ready for use by schedulers */ #include #include #include #include #include #include /* static int is_sorted_task_list(struct starpu_task * task) { if(!task) return 1; struct starpu_task * next = task->next; if(!next) return 1; while(next) { if(task->priority < next->priority) return 0; task = next; next = next->next; } return 1; } */ void starpu_st_fifo_taskq_init(struct starpu_st_fifo_taskq *fifo) { /* note that not all mechanisms (eg. the semaphore) have to be used */ starpu_task_list_init(&fifo->taskq); fifo->ntasks = 0; fifo->pipeline_ntasks = 0; /* Tell helgrind that it's fine to check for empty fifo in * pop_task_graph_test_policy without actual mutex (it's just an integer) */ STARPU_HG_DISABLE_CHECKING(fifo->ntasks); fifo->nprocessed = 0; fifo->exp_start = starpu_timing_now(); fifo->exp_len = 0.0; fifo->exp_end = fifo->exp_start; fifo->exp_len_per_priority = NULL; fifo->pipeline_len = 0.0; STARPU_HG_DISABLE_CHECKING(fifo->exp_start); STARPU_HG_DISABLE_CHECKING(fifo->exp_len); STARPU_HG_DISABLE_CHECKING(fifo->exp_end); } struct starpu_st_fifo_taskq *starpu_st_fifo_taskq_create(void) { struct starpu_st_fifo_taskq *fifo; _STARPU_MALLOC(fifo, sizeof(struct starpu_st_fifo_taskq)); starpu_st_fifo_taskq_init(fifo); return fifo; } void starpu_st_fifo_taskq_destroy(struct starpu_st_fifo_taskq *fifo) { free(fifo); } int starpu_st_fifo_taskq_empty(struct starpu_st_fifo_taskq *fifo) { return fifo->ntasks == 0; } unsigned starpu_st_fifo_ntasks_get(struct starpu_st_fifo_taskq *fifo) { return fifo->ntasks; } void starpu_st_fifo_ntasks_inc(struct starpu_st_fifo_taskq *fifo, int n) { fifo->ntasks += n; } unsigned *starpu_st_fifo_ntasks_per_priority_get(struct starpu_st_fifo_taskq *fifo) { return fifo->ntasks_per_priority; } unsigned starpu_st_fifo_nprocessed_get(struct starpu_st_fifo_taskq *fifo) { return fifo->nprocessed; } void starpu_st_fifo_nprocessed_inc(struct starpu_st_fifo_taskq *fifo, int n) { fifo->nprocessed += n; } double starpu_st_fifo_exp_start_get(struct starpu_st_fifo_taskq *fifo) { return fifo->exp_start; } void starpu_st_fifo_exp_start_set(struct starpu_st_fifo_taskq *fifo, double exp_start) { fifo->exp_start = exp_start; } double starpu_st_fifo_exp_end_get(struct starpu_st_fifo_taskq *fifo) { return fifo->exp_end; } void starpu_st_fifo_exp_end_set(struct starpu_st_fifo_taskq *fifo, double exp_end) { fifo->exp_end = exp_end; } double starpu_st_fifo_exp_len_get(struct starpu_st_fifo_taskq *fifo) { return fifo->exp_len; } void starpu_st_fifo_exp_len_set(struct starpu_st_fifo_taskq *fifo, double exp_len) { fifo->exp_len = exp_len; } void starpu_st_fifo_exp_len_inc(struct starpu_st_fifo_taskq *fifo, double exp_len) { fifo->exp_len += exp_len; } double *starpu_st_fifo_exp_len_per_priority_get(struct starpu_st_fifo_taskq *fifo) { return fifo->exp_len_per_priority; } double starpu_st_fifo_pipeline_len_get(struct starpu_st_fifo_taskq *fifo) { return fifo->pipeline_len; } void starpu_st_fifo_pipeline_len_set(struct starpu_st_fifo_taskq *fifo, double pipeline_len) { fifo->pipeline_len = pipeline_len; } void starpu_st_fifo_pipeline_len_inc(struct starpu_st_fifo_taskq *fifo, double pipeline_len) { fifo->pipeline_len += pipeline_len; } double starpu_st_fifo_taskq_get_exp_len_prev_task_list(struct starpu_st_fifo_taskq *fifo_queue, struct starpu_task *task, int workerid, int nimpl, int *fifo_ntasks) { struct starpu_task_list *list = &fifo_queue->taskq; struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(workerid, task->sched_ctx); double exp_len = fifo_queue->pipeline_len; if (list->_head != NULL) { struct starpu_task *current = list->_head; struct starpu_task *prev = NULL; if (list->_head->priority == task->priority && list->_head->priority == list->_tail->priority) { /* They all have the same priority, the task's place is at the end */ prev = list->_tail; current = NULL; } else while (current) { if (current->priority < task->priority) break; prev = current; current = current->next; } if (prev != NULL) { if (current) { /* the task's place is between prev and current */ struct starpu_task *it; *fifo_ntasks = fifo_queue->pipeline_ntasks; for(it = list->_head; it != current; it = it->next) { exp_len += starpu_task_expected_length(it, perf_arch, nimpl); (*fifo_ntasks) ++; } } else { /* the task's place is at the _tail of the list */ exp_len = fifo_queue->exp_len; *fifo_ntasks = fifo_queue->ntasks + fifo_queue->pipeline_ntasks; } } } return exp_len; } int starpu_st_fifo_taskq_push_sorted_task(struct starpu_st_fifo_taskq *fifo_queue, struct starpu_task *task) { struct starpu_task_list *list = &fifo_queue->taskq; if (list->_head == NULL) { list->_head = task; list->_tail = task; task->prev = NULL; task->next = NULL; } else if (list->_head->priority == task->priority && list->_head->priority == list->_tail->priority) { /* They all have the same priority, just put at the end */ list->_tail->next = task; task->next = NULL; task->prev = list->_tail; list->_tail = task; } else { struct starpu_task *current = list->_head; struct starpu_task *prev = NULL; while (current) { if (current->priority < task->priority) break; prev = current; current = current->next; } if (prev == NULL) { /* Insert at the front of the list */ list->_head->prev = task; task->prev = NULL; task->next = list->_head; list->_head = task; } else { if (current) { /* Insert between prev and current */ task->prev = prev; prev->next = task; task->next = current; current->prev = task; } else { /* Insert at the _tail of the list */ list->_tail->next = task; task->next = NULL; task->prev = list->_tail; list->_tail = task; } } } fifo_queue->ntasks++; fifo_queue->nprocessed++; return 0; } int starpu_st_fifo_taskq_push_task(struct starpu_st_fifo_taskq *fifo_queue, struct starpu_task *task) { if (task->priority > 0) { starpu_st_fifo_taskq_push_sorted_task(fifo_queue, task); } else { starpu_task_list_push_back(&fifo_queue->taskq, task); fifo_queue->ntasks++; fifo_queue->nprocessed++; } return 0; } int starpu_st_fifo_taskq_push_back_task(struct starpu_st_fifo_taskq *fifo_queue, struct starpu_task *task) { if (task->priority > 0) { starpu_st_fifo_taskq_push_sorted_task(fifo_queue, task); } else { starpu_task_list_push_front(&fifo_queue->taskq, task); fifo_queue->ntasks++; } return 0; } int starpu_st_fifo_taskq_pop_this_task(struct starpu_st_fifo_taskq *fifo_queue, int workerid, struct starpu_task *task) { unsigned nimpl = 0; STARPU_ASSERT(task); #ifdef STARPU_DEBUG STARPU_ASSERT(starpu_task_list_ismember(&fifo_queue->taskq, task)); #endif if (workerid < 0 || starpu_worker_can_execute_task_first_impl(workerid, task, &nimpl)) { starpu_task_set_implementation(task, nimpl); starpu_task_list_erase(&fifo_queue->taskq, task); fifo_queue->ntasks--; return 1; } return 0; } struct starpu_task *starpu_st_fifo_taskq_pop_task(struct starpu_st_fifo_taskq *fifo_queue, int workerid) { struct starpu_task *task; for (task = starpu_task_list_begin(&fifo_queue->taskq); task != starpu_task_list_end(&fifo_queue->taskq); task = starpu_task_list_next(task)) { if (starpu_st_fifo_taskq_pop_this_task(fifo_queue, workerid, task)) return task; } return NULL; } struct starpu_task *starpu_st_fifo_taskq_pop_local_task(struct starpu_st_fifo_taskq *fifo_queue) { struct starpu_task *task = NULL; if (!starpu_task_list_empty(&fifo_queue->taskq)) { task = starpu_task_list_pop_front(&fifo_queue->taskq); fifo_queue->ntasks--; } return task; } int starpu_st_normalize_prio(int priority, int num_priorities, unsigned sched_ctx_id) { int min = starpu_sched_ctx_get_min_priority(sched_ctx_id); int max = starpu_sched_ctx_get_max_priority(sched_ctx_id); return ((num_priorities-1)/(max-min)) * (priority - min); } void starpu_st_non_ready_buffers_size(struct starpu_task *task, unsigned worker, size_t *non_readyp, size_t *non_loadingp, size_t *non_allocatedp) { size_t non_ready = 0, non_loading = 0, non_allocated = 0; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); unsigned index; for (index = 0; index < nbuffers; index++) { starpu_data_handle_t handle; enum starpu_data_access_mode mode; int buffer_node = _starpu_task_data_get_node_on_worker(task, index, worker); if (buffer_node < 0) continue; handle = STARPU_TASK_GET_HANDLE(task, index); mode = STARPU_TASK_GET_MODE(task, index); if ((mode & STARPU_SCRATCH) || (mode & STARPU_REDUX)) continue; int is_allocated, is_valid, is_loading; starpu_data_query_status2(handle, buffer_node, &is_allocated, &is_valid, &is_loading, NULL); if (!is_allocated) non_allocated+=starpu_data_get_size(handle); if (mode & STARPU_R && !is_valid) { non_ready+=starpu_data_get_size(handle); if (!is_loading) non_loading+=starpu_data_get_size(handle); } } *non_readyp = non_ready; *non_loadingp = non_loading; *non_allocatedp = non_allocated; } int starpu_st_non_ready_buffers_count(struct starpu_task *task, unsigned worker) { int cnt = 0; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); unsigned index; for (index = 0; index < nbuffers; index++) { starpu_data_handle_t handle; enum starpu_data_access_mode mode; int buffer_node = _starpu_task_data_get_node_on_worker(task, index, worker); if (buffer_node < 0) continue; handle = STARPU_TASK_GET_HANDLE(task, index); mode = STARPU_TASK_GET_MODE(task, index); if ((mode & STARPU_SCRATCH) || (mode & STARPU_REDUX)) continue; int is_valid; starpu_data_query_status(handle, buffer_node, NULL, &is_valid, NULL); if (!is_valid) cnt++; } return cnt; } struct starpu_task *starpu_st_fifo_taskq_pop_first_ready_task(struct starpu_st_fifo_taskq *fifo_queue, unsigned workerid, int num_priorities) { struct starpu_task *task = NULL, *current; if (fifo_queue->ntasks == 0) return NULL; if (fifo_queue->ntasks > 0) { fifo_queue->ntasks--; task = starpu_task_list_front(&fifo_queue->taskq); if (STARPU_UNLIKELY(!task)) return NULL; int first_task_priority = task->priority; size_t non_ready_best = SIZE_MAX; size_t non_loading_best = SIZE_MAX; size_t non_allocated_best = SIZE_MAX; for (current = task; current; current = current->next) { int priority = current->priority; if (priority >= first_task_priority) { size_t non_ready, non_loading, non_allocated; starpu_st_non_ready_buffers_size(current, workerid, &non_ready, &non_loading, &non_allocated); if (non_ready < non_ready_best) { non_ready_best = non_ready; non_loading_best = non_loading; non_allocated_best = non_allocated; task = current; if (non_ready == 0 && non_allocated == 0) break; } else if (non_ready == non_ready_best) { if (non_loading < non_loading_best) { non_loading_best = non_loading; non_allocated_best = non_allocated; task = current; } else if (non_loading == non_loading_best) { if (non_allocated < non_allocated_best) { non_allocated_best = non_allocated; task = current; } } } } } if(num_priorities != -1) { int i; int task_prio = starpu_st_normalize_prio(task->priority, num_priorities, task->sched_ctx); for(i = 0; i <= task_prio; i++) fifo_queue->ntasks_per_priority[i]--; } starpu_task_list_erase(&fifo_queue->taskq, task); } return task; } starpu-1.4.9+dfsg/src/sched_policies/fifo_queues.h000066400000000000000000000035151507764646700222430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __FIFO_QUEUES_H__ #define __FIFO_QUEUES_H__ #include /** @file */ struct starpu_st_fifo_taskq { /** the actual list */ struct starpu_task_list taskq; /** the number of tasks currently in the queue */ unsigned ntasks; /** the number of tasks already pushed to the worker */ unsigned pipeline_ntasks; /** the number of tasks currently in the queue corresponding to each priority */ unsigned *ntasks_per_priority; /** the number of tasks that were processed */ unsigned nprocessed; /** only meaningful if the queue is only used by a single worker */ double exp_start; /** Expected start date of next item to do in the * queue (i.e. not started yet). This is thus updated * when we start it. */ double exp_end; /** Expected end date of last task in the queue */ double exp_len; /** Expected duration of the set of tasks in the queue */ double *exp_len_per_priority; /** Expected duration of the set of tasks in the queue corresponding to each priority */ double pipeline_len; /** the expected duration of what is already pushed to the worker */ }; #endif /* __FIFO_QUEUES_H__ */ starpu-1.4.9+dfsg/src/sched_policies/graph_test_policy.c000066400000000000000000000273371507764646700234530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is just a test policy for using task graph information * * We keep tasks in the fifo queue, and store the graph of tasks, until we * get the do_schedule call from the application, which tells us all tasks * were queued, and we can now compute task depths or descendants and let a simple * central-queue greedy algorithm proceed. * * TODO: let workers starting running tasks before the whole graph is submitted? */ #include #include #include #include #include #include #include #include #include struct _starpu_graph_test_policy_data { struct starpu_st_fifo_taskq fifo; /* Bag of tasks which are ready before do_schedule is called */ struct starpu_st_prio_deque prio_cpu; struct starpu_st_prio_deque prio_gpu; starpu_pthread_mutex_t policy_mutex; struct starpu_bitmap waiters; unsigned computed; unsigned descendants; /* Whether we use descendants, or depths, for priorities */ }; static void initialize_graph_test_policy(unsigned sched_ctx_id) { struct _starpu_graph_test_policy_data *data; _STARPU_MALLOC(data, sizeof(struct _starpu_graph_test_policy_data)); /* there is only a single queue in that trivial design */ starpu_st_fifo_taskq_init(&data->fifo); starpu_st_prio_deque_init(&data->prio_cpu); starpu_st_prio_deque_init(&data->prio_gpu); starpu_bitmap_init(&data->waiters); data->computed = 0; data->descendants = starpu_getenv_number_default("STARPU_SCHED_GRAPH_TEST_DESCENDANTS", 0); _starpu_graph_record = 1; starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data); STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL); } static void deinitialize_graph_test_policy(unsigned sched_ctx_id) { struct _starpu_graph_test_policy_data *data = (struct _starpu_graph_test_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); struct starpu_st_fifo_taskq *fifo = &data->fifo; STARPU_ASSERT(starpu_task_list_empty(&fifo->taskq)); /* deallocate the job queue */ starpu_st_prio_deque_destroy(&data->prio_cpu); starpu_st_prio_deque_destroy(&data->prio_gpu); _starpu_graph_record = 0; STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex); free(data); } /* Push the given task on CPU or GPU prio list, using a dumb heuristic */ static struct starpu_st_prio_deque *select_prio(unsigned sched_ctx_id, struct _starpu_graph_test_policy_data *data, struct starpu_task *task) { int cpu_can = 0, gpu_can = 0; double cpu_speed = 0.; double gpu_speed = 0.; /* Compute how fast CPUs can compute it, and how fast GPUs can compute it */ struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); if (!starpu_worker_can_execute_task(worker, task, 0)) /* This worker can not execute this task, don't count it */ continue; if (starpu_worker_get_type(worker) == STARPU_CPU_WORKER) /* At least one CPU can run it */ cpu_can = 1; else /* At least one GPU can run it */ gpu_can = 1; /* Get expected task duration for this worker */ struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(worker, sched_ctx_id); double length = starpu_task_expected_length(task, perf_arch, 0); double power; if (isnan(length)) /* We don't have an estimation yet */ length = 0.; if (length == 0.) { if (!task->cl || task->cl->model == NULL) { static unsigned _warned; STARPU_HG_DISABLE_CHECKING(_warned); if (STARPU_ATOMIC_ADD(&_warned, 1) == 1) { _STARPU_DISP("Warning: graph_test needs performance models for all tasks, including %s\n", starpu_task_get_name(task)); } else { (void)STARPU_ATOMIC_ADD(&_warned, -1); } } power = 0.; } else power = 1./length; /* Add the computation power to the CPU or GPU pool */ if (starpu_worker_get_type(worker) == STARPU_CPU_WORKER) cpu_speed += power; else gpu_speed += power; } /* Decide to push on CPUs or GPUs depending on the overall computation power */ if (!gpu_can || (cpu_can && cpu_speed > gpu_speed)) return &data->prio_cpu; else return &data->prio_gpu; } static void set_priority(void *_data, struct _starpu_graph_node *node) { struct _starpu_graph_test_policy_data *data = _data; starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&node->mutex); starpu_worker_relax_off(); struct _starpu_job *job = node->job; if (job) { if (data->descendants) job->task->priority = node->descendants; else job->task->priority = node->depth; } STARPU_PTHREAD_MUTEX_UNLOCK(&node->mutex); } static void do_schedule_graph_test_policy(unsigned sched_ctx_id) { struct _starpu_graph_test_policy_data *data = (struct _starpu_graph_test_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); starpu_worker_relax_off(); if (data->descendants) _starpu_graph_compute_descendants(); else _starpu_graph_compute_depths(); if (data->computed == 0) { data->computed = 1; /* FIXME: if data->computed already == 1, some tasks may already have been pushed to priority stage '0' in * push_task_graph_test_policy, then if we change the priority here, the stage lookup to remove the task * will get the wrong stage */ _starpu_graph_foreach(set_priority, data); } /* Now that we have priorities, move tasks from bag to priority queue */ while(!starpu_st_fifo_taskq_empty(&data->fifo)) { struct starpu_task *task = starpu_st_fifo_taskq_pop_task(&data->fifo, -1); struct starpu_st_prio_deque *prio = select_prio(sched_ctx_id, data, task); starpu_st_prio_deque_push_back_task(prio, task); } /* And unleash the beast! */ struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; #ifdef STARPU_NON_BLOCKING_DRIVERS workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { /* Tell each worker is shouldn't sleep any more */ unsigned worker = workers->get_next(workers, &it); starpu_bitmap_unset(&data->waiters, worker); } #endif STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); #if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { /* Wake each worker */ unsigned worker = workers->get_next(workers, &it); starpu_wake_worker_relax_light(worker); } #endif } static int push_task_graph_test_policy(struct starpu_task *task) { unsigned sched_ctx_id = task->sched_ctx; struct _starpu_graph_test_policy_data *data = (struct _starpu_graph_test_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); starpu_worker_relax_off(); if (!data->computed) { /* Priorities are not computed, leave the task in the bag for now */ starpu_task_list_push_back(&data->fifo.taskq,task); data->fifo.ntasks++; data->fifo.nprocessed++; starpu_push_task_end(task); STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); return 0; } /* Priorities are computed, we can push to execution */ struct starpu_st_prio_deque *prio = select_prio(sched_ctx_id, data, task); starpu_st_prio_deque_push_back_task(prio, task); starpu_push_task_end(task); /*if there are no tasks block */ /* wake people waiting for a task */ struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; #ifndef STARPU_NON_BLOCKING_DRIVERS char dowake[STARPU_NMAXWORKERS] = { 0 }; #endif workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); #ifdef STARPU_NON_BLOCKING_DRIVERS if (!starpu_bitmap_get(&data->waiters, worker)) /* This worker is not waiting for a task */ continue; #endif if (prio == &data->prio_cpu && starpu_worker_get_type(worker) != STARPU_CPU_WORKER) /* This worker doesn't pop from the queue we have filled */ continue; if (prio == &data->prio_gpu && starpu_worker_get_type(worker) == STARPU_CPU_WORKER) /* This worker doesn't pop from the queue we have filled */ continue; if (starpu_worker_can_execute_task_first_impl(worker, task, NULL)) { /* It can execute this one, tell him! */ #ifdef STARPU_NON_BLOCKING_DRIVERS starpu_bitmap_unset(&data->waiters, worker); /* We really woke at least somebody, no need to wake somebody else */ break; #else dowake[worker] = 1; #endif } } /* Let the task free */ STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); #if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) /* Now that we have a list of potential workers, try to wake one */ workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); if (dowake[worker]) { if (starpu_wake_worker_relax_light(worker)) break; // wake up a single worker } } #endif return 0; } static struct starpu_task *pop_task_graph_test_policy(unsigned sched_ctx_id) { struct starpu_task *chosen_task = NULL; unsigned workerid = starpu_worker_get_id_check(); struct _starpu_graph_test_policy_data *data = (struct _starpu_graph_test_policy_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); struct starpu_st_prio_deque *prio; if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) prio = &data->prio_cpu; else prio = &data->prio_gpu; /* block until some event happens */ /* Here helgrind would shout that this is unprotected, this is just an * integer access, and we hold the sched mutex, so we can not miss any * wake up. */ if (!STARPU_RUNNING_ON_VALGRIND && starpu_st_prio_deque_is_empty(prio)) return NULL; #ifdef STARPU_NON_BLOCKING_DRIVERS if (!STARPU_RUNNING_ON_VALGRIND && !data->computed) /* Not computed yet */ return NULL; if (!STARPU_RUNNING_ON_VALGRIND && starpu_bitmap_get(&data->waiters, workerid)) /* Nobody woke us, avoid bothering the mutex */ return NULL; #endif starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); starpu_worker_relax_off(); if (!data->computed) { STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); return NULL; } chosen_task = starpu_st_prio_deque_pop_task_for_worker(prio, workerid, NULL); if (!chosen_task) /* Tell pushers that we are waiting for tasks for us */ starpu_bitmap_set(&data->waiters, workerid); STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); return chosen_task; } struct starpu_sched_policy _starpu_sched_graph_test_policy = { .init_sched = initialize_graph_test_policy, .deinit_sched = deinitialize_graph_test_policy, .do_schedule = do_schedule_graph_test_policy, .push_task = push_task_graph_test_policy, .pop_task = pop_task_graph_test_policy, .policy_name = "graph_test", .policy_description = "test policy for using graphs in scheduling decisions", .worker_type = STARPU_WORKER_LIST, }; starpu-1.4.9+dfsg/src/sched_policies/helper_mct.c000066400000000000000000000215321507764646700220450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * Copyright (C) 2020-2020 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "helper_mct.h" #include /* Alpha, Beta and Gamma are MCT-specific values, which allows the * user to set more precisely the weight of each computing value. * Beta, for example, controls the weight of communications between * memories for the computation of the best component to choose. */ #define _STARPU_SCHED_ALPHA_DEFAULT 1.0 #define _STARPU_SCHED_BETA_DEFAULT 1.0 #define _STARPU_SCHED_GAMMA_DEFAULT 1000.0 struct _starpu_mct_data *starpu_mct_init_parameters(struct starpu_sched_component_mct_data *params) { struct _starpu_mct_data *data; _STARPU_MALLOC(data, sizeof(*data)); if (params) { data->alpha = params->alpha; data->beta = params->beta; /* data->_gamma: cost of one Joule in us. If gamma is set to 10^6, then one Joule cost 1s */ data->_gamma = params->_gamma; /* data->idle_power: Idle power of the whole machine in Watt */ data->idle_power = params->idle_power; } else { data->alpha = starpu_getenv_float_default("STARPU_SCHED_ALPHA", _STARPU_SCHED_ALPHA_DEFAULT); data->beta = starpu_getenv_float_default("STARPU_SCHED_BETA", _STARPU_SCHED_BETA_DEFAULT); #ifdef STARPU_NON_BLOCKING_DRIVERS if (starpu_getenv("STARPU_SCHED_GAMMA")) _STARPU_DISP("Warning: STARPU_SCHED_GAMMA was used, but --enable-blocking-drivers configuration was not set, CPU cores will not actually be sleeping\n"); #endif data->_gamma = starpu_getenv_float_default("STARPU_SCHED_GAMMA", _STARPU_SCHED_GAMMA_DEFAULT); data->idle_power = starpu_getenv_float_default("STARPU_IDLE_POWER", 0.0); } return data; } /* compute predicted_end by taking into account the case of the predicted transfer and the predicted_end overlap */ static double compute_expected_time(double now, double predicted_end, double predicted_length, double predicted_transfer) { STARPU_ASSERT(!isnan(now + predicted_end + predicted_length + predicted_transfer)); STARPU_ASSERT_MSG(now >= 0.0 && predicted_end >= 0.0 && predicted_length >= 0.0 && predicted_transfer >= 0.0, "now=%lf, predicted_end=%lf, predicted_length=%lf, predicted_transfer=%lf\n", now, predicted_end, predicted_length, predicted_transfer); /* TODO: actually schedule transfers */ /* Compute the transfer time which will not be overlapped */ /* However, no modification in calling function so that the whole transfer time is counted as a penalty */ if (now + predicted_transfer < predicted_end) { /* We may hope that the transfer will be finished by * the start of the task. */ predicted_transfer = 0; } else { /* The transfer will not be finished by then, take the * remainder into account */ predicted_transfer -= (predicted_end - now); } predicted_end += predicted_transfer; predicted_end += predicted_length; return predicted_end; } double starpu_mct_compute_fitness(struct _starpu_mct_data * d, double exp_end, double min_exp_end_of_task, double max_exp_end_of_workers, double transfer_len, double local_energy) { if(isnan(local_energy)) /* Energy not calibrated yet, but we cannot do this * automatically anyway, so ignoring this for now */ local_energy = 0.; /* Note: the expected end includes the data transfer duration, which we want to be able to tune separately */ /* min_exp_end_of_task is the minimum end time of the task over all workers */ double fitness = d->alpha * (exp_end - min_exp_end_of_task) + d->beta * transfer_len + d->_gamma * local_energy; /* max_exp_end is the maximum end time of the workers. If the total execution time is increased, then an additional energy penalty must be considered*/ if(exp_end > max_exp_end_of_workers) fitness += d->_gamma * d->idle_power * (exp_end - max_exp_end_of_workers) / 1000000.0; /* Since gamma is the cost in us of one Joules, then d->idle_power * (exp_end - max_exp_end) must be in Joules, thus the / 1000000.0 */ return fitness; } unsigned starpu_mct_compute_execution_times(struct starpu_sched_component *component, struct starpu_task *task, double *estimated_lengths, double *estimated_transfer_length, unsigned *suitable_components) { unsigned nsuitable_components = 0; unsigned i; for(i = 0; i < component->nchildren; i++) { struct starpu_sched_component * c = component->children[i]; /* Silence static analysis warnings */ estimated_lengths[i] = NAN; estimated_transfer_length[i] = NAN; if(starpu_sched_component_execute_preds(c, task, estimated_lengths + i)) { if(isnan(estimated_lengths[i])) /* The perfmodel had been purged since the task was pushed * onto the mct component. */ continue; STARPU_ASSERT_MSG(estimated_lengths[i]>=0, "component=%p, child[%u]=%p, estimated_lengths[%u]=%lf\n", component, i, c, i, estimated_lengths[i]); estimated_transfer_length[i] = starpu_sched_component_transfer_length(c, task); suitable_components[nsuitable_components++] = i; } } return nsuitable_components; } void starpu_mct_compute_expected_times(struct starpu_sched_component *component, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, double *estimated_lengths, double *estimated_transfer_length, double *estimated_ends_with_task, double *min_exp_end_of_task, double *max_exp_end_of_workers, unsigned *suitable_components, unsigned nsuitable_components) { unsigned i; double now = starpu_timing_now(); *min_exp_end_of_task = DBL_MAX; *max_exp_end_of_workers = 0.0; for(i = 0; i < nsuitable_components; i++) { unsigned icomponent = suitable_components[i]; struct starpu_sched_component * c = component->children[icomponent]; /* Estimated availability of worker */ double estimated_end = c->estimated_end(c); if (estimated_end < now) estimated_end = now; estimated_ends_with_task[icomponent] = compute_expected_time(now, estimated_end, estimated_lengths[icomponent], estimated_transfer_length[icomponent]); /* estimated_ends_with_task[icomponent]: estimated end of execution on the worker icomponent estimated_end: estimatated end of the worker min_exp_end_of_task: minimum estimated execution time of the task over all workers max_exp_end_of_workers: maximum estimated end of the already-scheduled tasks over all workers */ if(estimated_ends_with_task[icomponent] < *min_exp_end_of_task) *min_exp_end_of_task = estimated_ends_with_task[icomponent]; if(estimated_end > *max_exp_end_of_workers) *max_exp_end_of_workers = estimated_end; } } /* This function retrieves the energy consumption of a task in Joules*/ void starpu_mct_compute_energy(struct starpu_sched_component *component, struct starpu_task *task , double *local_energy, unsigned *suitable_components, unsigned nsuitable_components) { unsigned i; for(i = 0; i < nsuitable_components; i++) { unsigned icomponent = suitable_components[i]; int nimpl = 0; local_energy[icomponent] = starpu_task_worker_expected_energy(task, icomponent, component->tree->sched_ctx_id, nimpl); for (nimpl = 1; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { double e; e = starpu_task_worker_expected_energy(task, icomponent, component->tree->sched_ctx_id, nimpl); if (e < local_energy[icomponent]) local_energy[icomponent] = e; } } } int starpu_mct_get_best_component(struct _starpu_mct_data *d, struct starpu_task *task, double *estimated_lengths, double *estimated_transfer_length, double *estimated_ends_with_task, double *local_energy, double min_exp_end_of_task, double max_exp_end_of_workers, unsigned *suitable_components, unsigned nsuitable_components) { double best_fitness = DBL_MAX; int best_icomponent = -1; unsigned i; for(i = 0; i < nsuitable_components; i++) { int icomponent = suitable_components[i]; double tmp = starpu_mct_compute_fitness(d, estimated_ends_with_task[icomponent], min_exp_end_of_task, max_exp_end_of_workers, estimated_transfer_length[icomponent], local_energy[icomponent]); if(tmp < best_fitness) { best_fitness = tmp; best_icomponent = icomponent; } } if (best_icomponent != -1) { task->predicted = estimated_lengths[best_icomponent]; task->predicted_transfer = estimated_transfer_length[best_icomponent]; } return best_icomponent; } starpu-1.4.9+dfsg/src/sched_policies/helper_mct.h000066400000000000000000000050731507764646700220540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2020-2020 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #pragma GCC visibility push(hidden) /** @file */ struct _starpu_mct_data { double alpha; double beta; double _gamma; double idle_power; starpu_pthread_mutex_t scheduling_mutex; }; struct _starpu_mct_data *starpu_mct_init_parameters(struct starpu_sched_component_mct_data *params); unsigned starpu_mct_compute_execution_times(struct starpu_sched_component *component, struct starpu_task *task, double *estimated_lengths, double *estimated_transfer_length, unsigned *suitable_components); void starpu_mct_compute_expected_times(struct starpu_sched_component *component, struct starpu_task *task, double *estimated_lengths, double *estimated_transfer_length, double *estimated_ends_with_task, double *min_exp_end_of_task, double *max_exp_end_of_workers, unsigned *suitable_components, unsigned nsuitable_components); double starpu_mct_compute_fitness(struct _starpu_mct_data * d, double exp_end, double min_exp_end, double max_exp_end, double transfer_len, double local_energy); int starpu_mct_get_best_component(struct _starpu_mct_data *d, struct starpu_task *task, double *estimated_lengths, double *estimated_transfer_length, double *estimated_ends_with_task, double *local_energy, double min_exp_end_of_task, double max_exp_end_of_workers, unsigned *suitable_components, unsigned nsuitable_components); void starpu_mct_compute_energy(struct starpu_sched_component *component, struct starpu_task *task , double *local_energy, unsigned *suitable_components, unsigned nsuitable_components); int eager_calibration_push_task(struct starpu_sched_component * component, struct starpu_task * task); #pragma GCC visibility pop starpu-1.4.9+dfsg/src/sched_policies/heteroprio.c000066400000000000000000003767551507764646700221270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Distributed queues using performance modeling to assign tasks */ #include #include #include #include #include #include #include "heteroprio.h" #include #include #include #include #include #include #include #include #include #include #ifndef DBL_MIN #define DBL_MIN __DBL_MIN__ #endif #ifndef DBL_MAX #define DBL_MAX __DBL_MAX__ #endif #define STARPU_NB_TYPES STARPU_NARCH #define STR_MAX_SIZE 64 #define STRINGIFY(x) _STR(x) #define _STR(x) #x /** Push strategy for use_locality */ enum laheteroprio_push_strategy { PUSH_LS_SDH, PUSH_LS_SDH2, PUSH_LS_SDHB, PUSH_LC_SMWB, PUSH_NB_AUTO, // Always last to limit auto PUSH_LcS, PUSH_WORKER, PUSH_AUTO }; /** Queue used when use_locality is enabled */ struct laqueue { unsigned char* data; long int capacity; long int current_index; long int size_of_element; }; static struct laqueue laqueue_init(const long int size_of_element); static void laqueue_destroy(struct laqueue* q); //static long int laqueue_size(struct laqueue* q); static void laqueue_push(struct laqueue* q, void* data); static void* laqueue_pop(struct laqueue* q); //static void* laqueue_top(struct laqueue* q); struct starpu_laheteroprio_access_item { unsigned prio_idx; unsigned wgroup_idx; }; static struct laqueue laqueue_init(const long int size_of_element) { struct laqueue q; q.data = NULL; q.capacity = 0; q.current_index = 0; q.size_of_element = size_of_element; return q; } static void laqueue_destroy(struct laqueue* q) { STARPU_ASSERT(q->current_index == 0); free(q->data); } //static long int laqueue_size(struct laqueue* q) //{ // return q->capacity; //} static void laqueue_push(struct laqueue* q, void* data) { if(q->current_index == q->capacity) { q->capacity = (q->capacity+10)*2; _STARPU_REALLOC(q->data, q->size_of_element*q->capacity); } memcpy(&q->data[(q->current_index++)*q->size_of_element], data, q->size_of_element); } static void* laqueue_pop(struct laqueue* q) { STARPU_ASSERT(q->current_index-1 >= 0); unsigned char* data = &q->data[(q->current_index-1)*q->size_of_element]; q->current_index -= 1; return data; } //static void* laqueue_top(struct laqueue* q) //{ // STARPU_ASSERT(q->current_index-1 >= 0); // return &q->data[(q->current_index-1)*q->size_of_element]; //} /** How are codelet grouped by priority */ enum autoheteroprio_codelet_grouping_strategy { BY_PERF_MODEL_OR_NAME = 0, /** Using perfmodel symbol or codelet's name if no perfmodel */ BY_NAME_ONLY = 1 /** Based on the codelet's name only */ }; /* A bucket corresponds to a Pair of priorities * When a task is pushed with a priority X, it will be stored * into the bucket X. * All the tasks stored in the fifo should be computable by the arch * in valid_archs. * For example if valid_archs = (STARPU_CPU|STARPU_CUDA) * Then task->task->where should be at least (STARPU_CPU|STARPU_CUDA) */ struct _heteroprio_bucket { /* Tasks of the current bucket */ /* In case data locality is NOT used, only the first element of the array is used */ /* In case data locality IS used, the element refers to a worker group */ struct starpu_task_list tasks_queue[LAHETEROPRIO_MAX_WORKER_GROUPS]; /* The correct arch for the current bucket */ unsigned valid_archs; /* The slow factors for any archs */ float slow_factors_per_index[STARPU_NB_TYPES]; /* The base arch for the slow factor (the fatest arch for the current task in the bucket */ unsigned factor_base_arch_index; /**** Fields used when use_locality == 1 : ****/ /* the number of tasks in all the queues (was previously tasks_queue.ntasks) */ unsigned tasks_queue_ntasks; /* to keep track of the mn at push time */ struct laqueue auto_mn[LAHETEROPRIO_MAX_WORKER_GROUPS]; }; static int use_la_mode = 0; static int use_auto_mode = 0; /* Init a bucket */ static void _heteroprio_bucket_init(struct _heteroprio_bucket* bucket) { if(use_la_mode) { unsigned i; memset(bucket, 0, sizeof(*bucket)); for(i = 0 ; i < LAHETEROPRIO_MAX_WORKER_GROUPS ; ++i) { starpu_task_list_init(&bucket->tasks_queue[i]); bucket->auto_mn[i] = laqueue_init(sizeof(unsigned)*PUSH_NB_AUTO); } } else { memset(bucket, 0, sizeof(*bucket)); starpu_task_list_init(&bucket->tasks_queue[0]); } } /* Release a bucket */ static void _heteroprio_bucket_release(struct _heteroprio_bucket* bucket) { if(use_la_mode) { unsigned i; for(i = 0 ; i < LAHETEROPRIO_MAX_WORKER_GROUPS ; ++i) { STARPU_ASSERT(starpu_task_list_empty(&bucket->tasks_queue[i]) != 0); laqueue_destroy(&bucket->auto_mn[i]); } } else { STARPU_ASSERT(starpu_task_list_empty(&bucket->tasks_queue[0]) != 0); // don't task_lists need to be destroyed ? } } // Must be manually add to get more stats //#define LAHETEROPRIO_PRINT_STAT static enum laheteroprio_push_strategy getEnvAdvPush() { const char *push = starpu_getenv("STARPU_LAHETEROPRIO_PUSH"); if (push) { if(strcmp(push, "WORKER") == 0) { #ifdef LAHETEROPRIO_PRINT_STAT _STARPU_MSG("[LAHETEROPRIO] Use PUSH_WORKER\n"); #endif return PUSH_WORKER; } if(strcmp(push, "LcS") == 0) { #ifdef LAHETEROPRIO_PRINT_STAT _STARPU_MSG("[LAHETEROPRIO] Use PUSH_LcS\n"); #endif return PUSH_LcS; } if(strcmp(push, "LS_SDH") == 0) { #ifdef LAHETEROPRIO_PRINT_STAT _STARPU_MSG("[LAHETEROPRIO] Use PUSH_LS_SDH\n"); #endif return PUSH_LS_SDH; } if(strcmp(push, "LS_SDH2") == 0) { #ifdef LAHETEROPRIO_PRINT_STAT _STARPU_MSG("[LAHETEROPRIO] Use PUSH_LS_SDH2\n"); #endif return PUSH_LS_SDH2; } if(strcmp(push, "LS_SDHB") == 0) { #ifdef LAHETEROPRIO_PRINT_STAT _STARPU_MSG("[LAHETEROPRIO] Use PUSH_LS_SDHB\n"); #endif return PUSH_LS_SDHB; } if(strcmp(push, "LC_SMWB") == 0) { #ifdef LAHETEROPRIO_PRINT_STAT _STARPU_MSG("[LAHETEROPRIO] Use PUSH_LC_SMWB\n"); #endif return PUSH_LC_SMWB; } if(strcmp(push, "AUTO") == 0) { #ifdef LAHETEROPRIO_PRINT_STAT _STARPU_MSG("[LAHETEROPRIO] Use PUSH_AUTO\n"); #endif return PUSH_AUTO; } _STARPU_MSG("Undefined push strategy %s\n", push); } #ifdef LAHETEROPRIO_PRINT_STAT _STARPU_MSG("[LAHETEROPRIO] Use PUSH_AUTO\n"); #endif return PUSH_AUTO; } /* A worker is mainly composed of a fifo for the tasks * and some direct access to worker properties. * The fifo is implemented with any array, * to read a task, access tasks_queue[tasks_queue_index] * to write a task, access tasks_queue[(tasks_queue_index+tasks_queue_size)%HETEROPRIO_MAX_PREFETCH] */ /* ANDRA_MODIF: can use starpu fifo + starpu sched_mutex*/ struct _heteroprio_worker_wrapper { unsigned arch_type; unsigned arch_index; /** Only used when use_locality==0 : */ struct starpu_st_prio_deque tasks_queue; }; struct _starpu_heteroprio_data { starpu_pthread_mutex_t policy_mutex; struct starpu_bitmap waiters; /* The bucket to store the tasks */ struct _heteroprio_bucket buckets[HETEROPRIO_MAX_PRIO]; /* Whether heteroprio should consider data locality or not */ unsigned use_locality; /* The number of buckets for each arch */ unsigned nb_prio_per_arch_index[STARPU_NB_TYPES]; /* The mapping to the corresponding buckets */ unsigned prio_mapping_per_arch_index[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; /* The number of available tasks for a given arch (not prefetched) */ unsigned nb_remaining_tasks_per_arch_index[STARPU_NB_TYPES]; /* The total number of tasks in the bucket (not prefetched) */ unsigned total_tasks_in_buckets; /* The number of workers for a given arch */ unsigned nb_workers_per_arch_index[STARPU_NB_TYPES]; /* Information on all the workers */ struct _heteroprio_worker_wrapper workers_heteroprio[STARPU_NMAXWORKERS]; /*** use_locality==0 specific : */ /* The total number of prefetched tasks for a given arch */ unsigned nb_prefetched_tasks_per_arch_index[STARPU_NB_TYPES]; /*** use_locality==1 (laheteroprio) specific : */ /* Helps ensuring laheteroprio has been correctly initialized */ unsigned map_wgroup_has_been_called; /* Helps ensuring laheteroprio has been correctly initialized */ unsigned warned_change_nb_memory_nodes; /* Number of memory nodes */ unsigned nb_memory_nodes; /* The mapping to the corresponding prio prio_mapping_per_arch_index[x][prio_mapping_per_arch_index[x][y]] = y */ unsigned bucket_mapping_per_arch_index[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; /* The wgroup for all the workers */ unsigned workers_laheteroprio_wgroup_index[STARPU_NMAXWORKERS]; /* Number of wgroups */ unsigned nb_wgroups; /* The task queue for the tasks inserted by the master thread */ unsigned master_tasks_queue_idx; /* Arch related to each wgroup (for now only one kind of arch per wgroup */ unsigned arch_of_wgroups[LAHETEROPRIO_MAX_WORKER_GROUPS]; /* The pop offset per group */ struct starpu_laheteroprio_access_item wgroup_pop_access_orders[LAHETEROPRIO_MAX_WORKER_GROUPS][LAHETEROPRIO_MAX_WORKER_GROUPS*HETEROPRIO_MAX_PRIO]; /* Size of wgroup_pop_access_orders items */ unsigned wgroup_pop_access_orders_size[LAHETEROPRIO_MAX_WORKER_GROUPS]; /* The push strategy */ enum laheteroprio_push_strategy pushStrategyToUse; enum laheteroprio_push_strategy pushStrategySet; int pushStrategyHistory[PUSH_NB_AUTO]; starpu_pthread_mutex_t push_history_mutex; /*** auto-heteroprio specific : */ /** Strategy to determine on which base which can assign same priority to codelets */ enum autoheteroprio_codelet_grouping_strategy codelet_grouping_strategy; unsigned use_auto_calibration; starpu_pthread_mutex_t auto_calibration_mutex; // parameters: unsigned autoheteroprio_priority_ordering_policy; // reorder priority every priority_ordering_interval pushed tasks int priority_ordering_interval; // if set to 0: will gather data from execution (task time, NOD, etc.) unsigned freeze_data_gathering; unsigned autoheteroprio_print_prio_after_ordering; unsigned autoheteroprio_print_data_on_update; // 0 = if a task has no implementation on arch, expected time will be AUTOHETEROPRIO_LONG_TIME // 1 = if a task has no implementation on arch, expected time will be the shortest time among all archs unsigned autoheteroprio_time_estimation_policy; // environment hyperparameters double NTnodPond; double NTexpVal; double BNexpVal; double URTurt; double URT2urt; double URT2prop; double and2pond; double and3pond; double and4pond; double and5xoffset; double and5yoffset; double and9xoffset; double and9yoffset; double and10xoffset; double and10yoffset; double and11xoffset; double and11yoffset; double ANTnodPond; double ANTexpVal; int priority_last_ordering; // lightweight time profiling: // busy time and free time of each arch for current execution double current_arch_busy_time[STARPU_NB_TYPES]; double current_arch_free_time[STARPU_NB_TYPES]; // last time a worker executed either pre_exec or post_exec hook double last_hook_exec_time[STARPU_NMAXWORKERS]; // task data: unsigned found_codelet_names_length; char found_codelet_names[HETEROPRIO_MAX_PRIO][CODELET_MAX_NAME_LENGTH]; unsigned found_codelet_names_on_arch[STARPU_NB_TYPES]; // busy time and free time of each arch double average_arch_busy_time[STARPU_NB_TYPES]; double average_arch_free_time[STARPU_NB_TYPES]; // average prio NOD for each task double prio_average_NOD[HETEROPRIO_MAX_PRIO]; // NOD sample size unsigned prio_average_NOD_count[HETEROPRIO_MAX_PRIO]; // average prio URT for each task double prio_average_URT[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; // URT sample size unsigned prio_average_URT_count[HETEROPRIO_MAX_PRIO]; // average execution time for each arch double prio_average_time_arch[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; // sample size of execution times unsigned prio_average_time_arch_count[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; // true if we have at least one sample to compute the average execution time unsigned prio_arch_has_time_info[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; // proportion of each task during execution (sum of each prio should equal 1) double prio_overall_proportion[HETEROPRIO_MAX_PRIO]; // sample size (number of added tasks of a type) unsigned prio_overall_proportion_count[HETEROPRIO_MAX_PRIO]; // actual location of a task execution (~= probability of being executed on an arch) (sum of each arch for a prio should equal 1) double prio_arch_proportion[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; unsigned prio_arch_proportion_count[HETEROPRIO_MAX_PRIO]; // sum of each successor's best time (better arch) double prio_average_successors_best_time_sum[HETEROPRIO_MAX_PRIO]; // sample size unsigned prio_average_successors_best_time_sum_count[HETEROPRIO_MAX_PRIO]; // best possible time of a prio (between archs) double prio_average_best[HETEROPRIO_MAX_PRIO]; unsigned prio_average_best_count[HETEROPRIO_MAX_PRIO]; }; // declare prototypes void starpu_heteroprio_map_wgroup_memory_nodes_hp(struct _starpu_heteroprio_data *hp); static double get_best_autoheteroprio_estimated_time(struct _starpu_heteroprio_data *hp, unsigned priority); static int starpu_heteroprio_types_to_arch(enum starpu_worker_archtype arch) { if (arch >= STARPU_NARCH) return 0; return STARPU_WORKER_TO_MASK(arch); } static int arch_can_execute_prio(struct _starpu_heteroprio_data *hp, unsigned arch, unsigned prio) { return (hp->buckets[prio].valid_archs&starpu_heteroprio_types_to_arch(arch))!=0; } void starpu_heteroprio_set_use_locality(unsigned sched_ctx_id, unsigned use_locality) { struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); STARPU_ASSERT(use_locality == 0 || use_locality == 1); hp->use_locality = use_locality; } /** Tell how many prio there are for a given arch */ void starpu_heteroprio_set_nb_prios_hp(struct _starpu_heteroprio_data *hp, enum starpu_worker_archtype arch, unsigned max_prio) { STARPU_ASSERT(max_prio <= HETEROPRIO_MAX_PRIO); hp->nb_prio_per_arch_index[arch] = max_prio; if(hp->use_locality) { starpu_heteroprio_map_wgroup_memory_nodes_hp(hp); } } /** Tell how many prio there are for a given arch */ void starpu_heteroprio_set_nb_prios(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned max_prio) { struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); starpu_heteroprio_set_nb_prios_hp(hp, arch, max_prio); } void starpu_heteroprio_set_mapping_hp_without_arch(struct _starpu_heteroprio_data *hp, enum starpu_worker_archtype arch, unsigned source_prio, unsigned dest_bucket_id) { STARPU_ASSERT(dest_bucket_id < HETEROPRIO_MAX_PRIO); hp->prio_mapping_per_arch_index[arch][source_prio] = dest_bucket_id; if(hp->use_locality == 1) { hp->bucket_mapping_per_arch_index[arch][dest_bucket_id] = source_prio; } } void starpu_heteroprio_set_mapping_without_arch(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned source_prio, unsigned dest_bucket_id) { struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); starpu_heteroprio_set_mapping_hp_without_arch(hp, arch, source_prio, dest_bucket_id); } /** Set the mapping for a given arch prio=>bucket */ void starpu_heteroprio_set_mapping_hp(struct _starpu_heteroprio_data *hp, enum starpu_worker_archtype arch, unsigned source_prio, unsigned dest_bucket_id) { starpu_heteroprio_set_mapping_hp_without_arch(hp, arch, source_prio, dest_bucket_id); hp->buckets[dest_bucket_id].valid_archs |= starpu_heteroprio_types_to_arch(arch); _STARPU_DEBUG("Adding arch %d to bucket %u\n", arch, dest_bucket_id); } inline void starpu_heteroprio_set_mapping(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned source_prio, unsigned dest_bucket_id) { struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); starpu_heteroprio_set_mapping_hp(hp, arch, source_prio, dest_bucket_id); } void starpu_heteroprio_clear_mapping_hp(struct _starpu_heteroprio_data *hp) { // direct mapping for all archs (and overwrite any changes to bucket archs) unsigned arch; for(arch=0;archbuckets[prio].valid_archs = 0; } } } void starpu_heteroprio_set_faster_arch_hp(struct _starpu_heteroprio_data *hp, enum starpu_worker_archtype arch, unsigned bucket_id) { STARPU_ASSERT(bucket_id < HETEROPRIO_MAX_PRIO); hp->buckets[bucket_id].factor_base_arch_index = arch; hp->buckets[bucket_id].slow_factors_per_index[arch] = 0; } /** Tell which arch is the faster for the tasks of a bucket (optional) */ inline void starpu_heteroprio_set_faster_arch(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id) { struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); starpu_heteroprio_set_faster_arch_hp(hp, arch, bucket_id); } void starpu_heteroprio_set_arch_slow_factor_hp(struct _starpu_heteroprio_data *hp, enum starpu_worker_archtype arch, unsigned bucket_id, float slow_factor) { STARPU_ASSERT(bucket_id < HETEROPRIO_MAX_PRIO); hp->buckets[bucket_id].slow_factors_per_index[arch] = slow_factor; } /** Tell how slow is a arch for the tasks of a bucket (optional) */ inline void starpu_heteroprio_set_arch_slow_factor(unsigned sched_ctx_id, enum starpu_worker_archtype arch, unsigned bucket_id, float slow_factor) { struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); starpu_heteroprio_set_arch_slow_factor_hp(hp, arch, bucket_id, slow_factor); } void starpu_heteroprio_set_pop_access_order_hp(struct _starpu_heteroprio_data *hp, unsigned wgroup_id, const struct starpu_laheteroprio_access_item access_items[], const unsigned size) { STARPU_ASSERT(size <= LAHETEROPRIO_MAX_WORKER_GROUPS * HETEROPRIO_MAX_PRIO); const unsigned arch_of_wgroup = hp->arch_of_wgroups[wgroup_id]; const unsigned nb_prios = hp->nb_prio_per_arch_index[arch_of_wgroup]; const unsigned nb_wgroups = hp->nb_wgroups; STARPU_ASSERT(size <= nb_wgroups *nb_prios); memcpy(hp->wgroup_pop_access_orders[wgroup_id], access_items, sizeof(struct starpu_laheteroprio_access_item) *size); hp->wgroup_pop_access_orders_size[wgroup_id] = size; } void starpu_heteroprio_set_pop_access_order(unsigned sched_ctx_id, unsigned wgroup_id, const struct starpu_laheteroprio_access_item access_items[], const unsigned size) { STARPU_ASSERT(size <= LAHETEROPRIO_MAX_WORKER_GROUPS * HETEROPRIO_MAX_PRIO); struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data *) starpu_sched_ctx_get_policy_data(sched_ctx_id); starpu_heteroprio_set_pop_access_order_hp(hp, wgroup_id, access_items, size); } struct dist { double dist; unsigned wgroup_idx; }; static int comp_dist(const void *elem1, const void *elem2) { const struct dist *d1 = ((struct dist *) elem1); const struct dist *d2 = ((struct dist *) elem2); if (d1->dist > d2->dist) return 1; if (d1->dist < d2->dist) return -1; return 0; } void starpu_heteroprio_map_wgroup_memory_nodes_hp(struct _starpu_heteroprio_data *hp) { STARPU_ASSERT_MSG(hp->use_locality == 1, "starpu_heteroprio_map_wgroup_memory_nodes has been called without enabling LA mode\n"); hp->map_wgroup_has_been_called = 1; // Set flag to 1 // Set the number of memory nodes hp->nb_memory_nodes = starpu_memory_nodes_get_count(); const unsigned current_nb_memory_nodes = hp->nb_memory_nodes; hp->warned_change_nb_memory_nodes = 0; hp->nb_wgroups = current_nb_memory_nodes; // Set memory nodes' type { unsigned idx_memnode; for (idx_memnode = 0; idx_memnode < current_nb_memory_nodes; ++idx_memnode) { const enum starpu_node_kind memnode_kind = starpu_node_get_kind(idx_memnode); hp->arch_of_wgroups[idx_memnode] = starpu_memory_node_get_worker_archtype(memnode_kind); } } // Set workers' type { unsigned idx_worker; for (idx_worker = 0; idx_worker < starpu_worker_get_count(); ++idx_worker) { hp->workers_laheteroprio_wgroup_index[idx_worker] = starpu_worker_get_memory_node(idx_worker); } } if (starpu_cpu_worker_get_count() != 0) { unsigned cpu_0 = starpu_worker_get_by_type(STARPU_CPU_WORKER, 0); hp->master_tasks_queue_idx = starpu_worker_get_memory_node(cpu_0); } else { // Consider memory node 0 as the CPU STARPU_ASSERT(starpu_node_get_kind(0) == STARPU_CPU_RAM); hp->master_tasks_queue_idx = 0; } // Build memory distance matrix double dist_mem_matrix[LAHETEROPRIO_MAX_WORKER_GROUPS][LAHETEROPRIO_MAX_WORKER_GROUPS] = {{ 0 }}; { unsigned idx_mem_node1; unsigned idx_mem_node2; double max_dist_mem = 0; for (idx_mem_node1 = 0; idx_mem_node1 < current_nb_memory_nodes; ++idx_mem_node1) { for (idx_mem_node2 = 0; idx_mem_node2 < current_nb_memory_nodes; ++idx_mem_node2) { if (idx_mem_node1 == idx_mem_node2) { dist_mem_matrix[idx_mem_node1][idx_mem_node2] = 0; } else { dist_mem_matrix[idx_mem_node1][idx_mem_node2] = starpu_transfer_predict(idx_mem_node2, idx_mem_node1, 1024 * 1024 *1024); max_dist_mem = STARPU_MAX(max_dist_mem, dist_mem_matrix[idx_mem_node1][idx_mem_node2]); } } } for (idx_mem_node1 = 0; idx_mem_node1 < current_nb_memory_nodes; ++idx_mem_node1) { for (idx_mem_node2 = 0; idx_mem_node2 < current_nb_memory_nodes; ++idx_mem_node2) { dist_mem_matrix[idx_mem_node1][idx_mem_node2] /= max_dist_mem; } } } // Build priority distance matrix double dist_prio_matrix[LAHETEROPRIO_MAX_WORKER_GROUPS][LAHETEROPRIO_MAX_WORKER_GROUPS] = {{ 0 }}; { unsigned idx_prio_node1; unsigned idx_prio_node2; for (idx_prio_node1 = 0; idx_prio_node1 < current_nb_memory_nodes; ++idx_prio_node1) { for (idx_prio_node2 = 0; idx_prio_node2 < current_nb_memory_nodes; ++idx_prio_node2) { if (idx_prio_node1 == idx_prio_node2) { dist_prio_matrix[idx_prio_node1][idx_prio_node2] = 0; } else { const unsigned arch_wgroup1 = hp->arch_of_wgroups[idx_prio_node1]; const unsigned arch_wgroup2 = hp->arch_of_wgroups[idx_prio_node2]; double diff = 0; int cpt1 = 0; int cpt2 = 0; unsigned idx; for(idx = 0; idx < HETEROPRIO_MAX_PRIO; ++idx) { diff += fabs((double)(hp->bucket_mapping_per_arch_index[arch_wgroup1][idx] + 1) - (double)(hp->bucket_mapping_per_arch_index[arch_wgroup2][idx] + 1)); if (hp->bucket_mapping_per_arch_index[arch_wgroup1][idx] != (unsigned)-1) cpt1 += 1; if (hp->bucket_mapping_per_arch_index[arch_wgroup2][idx] != (unsigned)-1) cpt2 += 1; } const int maxcpt = STARPU_MAX(cpt1, cpt2); diff /= (maxcpt + 1) *(maxcpt + 2) / 2.; dist_prio_matrix[idx_prio_node1][idx_prio_node2] = diff; } } } } // Build final distance matrix double dist_matrix[LAHETEROPRIO_MAX_WORKER_GROUPS][LAHETEROPRIO_MAX_WORKER_GROUPS] = {{ 0 }}; { const double alpha = 0.5; unsigned idx_node1; unsigned idx_node2; for (idx_node1 = 0; idx_node1 < current_nb_memory_nodes; ++idx_node1) { for (idx_node2 = 0; idx_node2 < current_nb_memory_nodes; ++idx_node2) { dist_matrix[idx_node1][idx_node2] = (1 - dist_prio_matrix[idx_node1][idx_node2]) *alpha + dist_mem_matrix[idx_node1][idx_node2] *(1 - alpha); } } } unsigned nb_closed_nodes[STARPU_NB_TYPES]; { char var_name[STR_MAX_SIZE]; // Retrieving environment variable STARPU_LAHETEROPRIO_S_* for each architecture unsigned arch; for(arch = 0; arch < STARPU_NB_TYPES; ++arch) { const char *arch_env_name = starpu_worker_get_type_as_env_var(arch); if(arch_env_name) { snprintf(var_name, STR_MAX_SIZE, "STARPU_LAHETEROPRIO_S_%s", arch_env_name); unsigned default_value = arch == STARPU_CPU_WORKER ? current_nb_memory_nodes - 1 : 1; nb_closed_nodes[arch] = starpu_getenv_number_default(var_name, default_value); } } } unsigned nb_prio_step[STARPU_NB_TYPES]; { char var_name[STR_MAX_SIZE]; // Retrieving environment variable STARPU_LAHETEROPRIO_PRIO_STEP_* for each architecture unsigned arch; for(arch = 0; arch < STARPU_NB_TYPES; ++arch) { const char *arch_env_name = starpu_worker_get_type_as_env_var(arch); if(arch_env_name) { snprintf(var_name, STR_MAX_SIZE, "STARPU_LAHETEROPRIO_PRIO_STEP_%s", arch_env_name); unsigned default_value = arch != STARPU_CPU_WORKER ? hp->nb_prio_per_arch_index[arch] : 1; nb_prio_step[arch] = starpu_getenv_number_default(var_name, default_value); } } } #ifdef LAHETEROPRIO_PRINT_STAT _STARPU_MSG("[LAHETEROPRIO] nb_closed_nodes[STARPU_CPU_WORKER] %u\n", nb_closed_nodes[STARPU_CPU_WORKER]); _STARPU_MSG("[LAHETEROPRIO] nb_closed_nodes[STARPU_CUDA_WORKER] %u\n", nb_closed_nodes[STARPU_CUDA_WORKER]); _STARPU_MSG("[LAHETEROPRIO] nb_prio_step[STARPU_CPU_WORKER] %u\n", nb_prio_step[STARPU_CPU_WORKER]); _STARPU_MSG("[LAHETEROPRIO] nb_prio_step[STARPU_CUDA_WORKER] %u\n", nb_prio_step[STARPU_CUDA_WORKER]); #endif STARPU_ASSERT(hp->nb_wgroups == current_nb_memory_nodes); unsigned wgroup_idx; for (wgroup_idx = 0; wgroup_idx < current_nb_memory_nodes; ++wgroup_idx) { const unsigned wgroup_arch = hp->arch_of_wgroups[wgroup_idx]; struct dist others[LAHETEROPRIO_MAX_WORKER_GROUPS]; unsigned access_wgroup_idx; for (access_wgroup_idx = 0; access_wgroup_idx < current_nb_memory_nodes; ++access_wgroup_idx) { others[access_wgroup_idx].wgroup_idx = access_wgroup_idx; others[access_wgroup_idx].dist = dist_matrix[wgroup_idx][access_wgroup_idx]; } { struct dist tmp = others[wgroup_idx]; others[wgroup_idx] = others[0]; others[0] = tmp; } qsort(others + 1, current_nb_memory_nodes - 1, sizeof(struct dist), comp_dist); struct starpu_laheteroprio_access_item buffer_access_items[LAHETEROPRIO_MAX_WORKER_GROUPS *HETEROPRIO_MAX_PRIO]; const unsigned nb_prio_in_wgroup = hp->nb_prio_per_arch_index[hp->arch_of_wgroups[wgroup_idx]]; unsigned access_idx = 0; unsigned prio_block_idx; for (prio_block_idx = 0; prio_block_idx < nb_prio_in_wgroup; prio_block_idx += nb_prio_step[wgroup_arch]) { { access_wgroup_idx = 0; unsigned prio_idx; for (prio_idx = prio_block_idx; prio_idx < STARPU_MIN(prio_block_idx + nb_prio_step[wgroup_arch], nb_prio_in_wgroup); ++prio_idx) { buffer_access_items[access_idx].prio_idx = prio_idx; buffer_access_items[access_idx].wgroup_idx = others[access_wgroup_idx].wgroup_idx; access_idx += 1; } } unsigned prio_idx; for (prio_idx = prio_block_idx; prio_idx < STARPU_MIN(prio_block_idx + nb_prio_step[wgroup_arch], nb_prio_in_wgroup); ++prio_idx) { for (access_wgroup_idx = 1; access_wgroup_idx < STARPU_MIN(nb_closed_nodes[wgroup_arch] + 1, current_nb_memory_nodes); ++access_wgroup_idx) { buffer_access_items[access_idx].prio_idx = prio_idx; buffer_access_items[access_idx].wgroup_idx = others[access_wgroup_idx].wgroup_idx; access_idx += 1; } } } unsigned prio_idx; for (prio_idx = 0; prio_idx < nb_prio_in_wgroup; ++prio_idx) { for (access_wgroup_idx = nb_closed_nodes[wgroup_arch] + 1; access_wgroup_idx < current_nb_memory_nodes; ++access_wgroup_idx) { buffer_access_items[access_idx].prio_idx = prio_idx; buffer_access_items[access_idx].wgroup_idx = others[access_wgroup_idx].wgroup_idx; access_idx += 1; } } starpu_heteroprio_set_pop_access_order_hp(hp, wgroup_idx, buffer_access_items, access_idx); } } void starpu_heteroprio_map_wgroup_memory_nodes(unsigned sched_ctx_id) { struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data *) starpu_sched_ctx_get_policy_data(sched_ctx_id); starpu_heteroprio_map_wgroup_memory_nodes_hp(hp); } void starpu_heteroprio_print_wgroups(FILE *stream, unsigned sched_ctx_id) { struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data *) starpu_sched_ctx_get_policy_data(sched_ctx_id); STARPU_ASSERT_MSG(hp->use_locality == 1, "starpu_heteroprio_print_wgroups has been called without enabling LA mode\n"); fprintf(stream, "[STARPU-LAHETEROPRIO] There are %u groups\n", hp->nb_wgroups); char dest_name[512]; unsigned worker_id; for (worker_id = 0; worker_id < starpu_worker_get_count(); ++worker_id) { starpu_worker_get_name(worker_id, dest_name, 512); fprintf(stream, "[STARPU-LAHETEROPRIO] Worker %u => group %u (%s)\n", worker_id, hp->workers_laheteroprio_wgroup_index[worker_id], dest_name); } fprintf(stream, "\n"); unsigned idx_wgroup; for (idx_wgroup = 0; idx_wgroup < hp->nb_wgroups; ++idx_wgroup) { int access_order[LAHETEROPRIO_MAX_WORKER_GROUPS][HETEROPRIO_MAX_PRIO] = {{ 0 }}; memset(access_order, -1, sizeof(access_order[0][0]) *LAHETEROPRIO_MAX_WORKER_GROUPS * HETEROPRIO_MAX_PRIO); const unsigned wgroup_arch = hp->arch_of_wgroups[idx_wgroup]; const unsigned nb_prios = hp->nb_prio_per_arch_index[wgroup_arch]; const unsigned nb_wgroups = hp->nb_wgroups; const struct starpu_laheteroprio_access_item *wgroup_access_order = hp->wgroup_pop_access_orders[idx_wgroup]; const unsigned wgroup_access_order_size = hp->wgroup_pop_access_orders_size[idx_wgroup]; unsigned idx_access_item; for (idx_access_item = 0; idx_access_item < wgroup_access_order_size; ++idx_access_item) { const unsigned current_wgroupid = wgroup_access_order[idx_access_item].wgroup_idx; const unsigned current_prio = wgroup_access_order[idx_access_item].prio_idx; access_order[current_wgroupid][current_prio] = idx_access_item; } fprintf(stream, "[STARPU-LAHETEROPRIO] Access order for wgroup %u (of arch type %u):\n", idx_wgroup, wgroup_arch); unsigned idx_prio; for (idx_prio = nb_prios; idx_prio > 0; --idx_prio) { const unsigned current_bucket = hp->prio_mapping_per_arch_index[wgroup_arch][idx_prio - 1]; fprintf(stream, "[STARPU-LAHETEROPRIO] Prio %3u (Bucket %3u) => ", idx_prio - 1, current_bucket); unsigned idx_wgroup_prio; for (idx_wgroup_prio = 0; idx_wgroup_prio < nb_wgroups; ++idx_wgroup_prio) { if (access_order[idx_wgroup][idx_prio - 1] == -1) { fprintf(stream, "[XX] "); } else { fprintf(stream, "[%2d] ", access_order[idx_wgroup][idx_prio - 1]); } } fprintf(stream, "\n"); } fprintf(stream, "\n"); } } /** If the user does not provide an init callback we create a single bucket for all architectures */ static inline void default_init_sched(unsigned sched_ctx_id) { int min_prio = starpu_sched_ctx_get_min_priority(sched_ctx_id); int max_prio = starpu_sched_ctx_get_max_priority(sched_ctx_id); STARPU_ASSERT(min_prio >= 0); STARPU_ASSERT(max_prio >= 0); enum starpu_worker_archtype type; // By default each type of devices uses 1 bucket and no slow factor for (type = 0; type < STARPU_NARCH; type++) if (starpu_worker_get_count_by_type(type) > 0) starpu_heteroprio_set_nb_prios(sched_ctx_id, type, max_prio-min_prio+1); // Direct mapping int prio; for(prio=min_prio ; prio<=max_prio ; prio++) { // By default each type of devices uses 1 bucket and no slow factor for (type = 0; type < STARPU_NARCH; type++) if (starpu_worker_get_count_by_type(type) > 0) starpu_heteroprio_set_mapping(sched_ctx_id, type, prio, prio); } } /** stats of heteroprio when use_locality==1 */ #ifdef LAHETEROPRIO_PRINT_STAT struct laheteropriostats { long int nb_tasks; long int nb_tasks_per_worker[128][HETEROPRIO_MAX_PRIO]; long int nb_tasks_per_wgroup[LAHETEROPRIO_MAX_WORKER_GROUPS][HETEROPRIO_MAX_PRIO]; long int task_skipt_due_to_factor_per_worker[128][HETEROPRIO_MAX_PRIO]; long int task_list_empty_per_worker[128][HETEROPRIO_MAX_PRIO]; long int task_stolen_per_worker[128][HETEROPRIO_MAX_PRIO]; long int task_stolen_in_wgroup[LAHETEROPRIO_MAX_WORKER_GROUPS][HETEROPRIO_MAX_PRIO]; long int push_redirect[128+1][LAHETEROPRIO_MAX_WORKER_GROUPS]; long int pop_redirect[128][LAHETEROPRIO_MAX_WORKER_GROUPS]; long int push_to_use[128][PUSH_NB_AUTO]; }; struct laheteropriostats lastats; #endif static void check_heteroprio_mapping(struct _starpu_heteroprio_data *hp) { //return 0; unsigned idx_prio; /* Ensure that information have been correctly filled */ unsigned check_all_archs[HETEROPRIO_MAX_PRIO]; memset(check_all_archs, 0, sizeof(unsigned)*HETEROPRIO_MAX_PRIO); unsigned arch_index; for(arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index) { STARPU_ASSERT(hp->nb_prio_per_arch_index[arch_index] <= HETEROPRIO_MAX_PRIO); unsigned check_archs[HETEROPRIO_MAX_PRIO]; memset(check_archs, 0, sizeof(unsigned)*HETEROPRIO_MAX_PRIO); for(idx_prio = 0; idx_prio < hp->nb_prio_per_arch_index[arch_index]; ++idx_prio) { const unsigned mapped_prio = hp->prio_mapping_per_arch_index[arch_index][idx_prio]; STARPU_ASSERT(mapped_prio <= HETEROPRIO_MAX_PRIO); STARPU_ASSERT(hp->buckets[mapped_prio].slow_factors_per_index[arch_index] >= 0.0); STARPU_ASSERT(hp->buckets[mapped_prio].valid_archs & starpu_heteroprio_types_to_arch(arch_index)); check_archs[mapped_prio] = 1; check_all_archs[mapped_prio] += 1; } for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) { /* Ensure the current arch use a bucket or someone else can use it */ STARPU_ASSERT(check_archs[idx_prio] == 1 || hp->buckets[idx_prio].valid_archs == 0 || (hp->buckets[idx_prio].valid_archs & ~starpu_heteroprio_types_to_arch(arch_index)) != 0); } } /* Ensure that if a valid_archs = (STARPU_CPU|STARPU_CUDA) then check_all_archs[] = 2 for example */ for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) { unsigned nb_arch_on_bucket = 0; for(arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index) { if(hp->buckets[idx_prio].valid_archs & starpu_heteroprio_types_to_arch(arch_index)) { nb_arch_on_bucket += 1; } } STARPU_ASSERT_MSG(check_all_archs[idx_prio] == nb_arch_on_bucket, "check_all_archs[idx_prio(%u)] = %u != nb_arch_on_bucket = %u\n", idx_prio, check_all_archs[idx_prio], nb_arch_on_bucket); } } static void starpu_autoheteroprio_add_task(struct _starpu_heteroprio_data *hp, const char name[CODELET_MAX_NAME_LENGTH], unsigned archs[STARPU_NB_TYPES]) { unsigned arch; for(arch=0;archfound_codelet_names_on_arch[arch], hp->found_codelet_names_length); ++hp->found_codelet_names_on_arch[arch]; starpu_heteroprio_set_nb_prios_hp(hp, arch, hp->found_codelet_names_on_arch[arch]); } } // TODO: remap laheteroprio policy strncpy(&hp->found_codelet_names[hp->found_codelet_names_length][0], name, CODELET_MAX_NAME_LENGTH); ++hp->found_codelet_names_length; check_heteroprio_mapping(hp); // ensures that priorities are correctly mapped } #define _HETEROPRIO_DIR_MAXLEN 256 static char _heteroprio_data_dir[_HETEROPRIO_DIR_MAXLEN]; /* Try to get the name of the program, to get specific data file for each program */ #ifdef STARPU_HAVE_PROGRAM_INVOCATION_SHORT_NAME #define _progname program_invocation_short_name #else #define _progname "UNKNOWN_PROGRAM" #endif static char *_starpu_heteroprio_get_data_dir() { static int directory_existence_was_tested = 0; if(!directory_existence_was_tested) { char *path = starpu_getenv("STARPU_HETEROPRIO_DATA_DIR"); if(path) { snprintf(_heteroprio_data_dir, _HETEROPRIO_DIR_MAXLEN, "%s/", path); } else { snprintf(_heteroprio_data_dir, _HETEROPRIO_DIR_MAXLEN, "%s/heteroprio/", _starpu_get_perf_model_dir_default()); } _starpu_mkpath_and_check(_heteroprio_data_dir, S_IRWXU); directory_existence_was_tested = 1; } return _heteroprio_data_dir; } static void starpu_autoheteroprio_fetch_task_data(struct _starpu_heteroprio_data *hp) { const char *custom_path = starpu_getenv("STARPU_HETEROPRIO_DATA_FILE"); #ifndef STARPU_HAVE_PROGRAM_INVOCATION_SHORT_NAME if(!custom_path) { _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio can't determine the program's name to automatically store performance data. " "You can specify a path to store program associated data with STARPU_HETEROPRIO_DATA_FILE\n"); } #endif char path[_HETEROPRIO_DIR_MAXLEN+6]; if(!custom_path) { snprintf(path, _HETEROPRIO_DIR_MAXLEN+6, "%s/%s.data", _starpu_heteroprio_get_data_dir(), _progname); } FILE *autoheteroprio_file; int locked; autoheteroprio_file = fopen(custom_path ? custom_path : path, "r"); if(autoheteroprio_file == NULL) { // unable to open heteroprio data file return; } locked = _starpu_frdlock(autoheteroprio_file) == 0; _starpu_drop_comments(autoheteroprio_file); unsigned number_of_archs; unsigned archs[STARPU_NB_TYPES]; unsigned arch_ind, arch_type; int c; if(fscanf(autoheteroprio_file, "%u", &number_of_archs) != 1) { fclose(autoheteroprio_file); _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is missing a number of architectures\n"); return; } // Count number of archs not available in this version const unsigned ignored_archs = STARPU_MAX(0, (int) (number_of_archs - STARPU_NB_TYPES)); const unsigned supported_archs = STARPU_MIN(STARPU_NB_TYPES, number_of_archs); // Reading list of supported architectures for(arch_ind = 0; arch_ind < supported_archs; ++arch_ind) { if(fscanf(autoheteroprio_file, "%u", &arch_type) != 1) { fclose(autoheteroprio_file); _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is missing an architecture id\n"); return; } archs[arch_ind] = arch_type; } for(arch_ind = 0; arch_ind < ignored_archs; ++arch_ind) { if(fscanf(autoheteroprio_file, "%u", &arch_type) != 1) { fclose(autoheteroprio_file); _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is missing an architecture id\n"); return; } } if(getc(autoheteroprio_file) != '\n') { fclose(autoheteroprio_file); _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is improperly formatted\n"); return; } _starpu_drop_comments(autoheteroprio_file); // Reading architectures average times double avg_arch_busy_time, avg_arch_free_time; for(arch_ind = 0; arch_ind < supported_archs; ++arch_ind) { if(fscanf(autoheteroprio_file, "%lf %lf", &avg_arch_busy_time, &avg_arch_free_time) != 2) { fclose(autoheteroprio_file); _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is missing an architecture average times id\n"); return; } else if(arch_ind < STARPU_NB_TYPES && archs[arch_ind] < STARPU_NB_TYPES) { hp->average_arch_busy_time[archs[arch_ind]] = avg_arch_busy_time; hp->average_arch_free_time[archs[arch_ind]] = avg_arch_free_time; } } for(arch_ind = 0; arch_ind < ignored_archs; ++arch_ind) { if(fscanf(autoheteroprio_file, "%lf %lf", &avg_arch_busy_time, &avg_arch_free_time) != 2) { fclose(autoheteroprio_file); _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is missing an architecture average times id\n"); return; } } if(getc(autoheteroprio_file) != '\n') { fclose(autoheteroprio_file); _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is improperly formatted\n"); return; } _starpu_drop_comments(autoheteroprio_file); unsigned codelet_archs[STARPU_NB_TYPES]; unsigned codelet_exec_archs[STARPU_NB_TYPES]; unsigned prio = hp->found_codelet_names_length; char codelet_name[CODELET_MAX_NAME_LENGTH+1]; unsigned ignored_lines, arch_can_execute; // Read saved stats for each codelet while(fscanf(autoheteroprio_file, "%" STRINGIFY(CODELET_MAX_NAME_LENGTH) "s", codelet_name) == 1) { memset(codelet_exec_archs, 0, STARPU_NB_TYPES * sizeof(unsigned)); // Read compatible architectures ignored_lines = 0; for(arch_ind = 0; arch_ind < supported_archs; ++arch_ind) { if(fscanf(autoheteroprio_file, "%u", &arch_can_execute) != 1) { fclose(autoheteroprio_file); _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is missing an architecture information for a codelet\n"); return; } else if(arch_ind < STARPU_NB_TYPES) { codelet_archs[arch_ind] = arch_can_execute; if(archs[arch_ind] < STARPU_NB_TYPES) codelet_exec_archs[archs[arch_ind]] = arch_can_execute; } } for(arch_ind = 0; arch_ind < ignored_archs; ++arch_ind) { if(fscanf(autoheteroprio_file, "%u", &arch_can_execute) != 1) { fclose(autoheteroprio_file); _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is missing an architecture information for a codelet\n"); return; } else if(arch_can_execute) { ignored_lines += 1; } } // Read general codelet data if(fscanf(autoheteroprio_file, "%lf %u %u %lf %u %u %lf %u %lf %u", &hp->prio_average_NOD[prio], &hp->prio_average_NOD_count[prio], &hp->prio_average_URT_count[prio], &hp->prio_overall_proportion[prio], &hp->prio_overall_proportion_count[prio], &hp->prio_arch_proportion_count[prio], &hp->prio_average_successors_best_time_sum[prio], &hp->prio_average_successors_best_time_sum_count[prio], &hp->prio_average_best[prio], &hp->prio_average_best_count[prio] ) != 10) { fclose(autoheteroprio_file); _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is improperly formatted\n"); return; } // Read architecture specific data for(arch_ind = 0; arch_ind < supported_archs; ++arch_ind) { if(codelet_archs[arch_ind] && archs[arch_ind] < STARPU_NB_TYPES) { if(fscanf(autoheteroprio_file, "%lf %lf %u %lf\n", &hp->prio_average_URT[archs[arch_ind]][prio], &hp->prio_average_time_arch[archs[arch_ind]][prio], &hp->prio_average_time_arch_count[archs[arch_ind]][prio], &hp->prio_arch_proportion[archs[arch_ind]][prio] ) != 4) { fclose(autoheteroprio_file); _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file is improperly formatted\n"); return; } if(hp->prio_average_time_arch_count[archs[arch_ind]][prio] > 0) hp->prio_arch_has_time_info[archs[arch_ind]][prio] = 1; } else if(codelet_archs[arch_ind] && archs[arch_ind] >= STARPU_NB_TYPES) { while((c = getc(autoheteroprio_file)) != '\n') if(c == EOF) { fclose(autoheteroprio_file); _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file ended abruptly\n"); return; } } } for(arch_ind = 0; arch_ind < ignored_lines; ++arch_ind) { while((c = getc(autoheteroprio_file)) != '\n') if(c == EOF) { fclose(autoheteroprio_file); _STARPU_MSG("[HETEROPRIO][INITIALIZATION] Warning, autoheteroprio's data file ended abruptly\n"); return; } } starpu_autoheteroprio_add_task(hp, codelet_name, codelet_exec_archs); prio = hp->found_codelet_names_length; // update current prio (+1) _starpu_drop_comments(autoheteroprio_file); } if(locked) _starpu_frdunlock(autoheteroprio_file); fclose(autoheteroprio_file); } static void starpu_autoheteroprio_save_task_data(struct _starpu_heteroprio_data *hp) { const char *custom_path = starpu_getenv("STARPU_HETEROPRIO_DATA_FILE"); char path[_HETEROPRIO_DIR_MAXLEN+6]; if(!custom_path) { snprintf(path, _HETEROPRIO_DIR_MAXLEN+6, "%s/%s.data", _starpu_heteroprio_get_data_dir(), _progname); } FILE *autoheteroprio_file; int locked; autoheteroprio_file = fopen(custom_path ? custom_path : path, "w+"); if(autoheteroprio_file == NULL) { _STARPU_MSG("[HETEROPRIO][DEINITIALIZATION] Warning: unable to save task data\n"); return; } locked = _starpu_fwrlock(autoheteroprio_file) == 0; fseek(autoheteroprio_file, 0, SEEK_SET); _starpu_fftruncate(autoheteroprio_file, 0); unsigned number_of_archs = 0; unsigned is_arch_used[STARPU_NB_TYPES]; unsigned arch_ind; fprintf(autoheteroprio_file, "##################\n"); fprintf(autoheteroprio_file, "# Known architectures\n"); fprintf(autoheteroprio_file, "# number_of_archs arch_ids ("); for(arch_ind = 0; arch_ind < STARPU_NB_TYPES; ++arch_ind) { if(hp->found_codelet_names_on_arch[arch_ind] > 0) { // Architecture was used is_arch_used[arch_ind] = 1; number_of_archs += 1; fprintf(autoheteroprio_file, "%s - %u, ", starpu_worker_get_type_as_string(arch_ind), arch_ind); } else is_arch_used[arch_ind] = 0; } fprintf(autoheteroprio_file, ")\n"); // List of used architectures designed by their id fprintf(autoheteroprio_file, "%u", number_of_archs); for(arch_ind = 0; arch_ind < STARPU_NB_TYPES; ++arch_ind) { if(is_arch_used[arch_ind]) fprintf(autoheteroprio_file, " %u", arch_ind); } fprintf(autoheteroprio_file, "\n"); fprintf(autoheteroprio_file, "##################\n"); fprintf(autoheteroprio_file, "# Busy/Free proportion per architecture\n"); fprintf(autoheteroprio_file, "# ARCH1_busy_time ARCH1_free_time ... ARCHn_busy_time ARCHn_free_time\n"); // Busy and free proportion per architecture for(arch_ind = 0; arch_ind < STARPU_NB_TYPES; ++arch_ind) { if(is_arch_used[arch_ind]) fprintf(autoheteroprio_file, " %lf %lf", hp->average_arch_busy_time[arch_ind], hp->average_arch_free_time[arch_ind]); } fprintf(autoheteroprio_file, "\n"); fprintf(autoheteroprio_file, "##################\n"); fprintf(autoheteroprio_file, "# Codelets specific data\n"); fprintf(autoheteroprio_file, "# codelet_name arch_1_can_exec ... arch_n_can_exec\n"); fprintf(autoheteroprio_file, "# average_NOD average_NOD_count average_URT_count overall_proportion overall_proportion_count arch_proportion_count avg_best_successor_time avg_best_successor_time_count prio_average_best prio_average_best_count\n"); fprintf(autoheteroprio_file, "# for each arch which can exec: average_URT_ARCH average_time_ARCH average_time_ARCH_count ARCH_proportion\n"); fprintf(autoheteroprio_file, "##########\n"); unsigned prio; unsigned codelet_archs[STARPU_NB_TYPES]; for(prio = 0; prio < hp->found_codelet_names_length; ++prio) { fprintf(autoheteroprio_file, "%s", hp->found_codelet_names[prio]); // Indicate if each can execute codelet for(arch_ind = 0; arch_ind < STARPU_NB_TYPES; ++arch_ind) { if(is_arch_used[arch_ind]) { codelet_archs[arch_ind] = arch_can_execute_prio(hp, arch_ind, prio); fprintf(autoheteroprio_file, " %u", codelet_archs[arch_ind]); } else codelet_archs[arch_ind] = 0; } fprintf(autoheteroprio_file, "\n"); // Non specific codelet data fprintf(autoheteroprio_file, "%lf %u %u %lf %u %u %lf %u %lf %u\n", hp->prio_average_NOD[prio], hp->prio_average_NOD_count[prio], hp->prio_average_URT_count[prio], hp->prio_overall_proportion[prio], hp->prio_overall_proportion_count[prio], hp->prio_arch_proportion_count[prio], hp->prio_average_successors_best_time_sum[prio], hp->prio_average_successors_best_time_sum_count[prio], hp->prio_average_best[prio], hp->prio_average_best_count[prio]); // Architecture specific data for(arch_ind = 0; arch_ind < STARPU_NB_TYPES; ++arch_ind) { if(codelet_archs[arch_ind]) { fprintf(autoheteroprio_file, "%lf %lf %u %lf\n", hp->prio_average_URT[arch_ind][prio], hp->prio_average_time_arch[arch_ind][prio], hp->prio_average_time_arch_count[arch_ind][prio], hp->prio_arch_proportion[arch_ind][prio]); } } fprintf(autoheteroprio_file, "#####\n"); } if(locked) _starpu_fwrunlock(autoheteroprio_file); fclose(autoheteroprio_file); } static void initialize_heteroprio_policy(unsigned sched_ctx_id) { #ifdef LAHETEROPRIO_PRINT_STAT memset(&lastats, 0, sizeof(lastats)); #endif int max_priority = starpu_sched_ctx_get_max_priority(sched_ctx_id); if(max_priority < HETEROPRIO_MAX_PRIO-1) { starpu_sched_ctx_set_max_priority(sched_ctx_id, HETEROPRIO_MAX_PRIO-1); _STARPU_DISP("[HETEROPRIO][INITIALIZATION] Max priority has been set to %d\n", HETEROPRIO_MAX_PRIO-1); } int min_priority = starpu_sched_ctx_get_min_priority(sched_ctx_id); if(min_priority > 0) { starpu_sched_ctx_set_min_priority(sched_ctx_id, 0); _STARPU_DISP("[HETEROPRIO][INITIALIZATION] Min priority has been set to 0\n"); } /* Alloc the scheduler data */ struct _starpu_heteroprio_data *hp; _STARPU_MALLOC(hp, sizeof(struct _starpu_heteroprio_data)); memset(hp, 0, sizeof(*hp)); hp->use_locality = use_la_mode = starpu_getenv_number_default("STARPU_HETEROPRIO_USE_LA", 0); _STARPU_DISP("[HETEROPRIO] Data locality : %s\n", hp->use_locality?"ENABLED":"DISABLED"); hp->codelet_grouping_strategy = use_auto_mode = starpu_getenv_number_default("STARPU_HETEROPRIO_CODELET_GROUPING_STRATEGY", 0); switch(hp->codelet_grouping_strategy) { case BY_PERF_MODEL_OR_NAME: _STARPU_DISP("[HETEROPRIO] Codelet grouping strategy : BY_PERF_MODEL_OR_NAME\n"); break; case BY_NAME_ONLY: _STARPU_DISP("[HETEROPRIO] Codelet grouping strategy : BY_NAME\n"); break; default: _STARPU_DISP("[HETEROPRIO] Codelet grouping strategy : UNKNOWN\n"); hp->codelet_grouping_strategy = BY_PERF_MODEL_OR_NAME; // setting to default } hp->use_auto_calibration = use_auto_mode = starpu_getenv_number_default("STARPU_HETEROPRIO_USE_AUTO_CALIBRATION", 1); _STARPU_DISP("[HETEROPRIO] Auto calibration : %s\n", hp->use_auto_calibration?"ENABLED":"DISABLED"); if(hp->use_auto_calibration) { const int ordering_policy = starpu_getenv_number_default("STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY", STARPU_HETEROPRIO_URT_DOT_DIFF_4); STARPU_ASSERT_MSG(ordering_policy < STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT, "STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY must be < %d.\n", STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY_COUNT); STARPU_ASSERT_MSG(ordering_policy >= 0, "STARPU_AUTOHETEROPRIO_PRIORITY_ORDERING_POLICY must be >= 0.\n"); hp->autoheteroprio_priority_ordering_policy = ordering_policy; _STARPU_DISP("[AUTOHETEROPRIO] Priority ordering policy : %s\n", &starpu_autoheteroprio_priority_ordering_policy_names[hp->autoheteroprio_priority_ordering_policy][0]); hp->priority_ordering_interval = starpu_getenv_number_default("STARPU_AUTOHETEROPRIO_ORDERING_INTERVAL", 32); hp->freeze_data_gathering = starpu_getenv_number_default("STARPU_AUTOHETEROPRIO_FREEZE_GATHERING", 0); _STARPU_DISP("[AUTOHETEROPRIO] Data gathering : %s\n", !hp->freeze_data_gathering?"ENABLED":"DISABLED"); hp->autoheteroprio_print_prio_after_ordering = starpu_getenv_number_default("STARPU_AUTOHETEROPRIO_PRINT_AFTER_ORDERING", 0); _STARPU_DISP("[AUTOHETEROPRIO] Print after ordering : %s\n", hp->autoheteroprio_print_prio_after_ordering?"ENABLED":"DISABLED"); hp->autoheteroprio_print_data_on_update = starpu_getenv_number_default("STARPU_AUTOHETEROPRIO_PRINT_DATA_ON_UPDATE", 0); _STARPU_DISP("[AUTOHETEROPRIO] Print on update : %s\n", hp->autoheteroprio_print_data_on_update?"ENABLED":"DISABLED"); hp->autoheteroprio_time_estimation_policy = starpu_getenv_number_default("STARPU_AUTOHETEROPRIO_TIME_ESTIMATION_POLICY", 0); } starpu_bitmap_init(&hp->waiters); if(hp->use_locality) { hp->pushStrategySet = getEnvAdvPush(); if(hp->pushStrategySet != PUSH_AUTO) { hp->pushStrategyToUse = hp->pushStrategySet; } else { hp->pushStrategyToUse = PUSH_LS_SDHB; } } starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)hp); STARPU_PTHREAD_MUTEX_INIT(&hp->policy_mutex, NULL); if(hp->use_locality) { STARPU_PTHREAD_MUTEX_INIT(&hp->push_history_mutex, NULL); } if(hp->use_auto_calibration) { STARPU_PTHREAD_MUTEX_INIT(&hp->auto_calibration_mutex, NULL); } // get environment hyperparameters hp->NTnodPond = starpu_getenv_float_default("STARPU_HETEROPRIO_NOD_TIME_COMBINATION_NOD_MULTIPLIER", 0.3); hp->NTexpVal = starpu_getenv_float_default("STARPU_HETEROPRIO_NOD_TIME_COMBINATION_EXP_SELECTIVITY", 0.5); hp->BNexpVal = starpu_getenv_float_default("STARPU_HETEROPRIO_BEST_NODS_SCORE_EXP_SELECTIVITY", 0.5); hp->URTurt = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_URT_MULTIPLIER", 0.5); hp->URT2urt = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_2_URT_MULTIPLIER", 0.5); hp->URT2prop = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_2_ARCH_NEED_MULTIPLIER", 2.0); hp->and2pond = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_2_ARCH_NEED_MULTIPLIER", 1.0); hp->and3pond = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_3_ARCH_NEED_MULTIPLIER", 1.0); hp->and4pond = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_4_ARCH_NEED_MULTIPLIER", 1.0); hp->and5xoffset = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_5_NOD_OFFSET", 1.3); hp->and5yoffset = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_5_ARCH_DIFF_OFFSET", 1.0); hp->and9xoffset = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_9_NOD_OFFSET", 1.3); hp->and9yoffset = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_9_ARCH_DIFF_OFFSET", 1.0); hp->and10xoffset = starpu_getenv_float_default("STARPU_HETEROPRIO_AURT_DOT_DIFF_10_NOD_OFFSET", 1.3); hp->and10yoffset = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_10_ARCH_DIFF_OFFSET", 1.0); hp->and11xoffset = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_11_NOD_OFFSET", 1.3); hp->and11yoffset = starpu_getenv_float_default("STARPU_HETEROPRIO_URT_DOT_DIFF_11_ARCH_DIFF_OFFSET", 1.0); hp->ANTnodPond = starpu_getenv_float_default("STARPU_HETEROPRIO_URTS_TIME_COMBINATION_NOD_MULTIPLIER", 0.3); hp->ANTexpVal = starpu_getenv_float_default("STARPU_HETEROPRIO_URTS_TIME_COMBINATION_EXP_SELECTIVITY", 0.5); unsigned idx_prio; for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) _heteroprio_bucket_init(&hp->buckets[idx_prio]); if(hp->use_locality) { hp->nb_wgroups = LAHETEROPRIO_MAX_WORKER_GROUPS; unsigned idx_wgroup; for(idx_wgroup = 0 ; idx_wgroup < LAHETEROPRIO_MAX_WORKER_GROUPS ; ++idx_wgroup) { hp->arch_of_wgroups[idx_wgroup] = STARPU_ANY_WORKER; // We set STARPU_ANY_WORKER = default (none) value } memset(hp->bucket_mapping_per_arch_index, -1, sizeof(unsigned)*STARPU_NB_TYPES*HETEROPRIO_MAX_PRIO); } void (*callback_sched)(unsigned) = starpu_sched_ctx_get_sched_policy_callback(sched_ctx_id); if(callback_sched) { if(hp->use_auto_calibration) { _STARPU_DISP("[HETEROPRIO][INITIALIZATION] Warning: a custom sched init function has been detected while being in auto calibration mode (STARPU_HETEROPRIO_USE_AUTO_CALIBRATION). Custom changes to priority mapping will be overwritten.\n"); } callback_sched(sched_ctx_id); } else { default_init_sched(sched_ctx_id); } check_heteroprio_mapping(hp); if(hp->use_auto_calibration) { unsigned arch; for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) { hp->prio_average_NOD[idx_prio] = 0.f; hp->prio_average_NOD_count[idx_prio] = 0; hp->prio_average_URT_count[idx_prio] = 0; hp->prio_overall_proportion[idx_prio] = 0.f; hp->prio_overall_proportion_count[idx_prio] = 0; hp->prio_arch_proportion_count[idx_prio] = 0; hp->prio_average_successors_best_time_sum[idx_prio] = 0.f; hp->prio_average_successors_best_time_sum_count[idx_prio] = 0; hp->prio_average_best[idx_prio] = 0.f; hp->prio_average_best_count[idx_prio] = 0; for(arch=0;archprio_average_URT[arch][idx_prio] = 0.f; hp->prio_average_time_arch[arch][idx_prio] = 0.f; hp->prio_average_time_arch_count[arch][idx_prio] = 0; hp->prio_arch_proportion[arch][idx_prio] = 0.f; if(arch != STARPU_CPU_WORKER) { starpu_heteroprio_set_arch_slow_factor_hp(hp, arch, idx_prio, 1.0f); } } starpu_heteroprio_set_faster_arch_hp(hp, STARPU_CPU_WORKER, idx_prio); } starpu_heteroprio_clear_mapping_hp(hp); for(arch=0;archfreeze_data_gathering) { _starpu_graph_record = 1; // allow starpu graph recording } } } static void register_arch_times(struct _starpu_heteroprio_data *hp, unsigned arch, double busy_time, double free_time); static void deinitialize_heteroprio_policy(unsigned sched_ctx_id) { struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); /* Ensure there are no more tasks */ STARPU_ASSERT(hp->total_tasks_in_buckets == 0); unsigned arch_index; for(arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index) { if(hp->use_locality) { STARPU_ASSERT(hp->nb_remaining_tasks_per_arch_index[arch_index] == 0); } else { STARPU_ASSERT(hp->nb_remaining_tasks_per_arch_index[arch_index] == 0); STARPU_ASSERT(hp->nb_prefetched_tasks_per_arch_index[arch_index] == 0); } } unsigned idx_prio; for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) { STARPU_ASSERT(hp->buckets[idx_prio].tasks_queue_ntasks == 0); // potentially not wanted if use_la==0 _heteroprio_bucket_release(&hp->buckets[idx_prio]); } if(hp->use_locality) { #ifdef LAHETEROPRIO_PRINT_STAT _STARPU_MSG("[LASTATS] nb tasks %ld\n", lastats.nb_tasks); { _STARPU_MSG("[LASTATS] Tasks pushed per workers of kind:\n"); unsigned nb_tasks = 0; unsigned worker_id; for (worker_id = 0; worker_id < starpu_worker_get_count(); ++worker_id) { const unsigned worker_arch = hp->workers_heteroprio[worker_id].arch_index; const unsigned nb_prios = hp->nb_prio_per_arch_index[worker_arch]; _STARPU_MSG("[LASTATS] "); for (idx_prio = 0; idx_prio < nb_prios; ++idx_prio) { fprintf(stderr, "[%3u] %9ld ", idx_prio, lastats.nb_tasks_per_worker[worker_id][idx_prio]); nb_tasks += lastats.nb_tasks_per_worker[worker_id][idx_prio]; } fprintf(stderr, "\n"); } _STARPU_MSG("[LASTATS] Total tasks pushed per workers of kind: %u\n\n", nb_tasks); } { _STARPU_MSG("[LASTATS] Tasks pushed per workers to mem node:\n"); unsigned nb_tasks = 0; _STARPU_MSG("[LASTATS] Master: "); unsigned idx_mem; for (idx_mem = 0; idx_mem < hp->nb_wgroups; ++idx_mem) { fprintf(stderr, "[%3u] %9ld ", idx_mem, lastats.push_redirect[0][idx_mem]); nb_tasks += lastats.push_redirect[0][idx_mem]; } fprintf(stderr, "\n"); _STARPU_MSG("[LASTATS] Total tasks pushed per workers to mem node: %u\n\n", nb_tasks); } { unsigned worker_id; for (worker_id = 0; worker_id < starpu_worker_get_count(); ++worker_id) { _STARPU_MSG("[LASTATS] %u: ", worker_id); unsigned idx_mem; for (idx_mem = 0; idx_mem < hp->nb_wgroups; ++idx_mem) { fprintf(stderr, "[%3u] %9ld ", idx_mem, lastats.push_redirect[worker_id + 1][idx_mem]); } fprintf(stderr, "\n"); } fprintf(stderr, "\n"); } { _STARPU_MSG("[LASTATS] Tasks per wgroup:\n"); unsigned nb_tasks = 0; unsigned idx_wgroup; for (idx_wgroup = 0; idx_wgroup < hp->nb_wgroups; ++idx_wgroup) { const unsigned wgroup_arch = hp->arch_of_wgroups[idx_wgroup]; const unsigned nb_prios = hp->nb_prio_per_arch_index[wgroup_arch]; _STARPU_MSG("[LASTATS] "); for (idx_prio = 0; idx_prio < nb_prios; ++idx_prio) { fprintf(stderr, "[%3u] %9ld ", idx_prio, lastats.nb_tasks_per_wgroup[idx_wgroup][idx_prio]); nb_tasks += lastats.nb_tasks_per_wgroup[idx_wgroup][idx_prio]; } fprintf(stderr, "\n"); } _STARPU_MSG("[LASTATS] Total tasks pushed per wgroup: %u\n\n", nb_tasks); } { _STARPU_MSG("[LASTATS] Tasks skipt per workers:\n"); unsigned worker_id; for (worker_id = 0; worker_id < starpu_worker_get_count(); ++worker_id) { const unsigned worker_arch = hp->workers_heteroprio[worker_id].arch_index; const unsigned nb_prios = hp->nb_prio_per_arch_index[worker_arch]; _STARPU_MSG("[LASTATS] "); for (idx_prio = 0; idx_prio < nb_prios; ++idx_prio) { fprintf(stderr, "[%3u] %9ld ", idx_prio, lastats.task_skipt_due_to_factor_per_worker[worker_id][idx_prio]); } fprintf(stderr, "\n"); } fprintf(stderr, "\n"); } { _STARPU_MSG("[LASTATS] Tasks list empty per workers:\n"); unsigned worker_id; for (worker_id = 0; worker_id < starpu_worker_get_count(); ++worker_id) { const unsigned worker_arch = hp->workers_heteroprio[worker_id].arch_index; const unsigned nb_prios = hp->nb_prio_per_arch_index[worker_arch]; _STARPU_MSG("[LASTATS] "); for (idx_prio = 0; idx_prio < nb_prios; ++idx_prio) { fprintf(stderr, "[%3u] %9ld ", idx_prio, lastats.task_list_empty_per_worker[worker_id][idx_prio]); } fprintf(stderr, "\n"); } fprintf(stderr, "\n"); } { _STARPU_MSG("[LASTATS] Tasks stolen per workers:\n"); unsigned nb_tasks = 0; unsigned worker_id; for (worker_id = 0; worker_id < starpu_worker_get_count(); ++worker_id) { const unsigned worker_arch = hp->workers_heteroprio[worker_id].arch_index; const unsigned nb_prios = hp->nb_prio_per_arch_index[worker_arch]; _STARPU_MSG("[LASTATS] "); for (idx_prio = 0; idx_prio < nb_prios; ++idx_prio) { fprintf(stderr, "[%3u] %9ld ", idx_prio, lastats.task_stolen_per_worker[worker_id][idx_prio]); nb_tasks += lastats.task_stolen_per_worker[worker_id][idx_prio]; } fprintf(stderr, "\n"); } _STARPU_MSG("[LASTATS] Total tasks stolen per worker: %u\n\n", nb_tasks); } { _STARPU_MSG("[LASTATS] Tasks stolen in wgroup:\n"); unsigned nb_tasks = 0; unsigned idx_wgroup; for (idx_wgroup = 0; idx_wgroup < hp->nb_wgroups; ++idx_wgroup) { const unsigned wgroup_arch = hp->arch_of_wgroups[idx_wgroup]; const unsigned nb_prios = hp->nb_prio_per_arch_index[wgroup_arch]; _STARPU_MSG("[LASTATS] "); for (idx_prio = 0; idx_prio < nb_prios; ++idx_prio) { fprintf(stderr, "[%3u] %9ld ", idx_prio, lastats.task_stolen_in_wgroup[idx_wgroup][idx_prio]); nb_tasks += lastats.task_stolen_in_wgroup[idx_wgroup][idx_prio]; } fprintf(stderr, "\n"); } _STARPU_MSG("[LASTATS] Total tasks stolen in wgroup: %u\n\n", nb_tasks); } { _STARPU_MSG("[LASTATS] Tasks push/pop different wgroup:\n"); unsigned nb_tasks = 0; unsigned worker_id; for (worker_id = 0; worker_id < starpu_worker_get_count(); ++worker_id) { _STARPU_MSG("[LASTATS] %u: ", worker_id); unsigned idx_mem; for (idx_mem = 0; idx_mem < hp->nb_wgroups; ++idx_mem) { fprintf(stderr, "[%3u] %9ld ", idx_mem, lastats.pop_redirect[worker_id][idx_mem]); nb_tasks += lastats.pop_redirect[worker_id][idx_mem]; } fprintf(stderr, "\n"); } _STARPU_MSG("[LASTATS] Total tasks push/pop different wgroup: %u\n\n", nb_tasks); } { _STARPU_MSG("[LASTATS] push strategy used:\n"); unsigned worker_id; unsigned counter[PUSH_NB_AUTO] = { 0 }; unsigned idx_more_used = 0; for (worker_id = 0; worker_id <= starpu_worker_get_count(); ++worker_id) { _STARPU_MSG("[LASTATS] %u: ", worker_id); unsigned idx_strategy; for (idx_strategy = 0; idx_strategy < PUSH_NB_AUTO; ++idx_strategy) { fprintf(stderr, "[%3u] %9ld ", idx_strategy, lastats.push_to_use[worker_id][idx_strategy]); counter[idx_strategy] += lastats.push_to_use[worker_id][idx_strategy]; if (counter[idx_strategy] > counter[idx_more_used]) { idx_more_used = idx_strategy; } } fprintf(stderr, "\n"); } _STARPU_MSG("[LASTATS] More used push: %u\n\n", idx_more_used); } { _STARPU_MSG("[LASTATS] correct MN pushes:\n"); unsigned idx_strategy; for (idx_strategy = 0; idx_strategy < PUSH_NB_AUTO; ++idx_strategy) { _STARPU_MSG("[LASTATS][%u] %u \n", idx_strategy, hp->pushStrategyHistory[idx_strategy]); } } #endif } STARPU_PTHREAD_MUTEX_DESTROY(&hp->policy_mutex); if(hp->use_locality) { STARPU_PTHREAD_MUTEX_DESTROY(&hp->push_history_mutex); } if(hp->use_auto_calibration) { STARPU_PTHREAD_MUTEX_DESTROY(&hp->auto_calibration_mutex); } if(hp->use_auto_calibration && !hp->freeze_data_gathering) { // update autoheteroprio data with free and busy worker time for(arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index) { register_arch_times(hp, arch_index, hp->current_arch_busy_time[arch_index], hp->current_arch_free_time[arch_index]); } starpu_autoheteroprio_save_task_data(hp); } _starpu_graph_record = 0; // disable starpu graph recording (that may have been activated due to hp->use_auto_calibration) free(hp); } static void add_workers_heteroprio_policy(unsigned sched_ctx_id, int *workerids, unsigned nworkers) { struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); // Retrieve current time to set as starting time for each worker struct timespec tsnow; _starpu_clock_gettime(&tsnow); const double now = starpu_timing_timespec_to_us(&tsnow); unsigned i; for (i = 0; i < nworkers; i++) { int workerid = workerids[i]; memset(&hp->workers_heteroprio[workerid], 0, sizeof(hp->workers_heteroprio[workerid])); if(!hp->use_locality) { /* if the worker has already belonged to this context the queue and the synchronization variables have been already initialized */ starpu_st_prio_deque_init(&hp->workers_heteroprio[workerid].tasks_queue); } enum starpu_worker_archtype arch_index = starpu_worker_get_type(workerid); hp->workers_heteroprio[workerid].arch_index = arch_index; hp->workers_heteroprio[workerid].arch_type = starpu_heteroprio_types_to_arch(arch_index); hp->nb_workers_per_arch_index[hp->workers_heteroprio[workerid].arch_index]++; hp->last_hook_exec_time[workerid] = now; } } static void remove_workers_heteroprio_policy(unsigned sched_ctx_id, int *workerids, unsigned nworkers) { struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); if(!hp->use_locality) { unsigned i; for (i = 0; i < nworkers; i++) { int workerid = workerids[i]; starpu_st_prio_deque_destroy(&hp->workers_heteroprio[workerid].tasks_queue); } } } static unsigned get_best_mem_node(struct starpu_task *task, struct _starpu_heteroprio_data *hp, const enum laheteroprio_push_strategy pushStrategy) { const unsigned workerid = starpu_worker_get_id(); unsigned best_mem_node; STARPU_ASSERT(task != NULL); if (pushStrategy != PUSH_WORKER) { if(!hp->warned_change_nb_memory_nodes && starpu_memory_nodes_get_count() != hp->nb_memory_nodes) { _STARPU_MSG("[HETEROPRIO][INITIALIZATION][get_best_mem_node] Warning: current memory node number is different from the one retrieved at initialization.\n\ This warning will only be displayed once.\n"); hp->warned_change_nb_memory_nodes = 1; } const unsigned nnodes = hp->nb_memory_nodes; // == starpu_memory_nodes_get_count() if number of mem nodes didn't change during execution if (pushStrategy == PUSH_LcS) { int node_to_worker[LAHETEROPRIO_MAX_WORKER_GROUPS]; unsigned idx_worker; for (idx_worker = 0; idx_worker < starpu_worker_get_count(); ++idx_worker) { // overwrite, we simply need one worker per mem node node_to_worker[starpu_worker_get_memory_node(idx_worker)] = idx_worker; } double bestTransferTime = starpu_task_expected_data_transfer_time_for(task, node_to_worker[0]); best_mem_node = 0; unsigned idx_node; for (idx_node = 1; idx_node < nnodes; ++idx_node) { const double transferTime = starpu_task_expected_data_transfer_time_for(task, node_to_worker[idx_node]); if (transferTime < bestTransferTime) { bestTransferTime = transferTime; best_mem_node = idx_node; } } } else if (pushStrategy == PUSH_LS_SDH || pushStrategy == PUSH_LS_SDH2) { size_t max_size_so_far = 0; unsigned idx_max_size = 0; const unsigned wgroupid = (workerid == (unsigned)-1 ? hp->master_tasks_queue_idx : hp->workers_laheteroprio_wgroup_index[workerid]); size_t data_per_mem_node[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; assert(nnodes <= LAHETEROPRIO_MAX_WORKER_GROUPS); unsigned idx_data; for (idx_data = 0; idx_data < STARPU_TASK_GET_NBUFFERS(task); ++idx_data) { const starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, idx_data); const size_t raw_data_size = starpu_data_get_size(handle); const unsigned is_read = (STARPU_TASK_GET_MODE(task, idx_data) == STARPU_R); // Easy: size_t data_size; if (pushStrategy == PUSH_LS_SDH) { data_size = raw_data_size; } else { assert(pushStrategy == PUSH_LS_SDH2); data_size = (is_read ? raw_data_size : raw_data_size *raw_data_size); } unsigned idx_node; for (idx_node = 0; idx_node < nnodes; ++idx_node) { if (starpu_data_is_on_node(handle, idx_node)) { data_per_mem_node[idx_node] += data_size; if (max_size_so_far < data_per_mem_node[idx_node] || (max_size_so_far == data_per_mem_node[idx_node] && idx_node == wgroupid)) { max_size_so_far = data_per_mem_node[idx_node]; idx_max_size = idx_node; } } } } best_mem_node = idx_max_size; } else if (pushStrategy == PUSH_LC_SMWB) { const unsigned wgroupid = (workerid == (unsigned)-1 ? hp->master_tasks_queue_idx : hp->workers_laheteroprio_wgroup_index[workerid]); assert(nnodes <= LAHETEROPRIO_MAX_WORKER_GROUPS); const unsigned N = STARPU_TASK_GET_NBUFFERS(task); unsigned data_exist_every_where[128] = { 0 }; unsigned nb_data_exist_every_where = 0; { unsigned idx_data; for (idx_data = 0; idx_data < N; ++idx_data) { const starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, idx_data); data_exist_every_where[idx_data] = 1; unsigned idx_node; for (idx_node = 0; idx_node < nnodes; ++idx_node) { if (starpu_data_is_on_node(handle, idx_node)) { // Ok } else { data_exist_every_where[idx_data] = 0; break; } } if (data_exist_every_where[idx_data]) { nb_data_exist_every_where += 1; } } } assert(N <= 128); unsigned data_is_read[128] = { 0 }; unsigned Nw = 0; size_t total_size = 0; size_t total_size_in_read = 0; size_t total_size_in_write = 0; size_t data_sizes[128] = { 0 }; unsigned data_Ri[128] = { 0 }; size_t data_per_mem_node[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; size_t data_per_mem_node_in_read[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; size_t data_per_mem_node_in_write[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; unsigned nb_data_per_mem_node[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; unsigned nb_data_in_w_per_mem_node[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; { unsigned idx_data; for (idx_data = 0; idx_data < N; ++idx_data) { if (data_exist_every_where[idx_data] == 0) { const starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, idx_data); data_sizes[idx_data] = starpu_data_get_size(handle); data_is_read[idx_data] = (STARPU_TASK_GET_MODE(task, idx_data) == STARPU_R); total_size += data_sizes[idx_data]; if (data_is_read[idx_data]) { total_size_in_read += data_sizes[idx_data]; } else { total_size_in_write += data_sizes[idx_data]; Nw += 1; } unsigned idx_node; for (idx_node = 0; idx_node < nnodes; ++idx_node) { if (starpu_data_is_on_node(handle, idx_node)) { data_Ri[idx_data] += 1; data_per_mem_node[idx_node] += data_sizes[idx_data]; nb_data_per_mem_node[idx_node] += 1; if (data_is_read[idx_data]) { data_per_mem_node_in_read[idx_node] += data_sizes[idx_data]; } else { data_per_mem_node_in_write[idx_node] += data_sizes[idx_data]; nb_data_in_w_per_mem_node[idx_node] += 1; } } } } } } double max_score_so_far = 0; unsigned idx_max_score = 0; unsigned idx_node; for (idx_node = 0; idx_node < nnodes; ++idx_node) { double current_score = 0; current_score = (data_per_mem_node_in_read[idx_node]) + 1000. *(data_per_mem_node_in_write[idx_node] *nb_data_in_w_per_mem_node[idx_node]); if (max_score_so_far < current_score || (max_score_so_far == current_score && idx_node == wgroupid)) { max_score_so_far = current_score; idx_max_score = idx_node; } } best_mem_node = idx_max_score; } else { const unsigned wgroupid = (workerid == (unsigned)-1 ? hp->master_tasks_queue_idx : hp->workers_laheteroprio_wgroup_index[workerid]); assert(nnodes <= LAHETEROPRIO_MAX_WORKER_GROUPS); const unsigned N = STARPU_TASK_GET_NBUFFERS(task); assert(N <= 128); unsigned data_is_read[128] = { 0 }; unsigned Nw = 0; size_t total_size = 0; size_t total_size_in_read = 0; size_t total_size_in_write = 0; size_t data_sizes[128] = { 0 }; unsigned data_Ri[128] = { 0 }; size_t data_per_mem_node[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; size_t data_per_mem_node_in_read[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; size_t data_per_mem_node_in_write[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; unsigned nb_data_per_mem_node[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; unsigned nb_data_in_w_per_mem_node[LAHETEROPRIO_MAX_WORKER_GROUPS] = { 0 }; { unsigned idx_data; for (idx_data = 0; idx_data < N; ++idx_data) { const starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, idx_data); data_sizes[idx_data] = starpu_data_get_size(handle); data_is_read[idx_data] = (STARPU_TASK_GET_MODE(task, idx_data) == STARPU_R); total_size += data_sizes[idx_data]; if (data_is_read[idx_data]) { total_size_in_read += data_sizes[idx_data]; } else { total_size_in_write += data_sizes[idx_data]; Nw += 1; } unsigned idx_node; for (idx_node = 0; idx_node < nnodes; ++idx_node) { if (starpu_data_is_on_node(handle, idx_node)) { data_Ri[idx_data] += 1; data_per_mem_node[idx_node] += data_sizes[idx_data]; nb_data_per_mem_node[idx_node] += 1; if (data_is_read[idx_data]) { data_per_mem_node_in_read[idx_node] += data_sizes[idx_data]; } else { data_per_mem_node_in_write[idx_node] += data_sizes[idx_data]; nb_data_in_w_per_mem_node[idx_node] += 1; } } } } } double max_score_so_far = DBL_MAX; unsigned idx_max_score = 0; unsigned idx_node; for (idx_node = 0; idx_node < nnodes; ++idx_node) { /* const enum starpu_node_kind memnode_kind = starpu_node_get_kind(idx_node); if(memnode_kind == STARPU_DISK_RAM) { continue; // a disk has no associated worker } */ double current_score = 0; assert(pushStrategy == PUSH_LS_SDHB); current_score = (total_size_in_read - data_per_mem_node_in_read[idx_node]) + ((total_size_in_write - data_per_mem_node_in_write[idx_node]) *(2. - (double)(Nw) / (double)(N))); if (max_score_so_far > current_score || (max_score_so_far == current_score && idx_node == wgroupid)) { max_score_so_far = current_score; idx_max_score = idx_node; } } best_mem_node = idx_max_score; } #ifdef LAHETEROPRIO_PRINT_STAT lastats.push_redirect[workerid + 1][best_mem_node] += 1; #endif // LAHETEROPRIO_PRINT_STAT } else { if (workerid == (unsigned)-1) { /*master thread */ best_mem_node = hp->master_tasks_queue_idx; } else { const unsigned wgroupid = hp->workers_laheteroprio_wgroup_index[workerid]; best_mem_node = wgroupid; } } return best_mem_node; } static void print_priorities(struct _starpu_heteroprio_data *hp) { STARPU_ASSERT(hp->autoheteroprio_print_prio_after_ordering); starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&hp->auto_calibration_mutex); starpu_worker_relax_off(); fprintf(stderr, "Updated task priorities :\n"); unsigned arch; for(arch=0;archfound_codelet_names_on_arch[arch];++p) { fprintf(stderr, "%s ", hp->found_codelet_names[hp->prio_mapping_per_arch_index[arch][p]]); } fprintf(stderr, "\n"); } STARPU_PTHREAD_MUTEX_UNLOCK(&hp->auto_calibration_mutex); } static double get_autoheteroprio_arch_busy_proportion(struct _starpu_heteroprio_data *hp, unsigned arch) { double total = hp->average_arch_busy_time[arch] + hp->average_arch_free_time[arch]; if(total <= 0) { // if we have no info on workers times, we assume they are never busy (near-arbitrary choice) return 0; } return hp->average_arch_busy_time[arch]/total; } static double get_autoheteroprio_estimated_time(struct _starpu_heteroprio_data *hp, unsigned priority, unsigned arch) { if(hp->prio_arch_has_time_info[arch][priority]) { return hp->prio_average_time_arch[arch][priority]; } if(arch_can_execute_prio(hp, arch, priority)) { // if arch is legit but we have no time information, return a decent arbitrary time return AUTOHETEROPRIO_FAIR_TIME; } if(hp->autoheteroprio_time_estimation_policy == 0) { return AUTOHETEROPRIO_LONG_TIME; } else if(hp->autoheteroprio_time_estimation_policy == 1) { // we can't execute this task on this arch, we therefore act as if it would be executed as fast as on the fastest architecture double bestTime = AUTOHETEROPRIO_EXTREMELY_LONG_TIME; unsigned a; for(a=0;aautoheteroprio_time_estimation_policy == 0 || hp->autoheteroprio_time_estimation_policy == 1); return 0.; // to get rid of warning } } static double get_autoheteroprio_prio_proportion(struct _starpu_heteroprio_data *hp, unsigned priority) { if(hp->prio_overall_proportion_count[priority] > 0) { return hp->prio_overall_proportion[priority]; } // no prio of this type has ever been recorded return 0; } // get normalized time (no unit, with average best arch executes tasks in 1.0) static double get_autoheteroprio_normalized_time(struct _starpu_heteroprio_data *hp, unsigned priority, unsigned arch) { double sum = 0.f; unsigned p; for(p=0;pfound_codelet_names_length;++p) { sum += get_autoheteroprio_prio_proportion(hp, p) * get_best_autoheteroprio_estimated_time(hp, p); } if(sum <= 0.f) { return 1.0; } return get_autoheteroprio_estimated_time(hp, priority, arch) / sum; } static double get_autoheteroprio_prio_arch_proportion(struct _starpu_heteroprio_data *hp, unsigned priority, unsigned arch) { if(hp->prio_arch_proportion_count[priority] > 0) { return hp->prio_arch_proportion[arch][priority]; } // this prio has never been executed on this arch return 0; } static double get_autoheteroprio_successors_best_time_sum(struct _starpu_heteroprio_data *hp, unsigned priority) { if(hp->prio_average_successors_best_time_sum_count[priority] > 0) { return hp->prio_average_successors_best_time_sum[priority]; } return AUTOHETEROPRIO_FAIR_TIME; } // best execution time of a prio //static double get_autoheteroprio_best_time(struct _starpu_heteroprio_data *hp, unsigned priority) //{ // if(hp->prio_average_best_count[priority] > 0) // { // return hp->prio_average_best[priority]; // } // // return AUTOHETEROPRIO_FAIR_TIME; //} static double get_autoheteroprio_NOD(struct _starpu_heteroprio_data *hp, unsigned priority) { if(hp->prio_average_NOD_count[priority] > 0) { return hp->prio_average_NOD[priority]; } return 1.0f; } static double get_autoheteroprio_URT(struct _starpu_heteroprio_data *hp, unsigned arch, unsigned priority) { if(hp->prio_average_URT_count[priority] > 0) { return hp->prio_average_URT[arch][priority]; } return AUTOHETEROPRIO_FAIR_TIME; } static double reLU(double x) { if(x<0.0f) { return 0.0f; } return x; } static double rpg(double x) { if(x > 1.0f) { return 1.0f; } return sqrt(x)*sqrt(2.0f-x); } struct prio_score { unsigned index; double score; }; static int compare_prio_scores(const void* elem1, const void* elem2) { if(((const struct prio_score*)elem1)->score > ((const struct prio_score*)elem2)->score) return -1; return ((const struct prio_score*)elem1)->score < ((const struct prio_score*)elem2)->score; } static void order_priorities(struct _starpu_heteroprio_data *hp) { STARPU_ASSERT(use_auto_mode); STARPU_ASSERT(hp->use_auto_calibration); // priorities should only be changed during execution if in auto calibration mode struct prio_score prio_arch[STARPU_NB_TYPES][HETEROPRIO_MAX_PRIO]; unsigned prio_arch_index[STARPU_NB_TYPES] = {0}; // lock the global policy mutex _starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&hp->auto_calibration_mutex); starpu_worker_relax_off(); unsigned p, a; for(p=0;pfound_codelet_names_length;++p) { int worst_arch = -1; double worstTime = -1.0f; int second_worst_arch = -1; double secondWorstTime = -1.0f; // Find the worst architecture and the second worst if there is one for(a = 0; a < STARPU_NB_TYPES; ++a) { if((hp->buckets[p].valid_archs & starpu_heteroprio_types_to_arch(a)) == 0) continue; const double arch_time = get_autoheteroprio_normalized_time(hp, p, a); if(worstTime < arch_time) { second_worst_arch = worst_arch; secondWorstTime = worstTime; worst_arch = a; worstTime = arch_time; } else if(secondWorstTime < arch_time) { second_worst_arch = a; secondWorstTime = arch_time; } } // Ensure that there is at least one arch that can execute priority STARPU_ASSERT(worst_arch != -1); const double worstArchTaskProportion = get_autoheteroprio_prio_arch_proportion(hp, p, worst_arch); const double URT_worst = get_autoheteroprio_URT(hp, worst_arch, p); double secondWorstArchTaskProportion, URT_secondWorst; if(second_worst_arch == -1) { // If there's no second worst set values to worst possible values secondWorstTime = AUTOHETEROPRIO_EXTREMELY_LONG_TIME; secondWorstArchTaskProportion = 0.f; URT_secondWorst = 0.f; } else { secondWorstTime = get_autoheteroprio_normalized_time(hp, p, second_worst_arch); secondWorstArchTaskProportion = get_autoheteroprio_prio_arch_proportion(hp, p, second_worst_arch); URT_secondWorst = get_autoheteroprio_URT(hp, second_worst_arch, p); } // Compute scores for(a=0;abuckets[p].valid_archs & starpu_heteroprio_types_to_arch(a)) { double otherTime, otherArchTaskProportion, URT_other; unsigned prio = prio_arch_index[a]++; if(a == (unsigned) worst_arch) { // Compare the worst architecture to the second worst otherTime = secondWorstTime; URT_other = URT_secondWorst; otherArchTaskProportion = secondWorstArchTaskProportion; } else { // Compare to the worst architecture otherTime = worstTime; URT_other = URT_worst; otherArchTaskProportion = worstArchTaskProportion; } const double need_other = 1.0f - otherArchTaskProportion; double NOD = get_autoheteroprio_NOD(hp, p); double sum = get_autoheteroprio_successors_best_time_sum(hp, p); double ownTime = get_autoheteroprio_normalized_time(hp, p, a); double archDiff = otherTime - ownTime; double archRelDiff = otherTime/ownTime; double ownArchTaskProportion = get_autoheteroprio_prio_arch_proportion(hp, p, a); double URT_own = get_autoheteroprio_URT(hp, a, p); double need_own = 1.0f - get_autoheteroprio_arch_busy_proportion(hp, a); double archNeedDiff = need_own-need_other; double URT = (URT_own*need_own + URT_other*need_other); prio_arch[a][prio].index = p; if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_NOD_TIME_COMBINATION) { double relDiff = archRelDiff>1.0f?archRelDiff:1.0/archRelDiff; double multiplier = exp(-hp->NTexpVal*(relDiff-1)*(relDiff-1)); prio_arch[a][prio].score = archDiff + hp->NTnodPond*multiplier*NOD; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_BEST_NODS_SCORE || hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_BEST_NODS) { // TODO, implement BEST_NODS double multiplier = exp(-hp->BNexpVal*(archDiff)*(archDiff)); if(archDiff > 0.0f) { // my arch is faster multiplier = 1.0f; } multiplier = 2.0f*multiplier - 1.0f; // bad diff becomes -1, good or equal diff 1 prio_arch[a][prio].score = multiplier*NOD; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_PURE) { prio_arch[a][prio].score = URT; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT) { prio_arch[a][prio].score = hp->URTurt * URT + archDiff; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_2) { prio_arch[a][prio].score = hp->URT2urt * URT + archDiff + hp->URT2prop * reLU(ownArchTaskProportion*otherArchTaskProportion*archNeedDiff); } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_PURE) { prio_arch[a][prio].score = URT*archDiff; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_PURE_2) { prio_arch[a][prio].score = (1.0f + URT)*archDiff; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE) { prio_arch[a][prio].score = URT*archRelDiff; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_REL_DIFF_PURE_2) { prio_arch[a][prio].score = (1.0f + URT)*archRelDiff; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_2) { prio_arch[a][prio].score = (1.0f + URT)*archDiff + hp->and2pond * ownTime * archNeedDiff; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_3) { prio_arch[a][prio].score = (1.0f + URT)*archDiff + hp->and3pond * ownTime * reLU(archNeedDiff); } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_4) { prio_arch[a][prio].score = (1.0f + URT)*archDiff - hp->and4pond * ownTime * reLU(-archNeedDiff); } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_5) { prio_arch[a][prio].score = (hp->and5xoffset + URT) * (hp->and5yoffset + archDiff); } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_6) { prio_arch[a][prio].score = (1.0f + URT)*log1p(exp(archDiff)); } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_7) { prio_arch[a][prio].score = rpg(URT)*(1+URT)*(1+archDiff)+(1-rpg(URT))*(-log1p(exp(-archDiff))); } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_8) { prio_arch[a][prio].score = (1/(1+exp(-URT))-0.5)*(1+URT)*(1+archDiff)+(1/(1+exp(-1/URT))-0.5)*(-exp(-archDiff)); } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_9) { prio_arch[a][prio].score = log(hp->and9xoffset+URT)*atan(archDiff+hp->and9yoffset*URT); } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_10) { prio_arch[a][prio].score = (hp->and10xoffset+URT)*atan(archDiff) + hp->and10yoffset*URT; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URT_DOT_DIFF_11) { prio_arch[a][prio].score = (hp->and11xoffset+URT)*(archDiff+hp->and11yoffset*URT); } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URTS_PER_SECONDS) { prio_arch[a][prio].score = URT / ownTime; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URTS_PER_SECONDS_2) { prio_arch[a][prio].score = (URT + archDiff) / ownTime; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URTS_PER_SECONDS_DIFF) { prio_arch[a][prio].score = URT / ownTime + archDiff; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URTS_TIME_RELEASED_DIFF) { prio_arch[a][prio].score = URT*(sum+archDiff)/ownTime; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_URTS_TIME_COMBINATION) { double relDiff = archRelDiff>1.0f?archRelDiff:1.0/archRelDiff; double multiplier = exp(-hp->ANTexpVal*(relDiff-1)*(relDiff-1)); prio_arch[a][prio].score = archDiff + hp->ANTnodPond*multiplier*URT; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_NODS_PER_SECOND) { prio_arch[a][prio].score = NOD/ownTime; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_NODS_TIME_RELEASED) { prio_arch[a][prio].score = NOD*sum/ownTime; } else if(hp->autoheteroprio_priority_ordering_policy == STARPU_HETEROPRIO_NODS_TIME_RELEASED_DIFF) { prio_arch[a][prio].score = NOD*(sum+archDiff)/ownTime; } else { _STARPU_MSG("[AUTOHETEROPRIO] Warning: unknown ordering policy.\n"); prio_arch[a][prio].score = 0; } if(!hp->freeze_data_gathering && hp->prio_average_time_arch_count[a][p] < AUTOHETEROPRIO_RELEVANT_SAMPLE_SIZE) { // if we dont have enough data on execution time, we push execution on it by increasing the score prio_arch[a][prio].score += 99999999.; } } } } for(a=0;afound_codelet_names_on_arch[a], sizeof(struct prio_score), compare_prio_scores); } starpu_heteroprio_clear_mapping_hp(hp); for(a=0;afound_codelet_names_on_arch[a];++p) { starpu_heteroprio_set_mapping_hp(hp, a, p, prio_arch[a][p].index); } } /* // uncomment to print task names ordered by priority (TODO : use environment variable) printf("priorities sorted:\n"); printf("CPU:\n"); for(p=0;pfound_codelet_names_on_arch[STARPU_CPU_WORKER];++p) { printf("%d : %s bucket=%d (score = %f)\n", p, hp->found_codelet_names[prio_arch[STARPU_CPU_WORKER][p].index], prio_arch[STARPU_CPU_WORKER][p].index, prio_arch[STARPU_CPU_WORKER][p].score); } printf("GPU:\n"); for(p=0;pfound_codelet_names_on_arch[STARPU_CUDA_WORKER];++p) { printf("%d : %s bucket=%d (score = %f)\n", p, hp->found_codelet_names[prio_arch[STARPU_CUDA_WORKER][p].index], prio_arch[STARPU_CUDA_WORKER][p].index, prio_arch[STARPU_CUDA_WORKER][p].score); } */ STARPU_PTHREAD_MUTEX_UNLOCK(&hp->auto_calibration_mutex); } // used to get the name of a codelet, considering a codelet grouping strategy static const char *_heteroprio_get_codelet_name(enum autoheteroprio_codelet_grouping_strategy strategy, struct starpu_codelet *cl) { const char *name = NULL; switch(strategy) { case BY_PERF_MODEL_OR_NAME: name = _starpu_codelet_get_model_name(cl); break; case BY_NAME_ONLY: name = _starpu_codelet_get_name(cl); break; } return name ? name : AUTOHETEROPRIO_NO_NAME; } // used by get_task_auto_priority for knowing if a submitted codelet equals an other static int are_same_codelets(struct _starpu_heteroprio_data *hp, const struct starpu_task *task, const char name[CODELET_MAX_NAME_LENGTH], unsigned valid_archs) { unsigned task_valid_archs = task->where >= 0 ? (unsigned) task->where : task->cl->where; if(task_valid_archs != valid_archs) { // are not same codelet, because different architectures return 0; } const char *task_name = _heteroprio_get_codelet_name(hp->codelet_grouping_strategy, task->cl); return strncmp(name, task_name, CODELET_MAX_NAME_LENGTH) == 0; } static int get_task_auto_priority(struct _starpu_heteroprio_data *hp, const struct starpu_task *task) { STARPU_ASSERT(use_auto_mode); STARPU_ASSERT(hp->use_auto_calibration); STARPU_ASSERT(hp->found_codelet_names_length <= HETEROPRIO_MAX_PRIO); if(task->cl->where == STARPU_NOWHERE) { return -1; } const char *name = _heteroprio_get_codelet_name(hp->codelet_grouping_strategy, task->cl); starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&hp->auto_calibration_mutex); starpu_worker_relax_off(); unsigned current_priority; for(current_priority = 0;current_priorityfound_codelet_names_length;++current_priority) { if(are_same_codelets(hp, task, &hp->found_codelet_names[current_priority][0], hp->buckets[current_priority].valid_archs)) { STARPU_PTHREAD_MUTEX_UNLOCK(&hp->auto_calibration_mutex); return current_priority; } } // codelet's name does not exist in found_codelet_names, add it STARPU_ASSERT(hp->found_codelet_names_length < HETEROPRIO_MAX_PRIO); const unsigned found_codelet_names_length = hp->found_codelet_names_length; if(!task->cl->model) { // The codelet does not have a perf model _STARPU_DISP("[HETEROPRIO] Warning: codelet %s does not have a perfmodel. This may negatively impact heteroprio's auto prioritizing.\n", name); } unsigned archs[STARPU_NB_TYPES] = {0}; unsigned arch; for(arch=0;archauto_calibration_mutex); return found_codelet_names_length; } // checks that auto-heteroprio arrays are correctly set (for debugging purposes) //static void check_auto_heteroprio_mapping(struct _starpu_heteroprio_data *hp) //{ // // may be useful // (void) hp; //} static double get_job_NOD(struct _starpu_heteroprio_data *hp, struct _starpu_job *job) { STARPU_ASSERT(!hp->freeze_data_gathering); STARPU_ASSERT(_starpu_graph_record == 1); double NOD = 0.f; //STARPU_PTHREAD_MUTEX_LOCK(&job->sync_mutex); /*if(!job->tag) { STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex); return 0; }*/ //STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex); _starpu_graph_wrlock(); struct _starpu_graph_node *node = job->graph_node; if(!node) { // No information because the graph isn't available _starpu_graph_wrunlock(); return 0.f; } unsigned n; for(n=0;nn_outgoing;++n) { struct _starpu_graph_node *successor = node->outgoing[n]; // there is a node->outgoing_slot, but this ordering array does not seem useful here if(successor) { // successor may be NULL NOD += 1.f/(double)successor->n_incoming; } } _starpu_graph_wrunlock(); return NOD; } // get job's NRT (Normalized Released Time) static double get_job_NRT(struct _starpu_heteroprio_data *hp, struct _starpu_job *job, unsigned arch) { STARPU_ASSERT(!hp->freeze_data_gathering); STARPU_ASSERT(_starpu_graph_record == 1); double NOD = 0.f; //STARPU_PTHREAD_MUTEX_LOCK(&job->sync_mutex); /*if(!job->tag) { STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex); return 0; }*/ //STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex); _starpu_graph_wrlock(); struct _starpu_graph_node *node = job->graph_node; if(!node) { // No information because the graph isn't available _starpu_graph_wrunlock(); return 0.f; } unsigned n; for(n=0;nn_outgoing;++n) { struct _starpu_graph_node *successor = node->outgoing[n]; // there is a node->outgoing_slot, but this ordering array does not seem useful here if(successor) { // successor may be NULL struct _starpu_job *successor_job = successor->job; STARPU_PTHREAD_MUTEX_LOCK(&successor_job->sync_mutex); const struct starpu_task *successor_task = successor_job->task; STARPU_PTHREAD_MUTEX_UNLOCK(&successor_job->sync_mutex); if(successor_task->cl) { // if a codelet is associated to the task, we can count it in the NOD int successor_prio = get_task_auto_priority(hp, successor_task); double successor_arch_time; if(successor_prio == -1) { successor_arch_time = 0.f; } else { successor_arch_time = get_autoheteroprio_prio_arch_proportion(hp, successor_prio, arch) * get_autoheteroprio_normalized_time(hp, successor_prio, arch); } NOD += successor_arch_time/(double)successor->n_incoming; } } } _starpu_graph_wrunlock(); return NOD; } static void register_arch_times(struct _starpu_heteroprio_data *hp, unsigned arch, double busy_time, double free_time) { STARPU_ASSERT(!hp->freeze_data_gathering); double summed_busy_time = hp->average_arch_busy_time[arch] + busy_time; double summed_free_time = hp->average_arch_free_time[arch] + free_time; double max_time = STARPU_MAX(summed_busy_time, summed_free_time); double scale_to_apply = 1.0f; if(max_time > AUTOHETEROPRIO_MAX_WORKER_PROFILING_TIME) { scale_to_apply = AUTOHETEROPRIO_MAX_WORKER_PROFILING_TIME/max_time; } hp->average_arch_busy_time[arch] = summed_busy_time*scale_to_apply; hp->average_arch_free_time[arch] = summed_free_time*scale_to_apply; } // gets the lowest expected time between each architectures static double get_best_autoheteroprio_estimated_time(struct _starpu_heteroprio_data *hp, unsigned priority) { double time = 999999999999999.f; unsigned arch; for(arch=0;archtask); double time; if(task_priority == -1) { time = AUTOHETEROPRIO_DEFAULT_TASK_TIME; } else { time = get_best_autoheteroprio_estimated_time(hp, task_priority); } return time; } static double get_job_successors_best_time_sum(struct _starpu_heteroprio_data *hp, struct _starpu_job *job) { STARPU_ASSERT(!hp->freeze_data_gathering); STARPU_ASSERT(_starpu_graph_record == 1); double sum = 0.f; _starpu_graph_wrlock(); struct _starpu_graph_node *node = job->graph_node; if(!node) { // No information because the graph isn't available _starpu_graph_wrunlock(); return 0.f; } unsigned n; for(n=0;nn_outgoing;++n) { struct _starpu_graph_node *successor = node->outgoing[n]; // there is a node->outgoing_slot, but this ordering array does not seem useful here if(successor && successor->job && successor->job->task->cl) { // successor may be NULL sum += get_job_best_time(hp, successor->job); } } _starpu_graph_wrunlock(); return sum; } static void add_NOD_to_data(struct _starpu_heteroprio_data *hp, unsigned task_priority, double NOD) { STARPU_ASSERT(!hp->freeze_data_gathering); if(hp->prio_average_NOD_count[task_priority] < AUTOHETEROPRIO_RELEVANT_TASK_LIFE) { ++hp->prio_average_NOD_count[task_priority]; } const unsigned count = hp->prio_average_NOD_count[task_priority]; hp->prio_average_NOD[task_priority] = hp->prio_average_NOD[task_priority] * (double)(count - 1) / (double)count + NOD / (double)count; } static void add_URTs_to_data(struct _starpu_heteroprio_data *hp, unsigned task_priority, double archs_URTs[STARPU_NARCH]) { STARPU_ASSERT(!hp->freeze_data_gathering); if(hp->prio_average_URT_count[task_priority] < AUTOHETEROPRIO_RELEVANT_TASK_LIFE) { ++hp->prio_average_URT_count[task_priority]; } const unsigned count = hp->prio_average_URT_count[task_priority]; unsigned arch; for(arch=0;archprio_average_URT[arch][task_priority] = hp->prio_average_URT[arch][task_priority] * (double)(count - 1) / (double)count + archs_URTs[arch] / (double)count; } } static void register_execution_time(struct _starpu_heteroprio_data *hp, unsigned arch, unsigned task_priority, double time) { STARPU_ASSERT(!hp->freeze_data_gathering); if(hp->prio_average_time_arch_count[arch][task_priority] < AUTOHETEROPRIO_RELEVANT_TASK_LIFE) { ++hp->prio_average_time_arch_count[arch][task_priority]; } const unsigned count = hp->prio_average_time_arch_count[arch][task_priority]; hp->prio_average_time_arch[arch][task_priority] = hp->prio_average_time_arch[arch][task_priority] * (double)(count - 1) / (double)count + time / (double)count; hp->prio_arch_has_time_info[arch][task_priority] = 1; } static inline unsigned get_total_submitted_task_num(struct _starpu_heteroprio_data *hp) { unsigned total = 0; unsigned idx_prio; for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) { total += hp->prio_overall_proportion_count[idx_prio]; } return total; } static inline double get_sum_task_proportions(struct _starpu_heteroprio_data *hp) { double total = 0.f; unsigned idx_prio; for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) { total += hp->prio_overall_proportion[idx_prio]; } return total; } // noralizes tasks proportions so that their sum equals 1 static inline void normalize_task_proportions(struct _starpu_heteroprio_data *hp) { const double total_task_proportions = get_sum_task_proportions(hp); STARPU_ASSERT(total_task_proportions > 0); unsigned idx_prio; for(idx_prio = 0; idx_prio < HETEROPRIO_MAX_PRIO; ++idx_prio) { hp->prio_overall_proportion[idx_prio] /= total_task_proportions; } } static void add_submitted_task_to_data(struct _starpu_heteroprio_data *hp, unsigned task_priority) { STARPU_ASSERT(!hp->freeze_data_gathering); if(hp->prio_overall_proportion_count[task_priority] < AUTOHETEROPRIO_RELEVANT_TASK_LIFE) { ++hp->prio_overall_proportion_count[task_priority]; } const unsigned count = get_total_submitted_task_num(hp); STARPU_ASSERT(count > 0); hp->prio_overall_proportion[task_priority] += 1.f/(double)count; // take back task proportions to a valid value (sum = 1) normalize_task_proportions(hp); } // gets the sum of a task's architecture proportions static inline double get_sum_task_arch_proportions(struct _starpu_heteroprio_data *hp, unsigned task_priority) { double total = 0.f; unsigned arch; for(arch=0;archprio_arch_proportion[arch][task_priority]; } return total; } // noralizes tasks execution proportions so that the sum of proportions of a task on each arch equals 1 // EXAMPLE : task A : %CPU = 0.75, %GPU = 0.25 static inline void normalize_task_arch_proportions(struct _starpu_heteroprio_data *hp, unsigned task_priority) { const double total_task_proportions = get_sum_task_arch_proportions(hp, task_priority); STARPU_ASSERT(total_task_proportions > 0); unsigned arch; for(arch=0;archprio_arch_proportion[arch][task_priority] /= total_task_proportions; } } static void register_task_arch_execution(struct _starpu_heteroprio_data *hp, unsigned task_priority, unsigned arch) { STARPU_ASSERT(!hp->freeze_data_gathering); if(hp->prio_arch_proportion_count[task_priority] < AUTOHETEROPRIO_RELEVANT_TASK_LIFE) { ++hp->prio_arch_proportion_count[task_priority]; } unsigned count = hp->prio_arch_proportion_count[task_priority]; STARPU_ASSERT(count > 0); if(count >= 2) { // to have correct proportions and not divide by zero count -=1; } hp->prio_arch_proportion[arch][task_priority] += 1.f/(double)count; // take back task proportions to a valid value (sum = 1) normalize_task_arch_proportions(hp, task_priority); } static void add_successors_best_time_sum_to_data(struct _starpu_heteroprio_data *hp, unsigned task_priority, double sum) { STARPU_ASSERT(!hp->freeze_data_gathering); if(hp->prio_average_successors_best_time_sum_count[task_priority] < AUTOHETEROPRIO_RELEVANT_TASK_LIFE) { ++hp->prio_average_successors_best_time_sum_count[task_priority]; } const unsigned count = hp->prio_average_successors_best_time_sum_count[task_priority]; hp->prio_average_successors_best_time_sum[task_priority] = hp->prio_average_successors_best_time_sum[task_priority] * (double)(count - 1) / (double)count + sum / (double)count; } static void add_best_time_to_data(struct _starpu_heteroprio_data *hp, unsigned task_priority, double sum) { STARPU_ASSERT(!hp->freeze_data_gathering); if(hp->prio_average_best_count[task_priority] < AUTOHETEROPRIO_RELEVANT_TASK_LIFE) { ++hp->prio_average_best_count[task_priority]; } const unsigned count = hp->prio_average_best_count[task_priority]; hp->prio_average_best[task_priority] = hp->prio_average_best[task_priority] * (double)(count - 1) / (double)count + sum / (double)count; } static void autoheteroprio_update_slowdown_data(struct _starpu_heteroprio_data *hp) { unsigned p, arch; for(p=0;pfound_codelet_names_length;++p) { unsigned valid_archs[STARPU_NB_TYPES] = {0}; double arch_times[STARPU_NB_TYPES] = {0.f}; for(arch = 0; arch < STARPU_NB_TYPES; ++arch) { valid_archs[arch] = arch_can_execute_prio(hp, arch, p); if(valid_archs[arch]) { double arch_time = get_autoheteroprio_estimated_time(hp, p, arch); STARPU_ASSERT(arch_time > 0.f); arch_times[arch] = arch_time; } } // Assert that at least one architecture can execute priority for(arch = 0; arch < STARPU_NB_TYPES && !valid_archs[arch]; ++arch) ; STARPU_ASSERT(arch < STARPU_NB_TYPES); arch = 0; while(!valid_archs[arch]) ++arch; unsigned fastest_arch = arch; double best_time = arch_times[arch]; ++arch; for(; arch < STARPU_NB_TYPES; ++arch) { if(valid_archs[arch] && arch_times[arch] < best_time) { fastest_arch = arch; best_time = arch_times[arch]; } } starpu_heteroprio_set_faster_arch_hp(hp, fastest_arch, p); for(arch = 0; arch < STARPU_NB_TYPES; ++arch) { if(valid_archs[arch] && arch != fastest_arch) starpu_heteroprio_set_arch_slow_factor_hp(hp, arch, p, arch_times[arch]/best_time); } } check_heteroprio_mapping(hp); } /* Push a new task (simply store it and update counters) */ static int push_task_heteroprio_policy(struct starpu_task *task) { unsigned sched_ctx_id = task->sched_ctx; struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); unsigned computed_best_mem_node = 0; unsigned best_node_now[PUSH_NB_AUTO] = {0}; if(hp->use_locality) { #ifdef LAHETEROPRIO_PRINT_STAT lastats.push_to_use[starpu_worker_get_id()+1][hp->pushStrategyToUse] += 1; #endif //unsigned best_node_now[PUSH_NB_AUTO] = {0}; if(hp->pushStrategySet == PUSH_AUTO) { unsigned idx_strategy; for(idx_strategy = 0 ; idx_strategy < PUSH_NB_AUTO ; ++idx_strategy) { best_node_now[idx_strategy] = get_best_mem_node(task, hp, idx_strategy); } } computed_best_mem_node = (hp->pushStrategySet == PUSH_AUTO && hp->pushStrategyToUse < PUSH_NB_AUTO ? best_node_now[hp->pushStrategyToUse] : get_best_mem_node(task, hp, hp->pushStrategyToUse)); STARPU_ASSERT_MSG(hp->map_wgroup_has_been_called, "starpu_laheteroprio_map_wgroup_memory_nodes \ has not been called while you are using the heteroprio in LA mode. To fix this, you can either turn LA mode off by setting \ the HETEROPRIO_USE_LA variable to 0, or calling starpu_laheteroprio_map_wgroup_memory_nodes after starpu_laheteroprio_set_nb_prios.\n"); } const unsigned best_mem_node = computed_best_mem_node; /* One worker at a time uses heteroprio */ starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&hp->policy_mutex); starpu_worker_relax_off(); /* Get tasks priority (ID) */ int task_priority; if(hp->use_auto_calibration) { task_priority = get_task_auto_priority(hp, task); if(!hp->freeze_data_gathering && hp->priority_last_ordering >= hp->priority_ordering_interval) { hp->priority_last_ordering = 0; } if(hp->priority_last_ordering == 0) { // first pushed task OR at least "priority_ordering_interval" tasks have been pushed order_priorities(hp); if(hp->autoheteroprio_print_prio_after_ordering) { print_priorities(hp); } autoheteroprio_update_slowdown_data(hp); } ++hp->priority_last_ordering; if(!hp->freeze_data_gathering) { struct _starpu_job *job = _starpu_get_job_associated_to_task(task); if(task_priority != -1) { // register that the task has been submitted add_submitted_task_to_data(hp, task_priority); double NOD = get_job_NOD(hp, job); add_NOD_to_data(hp, task_priority, NOD); double archs_NRTs[STARPU_NARCH]; unsigned arch; for(arch=0;archautoheteroprio_print_data_on_update) { unsigned arch; char is_arch_used[STARPU_NB_TYPES]; for(arch = 0; arch < STARPU_NB_TYPES; ++arch) { if(hp->average_arch_busy_time[arch] + hp->average_arch_free_time[arch] > 0) is_arch_used[arch] = 1; else is_arch_used[arch] = 0; } fprintf(stderr, "Updated values :\n"); fprintf(stderr, "Busy proportion :\n\t"); for(arch = 0; arch < STARPU_NB_TYPES; ++arch) { if(is_arch_used[arch]) fprintf(stderr, "%s : %f, ", starpu_worker_get_type_as_string(arch), get_autoheteroprio_arch_busy_proportion(hp, arch)); } fprintf(stderr, "\n"); unsigned idx_prio; fprintf(stderr, "Assumed values for heuristic computation :\n"); for(idx_prio = 0; idx_prio < hp->found_codelet_names_length; ++idx_prio) { fprintf(stderr, "task %s :\n\tNOD = %f", &hp->found_codelet_names[idx_prio][0], get_autoheteroprio_NOD(hp, idx_prio)); for(arch = 0; arch < STARPU_NB_TYPES; ++arch) { if(is_arch_used[arch]) fprintf(stderr, ", URT_%s = %f", starpu_worker_get_type_as_string(arch), get_autoheteroprio_URT(hp, arch, idx_prio)); } fprintf(stderr, "\n\testimated time : "); for(arch = 0; arch < STARPU_NB_TYPES; ++arch) { if(is_arch_used[arch]) fprintf(stderr, "%s : %f, ", starpu_worker_get_type_as_string(arch), get_autoheteroprio_estimated_time(hp, idx_prio, arch)); } fprintf(stderr, "\n\tnormalized time : "); for(arch = 0; arch < STARPU_NB_TYPES; ++arch) { if(is_arch_used[arch]) fprintf(stderr, "%s : %f, ", starpu_worker_get_type_as_string(arch), get_autoheteroprio_normalized_time(hp, idx_prio, arch)); } fprintf(stderr, "\n\tbestsum=%f, proportion=%f", get_autoheteroprio_successors_best_time_sum(hp, idx_prio), get_autoheteroprio_prio_proportion(hp, idx_prio)); for(arch = 0; arch < STARPU_NB_TYPES; ++arch) { if(is_arch_used[arch]) fprintf(stderr, ", prop%s=%f", starpu_worker_get_type_as_string(arch), get_autoheteroprio_prio_arch_proportion(hp, idx_prio, arch)); } fprintf(stderr, "\n"); } } } } else { task_priority = task->priority; } /* Retrieve the correct bucket */ STARPU_ASSERT(task_priority >= 0); STARPU_ASSERT(task_priority < HETEROPRIO_MAX_PRIO); struct _heteroprio_bucket* bucket = &hp->buckets[task_priority]; /* Ensure that any worker that check that list can compute the task */ STARPU_ASSERT_MSG(bucket->valid_archs, "The bucket %d does not have any archs\n", task_priority); STARPU_ASSERT(((bucket->valid_archs ^ task->where) & bucket->valid_archs) == 0); if(hp->use_locality) { /* save the task */ starpu_task_list_push_front(&bucket->tasks_queue[best_mem_node], task); if(hp->pushStrategySet == PUSH_AUTO) { laqueue_push(&bucket->auto_mn[best_mem_node], best_node_now); } #ifdef LAHETEROPRIO_PRINT_STAT if(starpu_worker_get_id() != -1) { lastats.nb_tasks_per_wgroup[best_mem_node][task_priority] += 1; lastats.nb_tasks_per_worker[starpu_worker_get_id()][task_priority] += 1; } #endif // LAHETEROPRIO_PRINT_STAT bucket->tasks_queue_ntasks += 1; #ifdef LAHETEROPRIO_PRINT_STAT lastats.nb_tasks += 1; #endif // LAHETEROPRIO_PRINT_STAT } else { /* save the task */ starpu_task_list_push_front(&bucket->tasks_queue[0],task); /* Increase the total number of tasks */ bucket->tasks_queue_ntasks += 1; } /* Inc counters */ unsigned arch_index; for(arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index) { /* We test the archs on the bucket and not on task->where since it is restrictive */ if(bucket->valid_archs & starpu_heteroprio_types_to_arch(arch_index)) { hp->nb_remaining_tasks_per_arch_index[arch_index] += 1; } } hp->total_tasks_in_buckets += 1; starpu_push_task_end(task); /*if there are no tasks_queue block */ /* wake people waiting for a task */ struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; #ifndef STARPU_NON_BLOCKING_DRIVERS char dowake[STARPU_NMAXWORKERS] = { 0 }; #endif workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); #ifdef STARPU_NON_BLOCKING_DRIVERS if (!starpu_bitmap_get(&hp->waiters, worker)) /* This worker is not waiting for a task */ continue; #endif if (starpu_worker_can_execute_task_first_impl(worker, task, NULL)) { /* It can execute this one, tell him! */ #ifdef STARPU_NON_BLOCKING_DRIVERS starpu_bitmap_unset(&hp->waiters, worker); /* We really woke at least somebody, no need to wake somebody else */ break; #else dowake[worker] = 1; #endif } } /* Let the task free */ STARPU_PTHREAD_MUTEX_UNLOCK(&hp->policy_mutex); #if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) /* Now that we have a list of potential workers, try to wake one */ workers->init_iterator_for_parallel_tasks(workers, &it, task); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); if (dowake[worker]) if (starpu_wake_worker_relax_light(worker)) break; // wake up a single worker } #endif return 0; } static struct starpu_task *pop_task_heteroprio_policy(unsigned sched_ctx_id) { const unsigned workerid = starpu_worker_get_id_check(); struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); struct _heteroprio_worker_wrapper* worker = &hp->workers_heteroprio[workerid]; struct starpu_task* task = NULL; #ifdef STARPU_NON_BLOCKING_DRIVERS /* If no tasks available, no tasks in worker queue or some arch worker queue just return NULL */ if (!STARPU_RUNNING_ON_VALGRIND && (hp->total_tasks_in_buckets == 0 || hp->nb_remaining_tasks_per_arch_index[worker->arch_index] == 0) && (hp->use_locality || (worker->tasks_queue.ntasks == 0 && hp->nb_prefetched_tasks_per_arch_index[worker->arch_index] == 0))) { return NULL; } if (!STARPU_RUNNING_ON_VALGRIND && starpu_bitmap_get(&hp->waiters, workerid)) { /* Nobody woke us, avoid bothering the mutex */ return NULL; } #endif starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&hp->policy_mutex); starpu_worker_relax_off(); // if(hp->use_locality) // { // used only with use_locality==1 #ifdef LAHETEROPRIO_PRINT_STAT unsigned src_mem_node = (unsigned)-1; #endif unsigned best_node_previous[PUSH_NB_AUTO] = {0}; // } // else // { // used only with use_locality==0 /* keep track of the new added task to perform real prefetch on node */ unsigned nb_added_tasks = 0; // } if (hp->use_locality) { const unsigned wgroupid = hp->workers_laheteroprio_wgroup_index[workerid]; if (hp->nb_remaining_tasks_per_arch_index[worker->arch_index] != 0) { const struct starpu_laheteroprio_access_item *wgroup_access_order = hp->wgroup_pop_access_orders[wgroupid]; const unsigned wgroup_access_order_size = hp->wgroup_pop_access_orders_size[wgroupid]; unsigned idx_access_item; for (idx_access_item = 0; task == NULL && idx_access_item < wgroup_access_order_size; ++idx_access_item) { const unsigned current_wgroupid = wgroup_access_order[idx_access_item].wgroup_idx; /*Retrieve the bucket using the mapping */ struct _heteroprio_bucket *bucket = &hp->buckets[hp->prio_mapping_per_arch_index[worker->arch_index][wgroup_access_order[idx_access_item].prio_idx]]; /*Ensure we can compute task from this bucket */ STARPU_ASSERT(bucket->valid_archs &worker->arch_type); /*Take one task if possible */ if (!starpu_task_list_empty(&bucket->tasks_queue[current_wgroupid])) { if ((bucket->factor_base_arch_index == 0 || worker->arch_index == bucket->factor_base_arch_index || (((float) bucket->tasks_queue_ntasks) / ((float) hp->nb_workers_per_arch_index[bucket->factor_base_arch_index])) >= bucket->slow_factors_per_index[worker->arch_index])) { task = starpu_task_list_pop_front(&bucket->tasks_queue[current_wgroupid]); if(!starpu_worker_can_execute_task(workerid, task, 0)) { // Put the task back because worker can't execute it (e.g. codelet.can_execute) starpu_task_list_push_front(&bucket->tasks_queue[0], task); break; } if (hp->pushStrategySet == PUSH_AUTO) { memcpy(best_node_previous, laqueue_pop(&bucket->auto_mn[current_wgroupid]), sizeof(unsigned) *PUSH_NB_AUTO); } /*Save the task */ STARPU_AYU_ADDTOTASKQUEUE(starpu_task_get_job_id(task), workerid); /*Update general counter */ hp->total_tasks_in_buckets -= 1; bucket->tasks_queue_ntasks -= 1; unsigned arch_index; for (arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index) { /*We test the archs on the bucket and not on task->where since it is restrictive */ if (bucket->valid_archs &starpu_heteroprio_types_to_arch(arch_index)) { hp->nb_remaining_tasks_per_arch_index[arch_index] -= 1; } } #ifdef LAHETEROPRIO_PRINT_STAT if (current_wgroupid != wgroupid) { lastats.task_stolen_per_worker[workerid][wgroup_access_order[idx_access_item].prio_idx] += 1; lastats.task_stolen_in_wgroup[current_wgroupid][wgroup_access_order[idx_access_item].prio_idx] += 1; } src_mem_node = current_wgroupid; #endif break; } #ifdef LAHETEROPRIO_PRINT_STAT else { lastats.task_skipt_due_to_factor_per_worker[workerid][wgroup_access_order[idx_access_item].prio_idx] += 1; } #endif } #ifdef LAHETEROPRIO_PRINT_STAT else { if (current_wgroupid == wgroupid) { lastats.task_list_empty_per_worker[workerid][wgroup_access_order[idx_access_item].prio_idx] += 1; } } #endif } } } else { // !hp->use_locality /* Check that some tasks are available for the current worker arch */ if(hp->nb_remaining_tasks_per_arch_index[worker->arch_index] != 0) { /* Ideally we would like to fill the prefetch array */ unsigned nb_tasks_to_prefetch = (STARPU_HETEROPRIO_MAX_PREFETCH-worker->tasks_queue.ntasks); /* But there are maybe less tasks than that! */ if(nb_tasks_to_prefetch > hp->nb_remaining_tasks_per_arch_index[worker->arch_index]) { nb_tasks_to_prefetch = hp->nb_remaining_tasks_per_arch_index[worker->arch_index]; } /* But in case there are less tasks than worker we take the minimum */ if(hp->nb_remaining_tasks_per_arch_index[worker->arch_index] < starpu_sched_ctx_get_nworkers(sched_ctx_id)) { if(worker->tasks_queue.ntasks == 0) nb_tasks_to_prefetch = 1; else nb_tasks_to_prefetch = 0; } unsigned idx_prio, arch_index; /* We iterate until we found all the tasks we need */ for(idx_prio = 0; nb_tasks_to_prefetch && idx_prio < hp->nb_prio_per_arch_index[worker->arch_index]; ++idx_prio) { /* Retrieve the bucket using the mapping */ struct _heteroprio_bucket* bucket = &hp->buckets[hp->prio_mapping_per_arch_index[worker->arch_index][idx_prio]]; /* Ensure we can compute task from this bucket */ STARPU_ASSERT(bucket->valid_archs & worker->arch_type); /* Take nb_tasks_to_prefetch tasks if possible */ while(!starpu_task_list_empty(&bucket->tasks_queue[0]) && nb_tasks_to_prefetch && (bucket->factor_base_arch_index == 0 || worker->arch_index == bucket->factor_base_arch_index || (((float)bucket->tasks_queue_ntasks)/((float)hp->nb_workers_per_arch_index[bucket->factor_base_arch_index])) >= bucket->slow_factors_per_index[worker->arch_index] )) { task = starpu_task_list_pop_front(&bucket->tasks_queue[0]); if(!starpu_worker_can_execute_task(workerid, task, 0)) { // Put the task back because worker can't execute it (e.g. codelet.can_execute) starpu_task_list_push_front(&bucket->tasks_queue[0], task); break; } /* Save the task */ STARPU_AYU_ADDTOTASKQUEUE(starpu_task_get_job_id(task), workerid); starpu_st_prio_deque_push_front_task(&worker->tasks_queue, task); /* Update general counter */ hp->nb_prefetched_tasks_per_arch_index[worker->arch_index] += 1; hp->total_tasks_in_buckets -= 1; bucket->tasks_queue_ntasks -= 1; for(arch_index = 0; arch_index < STARPU_NB_TYPES; ++arch_index) { /* We test the archs on the bucket and not on task->where since it is restrictive */ if(bucket->valid_archs & starpu_heteroprio_types_to_arch(arch_index)) { hp->nb_remaining_tasks_per_arch_index[arch_index] -= 1; } } /* Decrease the number of tasks to found */ nb_tasks_to_prefetch -= 1; nb_added_tasks += 1; // TODO starpu_prefetch_task_input_for(task, workerid); } } } task = NULL; /* The worker has some tasks in its queue */ if(worker->tasks_queue.ntasks) { task = starpu_st_prio_deque_pop_task_for_worker(&worker->tasks_queue, workerid, NULL); hp->nb_prefetched_tasks_per_arch_index[worker->arch_index] -= 1; } /* Otherwise look if we can steal some work */ else if(hp->nb_prefetched_tasks_per_arch_index[worker->arch_index]) { /* If HETEROPRIO_MAX_PREFETCH==1 it should not be possible to steal work */ STARPU_ASSERT(STARPU_HETEROPRIO_MAX_PREFETCH != 1); struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); unsigned victim; unsigned current_worker; /* Start stealing from just after ourself */ while(workers->has_next(workers, &it)) { current_worker = workers->get_next(workers, &it); if(current_worker == workerid) break; } /* circular loop */ while (1) { if (!workers->has_next(workers, &it)) { /* End of the list, restart from the beginning */ workers->init_iterator(workers, &it); } while(workers->has_next(workers, &it)) { victim = workers->get_next(workers, &it); /* When getting on ourself again, we're done trying to find work */ if(victim == workerid) goto done; /* If it is the same arch and there is a task to steal */ if(hp->workers_heteroprio[victim].arch_index == worker->arch_index && hp->workers_heteroprio[victim].tasks_queue.ntasks) { /* ensure the worker is not currently prefetching its data */ starpu_worker_lock(victim); if(hp->workers_heteroprio[victim].arch_index == worker->arch_index && hp->workers_heteroprio[victim].tasks_queue.ntasks) { /* steal the last added task */ task = starpu_st_prio_deque_pop_task_for_worker(&hp->workers_heteroprio[victim].tasks_queue, workerid, NULL); /* we steal a task update global counter */ hp->nb_prefetched_tasks_per_arch_index[hp->workers_heteroprio[victim].arch_index] -= 1; starpu_worker_unlock(victim); goto done; } starpu_worker_unlock(victim); } } } done: ; } } if (!task) { /* Tell pushers that we are waiting for tasks_queue for us */ starpu_bitmap_set(&hp->waiters, workerid); } STARPU_PTHREAD_MUTEX_UNLOCK(&hp->policy_mutex); if(task &&_starpu_get_nsched_ctxs() > 1) { starpu_worker_relax_on(); _starpu_sched_ctx_lock_write(sched_ctx_id); starpu_worker_relax_off(); if (_starpu_sched_ctx_worker_is_master_for_child_ctx(sched_ctx_id, workerid, task)) task = NULL; _starpu_sched_ctx_unlock_write(sched_ctx_id); if(hp->use_locality) { #ifdef LAHETEROPRIO_PRINT_STAT { const unsigned best_node_now = get_best_mem_node(task, hp, hp->pushStrategyToUse); if (best_node_now != src_mem_node) { lastats.pop_redirect[workerid][src_mem_node] += 1; } } #endif if (hp->pushStrategySet == PUSH_AUTO) { unsigned best_node_now[PUSH_NB_AUTO] = { 0 }; unsigned idx_strategy; for (idx_strategy = 0; idx_strategy < PUSH_NB_AUTO; ++idx_strategy) { best_node_now[idx_strategy] = get_best_mem_node(task, hp, idx_strategy); } STARPU_PTHREAD_MUTEX_LOCK(&hp->push_history_mutex); unsigned idx_best_strategy = 0; for (idx_strategy = 0; idx_strategy < PUSH_NB_AUTO; ++idx_strategy) { if (best_node_now[idx_strategy] == best_node_previous[idx_strategy]) { hp->pushStrategyHistory[idx_strategy] += 1; } if (hp->pushStrategyHistory[idx_strategy] >= hp->pushStrategyHistory[idx_best_strategy]) { idx_best_strategy = idx_strategy; } } hp->pushStrategyToUse = idx_best_strategy; STARPU_PTHREAD_MUTEX_UNLOCK(&hp->push_history_mutex); } } } if(!hp->use_locality) { /* if we have task (task) me way have some in the queue (worker->tasks_queue_size) that was freshly added (nb_added_tasks) */ if(task && worker->tasks_queue.ntasks && nb_added_tasks && starpu_get_prefetch_flag()) { /* TODO berenger: iterate in the other sense */ struct starpu_task *task_to_prefetch = NULL; for (task_to_prefetch = starpu_task_prio_list_begin(&worker->tasks_queue.list); (task_to_prefetch != starpu_task_prio_list_end(&worker->tasks_queue.list) && nb_added_tasks && hp->nb_remaining_tasks_per_arch_index[worker->arch_index] != 0); task_to_prefetch = starpu_task_prio_list_next(&worker->tasks_queue.list, task_to_prefetch)) { /* prefetch from closest to end task */ if (!task_to_prefetch->prefetched) /* FIXME: it seems we are prefetching several times?? */ { starpu_prefetch_task_input_for(task_to_prefetch, workerid); } nb_added_tasks -= 1; } } } return task; } static void pre_exec_hook_heteroprio_policy(struct starpu_task *task, unsigned sched_ctx_id) { (void) task; const unsigned workerid = starpu_worker_get_id_check(); struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); if(hp->freeze_data_gathering || !hp->use_auto_calibration) return; starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&hp->policy_mutex); starpu_worker_relax_off(); struct timespec tsnow; _starpu_clock_gettime(&tsnow); const double now = starpu_timing_timespec_to_us(&tsnow); // Register free time between the post and pre hook hp->current_arch_free_time[starpu_worker_get_type(workerid)] += now - hp->last_hook_exec_time[workerid]; STARPU_PTHREAD_MUTEX_UNLOCK(&hp->policy_mutex); hp->last_hook_exec_time[workerid] = now; } static void post_exec_hook_heteroprio_policy(struct starpu_task *task, unsigned sched_ctx_id) { const unsigned workerid = starpu_worker_get_id_check(); struct _starpu_heteroprio_data *hp = (struct _starpu_heteroprio_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); if(hp->freeze_data_gathering || !hp->use_auto_calibration) return; struct timespec tsnow; _starpu_clock_gettime(&tsnow); const double now = starpu_timing_timespec_to_us(&tsnow); const double busy_time = now - hp->last_hook_exec_time[workerid]; starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&hp->policy_mutex); starpu_worker_relax_off(); // Register the busy time between the pre and post hook hp->current_arch_busy_time[starpu_worker_get_type(workerid)] += busy_time; // Register task execution const int prio = get_task_auto_priority(hp, task); if(prio != -1) { register_task_arch_execution(hp, prio, starpu_worker_get_type(workerid)); register_execution_time(hp, starpu_worker_get_type(workerid), prio, busy_time); } STARPU_PTHREAD_MUTEX_UNLOCK(&hp->policy_mutex); hp->last_hook_exec_time[workerid] = now; } struct starpu_sched_policy _starpu_sched_heteroprio_policy = { .init_sched = initialize_heteroprio_policy, .deinit_sched = deinitialize_heteroprio_policy, .add_workers = add_workers_heteroprio_policy, .remove_workers = remove_workers_heteroprio_policy, .push_task = push_task_heteroprio_policy, .simulate_push_task = NULL, .push_task_notify = NULL, .pop_task = pop_task_heteroprio_policy, .pre_exec_hook = pre_exec_hook_heteroprio_policy, .post_exec_hook = post_exec_hook_heteroprio_policy, .policy_name = "heteroprio", .policy_description = "heteroprio", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/heteroprio.h000066400000000000000000000032641507764646700221120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __SCHED_HETEROPRIO_H__ #define __SCHED_HETEROPRIO_H__ #include #define CODELET_MAX_NAME_LENGTH 32 #define HETEROPRIO_MAX_PRIO 100 #define LAHETEROPRIO_MAX_WORKER_GROUPS 10 #define AUTOHETEROPRIO_NO_NAME "NO_NAME" // will tend to ignore tasks older than this when measuring values such as NOD, execution time, etc. // i.e. if there are more than STARPU_AUTOHETEROPRIO_RELEVANT_TASK_LIFE of the same type #define AUTOHETEROPRIO_RELEVANT_TASK_LIFE 256 #define AUTOHETEROPRIO_RELEVANT_SAMPLE_SIZE 16 #define AUTOHETEROPRIO_EXTREMELY_LONG_TIME 999999999999999.0 #define AUTOHETEROPRIO_LONG_TIME 100000000.0 #define AUTOHETEROPRIO_FAIR_TIME 1000.0 #define AUTOHETEROPRIO_DEFAULT_TASK_TIME AUTOHETEROPRIO_FAIR_TIME // at the end of the execution, if the sum of all worker profiling times is superior to this, the times will be compressed so that no time exceeds this one // (probably in us) #define AUTOHETEROPRIO_MAX_WORKER_PROFILING_TIME 1000000000.0 #endif // __SCHED_HETEROPRIO_H__ starpu-1.4.9+dfsg/src/sched_policies/hierarchical_heft.c000066400000000000000000000065321507764646700233520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include static struct starpu_sched_component_composed_recipe * recipe_for_worker(enum starpu_worker_archtype a STARPU_ATTRIBUTE_UNUSED) { struct starpu_sched_component_composed_recipe * r = starpu_sched_component_composed_recipe_create(); starpu_sched_component_composed_recipe_add(r, (starpu_sched_component_create_t) starpu_sched_component_best_implementation_create, NULL); starpu_sched_component_composed_recipe_add(r, (starpu_sched_component_create_t) starpu_sched_component_fifo_create, NULL); return r; } static void initialize_heft_center_policy(unsigned sched_ctx_id) { struct starpu_sched_component_specs specs; memset(&specs,0,sizeof(specs)); struct starpu_sched_component_mct_data heft_data = { .alpha = 1.0, .beta = 1.0, ._gamma = 0.0, .idle_power = 0.0, /* .no_perf_model_component_create = starpu_sched_component_random_create, .arg_no_perf_model = NULL, .calibrating_component_create = starpu_sched_component_random_create, .arg_calibrating_component = NULL, */ }; struct starpu_sched_component_composed_recipe * r = starpu_sched_component_composed_recipe_create(); /* FIXME: add perfmodel_select component */ starpu_sched_component_composed_recipe_add(r, (starpu_sched_component_create_t) starpu_sched_component_heft_create,&heft_data); specs.hwloc_machine_composed_sched_component = r; r = starpu_sched_component_composed_recipe_create(); starpu_sched_component_composed_recipe_add(r, (starpu_sched_component_create_t) starpu_sched_component_best_implementation_create, NULL); starpu_sched_component_composed_recipe_add(r, (starpu_sched_component_create_t) starpu_sched_component_fifo_create ,NULL); specs.hwloc_component_composed_sched_component = r; specs.worker_composed_sched_component = recipe_for_worker; struct starpu_sched_tree *t = starpu_sched_component_make_scheduler(sched_ctx_id, specs); starpu_sched_component_composed_recipe_destroy(specs.hwloc_machine_composed_sched_component); starpu_sched_tree_update_workers(t); starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)t); } struct starpu_sched_policy _starpu_sched_tree_heft_hierarchical_policy = { .init_sched = initialize_heft_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = NULL, .post_exec_hook = NULL, .policy_name = "modular-heft-hierarchical", .policy_description = "hierarchical heft tree policy", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/modular_eager.c000066400000000000000000000032571507764646700225350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include static void initialize_eager_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_eager_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_eager_policy = { .init_sched = initialize_eager_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-eager", .policy_description = "eager modular policy", .worker_type = STARPU_WORKER_LIST, }; starpu-1.4.9+dfsg/src/sched_policies/modular_eager_prefetching.c000066400000000000000000000035221507764646700251060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include static void initialize_eager_prefetching_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_eager_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_eager_prefetching_policy = { .init_sched = initialize_eager_prefetching_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-eager-prefetching", .policy_description = "eager with prefetching modular policy", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/modular_eager_prio.c000066400000000000000000000035341507764646700235640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include static void initialize_eager_prio_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_eager_prio_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_eager_prio_policy = { .init_sched = initialize_eager_prio_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-eager-prio", .policy_description = "eager-prio modular policy", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/modular_ez.c000066400000000000000000000404621507764646700220670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include /* The scheduling strategy may look like this : * * | * fifo_above * | * decision_component <--push-- perfmodel_select_component --push--> eager_component * | | | | * fifo fifo fifo | * | | | | * eager eager eager | * | | | | * >--------------------------------------------------------------< * | | * best_impl_component best_impl_component * | | * worker_component worker_component */ /* The two thresolds concerns the fifo components below, which contains queues * who can handle the priority of StarPU tasks. You can tune your * scheduling by benching those values and choose which one is the * best for your current application. * The current value of the ntasks_threshold is the best we found * so far across several types of applications (cholesky, LU, stencil). */ #define _STARPU_SCHED_NTASKS_THRESHOLD_HEFT 30 #define _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT 2 #define _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT 1000000000.0 void starpu_sched_component_initialize_simple_schedulers(unsigned sched_ctx_id, unsigned ndecisions, ...) { struct starpu_sched_tree * t; struct starpu_sched_component *last = NULL; /* Stores the last created component, from top to bottom */ unsigned i, j, n; struct starpu_sched_component *userchoice_component = NULL; struct starpu_sched_component *pre_decision_component = NULL; struct starpu_sched_component *last_pre_decision_component = NULL; struct starpu_sched_component *decision_component = NULL; struct starpu_sched_component *no_perfmodel_component = NULL; struct starpu_sched_component *calibrator_component = NULL; unsigned sched; va_list varg_list; unsigned decide_flags; unsigned flags; /* Start building the tree */ t = starpu_sched_tree_create(sched_ctx_id); t->root = NULL; starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)t); STARPU_ASSERT(ndecisions >= 1); if (ndecisions != 1) { /* Take choice between schedulers from user */ userchoice_component = starpu_sched_component_userchoice_create(t, NULL); t->root = userchoice_component; } unsigned nbelow; unsigned nummaxids; va_start(varg_list, ndecisions); for (sched = 0; sched < ndecisions; sched++) { last = userchoice_component; starpu_sched_component_create_t create_decision_component = va_arg(varg_list, starpu_sched_component_create_t); void *data = va_arg(varg_list, void *); flags = va_arg(varg_list, unsigned); (void) create_decision_component; (void) data; while ((flags & STARPU_SCHED_SIMPLE_PRE_DECISION) == STARPU_SCHED_SIMPLE_PRE_DECISION) { STARPU_ASSERT(flags == STARPU_SCHED_SIMPLE_PRE_DECISION); (void) va_arg(varg_list, starpu_sched_component_create_t); (void) va_arg(varg_list, void *); flags = va_arg(varg_list, unsigned); } int above_prio = starpu_getenv_number_default("STARPU_SCHED_SORTED_ABOVE", (flags & STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO) ? 1 : 0); int below_prio = starpu_getenv_number_default("STARPU_SCHED_SORTED_BELOW", (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO) ? 1 : 0); /* Create combined workers if requested */ if (flags & STARPU_SCHED_SIMPLE_COMBINED_WORKERS) starpu_sched_find_all_worker_combinations(); /* Components parameters */ if (above_prio || below_prio) { /* The application may use any integer */ if (starpu_sched_ctx_min_priority_is_set(sched_ctx_id) == 0) starpu_sched_ctx_set_min_priority(sched_ctx_id, INT_MIN); if (starpu_sched_ctx_max_priority_is_set(sched_ctx_id) == 0) starpu_sched_ctx_set_max_priority(sched_ctx_id, INT_MAX); } /* See what the component will decide */ nummaxids = starpu_worker_get_count() + starpu_combined_worker_get_count(); if (starpu_memory_nodes_get_count() > nummaxids) nummaxids = starpu_memory_nodes_get_count(); if (STARPU_NARCH > nummaxids) nummaxids = STARPU_NARCH; if (sched == 0) decide_flags = flags & STARPU_SCHED_SIMPLE_DECIDE_MASK; else STARPU_ASSERT(decide_flags == (flags & STARPU_SCHED_SIMPLE_DECIDE_MASK)); } va_end(varg_list); unsigned below_id[nummaxids]; switch (decide_flags) { case STARPU_SCHED_SIMPLE_DECIDE_WORKERS: /* Count workers */ nbelow = starpu_worker_get_count() + starpu_combined_worker_get_count(); /* and no need for IDs */ break; case STARPU_SCHED_SIMPLE_DECIDE_MEMNODES: { /* Count memory nodes */ n = starpu_memory_nodes_get_count(); nbelow = 0; for(i = 0; i < n; i++) { for(j = 0; j < starpu_worker_get_count() + starpu_combined_worker_get_count(); j++) if (starpu_worker_get_memory_node(j) == i) break; if (j >= starpu_worker_get_count() + starpu_combined_worker_get_count()) /* Don't create a component string for this memory node with no worker */ continue; below_id[nbelow] = i; nbelow++; } break; } case STARPU_SCHED_SIMPLE_DECIDE_ARCHS: { /* Count available architecture types */ enum starpu_worker_archtype type; nbelow = 0; for (type = 0; type < STARPU_NARCH; type++) { if (starpu_worker_get_count_by_type(type)) { below_id[nbelow] = type; nbelow++; } } break; } default: STARPU_ABORT(); } STARPU_ASSERT(nbelow > 0); struct starpu_sched_component *last_below[nbelow]; memset(&last_below, 0, sizeof(last_below)); if (ndecisions != 1) { /* Will need to stage pulls, create one per choice */ for (i = 0; i < nbelow; i++) last_below[i] = starpu_sched_component_stage_create(t, NULL); } va_start(varg_list, ndecisions); for (sched = 0; sched < ndecisions; sched++) { last = userchoice_component; starpu_sched_component_create_t create_decision_component = va_arg(varg_list, starpu_sched_component_create_t); void *data = va_arg(varg_list, void *); flags = va_arg(varg_list, unsigned); while ((flags & STARPU_SCHED_SIMPLE_PRE_DECISION) == STARPU_SCHED_SIMPLE_PRE_DECISION) { starpu_sched_component_create_t create_pre_decision_component = va_arg(varg_list, starpu_sched_component_create_t); void *pre_data = va_arg(varg_list, void *); flags = va_arg(varg_list, unsigned); struct starpu_sched_component *component; component = create_pre_decision_component(t, pre_data); if (pre_decision_component) /* Connect after previous pre-decision component */ starpu_sched_component_connect(pre_decision_component, component); else /* We are the first pre-decision component */ pre_decision_component = component; last_pre_decision_component = component; } int above_prio = starpu_getenv_number_default("STARPU_SCHED_SORTED_ABOVE", (flags & STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO) ? 1 : 0); int below_prio = starpu_getenv_number_default("STARPU_SCHED_SORTED_BELOW", (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO) ? 1 : 0); if (nbelow == 1 && !(flags & STARPU_SCHED_SIMPLE_DECIDE_ALWAYS)) { /* Oh, no choice, we don't actually need to decide, just * use an eager scheduler */ decision_component = starpu_sched_component_eager_create(t, NULL); /* But make sure we have a fifo above it, fifos below it would * possibly refuse tasks out of available room */ flags |= STARPU_SCHED_SIMPLE_FIFO_ABOVE; } else { decision_component = create_decision_component(t, data); } if (last_pre_decision_component) starpu_sched_component_connect(last_pre_decision_component, decision_component); else pre_decision_component = decision_component; /* First, a fifo if requested */ if (flags & STARPU_SCHED_SIMPLE_FIFO_ABOVE) { struct starpu_sched_component *fifo_above; if (above_prio) { fifo_above = starpu_sched_component_prio_create(t, NULL); } else { fifo_above = starpu_sched_component_fifo_create(t, NULL); } if (!last) last = t->root = fifo_above; else { starpu_sched_component_connect(last, fifo_above); last = fifo_above; } } /* Then, perfmodel calibration if requested, and plug the scheduling decision-making component to it */ if (flags & STARPU_SCHED_SIMPLE_PERFMODEL) { no_perfmodel_component = starpu_sched_component_eager_create(t, NULL); calibrator_component = starpu_sched_component_eager_calibration_create(t, NULL); if (! (flags & STARPU_SCHED_SIMPLE_FIFO_ABOVE)) { /* We won't have a fifo above, the eager components do need one */ struct starpu_sched_component *calibrator_fifo = starpu_sched_component_fifo_create(t, NULL); struct starpu_sched_component *no_perfmodel_fifo = starpu_sched_component_fifo_create(t, NULL); starpu_sched_component_connect(calibrator_fifo, calibrator_component); starpu_sched_component_connect(no_perfmodel_fifo, no_perfmodel_component); calibrator_component = calibrator_fifo; no_perfmodel_component = no_perfmodel_fifo; } struct starpu_sched_component_perfmodel_select_data perfmodel_select_data = { .calibrator_component = calibrator_component, .no_perfmodel_component = no_perfmodel_component, .perfmodel_component = pre_decision_component, }; struct starpu_sched_component * perfmodel_select_component = starpu_sched_component_perfmodel_select_create(t, &perfmodel_select_data); if (!last) last = t->root = perfmodel_select_component; else starpu_sched_component_connect(last, perfmodel_select_component); starpu_sched_component_connect(perfmodel_select_component, pre_decision_component); starpu_sched_component_connect(perfmodel_select_component, calibrator_component); starpu_sched_component_connect(perfmodel_select_component, no_perfmodel_component); } else { /* No perfmodel calibration */ if (!last) /* Plug decision_component directly */ t->root = pre_decision_component; else /* Plug decision components to fifo */ starpu_sched_component_connect(last, pre_decision_component); } /* Take default ntasks_threshold */ unsigned ntasks_threshold; if (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW_NOLIMIT) { ntasks_threshold = UINT_MAX; } else if (starpu_sched_component_is_heft(decision_component) || starpu_sched_component_is_mct(decision_component) || starpu_sched_component_is_heteroprio(decision_component)) { /* These need more queueing to allow CPUs to take some share of the work */ ntasks_threshold = _STARPU_SCHED_NTASKS_THRESHOLD_HEFT; } else { ntasks_threshold = _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT; } /* But let user tune it */ ntasks_threshold = starpu_getenv_number_default("STARPU_NTASKS_THRESHOLD", ntasks_threshold); double exp_len_threshold; if (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW_NOLIMIT) { exp_len_threshold = INFINITY; } else { exp_len_threshold = _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT; } /* But let user tune it */ exp_len_threshold = starpu_getenv_float_default("STARPU_EXP_LEN_THRESHOLD", exp_len_threshold); int ready = starpu_getenv_number_default("STARPU_SCHED_READY", (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY) ? 1 : 0); int exp = (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP) ? 1 : 0; struct starpu_sched_component_prio_data prio_data = { .ntasks_threshold = ntasks_threshold, .exp_len_threshold = exp_len_threshold, .ready = ready, .exp = exp, }; struct starpu_sched_component_fifo_data fifo_data = { .ntasks_threshold = ntasks_threshold, .exp_len_threshold = exp_len_threshold, .ready = ready, .exp = exp, }; /* Create one fifo+eager component pair per choice, below scheduling decision */ for(i = 0; i < nbelow; i++) { last = decision_component; if (flags & STARPU_SCHED_SIMPLE_FIFOS_BELOW && !(decide_flags == STARPU_SCHED_SIMPLE_DECIDE_WORKERS && i >= starpu_worker_get_count())) { struct starpu_sched_component *fifo_below; if (below_prio) { fifo_below = starpu_sched_component_prio_create(t, &prio_data); } else { fifo_below = starpu_sched_component_fifo_create(t, &fifo_data); } starpu_sched_component_connect(last, fifo_below); last = fifo_below; } switch (decide_flags) { case STARPU_SCHED_SIMPLE_DECIDE_WORKERS: /* 1-1 mapping between choice and worker, no need for an eager component */ n = 1; break; case STARPU_SCHED_SIMPLE_DECIDE_MEMNODES: n = 0; for (j = 0; j < starpu_worker_get_count() + starpu_combined_worker_get_count(); j++) if (starpu_worker_get_memory_node(j) == below_id[i]) n++; break; case STARPU_SCHED_SIMPLE_DECIDE_ARCHS: n = starpu_worker_get_count_by_type(i); break; default: STARPU_ABORT(); } STARPU_ASSERT(n >= 1); if (n > 1) { /* Several workers for this choice, need to introduce * a component to distribute the work */ struct starpu_sched_component *distribute; if (flags & STARPU_SCHED_SIMPLE_WS_BELOW) { distribute = starpu_sched_component_work_stealing_create(t, NULL); } else { distribute = starpu_sched_component_eager_create(t, NULL); } starpu_sched_component_connect(last, distribute); last = distribute; } if (ndecisions != 1) /* Connect to stage component */ starpu_sched_component_connect(last, last_below[i]); else /* Directly let it connected to worker */ last_below[i] = last; } } va_end(varg_list); /* Finish by creating components per worker */ for(i = 0; i < starpu_worker_get_count() + starpu_combined_worker_get_count(); i++) { /* Start from the bottom */ struct starpu_sched_component * worker_component = starpu_sched_component_worker_new(sched_ctx_id, i); struct starpu_sched_component * worker = worker_component; unsigned id; /* Create implementation chooser if requested */ if (flags & STARPU_SCHED_SIMPLE_IMPL) { struct starpu_sched_component * impl_component = starpu_sched_component_best_implementation_create(t, NULL); starpu_sched_component_connect(impl_component, worker_component); /* Reroute components above through it */ worker = impl_component; } switch (decide_flags) { case STARPU_SCHED_SIMPLE_DECIDE_WORKERS: id = i; break; case STARPU_SCHED_SIMPLE_DECIDE_MEMNODES: for (id = 0; id < nbelow; id++) if (below_id[id] == starpu_worker_get_memory_node(i)) break; break; case STARPU_SCHED_SIMPLE_DECIDE_ARCHS: for (id = 0; id < nbelow; id++) if (below_id[id] == starpu_worker_get_type(i)) break; break; default: STARPU_ABORT(); } STARPU_ASSERT(id < nbelow); last = last_below[id]; if (!last) last = decision_component; starpu_sched_component_connect(last, worker); /* Plug perfmodel calibrator if requested */ /* FIXME: this won't work with several scheduling decisions */ if (flags & STARPU_SCHED_SIMPLE_PERFMODEL) { starpu_sched_component_connect(no_perfmodel_component, worker); /* Calibrator needs to choose the implementation */ starpu_sched_component_connect(calibrator_component, worker_component); } } starpu_sched_tree_update_workers(t); starpu_sched_tree_update_workers_in_ctx(t); } void starpu_sched_component_initialize_simple_scheduler(starpu_sched_component_create_t create_decision_component, void *data, unsigned flags, unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_schedulers(sched_ctx_id, 1, create_decision_component, data, flags); } starpu-1.4.9+dfsg/src/sched_policies/modular_gemm.c000066400000000000000000000136671507764646700224050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This scheduler runs only GEMMs on GPUs, and tries to feed them with as many * GEMMs as possible. */ #include #include /* Optionally, it can take memory affinity into account, to avoid too many GPU * data transfers */ #define MEMORY_AFFINITY struct child_data { double expected_start; double predicted; double predicted_transfer; double expected_end; unsigned child; }; static int compar(const void *_a, const void *_b) { const struct child_data *a = _a; const struct child_data *b = _b; if (a->expected_end < b->expected_end) return -1; if (a->expected_end == b->expected_end) return 0; return 1; } static int gemm_push_task(struct starpu_sched_component * component, struct starpu_task * task) { unsigned n = component->nchildren; unsigned i; /* See if it's a GEMM task */ const char *name = starpu_task_get_model_name(task); //fprintf(stderr, "it's %s\n", name); if (name && (!strcmp(name, "gemm") || !strcmp(name, "dgemm") || !strcmp(name, "sgemm") || !strcmp(name, "chol_model_22") || !strcmp(name, "starpu_dlu_lu_model_22") || !strcmp(name, "starpu_slu_lu_model_22"))) { /* It's a GEMM, try to push to GPUs */ struct child_data child_data[n]; for (i = 0; i < n; i++) { child_data[i].expected_end = -1; child_data[i].child = i; } /* Look at GPU availability time */ for (i = 0; i < n; i++) { struct starpu_sched_component *child = component->children[i]; double predicted; if (starpu_sched_component_execute_preds(child, task, &predicted)) { double expected_start; child_data[i].expected_start = expected_start = child->estimated_end(child); child_data[i].predicted = predicted; child_data[i].expected_end = expected_start + predicted; #ifdef MEMORY_AFFINITY double predicted_transfer; child_data[i].predicted_transfer = predicted_transfer = starpu_sched_component_transfer_length(child, task); child_data[i].expected_end += predicted_transfer; #endif } } /* Sort by increasing expected end */ qsort(child_data, n, sizeof(*child_data), compar); /* Try to push to the GPU with minimum availability time, to balance the load. */ for (i = 0; i < n; i++) { if (child_data[i].expected_end != -1) { struct starpu_sched_component *child = component->children[child_data[i].child]; /* Note it in the task so that estimated_end() has it */ task->predicted = child_data[i].predicted; task->predicted_transfer = child_data[i].predicted_transfer; int ret = starpu_sched_component_push_task(component,child,task); if (!ret) /* Ok, this GPU took it */ return 0; } } } int workerid; /* It's not a GEMM, or no GPU wanted to take it, find somebody else */ for(workerid = starpu_bitmap_first(&component->workers_in_ctx); workerid != -1; workerid = starpu_bitmap_next(&component->workers_in_ctx, workerid)) { int nimpl; for(nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { if(starpu_worker_can_execute_task(workerid,task,nimpl) || starpu_combined_worker_can_execute_task(workerid, task, nimpl)) { for (i = 0; i < n; i++) { struct starpu_sched_component *child = component->children[i]; int idworker; for(idworker = starpu_bitmap_first(&component->children[i]->workers); idworker != -1; idworker = starpu_bitmap_next(&component->children[i]->workers, idworker)) { if (idworker == workerid) { if ((starpu_cpu_worker_get_count() == 0 || starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) && (starpu_worker_can_execute_task(workerid,task,nimpl) || starpu_combined_worker_can_execute_task(workerid, task, nimpl))) { int ret = starpu_sched_component_push_task(component,child,task); if (!ret) return 0; } } } } } } } /* FIFOs are full */ return 1; } struct starpu_sched_component *starpu_sched_component_gemm_create(struct starpu_sched_tree *tree, void *params STARPU_ATTRIBUTE_UNUSED) { struct starpu_sched_component *component = starpu_sched_component_create(tree, "gemm"); component->push_task = gemm_push_task; return component; } static void initialize_gemm_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_gemm_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_MEMNODES | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_gemm_policy = { .init_sched = initialize_gemm_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-gemm", .policy_description = "gemm modular policy", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/modular_heft.c000066400000000000000000000175611507764646700224030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include /* The scheduling strategy look like this : * * | * window_component * | * mct_component <--push-- perfmodel_select_component --push--> eager_component * | | * | | * >----------------------------------------------------< * | | * best_impl_component best_impl_component * | | * prio_component prio_component * | | * worker_component worker_component * * A window contain the tasks that failed to be pushed, so as when the prio_components reclaim * tasks by calling can_push to their parent (classically, just after a successful pop have * been made by its associated worker_component), this call goes up to the window_component which * pops a task from its local queue and try to schedule it by pushing it to the * decision_component. * Finally, the task will be pushed to the prio_component which is the direct * parent in the tree of the worker_component the task has been scheduled on. This * component will push the task on its local queue if no one of the two thresholds * have been reached for it, or send a push_error signal to its parent. */ static void initialize_heft_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_mct_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_PERFMODEL | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_heft_policy = { .init_sched = initialize_heft_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-heft", .policy_description = "heft modular policy", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; static void initialize_dmda_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_mct_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_PERFMODEL | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | STARPU_SCHED_SIMPLE_FIFOS_BELOW_NOLIMIT | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_dmda_policy = { .init_sched = initialize_dmda_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-dmda", .policy_description = "data-aware performance model modular policy", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; static void initialize_dmdap_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_mct_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_PERFMODEL | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | STARPU_SCHED_SIMPLE_FIFOS_BELOW_NOLIMIT | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_dmdap_policy = { .init_sched = initialize_dmdap_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-dmdap", .policy_description = "data-aware performance model modular policy (priority)", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; static void initialize_dmdar_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_mct_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_PERFMODEL | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | STARPU_SCHED_SIMPLE_FIFOS_BELOW_NOLIMIT | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_dmdar_policy = { .init_sched = initialize_dmdar_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-dmdar", .policy_description = "data-aware performance model modular policy (ready)", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; static void initialize_dmdas_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_mct_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_PERFMODEL | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | STARPU_SCHED_SIMPLE_FIFOS_BELOW_NOLIMIT | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_dmdas_policy = { .init_sched = initialize_dmdas_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-dmdas", .policy_description = "data-aware performance model (sorted) modular policy", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/modular_heft2.c000066400000000000000000000067521507764646700224650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include /* The scheduling strategy look like this : * * | * window_component * | * heft_component <--push-- perfmodel_select_component --push--> eager_component * | | * | | * >----------------------------------------------------< * | | * best_impl_component best_impl_component * | | * prio_component prio_component * | | * worker_component worker_component * * A window contain the tasks that failed to be pushed, so as when the prio_components reclaim * tasks by calling can_push to their parent (classically, just after a successful pop have * been made by its associated worker_component), this call goes up to the window_component which * pops a task from its local queue and try to schedule it by pushing it to the * decision_component. * Finally, the task will be pushed to the prio_component which is the direct * parent in the tree of the worker_component the task has been scheduled on. This * component will push the task on its local queue if no one of the two thresholds * have been reached for it, or send a push_error signal to its parent. */ static void initialize_heft2_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_heft_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_PERFMODEL | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_heft2_policy = { .init_sched = initialize_heft2_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-heft2", .policy_description = "heft modular2 policy", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/modular_heft_prio.c000066400000000000000000000070761507764646700234340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include /* The scheduling strategy look like this : * * | * window_component * | * mct_component <--push-- perfmodel_select_component --push--> eager_component * | | | | * prio prio prio | * | | | | * eager eager eager | * | | | | * >--------------------------------------------------------------< * | | * best_impl_component best_impl_component * | | * worker_component worker_component * * A window contain the tasks that failed to be pushed, so as when the prio_components reclaim * tasks by calling can_push to their parent (classically, just after a successful pop have * been made by its associated worker_component), this call goes up to the window_component which * pops a task from its local queue and try to schedule it by pushing it to the * decision_component. * Finally, the task will be pushed to the prio_component which is the direct * parent in the tree of the worker_component the task has been scheduled on. This * component will push the task on its local queue if no one of the two thresholds * have been reached for it, or send a push_error signal to its parent. */ static void initialize_heft_prio_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_mct_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_MEMNODES | STARPU_SCHED_SIMPLE_PERFMODEL | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_heft_prio_policy = { .init_sched = initialize_heft_prio_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-heft-prio", .policy_description = "heft+prio modular policy", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/modular_heteroprio.c000066400000000000000000000037651507764646700236360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include static void initialize_heteroprio_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_heteroprio_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_PERFMODEL | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_heteroprio_policy = { .init_sched = initialize_heteroprio_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-heteroprio", .policy_description = "heteroprio modular policy", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/modular_heteroprio_heft.c000066400000000000000000000045601507764646700246360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include static void initialize_heteroprio_heft_center_policy(unsigned sched_ctx_id) { struct starpu_sched_component_heteroprio_data heteroprio_data = { .mct = NULL, .batch = 1, }; starpu_sched_component_initialize_simple_schedulers(sched_ctx_id, 2, (starpu_sched_component_create_t) starpu_sched_component_heteroprio_create, &heteroprio_data, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | STARPU_SCHED_SIMPLE_IMPL, (starpu_sched_component_create_t) starpu_sched_component_heft_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | STARPU_SCHED_SIMPLE_IMPL); } struct starpu_sched_policy _starpu_sched_modular_heteroprio_heft_policy = { .init_sched = initialize_heteroprio_heft_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-heteroprio-heft", .policy_description = "heteroprio+heft modular policy", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/modular_parallel_heft.c000066400000000000000000000073211507764646700242500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include /* The scheduling strategy look like this : * * | * window_component * | * mct_component <--push-- perfmodel_select_component --push--> eager_component * | | * | | * >----------------------------------------------------< * | | * best_impl_component best_impl_component * | | * prio_component prio_component * | | * worker_component worker_component * * A window contain the tasks that failed to be pushed, so as when the prio_components reclaim * tasks by calling can_push to their parent (classically, just after a successful pop have * been made by its associated worker_component), this call goes up to the window_component which * pops a task from its local queue and try to schedule it by pushing it to the * decision_component. * Finally, the task will be pushed to the prio_component which is the direct * parent in the tree of the worker_component the task has been scheduled on. This * component will push the task on its local queue if no one of the two thresholds * have been reached for it, or send a push_error signal to its parent. */ static void initialize_parallel_heft_center_policy(unsigned sched_ctx_id) { _STARPU_DISP("Warning: the modular-pheft scheduler is mostly a proof of concept and not really very optimized\n"); starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_mct_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_COMBINED_WORKERS | STARPU_SCHED_SIMPLE_PERFMODEL | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | STARPU_SCHED_SIMPLE_FIFOS_BELOW_EXP | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_parallel_heft_policy = { .init_sched = initialize_parallel_heft_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-pheft", .policy_description = "parallel heft modular policy", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/modular_parallel_random.c000066400000000000000000000056061507764646700246060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include /* Random scheduler with a fifo queue for its scheduling window */ static void initialize_parallel_random_fifo_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_random_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_COMBINED_WORKERS | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_parallel_random_policy = { .init_sched = initialize_parallel_random_fifo_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = NULL, .post_exec_hook = NULL, .policy_name = "modular-prandom", .policy_description = "prandom modular policy", .worker_type = STARPU_WORKER_LIST, }; /* Random scheduler with a priority queue for its scheduling window */ static void initialize_parallel_random_prio_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_random_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_COMBINED_WORKERS | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_parallel_random_prio_policy = { .init_sched = initialize_parallel_random_prio_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = NULL, .post_exec_hook = NULL, .policy_name = "modular-prandom-prio", .policy_description = "prandom-prio modular policy", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/modular_prio.c000066400000000000000000000033111507764646700224120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include static void starpu_initialize_prio_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_eager_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_prio_policy = { .init_sched = starpu_initialize_prio_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-prio", .policy_description = "prio modular policy", .worker_type = STARPU_WORKER_LIST, }; starpu-1.4.9+dfsg/src/sched_policies/modular_prio_prefetching.c000066400000000000000000000070201507764646700247710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include /* Just as documentation example, here is the detailed equivalent of the * starpu_sched_component_initialize_simple_scheduler call below */ #if 0 static void initialize_prio_prefetching_center_policy(unsigned sched_ctx_id) { struct starpu_sched_tree *t; struct starpu_sched_component * eager_component; t = starpu_sched_tree_create(sched_ctx_id); t->root = starpu_sched_component_prio_create(t, NULL); eager_component = starpu_sched_component_eager_create(t, NULL); starpu_sched_component_connect(t->root, eager_component); struct starpu_sched_component_prio_data prio_data = { .ntasks_threshold = starpu_getenv_number_default("STARPU_NTASKS_THRESHOLD", _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT), .exp_len_threshold = starpu_getenv_float_default("STARPU_EXP_LEN_THRESHOLD", _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT), }; unsigned i; for(i = 0; i < starpu_worker_get_count() + starpu_combined_worker_get_count(); i++) { struct starpu_sched_component * worker_component = starpu_sched_component_worker_new(sched_ctx_id, i); struct starpu_sched_component * prio_component = starpu_sched_component_prio_create(t, &prio_data); starpu_sched_component_connect(prio_component, worker_component); starpu_sched_component_connect(eager_component, prio_component); } starpu_sched_tree_update_workers(t); starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)t); /* The application may use any integer */ if (starpu_sched_ctx_min_priority_is_set(sched_ctx_id) == 0) starpu_sched_ctx_set_min_priority(sched_ctx_id, INT_MIN); if (starpu_sched_ctx_max_priority_is_set(sched_ctx_id) == 0) starpu_sched_ctx_set_max_priority(sched_ctx_id, INT_MAX); } #endif static void initialize_prio_prefetching_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_eager_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_prio_prefetching_policy = { .init_sched = initialize_prio_prefetching_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = starpu_sched_component_worker_pre_exec_hook, .post_exec_hook = starpu_sched_component_worker_post_exec_hook, .policy_name = "modular-prio-prefetching", .policy_description = "prio prefetching modular policy", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/modular_random.c000066400000000000000000000052331507764646700227260ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include /* Random scheduler with a fifo queue for its scheduling window */ static void initialize_random_fifo_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_random_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_random_policy = { .init_sched = initialize_random_fifo_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = NULL, .post_exec_hook = NULL, .policy_name = "modular-random", .policy_description = "random modular policy", .worker_type = STARPU_WORKER_LIST, }; /* Random scheduler with a priority queue for its scheduling window */ static void initialize_random_prio_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_random_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_random_prio_policy = { .init_sched = initialize_random_prio_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = NULL, .post_exec_hook = NULL, .policy_name = "modular-random-prio", .policy_description = "random-prio modular policy", .worker_type = STARPU_WORKER_LIST, }; starpu-1.4.9+dfsg/src/sched_policies/modular_random_prefetching.c000066400000000000000000000061761507764646700253130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #define _STARPU_SCHED_NTASKS_THRESHOLD_DEFAULT 2 #define _STARPU_SCHED_EXP_LEN_THRESHOLD_DEFAULT 1000000000.0 /* Random scheduler with fifo queues for its scheduling window and its workers. */ static void initialize_random_fifo_prefetching_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_random_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_random_prefetching_policy = { .init_sched = initialize_random_fifo_prefetching_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = NULL, .post_exec_hook = NULL, .policy_name = "modular-random-prefetching", .policy_description = "random prefetching modular policy", .worker_type = STARPU_WORKER_LIST, }; /* Random scheduler with priority queues for its scheduling window and its workers. */ static void initialize_random_prio_prefetching_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_random_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_FIFO_ABOVE | STARPU_SCHED_SIMPLE_FIFO_ABOVE_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW | STARPU_SCHED_SIMPLE_FIFOS_BELOW_PRIO | STARPU_SCHED_SIMPLE_FIFOS_BELOW_READY | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_random_prio_prefetching_policy = { .init_sched = initialize_random_prio_prefetching_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = NULL, .post_exec_hook = NULL, .policy_name = "modular-random-prio-prefetching", .policy_description = "random-prio prefetching modular policy", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/modular_ws.c000066400000000000000000000031621507764646700220760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include static void initialize_ws_center_policy(unsigned sched_ctx_id) { starpu_sched_component_initialize_simple_scheduler((starpu_sched_component_create_t) starpu_sched_component_work_stealing_create, NULL, STARPU_SCHED_SIMPLE_DECIDE_WORKERS | STARPU_SCHED_SIMPLE_WS_BELOW | STARPU_SCHED_SIMPLE_IMPL, sched_ctx_id); } struct starpu_sched_policy _starpu_sched_modular_ws_policy = { .init_sched = initialize_ws_center_policy, .deinit_sched = starpu_sched_tree_deinitialize, .add_workers = starpu_sched_tree_add_workers, .remove_workers = starpu_sched_tree_remove_workers, .push_task = starpu_sched_tree_work_stealing_push_task, .pop_task = starpu_sched_tree_pop_task, .pre_exec_hook = NULL, .post_exec_hook = NULL, .policy_name = "modular-ws", .policy_description = "work stealing modular policy", .worker_type = STARPU_WORKER_LIST, }; starpu-1.4.9+dfsg/src/sched_policies/parallel_eager.c000066400000000000000000000273241507764646700226670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include struct _starpu_peager_common_data { int possible_combinations_cnt[STARPU_NMAXWORKERS]; int *possible_combinations[STARPU_NMAXWORKERS]; int *possible_combinations_size[STARPU_NMAXWORKERS]; int max_combination_size[STARPU_NMAXWORKERS]; int no_combined_workers; int ref_count; }; static struct _starpu_peager_common_data *_peager_common_data = NULL; struct _starpu_peager_data { starpu_pthread_mutex_t policy_mutex; struct starpu_st_fifo_taskq fifo; struct starpu_st_fifo_taskq local_fifo[STARPU_NMAXWORKERS]; }; static void initialize_peager_common(void) { if (_peager_common_data == NULL) { struct _starpu_peager_common_data *common_data = NULL; _STARPU_CALLOC(common_data, 1, sizeof(struct _starpu_peager_common_data)); common_data->ref_count = 1; _peager_common_data = common_data; const unsigned nbasic_workers = starpu_worker_get_count(); unsigned i; starpu_sched_find_all_worker_combinations(); const unsigned ncombined_workers = starpu_combined_worker_get_count(); common_data->no_combined_workers = ncombined_workers == 0; for(i = 0; i < nbasic_workers; i++) { common_data->possible_combinations_cnt[i] = 0; int cnt = common_data->possible_combinations_cnt[i]++; /* Allocate ncombined_workers + 1 for the singleton worker itself */ _STARPU_CALLOC(common_data->possible_combinations[i], 1+ncombined_workers, sizeof(int)); _STARPU_CALLOC(common_data->possible_combinations_size[i], 1+ncombined_workers, sizeof(int)); common_data->possible_combinations[i][cnt] = i; common_data->possible_combinations_size[i][cnt] = 1; common_data->max_combination_size[i] = 1; } for (i = 0; i < ncombined_workers; i++) { unsigned combined_workerid = nbasic_workers + i; int *workers; int size; starpu_combined_worker_get_description(combined_workerid, &size, &workers); int master = workers[0]; if (size > common_data->max_combination_size[master]) { common_data->max_combination_size[master] = size; } int cnt = common_data->possible_combinations_cnt[master]++; common_data->possible_combinations[master][cnt] = combined_workerid; common_data->possible_combinations_size[master][cnt] = size; } } else { _peager_common_data->ref_count++; } } static void deinitialize_peager_common(void) { STARPU_ASSERT(_peager_common_data != NULL); _peager_common_data->ref_count--; if (_peager_common_data->ref_count == 0) { const unsigned nbasic_workers = starpu_worker_get_count(); unsigned i; for(i = 0; i < nbasic_workers; i++) { free(_peager_common_data->possible_combinations[i]); _peager_common_data->possible_combinations[i] = NULL; free(_peager_common_data->possible_combinations_size[i]); _peager_common_data->possible_combinations_size[i] = NULL; } free(_peager_common_data); _peager_common_data = NULL; } } static void peager_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) { if (sched_ctx_id == 0) { /* FIXME Fix scheduling contexts initialization or combined * worker management, to make the initialize_peager_common() * call to work right from initialize_peager_policy. For now, * this fails because it causes combined workers to be generated * too early. */ initialize_peager_common(); } struct _starpu_peager_data *data = (struct _starpu_peager_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); unsigned i; for(i = 0; i < nworkers; i++) { unsigned workerid = workerids[i]; if(starpu_worker_is_combined_worker(workerid)) { continue; } starpu_sched_ctx_worker_shares_tasks_lists(workerid, sched_ctx_id); /* slaves pick up tasks from their local queue, their master * will put tasks directly in that local list when a parallel * tasks comes. */ starpu_st_fifo_taskq_init(&data->local_fifo[workerid]); } } static void peager_remove_workers(unsigned sched_ctx_id, int *workerids STARPU_ATTRIBUTE_UNUSED, unsigned nworkers STARPU_ATTRIBUTE_UNUSED) { if (sched_ctx_id == 0) { deinitialize_peager_common(); } } static void initialize_peager_policy(unsigned sched_ctx_id) { struct _starpu_peager_data *data; _STARPU_CALLOC(data, 1, sizeof(struct _starpu_peager_data)); _STARPU_DISP("Warning: the peager scheduler is mostly a proof of concept and not really very optimized\n"); /* masters pick tasks from that queue */ starpu_st_fifo_taskq_init(&data->fifo); starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)data); STARPU_PTHREAD_MUTEX_INIT(&data->policy_mutex, NULL); } static void deinitialize_peager_policy(unsigned sched_ctx_id) { /* TODO check that there is no task left in the queue */ struct _starpu_peager_data *data = (struct _starpu_peager_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); STARPU_PTHREAD_MUTEX_DESTROY(&data->policy_mutex); free(data); } static int push_task_peager_policy(struct starpu_task *task) { unsigned sched_ctx_id = task->sched_ctx; int ret_val; struct _starpu_peager_data *data = (struct _starpu_peager_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); ret_val = starpu_st_fifo_taskq_push_task(&data->fifo, task); #ifndef STARPU_NON_BLOCKING_DRIVERS int is_parallel_task = task->cl && task->cl->max_parallelism > 1; #endif starpu_push_task_end(task); STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); #ifndef STARPU_NON_BLOCKING_DRIVERS struct _starpu_peager_common_data *common_data = _peager_common_data; /* if there are no tasks block */ /* wake people waiting for a task */ struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { int workerid = workers->get_next(workers, &it); /* If this is not a CPU then the workerid simply grabs tasks from the fifo */ if (starpu_worker_is_combined_worker(workerid)) { continue; } if (starpu_worker_get_type(workerid) != STARPU_CPU_WORKER) { starpu_wake_worker_relax_light(workerid); continue; } if ((!is_parallel_task) /* This is not a parallel task, can wake any workerid */ || (common_data->no_combined_workers) /* There is no combined workerid */ || (common_data->max_combination_size[workerid] > 1) /* This is a combined workerid master and the task is parallel */ ) { starpu_wake_worker_relax_light(workerid); } } #endif return ret_val; } static struct starpu_task *pop_task_peager_policy(unsigned sched_ctx_id) { struct _starpu_peager_common_data *common_data = _peager_common_data; struct _starpu_peager_data *data = (struct _starpu_peager_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); int workerid = starpu_worker_get_id_check(); /* If this is not a CPU then the worker simply grabs tasks from the fifo */ if (starpu_worker_get_type(workerid) != STARPU_CPU_WORKER) { struct starpu_task *task; starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); starpu_worker_relax_off(); task = starpu_st_fifo_taskq_pop_task(&data->fifo, workerid); STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); return task; } struct starpu_task *task; int slave_task = 0; starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&data->policy_mutex); starpu_worker_relax_off(); /* check if a slave task is available in the local queue */ task = starpu_st_fifo_taskq_pop_task(&data->local_fifo[workerid], workerid); if (!task) { /* no slave task, try to pop a task as master */ task = starpu_st_fifo_taskq_pop_task(&data->fifo, workerid); if (task) { _STARPU_DEBUG("poping master task %p\n", task); } #if 1 /* Optional heuristic to filter out purely slave workers for parallel tasks */ if (task && task->cl && task->cl->max_parallelism > 1 && common_data->max_combination_size[workerid] == 1 && !common_data->no_combined_workers) { /* task is potentially parallel, leave it for a combined worker master */ _STARPU_DEBUG("pushing back master task %p\n", task); starpu_st_fifo_taskq_push_back_task(&data->fifo, task); task = NULL; } #endif } else { slave_task = 1; _STARPU_DEBUG("poping slave task %p\n", task); } if (!task || slave_task) { STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); goto ret; } /* Find the largest compatible worker combination */ int best_size = -1; int best_workerid = -1; int i; for (i = 0; i < common_data->possible_combinations_cnt[workerid]; i++) { if (common_data->possible_combinations_size[workerid][i] > best_size) { int combined_worker = common_data->possible_combinations[workerid][i]; if (starpu_combined_worker_can_execute_task(combined_worker, task, 0)) { best_size = common_data->possible_combinations_size[workerid][i]; best_workerid = combined_worker; } } } _STARPU_DEBUG("task %p, best_workerid=%d, best_size=%d\n", task, best_workerid, best_size); /* In case nobody can execute this task, we let the master * worker take it anyway, so that it can discard it afterward. * */ if (best_workerid == -1) { STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); goto ret; } /* Is this a basic worker or a combined worker ? */ if (best_workerid < (int) starpu_worker_get_count()) { STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); /* The master is alone */ goto ret; } starpu_parallel_task_barrier_init(task, best_workerid); int worker_size = 0; int *combined_workerid; starpu_combined_worker_get_description(best_workerid, &worker_size, &combined_workerid); _STARPU_DEBUG("dispatching task %p on combined worker %d of size %d\n", task, best_workerid, worker_size); /* Dispatch task aliases to the different slaves */ for (i = 1; i < worker_size; i++) { struct starpu_task *alias = starpu_task_dup(task); int local_worker = combined_workerid[i]; alias->destroy = 1; _STARPU_TRACE_JOB_PUSH(alias, alias->priority > 0); starpu_st_fifo_taskq_push_task(&data->local_fifo[local_worker], alias); } /* The master also manipulated an alias */ struct starpu_task *master_alias = starpu_task_dup(task); master_alias->destroy = 1; task = master_alias; STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); _STARPU_TRACE_JOB_PUSH(master_alias, master_alias->priority > 0); for (i = 1; i < worker_size; i++) { int local_worker = combined_workerid[i]; starpu_worker_lock(local_worker); #if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) starpu_wake_worker_locked(local_worker); #endif starpu_worker_unlock(local_worker); } ret: return task; } struct starpu_sched_policy _starpu_sched_peager_policy = { .init_sched = initialize_peager_policy, .deinit_sched = deinitialize_peager_policy, .add_workers = peager_add_workers, .remove_workers = peager_remove_workers, .push_task = push_task_peager_policy, .pop_task = pop_task_peager_policy, .pre_exec_hook = NULL, .post_exec_hook = NULL, .policy_name = "peager", .policy_description = "parallel eager policy", .worker_type = STARPU_WORKER_LIST, }; starpu-1.4.9+dfsg/src/sched_policies/parallel_heft.c000066400000000000000000000477731507764646700225440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Distributed queues using performance modeling to assign tasks */ #include #include #include #include #include #include #include #ifndef DBL_MIN #define DBL_MIN __DBL_MIN__ #endif #ifndef DBL_MAX #define DBL_MAX __DBL_MAX__ #endif /* if no priority is set when creating the scheduling context, we use the following ones */ #define DEFAULT_MIN_PRIORITY 0 #define DEFAULT_MAX_PRIORITY 1 //static unsigned ncombinedworkers; //static enum starpu_perfmodel_archtype applicable_perf_archtypes[STARPU_NARCH_VARIATIONS]; //static unsigned napplicable_perf_archtypes = 0; /* * Here are the default values of alpha, beta, gamma */ #define _STARPU_SCHED_ALPHA_DEFAULT 1.0 #define _STARPU_SCHED_BETA_DEFAULT 1.0 #define _STARPU_SCHED_GAMMA_DEFAULT 1000.0 struct _starpu_pheft_data { double alpha; double beta; double _gamma; double idle_power; /* When we push a task on a combined worker we need all the cpu workers it contains * to be locked at once */ starpu_pthread_mutex_t global_push_mutex; }; static double worker_exp_start[STARPU_NMAXWORKERS]; static double worker_exp_end[STARPU_NMAXWORKERS]; static double worker_exp_len[STARPU_NMAXWORKERS]; static int ntasks[STARPU_NMAXWORKERS]; /*!!!!!!! * It doesn't work with several contexts because the combined workers * are constructed from the workers available to the program, and not * to the context !!!!!!!!!!!!!!!!!!!!!!! */ static void parallel_heft_pre_exec_hook(struct starpu_task *task, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED) { if (!task->cl || task->execute_on_a_specific_worker) return; unsigned workerid = starpu_worker_get_id_check(); double model = task->predicted; double transfer_model = task->predicted_transfer; const double now = starpu_timing_now(); if (isnan(model)) model = 0.0; if (isnan(transfer_model)) transfer_model = 0.0; /* Once we have started the task, we can update the predicted amount * of work. */ starpu_worker_lock_self(); worker_exp_len[workerid] -= model + transfer_model; worker_exp_start[workerid] = now + model; worker_exp_end[workerid] = worker_exp_start[workerid] + worker_exp_len[workerid]; ntasks[workerid]--; starpu_worker_unlock_self(); } static void parallel_heft_post_exec_hook(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED) { unsigned workerid = starpu_worker_get_id_check(); const double now = starpu_timing_now(); /* Once we have executed the task, we can update the predicted amount * of work. */ starpu_worker_lock_self(); worker_exp_start[workerid] = now; worker_exp_end[workerid] = worker_exp_start[workerid] + worker_exp_len[workerid]; starpu_worker_unlock_self(); } static int push_task_on_best_worker(struct starpu_task *task, int best_workerid, double exp_start_predicted, double exp_end_predicted, int prio, unsigned sched_ctx_id) { /* make sure someone coule execute that task ! */ STARPU_ASSERT(best_workerid != -1); struct _starpu_pheft_data *hd = (struct _starpu_pheft_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); if (starpu_get_prefetch_flag()) starpu_prefetch_task_input_for(task, best_workerid); int ret = 0; if (!starpu_worker_is_combined_worker(best_workerid)) { starpu_worker_lock(best_workerid); task->predicted = exp_end_predicted - exp_start_predicted; /* TODO */ task->predicted_transfer = 0; worker_exp_len[best_workerid] += task->predicted; worker_exp_end[best_workerid] = exp_end_predicted; worker_exp_start[best_workerid] = exp_end_predicted - worker_exp_len[best_workerid]; ntasks[best_workerid]++; starpu_worker_unlock(best_workerid); /* We don't want it to interlace its task with a combined * worker's one */ starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&hd->global_push_mutex); starpu_worker_relax_off(); ret = starpu_push_local_task(best_workerid, task, prio); STARPU_PTHREAD_MUTEX_UNLOCK(&hd->global_push_mutex); } else { /* This task doesn't belong to an actual worker, it belongs * to a combined worker and thus the scheduler doesn't care * of its predicted values which are insignificant */ task->predicted = 0; task->predicted_transfer = 0; starpu_parallel_task_barrier_init(task, best_workerid); int worker_size = 0; int *combined_workerid; starpu_combined_worker_get_description(best_workerid, &worker_size, &combined_workerid); /* All cpu workers must be locked at once */ starpu_worker_relax_on(); STARPU_PTHREAD_MUTEX_LOCK(&hd->global_push_mutex); starpu_worker_relax_off(); /* This is a combined worker so we create task aliases */ int i; for (i = 0; i < worker_size; i++) { struct starpu_task *alias = starpu_task_dup(task); int local_combined_workerid = combined_workerid[i]; alias->predicted = exp_end_predicted - worker_exp_end[local_combined_workerid]; /* TODO */ alias->predicted_transfer = 0; alias->destroy = 1; starpu_worker_lock(local_combined_workerid); worker_exp_len[local_combined_workerid] += alias->predicted; worker_exp_end[local_combined_workerid] = exp_end_predicted; worker_exp_start[local_combined_workerid] = exp_end_predicted - worker_exp_len[local_combined_workerid]; ntasks[local_combined_workerid]++; starpu_worker_unlock(local_combined_workerid); _STARPU_TRACE_JOB_PUSH(alias, alias->priority > 0); ret |= starpu_push_local_task(local_combined_workerid, alias, prio); } STARPU_PTHREAD_MUTEX_UNLOCK(&hd->global_push_mutex); } return ret; } static double compute_expected_end(double *_worker_exp_end, int workerid) { if (!starpu_worker_is_combined_worker(workerid)) { double res; /* This is a basic worker */ res = _worker_exp_end[workerid]; return res; } else { /* This is a combined worker, the expected end is the end for the latest worker */ int worker_size; int *combined_workerid; starpu_combined_worker_get_description(workerid, &worker_size, &combined_workerid); double exp_end = DBL_MIN; int i; for (i = 0; i < worker_size; i++) { double local_exp_end = _worker_exp_end[combined_workerid[i]]; exp_end = STARPU_MAX(exp_end, local_exp_end); } return exp_end; } } static double compute_ntasks_end(int workerid, unsigned sched_ctx_id) { struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(workerid, sched_ctx_id); if (!starpu_worker_is_combined_worker(workerid)) { double res; /* This is a basic worker */ /* Here helgrind would shout that this is unprotected, but we * are fine with getting outdated values, this is just an * estimation */ res = ntasks[workerid] / starpu_worker_get_relative_speedup(perf_arch); return res; } else { /* This is a combined worker, the expected end is the end for the latest worker */ int worker_size; int *combined_workerid; starpu_combined_worker_get_description(workerid, &worker_size, &combined_workerid); int ntasks_end=0; /* Here helgrind would shout that this is unprotected, but we * are fine with getting outdated values, this is just an * estimation */ int i; for (i = 0; i < worker_size; i++) { /* XXX: this is actually bogus: not all pushed tasks are necessarily parallel... */ ntasks_end = STARPU_MAX(ntasks_end, (int) ((double) ntasks[combined_workerid[i]] / starpu_worker_get_relative_speedup(perf_arch))); } return ntasks_end; } } static int _parallel_heft_push_task(struct starpu_task *task, unsigned prio, unsigned sched_ctx_id) { struct _starpu_pheft_data *hd = (struct _starpu_pheft_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); unsigned nworkers_ctx = workers->nworkers; unsigned workerid, worker_ctx = 0; int best = -1, best_id_ctx = -1; /* this flag is set if the corresponding workerid is selected because there is no performance prediction available yet */ int forced_best = -1, forced_best_ctx = -1, forced_nimpl = -1; double local_task_length[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; double local_data_penalty[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; double local_energy[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; double local_exp_end[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; double fitness[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; double max_exp_end = 0.0; int skip_worker[nworkers_ctx][STARPU_MAXIMPLEMENTATIONS]; double best_exp_start; double best_exp_end = DBL_MAX; //double penality_best = 0.0; int ntasks_best = -1, ntasks_best_ctx = -1, nimpl_best = -1; double ntasks_best_end = 0.0; int calibrating = 0; /* A priori, we know all estimations */ int unknown = 0; struct starpu_sched_ctx_iterator it; double now = starpu_timing_now(); double _worker_exp_end[nworkers_ctx]; memset(skip_worker, 0, nworkers_ctx*STARPU_MAXIMPLEMENTATIONS*sizeof(int)); workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { workerid = workers->get_next(workers, &it); if(!starpu_worker_is_combined_worker(workerid)) { /* Here helgrind would shout that this is unprotected, but we * are fine with getting outdated values, this is just an * estimation */ /* Sometimes workers didn't take the tasks as early as we expected */ double exp_start = STARPU_MAX(worker_exp_start[workerid], now); _worker_exp_end[workerid] = exp_start + worker_exp_len[workerid]; if (_worker_exp_end[workerid] > max_exp_end) max_exp_end = _worker_exp_end[workerid]; } } unsigned nimpl; worker_ctx = 0; while(workers->has_next(workers, &it)) { workerid = workers->get_next(workers, &it); for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { if (!starpu_combined_worker_can_execute_task(workerid, task, nimpl)) { /* no one on that queue may execute this task */ skip_worker[worker_ctx][nimpl] = 1; continue; } else { skip_worker[worker_ctx][nimpl] = 0; } struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(workerid, sched_ctx_id); local_task_length[worker_ctx][nimpl] = starpu_task_expected_length(task, perf_arch,nimpl); local_data_penalty[worker_ctx][nimpl] = starpu_task_expected_data_transfer_time_for(task, workerid); double ntasks_end = compute_ntasks_end(workerid, sched_ctx_id); if (ntasks_best == -1 || (!calibrating && ntasks_end < ntasks_best_end) /* Not calibrating, take better task */ || (!calibrating && isnan(local_task_length[worker_ctx][nimpl])) /* Not calibrating but this workerid is being calibrated */ || (calibrating && isnan(local_task_length[worker_ctx][nimpl]) && ntasks_end < ntasks_best_end) /* Calibrating, compete this workerid with other non-calibrated */ ) { ntasks_best_end = ntasks_end; ntasks_best = workerid; ntasks_best_ctx = worker_ctx; nimpl_best = nimpl; } if (isnan(local_task_length[worker_ctx][nimpl])) { static int warned; STARPU_HG_DISABLE_CHECKING(warned); if (!warned) { warned = 1; _STARPU_DISP("Warning: performance model for %s not finished calibrating on %u, using a dumb scheduling heuristic for now\n", starpu_task_get_name(task), workerid); } /* we are calibrating, we want to speed-up calibration time * so we privilege non-calibrated tasks (but still * greedily distribute them to avoid dumb schedules) */ calibrating = 1; } if (isnan(local_task_length[worker_ctx][nimpl]) || _STARPU_IS_ZERO(local_task_length[worker_ctx][nimpl])) /* there is no prediction available for that task * with that arch yet, so switch to a greedy strategy */ unknown = 1; if (unknown) continue; double task_starting_time = STARPU_MAX( compute_expected_end(_worker_exp_end, workerid), now + local_data_penalty[worker_ctx][nimpl]); local_exp_end[worker_ctx][nimpl] = task_starting_time + local_task_length[worker_ctx][nimpl]; //fprintf(stderr, "WORKER %d -> length %e end %e\n", workerid, local_task_length[worker_ctx][nimpl], local_exp_end[workerid][nimpl]); if (local_exp_end[worker_ctx][nimpl] < best_exp_end) { /* a better solution was found */ best_exp_end = local_exp_end[worker_ctx][nimpl]; nimpl_best = nimpl; } local_energy[worker_ctx][nimpl] = starpu_task_expected_energy(task, perf_arch,nimpl); //_STARPU_DEBUG("Scheduler parallel heft: task length (%lf) local energy (%lf) workerid (%u) kernel (%u) \n", local_task_length[workerid],local_energy[workerid],workerid,nimpl); if (isnan(local_energy[worker_ctx][nimpl])) local_energy[worker_ctx][nimpl] = 0.; } worker_ctx++; } if (unknown) { forced_best = ntasks_best; forced_best_ctx = ntasks_best_ctx; forced_nimpl = nimpl_best; } if (forced_best == -1) { double best_fitness = -1; worker_ctx = 0; while(workers->has_next(workers, &it)) { workerid = workers->get_next(workers, &it); for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { if (skip_worker[worker_ctx][nimpl]) { /* no one on that queue may execute this task */ continue; } fitness[worker_ctx][nimpl] = hd->alpha*(local_exp_end[worker_ctx][nimpl] - best_exp_end) + hd->beta*(local_data_penalty[worker_ctx][nimpl]) + hd->_gamma*(local_energy[worker_ctx][nimpl]); if (local_exp_end[worker_ctx][nimpl] > max_exp_end) /* This placement will make the computation * longer, take into account the idle * consumption of other cpus */ fitness[worker_ctx][nimpl] += hd->_gamma * hd->idle_power * (local_exp_end[worker_ctx][nimpl] - max_exp_end) / 1000000.0; if (best == -1 || fitness[worker_ctx][nimpl] < best_fitness) { /* we found a better solution */ best_fitness = fitness[worker_ctx][nimpl]; best = workerid; best_id_ctx = worker_ctx; nimpl_best = nimpl; } // fprintf(stderr, "FITNESS workerid %d -> %e local_exp_end %e - local_data_penalty %e\n", workerid, fitness[workerid][nimpl], local_exp_end[workerid][nimpl] - best_exp_end, local_data_penalty[workerid][nimpl]); } worker_ctx++; } } STARPU_ASSERT(forced_best != -1 || best != -1); if (forced_best != -1) { /* there is no prediction available for that task * with that arch we want to speed-up calibration time * so we force this measurement */ best = forced_best; best_id_ctx = forced_best_ctx; nimpl_best = forced_nimpl; //penality_best = 0.0; best_exp_end = compute_expected_end(_worker_exp_end, best); } else { //penality_best = local_data_penalty[best_id_ctx][nimpl_best]; STARPU_ASSERT(best_id_ctx != -1); STARPU_ASSERT(nimpl_best != -1); best_exp_end = local_exp_end[best_id_ctx][nimpl_best]; } best_exp_start = _worker_exp_end[best]; //_STARPU_DEBUG("Scheduler parallel heft: kernel (%u)\n", nimpl_best); starpu_task_set_implementation(task, nimpl_best); /* we should now have the best workerid in variable "best" */ starpu_sched_task_break(task); return push_task_on_best_worker(task, best, best_exp_start, best_exp_end, prio, sched_ctx_id); } static int parallel_heft_push_task(struct starpu_task *task) { unsigned sched_ctx_id = task->sched_ctx; int ret_val = -1; if (task->priority == STARPU_MAX_PRIO) { ret_val = _parallel_heft_push_task(task, 1, sched_ctx_id); return ret_val; } ret_val = _parallel_heft_push_task(task, 0, sched_ctx_id); return ret_val; } static void parallel_heft_add_workers(__attribute__((unused)) unsigned sched_ctx_id, int *workerids, unsigned nworkers) { unsigned i; double now = starpu_timing_now(); for (i = 0; i < nworkers; i++) { int workerid = workerids[i]; struct _starpu_worker *workerarg = _starpu_get_worker_struct(workerid); /* init these structures only once for each worker */ if(!workerarg->has_prev_init) { worker_exp_start[workerid] = now; worker_exp_len[workerid] = 0.0; worker_exp_end[workerid] = worker_exp_start[workerid]; ntasks[workerid] = 0; workerarg->has_prev_init = 1; } } _starpu_sched_find_worker_combinations(workerids, nworkers); // start_unclear_part: not very clear where this is used /* struct _starpu_machine_config *config = _starpu_get_machine_config(); */ /* ncombinedworkers = config->topology.ncombinedworkers; */ /* /\* We pre-compute an array of all the perfmodel archs that are applicable *\/ */ /* unsigned total_worker_count = nworkers + ncombinedworkers; */ /* unsigned used_perf_archtypes[STARPU_NARCH_VARIATIONS]; */ /* memset(used_perf_archtypes, 0, sizeof(used_perf_archtypes)); */ /* for (workerid = 0; workerid < total_worker_count; workerid++) */ /* { */ /* enum starpu_perfmodel_archtype perf_archtype = starpu_worker_get_perf_archtype(workerid); */ /* used_perf_archtypes[perf_archtype] = 1; */ /* } */ // end_unclear_part // napplicable_perf_archtypes = 0; // int arch; // for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++) // { // if (used_perf_archtypes[arch]) // applicable_perf_archtypes[napplicable_perf_archtypes++] = arch; // } } static void initialize_parallel_heft_policy(unsigned sched_ctx_id) { struct _starpu_pheft_data *hd; _STARPU_MALLOC(hd, sizeof(struct _starpu_pheft_data)); _STARPU_DISP("Warning: the pheft scheduler is mostly a proof of concept and not really very optimized\n"); if (starpu_sched_ctx_min_priority_is_set(sched_ctx_id) == 0) starpu_sched_ctx_set_min_priority(sched_ctx_id, DEFAULT_MIN_PRIORITY); if (starpu_sched_ctx_max_priority_is_set(sched_ctx_id) == 0) starpu_sched_ctx_set_max_priority(sched_ctx_id, DEFAULT_MAX_PRIORITY); STARPU_ASSERT_MSG(starpu_sched_ctx_get_min_priority(sched_ctx_id) < starpu_sched_ctx_get_max_priority(sched_ctx_id), "Priority min %d should be lower than priority max %d\n", starpu_sched_ctx_get_min_priority(sched_ctx_id), starpu_sched_ctx_get_max_priority(sched_ctx_id)); starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)hd); hd->alpha = starpu_getenv_float_default("STARPU_SCHED_ALPHA", _STARPU_SCHED_ALPHA_DEFAULT); hd->beta = starpu_getenv_float_default("STARPU_SCHED_BETA", _STARPU_SCHED_BETA_DEFAULT); #ifdef STARPU_NON_BLOCKING_DRIVERS if (starpu_getenv("STARPU_SCHED_GAMMA")) _STARPU_DISP("Warning: STARPU_SCHED_GAMMA was used, but --enable-blocking-drivers configuration was not set, CPU cores will not actually be sleeping\n"); #endif hd->_gamma = starpu_getenv_float_default("STARPU_SCHED_GAMMA", _STARPU_SCHED_GAMMA_DEFAULT); hd->idle_power = starpu_getenv_float_default("STARPU_IDLE_POWER", 0.0); STARPU_PTHREAD_MUTEX_INIT(&hd->global_push_mutex, NULL); /* Tell helgrind that we are fine with getting outdated values when * estimating schedules */ STARPU_HG_DISABLE_CHECKING(worker_exp_start); STARPU_HG_DISABLE_CHECKING(worker_exp_end); STARPU_HG_DISABLE_CHECKING(worker_exp_len); STARPU_HG_DISABLE_CHECKING(ntasks); } static void parallel_heft_deinit(unsigned sched_ctx_id) { struct _starpu_pheft_data *hd = (struct _starpu_pheft_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); STARPU_PTHREAD_MUTEX_DESTROY(&hd->global_push_mutex); free(hd); } struct starpu_sched_policy _starpu_sched_parallel_heft_policy = { .init_sched = initialize_parallel_heft_policy, .deinit_sched = parallel_heft_deinit, .add_workers = parallel_heft_add_workers, .remove_workers = NULL, .push_task = parallel_heft_push_task, .pop_task = NULL, .pre_exec_hook = parallel_heft_pre_exec_hook, .post_exec_hook = parallel_heft_post_exec_hook, .policy_name = "pheft", .policy_description = "parallel HEFT", .worker_type = STARPU_WORKER_LIST, .prefetches = 1, }; starpu-1.4.9+dfsg/src/sched_policies/prio_deque.c000066400000000000000000000147771507764646700220740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include void starpu_st_prio_deque_init(struct starpu_st_prio_deque *pdeque) { memset(pdeque,0,sizeof(*pdeque)); starpu_task_prio_list_init(&pdeque->list); STARPU_HG_DISABLE_CHECKING(pdeque->exp_start); STARPU_HG_DISABLE_CHECKING(pdeque->exp_end); STARPU_HG_DISABLE_CHECKING(pdeque->exp_len); } void starpu_st_prio_deque_destroy(struct starpu_st_prio_deque *pdeque) { starpu_task_prio_list_deinit(&pdeque->list); } int starpu_st_prio_deque_is_empty(struct starpu_st_prio_deque *pdeque) { return pdeque->ntasks == 0; } void starpu_st_prio_deque_erase(struct starpu_st_prio_deque *pdeque, struct starpu_task *task) { starpu_task_prio_list_erase(&pdeque->list, task); } int starpu_st_prio_deque_push_front_task(struct starpu_st_prio_deque *pdeque, struct starpu_task *task) { starpu_task_prio_list_push_front(&pdeque->list, task); pdeque->ntasks++; return 0; } int starpu_st_prio_deque_push_back_task(struct starpu_st_prio_deque *pdeque, struct starpu_task *task) { starpu_task_prio_list_push_back(&pdeque->list, task); pdeque->ntasks++; return 0; } struct starpu_task *starpu_st_prio_deque_highest_task(struct starpu_st_prio_deque *pdeque) { struct starpu_task *task; if (starpu_task_prio_list_empty(&pdeque->list)) return NULL; task = starpu_task_prio_list_front_highest(&pdeque->list); return task; } struct starpu_task *starpu_st_prio_deque_pop_task(struct starpu_st_prio_deque *pdeque) { struct starpu_task *task; if (starpu_task_prio_list_empty(&pdeque->list)) return NULL; task = starpu_task_prio_list_pop_front_highest(&pdeque->list); pdeque->ntasks--; return task; } struct starpu_task *starpu_st_prio_deque_pop_back_task(struct starpu_st_prio_deque *pdeque) { struct starpu_task *task; if (starpu_task_prio_list_empty(&pdeque->list)) return NULL; task = starpu_task_prio_list_pop_back_lowest(&pdeque->list); pdeque->ntasks--; return task; } int starpu_st_prio_deque_pop_this_task(struct starpu_st_prio_deque *pdeque, int workerid, struct starpu_task *task) { unsigned nimpl = 0; #ifdef STARPU_DEBUG STARPU_ASSERT(starpu_task_prio_list_ismember(&pdeque->list, task)); #endif if (workerid < 0 || starpu_worker_can_execute_task_first_impl(workerid, task, &nimpl)) { starpu_task_set_implementation(task, nimpl); starpu_task_prio_list_erase(&pdeque->list, task); pdeque->ntasks--; return 1; } return 0; } static inline int pred_true(struct starpu_task *t STARPU_ATTRIBUTE_UNUSED, void *v STARPU_ATTRIBUTE_UNUSED) { (void)t; (void)v; return 1; } static inline int pred_can_execute(struct starpu_task * t, void * pworkerid) { int i; for(i = 0; i < STARPU_MAXIMPLEMENTATIONS; i++) if(starpu_worker_can_execute_task(*(int*)pworkerid, t,i)) { starpu_task_set_implementation(t, i); return 1; } return 0; } #define REMOVE_TASK(pdeque, first_task, next_task, predicate, parg) \ { \ struct starpu_task * t; \ if (skipped) \ *skipped = NULL; \ for (t = starpu_task_prio_##first_task(&pdeque->list); \ t != starpu_task_prio_list_end(&pdeque->list); \ t = starpu_task_prio_##next_task(&pdeque->list, t)) \ { \ if (predicate(t, parg)) \ { \ starpu_task_prio_list_erase(&pdeque->list, t); \ pdeque->ntasks--; \ return t; \ } \ else \ if (skipped) \ *skipped = t; \ } \ return NULL; \ } struct starpu_task *starpu_st_prio_deque_pop_task_for_worker(struct starpu_st_prio_deque * pdeque, int workerid, struct starpu_task * *skipped) { STARPU_ASSERT(pdeque); STARPU_ASSERT(workerid >= 0 && (unsigned) workerid < starpu_worker_get_count()); REMOVE_TASK(pdeque, list_begin, list_next, pred_can_execute, &workerid); } struct starpu_task *starpu_st_prio_deque_deque_task_for_worker(struct starpu_st_prio_deque * pdeque, int workerid, struct starpu_task * *skipped) { STARPU_ASSERT(pdeque); STARPU_ASSERT(workerid >= 0 && (unsigned) workerid < starpu_worker_get_count()); REMOVE_TASK(pdeque, list_back_highest, list_prev_highest, pred_can_execute, &workerid); } struct starpu_task *starpu_st_prio_deque_deque_first_ready_task(struct starpu_st_prio_deque * pdeque, unsigned workerid) { struct starpu_task *task = NULL, *current; if (starpu_task_prio_list_empty(&pdeque->list)) return NULL; if (pdeque->ntasks > 0) { pdeque->ntasks--; task = starpu_task_prio_list_front_highest(&pdeque->list); if (STARPU_UNLIKELY(!task)) return NULL; int first_task_priority = task->priority; size_t non_ready_best = SIZE_MAX; size_t non_loading_best = SIZE_MAX; size_t non_allocated_best = SIZE_MAX; for (current = starpu_task_prio_list_begin(&pdeque->list); current != starpu_task_prio_list_end(&pdeque->list); current = starpu_task_prio_list_next(&pdeque->list, current)) { int priority = current->priority; if (priority >= first_task_priority) { size_t non_ready, non_loading, non_allocated; starpu_st_non_ready_buffers_size(current, workerid, &non_ready, &non_loading, &non_allocated); if (non_ready < non_ready_best) { non_ready_best = non_ready; non_loading_best = non_loading; non_allocated_best = non_allocated; task = current; if (non_ready == 0 && non_allocated == 0) break; } else if (non_ready == non_ready_best) { if (non_loading < non_loading_best) { non_loading_best = non_loading; non_allocated_best = non_allocated; task = current; } else if (non_loading == non_loading_best) { if (non_allocated < non_allocated_best) { non_allocated_best = non_allocated; task = current; } } } } } starpu_task_prio_list_erase(&pdeque->list, task); } return task; } starpu-1.4.9+dfsg/src/sched_policies/prio_deque.h000066400000000000000000000023701507764646700220630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Uppsala University * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __PRIO_DEQUE_H__ #define __PRIO_DEQUE_H__ #include /** @file */ struct starpu_st_prio_deque { struct starpu_task_prio_list list; unsigned ntasks; unsigned nprocessed; // Assumptions: // exp_len is the sum of predicted_length + predicted_tansfer of all tasks in list // exp_start is the time at which the first task of list can start // exp_end = exp_start + exp_end // Careful: those are NOT maintained by the prio_queue operations double exp_start, exp_end, exp_len; }; #endif /* __PRIO_DEQUE_H__ */ starpu-1.4.9+dfsg/src/sched_policies/random_policy.c000066400000000000000000000062241507764646700225630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Policy attributing tasks randomly to workers */ #include #include #include #include #include #include static int _random_push_task(struct starpu_task *task, unsigned prio) { /* find the queue */ double alpha_sum = 0.0; unsigned sched_ctx_id = task->sched_ctx; struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); int worker; int worker_arr[STARPU_NMAXWORKERS]; double speedup_arr[STARPU_NMAXWORKERS]; int size = 0; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { worker = workers->get_next(workers, &it); unsigned impl; if(starpu_worker_can_execute_task_first_impl(worker, task, &impl)) { struct starpu_perfmodel_arch* perf_arch = starpu_worker_get_perf_archtype(worker, sched_ctx_id); double speedup = starpu_worker_get_relative_speedup(perf_arch); alpha_sum += speedup; speedup_arr[size] = speedup; worker_arr[size++] = worker; } } double random = starpu_drand48()*alpha_sum; //printf("my rand is %e over %e\n", random, alpha_sum); if(size == 0) return -ENODEV; unsigned selected = worker_arr[size - 1]; double alpha = 0.0; int i; for(i = 0; i < size; i++) { worker = worker_arr[i]; double worker_alpha = speedup_arr[i]; if (alpha + worker_alpha >= random) { /* we found the worker */ selected = worker; break; } alpha += worker_alpha; } STARPU_AYU_ADDTOTASKQUEUE(starpu_task_get_job_id(task), selected); starpu_sched_task_break(task); return starpu_push_local_task(selected, task, prio); } static int random_push_task(struct starpu_task *task) { return _random_push_task(task, !!task->priority); } static void initialize_random_policy(unsigned sched_ctx_id) { (void) sched_ctx_id; starpu_srand48(time(NULL)); } static void deinitialize_random_policy(unsigned sched_ctx_id) { (void) sched_ctx_id; } struct starpu_sched_policy _starpu_sched_random_policy = { .init_sched = initialize_random_policy, .add_workers = NULL, .remove_workers = NULL, .deinit_sched = deinitialize_random_policy, .push_task = random_push_task, .pop_task = NULL, .pre_exec_hook = NULL, .post_exec_hook = NULL, .policy_name = "random", .policy_description = "weighted random based on worker overall performance", .worker_type = STARPU_WORKER_LIST, }; starpu-1.4.9+dfsg/src/sched_policies/sched_component.h000066400000000000000000000024321507764646700230760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __SCHED_COMPONENT_H__ #define __SCHED_COMPONENT_H__ /** @file */ #include #pragma GCC visibility push(hidden) /** lock and unlock drivers for modifying schedulers */ void _starpu_sched_component_lock_all_workers(void); void _starpu_sched_component_unlock_all_workers(void); void _starpu_sched_component_workers_destroy(void); struct _starpu_worker * _starpu_sched_component_worker_get_worker(struct starpu_sched_component *); struct starpu_bitmap * _starpu_get_worker_mask(unsigned sched_ctx_id); #pragma GCC visibility pop #endif starpu-1.4.9+dfsg/src/sched_policies/scheduler_maker.c000066400000000000000000000234431507764646700230630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #ifdef STARPU_HAVE_HWLOC #include #ifndef HWLOC_API_VERSION #define HWLOC_OBJ_PU HWLOC_OBJ_PROC #endif #if HWLOC_API_VERSION < 0x00010b00 #define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE #endif #endif #include "sched_component.h" /* The scheduler is built by a recursive function called on the hwloc topology with a starpu_sched_specs structure, * each call return a set of starpu_sched_component, not a single one, because you may have a topology like that : * MACHINE -- MEMORY NODE -- SOCKET * \- SOCKET * and you have defined a component for MACHINE, and a component for SOCKET, but not for MEMORY NODE then the recursive call * on MEMORY NODE will return 2 starpu_sched_component for those 2 sockets * * */ struct sched_component_list { struct starpu_sched_component ** arr; unsigned size; }; static void init_list(struct sched_component_list * list) { memset(list,0,sizeof(*list)); } static void destroy_list(struct sched_component_list * list) { free(list->arr); } static void add_component(struct sched_component_list *list, struct starpu_sched_component * component) { _STARPU_REALLOC(list->arr, sizeof(*list->arr) * (list->size + 1)); list->arr[list->size] = component; list->size++; } /* this is the function that actually built the scheduler, but without workers */ static struct sched_component_list helper_make_scheduler(struct starpu_sched_tree *tree, hwloc_obj_t obj, struct starpu_sched_component_specs specs, unsigned sched_ctx_id) { STARPU_ASSERT(obj); struct starpu_sched_component * component = NULL; /*set components for this obj */ #define CASE(ENUM,spec_member) \ case ENUM: \ if(specs.spec_member) \ component = starpu_sched_component_composed_component_create(tree, specs.spec_member); \ break switch(obj->type) { CASE(HWLOC_OBJ_MACHINE,hwloc_machine_composed_sched_component); CASE(HWLOC_OBJ_GROUP,hwloc_component_composed_sched_component); CASE(HWLOC_OBJ_NUMANODE,hwloc_component_composed_sched_component); CASE(HWLOC_OBJ_SOCKET,hwloc_socket_composed_sched_component); #ifdef HWLOC_OBJ_CACHE CASE(HWLOC_OBJ_CACHE,hwloc_cache_composed_sched_component); #endif #ifdef HWLOC_OBJ_L1CACHE CASE(HWLOC_OBJ_L1CACHE,hwloc_cache_composed_sched_component); CASE(HWLOC_OBJ_L2CACHE,hwloc_cache_composed_sched_component); CASE(HWLOC_OBJ_L3CACHE,hwloc_cache_composed_sched_component); CASE(HWLOC_OBJ_L4CACHE,hwloc_cache_composed_sched_component); CASE(HWLOC_OBJ_L5CACHE,hwloc_cache_composed_sched_component); #endif default: break; } struct sched_component_list l; init_list(&l); unsigned i; /* collect children component's */ for(i = 0; i < obj->arity; i++) { struct sched_component_list lc = helper_make_scheduler(tree, obj->children[i],specs, sched_ctx_id); unsigned j; for(j = 0; j < lc.size; j++) add_component(&l, lc.arr[j]); destroy_list(&lc); } if(!component) return l; for(i = 0; i < l.size; i++) starpu_sched_component_connect(component, l.arr[i]); destroy_list(&l); init_list(&l); component->obj = obj; add_component(&l, component); return l; } /* return the first component in prefix order such as component->obj == obj, or NULL */ static struct starpu_sched_component * _find_sched_component_with_obj(struct starpu_sched_component * component, hwloc_obj_t obj) { if(component == NULL) return NULL; if(component->obj == obj) return component; unsigned i; for(i = 0; i < component->nchildren; i++) { struct starpu_sched_component * tmp = _find_sched_component_with_obj(component->children[i], obj); if(tmp) return tmp; } return NULL; } /* return true if all workers in the tree have the same perf_arch as w_ref, * if there is no worker it return true */ static int is_same_kind_of_all(struct starpu_sched_component * root, struct _starpu_worker * w_ref) { if(starpu_sched_component_is_worker(root)) { struct _starpu_worker * w = root->data; STARPU_ASSERT(w->perf_arch.ndevices == 1); return w->perf_arch.devices[0].type == w_ref->perf_arch.devices[0].type; } unsigned i; for(i = 0;i < root->nchildren; i++) if(!is_same_kind_of_all(root->children[i], w_ref)) return 0; return 1; } /* buggy function * return the starpu_sched_component linked to the supposed memory component of worker_component */ static struct starpu_sched_component * find_mem_component(struct starpu_sched_component * root, struct starpu_sched_component * worker_component) { struct starpu_sched_component * component = worker_component; while(component->obj->type != HWLOC_OBJ_NUMANODE && component->obj->type != HWLOC_OBJ_GROUP && component->obj->type != HWLOC_OBJ_MACHINE) { hwloc_obj_t tmp = component->obj; do { component = _find_sched_component_with_obj(root,tmp); tmp = tmp->parent; } while(!component); } return component; } static struct starpu_sched_component * where_should_we_plug_this(struct starpu_sched_component *root, struct starpu_sched_component * worker_component, struct starpu_sched_component_specs specs, unsigned sched_ctx_id) { struct starpu_sched_component * mem = find_mem_component(root ,worker_component); if(specs.mix_heterogeneous_workers || mem->parents[sched_ctx_id] == NULL) return mem; hwloc_obj_t obj = mem->obj; struct starpu_sched_component * parent = mem->parents[sched_ctx_id]; unsigned i; for(i = 0; i < parent->nchildren; i++) { if(parent->children[i]->obj == obj && is_same_kind_of_all(parent->children[i], worker_component->data)) return parent->children[i]; } if(obj->type == HWLOC_OBJ_NUMANODE || obj->type == HWLOC_OBJ_GROUP) { struct starpu_sched_component * component = starpu_sched_component_composed_component_create(root->tree, specs.hwloc_component_composed_sched_component); component->obj = obj; starpu_sched_component_connect(parent, component); return component; } return parent; } static void set_worker_leaf(struct starpu_sched_component * root, struct starpu_sched_component * worker_component, unsigned sched_ctx_id, struct starpu_sched_component_specs specs) { struct _starpu_worker * worker = worker_component->data; struct starpu_sched_component * component = where_should_we_plug_this(root,worker_component,specs, sched_ctx_id); struct starpu_sched_component_composed_recipe * recipe = specs.worker_composed_sched_component ? specs.worker_composed_sched_component(worker->arch):NULL; STARPU_ASSERT(component); if(recipe) { struct starpu_sched_component * tmp = starpu_sched_component_composed_component_create(root->tree, recipe); #ifdef STARPU_DEVEL #warning FIXME component->obj is set to worker_component->obj even for accelerators workers #endif tmp->obj = worker_component->obj; starpu_sched_component_connect(component, tmp); component = tmp; } starpu_sched_component_composed_recipe_destroy(recipe); starpu_sched_component_connect(component, worker_component); } #ifdef STARPU_DEVEL static const char * name_hwloc_component(struct starpu_sched_component * component) { return hwloc_obj_type_string(component->obj->type); } static const char * name_sched_component(struct starpu_sched_component * component) { if(starpu_sched_component_is_fifo(component)) return "fifo component"; if(starpu_sched_component_is_heft(component)) return "heft component"; if(starpu_sched_component_is_random(component)) return "random component"; if(starpu_sched_component_is_worker(component)) { struct _starpu_worker * w = _starpu_sched_component_worker_get_worker(component); #define SIZE 256 static char output[SIZE]; snprintf(output, SIZE,"component worker %d %s",w->workerid,w->name); return output; } if(starpu_sched_component_is_work_stealing(component)) return "work stealing component"; return "unknown"; } static void helper_display_scheduler(FILE* out, unsigned depth, struct starpu_sched_component * component) { if(!component) return; fprintf(out,"%*s-> %s : %s\n", depth * 2 , "", name_sched_component(component), name_hwloc_component(component)); unsigned i; for(i = 0; i < component->nchildren; i++) helper_display_scheduler(out, depth + 1, component->children[i]); } #endif //STARPU_DEVEL struct starpu_sched_tree * starpu_sched_component_make_scheduler(unsigned sched_ctx_id, struct starpu_sched_component_specs specs) { struct starpu_sched_tree * tree = starpu_sched_tree_create(sched_ctx_id); struct _starpu_machine_config *config = _starpu_get_machine_config(); hwloc_topology_t topology = config->topology.hwtopology; struct sched_component_list list = helper_make_scheduler(tree, hwloc_get_root_obj(topology), specs, sched_ctx_id); STARPU_ASSERT(list.size == 1); tree->root = list.arr[0]; destroy_list(&list); unsigned i; for(i = 0; i < starpu_worker_get_count(); i++) { struct _starpu_worker *worker = _starpu_get_worker_struct(i); struct starpu_sched_component *worker_component = starpu_sched_component_worker_new(sched_ctx_id, i); STARPU_ASSERT(worker); set_worker_leaf(tree->root,worker_component, sched_ctx_id, specs); } starpu_sched_tree_update_workers(tree); #ifdef STARPU_DEVEL _STARPU_MSG("scheduler created :\n"); helper_display_scheduler(stderr, 0, tree->root); #endif return tree; } starpu-1.4.9+dfsg/src/sched_policies/work_stealing_policy.c000066400000000000000000000722351507764646700241600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Work stealing policy */ #include #include #include #include #include #include #include #include #include #include /* Experimental (dead) code which needs to be tested, fixed... */ /* #define USE_OVERLOAD */ /* * Experimental code for improving data cache locality: * * USE_LOCALITY: * - for each data, we record on which worker it was last accessed with the * locality flag. * * - when pushing a ready task, we choose the worker which has last accessed the * most data of the task with the locality flag. * * USE_LOCALITY_TASKS: * - for each worker, we record the locality data that the task used last (i.e. a rough * estimation of what is contained in the innermost caches). * * - for each worker, we have a hash table associating from a data handle to * all the ready tasks pushed to it that will use it with the locality flag. * * - When fetching a task from a queue, pick a task which has the biggest number * of data estimated to be contained in the cache. */ //#define USE_LOCALITY //#define USE_LOCALITY_TASKS /* Maximum number of recorded locality data per task */ #define MAX_LOCALITY 8 /* Entry for queued_tasks_per_data: records that a queued task is accessing the data with locality flag */ #ifdef USE_LOCALITY_TASKS struct locality_entry { UT_hash_handle hh; starpu_data_handle_t data; struct starpu_task *task; }; #endif struct _starpu_work_stealing_data_per_worker { char fill1[STARPU_CACHELINE_SIZE]; /* This is read-mostly, only updated when the queue becomes empty or * becomes non-empty, to make it generally cheap to check */ unsigned notask; /* whether the queue is empty */ char fill2[STARPU_CACHELINE_SIZE]; struct starpu_st_prio_deque queue; int running; int *proxlist; int busy; /* Whether this worker is working on a task */ /* keep track of the work performed from the beginning of the algorithm to make * better decisions about which queue to select when deferring work */ unsigned last_pop_worker; #ifdef USE_LOCALITY_TASKS /* This records the same as queue, but hashed by data accessed with locality flag. */ /* FIXME: we record only one task per data, assuming that the access is * RW, and thus only one task is ready to write to it. Do we really need to handle the R case too? */ struct locality_entry *queued_tasks_per_data; /* This records the last data accessed by the worker */ starpu_data_handle_t last_locality[MAX_LOCALITY]; int nlast_locality; #endif }; struct _starpu_work_stealing_data { int (*select_victim)(struct _starpu_work_stealing_data *, unsigned, int); struct _starpu_work_stealing_data_per_worker *per_worker; /* keep track of the work performed from the beginning of the algorithm to make * better decisions about which queue to select when deferring work */ unsigned last_push_worker; }; #ifdef USE_OVERLOAD /** * Minimum number of task we wait for being processed before we start assuming * on which worker the computation would be faster. */ static int calibration_value = 0; #endif /* USE_OVERLOAD */ /** * Return a worker from which a task can be stolen. * Selecting a worker is done in a round-robin fashion, unless * the worker previously selected doesn't own any task, * then we return the first non-empty worker. */ static int select_victim_round_robin(struct _starpu_work_stealing_data *ws, unsigned sched_ctx_id) { unsigned workerid = starpu_worker_get_id_check(); unsigned worker = ws->per_worker[workerid].last_pop_worker; unsigned nworkers; int *workerids = NULL; nworkers = starpu_sched_ctx_get_workers_list_raw(sched_ctx_id, &workerids); unsigned ntasks = 0; /* If the worker's queue is empty, let's try * the next ones */ while (1) { /* Here helgrind would shout that this is unprotected, but we * are fine with getting outdated values, this is just an * estimation */ if (!ws->per_worker[workerids[worker]].notask) { if (ws->per_worker[workerids[worker]].busy || starpu_worker_is_blocked_in_parallel(workerids[worker])) { ntasks = 1; break; } } worker = (worker + 1) % nworkers; if (worker == ws->per_worker[workerid].last_pop_worker) { /* We got back to the first worker, * don't go in infinite loop */ ntasks = 0; break; } } ws->per_worker[workerid].last_pop_worker = (worker + 1) % nworkers; worker = workerids[worker]; if (ntasks) return worker; else return -1; } /** * Return a worker to whom add a task. * Selecting a worker is done in a round-robin fashion. */ static unsigned select_worker_round_robin(struct _starpu_work_stealing_data *ws, struct starpu_task *task, unsigned sched_ctx_id) { unsigned worker; unsigned nworkers; int *workerids; nworkers = starpu_sched_ctx_get_workers_list_raw(sched_ctx_id, &workerids); worker = ws->last_push_worker; do worker = (worker + 1) % nworkers; while (!ws->per_worker[workerids[worker]].running || !starpu_worker_can_execute_task_first_impl(workerids[worker], task, NULL)); ws->last_push_worker = worker; return workerids[worker]; } #ifdef USE_LOCALITY /* Select a worker according to the locality of the data of the task to be scheduled */ static unsigned select_worker_locality(struct _starpu_work_stealing_data *ws, struct starpu_task *task, unsigned sched_ctx_id) { unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); if (nbuffers == 0) return -1; unsigned i, n; unsigned ndata[STARPU_NMAXWORKERS] = { 0 }; int best_worker = -1; n = 0; for (i = 0; i < nbuffers; i++) { if (STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY) { starpu_data_handle_t data = STARPU_TASK_GET_HANDLE(task, i); int locality = data->last_locality; if (locality >= 0) ndata[locality]++; n++; } } if (n) { /* Some locality buffers, choose worker which has most of them */ struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; unsigned best_ndata = 0; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { int workerid = workers->get_next(workers, &it); if (ndata[workerid] > best_ndata && ws->per_worker[workerid].running && ws->per_worker[workerid].busy) { best_worker = workerid; best_ndata = ndata[workerid]; } } } return best_worker; } /* Record in the data which worker will handle the task with the locality flag */ static void record_data_locality(struct starpu_task *task, int workerid) { /* Record where in locality data where the task went */ unsigned i; for (i = 0; i < STARPU_TASK_GET_NBUFFERS(task); i++) if (STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY) { STARPU_TASK_GET_HANDLE(task, i)->last_locality = workerid; } } #else static void record_data_locality(struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED) { } #endif #ifdef USE_LOCALITY_TASKS /* Record in the worker which data it used last with the locality flag */ static void record_worker_locality(struct _starpu_work_stealing_data *ws, struct starpu_task *task, int workerid, unsigned sched_ctx_id) { /* Record where in locality data where the task went */ unsigned i; struct _starpu_work_stealing_data_per_worker *data = &ws->per_worker[workerid]; data->nlast_locality = 0; for (i = 0; i < STARPU_TASK_GET_NBUFFERS(task); i++) if (STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY) { data->last_locality[data->nlast_locality] = STARPU_TASK_GET_HANDLE(task, i); data->nlast_locality++; if (data->nlast_locality == MAX_LOCALITY) break; } } /* Called when pushing a task to a queue */ static void locality_pushed_task(struct _starpu_work_stealing_data *ws, struct starpu_task *task, int workerid, unsigned sched_ctx_id) { struct _starpu_work_stealing_data_per_worker *data = &ws->per_worker[workerid]; unsigned i; for (i = 0; i < STARPU_TASK_GET_NBUFFERS(task); i++) if (STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); struct locality_entry *entry; HASH_FIND_PTR(data->queued_tasks_per_data, &handle, entry); if (STARPU_LIKELY(!entry)) { _STARPU_MALLOC(entry, sizeof(*entry)); entry->data = handle; entry->task = task; HASH_ADD_PTR(data->queued_tasks_per_data, data, entry); } } } /* Pick a task from workerid's queue, for execution on target */ static struct starpu_task *ws_pick_task(struct _starpu_work_stealing_data *ws, int source, int target) { struct _starpu_work_stealing_data_per_worker *data_source = &ws->per_worker[source]; struct _starpu_work_stealing_data_per_worker *data_target = &ws->per_worker[target]; unsigned i, j, n = data_target->nlast_locality; struct starpu_task *(tasks[MAX_LOCALITY]) = { NULL }, *best_task = NULL; int ntasks[MAX_LOCALITY] = { 0 }, best_n; /* Number of locality data for this worker used by this task */ /* Look at the last data accessed by this worker */ STARPU_ASSERT(n < MAX_LOCALITY); for (i = 0; i < n; i++) { starpu_data_handle_t handle = data_target->last_locality[i]; struct locality_entry *entry; HASH_FIND_PTR(data_source->queued_tasks_per_data, &handle, entry); if (entry) { /* Record task */ tasks[i] = entry->task; ntasks[i] = 1; /* And increment counter of the same task */ for (j = 0; j < i; j++) { if (tasks[j] == tasks[i]) { ntasks[j]++; break; } } } } /* Now find the task with most locality data for this worker */ best_n = 0; for (i = 0; i < n; i++) { if (ntasks[i] > best_n) { best_task = tasks[i]; best_n = ntasks[i]; } } if (best_n > 0) { /* found an interesting task, try to pick it! */ if (starpu_st_prio_deque_pop_this_task(&data_source->queue, target, best_task)) { if (!data_source->queue.ntasks) { STARPU_ASSERT(ws->per_worker[source].notask == 0); ws->per_worker[source].notask = 1; } return best_task; } } /* Didn't find an interesting task, or couldn't run it */ struct starpu_task *task; if (source != target) task = starpu_st_prio_deque_deque_task_for_worker(&data_source->queue, target, NULL); else task = starpu_st_prio_deque_pop_task_for_worker(&data_source->queue, target, NULL); if (task && !data_source->queue.ntasks) { STARPU_ASSERT(ws->per_worker[source].notask == 0); ws->per_worker[source].notask = 1; } return task; } /* Called when popping a task from a queue */ static void locality_popped_task(struct _starpu_work_stealing_data *ws, struct starpu_task *task, int workerid, unsigned sched_ctx_id) { struct _starpu_work_stealing_data_per_worker *data = &ws->per_worker[workerid]; unsigned i; for (i = 0; i < STARPU_TASK_GET_NBUFFERS(task); i++) if (STARPU_TASK_GET_MODE(task, i) & STARPU_LOCALITY) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); struct locality_entry *entry; HASH_FIND_PTR(data->queued_tasks_per_data, &handle, entry); if (STARPU_LIKELY(entry)) { if (entry->task == task) { HASH_DEL(data->queued_tasks_per_data, entry); free(entry); } } } } #else static void record_worker_locality(struct _starpu_work_stealing_data *ws STARPU_ATTRIBUTE_UNUSED, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED) { } /* Called when pushing a task to a queue */ static void locality_pushed_task(struct _starpu_work_stealing_data *ws STARPU_ATTRIBUTE_UNUSED, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED) { } /* Pick a task from workerid's queue, for execution on target */ static struct starpu_task *ws_pick_task(struct _starpu_work_stealing_data *ws, int source, int target) { struct starpu_task *task; if (source != target) task = starpu_st_prio_deque_deque_task_for_worker(&ws->per_worker[source].queue, target, NULL); else task = starpu_st_prio_deque_pop_task_for_worker(&ws->per_worker[source].queue, target, NULL); if (task && !ws->per_worker[source].queue.ntasks) { STARPU_ASSERT(ws->per_worker[source].notask == 0); ws->per_worker[source].notask = 1; } return task; } /* Called when popping a task from a queue */ static void locality_popped_task(struct _starpu_work_stealing_data *ws STARPU_ATTRIBUTE_UNUSED, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED, unsigned sched_ctx_id STARPU_ATTRIBUTE_UNUSED) { } #endif #ifdef USE_OVERLOAD /** * Return a ratio helpful to determine whether a worker is suitable to steal * tasks from or to put some tasks in its queue. * * \return a ratio with a positive or negative value, describing the current state of the worker : * a smaller value implies a faster worker with an relatively emptier queue : more suitable to put tasks in * a bigger value implies a slower worker with an relatively more replete queue : more suitable to steal tasks from */ static float overload_metric(struct _starpu_work_stealing_data *ws, unsigned sched_ctx_id, unsigned id) { float execution_ratio = 0.0f; float current_ratio = 0.0f; int nprocessed = _starpu_get_deque_nprocessed(ws->per_worker[id].queue); unsigned njobs = _starpu_get_deque_njobs(ws->per_worker[id].queue); /* Did we get enough information ? */ if (ws->performed_total > 0 && nprocessed > 0) { /* How fast or slow is the worker compared to the other workers */ execution_ratio = (float) nprocessed / ws->performed_total; /* How replete is its queue */ current_ratio = (float) njobs / nprocessed; } else { return 0.0f; } return (current_ratio - execution_ratio); } /** * Return the most suitable worker from which a task can be stolen. * The number of previously processed tasks, total and local, * and the number of tasks currently awaiting to be processed * by the tasks are taken into account to select the most suitable * worker to steal task from. */ static int select_victim_overload(struct _starpu_work_stealing_data *ws, unsigned sched_ctx_id) { unsigned best_worker = 0; float best_ratio = FLT_MIN; /* Don't try to play smart until we get * enough information. */ if (ws->performed_total < calibration_value) return select_victim_round_robin(ws, sched_ctx_id); struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); float worker_ratio = overload_metric(ws, sched_ctx_id, worker); if (worker_ratio > best_ratio && ws->per_worker[worker].running && ws->per_worker[worker].busy) { best_worker = worker; best_ratio = worker_ratio; } } return best_worker; } /** * Return the most suitable worker to whom add a task. * The number of previously processed tasks, total and local, * and the number of tasks currently awaiting to be processed * by the tasks are taken into account to select the most suitable * worker to add a task to. */ static unsigned select_worker_overload(struct _starpu_work_stealing_data *ws, struct starpu_task *task, unsigned sched_ctx_id) { unsigned best_worker = 0; float best_ratio = FLT_MAX; /* Don't try to play smart until we get * enough information. */ if (ws->performed_total < calibration_value) return select_worker_round_robin(task, sched_ctx_id); struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { unsigned worker = workers->get_next(workers, &it); float worker_ratio = overload_metric(ws, sched_ctx_id, worker); if (worker_ratio < best_ratio && ws->per_worker[worker].running && starpu_worker_can_execute_task_first_impl(worker, task, NULL)) { best_worker = worker; best_ratio = worker_ratio; } } return best_worker; } #endif /* USE_OVERLOAD */ /** * Return a worker from which a task can be stolen. * This is a phony function used to call the right * function depending on the value of USE_OVERLOAD. */ static inline int select_victim(struct _starpu_work_stealing_data *ws, unsigned sched_ctx_id, int workerid STARPU_ATTRIBUTE_UNUSED) { #ifdef USE_OVERLOAD return select_victim_overload(ws, sched_ctx_id); #else return select_victim_round_robin(ws, sched_ctx_id); #endif /* USE_OVERLOAD */ } /** * Return a worker from which a task can be stolen. * This is a phony function used to call the right * function depending on the value of USE_OVERLOAD. */ static inline unsigned select_worker(struct _starpu_work_stealing_data *ws, struct starpu_task *task, unsigned sched_ctx_id) { #ifdef USE_OVERLOAD return select_worker_overload(ws, task, sched_ctx_id); #else return select_worker_round_robin(ws, task, sched_ctx_id); #endif /* USE_OVERLOAD */ } /* Note: this is not scalable work stealing, use lws instead */ static struct starpu_task *ws_pop_task(unsigned sched_ctx_id) { struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); struct starpu_task *task = NULL; unsigned workerid = starpu_worker_get_id_check(); if (ws->per_worker[workerid].busy) ws->per_worker[workerid].busy = 0; #ifdef STARPU_NON_BLOCKING_DRIVERS if (STARPU_RUNNING_ON_VALGRIND || !starpu_st_prio_deque_is_empty(&ws->per_worker[workerid].queue)) #endif { task = ws_pick_task(ws, workerid, workerid); if (task) locality_popped_task(ws, task, workerid, sched_ctx_id); } if(task) { /* there was a local task */ ws->per_worker[workerid].busy = 1; if (_starpu_get_nsched_ctxs() > 1) { starpu_worker_relax_on(); _starpu_sched_ctx_lock_write(sched_ctx_id); starpu_worker_relax_off(); starpu_sched_ctx_list_task_counters_decrement(sched_ctx_id, workerid); if (_starpu_sched_ctx_worker_is_master_for_child_ctx(sched_ctx_id, workerid, task)) task = NULL; _starpu_sched_ctx_unlock_write(sched_ctx_id); } return task; } /* we need to steal someone's job */ starpu_worker_relax_on(); int victim = ws->select_victim(ws, sched_ctx_id, workerid); starpu_worker_relax_off(); if (victim == -1) { return NULL; } if (_starpu_worker_trylock(victim)) { /* victim is busy, don't bother it, come back later */ #ifdef STARPU_SIMGRID starpu_sleep(0.000001); /* Make sure we come back and not block */ starpu_wake_worker_no_relax(workerid); #endif return NULL; } if (ws->per_worker[victim].running && ws->per_worker[victim].queue.ntasks > 0) { task = ws_pick_task(ws, victim, workerid); } if (task) { _STARPU_TRACE_WORK_STEALING(workerid, victim); starpu_sched_task_break(task); starpu_sched_ctx_list_task_counters_decrement(sched_ctx_id, victim); record_data_locality(task, workerid); record_worker_locality(ws, task, workerid, sched_ctx_id); locality_popped_task(ws, task, victim, sched_ctx_id); } starpu_worker_unlock(victim); #ifndef STARPU_NON_BLOCKING_DRIVERS /* While stealing, perhaps somebody actually give us a task, don't miss * the opportunity to take it before going to sleep. */ { struct _starpu_worker *worker = _starpu_get_worker_struct(starpu_worker_get_id()); if (!task && worker->state_keep_awake) { task = ws_pick_task(ws, workerid, workerid); if (task) { /* keep_awake notice taken into account here, clear flag */ worker->state_keep_awake = 0; locality_popped_task(ws, task, workerid, sched_ctx_id); } } } #endif if (task &&_starpu_get_nsched_ctxs() > 1) { starpu_worker_relax_on(); _starpu_sched_ctx_lock_write(sched_ctx_id); starpu_worker_relax_off(); if (_starpu_sched_ctx_worker_is_master_for_child_ctx(sched_ctx_id, workerid, task)) task = NULL; _starpu_sched_ctx_unlock_write(sched_ctx_id); if (!task) return NULL; } if (ws->per_worker[workerid].busy != !!task) ws->per_worker[workerid].busy = !!task; return task; } static int ws_push_task(struct starpu_task *task) { unsigned sched_ctx_id = task->sched_ctx; struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); int workerid; #ifdef USE_LOCALITY workerid = select_worker_locality(ws, task, sched_ctx_id); #else workerid = -1; #endif if (workerid == -1) workerid = starpu_worker_get_id(); /* If the current thread is not a worker but * the main thread (-1) or the current worker is not in the target * context, we find the better one to put task on its queue */ if (workerid == -1 || !starpu_sched_ctx_contains_worker(workerid, sched_ctx_id) || !starpu_worker_can_execute_task_first_impl(workerid, task, NULL)) workerid = select_worker(ws, task, sched_ctx_id); starpu_worker_lock(workerid); STARPU_AYU_ADDTOTASKQUEUE(starpu_task_get_job_id(task), workerid); starpu_sched_task_break(task); record_data_locality(task, workerid); STARPU_ASSERT_MSG(ws->per_worker[workerid].running, "workerid=%d, ws=%p\n", workerid, ws); starpu_st_prio_deque_push_back_task(&ws->per_worker[workerid].queue, task); if (ws->per_worker[workerid].queue.ntasks == 1) { STARPU_ASSERT(ws->per_worker[workerid].notask == 1); ws->per_worker[workerid].notask = 0; } locality_pushed_task(ws, task, workerid, sched_ctx_id); starpu_push_task_end(task); starpu_worker_unlock(workerid); starpu_sched_ctx_list_task_counters_increment(sched_ctx_id, workerid); #if !defined(STARPU_NON_BLOCKING_DRIVERS) || defined(STARPU_SIMGRID) /* TODO: implement fine-grain signaling, similar to what eager does */ struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) starpu_wake_worker_relax_light(workers->get_next(workers, &it)); #endif return 0; } static void ws_push_task_notify(struct starpu_task *task, int workerid, int perf_workerid, unsigned sched_ctx_id) { (void)task; (void)perf_workerid; struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); ws->per_worker[workerid].busy = 1; } static void ws_add_workers(unsigned sched_ctx_id, int *workerids,unsigned nworkers) { struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); unsigned i; for (i = 0; i < nworkers; i++) { int workerid = workerids[i]; starpu_sched_ctx_worker_shares_tasks_lists(workerid, sched_ctx_id); starpu_st_prio_deque_init(&ws->per_worker[workerid].queue); ws->per_worker[workerid].notask = 1; ws->per_worker[workerid].running = 1; /* Tell helgrind that we are fine with getting outdated values, * this is just an estimation */ STARPU_HG_DISABLE_CHECKING(ws->per_worker[workerid].notask); STARPU_HG_DISABLE_CHECKING(ws->per_worker[workerid].queue.ntasks); ws->per_worker[workerid].busy = 0; STARPU_HG_DISABLE_CHECKING(ws->per_worker[workerid].busy); } } static void ws_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) { struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); unsigned i; for (i = 0; i < nworkers; i++) { int workerid = workerids[i]; starpu_st_prio_deque_destroy(&ws->per_worker[workerid].queue); ws->per_worker[workerid].running = 0; free(ws->per_worker[workerid].proxlist); ws->per_worker[workerid].proxlist = NULL; } } static void initialize_ws_policy(unsigned sched_ctx_id) { struct _starpu_work_stealing_data *ws; _STARPU_MALLOC(ws, sizeof(struct _starpu_work_stealing_data)); starpu_sched_ctx_set_policy_data(sched_ctx_id, (void*)ws); ws->last_push_worker = 0; STARPU_HG_DISABLE_CHECKING(ws->last_push_worker); ws->select_victim = select_victim; unsigned nw = starpu_worker_get_count(); _STARPU_CALLOC(ws->per_worker, nw, sizeof(struct _starpu_work_stealing_data_per_worker)); /* The application may use any integer */ if (starpu_sched_ctx_min_priority_is_set(sched_ctx_id) == 0) starpu_sched_ctx_set_min_priority(sched_ctx_id, INT_MIN); if (starpu_sched_ctx_max_priority_is_set(sched_ctx_id) == 0) starpu_sched_ctx_set_max_priority(sched_ctx_id, INT_MAX); } static void deinit_ws_policy(unsigned sched_ctx_id) { struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); free(ws->per_worker); free(ws); } struct starpu_sched_policy _starpu_sched_ws_policy = { .init_sched = initialize_ws_policy, .deinit_sched = deinit_ws_policy, .add_workers = ws_add_workers, .remove_workers = ws_remove_workers, .push_task = ws_push_task, .pop_task = ws_pop_task, .push_task_notify = ws_push_task_notify, .pre_exec_hook = NULL, .post_exec_hook = NULL, .policy_name = "ws", .policy_description = "work stealing", .worker_type = STARPU_WORKER_LIST, }; /* local work stealing policy */ /* Return a worker to steal a task from. The worker is selected according to * the proximity list built using the info on the architecture provided by hwloc */ #ifdef STARPU_HAVE_HWLOC static int lws_select_victim(struct _starpu_work_stealing_data *ws, unsigned sched_ctx_id, int workerid) { int nworkers = starpu_sched_ctx_get_nworkers(sched_ctx_id); int i; for (i = 0; i < nworkers; i++) { int neighbor = ws->per_worker[workerid].proxlist[i]; if (ws->per_worker[neighbor].notask) continue; /* FIXME: do not keep looking again and again at some worker * which has tasks, but that can't execute on me */ if (ws->per_worker[neighbor].busy || starpu_worker_is_blocked_in_parallel(neighbor)) return neighbor; } return -1; } #endif static void lws_add_workers(unsigned sched_ctx_id, int *workerids, unsigned nworkers) { ws_add_workers(sched_ctx_id, workerids, nworkers); #ifdef STARPU_HAVE_HWLOC struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); /* Build a proximity list for every worker. It is cheaper to * build this once and then use it for popping tasks rather * than traversing the hwloc tree every time a task must be * stolen */ struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_tree *tree = (struct starpu_tree*)workers->collection_private; unsigned i; /* get the complete list of workers (not just the added one) and rebuild the proxlists */ nworkers = starpu_sched_ctx_get_workers_list_raw(sched_ctx_id, &workerids); for (i = 0; i < nworkers; i++) { int workerid = workerids[i]; if (ws->per_worker[workerid].proxlist == NULL) _STARPU_CALLOC(ws->per_worker[workerid].proxlist, STARPU_NMAXWORKERS, sizeof(int)); int bindid; struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); bindid = starpu_worker_get_bindid(workerid); it.value = starpu_tree_get(tree, bindid); int cnt = 0; for(;;) { struct starpu_tree *neighbour = (struct starpu_tree*)it.value; int *neigh_workerids; int neigh_nworkers = starpu_bindid_get_workerids(neighbour->id, &neigh_workerids); int w; for(w = 0; w < neigh_nworkers; w++) { if(!it.visited[neigh_workerids[w]] && workers->present[neigh_workerids[w]]) { ws->per_worker[workerid].proxlist[cnt++] = neigh_workerids[w]; it.visited[neigh_workerids[w]] = 1; } } if(!workers->has_next(workers, &it)) break; it.value = it.possible_value; it.possible_value = NULL; } } #endif } static void initialize_lws_policy(unsigned sched_ctx_id) { /* lws is loosely based on ws, except that it might use hwloc. */ initialize_ws_policy(sched_ctx_id); if (starpu_worker_get_count() != starpu_cpu_worker_get_count() || starpu_memory_nodes_get_numa_count() > 1 ) { _STARPU_DISP("Warning: you are running the default lws scheduler, which is not a very smart scheduler, while the system has GPUs or several memory nodes. Make sure to read the StarPU documentation about adding performance models in order to be able to use the dmda or dmdas scheduler instead.\n"); } #ifdef STARPU_HAVE_HWLOC struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data *)starpu_sched_ctx_get_policy_data(sched_ctx_id); ws->select_victim = lws_select_victim; #endif } struct starpu_sched_policy _starpu_sched_lws_policy = { .init_sched = initialize_lws_policy, .deinit_sched = deinit_ws_policy, .add_workers = lws_add_workers, .remove_workers = ws_remove_workers, .push_task = ws_push_task, .pop_task = ws_pop_task, .push_task_notify = ws_push_task_notify, .pre_exec_hook = NULL, .post_exec_hook = NULL, .policy_name = "lws", .policy_description = "locality work stealing", #ifdef STARPU_HAVE_HWLOC .worker_type = STARPU_WORKER_TREE, #else .worker_type = STARPU_WORKER_LIST, #endif }; starpu-1.4.9+dfsg/src/util/000077500000000000000000000000001507764646700155545ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/util/execute_on_all.c000066400000000000000000000115251507764646700207120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include struct wrapper_func_args { void (*func)(void *); void *arg; }; static void wrapper_func(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *_args) { struct wrapper_func_args *args = (struct wrapper_func_args *) _args; #ifdef STARPU_PROF_TOOL struct starpu_prof_tool_info pi; #endif #ifdef STARPU_PROF_TOOL int worker = starpu_worker_get_id(); pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_gpu_exec, worker, worker, starpu_prof_tool_driver_gpu, -1, (void*)args->func); starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec(&pi, NULL, NULL); #endif args->func(args->arg); #ifdef STARPU_PROF_TOOL pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_gpu_exec, worker, worker, starpu_prof_tool_driver_gpu, -1, (void*)args->func); starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec(&pi, NULL, NULL); #endif } /** * Execute func(arg) on the given workers. */ void starpu_execute_on_specific_workers(void (*func)(void*), void * arg, unsigned num_workers, unsigned * workers, const char * name) { int ret; unsigned w; struct starpu_task *tasks[STARPU_NMAXWORKERS]; /* create a wrapper codelet */ struct starpu_codelet wrapper_cl = { .where = 0xFF, .cuda_funcs = {wrapper_func}, .hip_funcs = {wrapper_func}, .cpu_funcs = {wrapper_func}, .opencl_funcs = {wrapper_func}, .nbuffers = 0, .name = name }; struct wrapper_func_args args = { .func = func, .arg = arg }; for (w = 0; w < num_workers; w++) { unsigned worker = workers[w]; tasks[w] = starpu_task_create(); tasks[w]->name = name; tasks[w]->cl = &wrapper_cl; tasks[w]->cl_arg = &args; tasks[w]->execute_on_a_specific_worker = 1; tasks[w]->workerid = worker; tasks[w]->detach = 0; tasks[w]->destroy = 0; _starpu_exclude_task_from_dag(tasks[w]); ret = starpu_task_submit(tasks[w]); if (ret == -ENODEV) { /* if the worker is not able to execute this tasks, we * don't insist as this means the worker is not * designated by the "where" bitmap */ starpu_task_destroy(tasks[w]); tasks[w] = NULL; } } for (w= 0; w < num_workers; w++) { if (tasks[w]) { ret = starpu_task_wait(tasks[w]); STARPU_ASSERT(!ret); starpu_task_destroy(tasks[w]); } } } /* execute func(arg) on each worker that matches the "where" flag */ void starpu_execute_on_each_worker_ex(void (*func)(void *), void *arg, uint32_t where, const char * name) { int ret; unsigned worker; unsigned nworkers = starpu_worker_get_count(); struct starpu_task *tasks[STARPU_NMAXWORKERS]; STARPU_ASSERT_MSG((where & ~STARPU_CPU & ~STARPU_CUDA & ~STARPU_OPENCL & ~STARPU_HIP) == 0, "This function is implemented only on CPU, CUDA, HIP, OpenCL"); /* create a wrapper codelet */ struct starpu_codelet wrapper_cl = { .where = where, .cuda_funcs = {wrapper_func}, .hip_funcs = {wrapper_func}, .cpu_funcs = {wrapper_func}, .opencl_funcs = {wrapper_func}, .nbuffers = 0, .name = (name != NULL ? name : "execute_on_all_wrapper") }; struct wrapper_func_args args = { .func = func, .arg = arg }; for (worker = 0; worker < nworkers; worker++) { tasks[worker] = starpu_task_create(); tasks[worker]->name = wrapper_cl.name; tasks[worker]->cl = &wrapper_cl; tasks[worker]->cl_arg = &args; tasks[worker]->execute_on_a_specific_worker = 1; tasks[worker]->workerid = worker; tasks[worker]->detach = 0; tasks[worker]->destroy = 0; _starpu_exclude_task_from_dag(tasks[worker]); ret = _starpu_task_submit_internally(tasks[worker]); if (ret == -ENODEV) { /* if the worker is not able to execute this task, we * don't insist as this means the worker is not * designated by the "where" bitmap */ starpu_task_destroy(tasks[worker]); tasks[worker] = NULL; } } for (worker = 0; worker < nworkers; worker++) { if (tasks[worker]) { ret = starpu_task_wait(tasks[worker]); STARPU_ASSERT(!ret); starpu_task_destroy(tasks[worker]); } } } void starpu_execute_on_each_worker(void (*func)(void *), void *arg, uint32_t where) { starpu_execute_on_each_worker_ex(func, arg, where, NULL); } starpu-1.4.9+dfsg/src/util/file.c000066400000000000000000000020501507764646700166340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include void _starpu_drop_comments(FILE *f) { while(1) { int c = getc(f); switch (c) { case '#': { char s[128]; char *ret; do { ret = fgets(s, sizeof(s), f); } while (ret && (!strchr(s, '\n'))); continue; } case '\n': continue; default: ungetc(c, f); return; } } } starpu-1.4.9+dfsg/src/util/fstarpu.c000066400000000000000000000661021507764646700174110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include typedef void (*_starpu_callback_func_t)(void *); static const intptr_t fstarpu_r = STARPU_R; static const intptr_t fstarpu_w = STARPU_W; static const intptr_t fstarpu_rw = STARPU_RW; static const intptr_t fstarpu_scratch = STARPU_SCRATCH; static const intptr_t fstarpu_redux = STARPU_REDUX; static const intptr_t fstarpu_mpi_redux = STARPU_MPI_REDUX; static const intptr_t fstarpu_commute = STARPU_COMMUTE; static const intptr_t fstarpu_ssend = STARPU_SSEND; static const intptr_t fstarpu_locality = STARPU_LOCALITY; static const intptr_t fstarpu_nofootprint = STARPU_NOFOOTPRINT; static const intptr_t fstarpu_data_array = STARPU_DATA_ARRAY; static const intptr_t fstarpu_data_mode_array = STARPU_DATA_MODE_ARRAY; static const intptr_t fstarpu_cl_args = STARPU_CL_ARGS; static const intptr_t fstarpu_cl_args_nfree = STARPU_CL_ARGS_NFREE; static const intptr_t fstarpu_task_deps_array = STARPU_TASK_DEPS_ARRAY; static const intptr_t fstarpu_task_end_deps_array = STARPU_TASK_END_DEPS_ARRAY; static const intptr_t fstarpu_callback = STARPU_CALLBACK; static const intptr_t fstarpu_callback_with_arg = STARPU_CALLBACK_WITH_ARG; static const intptr_t fstarpu_callback_with_arg_nfree = STARPU_CALLBACK_WITH_ARG_NFREE; static const intptr_t fstarpu_callback_arg = STARPU_CALLBACK_ARG; static const intptr_t fstarpu_callback_arg_nfree= STARPU_CALLBACK_ARG_NFREE; static const intptr_t fstarpu_prologue_callback = STARPU_PROLOGUE_CALLBACK; static const intptr_t fstarpu_prologue_callback_arg = STARPU_PROLOGUE_CALLBACK_ARG; static const intptr_t fstarpu_prologue_callback_arg_nfree = STARPU_PROLOGUE_CALLBACK_ARG_NFREE; static const intptr_t fstarpu_prologue_callback_pop = STARPU_PROLOGUE_CALLBACK_POP; static const intptr_t fstarpu_prologue_callback_pop_arg = STARPU_PROLOGUE_CALLBACK_POP_ARG; static const intptr_t fstarpu_prologue_callback_pop_arg_nfree = STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE; static const intptr_t fstarpu_priority = STARPU_PRIORITY; static const intptr_t fstarpu_execute_on_node = STARPU_EXECUTE_ON_NODE; static const intptr_t fstarpu_execute_on_data = STARPU_EXECUTE_ON_DATA; static const intptr_t fstarpu_execute_where = STARPU_EXECUTE_WHERE; static const intptr_t fstarpu_execute_on_worker = STARPU_EXECUTE_ON_WORKER; static const intptr_t fstarpu_worker_order = STARPU_WORKER_ORDER; static const intptr_t fstarpu_hypervisor_tag = STARPU_HYPERVISOR_TAG; static const intptr_t fstarpu_possibly_parallel = STARPU_POSSIBLY_PARALLEL; static const intptr_t fstarpu_flops = STARPU_FLOPS; static const intptr_t fstarpu_tag = STARPU_TAG; static const intptr_t fstarpu_tag_only = STARPU_TAG_ONLY; static const intptr_t fstarpu_name = STARPU_NAME; static const intptr_t fstarpu_task_color = STARPU_TASK_COLOR; static const intptr_t fstarpu_handles_sequential_consistency = STARPU_HANDLES_SEQUENTIAL_CONSISTENCY; static const intptr_t fstarpu_task_end_dep = STARPU_TASK_END_DEP; static const intptr_t fstarpu_task_synchronous = STARPU_TASK_SYNCHRONOUS; static const intptr_t fstarpu_node_selection_policy = STARPU_NODE_SELECTION_POLICY; static const intptr_t fstarpu_task_workerids = STARPU_TASK_WORKERIDS; static const intptr_t fstarpu_sequential_consistency = STARPU_SEQUENTIAL_CONSISTENCY; static const intptr_t fstarpu_task_profiling_info = STARPU_TASK_PROFILING_INFO; static const intptr_t fstarpu_task_no_submitorder = STARPU_TASK_NO_SUBMITORDER; static const intptr_t fstarpu_task_sched_data = STARPU_TASK_SCHED_DATA; static const intptr_t fstarpu_task_file = STARPU_TASK_FILE; static const intptr_t fstarpu_task_line = STARPU_TASK_LINE; static const intptr_t fstarpu_value = STARPU_VALUE; static const intptr_t fstarpu_sched_ctx = STARPU_SCHED_CTX; static const intptr_t fstarpu_cpu_worker = STARPU_CPU_WORKER; static const intptr_t fstarpu_cuda_worker = STARPU_CUDA_WORKER; static const intptr_t fstarpu_opencl_worker = STARPU_OPENCL_WORKER; static const intptr_t fstarpu_any_worker = STARPU_ANY_WORKER; static const intptr_t fstarpu_narch = STARPU_NARCH; static const intptr_t fstarpu_nmaxbufs = STARPU_NMAXBUFS; static const intptr_t fstarpu_sched_ctx_policy_name = STARPU_SCHED_CTX_POLICY_NAME; static const intptr_t fstarpu_sched_ctx_policy_struct = STARPU_SCHED_CTX_POLICY_STRUCT; static const intptr_t fstarpu_sched_ctx_policy_min_prio = STARPU_SCHED_CTX_POLICY_MIN_PRIO; static const intptr_t fstarpu_sched_ctx_policy_max_prio = STARPU_SCHED_CTX_POLICY_MAX_PRIO; static const intptr_t fstarpu_sched_ctx_hierarchy_level = STARPU_SCHED_CTX_HIERARCHY_LEVEL; static const intptr_t fstarpu_sched_ctx_nested = STARPU_SCHED_CTX_NESTED; static const intptr_t fstarpu_sched_ctx_awake_workers = STARPU_SCHED_CTX_AWAKE_WORKERS; static const intptr_t fstarpu_sched_ctx_policy_init = STARPU_SCHED_CTX_POLICY_INIT; static const intptr_t fstarpu_sched_ctx_user_data = STARPU_SCHED_CTX_USER_DATA; static const intptr_t fstarpu_starpu_nowhere = STARPU_NOWHERE; static const intptr_t fstarpu_starpu_cpu = STARPU_CPU; static const intptr_t fstarpu_starpu_cuda = STARPU_CUDA; static const intptr_t fstarpu_starpu_opencl = STARPU_OPENCL; static const intptr_t fstarpu_starpu_codelet_simgrid_execute = STARPU_CODELET_SIMGRID_EXECUTE; static const intptr_t fstarpu_starpu_codelet_simgrid_execute_and_inject = STARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT; static const intptr_t fstarpu_starpu_cuda_async = STARPU_CUDA_ASYNC; static const intptr_t fstarpu_starpu_opencl_async = STARPU_OPENCL_ASYNC; //static const intptr_t fstarpu_per_worker = STARPU_PER_WORKER; //static const intptr_t fstarpu_per_arch = STARPU_PER_ARCH; //static const intptr_t fstarpu_per_common = STARPU_COMMON; static const intptr_t fstarpu_history_based = STARPU_HISTORY_BASED; static const intptr_t fstarpu_regression_based = STARPU_REGRESSION_BASED; static const intptr_t fstarpu_nl_regression_based = STARPU_NL_REGRESSION_BASED; static const intptr_t fstarpu_multiple_regression_based = STARPU_MULTIPLE_REGRESSION_BASED; static const intptr_t fstarpu_seq = STARPU_SEQ; static const intptr_t fstarpu_spmd = STARPU_SPMD; static const intptr_t fstarpu_forkjoin = STARPU_FORKJOIN; static const intptr_t fstarpu_default_prio = STARPU_DEFAULT_PRIO; intptr_t fstarpu_get_constant(char *s) { if (!strcmp(s, "FSTARPU_R")) { return fstarpu_r; } else if (!strcmp(s, "FSTARPU_W")) { return fstarpu_w; } else if (!strcmp(s, "FSTARPU_RW")) { return fstarpu_rw; } else if (!strcmp(s, "FSTARPU_SCRATCH")) { return fstarpu_scratch; } else if (!strcmp(s, "FSTARPU_REDUX")) { return fstarpu_redux; } else if (!strcmp(s, "FSTARPU_MPI_REDUX")) { return fstarpu_mpi_redux; } else if (!strcmp(s, "FSTARPU_COMMUTE")) { return fstarpu_commute; } else if (!strcmp(s, "FSTARPU_SSEND")) { return fstarpu_ssend; } else if (!strcmp(s, "FSTARPU_LOCALITY")) { return fstarpu_locality; } else if (!strcmp(s, "FSTARPU_NOFOOTPRINT")) { return fstarpu_nofootprint; } else if (!strcmp(s, "FSTARPU_DATA_ARRAY")) { return fstarpu_data_array; } else if (!strcmp(s, "FSTARPU_DATA_MODE_ARRAY")) { return fstarpu_data_mode_array; } else if (!strcmp(s, "FSTARPU_CL_ARGS")) { return fstarpu_cl_args; } else if (!strcmp(s, "FSTARPU_CL_ARGS_NFREE")) { return fstarpu_cl_args_nfree; } else if (!strcmp(s, "FSTARPU_TASK_DEPS_ARRAY")) { return fstarpu_task_deps_array; } else if (!strcmp(s, "FSTARPU_TASK_END_DEPS_ARRAY")) { return fstarpu_task_end_deps_array; } else if (!strcmp(s, "FSTARPU_CALLBACK")) { return fstarpu_callback; } else if (!strcmp(s, "FSTARPU_CALLBACK_WITH_ARG")) { return fstarpu_callback_with_arg; } else if (!strcmp(s, "FSTARPU_CALLBACK_WITH_ARG_NFREE")) { return fstarpu_callback_with_arg_nfree; } else if (!strcmp(s, "FSTARPU_CALLBACK_ARG")) { return fstarpu_callback_arg; } else if (!strcmp(s, "FSTARPU_CALLBACK_ARG_NFREE")) { return fstarpu_callback_arg_nfree; } else if (!strcmp(s, "FSTARPU_PROLOGUE_CALLBACK")) { return fstarpu_prologue_callback; } else if (!strcmp(s, "FSTARPU_PROLOGUE_CALLBACK_ARG")) { return fstarpu_prologue_callback_arg; } else if (!strcmp(s, "FSTARPU_PROLOGUE_CALLBACK_ARG_NFREE")) { return fstarpu_prologue_callback_arg_nfree; } else if (!strcmp(s, "FSTARPU_PROLOGUE_CALLBACK_POP")) { return fstarpu_prologue_callback_pop; } else if (!strcmp(s, "FSTARPU_PROLOGUE_CALLBACK_POP_ARG")) { return fstarpu_prologue_callback_pop_arg; } else if (!strcmp(s, "FSTARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE")) { return fstarpu_prologue_callback_pop_arg_nfree; } else if (!strcmp(s, "FSTARPU_PRIORITY")) { return fstarpu_priority; } else if (!strcmp(s, "FSTARPU_EXECUTE_ON_NODE")) { return fstarpu_execute_on_node; } else if (!strcmp(s, "FSTARPU_EXECUTE_ON_DATA")) { return fstarpu_execute_on_data; } else if (!strcmp(s, "FSTARPU_EXECUTE_WHERE")) { return fstarpu_execute_where; } else if (!strcmp(s, "FSTARPU_EXECUTE_ON_WORKER")) { return fstarpu_execute_on_worker; } else if (!strcmp(s, "FSTARPU_WORKER_ORDER")) { return fstarpu_worker_order; } else if (!strcmp(s, "FSTARPU_HYPERVISOR_TAG")) { return fstarpu_hypervisor_tag; } else if (!strcmp(s, "FSTARPU_POSSIBLY_PARALLEL")) { return fstarpu_possibly_parallel; } else if (!strcmp(s, "FSTARPU_FLOPS")) { return fstarpu_flops; } else if (!strcmp(s, "FSTARPU_TAG")) { return fstarpu_tag; } else if (!strcmp(s, "FSTARPU_TAG_ONLY")) { return fstarpu_tag_only; } else if (!strcmp(s, "FSTARPU_NAME")) { return fstarpu_name; } else if (!strcmp(s, "FSTARPU_NODE_SELECTION_POLICY")) { return fstarpu_node_selection_policy; } else if (!strcmp(s, "FSTARPU_VALUE")) { return fstarpu_value; } else if (!strcmp(s, "FSTARPU_SCHED_CTX")) { return fstarpu_sched_ctx; } else if (!strcmp(s, "FSTARPU_TASK_COLOR")) { return fstarpu_task_color; } else if (!strcmp(s, "FSTARPU_HANDLES_SEQUENTIAL_CONSISTENCY")) { return fstarpu_handles_sequential_consistency; } else if (!strcmp(s, "FSTARPU_TASK_END_DEP")) { return fstarpu_task_end_dep; } else if (!strcmp(s, "FSTARPU_TASK_WORKERIDS")) { return fstarpu_task_workerids; } else if (!strcmp(s, "FSTARPU_TASK_SYNCHRONOUS")) { return fstarpu_task_synchronous; } else if (!strcmp(s, "FSTARPU_SEQUENTIAL_CONSISTENCY")) { return fstarpu_sequential_consistency; } else if (!strcmp(s, "FSTARPU_TASK_PROFILING_INFO")) { return fstarpu_task_profiling_info; } else if (!strcmp(s, "FSTARPU_TASK_NO_SUBMITORDER")) { return fstarpu_task_no_submitorder; } else if (!strcmp(s, "FSTARPU_TASK_SCHED_DATA")) { return fstarpu_task_sched_data; } else if (!strcmp(s, "FSTARPU_TASK_FILE")) { return fstarpu_task_file; } else if (!strcmp(s, "FSTARPU_TASK_LINE")) { return fstarpu_task_line; } else if (!strcmp(s, "FSTARPU_CPU_WORKER")) { return fstarpu_cpu_worker; } else if (!strcmp(s, "FSTARPU_CUDA_WORKER")) { return fstarpu_cuda_worker; } else if (!strcmp(s, "FSTARPU_OPENCL_WORKER")) { return fstarpu_opencl_worker; } else if (!strcmp(s, "FSTARPU_ANY_WORKER")) { return fstarpu_any_worker; } else if (!strcmp(s, "FSTARPU_NARCH")) { return fstarpu_narch; } else if (!strcmp(s, "FSTARPU_NMAXBUFS")) { return fstarpu_nmaxbufs; } else if (!strcmp(s, "FSTARPU_SCHED_CTX_POLICY_NAME")) { return fstarpu_sched_ctx_policy_name; } else if (!strcmp(s, "FSTARPU_SCHED_CTX_POLICY_STRUCT")) { return fstarpu_sched_ctx_policy_struct; } else if (!strcmp(s, "FSTARPU_SCHED_CTX_POLICY_MIN_PRIO")) { return fstarpu_sched_ctx_policy_min_prio; } else if (!strcmp(s, "FSTARPU_SCHED_CTX_POLICY_MAX_PRIO")) { return fstarpu_sched_ctx_policy_max_prio; } else if (!strcmp(s, "FSTARPU_SCHED_CTX_HIERARCHY_LEVEL")) { return fstarpu_sched_ctx_hierarchy_level; } else if (!strcmp(s, "FSTARPU_SCHED_CTX_NESTED")) { return fstarpu_sched_ctx_nested; } else if (!strcmp(s, "FSTARPU_SCHED_CTX_AWAKE_WORKERS")) { return fstarpu_sched_ctx_awake_workers; } else if (!strcmp(s, "FSTARPU_SCHED_CTX_POLICY_INIT")) { return fstarpu_sched_ctx_policy_init; } else if (!strcmp(s, "FSTARPU_SCHED_CTX_USER_DATA")) { return fstarpu_sched_ctx_user_data; } else if (!strcmp(s, "FSTARPU_NOWHERE")) { return fstarpu_starpu_nowhere; } else if (!strcmp(s, "FSTARPU_CPU")) { return fstarpu_starpu_cpu; } else if (!strcmp(s, "FSTARPU_CUDA")) { return fstarpu_starpu_cuda; } else if (!strcmp(s, "FSTARPU_OPENCL")) { return fstarpu_starpu_opencl; } else if (!strcmp(s, "FSTARPU_CODELET_SIMGRID_EXECUTE")) { return fstarpu_starpu_codelet_simgrid_execute; } else if (!strcmp(s, "FSTARPU_CODELET_SIMGRID_EXECUTE_AND_INJECT")) { return fstarpu_starpu_codelet_simgrid_execute_and_inject; } else if (!strcmp(s, "FSTARPU_CUDA_ASYNC")) { return fstarpu_starpu_cuda_async; } else if (!strcmp(s, "FSTARPU_OPENCL_ASYNC")) { return fstarpu_starpu_opencl_async; } // else if (!strcmp(s, "FSTARPU_PER_WORKER")) { return fstarpu_per_worker; } // else if (!strcmp(s, "FSTARPU_PER_ARCH")) { return fstarpu_per_arch; } // else if (!strcmp(s, "FSTARPU_COMMON")) { return fstarpu_per_common; } else if (!strcmp(s, "FSTARPU_HISTORY_BASED")) { return fstarpu_history_based; } else if (!strcmp(s, "FSTARPU_REGRESSION_BASED")) { return fstarpu_regression_based; } else if (!strcmp(s, "FSTARPU_NL_REGRESSION_BASED")) { return fstarpu_nl_regression_based; } else if (!strcmp(s, "FSTARPU_MULTIPLE_REGRESSION_BASED")) { return fstarpu_multiple_regression_based; } else if (!strcmp(s, "FSTARPU_SEQ")) { return fstarpu_seq; } else if (!strcmp(s, "FSTARPU_SPMD")) { return fstarpu_spmd; } else if (!strcmp(s, "FSTARPU_FORKJOIN")) { return fstarpu_forkjoin; } else if (!strcmp(s, "FSTARPU_DEFAULT_PRIO")) { return fstarpu_default_prio; } else { _STARPU_ERROR("unknown constant"); } return -1; } STARPU_ATTRIBUTE_MALLOC struct starpu_conf *fstarpu_conf_allocate(void) { struct starpu_conf *conf; _STARPU_MALLOC(conf, sizeof(*conf)); starpu_conf_init(conf); return conf; } void fstarpu_conf_free(struct starpu_conf *conf) { memset(conf, 0, sizeof(*conf)); free(conf); } void fstarpu_conf_set_sched_policy_name(struct starpu_conf *conf, const char *sched_policy_name) { conf->sched_policy_name = sched_policy_name; } void fstarpu_conf_set_min_prio(struct starpu_conf *conf, int min_prio) { conf->global_sched_ctx_min_priority = min_prio; } void fstarpu_conf_set_max_prio(struct starpu_conf *conf, int max_prio) { conf->global_sched_ctx_max_priority = max_prio; } void fstarpu_conf_set_ncpu(struct starpu_conf *conf, int ncpu) { STARPU_ASSERT(ncpu >= 0 && ncpu <= STARPU_NMAXWORKERS); conf->ncpus = ncpu; } void fstarpu_conf_set_ncuda(struct starpu_conf *conf, int ncuda) { STARPU_ASSERT(ncuda >= 0 && ncuda <= STARPU_NMAXWORKERS); conf->ncuda = ncuda; } void fstarpu_conf_set_nopencl(struct starpu_conf *conf, int nopencl) { STARPU_ASSERT(nopencl >= 0 && nopencl <= STARPU_NMAXWORKERS); conf->nopencl = nopencl; } void fstarpu_conf_set_calibrate(struct starpu_conf *conf, int calibrate) { STARPU_ASSERT(calibrate == 0 || calibrate == 1); conf->calibrate = calibrate; } void fstarpu_conf_set_bus_calibrate(struct starpu_conf *conf, int bus_calibrate) { STARPU_ASSERT(bus_calibrate == 0 || bus_calibrate == 1); conf->bus_calibrate = bus_calibrate; } void fstarpu_topology_print(void) { starpu_topology_print(stderr); } STARPU_ATTRIBUTE_MALLOC struct starpu_codelet *fstarpu_codelet_allocate(void) { struct starpu_codelet *cl; _STARPU_MALLOC(cl, sizeof(*cl)); starpu_codelet_init(cl); return cl; } void fstarpu_codelet_free(struct starpu_codelet *cl) { memset(cl, 0, sizeof(*cl)); free(cl); } void fstarpu_codelet_set_name(struct starpu_codelet *cl, const char *cl_name) { cl->name = cl_name; } void fstarpu_codelet_set_color(struct starpu_codelet *cl, int cl_color) { STARPU_ASSERT(cl_color >= 0); cl->color = (unsigned)cl_color; } void fstarpu_codelet_set_model(struct starpu_codelet *cl, struct starpu_perfmodel *cl_perfmodel) { cl->model = cl_perfmodel; } void fstarpu_codelet_set_energy_model(struct starpu_codelet *cl, struct starpu_perfmodel *cl_perfmodel) { cl->energy_model = cl_perfmodel; } void fstarpu_codelet_add_cpu_func(struct starpu_codelet *cl, void *f_ptr) { const size_t max_cpu_funcs = sizeof(cl->cpu_funcs)/sizeof(cl->cpu_funcs[0])-1; size_t i; for (i = 0; i < max_cpu_funcs; i++) { if (cl->cpu_funcs[i] == NULL) { cl->cpu_funcs[i] = f_ptr; return; } } _STARPU_ERROR("fstarpu: too many cpu functions in Fortran codelet"); } void fstarpu_codelet_add_cuda_func(struct starpu_codelet *cl, void *f_ptr) { const size_t max_cuda_funcs = sizeof(cl->cuda_funcs)/sizeof(cl->cuda_funcs[0])-1; unsigned i; for (i = 0; i < max_cuda_funcs; i++) { if (cl->cuda_funcs[i] == NULL) { cl->cuda_funcs[i] = f_ptr; return; } } _STARPU_ERROR("fstarpu: too many cuda functions in Fortran codelet"); } void fstarpu_codelet_add_cuda_flags(struct starpu_codelet *cl, intptr_t flags) { const size_t max_cuda_flags = sizeof(cl->cuda_flags)/sizeof(cl->cuda_flags[0])-1; unsigned i; for (i = 0; i < max_cuda_flags; i++) { if (cl->cuda_flags[i] == 0) { cl->cuda_flags[i] = (char)flags; return; } } _STARPU_ERROR("fstarpu: too many cuda flags in Fortran codelet"); } void fstarpu_codelet_add_opencl_func(struct starpu_codelet *cl, void *f_ptr) { const size_t max_opencl_funcs = sizeof(cl->opencl_funcs)/sizeof(cl->opencl_funcs[0])-1; unsigned i; for (i = 0; i < max_opencl_funcs; i++) { if (cl->opencl_funcs[i] == NULL) { cl->opencl_funcs[i] = f_ptr; return; } } _STARPU_ERROR("fstarpu: too many opencl functions in Fortran codelet"); } void fstarpu_codelet_add_opencl_flags(struct starpu_codelet *cl, intptr_t flags) { const size_t max_opencl_flags = sizeof(cl->opencl_flags)/sizeof(cl->opencl_flags[0])-1; unsigned i; for (i = 0; i < max_opencl_flags; i++) { if (cl->opencl_flags[i] == 0) { cl->opencl_flags[i] = (char)flags; return; } } _STARPU_ERROR("fstarpu: too many opencl flags in Fortran codelet"); } void fstarpu_codelet_add_buffer(struct starpu_codelet *cl, intptr_t _mode) { enum starpu_data_access_mode mode = (enum starpu_data_access_mode) _mode; const size_t max_modes = sizeof(cl->modes)/sizeof(cl->modes[0])-1; if ((mode & (STARPU_ACCESS_MODE_MAX-1)) != mode) { _STARPU_ERROR("fstarpu: invalid data mode"); } if (cl->nbuffers < (int) max_modes) { cl->modes[cl->nbuffers] = (unsigned int)mode; cl->nbuffers++; } else { _STARPU_ERROR("fstarpu: too many buffers in Fortran codelet"); } } void fstarpu_codelet_set_variable_nbuffers(struct starpu_codelet *cl) { cl->nbuffers = STARPU_VARIABLE_NBUFFERS; } void fstarpu_codelet_set_nbuffers(struct starpu_codelet *cl, int nbuffers) { if (nbuffers >= 0) { cl->nbuffers = nbuffers; } else { _STARPU_ERROR("fstarpu: invalid nbuffers parameter"); } } void fstarpu_codelet_set_flags(struct starpu_codelet *cl, intptr_t flags) { cl->flags = (int)flags; } void fstarpu_codelet_set_where(struct starpu_codelet *cl, intptr_t where) { STARPU_ASSERT(where >= 0); cl->where = (uint32_t)where; } void fstarpu_codelet_set_type(struct starpu_codelet *cl, intptr_t type_constant) { STARPU_ASSERT(type_constant == STARPU_SEQ || type_constant == STARPU_SPMD || type_constant == STARPU_FORKJOIN); cl->type = (int)type_constant; } void fstarpu_codelet_set_max_parallelism(struct starpu_codelet *cl, int max_parallelism) { if (max_parallelism >= 1) { cl->max_parallelism = max_parallelism; } else { _STARPU_ERROR("fstarpu: invalid max_parallelism parameter"); } } STARPU_ATTRIBUTE_MALLOC struct starpu_perfmodel *fstarpu_perfmodel_allocate(void) { struct starpu_perfmodel *model; _STARPU_CALLOC(model, 1, sizeof(*model)); return model; } void fstarpu_perfmodel_free(struct starpu_perfmodel *model) { memset(model, 0, sizeof(*model)); free(model); } void fstarpu_perfmodel_set_symbol(struct starpu_perfmodel *model, const char *model_symbol) { model->symbol = model_symbol; } void fstarpu_perfmodel_set_type(struct starpu_perfmodel *model, intptr_t type) { STARPU_ASSERT(type == fstarpu_history_based || type == fstarpu_regression_based || type == fstarpu_nl_regression_based || type == fstarpu_multiple_regression_based); model->type = type; } void * fstarpu_variable_get_ptr(void *buffers[], int i) { return (void *)STARPU_VARIABLE_GET_PTR(buffers[i]); } void * fstarpu_vector_get_ptr(void *buffers[], int i) { return (void *)STARPU_VECTOR_GET_PTR(buffers[i]); } int fstarpu_vector_get_nx(void *buffers[], int i) { return STARPU_VECTOR_GET_NX(buffers[i]); } void * fstarpu_matrix_get_ptr(void *buffers[], int i) { return (void *)STARPU_MATRIX_GET_PTR(buffers[i]); } int fstarpu_matrix_get_ld(void *buffers[], int i) { return STARPU_MATRIX_GET_LD(buffers[i]); } int fstarpu_matrix_get_nx(void *buffers[], int i) { return STARPU_MATRIX_GET_NX(buffers[i]); } int fstarpu_matrix_get_ny(void *buffers[], int i) { return STARPU_MATRIX_GET_NY(buffers[i]); } void * fstarpu_block_get_ptr(void *buffers[], int i) { return (void *)STARPU_BLOCK_GET_PTR(buffers[i]); } int fstarpu_block_get_ldy(void *buffers[], int i) { return STARPU_BLOCK_GET_LDY(buffers[i]); } int fstarpu_block_get_ldz(void *buffers[], int i) { return STARPU_BLOCK_GET_LDZ(buffers[i]); } int fstarpu_block_get_nx(void *buffers[], int i) { return STARPU_BLOCK_GET_NX(buffers[i]); } int fstarpu_block_get_ny(void *buffers[], int i) { return STARPU_BLOCK_GET_NY(buffers[i]); } int fstarpu_block_get_nz(void *buffers[], int i) { return STARPU_BLOCK_GET_NZ(buffers[i]); } void fstarpu_data_acquire(starpu_data_handle_t handle, intptr_t mode) { STARPU_ASSERT(mode == fstarpu_r || mode == fstarpu_w || mode == fstarpu_rw); starpu_data_acquire(handle, (int)mode); } void fstarpu_unpack_arg(char *cl_arg, void **buffer_list) { size_t current_arg_offset = 0; int nargs, arg; /* We fill the different pointers with the appropriate arguments */ memcpy(&nargs, cl_arg, sizeof(nargs)); current_arg_offset += sizeof(nargs); for (arg = 0; arg < nargs; arg++) { void *argptr = buffer_list[arg]; /* If not reading all cl_args */ if(argptr == NULL) break; size_t arg_size; memcpy(&arg_size, cl_arg+current_arg_offset, sizeof(arg_size)); current_arg_offset += sizeof(arg_size); memcpy(argptr, cl_arg+current_arg_offset, arg_size); current_arg_offset += arg_size; } } void fstarpu_sched_ctx_display_workers(int ctx) { starpu_sched_ctx_display_workers((unsigned)ctx, stderr); } intptr_t fstarpu_worker_get_type(int workerid) { return (intptr_t)starpu_worker_get_type(workerid); } int fstarpu_worker_get_count_by_type(intptr_t type) { return starpu_worker_get_count_by_type((enum starpu_worker_archtype)type); } unsigned fstarpu_worker_get_ids_by_type(intptr_t type, int *workerids, unsigned maxsize) { return starpu_worker_get_ids_by_type((enum starpu_worker_archtype)type, workerids, maxsize); } int fstarpu_worker_get_by_type(intptr_t type, int num) { return starpu_worker_get_by_type((enum starpu_worker_archtype)type, num); } int fstarpu_worker_get_by_devid(intptr_t type, int devid) { return starpu_worker_get_by_type((enum starpu_worker_archtype)type, devid); } void fstarpu_worker_get_type_as_string(intptr_t type, char *dst, size_t maxlen) { const char *str = starpu_worker_get_type_as_string((enum starpu_worker_archtype)type); snprintf(dst, maxlen, "%s", str); } STARPU_ATTRIBUTE_MALLOC starpu_data_handle_t *fstarpu_data_handle_array_alloc(int nb) { void *ptr; _STARPU_CALLOC(ptr, (size_t)nb, sizeof(starpu_data_handle_t)); return ptr; } void fstarpu_data_handle_array_free(starpu_data_handle_t *handles) { free(handles); } void fstarpu_data_handle_array_set(starpu_data_handle_t *handles, int i, starpu_data_handle_t handle) { handles[i] = handle; } STARPU_ATTRIBUTE_MALLOC struct starpu_data_descr *fstarpu_data_descr_array_alloc(int nb) { void *ptr; _STARPU_CALLOC(ptr, (size_t)nb, sizeof(struct starpu_data_descr)); return ptr; } STARPU_ATTRIBUTE_MALLOC struct starpu_data_descr *fstarpu_data_descr_alloc(void) { return fstarpu_data_descr_array_alloc(1); } void fstarpu_data_descr_array_free(struct starpu_data_descr *descrs) { free(descrs); } void fstarpu_data_descr_free(struct starpu_data_descr *descr) { fstarpu_data_descr_array_free(descr); } void fstarpu_data_descr_array_set(struct starpu_data_descr *descrs, int i, starpu_data_handle_t handle, intptr_t mode) { descrs[i].handle = handle; descrs[i].mode = (enum starpu_data_access_mode)mode; } void fstarpu_data_descr_set(struct starpu_data_descr *descr, starpu_data_handle_t handle, intptr_t mode) { fstarpu_data_descr_array_set(descr, 1, handle, mode); } STARPU_ATTRIBUTE_MALLOC struct starpu_data_filter *fstarpu_data_filter_allocate(void) { struct starpu_data_filter *filter; _STARPU_CALLOC(filter, 1, sizeof(*filter)); return filter; } /* Note: use fstarpu_df_alloc_ prefix instead of fstarpu_data_filter_allocate_ to fit within the * Fortran id length limit */ #define _FSTARPU_DATA_FILTER_ALLOCATOR(name) \ STARPU_ATTRIBUTE_MALLOC \ struct starpu_data_filter *fstarpu_df_alloc_##name(void) \ { \ struct starpu_data_filter *filter = fstarpu_data_filter_allocate(); \ filter->filter_func = starpu_##name; \ return filter; \ } _FSTARPU_DATA_FILTER_ALLOCATOR(bcsr_filter_canonical_block); _FSTARPU_DATA_FILTER_ALLOCATOR(csr_filter_vertical_block); _FSTARPU_DATA_FILTER_ALLOCATOR(matrix_filter_block); _FSTARPU_DATA_FILTER_ALLOCATOR(matrix_filter_block_shadow); _FSTARPU_DATA_FILTER_ALLOCATOR(matrix_filter_vertical_block); _FSTARPU_DATA_FILTER_ALLOCATOR(matrix_filter_vertical_block_shadow); _FSTARPU_DATA_FILTER_ALLOCATOR(vector_filter_block); _FSTARPU_DATA_FILTER_ALLOCATOR(vector_filter_block_shadow); _FSTARPU_DATA_FILTER_ALLOCATOR(vector_filter_list); _FSTARPU_DATA_FILTER_ALLOCATOR(vector_filter_divide_in_2); _FSTARPU_DATA_FILTER_ALLOCATOR(block_filter_block); _FSTARPU_DATA_FILTER_ALLOCATOR(block_filter_block_shadow); _FSTARPU_DATA_FILTER_ALLOCATOR(block_filter_vertical_block); _FSTARPU_DATA_FILTER_ALLOCATOR(block_filter_vertical_block_shadow); #undef _FSTARPU_DATA_FILTER_ALLOCATOR void fstarpu_data_filter_free(struct starpu_data_filter *filter) { memset(filter, 0, sizeof(*filter)); free(filter); } void fstarpu_data_filter_set_filter_func(struct starpu_data_filter *filter, void *f_ptr) { STARPU_ASSERT(f_ptr != NULL); filter->filter_func = f_ptr; } void fstarpu_data_filter_set_nchildren(struct starpu_data_filter *filter, int nchildren) { STARPU_ASSERT(nchildren >= 0); filter->nchildren = nchildren; } void fstarpu_data_filter_set_get_nchildren_func(struct starpu_data_filter *filter, void *f_ptr) { filter->get_nchildren = f_ptr; } void fstarpu_data_filter_set_get_child_ops_func(struct starpu_data_filter *filter, void *f_ptr) { filter->get_child_ops = f_ptr; } void fstarpu_data_filter_set_filter_arg(struct starpu_data_filter *filter, int filter_arg) { STARPU_ASSERT(filter_arg >= 0); /* starpu_data_filter.filter_arg is unsigned, but * Fortran does not support unsigned types */ filter->filter_arg = (unsigned)filter_arg; } void fstarpu_data_filter_set_filter_arg_ptr(struct starpu_data_filter *filter, void *filter_arg_ptr) { filter->filter_arg_ptr = filter_arg_ptr; } starpu-1.4.9+dfsg/src/util/misc.c000066400000000000000000000037041507764646700166570ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include const char *_starpu_codelet_get_name(struct starpu_codelet *cl) { if (!cl) return NULL; if (cl->name) return cl->name; else if (cl->model && cl->model->symbol && cl->model->symbol[0]) return cl->model->symbol; else return NULL; } const char *_starpu_codelet_get_model_name(struct starpu_codelet *cl) { if (!cl) return NULL; if (cl->model && cl->model->symbol && cl->model->symbol[0]) return cl->model->symbol; else return cl->name; } const char *_starpu_job_get_model_name(struct _starpu_job *j) { if (!j) return NULL; struct starpu_task *task = j->task; if (!task) return NULL; return _starpu_codelet_get_model_name(task->cl); } const char *_starpu_job_get_task_name(struct _starpu_job *j) { if (!j) return NULL; struct starpu_task *task = j->task; if (!task) return NULL; if (task->name) return task->name; else return _starpu_job_get_model_name(j); } const char *starpu_task_get_model_name(struct starpu_task *task) { if (!task) return NULL; return _starpu_codelet_get_model_name(task->cl); } const char *starpu_task_get_name(struct starpu_task *task) { if (!task) return NULL; if (task->name) return task->name; else return starpu_task_get_model_name(task); } starpu-1.4.9+dfsg/src/util/openmp_runtime_support.c000066400000000000000000002514541507764646700225700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef STARPU_OPENMP /* * locally disable -Wdeprecated-declarations to avoid * lots of deprecated warnings for ucontext related functions */ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" #include #include #include #include #include #include #include #include #include #include #include #define _STARPU_INITIAL_THREAD_STACKSIZE 2097152 static struct starpu_omp_global _global_state; starpu_pthread_key_t _starpu_omp_thread_key; starpu_pthread_key_t _starpu_omp_task_key; struct starpu_omp_global *_starpu_omp_global_state = NULL; double _starpu_omp_clock_ref = 0.0; /* clock reference for starpu_omp_get_wtick */ /* Entry in the `registered_handles' hash table. */ struct handle_entry { UT_hash_handle hh; void *pointer; starpu_data_handle_t handle; }; static struct handle_entry *registered_handles; static struct _starpu_spinlock registered_handles_lock; static struct starpu_omp_critical *create_omp_critical_struct(void); static void destroy_omp_critical_struct(struct starpu_omp_critical *critical); static struct starpu_omp_device *create_omp_device_struct(void); static void destroy_omp_device_struct(struct starpu_omp_device *device); static struct starpu_omp_region *create_omp_region_struct(struct starpu_omp_region *parent_region, struct starpu_omp_device *owner_device); static void destroy_omp_region_struct(struct starpu_omp_region *region); static struct starpu_omp_thread *create_omp_thread_struct(struct starpu_omp_region *owner_region); static void destroy_omp_thread_struct(struct starpu_omp_thread *thread); static struct starpu_omp_task *create_omp_task_struct(struct starpu_omp_task *parent_task, struct starpu_omp_thread *owner_thread, struct starpu_omp_region *owner_region, int is_implicit); static void destroy_omp_task_struct(struct starpu_omp_task *task); static void wake_up_and_unlock_task(struct starpu_omp_task *task); static void wake_up_barrier(struct starpu_omp_region *parallel_region); static void starpu_omp_task_preempt(void); struct starpu_omp_thread * _starpu_omp_get_thread(void) { struct starpu_omp_thread *thread = STARPU_PTHREAD_GETSPECIFIC(_starpu_omp_thread_key); return thread; } static inline void _starpu_omp_set_thread(struct starpu_omp_thread *thread) { STARPU_PTHREAD_SETSPECIFIC(_starpu_omp_thread_key, thread); } struct starpu_omp_task *_starpu_omp_get_task(void) { struct starpu_omp_task *task = STARPU_PTHREAD_GETSPECIFIC(_starpu_omp_task_key); return task; } static inline void _starpu_omp_set_task(struct starpu_omp_task *task) { STARPU_PTHREAD_SETSPECIFIC(_starpu_omp_task_key, task); } struct starpu_omp_region *_starpu_omp_get_region_at_level(int level) { const struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_region *parallel_region; if (!task) return NULL; parallel_region = task->owner_region; if (level < 0 || level > parallel_region->icvs.levels_var) return NULL; while (level < parallel_region->icvs.levels_var) { parallel_region = parallel_region->parent_region; } return parallel_region; } int _starpu_omp_get_region_thread_num(const struct starpu_omp_region * const region) { struct starpu_omp_thread *thread = _starpu_omp_get_thread(); STARPU_ASSERT(thread != NULL); if (thread == region->master_thread) return 0; int tid = starpu_omp_thread_list_member(®ion->thread_list, thread); if (tid >= 0) return tid+1; _STARPU_ERROR("unrecognized omp thread\n"); } static void weak_task_lock(struct starpu_omp_task *task) { _starpu_spin_lock(&task->lock); while (task->transaction_pending) { _starpu_spin_unlock(&task->lock); STARPU_UYIELD(); _starpu_spin_lock(&task->lock); } } static void weak_task_unlock(struct starpu_omp_task *task) { _starpu_spin_unlock(&task->lock); } static void wake_up_and_unlock_task(struct starpu_omp_task *task) { STARPU_ASSERT(task->transaction_pending == 0); if (task->wait_on == 0) { weak_task_unlock(task); int ret = starpu_task_submit(task->starpu_task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } else { weak_task_unlock(task); } } static void transaction_callback(void *_task) { struct starpu_omp_task *task = _task; _starpu_spin_lock(&task->lock); STARPU_ASSERT(task->transaction_pending != 0); task->transaction_pending = 0; _starpu_spin_unlock(&task->lock); } static void condition_init(struct starpu_omp_condition *condition) { condition->contention_list_head = NULL; } static void condition_exit(struct starpu_omp_condition *condition) { STARPU_ASSERT(condition->contention_list_head == NULL); condition->contention_list_head = NULL; } static void condition_wait(struct starpu_omp_condition *condition, struct _starpu_spinlock *lock, enum starpu_omp_task_wait_on flag) { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_task_link link; _starpu_spin_lock(&task->lock); task->wait_on |= flag; link.task = task; link.next = condition->contention_list_head; condition->contention_list_head = &link; task->transaction_pending = 1; _starpu_spin_unlock(&task->lock); _starpu_spin_unlock(lock); _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task); starpu_omp_task_preempt(); /* re-acquire the lock released by the callback */ _starpu_spin_lock(lock); } #if 0 /* unused for now */ static void condition_signal(struct starpu_omp_condition *condition) { if (condition->contention_list_head != NULL) { struct starpu_omp_task *next_task = condition->contention_list_head->task; weak_task_lock(next_task); condition->contention_list_head = condition->contention_list_head->next; STARPU_ASSERT(next_task->wait_on & starpu_omp_task_wait_on_condition); next_task->wait_on &= ~starpu_omp_task_wait_on_condition; wake_up_and_unlock_task(next_task); } } #endif static void condition_broadcast(struct starpu_omp_condition *condition, enum starpu_omp_task_wait_on flag) { while (condition->contention_list_head != NULL) { struct starpu_omp_task *next_task = condition->contention_list_head->task; weak_task_lock(next_task); condition->contention_list_head = condition->contention_list_head->next; STARPU_ASSERT(next_task->wait_on & flag); next_task->wait_on &= ~flag; wake_up_and_unlock_task(next_task); } } static void register_thread_worker(struct starpu_omp_thread *thread) { STARPU_ASSERT(thread->worker != NULL); _starpu_spin_lock(&_global_state.hash_workers_lock); struct _starpu_worker *check = thread->worker; struct starpu_omp_thread *tmp = NULL; HASH_FIND_PTR(_global_state.hash_workers, &check, tmp); STARPU_ASSERT(tmp == NULL); HASH_ADD_PTR(_global_state.hash_workers, worker, thread); _starpu_spin_unlock(&_global_state.hash_workers_lock); } static struct starpu_omp_thread *get_worker_thread(struct _starpu_worker *starpu_worker) { struct starpu_omp_thread *thread = NULL; _starpu_spin_lock(&_global_state.hash_workers_lock); HASH_FIND_PTR(_global_state.hash_workers, &starpu_worker, thread); _starpu_spin_unlock(&_global_state.hash_workers_lock); return thread; } static struct starpu_omp_thread *get_local_thread(void) { struct starpu_omp_thread *thread = _starpu_omp_get_thread(); if (thread == NULL) { struct _starpu_worker *starpu_worker = _starpu_get_local_worker_key(); STARPU_ASSERT(starpu_worker != NULL); thread = get_worker_thread(starpu_worker); if ( #ifdef STARPU_DEVEL #warning Why not just checking for STARPU_CPU_WORKER? #endif #ifdef STARPU_USE_CUDA (starpu_worker->arch != STARPU_CUDA_WORKER) && #endif #ifdef STARPU_USE_OPENCL (starpu_worker->arch != STARPU_OPENCL_WORKER) && #endif 1 ) { STARPU_ASSERT(thread != NULL); } if (thread != NULL) { _starpu_omp_set_thread(thread); } } return thread; } static struct starpu_omp_thread * __attribute__ ((noinline)) _get_local_thread_noinline(void) { return get_local_thread(); } static struct starpu_omp_critical *create_omp_critical_struct(void) { struct starpu_omp_critical *critical; _STARPU_CALLOC(critical, 1, sizeof(*critical)); _starpu_spin_init(&critical->lock); return critical; } static void destroy_omp_critical_struct(struct starpu_omp_critical *critical) { STARPU_ASSERT(critical->state == 0); STARPU_ASSERT(critical->contention_list_head == NULL); _starpu_spin_destroy(&critical->lock); critical->name = NULL; free(critical); } static struct starpu_omp_device *create_omp_device_struct(void) { struct starpu_omp_device *device; _STARPU_CALLOC(device, 1, sizeof(*device)); _starpu_spin_init(&device->atomic_lock); return device; } static void destroy_omp_device_struct(struct starpu_omp_device *device) { _starpu_spin_destroy(&device->atomic_lock); memset(device, 0, sizeof(*device)); free(device); } static struct starpu_omp_device *get_caller_device(void) { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_device *device; if (task) { STARPU_ASSERT(task->owner_region != NULL); device = task->owner_region->owner_device; } else { device = _global_state.initial_device; } STARPU_ASSERT(device != NULL); return device; } static struct starpu_omp_region *create_omp_region_struct(struct starpu_omp_region *parent_region, struct starpu_omp_device *owner_device) { struct starpu_omp_region *region; _STARPU_CALLOC(region, 1, sizeof(*region)); region->parent_region = parent_region; region->owner_device = owner_device; starpu_omp_thread_list_init0(®ion->thread_list); _starpu_spin_init(®ion->lock); _starpu_spin_init(®ion->registered_handles_lock); region->level = (parent_region != NULL)?parent_region->level+1:0; return region; } static void destroy_omp_region_struct(struct starpu_omp_region *region) { STARPU_ASSERT(region->nb_threads == 0); STARPU_ASSERT(starpu_omp_thread_list_empty(®ion->thread_list)); STARPU_ASSERT(region->continuation_starpu_task == NULL); _starpu_spin_destroy(®ion->registered_handles_lock); _starpu_spin_destroy(®ion->lock); memset(region, 0, sizeof(*region)); free(region); } static void omp_initial_thread_func(void) { struct starpu_omp_thread *initial_thread = _global_state.initial_thread; struct starpu_omp_task *initial_task = _global_state.initial_task; while (1) { struct starpu_task *continuation_starpu_task = initial_task->nested_region->continuation_starpu_task; starpu_driver_run_once(&initial_thread->starpu_driver); /* * if we are leaving the first nested region we give control back to initial task * otherwise, we should continue to execute work */ if (_starpu_task_test_termination(continuation_starpu_task)) { initial_task->nested_region->continuation_starpu_task = NULL; _starpu_omp_set_task(initial_task); swapcontext(&initial_thread->ctx, &initial_task->ctx); } } } static struct starpu_omp_thread *create_omp_thread_struct(struct starpu_omp_region *owner_region) { struct starpu_omp_thread *thread = starpu_omp_thread_new(); if (thread == NULL) _STARPU_ERROR("memory allocation failed"); memset(thread, 0, sizeof(*thread)); thread->owner_region = owner_region; return thread; } static void destroy_omp_thread_struct(struct starpu_omp_thread *thread) { STARPU_ASSERT(thread->current_task == NULL); memset(thread, 0, sizeof(*thread)); starpu_omp_thread_delete(thread); } /* Register the mapping from PTR to HANDLE. If PTR is already mapped to * some handle, the new mapping shadows the previous one. */ static void register_ram_pointer(starpu_data_handle_t handle, void *ptr) { struct handle_entry *entry; _STARPU_MALLOC(entry, sizeof(*entry)); entry->pointer = ptr; entry->handle = handle; struct starpu_omp_task *task = _starpu_omp_get_task(); if (task) { if (task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT) { struct starpu_omp_region *parallel_region = task->owner_region; _starpu_spin_lock(¶llel_region->registered_handles_lock); HASH_ADD_PTR(parallel_region->registered_handles, pointer, entry); _starpu_spin_unlock(¶llel_region->registered_handles_lock); } else { HASH_ADD_PTR(task->registered_handles, pointer, entry); } } else { struct handle_entry *old_entry; _starpu_spin_lock(®istered_handles_lock); HASH_FIND_PTR(registered_handles, &ptr, old_entry); if (old_entry) { /* Already registered this pointer, avoid undefined * behavior of duplicate in hash table */ _starpu_spin_unlock(®istered_handles_lock); free(entry); } else { HASH_ADD_PTR(registered_handles, pointer, entry); _starpu_spin_unlock(®istered_handles_lock); } } } void starpu_omp_handle_register(starpu_data_handle_t handle) { unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { if (starpu_node_get_kind(node) != STARPU_CPU_RAM) continue; void *ptr = starpu_data_handle_to_pointer(handle, node); if (ptr != NULL) register_ram_pointer(handle, ptr); } } /* * Stop monitoring a piece of data */ static void unregister_ram_pointer(starpu_data_handle_t handle, unsigned node) { if (starpu_node_get_kind(node) != STARPU_CPU_RAM) return; if (handle->removed_from_context_hash) return; const void *ram_ptr = starpu_data_handle_to_pointer(handle, node); if (ram_ptr != NULL) { /* Remove the PTR -> HANDLE mapping. If a mapping from PTR * to another handle existed before (e.g., when using * filters), it becomes visible again. */ struct handle_entry *entry; struct starpu_omp_task *task = _starpu_omp_get_task(); if (task) { if (task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT) { struct starpu_omp_region *parallel_region = task->owner_region; _starpu_spin_lock(¶llel_region->registered_handles_lock); HASH_FIND_PTR(parallel_region->registered_handles, &ram_ptr, entry); STARPU_ASSERT(entry != NULL); HASH_DEL(registered_handles, entry); _starpu_spin_unlock(¶llel_region->registered_handles_lock); } else { HASH_FIND_PTR(task->registered_handles, &ram_ptr, entry); STARPU_ASSERT(entry != NULL); HASH_DEL(task->registered_handles, entry); } } else { _starpu_spin_lock(®istered_handles_lock); HASH_FIND_PTR(registered_handles, &ram_ptr, entry); if (entry) { if (entry->handle == handle) { HASH_DEL(registered_handles, entry); } else /* don't free it, it's not ours */ entry = NULL; } _starpu_spin_unlock(®istered_handles_lock); } free(entry); } } void starpu_omp_handle_unregister(starpu_data_handle_t handle) { unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { struct _starpu_data_replicate *local = &handle->per_node[node]; STARPU_ASSERT(!local->refcnt); if (local->allocated) { unregister_ram_pointer(handle, node); } } } static void unregister_region_handles(struct starpu_omp_region *region) { _starpu_spin_lock(®ion->registered_handles_lock); struct handle_entry *entry=NULL, *tmp=NULL; HASH_ITER(hh, (region->registered_handles), entry, tmp) { entry->handle->removed_from_context_hash = 1; HASH_DEL(region->registered_handles, entry); starpu_data_unregister(entry->handle); free(entry); } _starpu_spin_unlock(®ion->registered_handles_lock); } static void unregister_task_handles(struct starpu_omp_task *task) { struct handle_entry *entry=NULL, *tmp=NULL; HASH_ITER(hh, task->registered_handles, entry, tmp) { entry->handle->removed_from_context_hash = 1; HASH_DEL(task->registered_handles, entry); starpu_data_unregister(entry->handle); free(entry); } } starpu_data_handle_t starpu_omp_data_lookup(const void *ptr) { starpu_data_handle_t result; struct starpu_omp_task *task = _starpu_omp_get_task(); if (task) { if (task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT) { struct starpu_omp_region *parallel_region = task->owner_region; _starpu_spin_lock(¶llel_region->registered_handles_lock); { struct handle_entry *entry; HASH_FIND_PTR(parallel_region->registered_handles, &ptr, entry); if(STARPU_UNLIKELY(entry == NULL)) result = NULL; else result = entry->handle; } _starpu_spin_unlock(¶llel_region->registered_handles_lock); } else { struct handle_entry *entry; HASH_FIND_PTR(task->registered_handles, &ptr, entry); if(STARPU_UNLIKELY(entry == NULL)) result = NULL; else result = entry->handle; } } else { _starpu_spin_lock(®istered_handles_lock); { struct handle_entry *entry; HASH_FIND_PTR(registered_handles, &ptr, entry); if(STARPU_UNLIKELY(entry == NULL)) result = NULL; else result = entry->handle; } _starpu_spin_unlock(®istered_handles_lock); } return result; } static void starpu_omp_explicit_task_entry(struct starpu_omp_task *task) { STARPU_ASSERT(!(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT)); struct _starpu_worker *starpu_worker = _starpu_get_local_worker_key(); /* XXX on work */ if (task->is_loop) { starpu_omp_for_inline_first_alt(task->nb_iterations, task->chunk, starpu_omp_sched_static, 1, &task->begin_i, &task->end_i); } if (starpu_worker->arch == STARPU_CPU_WORKER) { task->cpu_f(task->starpu_buffers, task->starpu_cl_arg); } #ifdef STARPU_USE_CUDA else if (starpu_worker->arch == STARPU_CUDA_WORKER) { task->cuda_f(task->starpu_buffers, task->starpu_cl_arg); } #endif #ifdef STARPU_USE_OPENCL else if (starpu_worker->arch == STARPU_OPENCL_WORKER) { task->opencl_f(task->starpu_buffers, task->starpu_cl_arg); } #endif else _STARPU_ERROR("invalid worker architecture"); /**/ unregister_task_handles(task); _starpu_spin_lock(&task->lock); task->state = starpu_omp_task_state_terminated; task->transaction_pending=1; _starpu_spin_unlock(&task->lock); struct starpu_omp_thread *thread = _starpu_omp_get_thread(); /* * the task reached the terminated state, definitively give hand back to the worker code. * * about to run on the worker stack... */ setcontext(&thread->ctx); STARPU_ASSERT(0); /* unreachable code */ } static void starpu_omp_implicit_task_entry(struct starpu_omp_task *task) { struct starpu_omp_thread *thread = _starpu_omp_get_thread(); STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT); task->cpu_f(task->starpu_buffers, task->starpu_cl_arg); starpu_omp_barrier(); if (thread == task->owner_region->master_thread) { unregister_region_handles(task->owner_region); } task->state = starpu_omp_task_state_terminated; /* * the task reached the terminated state, definitively give hand back to the worker code. * * about to run on the worker stack... */ setcontext(&thread->ctx); STARPU_ASSERT(0); /* unreachable code */ } /* * stop executing a task that is about to block * and give hand back to the thread */ static void starpu_omp_task_preempt(void) { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_thread *thread = _starpu_omp_get_thread(); task->state = starpu_omp_task_state_preempted; /* * the task reached a blocked state, give hand back to the worker code. * * about to run on the worker stack... */ swapcontext(&task->ctx, &thread->ctx); /* now running on the task stack again */ } /* * wrap a task function to allow the task to be preempted */ static void starpu_omp_implicit_task_exec(void *buffers[], void *cl_arg) { struct starpu_omp_task *task = starpu_task_get_current()->omp_task; STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT); _starpu_omp_set_task(task); /* get_local_thread() inlining triggers a clobbering warning with some * versions of GCC, thus we explicitly call the noinline variant */ struct starpu_omp_thread *thread = _get_local_thread_noinline(); if (task->state != starpu_omp_task_state_preempted) { task->starpu_buffers = buffers; task->starpu_cl_arg = cl_arg; STARPU_ASSERT(task->stack == NULL); STARPU_ASSERT(task->stacksize > 0); _STARPU_MALLOC(task->stack, task->stacksize); getcontext(&task->ctx); /* * we do not use uc_link, starpu_omp_task_entry will handle * the end of the task */ task->ctx.uc_link = NULL; task->ctx.uc_stack.ss_sp = task->stack; task->ctx.uc_stack.ss_size = task->stacksize; task->stack_vg_id = VALGRIND_STACK_REGISTER(task->stack, task->stack+task->stacksize); makecontext(&task->ctx, (void (*) ()) starpu_omp_implicit_task_entry, 1, task); } task->state = starpu_omp_task_state_clear; /* * start the task execution, or restore a previously preempted task. * about to run on the task stack... * */ swapcontext(&thread->ctx, &task->ctx); /* now running on the worker stack again */ STARPU_ASSERT(task->state == starpu_omp_task_state_preempted || task->state == starpu_omp_task_state_terminated); _starpu_omp_set_task(NULL); /* TODO: analyse the cause of the return and take appropriate steps */ if (task->state == starpu_omp_task_state_terminated) { task->starpu_task->omp_task = NULL; task->starpu_task = NULL; VALGRIND_STACK_DEREGISTER(task->stack_vg_id); task->stack_vg_id = 0; free(task->stack); task->stack = NULL; memset(&task->ctx, 0, sizeof(task->ctx)); } else if (task->state != starpu_omp_task_state_preempted) _STARPU_ERROR("invalid omp task state"); } static void starpu_omp_task_completion_accounting(struct starpu_omp_task *task) { struct starpu_omp_task *parent_task = task->parent_task; struct starpu_omp_region *parallel_region = task->owner_region; weak_task_lock(parent_task); if (STARPU_ATOMIC_ADD(&parent_task->child_task_count, -1) == 0) { if (parent_task->state == starpu_omp_task_state_zombie) { STARPU_ASSERT(!(parent_task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT)); weak_task_unlock(parent_task); destroy_omp_task_struct(parent_task); } else if (parent_task->wait_on & starpu_omp_task_wait_on_task_childs) { parent_task->wait_on &= ~starpu_omp_task_wait_on_task_childs; wake_up_and_unlock_task(parent_task); } else { weak_task_unlock(parent_task); } } else { weak_task_unlock(parent_task); } _starpu_spin_lock(¶llel_region->lock); if (STARPU_ATOMIC_ADD(¶llel_region->bound_explicit_task_count, -1) == 0) { struct starpu_omp_task *waiting_task = parallel_region->waiting_task; _starpu_spin_unlock(¶llel_region->lock); if (waiting_task) { weak_task_lock(waiting_task); _starpu_spin_lock(¶llel_region->lock); parallel_region->waiting_task = NULL; STARPU_ASSERT(waiting_task->wait_on & starpu_omp_task_wait_on_region_tasks); waiting_task->wait_on &= ~starpu_omp_task_wait_on_region_tasks; _starpu_spin_unlock(¶llel_region->lock); wake_up_and_unlock_task(waiting_task); } } else { _starpu_spin_unlock(¶llel_region->lock); } if (task->task_group) { struct starpu_omp_task *leader_task = task->task_group->leader_task; STARPU_ASSERT(leader_task != task); weak_task_lock(leader_task); if (STARPU_ATOMIC_ADD(&task->task_group->descendent_task_count, -1) == 0) { if (leader_task->wait_on & starpu_omp_task_wait_on_group && task->task_group == leader_task->task_group) /* only wake the leader_task if it is actually * waiting for the current task's task_group */ { leader_task->wait_on &= ~starpu_omp_task_wait_on_group; wake_up_and_unlock_task(leader_task); } else { weak_task_unlock(leader_task); } } else { weak_task_unlock(leader_task); } } } /* * wrap a task function to allow the task to be preempted */ static void starpu_omp_explicit_task_exec(void *buffers[], void *cl_arg) { struct starpu_omp_task *task = starpu_task_get_current()->omp_task; STARPU_ASSERT(!(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT)); _starpu_omp_set_task(task); /* get_local_thread() inlining triggers a clobbering warning with some * versions of GCC, thus we explicitly call the noinline variant */ struct starpu_omp_thread *thread = _get_local_thread_noinline(); if (task->state != starpu_omp_task_state_preempted) { if (thread == NULL) { struct _starpu_worker *starpu_worker = _starpu_get_local_worker_key(); if (starpu_worker->arch != STARPU_CPU_WORKER) { if ( #ifdef STARPU_USE_CUDA (starpu_worker->arch != STARPU_CUDA_WORKER) && #endif #ifdef STARPU_USE_OPENCL (starpu_worker->arch != STARPU_OPENCL_WORKER) && #endif 1 ) { _STARPU_ERROR("invalid worker architecture"); } struct starpu_omp_thread *new_thread; new_thread = create_omp_thread_struct(NULL); new_thread->worker = starpu_worker; register_thread_worker(new_thread); thread = get_local_thread(); STARPU_ASSERT(thread == new_thread); } else { _STARPU_ERROR("orphaned CPU thread"); } } STARPU_ASSERT(thread != NULL); if (!(task->flags & STARPU_OMP_TASK_FLAGS_UNTIED)) { struct _starpu_worker *starpu_worker = _starpu_get_local_worker_key(); task->starpu_task->workerid = starpu_worker->workerid; task->starpu_task->execute_on_a_specific_worker = 1; } task->starpu_buffers = buffers; task->starpu_cl_arg = cl_arg; STARPU_ASSERT(task->stack == NULL); STARPU_ASSERT(task->stacksize > 0); _STARPU_MALLOC(task->stack, task->stacksize); getcontext(&task->ctx); /* * we do not use uc_link, starpu_omp_task_entry will handle * the end of the task */ task->ctx.uc_link = NULL; task->ctx.uc_stack.ss_sp = task->stack; task->ctx.uc_stack.ss_size = task->stacksize; makecontext(&task->ctx, (void (*) ()) starpu_omp_explicit_task_entry, 1, task); } task->state = starpu_omp_task_state_clear; /* * start the task execution, or restore a previously preempted task. * about to run on the task stack... * */ swapcontext(&thread->ctx, &task->ctx); /* now running on the worker stack again */ STARPU_ASSERT(task->state == starpu_omp_task_state_preempted || task->state == starpu_omp_task_state_terminated); _starpu_omp_set_task(NULL); /* TODO: analyse the cause of the return and take appropriate steps */ if (task->state == starpu_omp_task_state_terminated) { free(task->stack); task->stack = NULL; memset(&task->ctx, 0, sizeof(task->ctx)); starpu_omp_task_completion_accounting(task); } else if (task->state != starpu_omp_task_state_preempted) _STARPU_ERROR("invalid omp task state"); } static struct starpu_omp_task *create_omp_task_struct(struct starpu_omp_task *parent_task, struct starpu_omp_thread *owner_thread, struct starpu_omp_region *owner_region, int is_implicit) { struct starpu_omp_task *task = starpu_omp_task_new(); if (task == NULL) _STARPU_ERROR("memory allocation failed"); memset(task, 0, sizeof(*task)); task->parent_task = parent_task; task->owner_thread = owner_thread; task->owner_region = owner_region; if (is_implicit) { task->flags |= STARPU_OMP_TASK_FLAGS_IMPLICIT; } _starpu_spin_init(&task->lock); /* TODO: initialize task->data_env_icvs with proper values */ memset(&task->data_env_icvs, 0, sizeof(task->data_env_icvs)); if (is_implicit) { /* TODO: initialize task->implicit_task_icvs with proper values */ memset(&task->implicit_task_icvs, 0, sizeof(task->implicit_task_icvs)); } if (owner_region->level > 0) { STARPU_ASSERT(owner_region->owner_device->icvs.stacksize_var > 0); task->stacksize = owner_region->owner_device->icvs.stacksize_var; } return task; } static void destroy_omp_task_struct(struct starpu_omp_task *task) { STARPU_ASSERT(task->state == starpu_omp_task_state_terminated || (task->state == starpu_omp_task_state_zombie && task->child_task_count == 0) || task->state == starpu_omp_task_state_target); if (task->state == starpu_omp_task_state_target) { starpu_omp_task_completion_accounting(task); } STARPU_ASSERT(task->nested_region == NULL); STARPU_ASSERT(task->starpu_task == NULL); STARPU_ASSERT(task->stack == NULL); _starpu_spin_destroy(&task->lock); memset(task, 0, sizeof(*task)); starpu_omp_task_delete(task); } /* * setup the main application thread to handle the possible preemption of the initial task */ static int omp_initial_thread_setup(void) { struct starpu_omp_thread *initial_thread = _global_state.initial_thread; struct starpu_omp_task *initial_task = _global_state.initial_task; /* .current_task */ initial_thread->current_task = initial_task; /* .owner_region already set in create_omp_thread_struct */ /* .initial_thread_stack */ _STARPU_MALLOC(initial_thread->initial_thread_stack, _STARPU_INITIAL_THREAD_STACKSIZE); if (initial_thread->initial_thread_stack == NULL) _STARPU_ERROR("memory allocation failed"); /* .ctx */ getcontext(&initial_thread->ctx); /* * we do not use uc_link, the initial thread always should give hand back to the initial task */ initial_thread->ctx.uc_link = NULL; initial_thread->ctx.uc_stack.ss_sp = initial_thread->initial_thread_stack; initial_thread->ctx.uc_stack.ss_size = _STARPU_INITIAL_THREAD_STACKSIZE; initial_thread->initial_thread_stack_vg_id = VALGRIND_STACK_REGISTER(initial_thread->initial_thread_stack, initial_thread->initial_thread_stack+_STARPU_INITIAL_THREAD_STACKSIZE); makecontext(&initial_thread->ctx, omp_initial_thread_func, 0); /* .starpu_driver */ /* * we configure starpu to not launch CPU worker 0 * because we will use the main thread to play the role of worker 0 */ struct starpu_conf omp_starpu_conf; int ret = starpu_conf_init(&omp_starpu_conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_conf_init"); initial_thread->starpu_driver.type = STARPU_CPU_WORKER; initial_thread->starpu_driver.id.cpu_id = 0; omp_starpu_conf.not_launched_drivers = &initial_thread->starpu_driver; omp_starpu_conf.n_not_launched_drivers = 1; #ifdef STARPU_DEVEL #warning setting nhip to 0 should not be necessary #endif omp_starpu_conf.nhip = 0; omp_starpu_conf.nmpi_ms = 0; omp_starpu_conf.ntcpip_ms = 0; /* we are now ready to start StarPU */ ret = starpu_init(&omp_starpu_conf); int check = _starpu_omp_environment_check(); if (check == 0) { STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_driver_init(&initial_thread->starpu_driver); STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_init"); _starpu_omp_set_task(initial_task); _global_state.nb_starpu_cpu_workers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); _STARPU_MALLOC(_global_state.starpu_cpu_worker_ids, _global_state.nb_starpu_cpu_workers * sizeof(int)); if (_global_state.starpu_cpu_worker_ids == NULL) _STARPU_ERROR("memory allocation failed"); unsigned n = starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, _global_state.starpu_cpu_worker_ids, _global_state.nb_starpu_cpu_workers); STARPU_ASSERT(n == _global_state.nb_starpu_cpu_workers); initial_thread->worker = _starpu_get_worker_struct(_global_state.starpu_cpu_worker_ids[0]); STARPU_ASSERT(initial_thread->worker); STARPU_ASSERT(initial_thread->worker->arch == STARPU_CPU_WORKER); _starpu_omp_set_thread(initial_thread); register_thread_worker(initial_thread); } return check; } static void omp_initial_thread_exit() { struct starpu_omp_thread *initial_thread = _global_state.initial_thread; int ret = starpu_driver_deinit(&initial_thread->starpu_driver); STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_deinit"); memset(&initial_thread->starpu_driver, 0, sizeof (initial_thread->starpu_driver)); /* the driver for the main thread is now de-inited, we can shutdown Starpu */ starpu_shutdown(); free(_global_state.starpu_cpu_worker_ids); _global_state.starpu_cpu_worker_ids = NULL; _global_state.nb_starpu_cpu_workers = 0; VALGRIND_STACK_DEREGISTER(initial_thread->initial_thread_stack_vg_id); free(initial_thread->initial_thread_stack); initial_thread->initial_thread_stack = NULL; memset(&initial_thread->ctx, 0, sizeof (initial_thread->ctx)); initial_thread->current_task = NULL; } static int omp_initial_region_setup(void) { int ret = omp_initial_thread_setup(); if (ret != 0) return ret; const int max_active_levels = _starpu_omp_initial_icv_values->max_active_levels_var; const int max_threads = (int)starpu_cpu_worker_get_count(); /* implementation specific initial ICV values override */ if (_starpu_omp_initial_icv_values->nthreads_var[0] == 0) { _starpu_omp_initial_icv_values->nthreads_var[0] = max_threads; _starpu_omp_initial_icv_values->nthreads_var[1] = 0; } else { int i; for (i = 0; i < max_active_levels; i++) { if (_starpu_omp_initial_icv_values->nthreads_var[i] == 0) break; if (_starpu_omp_initial_icv_values->nthreads_var[i] > max_threads) { _starpu_omp_initial_icv_values->nthreads_var[i] = max_threads; } } } _starpu_omp_initial_icv_values->dyn_var = 0; _starpu_omp_initial_icv_values->nest_var = 0; _global_state.initial_device->icvs.max_active_levels_var = max_active_levels; _global_state.initial_device->icvs.def_sched_var = _starpu_omp_initial_icv_values->def_sched_var; _global_state.initial_device->icvs.def_sched_chunk_var = _starpu_omp_initial_icv_values->def_sched_chunk_var; _global_state.initial_device->icvs.stacksize_var = _starpu_omp_initial_icv_values->stacksize_var; _global_state.initial_device->icvs.wait_policy_var = _starpu_omp_initial_icv_values->wait_policy_var; _global_state.initial_region->master_thread = _global_state.initial_thread; _global_state.initial_region->nb_threads++; _global_state.initial_region->icvs.dyn_var = _starpu_omp_initial_icv_values->dyn_var; _global_state.initial_region->icvs.nest_var = _starpu_omp_initial_icv_values->nest_var; if (_starpu_omp_initial_icv_values->nthreads_var[1] != 0) { _STARPU_MALLOC(_global_state.initial_region->icvs.nthreads_var, (1+max_active_levels-_global_state.initial_region->level) * sizeof(*_global_state.initial_region->icvs.nthreads_var)); int i,j; for (i = _global_state.initial_region->level, j = 0; i < max_active_levels; i++, j++) { _global_state.initial_region->icvs.nthreads_var[j] = _starpu_omp_initial_icv_values->nthreads_var[j]; } _global_state.initial_region->icvs.nthreads_var[j] = 0; } else { _STARPU_MALLOC(_global_state.initial_region->icvs.nthreads_var, 2 * sizeof(*_global_state.initial_region->icvs.nthreads_var)); _global_state.initial_region->icvs.nthreads_var[0] = _starpu_omp_initial_icv_values->nthreads_var[0]; _global_state.initial_region->icvs.nthreads_var[1] = 0; } if (_starpu_omp_initial_icv_values->bind_var[1] != starpu_omp_proc_bind_undefined) { _STARPU_MALLOC(_global_state.initial_region->icvs.bind_var, (1+max_active_levels-_global_state.initial_region->level) * sizeof(*_global_state.initial_region->icvs.bind_var)); int i,j; for (i = _global_state.initial_region->level, j = 0; i < max_active_levels; i++, j++) { _global_state.initial_region->icvs.bind_var[j] = _starpu_omp_initial_icv_values->bind_var[j]; } _global_state.initial_region->icvs.bind_var[j] = starpu_omp_proc_bind_undefined; } else { _STARPU_MALLOC(_global_state.initial_region->icvs.bind_var, 2 * sizeof(*_global_state.initial_region->icvs.bind_var)); _global_state.initial_region->icvs.bind_var[0] = _starpu_omp_initial_icv_values->bind_var[0]; _global_state.initial_region->icvs.bind_var[1] = starpu_omp_proc_bind_undefined; } _global_state.initial_region->icvs.thread_limit_var = _starpu_omp_initial_icv_values->thread_limit_var; _global_state.initial_region->icvs.active_levels_var = 0; _global_state.initial_region->icvs.levels_var = 0; _global_state.initial_region->icvs.run_sched_var = _starpu_omp_initial_icv_values->run_sched_var; _global_state.initial_region->icvs.run_sched_chunk_var = _starpu_omp_initial_icv_values->run_sched_chunk_var; _global_state.initial_region->icvs.default_device_var = _starpu_omp_initial_icv_values->default_device_var; _global_state.initial_region->icvs.max_task_priority_var = _starpu_omp_initial_icv_values->max_task_priority_var; _global_state.initial_region->implicit_task_array = &_global_state.initial_task; return 0; } static void omp_initial_region_exit(void) { omp_initial_thread_exit(); _global_state.initial_task->state = starpu_omp_task_state_terminated; _global_state.initial_region->implicit_task_array = NULL; _global_state.initial_region->master_thread = NULL; free(_global_state.initial_region->icvs.nthreads_var); free(_global_state.initial_region->icvs.bind_var); _global_state.initial_region->nb_threads--; } /* * If StarPU was compiled with --enable-openmp, but the OpenMP runtime support * is not in use, starpu_init() may have been called directly instead of * through starpu_omp_init(). However, some starpu_omp functions may be still * be called such as _starpu_omp_get_task(). So let's setup a basic environment * for them. */ void _starpu_omp_dummy_init(void) { if (_starpu_omp_global_state != &_global_state) { STARPU_PTHREAD_KEY_CREATE(&_starpu_omp_thread_key, NULL); STARPU_PTHREAD_KEY_CREATE(&_starpu_omp_task_key, NULL); } } /* * Free data structures allocated by _starpu_omp_dummy_init(). */ void _starpu_omp_dummy_shutdown(void) { if (_starpu_omp_global_state != &_global_state) { STARPU_PTHREAD_KEY_DELETE(_starpu_omp_thread_key); STARPU_PTHREAD_KEY_DELETE(_starpu_omp_task_key); } } /* * Entry point to be called by the OpenMP runtime constructor */ int starpu_omp_init(void) { #ifdef STARPU_SIMGRID /* XXX: ideally we'd pass the real argc/argv. */ /* We have to tell simgrid to avoid cleaning up at exit, since that's before our destructor :/ */ # if SIMGRID_VERSION >= 32300 char *argv[] = { "program", "--cfg=debug/clean-atexit:0", NULL }; # else char *argv[] = { "program", "--cfg=clean-atexit:0", NULL }; # endif int argc = sizeof(argv) / sizeof(argv[0]) - 1; char **_argv = argv; /* Initialize simgrid before anything else. */ _starpu_simgrid_init_early(&argc, &_argv); #endif _starpu_omp_global_state = &_global_state; STARPU_PTHREAD_KEY_CREATE(&_starpu_omp_thread_key, NULL); STARPU_PTHREAD_KEY_CREATE(&_starpu_omp_task_key, NULL); _global_state.initial_device = create_omp_device_struct(); _global_state.initial_region = create_omp_region_struct(NULL, _global_state.initial_device); _global_state.initial_thread = create_omp_thread_struct(_global_state.initial_region); _global_state.initial_task = create_omp_task_struct(NULL, _global_state.initial_thread, _global_state.initial_region, 1); _global_state.default_critical = create_omp_critical_struct(); _global_state.default_arbiter = starpu_arbiter_create(); _global_state.named_criticals = NULL; _starpu_spin_init(&_global_state.named_criticals_lock); _global_state.hash_workers = NULL; _starpu_spin_init(&_global_state.hash_workers_lock); _starpu_omp_environment_init(); _global_state.icvs.cancel_var = _starpu_omp_initial_icv_values->cancel_var; _global_state.environment_valid = -EINVAL; /* in case starpu_init exits (e.g. on a slave) */ _global_state.environment_valid = omp_initial_region_setup(); /* init clock reference for starpu_omp_get_wtick */ _starpu_omp_clock_ref = starpu_timing_now(); _starpu_spin_init(®istered_handles_lock); return _global_state.environment_valid; } void starpu_omp_shutdown(void) { if (_global_state.environment_valid != 0) return; omp_initial_region_exit(); /* TODO: free ICV variables */ /* TODO: free task/thread/region/device structures */ destroy_omp_task_struct(_global_state.initial_task); _global_state.initial_task = NULL; _global_state.initial_thread = NULL; destroy_omp_region_struct(_global_state.initial_region); _global_state.initial_region = NULL; destroy_omp_device_struct(_global_state.initial_device); _global_state.initial_device = NULL; destroy_omp_critical_struct(_global_state.default_critical); _global_state.default_critical = NULL; starpu_arbiter_destroy(_global_state.default_arbiter); _global_state.default_arbiter = NULL; _starpu_spin_lock(&_global_state.named_criticals_lock); { struct starpu_omp_critical *critical=NULL, *tmp=NULL; HASH_ITER(hh, _global_state.named_criticals, critical, tmp) { STARPU_ASSERT(critical != NULL); HASH_DEL(_global_state.named_criticals, critical); destroy_omp_critical_struct(critical); } } STARPU_ASSERT(_global_state.named_criticals == NULL); _starpu_spin_unlock(&_global_state.named_criticals_lock); _starpu_spin_destroy(&_global_state.named_criticals_lock); { struct handle_entry *entry=NULL, *tmp=NULL; if (registered_handles) { _STARPU_DISP("[warning] The application has not unregistered all data handles.\n"); } _starpu_spin_destroy(®istered_handles_lock); HASH_ITER(hh, registered_handles, entry, tmp) { HASH_DEL(registered_handles, entry); free(entry); } registered_handles = NULL; } _starpu_spin_lock(&_global_state.hash_workers_lock); { struct starpu_omp_thread *thread=NULL, *tmp=NULL; HASH_ITER(hh, _global_state.hash_workers, thread, tmp) { STARPU_ASSERT(thread != NULL); HASH_DEL(_global_state.hash_workers, thread); destroy_omp_thread_struct(thread); } } STARPU_ASSERT(_global_state.hash_workers == NULL); _starpu_spin_unlock(&_global_state.hash_workers_lock); _starpu_spin_destroy(&_global_state.hash_workers_lock); _starpu_omp_environment_exit(); STARPU_PTHREAD_KEY_DELETE(_starpu_omp_task_key); STARPU_PTHREAD_KEY_DELETE(_starpu_omp_thread_key); #ifdef STARPU_SIMGRID _starpu_simgrid_deinit_late(); #endif } static void implicit_task__destroy_callback(void *_task) { struct starpu_omp_task *task = _task; destroy_omp_task_struct(task); } void starpu_omp_parallel_region(const struct starpu_omp_parallel_region_attr *attr) { struct starpu_omp_thread *master_thread = _starpu_omp_get_thread(); struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_region *generating_region = task->owner_region; const int max_active_levels = generating_region->owner_device->icvs.max_active_levels_var; struct starpu_omp_region *new_region = create_omp_region_struct(generating_region, _global_state.initial_device); int ret; int nb_threads = 1; /* TODO: for now, nested parallel sections are not supported, thus we * open an active parallel section only if the generating region is the * initial region */ if (attr->if_clause != 0) { const int max_threads = (int)starpu_cpu_worker_get_count(); if (attr->num_threads > 0) { nb_threads = attr->num_threads; } else { nb_threads = generating_region->icvs.nthreads_var[0]; } if (nb_threads > max_threads) { nb_threads = max_threads; } if (nb_threads > 1 && generating_region->icvs.active_levels_var+1 > max_active_levels) { nb_threads = 1; } } STARPU_ASSERT(nb_threads > 0); new_region->icvs.dyn_var = generating_region->icvs.dyn_var; new_region->icvs.nest_var = generating_region->icvs.nest_var; /* the nthreads_var and bind_var arrays do not hold more than * max_active_levels entries at most, even if some in-between levels * are inactive */ if (new_region->level < max_active_levels) { if (generating_region->icvs.nthreads_var[1] != 0) { _STARPU_MALLOC(new_region->icvs.nthreads_var, (1+max_active_levels-new_region->level) * sizeof(*new_region->icvs.nthreads_var)); int i,j; for (i = new_region->level, j = 0; i < max_active_levels; i++, j++) { new_region->icvs.nthreads_var[j] = generating_region->icvs.nthreads_var[j+1]; } new_region->icvs.nthreads_var[j] = 0; } else { _STARPU_MALLOC(new_region->icvs.nthreads_var, 2 * sizeof(*new_region->icvs.nthreads_var)); new_region->icvs.nthreads_var[0] = generating_region->icvs.nthreads_var[0]; new_region->icvs.nthreads_var[1] = 0; } if (generating_region->icvs.bind_var[1] != starpu_omp_proc_bind_undefined) { _STARPU_MALLOC(new_region->icvs.bind_var, (1+max_active_levels-new_region->level) * sizeof(*new_region->icvs.bind_var)); int i,j; for (i = new_region->level, j = 0; i < max_active_levels; i++, j++) { new_region->icvs.bind_var[j] = generating_region->icvs.bind_var[j+1]; } new_region->icvs.bind_var[j] = starpu_omp_proc_bind_undefined; } else { _STARPU_MALLOC(new_region->icvs.bind_var, 2 * sizeof(*new_region->icvs.bind_var)); new_region->icvs.bind_var[0] = generating_region->icvs.bind_var[0]; new_region->icvs.bind_var[1] = starpu_omp_proc_bind_undefined; } } else { _STARPU_MALLOC(new_region->icvs.nthreads_var, sizeof(*new_region->icvs.nthreads_var)); new_region->icvs.nthreads_var[0] = generating_region->icvs.nthreads_var[0]; _STARPU_MALLOC(new_region->icvs.bind_var, sizeof(*new_region->icvs.bind_var)); new_region->icvs.bind_var[0] = generating_region->icvs.bind_var[0]; } new_region->icvs.thread_limit_var = generating_region->icvs.thread_limit_var; new_region->icvs.active_levels_var = (nb_threads > 1)?generating_region->icvs.active_levels_var+1:generating_region->icvs.active_levels_var; new_region->icvs.levels_var = generating_region->icvs.levels_var+1; new_region->icvs.run_sched_var = generating_region->icvs.run_sched_var; new_region->icvs.run_sched_chunk_var = generating_region->icvs.run_sched_chunk_var; new_region->icvs.default_device_var = generating_region->icvs.default_device_var; new_region->icvs.max_task_priority_var = generating_region->icvs.max_task_priority_var; _STARPU_CALLOC(new_region->implicit_task_array, nb_threads, sizeof(*new_region->implicit_task_array)); int i; for (i = 0; i < nb_threads; i++) { struct starpu_omp_thread *new_thread; if (i == 0) { new_thread = master_thread; new_region->master_thread = master_thread; } else { /* TODO: specify actual starpu worker */ /* TODO: use a less arbitrary thread/worker mapping scheme */ if (generating_region->level == 0) { struct _starpu_worker *worker = _starpu_get_worker_struct(_global_state.starpu_cpu_worker_ids[i]); new_thread = get_worker_thread(worker); if (new_thread == NULL) { new_thread = create_omp_thread_struct(new_region); new_thread->worker = _starpu_get_worker_struct(_global_state.starpu_cpu_worker_ids[i]); register_thread_worker(new_thread); } } else { new_thread = master_thread; } starpu_omp_thread_list_push_back(&new_region->thread_list, new_thread); } struct starpu_omp_task *new_task = create_omp_task_struct(task, new_thread, new_region, 1); new_task->rank = new_region->nb_threads; new_region->nb_threads++; new_region->implicit_task_array[i] = new_task; } STARPU_ASSERT(new_region->nb_threads == nb_threads); /* * if task == initial_task, create a starpu task as a continuation to all the implicit * tasks of the new region, else prepare the task for preemption, * to become itself a continuation to the implicit tasks of the new region */ if (task == _global_state.initial_task) { new_region->continuation_starpu_task = starpu_task_create(); /* in that case, the continuation starpu task is only used for synchronisation */ new_region->continuation_starpu_task->cl = NULL; new_region->continuation_starpu_task->workerid = master_thread->worker->workerid; new_region->continuation_starpu_task->execute_on_a_specific_worker = 1; /* this sync task will be tested for completion in omp_initial_thread_func() */ new_region->continuation_starpu_task->detach = 0; } else { /* through the preemption, the parent starpu task becomes the continuation task */ _starpu_task_prepare_for_continuation(); new_region->continuation_starpu_task = task->starpu_task; } task->nested_region = new_region; /* * create the starpu tasks for the implicit omp tasks, * create explicit dependencies between these starpu tasks and the continuation starpu task */ for (i = 0; i < nb_threads; i++) { struct starpu_omp_task * implicit_task = new_region->implicit_task_array[i]; implicit_task->cl = attr->cl; /* * save pointer to the regions user function from the parallel region codelet * * TODO: add support for multiple/heterogeneous implementations */ implicit_task->cpu_f = implicit_task->cl.cpu_funcs[0]; /* * plug the task wrapper into the parallel region codelet instead, to support task preemption */ implicit_task->cl.cpu_funcs[0] = starpu_omp_implicit_task_exec; implicit_task->starpu_task = starpu_task_create(); _starpu_task_set_omp_cleanup_callback(implicit_task->starpu_task, implicit_task__destroy_callback, implicit_task); implicit_task->starpu_task->cl = &implicit_task->cl; { int j; for (j = 0; j < implicit_task->cl.nbuffers; j++) { implicit_task->starpu_task->handles[j] = attr->handles[j]; } } implicit_task->starpu_task->cl_arg = attr->cl_arg; implicit_task->starpu_task->cl_arg_size = attr->cl_arg_size; implicit_task->starpu_task->cl_arg_free = attr->cl_arg_free; implicit_task->starpu_task->omp_task = implicit_task; implicit_task->starpu_task->workerid = implicit_task->owner_thread->worker->workerid; implicit_task->starpu_task->execute_on_a_specific_worker = 1; starpu_task_declare_deps_array(new_region->continuation_starpu_task, 1, &implicit_task->starpu_task); } attr = NULL; /* * submit all the region implicit starpu tasks */ for (i = 0; i < nb_threads; i++) { struct starpu_omp_task * implicit_task = new_region->implicit_task_array[i]; ret = starpu_task_submit(implicit_task->starpu_task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* * submit the region continuation starpu task if task == initial_task */ if (task == _global_state.initial_task) { ret = _starpu_task_submit_internally(new_region->continuation_starpu_task); STARPU_CHECK_RETURN_VALUE(ret, "_starpu_task_submit_internally"); } /* * preempt for completion of the region */ starpu_omp_task_preempt(); if (task == _global_state.initial_task) { STARPU_ASSERT(new_region->continuation_starpu_task == NULL); } else { STARPU_ASSERT(new_region->continuation_starpu_task != NULL); new_region->continuation_starpu_task = NULL; } /* * TODO: free region resources */ for (i = 0; i < nb_threads; i++) { if (i == 0) { new_region->master_thread = NULL; } else { starpu_omp_thread_list_pop_front(&new_region->thread_list); /* TODO: cleanup unused threads */ } new_region->nb_threads--; } /* implicit tasks will be freed in implicit_task__destroy_callback() */ free(new_region->implicit_task_array); STARPU_ASSERT(new_region->nb_threads == 0); task->nested_region = NULL; free(new_region->icvs.bind_var); free(new_region->icvs.nthreads_var); destroy_omp_region_struct(new_region); } static void wake_up_barrier(struct starpu_omp_region *parallel_region) { struct starpu_omp_task *task = _starpu_omp_get_task(); int i; for (i = 0; i < parallel_region->nb_threads; i++) { struct starpu_omp_task * implicit_task = parallel_region->implicit_task_array[i]; if (implicit_task == task) continue; weak_task_lock(implicit_task); STARPU_ASSERT(implicit_task->wait_on & starpu_omp_task_wait_on_barrier); implicit_task->wait_on &= ~starpu_omp_task_wait_on_barrier; wake_up_and_unlock_task(implicit_task); } } void starpu_omp_barrier(void) { struct starpu_omp_task *task = _starpu_omp_get_task(); /* Assume barriers are performed in by the implicit tasks of a parallel_region */ STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT); struct starpu_omp_region *parallel_region = task->owner_region; _starpu_spin_lock(&task->lock); int inc_barrier_count = STARPU_ATOMIC_ADD(¶llel_region->barrier_count, 1); if (inc_barrier_count == parallel_region->nb_threads) { /* last task reaching the barrier */ _starpu_spin_lock(¶llel_region->lock); ANNOTATE_HAPPENS_AFTER(¶llel_region->barrier_count); ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(¶llel_region->barrier_count); parallel_region->barrier_count = 0; ANNOTATE_HAPPENS_AFTER(¶llel_region->barrier_count); ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(¶llel_region->barrier_count); if (parallel_region->bound_explicit_task_count > 0) { task->wait_on |= starpu_omp_task_wait_on_region_tasks; parallel_region->waiting_task = task; task->transaction_pending = 1; _starpu_spin_unlock(¶llel_region->lock); _starpu_spin_unlock(&task->lock); _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task); starpu_omp_task_preempt(); } else { _starpu_spin_unlock(¶llel_region->lock); _starpu_spin_unlock(&task->lock); } wake_up_barrier(parallel_region); } else { ANNOTATE_HAPPENS_BEFORE(¶llel_region->barrier_count); /* not the last task reaching the barrier * . prepare for conditional continuation * . sleep */ task->wait_on |= starpu_omp_task_wait_on_barrier; task->transaction_pending = 1; _starpu_spin_unlock(&task->lock); _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task); starpu_omp_task_preempt(); STARPU_ASSERT(task->child_task_count == 0); } } void starpu_omp_master(void (*f)(void *arg), void *arg) { if (starpu_omp_master_inline()) f(arg); } /* variant of omp_master for inlined code * return !0 for the task that should perform the master section * return 0 for the tasks that should not perform the master section */ int starpu_omp_master_inline(void) { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_thread *thread = _starpu_omp_get_thread(); /* Assume master is performed in by the implicit tasks of a region */ STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT); struct starpu_omp_region *region = task->owner_region; return thread == region->master_thread; } void starpu_omp_single(void (*f)(void *arg), void *arg, int nowait) { if (starpu_omp_single_inline()) f(arg); if (!nowait) starpu_omp_barrier(); } /* variant of omp_single for inlined code * return !0 for the task that should perform the single section * return 0 for the tasks that should not perform the single section * wait/nowait should be handled directly by the calling code using starpu_omp_barrier */ int starpu_omp_single_inline(void) { struct starpu_omp_task *task = _starpu_omp_get_task(); /* Assume singles are performed in by the implicit tasks of a region */ STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT); struct starpu_omp_region *region = task->owner_region; int first = STARPU_BOOL_COMPARE_AND_SWAP(®ion->single_id, task->single_id, task->single_id+1); task->single_id++; return first; } void starpu_omp_single_copyprivate(void (*f)(void *arg, void *data, unsigned long long data_size), void *arg, void *data, unsigned long long data_size) { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_region *region = task->owner_region; int first = starpu_omp_single_inline(); if (first) { region->copy_private_data = data; f(arg, data, data_size); } starpu_omp_barrier(); if (!first) memcpy(data, region->copy_private_data, data_size); starpu_omp_barrier(); } void *starpu_omp_single_copyprivate_inline_begin(void *data) { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_region *region = task->owner_region; int first = starpu_omp_single_inline(); if (first) { task->single_first = 1; region->copy_private_data = data; return NULL; } starpu_omp_barrier(); return region->copy_private_data; } void starpu_omp_single_copyprivate_inline_end(void) { struct starpu_omp_task *task = _starpu_omp_get_task(); /* Assume singles are performed in by the implicit tasks of a region */ STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT); if (task->single_first) { task->single_first = 0; starpu_omp_barrier(); } starpu_omp_barrier(); } void starpu_omp_critical(void (*f)(void *arg), void *arg, const char *name) { starpu_omp_critical_inline_begin(name); f(arg); starpu_omp_critical_inline_end(name); } void starpu_omp_critical_inline_begin(const char *name) { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_critical *critical = NULL; struct starpu_omp_task_link link; if (name) { _starpu_spin_lock(&_global_state.named_criticals_lock); HASH_FIND_STR(_global_state.named_criticals, name, critical); if (critical == NULL) { critical = create_omp_critical_struct(); critical->name = name; HASH_ADD_STR(_global_state.named_criticals, name, critical); } _starpu_spin_unlock(&_global_state.named_criticals_lock); } else { critical = _global_state.default_critical; } _starpu_spin_lock(&critical->lock); while (critical->state != 0) { _starpu_spin_lock(&task->lock); task->wait_on |= starpu_omp_task_wait_on_critical; task->transaction_pending = 1; link.task = task; link.next = critical->contention_list_head; critical->contention_list_head = &link; _starpu_spin_unlock(&task->lock); _starpu_spin_unlock(&critical->lock); _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task); starpu_omp_task_preempt(); /* re-acquire the spin lock */ _starpu_spin_lock(&critical->lock); } critical->state = 1; _starpu_spin_unlock(&critical->lock); } void starpu_omp_critical_inline_end(const char *name) { struct starpu_omp_critical *critical = NULL; if (name) { _starpu_spin_lock(&_global_state.named_criticals_lock); HASH_FIND_STR(_global_state.named_criticals, name, critical); _starpu_spin_unlock(&_global_state.named_criticals_lock); } else { critical = _global_state.default_critical; } STARPU_ASSERT(critical != NULL); _starpu_spin_lock(&critical->lock); STARPU_ASSERT(critical->state == 1); critical->state = 0; if (critical->contention_list_head != NULL) { struct starpu_omp_task *next_task = critical->contention_list_head->task; weak_task_lock(next_task); critical->contention_list_head = critical->contention_list_head->next; STARPU_ASSERT(next_task->wait_on & starpu_omp_task_wait_on_critical); next_task->wait_on &= ~starpu_omp_task_wait_on_critical; wake_up_and_unlock_task(next_task); } _starpu_spin_unlock(&critical->lock); } static void explicit_task__destroy_callback(void *_task) { struct starpu_omp_task *task = _task; STARPU_ASSERT(!(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT)); task->starpu_task->omp_task = NULL; task->starpu_task = NULL; _starpu_spin_lock(&task->lock); if (task->state != starpu_omp_task_state_target) { STARPU_ASSERT(task->transaction_pending == 1); task->transaction_pending = 0; if (task->child_task_count != 0) { task->state = starpu_omp_task_state_zombie; _starpu_spin_unlock(&task->lock); return; } } _starpu_spin_unlock(&task->lock); destroy_omp_task_struct(task); } void starpu_omp_task_region(const struct starpu_omp_task_region_attr *attr) { struct starpu_omp_task *generating_task = _starpu_omp_get_task(); struct starpu_omp_region *parallel_region = generating_task->owner_region; int is_undeferred = 0; int is_final = 0; int is_included = 0; int is_merged = 0; int ret; if (generating_task == _global_state.initial_task) { is_undeferred = 1; is_final = 1; is_included = 1; } else { if (!attr->if_clause) { is_undeferred = 1; } if (generating_task->flags & STARPU_OMP_TASK_FLAGS_FINAL) { is_final = 1; is_included = 1; } else if (attr->final_clause) { is_final = 1; } if (is_included) { is_undeferred = 1; } if ((is_undeferred || is_included) & attr->mergeable_clause) { is_merged = 1; } } if (is_merged || is_included) { if (is_included) { /* TODO: backup current ICVs and setup new ICVs for the included task */ } int i; unsigned n = attr->cl.nbuffers; if (n == 0) n = 1; void *data_interfaces[n]; for (i = 0; i < attr->cl.nbuffers; i++) { starpu_data_handle_t handle = attr->handles[i]; ret = starpu_data_acquire(handle, attr->cl.modes[i]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); data_interfaces[i] = starpu_data_get_interface_on_node(handle, handle->home_node); } void (*f)(void **starpu_buffers, void *starpu_cl_arg) = attr->cl.cpu_funcs[0]; f(data_interfaces, attr->cl_arg); for (i = 0; i < attr->cl.nbuffers; i++) { starpu_data_release(attr->handles[i]); } if (attr->cl_arg_free) { free(attr->cl_arg); } if (is_included) { /* TODO: restore backuped ICVs */ } } else { struct starpu_omp_task *generated_task = create_omp_task_struct(generating_task, NULL, parallel_region, 0); generated_task->cl = attr->cl; if (attr->untied_clause) { generated_task->flags |= STARPU_OMP_TASK_FLAGS_UNTIED; } if (is_final) { generated_task->flags |= STARPU_OMP_TASK_FLAGS_FINAL; } if (is_undeferred) { generated_task->flags |= STARPU_OMP_TASK_FLAGS_UNDEFERRED; } // XXX taskgroup exist if (!attr->nogroup_clause) { generated_task->task_group = generating_task->task_group; } generated_task->rank = -1; /* XXX taskloop attributes */ generated_task->is_loop = attr->is_loop; generated_task->nb_iterations = attr->nb_iterations; generated_task->grainsize = attr->grainsize; generated_task->chunk = attr->chunk; generated_task->begin_i = attr->begin_i; generated_task->end_i = attr->end_i; /* * save pointer to the regions user function from the task region codelet * * TODO: add support for multiple/heterogeneous implementations */ if (generated_task->cl.cpu_funcs[0]) { generated_task->cpu_f = generated_task->cl.cpu_funcs[0]; /* * plug the task wrapper into the task region codelet instead, to support task preemption */ generated_task->cl.cpu_funcs[0] = starpu_omp_explicit_task_exec; } #ifdef STARPU_USE_CUDA if (generated_task->cl.cuda_funcs[0]) { generated_task->cuda_f = generated_task->cl.cuda_funcs[0]; #if 1 /* we assume for now that Cuda task won't block, thus we don't need * to initialize the StarPU OpenMP Runtime Support context for enabling * continuations on Cuda tasks */ generated_task->state = starpu_omp_task_state_target; #else generated_task->cl.cuda_funcs[0] = starpu_omp_explicit_task_exec; #endif } #endif #ifdef STARPU_USE_OPENCL if (generated_task->cl.opencl_funcs[0]) { generated_task->opencl_f = generated_task->cl.opencl_funcs[0]; #if 1 /* we assume for now that OpenCL task won't block, thus we don't need * to initialize the StarPU OpenMP Runtime Support context for enabling * continuations on OpenCL tasks */ generated_task->state = starpu_omp_task_state_target; #else generated_task->cl.opencl_funcs[0] = starpu_omp_explicit_task_exec; #endif } #endif /* TODO: add other accelerator support */ generated_task->starpu_task = starpu_task_create(); generated_task->starpu_task->cl = &generated_task->cl; generated_task->starpu_task->cl_arg = attr->cl_arg; generated_task->starpu_task->cl_arg_size = attr->cl_arg_size; generated_task->starpu_task->cl_arg_free = attr->cl_arg_free; generated_task->starpu_task->priority = attr->priority; { int i; for (i = 0; i < generated_task->cl.nbuffers; i++) { generated_task->starpu_task->handles[i] = attr->handles[i]; } } generated_task->starpu_task->omp_task = generated_task; _starpu_task_set_omp_cleanup_callback(generated_task->starpu_task, explicit_task__destroy_callback, generated_task); /* if the task is tied, execute_on_a_specific_worker will be changed to 1 * upon the first preemption of the generated task, once we know * which worker thread has been selected */ generated_task->starpu_task->execute_on_a_specific_worker = 0; (void)STARPU_ATOMIC_ADD(&generating_task->child_task_count, 1); (void)STARPU_ATOMIC_ADD(¶llel_region->bound_explicit_task_count, 1); if (generated_task->task_group) { (void)STARPU_ATOMIC_ADD(&generated_task->task_group->descendent_task_count, 1); } /* do not use the attribute struct afterward as it may become out of scope */ attr = NULL; if (is_undeferred) { _starpu_task_prepare_for_continuation(); starpu_task_declare_deps_array(generating_task->starpu_task, 1, &generated_task->starpu_task); } ret = starpu_task_submit(generated_task->starpu_task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); if (is_undeferred) { starpu_omp_task_preempt(); } } } void starpu_omp_taskwait(void) { struct starpu_omp_task *task = _starpu_omp_get_task(); _starpu_spin_lock(&task->lock); if (task->child_task_count > 0) { task->wait_on |= starpu_omp_task_wait_on_task_childs; task->transaction_pending = 1; _starpu_spin_unlock(&task->lock); _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task); starpu_omp_task_preempt(); STARPU_ASSERT(task->child_task_count == 0); } else { _starpu_spin_unlock(&task->lock); } } void starpu_omp_taskgroup(void (*f)(void *arg), void *arg) { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_task_group task_group; task_group.p_previous_task_group = task->task_group; task_group.descendent_task_count = 0; task_group.leader_task = task; task->task_group = &task_group; f(arg); _starpu_spin_lock(&task->lock); if (task_group.descendent_task_count > 0) { task->wait_on |= starpu_omp_task_wait_on_group; task->transaction_pending = 1; _starpu_spin_unlock(&task->lock); _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task); starpu_omp_task_preempt(); STARPU_ASSERT(task_group.descendent_task_count == 0); } else { _starpu_spin_unlock(&task->lock); } task->task_group = task_group.p_previous_task_group; } void starpu_omp_taskgroup_inline_begin(void) { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_task_group *p_task_group; _STARPU_MALLOC(p_task_group, sizeof(*p_task_group)); p_task_group->p_previous_task_group = task->task_group; p_task_group->descendent_task_count = 0; p_task_group->leader_task = task; task->task_group = p_task_group; } void starpu_omp_taskgroup_inline_end(void) { struct starpu_omp_task *task = _starpu_omp_get_task(); _starpu_spin_lock(&task->lock); struct starpu_omp_task_group *p_task_group = task->task_group; if (p_task_group->descendent_task_count > 0) { task->wait_on |= starpu_omp_task_wait_on_group; task->transaction_pending = 1; _starpu_spin_unlock(&task->lock); _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task); starpu_omp_task_preempt(); STARPU_ASSERT(p_task_group->descendent_task_count == 0); } else { _starpu_spin_unlock(&task->lock); } task->task_group = p_task_group->p_previous_task_group; free(p_task_group); } // XXX on work void starpu_omp_taskloop_inline_begin(struct starpu_omp_task_region_attr *attr) { if (!attr->nogroup_clause) { starpu_omp_taskgroup_inline_begin(); } int nb_subloop; if (attr->num_tasks) { nb_subloop = attr->num_tasks; } else if (attr->grainsize) { nb_subloop = attr->nb_iterations / attr->grainsize; } else { nb_subloop = 4; } attr->is_loop = 1; int i; int nb_iter_i = attr->nb_iterations / nb_subloop; for (i = 0; i < nb_subloop; i++) { attr->begin_i = nb_iter_i * i; attr->end_i = attr->begin_i + nb_iter_i; attr->end_i += (i+1 != nb_subloop) ? 0 : (attr->nb_iterations % nb_subloop); attr->chunk = attr->end_i - attr->begin_i; starpu_omp_task_region(attr); } } // XXX on work void starpu_omp_taskloop_inline_end(const struct starpu_omp_task_region_attr *attr) { if (!attr->nogroup_clause) { starpu_omp_taskgroup_inline_end(); } } static inline void _starpu_omp_for_loop(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task, struct starpu_omp_loop *loop, int first_call, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) { *_nb_i = 0; if (schedule == starpu_omp_sched_undefined) { schedule = parallel_region->owner_device->icvs.def_sched_var; chunk = parallel_region->owner_device->icvs.def_sched_chunk_var; } else if (schedule == starpu_omp_sched_runtime) { schedule = parallel_region->icvs.run_sched_var; chunk = parallel_region->icvs.run_sched_chunk_var; } STARPU_ASSERT(schedule == starpu_omp_sched_static || schedule == starpu_omp_sched_dynamic || schedule == starpu_omp_sched_guided || schedule == starpu_omp_sched_auto); if (schedule == starpu_omp_sched_auto) { schedule = starpu_omp_sched_static; chunk = 0; } if (schedule == starpu_omp_sched_static) { if (chunk > 0) { if (first_call) { *_first_i = task->rank * chunk; } else { *_first_i += parallel_region->nb_threads * chunk; } if (*_first_i < nb_iterations) { if (*_first_i + chunk > nb_iterations) { *_nb_i = nb_iterations - *_first_i; } else { *_nb_i = chunk; } } } else { if (first_call) { *_nb_i = nb_iterations / parallel_region->nb_threads; *_first_i = (unsigned)task->rank * (*_nb_i); unsigned long long remainder = nb_iterations % parallel_region->nb_threads; if (remainder > 0) { if ((unsigned)task->rank < remainder) { (*_nb_i)++; *_first_i += (unsigned)task->rank; } else { *_first_i += remainder; } } } } } else if (schedule == starpu_omp_sched_dynamic) { if (chunk == 0) { chunk = 1; } if (first_call) { *_first_i = 0; } _starpu_spin_lock(¶llel_region->lock); if (loop->next_iteration < nb_iterations) { *_first_i = loop->next_iteration; if (*_first_i + chunk > nb_iterations) { *_nb_i = nb_iterations - *_first_i; } else { *_nb_i = chunk; } loop->next_iteration += *_nb_i; } _starpu_spin_unlock(¶llel_region->lock); } else if (schedule == starpu_omp_sched_guided) { if (chunk == 0) { chunk = 1; } if (first_call) { *_first_i = 0; } _starpu_spin_lock(¶llel_region->lock); if (loop->next_iteration < nb_iterations) { *_first_i = loop->next_iteration; *_nb_i = (nb_iterations - *_first_i)/parallel_region->nb_threads; if (*_nb_i < chunk) { if (*_first_i+chunk > nb_iterations) { *_nb_i = nb_iterations - *_first_i; } else { *_nb_i = chunk; } } loop->next_iteration += *_nb_i; } _starpu_spin_unlock(¶llel_region->lock); } if (ordered) { task->ordered_first_i = *_first_i; task->ordered_nb_i = *_nb_i; } } static inline struct starpu_omp_loop *_starpu_omp_for_get_loop(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task) { struct starpu_omp_loop *loop; loop = parallel_region->loop_list; while (loop && loop->id != task->loop_id) { loop = loop->next_loop; } return loop; } static inline struct starpu_omp_loop *_starpu_omp_for_loop_begin(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task, int ordered) { struct starpu_omp_loop *loop; _starpu_spin_lock(¶llel_region->lock); loop = _starpu_omp_for_get_loop(parallel_region, task); if (!loop) { _STARPU_MALLOC(loop, sizeof(*loop)); loop->id = task->loop_id; loop->next_iteration = 0; loop->nb_completed_threads = 0; loop->next_loop = parallel_region->loop_list; parallel_region->loop_list = loop; if (ordered) { loop->ordered_iteration = 0; _starpu_spin_init(&loop->ordered_lock); condition_init(&loop->ordered_cond); } } _starpu_spin_unlock(¶llel_region->lock); return loop; } static inline void _starpu_omp_for_loop_end(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task, struct starpu_omp_loop *loop, int ordered) { _starpu_spin_lock(¶llel_region->lock); loop->nb_completed_threads++; if (loop->nb_completed_threads == parallel_region->nb_threads) { struct starpu_omp_loop **p_loop; if (ordered) { loop->ordered_iteration = 0; condition_exit(&loop->ordered_cond); _starpu_spin_destroy(&loop->ordered_lock); } STARPU_ASSERT(loop->next_loop == NULL); p_loop = &(parallel_region->loop_list); while (*p_loop != loop) { p_loop = &((*p_loop)->next_loop); } *p_loop = NULL; free(loop); } _starpu_spin_unlock(¶llel_region->lock); task->loop_id++; } int starpu_omp_for_inline_first(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_region *parallel_region = task->owner_region; struct starpu_omp_loop *loop = _starpu_omp_for_loop_begin(parallel_region, task, ordered); _starpu_omp_for_loop(parallel_region, task, loop, 1, nb_iterations, chunk, schedule, ordered, _first_i, _nb_i); if (*_nb_i == 0) { _starpu_omp_for_loop_end(parallel_region, task, loop, ordered); } return *_nb_i != 0; } int starpu_omp_for_inline_next(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i) { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_region *parallel_region = task->owner_region; struct starpu_omp_loop *loop = _starpu_omp_for_loop_begin(parallel_region, task, ordered); _starpu_omp_for_loop(parallel_region, task, loop, 0, nb_iterations, chunk, schedule, ordered, _first_i, _nb_i); if (*_nb_i == 0) { _starpu_omp_for_loop_end(parallel_region, task, loop, ordered); } return *_nb_i != 0; } int starpu_omp_for_inline_first_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i) { unsigned long long nb_i; int end = starpu_omp_for_inline_first(nb_iterations, chunk, schedule, ordered, _begin_i, &nb_i); *_end_i = *_begin_i + nb_i; return end; } int starpu_omp_for_inline_next_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i) { unsigned long long nb_i; int end = starpu_omp_for_inline_next(nb_iterations, chunk, schedule, ordered, _begin_i, &nb_i); *_end_i = *_begin_i + nb_i; return end; } void starpu_omp_for(void (*f)(unsigned long long _first_i, unsigned long long _nb_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait) { unsigned long long _first_i = 0; unsigned long long _nb_i = 0; if (starpu_omp_for_inline_first(nb_iterations, chunk, schedule, ordered, &_first_i, &_nb_i)) { do { f(_first_i, _nb_i, arg); } while (starpu_omp_for_inline_next(nb_iterations, chunk, schedule, ordered, &_first_i, &_nb_i)); } if (!nowait) { starpu_omp_barrier(); } } void starpu_omp_for_alt(void (*f)(unsigned long long _begin_i, unsigned long long _end_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait) { unsigned long long _begin_i = 0; unsigned long long _end_i = 0; if (starpu_omp_for_inline_first_alt(nb_iterations, chunk, schedule, ordered, &_begin_i, &_end_i)) { do { f(_begin_i, _end_i, arg); } while (starpu_omp_for_inline_next_alt(nb_iterations, chunk, schedule, ordered, &_begin_i, &_end_i)); } if (!nowait) { starpu_omp_barrier(); } } void starpu_omp_ordered(void (*f)(void *arg), void *arg) { starpu_omp_ordered_inline_begin(); f(arg); starpu_omp_ordered_inline_end(); } void starpu_omp_ordered_inline_begin(void) { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_region *parallel_region = task->owner_region; struct starpu_omp_loop *loop = _starpu_omp_for_get_loop(parallel_region, task); unsigned long long i; STARPU_ASSERT(task->ordered_nb_i > 0); i = task->ordered_first_i; task->ordered_first_i++; task->ordered_nb_i--; _starpu_spin_lock(&loop->ordered_lock); while (i != loop->ordered_iteration) { STARPU_ASSERT(i > loop->ordered_iteration); condition_wait(&loop->ordered_cond, &loop->ordered_lock, starpu_omp_task_wait_on_ordered); } } void starpu_omp_ordered_inline_end(void) { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_region *parallel_region = task->owner_region; struct starpu_omp_loop *loop = _starpu_omp_for_get_loop(parallel_region, task); loop->ordered_iteration++; condition_broadcast(&loop->ordered_cond, starpu_omp_task_wait_on_ordered); _starpu_spin_unlock(&loop->ordered_lock); } static inline struct starpu_omp_sections *_starpu_omp_get_sections(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task) { struct starpu_omp_sections *sections; sections = parallel_region->sections_list; while (sections && sections->id != task->sections_id) { sections = sections->next_sections; } return sections; } static inline struct starpu_omp_sections *_starpu_omp_sections_begin(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task) { struct starpu_omp_sections *sections; _starpu_spin_lock(¶llel_region->lock); sections = _starpu_omp_get_sections(parallel_region, task); if (!sections) { _STARPU_MALLOC(sections, sizeof(*sections)); sections->id = task->sections_id; sections->next_section_num = 0; sections->nb_completed_threads = 0; sections->next_sections = parallel_region->sections_list; parallel_region->sections_list = sections; } _starpu_spin_unlock(¶llel_region->lock); return sections; } static inline void _starpu_omp_sections_end(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task, struct starpu_omp_sections *sections) { _starpu_spin_lock(¶llel_region->lock); sections->nb_completed_threads++; if (sections->nb_completed_threads == parallel_region->nb_threads) { struct starpu_omp_sections **p_sections; STARPU_ASSERT(sections->next_sections == NULL); p_sections = &(parallel_region->sections_list); while (*p_sections != sections) { p_sections = &((*p_sections)->next_sections); } *p_sections = NULL; free(sections); } _starpu_spin_unlock(¶llel_region->lock); task->sections_id++; } void starpu_omp_sections(unsigned long long nb_sections, void (**section_f)(void *arg), void **section_arg, int nowait) { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_region *parallel_region = task->owner_region; struct starpu_omp_sections *sections = _starpu_omp_sections_begin(parallel_region, task); for (;;) { void (*f)(void *arg) = NULL; void *arg = NULL; _starpu_spin_lock(¶llel_region->lock); if (sections->next_section_num < nb_sections) { f = section_f[sections->next_section_num]; arg = section_arg[sections->next_section_num]; sections->next_section_num ++; } _starpu_spin_unlock(¶llel_region->lock); if (f == NULL) break; f(arg); } _starpu_omp_sections_end(parallel_region, task, sections); if (!nowait) { starpu_omp_barrier(); } } void starpu_omp_sections_combined(unsigned long long nb_sections, void (*section_f)(unsigned long long section_num, void *arg), void *section_arg, int nowait) { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_region *parallel_region = task->owner_region; struct starpu_omp_sections *sections = _starpu_omp_sections_begin(parallel_region, task); for (;;) { unsigned long long section_num; void *arg = NULL; _starpu_spin_lock(¶llel_region->lock); if (sections->next_section_num < nb_sections) { section_num = sections->next_section_num; arg = section_arg; sections->next_section_num ++; } else { _starpu_spin_unlock(¶llel_region->lock); break; } _starpu_spin_unlock(¶llel_region->lock); section_f(section_num, arg); } _starpu_omp_sections_end(parallel_region, task, sections); if (!nowait) { starpu_omp_barrier(); } } static void _starpu_omp_lock_init(void **_internal) { struct _starpu_omp_lock_internal *_lock; _STARPU_CALLOC(_lock, 1, sizeof(*_lock)); _starpu_spin_init(&_lock->lock); condition_init(&_lock->cond); *_internal = _lock; } static void _starpu_omp_lock_destroy(void **_internal) { struct _starpu_omp_lock_internal * const _lock = *_internal; STARPU_ASSERT(_lock->state == 0); condition_exit(&_lock->cond); _starpu_spin_destroy(&_lock->lock); memset(_lock, 0, sizeof(*_lock)); free(_lock); *_internal = NULL; } static void _starpu_omp_lock_set(void **_internal) { struct _starpu_omp_lock_internal * const _lock = *_internal; _starpu_spin_lock(&_lock->lock); while (_lock->state != 0) { condition_wait(&_lock->cond, &_lock->lock, starpu_omp_task_wait_on_lock); } _lock->state = 1; _starpu_spin_unlock(&_lock->lock); } static void _starpu_omp_lock_unset(void **_internal) { struct _starpu_omp_lock_internal * const _lock = *_internal; _starpu_spin_lock(&_lock->lock); STARPU_ASSERT(_lock->state == 1); _lock->state = 0; condition_broadcast(&_lock->cond, starpu_omp_task_wait_on_lock); _starpu_spin_unlock(&_lock->lock); } static int _starpu_omp_lock_test(void **_internal) { struct _starpu_omp_lock_internal * const _lock = *_internal; int ret = 0; _starpu_spin_lock(&_lock->lock); if (_lock->state == 0) { _lock->state = 1; ret = 1; } _starpu_spin_unlock(&_lock->lock); return ret; } static void _starpu_omp_nest_lock_init(void **_internal) { struct _starpu_omp_nest_lock_internal *_nest_lock; _STARPU_CALLOC(_nest_lock, 1, sizeof(*_nest_lock)); _starpu_spin_init(&_nest_lock->lock); condition_init(&_nest_lock->cond); *_internal = _nest_lock; } static void _starpu_omp_nest_lock_destroy(void **_internal) { struct _starpu_omp_nest_lock_internal * const _nest_lock = *_internal; STARPU_ASSERT(_nest_lock->state == 0); STARPU_ASSERT(_nest_lock->nesting == 0); STARPU_ASSERT(_nest_lock->owner_task == NULL); condition_exit(&_nest_lock->cond); _starpu_spin_destroy(&_nest_lock->lock); memset(_nest_lock, 0, sizeof(*_nest_lock)); free(_nest_lock); *_internal = NULL; } static void _starpu_omp_nest_lock_set(void **_internal) { struct _starpu_omp_nest_lock_internal * const _nest_lock = *_internal; struct starpu_omp_task * const task = _starpu_omp_get_task(); _starpu_spin_lock(&_nest_lock->lock); if (_nest_lock->owner_task == task) { STARPU_ASSERT(_nest_lock->state == 1); STARPU_ASSERT(_nest_lock->nesting > 0); _nest_lock->nesting++; } else { while (_nest_lock->state != 0) { condition_wait(&_nest_lock->cond, &_nest_lock->lock, starpu_omp_task_wait_on_nest_lock); } STARPU_ASSERT(_nest_lock->nesting == 0); STARPU_ASSERT(_nest_lock->owner_task == NULL); _nest_lock->state = 1; _nest_lock->owner_task = task; _nest_lock->nesting = 1; } _starpu_spin_unlock(&_nest_lock->lock); } static void _starpu_omp_nest_lock_unset(void **_internal) { struct _starpu_omp_nest_lock_internal * const _nest_lock = *_internal; struct starpu_omp_task * const task = _starpu_omp_get_task(); _starpu_spin_lock(&_nest_lock->lock); STARPU_ASSERT(_nest_lock->owner_task == task); STARPU_ASSERT(_nest_lock->state == 1); STARPU_ASSERT(_nest_lock->nesting > 0); _nest_lock->nesting--; if (_nest_lock->nesting == 0) { _nest_lock->state = 0; _nest_lock->owner_task = NULL; condition_broadcast(&_nest_lock->cond, starpu_omp_task_wait_on_nest_lock); } _starpu_spin_unlock(&_nest_lock->lock); } static int _starpu_omp_nest_lock_test(void **_internal) { struct _starpu_omp_nest_lock_internal * const _nest_lock = *_internal; struct starpu_omp_task * const task = _starpu_omp_get_task(); int ret = 0; _starpu_spin_lock(&_nest_lock->lock); if (_nest_lock->state == 0) { STARPU_ASSERT(_nest_lock->nesting == 0); STARPU_ASSERT(_nest_lock->owner_task == NULL); _nest_lock->state = 1; _nest_lock->owner_task = task; _nest_lock->nesting = 1; ret = _nest_lock->nesting; } else if (_nest_lock->owner_task == task) { STARPU_ASSERT(_nest_lock->state == 1); STARPU_ASSERT(_nest_lock->nesting > 0); _nest_lock->nesting++; ret = _nest_lock->nesting; } _starpu_spin_unlock(&_nest_lock->lock); return ret; } void starpu_omp_init_lock (starpu_omp_lock_t *lock) { _starpu_omp_lock_init(&lock->internal); } void starpu_omp_destroy_lock (starpu_omp_lock_t *lock) { _starpu_omp_lock_destroy(&lock->internal); } void starpu_omp_set_lock (starpu_omp_lock_t *lock) { _starpu_omp_lock_set(&lock->internal); } void starpu_omp_unset_lock (starpu_omp_lock_t *lock) { _starpu_omp_lock_unset(&lock->internal); } int starpu_omp_test_lock (starpu_omp_lock_t *lock) { return _starpu_omp_lock_test(&lock->internal); } void starpu_omp_init_nest_lock (starpu_omp_nest_lock_t *nest_lock) { _starpu_omp_nest_lock_init(&nest_lock->internal); } void starpu_omp_destroy_nest_lock (starpu_omp_nest_lock_t *nest_lock) { _starpu_omp_nest_lock_destroy(&nest_lock->internal); } void starpu_omp_set_nest_lock (starpu_omp_nest_lock_t *nest_lock) { _starpu_omp_nest_lock_set(&nest_lock->internal); } void starpu_omp_unset_nest_lock (starpu_omp_nest_lock_t *nest_lock) { _starpu_omp_nest_lock_unset(&nest_lock->internal); } int starpu_omp_test_nest_lock (starpu_omp_nest_lock_t *nest_lock) { return _starpu_omp_nest_lock_test(&nest_lock->internal); } void starpu_omp_atomic_fallback_inline_begin(void) { struct starpu_omp_device *device = get_caller_device(); _starpu_spin_lock(&device->atomic_lock); } void starpu_omp_atomic_fallback_inline_end(void) { struct starpu_omp_device *device = get_caller_device(); _starpu_spin_unlock(&device->atomic_lock); } void starpu_omp_vector_annotate(starpu_data_handle_t handle, uint32_t slice_base) { unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *) starpu_data_get_interface_on_node(handle, node); assert(vector_interface->id == STARPU_VECTOR_INTERFACE_ID); vector_interface->slice_base = slice_base; } } struct starpu_arbiter *starpu_omp_get_default_arbiter(void) { return _global_state.default_arbiter; } /* * restore deprecated diagnostics (-Wdeprecated-declarations) */ #pragma GCC diagnostic pop #endif /* STARPU_OPENMP */ starpu-1.4.9+dfsg/src/util/openmp_runtime_support.h000066400000000000000000000247311507764646700225710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __OPENMP_RUNTIME_SUPPORT_H__ #define __OPENMP_RUNTIME_SUPPORT_H__ /** @file */ #include #ifdef STARPU_OPENMP #include #include #include /** ucontexts have been deprecated as of POSIX 1-2004 * _XOPEN_SOURCE required at least on OS/X * * TODO: add detection in configure.ac */ #ifndef _XOPEN_SOURCE #define _XOPEN_SOURCE #endif #include #pragma GCC visibility push(hidden) extern starpu_pthread_key_t omp_thread_key; extern starpu_pthread_key_t omp_task_key; /** * Arbitrary limit on the number of nested parallel sections */ #define STARPU_OMP_MAX_ACTIVE_LEVELS 1 /** * Possible abstract names for OpenMP places */ enum starpu_omp_place_name { starpu_omp_place_undefined = 0, starpu_omp_place_threads = 1, starpu_omp_place_cores = 2, starpu_omp_place_sockets = 3, starpu_omp_place_numerical = 4 /** place specified numerically */ }; struct starpu_omp_numeric_place { int excluded_place; int *included_numeric_items; int nb_included_numeric_items; int *excluded_numeric_items; int nb_excluded_numeric_items; }; /** * OpenMP place for thread affinity, defined by the OpenMP spec */ struct starpu_omp_place { int abstract_name; int abstract_excluded; int abstract_length; struct starpu_omp_numeric_place *numeric_places; int nb_numeric_places; }; /** * Internal Control Variables (ICVs) declared following * OpenMP 4.0.0 spec section 2.3.1 */ struct starpu_omp_data_environment_icvs { /** parallel region icvs */ int dyn_var; int nest_var; int *nthreads_var; /** nthreads_var ICV is a list */ int thread_limit_var; int active_levels_var; int levels_var; int *bind_var; /** bind_var ICV is a list */ /** loop region icvs */ int run_sched_var; unsigned long long run_sched_chunk_var; /** program execution icvs */ int default_device_var; int max_task_priority_var; }; struct starpu_omp_device_icvs { /** parallel region icvs */ int max_active_levels_var; /** loop region icvs */ int def_sched_var; unsigned long long def_sched_chunk_var; /** program execution icvs */ int stacksize_var; int wait_policy_var; }; struct starpu_omp_implicit_task_icvs { /** parallel region icvs */ int place_partition_var; }; struct starpu_omp_global_icvs { /** program execution icvs */ int cancel_var; }; struct starpu_omp_initial_icv_values { int dyn_var; int nest_var; int *nthreads_var; int run_sched_var; unsigned long long run_sched_chunk_var; int def_sched_var; unsigned long long def_sched_chunk_var; int *bind_var; int stacksize_var; int wait_policy_var; int thread_limit_var; int max_active_levels_var; int active_levels_var; int levels_var; int place_partition_var; int cancel_var; int default_device_var; int max_task_priority_var; /** not a real ICV, but needed to store the contents of OMP_PLACES */ struct starpu_omp_place places; }; struct starpu_omp_task_group { int descendent_task_count; struct starpu_omp_task *leader_task; struct starpu_omp_task_group *p_previous_task_group; }; struct starpu_omp_task_link { struct starpu_omp_task *task; struct starpu_omp_task_link *next; }; struct starpu_omp_condition { struct starpu_omp_task_link *contention_list_head; }; struct starpu_omp_critical { UT_hash_handle hh; struct _starpu_spinlock lock; unsigned state; struct starpu_omp_task_link *contention_list_head; const char *name; }; enum starpu_omp_task_state { starpu_omp_task_state_clear = 0, starpu_omp_task_state_preempted = 1, starpu_omp_task_state_terminated = 2, starpu_omp_task_state_zombie = 3, /** target tasks are non-preemptible tasks, without dedicated stack and OpenMP Runtime Support context */ starpu_omp_task_state_target = 4, }; enum starpu_omp_task_wait_on { starpu_omp_task_wait_on_task_childs = 1 << 0, starpu_omp_task_wait_on_region_tasks = 1 << 1, starpu_omp_task_wait_on_barrier = 1 << 2, starpu_omp_task_wait_on_group = 1 << 3, starpu_omp_task_wait_on_critical = 1 << 4, starpu_omp_task_wait_on_ordered = 1 << 5, starpu_omp_task_wait_on_lock = 1 << 6, starpu_omp_task_wait_on_nest_lock = 1 << 7, }; enum starpu_omp_task_flags { STARPU_OMP_TASK_FLAGS_IMPLICIT = 1 << 0, STARPU_OMP_TASK_FLAGS_UNDEFERRED = 1 << 1, STARPU_OMP_TASK_FLAGS_FINAL = 1 << 2, STARPU_OMP_TASK_FLAGS_UNTIED = 1 << 3, }; LIST_TYPE(starpu_omp_task, struct starpu_omp_implicit_task_icvs icvs; struct starpu_omp_task *parent_task; struct starpu_omp_thread *owner_thread; struct starpu_omp_region *owner_region; struct starpu_omp_region *nested_region; int rank; int child_task_count; struct starpu_omp_task_group *task_group; struct _starpu_spinlock lock; int transaction_pending; int wait_on; int barrier_count; int single_id; int single_first; int loop_id; unsigned long long ordered_first_i; unsigned long long ordered_nb_i; int sections_id; struct starpu_omp_data_environment_icvs data_env_icvs; struct starpu_omp_implicit_task_icvs implicit_task_icvs; struct handle_entry *registered_handles; struct starpu_task *starpu_task; struct starpu_codelet cl; void **starpu_buffers; void *starpu_cl_arg; /* Driver porters: adding your driver here is very optional. */ /** actual task function to be run */ void (*cpu_f)(void **starpu_buffers, void *starpu_cl_arg); void (*cuda_f)(void **starpu_buffers, void *starpu_cl_arg); void (*opencl_f)(void **starpu_buffers, void *starpu_cl_arg); enum starpu_omp_task_state state; enum starpu_omp_task_flags flags; /* * context to store the processing state of the task * in case of blocking/recursive task operation */ ucontext_t ctx; /* * stack to execute the task over, to be able to switch * in case blocking/recursive task operation */ void *stack; /* * Valgrind stack id */ int stack_vg_id; size_t stacksize; /* * taskloop attribute * */ int is_loop; unsigned long long nb_iterations; unsigned long long grainsize; unsigned long long chunk; unsigned long long begin_i; unsigned long long end_i; ) LIST_TYPE(starpu_omp_thread, UT_hash_handle hh; struct starpu_omp_task *current_task; struct starpu_omp_region *owner_region; /* * stack to execute the initial thread over * when preempting the initial task * note: should not be used for other threads */ void *initial_thread_stack; /* * Valgrind stack id */ int initial_thread_stack_vg_id; /* * context to store the 'scheduler' state of the thread, * to which the execution of thread comes back upon a * blocking/recursive task operation */ ucontext_t ctx; struct starpu_driver starpu_driver; struct _starpu_worker *worker; ) struct _starpu_omp_lock_internal { struct _starpu_spinlock lock; struct starpu_omp_condition cond; unsigned state; }; struct _starpu_omp_nest_lock_internal { struct _starpu_spinlock lock; struct starpu_omp_condition cond; unsigned state; struct starpu_omp_task *owner_task; unsigned nesting; }; struct starpu_omp_loop { int id; unsigned long long next_iteration; int nb_completed_threads; struct starpu_omp_loop *next_loop; struct _starpu_spinlock ordered_lock; struct starpu_omp_condition ordered_cond; unsigned long long ordered_iteration; }; struct starpu_omp_sections { int id; unsigned long long next_section_num; int nb_completed_threads; struct starpu_omp_sections *next_sections; }; struct starpu_omp_region { struct starpu_omp_data_environment_icvs icvs; struct starpu_omp_region *parent_region; struct starpu_omp_device *owner_device; struct starpu_omp_thread *master_thread; /** note: the list of threads does not include the master_thread */ struct starpu_omp_thread_list thread_list; /** list of implicit omp tasks created to run the region */ struct starpu_omp_task **implicit_task_array; /** include both the master thread and the region own threads */ int nb_threads; struct _starpu_spinlock lock; struct starpu_omp_task *waiting_task; int barrier_count; int bound_explicit_task_count; int single_id; void *copy_private_data; int level; struct starpu_omp_loop *loop_list; struct starpu_omp_sections *sections_list; struct starpu_task *continuation_starpu_task; struct handle_entry *registered_handles; struct _starpu_spinlock registered_handles_lock; }; struct starpu_omp_device { struct starpu_omp_device_icvs icvs; /** atomic fallback implementation lock */ struct _starpu_spinlock atomic_lock; }; struct starpu_omp_global { struct starpu_omp_global_icvs icvs; struct starpu_omp_task *initial_task; struct starpu_omp_thread *initial_thread; struct starpu_omp_region *initial_region; struct starpu_omp_device *initial_device; struct starpu_omp_critical *default_critical; struct starpu_omp_critical *named_criticals; struct _starpu_spinlock named_criticals_lock; struct starpu_omp_thread *hash_workers; struct _starpu_spinlock hash_workers_lock; struct starpu_arbiter *default_arbiter; unsigned nb_starpu_cpu_workers; int *starpu_cpu_worker_ids; int environment_valid; }; /* * internal global variables */ extern struct starpu_omp_initial_icv_values *_starpu_omp_initial_icv_values; extern struct starpu_omp_global *_starpu_omp_global_state; extern starpu_pthread_key_t _starpu_omp_thread_key; extern starpu_pthread_key_t _starpu_omp_task_key; extern double _starpu_omp_clock_ref; /* * internal API */ void _starpu_omp_environment_init(void); void _starpu_omp_environment_exit(void); int _starpu_omp_environment_check(void); struct starpu_omp_thread *_starpu_omp_get_thread(void); struct starpu_omp_region *_starpu_omp_get_region_at_level(int level) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; struct starpu_omp_task *_starpu_omp_get_task(void); int _starpu_omp_get_region_thread_num(const struct starpu_omp_region *const region) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; void _starpu_omp_dummy_init(void); void _starpu_omp_dummy_shutdown(void); #endif // STARPU_OPENMP #pragma GCC visibility pop #endif // __OPENMP_RUNTIME_SUPPORT_H__ starpu-1.4.9+dfsg/src/util/openmp_runtime_support_environment.c000066400000000000000000000521701507764646700252060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef STARPU_OPENMP #include #include #include #include #include #include #define _STARPU_INITIAL_PLACES_LIST_SIZE 4 #define _STARPU_INITIAL_PLACE_ITEMS_LIST_SIZE 4 #define _STARPU_DEFAULT_STACKSIZE 2097152 static struct starpu_omp_initial_icv_values _initial_icv_values = { .dyn_var = 0, .nest_var = 0, .nthreads_var = NULL, .run_sched_var = starpu_omp_sched_static, .run_sched_chunk_var = 0, .def_sched_var = starpu_omp_sched_static, .def_sched_chunk_var = 0, .bind_var = NULL, .stacksize_var = _STARPU_DEFAULT_STACKSIZE, .wait_policy_var = 0, .max_active_levels_var = STARPU_OMP_MAX_ACTIVE_LEVELS, .active_levels_var = 0, .levels_var = 0, .place_partition_var = 0, .cancel_var = 0, .default_device_var = 0, .max_task_priority_var = 0 }; struct starpu_omp_initial_icv_values *_starpu_omp_initial_icv_values = NULL; static void remove_spaces(char *str) { int i = 0; int j = 0; while (str[j] != '\0') { if (isspace(str[j])) { j++; continue; } if (j > i) { str[i] = str[j]; } i++; j++; } if (j > i) { str[i] = str[j]; } } static int stringsn_cmp(const char *strings[], const char *str, size_t n) { int mode = 0; while (strings[mode]) { if (strncasecmp(str, strings[mode], n) == 0) break; mode++; } if (strings[mode] == NULL) return -1; return mode; } static int read_int_var(const char *str, int *dst) { char *endptr; int val; long lval; if (!str) return 0; errno = 0; /* To distinguish success/failure after call */ lval = strtol(str, &endptr, 10); /* Check for various possible errors */ if ((errno == ERANGE && (lval == LONG_MAX || lval == LONG_MIN)) || (errno != 0 && lval == 0)) return 0; if (lval < INT_MIN || lval > INT_MAX) return 0; val = (int) lval; /* No digits were found. */ if (str == endptr) return 0; *dst = val; return 1; } static int _strings_cmp(const char *strings[], const char *str) { int mode = 0; while (strings[mode]) { if (strncasecmp(str, strings[mode], strlen(strings[mode])) == 0) break; mode++; } if (strings[mode] == NULL) return -1; return mode; } static void read_sched_var(const char *var, int *dest, unsigned long long *dest_chunk) { const char *env = starpu_getenv(var); if (env) { char *str = strdup(env); if (str == NULL) _STARPU_ERROR("memory allocation failed\n"); remove_spaces(str); if (str[0] == '\0') { free(str); return; } static const char *strings[] = { "undefined", "static", "dynamic", "guided", "auto", NULL }; int mode = _strings_cmp(strings, str); if (mode < 0) _STARPU_ERROR("parse error in variable %s\n", var); *dest = mode; int offset = strlen(strings[mode]); if (str[offset] == ',') { offset++; errno = 0; long long v = strtoll(str+offset, NULL, 10); if (errno != 0) _STARPU_ERROR("could not parse environment variable %s, strtol failed with error %s\n", var, strerror(errno)); if (v < 0) _STARPU_ERROR("invalid negative modifier in environment variable %s\n", var); unsigned long long uv = (unsigned long long) v; *dest_chunk = uv; } else { *dest_chunk = 1; } free(str); } } static int convert_place_name(const char *str, size_t n) { static const char *strings[] = { "threads", "cores", "sockets", NULL }; int mode = stringsn_cmp(strings, str, n); if (mode < 0) _STARPU_ERROR("place abstract name parse error\n"); return mode+1; /* 0 is for undefined abstract name */ } /* Note: this function modifies the string str */ static void read_a_place_name(char *str, struct starpu_omp_place *places) { int i = 0; /* detect exclusion of abstract name expressed as '!' prefix */ if (str[i] == '!') { places->abstract_excluded = 1; i++; } else { places->abstract_excluded = 0; } /* detect length value for abstract name expressed as '(length)' suffix) */ char *begin_length_spec = strchr(str+i,'('); if (begin_length_spec != NULL) { char *end_length_spec = strrchr(begin_length_spec+1, ')'); if (end_length_spec == NULL || end_length_spec <= begin_length_spec+1) _STARPU_ERROR("parse error in places list\n"); *begin_length_spec = '\0'; *end_length_spec = '\0'; errno = 0; int v = (int)strtol(begin_length_spec+1, NULL, 10); if (errno != 0) _STARPU_ERROR("parse error in places list\n"); places->abstract_length = v; } else { places->abstract_length = 1; } /* convert abstract place name string to corresponding value */ { int mode = convert_place_name(str+i, strlen(str+i)); STARPU_ASSERT(mode >= starpu_omp_place_threads && mode <= starpu_omp_place_sockets); places->abstract_name = mode; places->numeric_places = NULL; places->nb_numeric_places = 0; } } static void read_a_places_list(const char *str, struct starpu_omp_place *places) { if (str[0] == '\0') { places->numeric_places = NULL; places->nb_numeric_places = 0; places->abstract_name = starpu_omp_place_undefined; return; } enum { state_split, state_read_brace_prefix, state_read_opening_brace, state_read_numeric_prefix, state_read_numeric, state_split_numeric, state_read_closing_brace, state_read_brace_suffix, }; struct starpu_omp_numeric_place *places_list = NULL; int places_list_size = 0; int nb_places = 0; int *included_items_list = NULL; int included_items_list_size = 0; int nb_included_items = 0; int *excluded_items_list = NULL; int excluded_items_list_size = 0; int nb_excluded_items = 0; int exclude_place_flag = 0; int exclude_item_flag = 0; int i = 0; int state = state_read_brace_prefix; while (1) { switch (state) { /* split a comma separated list of numerical places */ case state_split: if (str[i] == '\0') { goto eol; } else if (str[i] != ',') _STARPU_ERROR("parse error in places list\n"); i++; state = state_read_brace_prefix; break; /* read optional exclude flag '!' for numerical place */ case state_read_brace_prefix: exclude_place_flag = 0; if (str[i] == '!') { exclude_place_flag = 1; i++; } state = state_read_opening_brace; break; /* read place opening brace */ case state_read_opening_brace: if (str[i] != '{') _STARPU_ERROR("parse error in places list\n"); i++; state = state_read_numeric_prefix; break; /* read optional exclude flag '!' for numerical item */ case state_read_numeric_prefix: exclude_item_flag = 0; if (str[i] == '!') { exclude_item_flag = 1; i++; } state = state_read_numeric; break; /* read numerical item */ case state_read_numeric: { char *endptr = NULL; errno = 0; int v = (int)strtol(str+i, &endptr, 10); if (errno != 0) _STARPU_ERROR("parse error in places list, strtol failed with error %s\n", strerror(errno)); if (exclude_item_flag) { if (excluded_items_list_size == 0) { excluded_items_list_size = _STARPU_INITIAL_PLACE_ITEMS_LIST_SIZE; _STARPU_MALLOC(excluded_items_list, excluded_items_list_size * sizeof(int)); } else if (nb_excluded_items == excluded_items_list_size) { excluded_items_list_size *= 2; _STARPU_REALLOC(excluded_items_list, excluded_items_list_size * sizeof(int)); } excluded_items_list[nb_excluded_items] = v; nb_excluded_items++; } else { if (included_items_list_size == 0) { included_items_list_size = _STARPU_INITIAL_PLACE_ITEMS_LIST_SIZE; _STARPU_MALLOC(included_items_list, included_items_list_size * sizeof(int)); } else if (nb_included_items == included_items_list_size) { included_items_list_size *= 2; _STARPU_REALLOC(included_items_list, included_items_list_size * sizeof(int)); } included_items_list[nb_included_items] = v; nb_included_items++; } exclude_item_flag = 0; i = endptr - str; state = state_split_numeric; } break; /* read comma separated or colon separated numerical item list */ case state_split_numeric: if (str[i] == ':') /* length and stride colon separated arguments not supported for now */ _STARPU_ERROR("colon support unimplemented in numeric place list"); if (str[i] == ',') { i++; state = state_read_numeric_prefix; } else { state = state_read_closing_brace; } break; /* read end of numerical item list */ case state_read_closing_brace: if (str[i] != '}') _STARPU_ERROR("parse error in places list\n"); if (places_list_size == 0) { places_list_size = _STARPU_INITIAL_PLACES_LIST_SIZE; _STARPU_MALLOC(places_list, places_list_size * sizeof(*places_list)); } else if (nb_places == places_list_size) { places_list_size *= 2; _STARPU_REALLOC(places_list, places_list_size * sizeof(*places_list)); } places_list[nb_places].excluded_place = exclude_place_flag; places_list[nb_places].included_numeric_items = included_items_list; places_list[nb_places].nb_included_numeric_items = nb_included_items; places_list[nb_places].excluded_numeric_items = excluded_items_list; places_list[nb_places].nb_excluded_numeric_items = nb_excluded_items; nb_places++; exclude_place_flag = 0; included_items_list = NULL; included_items_list_size = 0; nb_included_items = 0; excluded_items_list = NULL; excluded_items_list_size = 0; nb_excluded_items = 0; i++; state = state_read_brace_suffix; break; /* read optional place colon separated suffix */ case state_read_brace_suffix: if (str[i] == ':') /* length and stride colon separated arguments not supported for now */ _STARPU_ERROR("colon support unimplemented in numeric place list"); state = state_split; break; default: _STARPU_ERROR("invalid state in parsing places list\n"); } } eol: places->numeric_places = places_list; places->nb_numeric_places = nb_places; places->abstract_name = starpu_omp_place_numerical; } static void convert_places_string(const char *_str, struct starpu_omp_place *places) { char *str = strdup(_str); if (str == NULL) _STARPU_ERROR("memory allocation failed\n"); remove_spaces(str); if (str[0] != '\0') { /* check whether this is the start of an abstract name */ if (isalpha(str[0]) || (str[0] == '!' && isalpha(str[1]))) { read_a_place_name(str, places); } /* else the string must contain a list of braces */ else { read_a_places_list(str, places); } } free(str); } static void free_places(struct starpu_omp_place *places) { int i; for (i = 0; i < places->nb_numeric_places; i++) { if (places->numeric_places[i].nb_included_numeric_items > 0) { free(places->numeric_places[i].included_numeric_items); } if (places->numeric_places[i].nb_excluded_numeric_items > 0) { free(places->numeric_places[i].excluded_numeric_items); } } if (places->nb_numeric_places > 0) { free(places->numeric_places); } } static int _get_env_string_var(const char *str, const char *strings[], int *dst) { int val; if (!str) return 0; val = _strings_cmp(strings, str); if (val < 0) return 0; *dst = val; return 1; } static void read_proc_bind_var() { const int max_levels = _initial_icv_values.max_active_levels_var + 1; int *bind_list = NULL; char *env; _STARPU_CALLOC(bind_list, max_levels, sizeof(*bind_list)); env = starpu_getenv("OMP_PROC_BIND"); if (env) { static const char *strings[] = { "false", "true", "master", "close", "spread", NULL }; char *saveptr, *token; int level = 0; token = strtok_r(env, ",", &saveptr); for (; token != NULL; token = strtok_r(NULL, ",", &saveptr)) { int value; if (!_get_env_string_var(token, strings, &value)) { _STARPU_MSG("StarPU: Invalid value for environment variable OMP_PROC_BIND\n"); break; } bind_list[level++] = value; } } _initial_icv_values.bind_var = bind_list; } static void read_num_threads_var() { const int max_levels = _initial_icv_values.max_active_levels_var + 1; int *num_threads_list = NULL; char *env; _STARPU_CALLOC(num_threads_list, max_levels, sizeof(*num_threads_list)); env = starpu_getenv("OMP_NUM_THREADS"); if (env) { char *saveptr, *token; int level = 0; token = strtok_r(env, ",", &saveptr); for (; token != NULL; token = strtok_r(NULL, ",", &saveptr)) { int value; if (!read_int_var(token, &value)) { _STARPU_MSG("StarPU: Invalid value for environment variable OMP_NUM_THREADS\n"); break; } num_threads_list[level++] = value; } } _initial_icv_values.nthreads_var = num_threads_list; } static void read_omp_environment(void) { const char *boolean_strings[] = { "false", "true", NULL }; _initial_icv_values.dyn_var = starpu_getenv_string_var_default("OMP_DYNAMIC", boolean_strings, _initial_icv_values.dyn_var); _initial_icv_values.nest_var = starpu_getenv_string_var_default("OMP_NESTED", boolean_strings, _initial_icv_values.nest_var); read_sched_var("OMP_SCHEDULE", &_initial_icv_values.run_sched_var, &_initial_icv_values.run_sched_chunk_var); _initial_icv_values.stacksize_var = starpu_getenv_size_default("OMP_STACKSIZE", _initial_icv_values.stacksize_var); { const char *strings[] = { "passive", "active", NULL }; _initial_icv_values.wait_policy_var = starpu_getenv_string_var_default("OMP_WAIT_POLICY", strings, _initial_icv_values.wait_policy_var); } _initial_icv_values.thread_limit_var = starpu_getenv_number_default("OMP_THREAD_LIMIT", _initial_icv_values.thread_limit_var); _initial_icv_values.max_active_levels_var = starpu_getenv_number_default("OMP_MAX_ACTIVE_LEVELS", _initial_icv_values.max_active_levels_var); _initial_icv_values.cancel_var = starpu_getenv_string_var_default("OMP_CANCELLATION", boolean_strings, _initial_icv_values.cancel_var); _initial_icv_values.default_device_var = starpu_getenv_number_default("OMP_DEFAULT_DEVICE", _initial_icv_values.default_device_var); _initial_icv_values.max_task_priority_var = starpu_getenv_number_default("OMP_MAX_TASK_PRIORITY", _initial_icv_values.max_task_priority_var); /* Avoid overflow e.g. in num_threads_list allocation */ STARPU_ASSERT_MSG(_initial_icv_values.max_active_levels_var > 0 && _initial_icv_values.max_active_levels_var < 1000000, "OMP_MAX_ACTIVE_LEVELS should have a reasonable value"); /* TODO: check others */ read_proc_bind_var(); read_num_threads_var(); /* read OMP_PLACES */ { memset(&_initial_icv_values.places, 0, sizeof(_initial_icv_values.places)); _initial_icv_values.places.abstract_name = starpu_omp_place_undefined; const char *env = starpu_getenv("OMP_PLACES"); if (env) { convert_places_string(env, &_initial_icv_values.places); } } _starpu_omp_initial_icv_values = &_initial_icv_values; } static void free_omp_environment(void) { /**/ _starpu_omp_initial_icv_values = NULL; /* OMP_DYNAMIC */ /* OMP_NESTED */ /* OMP_SCHEDULE */ /* OMP_STACKSIZE */ /* OMP_WAIT_POLICY */ /* OMP_THREAD_LIMIT */ /* OMP_MAX_ACTIVE_LEVELS */ /* OMP_CANCELLATION */ /* OMP_DEFAULT_DEVICE */ /* OMP_MAX_TASK_PRIORITY */ /* OMP_PROC_BIND */ free(_initial_icv_values.bind_var); _initial_icv_values.bind_var = NULL; /* OMP_NUM_THREADS */ free(_initial_icv_values.nthreads_var); _initial_icv_values.nthreads_var = NULL; /* OMP_PLACES */ free_places(&_initial_icv_values.places); } static void display_omp_environment(int verbosity_level) { if (verbosity_level > 0) { printf("OPENMP DISPLAY ENVIRONMENT BEGIN\n"); printf(" _OPENMP = 'xxxxxx'\n"); printf(" [host] OMP_DYNAMIC = '%s'\n", _starpu_omp_initial_icv_values->dyn_var?"TRUE":"FALSE"); printf(" [host] OMP_NESTED = '%s'\n", _starpu_omp_initial_icv_values->nest_var?"TRUE":"FALSE"); printf(" [host] OMP_SCHEDULE = '"); switch (_starpu_omp_initial_icv_values->run_sched_var) { case starpu_omp_sched_static: printf("STATIC, %llu", _starpu_omp_initial_icv_values->run_sched_chunk_var); break; case starpu_omp_sched_dynamic: printf("DYNAMIC, %llu", _starpu_omp_initial_icv_values->run_sched_chunk_var); break; case starpu_omp_sched_guided: printf("GUIDED, %llu", _starpu_omp_initial_icv_values->run_sched_chunk_var); break; case starpu_omp_sched_auto: printf("AUTO, %llu", _starpu_omp_initial_icv_values->run_sched_chunk_var); break; case starpu_omp_sched_undefined: default: break; } printf("'\n"); printf(" [host] OMP_STACKSIZE = '%d'\n", _starpu_omp_initial_icv_values->stacksize_var); printf(" [host] OMP_WAIT_POLICY = '%s'\n", _starpu_omp_initial_icv_values->wait_policy_var?"ACTIVE":"PASSIVE"); printf(" [host] OMP_MAX_ACTIVE_LEVELS = '%d'\n", _starpu_omp_initial_icv_values->max_active_levels_var); printf(" [host] OMP_CANCELLATION = '%s'\n", _starpu_omp_initial_icv_values->cancel_var?"TRUE":"FALSE"); printf(" [host] OMP_DEFAULT_DEVICE = '%d'\n", _starpu_omp_initial_icv_values->default_device_var); printf(" [host] OMP_MAX_TASK_PRIORITY = '%d'\n", _starpu_omp_initial_icv_values->max_task_priority_var); printf(" [host] OMP_PROC_BIND = '"); { int level; for (level = 0; level < _starpu_omp_initial_icv_values->max_active_levels_var; level++) { if (level > 0) { printf(", "); } switch (_starpu_omp_initial_icv_values->bind_var[level]) { case starpu_omp_proc_bind_false: printf("FALSE"); break; case starpu_omp_proc_bind_true: printf("TRUE"); break; case starpu_omp_proc_bind_master: printf("MASTER"); break; case starpu_omp_proc_bind_close: printf("CLOSE"); break; case starpu_omp_proc_bind_spread: printf("SPREAD"); break; default: break; } } } printf("'\n"); printf(" [host] OMP_NUM_THREADS = '"); { int level; for (level = 0; level < _starpu_omp_initial_icv_values->max_active_levels_var; level++) { if (level > 0) { printf(", "); } printf("%d", _starpu_omp_initial_icv_values->nthreads_var[level]); } } printf("'\n"); printf(" [host] OMP_PLACES = '"); { struct starpu_omp_place *places = &_starpu_omp_initial_icv_values->places; if (places->nb_numeric_places > 0) { int p; for (p = 0; p < places->nb_numeric_places; p++) { if (p > 0) { printf(","); } struct starpu_omp_numeric_place *np = &places->numeric_places[p]; if (np->excluded_place) { printf("!"); } printf("{"); int i; for (i = 0; i < np->nb_included_numeric_items; i++) { if (i > 0) { printf(","); } printf("%d", np->included_numeric_items[i]); } for (i = 0; i < np->nb_excluded_numeric_items; i++) { if (i > 0 || np->nb_included_numeric_items) { printf(","); } printf("!%d", np->excluded_numeric_items[i]); } printf("}"); /* TODO: print length/stride suffix */ } } else { if (places->abstract_excluded) { printf("!"); } switch (places->abstract_name) { case starpu_omp_place_threads: printf("THREADS"); break; case starpu_omp_place_cores: printf("CORES"); break; case starpu_omp_place_sockets: printf("SOCKETS"); break; case starpu_omp_place_numerical: printf(""); break; case starpu_omp_place_undefined: default: break; } if (places->abstract_length) { printf("(%d)", places->abstract_length); } } } printf("'\n"); printf(" [host] OMP_THREAD_LIMIT = '%d'\n", _initial_icv_values.thread_limit_var); if (verbosity_level > 1) { /* no vendor specific runtime variable */ } printf("OPENMP DISPLAY ENVIRONMENT END\n"); } } void _starpu_omp_environment_init(void) { read_omp_environment(); const char *strings[] = { "false", "true", "verbose", NULL }; int display_env = starpu_getenv_string_var_default("OMP_DISPLAY_ENV", strings, 0); if (display_env > 0) { display_omp_environment(display_env); } } int _starpu_omp_environment_check(void) { if (starpu_cpu_worker_get_count() == 0) { _STARPU_DISP("OpenMP support needs at least 1 CPU worker\n"); return -EINVAL; } int i; for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++) { struct starpu_sched_policy *sched_policy = starpu_sched_ctx_get_sched_policy(i); if (sched_policy && (strcmp(sched_policy->policy_name, _starpu_sched_graph_test_policy.policy_name) == 0)) { _STARPU_DISP("OpenMP support is not compatible with scheduler '%s' ('%s')\n", _starpu_sched_graph_test_policy.policy_name, _starpu_sched_graph_test_policy.policy_description); return -EINVAL; } } return 0; } void _starpu_omp_environment_exit(void) { free_omp_environment(); } #endif /* STARPU_OPENMP */ starpu-1.4.9+dfsg/src/util/openmp_runtime_support_omp_api.c000066400000000000000000000167651507764646700243000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef STARPU_OPENMP #include void starpu_omp_set_num_threads(int threads) { STARPU_ASSERT(threads > 0); struct starpu_omp_task *task = _starpu_omp_get_task(); STARPU_ASSERT(task != NULL); struct starpu_omp_region *region; region = task->owner_region; STARPU_ASSERT(region != NULL); region->icvs.nthreads_var[0] = threads; } int starpu_omp_get_num_threads() { struct starpu_omp_task *task = _starpu_omp_get_task(); struct starpu_omp_region *region; if (task == NULL) return 1; region = task->owner_region; return region->nb_threads; } int starpu_omp_get_thread_num() { struct starpu_omp_task *task = _starpu_omp_get_task(); if (task == NULL) return 0; return _starpu_omp_get_region_thread_num(task->owner_region); } int starpu_omp_get_max_threads() { const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; int max_threads = parallel_region->icvs.nthreads_var[0]; /* TODO: for now, nested parallel sections are not supported, thus we * open an active parallel section only if the generating region is the * initial region */ if (parallel_region->level > 0) { max_threads = 1; } return max_threads; } int starpu_omp_get_num_procs(void) { /* starpu_cpu_worker_get_count defined as topology.nworkers[STARPU_CPU_WORKER] */ return starpu_cpu_worker_get_count(); } int starpu_omp_in_parallel(void) { const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; return parallel_region->icvs.active_levels_var > 0; } void starpu_omp_set_dynamic(int dynamic_threads) { (void) dynamic_threads; /* TODO: dynamic adjustment of the number of threads is not supported for now */ } int starpu_omp_get_dynamic(void) { const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; return parallel_region->icvs.dyn_var; } void starpu_omp_set_nested(int nested) { (void) nested; /* TODO: nested parallelism not supported for now */ } int starpu_omp_get_nested(void) { const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; return parallel_region->icvs.nest_var; } int starpu_omp_get_cancellation(void) { return _starpu_omp_global_state->icvs.cancel_var; } void starpu_omp_set_schedule(enum starpu_omp_sched_value kind, int modifier) { struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; STARPU_ASSERT(kind == starpu_omp_sched_static || kind == starpu_omp_sched_dynamic || kind == starpu_omp_sched_guided || kind == starpu_omp_sched_auto); STARPU_ASSERT(modifier >= 0); parallel_region->icvs.run_sched_var = kind; parallel_region->icvs.run_sched_chunk_var = (unsigned long long)modifier; } void starpu_omp_get_schedule(enum starpu_omp_sched_value *kind, int *modifier) { const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; *kind = parallel_region->icvs.run_sched_var; *modifier = (int)parallel_region->icvs.run_sched_chunk_var; } int starpu_omp_get_thread_limit(void) { return starpu_cpu_worker_get_count(); } void starpu_omp_set_max_active_levels(int max_levels) { struct starpu_omp_device * const device = _starpu_omp_get_task()->owner_region->owner_device; if (max_levels > 1) { /* TODO: nested parallelism not supported for now */ max_levels = 1; } device->icvs.max_active_levels_var = max_levels; } int starpu_omp_get_max_active_levels(void) { const struct starpu_omp_device * const device = _starpu_omp_get_task()->owner_region->owner_device; return device->icvs.max_active_levels_var; } int starpu_omp_get_level(void) { const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; return parallel_region->icvs.levels_var; } int starpu_omp_get_ancestor_thread_num(int level) { struct starpu_omp_region *parallel_region; if (level == 0) return 0; parallel_region = _starpu_omp_get_region_at_level(level); if (!parallel_region) return -1; return _starpu_omp_get_region_thread_num(parallel_region); } int starpu_omp_get_team_size(int level) { struct starpu_omp_region *parallel_region; if (level == 0) return 1; parallel_region = _starpu_omp_get_region_at_level(level); if (!parallel_region) return -1; return parallel_region->nb_threads; } int starpu_omp_get_active_level(void) { const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; return parallel_region->icvs.active_levels_var; } int starpu_omp_in_final(void) { const struct starpu_omp_task *task = _starpu_omp_get_task(); return task->flags & STARPU_OMP_TASK_FLAGS_FINAL; } enum starpu_omp_proc_bind_value starpu_omp_get_proc_bind(void) { const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; int proc_bind = parallel_region->icvs.bind_var[0]; return proc_bind; } int starpu_omp_get_num_places(void) { struct starpu_omp_place *places = &_starpu_omp_initial_icv_values->places; return places->nb_numeric_places; } int starpu_omp_get_place_num_procs(int place_num) { (void) place_num; /* TODO */ return 0; } void starpu_omp_get_place_proc_ids(int place_num, int *ids) { (void) place_num; (void) ids; /* TODO */ } int starpu_omp_get_place_num(void) { /* TODO */ return -1; } int starpu_omp_get_partition_num_places(void) { /* TODO */ return 0; } void starpu_omp_get_partition_place_nums(int *place_nums) { (void) place_nums; /* TODO */ } void starpu_omp_set_default_device(int device_num) { (void) device_num; /* TODO: set_default_device not supported for now */ } int starpu_omp_get_default_device(void) { const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; return parallel_region->icvs.default_device_var; } int starpu_omp_get_num_devices(void) { /* TODO: get_num_devices not supported for now * assume 1 device */ return 1; } int starpu_omp_get_num_teams(void) { /* TODO: num_teams not supported for now * assume 1 team */ return 1; } int starpu_omp_get_team_num(void) { /* TODO: team_num not supported for now * assume team_num 0 */ return 0; } int starpu_omp_is_initial_device(void) { struct starpu_omp_task *task = _starpu_omp_get_task(); if (!task) return 0; const struct starpu_omp_device * const device = task->owner_region->owner_device; return device == _starpu_omp_global_state->initial_device; } int starpu_omp_get_initial_device(void) { /* Assume only one device for now. */ return 0; } int starpu_omp_get_max_task_priority(void) { const struct starpu_omp_region * const parallel_region = _starpu_omp_get_task()->owner_region; return parallel_region->icvs.max_task_priority_var; } double starpu_omp_get_wtime(void) { return 1e-6 * (starpu_timing_now() - _starpu_omp_clock_ref); } double starpu_omp_get_wtick(void) { /* arbitrary precision value */ return 1e-6; } #endif /* STARPU_OPENMP */ starpu-1.4.9+dfsg/src/util/starpu_create_sync_task.c000066400000000000000000000033561507764646700226460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t *deps, void (*callback)(void *), void *callback_arg) { starpu_tag_declare_deps_array(sync_tag, ndeps, deps); /* We create an empty task */ struct starpu_task *sync_task = starpu_task_create(); sync_task->name = "create_sync_task"; sync_task->use_tag = 1; sync_task->tag_id = sync_tag; sync_task->callback_func = callback; sync_task->callback_arg = callback_arg; /* This task does nothing */ sync_task->cl = NULL; int sync_ret = _starpu_task_submit_internally(sync_task); STARPU_ASSERT(!sync_ret); } void starpu_create_callback_task(void (*callback)(void *), void *callback_arg) { /* We create an empty task */ struct starpu_task *empty_task = starpu_task_create(); empty_task->name = "empty_task"; empty_task->callback_func = callback; empty_task->callback_arg = callback_arg; /* This task does nothing */ empty_task->cl = NULL; int ret = _starpu_task_submit_internally(empty_task); STARPU_ASSERT(!ret); } starpu-1.4.9+dfsg/src/util/starpu_data_cpy.c000066400000000000000000000150711507764646700211060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include static void common_data_cpy_func(void *descr[], void *cl_arg) { unsigned interface_id = *(unsigned *)cl_arg; const struct starpu_data_interface_ops *interface_ops = _starpu_data_interface_get_ops(interface_id); const struct starpu_data_copy_methods *copy_methods = interface_ops->copy_methods; int workerid = starpu_worker_get_id_check(); enum starpu_worker_archtype type = starpu_worker_get_type(workerid); unsigned memory_node = starpu_worker_get_memory_node(workerid); void *dst_interface = descr[0]; void *src_interface = descr[1]; /* Driver porters: adding your driver here is very optional, any_to_any will be enough. */ switch (type) { case STARPU_CPU_WORKER: if (copy_methods->ram_to_ram) { copy_methods->ram_to_ram(src_interface, memory_node, dst_interface, memory_node); return; } break; #ifdef STARPU_USE_CUDA case STARPU_CUDA_WORKER: { cudaStream_t stream = starpu_cuda_get_local_stream(); if (copy_methods->cuda_to_cuda_async) { copy_methods->cuda_to_cuda_async(src_interface, memory_node, dst_interface, memory_node, stream); return; } else if (copy_methods->cuda_to_cuda) { copy_methods->cuda_to_cuda(src_interface, memory_node, dst_interface, memory_node); return; } break; } #endif #ifdef STARPU_USE_HIP case STARPU_HIP_WORKER: { hipStream_t stream = starpu_hip_get_local_stream(); if (copy_methods->hip_to_hip_async) { copy_methods->hip_to_hip_async(src_interface, memory_node, dst_interface, memory_node, stream); return; } else if (copy_methods->hip_to_hip) { copy_methods->hip_to_hip(src_interface, memory_node, dst_interface, memory_node); return; } break; } #endif case STARPU_OPENCL_WORKER: if (copy_methods->opencl_to_opencl) { copy_methods->opencl_to_opencl(src_interface, memory_node, dst_interface, memory_node); return; } break; default: /* unknown architecture */ STARPU_ABORT(); } STARPU_ASSERT(copy_methods->any_to_any); copy_methods->any_to_any(src_interface, memory_node, dst_interface, memory_node, NULL); } static struct starpu_perfmodel copy_model = { .type = STARPU_HISTORY_BASED, .symbol = "starpu_data_cpy" }; static struct starpu_codelet copy_cl = { .where = STARPU_CPU|STARPU_CUDA|STARPU_HIP|STARPU_OPENCL, .cpu_funcs = {common_data_cpy_func}, .cuda_funcs = {common_data_cpy_func}, .opencl_funcs = {common_data_cpy_func}, .hip_funcs = {common_data_cpy_func}, .nbuffers = 2, .modes = {STARPU_W, STARPU_R}, .model = ©_model }; int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg, int reduction, struct starpu_task *reduction_dep_task, int priority) { if (dst_handle == src_handle) { if (callback_func) callback_func(callback_arg); return 0; } struct starpu_task *task = starpu_task_create(); STARPU_ASSERT(task); task->name = "data_cpy"; struct _starpu_job *j = _starpu_get_job_associated_to_task(task); if (reduction) { j->reduction_task = reduction; if (reduction_dep_task) starpu_task_declare_deps_array(task, 1, &reduction_dep_task); } task->cl = ©_cl; STARPU_ASSERT(dst_handle->ops->interfaceid == src_handle->ops->interfaceid); unsigned *interface_id; _STARPU_MALLOC(interface_id, sizeof(*interface_id)); *interface_id = dst_handle->ops->interfaceid; task->cl_arg = interface_id; task->cl_arg_size = sizeof(*interface_id); task->cl_arg_free = 1; task->priority = priority; task->callback_func = callback_func; task->callback_arg = callback_arg; /* FIXME: priority!! */ STARPU_TASK_SET_HANDLE(task, dst_handle, 0); STARPU_TASK_SET_HANDLE(task, src_handle, 1); task->synchronous = !asynchronous; int ret = _starpu_task_submit_internally(task); STARPU_ASSERT_MSG(ret != -ENODEV, "Implementation of _starpu_data_cpy is needed for this only available architecture\n"); STARPU_ASSERT_MSG(!ret, "Task data copy failed with code: %d\n", ret); return 0; } int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg) { return _starpu_data_cpy(dst_handle, src_handle, asynchronous, callback_func, callback_arg, 0, NULL, STARPU_DEFAULT_PRIO); } int starpu_data_cpy_priority(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg, int priority) { return _starpu_data_cpy(dst_handle, src_handle, asynchronous, callback_func, callback_arg, 0, NULL, priority); } /* TODO: implement copy on write, and introduce starpu_data_dup as well */ int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous) { _starpu_spin_lock(&src_handle->header_lock); if (src_handle->readonly_dup) { /* Already a ro duplicate, just return it with one more ref */ *dst_handle = src_handle->readonly_dup; _starpu_spin_unlock(&src_handle->header_lock); _starpu_spin_lock(&(*dst_handle)->header_lock); (*dst_handle)->aliases++; _starpu_spin_unlock(&(*dst_handle)->header_lock); return 0; } if (src_handle->readonly) { src_handle->aliases++; _starpu_spin_unlock(&src_handle->header_lock); *dst_handle = src_handle; return 0; } _starpu_spin_unlock(&src_handle->header_lock); starpu_data_register_same(dst_handle, src_handle); _starpu_data_cpy(*dst_handle, src_handle, asynchronous, NULL, NULL, 0, NULL, STARPU_DEFAULT_PRIO); (*dst_handle)->readonly = 1; _starpu_spin_lock(&src_handle->header_lock); src_handle->readonly_dup = (*dst_handle); (*dst_handle)->readonly_dup_of = src_handle; _starpu_spin_unlock(&src_handle->header_lock); return 0; } starpu-1.4.9+dfsg/src/util/starpu_data_cpy.h000066400000000000000000000021411507764646700211050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_DATA_CPY_H__ #define __STARPU_DATA_CPY_H__ /** @file */ #include #pragma GCC visibility push(hidden) int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle, int asynchronous, void (*callback_func)(void*), void *callback_arg, int reduction, struct starpu_task *reduction_dep_task, int priority); #pragma GCC visibility pop #endif // __STARPU_DATA_CPY_H__ starpu-1.4.9+dfsg/src/util/starpu_task_insert.c000066400000000000000000000137361507764646700216560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This file provides an interface that is very similar to that of the Quark * scheduler from the PLASMA project (see http://icl.cs.utk.edu/plasma/). */ #include #include #include #include void starpu_codelet_pack_args(void **arg_buffer, size_t *arg_buffer_size, ...) { struct starpu_codelet_pack_arg_data state; va_list varg_list; int arg_type; starpu_codelet_pack_arg_init(&state); va_start(varg_list, arg_buffer_size); while((arg_type = va_arg(varg_list, int)) != 0) { if (arg_type==STARPU_VALUE) { /* We have a constant value: this should be followed by a pointer to the cst value and the size of the constant */ void *ptr = va_arg(varg_list, void *); size_t ptr_size = va_arg(varg_list, size_t); starpu_codelet_pack_arg(&state, ptr, ptr_size); } else { STARPU_ABORT_MSG("Unrecognized argument %d, did you perhaps forget to end arguments with 0?\n", arg_type); } } va_end(varg_list); starpu_codelet_pack_arg_fini(&state, arg_buffer, arg_buffer_size); } void _starpu_codelet_unpack_args_and_copyleft(char *cl_arg, void *_buffer, size_t buffer_size, va_list varg_list) { size_t current_arg_offset = 0; int nargs, arg; /* We fill the different pointers with the appropriate arguments */ memcpy(&nargs, cl_arg, sizeof(nargs)); current_arg_offset += sizeof(nargs); for (arg = 0; arg < nargs; arg++) { void *argptr = va_arg(varg_list, void *); /* If not reading all cl_args */ // NULL was the initial end marker, we now use 0 // 0 and NULL should be the same value, but we // keep both equalities for systems on which they could be different // cppcheck-suppress duplicateExpression if(argptr == 0 || argptr == NULL) break; size_t arg_size; memcpy(&arg_size, cl_arg+current_arg_offset, sizeof(arg_size)); current_arg_offset += sizeof(arg_size); memcpy(argptr, cl_arg+current_arg_offset, arg_size); current_arg_offset += arg_size; } if (buffer_size) { int left = nargs-arg; char *buffer = (char *) _buffer; int current_buffer_offset = 0; memcpy(buffer, (int *)&left, sizeof(left)); current_buffer_offset += sizeof(left); for (; arg < nargs; arg++) { size_t arg_size; memcpy(&arg_size, cl_arg+current_arg_offset, sizeof(arg_size)); current_arg_offset += sizeof(arg_size); memcpy(buffer+current_buffer_offset, &arg_size, sizeof(arg_size)); current_buffer_offset += sizeof(arg_size); memcpy(buffer+current_buffer_offset, cl_arg+current_arg_offset, arg_size); current_arg_offset += arg_size; current_buffer_offset += arg_size; } } } void starpu_codelet_unpack_args_and_copyleft(void *_cl_arg, void *buffer, size_t buffer_size, ...) { char *cl_arg = (char *) _cl_arg; va_list varg_list; STARPU_ASSERT(cl_arg); va_start(varg_list, buffer_size); _starpu_codelet_unpack_args_and_copyleft(cl_arg, buffer, buffer_size, varg_list); va_end(varg_list); } void starpu_codelet_unpack_args(void *_cl_arg, ...) { char *cl_arg = (char *) _cl_arg; va_list varg_list; STARPU_ASSERT(cl_arg); va_start(varg_list, _cl_arg); _starpu_codelet_unpack_args_and_copyleft(cl_arg, NULL, 0, varg_list); va_end(varg_list); } static struct starpu_task *_starpu_task_build_v(struct starpu_task *ptask, struct starpu_codelet *cl, const char* task_name, int cl_arg_free, va_list varg_list) { va_list varg_list_copy; int ret; struct starpu_task *task = ptask ? ptask : starpu_task_create(); task->name = task_name ? task_name : task->name; task->cl_arg_free = cl_arg_free; va_copy(varg_list_copy, varg_list); ret = _starpu_task_insert_create(cl, task, varg_list_copy); va_end(varg_list_copy); if (ret != 0) { task->destroy = 0; starpu_task_destroy(task); } return (ret == 0) ? task : NULL; } #undef starpu_task_submit int _starpu_task_insert_v(struct starpu_codelet *cl, va_list varg_list) { struct starpu_task *task; int ret; task = _starpu_task_build_v(NULL, cl, NULL, 1, varg_list); ret = starpu_task_submit(task); if (STARPU_UNLIKELY(ret == -ENODEV)) { _STARPU_MSG("submission of task %p with codelet %p failed (symbol `%s') (err: ENODEV)\n", task, task->cl, (cl == NULL) ? "none" : task->cl->name ? task->cl->name : (task->cl->model && task->cl->model->symbol)?task->cl->model->symbol:"none"); task->destroy = 0; starpu_task_destroy(task); } return ret; } #undef starpu_task_set int starpu_task_set(struct starpu_task *task, struct starpu_codelet *cl, ...) { va_list varg_list; va_start(varg_list, cl); _starpu_task_build_v(task, cl, NULL, 1, varg_list); va_end(varg_list); return 0; } #undef starpu_task_insert int starpu_task_insert(struct starpu_codelet *cl, ...) { va_list varg_list; int ret; va_start(varg_list, cl); ret = _starpu_task_insert_v(cl, varg_list); va_end(varg_list); return ret; } #undef starpu_insert_task int starpu_insert_task(struct starpu_codelet *cl, ...) { va_list varg_list; int ret; va_start(varg_list, cl); ret = _starpu_task_insert_v(cl, varg_list); va_end(varg_list); return ret; } #undef starpu_task_build struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...) { struct starpu_task *task; va_list varg_list; va_start(varg_list, cl); task = _starpu_task_build_v(NULL, cl, "task_build", 0, varg_list); if (task && task->cl_arg) { task->cl_arg_free = 1; } va_end(varg_list); return task; } starpu-1.4.9+dfsg/src/util/starpu_task_insert_utils.c000066400000000000000000000734351507764646700231000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include void starpu_codelet_pack_arg_init(struct starpu_codelet_pack_arg_data *state) { state->arg_buffer = NULL; state->arg_buffer_size = 0; state->arg_buffer_used = 0; state->current_offset = sizeof(int); state->nargs = 0; } void starpu_codelet_pack_arg(struct starpu_codelet_pack_arg_data *state, const void *ptr, size_t ptr_size) { STARPU_ASSERT_MSG(state->current_offset >= sizeof(int), "struct starpu_codelet_pack_arg has to be initialized with starpu_codelet_pack_arg_init"); if (state->current_offset + sizeof(ptr_size) + ptr_size > state->arg_buffer_size) { if (state->arg_buffer_size == 0) state->arg_buffer_size = 128 + sizeof(ptr_size) + ptr_size; else state->arg_buffer_size = 2 * state->arg_buffer_size + sizeof(ptr_size) + ptr_size; _STARPU_REALLOC(state->arg_buffer, state->arg_buffer_size); } memcpy(state->arg_buffer+state->current_offset, (void *)&ptr_size, sizeof(ptr_size)); state->current_offset += sizeof(ptr_size); memcpy(state->arg_buffer+state->current_offset, ptr, ptr_size); state->current_offset += ptr_size; STARPU_ASSERT(state->current_offset <= state->arg_buffer_size); state->arg_buffer_used = state->current_offset; state->nargs++; } void starpu_codelet_pack_arg_fini(struct starpu_codelet_pack_arg_data *state, void **cl_arg, size_t *cl_arg_size) { if (state->nargs) { memcpy(state->arg_buffer, &state->nargs, sizeof(state->nargs)); } else { free(state->arg_buffer); state->arg_buffer = NULL; } *cl_arg = state->arg_buffer; *cl_arg_size = state->arg_buffer_used; } void starpu_codelet_unpack_arg_init(struct starpu_codelet_pack_arg_data *state, void *cl_arg, size_t cl_arg_size) { state->arg_buffer = cl_arg; state->arg_buffer_size = cl_arg_size; state->arg_buffer_used = cl_arg_size; state->current_offset = sizeof(int); state->nargs = 0; } void starpu_codelet_unpack_arg(struct starpu_codelet_pack_arg_data *state, void *ptr, size_t size) { size_t ptr_size; STARPU_ASSERT_MSG(state->current_offset + sizeof(size) <= state->arg_buffer_size, "The unpack brings offset %ld beyond the buffer size (%ld)\n", state->current_offset, (long)state->arg_buffer_size); memcpy((void *)&ptr_size, state->arg_buffer+state->current_offset, sizeof(ptr_size)); STARPU_ASSERT_MSG(ptr_size==size, "The given size (%ld) is not the size of the next argument (%ld)\n", size, ptr_size); state->current_offset += sizeof(size); STARPU_ASSERT_MSG(state->current_offset + size <= state->arg_buffer_size, "The recorded size (%ld) brings beyond the buffer size (%ld)\n", (long)size, (long)state->arg_buffer_size); memcpy(ptr, state->arg_buffer+state->current_offset, ptr_size); state->current_offset += size; state->nargs++; } void starpu_codelet_dup_arg(struct starpu_codelet_pack_arg_data *state, void **ptr, size_t *size) { STARPU_ASSERT_MSG(state->current_offset + sizeof(*size) <= state->arg_buffer_size, "The unpack brings offset %ld beyond the buffer size (%ld)\n", state->current_offset, (long)state->arg_buffer_size); memcpy((void*)size, state->arg_buffer+state->current_offset, sizeof(*size)); state->current_offset += sizeof(*size); STARPU_ASSERT_MSG(state->current_offset + *size <= state->arg_buffer_size, "The recorded size (%ld) brings beyond the buffer size (%ld)\n", *size, (long)state->arg_buffer_size); _STARPU_MALLOC(*ptr, *size); memcpy(*ptr, state->arg_buffer+state->current_offset, *size); state->current_offset += *size; state->nargs++; } void starpu_codelet_pick_arg(struct starpu_codelet_pack_arg_data *state, void **ptr, size_t *size) { STARPU_ASSERT_MSG(state->current_offset + sizeof(*size) <= state->arg_buffer_size, "The unpack brings offset %ld beyond the buffer size (%ld)\n", state->current_offset, (long)state->arg_buffer_size); memcpy((void*)size, state->arg_buffer+state->current_offset, sizeof(*size)); state->current_offset += sizeof(*size); STARPU_ASSERT_MSG(state->current_offset + *size <= state->arg_buffer_size, "The recorded size (%ld) brings beyond the buffer size (%ld)\n", (long)(*size), (long)state->arg_buffer_size); *ptr = state->arg_buffer+state->current_offset; state->current_offset += *size; state->nargs++; } void starpu_codelet_unpack_arg_fini(struct starpu_codelet_pack_arg_data *state) { if (state->current_offset < state->arg_buffer_size) { _STARPU_MSG("Arguments still need to be unpacked from the starpu_codelet_pack_arg_data (offset %ld - buffer_size %ld)\n", state->current_offset, (long)state->arg_buffer_size); } } void starpu_codelet_unpack_discard_arg(struct starpu_codelet_pack_arg_data *state) { size_t ptr_size; memcpy((void *)&ptr_size, state->arg_buffer+state->current_offset, sizeof(ptr_size)); state->current_offset += sizeof(ptr_size); state->current_offset += ptr_size; state->nargs++; } void starpu_task_insert_data_make_room(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int current_buffer, int room) { if (current_buffer + room > STARPU_NMAXBUFS) { if (*allocated_buffers == 0) { int i; struct starpu_codelet *cl2 = task->cl; *allocated_buffers = (current_buffer + room) * 2; _STARPU_MALLOC(task->dyn_handles, *allocated_buffers * sizeof(starpu_data_handle_t)); for(i=0 ; idyn_handles[i] = task->handles[i]; } if (cl2->nbuffers == STARPU_VARIABLE_NBUFFERS || !cl2->dyn_modes) { _STARPU_MALLOC(task->dyn_modes, *allocated_buffers * sizeof(enum starpu_data_access_mode)); for(i=0 ; idyn_modes[i] = task->modes[i]; } } } else if (current_buffer + room > *allocated_buffers) { *allocated_buffers = (current_buffer + room) * 2; _STARPU_REALLOC(task->dyn_handles, *allocated_buffers * sizeof(starpu_data_handle_t)); if (cl->nbuffers == STARPU_VARIABLE_NBUFFERS || !cl->dyn_modes) { _STARPU_REALLOC(task->dyn_modes, *allocated_buffers * sizeof(enum starpu_data_access_mode)); } } } } void starpu_task_insert_data_process_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int arg_type, starpu_data_handle_t handle) { STARPU_ASSERT(cl != NULL); STARPU_ASSERT_MSG(cl->nbuffers == STARPU_VARIABLE_NBUFFERS || *current_buffer < cl->nbuffers, "Too many data passed to starpu_task_insert"); starpu_task_insert_data_make_room(cl, task, allocated_buffers, *current_buffer, 1); STARPU_TASK_SET_HANDLE(task, handle, *current_buffer); enum starpu_data_access_mode arg_mode = (enum starpu_data_access_mode) arg_type & ~STARPU_SSEND & ~STARPU_NOFOOTPRINT; /* MPI_REDUX should be interpreted as RW|COMMUTE by the "ground" StarPU layer.*/ if (arg_mode & STARPU_MPI_REDUX) { arg_mode = STARPU_RW|STARPU_COMMUTE; } if (cl->nbuffers == STARPU_VARIABLE_NBUFFERS || (cl->nbuffers > STARPU_NMAXBUFS && !cl->dyn_modes)) { STARPU_TASK_SET_MODE(task, arg_mode,* current_buffer); } else if (STARPU_CODELET_GET_MODE(cl, *current_buffer)) { STARPU_ASSERT_MSG((STARPU_CODELET_GET_MODE(cl, *current_buffer) & ~STARPU_NOFOOTPRINT) == arg_mode, "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n", _starpu_codelet_get_name(cl), STARPU_CODELET_GET_MODE(cl, *current_buffer), *current_buffer, arg_mode); } else { #ifdef STARPU_DEVEL # warning shall we print a warning to the user /* Morse uses it to avoid having to set it in the codelet structure */ #endif STARPU_CODELET_SET_MODE(cl, arg_mode, *current_buffer); } (*current_buffer)++; } void starpu_task_insert_data_process_array_arg(struct starpu_codelet *cl, struct starpu_task *task, int *allocated_buffers, int *current_buffer, int nb_handles, starpu_data_handle_t *handles) { STARPU_ASSERT(cl != NULL); starpu_task_insert_data_make_room(cl, task, allocated_buffers, *current_buffer, nb_handles); int i; for(i=0 ; inbuffers == STARPU_VARIABLE_NBUFFERS || *current_buffer < cl->nbuffers, "Too many data passed to starpu_task_insert"); STARPU_TASK_SET_HANDLE(task, descrs[i].handle, *current_buffer); if (task->dyn_modes) { task->dyn_modes[*current_buffer] = descrs[i].mode; } else if (cl->nbuffers == STARPU_VARIABLE_NBUFFERS || (cl->nbuffers > STARPU_NMAXBUFS && !cl->dyn_modes)) STARPU_TASK_SET_MODE(task, descrs[i].mode, *current_buffer); else if (STARPU_CODELET_GET_MODE(cl, *current_buffer)) { STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(cl, *current_buffer) == descrs[i].mode, "The codelet <%s> defines the access mode %d for the buffer %d which is different from the mode %d given to starpu_task_insert\n", _starpu_codelet_get_name(cl), STARPU_CODELET_GET_MODE(cl, *current_buffer), *current_buffer, descrs[i].mode); } else { STARPU_CODELET_SET_MODE(cl, descrs[i].mode, *current_buffer); } (*current_buffer)++; } } int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *task, va_list varg_list) { int arg_type; int current_buffer; int allocated_buffers = 0; unsigned ndeps = 0; unsigned nend_deps = 0; struct starpu_task **task_deps_array = NULL; struct starpu_task **task_end_deps_array = NULL; _STARPU_TRACE_TASK_BUILD_START(); task->cl = cl; current_buffer = 0; struct starpu_codelet_pack_arg_data state; starpu_codelet_pack_arg_init(&state); while((arg_type = va_arg(varg_list, int)) != 0) { if (arg_type & STARPU_R || arg_type & STARPU_W || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX || arg_type & STARPU_MPI_REDUX) { /* We have an access mode : we expect to find a handle */ starpu_data_handle_t handle = va_arg(varg_list, starpu_data_handle_t); starpu_task_insert_data_process_arg(cl, task, &allocated_buffers, ¤t_buffer, arg_type, handle); } else if (arg_type == STARPU_DATA_ARRAY) { // Expect to find a array of handles and its size starpu_data_handle_t *handles = va_arg(varg_list, starpu_data_handle_t *); int nb_handles = va_arg(varg_list, int); starpu_task_insert_data_process_array_arg(cl, task, &allocated_buffers, ¤t_buffer, nb_handles, handles); } else if (arg_type==STARPU_DATA_MODE_ARRAY) { // Expect to find a array of descr and its size struct starpu_data_descr *descrs = va_arg(varg_list, struct starpu_data_descr *); int nb_descrs = va_arg(varg_list, int); starpu_task_insert_data_process_mode_array_arg(cl, task, &allocated_buffers, ¤t_buffer, nb_descrs, descrs); } else if (arg_type==STARPU_VALUE) { void *ptr = va_arg(varg_list, void *); size_t ptr_size = va_arg(varg_list, size_t); starpu_codelet_pack_arg(&state, ptr, ptr_size); } else if (arg_type==STARPU_CL_ARGS) { task->cl_arg = va_arg(varg_list, void *); task->cl_arg_size = va_arg(varg_list, size_t); task->cl_arg_free = 1; } else if (arg_type==STARPU_CL_ARGS_NFREE) { task->cl_arg = va_arg(varg_list, void *); task->cl_arg_size = va_arg(varg_list, size_t); task->cl_arg_free = 0; } else if (arg_type==STARPU_TASK_DEPS_ARRAY) { STARPU_ASSERT_MSG(task_deps_array == NULL, "Parameter 'STARPU_TASK_DEPS_ARRAY' passed twice not supported yet"); ndeps = va_arg(varg_list, unsigned); task_deps_array = va_arg(varg_list, struct starpu_task **); } else if (arg_type==STARPU_TASK_END_DEPS_ARRAY) { STARPU_ASSERT_MSG(task_end_deps_array == NULL, "Parameter 'STARPU_TASK_END_DEPS_ARRAY' passed twice not supported yet"); nend_deps = va_arg(varg_list, unsigned); task_end_deps_array = va_arg(varg_list, struct starpu_task **); } else if (arg_type==STARPU_CALLBACK) { task->callback_func = va_arg(varg_list, _starpu_callback_func_t); } else if (arg_type==STARPU_CALLBACK_WITH_ARG) { task->callback_func = va_arg(varg_list, _starpu_callback_func_t); task->callback_arg = va_arg(varg_list, void *); task->callback_arg_free = 1; } else if (arg_type==STARPU_CALLBACK_WITH_ARG_NFREE) { task->callback_func = va_arg(varg_list, _starpu_callback_func_t); task->callback_arg = va_arg(varg_list, void *); task->callback_arg_free = 0; } else if (arg_type==STARPU_CALLBACK_ARG) { task->callback_arg = va_arg(varg_list, void *); task->callback_arg_free = 1; } else if (arg_type==STARPU_CALLBACK_ARG_NFREE) { task->callback_arg = va_arg(varg_list, void *); task->callback_arg_free = 0; } else if (arg_type==STARPU_EPILOGUE_CALLBACK) { task->epilogue_callback_func = va_arg(varg_list, _starpu_callback_func_t); } else if (arg_type==STARPU_EPILOGUE_CALLBACK_ARG) { task->epilogue_callback_arg = va_arg(varg_list, void *); task->epilogue_callback_arg_free = 1; } else if (arg_type==STARPU_PROLOGUE_CALLBACK) { task->prologue_callback_func = va_arg(varg_list, _starpu_callback_func_t); } else if (arg_type==STARPU_PROLOGUE_CALLBACK_ARG) { task->prologue_callback_arg = va_arg(varg_list, void *); task->prologue_callback_arg_free = 1; } else if (arg_type==STARPU_PROLOGUE_CALLBACK_ARG_NFREE) { task->prologue_callback_arg = va_arg(varg_list, void *); task->prologue_callback_arg_free = 0; } else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP) { task->prologue_callback_pop_func = va_arg(varg_list, _starpu_callback_func_t); } else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP_ARG) { task->prologue_callback_pop_arg = va_arg(varg_list, void *); task->prologue_callback_pop_arg_free = 1; } else if (arg_type==STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE) { task->prologue_callback_pop_arg = va_arg(varg_list, void *); task->prologue_callback_pop_arg_free = 0; } else if (arg_type==STARPU_PRIORITY) { /* Followed by a priority level */ int prio = va_arg(varg_list, int); task->priority = prio; } else if (arg_type==STARPU_EXECUTE_ON_NODE) { (void)va_arg(varg_list, int); } else if (arg_type==STARPU_EXECUTE_ON_DATA) { (void)va_arg(varg_list, starpu_data_handle_t); } else if (arg_type==STARPU_EXECUTE_WHERE) { task->where = va_arg(varg_list, unsigned long long); } else if (arg_type==STARPU_EXECUTE_ON_WORKER) { int worker = va_arg(varg_list, int); if (worker != -1) { task->workerid = worker; task->execute_on_a_specific_worker = 1; } } else if (arg_type==STARPU_WORKER_ORDER) { unsigned order = va_arg(varg_list, unsigned); if (order != 0) { STARPU_ASSERT_MSG(task->execute_on_a_specific_worker, "worker order only makes sense if a workerid is provided"); task->workerorder = order; } } else if (arg_type==STARPU_SCHED_CTX) { unsigned sched_ctx = va_arg(varg_list, unsigned); task->sched_ctx = sched_ctx; } else if (arg_type==STARPU_HYPERVISOR_TAG) { int hypervisor_tag = va_arg(varg_list, int); task->hypervisor_tag = hypervisor_tag; } else if (arg_type==STARPU_POSSIBLY_PARALLEL) { unsigned possibly_parallel = va_arg(varg_list, unsigned); task->possibly_parallel = possibly_parallel; } else if (arg_type==STARPU_FLOPS) { double flops = va_arg(varg_list, double); task->flops = flops; } else if (arg_type==STARPU_TAG) { starpu_tag_t tag = va_arg(varg_list, starpu_tag_t); task->tag_id = tag; task->use_tag = 1; } else if (arg_type==STARPU_TAG_ONLY) { starpu_tag_t tag = va_arg(varg_list, starpu_tag_t); task->tag_id = tag; } else if (arg_type==STARPU_NAME) { const char *name = va_arg(varg_list, const char *); task->name = name; } else if (arg_type==STARPU_NODE_SELECTION_POLICY) { (void)va_arg(varg_list, int); } else if (arg_type==STARPU_TASK_COLOR) { task->color = va_arg(varg_list, int); } else if (arg_type==STARPU_TASK_SYNCHRONOUS) { task->synchronous = va_arg(varg_list, int); } else if (arg_type==STARPU_HANDLES_SEQUENTIAL_CONSISTENCY) { task->handles_sequential_consistency = va_arg(varg_list, unsigned char *); } #ifdef STARPU_BUBBLE else if (arg_type==STARPU_BUBBLE_FUNC) { task->bubble_func = va_arg(varg_list, starpu_bubble_func_t); } else if (arg_type==STARPU_BUBBLE_FUNC_ARG) { task->bubble_func_arg = va_arg(varg_list, void*); } else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC) { task->bubble_gen_dag_func = va_arg(varg_list, starpu_bubble_gen_dag_func_t); } else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC_ARG) { task->bubble_gen_dag_func_arg = va_arg(varg_list,void*); } else if (arg_type==STARPU_BUBBLE_PARENT) { struct starpu_task *parent = va_arg(varg_list, struct starpu_task *); if (parent) { struct _starpu_job *job = _starpu_get_job_associated_to_task(parent); task->bubble_parent = job->job_id; } } #endif else if (arg_type==STARPU_TASK_END_DEP) { int end_dep = va_arg(varg_list, int); starpu_task_end_dep_add(task, end_dep); } else if (arg_type==STARPU_TASK_WORKERIDS) { task->workerids_len = va_arg(varg_list, unsigned); task->workerids = va_arg(varg_list, uint32_t*); } else if (arg_type==STARPU_SEQUENTIAL_CONSISTENCY) { task->sequential_consistency = va_arg(varg_list, unsigned); } else if (arg_type==STARPU_TASK_PROFILING_INFO) { task->profiling_info = va_arg(varg_list, struct starpu_profiling_task_info *); } else if (arg_type==STARPU_TASK_NO_SUBMITORDER) { task->no_submitorder = va_arg(varg_list, unsigned); } else if (arg_type==STARPU_TASK_SCHED_DATA) { task->sched_data = va_arg(varg_list, void *); } else if (arg_type==STARPU_TASK_FILE) { task->file = va_arg(varg_list, const char *); } else if (arg_type==STARPU_TASK_LINE) { task->line = va_arg(varg_list, int); } else if (arg_type==STARPU_TRANSACTION) { STARPU_ASSERT_MSG(task->transaction == NULL, "a transaction has already been set"); task->transaction = va_arg(varg_list, struct starpu_transaction *); } else { STARPU_ABORT_MSG("Unrecognized argument %d, did you perhaps forget to end arguments with 0?\n", arg_type); } } if (cl) { if (cl->nbuffers == STARPU_VARIABLE_NBUFFERS) { task->nbuffers = current_buffer; } else { STARPU_ASSERT_MSG(current_buffer == cl->nbuffers, "Incoherent number of buffers between cl (%d) and number of parameters (%d)", cl->nbuffers, current_buffer); } } if (state.nargs) { if (task->cl_arg != NULL) { _STARPU_DISP("Parameters STARPU_CL_ARGS and STARPU_VALUE cannot be used in the same call\n"); free(state.arg_buffer); return -EINVAL; } starpu_codelet_pack_arg_fini(&state, &task->cl_arg, &task->cl_arg_size); } if (task_deps_array) { starpu_task_declare_deps_array(task, ndeps, task_deps_array); } if (task_end_deps_array) { starpu_task_declare_end_deps_array(task, nend_deps, task_end_deps_array); } _STARPU_TRACE_TASK_BUILD_END(); return 0; } int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *task, void **arglist) { int arg_i = 0; int current_buffer = 0; int allocated_buffers = 0; unsigned ndeps = 0; unsigned nend_deps = 0; struct starpu_task **task_deps_array = NULL; struct starpu_task **task_end_deps_array = NULL; _STARPU_TRACE_TASK_BUILD_START(); struct starpu_codelet_pack_arg_data state; starpu_codelet_pack_arg_init(&state); task->cl = cl; task->name = NULL; task->cl_arg_free = 1; while (arglist[arg_i] != NULL) { const int arg_type = (int)(intptr_t)arglist[arg_i]; if (arg_type & STARPU_R || arg_type & STARPU_W || arg_type & STARPU_SCRATCH || arg_type & STARPU_REDUX || arg_type & STARPU_MPI_REDUX) { arg_i++; starpu_data_handle_t handle = arglist[arg_i]; starpu_task_insert_data_process_arg(cl, task, &allocated_buffers, ¤t_buffer, arg_type, handle); } else if (arg_type == STARPU_DATA_ARRAY) { arg_i++; starpu_data_handle_t *handles = arglist[arg_i]; arg_i++; int nb_handles = *(int *)arglist[arg_i]; starpu_task_insert_data_process_array_arg(cl, task, &allocated_buffers, ¤t_buffer, nb_handles, handles); } else if (arg_type == STARPU_DATA_MODE_ARRAY) { arg_i++; struct starpu_data_descr *descrs = arglist[arg_i]; arg_i++; int nb_descrs = *(int *)arglist[arg_i]; starpu_task_insert_data_process_mode_array_arg(cl, task, &allocated_buffers, ¤t_buffer, nb_descrs, descrs); } else if (arg_type == STARPU_VALUE) { arg_i++; void *ptr = arglist[arg_i]; arg_i++; size_t ptr_size = (size_t)(intptr_t)arglist[arg_i]; starpu_codelet_pack_arg(&state, ptr, ptr_size); } else if (arg_type == STARPU_CL_ARGS) { arg_i++; task->cl_arg = arglist[arg_i]; arg_i++; task->cl_arg_size = (size_t)(intptr_t)arglist[arg_i]; task->cl_arg_free = 1; } else if (arg_type == STARPU_CL_ARGS_NFREE) { arg_i++; task->cl_arg = arglist[arg_i]; arg_i++; task->cl_arg_size = (size_t)(intptr_t)arglist[arg_i]; task->cl_arg_free = 0; } else if (arg_type==STARPU_TASK_DEPS_ARRAY) { STARPU_ASSERT_MSG(task_deps_array == NULL, "Parameter 'STARPU_TASK_DEPS_ARRAY' passed twice not supported yet"); arg_i++; ndeps = *(unsigned *)arglist[arg_i]; arg_i++; task_deps_array = arglist[arg_i]; } else if (arg_type==STARPU_TASK_END_DEPS_ARRAY) { STARPU_ASSERT_MSG(task_end_deps_array == NULL, "Parameter 'STARPU_TASK_END_DEPS_ARRAY' passed twice not supported yet"); arg_i++; nend_deps = *(unsigned *)arglist[arg_i]; arg_i++; task_end_deps_array = arglist[arg_i]; } else if (arg_type == STARPU_CALLBACK) { arg_i++; task->callback_func = (_starpu_callback_func_t)arglist[arg_i]; } else if (arg_type == STARPU_CALLBACK_WITH_ARG) { arg_i++; task->callback_func = (_starpu_callback_func_t)arglist[arg_i]; arg_i++; task->callback_arg = arglist[arg_i]; task->callback_arg_free = 1; } else if (arg_type == STARPU_CALLBACK_WITH_ARG_NFREE) { arg_i++; task->callback_func = (_starpu_callback_func_t)arglist[arg_i]; arg_i++; task->callback_arg = arglist[arg_i]; task->callback_arg_free = 0; } else if (arg_type == STARPU_CALLBACK_ARG) { arg_i++; task->callback_arg = arglist[arg_i]; task->callback_arg_free = 1; } else if (arg_type == STARPU_CALLBACK_ARG_NFREE) { arg_i++; task->callback_arg = arglist[arg_i]; task->callback_arg_free = 0; } else if (arg_type == STARPU_EPILOGUE_CALLBACK) { arg_i++; task->epilogue_callback_func = (_starpu_callback_func_t)arglist[arg_i]; } else if (arg_type == STARPU_EPILOGUE_CALLBACK_ARG) { arg_i++; task->epilogue_callback_arg = arglist[arg_i]; task->epilogue_callback_arg_free = 1; } else if (arg_type == STARPU_PROLOGUE_CALLBACK) { arg_i++; task->prologue_callback_func = (_starpu_callback_func_t)arglist[arg_i]; } else if (arg_type == STARPU_PROLOGUE_CALLBACK_ARG) { arg_i++; task->prologue_callback_arg = arglist[arg_i]; task->prologue_callback_arg_free = 1; } else if (arg_type == STARPU_PROLOGUE_CALLBACK_ARG_NFREE) { arg_i++; task->prologue_callback_arg = arglist[arg_i]; task->prologue_callback_arg_free = 0; } else if (arg_type == STARPU_PROLOGUE_CALLBACK_POP) { arg_i++; task->prologue_callback_pop_func = (_starpu_callback_func_t)arglist[arg_i]; } else if (arg_type == STARPU_PROLOGUE_CALLBACK_POP_ARG) { arg_i++; task->prologue_callback_pop_arg = arglist[arg_i]; task->prologue_callback_pop_arg_free = 1; } else if (arg_type == STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE) { arg_i++; task->prologue_callback_pop_arg = arglist[arg_i]; task->prologue_callback_pop_arg_free = 0; } else if (arg_type == STARPU_PRIORITY) { arg_i++; task->priority = *(int *)arglist[arg_i]; } else if (arg_type == STARPU_EXECUTE_ON_NODE) { arg_i++; (void)arglist[arg_i]; } else if (arg_type == STARPU_EXECUTE_ON_DATA) { arg_i++; (void)arglist[arg_i]; } else if (arg_type == STARPU_EXECUTE_WHERE) { arg_i++; int32_t where = (int32_t)(intptr_t)arglist[arg_i]; task->where = where; } else if (arg_type == STARPU_EXECUTE_ON_WORKER) { arg_i++; int worker = *(int *)arglist[arg_i]; if (worker != -1) { task->workerid = worker; task->execute_on_a_specific_worker = 1; } } else if (arg_type == STARPU_WORKER_ORDER) { arg_i++; unsigned order = *(unsigned *)arglist[arg_i]; if (order != 0) { STARPU_ASSERT_MSG(task->execute_on_a_specific_worker, "worker order only makes sense if a workerid is provided"); task->workerorder = order; } } else if (arg_type == STARPU_SCHED_CTX) { arg_i++; task->sched_ctx = *(unsigned *)arglist[arg_i]; } else if (arg_type == STARPU_HYPERVISOR_TAG) { arg_i++; task->hypervisor_tag = *(int *)arglist[arg_i]; } else if (arg_type == STARPU_POSSIBLY_PARALLEL) { arg_i++; task->possibly_parallel = *(unsigned *)arglist[arg_i]; } else if (arg_type == STARPU_FLOPS) { arg_i++; task->flops = *(double *)arglist[arg_i]; } else if (arg_type == STARPU_TAG) { arg_i++; task->tag_id = *(starpu_tag_t *)arglist[arg_i]; task->use_tag = 1; } else if (arg_type == STARPU_TAG_ONLY) { arg_i++; task->tag_id = *(starpu_tag_t *)arglist[arg_i]; } else if (arg_type == STARPU_NAME) { arg_i++; task->name = arglist[arg_i]; } else if (arg_type == STARPU_NODE_SELECTION_POLICY) { arg_i++; (void)arglist[arg_i]; } else if (arg_type == STARPU_TASK_COLOR) { arg_i++; task->color = *(int *)arglist[arg_i]; } else if (arg_type == STARPU_TASK_SYNCHRONOUS) { arg_i++; task->synchronous = *(int *)arglist[arg_i]; } else if (arg_type==STARPU_HANDLES_SEQUENTIAL_CONSISTENCY) { task->handles_sequential_consistency = (unsigned char *)arglist[arg_i]; } #ifdef STARPU_BUBBLE else if (arg_type==STARPU_BUBBLE_FUNC) { arg_i++; task->bubble_func = (starpu_bubble_func_t)arglist[arg_i]; } else if (arg_type==STARPU_BUBBLE_FUNC_ARG) { arg_i++; task->bubble_func_arg = (void *)arglist[arg_i]; } else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC) { arg_i++; task->bubble_gen_dag_func = (starpu_bubble_gen_dag_func_t)arglist[arg_i]; } else if (arg_type==STARPU_BUBBLE_GEN_DAG_FUNC_ARG) { arg_i++; task->bubble_gen_dag_func_arg = (void*)arglist[arg_i]; } else if (arg_type==STARPU_BUBBLE_PARENT) { arg_i++; struct starpu_task *parent = (struct starpu_task *)arglist[arg_i]; struct _starpu_job *job = _starpu_get_job_associated_to_task(parent); task->bubble_parent = job->job_id; } #endif else if (arg_type==STARPU_TASK_END_DEP) { arg_i++; starpu_task_end_dep_add(task, *(int*)arglist[arg_i]); } else if (arg_type==STARPU_TASK_WORKERIDS) { arg_i++; task->workerids_len = *(unsigned *)arglist[arg_i]; arg_i++; task->workerids = (uint32_t *)arglist[arg_i]; } else if (arg_type==STARPU_SEQUENTIAL_CONSISTENCY) { arg_i++; task->sequential_consistency = *(unsigned *)arglist[arg_i]; } else if (arg_type==STARPU_TASK_PROFILING_INFO) { arg_i++; task->profiling_info = (struct starpu_profiling_task_info *)arglist[arg_i]; } else if (arg_type==STARPU_TASK_NO_SUBMITORDER) { arg_i++; task->no_submitorder = *(unsigned *)arglist[arg_i]; } else if (arg_type == STARPU_TASK_SCHED_DATA) { arg_i++; task->sched_data = (void*)arglist[arg_i]; } else if (arg_type == STARPU_TASK_FILE) { arg_i++; task->file = arglist[arg_i]; } else if (arg_type == STARPU_TASK_LINE) { arg_i++; task->line = *(int *)arglist[arg_i]; } else if (arg_type==STARPU_TRANSACTION) { STARPU_ASSERT_MSG(task->transaction == NULL, "a transaction has already been set"); arg_i++; task->transaction = arglist[arg_i]; } else { STARPU_ABORT_MSG("unknown/unsupported argument %d, did you perhaps forget to end arguments with 0?", arg_type); } arg_i++; } if (cl) { if (cl->nbuffers == STARPU_VARIABLE_NBUFFERS) { task->nbuffers = current_buffer; } else { STARPU_ASSERT_MSG(current_buffer == cl->nbuffers, "Incoherent number of buffers between cl (%d) and number of parameters (%d)", cl->nbuffers, current_buffer); } } if (state.nargs) { if (task->cl_arg != NULL) { _STARPU_DISP("Parameters STARPU_CL_ARGS and STARPU_VALUE cannot be used in the same call\n"); free(state.arg_buffer); return -EINVAL; } starpu_codelet_pack_arg_fini(&state, &task->cl_arg, &task->cl_arg_size); } if (task_deps_array) { starpu_task_declare_deps_array(task, ndeps, task_deps_array); } if (task_end_deps_array) { starpu_task_declare_end_deps_array(task, nend_deps, task_end_deps_array); } _STARPU_TRACE_TASK_BUILD_END(); return 0; } /* Fortran interface to task_insert */ #undef starpu_task_submit void fstarpu_task_insert(void **arglist) { struct starpu_codelet *cl = arglist[0]; if (cl == NULL) { STARPU_ABORT_MSG("task without codelet"); } struct starpu_task *task = starpu_task_create(); int ret = _fstarpu_task_insert_create(cl, task, arglist+1); if (ret != 0) { STARPU_ABORT_MSG("task creation failed"); } ret = starpu_task_submit(task); if (ret != 0) { STARPU_ABORT_MSG("starpu_task_submit failed"); } } /* fstarpu_insert_task: aliased to fstarpu_task_insert in fstarpu_mod.f90 */ starpu-1.4.9+dfsg/src/util/starpu_task_insert_utils.h000066400000000000000000000023751507764646700231000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPU_TASK_INSERT_UTILS_H__ #define __STARPU_TASK_INSERT_UTILS_H__ /** @file */ #include #include #include #pragma GCC visibility push(hidden) typedef void (*_starpu_callback_func_t)(void *); int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *task, va_list varg_list) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *task, void **arglist) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; #pragma GCC visibility pop #endif // __STARPU_TASK_INSERT_UTILS_H__ starpu-1.4.9+dfsg/src/worker_collection/000077500000000000000000000000001507764646700203235ustar00rootroot00000000000000starpu-1.4.9+dfsg/src/worker_collection/worker_list.c000066400000000000000000000174051507764646700230420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "core/workers.h" static unsigned list_has_next_unblocked_worker(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) { int nworkers = workers->nunblocked_workers; STARPU_ASSERT(it != NULL); unsigned ret = it->cursor < nworkers ; if(!ret) it->cursor = 0; return ret; } static int list_get_next_unblocked_worker(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) { int *workerids = (int *)workers->unblocked_workers; int nworkers = (int)workers->nunblocked_workers; STARPU_ASSERT(it->cursor < nworkers); int ret = workerids[it->cursor++]; return ret; } static unsigned list_has_next_master(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) { int nworkers = workers->nmasters; STARPU_ASSERT(it != NULL); unsigned ret = it->cursor < nworkers ; if(!ret) it->cursor = 0; return ret; } static int list_get_next_master(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) { int *workerids = (int *)workers->masters; int nworkers = (int)workers->nmasters; STARPU_ASSERT_MSG(it->cursor < nworkers, "cursor %d nworkers %d\n", it->cursor, nworkers); int ret = workerids[it->cursor++]; return ret; } static unsigned list_has_next(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) { if(it->possibly_parallel == 1) return list_has_next_master(workers, it); else if(it->possibly_parallel == 0) return list_has_next_unblocked_worker(workers, it); int nworkers = workers->nworkers; STARPU_ASSERT(it != NULL); unsigned ret = it->cursor < nworkers ; if(!ret) it->cursor = 0; return ret; } static int list_get_next(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) { if(it->possibly_parallel == 1) return list_get_next_master(workers, it); else if(it->possibly_parallel == 0) return list_get_next_unblocked_worker(workers, it); int *workerids = (int *)workers->workerids; int nworkers = (int)workers->nworkers; STARPU_ASSERT(it->cursor < nworkers); int ret = workerids[it->cursor++]; return ret; } static unsigned _worker_belongs_to_ctx(struct starpu_worker_collection *workers, int workerid) { int *workerids = (int *)workers->workerids; unsigned nworkers = workers->nworkers; unsigned i; for(i = 0; i < nworkers; i++) { if(workerids[i] == workerid) return 1; } return 0; } static int list_add(struct starpu_worker_collection *workers, int worker) { int *workerids = (int *)workers->workerids; unsigned *nworkers = &workers->nworkers; STARPU_ASSERT(*nworkers < (STARPU_NMAXWORKERS+STARPU_NMAX_COMBINEDWORKERS)); if(!_worker_belongs_to_ctx(workers, worker)) { workerids[(*nworkers)++] = worker; return worker; } else return -1; } static int _get_first_free_worker(int *workerids, int nworkers) { int i; for(i = 0; i < nworkers; i++) if(workerids[i] == -1) return i; return -1; } /* rearange array of workerids in order not to have {-1, -1, 5, -1, 7} * and have instead {5, 7, -1, -1, -1} * it is easier afterwards to iterate the array */ static void _rearange_workerids(int *workerids, int old_nworkers) { int first_free_id = -1; int i; for(i = 0; i < old_nworkers; i++) { if(workerids[i] != -1) { first_free_id = _get_first_free_worker(workerids, old_nworkers); if(first_free_id != -1) { workerids[first_free_id] = workerids[i]; workerids[i] = -1; } } } } static int list_remove(struct starpu_worker_collection *workers, int worker) { int *workerids = (int *)workers->workerids; unsigned nworkers = workers->nworkers; int *unblocked_workers = (int *)workers->unblocked_workers; unsigned nunblocked_workers = workers->nunblocked_workers; int *masters = (int *)workers->masters; unsigned nmasters = workers->nmasters; unsigned i; int found_worker = -1; for(i = 0; i < nworkers; i++) { if(workerids[i] == worker) { workerids[i] = -1; found_worker = worker; break; } } _rearange_workerids(workerids, nworkers); if(found_worker != -1) workers->nworkers--; int found_unblocked = -1; for(i = 0; i < nunblocked_workers; i++) { if(unblocked_workers[i] == worker) { unblocked_workers[i] = -1; found_unblocked = worker; break; } } _rearange_workerids(unblocked_workers, nunblocked_workers); if(found_unblocked != -1) workers->nunblocked_workers--; int found_master = -1; for(i = 0; i < nmasters; i++) { if(masters[i] == worker) { masters[i] = -1; found_master = worker; break; } } _rearange_workerids(masters, nmasters); if(found_master != -1) workers->nmasters--; return found_worker; } static void _init_workers(int *workerids) { unsigned i; unsigned nworkers = starpu_worker_get_count(); for(i = 0; i < nworkers; i++) workerids[i] = -1; return; } static void list_init(struct starpu_worker_collection *workers) { int *workerids; int *unblocked_workers; int *masters; _STARPU_MALLOC(workerids, (STARPU_NMAXWORKERS+STARPU_NMAX_COMBINEDWORKERS) * sizeof(int)); _STARPU_MALLOC(unblocked_workers, (STARPU_NMAXWORKERS+STARPU_NMAX_COMBINEDWORKERS) * sizeof(int)); _STARPU_MALLOC(masters, (STARPU_NMAXWORKERS+STARPU_NMAX_COMBINEDWORKERS) * sizeof(int)); _init_workers(workerids); _init_workers(unblocked_workers); _init_workers(masters); workers->workerids = (void*)workerids; workers->nworkers = 0; workers->unblocked_workers = (void*)unblocked_workers; workers->nunblocked_workers = 0; workers->masters = (void*)masters; workers->nmasters = 0; return; } static void list_deinit(struct starpu_worker_collection *workers) { free(workers->workerids); free(workers->unblocked_workers); free(workers->masters); } static void list_init_iterator(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) { (void) workers; it->cursor = 0; it->possibly_parallel = -1; /* -1 => we don't care about this field */ } static void list_init_iterator_for_parallel_tasks(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it, struct starpu_task *task) { list_init_iterator(workers, it); if (_starpu_get_nsched_ctxs() <= 1) return; it->possibly_parallel = task->possibly_parallel; /* 0/1 => this field indicates if we consider masters only or slaves not blocked too */ int *workerids = (int *)workers->workerids; unsigned nworkers = workers->nworkers; unsigned i; int nm = 0, nub = 0; for(i = 0; i < nworkers; i++) { if(!starpu_worker_is_blocked_in_parallel(workerids[i])) { ((int*)workers->unblocked_workers)[nub++] = workerids[i]; if(!it->possibly_parallel) /* don't bother filling the table with masters we won't use it anyway */ continue; if(!starpu_worker_is_slave_somewhere(workerids[i])) ((int*)workers->masters)[nm++] = workerids[i]; } } workers->nmasters = nm; workers->nunblocked_workers = nub; } struct starpu_worker_collection starpu_worker_list = { .has_next = list_has_next, .get_next = list_get_next, .add = list_add, .remove = list_remove, .init = list_init, .deinit = list_deinit, .init_iterator = list_init_iterator, .init_iterator_for_parallel_tasks = list_init_iterator_for_parallel_tasks, .type = STARPU_WORKER_LIST }; starpu-1.4.9+dfsg/src/worker_collection/worker_tree.c000066400000000000000000000234271507764646700230270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #ifdef STARPU_HAVE_HWLOC #include #include "core/workers.h" static unsigned tree_has_next_unblocked_worker(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) { STARPU_ASSERT(it != NULL); if(workers->nworkers == 0) return 0; struct starpu_tree *tree = (struct starpu_tree*)workers->collection_private; struct starpu_tree *neighbour = starpu_tree_get_neighbour(tree, (struct starpu_tree*)it->value, it->visited, workers->present); if(!neighbour) { starpu_tree_reset_visited(tree, it->visited); it->value = NULL; it->possible_value = NULL; return 0; } int id = -1; int *workerids; int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids); int w; for(w = 0; w < nworkers; w++) { if(!it->visited[workerids[w]] && workers->present[workerids[w]]) { if(workers->is_unblocked[workerids[w]]) { id = workerids[w]; it->possible_value = neighbour; break; } else { it->visited[workerids[w]] = 1; it->value = neighbour; return tree_has_next_unblocked_worker(workers, it); } } } STARPU_ASSERT_MSG(id != -1, "bind id (%d) for workerid (%d) not correct", neighbour->id, id); return 1; } static int tree_get_next_unblocked_worker(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) { int ret = -1; struct starpu_tree *tree = (struct starpu_tree *)workers->collection_private; struct starpu_tree *neighbour = NULL; if(it->possible_value) { neighbour = it->possible_value; it->possible_value = NULL; } else neighbour = starpu_tree_get_neighbour(tree, (struct starpu_tree*)it->value, it->visited, workers->present); STARPU_ASSERT_MSG(neighbour, "no element anymore"); int *workerids; int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids); int w; for(w = 0; w < nworkers; w++) { if(!it->visited[workerids[w]] && workers->present[workerids[w]] && workers->is_unblocked[workerids[w]]) { ret = workerids[w]; it->visited[workerids[w]] = 1; it->value = neighbour; break; } } STARPU_ASSERT_MSG(ret != -1, "bind id not correct"); return ret; } static unsigned tree_has_next_master(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) { STARPU_ASSERT(it != NULL); if(workers->nworkers == 0) return 0; struct starpu_tree *tree = (struct starpu_tree*)workers->collection_private; struct starpu_tree *neighbour = starpu_tree_get_neighbour(tree, (struct starpu_tree*)it->value, it->visited, workers->is_master); if(!neighbour) { starpu_tree_reset_visited(tree, it->visited); it->value = NULL; it->possible_value = NULL; return 0; } int id = -1; int *workerids; int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids); int w; for(w = 0; w < nworkers; w++) { if(!it->visited[workerids[w]] && workers->is_master[workerids[w]]) { id = workerids[w]; it->possible_value = neighbour; break; } } STARPU_ASSERT_MSG(id != -1, "bind id (%d) for workerid (%d) not correct", neighbour->id, id); return 1; } static int tree_get_next_master(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) { int ret = -1; struct starpu_tree *tree = (struct starpu_tree *)workers->collection_private; struct starpu_tree *neighbour = NULL; if(it->possible_value) { neighbour = it->possible_value; it->possible_value = NULL; } else neighbour = starpu_tree_get_neighbour(tree, (struct starpu_tree*)it->value, it->visited, workers->is_master); STARPU_ASSERT_MSG(neighbour, "no element anymore"); int *workerids; int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids); int w; for(w = 0; w < nworkers; w++) { if(!it->visited[workerids[w]] && workers->is_master[workerids[w]]) { ret = workerids[w]; it->visited[workerids[w]] = 1; it->value = neighbour; break; } } STARPU_ASSERT_MSG(ret != -1, "bind id not correct"); return ret; } static unsigned tree_has_next(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) { if(it->possibly_parallel == 1) return tree_has_next_master(workers, it); else if(it->possibly_parallel == 0) return tree_has_next_unblocked_worker(workers, it); STARPU_ASSERT(it != NULL); if(workers->nworkers == 0) return 0; struct starpu_tree *tree = (struct starpu_tree*)workers->collection_private; int *workerids; int nworkers; int w; if (it->value) { struct starpu_tree *node = it->value; /* Are there workers left to be processed in the current node? */ nworkers = starpu_bindid_get_workerids(node->id, &workerids); for(w = 0; w < nworkers; w++) { if(!it->visited[workerids[w]] && workers->present[workerids[w]]) { /* Still some! */ it->possible_value = node; return 1; } } } struct starpu_tree *neighbour = starpu_tree_get_neighbour(tree, (struct starpu_tree*)it->value, it->visited, workers->present); if(!neighbour) { starpu_tree_reset_visited(tree, it->visited); it->value = NULL; it->possible_value = NULL; return 0; } int id = -1; nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids); for(w = 0; w < nworkers; w++) { if(!it->visited[workerids[w]] && workers->present[workerids[w]]) { id = workerids[w]; it->possible_value = neighbour; break; } } STARPU_ASSERT_MSG(id != -1, "bind id (%d) for workerid (%d) not correct", neighbour->id, id); return 1; } static int tree_get_next(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) { if(it->possibly_parallel == 1) return tree_get_next_master(workers, it); else if(it->possibly_parallel == 0) return tree_get_next_unblocked_worker(workers, it); int ret = -1; struct starpu_tree *tree = (struct starpu_tree *)workers->collection_private; struct starpu_tree *neighbour = NULL; if(it->possible_value) { neighbour = it->possible_value; it->possible_value = NULL; } else neighbour = starpu_tree_get_neighbour(tree, (struct starpu_tree*)it->value, it->visited, workers->present); STARPU_ASSERT_MSG(neighbour, "no element anymore"); int *workerids; int nworkers = starpu_bindid_get_workerids(neighbour->id, &workerids); int w; for(w = 0; w < nworkers; w++) { if(!it->visited[workerids[w]] && workers->present[workerids[w]]) { ret = workerids[w]; it->visited[workerids[w]] = 1; it->value = neighbour; break; } } STARPU_ASSERT_MSG(ret != -1, "bind id not correct"); return ret; } static int tree_add(struct starpu_worker_collection *workers, int worker) { if(!workers->present[worker]) { workers->present[worker] = 1; workers->workerids[workers->nworkers] = worker; workers->nworkers++; return worker; } else return -1; } static int tree_remove(struct starpu_worker_collection *workers, int worker) { if(workers->present[worker]) { unsigned i; for (i = 0; i < workers->nworkers; i++) if (workers->workerids[i] == worker) { memmove(&workers->workerids[i], &workers->workerids[i+1], (workers->nworkers-1-i) * sizeof(workers->workerids[i])); break; } workers->present[worker] = 0; workers->is_unblocked[worker] = 0; workers->is_master[worker] = 0; workers->nworkers--; return worker; } else return -1; } static void tree_init(struct starpu_worker_collection *workers) { _STARPU_MALLOC(workers->workerids, (STARPU_NMAXWORKERS+STARPU_NMAX_COMBINEDWORKERS) * sizeof(int)); workers->collection_private = (void*)starpu_workers_get_tree(); workers->nworkers = 0; int i; int nworkers = starpu_worker_get_count(); for(i = 0; i < nworkers; i++) { workers->workerids[i] = -1; workers->present[i] = 0; workers->is_unblocked[i] = 0; workers->is_master[i] = 0; } return; } static void tree_deinit(struct starpu_worker_collection *workers) { (void) workers; free(workers->workerids); } static void tree_init_iterator(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it) { (void) workers; it->value = NULL; it->possible_value = NULL; it->possibly_parallel = -1; int nworkers = starpu_worker_get_count(); memset(&it->visited, 0, nworkers * sizeof(it->visited[0])); } static void tree_init_iterator_for_parallel_tasks(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it, struct starpu_task *task) { if (_starpu_get_nsched_ctxs() <= 1) { tree_init_iterator(workers, it); return; } tree_init_iterator(workers, it); it->possibly_parallel = task->possibly_parallel; int i; int nworkers = starpu_worker_get_count(); for(i = 0; i < nworkers; i++) { workers->is_unblocked[i] = (workers->present[i] && !starpu_worker_is_blocked_in_parallel(i)); if(!it->possibly_parallel) /* don't bother filling the table with masters we won't use it anyway */ continue; workers->is_master[i] = (workers->present[i] && !starpu_worker_is_blocked_in_parallel(i) && !starpu_worker_is_slave_somewhere(i)); } } struct starpu_worker_collection starpu_worker_tree = { .has_next = tree_has_next, .get_next = tree_get_next, .add = tree_add, .remove = tree_remove, .init = tree_init, .deinit = tree_deinit, .init_iterator = tree_init_iterator, .init_iterator_for_parallel_tasks = tree_init_iterator_for_parallel_tasks, .type = STARPU_WORKER_TREE }; #endif// STARPU_HAVE_HWLOC starpu-1.4.9+dfsg/starpu_openmp_llvm/000077500000000000000000000000001507764646700177365ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpu_openmp_llvm/Makefile.am000066400000000000000000000013471507764646700217770ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-subdirtests.mk SUBDIRS=src examples starpu-1.4.9+dfsg/starpu_openmp_llvm/Makefile.in000066400000000000000000000663071507764646700220170ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ subdir = starpu_openmp_llvm ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/make/starpu-subdirtests.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = src examples all: all-recursive .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpu_openmp_llvm/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign starpu_openmp_llvm/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile installdirs: installdirs-recursive installdirs-am: install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ check-am clean clean-generic clean-libtool cscopelist-am ctags \ ctags-am distclean distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ installdirs-am maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ps ps-am tags tags-am uninstall uninstall-am .PRECIOUS: Makefile # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # recheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i recheck || RET=1 ; \ done ; \ exit $$RET showcheckfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/starpu_openmp_llvm/examples/000077500000000000000000000000001507764646700215545ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpu_openmp_llvm/examples/Makefile.am000066400000000000000000000027701507764646700236160ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk LAUNCHER_ENV += OMP_NUM_THREADS=4 TESTS = $(STARPU_OPENMP_LLVM_EXAMPLES) STARPU_OPENMP_LLVM_EXAMPLES = # we want to compile the application just with clang --fopenmp CC = $(PROG_CLANG) AM_CPPFLAGS = AM_CFLAGS += -Wall -g -fopenmp # ideally, we should create a link libomp.so.5 to # libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.so but because it # is libtool we use LDADD LDADD = $(top_builddir)/starpu_openmp_llvm/src/libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.la examplebindir = $(libdir)/starpu/examples/starpu_openmp_llvm examplebin_PROGRAMS = $(STARPU_OPENMP_LLVM_EXAMPLES) check_PROGRAMS = $(LOADER) $(STARPU_OPENMP_LLVM_EXAMPLES) STARPU_OPENMP_LLVM_EXAMPLES += hello-task exampledir = $(libdir)/starpu/examples/starpu_openmp_llvm example_DATA = README hello-task.c EXTRA_DIST = README starpu-1.4.9+dfsg/starpu_openmp_llvm/examples/Makefile.in000066400000000000000000001442121507764646700236250ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ TESTS = $(am__EXEEXT_1) examplebin_PROGRAMS = $(am__EXEEXT_1) check_PROGRAMS = $(am__EXEEXT_1) subdir = starpu_openmp_llvm/examples ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__EXEEXT_1 = hello-task$(EXEEXT) am__installdirs = "$(DESTDIR)$(examplebindir)" \ "$(DESTDIR)$(exampledir)" PROGRAMS = $(examplebin_PROGRAMS) hello_task_SOURCES = hello-task.c hello_task_OBJECTS = hello-task.$(OBJEXT) hello_task_LDADD = $(LDADD) hello_task_DEPENDENCIES = $(top_builddir)/starpu_openmp_llvm/src/libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.la AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/hello-task.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = hello-task.c DIST_SOURCES = hello-task.c am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } DATA = $(example_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) AM_RECURSIVE_TARGETS = check recheck TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk README DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ # we want to compile the application just with clang --fopenmp CC = $(PROG_CLANG) CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = OMP_NUM_THREADS=4 LAUNCHER = AM_CFLAGS = $(GLOBAL_AM_CFLAGS) -Wall -g -fopenmp AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile STARPU_OPENMP_LLVM_EXAMPLES = hello-task AM_CPPFLAGS = # ideally, we should create a link libomp.so.5 to # libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.so but because it # is libtool we use LDADD LDADD = $(top_builddir)/starpu_openmp_llvm/src/libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.la examplebindir = $(libdir)/starpu/examples/starpu_openmp_llvm exampledir = $(libdir)/starpu/examples/starpu_openmp_llvm example_DATA = README hello-task.c EXTRA_DIST = README all: all-am .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpu_openmp_llvm/examples/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign starpu_openmp_llvm/examples/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-checkPROGRAMS: @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list install-examplebinPROGRAMS: $(examplebin_PROGRAMS) @$(NORMAL_INSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ fi; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ while read p p1; do if test -f $$p \ || test -f $$p1 \ ; then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' \ -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ } \ ; done uninstall-examplebinPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' \ `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(examplebindir)" && rm -f $$files clean-examplebinPROGRAMS: @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list hello-task$(EXEEXT): $(hello_task_OBJECTS) $(hello_task_DEPENDENCIES) $(EXTRA_hello_task_DEPENDENCIES) @rm -f hello-task$(EXEEXT) $(AM_V_CCLD)$(LINK) $(hello_task_OBJECTS) $(hello_task_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hello-task.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-exampleDATA: $(example_DATA) @$(NORMAL_INSTALL) @list='$(example_DATA)'; test -n "$(exampledir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(exampledir)'"; \ $(MKDIR_P) "$(DESTDIR)$(exampledir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(exampledir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(exampledir)" || exit $$?; \ done uninstall-exampleDATA: @$(NORMAL_UNINSTALL) @list='$(example_DATA)'; test -n "$(exampledir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(exampledir)'; $(am__uninstall_files_from_dir) ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-am TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-am CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-am cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: $(check_PROGRAMS) @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all $(check_PROGRAMS) @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? hello-task.log: hello-task$(EXEEXT) @p='hello-task$(EXEEXT)'; \ b='hello-task'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: check-am all-am: Makefile $(PROGRAMS) $(DATA) installdirs: for dir in "$(DESTDIR)$(examplebindir)" "$(DESTDIR)$(exampledir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ clean-libtool mostlyclean-am distclean: distclean-am -rm -f ./$(DEPDIR)/hello-task.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-exampleDATA install-examplebinPROGRAMS install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f ./$(DEPDIR)/hello-task.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-exampleDATA uninstall-examplebinPROGRAMS .MAKE: check-am install-am install-strip .PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ check-am clean clean-checkPROGRAMS clean-examplebinPROGRAMS \ clean-generic clean-libtool cscopelist-am ctags ctags-am \ distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am install-exampleDATA \ install-examplebinPROGRAMS install-exec install-exec-am \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip installcheck installcheck-am \ installdirs maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ uninstall uninstall-am uninstall-exampleDATA \ uninstall-examplebinPROGRAMS .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/starpu_openmp_llvm/examples/README000066400000000000000000000026471507764646700224450ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # To compile and execute this application outside StarPU : $ clang -fopenmp ./hello-task.c $ ldd ./a.out ... libomp.so.5 => /usr/lib/x86_64-linux-gnu/libomp.so.5 (0x00007fbf3d42d000) ... $ ./a.out Hello from 0 Hey 0 Hey 1 Hey 2 Hey 3 Hey there array: 1, 1, 1, 1, To execute the application using the StarPU OpenMP LLVM support, one just needs it to create a symbolic link named libomp.so.5 to the StarPU OpenMP LLVM library, e.g $ mkdir libs $ ln -s $STARPU_ROOT/lib/libstarpu_openmp_llvm-1.3.so libs/libomp.so.5 $ LD_LIBRARY_PATH=./libs:$LD_LIBRARY_PATH ./a.out [starpu][__kmp_constructor] Initialising the StarPU OpenMP LLVM Support Hello from 0 [starpu][__kmpc_omp_task_with_deps] Using the StarPU OpenMP LLVM Support Hey 0 Hey 1 Hey 2 Hey 3 Hey there array: 1, 1, 1, 1, starpu-1.4.9+dfsg/starpu_openmp_llvm/examples/hello-task.c000066400000000000000000000030311507764646700237600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include int array[] = {1, 2, 3, 4}; #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) int main() { int res=0; #pragma omp parallel #pragma omp master { FPRINTF(stderr, "Hello from %i\n", omp_get_thread_num()); #pragma omp task { sleep(2); FPRINTF(stderr, "Hey there\n"); } for (int i = 0; i < 4; i++) { #pragma omp task depend(in: array[i]) depend(inout: array[(i+1)%4]) { array[(i+1)%4] = array[i]; FPRINTF(stderr, "Hey %i\n", i); } } } FPRINTF(stderr, "array: "); for (int i = 0; i < 4; i++) { FPRINTF(stderr, "%i, ", array[i]); if (array[i] != 1) { FPRINTF(stderr, "\n"); FPRINTF(stderr, "Incorrect value. Should be 1\n"); res = 1; } } FPRINTF(stderr, "\n"); return res; } starpu-1.4.9+dfsg/starpu_openmp_llvm/src/000077500000000000000000000000001507764646700205255ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpu_openmp_llvm/src/Makefile.am000066400000000000000000000034061507764646700225640ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-notests.mk SUBDIRS = CLEANFILES = *.gcno *.gcda *.linkinfo AM_CFLAGS += $(FXT_CFLAGS) AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/src/ -I$(top_builddir)/src -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) LIBS += $(FXT_LDFLAGS) $(FXT_LIBS) libstarpu_openmp_llvm_so_version = $(LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT):$(LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION):$(LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE) lib_LTLIBRARIES = libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.la libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CPPFLAGS = $(AM_CPPFLAGS) libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS = $(AM_CFLAGS) libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-info $(libstarpu_openmp_llvm_so_version) libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = $(AM_LIBADD) $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la $(HWLOC_LIBS) libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \ openmp_runtime_support_llvm.c starpu-1.4.9+dfsg/starpu_openmp_llvm/src/Makefile.in000066400000000000000000001203621507764646700225760ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ subdir = starpu_openmp_llvm/src ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(libdir)" LTLIBRARIES = $(lib_LTLIBRARIES) am__DEPENDENCIES_1 = libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES = \ $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la \ $(am__DEPENDENCIES_1) am_libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.lo libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = $(am_libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LINK = $(LIBTOOL) \ $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) \ $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS) \ $(CFLAGS) \ $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS) \ $(LDFLAGS) -o $@ AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.Plo am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) DIST_SOURCES = $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/make/starpu-notests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ $(STARPU_EXPORTED_LIBS) $(FXT_LDFLAGS) $(FXT_LIBS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(FXT_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) SUBDIRS = CLEANFILES = *.gcno *.gcda *.linkinfo AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/src/ -I$(top_builddir)/src -I$(top_builddir)/include $(STARPU_H_CPPFLAGS) libstarpu_openmp_llvm_so_version = $(LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT):$(LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION):$(LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE) lib_LTLIBRARIES = libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.la libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CPPFLAGS = $(AM_CPPFLAGS) libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS = $(AM_CFLAGS) libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-info $(libstarpu_openmp_llvm_so_version) libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = $(AM_LIBADD) $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la $(HWLOC_LIBS) libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \ openmp_runtime_support_llvm.c all: all-recursive .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpu_openmp_llvm/src/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign starpu_openmp_llvm/src/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): install-libLTLIBRARIES: $(lib_LTLIBRARIES) @$(NORMAL_INSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ list2=; for p in $$list; do \ if test -f $$p; then \ list2="$$list2 $$p"; \ else :; fi; \ done; \ test -z "$$list2" || { \ echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ } uninstall-libLTLIBRARIES: @$(NORMAL_UNINSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ for p in $$list; do \ $(am__strip_dir) \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ done clean-libLTLIBRARIES: -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) @list='$(lib_LTLIBRARIES)'; \ locs=`for p in $$list; do echo $$p; done | \ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ sort -u`; \ test -z "$$locs" || { \ echo rm -f $${locs}; \ rm -f $${locs}; \ } libstarpu_openmp_llvm-@STARPU_EFFECTIVE_VERSION@.la: $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(EXTRA_libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(AM_V_CCLD)$(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LINK) -rpath $(libdir) $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.Plo@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.lo: openmp_runtime_support_llvm.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CPPFLAGS) $(CPPFLAGS) $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS) $(CFLAGS) -MT libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.lo -MD -MP -MF $(DEPDIR)/libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.Tpo -c -o libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.lo `test -f 'openmp_runtime_support_llvm.c' || echo '$(srcdir)/'`openmp_runtime_support_llvm.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.Tpo $(DEPDIR)/libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='openmp_runtime_support_llvm.c' object='libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CPPFLAGS) $(CPPFLAGS) $(libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS) $(CFLAGS) -c -o libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.lo `test -f 'openmp_runtime_support_llvm.c' || echo '$(srcdir)/'`openmp_runtime_support_llvm.c mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile $(LTLIBRARIES) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(libdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ mostlyclean-am distclean: distclean-recursive -rm -f ./$(DEPDIR)/libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.Plo -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-libLTLIBRARIES install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f ./$(DEPDIR)/libstarpu_openmp_llvm_@STARPU_EFFECTIVE_VERSION@_la-openmp_runtime_support_llvm.Plo -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-libLTLIBRARIES .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ am--depfiles check check-am clean clean-generic \ clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \ ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am install-exec \ install-exec-am install-html install-html-am install-info \ install-info-am install-libLTLIBRARIES install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs installdirs-am \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ uninstall-libLTLIBRARIES .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/starpu_openmp_llvm/src/openmp_runtime_support_llvm.c000066400000000000000000000575701507764646700265760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #ifdef STARPU_OPENMP_LLVM #include #include #include #include #include #include #include typedef struct ident ident_t; typedef int32_t kmp_int32; typedef void * kmp_intptr_t; typedef void(* kmpc_micro) (kmp_int32 *global_tid, kmp_int32 *bound_tid,...); typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32 gtid, void *kmp_task); typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in : 1; bool out : 1; } flags; size_t elem_size; } kmp_depend_info_t; typedef union kmp_cmplrdata { kmp_int32 priority; /**< priority specified by user for the task */ kmp_routine_entry_t destructors; /* pointer to function to invoke deconstructors of firstprivate C++ objects */ /* future data */ } kmp_cmplrdata_t; // the LLVM support was first implemented with a compiler supporting variants, however as most compilers do not enable variants, we disable the feature // by default, variants are not enabled, it is not possible to enable them with configure.ac as we do not want users to enable it by mistake #ifdef _STARPU_OPENMP_LLVM_VARIANT typedef void *(*kmp_variant_entry_t)(void *, ...); typedef enum kmp_variant_kind { VARIANT_CPU, VARIANT_OPENCL, VARIANT_CUDA } kmp_variant_kind_t; typedef struct kmp_variant { kmp_variant_entry_t fn; kmp_variant_kind_t kind; } kmp_variant_t; #endif typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */ void *shareds; /**< pointer to block of pointers to shared vars */ kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */ kmp_int32 part_id; /**< part id for the task */ kmp_cmplrdata_t data1; /* Two known optional additions: destructors and priority */ kmp_cmplrdata_t data2; /* Process destructors first, priority second */ /* future data */ #ifdef _STARPU_OPENMP_LLVM_VARIANT kmp_variant_t *variants; kmp_int32 nvariants; #endif } kmp_task_t; struct s_microtask_wrap { int argc; void **arg_ptrs; kmpc_micro microtask; }; enum sched_type /* : kmp_int32 */ { kmp_sch_lower = 32, kmp_sch_static_chunked = 33, kmp_sch_static = 34, kmp_sch_dynamic_chunked = 35, kmp_sch_guided_chunked = 36, kmp_sch_runtime = 37, kmp_sch_auto = 38, kmp_sch_trapezoidal = 39, kmp_sch_static_greedy = 40, kmp_sch_static_balanced = 41, kmp_sch_guided_iterative_chunked = 42, kmp_sch_guided_analytical_chunked = 43, kmp_sch_static_steal = 44, kmp_sch_static_balanced_chunked = 45, kmp_sch_guided_simd = 46, kmp_sch_runtime_simd = 47, kmp_sch_upper, kmp_ord_lower = 64, kmp_ord_static_chunked = 65, kmp_ord_static = 66, kmp_ord_dynamic_chunked = 67, kmp_ord_guided_chunked = 68, kmp_ord_runtime = 69, kmp_ord_auto = 70, kmp_ord_trapezoidal = 71, kmp_ord_upper, kmp_distribute_static_chunked = 91, kmp_distribute_static = 92, kmp_nm_lower = 160, kmp_nm_static_chunked = (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower), kmp_nm_static = 162, kmp_nm_dynamic_chunked = 163, kmp_nm_guided_chunked = 164, kmp_nm_runtime = 165, kmp_nm_auto = 166, kmp_nm_trapezoidal = 167, kmp_nm_static_greedy = 168, kmp_nm_static_balanced = 169, kmp_nm_guided_iterative_chunked = 170, kmp_nm_guided_analytical_chunked = 171, kmp_nm_static_steal = 172, kmp_nm_ord_static_chunked = 193, kmp_nm_ord_static = 194, kmp_nm_ord_dynamic_chunked = 195, kmp_nm_ord_guided_chunked = 196, kmp_nm_ord_runtime = 197, kmp_nm_ord_auto = 198, kmp_nm_ord_trapezoidal = 199, kmp_nm_upper, kmp_sch_modifier_monotonic = (1 << 29), kmp_sch_modifier_nonmonotonic = (1 << 30), kmp_sch_default = kmp_sch_static }; typedef kmp_int32 kmp_critical_name[8]; kmp_int32 __kmpc_global_thread_num(ident_t *loc); kmp_int32 __kmpc_global_num_threads(ident_t *loc); kmp_int32 __kmpc_bound_thread_num(ident_t *loc); kmp_int32 __kmpc_bound_num_threads(ident_t *loc); static void parallel_call(void *buffers[], void *args) { (void) buffers; int gtid=__kmpc_global_thread_num(NULL); int ltid=__kmpc_bound_thread_num(NULL); void **arg_ptrs = args; kmpc_micro microtask = *arg_ptrs++; kmp_int32 argc = (intptr_t)*arg_ptrs++; switch (argc) { case 0: microtask(>id, <id); break; case 1: microtask(>id, <id, arg_ptrs[0]); break; case 2: microtask(>id, <id, arg_ptrs[0], arg_ptrs[1]); break; case 3: microtask(>id, <id, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2]); break; case 4: microtask(>id, <id, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3]); break; case 5: microtask(>id, <id, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4]); break; case 6: microtask(>id, <id, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5]); break; case 7: microtask(>id, <id, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6]); break; case 8: microtask(>id, <id, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7]); break; case 9: microtask(>id, <id, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7], arg_ptrs[8]); break; case 10: microtask(>id, <id, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7], arg_ptrs[8], arg_ptrs[9]); break; case 11: microtask(>id, <id, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7], arg_ptrs[8], arg_ptrs[9], arg_ptrs[10]); break; case 12: microtask(>id, <id, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7], arg_ptrs[8], arg_ptrs[9], arg_ptrs[10], arg_ptrs[11]); break; case 13: microtask(>id, <id, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7], arg_ptrs[8], arg_ptrs[9], arg_ptrs[10], arg_ptrs[11], arg_ptrs[12]); break; case 14: microtask(>id, <id, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7], arg_ptrs[8], arg_ptrs[9], arg_ptrs[10], arg_ptrs[11], arg_ptrs[12], arg_ptrs[13]); break; case 15: microtask(>id, <id, arg_ptrs[0], arg_ptrs[1], arg_ptrs[2], arg_ptrs[3], arg_ptrs[4], arg_ptrs[5], arg_ptrs[6], arg_ptrs[7], arg_ptrs[8], arg_ptrs[9], arg_ptrs[10], arg_ptrs[11], arg_ptrs[12], arg_ptrs[13], arg_ptrs[14]); break; default: assert(0); } } /* Deprecated Functions */ kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { (void) loc; return !0; } /* Startup and Shutdown */ void __kmpc_begin(ident_t *loc, kmp_int32 flags) { (void) loc; (void) flags; /* TODO: add auto-init in other lib funcs if kmpc_begin is not called */ starpu_omp_init(); } void __kmpc_end(ident_t *loc) { (void) loc; /* TODO: add support for KMP_IGNORE_MPPEND */ starpu_omp_shutdown(); } /* Parallel (fork/join) */ void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) { (void) loc; (void) global_tid; (void) num_threads; abort(); } void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) { (void) loc; va_list vargs; va_start(vargs, microtask); void *arg_ptrs[2+argc]; arg_ptrs[0] = microtask; arg_ptrs[1] = (void*)(intptr_t)argc; int i; for (i=0; icl.model = &starpu_perfmodel_nop; attr->cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; #endif attr->cl.cpu_funcs[0] = parallel_call; attr->cl.where = STARPU_CPU; attr->cl_arg_size = (argc+2)*sizeof(void *); attr->cl_arg_free = 0; attr->cl_arg = arg_ptrs; attr->if_clause = 1; starpu_omp_parallel_region(attr); free((void *)attr); va_end(vargs); } static void task_call(void *buffers[], void *args) { (void) buffers; int gtid=__kmpc_global_thread_num(NULL); void **arg_ptrs = args; kmp_task_t *task = *arg_ptrs++; /*typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32 gtid, void *kmp_task);*/ task->routine(gtid, task); } kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t task_entry) { (void) loc_ref; (void) gtid; (void) flags; // The initial content of kmp_task_t is: // - void *shared // - kmp_routine_entry_t routine // - kmp_int32 part_id // But the compiler may need more fields, hence it passes a "sizeof_kmp_task_t" that we should honor. kmp_task_t *task; /* FIXME: avoid double malloc by allocating shared+task_t at once */ /* FIXME: free the things somewhere*/ _STARPU_MALLOC(task, sizeof_kmp_task_t); void *shared; _STARPU_MALLOC(shared, sizeof_shareds); task->shareds = shared; task->routine = task_entry; task->part_id = 0; #ifdef _STARPU_OPENMP_LLVM_VARIANT task->variants = 0; task->nvariants = 0; #endif return task; } #define GETDEP(task, i) starpu_data_handle_to_pointer(task->starpu_task->handles[i], STARPU_MAIN_RAM) #define GET(i) (void*)STARPU_VARIABLE_GET_PTR(buffers[i]) static void task_call_variants(void (*fn)(void*, ...), void *buffers[], void *args) { void **arg_ptrs = args; intptr_t nargs = (intptr_t) arg_ptrs[1]; // TODO: asm it, as we could do it nicely in a loop switch (nargs) { case 0: fn(0); break; case 1: fn(GET(0)); break; case 2: fn(GET(0), GET(1)); break; case 3: fn(GET(0), GET(1), GET(2)); break; case 4: fn(GET(0), GET(1), GET(2), GET(3)); break; case 5: fn(GET(0), GET(1), GET(2), GET(3), GET(4)); break; case 6: fn(GET(0), GET(1), GET(2), GET(3), GET(4), GET(5)); break; default: fprintf(stderr, "Unsupported number of dependencies/arguments in task call.\n"); abort(); break; } } #undef GETDEP #ifdef _STARPU_OPENMP_LLVM_VARIANT static void task_call_cpu(void *buffers[], void *args) { void **arg_ptrs = args; task_call_variants((void (*)(void *, ...))arg_ptrs[2], buffers, args); } static void task_call_cuda(void *buffers[], void *args) { void **arg_ptrs = args; task_call_variants((void (*)(void *, ...))arg_ptrs[3], buffers, args); } #endif /*TODO: wrapper void *(buffers[], nbuffer) { push push call }*/ kmp_task_t *__kmpc_omp_task_alloc_variants(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t task_entry, kmp_int32 nvariants STARPU_ATTRIBUTE_UNUSED) { kmp_task_t *task = __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t, sizeof_shareds, task_entry); #ifdef _STARPU_OPENMP_LLVM_VARIANT task->nvariants = nvariants; _STARPU_MALLOC(task->variants, nvariants * sizeof(kmp_variant_t)); #endif return task; } kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) { (void) loc_ref; (void) gtid; starpu_omp_taskwait(); return 0; } kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) { (void) loc_ref; (void) gtid; /* NOTE: for some reason, just having a static struct and passing its address * triggered a segfault in the starpu_omp_task_region. * */ static int _msg=0; if (_msg == 0) { _STARPU_MSG("Using the StarPU OpenMP LLVM Support\n"); _msg = 1; } struct starpu_omp_task_region_attr *attr = calloc(1, sizeof(struct starpu_omp_task_region_attr)); /* This is freed in starpu_omp_task_region, as attr.cl_arg_free is set to true*/ void **arg_ptrs = calloc(4, sizeof(void*)); arg_ptrs[0] = new_task; arg_ptrs[1] = (void*) (intptr_t) (ndeps + ndeps_noalias); #ifdef _STARPU_OPENMP_LLVM_VARIANT if (new_task->nvariants == 0) #endif { attr->cl.cpu_funcs[0] = task_call; attr->cl.where = STARPU_CPU; } #ifdef _STARPU_OPENMP_LLVM_VARIANT else { for (int i = 0; i < new_task->nvariants; ++i) { switch(new_task->variants[i].kind) { case VARIANT_CPU: attr->cl.where |= STARPU_CPU; attr->cl.cpu_funcs[0] = task_call_cpu; arg_ptrs[2] = new_task->variants[i].fn; break; case VARIANT_CUDA: attr->cl.where |= STARPU_CUDA; attr->cl.cuda_funcs[0] = task_call_cuda; arg_ptrs[3] = new_task->variants[i].fn; break; case VARIANT_OPENCL: fprintf(stderr, "variant for opencl detected but not supported: %p, ignoring.\n", new_task->variants[i].fn); break; } } } #endif attr->cl_arg_size = (4)*sizeof(void *); attr->cl_arg_free = 1; attr->cl_arg = arg_ptrs; attr->if_clause = 1; attr->final_clause = 0; attr->untied_clause = 1; attr->mergeable_clause = 0; attr->cl.nbuffers = ndeps + ndeps_noalias; starpu_data_handle_t *handles = calloc(attr->cl.nbuffers, sizeof(starpu_data_handle_t)); int current_buffer = 0; starpu_data_handle_t current_handler = 0; for (int i = 0; i < ndeps; i++) { if (dep_list[i].flags.in && dep_list[i].flags.out) { attr->cl.modes[current_buffer] = STARPU_RW; } else if (dep_list[i].flags.in) { attr->cl.modes[current_buffer] = STARPU_R; } else { attr->cl.modes[current_buffer] = STARPU_W; } current_handler = starpu_omp_data_lookup(dep_list[i].base_addr); if (current_handler) { handles[current_buffer] = current_handler; } else { if (dep_list[i].len == 1) { starpu_variable_data_register(&handles[current_buffer], STARPU_MAIN_RAM, (uintptr_t)dep_list[i].base_addr, sizeof(kmp_intptr_t)); starpu_omp_handle_register(handles[current_buffer]); } else { starpu_vector_data_register(&handles[current_buffer], STARPU_MAIN_RAM, (uintptr_t)dep_list[i].base_addr, dep_list[i].len, dep_list[i].elem_size); starpu_omp_handle_register(handles[current_buffer]); } } current_buffer++; } for (int i = 0; i < ndeps_noalias; i++) { if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out) { attr->cl.modes[current_buffer] = STARPU_RW; } else if (noalias_dep_list[i].flags.in) { attr->cl.modes[current_buffer] = STARPU_R; } else { attr->cl.modes[current_buffer] = STARPU_W; } current_handler = starpu_omp_data_lookup(noalias_dep_list[i].base_addr); if (current_handler) { handles[current_buffer] = current_handler; } else { if (dep_list[i].len == 1) { starpu_variable_data_register(&handles[current_buffer], STARPU_MAIN_RAM, (uintptr_t)dep_list[i].base_addr, sizeof(kmp_intptr_t)); starpu_omp_handle_register(handles[current_buffer]); } else { starpu_vector_data_register(&handles[current_buffer], STARPU_MAIN_RAM, (uintptr_t)dep_list[i].base_addr, dep_list[i].len, dep_list[i].elem_size); starpu_omp_handle_register(handles[current_buffer]); } } current_buffer++; } if (current_buffer) { // If we have any deps attr->handles = &handles[0]; } // thoughts : create starpu_omp_task_region_attr here, fill it with kmp_taskdata // keep an arg to the wrapper with the kmp_task_t starpu_omp_task_region(attr); free(attr); return 0; } kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task) { int retval = __kmpc_omp_task_with_deps(loc_ref, gtid, new_task, 0, 0, 0, 0); return retval; } void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads) { (void) loc; (void) global_tid; (void) num_teams; (void) num_threads; abort(); } void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) { (void) loc; (void) argc; (void) microtask; abort(); } void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { (void) loc; (void) global_tid; abort(); } void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { (void) loc; (void) global_tid; abort(); } /* Thread Information */ kmp_int32 __kmpc_global_thread_num(ident_t *loc) { (void) loc; struct starpu_omp_region *region; region = _starpu_omp_get_region_at_level(1); if (region == NULL) return 0; return _starpu_omp_get_region_thread_num(region); } kmp_int32 __kmpc_global_num_threads(ident_t *loc) { (void) loc; struct starpu_omp_region *region; region = _starpu_omp_get_region_at_level(1); if (region == NULL) return 1; return region->nb_threads; } kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { (void) loc; return starpu_omp_get_thread_num(); } kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { (void) loc; return starpu_omp_get_num_threads(); } kmp_int32 __kmpc_in_parallel(ident_t *loc) { (void) loc; return starpu_omp_in_parallel(); } /* Work sharing */ kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { (void) loc; (void) global_tid; return starpu_omp_master_inline(); } void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { (void) loc; (void) global_tid; /* nothing */ } void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid) { (void) loc; (void) global_tid; starpu_omp_ordered_inline_begin(); } void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid) { (void) loc; (void) global_tid; starpu_omp_ordered_inline_end(); } kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { (void) loc; (void) global_tid; return starpu_omp_single_inline(); } void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { (void) loc; (void) global_tid; /* nothing */ } void __kmpc_dispatch_init_4(ident_t *loc) { (void) loc; abort(); } void __kmpc_dispatch_next_4(ident_t *loc) { (void) loc; abort(); } /* Work sharing */ void __kmpc_flush(ident_t *loc) { (void) loc; abort(); } void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { (void) loc; (void) global_tid; starpu_omp_barrier(); } kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { (void) loc; (void) global_tid; abort(); } void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { (void) loc; (void) global_tid; abort(); } kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { (void) loc; (void) global_tid; abort(); } void __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck) { (void) loc; (void) global_tid; (void) num_vars; (void) reduce_size; (void) reduce_data; (void) reduce_func; (void) lck; abort(); } void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck) { (void) loc; (void) global_tid; (void) lck; abort(); } void __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck) { (void) loc; (void) global_tid; (void) num_vars; (void) reduce_size; (void) reduce_data; (void) reduce_func; (void) lck; abort(); } void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck) { (void) loc; (void) global_tid; (void) lck; abort(); } /* lib constructor/destructor */ __attribute__((constructor)) static void __kmp_constructor(void) { static int _msg=0; if (_msg == 0) { _STARPU_MSG("Initialising the StarPU OpenMP LLVM Support\n"); _msg = 1; } int ret = starpu_omp_init(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void kmp_destructor(void) { starpu_omp_shutdown(); } /* omp lib API */ void omp_set_num_threads(int threads) { starpu_omp_set_num_threads(threads); } int omp_get_num_threads() { return starpu_omp_get_num_threads(); } int omp_get_thread_num() { return starpu_omp_get_thread_num(); } int omp_get_max_threads() { return starpu_omp_get_max_threads(); } int omp_get_num_procs(void) { return starpu_omp_get_num_procs(); } int omp_in_parallel(void) { return starpu_omp_in_parallel(); } void omp_set_dynamic(int dynamic_threads) { starpu_omp_set_dynamic(dynamic_threads); } int omp_get_dynamic(void) { return starpu_omp_get_dynamic(); } void omp_set_nested(int nested) { starpu_omp_set_nested(nested); } int omp_get_nested(void) { return starpu_omp_get_nested(); } int omp_get_cancellation(void) { return starpu_omp_get_cancellation(); } void omp_set_schedule(enum omp_sched_value kind, int modifier) { starpu_omp_set_schedule(kind, modifier); } void omp_get_schedule(enum omp_sched_value *kind, int *modifier) { starpu_omp_get_schedule((enum starpu_omp_sched_value*)kind, modifier); } int omp_get_thread_limit(void) { return starpu_omp_get_thread_limit(); } void omp_set_max_active_levels(int max_levels) { starpu_omp_set_max_active_levels(max_levels); } int omp_get_max_active_levels(void) { return starpu_omp_get_max_active_levels(); } int omp_get_level(void) { return starpu_omp_get_level(); } int omp_get_ancestor_thread_num(int level) { return starpu_omp_get_ancestor_thread_num(level); } int omp_get_team_size(int level) { return starpu_omp_get_team_size(level); } int omp_get_active_level(void) { return starpu_omp_get_active_level(); } int omp_in_final(void) { return starpu_omp_in_final(); } enum omp_proc_bind_value omp_get_proc_bind(void) { return starpu_omp_get_proc_bind(); } int omp_get_num_places(void) { return starpu_omp_get_num_places(); } int omp_get_place_num_procs(int place_num) { return starpu_omp_get_place_num_procs(place_num); } void omp_get_place_proc_ids(int place_num, int *ids) { starpu_omp_get_place_proc_ids(place_num, ids); } int omp_get_place_num(void) { return starpu_omp_get_place_num(); } int omp_get_partition_num_places(void) { return starpu_omp_get_partition_num_places(); } void omp_get_partition_place_nums(int *place_nums) { starpu_omp_get_partition_place_nums(place_nums); } void omp_set_default_device(int device_num) { starpu_omp_set_default_device(device_num); } int omp_get_default_device(void) { return starpu_omp_get_default_device(); } int omp_get_num_devices(void) { return starpu_omp_get_num_devices(); } int omp_get_num_teams(void) { return starpu_omp_get_num_teams(); } int omp_get_team_num(void) { return starpu_omp_get_team_num(); } int omp_is_initial_device(void) { return starpu_omp_is_initial_device(); } int omp_get_initial_device(void) { return starpu_omp_get_initial_device(); } int omp_get_max_task_priority(void) { return starpu_omp_get_max_task_priority(); } void omp_init_lock(omp_lock_t *lock) { starpu_omp_init_lock(lock); } void omp_destroy_lock(omp_lock_t *lock) { starpu_omp_destroy_lock(lock); } void omp_set_lock(omp_lock_t *lock) { starpu_omp_set_lock(lock); } void omp_unset_lock(omp_lock_t *lock) { starpu_omp_unset_lock(lock); } int omp_test_lock(omp_lock_t *lock) { return starpu_omp_test_lock(lock); } void omp_init_nest_lock(omp_nest_lock_t *lock) { starpu_omp_init_nest_lock(lock); } void omp_destroy_nest_lock(omp_nest_lock_t *lock) { starpu_omp_destroy_nest_lock(lock); } void omp_set_nest_lock(omp_nest_lock_t *lock) { starpu_omp_set_nest_lock(lock); } void omp_unset_nest_lock(omp_nest_lock_t *lock) { starpu_omp_unset_nest_lock(lock); } int omp_test_nest_lock(omp_nest_lock_t *lock) { return starpu_omp_test_nest_lock(lock); } double omp_get_wtime(void) { return starpu_omp_get_wtime(); } double omp_get_wtick(void) { return starpu_omp_get_wtick(); } void *omp_get_local_cuda_stream(void) { #ifdef STARPU_USE_CUDA return starpu_cuda_get_local_stream(); #else return 0; #endif } #endif /* STARPU_OPENMP_LLVM */ starpu-1.4.9+dfsg/starpufft/000077500000000000000000000000001507764646700160265ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpufft/Makefile.am000066400000000000000000000021701507764646700200620ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-subdirtests.mk SUBDIRS=src if STARPU_BUILD_STARPUFFT_EXAMPLES if STARPU_BUILD_TESTS SUBDIRS += tests endif endif versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) versinclude_HEADERS = \ include/starpufft.h pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = packages/libstarpufft.pc packages/starpufft-1.0.pc packages/starpufft-1.1.pc packages/starpufft-1.2.pc packages/starpufft-1.3.pc packages/starpufft-1.4.pc starpu-1.4.9+dfsg/starpufft/Makefile.in000066400000000000000000000755761507764646700201170ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_BUILD_STARPUFFT_EXAMPLES_TRUE@@STARPU_BUILD_TESTS_TRUE@am__append_1 = tests subdir = starpufft ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(versinclude_HEADERS) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(pkgconfigdir)" \ "$(DESTDIR)$(versincludedir)" DATA = $(pkgconfig_DATA) HEADERS = $(versinclude_HEADERS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = src tests am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/make/starpu-subdirtests.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # SUBDIRS = src $(am__append_1) versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) versinclude_HEADERS = \ include/starpufft.h pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = packages/libstarpufft.pc packages/starpufft-1.0.pc packages/starpufft-1.1.pc packages/starpufft-1.2.pc packages/starpufft-1.3.pc packages/starpufft-1.4.pc all: all-recursive .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpufft/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign starpufft/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-pkgconfigDATA: $(pkgconfig_DATA) @$(NORMAL_INSTALL) @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \ done uninstall-pkgconfigDATA: @$(NORMAL_UNINSTALL) @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir) install-versincludeHEADERS: $(versinclude_HEADERS) @$(NORMAL_INSTALL) @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(versincludedir)'"; \ $(MKDIR_P) "$(DESTDIR)$(versincludedir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(versincludedir)'"; \ $(INSTALL_HEADER) $$files "$(DESTDIR)$(versincludedir)" || exit $$?; \ done uninstall-versincludeHEADERS: @$(NORMAL_UNINSTALL) @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(versincludedir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile $(DATA) $(HEADERS) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(pkgconfigdir)" "$(DESTDIR)$(versincludedir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-pkgconfigDATA install-versincludeHEADERS install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-pkgconfigDATA uninstall-versincludeHEADERS .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ check-am clean clean-generic clean-libtool cscopelist-am ctags \ ctags-am distclean distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-pkgconfigDATA install-ps \ install-ps-am install-strip install-versincludeHEADERS \ installcheck installcheck-am installdirs installdirs-am \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags tags-am uninstall uninstall-am uninstall-pkgconfigDATA \ uninstall-versincludeHEADERS .PRECIOUS: Makefile # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # recheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i recheck || RET=1 ; \ done ; \ exit $$RET showcheckfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/starpufft/include/000077500000000000000000000000001507764646700174515ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpufft/include/starpufft.h000066400000000000000000000106611507764646700216440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ // The documentation for this file is in doc/doxygen/chapters/api/fft_support.doxy #ifndef __STARPU_FFT_H__ #define __STARPU_FFT_H__ #include #include #include #ifdef STARPU_USE_CUDA #include #define STARPU_CUFFT_REPORT_ERROR(status) STARPUFFT(report_error) (__starpu_func__, __FILE__, __LINE__, status) #endif /* !STARPU_USE_CUDA */ #define STARPUFFT_FORWARD -1 #define STARPUFFT_INVERSE 1 #define __STARPUFFT(name) starpufft_##name #define __STARPUFFTF(name) starpufftf_##name #define __STARPUFFTL(name) starpufftl_##name #define __STARPUFFT_INTERFACE(starpufft, real) \ typedef real _Complex starpufft(complex); \ \ typedef struct starpufft(plan) * starpufft(plan); \ \ starpufft(plan) starpufft(plan_dft_1d)(int n, int sign, unsigned flags); \ starpufft(plan) starpufft(plan_dft_2d)(int n, int m, int sign, unsigned flags); \ starpufft(plan) starpufft(plan_dft_3d)(int n, int m, int p, int sign, unsigned flags); \ starpufft(plan) starpufft(plan_dft_r2c_1d)(int n, unsigned flags); \ starpufft(plan) starpufft(plan_dft_c2r_1d)(int n, unsigned flags); \ \ void *starpufft(malloc)(size_t n); \ void starpufft(free)(void *p, size_t dim); \ \ int starpufft(execute)(starpufft(plan) p, void *in, void *out); \ struct starpu_task *starpufft(start)(starpufft(plan) p, void *in, void *out); \ \ int starpufft(execute_handle)(starpufft(plan) p, starpu_data_handle_t in, starpu_data_handle_t out); \ struct starpu_task *starpufft(start_handle)(starpufft(plan) p, starpu_data_handle_t in, starpu_data_handle_t out); \ \ void starpufft(cleanup)(starpufft(plan) p); \ void starpufft(destroy_plan)(starpufft(plan) p); \ \ void starpufft(startstats)(void); \ void starpufft(stopstats)(void); \ void starpufft(showstats)(FILE * out); __STARPUFFT_INTERFACE(__STARPUFFT, double) __STARPUFFT_INTERFACE(__STARPUFFTF, float) __STARPUFFT_INTERFACE(__STARPUFFTL, long double) /* Internal use */ extern int starpufft_last_plan_number; #endif // __STARPU_FFT_H__ starpu-1.4.9+dfsg/starpufft/packages/000077500000000000000000000000001507764646700176045ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpufft/packages/libstarpufft.pc.in000066400000000000000000000021301507764646700232360ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: starpufft Description: offers support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API Libs: -L${libdir} -lstarpufft-@STARPU_EFFECTIVE_VERSION@ Libs.private: @LDFLAGS@ @LIBS@ @STARPU_CUFFT_LDFLAGS@ @FFTW_LIBS@ @FFTWF_LIBS@ starpu-1.4.9+dfsg/starpufft/packages/starpufft-1.0.pc.in000066400000000000000000000020741507764646700230520ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: starpufft Description: offers support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ Libs: -L${libdir} -lstarpufft-@STARPU_EFFECTIVE_VERSION@ Libs.private: @LDFLAGS@ @LIBS@ @STARPU_CUFFT_LDFLAGS@ @FFTW_LIBS@ @FFTWF_LIBS@ starpu-1.4.9+dfsg/starpufft/packages/starpufft-1.1.pc.in000066400000000000000000000020741507764646700230530ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: starpufft Description: offers support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ Libs: -L${libdir} -lstarpufft-@STARPU_EFFECTIVE_VERSION@ Libs.private: @LDFLAGS@ @LIBS@ @STARPU_CUFFT_LDFLAGS@ @FFTW_LIBS@ @FFTWF_LIBS@ starpu-1.4.9+dfsg/starpufft/packages/starpufft-1.2.pc.in000066400000000000000000000020741507764646700230540ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: starpufft Description: offers support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ Libs: -L${libdir} -lstarpufft-@STARPU_EFFECTIVE_VERSION@ Libs.private: @LDFLAGS@ @LIBS@ @STARPU_CUFFT_LDFLAGS@ @FFTW_LIBS@ @FFTWF_LIBS@ starpu-1.4.9+dfsg/starpufft/packages/starpufft-1.3.pc.in000066400000000000000000000020741507764646700230550ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: starpufft Description: offers support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ Libs: -L${libdir} -lstarpufft-@STARPU_EFFECTIVE_VERSION@ Libs.private: @LDFLAGS@ @LIBS@ @STARPU_CUFFT_LDFLAGS@ @FFTW_LIBS@ @FFTWF_LIBS@ starpu-1.4.9+dfsg/starpufft/packages/starpufft-1.4.pc.in000066400000000000000000000020731507764646700230550ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: starpufft Description: offers support for heterogeneous multicore architecture Version: @PACKAGE_VERSION@ Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ Libs: -L${libdir} -lstarpufft-@STARPU_EFFECTIVE_VERSION@ Libs.private: @LDFLAGS@ @LIBS@ @STARPU_CUFFT_LDFLAGS@ @FFTW_LIBS@ @FFTWF_LIBS@ starpu-1.4.9+dfsg/starpufft/src/000077500000000000000000000000001507764646700166155ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpufft/src/Makefile.am000066400000000000000000000037521507764646700206600ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-notests.mk AM_CFLAGS += $(FFTWF_CFLAGS) AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/starpufft/include/ -I$(top_builddir)/include -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS) LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) LIBS += $(FFTW_LIBS) $(FFTWF_LIBS) LIBS += $(STARPU_CUDA_LDFLAGS) $(STARPU_CUFFT_LDFLAGS) lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la EXTRA_DIST = \ starpufft-float.h \ starpufft-double.h \ cudax_kernels.h \ starpufftx.c \ starpufftx1d.c \ starpufftx2d.c \ starpufftx3d.c \ cuda_kernels.cu \ cudaf_kernels.cu \ cudax_kernels.cu libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = starpufft.c starpufftf.c starpufft_common.c libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \ -version-info $(LIBSTARPUFFT_INTERFACE_CURRENT):$(LIBSTARPUFFT_INTERFACE_REVISION):$(LIBSTARPUFFT_INTERFACE_AGE) libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = if STARPU_USE_CUDA NVCCFLAGS += -Xcompiler -fPIC -Xlinker -fPIC libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBADD += cudaf_kernels.o if STARPU_HAVE_CUFFTDOUBLECOMPLEX libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBADD += cuda_kernels.o endif libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBS = $(LIBS) $(STARPU_CUDA_LDFLAGS) endif starpu-1.4.9+dfsg/starpufft/src/Makefile.in000066400000000000000000001004121507764646700206600ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ @STARPU_USE_CUDA_TRUE@am__append_3 = -Xcompiler -fPIC -Xlinker -fPIC @STARPU_USE_CUDA_TRUE@am__append_4 = cudaf_kernels.o @STARPU_HAVE_CUFFTDOUBLECOMPLEX_TRUE@@STARPU_USE_CUDA_TRUE@am__append_5 = cuda_kernels.o subdir = starpufft/src ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(libdir)" LTLIBRARIES = $(lib_LTLIBRARIES) libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES = \ $(am__append_4) $(am__append_5) am_libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = starpufft.lo \ starpufftf.lo starpufft_common.lo libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ $(am_libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LINK = $(LIBTOOL) \ $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS) \ $(LDFLAGS) -o $@ AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/starpufft.Plo \ ./$(DEPDIR)/starpufft_common.Plo ./$(DEPDIR)/starpufftf.Plo am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = $(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) DIST_SOURCES = $(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/make/starpu-notests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ $(STARPU_EXPORTED_LIBS) $(FFTW_LIBS) $(FFTWF_LIBS) \ $(STARPU_CUDA_LDFLAGS) $(STARPU_CUFFT_LDFLAGS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) $(am__append_3) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(FFTWF_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/starpufft/include/ -I$(top_builddir)/include -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS) lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la EXTRA_DIST = \ starpufft-float.h \ starpufft-double.h \ cudax_kernels.h \ starpufftx.c \ starpufftx1d.c \ starpufftx2d.c \ starpufftx3d.c \ cuda_kernels.cu \ cudaf_kernels.cu \ cudax_kernels.cu libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = starpufft.c starpufftf.c starpufft_common.c libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined \ -version-info $(LIBSTARPUFFT_INTERFACE_CURRENT):$(LIBSTARPUFFT_INTERFACE_REVISION):$(LIBSTARPUFFT_INTERFACE_AGE) libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = $(am__append_4) \ $(am__append_5) @STARPU_USE_CUDA_TRUE@libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBS = $(LIBS) $(STARPU_CUDA_LDFLAGS) all: all-am .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpufft/src/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign starpufft/src/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): install-libLTLIBRARIES: $(lib_LTLIBRARIES) @$(NORMAL_INSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ list2=; for p in $$list; do \ if test -f $$p; then \ list2="$$list2 $$p"; \ else :; fi; \ done; \ test -z "$$list2" || { \ echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ } uninstall-libLTLIBRARIES: @$(NORMAL_UNINSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ for p in $$list; do \ $(am__strip_dir) \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ done clean-libLTLIBRARIES: -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) @list='$(lib_LTLIBRARIES)'; \ locs=`for p in $$list; do echo $$p; done | \ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ sort -u`; \ test -z "$$locs" || { \ echo rm -f $${locs}; \ rm -f $${locs}; \ } libstarpufft-@STARPU_EFFECTIVE_VERSION@.la: $(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(EXTRA_libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(AM_V_CCLD)$(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LINK) -rpath $(libdir) $(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpufft.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpufft_common.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpufftf.Plo@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs tags TAGS: ctags CTAGS: cscope cscopelist: distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(LTLIBRARIES) installdirs: for dir in "$(DESTDIR)$(libdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ mostlyclean-am distclean: distclean-am -rm -f ./$(DEPDIR)/starpufft.Plo -rm -f ./$(DEPDIR)/starpufft_common.Plo -rm -f ./$(DEPDIR)/starpufftf.Plo -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-libLTLIBRARIES install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f ./$(DEPDIR)/starpufft.Plo -rm -f ./$(DEPDIR)/starpufft_common.Plo -rm -f ./$(DEPDIR)/starpufftf.Plo -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-libLTLIBRARIES .MAKE: install-am install-strip .PHONY: all all-am am--depfiles check check-am clean clean-generic \ clean-libLTLIBRARIES clean-libtool cscopelist-am ctags-am \ distclean distclean-compile distclean-generic \ distclean-libtool distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am \ install-libLTLIBRARIES install-man install-pdf install-pdf-am \ install-ps install-ps-am install-strip installcheck \ installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags-am uninstall uninstall-am uninstall-libLTLIBRARIES .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/starpufft/src/cuda_kernels.cu000066400000000000000000000013511507764646700216050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "starpufft-double.h" #include "cudax_kernels.cu" starpu-1.4.9+dfsg/starpufft/src/cudaf_kernels.cu000066400000000000000000000013501507764646700217520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "starpufft-float.h" #include "cudax_kernels.cu" starpu-1.4.9+dfsg/starpufft/src/cudax_kernels.cu000066400000000000000000000126551507764646700220060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define _externC extern "C" #include "cudax_kernels.h" /* Note: these assume that the sizes are powers of two */ #define VARS_1d \ unsigned start = threadIdx.x + blockIdx.x * blockDim.x; \ unsigned numthreads = blockDim.x * gridDim.x; #define DISTRIB_1d(n, func,args) \ unsigned threads_per_block = 128; \ \ if (n < threads_per_block) \ { \ dim3 dimGrid(n); \ func <<>> args; \ cudaError_t status = cudaGetLastError(); \ if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); \ } \ else \ { \ dim3 dimGrid(n / threads_per_block); \ dim3 dimBlock(threads_per_block); \ func <<>> args; \ cudaError_t status = cudaGetLastError(); \ if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); \ } \ cudaStreamSynchronize(starpu_cuda_get_local_stream()); \ extern "C" __global__ void STARPUFFT(cuda_twist1_1d)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned n1, unsigned n2) { unsigned j; VARS_1d unsigned end = n2; for (j = start; j < end; j += numthreads) twisted1[j] = in[i+j*n1]; } extern "C" void STARPUFFT(cuda_twist1_1d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned n1, unsigned n2) { DISTRIB_1d(n2, STARPUFFT(cuda_twist1_1d), (in, twisted1, i, n1, n2)); } extern "C" __global__ void STARPUFFT(cuda_twiddle_1d)(_cuComplex * out, const _cuComplex * roots, unsigned n, unsigned i) { unsigned j; VARS_1d unsigned end = n; for (j = start; j < end; j += numthreads) out[j] = _cuCmul(out[j], roots[i*j]); return; } extern "C" void STARPUFFT(cuda_twiddle_1d_host)(_cuComplex *out, const _cuComplex *roots, unsigned n, unsigned i) { DISTRIB_1d(n, STARPUFFT(cuda_twiddle_1d), (out, roots, n, i)); } #define VARS_2d \ unsigned startx = threadIdx.x + blockIdx.x * blockDim.x; \ unsigned starty = threadIdx.y + blockIdx.y * blockDim.y; \ unsigned numthreadsx = blockDim.x * gridDim.x; \ unsigned numthreadsy = blockDim.y * gridDim.y; /* FIXME: introduce threads_per_dim_n / m instead */ #define DISTRIB_2d(n, m, func, args) \ unsigned threads_per_dim = 16; \ if (n < threads_per_dim) \ { \ if (m < threads_per_dim) \ { \ dim3 dimGrid(n, m); \ func <<>> args; \ cudaError_t status = cudaGetLastError(); \ if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); \ } \ else \ { \ dim3 dimGrid(1, m / threads_per_dim); \ dim3 dimBlock(n, threads_per_dim); \ func <<>> args; \ cudaError_t status = cudaGetLastError(); \ if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); \ } \ } \ else \ { \ if (m < threads_per_dim) \ { \ dim3 dimGrid(n / threads_per_dim, 1); \ dim3 dimBlock(threads_per_dim, m); \ func <<>> args; \ cudaError_t status = cudaGetLastError(); \ if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); \ } \ else \ { \ dim3 dimGrid(n / threads_per_dim, m / threads_per_dim); \ dim3 dimBlock(threads_per_dim, threads_per_dim); \ func <<>> args; \ cudaError_t status = cudaGetLastError(); \ if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); \ } \ } \ cudaStreamSynchronize(starpu_cuda_get_local_stream()); \ extern "C" __global__ void STARPUFFT(cuda_twist1_2d)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned j, unsigned n1, unsigned n2, unsigned m1, unsigned m2) { unsigned k, l; VARS_2d unsigned endx = n2; unsigned endy = m2; unsigned m = m1*m2; for (k = startx; k < endx; k += numthreadsx) for (l = starty; l < endy; l += numthreadsy) twisted1[k*m2+l] = in[i*m+j+k*m*n1+l*m1]; } extern "C" void STARPUFFT(cuda_twist1_2d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned j, unsigned n1, unsigned n2, unsigned m1, unsigned m2) { DISTRIB_2d(n2, m2, STARPUFFT(cuda_twist1_2d), (in, twisted1, i, j, n1, n2, m1, m2)); } extern "C" __global__ void STARPUFFT(cuda_twiddle_2d)(_cuComplex * out, const _cuComplex * roots0, const _cuComplex * roots1, unsigned n2, unsigned m2, unsigned i, unsigned j) { unsigned k, l; VARS_2d unsigned endx = n2; unsigned endy = m2; for (k = startx; k < endx ; k += numthreadsx) for (l = starty; l < endy ; l += numthreadsy) out[k*m2 + l] = _cuCmul(_cuCmul(out[k*m2 + l], roots0[i*k]), roots1[j*l]); return; } extern "C" void STARPUFFT(cuda_twiddle_2d_host)(_cuComplex *out, const _cuComplex *roots0, const _cuComplex *roots1, unsigned n2, unsigned m2, unsigned i, unsigned j) { DISTRIB_2d(n2, m2, STARPUFFT(cuda_twiddle_2d), (out, roots0, roots1, n2, m2, i, j)); } starpu-1.4.9+dfsg/starpufft/src/cudax_kernels.h000066400000000000000000000024341507764646700216200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include _externC void STARPUFFT(cuda_twist1_1d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned n1, unsigned n2); _externC void STARPUFFT(cuda_twiddle_1d_host)(_cuComplex *out, const _cuComplex *roots, unsigned n, unsigned i); _externC void STARPUFFT(cuda_twist1_2d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned j, unsigned n1, unsigned n2, unsigned m1, unsigned m2); _externC void STARPUFFT(cuda_twiddle_2d_host)(_cuComplex *out, const _cuComplex *roots0, const _cuComplex *roots1, unsigned n2, unsigned m2, unsigned i, unsigned j); starpu-1.4.9+dfsg/starpufft/src/starpufft-double.h000066400000000000000000000031461507764646700222600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) #include #include #endif #ifdef STARPU_USE_CUDA #include #endif #undef STARPUFFT_FLOAT #define STARPUFFT_DOUBLE typedef double real; #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) typedef fftw_complex _fftw_complex; typedef fftw_plan _fftw_plan; #endif #ifdef STARPU_USE_CUDA typedef cuDoubleComplex _cuComplex; typedef cufftDoubleComplex _cufftComplex; #define _cufftExecC2C cufftExecZ2Z #define _cufftExecR2C cufftExecD2Z #define _cufftExecC2R cufftExecZ2D #define _CUFFT_C2C CUFFT_Z2Z #define _CUFFT_R2C CUFFT_D2Z #define _CUFFT_C2R CUFFT_Z2D #define _cuCmul(x,y) cuCmul(x,y) #endif #define STARPUFFT(name) starpufft_##name #define _FFTW(name) fftw_##name #ifdef STARPU_USE_CUDA void STARPUFFT(report_error)(const char *func, const char *file, int line, cufftResult status); #endif /* !STARPU_USE_CUDA */ #define TYPE "" starpu-1.4.9+dfsg/starpufft/src/starpufft-float.h000066400000000000000000000031371507764646700221130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) #include #include #endif #ifdef STARPU_USE_CUDA #include #endif #undef STARPUFFT_DOUBLE #define STARPUFFT_FLOAT typedef float real; #if defined(STARPU_HAVE_FFTW) && !defined(__CUDACC__) typedef fftwf_complex _fftw_complex; typedef fftwf_plan _fftw_plan; #endif #ifdef STARPU_USE_CUDA typedef cuComplex _cuComplex; typedef cufftComplex _cufftComplex; #define _cufftExecC2C cufftExecC2C #define _cufftExecR2C cufftExecR2C #define _cufftExecC2R cufftExecC2R #define _CUFFT_C2C CUFFT_C2C #define _CUFFT_R2C CUFFT_R2C #define _CUFFT_C2R CUFFT_C2R #define _cuCmul(x,y) cuCmulf(x,y) #endif #define STARPUFFT(name) starpufftf_##name #define _FFTW(name) fftwf_##name #ifdef STARPU_USE_CUDA void STARPUFFT(report_error)(const char *func, const char *file, int line, cufftResult status); #endif /* !STARPU_USE_CUDA */ #define TYPE "f" starpu-1.4.9+dfsg/starpufft/src/starpufft.c000066400000000000000000000013451507764646700210020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "starpufft-double.h" #include "starpufftx.c" starpu-1.4.9+dfsg/starpufft/src/starpufft_common.c000066400000000000000000000014601507764646700223500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "starpufft.h" /* Used as an identifier in starpu tags to let plans run concurrently */ int starpufft_last_plan_number; starpu-1.4.9+dfsg/starpufft/src/starpufftf.c000066400000000000000000000013441507764646700211470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "starpufft-float.h" #include "starpufftx.c" starpu-1.4.9+dfsg/starpufft/src/starpufftx.c000066400000000000000000000320141507764646700211670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define PARALLEL 0 #include #include #include #include #include #include "starpufft.h" #ifdef STARPU_USE_CUDA #define _externC extern #include "cudax_kernels.h" #if (defined(STARPUFFT_FLOAT) || defined(STARPU_HAVE_CUFFTDOUBLECOMPLEX)) && !defined(STARPU_COVERITY) # define __STARPU_USE_CUDA #else # undef __STARPU_USE_CUDA #endif #endif #define _FFTW_FLAGS FFTW_ESTIMATE /* Steps for the parallel variant */ enum steps { SPECIAL, TWIST1, FFT1, JOIN, TWIST2, FFT2, TWIST3, END }; #define NUMBER_BITS 5 #define NUMBER_SHIFT (64 - NUMBER_BITS) #define STEP_BITS 3 #define STEP_SHIFT (NUMBER_SHIFT - STEP_BITS) /* Tags for the steps of the parallel variant */ #define _STEP_TAG(plan, step, i) (((starpu_tag_t) plan->number << NUMBER_SHIFT) | ((starpu_tag_t)(step) << STEP_SHIFT) | (starpu_tag_t) (i)) #define I_BITS STEP_SHIFT enum type { R2C, C2R, C2C }; static unsigned task_per_worker[STARPU_NMAXWORKERS]; static unsigned samples_per_worker[STARPU_NMAXWORKERS]; static struct timeval start, submit_tasks, end; /* * * The actual kernels * */ struct STARPUFFT(plan) { int number; /* uniquely identifies the plan, for starpu tags */ int *n; int *n1; int *n2; int totsize; int totsize1; /* Number of first-round tasks */ int totsize2; /* Size of first-round tasks */ int totsize3; /* Number of second-round tasks */ int totsize4; /* Size of second-round tasks */ int dim; enum type type; int sign; STARPUFFT(complex) *roots[2]; starpu_data_handle_t roots_handle[2]; /* For each worker, we need some data */ struct { #ifdef STARPU_USE_CUDA /* CUFFT plans */ cufftHandle plan1_cuda, plan2_cuda; /* Sequential version */ cufftHandle plan_cuda; #endif #ifdef STARPU_HAVE_FFTW /* FFTW plans */ _fftw_plan plan1_cpu, plan2_cpu; /* Sequential version */ _fftw_plan plan_cpu; #endif } plans[STARPU_NMAXWORKERS]; /* Buffers for codelets */ STARPUFFT(complex) *in, *twisted1, *fft1, *twisted2, *fft2, *out; size_t twisted1_size, twisted2_size, fft1_size, fft2_size; /* corresponding starpu DSM handles */ starpu_data_handle_t in_handle, *twisted1_handle, *fft1_handle, *twisted2_handle, *fft2_handle, out_handle; /* Tasks */ struct starpu_task **twist1_tasks, **fft1_tasks, **twist2_tasks, **fft2_tasks, **twist3_tasks; struct starpu_task *join_task, *end_task; /* Arguments for tasks */ struct STARPUFFT(args) *fft1_args, *fft2_args; }; struct STARPUFFT(args) { struct STARPUFFT(plan) *plan; int i, j, jj, kk, ll, *iv, *kkv; }; static void check_dims(STARPUFFT(plan) plan) { int dim; for (dim = 0; dim < plan->dim; dim++) if (plan->n[dim] & (plan->n[dim]-1)) { fprintf(stderr,"can't cope with non-power-of-2\n"); STARPU_ABORT(); } } static void compute_roots(STARPUFFT(plan) plan) { int dim, k; /* Compute the n-roots and m-roots of unity for twiddling */ for (dim = 0; dim < plan->dim; dim++) { STARPUFFT(complex) exp = (plan->sign * 2. * 4.*atan(1.)) * _Complex_I / (STARPUFFT(complex)) plan->n[dim]; plan->roots[dim] = malloc(plan->n[dim] * sizeof(**plan->roots)); for (k = 0; k < plan->n[dim]; k++) plan->roots[dim][k] = cexp(exp*k); starpu_vector_data_register(&plan->roots_handle[dim], STARPU_MAIN_RAM, (uintptr_t) plan->roots[dim], plan->n[dim], sizeof(**plan->roots)); #ifdef STARPU_USE_CUDA if (plan->n[dim] > 100000) { /* prefetch the big root array on GPUs */ unsigned worker; unsigned nworkers = starpu_worker_get_count(); for (worker = 0; worker < nworkers; worker++) { unsigned node = starpu_worker_get_memory_node(worker); if (starpu_worker_get_type(worker) == STARPU_CUDA_WORKER) starpu_data_prefetch_on_node(plan->roots_handle[dim], node, 0); } } #endif } } /* Only CUDA capability >= 1.3 supports doubles, rule old card out. */ #ifdef STARPUFFT_DOUBLE static int can_execute(unsigned workerid, struct starpu_task *task STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED) { if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER) return 1; #ifdef STARPU_USE_CUDA { /* Cuda device */ const struct cudaDeviceProp *props; props = starpu_cuda_get_device_properties(workerid); if (props->major >= 2 || props->minor >= 3) /* At least compute capability 1.3, supports doubles */ return 1; /* Old card does not support doubles */ return 0; } #endif return 0; } #define CAN_EXECUTE .can_execute = can_execute, #else #define CAN_EXECUTE #endif #include "starpufftx1d.c" #include "starpufftx2d.c" #include "starpufftx3d.c" struct starpu_task * STARPUFFT(start)(STARPUFFT(plan) plan, void *_in, void *_out) { struct starpu_task *task; int z; plan->in = _in; plan->out = _out; switch (plan->dim) { case 1: { switch (plan->type) { case C2C: starpu_vector_data_register(&plan->in_handle, STARPU_MAIN_RAM, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex))); if (!PARALLEL) starpu_vector_data_register(&plan->out_handle, STARPU_MAIN_RAM, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex))); if (PARALLEL) { for (z = 0; z < plan->totsize1; z++) plan->twist1_tasks[z]->handles[0] = plan->in_handle; } task = STARPUFFT(start1dC2C)(plan, plan->in_handle, plan->out_handle); break; default: STARPU_ABORT(); break; } break; } case 2: starpu_vector_data_register(&plan->in_handle, STARPU_MAIN_RAM, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex))); if (!PARALLEL) starpu_vector_data_register(&plan->out_handle, STARPU_MAIN_RAM, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex))); if (PARALLEL) { for (z = 0; z < plan->totsize1; z++) plan->twist1_tasks[z]->handles[0] = plan->in_handle; } task = STARPUFFT(start2dC2C)(plan, plan->in_handle, plan->out_handle); break; case 3: starpu_vector_data_register(&plan->in_handle, STARPU_MAIN_RAM, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex))); if (!PARALLEL) starpu_vector_data_register(&plan->out_handle, STARPU_MAIN_RAM, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex))); if (PARALLEL) { for (z = 0; z < plan->totsize1; z++) plan->twist1_tasks[z]->handles[0] = plan->in_handle; } task = STARPUFFT(start3dC2C)(plan, plan->in_handle, plan->out_handle); break; default: STARPU_ABORT(); break; } return task; } void STARPUFFT(cleanup)(STARPUFFT(plan) plan) { if (plan->in_handle) starpu_data_unregister(plan->in_handle); if (!PARALLEL) { if (plan->out_handle) starpu_data_unregister(plan->out_handle); } } struct starpu_task * STARPUFFT(start_handle)(STARPUFFT(plan) plan, starpu_data_handle_t in, starpu_data_handle_t out) { return STARPUFFT(start1dC2C)(plan, in, out); } int STARPUFFT(execute)(STARPUFFT(plan) plan, void *in, void *out) { int ret; memset(task_per_worker, 0, sizeof(task_per_worker)); memset(samples_per_worker, 0, sizeof(task_per_worker)); gettimeofday(&start, NULL); struct starpu_task *task = STARPUFFT(start)(plan, in, out); gettimeofday(&submit_tasks, NULL); if (task) { ret = starpu_task_wait(task); STARPU_ASSERT(ret == 0); } STARPUFFT(cleanup)(plan); gettimeofday(&end, NULL); return (task == NULL ? -1 : 0); } int STARPUFFT(execute_handle)(STARPUFFT(plan) plan, starpu_data_handle_t in, starpu_data_handle_t out) { int ret; struct starpu_task *task = STARPUFFT(start_handle)(plan, in, out); if (!task) return -1; ret = starpu_task_wait(task); STARPU_ASSERT(ret == 0); return 0; } /* Destroy FFTW plans, unregister and free buffers, and free tags */ void STARPUFFT(destroy_plan)(STARPUFFT(plan) plan) { unsigned workerid; int dim, i; for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) { switch (starpu_worker_get_type(workerid)) { case STARPU_CPU_WORKER: #ifdef STARPU_HAVE_FFTW if (PARALLEL) { _FFTW(destroy_plan)(plan->plans[workerid].plan1_cpu); _FFTW(destroy_plan)(plan->plans[workerid].plan2_cpu); } else { _FFTW(destroy_plan)(plan->plans[workerid].plan_cpu); } #endif break; case STARPU_CUDA_WORKER: #ifdef STARPU_USE_CUDA /* FIXME: Can't deallocate */ #endif break; default: /* Do not care, we won't be executing anything there. */ break; } } if (PARALLEL) { for (i = 0; i < plan->totsize1; i++) { starpu_data_unregister(plan->twisted1_handle[i]); free(plan->twist1_tasks[i]); starpu_data_unregister(plan->fft1_handle[i]); free(plan->fft1_tasks[i]); } free(plan->twisted1_handle); free(plan->twist1_tasks); free(plan->fft1_handle); free(plan->fft1_tasks); free(plan->fft1_args); free(plan->join_task); for (i = 0; i < plan->totsize3; i++) { starpu_data_unregister(plan->twisted2_handle[i]); free(plan->twist2_tasks[i]); starpu_data_unregister(plan->fft2_handle[i]); free(plan->fft2_tasks[i]); free(plan->twist3_tasks[i]); } free(plan->end_task); free(plan->twisted2_handle); free(plan->twist2_tasks); free(plan->fft2_handle); free(plan->fft2_tasks); free(plan->twist3_tasks); free(plan->fft2_args); for (dim = 0; dim < plan->dim; dim++) { starpu_data_unregister(plan->roots_handle[dim]); free(plan->roots[dim]); } switch (plan->dim) { case 1: STARPUFFT(free_1d_tags)(plan); break; case 2: STARPUFFT(free_2d_tags)(plan); break; default: STARPU_ABORT(); break; } free(plan->n1); free(plan->n2); STARPUFFT(free)(plan->twisted1, plan->twisted1_size); STARPUFFT(free)(plan->fft1, plan->fft1_size); STARPUFFT(free)(plan->twisted2, plan->twisted2_size); STARPUFFT(free)(plan->fft2, plan->fft2_size); } free(plan->n); free(plan); } void * STARPUFFT(malloc)(size_t n) { #ifdef STARPU_USE_CUDA void *res; starpu_malloc(&res, n); return res; #else # ifdef STARPU_HAVE_FFTW return _FFTW(malloc)(n); # else return malloc(n); # endif #endif } void STARPUFFT(free)(void *p, size_t dim) { #ifdef STARPU_USE_CUDA starpu_free_noflag(p, dim); #else (void)dim; # ifdef STARPU_HAVE_FFTW _FFTW(free)(p); # else free(p); # endif #endif } void STARPUFFT(showstats)(FILE *out) { unsigned worker; unsigned total; #define TIMING(begin,end) (double)((end.tv_sec - begin.tv_sec)*1000000 + (end.tv_usec - begin.tv_usec)) #define MSTIMING(begin,end) (TIMING(begin,end)/1000.) double paratiming = TIMING(start,end); fprintf(out, "Tasks submission took %2.2f ms\n", MSTIMING(start,submit_tasks)); fprintf(out, "Tasks termination took %2.2f ms\n", MSTIMING(submit_tasks,end)); fprintf(out, "Total %2.2f ms\n", MSTIMING(start,end)); for (worker = 0, total = 0; worker < starpu_worker_get_count(); worker++) total += task_per_worker[worker]; if (!total) return; for (worker = 0; worker < starpu_worker_get_count(); worker++) { if (task_per_worker[worker]) { char name[32]; starpu_worker_get_name(worker, name, sizeof(name)); unsigned long bytes = sizeof(STARPUFFT(complex))*samples_per_worker[worker]; fprintf(stderr, "\t%s -> %2.2f MB\t%2.2f\tMB/s\t%u %2.2f %%\n", name, (1.0*bytes)/(1024*1024), bytes/paratiming, task_per_worker[worker], (100.0*task_per_worker[worker])/total); } } } #ifdef STARPU_USE_CUDA void STARPUFFT(report_error)(const char *func, const char *file, int line, cufftResult status) { char *errormsg; switch (status) { case CUFFT_SUCCESS: errormsg = "success"; /* It'd be weird to get here. */ break; case CUFFT_INVALID_PLAN: errormsg = "invalid plan"; break; case CUFFT_ALLOC_FAILED: errormsg = "alloc failed"; break; case CUFFT_INVALID_TYPE: errormsg = "invalid type"; break; case CUFFT_INVALID_VALUE: errormsg = "invalid value"; break; case CUFFT_INTERNAL_ERROR: errormsg = "internal error"; break; case CUFFT_EXEC_FAILED: errormsg = "exec failed"; break; case CUFFT_SETUP_FAILED: errormsg = "setup failed"; break; case CUFFT_INVALID_SIZE: errormsg = "invalid size"; break; case CUFFT_UNALIGNED_DATA: errormsg = "unaligned data"; break; #if defined(MAX_CUFFT_ERROR) && (MAX_CUFFT_ERROR >= 0xE) case CUFFT_INCOMPLETE_PARAMETER_LIST: errormsg = "incomplete parameter list"; break; case CUFFT_INVALID_DEVICE: errormsg = "invalid device"; break; case CUFFT_PARSE_ERROR: errormsg = "parse error"; break; case CUFFT_NO_WORKSPACE: errormsg = "no workspace"; break; #endif /* MAX_CUFFT_ERROR >= 0xE */ default: errormsg = "unknown error"; break; } fprintf(stderr, "oops in %s (%s:%d)... %d: %s\n", func, file, line, status, errormsg); STARPU_ABORT(); } #endif /* !STARPU_USE_CUDA */ starpu-1.4.9+dfsg/starpufft/src/starpufftx1d.c000066400000000000000000000626241507764646700214260ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * * Dumb parallel version * */ #define DIV_1D 64 /* * Overall strategy for an fft of size n: * - perform n1 ffts of size n2 * - twiddle * - perform n2 ffts of size n1 * * - n1 defaults to DIV_1D, thus n2 defaults to n / DIV_1D. * * Precise tasks: * * - twist1: twist the whole n-element input (called "in") into n1 chunks of * size n2, by using n1 tasks taking the whole n-element input as a * R parameter and one n2 output as a W parameter. The result is * called twisted1. * - fft1: perform n1 (n2) ffts, by using n1 tasks doing one fft each. Also * twiddle the result to prepare for the fft2. The result is called * fft1. * - join: depends on all the fft1s, to gather the n1 results of size n2 in * the fft1 vector. * - twist2: twist the fft1 vector into n2 chunks of size n1, called twisted2. * since n2 is typically very large, this step is divided in DIV_1D * tasks, each of them performing n2/DIV_1D of them * - fft2: perform n2 ffts of size n1. This is divided in DIV_1D tasks of * n2/DIV_1D ffts, to be performed in batches. The result is called * fft2. * - twist3: twist back the result of the fft2s above into the output buffer. * Only implemented on CPUs for simplicity of the gathering. * * The tag space thus uses 3 dimensions: * - the number of the plan. * - the step (TWIST1, FFT1, JOIN, TWIST2, FFT2, TWIST3, END) * - an index i between 0 and DIV_1D-1. */ #define STEP_TAG_1D(plan, step, i) _STEP_TAG(plan, step, i) #ifdef __STARPU_USE_CUDA /* twist1: * * Twist the full input vector (first parameter) into one chunk of size n2 * (second parameter) */ static void STARPUFFT(twist1_1d_kernel_gpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; int i = args->i; int n1 = plan->n1[0]; int n2 = plan->n2[0]; _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); _cufftComplex * restrict twisted1 = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); STARPUFFT(cuda_twist1_1d_host)(in, twisted1, i, n1, n2); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } /* fft1: * * Perform one fft of size n2 */ static void STARPUFFT(fft1_1d_plan_gpu)(void *args) { STARPUFFT(plan) plan = args; int n2 = plan->n2[0]; int workerid = starpu_worker_get_id_check(); cufftResult cures; cures = cufftPlan1d(&plan->plans[workerid].plan1_cuda, n2, _CUFFT_C2C, 1); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); cufftSetStream(plan->plans[workerid].plan1_cuda, starpu_cuda_get_local_stream()); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); } static void STARPUFFT(fft1_1d_kernel_gpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; int i = args->i; int n2 = plan->n2[0]; cufftResult cures; _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); _cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); const _cufftComplex * restrict roots = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[2]); int workerid = starpu_worker_get_id_check(); task_per_worker[workerid]++; cures = _cufftExecC2C(plan->plans[workerid].plan1_cuda, in, out, plan->sign == -1 ? CUFFT_FORWARD : CUFFT_INVERSE); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); STARPUFFT(cuda_twiddle_1d_host)(out, roots, n2, i); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } /* fft2: * * Perform n3 = n2/DIV_1D ffts of size n1 */ static void STARPUFFT(fft2_1d_plan_gpu)(void *args) { STARPUFFT(plan) plan = args; int n1 = plan->n1[0]; int n2 = plan->n2[0]; int n3 = n2/DIV_1D; cufftResult cures; int workerid = starpu_worker_get_id_check(); cures = cufftPlan1d(&plan->plans[workerid].plan2_cuda, n1, _CUFFT_C2C, n3); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); cufftSetStream(plan->plans[workerid].plan2_cuda, starpu_cuda_get_local_stream()); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); } static void STARPUFFT(fft2_1d_kernel_gpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; cufftResult cures; _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); _cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); int workerid = starpu_worker_get_id_check(); task_per_worker[workerid]++; /* NOTE using batch support */ cures = _cufftExecC2C(plan->plans[workerid].plan2_cuda, in, out, plan->sign == -1 ? CUFFT_FORWARD : CUFFT_INVERSE); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } #endif /* twist1: * * Twist the full input vector (first parameter) into one chunk of size n2 * (second parameter) */ static void STARPUFFT(twist1_1d_kernel_cpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; int i = args->i; int j; int n1 = plan->n1[0]; int n2 = plan->n2[0]; STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); /* printf("twist1 %d %g\n", i, (double) cabs(plan->in[i])); */ for (j = 0; j < n2; j++) twisted1[j] = in[i+j*n1]; } #ifdef STARPU_HAVE_FFTW /* fft1: * * Perform one fft of size n2 */ static void STARPUFFT(fft1_1d_kernel_cpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; int i = args->i; int j; int n2 = plan->n2[0]; int workerid = starpu_worker_get_id_check(); task_per_worker[workerid]++; STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); STARPUFFT(complex) * restrict fft1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); /* printf("fft1 %d %g\n", i, (double) cabs(twisted1[0])); */ _FFTW(execute_dft)(plan->plans[workerid].plan1_cpu, twisted1, fft1); /* twiddle fft1 buffer */ for (j = 0; j < n2; j++) fft1[j] = fft1[j] * plan->roots[0][i*j]; } #endif /* twist2: * * Twist the full vector (results of the fft1s) into one package of n2/DIV_1D * chunks of size n1 */ static void STARPUFFT(twist2_1d_kernel_cpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; int jj = args->jj; /* between 0 and DIV_1D */ int jjj; /* between 0 and n3 */ int i; int n1 = plan->n1[0]; int n2 = plan->n2[0]; int n3 = n2/DIV_1D; STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); /* printf("twist2 %d %g\n", jj, (double) cabs(plan->fft1[jj])); */ for (jjj = 0; jjj < n3; jjj++) { int j = jj * n3 + jjj; for (i = 0; i < n1; i++) twisted2[jjj*n1+i] = plan->fft1[i*n2+j]; } } #ifdef STARPU_HAVE_FFTW /* fft2: * * Perform n3 = n2/DIV_1D ffts of size n1 */ static void STARPUFFT(fft2_1d_kernel_cpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; /* int jj = args->jj; */ int workerid = starpu_worker_get_id_check(); task_per_worker[workerid]++; STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); /* printf("fft2 %d %g\n", jj, (double) cabs(twisted2[plan->totsize4-1])); */ _FFTW(execute_dft)(plan->plans[workerid].plan2_cpu, twisted2, fft2); } #endif /* twist3: * * Spread the package of n2/DIV_1D chunks of size n1 into the output vector */ static void STARPUFFT(twist3_1d_kernel_cpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; int jj = args->jj; /* between 0 and DIV_1D */ int jjj; /* between 0 and n3 */ int i; int n1 = plan->n1[0]; int n2 = plan->n2[0]; int n3 = n2/DIV_1D; const STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); /* printf("twist3 %d %g\n", jj, (double) cabs(fft2[0])); */ for (jjj = 0; jjj < n3; jjj++) { int j = jj * n3 + jjj; for (i = 0; i < n1; i++) plan->out[i*n2+j] = fft2[jjj*n1+i]; } } /* Performance models for the 5 kinds of tasks */ static struct starpu_perfmodel STARPUFFT(twist1_1d_model) = { .type = STARPU_HISTORY_BASED, .symbol = TYPE"twist1_1d" }; static struct starpu_perfmodel STARPUFFT(fft1_1d_model) = { .type = STARPU_HISTORY_BASED, .symbol = TYPE"fft1_1d" }; static struct starpu_perfmodel STARPUFFT(twist2_1d_model) = { .type = STARPU_HISTORY_BASED, .symbol = TYPE"twist2_1d" }; static struct starpu_perfmodel STARPUFFT(fft2_1d_model) = { .type = STARPU_HISTORY_BASED, .symbol = TYPE"fft2_1d" }; static struct starpu_perfmodel STARPUFFT(twist3_1d_model) = { .type = STARPU_HISTORY_BASED, .symbol = TYPE"twist3_1d" }; /* codelet pointers for the 5 kinds of tasks */ static struct starpu_codelet STARPUFFT(twist1_1d_codelet) = { .where = #ifdef __STARPU_USE_CUDA STARPU_CUDA| #endif STARPU_CPU, #ifdef __STARPU_USE_CUDA .cuda_funcs = {STARPUFFT(twist1_1d_kernel_gpu)}, #endif .cpu_funcs = {STARPUFFT(twist1_1d_kernel_cpu)}, CAN_EXECUTE .model = &STARPUFFT(twist1_1d_model), .nbuffers = 2, .modes = {STARPU_R, STARPU_W}, .name = "twist1_1d_codelet" }; static struct starpu_codelet STARPUFFT(fft1_1d_codelet) = { .where = #ifdef __STARPU_USE_CUDA STARPU_CUDA| #endif #ifdef STARPU_HAVE_FFTW STARPU_CPU| #endif 0, #ifdef __STARPU_USE_CUDA .cuda_funcs = {STARPUFFT(fft1_1d_kernel_gpu)}, #endif #ifdef STARPU_HAVE_FFTW .cpu_funcs = {STARPUFFT(fft1_1d_kernel_cpu)}, #endif CAN_EXECUTE .model = &STARPUFFT(fft1_1d_model), .nbuffers = 3, .modes = {STARPU_R, STARPU_W, STARPU_R}, .name = "fft1_1d_codelet" }; static struct starpu_codelet STARPUFFT(twist2_1d_codelet) = { .where = STARPU_CPU, .cpu_funcs = {STARPUFFT(twist2_1d_kernel_cpu)}, CAN_EXECUTE .model = &STARPUFFT(twist2_1d_model), .nbuffers = 1, .modes = {STARPU_W}, .name = "twist2_1d_codelet" }; static struct starpu_codelet STARPUFFT(fft2_1d_codelet) = { .where = #ifdef __STARPU_USE_CUDA STARPU_CUDA| #endif #ifdef STARPU_HAVE_FFTW STARPU_CPU| #endif 0, #ifdef __STARPU_USE_CUDA .cuda_funcs = {STARPUFFT(fft2_1d_kernel_gpu)}, #endif #ifdef STARPU_HAVE_FFTW .cpu_funcs = {STARPUFFT(fft2_1d_kernel_cpu)}, #endif CAN_EXECUTE .model = &STARPUFFT(fft2_1d_model), .nbuffers = 2, .modes = {STARPU_R, STARPU_W}, .name = "fft2_1d_codelet" }; static struct starpu_codelet STARPUFFT(twist3_1d_codelet) = { .where = STARPU_CPU, .cpu_funcs = {STARPUFFT(twist3_1d_kernel_cpu)}, CAN_EXECUTE .model = &STARPUFFT(twist3_1d_model), .nbuffers = 1, .modes = {STARPU_R}, .name = "twist3_1d_codelet" }; /* * * Sequential version * */ #ifdef __STARPU_USE_CUDA /* Perform one fft of size n */ static void STARPUFFT(fft_1d_plan_gpu)(void *args) { STARPUFFT(plan) plan = args; cufftResult cures; int n = plan->n[0]; int workerid = starpu_worker_get_id_check(); cures = cufftPlan1d(&plan->plans[workerid].plan_cuda, n, _CUFFT_C2C, 1); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); cufftSetStream(plan->plans[workerid].plan_cuda, starpu_cuda_get_local_stream()); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); } static void STARPUFFT(fft_1d_kernel_gpu)(void *descr[], void *args) { STARPUFFT(plan) plan = args; cufftResult cures; _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); _cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); int workerid = starpu_worker_get_id_check(); task_per_worker[workerid]++; cures = _cufftExecC2C(plan->plans[workerid].plan_cuda, in, out, plan->sign == -1 ? CUFFT_FORWARD : CUFFT_INVERSE); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } #endif #ifdef STARPU_HAVE_FFTW /* Perform one fft of size n */ static void STARPUFFT(fft_1d_kernel_cpu)(void *descr[], void *_args) { STARPUFFT(plan) plan = _args; int workerid = starpu_worker_get_id_check(); task_per_worker[workerid]++; STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); STARPUFFT(complex) * restrict out = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); _FFTW(execute_dft)(plan->plans[workerid].plan_cpu, in, out); } #endif static struct starpu_perfmodel STARPUFFT(fft_1d_model) = { .type = STARPU_HISTORY_BASED, .symbol = TYPE"fft_1d" }; static struct starpu_codelet STARPUFFT(fft_1d_codelet) = { .where = #ifdef __STARPU_USE_CUDA STARPU_CUDA| #endif #ifdef STARPU_HAVE_FFTW STARPU_CPU| #endif 0, #ifdef __STARPU_USE_CUDA .cuda_funcs = {STARPUFFT(fft_1d_kernel_gpu)}, #endif #ifdef STARPU_HAVE_FFTW .cpu_funcs = {STARPUFFT(fft_1d_kernel_cpu)}, #endif CAN_EXECUTE .model = &STARPUFFT(fft_1d_model), .nbuffers = 2, .modes = {STARPU_R, STARPU_W}, .name = "fft_1d_codelet" }; /* Planning: * * - For each CPU worker, we need to plan the two fftw stages. * - For GPU workers, we need to do the planning in the CUDA context, so we do * this lazily through the initialised1 and initialised2 flags ; TODO: use * starpu_execute_on_each_worker instead (done in the omp branch). * - We allocate all the temporary buffers and register them to starpu. * - We create all the tasks, but do not submit them yet. It will be possible * to reuse them at will to perform several ffts with the same planning. */ STARPUFFT(plan) STARPUFFT(plan_dft_1d)(int n, int sign, unsigned flags) { unsigned workerid; int n1 = DIV_1D; int n2 = n / n1; int n3; int z; struct starpu_task *task; if (PARALLEL) { #ifdef __STARPU_USE_CUDA /* cufft 1D limited to 8M elements */ while (n2 > 8 << 20) { n1 *= 2; n2 /= 2; } #endif STARPU_ASSERT(n == n1*n2); STARPU_ASSERT((unsigned long long) n1 < (1ULL << I_BITS)); /* distribute the n2 second ffts into DIV_1D packages */ n3 = n2 / DIV_1D; STARPU_ASSERT(n2 == n3*DIV_1D); } /* TODO: flags? Automatically set FFTW_MEASURE on calibration? */ STARPU_ASSERT(flags == 0); STARPUFFT(plan) plan = malloc(sizeof(*plan)); memset(plan, 0, sizeof(*plan)); if (PARALLEL) { plan->number = STARPU_ATOMIC_ADD(&starpufft_last_plan_number, 1) - 1; /* The plan number has a limited size */ STARPU_ASSERT((unsigned long long) plan->number < (1ULL << NUMBER_BITS)); } /* Just one dimension */ plan->dim = 1; plan->n = malloc(plan->dim * sizeof(*plan->n)); plan->n[0] = n; if (PARALLEL) { check_dims(plan); plan->n1 = malloc(plan->dim * sizeof(*plan->n1)); plan->n1[0] = n1; plan->n2 = malloc(plan->dim * sizeof(*plan->n2)); plan->n2[0] = n2; } /* Note: this is for coherency with the 2D case */ plan->totsize = n; if (PARALLEL) { plan->totsize1 = n1; plan->totsize2 = n2; plan->totsize3 = DIV_1D; plan->totsize4 = plan->totsize / plan->totsize3; } plan->type = C2C; plan->sign = sign; if (PARALLEL) { /* Compute the w^k just once. */ compute_roots(plan); } /* Initialize per-worker working set */ for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) { switch (starpu_worker_get_type(workerid)) { case STARPU_CPU_WORKER: #ifdef STARPU_HAVE_FFTW if (PARALLEL) { /* first fft plan: one fft of size n2. * FFTW imposes that buffer pointers are known at * planning time. */ plan->plans[workerid].plan1_cpu = _FFTW(plan_dft_1d)(n2, NULL, (void*) 1, sign, _FFTW_FLAGS); STARPU_ASSERT(plan->plans[workerid].plan1_cpu); /* second fft plan: n3 ffts of size n1 */ plan->plans[workerid].plan2_cpu = _FFTW(plan_many_dft)(plan->dim, plan->n1, n3, NULL, NULL, 1, plan->totsize1, (void*) 1, NULL, 1, plan->totsize1, sign, _FFTW_FLAGS); STARPU_ASSERT(plan->plans[workerid].plan2_cpu); } else { /* fft plan: one fft of size n. */ plan->plans[workerid].plan_cpu = _FFTW(plan_dft_1d)(n, NULL, (void*) 1, sign, _FFTW_FLAGS); STARPU_ASSERT(plan->plans[workerid].plan_cpu); } #else /* #warning libstarpufft can not work correctly if libfftw3 is not installed */ #endif break; case STARPU_CUDA_WORKER: break; default: /* Do not care, we won't be executing anything there. */ break; } } #ifdef __STARPU_USE_CUDA if (PARALLEL) { starpu_execute_on_each_worker(STARPUFFT(fft1_1d_plan_gpu), plan, STARPU_CUDA); starpu_execute_on_each_worker(STARPUFFT(fft2_1d_plan_gpu), plan, STARPU_CUDA); } else { starpu_execute_on_each_worker(STARPUFFT(fft_1d_plan_gpu), plan, STARPU_CUDA); } #endif if (PARALLEL) { /* Allocate buffers. */ plan->twisted1_size = plan->totsize * sizeof(*plan->twisted1); plan->twisted1 = STARPUFFT(malloc)(plan->twisted1_size); memset(plan->twisted1, 0, plan->twisted1_size); plan->fft1_size = plan->totsize * sizeof(*plan->fft1); plan->fft1 = STARPUFFT(malloc)(plan->fft1_size); memset(plan->fft1, 0, plan->fft1_size); plan->twisted2_size = plan->totsize * sizeof(*plan->twisted2); plan->twisted2 = STARPUFFT(malloc)(plan->twisted2_size); memset(plan->twisted2, 0, plan->twisted2_size); plan->fft2_size = plan->totsize * sizeof(*plan->fft2); plan->fft2 = STARPUFFT(malloc)(plan->fft2_size); memset(plan->fft2, 0, plan->fft2_size); /* Allocate handle arrays */ plan->twisted1_handle = malloc(plan->totsize1 * sizeof(*plan->twisted1_handle)); plan->fft1_handle = malloc(plan->totsize1 * sizeof(*plan->fft1_handle)); plan->twisted2_handle = malloc(plan->totsize3 * sizeof(*plan->twisted2_handle)); plan->fft2_handle = malloc(plan->totsize3 * sizeof(*plan->fft2_handle)); /* Allocate task arrays */ plan->twist1_tasks = malloc(plan->totsize1 * sizeof(*plan->twist1_tasks)); plan->fft1_tasks = malloc(plan->totsize1 * sizeof(*plan->fft1_tasks)); plan->twist2_tasks = malloc(plan->totsize3 * sizeof(*plan->twist2_tasks)); plan->fft2_tasks = malloc(plan->totsize3 * sizeof(*plan->fft2_tasks)); plan->twist3_tasks = malloc(plan->totsize3 * sizeof(*plan->twist3_tasks)); /* Allocate codelet argument arrays */ plan->fft1_args = malloc(plan->totsize1 * sizeof(*plan->fft1_args)); plan->fft2_args = malloc(plan->totsize3 * sizeof(*plan->fft2_args)); /* Create first-round tasks: DIV_1D tasks of type twist1 and fft1 */ for (z = 0; z < plan->totsize1; z++) { int i = z; #define STEP_TAG(step) STEP_TAG_1D(plan, step, i) /* TODO: get rid of tags */ plan->fft1_args[z].plan = plan; plan->fft1_args[z].i = i; /* Register the twisted1 buffer of size n2. */ starpu_vector_data_register(&plan->twisted1_handle[z], STARPU_MAIN_RAM, (uintptr_t) &plan->twisted1[z*plan->totsize2], plan->totsize2, sizeof(*plan->twisted1)); /* Register the fft1 buffer of size n2. */ starpu_vector_data_register(&plan->fft1_handle[z], STARPU_MAIN_RAM, (uintptr_t) &plan->fft1[z*plan->totsize2], plan->totsize2, sizeof(*plan->fft1)); /* We'll need the result of fft1 on the CPU for the second * twist anyway, so tell starpu to not keep the fft1 buffer in * the GPU. */ starpu_data_set_wt_mask(plan->fft1_handle[z], 1<<0); /* Create twist1 task */ plan->twist1_tasks[z] = task = starpu_task_create(); task->cl = &STARPUFFT(twist1_1d_codelet); /* task->handles[0] = to be filled at execution to point to the application input. */ task->handles[1] = plan->twisted1_handle[z]; task->cl_arg = &plan->fft1_args[z]; task->tag_id = STEP_TAG(TWIST1); task->use_tag = 1; task->destroy = 0; /* Tell that fft1 depends on twisted1 */ starpu_tag_declare_deps(STEP_TAG(FFT1), 1, STEP_TAG(TWIST1)); /* Create FFT1 task */ plan->fft1_tasks[z] = task = starpu_task_create(); task->cl = &STARPUFFT(fft1_1d_codelet); task->handles[0] = plan->twisted1_handle[z]; task->handles[1] = plan->fft1_handle[z]; task->handles[2] = plan->roots_handle[0]; task->cl_arg = &plan->fft1_args[z]; task->tag_id = STEP_TAG(FFT1); task->use_tag = 1; task->destroy = 0; /* Tell that the join task will depend on the fft1 task. */ starpu_tag_declare_deps(STEP_TAG_1D(plan, JOIN, 0), 1, STEP_TAG(FFT1)); #undef STEP_TAG } /* Create the join task, only serving as a dependency point between * fft1 and twist2 tasks */ plan->join_task = task = starpu_task_create(); task->cl = NULL; task->tag_id = STEP_TAG_1D(plan, JOIN, 0); task->use_tag = 1; task->destroy = 0; /* Create second-round tasks: DIV_1D batches of n2/DIV_1D twist2, fft2, * and twist3 */ for (z = 0; z < plan->totsize3; z++) { int jj = z; #define STEP_TAG(step) STEP_TAG_1D(plan, step, jj) plan->fft2_args[z].plan = plan; plan->fft2_args[z].jj = jj; /* Register n3 twisted2 buffers of size n1 */ starpu_vector_data_register(&plan->twisted2_handle[z], STARPU_MAIN_RAM, (uintptr_t) &plan->twisted2[z*plan->totsize4], plan->totsize4, sizeof(*plan->twisted2)); starpu_vector_data_register(&plan->fft2_handle[z], STARPU_MAIN_RAM, (uintptr_t) &plan->fft2[z*plan->totsize4], plan->totsize4, sizeof(*plan->fft2)); /* We'll need the result of fft2 on the CPU for the third * twist anyway, so tell starpu to not keep the fft2 buffer in * the GPU. */ starpu_data_set_wt_mask(plan->fft2_handle[z], 1<<0); /* Tell that twisted2 depends on the join task */ starpu_tag_declare_deps(STEP_TAG(TWIST2), 1, STEP_TAG_1D(plan, JOIN, 0)); /* Create twist2 task */ plan->twist2_tasks[z] = task = starpu_task_create(); task->cl = &STARPUFFT(twist2_1d_codelet); task->handles[0] = plan->twisted2_handle[z]; task->cl_arg = &plan->fft2_args[z]; task->tag_id = STEP_TAG(TWIST2); task->use_tag = 1; task->destroy = 0; /* Tell that fft2 depends on twisted2 */ starpu_tag_declare_deps(STEP_TAG(FFT2), 1, STEP_TAG(TWIST2)); /* Create FFT2 task */ plan->fft2_tasks[z] = task = starpu_task_create(); task->cl = &STARPUFFT(fft2_1d_codelet); task->handles[0] = plan->twisted2_handle[z]; task->handles[1] = plan->fft2_handle[z]; task->cl_arg = &plan->fft2_args[z]; task->tag_id = STEP_TAG(FFT2); task->use_tag = 1; task->destroy = 0; /* Tell that twist3 depends on fft2 */ starpu_tag_declare_deps(STEP_TAG(TWIST3), 1, STEP_TAG(FFT2)); /* Create twist3 tasks */ /* These run only on CPUs and thus write directly into the * application output buffer. */ plan->twist3_tasks[z] = task = starpu_task_create(); task->cl = &STARPUFFT(twist3_1d_codelet); task->handles[0] = plan->fft2_handle[z]; task->cl_arg = &plan->fft2_args[z]; task->tag_id = STEP_TAG(TWIST3); task->use_tag = 1; task->destroy = 0; /* Tell that to be completely finished we need to have finished * this twisted3 */ starpu_tag_declare_deps(STEP_TAG_1D(plan, END, 0), 1, STEP_TAG(TWIST3)); #undef STEP_TAG } /* Create end task, only serving as a join point. */ plan->end_task = task = starpu_task_create(); task->cl = NULL; task->tag_id = STEP_TAG_1D(plan, END, 0); task->use_tag = 1; task->destroy = 0; task->detach = 0; } return plan; } /* Actually submit all the tasks. */ static struct starpu_task * STARPUFFT(start1dC2C)(STARPUFFT(plan) plan, starpu_data_handle_t in, starpu_data_handle_t out) { STARPU_ASSERT(plan->type == C2C); int z; int ret; if (PARALLEL) { for (z=0; z < plan->totsize1; z++) { ret = starpu_task_submit(plan->twist1_tasks[z]); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(plan->fft1_tasks[z]); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_submit(plan->join_task); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); for (z=0; z < plan->totsize3; z++) { ret = starpu_task_submit(plan->twist2_tasks[z]); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(plan->fft2_tasks[z]); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(plan->twist3_tasks[z]); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_submit(plan->end_task); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return plan->end_task; } else /* !PARALLEL */ { struct starpu_task *task; /* Create FFT task */ task = starpu_task_create(); task->detach = 0; task->cl = &STARPUFFT(fft_1d_codelet); task->handles[0] = in; task->handles[1] = out; task->cl_arg = plan; ret = starpu_task_submit(task); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return task; } } /* Free all the tags. The generic code handles freeing the buffers. */ static void STARPUFFT(free_1d_tags)(STARPUFFT(plan) plan) { int i; int n1 = plan->n1[0]; if (!PARALLEL) return; for (i = 0; i < n1; i++) { starpu_tag_remove(STEP_TAG_1D(plan, TWIST1, i)); starpu_tag_remove(STEP_TAG_1D(plan, FFT1, i)); } starpu_tag_remove(STEP_TAG_1D(plan, JOIN, 0)); for (i = 0; i < DIV_1D; i++) { starpu_tag_remove(STEP_TAG_1D(plan, TWIST2, i)); starpu_tag_remove(STEP_TAG_1D(plan, FFT2, i)); starpu_tag_remove(STEP_TAG_1D(plan, TWIST3, i)); } starpu_tag_remove(STEP_TAG_1D(plan, END, 0)); } starpu-1.4.9+dfsg/starpufft/src/starpufftx2d.c000066400000000000000000000616311507764646700214240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define DIV_2D_N 8 #define DIV_2D_M 8 #define I_SHIFT (I_BITS/2) #define J_BITS I_SHIFT #define STEP_TAG_2D(plan, step, i, j) _STEP_TAG(plan, step, ((starpu_tag_t) i << I_SHIFT) | (starpu_tag_t) j) #ifdef __STARPU_USE_CUDA /* Twist the full vector into a n2,m2 chunk */ static void STARPUFFT(twist1_2d_kernel_gpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; int i = args->i; int j = args->j; int n1 = plan->n1[0]; int n2 = plan->n2[0]; int m1 = plan->n1[1]; int m2 = plan->n2[1]; _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); _cufftComplex * restrict twisted1 = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); STARPUFFT(cuda_twist1_2d_host)(in, twisted1, i, j, n1, n2, m1, m2); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } /* fft1: * * Perform one fft of size n2,m2 */ static void STARPUFFT(fft1_2d_plan_gpu)(void *args) { STARPUFFT(plan) plan = args; int n2 = plan->n2[0]; int m2 = plan->n2[1]; int workerid = starpu_worker_get_id_check(); cufftResult cures; cures = cufftPlan2d(&plan->plans[workerid].plan1_cuda, n2, m2, _CUFFT_C2C); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); cufftSetStream(plan->plans[workerid].plan1_cuda, starpu_cuda_get_local_stream()); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); } static void STARPUFFT(fft1_2d_kernel_gpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; int i = args->i; int j = args->j; int n2 = plan->n2[0]; int m2 = plan->n2[1]; cufftResult cures; _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); _cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); const _cufftComplex * restrict roots0 = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[2]); const _cufftComplex * restrict roots1 = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[3]); int workerid = starpu_worker_get_id_check(); task_per_worker[workerid]++; cures = _cufftExecC2C(plan->plans[workerid].plan1_cuda, in, out, plan->sign == -1 ? CUFFT_FORWARD : CUFFT_INVERSE); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); /* synchronization is done after the twiddling */ STARPUFFT(cuda_twiddle_2d_host)(out, roots0, roots1, n2, m2, i, j); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } /* fft2: * * Perform n3*m3 ffts of size n1,m1 */ static void STARPUFFT(fft2_2d_plan_gpu(void *args)) { STARPUFFT(plan) plan = args; int n1 = plan->n1[0]; int m1 = plan->n1[1]; cufftResult cures; int workerid = starpu_worker_get_id_check(); cures = cufftPlan2d(&plan->plans[workerid].plan2_cuda, n1, m1, _CUFFT_C2C); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); cufftSetStream(plan->plans[workerid].plan2_cuda, starpu_cuda_get_local_stream()); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); } static void STARPUFFT(fft2_2d_kernel_gpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; int n1 = plan->n1[0]; int n2 = plan->n2[0]; int m1 = plan->n1[1]; int m2 = plan->n2[1]; int n3 = n2/DIV_2D_N; int m3 = m2/DIV_2D_M; int n; cufftResult cures; _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); _cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); int workerid = starpu_worker_get_id_check(); task_per_worker[workerid]++; for (n = 0; n < n3*m3; n++) { cures = _cufftExecC2C(plan->plans[workerid].plan2_cuda, in + n * n1*m1, out + n * n1*m1, plan->sign == -1 ? CUFFT_FORWARD : CUFFT_INVERSE); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); } cudaStreamSynchronize(starpu_cuda_get_local_stream()); } #endif /* Twist the full vector into a n2,m2 chunk */ static void STARPUFFT(twist1_2d_kernel_cpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; int i = args->i; int j = args->j; int k, l; int n1 = plan->n1[0]; int n2 = plan->n2[0]; int m1 = plan->n1[1]; int m2 = plan->n2[1]; int m = plan->n[1]; STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); /* printf("twist1 %d %d %g\n", i, j, (double) cabs(plan->in[i+j])); */ for (k = 0; k < n2; k++) for (l = 0; l < m2; l++) twisted1[k*m2+l] = in[i*m+j+k*m*n1+l*m1]; } #ifdef STARPU_HAVE_FFTW /* Perform an n2,m2 fft */ static void STARPUFFT(fft1_2d_kernel_cpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; int i = args->i; int j = args->j; int k, l; int n2 = plan->n2[0]; int m2 = plan->n2[1]; int workerid = starpu_worker_get_id_check(); task_per_worker[workerid]++; STARPUFFT(complex) *twisted1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); STARPUFFT(complex) *fft1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); /* printf("fft1 %d %d %g\n", i, j, (double) cabs(twisted1[0])); */ _FFTW(execute_dft)(plan->plans[workerid].plan1_cpu, twisted1, fft1); for (k = 0; k < n2; k++) for (l = 0; l < m2; l++) fft1[k*m2 + l] = fft1[k*m2 + l] * plan->roots[0][i*k] * plan->roots[1][j*l]; } #endif /* Twist the full vector into a package of n2/DIV_2D_N,m2/DIV_2D_M (n1,m1) chunks */ static void STARPUFFT(twist2_2d_kernel_cpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; int kk = args->kk; /* between 0 and DIV_2D_N */ int ll = args->ll; /* between 0 and DIV_2D_M */ int kkk, lll; /* between 0,0 and n3,m3 */ int i, j; int n1 = plan->n1[0]; int n2 = plan->n2[0]; int m1 = plan->n1[1]; int m2 = plan->n2[1]; int n3 = n2/DIV_2D_N; int m3 = m2/DIV_2D_M; STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); /* printf("twist2 %d %d %g\n", kk, ll, (double) cabs(plan->fft1[kk+ll])); */ for (kkk = 0; kkk < n3; kkk++) { int k = kk * n3 + kkk; for (lll = 0; lll < m3; lll++) { int l = ll * m3 + lll; for (i = 0; i < n1; i++) for (j = 0; j < m1; j++) twisted2[kkk*m3*n1*m1+lll*n1*m1+i*m1+j] = plan->fft1[i*n1*n2*m2+j*n2*m2+k*m2+l]; } } } #ifdef STARPU_HAVE_FFTW /* Perform (n2/DIV_2D_N)*(m2/DIV_2D_M) (n1,m1) ffts */ static void STARPUFFT(fft2_2d_kernel_cpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; /* int kk = args->kk; */ /* int ll = args->ll; */ int workerid = starpu_worker_get_id_check(); task_per_worker[workerid]++; STARPUFFT(complex) *twisted2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); STARPUFFT(complex) *fft2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); /* printf("fft2 %d %d %g\n", kk, ll, (double) cabs(twisted2[plan->totsize4-1])); */ _FFTW(execute_dft)(plan->plans[workerid].plan2_cpu, twisted2, fft2); } #endif /* Spread the package of (n2/DIV_2D_N)*(m2/DIV_2D_M) (n1,m1) chunks into the full vector */ static void STARPUFFT(twist3_2d_kernel_cpu)(void *descr[], void *_args) { struct STARPUFFT(args) *args = _args; STARPUFFT(plan) plan = args->plan; int kk = args->kk; /* between 0 and DIV_2D_N */ int ll = args->ll; /* between 0 and DIV_2D_M */ int kkk, lll; /* between 0,0 and n3,m3 */ int i, j; int n1 = plan->n1[0]; int n2 = plan->n2[0]; int m1 = plan->n1[1]; int m2 = plan->n2[1]; int n3 = n2/DIV_2D_N; int m3 = m2/DIV_2D_M; int m = plan->n[1]; const STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); /* printf("twist3 %d %d %g\n", kk, ll, (double) cabs(fft2[0])); */ for (kkk = 0; kkk < n3; kkk++) { int k = kk * n3 + kkk; for (lll = 0; lll < m3; lll++) { int l = ll * m3 + lll; for (i = 0; i < n1; i++) for (j = 0; j < m1; j++) plan->out[i*n2*m+j*m2+k*m+l] = fft2[kkk*m3*n1*m1+lll*n1*m1+i*m1+j]; } } } struct starpu_perfmodel STARPUFFT(twist1_2d_model) = { .type = STARPU_HISTORY_BASED, .symbol = TYPE"twist1_2d" }; struct starpu_perfmodel STARPUFFT(fft1_2d_model) = { .type = STARPU_HISTORY_BASED, .symbol = TYPE"fft1_2d" }; struct starpu_perfmodel STARPUFFT(twist2_2d_model) = { .type = STARPU_HISTORY_BASED, .symbol = TYPE"twist2_2d" }; struct starpu_perfmodel STARPUFFT(fft2_2d_model) = { .type = STARPU_HISTORY_BASED, .symbol = TYPE"fft2_2d" }; struct starpu_perfmodel STARPUFFT(twist3_2d_model) = { .type = STARPU_HISTORY_BASED, .symbol = TYPE"twist3_2d" }; static struct starpu_codelet STARPUFFT(twist1_2d_codelet) = { .where = #ifdef __STARPU_USE_CUDA STARPU_CUDA| #endif STARPU_CPU, #ifdef __STARPU_USE_CUDA .cuda_funcs = {STARPUFFT(twist1_2d_kernel_gpu)}, #endif .cpu_funcs = {STARPUFFT(twist1_2d_kernel_cpu)}, CAN_EXECUTE .model = &STARPUFFT(twist1_2d_model), .nbuffers = 2, .modes = {STARPU_R, STARPU_W}, .name = "twist1_2d_codelet" }; static struct starpu_codelet STARPUFFT(fft1_2d_codelet) = { .where = #ifdef __STARPU_USE_CUDA STARPU_CUDA| #endif #ifdef STARPU_HAVE_FFTW STARPU_CPU| #endif 0, #ifdef __STARPU_USE_CUDA .cuda_funcs = {STARPUFFT(fft1_2d_kernel_gpu)}, #endif #ifdef STARPU_HAVE_FFTW .cpu_funcs = {STARPUFFT(fft1_2d_kernel_cpu)}, #endif CAN_EXECUTE .model = &STARPUFFT(fft1_2d_model), .nbuffers = 4, .modes = {STARPU_R, STARPU_W, STARPU_R, STARPU_R}, .name = "fft1_2d_codelet" }; static struct starpu_codelet STARPUFFT(twist2_2d_codelet) = { .where = STARPU_CPU, .cpu_funcs = {STARPUFFT(twist2_2d_kernel_cpu)}, CAN_EXECUTE .model = &STARPUFFT(twist2_2d_model), .nbuffers = 1, .modes = {STARPU_W}, .name = "twist2_2d_codelet" }; static struct starpu_codelet STARPUFFT(fft2_2d_codelet) = { .where = #ifdef __STARPU_USE_CUDA STARPU_CUDA| #endif #ifdef STARPU_HAVE_FFTW STARPU_CPU| #endif 0, #ifdef __STARPU_USE_CUDA .cuda_funcs = {STARPUFFT(fft2_2d_kernel_gpu)}, #endif #ifdef STARPU_HAVE_FFTW .cpu_funcs = {STARPUFFT(fft2_2d_kernel_cpu)}, #endif CAN_EXECUTE .model = &STARPUFFT(fft2_2d_model), .nbuffers = 2, .modes = {STARPU_R, STARPU_W}, .name = "fft2_2d_codelet" }; static struct starpu_codelet STARPUFFT(twist3_2d_codelet) = { .where = STARPU_CPU, .cpu_funcs = {STARPUFFT(twist3_2d_kernel_cpu)}, CAN_EXECUTE .model = &STARPUFFT(twist3_2d_model), .nbuffers = 1, .modes = {STARPU_R}, .name = "twist3_2d_codelet" }; /* * * Sequential version * */ #ifdef __STARPU_USE_CUDA /* Perform one fft of size n,m */ static void STARPUFFT(fft_2d_plan_gpu)(void *args) { STARPUFFT(plan) plan = args; cufftResult cures; int n = plan->n[0]; int m = plan->n[1]; int workerid = starpu_worker_get_id_check(); cures = cufftPlan2d(&plan->plans[workerid].plan_cuda, n, m, _CUFFT_C2C); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); cufftSetStream(plan->plans[workerid].plan_cuda, starpu_cuda_get_local_stream()); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); } static void STARPUFFT(fft_2d_kernel_gpu)(void *descr[], void *args) { STARPUFFT(plan) plan = args; cufftResult cures; _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); _cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); int workerid = starpu_worker_get_id_check(); task_per_worker[workerid]++; cures = _cufftExecC2C(plan->plans[workerid].plan_cuda, in, out, plan->sign == -1 ? CUFFT_FORWARD : CUFFT_INVERSE); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } #endif #ifdef STARPU_HAVE_FFTW /* Perform one fft of size n,m */ static void STARPUFFT(fft_2d_kernel_cpu)(void *descr[], void *_args) { STARPUFFT(plan) plan = _args; int workerid = starpu_worker_get_id_check(); task_per_worker[workerid]++; STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); STARPUFFT(complex) * restrict out = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); _FFTW(execute_dft)(plan->plans[workerid].plan_cpu, in, out); } #endif static struct starpu_perfmodel STARPUFFT(fft_2d_model) = { .type = STARPU_HISTORY_BASED, .symbol = TYPE"fft_2d" }; static struct starpu_codelet STARPUFFT(fft_2d_codelet) = { .where = #ifdef __STARPU_USE_CUDA STARPU_CUDA| #endif #ifdef STARPU_HAVE_FFTW STARPU_CPU| #endif 0, #ifdef __STARPU_USE_CUDA .cuda_funcs = {STARPUFFT(fft_2d_kernel_gpu)}, #endif #ifdef STARPU_HAVE_FFTW .cpu_funcs = {STARPUFFT(fft_2d_kernel_cpu)}, #endif CAN_EXECUTE .model = &STARPUFFT(fft_2d_model), .nbuffers = 2, .modes = {STARPU_R, STARPU_W}, .name = "fft_2d_codelet" }; STARPUFFT(plan) STARPUFFT(plan_dft_2d)(int n, int m, int sign, unsigned flags) { unsigned workerid; int n1 = DIV_2D_N; int n2 = n / n1; int n3; int m1 = DIV_2D_M; int m2 = m / m1; int m3; int z; struct starpu_task *task; if (PARALLEL) { /* * Simple strategy: * * - twist1: twist input in n1*m1 (n2,m2) chunks * - fft1: perform n1*m1 (n2,m2) ffts * - twist2: twist into n2*m2 (n1,m1) chunks distributed in * DIV_2D_N*DIV_2D_M groups * - fft2: perform DIV_2D_N*DIV_2D_M times n3*m3 (n1,m1) ffts * - twist3: twist back into output */ #ifdef __STARPU_USE_CUDA /* cufft 2D-3D limited to [2,16384] */ while (n2 > 16384) { n1 *= 2; n2 /= 2; } #endif STARPU_ASSERT(n == n1*n2); STARPU_ASSERT((unsigned long long) n1 < (1ULL << J_BITS)); #ifdef __STARPU_USE_CUDA /* cufft 2D-3D limited to [2,16384] */ while (m2 > 16384) { m1 *= 2; m2 /= 2; } #endif STARPU_ASSERT(m == m1*m2); STARPU_ASSERT((unsigned long long) m1 < (1ULL << J_BITS)); /* distribute the n2*m2 second ffts into DIV_2D_N*DIV_2D_M packages */ n3 = n2 / DIV_2D_N; STARPU_ASSERT(n2 == n3*DIV_2D_N); m3 = m2 / DIV_2D_M; STARPU_ASSERT(m2 == m3*DIV_2D_M); } /* TODO: flags? Automatically set FFTW_MEASURE on calibration? */ STARPU_ASSERT(flags == 0); STARPUFFT(plan) plan = malloc(sizeof(*plan)); memset(plan, 0, sizeof(*plan)); if (PARALLEL) { plan->number = STARPU_ATOMIC_ADD(&starpufft_last_plan_number, 1) - 1; /* 4bit limitation in the tag space */ STARPU_ASSERT((unsigned long long) plan->number < (1ULL << NUMBER_BITS)); } plan->dim = 2; plan->n = malloc(plan->dim * sizeof(*plan->n)); plan->n[0] = n; plan->n[1] = m; if (PARALLEL) { check_dims(plan); plan->n1 = malloc(plan->dim * sizeof(*plan->n1)); plan->n1[0] = n1; plan->n1[1] = m1; plan->n2 = malloc(plan->dim * sizeof(*plan->n2)); plan->n2[0] = n2; plan->n2[1] = m2; } plan->totsize = n * m; if (PARALLEL) { plan->totsize1 = n1 * m1; plan->totsize2 = n2 * m2; plan->totsize3 = DIV_2D_N * DIV_2D_M; plan->totsize4 = plan->totsize / plan->totsize3; } plan->type = C2C; plan->sign = sign; if (PARALLEL) { /* Compute the w^k just once. */ compute_roots(plan); } /* Initialize per-worker working set */ for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) { switch (starpu_worker_get_type(workerid)) { case STARPU_CPU_WORKER: #ifdef STARPU_HAVE_FFTW if (PARALLEL) { /* first fft plan: one n2*m2 fft */ plan->plans[workerid].plan1_cpu = _FFTW(plan_dft_2d)(n2, m2, NULL, (void*) 1, sign, _FFTW_FLAGS); STARPU_ASSERT(plan->plans[workerid].plan1_cpu); /* second fft plan: n3*m3 n1*m1 ffts */ plan->plans[workerid].plan2_cpu = _FFTW(plan_many_dft)(plan->dim, plan->n1, n3*m3, NULL, NULL, 1, plan->totsize1, (void*) 1, NULL, 1, plan->totsize1, sign, _FFTW_FLAGS); STARPU_ASSERT(plan->plans[workerid].plan2_cpu); } else { /* fft plan: one fft of size n, m. */ plan->plans[workerid].plan_cpu = _FFTW(plan_dft_2d)(n, m, NULL, (void*) 1, sign, _FFTW_FLAGS); STARPU_ASSERT(plan->plans[workerid].plan_cpu); } #else /* #warning libstarpufft can not work correctly if libfftw3 is not installed */ #endif break; case STARPU_CUDA_WORKER: break; default: /* Do not care, we won't be executing anything there. */ break; } } #ifdef __STARPU_USE_CUDA if (PARALLEL) { starpu_execute_on_each_worker(STARPUFFT(fft1_2d_plan_gpu), plan, STARPU_CUDA); starpu_execute_on_each_worker(STARPUFFT(fft2_2d_plan_gpu), plan, STARPU_CUDA); } else { starpu_execute_on_each_worker(STARPUFFT(fft_2d_plan_gpu), plan, STARPU_CUDA); } #endif if (PARALLEL) { /* Allocate buffers. */ plan->twisted1_size = plan->totsize * sizeof(*plan->twisted1); plan->twisted1 = STARPUFFT(malloc)(plan->twisted1_size); memset(plan->twisted1, 0, plan->twisted1_size); plan->fft1_size = plan->totsize * sizeof(*plan->fft1); plan->fft1 = STARPUFFT(malloc)(plan->fft1_size); memset(plan->fft1, 0, plan->fft1_size); plan->twisted2_size = plan->totsize * sizeof(*plan->twisted2); plan->twisted2 = STARPUFFT(malloc)(plan->twisted2_size); memset(plan->twisted2, 0, plan->twisted2_size); plan->fft2_size = plan->totsize * sizeof(*plan->fft2); plan->fft2 = STARPUFFT(malloc)(plan->fft2_size); memset(plan->fft2, 0, plan->fft2_size); /* Allocate handle arrays */ plan->twisted1_handle = malloc(plan->totsize1 * sizeof(*plan->twisted1_handle)); plan->fft1_handle = malloc(plan->totsize1 * sizeof(*plan->fft1_handle)); plan->twisted2_handle = malloc(plan->totsize3 * sizeof(*plan->twisted2_handle)); plan->fft2_handle = malloc(plan->totsize3 * sizeof(*plan->fft2_handle)); /* Allocate task arrays */ plan->twist1_tasks = malloc(plan->totsize1 * sizeof(*plan->twist1_tasks)); plan->fft1_tasks = malloc(plan->totsize1 * sizeof(*plan->fft1_tasks)); plan->twist2_tasks = malloc(plan->totsize3 * sizeof(*plan->twist2_tasks)); plan->fft2_tasks = malloc(plan->totsize3 * sizeof(*plan->fft2_tasks)); plan->twist3_tasks = malloc(plan->totsize3 * sizeof(*plan->twist3_tasks)); /* Allocate codelet argument arrays */ plan->fft1_args = malloc(plan->totsize1 * sizeof(*plan->fft1_args)); plan->fft2_args = malloc(plan->totsize3 * sizeof(*plan->fft2_args)); /* Create first-round tasks */ for (z = 0; z < plan->totsize1; z++) { int i = z / m1, j = z % m1; #define STEP_TAG(step) STEP_TAG_2D(plan, step, i, j) /* TODO: get rid of tags */ plan->fft1_args[z].plan = plan; plan->fft1_args[z].i = i; plan->fft1_args[z].j = j; /* Register (n2,m2) chunks */ starpu_vector_data_register(&plan->twisted1_handle[z], STARPU_MAIN_RAM, (uintptr_t) &plan->twisted1[z*plan->totsize2], plan->totsize2, sizeof(*plan->twisted1)); starpu_vector_data_register(&plan->fft1_handle[z], STARPU_MAIN_RAM, (uintptr_t) &plan->fft1[z*plan->totsize2], plan->totsize2, sizeof(*plan->fft1)); /* We'll need it on the CPU for the second twist anyway */ starpu_data_set_wt_mask(plan->fft1_handle[z], 1<<0); /* Create twist1 task */ plan->twist1_tasks[z] = task = starpu_task_create(); task->cl = &STARPUFFT(twist1_2d_codelet); /* task->handles[0] = to be filled at execution */ task->handles[1] = plan->twisted1_handle[z]; task->cl_arg = &plan->fft1_args[z]; task->tag_id = STEP_TAG(TWIST1); task->use_tag = 1; task->destroy = 0; /* Tell that fft1 depends on twisted1 */ starpu_tag_declare_deps(STEP_TAG(FFT1), 1, STEP_TAG(TWIST1)); /* Create FFT1 task */ plan->fft1_tasks[z] = task = starpu_task_create(); task->cl = &STARPUFFT(fft1_2d_codelet); task->handles[0] = plan->twisted1_handle[z]; task->handles[1] = plan->fft1_handle[z]; task->handles[2] = plan->roots_handle[0]; task->handles[3] = plan->roots_handle[1]; task->cl_arg = &plan->fft1_args[z]; task->tag_id = STEP_TAG(FFT1); task->use_tag = 1; task->destroy = 0; /* Tell that to be done with first step we need to have * finished this fft1 */ starpu_tag_declare_deps(STEP_TAG_2D(plan, JOIN, 0, 0), 1, STEP_TAG(FFT1)); #undef STEP_TAG } /* Create join task */ plan->join_task = task = starpu_task_create(); task->cl = NULL; task->tag_id = STEP_TAG_2D(plan, JOIN, 0, 0); task->use_tag = 1; task->destroy = 0; /* Create second-round tasks */ for (z = 0; z < plan->totsize3; z++) { int kk = z / DIV_2D_M, ll = z % DIV_2D_M; #define STEP_TAG(step) STEP_TAG_2D(plan, step, kk, ll) plan->fft2_args[z].plan = plan; plan->fft2_args[z].kk = kk; plan->fft2_args[z].ll = ll; /* Register n3*m3 (n1,m1) chunks */ starpu_vector_data_register(&plan->twisted2_handle[z], STARPU_MAIN_RAM, (uintptr_t) &plan->twisted2[z*plan->totsize4], plan->totsize4, sizeof(*plan->twisted2)); starpu_vector_data_register(&plan->fft2_handle[z], STARPU_MAIN_RAM, (uintptr_t) &plan->fft2[z*plan->totsize4], plan->totsize4, sizeof(*plan->fft2)); /* We'll need it on the CPU for the last twist anyway */ starpu_data_set_wt_mask(plan->fft2_handle[z], 1<<0); /* Tell that twisted2 depends on the whole first step to be * done */ starpu_tag_declare_deps(STEP_TAG(TWIST2), 1, STEP_TAG_2D(plan, JOIN, 0, 0)); /* Create twist2 task */ plan->twist2_tasks[z] = task = starpu_task_create(); task->cl = &STARPUFFT(twist2_2d_codelet); task->handles[0] = plan->twisted2_handle[z]; task->cl_arg = &plan->fft2_args[z]; task->tag_id = STEP_TAG(TWIST2); task->use_tag = 1; task->destroy = 0; /* Tell that fft2 depends on twisted2 */ starpu_tag_declare_deps(STEP_TAG(FFT2), 1, STEP_TAG(TWIST2)); /* Create FFT2 task */ plan->fft2_tasks[z] = task = starpu_task_create(); task->cl = &STARPUFFT(fft2_2d_codelet); task->handles[0] = plan->twisted2_handle[z]; task->handles[1] = plan->fft2_handle[z]; task->cl_arg = &plan->fft2_args[z]; task->tag_id = STEP_TAG(FFT2); task->use_tag = 1; task->destroy = 0; /* Tell that twist3 depends on fft2 */ starpu_tag_declare_deps(STEP_TAG(TWIST3), 1, STEP_TAG(FFT2)); /* Create twist3 tasks */ /* These run only on CPUs and thus write directly into the * application output buffer. */ plan->twist3_tasks[z] = task = starpu_task_create(); task->cl = &STARPUFFT(twist3_2d_codelet); task->handles[0] = plan->fft2_handle[z]; task->cl_arg = &plan->fft2_args[z]; task->tag_id = STEP_TAG(TWIST3); task->use_tag = 1; task->destroy = 0; /* Tell that to be completely finished we need to have finished this twisted3 */ starpu_tag_declare_deps(STEP_TAG_2D(plan, END, 0, 0), 1, STEP_TAG(TWIST3)); #undef STEP_TAG } /* Create end task */ plan->end_task = task = starpu_task_create(); task->cl = NULL; task->tag_id = STEP_TAG_2D(plan, END, 0, 0); task->use_tag = 1; task->destroy = 0; task->detach = 0; } return plan; } /* Actually submit all the tasks. */ static struct starpu_task * STARPUFFT(start2dC2C)(STARPUFFT(plan) plan, starpu_data_handle_t in, starpu_data_handle_t out) { STARPU_ASSERT(plan->type == C2C); int z; int ret; if (PARALLEL) { for (z=0; z < plan->totsize1; z++) { ret = starpu_task_submit(plan->twist1_tasks[z]); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(plan->fft1_tasks[z]); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_submit(plan->join_task); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); for (z=0; z < plan->totsize3; z++) { ret = starpu_task_submit(plan->twist2_tasks[z]); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(plan->fft2_tasks[z]); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(plan->twist3_tasks[z]); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_submit(plan->end_task); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return plan->end_task; } else /* !PARALLEL */ { struct starpu_task *task; /* Create FFT task */ task = starpu_task_create(); task->detach = 0; task->cl = &STARPUFFT(fft_2d_codelet); task->handles[0] = in; task->handles[1] = out; task->cl_arg = plan; ret = starpu_task_submit(task); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return task; } } /* Free all the tags. The generic code handles freeing the buffers. */ static void STARPUFFT(free_2d_tags)(STARPUFFT(plan) plan) { int i, j; int n1 = plan->n1[0]; int m1 = plan->n1[1]; if (!PARALLEL) return; for (i = 0; i < n1; i++) { for (j = 0; j < m1; j++) { starpu_tag_remove(STEP_TAG_2D(plan, TWIST1, i, j)); starpu_tag_remove(STEP_TAG_2D(plan, FFT1, i, j)); } } starpu_tag_remove(STEP_TAG_2D(plan, JOIN, 0, 0)); for (i = 0; i < DIV_2D_N; i++) { for (j = 0; j < DIV_2D_M; j++) { starpu_tag_remove(STEP_TAG_2D(plan, TWIST2, i, j)); starpu_tag_remove(STEP_TAG_2D(plan, FFT2, i, j)); starpu_tag_remove(STEP_TAG_2D(plan, TWIST3, i, j)); } } starpu_tag_remove(STEP_TAG_2D(plan, END, 0, 0)); } starpu-1.4.9+dfsg/starpufft/src/starpufftx3d.c000066400000000000000000000114041507764646700214160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * * Sequential version * */ #ifdef __STARPU_USE_CUDA /* Perform one fft of size n,m */ static void STARPUFFT(fft_3d_plan_gpu)(void *args) { STARPUFFT(plan) plan = args; cufftResult cures; int n = plan->n[0]; int m = plan->n[1]; int p = plan->n[2]; int workerid = starpu_worker_get_id_check(); cures = cufftPlan3d(&plan->plans[workerid].plan_cuda, n, m, p, _CUFFT_C2C); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); cufftSetStream(plan->plans[workerid].plan_cuda, starpu_cuda_get_local_stream()); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); } static void STARPUFFT(fft_3d_kernel_gpu)(void *descr[], void *args) { STARPUFFT(plan) plan = args; cufftResult cures; _cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]); _cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]); int workerid = starpu_worker_get_id_check(); task_per_worker[workerid]++; cures = _cufftExecC2C(plan->plans[workerid].plan_cuda, in, out, plan->sign == -1 ? CUFFT_FORWARD : CUFFT_INVERSE); if (cures != CUFFT_SUCCESS) STARPU_CUFFT_REPORT_ERROR(cures); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } #endif #ifdef STARPU_HAVE_FFTW /* Perform one fft of size n,m */ static void STARPUFFT(fft_3d_kernel_cpu)(void *descr[], void *_args) { STARPUFFT(plan) plan = _args; int workerid = starpu_worker_get_id_check(); task_per_worker[workerid]++; STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]); STARPUFFT(complex) * restrict out = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]); _FFTW(execute_dft)(plan->plans[workerid].plan_cpu, in, out); } #endif static struct starpu_perfmodel STARPUFFT(fft_3d_model) = { .type = STARPU_HISTORY_BASED, .symbol = TYPE"fft_3d" }; static struct starpu_codelet STARPUFFT(fft_3d_codelet) = { .where = #ifdef __STARPU_USE_CUDA STARPU_CUDA| #endif #ifdef STARPU_HAVE_FFTW STARPU_CPU| #endif 0, #ifdef __STARPU_USE_CUDA .cuda_funcs = {STARPUFFT(fft_3d_kernel_gpu)}, #endif #ifdef STARPU_HAVE_FFTW .cpu_funcs = {STARPUFFT(fft_3d_kernel_cpu)}, #endif CAN_EXECUTE .model = &STARPUFFT(fft_3d_model), .nbuffers = 2, .modes = {STARPU_R, STARPU_W}, .name = "fft_3d_codelet" }; STARPUFFT(plan) STARPUFFT(plan_dft_3d)(int n, int m, int p, int sign, unsigned flags) { unsigned workerid; if (PARALLEL) { /* TODO */ STARPU_ASSERT(0); } /* TODO: flags? Automatically set FFTW_MEASURE on calibration? */ STARPU_ASSERT(flags == 0); STARPUFFT(plan) plan = malloc(sizeof(*plan)); memset(plan, 0, sizeof(*plan)); plan->dim = 3; plan->n = malloc(plan->dim * sizeof(*plan->n)); plan->n[0] = n; plan->n[1] = m; plan->n[2] = p; plan->totsize = n * m; plan->type = C2C; plan->sign = sign; /* Initialize per-worker working set */ for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) { switch (starpu_worker_get_type(workerid)) { case STARPU_CPU_WORKER: #ifdef STARPU_HAVE_FFTW /* fft plan: one fft of size n, m. */ plan->plans[workerid].plan_cpu = _FFTW(plan_dft_3d)(n, m, p, NULL, (void*) 1, sign, _FFTW_FLAGS); STARPU_ASSERT(plan->plans[workerid].plan_cpu); #else /* #warning libstarpufft can not work correctly if libfftw3 is not installed */ #endif break; case STARPU_CUDA_WORKER: break; default: /* Do not care, we won't be executing anything there. */ break; } } #ifdef __STARPU_USE_CUDA starpu_execute_on_each_worker(STARPUFFT(fft_3d_plan_gpu), plan, STARPU_CUDA); #endif return plan; } /* Actually submit all the tasks. */ static struct starpu_task * STARPUFFT(start3dC2C)(STARPUFFT(plan) plan, starpu_data_handle_t in, starpu_data_handle_t out) { STARPU_ASSERT(plan->type == C2C); int ret; if (PARALLEL) { /* TODO */ STARPU_ASSERT(0); } else /* !PARALLEL */ { struct starpu_task *task; /* Create FFT task */ task = starpu_task_create(); task->detach = 0; task->cl = &STARPUFFT(fft_3d_codelet); task->handles[0] = in; task->handles[1] = out; task->cl_arg = plan; ret = starpu_task_submit(task); if (ret == -ENODEV) return NULL; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); return task; } } starpu-1.4.9+dfsg/starpufft/tests/000077500000000000000000000000001507764646700171705ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpufft/tests/Makefile.am000066400000000000000000000037031507764646700212270ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk include $(top_srcdir)/make/starpu-loader.mk CLEANFILES = starpu_idle_microsec.log examplebindir = $(libdir)/starpu/examples/starpufft EXTRA_DIST = \ testx.c \ testx_threads.c \ testf_threads.c \ test_threads.c check_PROGRAMS = $(STARPU_FFT_EXAMPLES) AM_CFLAGS += $(APP_CFLAGS) AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/starpufft/include -I$(top_srcdir)/starpufft/src $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ ../src/libstarpufft-@STARPU_EFFECTIVE_VERSION@.la $(STARPU_EXPORTED_LIBS) LIBS += $(STARPU_CUDA_LDFLAGS) examplebin_PROGRAMS = examplebin_PROGRAMS += \ testf \ test STARPU_FFT_EXAMPLES = testf testf_LDADD = $(FFTWF_LIBS) # If we don't have CUDA, we assume that we have fftw available in double # precision anyway, we just want to make sure that if CUFFT is used, it also # supports double precision. if !STARPU_USE_CUDA STARPU_FFT_EXAMPLES += test else if STARPU_HAVE_CUFFTDOUBLECOMPLEX STARPU_FFT_EXAMPLES += test endif endif test_LDADD = $(FFTW_LIBS) TESTS = $(STARPU_FFT_EXAMPLES) #check_PROGRAMS += examples/test_threads examples/testf_threads #examples_test_threads_LDADD = -lfftw3_threads #examples_testf_threads_LDADD = -lfftw3f_threads starpu-1.4.9+dfsg/starpufft/tests/Makefile.in000066400000000000000000001612111507764646700212370ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ noinst_PROGRAMS = $(am__EXEEXT_4) # Make tests run through mpiexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec # switch off local socket usage #MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 @STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader check_PROGRAMS = $(am__EXEEXT_3) examplebin_PROGRAMS = testf$(EXEEXT) test$(EXEEXT) # If we don't have CUDA, we assume that we have fftw available in double # precision anyway, we just want to make sure that if CUFFT is used, it also # supports double precision. @STARPU_USE_CUDA_FALSE@am__append_8 = test @STARPU_HAVE_CUFFTDOUBLECOMPLEX_TRUE@@STARPU_USE_CUDA_TRUE@am__append_9 = test TESTS = $(am__EXEEXT_3) subdir = starpufft/tests ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = @STARPU_USE_CUDA_FALSE@am__EXEEXT_1 = test$(EXEEXT) @STARPU_HAVE_CUFFTDOUBLECOMPLEX_TRUE@@STARPU_USE_CUDA_TRUE@am__EXEEXT_2 = test$(EXEEXT) am__EXEEXT_3 = testf$(EXEEXT) $(am__EXEEXT_1) $(am__EXEEXT_2) am__installdirs = "$(DESTDIR)$(examplebindir)" @STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_4 = loader$(EXEEXT) PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) loader_SOURCES = loader.c loader_OBJECTS = loader-loader.$(OBJEXT) loader_LDADD = $(LDADD) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = test_SOURCES = test.c test_OBJECTS = test.$(OBJEXT) am__DEPENDENCIES_1 = test_DEPENDENCIES = $(am__DEPENDENCIES_1) testf_SOURCES = testf.c testf_OBJECTS = testf.$(OBJEXT) testf_DEPENDENCIES = $(am__DEPENDENCIES_1) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po ./$(DEPDIR)/test.Po \ ./$(DEPDIR)/testf.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = loader.c test.c testf.c DIST_SOURCES = loader.c test.c testf.c am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) AM_RECURSIVE_TARGETS = check recheck TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-loader.mk \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ ../src/libstarpufft-@STARPU_EFFECTIVE_VERSION@.la \ $(STARPU_EXPORTED_LIBS) $(STARPU_CUDA_LDFLAGS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = $(am__append_4) $(am__append_6) LAUNCHER = $(am__append_3) $(am__append_5) AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(APP_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile # When GNU parallel is available and -j is passed to make, run tests through # parallel, using a "starpu" semaphore. # Also make test shell scripts run its tests through parallel, using a # "substarpu" semaphore. This brings some overload, but only one level. @HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') @STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 @STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ @STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) @STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" @STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) AM_TESTS_FD_REDIRECT = 9>&2 CLEANFILES = starpu_idle_microsec.log examplebindir = $(libdir)/starpu/examples/starpufft EXTRA_DIST = \ testx.c \ testx_threads.c \ testf_threads.c \ test_threads.c AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include -I$(top_srcdir)/starpufft/include -I$(top_srcdir)/starpufft/src $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ STARPU_FFT_EXAMPLES = testf $(am__append_8) $(am__append_9) testf_LDADD = $(FFTWF_LIBS) test_LDADD = $(FFTW_LIBS) all: all-am .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpufft/tests/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign starpufft/tests/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-checkPROGRAMS: @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list install-examplebinPROGRAMS: $(examplebin_PROGRAMS) @$(NORMAL_INSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ fi; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ while read p p1; do if test -f $$p \ || test -f $$p1 \ ; then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' \ -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ } \ ; done uninstall-examplebinPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' \ `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(examplebindir)" && rm -f $$files clean-examplebinPROGRAMS: @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list clean-noinstPROGRAMS: @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) @rm -f loader$(EXEEXT) $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) test$(EXEEXT): $(test_OBJECTS) $(test_DEPENDENCIES) $(EXTRA_test_DEPENDENCIES) @rm -f test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(test_OBJECTS) $(test_LDADD) $(LIBS) testf$(EXEEXT): $(testf_OBJECTS) $(testf_DEPENDENCIES) $(EXTRA_testf_DEPENDENCIES) @rm -f testf$(EXEEXT) $(AM_V_CCLD)$(LINK) $(testf_OBJECTS) $(testf_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testf.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< loader-loader.o: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c loader-loader.obj: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-am TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-am CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-am cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: $(check_PROGRAMS) @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all $(check_PROGRAMS) @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? testf.log: testf$(EXEEXT) @p='testf$(EXEEXT)'; \ b='testf'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) test.log: test$(EXEEXT) @p='test$(EXEEXT)'; \ b='test'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: check-am all-am: Makefile $(PROGRAMS) installdirs: for dir in "$(DESTDIR)$(examplebindir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ clean-libtool clean-noinstPROGRAMS mostlyclean-am distclean: distclean-am -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f ./$(DEPDIR)/test.Po -rm -f ./$(DEPDIR)/testf.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-examplebinPROGRAMS install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f ./$(DEPDIR)/test.Po -rm -f ./$(DEPDIR)/testf.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-examplebinPROGRAMS .MAKE: check-am install-am install-strip .PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ check-am clean clean-checkPROGRAMS clean-examplebinPROGRAMS \ clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ ctags ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am \ install-examplebinPROGRAMS install-exec install-exec-am \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip installcheck installcheck-am \ installdirs maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ uninstall uninstall-am uninstall-examplebinPROGRAMS .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # # Test loading goes through a lot of launchers: # # - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. # either mpirun or starpu_tcpipexec # # - $(LOADER), i.e. tests/loader, is then called to implement timeout, running # gdb, etc. But if it detects that the test is a .sh script, it just executes # it # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # to run the program through e.g. valgrind.sh # # When the program is a shell script, additionally: # # - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) # # - $(MS_LAUNCHER) is called to run the test through starpu_msexec # # - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program # through it. # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # export LAUNCHER @HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL export MS_LAUNCHER LAUNCHER ?= MS_LAUNCHER ?= @STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr export LSAN_OPTIONS export TSAN_OPTIONS #check_PROGRAMS += examples/test_threads examples/testf_threads #examples_test_threads_LDADD = -lfftw3_threads #examples_testf_threads_LDADD = -lfftw3f_threads # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/starpufft/tests/loader.c000066400000000000000000000274611507764646700206140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) #include #else #include #endif #ifdef STARPU_QUICK_CHECK /* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s add some extra times for tests which run with all schedulers */ #define DEFAULT_TIMEOUT 100 #elif !defined(STARPU_LONG_CHECK) /* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ #define DEFAULT_TIMEOUT 300 #else /* Long checks can be very long */ #define DEFAULT_TIMEOUT 1000 #endif #define AUTOTEST_SKIPPED_TEST 77 static pid_t child_pid = 0; static int timeout; #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) static int mygettimeofday(struct timeval *tv, void *tz) { if (tv) { FILETIME ft; unsigned long long res; GetSystemTimeAsFileTime(&ft); /* 100-nanosecond intervals since January 1, 1601 */ res = ft.dwHighDateTime; res <<= 32; res |= ft.dwLowDateTime; res /= 10; /* Now we have microseconds */ res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; /* Now we are based on epoch */ tv->tv_sec = res / 1000000ULL; tv->tv_usec = res % 1000000ULL; } } #else #define mygettimeofday(tv,tz) gettimeofday(tv,tz) #endif #ifdef STARPU_GDB_PATH static int try_launch_gdb(const char *exe, const char *core) { # define GDB_COMMANDS \ "-ex", "py-list", \ "-ex", "starpu-tasks", \ "-ex", "starpu-workers", \ "-ex", "starpu-print-datas-summary", \ "-ex", "starpu-memusage", \ "-ex", "starpu-print-archs", \ "-ex", "starpu-print-registered-models", \ "-ex", "bt full", \ "-ex", "py-bt", \ "-ex", "thread apply all bt full", \ "-ex", "thread apply all py-bt", \ int err; pid_t pid; struct stat st; const char *top_builddir; char *gdb; err = stat(core, &st); if (err != 0) { fprintf(stderr, "while looking for core file of %s: %s: %m\n", exe, core); return -1; } if (!(st.st_mode & S_IFREG)) { fprintf(stderr, "%s: not a regular file\n", core); return -1; } top_builddir = getenv("top_builddir"); pid = fork(); switch (pid) { case 0: /* kid */ if (top_builddir != NULL) { /* Run gdb with Libtool. */ gdb = alloca(strlen(top_builddir) + sizeof("/libtool") + 1); strcpy(gdb, top_builddir); strcat(gdb, "/libtool"); err = execl(gdb, "gdb", "--mode=execute", STARPU_GDB_PATH, "--batch", GDB_COMMANDS exe, core, NULL); } else { /* Run gdb directly */ gdb = STARPU_GDB_PATH; err = execl(gdb, "gdb", "--batch", GDB_COMMANDS exe, core, NULL); } if (err != 0) { fprintf(stderr, "while launching `%s': %m\n", gdb); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); break; case -1: fprintf(stderr, "fork: %m\n"); return -1; default: /* parent */ { pid_t who; int status; who = waitpid(pid, &status, 0); if (who != pid) fprintf(stderr, "while waiting for gdb " "process %d: %m\n", pid); } } return 0; # undef GDB_COMMANDS } #endif /* STARPU_GDB_PATH */ static void launch_gdb(const char *exe) { #ifdef STARPU_GDB_PATH char s[32]; snprintf(s, sizeof(s), "core.%d", child_pid); if (try_launch_gdb(exe, s) < 0) try_launch_gdb(exe, "core"); #endif /* STARPU_GDB_PATH */ } static char *test_name; static void test_cleaner(int sig) { pid_t child_gid; int status; (void) sig; // send signal to all loader family members fprintf(stderr, "[error] test %s has been blocked for %d seconds. Mark it as failed\n", test_name, timeout); child_gid = getpgid(child_pid); kill(-child_gid, SIGQUIT); waitpid(child_pid, &status, 0); launch_gdb(test_name); raise(SIGALRM); exit(EXIT_FAILURE); } static void forwardsig(int sig) { pid_t child_gid; child_gid = getpgid(child_pid); kill(-child_gid, sig); } static int _decode(char **src, char *motif, const char *value) { char *found; found = strstr(*src, motif); if (found == NULL) return 0; char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); strncpy(new_src, *src, found - *src); strcat(new_src, value); strcat(new_src, found+strlen(motif)); *src = new_src; return 1; } static void decode(char **src, char *motif, const char *value) { if (*src) { if (strstr(*src, motif) && value == NULL) { fprintf(stderr, "error: $%s undefined\n", motif); exit(EXIT_FAILURE); } int d = _decode(src, motif, value); while (d) d = _decode(src, motif, value); } } int main(int argc, char *argv[]) { int child_exit_status; char *test_args; char *launcher; char *launcher_args; char *libtool; char *cflags; const char *top_builddir = getenv("top_builddir"); struct sigaction sa; int ret; struct timeval start; struct timeval end; double timing; int x=1; int asan = 0, lsan = 0, tsan = 0, usan = 0; (void) argc; test_args = NULL; timeout = 0; launcher=getenv("STARPU_CHECK_LAUNCHER"); launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); cflags = getenv("CFLAGS"); if (cflags) { if (strstr(cflags, "-fsanitize=address")) asan = 1; if (strstr(cflags, "-fsanitize=leak")) lsan = 1; if (strstr(cflags, "-fsanitize=thread")) tsan = 1; if (strstr(cflags, "-fsanitize=undefined")) usan = 1; } if (argv[x] && strcmp(argv[x], "-t") == 0) { timeout = strtol(argv[x+1], NULL, 10); x += 2; } else if (getenv("STARPU_TIMEOUT_ENV")) { /* get user-defined iter_max value */ timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); } else if (timeout <= 0) { timeout = DEFAULT_TIMEOUT; if ((launcher && strstr(launcher, "valgrind")) || (launcher && strstr(launcher, "helgrind")) || tsan) timeout *= 20; if (asan || usan || lsan || (launcher && strstr(launcher, "compute-sanitizer"))) timeout *= 5; if (timeout > 1750) timeout = 1750; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG timeout *= 20; #endif #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE /* compare values between the 2 values of timeout */ if (getenv("MPIEXEC_TIMEOUT")) { int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); if (mpiexec_timeout != timeout) fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); } #endif if (argv[x] && strcmp(argv[x], "-p") == 0) { test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); x += 3; } else { test_name = argv[x]; x += 1; } if (!test_name) { fprintf(stderr, "[error] Need name of program to start\n"); exit(EXIT_FAILURE); } size_t len = strlen(test_name); if (len >= 3 && test_name[len-3] == '.' && test_name[len-2] == 's' && test_name[len-1] == 'h') { /* This is a shell script, don't run ourself on bash, but make * the script call us for each program invocation */ char *launch = NULL; if (top_builddir == NULL) // this may fail if .libs is in the directory path setenv("STARPU_LAUNCH", argv[0], 1); else { launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); strcpy(launch, top_builddir); strcat(launch, "/tests/loader"); setenv("STARPU_LAUNCH", launch, 1); } execvp(test_name, argv+x-1); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); free(launch); exit(EXIT_FAILURE); } if (strstr(test_name, "spmv/dw_block_spmv")) { test_args = (char *) calloc(512, sizeof(char)); snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); } else if (strstr(test_name, "starpu_perfmodel_display")) { if (x >= argc) test_args = strdup("-l"); } else if (strstr(test_name, "starpu_perfmodel_plot")) { if (x >= argc) test_args = strdup("-l"); } /* get launcher program */ if (launcher_args) launcher_args=strdup(launcher_args); if (top_builddir == NULL) { fprintf(stderr, "warning: $top_builddir undefined, " "so $STARPU_CHECK_LAUNCHER ignored\n"); launcher = NULL; launcher_args = NULL; libtool = NULL; } else { libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); strcpy(libtool, top_builddir); strcat(libtool, "/libtool"); } if (launcher) { const char *top_srcdir = getenv("top_srcdir"); decode(&launcher, "@top_srcdir@", top_srcdir); decode(&launcher_args, "@top_srcdir@", top_srcdir); } setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); /* set SIGALARM handler */ sa.sa_flags = SA_RESETHAND | SA_NODEFER; sigemptyset(&sa.sa_mask); sa.sa_handler = test_cleaner; if (-1 == sigaction(SIGALRM, &sa, NULL)) perror("sigaction"); signal(SIGINT, forwardsig); signal(SIGHUP, forwardsig); signal(SIGPIPE, forwardsig); signal(SIGTERM, forwardsig); child_pid = fork(); if (child_pid == 0) { char *launcher_argv[100]; int i=0; setpgid(0, 0); /* "Launchers" such as Valgrind need to be inserted * after the Libtool-generated wrapper scripts, hence * this special-case. */ if (launcher && top_builddir != NULL) { launcher_argv[i++] = libtool; launcher_argv[i++] = "--mode=execute"; launcher_argv[i++] = launcher; if (launcher_args) { launcher_argv[i++] = strtok(launcher_args, " "); while (launcher_argv[i-1]) { launcher_argv[i++] = strtok(NULL, " "); } } } launcher_argv[i++] = test_name; if (test_args) launcher_argv[i++] = test_args; else while (argv[x]) { launcher_argv[i++] = argv[x++]; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG launcher_argv[i++] = "--cfg=contexts/factory:thread"; #endif #endif launcher_argv[i++] = NULL; execvp(*launcher_argv, launcher_argv); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); exit(EXIT_FAILURE); } if (child_pid == -1) { fprintf(stderr, "[error] fork. test marked as failed\n"); exit(EXIT_FAILURE); } free(test_args); free(libtool); ret = EXIT_SUCCESS; gettimeofday(&start, NULL); alarm(timeout); if (child_pid == waitpid(child_pid, &child_exit_status, 0)) { if (WIFEXITED(child_exit_status)) { int status = WEXITSTATUS(child_exit_status); if (status == EXIT_SUCCESS) { alarm(0); } else { if (status != AUTOTEST_SKIPPED_TEST) fprintf(stdout, "`%s' exited with return code %d\n", test_name, status); ret = status; } } else if (WIFSIGNALED(child_exit_status)) { fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", test_name, WTERMSIG(child_exit_status)); launch_gdb(test_name); ret = EXIT_FAILURE; } else { fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", test_name); ret = EXIT_FAILURE; } } gettimeofday(&end, NULL); timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); return ret; } starpu-1.4.9+dfsg/starpufft/tests/test.c000066400000000000000000000013401507764646700203110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "starpufft-double.h" #include "testx.c" starpu-1.4.9+dfsg/starpufft/tests/test_threads.c000066400000000000000000000013501507764646700220240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "starpufft-double.h" #include "testx_threads.c" starpu-1.4.9+dfsg/starpufft/tests/testf.c000066400000000000000000000013371507764646700204650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "starpufft-float.h" #include "testx.c" starpu-1.4.9+dfsg/starpufft/tests/testf_threads.c000066400000000000000000000013471507764646700222000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "starpufft-float.h" #include "testx_threads.c" starpu-1.4.9+dfsg/starpufft/tests/testx.c000066400000000000000000000201101507764646700204750ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include "starpufft.h" #undef STARPU_USE_CUDA #ifdef STARPU_HAVE_FFTW #include #endif #ifdef STARPU_USE_CUDA #include #endif #define SIGN (-1) /* #define SIGN (1) */ #ifdef STARPU_HAVE_FFTW static void check_fftw(STARPUFFT(complex) *out, STARPUFFT(complex) *out_fftw, int size) { int i; double max = 0., tot = 0., norm = 0., normdiff = 0.; for (i = 0; i < size; i++) { double diff = cabs(out[i]-out_fftw[i]); double diff2 = diff * diff; double dsize = cabs(out_fftw[i]); double size2 = dsize * dsize; if (diff > max) max = diff; tot += diff; normdiff += diff2; norm += size2; } fprintf(stderr, "\nmaximum difference %g\n", max); fprintf(stderr, "average difference %g\n", tot / size); fprintf(stderr, "difference norm %g\n", sqrt(normdiff)); double relmaxdiff = max / sqrt(norm); fprintf(stderr, "relative maximum difference %g\n", relmaxdiff); double relavgdiff = (tot / size) / sqrt(norm); fprintf(stderr, "relative average difference %g\n", relavgdiff); if (!strcmp(TYPE, "f") && (relmaxdiff > 1e-7 || relavgdiff > 1e-7)) { fprintf(stderr, "Failure: Difference too big (TYPE f)\n"); exit(EXIT_FAILURE); } if (!strcmp(TYPE, "") && (relmaxdiff > 1e-16 || relavgdiff > 1e-16)) { fprintf(stderr, "Failure: Difference too big\n"); exit(EXIT_FAILURE); } } #endif #ifdef STARPU_USE_CUDA static void check_cuda(STARPUFFT(complex) *out, STARPUFFT(complex) *out_fftw, int size) { int i; double max = 0., tot = 0., norm = 0., normdiff = 0.; for (i = 0; i < size; i++) { double diff = cabs(out_cuda[i]-out_fftw[i]); double diff2 = diff * diff; double size = cabs(out_fftw[i]); double size2 = size * size; if (diff > max) max = diff; tot += diff; normdiff += diff2; norm += size2; } fprintf(stderr, "\nmaximum difference %g\n", max); fprintf(stderr, "average difference %g\n", tot / size); fprintf(stderr, "difference norm %g\n", sqrt(normdiff)); double relmaxdiff = max / sqrt(norm); fprintf(stderr, "relative maximum difference %g\n", relmaxdiff); double relavgdiff = (tot / size) / sqrt(norm); fprintf(stderr, "relative average difference %g\n", relavgdiff); if (!strcmp(TYPE, "f") && (relmaxdiff > 1e-8 || relavgdiff > 1e-8)) exit(EXIT_FAILURE); if (!strcmp(TYPE, "") && (relmaxdiff > 1e-16 || relavgdiff > 1e-16)) exit(EXIT_FAILURE); } #endif int main(int argc, char *argv[]) { int i, ret; int size; int n = 0, m = 0, p = 0; STARPUFFT(plan) plan; starpu_data_handle_t in_handle, out_handle; #ifdef STARPU_HAVE_FFTW _FFTW(plan) fftw_plan; #endif #ifdef STARPU_USE_CUDA cufftHandle cuda_plan; cudaError_t cures; #endif #if defined(STARPU_HAVE_FFTW) || defined(STARPU_USE_CUDA) struct timeval begin, end; double timing; size_t bytes; #endif struct starpu_conf conf; starpu_conf_init(&conf); /* FIXME: the testcase needs to be updated to properly support cuFFT */ conf.ncuda = 0; ret = starpu_init(&conf); ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (argc == 1) { n = 42; /* 1D */ size = n; } else if (argc == 2) { n = atoi(argv[1]); /* 1D */ size = n; } else if (argc == 3) { n = atoi(argv[1]); m = atoi(argv[2]); /* 2D */ size = n * m; } else if (argc == 4) { n = atoi(argv[1]); m = atoi(argv[2]); p = atoi(argv[3]); /* 3D */ size = n * m * p; } else { assert(0); } #if defined(STARPU_HAVE_FFTW) || defined(STARPU_USE_CUDA) bytes = size * sizeof(STARPUFFT(complex)); #endif STARPUFFT(complex) *in_orig = STARPUFFT(malloc)(size * sizeof(*in_orig)); starpu_srand48(0); for (i = 0; i < size; i++) in_orig[i] = starpu_drand48() + I * starpu_drand48(); STARPUFFT(complex) *in = STARPUFFT(malloc)(size * sizeof(*in)); STARPUFFT(complex) *out = STARPUFFT(malloc)(size * sizeof(*out)); #ifdef STARPU_HAVE_FFTW STARPUFFT(complex) *out_fftw = STARPUFFT(malloc)(size * sizeof(*out_fftw)); #endif #ifdef STARPU_USE_CUDA STARPUFFT(complex) *out_cuda = STARPUFFT(malloc)(size * sizeof(*out_cuda)); #endif if (argc <= 2) { plan = STARPUFFT(plan_dft_1d)(n, SIGN, 0); #ifdef STARPU_HAVE_FFTW fftw_plan = _FFTW(plan_dft_1d)(n, NULL, (void*) 1, SIGN, FFTW_ESTIMATE); #endif #ifdef STARPU_USE_CUDA if (cufftPlan1d(&cuda_plan, n, _CUFFT_C2C, 1) != CUFFT_SUCCESS) printf("erf\n"); #endif } else if (argc == 3) { plan = STARPUFFT(plan_dft_2d)(n, m, SIGN, 0); #ifdef STARPU_HAVE_FFTW fftw_plan = _FFTW(plan_dft_2d)(n, m, NULL, (void*) 1, SIGN, FFTW_ESTIMATE); #endif #ifdef STARPU_USE_CUDA STARPU_ASSERT(cufftPlan2d(&cuda_plan, n, m, _CUFFT_C2C) == CUFFT_SUCCESS); #endif } else if (argc == 4) { plan = STARPUFFT(plan_dft_3d)(n, m, p, SIGN, 0); #ifdef STARPU_HAVE_FFTW fftw_plan = _FFTW(plan_dft_3d)(n, m, p, NULL, (void*) 1, SIGN, FFTW_ESTIMATE); #endif #ifdef STARPU_USE_CUDA STARPU_ASSERT(cufftPlan3d(&cuda_plan, n, m, p, _CUFFT_C2C) == CUFFT_SUCCESS); #endif } else { assert(0); } #ifdef STARPU_HAVE_FFTW memcpy(in, in_orig, size * sizeof(*in)); gettimeofday(&begin, NULL); _FFTW(execute_dft)(fftw_plan, in, out_fftw); gettimeofday(&end, NULL); _FFTW(destroy_plan)(fftw_plan); timing = (double)((end.tv_sec - begin.tv_sec)*1000000 + (end.tv_usec - begin.tv_usec)); printf("FFTW took %2.2f ms (%2.2f MB/s)\n\n", timing/1000, bytes/timing); #endif #ifdef STARPU_USE_CUDA memcpy(in, in_orig, size * sizeof(*in)); gettimeofday(&begin, NULL); if (cufftExecC2C(cuda_plan, (cufftComplex*) in, (cufftComplex*) out_cuda, CUFFT_FORWARD) != CUFFT_SUCCESS) printf("erf2\n"); if ((cures = cudaDeviceSynchronize()) != cudaSuccess) STARPU_CUDA_REPORT_ERROR(cures); gettimeofday(&end, NULL); cufftDestroy(cuda_plan); timing = (double)((end.tv_sec - begin.tv_sec)*1000000 + (end.tv_usec - begin.tv_usec)); printf("CUDA took %2.2f ms (%2.2f MB/s)\n\n", timing/1000, bytes/timing); #endif memcpy(in, in_orig, size * sizeof(*in)); ret = STARPUFFT(execute)(plan, in, out); if (ret == -1) return 77; STARPUFFT(showstats)(stdout); #ifdef STARPU_HAVE_FFTW check_fftw(out, out_fftw, size); #endif #ifdef STARPU_USE_CUDA check_cuda(out, out_cuda, size); #endif #if 1 memcpy(in, in_orig, size * sizeof(*in)); starpu_vector_data_register(&in_handle, STARPU_MAIN_RAM, (uintptr_t) in, size, sizeof(*in)); starpu_vector_data_register(&out_handle, STARPU_MAIN_RAM, (uintptr_t) out, size, sizeof(*out)); ret = STARPUFFT(execute_handle)(plan, in_handle, out_handle); if (ret == -1) return 77; starpu_data_unregister(in_handle); starpu_data_unregister(out_handle); #ifdef STARPU_HAVE_FFTW check_fftw(out, out_fftw, size); #endif #ifdef STARPU_USE_CUDA check_cuda(out, out_cuda, size); #endif #endif STARPUFFT(showstats)(stdout); STARPUFFT(destroy_plan)(plan); printf("\n"); #if 0 for (i = 0; i < 16; i++) printf("(%f,%f) ", cimag(in[i]), creal(in[i])); printf("\n\n"); for (i = 0; i < 16; i++) printf("(%f,%f) ", cimag(out[i]), creal(out[i])); printf("\n\n"); #ifdef STARPU_HAVE_FFTW for (i = 0; i < 16; i++) printf("(%f,%f) ", cimag(out_fftw[i]), creal(out_fftw[i])); printf("\n\n"); #endif #endif STARPUFFT(free)(in_orig, size * sizeof(*in_orig)); STARPUFFT(free)(in, size * sizeof(*in)); STARPUFFT(free)(out, size * sizeof(*out)); #ifdef STARPU_HAVE_FFTW STARPUFFT(free)(out_fftw, size * sizeof(*out_fftw)); #endif #ifdef STARPU_USE_CUDA free(out_cuda); #endif starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/starpufft/tests/testx_threads.c000066400000000000000000000045701507764646700222230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include "starpufft.h" #include #define SIGN (-1) /* #define SIGN (1) */ int main(int argc, char *argv[]) { int i; struct timeval begin, end; int size; size_t bytes; int n = 0, m = 0; _FFTW(plan) fftw_plan; double timing; char *num; int num_threads = 1; _FFTW(init_threads)(); num = getenv("NUM_THREADS"); if (num) num_threads = atoi(num); _FFTW(plan_with_nthreads)(num_threads); if (argc < 2 || argc > 3) { fprintf(stderr,"need one or two size of vector\n"); exit(EXIT_FAILURE); } if (argc == 2) { n = atoi(argv[1]); /* 1D */ size = n; } else if (argc == 3) { n = atoi(argv[1]); m = atoi(argv[2]); /* 2D */ size = n * m; } else { assert(0); } bytes = size * sizeof(_FFTW(complex)); _FFTW(complex) *in = _FFTW(malloc)(size * sizeof(*in)); starpu_srand48(0); for (i = 0; i < size; i++) in[i] = starpu_drand48() + I * starpu_drand48(); _FFTW(complex) *out_fftw = _FFTW(malloc)(size * sizeof(*out_fftw)); if (argc == 2) { fftw_plan = _FFTW(plan_dft_1d)(n, in, out_fftw, SIGN, FFTW_ESTIMATE); } else if (argc == 3) { fftw_plan = _FFTW(plan_dft_2d)(n, m, in, out_fftw, SIGN, FFTW_ESTIMATE); } else { assert(0); } gettimeofday(&begin, NULL); _FFTW(execute)(fftw_plan); gettimeofday(&end, NULL); _FFTW(destroy_plan)(fftw_plan); timing = (double)((end.tv_sec - begin.tv_sec)*1000000 + (end.tv_usec - begin.tv_usec)); printf("FFTW with %d threads took %2.2f ms (%2.2f MB/s)\n\n", num_threads, timing/1000, bytes/(timing*num_threads)); printf("\n"); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/starpupy/000077500000000000000000000000001507764646700156775ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpupy/Makefile.am000066400000000000000000000014131507764646700177320ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-subdirtests.mk SUBDIRS = src SUBDIRS += examples SUBDIRS += benchmark starpu-1.4.9+dfsg/starpupy/Makefile.in000066400000000000000000000665251507764646700177620ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ subdir = starpupy ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = execute.sh CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/execute.sh.in \ $(top_srcdir)/make/starpu-subdirtests.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = src examples benchmark all: all-recursive .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpupy/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign starpupy/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): execute.sh: $(top_builddir)/config.status $(srcdir)/execute.sh.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile installdirs: installdirs-recursive installdirs-am: install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ check-am clean clean-generic clean-libtool cscopelist-am ctags \ ctags-am distclean distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ installdirs-am maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ps ps-am tags tags-am uninstall uninstall-am .PRECIOUS: Makefile # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # recheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i recheck || RET=1 ; \ done ; \ exit $$RET showcheckfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/starpupy/benchmark/000077500000000000000000000000001507764646700176315ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpupy/benchmark/Makefile.am000066400000000000000000000023111507764646700216620ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk SUBDIRS = CLEANFILES = *.gcno *.gcda *.linkinfo EXTRA_DIST = \ tasks_size_overhead.py \ tasks_size_overhead.sh \ tasks_size_overhead.gp \ test_handle_perf.py \ test_handle_perf.sh \ test_handle_perf_pickle.py \ test_handle_perf_pickle.sh \ test_handle_bench.py \ handle_perf_plot_pickle.py \ handle_perf_plot.py python_sourcesdir = $(libdir)/starpu/python dist_python_sources_DATA = \ tasks_size_overhead.py \ test_handle_perf.py \ test_handle_perf_pickle.py TESTS = starpu-1.4.9+dfsg/starpupy/benchmark/Makefile.in000066400000000000000000001374601507764646700217110ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ TESTS = subdir = starpupy/benchmark ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(dist_python_sources_DATA) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(python_sourcesdir)" DATA = $(dist_python_sources_DATA) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ check recheck distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = LAUNCHER = AM_CFLAGS = $(GLOBAL_AM_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile SUBDIRS = CLEANFILES = *.gcno *.gcda *.linkinfo EXTRA_DIST = \ tasks_size_overhead.py \ tasks_size_overhead.sh \ tasks_size_overhead.gp \ test_handle_perf.py \ test_handle_perf.sh \ test_handle_perf_pickle.py \ test_handle_perf_pickle.sh \ test_handle_bench.py \ handle_perf_plot_pickle.py \ handle_perf_plot.py python_sourcesdir = $(libdir)/starpu/python dist_python_sources_DATA = \ tasks_size_overhead.py \ test_handle_perf.py \ test_handle_perf_pickle.py all: all-recursive .SUFFIXES: .SUFFIXES: .cu .cubin .hip .log .o .test .test$(EXEEXT) .trs $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpupy/benchmark/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign starpupy/benchmark/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-dist_python_sourcesDATA: $(dist_python_sources_DATA) @$(NORMAL_INSTALL) @list='$(dist_python_sources_DATA)'; test -n "$(python_sourcesdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(python_sourcesdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(python_sourcesdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(python_sourcesdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(python_sourcesdir)" || exit $$?; \ done uninstall-dist_python_sourcesDATA: @$(NORMAL_UNINSTALL) @list='$(dist_python_sources_DATA)'; test -n "$(python_sourcesdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(python_sourcesdir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: check-recursive all-am: Makefile $(DATA) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(python_sourcesdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dist_python_sourcesDATA install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-dist_python_sourcesDATA .MAKE: $(am__recursive_targets) check-am install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ check-TESTS check-am clean clean-generic clean-libtool \ cscopelist-am ctags ctags-am distclean distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dist_python_sourcesDATA install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ installdirs-am maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ps ps-am recheck tags tags-am uninstall uninstall-am \ uninstall-dist_python_sourcesDATA .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/starpupy/benchmark/handle_perf_plot.py000066400000000000000000000064201507764646700235120ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import json import matplotlib.pyplot as plt import test_handle_bench file1 = open('handle_perf1.txt', 'r') js1 = file1.read() retfut_dict = json.loads(js1) #print(retfut_dict) program_submit1 = [x*1000 for x in retfut_dict['program_submit']] program_await1 = [x*1000 for x in retfut_dict['program_await']] file2 = open('handle_perf2.txt', 'r') js2 = file2.read() rethandle_dict = json.loads(js2) #print(rethandle_dict) program_submit2 = [x*1000 for x in rethandle_dict['program_submit']] program_await2 = [x*1000 for x in rethandle_dict['program_await']] file3 = open('handle_perf3.txt', 'r') js3 = file3.read() nostarpu_dict = json.loads(js3) #print(nostarpu_dict) program_submit3 = [x*1000 for x in nostarpu_dict['program_submit']] file_std = open('handle_perf_std.txt', 'r') js_std = file_std.read() dict_std = json.loads(js_std) file1.close() file2.close() file3.close() file_std.close() std11 = dict_std['list_std11'] std12 = dict_std['list_std12'] std21 = dict_std['list_std21'] std22 = dict_std['list_std22'] std3 = dict_std['list_std3'] plt.subplot(2, 1, 1) plt.xticks(fontsize=15) plt.yticks(fontsize=15) plt.xscale("log") plt.yscale("log") plt.errorbar([i for i in test_handle_bench.list_size], program_submit1, yerr=std11, fmt='+-', ecolor='r', color='r', elinewidth=1, capsize=3, linewidth=1, label='using StarPU and returning future object') plt.errorbar([i for i in test_handle_bench.list_size], program_submit2, yerr=std21, fmt='+-', ecolor='b', color='b', elinewidth=1, capsize=3, linewidth=1, label='using StarPU and returning handle object') plt.errorbar([i for i in test_handle_bench.list_size], program_submit3, yerr=std3, fmt='+-',ecolor='y', color='y', elinewidth=1, capsize=3, linewidth=1, label='using numpy.add function') plt.legend(loc='upper left', fontsize=15) plt.xlabel("Numpy array size (# of elements)", fontsize=15) plt.ylabel("Program execution time (ms)", fontsize=15) plt.subplot(2, 1, 2) plt.xticks(fontsize=15) plt.yticks(fontsize=15) plt.xscale("log") plt.yscale("log") plt.errorbar([i for i in test_handle_bench.list_size], program_await1, yerr=std12, fmt='+-',ecolor='r', color='r', elinewidth=1, capsize=3, linewidth=1, label='using StarPU and returning future object') plt.errorbar([i for i in test_handle_bench.list_size], program_await2, yerr=std22, fmt='+-',ecolor='b', color='b', elinewidth=1, capsize=3, linewidth=1, label='using StarPU and returning handle object') plt.legend(loc='upper left', fontsize=15) plt.xlabel("Numpy array size (# of elements)", fontsize=15) plt.ylabel("Program await time (ms)", fontsize=15) plt.show() #plt.savefig("starpupy_handle_perf.png") #plt.savefig("starpupy_handle_perf.eps") starpu-1.4.9+dfsg/starpupy/benchmark/handle_perf_plot_pickle.py000066400000000000000000000062221507764646700250410ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import json import matplotlib.pyplot as plt num = 1000000 listX = [10, 100, 1000, 10000, 100000, 1000000] list_size = [] for x in listX: for X in range(x, x*10, x): list_size.append(X) list_size.append(10000000) list_size.append(20000000) list_size.append(30000000) list_size.append(40000000) list_size.append(50000000) #print(list_size) file1 = open('handle_perf1.txt', 'r') js1 = file1.read() withhandle_dict = json.loads(js1) #print(withhandle_dict) program_submit1 = withhandle_dict['program_submit'] program_await1 = withhandle_dict['program_await'] file2 = open('handle_perf2.txt', 'r') js2 = file2.read() nohandle_dict = json.loads(js2) #print(nohandle_dict) program_submit2 = nohandle_dict['program_submit'] program_await2 = nohandle_dict['program_await'] file3 = open('handle_perf3.txt', 'r') js3 = file3.read() nostarpu_dict = json.loads(js3) #print(nostarpu_dict) program_submit3 = nostarpu_dict['program_submit'] file_std = open('handle_perf_std.txt', 'r') js_std = file_std.read() dict_std = json.loads(js_std) std11 = dict_std['list_std11'] std12 = dict_std['list_std12'] std21 = dict_std['list_std21'] std22 = dict_std['list_std22'] std3 = dict_std['list_std3'] plt.subplot(2, 1, 1) plt.xscale("log") plt.yscale("log") plt.errorbar([i/num for i in list_size], program_submit1, yerr=std11, fmt='+-', ecolor='r', color='r', elinewidth=1, capsize=3, linewidth=1, label='using virtually shared memory manager') plt.errorbar([i/num for i in list_size], program_submit2, yerr=std21, fmt='+-', ecolor='b', color='b', elinewidth=1, capsize=3, linewidth=1, label='without using virtually shared memory manager') plt.errorbar([i/num for i in list_size], program_submit3, yerr=std3, fmt='+-',ecolor='y', color='y', elinewidth=1, capsize=3, linewidth=1, label='without using StarPU task submitting') plt.legend(loc='upper left') plt.xlabel("Numpy array size (MB)") plt.ylabel("Program execution time (s)") plt.subplot(2, 1, 2) plt.xscale("log") plt.yscale("log") plt.errorbar([i/num for i in list_size], program_await1, yerr=std12, fmt='+-',ecolor='r', color='r', elinewidth=1, capsize=3, linewidth=1, label='using virtually shared memory manager') plt.errorbar([i/num for i in list_size], program_await2, yerr=std22, fmt='+-',ecolor='b', color='b', elinewidth=1, capsize=3, linewidth=1, label='without using virtually shared memory manager') plt.legend(loc='upper left') plt.xlabel("Numpy array size (MB)") plt.ylabel("Program await time (s)") plt.show() file1.close() file2.close() file3.close() file_std.close() starpu-1.4.9+dfsg/starpupy/benchmark/tasks_size_overhead.gp000077500000000000000000000023731507764646700242250ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # OUTPUT=tasks_size_overhead.output VALS=$(sed -n -e '3p' < $OUTPUT) PLOTS="" for x in $(seq 1 11) do pos=$((2 * $x + 1)) double=$((2 * $x)) value=$(echo "$VALS" | cut -d ' ' -f $pos) if test -n "$value" then PLOTS=",\"$OUTPUT\" using 1:($value)/(\$$pos) with linespoints title columnheader($double) $PLOTS" fi done [ -n "$TERMINAL" ] || TERMINAL=eps [ -n "$OUTFILE" ] || OUTFILE=tasks_size_overhead.eps gnuplot << EOF set terminal $TERMINAL set output "$OUTFILE" set key top left set xlabel "number of cores" set ylabel "speedup" plot \ x title "linear" $PLOTS EOF starpu-1.4.9+dfsg/starpupy/benchmark/tasks_size_overhead.py000066400000000000000000000142061507764646700242420ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import starpu from starpu import starpupy import time import sys import getopt import asyncio import cProfile import sys mincpus = 1 maxcpus = starpupy.worker_get_count_by_type(starpu.STARPU_CPU_WORKER) cpustep = 1 mintime = 128 maxtime = 128*1024 factortime = 2 ntasks = 64 nbuffers = 0 total_nbuffers = 0 #################parameters############## try: opts, args = getopt.getopt(sys.argv[1:],"i:b:B:c:C:s:t:T:f:h") except getopt.GetoptError: print("Usage:", sys.argv[0], "\n"\ "\t[-h help] \n "\ "\t[-i ntasks] [-b nbuffers] [-B total_nbuffers] \n"\ "\t[-c mincpus] [ -C maxcpus] [-s cpustep]\n"\ "\t[-t mintime] [-T maxtime] [-f factortime]") starpupy.shutdown() sys.exit(1) for opt, arg in opts: if opt == '-i': ntasks = int(arg) elif opt == '-b': nbuffers = int(arg) elif opt == '-B': total_nbuffers = int(arg) elif opt == '-c': mincpus = int(arg) elif opt == '-C': maxcpus = int(arg) elif opt == '-s': cpustep = int(arg) elif opt == '-t': mintime = int(arg) elif opt == '-T': maxtime = int(arg) elif opt == '-f': factortime = int(arg) elif opt == '-h': print("Usage:", sys.argv[0], "[-h help] \n "\ "\t[-i ntasks] [-b nbuffers] [-B total_nbuffers] \n"\ "\t[-c mincpus] [ -C maxcpus] [-s cpustep]\n"\ "\t[-t mintime] [-T maxtime] [-f factortime]\n") print("runs \'ntasks\' tasks\n"\ "- using \'nbuffers\' data each, randomly among \'total_nbuffers\' choices,\n"\ "- with varying task durations, from \'mintime\' to \'maxtime\' (using \'factortime\')\n"\ "- on varying numbers of cpus, from \'mincpus\' to \'maxcpus\' (using \'cpustep\')\n"\ "\n"\ "currently selected parameters: ", ntasks, " tasks using ", nbuffers, " buffers among ", total_nbuffers, \ ", from ", mintime, "us to ", maxtime, "us (factor ", factortime, "), from ", mincpus, " cpus to ", maxcpus, " cpus (step ", cpustep, ")", sep='') starpupy.shutdown() sys.exit(0) ######################################## # multiplication increment def range_multi(start, end, factor): val_multi = [] val = start while val <= end: val_multi.append(val) val = val * factor return val_multi # the test function def func_test(t): time.sleep(t/1000000) #pr = cProfile.Profile() f = open("tasks_size_overhead.output",'w') method="handle" if len(sys.argv) > 1: method=sys.argv[1] print("# tasks :", ntasks, "buffers :", nbuffers, "totoal_nbuffers :", total_nbuffers, file=f) print("# ncups", end='\t', file=f) for size in range_multi(mintime, maxtime, factortime): print(size, "iters(us)\ttotal(s)", end='\t', file=f) print(end='\n', file=f) print("\"seq\"\t", end=' ', file=f) for size in range_multi(mintime, maxtime, factortime): #print("time size is", size) dstart=time.time() for i in range(ntasks): func_test(size) dend=time.time() print(int((dend-dstart)/ntasks*1000000), "\t", dend-dstart, end='\t', file=f) #print(size, "\t", dend-dstart, end='\t', file=f) print(end='\n', file=f) #pr.enable() if method == "handle": # return value is handle for ncpus in range(mincpus, maxcpus+1, cpustep): starpupy.set_ncpu(ncpus) #print("ncpus is", ncpus) print(ncpus, end='\t', file=f) for size in range_multi(mintime, maxtime, factortime): #print("time size is", size) start=time.time() for i in range(ntasks*ncpus): res=starpu.task_submit(ret_handle=True)("func_test", size) starpupy.task_wait_for_all() end=time.time() timing = end-start print(size, "\t", timing/ncpus, end='\t', file=f) print(end='\n', file=f) elif method == "futur": # return value is future async def main(): for ncpus in range(mincpus, maxcpus+1, cpustep): starpupy.set_ncpu(ncpus) #print("ncpus is", ncpus) print(ncpus, end='\t', file=f) for size in range_multi(mintime, maxtime, factortime): #print("time size is", size) start=time.time() for i in range(ntasks*ncpus): fut=starpu.task_submit(ret_fut=True)("func_test", size) starpupy.task_wait_for_all() end=time.time() timing = end-start print(size, "\t", timing/ncpus, end='\t', file=f) print(end='\n', file=f) asyncio.run(main()) else: # return value is neither future nor handle for ncpus in range(mincpus, maxcpus+1, cpustep): starpupy.set_ncpu(ncpus) #print("ncpus is", ncpus) print(ncpus, end='\t', file=f) for size in range_multi(mintime, maxtime, factortime): #print("time size is", size) start=time.time() for i in range(ntasks*ncpus): fut=starpu.task_submit(ret_fut=False)("func_test", size) starpupy.task_wait_for_all() end=time.time() timing = end-start print(size, "\t", timing/ncpus, end='\t', file=f) print(end='\n', file=f) #pr.disable() f.close() #pr.print_stats() starpupy.shutdown() starpu-1.4.9+dfsg/starpupy/benchmark/tasks_size_overhead.sh000077500000000000000000000017331507764646700242300ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # ROOT=${0%.sh} for x in handle futur none do $(dirname $0)/../execute.sh benchmark/tasks_size_overhead.py $x $* TERMINAL="png large size 1280,960" OUTFILE="tasks_size_overhead_py_$x.png" $ROOT.gp TERMINAL="eps" OUTFILE="tasks_size_overhead_py_$x.eps" $ROOT.gp done #gv tasks_size_overhead.eps starpu-1.4.9+dfsg/starpupy/benchmark/test_handle_bench.py000066400000000000000000000017411507764646700236370ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # listX = [10, 100, 1000, 10000, 100000, 1000000] #listX = [10, 100] list_size = [] for x in listX: for X in range(x, x*10, x): list_size.append(X) list_size.append(10000000) list_size.append(20000000) list_size.append(30000000) list_size.append(40000000) list_size.append(50000000) #print("list of size is",list_size) starpu-1.4.9+dfsg/starpupy/benchmark/test_handle_perf.py000066400000000000000000000122221507764646700235100ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import starpu from starpu import starpupy from starpu import Handle from starpu import HandleNumpy try: import numpy as np except (ModuleNotFoundError, ImportError): print("\n\nCan't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)\n\n") starpupy.shutdown() exit(77) import asyncio import time import array import struct try: import nest_asyncio except ModuleNotFoundError as e: print("\n\nCan't find \"Python3 nest_asyncio\" module (consider running \"pip3 install nest_asyncio\")\n\n") starpupy.shutdown() exit(77) import json import sys import statistics import test_handle_bench #############################perf test#################################### # Numpy function @starpu.access(a="RW", b="R") def add_numpy(a,b): np.add(a,b,out=a) # custom function @starpu.access(a="RW", b="R") def add_custom(a,b): for i in range(np.size(a)): a[i] = a[i] + b[i] program_submit1=[] program_await1=[] program_submit2=[] program_await2=[] program_submit3=[] num=20 # calculate the standard deviasion list_std11 = [] list_std12 = [] list_std21 = [] list_std22 = [] list_std3 = [] # using handle return future def test_comp_handle_ret_fut(a,b): async def asy_main(): start_exec1=0 end_exec1=0 start_exec2=0 end_exec2=0 list_submit=[] list_await=[] for t in range(num): #print("loop", t) start_exec1=time.time() res_fut = starpu.task_submit()(add_custom, a, b) end_exec1=time.time() list_submit.append(end_exec1-start_exec1) start_exec2=time.time() res = await res_fut end_exec2=time.time() list_await.append(end_exec2-start_exec2) program_submit1.append(statistics.mean(list_submit)) program_await1.append(statistics.mean(list_await)) list_std11.append(statistics.stdev(list_submit)) list_std12.append(statistics.stdev(list_await)) loop=asyncio.get_event_loop() nest_asyncio.apply() loop.run_until_complete(asy_main()) # using handle return handle def test_comp_handle_ret_handle(a,b): async def asy_main(): start_exec1=0 end_exec1=0 start_exec2=0 end_exec2=0 list_submit=[] list_await=[] for t in range(num): #print("loop", t) start_exec1=time.time() res_handle = starpu.task_submit(ret_handle=True)(add_custom, a, b) end_exec1=time.time() list_submit.append(end_exec1-start_exec1) start_exec2=time.time() starpupy.task_wait_for_all() end_exec2=time.time() list_await.append(end_exec2-start_exec2) program_submit2.append(statistics.mean(list_submit)) program_await2.append(statistics.mean(list_await)) list_std21.append(statistics.stdev(list_submit)) list_std22.append(statistics.stdev(list_await)) loop=asyncio.get_event_loop() nest_asyncio.apply() loop.run_until_complete(asy_main()) #without using starpu def test_numpy(a,b): async def asy_main(): start_exec1=0 end_exec1=0 list_submit=[] for t in range(num): start_exec1=time.time() add_numpy(a, b) end_exec1=time.time() list_submit.append(end_exec1-start_exec1) program_submit3.append(statistics.mean(list_submit)) list_std3.append(statistics.stdev(list_submit)) loop=asyncio.get_event_loop() nest_asyncio.apply() loop.run_until_complete(asy_main()) #with handle return future for i in test_handle_bench.list_size: #print("i with handle return future is", i) A = np.arange(i) test_comp_handle_ret_fut(A, A) starpu.unregister(A) #with handle return handle for i in test_handle_bench.list_size: #print("i with handle return handle is", i) A = np.arange(i) test_comp_handle_ret_handle(A, A) starpu.unregister(A) #without starpu for i in test_handle_bench.list_size: A = np.arange(i) test_numpy(A, A) retfut_dict={'program_submit':program_submit1, 'program_await': program_await1} rethandle_dict={'program_submit':program_submit2, 'program_await': program_await2} nostarpu_dict={'program_submit':program_submit3} # print(retfut_dict) # print(rethandle_dict) # print(nostarpu_dict) dict_std={'list_std11':list_std11, 'list_std12':list_std12, 'list_std21':list_std21, 'list_std22':list_std22, 'list_std3':list_std3} #####write the dict in file##### js1 = json.dumps(retfut_dict) file1 = open('handle_perf1.txt', 'w') file1.write(js1) file1.close() js2 = json.dumps(rethandle_dict) file2 = open('handle_perf2.txt', 'w') file2.write(js2) file2.close() js3 = json.dumps(nostarpu_dict) file3 = open('handle_perf3.txt', 'w') file3.write(js3) file3.close() js_std = json.dumps(dict_std) file_std = open('handle_perf_std.txt', 'w') file_std.write(js_std) file_std.close() starpupy.shutdown() starpu-1.4.9+dfsg/starpupy/benchmark/test_handle_perf.sh000077500000000000000000000015331507764646700235000ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh benchmark/test_handle_perf.py $* if test $? != 77 then $(dirname $0)/../execute.sh benchmark/handle_perf_plot.py else echo "skip test" fi starpu-1.4.9+dfsg/starpupy/benchmark/test_handle_perf_pickle.py000066400000000000000000000120411507764646700250360ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import starpu from starpu import starpupy from starpu import Handle from starpu import HandleNumpy try: import numpy as np except (ModuleNotFoundError, ImportError): print("Can't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)") starpupy.shutdown() exit(77) import asyncio import time import array import struct import nest_asyncio import json import sys import statistics #############################perf test#################################### # Numpy function @starpu.access(a="RW", b="R") def add(a,b): np.add(a,b,out=a) # custom function # @starpu.access(a="RW", b="R") # def add(a,b): # for i in range(np.size(a)): # a[i] = a[i] + b[i] listX = [10, 100, 1000, 10000, 100000, 1000000] list_size = [] for x in listX: for X in range(x, x*10, x): list_size.append(X) list_size.append(10000000) list_size.append(20000000) list_size.append(30000000) list_size.append(40000000) list_size.append(50000000) #print("list of size is",list_size) program_submit1=[] program_await1=[] program_submit2=[] program_await2=[] program_submit3=[] num=20 # calculate the standard deviasion list_std11 = [] list_std12 = [] list_std21 = [] list_std22 = [] list_std3 = [] # using handle def test_comp_handle(a,b): async def asy_main(): start_exec1=0 end_exec1=0 start_exec2=0 end_exec2=0 list_submit=[] list_await=[] for t in range(num): #print("loop", t) start_exec1=time.time() res_fut = starpu.task_submit()(add, a, b) end_exec1=time.time() list_submit.append(end_exec1-start_exec1) start_exec2=time.time() res = await res_fut end_exec2=time.time() list_await.append(end_exec2-start_exec2) program_submit1.append(statistics.mean(list_submit)) program_await1.append(statistics.mean(list_await)) list_std11.append(statistics.stdev(list_submit)) list_std12.append(statistics.stdev(list_await)) loop=asyncio.get_event_loop() nest_asyncio.apply() loop.run_until_complete(asy_main()) #without using handle def test_comp(a,b): async def asy_main(): start_exec1=0 end_exec1=0 start_exec2=0 end_exec2=0 list_submit=[] list_await=[] for t in range(num): #print("loop", t) start_exec1=time.time() res_fut = starpu.task_submit(arg_handle=False)(add, a, b) end_exec1=time.time() list_submit.append(end_exec1-start_exec1) start_exec2=time.time() res = await res_fut end_exec2=time.time() list_await.append(end_exec2-start_exec2) program_submit2.append(statistics.mean(list_submit)) program_await2.append(statistics.mean(list_await)) list_std21.append(statistics.stdev(list_submit)) list_std22.append(statistics.stdev(list_await)) loop=asyncio.get_event_loop() nest_asyncio.apply() loop.run_until_complete(asy_main()) #without using starpu def test_numpy(a,b): async def asy_main(): start_exec1=0 end_exec1=0 list_submit=[] for t in range(num): start_exec1=time.time() add(a, b) end_exec1=time.time() list_submit.append(end_exec1-start_exec1) program_submit3.append(statistics.mean(list_submit)) list_std3.append(statistics.stdev(list_submit)) loop=asyncio.get_event_loop() nest_asyncio.apply() loop.run_until_complete(asy_main()) #with handle for i in list_size: #print("i with handle is", i) A = np.arange(i) test_comp_handle(A, A) starpu.unregister(A) #without handle for i in list_size: #print("i without handle is", i) A = np.arange(i) test_comp(A, A) #without starpu for i in list_size: A = np.arange(i) test_numpy(A, A) withhandle_dict={'program_submit':program_submit1, 'program_await': program_await1} nohandle_dict={'program_submit':program_submit2, 'program_await': program_await2} nostarpu_dict={'program_submit':program_submit3} # print(withhandle_dict) # print(nohandle_dict) # print(nostarpu_dict) dict_std={'list_std11':list_std11, 'list_std12':list_std12, 'list_std21':list_std21, 'list_std22':list_std22, 'list_std3':list_std3} #####write the dict in file##### js1 = json.dumps(withhandle_dict) file1 = open('handle_perf1.txt', 'w') file1.write(js1) file1.close() js2 = json.dumps(nohandle_dict) file2 = open('handle_perf2.txt', 'w') file2.write(js2) file2.close() js3 = json.dumps(nostarpu_dict) file3 = open('handle_perf3.txt', 'w') file3.write(js3) file3.close() js_std = json.dumps(dict_std) file_std = open('handle_perf_std.txt', 'w') file_std.write(js_std) file_std.close() starpupy.shutdown() starpu-1.4.9+dfsg/starpupy/benchmark/test_handle_perf_pickle.sh000077500000000000000000000014231507764646700250250ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh benchmark/test_handle_perf_pickle.py $* python3 handle_perf_plot_pickle.py starpu-1.4.9+dfsg/starpupy/examples/000077500000000000000000000000001507764646700175155ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpupy/examples/Makefile.am000066400000000000000000000053131507764646700215530ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk include $(top_srcdir)/make/starpu-loader.mk SUBDIRS = CLEANFILES = *.gcno *.gcda *.linkinfo \ starpu_py.concurrent.py \ starpu_py_handle.concurrent.py \ starpu_py_np.concurrent.py \ starpu_py_partition.concurrent.py \ starpu_py_perfmodel.concurrent.py \ starpu_py_numpy.concurrent.py if STARPU_USE_MPI_MASTER_SLAVE TESTS_ENVIRONMENT += LOADER_ARGS="--mpirun" endif %.concurrent.py: %.py sed -e 's/async //g' -e 's/\ $@ all-local: \ starpu_py.concurrent.py \ starpu_py_handle.concurrent.py \ starpu_py_np.concurrent.py \ starpu_py_partition.concurrent.py \ starpu_py_perfmodel.concurrent.py \ starpu_py_numpy.concurrent.py TESTS = if STARPU_STARPUPY_NUMPY TESTS += starpu_py_perfmodel.sh TESTS += starpu_py_perfmodel.concurrent.sh endif if !STARPU_SIMGRID TESTS += starpu_py.sh TESTS += starpu_py.concurrent.sh TESTS += starpu_py_parallel.sh TESTS += starpu_py_handle.sh TESTS += starpu_py_handle.concurrent.sh if STARPU_STARPUPY_NUMPY TESTS += starpu_py_numpy.sh TESTS += starpu_py_numpy.concurrent.sh TESTS += starpu_py_np.sh TESTS += starpu_py_np.concurrent.sh TESTS += starpu_py_partition.sh TESTS += starpu_py_partition.concurrent.sh endif endif EXTRA_DIST = \ starpu_py.concurrent.sh \ starpu_py_handle.concurrent.sh \ starpu_py_handle.py \ starpu_py_handle.sh \ starpu_py_np.concurrent.sh \ starpu_py_np.py \ starpu_py_np.sh \ starpu_py_numpy.concurrent.sh \ starpu_py_numpy.py \ starpu_py_numpy.sh \ starpu_py_parallel.py \ starpu_py_parallel.sh \ starpu_py_partition.concurrent.sh \ starpu_py_partition.py \ starpu_py_partition.sh \ starpu_py_perfmodel.concurrent.sh \ starpu_py_perfmodel.py \ starpu_py_perfmodel.sh \ starpu_py.py \ starpu_py.sh python_sourcesdir = $(libdir)/starpu/python dist_python_sources_DATA = \ starpu_py_handle.py \ starpu_py_np.py \ starpu_py_numpy.py \ starpu_py_parallel.py \ starpu_py_partition.py \ starpu_py_perfmodel.py \ starpu_py.py starpu-1.4.9+dfsg/starpupy/examples/Makefile.in000066400000000000000000001765641507764646700216050ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ noinst_PROGRAMS = $(am__EXEEXT_1) # Make tests run through mpiexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec # switch off local socket usage #MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 @STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_8 = LOADER_ARGS="--mpirun" @STARPU_STARPUPY_NUMPY_TRUE@am__append_9 = starpu_py_perfmodel.sh \ @STARPU_STARPUPY_NUMPY_TRUE@ starpu_py_perfmodel.concurrent.sh @STARPU_SIMGRID_FALSE@am__append_10 = starpu_py.sh \ @STARPU_SIMGRID_FALSE@ starpu_py.concurrent.sh \ @STARPU_SIMGRID_FALSE@ starpu_py_parallel.sh \ @STARPU_SIMGRID_FALSE@ starpu_py_handle.sh \ @STARPU_SIMGRID_FALSE@ starpu_py_handle.concurrent.sh @STARPU_SIMGRID_FALSE@@STARPU_STARPUPY_NUMPY_TRUE@am__append_11 = starpu_py_numpy.sh \ @STARPU_SIMGRID_FALSE@@STARPU_STARPUPY_NUMPY_TRUE@ starpu_py_numpy.concurrent.sh \ @STARPU_SIMGRID_FALSE@@STARPU_STARPUPY_NUMPY_TRUE@ starpu_py_np.sh \ @STARPU_SIMGRID_FALSE@@STARPU_STARPUPY_NUMPY_TRUE@ starpu_py_np.concurrent.sh \ @STARPU_SIMGRID_FALSE@@STARPU_STARPUPY_NUMPY_TRUE@ starpu_py_partition.sh \ @STARPU_SIMGRID_FALSE@@STARPU_STARPUPY_NUMPY_TRUE@ starpu_py_partition.concurrent.sh subdir = starpupy/examples ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(dist_python_sources_DATA) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = @STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_1 = loader$(EXEEXT) PROGRAMS = $(noinst_PROGRAMS) loader_SOURCES = loader.c loader_OBJECTS = loader-loader.$(OBJEXT) loader_LDADD = $(LDADD) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = loader.c DIST_SOURCES = loader.c RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(python_sourcesdir)" DATA = $(dist_python_sources_DATA) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ check recheck distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-loader.mk \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = $(am__append_4) $(am__append_6) LAUNCHER = $(am__append_3) $(am__append_5) AM_CFLAGS = $(GLOBAL_AM_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile # When GNU parallel is available and -j is passed to make, run tests through # parallel, using a "starpu" semaphore. # Also make test shell scripts run its tests through parallel, using a # "substarpu" semaphore. This brings some overload, but only one level. @HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') @STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 @STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ @STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) \ @STARPU_HAVE_AM111_FALSE@ top_builddir="$(abs_top_builddir)" \ @STARPU_HAVE_AM111_FALSE@ top_srcdir="$(abs_top_srcdir)" \ @STARPU_HAVE_AM111_FALSE@ $(LOADER_BIN) $(am__append_8) @STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) \ @STARPU_HAVE_AM111_TRUE@ top_builddir="$(abs_top_builddir)" \ @STARPU_HAVE_AM111_TRUE@ top_srcdir="$(abs_top_srcdir)" \ @STARPU_HAVE_AM111_TRUE@ $(am__append_8) @STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) AM_TESTS_FD_REDIRECT = 9>&2 SUBDIRS = CLEANFILES = *.gcno *.gcda *.linkinfo \ starpu_py.concurrent.py \ starpu_py_handle.concurrent.py \ starpu_py_np.concurrent.py \ starpu_py_partition.concurrent.py \ starpu_py_perfmodel.concurrent.py \ starpu_py_numpy.concurrent.py TESTS = $(am__append_9) $(am__append_10) $(am__append_11) EXTRA_DIST = \ starpu_py.concurrent.sh \ starpu_py_handle.concurrent.sh \ starpu_py_handle.py \ starpu_py_handle.sh \ starpu_py_np.concurrent.sh \ starpu_py_np.py \ starpu_py_np.sh \ starpu_py_numpy.concurrent.sh \ starpu_py_numpy.py \ starpu_py_numpy.sh \ starpu_py_parallel.py \ starpu_py_parallel.sh \ starpu_py_partition.concurrent.sh \ starpu_py_partition.py \ starpu_py_partition.sh \ starpu_py_perfmodel.concurrent.sh \ starpu_py_perfmodel.py \ starpu_py_perfmodel.sh \ starpu_py.py \ starpu_py.sh python_sourcesdir = $(libdir)/starpu/python dist_python_sources_DATA = \ starpu_py_handle.py \ starpu_py_np.py \ starpu_py_numpy.py \ starpu_py_parallel.py \ starpu_py_partition.py \ starpu_py_perfmodel.py \ starpu_py.py all: all-recursive .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpupy/examples/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign starpupy/examples/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-noinstPROGRAMS: @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) @rm -f loader$(EXEEXT) $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< loader-loader.o: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c loader-loader.obj: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-dist_python_sourcesDATA: $(dist_python_sources_DATA) @$(NORMAL_INSTALL) @list='$(dist_python_sources_DATA)'; test -n "$(python_sourcesdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(python_sourcesdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(python_sourcesdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(python_sourcesdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(python_sourcesdir)" || exit $$?; \ done uninstall-dist_python_sourcesDATA: @$(NORMAL_UNINSTALL) @list='$(dist_python_sources_DATA)'; test -n "$(python_sourcesdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(python_sourcesdir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? starpu_py_perfmodel.sh.log: starpu_py_perfmodel.sh @p='starpu_py_perfmodel.sh'; \ b='starpu_py_perfmodel.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_py_perfmodel.concurrent.sh.log: starpu_py_perfmodel.concurrent.sh @p='starpu_py_perfmodel.concurrent.sh'; \ b='starpu_py_perfmodel.concurrent.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_py.sh.log: starpu_py.sh @p='starpu_py.sh'; \ b='starpu_py.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_py.concurrent.sh.log: starpu_py.concurrent.sh @p='starpu_py.concurrent.sh'; \ b='starpu_py.concurrent.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_py_parallel.sh.log: starpu_py_parallel.sh @p='starpu_py_parallel.sh'; \ b='starpu_py_parallel.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_py_handle.sh.log: starpu_py_handle.sh @p='starpu_py_handle.sh'; \ b='starpu_py_handle.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_py_handle.concurrent.sh.log: starpu_py_handle.concurrent.sh @p='starpu_py_handle.concurrent.sh'; \ b='starpu_py_handle.concurrent.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_py_numpy.sh.log: starpu_py_numpy.sh @p='starpu_py_numpy.sh'; \ b='starpu_py_numpy.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_py_numpy.concurrent.sh.log: starpu_py_numpy.concurrent.sh @p='starpu_py_numpy.concurrent.sh'; \ b='starpu_py_numpy.concurrent.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_py_np.sh.log: starpu_py_np.sh @p='starpu_py_np.sh'; \ b='starpu_py_np.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_py_np.concurrent.sh.log: starpu_py_np.concurrent.sh @p='starpu_py_np.concurrent.sh'; \ b='starpu_py_np.concurrent.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_py_partition.sh.log: starpu_py_partition.sh @p='starpu_py_partition.sh'; \ b='starpu_py_partition.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_py_partition.concurrent.sh.log: starpu_py_partition.concurrent.sh @p='starpu_py_partition.concurrent.sh'; \ b='starpu_py_partition.concurrent.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: check-recursive all-am: Makefile $(PROGRAMS) $(DATA) all-local installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(python_sourcesdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ mostlyclean-am distclean: distclean-recursive -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dist_python_sourcesDATA install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-dist_python_sourcesDATA .MAKE: $(am__recursive_targets) check-am install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \ am--depfiles check check-TESTS check-am clean clean-generic \ clean-libtool clean-noinstPROGRAMS cscopelist-am ctags \ ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dist_python_sourcesDATA install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ installdirs-am maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ uninstall uninstall-am uninstall-dist_python_sourcesDATA .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # # Test loading goes through a lot of launchers: # # - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. # either mpirun or starpu_tcpipexec # # - $(LOADER), i.e. tests/loader, is then called to implement timeout, running # gdb, etc. But if it detects that the test is a .sh script, it just executes # it # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # to run the program through e.g. valgrind.sh # # When the program is a shell script, additionally: # # - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) # # - $(MS_LAUNCHER) is called to run the test through starpu_msexec # # - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program # through it. # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # export LAUNCHER @HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL export MS_LAUNCHER LAUNCHER ?= MS_LAUNCHER ?= @STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr export LSAN_OPTIONS export TSAN_OPTIONS %.concurrent.py: %.py sed -e 's/async //g' -e 's/\ $@ all-local: \ starpu_py.concurrent.py \ starpu_py_handle.concurrent.py \ starpu_py_np.concurrent.py \ starpu_py_partition.concurrent.py \ starpu_py_perfmodel.concurrent.py \ starpu_py_numpy.concurrent.py # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/starpupy/examples/loader.c000066400000000000000000000274611507764646700211410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) #include #else #include #endif #ifdef STARPU_QUICK_CHECK /* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s add some extra times for tests which run with all schedulers */ #define DEFAULT_TIMEOUT 100 #elif !defined(STARPU_LONG_CHECK) /* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ #define DEFAULT_TIMEOUT 300 #else /* Long checks can be very long */ #define DEFAULT_TIMEOUT 1000 #endif #define AUTOTEST_SKIPPED_TEST 77 static pid_t child_pid = 0; static int timeout; #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) static int mygettimeofday(struct timeval *tv, void *tz) { if (tv) { FILETIME ft; unsigned long long res; GetSystemTimeAsFileTime(&ft); /* 100-nanosecond intervals since January 1, 1601 */ res = ft.dwHighDateTime; res <<= 32; res |= ft.dwLowDateTime; res /= 10; /* Now we have microseconds */ res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; /* Now we are based on epoch */ tv->tv_sec = res / 1000000ULL; tv->tv_usec = res % 1000000ULL; } } #else #define mygettimeofday(tv,tz) gettimeofday(tv,tz) #endif #ifdef STARPU_GDB_PATH static int try_launch_gdb(const char *exe, const char *core) { # define GDB_COMMANDS \ "-ex", "py-list", \ "-ex", "starpu-tasks", \ "-ex", "starpu-workers", \ "-ex", "starpu-print-datas-summary", \ "-ex", "starpu-memusage", \ "-ex", "starpu-print-archs", \ "-ex", "starpu-print-registered-models", \ "-ex", "bt full", \ "-ex", "py-bt", \ "-ex", "thread apply all bt full", \ "-ex", "thread apply all py-bt", \ int err; pid_t pid; struct stat st; const char *top_builddir; char *gdb; err = stat(core, &st); if (err != 0) { fprintf(stderr, "while looking for core file of %s: %s: %m\n", exe, core); return -1; } if (!(st.st_mode & S_IFREG)) { fprintf(stderr, "%s: not a regular file\n", core); return -1; } top_builddir = getenv("top_builddir"); pid = fork(); switch (pid) { case 0: /* kid */ if (top_builddir != NULL) { /* Run gdb with Libtool. */ gdb = alloca(strlen(top_builddir) + sizeof("/libtool") + 1); strcpy(gdb, top_builddir); strcat(gdb, "/libtool"); err = execl(gdb, "gdb", "--mode=execute", STARPU_GDB_PATH, "--batch", GDB_COMMANDS exe, core, NULL); } else { /* Run gdb directly */ gdb = STARPU_GDB_PATH; err = execl(gdb, "gdb", "--batch", GDB_COMMANDS exe, core, NULL); } if (err != 0) { fprintf(stderr, "while launching `%s': %m\n", gdb); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); break; case -1: fprintf(stderr, "fork: %m\n"); return -1; default: /* parent */ { pid_t who; int status; who = waitpid(pid, &status, 0); if (who != pid) fprintf(stderr, "while waiting for gdb " "process %d: %m\n", pid); } } return 0; # undef GDB_COMMANDS } #endif /* STARPU_GDB_PATH */ static void launch_gdb(const char *exe) { #ifdef STARPU_GDB_PATH char s[32]; snprintf(s, sizeof(s), "core.%d", child_pid); if (try_launch_gdb(exe, s) < 0) try_launch_gdb(exe, "core"); #endif /* STARPU_GDB_PATH */ } static char *test_name; static void test_cleaner(int sig) { pid_t child_gid; int status; (void) sig; // send signal to all loader family members fprintf(stderr, "[error] test %s has been blocked for %d seconds. Mark it as failed\n", test_name, timeout); child_gid = getpgid(child_pid); kill(-child_gid, SIGQUIT); waitpid(child_pid, &status, 0); launch_gdb(test_name); raise(SIGALRM); exit(EXIT_FAILURE); } static void forwardsig(int sig) { pid_t child_gid; child_gid = getpgid(child_pid); kill(-child_gid, sig); } static int _decode(char **src, char *motif, const char *value) { char *found; found = strstr(*src, motif); if (found == NULL) return 0; char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); strncpy(new_src, *src, found - *src); strcat(new_src, value); strcat(new_src, found+strlen(motif)); *src = new_src; return 1; } static void decode(char **src, char *motif, const char *value) { if (*src) { if (strstr(*src, motif) && value == NULL) { fprintf(stderr, "error: $%s undefined\n", motif); exit(EXIT_FAILURE); } int d = _decode(src, motif, value); while (d) d = _decode(src, motif, value); } } int main(int argc, char *argv[]) { int child_exit_status; char *test_args; char *launcher; char *launcher_args; char *libtool; char *cflags; const char *top_builddir = getenv("top_builddir"); struct sigaction sa; int ret; struct timeval start; struct timeval end; double timing; int x=1; int asan = 0, lsan = 0, tsan = 0, usan = 0; (void) argc; test_args = NULL; timeout = 0; launcher=getenv("STARPU_CHECK_LAUNCHER"); launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); cflags = getenv("CFLAGS"); if (cflags) { if (strstr(cflags, "-fsanitize=address")) asan = 1; if (strstr(cflags, "-fsanitize=leak")) lsan = 1; if (strstr(cflags, "-fsanitize=thread")) tsan = 1; if (strstr(cflags, "-fsanitize=undefined")) usan = 1; } if (argv[x] && strcmp(argv[x], "-t") == 0) { timeout = strtol(argv[x+1], NULL, 10); x += 2; } else if (getenv("STARPU_TIMEOUT_ENV")) { /* get user-defined iter_max value */ timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); } else if (timeout <= 0) { timeout = DEFAULT_TIMEOUT; if ((launcher && strstr(launcher, "valgrind")) || (launcher && strstr(launcher, "helgrind")) || tsan) timeout *= 20; if (asan || usan || lsan || (launcher && strstr(launcher, "compute-sanitizer"))) timeout *= 5; if (timeout > 1750) timeout = 1750; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG timeout *= 20; #endif #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE /* compare values between the 2 values of timeout */ if (getenv("MPIEXEC_TIMEOUT")) { int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); if (mpiexec_timeout != timeout) fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); } #endif if (argv[x] && strcmp(argv[x], "-p") == 0) { test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); x += 3; } else { test_name = argv[x]; x += 1; } if (!test_name) { fprintf(stderr, "[error] Need name of program to start\n"); exit(EXIT_FAILURE); } size_t len = strlen(test_name); if (len >= 3 && test_name[len-3] == '.' && test_name[len-2] == 's' && test_name[len-1] == 'h') { /* This is a shell script, don't run ourself on bash, but make * the script call us for each program invocation */ char *launch = NULL; if (top_builddir == NULL) // this may fail if .libs is in the directory path setenv("STARPU_LAUNCH", argv[0], 1); else { launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); strcpy(launch, top_builddir); strcat(launch, "/tests/loader"); setenv("STARPU_LAUNCH", launch, 1); } execvp(test_name, argv+x-1); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); free(launch); exit(EXIT_FAILURE); } if (strstr(test_name, "spmv/dw_block_spmv")) { test_args = (char *) calloc(512, sizeof(char)); snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); } else if (strstr(test_name, "starpu_perfmodel_display")) { if (x >= argc) test_args = strdup("-l"); } else if (strstr(test_name, "starpu_perfmodel_plot")) { if (x >= argc) test_args = strdup("-l"); } /* get launcher program */ if (launcher_args) launcher_args=strdup(launcher_args); if (top_builddir == NULL) { fprintf(stderr, "warning: $top_builddir undefined, " "so $STARPU_CHECK_LAUNCHER ignored\n"); launcher = NULL; launcher_args = NULL; libtool = NULL; } else { libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); strcpy(libtool, top_builddir); strcat(libtool, "/libtool"); } if (launcher) { const char *top_srcdir = getenv("top_srcdir"); decode(&launcher, "@top_srcdir@", top_srcdir); decode(&launcher_args, "@top_srcdir@", top_srcdir); } setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); /* set SIGALARM handler */ sa.sa_flags = SA_RESETHAND | SA_NODEFER; sigemptyset(&sa.sa_mask); sa.sa_handler = test_cleaner; if (-1 == sigaction(SIGALRM, &sa, NULL)) perror("sigaction"); signal(SIGINT, forwardsig); signal(SIGHUP, forwardsig); signal(SIGPIPE, forwardsig); signal(SIGTERM, forwardsig); child_pid = fork(); if (child_pid == 0) { char *launcher_argv[100]; int i=0; setpgid(0, 0); /* "Launchers" such as Valgrind need to be inserted * after the Libtool-generated wrapper scripts, hence * this special-case. */ if (launcher && top_builddir != NULL) { launcher_argv[i++] = libtool; launcher_argv[i++] = "--mode=execute"; launcher_argv[i++] = launcher; if (launcher_args) { launcher_argv[i++] = strtok(launcher_args, " "); while (launcher_argv[i-1]) { launcher_argv[i++] = strtok(NULL, " "); } } } launcher_argv[i++] = test_name; if (test_args) launcher_argv[i++] = test_args; else while (argv[x]) { launcher_argv[i++] = argv[x++]; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG launcher_argv[i++] = "--cfg=contexts/factory:thread"; #endif #endif launcher_argv[i++] = NULL; execvp(*launcher_argv, launcher_argv); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); exit(EXIT_FAILURE); } if (child_pid == -1) { fprintf(stderr, "[error] fork. test marked as failed\n"); exit(EXIT_FAILURE); } free(test_args); free(libtool); ret = EXIT_SUCCESS; gettimeofday(&start, NULL); alarm(timeout); if (child_pid == waitpid(child_pid, &child_exit_status, 0)) { if (WIFEXITED(child_exit_status)) { int status = WEXITSTATUS(child_exit_status); if (status == EXIT_SUCCESS) { alarm(0); } else { if (status != AUTOTEST_SKIPPED_TEST) fprintf(stdout, "`%s' exited with return code %d\n", test_name, status); ret = status; } } else if (WIFSIGNALED(child_exit_status)) { fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", test_name, WTERMSIG(child_exit_status)); launch_gdb(test_name); ret = EXIT_FAILURE; } else { fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", test_name); ret = EXIT_FAILURE; } } gettimeofday(&end, NULL); timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); return ret; } starpu-1.4.9+dfsg/starpupy/examples/starpu_py.concurrent.sh000077500000000000000000000013441507764646700242650ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh starpu_py.concurrent.py $* starpu-1.4.9+dfsg/starpupy/examples/starpu_py.py000066400000000000000000000110411507764646700221120ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # from math import sqrt import starpu from starpu import starpupy import time import asyncio def await_fut(fut): return fut.result() try: starpu.init() except Exception as e: print(e) exit(77) ############################################################################ #function no input no output print hello world def hello(): print ("Example 1:") print ("Hello, world!") ############################################################################# #function no input no output def func1(): print ("Example 2:") print ("This is a function no input no output") ############################################################################## #using decorator wrap the function no input no output @starpu.delayed def func1_deco(): #time.sleep(1) print ("Example 3:") print ("This is a function no input no output wrapped by the decorator function") ############################################################################## #function no input return a value def func2(): print ("Example 4:") return 12 ############################################################################### #function has 2 int inputs and 1 int output def multi(a,b): print ("Example 5:") return a*b #print(multi(2, 3)) ############################################################################### #function has 4 float inputs and 1 float output def add(a,b,c,d): print ("Example 6:") return a+b+c+d #print(add(1.2, 2.5, 3.6, 4.9)) ############################################################################### #function has 2 int inputs 1 float input and 1 float output 1 int output def sub(a,b,c): print ("Example 7:") return a-b-c, a-b #print(sub(6, 2, 5.9)) ############################################################################### #using decorator wrap the function with input @starpu.delayed(name="test") def add_deco(a,b,c): #time.sleep(1) print ("Example 8:") print ("This is a function with input and output wrapped by the decorator function:") return a+b+c ############################################################################### #using decorator wrap the function with input @starpu.delayed(color=1) def sub_deco(x,a): print ("Example 9:") print ("This is a function with input and output wrapped by the decorator function:") return x-a ############################################################################### async def main(): #submit function "hello" fut = starpu.task_submit()(hello) await(fut) #submit function "func1" fut1 = starpu.task_submit()(func1) await(fut1) #apply starpu.delayed(func1_deco()) await(func1_deco()) #submit function "func2" fut2 = starpu.task_submit()(func2) res2 = await(fut2) #print the result of function print("This is a function no input and the return value is", res2) #submit function "multi" fut3 = starpu.task_submit()(multi, 2, 3) res3 = await(fut3) print("The result of function multi is :", res3) #submit function "add" fut4 = starpu.task_submit()(add, 1.2, 2.5, 3.6, 4.9) res4 = await(fut4) print("The result of function add is :", res4) #submit function "sub" but only provide function name fut5 = starpu.task_submit()(sub, 6, 2, 5.9) res5 = await(fut5) print("The result of function sub is:", res5) #apply starpu.delayed(add_deco) fut6 = add_deco(1,2,3) #res6 = await(fut6) #print("The result of function is", res6) #apply starpu.delayed(sub_deco) fut7 = sub_deco(fut6, 1) res7 = await(fut7) print("The first argument of this function is the result of Example 8") print("The result of function is", res7) fut8 = starpu.task_submit()("sqrt", 4) res8 = await(fut8) print("The result of function sqrt is:", res8) try: asyncio.run(main()) except starpupy.error as e: print("No worker to execute the job") starpu.shutdown() exit(77) starpu.shutdown() #starpu.task_wait_for_all() starpu-1.4.9+dfsg/starpupy/examples/starpu_py.sh000077500000000000000000000013421507764646700221020ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh examples/starpu_py.py $* starpu-1.4.9+dfsg/starpupy/examples/starpu_py_handle.concurrent.sh000077500000000000000000000013531507764646700256000ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh starpu_py_handle.concurrent.py $* starpu-1.4.9+dfsg/starpupy/examples/starpu_py_handle.py000066400000000000000000000374171507764646700234440ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # try: import numpy as np except (ModuleNotFoundError, ImportError): print("Can't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)") np = None import starpu from starpu import starpupy from starpu import Handle from starpu import HandleNumpy import asyncio import time import array import struct try: starpu.init() except Exception as e: print(e) exit(77) def await_fut(fut): return fut.result() if starpupy.worker_get_count_by_type(starpu.STARPU_MPI_MS_WORKER) >= 1 or starpupy.worker_get_count_by_type(starpu.STARPU_TCPIP_MS_WORKER) >= 1: print("This program does not work in MS mode") starpu.shutdown() exit(77) def show(x, y): print("Function printing:", x, y) def add(x, y): print ("Example add(x, y):") return x + y # create Handle objects x=2 y=3 x_h = Handle(x) y_h = Handle(y) print("*************************") print("constant handle:") print("*************************") # show function returns Handle ret_h1 = starpu.task_submit(ret_handle=True)(show, "first argument is:", x_h) print("show function returns:", ret_h1.get()) # return value is Handle res1 = starpu.task_submit(ret_handle=True)(add, x_h, y_h) print("result of Handle(2)+Handle(3) is:", res1.get()) # return value is Handle res2 = starpu.task_submit(ret_handle=True)(add, res1, y_h) print("result of res1+Handle(3) is:", res2.get()) # show function returns Handle ret_h2 = starpu.task_submit(ret_handle=True)(show, res1, res2) print("*************************************") print("constant handle return in parameter:") print("*************************************") ret = Handle(0) print("before calling function, ret value is:", ret.get()) # return value as parameter ret_n = starpu.task_submit(ret_param=True)(add, ret, x_h, y_h) print("result of Handle(2)+Handle(3) is:", ret.get()) print("return value of task_submit is:", ret_n) assert ret.get() == x+y x_h.unregister() y_h.unregister() ret_h1.unregister() ret_h2.unregister() res2.unregister() ret.unregister() if np is not None: ############################################################################################## print("*************************") print("Numpy array handle:") print("*************************") def scal(x, t): for i in range(len(t)): t[i] = t[i] * x print ("Example scal(scalar, array):") t = np.arange(10) # create Handle object for Numpy array t_h = Handle(t) # return value is Handle res3 = starpu.task_submit(ret_handle=True)(scal, 2, t_h) print("result of scal(2, Handle(np.arange(10)) is:", t_h.get()) # show function returns Future async def main(): res_fut1 = starpu.task_submit()(show, res1, t_h) await(res_fut1) asyncio.run(main()) t_h.unregister() res1.unregister() res3.unregister() ###################### def arr_add(a,b): for i in range(np.size(a)): a[i] = a[i] + b[i] a = np.array([1, 2, 3]) b = np.array([4, 5, 6]) # create Handle objects a_h = Handle(a) b_h = Handle(b) # two array element addition res4 = starpu.task_submit(ret_handle=True)(arr_add, a_h, b_h) print("result of adding two Handle(numpy.array) is:", a_h.get()) a_h.unregister() b_h.unregister() res4.unregister() ####################### def multi(x, y): print ("Example multi(x, y):") np.multiply(x, y, out=x) c = np.array([[1, 2], [3, 4]]) d = np.array([[2, 2], [2, 2]]) # create Handle objects c_h = Handle(c) d_h = Handle(d) # two array element multiplication res5 = starpu.task_submit(ret_handle=True)(multi, c_h, d_h) print("result of multiplying two Handle(numpy.array) is:", c_h.get()) ######################## @starpu.access(x="RW") def matrix_multi(x, y): print ("Example matrix_multi(x, y):") np.dot(x, y, out=x) # two array matrix multiplication res6 = starpu.task_submit(ret_handle=True)(matrix_multi, c_h, d_h) print("result of matrix multiplying two Handle(numpy.array) is:", c_h.get()) # two array matrix multiplication (inverse order) res7 = starpu.task_submit(ret_handle=True)(matrix_multi, d_h, c_h) print("result of matrix multiplying two Handle(numpy.array) is:", d_h.get()) c_h.unregister() d_h.unregister() res5.unregister() res6.unregister() res7.unregister() ###################################empty Numpy array handle##################################### print("*************************") print("empty Numpy array handle:") print("*************************") a1 = np.array([1, 2, 3, 4]) a2 = np.array([[1, 2, 3], [4, 5, 6]]) a3 = np.array([[[1, 2, 3], [4, 5, 6]],[[7, 8, 9], [10, 11, 12]]]) # create Handle objects a1_h = Handle(a1) a2_h = Handle(a2) a3_h = Handle(a3) a1_r = a1_h.acquire(mode='R') print("original 1-dimension array is:", a1_r) a1_h.release() a2_r = a2_h.acquire(mode='R') print("original 2-dimension array is:", a2_r) a2_h.release() a3_r = a3_h.acquire(mode='R') print("original 3-dimension array is:", a3_r) a3_h.release() @starpu.access(b="W") def assign(a,b): for i in range(min(np.size(a), np.size(b))): b[i] = a[i] @starpu.access(b="W") def assign2(a,b): for i in range(min(np.size(a,0), np.size(b,0))): for j in range(min(np.size(a,1), np.size(b,1))): b[i][j] = a[i][j] @starpu.access(b="W") def assign3(a,b): for i in range(min(np.size(a,0), np.size(b,0))): for j in range(min(np.size(a,1), np.size(b,1))): for k in range(min(np.size(a,2), np.size(b,2))): b[i][j][k] = a[i][j][k] # generate empty arrays Handle object using HandleNumpy # 1-dimension e1_h = HandleNumpy(a1.shape, a1.dtype) res8 = starpu.task_submit(ret_handle=True)(assign, a1_h, e1_h) e1_r = e1_h.acquire(mode='RW') print("assigned 1-dimension array is:", e1_r) # e1_h is writeable, we modify the first element e1_r[0] = 100 print("the first element of 1-dimension array is modified to 100:", e1_r) e1_h.release() # 2-dimension e2_h = HandleNumpy(a2.shape, a2.dtype) res9 = starpu.task_submit(ret_handle=True)(assign2, a2_h, e2_h) e2_r = e2_h.acquire(mode='R') print("assigned 2-dimension array is", e2_r) e2_h.release() # 3-dimension e3_h = HandleNumpy(a3.shape, a3.dtype) res10 = starpu.task_submit(ret_handle=True)(assign3, a3_h, e3_h) e3_r = e3_h.acquire(mode='R') print("assigned 3-dimension array is", e3_r) e3_h.release() a1_h.unregister() a2_h.unregister() a3_h.unregister() e1_h.unregister() e2_h.unregister() e3_h.unregister() res8.unregister() res9.unregister() res10.unregister() ##################################bytes handle############################################ print("*************************") print("bytes handle:") print("*************************") bt1 = bytes([1,2]) bt2 = bytes([3,4]) bt1_h = Handle(bt1) bt2_h = Handle(bt2) bt1_r = bt1_h.acquire(mode='R') print("first bytes object is", bt1_r) bt1_h.release() bt2_r = bt2_h.acquire(mode='R') print("second bytes object is", bt2_r) bt2_h.release() ret_bt1 = starpu.task_submit(ret_handle=True)(add, bt1_h, bt2_h) print("result of appending two bytes: ", ret_bt1.get()) def bytes_add(x, y): z = bytearray(len(x)) for i in range (len(x)): z[i] = x[i] + y[i] return bytes(z) ret_bt2 = starpu.task_submit(ret_handle=True)(bytes_add, bt1_h, bt2_h) print("result of adding two bytes elements: ", ret_bt2.get()) bt1_h.unregister() bt2_h.unregister() ret_bt1.unregister() ret_bt2.unregister() ####################################bytearray handle######################################### print("*************************") print("bytearray handle:") print("*************************") bta1 = bytearray([1,2]) bta2 = bytearray([3,4]) bta1_h = Handle(bta1) bta2_h = Handle(bta2) bta1_r = bta1_h.acquire(mode='RW') print("first bytearray object is", bta1_r) bta1[0] = 0 bta1_h.release() bta11_r = bta1_h.acquire(mode='R') print("first bytearray object is modified", bta11_r) bta1_h.release() bta2_r = bta2_h.acquire(mode='R') print("second bytearray object is", bta2_r) bta2_h.release() def bytearray_add(x, y): z = bytearray(len(x)) for i in range (len(x)): z[i] = x[i] + y[i] return z ret_bta1 = starpu.task_submit(ret_handle=True)(bytearray_add, bta1_h, bta2_h) print("result of adding two bytearray elements: ", ret_bta1.get()) bta1_h.unregister() bta2_h.unregister() ret_bta1.unregister() ##################################array.array handle########################################## print("*************************") print("array.array handle:") print("*************************") arr1 = array.array('i', [1, 2, 3, 4]) arr2 = array.array('i', [2, 2, 2, 2]) arr3 = array.array('f', [4.5, 5.5, 6.5]) arr4 = array.array('u', 'hello') def arrarr_add(x, y): for i in range (len(x)): x[i] = x[i] + y[i] #time.sleep(1) return x def arrarr_multi(x, y): for i in range (len(x)): x[i] = x[i] * y[i] return x def arrarr_scal(x, s): for i in range (len(x)): x[i] = x[i] * s return x arr1_h = Handle(arr1) arr1_r = arr1_h.acquire(mode='RW') print("first array.array object is", arr1_r) arr1[0] = 0 arr1_h.release() arr11_r = arr1_h.acquire(mode='R') print("first array.array object is modified", arr11_r) arr1_h.release() arr2_h = Handle(arr2) arr2_r = arr2_h.acquire(mode='R') print("second array.array object is", arr2_r) arr2_h.release() arr3_h = Handle(arr3) arr3_r = arr3_h.acquire(mode='R') print("third array.array object is", arr3_r) arr3_h.release() arr4_h = Handle(arr4) arr4_r = arr4_h.acquire(mode='R') print("fourth array.array object is", arr4_r) arr4_h.release() ret_arr1 = starpu.task_submit(ret_handle=True)(arrarr_add, arr1_h, arr2_h) print("result of adding two array.array elements: ", ret_arr1.get()) ret_arr2 = starpu.task_submit(ret_handle=True)(arrarr_multi, arr1_h, arr2_h) print("result of multiplying two array.array elements: ", ret_arr2.get()) ret_arr3 = starpu.task_submit(ret_handle=True)(arrarr_scal, arr3_h, 2) print("result of multiplying array.array element by a scalar: ", ret_arr3.get()) arr1_h.unregister() arr2_h.unregister() arr3_h.unregister() arr4_h.unregister() ret_arr1.unregister() ret_arr2.unregister() ret_arr3.unregister() ##################################memoryview handle########################################### print("*************************") print("memoryview handle:") print("*************************") m1 = memoryview(bytearray("hello", 'utf-8')) m1_tb = m1.tobytes() print("m1 to bytes is", m1_tb) m2 = memoryview(array.array('i', [1, 2, 3, 4])) m2_tl = m2.tolist() print("m2 to list is", m2_tl) m3 = memoryview(array.array('u', 'hello')) m1_h = Handle(m1) print("m1 is", m1_h.acquire(mode='RW')) m1[0] = 100 m1_h.release() print("m1 to bytes after modifying is", m1_tb) print("m1 after modifying is", m1_h.acquire(mode='RW')) m1_h.release() m2_h = Handle(m2) print("m2 is", m2_h.acquire(mode='R')) m2_h.release() m3_h = Handle(m3) print("m3 is", m3_h.acquire(mode='R')) m3_h.release() # multi dimension def mem_show(x): print("memory is", x) buf = struct.pack("L"*12, *list(range(12))) x = memoryview(buf) # 2-dimension y = x.cast('L', shape=[3,4]) # 3-dimension z = x.cast('L', shape=[2,3,2]) print(y.tolist()) print(z.tolist()) y_h = Handle(y) ret_m1 = starpu.task_submit(ret_handle=True)(mem_show, y_h) print("y is", y_h.acquire(mode='R')) y_h.release() z_h = Handle(z) ret_m2 = starpu.task_submit(ret_handle=True)(mem_show, z_h) print("z is", z_h.acquire(mode='R')) z_h.release() m1_h.unregister() m2_h.unregister() m3_h.unregister() y_h.unregister() z_h.unregister() ret_m1.unregister() ret_m2.unregister() if np is not None: #####################################access mode annotation################################### print("*************************") print("access mode annotation:") print("*************************") a = np.array([1, 2, 3, 4]) a_h = Handle(a) e_h = HandleNumpy(a.shape, a.dtype) a_r = a_h.acquire(mode='R') print("original array is:", a_r) a_h.release() ######access##### print("------------------") print("access decorator:") print("------------------") @starpu.access(a="R", b="W") def assign(a,b): for i in range(min(np.size(a), np.size(b))): b[i]=a[i] res11 = starpu.task_submit(ret_handle=True)(assign, a_h, e_h) e_r = e_h.acquire(mode='RW') print("assigned 1-dimension array is:", e_r) e_h.release() ######delayed####### print("------------------") print("delayed decorator:") print("------------------") @starpu.delayed(ret_handle=True, a="R", b="W") def assign(a,b): for i in range(min(np.size(a), np.size(b))): b[i]=a[i] res12 = assign(a_h, e_h) e_r = e_h.acquire(mode='RW') print("assigned 1-dimension array is:", e_r) e_h.release() ######set access###### print("------------------") print("access function:") print("------------------") def assign(a,b): for i in range(min(np.size(a), np.size(b))): b[i]=a[i] assign_access=starpu.set_access(assign, a="R", b="W") res13 = starpu.task_submit(ret_handle=True)(assign_access, a_h, e_h) e_r = e_h.acquire(mode='RW') print("assigned 1-dimension array is:", e_r) e_h.release() a_h.unregister() e_h.unregister() res11.unregister() res12.unregister() res13.unregister() #######################Numpy without explicit handle############################ print("*******************************") print("Numpy without explicit handle:") print("*******************************") arrh1 = np.array([1, 2, 3]) arrh2 = np.array([4, 5, 6]) @starpu.access(a="RW", b="R") def np_add(a, b): #time.sleep(2) for i in range(np.size(a)): a[i] = a[i] + b[i] print("First argument before task submitting is", starpu.acquire(arrh1, mode='R')) #a[0]=100 starpu.release(arrh1) # without explicit handle res14 = starpu.task_submit(ret_handle=True)(np_add, arrh1, arrh2) print("First argument after task submitting is", starpu.acquire(arrh1, mode='R')) starpu.release(arrh1) # it's mandatory to call unregister when the argument is no longer needed to access, but it's not obligatory, calling starpupy.shutdown() in the end is enough, which will unregister all no-explicit handle starpu.unregister(arrh1) res14.unregister() #######################Numpy without using handle############################### print("*******************************") print("Numpy without using handle:") print("*******************************") npa1 = np.array([1, 2, 3]) npa2 = np.array([4, 5, 6]) print("First argument before task submitting is", npa1) # without using handle, set option arg_handle to False res15 = starpu.task_submit(arg_handle=False, ret_handle=True)(np_add, npa1, npa2) print("First argument after task submitting is", npa1) #print("The addition result is", res15.get()) res15.unregister() ######################### starpu.shutdown() starpu-1.4.9+dfsg/starpupy/examples/starpu_py_handle.sh000077500000000000000000000013511507764646700234150ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh examples/starpu_py_handle.py $* starpu-1.4.9+dfsg/starpupy/examples/starpu_py_np.concurrent.sh000077500000000000000000000013471507764646700247650ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh starpu_py_np.concurrent.py $* starpu-1.4.9+dfsg/starpupy/examples/starpu_py_np.py000066400000000000000000000046741507764646700226250ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # try: import numpy as np except (ModuleNotFoundError, ImportError): print("Can't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)") exit(77) import starpu from starpu import starpupy import asyncio try: starpu.init() except Exception as e: print(e) exit(77) def await_fut(fut): return fut.result() ############################################################################### def scal(x, t): for i in range(len(t)): t[i] = t[i] * x print ("Example scal(scalar, array):") return t def add(x, y): print ("Example add(array, array):") return x + y def multi(x, y): print ("Example multi(array, array):") return x * y def matrix_multi(x, y): print ("Example matrix_multi(array, array):") return x @ y t = np.arange(10) a = np.array([1, 2, 3]) b = np.array([4, 5, 6]) c = np.array([[1, 2], [3, 4]]) d = np.array([[2, 2], [2, 2]]) async def main(): fut1 = starpu.task_submit()(scal, 2, t) res1 = await(fut1) print("The result is", res1) # two array element addition fut2 = starpu.task_submit()(add, a, b) res2 = await(fut2) print("The result is", res2) # two array element multiplication fut3 = starpu.task_submit()(multi, c, d) res3 = await(fut3) print("The result is", res3) # two array matrix multiplication fut4 = starpu.task_submit()(matrix_multi, c, d) res4 = await(fut4) print("The result is", res4) # two array matrix multiplication (inverse order) fut5 = starpu.task_submit()(matrix_multi, d, c) res5 = await(fut5) print("The result is", res5) try: asyncio.run(main()) except starpupy.error as e: print("No worker to execute the job") starpupy.shutdown() exit(77) starpu.shutdown() starpu-1.4.9+dfsg/starpupy/examples/starpu_py_np.sh000077500000000000000000000013451507764646700226020ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh examples/starpu_py_np.py $* starpu-1.4.9+dfsg/starpupy/examples/starpu_py_numpy.concurrent.sh000077500000000000000000000013521507764646700255140ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh starpu_py_numpy.concurrent.py $* starpu-1.4.9+dfsg/starpupy/examples/starpu_py_numpy.py000066400000000000000000000024641507764646700233530ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # try: import numpy as np except (ModuleNotFoundError, ImportError): print("Can't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)") exit(77) import starpu from starpu import Handle import asyncio try: starpu.init() except Exception as e: print(e) exit(77) @starpu.access(a="RW",b="R") def arr_add(a,b): for i in range(np.size(a)): a[i] = a[i] + b[i] a_h = Handle(np.array([1, 2, 3, 4])) b_h = Handle(np.array([5, 6, 7, 8])) starpu.task_submit(ret_fut=False)(arr_add, a_h, b_h) print("Array is", a_h.get()) a_h.unregister() b_h.unregister() starpu.shutdown() starpu-1.4.9+dfsg/starpupy/examples/starpu_py_numpy.sh000077500000000000000000000013501507764646700233310ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh examples/starpu_py_numpy.py $* starpu-1.4.9+dfsg/starpupy/examples/starpu_py_parallel.py000066400000000000000000000341571507764646700240030ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # try: import numpy as np except (ModuleNotFoundError, ImportError): print("Can't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)") np = None import starpu try: import starpu.joblib except (ModuleNotFoundError, ImportError): print("Can't find starpu.joblib\" module (consider running \"pip3 install joblib\")") exit(77) from starpu import starpupy import time import asyncio from math import sqrt from math import log10 import sys try: starpu.init() except Exception as e: print(e) exit(77) def await_fut(fut): return fut.result() #generate a list to store functions g_func=[] #function no input no output print hello world def hello(): print ("Example 1: Hello, world!") g_func.append(starpu.joblib.delayed(hello)()) #function no input no output def func1(): print ("Example 2: This is a function no input no output") g_func.append(starpu.joblib.delayed(func1)()) #function no input return a value def func2(): print ("Example 3:") return 12 g_func.append(starpu.joblib.delayed(func2)()) #function has 2 int inputs and 1 int output def exp(a,b): res_exp=a**b print("Example 4: The result of ",a,"^",b,"is",res_exp) return res_exp g_func.append(starpu.joblib.delayed(exp)(2, 3)) #function has 4 float inputs and 1 float output def add(a,b,c,d): res_add=a+b+c+d print("Example 5: The result of ",a,"+",b,"+",c,"+",d,"is",res_add) return res_add g_func.append(starpu.joblib.delayed(add)(1.2, 2.5, 3.6, 4.9)) #function has 2 int inputs 1 float input and 1 float output 1 int output def sub(a,b,c): res_sub1=a-b-c res_sub2=a-b print ("Example 6: The result of ",a,"-",b,"-",c,"is",res_sub1,"and the result of",a,"-",b,"is",res_sub2) return res_sub1, res_sub2 g_func.append(starpu.joblib.delayed(sub)(6, 2, 5.9)) ##########functions of array calculation############### def scal(a, t): for i in range(len(t)): t[i]=t[i]*a return t @starpu.access(t="RW") def scal_np(a, t): for i in range(len(t)): t[i]=t[i]*a @starpu.access(t1="RW") def add_scal(a, t1, t2): for i in range(len(t1)): t1[i]=t1[i]*a+t2[i] #return t1 @starpu.access(t="RW") def scal_arr(a, t): for i in range(len(t)): t[i]=t[i]*a[i] def multi(a,b): res_multi=a*b return res_multi def multi_2arr(a, b): for i in range(len(a)): a[i]=a[i]*b[i] return a @starpu.access(a="RW") def multi_2np(a, b): for i in range(len(a)): a[i]=a[i]*b[i] def multi_list(l): res = [] for (a,b) in l: res.append(a*b) return res @starpu.access(t="RW") def log10_arr(t): for i in range(len(t)): t[i]=log10(t[i]) ######################################################## displayPlot=False listX=[10, 100] for arg in sys.argv[1:]: if arg == "-long": listX = [10, 100, 1000, 10000, 100000, 1000000, 10000000] if arg == "-plot": displayPlot=True if np is not None: #################scikit test################### # DEFAULT_JOBLIB_BACKEND = starpu.joblib.get_active_backend()[0].__class__ # class MyBackend(DEFAULT_JOBLIB_BACKEND): # type: ignore # def __init__(self, *args, **kwargs): # self.count = 0 # super().__init__(*args, **kwargs) # def start_call(self): # self.count += 1 # return super().start_call() # starpu.joblib.register_parallel_backend('testing', MyBackend) # with starpu.joblib.parallel_backend("testing") as (ba, n_jobs): # print("backend and n_jobs is", ba, n_jobs) ############################################### N=100 # A=np.arange(N) # B=np.arange(N) # a=np.arange(N) # b=np.arange(N, 2*N, 1) for x in listX: for X in range(x, x*10, x): #print("X=",X) try : starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="log_list")(starpu.joblib.delayed(log10)(i+1)for i in range(X)) A=np.arange(1,X+1,1) starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="log_arr")(starpu.joblib.delayed(log10_arr)(A)) except starpupy.error as e: print("No worker to execute the job") exit(77) print("************************") print("parallel Normal version:") print("************************") print("--(sqrt)(i**2)for i in range(N)") start_exec1=time.time() start_cpu1=time.process_time() starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="sqrt")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N)) end_exec1=time.time() end_cpu1=time.process_time() print("the program execution time is", end_exec1-start_exec1) print("the cpu execution time is", end_cpu1-start_cpu1) print("--(multi)(i,j) for i,j in zip(a,b)") a=np.arange(N) b=np.arange(N, 2*N, 1) start_exec2=time.time() start_cpu2=time.process_time() starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi")(starpu.joblib.delayed(multi)(i,j) for i,j in zip(a,b)) end_exec2=time.time() end_cpu2=time.process_time() print("the program execution time is", end_exec2-start_exec2) print("the cpu execution time is", end_cpu2-start_cpu2) print("--(scal_arr)((i for i in b), A)") A=np.arange(N) b=np.arange(N, 2*N, 1) print("The input array is", A) start_exec3=time.time() start_cpu3=time.process_time() starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal_arr")(starpu.joblib.delayed(scal_arr)((i for i in b), A)) end_exec3=time.time() end_cpu3=time.process_time() print("The return array is", A) print("the program execution time is", end_exec3-start_exec3) print("the cpu execution time is", end_cpu3-start_cpu3) print("--(multi_list)((i,j) for i,j in zip(a,b))") a=np.arange(N) b=np.arange(N, 2*N, 1) start_exec4=time.time() start_cpu4=time.process_time() starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_list")(starpu.joblib.delayed(multi_list)((i,j) for i,j in zip(a,b))) end_exec4=time.time() end_cpu4=time.process_time() print("the program execution time is", end_exec4-start_exec4) print("the cpu execution time is", end_cpu4-start_cpu4) print("--(multi_2arr)((i for i in a), (j for j in b))") a=np.arange(N) b=np.arange(N, 2*N, 1) start_exec5=time.time() start_cpu5=time.process_time() starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)((i for i in a), (j for j in b))) end_exec5=time.time() end_cpu5=time.process_time() print("the program execution time is", end_exec5-start_exec5) print("the cpu execution time is", end_cpu5-start_cpu5) print("--(multi_2np)(A, B)") # A=np.arange(N) # B=np.arange(N, 2*N, 1) n, m = 4, 5 A = np.arange(n*m).reshape(n, m) B = np.arange(n*m, 2*n*m, 1).reshape(n, m) print("The input arrays are A", A, "B", B) start_exec6=time.time() start_cpu6=time.process_time() starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2np)(A, B)) end_exec6=time.time() end_cpu6=time.process_time() print("The return array is", A) print("the program execution time is", end_exec6-start_exec6) print("the cpu execution time is", end_cpu6-start_cpu6) print("--(scal)(2, t=(j for j in a))") a=np.arange(N) start_exec7=time.time() start_cpu7=time.process_time() starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2, t=(j for j in a))) end_exec7=time.time() end_cpu7=time.process_time() print("the program execution time is", end_exec7-start_exec7) print("the cpu execution time is", end_cpu7-start_cpu7) print("--(scal_np)(2,A)") A=np.arange(N) print("The input is", A) start_exec8=time.time() start_cpu8=time.process_time() starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal_np)(2,A)) end_exec8=time.time() end_cpu8=time.process_time() print("The return array is", A) print("the program execution time is", end_exec8-start_exec8) print("the cpu execution time is", end_cpu8-start_cpu8) print("--(add_scal)(t1=A,t2=B,a=2)") A=np.arange(N) B=np.arange(N) print("The input arrays are t1", A, "t2", B) start_exec9=time.time() start_cpu9=time.process_time() starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="add_scal")(starpu.joblib.delayed(add_scal)(t1=A,t2=B,a=2)) end_exec9=time.time() end_cpu9=time.process_time() print("The return array is", A) print("the program execution time is", end_exec9-start_exec9) print("the cpu execution time is", end_cpu9-start_cpu9) print("--input is iterable function list") start_exec10=time.time() start_cpu10=time.process_time() starpu.joblib.Parallel(mode="normal", n_jobs=-1, perfmodel="func")(g_func) end_exec10=time.time() end_cpu10=time.process_time() print("the program execution time is", end_exec10-start_exec10) print("the cpu execution time is", end_cpu10-start_cpu10) # def producer(): # for i in range(6): # print('Produced %s' % i) # yield i #starpu.joblib.Parallel(n_jobs=2)(starpu.joblib.delayed(sqrt)(i) for i in producer()) print("************************") print("parallel Future version:") print("************************") async def main(): print("--(sqrt)(i**2)for i in range(N)") fut1=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="sqrt")(starpu.joblib.delayed(sqrt)(i**2)for i in range(N)) res1=await(fut1) print("The result is", sum(res1,[])) print("--(multi)(i,j) for i,j in zip(a,b)") a=np.arange(N) b=np.arange(N, 2*N, 1) print("The inputs are a", a, "b", b) fut2=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi")(starpu.joblib.delayed(multi)(i,j) for i,j in zip(a,b)) res2=await(fut2) print("The result is", sum(res2,[])) print("--(scal_arr)((i for i in b), A)") A=np.arange(N) b=np.arange(N, 2*N, 1) print("The input arrays are A", A, "b", b) fut3=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal_arr")(starpu.joblib.delayed(scal_arr)((i for i in b), A)) res3=await(fut3) #print("The return array is", np.concatenate(res3)) print("The return array is", A) print("--(multi_list)((i,j) for i,j in zip(a,b))") a=np.arange(N) b=np.arange(N, 2*N, 1) print("The input lists are a", a, "b", b) fut4=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_list")(starpu.joblib.delayed(multi_list)((i,j) for i,j in zip(a,b))) res4=await(fut4) print("The result is", sum(res4,[])) print("--(multi_2arr)((i for i in a), (j for j in b))") a=np.arange(N) b=np.arange(N, 2*N, 1) print("The input lists are a", a, "b", b) fut5=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2arr)((i for i in a), (j for j in b))) res5=await(fut5) print("The result is", sum(res5,[])) print("--(multi_2np)(b=B, a=A)") A=np.arange(N) B=np.arange(N, 2*N, 1) print("The input arrays are A", A, "B", B) fut6=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="multi_2arr")(starpu.joblib.delayed(multi_2np)(b=B, a=A)) res6=await(fut6) #print("The return array is", np.concatenate(res6)) print("The return array is", A) print("--(scal)(2, (j for j in a))") a=np.arange(N) print("The input list is a", a) fut7=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal)(2, (j for j in a))) res7=await(fut7) print("The result is", sum(res7,[])) print("--(scal_np)(2,t=A)") A=np.arange(N) print("The input array is", A) fut8=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="scal")(starpu.joblib.delayed(scal_np)(2,t=A)) res8=await(fut8) #print("The return array is", np.concatenate(res8)) print("The return array is", A) print("--(scal)(2,A,B)") A=np.arange(N) B=np.arange(N) print("The input arrays are A", A, "B", B) fut9=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="add_scal")(starpu.joblib.delayed(add_scal)(2,A,B)) res9=await(fut9) #print("The return array is", np.concatenate(res9)) print("The return array is", A) print("--input is iterable function list") fut10=starpu.joblib.Parallel(mode="future", n_jobs=-1, perfmodel="func")(g_func) res10=await(fut10) #print(res9) try: asyncio.run(main()) except starpupy.error as e: starpu.shutdown() exit(77) starpu.perfmodel_plot(perfmodel="sqrt",view=displayPlot) starpu.perfmodel_plot(perfmodel="multi",view=displayPlot) starpu.perfmodel_plot(perfmodel="scal_arr",view=displayPlot) starpu.perfmodel_plot(perfmodel="multi_list",view=displayPlot) starpu.perfmodel_plot(perfmodel="multi_2arr",view=displayPlot) starpu.perfmodel_plot(perfmodel="scal",view=displayPlot) starpu.perfmodel_plot(perfmodel="add_scal",view=displayPlot) starpu.perfmodel_plot(perfmodel="func",view=displayPlot) starpu.perfmodel_plot(perfmodel="log_list",view=displayPlot) starpu.perfmodel_plot(perfmodel="log_arr",view=displayPlot) starpu.shutdown() starpu-1.4.9+dfsg/starpupy/examples/starpu_py_parallel.sh000077500000000000000000000013761507764646700237650ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # STARPU_CALIBRATE=1 $(dirname $0)/../execute.sh examples/starpu_py_parallel.py $* starpu-1.4.9+dfsg/starpupy/examples/starpu_py_partition.concurrent.sh000077500000000000000000000013561507764646700263610ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh starpu_py_partition.concurrent.py $* starpu-1.4.9+dfsg/starpupy/examples/starpu_py_partition.py000066400000000000000000000042771507764646700242200ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # try: import numpy as np except (ModuleNotFoundError, ImportError): print("Can't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)") exit(77) import starpu from starpu import Handle from starpu import HandleNumpy import asyncio import time import array import struct try: starpu.init() except Exception as e: print(e) exit(77) def await_fut(fut): return fut.result() # 1-dimension # arr = np.arange(20) # 2-dimension # n, m = 20, 10 # arr = np.arange(n*m).reshape(n, m) # 3-dimension # x, y, z = 10, 15, 20 # arr = np.arange(x*y*z).reshape(x, y, z) # 4-dimension x, y, z, t = 10, 5, 10, 20 arr = np.arange(x*y*z*t).reshape(x, y, z, t) print("input array is", arr) arr_h = Handle(arr) # split into split_num of sub handles split_num = 3 #arr_h_list = arr_h.partition(split_num, 0, [6,6,8]) arr_h_list = arr_h.partition(split_num, 0, [3,2,5]) n_arr = arr_h.get_partition_size(arr_h_list) print("partition size is", n_arr) def show(x): print("Function printing:", x) @starpu.access(a="RW") def add(a,b): np.add(a,b,out=a) for i in range(split_num): starpu.task_submit(ret_handle=False,ret_fut=False)(add, arr_h_list[i], arr_h_list[i]) # async def main(): # for i in range(split_num): # res=starpu.task_submit()(add, arr_h_list[i], arr_h_list[i]) # res1=await(res) # asyncio.run(main()) arr_r = arr_h.acquire(mode='RW') print("output array is:", arr_r) arr_h.release() arr_h.unpartition(arr_h_list, split_num) arr_h.unregister() starpu.shutdown() starpu-1.4.9+dfsg/starpupy/examples/starpu_py_partition.sh000077500000000000000000000013541507764646700241760ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh examples/starpu_py_partition.py $* starpu-1.4.9+dfsg/starpupy/examples/starpu_py_perfmodel.concurrent.sh000077500000000000000000000013561507764646700263250ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh starpu_py_perfmodel.concurrent.py $* starpu-1.4.9+dfsg/starpupy/examples/starpu_py_perfmodel.py000066400000000000000000000024731507764646700241600ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # try: import numpy as np except (ModuleNotFoundError, ImportError): print("Can't find \"Python3 NumPy\" module (consider running \"pip3 install numpy\" or refer to https://numpy.org/install/)") exit(77) import starpu from starpu import Handle import asyncio try: starpu.init() except Exception as e: print(e) exit(77) @starpu.access(a="RW",b="R") def arr_add(a,b): for i in range(np.size(a)): a[i] = a[i] + b[i] a_h = Handle(np.arange(1000)) b_h = Handle(np.arange(1000)) starpu.task_submit(ret_fut=False, perfmodel="arr_add")(arr_add, a_h, b_h) print("Array is", a_h.get()) a_h.unregister() b_h.unregister() starpu.shutdown() starpu-1.4.9+dfsg/starpupy/examples/starpu_py_perfmodel.sh000077500000000000000000000013541507764646700241420ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # $(dirname $0)/../execute.sh examples/starpu_py_perfmodel.py $* starpu-1.4.9+dfsg/starpupy/execute.sh.in000077500000000000000000000063211507764646700203070ustar00rootroot00000000000000#!@REALBASH@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROGNAME=$0 usage() { echo "Tool to launch StarPUPY and examples" echo "" echo " Typical usage:" echo " $PROGNAME [python_script.py]" echo "" echo "Options:" echo " -h, --help display this help and exit" echo " -v, --version output version information and exit" echo " --valgrind launch executable with valgrind" echo " --gdb launch executable with gdb" echo " --mpirun launch executable with MPI using 2 processes" exit 0 } starpupy_path=@STARPU_SRC_DIR@/starpupy modpath=@STARPU_BUILD_DIR@/src/.libs${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} pypath=@STARPU_BUILD_DIR@/starpupy/src/build:$PYTHONPATH LOADER="@STARPU_BUILD_DIR@/starpupy/examples/loader" PYTHON=@PYTHON@ # python produces a lot of "possibly lost" false positives export STARPU_VALGRIND_OPTIONS="--errors-for-leak-kinds=definite,indirect --show-leak-kinds=definite,indirect" MPI_LAUNCHER="$LAUNCHER" if test -z "$MPI_LAUNCHER" then MPI_LAUNCHER="mpiexec -np 2" fi mpi="" gdb="" MPI_LOADER="" valgrind="" EXEC_ARGS="" for x in $LOADER_ARGS $* do if [ "$x" = "-v" ] || [ "$x" = "--version" ] then echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" exit 0 fi if [ "$x" = "-h" ] || [ "$x" = "--help" ] then usage exit 0 fi if test "$x" == "--valgrind" then valgrind="valgrind" export PYTHONMALLOC=malloc LOADER="$LOADER valgrind --track-origins=yes" elif test "$x" == "--gdb" then gdb="gdb" LOADER="gdb --args" elif test "$x" == "--mpirun" then mpi="mpi" else EXEC_ARGS="$EXEC_ARGS $x" fi done if test "$mpi" == "mpi" then if test -n "$gdb" then MPI_LOADER="$MPI_LAUNCHER xterm $XTERM_PARAMS -sl 10000 -e" elif test -n "$valgrind" then MPI_LOADER="$MPI_LAUNCHER xterm $XTERM_PARAMS -sl 10000 -hold -e" else MPI_LOADER="$MPI_LAUNCHER" fi fi # set arguments with the cleaned list set -- $EXEC_ARGS examplefile=$1 shift if test -n "$examplefile" then if test -f $examplefile then pythonscript=$examplefile elif test -f $starpupy_path/$examplefile then pythonscript=$starpupy_path/$examplefile else echo "Error. Python script $examplefile not found in current directory or in $starpupy_path" exit 1 fi else # Interactive use if ! test -n "$gdb" then LOADER="" fi fi set -x if [ -n "$STARPU_LD_PRELOAD" ] then export LD_PRELOAD=$STARPU_LD_PRELOAD${LD_PRELOAD:+:$LD_PRELOAD} fi export PYTHONPATH=$pypath LD_LIBRARY_PATH=$modpath exec $MPI_LOADER $LOADER $PYTHON $pythonscript $* starpu-1.4.9+dfsg/starpupy/src/000077500000000000000000000000001507764646700164665ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpupy/src/Makefile.am000066400000000000000000000063621507764646700205310ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-notests.mk SUBDIRS = PYTHON_PY_SRC = $(wildcard $(top_srcdir)/starpupy/src/*py) PYTHON_PY_BUILD = $(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_PY_SRC))) PYTHON_C_SRC = $(wildcard $(top_srcdir)/starpupy/src/*c) PYTHON_C_BUILD = $(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_C_SRC))) PYTHON_H_SRC = $(wildcard $(top_srcdir)/starpupy/src/*h) PYTHON_H_BUILD = $(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_H_SRC))) $(top_builddir)/starpupy/src/starpu/%.py: $(abs_top_srcdir)/starpupy/src/%.py $(MKDIR_P) starpu $(V_ln) $(LN_S) $< $@ $(top_builddir)/starpupy/src/starpu/%.c: $(abs_top_srcdir)/starpupy/src/%.c @$(MKDIR_P) starpu $(V_ln) $(LN_S) $< $@ $(top_builddir)/starpupy/src/starpu/%.h: $(abs_top_srcdir)/starpupy/src/%.h @$(MKDIR_P) starpu $(V_ln) $(LN_S) $< $@ all: $(PYTHON_PY_BUILD) $(PYTHON_C_BUILD) $(PYTHON_H_BUILD) LDFLAGS=$${LDFLAGS/-no-pie/} $(PYTHON) setup.py build $(PYTHON_SETUP_OPTIONS) # FIXME and enable dist-hook: nm -n build/starpu/starpupy.cpython-*.so | grep -v " [Ua-z] " | grep -ve " _\?_\?_\?f\?starpu" | grep -ve " \(_init\|_fini\|_edata\|__bss_start\|_end\|PyInit_starpupy\|__gcov_\|mangle_path\)" | (! grep .) check: all PYTHONPATH=$(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages install-exec-local: all LDFLAGS=$${LDFLAGS/-no-pie/} $(PYTHON) setup.py clean @if test -d $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION) ; \ then \ chmod u+w $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION) ; \ fi @if test -d $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages ; \ then \ chmod u+w $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages ; \ fi $(MKDIR_P) $(PYTHONPATH)/starpu chmod u+w $(PYTHONPATH)/starpu $(PYTHON) setup.py install --prefix '$(prefix)' --root '$(DESTDIR)$(prefix)' rm -fr build/bdist* if STARPU_BUILD_STARPUPY clean-local: rm -rf lib dist build starpupy.egg-info $(PYTHON) setup.py clean -a endif distclean-local: rm -rf build dist lib uninstall-local: clean-local rm -rf $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages/starpu* rm -rf $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages/tmp/starpu* EXTRA_DIST = \ delay.py \ __init__.py \ intermedia.py \ joblib.py \ handle_access.py \ starpu_task_wrapper.c \ starpupy_cloudpickle.h \ starpupy_interface.c \ starpupy_interface.h \ starpupy_buffer_interface.c \ starpupy_buffer_interface.h \ starpupy_numpy_filters.c \ starpupy_numpy_filters.h \ starpupy_handle.c \ starpupy_handle.h \ starpupy_private.h starpu-1.4.9+dfsg/starpupy/src/Makefile.in000066400000000000000000001021041507764646700205310ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ subdir = starpupy/src ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = setup.cfg setup.py CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/setup.cfg.in \ $(srcdir)/setup.py.in $(top_srcdir)/make/starpu-notests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_CFLAGS = $(GLOBAL_AM_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) SUBDIRS = PYTHON_PY_SRC = $(wildcard $(top_srcdir)/starpupy/src/*py) PYTHON_PY_BUILD = $(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_PY_SRC))) PYTHON_C_SRC = $(wildcard $(top_srcdir)/starpupy/src/*c) PYTHON_C_BUILD = $(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_C_SRC))) PYTHON_H_SRC = $(wildcard $(top_srcdir)/starpupy/src/*h) PYTHON_H_BUILD = $(addprefix $(top_builddir)/starpupy/src/starpu/,$(notdir $(PYTHON_H_SRC))) PYTHONPATH = $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages EXTRA_DIST = \ delay.py \ __init__.py \ intermedia.py \ joblib.py \ handle_access.py \ starpu_task_wrapper.c \ starpupy_cloudpickle.h \ starpupy_interface.c \ starpupy_interface.h \ starpupy_buffer_interface.c \ starpupy_buffer_interface.h \ starpupy_numpy_filters.c \ starpupy_numpy_filters.h \ starpupy_handle.c \ starpupy_handle.h \ starpupy_private.h all: all-recursive .SUFFIXES: .SUFFIXES: .cu .cubin .hip .o $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpupy/src/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign starpupy/src/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): setup.cfg: $(top_builddir)/config.status $(srcdir)/setup.cfg.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ setup.py: $(top_builddir)/config.status $(srcdir)/setup.py.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$(top_distdir)" distdir="$(distdir)" \ dist-hook check-am: all-am check: check-recursive all-am: Makefile installdirs: installdirs-recursive installdirs-am: install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." @STARPU_BUILD_STARPUPY_FALSE@clean-local: clean: clean-recursive clean-am: clean-generic clean-libtool clean-local mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-local \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-exec-local install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-local .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ check-am clean clean-generic clean-libtool clean-local \ cscopelist-am ctags ctags-am dist-hook distclean \ distclean-generic distclean-libtool distclean-local \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-exec-local \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip installcheck installcheck-am \ installdirs installdirs-am maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ uninstall-am uninstall-local .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null $(top_builddir)/starpupy/src/starpu/%.py: $(abs_top_srcdir)/starpupy/src/%.py $(MKDIR_P) starpu $(V_ln) $(LN_S) $< $@ $(top_builddir)/starpupy/src/starpu/%.c: $(abs_top_srcdir)/starpupy/src/%.c @$(MKDIR_P) starpu $(V_ln) $(LN_S) $< $@ $(top_builddir)/starpupy/src/starpu/%.h: $(abs_top_srcdir)/starpupy/src/%.h @$(MKDIR_P) starpu $(V_ln) $(LN_S) $< $@ all: $(PYTHON_PY_BUILD) $(PYTHON_C_BUILD) $(PYTHON_H_BUILD) LDFLAGS=$${LDFLAGS/-no-pie/} $(PYTHON) setup.py build $(PYTHON_SETUP_OPTIONS) # FIXME and enable dist-hook: nm -n build/starpu/starpupy.cpython-*.so | grep -v " [Ua-z] " | grep -ve " _\?_\?_\?f\?starpu" | grep -ve " \(_init\|_fini\|_edata\|__bss_start\|_end\|PyInit_starpupy\|__gcov_\|mangle_path\)" | (! grep .) check: all install-exec-local: all LDFLAGS=$${LDFLAGS/-no-pie/} $(PYTHON) setup.py clean @if test -d $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION) ; \ then \ chmod u+w $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION) ; \ fi @if test -d $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages ; \ then \ chmod u+w $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages ; \ fi $(MKDIR_P) $(PYTHONPATH)/starpu chmod u+w $(PYTHONPATH)/starpu $(PYTHON) setup.py install --prefix '$(prefix)' --root '$(DESTDIR)$(prefix)' rm -fr build/bdist* @STARPU_BUILD_STARPUPY_TRUE@clean-local: @STARPU_BUILD_STARPUPY_TRUE@ rm -rf lib dist build starpupy.egg-info @STARPU_BUILD_STARPUPY_TRUE@ $(PYTHON) setup.py clean -a distclean-local: rm -rf build dist lib uninstall-local: clean-local rm -rf $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages/starpu* rm -rf $(DESTDIR)$(prefix)/lib/python$(PYTHON_VERSION)/site-packages/tmp/starpu* # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/starpupy/src/__init__.py000066400000000000000000000060221507764646700205770ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # from . import starpupy from .delay import * from .handle_access import * #from . import joblib from .intermedia import * import asyncio import concurrent.futures try: import numpy as np has_numpy=True except: has_numpy=False async def wait_for_fut(fut): return await fut async def asyncio_wait_for_fut(fut): return await fut def concurrent_futures_wait_for_fut(fut): return fut.result() #class handle class Handle(object): def __init__(self, obj, retval=False): self.obj=obj self.obj_id=id(self.obj) self.retval=retval self.handle_cap=starpupy.starpupy_data_register(self.obj, self) def get_capsule(self): return self.handle_cap def get_obj_id(self): return self.obj_id def get_retval(self): return self.retval # get PyObject def get(self): return starpupy.starpupy_get_object(self.handle_cap) # get array object def acquire(self, mode='R'): return starpupy.starpupy_acquire_handle(self.handle_cap, mode) # release def release(self): return starpupy.starpupy_release_handle(self.handle_cap) # unregister def unregister(self): return starpupy.starpupy_data_unregister(self) # unregister_submit def unregister_submit(self): return starpupy.starpupy_data_unregister_submit(self) # partition def partition(self, nchildren, dim, chunks_list=[]): return starpupy.starpupy_data_partition(self.handle_cap, nchildren, dim, chunks_list) # get partition size def get_partition_size(self, handle_list): return starpupy.starpupy_get_partition_size(self.handle_cap, handle_list) # unpartition def unpartition(self, handle_list, nchildren): return starpupy.starpupy_data_unpartition(self.handle_cap, handle_list, nchildren) def new_empty_numpy(shape, dtype): return np.empty(shape, dtype) #class handle class HandleNumpy(Handle): if has_numpy: def __init__(self, shape, dtype=np.dtype('float64'), retval=False): self.dtype=dtype self.retval=retval self.obj=new_empty_numpy(shape, self.dtype) self.obj_id=id(self.obj) self.handle_cap=starpupy.starpupy_data_register(self.obj, self) #detect class handle class Handle_token(object): pass #this dict contains all handle objects of mutable Python objects handle_dict={} def handle_dict_set_item(obj, handle): assert handle_dict.get(id(obj))==None handle_dict[id(obj)]=handle return handle_dict #this set contains all handle objects of immutable Python objects handle_set=set() starpu-1.4.9+dfsg/starpupy/src/delay.py000066400000000000000000000024611507764646700201410ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # from starpu import starpupy import starpu import asyncio from functools import partial import inspect def delayed(f=None, **kwargs): # add options of task_submit if f is None: return partial(delayed, **kwargs) def submit(*args): # set the access right access_mode={} f_args = inspect.getfullargspec(f).args # check the access right of argument is set in mode or not for i in range(len(f_args)): if f_args[i] in kwargs.keys(): # write access modes in f.access attribute access_mode[f_args[i]]=kwargs[f_args[i]] setattr(f, "starpu_access", access_mode) fut = starpu.task_submit(**kwargs)(f, *args) return fut return submit starpu-1.4.9+dfsg/starpupy/src/handle_access.py000066400000000000000000000020721507764646700216150ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # from starpu import starpupy import starpu import inspect def access(**kwargs): def access_decorator(func): # write access modes in f.access attribute setattr(func,'starpu_access', kwargs) return func return access_decorator # set mode as **kwargs of func def set_access(func, **kwargs): # write access modes in f.access attribute setattr(func,'starpu_access', kwargs) return func starpu-1.4.9+dfsg/starpupy/src/intermedia.py000066400000000000000000000061661507764646700211720ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import starpu from starpu import starpupy import os import inspect import array # define the different architecture STARPU_CPU_WORKER = 0 STARPU_CUDA_WORKER = 1 STARPU_OPENCL_WORKER = 2 STARPU_MAX_FPGA_WORKER = 4 STARPU_MPI_MS_WORKER = 5 STARPU_TCPIP_MS_WORKER = 6 STARPU_HIP_WORKER = 7 STARPU_NARCH = 8 STARPU_ANY_WORKER = 255 #class perfmodel class Perfmodel(object): def __init__(self, symbol): self.symbol=symbol self.pstruct=starpupy.init_perfmodel(self.symbol) def get_struct(self): return self.pstruct def __del__(self): #def free_struct(self): starpupy.free_perfmodel(self.pstruct) # generate the dictionary which contains the perfmodel symbol and its struct pointer dict_perf={} def dict_perf_generator(perfsymbol): if dict_perf.get(perfsymbol)==None: p=Perfmodel(perfsymbol) dict_perf[perfsymbol]=p else: p=dict_perf[perfsymbol] return p # add options in function task_submit def task_submit(**kwargs): # set perfmodel perf=None if kwargs.__contains__("perfmodel") and kwargs['perfmodel']!=None: perf=dict_perf_generator(kwargs['perfmodel']) kwargs['perfmodel']=perf def call_task_submit(f, *args): modes={} # if there is access mode defined if hasattr(f,"starpu_access"): # the starpu_access attribute of f is the access mode access_mode=f.starpu_access # get the name of formal arguments of f f_args = inspect.getfullargspec(f).args # check the access right of argument is set in mode or not for i in range(len(f_args)): if f_args[i] in access_mode.keys(): # set access mode in modes option modes[id(args[i])]=access_mode[f_args[i]] kwargs['modes']=modes res=starpupy._task_submit(f, *args, kwargs) return res return call_task_submit # dump performance model and show the plot def perfmodel_plot(perfmodel, view=True): p=dict_perf[perfmodel] starpupy.save_history_based_model(p) if view == True: os.system('starpu_perfmodel_plot -s "' + perfmodel +'"') os.system('gnuplot starpu_'+perfmodel+'.gp') os.system('gv starpu_'+perfmodel+'.eps') # acquire object def acquire(obj, mode='R'): return starpupy.starpupy_acquire_object(obj, mode) # release object def release(obj): return starpupy.starpupy_release_object(obj) # acquire object def unregister(obj): return starpupy.starpupy_data_unregister_object(obj) # acquire object def unregister_submit(obj): return starpupy.starpupy_data_unregister_submit_object(obj) def init(): return starpupy.init() def shutdown(): return starpupy.shutdown() starpu-1.4.9+dfsg/starpupy/src/joblib.py000066400000000000000000000304561507764646700203110ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import sys import types import joblib as jl from joblib import logger from joblib._parallel_backends import ParallelBackendBase from starpu import starpupy from starpu import Handle import starpu import asyncio import math import functools try: import numpy as np has_numpy=True except: has_numpy=False import inspect import threading loop = asyncio.get_event_loop() if (loop.is_running()): try: import nest_asyncio nest_asyncio.apply() has_nest=True except (ModuleNotFoundError, ImportError): has_nest=False BACKENDS={ #'loky': LokyBackend, } _backend = threading.local() # get the number of CPUs controlled by StarPU def cpu_count(): n_cpus=starpupy.worker_get_count_by_type(starpu.STARPU_CPU_WORKER) return n_cpus # split a list ls into n_block numbers of sub-lists def partition(ls, n_block): if len(ls)>=n_block: # there are n1 sub-lists which contain q1 elements, and (n_block-n1) sublists which contain q2 elements (n1 can be 0) q1=math.ceil(len(ls)/n_block) q2=math.floor(len(ls)/n_block) n1=len(ls)%n_block #n2=n_block-n1 # generate n1 sub-lists in L1, and (n_block-n1) sub-lists in L2 L1=[ls[i:i+q1] for i in range(0, n1*q1, q1)] L2=[ls[i:i+q2] for i in range(n1*q1, len(ls), q2)] L=L1+L2 else: # if the block number is larger than the length of list, each element in the list is a sub-list L=[ls[i:i+1] for i in range (len(ls))] return L # split a two-dimension numpy matrix into n_block numbers of sub-matrices def array2d_split(a, n_block): # decompose number of n_jobs to two integers multiply c_tmp=math.floor(math.sqrt(n_block)) for i in range (c_tmp,0,-1): if n_block%i==0: c=i r=int(n_block/c) break # split column arr_split_c=np.array_split(a,c,0) arr_split=[] # split row for i in range(c): arr_split_r=np.array_split(arr_split_c[i],r,1) for j in range(r): arr_split.append(arr_split_r[j].copy(order='C')) return arr_split def future_generator(iterable, n_jobs, dict_task): # iterable is generated by delayed function, after converting to a list, the format is [function, (arg1, arg2, ... ,)] #print("iterable type is ", type(iterable)) #print("iterable is", iterable) # get the number of block if n_jobs<-cpu_count()-1: raise SystemExit('Error: n_jobs is out of range, number of CPUs is', cpu_count()) elif n_jobs<0: n_block=cpu_count()+1+n_jobs else: n_block=n_jobs if (n_block <= 0): n_block = 1 # if arguments is tuple format if type(iterable) is tuple: # the function is always the first element f=iterable[0] # get the name of formal arguments of f formal_args=inspect.getfullargspec(f).args # get the arguments list args=[] # argument is arbitrary in iterable[1] args=list(iterable[1]) # argument is keyword argument in iterable[2] for i in range(len(formal_args)): for j in iterable[2].keys(): if j==formal_args[i]: args.append(iterable[2][j]) # check whether all arrays have the same size l_arr=[] # list of Future result L_fut=[] # split the vector args_split=[] # handle list arg_h=[] for i in range(len(args)): args_split.append([]) # if the array is an numpy array if has_numpy and type(args[i]) is np.ndarray: # check whether the arg is already registered handle_dict = starpu.handle_dict if handle_dict.get(id(args[i]))==None: arr_h = Handle(args[i]) arg_h.append(arr_h) args_split[i] = arr_h.partition(n_block, 0) else: arr_h = handle_dict.get(id(args[i])) arg_h.append(arr_h) args_split[i] = arr_h.partition(n_block, 0) # if the array is a generator elif isinstance(args[i],types.GeneratorType): # split generator args_split[i]=partition(list(args[i]),n_block) arg_h.append(None) # get the length of generator l_arr.append(sum(len(args_split[i][j]) for j in range(len(args_split[i])))) else: arg_h.append(None) if len(set(l_arr))>1: raise SystemExit('Error: all arrays should have the same size') #print("args list is", args_split) for i in range(n_block): # generate the argument list L_args=[] sizebase=0 for j in range(len(args)): if (has_numpy and type(args[j]) is np.ndarray): L_args.append(args_split[j][i]) n_arr = arg_h[j].get_partition_size(args_split[j]) if sizebase==0: sizebase=n_arr[i] elif sizebase==n_arr[i]: continue else: raise SystemExit('Error: all arrays should be split into equal size') elif isinstance(args[j],types.GeneratorType): L_args.append(args_split[j][i]) if sizebase==0: sizebase=len(args_split[j][i]) elif sizebase==len(args_split[j][i]): continue else: raise SystemExit('Error: all arrays should be split into equal size') else: L_args.append(args[j]) #print("L_args is", L_args) fut=starpu.task_submit(name=dict_task['name'], synchronous=dict_task['synchronous'], priority=dict_task['priority'],\ color=dict_task['color'], flops=dict_task['flops'], perfmodel=dict_task['perfmodel'], sizebase=sizebase,\ ret_handle=dict_task['ret_handle'], ret_fut=dict_task['ret_fut'], arg_handle=dict_task['arg_handle'], modes=dict_task['modes'])\ (f, *L_args) L_fut.append(fut) # unpartition and unregister the numpy array for i in range(len(args)): if (has_numpy and type(args[i]) is np.ndarray): arg_h[i].unpartition(args_split[i], n_block) arg_h[i].unregister() return L_fut # if iterable is a generator or a list of function else: L=list(iterable) #print(L) # generate a list of function according to iterable def lf(ls): L_func=[] for i in range(len(ls)): # the first element is the function f=ls[i][0] # the second element is the args list of a type tuple L_args=list(ls[i][1]) # generate a list of function L_func.append(f(*L_args)) return L_func # generate the split function list L_split=partition(L,n_block) # operation in each split list L_fut=[] for i in range(len(L_split)): sizebase=len(L_split[i]) fut=starpu.task_submit(name=dict_task['name'], synchronous=dict_task['synchronous'], priority=dict_task['priority'],\ color=dict_task['color'], flops=dict_task['flops'], perfmodel=dict_task['perfmodel'], sizebase=sizebase,\ ret_handle=dict_task['ret_handle'], ret_fut=dict_task['ret_fut'], arg_handle=dict_task['arg_handle'], modes=dict_task['modes'])\ (lf, L_split[i]) L_fut.append(fut) return L_fut class Parallel(object): def __init__(self, mode="normal", perfmodel=None, end_msg=None,\ name=None, synchronous=0, priority=0, color=None, flops=None,\ ret_handle=False, ret_fut=True, arg_handle=True, modes=None,\ n_jobs=None, backend=None, verbose=0, timeout=None, pre_dispatch='2 * n_jobs',\ batch_size='auto', temp_folder=None, max_nbytes='1M',\ mmap_mode='r', prefer=None, require=None): #active_backend= get_active_backend() # nesting_level = active_backend.nesting_level # if backend is None: # backend = active_backend # else: # try: # backend_factory = BACKENDS[backend] # except KeyError as e: # raise ValueError("Invalid backend: %s, expected one of %r" # % (backend, sorted(BACKENDS.keys()))) from e # backend = backend_factory(nesting_level=nesting_level) if n_jobs is None: n_jobs = 1 self.mode=mode self.perfmodel=perfmodel self.end_msg=end_msg self.name=name self.synchronous=synchronous self.priority=priority self.color=color self.flops=flops self.ret_handle=ret_handle self.ret_fut=ret_fut self.arg_handle=arg_handle self.modes=modes self.n_jobs=n_jobs self._backend=backend def print_progress(self): #todo print("", starpupy.task_nsubmitted()) def __call__(self,iterable): #generate the dictionary of task_submit dict_task={'name': self.name, 'synchronous': self.synchronous, 'priority': self.priority, 'color': self.color, 'flops': self.flops, 'perfmodel': self.perfmodel, 'ret_handle': self.ret_handle, 'ret_fut': self.ret_fut, 'arg_handle': self.arg_handle, 'modes': self.modes} if hasattr(self._backend, 'start_call'): self._backend.start_call() # the mode normal, user can call the function directly without using async if self.mode=="normal": async def asy_main(): L_fut=future_generator(iterable, self.n_jobs, dict_task) res=[] for i in range(len(L_fut)): L_res=await L_fut[i] if L_res is None: res=None else: res.extend(L_res) #print(res) #print("type of result is", type(res)) return res #asyncio.run(asy_main()) #retVal=asy_main #loop = asyncio.get_event_loop() if(loop.is_running() and not has_nest): raise starpupy.error("Can't find \'nest_asyncio\' module (consider running \"pip3 install nest_asyncio\" or try to remove \"-m asyncio\" when starting Python interpreter)") results = loop.run_until_complete(asy_main()) retVal = results # the mode future, user needs to use asyncio module and await the Future result in main function elif self.mode=="future": L_fut=future_generator(iterable, self.n_jobs, dict_task) fut=asyncio.gather(*L_fut) if self.end_msg!=None: fut.add_done_callback(functools.partial(print, self.end_msg)) retVal=fut if hasattr(self._backend, 'stop_call'): self._backend.stop_call() return retVal def delayed(function): def delayed_function(*args, **kwargs): return function, args, kwargs return delayed_function ###################################################################### __version__ = jl.__version__ class Memory(jl.Memory): def __init__(self,location=None, backend='local', cachedir=None, mmap_mode=None, compress=False, verbose=1, bytes_limit=None, backend_options=None): super(Memory, self).__init__(location=None, backend='local', cachedir=None, mmap_mode=None, compress=False, verbose=1, bytes_limit=None, backend_options=None) def dump(value, filename, compress=0, protocol=None, cache_size=None): return jl.dump(value, filename, compress, protocol, cache_size) def load(filename, mmap_mode=None): return jl.load(filename, mmap_mode) def hash(obj, hash_name='md5', coerce_mmap=False): return jl.hash(obj, hash_name, coerce_mmap) def register_compressor(compressor_name, compressor, force=False): return jl.register_compressor(compressor_name, compressor, force) def effective_n_jobs(n_jobs=-1): return cpu_count() def get_active_backend(): backend_and_jobs = getattr(_backend, 'backend_and_jobs', None) if backend_and_jobs is not None: backend,n_jobs=backend_and_jobs return backend backend = BACKENDS[loky](nesting_level=0) return backend class parallel_backend(object): def __init__(self, backend, n_jobs=-1, inner_max_num_threads=None, **backend_params): if isinstance(backend, str): backend = BACKENDS[backend](**backend_params) current_backend_and_jobs = getattr(_backend, 'backend_and_jobs', None) if backend.nesting_level is None: if current_backend_and_jobs is None: nesting_level = 0 else: nesting_level = current_backend_and_jobs[0].nesting_level backend.nesting_level = nesting_level # Save the backends info and set the active backend self.old_backend_and_jobs = current_backend_and_jobs self.new_backend_and_jobs = (backend, n_jobs) _backend.backend_and_jobs = (backend, n_jobs) def __enter__(self): return self.new_backend_and_jobs def __exit__(self, type, value, traceback): self.unregister() def unregister(self): if self.old_backend_and_jobs is None: if getattr(_backend, 'backend_and_jobs', None) is not None: del _backend.backend_and_jobs else: _backend.backend_and_jobs = self.old_backend_and_jobs def register_parallel_backend(name, factory): BACKENDS[name] = factory starpu-1.4.9+dfsg/starpupy/src/setup.cfg.in000066400000000000000000000014411507764646700207140ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # [build] build_platlib=build build_temp=build/tmp [install] root=@prefix@ install_lib=/lib/python@PYTHON_VERSION@/site-packages starpu-1.4.9+dfsg/starpupy/src/setup.py.in000066400000000000000000000043531507764646700206120ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # from setuptools import setup, Extension import sys numpy_dir = '@PYTHON_NUMPY_DIR@' if numpy_dir != '': numpy_include_dir = [numpy_dir] else: numpy_include_dir = [] cppflags = '@STARPU_H_CPPFLAGS@' am_cflags = '@GLOBAL_AM_CFLAGS@' cflags = '@CFLAGS@' compile_args = cppflags.split(' ') + am_cflags.split(' ') + cflags.split(' ') extra_compile_args = [] for f in compile_args: if f: extra_compile_args.append(f) ver = sys.version_info libpython = 'python%s.%s%s' % (ver.major, ver.minor, sys.abiflags) starpupy = Extension('starpu.starpupy', include_dirs = ['@STARPU_SRC_DIR@/include', '@STARPU_BUILD_DIR@/include', '@STARPU_SRC_DIR@/starpupy/src', '@STARPU_SRC_DIR@/src', '@STARPU_BUILD_DIR@/src'] + numpy_include_dir, libraries = ['starpu-@STARPU_EFFECTIVE_VERSION@', libpython], extra_compile_args = extra_compile_args, extra_link_args = ['-Wl,-rpath,$ORIGIN/../../../', '-shared', @STARPUPY_EXTRA_LINK_ARGS@], library_dirs = ['@STARPU_BUILD_DIR@/src/.libs'], sources = ['starpu/starpu_task_wrapper.c', 'starpu/starpupy_handle.c', 'starpu/starpupy_interface.c', 'starpu/starpupy_buffer_interface.c', 'starpu/starpupy_numpy_filters.c']) setup( name = 'starpupy', version = '0.5', description = 'Python bindings for StarPU', author = 'StarPU team', author_email = 'starpu-devel@inria.fr', url = 'https://starpu.gitlabpages.inria.fr/', license = 'GPL', platforms = 'posix', ext_modules = [starpupy], packages = ['starpu'], ) starpu-1.4.9+dfsg/starpupy/src/starpu_task_wrapper.c000066400000000000000000001737241507764646700227500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Python C extension reference count special cases: * 1. Stolen reference: When you pass an object reference into these functions, * they take over ownership of the item passed to them, even if they fail (except PyModule_AddObject()). * PyErr_SetExcInfo() * PyException_SetContext() * PyException_SetCause() * PyTuple_SetItem() * PyTuple_SET_ITEM() * PyStructSequence_SetItem() * PyStructSequence_SET_ITEM() * PyList_SetItem() * PyList_SET_ITEM() * PyModule_AddObject(): Unlike other functions that steal references, this function only decrements * the reference count of value on success. The new PyModule_AddObjectRef() function * is recommended for Python version >= 3.10 * 2. Borrowed reference: return references that you borrow from the tuple, list or dictionary etc. * The borrowed reference’s lifetime is guaranteed until the function returns. It does not modify the * object reference count. It becomes a dangling pointer if the object is destroyed. * Calling Py_INCREF() on the borrowed reference is recommended to convert it to a strong reference * inplace, except when the object cannot be destroyed before the last usage of the borrowed reference. * PyErr_Occurred() * PySys_GetObject() * PySys_GetXOptions() * PyImport_AddModuleObject() * PyImport_AddModule() * PyImport_GetModuleDict() * PyEval_GetBuiltins() * PyEval_GetLocals() * PyEval_GetGlobals() * PyEval_GetFrame() * PySequence_Fast_GET_ITEM() * PyTuple_GetItem() * PyTuple_GET_ITEM() * PyStructSequence_GetItem() * PyStructSequence_GET_ITEM() * PyList_GetItem() * PyList_GET_ITEM() * PyDict_GetItem() * PyDict_GetItemWithError() * PyDict_GetItemString() * PyDict_SetDefault() * PyFunction_GetCode() * PyFunction_GetGlobals() * PyFunction_GetModule() * PyFunction_GetDefaults() * PyFunction_GetClosure() * PyFunction_GetAnnotations() * PyInstanceMethod_Function() * PyInstanceMethod_GET_FUNCTION() * PyMethod_Function() * PyMethod_GET_FUNCTION() * PyMethod_Self() * PyMethod_GET_SELF() * PyCell_GET() * PyModule_GetDict() * PyModuleDef_Init() * PyState_FindModule() * PyWeakref_GetObject() * PyWeakref_GET_OBJECT() * PyThreadState_GetDict() * PyObject_Init() * PyObject_InitVar() * Py_TYPE() * */ #undef NDEBUG #include #include #include #include #include #include #include "starpupy_cloudpickle.h" #include "starpupy_handle.h" #include "starpupy_interface.h" #include "starpupy_buffer_interface.h" #include "starpupy_numpy_filters.h" #define PY_SSIZE_T_CLEAN #include static void STARPU_ATTRIBUTE_NORETURN print_exception(const char *msg, ...) { PyObject *type, *value, *traceback; PyErr_Fetch(&type, &value, &traceback); PyObject *str = PyObject_CallMethod(value, "__str__", NULL); Py_UCS4 *wstr = PyUnicode_AsUCS4Copy(str); va_list ap; va_start(ap, msg); vfprintf(stderr, msg, ap); va_end(ap); fprintf(stderr, "got exception %ls\n", wstr); STARPU_ASSERT(0); } /*********************Functions passed in task_submit wrapper***********************/ static int active_multi_interpreter = 0; /*active multi-interpreter */ static PyObject *StarpupyError; /*starpupy error exception*/ static PyObject *asyncio_module; /*python asyncio module*/ static PyObject *concurrent_futures_future_class; /*python concurrent.futures.Future class*/ static PyObject *cloudpickle_module; /*cloudpickle module*/ static PyObject *pickle_module; /*pickle module*/ static PyObject *asyncio_wait_method = Py_None; /*method asyncio_wait_for_fut*/ static PyObject *concurrent_futures_wait_method = Py_None; /*method concurrent_futures_wait_for_fut*/ static PyObject *Handle_class = Py_None; /*Handle class*/ static PyObject *Token_class = Py_None; /*Handle_token class*/ static pthread_t main_thread; /* Asyncio futures */ static PyObject *cb_loop = Py_None; /*another event loop besides main running loop*/ /* concurrent.futures */ static PyObject *cb_executor = Py_None; /*executor for callbacks*/ static pthread_t thread_id; static PyThreadState *orig_thread_states[STARPU_NMAXWORKERS]; static PyThreadState *new_thread_states[STARPU_NMAXWORKERS]; /*********************************************************************************************/ static uint32_t where_inter = STARPU_CPU; /* prologue_callback_func*/ void starpupy_prologue_cb_func(void *cl_arg) { (void)cl_arg; PyObject *func_data; size_t func_data_size; PyObject *func_py; PyObject *argList; PyObject *fut; PyObject *loop; int h_flag; PyObject *perfmodel; int sb; /*make sure we own the GIL*/ PyGILState_STATE state = PyGILState_Ensure(); struct starpu_task *task = starpu_task_get_current(); /*Initialize struct starpu_codelet_unpack_arg_data*/ struct starpu_codelet_pack_arg_data data_org; starpu_codelet_unpack_arg_init(&data_org, task->cl_arg, task->cl_arg_size); if(active_multi_interpreter) { /*get func_py char**/ starpu_codelet_pick_arg(&data_org, (void**)&func_data, &func_data_size); } else { /*get func_py*/ starpu_codelet_unpack_arg(&data_org, &func_py, sizeof(func_py)); } /*get argList*/ starpu_codelet_unpack_arg(&data_org, &argList, sizeof(argList)); /*get fut*/ starpu_codelet_unpack_arg(&data_org, &fut, sizeof(fut)); /*get loop*/ starpu_codelet_unpack_arg(&data_org, &loop, sizeof(loop)); /*get h_flag*/ starpu_codelet_unpack_arg(&data_org, &h_flag, sizeof(h_flag)); /*get perfmodel*/ starpu_codelet_unpack_arg(&data_org, &perfmodel, sizeof(perfmodel)); /*get sb*/ starpu_codelet_unpack_arg(&data_org, &sb, sizeof(sb)); starpu_codelet_unpack_arg_fini(&data_org); /*check if there is Future in argList, if so, get the Future result*/ int i; int fut_flag = 0; for(i=0; i < PyTuple_Size(argList); i++) { PyObject *obj=PyTuple_GetItem(argList, i); /*protect borrowed reference, decremented in the end of the loop*/ Py_INCREF(obj); const char* tp = Py_TYPE(obj)->tp_name; if(strcmp(tp, "_asyncio.Future") == 0 || strcmp(tp, "Future") == 0) { fut_flag = 1; PyObject *done = PyObject_CallMethod(obj, "done", NULL); /*if the argument is Future and future object is not finished, we will await its result in cb_loop, since the main loop may be occupied to await the final result of function*/ if (!PyObject_IsTrue(done)) { /*if the future object is not finished, get its corresponding arg_fut*/ PyObject *cb_obj = PyObject_GetAttrString(obj, "arg_fut"); if(strcmp(tp, "_asyncio.Future") == 0) { /* asyncio */ /*call the method asyncio_wait_for_fut to await obj*/ if (asyncio_wait_method == Py_None) asyncio_wait_method = PyDict_GetItemString(starpu_dict, "asyncio_wait_for_fut"); PyObject *wait_obj = PyObject_CallFunctionObjArgs(asyncio_wait_method, cb_obj, NULL); /*decrement the reference obtained before if{}, then get the new reference*/ Py_DECREF(cb_obj); /*call obj = asyncio.run_coroutine_threadsafe(wait_for_fut(cb_obj), cb_loop)*/ cb_obj = PyObject_CallMethod(asyncio_module, "run_coroutine_threadsafe", "O,O", wait_obj, cb_loop); Py_DECREF(wait_obj); } else { /* concurrent.futures */ /*call the method concurrent_futures_wait_for_fut to await obj*/ if (concurrent_futures_wait_method == Py_None) concurrent_futures_wait_method = PyDict_GetItemString(starpu_dict, "concurrent_futures_wait_for_fut"); /*call obj = executor.submit(wait_for_fut, cb_obj)*/ PyObject *new_obj = PyObject_CallMethod(cb_executor, "submit", "O,O", concurrent_futures_wait_method, cb_obj); /*decrement the reference obtained before if{}, then get the new reference*/ Py_DECREF(cb_obj); cb_obj = new_obj; } Py_DECREF(obj); obj = cb_obj; } /*if one of arguments is Future, get its result*/ PyObject *fut_result = PyObject_CallMethod(obj, "result", NULL); /*replace the Future argument to its result*/ PyTuple_SetItem(argList, i, fut_result); Py_DECREF(done); } Py_DECREF(obj); } int pack_flag = 0; if(active_multi_interpreter||fut_flag) pack_flag = 1; /*if the argument is changed in arglist or program runs with multi-interpreter, repack the data*/ if(pack_flag == 1) { /*Initialize struct starpu_codelet_pack_arg_data*/ struct starpu_codelet_pack_arg_data data; starpu_codelet_pack_arg_init(&data); if(active_multi_interpreter) { /*repack func_data*/ starpu_codelet_pack_arg(&data, func_data, func_data_size); /*use cloudpickle to dump argList*/ Py_ssize_t arg_data_size; char* arg_data; PyObject *arg_bytes = starpu_cloudpickle_dumps(argList, &arg_data, &arg_data_size); starpu_codelet_pack_arg(&data, arg_data, arg_data_size); Py_DECREF(arg_bytes); Py_DECREF(argList); } else if (fut_flag) { /*repack func_py*/ starpu_codelet_pack_arg(&data, &func_py, sizeof(func_py)); /*repack arglist*/ starpu_codelet_pack_arg(&data, &argList, sizeof(argList)); } /*repack fut*/ starpu_codelet_pack_arg(&data, &fut, sizeof(fut)); /*repack loop*/ starpu_codelet_pack_arg(&data, &loop, sizeof(loop)); /*repack h_flag*/ starpu_codelet_pack_arg(&data, &h_flag, sizeof(h_flag)); /*repack perfmodel*/ starpu_codelet_pack_arg(&data, &perfmodel, sizeof(perfmodel)); /*repack sb*/ starpu_codelet_pack_arg(&data, &sb, sizeof(sb)); /*free the pointer precedent*/ free(task->cl_arg); /*finish repacking data and store the struct in cl_arg*/ starpu_codelet_pack_arg_fini(&data, &task->cl_arg, &task->cl_arg_size); } free((void*)task->name); /*restore previous GIL state*/ PyGILState_Release(state); } /*function passed to starpu_codelet.cpu_func*/ void starpupy_codelet_func(void *descr[], void *cl_arg) { (void)cl_arg; PyObject *func_py; /*the python function passed in*/ PyObject *pFunc; PyObject *argList; /*argument list of python function passed in*/ int h_flag; /*detect return value is handle or not*/ /*make sure we own the GIL*/ PyGILState_STATE state = PyGILState_Ensure(); struct starpu_task *task = starpu_task_get_current(); /*Initialize struct starpu_codelet_unpack_arg_data*/ struct starpu_codelet_pack_arg_data data; starpu_codelet_unpack_arg_init(&data, task->cl_arg, task->cl_arg_size); if(active_multi_interpreter) { char* func_data; size_t func_data_size; char* arg_data; size_t arg_data_size; /*get func_py char**/ starpu_codelet_pick_arg(&data, (void**)&func_data, &func_data_size); /*use cloudpickle to load function (maybe only function name), return a new reference*/ pFunc=starpu_cloudpickle_loads(func_data, func_data_size); if (!pFunc) print_exception("cloudpickle could not unpack the function from the main interpreter"); /*get argList char**/ starpu_codelet_pick_arg(&data, (void**)&arg_data, &arg_data_size); /*use cloudpickle to load argList*/ argList=starpu_cloudpickle_loads(arg_data, arg_data_size); if (!argList) print_exception("cloudpickle could not unpack the argument list from the main interpreter"); } else { /*get func_py*/ starpu_codelet_unpack_arg(&data, &pFunc, sizeof(pFunc)); /*get argList*/ starpu_codelet_unpack_arg(&data, &argList, sizeof(argList)); } /*skip fut*/ starpu_codelet_unpack_discard_arg(&data); /*skip loop*/ starpu_codelet_unpack_discard_arg(&data); /*get h_flag*/ starpu_codelet_unpack_arg(&data, &h_flag, sizeof(h_flag)); /*skip perfmodel*/ starpu_codelet_unpack_discard_arg(&data); /*skip sb*/ starpu_codelet_unpack_discard_arg(&data); starpu_codelet_unpack_arg_fini(&data); /* if the function name is passed in*/ const char* tp_func = Py_TYPE(pFunc)->tp_name; if (strcmp(tp_func, "str")==0) { /*getattr(sys.modules[__name__], "")*/ /*get sys.modules*/ PyObject *sys_modules = PyImport_GetModuleDict(); /*protect borrowed reference, decrement after being called by the function*/ Py_INCREF(sys_modules); /*get sys.modules[__name__]*/ PyObject *sys_modules_name=PyDict_GetItemString(sys_modules,"__main__"); /*protect borrowed reference, decrement after being called by the function*/ Py_INCREF(sys_modules_name); /*get function object*/ func_py=PyObject_GetAttr(sys_modules_name,pFunc); Py_DECREF(sys_modules); Py_DECREF(sys_modules_name); /*decrement the reference obtained from unpack*/ Py_DECREF(pFunc); } else { /*transfer the ref of pFunc to func_py*/ func_py=pFunc; } /*check if there is Handle in argList, if so, get the object*/ int h_index= (h_flag ? 1 : 0); int i; /*if there is the return Handle in argList, length of argList minus 1*/ Py_ssize_t pArglist_len = (h_flag == 2) ? PyTuple_Size(argList)-1 : PyTuple_Size(argList); /*new tuple contains all function arguments, decrement after calling function*/ PyObject *pArglist = PyTuple_New(pArglist_len); for(i=0; i < pArglist_len; i++) { /*if there is the return Handle in argList, start with the second argument*/ PyObject *obj= (h_flag == 2) ? PyTuple_GetItem(argList, i+1) : PyTuple_GetItem(argList, i); /*protect borrowed reference, is decremented in the end of the loop*/ Py_INCREF(obj); const char* tp = Py_TYPE(obj)->tp_name; if(strcmp(tp, "Handle_token") == 0) { /*if one of arguments is Handle, replace the Handle argument to the object*/ if ((task->handles[h_index] && STARPUPY_PYOBJ_CHECK(task->handles[h_index])) || STARPUPY_PYOBJ_CHECK_INTERFACE(descr[h_index])) { PyObject *obj_handle = STARPUPY_GET_PYOBJECT(descr[h_index]); PyTuple_SetItem(pArglist, i, obj_handle); } else if ((task->handles[h_index] && STARPUPY_BUF_CHECK(task->handles[h_index])) || STARPUPY_BUF_CHECK_INTERFACE(descr[h_index])) { PyObject *buf_handle = STARPUPY_BUF_GET_PYOBJECT(descr[h_index]); PyTuple_SetItem(pArglist, i, buf_handle); } else { STARPU_ASSERT_MSG(0, "unexpected object %d\n", ((struct starpupyobject_interface *)(descr[h_index]))->id); } h_index++; } else { Py_INCREF(obj); PyTuple_SetItem(pArglist, i, obj); } Py_DECREF(obj); } // printf("arglist before applying is "); // PyObject_Print(pArglist, stdout, 0); // printf("\n"); /*verify that the function is a proper callable*/ if (!PyCallable_Check(func_py)) { PyErr_Format(StarpupyError, "Expected a callable function"); } /*call the python function get the return value rv, it's a new reference*/ PyObject *rv = PyObject_CallObject(func_py, pArglist); if (!rv) PyErr_PrintEx(1); // printf("arglist after applying is "); // PyObject_Print(pArglist, stdout, 0); // printf("\n"); // printf("rv after call function is "); // PyObject_Print(rv, stdout, 0); // printf("\n"); /*if return handle*/ if(h_flag) { STARPU_ASSERT(STARPUPY_PYOBJ_CHECK(task->handles[0])); /*pass ref to descr[0]*/ STARPUPY_SET_PYOBJECT(descr[0], rv); } else { /*Initialize struct starpu_codelet_pack_arg_data for return value*/ struct starpu_codelet_pack_arg_data data_ret; starpu_codelet_pack_arg_init(&data_ret); /*if the result is None type, pack NULL without using cloudpickle*/ if (rv==Py_None) { char* rv_data=NULL; Py_ssize_t rv_data_size=0; starpu_codelet_pack_arg(&data_ret, &rv_data_size, sizeof(rv_data_size)); starpu_codelet_pack_arg(&data_ret, &rv_data, sizeof(rv_data)); /*decrement the ref obtained from callobject*/ Py_DECREF(rv); } else { if(active_multi_interpreter) { /*else use cloudpickle to dump rv*/ Py_ssize_t rv_data_size; char* rv_data; PyObject *rv_bytes = starpu_cloudpickle_dumps(rv, &rv_data, &rv_data_size); starpu_codelet_pack_arg(&data_ret, &rv_data_size, sizeof(rv_data_size)); starpu_codelet_pack_arg(&data_ret, rv_data, rv_data_size); Py_DECREF(rv_bytes); Py_DECREF(rv); } else { /*if the result is not None type, we set rv_data_size to 1, it does not mean that the data size is 1, but only for determine statements*/ size_t rv_data_size=1; starpu_codelet_pack_arg(&data_ret, &rv_data_size, sizeof(rv_data_size)); /*pack rv*/ starpu_codelet_pack_arg(&data_ret, &rv, sizeof(rv)); } } /*store the return value in task->cl_ret*/ starpu_codelet_pack_arg_fini(&data_ret, &task->cl_ret, &task->cl_ret_size); task->cl_ret_free = 1; } /*decrement the ref obtained from pFunc*/ Py_DECREF(func_py); /*decrement the ref obtained by unpack*/ Py_DECREF(argList); /*decrement the ref obtains by PyTuple_New*/ Py_DECREF(pArglist); /*restore previous GIL state*/ PyGILState_Release(state); } /*function passed to starpu_task.epilogue_callback_func*/ void starpupy_epilogue_cb_func(void *v) { (void)v; PyObject *fut; /*asyncio.Future*/ PyObject *loop; /*asyncio.Eventloop*/ int h_flag; PyObject *perfmodel; char* rv_data; size_t rv_data_size; PyObject *rv; /*return value when using PyObject_CallObject call the function f*/ /*make sure we own the GIL*/ PyGILState_STATE state = PyGILState_Ensure(); struct starpu_task *task = starpu_task_get_current(); /*Initialize struct starpu_codelet_unpack_arg_data data*/ struct starpu_codelet_pack_arg_data data; starpu_codelet_unpack_arg_init(&data, task->cl_arg, task->cl_arg_size); /*skip func_py*/ starpu_codelet_unpack_discard_arg(&data); /*skip argList*/ starpu_codelet_unpack_discard_arg(&data); /*get fut*/ starpu_codelet_unpack_arg(&data, &fut, sizeof(fut)); /*get loop*/ starpu_codelet_unpack_arg(&data, &loop, sizeof(loop)); /*get h_flag*/ starpu_codelet_unpack_arg(&data, &h_flag, sizeof(h_flag)); /*get perfmodel*/ starpu_codelet_unpack_arg(&data, &perfmodel, sizeof(perfmodel)); /*skip sb*/ starpu_codelet_unpack_discard_arg(&data); starpu_codelet_unpack_arg_fini(&data); /*if return value is not handle, unpack from cl_ret*/ if(!h_flag) { /*Initialize struct starpu_codelet_unpack_arg_data data*/ struct starpu_codelet_pack_arg_data data_ret; starpu_codelet_unpack_arg_init(&data_ret, task->cl_ret, task->cl_ret_size); /*get rv_data_size*/ starpu_codelet_unpack_arg(&data_ret, &rv_data_size, sizeof(rv_data_size)); /*if the rv_data_size is 0, the result is None type*/ if (rv_data_size==0) { starpu_codelet_unpack_discard_arg(&data_ret); rv=Py_None; Py_INCREF(rv); } /*else use cloudpickle to load rv*/ else if(active_multi_interpreter) { /*get rv char**/ starpu_codelet_pick_arg(&data_ret, (void**)&rv_data, &rv_data_size); /*use cloudpickle to load rv*/ rv=starpu_cloudpickle_loads(rv_data, rv_data_size); } else { /*unpack rv*/ starpu_codelet_unpack_arg(&data_ret, &rv, sizeof(rv)); } starpu_codelet_unpack_arg_fini(&data_ret); /*set the Future result and mark the Future as done*/ if(fut!=Py_None) { PyObject *cb_fut = PyObject_GetAttrString(fut, "arg_fut"); if (!cb_fut) PyErr_PrintEx(1); PyObject *cb_set_result = PyObject_GetAttrString(cb_fut, "set_result"); if (!cb_set_result) PyErr_PrintEx(1); PyObject *set_result = PyObject_GetAttrString(fut, "set_result"); if (!set_result) PyErr_PrintEx(1); const char* tp = Py_TYPE(fut)->tp_name; if(strcmp(tp, "_asyncio.Future") == 0) { /* asyncio */ /*set the Future result in cb_loop*/ PyObject *cb_loop_callback = PyObject_CallMethod(cb_loop, "call_soon_threadsafe", "(O,O)", cb_set_result, rv); if (!cb_loop_callback) PyErr_PrintEx(1); Py_DECREF(cb_loop_callback); /*set the Future result in main running loop*/ PyObject *loop_callback = PyObject_CallMethod(loop, "call_soon_threadsafe", "(O,O)", set_result, rv); if (!loop_callback) PyErr_PrintEx(1); Py_DECREF(loop_callback); } else { /* concurrent.futures */ /*set the Future result in cb_loop*/ PyObject *cb_loop_callback = PyObject_CallMethod(cb_executor, "submit", "(O,O)", cb_set_result, rv); if (!cb_loop_callback) PyErr_PrintEx(1); Py_DECREF(cb_loop_callback); /*set the Future result in main running loop*/ PyObject *loop_callback = PyObject_CallMethod(cb_executor, "submit", "(O,O)", set_result, rv); if (!loop_callback) PyErr_PrintEx(1); Py_DECREF(loop_callback); } Py_DECREF(cb_set_result); Py_DECREF(cb_fut); Py_DECREF(set_result); } /*decrement the refs obtained from upack*/ Py_DECREF(rv); } Py_DECREF(fut); Py_DECREF(loop); struct starpu_codelet *func_cl=(struct starpu_codelet *) task->cl; if (func_cl->model != NULL) { Py_DECREF(perfmodel); } /*restore previous GIL state*/ PyGILState_Release(state); } void starpupy_cb_func(void *v) { (void)v; struct starpu_task *task = starpu_task_get_current(); /*deallocate task*/ free(task->cl); } /***********************************************************************************/ /*PyObject*->struct starpu_task**/ static struct starpu_task *PyTask_AsTask(PyObject *obj) { return (struct starpu_task *) PyCapsule_GetPointer(obj, "Task"); } /* destructor function for task */ static void del_Task(PyObject *obj) { struct starpu_task *obj_task=PyTask_AsTask(obj); starpu_task_set_destroy(obj_task); } /*struct starpu_task*->PyObject**/ static PyObject *PyTask_FromTask(struct starpu_task *task) { PyObject * task_cap = PyCapsule_New(task, "Task", del_Task); return task_cap; } /***********************************************************************************/ static size_t sizebase (struct starpu_task *task, unsigned nimpl) { (void)nimpl; int sb; /*Initialize struct starpu_codelet_unpack_arg_data*/ struct starpu_codelet_pack_arg_data data; starpu_codelet_unpack_arg_init(&data, task->cl_arg, task->cl_arg_size); /*skip func_py*/ //starpu_codelet_unpack_discard_arg(&data); starpu_codelet_unpack_discard_arg(&data); /*skip argList*/ //starpu_codelet_unpack_discard_arg(&data); starpu_codelet_unpack_discard_arg(&data); /*skip fut*/ starpu_codelet_unpack_discard_arg(&data); /*skip loop*/ starpu_codelet_unpack_discard_arg(&data); /*skip h_flag*/ starpu_codelet_unpack_discard_arg(&data); /*skip perfmodel*/ starpu_codelet_unpack_discard_arg(&data); /*get sb*/ starpu_codelet_unpack_arg(&data, &sb, sizeof(sb)); starpu_codelet_unpack_arg_fini(&data); return sb; } /*initialization of perfmodel*/ static PyObject* init_perfmodel(PyObject *self, PyObject *args) { (void)self; char *sym; if (!PyArg_ParseTuple(args, "s", &sym)) return NULL; /*allocate a perfmodel structure*/ struct starpu_perfmodel *perf=(struct starpu_perfmodel*)calloc(1, sizeof(struct starpu_perfmodel)); /*get the perfmodel symbol*/ char *p =strdup(sym); perf->symbol=p; perf->type=STARPU_HISTORY_BASED; perf->size_base=&sizebase; /*struct perfmodel*->PyObject**/ PyObject *perfmodel=PyCapsule_New(perf, "Perf", NULL); return perfmodel; } /*free perfmodel*/ static PyObject* free_perfmodel(PyObject *self, PyObject *args) { (void)self; PyObject *perfmodel; if (!PyArg_ParseTuple(args, "O", &perfmodel)) return NULL; /*PyObject*->struct perfmodel**/ struct starpu_perfmodel *perf=PyCapsule_GetPointer(perfmodel, "Perf"); Py_BEGIN_ALLOW_THREADS; #ifndef STARPU_SIMGRID starpu_save_history_based_model(perf); #endif //starpu_perfmodel_unload_model(perf); starpu_perfmodel_deinit(perf); Py_END_ALLOW_THREADS; free((void*)perf->symbol); free(perf); /*return type is void*/ Py_INCREF(Py_None); return Py_None; } #ifndef STARPU_SIMGRID static PyObject* starpu_save_history_based_model_wrapper(PyObject *self, PyObject *args) { (void)self; PyObject *perfmodel; if (!PyArg_ParseTuple(args, "O", &perfmodel)) return NULL; /*call the method get_struct*/ const char *tp_perfmodel = Py_TYPE(perfmodel)->tp_name; if (strcmp(tp_perfmodel, "Perfmodel") != 0) { /*the argument should be the object of class Perfmodel*/ PyErr_Format(StarpupyError, "Expected a Perfmodel object"); return NULL; } PyObject *perfmodel_capsule = PyObject_CallMethod(perfmodel, "get_struct", NULL); /*PyObject*->struct perfmodel**/ const char *tp_perf = Py_TYPE(perfmodel_capsule)->tp_name; if (strcmp(tp_perf, "PyCapsule") != 0) { /*the argument should be the PyCapsule object*/ PyErr_Format(StarpupyError, "Expected a PyCapsule object"); return NULL; } /*PyObject*->struct perfmodel**/ struct starpu_perfmodel *perf = PyCapsule_GetPointer(perfmodel_capsule, "Perf"); Py_BEGIN_ALLOW_THREADS; starpu_save_history_based_model(perf); Py_END_ALLOW_THREADS; /*decrement the capsule object obtained from Perfmodel class*/ Py_DECREF(perfmodel_capsule); /*return type is void*/ Py_INCREF(Py_None); return Py_None; } #endif /*****************************Wrappers of StarPU methods****************************/ /*wrapper submit method*/ static PyObject* starpu_task_submit_wrapper(PyObject *self, PyObject *args) { (void)self; /*first argument in args is always the python function passed in*/ PyObject *func_py = PyTuple_GetItem(args, 0); /*protect borrowed reference, used in codelet pack, in case multi-interpreter, decremented after cloudpickle_dumps, otherwise decremented in starpupy_codelet_func*/ Py_INCREF(func_py); /*Initialize struct starpu_codelet_pack_arg_data*/ struct starpu_codelet_pack_arg_data data; starpu_codelet_pack_arg_init(&data); if(active_multi_interpreter) { /*use cloudpickle to dump func_py*/ Py_ssize_t func_data_size; char* func_data; PyObject *func_bytes = starpu_cloudpickle_dumps(func_py, &func_data, &func_data_size); starpu_codelet_pack_arg(&data, func_data, func_data_size); Py_DECREF(func_bytes); /*decrement the ref obtained from args passed in*/ Py_DECREF(func_py); } else { /*if there is no multi interpreter only pack func_py*/ starpu_codelet_pack_arg(&data, &func_py, sizeof(func_py)); } PyObject *loop; PyObject *fut; /*allocate a task structure and initialize it with default values*/ struct starpu_task *task = starpu_task_create(); /*allocate a codelet structure*/ struct starpu_codelet *func_cl = (struct starpu_codelet*)malloc(sizeof(struct starpu_codelet)); /*initialize func_cl with default values*/ starpu_codelet_init(func_cl); func_cl->cpu_funcs[0] = &starpupy_codelet_func; func_cl->cpu_funcs_name[0] = "starpupy_codelet_func"; func_cl->flags = STARPU_CODELET_SIMGRID_EXECUTE; int h_index = 0, h_flag = 0; int nbuffer = 0; /*the last argument is the option dictionary*/ PyObject *dict_option = PyTuple_GetItem(args, PyTuple_Size(args)-1); /*protect borrowed reference*/ Py_INCREF(dict_option); /*check whether the return value is handle*/ PyObject *ret_handle = PyDict_GetItemString(dict_option, "ret_handle"); /*set the default value*/ if(ret_handle == NULL) { ret_handle = Py_False; } /*check whether the return value is fut*/ PyObject *ret_fut = PyDict_GetItemString(dict_option, "ret_fut"); /*set the default value*/ if(ret_fut == NULL) { ret_fut = Py_True; } /*check whether to store the return value as a parameter*/ PyObject *ret_param = PyDict_GetItemString(dict_option, "ret_param"); /*set the default value*/ if(ret_param == NULL) { ret_param = Py_False; } /*if return value is a parameter, then we will not return a future nor handle object even ret_fut/ret_handle has been set to true*/ else if(PyObject_IsTrue(ret_param)) { h_flag = 2; ret_fut = Py_False; ret_handle = Py_False; } /*if return value is handle*/ PyObject *r_handle_obj = NULL; if(PyObject_IsTrue(ret_handle)) { h_flag = 1; /*return value is handle there are no loop and fut*/ loop = Py_None; fut = Py_None; /* these are decremented in starpupy_epilogue_cb_func */ Py_INCREF(loop); Py_INCREF(fut); /*create Handle object Handle(None)*/ /*import Handle class*/ if (Handle_class == Py_None) { Handle_class = PyDict_GetItemString(starpu_dict, "Handle"); } /*get the constructor, decremented after being called*/ PyObject *pInstanceHandle = PyInstanceMethod_New(Handle_class); /*create a Null Handle object, decremented in the end of this if{}*/ PyObject *handle_arg = PyTuple_New(2); /*Py_None is used for PyTuple_SetItem(handle_arg), once handle_arg is decremented, Py_None is decremented as well*/ Py_INCREF(Py_None); PyTuple_SetItem(handle_arg, 0, Py_None); PyTuple_SetItem(handle_arg, 1, Py_True); /*r_handle_obj will be the return value of this function starpu_task_submit_wrapper*/ r_handle_obj = PyObject_CallObject(pInstanceHandle,handle_arg); /*get the Handle capsule object, decremented in the end of this if{}*/ PyObject *r_handle_cap = PyObject_CallMethod(r_handle_obj, "get_capsule", NULL); /*get Handle*/ starpu_data_handle_t r_handle = (starpu_data_handle_t) PyCapsule_GetPointer(r_handle_cap, "Handle"); if (r_handle == (void*)-1) { PyErr_Format(StarpupyError, "Handle has already been unregistered"); return NULL; } task->handles[0] = r_handle; func_cl->modes[0] = STARPU_W; h_index++; nbuffer = h_index; Py_DECREF(pInstanceHandle); Py_DECREF(handle_arg); Py_DECREF(r_handle_cap); } else if(PyObject_IsTrue(ret_fut)) { PyObject *cb_fut; /*get the running asyncio Event loop, decremented in starpupy_epilogue_cb_func*/ loop = PyObject_CallMethod(asyncio_module, "get_running_loop", NULL); if (loop) { /*create a asyncio.Future object, decremented in starpupy_epilogue_cb_func*/ fut = PyObject_CallMethod(loop, "create_future", NULL); if (fut == NULL) { PyErr_Format(StarpupyError, "Can't create future for loop from asyncio module (try to add \"-m asyncio\" when starting Python interpreter)"); return NULL; } /*create a asyncio.Future object attached to cb_loop*/ cb_fut = PyObject_CallMethod(cb_loop, "create_future", NULL); if (cb_fut == NULL) { PyErr_Format(StarpupyError, "Can't create future for cb_loop from asyncio module (try to add \"-m asyncio\" when starting Python interpreter)"); return NULL; } } else { PyErr_Clear(); loop = Py_None; /* this is decremented in starpupy_epilogue_cb_func */ Py_INCREF(loop); /*create a concurrent.futures.Future object, decremented in starpupy_epilogue_cb_func*/ PyObject *fut_instance = PyInstanceMethod_New(concurrent_futures_future_class); fut = PyObject_CallObject(fut_instance, NULL); if (fut == NULL) { PyErr_Format(StarpupyError, "Can't create future from concurrent.futures module"); return NULL; } /*create a concurrent.futures.Future object for cb_executor*/ cb_fut = PyObject_CallObject(fut_instance, NULL); if (cb_fut == NULL) { PyErr_Format(StarpupyError, "Can't create future from concurrent.futures module"); return NULL; } } int ret; /*set one of fut attribute to cb_fut*/ ret = PyObject_SetAttrString(fut, "arg_fut", cb_fut); if (ret) { PyErr_Format(StarpupyError, "Can't set arg_fut in fut"); return NULL; } Py_DECREF(cb_fut); task->destroy = 0; PyObject *PyTask = PyTask_FromTask(task); /*set one of fut attribute to the task pointer*/ ret = PyObject_SetAttrString(fut, "starpu_task", PyTask); if (ret) { PyErr_Format(StarpupyError, "Can't set starpu_task in fut"); return NULL; } /*fut is the return value of this function*/ Py_INCREF(fut); Py_DECREF(PyTask); } else { /* return value is not fut or handle there are no loop and fut*/ loop = Py_None; fut = Py_None; /* these are decremented in starpupy_epilogue_cb_func */ Py_INCREF(loop); Py_INCREF(fut); } /*check the arguments of python function passed in*/ int i; for(i = 1; i < PyTuple_Size(args)-1; i++) { PyObject *obj = PyTuple_GetItem(args, i); /*protect borrowed reference*/ Py_INCREF(obj); const char* tp = Py_TYPE(obj)->tp_name; if(strcmp(tp, "_asyncio.Future") == 0 || strcmp(tp, "Future") == 0) { /*if one of arguments is Future, get its corresponding task*/ PyObject *fut_task = PyObject_GetAttrString(obj, "starpu_task"); /*declare task dependencies between the current task and the corresponding task of Future argument*/ starpu_task_declare_deps(task, 1, PyTask_AsTask(fut_task)); Py_DECREF(fut_task); } /*decrement the reference which is obtained at the beginning of the loop*/ Py_DECREF(obj); } /*check whether the option perfmodel is None*/ PyObject *perfmodel = PyDict_GetItemString(dict_option, "perfmodel"); /*protect borrowed reference, pack in cl_arg, decrement in starpupy_epilogue_cb_func*/ Py_INCREF(perfmodel); /*call the method get_struct*/ PyObject *perfmodel_capsule; const char *tp_perfmodel = Py_TYPE(perfmodel)->tp_name; if (strcmp(tp_perfmodel, "Perfmodel") == 0) { perfmodel_capsule = PyObject_CallMethod(perfmodel, "get_struct", NULL); } else { Py_INCREF(Py_None); perfmodel_capsule = Py_None; } const char *tp_perf = Py_TYPE(perfmodel_capsule)->tp_name; if (strcmp(tp_perf, "PyCapsule") == 0) { /*PyObject*->struct perfmodel**/ struct starpu_perfmodel *perf = PyCapsule_GetPointer(perfmodel_capsule, "Perf"); func_cl->model = perf; } /*decrement the capsule object obtained from Perfmodel class*/ Py_DECREF(perfmodel_capsule); /*create Handle object Handle(None)*/ /*import Handle_token class*/ if (Token_class == Py_None) { Token_class = PyDict_GetItemString(starpu_dict, "Handle_token"); } /*get the constructor, decremented after passing args in argList*/ PyObject *pInstanceToken = PyInstanceMethod_New(Token_class); /*check whether the argument is explicit handle*/ PyObject *arg_handle = PyDict_GetItemString(dict_option, "arg_handle"); /*set the default value*/ if(arg_handle == NULL) { arg_handle = Py_True; } /*argument list of python function passed in*/ PyObject *argList; /*pass args in argList, argList is decremented in starpupy_codelet_func*/ if (PyTuple_Size(args) == 2)/*function no arguments*/ argList = PyTuple_New(0); else { /*function has arguments*/ argList = PyTuple_New(PyTuple_Size(args)-2); int j; for(j=0; jtp_name; //printf("arg type is %s\n", tp_arg); if (strcmp(tp_arg, "Handle") == 0 || strcmp(tp_arg, "HandleNumpy") == 0) { /*create the Handle_token object to replace the Handle Capsule*/ PyObject *token_obj = PyObject_CallObject(pInstanceToken, NULL); PyTuple_SetItem(argList, j, token_obj); /*get Handle capsule object, decremented in the end of this if{}*/ PyObject *tmp_cap = PyObject_CallMethod(tmp, "get_capsule", NULL); /*get Handle*/ starpu_data_handle_t tmp_handle = (starpu_data_handle_t) PyCapsule_GetPointer(tmp_cap, "Handle"); if (tmp_handle == (void*)-1) { PyErr_Format(StarpupyError, "Handle has already been unregistered"); return NULL; } /*if the function result will be returned in parameter, the first argument will be the handle of return value, but this object should not be the Python object supporting buffer protocol*/ if(PyObject_IsTrue(ret_param) && i==0 && STARPUPY_BUF_CHECK(tmp_handle)) { PyErr_Format(StarpupyError, "Return value as parameter should not be the Python object supporting buffer protocol"); return NULL; } task->handles[h_index] = tmp_handle; /*set access mode*/ /*mode is STARPU_R*/ if(tmp_mode_py != NULL && strcmp(tmp_mode, "R") == 0) { func_cl->modes[h_index] = STARPU_R; } /*mode is STARPU_W*/ if(tmp_mode_py != NULL && strcmp(tmp_mode, "W") == 0) { func_cl->modes[h_index] = STARPU_W; } /*mode is STARPU_RW*/ if(tmp_mode_py != NULL && strcmp(tmp_mode, "RW") == 0) { func_cl->modes[h_index] = STARPU_RW; } /*access mode is not defined for Handle object, and this object is not the return value*/ if(tmp_mode_py == NULL && strcmp(tp_arg, "Handle") == 0 && (!PyObject_IsTrue(ret_param) || (PyObject_IsTrue(ret_param) && j != 0))) { func_cl->modes[h_index] = STARPU_R; } /*access mode is not defined for Handle object, and this object is the return value*/ if(tmp_mode_py == NULL && strcmp(tp_arg, "Handle") == 0 && PyObject_IsTrue(ret_param) && j == 0) { func_cl->modes[h_index] = STARPU_W; } /*access mode is not defined for HandleNumpy object*/ if(tmp_mode_py == NULL && strcmp(tp_arg, "HandleNumpy") == 0) { PyErr_Format(StarpupyError, "access mode should be set as STARPU_W"); return NULL; } h_index++; nbuffer = h_index; Py_DECREF(tmp_cap); Py_DECREF(tmp); } /*check if the arg is buffer protocol*/ else if((PyObject_IsTrue(arg_handle)) && (strcmp(tp_arg, "numpy.ndarray")==0 || strcmp(tp_arg, "bytes")==0 || strcmp(tp_arg, "bytearray")==0 || strcmp(tp_arg, "array.array")==0 || strcmp(tp_arg, "memoryview")==0)) { /*get the corresponding handle of the obj, return a new reference, decremented in the end of this else if{}*/ PyObject *tmp_cap = starpupy_handle_dict_check(tmp, tmp_mode, "register"); /*create the Handle_token object to replace the Handle Capsule*/ PyObject *token_obj = PyObject_CallObject(pInstanceToken, NULL); PyTuple_SetItem(argList, j, token_obj); /*get Handle*/ starpu_data_handle_t tmp_handle = (starpu_data_handle_t) PyCapsule_GetPointer(tmp_cap, "Handle"); task->handles[h_index] = tmp_handle; /*set access mode*/ /*mode is STARPU_R*/ if(tmp_mode_py != NULL && strcmp(tmp_mode, "R") == 0) { func_cl->modes[h_index] = STARPU_R; } /*mode is STARPU_W*/ if(tmp_mode_py != NULL && strcmp(tmp_mode, "W") == 0) { func_cl->modes[h_index] = STARPU_W; } /*mode is STARPU_RW*/ if(tmp_mode_py != NULL && strcmp(tmp_mode, "RW") == 0) { func_cl->modes[h_index] = STARPU_RW; } /*access mode is not defined*/ if(tmp_mode_py == NULL) { func_cl->modes[h_index] = STARPU_R; } h_index++; nbuffer = h_index; Py_DECREF(tmp_cap); Py_DECREF(tmp); } /* check if the arg is the sub handle*/ else if(strcmp(tp_arg, "PyCapsule")==0) { //printf("it's the sub handles\n"); /*create the Handle_token object to replace the Handle Capsule*/ PyObject *token_obj = PyObject_CallObject(pInstanceToken, NULL); PyTuple_SetItem(argList, j, token_obj); /*get Handle*/ starpu_data_handle_t tmp_handle = (starpu_data_handle_t) PyCapsule_GetPointer(tmp, "Handle"); task->handles[h_index] = tmp_handle; /*set access mode*/ /*mode is STARPU_R*/ if(tmp_mode_py != NULL && strcmp(tmp_mode, "R") == 0) { func_cl->modes[h_index] = STARPU_R; } /*mode is STARPU_W*/ if(tmp_mode_py != NULL && strcmp(tmp_mode, "W") == 0) { func_cl->modes[h_index] = STARPU_W; } /*mode is STARPU_RW*/ if(tmp_mode_py != NULL && strcmp(tmp_mode, "RW") == 0) { func_cl->modes[h_index] = STARPU_RW; } /*access mode is not defined*/ if(tmp_mode_py == NULL) { func_cl->modes[h_index] = STARPU_R; } h_index++; nbuffer = h_index; Py_DECREF(tmp); } else { PyTuple_SetItem(argList, j, tmp); } if(tmp_mode_py != NULL) { free(tmp_mode); } Py_DECREF(PyModes); Py_DECREF(arg_id); } //printf("nbuffer is %d\n", nbuffer); } /*decrement the references which are obtained before generating the argList*/ Py_DECREF(pInstanceToken); func_cl->nbuffers = nbuffer; /*pack argList*/ starpu_codelet_pack_arg(&data, &argList, sizeof(argList)); /*pack fut*/ starpu_codelet_pack_arg(&data, &fut, sizeof(fut)); /*pack loop*/ starpu_codelet_pack_arg(&data, &loop, sizeof(loop)); /*pack h_flag*/ starpu_codelet_pack_arg(&data, &h_flag, sizeof(h_flag)); /*pack perfmodel*/ starpu_codelet_pack_arg(&data, &perfmodel, sizeof(perfmodel)); task->cl=func_cl; /*pass optional values name=None, synchronous=1, priority=0, color=None, flops=None, perfmodel=None, sizebase=0*/ /*const char * name*/ PyObject *PyName = PyDict_GetItemString(dict_option, "name"); if (PyName!=NULL && PyName!=Py_None) { const char* name_str = PyUnicode_AsUTF8(PyName); char* name = strdup(name_str); //printf("name is %s\n", name); task->name=name; } /*unsigned synchronous:1*/ PyObject *PySync = PyDict_GetItemString(dict_option, "synchronous"); if (PySync!=NULL) { unsigned sync=PyLong_AsUnsignedLong(PySync); //printf("sync is %u\n", sync); task->synchronous=sync; } /*int priority*/ PyObject *PyPrio = PyDict_GetItemString(dict_option, "priority"); if (PyPrio!=NULL) { int prio=PyLong_AsLong(PyPrio); //printf("prio is %d\n", prio); task->priority=prio; } /*unsigned color*/ PyObject *PyColor = PyDict_GetItemString(dict_option, "color"); if (PyColor!=NULL && PyColor!=Py_None) { unsigned color=PyLong_AsUnsignedLong(PyColor); //printf("color is %u\n", color); task->color=color; } /*double flops*/ PyObject *PyFlops = PyDict_GetItemString(dict_option, "flops"); if (PyFlops!=NULL && PyFlops!=Py_None) { double flops=PyFloat_AsDouble(PyFlops); //printf("flops is %f\n", flops); task->flops=flops; } /*int sizebase*/ PyObject *PySB = PyDict_GetItemString(dict_option, "sizebase"); int sb; if (PySB!=NULL) { sb=PyLong_AsLong(PySB); } else { sb=0; } //printf("pack sizebase is %d\n", sb); /*pack sb*/ starpu_codelet_pack_arg(&data, &sb, sizeof(sb)); /*finish packing data and store the struct in cl_arg*/ starpu_codelet_pack_arg_fini(&data, &task->cl_arg, &task->cl_arg_size); task->cl_arg_free = 1; task->prologue_callback_func=&starpupy_prologue_cb_func; task->epilogue_callback_func=&starpupy_epilogue_cb_func; task->callback_func=&starpupy_cb_func; /*call starpu_task_submit method*/ int ret; Py_BEGIN_ALLOW_THREADS; ret = starpu_task_submit(task); Py_END_ALLOW_THREADS; if (ret!=0) { PyErr_Format(StarpupyError, "Unexpected value %d returned for starpu_task_submit", ret); return NULL; } /*decrement the ref obtained at the beginning of this function*/ Py_DECREF(dict_option); //printf("the number of reference is %ld\n", Py_REFCNT(func_py)); //printf("fut %ld\n", Py_REFCNT(fut)); /*if return value is handle*/ if(PyObject_IsTrue(ret_handle)) { return r_handle_obj; } else if(PyObject_IsTrue(ret_fut)) { return fut; } else { Py_INCREF(Py_None); return Py_None; } } /*wrapper wait for all method*/ static PyObject* starpu_task_wait_for_all_wrapper(PyObject *self, PyObject *args) { (void)self; (void)args; /*call starpu_task_wait_for_all method*/ Py_BEGIN_ALLOW_THREADS; starpu_task_wait_for_all(); Py_END_ALLOW_THREADS; /*return type is void*/ Py_INCREF(Py_None); return Py_None; } /*wrapper pause method*/ static PyObject* starpu_pause_wrapper(PyObject *self, PyObject *args) { (void)self; (void)args; /*call starpu_pause method*/ starpu_pause(); /*return type is void*/ Py_INCREF(Py_None); return Py_None; } /*wrapper resume method*/ static PyObject* starpu_resume_wrapper(PyObject *self, PyObject *args) { (void)self; (void)args; /*call starpu_resume method*/ starpu_resume(); /*return type is void*/ Py_INCREF(Py_None); return Py_None; } /*wrapper worker_get_count_by_type method*/ static PyObject* starpu_worker_get_count_by_type_wrapper(PyObject *self, PyObject *args) { (void)self; int type; if (!PyArg_ParseTuple(args, "I", &type)) return NULL; if (!((type >= STARPU_CPU_WORKER && type <= STARPU_NARCH) || type == STARPU_ANY_WORKER)) RETURN_EXCEPT("Parameter %d invalid", type); int num_worker=starpu_worker_get_count_by_type(type); /*return type is unsigned*/ return Py_BuildValue("I", num_worker); } /*wrapper get min priority method*/ static PyObject* starpu_sched_get_min_priority_wrapper(PyObject *self, PyObject *args) { (void)self; (void)args; /*call starpu_sched_get_min_priority*/ int min_prio=starpu_sched_get_min_priority(); /*return type is int*/ return Py_BuildValue("i", min_prio); } /*wrapper get max priority method*/ static PyObject* starpu_sched_get_max_priority_wrapper(PyObject *self, PyObject *args) { (void)self; (void)args; /*call starpu_sched_get_max_priority*/ int max_prio=starpu_sched_get_max_priority(); /*return type is int*/ return Py_BuildValue("i", max_prio); } /*wrapper get the number of no completed submitted tasks method*/ static PyObject* starpu_task_nsubmitted_wrapper(PyObject *self, PyObject *args) { (void)self; (void)args; /*call starpu_task_nsubmitted*/ int num_task=starpu_task_nsubmitted(); /*Return the number of submitted tasks which have not completed yet */ return Py_BuildValue("i", num_task); } /*generate new sub-interpreters*/ static void new_inter(void* arg) { (void)arg; unsigned workerid = starpu_worker_get_id_check(); PyThreadState *new_thread_state; PyGILState_STATE state; state = PyGILState_Ensure(); // take the GIL STARPU_ASSERT(state == PyGILState_UNLOCKED); orig_thread_states[workerid] = PyThreadState_GET(); if (starpu_getenv_number_default("STARPUPY_OWN_GIL", 0)) { #ifdef PyInterpreterConfig_OWN_GIL /* https://peps.nogil.dev/pep-0684/ */ PyInterpreterConfig config = { .check_multi_interp_extensions = 1, .gil = PyInterpreterConfig_OWN_GIL, }; Py_NewInterpreterFromConfig(&new_thread_state, &config); #else fprintf(stderr, "STARPUPY_OWN_GIL is only supported starting from python 3.12\n"); exit(1); #endif } else new_thread_state = Py_NewInterpreter(); PyThreadState_Swap(new_thread_state); new_thread_states[workerid] = new_thread_state; PyEval_SaveThread(); // releases the GIL } /*delete sub-interpreters*/ static void del_inter(void* arg) { (void)arg; unsigned workerid = starpu_worker_get_id_check(); PyThreadState *new_thread_state = new_thread_states[workerid]; PyEval_RestoreThread(new_thread_state); // reacquires the GIL Py_EndInterpreter(new_thread_state); PyThreadState_Swap(orig_thread_states[workerid]); PyGILState_Release(PyGILState_UNLOCKED); } void _starpupy_data_register_ops(void) { _starpupy_interface_pyobject_ops.interfaceid = starpu_data_interface_get_next_id(); _starpupy_interface_pybuffer_ops.interfaceid = starpu_data_interface_get_next_id(); _starpupy_interface_pybuffer_bytes_ops.interfaceid = starpu_data_interface_get_next_id(); starpu_data_register_ops(&_starpupy_interface_pyobject_ops); starpu_data_register_ops(&_starpupy_interface_pybuffer_ops); starpu_data_register_ops(&_starpupy_interface_pybuffer_bytes_ops); } /*wrapper init method*/ static PyObject* starpu_init_wrapper(PyObject *self, PyObject *args) { (void)self; (void)args; /*starpu initialization*/ int ret; _starpupy_data_register_ops(); struct starpu_conf conf; Py_BEGIN_ALLOW_THREADS; starpu_conf_init(&conf); ret = starpu_init(&conf); Py_END_ALLOW_THREADS; if (ret!=0) { PyErr_Format(StarpupyError, "Unexpected value %d returned for starpu_init", ret); return NULL; } if (conf.sched_policy_name && !strcmp(conf.sched_policy_name, "graph_test")) { /* FIXME: should call starpu_do_schedule when appropriate, the graph_test scheduler needs it. */ fprintf(stderr,"TODO: The graph_test scheduler needs starpu_do_schedule calls\n"); exit(77); } if (active_multi_interpreter) { /*generate new interpreter on each worker*/ Py_BEGIN_ALLOW_THREADS; starpu_execute_on_each_worker_ex(new_inter, NULL, where_inter, "new_inter"); Py_END_ALLOW_THREADS; } /*return type is void*/ Py_INCREF(Py_None); return Py_None; } /*wrapper shutdown method*/ static PyObject* starpu_shutdown_wrapper(PyObject *self, PyObject *args) { (void)self; (void)args; //printf("it's starpu_shutdown function\n"); /*unregister the rest of handle in handle_dict*/ /*get handle_dict, decrement after using*/ PyObject *handle_dict = PyObject_GetAttrString(starpu_module, "handle_dict"); /*obj_id is the key in dict, handle_obj is the value in dict*/ PyObject *obj_id, *handle_obj; Py_ssize_t handle_pos = 0; while(PyDict_Next(handle_dict, &handle_pos, &obj_id, &handle_obj)) { /*PyObject *->handle*/ PyObject *handle_cap = PyObject_CallMethod(handle_obj, "get_capsule", NULL); starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); if (handle != (void*)-1) { /*call starpu_data_unregister method*/ Py_BEGIN_ALLOW_THREADS starpu_data_unregister(handle); Py_END_ALLOW_THREADS PyCapsule_SetPointer(handle_cap, (void*)-1); } /*remove this handle from handle_dict*/ PyDict_DelItem(handle_dict, obj_id); Py_DECREF(handle_cap); } Py_DECREF(handle_dict); /*unregister the rest of handle in handle_set*/ /*get handle_set, decrement after using*/ PyObject *handle_set = PyObject_GetAttrString(starpu_module, "handle_set"); /*treat set as an iterator, decrement after using*/ PyObject *handle_set_iterator = PyObject_GetIter(handle_set); while((handle_obj=PyIter_Next(handle_set_iterator))) { /*PyObject *->handle*/ PyObject *handle_cap = PyObject_CallMethod(handle_obj, "get_capsule", NULL); starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); if (handle != (void*)-1) { /*call starpu_data_unregister method*/ Py_BEGIN_ALLOW_THREADS starpu_data_unregister(handle); Py_END_ALLOW_THREADS PyCapsule_SetPointer(handle_cap, (void*)-1); } /*remove this handle from handle_set*/ PySet_Discard(handle_set, handle_obj); Py_DECREF(handle_set_iterator); handle_set_iterator = PyObject_GetIter(handle_set); Py_DECREF(handle_cap); /*release ref obtained by PyInter_Next*/ Py_DECREF(handle_obj); } Py_DECREF(handle_set_iterator); Py_DECREF(handle_set); /*clean all perfmodel which are saved in dict_perf*/ /*get dict_perf, decrement after using*/ PyObject *perf_dict = PyObject_GetAttrString(starpu_module, "dict_perf"); PyObject *perf_key, *perf_value; Py_ssize_t perf_pos = 0; while(PyDict_Next(perf_dict, &perf_pos, &perf_key, &perf_value)) { PyDict_DelItem(perf_dict, perf_key); } Py_DECREF(perf_dict); /*gc module import*/ PyObject *gc_module = PyImport_ImportModule("gc"); if (gc_module == NULL) { PyErr_Format(StarpupyError, "can't find gc module"); Py_XDECREF(gc_module); return NULL; } PyObject *gc_collect = PyObject_CallMethod(gc_module, "collect", NULL); PyObject *gc_garbage = PyObject_GetAttrString(gc_module, "garbage"); Py_DECREF(gc_collect); Py_DECREF(gc_garbage); Py_DECREF(gc_module); /*stop the cb_loop*/ if (cb_loop) { PyObject * cb_loop_stop = PyObject_CallMethod(cb_loop, "stop", NULL); Py_DECREF(cb_loop_stop); } /*call starpu_shutdown method*/ Py_BEGIN_ALLOW_THREADS; starpu_task_wait_for_all(); if(active_multi_interpreter) { /*delete interpreter on each worker*/ starpu_execute_on_each_worker_ex(del_inter, NULL, where_inter, "del_inter"); } starpu_shutdown(); Py_END_ALLOW_THREADS; /*return type is void*/ Py_INCREF(Py_None); return Py_None; } /*set ncpu*/ static PyObject* starpu_set_ncpu(PyObject *self, PyObject *args) { (void)self; int ncpu; if (!PyArg_ParseTuple(args, "I", &ncpu)) return NULL; Py_BEGIN_ALLOW_THREADS; starpu_task_wait_for_all(); if(active_multi_interpreter) { /*delete interpreter on each worker*/ starpu_execute_on_each_worker_ex(del_inter, NULL, where_inter, "del_inter"); } starpu_shutdown(); if (starpu_getenv("STARPU_NCPU") || starpu_getenv("STARPU_NCPUS")) fprintf(stderr, "warning: starpupy.set_ncpu is ineffective when the STARPU_NCPU or STARPU_NCPUS environment variable is defined"); int ret; struct starpu_conf conf; starpu_conf_init(&conf); conf.ncpus = ncpu; ret = starpu_init(&conf); if (ret!=0) { PyErr_Format(StarpupyError, "Unexpected value %d returned for starpu_init", ret); return NULL; } if (active_multi_interpreter) { /* generate new interpreter on each worker*/ starpu_execute_on_each_worker_ex(new_inter, NULL, where_inter, "new_inter"); } Py_END_ALLOW_THREADS; /*return type is void*/ Py_INCREF(Py_None); return Py_None; } /***********************************************************************************/ /***************The module’s method table and initialization function**************/ /*method table*/ static PyMethodDef starpupyMethods[] = { {"init", starpu_init_wrapper, METH_VARARGS, "initialize StarPU"}, /* init method*/ {"_task_submit", starpu_task_submit_wrapper, METH_VARARGS, "submit the task"}, /*submit method*/ {"task_wait_for_all", starpu_task_wait_for_all_wrapper, METH_VARARGS, "wait the task"}, /*wait for all method*/ {"pause", starpu_pause_wrapper, METH_VARARGS, "suspend the processing of new tasks by workers"}, /*pause method*/ {"resume", starpu_resume_wrapper, METH_VARARGS, "resume the workers polling for new tasks"}, /*resume method*/ {"init_perfmodel", init_perfmodel, METH_VARARGS, "initialize struct starpu_perfmodel"}, /*initialize perfmodel*/ {"free_perfmodel", free_perfmodel, METH_VARARGS, "free struct starpu_perfmodel"}, /*free perfmodel*/ #ifndef STARPU_SIMGRID {"save_history_based_model", starpu_save_history_based_model_wrapper, METH_VARARGS, "save the performance model"}, /*save the performance model*/ #endif {"sched_get_min_priority", starpu_sched_get_min_priority_wrapper, METH_VARARGS, "get the number of min priority"}, /*get the number of min priority*/ {"sched_get_max_priority", starpu_sched_get_max_priority_wrapper, METH_VARARGS, "get the number of max priority"}, /*get the number of max priority*/ {"task_nsubmitted", starpu_task_nsubmitted_wrapper, METH_VARARGS, "get the number of submitted tasks which have not completed yet"}, /*get the number of submitted tasks which have not completed yet*/ {"shutdown", starpu_shutdown_wrapper, METH_VARARGS, "shutdown starpu"}, /*shutdown starpu*/ {"starpupy_data_register", starpupy_data_register_wrapper, METH_VARARGS, "register PyObject in a handle"}, /*register PyObject in a handle*/ {"starpupy_numpy_register", starpupy_numpy_register_wrapper, METH_VARARGS, "register empty Numpy array in a handle"}, /*register PyObject in a handle*/ {"starpupy_get_object", starpupy_get_object_wrapper, METH_VARARGS, "get PyObject from handle"}, /*get PyObject from handle*/ {"starpupy_acquire_handle", starpupy_acquire_handle_wrapper, METH_VARARGS, "acquire handle"}, /*acquire handle*/ {"starpupy_release_handle", starpupy_release_handle_wrapper, METH_VARARGS, "release handle"}, /*release handle*/ {"starpupy_data_unregister", starpupy_data_unregister_wrapper, METH_VARARGS, "unregister handle"}, /*unregister handle*/ {"starpupy_data_unregister_submit", starpupy_data_unregister_submit_wrapper, METH_VARARGS, "unregister handle and object"}, /*unregister handle and object*/ {"starpupy_acquire_object", starpupy_acquire_object_wrapper, METH_VARARGS, "acquire PyObject handle"}, /*acquire handle*/ {"starpupy_release_object", starpupy_release_object_wrapper, METH_VARARGS, "release PyObject handle"}, /*release handle*/ {"starpupy_data_unregister_object", starpupy_data_unregister_object_wrapper, METH_VARARGS, "unregister PyObject handle"}, /*unregister handle*/ {"starpupy_data_unregister_submit_object", starpupy_data_unregister_submit_object_wrapper, METH_VARARGS, "unregister PyObject handle and object"}, /*unregister handle and object*/ {"starpupy_data_partition", starpu_data_partition_wrapper, METH_VARARGS, "handle partition into sub handles"}, {"starpupy_data_unpartition", starpu_data_unpartition_wrapper, METH_VARARGS, "handle unpartition sub handles"}, {"starpupy_get_partition_size", starpupy_get_partition_size_wrapper, METH_VARARGS, "get the array size from each sub handle"}, {"set_ncpu", starpu_set_ncpu, METH_VARARGS,"reinitialize starpu with given number of CPU"}, {"worker_get_count_by_type", starpu_worker_get_count_by_type_wrapper, METH_VARARGS, "get the number of workers for a given type"}, {NULL, NULL,0,NULL} }; /*function of slot type Py_mod_exec */ static int my_exec(PyObject *m) { PyModule_AddStringConstant(m, "starpupy", "starpupy"); /* Add an exception type */ if (StarpupyError == NULL) { StarpupyError = PyErr_NewException("starpupy.error", NULL, NULL); } if (PyModule_AddObject(m, "error", StarpupyError) < 0) { Py_XDECREF(StarpupyError); return -1; } return 0; } /*m_slots member of the module*/ static PyModuleDef_Slot mySlots[] = { {Py_mod_exec, my_exec}, {0, NULL} }; /*deallocation function*/ static void starpupyFree(void *self) { (void)self; //printf("it's the free function\n"); Py_XDECREF(asyncio_module); Py_XDECREF(concurrent_futures_future_class); Py_XDECREF(cloudpickle_module); Py_XDECREF(dumps); Py_XDECREF(pickle_module); Py_XDECREF(loads); Py_XDECREF(starpu_module); Py_XDECREF(starpu_dict); Py_XDECREF(cb_loop); } /*module definition structure*/ static struct PyModuleDef starpupymodule = { PyModuleDef_HEAD_INIT, .m_name = "starpupy", .m_doc = NULL, .m_methods = starpupyMethods, .m_size = 0, .m_slots = mySlots, .m_traverse = NULL, .m_clear = NULL, .m_free = starpupyFree }; static void* set_cb_loop(void* arg) { (void)arg; PyGILState_STATE state = PyGILState_Ensure(); /*second loop will run until we stop it in starpu_shutdown*/ PyObject * cb_loop_run = PyObject_CallMethod(cb_loop, "run_forever", NULL); Py_DECREF(cb_loop_run); PyGILState_Release(state); return NULL; } /*initialization function*/ PyMODINIT_FUNC PyInit_starpupy(void) { #if PY_MAJOR_VERSION < 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 9) PyEval_InitThreads(); #endif #if defined(STARPU_USE_MPI_MASTER_SLAVE) active_multi_interpreter = 1; #else if (starpu_getenv_number_default("STARPUPY_MULTI_INTERPRETER", 0) || starpu_getenv_number("STARPU_TCPIP_MS_SLAVES") > 0) active_multi_interpreter = 1; #endif main_thread = pthread_self(); /*python asyncio import*/ asyncio_module = PyImport_ImportModule("asyncio"); if (asyncio_module == NULL) { PyErr_Format(PyExc_RuntimeError, "can't find asyncio module"); starpupyFree(NULL); return NULL; } /*cloudpickle import*/ if (active_multi_interpreter) { cloudpickle_module = PyImport_ImportModule("cloudpickle"); if (cloudpickle_module == NULL) { PyErr_Format(PyExc_RuntimeError, "can't find cloudpickle module"); starpupyFree(NULL); return NULL; } /*dumps method*/ dumps = PyObject_GetAttrString(cloudpickle_module, "dumps"); } /*pickle import*/ if (active_multi_interpreter) { pickle_module = PyImport_ImportModule("pickle"); if (pickle_module == NULL) { PyErr_Format(PyExc_RuntimeError, "can't find pickle module"); starpupyFree(NULL); return NULL; } /*loads method*/ loads = PyObject_GetAttrString(pickle_module, "loads"); } /*starpu import*/ starpu_module = PyImport_ImportModule("starpu"); if (starpu_module == NULL) { PyErr_Format(PyExc_RuntimeError, "can't find starpu module"); starpupyFree(NULL); return NULL; } starpu_dict = PyModule_GetDict(starpu_module); /*protect borrowed reference, decremented in starpupyFree*/ Py_INCREF(starpu_dict); /* Prepare for running asyncio futures */ /*create a new event loop in another thread, in case the main loop is occupied*/ cb_loop = PyObject_CallMethod(asyncio_module, "new_event_loop", NULL); if (cb_loop == NULL) { PyErr_Format(PyExc_RuntimeError, "can't create cb_loop from asyncio module (try to add \"-m asyncio\" when starting Python interpreter)"); starpupyFree(NULL); return NULL; } int pc = pthread_create(&thread_id, NULL, set_cb_loop, NULL); if (pc) { PyErr_Format(PyExc_RuntimeError, "Fail to create thread\n"); starpupyFree(NULL); return NULL; } /* Prepare for running concurrent.futures futures */ /*python concurrent.futures import*/ PyObject *concurrent_futures_module = PyImport_ImportModule("concurrent.futures"); if (concurrent_futures_module == NULL) { PyErr_Format(PyExc_RuntimeError, "can't find concurrent.futures module"); starpupyFree(NULL); return NULL; } PyObject *concurrent_futures_module_dict = PyModule_GetDict(concurrent_futures_module); /* borrowed */ Py_DECREF(concurrent_futures_module); if (concurrent_futures_module_dict == NULL) { PyErr_Format(PyExc_RuntimeError, "can't get concurrent.futures dict"); starpupyFree(NULL); return NULL; } concurrent_futures_future_class = PyDict_GetItemString(concurrent_futures_module_dict, "Future"); Py_DECREF(concurrent_futures_module_dict); if (concurrent_futures_future_class == NULL) { PyErr_Format(PyExc_RuntimeError, "can't find Future class"); starpupyFree(NULL); return NULL; } PyObject *concurrent_futures_thread_module = PyImport_ImportModule("concurrent.futures.thread"); if (concurrent_futures_thread_module == NULL) { PyErr_Format(PyExc_RuntimeError, "can't find concurrent.futures.thread module"); starpupyFree(NULL); return NULL; } PyObject *concurrent_futures_thread_module_dict = PyModule_GetDict(concurrent_futures_thread_module); /* borrowed */ Py_DECREF(concurrent_futures_thread_module); if (concurrent_futures_thread_module_dict == NULL) { PyErr_Format(PyExc_RuntimeError, "can't get concurrent.futures.thread dict"); Py_DECREF(concurrent_futures_thread_module); starpupyFree(NULL); return NULL; } PyObject *executor_class = PyDict_GetItemString(concurrent_futures_thread_module_dict, "ThreadPoolExecutor"); Py_DECREF(concurrent_futures_thread_module_dict); if (executor_class == NULL) { PyErr_Format(PyExc_RuntimeError, "can't find ThreadPoolExecutor class"); starpupyFree(NULL); return NULL; } PyObject *cb_executor_instance = PyInstanceMethod_New(executor_class); Py_DECREF(executor_class); if (cb_executor_instance == NULL) { PyErr_Format(PyExc_RuntimeError, "can't create concurrent.futures executor"); starpupyFree(NULL); return NULL; } cb_executor = PyObject_CallObject(cb_executor_instance, NULL); Py_DECREF(cb_executor_instance); if (cb_executor == NULL) { PyErr_Format(PyExc_RuntimeError, "can't create concurrent.futures executor"); starpupyFree(NULL); return NULL; } /*module import multi-phase initialization*/ return PyModuleDef_Init(&starpupymodule); } /***********************************************************************************/ starpu-1.4.9+dfsg/starpupy/src/starpupy_buffer_interface.c000066400000000000000000000707741507764646700241110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #undef NDEBUG #include #include #include #define PY_SSIZE_T_CLEAN #include #ifdef STARPU_PYTHON_HAVE_NUMPY #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include #endif #include "starpupy_buffer_interface.h" PyObject* starpupy_buffer_get_numpy(struct starpupy_buffer_interface *pybuffer_interface) { #ifdef STARPU_PYTHON_HAVE_NUMPY char* pybuf = pybuffer_interface->py_buffer; Py_ssize_t nbuf = pybuffer_interface->buffer_size; int arr_type = pybuffer_interface->array_type; size_t nitem = pybuffer_interface->item_size; npy_intp narray = nbuf/nitem; npy_intp* get_dim = pybuffer_interface->array_dim; int get_ndim = pybuffer_interface->dim_size; /*store the dim array in a tuple*/ PyObject* dim_tup = PyTuple_New(get_ndim); int i; for (i=0; itypecode; char* pybuf = pybuffer_interface->py_buffer; Py_ssize_t nbuf = pybuffer_interface->buffer_size; size_t nitem = pybuffer_interface->item_size; /*get size of array*/ int narray = nbuf/nitem; /*create the new array.array*/ PyObject *arr_module = PyImport_ImportModule("array"); PyObject *arr_dict = PyModule_GetDict(arr_module); /*get array.array class*/ PyObject *arr_class = PyDict_GetItemString(arr_dict, "array"); /*create an instance of array.array, decrement in the end of the function*/ PyObject *arr_instance = PyInstanceMethod_New(arr_class); /*get the buffer bytes, decrement in the end of the function*/ PyObject *pybt=PyBytes_FromStringAndSize(pybuf, nbuf); /*get the array elements, reference is stolen by PyTuple_SetItem*/ PyObject *arr_list = NULL; /*if the element is not unicode character*/ if (arr_typecode!='u') { char type_str[narray+1]; memset(type_str, arr_typecode, narray); type_str[narray] = 0; /*get the array element list using struct module*/ PyObject *struct_module = PyImport_ImportModule("struct"); arr_list = PyObject_CallMethod(struct_module, "unpack", "sO", type_str, pybt); Py_DECREF(struct_module); } /*if the element is unicode character*/ else { /*decode buffer bytes to unicode*/ PyObject* pyuni = PyUnicode_DecodeUTF32(PyBytes_AsString(pybt), PyBytes_Size(pybt), "can't decode", NULL); /*convert unicode to wide char*/ wchar_t* uni_str = PyUnicode_AsWideCharString(pyuni, NULL); if(uni_str != NULL) { arr_list = Py_BuildValue("u", uni_str); PyMem_Free(uni_str); } Py_DECREF(pyuni); } /*initialize the instance*/ PyObject *arr_args=PyTuple_New(2); char arr_type[]={arr_typecode, 0}; PyTuple_SetItem(arr_args, 0, Py_BuildValue("s", arr_type)); PyTuple_SetItem(arr_args, 1, arr_list); PyObject *arr_obj = PyObject_CallObject(arr_instance,arr_args); Py_DECREF(pybt); Py_DECREF(arr_module); Py_DECREF(arr_instance); Py_DECREF(arr_args); return arr_obj; } PyObject* starpupy_buffer_get_memview(struct starpupy_buffer_interface *pybuffer_interface) { char* pybuf = pybuffer_interface->py_buffer; Py_ssize_t nbuf = pybuffer_interface->buffer_size; char mem_format = pybuffer_interface->typecode; size_t nitem = pybuffer_interface->item_size; int ndim = pybuffer_interface->dim_size; int* mem_shape = pybuffer_interface->shape; int narray = nbuf/nitem; /*decrement in each if{}*/ PyObject *pybt=PyBytes_FromStringAndSize(pybuf, nbuf); /*return value of the function*/ PyObject *memview_obj = NULL; if(mem_format=='B') { memview_obj = pybt; } /*if the element is not unicode character of array.array*/ else if(mem_format!='w') { /* We have a flat array, split it into ndim-dimension lists of lists according to mem_shape */ char type_str[narray+1]; memset(type_str, mem_format, narray); type_str[narray] = 0; /*get the array element list using struct module, decrement after used*/ PyObject *struct_module = PyImport_ImportModule("struct"); PyObject *m_obj = PyObject_CallMethod(struct_module, "unpack", "sO", type_str, pybt); Py_DECREF(struct_module); Py_DECREF(pybt); /*reshape the list in case the original array is multi dimension*/ /*get the index of each element in new multi dimension array*/ int ind[narray][ndim]; int d; int i; for (i = 0; i < narray; i++) { int n = narray; int ii = i; for (d = 0; d < ndim; d++) { n = n / mem_shape[d]; ind[i][d] = ii / n; ii = ii % n; } } /*put the element of one dimension array into the multi dimension array according to the index*/ PyObject* list_obj[ndim]; memset(&list_obj, 0, sizeof(list_obj)); for (i = 0; i < narray; i++) { for (d = ndim-1; d >=0; d--) { /*in the innermost nested list, we set the element in the current list*/ if (d==ndim-1) { /*if i is the first element of this list, we need to initialize the list*/ if(ind[i][d]==0) { if(list_obj[d] != NULL) Py_DECREF(list_obj[d]); list_obj[d] = PyList_New(mem_shape[d]); } PyObject *m_obj_item = PyTuple_GetItem(m_obj, i); /*protect borrowed reference, give it to PyList_SetItem*/ Py_INCREF(m_obj_item); PyList_SetItem(list_obj[d], ind[i][d], m_obj_item); } /*in the rest of nested list, we set the inner list in the current list, once we have the nested list, one element of inner list is changed, current list is changes as well*/ else { /*if the index of element in all inner list is 0, we are the first, we have to add this new list to the upper dimension list*/ int flag=1; int dd; for(dd=ndim-1; dd>=d+1; dd--) { if(ind[i][dd]!=0) flag=0; } if(flag==1) { /*if i is the first element of this list and also the first element of all inner list, we need to initialize this list*/ if (ind[i][d]==0) { if(list_obj[d] != NULL) Py_DECREF(list_obj[d]); list_obj[d] = PyList_New(mem_shape[d]); } /*if i is the first element of all inner list, we set the last inner list in the current list*/ /*reference is stolen by PyList_SetItem*/ Py_INCREF(list_obj[d+1]); PyList_SetItem(list_obj[d],ind[i][d],list_obj[d+1]); } } } } Py_DECREF(m_obj); memview_obj = list_obj[0]; for(i=1; idim_size; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpupy_buffer_interface *local_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, node); if (node == home_node) { if(pybuffer_interface->object != NULL) { Py_INCREF(pybuffer_interface->object); local_interface->object = pybuffer_interface->object; } else { local_interface->object = NULL; } local_interface->py_buffer = pybuffer_interface->py_buffer; } else { local_interface->object = NULL; local_interface->py_buffer = NULL; } local_interface->id = pybuffer_interface->id; local_interface->buffer_type = pybuffer_interface->buffer_type; local_interface->buffer_size = pybuffer_interface->buffer_size; local_interface->dim_size = pybuffer_interface->dim_size; #ifdef STARPU_PYTHON_HAVE_NUMPY npy_intp* arr_dim = pybuffer_interface->array_dim; npy_intp* a_dim; if (arr_dim!=NULL) { a_dim = (npy_intp*)malloc(ndim*sizeof(npy_intp)); memcpy(a_dim, arr_dim, ndim*sizeof(npy_intp)); } else a_dim = NULL; local_interface->array_dim = a_dim; #endif local_interface->array_type = pybuffer_interface->array_type; local_interface->item_size = pybuffer_interface->item_size; local_interface->typecode = pybuffer_interface->typecode; int* mem_shape = pybuffer_interface->shape; int* m_shape; if (mem_shape!=NULL) { m_shape = (int*)malloc(ndim*sizeof(int)); memcpy(m_shape, mem_shape, ndim*sizeof(int)); } else m_shape = NULL; local_interface->shape = m_shape; } } static void pybuffer_unregister_data_handle(starpu_data_handle_t handle) { /*make sure we own the GIL*/ PyGILState_STATE state = PyGILState_Ensure(); unsigned home_node = starpu_data_get_home_node(handle); unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { struct starpupy_buffer_interface *local_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, node); if(node == home_node) { if(local_interface->object!=NULL) { Py_DECREF(local_interface->object); local_interface->object = NULL; local_interface->py_buffer = NULL; } } else { STARPU_ASSERT(local_interface->object == NULL); STARPU_ASSERT(local_interface->py_buffer == NULL); } #ifdef STARPU_PYTHON_HAVE_NUMPY free(local_interface->array_dim); local_interface->array_dim = NULL; #endif free(local_interface->shape); local_interface->shape = NULL; } /* release GIL */ PyGILState_Release(state); } static starpu_ssize_t pybuffer_allocate_data_on_node(void *data_interface, unsigned node) { struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) data_interface; starpu_ssize_t requested_memory = pybuffer_interface->buffer_size; pybuffer_interface->py_buffer = (char*)starpu_malloc_on_node(node, requested_memory); if (!pybuffer_interface->py_buffer) return -ENOMEM; return requested_memory; } static starpu_ssize_t pybuffer_allocate_bytes_data_on_node(void *data_interface, unsigned node) { (void)node; /*make sure we own the GIL*/ PyGILState_STATE state = PyGILState_Ensure(); struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) data_interface; char* pybuf = pybuffer_interface->py_buffer; Py_ssize_t nbuf = pybuffer_interface->buffer_size; STARPU_ASSERT(pybuf == NULL); PyObject *pybt=PyBytes_FromStringAndSize(NULL, nbuf); pybuffer_interface->object = pybt; pybuffer_interface->py_buffer = PyBytes_AsString(pybt); if (!pybuffer_interface->py_buffer) return -ENOMEM; /* release GIL */ PyGILState_Release(state); return nbuf; } static void pybuffer_free_data_on_node(void *data_interface, unsigned node) { struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) data_interface; starpu_ssize_t requested_memory = pybuffer_interface->buffer_size; starpu_free_on_node(node, (uintptr_t) pybuffer_interface->py_buffer, requested_memory); pybuffer_interface->py_buffer = NULL; } static void pybuffer_free_bytes_data_on_node(void *data_interface, unsigned node) { (void)node; /*make sure we own the GIL*/ PyGILState_STATE state = PyGILState_Ensure(); struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) data_interface; if (pybuffer_interface->object != NULL) { Py_DECREF(pybuffer_interface->object); } pybuffer_interface->object = NULL; pybuffer_interface->py_buffer = NULL; /* release GIL */ PyGILState_Release(state); } static void pybuffer_cache_data_on_node(void *cached_interface, void *src_data_interface, unsigned node) { (void)node; struct starpupy_buffer_interface *cached_pybuffer_interface = (struct starpupy_buffer_interface *) cached_interface; struct starpupy_buffer_interface *src_pybuffer_interface = (struct starpupy_buffer_interface *) src_data_interface; cached_pybuffer_interface->object = src_pybuffer_interface->object; src_pybuffer_interface->object = NULL; cached_pybuffer_interface->py_buffer = src_pybuffer_interface->py_buffer; src_pybuffer_interface->py_buffer = NULL; cached_pybuffer_interface->buffer_size = src_pybuffer_interface->buffer_size; } static void pybuffer_reuse_data_on_node(void *dst_data_interface, const void *cached_interface, unsigned node) { (void)node; struct starpupy_buffer_interface *dst_pybuffer_interface = (struct starpupy_buffer_interface *) dst_data_interface; const struct starpupy_buffer_interface *cached_pybuffer_interface = (const struct starpupy_buffer_interface *) cached_interface; dst_pybuffer_interface->object = cached_pybuffer_interface->object; dst_pybuffer_interface->py_buffer = cached_pybuffer_interface->py_buffer; } static int pybuffer_map_data(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpupy_buffer_interface *src_pybuf = src_interface; struct starpupy_buffer_interface *dst_pybuf = dst_interface; int ret; uintptr_t mapped; mapped = starpu_interface_map((uintptr_t )src_pybuf->py_buffer, 0, src_node, dst_node, (size_t)src_pybuf->buffer_size, &ret); if (mapped) { dst_pybuf->py_buffer = (char*)mapped; return 0; } return ret; } static int pybuffer_unmap_data(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpupy_buffer_interface *src_pybuf = src_interface; struct starpupy_buffer_interface *dst_pybuf = dst_interface; int ret = starpu_interface_unmap((uintptr_t)src_pybuf->py_buffer, 0, src_node, (uintptr_t)dst_pybuf->py_buffer, dst_node, (size_t)src_pybuf->buffer_size); dst_pybuf->py_buffer = 0; return ret; } static int pybuffer_update_map(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpupy_buffer_interface *src_pybuf = src_interface; struct starpupy_buffer_interface *dst_pybuf = dst_interface; return starpu_interface_update_map((uintptr_t)src_pybuf->py_buffer, 0, src_node, (uintptr_t)dst_pybuf->py_buffer, 0, dst_node, (size_t)src_pybuf->buffer_size); } static size_t pybuffer_get_size(starpu_data_handle_t handle) { size_t size; struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); size = pybuffer_interface->buffer_size; return size; } static int pybuffer_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, node); char* pybuf = pybuffer_interface->py_buffer; Py_ssize_t nbuf = pybuffer_interface->buffer_size; char *data; data = (void*)starpu_malloc_on_node_flags(node, nbuf, 0); memcpy(data, pybuf, nbuf); *ptr = data; *count = nbuf; return 0; } static int pybuffer_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { (void)count; char *data = ptr; struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, node); pybuffer_interface->id = _starpupy_interface_pybuffer_ops.interfaceid; memcpy(pybuffer_interface->py_buffer, data, pybuffer_interface->buffer_size); return 0; } static int pybuffer_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { pybuffer_peek_data(handle, node, ptr, count); starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0); return 0; } static int pybuffer_meta_size(struct starpupy_buffer_interface *pybuffer_interface) { starpu_ssize_t count; count = sizeof(pybuffer_interface->buffer_type) + /* sizeof(pybuffer_interface->object) + => built on the fly */ sizeof(pybuffer_interface->py_buffer) + sizeof(pybuffer_interface->buffer_size) + sizeof(pybuffer_interface->dim_size) + sizeof(pybuffer_interface->array_type) + sizeof(pybuffer_interface->item_size) + sizeof(pybuffer_interface->typecode) + sizeof(int); #ifdef STARPU_PYTHON_HAVE_NUMPY count += sizeof(int); #endif count += pybuffer_interface->dim_size * ( #ifdef STARPU_PYTHON_HAVE_NUMPY sizeof(pybuffer_interface->array_dim[0]) + #endif sizeof(pybuffer_interface->shape[0])); return count; } #define _pack(dst, src) do { memcpy(dst, &src, sizeof(src)); dst += sizeof(src); } while (0) static int pybuffer_pack_meta(void *data_interface, void **ptr, starpu_ssize_t *count) { struct starpupy_buffer_interface *pybuffer_interface = data_interface; *count = pybuffer_meta_size(pybuffer_interface); _STARPU_CALLOC(*ptr, *count, 1); char *cur = *ptr; _pack(cur, pybuffer_interface->buffer_type); _pack(cur, pybuffer_interface->py_buffer); _pack(cur, pybuffer_interface->buffer_size); _pack(cur, pybuffer_interface->dim_size); _pack(cur, pybuffer_interface->array_type); _pack(cur, pybuffer_interface->item_size); _pack(cur, pybuffer_interface->typecode); #ifdef STARPU_PYTHON_HAVE_NUMPY int array_dim = pybuffer_interface->array_dim ? 1 : 0; _pack(cur, array_dim); if (pybuffer_interface->array_dim) { memcpy(cur, pybuffer_interface->array_dim, pybuffer_interface->dim_size * sizeof(pybuffer_interface->array_dim[0])); cur += pybuffer_interface->dim_size * sizeof(pybuffer_interface->array_dim[0]); } #endif int shape = pybuffer_interface->shape ? 1 : 0; _pack(cur, shape); if (pybuffer_interface->shape) memcpy(cur, pybuffer_interface->shape, pybuffer_interface->dim_size * sizeof(pybuffer_interface->shape[0])); return 0; } #define _unpack(dst, src) do { memcpy(&dst, src, sizeof(dst)); src += sizeof(dst); } while(0) static int pybuffer_unpack_meta(void **data_interface, void *ptr, starpu_ssize_t *count) { _STARPU_CALLOC(*data_interface, 1, sizeof(struct starpupy_buffer_interface)); struct starpupy_buffer_interface *pybuffer_interface = (*data_interface); char *cur = ptr; pybuffer_interface->id = _starpupy_interface_pybuffer_ops.interfaceid; _unpack(pybuffer_interface->buffer_type, cur); _unpack(pybuffer_interface->py_buffer, cur); _unpack(pybuffer_interface->buffer_size, cur); _unpack(pybuffer_interface->dim_size, cur); _unpack(pybuffer_interface->array_type, cur); _unpack(pybuffer_interface->item_size, cur); _unpack(pybuffer_interface->typecode, cur); #ifdef STARPU_PYTHON_HAVE_NUMPY int array_dim; _unpack(array_dim, cur); if (array_dim) { _STARPU_MALLOC(pybuffer_interface->array_dim, pybuffer_interface->dim_size * sizeof(pybuffer_interface->array_dim[0])); memcpy(pybuffer_interface->array_dim, cur, pybuffer_interface->dim_size * sizeof(pybuffer_interface->array_dim[0])); cur += pybuffer_interface->dim_size * sizeof(pybuffer_interface->array_dim[0]); } else pybuffer_interface->array_dim = NULL; #endif int shape; _unpack(shape, cur); if (shape) { _STARPU_MALLOC(pybuffer_interface->shape, pybuffer_interface->dim_size * sizeof(pybuffer_interface->shape[0])); memcpy(pybuffer_interface->shape, cur, pybuffer_interface->dim_size * sizeof(pybuffer_interface->shape[0])); } else pybuffer_interface->shape = NULL; *count = pybuffer_meta_size(pybuffer_interface); return 0; } static int pybuffer_free_meta(void *data_interface) { struct starpupy_buffer_interface *pybuffer_interface = data_interface; #ifdef STARPU_PYTHON_HAVE_NUMPY free(pybuffer_interface->array_dim); pybuffer_interface->array_dim = NULL; #endif free(pybuffer_interface->shape); pybuffer_interface->shape = NULL; return 0; } static uint32_t starpupy_buffer_footprint(starpu_data_handle_t handle) { struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); int buf_type = pybuffer_interface->buffer_type; Py_ssize_t nbuf = pybuffer_interface->buffer_size; int ndim = pybuffer_interface->dim_size; int arr_type = pybuffer_interface->array_type; size_t nitem = pybuffer_interface->item_size; size_t narray = 0; if(pybuffer_interface->buffer_type != starpupy_bytes_interface && pybuffer_interface->buffer_type != starpupy_bytearray_interface) { narray = nbuf/nitem; } uint32_t crc = 0; crc=starpu_hash_crc32c_be(buf_type, crc); crc=starpu_hash_crc32c_be(nbuf, crc); crc=starpu_hash_crc32c_be(ndim, crc); crc=starpu_hash_crc32c_be(arr_type, crc); crc=starpu_hash_crc32c_be(narray, crc); crc=starpu_hash_crc32c_be(nitem, crc); return crc; } static void pybuffer_display(starpu_data_handle_t handle, FILE *f) { struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); fprintf(f, "%u\t", pybuffer_interface->dim_size); } static int pybuffer_compare(void *data_interface_a, void *data_interface_b) { struct starpupy_buffer_interface *a = (struct starpupy_buffer_interface *) data_interface_a; struct starpupy_buffer_interface *b = (struct starpupy_buffer_interface *) data_interface_b; /* FIXME: compare content of shape or array_dim */ return ((a->array_type == b->array_type) && (a->item_size == b->item_size) && (a->dim_size == b->dim_size)); } static int pybuffer_alloc_compare(void *data_interface_a, void *data_interface_b) { struct starpupy_buffer_interface *a = (struct starpupy_buffer_interface *) data_interface_a; struct starpupy_buffer_interface *b = (struct starpupy_buffer_interface *) data_interface_b; return a->buffer_size == b->buffer_size; } static int pybuffer_copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct starpupy_buffer_interface *src = (struct starpupy_buffer_interface *) src_interface; struct starpupy_buffer_interface *dst = (struct starpupy_buffer_interface *) dst_interface; starpu_interface_copy((uintptr_t) src->py_buffer, 0, src_node, (uintptr_t) dst->py_buffer, 0, dst_node, src->buffer_size, async_data); starpu_interface_data_copy(src_node, dst_node, src->buffer_size); return 0; } static int pybuffer_copy_bytes_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { struct starpupy_buffer_interface *src = (struct starpupy_buffer_interface *) src_interface; struct starpupy_buffer_interface *dst = (struct starpupy_buffer_interface *) dst_interface; starpu_interface_copy((uintptr_t) src->py_buffer, 0, src_node, (uintptr_t) dst->py_buffer, 0, dst_node, src->buffer_size, NULL); starpu_interface_data_copy(src_node, dst_node, src->buffer_size); return 0; } static const struct starpu_data_copy_methods pybuffer_copy_data_methods_s = { .any_to_any = pybuffer_copy_any_to_any, }; static const struct starpu_data_copy_methods pybuffer_bytes_copy_data_methods_s = { .ram_to_ram = pybuffer_copy_bytes_ram_to_ram, }; struct starpu_data_interface_ops _starpupy_interface_pybuffer_ops = { .register_data_handle = pybuffer_register_data_handle, .unregister_data_handle = pybuffer_unregister_data_handle, .allocate_data_on_node = pybuffer_allocate_data_on_node, .free_data_on_node = pybuffer_free_data_on_node, .cache_data_on_node = pybuffer_cache_data_on_node, .reuse_data_on_node = pybuffer_reuse_data_on_node, .map_data = pybuffer_map_data, .unmap_data = pybuffer_unmap_data, .update_map = pybuffer_update_map, .get_size = pybuffer_get_size, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct starpupy_buffer_interface), .footprint = starpupy_buffer_footprint, .pack_data = pybuffer_pack_data, .peek_data = pybuffer_peek_data, .unpack_data = pybuffer_unpack_data, .pack_meta = pybuffer_pack_meta, .unpack_meta = pybuffer_unpack_meta, .free_meta = pybuffer_free_meta, .dontcache = 0, .display = pybuffer_display, .compare = pybuffer_compare, .alloc_compare = pybuffer_alloc_compare, .name = "STARPUPY_BUFFER_INTERFACE", .copy_methods = &pybuffer_copy_data_methods_s, }; /* we need another interface for bytes, bytearray, array.array, since we have to copy these objects between processes. * some more explanations are here: https://discuss.python.org/t/adding-pybytes-frombuffer-and-similar-for-array-array/21717 */ struct starpu_data_interface_ops _starpupy_interface_pybuffer_bytes_ops = { .register_data_handle = pybuffer_register_data_handle, .unregister_data_handle = pybuffer_unregister_data_handle, .allocate_data_on_node = pybuffer_allocate_bytes_data_on_node, .free_data_on_node = pybuffer_free_bytes_data_on_node, .cache_data_on_node = pybuffer_cache_data_on_node, .reuse_data_on_node = pybuffer_reuse_data_on_node, .get_size = pybuffer_get_size, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct starpupy_buffer_interface), .footprint = starpupy_buffer_footprint, .pack_data = pybuffer_pack_data, .peek_data = pybuffer_peek_data, .unpack_data = pybuffer_unpack_data, .dontcache = 0, .display = pybuffer_display, .compare = pybuffer_compare, .alloc_compare = pybuffer_alloc_compare, .name = "STARPUPY_BUFFER_BYTES_INTERFACE", .copy_methods = &pybuffer_bytes_copy_data_methods_s, }; #ifdef STARPU_PYTHON_HAVE_NUMPY void starpupy_buffer_numpy_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, int ndim, npy_intp* arr_dim, int arr_type, size_t nitem) { struct starpupy_buffer_interface pybuffer_interface = { .id = _starpupy_interface_pybuffer_ops.interfaceid, .buffer_type = buf_type, .py_buffer = pybuf, .buffer_size = nbuf, .dim_size = ndim, .array_dim = arr_dim, .array_type = arr_type, .item_size = nitem }; starpu_data_register(handleptr, home_node, &pybuffer_interface, &_starpupy_interface_pybuffer_ops); } #endif void starpupy_buffer_bytes_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, PyObject *obj) { struct starpupy_buffer_interface pybuffer_interface = { .id = _starpupy_interface_pybuffer_ops.interfaceid, .object = obj, .buffer_type = buf_type, .py_buffer = pybuf, .buffer_size = nbuf }; starpu_data_register(handleptr, home_node, &pybuffer_interface, &_starpupy_interface_pybuffer_bytes_ops); } void starpupy_buffer_array_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, char arr_typecode, size_t nitem, PyObject *obj) { struct starpupy_buffer_interface pybuffer_interface = { .id = _starpupy_interface_pybuffer_ops.interfaceid, .object = obj, .buffer_type = buf_type, .py_buffer = pybuf, .buffer_size = nbuf, .typecode = arr_typecode, .item_size = nitem }; starpu_data_register(handleptr, home_node, &pybuffer_interface, &_starpupy_interface_pybuffer_bytes_ops); } void starpupy_buffer_memview_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, char mem_format, size_t nitem, int ndim, int* mem_shape) { struct starpupy_buffer_interface pybuffer_interface = { .id = _starpupy_interface_pybuffer_ops.interfaceid, .buffer_type = buf_type, .py_buffer = pybuf, .buffer_size = nbuf, .typecode = mem_format, .item_size = nitem, .dim_size = ndim, .shape = mem_shape }; starpu_data_register(handleptr, home_node, &pybuffer_interface, &_starpupy_interface_pybuffer_ops); } int starpupy_check_buffer_interface_id(starpu_data_handle_t handle) { int interfaceid = (int)starpu_data_get_interface_id(handle); return (interfaceid == _starpupy_interface_pybuffer_ops.interfaceid || interfaceid == _starpupy_interface_pybuffer_bytes_ops.interfaceid); } starpu-1.4.9+dfsg/starpupy/src/starpupy_buffer_interface.h000066400000000000000000000122261507764646700241020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #define PY_SSIZE_T_CLEAN #include #ifdef STARPU_PYTHON_HAVE_NUMPY #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include #endif #include extern struct starpu_data_interface_ops _starpupy_interface_pybuffer_ops; extern struct starpu_data_interface_ops _starpupy_interface_pybuffer_bytes_ops; struct starpupy_buffer_interface { int id; /**< Identifier of the interface */ enum BufType {starpupy_numpy_interface, starpupy_bytes_interface, starpupy_bytearray_interface, starpupy_array_interface, starpupy_memoryview_interface}buffer_type; PyObject* object; /* For bytes, bytearray, array.array, object corresponding py_buffer */ char* py_buffer; /* The buffer actually allocated to store the data */ Py_ssize_t buffer_size; /* The size of py_buffer */ int dim_size; /* For numpy objects, the dimension */ #ifdef STARPU_PYTHON_HAVE_NUMPY npy_intp* array_dim; /* For numpy objects, the shapes of the different dimensions */ #endif int array_type; /* The type of elements */ size_t item_size; /* The size of elements */ char typecode; /* For array.array, the type of elements */ int* shape; /* For memoryview, the shape of each dimension */ }; #ifdef STARPU_PYTHON_HAVE_NUMPY void starpupy_buffer_numpy_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, int ndim, npy_intp* arr_dim, int arr_type, size_t nitem); #endif void starpupy_buffer_bytes_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, PyObject* obj); void starpupy_buffer_array_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, char arr_typecode, size_t nitem, PyObject* obj); void starpupy_buffer_memview_register(starpu_data_handle_t *handleptr, int home_node, int buf_type, char* pybuf, Py_ssize_t nbuf, char mem_format, size_t nitem, int ndim, int* mem_shape); int starpupy_check_buffer_interface_id(starpu_data_handle_t handle); PyObject* starpupy_buffer_get_numpy(struct starpupy_buffer_interface *pybuffer_interface); PyObject* starpupy_buffer_get_arrarr(struct starpupy_buffer_interface *pybuffer_interface); PyObject* starpupy_buffer_get_memview(struct starpupy_buffer_interface *pybuffer_interface); #define STARPUPY_BUF_CHECK(handle) (starpupy_check_buffer_interface_id(handle)) #define STARPUPY_BUF_CHECK_INTERFACE(interface) (((struct starpupy_buffer_interface *)(interface))->id == _starpupy_interface_pybuffer_ops.interfaceid) #define STARPUPY_BUF_GET_TYPE(interface) (((struct starpupy_buffer_interface *)(interface))->buffer_type) #define STARPUPY_BUF_GET_OBJ(interface) (Py_INCREF(((struct starpupy_buffer_interface *)(interface))->object), ((struct starpupy_buffer_interface *)(interface))->object) #define STARPUPY_BUF_GET_PYBUF(interface) (((struct starpupy_buffer_interface *)(interface))->py_buffer) #define STARPUPY_BUF_GET_NBUF(interface) (((struct starpupy_buffer_interface *)(interface))->buffer_size) #define STARPUPY_BUF_GET_NDIM(interface) (((struct starpupy_buffer_interface *)(interface))->dim_size) #define STARPUPY_BUF_GET_DIM(interface) (((struct starpupy_buffer_interface *)(interface))->array_dim) #define STARPUPY_BUF_GET_ARRTYPE(interface) (((struct starpupy_buffer_interface *)(interface))->array_type) #define STARPUPY_BUF_GET_NITEM(interface) (((struct starpupy_buffer_interface *)(interface))->item_size) #define STARPUPY_BUF_GET_TYPECODE(interface) (((struct starpupy_buffer_interface *)(interface))->typecode) #define STARPUPY_BUF_GET_SHAPE(interface) (((struct starpupy_buffer_interface *)(interface))->shape) #define STARPUPY_BUF_GET_PYNUMPY(interface) (starpupy_buffer_get_numpy(interface)) #define STARPUPY_BUF_GET_PYBYTES(interface) (PyBytes_FromStringAndSize(STARPUPY_BUF_GET_PYBUF(interface), STARPUPY_BUF_GET_NBUF(interface))) #define STARPUPY_BUF_GET_PYARRAY(interface) (starpupy_buffer_get_arrarr(interface)) #define STARPUPY_BUF_GET_PYMEMVIEW(interface) (starpupy_buffer_get_memview(interface)) #define STARPUPY_BUF_GET_PYOBJECT(interface)\ (STARPUPY_BUF_GET_TYPE(interface)==starpupy_numpy_interface ? STARPUPY_BUF_GET_PYNUMPY(interface) \ : STARPUPY_BUF_GET_TYPE(interface)==starpupy_bytes_interface || STARPUPY_BUF_GET_TYPE(interface)==starpupy_bytearray_interface || STARPUPY_BUF_GET_TYPE(interface)==starpupy_array_interface ? STARPUPY_BUF_GET_OBJ(interface) \ : STARPUPY_BUF_GET_TYPE(interface)==starpupy_memoryview_interface ? STARPUPY_BUF_GET_PYMEMVIEW(interface) \ : NULL) starpu-1.4.9+dfsg/starpupy/src/starpupy_cloudpickle.h000066400000000000000000000030001507764646700230750ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define PY_SSIZE_T_CLEAN #include static PyObject *dumps; /*cloudpickle.dumps method*/ static PyObject *loads; /*pickle.loads method*/ /*return the reference of PyBytes which must be kept while using obj_data. See documentation of PyBytes_AsStringAndSize()*/ static inline PyObject* starpu_cloudpickle_dumps(PyObject *obj, char **obj_data, Py_ssize_t *obj_data_size) { PyObject *obj_bytes= PyObject_CallFunctionObjArgs(dumps, obj, NULL); PyBytes_AsStringAndSize(obj_bytes, obj_data, obj_data_size); return obj_bytes; } static inline PyObject* starpu_cloudpickle_loads(char* pyString, Py_ssize_t pyString_size) { PyObject *obj_bytes_str = PyBytes_FromStringAndSize(pyString, pyString_size); PyObject *obj = PyObject_CallFunctionObjArgs(loads, obj_bytes_str, NULL); Py_DECREF(obj_bytes_str); return obj; } starpu-1.4.9+dfsg/starpupy/src/starpupy_handle.c000066400000000000000000000526251507764646700220460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #undef NDEBUG #include #include "starpupy_interface.h" #include "starpupy_buffer_interface.h" #define PY_SSIZE_T_CLEAN #include #ifdef STARPU_PYTHON_HAVE_NUMPY #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include #endif #include "starpupy_handle.h" PyObject *starpu_module; /*starpu __init__ module*/ PyObject *starpu_dict; /*starpu __init__ dictionary*/ /*register buffer protocol PyObject*/ static PyObject* starpupy_object_register(PyObject *obj, PyObject *retval, char* mode) { starpu_data_handle_t handle; int home_node = 0; const char *tp = Py_TYPE(obj)->tp_name; //printf("the type of object is %s\n", tp); /*if we are in master slave mode and the object is not a numpy array and a return value, it cannot work */ if ((starpu_tcpip_ms_worker_get_count() >= 1 || starpu_mpi_ms_worker_get_count() >= 1) && strcmp(tp, "numpy.ndarray") != 0 && !PyObject_IsTrue(retval)) { RETURN_EXCEPTION("in master-slave mode, data handles are supported only for numpy arrays for now"); } /*if the object is bytes*/ if (strcmp(tp, "bytes")==0) { /*bytes size*/ Py_ssize_t nbytes; char* buf_bytes; PyBytes_AsStringAndSize(obj, &buf_bytes, &nbytes); /*register the buffer*/ starpupy_buffer_bytes_register(&handle, home_node, starpupy_bytes_interface, buf_bytes, nbytes, obj); } #ifdef STARPU_PYTHON_HAVE_NUMPY /*if the object is a numpy array*/ else if (strcmp(tp, "numpy.ndarray")==0) { import_array(); /*if array is not contiguous, treat it as a normal Python object*/ if (!PyArray_IS_C_CONTIGUOUS((const PyArrayObject *)obj)&&!PyArray_IS_F_CONTIGUOUS((const PyArrayObject *)obj)) { if(mode != NULL && strcmp(mode, "R")!=0) { RETURN_EXCEPTION("The mode of object should not be other than R"); } else { starpupy_data_register(&handle, home_node, obj); } } /*otherwise treat it as Python object supporting buffer protocol*/ else { /*get number of dimension*/ int ndim = PyArray_NDIM((const PyArrayObject *)obj); /*get array dim*/ npy_intp* arr_dim = PyArray_DIMS((PyArrayObject *)obj); /*get the item size*/ int nitem = PyArray_ITEMSIZE((const PyArrayObject *)obj); /*get the array type*/ int arr_type = PyArray_TYPE((const PyArrayObject *)obj); /*generate buffer of the array*/ Py_buffer *view = (Py_buffer *) malloc(sizeof(*view)); PyObject_GetBuffer(obj, view, PyBUF_SIMPLE); /*register the buffer*/ starpupy_buffer_numpy_register(&handle, home_node, starpupy_numpy_interface, view->buf, view->len, ndim, arr_dim, arr_type, nitem); PyBuffer_Release(view); free(view); } } #endif /*if the object is bytearray*/ else if (strcmp(tp, "bytearray")==0) { /*generate buffer of the array*/ Py_buffer *view = (Py_buffer *) malloc(sizeof(*view)); PyObject_GetBuffer(obj, view, PyBUF_SIMPLE); /*register the buffer*/ starpupy_buffer_bytes_register(&handle, home_node, starpupy_bytearray_interface, view->buf, view->len, obj); PyBuffer_Release(view); free(view); } /*if the object is array.array*/ else if (strcmp(tp, "array.array")==0) { /*get the arraytype*/ PyObject* PyArrtype=PyObject_GetAttrString(obj,"typecode"); const char* type_str = PyUnicode_AsUTF8(PyArrtype); char arr_type = type_str[0]; /*generate buffer of the array*/ Py_buffer *view = (Py_buffer *) malloc(sizeof(*view)); PyObject_GetBuffer(obj, view, PyBUF_SIMPLE); /*register the buffer*/ starpupy_buffer_array_register(&handle, home_node, starpupy_array_interface, view->buf, view->len, arr_type, view->itemsize, obj); Py_DECREF(PyArrtype); PyBuffer_Release(view); free(view); } /*if the object is memoryview*/ else if (strcmp(tp, "memoryview")==0) { /*generate buffer of the memoryview*/ Py_buffer *view = PyMemoryView_GET_BUFFER(obj); /*get the format of memoryview*/ PyObject* PyFormat=PyObject_GetAttrString(obj,"format"); const char* format_str = PyUnicode_AsUTF8(PyFormat); char mem_format = format_str[0]; PyObject* PyShape=PyObject_GetAttrString(obj,"shape"); int ndim = PyTuple_Size(PyShape); int* mem_shape; mem_shape = (int*)malloc(ndim*sizeof(int)); int i; for(i=0; ibuf, view->len, mem_format, view->itemsize, ndim, mem_shape); Py_DECREF(PyFormat); Py_DECREF(PyShape); free(mem_shape); } /*if the object is PyObject*/ else { if(mode != NULL && strcmp(mode, "R")!=0) { RETURN_EXCEPTION("The mode of object should not be other than R"); } else { starpupy_data_register(&handle, home_node, obj); } } PyObject *handle_cap=PyCapsule_New(handle, "Handle", NULL); return handle_cap; } /*register PyObject in a handle*/ PyObject* starpupy_data_register_wrapper(PyObject *self, PyObject *args) { PyObject *obj; PyObject *handle_obj; if (!PyArg_ParseTuple(args, "OO", &obj, &handle_obj)) return NULL; PyObject *retval = PyObject_CallMethod(handle_obj, "get_retval", NULL); /*register the python object*/ PyObject *handle_cap = starpupy_object_register(obj, retval, NULL); if (!handle_cap) return handle_cap; const char *tp = Py_TYPE(obj)->tp_name; //printf("the type of object is %s\n", tp); /*if the object is immutable, store the obj_id and handle_obj in handle_set, and registering the same python object several times is authorised*/ if (strcmp(tp, "int")==0 || strcmp(tp, "float")==0 || strcmp(tp, "str")==0 || strcmp(tp, "bool")==0 || strcmp(tp, "tuple")==0 || strcmp(tp, "range")==0 || strcmp(tp, "complex")==0 || strcmp(tp, "decimal.Decimal")==0 || strcmp(tp, "NoneType")==0) { /*set handle_obj in handle_set*/ /*get handle_set*/ PyObject *handle_set = PyObject_GetAttrString(starpu_module, "handle_set"); /*add new handle object in set*/ PySet_Add(handle_set, handle_obj); Py_DECREF(handle_set); } /*if the object is mutable, store the obj_id and handle_obj in handle_dict, and should not register the same python object more than twice*/ else { /*set the obj_id and handle_obj in handle_dict*/ /*get handle_dict*/ PyObject *handle_dict = PyObject_GetAttrString(starpu_module, "handle_dict"); /*get object id*/ PyObject *obj_id = PyObject_CallMethod(handle_obj, "get_obj_id", NULL); if(PyDict_GetItem(handle_dict, obj_id)!=NULL) { RETURN_EXCEPT("Should not register the same mutable python object once more."); } PyDict_SetItem(handle_dict, obj_id, handle_obj); Py_DECREF(handle_dict); Py_DECREF(obj_id); } return handle_cap; } /*generate empty Numpy array*/ PyObject* starpupy_numpy_register_wrapper(PyObject *self, PyObject *args) { #ifdef STARPU_PYTHON_HAVE_NUMPY /*get the first argument*/ PyObject *dimobj = PyTuple_GetItem(args, 0); /*protect borrowed reference, decrement after check*/ Py_INCREF(dimobj); /*detect whether user provides dtype or not*/ int ndim; npy_intp *dim; /*if the first argument is integer, it's an array one dimension*/ if(PyLong_Check(dimobj)) { ndim = 1; dim = (npy_intp*)malloc(ndim*sizeof(npy_intp)); dim[0] = PyLong_AsLong(dimobj); } /*if the first argument is a tuple, it contains information of dimension*/ else if(PyTuple_Check(dimobj)) { ndim = PyTuple_Size(dimobj); dim = (npy_intp*)malloc(ndim*sizeof(npy_intp)); int i; for (i=0; ihandle*/ starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); if (handle == (void*)-1) { RETURN_EXCEPT("Handle has already been unregistered"); } int ret; /*call starpu_data_acquire*/ Py_BEGIN_ALLOW_THREADS ret= starpu_data_acquire(handle, STARPU_R); Py_END_ALLOW_THREADS if (ret!=0) { RETURN_EXCEPT("Unexpected value %d returned for starpu_data_acquire", ret); } PyObject *obj = NULL; if (STARPUPY_PYOBJ_CHECK(handle)) { struct starpupyobject_interface *pyobject_interface = (struct starpupyobject_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); obj = STARPUPY_GET_PYOBJECT(pyobject_interface); } if (STARPUPY_BUF_CHECK(handle)) { struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); obj = STARPUPY_BUF_GET_PYOBJECT(pybuffer_interface); } /*call starpu_data_release method*/ Py_BEGIN_ALLOW_THREADS starpu_data_release(handle); Py_END_ALLOW_THREADS if(obj == NULL) { RETURN_EXCEPT("Unexpected PyObject value NULL returned for get()"); } return obj; } PyObject *starpupy_handle_dict_check(PyObject *obj, char* mode, char* op) { (void)mode; /*get handle_dict*/ PyObject *handle_dict = PyObject_GetAttrString(starpu_module, "handle_dict"); /*get the arg id*/ PyObject *obj_id = PyLong_FromVoidPtr(obj); //XXX in CPython, the pointer of object can be treated as it's id, in other implementation, it may be realised by other ways PyObject *handle_obj = NULL; if (strcmp(op, "register") == 0) { /*check whether the arg is already registered*/ if(PyDict_GetItem(handle_dict, obj_id)==NULL) { PyObject *Handle_class = PyDict_GetItemString(starpu_dict, "Handle"); /*get the constructor, decremented after being called*/ PyObject *pInstanceHandle = PyInstanceMethod_New(Handle_class); /*create a Null Handle object, decremented in the end of this if{}*/ PyObject *handle_arg = PyTuple_New(1); /*obj is used for PyTuple_SetItem(handle_arg), once handle_arg is decremented, obj is decremented as well*/ Py_INCREF(obj); PyTuple_SetItem(handle_arg, 0, obj); /*generate the handle object, decremented in the end of this function*/ handle_obj = PyObject_CallObject(pInstanceHandle,handle_arg); /*set the arg_id and handle in handle_dict*/ PyDict_SetItem(handle_dict, obj_id, handle_obj); Py_DECREF(pInstanceHandle); Py_DECREF(handle_arg); } else { handle_obj = PyDict_GetItem(handle_dict, obj_id); /*protect borrowed reference, decremented in the end of this function*/ Py_INCREF(handle_obj); } } else if (strcmp(op, "exception") == 0) { /*check in handle_dict whether this arg is already registered*/ if(!PyDict_Contains(handle_dict, obj_id)) { RETURN_EXCEPTION("Argument does not have registered handle"); } /*get the corresponding handle of the obj*/ handle_obj = PyDict_GetItem(handle_dict, obj_id); /*protect borrowed reference, decremented in the end of this function*/ Py_INCREF(handle_obj); } Py_DECREF(handle_dict); Py_DECREF(obj_id); /*get Handle capsule object, which is the return value of this function*/ PyObject *handle_cap = PyObject_CallMethod(handle_obj, "get_capsule", NULL); Py_DECREF(handle_obj); return handle_cap; } /*acquire Handle*/ PyObject *starpupy_acquire_handle_wrapper(PyObject *self, PyObject *args) { PyObject *handle_cap; PyObject *pyMode; if (!PyArg_ParseTuple(args, "OO", &handle_cap, &pyMode)) return NULL; const char* mode_str = PyUnicode_AsUTF8(pyMode); char* obj_mode = strdup(mode_str); /*PyObject *->handle*/ starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); if (handle == (void*)-1) { RETURN_EXCEPT("Handle has already been unregistered"); } int ret=0; if(strcmp(obj_mode, "R") == 0) { /*call starpu_data_acquire(STARPU_R)*/ Py_BEGIN_ALLOW_THREADS ret= starpu_data_acquire(handle, STARPU_R); Py_END_ALLOW_THREADS } if(strcmp(obj_mode, "W") == 0) { /*call starpu_data_acquire(STARPU_W)*/ Py_BEGIN_ALLOW_THREADS ret= starpu_data_acquire(handle, STARPU_W); Py_END_ALLOW_THREADS } if(strcmp(obj_mode, "RW") == 0) { /*call starpu_data_acquire(STARPU_RW)*/ Py_BEGIN_ALLOW_THREADS ret= starpu_data_acquire(handle, STARPU_RW); Py_END_ALLOW_THREADS } free(obj_mode); if (ret!=0) { RETURN_EXCEPT("Unexpected value returned for starpu_data_acquire"); } PyObject *obj = NULL; if (STARPUPY_PYOBJ_CHECK(handle)) { struct starpupyobject_interface *pyobject_interface = (struct starpupyobject_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); obj = STARPUPY_GET_PYOBJECT(pyobject_interface); } if (STARPUPY_BUF_CHECK(handle)) { struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); obj = STARPUPY_BUF_GET_PYOBJECT(pybuffer_interface); } return obj; } /*acquire PyObject Handle*/ PyObject *starpupy_acquire_object_wrapper(PyObject *self, PyObject *args) { PyObject *obj; PyObject *pyMode; if (!PyArg_ParseTuple(args, "OO", &obj, &pyMode)) return NULL; const char* mode_str = PyUnicode_AsUTF8(pyMode); char* obj_mode = strdup(mode_str); /*get the corresponding handle capsule of the obj*/ PyObject *handle_cap = starpupy_handle_dict_check(obj, NULL, "register"); /*PyObject *->handle*/ starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); Py_DECREF(handle_cap); int ret=0; if(strcmp(obj_mode, "R") == 0) { /*call starpu_data_acquire(STARPU_R)*/ Py_BEGIN_ALLOW_THREADS ret= starpu_data_acquire(handle, STARPU_R); Py_END_ALLOW_THREADS } if(strcmp(obj_mode, "W") == 0) { /*call starpu_data_acquire(STARPU_W)*/ Py_BEGIN_ALLOW_THREADS ret= starpu_data_acquire(handle, STARPU_W); Py_END_ALLOW_THREADS } if(strcmp(obj_mode, "RW") == 0) { /*call starpu_data_acquire(STARPU_RW)*/ Py_BEGIN_ALLOW_THREADS ret= starpu_data_acquire(handle, STARPU_RW); Py_END_ALLOW_THREADS } free(obj_mode); if (ret!=0) { RETURN_EXCEPT("Unexpected value returned for starpu_data_acquire"); } PyObject *obj_get = NULL; if (STARPUPY_PYOBJ_CHECK(handle)) { struct starpupyobject_interface *pyobject_interface = (struct starpupyobject_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); obj_get = STARPUPY_GET_PYOBJECT(pyobject_interface); } if (STARPUPY_BUF_CHECK(handle)) { struct starpupy_buffer_interface *pybuffer_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); obj_get = STARPUPY_BUF_GET_PYOBJECT(pybuffer_interface); } return obj_get; } /*release Handle*/ PyObject *starpupy_release_handle_wrapper(PyObject *self, PyObject *args) { PyObject *handle_cap; if (!PyArg_ParseTuple(args, "O", &handle_cap)) return NULL; /*PyObject *->handle*/ starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); if (handle == (void*)-1) { RETURN_EXCEPT("Handle has already been unregistered"); } if (!STARPUPY_BUF_CHECK(handle)) { RETURN_EXCEPT("Wrong interface is used"); } /*call starpu_data_release method*/ Py_BEGIN_ALLOW_THREADS starpu_data_release(handle); Py_END_ALLOW_THREADS /*return type is void*/ Py_INCREF(Py_None); return Py_None; } /*release PyObejct Handle*/ PyObject *starpupy_release_object_wrapper(PyObject *self, PyObject *args) { (void)self; PyObject *obj; if (!PyArg_ParseTuple(args, "O", &obj)) return NULL; /*get the corresponding handle capsule of the obj*/ PyObject *handle_cap = starpupy_handle_dict_check(obj, NULL, "exception"); if(handle_cap == NULL) { Py_XDECREF(handle_cap); return NULL; } /*PyObject *->handle*/ starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); Py_DECREF(handle_cap); /*call starpu_data_release method*/ Py_BEGIN_ALLOW_THREADS starpu_data_release(handle); Py_END_ALLOW_THREADS /*return type is void*/ Py_INCREF(Py_None); return Py_None; } static void starpupy_remove_handle_from_dict(PyObject *obj_id) { /*delete object from handle_dict*/ PyObject *handle_dict = PyObject_GetAttrString(starpu_module, "handle_dict"); if(PyDict_GetItem(handle_dict, obj_id) != NULL) { PyDict_DelItem(handle_dict, obj_id); } Py_DECREF(handle_dict); } static void starpupy_remove_handle_from_set(PyObject *handle_obj) { /*delete object from handle_set*/ PyObject *handle_set = PyObject_GetAttrString(starpu_module, "handle_set"); PySet_Discard(handle_set, handle_obj); Py_DECREF(handle_set); } /* unregister handle*/ PyObject *starpupy_data_unregister_wrapper(PyObject *self, PyObject *args) { PyObject *handle_obj; if (!PyArg_ParseTuple(args, "O", &handle_obj)) return NULL; /*get the handle capsule*/ PyObject *handle_cap = PyObject_CallMethod(handle_obj, "get_capsule", NULL); /*get the id of arg*/ PyObject *obj_id = PyObject_CallMethod(handle_obj, "get_obj_id", NULL); /*PyObject *->handle*/ starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); if (handle == (void*)-1) { RETURN_EXCEPT("Handle has already been unregistered"); } /*call starpu_data_unregister method*/ Py_BEGIN_ALLOW_THREADS starpu_data_unregister(handle); Py_END_ALLOW_THREADS PyCapsule_SetPointer(handle_cap, (void*)-1); starpupy_remove_handle_from_dict(obj_id); starpupy_remove_handle_from_set(handle_obj); Py_DECREF(handle_cap); Py_DECREF(obj_id); /*return type is void*/ Py_INCREF(Py_None); return Py_None; } /* unregister PyObject handle*/ PyObject *starpupy_data_unregister_object_wrapper(PyObject *self, PyObject *args) { PyObject *obj; if (!PyArg_ParseTuple(args, "O", &obj)) return NULL; /*get the corresponding handle capsule of the obj*/ PyObject *handle_cap = starpupy_handle_dict_check(obj, NULL, "exception"); /*get the id of obj*/ PyObject *obj_id = PyLong_FromVoidPtr(obj); if(handle_cap == NULL) { Py_XDECREF(handle_cap); return NULL; } /*PyObject *->handle*/ starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); if (handle == (void*)-1) { RETURN_EXCEPT("Handle has already been unregistered"); } /*call starpu_data_unregister method*/ Py_BEGIN_ALLOW_THREADS starpu_data_unregister(handle); Py_END_ALLOW_THREADS PyCapsule_SetPointer(handle_cap, (void*)-1); starpupy_remove_handle_from_dict(obj_id); Py_DECREF(handle_cap); Py_DECREF(obj_id); /*return type is void*/ Py_INCREF(Py_None); return Py_None; } /* unregister_submit handle*/ PyObject *starpupy_data_unregister_submit_wrapper(PyObject *self, PyObject *args) { PyObject *handle_obj; if (!PyArg_ParseTuple(args, "O", &handle_obj)) return NULL; /*get the handle capsule*/ PyObject *handle_cap = PyObject_CallMethod(handle_obj, "get_capsule", NULL); /*get the id of arg*/ PyObject *obj_id = PyObject_CallMethod(handle_obj, "get_obj_id", NULL); /*PyObject *->handle*/ starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); if (handle == (void*)-1) { RETURN_EXCEPT("Handle has already been unregistered"); } /*call starpu_data_unregister method*/ Py_BEGIN_ALLOW_THREADS starpu_data_unregister_submit(handle); Py_END_ALLOW_THREADS PyCapsule_SetPointer(handle_cap, (void*)-1); starpupy_remove_handle_from_dict(obj_id); starpupy_remove_handle_from_set(handle_obj); Py_DECREF(handle_cap); Py_DECREF(obj_id); /*return type is void*/ Py_INCREF(Py_None); return Py_None; } /* unregister_submit PyObject handle*/ PyObject *starpupy_data_unregister_submit_object_wrapper(PyObject *self, PyObject *args) { PyObject *obj; if (!PyArg_ParseTuple(args, "O", &obj)) return NULL; /*get the corresponding handle capsule of the obj*/ PyObject *handle_cap = starpupy_handle_dict_check(obj, NULL, "exception"); /*get the id of obj*/ PyObject *obj_id = PyLong_FromVoidPtr(obj); if(handle_cap == NULL) { Py_XDECREF(handle_cap); return NULL; } /*PyObject *->handle*/ starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_cap, "Handle"); if (handle == (void*)-1) { PyErr_Format(PyObject_GetAttrString(self, "error"), "Handle has already been unregistered"); return NULL; } /*call starpu_data_unregister method*/ Py_BEGIN_ALLOW_THREADS starpu_data_unregister_submit(handle); Py_END_ALLOW_THREADS PyCapsule_SetPointer(handle_cap, (void*)-1); starpupy_remove_handle_from_dict(obj_id); Py_DECREF(handle_cap); Py_DECREF(obj_id); /*return type is void*/ Py_INCREF(Py_None); return Py_None; } starpu-1.4.9+dfsg/starpupy/src/starpupy_handle.h000066400000000000000000000034151507764646700220440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define PY_SSIZE_T_CLEAN #include extern PyObject *starpu_module; /*starpu __init__ module*/ extern PyObject *starpu_dict; /*starpu __init__ dictionary*/ PyObject *starpupy_handle_dict_check(PyObject *obj, char* mode, char* op); PyObject *starpupy_data_register_wrapper(PyObject *self, PyObject *args); PyObject *starpupy_numpy_register_wrapper(PyObject *self, PyObject *args); PyObject *starpupy_get_object_wrapper(PyObject *self, PyObject *args); PyObject *starpupy_acquire_handle_wrapper(PyObject *self, PyObject *args); PyObject *starpupy_acquire_object_wrapper(PyObject *self, PyObject *args); PyObject *starpupy_release_handle_wrapper(PyObject *self, PyObject *args); PyObject *starpupy_release_object_wrapper(PyObject *self, PyObject *args); PyObject *starpupy_data_unregister_wrapper(PyObject *self, PyObject *args); PyObject *starpupy_data_unregister_object_wrapper(PyObject *self, PyObject *args); PyObject *starpupy_data_unregister_submit_wrapper(PyObject *self, PyObject *args); PyObject *starpupy_data_unregister_submit_object_wrapper(PyObject *self, PyObject *args); starpu-1.4.9+dfsg/starpupy/src/starpupy_interface.c000066400000000000000000000231211507764646700225400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #undef NDEBUG #include #define PY_SSIZE_T_CLEAN #include #ifdef STARPU_PYTHON_HAVE_NUMPY #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include #endif #include "starpupy_interface.h" void starpupy_set_pyobject(struct starpupyobject_interface *pyobject_interface, PyObject *value) { if (pyobject_interface->object != NULL) Py_DECREF(pyobject_interface->object); pyobject_interface->object = value; } static void pyobject_register_data_handle(starpu_data_handle_t handle, int home_node, void *data_interface) { struct starpupyobject_interface *pyobject_interface = (struct starpupyobject_interface *) data_interface; int node; for (node =0; node < STARPU_MAXNODES; node++) { struct starpupyobject_interface *local_interface = (struct starpupyobject_interface *) starpu_data_get_interface_on_node(handle, node); local_interface->id = pyobject_interface->id; if (node == home_node) { Py_INCREF(pyobject_interface->object); local_interface->object = pyobject_interface->object; } else { local_interface->object = NULL; } } } static void pyobject_unregister_data_handle(starpu_data_handle_t handle) { /*make sure we own the GIL*/ PyGILState_STATE state = PyGILState_Ensure(); int node = starpu_data_get_home_node(handle); if (node >= 0) { struct starpupyobject_interface *local_interface = (struct starpupyobject_interface *) starpu_data_get_interface_on_node(handle, node); Py_DECREF(local_interface->object); local_interface->object = NULL; } /* release GIL */ PyGILState_Release(state); } static starpu_ssize_t pyobject_allocate_data_on_node(void *data_interface, unsigned node) { (void)node; (void)data_interface; return 0; } static void pyobject_free_data_on_node(void *data_interface, unsigned node) { (void)node; /*make sure we own the GIL*/ PyGILState_STATE state = PyGILState_Ensure(); struct starpupyobject_interface *pyobject_interface = (struct starpupyobject_interface *) data_interface; if (pyobject_interface->object != NULL) { Py_DECREF(pyobject_interface->object); } pyobject_interface->object = NULL; /* release GIL */ PyGILState_Release(state); } static size_t pyobject_get_size(starpu_data_handle_t handle) { #ifdef STARPU_DEVEL #warning this operation is needed for fxt tracing when calling starpu_data_register(), using the cloudpickle as below does not seem to work #endif (void)handle; return sizeof(struct starpupyobject_interface); } /*return the reference of PyBytes which must be kept while using obj_data. See documentation of PyBytes_AsStringAndSize()*/ static PyObject * _pyobject_pack_data(struct starpupyobject_interface *pyobject_interface, char **obj_data, Py_ssize_t *obj_data_size) { /*make sure we own the GIL*/ PyGILState_STATE state = PyGILState_Ensure(); /*borrow the reference from the interface*/ PyObject *obj = pyobject_interface->object; PyObject *cloudpickle_module = PyImport_ImportModule("cloudpickle"); if (cloudpickle_module == NULL) { printf("can't find cloudpickle module\n"); Py_XDECREF(cloudpickle_module); exit(1); } PyObject *dumps = PyObject_GetAttrString(cloudpickle_module, "dumps"); PyObject *obj_bytes = PyObject_CallFunctionObjArgs(dumps, obj, NULL); PyBytes_AsStringAndSize(obj_bytes, obj_data, obj_data_size); Py_DECREF(cloudpickle_module); Py_DECREF(dumps); /*restore previous GIL state*/ PyGILState_Release(state); return obj_bytes; } static int pyobject_pack_data(starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count) { struct starpupyobject_interface *pyobject_interface = (struct starpupyobject_interface *) starpu_data_get_interface_on_node(handle, node); PyObject *obj_bytes; char *obj_data; Py_ssize_t obj_data_size; /*make sure we own the GIL*/ PyGILState_STATE state = PyGILState_Ensure(); obj_bytes = _pyobject_pack_data(pyobject_interface, &obj_data, &obj_data_size); char *data; data = (void*)starpu_malloc_on_node_flags(node, obj_data_size, 0); memcpy(data, obj_data, obj_data_size); *ptr = data; *count = obj_data_size; Py_DECREF(obj_bytes); /* release GIL */ PyGILState_Release(state); return 0; } static int _pyobject_peek_data(struct starpupyobject_interface *pyobject_interface, unsigned node, void *ptr, size_t count) { (void)node; /*make sure we own the GIL*/ PyGILState_STATE state = PyGILState_Ensure(); char *data = ptr; PyObject *pickle_module = PyImport_ImportModule("pickle"); if (pickle_module == NULL) { printf("can't find pickle module\n"); Py_XDECREF(pickle_module); exit(1); } PyObject *loads = PyObject_GetAttrString(pickle_module, "loads"); /* TODO: should tell python that we want allocation to happen on node \p node */ PyObject *obj_bytes_str = PyBytes_FromStringAndSize(data, count); PyObject *obj= PyObject_CallFunctionObjArgs(loads, obj_bytes_str, NULL); if(pyobject_interface->object != NULL) Py_DECREF(pyobject_interface->object); pyobject_interface->object = obj; pyobject_interface->id = _starpupy_interface_pyobject_ops.interfaceid; Py_DECREF(pickle_module); Py_DECREF(loads); Py_DECREF(obj_bytes_str); /*restore previous GIL state*/ PyGILState_Release(state); return 0; } static int pyobject_peek_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { struct starpupyobject_interface *pyobject_interface = (struct starpupyobject_interface *) starpu_data_get_interface_on_node(handle, node); if (pyobject_interface->object != NULL) { Py_DECREF(pyobject_interface->object); } return _pyobject_peek_data(pyobject_interface, node, ptr, count); } static int pyobject_unpack_data(starpu_data_handle_t handle, unsigned node, void *ptr, size_t count) { pyobject_peek_data(handle, node, ptr, count); starpu_free_on_node_flags(node, (uintptr_t) ptr, count, 0); return 0; } static uint32_t starpupy_footprint(starpu_data_handle_t handle) { struct starpupyobject_interface *pyobject_interface = (struct starpupyobject_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); /*make sure we own the GIL*/ PyGILState_STATE state = PyGILState_Ensure(); /*borrow the reference from the interface*/ PyObject *obj = pyobject_interface->object; /*fet obj.__class__*/ PyObject *obj_class=PyObject_GetAttrString(obj,"__class__"); // PyObject_Print(obj_class, stdout, 0); // printf("\n"); uint32_t crc = 0; crc=starpu_hash_crc32c_be_ptr(obj_class, crc); Py_DECREF(obj_class); #ifdef STARPU_PYTHON_HAVE_NUMPY const char *tp = Py_TYPE(obj)->tp_name; /*if the object is a numpy array*/ if (strcmp(tp, "numpy.ndarray")==0) { import_array1(0); /*get the array size*/ int n1 = PyArray_SIZE((PyArrayObject *)obj); /*get the item size*/ int n2 = PyArray_ITEMSIZE((const PyArrayObject *)obj); crc=starpu_hash_crc32c_be(n1, crc); crc=starpu_hash_crc32c_be(n2, crc); } else #endif { crc=starpu_hash_crc32c_be_ptr(obj, crc); } /*restore previous GIL state*/ PyGILState_Release(state); return crc; } static int pyobject_copy_ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { (void)src_node; struct starpupyobject_interface *src = (struct starpupyobject_interface *) src_interface; struct starpupyobject_interface *dst = (struct starpupyobject_interface *) dst_interface; PyObject *obj_bytes; char *obj_data; Py_ssize_t obj_data_size; /*make sure we own the GIL*/ PyGILState_STATE state = PyGILState_Ensure(); obj_bytes = _pyobject_pack_data(src, &obj_data, &obj_data_size); _pyobject_peek_data(dst, dst_node, obj_data, obj_data_size); Py_DECREF(obj_bytes); /* release GIL */ PyGILState_Release(state); return 0; } static const struct starpu_data_copy_methods pyobject_copy_data_methods_s = { .ram_to_ram = pyobject_copy_ram_to_ram, }; struct starpu_data_interface_ops _starpupy_interface_pyobject_ops = { .register_data_handle = pyobject_register_data_handle, .unregister_data_handle = pyobject_unregister_data_handle, .allocate_data_on_node = pyobject_allocate_data_on_node, .free_data_on_node = pyobject_free_data_on_node, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct starpupyobject_interface), .footprint = starpupy_footprint, .pack_data = pyobject_pack_data, .peek_data = pyobject_peek_data, .unpack_data = pyobject_unpack_data, .get_size = pyobject_get_size, .dontcache = 1, .name = "STARPUPY_OBJECT_INTERFACE", .copy_methods = &pyobject_copy_data_methods_s, }; void starpupy_data_register(starpu_data_handle_t *handleptr, unsigned home_node, PyObject *obj) { assert(_starpupy_interface_pyobject_ops.interfaceid != STARPU_UNKNOWN_INTERFACE_ID); struct starpupyobject_interface pyobject_interface = { .id = _starpupy_interface_pyobject_ops.interfaceid, .object = obj }; starpu_data_register(handleptr, home_node, &pyobject_interface, &_starpupy_interface_pyobject_ops); } int starpupy_check_pyobject_interface_id(starpu_data_handle_t handle) { int interfaceid = (int)starpu_data_get_interface_id(handle); return interfaceid == _starpupy_interface_pyobject_ops.interfaceid; } starpu-1.4.9+dfsg/starpupy/src/starpupy_interface.h000066400000000000000000000033411507764646700225470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #define PY_SSIZE_T_CLEAN #include #include extern struct starpu_data_interface_ops _starpupy_interface_pyobject_ops; struct starpupyobject_interface { int id; /**< Identifier of the interface */ PyObject *object; }; void starpupy_data_register(starpu_data_handle_t *handleptr, unsigned home_node, PyObject *obj); int starpupy_check_pyobject_interface_id(starpu_data_handle_t handle); /* Steals a reference to value */ void starpupy_set_pyobject(struct starpupyobject_interface *pyobject_interface, PyObject *value); #define STARPUPY_PYOBJ_CHECK(handle) (starpupy_check_pyobject_interface_id(handle)) #define STARPUPY_PYOBJ_CHECK_INTERFACE(interface) (((struct starpupyobject_interface *)(interface))->id == _starpupy_interface_pyobject_ops.interfaceid) #define STARPUPY_GET_PYOBJECT(interface) (Py_INCREF(((struct starpupyobject_interface *)(interface))->object), ((struct starpupyobject_interface *)(interface))->object) #define STARPUPY_SET_PYOBJECT(interface, value) (starpupy_set_pyobject(interface, value)) starpu-1.4.9+dfsg/starpupy/src/starpupy_numpy_filters.c000066400000000000000000000165761507764646700235200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #undef NDEBUG #include #define PY_SSIZE_T_CLEAN #include #ifdef STARPU_PYTHON_HAVE_NUMPY #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include #endif #include "starpupy_buffer_interface.h" #include "starpupy_numpy_filters.h" static void starpupy_numpy_filter(void *father_interface, void *child_interface, STARPU_ATTRIBUTE_UNUSED struct starpu_data_filter *f, unsigned id, unsigned nchunks) { struct starpupy_buffer_interface *buffer_father = (struct starpupy_buffer_interface *) father_interface; struct starpupy_buffer_interface *buffer_child = (struct starpupy_buffer_interface *) child_interface; size_t elemsize = buffer_father->item_size; STARPU_ASSERT_MSG(buffer_father->id == _starpupy_interface_pybuffer_ops.interfaceid, "%s can only be applied on a vector data", __func__); /*get the ndim*/ int ndim = buffer_father->dim_size; #ifdef STARPU_PYTHON_HAVE_NUMPY Py_ssize_t nbuf = buffer_father->buffer_size; int narr = nbuf/elemsize; int child_narr; size_t offset; int dim = f->filter_arg; unsigned ni[ndim]; int i; for (i=0; iarray_dim[i]; } unsigned nn = ni[dim]; unsigned ld; if (dim == 0 && ndim != 1) { ld = ni[1]; } else if (dim == 1 || ndim == 1) { ld = 1; } else { ld = 1; for (i=0; ifilter_arg_ptr; if (chunks_list != NULL) { child_nn = chunks_list[id]; unsigned chunk_nn = 0; unsigned j = 0; while(j < id) { chunk_nn = chunk_nn + chunks_list[j]; j++; } offset = chunk_nn * ld * elemsize; } else { starpu_filter_nparts_compute_chunk_size_and_offset(nn, nchunks, elemsize, id, ld, &child_nn, &offset); } child_narr = narr/nn*child_nn; if(buffer_father->py_buffer) buffer_child->py_buffer = buffer_father->py_buffer + offset; buffer_child->buffer_size = child_narr * elemsize; npy_intp *child_dim; child_dim = (npy_intp*)malloc(ndim*sizeof(npy_intp)); for (i=0; iarray_dim = child_dim; #endif buffer_child->id = buffer_father->id; buffer_child->buffer_type = buffer_father->buffer_type; buffer_child->dim_size = ndim; buffer_child->array_type = buffer_father->array_type; buffer_child->item_size = elemsize; } /*wrapper data partition*/ PyObject* starpu_data_partition_wrapper(PyObject *self, PyObject *args) { PyObject *handle_obj; int nparts; int dim; PyObject *chunks_list; if (!PyArg_ParseTuple(args, "OIIO", &handle_obj, &nparts, &dim, &chunks_list)) return NULL; /*PyObject *->handle*/ starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_obj, "Handle"); if (handle == (void*)-1) { RETURN_EXCEPT("Handle has already been unregistered"); } int node = starpu_data_get_home_node(handle); struct starpupy_buffer_interface *local_interface = (struct starpupy_buffer_interface *) starpu_data_get_interface_on_node(handle, node); int ndim = local_interface->dim_size; if (ndim <= 0) { RETURN_EXCEPT("Dimension size %d must be greater than 0.", ndim); } if (dim < 0) { RETURN_EXCEPT("The given dimension dim %d must not be less than 0.", dim); } if (dim >= ndim) { RETURN_EXCEPT("dim %d must be less than dimension size %d.", dim, ndim); } int i; int dim_len = 0; int nlist = PyList_Size(chunks_list); int nchunks[nparts]; if(nlist != 0) { if (nlist != nparts) { RETURN_EXCEPT("The chunk list size %d does not correspond to the required split size %d.", nlist, nparts); } for (i=0; iarray_dim[dim]) { RETURN_EXCEPT("The total length of segments in chunk list %d must be equal to the length of selected dimension %d.", dim_len, local_interface->array_dim[dim]); } #endif } /*filter func*/ struct starpu_data_filter f; starpu_data_handle_t handles[nparts]; f.filter_func = starpupy_numpy_filter; f.nchildren = nparts; f.get_nchildren = 0; f.get_child_ops = 0; f.filter_arg_ptr = (nlist==0) ? NULL : nchunks; /* partition along the given dimension */ f.filter_arg = dim; Py_BEGIN_ALLOW_THREADS starpu_data_partition_plan(handle, &f, handles); Py_END_ALLOW_THREADS PyObject *handle_list = PyList_New(nparts); for(i=0; ihandle*/ starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_obj, "Handle"); if (handle == (void*)-1) { RETURN_EXCEPT("Handle has already been unregistered"); } PyObject *arr_size = PyList_New(nparts); int i; for(i=0; ibuffer_size/local_interface->item_size; PyList_SetItem(arr_size, i, Py_BuildValue("I", narr)); Py_DECREF(handles_cap); } return arr_size; } /*wrapper data unpartition*/ PyObject* starpu_data_unpartition_wrapper(PyObject *self, PyObject *args) { PyObject *handle_obj; PyObject *handle_list; int nparts; if (!PyArg_ParseTuple(args, "OOI", &handle_obj, &handle_list, &nparts)) return NULL; /*PyObject *->handle*/ starpu_data_handle_t handle = (starpu_data_handle_t) PyCapsule_GetPointer(handle_obj, "Handle"); if (handle == (void*)-1) { RETURN_EXCEPT("Handle has already been unregistered"); } starpu_data_handle_t handles[nparts]; int i; for(i=0; i #define PY_SSIZE_T_CLEAN #include PyObject* starpu_data_partition_wrapper(PyObject *self, PyObject *args); PyObject* starpupy_get_partition_size_wrapper(PyObject *self, PyObject *args); PyObject* starpu_data_unpartition_wrapper(PyObject *self, PyObject *args); starpu-1.4.9+dfsg/starpupy/src/starpupy_private.h000066400000000000000000000024421507764646700222620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2023-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPUPY__PRIVATE_H #define __STARPUPY__PRIVATE_H #define RETURN_EXCEPT(...) do{ \ PyObject *starpupy_err = PyObject_GetAttrString(self, "error"); \ PyErr_Format(starpupy_err, __VA_ARGS__); \ Py_DECREF(starpupy_err); \ return NULL;\ }while(0) #define RETURN_EXCEPTION(...) do{ \ PyObject *starpupy_module = PyObject_GetAttrString(starpu_module, "starpupy"); \ PyObject *starpupy_err = PyObject_GetAttrString(starpupy_module, "error"); \ PyErr_Format(starpupy_err, __VA_ARGS__); \ Py_DECREF(starpupy_module); \ Py_DECREF(starpupy_err); \ return NULL;\ }while(0) #endif // __STARPUPY__PRIVATE_H starpu-1.4.9+dfsg/starpurm/000077500000000000000000000000001507764646700156655ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpurm/Makefile.am000066400000000000000000000020541507764646700177220ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-subdirtests.mk SUBDIRS = src SUBDIRS += tests if STARPU_BUILD_STARPURM_EXAMPLES SUBDIRS += examples endif pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = packages/starpurm-1.3.pc versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) versinclude_HEADERS = \ include/starpurm.h nodist_versinclude_HEADERS = \ include/starpurm_config.h starpu-1.4.9+dfsg/starpurm/Makefile.in000066400000000000000000000776251507764646700177530ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_BUILD_STARPURM_EXAMPLES_TRUE@am__append_1 = examples subdir = starpurm ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(versinclude_HEADERS) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(pkgconfigdir)" \ "$(DESTDIR)$(versincludedir)" "$(DESTDIR)$(versincludedir)" DATA = $(pkgconfig_DATA) HEADERS = $(nodist_versinclude_HEADERS) $(versinclude_HEADERS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = src tests examples am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/make/starpu-subdirtests.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = src tests $(am__append_1) pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = packages/starpurm-1.3.pc versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION) versinclude_HEADERS = \ include/starpurm.h nodist_versinclude_HEADERS = \ include/starpurm_config.h all: all-recursive .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-subdirtests.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpurm/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign starpurm/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-subdirtests.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-pkgconfigDATA: $(pkgconfig_DATA) @$(NORMAL_INSTALL) @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \ done uninstall-pkgconfigDATA: @$(NORMAL_UNINSTALL) @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir) install-nodist_versincludeHEADERS: $(nodist_versinclude_HEADERS) @$(NORMAL_INSTALL) @list='$(nodist_versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(versincludedir)'"; \ $(MKDIR_P) "$(DESTDIR)$(versincludedir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(versincludedir)'"; \ $(INSTALL_HEADER) $$files "$(DESTDIR)$(versincludedir)" || exit $$?; \ done uninstall-nodist_versincludeHEADERS: @$(NORMAL_UNINSTALL) @list='$(nodist_versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(versincludedir)'; $(am__uninstall_files_from_dir) install-versincludeHEADERS: $(versinclude_HEADERS) @$(NORMAL_INSTALL) @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(versincludedir)'"; \ $(MKDIR_P) "$(DESTDIR)$(versincludedir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(versincludedir)'"; \ $(INSTALL_HEADER) $$files "$(DESTDIR)$(versincludedir)" || exit $$?; \ done uninstall-versincludeHEADERS: @$(NORMAL_UNINSTALL) @list='$(versinclude_HEADERS)'; test -n "$(versincludedir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(versincludedir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile $(DATA) $(HEADERS) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(pkgconfigdir)" "$(DESTDIR)$(versincludedir)" "$(DESTDIR)$(versincludedir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-nodist_versincludeHEADERS \ install-pkgconfigDATA install-versincludeHEADERS install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-nodist_versincludeHEADERS \ uninstall-pkgconfigDATA uninstall-versincludeHEADERS .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ check-am clean clean-generic clean-libtool cscopelist-am ctags \ ctags-am distclean distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-nodist_versincludeHEADERS install-pdf install-pdf-am \ install-pkgconfigDATA install-ps install-ps-am install-strip \ install-versincludeHEADERS installcheck installcheck-am \ installdirs installdirs-am maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ uninstall-am uninstall-nodist_versincludeHEADERS \ uninstall-pkgconfigDATA uninstall-versincludeHEADERS .PRECIOUS: Makefile # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # recheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i recheck || RET=1 ; \ done ; \ exit $$RET showcheckfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/starpurm/examples/000077500000000000000000000000001507764646700175035ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpurm/examples/Makefile.am000066400000000000000000000030741507764646700215430ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk SUBDIRS = CLEANFILES = *.gcno *.gcda *.linkinfo AM_CFLAGS += $(DLB_CFLAGS) AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_builddir)/include AM_CPPFLAGS += -I$(top_srcdir)/starpurm/include -I$(top_srcdir)/starpurm/src -I$(top_builddir)/starpurm/src -I$(top_builddir)/starpurm/include $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ LIBS += $(top_builddir)/starpurm/src/libstarpurm-@STARPU_EFFECTIVE_VERSION@.la $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) LIBS += $(HWLOC_LIBS) $(DLB_LIBS) EXTRA_DIST = \ chameleon/dgemm.c \ cuda_vector_scale/vector_scale.c \ cuda_vector_scale/vs_cuda_kernel.cu examplebindir = $(libdir)/starpu/examples/starpurm examplebin_PROGRAMS = $(STARPU_EXAMPLES) TESTS = $(STARPU_EXAMPLES) STARPU_EXAMPLES = \ async_spawn \ spawn \ vector_scale \ block_test/block_test starpu-1.4.9+dfsg/starpurm/examples/Makefile.in000066400000000000000000001604261507764646700215610ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ examplebin_PROGRAMS = $(am__EXEEXT_1) TESTS = $(am__EXEEXT_1) subdir = starpurm/examples ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__EXEEXT_1 = async_spawn$(EXEEXT) spawn$(EXEEXT) \ vector_scale$(EXEEXT) block_test/block_test$(EXEEXT) am__installdirs = "$(DESTDIR)$(examplebindir)" PROGRAMS = $(examplebin_PROGRAMS) async_spawn_SOURCES = async_spawn.c async_spawn_OBJECTS = async_spawn.$(OBJEXT) async_spawn_LDADD = $(LDADD) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = block_test_block_test_SOURCES = block_test/block_test.c am__dirstamp = $(am__leading_dot)dirstamp block_test_block_test_OBJECTS = block_test/block_test.$(OBJEXT) block_test_block_test_LDADD = $(LDADD) spawn_SOURCES = spawn.c spawn_OBJECTS = spawn.$(OBJEXT) spawn_LDADD = $(LDADD) vector_scale_SOURCES = vector_scale.c vector_scale_OBJECTS = vector_scale.$(OBJEXT) vector_scale_LDADD = $(LDADD) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/async_spawn.Po ./$(DEPDIR)/spawn.Po \ ./$(DEPDIR)/vector_scale.Po block_test/$(DEPDIR)/block_test.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = async_spawn.c block_test/block_test.c spawn.c vector_scale.c DIST_SOURCES = async_spawn.c block_test/block_test.c spawn.c \ vector_scale.c RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ check recheck distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ \ $(top_builddir)/starpurm/src/libstarpurm-@STARPU_EFFECTIVE_VERSION@.la \ $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) \ $(HWLOC_LIBS) $(DLB_LIBS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = LAUNCHER = AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(DLB_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # SUBDIRS = CLEANFILES = *.gcno *.gcda *.linkinfo AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src \ -I$(top_builddir)/src -I$(top_builddir)/include \ -I$(top_srcdir)/starpurm/include -I$(top_srcdir)/starpurm/src \ -I$(top_builddir)/starpurm/src \ -I$(top_builddir)/starpurm/include $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ EXTRA_DIST = \ chameleon/dgemm.c \ cuda_vector_scale/vector_scale.c \ cuda_vector_scale/vs_cuda_kernel.cu examplebindir = $(libdir)/starpu/examples/starpurm STARPU_EXAMPLES = \ async_spawn \ spawn \ vector_scale \ block_test/block_test all: all-recursive .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpurm/examples/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign starpurm/examples/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): install-examplebinPROGRAMS: $(examplebin_PROGRAMS) @$(NORMAL_INSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ fi; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ while read p p1; do if test -f $$p \ || test -f $$p1 \ ; then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' \ -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ } \ ; done uninstall-examplebinPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' \ `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(examplebindir)" && rm -f $$files clean-examplebinPROGRAMS: @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list async_spawn$(EXEEXT): $(async_spawn_OBJECTS) $(async_spawn_DEPENDENCIES) $(EXTRA_async_spawn_DEPENDENCIES) @rm -f async_spawn$(EXEEXT) $(AM_V_CCLD)$(LINK) $(async_spawn_OBJECTS) $(async_spawn_LDADD) $(LIBS) block_test/$(am__dirstamp): @$(MKDIR_P) block_test @: > block_test/$(am__dirstamp) block_test/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) block_test/$(DEPDIR) @: > block_test/$(DEPDIR)/$(am__dirstamp) block_test/block_test.$(OBJEXT): block_test/$(am__dirstamp) \ block_test/$(DEPDIR)/$(am__dirstamp) block_test/block_test$(EXEEXT): $(block_test_block_test_OBJECTS) $(block_test_block_test_DEPENDENCIES) $(EXTRA_block_test_block_test_DEPENDENCIES) block_test/$(am__dirstamp) @rm -f block_test/block_test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(block_test_block_test_OBJECTS) $(block_test_block_test_LDADD) $(LIBS) spawn$(EXEEXT): $(spawn_OBJECTS) $(spawn_DEPENDENCIES) $(EXTRA_spawn_DEPENDENCIES) @rm -f spawn$(EXEEXT) $(AM_V_CCLD)$(LINK) $(spawn_OBJECTS) $(spawn_LDADD) $(LIBS) vector_scale$(EXEEXT): $(vector_scale_OBJECTS) $(vector_scale_DEPENDENCIES) $(EXTRA_vector_scale_DEPENDENCIES) @rm -f vector_scale$(EXEEXT) $(AM_V_CCLD)$(LINK) $(vector_scale_OBJECTS) $(vector_scale_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) -rm -f block_test/*.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/async_spawn.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spawn.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vector_scale.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@block_test/$(DEPDIR)/block_test.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs -rm -rf block_test/.libs block_test/_libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? async_spawn.log: async_spawn$(EXEEXT) @p='async_spawn$(EXEEXT)'; \ b='async_spawn'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) spawn.log: spawn$(EXEEXT) @p='spawn$(EXEEXT)'; \ b='spawn'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) vector_scale.log: vector_scale$(EXEEXT) @p='vector_scale$(EXEEXT)'; \ b='vector_scale'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) block_test/block_test.log: block_test/block_test$(EXEEXT) @p='block_test/block_test$(EXEEXT)'; \ b='block_test/block_test'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: check-recursive all-am: Makefile $(PROGRAMS) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(examplebindir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -rm -f block_test/$(DEPDIR)/$(am__dirstamp) -rm -f block_test/$(am__dirstamp) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-examplebinPROGRAMS clean-generic clean-libtool \ mostlyclean-am distclean: distclean-recursive -rm -f ./$(DEPDIR)/async_spawn.Po -rm -f ./$(DEPDIR)/spawn.Po -rm -f ./$(DEPDIR)/vector_scale.Po -rm -f block_test/$(DEPDIR)/block_test.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-examplebinPROGRAMS install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f ./$(DEPDIR)/async_spawn.Po -rm -f ./$(DEPDIR)/spawn.Po -rm -f ./$(DEPDIR)/vector_scale.Po -rm -f block_test/$(DEPDIR)/block_test.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-examplebinPROGRAMS .MAKE: $(am__recursive_targets) check-am install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ am--depfiles check check-TESTS check-am clean \ clean-examplebinPROGRAMS clean-generic clean-libtool \ cscopelist-am ctags ctags-am distclean distclean-compile \ distclean-generic distclean-libtool distclean-tags distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-examplebinPROGRAMS install-exec install-exec-am \ install-html install-html-am install-info install-info-am \ install-man install-pdf install-pdf-am install-ps \ install-ps-am install-strip installcheck installcheck-am \ installdirs installdirs-am maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ recheck tags tags-am uninstall uninstall-am \ uninstall-examplebinPROGRAMS .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/starpurm/examples/async_spawn.c000066400000000000000000000175131507764646700222030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This example shows a basic StarPU vector scale app on top of StarPURM, * making use of both the main RM API and the spawn_kernel_on_cpus API func */ #include #include #include #include #include #include #include static int rm_cpu_type_id = -1; static int rm_nb_cpu_units = 0; static void usage(void); static void test1(const int N); static void test2(const int N, const int task_mult); static void init_rm_infos(void); static unsigned spawn_pending = 0; static pthread_mutex_t spawn_pending_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t spawn_pending_cond; static void _inc_spawn_pending(void) { pthread_mutex_lock(&spawn_pending_mutex); assert(spawn_pending < UINT_MAX); spawn_pending++; pthread_mutex_unlock(&spawn_pending_mutex); } static void _dec_spawn_pending(void) { pthread_mutex_lock(&spawn_pending_mutex); assert(spawn_pending > 0); spawn_pending--; if (spawn_pending == 0) pthread_cond_broadcast(&spawn_pending_cond); pthread_mutex_unlock(&spawn_pending_mutex); } static void _wait_pending_spawns(void) { pthread_mutex_lock(&spawn_pending_mutex); while (spawn_pending > 0) pthread_cond_wait(&spawn_pending_cond, &spawn_pending_mutex); pthread_mutex_unlock(&spawn_pending_mutex); } static void spawn_callback(void *_arg) { assert(42 == (uintptr_t)_arg); _dec_spawn_pending(); } /* vector scale codelet */ static void vector_scale_func(void *cl_buffers[], void *cl_arg) { double scalar = -1.0; int n = STARPU_VECTOR_GET_NX(cl_buffers[0]); double *vector = (double *)STARPU_VECTOR_GET_PTR(cl_buffers[0]); int i; starpu_codelet_unpack_args(cl_arg, &scalar); int workerid = starpu_worker_get_id(); hwloc_cpuset_t worker_cpuset = starpu_worker_get_hwloc_cpuset(workerid); { int strl1 = hwloc_bitmap_snprintf(NULL, 0, worker_cpuset); char str1[strl1+1]; hwloc_bitmap_snprintf(str1, strl1+1, worker_cpuset); printf("worker[%03d] - task: vector=%p, n=%d, scalar=%lf, worker cpuset = %s\n", workerid, vector, n, scalar, str1); } hwloc_bitmap_free(worker_cpuset); for (i = 0; i < n; i++) { vector[i] *= scalar; } } static struct starpu_codelet vector_scale_cl = { .cpu_funcs = {vector_scale_func}, .nbuffers = 1 }; /* main routines */ static void usage(void) { fprintf(stderr, "usage: 05_vector_scale [VECTOR_SIZE]\n"); exit(1); } static void test1(const int N) { double *vector = NULL; const double scalar = 2.0; starpu_data_handle_t vector_handle; int ret; starpu_malloc((void **)&vector, N * sizeof(*vector)); { int i; for (i = 0; i < N; i++) { vector[i] = i; } } starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); ret = starpu_task_insert(&vector_scale_cl, STARPU_RW, vector_handle, STARPU_VALUE, &scalar, sizeof(scalar), 0); assert(ret == 0); starpu_task_wait_for_all(); starpu_data_unregister(vector_handle); { int i; for (i = 0; i < N; i++) { double d_i = i; if (vector[i] != d_i*scalar) { fprintf(stderr, "%s: check_failed\n", __func__); exit(1); } } } starpu_free_noflag(vector, N * sizeof(*vector)); } static void test2(const int N, const int task_mult) { double *vector = NULL; const double scalar = 3.0; starpu_data_handle_t vector_handle; int ret; starpu_malloc((void **)&vector, N * sizeof(*vector)); { int i; for (i = 0; i < N; i++) { vector[i] = i; } } starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); struct starpu_data_filter partition_filter = { .filter_func = starpu_vector_filter_block, .nchildren = rm_nb_cpu_units * task_mult }; starpu_data_partition(vector_handle, &partition_filter); { int i; for (i = 0; i < rm_nb_cpu_units*task_mult; i++) { starpu_data_handle_t sub_vector_handle = starpu_data_get_sub_data(vector_handle, 1, i); ret = starpu_task_insert(&vector_scale_cl, STARPU_RW, sub_vector_handle, STARPU_VALUE, &scalar, sizeof(scalar), 0); assert(ret == 0); } } starpu_task_wait_for_all(); starpu_data_unpartition(vector_handle, STARPU_MAIN_RAM); starpu_data_unregister(vector_handle); { int i; for (i = 0; i < N; i++) { double d_i = i; if (vector[i] != d_i*scalar) { fprintf(stderr, "%s: check_failed\n", __func__); exit(1); } } } starpu_free_noflag(vector, N * sizeof(*vector)); } static void init_rm_infos(void) { int cpu_type = starpurm_get_device_type_id("cpu"); int nb_cpu_units = starpurm_get_nb_devices_by_type(cpu_type); if (nb_cpu_units < 1) { /* No CPU unit available. */ exit(77); } rm_cpu_type_id = cpu_type; rm_nb_cpu_units = nb_cpu_units; } static void kernel_to_spawn(void *args) { int param_N = *(int*)args; //test1(param_N); test2(param_N, 1); //test2(param_N, 10); //test2(param_N, 100); } int main(int argc, char *argv[]) { pthread_cond_init(&spawn_pending_cond, NULL); int param_N = 1000000; int drs_enabled; if (argc > 1) { param_N = atoi(argv[1]); if (param_N < 1) { usage(); } } hwloc_cpuset_t init_cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_set_range(init_cpuset, 0, 7); starpurm_initialize_with_cpuset(init_cpuset); hwloc_bitmap_free(init_cpuset); init_rm_infos(); if (rm_nb_cpu_units > 1) { const int half_nb_cpus = rm_nb_cpu_units/2; starpurm_set_drs_enable(NULL); drs_enabled = starpurm_drs_enabled_p(); assert(drs_enabled != 0); int repeat; for (repeat=0; repeat < 20; repeat++) { hwloc_cpuset_t cpu_cpuset = starpurm_get_all_cpu_workers_cpuset(); { int strl1 = hwloc_bitmap_snprintf(NULL, 0, cpu_cpuset); char str1[strl1+1]; hwloc_bitmap_snprintf(str1, strl1+1, cpu_cpuset); printf("all cpus cpuset = %s\n", str1); } int first_idx = hwloc_bitmap_first(cpu_cpuset); int last_idx = hwloc_bitmap_last(cpu_cpuset); hwloc_cpuset_t sel_cpuset = hwloc_bitmap_alloc(); assert(sel_cpuset != NULL); int count = 0; int idx = first_idx; while (idx != -1 && idx <= last_idx && count < half_nb_cpus) { if (hwloc_bitmap_isset(cpu_cpuset, idx)) { hwloc_bitmap_set(sel_cpuset, idx); count ++; } idx = hwloc_bitmap_next(cpu_cpuset, idx); } assert(count == half_nb_cpus); { int strl1 = hwloc_bitmap_snprintf(NULL, 0, sel_cpuset); char str1[strl1+1]; hwloc_bitmap_snprintf(str1, strl1+1, sel_cpuset); printf("spawning a kernel on cpuset = %s\n", str1); } _inc_spawn_pending(); starpurm_spawn_kernel_on_cpus_callback(NULL, kernel_to_spawn, ¶m_N, sel_cpuset, spawn_callback, (void*)(uintptr_t)42); hwloc_bitmap_free(sel_cpuset); hwloc_bitmap_free(cpu_cpuset); } _wait_pending_spawns(); printf("withdrawing %d cpus from StarPU\n", half_nb_cpus); starpurm_withdraw_cpus_from_starpu(NULL, half_nb_cpus); test1(param_N); test2(param_N, 1); test2(param_N, 10); test2(param_N, 100); printf("assigning %d cpus to StarPU\n", half_nb_cpus); starpurm_assign_cpus_to_starpu(NULL, half_nb_cpus); test1(param_N); test2(param_N, 1); test2(param_N, 10); test2(param_N, 100); starpurm_set_drs_disable(NULL); drs_enabled = starpurm_drs_enabled_p(); assert(drs_enabled == 0); } starpurm_shutdown(); pthread_cond_destroy(&spawn_pending_cond); return 0; } starpu-1.4.9+dfsg/starpurm/examples/block_test/000077500000000000000000000000001507764646700216345ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpurm/examples/block_test/block_test.c000066400000000000000000000146051507764646700241370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This example shows a basic StarPU vector scale app on top of StarPURM with a nVidia CUDA kernel */ #include #include #include #include #include #ifdef STARPU_NON_BLOCKING_DRIVERS int main(int argc, char *argv[]) { (void)argc; (void)argv; return 77; } #else static int rm_cpu_type_id = -1; static int rm_nb_cpu_units = 0; #if defined (STARPU_QUICK_CHECK) static int global_nb_tasks_1 = 20; static const int nb_random_tests_1 = 5; static int global_nb_tasks_2 = 10; static const int nb_random_tests_2 = 2; #elif defined (STARPU_LONG_CHECK) static int global_nb_tasks_1 = 200; static const int nb_random_tests_1 = 20; static int global_nb_tasks_2 = 100; static const int nb_random_tests_2 = 10; #else static int global_nb_tasks_1 = 50; static const int nb_random_tests_1 = 5; static int global_nb_tasks_2 = 10; static const int nb_random_tests_2 = 8; #endif /* vector scale codelet */ static void work_func(void *cl_buffers[], void *cl_arg) { (void)cl_buffers; (void)cl_arg; double timestamp = starpu_timing_now(); double timestamp2; do { timestamp2 = starpu_timing_now(); } while ((timestamp2 - timestamp) < 1e6); } static struct starpu_codelet work_cl = { .cpu_funcs = {work_func}, }; /* main routines */ static void test_1() { int i; for (i=0; i 1) { global_nb_tasks_1 = atoi(argv[1]); if (argc > 2) { global_nb_tasks_2 = atoi(argv[2]); } else { global_nb_tasks_2 = global_nb_tasks_1 / 10; if (global_nb_tasks_2 < 1) { global_nb_tasks_2 = 1; } } } starpurm_initialize(); init_rm_infos(); printf("using default units\n"); disp_selected_cpuset(); test_1(); if (rm_nb_cpu_units > 1) { const int nb_cpus = rm_nb_cpu_units; const int half_nb_cpus = nb_cpus/2; printf("nb_cpu_units = %d\n", nb_cpus); starpurm_set_drs_enable(NULL); drs_enabled = starpurm_drs_enabled_p(); assert(drs_enabled != 0); printf("withdrawing %d cpus from StarPU\n", half_nb_cpus); starpurm_withdraw_cpus_from_starpu(NULL, half_nb_cpus); disp_selected_cpuset(); test_1(); printf("assigning %d cpus to StarPU\n", half_nb_cpus); starpurm_assign_cpus_to_starpu(NULL, half_nb_cpus); disp_selected_cpuset(); test_1(); int i; for (i=0; i #include #include #include #include #include #include #include #define CHECK static int rm_cpu_type_id = -1; static int rm_cuda_type_id = -1; static int rm_nb_cpu_units = 0; static int rm_nb_cuda_units = 0; static const int nb_random_tests = 10; static unsigned spawn_pending = 0; static pthread_mutex_t spawn_pending_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t spawn_pending_cond; static void _inc_spawn_pending(void) { pthread_mutex_lock(&spawn_pending_mutex); assert(spawn_pending < UINT_MAX); spawn_pending++; pthread_mutex_unlock(&spawn_pending_mutex); } static void _dec_spawn_pending(void) { pthread_mutex_lock(&spawn_pending_mutex); assert(spawn_pending > 0); spawn_pending--; if (spawn_pending == 0) pthread_cond_broadcast(&spawn_pending_cond); pthread_mutex_unlock(&spawn_pending_mutex); } static void _wait_pending_spawns(void) { pthread_mutex_lock(&spawn_pending_mutex); while (spawn_pending > 0) pthread_cond_wait(&spawn_pending_cond, &spawn_pending_mutex); pthread_mutex_unlock(&spawn_pending_mutex); } static void spawn_callback(void *_arg) { assert(42 == (uintptr_t)_arg); _dec_spawn_pending(); } static void usage(void) { fprintf(stderr, "dgemm: M N K \n"); exit(EXIT_FAILURE); } static void init_rm_infos(void) { int cpu_type = starpurm_get_device_type_id("cpu"); int nb_cpu_units = starpurm_get_nb_devices_by_type(cpu_type); if (nb_cpu_units < 1) { /* No CPU unit available. */ exit(77); } int cuda_type = starpurm_get_device_type_id("cuda"); int nb_cuda_units = starpurm_get_nb_devices_by_type(cuda_type); rm_cpu_type_id = cpu_type; rm_cuda_type_id = cuda_type; rm_nb_cpu_units = nb_cpu_units; rm_nb_cuda_units = nb_cuda_units; } static void disp_cpuset(hwloc_cpuset_t selected_cpuset) { //hwloc_cpuset_t selected_cpuset = starpurm_get_selected_cpuset(); int strl = hwloc_bitmap_snprintf(NULL, 0, selected_cpuset); char str[strl+1]; hwloc_bitmap_snprintf(str, strl+1, selected_cpuset); printf("%llx: selected cpuset = %s\n", (unsigned long long)pthread_self(), str); } struct s_test_args { const int m; const int n; const int k; int transA; int transB; }; static void test(void *_args) { struct s_test_args *args = _args; const int m = args->m; const int n = args->n; const int k = args->k; int transA = args->transA; int transB = args->transB; unsigned rand_seed = (unsigned)time(NULL); double *A = malloc(m * k * sizeof(double)); double *B = malloc(k * n * sizeof(double)); double *C = calloc(m * n, sizeof(double)); double *C_test = calloc(m * n, sizeof(double)); const double alpha = (double)rand_r(&rand_seed) / ((double)rand_r(&rand_seed) + DBL_MIN); const double beta = (double)rand_r(&rand_seed) / ((double)rand_r(&rand_seed) + DBL_MIN); int i; for (i = 0; i < m; i++) { int j; for (j = 0; j < n; j++) { A[i*n+j] = (double)rand_r(&rand_seed) / ((double)rand_r(&rand_seed) + DBL_MIN); B[i*n+j] = (double)rand_r(&rand_seed) / ((double)rand_r(&rand_seed) + DBL_MIN); } } MORSE_dgemm(transA, transB, m, n, k, alpha, A, k, B, n, beta, C, n); #ifdef CHECK /* Check */ cblas_dgemm(CblasColMajor, (CBLAS_TRANSPOSE) transA, (CBLAS_TRANSPOSE) transB, m, n, k, alpha, A, k, B, n, beta, C_test, n); double C_test_inorm = LAPACKE_dlange(CblasColMajor, 'I', m, n, C_test, n); cblas_daxpy(m*n, -1, C, 1, C_test, 1); double inorm = LAPACKE_dlange(CblasColMajor, 'I', m, n, C_test, n); printf("%llx: ||C_test-C||_I / ||C_test||_I = %e\n", (unsigned long long)pthread_self(), inorm/C_test_inorm); #endif free(A); free(B); free(C); free(C_test); } static void select_units(hwloc_cpuset_t selected_cpuset, hwloc_cpuset_t available_cpuset, int offset, int nb) { int first_idx = hwloc_bitmap_first(available_cpuset); int last_idx = hwloc_bitmap_last(available_cpuset); int count = 0; int idx = first_idx; while (idx != -1 && idx <= last_idx && count < offset+nb) { if (hwloc_bitmap_isset(available_cpuset, idx)) { if (count >= offset) { hwloc_bitmap_set(selected_cpuset, idx); } count ++; } idx = hwloc_bitmap_next(available_cpuset, idx); } assert(count == offset+nb); } void spawn_tests(int cpu_offset, int cpu_nb, int cuda_offset, int cuda_nb, void *args) { if (cpu_offset + cpu_nb > rm_nb_cpu_units) exit(77); if (cuda_offset + cuda_nb > rm_nb_cuda_units) exit(77); hwloc_cpuset_t cpu_cpuset = starpurm_get_all_cpu_workers_cpuset(); hwloc_cpuset_t cuda_cpuset = starpurm_get_all_device_workers_cpuset_by_type(rm_cuda_type_id); hwloc_cpuset_t sel_cpuset = hwloc_bitmap_alloc(); assert(sel_cpuset != NULL); select_units(sel_cpuset, cpu_cpuset, cpu_offset, cpu_nb); select_units(sel_cpuset, cuda_cpuset, cuda_offset, cuda_nb); { int strl1 = hwloc_bitmap_snprintf(NULL, 0, cpu_cpuset); char str1[strl1+1]; hwloc_bitmap_snprintf(str1, strl1+1, cpu_cpuset); int strl2 = hwloc_bitmap_snprintf(NULL, 0, cuda_cpuset); char str2[strl2+1]; hwloc_bitmap_snprintf(str2, strl2+1, cuda_cpuset); printf("all cpus cpuset = %s\n", str1); int strl3 = hwloc_bitmap_snprintf(NULL, 0, sel_cpuset); char str3[strl3+1]; hwloc_bitmap_snprintf(str3, strl1+3, sel_cpuset); printf("spawn on selected cpuset = %s (avail cpu %s, avail cuda %s)\n", str3, str1, str2); } _inc_spawn_pending(); starpurm_spawn_kernel_on_cpus_callback(NULL, test, args, sel_cpuset, spawn_callback, (void*)(uintptr_t)42); hwloc_bitmap_free(sel_cpuset); hwloc_bitmap_free(cpu_cpuset); hwloc_bitmap_free(cuda_cpuset); } int main(int argc, char const *argv[]) { pthread_cond_init(&spawn_pending_cond, NULL); int transA = MorseTrans; int transB = MorseTrans; if (argc < 6 || argc > 6) usage(); int m = atoi(argv[1]); if (m < 1) usage(); int n = atoi(argv[2]); if (n < 1) usage(); int k = atoi(argv[3]); if (k < 1) usage(); if (strcmp(argv[4], "T") == 0) transA = MorseTrans; else if (strcmp(argv[4], "N") == 0) transA = MorseNoTrans; else usage(); if (strcmp(argv[5], "T") == 0) transB = MorseTrans; else if (strcmp(argv[5], "N") == 0) transB = MorseNoTrans; else usage(); srand(time(NULL)); struct s_test_args test_args = { .m = m, .n = n, .k = k, .transA = transA, .transB = transB }; /* Test case */ starpurm_initialize(); starpurm_set_drs_enable(NULL); init_rm_infos(); printf("cpu units: %d\n", rm_nb_cpu_units); printf("cuda units: %d\n", rm_nb_cuda_units); printf("using default units\n"); disp_cpuset(starpurm_get_selected_cpuset()); MORSE_Init(rm_nb_cpu_units, rm_nb_cuda_units); test(&test_args); { int cpu_offset = 0; int cpu_nb = rm_nb_cpu_units/2; if (cpu_nb == 0 && rm_nb_cpu_units > 0) { cpu_nb = 1; } int cuda_offset = 0; int cuda_nb = rm_nb_cuda_units/2; if (cuda_nb == 0 && rm_nb_cuda_units > 0) { cuda_nb = 1; } spawn_tests(cpu_offset, cpu_nb, cuda_offset, cuda_nb, &test_args); } { int cpu_offset = rm_nb_cpu_units/2; int cpu_nb = cpu_offset; if (cpu_nb == 0 && rm_nb_cpu_units > 0) { cpu_nb = 1; } int cuda_offset = rm_nb_cuda_units/2; int cuda_nb = rm_nb_cuda_units - cuda_offset; spawn_tests(cpu_offset, cpu_nb, cuda_offset, cuda_nb, &test_args); } _wait_pending_spawns(); MORSE_Finalize(); starpurm_shutdown(); pthread_cond_destroy(&spawn_pending_cond); return 0; } starpu-1.4.9+dfsg/starpurm/examples/cuda_vector_scale/000077500000000000000000000000001507764646700231505ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpurm/examples/cuda_vector_scale/vector_scale.c000066400000000000000000000164021507764646700257700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This example shows a basic StarPU vector scale app on top of StarPURM with a nVidia CUDA kernel */ #include #include #include #include #include static int rm_cpu_type_id = -1; static int rm_cuda_type_id = -1; static int rm_nb_cpu_units = 0; static int rm_nb_cuda_units = 0; static void usage(void); static void test1(const int N); static void test2(const int N, const int task_mult); static void init_rm_infos(void); /* vector scale codelet */ static void vector_scale_func(void *cl_buffers[], void *cl_arg) { float scalar = -1.0; int n = STARPU_VECTOR_GET_NX(cl_buffers[0]); float *vector = (float *)STARPU_VECTOR_GET_PTR(cl_buffers[0]); int i; starpu_codelet_unpack_args(cl_arg, &scalar); { int workerid = starpu_worker_get_id(); hwloc_cpuset_t worker_cpuset = starpu_worker_get_hwloc_cpuset(workerid); hwloc_cpuset_t check_cpuset = starpurm_get_selected_cpuset(); #if 0 { int strl1 = hwloc_bitmap_snprintf(NULL, 0, worker_cpuset); char str1[strl1+1]; hwloc_bitmap_snprintf(str1, strl1+1, worker_cpuset); int strl2 = hwloc_bitmap_snprintf(NULL, 0, check_cpuset); char str2[strl2+1]; hwloc_bitmap_snprintf(str2, strl2+1, check_cpuset); printf("worker[%03d] - task: vector=%p, n=%d, scalar=%lf, worker cpuset = %s, selected cpuset = %s\n", workerid, vector, n, scalar, str1, str2); } #endif hwloc_bitmap_and(check_cpuset, check_cpuset, worker_cpuset); assert(!hwloc_bitmap_iszero(check_cpuset)); hwloc_bitmap_free(check_cpuset); hwloc_bitmap_free(worker_cpuset); } for (i = 0; i < n; i++) { vector[i] *= scalar; } } extern void vector_scale_cuda_func(void *cl_buffers[], void *cl_arg); static struct starpu_codelet vector_scale_cl = { .cpu_funcs = {vector_scale_func}, .cuda_funcs = {vector_scale_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 1 }; /* main routines */ static void usage(void) { fprintf(stderr, "usage: 05_vector_scale [VECTOR_SIZE]\n"); exit(1); } static void test1(const int N) { float *vector = NULL; const float scalar = 2.0; starpu_data_handle_t vector_handle; int ret; starpu_malloc((void **)&vector, N * sizeof(*vector)); { int i; for (i = 0; i < N; i++) { vector[i] = i; } } starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); ret = starpu_task_insert(&vector_scale_cl, STARPU_RW, vector_handle, STARPU_VALUE, &scalar, sizeof(scalar), 0); assert(ret == 0); starpu_task_wait_for_all(); starpu_data_unregister(vector_handle); { int i; for (i = 0; i < N; i++) { float d_i = i; if (vector[i] != d_i*scalar) { fprintf(stderr, "%s: check_failed, vector[%d]: %f != %f\n", __func__, i, vector[i], d_i*scalar); exit(1); } } } starpu_free_noflag(vector, N * sizeof(*vector)); } static void test2(const int N, const int task_mult) { float *vector = NULL; const float scalar = 3.0; starpu_data_handle_t vector_handle; int ret; starpu_malloc((void **)&vector, N * sizeof(*vector)); { int i; for (i = 0; i < N; i++) { vector[i] = i; } } const int nparts = (rm_nb_cpu_units+rm_nb_cuda_units) * task_mult; starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); struct starpu_data_filter partition_filter = { .filter_func = starpu_vector_filter_block, .nchildren = nparts }; starpu_data_partition(vector_handle, &partition_filter); { int i; for (i = 0; i < nparts; i++) { starpu_data_handle_t sub_vector_handle = starpu_data_get_sub_data(vector_handle, 1, i); ret = starpu_task_insert(&vector_scale_cl, STARPU_RW, sub_vector_handle, STARPU_VALUE, &scalar, sizeof(scalar), 0); assert(ret == 0); } } starpu_task_wait_for_all(); starpu_data_unpartition(vector_handle, STARPU_MAIN_RAM); starpu_data_unregister(vector_handle); { int i; for (i = 0; i < N; i++) { float d_i = i; if (vector[i] != d_i*scalar) { fprintf(stderr, "%s: check_failed, vector[%d]: %f != %f\n", __func__, i, vector[i], d_i*scalar); exit(1); } } } starpu_free_noflag(vector, N * sizeof(*vector)); } static void init_rm_infos(void) { int cpu_type = starpurm_get_device_type_id("cpu"); int nb_cpu_units = starpurm_get_nb_devices_by_type(cpu_type); if (nb_cpu_units < 1) { /* No CPU unit available. */ exit(77); } int cuda_type = starpurm_get_device_type_id("cuda"); int nb_cuda_units = starpurm_get_nb_devices_by_type(cuda_type); if (nb_cuda_units < 1) { /* No CUDA unit available. */ exit(77); } rm_cpu_type_id = cpu_type; rm_cuda_type_id = cuda_type; rm_nb_cpu_units = nb_cpu_units; rm_nb_cuda_units = nb_cuda_units; } static void disp_selected_cpuset(void) { hwloc_cpuset_t selected_cpuset = starpurm_get_selected_cpuset(); int strl = hwloc_bitmap_snprintf(NULL, 0, selected_cpuset); char str[strl+1]; hwloc_bitmap_snprintf(str, strl+1, selected_cpuset); printf("selected cpuset = %s\n", str); } int main(int argc, char *argv[]) { int param_N = 1000000; int drs_enabled; if (argc > 1) { param_N = atoi(argv[1]); if (param_N < 1) { usage(); } } starpurm_initialize(); init_rm_infos(); printf("using default units\n"); disp_selected_cpuset(); test1(param_N); test2(param_N, 1); test2(param_N, 10); test2(param_N, 100); if (rm_nb_cpu_units > 1 && rm_nb_cuda_units > 1) { int nb_cpus = rm_nb_cpu_units; const int nb_cudas = rm_nb_cuda_units; const int cuda_type = rm_cuda_type_id; printf("nb_cpu_units = %d\n", nb_cpus); printf("nb_cuda_units = %d\n", nb_cudas); /* Keep at least one CPU core */ nb_cpus--; starpurm_set_drs_enable(NULL); drs_enabled = starpurm_drs_enabled_p(); assert(drs_enabled != 0); printf("withdrawing %d cpus from StarPU\n", nb_cpus); starpurm_withdraw_cpus_from_starpu(NULL, nb_cpus); disp_selected_cpuset(); test2(param_N, 1); test2(param_N, 10); test2(param_N, 100); printf("assigning %d cpus to StarPU\n", nb_cpus); starpurm_assign_cpus_to_starpu(NULL, nb_cpus); disp_selected_cpuset(); test2(param_N, 1); test2(param_N, 10); test2(param_N, 100); printf("withdrawing %d cuda devices from StarPU\n", nb_cudas); starpurm_withdraw_devices_from_starpu(NULL, cuda_type, nb_cudas); disp_selected_cpuset(); test2(param_N, 1); test2(param_N, 10); test2(param_N, 100); printf("lending %d cuda devices to StarPU\n", nb_cudas); starpurm_assign_devices_to_starpu(NULL, cuda_type, nb_cudas); disp_selected_cpuset(); test2(param_N, 1); test2(param_N, 10); test2(param_N, 100); starpurm_set_drs_disable(NULL); drs_enabled = starpurm_drs_enabled_p(); assert(drs_enabled == 0); } starpurm_shutdown(); return 0; } starpu-1.4.9+dfsg/starpurm/examples/cuda_vector_scale/vs_cuda_kernel.cu000066400000000000000000000045731507764646700264760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This example shows a basic StarPU vector scale app on top of StarPURM with a nVidia CUDA kernel */ #include #include static __global__ void vector_scale_cuda_kernel(float *vector, unsigned n, float scalar) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) { vector[i] *= scalar; } } extern "C" void vector_scale_cuda_func(void *cl_buffers[], void *cl_arg) { float scalar = -1.0; unsigned n = STARPU_VECTOR_GET_NX(cl_buffers[0]); float *vector = (float *)STARPU_VECTOR_GET_PTR(cl_buffers[0]); starpu_codelet_unpack_args(cl_arg, &scalar); { int workerid = starpu_worker_get_id(); hwloc_cpuset_t worker_cpuset = starpu_worker_get_hwloc_cpuset(workerid); hwloc_cpuset_t check_cpuset = starpurm_get_selected_cpuset(); #if 0 { int strl1 = hwloc_bitmap_snprintf(NULL, 0, worker_cpuset); char str1[strl1+1]; hwloc_bitmap_snprintf(str1, strl1+1, worker_cpuset); int strl2 = hwloc_bitmap_snprintf(NULL, 0, check_cpuset); char str2[strl2+1]; hwloc_bitmap_snprintf(str2, strl2+1, check_cpuset); printf("worker[%03d] - task: vector=%p, n=%d, scalar=%lf, worker cpuset = %s, selected cpuset = %s\n", workerid, vector, n, scalar, str1, str2); } #endif hwloc_bitmap_and(check_cpuset, check_cpuset, worker_cpuset); assert(!hwloc_bitmap_iszero(check_cpuset)); hwloc_bitmap_free(check_cpuset); hwloc_bitmap_free(worker_cpuset); } unsigned nb_threads_per_block = 64; unsigned nb_blocks = (n + nb_threads_per_block-1) / nb_threads_per_block; vector_scale_cuda_kernel<<>>(vector, n, scalar); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/starpurm/examples/spawn.c000066400000000000000000000150101507764646700207740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This example shows a basic StarPU vector scale app on top of StarPURM, * making use of both the main RM API and the spawn_kernel_on_cpus API func */ #include #include #include #include #include static int rm_cpu_type_id = -1; static int rm_nb_cpu_units = 0; static void usage(void); static void test1(const int N); static void test2(const int N, const int task_mult); static void init_rm_infos(void); /* vector scale codelet */ static void vector_scale_func(void *cl_buffers[], void *cl_arg) { double scalar = -1.0; int n = STARPU_VECTOR_GET_NX(cl_buffers[0]); double *vector = (double *)STARPU_VECTOR_GET_PTR(cl_buffers[0]); int i; starpu_codelet_unpack_args(cl_arg, &scalar); int workerid = starpu_worker_get_id(); hwloc_cpuset_t worker_cpuset = starpu_worker_get_hwloc_cpuset(workerid); { int strl1 = hwloc_bitmap_snprintf(NULL, 0, worker_cpuset); char str1[strl1+1]; hwloc_bitmap_snprintf(str1, strl1+1, worker_cpuset); printf("worker[%03d] - task: vector=%p, n=%d, scalar=%lf, worker cpuset = %s\n", workerid, vector, n, scalar, str1); } hwloc_bitmap_free(worker_cpuset); for (i = 0; i < n; i++) { vector[i] *= scalar; } } static struct starpu_codelet vector_scale_cl = { .cpu_funcs = {vector_scale_func}, .nbuffers = 1 }; /* main routines */ static void usage(void) { fprintf(stderr, "usage: 05_vector_scale [VECTOR_SIZE]\n"); exit(1); } static void test1(const int N) { double *vector = NULL; const double scalar = 2.0; starpu_data_handle_t vector_handle; int ret; starpu_malloc((void **)&vector, N * sizeof(*vector)); { int i; for (i = 0; i < N; i++) { vector[i] = i; } } starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); ret = starpu_task_insert(&vector_scale_cl, STARPU_RW, vector_handle, STARPU_VALUE, &scalar, sizeof(scalar), 0); assert(ret == 0); starpu_task_wait_for_all(); starpu_data_unregister(vector_handle); { int i; for (i = 0; i < N; i++) { double d_i = i; if (vector[i] != d_i*scalar) { fprintf(stderr, "%s: check_failed\n", __func__); exit(1); } } } starpu_free_noflag(vector, N * sizeof(*vector)); } static void test2(const int N, const int task_mult) { double *vector = NULL; const double scalar = 3.0; starpu_data_handle_t vector_handle; int ret; starpu_malloc((void **)&vector, N * sizeof(*vector)); { int i; for (i = 0; i < N; i++) { vector[i] = i; } } starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); struct starpu_data_filter partition_filter = { .filter_func = starpu_vector_filter_block, .nchildren = rm_nb_cpu_units * task_mult }; starpu_data_partition(vector_handle, &partition_filter); { int i; for (i = 0; i < rm_nb_cpu_units*task_mult; i++) { starpu_data_handle_t sub_vector_handle = starpu_data_get_sub_data(vector_handle, 1, i); ret = starpu_task_insert(&vector_scale_cl, STARPU_RW, sub_vector_handle, STARPU_VALUE, &scalar, sizeof(scalar), 0); assert(ret == 0); } } starpu_task_wait_for_all(); starpu_data_unpartition(vector_handle, STARPU_MAIN_RAM); starpu_data_unregister(vector_handle); { int i; for (i = 0; i < N; i++) { double d_i = i; if (vector[i] != d_i*scalar) { fprintf(stderr, "%s: check_failed\n", __func__); exit(1); } } } starpu_free_noflag(vector, N * sizeof(*vector)); } static void init_rm_infos(void) { int cpu_type = starpurm_get_device_type_id("cpu"); int nb_cpu_units = starpurm_get_nb_devices_by_type(cpu_type); if (nb_cpu_units < 1) { /* No CPU unit available. */ exit(77); } rm_cpu_type_id = cpu_type; rm_nb_cpu_units = nb_cpu_units; } static void kernel_to_spawn(void *args) { int param_N = *(int*)args; test1(param_N); test2(param_N, 1); test2(param_N, 10); test2(param_N, 100); } int main(int argc, char *argv[]) { int param_N = 1000000; int drs_enabled; if (argc > 1) { param_N = atoi(argv[1]); if (param_N < 1) { usage(); } } starpurm_initialize(); init_rm_infos(); if (rm_nb_cpu_units > 1) { const int half_nb_cpus = rm_nb_cpu_units/2; starpurm_set_drs_enable(NULL); drs_enabled = starpurm_drs_enabled_p(); assert(drs_enabled != 0); { hwloc_cpuset_t cpu_cpuset = starpurm_get_all_cpu_workers_cpuset(); { int strl1 = hwloc_bitmap_snprintf(NULL, 0, cpu_cpuset); char str1[strl1+1]; hwloc_bitmap_snprintf(str1, strl1+1, cpu_cpuset); printf("all cpus cpuset = %s\n", str1); } int first_idx = hwloc_bitmap_first(cpu_cpuset); int last_idx = hwloc_bitmap_last(cpu_cpuset); hwloc_cpuset_t sel_cpuset = hwloc_bitmap_alloc(); assert(sel_cpuset != NULL); int count = 0; int idx = first_idx; while (idx != -1 && idx <= last_idx && count < half_nb_cpus) { if (hwloc_bitmap_isset(cpu_cpuset, idx)) { hwloc_bitmap_set(sel_cpuset, idx); count ++; } idx = hwloc_bitmap_next(cpu_cpuset, idx); } assert(count == half_nb_cpus); { int strl1 = hwloc_bitmap_snprintf(NULL, 0, sel_cpuset); char str1[strl1+1]; hwloc_bitmap_snprintf(str1, strl1+1, sel_cpuset); printf("spawning a kernel on cpuset = %s\n", str1); } starpurm_spawn_kernel_on_cpus(NULL, kernel_to_spawn, ¶m_N, sel_cpuset); hwloc_bitmap_free(sel_cpuset); hwloc_bitmap_free(cpu_cpuset); } printf("withdrawing %d cpus from StarPU\n", half_nb_cpus); starpurm_withdraw_cpus_from_starpu(NULL, half_nb_cpus); test1(param_N); test2(param_N, 1); test2(param_N, 10); test2(param_N, 100); printf("assigning %d cpus to StarPU\n", half_nb_cpus); starpurm_assign_cpus_to_starpu(NULL, half_nb_cpus); test1(param_N); test2(param_N, 1); test2(param_N, 10); test2(param_N, 100); starpurm_set_drs_disable(NULL); drs_enabled = starpurm_drs_enabled_p(); assert(drs_enabled == 0); } starpurm_shutdown(); return 0; } starpu-1.4.9+dfsg/starpurm/examples/vector_scale.c000066400000000000000000000137701507764646700223300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This example shows a basic StarPU vector scale app on top of StarPURM */ #include #include #include #include #include static int rm_cpu_type_id = -1; static int rm_nb_cpu_units = 0; static void usage(void); static void test1(const int N); static void test2(const int N, const int task_mult); static void init_rm_infos(void); /* vector scale codelet */ static void vector_scale_func(void *cl_buffers[], void *cl_arg) { double scalar = -1.0; int n = STARPU_VECTOR_GET_NX(cl_buffers[0]); double *vector = (double *)STARPU_VECTOR_GET_PTR(cl_buffers[0]); int i; starpu_codelet_unpack_args(cl_arg, &scalar); int workerid = starpu_worker_get_id(); hwloc_cpuset_t worker_cpuset = starpu_worker_get_hwloc_cpuset(workerid); hwloc_cpuset_t check_cpuset = starpurm_get_selected_cpuset(); { int strl1 = hwloc_bitmap_snprintf(NULL, 0, worker_cpuset); char str1[strl1+1]; hwloc_bitmap_snprintf(str1, strl1+1, worker_cpuset); int strl2 = hwloc_bitmap_snprintf(NULL, 0, check_cpuset); char str2[strl2+1]; hwloc_bitmap_snprintf(str2, strl2+1, check_cpuset); printf("worker[%03d] - task: vector=%p, n=%d, scalar=%lf, worker cpuset = %s, selected cpuset = %s\n", workerid, vector, n, scalar, str1, str2); } hwloc_bitmap_and(check_cpuset, check_cpuset, worker_cpuset); assert(!hwloc_bitmap_iszero(check_cpuset)); hwloc_bitmap_free(check_cpuset); hwloc_bitmap_free(worker_cpuset); for (i = 0; i < n; i++) { vector[i] *= scalar; } } static struct starpu_codelet vector_scale_cl = { .cpu_funcs = {vector_scale_func}, .nbuffers = 1 }; /* main routines */ static void usage(void) { fprintf(stderr, "usage: 05_vector_scale [VECTOR_SIZE]\n"); exit(1); } static void test1(const int N) { double *vector = NULL; const double scalar = 2.0; starpu_data_handle_t vector_handle; int ret; starpu_malloc((void **)&vector, N * sizeof(*vector)); { int i; for (i = 0; i < N; i++) { vector[i] = i; } } starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); ret = starpu_task_insert(&vector_scale_cl, STARPU_RW, vector_handle, STARPU_VALUE, &scalar, sizeof(scalar), 0); assert(ret == 0); starpu_task_wait_for_all(); starpu_data_unregister(vector_handle); { int i; for (i = 0; i < N; i++) { double d_i = i; if (vector[i] != d_i*scalar) { fprintf(stderr, "%s: check_failed\n", __func__); exit(1); } } } starpu_free_noflag(vector, N * sizeof(*vector)); } static void test2(const int N, const int task_mult) { double *vector = NULL; const double scalar = 3.0; starpu_data_handle_t vector_handle; int ret; starpu_malloc((void **)&vector, N * sizeof(*vector)); { int i; for (i = 0; i < N; i++) { vector[i] = i; } } starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, N, sizeof(*vector)); struct starpu_data_filter partition_filter = { .filter_func = starpu_vector_filter_block, .nchildren = rm_nb_cpu_units * task_mult }; starpu_data_partition(vector_handle, &partition_filter); { int i; for (i = 0; i < rm_nb_cpu_units*task_mult; i++) { starpu_data_handle_t sub_vector_handle = starpu_data_get_sub_data(vector_handle, 1, i); ret = starpu_task_insert(&vector_scale_cl, STARPU_RW, sub_vector_handle, STARPU_VALUE, &scalar, sizeof(scalar), 0); assert(ret == 0); } } starpu_task_wait_for_all(); starpu_data_unpartition(vector_handle, STARPU_MAIN_RAM); starpu_data_unregister(vector_handle); { int i; for (i = 0; i < N; i++) { double d_i = i; if (vector[i] != d_i*scalar) { fprintf(stderr, "%s: check_failed\n", __func__); exit(1); } } } starpu_free_noflag(vector, N * sizeof(*vector)); } static void init_rm_infos(void) { int cpu_type = starpurm_get_device_type_id("cpu"); int nb_cpu_units = starpurm_get_nb_devices_by_type(cpu_type); if (nb_cpu_units < 1) { /* No CPU unit available. */ exit(77); } rm_cpu_type_id = cpu_type; rm_nb_cpu_units = nb_cpu_units; } int main(int argc, char *argv[]) { int param_N = 1000000; int drs_enabled; if (argc > 1) { param_N = atoi(argv[1]); if (param_N < 1) { usage(); } } starpurm_initialize(); init_rm_infos(); test1(param_N); test2(param_N, 1); test2(param_N, 10); test2(param_N, 100); if (rm_nb_cpu_units > 1) { const int half_nb_cpus = rm_nb_cpu_units/2; starpurm_set_drs_enable(NULL); drs_enabled = starpurm_drs_enabled_p(); assert(drs_enabled != 0); printf("withdrawing %d cpus from StarPU\n", half_nb_cpus); starpurm_withdraw_cpus_from_starpu(NULL, half_nb_cpus); test2(param_N, 1); test2(param_N, 10); test2(param_N, 100); printf("assigning %d cpus to StarPU\n", half_nb_cpus); starpurm_assign_cpus_to_starpu(NULL, half_nb_cpus); test2(param_N, 1); test2(param_N, 10); test2(param_N, 100); int i; for (i = rm_nb_cpu_units-1; i > 0; i--) { starpurm_set_max_parallelism(NULL, i); test2(param_N, 10); } printf("withdrawing all cpus from StarPU\n"); starpurm_withdraw_all_cpus_from_starpu(NULL); printf("assigning %d cpus to StarPU\n", rm_nb_cpu_units); starpurm_assign_cpus_to_starpu(NULL, rm_nb_cpu_units); test2(param_N, 1); test2(param_N, 10); test2(param_N, 100); starpurm_set_drs_disable(NULL); drs_enabled = starpurm_drs_enabled_p(); assert(drs_enabled == 0); } starpurm_shutdown(); return 0; } starpu-1.4.9+dfsg/starpurm/include/000077500000000000000000000000001507764646700173105ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpurm/include/starpurm.h000066400000000000000000000354311507764646700213440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPURM_H #define __STARPURM_H #include #include #ifdef __cplusplus extern "C" { #endif /** @defgroup API_Interop_Support Interoperability Support @brief API to interoperate with other runtime systems. @{ */ /** StarPU Resource Manager return type. */ enum e_starpurm_drs_ret { starpurm_DRS_SUCCESS = 0, /**< Dynamic resource sharing operation succeeded. */ starpurm_DRS_DISABLD = -1, /**< Dynamic resource sharing is disabled. */ starpurm_DRS_PERM = -2, /**< Dynamic resource sharing operation is not authorized or implemented. */ starpurm_DRS_EINVAL = -3 /**< Dynamic resource sharing operation has been called with one or more invalid parameters. */ #if 0 /* Unused for now */ starpurm_DRS_NOTED, starpurm_DRS_REQST #endif }; typedef int starpurm_drs_ret_t; typedef void *starpurm_drs_desc_t; typedef void *starpurm_drs_cbs_t; typedef void (*starpurm_drs_cb_t)(void *); typedef void *starpurm_block_cond_t; typedef int (*starpurm_polling_t)(void *); /** @name Initialisation @{ */ /** Resource enforcement */ void starpurm_initialize_with_cpuset(hwloc_cpuset_t initially_owned_cpuset); /** Initialize StarPU and the StarPU-RM resource management module. The starpu_init() function should not have been called before the call to starpurm_initialize(). The starpurm_initialize() function will take care of this */ void starpurm_initialize(void); /** Shutdown StarPU-RM and StarPU. The starpu_shutdown() function should not be called before. The starpurm_shutdown() function will take care of this. */ void starpurm_shutdown(void); /** @} */ /** @name Spawn @{ */ /** Allocate a temporary context spanning the units selected in the cpuset bitmap, set it as the default context for the current thread, and call user function \p f. Upon the return of user function \p f, the temporary context is freed and the previous default context for the current thread is restored. */ void starpurm_spawn_kernel_on_cpus(void *data, void (*f)(void *), void *args, hwloc_cpuset_t cpuset); /** Spawn a POSIX thread and returns immediately. The thread spawned will allocate a temporary context spanning the units selected in the cpuset bitmap, set it as the default context for the current thread, and call user function \p f. Upon the return of user function \p f, the temporary context will be freed and the previous default context for the current thread restored. A user specified callback \p cb_f will be called just before the termination of the thread. */ void starpurm_spawn_kernel_on_cpus_callback(void *data, void (*f)(void *), void *args, hwloc_cpuset_t cpuset, void (*cb_f)(void *), void *cb_args); void starpurm_spawn_kernel_callback(void *data, void (*f)(void *), void *args, void (*cb_f)(void *), void *cb_args); /** @} */ /** @name DynamicResourceSharing @{ */ /** Turn-on dynamic resource sharing support. */ starpurm_drs_ret_t starpurm_set_drs_enable(starpurm_drs_desc_t *spd); /** Turn-off dynamic resource sharing support. */ starpurm_drs_ret_t starpurm_set_drs_disable(starpurm_drs_desc_t *spd); /** Return the state of the dynamic resource sharing support (\p =!0 enabled, \p =0 disabled). */ int starpurm_drs_enabled_p(void); /** Set the maximum number of CPU computing units available for StarPU computations to \p max. This number cannot exceed the maximum number of StarPU's CPU worker allocated at start-up time. */ starpurm_drs_ret_t starpurm_set_max_parallelism(starpurm_drs_desc_t *spd, int max); #if 0 /* Unused for now */ starpurm_drs_ret_t starpurm_callback_set(starpurm_drs_desc_t *spd, starpurm_drs_cbs_t which, starpurm_drs_cb_t callback); starpurm_drs_ret_t starpurm_callback_get(starpurm_drs_desc_t *spd, starpurm_drs_cbs_t which, starpurm_drs_cb_t *callback); #endif /** Extend StarPU's default scheduling context to execute tasks on worker corresponding to logical unit \p cpuid. If StarPU does not have a worker thread initialized for logical unit \p cpuid, do nothing. */ starpurm_drs_ret_t starpurm_assign_cpu_to_starpu(starpurm_drs_desc_t *spd, int cpuid); /** Extend StarPU's default scheduling context to execute tasks on \p ncpus more workers, up to the number of StarPU worker threads initialized. */ starpurm_drs_ret_t starpurm_assign_cpus_to_starpu(starpurm_drs_desc_t *spd, int ncpus); /** Extend StarPU's default scheduling context to execute tasks on the additional logical units selected in \p mask. Logical units of \p mask for which no StarPU worker is initialized are silently ignored. */ starpurm_drs_ret_t starpurm_assign_cpu_mask_to_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); /** Set StarPU's default scheduling context to execute tasks on all available logical units for which a StarPU worker has been initialized. */ starpurm_drs_ret_t starpurm_assign_all_cpus_to_starpu(starpurm_drs_desc_t *spd); /** Shrink StarPU's default scheduling context so as to not execute tasks on worker corresponding to logical unit \p cpuid. If StarPU does not have a worker thread initialized for logical unit \p cpuid, do nothing. */ starpurm_drs_ret_t starpurm_withdraw_cpu_from_starpu(starpurm_drs_desc_t *spd, int cpuid); /** Shrink StarPU's default scheduling context to execute tasks on \p ncpus less workers. */ starpurm_drs_ret_t starpurm_withdraw_cpus_from_starpu(starpurm_drs_desc_t *spd, int ncpus); /** Shrink StarPU's default scheduling context so as to not execute tasks on the logical units selected in \p mask. Logical units of \p mask for which no StarPU worker is initialized are silently ignored. */ starpurm_drs_ret_t starpurm_withdraw_cpu_mask_from_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); /** Shrink StarPU's default scheduling context so as to remove all logical units. */ starpurm_drs_ret_t starpurm_withdraw_all_cpus_from_starpu(starpurm_drs_desc_t *spd); /* --- */ /** Synonym for starpurm_assign_all_cpus_to_starpu(). */ starpurm_drs_ret_t starpurm_lend(starpurm_drs_desc_t *spd); /** Synonym for starpurm_assign_cpu_to_starpu(). */ starpurm_drs_ret_t starpurm_lend_cpu(starpurm_drs_desc_t *spd, int cpuid); /** Synonym for starpurm_assign_cpus_to_starpu(). */ starpurm_drs_ret_t starpurm_lend_cpus(starpurm_drs_desc_t *spd, int ncpus); /** Synonym for starpurm_assign_cpu_mask_to_starpu(). */ starpurm_drs_ret_t starpurm_lend_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); /** Synonym for starpurm_withdraw_all_cpus_from_starpu(). */ starpurm_drs_ret_t starpurm_reclaim(starpurm_drs_desc_t *spd); /** Synonym for starpurm_withdraw_cpu_from_starpu(). */ starpurm_drs_ret_t starpurm_reclaim_cpu(starpurm_drs_desc_t *spd, int cpuid); /** Synonym for starpurm_withdraw_cpus_from_starpu(). */ starpurm_drs_ret_t starpurm_reclaim_cpus(starpurm_drs_desc_t *spd, int ncpus); /** Synonym for starpurm_withdraw_cpu_mask_from_starpu(). */ starpurm_drs_ret_t starpurm_reclaim_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); /** Synonym for starpurm_withdraw_all_cpus_from_starpu(). */ starpurm_drs_ret_t starpurm_acquire(starpurm_drs_desc_t *spd); /** Synonym for starpurm_withdraw_cpu_from_starpu(). */ starpurm_drs_ret_t starpurm_acquire_cpu(starpurm_drs_desc_t *spd, int cpuid); /** Synonym for starpurm_withdraw_cpus_from_starpu(). */ starpurm_drs_ret_t starpurm_acquire_cpus(starpurm_drs_desc_t *spd, int ncpus); /** Synonym for starpurm_withdraw_cpu_mask_from_starpu(). */ starpurm_drs_ret_t starpurm_acquire_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); /** Synonym for starpurm_assign_all_cpus_to_starpu(). */ starpurm_drs_ret_t starpurm_return_all(starpurm_drs_desc_t *spd); /** Synonym for starpurm_assign_cpu_to_starpu(). */ starpurm_drs_ret_t starpurm_return_cpu(starpurm_drs_desc_t *spd, int cpuid); #if 0 /* Pause/resume (not implemented) */ starpurm_drs_ret_t starpurm_create_block_condition(starpurm_block_cond_t *cond); void starpurm_block_current_task(starpurm_block_cond_t *cond); void starpurm_signal_block_condition(starpurm_block_cond_t *cond); void starpurm_register_polling_service(const char *service_name, starpurm_polling_t function, void *data); void starpurm_unregister_polling_service(const char *service_name, starpurm_polling_t function, void *data); #endif /** @} */ /** @name Devices @{ */ /** Return the device type ID constant associated to the device type name. Valid names for \p type_str are: - \c "cpu": regular CPU unit; - \c "opencl": OpenCL device unit; - \c "cuda": nVidia CUDA device unit; */ int starpurm_get_device_type_id(const char *type_str); /** Return the device type name associated to the device type ID constant. */ const char *starpurm_get_device_type_name(int type_id); /** Return the number of initialized StarPU worker for the device type \p type_id. */ int starpurm_get_nb_devices_by_type(int type_id); /** Return the unique ID assigned to the \p device_rank nth device of type \p type_id. */ int starpurm_get_device_id(int type_id, int device_rank); /** Extend StarPU's default scheduling context to use \p unit_rank nth device of type \p type_id. */ starpurm_drs_ret_t starpurm_assign_device_to_starpu(starpurm_drs_desc_t *spd, int type_id, int unit_rank); /** Extend StarPU's default scheduling context to use \p ndevices more devices of type \p type_id, up to the number of StarPU workers initialized for such device type. */ starpurm_drs_ret_t starpurm_assign_devices_to_starpu(starpurm_drs_desc_t *spd, int type_id, int ndevices); /** Extend StarPU's default scheduling context to use additional devices as designated by their corresponding StarPU worker thread(s) CPU-set \p mask. */ starpurm_drs_ret_t starpurm_assign_device_mask_to_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); /** Extend StarPU's default scheduling context to use all devices of type \p type_id for which it has a worker thread initialized. */ starpurm_drs_ret_t starpurm_assign_all_devices_to_starpu(starpurm_drs_desc_t *spd, int type_id); /** Shrink StarPU's default scheduling context to not use \p unit_rank nth device of type \p type_id. */ starpurm_drs_ret_t starpurm_withdraw_device_from_starpu(starpurm_drs_desc_t *spd, int type_id, int unit_rank); /** Shrink StarPU's default scheduling context to use \p ndevices less devices of type \p type_id. */ starpurm_drs_ret_t starpurm_withdraw_devices_from_starpu(starpurm_drs_desc_t *spd, int type_id, int ndevices); /** Shrink StarPU's default scheduling context to not use devices designated by their corresponding StarPU worker thread(s) CPU-set \p mask. */ starpurm_drs_ret_t starpurm_withdraw_device_mask_from_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); /** Shrink StarPU's default scheduling context to use no devices of type \p type_id. */ starpurm_drs_ret_t starpurm_withdraw_all_devices_from_starpu(starpurm_drs_desc_t *spd, int type_id); /* --- */ /** Synonym for starpurm_assign_device_to_starpu(). */ starpurm_drs_ret_t starpurm_lend_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank); /** Synonym for starpurm_assign_devices_to_starpu(). */ starpurm_drs_ret_t starpurm_lend_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices); /** Synonym for starpurm_assign_device_mask_to_starpu(). */ starpurm_drs_ret_t starpurm_lend_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); /** Synonym for starpurm_assign_all_devices_to_starpu(). */ starpurm_drs_ret_t starpurm_lend_all_devices(starpurm_drs_desc_t *spd, int type_id); /** Synonym for starpurm_withdraw_device_from_starpu(). */ starpurm_drs_ret_t starpurm_reclaim_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank); /** Synonym for starpurm_withdraw_devices_from_starpu(). */ starpurm_drs_ret_t starpurm_reclaim_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices); /** Synonym for starpurm_withdraw_device_mask_from_starpu(). */ starpurm_drs_ret_t starpurm_reclaim_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); /** Synonym for starpurm_withdraw_all_devices_from_starpu(). */ starpurm_drs_ret_t starpurm_reclaim_all_devices(starpurm_drs_desc_t *spd, int type_id); /** Synonym for starpurm_withdraw_device_from_starpu(). */ starpurm_drs_ret_t starpurm_acquire_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank); /** Synonym for starpurm_withdraw_devices_from_starpu(). */ starpurm_drs_ret_t starpurm_acquire_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices); /** Synonym for starpurm_withdraw_device_mask_from_starpu(). */ starpurm_drs_ret_t starpurm_acquire_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask); /** Synonym for starpurm_withdraw_all_devices_from_starpu(). */ starpurm_drs_ret_t starpurm_acquire_all_devices(starpurm_drs_desc_t *spd, int type_id); /** Synonym for starpurm_assign_all_devices_to_starpu(). */ starpurm_drs_ret_t starpurm_return_all_devices(starpurm_drs_desc_t *spd, int type_id); /** Synonym for starpurm_assign_device_to_starpu(). */ starpurm_drs_ret_t starpurm_return_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank); /** @} */ /** @name CpusetsQueries @{ */ /** Return the CPU-set of the StarPU worker associated to the \p unit_rank nth unit of type \p type_id. */ hwloc_cpuset_t starpurm_get_device_worker_cpuset(int type_id, int unit_rank); /** Return the cumulated CPU-set of all StarPU worker threads. */ hwloc_cpuset_t starpurm_get_global_cpuset(void); /** Return the CPU-set of the StarPU worker threads currently selected in the default StarPU's scheduling context. */ hwloc_cpuset_t starpurm_get_selected_cpuset(void); /** Return the cumulated CPU-set of all CPU StarPU worker threads. */ hwloc_cpuset_t starpurm_get_all_cpu_workers_cpuset(void); /** Return the cumulated CPU-set of all "non-CPU" StarPU worker threads. */ hwloc_cpuset_t starpurm_get_all_device_workers_cpuset(void); /** Return the cumulated CPU-set of all StarPU worker threads for devices of type \p typeid. */ hwloc_cpuset_t starpurm_get_all_device_workers_cpuset_by_type(int typeid); /** @} */ /** @} */ #ifdef __cplusplus } #endif #endif /* __STARPURM_H */ starpu-1.4.9+dfsg/starpurm/include/starpurm_config.h.in000066400000000000000000000017171507764646700232760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPURM_CONFIG_H__ #define __STARPURM_CONFIG_H__ /* Major version number of StarPU RM. */ #undef STARPURM_MAJOR_VERSION /* Minor version number of StarPU RM. */ #undef STARPURM_MINOR_VERSION /* Release version number of StarPU RM. */ #undef STARPURM_RELEASE_VERSION #endif starpu-1.4.9+dfsg/starpurm/packages/000077500000000000000000000000001507764646700174435ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpurm/packages/starpurm-1.3.pc.in000066400000000000000000000020601507764646700225460ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ pkglibdir=@pkglibdir@ includedir=@includedir@ Name: starpurm Description: resource management layer on top of StarPU Version: @PACKAGE_VERSION@ Cflags: -I${includedir}/starpurm/@STARPU_EFFECTIVE_VERSION@ @DLB_CFLAGS@ Libs: -L${libdir} -lstarpurm-@STARPU_EFFECTIVE_VERSION@ @DLB_LIBS@ Libs.private: @LDFLAGS@ @LIBS@ Requires: starpu-1.3 hwloc starpu-1.4.9+dfsg/starpurm/packages/starpurm-1.4.pc.in000066400000000000000000000020601507764646700225470ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ pkglibdir=@pkglibdir@ includedir=@includedir@ Name: starpurm Description: resource management layer on top of StarPU Version: @PACKAGE_VERSION@ Cflags: -I${includedir}/starpurm/@STARPU_EFFECTIVE_VERSION@ @DLB_CFLAGS@ Libs: -L${libdir} -lstarpurm-@STARPU_EFFECTIVE_VERSION@ @DLB_LIBS@ Libs.private: @LDFLAGS@ @LIBS@ Requires: starpu-1.4 hwloc starpu-1.4.9+dfsg/starpurm/src/000077500000000000000000000000001507764646700164545ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpurm/src/Makefile.am000066400000000000000000000031401507764646700205060ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-notests.mk SUBDIRS = CLEANFILES = *.gcno *.gcda *.linkinfo AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_builddir)/include AM_CPPFLAGS += -I$(top_srcdir)/starpurm/include -I$(top_srcdir)/starpurm/src -I$(top_builddir)/starpurm/src -I$(top_builddir)/starpurm/include $(STARPU_H_CPPFLAGS) LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) $(HWLOC_LIBS) libstarpurm_so_version = $(LIBSTARPURM_INTERFACE_CURRENT):$(LIBSTARPURM_INTERFACE_REVISION):$(LIBSTARPURM_INTERFACE_AGE) lib_LTLIBRARIES = libstarpurm-@STARPU_EFFECTIVE_VERSION@.la libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined -version-info $(libstarpurm_so_version) libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = \ starpurm.c if STARPURM_HAVE_DLB libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += \ starpurm_dlb.c endif noinst_HEADERS = \ starpurm_private.h starpu-1.4.9+dfsg/starpurm/src/Makefile.in000066400000000000000000001140741507764646700205300ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ @STARPURM_HAVE_DLB_TRUE@am__append_3 = \ @STARPURM_HAVE_DLB_TRUE@ starpurm_dlb.c subdir = starpurm/src ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(libdir)" LTLIBRARIES = $(lib_LTLIBRARIES) libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = am__libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES_DIST = \ starpurm.c starpurm_dlb.c @STARPURM_HAVE_DLB_TRUE@am__objects_1 = starpurm_dlb.lo am_libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = starpurm.lo \ $(am__objects_1) libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = \ $(am_libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_LINK = $(LIBTOOL) $(AM_V_lt) \ --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS) \ $(LDFLAGS) -o $@ AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/starpurm.Plo \ ./$(DEPDIR)/starpurm_dlb.Plo am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = $(libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES) DIST_SOURCES = \ $(am__libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES_DIST) RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac HEADERS = $(noinst_HEADERS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/make/starpu-notests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ $(STARPU_EXPORTED_LIBS) $(HWLOC_LIBS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_CFLAGS = $(GLOBAL_AM_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) SUBDIRS = CLEANFILES = *.gcno *.gcda *.linkinfo AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src \ -I$(top_builddir)/src -I$(top_builddir)/include \ -I$(top_srcdir)/starpurm/include -I$(top_srcdir)/starpurm/src \ -I$(top_builddir)/starpurm/src \ -I$(top_builddir)/starpurm/include $(STARPU_H_CPPFLAGS) libstarpurm_so_version = $(LIBSTARPURM_INTERFACE_CURRENT):$(LIBSTARPURM_INTERFACE_REVISION):$(LIBSTARPURM_INTERFACE_AGE) lib_LTLIBRARIES = libstarpurm-@STARPU_EFFECTIVE_VERSION@.la libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined -version-info $(libstarpurm_so_version) libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = starpurm.c \ $(am__append_3) noinst_HEADERS = \ starpurm_private.h all: all-recursive .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpurm/src/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign starpurm/src/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-notests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): install-libLTLIBRARIES: $(lib_LTLIBRARIES) @$(NORMAL_INSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ list2=; for p in $$list; do \ if test -f $$p; then \ list2="$$list2 $$p"; \ else :; fi; \ done; \ test -z "$$list2" || { \ echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ } uninstall-libLTLIBRARIES: @$(NORMAL_UNINSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ for p in $$list; do \ $(am__strip_dir) \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ done clean-libLTLIBRARIES: -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) @list='$(lib_LTLIBRARIES)'; \ locs=`for p in $$list; do echo $$p; done | \ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ sort -u`; \ test -z "$$locs" || { \ echo rm -f $${locs}; \ rm -f $${locs}; \ } libstarpurm-@STARPU_EFFECTIVE_VERSION@.la: $(libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(EXTRA_libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_DEPENDENCIES) $(AM_V_CCLD)$(libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_LINK) -rpath $(libdir) $(libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS) $(libstarpurm_@STARPU_EFFECTIVE_VERSION@_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpurm.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpurm_dlb.Plo@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile $(LTLIBRARIES) $(HEADERS) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(libdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ mostlyclean-am distclean: distclean-recursive -rm -f ./$(DEPDIR)/starpurm.Plo -rm -f ./$(DEPDIR)/starpurm_dlb.Plo -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-libLTLIBRARIES install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f ./$(DEPDIR)/starpurm.Plo -rm -f ./$(DEPDIR)/starpurm_dlb.Plo -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-libLTLIBRARIES .MAKE: $(am__recursive_targets) install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ am--depfiles check check-am clean clean-generic \ clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \ ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am install-exec \ install-exec-am install-html install-html-am install-info \ install-info-am install-libLTLIBRARIES install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs installdirs-am \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ uninstall-libLTLIBRARIES .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) recheck: -cat /dev/null showcheckfailed: @-cat /dev/null showfailed: @-cat /dev/null showcheck: -cat /dev/null showsuite: -cat /dev/null # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/starpurm/src/starpurm.c000066400000000000000000001445141507764646700205060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include /* * #define _DEBUG */ struct s_starpurm_unit { /* Opaque unit id. * * For StarPU-RM, this id is used as an index to array starpurm->units[]. */ int id; /* Id of the unit type. */ int type; /* Boolean indicating whether the device is currently selected for use by the runtime system. */ int selected; /* StarPU id of the worker driving the device. */ int workerid; /* Cpuset of the StarPU worker. */ hwloc_cpuset_t worker_cpuset; /* Condition variable to notify that a unit is now available to driver a worker waking up. */ starpu_pthread_cond_t unit_available_cond; }; static struct s_starpurm *_starpurm = NULL; #if 0 static char *bitmap_to_str(hwloc_bitmap_t bitmap) { int strl = hwloc_bitmap_snprintf(NULL, 0, bitmap); char *str = malloc(strl+1); hwloc_bitmap_snprintf(str, strl+1, bitmap); return str; } #endif #ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS enum e_starpurm_event { starpurm_event_code_min = 0, starpurm_event_exit = 0, starpurm_event_worker_going_to_sleep = 1, starpurm_event_worker_waking_up = 2, starpurm_event_unit_available = 3, starpurm_event_code_max = 3 }; const char *_starpurm_event_to_str(int event_code) { const char *s = NULL; switch (event_code) { case starpurm_event_exit: s = "starpurm_event_exit"; break; case starpurm_event_worker_going_to_sleep: s = "starpurm_event_worker_going_to_sleep"; break; case starpurm_event_worker_waking_up: s = "starpurm_event_worker_waking_up"; break; case starpurm_event_unit_available: s = "starpurm_event_unit_available"; break; default: s = ""; break; } return s; } struct s_starpurm_event { struct s_starpurm_event *next; struct s_starpurm_event *prev; enum e_starpurm_event code; int workerid; }; static void _enqueue_event(struct s_starpurm_event *event) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; assert(event->next == NULL); assert(event->prev == NULL); assert(event->code >= starpurm_event_code_min && event->code <= starpurm_event_code_max); STARPU_PTHREAD_MUTEX_LOCK(&rm->event_list_mutex); if (rm->event_processing_ended) { STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); return; } assert((rm->event_list_head == NULL && rm->event_list_tail == NULL) || (rm->event_list_head != NULL && rm->event_list_tail != NULL)); if (rm->event_list_head == NULL) { rm->event_list_tail = event; } else { rm->event_list_head->prev = event; } event->next = rm->event_list_head; rm->event_list_head = event; if (event->code == starpurm_event_exit) { rm->event_processing_ended = 1; int i; for (i=0; inunits; i++) { STARPU_PTHREAD_COND_BROADCAST(&rm->units[i].unit_available_cond); } } #ifdef STARPURM_VERBOSE if (event->code != starpurm_event_worker_waking_up) fprintf(stderr, "%s: event->code=%d('%s'), workerid=%u\n", __func__, event->code, _starpurm_event_to_str(event->code), event->workerid); #endif STARPU_PTHREAD_COND_BROADCAST(&rm->event_list_cond); #ifdef STARPURM_HAVE_DLB if (event->code == starpurm_event_worker_waking_up) { int unit_id = rm->worker_unit_ids[event->workerid]; /* if DLB is in use, wait for the unit to become available from the point of view of DLB, before using it */ #ifdef STARPURM_VERBOSE fprintf(stderr, "%s: event->code=%d('%s'), workerid=%u - waiting\n", __func__, event->code, _starpurm_event_to_str(event->code), event->workerid); #endif STARPU_PTHREAD_COND_WAIT(&rm->units[unit_id].unit_available_cond, &rm->event_list_mutex); #ifdef STARPURM_VERBOSE fprintf(stderr, "%s: event->code=%d('%s'), workerid=%u - wakeup\n", __func__, event->code, _starpurm_event_to_str(event->code), event->workerid); #endif } #endif STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); } static struct s_starpurm_event *_dequeue_event_no_lock(void) { struct s_starpurm *rm = _starpurm; struct s_starpurm_event *event = NULL; if (rm->event_list_tail != NULL) { event = rm->event_list_tail; if (event->prev == NULL) { rm->event_list_head = NULL; rm->event_list_tail = NULL; } else { event->prev->next = NULL; rm->event_list_tail = event->prev; } event->prev = NULL; event->next = NULL; } return event; } static struct s_starpurm_event *_wait_event_no_lock(void) { struct s_starpurm *rm = _starpurm; while (rm->event_list_head == NULL) { STARPU_PTHREAD_COND_WAIT(&rm->event_list_cond, &rm->event_list_mutex); } struct s_starpurm_event *event = _dequeue_event_no_lock(); return event; } /* unused */ static struct s_starpurm_event *_dequeue_event(void) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; STARPU_PTHREAD_MUTEX_LOCK(&rm->event_list_mutex); struct s_starpurm_event *event = _dequeue_event_no_lock(); STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); return event; } /* unused */ static struct s_starpurm_event *_wait_event(void) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; STARPU_PTHREAD_MUTEX_LOCK(&rm->event_list_mutex); struct s_starpurm_event *event = _wait_event_no_lock(); STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); return event; } static void _enqueue_exit_event(void) { struct s_starpurm_event *event = calloc(1, sizeof(*event)); event->code = starpurm_event_exit; event->workerid = 0; _enqueue_event(event); } static void callback_worker_going_to_sleep(int workerid) { struct s_starpurm_event *event = calloc(1, sizeof(*event)); event->code = starpurm_event_worker_going_to_sleep; event->workerid = workerid; _enqueue_event(event); } static void callback_worker_waking_up(int workerid) { struct s_starpurm_event *event = calloc(1, sizeof(*event)); event->code = starpurm_event_worker_waking_up; event->workerid = workerid; _enqueue_event(event); } void starpurm_enqueue_event_cpu_unit_available(int unit_id) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; assert(unit_id >= 0); /* * unit_id may exceed the number of CPU units actually used by StarPU, * if some CPU cores are not used. * * //assert(unit_id < rm->nunits_by_type[starpurm_unit_cpu]); */ int workerid = rm->units[unit_id].workerid; struct s_starpurm_event *event = calloc(1, sizeof(*event)); event->code = starpurm_event_unit_available; event->workerid = workerid; _enqueue_event(event); } static void *event_thread_func(void *_arg) { (void)_arg; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; int need_refresh = 0; STARPU_PTHREAD_MUTEX_LOCK(&rm->event_list_mutex); while (rm->event_processing_enabled == 0) { STARPU_PTHREAD_COND_WAIT(&rm->event_processing_cond, &rm->event_list_mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); hwloc_cpuset_t owned_cpuset = hwloc_bitmap_dup(rm->global_cpuset); hwloc_cpuset_t to_reclaim_cpuset = hwloc_bitmap_alloc(); hwloc_cpuset_t to_lend_cpuset = hwloc_bitmap_alloc(); while (1) { struct s_starpurm_event *event = _dequeue_event(); #ifdef STARPURM_HAVE_DLB if ((event == NULL || event->code == starpurm_event_exit) || need_refresh) #else if ((event == NULL || event->code == starpurm_event_exit) && need_refresh) #endif { int did_lend_cpuset = 1; #ifdef STARPURM_HAVE_DLB /* notify DLB about changes */ if (!hwloc_bitmap_iszero(to_reclaim_cpuset)) { starpurm_dlb_notify_starpu_worker_mask_waking_up(to_reclaim_cpuset); } did_lend_cpuset = 0; if (!hwloc_bitmap_iszero(to_lend_cpuset)) { did_lend_cpuset = starpurm_dlb_notify_starpu_worker_mask_going_to_sleep(to_lend_cpuset); } #endif /* if DLB is not initialized, ignore lend operations */ if (did_lend_cpuset) { hwloc_bitmap_andnot(owned_cpuset, owned_cpuset, to_lend_cpuset); } hwloc_bitmap_or(owned_cpuset, owned_cpuset, to_reclaim_cpuset); #if 0 { char *to_lend_str = bitmap_to_str(to_lend_cpuset); char *to_reclaim_str = bitmap_to_str(to_reclaim_cpuset); free(to_lend_str); free(to_reclaim_str); } #endif need_refresh = 0; hwloc_bitmap_zero(to_lend_cpuset); hwloc_bitmap_zero(to_reclaim_cpuset); } if (event == NULL) { event = _wait_event(); } if (event->code == starpurm_event_exit) { free(event); break; } /* TODO: accumulate state change */ switch (event->code) { case starpurm_event_worker_going_to_sleep: { if (event->workerid < rm->nunits) { int unit_id = rm->worker_unit_ids[event->workerid]; hwloc_bitmap_or(to_lend_cpuset, to_lend_cpuset, rm->units[unit_id].worker_cpuset); hwloc_bitmap_andnot(to_reclaim_cpuset, to_reclaim_cpuset, rm->units[unit_id].worker_cpuset); } } break; case starpurm_event_worker_waking_up: { if (event->workerid < rm->nunits) { int unit_id = rm->worker_unit_ids[event->workerid]; hwloc_bitmap_andnot(to_lend_cpuset, to_lend_cpuset, rm->units[unit_id].worker_cpuset); #ifdef STARPURM_HAVE_DLB if (rm->units[unit_id].type == starpurm_unit_cpu && !hwloc_bitmap_intersects(rm->units[unit_id].worker_cpuset, owned_cpuset)) { /* Only reclaim the unit from DLB if StarPU does not own it already. */ hwloc_bitmap_or(to_reclaim_cpuset, to_reclaim_cpuset, rm->units[unit_id].worker_cpuset); } else { STARPU_PTHREAD_COND_BROADCAST(&rm->units[unit_id].unit_available_cond); } #else hwloc_bitmap_or(to_reclaim_cpuset, to_reclaim_cpuset, rm->units[unit_id].worker_cpuset); #endif } } break; #ifdef STARPURM_HAVE_DLB case starpurm_event_unit_available: { if (event->workerid < rm->nunits) { /* a reclaimed unit is now available from DLB, unlock the corresponding worker waking up */ int unit_id = rm->worker_unit_ids[event->workerid]; STARPU_PTHREAD_COND_BROADCAST(&rm->units[unit_id].unit_available_cond); } } break; #endif default: /* unknown event code */ assert(0); break; } free(event); need_refresh = 1; } STARPU_PTHREAD_MUTEX_LOCK(&rm->event_list_mutex); /* exit event should be last */ assert(rm->event_list_head == NULL); assert(rm->event_list_tail == NULL); hwloc_bitmap_free(owned_cpuset); hwloc_bitmap_free(to_reclaim_cpuset); hwloc_bitmap_free(to_lend_cpuset); STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); return NULL; } #endif /* STARPURM_STARPU_HAVE_WORKER_CALLBACKS */ /* Resource enforcement */ static starpurm_drs_ret_t _starpurm_update_cpuset(hwloc_cpuset_t cpuset) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (hwloc_bitmap_isequal(cpuset, rm->selected_cpuset)) { return starpurm_DRS_SUCCESS; } STARPU_PTHREAD_MUTEX_LOCK(&rm->temporary_ctxs_mutex); if (rm->starpu_in_pause) { starpu_resume(); rm->starpu_in_pause = 0; } int workers_to_remove[_starpurm->nunits]; int nworkers_to_remove = 0; int workers_to_add[_starpurm->nunits]; int nworkers_to_add = 0; int i; hwloc_cpuset_t temp_cpuset = hwloc_bitmap_alloc(); int new_selected_ncpus = 0; for (i=0; inunits; i++) { struct s_starpurm_unit *unit = &rm->units[i]; hwloc_bitmap_and(temp_cpuset, unit->worker_cpuset, cpuset); if (hwloc_bitmap_iszero(temp_cpuset)) { workers_to_remove[nworkers_to_remove] = unit->workerid; unit->selected = 0; nworkers_to_remove++; } else { workers_to_add[nworkers_to_add] = unit->workerid; unit->selected = 1; nworkers_to_add++; if (unit->type == starpurm_unit_cpu) { new_selected_ncpus++; } } } hwloc_bitmap_free(temp_cpuset); rm->selected_nworkers = nworkers_to_add; rm->selected_ncpus = new_selected_ncpus; hwloc_bitmap_free(rm->selected_cpuset); rm->selected_cpuset = hwloc_bitmap_dup(cpuset); if (nworkers_to_add > 0) { #if defined(STARPURM_HAVE_DLB) && !defined(STARPURM_STARPU_HAVE_WORKER_CALLBACKS) { /* if StarPU worker callbacks are not enabled, we still * notify DLB about resource usage changes, but we do * not wait for the formal DLB go to use the units */ hwloc_cpuset_t to_reclaim_cpuset = hwloc_bitmap_alloc(); for (i=0; iworker_unit_ids[workers_to_add[i]]; hwloc_bitmap_or(to_reclaim_cpuset, to_reclaim_cpuset, rm->units[unit_id].worker_cpuset); } starpurm_dlb_notify_starpu_worker_mask_waking_up(to_reclaim_cpuset); hwloc_bitmap_free(to_reclaim_cpuset); } #endif starpu_sched_ctx_add_workers(workers_to_add, nworkers_to_add, rm->sched_ctx_id); } if (nworkers_to_remove > 0) { starpu_sched_ctx_remove_workers(workers_to_remove, nworkers_to_remove, rm->sched_ctx_id); #if defined(STARPURM_HAVE_DLB) && !defined(STARPURM_STARPU_HAVE_WORKER_CALLBACKS) { /* if StarPU worker callbacks are not enabled, we still * notify DLB about resource usage changes, but we do * not wait for the workers to become idle */ hwloc_cpuset_t to_lend_cpuset = hwloc_bitmap_alloc(); for (i=0; iworker_unit_ids[workers_to_remove[i]]; hwloc_bitmap_or(to_lend_cpuset, to_lend_cpuset, rm->units[unit_id].worker_cpuset); } starpurm_dlb_notify_starpu_worker_mask_going_to_sleep(to_lend_cpuset); hwloc_bitmap_free(to_lend_cpuset); } #endif } #ifdef _DEBUG starpu_sched_ctx_display_workers(rm->sched_ctx_id, stderr); #endif /* _DEBUG */ if (rm->selected_nworkers == 0 && rm->avail_temporary_ctxs == rm->max_temporary_ctxs) { rm->starpu_in_pause = 1; starpu_pause(); } STARPU_PTHREAD_MUTEX_UNLOCK(&rm->temporary_ctxs_mutex); return starpurm_DRS_SUCCESS; } static unsigned _starpurm_temporary_context_alloc(hwloc_cpuset_t cpuset) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); assert(_starpurm->max_temporary_ctxs > 0); struct s_starpurm *rm = _starpurm; STARPU_PTHREAD_MUTEX_LOCK(&rm->temporary_ctxs_mutex); while(rm->avail_temporary_ctxs == 0) { STARPU_PTHREAD_COND_WAIT(&rm->temporary_ctxs_cond, &rm->temporary_ctxs_mutex); } assert(rm->avail_temporary_ctxs > 0); rm->avail_temporary_ctxs--; if (rm->starpu_in_pause) { starpu_resume(); rm->starpu_in_pause = 0; } STARPU_PTHREAD_MUTEX_UNLOCK(&rm->temporary_ctxs_mutex); unsigned sched_ctx_id = starpu_sched_ctx_create(NULL, -1, "starpurm_temp", STARPU_SCHED_CTX_POLICY_NAME, "eager", 0); assert(sched_ctx_id != STARPU_NMAX_SCHED_CTXS); int workers_to_remove[_starpurm->nunits]; int nworkers_to_remove = 0; int workers_to_add[_starpurm->nunits]; int nworkers_to_add = 0; int i; hwloc_cpuset_t temp_cpuset = hwloc_bitmap_alloc(); for (i=0; inunits; i++) { struct s_starpurm_unit *unit = &rm->units[i]; hwloc_bitmap_and(temp_cpuset, unit->worker_cpuset, cpuset); if (hwloc_bitmap_iszero(temp_cpuset)) { workers_to_remove[nworkers_to_remove] = unit->workerid; nworkers_to_remove++; } else { workers_to_add[nworkers_to_add] = unit->workerid; nworkers_to_add++; } } hwloc_bitmap_free(temp_cpuset); if (nworkers_to_add > 0) starpu_sched_ctx_add_workers(workers_to_add, nworkers_to_add, sched_ctx_id); if (nworkers_to_remove > 0) starpu_sched_ctx_remove_workers(workers_to_remove, nworkers_to_remove, sched_ctx_id); #ifdef _DEBUG starpu_sched_ctx_display_workers(sched_ctx_id, stderr); #endif /* _DEBUG */ return sched_ctx_id; } static void _starpurm_temporary_context_free(unsigned ctx) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); assert(_starpurm->max_temporary_ctxs > 0); struct s_starpurm *rm = _starpurm; starpu_sched_ctx_delete(ctx); STARPU_PTHREAD_MUTEX_LOCK(&rm->temporary_ctxs_mutex); rm->avail_temporary_ctxs++; STARPU_PTHREAD_COND_SIGNAL(&rm->temporary_ctxs_cond); if (rm->selected_nworkers == 0 && rm->avail_temporary_ctxs == rm->max_temporary_ctxs) { rm->starpu_in_pause = 1; starpu_pause(); } STARPU_PTHREAD_MUTEX_UNLOCK(&rm->temporary_ctxs_mutex); } static starpurm_drs_ret_t _starpurm_set_ncpus(int ncpus) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; int i; if (ncpus > rm->nunits_by_type[starpurm_unit_cpu]) { ncpus = rm->nunits_by_type[starpurm_unit_cpu]; } if (ncpus == rm->selected_ncpus) { return starpurm_DRS_SUCCESS; } STARPU_PTHREAD_MUTEX_LOCK(&rm->temporary_ctxs_mutex); if (rm->starpu_in_pause) { starpu_resume(); rm->starpu_in_pause = 0; } int workers_to_remove[_starpurm->nunits]; int nworkers_to_remove = 0; int workers_to_add[_starpurm->nunits]; int nworkers_to_add = 0; for (i=0; inunits; i++) { struct s_starpurm_unit *unit = &rm->units[i]; if (unit->type != starpurm_unit_cpu) continue; if (nworkers_to_add < ncpus) { workers_to_add[nworkers_to_add] = unit->workerid; unit->selected = 1; nworkers_to_add++; hwloc_bitmap_or(rm->selected_cpuset, rm->selected_cpuset, unit->worker_cpuset); } else { workers_to_remove[nworkers_to_remove] = unit->workerid; unit->selected = 0; hwloc_bitmap_andnot(rm->selected_cpuset, rm->selected_cpuset, unit->worker_cpuset); nworkers_to_remove++; } } rm->selected_nworkers = nworkers_to_add; rm->selected_ncpus = nworkers_to_add; if (nworkers_to_add > 0) starpu_sched_ctx_add_workers(workers_to_add, nworkers_to_add, rm->sched_ctx_id); if (nworkers_to_remove > 0) starpu_sched_ctx_remove_workers(workers_to_remove, nworkers_to_remove, rm->sched_ctx_id); #ifdef _DEBUG starpu_sched_ctx_display_workers(rm->sched_ctx_id, stderr); #endif /* _DEBUG */ if (rm->selected_nworkers == 0 && rm->avail_temporary_ctxs == rm->max_temporary_ctxs) { rm->starpu_in_pause = 1; starpu_pause(); } STARPU_PTHREAD_MUTEX_UNLOCK(&rm->temporary_ctxs_mutex); return starpurm_DRS_SUCCESS; } /* Initialize rm state for StarPU */ void starpurm_initialize_with_cpuset(const hwloc_cpuset_t initially_owned_cpuset) { int ret; assert(_starpurm == NULL); struct s_starpurm *rm = calloc(1, sizeof(*rm)); STARPU_PTHREAD_MUTEX_INIT(&rm->temporary_ctxs_mutex, NULL); STARPU_PTHREAD_COND_INIT(&rm->temporary_ctxs_cond, NULL); rm->state = state_init; /* init hwloc objects */ ret = hwloc_topology_init(&rm->topology); STARPU_ASSERT_MSG(ret == 0, "Could not initialize Hwloc topology (%s)\n", strerror(errno)); ret = hwloc_topology_load(rm->topology); STARPU_ASSERT_MSG(ret == 0, "Could not load Hwloc topology (%s)\n", strerror(errno)); rm->global_cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_zero(rm->global_cpuset); rm->initially_owned_cpuset_mask = hwloc_bitmap_dup(initially_owned_cpuset); rm->all_cpu_workers_cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_zero(rm->all_cpu_workers_cpuset); rm->all_opencl_device_workers_cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_zero(rm->all_opencl_device_workers_cpuset); rm->all_cuda_device_workers_cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_zero(rm->all_cuda_device_workers_cpuset); rm->all_device_workers_cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_zero(rm->all_device_workers_cpuset); /* init event list, before StarPU is initialized */ STARPU_PTHREAD_MUTEX_INIT(&rm->event_list_mutex, NULL); STARPU_PTHREAD_COND_INIT(&rm->event_list_cond, NULL); STARPU_PTHREAD_COND_INIT(&rm->event_processing_cond, NULL); STARPU_PTHREAD_MUTEX_LOCK(&rm->event_list_mutex); rm->event_processing_enabled = 0; rm->event_processing_ended = 0; rm->event_list_head = NULL; rm->event_list_tail = NULL; STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); /* set _starpurm here since StarPU's callbacks may reference it once starpu_init is called */ _starpurm = rm; #ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS /* launch event thread */ ret = pthread_create(&rm->event_thread, NULL, event_thread_func, rm); assert(ret == 0); #endif /* init StarPU */ struct starpu_conf starpu_conf; ret = starpu_conf_init(&starpu_conf); assert(ret == 0); #ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS starpu_conf.callback_worker_going_to_sleep = callback_worker_going_to_sleep; starpu_conf.callback_worker_waking_up = callback_worker_waking_up; #endif ret = starpu_init(&starpu_conf); assert(ret == 0); /* init any worker objects */ rm->nunits = starpu_worker_get_count_by_type(STARPU_ANY_WORKER); /* init device worker objects */ rm->unit_ntypes = starpurm_unit_ntypes; rm->nunits_by_type = calloc(rm->unit_ntypes, sizeof(*rm->nunits_by_type)); rm->unit_offsets_by_type = calloc(rm->unit_ntypes, sizeof(*rm->unit_offsets_by_type)); const int cpu_nunits = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); rm->nunits_by_type[starpurm_unit_cpu] = cpu_nunits; const int opencl_nunits = starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); rm->nunits_by_type[starpurm_unit_opencl] = opencl_nunits; const int cuda_nunits = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER); rm->nunits_by_type[starpurm_unit_cuda] = cuda_nunits; const int nunits = cpu_nunits + opencl_nunits + cuda_nunits; rm->nunits = nunits; rm->units = calloc(nunits, sizeof(*rm->units)); int unitid = 0; int cpu_workerids[cpu_nunits]; starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, cpu_workerids, cpu_nunits); rm->unit_offsets_by_type[starpurm_unit_cpu] = unitid; int max_worker_id = 0; int i; for (i = 0; i < cpu_nunits; i++) { rm->units[unitid].id = unitid; rm->units[unitid].type = starpurm_unit_cpu; rm->units[unitid].selected = 1; /* enabled by default */ rm->units[unitid].workerid = cpu_workerids[i]; if (max_worker_id < rm->units[unitid].workerid) { max_worker_id = rm->units[unitid].workerid; } rm->units[unitid].worker_cpuset = starpu_worker_get_hwloc_cpuset(rm->units[unitid].workerid); STARPU_PTHREAD_COND_INIT(&rm->units[unitid].unit_available_cond, NULL); hwloc_bitmap_or(rm->global_cpuset, rm->global_cpuset, rm->units[unitid].worker_cpuset); hwloc_bitmap_or(rm->all_cpu_workers_cpuset, rm->all_cpu_workers_cpuset, rm->units[unitid].worker_cpuset);; #ifdef STARPURM_VERBOSE { char * s_unit = NULL; hwloc_bitmap_asprintf(&s_unit, rm->units[unitid].worker_cpuset); fprintf(stderr, "%s: 'cpu', unitid=%d, cpuset=0x%s, workerid=%d\n", __func__, unitid, s_unit, rm->units[unitid].workerid); free(s_unit); } #endif unitid++; } int opencl_workerids[opencl_nunits]; starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER, opencl_workerids, opencl_nunits); rm->unit_offsets_by_type[starpurm_unit_opencl] = unitid; for (i = 0; i < opencl_nunits; i++) { rm->units[unitid].id = unitid; rm->units[unitid].type = starpurm_unit_opencl; rm->units[unitid].selected = 1; /* enabled by default */ rm->units[unitid].workerid = opencl_workerids[i]; if (max_worker_id < rm->units[unitid].workerid) { max_worker_id = rm->units[unitid].workerid; } rm->units[unitid].worker_cpuset = starpu_worker_get_hwloc_cpuset(rm->units[unitid].workerid); STARPU_PTHREAD_COND_INIT(&rm->units[unitid].unit_available_cond, NULL); hwloc_bitmap_or(rm->global_cpuset, rm->global_cpuset, rm->units[unitid].worker_cpuset); hwloc_bitmap_or(rm->all_opencl_device_workers_cpuset, rm->all_opencl_device_workers_cpuset, rm->units[unitid].worker_cpuset); hwloc_bitmap_or(rm->all_device_workers_cpuset, rm->all_device_workers_cpuset, rm->units[unitid].worker_cpuset); unitid++; } int cuda_workerids[opencl_nunits]; starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, cuda_workerids, cuda_nunits); rm->unit_offsets_by_type[starpurm_unit_cuda] = unitid; for (i = 0; i < cuda_nunits; i++) { rm->units[unitid].id = unitid; rm->units[unitid].type = starpurm_unit_cuda; rm->units[unitid].selected = 1; /* enabled by default */ rm->units[unitid].workerid = cuda_workerids[i]; if (max_worker_id < rm->units[unitid].workerid) { max_worker_id = rm->units[unitid].workerid; } rm->units[unitid].worker_cpuset = starpu_worker_get_hwloc_cpuset(rm->units[unitid].workerid); STARPU_PTHREAD_COND_INIT(&rm->units[unitid].unit_available_cond, NULL); hwloc_bitmap_or(rm->global_cpuset, rm->global_cpuset, rm->units[unitid].worker_cpuset); hwloc_bitmap_or(rm->all_cuda_device_workers_cpuset, rm->all_cuda_device_workers_cpuset, rm->units[unitid].worker_cpuset); hwloc_bitmap_or(rm->all_device_workers_cpuset, rm->all_device_workers_cpuset, rm->units[unitid].worker_cpuset); unitid++; } rm->max_worker_id = max_worker_id; { int *worker_unit_ids = malloc((max_worker_id+1) * sizeof(*worker_unit_ids)); for (i = 0; i < max_worker_id+1; i++) { worker_unit_ids[i] = -1; } for (i=0; inunits; i++) { worker_unit_ids[rm->units[i].workerid] = i; } rm->worker_unit_ids = worker_unit_ids; } /* create StarPU sched_ctx for RM instance */ { int workerids[rm->nunits]; starpu_worker_get_ids_by_type(STARPU_ANY_WORKER, workerids, rm->nunits); /* TODO: make sched_ctx policy configurable */ rm->sched_ctx_id = starpu_sched_ctx_create(workerids, rm->nunits, "starpurm", STARPU_SCHED_CTX_POLICY_NAME, "eager", 0); #ifdef _DEBUG starpu_sched_ctx_display_workers(rm->sched_ctx_id, stderr); #endif /* _DEBUG */ } starpu_sched_ctx_set_context(&rm->sched_ctx_id); /* number selected workers (total) */ rm->selected_nworkers = rm->nunits; /* number of selected CPUs workers */ rm->selected_ncpus = rm->nunits_by_type[starpurm_unit_cpu]; /* cpuset of all currently selected workers */ rm->selected_cpuset = hwloc_bitmap_dup(rm->global_cpuset); if (STARPU_NMAX_SCHED_CTXS > 2) { /* account for main ctx (0) and default rm ctx (1) * TODO: check that no other ctxs are allocated by external codes */ rm->max_temporary_ctxs = STARPU_NMAX_SCHED_CTXS - 2; } else { rm->max_temporary_ctxs = 0; } rm->avail_temporary_ctxs = rm->max_temporary_ctxs; if (rm->selected_nworkers == 0) { rm->starpu_in_pause = 1; starpu_pause(); } else { rm->starpu_in_pause = 0; } #ifdef STARPURM_HAVE_DLB starpurm_dlb_init(rm); #endif STARPU_PTHREAD_MUTEX_LOCK(&rm->event_list_mutex); rm->event_processing_enabled = 1; STARPU_PTHREAD_COND_BROADCAST(&rm->event_processing_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&rm->event_list_mutex); _starpurm = rm; } void starpurm_initialize() { hwloc_cpuset_t full_cpuset = hwloc_bitmap_alloc_full(); starpurm_initialize_with_cpuset(full_cpuset); hwloc_bitmap_free(full_cpuset); } /* Free rm struct for StarPU */ void starpurm_shutdown(void) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (rm->starpu_in_pause) { starpu_resume(); rm->starpu_in_pause = 0; } starpu_sched_ctx_delete(rm->sched_ctx_id); #ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS _enqueue_exit_event(); #endif starpu_shutdown(); #ifdef STARPURM_HAVE_DLB starpurm_dlb_exit(); #endif hwloc_topology_destroy(rm->topology); #ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS STARPU_PTHREAD_JOIN(rm->event_thread, NULL); #endif assert(rm->event_list_head == NULL); assert(rm->event_list_tail == NULL); STARPU_PTHREAD_COND_DESTROY(&rm->event_list_cond); STARPU_PTHREAD_MUTEX_DESTROY(&rm->event_list_mutex); rm->state = state_uninitialized; hwloc_bitmap_free(rm->global_cpuset); hwloc_bitmap_free(rm->all_cpu_workers_cpuset); hwloc_bitmap_free(rm->all_opencl_device_workers_cpuset); hwloc_bitmap_free(rm->all_cuda_device_workers_cpuset); hwloc_bitmap_free(rm->all_device_workers_cpuset); hwloc_bitmap_free(rm->selected_cpuset); hwloc_bitmap_free(rm->initially_owned_cpuset_mask); int i; for (i=0; inunits; i++) { STARPU_PTHREAD_COND_DESTROY(&rm->units[i].unit_available_cond); } free(rm->units); rm->units = NULL; free(rm->nunits_by_type); rm->nunits_by_type = NULL; free(rm->unit_offsets_by_type); rm->unit_offsets_by_type = NULL; free(rm); _starpurm = NULL; } void starpurm_spawn_kernel_on_cpus(void *data, void(*f)(void *), void *args, hwloc_cpuset_t cpuset) { (void) data; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; unsigned ctx = _starpurm_temporary_context_alloc(cpuset); starpu_sched_ctx_set_context(&ctx); f(args); starpu_sched_ctx_set_context(&rm->sched_ctx_id); _starpurm_temporary_context_free(ctx); } struct s_starpurm__spawn_args { void(*f)(void *); void *args; void(*cb_f)(void *); void *cb_args; hwloc_cpuset_t cpuset; }; static void *_starpurm_spawn_kernel_thread(void *_spawn_args) { struct s_starpurm__spawn_args *spawn_args = _spawn_args; unsigned ctx = _starpurm_temporary_context_alloc(spawn_args->cpuset); starpu_sched_ctx_set_context(&ctx); spawn_args->f(spawn_args->args); struct s_starpurm *rm = _starpurm; starpu_sched_ctx_set_context(&rm->sched_ctx_id); _starpurm_temporary_context_free(ctx); spawn_args->cb_f(spawn_args->cb_args); hwloc_bitmap_free(spawn_args->cpuset); free(spawn_args); return NULL; } void starpurm_spawn_kernel_on_cpus_callback(void *data, void(*f)(void *), void *args, hwloc_cpuset_t cpuset, void(*cb_f)(void *), void *cb_args) { (void) data; struct s_starpurm__spawn_args *spawn_args = calloc(1, sizeof(*spawn_args)); spawn_args->f = f; spawn_args->args = args; spawn_args->cb_f = cb_f; spawn_args->cb_args = cb_args; spawn_args->cpuset = hwloc_bitmap_dup(cpuset); pthread_attr_t attr; int ret; ret = pthread_attr_init(&attr); assert(ret == 0); ret = pthread_attr_setdetachstate(&attr, 1); assert(ret == 0); pthread_t t; ret = pthread_create(&t, &attr, _starpurm_spawn_kernel_thread, spawn_args); assert(ret == 0); } static void *_starpurm_spawn_kernel_in_default_context_thread(void *_spawn_args) { struct s_starpurm__spawn_args *spawn_args = _spawn_args; struct s_starpurm *rm = _starpurm; starpu_sched_ctx_set_context(&rm->sched_ctx_id); spawn_args->f(spawn_args->args); spawn_args->cb_f(spawn_args->cb_args); free(spawn_args); return NULL; } void starpurm_spawn_kernel_callback(void *data, void(*f)(void *), void *args, void(*cb_f)(void *), void *cb_args) { (void) data; struct s_starpurm__spawn_args *spawn_args = calloc(1, sizeof(*spawn_args)); spawn_args->f = f; spawn_args->args = args; spawn_args->cb_f = cb_f; spawn_args->cb_args = cb_args; pthread_attr_t attr; int ret; ret = pthread_attr_init(&attr); assert(ret == 0); ret = pthread_attr_setdetachstate(&attr, 1); assert(ret == 0); pthread_t t; ret = pthread_create(&t, &attr, _starpurm_spawn_kernel_in_default_context_thread, spawn_args); assert(ret == 0); } hwloc_cpuset_t starpurm_get_unit_cpuset(int unitid) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; assert(unitid >= 0 && unitid < rm->nunits); return hwloc_bitmap_dup(rm->units[unitid].worker_cpuset); } hwloc_cpuset_t starpurm_get_cpu_worker_cpuset(int unit_rank) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; assert(unit_rank >= 0 && unit_rank < rm->nunits_by_type[starpurm_unit_cpu]); return hwloc_bitmap_dup(rm->units[rm->unit_offsets_by_type[starpurm_unit_cpu] + unit_rank].worker_cpuset); } /* Dynamic resource sharing */ starpurm_drs_ret_t starpurm_set_drs_enable(starpurm_drs_desc_t *spd) { (void)spd; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; rm->dynamic_resource_sharing = 1; return starpurm_DRS_SUCCESS; } starpurm_drs_ret_t starpurm_set_drs_disable(starpurm_drs_desc_t *spd) { (void)spd; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; rm->dynamic_resource_sharing = 0; return starpurm_DRS_SUCCESS; } int starpurm_drs_enabled_p(void) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; return rm->dynamic_resource_sharing; } starpurm_drs_ret_t starpurm_set_max_parallelism(starpurm_drs_desc_t *spd, int ncpus) { (void)spd; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; if (ncpus > rm->nunits_by_type[starpurm_unit_cpu]) { ncpus = rm->nunits_by_type[starpurm_unit_cpu]; } rm->max_ncpus = ncpus; if (rm->selected_ncpus > ncpus) { return _starpurm_set_ncpus(ncpus); } return starpurm_DRS_SUCCESS; } starpurm_drs_ret_t starpurm_callback_set(starpurm_drs_desc_t *spd, starpurm_drs_cbs_t which, starpurm_drs_cb_t callback) { (void)spd; (void)which; (void)callback; /* unimplemented */ assert(0); return starpurm_DRS_PERM; } starpurm_drs_ret_t starpurm_callback_get(starpurm_drs_desc_t *spd, starpurm_drs_cbs_t which, starpurm_drs_cb_t *callback) { (void)spd; (void)which; (void)callback; /* unimplemented */ assert(0); return starpurm_DRS_PERM; } starpurm_drs_ret_t starpurm_assign_cpu_to_starpu(starpurm_drs_desc_t *spd, int cpuid) { (void)spd; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; starpurm_drs_ret_t ret = 0; assert(hwloc_bitmap_isset(rm->global_cpuset, cpuid)); if (!hwloc_bitmap_isset(rm->selected_cpuset, cpuid)) { hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); hwloc_bitmap_set(temp_cpuset, cpuid); ret = _starpurm_update_cpuset(temp_cpuset); hwloc_bitmap_free(temp_cpuset); } return ret; } starpurm_drs_ret_t starpurm_assign_cpus_to_starpu(starpurm_drs_desc_t *spd, int ncpus) { (void)spd; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; /* add ncpus more CPUs to the CPUs pool */ return _starpurm_set_ncpus(rm->selected_ncpus+ncpus); } starpurm_drs_ret_t starpurm_assign_cpu_mask_to_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) { (void)spd; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); hwloc_bitmap_or(temp_cpuset, temp_cpuset, mask); starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); hwloc_bitmap_free(temp_cpuset); return ret; } starpurm_drs_ret_t starpurm_assign_all_cpus_to_starpu(starpurm_drs_desc_t *spd) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; return starpurm_assign_cpus_to_starpu(spd, rm->nunits_by_type[starpurm_unit_cpu]); } starpurm_drs_ret_t starpurm_withdraw_cpu_from_starpu(starpurm_drs_desc_t *spd, int cpuid) { (void)spd; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; starpurm_drs_ret_t ret = 0; assert(hwloc_bitmap_isset(rm->global_cpuset, cpuid)); if (hwloc_bitmap_isset(rm->selected_cpuset, cpuid)) { hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); hwloc_bitmap_clr(temp_cpuset, cpuid); ret = _starpurm_update_cpuset(temp_cpuset); hwloc_bitmap_free(temp_cpuset); } return ret; } starpurm_drs_ret_t starpurm_withdraw_cpus_from_starpu(starpurm_drs_desc_t *spd, int ncpus) { (void)spd; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; /* add ncpus more CPUs to the CPUs pool */ starpurm_drs_ret_t ret = 0; if (ncpus <= rm->nunits_by_type[starpurm_unit_cpu]) { ret = _starpurm_set_ncpus(rm->nunits_by_type[starpurm_unit_cpu]-ncpus); } else { ret = _starpurm_set_ncpus(0); } return ret; } starpurm_drs_ret_t starpurm_withdraw_cpu_mask_from_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) { (void)spd; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); hwloc_bitmap_andnot(temp_cpuset, temp_cpuset, mask); starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); hwloc_bitmap_free(temp_cpuset); return ret; } starpurm_drs_ret_t starpurm_withdraw_all_cpus_from_starpu(starpurm_drs_desc_t *spd) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; return starpurm_withdraw_cpus_from_starpu(spd, rm->nunits_by_type[starpurm_unit_cpu]); } /* --- */ starpurm_drs_ret_t starpurm_lend_cpu(starpurm_drs_desc_t *spd, int cpuid) { return starpurm_assign_cpu_to_starpu(spd, cpuid); } starpurm_drs_ret_t starpurm_lend_cpus(starpurm_drs_desc_t *spd, int ncpus) { return starpurm_assign_cpus_to_starpu(spd, ncpus); } starpurm_drs_ret_t starpurm_lend_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) { return starpurm_assign_cpu_mask_to_starpu(spd, mask); } starpurm_drs_ret_t starpurm_lend(starpurm_drs_desc_t *spd) { return starpurm_assign_all_cpus_to_starpu(spd); } starpurm_drs_ret_t starpurm_reclaim_cpu(starpurm_drs_desc_t *spd, int cpuid) { return starpurm_withdraw_cpu_from_starpu(spd, cpuid); } starpurm_drs_ret_t starpurm_reclaim_cpus(starpurm_drs_desc_t *spd, int ncpus) { return starpurm_withdraw_cpus_from_starpu(spd, ncpus); } starpurm_drs_ret_t starpurm_reclaim_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) { return starpurm_withdraw_cpu_mask_from_starpu(spd, mask); } starpurm_drs_ret_t starpurm_reclaim(starpurm_drs_desc_t *spd) { return starpurm_withdraw_all_cpus_from_starpu(spd); } starpurm_drs_ret_t starpurm_acquire(starpurm_drs_desc_t *spd) { return starpurm_withdraw_all_cpus_from_starpu(spd); } starpurm_drs_ret_t starpurm_acquire_cpu(starpurm_drs_desc_t *spd, int cpuid) { return starpurm_withdraw_cpu_from_starpu(spd, cpuid); } starpurm_drs_ret_t starpurm_acquire_cpus(starpurm_drs_desc_t *spd, int ncpus) { return starpurm_withdraw_cpus_from_starpu(spd, ncpus); } starpurm_drs_ret_t starpurm_acquire_cpu_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) { return starpurm_withdraw_cpu_mask_from_starpu(spd, mask); } starpurm_drs_ret_t starpurm_return_all(starpurm_drs_desc_t *spd) { return starpurm_assign_all_cpus_to_starpu(spd); } starpurm_drs_ret_t starpurm_return_cpu(starpurm_drs_desc_t *spd, int cpuid) { return starpurm_assign_cpu_to_starpu(spd, cpuid); } /* Pause/resume */ starpurm_drs_ret_t starpurm_create_block_condition(starpurm_block_cond_t *cond) { /* unimplemented */ (void)cond; assert(0); return starpurm_DRS_PERM; } void starpurm_block_current_task(starpurm_block_cond_t *cond) { /* unimplemented */ (void)cond; assert(0); } void starpurm_signal_block_condition(starpurm_block_cond_t *cond) { /* unimplemented */ (void)cond; assert(0); } void starpurm_register_polling_service(const char *service_name, starpurm_polling_t function, void *data) { /* unimplemented */ (void)service_name; (void)function; (void)data; assert(0); } void starpurm_unregister_polling_service(const char *service_name, starpurm_polling_t function, void *data) { /* unimplemented */ (void)service_name; (void)function; (void)data; assert(0); } /* devices */ int starpurm_get_device_type_id(const char *type_str) { if (strcmp(type_str, "cpu") == 0) return starpurm_unit_cpu; if (strcmp(type_str, "opencl") == 0) return starpurm_unit_opencl; if (strcmp(type_str, "cuda") == 0) return starpurm_unit_cuda; return -1; } const char *starpurm_get_device_type_name(int type_id) { if (type_id == starpurm_unit_cpu) return "cpu"; if (type_id == starpurm_unit_opencl) return "opencl"; if (type_id == starpurm_unit_cuda) return "cuda"; return NULL; } int starpurm_get_nb_devices_by_type(int type_id) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (type_id < 0 || type_id >= starpurm_unit_ntypes) return -1; return rm->nunits_by_type[type_id]; } int starpurm_get_device_id(int type_id, int unit_rank) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (type_id < 0 || type_id >= starpurm_unit_ntypes) return -1; if (unit_rank < 0 || unit_rank >= rm->nunits_by_type[type_id]) return -1; return rm->units[rm->unit_offsets_by_type[type_id] + unit_rank].id; } starpurm_drs_ret_t starpurm_assign_device_to_starpu(starpurm_drs_desc_t *spd, int type_id, int unit_rank) { (void)spd; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; if (type_id < 0 || type_id >= starpurm_unit_ntypes) return starpurm_DRS_EINVAL; if (unit_rank < 0 || unit_rank >= rm->nunits_by_type[type_id]) return starpurm_DRS_EINVAL; hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); hwloc_bitmap_or(temp_cpuset, temp_cpuset, rm->units[rm->unit_offsets_by_type[type_id] + unit_rank].worker_cpuset); starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); hwloc_bitmap_free(temp_cpuset); return ret; } starpurm_drs_ret_t starpurm_assign_devices_to_starpu(starpurm_drs_desc_t *spd, int type_id, int ndevices) { (void)spd; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; if (type_id < 0 || type_id >= starpurm_unit_ntypes) return starpurm_DRS_EINVAL; hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); if (ndevices > rm->nunits_by_type[type_id]) { ndevices = rm->nunits_by_type[type_id]; } int i; for (i = 0; i < ndevices; i++) { hwloc_bitmap_or(temp_cpuset, temp_cpuset, rm->units[rm->unit_offsets_by_type[type_id] + i].worker_cpuset); } starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); hwloc_bitmap_free(temp_cpuset); return ret; } starpurm_drs_ret_t starpurm_assign_device_mask_to_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) { (void)spd; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); hwloc_bitmap_or(temp_cpuset, temp_cpuset, mask); starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); hwloc_bitmap_free(temp_cpuset); return ret; } starpurm_drs_ret_t starpurm_assign_all_devices_to_starpu(starpurm_drs_desc_t *spd, int type_id) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; if (type_id < 0 || type_id >= starpurm_unit_ntypes) return starpurm_DRS_EINVAL; return starpurm_assign_devices_to_starpu(spd, type_id, rm->nunits_by_type[type_id]); } starpurm_drs_ret_t starpurm_withdraw_device_from_starpu(starpurm_drs_desc_t *spd, int type_id, int unit_rank) { (void)spd; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; if (type_id < 0 || type_id >= starpurm_unit_ntypes) return starpurm_DRS_EINVAL; if (unit_rank < 0 || unit_rank >= rm->nunits_by_type[type_id]) return starpurm_DRS_EINVAL; hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); hwloc_bitmap_andnot(temp_cpuset, temp_cpuset, rm->units[rm->unit_offsets_by_type[type_id] + unit_rank].worker_cpuset); starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); hwloc_bitmap_free(temp_cpuset); return ret; } starpurm_drs_ret_t starpurm_withdraw_devices_from_starpu(starpurm_drs_desc_t *spd, int type_id, int ndevices) { (void)spd; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; if (type_id < 0 || type_id >= starpurm_unit_ntypes) return starpurm_DRS_EINVAL; hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); if (ndevices > rm->nunits_by_type[type_id]) { ndevices = rm->nunits_by_type[type_id]; } int i; for (i = 0; i < ndevices; i++) { hwloc_bitmap_andnot(temp_cpuset, temp_cpuset, rm->units[rm->unit_offsets_by_type[type_id] + i].worker_cpuset); } starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); hwloc_bitmap_free(temp_cpuset); return ret; } starpurm_drs_ret_t starpurm_withdraw_device_mask_from_starpu(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) { (void)spd; assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; hwloc_cpuset_t temp_cpuset = hwloc_bitmap_dup(rm->selected_cpuset); hwloc_bitmap_andnot(temp_cpuset, temp_cpuset, mask); starpurm_drs_ret_t ret = _starpurm_update_cpuset(temp_cpuset); hwloc_bitmap_free(temp_cpuset); return ret; } starpurm_drs_ret_t starpurm_withdraw_all_devices_from_starpu(starpurm_drs_desc_t *spd, int type_id) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; if (!rm->dynamic_resource_sharing) return starpurm_DRS_DISABLD; if (type_id < 0 || type_id >= starpurm_unit_ntypes) return starpurm_DRS_EINVAL; return starpurm_withdraw_devices_from_starpu(spd, type_id, rm->nunits_by_type[type_id]); } /* --- */ starpurm_drs_ret_t starpurm_lend_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank) { return starpurm_assign_device_to_starpu(spd, type_id, unit_rank); } starpurm_drs_ret_t starpurm_lend_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices) { return starpurm_assign_devices_to_starpu(spd, type_id, ndevices); } starpurm_drs_ret_t starpurm_lend_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) { return starpurm_assign_device_mask_to_starpu(spd, mask); } starpurm_drs_ret_t starpurm_lend_all_devices(starpurm_drs_desc_t *spd, int type_id) { return starpurm_assign_all_devices_to_starpu(spd, type_id); } starpurm_drs_ret_t starpurm_reclaim_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank) { return starpurm_withdraw_device_from_starpu(spd, type_id, unit_rank); } starpurm_drs_ret_t starpurm_reclaim_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices) { return starpurm_withdraw_devices_from_starpu(spd, type_id, ndevices); } starpurm_drs_ret_t starpurm_reclaim_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) { return starpurm_withdraw_device_mask_from_starpu(spd, mask); } starpurm_drs_ret_t starpurm_reclaim_all_devices(starpurm_drs_desc_t *spd, int type_id) { return starpurm_withdraw_all_devices_from_starpu(spd, type_id); } starpurm_drs_ret_t starpurm_acquire_all_devices(starpurm_drs_desc_t *spd, int type_id) { return starpurm_withdraw_all_devices_from_starpu(spd, type_id); } starpurm_drs_ret_t starpurm_acquire_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank) { return starpurm_withdraw_device_from_starpu(spd, type_id, unit_rank); } starpurm_drs_ret_t starpurm_acquire_devices(starpurm_drs_desc_t *spd, int type_id, int ndevices) { return starpurm_withdraw_devices_from_starpu(spd, type_id, ndevices); } starpurm_drs_ret_t starpurm_acquire_device_mask(starpurm_drs_desc_t *spd, const hwloc_cpuset_t mask) { return starpurm_withdraw_device_mask_from_starpu(spd, mask); } starpurm_drs_ret_t starpurm_return_all_devices(starpurm_drs_desc_t *spd, int type_id) { return starpurm_assign_all_devices_to_starpu(spd, type_id); } starpurm_drs_ret_t starpurm_return_device(starpurm_drs_desc_t *spd, int type_id, int unit_rank) { return starpurm_assign_device_to_starpu(spd, type_id, unit_rank); } /* cpusets */ hwloc_cpuset_t starpurm_get_device_worker_cpuset(int type_id, int unit_rank) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; assert(type_id >= 0 && type_id < starpurm_unit_ntypes); assert(unit_rank >= 0 && unit_rank < rm->nunits_by_type[type_id]); return hwloc_bitmap_dup(rm->units[rm->unit_offsets_by_type[type_id] + unit_rank].worker_cpuset); } hwloc_cpuset_t starpurm_get_global_cpuset(void) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; return hwloc_bitmap_dup(rm->global_cpuset); } hwloc_cpuset_t starpurm_get_selected_cpuset(void) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; return hwloc_bitmap_dup(rm->selected_cpuset); } hwloc_cpuset_t starpurm_get_all_cpu_workers_cpuset(void) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; return hwloc_bitmap_dup(rm->all_cpu_workers_cpuset); } static hwloc_cpuset_t starpurm_get_all_opencl_device_workers_cpuset(void) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; return hwloc_bitmap_dup(rm->all_opencl_device_workers_cpuset); } static hwloc_cpuset_t starpurm_get_all_cuda_device_workers_cpuset(void) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; return hwloc_bitmap_dup(rm->all_cuda_device_workers_cpuset); } hwloc_cpuset_t starpurm_get_all_device_workers_cpuset(void) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; return hwloc_bitmap_dup(rm->all_device_workers_cpuset); } hwloc_cpuset_t starpurm_get_all_device_workers_cpuset_by_type(int typeid) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); assert(typeid != starpurm_unit_cpu); if (typeid == starpurm_unit_opencl) return starpurm_get_all_opencl_device_workers_cpuset(); if (typeid == starpurm_unit_cuda) return starpurm_get_all_cuda_device_workers_cpuset(); hwloc_cpuset_t empty_bitmap = hwloc_bitmap_alloc(); hwloc_bitmap_zero(empty_bitmap); return empty_bitmap; } starpu-1.4.9+dfsg/starpurm/src/starpurm_dlb.c000066400000000000000000000275741507764646700213350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* CPUSET routines */ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include #include #include #include #include #include #include #ifdef HAVE_HWLOC_GLIBC_SCHED_H #include #endif #include #include #include #include #ifndef STARPURM_HAVE_DLB #error "STARPU-RM DLB support not enabled" #endif #include #include /* * DLB interfacing */ static dlb_handler_t dlb_handle; static cpu_set_t starpurm_process_mask; static hwloc_cpuset_t starpurm_process_cpuset; static struct s_starpurm *_starpurm = NULL; static pthread_mutex_t dlb_handle_mutex = PTHREAD_MUTEX_INITIALIZER; static int glibc_cpuid_to_unitid[CPU_SETSIZE]; static int *unitid_to_glibc_cpuid = NULL; static const char * _dlb_error_str(int error_code) { const char *s = NULL; switch (error_code) { case DLB_NOUPDT: s="DLB_NOUPDT"; break; case DLB_NOTED: s="DLB_NOTED"; break; case DLB_SUCCESS: s="DLB_SUCCESS"; break; case DLB_ERR_UNKNOWN: s="DLB_ERR_UNKNOWN"; break; case DLB_ERR_NOINIT: s="DLB_ERR_NOINIT"; break; case DLB_ERR_INIT: s="DLB_ERR_INIT"; break; case DLB_ERR_DISBLD: s="DLB_ERR_DISBLD"; break; case DLB_ERR_NOSHMEM: s="DLB_ERR_NOSHMEM"; break; case DLB_ERR_NOPROC: s="DLB_ERR_NOPROC"; break; case DLB_ERR_PDIRTY: s="DLB_ERR_PDIRTY"; break; case DLB_ERR_PERM: s="DLB_ERR_PERM"; break; case DLB_ERR_TIMEOUT: s="DLB_ERR_TIMEOUT"; break; case DLB_ERR_NOCBK: s="DLB_ERR_NOCBK"; break; case DLB_ERR_NOENT: s="DLB_ERR_NOENT"; break; case DLB_ERR_NOCOMP: s="DLB_ERR_NOCOMP"; break; case DLB_ERR_REQST: s="DLB_ERR_REQST"; break; case DLB_ERR_NOMEM: s="DLB_ERR_NOMEM"; break; case DLB_ERR_NOPOL: s="DLB_ERR_NOPOL"; break; default: s = ""; break; } return s; } #define _dlb_check(s,r) do { if ((r) != DLB_SUCCESS) {fprintf(stderr, "%s:%d, %s - DLB call '%s' %s %d (%s)\n",__FILE__, __LINE__, __func__, (s), (r)>0?"returned warning code":"failed with error code", (r), _dlb_error_str((r))); assert(dlb_ret >= DLB_SUCCESS); }} while (0) #if 0 /* unused for now */ static void _glibc_cpuset_to_hwloc_cpuset(const cpu_set_t *glibc_cpuset, hwloc_cpuset_t *hwloc_cpuset) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; int status = hwloc_cpuset_from_glibc_sched_affinity(rm->topology, *hwloc_cpuset, glibc_cpuset, sizeof(cpu_set_t)); assert(status == 0); } #endif static void _hwloc_cpuset_to_glibc_cpuset(const hwloc_cpuset_t hwloc_cpuset, cpu_set_t *glibc_cpuset) { assert(_starpurm != NULL); assert(_starpurm->state != state_uninitialized); struct s_starpurm *rm = _starpurm; int status = hwloc_cpuset_to_glibc_sched_affinity(rm->topology, hwloc_cpuset, glibc_cpuset, sizeof(cpu_set_t)); assert(status == 0); } int starpurm_dlb_notify_starpu_worker_mask_going_to_sleep(const hwloc_cpuset_t hwloc_workers_cpuset) { int status = 0; pthread_mutex_lock(&dlb_handle_mutex); if (dlb_handle != NULL) { hwloc_cpuset_t hwloc_to_lend_cpuset = hwloc_bitmap_alloc(); hwloc_cpuset_t hwloc_to_return_cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_zero(hwloc_to_lend_cpuset); hwloc_bitmap_zero(hwloc_to_return_cpuset); hwloc_bitmap_and(hwloc_to_lend_cpuset, hwloc_workers_cpuset, starpurm_process_cpuset); hwloc_bitmap_andnot(hwloc_to_return_cpuset, hwloc_workers_cpuset, starpurm_process_cpuset); #ifdef STARPURM_DLB_VERBOSE { char * s_to_lend = NULL; char * s_to_return = NULL; hwloc_bitmap_asprintf(&s_to_lend, hwloc_to_lend_cpuset); hwloc_bitmap_asprintf(&s_to_return, hwloc_to_return_cpuset); fprintf(stderr, "%s: to_lend='%s', to_return='%s'\n", __func__, s_to_lend, s_to_return); free(s_to_lend); free(s_to_return); } #endif if (!hwloc_bitmap_iszero(hwloc_to_lend_cpuset)) { cpu_set_t glibc_to_lend_cpuset; CPU_ZERO(&glibc_to_lend_cpuset); _hwloc_cpuset_to_glibc_cpuset(hwloc_to_lend_cpuset, &glibc_to_lend_cpuset); int dlb_ret = DLB_LendCpuMask_sp(dlb_handle, &glibc_to_lend_cpuset); _dlb_check("DLB_LendCpuMask_sp", dlb_ret); } if (!hwloc_bitmap_iszero(hwloc_to_return_cpuset)) { cpu_set_t glibc_to_return_cpuset; CPU_ZERO(&glibc_to_return_cpuset); _hwloc_cpuset_to_glibc_cpuset(hwloc_to_return_cpuset, &glibc_to_return_cpuset); /* Use DLB_Lend for returning borrowed units. DLB_Return seems to require that * a reclaim has previously been emitted by the unit owning runtime system */ #if 0 int dlb_ret = DLB_ReturnCpuMask_sp(dlb_handle, &glibc_to_return_cpuset); _dlb_check("DLB_ReturnCpuMask_sp", dlb_ret); #else int dlb_ret = DLB_LendCpuMask_sp(dlb_handle, &glibc_to_return_cpuset); _dlb_check("DLB_LendCpuMask_sp", dlb_ret); #endif } hwloc_bitmap_free(hwloc_to_lend_cpuset); hwloc_bitmap_free(hwloc_to_return_cpuset); status = 1; } pthread_mutex_unlock(&dlb_handle_mutex); return status; } int starpurm_dlb_notify_starpu_worker_mask_waking_up(const hwloc_cpuset_t hwloc_workers_cpuset) { int status = 0; pthread_mutex_lock(&dlb_handle_mutex); if (dlb_handle != NULL) { hwloc_cpuset_t hwloc_to_reclaim_cpuset = hwloc_bitmap_alloc(); hwloc_cpuset_t hwloc_to_borrow_cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_zero(hwloc_to_reclaim_cpuset); hwloc_bitmap_zero(hwloc_to_borrow_cpuset); hwloc_bitmap_and(hwloc_to_reclaim_cpuset, hwloc_workers_cpuset, starpurm_process_cpuset); hwloc_bitmap_andnot(hwloc_to_borrow_cpuset, hwloc_workers_cpuset, starpurm_process_cpuset); #ifdef STARPURM_DLB_VERBOSE { char * s_to_reclaim = NULL; char * s_to_borrow = NULL; hwloc_bitmap_asprintf(&s_to_reclaim, hwloc_to_reclaim_cpuset); hwloc_bitmap_asprintf(&s_to_borrow, hwloc_to_borrow_cpuset); fprintf(stderr, "%s: to_reclaim='%s', to_borrow='%s'\n", __func__, s_to_reclaim, s_to_borrow); free(s_to_reclaim); free(s_to_borrow); } #endif if (!hwloc_bitmap_iszero(hwloc_to_reclaim_cpuset)) { cpu_set_t glibc_to_reclaim_cpuset; CPU_ZERO(&glibc_to_reclaim_cpuset); _hwloc_cpuset_to_glibc_cpuset(hwloc_to_reclaim_cpuset, &glibc_to_reclaim_cpuset); int dlb_ret = DLB_ReclaimCpuMask_sp(dlb_handle, &glibc_to_reclaim_cpuset); _dlb_check("DLB_ReclaimCpuMask_sp", dlb_ret); } if (!hwloc_bitmap_iszero(hwloc_to_borrow_cpuset)) { cpu_set_t glibc_to_borrow_cpuset; CPU_ZERO(&glibc_to_borrow_cpuset); _hwloc_cpuset_to_glibc_cpuset(hwloc_to_borrow_cpuset, &glibc_to_borrow_cpuset); int dlb_ret = DLB_BorrowCpuMask_sp(dlb_handle, &glibc_to_borrow_cpuset); _dlb_check("DLB_BorrowCpuMask_sp", dlb_ret); } hwloc_bitmap_free(hwloc_to_reclaim_cpuset); hwloc_bitmap_free(hwloc_to_borrow_cpuset); status = 1; } pthread_mutex_unlock(&dlb_handle_mutex); return status; } #ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS #ifdef STARPURM_HAVE_DLB_CALLBACK_ARG static void _dlb_callback_enable_cpu(int cpuid, void *arg) #else static void _dlb_callback_enable_cpu(int cpuid) #endif { #ifdef STARPURM_HAVE_DLB_CALLBACK_ARG (void) arg; #endif int unitid = glibc_cpuid_to_unitid[cpuid]; #ifdef STARPURM_DLB_VERBOSE fprintf(stderr, "%s: cpuid=%d, unitid=%d\n", __func__, cpuid, unitid); #endif if (unitid != -1) { starpurm_enqueue_event_cpu_unit_available(unitid); } } #ifdef STARPURM_HAVE_DLB_CALLBACK_ARG static void _dlb_callback_disable_cpu(int cpuid, void *arg) #else static void _dlb_callback_disable_cpu(int cpuid) #endif { #ifdef STARPURM_HAVE_DLB_CALLBACK_ARG (void) arg; #endif int unitid = glibc_cpuid_to_unitid[cpuid]; #ifdef STARPURM_DLB_VERBOSE fprintf(stderr, "%s: cpuid=%d, unitid=%d\n", __func__, cpuid, unitid); #endif if (unitid != -1) { /* nothing */ } } #endif void starpurm_dlb_init(struct s_starpurm *rm) { _starpurm = rm; { int unitid; int cpuid; unitid_to_glibc_cpuid = malloc(rm->nunits * sizeof(*unitid_to_glibc_cpuid)); for (cpuid = 0; cpuidnunits; unitid++) { hwloc_cpuset_t unit_cpuset = starpurm_get_unit_cpuset(unitid); cpu_set_t unit_mask; CPU_ZERO(&unit_mask); _hwloc_cpuset_to_glibc_cpuset(unit_cpuset, &unit_mask); unitid_to_glibc_cpuid[unitid] = -1; for (cpuid = 0; cpuidselected_cpuset); hwloc_bitmap_and(starpurm_process_cpuset, starpurm_process_cpuset, rm->initially_owned_cpuset_mask); _hwloc_cpuset_to_glibc_cpuset(starpurm_process_cpuset, &starpurm_process_mask); #ifdef STARPURM_DLB_VERBOSE { char * s_reachable = NULL; char * s_initially_owned = NULL; hwloc_bitmap_asprintf(&s_reachable, rm->selected_cpuset); hwloc_bitmap_asprintf(&s_initially_owned, starpurm_process_cpuset); fprintf(stderr, "%s: StarPU reachable units='%s', StarPU initially owned units='%s'\n", __func__, s_reachable, s_initially_owned); free(s_reachable); free(s_initially_owned); } #endif pthread_mutex_lock(&dlb_handle_mutex); /* TODO: autodetect DLB policy according to DLB version */ #if 1 dlb_handle = DLB_Init_sp(0, &starpurm_process_mask, "--lewi=yes --drom=no --mode=async"); #else dlb_handle = DLB_Init_sp(0, &starpurm_process_mask, "--policy=new --drom=no --mode=async"); #endif /* cpu-based callbacks are mutually exclusive with mask-based callbacks, * we only register cpu-based callbacks */ int dlb_ret; #ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS #ifdef STARPURM_HAVE_DLB_CALLBACK_ARG dlb_ret = DLB_CallbackSet_sp(dlb_handle, dlb_callback_disable_cpu, (dlb_callback_t)_dlb_callback_disable_cpu, NULL); _dlb_check("DLB_CallbackSet_sp", dlb_ret); dlb_ret = DLB_CallbackSet_sp(dlb_handle, dlb_callback_enable_cpu, (dlb_callback_t)_dlb_callback_enable_cpu, NULL); _dlb_check("DLB_CallbackSet_sp", dlb_ret); #else dlb_ret = DLB_CallbackSet_sp(dlb_handle, dlb_callback_disable_cpu, (dlb_callback_t)_dlb_callback_disable_cpu); _dlb_check("DLB_CallbackSet_sp", dlb_ret); dlb_ret = DLB_CallbackSet_sp(dlb_handle, dlb_callback_enable_cpu, (dlb_callback_t)_dlb_callback_enable_cpu); _dlb_check("DLB_CallbackSet_sp", dlb_ret); #endif #endif dlb_ret = DLB_Enable_sp(dlb_handle); _dlb_check("DLB_Enable_sp", dlb_ret); pthread_mutex_unlock(&dlb_handle_mutex); } void starpurm_dlb_exit(void) { pthread_mutex_lock(&dlb_handle_mutex); dlb_handler_t dlb_handle_save = dlb_handle; dlb_handle = 0; pthread_mutex_unlock(&dlb_handle_mutex); /* lend every resources that StarPU may still have */ DLB_Lend_sp(dlb_handle_save); DLB_Return_sp(dlb_handle_save); pthread_mutex_lock(&dlb_handle_mutex); DLB_Disable_sp(dlb_handle_save); DLB_Finalize_sp(dlb_handle_save); hwloc_bitmap_free(starpurm_process_cpuset); free(unitid_to_glibc_cpuid); pthread_mutex_unlock(&dlb_handle_mutex); } starpu-1.4.9+dfsg/starpurm/src/starpurm_private.h000066400000000000000000000073761507764646700222510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef __STARPURM_PRIVATE_H #define __STARPURM_PRIVATE_H /** @file */ enum e_state { state_uninitialized = 0, state_init }; enum e_starpurm_unit_type { starpurm_unit_cpu = 0, starpurm_unit_opencl = 1, starpurm_unit_cuda = 2, starpurm_unit_ntypes = 3 }; struct s_starpurm { /** Machine topology as detected by hwloc. */ hwloc_topology_t topology; /** Current upper bound on the number of CPU cores selectable for computing with the runtime system. */ int max_ncpus; /** Number of currently selected CPU workers */ int selected_ncpus; /** Number of currently selected workers (CPU+devices) */ int selected_nworkers; /** Initialization state of the RM instance. */ int state; /** Boolean indicating the state of the dynamic resource sharing layer. * * !0 indicates that dynamic resource sharing is enabled. * 0 indicates that dynamic resource sharing is disabled. */ int dynamic_resource_sharing; /** Id of the StarPU's sched_ctx used by the RM instance. */ unsigned sched_ctx_id; /** Number of unit types supported by this RM instance. */ int unit_ntypes; /** Number of units available for each type. */ int *nunits_by_type; /** Number of units. */ int nunits; /** Offset of unit numbering for each type. */ int *unit_offsets_by_type; /** Array of units. */ struct s_starpurm_unit *units; /** Cpuset of all the StarPU's workers (CPU+devices. */ hwloc_cpuset_t global_cpuset; /** Cpuset of all StarPU CPU workers. */ hwloc_cpuset_t all_cpu_workers_cpuset; /** Cpuset of all StarPU OpenCL workers. */ hwloc_cpuset_t all_opencl_device_workers_cpuset; /** Cpuset of all StarPU CUDA workers. */ hwloc_cpuset_t all_cuda_device_workers_cpuset; /** Cpuset of all StarPU device workers. */ hwloc_cpuset_t all_device_workers_cpuset; /** Cpuset of all selected workers (CPU+devices). */ hwloc_cpuset_t selected_cpuset; /** Cpuset mask of initially owned cpuset or full if not used. */ hwloc_cpuset_t initially_owned_cpuset_mask; /** maximum value among worker ids */ int max_worker_id; /** worker id to unit id table */ int *worker_unit_ids; /** Temporary contexts accounting. */ unsigned int max_temporary_ctxs; unsigned int avail_temporary_ctxs; starpu_pthread_mutex_t temporary_ctxs_mutex; starpu_pthread_cond_t temporary_ctxs_cond; /** Global StarPU pause state */ int starpu_in_pause; /** Event list. */ pthread_t event_thread; starpu_pthread_mutex_t event_list_mutex; starpu_pthread_cond_t event_list_cond; starpu_pthread_cond_t event_processing_cond; int event_processing_enabled; int event_processing_ended; struct s_starpurm_event *event_list_head; struct s_starpurm_event *event_list_tail; }; #ifdef STARPURM_HAVE_DLB void starpurm_dlb_init(struct s_starpurm *rm); void starpurm_dlb_exit(void); int starpurm_dlb_notify_starpu_worker_mask_going_to_sleep(const hwloc_cpuset_t hwloc_workers_cpuset); int starpurm_dlb_notify_starpu_worker_mask_waking_up(const hwloc_cpuset_t hwloc_workers_cpuset); #ifdef STARPURM_STARPU_HAVE_WORKER_CALLBACKS void starpurm_enqueue_event_cpu_unit_available(int cpuid); #endif #endif #endif /* __STARPURM_PRIVATE_H */ starpu-1.4.9+dfsg/starpurm/tests/000077500000000000000000000000001507764646700170275ustar00rootroot00000000000000starpu-1.4.9+dfsg/starpurm/tests/01_init_exit.c000066400000000000000000000016371507764646700214760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This example tests the proper initialization and shutdown of StarPURM. */ #include #include int main(int argc, char *argv[]) { (void)argc; (void)argv; starpurm_initialize(); starpurm_shutdown(); return 0; } starpu-1.4.9+dfsg/starpurm/tests/02_list_units.c000066400000000000000000000031171507764646700216730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This example lists the CPU and device units detected and managed by * StarPURM. */ #include #include int main(int argc, char *argv[]) { (void)argc; (void)argv; int ids[3]; int i; starpurm_initialize(); ids[0] = starpurm_get_device_type_id("cpu"); ids[1] = starpurm_get_device_type_id("opencl"); ids[2] = starpurm_get_device_type_id("cuda"); for (i=0; i<3; i++) { const int id = ids[i]; if (id == -1) continue; const int nb_units = starpurm_get_nb_devices_by_type(id); printf("%s: %d units\n", starpurm_get_device_type_name(id), nb_units); int j; for (j=0; j #include #include static void disp_cpuset(const char * name, hwloc_cpuset_t cpuset) { int strl = hwloc_bitmap_snprintf(NULL, 0, cpuset); char str[strl+1]; hwloc_bitmap_snprintf(str, strl+1, cpuset); printf(". %s: %s\n", name, str); } int main(int argc, char *argv[]) { (void)argc; (void)argv; starpurm_initialize(); int cpu_id = starpurm_get_device_type_id("cpu"); const int nb_cpu_units = starpurm_get_nb_devices_by_type(cpu_id); if (nb_cpu_units < 1) { starpurm_shutdown(); return 77; } hwloc_cpuset_t cpuset; cpuset = starpurm_get_device_worker_cpuset(cpu_id, 0); disp_cpuset("worker cpuset", cpuset); hwloc_bitmap_free(cpuset); cpuset = starpurm_get_global_cpuset(); disp_cpuset("global cpuset", cpuset); hwloc_bitmap_free(cpuset); cpuset = starpurm_get_selected_cpuset(); disp_cpuset("selected cpuset", cpuset); hwloc_bitmap_free(cpuset); cpuset = starpurm_get_all_cpu_workers_cpuset(); disp_cpuset("all cpu workers cpuset", cpuset); hwloc_bitmap_free(cpuset); cpuset = starpurm_get_all_device_workers_cpuset(); disp_cpuset("all device workers cpuset", cpuset); hwloc_bitmap_free(cpuset); starpurm_shutdown(); return 0; } starpu-1.4.9+dfsg/starpurm/tests/04_drs_enable.c000066400000000000000000000024441507764646700216000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This example tests the proper initialization and shutdown of StarPURM. */ #include #include int main(int argc, char *argv[]) { (void)argc; (void)argv; int drs_enabled; starpurm_initialize(); drs_enabled = starpurm_drs_enabled_p(); printf("drs enabled at startup: %d\n", drs_enabled); starpurm_set_drs_enable(NULL); drs_enabled = starpurm_drs_enabled_p(); printf("drs state after explicit enable: %d\n", drs_enabled); starpurm_set_drs_disable(NULL); drs_enabled = starpurm_drs_enabled_p(); printf("drs state after explicit disable: %d\n", drs_enabled); starpurm_shutdown(); return 0; } starpu-1.4.9+dfsg/starpurm/tests/Makefile.am000066400000000000000000000027361507764646700210730ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk SUBDIRS = CLEANFILES = *.gcno *.gcda *.linkinfo AM_CFLAGS += $(DLB_CFLAGS) AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_builddir)/include AM_CPPFLAGS += -I$(top_srcdir)/starpurm/include -I$(top_srcdir)/starpurm/src -I$(top_builddir)/starpurm/src -I$(top_builddir)/starpurm/include $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ LIBS += $(top_builddir)/starpurm/src/libstarpurm-@STARPU_EFFECTIVE_VERSION@.la $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) LIBS += $(HWLOC_LIBS) $(DLB_LIBS) noinst_PROGRAMS = noinst_PROGRAMS += $(myPROGRAMS) check_PROGRAMS = $(myPROGRAMS) TESTS = $(myPROGRAMS) myPROGRAMS = myPROGRAMS += 01_init_exit myPROGRAMS += 02_list_units myPROGRAMS += 03_cpusets myPROGRAMS += 04_drs_enable starpu-1.4.9+dfsg/starpurm/tests/Makefile.in000066400000000000000000001537521507764646700211110ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ noinst_PROGRAMS = $(am__EXEEXT_1) check_PROGRAMS = $(am__EXEEXT_1) TESTS = $(am__EXEEXT_1) subdir = starpurm/tests ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__EXEEXT_1 = 01_init_exit$(EXEEXT) 02_list_units$(EXEEXT) \ 03_cpusets$(EXEEXT) 04_drs_enable$(EXEEXT) PROGRAMS = $(noinst_PROGRAMS) 01_init_exit_SOURCES = 01_init_exit.c 01_init_exit_OBJECTS = 01_init_exit.$(OBJEXT) 01_init_exit_LDADD = $(LDADD) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = 02_list_units_SOURCES = 02_list_units.c 02_list_units_OBJECTS = 02_list_units.$(OBJEXT) 02_list_units_LDADD = $(LDADD) 03_cpusets_SOURCES = 03_cpusets.c 03_cpusets_OBJECTS = 03_cpusets.$(OBJEXT) 03_cpusets_LDADD = $(LDADD) 04_drs_enable_SOURCES = 04_drs_enable.c 04_drs_enable_OBJECTS = 04_drs_enable.$(OBJEXT) 04_drs_enable_LDADD = $(LDADD) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/01_init_exit.Po \ ./$(DEPDIR)/02_list_units.Po ./$(DEPDIR)/03_cpusets.Po \ ./$(DEPDIR)/04_drs_enable.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = 01_init_exit.c 02_list_units.c 03_cpusets.c 04_drs_enable.c DIST_SOURCES = 01_init_exit.c 02_list_units.c 03_cpusets.c \ 04_drs_enable.c RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ check recheck distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ \ $(top_builddir)/starpurm/src/libstarpurm-@STARPU_EFFECTIVE_VERSION@.la \ $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) \ $(HWLOC_LIBS) $(DLB_LIBS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = LAUNCHER = AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(DLB_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # SUBDIRS = CLEANFILES = *.gcno *.gcda *.linkinfo AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src \ -I$(top_builddir)/src -I$(top_builddir)/include \ -I$(top_srcdir)/starpurm/include -I$(top_srcdir)/starpurm/src \ -I$(top_builddir)/starpurm/src \ -I$(top_builddir)/starpurm/include $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ myPROGRAMS = 01_init_exit 02_list_units 03_cpusets 04_drs_enable all: all-recursive .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign starpurm/tests/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign starpurm/tests/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-checkPROGRAMS: @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list clean-noinstPROGRAMS: @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list 01_init_exit$(EXEEXT): $(01_init_exit_OBJECTS) $(01_init_exit_DEPENDENCIES) $(EXTRA_01_init_exit_DEPENDENCIES) @rm -f 01_init_exit$(EXEEXT) $(AM_V_CCLD)$(LINK) $(01_init_exit_OBJECTS) $(01_init_exit_LDADD) $(LIBS) 02_list_units$(EXEEXT): $(02_list_units_OBJECTS) $(02_list_units_DEPENDENCIES) $(EXTRA_02_list_units_DEPENDENCIES) @rm -f 02_list_units$(EXEEXT) $(AM_V_CCLD)$(LINK) $(02_list_units_OBJECTS) $(02_list_units_LDADD) $(LIBS) 03_cpusets$(EXEEXT): $(03_cpusets_OBJECTS) $(03_cpusets_DEPENDENCIES) $(EXTRA_03_cpusets_DEPENDENCIES) @rm -f 03_cpusets$(EXEEXT) $(AM_V_CCLD)$(LINK) $(03_cpusets_OBJECTS) $(03_cpusets_LDADD) $(LIBS) 04_drs_enable$(EXEEXT): $(04_drs_enable_OBJECTS) $(04_drs_enable_DEPENDENCIES) $(EXTRA_04_drs_enable_DEPENDENCIES) @rm -f 04_drs_enable$(EXEEXT) $(AM_V_CCLD)$(LINK) $(04_drs_enable_OBJECTS) $(04_drs_enable_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/01_init_exit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/02_list_units.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/03_cpusets.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/04_drs_enable.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: $(check_PROGRAMS) @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all $(check_PROGRAMS) @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? 01_init_exit.log: 01_init_exit$(EXEEXT) @p='01_init_exit$(EXEEXT)'; \ b='01_init_exit'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) 02_list_units.log: 02_list_units$(EXEEXT) @p='02_list_units$(EXEEXT)'; \ b='02_list_units'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) 03_cpusets.log: 03_cpusets$(EXEEXT) @p='03_cpusets$(EXEEXT)'; \ b='03_cpusets'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) 04_drs_enable.log: 04_drs_enable$(EXEEXT) @p='04_drs_enable$(EXEEXT)'; \ b='04_drs_enable'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: check-recursive all-am: Makefile $(PROGRAMS) installdirs: installdirs-recursive installdirs-am: install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-checkPROGRAMS clean-generic clean-libtool \ clean-noinstPROGRAMS mostlyclean-am distclean: distclean-recursive -rm -f ./$(DEPDIR)/01_init_exit.Po -rm -f ./$(DEPDIR)/02_list_units.Po -rm -f ./$(DEPDIR)/03_cpusets.Po -rm -f ./$(DEPDIR)/04_drs_enable.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f ./$(DEPDIR)/01_init_exit.Po -rm -f ./$(DEPDIR)/02_list_units.Po -rm -f ./$(DEPDIR)/03_cpusets.Po -rm -f ./$(DEPDIR)/04_drs_enable.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: .MAKE: $(am__recursive_targets) check-am install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ am--depfiles check check-TESTS check-am clean \ clean-checkPROGRAMS clean-generic clean-libtool \ clean-noinstPROGRAMS cscopelist-am ctags ctags-am distclean \ distclean-compile distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ installdirs-am maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ uninstall uninstall-am .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/tests/000077500000000000000000000000001507764646700151525ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/Makefile.am000066400000000000000000001071601507764646700172130ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk include $(top_srcdir)/make/starpu-loader.mk AM_CFLAGS += $(APP_CFLAGS) AM_CXXFLAGS += $(APP_CXXFLAGS) AM_FFLAGS += $(APP_FFLAGS) AM_FCFLAGS += $(APP_FCFLAGS) AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) LIBS += $(HWLOC_LIBS) LIBS += $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_HIP_LDFLAGS) LIBS += $(STARPU_MAX_FPGA_LDFLAGS) EXTRA_DIST = \ helper.h \ datawizard/locality.sh \ overlap/overlap.sh \ datawizard/scal.h \ regression/profiles.in \ regression/regression.sh.in \ regression/profiles.build.only.in \ microbenchs/tasks_data_overhead.sh \ microbenchs/sync_tasks_data_overhead.sh \ microbenchs/async_tasks_data_overhead.sh \ microbenchs/tasks_size_overhead.sh \ microbenchs/tasks_size_overhead_sched.sh \ microbenchs/tasks_size_overhead_scheds.sh \ microbenchs/tasks_size_overhead.gp \ microbenchs/parallel_dependent_homogeneous_tasks_data.sh \ microbenchs/parallel_independent_heterogeneous_tasks_data.sh \ microbenchs/parallel_independent_heterogeneous_tasks.sh \ microbenchs/parallel_independent_homogeneous_tasks_data.sh \ microbenchs/parallel_independent_homogeneous_tasks.sh \ microbenchs/bandwidth_scheds.sh \ microbenchs/starpu_check.sh \ energy/static.sh \ energy/dynamic.sh \ energy/perfs.gp \ datawizard/scratch_opencl_kernel.cl \ datawizard/sync_and_notify_data_opencl_codelet.cl\ coverage/coverage.sh \ variable/increment.h \ variable/increment_opencl_kernel.cl \ variable/redux_opencl_kernel.cl \ variable/neutral_opencl_kernel.cl \ datawizard/interfaces/test_interfaces.h \ datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl \ datawizard/interfaces/coo/coo_opencl_kernel.cl \ datawizard/interfaces/matrix/matrix_opencl_kernel.cl \ datawizard/interfaces/variable/variable_opencl_kernel.cl \ datawizard/interfaces/vector/vector_opencl_kernel.cl \ datawizard/interfaces/multiformat/multiformat_types.h \ datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl \ datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl \ datawizard/interfaces/multiformat/advanced/generic.h \ datawizard/interfaces/csr/csr_opencl_kernel.cl \ datawizard/interfaces/block/block_opencl_kernel.cl \ datawizard/interfaces/tensor/tensor_opencl_kernel.cl \ datawizard/interfaces/ndim/ndim_opencl_kernel.cl \ perfmodels/opencl_memset_kernel.cl \ $(MICROBENCHS:=.sh) \ microbenchs/microbench.sh \ memory/memstress2.gp \ memory/memstress2.sh \ memory/memstress.gp \ memory/memstress.sh \ maxfpga/LMemLoopbackCpuCode.c \ maxfpga/MyTasksManager.maxj \ maxfpga/MyTasksMuxManager.maxj \ maxfpga/README.txt \ maxfpga/StreamFMACpuCode.cpp \ maxfpga/Task1.maxj \ maxfpga/Task2.maxj \ maxfpga/Task3.maxj \ datawizard/interfaces/test_interfaces.sh CLEANFILES = \ *.gcno *.gcda *.linkinfo core starpu_idle_microsec.log *.mod *.png *.output tasks.rec perfs.rec */perfs.rec */*/perfs.rec perfs2.rec fortran90/starpu_mod.f90 bandwidth-*.dat bandwidth.gp bandwidth.eps bandwidth.svg *.csv *.md *.Rmd *.pdf *.html clean-local: -rm -rf overlap/overlap.traces datawizard/locality.traces BUILT_SOURCES = SUBDIRS = examplebindir = $(libdir)/starpu/examples if STARPU_USE_OPENCL nobase_STARPU_OPENCL_DATA_DATA = endif testbindir = $(libdir)/starpu/tests ##################################### # What to install and what to check # ##################################### TESTS = $(SHELL_TESTS) $(myPROGRAMS) SHELL_TESTS = if STARPU_COVERAGE_ENABLED SHELL_TESTS += coverage/coverage.sh endif XFAIL_TESTS = \ errorcheck/invalid_blocking_calls myPROGRAMS = myPROGRAMS += \ main/callback \ main/bind \ main/mkdtemp \ main/execute_schedule \ main/insert_task_pack \ main/insert_task_nullcodelet \ main/insert_task_where \ main/multithreaded_init \ main/empty_task \ main/empty_task_chain \ main/starpu_worker_exists \ main/codelet_null_callback \ datawizard/allocate \ datawizard/acquire_cb \ datawizard/deps \ datawizard/user_interaction_implicit \ datawizard/interfaces/copy_interfaces \ datawizard/numa_overflow \ datawizard/locality \ datawizard/variable_size \ errorcheck/starpu_init_noworker \ errorcheck/invalid_tasks \ helper/cublas_init \ helper/cublasLt_init \ helper/cusparse_init \ helper/hipblas_init \ helper/pinned_memory \ helper/execute_on_all \ microbenchs/display_structures_size \ microbenchs/local_pingpong \ overlap/overlap \ sched_ctx/sched_ctx_list \ sched_ctx/sched_ctx_policy_data \ openmp/init_exit_01 \ openmp/init_exit_02 \ openmp/environment \ openmp/api_01 \ openmp/parallel_01 \ openmp/parallel_02 \ openmp/parallel_03 \ openmp/parallel_barrier_01 \ openmp/parallel_master_01 \ openmp/parallel_master_inline_01 \ openmp/parallel_single_wait_01 \ openmp/parallel_single_nowait_01 \ openmp/parallel_single_inline_01 \ openmp/parallel_single_copyprivate_01 \ openmp/parallel_single_copyprivate_inline_01 \ openmp/parallel_critical_01 \ openmp/parallel_critical_inline_01 \ openmp/parallel_critical_named_01 \ openmp/parallel_critical_named_inline_01\ openmp/parallel_simple_lock_01 \ openmp/parallel_nested_lock_01 \ openmp/parallel_for_01 \ openmp/parallel_for_02 \ openmp/parallel_for_ordered_01 \ openmp/parallel_sections_01 \ openmp/parallel_sections_combined_01 \ openmp/task_01 \ openmp/task_02 \ openmp/task_03 \ openmp/taskloop \ openmp/taskwait_01 \ openmp/taskgroup_01 \ openmp/taskgroup_02 \ openmp/array_slice_01 \ openmp/cuda_task_01 \ perfmodels/value_nan \ sched_policies/workerids if STARPU_SIMGRID myPROGRAMS += \ energy/energy_efficiency \ datawizard/simgrid-locality endif if !STARPU_SIMGRID myPROGRAMS += \ main/deprecated_func \ main/driver_api/init_run_deinit \ main/driver_api/run_driver \ main/deploop \ main/display_binding \ main/execute_on_a_specific_worker \ main/insert_task \ main/insert_task_value \ main/insert_task_dyn_handles \ main/insert_task_array \ main/insert_task_many \ main/job \ main/multithreaded \ main/starpu_task_bundle \ main/starpu_task_wait_for_all \ main/starpu_task_wait \ main/static_restartable \ main/static_restartable_using_initializer\ main/static_restartable_tag \ main/regenerate \ main/regenerate_pipeline \ main/restart \ main/wait_all_regenerable_tasks \ main/subgraph_repeat \ main/subgraph_repeat_tag \ main/subgraph_repeat_regenerate \ main/subgraph_repeat_regenerate_tag \ main/subgraph_repeat_regenerate_tag_cycle \ main/empty_task_sync_point \ main/empty_task_sync_point_tasks \ main/tag_wait_api \ main/tag_get_task \ main/task_wait_api \ main/declare_deps_in_callback \ main/declare_deps_after_submission \ main/declare_deps_after_submission_synchronous \ main/get_current_task \ main/starpu_init \ main/submit \ main/const_codelet \ main/pause_resume \ main/pack \ main/get_children_tasks \ main/hwloc_cpuset \ main/task_end_dep \ datawizard/acquire_cb_insert \ datawizard/acquire_release \ datawizard/acquire_release2 \ datawizard/acquire_release_to \ datawizard/acquire_try \ datawizard/bcsr \ datawizard/cache \ datawizard/commute \ datawizard/commute2 \ datawizard/copy \ datawizard/data_implicit_deps \ datawizard/data_register \ datawizard/scratch \ datawizard/scratch_reuse \ datawizard/sync_and_notify_data \ datawizard/sync_and_notify_data_implicit\ datawizard/dsm_stress \ datawizard/double_parameter \ datawizard/write_only_tmp_buffer \ datawizard/data_invalidation \ datawizard/data_deinitialize \ datawizard/dining_philosophers \ datawizard/manual_reduction \ datawizard/readers_and_writers \ datawizard/unpartition \ datawizard/sync_with_data_with_mem \ datawizard/sync_with_data_with_mem_non_blocking\ datawizard/sync_with_data_with_mem_non_blocking_implicit\ datawizard/mpi_like \ datawizard/mpi_like_async \ datawizard/critical_section_with_void_interface\ datawizard/increment_init \ datawizard/increment_redux \ datawizard/increment_redux_partition \ datawizard/increment_redux_v2 \ datawizard/increment_redux_with_args \ datawizard/increment_redux_lazy \ datawizard/handle_to_pointer \ datawizard/lazy_allocation \ datawizard/no_unregister \ datawizard/noreclaim \ datawizard/nowhere \ datawizard/interfaces/block/block_interface \ datawizard/interfaces/bcsr/bcsr_interface \ datawizard/interfaces/coo/coo_interface \ datawizard/interfaces/csr/csr_interface \ datawizard/interfaces/matrix/matrix_interface \ datawizard/interfaces/multiformat/multiformat_interface \ datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl \ datawizard/interfaces/multiformat/advanced/multiformat_data_release \ datawizard/interfaces/multiformat/advanced/multiformat_worker \ datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion \ datawizard/interfaces/multiformat/advanced/same_handle \ datawizard/interfaces/tensor/tensor_interface \ datawizard/interfaces/ndim/ndim_interface \ datawizard/interfaces/variable/variable_interface \ datawizard/interfaces/vector/vector_interface \ datawizard/interfaces/void/void_interface \ datawizard/in_place_partition \ datawizard/partition_dep \ datawizard/partition_lazy \ datawizard/partition_init \ datawizard/partition_wontuse \ datawizard/gpu_register \ datawizard/gpu_ptr_register \ datawizard/variable_parameters \ datawizard/wt_host \ datawizard/wt_broadcast \ datawizard/readonly \ datawizard/specific_node \ datawizard/specific_node_same \ datawizard/task_with_multiple_time_the_same_handle \ datawizard/test_arbiter \ datawizard/invalidate_pending_requests \ datawizard/deinitialize_pending_requests \ datawizard/temporary_partition \ datawizard/partitioned_initialization \ datawizard/partitioned_acquire \ datawizard/temporary_partition_implicit \ datawizard/temporary_partition_read \ datawizard/redux_acquire \ disk/disk_copy \ disk/disk_copy_unpack \ disk/disk_copy_to_disk \ disk/disk_compute \ disk/disk_pack \ disk/mem_reclaim \ errorcheck/invalid_blocking_calls \ errorcheck/workers_cpuid \ fault-tolerance/retry \ helper/starpu_data_cpy \ helper/starpu_data_dup_ro \ helper/starpu_create_sync_task \ microbenchs/async_tasks_overhead \ microbenchs/sync_tasks_overhead \ microbenchs/tasks_overhead \ microbenchs/tasks_size_overhead \ microbenchs/prefetch_data_on_node \ microbenchs/redundant_buffer \ microbenchs/matrix_as_vector \ microbenchs/bandwidth \ overlap/gpu_concurrency \ parallel_tasks/combined_worker_assign_workerid \ parallel_tasks/explicit_combined_worker \ parallel_tasks/parallel_kernels \ parallel_tasks/parallel_kernels_trivial \ parallel_tasks/parallel_kernels_spmd \ parallel_tasks/spmd_peager \ parallel_tasks/cuda_only \ perfmodels/regression_based_memset \ perfmodels/regression_based_check \ perfmodels/regression_based_multiimpl \ perfmodels/regression_based_energy \ perfmodels/regression_based_gpu \ perfmodels/non_linear_regression_based \ perfmodels/feed \ perfmodels/user_base \ perfmodels/valid_model \ perfmodels/path \ perfmodels/memory \ sched_policies/data_locality \ sched_policies/execute_all_tasks \ sched_policies/prio \ sched_policies/simple_deps \ sched_policies/simple_cpu_gpu_sched \ sched_ctx/sched_ctx_hierarchy noinst_PROGRAMS += \ datawizard/allocate_many_numa_nodes if STARPU_USE_MAX_FPGA myPROGRAMS += \ maxfpga/max_fpga_basic_static \ maxfpga/max_fpga_advanced_static \ maxfpga/max_fpga_dynamic \ maxfpga/max_fpga_mux endif endif MICROBENCHS = \ microbenchs/parallel_independent_homogeneous_tasks \ microbenchs/parallel_independent_heterogeneous_tasks \ microbenchs/parallel_independent_homogeneous_tasks_data \ microbenchs/parallel_independent_heterogeneous_tasks_data \ microbenchs/parallel_redux_homogeneous_tasks_data \ microbenchs/parallel_redux_heterogeneous_tasks_data \ microbenchs/parallel_dependent_homogeneous_tasks_data if STARPU_HAVE_FC if !STARPU_SANITIZE if !STARPU_SIMGRID myPROGRAMS += \ fortran90/init_01 endif endif endif if !STARPU_SIMGRID if STARPU_LONG_CHECK myPROGRAMS += \ main/tag_task_data_deps \ datawizard/reclaim endif endif examplebin_PROGRAMS = \ main/deadlock \ microbenchs/async_tasks_overhead \ microbenchs/sync_tasks_overhead \ microbenchs/tasks_overhead \ microbenchs/tasks_size_overhead \ microbenchs/local_pingpong examplebin_SCRIPTS = \ microbenchs/tasks_data_overhead.sh \ microbenchs/sync_tasks_data_overhead.sh \ microbenchs/async_tasks_data_overhead.sh \ microbenchs/tasks_size_overhead.gp \ microbenchs/tasks_size_overhead.sh if !STARPU_SIMGRID if !STARPU_USE_MPI_MASTER_SLAVE examplebin_PROGRAMS += \ microbenchs/bandwidth SHELL_TESTS += \ microbenchs/tasks_data_overhead.sh \ microbenchs/sync_tasks_data_overhead.sh \ microbenchs/async_tasks_data_overhead.sh \ microbenchs/tasks_size_overhead_scheds.sh endif endif if STARPU_HAVE_WINDOWS check_PROGRAMS = $(myPROGRAMS) else check_PROGRAMS = $(LOADER) $(myPROGRAMS) endif noinst_PROGRAMS += $(myPROGRAMS) noinst_PROGRAMS += $(MICROBENCHS) if STARPU_SIMGRID SHELL_TESTS += $(MICROBENCHS:=.sh) endif SHELL_TESTS += \ datawizard/locality.sh \ microbenchs/bandwidth_scheds.sh if STARPU_USE_FXT SHELL_TESTS += \ overlap/overlap.sh endif ################################ # Simgrid Model Checking tests # ################################ if STARPU_SIMGRID_MC SUBDIRS += model-checking endif ####################### # Source files # ####################### datawizard_acquire_release_SOURCES = \ datawizard/acquire_release.c \ variable/increment.c if STARPU_USE_CUDA datawizard_acquire_release_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP datawizard_acquire_release_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL datawizard_acquire_release_SOURCES += \ variable/increment_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ variable/increment_opencl_kernel.cl endif datawizard_acquire_release2_SOURCES = \ datawizard/acquire_release2.c \ variable/increment.c if STARPU_USE_CUDA datawizard_acquire_release2_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP datawizard_acquire_release2_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL datawizard_acquire_release2_SOURCES += \ variable/increment_opencl.c endif datawizard_acquire_release_to_SOURCES = \ datawizard/acquire_release_to.c \ variable/increment.c if STARPU_USE_CUDA datawizard_acquire_release_to_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP datawizard_acquire_release_to_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL datawizard_acquire_release_to_SOURCES += \ variable/increment_opencl.c endif datawizard_wt_host_SOURCES = \ datawizard/wt_host.c \ variable/increment.c if STARPU_USE_CUDA datawizard_wt_host_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP datawizard_wt_host_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL datawizard_wt_host_SOURCES += \ variable/increment_opencl.c endif datawizard_wt_broadcast_SOURCES = \ datawizard/wt_broadcast.c \ variable/increment.c if STARPU_USE_CUDA datawizard_wt_broadcast_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP datawizard_wt_broadcast_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL datawizard_wt_broadcast_SOURCES += \ variable/increment_opencl.c endif datawizard_increment_redux_lazy_SOURCES = \ datawizard/increment_redux_lazy.c \ variable/increment.c if STARPU_USE_CUDA datawizard_increment_redux_lazy_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP datawizard_increment_redux_lazy_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL datawizard_increment_redux_lazy_SOURCES += \ variable/increment_opencl.c endif datawizard_increment_redux_SOURCES = \ datawizard/increment_redux.c \ variable/increment.c if STARPU_USE_CUDA datawizard_increment_redux_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP datawizard_increment_redux_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL datawizard_increment_redux_SOURCES += \ variable/increment_opencl.c endif datawizard_increment_redux_partition_SOURCES = \ datawizard/increment_redux_partition.c \ variable/increment.c if STARPU_USE_CUDA datawizard_increment_redux_partition_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP datawizard_increment_redux_partition_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL datawizard_increment_redux_partition_SOURCES += \ variable/increment_opencl.c endif datawizard_increment_redux_v2_SOURCES = \ datawizard/increment_redux_v2.c \ variable/increment.c if STARPU_USE_CUDA datawizard_increment_redux_v2_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP datawizard_increment_redux_v2_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL datawizard_increment_redux_v2_SOURCES += \ variable/increment_opencl.c endif datawizard_increment_redux_with_args_SOURCES = \ datawizard/increment_redux_with_args.c \ variable/increment.c if STARPU_USE_CUDA datawizard_increment_redux_with_args_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP datawizard_increment_redux_with_args_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL datawizard_increment_redux_with_args_SOURCES += \ variable/increment_opencl.c endif datawizard_increment_init_SOURCES = \ datawizard/increment_init.c \ variable/increment.c if STARPU_USE_CUDA datawizard_increment_init_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP datawizard_increment_init_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL datawizard_increment_init_SOURCES += \ variable/increment_opencl.c endif datawizard_scratch_SOURCES = \ datawizard/scratch.c if STARPU_USE_CUDA datawizard_scratch_SOURCES += \ datawizard/scratch_cuda.cu endif if STARPU_USE_OPENCL datawizard_scratch_SOURCES += \ datawizard/scratch_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ datawizard/scratch_opencl_kernel.cl endif datawizard_mpi_like_SOURCES = \ datawizard/mpi_like.c \ variable/increment.c if STARPU_USE_CUDA datawizard_mpi_like_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP datawizard_mpi_like_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL datawizard_mpi_like_SOURCES += \ variable/increment_opencl.c endif datawizard_mpi_like_async_SOURCES = \ datawizard/mpi_like_async.c \ variable/increment.c if STARPU_USE_CUDA datawizard_mpi_like_async_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP datawizard_mpi_like_async_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL datawizard_mpi_like_async_SOURCES += \ variable/increment_opencl.c endif datawizard_sync_and_notify_data_SOURCES = \ datawizard/sync_and_notify_data.c if STARPU_USE_CUDA datawizard_sync_and_notify_data_SOURCES += \ datawizard/sync_and_notify_data_kernels.cu endif if STARPU_USE_OPENCL datawizard_sync_and_notify_data_SOURCES += \ datawizard/sync_and_notify_data_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ datawizard/sync_and_notify_data_opencl_codelet.cl endif datawizard_sync_and_notify_data_implicit_SOURCES = \ datawizard/sync_and_notify_data_implicit.c if STARPU_USE_CUDA datawizard_sync_and_notify_data_implicit_SOURCES += \ datawizard/sync_and_notify_data_kernels.cu endif if STARPU_USE_OPENCL datawizard_sync_and_notify_data_implicit_SOURCES += \ datawizard/sync_and_notify_data_opencl.c endif datawizard_in_place_partition_SOURCES = \ datawizard/in_place_partition.c \ datawizard/scal.c if STARPU_USE_CUDA datawizard_in_place_partition_SOURCES += \ datawizard/scal_cuda.cu endif if STARPU_USE_OPENCL datawizard_in_place_partition_SOURCES += \ datawizard/scal_opencl.cl endif datawizard_partition_dep_SOURCES = \ datawizard/partition_dep.c \ datawizard/scal.c if STARPU_USE_CUDA datawizard_partition_dep_SOURCES += \ datawizard/scal_cuda.cu endif if STARPU_USE_OPENCL datawizard_partition_dep_SOURCES += \ datawizard/scal_opencl.cl endif datawizard_partition_lazy_SOURCES = \ datawizard/partition_lazy.c \ datawizard/scal.c if STARPU_USE_CUDA datawizard_partition_lazy_SOURCES += \ datawizard/scal_cuda.cu endif if STARPU_USE_OPENCL datawizard_partition_lazy_SOURCES += \ datawizard/scal_opencl.cl endif datawizard_gpu_register_SOURCES = \ datawizard/gpu_register.c \ datawizard/scal.c if STARPU_USE_CUDA datawizard_gpu_register_SOURCES += \ datawizard/scal_cuda.cu endif if STARPU_USE_OPENCL datawizard_gpu_register_SOURCES += \ datawizard/scal_opencl.cl endif datawizard_gpu_ptr_register_SOURCES = \ datawizard/gpu_ptr_register.c \ datawizard/scal.c if STARPU_USE_CUDA datawizard_gpu_ptr_register_SOURCES += \ datawizard/scal_cuda.cu endif if STARPU_USE_OPENCL datawizard_gpu_ptr_register_SOURCES += \ datawizard/scal_opencl.cl endif datawizard_specific_node_SOURCES = \ datawizard/specific_node.c \ variable/increment.c if STARPU_USE_CUDA datawizard_specific_node_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP datawizard_specific_node_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL datawizard_specific_node_SOURCES += \ variable/increment_opencl.c endif datawizard_test_arbiter_SOURCES = \ datawizard/test_arbiter.cpp main_starpu_worker_exists_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) main_deprecated_func_CFLAGS = $(AM_CFLAGS) -Wno-deprecated-declarations main_insert_task_where_SOURCES = \ main/insert_task_where.c \ variable/increment.c if STARPU_USE_CUDA main_insert_task_where_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP main_insert_task_where_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL main_insert_task_where_SOURCES += \ variable/increment_opencl.c endif main_subgraph_repeat_SOURCES = \ main/subgraph_repeat.c \ variable/increment.c if STARPU_USE_CUDA main_subgraph_repeat_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP main_subgraph_repeat_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL main_subgraph_repeat_SOURCES += \ variable/increment_opencl.c endif main_subgraph_repeat_tag_SOURCES = \ main/subgraph_repeat_tag.c \ variable/increment.c if STARPU_USE_CUDA main_subgraph_repeat_tag_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP main_subgraph_repeat_tag_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL main_subgraph_repeat_tag_SOURCES += \ variable/increment_opencl.c endif main_subgraph_repeat_regenerate_SOURCES = \ main/subgraph_repeat_regenerate.c \ variable/increment.c if STARPU_USE_CUDA main_subgraph_repeat_regenerate_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP main_subgraph_repeat_regenerate_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL main_subgraph_repeat_regenerate_SOURCES += \ variable/increment_opencl.c endif main_subgraph_repeat_regenerate_tag_SOURCES = \ main/subgraph_repeat_regenerate_tag.c \ variable/increment.c if STARPU_USE_CUDA main_subgraph_repeat_regenerate_tag_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP main_subgraph_repeat_regenerate_tag_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL main_subgraph_repeat_regenerate_tag_SOURCES += \ variable/increment_opencl.c endif main_subgraph_repeat_regenerate_tag_cycle_SOURCES = \ main/subgraph_repeat_regenerate_tag_cycle.c \ variable/increment.c if STARPU_USE_CUDA main_subgraph_repeat_regenerate_tag_cycle_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP main_subgraph_repeat_regenerate_tag_cycle_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL main_subgraph_repeat_regenerate_tag_cycle_SOURCES += \ variable/increment_opencl.c endif if STARPU_HAVE_FC fortran90_init_01_SOURCES = \ fortran90/starpu_mod.f90 \ fortran90/init_01.f90 endif helper_starpu_data_dup_ro_SOURCES = \ helper/starpu_data_dup_ro.c \ variable/increment.c if STARPU_USE_CUDA helper_starpu_data_dup_ro_SOURCES += \ variable/increment_cuda.cu endif if STARPU_USE_HIP helper_starpu_data_dup_ro_SOURCES += \ variable/increment_hip.hip endif if STARPU_USE_OPENCL helper_starpu_data_dup_ro_SOURCES += \ variable/increment_opencl.c endif datawizard_interfaces_copy_interfaces_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) datawizard_data_register_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) ################### # Block interface # ################### datawizard_interfaces_block_block_interface_SOURCES= \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/block/block_interface.c datawizard_interfaces_block_block_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) if STARPU_USE_CUDA datawizard_interfaces_block_block_interface_SOURCES+= \ datawizard/interfaces/block/block_cuda.cu endif if STARPU_USE_OPENCL datawizard_interfaces_block_block_interface_SOURCES+= \ datawizard/interfaces/block/block_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ datawizard/interfaces/block/block_opencl_kernel.cl endif ################### # Tensor interface # ################### datawizard_interfaces_tensor_tensor_interface_SOURCES= \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/tensor/tensor_interface.c datawizard_interfaces_tensor_tensor_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) if STARPU_USE_CUDA datawizard_interfaces_tensor_tensor_interface_SOURCES+= \ datawizard/interfaces/tensor/tensor_cuda.cu endif if STARPU_USE_OPENCL datawizard_interfaces_tensor_tensor_interface_SOURCES+= \ datawizard/interfaces/tensor/tensor_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ datawizard/interfaces/tensor/tensor_opencl_kernel.cl endif ################### # Ndim interface # ################### datawizard_interfaces_ndim_ndim_interface_SOURCES= \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/ndim/ndim_interface.c datawizard_interfaces_ndim_ndim_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) if STARPU_USE_CUDA datawizard_interfaces_ndim_ndim_interface_SOURCES+= \ datawizard/interfaces/ndim/ndim_cuda.cu endif if STARPU_USE_OPENCL datawizard_interfaces_ndim_ndim_interface_SOURCES+= \ datawizard/interfaces/ndim/ndim_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ datawizard/interfaces/ndim/ndim_opencl_kernel.cl endif ################## # BSCR interface # ################## datawizard_interfaces_bcsr_bcsr_interface_SOURCES= \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/bcsr/bcsr_interface.c datawizard_interfaces_bcsr_bcsr_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) if STARPU_USE_CUDA datawizard_interfaces_bcsr_bcsr_interface_SOURCES+= \ datawizard/interfaces/bcsr/bcsr_cuda.cu endif if STARPU_USE_OPENCL datawizard_interfaces_bcsr_bcsr_interface_SOURCES+= \ datawizard/interfaces/bcsr/bcsr_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl endif ################# # COO interface # ################# datawizard_interfaces_coo_coo_interface_SOURCES= \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/coo/coo_interface.c datawizard_interfaces_coo_coo_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) if STARPU_USE_CUDA datawizard_interfaces_coo_coo_interface_SOURCES+= \ datawizard/interfaces/coo/coo_cuda.cu endif if STARPU_USE_OPENCL datawizard_interfaces_coo_coo_interface_SOURCES+= \ datawizard/interfaces/coo/coo_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ datawizard/interfaces/coo/coo_opencl_kernel.cl endif ################# # CSR interface # ################# datawizard_interfaces_csr_csr_interface_SOURCES= \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/csr/csr_interface.c datawizard_interfaces_csr_csr_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) if STARPU_USE_CUDA datawizard_interfaces_csr_csr_interface_SOURCES+= \ datawizard/interfaces/csr/csr_cuda.cu endif if STARPU_USE_OPENCL datawizard_interfaces_csr_csr_interface_SOURCES+= \ datawizard/interfaces/csr/csr_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ datawizard/interfaces/csr/csr_opencl_kernel.cl endif #################### # Vector interface # #################### datawizard_interfaces_vector_vector_interface_SOURCES = \ datawizard/interfaces/vector/vector_interface.c \ datawizard/interfaces/test_interfaces.c datawizard_interfaces_vector_vector_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) if STARPU_USE_CUDA datawizard_interfaces_vector_vector_interface_SOURCES += \ datawizard/interfaces/vector/vector_cuda.cu endif if STARPU_USE_OPENCL datawizard_interfaces_vector_vector_interface_SOURCES += \ datawizard/interfaces/vector/vector_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ datawizard/interfaces/vector/vector_opencl_kernel.cl endif #################### # Matrix interface # #################### datawizard_interfaces_matrix_matrix_interface_SOURCES= \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/matrix/matrix_interface.c datawizard_interfaces_matrix_matrix_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) if STARPU_USE_CUDA datawizard_interfaces_matrix_matrix_interface_SOURCES+= \ datawizard/interfaces/matrix/matrix_cuda.cu endif if STARPU_USE_OPENCL datawizard_interfaces_matrix_matrix_interface_SOURCES+= \ datawizard/interfaces/matrix/matrix_opencl.c nobase_STARPU_OPENCL_DATA_DATA+= \ datawizard/interfaces/matrix/matrix_opencl_kernel.cl endif ######################### # Multiformat interface # ######################### datawizard_interfaces_multiformat_multiformat_interface_SOURCES = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/multiformat/multiformat_interface.c \ datawizard/interfaces/multiformat/multiformat_conversion_codelets.c datawizard_interfaces_multiformat_multiformat_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) if STARPU_USE_CUDA datawizard_interfaces_multiformat_multiformat_interface_SOURCES+= \ datawizard/interfaces/multiformat/multiformat_cuda.cu \ datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu endif if STARPU_USE_OPENCL datawizard_interfaces_multiformat_multiformat_interface_SOURCES+= \ datawizard/interfaces/multiformat/multiformat_opencl.c \ datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl \ datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl endif datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_SOURCES=\ datawizard/interfaces/multiformat/advanced/generic.c \ datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.c datawizard_interfaces_multiformat_advanced_multiformat_data_release_SOURCES = \ datawizard/interfaces/multiformat/advanced/generic.c \ datawizard/interfaces/multiformat/advanced/multiformat_data_release.c datawizard_interfaces_multiformat_advanced_multiformat_worker_SOURCES=\ datawizard/interfaces/multiformat/advanced/generic.c \ datawizard/interfaces/multiformat/advanced/multiformat_worker.c datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_SOURCES = \ datawizard/interfaces/multiformat/advanced/generic.c \ datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.c datawizard_interfaces_multiformat_advanced_same_handle_SOURCES= \ datawizard/interfaces/multiformat/advanced/generic.c \ datawizard/interfaces/multiformat/advanced/same_handle.c ###################### # Variable interface # ###################### datawizard_interfaces_variable_variable_interface_SOURCES= \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/variable/variable_interface.c datawizard_interfaces_variable_variable_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) if STARPU_USE_CUDA datawizard_interfaces_variable_variable_interface_SOURCES+= \ datawizard/interfaces/variable/variable_cuda.cu endif if STARPU_USE_OPENCL datawizard_interfaces_variable_variable_interface_SOURCES+= \ datawizard/interfaces/variable/variable_opencl.c nobase_STARPU_OPENCL_DATA_DATA += \ datawizard/interfaces/variable/variable_opencl_kernel.cl endif ################## # Void interface # ################## datawizard_interfaces_void_void_interface_SOURCES=\ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/void/void_interface.c datawizard_interfaces_void_void_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) overlap_gpu_concurrency_SOURCES=\ overlap/gpu_concurrency.c if STARPU_USE_CUDA overlap_gpu_concurrency_SOURCES+=\ overlap/long_kernel.cu endif perfmodels_regression_based_memset_SOURCES=\ perfmodels/regression_based_memset.c perfmodels_regression_based_gpu_SOURCES=\ perfmodels/regression_based_gpu.c maxfpga_max_fpga_basic_static_LDADD = $(LDADD) \ maxfpga/slic_MyTasks.o maxfpga/max_fpga_basic_static.o: maxfpga/MyTasks.max maxfpga_max_fpga_advanced_static_LDADD = $(LDADD) \ maxfpga/slic_MyTasks.o maxfpga/max_fpga_advanced_static.o: maxfpga/MyTasks.max maxfpga_max_fpga_dynamic_LDADD = $(LDADD) \ maxfpga/slic_MyTasks.o maxfpga/max_fpga_dynamic.o: maxfpga/MyTasks.max maxfpga_max_fpga_mux_LDADD = $(LDADD) \ maxfpga/slic_MyTasksMux.o maxfpga/max_fpga_mux.o: maxfpga/MyTasksMux.max if STARPU_USE_OPENCL perfmodels_regression_based_memset_SOURCES+=\ perfmodels/opencl_memset.c perfmodels_regression_based_gpu_SOURCES+=\ perfmodels/opencl_memset.c nobase_STARPU_OPENCL_DATA_DATA += \ perfmodels/opencl_memset_kernel.cl endif perfmodels_non_linear_regression_based_SOURCES=\ perfmodels/non_linear_regression_based.c if STARPU_USE_OPENCL perfmodels_non_linear_regression_based_SOURCES+=\ perfmodels/opencl_memset.c endif sched_policies_execute_all_tasks_LDFLAGS = $(AM_LDFLAGS) -lm sched_policies_execute_all_tasks_CFLAGS = $(AM_LDFLAGS) $(FXT_CFLAGS) # Fortran90 tests # - link over source file to build our own object fortran90/starpu_mod.f90: @$(MKDIR_P) $(dir $@) $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ if STARPU_HAVE_FC # - express the creation of .mod along .o starpu_mod.mod: fortran90/starpu_mod.o # - list explicit dependences to control proper module files dependencies fortran90/init_01.o: starpu_mod.mod endif # Maxeler compiler if STARPU_USE_MAX_FPGA # or DFE for hardware execution MAX_TARGET ?= DFE_SIM MAX_DFE = MAX5C_$(MAX_TARGET) MAXJ=$(wildcard maxfpga/*.maxj) CLASS=$(MAXJ:.maxj=.class) $(CLASS) &: $(MAXJ) maxjc -1.7 -cp $$MAXCLASSPATH $(dir $<) %.max: %Manager.class CLASSPATH=$$CLASSPATH:. maxJavaRun $(shell dirname $*).$(notdir $*)Manager DFEModel=MAIA maxFileName=$(notdir $*) target=$(MAX_TARGET) cp $(notdir $*)_$(MAX_DFE)/results/$(notdir $*).{max,h} $(dir $@) slic_%.o: %.max sliccompile $< $@ CLEANFILES += */*.max */*.class max_fpga/*.h endif starpu-1.4.9+dfsg/tests/Makefile.in000066400000000000000000023641351507764646700172350ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ noinst_PROGRAMS = $(am__EXEEXT_8) $(am__EXEEXT_9) $(am__EXEEXT_6) \ $(am__EXEEXT_10) # Make tests run through mpiexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec # switch off local socket usage #MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 @STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader TESTS = $(am__EXEEXT_13) $(am__EXEEXT_6) @STARPU_COVERAGE_ENABLED_TRUE@am__append_8 = coverage/coverage.sh XFAIL_TESTS = errorcheck/invalid_blocking_calls$(EXEEXT) @STARPU_SIMGRID_TRUE@am__append_9 = \ @STARPU_SIMGRID_TRUE@ energy/energy_efficiency \ @STARPU_SIMGRID_TRUE@ datawizard/simgrid-locality @STARPU_SIMGRID_FALSE@am__append_10 = \ @STARPU_SIMGRID_FALSE@ main/deprecated_func \ @STARPU_SIMGRID_FALSE@ main/driver_api/init_run_deinit \ @STARPU_SIMGRID_FALSE@ main/driver_api/run_driver \ @STARPU_SIMGRID_FALSE@ main/deploop \ @STARPU_SIMGRID_FALSE@ main/display_binding \ @STARPU_SIMGRID_FALSE@ main/execute_on_a_specific_worker \ @STARPU_SIMGRID_FALSE@ main/insert_task \ @STARPU_SIMGRID_FALSE@ main/insert_task_value \ @STARPU_SIMGRID_FALSE@ main/insert_task_dyn_handles \ @STARPU_SIMGRID_FALSE@ main/insert_task_array \ @STARPU_SIMGRID_FALSE@ main/insert_task_many \ @STARPU_SIMGRID_FALSE@ main/job \ @STARPU_SIMGRID_FALSE@ main/multithreaded \ @STARPU_SIMGRID_FALSE@ main/starpu_task_bundle \ @STARPU_SIMGRID_FALSE@ main/starpu_task_wait_for_all \ @STARPU_SIMGRID_FALSE@ main/starpu_task_wait \ @STARPU_SIMGRID_FALSE@ main/static_restartable \ @STARPU_SIMGRID_FALSE@ main/static_restartable_using_initializer\ @STARPU_SIMGRID_FALSE@ main/static_restartable_tag \ @STARPU_SIMGRID_FALSE@ main/regenerate \ @STARPU_SIMGRID_FALSE@ main/regenerate_pipeline \ @STARPU_SIMGRID_FALSE@ main/restart \ @STARPU_SIMGRID_FALSE@ main/wait_all_regenerable_tasks \ @STARPU_SIMGRID_FALSE@ main/subgraph_repeat \ @STARPU_SIMGRID_FALSE@ main/subgraph_repeat_tag \ @STARPU_SIMGRID_FALSE@ main/subgraph_repeat_regenerate \ @STARPU_SIMGRID_FALSE@ main/subgraph_repeat_regenerate_tag \ @STARPU_SIMGRID_FALSE@ main/subgraph_repeat_regenerate_tag_cycle \ @STARPU_SIMGRID_FALSE@ main/empty_task_sync_point \ @STARPU_SIMGRID_FALSE@ main/empty_task_sync_point_tasks \ @STARPU_SIMGRID_FALSE@ main/tag_wait_api \ @STARPU_SIMGRID_FALSE@ main/tag_get_task \ @STARPU_SIMGRID_FALSE@ main/task_wait_api \ @STARPU_SIMGRID_FALSE@ main/declare_deps_in_callback \ @STARPU_SIMGRID_FALSE@ main/declare_deps_after_submission \ @STARPU_SIMGRID_FALSE@ main/declare_deps_after_submission_synchronous \ @STARPU_SIMGRID_FALSE@ main/get_current_task \ @STARPU_SIMGRID_FALSE@ main/starpu_init \ @STARPU_SIMGRID_FALSE@ main/submit \ @STARPU_SIMGRID_FALSE@ main/const_codelet \ @STARPU_SIMGRID_FALSE@ main/pause_resume \ @STARPU_SIMGRID_FALSE@ main/pack \ @STARPU_SIMGRID_FALSE@ main/get_children_tasks \ @STARPU_SIMGRID_FALSE@ main/hwloc_cpuset \ @STARPU_SIMGRID_FALSE@ main/task_end_dep \ @STARPU_SIMGRID_FALSE@ datawizard/acquire_cb_insert \ @STARPU_SIMGRID_FALSE@ datawizard/acquire_release \ @STARPU_SIMGRID_FALSE@ datawizard/acquire_release2 \ @STARPU_SIMGRID_FALSE@ datawizard/acquire_release_to \ @STARPU_SIMGRID_FALSE@ datawizard/acquire_try \ @STARPU_SIMGRID_FALSE@ datawizard/bcsr \ @STARPU_SIMGRID_FALSE@ datawizard/cache \ @STARPU_SIMGRID_FALSE@ datawizard/commute \ @STARPU_SIMGRID_FALSE@ datawizard/commute2 \ @STARPU_SIMGRID_FALSE@ datawizard/copy \ @STARPU_SIMGRID_FALSE@ datawizard/data_implicit_deps \ @STARPU_SIMGRID_FALSE@ datawizard/data_register \ @STARPU_SIMGRID_FALSE@ datawizard/scratch \ @STARPU_SIMGRID_FALSE@ datawizard/scratch_reuse \ @STARPU_SIMGRID_FALSE@ datawizard/sync_and_notify_data \ @STARPU_SIMGRID_FALSE@ datawizard/sync_and_notify_data_implicit\ @STARPU_SIMGRID_FALSE@ datawizard/dsm_stress \ @STARPU_SIMGRID_FALSE@ datawizard/double_parameter \ @STARPU_SIMGRID_FALSE@ datawizard/write_only_tmp_buffer \ @STARPU_SIMGRID_FALSE@ datawizard/data_invalidation \ @STARPU_SIMGRID_FALSE@ datawizard/data_deinitialize \ @STARPU_SIMGRID_FALSE@ datawizard/dining_philosophers \ @STARPU_SIMGRID_FALSE@ datawizard/manual_reduction \ @STARPU_SIMGRID_FALSE@ datawizard/readers_and_writers \ @STARPU_SIMGRID_FALSE@ datawizard/unpartition \ @STARPU_SIMGRID_FALSE@ datawizard/sync_with_data_with_mem \ @STARPU_SIMGRID_FALSE@ datawizard/sync_with_data_with_mem_non_blocking\ @STARPU_SIMGRID_FALSE@ datawizard/sync_with_data_with_mem_non_blocking_implicit\ @STARPU_SIMGRID_FALSE@ datawizard/mpi_like \ @STARPU_SIMGRID_FALSE@ datawizard/mpi_like_async \ @STARPU_SIMGRID_FALSE@ datawizard/critical_section_with_void_interface\ @STARPU_SIMGRID_FALSE@ datawizard/increment_init \ @STARPU_SIMGRID_FALSE@ datawizard/increment_redux \ @STARPU_SIMGRID_FALSE@ datawizard/increment_redux_partition \ @STARPU_SIMGRID_FALSE@ datawizard/increment_redux_v2 \ @STARPU_SIMGRID_FALSE@ datawizard/increment_redux_with_args \ @STARPU_SIMGRID_FALSE@ datawizard/increment_redux_lazy \ @STARPU_SIMGRID_FALSE@ datawizard/handle_to_pointer \ @STARPU_SIMGRID_FALSE@ datawizard/lazy_allocation \ @STARPU_SIMGRID_FALSE@ datawizard/no_unregister \ @STARPU_SIMGRID_FALSE@ datawizard/noreclaim \ @STARPU_SIMGRID_FALSE@ datawizard/nowhere \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/block/block_interface \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/bcsr/bcsr_interface \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/coo/coo_interface \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/csr/csr_interface \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/matrix/matrix_interface \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/multiformat_interface \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_data_release \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_worker \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/same_handle \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/tensor/tensor_interface \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/ndim/ndim_interface \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/variable/variable_interface \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/vector/vector_interface \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/void/void_interface \ @STARPU_SIMGRID_FALSE@ datawizard/in_place_partition \ @STARPU_SIMGRID_FALSE@ datawizard/partition_dep \ @STARPU_SIMGRID_FALSE@ datawizard/partition_lazy \ @STARPU_SIMGRID_FALSE@ datawizard/partition_init \ @STARPU_SIMGRID_FALSE@ datawizard/partition_wontuse \ @STARPU_SIMGRID_FALSE@ datawizard/gpu_register \ @STARPU_SIMGRID_FALSE@ datawizard/gpu_ptr_register \ @STARPU_SIMGRID_FALSE@ datawizard/variable_parameters \ @STARPU_SIMGRID_FALSE@ datawizard/wt_host \ @STARPU_SIMGRID_FALSE@ datawizard/wt_broadcast \ @STARPU_SIMGRID_FALSE@ datawizard/readonly \ @STARPU_SIMGRID_FALSE@ datawizard/specific_node \ @STARPU_SIMGRID_FALSE@ datawizard/specific_node_same \ @STARPU_SIMGRID_FALSE@ datawizard/task_with_multiple_time_the_same_handle \ @STARPU_SIMGRID_FALSE@ datawizard/test_arbiter \ @STARPU_SIMGRID_FALSE@ datawizard/invalidate_pending_requests \ @STARPU_SIMGRID_FALSE@ datawizard/deinitialize_pending_requests \ @STARPU_SIMGRID_FALSE@ datawizard/temporary_partition \ @STARPU_SIMGRID_FALSE@ datawizard/partitioned_initialization \ @STARPU_SIMGRID_FALSE@ datawizard/partitioned_acquire \ @STARPU_SIMGRID_FALSE@ datawizard/temporary_partition_implicit \ @STARPU_SIMGRID_FALSE@ datawizard/temporary_partition_read \ @STARPU_SIMGRID_FALSE@ datawizard/redux_acquire \ @STARPU_SIMGRID_FALSE@ disk/disk_copy \ @STARPU_SIMGRID_FALSE@ disk/disk_copy_unpack \ @STARPU_SIMGRID_FALSE@ disk/disk_copy_to_disk \ @STARPU_SIMGRID_FALSE@ disk/disk_compute \ @STARPU_SIMGRID_FALSE@ disk/disk_pack \ @STARPU_SIMGRID_FALSE@ disk/mem_reclaim \ @STARPU_SIMGRID_FALSE@ errorcheck/invalid_blocking_calls \ @STARPU_SIMGRID_FALSE@ errorcheck/workers_cpuid \ @STARPU_SIMGRID_FALSE@ fault-tolerance/retry \ @STARPU_SIMGRID_FALSE@ helper/starpu_data_cpy \ @STARPU_SIMGRID_FALSE@ helper/starpu_data_dup_ro \ @STARPU_SIMGRID_FALSE@ helper/starpu_create_sync_task \ @STARPU_SIMGRID_FALSE@ microbenchs/async_tasks_overhead \ @STARPU_SIMGRID_FALSE@ microbenchs/sync_tasks_overhead \ @STARPU_SIMGRID_FALSE@ microbenchs/tasks_overhead \ @STARPU_SIMGRID_FALSE@ microbenchs/tasks_size_overhead \ @STARPU_SIMGRID_FALSE@ microbenchs/prefetch_data_on_node \ @STARPU_SIMGRID_FALSE@ microbenchs/redundant_buffer \ @STARPU_SIMGRID_FALSE@ microbenchs/matrix_as_vector \ @STARPU_SIMGRID_FALSE@ microbenchs/bandwidth \ @STARPU_SIMGRID_FALSE@ overlap/gpu_concurrency \ @STARPU_SIMGRID_FALSE@ parallel_tasks/combined_worker_assign_workerid \ @STARPU_SIMGRID_FALSE@ parallel_tasks/explicit_combined_worker \ @STARPU_SIMGRID_FALSE@ parallel_tasks/parallel_kernels \ @STARPU_SIMGRID_FALSE@ parallel_tasks/parallel_kernels_trivial \ @STARPU_SIMGRID_FALSE@ parallel_tasks/parallel_kernels_spmd \ @STARPU_SIMGRID_FALSE@ parallel_tasks/spmd_peager \ @STARPU_SIMGRID_FALSE@ parallel_tasks/cuda_only \ @STARPU_SIMGRID_FALSE@ perfmodels/regression_based_memset \ @STARPU_SIMGRID_FALSE@ perfmodels/regression_based_check \ @STARPU_SIMGRID_FALSE@ perfmodels/regression_based_multiimpl \ @STARPU_SIMGRID_FALSE@ perfmodels/regression_based_energy \ @STARPU_SIMGRID_FALSE@ perfmodels/regression_based_gpu \ @STARPU_SIMGRID_FALSE@ perfmodels/non_linear_regression_based \ @STARPU_SIMGRID_FALSE@ perfmodels/feed \ @STARPU_SIMGRID_FALSE@ perfmodels/user_base \ @STARPU_SIMGRID_FALSE@ perfmodels/valid_model \ @STARPU_SIMGRID_FALSE@ perfmodels/path \ @STARPU_SIMGRID_FALSE@ perfmodels/memory \ @STARPU_SIMGRID_FALSE@ sched_policies/data_locality \ @STARPU_SIMGRID_FALSE@ sched_policies/execute_all_tasks \ @STARPU_SIMGRID_FALSE@ sched_policies/prio \ @STARPU_SIMGRID_FALSE@ sched_policies/simple_deps \ @STARPU_SIMGRID_FALSE@ sched_policies/simple_cpu_gpu_sched \ @STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_hierarchy @STARPU_SIMGRID_FALSE@am__append_11 = \ @STARPU_SIMGRID_FALSE@ datawizard/allocate_many_numa_nodes @STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@am__append_12 = \ @STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@ maxfpga/max_fpga_basic_static \ @STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@ maxfpga/max_fpga_advanced_static \ @STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@ maxfpga/max_fpga_dynamic \ @STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@ maxfpga/max_fpga_mux @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__append_13 = \ @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@ fortran90/init_01 @STARPU_LONG_CHECK_TRUE@@STARPU_SIMGRID_FALSE@am__append_14 = \ @STARPU_LONG_CHECK_TRUE@@STARPU_SIMGRID_FALSE@ main/tag_task_data_deps \ @STARPU_LONG_CHECK_TRUE@@STARPU_SIMGRID_FALSE@ datawizard/reclaim examplebin_PROGRAMS = main/deadlock$(EXEEXT) \ microbenchs/async_tasks_overhead$(EXEEXT) \ microbenchs/sync_tasks_overhead$(EXEEXT) \ microbenchs/tasks_overhead$(EXEEXT) \ microbenchs/tasks_size_overhead$(EXEEXT) \ microbenchs/local_pingpong$(EXEEXT) $(am__EXEEXT_7) @STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@am__append_15 = \ @STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@ microbenchs/bandwidth @STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@am__append_16 = \ @STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@ microbenchs/tasks_data_overhead.sh \ @STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@ microbenchs/sync_tasks_data_overhead.sh \ @STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@ microbenchs/async_tasks_data_overhead.sh \ @STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@ microbenchs/tasks_size_overhead_scheds.sh @STARPU_HAVE_WINDOWS_FALSE@check_PROGRAMS = $(am__EXEEXT_6) @STARPU_HAVE_WINDOWS_TRUE@check_PROGRAMS = $(am__EXEEXT_6) @STARPU_SIMGRID_TRUE@am__append_17 = $(MICROBENCHS:=.sh) @STARPU_USE_FXT_TRUE@am__append_18 = \ @STARPU_USE_FXT_TRUE@ overlap/overlap.sh ################################ # Simgrid Model Checking tests # ################################ @STARPU_SIMGRID_MC_TRUE@am__append_19 = model-checking @STARPU_USE_CUDA_TRUE@am__append_20 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_21 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_22 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_23 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_24 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_25 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_26 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_27 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_28 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_29 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_30 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_31 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_32 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_33 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_34 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_35 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_36 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_37 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_38 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_39 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_40 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_41 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_42 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_43 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_44 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_45 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_46 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_47 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_48 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_49 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_50 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_51 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_52 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_53 = \ @STARPU_USE_CUDA_TRUE@ datawizard/scratch_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_54 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/scratch_opencl.c @STARPU_USE_CUDA_TRUE@am__append_55 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_56 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_57 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_58 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_59 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_60 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_61 = \ @STARPU_USE_CUDA_TRUE@ datawizard/sync_and_notify_data_kernels.cu @STARPU_USE_OPENCL_TRUE@am__append_62 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/sync_and_notify_data_opencl.c @STARPU_USE_CUDA_TRUE@am__append_63 = \ @STARPU_USE_CUDA_TRUE@ datawizard/sync_and_notify_data_kernels.cu @STARPU_USE_OPENCL_TRUE@am__append_64 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/sync_and_notify_data_opencl.c @STARPU_USE_CUDA_TRUE@am__append_65 = \ @STARPU_USE_CUDA_TRUE@ datawizard/scal_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_66 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/scal_opencl.cl @STARPU_USE_CUDA_TRUE@am__append_67 = \ @STARPU_USE_CUDA_TRUE@ datawizard/scal_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_68 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/scal_opencl.cl @STARPU_USE_CUDA_TRUE@am__append_69 = \ @STARPU_USE_CUDA_TRUE@ datawizard/scal_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_70 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/scal_opencl.cl @STARPU_USE_CUDA_TRUE@am__append_71 = \ @STARPU_USE_CUDA_TRUE@ datawizard/scal_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_72 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/scal_opencl.cl @STARPU_USE_CUDA_TRUE@am__append_73 = \ @STARPU_USE_CUDA_TRUE@ datawizard/scal_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_74 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/scal_opencl.cl @STARPU_USE_CUDA_TRUE@am__append_75 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_76 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_77 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_78 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_79 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_80 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_81 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_82 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_83 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_84 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_85 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_86 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_87 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_88 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_89 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_90 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_91 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_92 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_93 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_94 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_95 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_96 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.cu @STARPU_USE_HIP_TRUE@am__append_97 = \ @STARPU_USE_HIP_TRUE@ variable/increment_hip.hip @STARPU_USE_OPENCL_TRUE@am__append_98 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__append_99 = \ @STARPU_USE_CUDA_TRUE@ datawizard/interfaces/block/block_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_100 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/block/block_opencl.c @STARPU_USE_CUDA_TRUE@am__append_101 = \ @STARPU_USE_CUDA_TRUE@ datawizard/interfaces/tensor/tensor_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_102 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/tensor/tensor_opencl.c @STARPU_USE_CUDA_TRUE@am__append_103 = \ @STARPU_USE_CUDA_TRUE@ datawizard/interfaces/ndim/ndim_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_104 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/ndim/ndim_opencl.c @STARPU_USE_CUDA_TRUE@am__append_105 = \ @STARPU_USE_CUDA_TRUE@ datawizard/interfaces/bcsr/bcsr_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_106 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/bcsr/bcsr_opencl.c @STARPU_USE_CUDA_TRUE@am__append_107 = \ @STARPU_USE_CUDA_TRUE@ datawizard/interfaces/coo/coo_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_108 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/coo/coo_opencl.c @STARPU_USE_CUDA_TRUE@am__append_109 = \ @STARPU_USE_CUDA_TRUE@ datawizard/interfaces/csr/csr_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_110 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/csr/csr_opencl.c @STARPU_USE_CUDA_TRUE@am__append_111 = \ @STARPU_USE_CUDA_TRUE@ datawizard/interfaces/vector/vector_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_112 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/vector/vector_opencl.c @STARPU_USE_CUDA_TRUE@am__append_113 = \ @STARPU_USE_CUDA_TRUE@ datawizard/interfaces/matrix/matrix_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_114 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/matrix/matrix_opencl.c @STARPU_USE_CUDA_TRUE@am__append_115 = \ @STARPU_USE_CUDA_TRUE@ datawizard/interfaces/multiformat/multiformat_cuda.cu \ @STARPU_USE_CUDA_TRUE@ datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_116 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/multiformat/multiformat_opencl.c \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c @STARPU_USE_CUDA_TRUE@am__append_117 = \ @STARPU_USE_CUDA_TRUE@ datawizard/interfaces/variable/variable_cuda.cu @STARPU_USE_OPENCL_TRUE@am__append_118 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/variable/variable_opencl.c @STARPU_USE_CUDA_TRUE@am__append_119 = \ @STARPU_USE_CUDA_TRUE@ overlap/long_kernel.cu @STARPU_USE_OPENCL_TRUE@am__append_120 = \ @STARPU_USE_OPENCL_TRUE@ perfmodels/opencl_memset.c @STARPU_USE_OPENCL_TRUE@am__append_121 = \ @STARPU_USE_OPENCL_TRUE@ perfmodels/opencl_memset.c @STARPU_USE_OPENCL_TRUE@am__append_122 = \ @STARPU_USE_OPENCL_TRUE@ perfmodels/opencl_memset.c @STARPU_USE_MAX_FPGA_TRUE@am__append_123 = */*.max */*.class max_fpga/*.h subdir = tests ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = @STARPU_SIMGRID_TRUE@am__EXEEXT_1 = energy/energy_efficiency$(EXEEXT) \ @STARPU_SIMGRID_TRUE@ datawizard/simgrid-locality$(EXEEXT) @STARPU_SIMGRID_FALSE@am__EXEEXT_2 = main/deprecated_func$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/driver_api/init_run_deinit$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/driver_api/run_driver$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/deploop$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/display_binding$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/execute_on_a_specific_worker$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/insert_task$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/insert_task_value$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/insert_task_dyn_handles$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/insert_task_array$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/insert_task_many$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/job$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/multithreaded$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/starpu_task_bundle$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/starpu_task_wait_for_all$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/starpu_task_wait$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/static_restartable$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/static_restartable_using_initializer$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/static_restartable_tag$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/regenerate$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/regenerate_pipeline$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/restart$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/wait_all_regenerable_tasks$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/subgraph_repeat$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/subgraph_repeat_tag$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/subgraph_repeat_regenerate$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/subgraph_repeat_regenerate_tag$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/subgraph_repeat_regenerate_tag_cycle$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/empty_task_sync_point$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/empty_task_sync_point_tasks$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/tag_wait_api$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/tag_get_task$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/task_wait_api$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/declare_deps_in_callback$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/declare_deps_after_submission$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/declare_deps_after_submission_synchronous$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/get_current_task$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/starpu_init$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/submit$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/const_codelet$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/pause_resume$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/pack$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/get_children_tasks$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/hwloc_cpuset$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ main/task_end_dep$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/acquire_cb_insert$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/acquire_release$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/acquire_release2$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/acquire_release_to$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/acquire_try$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/bcsr$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/cache$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/commute$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/commute2$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/copy$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/data_implicit_deps$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/data_register$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/scratch$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/scratch_reuse$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/sync_and_notify_data$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/sync_and_notify_data_implicit$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/dsm_stress$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/double_parameter$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/write_only_tmp_buffer$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/data_invalidation$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/data_deinitialize$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/dining_philosophers$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/manual_reduction$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/readers_and_writers$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/unpartition$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/sync_with_data_with_mem$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/sync_with_data_with_mem_non_blocking$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/sync_with_data_with_mem_non_blocking_implicit$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/mpi_like$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/mpi_like_async$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/critical_section_with_void_interface$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/increment_init$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/increment_redux$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/increment_redux_partition$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/increment_redux_v2$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/increment_redux_with_args$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/increment_redux_lazy$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/handle_to_pointer$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/lazy_allocation$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/no_unregister$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/noreclaim$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/nowhere$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/block/block_interface$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/bcsr/bcsr_interface$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/coo/coo_interface$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/csr/csr_interface$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/matrix/matrix_interface$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/multiformat_interface$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_data_release$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_worker$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/multiformat/advanced/same_handle$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/tensor/tensor_interface$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/ndim/ndim_interface$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/variable/variable_interface$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/vector/vector_interface$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/interfaces/void/void_interface$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/in_place_partition$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/partition_dep$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/partition_lazy$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/partition_init$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/partition_wontuse$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/gpu_register$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/gpu_ptr_register$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/variable_parameters$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/wt_host$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/wt_broadcast$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/readonly$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/specific_node$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/specific_node_same$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/task_with_multiple_time_the_same_handle$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/test_arbiter$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/invalidate_pending_requests$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/deinitialize_pending_requests$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/temporary_partition$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/partitioned_initialization$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/partitioned_acquire$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/temporary_partition_implicit$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/temporary_partition_read$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ datawizard/redux_acquire$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ disk/disk_copy$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ disk/disk_copy_unpack$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ disk/disk_copy_to_disk$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ disk/disk_compute$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ disk/disk_pack$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ disk/mem_reclaim$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ errorcheck/invalid_blocking_calls$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ errorcheck/workers_cpuid$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ fault-tolerance/retry$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ helper/starpu_data_cpy$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ helper/starpu_data_dup_ro$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ helper/starpu_create_sync_task$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ microbenchs/async_tasks_overhead$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ microbenchs/sync_tasks_overhead$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ microbenchs/tasks_overhead$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ microbenchs/tasks_size_overhead$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ microbenchs/prefetch_data_on_node$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ microbenchs/redundant_buffer$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ microbenchs/matrix_as_vector$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ microbenchs/bandwidth$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ overlap/gpu_concurrency$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ parallel_tasks/combined_worker_assign_workerid$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ parallel_tasks/explicit_combined_worker$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ parallel_tasks/parallel_kernels$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ parallel_tasks/parallel_kernels_trivial$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ parallel_tasks/parallel_kernels_spmd$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ parallel_tasks/spmd_peager$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ parallel_tasks/cuda_only$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perfmodels/regression_based_memset$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perfmodels/regression_based_check$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perfmodels/regression_based_multiimpl$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perfmodels/regression_based_energy$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perfmodels/regression_based_gpu$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perfmodels/non_linear_regression_based$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perfmodels/feed$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perfmodels/user_base$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perfmodels/valid_model$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perfmodels/path$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ perfmodels/memory$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ sched_policies/data_locality$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ sched_policies/execute_all_tasks$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ sched_policies/prio$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ sched_policies/simple_deps$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ sched_policies/simple_cpu_gpu_sched$(EXEEXT) \ @STARPU_SIMGRID_FALSE@ sched_ctx/sched_ctx_hierarchy$(EXEEXT) @STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@am__EXEEXT_3 = maxfpga/max_fpga_basic_static$(EXEEXT) \ @STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@ maxfpga/max_fpga_advanced_static$(EXEEXT) \ @STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@ maxfpga/max_fpga_dynamic$(EXEEXT) \ @STARPU_SIMGRID_FALSE@@STARPU_USE_MAX_FPGA_TRUE@ maxfpga/max_fpga_mux$(EXEEXT) @STARPU_HAVE_FC_TRUE@@STARPU_SANITIZE_FALSE@@STARPU_SIMGRID_FALSE@am__EXEEXT_4 = fortran90/init_01$(EXEEXT) @STARPU_LONG_CHECK_TRUE@@STARPU_SIMGRID_FALSE@am__EXEEXT_5 = main/tag_task_data_deps$(EXEEXT) \ @STARPU_LONG_CHECK_TRUE@@STARPU_SIMGRID_FALSE@ datawizard/reclaim$(EXEEXT) am__EXEEXT_6 = main/callback$(EXEEXT) main/bind$(EXEEXT) \ main/mkdtemp$(EXEEXT) main/execute_schedule$(EXEEXT) \ main/insert_task_pack$(EXEEXT) \ main/insert_task_nullcodelet$(EXEEXT) \ main/insert_task_where$(EXEEXT) \ main/multithreaded_init$(EXEEXT) main/empty_task$(EXEEXT) \ main/empty_task_chain$(EXEEXT) \ main/starpu_worker_exists$(EXEEXT) \ main/codelet_null_callback$(EXEEXT) \ datawizard/allocate$(EXEEXT) datawizard/acquire_cb$(EXEEXT) \ datawizard/deps$(EXEEXT) \ datawizard/user_interaction_implicit$(EXEEXT) \ datawizard/interfaces/copy_interfaces$(EXEEXT) \ datawizard/numa_overflow$(EXEEXT) datawizard/locality$(EXEEXT) \ datawizard/variable_size$(EXEEXT) \ errorcheck/starpu_init_noworker$(EXEEXT) \ errorcheck/invalid_tasks$(EXEEXT) helper/cublas_init$(EXEEXT) \ helper/cublasLt_init$(EXEEXT) helper/cusparse_init$(EXEEXT) \ helper/hipblas_init$(EXEEXT) helper/pinned_memory$(EXEEXT) \ helper/execute_on_all$(EXEEXT) \ microbenchs/display_structures_size$(EXEEXT) \ microbenchs/local_pingpong$(EXEEXT) overlap/overlap$(EXEEXT) \ sched_ctx/sched_ctx_list$(EXEEXT) \ sched_ctx/sched_ctx_policy_data$(EXEEXT) \ openmp/init_exit_01$(EXEEXT) openmp/init_exit_02$(EXEEXT) \ openmp/environment$(EXEEXT) openmp/api_01$(EXEEXT) \ openmp/parallel_01$(EXEEXT) openmp/parallel_02$(EXEEXT) \ openmp/parallel_03$(EXEEXT) \ openmp/parallel_barrier_01$(EXEEXT) \ openmp/parallel_master_01$(EXEEXT) \ openmp/parallel_master_inline_01$(EXEEXT) \ openmp/parallel_single_wait_01$(EXEEXT) \ openmp/parallel_single_nowait_01$(EXEEXT) \ openmp/parallel_single_inline_01$(EXEEXT) \ openmp/parallel_single_copyprivate_01$(EXEEXT) \ openmp/parallel_single_copyprivate_inline_01$(EXEEXT) \ openmp/parallel_critical_01$(EXEEXT) \ openmp/parallel_critical_inline_01$(EXEEXT) \ openmp/parallel_critical_named_01$(EXEEXT) \ openmp/parallel_critical_named_inline_01$(EXEEXT) \ openmp/parallel_simple_lock_01$(EXEEXT) \ openmp/parallel_nested_lock_01$(EXEEXT) \ openmp/parallel_for_01$(EXEEXT) \ openmp/parallel_for_02$(EXEEXT) \ openmp/parallel_for_ordered_01$(EXEEXT) \ openmp/parallel_sections_01$(EXEEXT) \ openmp/parallel_sections_combined_01$(EXEEXT) \ openmp/task_01$(EXEEXT) openmp/task_02$(EXEEXT) \ openmp/task_03$(EXEEXT) openmp/taskloop$(EXEEXT) \ openmp/taskwait_01$(EXEEXT) openmp/taskgroup_01$(EXEEXT) \ openmp/taskgroup_02$(EXEEXT) openmp/array_slice_01$(EXEEXT) \ openmp/cuda_task_01$(EXEEXT) perfmodels/value_nan$(EXEEXT) \ sched_policies/workerids$(EXEEXT) $(am__EXEEXT_1) \ $(am__EXEEXT_2) $(am__EXEEXT_3) $(am__EXEEXT_4) \ $(am__EXEEXT_5) @STARPU_SIMGRID_FALSE@@STARPU_USE_MPI_MASTER_SLAVE_FALSE@am__EXEEXT_7 = microbenchs/bandwidth$(EXEEXT) am__installdirs = "$(DESTDIR)$(examplebindir)" \ "$(DESTDIR)$(examplebindir)" \ "$(DESTDIR)$(STARPU_OPENCL_DATAdir)" @STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_8 = loader$(EXEEXT) @STARPU_SIMGRID_FALSE@am__EXEEXT_9 = datawizard/allocate_many_numa_nodes$(EXEEXT) am__EXEEXT_10 = \ microbenchs/parallel_independent_homogeneous_tasks$(EXEEXT) \ microbenchs/parallel_independent_heterogeneous_tasks$(EXEEXT) \ microbenchs/parallel_independent_homogeneous_tasks_data$(EXEEXT) \ microbenchs/parallel_independent_heterogeneous_tasks_data$(EXEEXT) \ microbenchs/parallel_redux_homogeneous_tasks_data$(EXEEXT) \ microbenchs/parallel_redux_heterogeneous_tasks_data$(EXEEXT) \ microbenchs/parallel_dependent_homogeneous_tasks_data$(EXEEXT) PROGRAMS = $(examplebin_PROGRAMS) $(noinst_PROGRAMS) datawizard_acquire_cb_SOURCES = datawizard/acquire_cb.c am__dirstamp = $(am__leading_dot)dirstamp datawizard_acquire_cb_OBJECTS = datawizard/acquire_cb.$(OBJEXT) datawizard_acquire_cb_LDADD = $(LDADD) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = datawizard_acquire_cb_insert_SOURCES = datawizard/acquire_cb_insert.c datawizard_acquire_cb_insert_OBJECTS = \ datawizard/acquire_cb_insert.$(OBJEXT) datawizard_acquire_cb_insert_LDADD = $(LDADD) am__datawizard_acquire_release_SOURCES_DIST = \ datawizard/acquire_release.c variable/increment.c \ variable/increment_cuda.cu variable/increment_hip.hip \ variable/increment_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_1 = \ @STARPU_USE_CUDA_TRUE@ variable/increment_cuda.$(OBJEXT) @STARPU_USE_HIP_TRUE@am__objects_2 = variable/increment_hip.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_3 = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl.$(OBJEXT) am_datawizard_acquire_release_OBJECTS = \ datawizard/acquire_release.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) datawizard_acquire_release_OBJECTS = \ $(am_datawizard_acquire_release_OBJECTS) datawizard_acquire_release_LDADD = $(LDADD) am__datawizard_acquire_release2_SOURCES_DIST = \ datawizard/acquire_release2.c variable/increment.c \ variable/increment_cuda.cu variable/increment_hip.hip \ variable/increment_opencl.c am_datawizard_acquire_release2_OBJECTS = \ datawizard/acquire_release2.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) datawizard_acquire_release2_OBJECTS = \ $(am_datawizard_acquire_release2_OBJECTS) datawizard_acquire_release2_LDADD = $(LDADD) am__datawizard_acquire_release_to_SOURCES_DIST = \ datawizard/acquire_release_to.c variable/increment.c \ variable/increment_cuda.cu variable/increment_hip.hip \ variable/increment_opencl.c am_datawizard_acquire_release_to_OBJECTS = \ datawizard/acquire_release_to.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) datawizard_acquire_release_to_OBJECTS = \ $(am_datawizard_acquire_release_to_OBJECTS) datawizard_acquire_release_to_LDADD = $(LDADD) datawizard_acquire_try_SOURCES = datawizard/acquire_try.c datawizard_acquire_try_OBJECTS = datawizard/acquire_try.$(OBJEXT) datawizard_acquire_try_LDADD = $(LDADD) datawizard_allocate_SOURCES = datawizard/allocate.c datawizard_allocate_OBJECTS = datawizard/allocate.$(OBJEXT) datawizard_allocate_LDADD = $(LDADD) datawizard_allocate_many_numa_nodes_SOURCES = \ datawizard/allocate_many_numa_nodes.c datawizard_allocate_many_numa_nodes_OBJECTS = \ datawizard/allocate_many_numa_nodes.$(OBJEXT) datawizard_allocate_many_numa_nodes_LDADD = $(LDADD) datawizard_bcsr_SOURCES = datawizard/bcsr.c datawizard_bcsr_OBJECTS = datawizard/bcsr.$(OBJEXT) datawizard_bcsr_LDADD = $(LDADD) datawizard_cache_SOURCES = datawizard/cache.c datawizard_cache_OBJECTS = datawizard/cache.$(OBJEXT) datawizard_cache_LDADD = $(LDADD) datawizard_commute_SOURCES = datawizard/commute.c datawizard_commute_OBJECTS = datawizard/commute.$(OBJEXT) datawizard_commute_LDADD = $(LDADD) datawizard_commute2_SOURCES = datawizard/commute2.c datawizard_commute2_OBJECTS = datawizard/commute2.$(OBJEXT) datawizard_commute2_LDADD = $(LDADD) datawizard_copy_SOURCES = datawizard/copy.c datawizard_copy_OBJECTS = datawizard/copy.$(OBJEXT) datawizard_copy_LDADD = $(LDADD) datawizard_critical_section_with_void_interface_SOURCES = \ datawizard/critical_section_with_void_interface.c datawizard_critical_section_with_void_interface_OBJECTS = \ datawizard/critical_section_with_void_interface.$(OBJEXT) datawizard_critical_section_with_void_interface_LDADD = $(LDADD) datawizard_data_deinitialize_SOURCES = datawizard/data_deinitialize.c datawizard_data_deinitialize_OBJECTS = \ datawizard/data_deinitialize.$(OBJEXT) datawizard_data_deinitialize_LDADD = $(LDADD) datawizard_data_implicit_deps_SOURCES = \ datawizard/data_implicit_deps.c datawizard_data_implicit_deps_OBJECTS = \ datawizard/data_implicit_deps.$(OBJEXT) datawizard_data_implicit_deps_LDADD = $(LDADD) datawizard_data_invalidation_SOURCES = datawizard/data_invalidation.c datawizard_data_invalidation_OBJECTS = \ datawizard/data_invalidation.$(OBJEXT) datawizard_data_invalidation_LDADD = $(LDADD) datawizard_data_register_SOURCES = datawizard/data_register.c datawizard_data_register_OBJECTS = \ datawizard/data_register-data_register.$(OBJEXT) datawizard_data_register_LDADD = $(LDADD) datawizard_data_register_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(datawizard_data_register_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ datawizard_deinitialize_pending_requests_SOURCES = \ datawizard/deinitialize_pending_requests.c datawizard_deinitialize_pending_requests_OBJECTS = \ datawizard/deinitialize_pending_requests.$(OBJEXT) datawizard_deinitialize_pending_requests_LDADD = $(LDADD) datawizard_deps_SOURCES = datawizard/deps.c datawizard_deps_OBJECTS = datawizard/deps.$(OBJEXT) datawizard_deps_LDADD = $(LDADD) datawizard_dining_philosophers_SOURCES = \ datawizard/dining_philosophers.c datawizard_dining_philosophers_OBJECTS = \ datawizard/dining_philosophers.$(OBJEXT) datawizard_dining_philosophers_LDADD = $(LDADD) datawizard_double_parameter_SOURCES = datawizard/double_parameter.c datawizard_double_parameter_OBJECTS = \ datawizard/double_parameter.$(OBJEXT) datawizard_double_parameter_LDADD = $(LDADD) datawizard_dsm_stress_SOURCES = datawizard/dsm_stress.c datawizard_dsm_stress_OBJECTS = datawizard/dsm_stress.$(OBJEXT) datawizard_dsm_stress_LDADD = $(LDADD) am__datawizard_gpu_ptr_register_SOURCES_DIST = \ datawizard/gpu_ptr_register.c datawizard/scal.c \ datawizard/scal_cuda.cu datawizard/scal_opencl.cl @STARPU_USE_CUDA_TRUE@am__objects_4 = datawizard/scal_cuda.$(OBJEXT) am__objects_5 = am_datawizard_gpu_ptr_register_OBJECTS = \ datawizard/gpu_ptr_register.$(OBJEXT) \ datawizard/scal.$(OBJEXT) $(am__objects_4) $(am__objects_5) datawizard_gpu_ptr_register_OBJECTS = \ $(am_datawizard_gpu_ptr_register_OBJECTS) datawizard_gpu_ptr_register_LDADD = $(LDADD) am__datawizard_gpu_register_SOURCES_DIST = datawizard/gpu_register.c \ datawizard/scal.c datawizard/scal_cuda.cu \ datawizard/scal_opencl.cl am_datawizard_gpu_register_OBJECTS = \ datawizard/gpu_register.$(OBJEXT) datawizard/scal.$(OBJEXT) \ $(am__objects_4) $(am__objects_5) datawizard_gpu_register_OBJECTS = \ $(am_datawizard_gpu_register_OBJECTS) datawizard_gpu_register_LDADD = $(LDADD) datawizard_handle_to_pointer_SOURCES = datawizard/handle_to_pointer.c datawizard_handle_to_pointer_OBJECTS = \ datawizard/handle_to_pointer.$(OBJEXT) datawizard_handle_to_pointer_LDADD = $(LDADD) am__datawizard_in_place_partition_SOURCES_DIST = \ datawizard/in_place_partition.c datawizard/scal.c \ datawizard/scal_cuda.cu datawizard/scal_opencl.cl am_datawizard_in_place_partition_OBJECTS = \ datawizard/in_place_partition.$(OBJEXT) \ datawizard/scal.$(OBJEXT) $(am__objects_4) $(am__objects_5) datawizard_in_place_partition_OBJECTS = \ $(am_datawizard_in_place_partition_OBJECTS) datawizard_in_place_partition_LDADD = $(LDADD) am__datawizard_increment_init_SOURCES_DIST = \ datawizard/increment_init.c variable/increment.c \ variable/increment_cuda.cu variable/increment_hip.hip \ variable/increment_opencl.c am_datawizard_increment_init_OBJECTS = \ datawizard/increment_init.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) datawizard_increment_init_OBJECTS = \ $(am_datawizard_increment_init_OBJECTS) datawizard_increment_init_LDADD = $(LDADD) am__datawizard_increment_redux_SOURCES_DIST = \ datawizard/increment_redux.c variable/increment.c \ variable/increment_cuda.cu variable/increment_hip.hip \ variable/increment_opencl.c am_datawizard_increment_redux_OBJECTS = \ datawizard/increment_redux.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) datawizard_increment_redux_OBJECTS = \ $(am_datawizard_increment_redux_OBJECTS) datawizard_increment_redux_LDADD = $(LDADD) am__datawizard_increment_redux_lazy_SOURCES_DIST = \ datawizard/increment_redux_lazy.c variable/increment.c \ variable/increment_cuda.cu variable/increment_hip.hip \ variable/increment_opencl.c am_datawizard_increment_redux_lazy_OBJECTS = \ datawizard/increment_redux_lazy.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) datawizard_increment_redux_lazy_OBJECTS = \ $(am_datawizard_increment_redux_lazy_OBJECTS) datawizard_increment_redux_lazy_LDADD = $(LDADD) am__datawizard_increment_redux_partition_SOURCES_DIST = \ datawizard/increment_redux_partition.c variable/increment.c \ variable/increment_cuda.cu variable/increment_hip.hip \ variable/increment_opencl.c am_datawizard_increment_redux_partition_OBJECTS = \ datawizard/increment_redux_partition.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) datawizard_increment_redux_partition_OBJECTS = \ $(am_datawizard_increment_redux_partition_OBJECTS) datawizard_increment_redux_partition_LDADD = $(LDADD) am__datawizard_increment_redux_v2_SOURCES_DIST = \ datawizard/increment_redux_v2.c variable/increment.c \ variable/increment_cuda.cu variable/increment_hip.hip \ variable/increment_opencl.c am_datawizard_increment_redux_v2_OBJECTS = \ datawizard/increment_redux_v2.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) datawizard_increment_redux_v2_OBJECTS = \ $(am_datawizard_increment_redux_v2_OBJECTS) datawizard_increment_redux_v2_LDADD = $(LDADD) am__datawizard_increment_redux_with_args_SOURCES_DIST = \ datawizard/increment_redux_with_args.c variable/increment.c \ variable/increment_cuda.cu variable/increment_hip.hip \ variable/increment_opencl.c am_datawizard_increment_redux_with_args_OBJECTS = \ datawizard/increment_redux_with_args.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) datawizard_increment_redux_with_args_OBJECTS = \ $(am_datawizard_increment_redux_with_args_OBJECTS) datawizard_increment_redux_with_args_LDADD = $(LDADD) am__datawizard_interfaces_bcsr_bcsr_interface_SOURCES_DIST = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/bcsr/bcsr_interface.c \ datawizard/interfaces/bcsr/bcsr_cuda.cu \ datawizard/interfaces/bcsr/bcsr_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_6 = datawizard/interfaces/bcsr/bcsr_cuda.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_7 = datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.$(OBJEXT) am_datawizard_interfaces_bcsr_bcsr_interface_OBJECTS = datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.$(OBJEXT) \ datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.$(OBJEXT) \ $(am__objects_6) $(am__objects_7) datawizard_interfaces_bcsr_bcsr_interface_OBJECTS = \ $(am_datawizard_interfaces_bcsr_bcsr_interface_OBJECTS) datawizard_interfaces_bcsr_bcsr_interface_LDADD = $(LDADD) datawizard_interfaces_bcsr_bcsr_interface_LINK = $(LIBTOOL) $(AM_V_lt) \ --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CCLD) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ am__datawizard_interfaces_block_block_interface_SOURCES_DIST = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/block/block_interface.c \ datawizard/interfaces/block/block_cuda.cu \ datawizard/interfaces/block/block_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_8 = datawizard/interfaces/block/block_cuda.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_9 = datawizard/interfaces/block/block_interface-block_opencl.$(OBJEXT) am_datawizard_interfaces_block_block_interface_OBJECTS = datawizard/interfaces/block_block_interface-test_interfaces.$(OBJEXT) \ datawizard/interfaces/block/block_interface-block_interface.$(OBJEXT) \ $(am__objects_8) $(am__objects_9) datawizard_interfaces_block_block_interface_OBJECTS = \ $(am_datawizard_interfaces_block_block_interface_OBJECTS) datawizard_interfaces_block_block_interface_LDADD = $(LDADD) datawizard_interfaces_block_block_interface_LINK = $(LIBTOOL) \ $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) \ $(datawizard_interfaces_block_block_interface_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ am__datawizard_interfaces_coo_coo_interface_SOURCES_DIST = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/coo/coo_interface.c \ datawizard/interfaces/coo/coo_cuda.cu \ datawizard/interfaces/coo/coo_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_10 = datawizard/interfaces/coo/coo_cuda.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_11 = datawizard/interfaces/coo/coo_interface-coo_opencl.$(OBJEXT) am_datawizard_interfaces_coo_coo_interface_OBJECTS = datawizard/interfaces/coo_coo_interface-test_interfaces.$(OBJEXT) \ datawizard/interfaces/coo/coo_interface-coo_interface.$(OBJEXT) \ $(am__objects_10) $(am__objects_11) datawizard_interfaces_coo_coo_interface_OBJECTS = \ $(am_datawizard_interfaces_coo_coo_interface_OBJECTS) datawizard_interfaces_coo_coo_interface_LDADD = $(LDADD) datawizard_interfaces_coo_coo_interface_LINK = $(LIBTOOL) $(AM_V_lt) \ --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CCLD) $(datawizard_interfaces_coo_coo_interface_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ datawizard_interfaces_copy_interfaces_SOURCES = \ datawizard/interfaces/copy_interfaces.c datawizard_interfaces_copy_interfaces_OBJECTS = datawizard/interfaces/copy_interfaces-copy_interfaces.$(OBJEXT) datawizard_interfaces_copy_interfaces_LDADD = $(LDADD) datawizard_interfaces_copy_interfaces_LINK = $(LIBTOOL) $(AM_V_lt) \ --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CCLD) $(datawizard_interfaces_copy_interfaces_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ am__datawizard_interfaces_csr_csr_interface_SOURCES_DIST = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/csr/csr_interface.c \ datawizard/interfaces/csr/csr_cuda.cu \ datawizard/interfaces/csr/csr_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_12 = datawizard/interfaces/csr/csr_cuda.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_13 = datawizard/interfaces/csr/csr_interface-csr_opencl.$(OBJEXT) am_datawizard_interfaces_csr_csr_interface_OBJECTS = datawizard/interfaces/csr_csr_interface-test_interfaces.$(OBJEXT) \ datawizard/interfaces/csr/csr_interface-csr_interface.$(OBJEXT) \ $(am__objects_12) $(am__objects_13) datawizard_interfaces_csr_csr_interface_OBJECTS = \ $(am_datawizard_interfaces_csr_csr_interface_OBJECTS) datawizard_interfaces_csr_csr_interface_LDADD = $(LDADD) datawizard_interfaces_csr_csr_interface_LINK = $(LIBTOOL) $(AM_V_lt) \ --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CCLD) $(datawizard_interfaces_csr_csr_interface_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ am__datawizard_interfaces_matrix_matrix_interface_SOURCES_DIST = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/matrix/matrix_interface.c \ datawizard/interfaces/matrix/matrix_cuda.cu \ datawizard/interfaces/matrix/matrix_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_14 = datawizard/interfaces/matrix/matrix_cuda.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_15 = datawizard/interfaces/matrix/matrix_interface-matrix_opencl.$(OBJEXT) am_datawizard_interfaces_matrix_matrix_interface_OBJECTS = datawizard/interfaces/matrix_matrix_interface-test_interfaces.$(OBJEXT) \ datawizard/interfaces/matrix/matrix_interface-matrix_interface.$(OBJEXT) \ $(am__objects_14) $(am__objects_15) datawizard_interfaces_matrix_matrix_interface_OBJECTS = \ $(am_datawizard_interfaces_matrix_matrix_interface_OBJECTS) datawizard_interfaces_matrix_matrix_interface_LDADD = $(LDADD) datawizard_interfaces_matrix_matrix_interface_LINK = $(LIBTOOL) \ $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) \ $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ am_datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_OBJECTS = \ datawizard/interfaces/multiformat/advanced/generic.$(OBJEXT) \ datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.$(OBJEXT) datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_OBJECTS = $(am_datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_OBJECTS) datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_LDADD = \ $(LDADD) am_datawizard_interfaces_multiformat_advanced_multiformat_data_release_OBJECTS = \ datawizard/interfaces/multiformat/advanced/generic.$(OBJEXT) \ datawizard/interfaces/multiformat/advanced/multiformat_data_release.$(OBJEXT) datawizard_interfaces_multiformat_advanced_multiformat_data_release_OBJECTS = $(am_datawizard_interfaces_multiformat_advanced_multiformat_data_release_OBJECTS) datawizard_interfaces_multiformat_advanced_multiformat_data_release_LDADD = \ $(LDADD) am_datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_OBJECTS = \ datawizard/interfaces/multiformat/advanced/generic.$(OBJEXT) \ datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.$(OBJEXT) datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_OBJECTS = $(am_datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_OBJECTS) datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_LDADD = \ $(LDADD) am_datawizard_interfaces_multiformat_advanced_multiformat_worker_OBJECTS = \ datawizard/interfaces/multiformat/advanced/generic.$(OBJEXT) \ datawizard/interfaces/multiformat/advanced/multiformat_worker.$(OBJEXT) datawizard_interfaces_multiformat_advanced_multiformat_worker_OBJECTS = $(am_datawizard_interfaces_multiformat_advanced_multiformat_worker_OBJECTS) datawizard_interfaces_multiformat_advanced_multiformat_worker_LDADD = \ $(LDADD) am_datawizard_interfaces_multiformat_advanced_same_handle_OBJECTS = \ datawizard/interfaces/multiformat/advanced/generic.$(OBJEXT) \ datawizard/interfaces/multiformat/advanced/same_handle.$(OBJEXT) datawizard_interfaces_multiformat_advanced_same_handle_OBJECTS = $(am_datawizard_interfaces_multiformat_advanced_same_handle_OBJECTS) datawizard_interfaces_multiformat_advanced_same_handle_LDADD = \ $(LDADD) am__datawizard_interfaces_multiformat_multiformat_interface_SOURCES_DIST = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/multiformat/multiformat_interface.c \ datawizard/interfaces/multiformat/multiformat_conversion_codelets.c \ datawizard/interfaces/multiformat/multiformat_cuda.cu \ datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu \ datawizard/interfaces/multiformat/multiformat_opencl.c \ datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_16 = datawizard/interfaces/multiformat/multiformat_cuda.$(OBJEXT) \ @STARPU_USE_CUDA_TRUE@ datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_17 = datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.$(OBJEXT) \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.$(OBJEXT) am_datawizard_interfaces_multiformat_multiformat_interface_OBJECTS = datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.$(OBJEXT) \ datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.$(OBJEXT) \ datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.$(OBJEXT) \ $(am__objects_16) $(am__objects_17) datawizard_interfaces_multiformat_multiformat_interface_OBJECTS = $(am_datawizard_interfaces_multiformat_multiformat_interface_OBJECTS) datawizard_interfaces_multiformat_multiformat_interface_LDADD = \ $(LDADD) datawizard_interfaces_multiformat_multiformat_interface_LINK = \ $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ am__datawizard_interfaces_ndim_ndim_interface_SOURCES_DIST = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/ndim/ndim_interface.c \ datawizard/interfaces/ndim/ndim_cuda.cu \ datawizard/interfaces/ndim/ndim_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_18 = datawizard/interfaces/ndim/ndim_cuda.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_19 = datawizard/interfaces/ndim/ndim_interface-ndim_opencl.$(OBJEXT) am_datawizard_interfaces_ndim_ndim_interface_OBJECTS = datawizard/interfaces/ndim_ndim_interface-test_interfaces.$(OBJEXT) \ datawizard/interfaces/ndim/ndim_interface-ndim_interface.$(OBJEXT) \ $(am__objects_18) $(am__objects_19) datawizard_interfaces_ndim_ndim_interface_OBJECTS = \ $(am_datawizard_interfaces_ndim_ndim_interface_OBJECTS) datawizard_interfaces_ndim_ndim_interface_LDADD = $(LDADD) datawizard_interfaces_ndim_ndim_interface_LINK = $(LIBTOOL) $(AM_V_lt) \ --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CCLD) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ am__datawizard_interfaces_tensor_tensor_interface_SOURCES_DIST = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/tensor/tensor_interface.c \ datawizard/interfaces/tensor/tensor_cuda.cu \ datawizard/interfaces/tensor/tensor_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_20 = datawizard/interfaces/tensor/tensor_cuda.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_21 = datawizard/interfaces/tensor/tensor_interface-tensor_opencl.$(OBJEXT) am_datawizard_interfaces_tensor_tensor_interface_OBJECTS = datawizard/interfaces/tensor_tensor_interface-test_interfaces.$(OBJEXT) \ datawizard/interfaces/tensor/tensor_interface-tensor_interface.$(OBJEXT) \ $(am__objects_20) $(am__objects_21) datawizard_interfaces_tensor_tensor_interface_OBJECTS = \ $(am_datawizard_interfaces_tensor_tensor_interface_OBJECTS) datawizard_interfaces_tensor_tensor_interface_LDADD = $(LDADD) datawizard_interfaces_tensor_tensor_interface_LINK = $(LIBTOOL) \ $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) \ $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ am__datawizard_interfaces_variable_variable_interface_SOURCES_DIST = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/variable/variable_interface.c \ datawizard/interfaces/variable/variable_cuda.cu \ datawizard/interfaces/variable/variable_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_22 = datawizard/interfaces/variable/variable_cuda.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_23 = datawizard/interfaces/variable/variable_interface-variable_opencl.$(OBJEXT) am_datawizard_interfaces_variable_variable_interface_OBJECTS = datawizard/interfaces/variable_variable_interface-test_interfaces.$(OBJEXT) \ datawizard/interfaces/variable/variable_interface-variable_interface.$(OBJEXT) \ $(am__objects_22) $(am__objects_23) datawizard_interfaces_variable_variable_interface_OBJECTS = $(am_datawizard_interfaces_variable_variable_interface_OBJECTS) datawizard_interfaces_variable_variable_interface_LDADD = $(LDADD) datawizard_interfaces_variable_variable_interface_LINK = $(LIBTOOL) \ $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) \ $(datawizard_interfaces_variable_variable_interface_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ am__datawizard_interfaces_vector_vector_interface_SOURCES_DIST = \ datawizard/interfaces/vector/vector_interface.c \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/vector/vector_cuda.cu \ datawizard/interfaces/vector/vector_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_24 = datawizard/interfaces/vector/vector_cuda.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_25 = datawizard/interfaces/vector/vector_interface-vector_opencl.$(OBJEXT) am_datawizard_interfaces_vector_vector_interface_OBJECTS = datawizard/interfaces/vector/vector_interface-vector_interface.$(OBJEXT) \ datawizard/interfaces/vector_vector_interface-test_interfaces.$(OBJEXT) \ $(am__objects_24) $(am__objects_25) datawizard_interfaces_vector_vector_interface_OBJECTS = \ $(am_datawizard_interfaces_vector_vector_interface_OBJECTS) datawizard_interfaces_vector_vector_interface_LDADD = $(LDADD) datawizard_interfaces_vector_vector_interface_LINK = $(LIBTOOL) \ $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) \ $(datawizard_interfaces_vector_vector_interface_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ am_datawizard_interfaces_void_void_interface_OBJECTS = datawizard/interfaces/void_void_interface-test_interfaces.$(OBJEXT) \ datawizard/interfaces/void/void_interface-void_interface.$(OBJEXT) datawizard_interfaces_void_void_interface_OBJECTS = \ $(am_datawizard_interfaces_void_void_interface_OBJECTS) datawizard_interfaces_void_void_interface_LDADD = $(LDADD) datawizard_interfaces_void_void_interface_LINK = $(LIBTOOL) $(AM_V_lt) \ --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ $(CCLD) $(datawizard_interfaces_void_void_interface_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ datawizard_invalidate_pending_requests_SOURCES = \ datawizard/invalidate_pending_requests.c datawizard_invalidate_pending_requests_OBJECTS = \ datawizard/invalidate_pending_requests.$(OBJEXT) datawizard_invalidate_pending_requests_LDADD = $(LDADD) datawizard_lazy_allocation_SOURCES = datawizard/lazy_allocation.c datawizard_lazy_allocation_OBJECTS = \ datawizard/lazy_allocation.$(OBJEXT) datawizard_lazy_allocation_LDADD = $(LDADD) datawizard_locality_SOURCES = datawizard/locality.c datawizard_locality_OBJECTS = datawizard/locality.$(OBJEXT) datawizard_locality_LDADD = $(LDADD) datawizard_manual_reduction_SOURCES = datawizard/manual_reduction.c datawizard_manual_reduction_OBJECTS = \ datawizard/manual_reduction.$(OBJEXT) datawizard_manual_reduction_LDADD = $(LDADD) am__datawizard_mpi_like_SOURCES_DIST = datawizard/mpi_like.c \ variable/increment.c variable/increment_cuda.cu \ variable/increment_hip.hip variable/increment_opencl.c am_datawizard_mpi_like_OBJECTS = datawizard/mpi_like.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) datawizard_mpi_like_OBJECTS = $(am_datawizard_mpi_like_OBJECTS) datawizard_mpi_like_LDADD = $(LDADD) am__datawizard_mpi_like_async_SOURCES_DIST = \ datawizard/mpi_like_async.c variable/increment.c \ variable/increment_cuda.cu variable/increment_hip.hip \ variable/increment_opencl.c am_datawizard_mpi_like_async_OBJECTS = \ datawizard/mpi_like_async.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) datawizard_mpi_like_async_OBJECTS = \ $(am_datawizard_mpi_like_async_OBJECTS) datawizard_mpi_like_async_LDADD = $(LDADD) datawizard_no_unregister_SOURCES = datawizard/no_unregister.c datawizard_no_unregister_OBJECTS = datawizard/no_unregister.$(OBJEXT) datawizard_no_unregister_LDADD = $(LDADD) datawizard_noreclaim_SOURCES = datawizard/noreclaim.c datawizard_noreclaim_OBJECTS = datawizard/noreclaim.$(OBJEXT) datawizard_noreclaim_LDADD = $(LDADD) datawizard_nowhere_SOURCES = datawizard/nowhere.c datawizard_nowhere_OBJECTS = datawizard/nowhere.$(OBJEXT) datawizard_nowhere_LDADD = $(LDADD) datawizard_numa_overflow_SOURCES = datawizard/numa_overflow.c datawizard_numa_overflow_OBJECTS = datawizard/numa_overflow.$(OBJEXT) datawizard_numa_overflow_LDADD = $(LDADD) am__datawizard_partition_dep_SOURCES_DIST = \ datawizard/partition_dep.c datawizard/scal.c \ datawizard/scal_cuda.cu datawizard/scal_opencl.cl am_datawizard_partition_dep_OBJECTS = \ datawizard/partition_dep.$(OBJEXT) datawizard/scal.$(OBJEXT) \ $(am__objects_4) $(am__objects_5) datawizard_partition_dep_OBJECTS = \ $(am_datawizard_partition_dep_OBJECTS) datawizard_partition_dep_LDADD = $(LDADD) datawizard_partition_init_SOURCES = datawizard/partition_init.c datawizard_partition_init_OBJECTS = \ datawizard/partition_init.$(OBJEXT) datawizard_partition_init_LDADD = $(LDADD) am__datawizard_partition_lazy_SOURCES_DIST = \ datawizard/partition_lazy.c datawizard/scal.c \ datawizard/scal_cuda.cu datawizard/scal_opencl.cl am_datawizard_partition_lazy_OBJECTS = \ datawizard/partition_lazy.$(OBJEXT) datawizard/scal.$(OBJEXT) \ $(am__objects_4) $(am__objects_5) datawizard_partition_lazy_OBJECTS = \ $(am_datawizard_partition_lazy_OBJECTS) datawizard_partition_lazy_LDADD = $(LDADD) datawizard_partition_wontuse_SOURCES = datawizard/partition_wontuse.c datawizard_partition_wontuse_OBJECTS = \ datawizard/partition_wontuse.$(OBJEXT) datawizard_partition_wontuse_LDADD = $(LDADD) datawizard_partitioned_acquire_SOURCES = \ datawizard/partitioned_acquire.c datawizard_partitioned_acquire_OBJECTS = \ datawizard/partitioned_acquire.$(OBJEXT) datawizard_partitioned_acquire_LDADD = $(LDADD) datawizard_partitioned_initialization_SOURCES = \ datawizard/partitioned_initialization.c datawizard_partitioned_initialization_OBJECTS = \ datawizard/partitioned_initialization.$(OBJEXT) datawizard_partitioned_initialization_LDADD = $(LDADD) datawizard_readers_and_writers_SOURCES = \ datawizard/readers_and_writers.c datawizard_readers_and_writers_OBJECTS = \ datawizard/readers_and_writers.$(OBJEXT) datawizard_readers_and_writers_LDADD = $(LDADD) datawizard_readonly_SOURCES = datawizard/readonly.c datawizard_readonly_OBJECTS = datawizard/readonly.$(OBJEXT) datawizard_readonly_LDADD = $(LDADD) datawizard_reclaim_SOURCES = datawizard/reclaim.c datawizard_reclaim_OBJECTS = datawizard/reclaim.$(OBJEXT) datawizard_reclaim_LDADD = $(LDADD) datawizard_redux_acquire_SOURCES = datawizard/redux_acquire.c datawizard_redux_acquire_OBJECTS = datawizard/redux_acquire.$(OBJEXT) datawizard_redux_acquire_LDADD = $(LDADD) am__datawizard_scratch_SOURCES_DIST = datawizard/scratch.c \ datawizard/scratch_cuda.cu datawizard/scratch_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_26 = \ @STARPU_USE_CUDA_TRUE@ datawizard/scratch_cuda.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_27 = \ @STARPU_USE_OPENCL_TRUE@ datawizard/scratch_opencl.$(OBJEXT) am_datawizard_scratch_OBJECTS = datawizard/scratch.$(OBJEXT) \ $(am__objects_26) $(am__objects_27) datawizard_scratch_OBJECTS = $(am_datawizard_scratch_OBJECTS) datawizard_scratch_LDADD = $(LDADD) datawizard_scratch_reuse_SOURCES = datawizard/scratch_reuse.c datawizard_scratch_reuse_OBJECTS = datawizard/scratch_reuse.$(OBJEXT) datawizard_scratch_reuse_LDADD = $(LDADD) datawizard_simgrid_locality_SOURCES = datawizard/simgrid-locality.c datawizard_simgrid_locality_OBJECTS = \ datawizard/simgrid-locality.$(OBJEXT) datawizard_simgrid_locality_LDADD = $(LDADD) am__datawizard_specific_node_SOURCES_DIST = \ datawizard/specific_node.c variable/increment.c \ variable/increment_cuda.cu variable/increment_hip.hip \ variable/increment_opencl.c am_datawizard_specific_node_OBJECTS = \ datawizard/specific_node.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) datawizard_specific_node_OBJECTS = \ $(am_datawizard_specific_node_OBJECTS) datawizard_specific_node_LDADD = $(LDADD) datawizard_specific_node_same_SOURCES = \ datawizard/specific_node_same.c datawizard_specific_node_same_OBJECTS = \ datawizard/specific_node_same.$(OBJEXT) datawizard_specific_node_same_LDADD = $(LDADD) am__datawizard_sync_and_notify_data_SOURCES_DIST = \ datawizard/sync_and_notify_data.c \ datawizard/sync_and_notify_data_kernels.cu \ datawizard/sync_and_notify_data_opencl.c @STARPU_USE_CUDA_TRUE@am__objects_28 = datawizard/sync_and_notify_data_kernels.$(OBJEXT) @STARPU_USE_OPENCL_TRUE@am__objects_29 = datawizard/sync_and_notify_data_opencl.$(OBJEXT) am_datawizard_sync_and_notify_data_OBJECTS = \ datawizard/sync_and_notify_data.$(OBJEXT) $(am__objects_28) \ $(am__objects_29) datawizard_sync_and_notify_data_OBJECTS = \ $(am_datawizard_sync_and_notify_data_OBJECTS) datawizard_sync_and_notify_data_LDADD = $(LDADD) am__datawizard_sync_and_notify_data_implicit_SOURCES_DIST = \ datawizard/sync_and_notify_data_implicit.c \ datawizard/sync_and_notify_data_kernels.cu \ datawizard/sync_and_notify_data_opencl.c am_datawizard_sync_and_notify_data_implicit_OBJECTS = \ datawizard/sync_and_notify_data_implicit.$(OBJEXT) \ $(am__objects_28) $(am__objects_29) datawizard_sync_and_notify_data_implicit_OBJECTS = \ $(am_datawizard_sync_and_notify_data_implicit_OBJECTS) datawizard_sync_and_notify_data_implicit_LDADD = $(LDADD) datawizard_sync_with_data_with_mem_SOURCES = \ datawizard/sync_with_data_with_mem.c datawizard_sync_with_data_with_mem_OBJECTS = \ datawizard/sync_with_data_with_mem.$(OBJEXT) datawizard_sync_with_data_with_mem_LDADD = $(LDADD) datawizard_sync_with_data_with_mem_non_blocking_SOURCES = \ datawizard/sync_with_data_with_mem_non_blocking.c datawizard_sync_with_data_with_mem_non_blocking_OBJECTS = \ datawizard/sync_with_data_with_mem_non_blocking.$(OBJEXT) datawizard_sync_with_data_with_mem_non_blocking_LDADD = $(LDADD) datawizard_sync_with_data_with_mem_non_blocking_implicit_SOURCES = \ datawizard/sync_with_data_with_mem_non_blocking_implicit.c datawizard_sync_with_data_with_mem_non_blocking_implicit_OBJECTS = datawizard/sync_with_data_with_mem_non_blocking_implicit.$(OBJEXT) datawizard_sync_with_data_with_mem_non_blocking_implicit_LDADD = \ $(LDADD) datawizard_task_with_multiple_time_the_same_handle_SOURCES = \ datawizard/task_with_multiple_time_the_same_handle.c datawizard_task_with_multiple_time_the_same_handle_OBJECTS = \ datawizard/task_with_multiple_time_the_same_handle.$(OBJEXT) datawizard_task_with_multiple_time_the_same_handle_LDADD = $(LDADD) datawizard_temporary_partition_SOURCES = \ datawizard/temporary_partition.c datawizard_temporary_partition_OBJECTS = \ datawizard/temporary_partition.$(OBJEXT) datawizard_temporary_partition_LDADD = $(LDADD) datawizard_temporary_partition_implicit_SOURCES = \ datawizard/temporary_partition_implicit.c datawizard_temporary_partition_implicit_OBJECTS = \ datawizard/temporary_partition_implicit.$(OBJEXT) datawizard_temporary_partition_implicit_LDADD = $(LDADD) datawizard_temporary_partition_read_SOURCES = \ datawizard/temporary_partition_read.c datawizard_temporary_partition_read_OBJECTS = \ datawizard/temporary_partition_read.$(OBJEXT) datawizard_temporary_partition_read_LDADD = $(LDADD) am_datawizard_test_arbiter_OBJECTS = \ datawizard/test_arbiter.$(OBJEXT) datawizard_test_arbiter_OBJECTS = \ $(am_datawizard_test_arbiter_OBJECTS) datawizard_test_arbiter_LDADD = $(LDADD) datawizard_unpartition_SOURCES = datawizard/unpartition.c datawizard_unpartition_OBJECTS = datawizard/unpartition.$(OBJEXT) datawizard_unpartition_LDADD = $(LDADD) datawizard_user_interaction_implicit_SOURCES = \ datawizard/user_interaction_implicit.c datawizard_user_interaction_implicit_OBJECTS = \ datawizard/user_interaction_implicit.$(OBJEXT) datawizard_user_interaction_implicit_LDADD = $(LDADD) datawizard_variable_parameters_SOURCES = \ datawizard/variable_parameters.c datawizard_variable_parameters_OBJECTS = \ datawizard/variable_parameters.$(OBJEXT) datawizard_variable_parameters_LDADD = $(LDADD) datawizard_variable_size_SOURCES = datawizard/variable_size.c datawizard_variable_size_OBJECTS = datawizard/variable_size.$(OBJEXT) datawizard_variable_size_LDADD = $(LDADD) datawizard_write_only_tmp_buffer_SOURCES = \ datawizard/write_only_tmp_buffer.c datawizard_write_only_tmp_buffer_OBJECTS = \ datawizard/write_only_tmp_buffer.$(OBJEXT) datawizard_write_only_tmp_buffer_LDADD = $(LDADD) am__datawizard_wt_broadcast_SOURCES_DIST = datawizard/wt_broadcast.c \ variable/increment.c variable/increment_cuda.cu \ variable/increment_hip.hip variable/increment_opencl.c am_datawizard_wt_broadcast_OBJECTS = \ datawizard/wt_broadcast.$(OBJEXT) variable/increment.$(OBJEXT) \ $(am__objects_1) $(am__objects_2) $(am__objects_3) datawizard_wt_broadcast_OBJECTS = \ $(am_datawizard_wt_broadcast_OBJECTS) datawizard_wt_broadcast_LDADD = $(LDADD) am__datawizard_wt_host_SOURCES_DIST = datawizard/wt_host.c \ variable/increment.c variable/increment_cuda.cu \ variable/increment_hip.hip variable/increment_opencl.c am_datawizard_wt_host_OBJECTS = datawizard/wt_host.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) datawizard_wt_host_OBJECTS = $(am_datawizard_wt_host_OBJECTS) datawizard_wt_host_LDADD = $(LDADD) disk_disk_compute_SOURCES = disk/disk_compute.c disk_disk_compute_OBJECTS = disk/disk_compute.$(OBJEXT) disk_disk_compute_LDADD = $(LDADD) disk_disk_copy_SOURCES = disk/disk_copy.c disk_disk_copy_OBJECTS = disk/disk_copy.$(OBJEXT) disk_disk_copy_LDADD = $(LDADD) disk_disk_copy_to_disk_SOURCES = disk/disk_copy_to_disk.c disk_disk_copy_to_disk_OBJECTS = disk/disk_copy_to_disk.$(OBJEXT) disk_disk_copy_to_disk_LDADD = $(LDADD) disk_disk_copy_unpack_SOURCES = disk/disk_copy_unpack.c disk_disk_copy_unpack_OBJECTS = disk/disk_copy_unpack.$(OBJEXT) disk_disk_copy_unpack_LDADD = $(LDADD) disk_disk_pack_SOURCES = disk/disk_pack.c disk_disk_pack_OBJECTS = disk/disk_pack.$(OBJEXT) disk_disk_pack_LDADD = $(LDADD) disk_mem_reclaim_SOURCES = disk/mem_reclaim.c disk_mem_reclaim_OBJECTS = disk/mem_reclaim.$(OBJEXT) disk_mem_reclaim_LDADD = $(LDADD) energy_energy_efficiency_SOURCES = energy/energy_efficiency.c energy_energy_efficiency_OBJECTS = energy/energy_efficiency.$(OBJEXT) energy_energy_efficiency_LDADD = $(LDADD) errorcheck_invalid_blocking_calls_SOURCES = \ errorcheck/invalid_blocking_calls.c errorcheck_invalid_blocking_calls_OBJECTS = \ errorcheck/invalid_blocking_calls.$(OBJEXT) errorcheck_invalid_blocking_calls_LDADD = $(LDADD) errorcheck_invalid_tasks_SOURCES = errorcheck/invalid_tasks.c errorcheck_invalid_tasks_OBJECTS = errorcheck/invalid_tasks.$(OBJEXT) errorcheck_invalid_tasks_LDADD = $(LDADD) errorcheck_starpu_init_noworker_SOURCES = \ errorcheck/starpu_init_noworker.c errorcheck_starpu_init_noworker_OBJECTS = \ errorcheck/starpu_init_noworker.$(OBJEXT) errorcheck_starpu_init_noworker_LDADD = $(LDADD) errorcheck_workers_cpuid_SOURCES = errorcheck/workers_cpuid.c errorcheck_workers_cpuid_OBJECTS = errorcheck/workers_cpuid.$(OBJEXT) errorcheck_workers_cpuid_LDADD = $(LDADD) fault_tolerance_retry_SOURCES = fault-tolerance/retry.c fault_tolerance_retry_OBJECTS = fault-tolerance/retry.$(OBJEXT) fault_tolerance_retry_LDADD = $(LDADD) am__fortran90_init_01_SOURCES_DIST = fortran90/starpu_mod.f90 \ fortran90/init_01.f90 @STARPU_HAVE_FC_TRUE@am_fortran90_init_01_OBJECTS = \ @STARPU_HAVE_FC_TRUE@ fortran90/starpu_mod.$(OBJEXT) \ @STARPU_HAVE_FC_TRUE@ fortran90/init_01.$(OBJEXT) fortran90_init_01_OBJECTS = $(am_fortran90_init_01_OBJECTS) fortran90_init_01_LDADD = $(LDADD) helper_cublasLt_init_SOURCES = helper/cublasLt_init.c helper_cublasLt_init_OBJECTS = helper/cublasLt_init.$(OBJEXT) helper_cublasLt_init_LDADD = $(LDADD) helper_cublas_init_SOURCES = helper/cublas_init.c helper_cublas_init_OBJECTS = helper/cublas_init.$(OBJEXT) helper_cublas_init_LDADD = $(LDADD) helper_cusparse_init_SOURCES = helper/cusparse_init.c helper_cusparse_init_OBJECTS = helper/cusparse_init.$(OBJEXT) helper_cusparse_init_LDADD = $(LDADD) helper_execute_on_all_SOURCES = helper/execute_on_all.c helper_execute_on_all_OBJECTS = helper/execute_on_all.$(OBJEXT) helper_execute_on_all_LDADD = $(LDADD) helper_hipblas_init_SOURCES = helper/hipblas_init.c helper_hipblas_init_OBJECTS = helper/hipblas_init.$(OBJEXT) helper_hipblas_init_LDADD = $(LDADD) helper_pinned_memory_SOURCES = helper/pinned_memory.c helper_pinned_memory_OBJECTS = helper/pinned_memory.$(OBJEXT) helper_pinned_memory_LDADD = $(LDADD) helper_starpu_create_sync_task_SOURCES = \ helper/starpu_create_sync_task.c helper_starpu_create_sync_task_OBJECTS = \ helper/starpu_create_sync_task.$(OBJEXT) helper_starpu_create_sync_task_LDADD = $(LDADD) helper_starpu_data_cpy_SOURCES = helper/starpu_data_cpy.c helper_starpu_data_cpy_OBJECTS = helper/starpu_data_cpy.$(OBJEXT) helper_starpu_data_cpy_LDADD = $(LDADD) am__helper_starpu_data_dup_ro_SOURCES_DIST = \ helper/starpu_data_dup_ro.c variable/increment.c \ variable/increment_cuda.cu variable/increment_hip.hip \ variable/increment_opencl.c am_helper_starpu_data_dup_ro_OBJECTS = \ helper/starpu_data_dup_ro.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) helper_starpu_data_dup_ro_OBJECTS = \ $(am_helper_starpu_data_dup_ro_OBJECTS) helper_starpu_data_dup_ro_LDADD = $(LDADD) loader_SOURCES = loader.c loader_OBJECTS = loader-loader.$(OBJEXT) loader_LDADD = $(LDADD) main_bind_SOURCES = main/bind.c main_bind_OBJECTS = main/bind.$(OBJEXT) main_bind_LDADD = $(LDADD) main_callback_SOURCES = main/callback.c main_callback_OBJECTS = main/callback.$(OBJEXT) main_callback_LDADD = $(LDADD) main_codelet_null_callback_SOURCES = main/codelet_null_callback.c main_codelet_null_callback_OBJECTS = \ main/codelet_null_callback.$(OBJEXT) main_codelet_null_callback_LDADD = $(LDADD) main_const_codelet_SOURCES = main/const_codelet.c main_const_codelet_OBJECTS = main/const_codelet.$(OBJEXT) main_const_codelet_LDADD = $(LDADD) main_deadlock_SOURCES = main/deadlock.c main_deadlock_OBJECTS = main/deadlock.$(OBJEXT) main_deadlock_LDADD = $(LDADD) main_declare_deps_after_submission_SOURCES = \ main/declare_deps_after_submission.c main_declare_deps_after_submission_OBJECTS = \ main/declare_deps_after_submission.$(OBJEXT) main_declare_deps_after_submission_LDADD = $(LDADD) main_declare_deps_after_submission_synchronous_SOURCES = \ main/declare_deps_after_submission_synchronous.c main_declare_deps_after_submission_synchronous_OBJECTS = \ main/declare_deps_after_submission_synchronous.$(OBJEXT) main_declare_deps_after_submission_synchronous_LDADD = $(LDADD) main_declare_deps_in_callback_SOURCES = \ main/declare_deps_in_callback.c main_declare_deps_in_callback_OBJECTS = \ main/declare_deps_in_callback.$(OBJEXT) main_declare_deps_in_callback_LDADD = $(LDADD) main_deploop_SOURCES = main/deploop.c main_deploop_OBJECTS = main/deploop.$(OBJEXT) main_deploop_LDADD = $(LDADD) main_deprecated_func_SOURCES = main/deprecated_func.c main_deprecated_func_OBJECTS = \ main/deprecated_func-deprecated_func.$(OBJEXT) main_deprecated_func_LDADD = $(LDADD) main_deprecated_func_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(main_deprecated_func_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ main_display_binding_SOURCES = main/display_binding.c main_display_binding_OBJECTS = main/display_binding.$(OBJEXT) main_display_binding_LDADD = $(LDADD) main_driver_api_init_run_deinit_SOURCES = \ main/driver_api/init_run_deinit.c main_driver_api_init_run_deinit_OBJECTS = \ main/driver_api/init_run_deinit.$(OBJEXT) main_driver_api_init_run_deinit_LDADD = $(LDADD) main_driver_api_run_driver_SOURCES = main/driver_api/run_driver.c main_driver_api_run_driver_OBJECTS = \ main/driver_api/run_driver.$(OBJEXT) main_driver_api_run_driver_LDADD = $(LDADD) main_empty_task_SOURCES = main/empty_task.c main_empty_task_OBJECTS = main/empty_task.$(OBJEXT) main_empty_task_LDADD = $(LDADD) main_empty_task_chain_SOURCES = main/empty_task_chain.c main_empty_task_chain_OBJECTS = main/empty_task_chain.$(OBJEXT) main_empty_task_chain_LDADD = $(LDADD) main_empty_task_sync_point_SOURCES = main/empty_task_sync_point.c main_empty_task_sync_point_OBJECTS = \ main/empty_task_sync_point.$(OBJEXT) main_empty_task_sync_point_LDADD = $(LDADD) main_empty_task_sync_point_tasks_SOURCES = \ main/empty_task_sync_point_tasks.c main_empty_task_sync_point_tasks_OBJECTS = \ main/empty_task_sync_point_tasks.$(OBJEXT) main_empty_task_sync_point_tasks_LDADD = $(LDADD) main_execute_on_a_specific_worker_SOURCES = \ main/execute_on_a_specific_worker.c main_execute_on_a_specific_worker_OBJECTS = \ main/execute_on_a_specific_worker.$(OBJEXT) main_execute_on_a_specific_worker_LDADD = $(LDADD) main_execute_schedule_SOURCES = main/execute_schedule.c main_execute_schedule_OBJECTS = main/execute_schedule.$(OBJEXT) main_execute_schedule_LDADD = $(LDADD) main_get_children_tasks_SOURCES = main/get_children_tasks.c main_get_children_tasks_OBJECTS = main/get_children_tasks.$(OBJEXT) main_get_children_tasks_LDADD = $(LDADD) main_get_current_task_SOURCES = main/get_current_task.c main_get_current_task_OBJECTS = main/get_current_task.$(OBJEXT) main_get_current_task_LDADD = $(LDADD) main_hwloc_cpuset_SOURCES = main/hwloc_cpuset.c main_hwloc_cpuset_OBJECTS = main/hwloc_cpuset.$(OBJEXT) main_hwloc_cpuset_LDADD = $(LDADD) main_insert_task_SOURCES = main/insert_task.c main_insert_task_OBJECTS = main/insert_task.$(OBJEXT) main_insert_task_LDADD = $(LDADD) main_insert_task_array_SOURCES = main/insert_task_array.c main_insert_task_array_OBJECTS = main/insert_task_array.$(OBJEXT) main_insert_task_array_LDADD = $(LDADD) main_insert_task_dyn_handles_SOURCES = main/insert_task_dyn_handles.c main_insert_task_dyn_handles_OBJECTS = \ main/insert_task_dyn_handles.$(OBJEXT) main_insert_task_dyn_handles_LDADD = $(LDADD) main_insert_task_many_SOURCES = main/insert_task_many.c main_insert_task_many_OBJECTS = main/insert_task_many.$(OBJEXT) main_insert_task_many_LDADD = $(LDADD) main_insert_task_nullcodelet_SOURCES = main/insert_task_nullcodelet.c main_insert_task_nullcodelet_OBJECTS = \ main/insert_task_nullcodelet.$(OBJEXT) main_insert_task_nullcodelet_LDADD = $(LDADD) main_insert_task_pack_SOURCES = main/insert_task_pack.c main_insert_task_pack_OBJECTS = main/insert_task_pack.$(OBJEXT) main_insert_task_pack_LDADD = $(LDADD) main_insert_task_value_SOURCES = main/insert_task_value.c main_insert_task_value_OBJECTS = main/insert_task_value.$(OBJEXT) main_insert_task_value_LDADD = $(LDADD) am__main_insert_task_where_SOURCES_DIST = main/insert_task_where.c \ variable/increment.c variable/increment_cuda.cu \ variable/increment_hip.hip variable/increment_opencl.c am_main_insert_task_where_OBJECTS = main/insert_task_where.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) main_insert_task_where_OBJECTS = $(am_main_insert_task_where_OBJECTS) main_insert_task_where_LDADD = $(LDADD) main_job_SOURCES = main/job.c main_job_OBJECTS = main/job.$(OBJEXT) main_job_LDADD = $(LDADD) main_mkdtemp_SOURCES = main/mkdtemp.c main_mkdtemp_OBJECTS = main/mkdtemp.$(OBJEXT) main_mkdtemp_LDADD = $(LDADD) main_multithreaded_SOURCES = main/multithreaded.c main_multithreaded_OBJECTS = main/multithreaded.$(OBJEXT) main_multithreaded_LDADD = $(LDADD) main_multithreaded_init_SOURCES = main/multithreaded_init.c main_multithreaded_init_OBJECTS = main/multithreaded_init.$(OBJEXT) main_multithreaded_init_LDADD = $(LDADD) main_pack_SOURCES = main/pack.c main_pack_OBJECTS = main/pack.$(OBJEXT) main_pack_LDADD = $(LDADD) main_pause_resume_SOURCES = main/pause_resume.c main_pause_resume_OBJECTS = main/pause_resume.$(OBJEXT) main_pause_resume_LDADD = $(LDADD) main_regenerate_SOURCES = main/regenerate.c main_regenerate_OBJECTS = main/regenerate.$(OBJEXT) main_regenerate_LDADD = $(LDADD) main_regenerate_pipeline_SOURCES = main/regenerate_pipeline.c main_regenerate_pipeline_OBJECTS = main/regenerate_pipeline.$(OBJEXT) main_regenerate_pipeline_LDADD = $(LDADD) main_restart_SOURCES = main/restart.c main_restart_OBJECTS = main/restart.$(OBJEXT) main_restart_LDADD = $(LDADD) main_starpu_init_SOURCES = main/starpu_init.c main_starpu_init_OBJECTS = main/starpu_init.$(OBJEXT) main_starpu_init_LDADD = $(LDADD) main_starpu_task_bundle_SOURCES = main/starpu_task_bundle.c main_starpu_task_bundle_OBJECTS = main/starpu_task_bundle.$(OBJEXT) main_starpu_task_bundle_LDADD = $(LDADD) main_starpu_task_wait_SOURCES = main/starpu_task_wait.c main_starpu_task_wait_OBJECTS = main/starpu_task_wait.$(OBJEXT) main_starpu_task_wait_LDADD = $(LDADD) main_starpu_task_wait_for_all_SOURCES = \ main/starpu_task_wait_for_all.c main_starpu_task_wait_for_all_OBJECTS = \ main/starpu_task_wait_for_all.$(OBJEXT) main_starpu_task_wait_for_all_LDADD = $(LDADD) main_starpu_worker_exists_SOURCES = main/starpu_worker_exists.c main_starpu_worker_exists_OBJECTS = \ main/starpu_worker_exists-starpu_worker_exists.$(OBJEXT) main_starpu_worker_exists_LDADD = $(LDADD) main_starpu_worker_exists_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(main_starpu_worker_exists_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ main_static_restartable_SOURCES = main/static_restartable.c main_static_restartable_OBJECTS = main/static_restartable.$(OBJEXT) main_static_restartable_LDADD = $(LDADD) main_static_restartable_tag_SOURCES = main/static_restartable_tag.c main_static_restartable_tag_OBJECTS = \ main/static_restartable_tag.$(OBJEXT) main_static_restartable_tag_LDADD = $(LDADD) main_static_restartable_using_initializer_SOURCES = \ main/static_restartable_using_initializer.c main_static_restartable_using_initializer_OBJECTS = \ main/static_restartable_using_initializer.$(OBJEXT) main_static_restartable_using_initializer_LDADD = $(LDADD) am__main_subgraph_repeat_SOURCES_DIST = main/subgraph_repeat.c \ variable/increment.c variable/increment_cuda.cu \ variable/increment_hip.hip variable/increment_opencl.c am_main_subgraph_repeat_OBJECTS = main/subgraph_repeat.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) main_subgraph_repeat_OBJECTS = $(am_main_subgraph_repeat_OBJECTS) main_subgraph_repeat_LDADD = $(LDADD) am__main_subgraph_repeat_regenerate_SOURCES_DIST = \ main/subgraph_repeat_regenerate.c variable/increment.c \ variable/increment_cuda.cu variable/increment_hip.hip \ variable/increment_opencl.c am_main_subgraph_repeat_regenerate_OBJECTS = \ main/subgraph_repeat_regenerate.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) main_subgraph_repeat_regenerate_OBJECTS = \ $(am_main_subgraph_repeat_regenerate_OBJECTS) main_subgraph_repeat_regenerate_LDADD = $(LDADD) am__main_subgraph_repeat_regenerate_tag_SOURCES_DIST = \ main/subgraph_repeat_regenerate_tag.c variable/increment.c \ variable/increment_cuda.cu variable/increment_hip.hip \ variable/increment_opencl.c am_main_subgraph_repeat_regenerate_tag_OBJECTS = \ main/subgraph_repeat_regenerate_tag.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) main_subgraph_repeat_regenerate_tag_OBJECTS = \ $(am_main_subgraph_repeat_regenerate_tag_OBJECTS) main_subgraph_repeat_regenerate_tag_LDADD = $(LDADD) am__main_subgraph_repeat_regenerate_tag_cycle_SOURCES_DIST = \ main/subgraph_repeat_regenerate_tag_cycle.c \ variable/increment.c variable/increment_cuda.cu \ variable/increment_hip.hip variable/increment_opencl.c am_main_subgraph_repeat_regenerate_tag_cycle_OBJECTS = \ main/subgraph_repeat_regenerate_tag_cycle.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) main_subgraph_repeat_regenerate_tag_cycle_OBJECTS = \ $(am_main_subgraph_repeat_regenerate_tag_cycle_OBJECTS) main_subgraph_repeat_regenerate_tag_cycle_LDADD = $(LDADD) am__main_subgraph_repeat_tag_SOURCES_DIST = \ main/subgraph_repeat_tag.c variable/increment.c \ variable/increment_cuda.cu variable/increment_hip.hip \ variable/increment_opencl.c am_main_subgraph_repeat_tag_OBJECTS = \ main/subgraph_repeat_tag.$(OBJEXT) \ variable/increment.$(OBJEXT) $(am__objects_1) $(am__objects_2) \ $(am__objects_3) main_subgraph_repeat_tag_OBJECTS = \ $(am_main_subgraph_repeat_tag_OBJECTS) main_subgraph_repeat_tag_LDADD = $(LDADD) main_submit_SOURCES = main/submit.c main_submit_OBJECTS = main/submit.$(OBJEXT) main_submit_LDADD = $(LDADD) main_tag_get_task_SOURCES = main/tag_get_task.c main_tag_get_task_OBJECTS = main/tag_get_task.$(OBJEXT) main_tag_get_task_LDADD = $(LDADD) main_tag_task_data_deps_SOURCES = main/tag_task_data_deps.c main_tag_task_data_deps_OBJECTS = main/tag_task_data_deps.$(OBJEXT) main_tag_task_data_deps_LDADD = $(LDADD) main_tag_wait_api_SOURCES = main/tag_wait_api.c main_tag_wait_api_OBJECTS = main/tag_wait_api.$(OBJEXT) main_tag_wait_api_LDADD = $(LDADD) main_task_end_dep_SOURCES = main/task_end_dep.c main_task_end_dep_OBJECTS = main/task_end_dep.$(OBJEXT) main_task_end_dep_LDADD = $(LDADD) main_task_wait_api_SOURCES = main/task_wait_api.c main_task_wait_api_OBJECTS = main/task_wait_api.$(OBJEXT) main_task_wait_api_LDADD = $(LDADD) main_wait_all_regenerable_tasks_SOURCES = \ main/wait_all_regenerable_tasks.c main_wait_all_regenerable_tasks_OBJECTS = \ main/wait_all_regenerable_tasks.$(OBJEXT) main_wait_all_regenerable_tasks_LDADD = $(LDADD) maxfpga_max_fpga_advanced_static_SOURCES = \ maxfpga/max_fpga_advanced_static.c maxfpga_max_fpga_advanced_static_OBJECTS = \ maxfpga/max_fpga_advanced_static.$(OBJEXT) maxfpga_max_fpga_advanced_static_DEPENDENCIES = \ maxfpga/slic_MyTasks.o maxfpga_max_fpga_basic_static_SOURCES = \ maxfpga/max_fpga_basic_static.c maxfpga_max_fpga_basic_static_OBJECTS = \ maxfpga/max_fpga_basic_static.$(OBJEXT) maxfpga_max_fpga_basic_static_DEPENDENCIES = maxfpga/slic_MyTasks.o maxfpga_max_fpga_dynamic_SOURCES = maxfpga/max_fpga_dynamic.c maxfpga_max_fpga_dynamic_OBJECTS = maxfpga/max_fpga_dynamic.$(OBJEXT) maxfpga_max_fpga_dynamic_DEPENDENCIES = maxfpga/slic_MyTasks.o maxfpga_max_fpga_mux_SOURCES = maxfpga/max_fpga_mux.c maxfpga_max_fpga_mux_OBJECTS = maxfpga/max_fpga_mux.$(OBJEXT) maxfpga_max_fpga_mux_DEPENDENCIES = maxfpga/slic_MyTasksMux.o microbenchs_async_tasks_overhead_SOURCES = \ microbenchs/async_tasks_overhead.c microbenchs_async_tasks_overhead_OBJECTS = \ microbenchs/async_tasks_overhead.$(OBJEXT) microbenchs_async_tasks_overhead_LDADD = $(LDADD) microbenchs_bandwidth_SOURCES = microbenchs/bandwidth.c microbenchs_bandwidth_OBJECTS = microbenchs/bandwidth.$(OBJEXT) microbenchs_bandwidth_LDADD = $(LDADD) microbenchs_display_structures_size_SOURCES = \ microbenchs/display_structures_size.c microbenchs_display_structures_size_OBJECTS = \ microbenchs/display_structures_size.$(OBJEXT) microbenchs_display_structures_size_LDADD = $(LDADD) microbenchs_local_pingpong_SOURCES = microbenchs/local_pingpong.c microbenchs_local_pingpong_OBJECTS = \ microbenchs/local_pingpong.$(OBJEXT) microbenchs_local_pingpong_LDADD = $(LDADD) microbenchs_matrix_as_vector_SOURCES = microbenchs/matrix_as_vector.c microbenchs_matrix_as_vector_OBJECTS = \ microbenchs/matrix_as_vector.$(OBJEXT) microbenchs_matrix_as_vector_LDADD = $(LDADD) microbenchs_parallel_dependent_homogeneous_tasks_data_SOURCES = \ microbenchs/parallel_dependent_homogeneous_tasks_data.c microbenchs_parallel_dependent_homogeneous_tasks_data_OBJECTS = microbenchs/parallel_dependent_homogeneous_tasks_data.$(OBJEXT) microbenchs_parallel_dependent_homogeneous_tasks_data_LDADD = \ $(LDADD) microbenchs_parallel_independent_heterogeneous_tasks_SOURCES = \ microbenchs/parallel_independent_heterogeneous_tasks.c microbenchs_parallel_independent_heterogeneous_tasks_OBJECTS = microbenchs/parallel_independent_heterogeneous_tasks.$(OBJEXT) microbenchs_parallel_independent_heterogeneous_tasks_LDADD = $(LDADD) microbenchs_parallel_independent_heterogeneous_tasks_data_SOURCES = \ microbenchs/parallel_independent_heterogeneous_tasks_data.c microbenchs_parallel_independent_heterogeneous_tasks_data_OBJECTS = microbenchs/parallel_independent_heterogeneous_tasks_data.$(OBJEXT) microbenchs_parallel_independent_heterogeneous_tasks_data_LDADD = \ $(LDADD) microbenchs_parallel_independent_homogeneous_tasks_SOURCES = \ microbenchs/parallel_independent_homogeneous_tasks.c microbenchs_parallel_independent_homogeneous_tasks_OBJECTS = \ microbenchs/parallel_independent_homogeneous_tasks.$(OBJEXT) microbenchs_parallel_independent_homogeneous_tasks_LDADD = $(LDADD) microbenchs_parallel_independent_homogeneous_tasks_data_SOURCES = \ microbenchs/parallel_independent_homogeneous_tasks_data.c microbenchs_parallel_independent_homogeneous_tasks_data_OBJECTS = microbenchs/parallel_independent_homogeneous_tasks_data.$(OBJEXT) microbenchs_parallel_independent_homogeneous_tasks_data_LDADD = \ $(LDADD) microbenchs_parallel_redux_heterogeneous_tasks_data_SOURCES = \ microbenchs/parallel_redux_heterogeneous_tasks_data.c microbenchs_parallel_redux_heterogeneous_tasks_data_OBJECTS = \ microbenchs/parallel_redux_heterogeneous_tasks_data.$(OBJEXT) microbenchs_parallel_redux_heterogeneous_tasks_data_LDADD = $(LDADD) microbenchs_parallel_redux_homogeneous_tasks_data_SOURCES = \ microbenchs/parallel_redux_homogeneous_tasks_data.c microbenchs_parallel_redux_homogeneous_tasks_data_OBJECTS = \ microbenchs/parallel_redux_homogeneous_tasks_data.$(OBJEXT) microbenchs_parallel_redux_homogeneous_tasks_data_LDADD = $(LDADD) microbenchs_prefetch_data_on_node_SOURCES = \ microbenchs/prefetch_data_on_node.c microbenchs_prefetch_data_on_node_OBJECTS = \ microbenchs/prefetch_data_on_node.$(OBJEXT) microbenchs_prefetch_data_on_node_LDADD = $(LDADD) microbenchs_redundant_buffer_SOURCES = microbenchs/redundant_buffer.c microbenchs_redundant_buffer_OBJECTS = \ microbenchs/redundant_buffer.$(OBJEXT) microbenchs_redundant_buffer_LDADD = $(LDADD) microbenchs_sync_tasks_overhead_SOURCES = \ microbenchs/sync_tasks_overhead.c microbenchs_sync_tasks_overhead_OBJECTS = \ microbenchs/sync_tasks_overhead.$(OBJEXT) microbenchs_sync_tasks_overhead_LDADD = $(LDADD) microbenchs_tasks_overhead_SOURCES = microbenchs/tasks_overhead.c microbenchs_tasks_overhead_OBJECTS = \ microbenchs/tasks_overhead.$(OBJEXT) microbenchs_tasks_overhead_LDADD = $(LDADD) microbenchs_tasks_size_overhead_SOURCES = \ microbenchs/tasks_size_overhead.c microbenchs_tasks_size_overhead_OBJECTS = \ microbenchs/tasks_size_overhead.$(OBJEXT) microbenchs_tasks_size_overhead_LDADD = $(LDADD) openmp_api_01_SOURCES = openmp/api_01.c openmp_api_01_OBJECTS = openmp/api_01.$(OBJEXT) openmp_api_01_LDADD = $(LDADD) openmp_array_slice_01_SOURCES = openmp/array_slice_01.c openmp_array_slice_01_OBJECTS = openmp/array_slice_01.$(OBJEXT) openmp_array_slice_01_LDADD = $(LDADD) openmp_cuda_task_01_SOURCES = openmp/cuda_task_01.c openmp_cuda_task_01_OBJECTS = openmp/cuda_task_01.$(OBJEXT) openmp_cuda_task_01_LDADD = $(LDADD) openmp_environment_SOURCES = openmp/environment.c openmp_environment_OBJECTS = openmp/environment.$(OBJEXT) openmp_environment_LDADD = $(LDADD) openmp_init_exit_01_SOURCES = openmp/init_exit_01.c openmp_init_exit_01_OBJECTS = openmp/init_exit_01.$(OBJEXT) openmp_init_exit_01_LDADD = $(LDADD) openmp_init_exit_02_SOURCES = openmp/init_exit_02.c openmp_init_exit_02_OBJECTS = openmp/init_exit_02.$(OBJEXT) openmp_init_exit_02_LDADD = $(LDADD) openmp_parallel_01_SOURCES = openmp/parallel_01.c openmp_parallel_01_OBJECTS = openmp/parallel_01.$(OBJEXT) openmp_parallel_01_LDADD = $(LDADD) openmp_parallel_02_SOURCES = openmp/parallel_02.c openmp_parallel_02_OBJECTS = openmp/parallel_02.$(OBJEXT) openmp_parallel_02_LDADD = $(LDADD) openmp_parallel_03_SOURCES = openmp/parallel_03.c openmp_parallel_03_OBJECTS = openmp/parallel_03.$(OBJEXT) openmp_parallel_03_LDADD = $(LDADD) openmp_parallel_barrier_01_SOURCES = openmp/parallel_barrier_01.c openmp_parallel_barrier_01_OBJECTS = \ openmp/parallel_barrier_01.$(OBJEXT) openmp_parallel_barrier_01_LDADD = $(LDADD) openmp_parallel_critical_01_SOURCES = openmp/parallel_critical_01.c openmp_parallel_critical_01_OBJECTS = \ openmp/parallel_critical_01.$(OBJEXT) openmp_parallel_critical_01_LDADD = $(LDADD) openmp_parallel_critical_inline_01_SOURCES = \ openmp/parallel_critical_inline_01.c openmp_parallel_critical_inline_01_OBJECTS = \ openmp/parallel_critical_inline_01.$(OBJEXT) openmp_parallel_critical_inline_01_LDADD = $(LDADD) openmp_parallel_critical_named_01_SOURCES = \ openmp/parallel_critical_named_01.c openmp_parallel_critical_named_01_OBJECTS = \ openmp/parallel_critical_named_01.$(OBJEXT) openmp_parallel_critical_named_01_LDADD = $(LDADD) openmp_parallel_critical_named_inline_01_SOURCES = \ openmp/parallel_critical_named_inline_01.c openmp_parallel_critical_named_inline_01_OBJECTS = \ openmp/parallel_critical_named_inline_01.$(OBJEXT) openmp_parallel_critical_named_inline_01_LDADD = $(LDADD) openmp_parallel_for_01_SOURCES = openmp/parallel_for_01.c openmp_parallel_for_01_OBJECTS = openmp/parallel_for_01.$(OBJEXT) openmp_parallel_for_01_LDADD = $(LDADD) openmp_parallel_for_02_SOURCES = openmp/parallel_for_02.c openmp_parallel_for_02_OBJECTS = openmp/parallel_for_02.$(OBJEXT) openmp_parallel_for_02_LDADD = $(LDADD) openmp_parallel_for_ordered_01_SOURCES = \ openmp/parallel_for_ordered_01.c openmp_parallel_for_ordered_01_OBJECTS = \ openmp/parallel_for_ordered_01.$(OBJEXT) openmp_parallel_for_ordered_01_LDADD = $(LDADD) openmp_parallel_master_01_SOURCES = openmp/parallel_master_01.c openmp_parallel_master_01_OBJECTS = \ openmp/parallel_master_01.$(OBJEXT) openmp_parallel_master_01_LDADD = $(LDADD) openmp_parallel_master_inline_01_SOURCES = \ openmp/parallel_master_inline_01.c openmp_parallel_master_inline_01_OBJECTS = \ openmp/parallel_master_inline_01.$(OBJEXT) openmp_parallel_master_inline_01_LDADD = $(LDADD) openmp_parallel_nested_lock_01_SOURCES = \ openmp/parallel_nested_lock_01.c openmp_parallel_nested_lock_01_OBJECTS = \ openmp/parallel_nested_lock_01.$(OBJEXT) openmp_parallel_nested_lock_01_LDADD = $(LDADD) openmp_parallel_sections_01_SOURCES = openmp/parallel_sections_01.c openmp_parallel_sections_01_OBJECTS = \ openmp/parallel_sections_01.$(OBJEXT) openmp_parallel_sections_01_LDADD = $(LDADD) openmp_parallel_sections_combined_01_SOURCES = \ openmp/parallel_sections_combined_01.c openmp_parallel_sections_combined_01_OBJECTS = \ openmp/parallel_sections_combined_01.$(OBJEXT) openmp_parallel_sections_combined_01_LDADD = $(LDADD) openmp_parallel_simple_lock_01_SOURCES = \ openmp/parallel_simple_lock_01.c openmp_parallel_simple_lock_01_OBJECTS = \ openmp/parallel_simple_lock_01.$(OBJEXT) openmp_parallel_simple_lock_01_LDADD = $(LDADD) openmp_parallel_single_copyprivate_01_SOURCES = \ openmp/parallel_single_copyprivate_01.c openmp_parallel_single_copyprivate_01_OBJECTS = \ openmp/parallel_single_copyprivate_01.$(OBJEXT) openmp_parallel_single_copyprivate_01_LDADD = $(LDADD) openmp_parallel_single_copyprivate_inline_01_SOURCES = \ openmp/parallel_single_copyprivate_inline_01.c openmp_parallel_single_copyprivate_inline_01_OBJECTS = \ openmp/parallel_single_copyprivate_inline_01.$(OBJEXT) openmp_parallel_single_copyprivate_inline_01_LDADD = $(LDADD) openmp_parallel_single_inline_01_SOURCES = \ openmp/parallel_single_inline_01.c openmp_parallel_single_inline_01_OBJECTS = \ openmp/parallel_single_inline_01.$(OBJEXT) openmp_parallel_single_inline_01_LDADD = $(LDADD) openmp_parallel_single_nowait_01_SOURCES = \ openmp/parallel_single_nowait_01.c openmp_parallel_single_nowait_01_OBJECTS = \ openmp/parallel_single_nowait_01.$(OBJEXT) openmp_parallel_single_nowait_01_LDADD = $(LDADD) openmp_parallel_single_wait_01_SOURCES = \ openmp/parallel_single_wait_01.c openmp_parallel_single_wait_01_OBJECTS = \ openmp/parallel_single_wait_01.$(OBJEXT) openmp_parallel_single_wait_01_LDADD = $(LDADD) openmp_task_01_SOURCES = openmp/task_01.c openmp_task_01_OBJECTS = openmp/task_01.$(OBJEXT) openmp_task_01_LDADD = $(LDADD) openmp_task_02_SOURCES = openmp/task_02.c openmp_task_02_OBJECTS = openmp/task_02.$(OBJEXT) openmp_task_02_LDADD = $(LDADD) openmp_task_03_SOURCES = openmp/task_03.c openmp_task_03_OBJECTS = openmp/task_03.$(OBJEXT) openmp_task_03_LDADD = $(LDADD) openmp_taskgroup_01_SOURCES = openmp/taskgroup_01.c openmp_taskgroup_01_OBJECTS = openmp/taskgroup_01.$(OBJEXT) openmp_taskgroup_01_LDADD = $(LDADD) openmp_taskgroup_02_SOURCES = openmp/taskgroup_02.c openmp_taskgroup_02_OBJECTS = openmp/taskgroup_02.$(OBJEXT) openmp_taskgroup_02_LDADD = $(LDADD) openmp_taskloop_SOURCES = openmp/taskloop.c openmp_taskloop_OBJECTS = openmp/taskloop.$(OBJEXT) openmp_taskloop_LDADD = $(LDADD) openmp_taskwait_01_SOURCES = openmp/taskwait_01.c openmp_taskwait_01_OBJECTS = openmp/taskwait_01.$(OBJEXT) openmp_taskwait_01_LDADD = $(LDADD) am__overlap_gpu_concurrency_SOURCES_DIST = overlap/gpu_concurrency.c \ overlap/long_kernel.cu @STARPU_USE_CUDA_TRUE@am__objects_30 = overlap/long_kernel.$(OBJEXT) am_overlap_gpu_concurrency_OBJECTS = \ overlap/gpu_concurrency.$(OBJEXT) $(am__objects_30) overlap_gpu_concurrency_OBJECTS = \ $(am_overlap_gpu_concurrency_OBJECTS) overlap_gpu_concurrency_LDADD = $(LDADD) overlap_overlap_SOURCES = overlap/overlap.c overlap_overlap_OBJECTS = overlap/overlap.$(OBJEXT) overlap_overlap_LDADD = $(LDADD) parallel_tasks_combined_worker_assign_workerid_SOURCES = \ parallel_tasks/combined_worker_assign_workerid.c parallel_tasks_combined_worker_assign_workerid_OBJECTS = \ parallel_tasks/combined_worker_assign_workerid.$(OBJEXT) parallel_tasks_combined_worker_assign_workerid_LDADD = $(LDADD) parallel_tasks_cuda_only_SOURCES = parallel_tasks/cuda_only.c parallel_tasks_cuda_only_OBJECTS = parallel_tasks/cuda_only.$(OBJEXT) parallel_tasks_cuda_only_LDADD = $(LDADD) parallel_tasks_explicit_combined_worker_SOURCES = \ parallel_tasks/explicit_combined_worker.c parallel_tasks_explicit_combined_worker_OBJECTS = \ parallel_tasks/explicit_combined_worker.$(OBJEXT) parallel_tasks_explicit_combined_worker_LDADD = $(LDADD) parallel_tasks_parallel_kernels_SOURCES = \ parallel_tasks/parallel_kernels.c parallel_tasks_parallel_kernels_OBJECTS = \ parallel_tasks/parallel_kernels.$(OBJEXT) parallel_tasks_parallel_kernels_LDADD = $(LDADD) parallel_tasks_parallel_kernels_spmd_SOURCES = \ parallel_tasks/parallel_kernels_spmd.c parallel_tasks_parallel_kernels_spmd_OBJECTS = \ parallel_tasks/parallel_kernels_spmd.$(OBJEXT) parallel_tasks_parallel_kernels_spmd_LDADD = $(LDADD) parallel_tasks_parallel_kernels_trivial_SOURCES = \ parallel_tasks/parallel_kernels_trivial.c parallel_tasks_parallel_kernels_trivial_OBJECTS = \ parallel_tasks/parallel_kernels_trivial.$(OBJEXT) parallel_tasks_parallel_kernels_trivial_LDADD = $(LDADD) parallel_tasks_spmd_peager_SOURCES = parallel_tasks/spmd_peager.c parallel_tasks_spmd_peager_OBJECTS = \ parallel_tasks/spmd_peager.$(OBJEXT) parallel_tasks_spmd_peager_LDADD = $(LDADD) perfmodels_feed_SOURCES = perfmodels/feed.c perfmodels_feed_OBJECTS = perfmodels/feed.$(OBJEXT) perfmodels_feed_LDADD = $(LDADD) perfmodels_memory_SOURCES = perfmodels/memory.c perfmodels_memory_OBJECTS = perfmodels/memory.$(OBJEXT) perfmodels_memory_LDADD = $(LDADD) am__perfmodels_non_linear_regression_based_SOURCES_DIST = \ perfmodels/non_linear_regression_based.c \ perfmodels/opencl_memset.c @STARPU_USE_OPENCL_TRUE@am__objects_31 = \ @STARPU_USE_OPENCL_TRUE@ perfmodels/opencl_memset.$(OBJEXT) am_perfmodels_non_linear_regression_based_OBJECTS = \ perfmodels/non_linear_regression_based.$(OBJEXT) \ $(am__objects_31) perfmodels_non_linear_regression_based_OBJECTS = \ $(am_perfmodels_non_linear_regression_based_OBJECTS) perfmodels_non_linear_regression_based_LDADD = $(LDADD) perfmodels_path_SOURCES = perfmodels/path.c perfmodels_path_OBJECTS = perfmodels/path.$(OBJEXT) perfmodels_path_LDADD = $(LDADD) perfmodels_regression_based_check_SOURCES = \ perfmodels/regression_based_check.c perfmodels_regression_based_check_OBJECTS = \ perfmodels/regression_based_check.$(OBJEXT) perfmodels_regression_based_check_LDADD = $(LDADD) perfmodels_regression_based_energy_SOURCES = \ perfmodels/regression_based_energy.c perfmodels_regression_based_energy_OBJECTS = \ perfmodels/regression_based_energy.$(OBJEXT) perfmodels_regression_based_energy_LDADD = $(LDADD) am__perfmodels_regression_based_gpu_SOURCES_DIST = \ perfmodels/regression_based_gpu.c perfmodels/opencl_memset.c am_perfmodels_regression_based_gpu_OBJECTS = \ perfmodels/regression_based_gpu.$(OBJEXT) $(am__objects_31) perfmodels_regression_based_gpu_OBJECTS = \ $(am_perfmodels_regression_based_gpu_OBJECTS) perfmodels_regression_based_gpu_LDADD = $(LDADD) am__perfmodels_regression_based_memset_SOURCES_DIST = \ perfmodels/regression_based_memset.c \ perfmodels/opencl_memset.c am_perfmodels_regression_based_memset_OBJECTS = \ perfmodels/regression_based_memset.$(OBJEXT) $(am__objects_31) perfmodels_regression_based_memset_OBJECTS = \ $(am_perfmodels_regression_based_memset_OBJECTS) perfmodels_regression_based_memset_LDADD = $(LDADD) perfmodels_regression_based_multiimpl_SOURCES = \ perfmodels/regression_based_multiimpl.c perfmodels_regression_based_multiimpl_OBJECTS = \ perfmodels/regression_based_multiimpl.$(OBJEXT) perfmodels_regression_based_multiimpl_LDADD = $(LDADD) perfmodels_user_base_SOURCES = perfmodels/user_base.c perfmodels_user_base_OBJECTS = perfmodels/user_base.$(OBJEXT) perfmodels_user_base_LDADD = $(LDADD) perfmodels_valid_model_SOURCES = perfmodels/valid_model.c perfmodels_valid_model_OBJECTS = perfmodels/valid_model.$(OBJEXT) perfmodels_valid_model_LDADD = $(LDADD) perfmodels_value_nan_SOURCES = perfmodels/value_nan.c perfmodels_value_nan_OBJECTS = perfmodels/value_nan.$(OBJEXT) perfmodels_value_nan_LDADD = $(LDADD) sched_ctx_sched_ctx_hierarchy_SOURCES = \ sched_ctx/sched_ctx_hierarchy.c sched_ctx_sched_ctx_hierarchy_OBJECTS = \ sched_ctx/sched_ctx_hierarchy.$(OBJEXT) sched_ctx_sched_ctx_hierarchy_LDADD = $(LDADD) sched_ctx_sched_ctx_list_SOURCES = sched_ctx/sched_ctx_list.c sched_ctx_sched_ctx_list_OBJECTS = sched_ctx/sched_ctx_list.$(OBJEXT) sched_ctx_sched_ctx_list_LDADD = $(LDADD) sched_ctx_sched_ctx_policy_data_SOURCES = \ sched_ctx/sched_ctx_policy_data.c sched_ctx_sched_ctx_policy_data_OBJECTS = \ sched_ctx/sched_ctx_policy_data.$(OBJEXT) sched_ctx_sched_ctx_policy_data_LDADD = $(LDADD) sched_policies_data_locality_SOURCES = sched_policies/data_locality.c sched_policies_data_locality_OBJECTS = \ sched_policies/data_locality.$(OBJEXT) sched_policies_data_locality_LDADD = $(LDADD) sched_policies_execute_all_tasks_SOURCES = \ sched_policies/execute_all_tasks.c sched_policies_execute_all_tasks_OBJECTS = \ sched_policies/execute_all_tasks-execute_all_tasks.$(OBJEXT) sched_policies_execute_all_tasks_LDADD = $(LDADD) sched_policies_execute_all_tasks_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(sched_policies_execute_all_tasks_CFLAGS) $(CFLAGS) \ $(sched_policies_execute_all_tasks_LDFLAGS) $(LDFLAGS) -o $@ sched_policies_prio_SOURCES = sched_policies/prio.c sched_policies_prio_OBJECTS = sched_policies/prio.$(OBJEXT) sched_policies_prio_LDADD = $(LDADD) sched_policies_simple_cpu_gpu_sched_SOURCES = \ sched_policies/simple_cpu_gpu_sched.c sched_policies_simple_cpu_gpu_sched_OBJECTS = \ sched_policies/simple_cpu_gpu_sched.$(OBJEXT) sched_policies_simple_cpu_gpu_sched_LDADD = $(LDADD) sched_policies_simple_deps_SOURCES = sched_policies/simple_deps.c sched_policies_simple_deps_OBJECTS = \ sched_policies/simple_deps.$(OBJEXT) sched_policies_simple_deps_LDADD = $(LDADD) sched_policies_workerids_SOURCES = sched_policies/workerids.c sched_policies_workerids_OBJECTS = sched_policies/workerids.$(OBJEXT) sched_policies_workerids_LDADD = $(LDADD) am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } SCRIPTS = $(examplebin_SCRIPTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po \ datawizard/$(DEPDIR)/acquire_cb.Po \ datawizard/$(DEPDIR)/acquire_cb_insert.Po \ datawizard/$(DEPDIR)/acquire_release.Po \ datawizard/$(DEPDIR)/acquire_release2.Po \ datawizard/$(DEPDIR)/acquire_release_to.Po \ datawizard/$(DEPDIR)/acquire_try.Po \ datawizard/$(DEPDIR)/allocate.Po \ datawizard/$(DEPDIR)/allocate_many_numa_nodes.Po \ datawizard/$(DEPDIR)/bcsr.Po datawizard/$(DEPDIR)/cache.Po \ datawizard/$(DEPDIR)/commute.Po \ datawizard/$(DEPDIR)/commute2.Po datawizard/$(DEPDIR)/copy.Po \ datawizard/$(DEPDIR)/critical_section_with_void_interface.Po \ datawizard/$(DEPDIR)/data_deinitialize.Po \ datawizard/$(DEPDIR)/data_implicit_deps.Po \ datawizard/$(DEPDIR)/data_invalidation.Po \ datawizard/$(DEPDIR)/data_register-data_register.Po \ datawizard/$(DEPDIR)/deinitialize_pending_requests.Po \ datawizard/$(DEPDIR)/deps.Po \ datawizard/$(DEPDIR)/dining_philosophers.Po \ datawizard/$(DEPDIR)/double_parameter.Po \ datawizard/$(DEPDIR)/dsm_stress.Po \ datawizard/$(DEPDIR)/gpu_ptr_register.Po \ datawizard/$(DEPDIR)/gpu_register.Po \ datawizard/$(DEPDIR)/handle_to_pointer.Po \ datawizard/$(DEPDIR)/in_place_partition.Po \ datawizard/$(DEPDIR)/increment_init.Po \ datawizard/$(DEPDIR)/increment_redux.Po \ datawizard/$(DEPDIR)/increment_redux_lazy.Po \ datawizard/$(DEPDIR)/increment_redux_partition.Po \ datawizard/$(DEPDIR)/increment_redux_v2.Po \ datawizard/$(DEPDIR)/increment_redux_with_args.Po \ datawizard/$(DEPDIR)/invalidate_pending_requests.Po \ datawizard/$(DEPDIR)/lazy_allocation.Po \ datawizard/$(DEPDIR)/locality.Po \ datawizard/$(DEPDIR)/manual_reduction.Po \ datawizard/$(DEPDIR)/mpi_like.Po \ datawizard/$(DEPDIR)/mpi_like_async.Po \ datawizard/$(DEPDIR)/no_unregister.Po \ datawizard/$(DEPDIR)/noreclaim.Po \ datawizard/$(DEPDIR)/nowhere.Po \ datawizard/$(DEPDIR)/numa_overflow.Po \ datawizard/$(DEPDIR)/partition_dep.Po \ datawizard/$(DEPDIR)/partition_init.Po \ datawizard/$(DEPDIR)/partition_lazy.Po \ datawizard/$(DEPDIR)/partition_wontuse.Po \ datawizard/$(DEPDIR)/partitioned_acquire.Po \ datawizard/$(DEPDIR)/partitioned_initialization.Po \ datawizard/$(DEPDIR)/readers_and_writers.Po \ datawizard/$(DEPDIR)/readonly.Po \ datawizard/$(DEPDIR)/reclaim.Po \ datawizard/$(DEPDIR)/redux_acquire.Po \ datawizard/$(DEPDIR)/scal.Po datawizard/$(DEPDIR)/scratch.Po \ datawizard/$(DEPDIR)/scratch_opencl.Po \ datawizard/$(DEPDIR)/scratch_reuse.Po \ datawizard/$(DEPDIR)/simgrid-locality.Po \ datawizard/$(DEPDIR)/specific_node.Po \ datawizard/$(DEPDIR)/specific_node_same.Po \ datawizard/$(DEPDIR)/sync_and_notify_data.Po \ datawizard/$(DEPDIR)/sync_and_notify_data_implicit.Po \ datawizard/$(DEPDIR)/sync_and_notify_data_opencl.Po \ datawizard/$(DEPDIR)/sync_with_data_with_mem.Po \ datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking.Po \ datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking_implicit.Po \ datawizard/$(DEPDIR)/task_with_multiple_time_the_same_handle.Po \ datawizard/$(DEPDIR)/temporary_partition.Po \ datawizard/$(DEPDIR)/temporary_partition_implicit.Po \ datawizard/$(DEPDIR)/temporary_partition_read.Po \ datawizard/$(DEPDIR)/test_arbiter.Po \ datawizard/$(DEPDIR)/unpartition.Po \ datawizard/$(DEPDIR)/user_interaction_implicit.Po \ datawizard/$(DEPDIR)/variable_parameters.Po \ datawizard/$(DEPDIR)/variable_size.Po \ datawizard/$(DEPDIR)/write_only_tmp_buffer.Po \ datawizard/$(DEPDIR)/wt_broadcast.Po \ datawizard/$(DEPDIR)/wt_host.Po \ datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Po \ datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Po \ datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Po \ datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Po \ datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Po \ datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Po \ datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Po \ datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Po \ datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Po \ datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Po \ datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Po \ datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Po \ datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Po \ datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Po \ datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Po \ datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Po \ datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Po \ datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Po \ datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Po \ datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Po \ datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Po \ datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Po \ datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Po \ datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Po \ datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Po \ datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Po \ datawizard/interfaces/multiformat/advanced/$(DEPDIR)/generic.Po \ datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_cuda_opencl.Po \ datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_data_release.Po \ datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_handle_conversion.Po \ datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_worker.Po \ datawizard/interfaces/multiformat/advanced/$(DEPDIR)/same_handle.Po \ datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Po \ datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Po \ datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Po \ datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Po \ datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Po \ datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Po \ datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Po \ datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Po \ datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Po \ disk/$(DEPDIR)/disk_compute.Po disk/$(DEPDIR)/disk_copy.Po \ disk/$(DEPDIR)/disk_copy_to_disk.Po \ disk/$(DEPDIR)/disk_copy_unpack.Po disk/$(DEPDIR)/disk_pack.Po \ disk/$(DEPDIR)/mem_reclaim.Po \ energy/$(DEPDIR)/energy_efficiency.Po \ errorcheck/$(DEPDIR)/invalid_blocking_calls.Po \ errorcheck/$(DEPDIR)/invalid_tasks.Po \ errorcheck/$(DEPDIR)/starpu_init_noworker.Po \ errorcheck/$(DEPDIR)/workers_cpuid.Po \ fault-tolerance/$(DEPDIR)/retry.Po \ helper/$(DEPDIR)/cublasLt_init.Po \ helper/$(DEPDIR)/cublas_init.Po \ helper/$(DEPDIR)/cusparse_init.Po \ helper/$(DEPDIR)/execute_on_all.Po \ helper/$(DEPDIR)/hipblas_init.Po \ helper/$(DEPDIR)/pinned_memory.Po \ helper/$(DEPDIR)/starpu_create_sync_task.Po \ helper/$(DEPDIR)/starpu_data_cpy.Po \ helper/$(DEPDIR)/starpu_data_dup_ro.Po main/$(DEPDIR)/bind.Po \ main/$(DEPDIR)/callback.Po \ main/$(DEPDIR)/codelet_null_callback.Po \ main/$(DEPDIR)/const_codelet.Po main/$(DEPDIR)/deadlock.Po \ main/$(DEPDIR)/declare_deps_after_submission.Po \ main/$(DEPDIR)/declare_deps_after_submission_synchronous.Po \ main/$(DEPDIR)/declare_deps_in_callback.Po \ main/$(DEPDIR)/deploop.Po \ main/$(DEPDIR)/deprecated_func-deprecated_func.Po \ main/$(DEPDIR)/display_binding.Po main/$(DEPDIR)/empty_task.Po \ main/$(DEPDIR)/empty_task_chain.Po \ main/$(DEPDIR)/empty_task_sync_point.Po \ main/$(DEPDIR)/empty_task_sync_point_tasks.Po \ main/$(DEPDIR)/execute_on_a_specific_worker.Po \ main/$(DEPDIR)/execute_schedule.Po \ main/$(DEPDIR)/get_children_tasks.Po \ main/$(DEPDIR)/get_current_task.Po \ main/$(DEPDIR)/hwloc_cpuset.Po main/$(DEPDIR)/insert_task.Po \ main/$(DEPDIR)/insert_task_array.Po \ main/$(DEPDIR)/insert_task_dyn_handles.Po \ main/$(DEPDIR)/insert_task_many.Po \ main/$(DEPDIR)/insert_task_nullcodelet.Po \ main/$(DEPDIR)/insert_task_pack.Po \ main/$(DEPDIR)/insert_task_value.Po \ main/$(DEPDIR)/insert_task_where.Po main/$(DEPDIR)/job.Po \ main/$(DEPDIR)/mkdtemp.Po main/$(DEPDIR)/multithreaded.Po \ main/$(DEPDIR)/multithreaded_init.Po main/$(DEPDIR)/pack.Po \ main/$(DEPDIR)/pause_resume.Po main/$(DEPDIR)/regenerate.Po \ main/$(DEPDIR)/regenerate_pipeline.Po \ main/$(DEPDIR)/restart.Po main/$(DEPDIR)/starpu_init.Po \ main/$(DEPDIR)/starpu_task_bundle.Po \ main/$(DEPDIR)/starpu_task_wait.Po \ main/$(DEPDIR)/starpu_task_wait_for_all.Po \ main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Po \ main/$(DEPDIR)/static_restartable.Po \ main/$(DEPDIR)/static_restartable_tag.Po \ main/$(DEPDIR)/static_restartable_using_initializer.Po \ main/$(DEPDIR)/subgraph_repeat.Po \ main/$(DEPDIR)/subgraph_repeat_regenerate.Po \ main/$(DEPDIR)/subgraph_repeat_regenerate_tag.Po \ main/$(DEPDIR)/subgraph_repeat_regenerate_tag_cycle.Po \ main/$(DEPDIR)/subgraph_repeat_tag.Po main/$(DEPDIR)/submit.Po \ main/$(DEPDIR)/tag_get_task.Po \ main/$(DEPDIR)/tag_task_data_deps.Po \ main/$(DEPDIR)/tag_wait_api.Po main/$(DEPDIR)/task_end_dep.Po \ main/$(DEPDIR)/task_wait_api.Po \ main/$(DEPDIR)/wait_all_regenerable_tasks.Po \ main/driver_api/$(DEPDIR)/init_run_deinit.Po \ main/driver_api/$(DEPDIR)/run_driver.Po \ maxfpga/$(DEPDIR)/max_fpga_advanced_static.Po \ maxfpga/$(DEPDIR)/max_fpga_basic_static.Po \ maxfpga/$(DEPDIR)/max_fpga_dynamic.Po \ maxfpga/$(DEPDIR)/max_fpga_mux.Po \ microbenchs/$(DEPDIR)/async_tasks_overhead.Po \ microbenchs/$(DEPDIR)/bandwidth.Po \ microbenchs/$(DEPDIR)/display_structures_size.Po \ microbenchs/$(DEPDIR)/local_pingpong.Po \ microbenchs/$(DEPDIR)/matrix_as_vector.Po \ microbenchs/$(DEPDIR)/parallel_dependent_homogeneous_tasks_data.Po \ microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks.Po \ microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks_data.Po \ microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks.Po \ microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks_data.Po \ microbenchs/$(DEPDIR)/parallel_redux_heterogeneous_tasks_data.Po \ microbenchs/$(DEPDIR)/parallel_redux_homogeneous_tasks_data.Po \ microbenchs/$(DEPDIR)/prefetch_data_on_node.Po \ microbenchs/$(DEPDIR)/redundant_buffer.Po \ microbenchs/$(DEPDIR)/sync_tasks_overhead.Po \ microbenchs/$(DEPDIR)/tasks_overhead.Po \ microbenchs/$(DEPDIR)/tasks_size_overhead.Po \ openmp/$(DEPDIR)/api_01.Po openmp/$(DEPDIR)/array_slice_01.Po \ openmp/$(DEPDIR)/cuda_task_01.Po \ openmp/$(DEPDIR)/environment.Po \ openmp/$(DEPDIR)/init_exit_01.Po \ openmp/$(DEPDIR)/init_exit_02.Po \ openmp/$(DEPDIR)/parallel_01.Po \ openmp/$(DEPDIR)/parallel_02.Po \ openmp/$(DEPDIR)/parallel_03.Po \ openmp/$(DEPDIR)/parallel_barrier_01.Po \ openmp/$(DEPDIR)/parallel_critical_01.Po \ openmp/$(DEPDIR)/parallel_critical_inline_01.Po \ openmp/$(DEPDIR)/parallel_critical_named_01.Po \ openmp/$(DEPDIR)/parallel_critical_named_inline_01.Po \ openmp/$(DEPDIR)/parallel_for_01.Po \ openmp/$(DEPDIR)/parallel_for_02.Po \ openmp/$(DEPDIR)/parallel_for_ordered_01.Po \ openmp/$(DEPDIR)/parallel_master_01.Po \ openmp/$(DEPDIR)/parallel_master_inline_01.Po \ openmp/$(DEPDIR)/parallel_nested_lock_01.Po \ openmp/$(DEPDIR)/parallel_sections_01.Po \ openmp/$(DEPDIR)/parallel_sections_combined_01.Po \ openmp/$(DEPDIR)/parallel_simple_lock_01.Po \ openmp/$(DEPDIR)/parallel_single_copyprivate_01.Po \ openmp/$(DEPDIR)/parallel_single_copyprivate_inline_01.Po \ openmp/$(DEPDIR)/parallel_single_inline_01.Po \ openmp/$(DEPDIR)/parallel_single_nowait_01.Po \ openmp/$(DEPDIR)/parallel_single_wait_01.Po \ openmp/$(DEPDIR)/task_01.Po openmp/$(DEPDIR)/task_02.Po \ openmp/$(DEPDIR)/task_03.Po openmp/$(DEPDIR)/taskgroup_01.Po \ openmp/$(DEPDIR)/taskgroup_02.Po openmp/$(DEPDIR)/taskloop.Po \ openmp/$(DEPDIR)/taskwait_01.Po \ overlap/$(DEPDIR)/gpu_concurrency.Po \ overlap/$(DEPDIR)/overlap.Po \ parallel_tasks/$(DEPDIR)/combined_worker_assign_workerid.Po \ parallel_tasks/$(DEPDIR)/cuda_only.Po \ parallel_tasks/$(DEPDIR)/explicit_combined_worker.Po \ parallel_tasks/$(DEPDIR)/parallel_kernels.Po \ parallel_tasks/$(DEPDIR)/parallel_kernels_spmd.Po \ parallel_tasks/$(DEPDIR)/parallel_kernels_trivial.Po \ parallel_tasks/$(DEPDIR)/spmd_peager.Po \ perfmodels/$(DEPDIR)/feed.Po perfmodels/$(DEPDIR)/memory.Po \ perfmodels/$(DEPDIR)/non_linear_regression_based.Po \ perfmodels/$(DEPDIR)/opencl_memset.Po \ perfmodels/$(DEPDIR)/path.Po \ perfmodels/$(DEPDIR)/regression_based_check.Po \ perfmodels/$(DEPDIR)/regression_based_energy.Po \ perfmodels/$(DEPDIR)/regression_based_gpu.Po \ perfmodels/$(DEPDIR)/regression_based_memset.Po \ perfmodels/$(DEPDIR)/regression_based_multiimpl.Po \ perfmodels/$(DEPDIR)/user_base.Po \ perfmodels/$(DEPDIR)/valid_model.Po \ perfmodels/$(DEPDIR)/value_nan.Po \ sched_ctx/$(DEPDIR)/sched_ctx_hierarchy.Po \ sched_ctx/$(DEPDIR)/sched_ctx_list.Po \ sched_ctx/$(DEPDIR)/sched_ctx_policy_data.Po \ sched_policies/$(DEPDIR)/data_locality.Po \ sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Po \ sched_policies/$(DEPDIR)/prio.Po \ sched_policies/$(DEPDIR)/simple_cpu_gpu_sched.Po \ sched_policies/$(DEPDIR)/simple_deps.Po \ sched_policies/$(DEPDIR)/workerids.Po \ variable/$(DEPDIR)/increment.Po \ variable/$(DEPDIR)/increment_opencl.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CXXFLAGS) $(CXXFLAGS) AM_V_CXX = $(am__v_CXX_@AM_V@) am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) am__v_CXX_0 = @echo " CXX " $@; am__v_CXX_1 = CXXLD = $(CXX) CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) am__v_CXXLD_0 = @echo " CXXLD " $@; am__v_CXXLD_1 = FCCOMPILE = $(FC) $(AM_FCFLAGS) $(FCFLAGS) LTFCCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(FC) $(AM_FCFLAGS) $(FCFLAGS) AM_V_FC = $(am__v_FC_@AM_V@) am__v_FC_ = $(am__v_FC_@AM_DEFAULT_V@) am__v_FC_0 = @echo " FC " $@; am__v_FC_1 = FCLD = $(FC) FCLINK = $(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_FCLD = $(am__v_FCLD_@AM_V@) am__v_FCLD_ = $(am__v_FCLD_@AM_DEFAULT_V@) am__v_FCLD_0 = @echo " FCLD " $@; am__v_FCLD_1 = SOURCES = datawizard/acquire_cb.c datawizard/acquire_cb_insert.c \ $(datawizard_acquire_release_SOURCES) \ $(datawizard_acquire_release2_SOURCES) \ $(datawizard_acquire_release_to_SOURCES) \ datawizard/acquire_try.c datawizard/allocate.c \ datawizard/allocate_many_numa_nodes.c datawizard/bcsr.c \ datawizard/cache.c datawizard/commute.c datawizard/commute2.c \ datawizard/copy.c \ datawizard/critical_section_with_void_interface.c \ datawizard/data_deinitialize.c datawizard/data_implicit_deps.c \ datawizard/data_invalidation.c datawizard/data_register.c \ datawizard/deinitialize_pending_requests.c datawizard/deps.c \ datawizard/dining_philosophers.c datawizard/double_parameter.c \ datawizard/dsm_stress.c $(datawizard_gpu_ptr_register_SOURCES) \ $(datawizard_gpu_register_SOURCES) \ datawizard/handle_to_pointer.c \ $(datawizard_in_place_partition_SOURCES) \ $(datawizard_increment_init_SOURCES) \ $(datawizard_increment_redux_SOURCES) \ $(datawizard_increment_redux_lazy_SOURCES) \ $(datawizard_increment_redux_partition_SOURCES) \ $(datawizard_increment_redux_v2_SOURCES) \ $(datawizard_increment_redux_with_args_SOURCES) \ $(datawizard_interfaces_bcsr_bcsr_interface_SOURCES) \ $(datawizard_interfaces_block_block_interface_SOURCES) \ $(datawizard_interfaces_coo_coo_interface_SOURCES) \ datawizard/interfaces/copy_interfaces.c \ $(datawizard_interfaces_csr_csr_interface_SOURCES) \ $(datawizard_interfaces_matrix_matrix_interface_SOURCES) \ $(datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_SOURCES) \ $(datawizard_interfaces_multiformat_advanced_multiformat_data_release_SOURCES) \ $(datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_SOURCES) \ $(datawizard_interfaces_multiformat_advanced_multiformat_worker_SOURCES) \ $(datawizard_interfaces_multiformat_advanced_same_handle_SOURCES) \ $(datawizard_interfaces_multiformat_multiformat_interface_SOURCES) \ $(datawizard_interfaces_ndim_ndim_interface_SOURCES) \ $(datawizard_interfaces_tensor_tensor_interface_SOURCES) \ $(datawizard_interfaces_variable_variable_interface_SOURCES) \ $(datawizard_interfaces_vector_vector_interface_SOURCES) \ $(datawizard_interfaces_void_void_interface_SOURCES) \ datawizard/invalidate_pending_requests.c \ datawizard/lazy_allocation.c datawizard/locality.c \ datawizard/manual_reduction.c $(datawizard_mpi_like_SOURCES) \ $(datawizard_mpi_like_async_SOURCES) \ datawizard/no_unregister.c datawizard/noreclaim.c \ datawizard/nowhere.c datawizard/numa_overflow.c \ $(datawizard_partition_dep_SOURCES) \ datawizard/partition_init.c \ $(datawizard_partition_lazy_SOURCES) \ datawizard/partition_wontuse.c \ datawizard/partitioned_acquire.c \ datawizard/partitioned_initialization.c \ datawizard/readers_and_writers.c datawizard/readonly.c \ datawizard/reclaim.c datawizard/redux_acquire.c \ $(datawizard_scratch_SOURCES) datawizard/scratch_reuse.c \ datawizard/simgrid-locality.c \ $(datawizard_specific_node_SOURCES) \ datawizard/specific_node_same.c \ $(datawizard_sync_and_notify_data_SOURCES) \ $(datawizard_sync_and_notify_data_implicit_SOURCES) \ datawizard/sync_with_data_with_mem.c \ datawizard/sync_with_data_with_mem_non_blocking.c \ datawizard/sync_with_data_with_mem_non_blocking_implicit.c \ datawizard/task_with_multiple_time_the_same_handle.c \ datawizard/temporary_partition.c \ datawizard/temporary_partition_implicit.c \ datawizard/temporary_partition_read.c \ $(datawizard_test_arbiter_SOURCES) datawizard/unpartition.c \ datawizard/user_interaction_implicit.c \ datawizard/variable_parameters.c datawizard/variable_size.c \ datawizard/write_only_tmp_buffer.c \ $(datawizard_wt_broadcast_SOURCES) \ $(datawizard_wt_host_SOURCES) disk/disk_compute.c \ disk/disk_copy.c disk/disk_copy_to_disk.c \ disk/disk_copy_unpack.c disk/disk_pack.c disk/mem_reclaim.c \ energy/energy_efficiency.c errorcheck/invalid_blocking_calls.c \ errorcheck/invalid_tasks.c errorcheck/starpu_init_noworker.c \ errorcheck/workers_cpuid.c fault-tolerance/retry.c \ $(fortran90_init_01_SOURCES) helper/cublasLt_init.c \ helper/cublas_init.c helper/cusparse_init.c \ helper/execute_on_all.c helper/hipblas_init.c \ helper/pinned_memory.c helper/starpu_create_sync_task.c \ helper/starpu_data_cpy.c $(helper_starpu_data_dup_ro_SOURCES) \ loader.c main/bind.c main/callback.c \ main/codelet_null_callback.c main/const_codelet.c \ main/deadlock.c main/declare_deps_after_submission.c \ main/declare_deps_after_submission_synchronous.c \ main/declare_deps_in_callback.c main/deploop.c \ main/deprecated_func.c main/display_binding.c \ main/driver_api/init_run_deinit.c main/driver_api/run_driver.c \ main/empty_task.c main/empty_task_chain.c \ main/empty_task_sync_point.c \ main/empty_task_sync_point_tasks.c \ main/execute_on_a_specific_worker.c main/execute_schedule.c \ main/get_children_tasks.c main/get_current_task.c \ main/hwloc_cpuset.c main/insert_task.c \ main/insert_task_array.c main/insert_task_dyn_handles.c \ main/insert_task_many.c main/insert_task_nullcodelet.c \ main/insert_task_pack.c main/insert_task_value.c \ $(main_insert_task_where_SOURCES) main/job.c main/mkdtemp.c \ main/multithreaded.c main/multithreaded_init.c main/pack.c \ main/pause_resume.c main/regenerate.c \ main/regenerate_pipeline.c main/restart.c main/starpu_init.c \ main/starpu_task_bundle.c main/starpu_task_wait.c \ main/starpu_task_wait_for_all.c main/starpu_worker_exists.c \ main/static_restartable.c main/static_restartable_tag.c \ main/static_restartable_using_initializer.c \ $(main_subgraph_repeat_SOURCES) \ $(main_subgraph_repeat_regenerate_SOURCES) \ $(main_subgraph_repeat_regenerate_tag_SOURCES) \ $(main_subgraph_repeat_regenerate_tag_cycle_SOURCES) \ $(main_subgraph_repeat_tag_SOURCES) main/submit.c \ main/tag_get_task.c main/tag_task_data_deps.c \ main/tag_wait_api.c main/task_end_dep.c main/task_wait_api.c \ main/wait_all_regenerable_tasks.c \ maxfpga/max_fpga_advanced_static.c \ maxfpga/max_fpga_basic_static.c maxfpga/max_fpga_dynamic.c \ maxfpga/max_fpga_mux.c microbenchs/async_tasks_overhead.c \ microbenchs/bandwidth.c microbenchs/display_structures_size.c \ microbenchs/local_pingpong.c microbenchs/matrix_as_vector.c \ microbenchs/parallel_dependent_homogeneous_tasks_data.c \ microbenchs/parallel_independent_heterogeneous_tasks.c \ microbenchs/parallel_independent_heterogeneous_tasks_data.c \ microbenchs/parallel_independent_homogeneous_tasks.c \ microbenchs/parallel_independent_homogeneous_tasks_data.c \ microbenchs/parallel_redux_heterogeneous_tasks_data.c \ microbenchs/parallel_redux_homogeneous_tasks_data.c \ microbenchs/prefetch_data_on_node.c \ microbenchs/redundant_buffer.c \ microbenchs/sync_tasks_overhead.c microbenchs/tasks_overhead.c \ microbenchs/tasks_size_overhead.c openmp/api_01.c \ openmp/array_slice_01.c openmp/cuda_task_01.c \ openmp/environment.c openmp/init_exit_01.c \ openmp/init_exit_02.c openmp/parallel_01.c \ openmp/parallel_02.c openmp/parallel_03.c \ openmp/parallel_barrier_01.c openmp/parallel_critical_01.c \ openmp/parallel_critical_inline_01.c \ openmp/parallel_critical_named_01.c \ openmp/parallel_critical_named_inline_01.c \ openmp/parallel_for_01.c openmp/parallel_for_02.c \ openmp/parallel_for_ordered_01.c openmp/parallel_master_01.c \ openmp/parallel_master_inline_01.c \ openmp/parallel_nested_lock_01.c openmp/parallel_sections_01.c \ openmp/parallel_sections_combined_01.c \ openmp/parallel_simple_lock_01.c \ openmp/parallel_single_copyprivate_01.c \ openmp/parallel_single_copyprivate_inline_01.c \ openmp/parallel_single_inline_01.c \ openmp/parallel_single_nowait_01.c \ openmp/parallel_single_wait_01.c openmp/task_01.c \ openmp/task_02.c openmp/task_03.c openmp/taskgroup_01.c \ openmp/taskgroup_02.c openmp/taskloop.c openmp/taskwait_01.c \ $(overlap_gpu_concurrency_SOURCES) overlap/overlap.c \ parallel_tasks/combined_worker_assign_workerid.c \ parallel_tasks/cuda_only.c \ parallel_tasks/explicit_combined_worker.c \ parallel_tasks/parallel_kernels.c \ parallel_tasks/parallel_kernels_spmd.c \ parallel_tasks/parallel_kernels_trivial.c \ parallel_tasks/spmd_peager.c perfmodels/feed.c \ perfmodels/memory.c \ $(perfmodels_non_linear_regression_based_SOURCES) \ perfmodels/path.c perfmodels/regression_based_check.c \ perfmodels/regression_based_energy.c \ $(perfmodels_regression_based_gpu_SOURCES) \ $(perfmodels_regression_based_memset_SOURCES) \ perfmodels/regression_based_multiimpl.c perfmodels/user_base.c \ perfmodels/valid_model.c perfmodels/value_nan.c \ sched_ctx/sched_ctx_hierarchy.c sched_ctx/sched_ctx_list.c \ sched_ctx/sched_ctx_policy_data.c \ sched_policies/data_locality.c \ sched_policies/execute_all_tasks.c sched_policies/prio.c \ sched_policies/simple_cpu_gpu_sched.c \ sched_policies/simple_deps.c sched_policies/workerids.c DIST_SOURCES = datawizard/acquire_cb.c datawizard/acquire_cb_insert.c \ $(am__datawizard_acquire_release_SOURCES_DIST) \ $(am__datawizard_acquire_release2_SOURCES_DIST) \ $(am__datawizard_acquire_release_to_SOURCES_DIST) \ datawizard/acquire_try.c datawizard/allocate.c \ datawizard/allocate_many_numa_nodes.c datawizard/bcsr.c \ datawizard/cache.c datawizard/commute.c datawizard/commute2.c \ datawizard/copy.c \ datawizard/critical_section_with_void_interface.c \ datawizard/data_deinitialize.c datawizard/data_implicit_deps.c \ datawizard/data_invalidation.c datawizard/data_register.c \ datawizard/deinitialize_pending_requests.c datawizard/deps.c \ datawizard/dining_philosophers.c datawizard/double_parameter.c \ datawizard/dsm_stress.c \ $(am__datawizard_gpu_ptr_register_SOURCES_DIST) \ $(am__datawizard_gpu_register_SOURCES_DIST) \ datawizard/handle_to_pointer.c \ $(am__datawizard_in_place_partition_SOURCES_DIST) \ $(am__datawizard_increment_init_SOURCES_DIST) \ $(am__datawizard_increment_redux_SOURCES_DIST) \ $(am__datawizard_increment_redux_lazy_SOURCES_DIST) \ $(am__datawizard_increment_redux_partition_SOURCES_DIST) \ $(am__datawizard_increment_redux_v2_SOURCES_DIST) \ $(am__datawizard_increment_redux_with_args_SOURCES_DIST) \ $(am__datawizard_interfaces_bcsr_bcsr_interface_SOURCES_DIST) \ $(am__datawizard_interfaces_block_block_interface_SOURCES_DIST) \ $(am__datawizard_interfaces_coo_coo_interface_SOURCES_DIST) \ datawizard/interfaces/copy_interfaces.c \ $(am__datawizard_interfaces_csr_csr_interface_SOURCES_DIST) \ $(am__datawizard_interfaces_matrix_matrix_interface_SOURCES_DIST) \ $(datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_SOURCES) \ $(datawizard_interfaces_multiformat_advanced_multiformat_data_release_SOURCES) \ $(datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_SOURCES) \ $(datawizard_interfaces_multiformat_advanced_multiformat_worker_SOURCES) \ $(datawizard_interfaces_multiformat_advanced_same_handle_SOURCES) \ $(am__datawizard_interfaces_multiformat_multiformat_interface_SOURCES_DIST) \ $(am__datawizard_interfaces_ndim_ndim_interface_SOURCES_DIST) \ $(am__datawizard_interfaces_tensor_tensor_interface_SOURCES_DIST) \ $(am__datawizard_interfaces_variable_variable_interface_SOURCES_DIST) \ $(am__datawizard_interfaces_vector_vector_interface_SOURCES_DIST) \ $(datawizard_interfaces_void_void_interface_SOURCES) \ datawizard/invalidate_pending_requests.c \ datawizard/lazy_allocation.c datawizard/locality.c \ datawizard/manual_reduction.c \ $(am__datawizard_mpi_like_SOURCES_DIST) \ $(am__datawizard_mpi_like_async_SOURCES_DIST) \ datawizard/no_unregister.c datawizard/noreclaim.c \ datawizard/nowhere.c datawizard/numa_overflow.c \ $(am__datawizard_partition_dep_SOURCES_DIST) \ datawizard/partition_init.c \ $(am__datawizard_partition_lazy_SOURCES_DIST) \ datawizard/partition_wontuse.c \ datawizard/partitioned_acquire.c \ datawizard/partitioned_initialization.c \ datawizard/readers_and_writers.c datawizard/readonly.c \ datawizard/reclaim.c datawizard/redux_acquire.c \ $(am__datawizard_scratch_SOURCES_DIST) \ datawizard/scratch_reuse.c datawizard/simgrid-locality.c \ $(am__datawizard_specific_node_SOURCES_DIST) \ datawizard/specific_node_same.c \ $(am__datawizard_sync_and_notify_data_SOURCES_DIST) \ $(am__datawizard_sync_and_notify_data_implicit_SOURCES_DIST) \ datawizard/sync_with_data_with_mem.c \ datawizard/sync_with_data_with_mem_non_blocking.c \ datawizard/sync_with_data_with_mem_non_blocking_implicit.c \ datawizard/task_with_multiple_time_the_same_handle.c \ datawizard/temporary_partition.c \ datawizard/temporary_partition_implicit.c \ datawizard/temporary_partition_read.c \ $(datawizard_test_arbiter_SOURCES) datawizard/unpartition.c \ datawizard/user_interaction_implicit.c \ datawizard/variable_parameters.c datawizard/variable_size.c \ datawizard/write_only_tmp_buffer.c \ $(am__datawizard_wt_broadcast_SOURCES_DIST) \ $(am__datawizard_wt_host_SOURCES_DIST) disk/disk_compute.c \ disk/disk_copy.c disk/disk_copy_to_disk.c \ disk/disk_copy_unpack.c disk/disk_pack.c disk/mem_reclaim.c \ energy/energy_efficiency.c errorcheck/invalid_blocking_calls.c \ errorcheck/invalid_tasks.c errorcheck/starpu_init_noworker.c \ errorcheck/workers_cpuid.c fault-tolerance/retry.c \ $(am__fortran90_init_01_SOURCES_DIST) helper/cublasLt_init.c \ helper/cublas_init.c helper/cusparse_init.c \ helper/execute_on_all.c helper/hipblas_init.c \ helper/pinned_memory.c helper/starpu_create_sync_task.c \ helper/starpu_data_cpy.c \ $(am__helper_starpu_data_dup_ro_SOURCES_DIST) loader.c \ main/bind.c main/callback.c main/codelet_null_callback.c \ main/const_codelet.c main/deadlock.c \ main/declare_deps_after_submission.c \ main/declare_deps_after_submission_synchronous.c \ main/declare_deps_in_callback.c main/deploop.c \ main/deprecated_func.c main/display_binding.c \ main/driver_api/init_run_deinit.c main/driver_api/run_driver.c \ main/empty_task.c main/empty_task_chain.c \ main/empty_task_sync_point.c \ main/empty_task_sync_point_tasks.c \ main/execute_on_a_specific_worker.c main/execute_schedule.c \ main/get_children_tasks.c main/get_current_task.c \ main/hwloc_cpuset.c main/insert_task.c \ main/insert_task_array.c main/insert_task_dyn_handles.c \ main/insert_task_many.c main/insert_task_nullcodelet.c \ main/insert_task_pack.c main/insert_task_value.c \ $(am__main_insert_task_where_SOURCES_DIST) main/job.c \ main/mkdtemp.c main/multithreaded.c main/multithreaded_init.c \ main/pack.c main/pause_resume.c main/regenerate.c \ main/regenerate_pipeline.c main/restart.c main/starpu_init.c \ main/starpu_task_bundle.c main/starpu_task_wait.c \ main/starpu_task_wait_for_all.c main/starpu_worker_exists.c \ main/static_restartable.c main/static_restartable_tag.c \ main/static_restartable_using_initializer.c \ $(am__main_subgraph_repeat_SOURCES_DIST) \ $(am__main_subgraph_repeat_regenerate_SOURCES_DIST) \ $(am__main_subgraph_repeat_regenerate_tag_SOURCES_DIST) \ $(am__main_subgraph_repeat_regenerate_tag_cycle_SOURCES_DIST) \ $(am__main_subgraph_repeat_tag_SOURCES_DIST) main/submit.c \ main/tag_get_task.c main/tag_task_data_deps.c \ main/tag_wait_api.c main/task_end_dep.c main/task_wait_api.c \ main/wait_all_regenerable_tasks.c \ maxfpga/max_fpga_advanced_static.c \ maxfpga/max_fpga_basic_static.c maxfpga/max_fpga_dynamic.c \ maxfpga/max_fpga_mux.c microbenchs/async_tasks_overhead.c \ microbenchs/bandwidth.c microbenchs/display_structures_size.c \ microbenchs/local_pingpong.c microbenchs/matrix_as_vector.c \ microbenchs/parallel_dependent_homogeneous_tasks_data.c \ microbenchs/parallel_independent_heterogeneous_tasks.c \ microbenchs/parallel_independent_heterogeneous_tasks_data.c \ microbenchs/parallel_independent_homogeneous_tasks.c \ microbenchs/parallel_independent_homogeneous_tasks_data.c \ microbenchs/parallel_redux_heterogeneous_tasks_data.c \ microbenchs/parallel_redux_homogeneous_tasks_data.c \ microbenchs/prefetch_data_on_node.c \ microbenchs/redundant_buffer.c \ microbenchs/sync_tasks_overhead.c microbenchs/tasks_overhead.c \ microbenchs/tasks_size_overhead.c openmp/api_01.c \ openmp/array_slice_01.c openmp/cuda_task_01.c \ openmp/environment.c openmp/init_exit_01.c \ openmp/init_exit_02.c openmp/parallel_01.c \ openmp/parallel_02.c openmp/parallel_03.c \ openmp/parallel_barrier_01.c openmp/parallel_critical_01.c \ openmp/parallel_critical_inline_01.c \ openmp/parallel_critical_named_01.c \ openmp/parallel_critical_named_inline_01.c \ openmp/parallel_for_01.c openmp/parallel_for_02.c \ openmp/parallel_for_ordered_01.c openmp/parallel_master_01.c \ openmp/parallel_master_inline_01.c \ openmp/parallel_nested_lock_01.c openmp/parallel_sections_01.c \ openmp/parallel_sections_combined_01.c \ openmp/parallel_simple_lock_01.c \ openmp/parallel_single_copyprivate_01.c \ openmp/parallel_single_copyprivate_inline_01.c \ openmp/parallel_single_inline_01.c \ openmp/parallel_single_nowait_01.c \ openmp/parallel_single_wait_01.c openmp/task_01.c \ openmp/task_02.c openmp/task_03.c openmp/taskgroup_01.c \ openmp/taskgroup_02.c openmp/taskloop.c openmp/taskwait_01.c \ $(am__overlap_gpu_concurrency_SOURCES_DIST) overlap/overlap.c \ parallel_tasks/combined_worker_assign_workerid.c \ parallel_tasks/cuda_only.c \ parallel_tasks/explicit_combined_worker.c \ parallel_tasks/parallel_kernels.c \ parallel_tasks/parallel_kernels_spmd.c \ parallel_tasks/parallel_kernels_trivial.c \ parallel_tasks/spmd_peager.c perfmodels/feed.c \ perfmodels/memory.c \ $(am__perfmodels_non_linear_regression_based_SOURCES_DIST) \ perfmodels/path.c perfmodels/regression_based_check.c \ perfmodels/regression_based_energy.c \ $(am__perfmodels_regression_based_gpu_SOURCES_DIST) \ $(am__perfmodels_regression_based_memset_SOURCES_DIST) \ perfmodels/regression_based_multiimpl.c perfmodels/user_base.c \ perfmodels/valid_model.c perfmodels/value_nan.c \ sched_ctx/sched_ctx_hierarchy.c sched_ctx/sched_ctx_list.c \ sched_ctx/sched_ctx_policy_data.c \ sched_policies/data_locality.c \ sched_policies/execute_all_tasks.c sched_policies/prio.c \ sched_policies/simple_cpu_gpu_sched.c \ sched_policies/simple_deps.c sched_policies/workerids.c RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac DATA = $(nobase_STARPU_OPENCL_DATA_DATA) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ check recheck distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) am__EXEEXT_11 = microbenchs/parallel_independent_homogeneous_tasks.sh \ microbenchs/parallel_independent_heterogeneous_tasks.sh \ microbenchs/parallel_independent_homogeneous_tasks_data.sh \ microbenchs/parallel_independent_heterogeneous_tasks_data.sh \ microbenchs/parallel_redux_homogeneous_tasks_data.sh \ microbenchs/parallel_redux_heterogeneous_tasks_data.sh \ microbenchs/parallel_dependent_homogeneous_tasks_data.sh @STARPU_SIMGRID_TRUE@am__EXEEXT_12 = $(am__EXEEXT_11) am__EXEEXT_13 = $(am__append_8) $(am__append_16) $(am__EXEEXT_12) \ datawizard/locality.sh microbenchs/bandwidth_scheds.sh \ $(am__append_18) TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) DIST_SUBDIRS = model-checking am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-loader.mk \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ $(STARPU_EXPORTED_LIBS) $(HWLOC_LIBS) $(STARPU_OPENCL_LDFLAGS) \ $(STARPU_CUDA_LDFLAGS) $(STARPU_HIP_LDFLAGS) \ $(STARPU_MAX_FPGA_LDFLAGS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = $(am__append_4) $(am__append_6) LAUNCHER = $(am__append_3) $(am__append_5) # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # AM_CFLAGS = $(GLOBAL_AM_CFLAGS) $(APP_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) $(APP_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) $(APP_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) $(APP_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile # When GNU parallel is available and -j is passed to make, run tests through # parallel, using a "starpu" semaphore. # Also make test shell scripts run its tests through parallel, using a # "substarpu" semaphore. This brings some overload, but only one level. @HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') @STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 @STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ @STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) @STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" @STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) AM_TESTS_FD_REDIRECT = 9>&2 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ EXTRA_DIST = \ helper.h \ datawizard/locality.sh \ overlap/overlap.sh \ datawizard/scal.h \ regression/profiles.in \ regression/regression.sh.in \ regression/profiles.build.only.in \ microbenchs/tasks_data_overhead.sh \ microbenchs/sync_tasks_data_overhead.sh \ microbenchs/async_tasks_data_overhead.sh \ microbenchs/tasks_size_overhead.sh \ microbenchs/tasks_size_overhead_sched.sh \ microbenchs/tasks_size_overhead_scheds.sh \ microbenchs/tasks_size_overhead.gp \ microbenchs/parallel_dependent_homogeneous_tasks_data.sh \ microbenchs/parallel_independent_heterogeneous_tasks_data.sh \ microbenchs/parallel_independent_heterogeneous_tasks.sh \ microbenchs/parallel_independent_homogeneous_tasks_data.sh \ microbenchs/parallel_independent_homogeneous_tasks.sh \ microbenchs/bandwidth_scheds.sh \ microbenchs/starpu_check.sh \ energy/static.sh \ energy/dynamic.sh \ energy/perfs.gp \ datawizard/scratch_opencl_kernel.cl \ datawizard/sync_and_notify_data_opencl_codelet.cl\ coverage/coverage.sh \ variable/increment.h \ variable/increment_opencl_kernel.cl \ variable/redux_opencl_kernel.cl \ variable/neutral_opencl_kernel.cl \ datawizard/interfaces/test_interfaces.h \ datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl \ datawizard/interfaces/coo/coo_opencl_kernel.cl \ datawizard/interfaces/matrix/matrix_opencl_kernel.cl \ datawizard/interfaces/variable/variable_opencl_kernel.cl \ datawizard/interfaces/vector/vector_opencl_kernel.cl \ datawizard/interfaces/multiformat/multiformat_types.h \ datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl \ datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl \ datawizard/interfaces/multiformat/advanced/generic.h \ datawizard/interfaces/csr/csr_opencl_kernel.cl \ datawizard/interfaces/block/block_opencl_kernel.cl \ datawizard/interfaces/tensor/tensor_opencl_kernel.cl \ datawizard/interfaces/ndim/ndim_opencl_kernel.cl \ perfmodels/opencl_memset_kernel.cl \ $(MICROBENCHS:=.sh) \ microbenchs/microbench.sh \ memory/memstress2.gp \ memory/memstress2.sh \ memory/memstress.gp \ memory/memstress.sh \ maxfpga/LMemLoopbackCpuCode.c \ maxfpga/MyTasksManager.maxj \ maxfpga/MyTasksMuxManager.maxj \ maxfpga/README.txt \ maxfpga/StreamFMACpuCode.cpp \ maxfpga/Task1.maxj \ maxfpga/Task2.maxj \ maxfpga/Task3.maxj \ datawizard/interfaces/test_interfaces.sh CLEANFILES = *.gcno *.gcda *.linkinfo core starpu_idle_microsec.log \ *.mod *.png *.output tasks.rec perfs.rec */perfs.rec \ */*/perfs.rec perfs2.rec fortran90/starpu_mod.f90 \ bandwidth-*.dat bandwidth.gp bandwidth.eps bandwidth.svg *.csv \ *.md *.Rmd *.pdf *.html $(am__append_123) BUILT_SOURCES = SUBDIRS = $(am__append_19) examplebindir = $(libdir)/starpu/examples @STARPU_USE_OPENCL_TRUE@nobase_STARPU_OPENCL_DATA_DATA = \ @STARPU_USE_OPENCL_TRUE@ variable/increment_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ datawizard/scratch_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ datawizard/sync_and_notify_data_opencl_codelet.cl \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/block/block_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/tensor/tensor_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/ndim/ndim_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/coo/coo_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/csr/csr_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/vector/vector_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/matrix/matrix_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ datawizard/interfaces/variable/variable_opencl_kernel.cl \ @STARPU_USE_OPENCL_TRUE@ perfmodels/opencl_memset_kernel.cl testbindir = $(libdir)/starpu/tests SHELL_TESTS = $(am__append_8) $(am__append_16) $(am__append_17) \ datawizard/locality.sh microbenchs/bandwidth_scheds.sh \ $(am__append_18) myPROGRAMS = main/callback main/bind main/mkdtemp \ main/execute_schedule main/insert_task_pack \ main/insert_task_nullcodelet main/insert_task_where \ main/multithreaded_init main/empty_task main/empty_task_chain \ main/starpu_worker_exists main/codelet_null_callback \ datawizard/allocate datawizard/acquire_cb datawizard/deps \ datawizard/user_interaction_implicit \ datawizard/interfaces/copy_interfaces datawizard/numa_overflow \ datawizard/locality datawizard/variable_size \ errorcheck/starpu_init_noworker errorcheck/invalid_tasks \ helper/cublas_init helper/cublasLt_init helper/cusparse_init \ helper/hipblas_init helper/pinned_memory helper/execute_on_all \ microbenchs/display_structures_size microbenchs/local_pingpong \ overlap/overlap sched_ctx/sched_ctx_list \ sched_ctx/sched_ctx_policy_data openmp/init_exit_01 \ openmp/init_exit_02 openmp/environment openmp/api_01 \ openmp/parallel_01 openmp/parallel_02 openmp/parallel_03 \ openmp/parallel_barrier_01 openmp/parallel_master_01 \ openmp/parallel_master_inline_01 \ openmp/parallel_single_wait_01 \ openmp/parallel_single_nowait_01 \ openmp/parallel_single_inline_01 \ openmp/parallel_single_copyprivate_01 \ openmp/parallel_single_copyprivate_inline_01 \ openmp/parallel_critical_01 openmp/parallel_critical_inline_01 \ openmp/parallel_critical_named_01 \ openmp/parallel_critical_named_inline_01 \ openmp/parallel_simple_lock_01 openmp/parallel_nested_lock_01 \ openmp/parallel_for_01 openmp/parallel_for_02 \ openmp/parallel_for_ordered_01 openmp/parallel_sections_01 \ openmp/parallel_sections_combined_01 openmp/task_01 \ openmp/task_02 openmp/task_03 openmp/taskloop \ openmp/taskwait_01 openmp/taskgroup_01 openmp/taskgroup_02 \ openmp/array_slice_01 openmp/cuda_task_01 perfmodels/value_nan \ sched_policies/workerids $(am__append_9) $(am__append_10) \ $(am__append_12) $(am__append_13) $(am__append_14) MICROBENCHS = \ microbenchs/parallel_independent_homogeneous_tasks \ microbenchs/parallel_independent_heterogeneous_tasks \ microbenchs/parallel_independent_homogeneous_tasks_data \ microbenchs/parallel_independent_heterogeneous_tasks_data \ microbenchs/parallel_redux_homogeneous_tasks_data \ microbenchs/parallel_redux_heterogeneous_tasks_data \ microbenchs/parallel_dependent_homogeneous_tasks_data examplebin_SCRIPTS = \ microbenchs/tasks_data_overhead.sh \ microbenchs/sync_tasks_data_overhead.sh \ microbenchs/async_tasks_data_overhead.sh \ microbenchs/tasks_size_overhead.gp \ microbenchs/tasks_size_overhead.sh ####################### # Source files # ####################### datawizard_acquire_release_SOURCES = datawizard/acquire_release.c \ variable/increment.c $(am__append_20) $(am__append_21) \ $(am__append_22) datawizard_acquire_release2_SOURCES = datawizard/acquire_release2.c \ variable/increment.c $(am__append_23) $(am__append_24) \ $(am__append_25) datawizard_acquire_release_to_SOURCES = \ datawizard/acquire_release_to.c variable/increment.c \ $(am__append_26) $(am__append_27) $(am__append_28) datawizard_wt_host_SOURCES = datawizard/wt_host.c variable/increment.c \ $(am__append_29) $(am__append_30) $(am__append_31) datawizard_wt_broadcast_SOURCES = datawizard/wt_broadcast.c \ variable/increment.c $(am__append_32) $(am__append_33) \ $(am__append_34) datawizard_increment_redux_lazy_SOURCES = \ datawizard/increment_redux_lazy.c variable/increment.c \ $(am__append_35) $(am__append_36) $(am__append_37) datawizard_increment_redux_SOURCES = datawizard/increment_redux.c \ variable/increment.c $(am__append_38) $(am__append_39) \ $(am__append_40) datawizard_increment_redux_partition_SOURCES = \ datawizard/increment_redux_partition.c variable/increment.c \ $(am__append_41) $(am__append_42) $(am__append_43) datawizard_increment_redux_v2_SOURCES = \ datawizard/increment_redux_v2.c variable/increment.c \ $(am__append_44) $(am__append_45) $(am__append_46) datawizard_increment_redux_with_args_SOURCES = \ datawizard/increment_redux_with_args.c variable/increment.c \ $(am__append_47) $(am__append_48) $(am__append_49) datawizard_increment_init_SOURCES = datawizard/increment_init.c \ variable/increment.c $(am__append_50) $(am__append_51) \ $(am__append_52) datawizard_scratch_SOURCES = datawizard/scratch.c $(am__append_53) \ $(am__append_54) datawizard_mpi_like_SOURCES = datawizard/mpi_like.c \ variable/increment.c $(am__append_55) $(am__append_56) \ $(am__append_57) datawizard_mpi_like_async_SOURCES = datawizard/mpi_like_async.c \ variable/increment.c $(am__append_58) $(am__append_59) \ $(am__append_60) datawizard_sync_and_notify_data_SOURCES = \ datawizard/sync_and_notify_data.c $(am__append_61) \ $(am__append_62) datawizard_sync_and_notify_data_implicit_SOURCES = \ datawizard/sync_and_notify_data_implicit.c $(am__append_63) \ $(am__append_64) datawizard_in_place_partition_SOURCES = \ datawizard/in_place_partition.c datawizard/scal.c \ $(am__append_65) $(am__append_66) datawizard_partition_dep_SOURCES = datawizard/partition_dep.c \ datawizard/scal.c $(am__append_67) $(am__append_68) datawizard_partition_lazy_SOURCES = datawizard/partition_lazy.c \ datawizard/scal.c $(am__append_69) $(am__append_70) datawizard_gpu_register_SOURCES = datawizard/gpu_register.c \ datawizard/scal.c $(am__append_71) $(am__append_72) datawizard_gpu_ptr_register_SOURCES = datawizard/gpu_ptr_register.c \ datawizard/scal.c $(am__append_73) $(am__append_74) datawizard_specific_node_SOURCES = datawizard/specific_node.c \ variable/increment.c $(am__append_75) $(am__append_76) \ $(am__append_77) datawizard_test_arbiter_SOURCES = \ datawizard/test_arbiter.cpp main_starpu_worker_exists_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) main_deprecated_func_CFLAGS = $(AM_CFLAGS) -Wno-deprecated-declarations main_insert_task_where_SOURCES = main/insert_task_where.c \ variable/increment.c $(am__append_78) $(am__append_79) \ $(am__append_80) main_subgraph_repeat_SOURCES = main/subgraph_repeat.c \ variable/increment.c $(am__append_81) $(am__append_82) \ $(am__append_83) main_subgraph_repeat_tag_SOURCES = main/subgraph_repeat_tag.c \ variable/increment.c $(am__append_84) $(am__append_85) \ $(am__append_86) main_subgraph_repeat_regenerate_SOURCES = \ main/subgraph_repeat_regenerate.c variable/increment.c \ $(am__append_87) $(am__append_88) $(am__append_89) main_subgraph_repeat_regenerate_tag_SOURCES = \ main/subgraph_repeat_regenerate_tag.c variable/increment.c \ $(am__append_90) $(am__append_91) $(am__append_92) main_subgraph_repeat_regenerate_tag_cycle_SOURCES = \ main/subgraph_repeat_regenerate_tag_cycle.c \ variable/increment.c $(am__append_93) $(am__append_94) \ $(am__append_95) @STARPU_HAVE_FC_TRUE@fortran90_init_01_SOURCES = \ @STARPU_HAVE_FC_TRUE@ fortran90/starpu_mod.f90 \ @STARPU_HAVE_FC_TRUE@ fortran90/init_01.f90 helper_starpu_data_dup_ro_SOURCES = helper/starpu_data_dup_ro.c \ variable/increment.c $(am__append_96) $(am__append_97) \ $(am__append_98) datawizard_interfaces_copy_interfaces_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) datawizard_data_register_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) ################### # Block interface # ################### datawizard_interfaces_block_block_interface_SOURCES = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/block/block_interface.c $(am__append_99) \ $(am__append_100) datawizard_interfaces_block_block_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) ################### # Tensor interface # ################### datawizard_interfaces_tensor_tensor_interface_SOURCES = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/tensor/tensor_interface.c \ $(am__append_101) $(am__append_102) datawizard_interfaces_tensor_tensor_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) ################### # Ndim interface # ################### datawizard_interfaces_ndim_ndim_interface_SOURCES = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/ndim/ndim_interface.c $(am__append_103) \ $(am__append_104) datawizard_interfaces_ndim_ndim_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) ################## # BSCR interface # ################## datawizard_interfaces_bcsr_bcsr_interface_SOURCES = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/bcsr/bcsr_interface.c $(am__append_105) \ $(am__append_106) datawizard_interfaces_bcsr_bcsr_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) ################# # COO interface # ################# datawizard_interfaces_coo_coo_interface_SOURCES = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/coo/coo_interface.c $(am__append_107) \ $(am__append_108) datawizard_interfaces_coo_coo_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) ################# # CSR interface # ################# datawizard_interfaces_csr_csr_interface_SOURCES = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/csr/csr_interface.c $(am__append_109) \ $(am__append_110) datawizard_interfaces_csr_csr_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) #################### # Vector interface # #################### datawizard_interfaces_vector_vector_interface_SOURCES = \ datawizard/interfaces/vector/vector_interface.c \ datawizard/interfaces/test_interfaces.c $(am__append_111) \ $(am__append_112) datawizard_interfaces_vector_vector_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) #################### # Matrix interface # #################### datawizard_interfaces_matrix_matrix_interface_SOURCES = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/matrix/matrix_interface.c \ $(am__append_113) $(am__append_114) datawizard_interfaces_matrix_matrix_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) ######################### # Multiformat interface # ######################### datawizard_interfaces_multiformat_multiformat_interface_SOURCES = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/multiformat/multiformat_interface.c \ datawizard/interfaces/multiformat/multiformat_conversion_codelets.c \ $(am__append_115) $(am__append_116) datawizard_interfaces_multiformat_multiformat_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_SOURCES = \ datawizard/interfaces/multiformat/advanced/generic.c \ datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.c datawizard_interfaces_multiformat_advanced_multiformat_data_release_SOURCES = \ datawizard/interfaces/multiformat/advanced/generic.c \ datawizard/interfaces/multiformat/advanced/multiformat_data_release.c datawizard_interfaces_multiformat_advanced_multiformat_worker_SOURCES = \ datawizard/interfaces/multiformat/advanced/generic.c \ datawizard/interfaces/multiformat/advanced/multiformat_worker.c datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_SOURCES = \ datawizard/interfaces/multiformat/advanced/generic.c \ datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.c datawizard_interfaces_multiformat_advanced_same_handle_SOURCES = \ datawizard/interfaces/multiformat/advanced/generic.c \ datawizard/interfaces/multiformat/advanced/same_handle.c ###################### # Variable interface # ###################### datawizard_interfaces_variable_variable_interface_SOURCES = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/variable/variable_interface.c \ $(am__append_117) $(am__append_118) datawizard_interfaces_variable_variable_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) ################## # Void interface # ################## datawizard_interfaces_void_void_interface_SOURCES = \ datawizard/interfaces/test_interfaces.c \ datawizard/interfaces/void/void_interface.c datawizard_interfaces_void_void_interface_CFLAGS = $(AM_CFLAGS) $(FXT_CFLAGS) overlap_gpu_concurrency_SOURCES = overlap/gpu_concurrency.c \ $(am__append_119) perfmodels_regression_based_memset_SOURCES = \ perfmodels/regression_based_memset.c $(am__append_120) perfmodels_regression_based_gpu_SOURCES = \ perfmodels/regression_based_gpu.c $(am__append_121) maxfpga_max_fpga_basic_static_LDADD = $(LDADD) \ maxfpga/slic_MyTasks.o maxfpga_max_fpga_advanced_static_LDADD = $(LDADD) \ maxfpga/slic_MyTasks.o maxfpga_max_fpga_dynamic_LDADD = $(LDADD) \ maxfpga/slic_MyTasks.o maxfpga_max_fpga_mux_LDADD = $(LDADD) \ maxfpga/slic_MyTasksMux.o perfmodels_non_linear_regression_based_SOURCES = \ perfmodels/non_linear_regression_based.c $(am__append_122) sched_policies_execute_all_tasks_LDFLAGS = $(AM_LDFLAGS) -lm sched_policies_execute_all_tasks_CFLAGS = $(AM_LDFLAGS) $(FXT_CFLAGS) @STARPU_USE_MAX_FPGA_TRUE@MAX_DFE = MAX5C_$(MAX_TARGET) @STARPU_USE_MAX_FPGA_TRUE@MAXJ = $(wildcard maxfpga/*.maxj) @STARPU_USE_MAX_FPGA_TRUE@CLASS = $(MAXJ:.maxj=.class) all: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) all-recursive .SUFFIXES: .SUFFIXES: .c .cpp .cu .cubin .f90 .hip .lo .log .o .obj .test .test$(EXEEXT) .trs $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign tests/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign tests/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-checkPROGRAMS: @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list install-examplebinPROGRAMS: $(examplebin_PROGRAMS) @$(NORMAL_INSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ fi; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ while read p p1; do if test -f $$p \ || test -f $$p1 \ ; then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' \ -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ } \ ; done uninstall-examplebinPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(examplebin_PROGRAMS)'; test -n "$(examplebindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' \ `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(examplebindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(examplebindir)" && rm -f $$files clean-examplebinPROGRAMS: @list='$(examplebin_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list clean-noinstPROGRAMS: @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list datawizard/$(am__dirstamp): @$(MKDIR_P) datawizard @: > datawizard/$(am__dirstamp) datawizard/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/$(DEPDIR) @: > datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/acquire_cb.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/acquire_cb$(EXEEXT): $(datawizard_acquire_cb_OBJECTS) $(datawizard_acquire_cb_DEPENDENCIES) $(EXTRA_datawizard_acquire_cb_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/acquire_cb$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_acquire_cb_OBJECTS) $(datawizard_acquire_cb_LDADD) $(LIBS) datawizard/acquire_cb_insert.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/acquire_cb_insert$(EXEEXT): $(datawizard_acquire_cb_insert_OBJECTS) $(datawizard_acquire_cb_insert_DEPENDENCIES) $(EXTRA_datawizard_acquire_cb_insert_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/acquire_cb_insert$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_acquire_cb_insert_OBJECTS) $(datawizard_acquire_cb_insert_LDADD) $(LIBS) datawizard/acquire_release.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) variable/$(am__dirstamp): @$(MKDIR_P) variable @: > variable/$(am__dirstamp) variable/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) variable/$(DEPDIR) @: > variable/$(DEPDIR)/$(am__dirstamp) variable/increment.$(OBJEXT): variable/$(am__dirstamp) \ variable/$(DEPDIR)/$(am__dirstamp) variable/increment_cuda.$(OBJEXT): variable/$(am__dirstamp) \ variable/$(DEPDIR)/$(am__dirstamp) variable/increment_hip.$(OBJEXT): variable/$(am__dirstamp) \ variable/$(DEPDIR)/$(am__dirstamp) variable/increment_opencl.$(OBJEXT): variable/$(am__dirstamp) \ variable/$(DEPDIR)/$(am__dirstamp) datawizard/acquire_release$(EXEEXT): $(datawizard_acquire_release_OBJECTS) $(datawizard_acquire_release_DEPENDENCIES) $(EXTRA_datawizard_acquire_release_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/acquire_release$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_acquire_release_OBJECTS) $(datawizard_acquire_release_LDADD) $(LIBS) datawizard/acquire_release2.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/acquire_release2$(EXEEXT): $(datawizard_acquire_release2_OBJECTS) $(datawizard_acquire_release2_DEPENDENCIES) $(EXTRA_datawizard_acquire_release2_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/acquire_release2$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_acquire_release2_OBJECTS) $(datawizard_acquire_release2_LDADD) $(LIBS) datawizard/acquire_release_to.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/acquire_release_to$(EXEEXT): $(datawizard_acquire_release_to_OBJECTS) $(datawizard_acquire_release_to_DEPENDENCIES) $(EXTRA_datawizard_acquire_release_to_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/acquire_release_to$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_acquire_release_to_OBJECTS) $(datawizard_acquire_release_to_LDADD) $(LIBS) datawizard/acquire_try.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/acquire_try$(EXEEXT): $(datawizard_acquire_try_OBJECTS) $(datawizard_acquire_try_DEPENDENCIES) $(EXTRA_datawizard_acquire_try_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/acquire_try$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_acquire_try_OBJECTS) $(datawizard_acquire_try_LDADD) $(LIBS) datawizard/allocate.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/allocate$(EXEEXT): $(datawizard_allocate_OBJECTS) $(datawizard_allocate_DEPENDENCIES) $(EXTRA_datawizard_allocate_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/allocate$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_allocate_OBJECTS) $(datawizard_allocate_LDADD) $(LIBS) datawizard/allocate_many_numa_nodes.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/allocate_many_numa_nodes$(EXEEXT): $(datawizard_allocate_many_numa_nodes_OBJECTS) $(datawizard_allocate_many_numa_nodes_DEPENDENCIES) $(EXTRA_datawizard_allocate_many_numa_nodes_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/allocate_many_numa_nodes$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_allocate_many_numa_nodes_OBJECTS) $(datawizard_allocate_many_numa_nodes_LDADD) $(LIBS) datawizard/bcsr.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/bcsr$(EXEEXT): $(datawizard_bcsr_OBJECTS) $(datawizard_bcsr_DEPENDENCIES) $(EXTRA_datawizard_bcsr_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/bcsr$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_bcsr_OBJECTS) $(datawizard_bcsr_LDADD) $(LIBS) datawizard/cache.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/cache$(EXEEXT): $(datawizard_cache_OBJECTS) $(datawizard_cache_DEPENDENCIES) $(EXTRA_datawizard_cache_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/cache$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_cache_OBJECTS) $(datawizard_cache_LDADD) $(LIBS) datawizard/commute.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/commute$(EXEEXT): $(datawizard_commute_OBJECTS) $(datawizard_commute_DEPENDENCIES) $(EXTRA_datawizard_commute_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/commute$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_commute_OBJECTS) $(datawizard_commute_LDADD) $(LIBS) datawizard/commute2.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/commute2$(EXEEXT): $(datawizard_commute2_OBJECTS) $(datawizard_commute2_DEPENDENCIES) $(EXTRA_datawizard_commute2_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/commute2$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_commute2_OBJECTS) $(datawizard_commute2_LDADD) $(LIBS) datawizard/copy.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/copy$(EXEEXT): $(datawizard_copy_OBJECTS) $(datawizard_copy_DEPENDENCIES) $(EXTRA_datawizard_copy_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/copy$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_copy_OBJECTS) $(datawizard_copy_LDADD) $(LIBS) datawizard/critical_section_with_void_interface.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/critical_section_with_void_interface$(EXEEXT): $(datawizard_critical_section_with_void_interface_OBJECTS) $(datawizard_critical_section_with_void_interface_DEPENDENCIES) $(EXTRA_datawizard_critical_section_with_void_interface_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/critical_section_with_void_interface$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_critical_section_with_void_interface_OBJECTS) $(datawizard_critical_section_with_void_interface_LDADD) $(LIBS) datawizard/data_deinitialize.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/data_deinitialize$(EXEEXT): $(datawizard_data_deinitialize_OBJECTS) $(datawizard_data_deinitialize_DEPENDENCIES) $(EXTRA_datawizard_data_deinitialize_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/data_deinitialize$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_data_deinitialize_OBJECTS) $(datawizard_data_deinitialize_LDADD) $(LIBS) datawizard/data_implicit_deps.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/data_implicit_deps$(EXEEXT): $(datawizard_data_implicit_deps_OBJECTS) $(datawizard_data_implicit_deps_DEPENDENCIES) $(EXTRA_datawizard_data_implicit_deps_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/data_implicit_deps$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_data_implicit_deps_OBJECTS) $(datawizard_data_implicit_deps_LDADD) $(LIBS) datawizard/data_invalidation.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/data_invalidation$(EXEEXT): $(datawizard_data_invalidation_OBJECTS) $(datawizard_data_invalidation_DEPENDENCIES) $(EXTRA_datawizard_data_invalidation_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/data_invalidation$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_data_invalidation_OBJECTS) $(datawizard_data_invalidation_LDADD) $(LIBS) datawizard/data_register-data_register.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/data_register$(EXEEXT): $(datawizard_data_register_OBJECTS) $(datawizard_data_register_DEPENDENCIES) $(EXTRA_datawizard_data_register_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/data_register$(EXEEXT) $(AM_V_CCLD)$(datawizard_data_register_LINK) $(datawizard_data_register_OBJECTS) $(datawizard_data_register_LDADD) $(LIBS) datawizard/deinitialize_pending_requests.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/deinitialize_pending_requests$(EXEEXT): $(datawizard_deinitialize_pending_requests_OBJECTS) $(datawizard_deinitialize_pending_requests_DEPENDENCIES) $(EXTRA_datawizard_deinitialize_pending_requests_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/deinitialize_pending_requests$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_deinitialize_pending_requests_OBJECTS) $(datawizard_deinitialize_pending_requests_LDADD) $(LIBS) datawizard/deps.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/deps$(EXEEXT): $(datawizard_deps_OBJECTS) $(datawizard_deps_DEPENDENCIES) $(EXTRA_datawizard_deps_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/deps$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_deps_OBJECTS) $(datawizard_deps_LDADD) $(LIBS) datawizard/dining_philosophers.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/dining_philosophers$(EXEEXT): $(datawizard_dining_philosophers_OBJECTS) $(datawizard_dining_philosophers_DEPENDENCIES) $(EXTRA_datawizard_dining_philosophers_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/dining_philosophers$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_dining_philosophers_OBJECTS) $(datawizard_dining_philosophers_LDADD) $(LIBS) datawizard/double_parameter.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/double_parameter$(EXEEXT): $(datawizard_double_parameter_OBJECTS) $(datawizard_double_parameter_DEPENDENCIES) $(EXTRA_datawizard_double_parameter_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/double_parameter$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_double_parameter_OBJECTS) $(datawizard_double_parameter_LDADD) $(LIBS) datawizard/dsm_stress.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/dsm_stress$(EXEEXT): $(datawizard_dsm_stress_OBJECTS) $(datawizard_dsm_stress_DEPENDENCIES) $(EXTRA_datawizard_dsm_stress_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/dsm_stress$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_dsm_stress_OBJECTS) $(datawizard_dsm_stress_LDADD) $(LIBS) datawizard/gpu_ptr_register.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/scal.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/scal_cuda.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/gpu_ptr_register$(EXEEXT): $(datawizard_gpu_ptr_register_OBJECTS) $(datawizard_gpu_ptr_register_DEPENDENCIES) $(EXTRA_datawizard_gpu_ptr_register_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/gpu_ptr_register$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_gpu_ptr_register_OBJECTS) $(datawizard_gpu_ptr_register_LDADD) $(LIBS) datawizard/gpu_register.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/gpu_register$(EXEEXT): $(datawizard_gpu_register_OBJECTS) $(datawizard_gpu_register_DEPENDENCIES) $(EXTRA_datawizard_gpu_register_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/gpu_register$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_gpu_register_OBJECTS) $(datawizard_gpu_register_LDADD) $(LIBS) datawizard/handle_to_pointer.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/handle_to_pointer$(EXEEXT): $(datawizard_handle_to_pointer_OBJECTS) $(datawizard_handle_to_pointer_DEPENDENCIES) $(EXTRA_datawizard_handle_to_pointer_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/handle_to_pointer$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_handle_to_pointer_OBJECTS) $(datawizard_handle_to_pointer_LDADD) $(LIBS) datawizard/in_place_partition.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/in_place_partition$(EXEEXT): $(datawizard_in_place_partition_OBJECTS) $(datawizard_in_place_partition_DEPENDENCIES) $(EXTRA_datawizard_in_place_partition_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/in_place_partition$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_in_place_partition_OBJECTS) $(datawizard_in_place_partition_LDADD) $(LIBS) datawizard/increment_init.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/increment_init$(EXEEXT): $(datawizard_increment_init_OBJECTS) $(datawizard_increment_init_DEPENDENCIES) $(EXTRA_datawizard_increment_init_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/increment_init$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_increment_init_OBJECTS) $(datawizard_increment_init_LDADD) $(LIBS) datawizard/increment_redux.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/increment_redux$(EXEEXT): $(datawizard_increment_redux_OBJECTS) $(datawizard_increment_redux_DEPENDENCIES) $(EXTRA_datawizard_increment_redux_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/increment_redux$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_increment_redux_OBJECTS) $(datawizard_increment_redux_LDADD) $(LIBS) datawizard/increment_redux_lazy.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/increment_redux_lazy$(EXEEXT): $(datawizard_increment_redux_lazy_OBJECTS) $(datawizard_increment_redux_lazy_DEPENDENCIES) $(EXTRA_datawizard_increment_redux_lazy_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/increment_redux_lazy$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_increment_redux_lazy_OBJECTS) $(datawizard_increment_redux_lazy_LDADD) $(LIBS) datawizard/increment_redux_partition.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/increment_redux_partition$(EXEEXT): $(datawizard_increment_redux_partition_OBJECTS) $(datawizard_increment_redux_partition_DEPENDENCIES) $(EXTRA_datawizard_increment_redux_partition_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/increment_redux_partition$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_increment_redux_partition_OBJECTS) $(datawizard_increment_redux_partition_LDADD) $(LIBS) datawizard/increment_redux_v2.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/increment_redux_v2$(EXEEXT): $(datawizard_increment_redux_v2_OBJECTS) $(datawizard_increment_redux_v2_DEPENDENCIES) $(EXTRA_datawizard_increment_redux_v2_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/increment_redux_v2$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_increment_redux_v2_OBJECTS) $(datawizard_increment_redux_v2_LDADD) $(LIBS) datawizard/increment_redux_with_args.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/increment_redux_with_args$(EXEEXT): $(datawizard_increment_redux_with_args_OBJECTS) $(datawizard_increment_redux_with_args_DEPENDENCIES) $(EXTRA_datawizard_increment_redux_with_args_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/increment_redux_with_args$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_increment_redux_with_args_OBJECTS) $(datawizard_increment_redux_with_args_LDADD) $(LIBS) datawizard/interfaces/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces @: > datawizard/interfaces/$(am__dirstamp) datawizard/interfaces/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/$(DEPDIR) @: > datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.$(OBJEXT): \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/bcsr/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/bcsr @: > datawizard/interfaces/bcsr/$(am__dirstamp) datawizard/interfaces/bcsr/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/bcsr/$(DEPDIR) @: > datawizard/interfaces/bcsr/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.$(OBJEXT): \ datawizard/interfaces/bcsr/$(am__dirstamp) \ datawizard/interfaces/bcsr/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/bcsr/bcsr_cuda.$(OBJEXT): \ datawizard/interfaces/bcsr/$(am__dirstamp) \ datawizard/interfaces/bcsr/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.$(OBJEXT): \ datawizard/interfaces/bcsr/$(am__dirstamp) \ datawizard/interfaces/bcsr/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/bcsr/bcsr_interface$(EXEEXT): $(datawizard_interfaces_bcsr_bcsr_interface_OBJECTS) $(datawizard_interfaces_bcsr_bcsr_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_bcsr_bcsr_interface_DEPENDENCIES) datawizard/interfaces/bcsr/$(am__dirstamp) @rm -f datawizard/interfaces/bcsr/bcsr_interface$(EXEEXT) $(AM_V_CCLD)$(datawizard_interfaces_bcsr_bcsr_interface_LINK) $(datawizard_interfaces_bcsr_bcsr_interface_OBJECTS) $(datawizard_interfaces_bcsr_bcsr_interface_LDADD) $(LIBS) datawizard/interfaces/block_block_interface-test_interfaces.$(OBJEXT): \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/block/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/block @: > datawizard/interfaces/block/$(am__dirstamp) datawizard/interfaces/block/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/block/$(DEPDIR) @: > datawizard/interfaces/block/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/block/block_interface-block_interface.$(OBJEXT): \ datawizard/interfaces/block/$(am__dirstamp) \ datawizard/interfaces/block/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/block/block_cuda.$(OBJEXT): \ datawizard/interfaces/block/$(am__dirstamp) \ datawizard/interfaces/block/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/block/block_interface-block_opencl.$(OBJEXT): \ datawizard/interfaces/block/$(am__dirstamp) \ datawizard/interfaces/block/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/block/block_interface$(EXEEXT): $(datawizard_interfaces_block_block_interface_OBJECTS) $(datawizard_interfaces_block_block_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_block_block_interface_DEPENDENCIES) datawizard/interfaces/block/$(am__dirstamp) @rm -f datawizard/interfaces/block/block_interface$(EXEEXT) $(AM_V_CCLD)$(datawizard_interfaces_block_block_interface_LINK) $(datawizard_interfaces_block_block_interface_OBJECTS) $(datawizard_interfaces_block_block_interface_LDADD) $(LIBS) datawizard/interfaces/coo_coo_interface-test_interfaces.$(OBJEXT): \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/coo/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/coo @: > datawizard/interfaces/coo/$(am__dirstamp) datawizard/interfaces/coo/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/coo/$(DEPDIR) @: > datawizard/interfaces/coo/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/coo/coo_interface-coo_interface.$(OBJEXT): \ datawizard/interfaces/coo/$(am__dirstamp) \ datawizard/interfaces/coo/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/coo/coo_cuda.$(OBJEXT): \ datawizard/interfaces/coo/$(am__dirstamp) \ datawizard/interfaces/coo/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/coo/coo_interface-coo_opencl.$(OBJEXT): \ datawizard/interfaces/coo/$(am__dirstamp) \ datawizard/interfaces/coo/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/coo/coo_interface$(EXEEXT): $(datawizard_interfaces_coo_coo_interface_OBJECTS) $(datawizard_interfaces_coo_coo_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_coo_coo_interface_DEPENDENCIES) datawizard/interfaces/coo/$(am__dirstamp) @rm -f datawizard/interfaces/coo/coo_interface$(EXEEXT) $(AM_V_CCLD)$(datawizard_interfaces_coo_coo_interface_LINK) $(datawizard_interfaces_coo_coo_interface_OBJECTS) $(datawizard_interfaces_coo_coo_interface_LDADD) $(LIBS) datawizard/interfaces/copy_interfaces-copy_interfaces.$(OBJEXT): \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/copy_interfaces$(EXEEXT): $(datawizard_interfaces_copy_interfaces_OBJECTS) $(datawizard_interfaces_copy_interfaces_DEPENDENCIES) $(EXTRA_datawizard_interfaces_copy_interfaces_DEPENDENCIES) datawizard/interfaces/$(am__dirstamp) @rm -f datawizard/interfaces/copy_interfaces$(EXEEXT) $(AM_V_CCLD)$(datawizard_interfaces_copy_interfaces_LINK) $(datawizard_interfaces_copy_interfaces_OBJECTS) $(datawizard_interfaces_copy_interfaces_LDADD) $(LIBS) datawizard/interfaces/csr_csr_interface-test_interfaces.$(OBJEXT): \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/csr/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/csr @: > datawizard/interfaces/csr/$(am__dirstamp) datawizard/interfaces/csr/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/csr/$(DEPDIR) @: > datawizard/interfaces/csr/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/csr/csr_interface-csr_interface.$(OBJEXT): \ datawizard/interfaces/csr/$(am__dirstamp) \ datawizard/interfaces/csr/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/csr/csr_cuda.$(OBJEXT): \ datawizard/interfaces/csr/$(am__dirstamp) \ datawizard/interfaces/csr/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/csr/csr_interface-csr_opencl.$(OBJEXT): \ datawizard/interfaces/csr/$(am__dirstamp) \ datawizard/interfaces/csr/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/csr/csr_interface$(EXEEXT): $(datawizard_interfaces_csr_csr_interface_OBJECTS) $(datawizard_interfaces_csr_csr_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_csr_csr_interface_DEPENDENCIES) datawizard/interfaces/csr/$(am__dirstamp) @rm -f datawizard/interfaces/csr/csr_interface$(EXEEXT) $(AM_V_CCLD)$(datawizard_interfaces_csr_csr_interface_LINK) $(datawizard_interfaces_csr_csr_interface_OBJECTS) $(datawizard_interfaces_csr_csr_interface_LDADD) $(LIBS) datawizard/interfaces/matrix_matrix_interface-test_interfaces.$(OBJEXT): \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/matrix/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/matrix @: > datawizard/interfaces/matrix/$(am__dirstamp) datawizard/interfaces/matrix/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/matrix/$(DEPDIR) @: > datawizard/interfaces/matrix/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/matrix/matrix_interface-matrix_interface.$(OBJEXT): \ datawizard/interfaces/matrix/$(am__dirstamp) \ datawizard/interfaces/matrix/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/matrix/matrix_cuda.$(OBJEXT): \ datawizard/interfaces/matrix/$(am__dirstamp) \ datawizard/interfaces/matrix/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/matrix/matrix_interface-matrix_opencl.$(OBJEXT): \ datawizard/interfaces/matrix/$(am__dirstamp) \ datawizard/interfaces/matrix/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/matrix/matrix_interface$(EXEEXT): $(datawizard_interfaces_matrix_matrix_interface_OBJECTS) $(datawizard_interfaces_matrix_matrix_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_matrix_matrix_interface_DEPENDENCIES) datawizard/interfaces/matrix/$(am__dirstamp) @rm -f datawizard/interfaces/matrix/matrix_interface$(EXEEXT) $(AM_V_CCLD)$(datawizard_interfaces_matrix_matrix_interface_LINK) $(datawizard_interfaces_matrix_matrix_interface_OBJECTS) $(datawizard_interfaces_matrix_matrix_interface_LDADD) $(LIBS) datawizard/interfaces/multiformat/advanced/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/multiformat/advanced @: > datawizard/interfaces/multiformat/advanced/$(am__dirstamp) datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/multiformat/advanced/$(DEPDIR) @: > datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat/advanced/generic.$(OBJEXT): \ datawizard/interfaces/multiformat/advanced/$(am__dirstamp) \ datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.$(OBJEXT): \ datawizard/interfaces/multiformat/advanced/$(am__dirstamp) \ datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl$(EXEEXT): $(datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_DEPENDENCIES) $(EXTRA_datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_DEPENDENCIES) datawizard/interfaces/multiformat/advanced/$(am__dirstamp) @rm -f datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_LDADD) $(LIBS) datawizard/interfaces/multiformat/advanced/multiformat_data_release.$(OBJEXT): \ datawizard/interfaces/multiformat/advanced/$(am__dirstamp) \ datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat/advanced/multiformat_data_release$(EXEEXT): $(datawizard_interfaces_multiformat_advanced_multiformat_data_release_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_data_release_DEPENDENCIES) $(EXTRA_datawizard_interfaces_multiformat_advanced_multiformat_data_release_DEPENDENCIES) datawizard/interfaces/multiformat/advanced/$(am__dirstamp) @rm -f datawizard/interfaces/multiformat/advanced/multiformat_data_release$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_interfaces_multiformat_advanced_multiformat_data_release_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_data_release_LDADD) $(LIBS) datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.$(OBJEXT): \ datawizard/interfaces/multiformat/advanced/$(am__dirstamp) \ datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion$(EXEEXT): $(datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_DEPENDENCIES) $(EXTRA_datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_DEPENDENCIES) datawizard/interfaces/multiformat/advanced/$(am__dirstamp) @rm -f datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_LDADD) $(LIBS) datawizard/interfaces/multiformat/advanced/multiformat_worker.$(OBJEXT): \ datawizard/interfaces/multiformat/advanced/$(am__dirstamp) \ datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat/advanced/multiformat_worker$(EXEEXT): $(datawizard_interfaces_multiformat_advanced_multiformat_worker_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_worker_DEPENDENCIES) $(EXTRA_datawizard_interfaces_multiformat_advanced_multiformat_worker_DEPENDENCIES) datawizard/interfaces/multiformat/advanced/$(am__dirstamp) @rm -f datawizard/interfaces/multiformat/advanced/multiformat_worker$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_interfaces_multiformat_advanced_multiformat_worker_OBJECTS) $(datawizard_interfaces_multiformat_advanced_multiformat_worker_LDADD) $(LIBS) datawizard/interfaces/multiformat/advanced/same_handle.$(OBJEXT): \ datawizard/interfaces/multiformat/advanced/$(am__dirstamp) \ datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat/advanced/same_handle$(EXEEXT): $(datawizard_interfaces_multiformat_advanced_same_handle_OBJECTS) $(datawizard_interfaces_multiformat_advanced_same_handle_DEPENDENCIES) $(EXTRA_datawizard_interfaces_multiformat_advanced_same_handle_DEPENDENCIES) datawizard/interfaces/multiformat/advanced/$(am__dirstamp) @rm -f datawizard/interfaces/multiformat/advanced/same_handle$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_interfaces_multiformat_advanced_same_handle_OBJECTS) $(datawizard_interfaces_multiformat_advanced_same_handle_LDADD) $(LIBS) datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.$(OBJEXT): \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/multiformat @: > datawizard/interfaces/multiformat/$(am__dirstamp) datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/multiformat/$(DEPDIR) @: > datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.$(OBJEXT): \ datawizard/interfaces/multiformat/$(am__dirstamp) \ datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.$(OBJEXT): \ datawizard/interfaces/multiformat/$(am__dirstamp) \ datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat/multiformat_cuda.$(OBJEXT): \ datawizard/interfaces/multiformat/$(am__dirstamp) \ datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.$(OBJEXT): \ datawizard/interfaces/multiformat/$(am__dirstamp) \ datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.$(OBJEXT): \ datawizard/interfaces/multiformat/$(am__dirstamp) \ datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.$(OBJEXT): \ datawizard/interfaces/multiformat/$(am__dirstamp) \ datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/multiformat/multiformat_interface$(EXEEXT): $(datawizard_interfaces_multiformat_multiformat_interface_OBJECTS) $(datawizard_interfaces_multiformat_multiformat_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_multiformat_multiformat_interface_DEPENDENCIES) datawizard/interfaces/multiformat/$(am__dirstamp) @rm -f datawizard/interfaces/multiformat/multiformat_interface$(EXEEXT) $(AM_V_CCLD)$(datawizard_interfaces_multiformat_multiformat_interface_LINK) $(datawizard_interfaces_multiformat_multiformat_interface_OBJECTS) $(datawizard_interfaces_multiformat_multiformat_interface_LDADD) $(LIBS) datawizard/interfaces/ndim_ndim_interface-test_interfaces.$(OBJEXT): \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/ndim/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/ndim @: > datawizard/interfaces/ndim/$(am__dirstamp) datawizard/interfaces/ndim/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/ndim/$(DEPDIR) @: > datawizard/interfaces/ndim/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/ndim/ndim_interface-ndim_interface.$(OBJEXT): \ datawizard/interfaces/ndim/$(am__dirstamp) \ datawizard/interfaces/ndim/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/ndim/ndim_cuda.$(OBJEXT): \ datawizard/interfaces/ndim/$(am__dirstamp) \ datawizard/interfaces/ndim/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/ndim/ndim_interface-ndim_opencl.$(OBJEXT): \ datawizard/interfaces/ndim/$(am__dirstamp) \ datawizard/interfaces/ndim/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/ndim/ndim_interface$(EXEEXT): $(datawizard_interfaces_ndim_ndim_interface_OBJECTS) $(datawizard_interfaces_ndim_ndim_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_ndim_ndim_interface_DEPENDENCIES) datawizard/interfaces/ndim/$(am__dirstamp) @rm -f datawizard/interfaces/ndim/ndim_interface$(EXEEXT) $(AM_V_CCLD)$(datawizard_interfaces_ndim_ndim_interface_LINK) $(datawizard_interfaces_ndim_ndim_interface_OBJECTS) $(datawizard_interfaces_ndim_ndim_interface_LDADD) $(LIBS) datawizard/interfaces/tensor_tensor_interface-test_interfaces.$(OBJEXT): \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/tensor/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/tensor @: > datawizard/interfaces/tensor/$(am__dirstamp) datawizard/interfaces/tensor/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/tensor/$(DEPDIR) @: > datawizard/interfaces/tensor/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/tensor/tensor_interface-tensor_interface.$(OBJEXT): \ datawizard/interfaces/tensor/$(am__dirstamp) \ datawizard/interfaces/tensor/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/tensor/tensor_cuda.$(OBJEXT): \ datawizard/interfaces/tensor/$(am__dirstamp) \ datawizard/interfaces/tensor/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/tensor/tensor_interface-tensor_opencl.$(OBJEXT): \ datawizard/interfaces/tensor/$(am__dirstamp) \ datawizard/interfaces/tensor/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/tensor/tensor_interface$(EXEEXT): $(datawizard_interfaces_tensor_tensor_interface_OBJECTS) $(datawizard_interfaces_tensor_tensor_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_tensor_tensor_interface_DEPENDENCIES) datawizard/interfaces/tensor/$(am__dirstamp) @rm -f datawizard/interfaces/tensor/tensor_interface$(EXEEXT) $(AM_V_CCLD)$(datawizard_interfaces_tensor_tensor_interface_LINK) $(datawizard_interfaces_tensor_tensor_interface_OBJECTS) $(datawizard_interfaces_tensor_tensor_interface_LDADD) $(LIBS) datawizard/interfaces/variable_variable_interface-test_interfaces.$(OBJEXT): \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/variable/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/variable @: > datawizard/interfaces/variable/$(am__dirstamp) datawizard/interfaces/variable/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/variable/$(DEPDIR) @: > datawizard/interfaces/variable/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/variable/variable_interface-variable_interface.$(OBJEXT): \ datawizard/interfaces/variable/$(am__dirstamp) \ datawizard/interfaces/variable/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/variable/variable_cuda.$(OBJEXT): \ datawizard/interfaces/variable/$(am__dirstamp) \ datawizard/interfaces/variable/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/variable/variable_interface-variable_opencl.$(OBJEXT): \ datawizard/interfaces/variable/$(am__dirstamp) \ datawizard/interfaces/variable/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/variable/variable_interface$(EXEEXT): $(datawizard_interfaces_variable_variable_interface_OBJECTS) $(datawizard_interfaces_variable_variable_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_variable_variable_interface_DEPENDENCIES) datawizard/interfaces/variable/$(am__dirstamp) @rm -f datawizard/interfaces/variable/variable_interface$(EXEEXT) $(AM_V_CCLD)$(datawizard_interfaces_variable_variable_interface_LINK) $(datawizard_interfaces_variable_variable_interface_OBJECTS) $(datawizard_interfaces_variable_variable_interface_LDADD) $(LIBS) datawizard/interfaces/vector/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/vector @: > datawizard/interfaces/vector/$(am__dirstamp) datawizard/interfaces/vector/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/vector/$(DEPDIR) @: > datawizard/interfaces/vector/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/vector/vector_interface-vector_interface.$(OBJEXT): \ datawizard/interfaces/vector/$(am__dirstamp) \ datawizard/interfaces/vector/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/vector_vector_interface-test_interfaces.$(OBJEXT): \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/vector/vector_cuda.$(OBJEXT): \ datawizard/interfaces/vector/$(am__dirstamp) \ datawizard/interfaces/vector/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/vector/vector_interface-vector_opencl.$(OBJEXT): \ datawizard/interfaces/vector/$(am__dirstamp) \ datawizard/interfaces/vector/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/vector/vector_interface$(EXEEXT): $(datawizard_interfaces_vector_vector_interface_OBJECTS) $(datawizard_interfaces_vector_vector_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_vector_vector_interface_DEPENDENCIES) datawizard/interfaces/vector/$(am__dirstamp) @rm -f datawizard/interfaces/vector/vector_interface$(EXEEXT) $(AM_V_CCLD)$(datawizard_interfaces_vector_vector_interface_LINK) $(datawizard_interfaces_vector_vector_interface_OBJECTS) $(datawizard_interfaces_vector_vector_interface_LDADD) $(LIBS) datawizard/interfaces/void_void_interface-test_interfaces.$(OBJEXT): \ datawizard/interfaces/$(am__dirstamp) \ datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/void/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/void @: > datawizard/interfaces/void/$(am__dirstamp) datawizard/interfaces/void/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) datawizard/interfaces/void/$(DEPDIR) @: > datawizard/interfaces/void/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/void/void_interface-void_interface.$(OBJEXT): \ datawizard/interfaces/void/$(am__dirstamp) \ datawizard/interfaces/void/$(DEPDIR)/$(am__dirstamp) datawizard/interfaces/void/void_interface$(EXEEXT): $(datawizard_interfaces_void_void_interface_OBJECTS) $(datawizard_interfaces_void_void_interface_DEPENDENCIES) $(EXTRA_datawizard_interfaces_void_void_interface_DEPENDENCIES) datawizard/interfaces/void/$(am__dirstamp) @rm -f datawizard/interfaces/void/void_interface$(EXEEXT) $(AM_V_CCLD)$(datawizard_interfaces_void_void_interface_LINK) $(datawizard_interfaces_void_void_interface_OBJECTS) $(datawizard_interfaces_void_void_interface_LDADD) $(LIBS) datawizard/invalidate_pending_requests.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/invalidate_pending_requests$(EXEEXT): $(datawizard_invalidate_pending_requests_OBJECTS) $(datawizard_invalidate_pending_requests_DEPENDENCIES) $(EXTRA_datawizard_invalidate_pending_requests_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/invalidate_pending_requests$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_invalidate_pending_requests_OBJECTS) $(datawizard_invalidate_pending_requests_LDADD) $(LIBS) datawizard/lazy_allocation.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/lazy_allocation$(EXEEXT): $(datawizard_lazy_allocation_OBJECTS) $(datawizard_lazy_allocation_DEPENDENCIES) $(EXTRA_datawizard_lazy_allocation_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/lazy_allocation$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_lazy_allocation_OBJECTS) $(datawizard_lazy_allocation_LDADD) $(LIBS) datawizard/locality.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/locality$(EXEEXT): $(datawizard_locality_OBJECTS) $(datawizard_locality_DEPENDENCIES) $(EXTRA_datawizard_locality_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/locality$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_locality_OBJECTS) $(datawizard_locality_LDADD) $(LIBS) datawizard/manual_reduction.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/manual_reduction$(EXEEXT): $(datawizard_manual_reduction_OBJECTS) $(datawizard_manual_reduction_DEPENDENCIES) $(EXTRA_datawizard_manual_reduction_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/manual_reduction$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_manual_reduction_OBJECTS) $(datawizard_manual_reduction_LDADD) $(LIBS) datawizard/mpi_like.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/mpi_like$(EXEEXT): $(datawizard_mpi_like_OBJECTS) $(datawizard_mpi_like_DEPENDENCIES) $(EXTRA_datawizard_mpi_like_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/mpi_like$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_mpi_like_OBJECTS) $(datawizard_mpi_like_LDADD) $(LIBS) datawizard/mpi_like_async.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/mpi_like_async$(EXEEXT): $(datawizard_mpi_like_async_OBJECTS) $(datawizard_mpi_like_async_DEPENDENCIES) $(EXTRA_datawizard_mpi_like_async_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/mpi_like_async$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_mpi_like_async_OBJECTS) $(datawizard_mpi_like_async_LDADD) $(LIBS) datawizard/no_unregister.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/no_unregister$(EXEEXT): $(datawizard_no_unregister_OBJECTS) $(datawizard_no_unregister_DEPENDENCIES) $(EXTRA_datawizard_no_unregister_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/no_unregister$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_no_unregister_OBJECTS) $(datawizard_no_unregister_LDADD) $(LIBS) datawizard/noreclaim.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/noreclaim$(EXEEXT): $(datawizard_noreclaim_OBJECTS) $(datawizard_noreclaim_DEPENDENCIES) $(EXTRA_datawizard_noreclaim_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/noreclaim$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_noreclaim_OBJECTS) $(datawizard_noreclaim_LDADD) $(LIBS) datawizard/nowhere.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/nowhere$(EXEEXT): $(datawizard_nowhere_OBJECTS) $(datawizard_nowhere_DEPENDENCIES) $(EXTRA_datawizard_nowhere_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/nowhere$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_nowhere_OBJECTS) $(datawizard_nowhere_LDADD) $(LIBS) datawizard/numa_overflow.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/numa_overflow$(EXEEXT): $(datawizard_numa_overflow_OBJECTS) $(datawizard_numa_overflow_DEPENDENCIES) $(EXTRA_datawizard_numa_overflow_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/numa_overflow$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_numa_overflow_OBJECTS) $(datawizard_numa_overflow_LDADD) $(LIBS) datawizard/partition_dep.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/partition_dep$(EXEEXT): $(datawizard_partition_dep_OBJECTS) $(datawizard_partition_dep_DEPENDENCIES) $(EXTRA_datawizard_partition_dep_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/partition_dep$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_partition_dep_OBJECTS) $(datawizard_partition_dep_LDADD) $(LIBS) datawizard/partition_init.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/partition_init$(EXEEXT): $(datawizard_partition_init_OBJECTS) $(datawizard_partition_init_DEPENDENCIES) $(EXTRA_datawizard_partition_init_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/partition_init$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_partition_init_OBJECTS) $(datawizard_partition_init_LDADD) $(LIBS) datawizard/partition_lazy.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/partition_lazy$(EXEEXT): $(datawizard_partition_lazy_OBJECTS) $(datawizard_partition_lazy_DEPENDENCIES) $(EXTRA_datawizard_partition_lazy_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/partition_lazy$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_partition_lazy_OBJECTS) $(datawizard_partition_lazy_LDADD) $(LIBS) datawizard/partition_wontuse.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/partition_wontuse$(EXEEXT): $(datawizard_partition_wontuse_OBJECTS) $(datawizard_partition_wontuse_DEPENDENCIES) $(EXTRA_datawizard_partition_wontuse_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/partition_wontuse$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_partition_wontuse_OBJECTS) $(datawizard_partition_wontuse_LDADD) $(LIBS) datawizard/partitioned_acquire.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/partitioned_acquire$(EXEEXT): $(datawizard_partitioned_acquire_OBJECTS) $(datawizard_partitioned_acquire_DEPENDENCIES) $(EXTRA_datawizard_partitioned_acquire_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/partitioned_acquire$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_partitioned_acquire_OBJECTS) $(datawizard_partitioned_acquire_LDADD) $(LIBS) datawizard/partitioned_initialization.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/partitioned_initialization$(EXEEXT): $(datawizard_partitioned_initialization_OBJECTS) $(datawizard_partitioned_initialization_DEPENDENCIES) $(EXTRA_datawizard_partitioned_initialization_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/partitioned_initialization$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_partitioned_initialization_OBJECTS) $(datawizard_partitioned_initialization_LDADD) $(LIBS) datawizard/readers_and_writers.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/readers_and_writers$(EXEEXT): $(datawizard_readers_and_writers_OBJECTS) $(datawizard_readers_and_writers_DEPENDENCIES) $(EXTRA_datawizard_readers_and_writers_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/readers_and_writers$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_readers_and_writers_OBJECTS) $(datawizard_readers_and_writers_LDADD) $(LIBS) datawizard/readonly.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/readonly$(EXEEXT): $(datawizard_readonly_OBJECTS) $(datawizard_readonly_DEPENDENCIES) $(EXTRA_datawizard_readonly_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/readonly$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_readonly_OBJECTS) $(datawizard_readonly_LDADD) $(LIBS) datawizard/reclaim.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/reclaim$(EXEEXT): $(datawizard_reclaim_OBJECTS) $(datawizard_reclaim_DEPENDENCIES) $(EXTRA_datawizard_reclaim_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/reclaim$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_reclaim_OBJECTS) $(datawizard_reclaim_LDADD) $(LIBS) datawizard/redux_acquire.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/redux_acquire$(EXEEXT): $(datawizard_redux_acquire_OBJECTS) $(datawizard_redux_acquire_DEPENDENCIES) $(EXTRA_datawizard_redux_acquire_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/redux_acquire$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_redux_acquire_OBJECTS) $(datawizard_redux_acquire_LDADD) $(LIBS) datawizard/scratch.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/scratch_cuda.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/scratch_opencl.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/scratch$(EXEEXT): $(datawizard_scratch_OBJECTS) $(datawizard_scratch_DEPENDENCIES) $(EXTRA_datawizard_scratch_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/scratch$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_scratch_OBJECTS) $(datawizard_scratch_LDADD) $(LIBS) datawizard/scratch_reuse.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/scratch_reuse$(EXEEXT): $(datawizard_scratch_reuse_OBJECTS) $(datawizard_scratch_reuse_DEPENDENCIES) $(EXTRA_datawizard_scratch_reuse_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/scratch_reuse$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_scratch_reuse_OBJECTS) $(datawizard_scratch_reuse_LDADD) $(LIBS) datawizard/simgrid-locality.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/simgrid-locality$(EXEEXT): $(datawizard_simgrid_locality_OBJECTS) $(datawizard_simgrid_locality_DEPENDENCIES) $(EXTRA_datawizard_simgrid_locality_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/simgrid-locality$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_simgrid_locality_OBJECTS) $(datawizard_simgrid_locality_LDADD) $(LIBS) datawizard/specific_node.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/specific_node$(EXEEXT): $(datawizard_specific_node_OBJECTS) $(datawizard_specific_node_DEPENDENCIES) $(EXTRA_datawizard_specific_node_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/specific_node$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_specific_node_OBJECTS) $(datawizard_specific_node_LDADD) $(LIBS) datawizard/specific_node_same.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/specific_node_same$(EXEEXT): $(datawizard_specific_node_same_OBJECTS) $(datawizard_specific_node_same_DEPENDENCIES) $(EXTRA_datawizard_specific_node_same_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/specific_node_same$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_specific_node_same_OBJECTS) $(datawizard_specific_node_same_LDADD) $(LIBS) datawizard/sync_and_notify_data.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/sync_and_notify_data_kernels.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/sync_and_notify_data_opencl.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/sync_and_notify_data$(EXEEXT): $(datawizard_sync_and_notify_data_OBJECTS) $(datawizard_sync_and_notify_data_DEPENDENCIES) $(EXTRA_datawizard_sync_and_notify_data_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/sync_and_notify_data$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_sync_and_notify_data_OBJECTS) $(datawizard_sync_and_notify_data_LDADD) $(LIBS) datawizard/sync_and_notify_data_implicit.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/sync_and_notify_data_implicit$(EXEEXT): $(datawizard_sync_and_notify_data_implicit_OBJECTS) $(datawizard_sync_and_notify_data_implicit_DEPENDENCIES) $(EXTRA_datawizard_sync_and_notify_data_implicit_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/sync_and_notify_data_implicit$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_sync_and_notify_data_implicit_OBJECTS) $(datawizard_sync_and_notify_data_implicit_LDADD) $(LIBS) datawizard/sync_with_data_with_mem.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/sync_with_data_with_mem$(EXEEXT): $(datawizard_sync_with_data_with_mem_OBJECTS) $(datawizard_sync_with_data_with_mem_DEPENDENCIES) $(EXTRA_datawizard_sync_with_data_with_mem_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/sync_with_data_with_mem$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_sync_with_data_with_mem_OBJECTS) $(datawizard_sync_with_data_with_mem_LDADD) $(LIBS) datawizard/sync_with_data_with_mem_non_blocking.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/sync_with_data_with_mem_non_blocking$(EXEEXT): $(datawizard_sync_with_data_with_mem_non_blocking_OBJECTS) $(datawizard_sync_with_data_with_mem_non_blocking_DEPENDENCIES) $(EXTRA_datawizard_sync_with_data_with_mem_non_blocking_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/sync_with_data_with_mem_non_blocking$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_sync_with_data_with_mem_non_blocking_OBJECTS) $(datawizard_sync_with_data_with_mem_non_blocking_LDADD) $(LIBS) datawizard/sync_with_data_with_mem_non_blocking_implicit.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/sync_with_data_with_mem_non_blocking_implicit$(EXEEXT): $(datawizard_sync_with_data_with_mem_non_blocking_implicit_OBJECTS) $(datawizard_sync_with_data_with_mem_non_blocking_implicit_DEPENDENCIES) $(EXTRA_datawizard_sync_with_data_with_mem_non_blocking_implicit_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/sync_with_data_with_mem_non_blocking_implicit$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_sync_with_data_with_mem_non_blocking_implicit_OBJECTS) $(datawizard_sync_with_data_with_mem_non_blocking_implicit_LDADD) $(LIBS) datawizard/task_with_multiple_time_the_same_handle.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/task_with_multiple_time_the_same_handle$(EXEEXT): $(datawizard_task_with_multiple_time_the_same_handle_OBJECTS) $(datawizard_task_with_multiple_time_the_same_handle_DEPENDENCIES) $(EXTRA_datawizard_task_with_multiple_time_the_same_handle_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/task_with_multiple_time_the_same_handle$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_task_with_multiple_time_the_same_handle_OBJECTS) $(datawizard_task_with_multiple_time_the_same_handle_LDADD) $(LIBS) datawizard/temporary_partition.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/temporary_partition$(EXEEXT): $(datawizard_temporary_partition_OBJECTS) $(datawizard_temporary_partition_DEPENDENCIES) $(EXTRA_datawizard_temporary_partition_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/temporary_partition$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_temporary_partition_OBJECTS) $(datawizard_temporary_partition_LDADD) $(LIBS) datawizard/temporary_partition_implicit.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/temporary_partition_implicit$(EXEEXT): $(datawizard_temporary_partition_implicit_OBJECTS) $(datawizard_temporary_partition_implicit_DEPENDENCIES) $(EXTRA_datawizard_temporary_partition_implicit_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/temporary_partition_implicit$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_temporary_partition_implicit_OBJECTS) $(datawizard_temporary_partition_implicit_LDADD) $(LIBS) datawizard/temporary_partition_read.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/temporary_partition_read$(EXEEXT): $(datawizard_temporary_partition_read_OBJECTS) $(datawizard_temporary_partition_read_DEPENDENCIES) $(EXTRA_datawizard_temporary_partition_read_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/temporary_partition_read$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_temporary_partition_read_OBJECTS) $(datawizard_temporary_partition_read_LDADD) $(LIBS) datawizard/test_arbiter.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/test_arbiter$(EXEEXT): $(datawizard_test_arbiter_OBJECTS) $(datawizard_test_arbiter_DEPENDENCIES) $(EXTRA_datawizard_test_arbiter_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/test_arbiter$(EXEEXT) $(AM_V_CXXLD)$(CXXLINK) $(datawizard_test_arbiter_OBJECTS) $(datawizard_test_arbiter_LDADD) $(LIBS) datawizard/unpartition.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/unpartition$(EXEEXT): $(datawizard_unpartition_OBJECTS) $(datawizard_unpartition_DEPENDENCIES) $(EXTRA_datawizard_unpartition_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/unpartition$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_unpartition_OBJECTS) $(datawizard_unpartition_LDADD) $(LIBS) datawizard/user_interaction_implicit.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/user_interaction_implicit$(EXEEXT): $(datawizard_user_interaction_implicit_OBJECTS) $(datawizard_user_interaction_implicit_DEPENDENCIES) $(EXTRA_datawizard_user_interaction_implicit_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/user_interaction_implicit$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_user_interaction_implicit_OBJECTS) $(datawizard_user_interaction_implicit_LDADD) $(LIBS) datawizard/variable_parameters.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/variable_parameters$(EXEEXT): $(datawizard_variable_parameters_OBJECTS) $(datawizard_variable_parameters_DEPENDENCIES) $(EXTRA_datawizard_variable_parameters_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/variable_parameters$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_variable_parameters_OBJECTS) $(datawizard_variable_parameters_LDADD) $(LIBS) datawizard/variable_size.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/variable_size$(EXEEXT): $(datawizard_variable_size_OBJECTS) $(datawizard_variable_size_DEPENDENCIES) $(EXTRA_datawizard_variable_size_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/variable_size$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_variable_size_OBJECTS) $(datawizard_variable_size_LDADD) $(LIBS) datawizard/write_only_tmp_buffer.$(OBJEXT): \ datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/write_only_tmp_buffer$(EXEEXT): $(datawizard_write_only_tmp_buffer_OBJECTS) $(datawizard_write_only_tmp_buffer_DEPENDENCIES) $(EXTRA_datawizard_write_only_tmp_buffer_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/write_only_tmp_buffer$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_write_only_tmp_buffer_OBJECTS) $(datawizard_write_only_tmp_buffer_LDADD) $(LIBS) datawizard/wt_broadcast.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/wt_broadcast$(EXEEXT): $(datawizard_wt_broadcast_OBJECTS) $(datawizard_wt_broadcast_DEPENDENCIES) $(EXTRA_datawizard_wt_broadcast_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/wt_broadcast$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_wt_broadcast_OBJECTS) $(datawizard_wt_broadcast_LDADD) $(LIBS) datawizard/wt_host.$(OBJEXT): datawizard/$(am__dirstamp) \ datawizard/$(DEPDIR)/$(am__dirstamp) datawizard/wt_host$(EXEEXT): $(datawizard_wt_host_OBJECTS) $(datawizard_wt_host_DEPENDENCIES) $(EXTRA_datawizard_wt_host_DEPENDENCIES) datawizard/$(am__dirstamp) @rm -f datawizard/wt_host$(EXEEXT) $(AM_V_CCLD)$(LINK) $(datawizard_wt_host_OBJECTS) $(datawizard_wt_host_LDADD) $(LIBS) disk/$(am__dirstamp): @$(MKDIR_P) disk @: > disk/$(am__dirstamp) disk/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) disk/$(DEPDIR) @: > disk/$(DEPDIR)/$(am__dirstamp) disk/disk_compute.$(OBJEXT): disk/$(am__dirstamp) \ disk/$(DEPDIR)/$(am__dirstamp) disk/disk_compute$(EXEEXT): $(disk_disk_compute_OBJECTS) $(disk_disk_compute_DEPENDENCIES) $(EXTRA_disk_disk_compute_DEPENDENCIES) disk/$(am__dirstamp) @rm -f disk/disk_compute$(EXEEXT) $(AM_V_CCLD)$(LINK) $(disk_disk_compute_OBJECTS) $(disk_disk_compute_LDADD) $(LIBS) disk/disk_copy.$(OBJEXT): disk/$(am__dirstamp) \ disk/$(DEPDIR)/$(am__dirstamp) disk/disk_copy$(EXEEXT): $(disk_disk_copy_OBJECTS) $(disk_disk_copy_DEPENDENCIES) $(EXTRA_disk_disk_copy_DEPENDENCIES) disk/$(am__dirstamp) @rm -f disk/disk_copy$(EXEEXT) $(AM_V_CCLD)$(LINK) $(disk_disk_copy_OBJECTS) $(disk_disk_copy_LDADD) $(LIBS) disk/disk_copy_to_disk.$(OBJEXT): disk/$(am__dirstamp) \ disk/$(DEPDIR)/$(am__dirstamp) disk/disk_copy_to_disk$(EXEEXT): $(disk_disk_copy_to_disk_OBJECTS) $(disk_disk_copy_to_disk_DEPENDENCIES) $(EXTRA_disk_disk_copy_to_disk_DEPENDENCIES) disk/$(am__dirstamp) @rm -f disk/disk_copy_to_disk$(EXEEXT) $(AM_V_CCLD)$(LINK) $(disk_disk_copy_to_disk_OBJECTS) $(disk_disk_copy_to_disk_LDADD) $(LIBS) disk/disk_copy_unpack.$(OBJEXT): disk/$(am__dirstamp) \ disk/$(DEPDIR)/$(am__dirstamp) disk/disk_copy_unpack$(EXEEXT): $(disk_disk_copy_unpack_OBJECTS) $(disk_disk_copy_unpack_DEPENDENCIES) $(EXTRA_disk_disk_copy_unpack_DEPENDENCIES) disk/$(am__dirstamp) @rm -f disk/disk_copy_unpack$(EXEEXT) $(AM_V_CCLD)$(LINK) $(disk_disk_copy_unpack_OBJECTS) $(disk_disk_copy_unpack_LDADD) $(LIBS) disk/disk_pack.$(OBJEXT): disk/$(am__dirstamp) \ disk/$(DEPDIR)/$(am__dirstamp) disk/disk_pack$(EXEEXT): $(disk_disk_pack_OBJECTS) $(disk_disk_pack_DEPENDENCIES) $(EXTRA_disk_disk_pack_DEPENDENCIES) disk/$(am__dirstamp) @rm -f disk/disk_pack$(EXEEXT) $(AM_V_CCLD)$(LINK) $(disk_disk_pack_OBJECTS) $(disk_disk_pack_LDADD) $(LIBS) disk/mem_reclaim.$(OBJEXT): disk/$(am__dirstamp) \ disk/$(DEPDIR)/$(am__dirstamp) disk/mem_reclaim$(EXEEXT): $(disk_mem_reclaim_OBJECTS) $(disk_mem_reclaim_DEPENDENCIES) $(EXTRA_disk_mem_reclaim_DEPENDENCIES) disk/$(am__dirstamp) @rm -f disk/mem_reclaim$(EXEEXT) $(AM_V_CCLD)$(LINK) $(disk_mem_reclaim_OBJECTS) $(disk_mem_reclaim_LDADD) $(LIBS) energy/$(am__dirstamp): @$(MKDIR_P) energy @: > energy/$(am__dirstamp) energy/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) energy/$(DEPDIR) @: > energy/$(DEPDIR)/$(am__dirstamp) energy/energy_efficiency.$(OBJEXT): energy/$(am__dirstamp) \ energy/$(DEPDIR)/$(am__dirstamp) energy/energy_efficiency$(EXEEXT): $(energy_energy_efficiency_OBJECTS) $(energy_energy_efficiency_DEPENDENCIES) $(EXTRA_energy_energy_efficiency_DEPENDENCIES) energy/$(am__dirstamp) @rm -f energy/energy_efficiency$(EXEEXT) $(AM_V_CCLD)$(LINK) $(energy_energy_efficiency_OBJECTS) $(energy_energy_efficiency_LDADD) $(LIBS) errorcheck/$(am__dirstamp): @$(MKDIR_P) errorcheck @: > errorcheck/$(am__dirstamp) errorcheck/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) errorcheck/$(DEPDIR) @: > errorcheck/$(DEPDIR)/$(am__dirstamp) errorcheck/invalid_blocking_calls.$(OBJEXT): \ errorcheck/$(am__dirstamp) \ errorcheck/$(DEPDIR)/$(am__dirstamp) errorcheck/invalid_blocking_calls$(EXEEXT): $(errorcheck_invalid_blocking_calls_OBJECTS) $(errorcheck_invalid_blocking_calls_DEPENDENCIES) $(EXTRA_errorcheck_invalid_blocking_calls_DEPENDENCIES) errorcheck/$(am__dirstamp) @rm -f errorcheck/invalid_blocking_calls$(EXEEXT) $(AM_V_CCLD)$(LINK) $(errorcheck_invalid_blocking_calls_OBJECTS) $(errorcheck_invalid_blocking_calls_LDADD) $(LIBS) errorcheck/invalid_tasks.$(OBJEXT): errorcheck/$(am__dirstamp) \ errorcheck/$(DEPDIR)/$(am__dirstamp) errorcheck/invalid_tasks$(EXEEXT): $(errorcheck_invalid_tasks_OBJECTS) $(errorcheck_invalid_tasks_DEPENDENCIES) $(EXTRA_errorcheck_invalid_tasks_DEPENDENCIES) errorcheck/$(am__dirstamp) @rm -f errorcheck/invalid_tasks$(EXEEXT) $(AM_V_CCLD)$(LINK) $(errorcheck_invalid_tasks_OBJECTS) $(errorcheck_invalid_tasks_LDADD) $(LIBS) errorcheck/starpu_init_noworker.$(OBJEXT): errorcheck/$(am__dirstamp) \ errorcheck/$(DEPDIR)/$(am__dirstamp) errorcheck/starpu_init_noworker$(EXEEXT): $(errorcheck_starpu_init_noworker_OBJECTS) $(errorcheck_starpu_init_noworker_DEPENDENCIES) $(EXTRA_errorcheck_starpu_init_noworker_DEPENDENCIES) errorcheck/$(am__dirstamp) @rm -f errorcheck/starpu_init_noworker$(EXEEXT) $(AM_V_CCLD)$(LINK) $(errorcheck_starpu_init_noworker_OBJECTS) $(errorcheck_starpu_init_noworker_LDADD) $(LIBS) errorcheck/workers_cpuid.$(OBJEXT): errorcheck/$(am__dirstamp) \ errorcheck/$(DEPDIR)/$(am__dirstamp) errorcheck/workers_cpuid$(EXEEXT): $(errorcheck_workers_cpuid_OBJECTS) $(errorcheck_workers_cpuid_DEPENDENCIES) $(EXTRA_errorcheck_workers_cpuid_DEPENDENCIES) errorcheck/$(am__dirstamp) @rm -f errorcheck/workers_cpuid$(EXEEXT) $(AM_V_CCLD)$(LINK) $(errorcheck_workers_cpuid_OBJECTS) $(errorcheck_workers_cpuid_LDADD) $(LIBS) fault-tolerance/$(am__dirstamp): @$(MKDIR_P) fault-tolerance @: > fault-tolerance/$(am__dirstamp) fault-tolerance/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) fault-tolerance/$(DEPDIR) @: > fault-tolerance/$(DEPDIR)/$(am__dirstamp) fault-tolerance/retry.$(OBJEXT): fault-tolerance/$(am__dirstamp) \ fault-tolerance/$(DEPDIR)/$(am__dirstamp) fault-tolerance/retry$(EXEEXT): $(fault_tolerance_retry_OBJECTS) $(fault_tolerance_retry_DEPENDENCIES) $(EXTRA_fault_tolerance_retry_DEPENDENCIES) fault-tolerance/$(am__dirstamp) @rm -f fault-tolerance/retry$(EXEEXT) $(AM_V_CCLD)$(LINK) $(fault_tolerance_retry_OBJECTS) $(fault_tolerance_retry_LDADD) $(LIBS) fortran90/$(am__dirstamp): @$(MKDIR_P) fortran90 @: > fortran90/$(am__dirstamp) fortran90/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) fortran90/$(DEPDIR) @: > fortran90/$(DEPDIR)/$(am__dirstamp) fortran90/starpu_mod.$(OBJEXT): fortran90/$(am__dirstamp) \ fortran90/$(DEPDIR)/$(am__dirstamp) fortran90/init_01.$(OBJEXT): fortran90/$(am__dirstamp) \ fortran90/$(DEPDIR)/$(am__dirstamp) fortran90/init_01$(EXEEXT): $(fortran90_init_01_OBJECTS) $(fortran90_init_01_DEPENDENCIES) $(EXTRA_fortran90_init_01_DEPENDENCIES) fortran90/$(am__dirstamp) @rm -f fortran90/init_01$(EXEEXT) $(AM_V_FCLD)$(FCLINK) $(fortran90_init_01_OBJECTS) $(fortran90_init_01_LDADD) $(LIBS) helper/$(am__dirstamp): @$(MKDIR_P) helper @: > helper/$(am__dirstamp) helper/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) helper/$(DEPDIR) @: > helper/$(DEPDIR)/$(am__dirstamp) helper/cublasLt_init.$(OBJEXT): helper/$(am__dirstamp) \ helper/$(DEPDIR)/$(am__dirstamp) helper/cublasLt_init$(EXEEXT): $(helper_cublasLt_init_OBJECTS) $(helper_cublasLt_init_DEPENDENCIES) $(EXTRA_helper_cublasLt_init_DEPENDENCIES) helper/$(am__dirstamp) @rm -f helper/cublasLt_init$(EXEEXT) $(AM_V_CCLD)$(LINK) $(helper_cublasLt_init_OBJECTS) $(helper_cublasLt_init_LDADD) $(LIBS) helper/cublas_init.$(OBJEXT): helper/$(am__dirstamp) \ helper/$(DEPDIR)/$(am__dirstamp) helper/cublas_init$(EXEEXT): $(helper_cublas_init_OBJECTS) $(helper_cublas_init_DEPENDENCIES) $(EXTRA_helper_cublas_init_DEPENDENCIES) helper/$(am__dirstamp) @rm -f helper/cublas_init$(EXEEXT) $(AM_V_CCLD)$(LINK) $(helper_cublas_init_OBJECTS) $(helper_cublas_init_LDADD) $(LIBS) helper/cusparse_init.$(OBJEXT): helper/$(am__dirstamp) \ helper/$(DEPDIR)/$(am__dirstamp) helper/cusparse_init$(EXEEXT): $(helper_cusparse_init_OBJECTS) $(helper_cusparse_init_DEPENDENCIES) $(EXTRA_helper_cusparse_init_DEPENDENCIES) helper/$(am__dirstamp) @rm -f helper/cusparse_init$(EXEEXT) $(AM_V_CCLD)$(LINK) $(helper_cusparse_init_OBJECTS) $(helper_cusparse_init_LDADD) $(LIBS) helper/execute_on_all.$(OBJEXT): helper/$(am__dirstamp) \ helper/$(DEPDIR)/$(am__dirstamp) helper/execute_on_all$(EXEEXT): $(helper_execute_on_all_OBJECTS) $(helper_execute_on_all_DEPENDENCIES) $(EXTRA_helper_execute_on_all_DEPENDENCIES) helper/$(am__dirstamp) @rm -f helper/execute_on_all$(EXEEXT) $(AM_V_CCLD)$(LINK) $(helper_execute_on_all_OBJECTS) $(helper_execute_on_all_LDADD) $(LIBS) helper/hipblas_init.$(OBJEXT): helper/$(am__dirstamp) \ helper/$(DEPDIR)/$(am__dirstamp) helper/hipblas_init$(EXEEXT): $(helper_hipblas_init_OBJECTS) $(helper_hipblas_init_DEPENDENCIES) $(EXTRA_helper_hipblas_init_DEPENDENCIES) helper/$(am__dirstamp) @rm -f helper/hipblas_init$(EXEEXT) $(AM_V_CCLD)$(LINK) $(helper_hipblas_init_OBJECTS) $(helper_hipblas_init_LDADD) $(LIBS) helper/pinned_memory.$(OBJEXT): helper/$(am__dirstamp) \ helper/$(DEPDIR)/$(am__dirstamp) helper/pinned_memory$(EXEEXT): $(helper_pinned_memory_OBJECTS) $(helper_pinned_memory_DEPENDENCIES) $(EXTRA_helper_pinned_memory_DEPENDENCIES) helper/$(am__dirstamp) @rm -f helper/pinned_memory$(EXEEXT) $(AM_V_CCLD)$(LINK) $(helper_pinned_memory_OBJECTS) $(helper_pinned_memory_LDADD) $(LIBS) helper/starpu_create_sync_task.$(OBJEXT): helper/$(am__dirstamp) \ helper/$(DEPDIR)/$(am__dirstamp) helper/starpu_create_sync_task$(EXEEXT): $(helper_starpu_create_sync_task_OBJECTS) $(helper_starpu_create_sync_task_DEPENDENCIES) $(EXTRA_helper_starpu_create_sync_task_DEPENDENCIES) helper/$(am__dirstamp) @rm -f helper/starpu_create_sync_task$(EXEEXT) $(AM_V_CCLD)$(LINK) $(helper_starpu_create_sync_task_OBJECTS) $(helper_starpu_create_sync_task_LDADD) $(LIBS) helper/starpu_data_cpy.$(OBJEXT): helper/$(am__dirstamp) \ helper/$(DEPDIR)/$(am__dirstamp) helper/starpu_data_cpy$(EXEEXT): $(helper_starpu_data_cpy_OBJECTS) $(helper_starpu_data_cpy_DEPENDENCIES) $(EXTRA_helper_starpu_data_cpy_DEPENDENCIES) helper/$(am__dirstamp) @rm -f helper/starpu_data_cpy$(EXEEXT) $(AM_V_CCLD)$(LINK) $(helper_starpu_data_cpy_OBJECTS) $(helper_starpu_data_cpy_LDADD) $(LIBS) helper/starpu_data_dup_ro.$(OBJEXT): helper/$(am__dirstamp) \ helper/$(DEPDIR)/$(am__dirstamp) helper/starpu_data_dup_ro$(EXEEXT): $(helper_starpu_data_dup_ro_OBJECTS) $(helper_starpu_data_dup_ro_DEPENDENCIES) $(EXTRA_helper_starpu_data_dup_ro_DEPENDENCIES) helper/$(am__dirstamp) @rm -f helper/starpu_data_dup_ro$(EXEEXT) $(AM_V_CCLD)$(LINK) $(helper_starpu_data_dup_ro_OBJECTS) $(helper_starpu_data_dup_ro_LDADD) $(LIBS) loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) @rm -f loader$(EXEEXT) $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) main/$(am__dirstamp): @$(MKDIR_P) main @: > main/$(am__dirstamp) main/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) main/$(DEPDIR) @: > main/$(DEPDIR)/$(am__dirstamp) main/bind.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/bind$(EXEEXT): $(main_bind_OBJECTS) $(main_bind_DEPENDENCIES) $(EXTRA_main_bind_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/bind$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_bind_OBJECTS) $(main_bind_LDADD) $(LIBS) main/callback.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/callback$(EXEEXT): $(main_callback_OBJECTS) $(main_callback_DEPENDENCIES) $(EXTRA_main_callback_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/callback$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_callback_OBJECTS) $(main_callback_LDADD) $(LIBS) main/codelet_null_callback.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/codelet_null_callback$(EXEEXT): $(main_codelet_null_callback_OBJECTS) $(main_codelet_null_callback_DEPENDENCIES) $(EXTRA_main_codelet_null_callback_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/codelet_null_callback$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_codelet_null_callback_OBJECTS) $(main_codelet_null_callback_LDADD) $(LIBS) main/const_codelet.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/const_codelet$(EXEEXT): $(main_const_codelet_OBJECTS) $(main_const_codelet_DEPENDENCIES) $(EXTRA_main_const_codelet_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/const_codelet$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_const_codelet_OBJECTS) $(main_const_codelet_LDADD) $(LIBS) main/deadlock.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/deadlock$(EXEEXT): $(main_deadlock_OBJECTS) $(main_deadlock_DEPENDENCIES) $(EXTRA_main_deadlock_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/deadlock$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_deadlock_OBJECTS) $(main_deadlock_LDADD) $(LIBS) main/declare_deps_after_submission.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/declare_deps_after_submission$(EXEEXT): $(main_declare_deps_after_submission_OBJECTS) $(main_declare_deps_after_submission_DEPENDENCIES) $(EXTRA_main_declare_deps_after_submission_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/declare_deps_after_submission$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_declare_deps_after_submission_OBJECTS) $(main_declare_deps_after_submission_LDADD) $(LIBS) main/declare_deps_after_submission_synchronous.$(OBJEXT): \ main/$(am__dirstamp) main/$(DEPDIR)/$(am__dirstamp) main/declare_deps_after_submission_synchronous$(EXEEXT): $(main_declare_deps_after_submission_synchronous_OBJECTS) $(main_declare_deps_after_submission_synchronous_DEPENDENCIES) $(EXTRA_main_declare_deps_after_submission_synchronous_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/declare_deps_after_submission_synchronous$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_declare_deps_after_submission_synchronous_OBJECTS) $(main_declare_deps_after_submission_synchronous_LDADD) $(LIBS) main/declare_deps_in_callback.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/declare_deps_in_callback$(EXEEXT): $(main_declare_deps_in_callback_OBJECTS) $(main_declare_deps_in_callback_DEPENDENCIES) $(EXTRA_main_declare_deps_in_callback_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/declare_deps_in_callback$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_declare_deps_in_callback_OBJECTS) $(main_declare_deps_in_callback_LDADD) $(LIBS) main/deploop.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/deploop$(EXEEXT): $(main_deploop_OBJECTS) $(main_deploop_DEPENDENCIES) $(EXTRA_main_deploop_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/deploop$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_deploop_OBJECTS) $(main_deploop_LDADD) $(LIBS) main/deprecated_func-deprecated_func.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/deprecated_func$(EXEEXT): $(main_deprecated_func_OBJECTS) $(main_deprecated_func_DEPENDENCIES) $(EXTRA_main_deprecated_func_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/deprecated_func$(EXEEXT) $(AM_V_CCLD)$(main_deprecated_func_LINK) $(main_deprecated_func_OBJECTS) $(main_deprecated_func_LDADD) $(LIBS) main/display_binding.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/display_binding$(EXEEXT): $(main_display_binding_OBJECTS) $(main_display_binding_DEPENDENCIES) $(EXTRA_main_display_binding_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/display_binding$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_display_binding_OBJECTS) $(main_display_binding_LDADD) $(LIBS) main/driver_api/$(am__dirstamp): @$(MKDIR_P) main/driver_api @: > main/driver_api/$(am__dirstamp) main/driver_api/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) main/driver_api/$(DEPDIR) @: > main/driver_api/$(DEPDIR)/$(am__dirstamp) main/driver_api/init_run_deinit.$(OBJEXT): \ main/driver_api/$(am__dirstamp) \ main/driver_api/$(DEPDIR)/$(am__dirstamp) main/driver_api/init_run_deinit$(EXEEXT): $(main_driver_api_init_run_deinit_OBJECTS) $(main_driver_api_init_run_deinit_DEPENDENCIES) $(EXTRA_main_driver_api_init_run_deinit_DEPENDENCIES) main/driver_api/$(am__dirstamp) @rm -f main/driver_api/init_run_deinit$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_driver_api_init_run_deinit_OBJECTS) $(main_driver_api_init_run_deinit_LDADD) $(LIBS) main/driver_api/run_driver.$(OBJEXT): main/driver_api/$(am__dirstamp) \ main/driver_api/$(DEPDIR)/$(am__dirstamp) main/driver_api/run_driver$(EXEEXT): $(main_driver_api_run_driver_OBJECTS) $(main_driver_api_run_driver_DEPENDENCIES) $(EXTRA_main_driver_api_run_driver_DEPENDENCIES) main/driver_api/$(am__dirstamp) @rm -f main/driver_api/run_driver$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_driver_api_run_driver_OBJECTS) $(main_driver_api_run_driver_LDADD) $(LIBS) main/empty_task.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/empty_task$(EXEEXT): $(main_empty_task_OBJECTS) $(main_empty_task_DEPENDENCIES) $(EXTRA_main_empty_task_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/empty_task$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_empty_task_OBJECTS) $(main_empty_task_LDADD) $(LIBS) main/empty_task_chain.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/empty_task_chain$(EXEEXT): $(main_empty_task_chain_OBJECTS) $(main_empty_task_chain_DEPENDENCIES) $(EXTRA_main_empty_task_chain_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/empty_task_chain$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_empty_task_chain_OBJECTS) $(main_empty_task_chain_LDADD) $(LIBS) main/empty_task_sync_point.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/empty_task_sync_point$(EXEEXT): $(main_empty_task_sync_point_OBJECTS) $(main_empty_task_sync_point_DEPENDENCIES) $(EXTRA_main_empty_task_sync_point_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/empty_task_sync_point$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_empty_task_sync_point_OBJECTS) $(main_empty_task_sync_point_LDADD) $(LIBS) main/empty_task_sync_point_tasks.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/empty_task_sync_point_tasks$(EXEEXT): $(main_empty_task_sync_point_tasks_OBJECTS) $(main_empty_task_sync_point_tasks_DEPENDENCIES) $(EXTRA_main_empty_task_sync_point_tasks_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/empty_task_sync_point_tasks$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_empty_task_sync_point_tasks_OBJECTS) $(main_empty_task_sync_point_tasks_LDADD) $(LIBS) main/execute_on_a_specific_worker.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/execute_on_a_specific_worker$(EXEEXT): $(main_execute_on_a_specific_worker_OBJECTS) $(main_execute_on_a_specific_worker_DEPENDENCIES) $(EXTRA_main_execute_on_a_specific_worker_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/execute_on_a_specific_worker$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_execute_on_a_specific_worker_OBJECTS) $(main_execute_on_a_specific_worker_LDADD) $(LIBS) main/execute_schedule.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/execute_schedule$(EXEEXT): $(main_execute_schedule_OBJECTS) $(main_execute_schedule_DEPENDENCIES) $(EXTRA_main_execute_schedule_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/execute_schedule$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_execute_schedule_OBJECTS) $(main_execute_schedule_LDADD) $(LIBS) main/get_children_tasks.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/get_children_tasks$(EXEEXT): $(main_get_children_tasks_OBJECTS) $(main_get_children_tasks_DEPENDENCIES) $(EXTRA_main_get_children_tasks_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/get_children_tasks$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_get_children_tasks_OBJECTS) $(main_get_children_tasks_LDADD) $(LIBS) main/get_current_task.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/get_current_task$(EXEEXT): $(main_get_current_task_OBJECTS) $(main_get_current_task_DEPENDENCIES) $(EXTRA_main_get_current_task_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/get_current_task$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_get_current_task_OBJECTS) $(main_get_current_task_LDADD) $(LIBS) main/hwloc_cpuset.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/hwloc_cpuset$(EXEEXT): $(main_hwloc_cpuset_OBJECTS) $(main_hwloc_cpuset_DEPENDENCIES) $(EXTRA_main_hwloc_cpuset_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/hwloc_cpuset$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_hwloc_cpuset_OBJECTS) $(main_hwloc_cpuset_LDADD) $(LIBS) main/insert_task.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/insert_task$(EXEEXT): $(main_insert_task_OBJECTS) $(main_insert_task_DEPENDENCIES) $(EXTRA_main_insert_task_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/insert_task$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_insert_task_OBJECTS) $(main_insert_task_LDADD) $(LIBS) main/insert_task_array.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/insert_task_array$(EXEEXT): $(main_insert_task_array_OBJECTS) $(main_insert_task_array_DEPENDENCIES) $(EXTRA_main_insert_task_array_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/insert_task_array$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_insert_task_array_OBJECTS) $(main_insert_task_array_LDADD) $(LIBS) main/insert_task_dyn_handles.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/insert_task_dyn_handles$(EXEEXT): $(main_insert_task_dyn_handles_OBJECTS) $(main_insert_task_dyn_handles_DEPENDENCIES) $(EXTRA_main_insert_task_dyn_handles_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/insert_task_dyn_handles$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_insert_task_dyn_handles_OBJECTS) $(main_insert_task_dyn_handles_LDADD) $(LIBS) main/insert_task_many.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/insert_task_many$(EXEEXT): $(main_insert_task_many_OBJECTS) $(main_insert_task_many_DEPENDENCIES) $(EXTRA_main_insert_task_many_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/insert_task_many$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_insert_task_many_OBJECTS) $(main_insert_task_many_LDADD) $(LIBS) main/insert_task_nullcodelet.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/insert_task_nullcodelet$(EXEEXT): $(main_insert_task_nullcodelet_OBJECTS) $(main_insert_task_nullcodelet_DEPENDENCIES) $(EXTRA_main_insert_task_nullcodelet_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/insert_task_nullcodelet$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_insert_task_nullcodelet_OBJECTS) $(main_insert_task_nullcodelet_LDADD) $(LIBS) main/insert_task_pack.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/insert_task_pack$(EXEEXT): $(main_insert_task_pack_OBJECTS) $(main_insert_task_pack_DEPENDENCIES) $(EXTRA_main_insert_task_pack_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/insert_task_pack$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_insert_task_pack_OBJECTS) $(main_insert_task_pack_LDADD) $(LIBS) main/insert_task_value.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/insert_task_value$(EXEEXT): $(main_insert_task_value_OBJECTS) $(main_insert_task_value_DEPENDENCIES) $(EXTRA_main_insert_task_value_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/insert_task_value$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_insert_task_value_OBJECTS) $(main_insert_task_value_LDADD) $(LIBS) main/insert_task_where.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/insert_task_where$(EXEEXT): $(main_insert_task_where_OBJECTS) $(main_insert_task_where_DEPENDENCIES) $(EXTRA_main_insert_task_where_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/insert_task_where$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_insert_task_where_OBJECTS) $(main_insert_task_where_LDADD) $(LIBS) main/job.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/job$(EXEEXT): $(main_job_OBJECTS) $(main_job_DEPENDENCIES) $(EXTRA_main_job_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/job$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_job_OBJECTS) $(main_job_LDADD) $(LIBS) main/mkdtemp.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/mkdtemp$(EXEEXT): $(main_mkdtemp_OBJECTS) $(main_mkdtemp_DEPENDENCIES) $(EXTRA_main_mkdtemp_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/mkdtemp$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_mkdtemp_OBJECTS) $(main_mkdtemp_LDADD) $(LIBS) main/multithreaded.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/multithreaded$(EXEEXT): $(main_multithreaded_OBJECTS) $(main_multithreaded_DEPENDENCIES) $(EXTRA_main_multithreaded_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/multithreaded$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_multithreaded_OBJECTS) $(main_multithreaded_LDADD) $(LIBS) main/multithreaded_init.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/multithreaded_init$(EXEEXT): $(main_multithreaded_init_OBJECTS) $(main_multithreaded_init_DEPENDENCIES) $(EXTRA_main_multithreaded_init_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/multithreaded_init$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_multithreaded_init_OBJECTS) $(main_multithreaded_init_LDADD) $(LIBS) main/pack.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/pack$(EXEEXT): $(main_pack_OBJECTS) $(main_pack_DEPENDENCIES) $(EXTRA_main_pack_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/pack$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_pack_OBJECTS) $(main_pack_LDADD) $(LIBS) main/pause_resume.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/pause_resume$(EXEEXT): $(main_pause_resume_OBJECTS) $(main_pause_resume_DEPENDENCIES) $(EXTRA_main_pause_resume_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/pause_resume$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_pause_resume_OBJECTS) $(main_pause_resume_LDADD) $(LIBS) main/regenerate.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/regenerate$(EXEEXT): $(main_regenerate_OBJECTS) $(main_regenerate_DEPENDENCIES) $(EXTRA_main_regenerate_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/regenerate$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_regenerate_OBJECTS) $(main_regenerate_LDADD) $(LIBS) main/regenerate_pipeline.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/regenerate_pipeline$(EXEEXT): $(main_regenerate_pipeline_OBJECTS) $(main_regenerate_pipeline_DEPENDENCIES) $(EXTRA_main_regenerate_pipeline_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/regenerate_pipeline$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_regenerate_pipeline_OBJECTS) $(main_regenerate_pipeline_LDADD) $(LIBS) main/restart.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/restart$(EXEEXT): $(main_restart_OBJECTS) $(main_restart_DEPENDENCIES) $(EXTRA_main_restart_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/restart$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_restart_OBJECTS) $(main_restart_LDADD) $(LIBS) main/starpu_init.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/starpu_init$(EXEEXT): $(main_starpu_init_OBJECTS) $(main_starpu_init_DEPENDENCIES) $(EXTRA_main_starpu_init_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/starpu_init$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_starpu_init_OBJECTS) $(main_starpu_init_LDADD) $(LIBS) main/starpu_task_bundle.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/starpu_task_bundle$(EXEEXT): $(main_starpu_task_bundle_OBJECTS) $(main_starpu_task_bundle_DEPENDENCIES) $(EXTRA_main_starpu_task_bundle_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/starpu_task_bundle$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_starpu_task_bundle_OBJECTS) $(main_starpu_task_bundle_LDADD) $(LIBS) main/starpu_task_wait.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/starpu_task_wait$(EXEEXT): $(main_starpu_task_wait_OBJECTS) $(main_starpu_task_wait_DEPENDENCIES) $(EXTRA_main_starpu_task_wait_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/starpu_task_wait$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_starpu_task_wait_OBJECTS) $(main_starpu_task_wait_LDADD) $(LIBS) main/starpu_task_wait_for_all.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/starpu_task_wait_for_all$(EXEEXT): $(main_starpu_task_wait_for_all_OBJECTS) $(main_starpu_task_wait_for_all_DEPENDENCIES) $(EXTRA_main_starpu_task_wait_for_all_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/starpu_task_wait_for_all$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_starpu_task_wait_for_all_OBJECTS) $(main_starpu_task_wait_for_all_LDADD) $(LIBS) main/starpu_worker_exists-starpu_worker_exists.$(OBJEXT): \ main/$(am__dirstamp) main/$(DEPDIR)/$(am__dirstamp) main/starpu_worker_exists$(EXEEXT): $(main_starpu_worker_exists_OBJECTS) $(main_starpu_worker_exists_DEPENDENCIES) $(EXTRA_main_starpu_worker_exists_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/starpu_worker_exists$(EXEEXT) $(AM_V_CCLD)$(main_starpu_worker_exists_LINK) $(main_starpu_worker_exists_OBJECTS) $(main_starpu_worker_exists_LDADD) $(LIBS) main/static_restartable.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/static_restartable$(EXEEXT): $(main_static_restartable_OBJECTS) $(main_static_restartable_DEPENDENCIES) $(EXTRA_main_static_restartable_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/static_restartable$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_static_restartable_OBJECTS) $(main_static_restartable_LDADD) $(LIBS) main/static_restartable_tag.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/static_restartable_tag$(EXEEXT): $(main_static_restartable_tag_OBJECTS) $(main_static_restartable_tag_DEPENDENCIES) $(EXTRA_main_static_restartable_tag_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/static_restartable_tag$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_static_restartable_tag_OBJECTS) $(main_static_restartable_tag_LDADD) $(LIBS) main/static_restartable_using_initializer.$(OBJEXT): \ main/$(am__dirstamp) main/$(DEPDIR)/$(am__dirstamp) main/static_restartable_using_initializer$(EXEEXT): $(main_static_restartable_using_initializer_OBJECTS) $(main_static_restartable_using_initializer_DEPENDENCIES) $(EXTRA_main_static_restartable_using_initializer_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/static_restartable_using_initializer$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_static_restartable_using_initializer_OBJECTS) $(main_static_restartable_using_initializer_LDADD) $(LIBS) main/subgraph_repeat.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/subgraph_repeat$(EXEEXT): $(main_subgraph_repeat_OBJECTS) $(main_subgraph_repeat_DEPENDENCIES) $(EXTRA_main_subgraph_repeat_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/subgraph_repeat$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_subgraph_repeat_OBJECTS) $(main_subgraph_repeat_LDADD) $(LIBS) main/subgraph_repeat_regenerate.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/subgraph_repeat_regenerate$(EXEEXT): $(main_subgraph_repeat_regenerate_OBJECTS) $(main_subgraph_repeat_regenerate_DEPENDENCIES) $(EXTRA_main_subgraph_repeat_regenerate_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/subgraph_repeat_regenerate$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_subgraph_repeat_regenerate_OBJECTS) $(main_subgraph_repeat_regenerate_LDADD) $(LIBS) main/subgraph_repeat_regenerate_tag.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/subgraph_repeat_regenerate_tag$(EXEEXT): $(main_subgraph_repeat_regenerate_tag_OBJECTS) $(main_subgraph_repeat_regenerate_tag_DEPENDENCIES) $(EXTRA_main_subgraph_repeat_regenerate_tag_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/subgraph_repeat_regenerate_tag$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_subgraph_repeat_regenerate_tag_OBJECTS) $(main_subgraph_repeat_regenerate_tag_LDADD) $(LIBS) main/subgraph_repeat_regenerate_tag_cycle.$(OBJEXT): \ main/$(am__dirstamp) main/$(DEPDIR)/$(am__dirstamp) main/subgraph_repeat_regenerate_tag_cycle$(EXEEXT): $(main_subgraph_repeat_regenerate_tag_cycle_OBJECTS) $(main_subgraph_repeat_regenerate_tag_cycle_DEPENDENCIES) $(EXTRA_main_subgraph_repeat_regenerate_tag_cycle_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/subgraph_repeat_regenerate_tag_cycle$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_subgraph_repeat_regenerate_tag_cycle_OBJECTS) $(main_subgraph_repeat_regenerate_tag_cycle_LDADD) $(LIBS) main/subgraph_repeat_tag.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/subgraph_repeat_tag$(EXEEXT): $(main_subgraph_repeat_tag_OBJECTS) $(main_subgraph_repeat_tag_DEPENDENCIES) $(EXTRA_main_subgraph_repeat_tag_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/subgraph_repeat_tag$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_subgraph_repeat_tag_OBJECTS) $(main_subgraph_repeat_tag_LDADD) $(LIBS) main/submit.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/submit$(EXEEXT): $(main_submit_OBJECTS) $(main_submit_DEPENDENCIES) $(EXTRA_main_submit_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/submit$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_submit_OBJECTS) $(main_submit_LDADD) $(LIBS) main/tag_get_task.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/tag_get_task$(EXEEXT): $(main_tag_get_task_OBJECTS) $(main_tag_get_task_DEPENDENCIES) $(EXTRA_main_tag_get_task_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/tag_get_task$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_tag_get_task_OBJECTS) $(main_tag_get_task_LDADD) $(LIBS) main/tag_task_data_deps.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/tag_task_data_deps$(EXEEXT): $(main_tag_task_data_deps_OBJECTS) $(main_tag_task_data_deps_DEPENDENCIES) $(EXTRA_main_tag_task_data_deps_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/tag_task_data_deps$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_tag_task_data_deps_OBJECTS) $(main_tag_task_data_deps_LDADD) $(LIBS) main/tag_wait_api.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/tag_wait_api$(EXEEXT): $(main_tag_wait_api_OBJECTS) $(main_tag_wait_api_DEPENDENCIES) $(EXTRA_main_tag_wait_api_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/tag_wait_api$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_tag_wait_api_OBJECTS) $(main_tag_wait_api_LDADD) $(LIBS) main/task_end_dep.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/task_end_dep$(EXEEXT): $(main_task_end_dep_OBJECTS) $(main_task_end_dep_DEPENDENCIES) $(EXTRA_main_task_end_dep_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/task_end_dep$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_task_end_dep_OBJECTS) $(main_task_end_dep_LDADD) $(LIBS) main/task_wait_api.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/task_wait_api$(EXEEXT): $(main_task_wait_api_OBJECTS) $(main_task_wait_api_DEPENDENCIES) $(EXTRA_main_task_wait_api_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/task_wait_api$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_task_wait_api_OBJECTS) $(main_task_wait_api_LDADD) $(LIBS) main/wait_all_regenerable_tasks.$(OBJEXT): main/$(am__dirstamp) \ main/$(DEPDIR)/$(am__dirstamp) main/wait_all_regenerable_tasks$(EXEEXT): $(main_wait_all_regenerable_tasks_OBJECTS) $(main_wait_all_regenerable_tasks_DEPENDENCIES) $(EXTRA_main_wait_all_regenerable_tasks_DEPENDENCIES) main/$(am__dirstamp) @rm -f main/wait_all_regenerable_tasks$(EXEEXT) $(AM_V_CCLD)$(LINK) $(main_wait_all_regenerable_tasks_OBJECTS) $(main_wait_all_regenerable_tasks_LDADD) $(LIBS) maxfpga/$(am__dirstamp): @$(MKDIR_P) maxfpga @: > maxfpga/$(am__dirstamp) maxfpga/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) maxfpga/$(DEPDIR) @: > maxfpga/$(DEPDIR)/$(am__dirstamp) maxfpga/max_fpga_advanced_static.$(OBJEXT): maxfpga/$(am__dirstamp) \ maxfpga/$(DEPDIR)/$(am__dirstamp) maxfpga/max_fpga_advanced_static$(EXEEXT): $(maxfpga_max_fpga_advanced_static_OBJECTS) $(maxfpga_max_fpga_advanced_static_DEPENDENCIES) $(EXTRA_maxfpga_max_fpga_advanced_static_DEPENDENCIES) maxfpga/$(am__dirstamp) @rm -f maxfpga/max_fpga_advanced_static$(EXEEXT) $(AM_V_CCLD)$(LINK) $(maxfpga_max_fpga_advanced_static_OBJECTS) $(maxfpga_max_fpga_advanced_static_LDADD) $(LIBS) maxfpga/max_fpga_basic_static.$(OBJEXT): maxfpga/$(am__dirstamp) \ maxfpga/$(DEPDIR)/$(am__dirstamp) maxfpga/max_fpga_basic_static$(EXEEXT): $(maxfpga_max_fpga_basic_static_OBJECTS) $(maxfpga_max_fpga_basic_static_DEPENDENCIES) $(EXTRA_maxfpga_max_fpga_basic_static_DEPENDENCIES) maxfpga/$(am__dirstamp) @rm -f maxfpga/max_fpga_basic_static$(EXEEXT) $(AM_V_CCLD)$(LINK) $(maxfpga_max_fpga_basic_static_OBJECTS) $(maxfpga_max_fpga_basic_static_LDADD) $(LIBS) maxfpga/max_fpga_dynamic.$(OBJEXT): maxfpga/$(am__dirstamp) \ maxfpga/$(DEPDIR)/$(am__dirstamp) maxfpga/max_fpga_dynamic$(EXEEXT): $(maxfpga_max_fpga_dynamic_OBJECTS) $(maxfpga_max_fpga_dynamic_DEPENDENCIES) $(EXTRA_maxfpga_max_fpga_dynamic_DEPENDENCIES) maxfpga/$(am__dirstamp) @rm -f maxfpga/max_fpga_dynamic$(EXEEXT) $(AM_V_CCLD)$(LINK) $(maxfpga_max_fpga_dynamic_OBJECTS) $(maxfpga_max_fpga_dynamic_LDADD) $(LIBS) maxfpga/max_fpga_mux.$(OBJEXT): maxfpga/$(am__dirstamp) \ maxfpga/$(DEPDIR)/$(am__dirstamp) maxfpga/max_fpga_mux$(EXEEXT): $(maxfpga_max_fpga_mux_OBJECTS) $(maxfpga_max_fpga_mux_DEPENDENCIES) $(EXTRA_maxfpga_max_fpga_mux_DEPENDENCIES) maxfpga/$(am__dirstamp) @rm -f maxfpga/max_fpga_mux$(EXEEXT) $(AM_V_CCLD)$(LINK) $(maxfpga_max_fpga_mux_OBJECTS) $(maxfpga_max_fpga_mux_LDADD) $(LIBS) microbenchs/$(am__dirstamp): @$(MKDIR_P) microbenchs @: > microbenchs/$(am__dirstamp) microbenchs/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) microbenchs/$(DEPDIR) @: > microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/async_tasks_overhead.$(OBJEXT): \ microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/async_tasks_overhead$(EXEEXT): $(microbenchs_async_tasks_overhead_OBJECTS) $(microbenchs_async_tasks_overhead_DEPENDENCIES) $(EXTRA_microbenchs_async_tasks_overhead_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/async_tasks_overhead$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_async_tasks_overhead_OBJECTS) $(microbenchs_async_tasks_overhead_LDADD) $(LIBS) microbenchs/bandwidth.$(OBJEXT): microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/bandwidth$(EXEEXT): $(microbenchs_bandwidth_OBJECTS) $(microbenchs_bandwidth_DEPENDENCIES) $(EXTRA_microbenchs_bandwidth_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/bandwidth$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_bandwidth_OBJECTS) $(microbenchs_bandwidth_LDADD) $(LIBS) microbenchs/display_structures_size.$(OBJEXT): \ microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/display_structures_size$(EXEEXT): $(microbenchs_display_structures_size_OBJECTS) $(microbenchs_display_structures_size_DEPENDENCIES) $(EXTRA_microbenchs_display_structures_size_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/display_structures_size$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_display_structures_size_OBJECTS) $(microbenchs_display_structures_size_LDADD) $(LIBS) microbenchs/local_pingpong.$(OBJEXT): microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/local_pingpong$(EXEEXT): $(microbenchs_local_pingpong_OBJECTS) $(microbenchs_local_pingpong_DEPENDENCIES) $(EXTRA_microbenchs_local_pingpong_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/local_pingpong$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_local_pingpong_OBJECTS) $(microbenchs_local_pingpong_LDADD) $(LIBS) microbenchs/matrix_as_vector.$(OBJEXT): microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/matrix_as_vector$(EXEEXT): $(microbenchs_matrix_as_vector_OBJECTS) $(microbenchs_matrix_as_vector_DEPENDENCIES) $(EXTRA_microbenchs_matrix_as_vector_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/matrix_as_vector$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_matrix_as_vector_OBJECTS) $(microbenchs_matrix_as_vector_LDADD) $(LIBS) microbenchs/parallel_dependent_homogeneous_tasks_data.$(OBJEXT): \ microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/parallel_dependent_homogeneous_tasks_data$(EXEEXT): $(microbenchs_parallel_dependent_homogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_dependent_homogeneous_tasks_data_DEPENDENCIES) $(EXTRA_microbenchs_parallel_dependent_homogeneous_tasks_data_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/parallel_dependent_homogeneous_tasks_data$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_parallel_dependent_homogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_dependent_homogeneous_tasks_data_LDADD) $(LIBS) microbenchs/parallel_independent_heterogeneous_tasks.$(OBJEXT): \ microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/parallel_independent_heterogeneous_tasks$(EXEEXT): $(microbenchs_parallel_independent_heterogeneous_tasks_OBJECTS) $(microbenchs_parallel_independent_heterogeneous_tasks_DEPENDENCIES) $(EXTRA_microbenchs_parallel_independent_heterogeneous_tasks_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/parallel_independent_heterogeneous_tasks$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_parallel_independent_heterogeneous_tasks_OBJECTS) $(microbenchs_parallel_independent_heterogeneous_tasks_LDADD) $(LIBS) microbenchs/parallel_independent_heterogeneous_tasks_data.$(OBJEXT): \ microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/parallel_independent_heterogeneous_tasks_data$(EXEEXT): $(microbenchs_parallel_independent_heterogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_independent_heterogeneous_tasks_data_DEPENDENCIES) $(EXTRA_microbenchs_parallel_independent_heterogeneous_tasks_data_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/parallel_independent_heterogeneous_tasks_data$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_parallel_independent_heterogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_independent_heterogeneous_tasks_data_LDADD) $(LIBS) microbenchs/parallel_independent_homogeneous_tasks.$(OBJEXT): \ microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/parallel_independent_homogeneous_tasks$(EXEEXT): $(microbenchs_parallel_independent_homogeneous_tasks_OBJECTS) $(microbenchs_parallel_independent_homogeneous_tasks_DEPENDENCIES) $(EXTRA_microbenchs_parallel_independent_homogeneous_tasks_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/parallel_independent_homogeneous_tasks$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_parallel_independent_homogeneous_tasks_OBJECTS) $(microbenchs_parallel_independent_homogeneous_tasks_LDADD) $(LIBS) microbenchs/parallel_independent_homogeneous_tasks_data.$(OBJEXT): \ microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/parallel_independent_homogeneous_tasks_data$(EXEEXT): $(microbenchs_parallel_independent_homogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_independent_homogeneous_tasks_data_DEPENDENCIES) $(EXTRA_microbenchs_parallel_independent_homogeneous_tasks_data_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/parallel_independent_homogeneous_tasks_data$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_parallel_independent_homogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_independent_homogeneous_tasks_data_LDADD) $(LIBS) microbenchs/parallel_redux_heterogeneous_tasks_data.$(OBJEXT): \ microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/parallel_redux_heterogeneous_tasks_data$(EXEEXT): $(microbenchs_parallel_redux_heterogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_redux_heterogeneous_tasks_data_DEPENDENCIES) $(EXTRA_microbenchs_parallel_redux_heterogeneous_tasks_data_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/parallel_redux_heterogeneous_tasks_data$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_parallel_redux_heterogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_redux_heterogeneous_tasks_data_LDADD) $(LIBS) microbenchs/parallel_redux_homogeneous_tasks_data.$(OBJEXT): \ microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/parallel_redux_homogeneous_tasks_data$(EXEEXT): $(microbenchs_parallel_redux_homogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_redux_homogeneous_tasks_data_DEPENDENCIES) $(EXTRA_microbenchs_parallel_redux_homogeneous_tasks_data_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/parallel_redux_homogeneous_tasks_data$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_parallel_redux_homogeneous_tasks_data_OBJECTS) $(microbenchs_parallel_redux_homogeneous_tasks_data_LDADD) $(LIBS) microbenchs/prefetch_data_on_node.$(OBJEXT): \ microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/prefetch_data_on_node$(EXEEXT): $(microbenchs_prefetch_data_on_node_OBJECTS) $(microbenchs_prefetch_data_on_node_DEPENDENCIES) $(EXTRA_microbenchs_prefetch_data_on_node_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/prefetch_data_on_node$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_prefetch_data_on_node_OBJECTS) $(microbenchs_prefetch_data_on_node_LDADD) $(LIBS) microbenchs/redundant_buffer.$(OBJEXT): microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/redundant_buffer$(EXEEXT): $(microbenchs_redundant_buffer_OBJECTS) $(microbenchs_redundant_buffer_DEPENDENCIES) $(EXTRA_microbenchs_redundant_buffer_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/redundant_buffer$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_redundant_buffer_OBJECTS) $(microbenchs_redundant_buffer_LDADD) $(LIBS) microbenchs/sync_tasks_overhead.$(OBJEXT): \ microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/sync_tasks_overhead$(EXEEXT): $(microbenchs_sync_tasks_overhead_OBJECTS) $(microbenchs_sync_tasks_overhead_DEPENDENCIES) $(EXTRA_microbenchs_sync_tasks_overhead_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/sync_tasks_overhead$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_sync_tasks_overhead_OBJECTS) $(microbenchs_sync_tasks_overhead_LDADD) $(LIBS) microbenchs/tasks_overhead.$(OBJEXT): microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/tasks_overhead$(EXEEXT): $(microbenchs_tasks_overhead_OBJECTS) $(microbenchs_tasks_overhead_DEPENDENCIES) $(EXTRA_microbenchs_tasks_overhead_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/tasks_overhead$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_tasks_overhead_OBJECTS) $(microbenchs_tasks_overhead_LDADD) $(LIBS) microbenchs/tasks_size_overhead.$(OBJEXT): \ microbenchs/$(am__dirstamp) \ microbenchs/$(DEPDIR)/$(am__dirstamp) microbenchs/tasks_size_overhead$(EXEEXT): $(microbenchs_tasks_size_overhead_OBJECTS) $(microbenchs_tasks_size_overhead_DEPENDENCIES) $(EXTRA_microbenchs_tasks_size_overhead_DEPENDENCIES) microbenchs/$(am__dirstamp) @rm -f microbenchs/tasks_size_overhead$(EXEEXT) $(AM_V_CCLD)$(LINK) $(microbenchs_tasks_size_overhead_OBJECTS) $(microbenchs_tasks_size_overhead_LDADD) $(LIBS) openmp/$(am__dirstamp): @$(MKDIR_P) openmp @: > openmp/$(am__dirstamp) openmp/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) openmp/$(DEPDIR) @: > openmp/$(DEPDIR)/$(am__dirstamp) openmp/api_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/api_01$(EXEEXT): $(openmp_api_01_OBJECTS) $(openmp_api_01_DEPENDENCIES) $(EXTRA_openmp_api_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/api_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_api_01_OBJECTS) $(openmp_api_01_LDADD) $(LIBS) openmp/array_slice_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/array_slice_01$(EXEEXT): $(openmp_array_slice_01_OBJECTS) $(openmp_array_slice_01_DEPENDENCIES) $(EXTRA_openmp_array_slice_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/array_slice_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_array_slice_01_OBJECTS) $(openmp_array_slice_01_LDADD) $(LIBS) openmp/cuda_task_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/cuda_task_01$(EXEEXT): $(openmp_cuda_task_01_OBJECTS) $(openmp_cuda_task_01_DEPENDENCIES) $(EXTRA_openmp_cuda_task_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/cuda_task_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_cuda_task_01_OBJECTS) $(openmp_cuda_task_01_LDADD) $(LIBS) openmp/environment.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/environment$(EXEEXT): $(openmp_environment_OBJECTS) $(openmp_environment_DEPENDENCIES) $(EXTRA_openmp_environment_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/environment$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_environment_OBJECTS) $(openmp_environment_LDADD) $(LIBS) openmp/init_exit_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/init_exit_01$(EXEEXT): $(openmp_init_exit_01_OBJECTS) $(openmp_init_exit_01_DEPENDENCIES) $(EXTRA_openmp_init_exit_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/init_exit_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_init_exit_01_OBJECTS) $(openmp_init_exit_01_LDADD) $(LIBS) openmp/init_exit_02.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/init_exit_02$(EXEEXT): $(openmp_init_exit_02_OBJECTS) $(openmp_init_exit_02_DEPENDENCIES) $(EXTRA_openmp_init_exit_02_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/init_exit_02$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_init_exit_02_OBJECTS) $(openmp_init_exit_02_LDADD) $(LIBS) openmp/parallel_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_01$(EXEEXT): $(openmp_parallel_01_OBJECTS) $(openmp_parallel_01_DEPENDENCIES) $(EXTRA_openmp_parallel_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_01_OBJECTS) $(openmp_parallel_01_LDADD) $(LIBS) openmp/parallel_02.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_02$(EXEEXT): $(openmp_parallel_02_OBJECTS) $(openmp_parallel_02_DEPENDENCIES) $(EXTRA_openmp_parallel_02_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_02$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_02_OBJECTS) $(openmp_parallel_02_LDADD) $(LIBS) openmp/parallel_03.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_03$(EXEEXT): $(openmp_parallel_03_OBJECTS) $(openmp_parallel_03_DEPENDENCIES) $(EXTRA_openmp_parallel_03_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_03$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_03_OBJECTS) $(openmp_parallel_03_LDADD) $(LIBS) openmp/parallel_barrier_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_barrier_01$(EXEEXT): $(openmp_parallel_barrier_01_OBJECTS) $(openmp_parallel_barrier_01_DEPENDENCIES) $(EXTRA_openmp_parallel_barrier_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_barrier_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_barrier_01_OBJECTS) $(openmp_parallel_barrier_01_LDADD) $(LIBS) openmp/parallel_critical_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_critical_01$(EXEEXT): $(openmp_parallel_critical_01_OBJECTS) $(openmp_parallel_critical_01_DEPENDENCIES) $(EXTRA_openmp_parallel_critical_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_critical_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_critical_01_OBJECTS) $(openmp_parallel_critical_01_LDADD) $(LIBS) openmp/parallel_critical_inline_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_critical_inline_01$(EXEEXT): $(openmp_parallel_critical_inline_01_OBJECTS) $(openmp_parallel_critical_inline_01_DEPENDENCIES) $(EXTRA_openmp_parallel_critical_inline_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_critical_inline_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_critical_inline_01_OBJECTS) $(openmp_parallel_critical_inline_01_LDADD) $(LIBS) openmp/parallel_critical_named_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_critical_named_01$(EXEEXT): $(openmp_parallel_critical_named_01_OBJECTS) $(openmp_parallel_critical_named_01_DEPENDENCIES) $(EXTRA_openmp_parallel_critical_named_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_critical_named_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_critical_named_01_OBJECTS) $(openmp_parallel_critical_named_01_LDADD) $(LIBS) openmp/parallel_critical_named_inline_01.$(OBJEXT): \ openmp/$(am__dirstamp) openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_critical_named_inline_01$(EXEEXT): $(openmp_parallel_critical_named_inline_01_OBJECTS) $(openmp_parallel_critical_named_inline_01_DEPENDENCIES) $(EXTRA_openmp_parallel_critical_named_inline_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_critical_named_inline_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_critical_named_inline_01_OBJECTS) $(openmp_parallel_critical_named_inline_01_LDADD) $(LIBS) openmp/parallel_for_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_for_01$(EXEEXT): $(openmp_parallel_for_01_OBJECTS) $(openmp_parallel_for_01_DEPENDENCIES) $(EXTRA_openmp_parallel_for_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_for_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_for_01_OBJECTS) $(openmp_parallel_for_01_LDADD) $(LIBS) openmp/parallel_for_02.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_for_02$(EXEEXT): $(openmp_parallel_for_02_OBJECTS) $(openmp_parallel_for_02_DEPENDENCIES) $(EXTRA_openmp_parallel_for_02_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_for_02$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_for_02_OBJECTS) $(openmp_parallel_for_02_LDADD) $(LIBS) openmp/parallel_for_ordered_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_for_ordered_01$(EXEEXT): $(openmp_parallel_for_ordered_01_OBJECTS) $(openmp_parallel_for_ordered_01_DEPENDENCIES) $(EXTRA_openmp_parallel_for_ordered_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_for_ordered_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_for_ordered_01_OBJECTS) $(openmp_parallel_for_ordered_01_LDADD) $(LIBS) openmp/parallel_master_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_master_01$(EXEEXT): $(openmp_parallel_master_01_OBJECTS) $(openmp_parallel_master_01_DEPENDENCIES) $(EXTRA_openmp_parallel_master_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_master_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_master_01_OBJECTS) $(openmp_parallel_master_01_LDADD) $(LIBS) openmp/parallel_master_inline_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_master_inline_01$(EXEEXT): $(openmp_parallel_master_inline_01_OBJECTS) $(openmp_parallel_master_inline_01_DEPENDENCIES) $(EXTRA_openmp_parallel_master_inline_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_master_inline_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_master_inline_01_OBJECTS) $(openmp_parallel_master_inline_01_LDADD) $(LIBS) openmp/parallel_nested_lock_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_nested_lock_01$(EXEEXT): $(openmp_parallel_nested_lock_01_OBJECTS) $(openmp_parallel_nested_lock_01_DEPENDENCIES) $(EXTRA_openmp_parallel_nested_lock_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_nested_lock_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_nested_lock_01_OBJECTS) $(openmp_parallel_nested_lock_01_LDADD) $(LIBS) openmp/parallel_sections_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_sections_01$(EXEEXT): $(openmp_parallel_sections_01_OBJECTS) $(openmp_parallel_sections_01_DEPENDENCIES) $(EXTRA_openmp_parallel_sections_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_sections_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_sections_01_OBJECTS) $(openmp_parallel_sections_01_LDADD) $(LIBS) openmp/parallel_sections_combined_01.$(OBJEXT): \ openmp/$(am__dirstamp) openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_sections_combined_01$(EXEEXT): $(openmp_parallel_sections_combined_01_OBJECTS) $(openmp_parallel_sections_combined_01_DEPENDENCIES) $(EXTRA_openmp_parallel_sections_combined_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_sections_combined_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_sections_combined_01_OBJECTS) $(openmp_parallel_sections_combined_01_LDADD) $(LIBS) openmp/parallel_simple_lock_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_simple_lock_01$(EXEEXT): $(openmp_parallel_simple_lock_01_OBJECTS) $(openmp_parallel_simple_lock_01_DEPENDENCIES) $(EXTRA_openmp_parallel_simple_lock_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_simple_lock_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_simple_lock_01_OBJECTS) $(openmp_parallel_simple_lock_01_LDADD) $(LIBS) openmp/parallel_single_copyprivate_01.$(OBJEXT): \ openmp/$(am__dirstamp) openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_single_copyprivate_01$(EXEEXT): $(openmp_parallel_single_copyprivate_01_OBJECTS) $(openmp_parallel_single_copyprivate_01_DEPENDENCIES) $(EXTRA_openmp_parallel_single_copyprivate_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_single_copyprivate_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_single_copyprivate_01_OBJECTS) $(openmp_parallel_single_copyprivate_01_LDADD) $(LIBS) openmp/parallel_single_copyprivate_inline_01.$(OBJEXT): \ openmp/$(am__dirstamp) openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_single_copyprivate_inline_01$(EXEEXT): $(openmp_parallel_single_copyprivate_inline_01_OBJECTS) $(openmp_parallel_single_copyprivate_inline_01_DEPENDENCIES) $(EXTRA_openmp_parallel_single_copyprivate_inline_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_single_copyprivate_inline_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_single_copyprivate_inline_01_OBJECTS) $(openmp_parallel_single_copyprivate_inline_01_LDADD) $(LIBS) openmp/parallel_single_inline_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_single_inline_01$(EXEEXT): $(openmp_parallel_single_inline_01_OBJECTS) $(openmp_parallel_single_inline_01_DEPENDENCIES) $(EXTRA_openmp_parallel_single_inline_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_single_inline_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_single_inline_01_OBJECTS) $(openmp_parallel_single_inline_01_LDADD) $(LIBS) openmp/parallel_single_nowait_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_single_nowait_01$(EXEEXT): $(openmp_parallel_single_nowait_01_OBJECTS) $(openmp_parallel_single_nowait_01_DEPENDENCIES) $(EXTRA_openmp_parallel_single_nowait_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_single_nowait_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_single_nowait_01_OBJECTS) $(openmp_parallel_single_nowait_01_LDADD) $(LIBS) openmp/parallel_single_wait_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/parallel_single_wait_01$(EXEEXT): $(openmp_parallel_single_wait_01_OBJECTS) $(openmp_parallel_single_wait_01_DEPENDENCIES) $(EXTRA_openmp_parallel_single_wait_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/parallel_single_wait_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_parallel_single_wait_01_OBJECTS) $(openmp_parallel_single_wait_01_LDADD) $(LIBS) openmp/task_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/task_01$(EXEEXT): $(openmp_task_01_OBJECTS) $(openmp_task_01_DEPENDENCIES) $(EXTRA_openmp_task_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/task_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_task_01_OBJECTS) $(openmp_task_01_LDADD) $(LIBS) openmp/task_02.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/task_02$(EXEEXT): $(openmp_task_02_OBJECTS) $(openmp_task_02_DEPENDENCIES) $(EXTRA_openmp_task_02_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/task_02$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_task_02_OBJECTS) $(openmp_task_02_LDADD) $(LIBS) openmp/task_03.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/task_03$(EXEEXT): $(openmp_task_03_OBJECTS) $(openmp_task_03_DEPENDENCIES) $(EXTRA_openmp_task_03_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/task_03$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_task_03_OBJECTS) $(openmp_task_03_LDADD) $(LIBS) openmp/taskgroup_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/taskgroup_01$(EXEEXT): $(openmp_taskgroup_01_OBJECTS) $(openmp_taskgroup_01_DEPENDENCIES) $(EXTRA_openmp_taskgroup_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/taskgroup_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_taskgroup_01_OBJECTS) $(openmp_taskgroup_01_LDADD) $(LIBS) openmp/taskgroup_02.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/taskgroup_02$(EXEEXT): $(openmp_taskgroup_02_OBJECTS) $(openmp_taskgroup_02_DEPENDENCIES) $(EXTRA_openmp_taskgroup_02_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/taskgroup_02$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_taskgroup_02_OBJECTS) $(openmp_taskgroup_02_LDADD) $(LIBS) openmp/taskloop.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/taskloop$(EXEEXT): $(openmp_taskloop_OBJECTS) $(openmp_taskloop_DEPENDENCIES) $(EXTRA_openmp_taskloop_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/taskloop$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_taskloop_OBJECTS) $(openmp_taskloop_LDADD) $(LIBS) openmp/taskwait_01.$(OBJEXT): openmp/$(am__dirstamp) \ openmp/$(DEPDIR)/$(am__dirstamp) openmp/taskwait_01$(EXEEXT): $(openmp_taskwait_01_OBJECTS) $(openmp_taskwait_01_DEPENDENCIES) $(EXTRA_openmp_taskwait_01_DEPENDENCIES) openmp/$(am__dirstamp) @rm -f openmp/taskwait_01$(EXEEXT) $(AM_V_CCLD)$(LINK) $(openmp_taskwait_01_OBJECTS) $(openmp_taskwait_01_LDADD) $(LIBS) overlap/$(am__dirstamp): @$(MKDIR_P) overlap @: > overlap/$(am__dirstamp) overlap/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) overlap/$(DEPDIR) @: > overlap/$(DEPDIR)/$(am__dirstamp) overlap/gpu_concurrency.$(OBJEXT): overlap/$(am__dirstamp) \ overlap/$(DEPDIR)/$(am__dirstamp) overlap/long_kernel.$(OBJEXT): overlap/$(am__dirstamp) \ overlap/$(DEPDIR)/$(am__dirstamp) overlap/gpu_concurrency$(EXEEXT): $(overlap_gpu_concurrency_OBJECTS) $(overlap_gpu_concurrency_DEPENDENCIES) $(EXTRA_overlap_gpu_concurrency_DEPENDENCIES) overlap/$(am__dirstamp) @rm -f overlap/gpu_concurrency$(EXEEXT) $(AM_V_CCLD)$(LINK) $(overlap_gpu_concurrency_OBJECTS) $(overlap_gpu_concurrency_LDADD) $(LIBS) overlap/overlap.$(OBJEXT): overlap/$(am__dirstamp) \ overlap/$(DEPDIR)/$(am__dirstamp) overlap/overlap$(EXEEXT): $(overlap_overlap_OBJECTS) $(overlap_overlap_DEPENDENCIES) $(EXTRA_overlap_overlap_DEPENDENCIES) overlap/$(am__dirstamp) @rm -f overlap/overlap$(EXEEXT) $(AM_V_CCLD)$(LINK) $(overlap_overlap_OBJECTS) $(overlap_overlap_LDADD) $(LIBS) parallel_tasks/$(am__dirstamp): @$(MKDIR_P) parallel_tasks @: > parallel_tasks/$(am__dirstamp) parallel_tasks/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) parallel_tasks/$(DEPDIR) @: > parallel_tasks/$(DEPDIR)/$(am__dirstamp) parallel_tasks/combined_worker_assign_workerid.$(OBJEXT): \ parallel_tasks/$(am__dirstamp) \ parallel_tasks/$(DEPDIR)/$(am__dirstamp) parallel_tasks/combined_worker_assign_workerid$(EXEEXT): $(parallel_tasks_combined_worker_assign_workerid_OBJECTS) $(parallel_tasks_combined_worker_assign_workerid_DEPENDENCIES) $(EXTRA_parallel_tasks_combined_worker_assign_workerid_DEPENDENCIES) parallel_tasks/$(am__dirstamp) @rm -f parallel_tasks/combined_worker_assign_workerid$(EXEEXT) $(AM_V_CCLD)$(LINK) $(parallel_tasks_combined_worker_assign_workerid_OBJECTS) $(parallel_tasks_combined_worker_assign_workerid_LDADD) $(LIBS) parallel_tasks/cuda_only.$(OBJEXT): parallel_tasks/$(am__dirstamp) \ parallel_tasks/$(DEPDIR)/$(am__dirstamp) parallel_tasks/cuda_only$(EXEEXT): $(parallel_tasks_cuda_only_OBJECTS) $(parallel_tasks_cuda_only_DEPENDENCIES) $(EXTRA_parallel_tasks_cuda_only_DEPENDENCIES) parallel_tasks/$(am__dirstamp) @rm -f parallel_tasks/cuda_only$(EXEEXT) $(AM_V_CCLD)$(LINK) $(parallel_tasks_cuda_only_OBJECTS) $(parallel_tasks_cuda_only_LDADD) $(LIBS) parallel_tasks/explicit_combined_worker.$(OBJEXT): \ parallel_tasks/$(am__dirstamp) \ parallel_tasks/$(DEPDIR)/$(am__dirstamp) parallel_tasks/explicit_combined_worker$(EXEEXT): $(parallel_tasks_explicit_combined_worker_OBJECTS) $(parallel_tasks_explicit_combined_worker_DEPENDENCIES) $(EXTRA_parallel_tasks_explicit_combined_worker_DEPENDENCIES) parallel_tasks/$(am__dirstamp) @rm -f parallel_tasks/explicit_combined_worker$(EXEEXT) $(AM_V_CCLD)$(LINK) $(parallel_tasks_explicit_combined_worker_OBJECTS) $(parallel_tasks_explicit_combined_worker_LDADD) $(LIBS) parallel_tasks/parallel_kernels.$(OBJEXT): \ parallel_tasks/$(am__dirstamp) \ parallel_tasks/$(DEPDIR)/$(am__dirstamp) parallel_tasks/parallel_kernels$(EXEEXT): $(parallel_tasks_parallel_kernels_OBJECTS) $(parallel_tasks_parallel_kernels_DEPENDENCIES) $(EXTRA_parallel_tasks_parallel_kernels_DEPENDENCIES) parallel_tasks/$(am__dirstamp) @rm -f parallel_tasks/parallel_kernels$(EXEEXT) $(AM_V_CCLD)$(LINK) $(parallel_tasks_parallel_kernels_OBJECTS) $(parallel_tasks_parallel_kernels_LDADD) $(LIBS) parallel_tasks/parallel_kernels_spmd.$(OBJEXT): \ parallel_tasks/$(am__dirstamp) \ parallel_tasks/$(DEPDIR)/$(am__dirstamp) parallel_tasks/parallel_kernels_spmd$(EXEEXT): $(parallel_tasks_parallel_kernels_spmd_OBJECTS) $(parallel_tasks_parallel_kernels_spmd_DEPENDENCIES) $(EXTRA_parallel_tasks_parallel_kernels_spmd_DEPENDENCIES) parallel_tasks/$(am__dirstamp) @rm -f parallel_tasks/parallel_kernels_spmd$(EXEEXT) $(AM_V_CCLD)$(LINK) $(parallel_tasks_parallel_kernels_spmd_OBJECTS) $(parallel_tasks_parallel_kernels_spmd_LDADD) $(LIBS) parallel_tasks/parallel_kernels_trivial.$(OBJEXT): \ parallel_tasks/$(am__dirstamp) \ parallel_tasks/$(DEPDIR)/$(am__dirstamp) parallel_tasks/parallel_kernels_trivial$(EXEEXT): $(parallel_tasks_parallel_kernels_trivial_OBJECTS) $(parallel_tasks_parallel_kernels_trivial_DEPENDENCIES) $(EXTRA_parallel_tasks_parallel_kernels_trivial_DEPENDENCIES) parallel_tasks/$(am__dirstamp) @rm -f parallel_tasks/parallel_kernels_trivial$(EXEEXT) $(AM_V_CCLD)$(LINK) $(parallel_tasks_parallel_kernels_trivial_OBJECTS) $(parallel_tasks_parallel_kernels_trivial_LDADD) $(LIBS) parallel_tasks/spmd_peager.$(OBJEXT): parallel_tasks/$(am__dirstamp) \ parallel_tasks/$(DEPDIR)/$(am__dirstamp) parallel_tasks/spmd_peager$(EXEEXT): $(parallel_tasks_spmd_peager_OBJECTS) $(parallel_tasks_spmd_peager_DEPENDENCIES) $(EXTRA_parallel_tasks_spmd_peager_DEPENDENCIES) parallel_tasks/$(am__dirstamp) @rm -f parallel_tasks/spmd_peager$(EXEEXT) $(AM_V_CCLD)$(LINK) $(parallel_tasks_spmd_peager_OBJECTS) $(parallel_tasks_spmd_peager_LDADD) $(LIBS) perfmodels/$(am__dirstamp): @$(MKDIR_P) perfmodels @: > perfmodels/$(am__dirstamp) perfmodels/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) perfmodels/$(DEPDIR) @: > perfmodels/$(DEPDIR)/$(am__dirstamp) perfmodels/feed.$(OBJEXT): perfmodels/$(am__dirstamp) \ perfmodels/$(DEPDIR)/$(am__dirstamp) perfmodels/feed$(EXEEXT): $(perfmodels_feed_OBJECTS) $(perfmodels_feed_DEPENDENCIES) $(EXTRA_perfmodels_feed_DEPENDENCIES) perfmodels/$(am__dirstamp) @rm -f perfmodels/feed$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perfmodels_feed_OBJECTS) $(perfmodels_feed_LDADD) $(LIBS) perfmodels/memory.$(OBJEXT): perfmodels/$(am__dirstamp) \ perfmodels/$(DEPDIR)/$(am__dirstamp) perfmodels/memory$(EXEEXT): $(perfmodels_memory_OBJECTS) $(perfmodels_memory_DEPENDENCIES) $(EXTRA_perfmodels_memory_DEPENDENCIES) perfmodels/$(am__dirstamp) @rm -f perfmodels/memory$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perfmodels_memory_OBJECTS) $(perfmodels_memory_LDADD) $(LIBS) perfmodels/non_linear_regression_based.$(OBJEXT): \ perfmodels/$(am__dirstamp) \ perfmodels/$(DEPDIR)/$(am__dirstamp) perfmodels/opencl_memset.$(OBJEXT): perfmodels/$(am__dirstamp) \ perfmodels/$(DEPDIR)/$(am__dirstamp) perfmodels/non_linear_regression_based$(EXEEXT): $(perfmodels_non_linear_regression_based_OBJECTS) $(perfmodels_non_linear_regression_based_DEPENDENCIES) $(EXTRA_perfmodels_non_linear_regression_based_DEPENDENCIES) perfmodels/$(am__dirstamp) @rm -f perfmodels/non_linear_regression_based$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perfmodels_non_linear_regression_based_OBJECTS) $(perfmodels_non_linear_regression_based_LDADD) $(LIBS) perfmodels/path.$(OBJEXT): perfmodels/$(am__dirstamp) \ perfmodels/$(DEPDIR)/$(am__dirstamp) perfmodels/path$(EXEEXT): $(perfmodels_path_OBJECTS) $(perfmodels_path_DEPENDENCIES) $(EXTRA_perfmodels_path_DEPENDENCIES) perfmodels/$(am__dirstamp) @rm -f perfmodels/path$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perfmodels_path_OBJECTS) $(perfmodels_path_LDADD) $(LIBS) perfmodels/regression_based_check.$(OBJEXT): \ perfmodels/$(am__dirstamp) \ perfmodels/$(DEPDIR)/$(am__dirstamp) perfmodels/regression_based_check$(EXEEXT): $(perfmodels_regression_based_check_OBJECTS) $(perfmodels_regression_based_check_DEPENDENCIES) $(EXTRA_perfmodels_regression_based_check_DEPENDENCIES) perfmodels/$(am__dirstamp) @rm -f perfmodels/regression_based_check$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perfmodels_regression_based_check_OBJECTS) $(perfmodels_regression_based_check_LDADD) $(LIBS) perfmodels/regression_based_energy.$(OBJEXT): \ perfmodels/$(am__dirstamp) \ perfmodels/$(DEPDIR)/$(am__dirstamp) perfmodels/regression_based_energy$(EXEEXT): $(perfmodels_regression_based_energy_OBJECTS) $(perfmodels_regression_based_energy_DEPENDENCIES) $(EXTRA_perfmodels_regression_based_energy_DEPENDENCIES) perfmodels/$(am__dirstamp) @rm -f perfmodels/regression_based_energy$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perfmodels_regression_based_energy_OBJECTS) $(perfmodels_regression_based_energy_LDADD) $(LIBS) perfmodels/regression_based_gpu.$(OBJEXT): perfmodels/$(am__dirstamp) \ perfmodels/$(DEPDIR)/$(am__dirstamp) perfmodels/regression_based_gpu$(EXEEXT): $(perfmodels_regression_based_gpu_OBJECTS) $(perfmodels_regression_based_gpu_DEPENDENCIES) $(EXTRA_perfmodels_regression_based_gpu_DEPENDENCIES) perfmodels/$(am__dirstamp) @rm -f perfmodels/regression_based_gpu$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perfmodels_regression_based_gpu_OBJECTS) $(perfmodels_regression_based_gpu_LDADD) $(LIBS) perfmodels/regression_based_memset.$(OBJEXT): \ perfmodels/$(am__dirstamp) \ perfmodels/$(DEPDIR)/$(am__dirstamp) perfmodels/regression_based_memset$(EXEEXT): $(perfmodels_regression_based_memset_OBJECTS) $(perfmodels_regression_based_memset_DEPENDENCIES) $(EXTRA_perfmodels_regression_based_memset_DEPENDENCIES) perfmodels/$(am__dirstamp) @rm -f perfmodels/regression_based_memset$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perfmodels_regression_based_memset_OBJECTS) $(perfmodels_regression_based_memset_LDADD) $(LIBS) perfmodels/regression_based_multiimpl.$(OBJEXT): \ perfmodels/$(am__dirstamp) \ perfmodels/$(DEPDIR)/$(am__dirstamp) perfmodels/regression_based_multiimpl$(EXEEXT): $(perfmodels_regression_based_multiimpl_OBJECTS) $(perfmodels_regression_based_multiimpl_DEPENDENCIES) $(EXTRA_perfmodels_regression_based_multiimpl_DEPENDENCIES) perfmodels/$(am__dirstamp) @rm -f perfmodels/regression_based_multiimpl$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perfmodels_regression_based_multiimpl_OBJECTS) $(perfmodels_regression_based_multiimpl_LDADD) $(LIBS) perfmodels/user_base.$(OBJEXT): perfmodels/$(am__dirstamp) \ perfmodels/$(DEPDIR)/$(am__dirstamp) perfmodels/user_base$(EXEEXT): $(perfmodels_user_base_OBJECTS) $(perfmodels_user_base_DEPENDENCIES) $(EXTRA_perfmodels_user_base_DEPENDENCIES) perfmodels/$(am__dirstamp) @rm -f perfmodels/user_base$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perfmodels_user_base_OBJECTS) $(perfmodels_user_base_LDADD) $(LIBS) perfmodels/valid_model.$(OBJEXT): perfmodels/$(am__dirstamp) \ perfmodels/$(DEPDIR)/$(am__dirstamp) perfmodels/valid_model$(EXEEXT): $(perfmodels_valid_model_OBJECTS) $(perfmodels_valid_model_DEPENDENCIES) $(EXTRA_perfmodels_valid_model_DEPENDENCIES) perfmodels/$(am__dirstamp) @rm -f perfmodels/valid_model$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perfmodels_valid_model_OBJECTS) $(perfmodels_valid_model_LDADD) $(LIBS) perfmodels/value_nan.$(OBJEXT): perfmodels/$(am__dirstamp) \ perfmodels/$(DEPDIR)/$(am__dirstamp) perfmodels/value_nan$(EXEEXT): $(perfmodels_value_nan_OBJECTS) $(perfmodels_value_nan_DEPENDENCIES) $(EXTRA_perfmodels_value_nan_DEPENDENCIES) perfmodels/$(am__dirstamp) @rm -f perfmodels/value_nan$(EXEEXT) $(AM_V_CCLD)$(LINK) $(perfmodels_value_nan_OBJECTS) $(perfmodels_value_nan_LDADD) $(LIBS) sched_ctx/$(am__dirstamp): @$(MKDIR_P) sched_ctx @: > sched_ctx/$(am__dirstamp) sched_ctx/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) sched_ctx/$(DEPDIR) @: > sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/sched_ctx_hierarchy.$(OBJEXT): sched_ctx/$(am__dirstamp) \ sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/sched_ctx_hierarchy$(EXEEXT): $(sched_ctx_sched_ctx_hierarchy_OBJECTS) $(sched_ctx_sched_ctx_hierarchy_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_hierarchy_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/sched_ctx_hierarchy$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_ctx_sched_ctx_hierarchy_OBJECTS) $(sched_ctx_sched_ctx_hierarchy_LDADD) $(LIBS) sched_ctx/sched_ctx_list.$(OBJEXT): sched_ctx/$(am__dirstamp) \ sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/sched_ctx_list$(EXEEXT): $(sched_ctx_sched_ctx_list_OBJECTS) $(sched_ctx_sched_ctx_list_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_list_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/sched_ctx_list$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_ctx_sched_ctx_list_OBJECTS) $(sched_ctx_sched_ctx_list_LDADD) $(LIBS) sched_ctx/sched_ctx_policy_data.$(OBJEXT): sched_ctx/$(am__dirstamp) \ sched_ctx/$(DEPDIR)/$(am__dirstamp) sched_ctx/sched_ctx_policy_data$(EXEEXT): $(sched_ctx_sched_ctx_policy_data_OBJECTS) $(sched_ctx_sched_ctx_policy_data_DEPENDENCIES) $(EXTRA_sched_ctx_sched_ctx_policy_data_DEPENDENCIES) sched_ctx/$(am__dirstamp) @rm -f sched_ctx/sched_ctx_policy_data$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_ctx_sched_ctx_policy_data_OBJECTS) $(sched_ctx_sched_ctx_policy_data_LDADD) $(LIBS) sched_policies/$(am__dirstamp): @$(MKDIR_P) sched_policies @: > sched_policies/$(am__dirstamp) sched_policies/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) sched_policies/$(DEPDIR) @: > sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/data_locality.$(OBJEXT): \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/data_locality$(EXEEXT): $(sched_policies_data_locality_OBJECTS) $(sched_policies_data_locality_DEPENDENCIES) $(EXTRA_sched_policies_data_locality_DEPENDENCIES) sched_policies/$(am__dirstamp) @rm -f sched_policies/data_locality$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_policies_data_locality_OBJECTS) $(sched_policies_data_locality_LDADD) $(LIBS) sched_policies/execute_all_tasks-execute_all_tasks.$(OBJEXT): \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/execute_all_tasks$(EXEEXT): $(sched_policies_execute_all_tasks_OBJECTS) $(sched_policies_execute_all_tasks_DEPENDENCIES) $(EXTRA_sched_policies_execute_all_tasks_DEPENDENCIES) sched_policies/$(am__dirstamp) @rm -f sched_policies/execute_all_tasks$(EXEEXT) $(AM_V_CCLD)$(sched_policies_execute_all_tasks_LINK) $(sched_policies_execute_all_tasks_OBJECTS) $(sched_policies_execute_all_tasks_LDADD) $(LIBS) sched_policies/prio.$(OBJEXT): sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/prio$(EXEEXT): $(sched_policies_prio_OBJECTS) $(sched_policies_prio_DEPENDENCIES) $(EXTRA_sched_policies_prio_DEPENDENCIES) sched_policies/$(am__dirstamp) @rm -f sched_policies/prio$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_policies_prio_OBJECTS) $(sched_policies_prio_LDADD) $(LIBS) sched_policies/simple_cpu_gpu_sched.$(OBJEXT): \ sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/simple_cpu_gpu_sched$(EXEEXT): $(sched_policies_simple_cpu_gpu_sched_OBJECTS) $(sched_policies_simple_cpu_gpu_sched_DEPENDENCIES) $(EXTRA_sched_policies_simple_cpu_gpu_sched_DEPENDENCIES) sched_policies/$(am__dirstamp) @rm -f sched_policies/simple_cpu_gpu_sched$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_policies_simple_cpu_gpu_sched_OBJECTS) $(sched_policies_simple_cpu_gpu_sched_LDADD) $(LIBS) sched_policies/simple_deps.$(OBJEXT): sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/simple_deps$(EXEEXT): $(sched_policies_simple_deps_OBJECTS) $(sched_policies_simple_deps_DEPENDENCIES) $(EXTRA_sched_policies_simple_deps_DEPENDENCIES) sched_policies/$(am__dirstamp) @rm -f sched_policies/simple_deps$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_policies_simple_deps_OBJECTS) $(sched_policies_simple_deps_LDADD) $(LIBS) sched_policies/workerids.$(OBJEXT): sched_policies/$(am__dirstamp) \ sched_policies/$(DEPDIR)/$(am__dirstamp) sched_policies/workerids$(EXEEXT): $(sched_policies_workerids_OBJECTS) $(sched_policies_workerids_DEPENDENCIES) $(EXTRA_sched_policies_workerids_DEPENDENCIES) sched_policies/$(am__dirstamp) @rm -f sched_policies/workerids$(EXEEXT) $(AM_V_CCLD)$(LINK) $(sched_policies_workerids_OBJECTS) $(sched_policies_workerids_LDADD) $(LIBS) install-examplebinSCRIPTS: $(examplebin_SCRIPTS) @$(NORMAL_INSTALL) @list='$(examplebin_SCRIPTS)'; test -n "$(examplebindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(examplebindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(examplebindir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n' \ -e 'h;s|.*|.|' \ -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) { files[d] = files[d] " " $$1; \ if (++n[d] == $(am__install_max)) { \ print "f", d, files[d]; n[d] = 0; files[d] = "" } } \ else { print "f", d "/" $$4, $$1 } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(examplebindir)$$dir'"; \ $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(examplebindir)$$dir" || exit $$?; \ } \ ; done uninstall-examplebinSCRIPTS: @$(NORMAL_UNINSTALL) @list='$(examplebin_SCRIPTS)'; test -n "$(examplebindir)" || exit 0; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 's,.*/,,;$(transform)'`; \ dir='$(DESTDIR)$(examplebindir)'; $(am__uninstall_files_from_dir) mostlyclean-compile: -rm -f *.$(OBJEXT) -rm -f datawizard/*.$(OBJEXT) -rm -f datawizard/interfaces/*.$(OBJEXT) -rm -f datawizard/interfaces/bcsr/*.$(OBJEXT) -rm -f datawizard/interfaces/block/*.$(OBJEXT) -rm -f datawizard/interfaces/coo/*.$(OBJEXT) -rm -f datawizard/interfaces/csr/*.$(OBJEXT) -rm -f datawizard/interfaces/matrix/*.$(OBJEXT) -rm -f datawizard/interfaces/multiformat/*.$(OBJEXT) -rm -f datawizard/interfaces/multiformat/advanced/*.$(OBJEXT) -rm -f datawizard/interfaces/ndim/*.$(OBJEXT) -rm -f datawizard/interfaces/tensor/*.$(OBJEXT) -rm -f datawizard/interfaces/variable/*.$(OBJEXT) -rm -f datawizard/interfaces/vector/*.$(OBJEXT) -rm -f datawizard/interfaces/void/*.$(OBJEXT) -rm -f disk/*.$(OBJEXT) -rm -f energy/*.$(OBJEXT) -rm -f errorcheck/*.$(OBJEXT) -rm -f fault-tolerance/*.$(OBJEXT) -rm -f fortran90/*.$(OBJEXT) -rm -f helper/*.$(OBJEXT) -rm -f main/*.$(OBJEXT) -rm -f main/driver_api/*.$(OBJEXT) -rm -f maxfpga/*.$(OBJEXT) -rm -f microbenchs/*.$(OBJEXT) -rm -f openmp/*.$(OBJEXT) -rm -f overlap/*.$(OBJEXT) -rm -f parallel_tasks/*.$(OBJEXT) -rm -f perfmodels/*.$(OBJEXT) -rm -f sched_ctx/*.$(OBJEXT) -rm -f sched_policies/*.$(OBJEXT) -rm -f variable/*.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/acquire_cb.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/acquire_cb_insert.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/acquire_release.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/acquire_release2.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/acquire_release_to.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/acquire_try.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/allocate.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/allocate_many_numa_nodes.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/bcsr.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/cache.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/commute.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/commute2.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/copy.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/critical_section_with_void_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/data_deinitialize.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/data_implicit_deps.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/data_invalidation.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/data_register-data_register.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/deinitialize_pending_requests.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/deps.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/dining_philosophers.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/double_parameter.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/dsm_stress.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/gpu_ptr_register.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/gpu_register.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/handle_to_pointer.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/in_place_partition.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/increment_init.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/increment_redux.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/increment_redux_lazy.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/increment_redux_partition.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/increment_redux_v2.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/increment_redux_with_args.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/invalidate_pending_requests.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/lazy_allocation.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/locality.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/manual_reduction.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/mpi_like.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/mpi_like_async.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/no_unregister.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/noreclaim.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/nowhere.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/numa_overflow.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/partition_dep.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/partition_init.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/partition_lazy.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/partition_wontuse.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/partitioned_acquire.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/partitioned_initialization.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/readers_and_writers.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/readonly.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/reclaim.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/redux_acquire.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/scal.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/scratch.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/scratch_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/scratch_reuse.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/simgrid-locality.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/specific_node.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/specific_node_same.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/sync_and_notify_data.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/sync_and_notify_data_implicit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/sync_and_notify_data_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/sync_with_data_with_mem.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking_implicit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/task_with_multiple_time_the_same_handle.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/temporary_partition.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/temporary_partition_implicit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/temporary_partition_read.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/test_arbiter.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/unpartition.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/user_interaction_implicit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/variable_parameters.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/variable_size.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/write_only_tmp_buffer.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/wt_broadcast.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/$(DEPDIR)/wt_host.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/advanced/$(DEPDIR)/generic.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_cuda_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_data_release.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_handle_conversion.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_worker.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/multiformat/advanced/$(DEPDIR)/same_handle.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@disk/$(DEPDIR)/disk_compute.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@disk/$(DEPDIR)/disk_copy.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@disk/$(DEPDIR)/disk_copy_to_disk.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@disk/$(DEPDIR)/disk_copy_unpack.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@disk/$(DEPDIR)/disk_pack.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@disk/$(DEPDIR)/mem_reclaim.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@energy/$(DEPDIR)/energy_efficiency.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@errorcheck/$(DEPDIR)/invalid_blocking_calls.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@errorcheck/$(DEPDIR)/invalid_tasks.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@errorcheck/$(DEPDIR)/starpu_init_noworker.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@errorcheck/$(DEPDIR)/workers_cpuid.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@fault-tolerance/$(DEPDIR)/retry.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/cublasLt_init.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/cublas_init.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/cusparse_init.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/execute_on_all.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/hipblas_init.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/pinned_memory.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/starpu_create_sync_task.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/starpu_data_cpy.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@helper/$(DEPDIR)/starpu_data_dup_ro.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/bind.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/callback.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/codelet_null_callback.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/const_codelet.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/deadlock.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/declare_deps_after_submission.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/declare_deps_after_submission_synchronous.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/declare_deps_in_callback.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/deploop.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/deprecated_func-deprecated_func.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/display_binding.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/empty_task.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/empty_task_chain.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/empty_task_sync_point.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/empty_task_sync_point_tasks.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/execute_on_a_specific_worker.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/execute_schedule.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/get_children_tasks.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/get_current_task.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/hwloc_cpuset.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task_array.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task_dyn_handles.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task_many.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task_nullcodelet.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task_pack.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task_value.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/insert_task_where.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/job.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/mkdtemp.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/multithreaded.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/multithreaded_init.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/pack.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/pause_resume.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/regenerate.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/regenerate_pipeline.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/restart.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/starpu_init.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/starpu_task_bundle.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/starpu_task_wait.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/starpu_task_wait_for_all.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/static_restartable.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/static_restartable_tag.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/static_restartable_using_initializer.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/subgraph_repeat.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/subgraph_repeat_regenerate.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/subgraph_repeat_regenerate_tag.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/subgraph_repeat_regenerate_tag_cycle.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/subgraph_repeat_tag.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/submit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/tag_get_task.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/tag_task_data_deps.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/tag_wait_api.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/task_end_dep.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/task_wait_api.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/$(DEPDIR)/wait_all_regenerable_tasks.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/driver_api/$(DEPDIR)/init_run_deinit.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@main/driver_api/$(DEPDIR)/run_driver.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@maxfpga/$(DEPDIR)/max_fpga_advanced_static.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@maxfpga/$(DEPDIR)/max_fpga_basic_static.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@maxfpga/$(DEPDIR)/max_fpga_dynamic.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@maxfpga/$(DEPDIR)/max_fpga_mux.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/async_tasks_overhead.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/bandwidth.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/display_structures_size.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/local_pingpong.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/matrix_as_vector.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/parallel_dependent_homogeneous_tasks_data.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks_data.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks_data.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/parallel_redux_heterogeneous_tasks_data.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/parallel_redux_homogeneous_tasks_data.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/prefetch_data_on_node.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/redundant_buffer.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/sync_tasks_overhead.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/tasks_overhead.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@microbenchs/$(DEPDIR)/tasks_size_overhead.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/api_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/array_slice_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/cuda_task_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/environment.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/init_exit_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/init_exit_02.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_02.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_03.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_barrier_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_critical_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_critical_inline_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_critical_named_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_critical_named_inline_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_for_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_for_02.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_for_ordered_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_master_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_master_inline_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_nested_lock_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_sections_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_sections_combined_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_simple_lock_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_single_copyprivate_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_single_copyprivate_inline_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_single_inline_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_single_nowait_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/parallel_single_wait_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/task_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/task_02.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/task_03.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/taskgroup_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/taskgroup_02.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/taskloop.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@openmp/$(DEPDIR)/taskwait_01.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@overlap/$(DEPDIR)/gpu_concurrency.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@overlap/$(DEPDIR)/overlap.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@parallel_tasks/$(DEPDIR)/combined_worker_assign_workerid.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@parallel_tasks/$(DEPDIR)/cuda_only.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@parallel_tasks/$(DEPDIR)/explicit_combined_worker.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@parallel_tasks/$(DEPDIR)/parallel_kernels.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@parallel_tasks/$(DEPDIR)/parallel_kernels_spmd.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@parallel_tasks/$(DEPDIR)/parallel_kernels_trivial.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@parallel_tasks/$(DEPDIR)/spmd_peager.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/feed.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/memory.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/non_linear_regression_based.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/opencl_memset.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/path.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/regression_based_check.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/regression_based_energy.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/regression_based_gpu.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/regression_based_memset.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/regression_based_multiimpl.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/user_base.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/valid_model.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@perfmodels/$(DEPDIR)/value_nan.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_hierarchy.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_list.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_ctx/$(DEPDIR)/sched_ctx_policy_data.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/data_locality.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/prio.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/simple_cpu_gpu_sched.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/simple_deps.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@sched_policies/$(DEPDIR)/workerids.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@variable/$(DEPDIR)/increment.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@variable/$(DEPDIR)/increment_opencl.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< datawizard/data_register-data_register.o: datawizard/data_register.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_data_register_CFLAGS) $(CFLAGS) -MT datawizard/data_register-data_register.o -MD -MP -MF datawizard/$(DEPDIR)/data_register-data_register.Tpo -c -o datawizard/data_register-data_register.o `test -f 'datawizard/data_register.c' || echo '$(srcdir)/'`datawizard/data_register.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/$(DEPDIR)/data_register-data_register.Tpo datawizard/$(DEPDIR)/data_register-data_register.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/data_register.c' object='datawizard/data_register-data_register.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_data_register_CFLAGS) $(CFLAGS) -c -o datawizard/data_register-data_register.o `test -f 'datawizard/data_register.c' || echo '$(srcdir)/'`datawizard/data_register.c datawizard/data_register-data_register.obj: datawizard/data_register.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_data_register_CFLAGS) $(CFLAGS) -MT datawizard/data_register-data_register.obj -MD -MP -MF datawizard/$(DEPDIR)/data_register-data_register.Tpo -c -o datawizard/data_register-data_register.obj `if test -f 'datawizard/data_register.c'; then $(CYGPATH_W) 'datawizard/data_register.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/data_register.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/$(DEPDIR)/data_register-data_register.Tpo datawizard/$(DEPDIR)/data_register-data_register.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/data_register.c' object='datawizard/data_register-data_register.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_data_register_CFLAGS) $(CFLAGS) -c -o datawizard/data_register-data_register.obj `if test -f 'datawizard/data_register.c'; then $(CYGPATH_W) 'datawizard/data_register.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/data_register.c'; fi` datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Tpo -c -o datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Tpo -c -o datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/bcsr_bcsr_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.o: datawizard/interfaces/bcsr/bcsr_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.o -MD -MP -MF datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Tpo -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.o `test -f 'datawizard/interfaces/bcsr/bcsr_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/bcsr/bcsr_interface.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Tpo datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/bcsr/bcsr_interface.c' object='datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.o `test -f 'datawizard/interfaces/bcsr/bcsr_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/bcsr/bcsr_interface.c datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.obj: datawizard/interfaces/bcsr/bcsr_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.obj -MD -MP -MF datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Tpo -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.obj `if test -f 'datawizard/interfaces/bcsr/bcsr_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/bcsr/bcsr_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/bcsr/bcsr_interface.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Tpo datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/bcsr/bcsr_interface.c' object='datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_interface.obj `if test -f 'datawizard/interfaces/bcsr/bcsr_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/bcsr/bcsr_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/bcsr/bcsr_interface.c'; fi` datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.o: datawizard/interfaces/bcsr/bcsr_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.o -MD -MP -MF datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Tpo -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.o `test -f 'datawizard/interfaces/bcsr/bcsr_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/bcsr/bcsr_opencl.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Tpo datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/bcsr/bcsr_opencl.c' object='datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.o `test -f 'datawizard/interfaces/bcsr/bcsr_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/bcsr/bcsr_opencl.c datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.obj: datawizard/interfaces/bcsr/bcsr_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.obj -MD -MP -MF datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Tpo -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.obj `if test -f 'datawizard/interfaces/bcsr/bcsr_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/bcsr/bcsr_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/bcsr/bcsr_opencl.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Tpo datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/bcsr/bcsr_opencl.c' object='datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_bcsr_bcsr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/bcsr/bcsr_interface-bcsr_opencl.obj `if test -f 'datawizard/interfaces/bcsr/bcsr_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/bcsr/bcsr_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/bcsr/bcsr_opencl.c'; fi` datawizard/interfaces/block_block_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/block_block_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Tpo -c -o datawizard/interfaces/block_block_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/block_block_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/block_block_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c datawizard/interfaces/block_block_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/block_block_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Tpo -c -o datawizard/interfaces/block_block_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/block_block_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/block_block_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` datawizard/interfaces/block/block_interface-block_interface.o: datawizard/interfaces/block/block_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/block/block_interface-block_interface.o -MD -MP -MF datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Tpo -c -o datawizard/interfaces/block/block_interface-block_interface.o `test -f 'datawizard/interfaces/block/block_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/block/block_interface.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Tpo datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/block/block_interface.c' object='datawizard/interfaces/block/block_interface-block_interface.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/block/block_interface-block_interface.o `test -f 'datawizard/interfaces/block/block_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/block/block_interface.c datawizard/interfaces/block/block_interface-block_interface.obj: datawizard/interfaces/block/block_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/block/block_interface-block_interface.obj -MD -MP -MF datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Tpo -c -o datawizard/interfaces/block/block_interface-block_interface.obj `if test -f 'datawizard/interfaces/block/block_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/block/block_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/block/block_interface.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Tpo datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/block/block_interface.c' object='datawizard/interfaces/block/block_interface-block_interface.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/block/block_interface-block_interface.obj `if test -f 'datawizard/interfaces/block/block_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/block/block_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/block/block_interface.c'; fi` datawizard/interfaces/block/block_interface-block_opencl.o: datawizard/interfaces/block/block_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/block/block_interface-block_opencl.o -MD -MP -MF datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Tpo -c -o datawizard/interfaces/block/block_interface-block_opencl.o `test -f 'datawizard/interfaces/block/block_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/block/block_opencl.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Tpo datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/block/block_opencl.c' object='datawizard/interfaces/block/block_interface-block_opencl.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/block/block_interface-block_opencl.o `test -f 'datawizard/interfaces/block/block_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/block/block_opencl.c datawizard/interfaces/block/block_interface-block_opencl.obj: datawizard/interfaces/block/block_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/block/block_interface-block_opencl.obj -MD -MP -MF datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Tpo -c -o datawizard/interfaces/block/block_interface-block_opencl.obj `if test -f 'datawizard/interfaces/block/block_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/block/block_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/block/block_opencl.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Tpo datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/block/block_opencl.c' object='datawizard/interfaces/block/block_interface-block_opencl.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_block_block_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/block/block_interface-block_opencl.obj `if test -f 'datawizard/interfaces/block/block_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/block/block_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/block/block_opencl.c'; fi` datawizard/interfaces/coo_coo_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/coo_coo_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Tpo -c -o datawizard/interfaces/coo_coo_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/coo_coo_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/coo_coo_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c datawizard/interfaces/coo_coo_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/coo_coo_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Tpo -c -o datawizard/interfaces/coo_coo_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/coo_coo_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/coo_coo_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` datawizard/interfaces/coo/coo_interface-coo_interface.o: datawizard/interfaces/coo/coo_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/coo/coo_interface-coo_interface.o -MD -MP -MF datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Tpo -c -o datawizard/interfaces/coo/coo_interface-coo_interface.o `test -f 'datawizard/interfaces/coo/coo_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/coo/coo_interface.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Tpo datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/coo/coo_interface.c' object='datawizard/interfaces/coo/coo_interface-coo_interface.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/coo/coo_interface-coo_interface.o `test -f 'datawizard/interfaces/coo/coo_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/coo/coo_interface.c datawizard/interfaces/coo/coo_interface-coo_interface.obj: datawizard/interfaces/coo/coo_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/coo/coo_interface-coo_interface.obj -MD -MP -MF datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Tpo -c -o datawizard/interfaces/coo/coo_interface-coo_interface.obj `if test -f 'datawizard/interfaces/coo/coo_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/coo/coo_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/coo/coo_interface.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Tpo datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/coo/coo_interface.c' object='datawizard/interfaces/coo/coo_interface-coo_interface.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/coo/coo_interface-coo_interface.obj `if test -f 'datawizard/interfaces/coo/coo_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/coo/coo_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/coo/coo_interface.c'; fi` datawizard/interfaces/coo/coo_interface-coo_opencl.o: datawizard/interfaces/coo/coo_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/coo/coo_interface-coo_opencl.o -MD -MP -MF datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Tpo -c -o datawizard/interfaces/coo/coo_interface-coo_opencl.o `test -f 'datawizard/interfaces/coo/coo_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/coo/coo_opencl.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Tpo datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/coo/coo_opencl.c' object='datawizard/interfaces/coo/coo_interface-coo_opencl.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/coo/coo_interface-coo_opencl.o `test -f 'datawizard/interfaces/coo/coo_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/coo/coo_opencl.c datawizard/interfaces/coo/coo_interface-coo_opencl.obj: datawizard/interfaces/coo/coo_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/coo/coo_interface-coo_opencl.obj -MD -MP -MF datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Tpo -c -o datawizard/interfaces/coo/coo_interface-coo_opencl.obj `if test -f 'datawizard/interfaces/coo/coo_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/coo/coo_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/coo/coo_opencl.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Tpo datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/coo/coo_opencl.c' object='datawizard/interfaces/coo/coo_interface-coo_opencl.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_coo_coo_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/coo/coo_interface-coo_opencl.obj `if test -f 'datawizard/interfaces/coo/coo_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/coo/coo_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/coo/coo_opencl.c'; fi` datawizard/interfaces/copy_interfaces-copy_interfaces.o: datawizard/interfaces/copy_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_copy_interfaces_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/copy_interfaces-copy_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Tpo -c -o datawizard/interfaces/copy_interfaces-copy_interfaces.o `test -f 'datawizard/interfaces/copy_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/copy_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/copy_interfaces.c' object='datawizard/interfaces/copy_interfaces-copy_interfaces.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_copy_interfaces_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/copy_interfaces-copy_interfaces.o `test -f 'datawizard/interfaces/copy_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/copy_interfaces.c datawizard/interfaces/copy_interfaces-copy_interfaces.obj: datawizard/interfaces/copy_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_copy_interfaces_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/copy_interfaces-copy_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Tpo -c -o datawizard/interfaces/copy_interfaces-copy_interfaces.obj `if test -f 'datawizard/interfaces/copy_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/copy_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/copy_interfaces.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/copy_interfaces.c' object='datawizard/interfaces/copy_interfaces-copy_interfaces.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_copy_interfaces_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/copy_interfaces-copy_interfaces.obj `if test -f 'datawizard/interfaces/copy_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/copy_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/copy_interfaces.c'; fi` datawizard/interfaces/csr_csr_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/csr_csr_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Tpo -c -o datawizard/interfaces/csr_csr_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/csr_csr_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/csr_csr_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c datawizard/interfaces/csr_csr_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/csr_csr_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Tpo -c -o datawizard/interfaces/csr_csr_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/csr_csr_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/csr_csr_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` datawizard/interfaces/csr/csr_interface-csr_interface.o: datawizard/interfaces/csr/csr_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/csr/csr_interface-csr_interface.o -MD -MP -MF datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Tpo -c -o datawizard/interfaces/csr/csr_interface-csr_interface.o `test -f 'datawizard/interfaces/csr/csr_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/csr/csr_interface.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Tpo datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/csr/csr_interface.c' object='datawizard/interfaces/csr/csr_interface-csr_interface.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/csr/csr_interface-csr_interface.o `test -f 'datawizard/interfaces/csr/csr_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/csr/csr_interface.c datawizard/interfaces/csr/csr_interface-csr_interface.obj: datawizard/interfaces/csr/csr_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/csr/csr_interface-csr_interface.obj -MD -MP -MF datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Tpo -c -o datawizard/interfaces/csr/csr_interface-csr_interface.obj `if test -f 'datawizard/interfaces/csr/csr_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/csr/csr_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/csr/csr_interface.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Tpo datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/csr/csr_interface.c' object='datawizard/interfaces/csr/csr_interface-csr_interface.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/csr/csr_interface-csr_interface.obj `if test -f 'datawizard/interfaces/csr/csr_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/csr/csr_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/csr/csr_interface.c'; fi` datawizard/interfaces/csr/csr_interface-csr_opencl.o: datawizard/interfaces/csr/csr_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/csr/csr_interface-csr_opencl.o -MD -MP -MF datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Tpo -c -o datawizard/interfaces/csr/csr_interface-csr_opencl.o `test -f 'datawizard/interfaces/csr/csr_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/csr/csr_opencl.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Tpo datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/csr/csr_opencl.c' object='datawizard/interfaces/csr/csr_interface-csr_opencl.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/csr/csr_interface-csr_opencl.o `test -f 'datawizard/interfaces/csr/csr_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/csr/csr_opencl.c datawizard/interfaces/csr/csr_interface-csr_opencl.obj: datawizard/interfaces/csr/csr_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/csr/csr_interface-csr_opencl.obj -MD -MP -MF datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Tpo -c -o datawizard/interfaces/csr/csr_interface-csr_opencl.obj `if test -f 'datawizard/interfaces/csr/csr_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/csr/csr_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/csr/csr_opencl.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Tpo datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/csr/csr_opencl.c' object='datawizard/interfaces/csr/csr_interface-csr_opencl.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_csr_csr_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/csr/csr_interface-csr_opencl.obj `if test -f 'datawizard/interfaces/csr/csr_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/csr/csr_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/csr/csr_opencl.c'; fi` datawizard/interfaces/matrix_matrix_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/matrix_matrix_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Tpo -c -o datawizard/interfaces/matrix_matrix_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/matrix_matrix_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/matrix_matrix_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c datawizard/interfaces/matrix_matrix_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/matrix_matrix_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Tpo -c -o datawizard/interfaces/matrix_matrix_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/matrix_matrix_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/matrix_matrix_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` datawizard/interfaces/matrix/matrix_interface-matrix_interface.o: datawizard/interfaces/matrix/matrix_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/matrix/matrix_interface-matrix_interface.o -MD -MP -MF datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Tpo -c -o datawizard/interfaces/matrix/matrix_interface-matrix_interface.o `test -f 'datawizard/interfaces/matrix/matrix_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/matrix/matrix_interface.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Tpo datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/matrix/matrix_interface.c' object='datawizard/interfaces/matrix/matrix_interface-matrix_interface.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/matrix/matrix_interface-matrix_interface.o `test -f 'datawizard/interfaces/matrix/matrix_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/matrix/matrix_interface.c datawizard/interfaces/matrix/matrix_interface-matrix_interface.obj: datawizard/interfaces/matrix/matrix_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/matrix/matrix_interface-matrix_interface.obj -MD -MP -MF datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Tpo -c -o datawizard/interfaces/matrix/matrix_interface-matrix_interface.obj `if test -f 'datawizard/interfaces/matrix/matrix_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/matrix/matrix_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/matrix/matrix_interface.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Tpo datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/matrix/matrix_interface.c' object='datawizard/interfaces/matrix/matrix_interface-matrix_interface.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/matrix/matrix_interface-matrix_interface.obj `if test -f 'datawizard/interfaces/matrix/matrix_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/matrix/matrix_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/matrix/matrix_interface.c'; fi` datawizard/interfaces/matrix/matrix_interface-matrix_opencl.o: datawizard/interfaces/matrix/matrix_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/matrix/matrix_interface-matrix_opencl.o -MD -MP -MF datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Tpo -c -o datawizard/interfaces/matrix/matrix_interface-matrix_opencl.o `test -f 'datawizard/interfaces/matrix/matrix_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/matrix/matrix_opencl.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Tpo datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/matrix/matrix_opencl.c' object='datawizard/interfaces/matrix/matrix_interface-matrix_opencl.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/matrix/matrix_interface-matrix_opencl.o `test -f 'datawizard/interfaces/matrix/matrix_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/matrix/matrix_opencl.c datawizard/interfaces/matrix/matrix_interface-matrix_opencl.obj: datawizard/interfaces/matrix/matrix_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/matrix/matrix_interface-matrix_opencl.obj -MD -MP -MF datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Tpo -c -o datawizard/interfaces/matrix/matrix_interface-matrix_opencl.obj `if test -f 'datawizard/interfaces/matrix/matrix_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/matrix/matrix_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/matrix/matrix_opencl.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Tpo datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/matrix/matrix_opencl.c' object='datawizard/interfaces/matrix/matrix_interface-matrix_opencl.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_matrix_matrix_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/matrix/matrix_interface-matrix_opencl.obj `if test -f 'datawizard/interfaces/matrix/matrix_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/matrix/matrix_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/matrix/matrix_opencl.c'; fi` datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Tpo -c -o datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Tpo -c -o datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat_multiformat_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.o: datawizard/interfaces/multiformat/multiformat_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.o -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.o `test -f 'datawizard/interfaces/multiformat/multiformat_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_interface.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_interface.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.o `test -f 'datawizard/interfaces/multiformat/multiformat_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_interface.c datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.obj: datawizard/interfaces/multiformat/multiformat_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.obj -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_interface.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_interface.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_interface.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_interface.c'; fi` datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.o: datawizard/interfaces/multiformat/multiformat_conversion_codelets.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.o -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.o `test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets.c' || echo '$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_conversion_codelets.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_conversion_codelets.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.o `test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets.c' || echo '$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_conversion_codelets.c datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.obj: datawizard/interfaces/multiformat/multiformat_conversion_codelets.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.obj -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets.c'; then $(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_conversion_codelets.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_conversion_codelets.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_conversion_codelets.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets.c'; then $(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_conversion_codelets.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_conversion_codelets.c'; fi` datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.o: datawizard/interfaces/multiformat/multiformat_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.o -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.o `test -f 'datawizard/interfaces/multiformat/multiformat_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_opencl.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_opencl.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.o `test -f 'datawizard/interfaces/multiformat/multiformat_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_opencl.c datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.obj: datawizard/interfaces/multiformat/multiformat_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.obj -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_opencl.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_opencl.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_opencl.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_opencl.c'; fi` datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.o: datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.o -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.o `test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.o `test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.obj: datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.obj -MD -MP -MF datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Tpo -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Tpo datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c' object='datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_multiformat_multiformat_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/multiformat/multiformat_interface-multiformat_conversion_codelets_opencl.obj `if test -f 'datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c'; fi` datawizard/interfaces/ndim_ndim_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/ndim_ndim_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Tpo -c -o datawizard/interfaces/ndim_ndim_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/ndim_ndim_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/ndim_ndim_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c datawizard/interfaces/ndim_ndim_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/ndim_ndim_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Tpo -c -o datawizard/interfaces/ndim_ndim_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/ndim_ndim_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/ndim_ndim_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` datawizard/interfaces/ndim/ndim_interface-ndim_interface.o: datawizard/interfaces/ndim/ndim_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/ndim/ndim_interface-ndim_interface.o -MD -MP -MF datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Tpo -c -o datawizard/interfaces/ndim/ndim_interface-ndim_interface.o `test -f 'datawizard/interfaces/ndim/ndim_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/ndim/ndim_interface.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Tpo datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/ndim/ndim_interface.c' object='datawizard/interfaces/ndim/ndim_interface-ndim_interface.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/ndim/ndim_interface-ndim_interface.o `test -f 'datawizard/interfaces/ndim/ndim_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/ndim/ndim_interface.c datawizard/interfaces/ndim/ndim_interface-ndim_interface.obj: datawizard/interfaces/ndim/ndim_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/ndim/ndim_interface-ndim_interface.obj -MD -MP -MF datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Tpo -c -o datawizard/interfaces/ndim/ndim_interface-ndim_interface.obj `if test -f 'datawizard/interfaces/ndim/ndim_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/ndim/ndim_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/ndim/ndim_interface.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Tpo datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/ndim/ndim_interface.c' object='datawizard/interfaces/ndim/ndim_interface-ndim_interface.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/ndim/ndim_interface-ndim_interface.obj `if test -f 'datawizard/interfaces/ndim/ndim_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/ndim/ndim_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/ndim/ndim_interface.c'; fi` datawizard/interfaces/ndim/ndim_interface-ndim_opencl.o: datawizard/interfaces/ndim/ndim_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/ndim/ndim_interface-ndim_opencl.o -MD -MP -MF datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Tpo -c -o datawizard/interfaces/ndim/ndim_interface-ndim_opencl.o `test -f 'datawizard/interfaces/ndim/ndim_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/ndim/ndim_opencl.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Tpo datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/ndim/ndim_opencl.c' object='datawizard/interfaces/ndim/ndim_interface-ndim_opencl.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/ndim/ndim_interface-ndim_opencl.o `test -f 'datawizard/interfaces/ndim/ndim_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/ndim/ndim_opencl.c datawizard/interfaces/ndim/ndim_interface-ndim_opencl.obj: datawizard/interfaces/ndim/ndim_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/ndim/ndim_interface-ndim_opencl.obj -MD -MP -MF datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Tpo -c -o datawizard/interfaces/ndim/ndim_interface-ndim_opencl.obj `if test -f 'datawizard/interfaces/ndim/ndim_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/ndim/ndim_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/ndim/ndim_opencl.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Tpo datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/ndim/ndim_opencl.c' object='datawizard/interfaces/ndim/ndim_interface-ndim_opencl.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_ndim_ndim_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/ndim/ndim_interface-ndim_opencl.obj `if test -f 'datawizard/interfaces/ndim/ndim_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/ndim/ndim_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/ndim/ndim_opencl.c'; fi` datawizard/interfaces/tensor_tensor_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/tensor_tensor_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Tpo -c -o datawizard/interfaces/tensor_tensor_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/tensor_tensor_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/tensor_tensor_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c datawizard/interfaces/tensor_tensor_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/tensor_tensor_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Tpo -c -o datawizard/interfaces/tensor_tensor_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/tensor_tensor_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/tensor_tensor_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` datawizard/interfaces/tensor/tensor_interface-tensor_interface.o: datawizard/interfaces/tensor/tensor_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/tensor/tensor_interface-tensor_interface.o -MD -MP -MF datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Tpo -c -o datawizard/interfaces/tensor/tensor_interface-tensor_interface.o `test -f 'datawizard/interfaces/tensor/tensor_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/tensor/tensor_interface.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Tpo datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/tensor/tensor_interface.c' object='datawizard/interfaces/tensor/tensor_interface-tensor_interface.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/tensor/tensor_interface-tensor_interface.o `test -f 'datawizard/interfaces/tensor/tensor_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/tensor/tensor_interface.c datawizard/interfaces/tensor/tensor_interface-tensor_interface.obj: datawizard/interfaces/tensor/tensor_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/tensor/tensor_interface-tensor_interface.obj -MD -MP -MF datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Tpo -c -o datawizard/interfaces/tensor/tensor_interface-tensor_interface.obj `if test -f 'datawizard/interfaces/tensor/tensor_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/tensor/tensor_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/tensor/tensor_interface.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Tpo datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/tensor/tensor_interface.c' object='datawizard/interfaces/tensor/tensor_interface-tensor_interface.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/tensor/tensor_interface-tensor_interface.obj `if test -f 'datawizard/interfaces/tensor/tensor_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/tensor/tensor_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/tensor/tensor_interface.c'; fi` datawizard/interfaces/tensor/tensor_interface-tensor_opencl.o: datawizard/interfaces/tensor/tensor_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/tensor/tensor_interface-tensor_opencl.o -MD -MP -MF datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Tpo -c -o datawizard/interfaces/tensor/tensor_interface-tensor_opencl.o `test -f 'datawizard/interfaces/tensor/tensor_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/tensor/tensor_opencl.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Tpo datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/tensor/tensor_opencl.c' object='datawizard/interfaces/tensor/tensor_interface-tensor_opencl.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/tensor/tensor_interface-tensor_opencl.o `test -f 'datawizard/interfaces/tensor/tensor_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/tensor/tensor_opencl.c datawizard/interfaces/tensor/tensor_interface-tensor_opencl.obj: datawizard/interfaces/tensor/tensor_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/tensor/tensor_interface-tensor_opencl.obj -MD -MP -MF datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Tpo -c -o datawizard/interfaces/tensor/tensor_interface-tensor_opencl.obj `if test -f 'datawizard/interfaces/tensor/tensor_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/tensor/tensor_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/tensor/tensor_opencl.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Tpo datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/tensor/tensor_opencl.c' object='datawizard/interfaces/tensor/tensor_interface-tensor_opencl.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_tensor_tensor_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/tensor/tensor_interface-tensor_opencl.obj `if test -f 'datawizard/interfaces/tensor/tensor_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/tensor/tensor_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/tensor/tensor_opencl.c'; fi` datawizard/interfaces/variable_variable_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/variable_variable_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Tpo -c -o datawizard/interfaces/variable_variable_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/variable_variable_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/variable_variable_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c datawizard/interfaces/variable_variable_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/variable_variable_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Tpo -c -o datawizard/interfaces/variable_variable_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/variable_variable_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/variable_variable_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` datawizard/interfaces/variable/variable_interface-variable_interface.o: datawizard/interfaces/variable/variable_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/variable/variable_interface-variable_interface.o -MD -MP -MF datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Tpo -c -o datawizard/interfaces/variable/variable_interface-variable_interface.o `test -f 'datawizard/interfaces/variable/variable_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/variable/variable_interface.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Tpo datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/variable/variable_interface.c' object='datawizard/interfaces/variable/variable_interface-variable_interface.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/variable/variable_interface-variable_interface.o `test -f 'datawizard/interfaces/variable/variable_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/variable/variable_interface.c datawizard/interfaces/variable/variable_interface-variable_interface.obj: datawizard/interfaces/variable/variable_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/variable/variable_interface-variable_interface.obj -MD -MP -MF datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Tpo -c -o datawizard/interfaces/variable/variable_interface-variable_interface.obj `if test -f 'datawizard/interfaces/variable/variable_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/variable/variable_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/variable/variable_interface.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Tpo datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/variable/variable_interface.c' object='datawizard/interfaces/variable/variable_interface-variable_interface.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/variable/variable_interface-variable_interface.obj `if test -f 'datawizard/interfaces/variable/variable_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/variable/variable_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/variable/variable_interface.c'; fi` datawizard/interfaces/variable/variable_interface-variable_opencl.o: datawizard/interfaces/variable/variable_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/variable/variable_interface-variable_opencl.o -MD -MP -MF datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Tpo -c -o datawizard/interfaces/variable/variable_interface-variable_opencl.o `test -f 'datawizard/interfaces/variable/variable_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/variable/variable_opencl.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Tpo datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/variable/variable_opencl.c' object='datawizard/interfaces/variable/variable_interface-variable_opencl.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/variable/variable_interface-variable_opencl.o `test -f 'datawizard/interfaces/variable/variable_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/variable/variable_opencl.c datawizard/interfaces/variable/variable_interface-variable_opencl.obj: datawizard/interfaces/variable/variable_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/variable/variable_interface-variable_opencl.obj -MD -MP -MF datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Tpo -c -o datawizard/interfaces/variable/variable_interface-variable_opencl.obj `if test -f 'datawizard/interfaces/variable/variable_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/variable/variable_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/variable/variable_opencl.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Tpo datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/variable/variable_opencl.c' object='datawizard/interfaces/variable/variable_interface-variable_opencl.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_variable_variable_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/variable/variable_interface-variable_opencl.obj `if test -f 'datawizard/interfaces/variable/variable_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/variable/variable_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/variable/variable_opencl.c'; fi` datawizard/interfaces/vector/vector_interface-vector_interface.o: datawizard/interfaces/vector/vector_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/vector/vector_interface-vector_interface.o -MD -MP -MF datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Tpo -c -o datawizard/interfaces/vector/vector_interface-vector_interface.o `test -f 'datawizard/interfaces/vector/vector_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/vector/vector_interface.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Tpo datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/vector/vector_interface.c' object='datawizard/interfaces/vector/vector_interface-vector_interface.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/vector/vector_interface-vector_interface.o `test -f 'datawizard/interfaces/vector/vector_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/vector/vector_interface.c datawizard/interfaces/vector/vector_interface-vector_interface.obj: datawizard/interfaces/vector/vector_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/vector/vector_interface-vector_interface.obj -MD -MP -MF datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Tpo -c -o datawizard/interfaces/vector/vector_interface-vector_interface.obj `if test -f 'datawizard/interfaces/vector/vector_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/vector/vector_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/vector/vector_interface.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Tpo datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/vector/vector_interface.c' object='datawizard/interfaces/vector/vector_interface-vector_interface.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/vector/vector_interface-vector_interface.obj `if test -f 'datawizard/interfaces/vector/vector_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/vector/vector_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/vector/vector_interface.c'; fi` datawizard/interfaces/vector_vector_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/vector_vector_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Tpo -c -o datawizard/interfaces/vector_vector_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/vector_vector_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/vector_vector_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c datawizard/interfaces/vector_vector_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/vector_vector_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Tpo -c -o datawizard/interfaces/vector_vector_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/vector_vector_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/vector_vector_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` datawizard/interfaces/vector/vector_interface-vector_opencl.o: datawizard/interfaces/vector/vector_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/vector/vector_interface-vector_opencl.o -MD -MP -MF datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Tpo -c -o datawizard/interfaces/vector/vector_interface-vector_opencl.o `test -f 'datawizard/interfaces/vector/vector_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/vector/vector_opencl.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Tpo datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/vector/vector_opencl.c' object='datawizard/interfaces/vector/vector_interface-vector_opencl.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/vector/vector_interface-vector_opencl.o `test -f 'datawizard/interfaces/vector/vector_opencl.c' || echo '$(srcdir)/'`datawizard/interfaces/vector/vector_opencl.c datawizard/interfaces/vector/vector_interface-vector_opencl.obj: datawizard/interfaces/vector/vector_opencl.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/vector/vector_interface-vector_opencl.obj -MD -MP -MF datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Tpo -c -o datawizard/interfaces/vector/vector_interface-vector_opencl.obj `if test -f 'datawizard/interfaces/vector/vector_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/vector/vector_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/vector/vector_opencl.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Tpo datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/vector/vector_opencl.c' object='datawizard/interfaces/vector/vector_interface-vector_opencl.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_vector_vector_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/vector/vector_interface-vector_opencl.obj `if test -f 'datawizard/interfaces/vector/vector_opencl.c'; then $(CYGPATH_W) 'datawizard/interfaces/vector/vector_opencl.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/vector/vector_opencl.c'; fi` datawizard/interfaces/void_void_interface-test_interfaces.o: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/void_void_interface-test_interfaces.o -MD -MP -MF datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Tpo -c -o datawizard/interfaces/void_void_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/void_void_interface-test_interfaces.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/void_void_interface-test_interfaces.o `test -f 'datawizard/interfaces/test_interfaces.c' || echo '$(srcdir)/'`datawizard/interfaces/test_interfaces.c datawizard/interfaces/void_void_interface-test_interfaces.obj: datawizard/interfaces/test_interfaces.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/void_void_interface-test_interfaces.obj -MD -MP -MF datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Tpo -c -o datawizard/interfaces/void_void_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Tpo datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/test_interfaces.c' object='datawizard/interfaces/void_void_interface-test_interfaces.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/void_void_interface-test_interfaces.obj `if test -f 'datawizard/interfaces/test_interfaces.c'; then $(CYGPATH_W) 'datawizard/interfaces/test_interfaces.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/test_interfaces.c'; fi` datawizard/interfaces/void/void_interface-void_interface.o: datawizard/interfaces/void/void_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/void/void_interface-void_interface.o -MD -MP -MF datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Tpo -c -o datawizard/interfaces/void/void_interface-void_interface.o `test -f 'datawizard/interfaces/void/void_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/void/void_interface.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Tpo datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/void/void_interface.c' object='datawizard/interfaces/void/void_interface-void_interface.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/void/void_interface-void_interface.o `test -f 'datawizard/interfaces/void/void_interface.c' || echo '$(srcdir)/'`datawizard/interfaces/void/void_interface.c datawizard/interfaces/void/void_interface-void_interface.obj: datawizard/interfaces/void/void_interface.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -MT datawizard/interfaces/void/void_interface-void_interface.obj -MD -MP -MF datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Tpo -c -o datawizard/interfaces/void/void_interface-void_interface.obj `if test -f 'datawizard/interfaces/void/void_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/void/void_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/void/void_interface.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Tpo datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datawizard/interfaces/void/void_interface.c' object='datawizard/interfaces/void/void_interface-void_interface.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(datawizard_interfaces_void_void_interface_CFLAGS) $(CFLAGS) -c -o datawizard/interfaces/void/void_interface-void_interface.obj `if test -f 'datawizard/interfaces/void/void_interface.c'; then $(CYGPATH_W) 'datawizard/interfaces/void/void_interface.c'; else $(CYGPATH_W) '$(srcdir)/datawizard/interfaces/void/void_interface.c'; fi` loader-loader.o: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c loader-loader.obj: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` main/deprecated_func-deprecated_func.o: main/deprecated_func.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_deprecated_func_CFLAGS) $(CFLAGS) -MT main/deprecated_func-deprecated_func.o -MD -MP -MF main/$(DEPDIR)/deprecated_func-deprecated_func.Tpo -c -o main/deprecated_func-deprecated_func.o `test -f 'main/deprecated_func.c' || echo '$(srcdir)/'`main/deprecated_func.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) main/$(DEPDIR)/deprecated_func-deprecated_func.Tpo main/$(DEPDIR)/deprecated_func-deprecated_func.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='main/deprecated_func.c' object='main/deprecated_func-deprecated_func.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_deprecated_func_CFLAGS) $(CFLAGS) -c -o main/deprecated_func-deprecated_func.o `test -f 'main/deprecated_func.c' || echo '$(srcdir)/'`main/deprecated_func.c main/deprecated_func-deprecated_func.obj: main/deprecated_func.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_deprecated_func_CFLAGS) $(CFLAGS) -MT main/deprecated_func-deprecated_func.obj -MD -MP -MF main/$(DEPDIR)/deprecated_func-deprecated_func.Tpo -c -o main/deprecated_func-deprecated_func.obj `if test -f 'main/deprecated_func.c'; then $(CYGPATH_W) 'main/deprecated_func.c'; else $(CYGPATH_W) '$(srcdir)/main/deprecated_func.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) main/$(DEPDIR)/deprecated_func-deprecated_func.Tpo main/$(DEPDIR)/deprecated_func-deprecated_func.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='main/deprecated_func.c' object='main/deprecated_func-deprecated_func.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_deprecated_func_CFLAGS) $(CFLAGS) -c -o main/deprecated_func-deprecated_func.obj `if test -f 'main/deprecated_func.c'; then $(CYGPATH_W) 'main/deprecated_func.c'; else $(CYGPATH_W) '$(srcdir)/main/deprecated_func.c'; fi` main/starpu_worker_exists-starpu_worker_exists.o: main/starpu_worker_exists.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_starpu_worker_exists_CFLAGS) $(CFLAGS) -MT main/starpu_worker_exists-starpu_worker_exists.o -MD -MP -MF main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Tpo -c -o main/starpu_worker_exists-starpu_worker_exists.o `test -f 'main/starpu_worker_exists.c' || echo '$(srcdir)/'`main/starpu_worker_exists.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Tpo main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='main/starpu_worker_exists.c' object='main/starpu_worker_exists-starpu_worker_exists.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_starpu_worker_exists_CFLAGS) $(CFLAGS) -c -o main/starpu_worker_exists-starpu_worker_exists.o `test -f 'main/starpu_worker_exists.c' || echo '$(srcdir)/'`main/starpu_worker_exists.c main/starpu_worker_exists-starpu_worker_exists.obj: main/starpu_worker_exists.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_starpu_worker_exists_CFLAGS) $(CFLAGS) -MT main/starpu_worker_exists-starpu_worker_exists.obj -MD -MP -MF main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Tpo -c -o main/starpu_worker_exists-starpu_worker_exists.obj `if test -f 'main/starpu_worker_exists.c'; then $(CYGPATH_W) 'main/starpu_worker_exists.c'; else $(CYGPATH_W) '$(srcdir)/main/starpu_worker_exists.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Tpo main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='main/starpu_worker_exists.c' object='main/starpu_worker_exists-starpu_worker_exists.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(main_starpu_worker_exists_CFLAGS) $(CFLAGS) -c -o main/starpu_worker_exists-starpu_worker_exists.obj `if test -f 'main/starpu_worker_exists.c'; then $(CYGPATH_W) 'main/starpu_worker_exists.c'; else $(CYGPATH_W) '$(srcdir)/main/starpu_worker_exists.c'; fi` sched_policies/execute_all_tasks-execute_all_tasks.o: sched_policies/execute_all_tasks.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_policies_execute_all_tasks_CFLAGS) $(CFLAGS) -MT sched_policies/execute_all_tasks-execute_all_tasks.o -MD -MP -MF sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Tpo -c -o sched_policies/execute_all_tasks-execute_all_tasks.o `test -f 'sched_policies/execute_all_tasks.c' || echo '$(srcdir)/'`sched_policies/execute_all_tasks.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Tpo sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_policies/execute_all_tasks.c' object='sched_policies/execute_all_tasks-execute_all_tasks.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_policies_execute_all_tasks_CFLAGS) $(CFLAGS) -c -o sched_policies/execute_all_tasks-execute_all_tasks.o `test -f 'sched_policies/execute_all_tasks.c' || echo '$(srcdir)/'`sched_policies/execute_all_tasks.c sched_policies/execute_all_tasks-execute_all_tasks.obj: sched_policies/execute_all_tasks.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_policies_execute_all_tasks_CFLAGS) $(CFLAGS) -MT sched_policies/execute_all_tasks-execute_all_tasks.obj -MD -MP -MF sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Tpo -c -o sched_policies/execute_all_tasks-execute_all_tasks.obj `if test -f 'sched_policies/execute_all_tasks.c'; then $(CYGPATH_W) 'sched_policies/execute_all_tasks.c'; else $(CYGPATH_W) '$(srcdir)/sched_policies/execute_all_tasks.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Tpo sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sched_policies/execute_all_tasks.c' object='sched_policies/execute_all_tasks-execute_all_tasks.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(sched_policies_execute_all_tasks_CFLAGS) $(CFLAGS) -c -o sched_policies/execute_all_tasks-execute_all_tasks.obj `if test -f 'sched_policies/execute_all_tasks.c'; then $(CYGPATH_W) 'sched_policies/execute_all_tasks.c'; else $(CYGPATH_W) '$(srcdir)/sched_policies/execute_all_tasks.c'; fi` .cpp.o: @am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< .cpp.obj: @am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .cpp.lo: @am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< .f90.o: $(AM_V_FC)$(FCCOMPILE) -c -o $@ $< .f90.obj: $(AM_V_FC)$(FCCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .f90.lo: $(AM_V_FC)$(LTFCCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs -rm -rf datawizard/.libs datawizard/_libs -rm -rf datawizard/interfaces/.libs datawizard/interfaces/_libs -rm -rf datawizard/interfaces/bcsr/.libs datawizard/interfaces/bcsr/_libs -rm -rf datawizard/interfaces/block/.libs datawizard/interfaces/block/_libs -rm -rf datawizard/interfaces/coo/.libs datawizard/interfaces/coo/_libs -rm -rf datawizard/interfaces/csr/.libs datawizard/interfaces/csr/_libs -rm -rf datawizard/interfaces/matrix/.libs datawizard/interfaces/matrix/_libs -rm -rf datawizard/interfaces/multiformat/.libs datawizard/interfaces/multiformat/_libs -rm -rf datawizard/interfaces/multiformat/advanced/.libs datawizard/interfaces/multiformat/advanced/_libs -rm -rf datawizard/interfaces/ndim/.libs datawizard/interfaces/ndim/_libs -rm -rf datawizard/interfaces/tensor/.libs datawizard/interfaces/tensor/_libs -rm -rf datawizard/interfaces/variable/.libs datawizard/interfaces/variable/_libs -rm -rf datawizard/interfaces/vector/.libs datawizard/interfaces/vector/_libs -rm -rf datawizard/interfaces/void/.libs datawizard/interfaces/void/_libs -rm -rf disk/.libs disk/_libs -rm -rf energy/.libs energy/_libs -rm -rf errorcheck/.libs errorcheck/_libs -rm -rf fault-tolerance/.libs fault-tolerance/_libs -rm -rf fortran90/.libs fortran90/_libs -rm -rf helper/.libs helper/_libs -rm -rf main/.libs main/_libs -rm -rf main/driver_api/.libs main/driver_api/_libs -rm -rf maxfpga/.libs maxfpga/_libs -rm -rf microbenchs/.libs microbenchs/_libs -rm -rf openmp/.libs openmp/_libs -rm -rf overlap/.libs overlap/_libs -rm -rf parallel_tasks/.libs parallel_tasks/_libs -rm -rf perfmodels/.libs perfmodels/_libs -rm -rf sched_ctx/.libs sched_ctx/_libs -rm -rf sched_policies/.libs sched_policies/_libs install-nobase_STARPU_OPENCL_DATADATA: $(nobase_STARPU_OPENCL_DATA_DATA) @$(NORMAL_INSTALL) @list='$(nobase_STARPU_OPENCL_DATA_DATA)'; test -n "$(STARPU_OPENCL_DATAdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_OPENCL_DATAdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(STARPU_OPENCL_DATAdir)" || exit 1; \ fi; \ $(am__nobase_list) | while read dir files; do \ xfiles=; for file in $$files; do \ if test -f "$$file"; then xfiles="$$xfiles $$file"; \ else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ test -z "$$xfiles" || { \ test "x$$dir" = x. || { \ echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir'"; \ $(MKDIR_P) "$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir"; }; \ echo " $(INSTALL_DATA) $$xfiles '$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir'"; \ $(INSTALL_DATA) $$xfiles "$(DESTDIR)$(STARPU_OPENCL_DATAdir)/$$dir" || exit $$?; }; \ done uninstall-nobase_STARPU_OPENCL_DATADATA: @$(NORMAL_UNINSTALL) @list='$(nobase_STARPU_OPENCL_DATA_DATA)'; test -n "$(STARPU_OPENCL_DATAdir)" || list=; \ $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ dir='$(DESTDIR)$(STARPU_OPENCL_DATAdir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: $(check_PROGRAMS) @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all $(check_PROGRAMS) @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? coverage/coverage.sh.log: coverage/coverage.sh @p='coverage/coverage.sh'; \ b='coverage/coverage.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/tasks_data_overhead.sh.log: microbenchs/tasks_data_overhead.sh @p='microbenchs/tasks_data_overhead.sh'; \ b='microbenchs/tasks_data_overhead.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/sync_tasks_data_overhead.sh.log: microbenchs/sync_tasks_data_overhead.sh @p='microbenchs/sync_tasks_data_overhead.sh'; \ b='microbenchs/sync_tasks_data_overhead.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/async_tasks_data_overhead.sh.log: microbenchs/async_tasks_data_overhead.sh @p='microbenchs/async_tasks_data_overhead.sh'; \ b='microbenchs/async_tasks_data_overhead.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/tasks_size_overhead_scheds.sh.log: microbenchs/tasks_size_overhead_scheds.sh @p='microbenchs/tasks_size_overhead_scheds.sh'; \ b='microbenchs/tasks_size_overhead_scheds.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/parallel_independent_homogeneous_tasks.sh.log: microbenchs/parallel_independent_homogeneous_tasks.sh @p='microbenchs/parallel_independent_homogeneous_tasks.sh'; \ b='microbenchs/parallel_independent_homogeneous_tasks.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/parallel_independent_heterogeneous_tasks.sh.log: microbenchs/parallel_independent_heterogeneous_tasks.sh @p='microbenchs/parallel_independent_heterogeneous_tasks.sh'; \ b='microbenchs/parallel_independent_heterogeneous_tasks.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/parallel_independent_homogeneous_tasks_data.sh.log: microbenchs/parallel_independent_homogeneous_tasks_data.sh @p='microbenchs/parallel_independent_homogeneous_tasks_data.sh'; \ b='microbenchs/parallel_independent_homogeneous_tasks_data.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/parallel_independent_heterogeneous_tasks_data.sh.log: microbenchs/parallel_independent_heterogeneous_tasks_data.sh @p='microbenchs/parallel_independent_heterogeneous_tasks_data.sh'; \ b='microbenchs/parallel_independent_heterogeneous_tasks_data.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/parallel_redux_homogeneous_tasks_data.sh.log: microbenchs/parallel_redux_homogeneous_tasks_data.sh @p='microbenchs/parallel_redux_homogeneous_tasks_data.sh'; \ b='microbenchs/parallel_redux_homogeneous_tasks_data.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/parallel_redux_heterogeneous_tasks_data.sh.log: microbenchs/parallel_redux_heterogeneous_tasks_data.sh @p='microbenchs/parallel_redux_heterogeneous_tasks_data.sh'; \ b='microbenchs/parallel_redux_heterogeneous_tasks_data.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/parallel_dependent_homogeneous_tasks_data.sh.log: microbenchs/parallel_dependent_homogeneous_tasks_data.sh @p='microbenchs/parallel_dependent_homogeneous_tasks_data.sh'; \ b='microbenchs/parallel_dependent_homogeneous_tasks_data.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/locality.sh.log: datawizard/locality.sh @p='datawizard/locality.sh'; \ b='datawizard/locality.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/bandwidth_scheds.sh.log: microbenchs/bandwidth_scheds.sh @p='microbenchs/bandwidth_scheds.sh'; \ b='microbenchs/bandwidth_scheds.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) overlap/overlap.sh.log: overlap/overlap.sh @p='overlap/overlap.sh'; \ b='overlap/overlap.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/callback.log: main/callback$(EXEEXT) @p='main/callback$(EXEEXT)'; \ b='main/callback'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/bind.log: main/bind$(EXEEXT) @p='main/bind$(EXEEXT)'; \ b='main/bind'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/mkdtemp.log: main/mkdtemp$(EXEEXT) @p='main/mkdtemp$(EXEEXT)'; \ b='main/mkdtemp'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/execute_schedule.log: main/execute_schedule$(EXEEXT) @p='main/execute_schedule$(EXEEXT)'; \ b='main/execute_schedule'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/insert_task_pack.log: main/insert_task_pack$(EXEEXT) @p='main/insert_task_pack$(EXEEXT)'; \ b='main/insert_task_pack'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/insert_task_nullcodelet.log: main/insert_task_nullcodelet$(EXEEXT) @p='main/insert_task_nullcodelet$(EXEEXT)'; \ b='main/insert_task_nullcodelet'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/insert_task_where.log: main/insert_task_where$(EXEEXT) @p='main/insert_task_where$(EXEEXT)'; \ b='main/insert_task_where'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/multithreaded_init.log: main/multithreaded_init$(EXEEXT) @p='main/multithreaded_init$(EXEEXT)'; \ b='main/multithreaded_init'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/empty_task.log: main/empty_task$(EXEEXT) @p='main/empty_task$(EXEEXT)'; \ b='main/empty_task'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/empty_task_chain.log: main/empty_task_chain$(EXEEXT) @p='main/empty_task_chain$(EXEEXT)'; \ b='main/empty_task_chain'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/starpu_worker_exists.log: main/starpu_worker_exists$(EXEEXT) @p='main/starpu_worker_exists$(EXEEXT)'; \ b='main/starpu_worker_exists'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/codelet_null_callback.log: main/codelet_null_callback$(EXEEXT) @p='main/codelet_null_callback$(EXEEXT)'; \ b='main/codelet_null_callback'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/allocate.log: datawizard/allocate$(EXEEXT) @p='datawizard/allocate$(EXEEXT)'; \ b='datawizard/allocate'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/acquire_cb.log: datawizard/acquire_cb$(EXEEXT) @p='datawizard/acquire_cb$(EXEEXT)'; \ b='datawizard/acquire_cb'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/deps.log: datawizard/deps$(EXEEXT) @p='datawizard/deps$(EXEEXT)'; \ b='datawizard/deps'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/user_interaction_implicit.log: datawizard/user_interaction_implicit$(EXEEXT) @p='datawizard/user_interaction_implicit$(EXEEXT)'; \ b='datawizard/user_interaction_implicit'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/copy_interfaces.log: datawizard/interfaces/copy_interfaces$(EXEEXT) @p='datawizard/interfaces/copy_interfaces$(EXEEXT)'; \ b='datawizard/interfaces/copy_interfaces'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/numa_overflow.log: datawizard/numa_overflow$(EXEEXT) @p='datawizard/numa_overflow$(EXEEXT)'; \ b='datawizard/numa_overflow'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/locality.log: datawizard/locality$(EXEEXT) @p='datawizard/locality$(EXEEXT)'; \ b='datawizard/locality'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/variable_size.log: datawizard/variable_size$(EXEEXT) @p='datawizard/variable_size$(EXEEXT)'; \ b='datawizard/variable_size'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) errorcheck/starpu_init_noworker.log: errorcheck/starpu_init_noworker$(EXEEXT) @p='errorcheck/starpu_init_noworker$(EXEEXT)'; \ b='errorcheck/starpu_init_noworker'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) errorcheck/invalid_tasks.log: errorcheck/invalid_tasks$(EXEEXT) @p='errorcheck/invalid_tasks$(EXEEXT)'; \ b='errorcheck/invalid_tasks'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) helper/cublas_init.log: helper/cublas_init$(EXEEXT) @p='helper/cublas_init$(EXEEXT)'; \ b='helper/cublas_init'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) helper/cublasLt_init.log: helper/cublasLt_init$(EXEEXT) @p='helper/cublasLt_init$(EXEEXT)'; \ b='helper/cublasLt_init'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) helper/cusparse_init.log: helper/cusparse_init$(EXEEXT) @p='helper/cusparse_init$(EXEEXT)'; \ b='helper/cusparse_init'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) helper/hipblas_init.log: helper/hipblas_init$(EXEEXT) @p='helper/hipblas_init$(EXEEXT)'; \ b='helper/hipblas_init'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) helper/pinned_memory.log: helper/pinned_memory$(EXEEXT) @p='helper/pinned_memory$(EXEEXT)'; \ b='helper/pinned_memory'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) helper/execute_on_all.log: helper/execute_on_all$(EXEEXT) @p='helper/execute_on_all$(EXEEXT)'; \ b='helper/execute_on_all'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/display_structures_size.log: microbenchs/display_structures_size$(EXEEXT) @p='microbenchs/display_structures_size$(EXEEXT)'; \ b='microbenchs/display_structures_size'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/local_pingpong.log: microbenchs/local_pingpong$(EXEEXT) @p='microbenchs/local_pingpong$(EXEEXT)'; \ b='microbenchs/local_pingpong'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) overlap/overlap.log: overlap/overlap$(EXEEXT) @p='overlap/overlap$(EXEEXT)'; \ b='overlap/overlap'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/sched_ctx_list.log: sched_ctx/sched_ctx_list$(EXEEXT) @p='sched_ctx/sched_ctx_list$(EXEEXT)'; \ b='sched_ctx/sched_ctx_list'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/sched_ctx_policy_data.log: sched_ctx/sched_ctx_policy_data$(EXEEXT) @p='sched_ctx/sched_ctx_policy_data$(EXEEXT)'; \ b='sched_ctx/sched_ctx_policy_data'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/init_exit_01.log: openmp/init_exit_01$(EXEEXT) @p='openmp/init_exit_01$(EXEEXT)'; \ b='openmp/init_exit_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/init_exit_02.log: openmp/init_exit_02$(EXEEXT) @p='openmp/init_exit_02$(EXEEXT)'; \ b='openmp/init_exit_02'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/environment.log: openmp/environment$(EXEEXT) @p='openmp/environment$(EXEEXT)'; \ b='openmp/environment'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/api_01.log: openmp/api_01$(EXEEXT) @p='openmp/api_01$(EXEEXT)'; \ b='openmp/api_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_01.log: openmp/parallel_01$(EXEEXT) @p='openmp/parallel_01$(EXEEXT)'; \ b='openmp/parallel_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_02.log: openmp/parallel_02$(EXEEXT) @p='openmp/parallel_02$(EXEEXT)'; \ b='openmp/parallel_02'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_03.log: openmp/parallel_03$(EXEEXT) @p='openmp/parallel_03$(EXEEXT)'; \ b='openmp/parallel_03'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_barrier_01.log: openmp/parallel_barrier_01$(EXEEXT) @p='openmp/parallel_barrier_01$(EXEEXT)'; \ b='openmp/parallel_barrier_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_master_01.log: openmp/parallel_master_01$(EXEEXT) @p='openmp/parallel_master_01$(EXEEXT)'; \ b='openmp/parallel_master_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_master_inline_01.log: openmp/parallel_master_inline_01$(EXEEXT) @p='openmp/parallel_master_inline_01$(EXEEXT)'; \ b='openmp/parallel_master_inline_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_single_wait_01.log: openmp/parallel_single_wait_01$(EXEEXT) @p='openmp/parallel_single_wait_01$(EXEEXT)'; \ b='openmp/parallel_single_wait_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_single_nowait_01.log: openmp/parallel_single_nowait_01$(EXEEXT) @p='openmp/parallel_single_nowait_01$(EXEEXT)'; \ b='openmp/parallel_single_nowait_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_single_inline_01.log: openmp/parallel_single_inline_01$(EXEEXT) @p='openmp/parallel_single_inline_01$(EXEEXT)'; \ b='openmp/parallel_single_inline_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_single_copyprivate_01.log: openmp/parallel_single_copyprivate_01$(EXEEXT) @p='openmp/parallel_single_copyprivate_01$(EXEEXT)'; \ b='openmp/parallel_single_copyprivate_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_single_copyprivate_inline_01.log: openmp/parallel_single_copyprivate_inline_01$(EXEEXT) @p='openmp/parallel_single_copyprivate_inline_01$(EXEEXT)'; \ b='openmp/parallel_single_copyprivate_inline_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_critical_01.log: openmp/parallel_critical_01$(EXEEXT) @p='openmp/parallel_critical_01$(EXEEXT)'; \ b='openmp/parallel_critical_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_critical_inline_01.log: openmp/parallel_critical_inline_01$(EXEEXT) @p='openmp/parallel_critical_inline_01$(EXEEXT)'; \ b='openmp/parallel_critical_inline_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_critical_named_01.log: openmp/parallel_critical_named_01$(EXEEXT) @p='openmp/parallel_critical_named_01$(EXEEXT)'; \ b='openmp/parallel_critical_named_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_critical_named_inline_01.log: openmp/parallel_critical_named_inline_01$(EXEEXT) @p='openmp/parallel_critical_named_inline_01$(EXEEXT)'; \ b='openmp/parallel_critical_named_inline_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_simple_lock_01.log: openmp/parallel_simple_lock_01$(EXEEXT) @p='openmp/parallel_simple_lock_01$(EXEEXT)'; \ b='openmp/parallel_simple_lock_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_nested_lock_01.log: openmp/parallel_nested_lock_01$(EXEEXT) @p='openmp/parallel_nested_lock_01$(EXEEXT)'; \ b='openmp/parallel_nested_lock_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_for_01.log: openmp/parallel_for_01$(EXEEXT) @p='openmp/parallel_for_01$(EXEEXT)'; \ b='openmp/parallel_for_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_for_02.log: openmp/parallel_for_02$(EXEEXT) @p='openmp/parallel_for_02$(EXEEXT)'; \ b='openmp/parallel_for_02'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_for_ordered_01.log: openmp/parallel_for_ordered_01$(EXEEXT) @p='openmp/parallel_for_ordered_01$(EXEEXT)'; \ b='openmp/parallel_for_ordered_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_sections_01.log: openmp/parallel_sections_01$(EXEEXT) @p='openmp/parallel_sections_01$(EXEEXT)'; \ b='openmp/parallel_sections_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/parallel_sections_combined_01.log: openmp/parallel_sections_combined_01$(EXEEXT) @p='openmp/parallel_sections_combined_01$(EXEEXT)'; \ b='openmp/parallel_sections_combined_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/task_01.log: openmp/task_01$(EXEEXT) @p='openmp/task_01$(EXEEXT)'; \ b='openmp/task_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/task_02.log: openmp/task_02$(EXEEXT) @p='openmp/task_02$(EXEEXT)'; \ b='openmp/task_02'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/task_03.log: openmp/task_03$(EXEEXT) @p='openmp/task_03$(EXEEXT)'; \ b='openmp/task_03'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/taskloop.log: openmp/taskloop$(EXEEXT) @p='openmp/taskloop$(EXEEXT)'; \ b='openmp/taskloop'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/taskwait_01.log: openmp/taskwait_01$(EXEEXT) @p='openmp/taskwait_01$(EXEEXT)'; \ b='openmp/taskwait_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/taskgroup_01.log: openmp/taskgroup_01$(EXEEXT) @p='openmp/taskgroup_01$(EXEEXT)'; \ b='openmp/taskgroup_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/taskgroup_02.log: openmp/taskgroup_02$(EXEEXT) @p='openmp/taskgroup_02$(EXEEXT)'; \ b='openmp/taskgroup_02'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/array_slice_01.log: openmp/array_slice_01$(EXEEXT) @p='openmp/array_slice_01$(EXEEXT)'; \ b='openmp/array_slice_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) openmp/cuda_task_01.log: openmp/cuda_task_01$(EXEEXT) @p='openmp/cuda_task_01$(EXEEXT)'; \ b='openmp/cuda_task_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perfmodels/value_nan.log: perfmodels/value_nan$(EXEEXT) @p='perfmodels/value_nan$(EXEEXT)'; \ b='perfmodels/value_nan'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_policies/workerids.log: sched_policies/workerids$(EXEEXT) @p='sched_policies/workerids$(EXEEXT)'; \ b='sched_policies/workerids'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) energy/energy_efficiency.log: energy/energy_efficiency$(EXEEXT) @p='energy/energy_efficiency$(EXEEXT)'; \ b='energy/energy_efficiency'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/simgrid-locality.log: datawizard/simgrid-locality$(EXEEXT) @p='datawizard/simgrid-locality$(EXEEXT)'; \ b='datawizard/simgrid-locality'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/deprecated_func.log: main/deprecated_func$(EXEEXT) @p='main/deprecated_func$(EXEEXT)'; \ b='main/deprecated_func'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/driver_api/init_run_deinit.log: main/driver_api/init_run_deinit$(EXEEXT) @p='main/driver_api/init_run_deinit$(EXEEXT)'; \ b='main/driver_api/init_run_deinit'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/driver_api/run_driver.log: main/driver_api/run_driver$(EXEEXT) @p='main/driver_api/run_driver$(EXEEXT)'; \ b='main/driver_api/run_driver'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/deploop.log: main/deploop$(EXEEXT) @p='main/deploop$(EXEEXT)'; \ b='main/deploop'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/display_binding.log: main/display_binding$(EXEEXT) @p='main/display_binding$(EXEEXT)'; \ b='main/display_binding'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/execute_on_a_specific_worker.log: main/execute_on_a_specific_worker$(EXEEXT) @p='main/execute_on_a_specific_worker$(EXEEXT)'; \ b='main/execute_on_a_specific_worker'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/insert_task.log: main/insert_task$(EXEEXT) @p='main/insert_task$(EXEEXT)'; \ b='main/insert_task'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/insert_task_value.log: main/insert_task_value$(EXEEXT) @p='main/insert_task_value$(EXEEXT)'; \ b='main/insert_task_value'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/insert_task_dyn_handles.log: main/insert_task_dyn_handles$(EXEEXT) @p='main/insert_task_dyn_handles$(EXEEXT)'; \ b='main/insert_task_dyn_handles'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/insert_task_array.log: main/insert_task_array$(EXEEXT) @p='main/insert_task_array$(EXEEXT)'; \ b='main/insert_task_array'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/insert_task_many.log: main/insert_task_many$(EXEEXT) @p='main/insert_task_many$(EXEEXT)'; \ b='main/insert_task_many'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/job.log: main/job$(EXEEXT) @p='main/job$(EXEEXT)'; \ b='main/job'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/multithreaded.log: main/multithreaded$(EXEEXT) @p='main/multithreaded$(EXEEXT)'; \ b='main/multithreaded'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/starpu_task_bundle.log: main/starpu_task_bundle$(EXEEXT) @p='main/starpu_task_bundle$(EXEEXT)'; \ b='main/starpu_task_bundle'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/starpu_task_wait_for_all.log: main/starpu_task_wait_for_all$(EXEEXT) @p='main/starpu_task_wait_for_all$(EXEEXT)'; \ b='main/starpu_task_wait_for_all'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/starpu_task_wait.log: main/starpu_task_wait$(EXEEXT) @p='main/starpu_task_wait$(EXEEXT)'; \ b='main/starpu_task_wait'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/static_restartable.log: main/static_restartable$(EXEEXT) @p='main/static_restartable$(EXEEXT)'; \ b='main/static_restartable'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/static_restartable_using_initializer.log: main/static_restartable_using_initializer$(EXEEXT) @p='main/static_restartable_using_initializer$(EXEEXT)'; \ b='main/static_restartable_using_initializer'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/static_restartable_tag.log: main/static_restartable_tag$(EXEEXT) @p='main/static_restartable_tag$(EXEEXT)'; \ b='main/static_restartable_tag'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/regenerate.log: main/regenerate$(EXEEXT) @p='main/regenerate$(EXEEXT)'; \ b='main/regenerate'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/regenerate_pipeline.log: main/regenerate_pipeline$(EXEEXT) @p='main/regenerate_pipeline$(EXEEXT)'; \ b='main/regenerate_pipeline'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/restart.log: main/restart$(EXEEXT) @p='main/restart$(EXEEXT)'; \ b='main/restart'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/wait_all_regenerable_tasks.log: main/wait_all_regenerable_tasks$(EXEEXT) @p='main/wait_all_regenerable_tasks$(EXEEXT)'; \ b='main/wait_all_regenerable_tasks'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/subgraph_repeat.log: main/subgraph_repeat$(EXEEXT) @p='main/subgraph_repeat$(EXEEXT)'; \ b='main/subgraph_repeat'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/subgraph_repeat_tag.log: main/subgraph_repeat_tag$(EXEEXT) @p='main/subgraph_repeat_tag$(EXEEXT)'; \ b='main/subgraph_repeat_tag'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/subgraph_repeat_regenerate.log: main/subgraph_repeat_regenerate$(EXEEXT) @p='main/subgraph_repeat_regenerate$(EXEEXT)'; \ b='main/subgraph_repeat_regenerate'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/subgraph_repeat_regenerate_tag.log: main/subgraph_repeat_regenerate_tag$(EXEEXT) @p='main/subgraph_repeat_regenerate_tag$(EXEEXT)'; \ b='main/subgraph_repeat_regenerate_tag'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/subgraph_repeat_regenerate_tag_cycle.log: main/subgraph_repeat_regenerate_tag_cycle$(EXEEXT) @p='main/subgraph_repeat_regenerate_tag_cycle$(EXEEXT)'; \ b='main/subgraph_repeat_regenerate_tag_cycle'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/empty_task_sync_point.log: main/empty_task_sync_point$(EXEEXT) @p='main/empty_task_sync_point$(EXEEXT)'; \ b='main/empty_task_sync_point'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/empty_task_sync_point_tasks.log: main/empty_task_sync_point_tasks$(EXEEXT) @p='main/empty_task_sync_point_tasks$(EXEEXT)'; \ b='main/empty_task_sync_point_tasks'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/tag_wait_api.log: main/tag_wait_api$(EXEEXT) @p='main/tag_wait_api$(EXEEXT)'; \ b='main/tag_wait_api'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/tag_get_task.log: main/tag_get_task$(EXEEXT) @p='main/tag_get_task$(EXEEXT)'; \ b='main/tag_get_task'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/task_wait_api.log: main/task_wait_api$(EXEEXT) @p='main/task_wait_api$(EXEEXT)'; \ b='main/task_wait_api'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/declare_deps_in_callback.log: main/declare_deps_in_callback$(EXEEXT) @p='main/declare_deps_in_callback$(EXEEXT)'; \ b='main/declare_deps_in_callback'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/declare_deps_after_submission.log: main/declare_deps_after_submission$(EXEEXT) @p='main/declare_deps_after_submission$(EXEEXT)'; \ b='main/declare_deps_after_submission'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/declare_deps_after_submission_synchronous.log: main/declare_deps_after_submission_synchronous$(EXEEXT) @p='main/declare_deps_after_submission_synchronous$(EXEEXT)'; \ b='main/declare_deps_after_submission_synchronous'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/get_current_task.log: main/get_current_task$(EXEEXT) @p='main/get_current_task$(EXEEXT)'; \ b='main/get_current_task'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/starpu_init.log: main/starpu_init$(EXEEXT) @p='main/starpu_init$(EXEEXT)'; \ b='main/starpu_init'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/submit.log: main/submit$(EXEEXT) @p='main/submit$(EXEEXT)'; \ b='main/submit'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/const_codelet.log: main/const_codelet$(EXEEXT) @p='main/const_codelet$(EXEEXT)'; \ b='main/const_codelet'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/pause_resume.log: main/pause_resume$(EXEEXT) @p='main/pause_resume$(EXEEXT)'; \ b='main/pause_resume'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/pack.log: main/pack$(EXEEXT) @p='main/pack$(EXEEXT)'; \ b='main/pack'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/get_children_tasks.log: main/get_children_tasks$(EXEEXT) @p='main/get_children_tasks$(EXEEXT)'; \ b='main/get_children_tasks'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/hwloc_cpuset.log: main/hwloc_cpuset$(EXEEXT) @p='main/hwloc_cpuset$(EXEEXT)'; \ b='main/hwloc_cpuset'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/task_end_dep.log: main/task_end_dep$(EXEEXT) @p='main/task_end_dep$(EXEEXT)'; \ b='main/task_end_dep'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/acquire_cb_insert.log: datawizard/acquire_cb_insert$(EXEEXT) @p='datawizard/acquire_cb_insert$(EXEEXT)'; \ b='datawizard/acquire_cb_insert'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/acquire_release.log: datawizard/acquire_release$(EXEEXT) @p='datawizard/acquire_release$(EXEEXT)'; \ b='datawizard/acquire_release'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/acquire_release2.log: datawizard/acquire_release2$(EXEEXT) @p='datawizard/acquire_release2$(EXEEXT)'; \ b='datawizard/acquire_release2'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/acquire_release_to.log: datawizard/acquire_release_to$(EXEEXT) @p='datawizard/acquire_release_to$(EXEEXT)'; \ b='datawizard/acquire_release_to'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/acquire_try.log: datawizard/acquire_try$(EXEEXT) @p='datawizard/acquire_try$(EXEEXT)'; \ b='datawizard/acquire_try'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/bcsr.log: datawizard/bcsr$(EXEEXT) @p='datawizard/bcsr$(EXEEXT)'; \ b='datawizard/bcsr'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/cache.log: datawizard/cache$(EXEEXT) @p='datawizard/cache$(EXEEXT)'; \ b='datawizard/cache'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/commute.log: datawizard/commute$(EXEEXT) @p='datawizard/commute$(EXEEXT)'; \ b='datawizard/commute'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/commute2.log: datawizard/commute2$(EXEEXT) @p='datawizard/commute2$(EXEEXT)'; \ b='datawizard/commute2'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/copy.log: datawizard/copy$(EXEEXT) @p='datawizard/copy$(EXEEXT)'; \ b='datawizard/copy'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/data_implicit_deps.log: datawizard/data_implicit_deps$(EXEEXT) @p='datawizard/data_implicit_deps$(EXEEXT)'; \ b='datawizard/data_implicit_deps'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/data_register.log: datawizard/data_register$(EXEEXT) @p='datawizard/data_register$(EXEEXT)'; \ b='datawizard/data_register'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/scratch.log: datawizard/scratch$(EXEEXT) @p='datawizard/scratch$(EXEEXT)'; \ b='datawizard/scratch'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/scratch_reuse.log: datawizard/scratch_reuse$(EXEEXT) @p='datawizard/scratch_reuse$(EXEEXT)'; \ b='datawizard/scratch_reuse'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/sync_and_notify_data.log: datawizard/sync_and_notify_data$(EXEEXT) @p='datawizard/sync_and_notify_data$(EXEEXT)'; \ b='datawizard/sync_and_notify_data'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/sync_and_notify_data_implicit.log: datawizard/sync_and_notify_data_implicit$(EXEEXT) @p='datawizard/sync_and_notify_data_implicit$(EXEEXT)'; \ b='datawizard/sync_and_notify_data_implicit'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/dsm_stress.log: datawizard/dsm_stress$(EXEEXT) @p='datawizard/dsm_stress$(EXEEXT)'; \ b='datawizard/dsm_stress'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/double_parameter.log: datawizard/double_parameter$(EXEEXT) @p='datawizard/double_parameter$(EXEEXT)'; \ b='datawizard/double_parameter'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/write_only_tmp_buffer.log: datawizard/write_only_tmp_buffer$(EXEEXT) @p='datawizard/write_only_tmp_buffer$(EXEEXT)'; \ b='datawizard/write_only_tmp_buffer'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/data_invalidation.log: datawizard/data_invalidation$(EXEEXT) @p='datawizard/data_invalidation$(EXEEXT)'; \ b='datawizard/data_invalidation'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/data_deinitialize.log: datawizard/data_deinitialize$(EXEEXT) @p='datawizard/data_deinitialize$(EXEEXT)'; \ b='datawizard/data_deinitialize'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/dining_philosophers.log: datawizard/dining_philosophers$(EXEEXT) @p='datawizard/dining_philosophers$(EXEEXT)'; \ b='datawizard/dining_philosophers'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/manual_reduction.log: datawizard/manual_reduction$(EXEEXT) @p='datawizard/manual_reduction$(EXEEXT)'; \ b='datawizard/manual_reduction'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/readers_and_writers.log: datawizard/readers_and_writers$(EXEEXT) @p='datawizard/readers_and_writers$(EXEEXT)'; \ b='datawizard/readers_and_writers'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/unpartition.log: datawizard/unpartition$(EXEEXT) @p='datawizard/unpartition$(EXEEXT)'; \ b='datawizard/unpartition'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/sync_with_data_with_mem.log: datawizard/sync_with_data_with_mem$(EXEEXT) @p='datawizard/sync_with_data_with_mem$(EXEEXT)'; \ b='datawizard/sync_with_data_with_mem'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/sync_with_data_with_mem_non_blocking.log: datawizard/sync_with_data_with_mem_non_blocking$(EXEEXT) @p='datawizard/sync_with_data_with_mem_non_blocking$(EXEEXT)'; \ b='datawizard/sync_with_data_with_mem_non_blocking'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/sync_with_data_with_mem_non_blocking_implicit.log: datawizard/sync_with_data_with_mem_non_blocking_implicit$(EXEEXT) @p='datawizard/sync_with_data_with_mem_non_blocking_implicit$(EXEEXT)'; \ b='datawizard/sync_with_data_with_mem_non_blocking_implicit'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/mpi_like.log: datawizard/mpi_like$(EXEEXT) @p='datawizard/mpi_like$(EXEEXT)'; \ b='datawizard/mpi_like'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/mpi_like_async.log: datawizard/mpi_like_async$(EXEEXT) @p='datawizard/mpi_like_async$(EXEEXT)'; \ b='datawizard/mpi_like_async'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/critical_section_with_void_interface.log: datawizard/critical_section_with_void_interface$(EXEEXT) @p='datawizard/critical_section_with_void_interface$(EXEEXT)'; \ b='datawizard/critical_section_with_void_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/increment_init.log: datawizard/increment_init$(EXEEXT) @p='datawizard/increment_init$(EXEEXT)'; \ b='datawizard/increment_init'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/increment_redux.log: datawizard/increment_redux$(EXEEXT) @p='datawizard/increment_redux$(EXEEXT)'; \ b='datawizard/increment_redux'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/increment_redux_partition.log: datawizard/increment_redux_partition$(EXEEXT) @p='datawizard/increment_redux_partition$(EXEEXT)'; \ b='datawizard/increment_redux_partition'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/increment_redux_v2.log: datawizard/increment_redux_v2$(EXEEXT) @p='datawizard/increment_redux_v2$(EXEEXT)'; \ b='datawizard/increment_redux_v2'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/increment_redux_with_args.log: datawizard/increment_redux_with_args$(EXEEXT) @p='datawizard/increment_redux_with_args$(EXEEXT)'; \ b='datawizard/increment_redux_with_args'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/increment_redux_lazy.log: datawizard/increment_redux_lazy$(EXEEXT) @p='datawizard/increment_redux_lazy$(EXEEXT)'; \ b='datawizard/increment_redux_lazy'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/handle_to_pointer.log: datawizard/handle_to_pointer$(EXEEXT) @p='datawizard/handle_to_pointer$(EXEEXT)'; \ b='datawizard/handle_to_pointer'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/lazy_allocation.log: datawizard/lazy_allocation$(EXEEXT) @p='datawizard/lazy_allocation$(EXEEXT)'; \ b='datawizard/lazy_allocation'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/no_unregister.log: datawizard/no_unregister$(EXEEXT) @p='datawizard/no_unregister$(EXEEXT)'; \ b='datawizard/no_unregister'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/noreclaim.log: datawizard/noreclaim$(EXEEXT) @p='datawizard/noreclaim$(EXEEXT)'; \ b='datawizard/noreclaim'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/nowhere.log: datawizard/nowhere$(EXEEXT) @p='datawizard/nowhere$(EXEEXT)'; \ b='datawizard/nowhere'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/block/block_interface.log: datawizard/interfaces/block/block_interface$(EXEEXT) @p='datawizard/interfaces/block/block_interface$(EXEEXT)'; \ b='datawizard/interfaces/block/block_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/bcsr/bcsr_interface.log: datawizard/interfaces/bcsr/bcsr_interface$(EXEEXT) @p='datawizard/interfaces/bcsr/bcsr_interface$(EXEEXT)'; \ b='datawizard/interfaces/bcsr/bcsr_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/coo/coo_interface.log: datawizard/interfaces/coo/coo_interface$(EXEEXT) @p='datawizard/interfaces/coo/coo_interface$(EXEEXT)'; \ b='datawizard/interfaces/coo/coo_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/csr/csr_interface.log: datawizard/interfaces/csr/csr_interface$(EXEEXT) @p='datawizard/interfaces/csr/csr_interface$(EXEEXT)'; \ b='datawizard/interfaces/csr/csr_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/matrix/matrix_interface.log: datawizard/interfaces/matrix/matrix_interface$(EXEEXT) @p='datawizard/interfaces/matrix/matrix_interface$(EXEEXT)'; \ b='datawizard/interfaces/matrix/matrix_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/multiformat/multiformat_interface.log: datawizard/interfaces/multiformat/multiformat_interface$(EXEEXT) @p='datawizard/interfaces/multiformat/multiformat_interface$(EXEEXT)'; \ b='datawizard/interfaces/multiformat/multiformat_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.log: datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl$(EXEEXT) @p='datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl$(EXEEXT)'; \ b='datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/multiformat/advanced/multiformat_data_release.log: datawizard/interfaces/multiformat/advanced/multiformat_data_release$(EXEEXT) @p='datawizard/interfaces/multiformat/advanced/multiformat_data_release$(EXEEXT)'; \ b='datawizard/interfaces/multiformat/advanced/multiformat_data_release'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/multiformat/advanced/multiformat_worker.log: datawizard/interfaces/multiformat/advanced/multiformat_worker$(EXEEXT) @p='datawizard/interfaces/multiformat/advanced/multiformat_worker$(EXEEXT)'; \ b='datawizard/interfaces/multiformat/advanced/multiformat_worker'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.log: datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion$(EXEEXT) @p='datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion$(EXEEXT)'; \ b='datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/multiformat/advanced/same_handle.log: datawizard/interfaces/multiformat/advanced/same_handle$(EXEEXT) @p='datawizard/interfaces/multiformat/advanced/same_handle$(EXEEXT)'; \ b='datawizard/interfaces/multiformat/advanced/same_handle'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/tensor/tensor_interface.log: datawizard/interfaces/tensor/tensor_interface$(EXEEXT) @p='datawizard/interfaces/tensor/tensor_interface$(EXEEXT)'; \ b='datawizard/interfaces/tensor/tensor_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/ndim/ndim_interface.log: datawizard/interfaces/ndim/ndim_interface$(EXEEXT) @p='datawizard/interfaces/ndim/ndim_interface$(EXEEXT)'; \ b='datawizard/interfaces/ndim/ndim_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/variable/variable_interface.log: datawizard/interfaces/variable/variable_interface$(EXEEXT) @p='datawizard/interfaces/variable/variable_interface$(EXEEXT)'; \ b='datawizard/interfaces/variable/variable_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/vector/vector_interface.log: datawizard/interfaces/vector/vector_interface$(EXEEXT) @p='datawizard/interfaces/vector/vector_interface$(EXEEXT)'; \ b='datawizard/interfaces/vector/vector_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/interfaces/void/void_interface.log: datawizard/interfaces/void/void_interface$(EXEEXT) @p='datawizard/interfaces/void/void_interface$(EXEEXT)'; \ b='datawizard/interfaces/void/void_interface'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/in_place_partition.log: datawizard/in_place_partition$(EXEEXT) @p='datawizard/in_place_partition$(EXEEXT)'; \ b='datawizard/in_place_partition'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/partition_dep.log: datawizard/partition_dep$(EXEEXT) @p='datawizard/partition_dep$(EXEEXT)'; \ b='datawizard/partition_dep'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/partition_lazy.log: datawizard/partition_lazy$(EXEEXT) @p='datawizard/partition_lazy$(EXEEXT)'; \ b='datawizard/partition_lazy'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/partition_init.log: datawizard/partition_init$(EXEEXT) @p='datawizard/partition_init$(EXEEXT)'; \ b='datawizard/partition_init'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/partition_wontuse.log: datawizard/partition_wontuse$(EXEEXT) @p='datawizard/partition_wontuse$(EXEEXT)'; \ b='datawizard/partition_wontuse'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/gpu_register.log: datawizard/gpu_register$(EXEEXT) @p='datawizard/gpu_register$(EXEEXT)'; \ b='datawizard/gpu_register'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/gpu_ptr_register.log: datawizard/gpu_ptr_register$(EXEEXT) @p='datawizard/gpu_ptr_register$(EXEEXT)'; \ b='datawizard/gpu_ptr_register'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/variable_parameters.log: datawizard/variable_parameters$(EXEEXT) @p='datawizard/variable_parameters$(EXEEXT)'; \ b='datawizard/variable_parameters'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/wt_host.log: datawizard/wt_host$(EXEEXT) @p='datawizard/wt_host$(EXEEXT)'; \ b='datawizard/wt_host'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/wt_broadcast.log: datawizard/wt_broadcast$(EXEEXT) @p='datawizard/wt_broadcast$(EXEEXT)'; \ b='datawizard/wt_broadcast'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/readonly.log: datawizard/readonly$(EXEEXT) @p='datawizard/readonly$(EXEEXT)'; \ b='datawizard/readonly'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/specific_node.log: datawizard/specific_node$(EXEEXT) @p='datawizard/specific_node$(EXEEXT)'; \ b='datawizard/specific_node'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/specific_node_same.log: datawizard/specific_node_same$(EXEEXT) @p='datawizard/specific_node_same$(EXEEXT)'; \ b='datawizard/specific_node_same'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/task_with_multiple_time_the_same_handle.log: datawizard/task_with_multiple_time_the_same_handle$(EXEEXT) @p='datawizard/task_with_multiple_time_the_same_handle$(EXEEXT)'; \ b='datawizard/task_with_multiple_time_the_same_handle'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/test_arbiter.log: datawizard/test_arbiter$(EXEEXT) @p='datawizard/test_arbiter$(EXEEXT)'; \ b='datawizard/test_arbiter'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/invalidate_pending_requests.log: datawizard/invalidate_pending_requests$(EXEEXT) @p='datawizard/invalidate_pending_requests$(EXEEXT)'; \ b='datawizard/invalidate_pending_requests'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/deinitialize_pending_requests.log: datawizard/deinitialize_pending_requests$(EXEEXT) @p='datawizard/deinitialize_pending_requests$(EXEEXT)'; \ b='datawizard/deinitialize_pending_requests'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/temporary_partition.log: datawizard/temporary_partition$(EXEEXT) @p='datawizard/temporary_partition$(EXEEXT)'; \ b='datawizard/temporary_partition'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/partitioned_initialization.log: datawizard/partitioned_initialization$(EXEEXT) @p='datawizard/partitioned_initialization$(EXEEXT)'; \ b='datawizard/partitioned_initialization'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/partitioned_acquire.log: datawizard/partitioned_acquire$(EXEEXT) @p='datawizard/partitioned_acquire$(EXEEXT)'; \ b='datawizard/partitioned_acquire'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/temporary_partition_implicit.log: datawizard/temporary_partition_implicit$(EXEEXT) @p='datawizard/temporary_partition_implicit$(EXEEXT)'; \ b='datawizard/temporary_partition_implicit'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/temporary_partition_read.log: datawizard/temporary_partition_read$(EXEEXT) @p='datawizard/temporary_partition_read$(EXEEXT)'; \ b='datawizard/temporary_partition_read'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/redux_acquire.log: datawizard/redux_acquire$(EXEEXT) @p='datawizard/redux_acquire$(EXEEXT)'; \ b='datawizard/redux_acquire'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) disk/disk_copy.log: disk/disk_copy$(EXEEXT) @p='disk/disk_copy$(EXEEXT)'; \ b='disk/disk_copy'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) disk/disk_copy_unpack.log: disk/disk_copy_unpack$(EXEEXT) @p='disk/disk_copy_unpack$(EXEEXT)'; \ b='disk/disk_copy_unpack'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) disk/disk_copy_to_disk.log: disk/disk_copy_to_disk$(EXEEXT) @p='disk/disk_copy_to_disk$(EXEEXT)'; \ b='disk/disk_copy_to_disk'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) disk/disk_compute.log: disk/disk_compute$(EXEEXT) @p='disk/disk_compute$(EXEEXT)'; \ b='disk/disk_compute'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) disk/disk_pack.log: disk/disk_pack$(EXEEXT) @p='disk/disk_pack$(EXEEXT)'; \ b='disk/disk_pack'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) disk/mem_reclaim.log: disk/mem_reclaim$(EXEEXT) @p='disk/mem_reclaim$(EXEEXT)'; \ b='disk/mem_reclaim'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) errorcheck/invalid_blocking_calls.log: errorcheck/invalid_blocking_calls$(EXEEXT) @p='errorcheck/invalid_blocking_calls$(EXEEXT)'; \ b='errorcheck/invalid_blocking_calls'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) errorcheck/workers_cpuid.log: errorcheck/workers_cpuid$(EXEEXT) @p='errorcheck/workers_cpuid$(EXEEXT)'; \ b='errorcheck/workers_cpuid'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) fault-tolerance/retry.log: fault-tolerance/retry$(EXEEXT) @p='fault-tolerance/retry$(EXEEXT)'; \ b='fault-tolerance/retry'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) helper/starpu_data_cpy.log: helper/starpu_data_cpy$(EXEEXT) @p='helper/starpu_data_cpy$(EXEEXT)'; \ b='helper/starpu_data_cpy'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) helper/starpu_data_dup_ro.log: helper/starpu_data_dup_ro$(EXEEXT) @p='helper/starpu_data_dup_ro$(EXEEXT)'; \ b='helper/starpu_data_dup_ro'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) helper/starpu_create_sync_task.log: helper/starpu_create_sync_task$(EXEEXT) @p='helper/starpu_create_sync_task$(EXEEXT)'; \ b='helper/starpu_create_sync_task'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/async_tasks_overhead.log: microbenchs/async_tasks_overhead$(EXEEXT) @p='microbenchs/async_tasks_overhead$(EXEEXT)'; \ b='microbenchs/async_tasks_overhead'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/sync_tasks_overhead.log: microbenchs/sync_tasks_overhead$(EXEEXT) @p='microbenchs/sync_tasks_overhead$(EXEEXT)'; \ b='microbenchs/sync_tasks_overhead'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/tasks_overhead.log: microbenchs/tasks_overhead$(EXEEXT) @p='microbenchs/tasks_overhead$(EXEEXT)'; \ b='microbenchs/tasks_overhead'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/tasks_size_overhead.log: microbenchs/tasks_size_overhead$(EXEEXT) @p='microbenchs/tasks_size_overhead$(EXEEXT)'; \ b='microbenchs/tasks_size_overhead'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/prefetch_data_on_node.log: microbenchs/prefetch_data_on_node$(EXEEXT) @p='microbenchs/prefetch_data_on_node$(EXEEXT)'; \ b='microbenchs/prefetch_data_on_node'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/redundant_buffer.log: microbenchs/redundant_buffer$(EXEEXT) @p='microbenchs/redundant_buffer$(EXEEXT)'; \ b='microbenchs/redundant_buffer'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/matrix_as_vector.log: microbenchs/matrix_as_vector$(EXEEXT) @p='microbenchs/matrix_as_vector$(EXEEXT)'; \ b='microbenchs/matrix_as_vector'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) microbenchs/bandwidth.log: microbenchs/bandwidth$(EXEEXT) @p='microbenchs/bandwidth$(EXEEXT)'; \ b='microbenchs/bandwidth'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) overlap/gpu_concurrency.log: overlap/gpu_concurrency$(EXEEXT) @p='overlap/gpu_concurrency$(EXEEXT)'; \ b='overlap/gpu_concurrency'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) parallel_tasks/combined_worker_assign_workerid.log: parallel_tasks/combined_worker_assign_workerid$(EXEEXT) @p='parallel_tasks/combined_worker_assign_workerid$(EXEEXT)'; \ b='parallel_tasks/combined_worker_assign_workerid'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) parallel_tasks/explicit_combined_worker.log: parallel_tasks/explicit_combined_worker$(EXEEXT) @p='parallel_tasks/explicit_combined_worker$(EXEEXT)'; \ b='parallel_tasks/explicit_combined_worker'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) parallel_tasks/parallel_kernels.log: parallel_tasks/parallel_kernels$(EXEEXT) @p='parallel_tasks/parallel_kernels$(EXEEXT)'; \ b='parallel_tasks/parallel_kernels'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) parallel_tasks/parallel_kernels_trivial.log: parallel_tasks/parallel_kernels_trivial$(EXEEXT) @p='parallel_tasks/parallel_kernels_trivial$(EXEEXT)'; \ b='parallel_tasks/parallel_kernels_trivial'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) parallel_tasks/parallel_kernels_spmd.log: parallel_tasks/parallel_kernels_spmd$(EXEEXT) @p='parallel_tasks/parallel_kernels_spmd$(EXEEXT)'; \ b='parallel_tasks/parallel_kernels_spmd'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) parallel_tasks/spmd_peager.log: parallel_tasks/spmd_peager$(EXEEXT) @p='parallel_tasks/spmd_peager$(EXEEXT)'; \ b='parallel_tasks/spmd_peager'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) parallel_tasks/cuda_only.log: parallel_tasks/cuda_only$(EXEEXT) @p='parallel_tasks/cuda_only$(EXEEXT)'; \ b='parallel_tasks/cuda_only'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perfmodels/regression_based_memset.log: perfmodels/regression_based_memset$(EXEEXT) @p='perfmodels/regression_based_memset$(EXEEXT)'; \ b='perfmodels/regression_based_memset'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perfmodels/regression_based_check.log: perfmodels/regression_based_check$(EXEEXT) @p='perfmodels/regression_based_check$(EXEEXT)'; \ b='perfmodels/regression_based_check'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perfmodels/regression_based_multiimpl.log: perfmodels/regression_based_multiimpl$(EXEEXT) @p='perfmodels/regression_based_multiimpl$(EXEEXT)'; \ b='perfmodels/regression_based_multiimpl'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perfmodels/regression_based_energy.log: perfmodels/regression_based_energy$(EXEEXT) @p='perfmodels/regression_based_energy$(EXEEXT)'; \ b='perfmodels/regression_based_energy'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perfmodels/regression_based_gpu.log: perfmodels/regression_based_gpu$(EXEEXT) @p='perfmodels/regression_based_gpu$(EXEEXT)'; \ b='perfmodels/regression_based_gpu'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perfmodels/non_linear_regression_based.log: perfmodels/non_linear_regression_based$(EXEEXT) @p='perfmodels/non_linear_regression_based$(EXEEXT)'; \ b='perfmodels/non_linear_regression_based'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perfmodels/feed.log: perfmodels/feed$(EXEEXT) @p='perfmodels/feed$(EXEEXT)'; \ b='perfmodels/feed'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perfmodels/user_base.log: perfmodels/user_base$(EXEEXT) @p='perfmodels/user_base$(EXEEXT)'; \ b='perfmodels/user_base'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perfmodels/valid_model.log: perfmodels/valid_model$(EXEEXT) @p='perfmodels/valid_model$(EXEEXT)'; \ b='perfmodels/valid_model'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perfmodels/path.log: perfmodels/path$(EXEEXT) @p='perfmodels/path$(EXEEXT)'; \ b='perfmodels/path'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) perfmodels/memory.log: perfmodels/memory$(EXEEXT) @p='perfmodels/memory$(EXEEXT)'; \ b='perfmodels/memory'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_policies/data_locality.log: sched_policies/data_locality$(EXEEXT) @p='sched_policies/data_locality$(EXEEXT)'; \ b='sched_policies/data_locality'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_policies/execute_all_tasks.log: sched_policies/execute_all_tasks$(EXEEXT) @p='sched_policies/execute_all_tasks$(EXEEXT)'; \ b='sched_policies/execute_all_tasks'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_policies/prio.log: sched_policies/prio$(EXEEXT) @p='sched_policies/prio$(EXEEXT)'; \ b='sched_policies/prio'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_policies/simple_deps.log: sched_policies/simple_deps$(EXEEXT) @p='sched_policies/simple_deps$(EXEEXT)'; \ b='sched_policies/simple_deps'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_policies/simple_cpu_gpu_sched.log: sched_policies/simple_cpu_gpu_sched$(EXEEXT) @p='sched_policies/simple_cpu_gpu_sched$(EXEEXT)'; \ b='sched_policies/simple_cpu_gpu_sched'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) sched_ctx/sched_ctx_hierarchy.log: sched_ctx/sched_ctx_hierarchy$(EXEEXT) @p='sched_ctx/sched_ctx_hierarchy$(EXEEXT)'; \ b='sched_ctx/sched_ctx_hierarchy'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) maxfpga/max_fpga_basic_static.log: maxfpga/max_fpga_basic_static$(EXEEXT) @p='maxfpga/max_fpga_basic_static$(EXEEXT)'; \ b='maxfpga/max_fpga_basic_static'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) maxfpga/max_fpga_advanced_static.log: maxfpga/max_fpga_advanced_static$(EXEEXT) @p='maxfpga/max_fpga_advanced_static$(EXEEXT)'; \ b='maxfpga/max_fpga_advanced_static'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) maxfpga/max_fpga_dynamic.log: maxfpga/max_fpga_dynamic$(EXEEXT) @p='maxfpga/max_fpga_dynamic$(EXEEXT)'; \ b='maxfpga/max_fpga_dynamic'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) maxfpga/max_fpga_mux.log: maxfpga/max_fpga_mux$(EXEEXT) @p='maxfpga/max_fpga_mux$(EXEEXT)'; \ b='maxfpga/max_fpga_mux'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) fortran90/init_01.log: fortran90/init_01$(EXEEXT) @p='fortran90/init_01$(EXEEXT)'; \ b='fortran90/init_01'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) main/tag_task_data_deps.log: main/tag_task_data_deps$(EXEEXT) @p='main/tag_task_data_deps$(EXEEXT)'; \ b='main/tag_task_data_deps'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) datawizard/reclaim.log: datawizard/reclaim$(EXEEXT) @p='datawizard/reclaim$(EXEEXT)'; \ b='datawizard/reclaim'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) check-recursive all-am: Makefile $(PROGRAMS) $(SCRIPTS) $(DATA) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(examplebindir)" "$(DESTDIR)$(examplebindir)" "$(DESTDIR)$(STARPU_OPENCL_DATAdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-recursive install-exec: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -rm -f datawizard/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/$(am__dirstamp) -rm -f datawizard/interfaces/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/interfaces/$(am__dirstamp) -rm -f datawizard/interfaces/bcsr/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/interfaces/bcsr/$(am__dirstamp) -rm -f datawizard/interfaces/block/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/interfaces/block/$(am__dirstamp) -rm -f datawizard/interfaces/coo/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/interfaces/coo/$(am__dirstamp) -rm -f datawizard/interfaces/csr/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/interfaces/csr/$(am__dirstamp) -rm -f datawizard/interfaces/matrix/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/interfaces/matrix/$(am__dirstamp) -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/interfaces/multiformat/$(am__dirstamp) -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/interfaces/multiformat/advanced/$(am__dirstamp) -rm -f datawizard/interfaces/ndim/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/interfaces/ndim/$(am__dirstamp) -rm -f datawizard/interfaces/tensor/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/interfaces/tensor/$(am__dirstamp) -rm -f datawizard/interfaces/variable/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/interfaces/variable/$(am__dirstamp) -rm -f datawizard/interfaces/vector/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/interfaces/vector/$(am__dirstamp) -rm -f datawizard/interfaces/void/$(DEPDIR)/$(am__dirstamp) -rm -f datawizard/interfaces/void/$(am__dirstamp) -rm -f disk/$(DEPDIR)/$(am__dirstamp) -rm -f disk/$(am__dirstamp) -rm -f energy/$(DEPDIR)/$(am__dirstamp) -rm -f energy/$(am__dirstamp) -rm -f errorcheck/$(DEPDIR)/$(am__dirstamp) -rm -f errorcheck/$(am__dirstamp) -rm -f fault-tolerance/$(DEPDIR)/$(am__dirstamp) -rm -f fault-tolerance/$(am__dirstamp) -rm -f fortran90/$(DEPDIR)/$(am__dirstamp) -rm -f fortran90/$(am__dirstamp) -rm -f helper/$(DEPDIR)/$(am__dirstamp) -rm -f helper/$(am__dirstamp) -rm -f main/$(DEPDIR)/$(am__dirstamp) -rm -f main/$(am__dirstamp) -rm -f main/driver_api/$(DEPDIR)/$(am__dirstamp) -rm -f main/driver_api/$(am__dirstamp) -rm -f maxfpga/$(DEPDIR)/$(am__dirstamp) -rm -f maxfpga/$(am__dirstamp) -rm -f microbenchs/$(DEPDIR)/$(am__dirstamp) -rm -f microbenchs/$(am__dirstamp) -rm -f openmp/$(DEPDIR)/$(am__dirstamp) -rm -f openmp/$(am__dirstamp) -rm -f overlap/$(DEPDIR)/$(am__dirstamp) -rm -f overlap/$(am__dirstamp) -rm -f parallel_tasks/$(DEPDIR)/$(am__dirstamp) -rm -f parallel_tasks/$(am__dirstamp) -rm -f perfmodels/$(DEPDIR)/$(am__dirstamp) -rm -f perfmodels/$(am__dirstamp) -rm -f sched_ctx/$(DEPDIR)/$(am__dirstamp) -rm -f sched_ctx/$(am__dirstamp) -rm -f sched_policies/$(DEPDIR)/$(am__dirstamp) -rm -f sched_policies/$(am__dirstamp) -rm -f variable/$(DEPDIR)/$(am__dirstamp) -rm -f variable/$(am__dirstamp) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) clean: clean-recursive clean-am: clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ clean-libtool clean-local clean-noinstPROGRAMS mostlyclean-am distclean: distclean-recursive -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f datawizard/$(DEPDIR)/acquire_cb.Po -rm -f datawizard/$(DEPDIR)/acquire_cb_insert.Po -rm -f datawizard/$(DEPDIR)/acquire_release.Po -rm -f datawizard/$(DEPDIR)/acquire_release2.Po -rm -f datawizard/$(DEPDIR)/acquire_release_to.Po -rm -f datawizard/$(DEPDIR)/acquire_try.Po -rm -f datawizard/$(DEPDIR)/allocate.Po -rm -f datawizard/$(DEPDIR)/allocate_many_numa_nodes.Po -rm -f datawizard/$(DEPDIR)/bcsr.Po -rm -f datawizard/$(DEPDIR)/cache.Po -rm -f datawizard/$(DEPDIR)/commute.Po -rm -f datawizard/$(DEPDIR)/commute2.Po -rm -f datawizard/$(DEPDIR)/copy.Po -rm -f datawizard/$(DEPDIR)/critical_section_with_void_interface.Po -rm -f datawizard/$(DEPDIR)/data_deinitialize.Po -rm -f datawizard/$(DEPDIR)/data_implicit_deps.Po -rm -f datawizard/$(DEPDIR)/data_invalidation.Po -rm -f datawizard/$(DEPDIR)/data_register-data_register.Po -rm -f datawizard/$(DEPDIR)/deinitialize_pending_requests.Po -rm -f datawizard/$(DEPDIR)/deps.Po -rm -f datawizard/$(DEPDIR)/dining_philosophers.Po -rm -f datawizard/$(DEPDIR)/double_parameter.Po -rm -f datawizard/$(DEPDIR)/dsm_stress.Po -rm -f datawizard/$(DEPDIR)/gpu_ptr_register.Po -rm -f datawizard/$(DEPDIR)/gpu_register.Po -rm -f datawizard/$(DEPDIR)/handle_to_pointer.Po -rm -f datawizard/$(DEPDIR)/in_place_partition.Po -rm -f datawizard/$(DEPDIR)/increment_init.Po -rm -f datawizard/$(DEPDIR)/increment_redux.Po -rm -f datawizard/$(DEPDIR)/increment_redux_lazy.Po -rm -f datawizard/$(DEPDIR)/increment_redux_partition.Po -rm -f datawizard/$(DEPDIR)/increment_redux_v2.Po -rm -f datawizard/$(DEPDIR)/increment_redux_with_args.Po -rm -f datawizard/$(DEPDIR)/invalidate_pending_requests.Po -rm -f datawizard/$(DEPDIR)/lazy_allocation.Po -rm -f datawizard/$(DEPDIR)/locality.Po -rm -f datawizard/$(DEPDIR)/manual_reduction.Po -rm -f datawizard/$(DEPDIR)/mpi_like.Po -rm -f datawizard/$(DEPDIR)/mpi_like_async.Po -rm -f datawizard/$(DEPDIR)/no_unregister.Po -rm -f datawizard/$(DEPDIR)/noreclaim.Po -rm -f datawizard/$(DEPDIR)/nowhere.Po -rm -f datawizard/$(DEPDIR)/numa_overflow.Po -rm -f datawizard/$(DEPDIR)/partition_dep.Po -rm -f datawizard/$(DEPDIR)/partition_init.Po -rm -f datawizard/$(DEPDIR)/partition_lazy.Po -rm -f datawizard/$(DEPDIR)/partition_wontuse.Po -rm -f datawizard/$(DEPDIR)/partitioned_acquire.Po -rm -f datawizard/$(DEPDIR)/partitioned_initialization.Po -rm -f datawizard/$(DEPDIR)/readers_and_writers.Po -rm -f datawizard/$(DEPDIR)/readonly.Po -rm -f datawizard/$(DEPDIR)/reclaim.Po -rm -f datawizard/$(DEPDIR)/redux_acquire.Po -rm -f datawizard/$(DEPDIR)/scal.Po -rm -f datawizard/$(DEPDIR)/scratch.Po -rm -f datawizard/$(DEPDIR)/scratch_opencl.Po -rm -f datawizard/$(DEPDIR)/scratch_reuse.Po -rm -f datawizard/$(DEPDIR)/simgrid-locality.Po -rm -f datawizard/$(DEPDIR)/specific_node.Po -rm -f datawizard/$(DEPDIR)/specific_node_same.Po -rm -f datawizard/$(DEPDIR)/sync_and_notify_data.Po -rm -f datawizard/$(DEPDIR)/sync_and_notify_data_implicit.Po -rm -f datawizard/$(DEPDIR)/sync_and_notify_data_opencl.Po -rm -f datawizard/$(DEPDIR)/sync_with_data_with_mem.Po -rm -f datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking.Po -rm -f datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking_implicit.Po -rm -f datawizard/$(DEPDIR)/task_with_multiple_time_the_same_handle.Po -rm -f datawizard/$(DEPDIR)/temporary_partition.Po -rm -f datawizard/$(DEPDIR)/temporary_partition_implicit.Po -rm -f datawizard/$(DEPDIR)/temporary_partition_read.Po -rm -f datawizard/$(DEPDIR)/test_arbiter.Po -rm -f datawizard/$(DEPDIR)/unpartition.Po -rm -f datawizard/$(DEPDIR)/user_interaction_implicit.Po -rm -f datawizard/$(DEPDIR)/variable_parameters.Po -rm -f datawizard/$(DEPDIR)/variable_size.Po -rm -f datawizard/$(DEPDIR)/write_only_tmp_buffer.Po -rm -f datawizard/$(DEPDIR)/wt_broadcast.Po -rm -f datawizard/$(DEPDIR)/wt_host.Po -rm -f datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Po -rm -f datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Po -rm -f datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Po -rm -f datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Po -rm -f datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Po -rm -f datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Po -rm -f datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Po -rm -f datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Po -rm -f datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Po -rm -f datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Po -rm -f datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Po -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Po -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Po -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Po -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Po -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/generic.Po -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_cuda_opencl.Po -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_data_release.Po -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_handle_conversion.Po -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_worker.Po -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/same_handle.Po -rm -f datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Po -rm -f datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Po -rm -f datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Po -rm -f datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Po -rm -f datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Po -rm -f datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Po -rm -f datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Po -rm -f datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Po -rm -f datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Po -rm -f disk/$(DEPDIR)/disk_compute.Po -rm -f disk/$(DEPDIR)/disk_copy.Po -rm -f disk/$(DEPDIR)/disk_copy_to_disk.Po -rm -f disk/$(DEPDIR)/disk_copy_unpack.Po -rm -f disk/$(DEPDIR)/disk_pack.Po -rm -f disk/$(DEPDIR)/mem_reclaim.Po -rm -f energy/$(DEPDIR)/energy_efficiency.Po -rm -f errorcheck/$(DEPDIR)/invalid_blocking_calls.Po -rm -f errorcheck/$(DEPDIR)/invalid_tasks.Po -rm -f errorcheck/$(DEPDIR)/starpu_init_noworker.Po -rm -f errorcheck/$(DEPDIR)/workers_cpuid.Po -rm -f fault-tolerance/$(DEPDIR)/retry.Po -rm -f helper/$(DEPDIR)/cublasLt_init.Po -rm -f helper/$(DEPDIR)/cublas_init.Po -rm -f helper/$(DEPDIR)/cusparse_init.Po -rm -f helper/$(DEPDIR)/execute_on_all.Po -rm -f helper/$(DEPDIR)/hipblas_init.Po -rm -f helper/$(DEPDIR)/pinned_memory.Po -rm -f helper/$(DEPDIR)/starpu_create_sync_task.Po -rm -f helper/$(DEPDIR)/starpu_data_cpy.Po -rm -f helper/$(DEPDIR)/starpu_data_dup_ro.Po -rm -f main/$(DEPDIR)/bind.Po -rm -f main/$(DEPDIR)/callback.Po -rm -f main/$(DEPDIR)/codelet_null_callback.Po -rm -f main/$(DEPDIR)/const_codelet.Po -rm -f main/$(DEPDIR)/deadlock.Po -rm -f main/$(DEPDIR)/declare_deps_after_submission.Po -rm -f main/$(DEPDIR)/declare_deps_after_submission_synchronous.Po -rm -f main/$(DEPDIR)/declare_deps_in_callback.Po -rm -f main/$(DEPDIR)/deploop.Po -rm -f main/$(DEPDIR)/deprecated_func-deprecated_func.Po -rm -f main/$(DEPDIR)/display_binding.Po -rm -f main/$(DEPDIR)/empty_task.Po -rm -f main/$(DEPDIR)/empty_task_chain.Po -rm -f main/$(DEPDIR)/empty_task_sync_point.Po -rm -f main/$(DEPDIR)/empty_task_sync_point_tasks.Po -rm -f main/$(DEPDIR)/execute_on_a_specific_worker.Po -rm -f main/$(DEPDIR)/execute_schedule.Po -rm -f main/$(DEPDIR)/get_children_tasks.Po -rm -f main/$(DEPDIR)/get_current_task.Po -rm -f main/$(DEPDIR)/hwloc_cpuset.Po -rm -f main/$(DEPDIR)/insert_task.Po -rm -f main/$(DEPDIR)/insert_task_array.Po -rm -f main/$(DEPDIR)/insert_task_dyn_handles.Po -rm -f main/$(DEPDIR)/insert_task_many.Po -rm -f main/$(DEPDIR)/insert_task_nullcodelet.Po -rm -f main/$(DEPDIR)/insert_task_pack.Po -rm -f main/$(DEPDIR)/insert_task_value.Po -rm -f main/$(DEPDIR)/insert_task_where.Po -rm -f main/$(DEPDIR)/job.Po -rm -f main/$(DEPDIR)/mkdtemp.Po -rm -f main/$(DEPDIR)/multithreaded.Po -rm -f main/$(DEPDIR)/multithreaded_init.Po -rm -f main/$(DEPDIR)/pack.Po -rm -f main/$(DEPDIR)/pause_resume.Po -rm -f main/$(DEPDIR)/regenerate.Po -rm -f main/$(DEPDIR)/regenerate_pipeline.Po -rm -f main/$(DEPDIR)/restart.Po -rm -f main/$(DEPDIR)/starpu_init.Po -rm -f main/$(DEPDIR)/starpu_task_bundle.Po -rm -f main/$(DEPDIR)/starpu_task_wait.Po -rm -f main/$(DEPDIR)/starpu_task_wait_for_all.Po -rm -f main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Po -rm -f main/$(DEPDIR)/static_restartable.Po -rm -f main/$(DEPDIR)/static_restartable_tag.Po -rm -f main/$(DEPDIR)/static_restartable_using_initializer.Po -rm -f main/$(DEPDIR)/subgraph_repeat.Po -rm -f main/$(DEPDIR)/subgraph_repeat_regenerate.Po -rm -f main/$(DEPDIR)/subgraph_repeat_regenerate_tag.Po -rm -f main/$(DEPDIR)/subgraph_repeat_regenerate_tag_cycle.Po -rm -f main/$(DEPDIR)/subgraph_repeat_tag.Po -rm -f main/$(DEPDIR)/submit.Po -rm -f main/$(DEPDIR)/tag_get_task.Po -rm -f main/$(DEPDIR)/tag_task_data_deps.Po -rm -f main/$(DEPDIR)/tag_wait_api.Po -rm -f main/$(DEPDIR)/task_end_dep.Po -rm -f main/$(DEPDIR)/task_wait_api.Po -rm -f main/$(DEPDIR)/wait_all_regenerable_tasks.Po -rm -f main/driver_api/$(DEPDIR)/init_run_deinit.Po -rm -f main/driver_api/$(DEPDIR)/run_driver.Po -rm -f maxfpga/$(DEPDIR)/max_fpga_advanced_static.Po -rm -f maxfpga/$(DEPDIR)/max_fpga_basic_static.Po -rm -f maxfpga/$(DEPDIR)/max_fpga_dynamic.Po -rm -f maxfpga/$(DEPDIR)/max_fpga_mux.Po -rm -f microbenchs/$(DEPDIR)/async_tasks_overhead.Po -rm -f microbenchs/$(DEPDIR)/bandwidth.Po -rm -f microbenchs/$(DEPDIR)/display_structures_size.Po -rm -f microbenchs/$(DEPDIR)/local_pingpong.Po -rm -f microbenchs/$(DEPDIR)/matrix_as_vector.Po -rm -f microbenchs/$(DEPDIR)/parallel_dependent_homogeneous_tasks_data.Po -rm -f microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks.Po -rm -f microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks_data.Po -rm -f microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks.Po -rm -f microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks_data.Po -rm -f microbenchs/$(DEPDIR)/parallel_redux_heterogeneous_tasks_data.Po -rm -f microbenchs/$(DEPDIR)/parallel_redux_homogeneous_tasks_data.Po -rm -f microbenchs/$(DEPDIR)/prefetch_data_on_node.Po -rm -f microbenchs/$(DEPDIR)/redundant_buffer.Po -rm -f microbenchs/$(DEPDIR)/sync_tasks_overhead.Po -rm -f microbenchs/$(DEPDIR)/tasks_overhead.Po -rm -f microbenchs/$(DEPDIR)/tasks_size_overhead.Po -rm -f openmp/$(DEPDIR)/api_01.Po -rm -f openmp/$(DEPDIR)/array_slice_01.Po -rm -f openmp/$(DEPDIR)/cuda_task_01.Po -rm -f openmp/$(DEPDIR)/environment.Po -rm -f openmp/$(DEPDIR)/init_exit_01.Po -rm -f openmp/$(DEPDIR)/init_exit_02.Po -rm -f openmp/$(DEPDIR)/parallel_01.Po -rm -f openmp/$(DEPDIR)/parallel_02.Po -rm -f openmp/$(DEPDIR)/parallel_03.Po -rm -f openmp/$(DEPDIR)/parallel_barrier_01.Po -rm -f openmp/$(DEPDIR)/parallel_critical_01.Po -rm -f openmp/$(DEPDIR)/parallel_critical_inline_01.Po -rm -f openmp/$(DEPDIR)/parallel_critical_named_01.Po -rm -f openmp/$(DEPDIR)/parallel_critical_named_inline_01.Po -rm -f openmp/$(DEPDIR)/parallel_for_01.Po -rm -f openmp/$(DEPDIR)/parallel_for_02.Po -rm -f openmp/$(DEPDIR)/parallel_for_ordered_01.Po -rm -f openmp/$(DEPDIR)/parallel_master_01.Po -rm -f openmp/$(DEPDIR)/parallel_master_inline_01.Po -rm -f openmp/$(DEPDIR)/parallel_nested_lock_01.Po -rm -f openmp/$(DEPDIR)/parallel_sections_01.Po -rm -f openmp/$(DEPDIR)/parallel_sections_combined_01.Po -rm -f openmp/$(DEPDIR)/parallel_simple_lock_01.Po -rm -f openmp/$(DEPDIR)/parallel_single_copyprivate_01.Po -rm -f openmp/$(DEPDIR)/parallel_single_copyprivate_inline_01.Po -rm -f openmp/$(DEPDIR)/parallel_single_inline_01.Po -rm -f openmp/$(DEPDIR)/parallel_single_nowait_01.Po -rm -f openmp/$(DEPDIR)/parallel_single_wait_01.Po -rm -f openmp/$(DEPDIR)/task_01.Po -rm -f openmp/$(DEPDIR)/task_02.Po -rm -f openmp/$(DEPDIR)/task_03.Po -rm -f openmp/$(DEPDIR)/taskgroup_01.Po -rm -f openmp/$(DEPDIR)/taskgroup_02.Po -rm -f openmp/$(DEPDIR)/taskloop.Po -rm -f openmp/$(DEPDIR)/taskwait_01.Po -rm -f overlap/$(DEPDIR)/gpu_concurrency.Po -rm -f overlap/$(DEPDIR)/overlap.Po -rm -f parallel_tasks/$(DEPDIR)/combined_worker_assign_workerid.Po -rm -f parallel_tasks/$(DEPDIR)/cuda_only.Po -rm -f parallel_tasks/$(DEPDIR)/explicit_combined_worker.Po -rm -f parallel_tasks/$(DEPDIR)/parallel_kernels.Po -rm -f parallel_tasks/$(DEPDIR)/parallel_kernels_spmd.Po -rm -f parallel_tasks/$(DEPDIR)/parallel_kernels_trivial.Po -rm -f parallel_tasks/$(DEPDIR)/spmd_peager.Po -rm -f perfmodels/$(DEPDIR)/feed.Po -rm -f perfmodels/$(DEPDIR)/memory.Po -rm -f perfmodels/$(DEPDIR)/non_linear_regression_based.Po -rm -f perfmodels/$(DEPDIR)/opencl_memset.Po -rm -f perfmodels/$(DEPDIR)/path.Po -rm -f perfmodels/$(DEPDIR)/regression_based_check.Po -rm -f perfmodels/$(DEPDIR)/regression_based_energy.Po -rm -f perfmodels/$(DEPDIR)/regression_based_gpu.Po -rm -f perfmodels/$(DEPDIR)/regression_based_memset.Po -rm -f perfmodels/$(DEPDIR)/regression_based_multiimpl.Po -rm -f perfmodels/$(DEPDIR)/user_base.Po -rm -f perfmodels/$(DEPDIR)/valid_model.Po -rm -f perfmodels/$(DEPDIR)/value_nan.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_hierarchy.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_list.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_policy_data.Po -rm -f sched_policies/$(DEPDIR)/data_locality.Po -rm -f sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Po -rm -f sched_policies/$(DEPDIR)/prio.Po -rm -f sched_policies/$(DEPDIR)/simple_cpu_gpu_sched.Po -rm -f sched_policies/$(DEPDIR)/simple_deps.Po -rm -f sched_policies/$(DEPDIR)/workerids.Po -rm -f variable/$(DEPDIR)/increment.Po -rm -f variable/$(DEPDIR)/increment_opencl.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-examplebinPROGRAMS install-examplebinSCRIPTS \ install-nobase_STARPU_OPENCL_DATADATA install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f datawizard/$(DEPDIR)/acquire_cb.Po -rm -f datawizard/$(DEPDIR)/acquire_cb_insert.Po -rm -f datawizard/$(DEPDIR)/acquire_release.Po -rm -f datawizard/$(DEPDIR)/acquire_release2.Po -rm -f datawizard/$(DEPDIR)/acquire_release_to.Po -rm -f datawizard/$(DEPDIR)/acquire_try.Po -rm -f datawizard/$(DEPDIR)/allocate.Po -rm -f datawizard/$(DEPDIR)/allocate_many_numa_nodes.Po -rm -f datawizard/$(DEPDIR)/bcsr.Po -rm -f datawizard/$(DEPDIR)/cache.Po -rm -f datawizard/$(DEPDIR)/commute.Po -rm -f datawizard/$(DEPDIR)/commute2.Po -rm -f datawizard/$(DEPDIR)/copy.Po -rm -f datawizard/$(DEPDIR)/critical_section_with_void_interface.Po -rm -f datawizard/$(DEPDIR)/data_deinitialize.Po -rm -f datawizard/$(DEPDIR)/data_implicit_deps.Po -rm -f datawizard/$(DEPDIR)/data_invalidation.Po -rm -f datawizard/$(DEPDIR)/data_register-data_register.Po -rm -f datawizard/$(DEPDIR)/deinitialize_pending_requests.Po -rm -f datawizard/$(DEPDIR)/deps.Po -rm -f datawizard/$(DEPDIR)/dining_philosophers.Po -rm -f datawizard/$(DEPDIR)/double_parameter.Po -rm -f datawizard/$(DEPDIR)/dsm_stress.Po -rm -f datawizard/$(DEPDIR)/gpu_ptr_register.Po -rm -f datawizard/$(DEPDIR)/gpu_register.Po -rm -f datawizard/$(DEPDIR)/handle_to_pointer.Po -rm -f datawizard/$(DEPDIR)/in_place_partition.Po -rm -f datawizard/$(DEPDIR)/increment_init.Po -rm -f datawizard/$(DEPDIR)/increment_redux.Po -rm -f datawizard/$(DEPDIR)/increment_redux_lazy.Po -rm -f datawizard/$(DEPDIR)/increment_redux_partition.Po -rm -f datawizard/$(DEPDIR)/increment_redux_v2.Po -rm -f datawizard/$(DEPDIR)/increment_redux_with_args.Po -rm -f datawizard/$(DEPDIR)/invalidate_pending_requests.Po -rm -f datawizard/$(DEPDIR)/lazy_allocation.Po -rm -f datawizard/$(DEPDIR)/locality.Po -rm -f datawizard/$(DEPDIR)/manual_reduction.Po -rm -f datawizard/$(DEPDIR)/mpi_like.Po -rm -f datawizard/$(DEPDIR)/mpi_like_async.Po -rm -f datawizard/$(DEPDIR)/no_unregister.Po -rm -f datawizard/$(DEPDIR)/noreclaim.Po -rm -f datawizard/$(DEPDIR)/nowhere.Po -rm -f datawizard/$(DEPDIR)/numa_overflow.Po -rm -f datawizard/$(DEPDIR)/partition_dep.Po -rm -f datawizard/$(DEPDIR)/partition_init.Po -rm -f datawizard/$(DEPDIR)/partition_lazy.Po -rm -f datawizard/$(DEPDIR)/partition_wontuse.Po -rm -f datawizard/$(DEPDIR)/partitioned_acquire.Po -rm -f datawizard/$(DEPDIR)/partitioned_initialization.Po -rm -f datawizard/$(DEPDIR)/readers_and_writers.Po -rm -f datawizard/$(DEPDIR)/readonly.Po -rm -f datawizard/$(DEPDIR)/reclaim.Po -rm -f datawizard/$(DEPDIR)/redux_acquire.Po -rm -f datawizard/$(DEPDIR)/scal.Po -rm -f datawizard/$(DEPDIR)/scratch.Po -rm -f datawizard/$(DEPDIR)/scratch_opencl.Po -rm -f datawizard/$(DEPDIR)/scratch_reuse.Po -rm -f datawizard/$(DEPDIR)/simgrid-locality.Po -rm -f datawizard/$(DEPDIR)/specific_node.Po -rm -f datawizard/$(DEPDIR)/specific_node_same.Po -rm -f datawizard/$(DEPDIR)/sync_and_notify_data.Po -rm -f datawizard/$(DEPDIR)/sync_and_notify_data_implicit.Po -rm -f datawizard/$(DEPDIR)/sync_and_notify_data_opencl.Po -rm -f datawizard/$(DEPDIR)/sync_with_data_with_mem.Po -rm -f datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking.Po -rm -f datawizard/$(DEPDIR)/sync_with_data_with_mem_non_blocking_implicit.Po -rm -f datawizard/$(DEPDIR)/task_with_multiple_time_the_same_handle.Po -rm -f datawizard/$(DEPDIR)/temporary_partition.Po -rm -f datawizard/$(DEPDIR)/temporary_partition_implicit.Po -rm -f datawizard/$(DEPDIR)/temporary_partition_read.Po -rm -f datawizard/$(DEPDIR)/test_arbiter.Po -rm -f datawizard/$(DEPDIR)/unpartition.Po -rm -f datawizard/$(DEPDIR)/user_interaction_implicit.Po -rm -f datawizard/$(DEPDIR)/variable_parameters.Po -rm -f datawizard/$(DEPDIR)/variable_size.Po -rm -f datawizard/$(DEPDIR)/write_only_tmp_buffer.Po -rm -f datawizard/$(DEPDIR)/wt_broadcast.Po -rm -f datawizard/$(DEPDIR)/wt_host.Po -rm -f datawizard/interfaces/$(DEPDIR)/bcsr_bcsr_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/block_block_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/coo_coo_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/copy_interfaces-copy_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/csr_csr_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/matrix_matrix_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/multiformat_multiformat_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/ndim_ndim_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/tensor_tensor_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/variable_variable_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/vector_vector_interface-test_interfaces.Po -rm -f datawizard/interfaces/$(DEPDIR)/void_void_interface-test_interfaces.Po -rm -f datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_interface.Po -rm -f datawizard/interfaces/bcsr/$(DEPDIR)/bcsr_interface-bcsr_opencl.Po -rm -f datawizard/interfaces/block/$(DEPDIR)/block_interface-block_interface.Po -rm -f datawizard/interfaces/block/$(DEPDIR)/block_interface-block_opencl.Po -rm -f datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_interface.Po -rm -f datawizard/interfaces/coo/$(DEPDIR)/coo_interface-coo_opencl.Po -rm -f datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_interface.Po -rm -f datawizard/interfaces/csr/$(DEPDIR)/csr_interface-csr_opencl.Po -rm -f datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_interface.Po -rm -f datawizard/interfaces/matrix/$(DEPDIR)/matrix_interface-matrix_opencl.Po -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets.Po -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_conversion_codelets_opencl.Po -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_interface.Po -rm -f datawizard/interfaces/multiformat/$(DEPDIR)/multiformat_interface-multiformat_opencl.Po -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/generic.Po -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_cuda_opencl.Po -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_data_release.Po -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_handle_conversion.Po -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/multiformat_worker.Po -rm -f datawizard/interfaces/multiformat/advanced/$(DEPDIR)/same_handle.Po -rm -f datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_interface.Po -rm -f datawizard/interfaces/ndim/$(DEPDIR)/ndim_interface-ndim_opencl.Po -rm -f datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_interface.Po -rm -f datawizard/interfaces/tensor/$(DEPDIR)/tensor_interface-tensor_opencl.Po -rm -f datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_interface.Po -rm -f datawizard/interfaces/variable/$(DEPDIR)/variable_interface-variable_opencl.Po -rm -f datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_interface.Po -rm -f datawizard/interfaces/vector/$(DEPDIR)/vector_interface-vector_opencl.Po -rm -f datawizard/interfaces/void/$(DEPDIR)/void_interface-void_interface.Po -rm -f disk/$(DEPDIR)/disk_compute.Po -rm -f disk/$(DEPDIR)/disk_copy.Po -rm -f disk/$(DEPDIR)/disk_copy_to_disk.Po -rm -f disk/$(DEPDIR)/disk_copy_unpack.Po -rm -f disk/$(DEPDIR)/disk_pack.Po -rm -f disk/$(DEPDIR)/mem_reclaim.Po -rm -f energy/$(DEPDIR)/energy_efficiency.Po -rm -f errorcheck/$(DEPDIR)/invalid_blocking_calls.Po -rm -f errorcheck/$(DEPDIR)/invalid_tasks.Po -rm -f errorcheck/$(DEPDIR)/starpu_init_noworker.Po -rm -f errorcheck/$(DEPDIR)/workers_cpuid.Po -rm -f fault-tolerance/$(DEPDIR)/retry.Po -rm -f helper/$(DEPDIR)/cublasLt_init.Po -rm -f helper/$(DEPDIR)/cublas_init.Po -rm -f helper/$(DEPDIR)/cusparse_init.Po -rm -f helper/$(DEPDIR)/execute_on_all.Po -rm -f helper/$(DEPDIR)/hipblas_init.Po -rm -f helper/$(DEPDIR)/pinned_memory.Po -rm -f helper/$(DEPDIR)/starpu_create_sync_task.Po -rm -f helper/$(DEPDIR)/starpu_data_cpy.Po -rm -f helper/$(DEPDIR)/starpu_data_dup_ro.Po -rm -f main/$(DEPDIR)/bind.Po -rm -f main/$(DEPDIR)/callback.Po -rm -f main/$(DEPDIR)/codelet_null_callback.Po -rm -f main/$(DEPDIR)/const_codelet.Po -rm -f main/$(DEPDIR)/deadlock.Po -rm -f main/$(DEPDIR)/declare_deps_after_submission.Po -rm -f main/$(DEPDIR)/declare_deps_after_submission_synchronous.Po -rm -f main/$(DEPDIR)/declare_deps_in_callback.Po -rm -f main/$(DEPDIR)/deploop.Po -rm -f main/$(DEPDIR)/deprecated_func-deprecated_func.Po -rm -f main/$(DEPDIR)/display_binding.Po -rm -f main/$(DEPDIR)/empty_task.Po -rm -f main/$(DEPDIR)/empty_task_chain.Po -rm -f main/$(DEPDIR)/empty_task_sync_point.Po -rm -f main/$(DEPDIR)/empty_task_sync_point_tasks.Po -rm -f main/$(DEPDIR)/execute_on_a_specific_worker.Po -rm -f main/$(DEPDIR)/execute_schedule.Po -rm -f main/$(DEPDIR)/get_children_tasks.Po -rm -f main/$(DEPDIR)/get_current_task.Po -rm -f main/$(DEPDIR)/hwloc_cpuset.Po -rm -f main/$(DEPDIR)/insert_task.Po -rm -f main/$(DEPDIR)/insert_task_array.Po -rm -f main/$(DEPDIR)/insert_task_dyn_handles.Po -rm -f main/$(DEPDIR)/insert_task_many.Po -rm -f main/$(DEPDIR)/insert_task_nullcodelet.Po -rm -f main/$(DEPDIR)/insert_task_pack.Po -rm -f main/$(DEPDIR)/insert_task_value.Po -rm -f main/$(DEPDIR)/insert_task_where.Po -rm -f main/$(DEPDIR)/job.Po -rm -f main/$(DEPDIR)/mkdtemp.Po -rm -f main/$(DEPDIR)/multithreaded.Po -rm -f main/$(DEPDIR)/multithreaded_init.Po -rm -f main/$(DEPDIR)/pack.Po -rm -f main/$(DEPDIR)/pause_resume.Po -rm -f main/$(DEPDIR)/regenerate.Po -rm -f main/$(DEPDIR)/regenerate_pipeline.Po -rm -f main/$(DEPDIR)/restart.Po -rm -f main/$(DEPDIR)/starpu_init.Po -rm -f main/$(DEPDIR)/starpu_task_bundle.Po -rm -f main/$(DEPDIR)/starpu_task_wait.Po -rm -f main/$(DEPDIR)/starpu_task_wait_for_all.Po -rm -f main/$(DEPDIR)/starpu_worker_exists-starpu_worker_exists.Po -rm -f main/$(DEPDIR)/static_restartable.Po -rm -f main/$(DEPDIR)/static_restartable_tag.Po -rm -f main/$(DEPDIR)/static_restartable_using_initializer.Po -rm -f main/$(DEPDIR)/subgraph_repeat.Po -rm -f main/$(DEPDIR)/subgraph_repeat_regenerate.Po -rm -f main/$(DEPDIR)/subgraph_repeat_regenerate_tag.Po -rm -f main/$(DEPDIR)/subgraph_repeat_regenerate_tag_cycle.Po -rm -f main/$(DEPDIR)/subgraph_repeat_tag.Po -rm -f main/$(DEPDIR)/submit.Po -rm -f main/$(DEPDIR)/tag_get_task.Po -rm -f main/$(DEPDIR)/tag_task_data_deps.Po -rm -f main/$(DEPDIR)/tag_wait_api.Po -rm -f main/$(DEPDIR)/task_end_dep.Po -rm -f main/$(DEPDIR)/task_wait_api.Po -rm -f main/$(DEPDIR)/wait_all_regenerable_tasks.Po -rm -f main/driver_api/$(DEPDIR)/init_run_deinit.Po -rm -f main/driver_api/$(DEPDIR)/run_driver.Po -rm -f maxfpga/$(DEPDIR)/max_fpga_advanced_static.Po -rm -f maxfpga/$(DEPDIR)/max_fpga_basic_static.Po -rm -f maxfpga/$(DEPDIR)/max_fpga_dynamic.Po -rm -f maxfpga/$(DEPDIR)/max_fpga_mux.Po -rm -f microbenchs/$(DEPDIR)/async_tasks_overhead.Po -rm -f microbenchs/$(DEPDIR)/bandwidth.Po -rm -f microbenchs/$(DEPDIR)/display_structures_size.Po -rm -f microbenchs/$(DEPDIR)/local_pingpong.Po -rm -f microbenchs/$(DEPDIR)/matrix_as_vector.Po -rm -f microbenchs/$(DEPDIR)/parallel_dependent_homogeneous_tasks_data.Po -rm -f microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks.Po -rm -f microbenchs/$(DEPDIR)/parallel_independent_heterogeneous_tasks_data.Po -rm -f microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks.Po -rm -f microbenchs/$(DEPDIR)/parallel_independent_homogeneous_tasks_data.Po -rm -f microbenchs/$(DEPDIR)/parallel_redux_heterogeneous_tasks_data.Po -rm -f microbenchs/$(DEPDIR)/parallel_redux_homogeneous_tasks_data.Po -rm -f microbenchs/$(DEPDIR)/prefetch_data_on_node.Po -rm -f microbenchs/$(DEPDIR)/redundant_buffer.Po -rm -f microbenchs/$(DEPDIR)/sync_tasks_overhead.Po -rm -f microbenchs/$(DEPDIR)/tasks_overhead.Po -rm -f microbenchs/$(DEPDIR)/tasks_size_overhead.Po -rm -f openmp/$(DEPDIR)/api_01.Po -rm -f openmp/$(DEPDIR)/array_slice_01.Po -rm -f openmp/$(DEPDIR)/cuda_task_01.Po -rm -f openmp/$(DEPDIR)/environment.Po -rm -f openmp/$(DEPDIR)/init_exit_01.Po -rm -f openmp/$(DEPDIR)/init_exit_02.Po -rm -f openmp/$(DEPDIR)/parallel_01.Po -rm -f openmp/$(DEPDIR)/parallel_02.Po -rm -f openmp/$(DEPDIR)/parallel_03.Po -rm -f openmp/$(DEPDIR)/parallel_barrier_01.Po -rm -f openmp/$(DEPDIR)/parallel_critical_01.Po -rm -f openmp/$(DEPDIR)/parallel_critical_inline_01.Po -rm -f openmp/$(DEPDIR)/parallel_critical_named_01.Po -rm -f openmp/$(DEPDIR)/parallel_critical_named_inline_01.Po -rm -f openmp/$(DEPDIR)/parallel_for_01.Po -rm -f openmp/$(DEPDIR)/parallel_for_02.Po -rm -f openmp/$(DEPDIR)/parallel_for_ordered_01.Po -rm -f openmp/$(DEPDIR)/parallel_master_01.Po -rm -f openmp/$(DEPDIR)/parallel_master_inline_01.Po -rm -f openmp/$(DEPDIR)/parallel_nested_lock_01.Po -rm -f openmp/$(DEPDIR)/parallel_sections_01.Po -rm -f openmp/$(DEPDIR)/parallel_sections_combined_01.Po -rm -f openmp/$(DEPDIR)/parallel_simple_lock_01.Po -rm -f openmp/$(DEPDIR)/parallel_single_copyprivate_01.Po -rm -f openmp/$(DEPDIR)/parallel_single_copyprivate_inline_01.Po -rm -f openmp/$(DEPDIR)/parallel_single_inline_01.Po -rm -f openmp/$(DEPDIR)/parallel_single_nowait_01.Po -rm -f openmp/$(DEPDIR)/parallel_single_wait_01.Po -rm -f openmp/$(DEPDIR)/task_01.Po -rm -f openmp/$(DEPDIR)/task_02.Po -rm -f openmp/$(DEPDIR)/task_03.Po -rm -f openmp/$(DEPDIR)/taskgroup_01.Po -rm -f openmp/$(DEPDIR)/taskgroup_02.Po -rm -f openmp/$(DEPDIR)/taskloop.Po -rm -f openmp/$(DEPDIR)/taskwait_01.Po -rm -f overlap/$(DEPDIR)/gpu_concurrency.Po -rm -f overlap/$(DEPDIR)/overlap.Po -rm -f parallel_tasks/$(DEPDIR)/combined_worker_assign_workerid.Po -rm -f parallel_tasks/$(DEPDIR)/cuda_only.Po -rm -f parallel_tasks/$(DEPDIR)/explicit_combined_worker.Po -rm -f parallel_tasks/$(DEPDIR)/parallel_kernels.Po -rm -f parallel_tasks/$(DEPDIR)/parallel_kernels_spmd.Po -rm -f parallel_tasks/$(DEPDIR)/parallel_kernels_trivial.Po -rm -f parallel_tasks/$(DEPDIR)/spmd_peager.Po -rm -f perfmodels/$(DEPDIR)/feed.Po -rm -f perfmodels/$(DEPDIR)/memory.Po -rm -f perfmodels/$(DEPDIR)/non_linear_regression_based.Po -rm -f perfmodels/$(DEPDIR)/opencl_memset.Po -rm -f perfmodels/$(DEPDIR)/path.Po -rm -f perfmodels/$(DEPDIR)/regression_based_check.Po -rm -f perfmodels/$(DEPDIR)/regression_based_energy.Po -rm -f perfmodels/$(DEPDIR)/regression_based_gpu.Po -rm -f perfmodels/$(DEPDIR)/regression_based_memset.Po -rm -f perfmodels/$(DEPDIR)/regression_based_multiimpl.Po -rm -f perfmodels/$(DEPDIR)/user_base.Po -rm -f perfmodels/$(DEPDIR)/valid_model.Po -rm -f perfmodels/$(DEPDIR)/value_nan.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_hierarchy.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_list.Po -rm -f sched_ctx/$(DEPDIR)/sched_ctx_policy_data.Po -rm -f sched_policies/$(DEPDIR)/data_locality.Po -rm -f sched_policies/$(DEPDIR)/execute_all_tasks-execute_all_tasks.Po -rm -f sched_policies/$(DEPDIR)/prio.Po -rm -f sched_policies/$(DEPDIR)/simple_cpu_gpu_sched.Po -rm -f sched_policies/$(DEPDIR)/simple_deps.Po -rm -f sched_policies/$(DEPDIR)/workerids.Po -rm -f variable/$(DEPDIR)/increment.Po -rm -f variable/$(DEPDIR)/increment_opencl.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-examplebinPROGRAMS uninstall-examplebinSCRIPTS \ uninstall-nobase_STARPU_OPENCL_DATADATA .MAKE: $(am__recursive_targets) all check check-am install install-am \ install-exec install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ am--depfiles check check-TESTS check-am clean \ clean-checkPROGRAMS clean-examplebinPROGRAMS clean-generic \ clean-libtool clean-local clean-noinstPROGRAMS cscopelist-am \ ctags ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am \ install-examplebinPROGRAMS install-examplebinSCRIPTS \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man \ install-nobase_STARPU_OPENCL_DATADATA install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs installdirs-am \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ pdf pdf-am ps ps-am recheck tags tags-am uninstall \ uninstall-am uninstall-examplebinPROGRAMS \ uninstall-examplebinSCRIPTS \ uninstall-nobase_STARPU_OPENCL_DATADATA .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # # Test loading goes through a lot of launchers: # # - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. # either mpirun or starpu_tcpipexec # # - $(LOADER), i.e. tests/loader, is then called to implement timeout, running # gdb, etc. But if it detects that the test is a .sh script, it just executes # it # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # to run the program through e.g. valgrind.sh # # When the program is a shell script, additionally: # # - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) # # - $(MS_LAUNCHER) is called to run the test through starpu_msexec # # - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program # through it. # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # export LAUNCHER @HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL export MS_LAUNCHER LAUNCHER ?= MS_LAUNCHER ?= @STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr export LSAN_OPTIONS export TSAN_OPTIONS clean-local: -rm -rf overlap/overlap.traces datawizard/locality.traces maxfpga/max_fpga_basic_static.o: maxfpga/MyTasks.max maxfpga/max_fpga_advanced_static.o: maxfpga/MyTasks.max maxfpga/max_fpga_dynamic.o: maxfpga/MyTasks.max maxfpga/max_fpga_mux.o: maxfpga/MyTasksMux.max # Fortran90 tests # - link over source file to build our own object fortran90/starpu_mod.f90: @$(MKDIR_P) $(dir $@) $(V_ln) $(LN_S) $(abs_top_srcdir)/include/$(notdir $@) $@ # - express the creation of .mod along .o @STARPU_HAVE_FC_TRUE@starpu_mod.mod: fortran90/starpu_mod.o # - list explicit dependences to control proper module files dependencies @STARPU_HAVE_FC_TRUE@fortran90/init_01.o: starpu_mod.mod # Maxeler compiler # or DFE for hardware execution @STARPU_USE_MAX_FPGA_TRUE@MAX_TARGET ?= DFE_SIM @STARPU_USE_MAX_FPGA_TRUE@$(CLASS) &: $(MAXJ) @STARPU_USE_MAX_FPGA_TRUE@ maxjc -1.7 -cp $$MAXCLASSPATH $(dir $<) @STARPU_USE_MAX_FPGA_TRUE@%.max: %Manager.class @STARPU_USE_MAX_FPGA_TRUE@ CLASSPATH=$$CLASSPATH:. maxJavaRun $(shell dirname $*).$(notdir $*)Manager DFEModel=MAIA maxFileName=$(notdir $*) target=$(MAX_TARGET) @STARPU_USE_MAX_FPGA_TRUE@ cp $(notdir $*)_$(MAX_DFE)/results/$(notdir $*).{max,h} $(dir $@) @STARPU_USE_MAX_FPGA_TRUE@slic_%.o: %.max @STARPU_USE_MAX_FPGA_TRUE@ sliccompile $< $@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/tests/coverage/000077500000000000000000000000001507764646700167455ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/coverage/coverage.sh000077500000000000000000000114651507764646700211060ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # exampledir=../examples/ check_success() { if [ $1 -eq 77 ] ; then echo "skip" exit $1 elif [ $1 != 0 ] ; then echo "failure" exit $1 fi } apps() { if [ -f $exampledir/basic_examples/block ] ; then echo "block opencl" STARPU_NCUDA=0 STARPU_NCPUS=0 $MS_LAUNCHER $STARPU_LAUNCH $exampledir/basic_examples/block check_success $? fi if [ -f $exampledir/basic_examples/variable ] ; then echo "variable opencl" STARPU_NCUDA=0 STARPU_NCPUS=0 $MS_LAUNCHER $STARPU_LAUNCH $exampledir/basic_examples/variable 100 check_success $? echo "variable no worker" STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $MS_LAUNCHER $STARPU_LAUNCH $exampledir/basic_examples/variable check_success $? fi if [ -f $exampledir/incrementer/incrementer ] ; then echo "incrementer opencl" STARPU_NCUDA=0 STARPU_NCPUS=0 $MS_LAUNCHER $STARPU_LAUNCH $exampledir/incrementer/incrementer 10 check_success $? echo "incrementer no worker" STARPU_NCUDA=0 STARPU_NOPENCL=0 STARPU_NCPUS=0 $MS_LAUNCHER $STARPU_LAUNCH $exampledir/incrementer/incrementer check_success $? fi if [ -f $exampledir/tag_example/tag_example ] ; then echo "tag_example" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/tag_example/tag_example -iter 64 -i 128 -j 24 check_success $? fi if [ -f $exampledir/tag_example/tag_example2 ] ; then echo "tag_example2" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/tag_example/tag_example2 -iter 64 -i 128 check_success $? fi if [ -f $exampledir/cholesky/dw_cholesky ] ; then echo "chol.dm" STARPU_CALIBRATE=1 STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin check_success $? echo "chol.dmda" STARPU_CALIBRATE=1 STARPU_SCHED="dmda" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin check_success $? echo "chol.cpu" STARPU_CALIBRATE=1 STARPU_NCUDA=0 STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin check_success $? echo "chol.gpu" STARPU_CALIBRATE=1 STARPU_NCPUS=0 STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/cholesky/dw_cholesky -pin check_success $? fi if [ -f $exampledir/heat/heat ] ; then echo "heat.dm.4k.calibrate.v2" STARPU_CALIBRATE=1 STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 66 -nblocks 4 -v2 -pin check_success $? echo "heat.dm.8k.calibrate.v2" STARPU_CALIBRATE=1 STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -v2 -pin check_success $? echo "heat.dm.8k.no.pin.v2" STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -v2 check_success $? # echo "heat.dm.8k.v2.no.prio" # STARPU_SCHED="no-prio" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 # check_success $? echo "heat.dm.8k.v2.random" STARPU_SCHED="random" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 check_success $? echo "heat.dm.8k.v2" STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 check_success $? echo "heat.greedy.8k.v2" STARPU_SCHED="greedy" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 check_success $? echo "heat.8k.cg" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 -cg check_success $? echo "heat.dm.8k.cg" STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/heat/heat -ntheta 66 -nthick 130 -nblocks 8 -pin -v2 -cg check_success $? fi if [ -f $exampledir/mult/dw_mult_no_stride ] ; then echo "mult.dm.common" STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/mult/dw_mult_no_stride -nblocks 4 -x 4096 -y 4096 -z 1024 -pin -common-model check_success $? echo "mult.dm" STARPU_CALIBRATE=1 STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/mult/dw_mult_no_stride -nblocks 8 -x 4096 -y 4096 -z 4096 -pin check_success $? echo "mult.dmda" STARPU_CALIBRATE=1 STARPU_SCHED="dmda" $MS_LAUNCHER $STARPU_LAUNCH $exampledir/mult/dw_mult_no_stride -nblocks 8 -x 4096 -y 4096 -z 4096 -pin check_success $? fi } apps; starpu-1.4.9+dfsg/tests/datawizard/000077500000000000000000000000001507764646700173045ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/datawizard/acquire_cb.c000066400000000000000000000072561507764646700215570ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Test that when using starpu_data_acquire_cb, the callback_w is properly called */ unsigned token = 0; starpu_data_handle_t token_handle; static void callback_w(void *arg) { (void)arg; token = 42; starpu_data_release(token_handle); } static void callback_r(void *arg) { (void)arg; starpu_data_release(token_handle); } int main(int argc, char **argv) { int ret; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_variable_data_register(&token_handle, -1, 0, sizeof(unsigned)); starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL); starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL); starpu_data_unregister(token_handle); STARPU_ASSERT(token == 42); token = 0; starpu_variable_data_register(&token_handle, -1, 0, sizeof(unsigned)); starpu_data_acquire(token_handle, STARPU_W); starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL); starpu_data_release(token_handle); starpu_data_unregister(token_handle); token = 0; starpu_variable_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, sizeof(unsigned)); /* These are getting executed immediately */ starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL); starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL); starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL); starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL); starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL); starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL); starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL); starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL); starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL); starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL); starpu_data_acquire(token_handle, STARPU_W); /* These will wait for our release */ starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL); starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL); starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL); starpu_data_acquire_cb(token_handle, STARPU_W, callback_w, NULL); starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL); starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL); starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL); starpu_data_acquire_cb(token_handle, STARPU_RW, callback_w, NULL); starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL); starpu_data_acquire_cb(token_handle, STARPU_R, callback_r, NULL); starpu_data_release(token_handle); starpu_data_unregister(token_handle); FPRINTF(stderr, "Token: %u\n", token); starpu_shutdown(); return (token == 42) ? EXIT_SUCCESS : EXIT_FAILURE; } starpu-1.4.9+dfsg/tests/datawizard/acquire_cb_insert.c000066400000000000000000000104741507764646700231370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Test that inserting a task from the callback of a starpu_data_acquire_cb * call, with proper dependency with an already-submitted task */ #define N 16 #define M 4 #define X 2 void which_index_cpu(void *descr[], void *_args) { (void)_args; int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); /* A real case would actually compute something */ *x0 = X; } struct starpu_codelet which_index = { .cpu_funcs = {which_index_cpu}, .cpu_funcs_name = {"which_index_cpu"}, .nbuffers = 1, .modes = {STARPU_W} }; void work_cpu(void *descr[], void *_args) { int i, n = STARPU_VECTOR_GET_NX(descr[0]); float *x0 = (float *)STARPU_VECTOR_GET_PTR(descr[0]); (void)_args; for (i = 0; i < n; i++) x0[i] = i + 1; } struct starpu_codelet work = { .cpu_funcs = {work_cpu}, .cpu_funcs_name = {"work_cpu"}, .nbuffers = 1, .modes = {STARPU_W} }; static int x; static starpu_data_handle_t x_handle, f_handle; static void callback(void *arg) { (void)arg; int ret; ret = starpu_task_insert(&work, STARPU_W, starpu_data_get_sub_data(f_handle, 1, x), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_data_release(x_handle); } int main(int argc, char **argv) { int i, ret; float *f; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if(starpu_cpu_worker_get_count() == 0) { starpu_shutdown(); return STARPU_TEST_SKIPPED; } /* Declare x */ starpu_variable_data_register(&x_handle, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); /* Allocate and Declare f */ ret = starpu_malloc((void**)&f, N * sizeof(*f)); STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); memset(f, 0, N * sizeof(*f)); starpu_vector_data_register(&f_handle, STARPU_MAIN_RAM, (uintptr_t)f, N, sizeof(*f)); /* Partition f */ struct starpu_data_filter filter = { .filter_func = starpu_vector_filter_block, .nchildren = M, }; starpu_data_partition(f_handle, &filter); /* Compute which portion we will work on */ ret = starpu_task_insert(&which_index, STARPU_W, x_handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* And submit the corresponding task */ #ifdef __GCC__ STARPU_DATA_ACQUIRE_CB( x_handle, STARPU_R, starpu_task_insert(&work, STARPU_W, starpu_data_get_sub_data(f_handle, 1, x), 0) ); #else starpu_data_acquire_cb(x_handle, STARPU_R, callback, NULL); #endif /* Wait for acquisition (and thus insertion) */ starpu_data_acquire(x_handle, STARPU_W); starpu_data_release(x_handle); /* Now wait for the inserted task */ ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); /* Can now clean */ starpu_data_unpartition(f_handle, STARPU_MAIN_RAM); starpu_data_unregister(f_handle); starpu_data_unregister(x_handle); FPRINTF(stderr, "VALUES: %d", x); for(i=0 ; i #include "../helper.h" #include "../variable/increment.h" /* * Call acquire/release in competition with inserting task working on the same data */ #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 10; #else static unsigned ntasks = 10000; #endif unsigned token = 0; starpu_data_handle_t token_handle; static int increment_token(void) { int ret; struct starpu_task *task = starpu_task_create(); task->synchronous = 1; task->cl = &increment_cl; task->handles[0] = token_handle; ret = starpu_task_submit(task); return ret; } static void callback(void *arg) { (void)arg; token++; starpu_data_release(token_handle); } int main(int argc, char **argv) { unsigned i; int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); increment_load_opencl(); starpu_variable_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, sizeof(unsigned)); FPRINTF(stderr, "Token: %u\n", token); for(i=0; i #include "../helper.h" #include "../variable/increment.h" /* * Call acquire/release in competition with inserting task working on the same data */ #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 40; #elif !defined(STARPU_LONG_CHECK) static unsigned ntasks = 4000; #else static unsigned ntasks = 40000; #endif unsigned token = 0; starpu_data_handle_t token_handle; static int increment_token(int synchronous) { struct starpu_task *task = starpu_task_create(); task->synchronous = synchronous; task->cl = &increment_cl; task->handles[0] = token_handle; return starpu_task_submit(task); } static void callback(void *arg) { (void)arg; starpu_data_release(token_handle); } #ifdef STARPU_DEVEL # warning TODO add threads #endif int main(int argc, char **argv) { unsigned i; int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); increment_load_opencl(); starpu_variable_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, sizeof(unsigned)); FPRINTF(stderr, "Token: %u\n", token); for(i=0; i #include "../helper.h" #include "../variable/increment.h" /* * Check that _release_to correctly interacts with tasks working on the same data */ #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 10; #else static unsigned ntasks = 1000; #endif void check_cpu(void *descr[], void *arg) { unsigned *val = arg; unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); STARPU_ASSERT(*tokenptr == *val); } static struct starpu_codelet check_cl = { .modes = { STARPU_R }, .cpu_funcs = {check_cpu}, .cpu_funcs_name = {"check_cpu"}, .nbuffers = 1 }; unsigned token = 0; starpu_data_handle_t token_handle; static int increment_token(void) { int ret; struct starpu_task *task = starpu_task_create(); task->cl = &increment_cl; task->handles[0] = token_handle; ret = starpu_task_submit(task); return ret; } static int check_token(unsigned value) { unsigned *value_p; int ret; struct starpu_task *task = starpu_task_create(); task->cl = &check_cl; task->handles[0] = token_handle; task->cl_arg = value_p = malloc(sizeof(*value_p)); task->cl_arg_size = sizeof(*value_p); task->cl_arg_free = 1; *value_p = value; ret = starpu_task_submit(task); return ret; } static void callback(void *arg) { (void)arg; token++; starpu_data_release_to(token_handle, STARPU_W); starpu_sleep(0.001); starpu_data_release_to(token_handle, STARPU_R); starpu_sleep(0.001); starpu_data_release(token_handle); } int main(int argc, char **argv) { unsigned i; int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); increment_load_opencl(); starpu_variable_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, sizeof(unsigned)); FPRINTF(stderr, "Token: %u\n", token); for(i=0; i #include "../helper.h" /* * Try to use data_acquire_try in parallel with tasks */ void func(void *descr[], void *arg) { (void)descr; (void)arg; starpu_sleep(0.01); } static struct starpu_codelet cl = { .modes = { STARPU_RW }, .cpu_funcs = {func}, .cuda_funcs = {func}, .opencl_funcs = {func}, .cpu_funcs_name = {"func"}, .nbuffers = 1 }; unsigned token = 0; starpu_data_handle_t token_handle; static void callback(void *arg) { (void)arg; starpu_data_release(token_handle); } int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_variable_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, sizeof(unsigned)); ret = starpu_task_insert(&cl, STARPU_RW, token_handle, 0); if (ret == -ENODEV) goto enodev; ret = starpu_data_acquire_try(token_handle, STARPU_R); STARPU_ASSERT(ret != 0); starpu_do_schedule(); while ((ret = starpu_data_acquire_try(token_handle, STARPU_R)) != 0) { starpu_sleep(0.001); } ret = starpu_task_insert(&cl, STARPU_RW, token_handle, 0); if (ret == -ENODEV) goto enodev; starpu_data_release(token_handle); starpu_task_wait_for_all(); ret = starpu_data_acquire_try(token_handle, STARPU_R); STARPU_ASSERT(ret == 0); starpu_data_release(token_handle); starpu_data_unregister(token_handle); starpu_shutdown(); return 0; enodev: starpu_data_unregister(token_handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/allocate.c000066400000000000000000000200161507764646700212330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include #include /* Stress data allocation on a GPU, triggering eviction */ #define SIZE_LIMIT 128 #define STR_LIMIT "128" #define SIZE_ALLOC 128 #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(int argc, char **argv) { return STARPU_TEST_SKIPPED; } #else static int test_prefetch(unsigned memnodes) { float *buffers[4]; starpu_data_handle_t handles[4]; unsigned i, j; starpu_ssize_t available_size; if (starpu_getenv_number_default("STARPU_DIDUSE_BARRIER", 0)) /* This would hang */ return STARPU_TEST_SKIPPED; buffers[0] = malloc(SIZE_ALLOC*1024*512); STARPU_ASSERT(buffers[0]); /* Prefetch half the memory */ starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)buffers[0], SIZE_ALLOC*1024*512); for(i=1 ; i #include #include #include "../helper.h" /* Allocate a big buffer not fitting in a single NUMA node, to see what * happens, especially if NUMA nodes are correctly reported in traces. */ #if !defined(STARPU_HAVE_UNSETENV) || !defined(STARPU_HAVE_SETENV) || !defined(STARPU_USE_CPU) #warning unsetenv or setenv are not defined. Or CPU are not enabled. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #elif !defined(STARPU_HAVE_HWLOC) // We need hwloc to know the size of NUMA nodes #warning hwloc is not used. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else #if HWLOC_API_VERSION < 0x00010b00 #define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE #endif static void nop(void *descr[], void *arg) { (void)descr; (void)arg; } static struct starpu_codelet cl = { .cpu_funcs = { nop }, .nbuffers = 1, .modes = { STARPU_RW }, }; int main(int argc, char **argv) { int ret; starpu_data_handle_t handle; int worker; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); unsetenv("STARPU_NCUDA"); conf.ncpus = -1; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, &worker, 1) == 0) { fprintf(stderr, "Could not find enough workers\n"); starpu_shutdown(); return STARPU_TEST_SKIPPED; } size_t numa_node_mem; hwloc_topology_t topo = starpu_get_hwloc_topology(); /* This test should work also when NUMA support isn't enabled in * StarPU, so we can't rely on starpu_memory_nodes_get_numa_count(). */ if (hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_NUMANODE) < 2) { /* Requires at least 2 NUMA nodes, to avoid overflowing memory * if there is only one NUMA node. */ fprintf(stderr, "Could not find enough NUMA nodes\n"); starpu_shutdown(); return STARPU_TEST_SKIPPED; } hwloc_obj_t numa_node = hwloc_get_obj_by_type(topo, HWLOC_OBJ_NUMANODE, 0); if (!numa_node) { fprintf(stderr, "Can't find NUMA node 0\n"); starpu_shutdown(); return EXIT_FAILURE; } #if HWLOC_API_VERSION >= 0x00020000 numa_node_mem = numa_node->attr->numanode.local_memory; #else numa_node_mem = numa_node->memory.local_memory; #endif size_t buffer_size = numa_node_mem * 1.5; printf("NUMA node 0 has %lu MB, the buffer will use %lu MB\n", numa_node_mem / 1024 / 1024, buffer_size / 1024 / 1024); uintptr_t buffer = starpu_malloc_on_node(STARPU_MAIN_RAM, buffer_size); if (!buffer) { fprintf(stderr, "Refuses to allocate that much, too bad\n"); starpu_shutdown(); return STARPU_TEST_SKIPPED; } memset((void*) buffer, 0, buffer_size); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, buffer, buffer_size); ret = starpu_task_insert(&cl, STARPU_RW, handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); enodev: starpu_data_unregister(handle); starpu_free_on_node(STARPU_MAIN_RAM, buffer, buffer_size); starpu_shutdown(); return EXIT_SUCCESS; } #endif starpu-1.4.9+dfsg/tests/datawizard/bcsr.c000066400000000000000000000110771507764646700204070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" static starpu_data_handle_t bcsr_handle; void cpu_show_bcsr(void *descr[], void *arg) { (void)arg; struct starpu_bcsr_interface *iface = descr[0]; uint32_t nnz = STARPU_BCSR_GET_NNZ(iface); uint32_t nrow = STARPU_BCSR_GET_NROW(iface); int *nzval = (int *)STARPU_BCSR_GET_NZVAL(iface); uint32_t *colind = STARPU_BCSR_GET_COLIND(iface); uint32_t *rowptr = STARPU_BCSR_GET_ROWPTR(iface); STARPU_ASSERT(colind == STARPU_BCSR_GET_RAM_COLIND(iface)); STARPU_ASSERT(rowptr == STARPU_BCSR_GET_RAM_ROWPTR(iface)); uint32_t firstentry = STARPU_BCSR_GET_FIRSTENTRY(iface); uint32_t r = STARPU_BCSR_GET_R(iface); uint32_t c = STARPU_BCSR_GET_C(iface); uint32_t elemsize = STARPU_BCSR_GET_ELEMSIZE(iface); uint32_t i, j, y, x; static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; STARPU_PTHREAD_MUTEX_LOCK(&mutex); printf("\nnnz %u elemsize %u\n", nnz, elemsize); for (i = 0; i < nrow; i++) { uint32_t row_start = rowptr[i] - firstentry; uint32_t row_end = rowptr[i+1] - firstentry; printf("row %u\n", i); for (j = row_start; j < row_end; j++) { int *block = nzval + j * r*c; printf(" column %u\n", colind[j]); for (y = 0; y < r; y++) { for (x = 0; x < c; x++) printf(" %d", block[y*c+x]); printf("\n"); } } } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } struct starpu_codelet show_cl = { .cpu_funcs = { cpu_show_bcsr }, .nbuffers = 1, .modes = { STARPU_R }, }; /* * In this test, we use the following matrix: * * +----------------+ * | 0 1 0 0 | * | 2 3 0 0 | * | 4 5 8 9 | * | 6 7 10 11 | * | 0 0 0 0 | * | 0 0 0 0 | * +----------------+ * * nzval = [0, 1, 2, 3] ++ [4, 5, 6, 7] ++ [8, 9, 10, 11] * colind = [0, 0, 1] (column index of each non-zero block) * rowptr = [0, 1, 3] (index of first non-zero block for each row) * r = c = 2 */ /* Size of the blocks */ #define R 2 #define C 2 #define NNZ_BLOCKS 3 /* out of 6 */ #define NZVAL_SIZE (R*C*NNZ_BLOCKS) #define NROWS 3 static int nzval[NZVAL_SIZE] = { 0, 1, 2, 3, /* First block */ 4, 5, 6, 7, /* Second block */ 8, 9, 10, 11 /* Third block */ }; static uint32_t colind[NNZ_BLOCKS] = { 0, 0, 1 }; static uint32_t rowptr[NROWS+1] = { 0, 1, NNZ_BLOCKS, NNZ_BLOCKS }; int main(int argc, char **argv) { int ret; struct starpu_conf conf; starpu_conf_init(&conf); conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; if (starpu_initialize(&conf, &argc, &argv) == -ENODEV) return STARPU_TEST_SKIPPED; if (starpu_cpu_worker_get_count() == 0 || starpu_memory_nodes_get_count() > 1) { starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu_bcsr_data_register(&bcsr_handle, STARPU_MAIN_RAM, NNZ_BLOCKS, NROWS, (uintptr_t) nzval, colind, rowptr, 0, /* firstentry */ R, C, sizeof(nzval[0])); ret = starpu_task_insert(&show_cl, STARPU_R, bcsr_handle, 0); if (ret == -ENODEV) { starpu_data_unregister(bcsr_handle); starpu_shutdown(); return STARPU_TEST_SKIPPED; } struct starpu_data_filter filter = { .filter_func = starpu_bcsr_filter_vertical_block, .nchildren = 3, }; starpu_data_partition(bcsr_handle, &filter); ret = starpu_task_insert(&show_cl, STARPU_R, starpu_data_get_sub_data(bcsr_handle, 1, 0), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&show_cl, STARPU_R, starpu_data_get_sub_data(bcsr_handle, 1, 1), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&show_cl, STARPU_R, starpu_data_get_sub_data(bcsr_handle, 1, 2), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_data_unpartition(bcsr_handle, STARPU_MAIN_RAM); starpu_data_unregister(bcsr_handle); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/tests/datawizard/cache.c000066400000000000000000000043771507764646700205260ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Trigger re-using a buffer allocation on GPUs */ #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) static void codelet(void *descr[], void *arg) { (void)descr; (void)arg; FPRINTF(stderr, "%lx\n", (unsigned long) STARPU_VARIABLE_GET_PTR(descr[0])); FPRINTF(stderr, "codelet\n"); } #endif #ifdef STARPU_USE_CUDA static struct starpu_codelet cuda_cl = { .cuda_funcs = {codelet}, .nbuffers = 1, .modes = {STARPU_R} }; #endif #ifdef STARPU_USE_OPENCL static struct starpu_codelet opencl_cl = { .opencl_funcs = {codelet}, .nbuffers = 1, .modes = {STARPU_R} }; #endif void dotest(struct starpu_codelet *cl) { int ret; int var = 42; starpu_data_handle_t handle; starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); ret = starpu_task_insert(cl, STARPU_R, handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); starpu_data_unregister(handle); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); ret = starpu_task_insert(cl, STARPU_R, handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); enodev: starpu_data_unregister(handle); } int main() { int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_CUDA dotest(&cuda_cl); #endif #ifdef STARPU_USE_OPENCL dotest(&opencl_cl); #endif starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/tests/datawizard/commute.c000066400000000000000000000111431507764646700211210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Trigger various STARPU_R / STARPU_RW / STARPU_RW|COMMUTE patterns */ void begin(void *descr[], void *arg) { (void)arg; int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); *x = 0; } static struct starpu_codelet codelet_begin = { .cpu_funcs = {begin}, .cpu_funcs_name = {"begin"}, .nbuffers = 1, .name = "begin", }; void commute1(void *descr[], void *arg) { (void)arg; int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); *x = 1; } static struct starpu_codelet codelet_commute1 = { .cpu_funcs = {commute1}, .cpu_funcs_name = {"commute1"}, .nbuffers = 1, .modes = {STARPU_RW | STARPU_COMMUTE}, .name = "commute1", }; void commute2(void *descr[], void *arg) { (void)arg; int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); *x = 2; } static struct starpu_codelet codelet_commute2 = { .cpu_funcs = {commute2}, .cpu_funcs_name = {"commute2"}, .nbuffers = 1, .modes = {STARPU_W | STARPU_COMMUTE}, .name = "commute2", }; void commute3(void *descr[], void *arg) { (void)descr; (void)arg; } static struct starpu_codelet codelet_commute3 = { .cpu_funcs = {commute3}, .cpu_funcs_name = {"commute3"}, .nbuffers = 1, .modes = {STARPU_RW | STARPU_COMMUTE}, .name = "commute3", }; static struct starpu_codelet codelet_end; void end(void *descr[], void *_args) { int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); enum starpu_data_access_mode end_mode = *(enum starpu_data_access_mode*) _args; if (end_mode & STARPU_W) (*x)++; } static struct starpu_codelet codelet_end = { .cpu_funcs = {end}, .cpu_funcs_name = {"end"}, .nbuffers = 1, .name = "end", }; static int x; static starpu_data_handle_t x_handle, f_handle; static void test(enum starpu_data_access_mode begin_mode, enum starpu_data_access_mode end_mode, int order) { struct starpu_task *begin_t, *commute1_t, *commute2_t, *end_t; int ret; codelet_begin.modes[0] = begin_mode; codelet_end.modes[0] = end_mode; begin_t = starpu_task_create(); begin_t->cl = &codelet_begin; begin_t->handles[0] = x_handle; begin_t->use_tag = 1; begin_t->tag_id = (order<<20) + (begin_mode<<10) + end_mode; commute1_t = starpu_task_create(); commute1_t->cl = &codelet_commute1; commute1_t->handles[0] = x_handle; commute2_t = starpu_task_create(); commute2_t->cl = &codelet_commute2; commute2_t->handles[0] = x_handle; if (order) starpu_task_declare_deps_array(commute2_t, 1, &commute1_t); else starpu_task_declare_deps_array(commute1_t, 1, &commute2_t); end_t = starpu_task_create(); end_t->cl = &codelet_end; end_t->handles[0] = x_handle; end_t->detach = 0; end_t->cl_arg = &end_mode; end_t->cl_arg_size = sizeof(end_mode); if (starpu_task_submit(begin_t) == -ENODEV) exit(STARPU_TEST_SKIPPED); if (starpu_task_submit(commute1_t) == -ENODEV) exit(STARPU_TEST_SKIPPED); if (starpu_task_submit(commute2_t) == -ENODEV) exit(STARPU_TEST_SKIPPED); starpu_task_insert(&codelet_commute3, STARPU_RW|STARPU_COMMUTE, x_handle, 0); if (starpu_task_submit(end_t) == -ENODEV) exit(STARPU_TEST_SKIPPED); ret = starpu_task_wait(end_t); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); starpu_data_acquire(x_handle, STARPU_R); if (x != 1 + order + !!(end_mode & STARPU_W)) exit(EXIT_FAILURE); starpu_data_release(x_handle); } int main(int argc, char **argv) { int i, ret; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Declare x */ starpu_variable_data_register(&x_handle, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); for (i = 0; i <= 1; i++) { test(STARPU_R, STARPU_R, i); test(STARPU_W, STARPU_R, i); test(STARPU_W, STARPU_RW, i); test(STARPU_R, STARPU_RW, i); } starpu_data_unregister(x_handle); starpu_shutdown(); STARPU_RETURN(0); } starpu-1.4.9+dfsg/tests/datawizard/commute2.c000066400000000000000000000060411507764646700212040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "../helper.h" /* * Test that STARPU_RW vs STARPU_RW|STARPU_COMMUTE get proper dependency */ static unsigned cnt; static void cpu_memcpy(void *descr[], void *cl_arg) { int me = (uintptr_t)cl_arg; int res; (void)descr; FPRINTF(stderr,"%d\n", me); if (me == 0) { /* let commute tasks potentially happen */ starpu_usleep(100000); res = STARPU_ATOMIC_ADD(&cnt,1); STARPU_ASSERT(res == 1); } else { res = STARPU_ATOMIC_ADD(&cnt,1); STARPU_ASSERT(res != 1); } } static struct starpu_codelet my_cl = { .where = STARPU_CPU, .cpu_funcs = {cpu_memcpy}, .nbuffers = STARPU_VARIABLE_NBUFFERS }; int main(void) { double *res, *a; unsigned n=100000, i; starpu_data_handle_t res_handle, a_handle; unsigned nb_tasks = 10; int ret; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void**)&res, n*sizeof(double)); starpu_malloc((void**)&a, n*sizeof(double)); for(i=0; i < n; i++) res[i] = a[i] = 1.0; starpu_vector_data_register(&res_handle, 0, (uintptr_t)res, (uint32_t)n, sizeof(double)); starpu_vector_data_register(&a_handle, 0, (uintptr_t)a, (uint32_t)n, sizeof(double)); starpu_data_acquire(a_handle, STARPU_RW); for (i = 0; i < nb_tasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl=&my_cl; task->nbuffers = i == 0 ? 2 : 1; task->handles[0] = res_handle; if (i == 0) task->modes[0] = STARPU_RW; else task->modes[0] = STARPU_RW | STARPU_COMMUTE; task->handles[1] = a_handle; task->modes[1] = STARPU_R; task->cl_arg = (void*)(uintptr_t)i; ret = starpu_task_submit(task); if (ret == -ENODEV) { starpu_data_release(a_handle); goto enodev; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* let commute tasks potentially happen */ starpu_usleep(100000); starpu_data_release(a_handle); starpu_task_wait_for_all (); enodev: starpu_data_unregister(res_handle); starpu_data_unregister(a_handle); starpu_free_noflag(res, n*sizeof(double)); starpu_free_noflag(a, n*sizeof(double)); starpu_shutdown(); return ret == -ENODEV ? STARPU_TEST_SKIPPED : EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/datawizard/copy.c000066400000000000000000000062471507764646700204330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Trigger a lot of transfers of a single variable between CPUs and GPUs */ #ifdef STARPU_QUICK_CHECK static unsigned nloops = 10; #else static unsigned nloops = 1000; #endif void dummy_func(void *descr[], void *arg) { (void)descr; (void)arg; } static struct starpu_codelet cpu_codelet = { .cpu_funcs = {dummy_func}, .cpu_funcs_name = {"dummy_func"}, .model = NULL, .nbuffers = 1, .modes = {STARPU_RW} }; static struct starpu_codelet gpu_codelet = { .cuda_funcs = {dummy_func}, .opencl_funcs = {dummy_func}, .model = NULL, .nbuffers = 1, .modes = {STARPU_RW} }; int main(int argc, char **argv) { float foo; starpu_data_handle_t float_array_handle; unsigned i; int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) == 0 && starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER) == 0 && starpu_worker_get_count_by_type(STARPU_MPI_MS_WORKER) == 0) { FPRINTF(stderr, "This application requires a CUDA , OpenCL Worker\n"); starpu_shutdown(); return STARPU_TEST_SKIPPED; } foo = 0.0f; starpu_variable_data_register(&float_array_handle, STARPU_MAIN_RAM, (uintptr_t)&foo, sizeof(foo)); for (i = 0; i < nloops; i++) { struct starpu_task *task_cpu, *task_gpu; task_cpu = starpu_task_create(); task_gpu = starpu_task_create(); task_cpu->cl = &cpu_codelet; task_cpu->callback_func = NULL; task_cpu->handles[0] = float_array_handle; task_gpu->cl = &gpu_codelet; task_gpu->callback_func = NULL; task_gpu->handles[0] = float_array_handle; ret = starpu_task_submit(task_cpu); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(task_gpu); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(float_array_handle); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(float_array_handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/critical_section_with_void_interface.c000066400000000000000000000045501507764646700270660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Use a void interface to protect the access to a variable that is not declared to StarPU */ starpu_data_handle_t void_handle; int critical_var; void critical_section(void *descr[], void *_args) { (void)descr; (void)_args; /* We do not protect this variable because it is only accessed when the * "void_handle" piece of data is accessed. */ critical_var++; } static struct starpu_codelet cl = { .cpu_funcs = {critical_section}, .cuda_funcs = {critical_section}, .opencl_funcs = {critical_section}, .nbuffers = 1, .modes = {STARPU_RW} }; int main(void) { #ifdef STARPU_QUICK_CHECK int ntasks = 10; #else int ntasks = 1000; #endif int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); critical_var = 0; /* Create a void data which will be used as an exclusion mechanism. */ starpu_void_data_register(&void_handle); int i; for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = void_handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_data_unregister(void_handle); ret = (critical_var == ntasks) ? EXIT_SUCCESS : EXIT_FAILURE; starpu_shutdown(); return ret; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/data_deinitialize.c000066400000000000000000000137341507764646700231230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Try to mix starpu_data_deinitialize and starpu_data_deinitialize_submit * calls with task insertions */ #ifdef STARPU_QUICK_CHECK static unsigned nloops=100; #else static unsigned nloops=1000; #endif #define VECTORSIZE 1024 static starpu_data_handle_t v_handle; /* * Memset */ #ifdef STARPU_USE_CUDA static void cuda_memset_codelet(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned length = STARPU_VECTOR_GET_NX(descr[0]); cudaMemsetAsync(buf, 42, length, starpu_cuda_get_local_stream()); } #endif #ifdef STARPU_USE_OPENCL static void opencl_memset_codelet(void *buffers[], void *args) { (void) args; STARPU_SKIP_IF_VALGRIND; cl_command_queue queue; int id = starpu_worker_get_id_check(); int devid = starpu_worker_get_devid(id); starpu_opencl_get_queue(devid, &queue); cl_mem buffer = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); unsigned length = STARPU_VECTOR_GET_NX(buffers[0]); char *v = malloc(length); STARPU_ASSERT(v != NULL); memset(v, 42, length); cl_int err; err = clEnqueueWriteBuffer(queue, buffer, CL_FALSE, 0, /* offset */ length, /* sizeof (char) */ v, 0, /* num_events_in_wait_list */ NULL, /* event_wait_list */ NULL /* event */); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); } #endif /* !STARPU_USE_OPENCL */ void cpu_memset_codelet(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned length = STARPU_VECTOR_GET_NX(descr[0]); memset(buf, 42, length * sizeof(*buf)); } static struct starpu_codelet memset_cl = { .cpu_funcs = {cpu_memset_codelet}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_memset_codelet}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_memset_codelet}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs_name = {"cpu_memset_codelet"}, .nbuffers = 1, .modes = {STARPU_W} }; /* * Check content */ void cpu_check_content_codelet(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned length = STARPU_VECTOR_GET_NX(descr[0]); unsigned i; for (i = 0; i < length; i++) { if (buf[i] != 42) { FPRINTF(stderr, "buf[%u] is '%c' while it should be '%c'\n", i, buf[i], 42); exit(-1); } } } static struct starpu_codelet check_content_cl = { .cpu_funcs = {cpu_check_content_codelet}, .cpu_funcs_name = {"cpu_check_content_codelet"}, .nbuffers = 1, .modes = {STARPU_R} }; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if(starpu_cpu_worker_get_count() == 0) { starpu_shutdown(); return STARPU_TEST_SKIPPED; } #ifdef STARPU_HAVE_VALGRIND_H if(RUNNING_ON_VALGRIND) nloops = 2; #endif /* The buffer should never be explicitly allocated */ starpu_vector_data_register(&v_handle, (uint32_t)-1, (uintptr_t)NULL, VECTORSIZE, sizeof(char)); unsigned loop; for (loop = 0; loop < nloops; loop++) { struct starpu_task *memset_task; struct starpu_task *check_content_task; memset_task = starpu_task_create(); memset_task->cl = &memset_cl; memset_task->handles[0] = v_handle; memset_task->detach = 0; ret = starpu_task_submit(memset_task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(memset_task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); check_content_task = starpu_task_create(); check_content_task->cl = &check_content_cl; check_content_task->handles[0] = v_handle; check_content_task->detach = 0; ret = starpu_task_submit(check_content_task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(check_content_task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); starpu_data_deinitialize(v_handle); } for (loop = 0; loop < nloops; loop++) { struct starpu_task *memset_task; struct starpu_task *check_content_task; memset_task = starpu_task_create(); memset_task->cl = &memset_cl; memset_task->handles[0] = v_handle; ret = starpu_task_submit(memset_task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); check_content_task = starpu_task_create(); check_content_task->cl = &check_content_cl; check_content_task->handles[0] = v_handle; ret = starpu_task_submit(check_content_task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_data_deinitialize_submit(v_handle); } /* this should get rid of automatically allocated buffers */ starpu_data_unregister(v_handle); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(v_handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/data_implicit_deps.c000066400000000000000000000166021507764646700232730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Test that implicit dependencies get properly computed */ #define VECTORSIZE 1024 static unsigned *A, *B, *C, *D; starpu_data_handle_t A_handle, B_handle, C_handle, D_handle; static unsigned var = 0; starpu_data_handle_t var_handle; void func(void *descr[], void *arg) { (void)descr; (void)arg; STARPU_SKIP_IF_VALGRIND; starpu_usleep(200000); } static struct starpu_codelet cl_f = { .modes = { STARPU_RW, STARPU_R, STARPU_RW }, .cpu_funcs = {func}, .cuda_funcs = {func}, .opencl_funcs = {func}, .cpu_funcs_name = {"func"}, .nbuffers = 3, }; void g_cpu(void *descr[], void *arg) { (void)descr; (void)arg; STARPU_SKIP_IF_VALGRIND; unsigned *val = (unsigned *) STARPU_VARIABLE_GET_PTR(descr[0]); starpu_usleep(100000); *val = 42; } #ifdef STARPU_USE_CUDA void g_cuda(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; unsigned *val = (unsigned *) STARPU_VARIABLE_GET_PTR(descr[0]); unsigned value = 42; starpu_usleep(100000); cudaMemcpyAsync(val, &value, sizeof(value), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } #endif #ifdef STARPU_USE_OPENCL void g_opencl(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; cl_mem val = (cl_mem) STARPU_VARIABLE_GET_PTR(descr[0]); unsigned value = 42; starpu_usleep(100000); cl_command_queue queue; starpu_opencl_get_current_queue(&queue); cl_int err; err = clEnqueueWriteBuffer(queue, val, CL_TRUE, 0, sizeof(unsigned), (void *)&value, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); } #endif static struct starpu_codelet cl_g = { .modes = { STARPU_RW, STARPU_R, STARPU_RW }, .cpu_funcs = {g_cpu}, #ifdef STARPU_USE_CUDA .cuda_funcs = {g_cuda}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {g_opencl}, #endif .cpu_funcs_name = {"g_cpu"}, .nbuffers = 3, }; void h_cpu(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; unsigned *val = (unsigned *) STARPU_VARIABLE_GET_PTR(descr[0]); FPRINTF(stderr, "VAR %u (should be 42)\n", *val); STARPU_ASSERT(*val == 42); } #ifdef STARPU_USE_CUDA void h_cuda(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; unsigned *val = (unsigned *) STARPU_VARIABLE_GET_PTR(descr[0]); unsigned value; cudaMemcpyAsync(&value, val, sizeof(value), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); cudaStreamSynchronize(starpu_cuda_get_local_stream()); FPRINTF(stderr, "VAR %u (should be 42)\n", value); STARPU_ASSERT(value == 42); } #endif #ifdef STARPU_USE_OPENCL void h_opencl(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; cl_mem val = (cl_mem) STARPU_VARIABLE_GET_PTR(descr[0]); unsigned value = 0; cl_command_queue queue; starpu_opencl_get_current_queue(&queue); cl_int err; err = clEnqueueReadBuffer(queue, val, CL_TRUE, 0, sizeof(unsigned), (void *)&value, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); FPRINTF(stderr, "VAR %u (should be 42)\n", value); STARPU_ASSERT(value == 42); } #endif static struct starpu_codelet cl_h = { .modes = { STARPU_RW, STARPU_R, STARPU_RW }, .cpu_funcs = {h_cpu}, #ifdef STARPU_USE_CUDA .cuda_funcs = {h_cuda}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {h_opencl}, #endif .cpu_funcs_name = {"h_cpu"}, .nbuffers = 3 }; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&A, VECTORSIZE*sizeof(unsigned)); starpu_malloc((void **)&B, VECTORSIZE*sizeof(unsigned)); starpu_malloc((void **)&C, VECTORSIZE*sizeof(unsigned)); starpu_malloc((void **)&D, VECTORSIZE*sizeof(unsigned)); starpu_vector_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, VECTORSIZE, sizeof(unsigned)); starpu_vector_data_register(&B_handle, STARPU_MAIN_RAM, (uintptr_t)B, VECTORSIZE, sizeof(unsigned)); starpu_vector_data_register(&C_handle, STARPU_MAIN_RAM, (uintptr_t)C, VECTORSIZE, sizeof(unsigned)); starpu_vector_data_register(&D_handle, STARPU_MAIN_RAM, (uintptr_t)D, VECTORSIZE, sizeof(unsigned)); starpu_variable_data_register(&var_handle, STARPU_MAIN_RAM, (uintptr_t)(&var), sizeof(var)); #if 0 starpu_data_set_sequential_consistency_flag(A_handle, 0); starpu_data_set_sequential_consistency_flag(B_handle, 0); starpu_data_set_sequential_consistency_flag(C_handle, 0); starpu_data_set_sequential_consistency_flag(D_handle, 0); #endif /* f(Ar, Brw): sleep * g(Br; Crw); sleep, var = 42 * h(Cr; Drw); check that var == 42 */ struct starpu_task *task_f = starpu_task_create(); task_f->cl = &cl_f; task_f->handles[0] = var_handle; task_f->handles[1] = A_handle; task_f->handles[2] = B_handle; ret = starpu_task_submit(task_f); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); struct starpu_task *task_g = starpu_task_create(); task_g->cl = &cl_g; task_g->handles[0] = var_handle; task_g->handles[1] = B_handle; task_g->handles[2] = C_handle; ret = starpu_task_submit(task_g); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); struct starpu_task *task_h = starpu_task_create(); task_h->cl = &cl_h; task_h->handles[0] = var_handle; task_h->handles[1] = C_handle; task_h->handles[2] = D_handle; ret = starpu_task_submit(task_h); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(A_handle); starpu_data_unregister(B_handle); starpu_data_unregister(C_handle); starpu_data_unregister(D_handle); starpu_data_unregister(var_handle); starpu_free_noflag(A, VECTORSIZE*sizeof(unsigned)); starpu_free_noflag(B, VECTORSIZE*sizeof(unsigned)); starpu_free_noflag(C, VECTORSIZE*sizeof(unsigned)); starpu_free_noflag(D, VECTORSIZE*sizeof(unsigned)); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(A_handle); starpu_data_unregister(B_handle); starpu_data_unregister(C_handle); starpu_data_unregister(D_handle); starpu_data_unregister(var_handle); starpu_free_noflag(A, VECTORSIZE*sizeof(unsigned)); starpu_free_noflag(B, VECTORSIZE*sizeof(unsigned)); starpu_free_noflag(C, VECTORSIZE*sizeof(unsigned)); starpu_free_noflag(D, VECTORSIZE*sizeof(unsigned)); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/data_invalidation.c000066400000000000000000000137241507764646700231310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Try to mix starpu_data_invalidate and starpu_data_invalidate_submit * calls with task insertions */ #ifdef STARPU_QUICK_CHECK static unsigned nloops=100; #else static unsigned nloops=1000; #endif #define VECTORSIZE 1024 static starpu_data_handle_t v_handle; /* * Memset */ #ifdef STARPU_USE_CUDA static void cuda_memset_codelet(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned length = STARPU_VECTOR_GET_NX(descr[0]); cudaMemsetAsync(buf, 42, length, starpu_cuda_get_local_stream()); } #endif #ifdef STARPU_USE_OPENCL static void opencl_memset_codelet(void *buffers[], void *args) { (void) args; STARPU_SKIP_IF_VALGRIND; cl_command_queue queue; int id = starpu_worker_get_id_check(); int devid = starpu_worker_get_devid(id); starpu_opencl_get_queue(devid, &queue); cl_mem buffer = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); unsigned length = STARPU_VECTOR_GET_NX(buffers[0]); char *v = malloc(length); STARPU_ASSERT(v != NULL); memset(v, 42, length); cl_int err; err = clEnqueueWriteBuffer(queue, buffer, CL_FALSE, 0, /* offset */ length, /* sizeof (char) */ v, 0, /* num_events_in_wait_list */ NULL, /* event_wait_list */ NULL /* event */); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); } #endif /* !STARPU_USE_OPENCL */ void cpu_memset_codelet(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned length = STARPU_VECTOR_GET_NX(descr[0]); memset(buf, 42, length * sizeof(*buf)); } static struct starpu_codelet memset_cl = { .cpu_funcs = {cpu_memset_codelet}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_memset_codelet}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_memset_codelet}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs_name = {"cpu_memset_codelet"}, .nbuffers = 1, .modes = {STARPU_W} }; /* * Check content */ void cpu_check_content_codelet(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned length = STARPU_VECTOR_GET_NX(descr[0]); unsigned i; for (i = 0; i < length; i++) { if (buf[i] != 42) { FPRINTF(stderr, "buf[%u] is '%c' while it should be '%c'\n", i, buf[i], 42); exit(-1); } } } static struct starpu_codelet check_content_cl = { .cpu_funcs = {cpu_check_content_codelet}, .cpu_funcs_name = {"cpu_check_content_codelet"}, .nbuffers = 1, .modes = {STARPU_R} }; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if(starpu_cpu_worker_get_count() == 0) { starpu_shutdown(); return STARPU_TEST_SKIPPED; } #ifdef STARPU_HAVE_VALGRIND_H if(RUNNING_ON_VALGRIND) nloops = 2; #endif /* The buffer should never be explicitly allocated */ starpu_vector_data_register(&v_handle, (uint32_t)-1, (uintptr_t)NULL, VECTORSIZE, sizeof(char)); unsigned loop; for (loop = 0; loop < nloops; loop++) { struct starpu_task *memset_task; struct starpu_task *check_content_task; memset_task = starpu_task_create(); memset_task->cl = &memset_cl; memset_task->handles[0] = v_handle; memset_task->detach = 0; ret = starpu_task_submit(memset_task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(memset_task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); check_content_task = starpu_task_create(); check_content_task->cl = &check_content_cl; check_content_task->handles[0] = v_handle; check_content_task->detach = 0; ret = starpu_task_submit(check_content_task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(check_content_task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); starpu_data_invalidate(v_handle); } for (loop = 0; loop < nloops; loop++) { struct starpu_task *memset_task; struct starpu_task *check_content_task; memset_task = starpu_task_create(); memset_task->cl = &memset_cl; memset_task->handles[0] = v_handle; ret = starpu_task_submit(memset_task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); check_content_task = starpu_task_create(); check_content_task->cl = &check_content_cl; check_content_task->handles[0] = v_handle; ret = starpu_task_submit(check_content_task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_data_invalidate_submit(v_handle); } /* this should get rid of automatically allocated buffers */ starpu_data_unregister(v_handle); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(v_handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/data_register.c000066400000000000000000000056241507764646700222740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include struct my_interface { enum starpu_data_interface_id id; /* Just a integer */ int x; }; static struct starpu_data_interface_ops starpu_interface_my_ops; static void register_my(starpu_data_handle_t handle, int home_node, void *data_interface) { (void) home_node; struct my_interface *my_interface = data_interface; unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { struct my_interface *local_interface = starpu_data_get_interface_on_node(handle, node); local_interface->x = my_interface->x; local_interface->id = my_interface->id; } } static size_t my_get_size(starpu_data_handle_t handle) { struct my_interface *my_interface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return my_interface->x; } static uint32_t my_footprint(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(my_get_size(handle), 0); } static struct starpu_data_interface_ops starpu_interface_my_ops = { .register_data_handle = register_my, .allocate_data_on_node = NULL, .free_data_on_node = NULL, .copy_methods = NULL, .get_size = my_get_size, .get_max_size = NULL, .footprint = my_footprint, .compare = NULL, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct my_interface), .display = NULL, .pack_data = NULL, .peek_data = NULL, .unpack_data = NULL, .describe = NULL, }; #define N 42 int main(void) { int ret; int x; starpu_data_handle_t handles[N]; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); for (x = 0; x < N; x++) { starpu_interface_my_ops.interfaceid = starpu_data_interface_get_next_id(); struct my_interface my_interface = { .id = starpu_interface_my_ops.interfaceid, }; starpu_data_register(&handles[x], -1, &my_interface, &starpu_interface_my_ops); STARPU_ASSERT(_starpu_data_interface_get_ops(my_interface.id) == &starpu_interface_my_ops); } for (x = 0; x < N; x++) starpu_data_unregister(handles[x]); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/datawizard/deinitialize_pending_requests.c000066400000000000000000000032721507764646700255650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Try invalidating a variable which is pending a request */ #define SIZE (100<<20) int main(void) { int ret; char *var = NULL; starpu_data_handle_t handle; ret = starpu_init(NULL); if (ret == -ENODEV) goto skip; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) == 0 && starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER) == 0) goto enodev; var = malloc(SIZE); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)var, SIZE); /* Let a request fly */ starpu_fxt_trace_user_event_string("requesting"); starpu_data_fetch_on_node(handle, 1, 1); starpu_fxt_trace_user_event_string("requested"); /* But suddenly deinitialize the data while it's on the fly! */ starpu_data_deinitialize_submit(handle); starpu_fxt_trace_user_event_string("deinitialized"); starpu_data_unregister(handle); free(var); starpu_shutdown(); return 0; enodev: starpu_shutdown(); skip: return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/deps.c000066400000000000000000000063151507764646700204100ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #define N 10 #define LOOPS 4 void null_cpu_func(void *buffers[], void *arg) { (void)arg; (void)buffers; } void prod_cpu_func(void *buffers[], void *arg) { int *data = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); int n = STARPU_VECTOR_GET_NX(buffers[0]); int i; int factor; starpu_codelet_unpack_args(arg, &factor); FPRINTF(stderr, "Multiplying by %d\n", factor); for(i=0 ; i #include "../helper.h" /* * Test Dijkstra's Dining Philosophers problem */ /* TODO: try to use an arbiter and check improved concurrency */ /* number of philosophers */ #define N 16 starpu_data_handle_t fork_handles[N]; unsigned forks[N]; void eat_kernel(void *descr[], void *arg) { (void)descr; (void)arg; } static struct starpu_codelet eating_cl = { .modes = { STARPU_RW, STARPU_RW }, .cuda_funcs = {eat_kernel}, .cpu_funcs = {eat_kernel}, .opencl_funcs = {eat_kernel}, .cpu_funcs_name = {"eat_kernel"}, .nbuffers = 2 }; static int submit_one_task(unsigned p) { struct starpu_task *task = starpu_task_create(); task->cl = &eating_cl; unsigned left = p; unsigned right = (p+1)%N; task->handles[0] = fork_handles[left]; task->handles[1] = fork_handles[right]; int ret = starpu_task_submit(task); return ret; } int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* initialize the forks */ unsigned f; for (f = 0; f < N; f++) { forks[f] = 0; starpu_vector_data_register(&fork_handles[f], STARPU_MAIN_RAM, (uintptr_t)&forks[f], 1, sizeof(unsigned)); starpu_data_set_sequential_consistency_flag(fork_handles[f], 0); } unsigned ntasks = 1024; #ifdef STARPU_HAVE_VALGRIND_H if(RUNNING_ON_VALGRIND) ntasks = 3; #endif unsigned t; for (t = 0; t < ntasks; t++) { /* select one philosopher randomly */ unsigned philosopher = rand() % N; ret = submit_one_task(philosopher); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); FPRINTF(stderr, "waiting done\n"); for (f = 0; f < N; f++) { starpu_data_unregister(fork_handles[f]); } starpu_shutdown(); return EXIT_SUCCESS; enodev: for (f = 0; f < N; f++) { starpu_data_unregister(fork_handles[f]); } fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/double_parameter.c000066400000000000000000000115611507764646700227660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Try passing the same parameter twice, with various access modes */ void dummy_func(void *descr[], void *arg) { (void)descr; (void)arg; } static struct starpu_codelet codelet_R_R = { .cpu_funcs = { dummy_func }, .cpu_funcs_name = {"dummy_func"}, .model = NULL, .nbuffers = 2, .modes = {STARPU_R, STARPU_R} }; static struct starpu_codelet codelet_R_W = { .cpu_funcs = { dummy_func }, .cpu_funcs_name = {"dummy_func"}, .model = NULL, .nbuffers = 2, .modes = {STARPU_R, STARPU_W} }; static struct starpu_codelet codelet_R_RW = { .cpu_funcs = { dummy_func }, .cpu_funcs_name = {"dummy_func"}, .model = NULL, .nbuffers = 2, .modes = {STARPU_R, STARPU_RW} }; static struct starpu_codelet codelet_W_R = { .cpu_funcs = { dummy_func }, .cpu_funcs_name = {"dummy_func"}, .model = NULL, .nbuffers = 2, .modes = {STARPU_W, STARPU_R} }; static struct starpu_codelet codelet_W_W = { .cpu_funcs = { dummy_func }, .cpu_funcs_name = {"dummy_func"}, .model = NULL, .nbuffers = 2, .modes = {STARPU_W, STARPU_W} }; static struct starpu_codelet codelet_W_RW = { .cpu_funcs = { dummy_func }, .cpu_funcs_name = {"dummy_func"}, .model = NULL, .nbuffers = 2, .modes = {STARPU_W, STARPU_RW} }; static struct starpu_codelet codelet_RW_R = { .cpu_funcs = { dummy_func }, .cpu_funcs_name = {"dummy_func"}, .model = NULL, .nbuffers = 2, .modes = {STARPU_RW, STARPU_R} }; static struct starpu_codelet codelet_RW_W = { .cpu_funcs = { dummy_func }, .cpu_funcs_name = {"dummy_func"}, .model = NULL, .nbuffers = 2, .modes = {STARPU_RW, STARPU_W} }; static struct starpu_codelet codelet_RW_RW = { .cpu_funcs = { dummy_func }, .cpu_funcs_name = {"dummy_func"}, .model = NULL, .nbuffers = 2, .modes = {STARPU_RW, STARPU_RW} }; int main(int argc, char **argv) { float foo = 0.0f; starpu_data_handle_t handle; int ret; struct starpu_task *task; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&foo, sizeof(foo)); #define SUBMIT(mode0, mode1) \ { \ task = starpu_task_create(); \ task->handles[0] = handle; \ task->handles[1] = handle; \ enum starpu_data_access_mode smode0 = STARPU_##mode0; \ enum starpu_data_access_mode smode1 = STARPU_##mode0; \ if (smode0 == STARPU_R && smode1 == STARPU_R) \ task->cl = &codelet_R_R; \ else if (smode0 == STARPU_R && smode1 == STARPU_W) \ task->cl = &codelet_R_W; \ else if (smode0 == STARPU_R && smode1 == STARPU_RW) \ task->cl = &codelet_R_RW; \ else if (smode0 == STARPU_W && smode1 == STARPU_R) \ task->cl = &codelet_W_R; \ else if (smode0 == STARPU_W && smode1 == STARPU_W) \ task->cl = &codelet_W_W; \ else if (smode0 == STARPU_W && smode1 == STARPU_RW) \ task->cl = &codelet_W_RW; \ else if (smode0 == STARPU_RW && smode1 == STARPU_R) \ task->cl = &codelet_RW_R; \ else if (smode0 == STARPU_RW && smode1 == STARPU_W) \ task->cl = &codelet_RW_W; \ else if (smode0 == STARPU_RW && smode1 == STARPU_RW) \ task->cl = &codelet_RW_RW; \ \ ret = starpu_task_submit(task); \ if (ret == -ENODEV) goto enodev; \ STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); \ } SUBMIT(R,R); SUBMIT(R,W); SUBMIT(R,RW); SUBMIT(W,R); SUBMIT(W,W); SUBMIT(W,RW); SUBMIT(RW,R); SUBMIT(RW,W); SUBMIT(RW,RW); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(handle); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/dsm_stress.c000066400000000000000000000155501507764646700216440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" #include /* * Trigger various combinations of access modes */ #ifdef STARPU_QUICK_CHECK # define N 100 #else # define N 10000 #endif #define VECTORSIZE 1024 static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; static unsigned finished = 0; static unsigned cnt = N; starpu_data_handle_t v_handle, v_handle2; static unsigned *v; static unsigned *v2; static void callback(void *arg) { (void)arg; unsigned res = STARPU_ATOMIC_ADD(&cnt, -1); ANNOTATE_HAPPENS_BEFORE(&cnt); if (res == 0) { ANNOTATE_HAPPENS_AFTER(&cnt); STARPU_PTHREAD_MUTEX_LOCK(&mutex); finished = 1; STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } } static void cuda_codelet_null(void *descr[], void *_args) { (void)descr; (void)_args; } static void opencl_codelet_null(void *descr[], void *_args) { (void)descr; (void)_args; } void cpu_codelet_null(void *descr[], void *_args) { (void)descr; (void)_args; } static enum starpu_data_access_mode select_random_mode(void) { int r = rand(); switch (r % 3) { case 0: return STARPU_R; case 1: return STARPU_W; case 2: return STARPU_RW; }; return STARPU_RW; } static struct starpu_codelet cl_r_r = { .cpu_funcs = {cpu_codelet_null}, .cuda_funcs = {cuda_codelet_null}, .opencl_funcs = {opencl_codelet_null}, .cpu_funcs_name = {"cpu_codelet_null"}, .nbuffers = 2, .modes = {STARPU_R, STARPU_R} }; static struct starpu_codelet cl_r_w = { .cpu_funcs = {cpu_codelet_null}, .cuda_funcs = {cuda_codelet_null}, .opencl_funcs = {opencl_codelet_null}, .cpu_funcs_name = {"cpu_codelet_null"}, .nbuffers = 2, .modes = {STARPU_R, STARPU_W} }; static struct starpu_codelet cl_r_rw = { .cpu_funcs = {cpu_codelet_null}, .cuda_funcs = {cuda_codelet_null}, .opencl_funcs = {opencl_codelet_null}, .cpu_funcs_name = {"cpu_codelet_null"}, .nbuffers = 2, .modes = {STARPU_R, STARPU_RW} }; static struct starpu_codelet cl_w_r = { .cpu_funcs = {cpu_codelet_null}, .cuda_funcs = {cuda_codelet_null}, .opencl_funcs = {opencl_codelet_null}, .cpu_funcs_name = {"cpu_codelet_null"}, .nbuffers = 2, .modes = {STARPU_W, STARPU_R} }; static struct starpu_codelet cl_w_w = { .cpu_funcs = {cpu_codelet_null}, .cuda_funcs = {cuda_codelet_null}, .opencl_funcs = {opencl_codelet_null}, .cpu_funcs_name = {"cpu_codelet_null"}, .nbuffers = 2, .modes = {STARPU_W, STARPU_W} }; static struct starpu_codelet cl_w_rw = { .cpu_funcs = {cpu_codelet_null}, .cuda_funcs = {cuda_codelet_null}, .opencl_funcs = {opencl_codelet_null}, .cpu_funcs_name = {"cpu_codelet_null"}, .nbuffers = 2, .modes = {STARPU_W, STARPU_RW} }; static struct starpu_codelet cl_rw_r = { .cpu_funcs = {cpu_codelet_null}, .cuda_funcs = {cuda_codelet_null}, .opencl_funcs = {opencl_codelet_null}, .cpu_funcs_name = {"cpu_codelet_null"}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_R} }; static struct starpu_codelet cl_rw_w = { .cpu_funcs = {cpu_codelet_null}, .cuda_funcs = {cuda_codelet_null}, .opencl_funcs = {opencl_codelet_null}, .cpu_funcs_name = {"cpu_codelet_null"}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_W} }; static struct starpu_codelet cl_rw_rw = { .cpu_funcs = {cpu_codelet_null}, .cuda_funcs = {cuda_codelet_null}, .opencl_funcs = {opencl_codelet_null}, .cpu_funcs_name = {"cpu_codelet_null"}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_RW} }; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned)); STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); memset(v, 0, VECTORSIZE*sizeof(unsigned)); ret = starpu_malloc((void **)&v2, VECTORSIZE*sizeof(unsigned)); STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); memset(v2, 0, VECTORSIZE*sizeof(unsigned)); starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); starpu_vector_data_register(&v_handle2, STARPU_MAIN_RAM, (uintptr_t)v2, VECTORSIZE, sizeof(unsigned)); unsigned iter; for (iter = 0; iter < N; iter++) { struct starpu_task *task = starpu_task_create(); task->handles[0] = v_handle; task->handles[1] = v_handle2; enum starpu_data_access_mode mode0 = select_random_mode(); enum starpu_data_access_mode mode1 = select_random_mode(); if (mode0 == STARPU_R && mode1 == STARPU_R) task->cl = &cl_r_r; else if (mode0 == STARPU_R && mode1 == STARPU_W) task->cl = &cl_r_w; else if (mode0 == STARPU_R && mode1 == STARPU_RW) task->cl = &cl_r_rw; else if (mode0 == STARPU_W && mode1 == STARPU_R) task->cl = &cl_w_r; else if (mode0 == STARPU_W && mode1 == STARPU_W) task->cl = &cl_w_w; else if (mode0 == STARPU_W && mode1 == STARPU_RW) task->cl = &cl_w_rw; else if (mode0 == STARPU_RW && mode1 == STARPU_R) task->cl = &cl_rw_r; else if (mode0 == STARPU_RW && mode1 == STARPU_W) task->cl = &cl_rw_w; else if (mode0 == STARPU_RW && mode1 == STARPU_RW) task->cl = &cl_rw_rw; task->callback_func = callback; task->callback_arg = NULL; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_do_schedule(); STARPU_PTHREAD_MUTEX_LOCK(&mutex); if (!finished) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); starpu_data_unregister(v_handle); starpu_data_unregister(v_handle2); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); starpu_free_noflag(v2, VECTORSIZE*sizeof(unsigned)); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(v_handle); starpu_data_unregister(v_handle2); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); starpu_free_noflag(v2, VECTORSIZE*sizeof(unsigned)); starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/gpu_ptr_register.c000066400000000000000000000157751507764646700230530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "scal.h" /* * Register the GPU buffer to be used for an existing data, and perform * a partitioned operation */ #if ! (defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA)) int main(void) { return STARPU_TEST_SKIPPED; } #else static int submit_tasks(starpu_data_handle_t handle, int pieces, int n) { int i, ret; for (i = 0; i < pieces; i++) { struct starpu_task *task = starpu_task_create(); task->handles[0] = starpu_data_get_sub_data(handle, 1, i); task->cl = &scal_codelet; task->execute_on_a_specific_worker = 1; task->workerid = i%n; ret = starpu_task_submit(task); if (ret == -ENODEV) return -ENODEV; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); return 0; } static int find_a_worker(enum starpu_worker_archtype type) { int worker[STARPU_NMAXWORKERS]; int ret = starpu_worker_get_ids_by_type(type, worker, STARPU_NMAXWORKERS); if (ret == 0) return -ENODEV; if (ret == -ERANGE) return worker[STARPU_NMAXWORKERS-1]; return worker[ret-1]; } static int check_result(unsigned *t, size_t size) { unsigned i; for (i = 0; i < size; i++) { if (t[i] != i*2) { FPRINTF(stderr,"t[%u] is %u instead of %u\n", i, t[i], 2*i); return 1; } } return 0; } #ifdef STARPU_USE_CUDA #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER static int test_cuda(void) { int ret; unsigned *foo_gpu; unsigned *foo; int n, i, size, pieces; int devid; int chosen; cudaError_t cures; starpu_data_handle_t handle; /* Find a CUDA worker */ chosen = find_a_worker(STARPU_CUDA_WORKER); if (chosen == -ENODEV) return -ENODEV; n = starpu_worker_get_count(); size = 10 * n; devid = starpu_worker_get_devid(chosen); foo_gpu = (void*) starpu_malloc_on_node(starpu_worker_get_memory_node(chosen), size * sizeof(*foo_gpu)); foo = calloc(size, sizeof(*foo)); for (i = 0; i < size; i++) foo[i] = i; starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)foo, size, sizeof(*foo)); starpu_vector_ptr_register(handle, starpu_worker_get_memory_node(chosen), (uintptr_t)foo_gpu, (uintptr_t)foo_gpu, 0); /* Broadcast the data to force in-place partitioning */ for (i = 0; i < n; i++) starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0); /* Even with just one worker, split in at least two */ if (n == 1) pieces = 2; else pieces = n; struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = pieces, }; starpu_data_partition(handle, &f); ret = submit_tasks(handle, pieces, n); if (ret == -ENODEV) return -ENODEV; starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen)); starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(chosen), 0); starpu_data_unregister(handle); starpu_cuda_set_device(devid); cures = cudaMemcpy(foo, foo_gpu, size * sizeof(*foo_gpu), cudaMemcpyDeviceToHost); if (!cures) cures = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); return check_result(foo, size); } #endif #endif #ifdef STARPU_USE_OPENCL static int test_opencl(void) { int i; int ret; int chosen; int n; int size; int pieces; cl_mem foo_gpu; starpu_data_handle_t handle; ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); /* Find an OpenCL worker */ chosen = find_a_worker(STARPU_OPENCL_WORKER); if (chosen == -ENODEV) return -ENODEV; n = starpu_worker_get_count(); size = 10 * n; int devid; cl_int err; cl_context context; cl_command_queue queue; devid = starpu_worker_get_devid(chosen); starpu_opencl_get_context(devid, &context); starpu_opencl_get_queue(devid, &queue); foo_gpu = (void*) starpu_malloc_on_node(starpu_worker_get_memory_node(chosen), size * sizeof(int)); unsigned int *foo = malloc(size*sizeof(*foo)); for (i = 0; i < size; i++) foo[i] = i; starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)foo, size, sizeof(int)); starpu_vector_ptr_register(handle, starpu_worker_get_memory_node(chosen), (uintptr_t)foo_gpu, (uintptr_t)foo_gpu, 0); /* Broadcast the data to force in-place partitioning */ for (i = 0; i < n; i++) starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0); /* Even with just one worker, split in at least two */ if (n == 1) pieces = 2; else pieces = n; struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = pieces, }; starpu_data_partition(handle, &f); ret = submit_tasks(handle, pieces, n); if (ret == -ENODEV) return -ENODEV; starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen)); starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(chosen), 0); starpu_data_unregister(handle); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); err = clEnqueueReadBuffer(queue, foo_gpu, CL_FALSE, 0, size*sizeof(*foo), foo, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); return check_result(foo, size); } #endif /* !STARPU_USE_OPENCL */ int main(int argc, char **argv) { int skipped_cuda = 1, skipped_opencl = 1; int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif #ifdef STARPU_USE_CUDA #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER ret = test_cuda(); if (ret == 1) goto fail; else if (ret == 0) skipped_cuda = 0; #endif #endif #ifdef STARPU_USE_OPENCL ret = test_opencl(); if (ret == 1) goto fail; else if (ret == 0) skipped_opencl = 0; #endif starpu_shutdown(); if (skipped_cuda == 1 && skipped_opencl == 1) return STARPU_TEST_SKIPPED; return EXIT_SUCCESS; fail: starpu_shutdown(); return EXIT_FAILURE; } #endif /* defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) */ starpu-1.4.9+dfsg/tests/datawizard/gpu_register.c000066400000000000000000000170651507764646700221600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "scal.h" /* * Register a handle from a GPU buffer, and performs a partitioned operation */ #if ! (defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA)) int main(void) { return STARPU_TEST_SKIPPED; } #else static int submit_tasks(starpu_data_handle_t handle, int pieces, int n) { int i, ret; for (i = 0; i < pieces; i++) { struct starpu_task *task = starpu_task_create(); task->handles[0] = starpu_data_get_sub_data(handle, 1, i); task->cl = &scal_codelet; task->execute_on_a_specific_worker = 1; task->workerid = i%n; ret = starpu_task_submit(task); if (ret == -ENODEV) return -ENODEV; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); return 0; } static int find_a_worker(enum starpu_worker_archtype type) { int worker[STARPU_NMAXWORKERS]; int ret = starpu_worker_get_ids_by_type(type, worker, STARPU_NMAXWORKERS); if (ret == 0) return -ENODEV; if (ret == -ERANGE) return worker[STARPU_NMAXWORKERS-1]; return worker[ret-1]; } static int check_result(unsigned *t, size_t size) { unsigned i; for (i = 0; i < size; i++) { if (t[i] != i*2) { FPRINTF(stderr,"t[%u] is %u instead of %u\n", i, t[i], 2*i); return 1; } } return 0; } #ifdef STARPU_USE_CUDA #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER static int test_cuda(void) { int ret; unsigned *foo_gpu; unsigned *foo; int n, i, size, pieces; int devid; int chosen; cudaError_t cures; starpu_data_handle_t handle; /* Find a CUDA worker */ chosen = find_a_worker(STARPU_CUDA_WORKER); if (chosen == -ENODEV) return -ENODEV; n = starpu_worker_get_count(); size = 10 * n; devid = starpu_worker_get_devid(chosen); foo_gpu = (void*) starpu_malloc_on_node(starpu_worker_get_memory_node(chosen), size * sizeof(*foo_gpu)); foo = calloc(size, sizeof(*foo)); for (i = 0; i < size; i++) foo[i] = i; cures = cudaMemcpy(foo_gpu, foo, size * sizeof(*foo_gpu), cudaMemcpyHostToDevice); if (!cures) cures = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); starpu_vector_data_register(&handle, starpu_worker_get_memory_node(chosen), (uintptr_t)foo_gpu, size, sizeof(*foo_gpu)); /* Broadcast the data to force in-place partitioning */ for (i = 0; i < n; i++) starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0); /* Even with just one worker, split in at least two */ if (n == 1) pieces = 2; else pieces = n; struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = pieces, }; starpu_data_partition(handle, &f); ret = submit_tasks(handle, pieces, n); if (ret == -ENODEV) { starpu_free_on_node(starpu_worker_get_memory_node(chosen), (uintptr_t) foo_gpu, size * sizeof(*foo_gpu)); free(foo); return -ENODEV; } starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen)); starpu_data_unregister(handle); starpu_cuda_set_device(devid); cures = cudaMemcpy(foo, foo_gpu, size * sizeof(*foo_gpu), cudaMemcpyDeviceToHost); if (!cures) cures = cudaDeviceSynchronize(); if (STARPU_UNLIKELY(cures)) { starpu_free_on_node(starpu_worker_get_memory_node(chosen), (uintptr_t) foo_gpu, size * sizeof(*foo_gpu)); free(foo); STARPU_CUDA_REPORT_ERROR(cures); return 1; } ret = check_result(foo, size); starpu_free_on_node(starpu_worker_get_memory_node(chosen), (uintptr_t) foo_gpu, size * sizeof(*foo_gpu)); free(foo); return ret; } #endif #endif #ifdef STARPU_USE_OPENCL static int test_opencl(void) { int i; int ret; int chosen; int n; int size; int pieces; cl_mem foo_gpu; starpu_data_handle_t handle; ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); /* Find an OpenCL worker */ chosen = find_a_worker(STARPU_OPENCL_WORKER); if (chosen == -ENODEV) return -ENODEV; n = starpu_worker_get_count(); size = 10 * n; int devid; cl_int err; cl_context context; cl_command_queue queue; devid = starpu_worker_get_devid(chosen); starpu_opencl_get_context(devid, &context); starpu_opencl_get_queue(devid, &queue); foo_gpu = (void*) starpu_malloc_on_node(starpu_worker_get_memory_node(chosen), size * sizeof(int)); unsigned int *foo = malloc(size*sizeof(*foo)); for (i = 0; i < size; i++) foo[i] = i; err = clEnqueueWriteBuffer(queue, foo_gpu, CL_FALSE, 0, size*sizeof(int), foo, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); starpu_vector_data_register(&handle, starpu_worker_get_memory_node(chosen), (uintptr_t)foo_gpu, size, sizeof(int)); /* Broadcast the data to force in-place partitioning */ for (i = 0; i < n; i++) starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0); /* Even with just one worker, split in at least two */ if (n == 1) pieces = 2; else pieces = n; struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = pieces, }; starpu_data_partition(handle, &f); ret = submit_tasks(handle, pieces, n); if (ret == -ENODEV) return -ENODEV; starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen)); starpu_data_unregister(handle); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); err = clEnqueueReadBuffer(queue, foo_gpu, CL_FALSE, 0, size*sizeof(*foo), foo, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); ret = check_result(foo, size); starpu_free_on_node(starpu_worker_get_memory_node(chosen), (uintptr_t) foo_gpu, size * sizeof(int)); free(foo); return ret; } #endif /* !STARPU_USE_OPENCL */ int main(int argc, char **argv) { int skipped_cuda = 1, skipped_opencl = 1; int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif #ifdef STARPU_USE_CUDA #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER ret = test_cuda(); if (ret == 1) goto fail; else if (ret == 0) skipped_cuda = 0; #endif #endif #ifdef STARPU_USE_OPENCL ret = test_opencl(); if (ret == 1) goto fail; else if (ret == 0) skipped_opencl = 0; #endif starpu_shutdown(); if (skipped_cuda == 1 && skipped_opencl == 1) return STARPU_TEST_SKIPPED; return EXIT_SUCCESS; fail: starpu_shutdown(); return EXIT_FAILURE; } #endif /* defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) */ starpu-1.4.9+dfsg/tests/datawizard/handle_to_pointer.c000066400000000000000000000112061507764646700231450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #undef NDEBUG #include #include #include #include "../helper.h" /* * Test the value returned by starpu_handle_to_pointer */ void cpu_task(void **buffers, void *args) { int *numbers; int i; int size; numbers = (int *) STARPU_VECTOR_GET_PTR(buffers[0]); starpu_codelet_unpack_args (args, &size); for(i = 0; i < (int)size; i++) { numbers[i] = i; } } #ifdef STARPU_USE_CUDA static void cuda_task(void **buffers, void *args) { int *numbers; int i; int size; numbers = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); starpu_codelet_unpack_args (args, &size); for(i = 0; i < (int)size; i++) { cudaMemcpyAsync(&numbers[i], &i, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); } } #endif #ifdef STARPU_USE_OPENCL static void opencl_task(void *buffers[], void *args) { (void)args; cl_command_queue queue; int id = starpu_worker_get_id_check(); int devid = starpu_worker_get_devid(id); starpu_opencl_get_queue(devid, &queue); cl_mem numbers = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); unsigned size = STARPU_VECTOR_GET_NX(buffers[0]); unsigned i; for (i = 0; i < size; i++) { cl_int err; err = clEnqueueWriteBuffer(queue, numbers, CL_TRUE, i*sizeof(int), /* offset */ sizeof(int), &i, 0, /* num_events_in_wait_list */ NULL, /* event_wait_list */ NULL /* event */); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); } } #endif static struct starpu_codelet cl = { .cpu_funcs = {cpu_task}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_task}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_task}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs_name = {"cpu_task"}, .nbuffers = 1, .modes = {STARPU_W} }; int main(int argc, char *argv[]) { int err, ret; int *pointer; starpu_data_handle_t handle; static const int count = 123; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; err = starpu_malloc((void **)&pointer, count * sizeof(int)); STARPU_ASSERT((err == 0) && (pointer != NULL)); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)pointer, sizeof(int)); STARPU_ASSERT(starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM) == pointer); starpu_data_unregister(handle); starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)pointer, count, sizeof(int)); STARPU_ASSERT(starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM) == pointer); starpu_data_unregister(handle); starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)pointer, count, count, 1, sizeof(int)); STARPU_ASSERT(starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM) == pointer); starpu_data_unregister(handle); starpu_free_noflag(pointer, count * sizeof(int)); pointer = NULL; /* Lazy allocation. */ starpu_vector_data_register(&handle, -1, 0 /* NULL */, count, sizeof(int)); STARPU_ASSERT(starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM) == NULL); /* Pass the handle to a task. */ err = starpu_task_insert(&cl, STARPU_W, handle, STARPU_VALUE, &count, sizeof(count), 0); if (err == -ENODEV) { ret = STARPU_TEST_SKIPPED; goto out; } /* Acquire the handle, forcing a local allocation. */ starpu_data_acquire(handle, STARPU_R); /* Make sure we have a local pointer to it. */ ret = EXIT_SUCCESS; pointer = (int *) starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM); if (pointer == NULL) { FPRINTF(stderr, "pointer should be non NULL\n"); ret = EXIT_FAILURE; } else { int i; for(i = 0; i < count; i++) { int *numbers = (int *)pointer; if (numbers[i] != i) { FPRINTF(stderr, "Incorrect value numbers[%d] == %d should be %d\n", (int)i, numbers[i], (int)i); ret = EXIT_FAILURE; } } } starpu_data_release(handle); out: starpu_data_unregister(handle); starpu_shutdown(); return ret; } starpu-1.4.9+dfsg/tests/datawizard/in_place_partition.c000066400000000000000000000062201507764646700233130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "scal.h" /* * Trigger in-place partitioning by prefetching the whole data before * partitioning */ int main(int argc, char **argv) { unsigned *foo; starpu_data_handle_t handle; int ret; unsigned n, i, size; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.ncuda = -1; conf.nopencl = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif n = starpu_worker_get_count(); if (n == 1) { starpu_shutdown(); return STARPU_TEST_SKIPPED; } size = 10 * n; foo = (unsigned *) calloc(size, sizeof(*foo)); for (i = 0; i < size; i++) foo[i] = i; starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)foo, size, sizeof(*foo)); /* Broadcast the data to force in-place partitioning */ for (i = 0; i < n; i++) starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0); struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = n, }; starpu_data_partition(handle, &f); for (i = 0; i < f.nchildren; i++) { struct starpu_task *task = starpu_task_create(); task->handles[0] = starpu_data_get_sub_data(handle, 1, i); task->cl = &scal_codelet; task->execute_on_a_specific_worker = 1; task->workerid = i; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unregister(handle); starpu_shutdown(); ret = EXIT_SUCCESS; for (i = 0; i < size; i++) { if (foo[i] != i*2) { FPRINTF(stderr,"value %u is %u instead of %u\n", i, foo[i], 2*i); ret = EXIT_FAILURE; } } free(foo); return ret; enodev: starpu_data_unregister(handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/increment_init.c000066400000000000000000000051661507764646700224670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "../variable/increment.h" /* * Check that the initializer passed to starpu_data_set_reduction_methods * is used to initialize a handle when it is registered from NULL, and when * starpu_data_invalidate is called */ int main(void) { unsigned *pvar = NULL; int ret; starpu_data_handle_t handle; ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); increment_load_opencl(); starpu_variable_data_register(&handle, -1, 0, sizeof(unsigned)); starpu_data_set_reduction_methods(handle, NULL, &neutral_cl); #ifdef STARPU_QUICK_CHECK unsigned ntasks = 32; unsigned nloops = 4; #else unsigned ntasks = 1024; unsigned nloops = 16; #endif unsigned loop; unsigned t; for (loop = 0; loop < nloops; loop++) { for (t = 0; t < ntasks; t++) { struct starpu_task *task = starpu_task_create(); task->cl = &increment_cl; task->handles[0] = handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_data_acquire(handle, STARPU_R); pvar = starpu_data_handle_to_pointer(handle, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); if (*pvar != ntasks) { FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", *pvar, ntasks); starpu_data_release(handle); starpu_data_unregister(handle); goto err; } starpu_data_release(handle); starpu_data_invalidate(handle); } starpu_data_unregister(handle); increment_unload_opencl(); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; err: starpu_shutdown(); return EXIT_FAILURE; } starpu-1.4.9+dfsg/tests/datawizard/increment_redux.c000066400000000000000000000053751507764646700226550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "../variable/increment.h" /* * Check that STARPU_REDUX works with a mere incrementation */ int main(int argc, char **argv) { int ret; unsigned var = 0; starpu_data_handle_t handle; /* Not supported yet */ if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) return STARPU_TEST_SKIPPED; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); increment_load_opencl(); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(unsigned)); starpu_data_set_reduction_methods(handle, &redux_cl, &neutral_cl); #ifdef STARPU_QUICK_CHECK unsigned ntasks = 32; unsigned nloops = 4; #else unsigned ntasks = 1024; unsigned nloops = 16; #endif unsigned loop; unsigned t; for (loop = 0; loop < nloops; loop++) { for (t = 0; t < ntasks; t++) { struct starpu_task *task = starpu_task_create(); task->cl = &increment_redux_cl; task->handles[0] = handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_data_acquire(handle, STARPU_R); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); if (var != ntasks * (loop+1)) { FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", var, ntasks * (loop+1)); starpu_data_release(handle); starpu_data_unregister(handle); goto err; } starpu_data_release(handle); } starpu_data_unregister(handle); if (var != ntasks * nloops) { FPRINTF(stderr, "Value %u != Expected value %u\n", var, ntasks * (loop+1)); goto err; } increment_unload_opencl(); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; err: starpu_shutdown(); STARPU_RETURN(EXIT_FAILURE); } starpu-1.4.9+dfsg/tests/datawizard/increment_redux_lazy.c000066400000000000000000000062301507764646700237030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "../variable/increment.h" /* * Check that STARPU_REDUX works with a mere incrementation, but without * initializing the variable */ int main(int argc, char **argv) { int ret; unsigned *var; starpu_data_handle_t handle; /* Not supported yet */ if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) return STARPU_TEST_SKIPPED; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() + starpu_opencl_worker_get_count() + starpu_hip_worker_get_count() == 0) { starpu_shutdown(); return STARPU_TEST_SKIPPED; } increment_load_opencl(); starpu_variable_data_register(&handle, -1, (uintptr_t)NULL, sizeof(unsigned)); starpu_data_set_reduction_methods(handle, &redux_cl, &neutral_cl); #ifdef STARPU_QUICK_CHECK unsigned ntasks = 32; unsigned nloops = 4; #else unsigned ntasks = 1024; unsigned nloops = 16; #endif unsigned loop; unsigned t; for (loop = 0; loop < nloops; loop++) { for (t = 0; t < ntasks; t++) { struct starpu_task *task = starpu_task_create(); task->cl = &increment_redux_cl; task->handles[0] = handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_data_acquire(handle, STARPU_R); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); var = (unsigned*) starpu_variable_get_local_ptr(handle); starpu_data_release(handle); if (*var != ntasks*(loop + 1)) { ret = EXIT_FAILURE; FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", *var, ntasks * (loop+1)); goto err; } } ret = starpu_data_acquire(handle, STARPU_R); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); var = (unsigned*) starpu_variable_get_local_ptr(handle); if (*var != ntasks*nloops) { ret = EXIT_FAILURE; FPRINTF(stderr, "Value %u != Expected value %u\n", *var, ntasks * (loop+1)); goto err; } starpu_data_release(handle); starpu_data_unregister(handle); increment_unload_opencl(); err: starpu_shutdown(); STARPU_RETURN(ret); enodev: starpu_data_unregister(handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); STARPU_RETURN(STARPU_TEST_SKIPPED); } starpu-1.4.9+dfsg/tests/datawizard/increment_redux_partition.c000066400000000000000000000062441507764646700247420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "../variable/increment.h" /* * Check that STARPU_REDUX works with a mere incrementation and on partitioned data */ #define N 8 int main(int argc, char **argv) { int ret; unsigned vec[N] = {}; unsigned i; int status; starpu_data_handle_t handle; /* Not supported yet */ if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) return STARPU_TEST_SKIPPED; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); increment_load_opencl(); for (i = 0; i < N; i++) vec[i] = i; starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&vec, N, sizeof(unsigned)); /* Partition the vector in PARTS sub-variables */ struct starpu_data_filter f = { .filter_func = starpu_vector_filter_pick_variable, .filter_arg_ptr = (void*)(uintptr_t) 0, .nchildren = N, /* the children use a variable interface*/ .get_child_ops = starpu_vector_filter_pick_variable_child_ops }; starpu_data_partition(handle, &f); for (i = 0; i < N; i++) { starpu_data_handle_t sub_handle = starpu_data_get_sub_data(handle, 1, i); starpu_data_set_reduction_methods(sub_handle, &redux_cl, &neutral_cl); } #ifdef STARPU_QUICK_CHECK unsigned ntasks = 32; #else unsigned ntasks = 1024; #endif unsigned t; for (i = 0; i < N; i++) { starpu_data_handle_t sub_handle = starpu_data_get_sub_data(handle, 1, i); for (t = 0; t < ntasks; t++) { struct starpu_task *task = starpu_task_create(); task->cl = &increment_redux_cl; task->handles[0] = sub_handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } starpu_data_unpartition(handle, STARPU_MAIN_RAM); status = EXIT_SUCCESS; starpu_data_acquire(handle, STARPU_R); for (i = 0; i < N; i++) { if (vec[i] != i + ntasks) { FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", vec[i], ntasks); status = EXIT_FAILURE; } } starpu_data_release(handle); starpu_data_unregister(handle); increment_unload_opencl(); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; err: starpu_shutdown(); STARPU_RETURN(EXIT_FAILURE); } starpu-1.4.9+dfsg/tests/datawizard/increment_redux_v2.c000066400000000000000000000055711507764646700232620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "../variable/increment.h" /* * Check that STARPU_REDUX works with a mere incrementation, but * intermixing with non-REDUX accesses */ int main(int argc, char **argv) { int ret; unsigned var = 0; starpu_data_handle_t handle; /* Not supported yet */ if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) return STARPU_TEST_SKIPPED; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); increment_load_opencl(); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(unsigned)); starpu_data_set_reduction_methods(handle, &redux_cl, &neutral_cl); #ifdef STARPU_QUICK_CHECK unsigned ntasks = 32; unsigned nloops = 4; #else unsigned ntasks = 1024; unsigned nloops = 16; #endif unsigned loop; unsigned t; for (loop = 0; loop < nloops; loop++) { for (t = 0; t < ntasks; t++) { struct starpu_task *task = starpu_task_create(); if (t % 10 == 0) { task->cl = &increment_cl; } else { task->cl = &increment_redux_cl; } task->handles[0] = handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_data_acquire(handle, STARPU_R); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); if (var != ntasks * (loop+1)) { FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", var, ntasks * (loop+1)); starpu_data_release(handle); starpu_data_unregister(handle); goto err; } starpu_data_release(handle); } starpu_data_unregister(handle); if (var != ntasks * nloops) { FPRINTF(stderr, "Value %u != Expected value %u\n", var, ntasks * (loop+1)); goto err; } increment_unload_opencl(); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; err: starpu_shutdown(); STARPU_RETURN(EXIT_FAILURE); } starpu-1.4.9+dfsg/tests/datawizard/increment_redux_with_args.c000066400000000000000000000051101507764646700247070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "../variable/increment.h" void redux_with_args_cpu(void *descr[], void *arg) { int *value = (int *)arg; unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); unsigned *src = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); *dst = *dst + *src + *value; } struct starpu_codelet redux_with_args_cl = { .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, .nbuffers = 2, .cpu_funcs = {redux_with_args_cpu}, }; int main(int argc, char **argv) { int ret; unsigned var = 0; starpu_data_handle_t handle; unsigned value = 42; /* Not supported yet */ if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) return STARPU_TEST_SKIPPED; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "we need 1 cpu worker\n"); starpu_shutdown(); return STARPU_TEST_SKIPPED; } increment_load_opencl(); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(unsigned)); starpu_data_set_reduction_methods_with_args(handle, &redux_with_args_cl, &value, &neutral_cl, NULL); ret = starpu_task_insert(&increment_redux_cl, STARPU_REDUX, handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_data_unregister(handle); if (var != value+1) { FPRINTF(stderr, "Value %u != Expected value %u\n", var, value+1); goto err; } increment_unload_opencl(); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; err: starpu_shutdown(); STARPU_RETURN(EXIT_FAILURE); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/000077500000000000000000000000001507764646700214275ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/datawizard/interfaces/bcsr/000077500000000000000000000000001507764646700223605ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/datawizard/interfaces/bcsr/bcsr_cuda.cu000066400000000000000000000043751507764646700246470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" extern struct test_config bcsr_config; __global__ void bcsr_cuda(int *nzval, uint32_t nnz, int *err, int factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i >= nnz) return; if (nzval[i] != i*factor) *err = 1; else nzval[i] = -nzval[i]; } extern "C" void test_bcsr_cuda_func(void *buffers[], void *args) { int factor; int *ret; int *val; cudaError_t error; uint32_t nnz = STARPU_BCSR_GET_NNZ(buffers[0]); uint32_t r = ((struct starpu_bcsr_interface *)buffers[0])->r; uint32_t c = ((struct starpu_bcsr_interface *)buffers[0])->c; nnz *= (r*c); unsigned threads_per_block = 64; unsigned nblocks = (nnz + threads_per_block-1) / threads_per_block; factor = *(int *) args; val = (int *) STARPU_BCSR_GET_NZVAL(buffers[0]); error = cudaMalloc(&ret, sizeof(int)); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(ret, &bcsr_config.copy_failed, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); bcsr_cuda<<>> (val, nnz, ret, factor); error = cudaGetLastError(); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(&bcsr_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); cudaFree(ret); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/bcsr/bcsr_interface.c000066400000000000000000000111221507764646700254720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #include "../../../helper.h" /* * In this test, we use the following matrix: * * +----------------+ * | 0 1 0 0 | * | 2 3 0 0 | * | 4 5 8 9 | * | 6 7 10 11 | * +----------------+ * * nzval = [0, 1, 2, 3] ++ [4, 5, 6, 7] ++ [8, 9, 10, 11] * colind = [0, 0, 1] * rowptr = [0, 1, 3 ] * r = c = 2 */ /* Size of the blocks */ #define R 2 #define C 2 #define NNZ_BLOCKS 3 /* out of 4 */ #define NZVAL_SIZE (R*C*NNZ_BLOCKS) #define NROWS 2 #ifdef STARPU_USE_CPU void test_bcsr_cpu_func(void *buffers[], void *args); #endif /* !STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA extern void test_bcsr_cuda_func(void *buffers[], void *_args); #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL extern void test_bcsr_opencl_func(void *buffers[], void *args); #endif /* !STARPU_USE_OPENCL */ static int nzval[NZVAL_SIZE] = { 0, 1, 2, 3, /* First block */ 4, 5, 6, 7, /* Second block */ 8, 9, 10, 11 /* Third block */ }; static int nzval2[NZVAL_SIZE]; static uint32_t colind[NNZ_BLOCKS] = { 0, 0, 1 }; static uint32_t colind2[NNZ_BLOCKS]; static uint32_t rowptr[NROWS+1] = { 0, 1, NNZ_BLOCKS }; static uint32_t rowptr2[NROWS+1] = { 0, 0, NNZ_BLOCKS }; static starpu_data_handle_t bcsr_handle; static starpu_data_handle_t bcsr2_handle; struct test_config bcsr_config = { #ifdef STARPU_USE_CPU .cpu_func = test_bcsr_cpu_func, #endif /* !STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA .cuda_func = test_bcsr_cuda_func, #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL .opencl_func = test_bcsr_opencl_func, #endif /* !STARPU_USE_OPENCL */ .handle = &bcsr_handle, .ptr = nzval, .dummy_handle = &bcsr2_handle, .dummy_ptr = nzval2, .copy_failed = SUCCESS, .name = "bcsr_interface" }; static void register_data(void) { starpu_bcsr_data_register(&bcsr_handle, STARPU_MAIN_RAM, NNZ_BLOCKS, NROWS, (uintptr_t) nzval, colind, rowptr, 0, /* firstentry */ R, C, sizeof(nzval[0])); starpu_bcsr_data_register(&bcsr2_handle, STARPU_MAIN_RAM, NNZ_BLOCKS, NROWS, (uintptr_t) nzval2, colind2, rowptr2, 0, /* firstentry */ R, C, sizeof(nzval2[0])); } static void unregister_data(void) { starpu_data_unregister(bcsr_handle); starpu_data_unregister(bcsr2_handle); } void test_bcsr_cpu_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int *val; int factor; int i; uint32_t nnz = STARPU_BCSR_GET_NNZ(buffers[0]); uint32_t r = ((struct starpu_bcsr_interface *)buffers[0])->r; uint32_t c = ((struct starpu_bcsr_interface *)buffers[0])->c; if (r != R || c != C) { bcsr_config.copy_failed = FAILURE; return; } nnz *= (r*c); val = (int *) STARPU_BCSR_GET_NZVAL(buffers[0]); factor = *(int *) args; for (i = 0; i < (int)nnz; i++) { if (val[i] != i * factor) { bcsr_config.copy_failed = FAILURE; return; } val[i] *= -1; } #if 0 /* TODO */ /* Check colind */ uint32_t *col = STARPU_BCSR_GET_COLIND(buffers[0]); for (i = 0; i < NNZ_BLOCKS; i++) if (col[i] != colind[i]) bcsr_config.copy_failed = FAILURE; /* Check rowptr */ uint32_t *row = STARPU_BCSR_GET_ROWPTR(buffers[0]); for (i = 0; i < 1 + WIDTH/R; i++) if (row[i] != rowptr[i]) bcsr_config.copy_failed = FAILURE; #endif } int main(int argc, char **argv) { struct data_interface_test_summary summary; struct starpu_conf conf; starpu_conf_init(&conf); conf.ncuda = 2; conf.nopencl = 1; int ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); register_data(); ret = run_tests(&bcsr_config, &summary); unregister_data(); starpu_shutdown(); if (ret) data_interface_test_summary_print(stderr, &summary); return data_interface_test_summary_success(&summary); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/bcsr/bcsr_opencl.c000066400000000000000000000070061507764646700250200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #define KERNEL_LOCATION "tests/datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl" extern struct test_config bcsr_config; static struct starpu_opencl_program opencl_program; void test_bcsr_opencl_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int id, devid, ret; int factor = *(int *) args; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); uint32_t nnz = STARPU_BCSR_GET_NNZ(buffers[0]); uint32_t r = ((struct starpu_bcsr_interface *)buffers[0])->r; uint32_t c = ((struct starpu_bcsr_interface *)buffers[0])->c; nnz *= (r*c); cl_mem nzval = (cl_mem)STARPU_BCSR_GET_NZVAL(buffers[0]); cl_context context; id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); starpu_opencl_get_context(devid, &context); cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &bcsr_config.copy_failed, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "test_bcsr_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); int nargs; nargs = starpu_opencl_set_kernel_args(&err, &kernel, sizeof(nzval), &nzval, sizeof(nnz), &nnz, sizeof(fail), &fail, sizeof(factor), &factor, 0); if (nargs != 4) { fprintf(stderr, "Failed to set argument #%d\n", err); STARPU_OPENCL_REPORT_ERROR(err); } { size_t global = nnz; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local = global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } err = clEnqueueReadBuffer(queue, fail, CL_TRUE, 0, sizeof(int), &bcsr_config.copy_failed, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl000066400000000000000000000016751507764646700265420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void test_bcsr_opencl(__global int *val, unsigned int nx, __global int *err, int factor) { const int i = get_global_id(0); if (i >= nx) return; if (val[i] != i * factor) *err = 1; else val[i] = - val[i]; } starpu-1.4.9+dfsg/tests/datawizard/interfaces/block/000077500000000000000000000000001507764646700225215ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/datawizard/interfaces/block/block_cuda.cu000066400000000000000000000046471507764646700251530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" extern struct test_config block_config; static __global__ void block_cuda(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz, float factor, int *err) { int i, j, k; int val = 0; for (k = 0; k < nz ;k++) { for (j = 0; j < ny ;j++) { for(i = 0; i < nx ;i++) { if (block[(k*ldz)+(j*ldy)+i] != factor * val) { *err = 1; return; } else { block[(k*ldz)+(j*ldy)+i] *= -1; val++; } } } } } extern "C" void test_block_cuda_func(void *buffers[], void *args) { cudaError_t error; int *ret; error = cudaMalloc(&ret, sizeof(int)); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(ret, &block_config.copy_failed, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); int nx = STARPU_BLOCK_GET_NX(buffers[0]); int ny = STARPU_BLOCK_GET_NY(buffers[0]); int nz = STARPU_BLOCK_GET_NZ(buffers[0]); unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); int *block = (int *) STARPU_BLOCK_GET_PTR(buffers[0]); int factor = *(int*) args; block_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>> (block, nx, ny, nz, ldy, ldz, factor, ret); error = cudaGetLastError(); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(&block_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); cudaFree(ret); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/block/block_interface.c000066400000000000000000000075151507764646700260070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #include "../../../helper.h" #define NX 16 #define NY NX #define NZ NX /* Prototypes */ static void register_data(void); static void unregister_data(void); void test_block_cpu_func(void *buffers[], void *args); #ifdef STARPU_USE_CUDA extern void test_block_cuda_func(void *buffers[], void *_args); #endif #ifdef STARPU_USE_OPENCL extern void test_block_opencl_func(void *buffers[], void *args); #endif static starpu_data_handle_t _block_handle; static starpu_data_handle_t _block2_handle; static int _block[NX*NY*NZ]; static int _block2[NX*NY*NZ]; struct test_config block_config = { .cpu_func = test_block_cpu_func, #ifdef STARPU_USE_CUDA .cuda_func = test_block_cuda_func, #endif #ifdef STARPU_USE_OPENCL .opencl_func = test_block_opencl_func, #endif .handle = &_block_handle, .ptr = _block, .dummy_handle = &_block2_handle, .dummy_ptr = _block2, .copy_failed = SUCCESS, .name = "block_interface" }; static void register_data(void) { /* Initializing data */ int val = 0; int i, j, k; for (k = 0; k < NZ; k++) for (j = 0; j < NY; j++) for (i = 0; i < NX; i++) _block[(k*NX*NY)+(j*NX)+i] = val++; /* Registering data */ starpu_block_data_register(&_block_handle, STARPU_MAIN_RAM, (uintptr_t)_block, NX, NX * NY, NX, NY, NZ, sizeof(_block[0])); starpu_block_data_register(&_block2_handle, STARPU_MAIN_RAM, (uintptr_t)_block2, NX, NX * NY, NX, NY, NZ, sizeof(_block2[0])); } static void unregister_data(void) { starpu_data_unregister(_block_handle); starpu_data_unregister(_block2_handle); } void test_block_cpu_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int factor = *(int*)args; int nx = STARPU_BLOCK_GET_NX(buffers[0]); int ny = STARPU_BLOCK_GET_NY(buffers[0]); int nz = STARPU_BLOCK_GET_NZ(buffers[0]); unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); int *block = (int *) STARPU_BLOCK_GET_PTR(buffers[0]); int i, j, k; int val = 0; block_config.copy_failed = SUCCESS; for (k = 0; k < nz; k++) { for (j = 0; j < ny; j++) { for (i = 0; i < nx; i++) { if (block[(k*ldz)+(j*ldy)+i] != factor * val) { block_config.copy_failed = FAILURE; return; } else { block[(k*ldz)+(j*ldy)+i] *= -1; val++; } } } } } int main(int argc, char **argv) { struct data_interface_test_summary summary; struct starpu_conf conf; starpu_conf_init(&conf); conf.ncuda = 2; conf.nopencl = 1; int ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); register_data(); ret = run_tests(&block_config, &summary); unregister_data(); starpu_shutdown(); if (ret) data_interface_test_summary_print(stderr, &summary); return data_interface_test_summary_success(&summary); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/block/block_opencl.c000066400000000000000000000063471507764646700253310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #define KERNEL_LOCATION "tests/datawizard/interfaces/block/block_opencl_kernel.cl" extern struct test_config block_config; static struct starpu_opencl_program opencl_program; void test_block_opencl_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int id, devid, ret; int factor = *(int *) args; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); int nx = STARPU_BLOCK_GET_NX(buffers[0]); int ny = STARPU_BLOCK_GET_NY(buffers[0]); int nz = STARPU_BLOCK_GET_NZ(buffers[0]); unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]); unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]); cl_mem block = (cl_mem) STARPU_BLOCK_GET_DEV_HANDLE(buffers[0]); cl_context context; id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); starpu_opencl_get_context(devid, &context); cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &block_config.copy_failed, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "block_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); int nargs; nargs = starpu_opencl_set_kernel_args(&err, &kernel, sizeof(block), &block, sizeof(nx), &nx, sizeof(ny), &ny, sizeof(nz), &nz, sizeof(ldy), &ldy, sizeof(ldz), &ldz, sizeof(factor), &factor, sizeof(fail), &fail, 0); if (nargs != 8) { fprintf(stderr, "Failed to set argument #%d\n", nargs); STARPU_OPENCL_REPORT_ERROR(err); } { size_t global[3] = {nx, ny, nz}; err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } err = clEnqueueReadBuffer(queue, fail, CL_TRUE, 0, sizeof(int), &block_config.copy_failed, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/block/block_opencl_kernel.cl000066400000000000000000000021741507764646700270370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void block_opencl(__global int *block, int nx, int ny, int nz, int ldy, int ldz, int factor, __global int *err) { const int idx = get_global_id(0); const int idy = get_global_id(1); const int idz = get_global_id(2); if (idx >= nx) return; if (idy >= ny) return; if (idz >= nz) return; int val = idz*ny*nx+idy*nx+idx; int i = (idz*ldz)+(idy*ldy)+idx; if (block[i] != factor * val) *err = 1; else block[i] *= -1; } starpu-1.4.9+dfsg/tests/datawizard/interfaces/coo/000077500000000000000000000000001507764646700222075ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/datawizard/interfaces/coo/coo_cuda.cu000066400000000000000000000041451507764646700243200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" extern struct test_config coo_config; __global__ void coo_cuda(int *val, uint32_t n, int *err, int factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i >= n) return; if (val[i] != i * factor) *err = 1; else val[i] *= -1; } extern "C" void test_coo_cuda_func(void *buffers[], void *args) { int factor; int *ret; int *val; cudaError_t error; uint32_t nvalues = STARPU_COO_GET_NVALUES(buffers[0]); unsigned threads_per_block = 64; unsigned nblocks = (nvalues + threads_per_block-1) / threads_per_block; factor = *(int *) args; val = (int *) STARPU_COO_GET_VALUES(buffers[0]); error = cudaMalloc(&ret, sizeof(int)); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(ret, &coo_config.copy_failed, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); coo_cuda<<>> (val, nvalues, ret, factor); error = cudaGetLastError(); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(&coo_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); cudaFree(ret); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/coo/coo_interface.c000066400000000000000000000072451507764646700251630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #define NX 2 #define NY 2 #define MATRIX_SIZE (NX*NY) #if defined(STARPU_USE_CPU) void test_coo_cpu_func(void *buffers[], void *args); #endif #ifdef STARPU_USE_CUDA extern void test_coo_cuda_func(void *buffers[], void *args); #endif #ifdef STARPU_USE_OPENCL extern void test_coo_opencl_func(void *buffers[], void *args); #endif static starpu_data_handle_t coo_handle, coo2_handle; static uint32_t columns[MATRIX_SIZE]; static uint32_t rows[MATRIX_SIZE]; static int values[MATRIX_SIZE]; static uint32_t columns2[MATRIX_SIZE]; static uint32_t rows2[MATRIX_SIZE]; static int values2[MATRIX_SIZE]; struct test_config coo_config = { #ifdef STARPU_USE_CPU .cpu_func = test_coo_cpu_func, #endif /* ! STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA .cuda_func = test_coo_cuda_func, #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL .opencl_func = test_coo_opencl_func, #endif /* !STARPU_USE_OPENCL */ .handle = &coo_handle, .ptr = values, .dummy_handle = &coo2_handle, .dummy_ptr = values2, .copy_failed = SUCCESS, .name = "coo_interface" }; void test_coo_cpu_func(void *buffers[], void *args) { int factor = *(int *) args; int *vals = (int *) STARPU_COO_GET_VALUES(buffers[0]); unsigned size = STARPU_COO_GET_NVALUES(buffers[0]); int i; for (i = 0; i < (int)size; i++) { if (vals[i] != i * factor) { coo_config.copy_failed = FAILURE; return; } vals[i] *= -1; } } static void register_data(void) { /* We use the following matrix : +---+---+ | 0 | 1 | +---+---+ | 2 | 3 | +---+---+ Of course, we're not supposed to register the zeros, but it does not matter for this test. */ columns[0] = 0; rows[0] = 0; values[0] = 0; columns[1] = 1; rows[1] = 0; values[1] = 1; columns[2] = 0; rows[2] = 1; values[2] = 2; columns[3] = 1; rows[3] = 1; values[3] = 3; int i; for (i = 0; i < MATRIX_SIZE; i++) { columns2[i] = -1; rows2[i] = -1; values2[i] = -1; } starpu_coo_data_register(&coo_handle, STARPU_MAIN_RAM, NX, NY, MATRIX_SIZE, columns, rows, (uintptr_t) values, sizeof(values[0])); starpu_coo_data_register(&coo2_handle, STARPU_MAIN_RAM, NX, NY, MATRIX_SIZE, columns2, rows2, (uintptr_t) values2, sizeof(values2[0])); } static void unregister_data(void) { starpu_data_unregister(coo_handle); starpu_data_unregister(coo2_handle); } int main(int argc, char **argv) { struct starpu_conf conf; struct data_interface_test_summary summary; starpu_conf_init(&conf); conf.ncuda = 2; conf.nopencl = 1; int ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); register_data(); ret = run_tests(&coo_config, &summary); unregister_data(); if (ret) data_interface_test_summary_print(stderr, &summary); starpu_shutdown(); return data_interface_test_summary_success(&summary); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/coo/coo_opencl.c000066400000000000000000000066661507764646700245110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #define KERNEL_LOCATION "tests/datawizard/interfaces/coo/coo_opencl_kernel.cl" extern struct test_config coo_config; static struct starpu_opencl_program coo_program; void test_coo_opencl_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int id, devid, factor, ret; unsigned int n; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; cl_context context; cl_mem val, fail; ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &coo_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); factor = *(int *)args; n = STARPU_COO_GET_NVALUES(buffers[0]); val = (cl_mem) STARPU_COO_GET_VALUES(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); starpu_opencl_get_context(devid, &context); err = starpu_opencl_load_kernel(&kernel, &queue, &coo_program, "test_coo_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &coo_config.copy_failed, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); /* Setting args */ int nargs; nargs = starpu_opencl_set_kernel_args(&err, &kernel, sizeof(val), &val, sizeof(n), &n, sizeof(fail), &fail, sizeof(factor), &factor, 0); if (nargs != 4) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local = global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } err = clEnqueueReadBuffer(queue, fail, CL_TRUE, 0, sizeof(int), &coo_config.copy_failed, 0, NULL, NULL); if (coo_config.copy_failed != SUCCESS) { fprintf(stderr, "FUCK copy_failed %d\n", coo_config.copy_failed); } if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); ret = starpu_opencl_unload_opencl(&coo_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/coo/coo_opencl_kernel.cl000066400000000000000000000016761507764646700262210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void test_coo_opencl(__global int *val, unsigned int nx, __global int *err, int factor) { const int i = get_global_id(0); if (i >= nx) return; if (val[i] != i * factor) *err = val[1]; else val[i] = - val[i]; } starpu-1.4.9+dfsg/tests/datawizard/interfaces/copy_interfaces.c000066400000000000000000000073041507764646700247540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../../helper.h" #include static int check_copy(starpu_data_handle_t handle, char *header) { void *old_interface, *new_interface; starpu_data_handle_t new_handle; int ret=0; starpu_data_register_same(&new_handle, handle); if (!getenv("STARPU_SSILENT")) { if (new_handle->ops->display) { fprintf(stderr, "%s: ", header); new_handle->ops->display(new_handle, stderr); fprintf(stderr, "\n"); } else { fprintf(stderr, "%s does not define a display ops\n", header); } } old_interface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); new_interface = starpu_data_get_interface_on_node(new_handle, STARPU_MAIN_RAM); if (new_handle->ops->compare(old_interface, new_interface) == 0) { FPRINTF(stderr, "Error when copying %s data\n", header); ret = 1; } starpu_data_unregister(handle); starpu_data_unregister(new_handle); return ret; } int main(int argc, char **argv) { int ret; starpu_data_handle_t handle; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (ret == 0) { int NX=3; int NY=2; int matrix[NX][NY]; starpu_matrix_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0][0])); ret = check_copy(handle, "matrix"); } if (ret == 0) { int NX=3; int NY=2; int NZ=4; int block[NX*NY*NZ]; starpu_block_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)block, NX, NX*NY, NX, NY, NZ, sizeof(block[0])); ret = check_copy(handle, "block"); } if (ret == 0) { int xx[] = {12, 23, 45}; starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)xx, 3, sizeof(xx[0])); ret = check_copy(handle, "vector"); } if (ret == 0) { uint32_t nnz = 2; unsigned nrow = 5; float nzvalA[nnz]; uint32_t colind[nnz]; uint32_t rowptr[nrow+1]; starpu_csr_data_register(&handle, STARPU_MAIN_RAM, nnz, nrow, (uintptr_t)nzvalA, colind, rowptr, 0, sizeof(float)); ret = check_copy(handle, "csr"); } if (ret == 0) { uint32_t nnz = 2; unsigned nrow = 5; float nzvalA[nnz]; uint32_t colind[nnz]; uint32_t rowptr[nrow+1]; starpu_bcsr_data_register(&handle, STARPU_MAIN_RAM, nnz, nrow, (uintptr_t)nzvalA, colind, rowptr, 0, 1, 1, sizeof(float)); ret = check_copy(handle, "bcsr"); } if (ret == 0) { int x=42; starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); ret = check_copy(handle, "variable"); } if (ret == 0) { int NX=3; int NY=2; int NZ=4; int NT=3; int tensor[NX*NY*NZ*NT]; starpu_tensor_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)tensor, NX, NX*NY, NX*NY*NZ, NX, NY, NZ, NT, sizeof(tensor[0])); ret = check_copy(handle, "tensor"); } if (ret == 0) { int NX=3; int NY=2; int array2d[NX*NY]; unsigned nn[2] = {NX, NY}; unsigned ldn[2] = {1, NX}; starpu_ndim_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)array2d, ldn, nn, 2, sizeof(int)); ret = check_copy(handle, "ndim"); } starpu_shutdown(); return ret; } starpu-1.4.9+dfsg/tests/datawizard/interfaces/csr/000077500000000000000000000000001507764646700222165ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/datawizard/interfaces/csr/csr_cuda.cu000066400000000000000000000041441507764646700243350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" extern struct test_config csr_config; __global__ void csr_cuda(int *nzval, uint32_t nnz, int *err, int factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i >= nnz) return; if (nzval[i] != (i+1)*factor) *err = 1; else nzval[i] = -nzval[i]; } extern "C" void test_csr_cuda_func(void *buffers[], void *args) { int factor; int *ret; int *val; cudaError_t error; uint32_t nnz = STARPU_CSR_GET_NNZ(buffers[0]); unsigned threads_per_block = 64; unsigned nblocks = (nnz + threads_per_block-1) / threads_per_block; factor = *(int *) args; val = (int *) STARPU_CSR_GET_NZVAL(buffers[0]); error = cudaMalloc(&ret, sizeof(int)); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(ret, &csr_config.copy_failed, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); csr_cuda<<>> (val, nnz, ret, factor); error = cudaGetLastError(); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(&csr_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); cudaFree(ret); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/csr/csr_interface.c000066400000000000000000000070051507764646700251730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #include "../../../helper.h" #define WIDTH 8 #define HEIGHT 4 #define SIZE (WIDTH * HEIGHT) #define NNZ (SIZE-1) #ifdef STARPU_USE_CPU void test_csr_cpu_func(void *buffers[], void *args); #endif /* !STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA extern void test_csr_cuda_func(void *buffers[], void *_args); #endif #ifdef STARPU_USE_OPENCL extern void test_csr_opencl_func(void *buffers[], void *args); #endif static int nzval[NNZ]; static int nzval2[NNZ]; static uint32_t colind[NNZ]; static uint32_t colind2[NNZ]; static uint32_t rowptr[HEIGHT+1]; static uint32_t rowptr2[HEIGHT+1]; static starpu_data_handle_t csr_handle; static starpu_data_handle_t csr2_handle; struct test_config csr_config = { #ifdef STARPU_USE_CPU .cpu_func = test_csr_cpu_func, #endif /* ! STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA .cuda_func = test_csr_cuda_func, #endif #ifdef STARPU_USE_OPENCL .opencl_func = test_csr_opencl_func, #endif .handle = &csr_handle, .ptr = nzval, .dummy_handle = &csr2_handle, .dummy_ptr = nzval2, .copy_failed = SUCCESS, .name = "csr_interface" }; static void register_data(void) { int i; for (i = 1; i < SIZE; i++) { nzval[i-1] = i; nzval2[i-1] = 42; colind[i-1] = i % WIDTH; colind2[i-1] = colind[i-1]; } rowptr[0] = 1; rowptr2[0] = 1; for (i = 1; i < HEIGHT; i++) { rowptr[i] = i * WIDTH; rowptr2[i] = rowptr[i]; } rowptr[HEIGHT] = NNZ + 1; rowptr2[HEIGHT] = rowptr[HEIGHT]; starpu_csr_data_register(&csr_handle, STARPU_MAIN_RAM, NNZ, HEIGHT, (uintptr_t) nzval, colind, rowptr, 0, sizeof(nzval[0])); starpu_csr_data_register(&csr2_handle, STARPU_MAIN_RAM, NNZ, HEIGHT, (uintptr_t) nzval2, colind2, rowptr2, 0, sizeof(nzval2[0])); } static void unregister_data(void) { starpu_data_unregister(csr_handle); starpu_data_unregister(csr2_handle); } void test_csr_cpu_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int *val; int factor; int i; uint32_t nnz = STARPU_CSR_GET_NNZ(buffers[0]); val = (int *) STARPU_CSR_GET_NZVAL(buffers[0]); factor = *(int *) args; for (i = 0; i < (int)nnz; i++) { if (val[i] != (i+1) * factor) { csr_config.copy_failed = FAILURE; return; } val[i] *= -1; } } int main(int argc, char **argv) { struct data_interface_test_summary summary; struct starpu_conf conf; starpu_conf_init(&conf); conf.ncuda = 2; conf.nopencl = 1; int ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); register_data(); ret = run_tests(&csr_config, &summary); unregister_data(); starpu_shutdown(); if (ret) data_interface_test_summary_print(stderr, &summary); return data_interface_test_summary_success(&summary); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/csr/csr_opencl.c000066400000000000000000000065541507764646700245230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #define KERNEL_LOCATION "tests/datawizard/interfaces/csr/csr_opencl_kernel.cl" extern struct test_config csr_config; static struct starpu_opencl_program opencl_program; void test_csr_opencl_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int id, devid, ret; int factor = *(int *) args; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); uint32_t nnz = STARPU_CSR_GET_NNZ(buffers[0]); cl_mem nzval = (cl_mem)STARPU_CSR_GET_NZVAL(buffers[0]); cl_context context; id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); starpu_opencl_get_context(devid, &context); cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &csr_config.copy_failed, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "test_csr_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); int nargs; nargs = starpu_opencl_set_kernel_args(&err, &kernel, sizeof(nzval), &nzval, sizeof(nnz), &nnz, sizeof(fail), &fail, sizeof(factor), &factor, 0); if (nargs != 4) { fprintf(stderr, "Failed to set argument #%d\n", err); STARPU_OPENCL_REPORT_ERROR(err); } { size_t global = nnz; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local = global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } err = clEnqueueReadBuffer(queue, fail, CL_TRUE, 0, sizeof(int), &csr_config.copy_failed, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/csr/csr_opencl_kernel.cl000066400000000000000000000016751507764646700262360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void test_csr_opencl(__global int *val, unsigned int nx, __global int *err, int factor) { const int i = get_global_id(0); if (i >= nx) return; if (val[i] != (i+1) * factor) *err = 1; else val[i] = - val[i]; } starpu-1.4.9+dfsg/tests/datawizard/interfaces/matrix/000077500000000000000000000000001507764646700227335ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/datawizard/interfaces/matrix/matrix_cuda.cu000066400000000000000000000042511507764646700255660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" extern struct test_config matrix_config; __global__ void matrix_cuda(int *val, unsigned n, int *err, int factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i >= n) return; if (val[i] != i*factor) *err = 1; else val[i] = -val[i]; } extern "C" void test_matrix_cuda_func(void *buffers[], void *args) { int factor; int *ret; int *val; cudaError_t error; unsigned int nx, ny, n; nx = STARPU_MATRIX_GET_NX(buffers[0]); ny = STARPU_MATRIX_GET_NY(buffers[0]); n = nx * ny; unsigned threads_per_block = 64; unsigned nblocks = (n + threads_per_block-1) / threads_per_block; factor = *(int *) args; val = (int *) STARPU_MATRIX_GET_PTR(buffers[0]); error = cudaMalloc(&ret, sizeof(int)); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(ret, &matrix_config.copy_failed, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); matrix_cuda<<>>(val, n, ret, factor); error = cudaGetLastError(); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(&matrix_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); cudaFree(ret); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/matrix/matrix_interface.c000066400000000000000000000063531507764646700264320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #include "../../../helper.h" #define WIDTH 16 #define HEIGHT 16 #ifdef STARPU_USE_CPU void test_matrix_cpu_func(void *buffers[], void *args); #endif /* !STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA extern void test_matrix_cuda_func(void *buffers[], void *_args); #endif #ifdef STARPU_USE_OPENCL extern void test_matrix_opencl_func(void *buffers[], void *args); #endif static starpu_data_handle_t matrix_handle; static starpu_data_handle_t matrix2_handle; static int matrix[WIDTH * HEIGHT]; static int matrix2[WIDTH * HEIGHT]; struct test_config matrix_config = { #ifdef STARPU_USE_CPU .cpu_func = test_matrix_cpu_func, #endif /* ! STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA .cuda_func = test_matrix_cuda_func, #endif #ifdef STARPU_USE_OPENCL .opencl_func = test_matrix_opencl_func, #endif .handle = &matrix_handle, .ptr = matrix, .dummy_handle = &matrix2_handle, .dummy_ptr = matrix2, .copy_failed = SUCCESS, .name = "matrix_interface" }; static void register_data(void) { int i; int size = WIDTH * HEIGHT; for (i = 0; i < size; i++) matrix[i] = i; starpu_matrix_data_register(&matrix_handle, STARPU_MAIN_RAM, (uintptr_t) matrix, WIDTH, /* ld */ WIDTH, HEIGHT, sizeof(matrix[0])); starpu_matrix_data_register(&matrix2_handle, STARPU_MAIN_RAM, (uintptr_t) matrix2, WIDTH, /* ld */ WIDTH, HEIGHT, sizeof(matrix[0])); } static void unregister_data(void) { starpu_data_unregister(matrix_handle); starpu_data_unregister(matrix2_handle); } void test_matrix_cpu_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int *val; int factor; int i; int nx, ny; nx = STARPU_MATRIX_GET_NX(buffers[0]); ny = STARPU_MATRIX_GET_NY(buffers[0]); val = (int *) STARPU_MATRIX_GET_PTR(buffers[0]); factor = *(int *) args; for (i = 0; i < nx*ny; i++) { if (val[i] != i * factor) { matrix_config.copy_failed = FAILURE; return; } val[i] *= -1; } } int main(int argc, char **argv) { struct data_interface_test_summary summary; struct starpu_conf conf; starpu_conf_init(&conf); conf.ncuda = 2; conf.nopencl = 1; int ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); register_data(); ret = run_tests(&matrix_config, &summary); unregister_data(); starpu_shutdown(); if (ret) data_interface_test_summary_print(stderr, &summary); return data_interface_test_summary_success(&summary); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/matrix/matrix_opencl.c000066400000000000000000000066061507764646700257530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #define KERNEL_LOCATION "tests/datawizard/interfaces/matrix/matrix_opencl_kernel.cl" extern struct test_config matrix_config; static struct starpu_opencl_program matrix_program; void test_matrix_opencl_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int id, devid, factor, ret; unsigned int n; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; cl_context context; cl_mem val, fail; ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &matrix_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); factor = *(int *)args; n = STARPU_MATRIX_GET_NX(buffers[0]); n*= STARPU_MATRIX_GET_NY(buffers[0]); val = (cl_mem)STARPU_MATRIX_GET_DEV_HANDLE(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); starpu_opencl_get_context(devid, &context); err = starpu_opencl_load_kernel(&kernel, &queue, &matrix_program, "matrix_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &matrix_config.copy_failed, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); /* Setting args */ int nargs; nargs = starpu_opencl_set_kernel_args(&err, &kernel, sizeof(val), &val, sizeof(n), &n, sizeof(fail), &fail, sizeof(factor), &factor, 0); if (nargs != 4) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local = global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } err = clEnqueueReadBuffer(queue, fail, CL_TRUE, 0, sizeof(int), &matrix_config.copy_failed, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); ret = starpu_opencl_unload_opencl(&matrix_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/matrix/matrix_opencl_kernel.cl000066400000000000000000000016401507764646700274600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void matrix_opencl(__global int *val, unsigned int nx, __global int *err, int factor) { const int i = get_global_id(0); if (i >= nx) return; if (val[i] != i * factor) *err = i; else val[i] *= -1; } starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/000077500000000000000000000000001507764646700237725ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/advanced/000077500000000000000000000000001507764646700255375ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/advanced/generic.c000066400000000000000000000102211507764646700273130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "generic.h" #include "../../../../helper.h" struct stats global_stats; #ifdef STARPU_USE_CPU void cpu_func(void *buffers[], void *args) { (void)buffers; (void)args; STARPU_SKIP_IF_VALGRIND; global_stats.cpu++; } #endif /* !STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA void cuda_func(void *buffers[], void *args) { (void)buffers; (void)args; STARPU_SKIP_IF_VALGRIND; global_stats.cuda++; } void cpu_to_cuda_func(void *buffers[], void *args) { (void)buffers; (void)args; STARPU_SKIP_IF_VALGRIND; global_stats.cpu_to_cuda++; } void cuda_to_cpu_func(void *buffers[], void *args) { (void)buffers; (void)args; STARPU_SKIP_IF_VALGRIND; global_stats.cuda_to_cpu++; } struct starpu_codelet cpu_to_cuda_cl = { .cuda_funcs = {cpu_to_cuda_func}, .nbuffers = 1, .modes = { STARPU_RW }, }; struct starpu_codelet cuda_to_cpu_cl = { .cpu_funcs = {cuda_to_cpu_func}, .nbuffers = 1, .modes = { STARPU_RW }, }; #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL void opencl_func(void *buffers[], void *args) { (void)buffers; (void)args; STARPU_SKIP_IF_VALGRIND; global_stats.opencl++; } static void cpu_to_opencl_func(void *buffers[], void *args) { (void)buffers; (void)args; STARPU_SKIP_IF_VALGRIND; global_stats.cpu_to_opencl++; } static void opencl_to_cpu_func(void *buffers[], void *args) { (void)buffers; (void)args; STARPU_SKIP_IF_VALGRIND; global_stats.opencl_to_cpu++; } struct starpu_codelet cpu_to_opencl_cl = { .opencl_funcs = {cpu_to_opencl_func}, .nbuffers = 1, .modes = { STARPU_RW }, }; struct starpu_codelet opencl_to_cpu_cl = { .cpu_funcs = {opencl_to_cpu_func}, .nbuffers = 1, .modes = { STARPU_RW }, }; #endif /* !STARPU_USE_OPENCL */ struct starpu_multiformat_data_interface_ops ops = { #ifdef STARPU_USE_CUDA .cuda_elemsize = sizeof(int), .cpu_to_cuda_cl = &cpu_to_cuda_cl, .cuda_to_cpu_cl = &cuda_to_cpu_cl, #endif #ifdef STARPU_USE_OPENCL .opencl_elemsize = sizeof(int), .cpu_to_opencl_cl = &cpu_to_opencl_cl, .opencl_to_cpu_cl = &opencl_to_cpu_cl, #endif .cpu_elemsize = sizeof(int) }; void print_stats(struct stats *s) { #ifdef STARPU_USE_CPU FPRINTF(stderr, "cpu : %u\n", s->cpu); #endif /* !STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA FPRINTF(stderr, "cuda : %u\n" "cpu->cuda : %u\n" "cuda->cpu : %u\n", s->cuda, s->cpu_to_cuda, s->cuda_to_cpu); #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL FPRINTF(stderr, "opencl : %u\n" "cpu->opencl : %u\n" "opencl->cpu : %u\n", s->opencl, s->cpu_to_opencl, s->opencl_to_cpu); #endif /* !STARPU_USE_OPENCL */ } void reset_stats(struct stats *s) { #ifdef STARPU_USE_CPU s->cpu = 0; #endif /* !STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA s->cuda = 0; s->cpu_to_cuda = 0; s->cuda_to_cpu = 0; #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL s->opencl = 0; s->cpu_to_opencl = 0; s->opencl_to_cpu = 0; #endif /* !STARPU_USE_OPENCL */ } int compare_stats(struct stats *s1, struct stats *s2) { if ( #ifdef STARPU_USE_CPU s1->cpu == s2->cpu && #endif /* !STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA s1->cuda == s2->cuda && s1->cpu_to_cuda == s2->cpu_to_cuda && s1->cuda_to_cpu == s2->cuda_to_cpu && #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL s1->opencl == s2->opencl && s1->cpu_to_opencl == s2->cpu_to_opencl && s1->opencl_to_cpu == s2->opencl_to_cpu && #endif /* !STARPU_USE_OPENCL */ 1 /* Just so the build does not fail if we disable EVERYTHING */ ) return 0; else return 1; } starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/advanced/generic.h000066400000000000000000000031001507764646700273160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef MULTIFORMAT_GENERIC_H #define MULTIFORMAT_GENERIC_H #define NX 16 #ifdef STARPU_USE_CPU void cpu_func(void *buffers[], void *args); #endif /* !STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA void cuda_func(void *buffers[], void *args); #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL void opencl_func(void *buffers[], void *args); #endif /* !STARPU_USE_OPENCL */ extern struct starpu_multiformat_data_interface_ops ops; /* Counting the calls to the codelets */ struct stats { #ifdef STARPU_USE_CPU unsigned int cpu; #endif #ifdef STARPU_USE_CUDA unsigned int cuda; unsigned int cpu_to_cuda; unsigned int cuda_to_cpu; #endif #ifdef STARPU_USE_OPENCL unsigned int opencl; unsigned int cpu_to_opencl; unsigned int opencl_to_cpu; #endif }; void print_stats(struct stats *); void reset_stats(struct stats *); int compare_stats(struct stats *, struct stats *); #endif /* !MULTIFORMAT_GENERIC_H */ starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.c000066400000000000000000000073641507764646700326140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "generic.h" #include "../../../../helper.h" #if defined(STARPU_USE_CUDA) && defined(STARPU_USE_OPENCL) extern struct stats global_stats; static int vector[NX]; static starpu_data_handle_t handle; /* * Initially, our vector should be in RAM. It is then used on a CUDA device, * then on an OpenCL device, and finally, on a CUDA device again. * The following operations should be performed, in this specific order : * - CPU -> CUDA conversion * - CUDA kernel execution * - OpenCL kernel execution * - CUDA kernel execution * - CUDA -> CPU conversion * * Note that we will not run any conversion between CUDA and OpenCL, because * StarPU assumes that the data structures used on CUDA and OpenCL devices are * the same. */ static int test(void) { int ret; struct starpu_task *task_cuda, *task_cuda2, *task_opencl; static struct starpu_codelet cl_cuda = { .cuda_funcs = {cuda_func}, .nbuffers = 1, .modes = {STARPU_RW} }; task_cuda = starpu_task_create(); task_cuda->cl = &cl_cuda; task_cuda->handles[0] = handle; ret = starpu_task_submit(task_cuda); if (ret != 0) { task_cuda->destroy = 0; starpu_task_destroy(task_cuda); return 1; } static struct starpu_codelet cl_opencl = { .opencl_funcs = {opencl_func}, .nbuffers = 1, .modes = {STARPU_RW} }; task_opencl = starpu_task_create(); task_opencl->cl = &cl_opencl; task_opencl->handles[0] = handle; ret = starpu_task_submit(task_opencl); if (ret != 0) { task_opencl->destroy = 0; starpu_task_destroy(task_opencl); return 1; } task_cuda2 = starpu_task_create(); task_cuda2->cl = &cl_cuda; task_cuda2->handles[0] = handle; ret = starpu_task_submit(task_cuda2); if (ret != 0) { task_cuda2->destroy = 0; starpu_task_destroy(task_cuda2); return 1; } return 0; } static void register_handle(void) { int i; for (i = 0; i < NX; i++) vector[i] = i; starpu_multiformat_data_register(&handle, STARPU_MAIN_RAM, vector, NX, &ops); } static void unregister_handle(void) { starpu_data_unregister(handle); } #endif /* !(STARPU_USE_CUDA && STARPU_USE_OPENCL) */ int main(void) { #if defined(STARPU_USE_CUDA) && defined(STARPU_USE_OPENCL) int ret; struct starpu_conf conf; starpu_conf_init(&conf); #if defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1) conf.ncuda = 0; #else conf.ncuda = 1; #endif conf.nopencl = 1; ret = starpu_init(&conf); if (ret == -ENODEV) goto enodev; reset_stats(&global_stats); register_handle(); ret = test(); unregister_handle(); starpu_shutdown(); if (ret != 0) return STARPU_TEST_SKIPPED; struct stats expected_stats = { #ifdef STARPU_USE_CPU .cpu = 0, #endif #ifdef STARPU_USE_CUDA .cuda = 2, .cpu_to_cuda = 1, .cuda_to_cpu = 1, #endif #ifdef STARPU_USE_OPENCL .opencl = 1, .cpu_to_opencl = 0, .opencl_to_cpu = 0 #endif }; ret = compare_stats(&global_stats, &expected_stats); if (ret != 0) { print_stats(&global_stats); print_stats(&expected_stats); return EXIT_FAILURE; } return EXIT_SUCCESS; enodev: return STARPU_TEST_SKIPPED; #else return STARPU_TEST_SKIPPED; #endif } starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/advanced/multiformat_data_release.c000066400000000000000000000072271507764646700327470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "generic.h" #include "../../../../helper.h" static int vector[NX]; static starpu_data_handle_t handle; #define ENTER() do { FPRINTF(stderr, "Entering %s\n", __starpu_func__); } while (0) extern struct stats global_stats; static void register_handle(void) { int i; for (i = 0; i < NX; i++) vector[i] = i; starpu_multiformat_data_register(&handle, STARPU_MAIN_RAM, vector, NX, &ops); } static void unregister_handle(void) { starpu_data_unregister(handle); } #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) static void create_and_submit(int where) { static struct starpu_codelet cl = { .modes = { STARPU_RW }, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_func}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_func}, #endif .nbuffers = 1 }; cl.where = where; struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = handle; /* We need to be sure the data has been copied to the GPU at the end * of this function */ task->synchronous = 1; if (starpu_task_submit(task) == -ENODEV) exit(STARPU_TEST_SKIPPED); } #endif static int test(void) { struct stats expected_stats; memset(&expected_stats, 0, sizeof(expected_stats)); #ifdef STARPU_USE_CUDA create_and_submit(STARPU_CUDA); starpu_data_acquire(handle, STARPU_RW); expected_stats.cuda = 1; expected_stats.cpu_to_cuda = 1; expected_stats.cuda_to_cpu = 1; starpu_data_release(handle); if (compare_stats(&global_stats, &expected_stats) != 0) { FPRINTF(stderr, "CUDA failed\n"); print_stats(&global_stats); FPRINTF(stderr ,"\n"); print_stats(&expected_stats); return -ENODEV; } #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL create_and_submit(STARPU_OPENCL); starpu_data_acquire(handle, STARPU_RW); expected_stats.opencl = 1; expected_stats.cpu_to_opencl = 1; expected_stats.opencl_to_cpu = 1; starpu_data_release(handle); if (compare_stats(&global_stats, &expected_stats) != 0) { FPRINTF(stderr, "OPENCL failed\n"); print_stats(&global_stats); FPRINTF(stderr ,"\n"); print_stats(&expected_stats); return -ENODEV; } #endif /* !STARPU_USE_OPENCL */ return 0; } int main(int argc, char **argv) { #ifdef STARPU_USE_CPU int ret; struct starpu_conf conf; starpu_conf_init(&conf); #if defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1) conf.ncuda = 0; #else conf.ncuda = 1; #endif conf.nopencl = 1; memset(&global_stats, 0, sizeof(global_stats)); ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV || starpu_cpu_worker_get_count() == 0) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); register_handle(); int err = test(); unregister_handle(); starpu_shutdown(); switch (err) { case -ENODEV: return STARPU_TEST_SKIPPED; case 0: return EXIT_SUCCESS; default: return EXIT_FAILURE; } #else /* ! STARPU_USE_CPU */ /* Without the CPU, there is no point in using the multiformat * interface, so this test is pointless. */ return STARPU_TEST_SKIPPED; #endif } starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.c000066400000000000000000000140171507764646700340310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "generic.h" #include "../../../../helper.h" #define DEBUG 0 #if DEBUG #define SYNCHRONOUS 1 /* Easier to debug with synchronous tasks */ #define ENTER() do { FPRINTF(stderr, "Entering %s\n", __starpu_func__); } while (0) #else #define SYNCHRONOUS 0 #define ENTER() #endif extern struct stats global_stats; #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) static void create_and_submit_tasks(int where, starpu_data_handle_t handles[]) { int ret; FPRINTF(stderr, "***** Starting Task 1\n"); struct starpu_codelet cl = { .modes = { STARPU_RW }, .nbuffers = 1, .where = where }; #ifdef STARPU_USE_CUDA if (where & STARPU_CUDA) cl.cuda_funcs[0] = cuda_func; #endif #ifdef STARPU_USE_OPENCL if (where & STARPU_OPENCL) cl.opencl_funcs[0] = opencl_func; #endif struct starpu_task *task = starpu_task_create(); task->synchronous = SYNCHRONOUS; task->cl = &cl; task->handles[0] = handles[0]; ret = starpu_task_submit(task); assert(ret == 0); #ifdef STARPU_USE_CPU FPRINTF(stderr, "***** Starting Task 2\n"); struct starpu_codelet cl2 = { .modes = { STARPU_RW }, .cpu_funcs = {cpu_func}, .nbuffers = 1, .where = STARPU_CPU, }; struct starpu_task *task2 = starpu_task_create(); task2->synchronous = SYNCHRONOUS; task2->cl = &cl2; task2->handles[0] = handles[1]; ret = starpu_task_submit(task2); assert(ret == 0); #endif /* !STARPU_USE_CPU */ FPRINTF(stderr, "***** Starting Task 3\n"); struct starpu_codelet cl3 = { .modes = { STARPU_RW, STARPU_RW }, .nbuffers = 2, .where = where }; #ifdef STARPU_USE_CUDA if (where & STARPU_CUDA) cl3.cuda_funcs[0] = cuda_func; #endif #ifdef STARPU_USE_OPENCL if (where & STARPU_OPENCL) cl3.opencl_funcs[0] = opencl_func; #endif struct starpu_task *task3 = starpu_task_create(); task3->synchronous = SYNCHRONOUS; task3->cl = &cl3; task3->handles[0] = handles[0]; task3->handles[1] = handles[1]; ret = starpu_task_submit(task3); assert(ret == 0); ret = starpu_task_wait_for_all(); assert(ret == 0); FPRINTF(stderr, "***** End of all tasks\n"); return; } #endif /* XXX Just a little bit of copy/pasta here... */ #ifdef STARPU_USE_CUDA static int test_cuda(void) { int i; int vector1[NX]; int vector2[NX]; starpu_data_handle_t handles[2]; for (i = 0; i < NX; i++) { vector1[i] = i; vector2[i] = i; } starpu_multiformat_data_register(&handles[0], STARPU_MAIN_RAM, vector1, NX, &ops); starpu_multiformat_data_register(&handles[1], STARPU_MAIN_RAM, vector2, NX, &ops); memset(&global_stats, 0, sizeof(global_stats)); create_and_submit_tasks(STARPU_CUDA, handles); starpu_data_unregister(handles[0]); starpu_data_unregister(handles[1]); #if DEBUG print_stats(&global_stats); #endif struct stats expected_stats; #ifdef STARPU_USE_CPU expected_stats.cpu = 1; #endif /* !STARPU_USE_CPU */ #ifdef STARPU_USE_OPENCL expected_stats.opencl = 0; expected_stats.cpu_to_opencl = 0; expected_stats.opencl_to_cpu = 0; #endif /* !STARPU_USE_OPENCL */ expected_stats.cuda = 2; expected_stats.cpu_to_cuda = 2; expected_stats.cuda_to_cpu = 2; return compare_stats(&expected_stats, &global_stats); } #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL static int test_opencl(void) { int i; int vector1[NX]; int vector2[NX]; starpu_data_handle_t handles[2]; for (i = 0; i < NX; i++) { vector1[i] = i; vector2[i] = i; } starpu_multiformat_data_register(&handles[0], STARPU_MAIN_RAM, vector1, NX, &ops); starpu_multiformat_data_register(&handles[1], STARPU_MAIN_RAM, vector2, NX, &ops); memset(&global_stats, 0, sizeof(global_stats)); create_and_submit_tasks(STARPU_OPENCL, handles); starpu_data_unregister(handles[0]); starpu_data_unregister(handles[1]); #if DEBUG print_stats(&global_stats); #endif struct stats expected_stats; #ifdef STARPU_USE_CPU expected_stats.cpu = 1; #endif /* !STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA expected_stats.cuda = 0; expected_stats.cpu_to_cuda = 0; expected_stats.cuda_to_cpu = 0; #endif /* !STARPU_USE_CUDA */ expected_stats.opencl = 2; expected_stats.cpu_to_opencl = 2; expected_stats.opencl_to_cpu = 2; return compare_stats(&expected_stats, &global_stats); } #endif /* !STARPU_USE_OPENCL */ int main(int argc, char **argv) { #ifdef STARPU_USE_CPU int ret; struct starpu_conf conf; starpu_conf_init(&conf); #if defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1) conf.ncuda = 0; #else conf.ncuda = 2; #endif conf.nopencl = 1; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned int ncpu = starpu_cpu_worker_get_count(); if (ncpu == 0) { FPRINTF(stderr, "No CPUS, cannot run this test.\n"); return STARPU_TEST_SKIPPED; } unsigned int ncuda = starpu_cuda_worker_get_count(); unsigned int nopencl = starpu_opencl_worker_get_count(); #ifdef STARPU_USE_OPENCL if (nopencl > 0 && test_opencl() != 0) { FPRINTF(stderr, "OPENCL FAILED\n"); return EXIT_FAILURE; } #endif #ifdef STARPU_USE_CUDA if (ncuda > 0 && test_cuda() != 0) { FPRINTF(stderr, "CUDA FAILED \n"); return EXIT_FAILURE; } #endif starpu_shutdown(); if (ncuda == 0 && nopencl == 0) return STARPU_TEST_SKIPPED; else return EXIT_SUCCESS; #else /* !STARPU_USE_CPU */ /* Without the CPU, there is no point in using the multiformat * interface, so this test is pointless. */ return STARPU_TEST_SKIPPED; #endif } starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/advanced/multiformat_worker.c000066400000000000000000000065041507764646700316440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "generic.h" #include "../../../../helper.h" #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) extern struct stats global_stats; static int vector[NX]; static starpu_data_handle_t handle; #endif #ifdef STARPU_USE_CUDA static int ncuda; static int cuda_worker; #endif #ifdef STARPU_USE_OPENCL static int nopencl; static int opencl_worker; #endif #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) static struct starpu_codelet cl = { .modes = { STARPU_RW }, #ifdef STARPU_USE_CUDA .cuda_funcs = { cuda_func }, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = { opencl_func }, #endif .nbuffers = 1, }; static void register_handle(void) { int i; for (i = 0; i < NX; i++) vector[i] = i; starpu_multiformat_data_register(&handle, STARPU_MAIN_RAM, vector, NX, &ops); } static void unregister_handle(void) { starpu_data_unregister(handle); } static int create_and_submit_tasks(void) { struct starpu_task *task; task = starpu_task_create(); task->cl = &cl; task->handles[0] = handle; task->execute_on_a_specific_worker = 1; #ifdef STARPU_USE_CUDA if (ncuda > 0) { task->workerid = cuda_worker; } else #endif #ifdef STARPU_USE_OPENCL if (nopencl > 0) { task->workerid = opencl_worker; } else #endif { task->destroy = 0; starpu_task_destroy(task); return -ENODEV; } return starpu_task_submit(task); } #endif int main(int argc STARPU_ATTRIBUTE_UNUSED, char **argv STARPU_ATTRIBUTE_UNUSED) { #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) int err; err = starpu_initialize(NULL, &argc, &argv); if (err == -ENODEV) goto enodev; #if defined(STARPU_USE_CUDA) && !(defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1)) ncuda = starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, &cuda_worker, 1); if (ncuda < 0) ncuda = 1; #endif #ifdef STARPU_USE_OPENCL nopencl = starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER, &opencl_worker, 1); if (nopencl < 0) nopencl = 1; #endif reset_stats(&global_stats); register_handle(); err = create_and_submit_tasks(); unregister_handle(); starpu_shutdown(); if (err == -ENODEV) goto enodev; #if defined(STARPU_USE_CUDA) if (global_stats.cuda == 1) { if (global_stats.cpu_to_cuda == 1 && global_stats.cuda_to_cpu == 1) return EXIT_SUCCESS; else return EXIT_FAILURE; } #endif /* !STARPU_USE_CUDA */ #if defined(STARPU_USE_OPENCL) if (global_stats.opencl == 1) { if (global_stats.cpu_to_opencl == 1 && global_stats.opencl_to_cpu == 1) return EXIT_SUCCESS; else return EXIT_FAILURE; } #endif /* !STARPU_USE_OPENCL */ /* We should not get here */ return EXIT_FAILURE; enodev: #endif return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/advanced/same_handle.c000066400000000000000000000056311507764646700301500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "generic.h" #include "../../../../helper.h" /* * A single handle can be given twice to a given kernel. In this case, it * should only be converted once. */ #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) extern struct stats global_stats; static int vector[NX]; static starpu_data_handle_t handle; static struct starpu_codelet cl = { .modes = { STARPU_RW, STARPU_RW }, #ifdef STARPU_USE_CUDA .cuda_funcs = { cuda_func }, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = { opencl_func }, #endif .nbuffers = 2, }; static void register_handle(void) { int i; for (i = 0; i < NX; i++) vector[i] = i; starpu_multiformat_data_register(&handle, STARPU_MAIN_RAM, vector, NX, &ops); } static void unregister_handle(void) { starpu_data_unregister(handle); } static int create_and_submit_tasks(void) { int ret; struct starpu_task *task; cl.where = 0; #ifdef STARPU_USE_CUDA cl.where |= STARPU_CUDA; #endif #ifdef STARPU_USE_OPENCL cl.where |= STARPU_OPENCL; #endif task = starpu_task_create(); task->cl = &cl; task->handles[0] = handle; task->handles[1] = handle; ret = starpu_task_submit(task); if (ret == -ENODEV) { task->destroy = 0; starpu_task_destroy(task); return -ENODEV; } return 0; } #endif int main(int argc, char **argv) { #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) int err; struct starpu_conf conf; starpu_conf_init(&conf); #if defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1) conf.ncuda = 0; #endif err = starpu_initialize(&conf, &argc, &argv); if (err == -ENODEV) goto enodev; reset_stats(&global_stats); register_handle(); err = create_and_submit_tasks(); unregister_handle(); starpu_shutdown(); if (err == -ENODEV) goto enodev; #ifdef STARPU_USE_CUDA if (global_stats.cuda == 1) { if (global_stats.cpu_to_cuda == 1 && global_stats.cuda_to_cpu == 1) return EXIT_SUCCESS; else return EXIT_FAILURE; } else #endif #if defined(STARPU_USE_OPENCL) if (global_stats.opencl == 1) { if (global_stats.cpu_to_opencl == 1 && global_stats.opencl_to_cpu == 1) return EXIT_SUCCESS; else return EXIT_FAILURE; } else #endif { /* We should not get here */ return EXIT_FAILURE; } enodev: #endif return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets.c000066400000000000000000000043511507764646700326330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "multiformat_types.h" #include "../../../helper.h" #ifdef STARPU_USE_CUDA void cuda_to_cpu(void *buffers[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; FPRINTF(stderr, "Entering %s\n", __starpu_func__); struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]); struct point *dst = STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); int i; for (i = 0; i < n; i++) { dst[i].x = src->x[i]; dst[i].y = src->y[i]; } } extern void cpu_to_cuda_cuda_func(void *buffers[], void *args); struct starpu_codelet cpu_to_cuda_cl = { .cuda_funcs = {cpu_to_cuda_cuda_func}, .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 1, .modes = { STARPU_RW }, }; struct starpu_codelet cuda_to_cpu_cl = { .cpu_funcs = {cuda_to_cpu}, .nbuffers = 1, .modes = { STARPU_RW }, }; #endif #ifdef STARPU_USE_OPENCL void opencl_to_cpu(void *buffers[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; struct struct_of_arrays *src = STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]); struct point *dst = STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); int i; for (i = 0; i < n; i++) { dst[i].x = src->x[i]; dst[i].y = src->y[i]; } } extern void cpu_to_opencl_opencl_func(void *buffers[], void *args); struct starpu_codelet cpu_to_opencl_cl = { .opencl_funcs = {cpu_to_opencl_opencl_func}, .nbuffers = 1, .modes = { STARPU_RW }, }; struct starpu_codelet opencl_to_cpu_cl = { .cpu_funcs = {opencl_to_cpu}, .nbuffers = 1, .modes = { STARPU_RW }, }; #endif starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu000066400000000000000000000032421507764646700340120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "multiformat_types.h" #include "../../../helper.h" static __global__ void cpu_to_cuda_cuda(struct point *src, struct struct_of_arrays *dst, unsigned n) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) { dst->x[i] = src[i].x; dst->y[i] = src[i].y; } } extern "C" void cpu_to_cuda_cuda_func(void *buffers[], void *_args) { FPRINTF(stderr, "Entering %s\n", __starpu_func__); struct point *src; struct struct_of_arrays *dst; src = (struct point *) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); dst = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]); int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); unsigned threads_per_block = 64; unsigned nblocks = (n + threads_per_block-1) / threads_per_block; cpu_to_cuda_cuda<<>>(src, dst, n); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl000066400000000000000000000017011507764646700343430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "multiformat_types.h" __kernel void cpu_to_opencl_opencl(__global struct point *src, __global struct struct_of_arrays *dst, unsigned int n) { const unsigned int i = get_global_id(0); if (i < n) { dst->x[i] = src[i].x; dst->y[i] = src[i].y; } } starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c000066400000000000000000000060131507764646700341700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../../../helper.h" #define KERNEL_LOCATION "tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl" static struct starpu_opencl_program opencl_conversion_program; void cpu_to_opencl_opencl_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; (void) args; int id, devid, ret; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; unsigned n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); cl_mem src = (cl_mem) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); cl_mem dst = (cl_mem) STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_conversion_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_conversion_program, "cpu_to_opencl_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(src), &src); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 1, sizeof(dst), &dst); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 2, sizeof(n), &n); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local = global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); ret = starpu_opencl_unload_opencl(&opencl_conversion_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/multiformat_cuda.cu000066400000000000000000000046271507764646700276730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "multiformat_types.h" #include "../test_interfaces.h" #include "../../../helper.h" extern struct test_config multiformat_config; static __global__ void multiformat_cuda(struct struct_of_arrays *soa, unsigned n, int *err, int factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i >= n) return; if (soa->x[i] != i * factor || soa->y[i] != i * factor) { *err = 1; } else { soa->x[i] = -soa->x[i]; soa->y[i] = -soa->y[i]; } } extern "C" void test_multiformat_cuda_func(void *buffers[], void *args) { FPRINTF(stderr, "Entering %s\n", __starpu_func__); int factor; int *ret; cudaError_t error; unsigned int n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); struct struct_of_arrays *soa; soa = (struct struct_of_arrays *) STARPU_MULTIFORMAT_GET_CUDA_PTR(buffers[0]); unsigned threads_per_block = 64; unsigned nblocks = (n + threads_per_block-1) / threads_per_block; factor = *(int *) args; error = cudaMalloc(&ret, sizeof(int)); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(ret, &multiformat_config.copy_failed, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); multiformat_cuda<<>>(soa, n, ret, factor); error = cudaGetLastError(); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(&multiformat_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); cudaFree(ret); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/multiformat_interface.c000066400000000000000000000103731507764646700305250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "multiformat_types.h" #include "../test_interfaces.h" #include "../../../helper.h" static void test_multiformat_cpu_func(void *buffers[], void *args); #ifdef STARPU_USE_CUDA extern void test_multiformat_cuda_func(void *buffers[], void *args); #endif #ifdef STARPU_USE_OPENCL extern void test_multiformat_opencl_func(void *buffers[], void *args); #endif static struct point array_of_structs[N_ELEMENTS]; static struct point array_of_structs_dummy[N_ELEMENTS]; static starpu_data_handle_t multiformat_handle; static starpu_data_handle_t multiformat_dummy_handle; struct test_config multiformat_config = { .cpu_func = test_multiformat_cpu_func, #ifdef STARPU_USE_CUDA .cuda_func = test_multiformat_cuda_func, #endif #ifdef STARPU_USE_OPENCL .opencl_func = test_multiformat_opencl_func, #endif .handle = &multiformat_handle, .ptr = array_of_structs, .dummy_handle = &multiformat_dummy_handle, .dummy_ptr = array_of_structs_dummy, .copy_failed = SUCCESS, .name = "multiformat_interface" }; static void test_multiformat_cpu_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; struct point *aos; int n, i; int factor; aos = (struct point *) STARPU_MULTIFORMAT_GET_CPU_PTR(buffers[0]); n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); factor = *(int *) args; for (i = 0; i < n; i++) { FPRINTF(stderr, "(%d %d) [%d]", aos[i].x, aos[i].y, factor); if (aos[i].x != i * factor || aos[i].y != i * factor) { multiformat_config.copy_failed = FAILURE; } aos[i].x = -aos[i].x; aos[i].y = -aos[i].y; } FPRINTF(stderr, "\n"); } #ifdef STARPU_USE_CUDA extern struct starpu_codelet cpu_to_cuda_cl; extern struct starpu_codelet cuda_to_cpu_cl; #endif #ifdef STARPU_USE_OPENCL extern struct starpu_codelet cpu_to_opencl_cl; extern struct starpu_codelet opencl_to_cpu_cl; #endif struct starpu_multiformat_data_interface_ops format_ops = { #ifdef STARPU_USE_CUDA .cuda_elemsize = 2* sizeof(float), .cpu_to_cuda_cl = &cpu_to_cuda_cl, .cuda_to_cpu_cl = &cuda_to_cpu_cl, #endif #ifdef STARPU_USE_OPENCL .opencl_elemsize = 2 * sizeof(float), .cpu_to_opencl_cl = &cpu_to_opencl_cl, .opencl_to_cpu_cl = &opencl_to_cpu_cl, #endif .cpu_elemsize = sizeof(struct point), }; static void register_data(void) { int i; for (i = 0; i < N_ELEMENTS; i++) { array_of_structs[i].x = i; array_of_structs[i].y = i; } starpu_multiformat_data_register(&multiformat_handle, STARPU_MAIN_RAM, &array_of_structs, N_ELEMENTS, &format_ops); starpu_multiformat_data_register(&multiformat_dummy_handle, STARPU_MAIN_RAM, &array_of_structs_dummy, N_ELEMENTS, &format_ops); } static void unregister_data(void) { starpu_data_unregister(multiformat_handle); starpu_data_unregister(multiformat_dummy_handle); } int main(int argc, char **argv) { #ifdef STARPU_USE_CPU int ret; struct data_interface_test_summary summary; struct starpu_conf conf; starpu_conf_init(&conf); #if defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1) conf.ncuda = 0; #else conf.ncuda = 2; #endif conf.nopencl = 1; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); register_data(); ret = run_tests(&multiformat_config, &summary); if (ret) data_interface_test_summary_print(stderr, &summary); unregister_data(); starpu_shutdown(); return data_interface_test_summary_success(&summary); #else /* Without the CPU, there is no point in using the multiformat * interface, so this test is pointless. */ return STARPU_TEST_SKIPPED; #endif } starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/multiformat_opencl.c000066400000000000000000000071341507764646700300460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #define KERNEL_LOCATION "tests/datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl" extern struct test_config multiformat_config; static struct starpu_opencl_program multiformat_program; void test_multiformat_opencl_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int id, devid, factor, ret; unsigned int n; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; cl_context context; cl_mem val, fail; ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &multiformat_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); factor = *(int *)args; n = STARPU_MULTIFORMAT_GET_NX(buffers[0]); val = (cl_mem)STARPU_MULTIFORMAT_GET_OPENCL_PTR(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); starpu_opencl_get_context(devid, &context); err = starpu_opencl_load_kernel(&kernel, &queue, &multiformat_program, "multiformat_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &multiformat_config.copy_failed, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); /* Setting args */ err = clSetKernelArg(kernel, 0, sizeof(val), &val); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 1, sizeof(n), &n); if (err) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 2, sizeof(fail), &fail); if (err) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 3, sizeof(factor), &factor); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local = global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } err = clEnqueueReadBuffer(queue, fail, CL_TRUE, 0, sizeof(int), &multiformat_config.copy_failed, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); ret = starpu_opencl_unload_opencl(&multiformat_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl000066400000000000000000000020451507764646700315560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "multiformat_types.h" __kernel void multiformat_opencl(__global struct struct_of_arrays *soa, unsigned int nx, __global int *err, int factor) { const int i = get_global_id(0); if (i >= nx) return; if (soa->x[i] != i * factor || soa->y[i] != i * factor) { *err = i; } else { soa->x[i] = -soa->x[i]; soa->y[i] = -soa->y[i]; } } starpu-1.4.9+dfsg/tests/datawizard/interfaces/multiformat/multiformat_types.h000066400000000000000000000015631507764646700277370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef TEST_MULTIFORMAT_TYPES_H #define TEST_MULTIFORMAT_TYPES_H #define N_ELEMENTS 2 struct struct_of_arrays { int x[N_ELEMENTS]; int y[N_ELEMENTS]; }; struct point { int x, y; }; #endif starpu-1.4.9+dfsg/tests/datawizard/interfaces/ndim/000077500000000000000000000000001507764646700223565ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/datawizard/interfaces/ndim/ndim_cuda.cu000066400000000000000000000047771507764646700246510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" extern struct test_config arr4d_config; static __global__ void arr4d_cuda(int *arr4d, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt, int factor, int *err) { int i, j, k, l; int val = 0; for (l = 0; l < nt ;l++) { for (k = 0; k < nz ;k++) { for (j = 0; j < ny ;j++) { for(i = 0; i < nx ;i++) { if (arr4d[(l*ldt)+(k*ldz)+(j*ldy)+i] != factor * val) { *err = 1; return; } else { arr4d[(l*ldt)+(k*ldz)+(j*ldy)+i] *= -1; val++; } } } } } } extern "C" void test_arr4d_cuda_func(void *buffers[], void *args) { cudaError_t error; int *ret; error = cudaMalloc(&ret, sizeof(int)); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(ret, &arr4d_config.copy_failed, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]); unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); int nx = nn[0]; int ny = nn[1]; int nz = nn[2]; int nt = nn[3]; unsigned ldy = ldn[1]; unsigned ldz = ldn[2]; unsigned ldt = ldn[3]; int *arr4d = (int *) STARPU_NDIM_GET_PTR(buffers[0]); int factor = *(int*) args; arr4d_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>> (arr4d, nx, ny, nz, nt, ldy, ldz, ldt, factor, ret); error = cudaGetLastError(); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(&arr4d_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); cudaFree(ret); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/ndim/ndim_interface.c000066400000000000000000000073351507764646700255010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #include "../../../helper.h" #define NX 4 #define NY NX #define NZ NX #define NT NX /* Prototypes */ static void register_data(void); static void unregister_data(void); void test_arr4d_cpu_func(void *buffers[], void *args); #ifdef STARPU_USE_CUDA extern void test_arr4d_cuda_func(void *buffers[], void *_args); #endif #ifdef STARPU_USE_OPENCL extern void test_arr4d_opencl_func(void *buffers[], void *args); #endif static starpu_data_handle_t _arr4d_handle; static starpu_data_handle_t _arr4d2_handle; static int _arr4d[NX*NY*NZ*NT]; static int _arr4d2[NX*NY*NZ*NT]; struct test_config arr4d_config = { .cpu_func = test_arr4d_cpu_func, #ifdef STARPU_USE_CUDA .cuda_func = test_arr4d_cuda_func, #endif #ifdef STARPU_USE_OPENCL .opencl_func = test_arr4d_opencl_func, #endif .handle = &_arr4d_handle, .ptr = _arr4d, .dummy_handle = &_arr4d2_handle, .dummy_ptr = _arr4d2, .copy_failed = SUCCESS, .name = "ndim_interface" }; static void register_data(void) { /* Initializing data */ int val = 0; int i, j, k, l; for (l = 0; l < NT; l++) for (k = 0; k < NZ; k++) for (j = 0; j < NY; j++) for (i = 0; i < NX; i++) _arr4d[(l*NX*NY*NZ)+(k*NX*NY)+(j*NX)+i] = val++; /* Registering data */ unsigned nn[4] = {NX, NY, NZ, NT}; unsigned ldn[4] = {1, NX, NX*NY, NX*NY*NZ}; starpu_ndim_data_register(&_arr4d_handle, STARPU_MAIN_RAM, (uintptr_t)_arr4d, ldn, nn, 4, sizeof(_arr4d[0])); starpu_ndim_data_register(&_arr4d2_handle, STARPU_MAIN_RAM, (uintptr_t)_arr4d2, ldn, nn, 4, sizeof(_arr4d2[0])); } static void unregister_data(void) { starpu_data_unregister(_arr4d_handle); starpu_data_unregister(_arr4d2_handle); } void test_arr4d_cpu_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int factor = *(int*)args; int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]); unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); int nx = nn[0]; int ny = nn[1]; int nz = nn[2]; int nt = nn[3]; unsigned ldy = ldn[1]; unsigned ldz = ldn[2]; unsigned ldt = ldn[3]; int *arr4d = (int *) STARPU_NDIM_GET_PTR(buffers[0]); int i, j, k, l; int val = 0; arr4d_config.copy_failed = SUCCESS; for (l = 0; l < nt; l++) { for (k = 0; k < nz; k++) { for (j = 0; j < ny; j++) { for (i = 0; i < nx; i++) { if (arr4d[(l*ldt)+(k*ldz)+(j*ldy)+i] != factor * val) { arr4d_config.copy_failed = FAILURE; return; } else { arr4d[(l*ldt)+(k*ldz)+(j*ldy)+i] *= -1; val++; } } } } } } int main(int argc, char **argv) { struct data_interface_test_summary summary; struct starpu_conf conf; starpu_conf_init(&conf); conf.ncuda = 2; conf.nopencl = 1; int ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); register_data(); ret = run_tests(&arr4d_config, &summary); unregister_data(); starpu_shutdown(); if (ret) data_interface_test_summary_print(stderr, &summary); return data_interface_test_summary_success(&summary); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/ndim/ndim_opencl.c000066400000000000000000000064631507764646700250220ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #define KERNEL_LOCATION "tests/datawizard/interfaces/ndim/ndim_opencl_kernel.cl" extern struct test_config arr4d_config; static struct starpu_opencl_program opencl_program; void test_arr4d_opencl_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int id, devid, ret; int factor = *(int *) args; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); int *nn = (int *)STARPU_NDIM_GET_NN(buffers[0]); unsigned *ldn = STARPU_NDIM_GET_LDN(buffers[0]); int nx = nn[0]; int ny = nn[1]; int nz = nn[2]; int nt = nn[3]; unsigned ldy = ldn[1]; unsigned ldz = ldn[2]; unsigned ldt = ldn[3]; cl_mem arr4d = (cl_mem) STARPU_NDIM_GET_DEV_HANDLE(buffers[0]); cl_context context; id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); starpu_opencl_get_context(devid, &context); cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &arr4d_config.copy_failed, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "arr4d_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); int nargs; nargs = starpu_opencl_set_kernel_args(&err, &kernel, sizeof(arr4d), &arr4d, sizeof(nx), &nx, sizeof(ny), &ny, sizeof(nz), &nz, sizeof(nt), &nt, sizeof(ldy), &ldy, sizeof(ldz), &ldz, sizeof(ldt), &ldt, sizeof(factor), &factor, sizeof(fail), &fail, 0); if (nargs != 10) { fprintf(stderr, "Failed to set argument #%d\n", nargs); STARPU_OPENCL_REPORT_ERROR(err); } { size_t global[3] = {nx, ny, nz*nt}; err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } err = clEnqueueReadBuffer(queue, fail, CL_TRUE, 0, sizeof(int), &arr4d_config.copy_failed, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/ndim/ndim_opencl_kernel.cl000066400000000000000000000023531507764646700265300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void arr4d_opencl(__global int *arr4d, int nx, int ny, int nz, int nt, int ldy, int ldz, int ldt, int factor, __global int *err) { const int idx = get_global_id(0); const int idy = get_global_id(1); const int idz = get_global_id(2) % nz; const int idt = get_global_id(2) / nz; if (idx >= nx) return; if (idy >= ny) return; if (idz >= nz) return; if (idt >= nt) return; int val = idt*nz*ny*nx+idz*ny*nx+idy*nx+idx; int i = (idt*ldt)+(idz*ldz)+(idy*ldy)+idx; if (arr4d[i] != factor * val) *err = 1; else arr4d[i] *= -1; } starpu-1.4.9+dfsg/tests/datawizard/interfaces/tensor/000077500000000000000000000000001507764646700227415ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/datawizard/interfaces/tensor/tensor_cuda.cu000066400000000000000000000051721507764646700256050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" extern struct test_config tensor_config; static __global__ void tensor_cuda(int *tensor, int nx, int ny, int nz, int nt, unsigned ldy, unsigned ldz, unsigned ldt, int factor, int *err) { int i, j, k, l; int val = 0; for (l = 0; l < nt ;l++) { for (k = 0; k < nz ;k++) { for (j = 0; j < ny ;j++) { for(i = 0; i < nx ;i++) { if (tensor[(l*ldt)+(k*ldz)+(j*ldy)+i] != factor * val) { *err = 1; return; } else { tensor[(l*ldt)+(k*ldz)+(j*ldy)+i] *= -1; val++; } } } } } } extern "C" void test_tensor_cuda_func(void *buffers[], void *args) { cudaError_t error; int *ret; error = cudaMalloc(&ret, sizeof(int)); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(ret, &tensor_config.copy_failed, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); int nx = STARPU_TENSOR_GET_NX(buffers[0]); int ny = STARPU_TENSOR_GET_NY(buffers[0]); int nz = STARPU_TENSOR_GET_NZ(buffers[0]); int nt = STARPU_TENSOR_GET_NT(buffers[0]); unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]); unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]); int *tensor = (int *) STARPU_TENSOR_GET_PTR(buffers[0]); int factor = *(int*) args; tensor_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>> (tensor, nx, ny, nz, nt, ldy, ldz, ldt, factor, ret); error = cudaGetLastError(); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(&tensor_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); cudaFree(ret); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/tensor/tensor_interface.c000066400000000000000000000101471507764646700264420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #include "../../../helper.h" #define NX 4 #define NY NX #define NZ NX #define NT NX /* Prototypes */ static void register_data(void); static void unregister_data(void); void test_tensor_cpu_func(void *buffers[], void *args); #ifdef STARPU_USE_CUDA extern void test_tensor_cuda_func(void *buffers[], void *_args); #endif #ifdef STARPU_USE_OPENCL extern void test_tensor_opencl_func(void *buffers[], void *args); #endif static starpu_data_handle_t _tensor_handle; static starpu_data_handle_t _tensor2_handle; static int _tensor[NX*NY*NZ*NT]; static int _tensor2[NX*NY*NZ*NT]; struct test_config tensor_config = { .cpu_func = test_tensor_cpu_func, #ifdef STARPU_USE_CUDA .cuda_func = test_tensor_cuda_func, #endif #ifdef STARPU_USE_OPENCL .opencl_func = test_tensor_opencl_func, #endif .handle = &_tensor_handle, .ptr = _tensor, .dummy_handle = &_tensor2_handle, .dummy_ptr = _tensor2, .copy_failed = SUCCESS, .name = "tensor_interface" }; static void register_data(void) { /* Initializing data */ int val = 0; int i, j, k, l; for (l = 0; l < NT; l++) for (k = 0; k < NZ; k++) for (j = 0; j < NY; j++) for (i = 0; i < NX; i++) _tensor[(l*NX*NY*NZ)+(k*NX*NY)+(j*NX)+i] = val++; /* Registering data */ starpu_tensor_data_register(&_tensor_handle, STARPU_MAIN_RAM, (uintptr_t)_tensor, NX, NX * NY, NX * NY * NZ, NX, NY, NZ, NT, sizeof(_tensor[0])); starpu_tensor_data_register(&_tensor2_handle, STARPU_MAIN_RAM, (uintptr_t)_tensor2, NX, NX * NY, NX * NY * NZ, NX, NY, NZ, NT, sizeof(_tensor2[0])); } static void unregister_data(void) { starpu_data_unregister(_tensor_handle); starpu_data_unregister(_tensor2_handle); } void test_tensor_cpu_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int factor = *(int*)args; int nx = STARPU_TENSOR_GET_NX(buffers[0]); int ny = STARPU_TENSOR_GET_NY(buffers[0]); int nz = STARPU_TENSOR_GET_NZ(buffers[0]); int nt = STARPU_TENSOR_GET_NT(buffers[0]); unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]); unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]); int *tensor = (int *) STARPU_TENSOR_GET_PTR(buffers[0]); int i, j, k, l; int val = 0; tensor_config.copy_failed = SUCCESS; for (l = 0; l < nt; l++) { for (k = 0; k < nz; k++) { for (j = 0; j < ny; j++) { for (i = 0; i < nx; i++) { if (tensor[(l*ldt)+(k*ldz)+(j*ldy)+i] != factor * val) { tensor_config.copy_failed = FAILURE; return; } else { tensor[(l*ldt)+(k*ldz)+(j*ldy)+i] *= -1; val++; } } } } } } int main(int argc, char **argv) { struct data_interface_test_summary summary; struct starpu_conf conf; starpu_conf_init(&conf); conf.ncuda = 2; conf.nopencl = 1; int ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); register_data(); ret = run_tests(&tensor_config, &summary); unregister_data(); starpu_shutdown(); if (ret) data_interface_test_summary_print(stderr, &summary); return data_interface_test_summary_success(&summary); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/tensor/tensor_opencl.c000066400000000000000000000066331507764646700257670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #define KERNEL_LOCATION "tests/datawizard/interfaces/tensor/tensor_opencl_kernel.cl" extern struct test_config tensor_config; static struct starpu_opencl_program opencl_program; void test_tensor_opencl_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int id, devid, ret; int factor = *(int *) args; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); int nx = STARPU_TENSOR_GET_NX(buffers[0]); int ny = STARPU_TENSOR_GET_NY(buffers[0]); int nz = STARPU_TENSOR_GET_NZ(buffers[0]); int nt = STARPU_TENSOR_GET_NT(buffers[0]); unsigned ldy = STARPU_TENSOR_GET_LDY(buffers[0]); unsigned ldz = STARPU_TENSOR_GET_LDZ(buffers[0]); unsigned ldt = STARPU_TENSOR_GET_LDT(buffers[0]); cl_mem tensor = (cl_mem) STARPU_TENSOR_GET_DEV_HANDLE(buffers[0]); cl_context context; id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); starpu_opencl_get_context(devid, &context); cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &tensor_config.copy_failed, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "tensor_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); int nargs; nargs = starpu_opencl_set_kernel_args(&err, &kernel, sizeof(tensor), &tensor, sizeof(nx), &nx, sizeof(ny), &ny, sizeof(nz), &nz, sizeof(nt), &nt, sizeof(ldy), &ldy, sizeof(ldz), &ldz, sizeof(ldt), &ldt, sizeof(factor), &factor, sizeof(fail), &fail, 0); if (nargs != 10) { fprintf(stderr, "Failed to set argument #%d\n", nargs); STARPU_OPENCL_REPORT_ERROR(err); } { size_t global[3] = {nx, ny, nz*nt}; err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } err = clEnqueueReadBuffer(queue, fail, CL_TRUE, 0, sizeof(int), &tensor_config.copy_failed, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/tensor/tensor_opencl_kernel.cl000066400000000000000000000023571507764646700275020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void tensor_opencl(__global int *tensor, int nx, int ny, int nz, int nt, int ldy, int ldz, int ldt, int factor, __global int *err) { const int idx = get_global_id(0); const int idy = get_global_id(1); const int idz = get_global_id(2) % nz; const int idt = get_global_id(2) / nz; if (idx >= nx) return; if (idy >= ny) return; if (idz >= nz) return; if (idt >= nt) return; int val = idt*nz*ny*nx+idz*ny*nx+idy*nx+idx; int i = (idt*ldt)+(idz*ldz)+(idy*ldy)+idx; if (tensor[i] != factor * val) *err = 1; else tensor[i] *= -1; } starpu-1.4.9+dfsg/tests/datawizard/interfaces/test_interfaces.c000066400000000000000000000344071507764646700247650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "test_interfaces.h" #include "../../helper.h" /* * This is definitely note thread-safe. */ static struct test_config *current_config; /* TODO : - OpenCL to OpenCL support */ static char *enum_to_string(int exit_code) { switch (exit_code) { case SUCCESS: return "Success"; case FAILURE: return "Failure"; case UNTESTED: return "Untested"; case NO_DEVICE: case -ENODEV: return "No device available"; case TASK_SUBMISSION_FAILURE: return "Task submission failed"; default: STARPU_ASSERT_MSG(0, "unknown error code %d\n", exit_code); } } void data_interface_test_summary_print(FILE *f, struct data_interface_test_summary *s) { if (!f) f = stderr; FPRINTF(f, "%s : %s\n", current_config->name, enum_to_string(s->success)); FPRINTF(f, "Asynchronous :\n"); FPRINTF(f, "\tCPU -> CUDA : %s\n", enum_to_string(s->cpu_to_cuda_async)); FPRINTF(f, "\tCUDA -> CUDA : %s\n", enum_to_string(s->cuda_to_cuda_async)); FPRINTF(f, "\tCUDA -> CPU : %s\n", enum_to_string(s->cuda_to_cpu_async)); FPRINTF(f, "\n"); FPRINTF(f, "\tCPU -> OpenCL : %s\n", enum_to_string(s->cpu_to_opencl_async)); FPRINTF(f, "\tOpenCL -> CPU : %s\n", enum_to_string(s->opencl_to_cpu_async)); FPRINTF(f, "\n"); FPRINTF(f, "Synchronous :\n"); FPRINTF(f, "\tCPU -> CUDA : %s\n", enum_to_string(s->cpu_to_cuda)); FPRINTF(f, "\tCUDA -> CUDA : %s\n", enum_to_string(s->cuda_to_cuda)); FPRINTF(f, "\tCUDA -> CPU : %s\n", enum_to_string(s->cuda_to_cpu)); FPRINTF(f, "\n"); FPRINTF(f, "\tCPU -> OpenCL : %s\n", enum_to_string(s->cpu_to_opencl)); FPRINTF(f, "\tOpenCL -> CPU : %s\n", enum_to_string(s->opencl_to_cpu)); FPRINTF(f, "\n"); FPRINTF(f, "\n"); FPRINTF(f, "CPU -> CPU : %s\n", enum_to_string(s->cpu_to_cpu)); FPRINTF(f, "to_pointer() : %s\n", enum_to_string(s->to_pointer)); FPRINTF(f, "compare() : %s\n", enum_to_string(s->compare)); FPRINTF(f, "pack_unpack() : %s\n", enum_to_string(s->pack)); } int data_interface_test_summary_success(struct data_interface_test_summary *s) { return s->success; } static void set_field(struct data_interface_test_summary *s, int *field, int ret) { *field = ret; if (ret == FAILURE) s->success = ret; } static void summary_init(struct data_interface_test_summary *s) { s->cpu_to_cpu = UNTESTED; s->compare = UNTESTED; s->cpu_to_cuda = UNTESTED; s->cuda_to_cuda = UNTESTED; s->cuda_to_cpu = UNTESTED; s->cpu_to_cuda_async = UNTESTED; s->cuda_to_cpu_async = UNTESTED; s->cuda_to_cuda_async = UNTESTED; s->cpu_to_opencl = UNTESTED; s->opencl_to_cpu = UNTESTED; s->cpu_to_opencl_async = UNTESTED; s->opencl_to_cpu_async = UNTESTED; s->to_pointer = UNTESTED; s->pack = UNTESTED; s->success = SUCCESS; }; /* * This variable has to be either -1 or 1. * The kernels should check that the ith value stored in the data interface is * equal to i, if factor == 1, or -i, if factor == -1. */ static int factor = -1; /* * Creates a complete task, only knowing on what device it should be executed. * Note that the global variable is heavily used here. * Arguments : * - taskp : a pointer to a valid task * - type : STARPU_{CPU,CUDA,OPENCL}_WORKER. * - id: when positive, should be the worker id * Return values : * -ENODEV * 0 : success. */ static int create_task(struct starpu_task **taskp, enum starpu_worker_archtype type, int id) { static int cpu_workers[STARPU_MAXCPUS]; static int cuda_workers[STARPU_MAXCUDADEVS]; static int opencl_workers[STARPU_MAXOPENCLDEVS]; static int n_cpus = -1; static int n_cudas = -1; static int n_opencls = -1; if (n_cpus == -1) /* First time here */ { /* We do not check the return values of the calls to * starpu_worker_get_ids_by_type now, because it is simpler to * detect a problem in the switch that comes right after this * block of code. */ n_cpus = starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, cpu_workers, STARPU_MAXCPUS); n_cudas = starpu_worker_get_ids_by_type(STARPU_CUDA_WORKER, cuda_workers, STARPU_MAXCUDADEVS); n_opencls = starpu_worker_get_ids_by_type(STARPU_OPENCL_WORKER, opencl_workers, STARPU_MAXOPENCLDEVS); } int *workers; static struct starpu_codelet cl; starpu_codelet_init(&cl); cl.nbuffers = 1; cl.modes[0] = STARPU_RW; if (type == STARPU_CPU_WORKER) { if (n_cpus == 0) return -ENODEV; if (id != -1 && id >= n_cpus) { FPRINTF(stderr, "Not enough CPU workers\n"); return -ENODEV; } workers = cpu_workers; cl.cpu_funcs[0] = current_config->cpu_func; } else if (type == STARPU_CUDA_WORKER) { if (n_cudas == 0) return -ENODEV; if (id != -1 && id >= n_cudas) { FPRINTF(stderr, "Not enough CUDA workers\n"); return -ENODEV; } workers = cuda_workers; cl.cuda_funcs[0] = current_config->cuda_func; } else if (type == STARPU_OPENCL_WORKER) { if (n_opencls == 0) return -ENODEV; if (id != -1 && id >= n_opencls) { FPRINTF(stderr, "Not enough OpenCL workers\n"); return -ENODEV; } workers = opencl_workers; cl.opencl_funcs[0] = current_config->opencl_func; } else { return -ENODEV; } factor = -factor; struct starpu_task *task; task = starpu_task_build(&cl, STARPU_RW, *current_config->handle, STARPU_TASK_SYNCHRONOUS, 1, 0); task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); if (id != -1) { task->execute_on_a_specific_worker = 1; task->workerid = workers[id]; } *taskp = task; return 0; } /* * _to_ functions. * They all create and submit a task that has to be executed on , * forcing a copy between and . */ static enum exit_code ram_to_cuda(void) { int err; struct starpu_task *task; err = create_task(&task, STARPU_CUDA_WORKER, 0); if (err != 0) return NO_DEVICE; err = starpu_task_submit(task); if (err != 0) return TASK_SUBMISSION_FAILURE; FPRINTF(stderr, "[%s] : %d\n", __starpu_func__, current_config->copy_failed); return current_config->copy_failed; } static enum exit_code cuda_to_cuda(void) { int err; struct starpu_task *task; err = create_task(&task, STARPU_CUDA_WORKER, 1); if (err != 0) return NO_DEVICE; err = starpu_task_submit(task); if (err != 0) return TASK_SUBMISSION_FAILURE; FPRINTF(stderr, "[%s] : %d\n", __starpu_func__, current_config->copy_failed); return current_config->copy_failed; } static enum exit_code cuda_to_ram(void) { int err; struct starpu_task *task; err = create_task(&task, STARPU_CPU_WORKER, -1); if (err != 0) return NO_DEVICE; err = starpu_task_submit(task); if (err != 0) return TASK_SUBMISSION_FAILURE; FPRINTF(stderr, "[%s] : %d\n", __starpu_func__, current_config->copy_failed); return current_config->copy_failed; } static enum exit_code ram_to_opencl(void) { int err; struct starpu_task *task; err = create_task(&task, STARPU_OPENCL_WORKER, -1); if (err != 0) return NO_DEVICE; err = starpu_task_submit(task); if (err != 0) return TASK_SUBMISSION_FAILURE; FPRINTF(stderr, "[%s] : %d\n", __starpu_func__, current_config->copy_failed); return current_config->copy_failed; } static enum exit_code opencl_to_ram(void) { int err; struct starpu_task *task; err = create_task(&task, STARPU_CPU_WORKER, -1); if (err != 0) return NO_DEVICE; err = starpu_task_submit(task); if (err != 0) return TASK_SUBMISSION_FAILURE; FPRINTF(stderr, "[%s] : %d\n", __starpu_func__, current_config->copy_failed); return current_config->copy_failed; } /* End of the _to_ functions. */ static void run_cuda(int async, struct data_interface_test_summary *s) { /* RAM -> CUDA (-> CUDA) -> RAM */ int err; err = ram_to_cuda(); set_field(s, async==1?&s->cpu_to_cuda_async:&s->cpu_to_cuda, err); /* If this failed, there is no point in continuing. */ if (err != SUCCESS) return; if (starpu_cuda_worker_get_count() >= 2) { err = cuda_to_cuda(); } else { err = UNTESTED; } set_field(s, async==1?&s->cuda_to_cuda_async:&s->cuda_to_cuda, err); /* Even if cuda_to_cuda() failed, a valid copy is left on the first * cuda device, which means we can safely test cuda_to_ram() */ err = cuda_to_ram(); set_field(s, async==1?&s->cuda_to_cpu_async:&s->cuda_to_cpu, err); } static void run_opencl(int async, struct data_interface_test_summary *s) { /* RAM -> OpenCL -> RAM */ int err; err = ram_to_opencl(); set_field(s, async==1?&s->cpu_to_opencl_async:&s->cpu_to_opencl, err); if (err != SUCCESS) return; err = opencl_to_ram(); set_field(s, async==1?&s->opencl_to_cpu_async:&s->opencl_to_cpu, err); } static void ram_to_ram(struct data_interface_test_summary *s) { int err; struct starpu_task *task; starpu_data_handle_t src, dst; void *src_interface, *dst_interface; src = *current_config->handle; dst = *current_config->dummy_handle; /* We do not care about the nodes */ src_interface = starpu_data_get_interface_on_node(src, STARPU_MAIN_RAM); dst_interface = starpu_data_get_interface_on_node(dst, STARPU_MAIN_RAM); if (src->ops->copy_methods->ram_to_ram) src->ops->copy_methods->ram_to_ram(src_interface, STARPU_MAIN_RAM, dst_interface, STARPU_MAIN_RAM); else src->ops->copy_methods->any_to_any(src_interface, STARPU_MAIN_RAM, dst_interface, STARPU_MAIN_RAM, NULL); err = create_task(&task, STARPU_CPU_WORKER, -1); if (err != 0) goto out; task->handles[0] = dst; err = starpu_task_submit(task); if (err != 0) { err = TASK_SUBMISSION_FAILURE; goto out; } FPRINTF(stderr, "[%s] : %d\n", __starpu_func__, current_config->copy_failed); err = current_config->copy_failed; out: set_field(s, &s->cpu_to_cpu, err); } static void run_async(struct data_interface_test_summary *s) { int async = starpu_asynchronous_copy_disabled(); if (async == 1) { FPRINTF(stderr, "Asynchronous copies have been disabled\n"); return; } run_cuda(1, s); run_opencl(1, s); } static void run_sync(struct data_interface_test_summary *s) { starpu_data_handle_t handle = *current_config->handle; struct starpu_data_copy_methods new_copy_methods; struct starpu_data_copy_methods *old_copy_methods; old_copy_methods = (struct starpu_data_copy_methods *) handle->ops->copy_methods; memcpy(&new_copy_methods, old_copy_methods, sizeof(struct starpu_data_copy_methods)); new_copy_methods.ram_to_cuda_async = NULL; new_copy_methods.cuda_to_cuda_async = NULL; new_copy_methods.cuda_to_ram_async = NULL; new_copy_methods.ram_to_opencl_async = NULL; new_copy_methods.opencl_to_ram_async = NULL; handle->ops->copy_methods = &new_copy_methods; run_cuda(0, s); run_opencl(0, s); handle->ops->copy_methods = old_copy_methods; } static void compare(struct data_interface_test_summary *s) { int err; void *interface_a, *interface_b; starpu_data_handle_t handle, dummy_handle; handle = *current_config->handle; dummy_handle = *current_config->dummy_handle; interface_a = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); interface_b = starpu_data_get_interface_on_node(dummy_handle, STARPU_MAIN_RAM); err = handle->ops->compare(interface_a, interface_b); s->compare = (err == 0) ? FAILURE : SUCCESS; set_field(s, &s->compare, s->compare); } static void to_pointer(struct data_interface_test_summary *s) { starpu_data_handle_t handle; s->to_pointer = UNTESTED; handle = *current_config->handle; if (handle->ops->to_pointer) { unsigned int node; unsigned int tests = 0; for (node = 0; node < STARPU_MAXNODES; node++) { if (starpu_node_get_kind(node) != STARPU_CPU_RAM) continue; if (!starpu_data_test_if_allocated_on_node(handle, node)) continue; void *data_interface = starpu_data_get_interface_on_node(handle, node); void *ptr = handle->ops->to_pointer(data_interface, node); if (ptr != current_config->ptr) { s->to_pointer = FAILURE; break; } tests++; } if (tests > 0) s->to_pointer = SUCCESS; } set_field(s, &s->to_pointer, s->to_pointer); } static void pack_unpack(struct data_interface_test_summary *s) { starpu_data_handle_t handle; starpu_data_handle_t dummy_handle; int err = UNTESTED; handle = *current_config->handle; dummy_handle = *current_config->dummy_handle; if (handle->ops->pack_data && handle->ops->unpack_data) { void *ptr = NULL; starpu_ssize_t size = 0; starpu_data_pack(handle, &ptr, &size); if (size != 0) { struct starpu_task *task; void *mem = (void *)starpu_malloc_on_node_flags(STARPU_MAIN_RAM, size, 0); starpu_data_acquire(dummy_handle, STARPU_W); starpu_data_unpack(dummy_handle, mem, size); starpu_data_unpack(dummy_handle, ptr, size); starpu_data_release(dummy_handle); factor = -factor; err = create_task(&task, STARPU_CPU_WORKER, -1); if (err != SUCCESS) goto out; task->handles[0] = dummy_handle; err = starpu_task_submit(task); if (err != 0) { err = TASK_SUBMISSION_FAILURE; goto out; } FPRINTF(stderr, "[%s] : %d\n", __starpu_func__, current_config->copy_failed); err = current_config->copy_failed; } } out: set_field(s, &s->pack, err); } static int load_conf(struct test_config *config) { if (!config || #ifdef STARPU_USE_CPU !config->cpu_func || !config->dummy_handle || #endif #ifdef STARPU_USE_CUDA !config->cuda_func || #endif #ifdef STARPU_USE_OPENCL !config->opencl_func || #endif !config->handle) { return 1; } current_config = config; return 0; } int run_tests(struct test_config *conf, struct data_interface_test_summary *s) { summary_init(s); if (load_conf(conf) == 1) { FPRINTF(stderr, "Failed to load conf.\n"); return 0; } run_async(s); run_sync(s); ram_to_ram(s); compare(s); to_pointer(s); pack_unpack(s); return 1; } starpu-1.4.9+dfsg/tests/datawizard/interfaces/test_interfaces.h000066400000000000000000000054061507764646700247670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef TEST_INTERFACES_H #define TEST_INTERFACES_H #include "../../helper.h" /* * Users do not know about this enum. They only know that SUCCESS is 0, and * FAILURE is 1. Therefore, the values of SUCCESS and FAILURE shall not be * changed. */ enum exit_code { SUCCESS = 0, FAILURE = 1, UNTESTED = 2, NO_DEVICE = 3, TASK_SUBMISSION_FAILURE = 4 }; struct test_config { /** we use pointers as we want to allow static initializations in the main application */ /* A pointer to a registered handle */ starpu_data_handle_t *handle; void *ptr; /* A pointer to a registered handle, that will be used to test * RAM to RAM copy. The values it points to should be different from * the ones pointed to by the previous handle. */ starpu_data_handle_t *dummy_handle; void *dummy_ptr; /* StarPU codelets. The following functions should : * 1) Check that the values are correct * 2) Negate every element */ starpu_cpu_func_t cpu_func; starpu_cuda_func_t cuda_func; starpu_hip_func_t hip_func; starpu_opencl_func_t opencl_func; /* The previous codelets must update this field at the end of their * execution. copy_failed must be FAILURE if the copy failed, SUCCESS otherwise. */ enum exit_code copy_failed; /* A human-readable name for the test */ const char *name; }; struct data_interface_test_summary { int success; /* Copy methods */ int cpu_to_cpu; int cpu_to_cuda; int cuda_to_cuda; int cuda_to_cpu; int cpu_to_cuda_async; int cuda_to_cpu_async; int cuda_to_cuda_async; int cpu_to_hip; int hip_to_hip; int hip_to_cpu; int cpu_to_hip_async; int hip_to_cpu_async; int hip_to_hip_async; int cpu_to_opencl; int opencl_to_cpu; int cpu_to_opencl_async; int opencl_to_cpu_async; /* Other stuff */ int compare; int to_pointer; int pack; }; void data_interface_test_summary_print(FILE *f, struct data_interface_test_summary *summary); int data_interface_test_summary_success(struct data_interface_test_summary *summary); int run_tests(struct test_config*, struct data_interface_test_summary *summary); #endif /* !TEST_INTERFACES_H */ starpu-1.4.9+dfsg/tests/datawizard/interfaces/test_interfaces.sh000077500000000000000000000017131507764646700251520ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # for i in bcsr block coo csr matrix multiformat ndim tensor variable vector void do $MS_LAUNCHER $STARPU_LAUNCH ./tests/datawizard/interfaces/$i/${i}_interface ret=$? if test "$ret" = "0" then echo "Interface $i: success" else echo "Interface $i: failure" fi done starpu-1.4.9+dfsg/tests/datawizard/interfaces/variable/000077500000000000000000000000001507764646700232145ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/datawizard/interfaces/variable/variable_cuda.cu000066400000000000000000000037751507764646700263420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" extern struct test_config variable_config; static __global__ void variable_cuda(int *val, int *err, int factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i > 0) return; if (*val != 42 * factor) *err = 1; else *val *= -1; } extern "C" void test_variable_cuda_func(void *buffers[], void *args) { cudaError_t error; int *ret; error = cudaMalloc(&ret, sizeof(int)); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(ret, &variable_config.copy_failed, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); int *val = (int *)STARPU_VARIABLE_GET_PTR(buffers[0]); int factor = *(int*) args; unsigned threads_per_block = 64; unsigned nblocks = 1; variable_cuda<<>>(val, ret, factor); error = cudaGetLastError(); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(&variable_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); cudaFree(ret); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/variable/variable_interface.c000066400000000000000000000054051507764646700271710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" static int variable; static int variable2; static starpu_data_handle_t variable_handle; static starpu_data_handle_t variable2_handle; /* Codelets */ void test_variable_cpu_func(void *buffers[], void *args); #ifdef STARPU_USE_CUDA extern void test_variable_cuda_func(void *buffers[], void *args); #endif #ifdef STARPU_USE_OPENCL extern void test_variable_opencl_func(void *buffers[], void *args); #endif struct test_config variable_config = { .cpu_func = test_variable_cpu_func, #ifdef STARPU_USE_CUDA .cuda_func = test_variable_cuda_func, #endif #ifdef STARPU_USE_OPENCL .opencl_func = test_variable_opencl_func, #endif .handle = &variable_handle, .ptr = &variable, .dummy_handle = &variable2_handle, .dummy_ptr = &variable2, .copy_failed = SUCCESS, .name = "variable_interface" }; void test_variable_cpu_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int *val; int factor; val = (int *) STARPU_VARIABLE_GET_PTR(buffers[0]); factor = *(int *)args; if (*val != 42 * factor) variable_config.copy_failed = FAILURE; else *val *= -1; } static void register_data(void) { variable = 42; variable2 = 12; starpu_variable_data_register(&variable_handle, STARPU_MAIN_RAM, (uintptr_t) &variable, sizeof(variable)); starpu_variable_data_register(&variable2_handle, STARPU_MAIN_RAM, (uintptr_t) &variable2, sizeof(variable2)); } static void unregister_data(void) { starpu_data_unregister(variable_handle); starpu_data_unregister(variable2_handle); } int main(int argc, char **argv) { int ret; struct data_interface_test_summary summary; struct starpu_conf conf; starpu_conf_init(&conf); conf.ncuda = 2; conf.nopencl = 1; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); register_data(); run_tests(&variable_config, &summary); unregister_data(); starpu_shutdown(); data_interface_test_summary_print(stderr, &summary); return data_interface_test_summary_success(&summary); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/variable/variable_opencl.c000066400000000000000000000057371507764646700265210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #define KERNEL_LOCATION "tests/datawizard/interfaces/variable/variable_opencl_kernel.cl" extern struct test_config variable_config; static struct starpu_opencl_program opencl_program; void test_variable_opencl_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int id, devid, ret; int factor = *(int *) args; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); cl_mem val = (cl_mem)STARPU_VARIABLE_GET_PTR(buffers[0]); cl_context context; id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); starpu_opencl_get_context(devid, &context); cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &variable_config.copy_failed, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "variable_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(val), &val); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 1, sizeof(fail), &fail); if (err) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 2, sizeof(factor), &factor); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global = 1; size_t local = 1; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } err = clEnqueueReadBuffer(queue, fail, CL_TRUE, 0, sizeof(int), &variable_config.copy_failed, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); return; } starpu-1.4.9+dfsg/tests/datawizard/interfaces/variable/variable_opencl_kernel.cl000066400000000000000000000016251507764646700302250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void variable_opencl(__global int *val, __global int *err, int factor) { const int i = get_global_id(0); if (i > 0) return; if (*val != 42 * factor) *err = 1; else *val *= -1; } starpu-1.4.9+dfsg/tests/datawizard/interfaces/vector/000077500000000000000000000000001507764646700227315ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/datawizard/interfaces/vector/vector_cuda.cu000066400000000000000000000040571507764646700255660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" extern struct test_config vector_config; __global__ void framework_cuda(int *val, unsigned n, int *err, int factor) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i >= n) return; if (val[i] != i*factor) *err = 1; else val[i] = -val[i]; } extern "C" void test_vector_cuda_func(void *buffers[], void *args) { cudaError_t error; int *ret; error = cudaMalloc(&ret, sizeof(int)); if (error != cudaSuccess) { fprintf(stderr, "cudaMalloc failed...\n"); return; } error = cudaMemcpyAsync(ret, &vector_config.copy_failed, sizeof(int), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); if (error != cudaSuccess) return; unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); int factor = *(int*) args; unsigned threads_per_block = 64; unsigned nblocks = (n + threads_per_block-1) / threads_per_block; framework_cuda<<>>(val, n, ret, factor); error = cudaGetLastError(); if (error != cudaSuccess) STARPU_CUDA_REPORT_ERROR(error); error = cudaMemcpyAsync(&vector_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); if (error != cudaSuccess) { return; } cudaFree(ret); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/vector/vector_interface.c000066400000000000000000000062561507764646700264300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #include "../../../helper.h" /* Prototypes */ static void register_data(void); static void unregister_data(void); void test_vector_cpu_func(void *buffers[], void *args); #ifdef STARPU_USE_CUDA extern void test_vector_cuda_func(void *buffers[], void *_args); #endif #ifdef STARPU_USE_OPENCL extern void test_vector_opencl_func(void *buffers[], void *args); #endif starpu_data_handle_t vector_handle; starpu_data_handle_t vector2_handle; #define VECTOR_SIZE 123 static int vector[VECTOR_SIZE]; static int vector2[VECTOR_SIZE]; struct test_config vector_config = { .cpu_func = test_vector_cpu_func, #ifdef STARPU_USE_CUDA .cuda_func = test_vector_cuda_func, #endif #ifdef STARPU_USE_OPENCL .opencl_func = test_vector_opencl_func, #endif .handle = &vector_handle, .ptr = vector, .dummy_handle = &vector2_handle, .dummy_ptr = vector2, .copy_failed = SUCCESS, .name = "vector_interface" }; static void register_data(void) { /* Initializing data */ int i; for (i = 0; i < VECTOR_SIZE; i++) vector[i] = i; /* Registering data */ starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, VECTOR_SIZE, sizeof(int)); starpu_vector_data_register(&vector2_handle, STARPU_MAIN_RAM, (uintptr_t)vector2, VECTOR_SIZE, sizeof(int)); } static void unregister_data(void) { starpu_data_unregister(vector_handle); starpu_data_unregister(vector2_handle); } void test_vector_cpu_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); int *val = (int *) STARPU_VECTOR_GET_PTR(buffers[0]); int factor = *(int*)args; unsigned int i; for (i = 0; i < n; i++) { if (val[i] != (int)i*factor) { vector_config.copy_failed = FAILURE; return; } val[i] = -val[i]; } } int main(int argc, char **argv) { struct data_interface_test_summary summary; struct starpu_conf conf; starpu_conf_init(&conf); conf.ncuda = 2; conf.nopencl = 1; int ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); register_data(); run_tests(&vector_config, &summary); unregister_data(); starpu_shutdown(); data_interface_test_summary_print(stderr, &summary); return data_interface_test_summary_success(&summary); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/vector/vector_opencl.c000066400000000000000000000065701507764646700257470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #define KERNEL_LOCATION "tests/datawizard/interfaces/vector/vector_opencl_kernel.cl" extern struct test_config vector_config; static struct starpu_opencl_program opencl_program; void test_vector_opencl_func(void *buffers[], void *args) { STARPU_SKIP_IF_VALGRIND; int id, devid, ret; int factor = *(int *) args; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_event event; ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); cl_context context; id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); starpu_opencl_get_context(devid, &context); cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &vector_config.copy_failed, &err); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "test_vector_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); int nargs; nargs = starpu_opencl_set_kernel_args(&err, &kernel, sizeof(val), &val, sizeof(n), &n, sizeof(fail), &fail, sizeof(factor), &factor, 0); if (nargs != 4) { fprintf(stderr, "Failed to set argument #%d\n", err); STARPU_OPENCL_REPORT_ERROR(err); } { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local = global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } err = clEnqueueReadBuffer(queue, fail, CL_TRUE, 0, sizeof(int), &vector_config.copy_failed, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); starpu_opencl_collect_stats(event); clReleaseEvent(event); starpu_opencl_release_kernel(kernel); ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); } starpu-1.4.9+dfsg/tests/datawizard/interfaces/vector/vector_opencl_kernel.cl000066400000000000000000000016601507764646700274560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void test_vector_opencl(__global int *val, unsigned int nx, __global int *err, int factor) { const int i = get_global_id(0); if (i >= nx) return; if (val[i] != i * factor) *err = 1; else val[i] = - val[i]; } starpu-1.4.9+dfsg/tests/datawizard/interfaces/void/000077500000000000000000000000001507764646700223705ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/datawizard/interfaces/void/void_interface.c000066400000000000000000000037321507764646700255220ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../test_interfaces.h" #include "../../../helper.h" void fake_func(void *buffers[], void *arg) { (void) buffers; (void) arg; } static starpu_data_handle_t void_handle; static starpu_data_handle_t void2_handle; struct test_config void_config = { .cpu_func = fake_func, #ifdef STARPU_USE_CUDA .cuda_func = fake_func, #endif #ifdef STARPU_USE_OPENCL .opencl_func = fake_func, #endif .handle = &void_handle, .dummy_handle = &void2_handle, .copy_failed = SUCCESS, .name = "void_interface" }; static void register_data(void) { starpu_void_data_register(&void_handle); starpu_void_data_register(&void2_handle); } static void unregister_data(void) { starpu_data_unregister(void_handle); starpu_data_unregister(void2_handle); } int main(int argc, char **argv) { struct data_interface_test_summary summary; struct starpu_conf conf; starpu_conf_init(&conf); conf.ncuda = 2; conf.nopencl = 1; int ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); register_data(); run_tests(&void_config, &summary); unregister_data(); starpu_shutdown(); data_interface_test_summary_print(stderr, &summary); return data_interface_test_summary_success(&summary); } starpu-1.4.9+dfsg/tests/datawizard/invalidate_pending_requests.c000066400000000000000000000032641507764646700252340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Try invalidating a variable which is pending a request */ #define SIZE (100<<20) int main(void) { int ret; char *var = NULL; starpu_data_handle_t handle; ret = starpu_init(NULL); if (ret == -ENODEV) goto skip; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) == 0 && starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER) == 0) goto enodev; var = malloc(SIZE); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)var, SIZE); /* Let a request fly */ starpu_fxt_trace_user_event_string("requesting"); starpu_data_fetch_on_node(handle, 1, 1); starpu_fxt_trace_user_event_string("requested"); /* But suddenly invalidate the data while it's on the fly! */ starpu_data_invalidate_submit(handle); starpu_fxt_trace_user_event_string("invalidated"); starpu_data_unregister(handle); free(var); starpu_shutdown(); return 0; enodev: starpu_shutdown(); skip: return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/lazy_allocation.c000066400000000000000000000133221507764646700226350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Trigger lazy allocation by registering NULL, then setting a value, and * then checking it */ #define VECTORSIZE 1024 static starpu_data_handle_t v_handle; /* * Memset */ #ifdef STARPU_USE_CUDA static void cuda_memset_codelet(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned length = STARPU_VECTOR_GET_NX(descr[0]); cudaMemsetAsync(buf, 42, length, starpu_cuda_get_local_stream()); } #endif #ifdef STARPU_USE_OPENCL static void opencl_memset_codelet(void *buffers[], void *args) { (void) args; cl_command_queue queue; int id = starpu_worker_get_id_check(); int devid = starpu_worker_get_devid(id); starpu_opencl_get_queue(devid, &queue); cl_mem buffer = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); unsigned length = STARPU_VECTOR_GET_NX(buffers[0]); char *v = malloc(length); STARPU_ASSERT(v != NULL); memset(v, 42, length); cl_int err; err = clEnqueueWriteBuffer(queue, buffer, CL_FALSE, 0, /* offset */ length, /* sizeof (char) */ v, 0, /* num_events_in_wait_list */ NULL, /* event_wait_list */ NULL /* event */); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); } #endif void cpu_memset_codelet(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned length = STARPU_VECTOR_GET_NX(descr[0]); memset(buf, 42, length * sizeof(*buf)); } static struct starpu_codelet memset_cl = { .cpu_funcs = {cpu_memset_codelet}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_memset_codelet}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_memset_codelet}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs_name = {"cpu_memset_codelet"}, .nbuffers = 1, .modes = {STARPU_W} }; /* * Check content */ void cpu_check_content_codelet(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned length = STARPU_VECTOR_GET_NX(descr[0]); unsigned i; for (i = 0; i < length; i++) { if (buf[i] != 42) { FPRINTF(stderr, "buf[%u] is %c while it should be %c\n", i, buf[i], 42); exit(-1); } } } #ifdef STARPU_USE_CUDA static void cuda_check_content_codelet(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned length = STARPU_VECTOR_GET_NX(descr[0]); unsigned i; for (i = 0; i < length; i++) { char dst; cudaMemcpyAsync(&dst, &buf[i], sizeof(char), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); cudaStreamSynchronize(starpu_cuda_get_local_stream()); if (dst != 42) { FPRINTF(stderr, "buf[%u] is %c while it should be %c\n", i, dst, 42); exit(-1); } } } #endif #ifdef STARPU_USE_OPENCL static void opencl_check_content_codelet(void *buffers[], void *args) { (void)args; STARPU_SKIP_IF_VALGRIND; cl_command_queue queue; int id = starpu_worker_get_id_check(); int devid = starpu_worker_get_devid(id); starpu_opencl_get_queue(devid, &queue); cl_mem buf = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); unsigned length = STARPU_VECTOR_GET_NX(buffers[0]); unsigned i; for (i = 0; i < length; i++) { char dst; cl_int err; err = clEnqueueReadBuffer(queue, buf, CL_FALSE, i * sizeof(dst), sizeof(dst), &dst, 0, /* num_events_in_wait_list */ NULL, /* event_wait_list */ NULL /* event */); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); if (dst != 42) { FPRINTF(stderr, "buf[%u] is '%c' while it should be '%c'\n", i, dst, 42); exit(-1); } } } #endif static struct starpu_codelet check_content_cl = { .cpu_funcs = {cpu_check_content_codelet}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_check_content_codelet}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_check_content_codelet}, #endif .cpu_funcs_name = {"cpu_check_content_codelet"}, .nbuffers = 1, .modes = {STARPU_R} }; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_vector_data_register(&v_handle, (uint32_t)-1, (uintptr_t)NULL, VECTORSIZE, sizeof(char)); ret = starpu_task_insert(&memset_cl, STARPU_W, v_handle, 0); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); ret = starpu_task_insert(&check_content_cl, STARPU_R, v_handle, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(v_handle); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/datawizard/locality.c000066400000000000000000000100711507764646700212670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This is a dumb sample of stencil application * * Dumb domain split in N pieces: * * 0 | 1 | ... | N-1 * * for each simulation iteration, a task works on some adjacent pieces * * Locality is thus set on the central piece. */ #include #include "../helper.h" #ifdef STARPU_USE_FXT #define N 5 #define ITER 5 #else #define N 50 #define ITER 50 #endif int task_worker[N][ITER]; int worker_task[STARPU_NMAXWORKERS][N*ITER]; unsigned worker_ntask[STARPU_NMAXWORKERS]; void cpu_f(void *descr[], void *_args) { (void)descr; unsigned i, loop, worker = starpu_worker_get_id(); enum starpu_worker_archtype worker_type = starpu_worker_get_type(worker); starpu_codelet_unpack_args(_args, &loop, &i); task_worker[i][loop] = worker; worker_task[worker][worker_ntask[worker]++] = i; if (worker_type == STARPU_CPU_WORKER) starpu_sleep(0.001); else starpu_sleep(0.0001); } double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) { (void) t; (void) i; STARPU_ASSERT(a->ndevices == 1); if (a->devices[0].type == STARPU_CPU_WORKER) { STARPU_ASSERT(a->devices[0].ncores == 1); return 1000; } else return 100; } static struct starpu_perfmodel perf_model = { .type = STARPU_PER_ARCH, .arch_cost_function = cost_function, }; static struct starpu_codelet cl = { .cpu_funcs = { cpu_f }, .cpu_funcs_name = { "cpu_f" }, .cuda_funcs = { cpu_f }, .opencl_funcs = { cpu_f }, .nbuffers = 4, .modes = { STARPU_RW, STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY, STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY, STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY, }, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .model = &perf_model, }; int main(int argc, char *argv[]) { int ret; starpu_data_handle_t A[N]; starpu_data_handle_t B[N]; unsigned i, loop, finished; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Get most parallelism by using an arbiter */ starpu_arbiter_t arbiter = starpu_arbiter_create(); for (i = 0; i < N; i++) { starpu_void_data_register(&A[i]); starpu_void_data_register(&B[i]); starpu_data_assign_arbiter(A[i], arbiter); } for (loop = 0; loop < ITER; loop++) { for (i = 1; i < N-1; i++) { ret = starpu_task_insert(&cl, STARPU_RW, B[i], STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY, A[i-1], STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY, A[i], STARPU_RW | STARPU_COMMUTE | STARPU_LOCALITY, A[i+1], STARPU_VALUE, &loop, sizeof(loop), STARPU_VALUE, &i, sizeof(i), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } } starpu_task_wait_for_all(); for (i = 0; i < N; i++) { starpu_data_unregister(A[i]); starpu_data_unregister(B[i]); } starpu_arbiter_destroy(arbiter); printf("worker where each domain piece was computed, over time\n"); for (loop = 0; loop < ITER; loop++) { for (i = 1; i < N-1; i++) { printf("%02d ", task_worker[i][loop]); } printf("\n"); } printf("\n"); printf("domain piece that each worker has computed, over time\n"); loop = 0; do { finished = 1; for (i = 0; i < starpu_worker_get_count(); i++) { if (loop < worker_ntask[i]) { printf("%02d ", worker_task[i][loop]); finished = 0; } else printf(" "); } loop++; printf("\n"); } while (!finished && loop < 100); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/datawizard/locality.sh000077500000000000000000000030321507764646700214610ustar00rootroot00000000000000#!/bin/sh -x # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2018-2018 Federal University of Rio Grande do Sul (UFRGS) # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Test generation of FxT traces # Testing another specific scheduler, no need to run this [ -z "$STARPU_SCHED" -o "$STARPU_SCHED" = modular-eager ] || exit 77 set -e PREFIX=$(dirname $0) rm -rf $PREFIX/locality.traces mkdir -p $PREFIX/locality.traces test -x $PREFIX/../../tools/starpu_fxt_tool || exit 77 export STARPU_FXT_PREFIX=$PREFIX/locality.traces STARPU_FXT_TRACE=1 STARPU_SCHED=modular-eager $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/locality $STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_tool -d $STARPU_FXT_PREFIX -memory-states -label-deps -i $STARPU_FXT_PREFIX/prof_file_${USER}_0 # Check that they are approved by Grenoble :) if type pj_dump > /dev/null 2> /dev/null then $PREFIX/../../tools/starpu_paje_sort $STARPU_FXT_PREFIX/paje.trace pj_dump -e 0 $STARPU_FXT_PREFIX/paje.trace fi starpu-1.4.9+dfsg/tests/datawizard/manual_reduction.c000066400000000000000000000214511507764646700230040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Allocate one buffer per worker, doing computations with it, and * eventually reducing it into a single buffer */ #define INIT_VALUE 42 #define NTASKS 10000 static unsigned variable; static starpu_data_handle_t variable_handle; static uintptr_t per_worker[STARPU_NMAXWORKERS]; static starpu_data_handle_t per_worker_handle[STARPU_NMAXWORKERS]; static unsigned ndone; /* Create per-worker handles */ static void initialize_per_worker_handle(void *arg) { (void)arg; int workerid = starpu_worker_get_id_check(); /* Allocate memory on the worker, and initialize it to 0 */ switch (starpu_worker_get_type(workerid)) { case STARPU_CPU_WORKER: per_worker[workerid] = (uintptr_t)calloc(1, sizeof(variable)); break; #ifdef STARPU_USE_OPENCL case STARPU_OPENCL_WORKER: { cl_context context; cl_command_queue queue; cl_int err; starpu_opencl_get_current_context(&context); starpu_opencl_get_current_queue(&queue); cl_mem ptr = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(variable), NULL, NULL); /* Poor's man memset */ unsigned zero = 0; err = clEnqueueWriteBuffer(queue, ptr, CL_FALSE, 0, sizeof(variable), (void *)&zero, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); per_worker[workerid] = (uintptr_t)ptr; } break; #endif #ifdef STARPU_USE_CUDA case STARPU_CUDA_WORKER: { cudaError_t status; status = cudaMalloc((void **)&per_worker[workerid], sizeof(variable)); if (!per_worker[workerid] || (status != cudaSuccess)) { STARPU_CUDA_REPORT_ERROR(status); } status = cudaMemsetAsync((void *)per_worker[workerid], 0, sizeof(variable), starpu_cuda_get_local_stream()); if (!status) status = cudaStreamSynchronize(starpu_cuda_get_local_stream()); if (status) STARPU_CUDA_REPORT_ERROR(status); break; } #endif default: STARPU_ABORT(); break; } FPRINTF(stderr, "worker %d got data %lx\n", workerid, (unsigned long) per_worker[workerid]); STARPU_ASSERT(per_worker[workerid]); } /* * Implement reduction method */ void cpu_redux_func(void *descr[], void *cl_arg) { (void)cl_arg; unsigned *a = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); unsigned *b = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); FPRINTF(stderr, "%u = %u + %u\n", *a + *b, *a, *b); *a = *a + *b; } static struct starpu_codelet reduction_codelet = { .cpu_funcs = {cpu_redux_func}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_R}, .model = NULL }; /* * Use per-worker local copy */ void cpu_func_incr(void *descr[], void *cl_arg) { (void)cl_arg; unsigned *val = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); *val = *val + 1; STARPU_ATOMIC_ADD(&ndone, 1); } #ifdef STARPU_USE_CUDA /* dummy CUDA implementation */ static void cuda_func_incr(void *descr[], void *cl_arg) { (void)cl_arg; STARPU_SKIP_IF_VALGRIND; unsigned *val = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); unsigned h_val, h_val2; cudaError_t status; status = cudaMemcpyAsync(&h_val, val, sizeof(unsigned), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); if (status) STARPU_CUDA_REPORT_ERROR(status); status = cudaStreamSynchronize(starpu_cuda_get_local_stream()); if (status) STARPU_CUDA_REPORT_ERROR(status); h_val++; status = cudaMemcpyAsync(val, &h_val, sizeof(unsigned), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); if (status) STARPU_CUDA_REPORT_ERROR(status); status = cudaStreamSynchronize(starpu_cuda_get_local_stream()); if (status) STARPU_CUDA_REPORT_ERROR(status); status = cudaMemcpyAsync(&h_val2, val, sizeof(unsigned), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); if (status) STARPU_CUDA_REPORT_ERROR(status); status = cudaStreamSynchronize(starpu_cuda_get_local_stream()); if (status) STARPU_CUDA_REPORT_ERROR(status); STARPU_ASSERT_MSG(h_val2 == h_val, "%lx should be %u, not %u, I have just written it ?!\n", (unsigned long)(uintptr_t) val, h_val, h_val2); STARPU_ATOMIC_ADD(&ndone, 1); } #endif #ifdef STARPU_USE_OPENCL /* dummy OpenCL implementation */ static void opencl_func_incr(void *descr[], void *cl_arg) { (void)cl_arg; STARPU_SKIP_IF_VALGRIND; cl_mem d_val = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]); unsigned h_val; cl_int err; cl_command_queue queue; starpu_opencl_get_current_queue(&queue); err = clEnqueueReadBuffer(queue, d_val, CL_FALSE, 0, sizeof(unsigned), (void *)&h_val, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); h_val++; err = clEnqueueWriteBuffer(queue, d_val, CL_FALSE, 0, sizeof(unsigned), (void *)&h_val, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); STARPU_ATOMIC_ADD(&ndone, 1); } #endif static struct starpu_codelet use_data_on_worker_codelet = { .cpu_funcs = {cpu_func_incr}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_func_incr}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_func_incr}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .model = NULL }; int main(int argc, char **argv) { unsigned worker; unsigned i; int ret; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.ncuda = -1; conf.nopencl = -1; variable = INIT_VALUE; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned nworkers = starpu_worker_get_count(); starpu_variable_data_register(&variable_handle, STARPU_MAIN_RAM, (uintptr_t)&variable, sizeof(unsigned)); /* Allocate a per-worker handle on each worker (and initialize it to 0) */ starpu_execute_on_each_worker(initialize_per_worker_handle, NULL, STARPU_CPU|STARPU_CUDA|STARPU_OPENCL); /* Register all per-worker handles */ for (worker = 0; worker < nworkers; worker++) { STARPU_ASSERT(per_worker[worker]); unsigned memory_node = starpu_worker_get_memory_node(worker); starpu_variable_data_register(&per_worker_handle[worker], memory_node, per_worker[worker], sizeof(variable)); } /* Submit NTASKS tasks to the different worker to simulate the usage of a data in reduction */ for (i = 0; i < NTASKS; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &use_data_on_worker_codelet; int workerid = (i % nworkers); task->handles[0] = per_worker_handle[workerid]; task->execute_on_a_specific_worker = 1; task->workerid = (unsigned)workerid; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* Perform the reduction of all per-worker handles into the variable_handle */ for (worker = 0; worker < nworkers; worker++) { struct starpu_task *task = starpu_task_create(); task->cl = &reduction_codelet; task->handles[0] = variable_handle; task->handles[1] = per_worker_handle[worker]; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_data_unregister(variable_handle); /* Destroy all per-worker handles */ for (worker = 0; worker < nworkers; worker++) { starpu_data_unregister_no_coherency(per_worker_handle[worker]); switch(starpu_worker_get_type(worker)) { case STARPU_CPU_WORKER: free((void*)per_worker[worker]); break; #ifdef STARPU_USE_CUDA case STARPU_CUDA_WORKER: cudaFree((void*)per_worker[worker]); break; #endif /* !STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL case STARPU_OPENCL_WORKER: clReleaseMemObject((void*)per_worker[worker]); break; #endif /* !STARPU_USE_OPENCL */ default: STARPU_ABORT(); } } starpu_shutdown(); if (variable == INIT_VALUE + NTASKS) ret = EXIT_SUCCESS; else { FPRINTF(stderr, "%u != %d + %d\n", variable, INIT_VALUE, NTASKS); FPRINTF(stderr, "ndone: %u\n", ndone); ret = EXIT_FAILURE; } STARPU_RETURN(ret); enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); starpu_task_wait_for_all(); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/mpi_like.c000066400000000000000000000134071507764646700212460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" #include "../variable/increment.h" /* * Mimic the behavior of libstarpumpi, tested by a ring of threads which * increment the same variable one after the other. * This is the synchronous version: the threads wait for completion of each * step before continuing. */ #define NTHREADS 4 #define NITER 2 //static starpu_pthread_cond_t cond; //static starpu_pthread_mutex_t mutex; struct thread_data { unsigned index; unsigned val; starpu_data_handle_t handle; starpu_pthread_t thread; starpu_pthread_cond_t recv_cond; starpu_pthread_mutex_t recv_mutex; unsigned recv_flag; // set when a message is received unsigned recv_buf; struct thread_data *neighbour; }; static struct thread_data problem_data[NTHREADS]; /* We implement some ring transfer, every thread will try to receive a piece of * data from its neighbour and increment it before transmitting it to its * successor. */ static void increment_handle(struct thread_data *thread_data) { struct starpu_task *task = starpu_task_create(); task->cl = &increment_cl; task->handles[0] = thread_data->handle; task->cl_arg = thread_data; task->cl_arg_size = sizeof(thread_data); task->destroy = 1; task->detach = 0; int ret = starpu_task_submit(task); if (ret == -ENODEV) exit(STARPU_TEST_SKIPPED); STARPU_ASSERT(!ret); ret = starpu_task_wait(task); STARPU_ASSERT(!ret); } static void recv_handle(struct thread_data *thread_data) { starpu_data_acquire(thread_data->handle, STARPU_W); STARPU_PTHREAD_MUTEX_LOCK(&thread_data->recv_mutex); /* We wait for the previous thread to notify that the data is available */ while (!thread_data->recv_flag) STARPU_PTHREAD_COND_WAIT(&thread_data->recv_cond, &thread_data->recv_mutex); /* We overwrite thread's data with the received value */ thread_data->val = thread_data->recv_buf; /* Notify that we read the value */ thread_data->recv_flag = 0; STARPU_PTHREAD_COND_SIGNAL(&thread_data->recv_cond); // FPRINTF(stderr, "Thread %d received value %d from thread %d\n", thread_data->index, thread_data->val, (thread_data->index - 1)%NTHREADS); STARPU_PTHREAD_MUTEX_UNLOCK(&thread_data->recv_mutex); starpu_data_release(thread_data->handle); } static void send_handle(struct thread_data *thread_data) { struct thread_data *neighbour_data = thread_data->neighbour; starpu_data_acquire(thread_data->handle, STARPU_R); // FPRINTF(stderr, "Thread %d sends value %d to thread %d\n", thread_data->index, thread_data->val, neighbour_data->index); /* send the message */ STARPU_PTHREAD_MUTEX_LOCK(&neighbour_data->recv_mutex); neighbour_data->recv_buf = thread_data->val; neighbour_data->recv_flag = 1; STARPU_PTHREAD_COND_SIGNAL(&neighbour_data->recv_cond); /* wait until it's received (ie. neighbour's recv_flag is set back to 0) */ while (neighbour_data->recv_flag) STARPU_PTHREAD_COND_WAIT(&neighbour_data->recv_cond, &neighbour_data->recv_mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&neighbour_data->recv_mutex); starpu_data_release(thread_data->handle); } static void *thread_func(void *arg) { unsigned iter; struct thread_data *thread_data = (struct thread_data *) arg; unsigned index = thread_data->index; starpu_variable_data_register(&thread_data->handle, STARPU_MAIN_RAM, (uintptr_t)&thread_data->val, sizeof(unsigned)); for (iter = 0; iter < NITER; iter++) { /* The first thread initiates the first transfer */ if (!((index == 0) && (iter == 0))) { recv_handle(thread_data); } increment_handle(thread_data); if (!((index == (NTHREADS - 1)) && (iter == (NITER - 1)))) { send_handle(thread_data); } } return NULL; } int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); increment_load_opencl(); unsigned t; for (t = 0; t < NTHREADS; t++) { problem_data[t].index = t; problem_data[t].val = 0; STARPU_PTHREAD_COND_INIT(&problem_data[t].recv_cond, NULL); STARPU_PTHREAD_MUTEX_INIT(&problem_data[t].recv_mutex, NULL); problem_data[t].recv_flag = 0; problem_data[t].neighbour = &problem_data[(t+1)%NTHREADS]; } for (t = 0; t < NTHREADS; t++) { STARPU_PTHREAD_CREATE(&problem_data[t].thread, NULL, thread_func, &problem_data[t]); } for (t = 0; t < NTHREADS; t++) { void *retval; STARPU_PTHREAD_JOIN(problem_data[t].thread, &retval); STARPU_ASSERT(retval == NULL); } /* We check that the value in the "last" thread is valid */ starpu_data_handle_t last_handle = problem_data[NTHREADS - 1].handle; starpu_data_acquire(last_handle, STARPU_R); starpu_data_release(last_handle); for (t = 0; t < NTHREADS; t++) { starpu_data_unregister(problem_data[t].handle); } increment_unload_opencl(); starpu_shutdown(); ret = EXIT_SUCCESS; if (problem_data[NTHREADS - 1].val != (NTHREADS * NITER)) { FPRINTF(stderr, "Final value : %u should be %d\n", problem_data[NTHREADS - 1].val, (NTHREADS * NITER)); ret = EXIT_FAILURE; } else FPRINTF(stderr, "Final value : %u (niter %u nthread %u)\n", problem_data[NTHREADS - 1].val, NITER, NTHREADS); return ret; } starpu-1.4.9+dfsg/tests/datawizard/mpi_like_async.c000066400000000000000000000217221507764646700224420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include #include "../variable/increment.h" /* * Mimic the behavior of libstarpumpi, tested by a ring of threads which * increment the same variable one after the other. * This is the asynchronous version: the threads submit the series of * synchronizations and tasks. */ #ifdef STARPU_QUICK_CHECK # define NTHREADS_DEFAULT 4 # define NITER_DEFAULT 8 #else # define NTHREADS_DEFAULT 16 # define NITER_DEFAULT 128 #endif static unsigned nthreads = NTHREADS_DEFAULT; static unsigned niter = NITER_DEFAULT; //#define DEBUG_MESSAGES 1 //static starpu_pthread_cond_t cond; //static starpu_pthread_mutex_t mutex; struct thread_data { unsigned index; unsigned val; starpu_data_handle_t handle; starpu_pthread_t thread; starpu_pthread_mutex_t recv_mutex; unsigned recv_flag; // set when a message is received unsigned recv_buf; struct thread_data *neighbour; }; struct data_req { int (*test_func)(void *); void *test_arg; struct data_req *next; }; static starpu_pthread_mutex_t data_req_mutex; static starpu_pthread_cond_t data_req_cond; struct data_req *data_req_list; unsigned progress_thread_running; static struct thread_data problem_data[NTHREADS_DEFAULT]; /* We implement some ring transfer, every thread will try to receive a piece of * data from its neighbour and increment it before transmitting it to its * successor. */ static void increment_handle_async(struct thread_data *thread_data) { struct starpu_task *task = starpu_task_create(); task->cl = &increment_cl; task->handles[0] = thread_data->handle; task->detach = 1; task->destroy = 1; int ret = starpu_task_submit(task); if (ret == -ENODEV) exit(STARPU_TEST_SKIPPED); STARPU_ASSERT(!ret); } static int test_recv_handle_async(void *arg) { // FPRINTF(stderr, "test_recv_handle_async\n"); int ret; struct thread_data *thread_data = (struct thread_data *) arg; STARPU_PTHREAD_MUTEX_LOCK(&thread_data->recv_mutex); ret = (thread_data->recv_flag == 1); if (ret) { thread_data->recv_flag = 0; thread_data->val = thread_data->recv_buf; } STARPU_PTHREAD_MUTEX_UNLOCK(&thread_data->recv_mutex); if (ret) { #ifdef DEBUG_MESSAGES FPRINTF(stderr, "Thread %u received value %u from thread %d\n", thread_data->index, thread_data->val, (thread_data->index - 1)%nthreads); #endif starpu_data_release(thread_data->handle); } return ret; } static void recv_handle_async(void *_thread_data) { struct thread_data *thread_data = (struct thread_data *) _thread_data; struct data_req *req = (struct data_req *) malloc(sizeof(struct data_req)); req->test_func = test_recv_handle_async; req->test_arg = thread_data; STARPU_PTHREAD_MUTEX_LOCK(&data_req_mutex); req->next = data_req_list; data_req_list = req; STARPU_PTHREAD_COND_SIGNAL(&data_req_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&data_req_mutex); } static int test_send_handle_async(void *arg) { int ret; struct thread_data *thread_data = (struct thread_data *) arg; struct thread_data *neighbour_data = thread_data->neighbour; STARPU_PTHREAD_MUTEX_LOCK(&neighbour_data->recv_mutex); ret = (neighbour_data->recv_flag == 0); STARPU_PTHREAD_MUTEX_UNLOCK(&neighbour_data->recv_mutex); if (ret) { #ifdef DEBUG_MESSAGES FPRINTF(stderr, "Thread %u sends value %u to thread %u\n", thread_data->index, thread_data->val, neighbour_data->index); #endif starpu_data_release(thread_data->handle); } return ret; } static void send_handle_async(void *_thread_data) { struct thread_data *thread_data = (struct thread_data *) _thread_data; struct thread_data *neighbour_data = thread_data->neighbour; // FPRINTF(stderr, "send_handle_async\n"); /* send the message */ STARPU_PTHREAD_MUTEX_LOCK(&neighbour_data->recv_mutex); neighbour_data->recv_buf = thread_data->val; neighbour_data->recv_flag = 1; STARPU_PTHREAD_MUTEX_UNLOCK(&neighbour_data->recv_mutex); struct data_req *req = (struct data_req *) malloc(sizeof(struct data_req)); req->test_func = test_send_handle_async; req->test_arg = thread_data; STARPU_PTHREAD_MUTEX_LOCK(&data_req_mutex); req->next = data_req_list; data_req_list = req; STARPU_PTHREAD_COND_SIGNAL(&data_req_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&data_req_mutex); } static void *progress_func(void *arg) { (void)arg; STARPU_PTHREAD_MUTEX_LOCK(&data_req_mutex); progress_thread_running = 1; STARPU_PTHREAD_COND_SIGNAL(&data_req_cond); while (progress_thread_running || data_req_list) { struct data_req *req; if (data_req_list == NULL) STARPU_PTHREAD_COND_WAIT(&data_req_cond, &data_req_mutex); req = data_req_list; if (req) { data_req_list = req->next; req->next = NULL; STARPU_PTHREAD_MUTEX_UNLOCK(&data_req_mutex); STARPU_VALGRIND_YIELD(); int ret = req->test_func(req->test_arg); if (ret) { free(req); STARPU_PTHREAD_MUTEX_LOCK(&data_req_mutex); } else { /* ret = 0 : the request is not finished, we put it back at the end of the list */ STARPU_PTHREAD_MUTEX_LOCK(&data_req_mutex); struct data_req *req_aux = data_req_list; if (!req_aux) { /* The list is empty */ data_req_list = req; } else { while (req_aux) { if (req_aux->next == NULL) { req_aux->next = req; break; } req_aux = req_aux->next; } } } } } STARPU_PTHREAD_MUTEX_UNLOCK(&data_req_mutex); return NULL; } static void *thread_func(void *arg) { unsigned iter; struct thread_data *thread_data = (struct thread_data *) arg; unsigned index = thread_data->index; int ret; starpu_variable_data_register(&thread_data->handle, STARPU_MAIN_RAM, (uintptr_t)&thread_data->val, sizeof(unsigned)); for (iter = 0; iter < niter; iter++) { /* The first thread initiates the first transfer */ if (!((index == 0) && (iter == 0))) { starpu_data_acquire_cb( thread_data->handle, STARPU_W, recv_handle_async, thread_data ); } increment_handle_async(thread_data); if (!((index == (nthreads - 1)) && (iter == (niter - 1)))) { starpu_data_acquire_cb( thread_data->handle, STARPU_R, send_handle_async, thread_data ); } } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); return NULL; } int main(int argc, char **argv) { int ret; void *retval; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); increment_load_opencl(); /* Create a thread to perform blocking calls */ starpu_pthread_t progress_thread; STARPU_PTHREAD_MUTEX_INIT(&data_req_mutex, NULL); STARPU_PTHREAD_COND_INIT(&data_req_cond, NULL); data_req_list = NULL; progress_thread_running = 0; unsigned t; for (t = 0; t < nthreads; t++) { problem_data[t].index = t; problem_data[t].val = 0; STARPU_PTHREAD_MUTEX_INIT(&problem_data[t].recv_mutex, NULL); problem_data[t].recv_flag = 0; problem_data[t].neighbour = &problem_data[(t+1)%nthreads]; } STARPU_PTHREAD_CREATE(&progress_thread, NULL, progress_func, NULL); STARPU_PTHREAD_MUTEX_LOCK(&data_req_mutex); while (!progress_thread_running) STARPU_PTHREAD_COND_WAIT(&data_req_cond, &data_req_mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&data_req_mutex); for (t = 0; t < nthreads; t++) { STARPU_PTHREAD_CREATE(&problem_data[t].thread, NULL, thread_func, &problem_data[t]); } for (t = 0; t < nthreads; t++) { STARPU_PTHREAD_JOIN(problem_data[t].thread, &retval); STARPU_ASSERT(retval == NULL); } STARPU_PTHREAD_MUTEX_LOCK(&data_req_mutex); progress_thread_running = 0; STARPU_PTHREAD_COND_SIGNAL(&data_req_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&data_req_mutex); STARPU_PTHREAD_JOIN(progress_thread, &retval); STARPU_ASSERT(retval == NULL); /* We check that the value in the "last" thread is valid */ starpu_data_handle_t last_handle = problem_data[nthreads - 1].handle; starpu_data_acquire(last_handle, STARPU_R); ret = EXIT_SUCCESS; if (problem_data[nthreads - 1].val != (nthreads * niter)) { FPRINTF(stderr, "Final value : %u should be %u\n", problem_data[nthreads - 1].val, (nthreads * niter)); ret = EXIT_FAILURE; } else FPRINTF(stderr, "Final value : %u (niter %u nthread %u)\n", problem_data[nthreads - 1].val, niter, nthreads); starpu_data_release(last_handle); for (t = 0; t < nthreads; t++) { starpu_data_unregister(problem_data[t].handle); } increment_load_opencl(); starpu_shutdown(); return ret; } starpu-1.4.9+dfsg/tests/datawizard/no_unregister.c000066400000000000000000000044221507764646700223350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Check that not unregistering a data is not too crashy */ void dummy_func(void ** buffers, void * args) { (void) buffers; (void) args; } static struct starpu_codelet dummy_cl = { .modes = { STARPU_RW }, .cpu_funcs = { dummy_func }, .cpu_funcs_name = { "dummy_func" }, .nbuffers = 1 }; int main(void) { int ret; int buffer[1024]; starpu_data_handle_t handle; struct starpu_task *t1,*t2; struct starpu_conf conf; #ifdef STARPU_HAVE_VALGRIND_H if(RUNNING_ON_VALGRIND) return STARPU_TEST_SKIPPED; #endif #ifdef STARPU_SANITIZE_LEAK return STARPU_TEST_SKIPPED; #endif starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)buffer, 1024*sizeof(int)); t1 = starpu_task_create(); t2 = starpu_task_create(); t2->cl = &dummy_cl; t2->detach = 0; t2->handles[0] = handle; starpu_task_declare_deps_array(t2, 1, &t1); ret = starpu_task_submit(t2); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(t1); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(t2); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/datawizard/noreclaim.c000066400000000000000000000070331507764646700214240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* * Stress the memory allocation system and force StarPU to reclaim memory from * time to time. */ #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else void dummy_func(void *descr[], void *_args) { (void)descr; (void)_args; } static struct starpu_codelet dummy_cl = { .cpu_funcs = {dummy_func}, .cpu_funcs_name = {"dummy_func"}, .nbuffers = 1, .modes = {STARPU_W} }; static void emit_task(starpu_data_handle_t handle) { struct starpu_task *task = starpu_task_create(); int ret; task->cl = &dummy_cl; task->handles[0] = handle; ret = starpu_task_submit(task); STARPU_ASSERT(ret == 0); } static struct starpu_codelet empty_cl = { .cpu_funcs = {dummy_func}, .cpu_funcs_name = {"dummy_func"}, .nbuffers = 0, }; static void emit_empty_task(void) { struct starpu_task *task = starpu_task_create(); int ret; task->cl = &empty_cl; ret = starpu_task_submit(task); STARPU_ASSERT(ret == 0); } #define TOTAL "100" #define FILL (99*1024*1024) int main(int argc, char **argv) { int ret; struct starpu_conf conf; starpu_data_handle_t handle; void *allocated; setenv("STARPU_LIMIT_CPU_NUMA_MEM", TOTAL, 1); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = 1; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu_variable_data_register(&handle, -1, 0, FILL); /* This makes the data allocated */ emit_task(handle); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); ret = starpu_malloc_flags(&allocated, FILL, STARPU_MALLOC_COUNT); /* Room should be busy due to data */ STARPU_ASSERT(ret == -ENOMEM); ret = starpu_malloc_flags(&allocated, FILL, STARPU_MALLOC_COUNT|STARPU_MALLOC_NORECLAIM); /* But we should be able to tell we don't care */ STARPU_ASSERT(ret == 0); ((char*)allocated)[FILL-1] = 0; starpu_free_flags(allocated, FILL, STARPU_MALLOC_COUNT); /* Release the automatically allocated data */ starpu_data_unregister(handle); /* Memory may not be available immediately, make sure the driver has * the opportunity to release it */ emit_empty_task(); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); emit_empty_task(); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); ret = starpu_malloc_flags(&allocated, FILL, STARPU_MALLOC_COUNT); /* Room should now be available */ STARPU_ASSERT(ret == 0); starpu_free_flags(allocated, FILL, STARPU_MALLOC_COUNT); starpu_shutdown(); return EXIT_SUCCESS; } #endif starpu-1.4.9+dfsg/tests/datawizard/nowhere.c000066400000000000000000000073261507764646700211270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Try the NOWHERE flag */ static int x, y; static void prod(void *descr[], void *arg) { (void)arg; int *v = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); *v = 1; } static struct starpu_codelet cl_prod = { .cpu_funcs = { prod }, .nbuffers = 1, .modes = { STARPU_W }, }; static void callback0(void *callback_arg) { (void)callback_arg; STARPU_ASSERT(x==0); STARPU_ASSERT(y==0); } static void callback(void *callback_arg) { (void)callback_arg; STARPU_ASSERT(x>=1); STARPU_ASSERT(y>=1); } static struct starpu_codelet cl_nowhere = { .where = STARPU_NOWHERE, .nbuffers = 2, .modes = { STARPU_R, STARPU_R }, }; static void cons(void *descr[], void *_args) { (void)_args; int *v = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); STARPU_ASSERT(*v == 1); *v = 2; } static struct starpu_codelet cl_cons = { .cpu_funcs = { cons }, .nbuffers = 1, .modes = { STARPU_RW }, }; int main(int argc, char **argv) { starpu_data_handle_t handle_x, handle_y; int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_memory_nodes_get_numa_count() > 1) { /* FIXME: assumes only one RAM node */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu_variable_data_register(&handle_x, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); starpu_variable_data_register(&handle_y, STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y)); ret = starpu_task_insert(&cl_nowhere, STARPU_R, handle_x, STARPU_R, handle_y, STARPU_CALLBACK, callback0, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_insert(&cl_prod, STARPU_W, handle_x, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_insert(&cl_prod, STARPU_W, handle_y, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_insert(&cl_nowhere, STARPU_R, handle_x, STARPU_R, handle_y, STARPU_CALLBACK, callback, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_insert(&cl_cons, STARPU_RW, handle_x, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_insert(&cl_cons, STARPU_RW, handle_y, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(handle_x); starpu_data_unregister(handle_y); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(handle_x); starpu_data_unregister(handle_y); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/numa_overflow.c000066400000000000000000000066751507764646700223510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else #ifdef STARPU_SANITIZE_ADDRESS #define ITER 2 #define N 4 #else #define ITER 10 #define N 10 #endif #define SIZE (10*1024*1024) /* * Check that when overflowing a NUMA node we manage to revert to other nodes. */ static void nop(void *descr[], void *arg) { (void)descr; (void)arg; } static struct starpu_codelet cl_r = { .cpu_funcs = { nop }, .nbuffers = 1, .modes = { STARPU_R }, }; static struct starpu_codelet cl_rw = { .cpu_funcs = { nop }, .nbuffers = 1, .modes = { STARPU_RW }, }; int main(int argc, char **argv) { starpu_data_handle_t handles[N]; uintptr_t data[N]; int ret; unsigned i, j; char s[16]; int worker; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; snprintf(s, sizeof(s), "%u", (N*3/4)*SIZE/(1024*1024)); /* We make NUMA nodes not big enough for all data */ setenv("STARPU_LIMIT_CPU_NUMA_MEM", s, 1); ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, &worker, 1) == 0 || starpu_memory_nodes_get_numa_count() <= 1) { /* We need several NUMA nodes */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } /* We distribute some data on both NUMA nodes */ for (i = 0; i < N; i++) { data[i] = starpu_malloc_on_node(i%2, SIZE); memset((void*) data[i], 0, SIZE); starpu_variable_data_register(&handles[i], i%2, data[i], SIZE); } /* And now we try to execute all tasks on worker 0, that will fail if * StarPU doesn't manage to evict some memory */ for (j = 0; j < ITER; j++) for (i = 0; i < N; i++) { if (rand() % 2 == 0) ret = starpu_task_insert(&cl_r, STARPU_R, handles[i], STARPU_EXECUTE_ON_WORKER, worker, 0); else ret = starpu_task_insert(&cl_rw, STARPU_RW, handles[i], STARPU_EXECUTE_ON_WORKER, worker, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } for (i = 0; i < N; i++) { starpu_data_unregister(handles[i]); starpu_free_on_node(i%2, data[i], SIZE); } starpu_shutdown(); return EXIT_SUCCESS; enodev: for (i = 0; i < N; i++) { starpu_data_unregister(handles[i]); starpu_free_on_node(i%2, data[i], SIZE); } fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } #endif starpu-1.4.9+dfsg/tests/datawizard/partition_dep.c000066400000000000000000000056131507764646700223160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "scal.h" /* * Test scaling a partitioned vector */ int main(int argc, char **argv) { unsigned *foo; starpu_data_handle_t handle; int ret; unsigned n, i, size; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif n = starpu_worker_get_count(); if (n == 1) { starpu_shutdown(); return STARPU_TEST_SKIPPED; } size = 10 * n; foo = (unsigned *) calloc(size, sizeof(*foo)); for (i = 0; i < size; i++) foo[i] = i; starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)foo, size, sizeof(*foo)); ret = starpu_task_insert(&scal_codelet, STARPU_RW, handle, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = n, }; starpu_data_partition(handle, &f); for (i = 0; i < f.nchildren; i++) { struct starpu_task *task = starpu_task_create(); task->handles[0] = starpu_data_get_sub_data(handle, 1, i); task->cl = &scal_codelet; task->execute_on_a_specific_worker = 1; task->workerid = i; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unregister(handle); starpu_shutdown(); ret = EXIT_SUCCESS; for (i = 0; i < size; i++) { if (foo[i] != i*2*2) { FPRINTF(stderr,"value %u is %u instead of %u\n", i, foo[i], 2*i); ret = EXIT_FAILURE; } } free(foo); return ret; enodev: starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unregister(handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/partition_init.c000066400000000000000000000054471507764646700225160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" void my_func(void *buffers[], void *cl_arg) { (void)cl_arg; unsigned nb = STARPU_VECTOR_GET_NX(buffers[0]); int *v = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); unsigned i; for(i=0 ; i #include "../helper.h" #include "scal.h" /* * Test partitioning an uninitialized vector */ struct starpu_codelet mycodelet = { .cpu_funcs = { scal_func_cpu }, #ifdef STARPU_USE_OPENCL .opencl_funcs = { scal_func_opencl }, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif #ifdef STARPU_USE_CUDA .cuda_funcs = { scal_func_cuda }, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .cpu_funcs_name = {"scal_func_cpu"}, .modes = { STARPU_W }, .model = NULL, .nbuffers = 1 }; int main(int argc, char **argv) { unsigned *foo; starpu_data_handle_t handle; int ret; int n, size; unsigned i; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif n = starpu_worker_get_count(); size = 10 * n; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(*foo)); struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = n > 1 ? n : 2, }; starpu_data_partition(handle, &f); for (i = 0; i < f.nchildren; i++) { ret = starpu_task_insert(&mycodelet, STARPU_W, starpu_data_get_sub_data(handle, 1, i), 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unregister(handle); starpu_shutdown(); return 0; enodev: for (i = 0; i < f.nchildren; i++) { starpu_data_handle_t h = starpu_data_get_sub_data(handle, 1, i); starpu_data_acquire(h, STARPU_W); starpu_data_release(h); } starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unregister(handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/partition_wontuse.c000066400000000000000000000026121507764646700232460ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" int main(int argc, char **argv) { int ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = 2 }; int v[10]; memset(v, 0, 10*sizeof(int)); starpu_data_handle_t array_handle; starpu_vector_data_register(&array_handle, STARPU_MAIN_RAM, (uintptr_t)&v, 10, sizeof(int)); starpu_data_partition(array_handle, &f); starpu_data_wont_use(array_handle); starpu_data_unpartition(array_handle, STARPU_MAIN_RAM); starpu_data_unregister(array_handle); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/tests/datawizard/partitioned_acquire.c000066400000000000000000000051551507764646700235110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #define SIZE (1<<20) #define NPARTS 16 /* * Test asynchronous partitioning on a temporary data. */ static void codelet(void *descr[], void *_args) { (void)descr; (void)_args; } static struct starpu_codelet clw = { .where = STARPU_CPU, .cpu_funcs = {codelet}, .nbuffers = 1, .modes = {STARPU_W} }; static struct starpu_codelet clr = { .where = STARPU_CPU, .cpu_funcs = {codelet}, .nbuffers = 1, .modes = {STARPU_R} }; int main(void) { int ret; starpu_data_handle_t handle, handles[NPARTS]; int i; char d[SIZE]; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); memset(d, 0, SIZE*sizeof(char)); starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t) &d, SIZE, sizeof(char)); /* Fork */ struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = NPARTS }; starpu_data_partition_plan(handle, &f, handles); /* Read in parallel */ for (i = 0; i < NPARTS; i++) { starpu_data_acquire(handles[i], STARPU_R); } /* Release in parallel */ for (i = 0; i < NPARTS; i++) { starpu_data_release(handles[i]); } starpu_data_invalidate(handle); /* Acquire in parallel */ for (i = 0; i < NPARTS; i++) { starpu_data_acquire(handles[i], STARPU_W); } /* Release in parallel */ for (i = 0; i < NPARTS; i++) { starpu_data_release(handles[i]); } starpu_data_acquire(handle, STARPU_R); starpu_data_release(handle); /* Read result */ ret = starpu_task_insert(&clr, STARPU_R, handle, 0); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* otherwise let's say nevermind */ /* Clean */ starpu_data_partition_clean(handle, NPARTS, handles); starpu_data_unregister(handle); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/tests/datawizard/partitioned_initialization.c000066400000000000000000000053231507764646700251040ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #define SIZE (1<<20) #define NPARTS 16 /* * Test asynchronous partitioning on a temporary data. */ static void codelet(void *descr[], void *_args) { (void)descr; (void)_args; } static struct starpu_codelet clw = { .where = STARPU_CPU, .cpu_funcs = {codelet}, .nbuffers = 1, .modes = {STARPU_W} }; static struct starpu_codelet clr = { .where = STARPU_CPU, .cpu_funcs = {codelet}, .nbuffers = 1, .modes = {STARPU_R} }; int main(void) { int ret; starpu_data_handle_t handle, handles[NPARTS]; int i; char d[SIZE]; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); memset(d, 0, SIZE*sizeof(char)); starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t) &d, SIZE, sizeof(char)); starpu_data_invalidate(handle); /* Fork */ struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = NPARTS }; starpu_data_partition_plan(handle, &f, handles); /* Process in parallel */ for (i = 0; i < NPARTS; i++) { ret = starpu_task_insert(&clw, STARPU_W, handles[i], 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } starpu_data_acquire(handle, STARPU_R); starpu_data_release(handle); /* Read result */ ret = starpu_task_insert(&clr, STARPU_R, handle, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* Clean */ starpu_data_partition_clean(handle, NPARTS, handles); starpu_data_unregister(handle); starpu_shutdown(); return 0; enodev: starpu_data_partition_clean(handle, NPARTS, handles); starpu_data_unregister(handle); starpu_shutdown(); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ fprintf(stderr, "WARNING: No one can execute this task\n"); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/readers_and_writers.c000066400000000000000000000053151507764646700235020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Try mixing readers and writers on the same variable */ static unsigned book = 0; static starpu_data_handle_t book_handle; void dummy_kernel(void *descr[], void *arg) { (void)descr; (void)arg; } static struct starpu_codelet r_cl = { .cuda_funcs = {dummy_kernel}, .cpu_funcs = {dummy_kernel}, .opencl_funcs = {dummy_kernel}, .cpu_funcs_name = {"dummy_kernel"}, .nbuffers = 1, .modes = {STARPU_R} }; static struct starpu_codelet w_cl = { .cuda_funcs = {dummy_kernel}, .cpu_funcs = {dummy_kernel}, .opencl_funcs = {dummy_kernel}, .cpu_funcs_name = {"dummy_kernel"}, .nbuffers = 1, .modes = {STARPU_W} }; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* initialize the resource */ starpu_vector_data_register(&book_handle, STARPU_MAIN_RAM, (uintptr_t)&book, 1, sizeof(unsigned)); #ifdef STARPU_QUICK_CHECK unsigned ntasks = 16; #else unsigned ntasks = 16*1024; #endif unsigned t; for (t = 0; t < ntasks; t++) { struct starpu_task *task = starpu_task_create(); task->handles[0] = book_handle; /* we randomly select either a reader or a writer (give 10 * times more chances to be a reader) */ enum starpu_data_access_mode mode = ((rand() % 10)==0)?STARPU_W:STARPU_R; if (mode == STARPU_W) task->cl = &w_cl; else task->cl = &r_cl; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(book_handle); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(book_handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/readonly.c000066400000000000000000000036031507764646700212670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Try accessing a variable in read-only mode */ #ifdef STARPU_USE_OPENCL static void codelet(void *descr[], void *_args) { (void)descr; (void)_args; FPRINTF(stderr, "codelet\n"); } #endif static struct starpu_codelet cl = { #ifdef STARPU_USE_OPENCL .opencl_funcs = {codelet}, #endif .nbuffers = 1, .modes = {STARPU_R} }; int main(void) { int ret; int var = 42; starpu_data_handle_t handle; ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int copy = starpu_asynchronous_copy_disabled(); FPRINTF(stderr, "copy %d\n", copy); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); ret = starpu_task_insert(&cl, STARPU_R, handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); starpu_data_unregister(handle); starpu_shutdown(); return 0; enodev: starpu_data_unregister(handle); starpu_shutdown(); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/reclaim.c000066400000000000000000000114361507764646700210710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #ifdef STARPU_HAVE_HWLOC #include #endif #include "../helper.h" #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else /* * Stress the memory allocation system and force StarPU to reclaim memory from * time to time. */ #ifdef STARPU_QUICK_CHECK # define BLOCK_SIZE (64*1024) static unsigned ntasks = 250; #else # define BLOCK_SIZE (64*1024*1024) static unsigned ntasks = 1000; #endif #ifdef STARPU_HAVE_HWLOC static uint64_t get_total_memory_size(void) { uint64_t size; hwloc_topology_t hwtopology; int err; err = hwloc_topology_init(&hwtopology); STARPU_ASSERT_MSG(err == 0, "Could not initialize Hwloc topology (%s)\n", strerror(errno)); err = hwloc_topology_load(hwtopology); STARPU_ASSERT_MSG(err == 0, "Could not load Hwloc topology (%s)\n", strerror(errno)); hwloc_obj_t root = hwloc_get_root_obj(hwtopology); #if HWLOC_API_VERSION >= 0x00020000 size = root->total_memory; #else size = root->memory.total_memory; #endif hwloc_topology_destroy(hwtopology); return size; } #endif void dummy_func(void *descr[], void *_args) { } static unsigned int i = 0; void func(void *arg) { printf("%u\n", ++i); } static struct starpu_codelet dummy_cl = { .cpu_funcs = {dummy_func}, .cuda_funcs = {dummy_func}, .opencl_funcs = {dummy_func}, .cpu_funcs_name = {"dummy_func"}, .nbuffers = 3, .modes = {STARPU_RW, STARPU_R, STARPU_R} }; /* Number of chunks */ static unsigned mb = 16; int main(int argc, char **argv) { unsigned j, taskid; int ret; #ifdef STARPU_HAVE_HWLOC /* We allocate 50% of the memory */ uint64_t total_size = get_total_memory_size(); /* On x86_64-freebsd8.2, hwloc 1.3 returns 0 as the total memory * size, so sanity-check what we have. */ if (total_size > 0) mb = (int)((0.50 * total_size)/(BLOCK_SIZE)); #endif setenv("STARPU_LIMIT_OPENCL_MEM", "1000", 1); ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* An optional argument indicates the number of MB to allocate */ if (argc > 1) mb = atoi(argv[1]); if (2*mb > ntasks) ntasks = 2*mb; #ifdef STARPU_QUICK_CHECK mb /= 100; if (mb == 0) mb = 1; #endif FPRINTF(stderr, "Allocate %u buffers of size %d and create %u tasks\n", mb, BLOCK_SIZE, ntasks); float **host_ptr_array; starpu_data_handle_t *handle_array; host_ptr_array = calloc(mb, sizeof(float *)); STARPU_ASSERT(host_ptr_array); handle_array = calloc(mb, sizeof(starpu_data_handle_t)); STARPU_ASSERT(handle_array); /* Register mb buffers of 1MB */ for (j = 0; j < mb; j++) { size_t size = starpu_lrand48()%BLOCK_SIZE + 1; host_ptr_array[j] = calloc(size, 1); if (host_ptr_array[j] == NULL) { mb = j; FPRINTF(stderr, "Cannot allocate more than %u buffers\n", mb); break; } starpu_variable_data_register(&handle_array[j], STARPU_MAIN_RAM, (uintptr_t)host_ptr_array[j], size); STARPU_ASSERT(handle_array[j]); } for (taskid = 0; taskid < ntasks; taskid++) { struct starpu_task *task = starpu_task_create(); task->cl = &dummy_cl; task->handles[0] = handle_array[taskid%mb]; task->handles[1] = handle_array[(taskid+1)%mb]; task->handles[2] = handle_array[(taskid+2)%mb]; task->callback_func = func; task->callback_arg = NULL; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } for (j = 0; j < mb; j++) { if (j%20 == 0) starpu_data_unregister_submit(handle_array[j]); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); for (j = 0; j < mb; j++) { if (j%20 != 0) starpu_data_unregister(handle_array[j]); free(host_ptr_array[j]); } free(host_ptr_array); free(handle_array); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } #endif starpu-1.4.9+dfsg/tests/datawizard/redux_acquire.c000066400000000000000000000052671507764646700223220ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "helper.h" void init_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; long int *dot = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]); *dot = 42; } void redux_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; long int *dota = (long int *)STARPU_VARIABLE_GET_PTR(descr[0]); long int *dotb = (long int *)STARPU_VARIABLE_GET_PTR(descr[1]); *dota = *dota + *dotb; } static struct starpu_codelet init_codelet = { .cpu_funcs = {init_cpu_func}, .nbuffers = 1, .modes = {STARPU_W}, .name = "init_codelet" }; static struct starpu_codelet redux_codelet = { .cpu_funcs = {redux_cpu_func}, .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, .nbuffers = 2, .name = "redux_codelet" }; static void check_dot(void *dot_handle) { long int *x = starpu_data_get_local_ptr(dot_handle); STARPU_ASSERT_MSG(*x == 42, "Incorrect value %ld", *x); starpu_data_release(dot_handle); } int main(void) { starpu_data_handle_t dot_handle; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; int ret = starpu_init(&conf); if (ret == -ENODEV) goto skip; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() == 0) goto enodev; starpu_variable_data_register(&dot_handle, -1, (uintptr_t)NULL, sizeof(long int)); starpu_data_set_reduction_methods(dot_handle, &redux_codelet, &init_codelet); starpu_data_acquire(dot_handle, STARPU_R); long int *x = starpu_data_get_local_ptr(dot_handle); STARPU_ASSERT_MSG(*x == 42, "Incorrect value %ld", *x); starpu_data_release(dot_handle); starpu_data_unregister(dot_handle); starpu_variable_data_register(&dot_handle, -1, (uintptr_t)NULL, sizeof(long int)); starpu_data_set_reduction_methods(dot_handle, &redux_codelet, &init_codelet); starpu_data_acquire_cb(dot_handle, STARPU_R, check_dot, dot_handle); starpu_data_unregister(dot_handle); starpu_shutdown(); return 0; enodev: starpu_shutdown(); skip: return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/scal.c000066400000000000000000000057511507764646700204020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "scal.h" #include "helper.h" /* * Implement a kernel that just multiplies a vector by 2 */ void scal_func_cpu(void *buffers[], void *cl_arg) { (void)cl_arg; unsigned i; struct starpu_vector_interface *vector = (struct starpu_vector_interface *) buffers[0]; unsigned *val = (unsigned *) STARPU_VECTOR_GET_PTR(vector); unsigned n = STARPU_VECTOR_GET_NX(vector); /* scale the vector */ for (i = 0; i < n; i++) val[i] *= 2; } #ifdef STARPU_USE_OPENCL struct starpu_opencl_program opencl_program; void scal_func_opencl(void *buffers[], void *cl_arg) { (void)cl_arg; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); unsigned offset = STARPU_VECTOR_GET_OFFSET(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "vector_mult_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(val), &val); err |= clSetKernelArg(kernel, 1, sizeof(offset), &offset); err |= clSetKernelArg(kernel, 2, sizeof(n), &n); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local=global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } #endif struct starpu_codelet scal_codelet = { .cpu_funcs = { scal_func_cpu }, #ifdef STARPU_USE_OPENCL .opencl_funcs = { scal_func_opencl }, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif #ifdef STARPU_USE_CUDA .cuda_funcs = { scal_func_cuda }, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif .cpu_funcs_name = {"scal_func_cpu"}, .modes = { STARPU_RW }, .model = NULL, .nbuffers = 1 }; starpu-1.4.9+dfsg/tests/datawizard/scal.h000066400000000000000000000020011507764646700203700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include extern struct starpu_codelet scal_codelet; extern struct starpu_opencl_program opencl_program; void scal_func_cpu(void *buffers[], void *cl_arg); #ifdef STARPU_USE_CUDA void scal_func_cuda(void *buffers[], void *cl_arg); #endif #ifdef STARPU_USE_OPENCL void scal_func_opencl(void *buffers[], void *_args); #endif starpu-1.4.9+dfsg/tests/datawizard/scal_cuda.cu000066400000000000000000000025371507764646700215620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include static __global__ void vector_mult_cuda(unsigned *val, unsigned n) { unsigned i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) val[i] *= 2; } extern "C" void scal_func_cuda(void *buffers[], void *_args) { unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); unsigned *val = (unsigned *)STARPU_VECTOR_GET_PTR(buffers[0]); unsigned threads_per_block = 64; unsigned nblocks = (n + threads_per_block-1) / threads_per_block; vector_mult_cuda<<>>(val, n); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/tests/datawizard/scal_opencl.cl000066400000000000000000000016261507764646700221130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void vector_mult_opencl(__global unsigned* val, unsigned offset, unsigned nx) { const int i = get_global_id(0); val = (__global char*) val + offset; if (i < nx) { val[i] *= 2; } } starpu-1.4.9+dfsg/tests/datawizard/scratch.c000066400000000000000000000101621507764646700210770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Test using a scratch data, using it just for temporary storage */ #ifdef STARPU_QUICK_CHECK # define NLOOPS 8 # define VECTORSIZE 128 #else # define NLOOPS 128 # define VECTORSIZE 1024 #endif static unsigned *A; starpu_data_handle_t A_handle, B_handle; //static unsigned var = 0; #ifdef STARPU_USE_CUDA extern void cuda_f(void *descr[], void *_args); #endif #ifdef STARPU_USE_OPENCL extern void opencl_f(void *buffers[], void *args); #endif void cpu_f(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; unsigned *v = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned *tmp = (unsigned *)STARPU_VECTOR_GET_PTR(descr[1]); unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); size_t elemsize = STARPU_VECTOR_GET_ELEMSIZE(descr[0]); memcpy(tmp, v, nx*elemsize); unsigned i; for (i = 0; i < nx; i++) { v[i] = tmp[i] + 1; } } static struct starpu_codelet cl_f = { .cpu_funcs = {cpu_f}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_f}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_f}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs_name = {"cpu_f"}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_SCRATCH} }; #ifdef STARPU_USE_OPENCL struct starpu_opencl_program opencl_program; #endif int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scratch_opencl_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif A = (unsigned *) calloc(VECTORSIZE, sizeof(unsigned)); starpu_vector_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, VECTORSIZE, sizeof(unsigned)); starpu_vector_data_register(&B_handle, -1, (uintptr_t)NULL, VECTORSIZE, sizeof(unsigned)); unsigned loop; for (loop = 0; loop < NLOOPS; loop++) { struct starpu_task *task_f = starpu_task_create(); task_f->cl = &cl_f; task_f->handles[0] = A_handle; task_f->handles[1] = B_handle; ret = starpu_task_submit(task_f); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(A_handle); starpu_data_unregister(B_handle); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif starpu_shutdown(); /* Check result */ unsigned i; ret = EXIT_SUCCESS; for (i = 0; i < VECTORSIZE; i++) { if (A[i] != NLOOPS) { FPRINTF(stderr, "Error: Incorrect value A[%u] = %u != %d\n", i, A[i], NLOOPS); ret = EXIT_FAILURE; break; } } free(A); STARPU_RETURN(ret); enodev: starpu_data_unregister(A_handle); starpu_data_unregister(B_handle); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif starpu_shutdown(); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ fprintf(stderr, "WARNING: No one can execute this task\n"); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/scratch_cuda.cu000066400000000000000000000032741507764646700222660ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #define MAXNBLOCKS 32 #define MAXTHREADSPERBLOCK 128 static __global__ void increment_vector(unsigned *v, unsigned *tmp, int nx) { const int tid = threadIdx.x + blockIdx.x*blockDim.x; const int nthreads = gridDim.x * blockDim.x; int i; for (i = tid; i < nx; i += nthreads) { v[i] = tmp[i] + 1; } } extern "C" void cuda_f(void *descr[], void *_args) { unsigned *v = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned *tmp = (unsigned *)STARPU_VECTOR_GET_PTR(descr[1]); unsigned nx = STARPU_VECTOR_GET_NX(descr[0]); size_t elemsize = STARPU_VECTOR_GET_ELEMSIZE(descr[0]); cudaMemcpyAsync(tmp, v, nx*elemsize, cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); unsigned nblocks = 128; unsigned nthread_per_block = STARPU_MIN(MAXTHREADSPERBLOCK, (nx / nblocks)); increment_vector<<>>(v, tmp, nx); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/tests/datawizard/scratch_opencl.c000066400000000000000000000051061507764646700224410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include /* * Queue an OpenCL kernel for the scratch test. */ extern struct starpu_opencl_program opencl_program; void opencl_f(void *buffers[], void *args) { (void) args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); unsigned elemsize = STARPU_VECTOR_GET_ELEMSIZE(buffers[0]); cl_mem val = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); cl_mem tmp = (cl_mem) STARPU_VECTOR_GET_DEV_HANDLE(buffers[1]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, "increment_vector_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clEnqueueCopyBuffer(queue, val, tmp, 0, /* offset in val */ 0, /* offset in tmp */ n * elemsize, 0, /* num_events_in_wait_list */ NULL, /* event_wait_list */ NULL); /* event */ if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(val), &val); err|= clSetKernelArg(kernel, 1, sizeof(tmp), &tmp); err|= clSetKernelArg(kernel, 2, sizeof(n), &n); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local=global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/tests/datawizard/scratch_opencl_kernel.cl000066400000000000000000000015731507764646700241610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void increment_vector_opencl(__global unsigned *val, __global unsigned *tmp, unsigned nx) { const int id = get_global_id(0); if (id < nx) val[id] = tmp[id] + 1; } starpu-1.4.9+dfsg/tests/datawizard/scratch_reuse.c000066400000000000000000000040501507764646700223010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #if !defined(STARPU_HAVE_SETENV) || !defined(STARPU_USE_CPU) || !defined(STARPU_HAVE_HWLOC) #warning setenv is not defined or no cpu are available. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else #ifdef STARPU_QUICK_CHECK #define ITER 32 #else #define ITER 128 #endif static void kernel(void *buffers[], void *cl_args) { (void)cl_args; STARPU_ASSERT(STARPU_MATRIX_GET_PTR(buffers[0]) != 0); } static struct starpu_codelet codelet = { .name = "codelet", .cuda_funcs = { kernel }, .nbuffers = 1, .modes = { STARPU_SCRATCH }, }; int main(int argc, char *argv[]) { setenv("STARPU_LIMIT_CUDA_MEM", "50", 1); int ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cuda_worker_get_count() == 0) { starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu_data_handle_t handle[ITER]; int i; for (i = 0; i < ITER; i++) { starpu_matrix_data_register(&handle[i], -1, 0, 1024, 1024, 1024, sizeof(float)); ret = starpu_task_insert(&codelet, STARPU_SCRATCH, handle[i], 0); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } starpu_task_wait_for_all(); for (i = 0; i < ITER; i++) starpu_data_unregister(handle[i]); starpu_shutdown(); return 0; } #endif starpu-1.4.9+dfsg/tests/datawizard/simgrid-locality.c000066400000000000000000000020331507764646700227220ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* Check that defining a main makes starpu use MSG_process_attach. */ #include "locality.c" #include #if defined(SIMGRID_VERSION) && ((defined(HAVE_MSG_PROCESS_ATTACH) || defined(MSG_process_attach)) && SIMGRID_VERSION >= 31500 & SIMGRID_VERSION != 31559) #undef main int main(int argc, char *argv[]) { return starpu_main(argc, argv); } #endif starpu-1.4.9+dfsg/tests/datawizard/specific_node.c000066400000000000000000000162121507764646700222440ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" #include "../variable/increment.h" /* * Test using the specific_nodes field by forcing the data to main memory * even if the task is run on a GPU (and actually doing the computation from * the CPU driving the GPU). It mixes such accesses and normal accesses from * the GPU */ unsigned data, data2; void specific3_kernel(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg STARPU_ATTRIBUTE_UNUSED) { (void)arg; } static struct starpu_codelet specific3_cl = { .cpu_funcs = {specific3_kernel}, .cuda_funcs = {specific3_kernel}, .opencl_funcs = {specific3_kernel}, .hip_funcs = {specific3_kernel}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_RW}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_NONE, STARPU_SPECIFIC_NODE_NONE}, }; void specific2_kernel(void *descr[], void *arg) { (void)arg; int node = starpu_task_get_current_data_node(0); STARPU_ASSERT(node >= 0); STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM); unsigned *dataptr = (unsigned*) STARPU_VARIABLE_GET_PTR(descr[0]); if (node == STARPU_MAIN_RAM) STARPU_ASSERT(dataptr == &data); (*dataptr)++; node = starpu_task_get_current_data_node(1); STARPU_ASSERT(node >= 0); STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM || (unsigned) node == starpu_worker_get_local_memory_node()); dataptr = (unsigned*) STARPU_VARIABLE_GET_PTR(descr[1]); if (node == STARPU_MAIN_RAM) STARPU_ASSERT(dataptr == &data || dataptr == &data2); } static struct starpu_codelet specific2_cl = { .cpu_funcs = {specific2_kernel}, .cuda_funcs = {specific2_kernel}, .opencl_funcs = {specific2_kernel}, .hip_funcs = {specific2_kernel}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_RW}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU}, }; void specific2_ro_kernel(void *descr[], void *arg) { (void)arg; int node = starpu_task_get_current_data_node(0); STARPU_ASSERT(node >= 0); STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM); unsigned *dataptr = (unsigned*) STARPU_VARIABLE_GET_PTR(descr[0]); if (node == STARPU_MAIN_RAM) STARPU_ASSERT(dataptr == &data); node = starpu_task_get_current_data_node(1); STARPU_ASSERT(node >= 0); STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM || (unsigned) node == starpu_worker_get_local_memory_node()); dataptr = (unsigned*) STARPU_VARIABLE_GET_PTR(descr[1]); if (node == STARPU_MAIN_RAM) STARPU_ASSERT(dataptr == &data || dataptr == &data2); } static struct starpu_codelet specific2_cl_ro = { .cpu_funcs = {specific2_ro_kernel}, .cuda_funcs = {specific2_ro_kernel}, .opencl_funcs = {specific2_ro_kernel}, .hip_funcs = {specific2_ro_kernel}, .nbuffers = 2, .modes = {STARPU_R, STARPU_R}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU}, }; void specific_kernel(void *descr[], void *arg) { (void)arg; int node = starpu_task_get_current_data_node(0); STARPU_ASSERT(node >= 0); STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM); unsigned *dataptr = (unsigned*) STARPU_VARIABLE_GET_PTR(descr[0]); if (node == STARPU_MAIN_RAM) STARPU_ASSERT(dataptr == &data); (*dataptr)++; node = starpu_task_get_current_data_node(1); STARPU_ASSERT((unsigned) node == starpu_worker_get_local_memory_node()); } void specific_ro_kernel(void *descr[], void *arg) { (void)arg; int node = starpu_task_get_current_data_node(0); STARPU_ASSERT(node >= 0); STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM); unsigned *dataptr = (unsigned*) STARPU_VARIABLE_GET_PTR(descr[0]); if (node == STARPU_MAIN_RAM) STARPU_ASSERT(dataptr == &data); node = starpu_task_get_current_data_node(1); STARPU_ASSERT((unsigned) node == starpu_worker_get_local_memory_node()); } static struct starpu_codelet specific_cl = { .cpu_funcs = {specific_kernel}, .cuda_funcs = {specific_kernel}, .opencl_funcs = {specific_kernel}, .hip_funcs = {specific_kernel}, .nbuffers = 2, .modes = {STARPU_RW, STARPU_RW}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL}, }; static struct starpu_codelet specific_cl_ro = { .cpu_funcs = {specific_ro_kernel}, .cuda_funcs = {specific_ro_kernel}, .opencl_funcs = {specific_ro_kernel}, .hip_funcs = {specific_ro_kernel}, .nbuffers = 2, .modes = {STARPU_R, STARPU_R}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL}, }; int main(void) { starpu_data_handle_t data_handle, data_handle2; #ifdef STARPU_QUICK_CHECK unsigned ntasks = 16; #else unsigned ntasks = 1024; #endif int ret; /* Disable prefetching, it makes the test work just by luck */ #ifdef STARPU_HAVE_SETENV setenv("STARPU_PREFETCH", "0", 1); #endif ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); increment_load_opencl(); data = 0; data2 = 0; /* Create a void data which will be used as an exclusion mechanism. */ starpu_variable_data_register(&data_handle, STARPU_MAIN_RAM, (uintptr_t) &data, sizeof(data)); starpu_variable_data_register(&data_handle2, STARPU_MAIN_RAM, (uintptr_t) &data2, sizeof(data2)); unsigned i; for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); switch (i%8) { case 0: task->cl = &specific_cl; break; case 1: task->cl = &specific2_cl; break; case 2: task->cl = &specific3_cl; break; case 3: task->cl = &increment_cl; break; case 4: task->cl = &specific_cl_ro; break; case 5: task->cl = &specific2_cl_ro; break; case 6: task->cl = &specific3_cl; break; case 7: task->cl = &increment_cl; break; } task->handles[0] = data_handle; if (i % 8 >= 4) task->handles[1] = data_handle; else task->handles[1] = data_handle2; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_data_unregister(data_handle); starpu_data_unregister(data_handle2); ret = (data == (ntasks*4) / 8) ? EXIT_SUCCESS : EXIT_FAILURE; increment_unload_opencl(); starpu_shutdown(); return ret; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_data_unregister(data_handle); starpu_data_unregister(data_handle2); increment_unload_opencl(); starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/specific_node_same.c000066400000000000000000000060451507764646700232540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Test using the specific_nodes field with loading the same data several * times on different nodes. */ unsigned data; void specific_ro_kernel(void *descr[], void *arg) { (void)arg; int node = starpu_task_get_current_data_node(0); STARPU_ASSERT(node >= 0); STARPU_ASSERT(starpu_node_get_kind(node) == STARPU_CPU_RAM); unsigned *dataptr = (unsigned*) STARPU_VARIABLE_GET_PTR(descr[0]); if (node == STARPU_MAIN_RAM) STARPU_ASSERT(dataptr == &data); node = starpu_task_get_current_data_node(1); STARPU_ASSERT((unsigned) node == starpu_worker_get_local_memory_node()); } static struct starpu_codelet specific_cl_ro = { .cpu_funcs = {specific_ro_kernel}, .cuda_funcs = {specific_ro_kernel}, .opencl_funcs = {specific_ro_kernel}, .hip_funcs = {specific_ro_kernel}, .nbuffers = 2, .modes = {STARPU_R, STARPU_R}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL}, }; int main(void) { starpu_data_handle_t data_handle; #ifdef STARPU_QUICK_CHECK unsigned ntasks = 16; #else unsigned ntasks = 1024; #endif int ret; /* Disable prefetching, it makes the test work just by luck */ #ifdef STARPU_HAVE_SETENV setenv("STARPU_PREFETCH", "0", 1); #endif ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); data = 0; /* Create a void data which will be used as an exclusion mechanism. */ starpu_variable_data_register(&data_handle, STARPU_MAIN_RAM, (uintptr_t) &data, sizeof(data)); unsigned i; for (i = 0 ; i < starpu_worker_get_count(); i++) { struct starpu_task *task = starpu_task_create(); task->cl = &specific_cl_ro; task->execute_on_a_specific_worker = 1; task->workerid = i; task->handles[0] = data_handle; task->handles[1] = data_handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_data_unregister(data_handle); starpu_shutdown(); return ret; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_data_unregister(data_handle); starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/sync_and_notify_data.c000066400000000000000000000120421507764646700236260ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "../helper.h" /* * Mix synchronous tasks and data acquisitions */ #define N_DEF 100 #define K_DEF 256 static unsigned n=N_DEF; static unsigned k=K_DEF; /* * In this test, we maintain a vector v = (a,b,c). * * Each iteration consists of: * - increment a n times * - sync v in ram * - incrementer b * - notify the modification of v * - incrementer c n times * - sync v * * At the end, we have to make sure that if we did k iterations, * v == (kn, k, kn) */ #ifdef STARPU_USE_CUDA void cuda_codelet_incA(void *descr[], void *_args); void cuda_codelet_incC(void *descr[], void *_args); #endif #ifdef STARPU_USE_OPENCL void opencl_codelet_incA(void *descr[], void *_args); void opencl_codelet_incC(void *descr[], void *_args); struct starpu_opencl_program opencl_code; #endif #define VECTORSIZE 16 starpu_data_handle_t v_handle; static unsigned v[VECTORSIZE] STARPU_ATTRIBUTE_ALIGNED(128) = {0, 0, 0, 0}; void cpu_codelet_incA(void *descr[], void *arg) { (void)arg; unsigned *val = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); val[0]++; } void cpu_codelet_incC(void *descr[], void *arg) { (void)arg; unsigned *val = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); val[2]++; } int main(int argc, char **argv) { int ret; #ifdef STARPU_QUICK_CHECK n /= 10; #endif #ifndef STARPU_LONG_CHECK k /= 16; #endif #ifdef STARPU_HAVE_VALGRIND_H if(RUNNING_ON_VALGRIND) n = 4; if(RUNNING_ON_VALGRIND) k = 2; #endif ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/datawizard/sync_and_notify_data_opencl_codelet.cl", &opencl_code, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); unsigned iter; for (iter = 0; iter < k; iter++) { unsigned ind; for (ind = 0; ind < n; ind++) { /* increment a = v[0] */ struct starpu_codelet cl_inc_a = { .cpu_funcs = {cpu_codelet_incA}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_codelet_incA}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_codelet_incA}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs_name = {"cpu_codelet_incA"}, .nbuffers = 1, .modes = {STARPU_RW} }; struct starpu_task *task = starpu_task_create(); task->cl = &cl_inc_a; task->handles[0] = v_handle; task->synchronous = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* synchronize v in RAM */ ret = starpu_data_acquire(v_handle, STARPU_RW); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); /* increment b */ v[1]++; starpu_data_release(v_handle); for (ind = 0; ind < n; ind++) { /* increment c = v[2] */ struct starpu_codelet cl_inc_c = { .cpu_funcs = {cpu_codelet_incC}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_codelet_incC}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_codelet_incC}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs_name = {"cpu_codelet_incC"}, .nbuffers = 1, .modes = {STARPU_RW} }; struct starpu_task *task = starpu_task_create(); task->cl = &cl_inc_c; task->handles[0] = v_handle; task->synchronous = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } ret = starpu_data_acquire(v_handle, STARPU_RW); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); FPRINTF(stderr, "V = {%u, %u, %u}\n", v[0], v[1], v[2]); starpu_data_release(v_handle); starpu_data_unregister(v_handle); starpu_shutdown(); if ((v[0] != n*k) || (v[1] != k) || (v[2] != n*k)) { FPRINTF(stderr, "Incorrect result\n"); return EXIT_FAILURE; } return EXIT_SUCCESS; enodev: starpu_data_unregister(v_handle); starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ STARPU_RETURN(STARPU_TEST_SKIPPED); } starpu-1.4.9+dfsg/tests/datawizard/sync_and_notify_data_implicit.c000066400000000000000000000116701507764646700255260ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "../helper.h" /* * Mix tasks with implicit dependencies and data acquisitions */ #define N_DEF 100 #define K_DEF 256 static unsigned n=N_DEF; static unsigned k=K_DEF; /* * In this test, we maintain a vector v = (a,b,c). * * Each iteration consists of: * - increment a n times * - sync v in ram * - incrementer b * - notify the modification of v * - incrementer c n times * - sync v * * At the end, we have to make sure that if we did k iterations, * v == (kn, k, kn) */ #ifdef STARPU_USE_CUDA void cuda_codelet_incA(void *descr[], void *_args); void cuda_codelet_incC(void *descr[], void *_args); #endif #ifdef STARPU_USE_OPENCL void opencl_codelet_incA(void *descr[], void *_args); void opencl_codelet_incC(void *descr[], void *_args); struct starpu_opencl_program opencl_code; #endif #define VECTORSIZE 16 starpu_data_handle_t v_handle; static unsigned v[VECTORSIZE] STARPU_ATTRIBUTE_ALIGNED(128) = {0, 0, 0, 0}; void cpu_codelet_incA(void *descr[], void *arg) { (void)arg; unsigned *val = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); val[0]++; } void cpu_codelet_incC(void *descr[], void *arg) { (void)arg; unsigned *val = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); val[2]++; } /* increment a = v[0] */ static struct starpu_codelet cl_inc_a = { .cpu_funcs = {cpu_codelet_incA}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_codelet_incA}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_codelet_incA}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs_name = {"cpu_codelet_incA"}, .nbuffers = 1, .modes = {STARPU_RW} }; /* increment c = v[2] */ struct starpu_codelet cl_inc_c = { .cpu_funcs = {cpu_codelet_incC}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_codelet_incC}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_codelet_incC}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs_name = {"cpu_codelet_incC"}, .nbuffers = 1, .modes = {STARPU_RW} }; int main(int argc, char **argv) { int ret; #ifdef STARPU_QUICK_CHECK n /= 10; #endif #ifndef STARPU_LONG_CHECK k /= 8; #endif #ifdef STARPU_HAVE_VALGRIND_H if(RUNNING_ON_VALGRIND) n = 4; if(RUNNING_ON_VALGRIND) k = 2; #endif ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/datawizard/sync_and_notify_data_opencl_codelet.cl", &opencl_code, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); unsigned iter; for (iter = 0; iter < k; iter++) { unsigned ind; for (ind = 0; ind < n; ind++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl_inc_a; task->handles[0] = v_handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* synchronize v in RAM */ ret = starpu_data_acquire(v_handle, STARPU_RW); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); /* increment b */ v[1]++; starpu_data_release(v_handle); for (ind = 0; ind < n; ind++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl_inc_c; task->handles[0] = v_handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } ret = starpu_data_acquire(v_handle, STARPU_RW); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); FPRINTF(stderr, "V = {%u, %u, %u}\n", v[0], v[1], v[2]); starpu_data_release(v_handle); starpu_data_unregister(v_handle); starpu_shutdown(); ret = EXIT_SUCCESS; if ((v[0] != n*k) || (v[1] != k) || (v[2] != n*k)) { FPRINTF(stderr, "Incorrect result\n"); ret = EXIT_FAILURE; } return ret; enodev: starpu_data_unregister(v_handle); starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/sync_and_notify_data_kernels.cu000066400000000000000000000027051507764646700255430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include /* * increment a (val[0]) */ static __global__ void _cuda_incA(unsigned *v) { v[0]++; } extern "C" void cuda_codelet_incA(void *descr[], void *_args) { unsigned *v = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); _cuda_incA<<<1,1, 0, starpu_cuda_get_local_stream()>>>(v); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } /* * increment c (val[2]) */ static __global__ void _cuda_incC(unsigned *v) { v[2]++; } extern "C" void cuda_codelet_incC(void *descr[], void *_args) { unsigned *v = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); _cuda_incC<<<1,1, 0, starpu_cuda_get_local_stream()>>>(v); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/tests/datawizard/sync_and_notify_data_opencl.c000066400000000000000000000044771507764646700252030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Queue kernels which just increment the vector */ extern struct starpu_opencl_program opencl_code; void opencl_codelet_incA(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]); cl_kernel kernel; cl_command_queue queue; int id, devid, err; id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_code, "incA", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(val), &val); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=100; size_t local=100; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } void opencl_codelet_incC(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]); cl_kernel kernel; cl_command_queue queue; int id, devid, err; id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_code, "incC", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(val), &val); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=100; size_t local=100; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/tests/datawizard/sync_and_notify_data_opencl_codelet.cl000066400000000000000000000016171507764646700270470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void incA(__global unsigned* input) { const int i = get_global_id(0); if (i == 0) input[i] ++; } __kernel void incC(__global unsigned* input) { const int i = get_global_id(0); if (i == 2) input[i] ++; } starpu-1.4.9+dfsg/tests/datawizard/sync_with_data_with_mem.c000066400000000000000000000066631507764646700243540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Mix submitting tasks and synchronously acquiring the corresponding data. */ #define NBUFFERS_DEF 64 #define NITER_DEF 128 #define VECTORSIZE_DEF 1024 static int nbuffers = NBUFFERS_DEF; static int niter = NITER_DEF; static int vectorsize = VECTORSIZE_DEF; float *buffer[NBUFFERS_DEF]; starpu_data_handle_t v_handle[NBUFFERS_DEF]; void dummy_codelet(void *descr[], void *_args) { (void)descr; (void)_args; } static struct starpu_codelet cl = { .modes = { STARPU_RW }, .cpu_funcs = {dummy_codelet}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dummy_codelet}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {dummy_codelet}, #endif .cpu_funcs_name = {"dummy_codelet"}, .nbuffers = 1 }; static int use_handle(starpu_data_handle_t handle) { int ret; struct starpu_task *task; task = starpu_task_create(); task->cl = &cl; task->handles[0] = handle; ret = starpu_task_submit(task); return ret; } int main(int argc, char **argv) { int ret; #ifdef STARPU_QUICK_CHECK nbuffers /= 4; niter /= 4; vectorsize /= 8; #endif ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Allocate all buffers and register them to StarPU */ int b; for (b = 0; b < nbuffers; b++) { ret = starpu_malloc((void **)&buffer[b], vectorsize); STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); starpu_vector_data_register(&v_handle[b], STARPU_MAIN_RAM, (uintptr_t)buffer[b], vectorsize, sizeof(char)); } int iter; for (iter = 0; iter < niter; iter++) { /* Use the buffers on the different workers so that it may not * be in main memory anymore */ for (b = 0; b < nbuffers; b++) { ret = use_handle(v_handle[b]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); /* Grab the different pieces of data into main memory */ for (b = 0; b < nbuffers; b++) { ret = starpu_data_acquire(v_handle[b], STARPU_RW); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); } /* Release them */ for (b = 0; b < nbuffers; b++) starpu_data_release(v_handle[b]); } /* do some cleanup */ for (b = 0; b < nbuffers; b++) { starpu_data_unregister(v_handle[b]); starpu_free_noflag(buffer[b], vectorsize); } starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/sync_with_data_with_mem_non_blocking.c000066400000000000000000000104241507764646700270640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" #include /* * Mix submitting tasks and asynchronously acquiring the corresponding data. */ #define NBUFFERS_DEF 64 #define NITER_DEF 128 #define VECTORSIZE_DEF 1024 static unsigned nbuffers = NBUFFERS_DEF; static unsigned niter = NITER_DEF; static unsigned vectorsize = VECTORSIZE_DEF; float *buffer[NBUFFERS_DEF]; starpu_data_handle_t v_handle[NBUFFERS_DEF]; void dummy_codelet(void *descr[], void *_args) { (void)descr; (void)_args; } static struct starpu_codelet cl = { .modes = { STARPU_RW }, .cpu_funcs = {dummy_codelet}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dummy_codelet}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {dummy_codelet}, #endif .cpu_funcs_name = {"dummy_codelet"}, .nbuffers = 1 }; static int use_handle(starpu_data_handle_t handle) { int ret; struct starpu_task *task; task = starpu_task_create(); task->cl = &cl; task->handles[0] = handle; ret = starpu_task_submit(task); return ret; } static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; static unsigned n_synced_buffers; static void callback_sync_data(void *arg) { (void)arg; STARPU_PTHREAD_MUTEX_LOCK(&mutex); n_synced_buffers++; if (n_synced_buffers == nbuffers) STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } int main(int argc, char **argv) { int ret; #ifdef STARPU_QUICK_CHECK nbuffers /= 4; niter /= 4; vectorsize /= 8; #endif ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Allocate all buffers and register them to StarPU */ unsigned b; for (b = 0; b < nbuffers; b++) { ret = starpu_malloc((void **)&buffer[b], vectorsize); STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); starpu_vector_data_register(&v_handle[b], STARPU_MAIN_RAM, (uintptr_t)buffer[b], vectorsize, sizeof(char)); starpu_data_set_sequential_consistency_flag(v_handle[b], 0); } unsigned iter; for (iter = 0; iter < niter; iter++) { /* Use the buffers on the different workers so that it may not * be in main memory anymore */ for (b = 0; b < nbuffers; b++) { ret = use_handle(v_handle[b]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); STARPU_PTHREAD_MUTEX_LOCK(&mutex); n_synced_buffers = 0; STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); /* Grab the different pieces of data into main memory */ for (b = 0; b < nbuffers; b++) { ret = starpu_data_acquire_cb(v_handle[b], STARPU_RW, callback_sync_data, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire_cb"); } /* Wait for all buffers to be available */ STARPU_PTHREAD_MUTEX_LOCK(&mutex); while (n_synced_buffers != nbuffers) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); /* Release them */ for (b = 0; b < nbuffers; b++) starpu_data_release(v_handle[b]); } /* do some cleanup */ for (b = 0; b < nbuffers; b++) { starpu_data_unregister(v_handle[b]); starpu_free_noflag(buffer[b], vectorsize); } starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/sync_with_data_with_mem_non_blocking_implicit.c000066400000000000000000000104211507764646700307530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" #include /* * Mix submitting tasks and asynchronously acquiring the corresponding * data, but without implicit dependencies. */ #define NBUFFERS_DEF 64 #define NITER_DEF 128 #define VECTORSIZE_DEF 1024 static unsigned nbuffers = NBUFFERS_DEF; static unsigned niter = NITER_DEF; static unsigned vectorsize = VECTORSIZE_DEF; float *buffer[NBUFFERS_DEF]; starpu_data_handle_t v_handle[NBUFFERS_DEF]; void dummy_codelet(void *descr[], void *_args) { (void)descr; (void)_args; } static struct starpu_codelet cl = { .modes = { STARPU_RW }, .cpu_funcs = {dummy_codelet}, #ifdef STARPU_USE_CUDA .cuda_funcs = {dummy_codelet}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {dummy_codelet}, #endif .cpu_funcs_name = {"dummy_codelet"}, .nbuffers = 1 }; static int use_handle(starpu_data_handle_t handle) { int ret; struct starpu_task *task; task = starpu_task_create(); task->cl = &cl; task->handles[0] = handle; ret = starpu_task_submit(task); return ret; } static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; static unsigned n_synced_buffers; static void callback_sync_data(void *arg) { (void)arg; STARPU_PTHREAD_MUTEX_LOCK(&mutex); n_synced_buffers++; if (n_synced_buffers == nbuffers) STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } int main(int argc, char **argv) { int ret; #ifdef STARPU_QUICK_CHECK nbuffers /= 4; niter /= 4; vectorsize /= 8; #endif ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Allocate all buffers and register them to StarPU */ unsigned b; for (b = 0; b < nbuffers; b++) { ret = starpu_malloc((void **)&buffer[b], vectorsize); STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); starpu_vector_data_register(&v_handle[b], STARPU_MAIN_RAM, (uintptr_t)buffer[b], vectorsize, sizeof(char)); } unsigned iter; for (iter = 0; iter < niter; iter++) { /* Use the buffers on the different workers so that it may not * be in main memory anymore */ for (b = 0; b < nbuffers; b++) { ret = use_handle(v_handle[b]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } STARPU_PTHREAD_MUTEX_LOCK(&mutex); n_synced_buffers = 0; STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); /* Grab the different pieces of data into main memory */ for (b = 0; b < nbuffers; b++) { ret = starpu_data_acquire_cb(v_handle[b], STARPU_RW, callback_sync_data, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire_cb"); } starpu_do_schedule(); /* Wait for all buffers to be available */ STARPU_PTHREAD_MUTEX_LOCK(&mutex); while (n_synced_buffers != nbuffers) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); /* Release them */ for (b = 0; b < nbuffers; b++) starpu_data_release(v_handle[b]); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); /* do some cleanup */ for (b = 0; b < nbuffers; b++) { starpu_data_unregister(v_handle[b]); starpu_free_noflag(buffer[b], vectorsize); } starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/task_with_multiple_time_the_same_handle.c000066400000000000000000000075011507764646700275610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Test passing the same handle several times to the same task */ void sum_cpu(void * descr[], void *cl_arg) { (void)cl_arg; double * v_dst = (double *) STARPU_VECTOR_GET_PTR(descr[0]); double * v_src = (double *) STARPU_VECTOR_GET_PTR(descr[1]); STARPU_ASSERT(v_dst == v_src); *v_dst+=*v_src; } void sum3_cpu(void * descr[], void *cl_arg) { (void)cl_arg; double * v_src1 = (double *) STARPU_VECTOR_GET_PTR(descr[0]); double * v_src2 = (double *) STARPU_VECTOR_GET_PTR(descr[1]); double * v_dst = (double *) STARPU_VECTOR_GET_PTR(descr[2]); STARPU_ASSERT(v_dst == v_src1); STARPU_ASSERT(v_dst == v_src2); *v_dst+=*v_src1+*v_src2; } void sum4_cpu(void * descr[], void *cl_arg) { (void)cl_arg; double * v_src1 = (double *) STARPU_VECTOR_GET_PTR(descr[0]); double * v_src2 = (double *) STARPU_VECTOR_GET_PTR(descr[1]); double * v_dst1 = (double *) STARPU_VECTOR_GET_PTR(descr[2]); double * v_dst2 = (double *) STARPU_VECTOR_GET_PTR(descr[3]); STARPU_ASSERT(v_src1 == v_dst1); STARPU_ASSERT(v_src2 == v_dst2); *v_dst2 = (*v_dst1+=*v_src1+*v_src2); } static struct starpu_codelet sum_cl = { .cpu_funcs = {sum_cpu}, .cpu_funcs_name = {"sum_cpu"}, .nbuffers = 2, .modes={STARPU_RW,STARPU_R} }; static struct starpu_codelet sum3_cl = { .cpu_funcs = {sum3_cpu}, .cpu_funcs_name = {"sum3_cpu"}, .nbuffers = 3, .modes={STARPU_R,STARPU_R,STARPU_RW} }; static struct starpu_codelet sum4_cl = { .cpu_funcs = {sum4_cpu}, .cpu_funcs_name = {"sum4_cpu"}, .nbuffers = 4, .modes={STARPU_R,STARPU_R,STARPU_RW,STARPU_RW} }; #define N 10 int main(void) { starpu_data_handle_t handle; int ret = 0; double value[N] = { 1.0 }; int i; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret=starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; starpu_vector_data_register(&handle,0,(uintptr_t)&value,N,sizeof(double)); for (i=0; i<2; i++) { ret = starpu_task_insert(&sum_cl, STARPU_RW, handle, STARPU_R, handle, 0); if (ret == -ENODEV) goto enodev; ret = starpu_task_insert(&sum3_cl, STARPU_R, handle, STARPU_R, handle, STARPU_RW, handle, 0); if (ret == -ENODEV) goto enodev; } starpu_data_acquire(handle, STARPU_R); if (value[0] != 36) { FPRINTF(stderr, "value is %f instead of %f\n", value[0], 36.); ret = EXIT_FAILURE; } starpu_data_release(handle); struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = 2, }; starpu_data_partition(handle, &f); starpu_task_insert(&sum4_cl, STARPU_R,starpu_data_get_sub_data(handle,1,0), STARPU_R,starpu_data_get_sub_data(handle,1,1), STARPU_RW,starpu_data_get_sub_data(handle,1,0), STARPU_RW,starpu_data_get_sub_data(handle,1,1), 0); starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_task_wait_for_all(); starpu_data_unregister(handle); starpu_shutdown(); return ret; enodev: starpu_data_unregister(handle); starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/temporary_partition.c000066400000000000000000000056171507764646700235740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #define SIZE (1<<20) #define NPARTS 16 /* * Test asynchronous partitioning on a temporary data. */ static void codelet(void *descr[], void *_args) { (void)descr; (void)_args; } static struct starpu_codelet clw = { .where = STARPU_CPU, .cpu_funcs = {codelet}, .nbuffers = 1, .modes = {STARPU_W} }; static struct starpu_codelet clr = { .where = STARPU_CPU, .cpu_funcs = {codelet}, .nbuffers = 1, .modes = {STARPU_R} }; int main(void) { int ret; starpu_data_handle_t handle, handles[NPARTS]; int i; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_vector_data_register(&handle, -1, 0, SIZE, sizeof(char)); /* Fork */ struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = NPARTS }; starpu_data_partition_plan(handle, &f, handles); starpu_data_partition_submit(handle, NPARTS, handles); /* Process in parallel */ for (i = 0; i < NPARTS; i++) { ret = starpu_task_insert(&clw, STARPU_W, handles[i], 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } /* Invalidate one random piece we don't care coherency about */ starpu_data_invalidate_submit(handles[NPARTS/2]); /* Try to wontuse the whole thing */ starpu_data_wont_use(handle); /* Clean */ starpu_data_unpartition_submit(handle, NPARTS, handles, -1); starpu_data_partition_clean(handle, NPARTS, handles); /* Read result */ ret = starpu_task_insert(&clr, STARPU_R, handle, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_data_unregister(handle); starpu_shutdown(); return 0; enodev: starpu_data_unpartition_submit(handle, NPARTS, handles, -1); starpu_data_partition_clean(handle, NPARTS, handles); starpu_data_unregister(handle); starpu_shutdown(); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ fprintf(stderr, "WARNING: No one can execute this task\n"); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/temporary_partition_implicit.c000066400000000000000000000056241507764646700254640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #define SIZE (1<<20) #define NPARTS 16 /* * Test asynchronous partitioning on a temporary data without submitting explicitly * partitioning/unpartitioning. */ static void codelet(void *descr[], void *_args) { (void)descr; (void)_args; } static struct starpu_codelet clw = { .where = STARPU_CPU, .cpu_funcs = {codelet}, .nbuffers = 1, .modes = {STARPU_W} }; static struct starpu_codelet clr = { .where = STARPU_CPU, .cpu_funcs = {codelet}, .nbuffers = 1, .modes = {STARPU_R} }; int main(void) { int ret; starpu_data_handle_t handle, handles[NPARTS]; int i; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_vector_data_register(&handle, -1, 0, SIZE, sizeof(char)); /* Fork */ struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = NPARTS }; starpu_data_partition_plan(handle, &f, handles); /* Process in parallel */ for (i = 0; i < NPARTS; i++) { ret = starpu_task_insert(&clw, STARPU_W, handles[i], 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } /* Invalidate one random piece we don't care coherency about */ starpu_data_invalidate_submit(handles[NPARTS/2]); /* Try to wontuse the whole thing */ starpu_data_wont_use(handle); /* Clean */ starpu_data_unpartition_submit(handle, NPARTS, handles, -1); starpu_data_partition_clean(handle, NPARTS, handles); /* Read result */ ret = starpu_task_insert(&clr, STARPU_R, handle, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_data_unregister(handle); starpu_shutdown(); return 0; enodev: starpu_data_unpartition_submit(handle, NPARTS, handles, -1); starpu_data_partition_clean(handle, NPARTS, handles); starpu_data_unregister(handle); starpu_shutdown(); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ fprintf(stderr, "WARNING: No one can execute this task\n"); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/temporary_partition_read.c000066400000000000000000000051551507764646700245640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #define SIZE (1<<20) #define NPARTS 16 /* * Test asynchronous read partitioning on a non initialized temporary * data without submitting explicitly partitioning/unpartitioning. */ static void codelet(void *descr[], void *_args) { (void)descr; (void)_args; } static struct starpu_codelet clw = { .where = STARPU_CPU, .cpu_funcs = {codelet}, .nbuffers = 1, .modes = {STARPU_W} }; static struct starpu_codelet clr = { .where = STARPU_CPU, .cpu_funcs = {codelet}, .nbuffers = 1, .modes = {STARPU_R} }; int main(void) { int ret; starpu_data_handle_t handle, handles[NPARTS]; int i; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_vector_data_register(&handle, -1, 0, SIZE, sizeof(char)); starpu_data_set_reduction_methods(handle, NULL, &clw); /* Fork */ struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = NPARTS }; starpu_data_partition_plan(handle, &f, handles); /* Process in parallel */ for (i = 0; i < NPARTS; i++) { ret = starpu_task_insert(&clr, STARPU_R, handles[i], 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } starpu_task_wait_for_all(); starpu_data_partition_clean_node(handle, NPARTS, handles, -1); starpu_data_unregister(handle); starpu_shutdown(); return 0; enodev: starpu_task_wait_for_all(); starpu_data_partition_clean_node(handle, NPARTS, handles, -1); starpu_data_unregister(handle); starpu_shutdown(); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ fprintf(stderr, "WARNING: No one can execute this task\n"); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/test_arbiter.cpp000066400000000000000000000141041507764646700224770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * for i from 0 to nbA * insert task handles[i] in STARPU_RW|STARPU_COMMUTE * for j from 0 to nbA * if i != j insert task handles[i] in STARPU_RW|STARPU_COMMUTE, and handles[j] in STARPU_RW|STARPU_COMMUTE */ // @FUSE_STARPU #include #include "../helper.h" #include #include #ifdef STARPU_QUICK_CHECK #define SLEEP_SLOW 6000 #define SLEEP_FAST 1000 #elif !defined(STARPU_LONG_CHECK) #define SLEEP_SLOW 60000 #define SLEEP_FAST 10000 #else #define SLEEP_SLOW 600000 #define SLEEP_FAST 100000 #endif static unsigned nb, nb_slow; void callback(void * /*buffers*/[], void * /*cl_arg*/) { unsigned val; val = STARPU_ATOMIC_ADD(&nb, 1); FPRINTF(stdout,"callback in (%u)\n", val); fflush(stdout); starpu_usleep(SLEEP_FAST); val = STARPU_ATOMIC_ADD(&nb, -1); FPRINTF(stdout,"callback out (%u)\n", val); fflush(stdout); } void callback_slow(void * /*buffers*/[], void * /*cl_arg*/) { unsigned val; val = STARPU_ATOMIC_ADD(&nb_slow, 1); FPRINTF(stdout,"callback_slow in (%u)\n", val); fflush(stdout); starpu_usleep(SLEEP_SLOW); val = STARPU_ATOMIC_ADD(&nb_slow, -1); FPRINTF(stdout,"callback_slow out (%u)\n", val); fflush(stdout); } int main(int /*argc*/, char** /*argv*/) { int ret; struct starpu_conf conf; starpu_arbiter_t arbiter, arbiter2; ret = starpu_conf_init(&conf); STARPU_ASSERT(ret == 0); //conf.ncpus = 1;//// 4 ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_ASSERT(ret == 0); FPRINTF(stdout, "Max Thread %u\n", starpu_worker_get_count()); ////////////////////////////////////////////////////// starpu_codelet normalCodelete; { memset(&normalCodelete, 0, sizeof(normalCodelete)); normalCodelete.where = STARPU_CPU; normalCodelete.cpu_funcs[0] = callback; normalCodelete.nbuffers = 2; normalCodelete.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); normalCodelete.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); normalCodelete.name = "normalCodelete"; } starpu_codelet slowCodelete; { memset(&slowCodelete, 0, sizeof(slowCodelete)); slowCodelete.where = STARPU_CPU; slowCodelete.cpu_funcs[0] = callback_slow; slowCodelete.nbuffers = 1; slowCodelete.modes[0] = starpu_data_access_mode (STARPU_RW|STARPU_COMMUTE); slowCodelete.name = "slowCodelete"; } ////////////////////////////////////////////////////// ////////////////////////////////////////////////////// ///const int nbA = 3; const int nbA = 10; FPRINTF(stdout, "Nb A = %d\n", nbA); std::vector handleA(nbA); std::vector dataA(nbA); arbiter = starpu_arbiter_create(); arbiter2 = starpu_arbiter_create(); for(int idx = 0 ; idx < nbA ; ++idx) { dataA[idx] = idx; } for(int idxHandle = 0 ; idxHandle < nbA ; ++idxHandle) { starpu_variable_data_register(&handleA[idxHandle], 0, (uintptr_t)&dataA[idxHandle], sizeof(dataA[idxHandle])); starpu_data_assign_arbiter(handleA[idxHandle], arbiter); } ////////////////////////////////////////////////////// ////////////////////////////////////////////////////// FPRINTF(stdout,"Submit tasks\n"); for(int idxHandleA1 = 0 ; idxHandleA1 < nbA ; ++idxHandleA1) { ret = starpu_task_insert(&slowCodelete, (STARPU_RW|STARPU_COMMUTE), handleA[idxHandleA1], 0); if (ret == -ENODEV) goto out; for(int idxHandleA2 = 0 ; idxHandleA2 < nbA ; ++idxHandleA2) { if(idxHandleA1 != idxHandleA2) { ret = starpu_task_insert(&normalCodelete, (STARPU_RW|STARPU_COMMUTE), handleA[idxHandleA1], (STARPU_RW|STARPU_COMMUTE), handleA[idxHandleA2], 0); if (ret == -ENODEV) goto out; } } } ////////////////////////////////////////////////////// FPRINTF(stdout,"Wait task\n"); starpu_task_wait_for_all(); ////////////////////////////////////////////////////// FPRINTF(stdout,"Release data\n"); for(int idxHandle = 0 ; idxHandle < nbA ; ++idxHandle) { starpu_data_unregister(handleA[idxHandle]); } ////////////////////////////////////////////////////// FPRINTF(stdout,"Proceed gain, with several arbiters\n"); for(int idxHandle = 0 ; idxHandle < nbA ; ++idxHandle) { starpu_variable_data_register(&handleA[idxHandle], 0, (uintptr_t)&dataA[idxHandle], sizeof(dataA[idxHandle])); starpu_data_assign_arbiter(handleA[idxHandle], (idxHandle%2)?arbiter:arbiter2); } ////////////////////////////////////////////////////// ////////////////////////////////////////////////////// FPRINTF(stdout,"Submit tasks\n"); for(int idxHandleA1 = 0 ; idxHandleA1 < nbA ; ++idxHandleA1) { ret = starpu_task_insert(&slowCodelete, (STARPU_RW|STARPU_COMMUTE), handleA[idxHandleA1], 0); if (ret == -ENODEV) goto out; for(int idxHandleA2 = 0 ; idxHandleA2 < nbA ; ++idxHandleA2) { if(idxHandleA1 != idxHandleA2) { ret = starpu_task_insert(&normalCodelete, (STARPU_RW|STARPU_COMMUTE), handleA[idxHandleA1], (STARPU_RW|STARPU_COMMUTE), handleA[idxHandleA2], 0); if (ret == -ENODEV) goto out; } } } ////////////////////////////////////////////////////// FPRINTF(stdout,"Wait task\n"); out: starpu_task_wait_for_all(); ////////////////////////////////////////////////////// FPRINTF(stdout,"Release data\n"); for(int idxHandle = 0 ; idxHandle < nbA ; ++idxHandle) { starpu_data_unregister(handleA[idxHandle]); } starpu_arbiter_destroy(arbiter); starpu_arbiter_destroy(arbiter2); ////////////////////////////////////////////////////// FPRINTF(stdout,"Shutdown\n"); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/tests/datawizard/unpartition.c000066400000000000000000000067041507764646700220330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Test running a task on a partitioned data, then on the unpartitioned * data, etc. in a loop */ #ifdef STARPU_QUICK_CHECK #define NITER 100 #else #define NITER 1000 #endif #define VECTORSIZE 1024 float *buffer; starpu_data_handle_t v_handle; static struct starpu_task* create_task(starpu_data_handle_t handle) { starpu_codelet_nop.nbuffers = 1; starpu_codelet_nop.modes[0] = STARPU_RW; struct starpu_task *task; task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->handles[0] = handle; task->detach = 0; return task; } int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_malloc((void **)&buffer, VECTORSIZE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)buffer, VECTORSIZE, sizeof(char)); struct starpu_data_filter f = { .filter_func = starpu_vector_filter_divide_in_2, /* there are only 2 children */ .nchildren = 2, /* the length of the first part */ .filter_arg = VECTORSIZE/2 }; unsigned iter; for (iter = 0; iter < NITER; iter++) { struct starpu_task *tasks[3]; starpu_data_map_filters(v_handle, 1, &f); tasks[0] = create_task(starpu_data_get_sub_data(v_handle, 1, 0)); ret = starpu_task_submit(tasks[0]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); tasks[1] = create_task(starpu_data_get_sub_data(v_handle, 1, 1)); ret = starpu_task_submit(tasks[1]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unpartition(v_handle, STARPU_MAIN_RAM); tasks[2] = create_task(v_handle); ret = starpu_task_submit(tasks[2]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_task_destroy(tasks[0]); starpu_task_destroy(tasks[1]); starpu_task_destroy(tasks[2]); } starpu_data_unregister(v_handle); starpu_free_noflag(buffer, VECTORSIZE); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(v_handle); starpu_free_noflag(buffer, VECTORSIZE); starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/user_interaction_implicit.c000066400000000000000000000044431507764646700247240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Test modifying the data in the callback of starpu_data_acquire_cb */ #ifdef STARPU_QUICK_CHECK # define NBUFFERS 4 # define NITER 16 #else # define NBUFFERS 16 # define NITER 128 #endif struct data { unsigned index; unsigned val; starpu_data_handle_t handle; }; struct data buffers[NBUFFERS]; static void callback_sync_data(void *arg) { struct data *data = (struct data *) arg; data->val++; starpu_data_release(data->handle); } int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned b; for (b = 0; b < NBUFFERS; b++) { buffers[b].index = b; starpu_variable_data_register(&buffers[b].handle, STARPU_MAIN_RAM, (uintptr_t)&buffers[b].val, sizeof(unsigned)); } unsigned iter; for (iter = 0; iter < NITER; iter++) { for (b = 0; b < NBUFFERS; b++) { ret = starpu_data_acquire_cb(buffers[b].handle, STARPU_RW, callback_sync_data, &buffers[b]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire_cb"); } } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); /* do some cleanup */ ret = EXIT_SUCCESS; for (b = 0; b < NBUFFERS; b++) { starpu_data_unregister(buffers[b].handle); /* check result */ if (buffers[b].val != NITER) { FPRINTF(stderr, "buffer[%u] = %u should be %d\n", b, buffers[b].val, NITER); ret = EXIT_FAILURE; } } starpu_shutdown(); return ret; } starpu-1.4.9+dfsg/tests/datawizard/variable_parameters.c000066400000000000000000000153471507764646700234720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Test the variable interface */ static starpu_data_handle_t handle1, handle2, handle3, handle4; /* * Increment codelet */ #ifdef STARPU_USE_OPENCL /* dummy OpenCL implementation */ static void increment_opencl_kernel(void *descr[], void *cl_arg) { (void)cl_arg; int num = starpu_task_get_current()->nbuffers; int i; for (i = 0; i < num; i++) { cl_mem d_token = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[i]); unsigned h_token; cl_int err; cl_command_queue queue; starpu_opencl_get_current_queue(&queue); err = clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); h_token++; err = clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); } } #endif #ifdef STARPU_USE_CUDA static void increment_cuda_kernel(void *descr[], void *cl_arg) { (void)cl_arg; int num = starpu_task_get_current()->nbuffers; int i; for (i = 0; i < num; i++) { unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[i]); unsigned host_token; /* This is a dummy technique of course */ cudaMemcpyAsync(&host_token, tokenptr, sizeof(unsigned), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); cudaStreamSynchronize(starpu_cuda_get_local_stream()); host_token++; cudaMemcpyAsync(tokenptr, &host_token, sizeof(unsigned), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream()); } cudaStreamSynchronize(starpu_cuda_get_local_stream()); } #endif void increment_cpu_kernel(void *descr[], void *cl_arg) { (void)cl_arg; int num = starpu_task_get_current()->nbuffers; int i; for (i = 0; i < num; i++) { unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[i]); *tokenptr = *tokenptr + 1; } } static struct starpu_codelet increment_cl = { #ifdef STARPU_USE_CUDA .cuda_funcs = {increment_cuda_kernel}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {increment_opencl_kernel}, #endif .cpu_funcs = {increment_cpu_kernel}, /* starpu_task_get_current() doesn't work on MPI Master Slave */ /*.cpu_funcs_name = {"increment_cpu_kernel"},*/ .nbuffers = STARPU_VARIABLE_NBUFFERS, }; int main(void) { unsigned *pvar = NULL; int ret; unsigned var1 = 0, var2 = 0, var3 = 0, var4 = 0; ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_variable_data_register(&handle1, STARPU_MAIN_RAM, (uintptr_t)&var1, sizeof(unsigned)); starpu_variable_data_register(&handle2, STARPU_MAIN_RAM, (uintptr_t)&var2, sizeof(unsigned)); starpu_variable_data_register(&handle3, STARPU_MAIN_RAM, (uintptr_t)&var3, sizeof(unsigned)); starpu_variable_data_register(&handle4, STARPU_MAIN_RAM, (uintptr_t)&var4, sizeof(unsigned)); #ifdef STARPU_QUICK_CHECK unsigned nloops = 4; #else unsigned nloops = 16; #endif unsigned loop; unsigned t; for (loop = 0; loop < nloops; loop++) { for (t = 0; t <= 4; t++) { struct starpu_task *task = starpu_task_create(); unsigned i; task->cl = &increment_cl; task->handles[0] = handle1; task->handles[1] = handle2; task->handles[2] = handle3; task->handles[3] = handle4; for (i = 0; i < t; i++) task->modes[i] = STARPU_RW; task->nbuffers = t; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_insert(&increment_cl, STARPU_RW, handle1, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&increment_cl, STARPU_RW, handle1, STARPU_RW, handle2, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&increment_cl, STARPU_RW, handle1, STARPU_RW, handle2, STARPU_RW, handle3, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&increment_cl, STARPU_RW, handle1, STARPU_RW, handle2, STARPU_RW, handle3, STARPU_RW, handle4, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } ret = starpu_data_acquire(handle1, STARPU_R); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); if (var1 != 8*nloops) { FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", var1, 8*nloops); starpu_data_release(handle1); goto err; } starpu_data_release(handle1); ret = starpu_data_acquire(handle2, STARPU_R); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); if (var2 != 6*nloops) { FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", var2, 6*nloops); starpu_data_release(handle2); goto err; } starpu_data_release(handle2); ret = starpu_data_acquire(handle3, STARPU_R); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); if (var3 != 4*nloops) { FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", var3, 4*nloops); starpu_data_release(handle3); goto err; } starpu_data_release(handle3); ret = starpu_data_acquire(handle4, STARPU_R); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); if (var4 != 2*nloops) { FPRINTF(stderr, "[end of loop] Value %u != Expected value %u\n", var4, 2*nloops); starpu_data_release(handle4); goto err; } starpu_data_release(handle4); starpu_data_unregister(handle1); starpu_data_unregister(handle2); starpu_data_unregister(handle3); starpu_data_unregister(handle4); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(handle1); starpu_data_unregister(handle2); starpu_data_unregister(handle3); starpu_data_unregister(handle4); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; err: starpu_data_unregister(handle1); starpu_data_unregister(handle2); starpu_data_unregister(handle3); starpu_data_unregister(handle4); starpu_shutdown(); return EXIT_FAILURE; } starpu-1.4.9+dfsg/tests/datawizard/variable_size.c000066400000000000000000000255151507764646700222770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * This is a dumb test for variable size * We defined a dumb interface for data whose size increase over kernel execution */ #ifdef STARPU_HAVE_MEMCHECK_H #include #else #define VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(addr, size) (void)0 #endif #include #define FULLSIZE (5*1024*1024ULL) #define INCREASE 0.80 #ifdef STARPU_QUICK_CHECK #define N 5 #define LIMIT "60" #else #define N 10 #define LIMIT "250" #endif /* Define the interface */ #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(int argc, char **argv) { return STARPU_TEST_SKIPPED; } #elif STARPU_MAXNODES == 1 /* Cannot register a disk */ int main(int argc, char **argv) { return STARPU_TEST_SKIPPED; } #else /* Sample Data interface with variable size */ struct variable_size_interface { enum starpu_data_interface_id id; /* Just a buffer of a given size */ uintptr_t ptr; size_t size; /* Coordinates of the represented object, just for modeling growth */ unsigned x, y; }; static struct starpu_data_interface_ops starpu_interface_variable_size_ops; static void register_variable_size(starpu_data_handle_t handle, int home_node, void *data_interface) { struct variable_size_interface *variable_size_interface = data_interface; int node; for (node = 0; node < STARPU_MAXNODES; node++) { struct variable_size_interface *local_interface = starpu_data_get_interface_on_node(handle, node); if (node == home_node) local_interface->ptr = variable_size_interface->ptr; local_interface->size = variable_size_interface->size; local_interface->id = variable_size_interface->id; local_interface->x = variable_size_interface->x; local_interface->y = variable_size_interface->y; } } void variable_size_data_register(starpu_data_handle_t *handleptr, unsigned x, unsigned y) { struct variable_size_interface vsinterface = { .id = starpu_interface_variable_size_ops.interfaceid, .x = x, .y = y, }; /* Simulate that tiles close to the diagonal are more dense */ vsinterface.size = FULLSIZE * (starpu_lrand48() % 1024 + 1024) / 2048. * (N-sqrt(abs((int)x-(int)y)*N)) / N; /* Round to page size */ vsinterface.size -= vsinterface.size & (65536-1); _starpu_simgrid_data_new(vsinterface.size); starpu_data_register(handleptr, -1, &vsinterface, &starpu_interface_variable_size_ops); } static size_t variable_size_get_size(starpu_data_handle_t handle) { struct variable_size_interface *vsinterface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return vsinterface->size; } static size_t variable_size_get_max_size(starpu_data_handle_t handle) { (void)handle; return FULLSIZE; } static uint32_t variable_size_footprint(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(variable_size_get_size(handle), 0); } static int variable_size_compare(void *data_interface_a, void *data_interface_b) { struct variable_size_interface *variable_a = data_interface_a; struct variable_size_interface *variable_b = data_interface_b; /* Two variables are considered compatible if they have the same size */ return variable_a->size == variable_b->size; } static void display_variable_size(starpu_data_handle_t handle, FILE *f) { struct variable_size_interface *variable_interface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); fprintf(f, "%lu\t", (unsigned long) variable_interface->size); } static starpu_ssize_t describe_variable_size(void *data_interface, char *buf, size_t size) { struct variable_size_interface *variable_interface = data_interface; return snprintf(buf, size, "vv%lu\t", (unsigned long) variable_interface->size); } /* returns the size of the allocated area */ static starpu_ssize_t allocate_variable_size_on_node(void *data_interface, unsigned dst_node) { struct variable_size_interface *variable_interface = data_interface; variable_interface->ptr = starpu_malloc_on_node_flags(dst_node, variable_interface->size, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW); if (dst_node == STARPU_MAIN_RAM) _starpu_simgrid_data_alloc(variable_interface->size); STARPU_ASSERT(variable_interface->ptr); return 0; } static void free_variable_size_on_node(void *data_interface, unsigned node) { struct variable_size_interface *variable_interface = data_interface; starpu_free_on_node(node, variable_interface->ptr, variable_interface->size); if (node == STARPU_MAIN_RAM) _starpu_simgrid_data_free(variable_interface->size); variable_interface->ptr = 0; } static int variable_size_copy(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { struct variable_size_interface *src = src_interface; struct variable_size_interface *dst = dst_interface; if (src->size != dst->size) { /* size has been changed by the application in the meantime */ starpu_free_on_node(dst_node, dst->ptr, dst->size); dst->ptr = starpu_malloc_on_node_flags(dst_node, src->size, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW); dst->size = src->size; } return starpu_interface_copy(src->ptr, 0, src_node, dst->ptr, 0, dst_node, src->size, async_data); } static const struct starpu_data_copy_methods variable_size_copy_data_methods = { .any_to_any = variable_size_copy, }; static struct starpu_data_interface_ops starpu_interface_variable_size_ops = { .register_data_handle = register_variable_size, .allocate_data_on_node = allocate_variable_size_on_node, .free_data_on_node = free_variable_size_on_node, .copy_methods = &variable_size_copy_data_methods, .get_size = variable_size_get_size, .get_max_size = variable_size_get_max_size, .footprint = variable_size_footprint, .compare = variable_size_compare, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct variable_size_interface), .display = display_variable_size, .pack_data = NULL, .peek_data = NULL, .unpack_data = NULL, .describe = describe_variable_size, /* We want to observe actual allocations/deallocations */ .dontcache = 1, }; static void kernel(void *descr[], void *cl_arg) { struct variable_size_interface *variable_interface = descr[0]; unsigned workerid = starpu_worker_get_id_check(); uintptr_t old = variable_interface->ptr; unsigned dst_node = starpu_worker_get_memory_node(workerid); (void) cl_arg; /* Simulate that tiles close to the diagonal fill up faster */ size_t increase = (FULLSIZE - variable_interface->size) * (starpu_lrand48() % 1024 + 1024) / 2048. * INCREASE; /* Round to page size */ increase -= increase & (65536-1); /* Allocation increase */ variable_interface->ptr = starpu_malloc_on_node_flags(dst_node, variable_interface->size + increase, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW); VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE((void*) variable_interface->ptr, variable_interface->size + increase); STARPU_ASSERT(variable_interface->ptr); /* fprintf(stderr,"increase from %lu by %lu\n", variable_interface->size, increase); */ starpu_free_on_node_flags(dst_node, old, variable_interface->size, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT | STARPU_MEMORY_OVERFLOW); variable_interface->size += increase; /* These are only simulation bits */ if (increase) _starpu_simgrid_data_increase(increase); starpu_sleep(0.010); } static double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) { (void)t; (void)a; (void)i; return 10000; } static struct starpu_perfmodel perf_model = { .type = STARPU_PER_ARCH, .arch_cost_function = cost_function, }; static struct starpu_codelet cl = { .cpu_funcs = {kernel}, /* dynamic size doesn't work on MIC */ /*.cpu_funcs_name = {"kernel"},*/ .nbuffers = 1, .modes = {STARPU_RW}, .model = &perf_model, .flags = STARPU_CODELET_SIMGRID_EXECUTE, }; static void init(void *descr[], void *cl_arg) { (void)cl_arg; struct variable_size_interface *variable_interface = descr[0]; VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE((void*) variable_interface->ptr, variable_interface->size); } static struct starpu_codelet cl_init = { .cpu_funcs = {init}, /* dynamic size doesn't work on MIC */ /*.cpu_funcs_name = {"kernel"},*/ .nbuffers = 1, .modes = {STARPU_W}, .model = &starpu_perfmodel_nop, }; int main(void) { int ret; int i; int x, y; starpu_data_handle_t handles[N][N]; char s[128]; snprintf(s, sizeof(s), "/tmp/%s-variable_size", getenv("USER")); setenv("STARPU_CALIBRATE_MINIMUM", "1", 1); setenv("STARPU_LIMIT_CPU_MEM", LIMIT, 1); setenv("STARPU_DISK_SWAP", s, 0); setenv("STARPU_DISK_SWAP_SIZE", "100000", 1); #if 0 //def STARPU_LINUX_SYS setenv("STARPU_DISK_SWAP_BACKEND", "unistd_o_direct", 0); #else setenv("STARPU_DISK_SWAP_BACKEND", "unistd", 0); #endif struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); for (x = 0; x < N; x++) for (y = 0; y < N; y++) { variable_size_data_register(&handles[x][y], x, y); ret = starpu_task_insert(&cl_init, STARPU_W, handles[x][y], 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); #ifdef STARPU_SIMGRID starpu_sleep(0.0005); #endif } starpu_task_wait_for_all(); /* Cholesky-like accesses */ for (i = 0; i < N; i++) for (x = i; x < N; x++) for (y = x; y < N; y++) { ret = starpu_task_insert(&cl, STARPU_RW, handles[x][y], STARPU_PRIORITY, (2*N-x-y), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } starpu_task_wait_for_all(); #if 0 /* Look at the values */ for (x = 0; x < N; x++) for (y = 0; y < N; y++) { starpu_data_acquire(handles[x][y], STARPU_R); starpu_data_release(handles[x][y]); } #endif for (x = 0; x < N; x++) for (y = 0; y < N; y++) starpu_data_unregister(handles[x][y]); starpu_shutdown(); return EXIT_SUCCESS; enodev: for (x = 0; x < N; x++) for (y = 0; y < N; y++) starpu_data_unregister(handles[x][y]); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } #endif starpu-1.4.9+dfsg/tests/datawizard/write_only_tmp_buffer.c000066400000000000000000000077661507764646700240740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Test initializing a buffer with a task, then printing it with another task */ #define VECTORSIZE 1024 starpu_data_handle_t v_handle; #ifdef STARPU_USE_OPENCL static void opencl_codelet_null(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; cl_mem buf = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(descr[0]); char ptr = 42; cl_command_queue queue; cl_int err; int id = starpu_worker_get_id_check(); int devid = starpu_worker_get_devid(id); starpu_opencl_get_queue(devid, &queue); err = clEnqueueWriteBuffer(queue, buf, CL_TRUE, 0, sizeof(char), &ptr, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); } #endif #ifdef STARPU_USE_CUDA static void cuda_codelet_null(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); cudaMemsetAsync(buf, 42, 1, starpu_cuda_get_local_stream()); } #endif void cpu_codelet_null(void *descr[], void *arg) { (void)arg; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); *buf = 42; } void display_var(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]); if (*buf != 42) { FPRINTF(stderr, "Value = <%c> (should be <%c>)\n", *buf, 42); exit(-1); } } static struct starpu_codelet cl = { .cpu_funcs = {cpu_codelet_null}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_codelet_null}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {opencl_codelet_null}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs_name = {"cpu_codelet_null"}, .nbuffers = 1, .modes = {STARPU_W} }; static struct starpu_codelet display_cl = { .cpu_funcs = {display_var}, .cpu_funcs_name = {"display_var"}, .nbuffers = 1, .modes = {STARPU_R} }; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* The buffer should never be explicitly allocated */ starpu_vector_data_register(&v_handle, (uint32_t)-1, (uintptr_t)NULL, VECTORSIZE, sizeof(char)); struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = v_handle; task->detach = 0; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); task = starpu_task_create(); task->cl = &display_cl; task->handles[0] = v_handle; task->detach = 0; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); /* this should get rid of automatically allocated buffers */ starpu_data_unregister(v_handle); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(v_handle); starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/wt_broadcast.c000066400000000000000000000051201507764646700221220ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "../variable/increment.h" /* * Test using starpu_data_set_wt_mask(handle, ~0);, i.e. broadcasting the * result on all devices as soon as it is available. */ static unsigned var = 0; static starpu_data_handle_t handle; int main(void) { int ret; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.ncuda = -1; conf.nopencl = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); increment_load_opencl(); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(unsigned)); /* Create a mask with all the memory nodes, so that we can ask StarPU * to broadcast the handle whenever it is modified. */ starpu_data_set_wt_mask(handle, ~0); #ifdef STARPU_QUICK_CHECK unsigned ntasks = 32; unsigned nloops = 4; #else unsigned ntasks = 1024; unsigned nloops = 16; #endif unsigned loop; unsigned t; for (loop = 0; loop < nloops; loop++) { for (t = 0; t < ntasks; t++) { struct starpu_task *task = starpu_task_create(); task->cl = &increment_cl; task->handles[0] = handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } starpu_data_unregister(handle); ret = EXIT_SUCCESS; if (var != ntasks*nloops) { FPRINTF(stderr, "VAR is %u should be %u\n", var, ntasks); ret = EXIT_FAILURE; } increment_unload_opencl(); starpu_shutdown(); return ret; enodev: starpu_data_unregister(handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/datawizard/wt_host.c000066400000000000000000000044731507764646700211470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "../variable/increment.h" /* * Test writing back the result into main memory as soon as it is available */ static unsigned var = 0; static starpu_data_handle_t handle; int main(void) { int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); increment_load_opencl(); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(unsigned)); /* Copy the handle in main memory every time it is modified */ uint32_t wt_mask = (1<cl = &increment_cl; task->handles[0] = handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } starpu_data_unregister(handle); ret = EXIT_SUCCESS; if (var != ntasks*nloops) { ret = EXIT_FAILURE; FPRINTF(stderr, "VAR is %u should be %u\n", var, ntasks); } increment_unload_opencl(); starpu_shutdown(); STARPU_RETURN(ret); enodev: starpu_data_unregister(handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/disk/000077500000000000000000000000001507764646700161045ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/disk/disk_compute.c000066400000000000000000000263571507764646700207530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include "../helper.h" #ifdef STARPU_HAVE_HDF5 #include #endif #if STARPU_MAXNODES == 1 /* Cannot register a disk */ int main(int argc, char **argv) { return STARPU_TEST_SKIPPED; } #else /* * Try to write into disk memory * Use mechanism to push data from main ram to disk ram * Here we just simulate performing a dumb computation C=A+0, i.e. a mere copy * actually */ #define NX (16*1024) int dotest(struct starpu_disk_ops *ops, char *base) { int *A, *C; /* Initialize StarPU with default configuration */ /* Initialize StarPU without GPU devices to make sure the memory of the GPU devices will not be used */ // Ignore environment variables as we want to force the exact number of workers struct starpu_conf conf; int ret = starpu_conf_init(&conf); if (ret == -EINVAL) return EXIT_FAILURE; conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.ncpus = 1; conf.nmpi_ms = 0; conf.ntcpip_ms = 0; ret = starpu_init(&conf); if (ret == -ENODEV) goto enodev; /* Initialize path and name */ const char *name_file_start = "STARPU_DISK_COMPUTE_DATA_"; const char *name_file_end = "STARPU_DISK_COMPUTE_DATA_RESULT_"; char * path_file_start = malloc(strlen(base) + 1 + strlen(name_file_start) + 1); strcpy(path_file_start, base); strcat(path_file_start, "/"); strcat(path_file_start, name_file_start); char * path_file_end = malloc(strlen(base) + 1 + strlen(name_file_end) + 1); strcpy(path_file_end, base); strcat(path_file_end, "/"); strcat(path_file_end, name_file_end); /* register a disk */ int new_dd = starpu_disk_register(ops, (void *) base, STARPU_DISK_SIZE_MIN); /* can't write on /tmp/ */ if (new_dd == -ENOENT) goto enoent; unsigned dd = (unsigned) new_dd; /* allocate two memory spaces */ starpu_malloc_flags((void **)&A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_malloc_flags((void **)&C, NX*sizeof(int), STARPU_MALLOC_COUNT); FPRINTF(stderr, "TEST DISK MEMORY \n"); unsigned int j; /* you register them in a vector */ for(j = 0; j < NX; ++j) { A[j] = j; C[j] = 0; } /* you create a file to store the vector ON the disk */ FILE * f = fopen(path_file_start, "wb+"); if (f == NULL) goto enoent2; /* store it in the file */ fwrite(A, sizeof(int), NX, f); /* close the file */ fclose(f); int descriptor = open(path_file_start, O_RDWR); if (descriptor < 0) goto enoent2; #ifdef STARPU_HAVE_WINDOWS _commit(descriptor); #else fsync(descriptor); #endif close(descriptor); /* create a file to store result */ f = fopen(path_file_end, "wb+"); if (f == NULL) goto enoent2; /* replace all data by 0 */ fwrite(C, sizeof(int), NX, f); /* close the file */ fclose(f); descriptor = open(path_file_end, O_RDWR); #ifdef STARPU_HAVE_WINDOWS _commit(descriptor); #else fsync(descriptor); #endif close(descriptor); /* And now, you want to use your data in StarPU */ /* Open the file ON the disk */ void * data = starpu_disk_open(dd, (void *) name_file_start, NX*sizeof(int)); void * data_result = starpu_disk_open(dd, (void *) name_file_end, NX*sizeof(int)); starpu_data_handle_t vector_handleA, vector_handleC; /* register vector in starpu */ starpu_vector_data_register(&vector_handleA, dd, (uintptr_t) data, NX, sizeof(int)); /* and do what you want with it, here we copy it into an other vector */ starpu_vector_data_register(&vector_handleC, dd, (uintptr_t) data_result, NX, sizeof(int)); starpu_data_cpy(vector_handleC, vector_handleA, 0, NULL, NULL); /* free them */ starpu_data_unregister(vector_handleA); starpu_data_unregister(vector_handleC); /* close them in StarPU */ starpu_disk_close(dd, data, NX*sizeof(int)); starpu_disk_close(dd, data_result, NX*sizeof(int)); /* check results */ f = fopen(path_file_end, "rb+"); if (f == NULL) goto enoent2; /* take data */ size_t read = fread(C, sizeof(int), NX, f); STARPU_ASSERT(read == NX); /* close the file */ fclose(f); int try = 1; for (j = 0; j < NX; ++j) if (A[j] != C[j]) { FPRINTF(stderr, "Fail A %d != C %d \n", A[j], C[j]); try = 0; } starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); unlink(path_file_start); unlink(path_file_end); free(path_file_start); free(path_file_end); /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); if(try) FPRINTF(stderr, "TEST SUCCESS\n"); else FPRINTF(stderr, "TEST FAIL\n"); return try ? EXIT_SUCCESS : EXIT_FAILURE; enodev: return STARPU_TEST_SKIPPED; enoent2: starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); enoent: unlink(path_file_start); unlink(path_file_end); free(path_file_start); free(path_file_end); FPRINTF(stderr, "Couldn't write data: ENOENT\n"); starpu_shutdown(); return STARPU_TEST_SKIPPED; } #ifdef STARPU_HAVE_HDF5 int dotest_hdf5(struct starpu_disk_ops *ops, char *base) { int *A, *C; herr_t status; /* Open and close file, just to create an empty file */ FILE * f = fopen(base, "wb+"); if (!f) goto h5fail2; fclose(f); /* Initialize StarPU with default configuration */ int ret = starpu_init(NULL); if (ret == -ENODEV) goto h5enodev; /* Initialize path */ const char *path_obj_start = "STARPU_DISK_COMPUTE_DATA_"; const char *path_obj_end = "STARPU_DISK_COMPUTE_DATA_RESULT_"; /* register a disk */ int new_dd = starpu_disk_register(ops, (void *) base, STARPU_DISK_SIZE_MIN); /* can't write on /tmp/ */ if (new_dd == -ENOENT) goto h5enoent; unsigned dd = (unsigned) new_dd; /* allocate two memory spaces */ starpu_malloc_flags((void **)&A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_malloc_flags((void **)&C, NX*sizeof(int), STARPU_MALLOC_COUNT); FPRINTF(stderr, "TEST DISK MEMORY \n"); unsigned int j; /* you register them in a vector */ for(j = 0; j < NX; ++j) { A[j] = j; C[j] = 0; } /* Open HDF5 file to store data */ hid_t file = H5Fopen(base, H5F_ACC_RDWR, H5P_DEFAULT); if (file < 0) goto h5enoent2; /* store initial data in the file */ hsize_t dims[1] = {NX}; hid_t dataspace = H5Screate_simple(1, dims, NULL); if (dataspace < 0) { H5Fclose(file); goto h5fail; } hid_t dataset = H5Dcreate2(file, path_obj_start, H5T_NATIVE_INT, dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (dataset < 0) { H5Sclose(dataspace); H5Fclose(file); goto h5fail; } status = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, A); /* close the resources before checking the writing */ H5Dclose(dataset); if (status < 0) { H5Fclose(file); goto h5fail; } /* initialize results in file */ dataset = H5Dcreate2(file, path_obj_end, H5T_NATIVE_INT, dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (dataset < 0) { H5Sclose(dataspace); H5Fclose(file); goto h5fail; } status = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, A); /* close the resources before checking the writing */ H5Dclose(dataset); H5Sclose(dataspace); H5Fclose(file); if (status < 0) goto h5fail; /* And now, you want to use your data in StarPU */ /* Open the file ON the disk */ void * data = starpu_disk_open(dd, (void *) path_obj_start, NX*sizeof(int)); void * data_result = starpu_disk_open(dd, (void *) path_obj_end, NX*sizeof(int)); starpu_data_handle_t vector_handleA, vector_handleC; /* register vector in starpu */ starpu_vector_data_register(&vector_handleA, dd, (uintptr_t) data, NX, sizeof(int)); /* and do what you want with it, here we copy it into an other vector */ starpu_vector_data_register(&vector_handleC, dd, (uintptr_t) data_result, NX, sizeof(int)); starpu_data_cpy(vector_handleC, vector_handleA, 0, NULL, NULL); /* free them */ starpu_data_unregister(vector_handleA); starpu_data_unregister(vector_handleC); /* close them in StarPU */ starpu_disk_close(dd, data, NX*sizeof(int)); starpu_disk_close(dd, data_result, NX*sizeof(int)); /* check results */ file = H5Fopen(base, H5F_ACC_RDWR, H5P_DEFAULT); if (file < 0) goto h5enoent2; dataset = H5Dopen2(file, path_obj_end, H5P_DEFAULT); if (dataset < 0) { H5Fclose(file); goto h5fail; } status = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, C); /* close the resources before checking the writing */ H5Dclose(dataset); H5Fclose(file); if (status < 0) goto h5fail; int try = 1; for (j = 0; j < NX; ++j) if (A[j] != C[j]) { FPRINTF(stderr, "Fail A %d != C %d \n", A[j], C[j]); try = 0; } starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); unlink(base); if(try) FPRINTF(stderr, "TEST SUCCESS\n"); else FPRINTF(stderr, "TEST FAIL\n"); return (try ? EXIT_SUCCESS : EXIT_FAILURE); h5enodev: unlink(base); return STARPU_TEST_SKIPPED; h5enoent2: starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); h5enoent: FPRINTF(stderr, "Couldn't write data: ENOENT\n"); starpu_shutdown(); unlink(base); return STARPU_TEST_SKIPPED; h5fail: starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_shutdown(); unlink(base); h5fail2: FPRINTF(stderr, "Something goes wrong with HDF5 dataset/dataspace/write \n"); return EXIT_FAILURE; } #endif static int merge_result(int old, int new) { if (new == EXIT_FAILURE) return EXIT_FAILURE; if (old == 0) return 0; return new; } int main(void) { int ret = 0; int ret2; char s[128]; char *ptr; #ifdef STARPU_HAVE_SETENV setenv("STARPU_CALIBRATE_MINIMUM", "1", 1); #endif snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", getenv("USER")); ptr = _starpu_mkdtemp(s); if (!ptr) { FPRINTF(stderr, "Cannot make directory '%s'\n", s); return STARPU_TEST_SKIPPED; } ret = merge_result(ret, dotest(&starpu_disk_stdio_ops, s)); ret = merge_result(ret, dotest(&starpu_disk_unistd_ops, s)); #ifdef STARPU_LINUX_SYS if ((NX * sizeof(int)) % getpagesize() == 0) { ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s)); } else { ret = merge_result(ret, STARPU_TEST_SKIPPED); } #endif #ifdef STARPU_HAVE_HDF5 char hdf5_base[128]; strcpy(hdf5_base, s); strcat(hdf5_base, "/STARPU_HDF5_file.h5"); ret = merge_result(ret, dotest_hdf5(&starpu_disk_hdf5_ops, hdf5_base)); #endif ret2 = rmdir(s); if (ret2 < 0) STARPU_CHECK_RETURN_VALUE(-errno, "rmdir '%s'\n", s); return ret; } #endif starpu-1.4.9+dfsg/tests/disk/disk_copy.c000066400000000000000000000132321507764646700202350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Try to write into disk memory * Use mechanism to push data from main ram to disk ram * Here we make copies between buffers, that StarPU has to evict while * progressing because there is not enough room for all of them. */ /* RAM is not enough to hold 6 times NX * DISK is just enough to hold 6 times NX */ /* size of one vector */ #ifdef STARPU_QUICK_CHECK # define RAM "1" # define DISK 64 # define NX (256*1024/sizeof(double)) #else # define NX (32*1048576/sizeof(double)) # define RAM "160" # define DISK 200 #endif #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #elif STARPU_MAXNODES == 1 /* Cannot register a disk */ int main(int argc, char **argv) { return STARPU_TEST_SKIPPED; } #else int dotest(struct starpu_disk_ops *ops, void *param) { double *A,*F; int ret; /* limit main ram to force to push in disk */ setenv("STARPU_LIMIT_CPU_NUMA_MEM", RAM, 1); /* Initialize StarPU without GPU devices to make sure the memory of the GPU devices will not be used */ // Ignore environment variables as we want to force the exact number of workers struct starpu_conf conf; ret = starpu_conf_init(&conf); if (ret == -EINVAL) return EXIT_FAILURE; conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.ncpus = 1; conf.nmpi_ms = 0; conf.ntcpip_ms = 0; ret = starpu_init(&conf); if (ret == -ENODEV) goto enodev; /* register a disk */ int new_dd = starpu_disk_register(ops, param, 1024*1024*DISK); /* can't write on /tmp/ */ if (new_dd == -ENOENT) goto enoent; /* allocate two memory spaces */ starpu_malloc_flags((void **)&A, NX*sizeof(double), STARPU_MALLOC_COUNT); starpu_malloc_flags((void **)&F, NX*sizeof(double), STARPU_MALLOC_COUNT); FPRINTF(stderr, "TEST DISK MEMORY \n"); unsigned int j; /* initialization with bad values */ for(j = 0; j < NX; ++j) { A[j] = j; F[j] = -j; } starpu_data_handle_t vector_handleA, vector_handleB, vector_handleC, vector_handleD, vector_handleE, vector_handleF; /* register vector in starpu */ starpu_vector_data_register(&vector_handleA, STARPU_MAIN_RAM, (uintptr_t)A, NX, sizeof(double)); starpu_vector_data_register(&vector_handleB, -1, (uintptr_t) NULL, NX, sizeof(double)); starpu_vector_data_register(&vector_handleC, -1, (uintptr_t) NULL, NX, sizeof(double)); starpu_vector_data_register(&vector_handleD, -1, (uintptr_t) NULL, NX, sizeof(double)); starpu_vector_data_register(&vector_handleE, -1, (uintptr_t) NULL, NX, sizeof(double)); starpu_vector_data_register(&vector_handleF, STARPU_MAIN_RAM, (uintptr_t)F, NX, sizeof(double)); /* copy vector A->B, B->C... */ starpu_data_cpy(vector_handleB, vector_handleA, 0, NULL, NULL); starpu_data_cpy(vector_handleC, vector_handleB, 0, NULL, NULL); starpu_data_cpy(vector_handleD, vector_handleC, 0, NULL, NULL); starpu_data_cpy(vector_handleE, vector_handleD, 0, NULL, NULL); starpu_data_cpy(vector_handleF, vector_handleE, 0, NULL, NULL); /* StarPU does not need to manipulate the array anymore so we can stop * monitoring it */ /* free them */ starpu_data_unregister(vector_handleA); starpu_data_unregister(vector_handleB); starpu_data_unregister(vector_handleC); starpu_data_unregister(vector_handleD); starpu_data_unregister(vector_handleE); starpu_data_unregister(vector_handleF); /* check if computation is correct */ int try = 1; for (j = 0; j < NX; ++j) if (A[j] != F[j]) { FPRINTF(stderr, "Fail A %f != F %f \n", A[j], F[j]); try = 0; } starpu_free_flags(A, NX*sizeof(double), STARPU_MALLOC_COUNT); starpu_free_flags(F, NX*sizeof(double), STARPU_MALLOC_COUNT); /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); if(try) FPRINTF(stderr, "TEST SUCCESS\n"); else FPRINTF(stderr, "TEST FAIL\n"); return try ? EXIT_SUCCESS : EXIT_FAILURE; enodev: return STARPU_TEST_SKIPPED; enoent: FPRINTF(stderr, "Couldn't write data: ENOENT\n"); starpu_shutdown(); return STARPU_TEST_SKIPPED; } static int merge_result(int old, int new) { if (new == EXIT_FAILURE) return EXIT_FAILURE; if (old == 0) return 0; return new; } int main(void) { int ret = 0; int ret2; char s[128]; char *ptr; #ifdef STARPU_HAVE_SETENV setenv("STARPU_CALIBRATE_MINIMUM", "1", 1); #endif snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", getenv("USER")); ptr = _starpu_mkdtemp(s); if (!ptr) { FPRINTF(stderr, "Cannot make directory <%s>\n", s); return STARPU_TEST_SKIPPED; } ret = merge_result(ret, dotest(&starpu_disk_stdio_ops, s)); ret = merge_result(ret, dotest(&starpu_disk_unistd_ops, s)); #ifdef STARPU_LINUX_SYS ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s)); #endif #ifdef STARPU_HAVE_HDF5 ret = merge_result(ret, dotest(&starpu_disk_hdf5_ops, s)); #endif ret2 = rmdir(s); if (ret2 < 0) STARPU_CHECK_RETURN_VALUE(-errno, "rmdir '%s'\n", s); return ret; } #endif starpu-1.4.9+dfsg/tests/disk/disk_copy_to_disk.c000066400000000000000000000246131507764646700217560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include "../helper.h" #ifdef STARPU_HAVE_HDF5 #include #endif #if STARPU_MAXNODES == 1 /* Cannot register a disk */ int main(int argc, char **argv) { return STARPU_TEST_SKIPPED; } #else /* * Try to write into disk memory * Use mechanism to push data from disk to disk. */ #define NX (16*1024) int dotest(struct starpu_disk_ops *ops, char *base) { int *A, *C; /* Initialize StarPU with default configuration */ int ret = starpu_init(NULL); if (ret == -ENODEV) goto enodev; /* Initialize path and name */ const char *name_file_start = "STARPU_DISK_COMPUTE_DATA"; const char *name_dir_src = "src"; const char *name_dir_dst = "dst"; char * path_file_start = malloc(strlen(base) + 1 + strlen(name_dir_src) + 1 + strlen(name_file_start) + 1); strcpy(path_file_start, base); strcat(path_file_start, "/"); strcat(path_file_start, name_dir_src); strcat(path_file_start, "/"); strcat(path_file_start, name_file_start); char * base_src = malloc(strlen(base) + 1 + strlen(name_dir_src) + 1); strcpy(base_src, base); strcat(base_src, "/"); strcat(base_src, name_dir_src); char * base_dst = malloc(strlen(base) + 1 + strlen(name_dir_dst) + 1); strcpy(base_dst, base); strcat(base_dst, "/"); strcat(base_dst, name_dir_dst); /* register a disks */ int disk_src = starpu_disk_register(ops, (void *) base_src, STARPU_DISK_SIZE_MIN); if (disk_src == -ENOENT) goto enoent; int disk_dst = starpu_disk_register(ops, (void *) base_dst, STARPU_DISK_SIZE_MIN); if (disk_dst == -ENOENT) goto enoent; /* allocate two memory spaces */ starpu_malloc_flags((void **)&A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_malloc_flags((void **)&C, NX*sizeof(int), STARPU_MALLOC_COUNT); FPRINTF(stderr, "TEST DISK MEMORY \n"); unsigned int j; /* you register them in a vector */ for(j = 0; j < NX; ++j) { A[j] = j; C[j] = 0; } /* you create a file to store the vector ON the disk */ FILE * f = fopen(path_file_start, "wb+"); if (f == NULL) goto enoent2; /* store it in the file */ fwrite(A, sizeof(int), NX, f); /* close the file */ fclose(f); int descriptor = open(path_file_start, O_RDWR); if (descriptor < 0) goto enoent2; #ifdef STARPU_HAVE_WINDOWS _commit(descriptor); #else fsync(descriptor); #endif close(descriptor); /* And now, you want to use your data in StarPU */ /* Open the file ON the disk */ void * data = starpu_disk_open(disk_src, (void *) name_file_start, NX*sizeof(int)); STARPU_ASSERT(data); starpu_data_handle_t vector_handleA; starpu_vector_data_register(&vector_handleA, disk_src, (uintptr_t) data, NX, sizeof(int)); /* Move and invalidate copy to an other disk */ starpu_data_acquire_on_node(vector_handleA, disk_dst, STARPU_RW); starpu_data_release_on_node(vector_handleA, disk_dst); starpu_data_acquire_on_node(vector_handleA, disk_src, STARPU_RW); starpu_data_release_on_node(vector_handleA, disk_src); /* free them */ starpu_data_unregister(vector_handleA); /* close them in StarPU */ starpu_disk_close(disk_src, data, NX*sizeof(int)); /* check results */ f = fopen(path_file_start, "rb+"); if (f == NULL) goto enoent2; /* take data */ size_t read = fread(C, sizeof(int), NX, f); STARPU_ASSERT(read == NX); /* close the file */ fclose(f); int try = 1; for (j = 0; j < NX; ++j) if (A[j] != C[j]) { FPRINTF(stderr, "Fail A %d != C %d \n", A[j], C[j]); try = 0; } starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); unlink(path_file_start); rmdir(base_src); free(base_src); free(base_dst); free(path_file_start); if(try) FPRINTF(stderr, "TEST SUCCESS\n"); else FPRINTF(stderr, "TEST FAIL\n"); return try ? EXIT_SUCCESS : EXIT_FAILURE; enodev: return STARPU_TEST_SKIPPED; enoent2: starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); enoent: free(base_src); free(base_dst); free(path_file_start); FPRINTF(stderr, "Couldn't write data: ENOENT\n"); starpu_shutdown(); return STARPU_TEST_SKIPPED; } #ifdef STARPU_HAVE_HDF5 int dotest_hdf5(struct starpu_disk_ops *ops, char *base) { int *A, *C; herr_t status; /* Initialize path */ const char *path_obj_start = "STARPU_DISK_COMPUTE_DATA"; const char *name_hdf5_start = "STARPU_HDF5_src_file.h5"; const char *name_hdf5_end = "STARPU_HDF5_dst_file.h5"; char * hdf5_base_src = malloc(strlen(base) + 1 + strlen(name_hdf5_start) + 1); strcpy(hdf5_base_src, base); strcat(hdf5_base_src, "/"); strcat(hdf5_base_src, name_hdf5_start); char * hdf5_base_dst = malloc(strlen(base) + 1 + strlen(name_hdf5_end) + 1); strcpy(hdf5_base_dst, base); strcat(hdf5_base_dst, "/"); strcat(hdf5_base_dst, name_hdf5_end); /* Open and close files, just to create empty files */ FILE * file_src = fopen(hdf5_base_src, "wb+"); if (!file_src) goto h5fail2; fclose(file_src); FILE * file_dst = fopen(hdf5_base_dst, "wb+"); if (!file_dst) { goto h5fail2; } fclose(file_dst); /* Initialize StarPU with default configuration */ int ret = starpu_init(NULL); if (ret == -ENODEV) goto h5enodev; /* register disks */ int disk_src = starpu_disk_register(ops, (void *) hdf5_base_src, STARPU_DISK_SIZE_MIN); if (disk_src == -ENOENT) goto h5enoent; int disk_dst = starpu_disk_register(ops, (void *) hdf5_base_dst, STARPU_DISK_SIZE_MIN); if (disk_dst == -ENOENT) goto h5enoent; /* allocate two memory spaces */ starpu_malloc_flags((void **)&A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_malloc_flags((void **)&C, NX*sizeof(int), STARPU_MALLOC_COUNT); FPRINTF(stderr, "TEST DISK MEMORY \n"); unsigned int j; /* you register them in a vector */ for(j = 0; j < NX; ++j) { A[j] = j; C[j] = 0; } /* Open HDF5 file to store data */ hid_t file = H5Fopen(hdf5_base_src, H5F_ACC_RDWR, H5P_DEFAULT); if (file < 0) goto h5fail; /* store initial data in the file */ hsize_t dims[1] = {NX}; hid_t dataspace = H5Screate_simple(1, dims, NULL); if (dataspace < 0) { H5Fclose(file); goto h5fail; } hid_t dataset = H5Dcreate2(file, path_obj_start, H5T_NATIVE_INT, dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (dataset < 0) { H5Sclose(dataspace); H5Fclose(file); goto h5fail; } status = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, A); /* close the resources before checking the writing */ H5Dclose(dataset); if (status < 0) { H5Fclose(file); goto h5fail; } H5Sclose(dataspace); H5Fclose(file); /* Open the file ON the disk */ void * data = starpu_disk_open(disk_src, (void *) path_obj_start, NX*sizeof(int)); starpu_data_handle_t vector_handleA; starpu_vector_data_register(&vector_handleA, disk_src, (uintptr_t) data, NX, sizeof(int)); /* Move and invalidate copy to an other disk */ starpu_data_acquire_on_node(vector_handleA, disk_dst, STARPU_RW); starpu_data_release_on_node(vector_handleA, disk_dst); starpu_data_acquire_on_node(vector_handleA, disk_src, STARPU_RW); starpu_data_release_on_node(vector_handleA, disk_src); starpu_data_unregister(vector_handleA); /* close them in StarPU */ starpu_disk_close(disk_src, data, NX*sizeof(int)); /* check results */ file = H5Fopen(hdf5_base_src, H5F_ACC_RDWR, H5P_DEFAULT); if (file < 0) goto h5fail; dataset = H5Dopen2(file, path_obj_start, H5P_DEFAULT); if (dataset < 0) { H5Fclose(file); goto h5fail; } status = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, C); /* close the resources before checking the writing */ H5Dclose(dataset); H5Fclose(file); if (status < 0) goto h5fail; int try = 1; for (j = 0; j < NX; ++j) if (A[j] != C[j]) { FPRINTF(stderr, "Fail A %d != C %d \n", A[j], C[j]); try = 0; } starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); unlink(hdf5_base_src); unlink(hdf5_base_dst); free(hdf5_base_src); free(hdf5_base_dst); if(try) FPRINTF(stderr, "TEST SUCCESS\n"); else FPRINTF(stderr, "TEST FAIL\n"); return (try ? EXIT_SUCCESS : EXIT_FAILURE); h5fail: starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); h5enoent: FPRINTF(stderr, "Couldn't write data: ENOENT\n"); starpu_shutdown(); h5enodev: unlink(hdf5_base_src); unlink(hdf5_base_dst); free(hdf5_base_src); free(hdf5_base_dst); return STARPU_TEST_SKIPPED; h5fail2: free(hdf5_base_src); free(hdf5_base_dst); FPRINTF(stderr, "Something goes wrong with HDF5 dataset/dataspace/write \n"); return EXIT_FAILURE; } #endif static int merge_result(int old, int new) { if (new == EXIT_FAILURE) return EXIT_FAILURE; if (old == 0) return 0; return new; } int main(void) { int ret = 0; int ret2; char s[128]; char *ptr; #ifdef STARPU_HAVE_SETENV setenv("STARPU_CALIBRATE_MINIMUM", "1", 1); #endif snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", getenv("USER")); ptr = _starpu_mkdtemp(s); if (!ptr) { FPRINTF(stderr, "Cannot make directory '%s'\n", s); return STARPU_TEST_SKIPPED; } ret = merge_result(ret, dotest(&starpu_disk_stdio_ops, s)); ret = merge_result(ret, dotest(&starpu_disk_unistd_ops, s)); #ifdef STARPU_LINUX_SYS if ((NX * sizeof(int)) % getpagesize() == 0) { ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s)); } else { ret = merge_result(ret, STARPU_TEST_SKIPPED); } #endif #ifdef STARPU_HAVE_HDF5 ret = merge_result(ret, dotest_hdf5(&starpu_disk_hdf5_ops, s)); #endif ret2 = rmdir(s); if (ret2 < 0) STARPU_CHECK_RETURN_VALUE(-errno, "rmdir '%s'\n", s); return ret; } #endif starpu-1.4.9+dfsg/tests/disk/disk_copy_unpack.c000066400000000000000000000106351507764646700216020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Test pack / unpack methods before pushing data on disk with async read/write. */ /* size of one vector */ #ifdef STARPU_QUICK_CHECK # define DISK 64 # define NX (256*1024/sizeof(double)) #else # define NX (32*1048576/sizeof(double)) # define DISK 200 #endif #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #elif STARPU_MAXNODES == 1 /* Cannot register a disk */ int main(int argc, char **argv) { return STARPU_TEST_SKIPPED; } #else int dotest(struct starpu_disk_ops *ops, void *param) { unsigned *A; int ret; /* Initialize StarPU without GPU devices to make sure the memory of the GPU devices will not be used */ // Ignore environment variables as we want to force the exact number of workers struct starpu_conf conf; ret = starpu_conf_init(&conf); if (ret == -EINVAL) return EXIT_FAILURE; conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) goto enodev; /* register a disk */ int new_dd = starpu_disk_register(ops, param, 1024*1024*DISK); /* can't write on /tmp/ */ if (new_dd == -ENOENT) goto enoent; /* allocate two memory spaces */ starpu_malloc_flags((void **)&A, NX*sizeof(unsigned), STARPU_MALLOC_COUNT); FPRINTF(stderr, "TEST DISK MEMORY \n"); unsigned int j; /* initialization with bad values */ for(j = 0; j < NX; ++j) { A[j] = j; } starpu_data_handle_t vector_handleA; static const struct starpu_data_copy_methods my_vector_copy_data_methods_s = { .any_to_any = NULL, }; starpu_interface_vector_ops.copy_methods = &my_vector_copy_data_methods_s; /* register vector in starpu */ starpu_vector_data_register(&vector_handleA, STARPU_MAIN_RAM, (uintptr_t)A, NX, sizeof(unsigned)); /* Move and invalidate copy to an other disk */ starpu_data_acquire_on_node(vector_handleA, new_dd, STARPU_RW); starpu_data_release_on_node(vector_handleA, new_dd); starpu_data_acquire_on_node(vector_handleA, new_dd, STARPU_RW); starpu_data_release_on_node(vector_handleA, new_dd); /* free them */ starpu_data_unregister(vector_handleA); /* check if computation is correct */ int try = 1; for (j = 0; j < NX; ++j) if (A[j] != j) { FPRINTF(stderr, "Fail A %u != %u \n", A[j], j); try = 0; } starpu_free_flags(A, NX*sizeof(unsigned), STARPU_MALLOC_COUNT); /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); if(try) FPRINTF(stderr, "TEST SUCCESS\n"); else FPRINTF(stderr, "TEST FAIL\n"); return try ? EXIT_SUCCESS : EXIT_FAILURE; enodev: return STARPU_TEST_SKIPPED; enoent: FPRINTF(stderr, "Couldn't write data: ENOENT\n"); starpu_shutdown(); return STARPU_TEST_SKIPPED; } static int merge_result(int old, int new) { if (new == EXIT_FAILURE) return EXIT_FAILURE; if (old == 0) return 0; return new; } int main(void) { int ret = 0; int ret2; char s[128]; char *ptr; #ifdef STARPU_HAVE_SETENV setenv("STARPU_CALIBRATE_MINIMUM", "1", 1); #endif snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", getenv("USER")); ptr = _starpu_mkdtemp(s); if (!ptr) { FPRINTF(stderr, "Cannot make directory <%s>\n", s); return STARPU_TEST_SKIPPED; } ret = merge_result(ret, dotest(&starpu_disk_stdio_ops, s)); ret = merge_result(ret, dotest(&starpu_disk_unistd_ops, s)); #ifdef STARPU_LINUX_SYS ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s)); #endif #ifdef STARPU_HAVE_HDF5 ret = merge_result(ret, dotest(&starpu_disk_hdf5_ops, s)); #endif ret2 = rmdir(s); if (ret2 < 0) STARPU_CHECK_RETURN_VALUE(-errno, "rmdir '%s'\n", s); return ret; } #endif starpu-1.4.9+dfsg/tests/disk/disk_pack.c000066400000000000000000000166021507764646700202050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include "../helper.h" #if STARPU_MAXNODES == 1 /* Cannot register a disk */ int main(int argc, char **argv) { return STARPU_TEST_SKIPPED; } #else /* * Try to write into disk memory * Use mechanism to push data from main ram to disk ram * Here we force using the pack/unpack mechanism */ #define NX (16*1024) const struct starpu_data_copy_methods my_vector_copy_data_methods_s; struct starpu_data_interface_ops starpu_interface_my_vector_ops; void starpu_my_vector_data_register(starpu_data_handle_t *handleptr, unsigned home_node, uintptr_t ptr, uint32_t nx, size_t elemsize) { struct starpu_vector_interface vector = { .id = STARPU_VECTOR_INTERFACE_ID, .ptr = ptr, .nx = nx, .elemsize = elemsize, .dev_handle = ptr, .slice_base = 0, .offset = 0, .allocsize = nx * elemsize, }; starpu_data_register(handleptr, home_node, &vector, &starpu_interface_my_vector_ops); } int dotest(struct starpu_disk_ops *ops, char *base) { int *A, *C; /* Initialize StarPU without GPU devices to make sure the memory of the GPU devices will not be used */ // Ignore environment variables as we want to force the exact number of workers struct starpu_conf conf; int ret = starpu_conf_init(&conf); if (ret == -EINVAL) return EXIT_FAILURE; conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) goto enodev; if (starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_shutdown(); return STARPU_TEST_SKIPPED; } /* Initialize path and name */ const char *name_file_start = "STARPU_DISK_COMPUTE_DATA_"; const char *name_file_end = "STARPU_DISK_COMPUTE_DATA_RESULT_"; char * path_file_start = malloc(strlen(base) + 1 + strlen(name_file_start) + 1); strcpy(path_file_start, base); strcat(path_file_start, "/"); strcat(path_file_start, name_file_start); char * path_file_end = malloc(strlen(base) + 1 + strlen(name_file_end) + 1); strcpy(path_file_end, base); strcat(path_file_end, "/"); strcat(path_file_end, name_file_end); /* register a disk */ int new_dd = starpu_disk_register(ops, (void *) base, STARPU_DISK_SIZE_MIN); /* can't write on /tmp/ */ if (new_dd == -ENOENT) goto enoent; unsigned dd = (unsigned) new_dd; /* allocate two memory spaces */ starpu_malloc_flags((void **)&A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_malloc_flags((void **)&C, NX*sizeof(int), STARPU_MALLOC_COUNT); FPRINTF(stderr, "TEST DISK MEMORY \n"); unsigned int j; /* you register them in a vector */ for(j = 0; j < NX; ++j) { A[j] = j; C[j] = 0; } /* you create a file to store the vector ON the disk */ FILE * f = fopen(path_file_start, "wb+"); if (f == NULL) goto enoent2; /* store it in the file */ fwrite(A, sizeof(int), NX, f); /* close the file */ fclose(f); int descriptor = open(path_file_start, O_RDWR); if (descriptor < 0) goto enoent2; #ifdef STARPU_HAVE_WINDOWS _commit(descriptor); #else fsync(descriptor); #endif close(descriptor); /* create a file to store result */ f = fopen(path_file_end, "wb+"); if (f == NULL) goto enoent2; /* replace all data by 0 */ fwrite(C, sizeof(int), NX, f); /* close the file */ fclose(f); descriptor = open(path_file_end, O_RDWR); if (descriptor < 0) goto enoent2; #ifdef STARPU_HAVE_WINDOWS _commit(descriptor); #else fsync(descriptor); #endif close(descriptor); /* And now, you want to use your data in StarPU */ /* Open the file ON the disk */ void * data = starpu_disk_open(dd, (void *) name_file_start, NX*sizeof(int)); void * data_result = starpu_disk_open(dd, (void *) name_file_end, NX*sizeof(int)); starpu_data_handle_t vector_handleA, vector_handleC; /* Build an vector-like interface which doesn't have the any_to_any helper, to force making use of pack/unpack */ memcpy(&starpu_interface_my_vector_ops, &starpu_interface_vector_ops, sizeof(starpu_interface_my_vector_ops)); starpu_interface_my_vector_ops.copy_methods = &my_vector_copy_data_methods_s; /* register vector in starpu */ starpu_my_vector_data_register(&vector_handleA, dd, (uintptr_t) data, NX, sizeof(int)); /* and do what you want with it, here we copy it into an other vector */ starpu_my_vector_data_register(&vector_handleC, dd, (uintptr_t) data_result, NX, sizeof(int)); starpu_data_cpy(vector_handleC, vector_handleA, 0, NULL, NULL); /* free them */ starpu_data_unregister(vector_handleA); starpu_data_unregister(vector_handleC); /* close them in StarPU */ starpu_disk_close(dd, data, NX*sizeof(int)); starpu_disk_close(dd, data_result, NX*sizeof(int)); /* check results */ f = fopen(path_file_end, "rb+"); if (f == NULL) goto enoent2; /* take data */ size_t read = fread(C, sizeof(int), NX, f); STARPU_ASSERT(read == NX); /* close the file */ fclose(f); int try = 1; for (j = 0; j < NX; ++j) if (A[j] != C[j]) { FPRINTF(stderr, "Fail A %d != C %d \n", A[j], C[j]); try = 0; } starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); unlink(path_file_start); unlink(path_file_end); free(path_file_start); free(path_file_end); /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); if(try) FPRINTF(stderr, "TEST SUCCESS\n"); else FPRINTF(stderr, "TEST FAIL\n"); return try ? EXIT_SUCCESS : EXIT_FAILURE; enodev: return STARPU_TEST_SKIPPED; enoent2: starpu_free_flags(A, NX*sizeof(int), STARPU_MALLOC_COUNT); starpu_free_flags(C, NX*sizeof(int), STARPU_MALLOC_COUNT); enoent: unlink(path_file_start); unlink(path_file_end); free(path_file_start); free(path_file_end); FPRINTF(stderr, "Couldn't write data: ENOENT\n"); starpu_shutdown(); return STARPU_TEST_SKIPPED; } static int merge_result(int old, int new) { if (new == EXIT_FAILURE) return EXIT_FAILURE; if (old == 0) return 0; return new; } int main(void) { int ret = 0; int ret2; char s[128]; char *ptr; #ifdef STARPU_HAVE_SETENV setenv("STARPU_CALIBRATE_MINIMUM", "1", 1); #endif snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", getenv("USER")); ptr = _starpu_mkdtemp(s); if (!ptr) { FPRINTF(stderr, "Cannot make directory <%s>\n", s); return STARPU_TEST_SKIPPED; } ret = merge_result(ret, dotest(&starpu_disk_stdio_ops, s)); ret = merge_result(ret, dotest(&starpu_disk_unistd_ops, s)); #ifdef STARPU_LINUX_SYS ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s)); #endif ret2 = rmdir(s); if (ret2 < 0) STARPU_CHECK_RETURN_VALUE(-errno, "rmdir '%s'\n", s); return ret; } #endif starpu-1.4.9+dfsg/tests/disk/mem_reclaim.c000066400000000000000000000214211507764646700205220ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Corentin Salingue * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include "../helper.h" /* * Try to write into disk memory * Use mechanism to push data from main ram to disk ram * Here we stress the memory with more tasks than what the RAM can fit. */ #ifdef STARPU_HAVE_MEMCHECK_H #include #else #define VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(addr, size) (void)0 #endif #ifdef STARPU_QUICK_CHECK # define NDATA 4 # define NITER 8 #elif !defined(STARPU_LONG_CHECK) # define NDATA 32 # define NITER 128 #else # define NDATA 128 # define NITER 512 #endif # define MEMSIZE 1 # define MEMSIZE_STR "1" #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #elif STARPU_MAXNODES == 1 /* Cannot register a disk */ int main(int argc, char **argv) { return STARPU_TEST_SKIPPED; } #else static int (*any_to_any)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data); /* We need a ram-to-ram copy for NUMA machine, use any_to_any for that */ static int ram_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node) { return any_to_any(src_interface, src_node, dst_interface, dst_node, NULL); } const struct starpu_data_copy_methods my_vector_copy_data_methods_s = { .ram_to_ram = ram_to_ram }; struct starpu_data_interface_ops starpu_interface_my_vector_ops; void starpu_my_vector_data_register(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, uint32_t nx, size_t elemsize) { struct starpu_vector_interface vector = { .id = STARPU_VECTOR_INTERFACE_ID, .ptr = ptr, .nx = nx, .elemsize = elemsize, .dev_handle = ptr, .slice_base = 0, .offset = 0, .allocsize = nx * elemsize, }; starpu_data_register(handleptr, home_node, &vector, &starpu_interface_my_vector_ops); } static unsigned values[NDATA]; static void zero(void *buffers[], void *args) { (void)args; struct starpu_vector_interface *vector = (struct starpu_vector_interface *) buffers[0]; unsigned *val = (unsigned*) STARPU_VECTOR_GET_PTR(vector); *val = 0; VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(val, STARPU_VECTOR_GET_NX(vector) * STARPU_VECTOR_GET_ELEMSIZE(vector)); } static void inc(void *buffers[], void *args) { struct starpu_vector_interface *vector = (struct starpu_vector_interface *) buffers[0]; unsigned *val = (unsigned*) STARPU_VECTOR_GET_PTR(vector); unsigned i; starpu_codelet_unpack_args(args, &i); (*val)++; STARPU_ATOMIC_ADD(&values[i], 1); } static void check(void *buffers[], void *args) { struct starpu_vector_interface *vector = (struct starpu_vector_interface *) buffers[0]; unsigned *val = (unsigned*) STARPU_VECTOR_GET_PTR(vector); unsigned i; starpu_codelet_unpack_args(args, &i); STARPU_ASSERT_MSG(*val == values[i], "Incorrect value. Value %u should be %u (index %u)", *val, values[i], i); } static struct starpu_codelet zero_cl = { .cpu_funcs = { zero }, .nbuffers = 1, .modes = { STARPU_W }, }; static struct starpu_codelet inc_cl = { .cpu_funcs = { inc }, .nbuffers = 1, .modes = { STARPU_RW }, }; static struct starpu_codelet check_cl = { .cpu_funcs = { check }, .nbuffers = 1, .modes = { STARPU_R }, }; int dotest(struct starpu_disk_ops *ops, char *base, void (*vector_data_register)(starpu_data_handle_t *handleptr, int home_node, uintptr_t ptr, uint32_t nx, size_t elemsize), const char *text) { starpu_data_handle_t handles[NDATA]; if (starpu_getenv_number_default("STARPU_DIDUSE_BARRIER", 0)) /* This would hang */ return STARPU_TEST_SKIPPED; FPRINTF(stderr, "Testing <%s>\n", text); /* Initialize StarPU without GPU devices to make sure the memory of the GPU devices will not be used */ // Ignore environment variables as we want to force the exact number of workers struct starpu_conf conf; int ret = starpu_conf_init(&conf); if (ret == -EINVAL) return EXIT_FAILURE; conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; /* Initialize path and name */ /* register swap disk */ int new_dd = starpu_disk_register(ops, (void *) base, STARPU_DISK_SIZE_MIN); /* can't write on /tmp/ */ if (new_dd == -ENOENT) goto enoent; unsigned int i, j; /* Initialize twice as much data as available memory */ for (i = 0; i < NDATA; i++) { vector_data_register(&handles[i], -1, 0, (MEMSIZE*1024*1024*2) / NDATA, sizeof(char)); ret = starpu_task_insert(&zero_cl, STARPU_W, handles[i], 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } memset(values, 0, sizeof(values)); /* Work out of core */ for (i = 0; i < NITER; i++) { j = rand()%NDATA; ret = starpu_task_insert(&inc_cl, STARPU_RW, handles[j], STARPU_VALUE, &j, sizeof(j), 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } starpu_task_wait_for_all(); /* forcibly evict some data, just for fun */ for (i = 0; i < NDATA; i++) { if ((rand() % 2) == 0) starpu_data_evict_from_node(handles[i], STARPU_MAIN_RAM); } /* And work out of core again */ for (i = 0; i < NITER; i++) { j = rand()%NDATA; ret = starpu_task_insert(&inc_cl, STARPU_RW, handles[j], STARPU_VALUE, &j, sizeof(j), 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } /* Check and free data */ for (i = 0; i < NDATA; i++) { ret = starpu_task_insert(&check_cl, STARPU_R, handles[i], STARPU_VALUE, &i, sizeof(i), 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_data_unregister(handles[i]); } /* terminate StarPU, no task can be submitted after */ starpu_shutdown(); return EXIT_SUCCESS; enoent: FPRINTF(stderr, "Couldn't write data: ENOENT\n"); enodev: starpu_shutdown(); return STARPU_TEST_SKIPPED; } static int merge_result(int old, int new) { if (new == EXIT_FAILURE || new == STARPU_TEST_SKIPPED) return new; if (old == 0) return 0; return new; } int main(void) { int ret = 0; int ret2; char s[128]; char *ptr; #ifdef STARPU_HAVE_SETENV setenv("STARPU_CALIBRATE_MINIMUM", "1", 1); #endif snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", getenv("USER")); ptr = _starpu_mkdtemp(s); if (!ptr) { FPRINTF(stderr, "Cannot make directory '%s'\n", s); return STARPU_TEST_SKIPPED; } setenv("STARPU_LIMIT_CPU_MEM", MEMSIZE_STR, 1); /* Build an vector-like interface which doesn't have the any_to_any helper, to force making use of pack/unpack */ any_to_any = starpu_interface_vector_ops.copy_methods->any_to_any; memcpy(&starpu_interface_my_vector_ops, &starpu_interface_vector_ops, sizeof(starpu_interface_my_vector_ops)); starpu_interface_my_vector_ops.copy_methods = &my_vector_copy_data_methods_s; ret = merge_result(ret, dotest(&starpu_disk_stdio_ops, s, starpu_vector_data_register, "Stdio with read/write vector ops")); if (ret == STARPU_TEST_SKIPPED) goto skipped; ret = merge_result(ret, dotest(&starpu_disk_stdio_ops, s, starpu_my_vector_data_register, "Stdio with pack/unpack vector ops")); if (ret == STARPU_TEST_SKIPPED) goto skipped; ret = merge_result(ret, dotest(&starpu_disk_unistd_ops, s, starpu_vector_data_register, "unistd with read/write vector ops")); if (ret == STARPU_TEST_SKIPPED) goto skipped; ret = merge_result(ret, dotest(&starpu_disk_unistd_ops, s, starpu_my_vector_data_register, "unistd with pack/unpack vector ops")); if (ret == STARPU_TEST_SKIPPED) goto skipped; #ifdef STARPU_LINUX_SYS ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s, starpu_vector_data_register, "unistd_direct with read/write vector ops")); if (ret == STARPU_TEST_SKIPPED) goto skipped; ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s, starpu_my_vector_data_register, "unistd_direct with pack/unpack vector ops")); if (ret == STARPU_TEST_SKIPPED) goto skipped; #endif skipped: ret2 = rmdir(s); STARPU_CHECK_RETURN_VALUE(ret2, "rmdir '%s'\n", s); return ret; } #endif starpu-1.4.9+dfsg/tests/energy/000077500000000000000000000000001507764646700164435ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/energy/dynamic.sh000077500000000000000000000033501507764646700204270ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # To have 24 cores export STARPU_HOSTNAME=sirocco # To avoid slowing down simulation export MALLOC_PERTURB_=0 # You can play with these export N=40 export NITER=30 GAMMAS="1000000 100000 76000 10000 0" for gamma in $GAMMA do (for freq_slow in $(seq 1200 200 3500) ; do STARPU_SCHED_GAMMA=$gamma STARPU_FREQ_SLOW=$freq_slow \ ./energy_efficiency $N $NITER | grep "^$(($N * 512)) " & done) | sort -n -k 2 > dynamic.$gamma.dat done cat > dynamic.gp << EOF set output "dynamic.eps" set term postscript eps enhanced color font ",20" set key bottom right set xlabel "performance (GFlop/s)" set ylabel "energy (J)" plot \\ EOF for gamma in $GAMMAS; do cat >> dynamic.gp << EOF "dynamic.$gamma.dat" using 5:7:6:8 with xyerrorlines lw 2 title "$gamma", \\ EOF done cat >> dynamic.gp << EOF set output "dynamic-time.eps" set xlabel "time (ms)" set ylabel "energy (J)" plot \\ EOF for gamma in $GAMMAS; do cat >> dynamic.gp << EOF "dynamic.$gamma.dat" using 3:7:4:8 with xyerrorlines lw 2 title "$gamma", \\ EOF done gnuplot dynamic.gp gv dynamic.eps & gv dynamic-time.eps & starpu-1.4.9+dfsg/tests/energy/energy_efficiency.c000066400000000000000000000427521507764646700222760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Bérangère Subervie * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * This tries to run kernels with different efficiency depending on the core * frequency. * * This is based on the Cholesky factorization, which is made to exhibit three * caricatural cases as follows: * * - gemm: always get faster with higher frequency * - trsm: gets faster with higher frequency, but efficiency gets lower and * lower * - potrf: reaches a maximum performance, after which there is no point in * running it at higher frequency. * * We here assume that the power use is the same for the different kernels * (which wouldn't be true for real kernels, measurements would be needed, to * feed the performance models). */ /* These are the different frequency and power parameters, as measured and * provided to this program */ static float freq_min, freq_fast; static float power_min, power_fast; /* * This returns the dynamic power used by a CPU core in W at a given frequency * in MHz * This assumes C.V^2.F with V being proportional to F, thus C.F^3 * * freq_min = 1200 * freq_fast = 3500 * power_min = 2 * power_fast = 8.2 * * freq_min3 = freq_min * freq_min * freq_min * freq_fast3 = freq_fast * freq_fast * freq_fast * alpha = (power_fast - power_min) / (freq_fast3 - freq_min3) * power(frequency) = power_min + alpha * (frequency*frequency*frequency - freq_min3) * plot [frequency=freq_min:freq_fast] power(frequency) lw 2 * */ static float power(float frequency) { double freq_min3 = freq_min * freq_min * freq_min; double freq_fast3 = freq_fast * freq_fast * freq_fast; double alpha = (power_fast - power_min) / (freq_fast3 - freq_min3); return power_min + alpha * (frequency*frequency*frequency - freq_min3); } /* * This returns the frequency of the given worker and implementation in MHz. * This is where we can tune either a given number of cores at a low frequency, * or which implementation uses which frequency. */ /* These are the chosen parameters: how many cores get slowed down, at which * frequency */ static int ncpu_slow = -1; static float freq_slow; static float frequency(int worker, unsigned i) { if (ncpu_slow == -1) { /* Version that allows the runtime to switch speed between * tasks, by exposing two implementations with different time * and energy */ if (i == 0) /* Slow implementation */ return freq_slow; else /* Fast implementation */ return freq_fast; } else { /* Version that assumes that ncpu_slow workers are running at * slow speed */ if (worker < ncpu_slow) return freq_slow; else return freq_fast; } } /* This is from magma -- Innovative Computing Laboratory -- Electrical Engineering and Computer Science Department -- University of Tennessee -- (C) Copyright 2009 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Tennessee, Knoxville nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.))) #define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n)) * (double)(__n) - (1. / 6.))) #define FLOPS_SPOTRF(__n) (FMULS_POTRF((__n)) + FADDS_POTRF((__n))) #define FMULS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.)) #define FADDS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.)) #define FMULS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? FMULS_TRMM_2((__m), (__n)) :*/ FMULS_TRMM_2((__n), (__m))) #define FADDS_TRMM(__m, __n) (/*((__side) == PlasmaLeft) ? FADDS_TRMM_2((__m), (__n)) :*/ FADDS_TRMM_2((__n), (__m))) #define FMULS_TRSM FMULS_TRMM #define FADDS_TRSM FMULS_TRMM #define FLOPS_STRSM(__m, __n) (FMULS_TRSM((__m), (__n)) + FADDS_TRSM((__m), (__n))) #define FMULS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) #define FADDS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) #define FLOPS_SSYRK(__k, __n) (FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n))) #define FMULS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) #define FADDS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) #define FLOPS_SGEMM(__m, __n, __k) (FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k))) /* Tags for spotting tasks in the trace */ #define TAG_POTRF(k) ((starpu_tag_t)((1ULL<<60) | (unsigned long long)(k))) #define TAG_TRSM(k,j) ((starpu_tag_t)(((3ULL<<60) | (((unsigned long long)(k))<<32) \ | (unsigned long long)(j)))) #define TAG_GEMM(k,i,j) ((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) \ | ((unsigned long long)(i)<<16) \ | (unsigned long long)(j)))) /* Arbitrary tile size */ #define TILE_SIZE 512 /* * Kernel time performance models, would normally be provided by measurements */ /* We assume that GEMM/SYRK scale perfectly with frequency */ #define GEMM_GFLOPS 50. /* At full speed */ #define GEMM_FLOPS(N) FLOPS_SGEMM(N, N, N) #define GEMM_TIME(N) (GEMM_FLOPS(TILE_SIZE) / (GEMM_GFLOPS * 1000000000.)) static double _gemm_time(float frequency) { double ret; /* Fix according to real frequency, linear */ ret = GEMM_TIME(N) / (frequency / freq_fast); return ret * 1000000.; } static double gemm_time(struct starpu_task *t, unsigned workerid, unsigned i) { (void)t; return _gemm_time(frequency(workerid, i)); } #define SYRK_GFLOPS 50. /* At full speed */ #define SYRK_FLOPS(N) FLOPS_SSYRK(N, N) #define SYRK_TIME(N) (SYRK_FLOPS(TILE_SIZE) / (SYRK_GFLOPS * 1000000000.)) static double _syrk_time(float frequency) { double ret; /* Fix according to real frequency, linear */ ret = SYRK_TIME(N) / (frequency / freq_fast); return ret * 1000000.; } static double syrk_time(struct starpu_task *t, unsigned workerid, unsigned i) { (void)t; return _syrk_time(frequency(workerid, i)); } /* We assume that TRSM decays a bit with frequency */ #define TRSM_DECAY 0.5 #define TRSM_FLOPS(N) FLOPS_STRSM(N, N) static double _trsm_time(float frequency) { double ret = GEMM_TIME(N)*0.7; /* as typically observed */ /* Fix according to real frequency, root */ ret = ret / (pow(frequency - freq_min/2, TRSM_DECAY) / pow(freq_fast - freq_min/2, TRSM_DECAY)); return ret * 1000000.; } static double trsm_time(struct starpu_task *t, unsigned workerid, unsigned i) { (void)t; return _trsm_time(frequency(workerid, i)); } /* We assume that POTRF decays strongly with frequency */ #define POTRF_DECAY 0.5 #define POTRF_FLOPS(N) FLOPS_SPOTRF(N) static double _potrf_time(float frequency) { double ret = GEMM_TIME(N)*1.2; /* as typically observed */ /* Fix according to real frequency, asymptote */ ret = ret / (1. - POTRF_DECAY * ((freq_min/(frequency-freq_min/2)) - (freq_min/(freq_fast-freq_min/2)))); return ret * 1000000.; } static double potrf_time(struct starpu_task *t, unsigned workerid, unsigned i) { (void)t; return _potrf_time(frequency(workerid, i)); } /* stub for kernel, shouldn't be getting called in simgrid mode */ void dummy_func(void *descr[], void *_args) { (void)descr; (void)_args; fprintf(stderr, "?? shouldn't be called\n"); } /* Define the codelets */ #define CODELET(kernel, nb, ...) \ static double kernel##_energy(struct starpu_task *t, unsigned workerid, unsigned i) \ { \ double time = kernel##_time(t, workerid, i); \ return power(frequency(workerid, i)) * time / 1000000.; \ } \ \ static struct starpu_perfmodel kernel##_perf_model = \ { \ .symbol = #kernel, \ .type = STARPU_PER_WORKER, \ .worker_cost_function = kernel##_time, \ }; \ \ static struct starpu_perfmodel kernel##_energy_model = \ { \ .symbol = #kernel "_energy", \ .type = STARPU_PER_WORKER, \ .worker_cost_function = kernel##_energy, \ }; \ \ static struct starpu_codelet kernel##_cl = \ { \ .cpu_funcs = { dummy_func }, \ .nbuffers = nb, \ .modes = {__VA_ARGS__}, \ .model = &kernel##_perf_model, \ .energy_model = &kernel##_energy_model, \ }; CODELET(potrf, 1, STARPU_RW) CODELET(trsm, 2, STARPU_R, STARPU_RW) CODELET(syrk, 2, STARPU_R, STARPU_RW) CODELET(gemm, 3, STARPU_R, STARPU_R, STARPU_RW) int main(int argc, char *argv[]) { /* Initialize environment variables */ if (!getenv("STARPU_IDLE_POWER")) setenv("STARPU_IDLE_POWER", "30", 1); const char *hostname = getenv("STARPU_HOSTNAME"); if (!hostname || strcmp(hostname, "sirocco")) { printf("Warning: This is expected to be run with export STARPU_HOSTNAME=sirocco\n"); } freq_min = starpu_getenv_number_default("STARPU_FREQ_MIN", 1200); freq_slow = starpu_getenv_number_default("STARPU_FREQ_SLOW", 1200); freq_fast = starpu_getenv_number_default("STARPU_FREQ_FAST", 3500); power_min = starpu_getenv_float_default("STARPU_POWER_MIN", 2); power_fast = starpu_getenv_float_default("STARPU_POWER_FAST", 8.2); /* Number of slow CPU cores */ ncpu_slow = starpu_getenv_number_default("STARPU_NCPU_SLOW", -1); if (ncpu_slow == -1) { /* Enable second implementation. */ potrf_cl.cpu_funcs[1] = dummy_func; trsm_cl.cpu_funcs[1] = dummy_func; gemm_cl.cpu_funcs[1] = dummy_func; syrk_cl.cpu_funcs[1] = dummy_func; } /* Initialize StarPU */ struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; if (!getenv("STARPU_SCHED")) conf.sched_policy_name = "dmdas"; int ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned N, k, m, n, iter, NITER; if (argc < 2) #ifdef STARPU_QUICK_CHECK N = 10; #else N = 40; #endif else N = atoi(argv[1]); if (argc < 3) #ifdef STARPU_QUICK_CHECK NITER = 3; #else NITER = 10; #endif else NITER = atoi(argv[2]); if (N == 0) { starpu_shutdown(); return 0; } /* Give parameter summary to user */ printf("freqs (MHz):\n"); printf("%f %f %f\n", freq_min, freq_slow, freq_fast); printf("\n"); printf("per-core power (W):\n"); printf("%f %f\n", power_min, power_fast); printf("%f %f %f\n", power(freq_min), power(freq_slow), power(freq_fast)); printf("\n"); printf("kernel perfs in GFlops (min, slow, fast):\n"); printf("gemm:\t%f %f %f\n", GEMM_FLOPS(TILE_SIZE) / _gemm_time(freq_min) / 1000, GEMM_FLOPS(TILE_SIZE) / _gemm_time(freq_slow) / 1000, GEMM_FLOPS(TILE_SIZE) / _gemm_time(freq_fast) / 1000); printf("syrk:\t%f %f %f\n", SYRK_FLOPS(TILE_SIZE) / _syrk_time(freq_min) / 1000, SYRK_FLOPS(TILE_SIZE) / _syrk_time(freq_slow) / 1000, SYRK_FLOPS(TILE_SIZE) / _syrk_time(freq_fast) / 1000); printf("trsm:\t%f %f %f\n", TRSM_FLOPS(TILE_SIZE) / _trsm_time(freq_min) / 1000, TRSM_FLOPS(TILE_SIZE) / _trsm_time(freq_slow) / 1000, TRSM_FLOPS(TILE_SIZE) / _trsm_time(freq_fast) / 1000); printf("potrf:\t%f %f %f\n", POTRF_FLOPS(TILE_SIZE) / _potrf_time(freq_min) / 1000, POTRF_FLOPS(TILE_SIZE) / _potrf_time(freq_slow) / 1000, POTRF_FLOPS(TILE_SIZE) / _potrf_time(freq_fast) / 1000); printf("\n"); printf("kernel efficiency in GFlops/W (min, slow, fast):\n"); printf("gemm:\t%f %f %f\n", GEMM_FLOPS(TILE_SIZE) / _gemm_time(freq_min) / 1000 / power(freq_min), GEMM_FLOPS(TILE_SIZE) / _gemm_time(freq_slow) / 1000 / power(freq_slow), GEMM_FLOPS(TILE_SIZE) / _gemm_time(freq_fast) / 1000 / power(freq_fast)); printf("syrk:\t%f %f %f\n", SYRK_FLOPS(TILE_SIZE) / _syrk_time(freq_min) / 1000 / power(freq_min), SYRK_FLOPS(TILE_SIZE) / _syrk_time(freq_slow) / 1000 / power(freq_slow), SYRK_FLOPS(TILE_SIZE) / _syrk_time(freq_fast) / 1000 / power(freq_fast)); printf("trsm:\t%f %f %f\n", TRSM_FLOPS(TILE_SIZE) / _trsm_time(freq_min) / 1000 / power(freq_min), TRSM_FLOPS(TILE_SIZE) / _trsm_time(freq_slow) / 1000 / power(freq_slow), TRSM_FLOPS(TILE_SIZE) / _trsm_time(freq_fast) / 1000 / power(freq_fast)); printf("potrf:\t%f %f %f\n", POTRF_FLOPS(TILE_SIZE) / _potrf_time(freq_min) / 1000 / power(freq_min), POTRF_FLOPS(TILE_SIZE) / _potrf_time(freq_slow) / 1000 / power(freq_slow), POTRF_FLOPS(TILE_SIZE) / _potrf_time(freq_fast) / 1000 / power(freq_fast)); printf("\n"); /* Now compute */ starpu_data_handle_t A[N][N]; for (m = 0; m < N; m++) for (n = 0; n < N; n++) starpu_void_data_register(&A[m][n]); unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN; double timing_sum = 0.; double energy_sum = 0.; double timing_sum2 = 0.; double energy_sum2 = 0.; for (iter = 0; iter < NITER; iter++) { double start = starpu_timing_now(); double start_energy = starpu_energy_used(); for (k = 0; k < N; k++) { starpu_iteration_push(k); ret = starpu_task_insert(&potrf_cl, STARPU_PRIORITY, unbound_prio ? (int)(2*N - 2*k) : STARPU_MAX_PRIO, STARPU_RW, A[k][k], STARPU_FLOPS, (double) FLOPS_SPOTRF(TILE_SIZE), STARPU_TAG_ONLY, TAG_POTRF(k), 0); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); for (m = k+1; m= 0) toprint_slow = ncpu_slow; else toprint_slow = freq_slow; printf("# size\t%s\tms +-\tGFlop/s +-\ten. (J) +-\tGF/W\n", ncpu_slow >= 0 ? "nslow" : "fslow"); printf("%u\t%u\t%.0f %.1f\t%.1f %.1f\t%.1f %.1f\t%.2f\n", TILE_SIZE * N, toprint_slow, timing_avg/1000, timing_dev/1000, (flop/timing_avg/1000.0f), (flop/(timing_avg*timing_avg)/1000.f)*timing_dev, energy_avg, energy_dev, flop/1000000000./energy_avg); for (m = 0; m < N; m++) for (n = 0; n < N; n++) starpu_data_unregister(A[m][n]); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/tests/energy/perfs.gp000066400000000000000000000050421507764646700201130ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # set term postscript eps enhanced color font ",20" set key top left set xlabel "frequency (MHz)" freq_min = 1200 freq_fast = 3500 power_min = 2 power_fast = 8.2 TRSM_DECAY = 0.5 POTRF_DECAY = 0.5 # Plot the power according to frequency (cubic curve) freq_min3 = freq_min * freq_min * freq_min freq_fast3 = freq_fast * freq_fast * freq_fast alpha = (power_fast - power_min) / (freq_fast3 - freq_min3) power(frequency) = power_min + alpha * (frequency*frequency*frequency - freq_min3) set output "power.eps" set ylabel "power (W)" plot [frequency=freq_min:freq_fast] [y=0:] power(frequency) lw 2 notitle # Plot the kernel performance according to frequency set output "perfs.eps" set ylabel "performance (GFlop/s)" gemm_max_perf = 50 trsm_max_perf = 35.784040 potrf_max_perf = 6.964803 gemm_factor(frequency) = frequency / freq_fast trsm_factor(frequency) = (frequency - freq_min/2) ** TRSM_DECAY / (freq_fast - freq_min/2) ** TRSM_DECAY potrf_factor(frequency) = 1 - POTRF_DECAY * ((freq_min/(frequency-freq_min/2)) - (freq_min/(freq_fast-freq_min/2))) plot [frequency=freq_min:freq_fast] \ gemm_max_perf * gemm_factor(frequency) lw 2 title "gemm", \ trsm_max_perf * trsm_factor(frequency) lw 2 title "trsm", \ potrf_max_perf * potrf_factor(frequency) lw 2 title "potrf" # Plot the kernel efficiency according to frequency set output "efficiency.eps" set key top right set ylabel "efficiency (GFlop/W)" gemm_max_efficiency = 6.097561 trsm_max_efficiency = 4.363907 potrf_max_efficiency = 0.849366 power_factor(frequency) = power(frequency) / power(freq_fast) plot [frequency=freq_min:freq_fast] \ gemm_max_efficiency * gemm_factor(frequency) / power_factor(frequency) lw 2 title "gemm", \ trsm_max_efficiency * trsm_factor(frequency) / power_factor(frequency) lw 2 title "trsm", \ potrf_max_efficiency * potrf_factor(frequency) / power_factor(frequency) lw 2 title "potrf" starpu-1.4.9+dfsg/tests/energy/static.sh000077500000000000000000000034311507764646700202720ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # To have 24 cores export STARPU_HOSTNAME=sirocco # To avoid slowing down simulation export MALLOC_PERTURB_=0 # You can play with these export STARPU_FREQ_SLOW=1200 export STARPU_POWER_SLOW=2 export STARPU_POWER_FAST=8.2 export N=40 export NITER=30 GAMMAS="1000000 100000 10000 0" for gamma in $GAMMAS; do (for ncpu_slow in $(seq 0 24) ; do STARPU_SCHED_GAMMA=$gamma STARPU_NCPU_SLOW=$ncpu_slow \ ./energy_efficiency $N $NITER | grep "^$(($N * 512)) " & done) | sort -n -k 2 > static.$gamma.dat done cat > static.gp << EOF set output "static.eps" set term postscript eps enhanced color font ",20" set key top center set xlabel "performance (GFlop/s)" set ylabel "energy (J)" plot \\ EOF for gamma in $GAMMAS; do cat >> static.gp << EOF "static.$gamma.dat" using 5:7:6:8 with xyerrorlines title "$gamma", \\ EOF done cat >> static.gp << EOF set output "static-time.eps" set xlabel "time (ms)" set ylabel "energy (J)" plot \\ EOF for gamma in $GAMMAS; do cat >> static.gp << EOF "static.$gamma.dat" using 3:7:4:8 with xyerrorlines title "$gamma", \\ EOF done gnuplot static.gp gv static.eps & gv static-time.eps & starpu-1.4.9+dfsg/tests/errorcheck/000077500000000000000000000000001507764646700173015ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/errorcheck/invalid_blocking_calls.c000066400000000000000000000065711507764646700241320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Check that we catch calling tag_wait, i.e. a blocking call, from the * codelet function, which is invalid. This test is thus expected to fail. */ /* mpirun may not exit if it fails, skip the test for master-slave */ #if defined(STARPU_NO_ASSERT) int main(void) { return STARPU_TEST_SKIPPED; } #else #define TAG 0x42 static starpu_data_handle_t handle; static unsigned *data; void wrong_func(void *descr[], void *arg) { (void)descr; (void)arg; /* The function is expected to fail. This is indicated in tests/Makefile.am */ /* try to fetch data in the RAM while we are in a codelet, such a * blocking call is forbidden */ starpu_data_acquire(handle, STARPU_RW); starpu_tag_wait(TAG); } static struct starpu_codelet wrong_codelet = { .modes = { STARPU_RW }, .cpu_funcs = {wrong_func}, .cuda_funcs = {wrong_func}, .opencl_funcs = {wrong_func}, .model = NULL, .nbuffers = 0 }; static void wrong_callback(void *arg) { (void)arg; /* The function is expected to fail. This is indicated in tests/Makefile.am */ starpu_data_acquire(handle, STARPU_RW); starpu_tag_wait(TAG); } int main(int argc, char **argv) { int ret; if (RUNNING_ON_VALGRIND) return STARPU_TEST_SKIPPED; disable_coredump(); ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void**)&data, sizeof(*data)); *data = 42; /* register a piece of data */ starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)data, 1, sizeof(unsigned)); struct starpu_task *task = starpu_task_create(); task->cl = &wrong_codelet; task->handles[0] = handle; task->use_tag = 1; task->tag_id = TAG; task->callback_func = wrong_callback; task->detach = 0; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_tag_wait(TAG); STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait"); /* This call is valid as it is done by the application outside a * callback */ ret = starpu_data_acquire(handle, STARPU_RW); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); starpu_data_release(handle); ret = starpu_task_wait(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); starpu_data_unregister(handle); starpu_free_noflag(data, sizeof(*data)); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } #endif starpu-1.4.9+dfsg/tests/errorcheck/invalid_tasks.c000066400000000000000000000043151507764646700223030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Check that we detect that with only a CPU we can't submit a GPU-only task */ #if !defined(STARPU_USE_CPU) #warning no cpu are available. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else void dummy_func(void *descr[], void *arg) { (void)descr; (void)arg; } static struct starpu_codelet gpu_only_cl = { .cuda_funcs = {dummy_func}, .opencl_funcs = {dummy_func}, .model = NULL, .nbuffers = 0 }; int main(void) { int ret; /* We force StarPU to use 1 CPU only */ struct starpu_conf conf; starpu_conf_init(&conf); conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.ncpus = 1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_task *task = starpu_task_create(); task->cl = &gpu_only_cl; /* Only a GPU device could execute that task ! */ ret = starpu_task_submit(task); STARPU_ASSERT(ret == -ENODEV); task->destroy = 0; starpu_task_destroy(task); struct starpu_task *task_specific = starpu_task_create(); task_specific->cl = &gpu_only_cl; task_specific->execute_on_a_specific_worker = 1; task_specific->workerid = starpu_worker_get_by_type(STARPU_CPU_WORKER, 0); /* Only a CUDA device could execute that task ! */ ret = starpu_task_submit(task_specific); STARPU_ASSERT(ret == -ENODEV); task_specific->destroy = 0; starpu_task_destroy(task_specific); starpu_shutdown(); return EXIT_SUCCESS; } #endif starpu-1.4.9+dfsg/tests/errorcheck/starpu_init_noworker.c000066400000000000000000000033341507764646700237370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Test that starpu_initialize returns ENODEV when no worker is available */ int main(int argc, char **argv) { int ret; /* We try to initialize StarPU without any worker */ struct starpu_conf conf; starpu_conf_init(&conf); conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); /* starpu_init should return -ENODEV */ ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return EXIT_SUCCESS; else { unsigned ncpu = starpu_cpu_worker_get_count(); unsigned ncuda = starpu_cuda_worker_get_count(); unsigned nopencl = starpu_opencl_worker_get_count(); unsigned nmpi_ms = starpu_mpi_ms_worker_get_count(); FPRINTF(stderr, "StarPU has found :\n"); FPRINTF(stderr, "\t%u CPU cores\n", ncpu); FPRINTF(stderr, "\t%u CUDA devices\n", ncuda); FPRINTF(stderr, "\t%u OpenCL devices\n", nopencl); FPRINTF(stderr, "\t%u MPI Master-Slaves devices\n", nmpi_ms); return EXIT_FAILURE; } } starpu-1.4.9+dfsg/tests/errorcheck/workers_cpuid.c000066400000000000000000000110541507764646700223260ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Try various values for STARPU_WORKERS_CPUID, checking that the * expected binding does happen */ #if !defined(STARPU_USE_CPU) || !defined(STARPU_HAVE_HWLOC) || !defined(STARPU_HAVE_SETENV) #warning no cpu are available. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else #include #ifdef STARPU_QUICK_CHECK #define CPUSTEP 8 #define NB_TESTS 1 #else #define CPUSTEP 1 #define NB_TESTS 5 #endif int nhwpus; long workers_cpuid[STARPU_NMAXWORKERS]; int workers_id[STARPU_NMAXWORKERS]; static int check_workers_mapping(long *cpuid, int *workerids, int nb_workers) { int i; for (i=0; i STARPU_NMAXWORKERS) nhwpus = STARPU_NMAXWORKERS; for (i=0; i #include "../helper.h" /* This task fakes some repeated errors */ static int retry; void cpu_increment(void *descr[], void *arg) { (void)arg; unsigned *var = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); unsigned *var2 = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); FPRINTF(stderr,"computing\n"); *var2 = *var + 1; if (retry < 10) { FPRINTF(stderr,"failing\n"); retry++; /* Fake failure */ starpu_task_ft_failed(starpu_task_get_current()); } else FPRINTF(stderr,"succeed\n"); } static struct starpu_codelet my_codelet = { .cpu_funcs = {cpu_increment}, //.cpu_funcs_name = {"cpu_increment"}, .modes = { STARPU_R, STARPU_W }, .nbuffers = 2 }; /* This implements the retry strategy * (Identical to the default implementation: just retry) */ static void check_ft(void *arg) { struct starpu_task *meta_task = arg; struct starpu_task *current_task = starpu_task_get_current(); struct starpu_task *new_task; int ret; if (!current_task->failed) { FPRINTF(stderr,"didn't fail, release main task\n"); starpu_task_ft_success(meta_task); return; } FPRINTF(stderr,"failed, try again\n"); new_task = starpu_task_ft_create_retry(meta_task, current_task, check_ft); /* Here we could e.g. force the task to use only a CPU implementation * known to be failsafe */ ret = starpu_task_submit_nodeps(new_task); STARPU_ASSERT(!ret); } int main(void) { int x = 12; int y = 1; starpu_data_handle_t h_x, h_y; int ret, ret1; if (starpu_getenv_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) /* TODO _submit_job_take_data_deps */ return STARPU_TEST_SKIPPED; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_variable_data_register(&h_x, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); starpu_variable_data_register(&h_y, STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y)); retry = 0; ret1 = starpu_task_insert(&my_codelet, STARPU_PROLOGUE_CALLBACK, starpu_task_ft_prologue, STARPU_PROLOGUE_CALLBACK_ARG_NFREE, check_ft, STARPU_R, h_x, STARPU_W, h_y, 0); if (ret1 != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret1, "starpu_task_insert"); starpu_task_wait_for_all(); starpu_data_unregister(h_x); starpu_data_unregister(h_y); starpu_shutdown(); if (x != 12) ret = 1; FPRINTF(stderr, "Value x = %d (expected 12)\n", x); if (ret1 != -ENODEV) { if (y != 13) ret = 1; FPRINTF(stderr, "Value y = %d (expected 13)\n", y); } STARPU_RETURN(ret); } starpu-1.4.9+dfsg/tests/fortran90/000077500000000000000000000000001507764646700167765ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/fortran90/init_01.f90000066400000000000000000000016021507764646700205600ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! PROGRAM init_01 USE starpu_mod USE iso_c_binding IMPLICIT NONE INTEGER(KIND=C_INT) :: res res = starpu_init(C_NULL_PTR) IF (res /= 0) THEN STOP 77 END IF CALL starpu_shutdown() END PROGRAM init_01 starpu-1.4.9+dfsg/tests/fortran90/starpu_mod.f90000066400000000000000000000077151507764646700215050ustar00rootroot00000000000000! StarPU --- Runtime system for heterogeneous multicore architectures. ! ! Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria ! ! StarPU is free software; you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation; either version 2.1 of the License, or (at ! your option) any later version. ! ! StarPU is distributed in the hope that it will be useful, but ! WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ! ! See the GNU Lesser General Public License in COPYING.LGPL for more details. ! MODULE starpu_mod ! == starpu.h == ! starpu_conf_init INTERFACE SUBROUTINE starpu_conf_init(conf) BIND(C) USE iso_c_binding TYPE(C_PTR), VALUE :: conf END SUBROUTINE starpu_conf_init END INTERFACE ! starpu_init INTERFACE FUNCTION starpu_init(conf) BIND(C) USE iso_c_binding TYPE(C_PTR), VALUE :: conf INTEGER(KIND=C_INT) :: starpu_init END FUNCTION starpu_init END INTERFACE ! starpu_initialize ! starpu_pause INTERFACE SUBROUTINE starpu_pause() BIND(C) USE iso_c_binding END SUBROUTINE starpu_pause END INTERFACE ! starpu_resume INTERFACE SUBROUTINE starpu_resume() BIND(C) USE iso_c_binding END SUBROUTINE starpu_resume END INTERFACE ! starpu_shutdown INTERFACE SUBROUTINE starpu_shutdown() BIND(C) USE iso_c_binding END SUBROUTINE starpu_shutdown END INTERFACE ! starpu_topology_print ! starpu_asynchronous_copy_disabled INTERFACE SUBROUTINE starpu_asynchronous_copy_disabled() BIND(C) USE iso_c_binding END SUBROUTINE starpu_asynchronous_copy_disabled END INTERFACE ! starpu_asynchronous_cuda_copy_disabled INTERFACE SUBROUTINE starpu_asynchronous_cuda_copy_disabled() BIND(C) USE iso_c_binding END SUBROUTINE starpu_asynchronous_cuda_copy_disabled END INTERFACE ! starpu_asynchronous_opencl_copy_disabled INTERFACE SUBROUTINE starpu_asynchronous_opencl_copy_disabled() BIND(C) USE iso_c_binding END SUBROUTINE starpu_asynchronous_opencl_copy_disabled END INTERFACE ! starpu_display_stats INTERFACE SUBROUTINE starpu_display_stats() BIND(C) USE iso_c_binding END SUBROUTINE starpu_display_stats END INTERFACE ! starpu_get_version INTERFACE SUBROUTINE starpu_get_version(major,minor,release) BIND(C) USE iso_c_binding INTEGER(KIND=C_INT), INTENT(OUT) :: major,minor,release END SUBROUTINE starpu_get_version END INTERFACE ! starpu_cpu_worker_get_count INTERFACE FUNCTION starpu_cpu_worker_get_count() BIND(C) USE iso_c_binding INTEGER(KIND=C_INT) :: starpu_cpu_worker_get_count END FUNCTION starpu_cpu_worker_get_count END INTERFACE ! == starpu_task.h == ! starpu_tag_declare_deps ! starpu_tag_declare_deps_array ! starpu_task_declare_deps_array ! starpu_tag_wait ! starpu_tag_wait_array ! starpu_tag_notify_from_apps ! starpu_tag_restart ! starpu_tag_remove ! starpu_task_init ! starpu_task_clean ! starpu_task_create ! starpu_task_destroy ! starpu_task_set_destroy ! starpu_task_submit ! starpu_task_submit_to_ctx ! starpu_task_finished ! starpu_task_wait ! starpu_task_wait_for_all INTERFACE SUBROUTINE starpu_task_wait_for_all() BIND(C) USE iso_c_binding END SUBROUTINE starpu_task_wait_for_all END INTERFACE ! starpu_task_wait_for_n_submitted ! starpu_task_wait_for_all_in_ctx ! starpu_task_wait_for_n_submitted_in_ctx ! starpu_task_wait_for_no_ready ! starpu_task_nready ! starpu_task_nsubmitted ! starpu_codelet_init ! starpu_codelet_display_stats ! starpu_task_get_current ! starpu_parallel_task_barrier_init ! starpu_parallel_task_barrier_init_n ! starpu_task_dup ! starpu_task_set_implementation ! starpu_task_get_implementation END MODULE starpu_mod starpu-1.4.9+dfsg/tests/helper.h000066400000000000000000000105041507764646700166020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #ifndef _TESTS_HELPER_H #define _TESTS_HELPER_H #include #include #include #ifdef HAVE_GETRLIMIT #include #endif #ifdef STARPU_HAVE_VALGRIND_H #include #endif #ifdef STARPU_HAVE_HELGRIND_H #include #endif #define STARPU_TEST_SKIPPED 77 //void *ALL_IS_OK = (void *)123456789L; //void *ALL_IS_NOT_OK = (void *)987654321L; // //#define STARPU_CHECK_MALLOC(ptr) {if (!ptr) { fprintf(stderr, "starpu_malloc failed\n"); return 1; }} //#define STARPU_CHECK_MALLOC_HAS_FAILED(ptr) {if (ptr) { fprintf(stderr, "starpu_malloc should have failed\n"); return 1; }} //#define STARPU_CHECK_MALLOC_THREAD(ptr) {if (!ptr) { fprintf(stderr, "starpu_malloc failed\n"); return ALL_IS_NOT_OK; }} //#define STARPU_CHECK_MALLOC_HAS_FAILED_THREAD(ptr) {if (ptr) { fprintf(stderr, "starpu_malloc should have failed\n"); return ALL_IS_NOT_OK; }} //#define STARPU_CHECK_RETURN_VALUE_THREAD(err, message) {if (err < 0) { perror(message); return ALL_IS_NOT_OK; }} //#define STARPU_CHECK_RETURN_VALUE_IS_THREAD(err, value, message) {if (err >= 0 || errno != value) { perror(message); return ALL_IS_NOT_OK; }} //#define STARPU_TEST_OUTPUT #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); fflush(ofile); }} while(0) #if defined(STARPU_HAVE_VALGRIND_H) && !defined(STARPU_VALGRIND_FULL) static int _starpu_valgrind_print_once STARPU_ATTRIBUTE_UNUSED = 0; # define STARPU_SKIP_IF_VALGRIND \ do \ { \ if(STARPU_RUNNING_ON_VALGRIND) \ { \ STARPU_HG_DISABLE_CHECKING(_starpu_valgrind_print_once); \ if (!_starpu_valgrind_print_once) \ { \ FPRINTF(stderr, "Running on valgrind, skipping the actual computations\n"); \ _starpu_valgrind_print_once = 1; \ } \ return; \ } \ } while(0) # define STARPU_SKIP_IF_VALGRIND_RETURN_ZERO \ do \ { \ if(STARPU_RUNNING_ON_VALGRIND) \ { \ STARPU_HG_DISABLE_CHECKING(_starpu_valgrind_print_once); \ if (!_starpu_valgrind_print_once) \ { \ FPRINTF(stderr, "Running on valgrind, skipping the actual computations\n"); \ _starpu_valgrind_print_once = 1; \ } \ return 0; \ } \ } while(0) # define STARPU_SKIP_IF_VALGRIND_RETURN_SKIP \ do \ { \ if(STARPU_RUNNING_ON_VALGRIND) \ { \ STARPU_HG_DISABLE_CHECKING(_starpu_valgrind_print_once); \ if (!_starpu_valgrind_print_once) \ { \ FPRINTF(stderr, "Running on valgrind, skipping the actual computations\n"); \ _starpu_valgrind_print_once = 1; \ } \ return STARPU_TEST_SKIPPED; \ } \ } while(0) # define STARPU_RETURN(ret) \ do \ { \ if(STARPU_RUNNING_ON_VALGRIND) \ { \ FPRINTF(stderr, "Running on valgrind, ignoring return value\n"); \ return 0; \ } \ else return ret; \ } while(0) #else /* defined(STARPU_HAVE_VALGRIND_H) && !defined(STARPU_VALGRIND_FULL) */ # define STARPU_RETURN(ret) return ret # define STARPU_SKIP_IF_VALGRIND # define STARPU_SKIP_IF_VALGRIND_RETURN_ZERO # define STARPU_SKIP_IF_VALGRIND_RETURN_SKIP #endif /* defined(STARPU_HAVE_VALGRIND_H) && !defined(STARPU_VALGRIND_FULL) */ #ifndef ANNOTATE_HAPPENS_BEFORE #define ANNOTATE_HAPPENS_BEFORE(obj) ((void)0) #endif #ifndef ANNOTATE_HAPPENS_BEFORE_FORGET_ALL #define ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(obj) ((void)0) #endif #ifndef ANNOTATE_HAPPENS_AFTER #define ANNOTATE_HAPPENS_AFTER(obj) ((void)0) #endif static inline void disable_coredump(void) { #ifdef HAVE_GETRLIMIT struct rlimit rlim = { 0, 0 }; setrlimit(RLIMIT_CORE, &rlim); #endif } #endif /* _TESTS_HELPER_H */ starpu-1.4.9+dfsg/tests/helper/000077500000000000000000000000001507764646700164315ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/helper/cublasLt_init.c000066400000000000000000000037251507764646700214000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include "../helper.h" /* * Test initializing cublasLt, and how much time that takes */ static double start; static double end; //static float *data = NULL; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned ngpus = starpu_cuda_worker_get_count(); double init_timing; double shutdown_timing; start = starpu_timing_now(); starpu_cublasLt_init(); end = starpu_timing_now(); init_timing = end - start; start = starpu_timing_now(); starpu_cublasLt_shutdown(); end = starpu_timing_now(); shutdown_timing = end - start; FPRINTF(stderr, "Total:\n"); FPRINTF(stderr, "\tinit: %2.2f ms\n", init_timing/(1000)); FPRINTF(stderr, "\tshutdown: %2.2f ms\n", shutdown_timing/(1000)); if (ngpus != 0) { FPRINTF(stderr, "per-GPU (#gpu = %u):\n", ngpus); FPRINTF(stderr, "\tinit: %2.2f ms\n", init_timing/(1000*ngpus)); FPRINTF(stderr, "\tshutdown: %2.2f ms\n", shutdown_timing/(1000*ngpus)); } starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/helper/cublas_init.c000066400000000000000000000035341507764646700210760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Test initializing cublas, and how much time that takes */ static double start; static double end; //static float *data = NULL; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned ngpus = starpu_cuda_worker_get_count(); double init_timing; double shutdown_timing; start = starpu_timing_now(); starpu_cublas_init(); end = starpu_timing_now(); init_timing = end - start; start = starpu_timing_now(); starpu_cublas_shutdown(); end = starpu_timing_now(); shutdown_timing = end - start; FPRINTF(stderr, "Total:\n"); FPRINTF(stderr, "\tinit: %2.2f us\n", init_timing/(1000)); FPRINTF(stderr, "\tshutdown: %2.2f us\n", shutdown_timing/(1000)); if (ngpus != 0) { FPRINTF(stderr, "per-GPU (#gpu = %u):\n", ngpus); FPRINTF(stderr, "\tinit: %2.2f us\n", init_timing/(1000*ngpus)); FPRINTF(stderr, "\tshutdown: %2.2f us\n", shutdown_timing/(1000*ngpus)); } starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/helper/cusparse_init.c000066400000000000000000000035421507764646700214510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Test initializing cusparse, and how much time that takes */ static double start; static double end; //static float *data = NULL; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned ngpus = starpu_cuda_worker_get_count(); double init_timing; double shutdown_timing; start = starpu_timing_now(); starpu_cusparse_init(); end = starpu_timing_now(); init_timing = end - start; start = starpu_timing_now(); starpu_cusparse_shutdown(); end = starpu_timing_now(); shutdown_timing = end - start; FPRINTF(stderr, "Total:\n"); FPRINTF(stderr, "\tinit: %2.2f ms\n", init_timing/(1000)); FPRINTF(stderr, "\tshutdown: %2.2f ms\n", shutdown_timing/(1000)); if (ngpus != 0) { FPRINTF(stderr, "per-GPU (#gpu = %u):\n", ngpus); FPRINTF(stderr, "\tinit: %2.2f ms\n", init_timing/(1000*ngpus)); FPRINTF(stderr, "\tshutdown: %2.2f ms\n", shutdown_timing/(1000*ngpus)); } starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/helper/execute_on_all.c000066400000000000000000000026541507764646700215720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Test executing a function on all workers */ void func(void *arg) { int *ptr = (int *) arg; STARPU_ASSERT(*ptr == 0x42); } int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int arg = 0x42; starpu_execute_on_each_worker(func, &arg, STARPU_CPU|STARPU_CUDA|STARPU_OPENCL); starpu_execute_on_each_worker(func, &arg, STARPU_CPU); starpu_execute_on_each_worker(func, &arg, STARPU_CUDA); starpu_execute_on_each_worker(func, &arg, STARPU_OPENCL); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/helper/hipblas_init.c000066400000000000000000000035731507764646700212520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include "../helper.h" /* * Test initializing hipblas, and how much time that takes */ static double start; static double end; //static float *data = NULL; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned ngpus = starpu_cuda_worker_get_count(); double init_timing; double shutdown_timing; start = starpu_timing_now(); starpu_hipblas_init(); end = starpu_timing_now(); init_timing = end - start; start = starpu_timing_now(); starpu_hipblas_shutdown(); end = starpu_timing_now(); shutdown_timing = end - start; FPRINTF(stderr, "Total:\n"); FPRINTF(stderr, "\tinit: %2.2f us\n", init_timing/(1000)); FPRINTF(stderr, "\tshutdown: %2.2f us\n", shutdown_timing/(1000)); if (ngpus != 0) { FPRINTF(stderr, "per-GPU (#gpu = %u):\n", ngpus); FPRINTF(stderr, "\tinit: %2.2f us\n", init_timing/(1000*ngpus)); FPRINTF(stderr, "\tshutdown: %2.2f us\n", shutdown_timing/(1000*ngpus)); } starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/helper/pinned_memory.c000066400000000000000000000024631507764646700214470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* * Test calling starpu_malloc, i.e. allocating pinned memory */ #define NITER 10 #define SIZE (4*1024*1024*sizeof(float)) static float *data = NULL; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned iter; for (iter = 0; iter < NITER; iter++) { ret = starpu_malloc((void **)&data, SIZE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); starpu_free_noflag(data, SIZE); } starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/helper/starpu_create_sync_task.c000066400000000000000000000042121507764646700235130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* * Test starpu_create_sync_task */ #define NITER 10 static int create_dummy_task(starpu_tag_t tag) { struct starpu_task *task = starpu_task_create(); task->use_tag = 1; task->tag_id = tag; task->cl = &starpu_codelet_nop; int ret = starpu_task_submit(task); return ret; } int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_tag_t sync_tags[NITER]; unsigned iter; for (iter = 0; iter < NITER; iter++) { starpu_tag_t sync_tag = (starpu_tag_t)iter*100; sync_tags[iter] = sync_tag; unsigned ndeps = 10; starpu_tag_t deps[ndeps]; unsigned d; for (d = 0; d < ndeps; d++) { deps[d] = sync_tag + d + 1; ret = create_dummy_task(deps[d]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_create_sync_task(sync_tag, ndeps, deps, NULL, NULL); } /* Wait all the synchronization tasks */ ret = starpu_tag_wait_array(NITER, sync_tags); STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait_array"); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/helper/starpu_data_cpy.c000066400000000000000000000035051507764646700217620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Test starpu_data_cpy */ int main(int argc, char **argv) { int ret; int var1, var2; starpu_data_handle_t var1_handle, var2_handle; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() + starpu_opencl_worker_get_count() == 0) { starpu_shutdown(); return STARPU_TEST_SKIPPED; } var1 = 42; var2 = 12; starpu_variable_data_register(&var1_handle, STARPU_MAIN_RAM, (uintptr_t)&var1, sizeof(var1)); starpu_variable_data_register(&var2_handle, STARPU_MAIN_RAM, (uintptr_t)&var2, sizeof(var2)); ret = starpu_data_cpy(var2_handle, var1_handle, 0, NULL, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_cpy"); starpu_data_acquire(var2_handle, STARPU_R); ret = EXIT_SUCCESS; if (var2 != var1) { FPRINTF(stderr, "var2 is %d but it should be %d\n", var2, var1); ret = EXIT_FAILURE; } starpu_data_release(var2_handle); starpu_data_unregister(var1_handle); starpu_data_unregister(var2_handle); starpu_shutdown(); STARPU_RETURN(ret); } starpu-1.4.9+dfsg/tests/helper/starpu_data_dup_ro.c000066400000000000000000000100641507764646700224550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../variable/increment.h" #include "../helper.h" /* * Test starpu_data_dup_ro */ int main(int argc, char **argv) { int ret; unsigned var1, *var; starpu_data_handle_t var1_handle, var2_handle, var3_handle, var4_handle, var5_handle; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() + starpu_cuda_worker_get_count() + starpu_opencl_worker_get_count() == 0) { starpu_shutdown(); return STARPU_TEST_SKIPPED; } increment_load_opencl(); var1 = 42; starpu_variable_data_register(&var1_handle, STARPU_MAIN_RAM, (uintptr_t)&var1, sizeof(var1)); /* Make a duplicate of the original data */ ret = starpu_data_dup_ro(&var2_handle, var1_handle, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro"); /* Free it */ starpu_data_unregister(var2_handle); /* Make another duplicate of the original data */ ret = starpu_data_dup_ro(&var2_handle, var1_handle, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro"); /* Free it through submit */ starpu_data_unregister_submit(var2_handle); /* Make another duplicate of the original data */ ret = starpu_data_dup_ro(&var2_handle, var1_handle, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro"); /* Make a second duplicate of the original data */ ret = starpu_data_dup_ro(&var3_handle, var1_handle, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro"); STARPU_ASSERT(var3_handle == var2_handle); /* Make a duplicate of a duplicate */ ret = starpu_data_dup_ro(&var4_handle, var2_handle, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro"); STARPU_ASSERT(var4_handle == var2_handle); ret = starpu_task_insert(&increment_cl, STARPU_RW, var1_handle, 0); if (ret == -ENODEV) { starpu_data_unregister(var1_handle); starpu_data_unregister(var2_handle); starpu_data_unregister(var3_handle); starpu_data_unregister(var4_handle); starpu_shutdown(); return STARPU_TEST_SKIPPED; } /* Make a duplicate of the new value */ ret = starpu_data_dup_ro(&var5_handle, var1_handle, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_dup_ro"); starpu_data_acquire(var2_handle, STARPU_R); var = starpu_data_get_local_ptr(var2_handle); ret = EXIT_SUCCESS; if (*var != 42) { FPRINTF(stderr, "var2 is %u but it should be %d\n", *var, 42); ret = EXIT_FAILURE; } starpu_data_release(var2_handle); starpu_data_acquire(var3_handle, STARPU_R); var = starpu_data_get_local_ptr(var3_handle); if (*var != 42) { FPRINTF(stderr, "var3 is %u but it should be %d\n", *var, 42); ret = EXIT_FAILURE; } starpu_data_release(var3_handle); starpu_data_acquire(var4_handle, STARPU_R); var = starpu_data_get_local_ptr(var4_handle); if (*var != 42) { FPRINTF(stderr, "var4 is %u but it should be %d\n", *var, 42); ret = EXIT_FAILURE; } starpu_data_release(var4_handle); starpu_data_acquire(var5_handle, STARPU_R); var = starpu_data_get_local_ptr(var5_handle); if (*var != 43) { FPRINTF(stderr, "var5 is %u but it should be %d\n", *var, 43); ret = EXIT_FAILURE; } starpu_data_release(var5_handle); starpu_data_unregister(var1_handle); starpu_data_unregister(var2_handle); starpu_data_unregister(var3_handle); starpu_data_unregister(var4_handle); starpu_data_unregister(var5_handle); increment_unload_opencl(); starpu_shutdown(); STARPU_RETURN(ret); } starpu-1.4.9+dfsg/tests/loader.c000066400000000000000000000274611507764646700165760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) #include #else #include #endif #ifdef STARPU_QUICK_CHECK /* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s add some extra times for tests which run with all schedulers */ #define DEFAULT_TIMEOUT 100 #elif !defined(STARPU_LONG_CHECK) /* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ #define DEFAULT_TIMEOUT 300 #else /* Long checks can be very long */ #define DEFAULT_TIMEOUT 1000 #endif #define AUTOTEST_SKIPPED_TEST 77 static pid_t child_pid = 0; static int timeout; #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) static int mygettimeofday(struct timeval *tv, void *tz) { if (tv) { FILETIME ft; unsigned long long res; GetSystemTimeAsFileTime(&ft); /* 100-nanosecond intervals since January 1, 1601 */ res = ft.dwHighDateTime; res <<= 32; res |= ft.dwLowDateTime; res /= 10; /* Now we have microseconds */ res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; /* Now we are based on epoch */ tv->tv_sec = res / 1000000ULL; tv->tv_usec = res % 1000000ULL; } } #else #define mygettimeofday(tv,tz) gettimeofday(tv,tz) #endif #ifdef STARPU_GDB_PATH static int try_launch_gdb(const char *exe, const char *core) { # define GDB_COMMANDS \ "-ex", "py-list", \ "-ex", "starpu-tasks", \ "-ex", "starpu-workers", \ "-ex", "starpu-print-datas-summary", \ "-ex", "starpu-memusage", \ "-ex", "starpu-print-archs", \ "-ex", "starpu-print-registered-models", \ "-ex", "bt full", \ "-ex", "py-bt", \ "-ex", "thread apply all bt full", \ "-ex", "thread apply all py-bt", \ int err; pid_t pid; struct stat st; const char *top_builddir; char *gdb; err = stat(core, &st); if (err != 0) { fprintf(stderr, "while looking for core file of %s: %s: %m\n", exe, core); return -1; } if (!(st.st_mode & S_IFREG)) { fprintf(stderr, "%s: not a regular file\n", core); return -1; } top_builddir = getenv("top_builddir"); pid = fork(); switch (pid) { case 0: /* kid */ if (top_builddir != NULL) { /* Run gdb with Libtool. */ gdb = alloca(strlen(top_builddir) + sizeof("/libtool") + 1); strcpy(gdb, top_builddir); strcat(gdb, "/libtool"); err = execl(gdb, "gdb", "--mode=execute", STARPU_GDB_PATH, "--batch", GDB_COMMANDS exe, core, NULL); } else { /* Run gdb directly */ gdb = STARPU_GDB_PATH; err = execl(gdb, "gdb", "--batch", GDB_COMMANDS exe, core, NULL); } if (err != 0) { fprintf(stderr, "while launching `%s': %m\n", gdb); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); break; case -1: fprintf(stderr, "fork: %m\n"); return -1; default: /* parent */ { pid_t who; int status; who = waitpid(pid, &status, 0); if (who != pid) fprintf(stderr, "while waiting for gdb " "process %d: %m\n", pid); } } return 0; # undef GDB_COMMANDS } #endif /* STARPU_GDB_PATH */ static void launch_gdb(const char *exe) { #ifdef STARPU_GDB_PATH char s[32]; snprintf(s, sizeof(s), "core.%d", child_pid); if (try_launch_gdb(exe, s) < 0) try_launch_gdb(exe, "core"); #endif /* STARPU_GDB_PATH */ } static char *test_name; static void test_cleaner(int sig) { pid_t child_gid; int status; (void) sig; // send signal to all loader family members fprintf(stderr, "[error] test %s has been blocked for %d seconds. Mark it as failed\n", test_name, timeout); child_gid = getpgid(child_pid); kill(-child_gid, SIGQUIT); waitpid(child_pid, &status, 0); launch_gdb(test_name); raise(SIGALRM); exit(EXIT_FAILURE); } static void forwardsig(int sig) { pid_t child_gid; child_gid = getpgid(child_pid); kill(-child_gid, sig); } static int _decode(char **src, char *motif, const char *value) { char *found; found = strstr(*src, motif); if (found == NULL) return 0; char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); strncpy(new_src, *src, found - *src); strcat(new_src, value); strcat(new_src, found+strlen(motif)); *src = new_src; return 1; } static void decode(char **src, char *motif, const char *value) { if (*src) { if (strstr(*src, motif) && value == NULL) { fprintf(stderr, "error: $%s undefined\n", motif); exit(EXIT_FAILURE); } int d = _decode(src, motif, value); while (d) d = _decode(src, motif, value); } } int main(int argc, char *argv[]) { int child_exit_status; char *test_args; char *launcher; char *launcher_args; char *libtool; char *cflags; const char *top_builddir = getenv("top_builddir"); struct sigaction sa; int ret; struct timeval start; struct timeval end; double timing; int x=1; int asan = 0, lsan = 0, tsan = 0, usan = 0; (void) argc; test_args = NULL; timeout = 0; launcher=getenv("STARPU_CHECK_LAUNCHER"); launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); cflags = getenv("CFLAGS"); if (cflags) { if (strstr(cflags, "-fsanitize=address")) asan = 1; if (strstr(cflags, "-fsanitize=leak")) lsan = 1; if (strstr(cflags, "-fsanitize=thread")) tsan = 1; if (strstr(cflags, "-fsanitize=undefined")) usan = 1; } if (argv[x] && strcmp(argv[x], "-t") == 0) { timeout = strtol(argv[x+1], NULL, 10); x += 2; } else if (getenv("STARPU_TIMEOUT_ENV")) { /* get user-defined iter_max value */ timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); } else if (timeout <= 0) { timeout = DEFAULT_TIMEOUT; if ((launcher && strstr(launcher, "valgrind")) || (launcher && strstr(launcher, "helgrind")) || tsan) timeout *= 20; if (asan || usan || lsan || (launcher && strstr(launcher, "compute-sanitizer"))) timeout *= 5; if (timeout > 1750) timeout = 1750; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG timeout *= 20; #endif #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE /* compare values between the 2 values of timeout */ if (getenv("MPIEXEC_TIMEOUT")) { int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); if (mpiexec_timeout != timeout) fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); } #endif if (argv[x] && strcmp(argv[x], "-p") == 0) { test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); x += 3; } else { test_name = argv[x]; x += 1; } if (!test_name) { fprintf(stderr, "[error] Need name of program to start\n"); exit(EXIT_FAILURE); } size_t len = strlen(test_name); if (len >= 3 && test_name[len-3] == '.' && test_name[len-2] == 's' && test_name[len-1] == 'h') { /* This is a shell script, don't run ourself on bash, but make * the script call us for each program invocation */ char *launch = NULL; if (top_builddir == NULL) // this may fail if .libs is in the directory path setenv("STARPU_LAUNCH", argv[0], 1); else { launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); strcpy(launch, top_builddir); strcat(launch, "/tests/loader"); setenv("STARPU_LAUNCH", launch, 1); } execvp(test_name, argv+x-1); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); free(launch); exit(EXIT_FAILURE); } if (strstr(test_name, "spmv/dw_block_spmv")) { test_args = (char *) calloc(512, sizeof(char)); snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); } else if (strstr(test_name, "starpu_perfmodel_display")) { if (x >= argc) test_args = strdup("-l"); } else if (strstr(test_name, "starpu_perfmodel_plot")) { if (x >= argc) test_args = strdup("-l"); } /* get launcher program */ if (launcher_args) launcher_args=strdup(launcher_args); if (top_builddir == NULL) { fprintf(stderr, "warning: $top_builddir undefined, " "so $STARPU_CHECK_LAUNCHER ignored\n"); launcher = NULL; launcher_args = NULL; libtool = NULL; } else { libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); strcpy(libtool, top_builddir); strcat(libtool, "/libtool"); } if (launcher) { const char *top_srcdir = getenv("top_srcdir"); decode(&launcher, "@top_srcdir@", top_srcdir); decode(&launcher_args, "@top_srcdir@", top_srcdir); } setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); /* set SIGALARM handler */ sa.sa_flags = SA_RESETHAND | SA_NODEFER; sigemptyset(&sa.sa_mask); sa.sa_handler = test_cleaner; if (-1 == sigaction(SIGALRM, &sa, NULL)) perror("sigaction"); signal(SIGINT, forwardsig); signal(SIGHUP, forwardsig); signal(SIGPIPE, forwardsig); signal(SIGTERM, forwardsig); child_pid = fork(); if (child_pid == 0) { char *launcher_argv[100]; int i=0; setpgid(0, 0); /* "Launchers" such as Valgrind need to be inserted * after the Libtool-generated wrapper scripts, hence * this special-case. */ if (launcher && top_builddir != NULL) { launcher_argv[i++] = libtool; launcher_argv[i++] = "--mode=execute"; launcher_argv[i++] = launcher; if (launcher_args) { launcher_argv[i++] = strtok(launcher_args, " "); while (launcher_argv[i-1]) { launcher_argv[i++] = strtok(NULL, " "); } } } launcher_argv[i++] = test_name; if (test_args) launcher_argv[i++] = test_args; else while (argv[x]) { launcher_argv[i++] = argv[x++]; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG launcher_argv[i++] = "--cfg=contexts/factory:thread"; #endif #endif launcher_argv[i++] = NULL; execvp(*launcher_argv, launcher_argv); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); exit(EXIT_FAILURE); } if (child_pid == -1) { fprintf(stderr, "[error] fork. test marked as failed\n"); exit(EXIT_FAILURE); } free(test_args); free(libtool); ret = EXIT_SUCCESS; gettimeofday(&start, NULL); alarm(timeout); if (child_pid == waitpid(child_pid, &child_exit_status, 0)) { if (WIFEXITED(child_exit_status)) { int status = WEXITSTATUS(child_exit_status); if (status == EXIT_SUCCESS) { alarm(0); } else { if (status != AUTOTEST_SKIPPED_TEST) fprintf(stdout, "`%s' exited with return code %d\n", test_name, status); ret = status; } } else if (WIFSIGNALED(child_exit_status)) { fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", test_name, WTERMSIG(child_exit_status)); launch_gdb(test_name); ret = EXIT_FAILURE; } else { fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", test_name); ret = EXIT_FAILURE; } } gettimeofday(&end, NULL); timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); return ret; } starpu-1.4.9+dfsg/tests/main/000077500000000000000000000000001507764646700160765ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/main/bind.c000066400000000000000000000054431507764646700171640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test binding the main thread to its dedicated core, making one less CPU core * available to StarPU. */ int main(void) { int ret; struct starpu_conf conf; int ncpus; unsigned active_bindid; unsigned passive_bindid1; unsigned passive_bindid2; /* First get the number of cores */ starpu_conf_init(&conf); conf.nmpi_ms = 0; conf.ntcpip_ms = 0; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ncpus = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); starpu_shutdown(); /* Check we have enough of them */ if (ncpus <= 2) return STARPU_TEST_SKIPPED; /* Now re-initialize with two cores less */ starpu_conf_init(&conf); conf.reserve_ncpus = 2; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Make sure StarPU uses two core less, (or we had hit maxcpus anyway...) */ STARPU_ASSERT_MSG(ncpus == STARPU_MAXCPUS || starpu_worker_get_count_by_type(STARPU_CPU_WORKER) == ncpus-2, "Expected %d CPUs, got %d\n", ncpus-2, starpu_worker_get_count_by_type(STARPU_CPU_WORKER)); FPRINTF(stderr, "CPUS: %d as expected\n", starpu_worker_get_count_by_type(STARPU_CPU_WORKER)); /* Check we can grab a whole core */ active_bindid = starpu_get_next_bindid(STARPU_THREAD_ACTIVE, NULL, 0); starpu_bind_thread_on(active_bindid, STARPU_THREAD_ACTIVE, "main"); /* Check we can request for an additional shared core */ passive_bindid1 = starpu_get_next_bindid(0, NULL, 0); passive_bindid2 = starpu_get_next_bindid(0, NULL, 0); STARPU_ASSERT(passive_bindid1 != active_bindid); STARPU_ASSERT(passive_bindid1 == passive_bindid2); starpu_bind_thread_on(passive_bindid1, 0, "main"); starpu_bind_thread_on(passive_bindid2, 0, "main"); /* Try to bind on a worker */ starpu_bind_thread_on_worker(0); /* Try to bind back to the reserved core */ starpu_bind_thread_on_cpu(active_bindid); /* Try to bind back to the main core, if any */ starpu_bind_thread_on_main(); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/main/callback.c000066400000000000000000000045511507764646700200030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" void codelet_callback_func(void *arg) { if (arg) { int *x = (int *)arg; FPRINTF(stderr, "calling callback codelet arg %d\n", *x); } else FPRINTF(stderr, "calling callback codelet arg %p\n", arg); } void task_callback_func(void *arg) { FPRINTF(stderr, "\ncalling callback task arg %p\n", arg); if (starpu_task_get_current()->cl->callback_func) starpu_task_get_current()->cl->callback_func(arg); } struct starpu_codelet mycodelet = { .where = STARPU_NOWHERE, .callback_func = codelet_callback_func }; struct starpu_codelet mycodelet2 = { .where = STARPU_NOWHERE, }; int main(void) { int ret; int value=12; int value2=24; ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_task_insert(&mycodelet, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&mycodelet, STARPU_CALLBACK_ARG_NFREE, &value, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&mycodelet, STARPU_CALLBACK, &task_callback_func, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&mycodelet, STARPU_CALLBACK, &task_callback_func, STARPU_CALLBACK_ARG_NFREE, &value2, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&mycodelet2, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&mycodelet2, STARPU_CALLBACK, &task_callback_func, STARPU_CALLBACK_ARG_NFREE, &value, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/tests/main/codelet_null_callback.c000066400000000000000000000053771507764646700225430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Test passing a NULL codelet, but callbacks */ static void callback(void *ptr) { int *x = (int *)ptr; FPRINTF(stderr, "x=%d\n", *x); STARPU_ASSERT_MSG(*x == 40, "%d != %d\n", *x, 40); (*x)++; } static void callback2(void *ptr) { int *x2 = (int *)ptr; FPRINTF(stderr, "x2=%d\n", *x2); STARPU_ASSERT_MSG(*x2 == 41, "%d != %d\n", *x2, 41); (*x2)++; } static void prologue_callback(void *ptr) { int *y = (int *)ptr; FPRINTF(stderr, "y=%d\n", *y); STARPU_ASSERT_MSG(*y == 12, "%d != %d\n", *y, 12); (*y)++; } static void prologue_callback_pop(void *ptr) { int *z = (int *)ptr; FPRINTF(stderr, "z=%d\n", *z); STARPU_ASSERT_MSG(*z == 32, "%d != %d\n", *z, 32); (*z)++; } int main(int argc, char **argv) { int ret; int x=40; int x2=41; int y=12; int z=32; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_task_insert(NULL, STARPU_CALLBACK_WITH_ARG_NFREE, callback, &x, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(NULL, STARPU_CALLBACK, callback2, STARPU_CALLBACK_ARG_NFREE, &x2, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(NULL, STARPU_PROLOGUE_CALLBACK, prologue_callback, STARPU_PROLOGUE_CALLBACK_ARG_NFREE, &y, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(NULL, STARPU_PROLOGUE_CALLBACK_POP, prologue_callback_pop, STARPU_PROLOGUE_CALLBACK_POP_ARG_NFREE, &z, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); STARPU_ASSERT_MSG(x == 41, "x should be equal to %d and not %d\n", 41, x); STARPU_ASSERT_MSG(x2 == 42, "x2 should be equal to %d and not %d\n", 42, x2); STARPU_ASSERT_MSG(y == 13, "y should be equal to %d and not %d\n", 13, y); STARPU_ASSERT_MSG(z == 33, "z should be equal to %d and not %d\n", 33, z); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/main/const_codelet.c000066400000000000000000000065021507764646700210720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else /* * Test task submission */ static int i = 0, j; void dummy_func(void *descr[], void *arg) { (void)descr; (void)arg; int old_i = STARPU_ATOMIC_ADD(&i, 1); FPRINTF(stdout, "called third task, i = %d\n", old_i+1); } static const struct starpu_codelet dummy_codelet = { .where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL, .cpu_funcs = {dummy_func}, .cuda_funcs = {dummy_func}, .opencl_funcs = {dummy_func}, .model = NULL, .nbuffers = 0, .checked = 1 }; static void callback(void *arg) { (void)arg; struct starpu_task *task = starpu_task_create(); task->cl = (struct starpu_codelet *) &dummy_codelet; task->detach = 1; if (starpu_task_submit(task) == -ENODEV) exit(STARPU_TEST_SKIPPED); FPRINTF(stdout, "submitted third task, i = %d\n", i); } static const struct starpu_codelet callback_submit_codelet = { .where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL, .cpu_funcs = {dummy_func}, .cuda_funcs = {dummy_func}, .opencl_funcs = {dummy_func}, .model = NULL, .nbuffers = 0, .checked = 1 }; static void task_submit_func(void *descr[], void *arg) { (void)descr; (void)arg; struct starpu_task *task = starpu_task_create(); task->cl = (struct starpu_codelet *) &callback_submit_codelet; task->callback_func = callback; task->detach = 1; if (starpu_task_submit(task) == -ENODEV) exit(STARPU_TEST_SKIPPED); int old_i = STARPU_ATOMIC_ADD(&i, 1); FPRINTF(stdout, "submitted second task, i = %d\n", old_i + 1); } static struct starpu_codelet task_submit_codelet = { .where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL, .cpu_funcs = {task_submit_func}, .cuda_funcs = {task_submit_func}, .opencl_funcs = {task_submit_func}, .model = NULL, .nbuffers = 0 }; int main(void) { int ret; setenv("STARPU_CODELET_PROFILING", "0", 1); ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_task *task = starpu_task_create(); task->cl = &task_submit_codelet; task->detach = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_wait_for_all(); j = i; starpu_shutdown(); return j == 3 ? EXIT_SUCCESS : EXIT_FAILURE; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } #endif starpu-1.4.9+dfsg/tests/main/deadlock.c000066400000000000000000000036431507764646700200160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Create a cycle of tasks with NULL codelet, using manual dependencies. * This is meant to try debugging tools with such a deadlock case. */ #define N 4 int main(int argc, char **argv) { int i, ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_task **tasks = (struct starpu_task **) malloc(N*sizeof(struct starpu_task *)); for (i = 0; i < N; i++) { tasks[i] = starpu_task_create(); tasks[i]->cl = NULL; } for (i = 0; i < N; i++) { if (i > 0) starpu_task_declare_deps_array(tasks[i], 1, &tasks[i-1]); else starpu_task_declare_deps_array(tasks[i], 1, &tasks[N-1]); } for (i = 0; i < N; i++) { ret = starpu_task_submit(tasks[i]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); starpu_shutdown(); free(tasks); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/declare_deps_after_submission.c000066400000000000000000000046221507764646700243140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test that we can declare a dependency after submitting a non-auto-destroy task */ #ifdef STARPU_QUICK_CHECK #define NLOOPS 4 #else #define NLOOPS 128 #endif static struct starpu_task *create_dummy_task(void) { struct starpu_task *task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->cl_arg = NULL; return task; } int main(int argc, char **argv) { int ret; unsigned loop, nloops = NLOOPS; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); for (loop = 0; loop < nloops; loop++) { struct starpu_task *taskA, *taskB; taskA = create_dummy_task(); taskB = create_dummy_task(); /* By default, dynamically allocated tasks are destroyed at * termination, we cannot declare a dependency on something * that does not exist anymore. */ taskA->destroy = 0; taskA->detach = 0; /* we wait for the tasks explicitly */ taskB->detach = 0; ret = starpu_task_submit(taskA); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_declare_deps_array(taskB, 1, &taskA); ret = starpu_task_submit(taskB); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_destroy(taskA); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/main/declare_deps_after_submission_synchronous.c000066400000000000000000000046521507764646700267710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test that we can declare a dependency after submitting a non-auto-destroy synchronous task */ #ifdef STARPU_QUICK_CHECK #define NLOOPS 4 #else #define NLOOPS 128 #endif static struct starpu_task *create_dummy_task(void) { struct starpu_task *task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->cl_arg = NULL; return task; } int main(int argc, char **argv) { int ret; unsigned loop, nloops=NLOOPS; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_task *taskA, *taskB; for (loop = 0; loop < nloops; loop++) { taskA = create_dummy_task(); taskB = create_dummy_task(); /* By default, dynamically allocated tasks are destroyed at * termination, we cannot declare a dependency on something * that does not exist anymore. */ taskA->destroy = 0; taskA->synchronous = 1; ret = starpu_task_submit(taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_declare_deps_array(taskB, 1, &taskA); taskB->synchronous = 1; ret = starpu_task_submit(taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_destroy(taskA); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/declare_deps_in_callback.c000066400000000000000000000044511507764646700231620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test that we can declare deps from the callback of the task */ #ifdef STARPU_QUICK_CHECK #define NLOOPS 4 #else #define NLOOPS 128 #endif static void callback(void *arg) { struct starpu_task *taskA, *taskB; int ret; taskA = starpu_task_get_current(); taskB = (struct starpu_task *) arg; starpu_task_declare_deps_array(taskB, 1, &taskA); ret = starpu_task_submit(taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } static struct starpu_task *create_dummy_task(void) { struct starpu_task *task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->cl_arg = NULL; return task; } int main(int argc, char **argv) { int ret; unsigned loop; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_task *taskA, *taskB; for (loop = 0; loop < NLOOPS; loop++) { taskA = create_dummy_task(); taskB = create_dummy_task(); taskA->callback_func = callback; taskA->callback_arg = taskB; ret = starpu_task_submit(taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/deploop.c000066400000000000000000000045151507764646700177110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Create task A and B such that * - B depends on A by tag dependency. * - A would depend on B by data dependency, but we disable that by disabling * sequential consistency. */ void dummy_func(void *descr[], void *arg) { (void)descr; (void)arg; FPRINTF(stderr,"executing task %p\n", starpu_task_get_current()); } static struct starpu_codelet dummy_codelet = { .cpu_funcs = {dummy_func}, .cpu_funcs_name = {"dummy_func"}, .cuda_funcs = {dummy_func}, .opencl_funcs = {dummy_func}, .model = NULL, .nbuffers = 1, .modes = { STARPU_RW } }; int main(void) { int ret; starpu_data_handle_t handle; ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_void_data_register(&handle); struct starpu_task *taskA, *taskB; /* Make B depend on A */ starpu_tag_declare_deps(1, 1, (starpu_tag_t) 0); taskA = starpu_task_create(); taskA->cl = &dummy_codelet; taskA->tag_id = 0; taskA->use_tag = 1; taskA->handles[0] = handle; taskA->sequential_consistency = 0; FPRINTF(stderr,"A is %p\n", taskA); taskB = starpu_task_create(); taskB->cl = &dummy_codelet; taskB->tag_id = 1; taskB->use_tag = 1; taskB->handles[0] = handle; FPRINTF(stderr,"B is %p\n", taskB); ret = starpu_task_submit(taskB); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; ret = starpu_task_submit(taskA); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(handle); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/main/deprecated_func.c000066400000000000000000000072151507764646700213620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Test that we support the cpu_func and where deprecated field */ void cpu_codelet(void *descr[], void *_args) { (void)_args; int *valin = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); int *valout = (int *)STARPU_VARIABLE_GET_PTR(descr[1]); *valout = *valin; } void cpu2_codelet(void *descr[], void *_args) { (void)_args; int *valin = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); int *valout = (int *)STARPU_VARIABLE_GET_PTR(descr[1]); *valout = *valin*2; } struct starpu_codelet cl_cpu_funcs = { .where = STARPU_CPU, .cpu_funcs = {cpu_codelet}, .cpu_funcs_name = {"cpu_codelet"}, .nbuffers = 2, .name = "cpu_funcs", }; struct starpu_codelet cl_cpu_func = { .where = STARPU_CPU, .cpu_func = cpu_codelet, .cpu_funcs_name = {"cpu_codelet"}, .nbuffers = 2, .name = "cpu_func", }; struct starpu_codelet cl_cpu_multiple = { .where = STARPU_CPU, .cpu_func = STARPU_MULTIPLE_CPU_IMPLEMENTATIONS, .cpu_funcs = {cpu_codelet}, .cpu_funcs_name = {"cpu_codelet"}, .nbuffers = 2, .name = "cpu_multiple", }; struct starpu_codelet cl_cpu_func_funcs = { .where = STARPU_CPU, .cpu_func = cpu2_codelet, .cpu_funcs = {cpu_codelet}, .cpu_funcs_name = {"cpu_codelet"}, .nbuffers = 2, .name = "cpu_func_funcs", }; static int submit_codelet(struct starpu_codelet cl, int where) { int x=42, y=14; starpu_data_handle_t handles[2]; int ret; starpu_variable_data_register(&handles[0], STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); starpu_variable_data_register(&handles[1], STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y)); cl.where = where; ret = starpu_task_insert(&cl, STARPU_R, handles[0], STARPU_W, handles[1], 0); if (ret == -ENODEV) { FPRINTF(stderr, "cannot execute codelet <%s> with where=%d\n", cl.name, where); starpu_data_unregister(handles[0]); starpu_data_unregister(handles[1]); return ret; } starpu_task_wait_for_all(); starpu_data_unregister(handles[0]); starpu_data_unregister(handles[1]); if (x != y) { FPRINTF(stderr, "error when executing codelet <%s> with where=%d\n", cl.name, where); } else { FPRINTF(stderr, "success when executing codelet <%s> with where=%d\n", cl.name, where); } return x != y; } int main(void) { int ret; unsigned where; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); for(where=0 ; where<=STARPU_CPU ; where+=STARPU_CPU) { ret = submit_codelet(cl_cpu_func, where); if (ret == -ENODEV) { starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); return STARPU_TEST_SKIPPED; } if (!ret) { ret = submit_codelet(cl_cpu_funcs, where); } if (!ret) { ret = submit_codelet(cl_cpu_multiple, where); } if (!ret) { ret = submit_codelet(cl_cpu_func_funcs, where); } } starpu_shutdown(); STARPU_RETURN(ret); } starpu-1.4.9+dfsg/tests/main/display_binding.c000066400000000000000000000021721507764646700214030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" #if !defined(STARPU_HAVE_SETENV) #warning setenv is not defined. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else int main(void) { setenv("STARPU_DISPLAY_BINDINGS", "1", 1); int ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_shutdown(); return EXIT_SUCCESS; } #endif starpu-1.4.9+dfsg/tests/main/driver_api/000077500000000000000000000000001507764646700202225ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/main/driver_api/init_run_deinit.c000066400000000000000000000143111507764646700235510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../../helper.h" #define NTASKS 8 #if defined(STARPU_USE_CPU) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_HIP) void dummy(void *buffers[], void *args) { (void) buffers; (*(int *)args)++; } static struct starpu_codelet cl = { .cpu_funcs = { dummy }, .cuda_funcs = { dummy }, .opencl_funcs = { dummy }, .hip_funcs = { dummy }, .nbuffers = 0 }; static void init_driver(struct starpu_driver *d) { int ret; ret = starpu_driver_init(d); STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_init"); } static void run(struct starpu_task *task, struct starpu_driver *d) { int ret; ret = starpu_task_submit(task); starpu_do_schedule(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); while (!starpu_task_finished(task)) { ret = starpu_driver_run_once(d); STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_run_once"); } ret = starpu_task_wait(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); } static void deinit_driver(struct starpu_driver *d) { int ret; ret = starpu_driver_deinit(d); STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_deinit"); } typedef unsigned (*worker_get_count)(void); static int test_driver(struct starpu_conf *conf, struct starpu_driver *d, const char *name_driver, worker_get_count worker_get_count_func, int32_t where_driver) { int var = 0, ret, nworker; ret = starpu_init(conf); if (ret == -ENODEV) { FPRINTF(stderr, "WARNING: No %s worker found\n", name_driver); return STARPU_TEST_SKIPPED; } nworker = worker_get_count_func(); if (nworker == 0) { FPRINTF(stderr, "WARNING: No %s worker found\n", name_driver); starpu_shutdown(); return STARPU_TEST_SKIPPED; } init_driver(d); int i; for (i = 0; i < NTASKS; i++) { struct starpu_task *task; task = starpu_task_create(); cl.where = where_driver; task->cl = &cl; task->cl_arg = &var; task->detach = 0; run(task, d); } deinit_driver(d); starpu_task_wait_for_all(); starpu_shutdown(); FPRINTF(stderr, "[%s] Var is %d (expected value: %d)\n", name_driver, var, NTASKS); return !!(var != NTASKS); } #endif /* STARPU_USE_CPU || STARPU_USE_CUDA || STARPU_USE_OPENCL || STARPU_USE_HIP*/ #ifdef STARPU_USE_CPU static int test_cpu(void) { int ret; struct starpu_conf conf; ret = starpu_conf_init(&conf); if (ret == -EINVAL) return 1; struct starpu_driver d = { .type = STARPU_CPU_WORKER, .id.cpu_id = 0 }; conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.ncpus = 1; conf.not_launched_drivers = &d; conf.n_not_launched_drivers = 1; return test_driver(&conf, &d, "CPU", starpu_cpu_worker_get_count, STARPU_CPU); } #endif /* STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA static int test_cuda(void) { int ret; struct starpu_conf conf; int cudaid = 0; char *cudaid_str = getenv("STARPU_WORKERS_CUDAID"); if (cudaid_str) cudaid = atoi(cudaid_str); ret = starpu_conf_init(&conf); if (ret == -EINVAL) return 1; struct starpu_driver d = { .type = STARPU_CUDA_WORKER, .id.cuda_id = cudaid }; conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.ncuda = 1; conf.not_launched_drivers = &d; conf.n_not_launched_drivers = 1; return test_driver(&conf, &d, "CUDA", starpu_cuda_worker_get_count, STARPU_CUDA); } #endif /* STARPU_USE_CUDA */ #ifdef STARPU_USE_HIP static int test_hip(void) { int ret; struct starpu_conf conf; ret = starpu_conf_init(&conf); if (ret == -EINVAL) return 1; struct starpu_driver d = { .type = STARPU_HIP_WORKER, .id.hip_id = 0 }; conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.nhip = 1; conf.not_launched_drivers = &d; conf.n_not_launched_drivers = 1; return test_driver(&conf, &d, "HIP", starpu_hip_worker_get_count, STARPU_HIP); } #endif /* STARPU_USE_HIP */ #ifdef STARPU_USE_OPENCL static int test_opencl(void) { cl_int err; cl_platform_id platform; cl_uint pdummy; int nopencl; err = clGetPlatformIDs(1, &platform, &pdummy); if (err != CL_SUCCESS) { FPRINTF(stderr, "WARNING: No OpenCL platform found\n"); return STARPU_TEST_SKIPPED; } cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR; if (starpu_getenv_number("STARPU_OPENCL_ON_CPUS") > 0) device_type |= CL_DEVICE_TYPE_CPU; if (starpu_getenv_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0) device_type = CL_DEVICE_TYPE_CPU; cl_device_id device_id; err = clGetDeviceIDs(platform, device_type, 1, &device_id, NULL); if (err != CL_SUCCESS) { FPRINTF(stderr, "WARNING: No GPU devices found on OpenCL platform\n"); return STARPU_TEST_SKIPPED; } int var = 0, ret; struct starpu_conf conf; ret = starpu_conf_init(&conf); if (ret == -EINVAL) return 1; struct starpu_driver d = { .type = STARPU_OPENCL_WORKER, .id.opencl_id = device_id }; conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.nopencl = 1; conf.not_launched_drivers = &d; conf.n_not_launched_drivers = 1; return test_driver(&conf, &d, "OpenCL", starpu_opencl_worker_get_count, STARPU_OPENCL); } #endif /* STARPU_USE_OPENCL */ int main(void) { int ret = STARPU_TEST_SKIPPED; #ifdef STARPU_USE_CPU ret = test_cpu(); if (ret == 1) return ret; #endif #if defined(STARPU_USE_CUDA) && !(defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1)) ret = test_cuda(); if (ret == 1) return ret; #endif #ifdef STARPU_USE_OPENCL ret = test_opencl(); if (ret == 1) return ret; #endif #ifdef STARPU_USE_HIP ret = test_hip(); if (ret == 1) return ret; #endif return ret; } starpu-1.4.9+dfsg/tests/main/driver_api/run_driver.c000066400000000000000000000150101507764646700225420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../../helper.h" /* * Users can directly control drivers by using the starpu_driver* functions. * * This test makes sure that the starpu_driver_run function works for CPU, CUDA * and OpenCL drivers, and that the starpu_drivers_request_termination function * correctly shuts down all drivers. * * The test_* functions can return: * - 0 (success) * - 1 (failure) * - STARPU_TEST_SKIPPED (non-critical errors) */ #if defined(STARPU_USE_CPU) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_HIP) static void dummy(void *buffers[], void *args) { (void) buffers; (*(int *)args)++; starpu_usleep(100000); } static struct starpu_codelet cl = { .cpu_funcs = { dummy }, .cuda_funcs = { dummy }, .opencl_funcs = { dummy }, .hip_funcs = { dummy }, .nbuffers = 0 }; static void *run_driver(void *arg) { struct starpu_driver *d = (struct starpu_driver *) arg; int ret = starpu_driver_run(d); STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_run"); return NULL; } typedef unsigned (*worker_get_count)(void); static int test_driver(struct starpu_conf *conf, struct starpu_driver *d, const char *name_driver, worker_get_count worker_get_count_func, int32_t where_driver) { int ret, var = 0; static starpu_pthread_t driver_thread; ret = starpu_init(conf); if (ret == -ENODEV || worker_get_count_func() == 0) { FPRINTF(stderr, "WARNING: No %s worker found\n", name_driver); if (ret == 0) starpu_shutdown(); return STARPU_TEST_SKIPPED; } ret = starpu_pthread_create(&driver_thread, NULL, run_driver, d); if (ret != 0) { ret = 1; goto out2; } struct starpu_task *task; task = starpu_task_create(); cl.where = where_driver; task->cl = &cl; task->cl_arg = &var; task->synchronous = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) { FPRINTF(stderr, "WARNING: No worker can execute this task\n"); ret = STARPU_TEST_SKIPPED; goto out; } FPRINTF(stderr, "[%s] Var = %d (expected value: 1)\n", name_driver, var); ret = !!(var != 1); out: starpu_drivers_request_termination(); if (starpu_pthread_join(driver_thread, NULL) != 0) return 1; out2: starpu_shutdown(); return ret; } #endif /* STARPU_USE_CPU || STARPU_USE_CUDA || STARPU_USE_OPENCL || STARPU_USE_HIP */ #ifdef STARPU_USE_CPU static int test_cpu(void) { int ret; struct starpu_conf conf; ret = starpu_conf_init(&conf); if (ret == -EINVAL) return 1; struct starpu_driver d = { .type = STARPU_CPU_WORKER, .id.cpu_id = 0 }; conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.ncpus = 1; conf.not_launched_drivers = &d; conf.n_not_launched_drivers = 1; return test_driver(&conf, &d, "CPU", starpu_cpu_worker_get_count, STARPU_CPU); } #endif /* STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA static int test_cuda(void) { int ret; struct starpu_conf conf; int cudaid = 0; char *cudaid_str = getenv("STARPU_WORKERS_CUDAID"); if (cudaid_str) cudaid = atoi(cudaid_str); /* FIXME: starpu_driver would need another field to specify which stream we're driving */ if (starpu_getenv_number_default("STARPU_NWORKER_PER_CUDA", 1) != 1 && starpu_getenv_number_default("STARPU_CUDA_THREAD_PER_WORKER", -1) > 0) return STARPU_TEST_SKIPPED; ret = starpu_conf_init(&conf); if (ret == -EINVAL) return 1; struct starpu_driver d = { .type = STARPU_CUDA_WORKER, .id.cuda_id = cudaid }; conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.ncuda = 1; conf.not_launched_drivers = &d; conf.n_not_launched_drivers = 1; return test_driver(&conf, &d, "CUDA", starpu_cuda_worker_get_count, STARPU_CUDA); } #endif /* STARPU_USE_CUDA */ #ifdef STARPU_USE_HIP static int test_hip(void) { int ret; struct starpu_conf conf; ret = starpu_conf_init(&conf); if (ret == -EINVAL) return 1; struct starpu_driver d = { .type = STARPU_HIP_WORKER, .id.hip_id = 0 }; conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.nhip = 1; conf.not_launched_drivers = &d; conf.n_not_launched_drivers = 1; return test_driver(&conf, &d, "HIP", starpu_hip_worker_get_count, STARPU_HIP); } #endif /* STARPU_USE_HIP */ #ifdef STARPU_USE_OPENCL static int test_opencl(void) { int ret; cl_int err; cl_uint pdummy; cl_platform_id platform; err = clGetPlatformIDs(1, &platform, &pdummy); if (err != CL_SUCCESS) { FPRINTF(stderr, "WARNING: No OpenCL platform found\n"); return STARPU_TEST_SKIPPED; } cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR; if (starpu_getenv_number("STARPU_OPENCL_ON_CPUS") > 0) device_type |= CL_DEVICE_TYPE_CPU; if (starpu_getenv_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0) device_type = CL_DEVICE_TYPE_CPU; cl_device_id device_id; err = clGetDeviceIDs(platform, device_type, 1, &device_id, NULL); if (err != CL_SUCCESS) { FPRINTF(stderr, "WARNING: No GPU devices found on OpenCL platform\n"); return STARPU_TEST_SKIPPED; } struct starpu_conf conf; ret = starpu_conf_init(&conf); if (ret == -EINVAL) return 1; struct starpu_driver d = { .type = STARPU_OPENCL_WORKER, .id.opencl_id = device_id }; conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.nopencl = 1; conf.not_launched_drivers = &d; conf.n_not_launched_drivers = 1; return test_driver(&conf, &d, "OpenCL", starpu_opencl_worker_get_count, STARPU_OPENCL); } #endif /* STARPU_USE_OPENCL */ int main(void) { int ret = STARPU_TEST_SKIPPED; #ifdef STARPU_USE_CPU ret = test_cpu(); if (ret == 1) return 1; #endif #if defined(STARPU_USE_CUDA) && !(defined(STARPU_USE_CUDA0) || defined(STARPU_USE_CUDA1)) ret = test_cuda(); if (ret == 1) return 1; #endif #ifdef STARPU_USE_OPENCL ret = test_opencl(); if (ret == 1) return 1; #endif #ifdef STARPU_USE_HIP ret = test_hip(); if (ret == 1) return 1; #endif return ret; } starpu-1.4.9+dfsg/tests/main/empty_task.c000066400000000000000000000045661507764646700204350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Measure the cost of a task with a NULL codelet */ #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 64; #else static unsigned ntasks = 65536; #endif static void usage(char **argv) { FPRINTF(stderr, "%s [-i ntasks] [-h]\n", argv[0]); exit(-1); } static void parse_args(int argc, char **argv) { int c; while ((c = getopt(argc, argv, "i:t:h")) != -1) switch(c) { case 'i': ntasks = atoi(optarg); break; case 'h': usage(argv); break; } } int main(int argc, char **argv) { int ret; double timing; double start; double end; parse_args(argc, argv); ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); FPRINTF(stderr, "#tasks : %u\n", ntasks); start = starpu_timing_now(); unsigned i; for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = NULL; task->detach = 0; task->destroy = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); } end = starpu_timing_now(); timing = end - start; FPRINTF(stderr, "Total: %f secs\n", timing/1000000); FPRINTF(stderr, "Per task: %f usecs\n", timing/ntasks); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/empty_task_chain.c000066400000000000000000000037541507764646700215750ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Create a chain of tasks with NULL codelet, using manual dependencies */ #define N 4 int main(int argc, char **argv) { int i, ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_task **tasks = (struct starpu_task **) malloc(N*sizeof(struct starpu_task *)); for (i = 0; i < N; i++) { tasks[i] = starpu_task_create(); tasks[i]->cl = NULL; if (i > 0) { starpu_task_declare_deps_array(tasks[i], 1, &tasks[i-1]); } if (i == (N-1)) tasks[i]->detach = 0; } for (i = 1; i < N; i++) { ret = starpu_task_submit(tasks[i]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_submit(tasks[0]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(tasks[N-1]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); starpu_shutdown(); free(tasks); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/empty_task_sync_point.c000066400000000000000000000055621507764646700226770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test using a task with NULL codelet as a synchronization task through tag dependencies */ static starpu_tag_t tagA = 0x0042; static starpu_tag_t tagB = 0x1042; static starpu_tag_t tagC = 0x2042; static starpu_tag_t tagD = 0x3042; static starpu_tag_t tagE = 0x4042; static starpu_tag_t tagF = 0x5042; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* {A,B,C} -> D -> {E,F}, D is empty */ struct starpu_task *taskA = starpu_task_create(); taskA->cl = &starpu_codelet_nop; taskA->use_tag = 1; taskA->tag_id = tagA; struct starpu_task *taskB = starpu_task_create(); taskB->cl = &starpu_codelet_nop; taskB->use_tag = 1; taskB->tag_id = tagB; struct starpu_task *taskC = starpu_task_create(); taskC->cl = &starpu_codelet_nop; taskC->use_tag = 1; taskC->tag_id = tagC; struct starpu_task *taskD = starpu_task_create(); taskD->cl = NULL; taskD->use_tag = 1; taskD->tag_id = tagD; starpu_tag_declare_deps(tagD, 3, tagA, tagB, tagC); struct starpu_task *taskE = starpu_task_create(); taskE->cl = &starpu_codelet_nop; taskE->use_tag = 1; taskE->tag_id = tagE; starpu_tag_declare_deps(tagE, 1, tagD); struct starpu_task *taskF = starpu_task_create(); taskF->cl = &starpu_codelet_nop; taskF->use_tag = 1; taskF->tag_id = tagF; starpu_tag_declare_deps(tagF, 1, tagD); ret = starpu_task_submit(taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskC); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskF); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_tag_t tag_array[2] = {tagE, tagF}; ret = starpu_tag_wait_array(2, tag_array); STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait_array"); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/main/empty_task_sync_point_tasks.c000066400000000000000000000045311507764646700240770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Test using a task with NULL codelet as a synchronization task through task dependencies */ int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* {A,B,C} -> D -> {E,F}, D is empty */ struct starpu_task *taskA = starpu_task_create(); taskA->cl = &starpu_codelet_nop; struct starpu_task *taskB = starpu_task_create(); taskB->cl = &starpu_codelet_nop; struct starpu_task *taskC = starpu_task_create(); taskC->cl = &starpu_codelet_nop; struct starpu_task *taskD = starpu_task_create(); taskD->cl = NULL; struct starpu_task *taskE = starpu_task_create(); taskE->cl = &starpu_codelet_nop; struct starpu_task *taskF = starpu_task_create(); taskF->cl = &starpu_codelet_nop; starpu_task_declare_deps(taskD, 3, taskA, taskB, taskC); starpu_task_declare_deps_array(taskE, 1, &taskD); starpu_task_declare_deps_array(taskF, 1, &taskD); ret = starpu_task_submit(taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskC); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskF); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/main/execute_on_a_specific_worker.c000066400000000000000000000103241507764646700241360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" #include /* * Test binding tasks on specific workers */ #ifdef STARPU_QUICK_CHECK #define N 10 #elif !defined(STARPU_LONG_CHECK) #define N 100 #else #define N 1000 #endif #define VECTORSIZE 1024 static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; static unsigned finished = 0; static unsigned cnt; starpu_data_handle_t v_handle; static unsigned *v; static void callback(void *arg) { (void)arg; unsigned res = STARPU_ATOMIC_ADD(&cnt, -1); ANNOTATE_HAPPENS_BEFORE(&cnt); if (res == 0) { ANNOTATE_HAPPENS_AFTER(&cnt); STARPU_PTHREAD_MUTEX_LOCK(&mutex); finished = 1; STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } } void codelet_null(void *descr[], void *_args) { (void)descr; (void)_args; // int id = starpu_worker_get_id(); // FPRINTF(stderr, "worker #%d\n", id); } static struct starpu_codelet cl_r = { .cpu_funcs = {codelet_null}, .cuda_funcs = {codelet_null}, .opencl_funcs = {codelet_null}, .cpu_funcs_name = {"codelet_null"}, .nbuffers = 1, .modes = {STARPU_R} }; static struct starpu_codelet cl_w = { .cpu_funcs = {codelet_null}, .cuda_funcs = {codelet_null}, .opencl_funcs = {codelet_null}, .cpu_funcs_name = {"codelet_null"}, .nbuffers = 1, .modes = {STARPU_W} }; static struct starpu_codelet cl_rw = { .cpu_funcs = {codelet_null}, .cuda_funcs = {codelet_null}, .opencl_funcs = {codelet_null}, .cpu_funcs_name = {"codelet_null"}, .nbuffers = 1, .modes = {STARPU_RW} }; static struct starpu_codelet *select_codelet_with_random_mode(void) { int r = rand(); switch (r % 3) { case 0: return &cl_r; case 1: return &cl_w; case 2: return &cl_rw; }; return &cl_rw; } int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned)); STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); memset(v, 0, VECTORSIZE*sizeof(unsigned)); starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); unsigned nworker = starpu_worker_get_count(); cnt = nworker*N; unsigned iter, worker; for (iter = 0; iter < N; iter++) { for (worker = 0; worker < nworker; worker++) { /* execute a task on that worker */ struct starpu_task *task = starpu_task_create(); task->handles[0] = v_handle; task->cl = select_codelet_with_random_mode(); task->callback_func = callback; task->callback_arg = NULL; task->execute_on_a_specific_worker = 1; task->workerid = worker; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } STARPU_PTHREAD_MUTEX_LOCK(&mutex); while (!finished) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/execute_schedule.c000066400000000000000000000071031507764646700215610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" #include /* * Test binding tasks on specific workers and in a specific order */ #ifdef STARPU_QUICK_CHECK #define K 2 #else #define K 16 #endif #define N 64 static unsigned current = 1; void codelet(void *descr[], void *_args) { (void)descr; uintptr_t me = (uintptr_t) _args; STARPU_ASSERT(current == me); current++; } static double cost_function(struct starpu_task *task, unsigned nimpl) { (void) task; (void) nimpl; return 1000; } static struct starpu_perfmodel model = { .type = STARPU_COMMON, .cost_function = cost_function, .symbol = "cost" }; static struct starpu_codelet cl = { .cpu_funcs = {codelet}, .cuda_funcs = {codelet}, .opencl_funcs = {codelet}, .nbuffers = 1, .modes = {STARPU_R}, .model = &model, }; int main(int argc, char **argv) { int ret; struct starpu_task *dep_task[N] = { NULL }; int *t[N]; starpu_data_handle_t h[N]; unsigned n, i, k; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); for (n = 0; n < N; n++) { t[n] = malloc((1<<20) * sizeof(*(t[n]))); starpu_variable_data_register(&h[n], STARPU_MAIN_RAM, (uintptr_t) t[n], (1<<20) * sizeof(*(t[n]))); } for (k = 0; k < K; k++) { for (n = 0; n < N; n++) { struct starpu_task *task; dep_task[n] = starpu_task_create(); dep_task[n]->cl = NULL; task = starpu_task_create(); task->cl = &cl; task->execute_on_a_specific_worker = 1; task->workerid = 0; /* We request for running the tasks in the opposite order of the submission order */ task->workerorder = k*N + (N-n); task->cl_arg = (void*) (uintptr_t) (k*N + (N-n)); task->handles[0] = h[n]; starpu_task_declare_deps_array(task, 1, &dep_task[n]); ret = starpu_task_submit(task); if (ret == -ENODEV) { task->destroy = 0; starpu_task_destroy(task); goto enodev; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } for (n = 0; n < N; n++) { i = (int)starpu_drand48()%(N-n); ret = starpu_task_submit(dep_task[i]); memmove(&dep_task[i], &dep_task[i+1], (N-i-1)*sizeof(dep_task[i])); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } starpu_task_wait_for_all(); for (n = 0; n < N; n++) { starpu_data_unregister(h[n]); free(t[n]); } starpu_shutdown(); return EXIT_SUCCESS; enodev: for (n = 0; n < N; n++) { if (dep_task[n]) { dep_task[n]->destroy = 0; starpu_task_destroy(dep_task[n]); } starpu_data_unregister(h[n]); free(t[n]); } starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/get_children_tasks.c000066400000000000000000000052301507764646700220760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Check that starpu_task_get_task_succs returns the set of children tasks */ void func_cpu(void *descr[], void *_args) { (void)descr; (void)_args; } struct starpu_codelet codelet_w = { .modes = { STARPU_W }, .cpu_funcs = {func_cpu}, .cpu_funcs_name = {"func_cpu"}, .nbuffers = 1 }; struct starpu_codelet codelet_r = { .modes = { STARPU_R }, .cpu_funcs = {func_cpu}, .cpu_funcs_name = {"func_cpu"}, .nbuffers = 1 }; int main(void) { int ret; starpu_data_handle_t h; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_void_data_register(&h); starpu_tag_t tag_init = 0; starpu_tag_declare_deps_array((starpu_tag_t) 1, 1, &tag_init); struct starpu_task *task1 = starpu_task_build(&codelet_w, STARPU_W, h, STARPU_TAG, (starpu_tag_t) 1, 0); struct starpu_task *task2 = starpu_task_build(&codelet_r, STARPU_R, h, 0); struct starpu_task *task3 = starpu_task_build(&codelet_r, STARPU_R, h, 0); ret = starpu_task_submit(task1); if (ret == -ENODEV) goto enodev; ret = starpu_task_submit(task2); if (ret == -ENODEV) goto enodev; ret = starpu_task_submit(task3); if (ret == -ENODEV) goto enodev; struct starpu_task *tasks[4]; ret = starpu_task_get_task_succs(task1, sizeof(tasks)/sizeof(*tasks), tasks); STARPU_ASSERT(ret == 2); STARPU_ASSERT(tasks[0] == task2 || tasks[1] == task2); STARPU_ASSERT(tasks[0] == task3 || tasks[1] == task3); starpu_tag_notify_from_apps(0); starpu_data_unregister(h); starpu_shutdown(); STARPU_RETURN(ret?0:1); enodev: starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/get_current_task.c000066400000000000000000000064771507764646700216230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Check that starpu_task_get_current provides the proper task pointer */ #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 64; #else static unsigned ntasks = 65536; #endif void check_task_func(void *descr[], void *arg) { (void)descr; /* We check that the returned task is valid from the callback */ struct starpu_task *task = (struct starpu_task *) arg; STARPU_ASSERT(task == starpu_task_get_current()); } static void check_task_callback(void *arg) { /* We check that the returned task is valid from the callback */ struct starpu_task *task = (struct starpu_task *) arg; STARPU_ASSERT(task == starpu_task_get_current()); } static struct starpu_codelet dummy_cl = { .cuda_funcs = {check_task_func}, .cpu_funcs = {check_task_func}, .opencl_funcs = {check_task_func}, /* starpu_task_get_current()) is not working on MPI Master Slave mode */ /* .cpu_funcs_name = {"check_task_func"}, */ .model = NULL, .nbuffers = 0 }; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); FPRINTF(stderr, "#tasks : %u\n", ntasks); unsigned i; for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); /* We check if the function is valid from the codelet or from * the callback */ task->cl = &dummy_cl; task->cl_arg = task; task->cl_arg_size = sizeof(task); task->callback_func = check_task_callback; task->callback_arg = task; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); FPRINTF(stderr, "#empty tasks : %u\n", ntasks); /* We repeat the same experiment with null codelets */ for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = NULL; /* We check if the function is valid from the callback */ task->callback_func = check_task_callback; task->callback_arg = task; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/hwloc_cpuset.c000066400000000000000000000045551507764646700207520ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test workers hwloc cpusets */ int main(void) { int status = 0; #ifdef STARPU_HAVE_HWLOC struct starpu_conf conf; starpu_conf_init(&conf); conf.nmpi_ms = 0; conf.ntcpip_ms = 0; int ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int nworkers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); if (nworkers != 0) { hwloc_cpuset_t accumulator_cpuset = hwloc_bitmap_alloc(); hwloc_cpuset_t temp_cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_zero(accumulator_cpuset); status = 0; int workerids[nworkers]; starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workerids, nworkers); int i; for (i=0; i #include "../helper.h" /* * Try the starpu_task_insert interface in various ways */ static int _ifactor = 12; static float _ffactor = 10.0; void func_cpu_args(void *descr[], void *_args) { int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); float *x1 = (float *)STARPU_VARIABLE_GET_PTR(descr[1]); int ifactor; float ffactor; starpu_codelet_unpack_args(_args, &ifactor, &ffactor); *x0 = *x0 * ifactor; *x1 = *x1 * ffactor; } void func_cpu_noargs(void *descr[], void *_args) { int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); float *x1 = (float *)STARPU_VARIABLE_GET_PTR(descr[1]); (void)_args; *x0 = *x0 * _ifactor; *x1 = *x1 * _ffactor; } struct starpu_codelet mycodelet_args = { .modes = { STARPU_RW, STARPU_RW }, .cpu_funcs = {func_cpu_args}, .cpu_funcs_name = {"func_cpu_args"}, .nbuffers = 2 }; /* This one cheats by getting the factor through a global variable, which cannot * work in master-slave mode, so we can only run it on a local CPU */ struct starpu_codelet mycodelet_noargs = { .modes = { STARPU_RW, STARPU_RW }, .cpu_funcs = {func_cpu_noargs}, .nbuffers = 2 }; static int test_codelet(struct starpu_codelet *codelet, int task_insert, int args, int x, float f) { starpu_data_handle_t data_handles[2]; int xx = x; float ff = f; int i, ret; starpu_variable_data_register(&data_handles[0], STARPU_MAIN_RAM, (uintptr_t)&xx, sizeof(xx)); starpu_variable_data_register(&data_handles[1], STARPU_MAIN_RAM, (uintptr_t)&ff, sizeof(ff)); FPRINTF(stderr, "values: %d (%d) %f (%f)\n", xx, _ifactor, ff, _ffactor); if (task_insert) { if (args) ret = starpu_task_insert(codelet, STARPU_VALUE, &_ifactor, sizeof(_ifactor), STARPU_VALUE, &_ffactor, sizeof(_ffactor), STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], 0); else ret = starpu_task_insert(codelet, STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } else { struct starpu_task *task; if (args) task = starpu_task_build(codelet, STARPU_VALUE, &_ifactor, sizeof(_ifactor), STARPU_VALUE, &_ffactor, sizeof(_ffactor), STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], 0); else task = starpu_task_build(codelet, STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], 0); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } enodev: for(i=0 ; i<2 ; i++) { starpu_data_unregister(data_handles[i]); } FPRINTF(stderr, "values: %d (should be %d) %f (should be %f)\n\n", xx, x*_ifactor, ff, f*_ffactor); return ret == -ENODEV ? ret : xx == x*_ifactor && ff == f*_ffactor; } int main(void) { int ret; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); FPRINTF(stderr, "Testing codelet with task_insert and with arguments\n"); ret = test_codelet(&mycodelet_args, 1, 1, 4, 2.0); if (ret == -ENODEV) goto enodev; if (ret) { FPRINTF(stderr, "Testing codelet with task_insert and without arguments\n"); ret = test_codelet(&mycodelet_noargs, 1, 0, 9, 7.0); } if (ret == -ENODEV) goto enodev; if (ret) { FPRINTF(stderr, "Testing codelet with task_build and with arguments\n"); ret = test_codelet(&mycodelet_args, 0, 1, 5, 3.0); } if (ret == -ENODEV) goto enodev; if (ret) { FPRINTF(stderr, "Testing codelet with task_build and without arguments\n"); ret = test_codelet(&mycodelet_noargs, 0, 0, 7, 5.0); } if (ret == -ENODEV) goto enodev; starpu_shutdown(); STARPU_RETURN(ret?0:1); enodev: starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/insert_task_array.c000066400000000000000000000051201507764646700217640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Test STARPU_DATA_ARRAY */ void func_cpu(void *descr[], void *_args) { int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); float *x1 = (float *)STARPU_VARIABLE_GET_PTR(descr[1]); int factor; starpu_codelet_unpack_args(_args, &factor); *x0 = *x0 * factor; *x1 = *x1 * (float)factor; } struct starpu_codelet mycodelet = { .modes = { STARPU_RW, STARPU_RW }, .cpu_funcs = {func_cpu}, .cpu_funcs_name = {"func_cpu"}, .nbuffers = 2 }; int main(void) { int x; float f; int factor=12; int i, ret; starpu_data_handle_t data_handles[2]; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); x = 1; starpu_variable_data_register(&data_handles[0], STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); f = 2.0; starpu_variable_data_register(&data_handles[1], STARPU_MAIN_RAM, (uintptr_t)&f, sizeof(f)); ret = starpu_task_insert(&mycodelet, STARPU_DATA_ARRAY, data_handles, 2, STARPU_VALUE, &factor, sizeof(factor), STARPU_PRIORITY, 1, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); enodev: for(i=0 ; i<2 ; i++) { starpu_data_unregister(data_handles[i]); } starpu_shutdown(); if (ret == -ENODEV) { fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; } else { FPRINTF(stderr, "VALUES: %d %f\n", x, f); ret = !(x == 12 && f == 24.0); return ret; } } starpu-1.4.9+dfsg/tests/main/insert_task_dyn_handles.c000066400000000000000000000220631507764646700231430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* * Try the starpu_task_insert interface in various ways, and notably * triggering the use of dyn_handles */ void func_cpu(void *descr[], void *_args) { int num = STARPU_TASK_GET_NBUFFERS(starpu_task_get_current()); int i; (void)_args; for (i = 0; i < num; i++) { int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[i]); *x = *x + 1; } } struct starpu_codelet codelet = { .cpu_funcs = {func_cpu}, /* starpu_task_get_current() doesn't work on MPI Master Slave */ /* .cpu_funcs_name = {"func_cpu"}, */ .nbuffers = STARPU_VARIABLE_NBUFFERS, }; struct starpu_codelet codelet_minus1 = { .cpu_funcs = {func_cpu}, /* starpu_task_get_current() doesn't work on MPI Master Slave */ /* .cpu_funcs_name = {"func_cpu"}, */ .nbuffers = STARPU_NMAXBUFS-1, }; struct starpu_codelet codelet_exactly = { .cpu_funcs = {func_cpu}, /* starpu_task_get_current() doesn't work on MPI Master Slave */ /* .cpu_funcs_name = {"func_cpu"}, */ .nbuffers = STARPU_NMAXBUFS, }; struct starpu_codelet codelet_plus1 = { .cpu_funcs = {func_cpu}, /* starpu_task_get_current() doesn't work on MPI Master Slave */ /* .cpu_funcs_name = {"func_cpu"}, */ .nbuffers = STARPU_NMAXBUFS+1, }; struct starpu_codelet codelet_plus5 = { .cpu_funcs = {func_cpu}, /* starpu_task_get_current() doesn't work on MPI Master Slave */ /* .cpu_funcs_name = {"func_cpu"}, */ .nbuffers = STARPU_NMAXBUFS+5, }; starpu_data_handle_t *data_handles; struct starpu_data_descr *descrs; int *expected; int test(int n, struct starpu_codelet *static_codelet) { int i, ret; for (i = 0; i < n; i++) expected[i]++; ret = starpu_task_insert(&codelet, STARPU_DATA_MODE_ARRAY, descrs, n, 0); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* Same with static number of buffers in codelet */ for (i = 0; i < n; i++) expected[i]++; ret = starpu_task_insert(static_codelet, STARPU_DATA_MODE_ARRAY, descrs, n, 0); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); /* Test a whole array after one data */ expected[0]++; for (i = 1; i < n; i++) expected[i]++; ret = starpu_task_insert(&codelet, STARPU_RW, data_handles[0], STARPU_DATA_MODE_ARRAY, &descrs[1], n-1, 0); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); if (n > 1) { /* Same with static number of buffers in codelet */ expected[0]++; for (i = 1; i < n; i++) expected[i]++; ret = starpu_task_insert(static_codelet, STARPU_RW, data_handles[0], STARPU_DATA_MODE_ARRAY, &descrs[1], n-1, 0); if (ret == -ENODEV) return ret; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } return 0; } int main(void) { int *x; int i, ret, loop; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; #ifdef STARPU_QUICK_CHECK int nloops = 4; #else int nloops = 16; #endif ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); x = calloc(STARPU_NMAXBUFS+5, sizeof(*x)); data_handles = malloc((STARPU_NMAXBUFS+5) * sizeof(*data_handles)); descrs = malloc((STARPU_NMAXBUFS+5) * sizeof(*descrs)); expected = calloc(STARPU_NMAXBUFS+5, sizeof(*expected)); for(i=0 ; i 2 STARPU_RW, data_handles[1], #endif #if STARPU_NMAXBUFS > 3 STARPU_RW, data_handles[2], #endif #if STARPU_NMAXBUFS > 4 STARPU_RW, data_handles[3], #endif #if STARPU_NMAXBUFS > 5 STARPU_RW, data_handles[4], #endif #if STARPU_NMAXBUFS > 6 STARPU_RW, data_handles[5], #endif #if STARPU_NMAXBUFS > 7 STARPU_RW, data_handles[6], #endif #if STARPU_NMAXBUFS > 8 STARPU_RW, data_handles[7], #endif #if STARPU_NMAXBUFS > 9 STARPU_RW, data_handles[8], #endif #if STARPU_NMAXBUFS > 10 STARPU_RW, data_handles[9], #endif 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); #if STARPU_NMAXBUFS > 1 && STARPU_NMAXBUFS <= 8 /* Same with static number of buffers in codelet */ expected[0]++; for (i = 1; i < STARPU_NMAXBUFS-1 && i < 7; i++) expected[i]++; ret = starpu_task_insert(&codelet_minus1, STARPU_RW, data_handles[0], #if STARPU_NMAXBUFS > 2 STARPU_RW, data_handles[1], #endif #if STARPU_NMAXBUFS > 3 STARPU_RW, data_handles[2], #endif #if STARPU_NMAXBUFS > 4 STARPU_RW, data_handles[3], #endif #if STARPU_NMAXBUFS > 5 STARPU_RW, data_handles[4], #endif #if STARPU_NMAXBUFS > 6 STARPU_RW, data_handles[5], #endif #if STARPU_NMAXBUFS > 7 STARPU_RW, data_handles[6], #endif 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); #endif /* Test data one after the other, but more than NMAXBUFS */ for (i = 0; i < STARPU_NMAXBUFS+5 && i < 10; i++) expected[i]++; ret = starpu_task_insert(&codelet, STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], STARPU_RW, data_handles[2], STARPU_RW, data_handles[3], STARPU_RW, data_handles[4], STARPU_RW, data_handles[5], #if STARPU_NMAXBUFS > 1 STARPU_RW, data_handles[6], #endif #if STARPU_NMAXBUFS > 2 STARPU_RW, data_handles[7], #endif #if STARPU_NMAXBUFS > 3 STARPU_RW, data_handles[8], #endif #if STARPU_NMAXBUFS > 4 STARPU_RW, data_handles[9], #endif 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); #if STARPU_NMAXBUFS > 1 && STARPU_NMAXBUFS <= 8 /* Same with static number of buffers in codelet*/ for (i = 0; i < STARPU_NMAXBUFS+5 && i < 13; i++) expected[i]++; ret = starpu_task_insert(&codelet_plus5, STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], STARPU_RW, data_handles[2], STARPU_RW, data_handles[3], STARPU_RW, data_handles[4], STARPU_RW, data_handles[5], #if STARPU_NMAXBUFS > 1 STARPU_RW, data_handles[6], #endif #if STARPU_NMAXBUFS > 2 STARPU_RW, data_handles[7], #endif #if STARPU_NMAXBUFS > 3 STARPU_RW, data_handles[8], #endif #if STARPU_NMAXBUFS > 4 STARPU_RW, data_handles[9], #endif #if STARPU_NMAXBUFS > 5 STARPU_RW, data_handles[10], #endif #if STARPU_NMAXBUFS > 6 STARPU_RW, data_handles[11], #endif #if STARPU_NMAXBUFS > 7 STARPU_RW, data_handles[12], #endif 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); #endif } enodev: for(i=0 ; i #include #include "../helper.h" /* * Try to pass many parameters to a task, testing the various codelet * declarations */ #define NPARAMS 15 void func_cpu(void *descr[], void *_args) { (void)_args; struct starpu_task *task = starpu_task_get_current(); int num = STARPU_TASK_GET_NBUFFERS(task); int i; for (i = 0; i < num; i++) if ((STARPU_TASK_GET_MODE(task, i) & STARPU_W) || (STARPU_TASK_GET_MODE(task, i) & STARPU_SCRATCH)) { int *x = (int *)STARPU_VARIABLE_GET_PTR(descr[i]); *x = *x + 1; } } /* We will fill this one with dyn_modes */ struct starpu_codelet codelet_dyn = { .cpu_funcs = {func_cpu}, /* starpu_task_get_current() doesn't work on MPI Master Slave */ /* .cpu_funcs_name = {"func_cpu"}, */ .nbuffers = NPARAMS, }; /* When maxbuffers is less than NPARAMS we will miss some access modes. * That is on purpose: we here check that we still behave correctly in that case. * We are just not able to check the parameter access modes. */ struct starpu_codelet codelet_toomany = { .cpu_funcs = {func_cpu}, /* starpu_task_get_current() doesn't work on MPI Master Slave */ /* .cpu_funcs_name = {"func_cpu"}, */ .nbuffers = NPARAMS, .modes = { STARPU_R, STARPU_R, STARPU_RW|STARPU_COMMUTE, STARPU_RW|STARPU_COMMUTE, STARPU_R, STARPU_RW, STARPU_R, STARPU_RW|STARPU_COMMUTE, #if STARPU_NMAXBUFS >= 9 STARPU_R, #endif #if STARPU_NMAXBUFS >= 10 STARPU_RW|STARPU_COMMUTE, #endif #if STARPU_NMAXBUFS >= 11 STARPU_R, #endif #if STARPU_NMAXBUFS >= 12 STARPU_R, #endif #if STARPU_NMAXBUFS >= 13 STARPU_SCRATCH, #endif #if STARPU_NMAXBUFS >= 14 STARPU_SCRATCH, #endif #if STARPU_NMAXBUFS >= 15 STARPU_SCRATCH, #endif } }; struct starpu_codelet codelet_variable = { .cpu_funcs = {func_cpu}, /* starpu_task_get_current() doesn't work on MPI Master Slave */ /* .cpu_funcs_name = {"func_cpu"}, */ .nbuffers = STARPU_VARIABLE_NBUFFERS, }; int main(void) { int *x; int i, ret, loop; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; #ifdef STARPU_QUICK_CHECK int nloops = 4; #else int nloops = 16; #endif int val_int = 42; double val_double = 42.; starpu_data_handle_t *data_handles; int *expected; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); codelet_dyn.dyn_modes = malloc(NPARAMS * sizeof(*(codelet_dyn.modes))); codelet_dyn.dyn_modes[0] = STARPU_R, codelet_dyn.dyn_modes[1] = STARPU_R, codelet_dyn.dyn_modes[2] = STARPU_RW|STARPU_COMMUTE, codelet_dyn.dyn_modes[3] = STARPU_RW|STARPU_COMMUTE, codelet_dyn.dyn_modes[4] = STARPU_R, codelet_dyn.dyn_modes[5] = STARPU_RW, codelet_dyn.dyn_modes[6] = STARPU_R, codelet_dyn.dyn_modes[7] = STARPU_RW|STARPU_COMMUTE, codelet_dyn.dyn_modes[8] = STARPU_R, codelet_dyn.dyn_modes[9] = STARPU_RW|STARPU_COMMUTE, codelet_dyn.dyn_modes[10] = STARPU_R, codelet_dyn.dyn_modes[11] = STARPU_R, codelet_dyn.dyn_modes[12] = STARPU_SCRATCH, codelet_dyn.dyn_modes[13] = STARPU_SCRATCH, codelet_dyn.dyn_modes[14] = STARPU_SCRATCH, x = calloc(NPARAMS, sizeof(*x)); data_handles = malloc(NPARAMS * sizeof(*data_handles)); expected = calloc(NPARAMS, sizeof(*expected)); for(i=0 ; i #include "../helper.h" /* * Try starpu_task_insert with a NULL codelet */ int main(void) { int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_task_insert(NULL, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/insert_task_pack.c000066400000000000000000000031341507764646700215670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" void func_cpu(void *descr[], void *_args) { (void) descr; (void) _args; } struct starpu_codelet codelet = { .cpu_funcs = { func_cpu }, .cpu_funcs_name = { "func_cpu" } }; int main(int argc, char **argv) { int ret; void *cl_arg = NULL; size_t cl_arg_size = 0; struct starpu_task *task; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; (void)argv; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_codelet_pack_args(&cl_arg, &cl_arg_size, STARPU_VALUE, &argc, sizeof(argc), 0); task = starpu_task_build(&codelet, STARPU_CL_ARGS, cl_arg, cl_arg_size, STARPU_VALUE, &argc, sizeof(argc), 0); starpu_shutdown(); FPRINTF(stderr, "Task %p\n", task); return (task==NULL)?0:1; } starpu-1.4.9+dfsg/tests/main/insert_task_value.c000066400000000000000000000235621507764646700217740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Test passing values to tasks in different ways */ #define IFACTOR 42 #define FFACTOR 12.00 void func_cpu_int_float(void *descr[], void *_args) { int ifactor[2048]; float ffactor; (void) descr; starpu_codelet_unpack_args(_args, ifactor, &ffactor); FPRINTF(stderr, "[func_cpu_int_float ] Values %d - %3.2f\n", ifactor[0], ffactor); assert(ifactor[0] == IFACTOR && ffactor == FFACTOR); } void func_cpu_int_float_multiple_unpack(void *descr[], void *_args) { int ifactor[2048]; float ffactor; (void) descr; starpu_codelet_unpack_args(_args, ifactor, 0); starpu_codelet_unpack_args(_args, ifactor, &ffactor); FPRINTF(stderr, "[func_cpu_int_float_multiple_unpack] Values %d - %3.2f\n", ifactor[0], ffactor); assert(ifactor[0] == IFACTOR && ffactor == FFACTOR); } void func_cpu_int_float_unpack_copyleft(void *descr[], void *_args) { int ifactor[2048]; float ffactor; void *buffer; size_t buffer_size; (void) descr; buffer_size = sizeof(int)+sizeof(float)+sizeof(size_t); buffer = calloc(buffer_size, 1); starpu_codelet_unpack_args_and_copyleft(_args, buffer, buffer_size, ifactor, 0); starpu_codelet_unpack_args(buffer, &ffactor); FPRINTF(stderr, "[func_cpu_int_float_unpack_copyleft] Values %d - %3.2f\n", ifactor[0], ffactor); assert(ifactor[0] == IFACTOR && ffactor == FFACTOR); free(buffer); } void func_cpu_float_int(void *descr[], void *_args) { int ifactor[2048]; float ffactor; (void) descr; starpu_codelet_unpack_args(_args, &ffactor, ifactor); FPRINTF(stderr, "[func_cpu_float_int ] Values %d - %3.2f\n", ifactor[0], ffactor); assert(ifactor[0] == IFACTOR && ffactor == FFACTOR); } void func_cpu_float_int_multiple_unpack(void *descr[], void *_args) { int ifactor[2048]; float ffactor; (void) descr; starpu_codelet_unpack_args(_args, &ffactor, 0); starpu_codelet_unpack_args(_args, &ffactor, ifactor); FPRINTF(stderr, "[func_cpu_float_int_multiple_unpack] Values %d - %3.2f\n", ifactor[0], ffactor); assert(ifactor[0] == IFACTOR && ffactor == FFACTOR); } void func_cpu_float_int_unpack_copyleft(void *descr[], void *_args) { int ifactor[2048]; float ffactor; void *buffer; size_t buffer_size; (void) descr; buffer_size = sizeof(int)+2048*sizeof(int)+sizeof(size_t); buffer = calloc(buffer_size, 1); starpu_codelet_unpack_args_and_copyleft(_args, buffer, buffer_size, &ffactor, 0); starpu_codelet_unpack_args(buffer, ifactor); FPRINTF(stderr, "[func_cpu_float_int_multiple_unpack] Values %d - %3.2f\n", ifactor[0], ffactor); assert(ifactor[0] == IFACTOR && ffactor == FFACTOR); free(buffer); } void do_test_int_float_task_insert(starpu_cpu_func_t func, char* func_name) { int *ifactor; float ffactor=FFACTOR; int ret; struct starpu_codelet codelet; FPRINTF(stderr, "\nTesting %s\n", __func__); starpu_codelet_init(&codelet); codelet.cpu_funcs[0] = func; codelet.cpu_funcs_name[0] = func_name; ifactor = calloc(2048, sizeof(int)); ifactor[0] = IFACTOR; ret = starpu_task_insert(&codelet, STARPU_VALUE, ifactor, 2048*sizeof(ifactor[0]), STARPU_VALUE, &ffactor, sizeof(ffactor), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); free(ifactor); } void do_test_int_float_task_insert_pack(starpu_cpu_func_t func, char* func_name) { int *ifactor; float ffactor=FFACTOR; int ret; struct starpu_codelet codelet; void *cl_arg = NULL; size_t cl_arg_size = 0; FPRINTF(stderr, "\nTesting %s\n", __func__); ifactor = calloc(2048, sizeof(int)); ifactor[0] = IFACTOR; starpu_codelet_pack_args(&cl_arg, &cl_arg_size, STARPU_VALUE, ifactor, 2048*sizeof(ifactor[0]), STARPU_VALUE, &ffactor, sizeof(ffactor), 0); starpu_codelet_init(&codelet); codelet.cpu_funcs[0] = func; codelet.cpu_funcs_name[0] = func_name; ret = starpu_task_insert(&codelet, STARPU_CL_ARGS, cl_arg, cl_arg_size, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); free(ifactor); } void do_test_float_int_task_insert(starpu_cpu_func_t func, char* func_name) { int *ifactor; float ffactor=FFACTOR; int ret; struct starpu_codelet codelet; FPRINTF(stderr, "\nTesting %s\n", __func__); starpu_codelet_init(&codelet); codelet.cpu_funcs[0] = func; codelet.cpu_funcs_name[0] = func_name; ifactor = calloc(2048, sizeof(int)); ifactor[0] = IFACTOR; ret = starpu_task_insert(&codelet, STARPU_VALUE, &ffactor, sizeof(ffactor), STARPU_VALUE, ifactor, 2048*sizeof(ifactor[0]), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); free(ifactor); } void do_test_float_int_task_insert_pack(starpu_cpu_func_t func, char* func_name) { int *ifactor; float ffactor=FFACTOR; int ret; struct starpu_codelet codelet; void *cl_arg = NULL; size_t cl_arg_size = 0; FPRINTF(stderr, "\nTesting %s\n", __func__); ifactor = calloc(2048, sizeof(int)); ifactor[0] = IFACTOR; starpu_codelet_pack_args(&cl_arg, &cl_arg_size, STARPU_VALUE, &ffactor, sizeof(ffactor), STARPU_VALUE, ifactor, 2048*sizeof(ifactor[0]), 0); starpu_codelet_init(&codelet); codelet.cpu_funcs[0] = func; codelet.cpu_funcs_name[0] = func_name; ret = starpu_task_insert(&codelet, STARPU_CL_ARGS, cl_arg, cl_arg_size, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); free(ifactor); } void do_test_int_float_pack(starpu_cpu_func_t func, char* func_name) { struct starpu_task *task; struct starpu_codelet codelet; int ret; int *ifactor; float ffactor=FFACTOR; FPRINTF(stderr, "\nTesting %s\n", __func__); ifactor = calloc(2048, sizeof(int)); ifactor[0] = IFACTOR; starpu_codelet_init(&codelet); codelet.cpu_funcs[0] = func; codelet.cpu_funcs_name[0] = func_name; task = starpu_task_create(); task->synchronous = 1; task->cl = &codelet; task->cl_arg_free = 1; starpu_codelet_pack_args(&task->cl_arg, &task->cl_arg_size, STARPU_VALUE, ifactor, 2048*sizeof(ifactor[0]), STARPU_VALUE, &ffactor, sizeof(ffactor), 0); ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_wait_for_all(); free(ifactor); } void do_test_float_int_pack(starpu_cpu_func_t func, char* func_name) { struct starpu_task *task; struct starpu_codelet codelet; int ret; int *ifactor; float ffactor=FFACTOR; FPRINTF(stderr, "\nTesting %s\n", __func__); ifactor = calloc(2048, sizeof(int)); ifactor[0] = IFACTOR; starpu_codelet_init(&codelet); codelet.cpu_funcs[0] = func; codelet.cpu_funcs_name[0] = func_name; task = starpu_task_create(); task->synchronous = 1; task->cl = &codelet; task->cl_arg_free = 1; starpu_codelet_pack_args(&task->cl_arg, &task->cl_arg_size, STARPU_VALUE, &ffactor, sizeof(ffactor), STARPU_VALUE, ifactor, 2048*sizeof(ifactor[0]), 0); ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_wait_for_all(); free(ifactor); } int main(void) { int ret; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_worker_get_count_by_type(STARPU_CPU_WORKER) == 0) goto enodev; do_test_int_float_task_insert(func_cpu_int_float, "func_cpu_int_float"); do_test_int_float_task_insert(func_cpu_int_float_multiple_unpack, "func_cpu_int_float_multiple_unpack"); do_test_int_float_task_insert(func_cpu_int_float_unpack_copyleft, "func_cpu_int_float_unpack_copyleft"); do_test_int_float_task_insert_pack(func_cpu_int_float, "func_cpu_int_float"); do_test_int_float_task_insert_pack(func_cpu_int_float_multiple_unpack, "func_cpu_int_float_multiple_unpack"); do_test_int_float_task_insert_pack(func_cpu_int_float_unpack_copyleft, "func_cpu_int_float_unpack_copyleft"); do_test_float_int_task_insert(func_cpu_float_int, "func_cpu_float_int"); do_test_float_int_task_insert(func_cpu_float_int_multiple_unpack, "func_cpu_float_int_multiple_unpack"); do_test_float_int_task_insert(func_cpu_float_int_unpack_copyleft, "func_cpu_float_int_unpack_copyleft"); do_test_float_int_task_insert_pack(func_cpu_float_int, "func_cpu_float_int"); do_test_float_int_task_insert_pack(func_cpu_float_int_multiple_unpack, "func_cpu_float_int_multiple_unpack"); do_test_float_int_task_insert_pack(func_cpu_float_int_unpack_copyleft, "func_cpu_float_int_unpack_copyleft"); do_test_int_float_pack(func_cpu_int_float, "func_cpu_int_float"); do_test_int_float_pack(func_cpu_int_float_multiple_unpack, "func_cpu_int_float_multiple_unpack"); do_test_int_float_pack(func_cpu_int_float_unpack_copyleft, "func_cpu_int_float_unpack_copyleft"); do_test_float_int_pack(func_cpu_float_int, "func_cpu_float_int"); do_test_float_int_pack(func_cpu_float_int_multiple_unpack, "func_cpu_float_int_multiple_unpack"); do_test_float_int_pack(func_cpu_float_int_unpack_copyleft, "func_cpu_float_int_unpack_copyleft"); starpu_shutdown(); return 0; enodev: starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/insert_task_where.c000066400000000000000000000046461507764646700217740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "../variable/increment.h" void cpu_increment(void *descr[], void *arg) { (void)arg; unsigned *var = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); (*var) += 2; } /* Also test that the application can provide its own main function */ #undef main int main(void) { starpu_data_handle_t data_handles[2]; int x = 12; int y = 12; int ret, ret1, ret2; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; #ifndef STARPU_SIMGRID conf.ncuda = -1; #endif conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_variable_data_register(&data_handles[0], STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x)); starpu_variable_data_register(&data_handles[1], STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y)); // We change the cpu function to have a different computation increment_cl.cpu_funcs[0] = cpu_increment; ret1 = starpu_task_insert(&increment_cl, STARPU_EXECUTE_WHERE, STARPU_CPU, STARPU_RW, data_handles[0], 0); if (ret1 != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret1, "starpu_task_insert"); ret2 = starpu_task_insert(&increment_cl, STARPU_EXECUTE_WHERE, STARPU_CUDA, STARPU_RW, data_handles[1], 0); if (ret2 != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret2, "starpu_task_insert"); starpu_data_unregister(data_handles[0]); starpu_data_unregister(data_handles[1]); starpu_shutdown(); if (ret1 != -ENODEV) { if (x != 14) ret = 1; FPRINTF(stderr, "Value x = %d (expected 14)\n", x); } if (ret2 != -ENODEV) { if (y != 13) ret = 1; FPRINTF(stderr, "Value y = %d (expected 13)\n", y); } STARPU_RETURN(ret); } starpu-1.4.9+dfsg/tests/main/job.c000066400000000000000000000046621507764646700170240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test that job creation is threadsafe */ #define N 1000 static struct starpu_task *tasks[N]; void dummy_func(void *arg) { unsigned worker, i; int worker0; (void) arg; starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, &worker0, 1); if ((int) starpu_worker_get_id_check() == worker0) /* One worker creates the tasks */ for (i = 0; i < N; i++) { struct starpu_task *task = starpu_task_create(); task->destroy = 0; STARPU_WMB(); tasks[i] = task; } else /* While others eagerly wait for it before trying to get their id */ for (i = 0; i < N; i++) { struct starpu_task *task; while (!(task = tasks[i])) { STARPU_UYIELD(); STARPU_SYNCHRONIZE(); } STARPU_RMB(); starpu_task_get_job_id(task); } } int main(void) { int ret; unsigned i; struct starpu_conf conf; starpu_conf_init(&conf); conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.ncpus = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); STARPU_HG_DISABLE_CHECKING(tasks); starpu_execute_on_each_worker(dummy_func, NULL, STARPU_CPU); for (i = 0; i < N; i++) { starpu_task_destroy(tasks[i]); } struct starpu_task *task = starpu_task_create(); unsigned long id; task->destroy = 0; id = starpu_task_get_job_id(task); starpu_task_destroy(task); FPRINTF(stderr, "jobid %lu for %u tasks and %u workers\n", id, N, starpu_worker_get_count()); /* We are not supposed to have created more than one jobid for each * worker (for execute_on_each) and for each of the N user tasks. */ ret = id > starpu_worker_get_count() + N + 1; starpu_shutdown(); return ret; } starpu-1.4.9+dfsg/tests/main/mkdtemp.c000066400000000000000000000030631507764646700177050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include int do_test(char *(*func)(char *tmpl)) { int ret; char *path; char dirname[128]; char *ptr; struct stat sb; path = starpu_getenv("TMPDIR"); if (!path) path = starpu_getenv("TEMP"); if (!path) path = starpu_getenv("TMP"); if (!path) path = "/tmp"; snprintf(dirname, sizeof(dirname), "%s/abcdef_XXXXXX", path); ptr = func(dirname); FPRINTF(stderr, "Directory '%s' (res '%s')\n", dirname, ptr); // use stat ret = stat(dirname, &sb); if (ret != 0 || !S_ISDIR(sb.st_mode)) { FPRINTF(stderr, "Directory '%s' has not been created\n", dirname); return 1; } ret = rmdir(dirname); STARPU_CHECK_RETURN_VALUE(ret, "rmdir '%s'\n", dirname); return ret; } int main(void) { int ret, ret2; ret = do_test(_starpu_mkdtemp); ret2 = do_test(_starpu_mkdtemp_internal); return ret + ret2; } starpu-1.4.9+dfsg/tests/main/multithreaded.c000066400000000000000000000051111507764646700210730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Try submitting tasks from different threads */ starpu_pthread_t threads[16]; #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 64; #else static unsigned ntasks = 65536; #endif static unsigned nthreads = 2; static void *thread_func(void *arg) { int ret; unsigned i; (void)arg; for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->cl_arg = NULL; task->callback_func = NULL; task->callback_arg = NULL; ret = starpu_task_submit(task); STARPU_ASSERT_MSG(!ret, "task submission failed with error code %d", ret); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); return NULL; } static void usage(char **argv) { FPRINTF(stderr, "%s [-i ntasks] [-t nthreads] [-h]\n", argv[0]); exit(-1); } static void parse_args(int argc, char **argv) { int c; while ((c = getopt(argc, argv, "i:t:h")) != -1) switch(c) { case 'i': ntasks = atoi(optarg); break; case 't': nthreads = atoi(optarg); break; case 'h': usage(argv); break; } } int main(int argc, char **argv) { // unsigned i; double timing; double start; double end; int ret; parse_args(argc, argv); ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); FPRINTF(stderr, "#tasks : %u\n", ntasks); start = starpu_timing_now(); unsigned t; for (t = 0; t < nthreads; t++) { STARPU_PTHREAD_CREATE(&threads[t], NULL, thread_func, NULL); } for (t = 0; t < nthreads; t++) { STARPU_PTHREAD_JOIN(threads[t], NULL); } end = starpu_timing_now(); timing = end - start; FPRINTF(stderr, "Total: %f secs\n", timing/1000000); FPRINTF(stderr, "Per task: %f usecs\n", timing/(nthreads*ntasks)); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/main/multithreaded_init.c000066400000000000000000000040601507764646700221200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* * Try calling starpu_initialize from different threads in parallel */ #define NUM_THREADS 5 int *glob_argc; char ***glob_argv; static void *launch_starpu(void *unused) { int ret; (void) unused; ret = starpu_initialize(NULL, glob_argc, glob_argv); if (ret == -ENODEV) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); return NULL; } static void *shutdown_starpu(void *unused) { (void) unused; starpu_shutdown(); return NULL; } int main(int argc, char **argv) { unsigned i; double timing; double start; double end; glob_argc = &argc; glob_argv = &argv; starpu_pthread_t threads[NUM_THREADS]; start = starpu_timing_now(); for (i = 0; i < NUM_THREADS; ++i) { STARPU_PTHREAD_CREATE(&threads[i], NULL, launch_starpu, NULL); } for (i = 0; i < NUM_THREADS; ++i) { STARPU_PTHREAD_JOIN(threads[i], NULL); } end = starpu_timing_now(); timing = end - start; FPRINTF(stderr, "Success : %d threads launching simultaneously starpu_init\n", NUM_THREADS); FPRINTF(stderr, "Total: %f secs\n", timing/1000000); FPRINTF(stderr, "Per task: %f usecs\n", timing/NUM_THREADS); for (i = 0; i < NUM_THREADS; i++) { STARPU_PTHREAD_CREATE(&threads[i], NULL, shutdown_starpu, NULL); } for (i = 0; i < NUM_THREADS; i++) { STARPU_PTHREAD_JOIN(threads[i], NULL); } return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/main/pack.c000066400000000000000000000171131507764646700171630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Test starpu_codelet_pack_args and starpu_codelet_unpack_args */ void func_unpack_args(void *descr[], void *_args) { int factor; char c; int x; (void)descr; starpu_codelet_unpack_args(_args, &factor, &c, &x); FPRINTF(stderr, "[codelet unpack_args] values: %d %c %d\n", factor, c, x); assert(factor == 12 && c == 'n' && x == 42); } struct starpu_codelet mycodelet_unpack_args = { .cpu_funcs = {func_unpack_args}, .cpu_funcs_name = {"func_unpack_args"}, .nbuffers = 0 }; void func_unpack_arg(void *descr[], void *_args) { int factor; char c; int x; (void)descr; size_t size = sizeof(int) + 3*sizeof(size_t) + sizeof(int) + sizeof(char) + sizeof(int); struct starpu_codelet_pack_arg_data state; starpu_codelet_unpack_arg_init(&state, _args, size); starpu_codelet_unpack_arg(&state, (void**)&factor, sizeof(factor)); starpu_codelet_unpack_arg(&state, (void**)&c, sizeof(c)); starpu_codelet_unpack_arg(&state, (void**)&x, sizeof(x)); starpu_codelet_unpack_arg_fini(&state); FPRINTF(stderr, "[codelet unpack_arg] values: %d %c %d\n", factor, c, x); assert(factor == 12 && c == 'n' && x == 42); } struct starpu_codelet mycodelet_unpack_arg = { .cpu_funcs = {func_unpack_arg}, .cpu_funcs_name = {"func_unpack_arg"}, .nbuffers = 0 }; void func_dup_arg(void *descr[], void *_args) { int *factor; char *c; int *x; size_t size; (void)descr; size_t psize = sizeof(int) + 3*sizeof(size_t) + sizeof(int) + sizeof(char) + sizeof(int); struct starpu_codelet_pack_arg_data state; starpu_codelet_unpack_arg_init(&state, _args, psize); starpu_codelet_dup_arg(&state, (void**)&factor, &size); assert(size == sizeof(*factor)); starpu_codelet_dup_arg(&state, (void**)&c, &size); assert(size == sizeof(*c)); starpu_codelet_dup_arg(&state, (void**)&x, &size); assert(size == sizeof(*x)); starpu_codelet_unpack_arg_fini(&state); FPRINTF(stderr, "[codelet dup_arg] values: %d %c %d\n", *factor, *c, *x); assert(*factor == 12 && *c == 'n' && *x == 42); free(factor); free(c); free(x); } struct starpu_codelet mycodelet_dup_arg = { .cpu_funcs = {func_dup_arg}, .cpu_funcs_name = {"func_dup_arg"}, .nbuffers = 0 }; void func_pick_arg(void *descr[], void *_args) { int *factor; char *c; int *x; size_t size; (void)descr; size_t psize = sizeof(int) + 6*sizeof(size_t) + sizeof(int) + 4*sizeof(char) + sizeof(int); struct starpu_codelet_pack_arg_data state; starpu_codelet_unpack_arg_init(&state, _args, psize); starpu_codelet_pick_arg(&state, (void**)&factor, &size); assert(size == sizeof(*factor)); starpu_codelet_pick_arg(&state, (void**)&c, &size); assert(size == sizeof(*c)); starpu_codelet_pick_arg(&state, (void**)&c, &size); assert(size == sizeof(*c)); starpu_codelet_pick_arg(&state, (void**)&c, &size); assert(size == sizeof(*c)); starpu_codelet_pick_arg(&state, (void**)&c, &size); assert(size == sizeof(*c)); starpu_codelet_pick_arg(&state, (void**)&x, &size); assert(size == sizeof(*x)); starpu_codelet_unpack_arg_fini(&state); FPRINTF(stderr, "[codelet pick_arg] values: %d %c %d\n", *factor, *c, *x); assert(*factor == 12 && *c == 'n' && *x == 42); } struct starpu_codelet mycodelet_pick_arg = { .cpu_funcs = {func_pick_arg}, .cpu_funcs_name = {"func_pick_arg"}, .nbuffers = 0 }; int main(void) { int ret; int x=42; int factor=12; char c='n'; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); FPRINTF(stderr, "[init] values: %d %c %d\n", factor, c, x); { struct starpu_task *task = starpu_task_build(&mycodelet_unpack_args, STARPU_TASK_SYNCHRONOUS, 1, 0); task->cl_arg_free = 1; starpu_codelet_pack_args(&task->cl_arg, &task->cl_arg_size, STARPU_VALUE, &factor, sizeof(factor), STARPU_VALUE, &c, sizeof(c), STARPU_VALUE, &x, sizeof(x), 0); ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* Test with starpu_codelet_unpack_args */ { struct starpu_task *task = starpu_task_build(&mycodelet_unpack_args, STARPU_TASK_SYNCHRONOUS, 1, 0); task->cl_arg_free = 1; struct starpu_codelet_pack_arg_data state; starpu_codelet_pack_arg_init(&state); starpu_codelet_pack_arg(&state, &factor, sizeof(factor)); starpu_codelet_pack_arg(&state, &c, sizeof(c)); starpu_codelet_pack_arg(&state, &x, sizeof(x)); starpu_codelet_pack_arg_fini(&state, &task->cl_arg, &task->cl_arg_size); ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* Test with starpu_codelet_unpack_arg */ { struct starpu_task *task = starpu_task_build(&mycodelet_unpack_arg, STARPU_TASK_SYNCHRONOUS, 1, 0); task->cl_arg_free = 1; struct starpu_codelet_pack_arg_data state; starpu_codelet_pack_arg_init(&state); starpu_codelet_pack_arg(&state, &factor, sizeof(factor)); starpu_codelet_pack_arg(&state, &c, sizeof(c)); starpu_codelet_pack_arg(&state, &x, sizeof(x)); starpu_codelet_pack_arg_fini(&state, &task->cl_arg, &task->cl_arg_size); ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* Test with starpu_codelet_dup_arg */ { struct starpu_task *task = starpu_task_build(&mycodelet_dup_arg, STARPU_TASK_SYNCHRONOUS, 1, 0); task->cl_arg_free = 1; struct starpu_codelet_pack_arg_data state; starpu_codelet_pack_arg_init(&state); starpu_codelet_pack_arg(&state, &factor, sizeof(factor)); starpu_codelet_pack_arg(&state, &c, sizeof(c)); starpu_codelet_pack_arg(&state, &x, sizeof(x)); starpu_codelet_pack_arg_fini(&state, &task->cl_arg, &task->cl_arg_size); ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* Test with starpu_codelet_pick_arg */ { struct starpu_task *task = starpu_task_build(&mycodelet_pick_arg, STARPU_TASK_SYNCHRONOUS, 1, 0); task->cl_arg_free = 1; struct starpu_codelet_pack_arg_data state; starpu_codelet_pack_arg_init(&state); starpu_codelet_pack_arg(&state, &factor, sizeof(factor)); starpu_codelet_pack_arg(&state, &c, sizeof(c)); starpu_codelet_pack_arg(&state, &c, sizeof(c)); starpu_codelet_pack_arg(&state, &c, sizeof(c)); starpu_codelet_pack_arg(&state, &c, sizeof(c)); starpu_codelet_pack_arg(&state, &x, sizeof(x)); starpu_codelet_pack_arg_fini(&state, &task->cl_arg, &task->cl_arg_size); ret = starpu_task_submit(task); if (ret != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_shutdown(); if (ret == -ENODEV) { fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; } else return 0; } starpu-1.4.9+dfsg/tests/main/pause_resume.c000066400000000000000000000050541507764646700207430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Try starpu_pause/resume */ #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 64; #elif !defined(STARPU_LONG_CHECK) static unsigned ntasks = 1000; #else static unsigned ntasks = 50000; #endif int main(void) { double timing; double start; double end; int ret; #ifdef STARPU_HAVE_VALGRIND_H if(RUNNING_ON_VALGRIND) ntasks = 5; #endif ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Check that we can submit tasks to a "paused" StarPU and then have * it run normally. */ starpu_pause(); unsigned i; for (i = 0; i < ntasks; i++) { ret = starpu_task_insert(&starpu_codelet_nop, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } start = starpu_timing_now(); starpu_resume(); starpu_task_wait_for_all(); end = starpu_timing_now(); timing = end - start; FPRINTF(stderr, "Without interruptions:\n\tTotal: %f secs\n", timing/1000000); FPRINTF(stderr, "\tPer task: %f usecs\n", timing/ntasks); /* Do the same thing, but with a lot of interuptions to see if there * is any overhead associated with the pause/resume calls. */ starpu_pause(); for (i = 0; i < ntasks; i++) { ret = starpu_task_insert(&starpu_codelet_nop, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } starpu_resume(); start = starpu_timing_now(); for (i = 0; i < 100; i++) { starpu_pause(); starpu_resume(); } starpu_task_wait_for_all(); end = starpu_timing_now(); timing = end - start; FPRINTF(stderr, "With 100 interruptions:\n\tTotal: %f secs\n", timing/1000000); FPRINTF(stderr, "\tPer task: %f usecs\n", timing/ntasks); /* Finally, check that the nesting of pause/resume calls works. */ starpu_pause(); starpu_pause(); starpu_resume(); starpu_resume(); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/main/regenerate.c000066400000000000000000000061561507764646700203730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" #include /* * Run one task with regenerate=1, and thus completes several times * before we reset regenerate to 0 in the callback */ #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 64; #else static unsigned ntasks = 65536; #endif static unsigned cnt = 0; static unsigned completed = 0; static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; static void callback(void *arg) { (void)arg; struct starpu_task *task = starpu_task_get_current(); cnt++; if (cnt == ntasks) { task->regenerate = 0; FPRINTF(stderr, "Stop !\n"); STARPU_PTHREAD_MUTEX_LOCK(&mutex); completed = 1; STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } } static void parse_args(int argc, char **argv) { int c; while ((c = getopt(argc, argv, "i:")) != -1) switch(c) { case 'i': ntasks = atoi(optarg); break; } } int main(int argc, char **argv) { // unsigned i; double timing; double start; double end; int ret; parse_args(argc, argv); ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_task task; starpu_task_init(&task); task.cl = &starpu_codelet_nop; task.regenerate = 1; task.detach = 1; task.callback_func = callback; FPRINTF(stderr, "#tasks : %u\n", ntasks); start = starpu_timing_now(); ret = starpu_task_submit(&task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_do_schedule(); STARPU_PTHREAD_MUTEX_LOCK(&mutex); if (!completed) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); end = starpu_timing_now(); timing = end - start; FPRINTF(stderr, "Total: %f secs\n", timing/1000000); FPRINTF(stderr, "Per task: %f usecs\n", timing/ntasks); starpu_task_wait_for_all(); starpu_task_clean(&task); starpu_shutdown(); /* Cleanup the statically allocated tasks after shutdown, as StarPU is still working on it after the callback */ starpu_task_clean(&task); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/regenerate_pipeline.c000066400000000000000000000100541507764646700222500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" #include /* * Create a pipeline of regenerated tasks, i.e. a sort of data flow graph */ #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 64; #else static unsigned ntasks = 65536; #endif static unsigned cntA = 0; static unsigned cntB = 0; static unsigned cntC = 0; static unsigned completed = 0; static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; static void callback(void *arg) { struct starpu_task *task = starpu_task_get_current(); unsigned *cnt = arg; unsigned res; res = STARPU_ATOMIC_ADD(cnt, 1); ANNOTATE_HAPPENS_BEFORE(&cnt); if (res == ntasks) { ANNOTATE_HAPPENS_AFTER(&cnt); task->regenerate = 0; FPRINTF(stderr, "Stop !\n"); STARPU_PTHREAD_MUTEX_LOCK(&mutex); completed++; STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } } static void parse_args(int argc, char **argv) { int c; while ((c = getopt(argc, argv, "i:")) != -1) switch(c) { case 'i': ntasks = atoi(optarg); break; } } int main(int argc, char **argv) { // unsigned i; double timing; double start; double end; int ret; parse_args(argc, argv); ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_task taskA, taskB, taskC; struct starpu_task *taskAp = &taskA; struct starpu_task *taskBp = &taskB; starpu_task_init(&taskA); taskA.cl = &starpu_codelet_nop; taskA.regenerate = 1; taskA.detach = 1; taskA.callback_func = callback; taskA.callback_arg = &cntA; starpu_task_init(&taskB); taskB.cl = &starpu_codelet_nop; taskB.regenerate = 1; taskB.detach = 1; taskB.callback_func = callback; taskB.callback_arg = &cntB; starpu_task_declare_deps_array(&taskB, 1, &taskAp); starpu_task_init(&taskC); taskC.cl = &starpu_codelet_nop; taskC.regenerate = 1; taskC.detach = 1; taskC.callback_func = callback; taskC.callback_arg = &cntC; starpu_task_declare_deps_array(&taskC, 1, &taskBp); FPRINTF(stderr, "#tasks : %u\n", ntasks); start = starpu_timing_now(); ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; ret = starpu_task_submit(&taskC); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_do_schedule(); STARPU_PTHREAD_MUTEX_LOCK(&mutex); while (completed < 3) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); end = starpu_timing_now(); timing = end - start; FPRINTF(stderr, "cntA : %u\n", cntA); FPRINTF(stderr, "cntB : %u\n", cntB); FPRINTF(stderr, "cntC : %u\n", cntC); STARPU_ASSERT(cntA == ntasks); STARPU_ASSERT(cntB == ntasks); STARPU_ASSERT(cntC == ntasks); FPRINTF(stderr, "Total: %f secs\n", timing/1000000); FPRINTF(stderr, "Per task: %f usecs\n", timing/(ntasks*3)); starpu_task_wait_for_all(); starpu_task_clean(&taskA); starpu_task_clean(&taskB); starpu_task_clean(&taskC); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/restart.c000066400000000000000000000033331507764646700177300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Try initializing/shutting down starpu several times */ #ifdef STARPU_QUICK_CHECK #define N 2 #else #define N 10 #endif static double start; static double end; int main(int argc, char **argv) { unsigned iter; double init_timing = 0.0; double shutdown_timing = 0.0; int ret; for (iter = 0; iter < N; iter++) { start = starpu_timing_now(); /* Initialize StarPU */ ret = starpu_initialize(NULL, &argc, &argv); end = starpu_timing_now(); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); init_timing += end - start; start = starpu_timing_now(); /* Shutdown StarPU */ starpu_shutdown(); end = starpu_timing_now(); shutdown_timing += end - start; } FPRINTF(stderr, "starpu_init: %2.2f seconds\n", init_timing/(N*1000000)); FPRINTF(stderr, "starpu_shutdown: %2.2f seconds\n", shutdown_timing/(N*1000000)); return EXIT_SUCCESS; enodev: return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/starpu_init.c000066400000000000000000000074671507764646700206210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include /* * Try initializing starpu with various CPU parameters */ #if !defined(STARPU_HAVE_UNSETENV) || !defined(STARPU_HAVE_SETENV) || !defined(STARPU_USE_CPU) #warning unsetenv or setenv are not defined. Or CPU are not enabled. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else static int check_cpu(int env_cpu, int conf_cpu, int precedence_over_env, int expected_cpu, int *cpu) { int ret; FPRINTF(stderr, "\nTesting with env=%d - conf=%d - expected %d (ignore env %d)\n", env_cpu, conf_cpu, expected_cpu, precedence_over_env); if (env_cpu != -1) { char string[11]; snprintf(string, sizeof(string), "%d", env_cpu); setenv("STARPU_NCPUS", string, 1); } struct starpu_conf user_conf; starpu_conf_init(&user_conf); user_conf.nmpi_ms = 0; user_conf.ntcpip_ms = 0; user_conf.precedence_over_environment_variables = precedence_over_env; if (conf_cpu != -1) { user_conf.ncpus = conf_cpu; } ret = starpu_init(&user_conf); if (env_cpu != -1) { unsetenv("STARPU_NCPUS"); } if (ret == -ENODEV) { return STARPU_TEST_SKIPPED; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); *cpu = starpu_cpu_worker_get_count(); starpu_shutdown(); if (expected_cpu == -1) { FPRINTF(stderr, "Number of CPUS: %3d\n", *cpu); return 0; } else { FPRINTF(stderr, "Number of CPUS: %3d -- Number of expected CPUs: %3d --> %s\n", *cpu, expected_cpu, *cpu==expected_cpu?"SUCCESS":"FAILURE"); return *cpu != expected_cpu; } } int main(void) { int ret; int cpu, cpu_init; int cpu_test1, cpu_test2, cpu_test3; unsetenv("STARPU_NCPUS"); unsetenv("STARPU_NCPU"); ret = check_cpu(-1, -1, 0, -1, &cpu_init); if (ret) return ret; if (cpu_init <= 1) return STARPU_TEST_SKIPPED; if (cpu_init >= STARPU_MAXCPUS-5) { cpu_test1 = cpu_init-1; cpu_test2 = cpu_init-2; cpu_test3 = cpu_init-3; } else { cpu_test1 = cpu_init+1; cpu_test2 = cpu_init+2; cpu_test3 = cpu_init+3; } ret = check_cpu(cpu_test1, -1, 0, cpu_test1, &cpu); if (ret) return ret; // Do not set anything --> default value ret = check_cpu(-1, -1, 0, -1, &cpu); if (ret) return ret; if (cpu != cpu_init) { FPRINTF(stderr, "The number of CPUs is incorrect\n"); return 1; } // Do not set environment variable, set starpu_conf::ncpus --> starpu_conf::ncpus ret = check_cpu(-1, cpu_test2, 0, cpu_test2, &cpu); if (ret) return ret; // Set environment variable, and do not set starpu_conf::ncpus --> starpu_conf::ncpus ret = check_cpu(cpu_test2, -1, 0, cpu_test2, &cpu); if (ret) return ret; // Set both environment variable and starpu_conf::ncpus --> environment variable ret = check_cpu(cpu_test3, cpu_test1, 0, cpu_test3, &cpu); if (ret) return ret; // Set both environment variable and starpu_conf::ncpus AND prefer starpu_conf over env --> starpu_conf::ncpus ret = check_cpu(cpu_test3, cpu_test1, 1, cpu_test1, &cpu); if (ret) return ret; // Set environment variable, and do no set starpu_conf, AND prefer starpu_conf over env --> environment variable ret = check_cpu(cpu_test2, -1, 1, cpu_test2, &cpu); if (ret) return ret; return 0; } #endif starpu-1.4.9+dfsg/tests/main/starpu_task_bundle.c000066400000000000000000000073021507764646700221350ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Test the bundle interface, putting tasks working on the same data in * the same bundle */ #define NB_BUNDLE 10 #define NB_ITERATION 5 void func_cpu(void *descr[], void *args) { float *x = (float *) STARPU_VARIABLE_GET_PTR(descr[0]); float factor; factor = *(float *) args; *x *= factor; } struct starpu_codelet codelet = { .modes = {STARPU_RW}, .cpu_funcs = {func_cpu}, .cpu_funcs_name = {"func_cpu"}, .nbuffers = 1 }; int main(int argc, char **argv) { int i, j, ret; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_initialize"); float *data; starpu_malloc((void**)&data, sizeof(*data) * NB_BUNDLE); float factors[NB_BUNDLE]; starpu_data_handle_t handles[NB_BUNDLE]; struct starpu_task *task[NB_ITERATION]; starpu_task_bundle_t bundles[NB_BUNDLE]; for (i = 0; i < NB_BUNDLE; i++) { data[i] = i + 1; factors[i] = NB_BUNDLE - i; } for (i = 0; i < NB_BUNDLE; i++) starpu_variable_data_register(&handles[i], STARPU_MAIN_RAM, (uintptr_t)&data[i], sizeof(float)); FPRINTF(stderr, "VALUES:"); for (i = 0; i < NB_BUNDLE; i++) FPRINTF(stderr, " %f (%f)", data[i], factors[i]); FPRINTF(stderr, "\n"); for (i = 0; i < NB_BUNDLE; i++) { starpu_task_bundle_create(&bundles[i]); for (j = 0; j < NB_ITERATION; j++) { task[j] = starpu_task_create(); task[j]->cl = &codelet; task[j]->cl_arg = &factors[i]; task[j]->cl_arg_size = sizeof(float); task[j]->handles[0] = handles[i]; ret = starpu_task_bundle_insert(bundles[i], task[j]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } /* Put one aside, just for fun */ ret = starpu_task_bundle_remove(bundles[i], task[NB_ITERATION / 2]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_bundle_remove"); for (j = 0; j < NB_ITERATION; j++) { ret = starpu_task_submit(task[j]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_bundle_close(bundles[i]); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); for(i = 0; i < NB_BUNDLE ; i++) { ret = starpu_data_acquire(handles[i], STARPU_R); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); } FPRINTF(stderr, "VALUES:"); for (i = 0; i < NB_BUNDLE; i++) FPRINTF(stderr, " %f (%f)", data[i], factors[i]); FPRINTF(stderr, "\n"); for(i = 0; i < NB_BUNDLE ; i++) { starpu_data_release(handles[i]); starpu_data_unregister(handles[i]); } starpu_free_noflag(data, sizeof(*data) * NB_BUNDLE); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/starpu_task_wait.c000066400000000000000000000050511507764646700216270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test waiting for a task */ #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 64; #else static unsigned ntasks = 65536; #endif static void usage(char **argv) { FPRINTF(stderr, "%s [-i ntasks] [-h]\n", argv[0]); exit(-1); } static void parse_args(int argc, char **argv) { int c; while ((c = getopt(argc, argv, "i:t:h")) != -1) switch(c) { case 'i': ntasks = atoi(optarg); break; case 'h': usage(argv); break; } } int main(int argc, char **argv) { double timing; double start; double end; int ret; parse_args(argc, argv); #ifdef STARPU_HAVE_VALGRIND_H if(RUNNING_ON_VALGRIND) ntasks = 5; #endif ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); FPRINTF(stderr, "#tasks : %u\n", ntasks); start = starpu_timing_now(); unsigned i; for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->cl_arg = NULL; task->callback_func = NULL; task->callback_arg = NULL; task->detach = 0; task->destroy = 0; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); starpu_task_destroy(task); } end = starpu_timing_now(); timing = end - start; FPRINTF(stderr, "Total: %f secs\n", timing/1000000); FPRINTF(stderr, "Per task: %f usecs\n", timing/ntasks); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/starpu_task_wait_for_all.c000066400000000000000000000052211507764646700233240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test waiting for all tasks */ #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 64; #else static unsigned ntasks = 65536; #endif static int inject_one_task(void) { struct starpu_task *task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->cl_arg = NULL; task->callback_func = NULL; task->callback_arg = NULL; int ret = starpu_task_submit(task); return ret; } static void usage(char **argv) { FPRINTF(stderr, "%s [-i ntasks] [-p sched_policy] [-h]\n", argv[0]); exit(-1); } static void parse_args(int argc, char **argv, struct starpu_conf *conf) { int c; while ((c = getopt(argc, argv, "i:p:h")) != -1) switch(c) { case 'i': ntasks = atoi(optarg); break; case 'p': conf->sched_policy_name = optarg; break; case 'h': usage(argv); break; } } int main(int argc, char **argv) { unsigned i; double timing; double start; double end; int ret; struct starpu_conf conf; starpu_conf_init(&conf); parse_args(argc, argv, &conf); ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); FPRINTF(stderr, "#tasks : %u\n", ntasks); start = starpu_timing_now(); for (i = 0; i < ntasks; i++) { ret = inject_one_task(); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); end = starpu_timing_now(); timing = end - start; FPRINTF(stderr, "Total: %f secs\n", timing/1000000); FPRINTF(stderr, "Per task: %f usecs\n", timing/ntasks); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/starpu_worker_exists.c000066400000000000000000000040341507764646700225510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define BUILDING_STARPU #include #include "core/workers.h" #include "../helper.h" /* * Test that _starpu_worker_exists works appropriately */ static int can_always_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl) { (void) workerid; (void) task; (void) nimpl; return 1; } static int can_never_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl) { (void) workerid; (void) task; (void) nimpl; return 0; } int main(int argc, char **argv) { int ret; struct starpu_task *task; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->destroy = 0; task->sched_ctx = 0; starpu_codelet_nop.can_execute = NULL; ret = _starpu_worker_exists(task); if (!ret) { FPRINTF(stderr, "failure with can_execute=NULL\n"); return EXIT_FAILURE; } starpu_codelet_nop.can_execute = can_always_execute; ret = _starpu_worker_exists(task); if (!ret) { FPRINTF(stderr, "failure with can_always_execute\n"); return EXIT_FAILURE; } starpu_codelet_nop.can_execute = can_never_execute; ret = _starpu_worker_exists(task); if (ret) { FPRINTF(stderr, "failure with can_never_execute\n"); return EXIT_FAILURE; } starpu_task_destroy(task); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/main/static_restartable.c000066400000000000000000000045241507764646700221260ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test that one can submit+wait the same task several times */ #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 64; #else static unsigned ntasks = 65536; #endif static void parse_args(int argc, char **argv) { int c; while ((c = getopt(argc, argv, "i:")) != -1) switch(c) { case 'i': ntasks = atoi(optarg); break; } } int main(int argc, char **argv) { unsigned i; double timing; double start; double end; int ret; parse_args(argc, argv); #ifdef STARPU_HAVE_VALGRIND_H if(RUNNING_ON_VALGRIND) ntasks = 5; #endif ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_task task; starpu_task_init(&task); task.cl = &starpu_codelet_nop; task.detach = 0; FPRINTF(stderr, "#tasks : %u\n", ntasks); start = starpu_timing_now(); for (i = 0; i < ntasks; i++) { ret = starpu_task_submit(&task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(&task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); } end = starpu_timing_now(); timing = end - start; FPRINTF(stderr, "Total: %f secs\n", timing/1000000); FPRINTF(stderr, "Per task: %f usecs\n", timing/ntasks); starpu_task_clean(&task); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/static_restartable_tag.c000066400000000000000000000050671507764646700227640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test that one can submit+wait_tag the same task several times */ #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 64; #else static unsigned ntasks = 65536; #endif static starpu_tag_t tag = 0x32; static void parse_args(int argc, char **argv) { int c; while ((c = getopt(argc, argv, "i:")) != -1) switch(c) { case 'i': ntasks = atoi(optarg); break; } } int main(int argc, char **argv) { unsigned i; double timing; double start; double end; int ret; parse_args(argc, argv); #ifdef STARPU_HAVE_VALGRIND_H if(RUNNING_ON_VALGRIND) ntasks = 5; #endif ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_task task; starpu_task_init(&task); task.cl = &starpu_codelet_nop; task.use_tag = 1; task.tag_id = tag; FPRINTF(stderr, "#tasks : %u\n", ntasks); start = starpu_timing_now(); for (i = 0; i < ntasks; i++) { ret = starpu_task_submit(&task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_tag_wait(tag); STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait"); } end = starpu_timing_now(); timing = end - start; FPRINTF(stderr, "Total: %f secs\n", timing/1000000); FPRINTF(stderr, "Per task: %f usecs\n", timing/ntasks); starpu_task_wait_for_all(); starpu_task_clean(&task); starpu_shutdown(); /* Cleanup the statically allocated tasks after shutdown, as StarPU is still working on it after the callback */ starpu_task_clean(&task); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/static_restartable_using_initializer.c000066400000000000000000000047161507764646700257410ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test that one can submit+wait the same task several times, using a static * initialization */ /* This is equivalent to calling starpu_task_init later on */ struct starpu_task task = STARPU_TASK_INITIALIZER; #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 64; #else static unsigned ntasks = 65536; #endif static void parse_args(int argc, char **argv) { int c; while ((c = getopt(argc, argv, "i:")) != -1) switch(c) { case 'i': ntasks = atoi(optarg); break; } } int main(int argc, char **argv) { unsigned i; double timing; double start; double end; int ret; parse_args(argc, argv); #ifdef STARPU_HAVE_VALGRIND_H if(RUNNING_ON_VALGRIND) ntasks = 5; #endif ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); task.cl = &starpu_codelet_nop; task.detach = 0; FPRINTF(stderr, "#tasks : %u\n", ntasks); start = starpu_timing_now(); for (i = 0; i < ntasks; i++) { ret = starpu_task_submit(&task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(&task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); } end = starpu_timing_now(); timing = end - start; FPRINTF(stderr, "Total: %f secs\n", timing/1000000); FPRINTF(stderr, "Per task: %f usecs\n", timing/ntasks); starpu_task_clean(&task); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_task_clean(&task); starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/subgraph_repeat.c000066400000000000000000000111731507764646700214200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../variable/increment.h" #include "../helper.h" /* * Test that one can resubmit a whole task graph repeatedly */ #ifdef STARPU_QUICK_CHECK static unsigned niter = 64; #else static unsigned niter = 16384; #endif /* * * /-->B--\ * | | * -----> A D---\---> * ^ | | | * | \-->C--/ | * | | * \--------------/ * * - {B, C} depend on A * - D depends on {B, C} * - A, B, C and D are resubmitted at the end of the loop (or not) */ static struct starpu_task taskA, taskB, taskC, taskD; static unsigned loop_cnt = 0; static unsigned *check_cnt; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static void callback_task_D(void *arg) { (void)arg; STARPU_PTHREAD_MUTEX_LOCK(&mutex); loop_cnt++; if (loop_cnt == niter) { /* We are done */ STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } else { int ret; STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); /* Let's go for another iteration */ ret = starpu_task_submit(&taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskC); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } int main(int argc, char **argv) { // unsigned i; // double timing; // double start; // double end; int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_data_set_default_sequential_consistency_flag(0); starpu_malloc((void**)&check_cnt, sizeof(*check_cnt)); *check_cnt = 0; increment_load_opencl(); starpu_data_handle_t check_data; starpu_variable_data_register(&check_data, STARPU_MAIN_RAM, (uintptr_t)check_cnt, sizeof(*check_cnt)); starpu_task_init(&taskA); taskA.cl = &increment_cl; taskA.handles[0] = check_data; starpu_task_init(&taskB); taskB.cl = &increment_cl; taskB.handles[0] = check_data; starpu_task_init(&taskC); taskC.cl = &increment_cl; taskC.handles[0] = check_data; starpu_task_init(&taskD); taskD.cl = &increment_cl; taskD.callback_func = callback_task_D; taskD.handles[0] = check_data; starpu_task_declare_deps(&taskB, 1, &taskA); starpu_task_declare_deps(&taskC, 1, &taskA); starpu_task_declare_deps(&taskD, 2, &taskB, &taskC); ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskC); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskD); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_do_schedule(); /* Wait for the termination of all loops */ STARPU_PTHREAD_MUTEX_LOCK(&mutex); if (loop_cnt < niter) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); starpu_data_acquire(check_data, STARPU_R); starpu_data_release(check_data); STARPU_ASSERT(*check_cnt == (4*loop_cnt)); starpu_free_noflag(check_cnt, sizeof(*check_cnt)); starpu_data_unregister(check_data); starpu_task_wait_for_all(); starpu_task_clean(&taskA); starpu_task_clean(&taskB); starpu_task_clean(&taskC); starpu_task_clean(&taskD); increment_unload_opencl(); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_data_unregister(check_data); increment_unload_opencl(); starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/subgraph_repeat_regenerate.c000066400000000000000000000117361507764646700236260ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../variable/increment.h" #include "../helper.h" /* * Test that one can let a whole task graph repeatedly regenerate itself */ #ifdef STARPU_QUICK_CHECK static unsigned niter = 64; #else static unsigned niter = 16384; #endif /* * * /-->B--\ * | | * -----> A D---\---> * ^ | | | * | \-->C--/ | * | | * \--------------/ * * - {B, C} depend on A * - D depends on {B, C} * - A, B, C and D are resubmitted at the end of the loop (or not) */ static struct starpu_task taskA, taskB, taskC, taskD; static unsigned loop_cntB = 0; static unsigned loop_cntC = 0; static unsigned loop_cntD = 0; static unsigned *check_cnt; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static void callback_task_B(void *arg) { (void)arg; if (++loop_cntB == niter) taskB.regenerate = 0; } static void callback_task_C(void *arg) { (void)arg; if (++loop_cntC == niter) taskC.regenerate = 0; } static void callback_task_D(void *arg) { (void)arg; STARPU_PTHREAD_MUTEX_LOCK(&mutex); loop_cntD++; if (loop_cntD == niter) { /* We are done */ taskD.regenerate = 0; STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } else { int ret; STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); /* Let's go for another iteration */ ret = starpu_task_submit(&taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } int main(int argc, char **argv) { // unsigned i; // double timing; // double start; // double end; int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); increment_load_opencl(); /* Implicit data dependencies and regeneratable tasks are not compatible */ starpu_data_set_default_sequential_consistency_flag(0); starpu_malloc((void**)&check_cnt, sizeof(*check_cnt)); *check_cnt = 0; starpu_data_handle_t check_data; starpu_variable_data_register(&check_data, STARPU_MAIN_RAM, (uintptr_t)check_cnt, sizeof(*check_cnt)); starpu_task_init(&taskA); taskA.cl = &increment_cl; taskA.regenerate = 0; /* this task will be explicitly resubmitted if needed */ taskA.handles[0] = check_data; starpu_task_init(&taskB); taskB.cl = &increment_cl; taskB.callback_func = callback_task_B; taskB.regenerate = 1; taskB.handles[0] = check_data; starpu_task_init(&taskC); taskC.cl = &increment_cl; taskC.callback_func = callback_task_C; taskC.regenerate = 1; taskC.handles[0] = check_data; starpu_task_init(&taskD); taskD.cl = &increment_cl; taskD.callback_func = callback_task_D; taskD.regenerate = 1; taskD.handles[0] = check_data; starpu_task_declare_deps(&taskB, 1, &taskA); starpu_task_declare_deps(&taskC, 1, &taskA); starpu_task_declare_deps(&taskD, 2, &taskB, &taskC); ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskC); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskD); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_do_schedule(); /* Wait for the termination of all loops */ STARPU_PTHREAD_MUTEX_LOCK(&mutex); while (loop_cntD < niter) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); starpu_data_acquire(check_data, STARPU_R); starpu_data_release(check_data); STARPU_ASSERT(*check_cnt == (4*niter)); starpu_free_noflag(check_cnt, sizeof(*check_cnt)); starpu_data_unregister(check_data); starpu_task_wait_for_all(); starpu_task_clean(&taskA); starpu_task_clean(&taskB); starpu_task_clean(&taskC); starpu_task_clean(&taskD); increment_unload_opencl(); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_data_unregister(check_data); increment_unload_opencl(); starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/subgraph_repeat_regenerate_tag.c000066400000000000000000000134221507764646700244530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../variable/increment.h" #include "../helper.h" /* * Test that one can let a whole task graph repeatedly regenerate itself, using * tag dependencies */ #ifdef STARPU_QUICK_CHECK static unsigned niter = 64; #else static unsigned niter = 16384; #endif #define TAG_START 0 #define TAG_A 1 #define TAG_B 2 #define TAG_C 3 #define TAG_D 4 /* * * /-->B--\ * | | * -----> A D---\---> * ^ | | | * | \-->C--/ | * | | * \--------------/ * * - {B, C} depend on A * - D depends on {B, C} * - A, B, C and D are resubmitted at the end of the loop (or not) */ static struct starpu_task taskA, taskB, taskC, taskD; static unsigned loop_cnt = 0; static unsigned loop_cnt_A = 0; static unsigned loop_cnt_B = 0; static unsigned loop_cnt_C = 0; static unsigned *check_cnt; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static void callback_task_A(void *arg) { (void)arg; loop_cnt_A++; if (loop_cnt_A == niter) { /* We are done */ taskA.regenerate = 0; } } static void callback_task_B(void *arg) { (void)arg; loop_cnt_B++; if (loop_cnt_B == niter) { /* We are done */ taskB.regenerate = 0; } } static void callback_task_C(void *arg) { (void)arg; loop_cnt_C++; if (loop_cnt_C == niter) { /* We are done */ taskC.regenerate = 0; } } static void callback_task_D(void *arg) { (void)arg; STARPU_PTHREAD_MUTEX_LOCK(&mutex); loop_cnt++; if (loop_cnt == niter) { /* We are done */ taskD.regenerate = 0; STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } else { STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); /* Let's go for another iteration */ starpu_tag_restart((starpu_tag_t) TAG_START); starpu_tag_notify_from_apps((starpu_tag_t)TAG_START); } } int main(int argc, char **argv) { // unsigned i; // double timing; // double start; // double end; int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Implicit data dependencies and regeneratable tasks are not compatible */ starpu_data_set_default_sequential_consistency_flag(0); starpu_malloc((void**)&check_cnt, sizeof(*check_cnt)); *check_cnt = 0; increment_load_opencl(); starpu_data_handle_t check_data; starpu_variable_data_register(&check_data, STARPU_MAIN_RAM, (uintptr_t)check_cnt, sizeof(*check_cnt)); starpu_task_init(&taskA); taskA.cl = &increment_cl; taskA.regenerate = 1; /* this task will be explicitly resubmitted if needed */ taskA.use_tag = 1; taskA.tag_id = TAG_A; taskA.callback_func = callback_task_A; taskA.handles[0] = check_data; starpu_task_init(&taskB); taskB.cl = &increment_cl; taskB.regenerate = 1; taskB.use_tag = 1; taskB.tag_id = TAG_B; taskB.callback_func = callback_task_B; taskB.handles[0] = check_data; starpu_task_init(&taskC); taskC.cl = &increment_cl; taskC.regenerate = 1; taskC.use_tag = 1; taskC.tag_id = TAG_C; taskC.callback_func = callback_task_C; taskC.handles[0] = check_data; starpu_task_init(&taskD); taskD.cl = &increment_cl; taskD.callback_func = callback_task_D; taskD.regenerate = 1; taskD.use_tag = 1; taskD.tag_id = TAG_D; taskD.handles[0] = check_data; starpu_tag_declare_deps((starpu_tag_t) TAG_A, 1, (starpu_tag_t) TAG_START); starpu_tag_declare_deps((starpu_tag_t) TAG_B, 1, (starpu_tag_t) TAG_A); starpu_tag_declare_deps((starpu_tag_t) TAG_C, 1, (starpu_tag_t) TAG_A); starpu_tag_declare_deps((starpu_tag_t) TAG_D, 2, (starpu_tag_t) TAG_B, (starpu_tag_t) TAG_C); ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskC); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskD); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_tag_notify_from_apps((starpu_tag_t) TAG_START); starpu_do_schedule(); /* Wait for the termination of all loops */ STARPU_PTHREAD_MUTEX_LOCK(&mutex); if (loop_cnt < niter) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); starpu_data_acquire(check_data, STARPU_R); starpu_data_release(check_data); STARPU_ASSERT(*check_cnt == (4*loop_cnt)); starpu_free_noflag(check_cnt, sizeof(*check_cnt)); starpu_data_unregister(check_data); starpu_task_wait_for_all(); starpu_task_clean(&taskA); starpu_task_clean(&taskB); starpu_task_clean(&taskC); starpu_task_clean(&taskD); increment_unload_opencl(); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_data_unregister(check_data); increment_unload_opencl(); starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/subgraph_repeat_regenerate_tag_cycle.c000066400000000000000000000133041507764646700256310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../variable/increment.h" #include "../helper.h" /* * Test that one can let a whole task graph repeatedly regenerate itself, using * tag dependencies, with a complete cycle. */ #ifdef STARPU_QUICK_CHECK static unsigned niter = 64; #else static unsigned niter = 16384; #endif #define TAG_A 1 #define TAG_B 2 #define TAG_C 3 #define TAG_D 4 /* * * /-->B--\ * | | * -----> A D---\---> * ^ | | | * | \-->C--/ | * | | * \--------------/ * * - {B, C} depend on A * - D depends on {B, C} * - A, B, C and D are resubmitted at the end of the loop (or not) */ static struct starpu_task taskA, taskB, taskC, taskD; static unsigned loop_cnt = 0; static unsigned loop_cnt_A = 0; static unsigned loop_cnt_B = 0; static unsigned loop_cnt_C = 0; static unsigned *check_cnt; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static void callback_task_A(void *arg) { (void)arg; loop_cnt_A++; if (loop_cnt_A == niter) { /* We are done */ taskA.regenerate = 0; } } static void callback_task_B(void *arg) { (void)arg; loop_cnt_B++; if (loop_cnt_B == niter) { /* We are done */ taskB.regenerate = 0; } } static void callback_task_C(void *arg) { (void)arg; loop_cnt_C++; if (loop_cnt_C == niter) { /* We are done */ taskC.regenerate = 0; } } static void callback_task_D(void *arg) { (void)arg; STARPU_PTHREAD_MUTEX_LOCK(&mutex); loop_cnt++; if (loop_cnt == niter) { /* We are done */ taskD.regenerate = 0; STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } else { STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); /* Let's go for another iteration */ } } int main(int argc, char **argv) { // unsigned i; // double timing; // double start; // double end; int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Implicit data dependencies and regeneratable tasks are not compatible */ starpu_data_set_default_sequential_consistency_flag(0); starpu_malloc((void**)&check_cnt, sizeof(*check_cnt)); *check_cnt = 0; increment_load_opencl(); starpu_data_handle_t check_data; starpu_variable_data_register(&check_data, STARPU_MAIN_RAM, (uintptr_t)check_cnt, sizeof(*check_cnt)); starpu_task_init(&taskA); taskA.cl = &increment_cl; taskA.regenerate = 1; /* this task will be explicitly resubmitted if needed */ taskA.use_tag = 1; taskA.tag_id = TAG_A; taskA.callback_func = callback_task_A; taskA.handles[0] = check_data; starpu_task_init(&taskB); taskB.cl = &increment_cl; taskB.regenerate = 1; taskB.use_tag = 1; taskB.tag_id = TAG_B; taskB.callback_func = callback_task_B; taskB.handles[0] = check_data; starpu_task_init(&taskC); taskC.cl = &increment_cl; taskC.regenerate = 1; taskC.use_tag = 1; taskC.tag_id = TAG_C; taskC.callback_func = callback_task_C; taskC.handles[0] = check_data; starpu_task_init(&taskD); taskD.cl = &increment_cl; taskD.callback_func = callback_task_D; taskD.regenerate = 1; taskD.use_tag = 1; taskD.tag_id = TAG_D; taskD.handles[0] = check_data; starpu_tag_declare_deps((starpu_tag_t) TAG_B, 1, (starpu_tag_t) TAG_A); starpu_tag_declare_deps((starpu_tag_t) TAG_C, 1, (starpu_tag_t) TAG_A); starpu_tag_declare_deps((starpu_tag_t) TAG_D, 2, (starpu_tag_t) TAG_B, (starpu_tag_t) TAG_C); starpu_tag_declare_deps((starpu_tag_t) TAG_A, 1, (starpu_tag_t) TAG_D); ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskC); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskD); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Break the loop */ starpu_tag_notify_restart_from_apps((starpu_tag_t) TAG_D); starpu_do_schedule(); /* Wait for the termination of all loops */ STARPU_PTHREAD_MUTEX_LOCK(&mutex); if (loop_cnt < niter) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); starpu_data_acquire(check_data, STARPU_R); starpu_data_release(check_data); STARPU_ASSERT(*check_cnt == (4*loop_cnt)); starpu_free_noflag(check_cnt, sizeof(*check_cnt)); starpu_data_unregister(check_data); starpu_task_wait_for_all(); starpu_task_clean(&taskA); starpu_task_clean(&taskB); starpu_task_clean(&taskC); starpu_task_clean(&taskD); increment_unload_opencl(); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_data_unregister(check_data); increment_unload_opencl(); starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/subgraph_repeat_tag.c000066400000000000000000000120371507764646700222530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../variable/increment.h" #include "../helper.h" /* * Test that one can resubmit a whole task graph repeatedly, using tag dependencies */ #ifdef STARPU_QUICK_CHECK static unsigned niter = 64; #else static unsigned niter = 16384; #endif /* * * /-->B--\ * | | * -----> A D---\---> * ^ | | | * | \-->C--/ | * | | * \--------------/ * * - {B, C} depend on A * - D depends on {B, C} * - A, B, C and D are resubmitted at the end of the loop (or not) */ static struct starpu_task taskA, taskB, taskC, taskD; static unsigned loop_cnt = 0; static unsigned loop_cnt_B = 0; static unsigned loop_cnt_C = 0; static unsigned *check_cnt; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static void callback_task_B(void *arg) { (void)arg; loop_cnt_B++; if (loop_cnt_B == niter) { /* We are done */ taskB.regenerate = 0; } } static void callback_task_C(void *arg) { (void)arg; loop_cnt_C++; if (loop_cnt_C == niter) { /* We are done */ taskC.regenerate = 0; } } static void callback_task_D(void *arg) { (void)arg; STARPU_PTHREAD_MUTEX_LOCK(&mutex); loop_cnt++; if (loop_cnt == niter) { /* We are done */ taskD.regenerate = 0; STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } else { int ret; STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); /* Let's go for another iteration */ ret = starpu_task_submit(&taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } int main(int argc, char **argv) { // unsigned i; // double timing; // double start; // double end; int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Implicit data dependencies and regeneratable tasks are not compatible */ starpu_data_set_default_sequential_consistency_flag(0); starpu_malloc((void**)&check_cnt, sizeof(*check_cnt)); *check_cnt = 0; increment_load_opencl(); starpu_data_handle_t check_data; starpu_variable_data_register(&check_data, STARPU_MAIN_RAM, (uintptr_t)check_cnt, sizeof(*check_cnt)); starpu_task_init(&taskA); taskA.cl = &increment_cl; taskA.regenerate = 0; /* this task will be explicitly resubmitted if needed */ taskA.handles[0] = check_data; starpu_task_init(&taskB); taskB.cl = &increment_cl; taskB.regenerate = 1; taskB.callback_func = callback_task_B; taskB.handles[0] = check_data; starpu_task_init(&taskC); taskC.cl = &increment_cl; taskC.regenerate = 1; taskC.callback_func = callback_task_C; taskC.handles[0] = check_data; starpu_task_init(&taskD); taskD.cl = &increment_cl; taskD.callback_func = callback_task_D; taskD.regenerate = 1; taskD.handles[0] = check_data; starpu_task_declare_deps(&taskB, 1, &taskA); starpu_task_declare_deps(&taskC, 1, &taskA); starpu_task_declare_deps(&taskD, 2, &taskB, &taskC); ret = starpu_task_submit(&taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskC); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(&taskD); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_do_schedule(); /* Wait for the termination of all loops */ STARPU_PTHREAD_MUTEX_LOCK(&mutex); if (loop_cnt < niter) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); starpu_data_acquire(check_data, STARPU_R); starpu_data_release(check_data); STARPU_ASSERT(*check_cnt == (4*loop_cnt)); starpu_free_noflag(check_cnt, sizeof(*check_cnt)); starpu_data_unregister(check_data); starpu_task_wait_for_all(); starpu_task_clean(&taskA); starpu_task_clean(&taskB); starpu_task_clean(&taskC); starpu_task_clean(&taskD); increment_unload_opencl(); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_data_unregister(check_data); increment_unload_opencl(); starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/submit.c000066400000000000000000000056171507764646700175560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test task submission */ static int i = 0, j; void dummy_func(void *descr[], void *arg) { (void)descr; (void)arg; int old_i = STARPU_ATOMIC_ADD(&i, 1); FPRINTF(stdout, "called third task, i = %d\n", old_i+1); } static struct starpu_codelet dummy_codelet = { .cpu_funcs = {dummy_func}, .cuda_funcs = {dummy_func}, .opencl_funcs = {dummy_func}, .model = NULL, .nbuffers = 0 }; static void callback(void *arg) { (void)arg; struct starpu_task *task = starpu_task_create(); task->cl = &dummy_codelet; task->detach = 1; if (starpu_task_submit(task) == -ENODEV) exit(STARPU_TEST_SKIPPED); FPRINTF(stdout, "submitted third task, i = %d\n", i); } static struct starpu_codelet callback_submit_codelet = { .cpu_funcs = {dummy_func}, .cuda_funcs = {dummy_func}, .opencl_funcs = {dummy_func}, .model = NULL, .nbuffers = 0 }; static void task_submit_func(void *descr[], void *arg) { (void)descr; (void)arg; struct starpu_task *task = starpu_task_create(); task->cl = &callback_submit_codelet; task->callback_func = callback; task->detach = 1; if (starpu_task_submit(task) == -ENODEV) exit(STARPU_TEST_SKIPPED); int old_i = STARPU_ATOMIC_ADD(&i, 1); FPRINTF(stdout, "submitted second task, i = %d\n", old_i + 1); } static struct starpu_codelet task_submit_codelet = { .cpu_funcs = {task_submit_func}, .cuda_funcs = {task_submit_func}, .opencl_funcs = {task_submit_func}, .model = NULL, .nbuffers = 0 }; int main(void) { int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_task *task = starpu_task_create(); task->cl = &task_submit_codelet; task->detach = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_wait_for_all(); j = i; starpu_shutdown(); return j == 3 ? EXIT_SUCCESS : EXIT_FAILURE; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/tag_get_task.c000066400000000000000000000036421507764646700207030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test that starpu_tag_get_task returns the proper task */ static void callback(void *tag) { fflush(stderr); FPRINTF(stderr, "Callback for tag %p\n", tag); fflush(stderr); } int main(int argc, char **argv) { struct starpu_task *task; starpu_tag_t tag = 0x42; int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* create a new dummy task with a tag */ task = starpu_task_create(); task->callback_func = callback; task->callback_arg = (void *)tag; task->cl = &starpu_codelet_nop; task->cl_arg = NULL; task->destroy = 0; /* tell StarPU to not destroy the task */ task->use_tag = 1; task->tag_id = tag; /* execute the task */ ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); /* check that starpu_tag_get_task() returns the correct task */ ret = (starpu_tag_get_task(task->tag_id) != task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_get_task"); starpu_task_destroy(task); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/main/tag_task_data_deps.c000066400000000000000000000136721507764646700220540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test combinations of various tag/task/data dependencies */ void dummy_func(void *descr[], void *arg) { unsigned duration = (uintptr_t) arg; if (duration) starpu_usleep(duration); } static struct starpu_codelet dummy_Rcodelet = { .cpu_funcs = {dummy_func}, .model = NULL, .nbuffers = 1, .modes = {STARPU_R} }; static struct starpu_codelet dummy_Wcodelet = { .cpu_funcs = {dummy_func}, .model = NULL, .nbuffers = 1, .modes = {STARPU_W} }; static struct starpu_codelet dummy_codelet = { .cpu_funcs = {dummy_func}, .model = NULL, .nbuffers = 0, }; static struct starpu_task *create_dummy_task(int write, int data, unsigned duration, starpu_data_handle_t handle) { struct starpu_task *task = starpu_task_create(); if (data) { if (write) task->cl = &dummy_Wcodelet; else task->cl = &dummy_Rcodelet; task->handles[0] = handle; } else task->cl = &dummy_codelet; task->cl_arg = (void*) (uintptr_t) duration; return task; } int main(void) { int ret; /* We have 17 toggles to try below, thus 2^17 possibilities */ unsigned loop, nloops = 128*1024; unsigned duration = 100; starpu_data_handle_t handle1, handle2; #ifdef STARPU_QUICK_CHECK return STARPU_TEST_SKIPPED; #endif struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_void_data_register(&handle1); starpu_void_data_register(&handle2); starpu_data_set_sequential_consistency_flag(handle2, 0); #if 1 for (loop = 0; loop < nloops; loop++) { #else loop = 0x258; do { #endif int durationA = (loop & 1) ? duration:0; int durationB = (loop & 2) ? duration:0; int durationC = (loop & 4) ? duration:0; int writeA, dataA; int writeB, dataB; int writeC, dataC; starpu_data_handle_t handleA, handleB, handleC; struct starpu_task *taskA, *taskB, *taskC; handleA = handle1; writeA = !!(loop & 8); dataA = !!(loop & 16); if (!dataA && writeA) { handleA = handle2; dataA = 1; } handleB = handle1; writeB = !!(loop & 32); dataB = !!(loop & 64); if (!dataB && writeB) { handleB = handle2; dataB = 1; } handleC = handle1; writeC = !!(loop & 128); dataC = !!(loop & 256); if (!dataC && writeC) { handleC = handle2; dataC = 1; } FPRINTF(stderr,"\r%u", loop); #if 0 if (durationA) FPRINTF(stderr, " longA "); if (durationB) FPRINTF(stderr, " longB "); if (durationC) FPRINTF(stderr, " longC "); if (dataA) { if (writeA) FPRINTF(stderr, " WA"); else FPRINTF(stderr, " RA"); } else if (writeA) FPRINTF(stderr, " wA"); if (dataB) { if (writeB) FPRINTF(stderr, " WB"); else FPRINTF(stderr, " RB"); } else if (writeB) FPRINTF(stderr, " wB"); if (dataC) { if (writeC) FPRINTF(stderr, " WC"); else FPRINTF(stderr, " RC"); } else if (writeC) FPRINTF(stderr, " wC"); if (loop & 512) FPRINTF(stderr, " Tag AB"); if (loop & 1024) FPRINTF(stderr, " Tag AC"); if (loop & 2048) FPRINTF(stderr, " Tag BC"); if (loop & 4096) FPRINTF(stderr, " Task AB"); if (loop & 8192) FPRINTF(stderr, " Task AC"); if (loop & 16384) FPRINTF(stderr, " Task BC"); if (loop & 32768) FPRINTF(stderr, " delayB"); if (loop & 65536) FPRINTF(stderr, " delayC"); FPRINTF(stderr," "); #endif fflush(stderr); taskA = create_dummy_task(writeA, dataA, durationA, handleA); taskB = create_dummy_task(writeB, dataB, durationB, handleB); taskC = create_dummy_task(writeC, dataC, durationC, handleC); taskA->tag_id = 3*loop; taskA->use_tag = 1; taskB->tag_id = 3*loop+1; taskB->use_tag = 1; taskC->tag_id = 3*loop+2; taskC->use_tag = 1; if (loop & 512) starpu_tag_declare_deps(taskB->tag_id, 1, taskA->tag_id); if (loop & 1024) starpu_tag_declare_deps(taskC->tag_id, 1, taskA->tag_id); if (loop & 2048) starpu_tag_declare_deps(taskC->tag_id, 1, taskB->tag_id); if (loop & 4096) starpu_task_declare_deps_array(taskB, 1, &taskA); if (loop & 8192) starpu_task_declare_deps_array(taskC, 1, &taskA); if (loop & 16384) starpu_task_declare_deps_array(taskC, 1, &taskB); taskA->detach = 0; taskB->detach = 0; taskC->detach = 0; ret = starpu_task_submit(taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); if (loop & 32768) starpu_usleep(duration); ret = starpu_task_submit(taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); if (loop & 65536) starpu_usleep(duration); ret = starpu_task_submit(taskC); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); ret = starpu_task_wait(taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); ret = starpu_task_wait(taskC); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); } while(0); enodev: starpu_data_unregister(handle1); starpu_data_unregister(handle2); starpu_shutdown(); return ret == -ENODEV ? STARPU_TEST_SKIPPED : EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/main/tag_wait_api.c000066400000000000000000000103171507764646700206740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "../helper.h" /* * Test tag dependencies and waiting for a tag */ static void callback(void *tag) { fflush(stderr); FPRINTF(stderr, "Callback for tag %p\n", tag); fflush(stderr); } static struct starpu_task *create_dummy_task(starpu_tag_t tag) { struct starpu_task *task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->cl_arg = NULL; task->callback_func = callback; task->callback_arg = (void *)(uintptr_t)tag; task->use_tag = 1; task->tag_id = tag; return task; } #define tagA ((starpu_tag_t)0x42) #define tagB ((starpu_tag_t)0x12300) #define tagC ((starpu_tag_t)0x32) #define tagD ((starpu_tag_t)0x52) #define tagE ((starpu_tag_t)0x19999) #define tagF ((starpu_tag_t)0x2312) #define tagG ((starpu_tag_t)0x1985) #define tagH ((starpu_tag_t)0x32234) #define tagI ((starpu_tag_t)0x5234) #define tagJ ((starpu_tag_t)0x199) #define tagK ((starpu_tag_t)0x231234) #define tagL ((starpu_tag_t)0x2345) int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); FPRINTF(stderr, "{ A } -> { B }\n"); fflush(stderr); struct starpu_task *taskA, *taskB; taskA = create_dummy_task(tagA); taskB = create_dummy_task(tagB); /* B depends on A */ starpu_tag_declare_deps(tagB, 1, tagA); ret = starpu_task_submit(taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); ret = starpu_task_submit(taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); ret = starpu_tag_wait(tagB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_wait"); FPRINTF(stderr, "{ C, D, E, F } -> { G }\n"); struct starpu_task *taskC, *taskD, *taskE, *taskF, *taskG; taskC = create_dummy_task(tagC); taskD = create_dummy_task(tagD); taskE = create_dummy_task(tagE); taskF = create_dummy_task(tagF); taskG = create_dummy_task(tagG); /* NB: we could have used starpu_tag_declare_deps_array instead */ starpu_tag_declare_deps(tagG, 4, tagC, tagD, tagE, tagF); ret = starpu_task_submit(taskC); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); ret = starpu_task_submit(taskD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); ret = starpu_task_submit(taskG); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); ret = starpu_task_submit(taskE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); ret = starpu_task_submit(taskF); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); ret = starpu_tag_wait(tagG); STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait"); FPRINTF(stderr, "{ H, I } -> { J, K, L }\n"); struct starpu_task *taskH, *taskI, *taskJ, *taskK, *taskL; taskH = create_dummy_task(tagH); taskI = create_dummy_task(tagI); taskJ = create_dummy_task(tagJ); taskK = create_dummy_task(tagK); taskL = create_dummy_task(tagL); starpu_tag_declare_deps(tagJ, 2, tagH, tagI); starpu_tag_declare_deps(tagK, 2, tagH, tagI); starpu_tag_declare_deps(tagL, 2, tagH, tagI); starpu_tag_t tagJKL[3] = {tagJ, tagK, tagL}; ret = starpu_task_submit(taskH); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); ret = starpu_task_submit(taskI); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); ret = starpu_task_submit(taskJ); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); ret = starpu_task_submit(taskK); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); ret = starpu_task_submit(taskL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_submit"); ret = starpu_tag_wait_array(3, tagJKL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait_array"); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/main/task_end_dep.c000066400000000000000000000061571507764646700206730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* This checks that adding an end dependency for an already-terminated task * works */ #include #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) #define INIT 12 void cpu_codelet2(void *descr[], void *args) { int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); (void)args; STARPU_ASSERT(*val == INIT); starpu_sleep(0.1); STARPU_ASSERT(*val == INIT); *val *= 2; } struct starpu_codelet cl2 = { .cpu_funcs = {cpu_codelet2}, .cpu_funcs_name = {"cpu_codelet2"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "codelet2" }; void cpu_codelet(void *descr[], void *args) { (void)args; int *val = (int *)STARPU_VARIABLE_GET_PTR(descr[0]); STARPU_ASSERT(*val == 2*INIT); starpu_sleep(0.1); STARPU_ASSERT(*val == 2*INIT); *val *= 2; } struct starpu_codelet cl = { .cpu_funcs = {cpu_codelet}, .cpu_funcs_name = {"cpu_codelet"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "codelet" }; int main(void) { int value=INIT; int ret; starpu_data_handle_t value_handle; struct starpu_conf conf; struct starpu_task *task, *task2; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = 0; conf.ntcpip_ms = 0; ret = starpu_init(&conf); if (STARPU_UNLIKELY(ret == -ENODEV)) { return 77; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cpu_worker_get_count() < 1 && starpu_mpi_ms_worker_get_count() < 1) { FPRINTF(stderr, "This application requires at least 1 cpu worker\n"); starpu_shutdown(); return 77; } starpu_variable_data_register(&value_handle, STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(value)); task = starpu_task_build(&cl, STARPU_RW, value_handle, 0); STARPU_ASSERT(task); task->detach = 0; task2 = starpu_task_build(&cl2, STARPU_RW, value_handle, 0); STARPU_ASSERT(task2); task2->detach = 0; task2->destroy = 0; ret = starpu_task_submit(task2); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(task2); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); starpu_task_declare_end_deps(task, 1, task2); starpu_task_destroy(task2); ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); starpu_data_unregister(value_handle); STARPU_ASSERT(value == 2*2*INIT); starpu_shutdown(); FPRINTF(stderr, "Value = %d\n", value); return ret; } starpu-1.4.9+dfsg/tests/main/task_wait_api.c000066400000000000000000000102361507764646700210630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test task dependencies and waiting for a task */ static struct starpu_task *create_dummy_task(void) { struct starpu_task *task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->cl_arg = NULL; task->detach = 0; return task; } int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); FPRINTF(stderr, "{ A } -> { B }\n"); fflush(stderr); struct starpu_task *taskA, *taskB; taskA = create_dummy_task(); taskB = create_dummy_task(); /* B depends on A */ starpu_task_declare_deps_array(taskB, 1, &taskA); ret = starpu_task_submit(taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskA); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(taskB); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); FPRINTF(stderr, "{ C, D, E, F } -> { G }\n"); struct starpu_task *taskC, *taskD, *taskE, *taskF, *taskG; taskC = create_dummy_task(); taskD = create_dummy_task(); taskE = create_dummy_task(); taskF = create_dummy_task(); taskG = create_dummy_task(); starpu_task_declare_deps(taskG, 4, taskC, taskD, taskE, taskF); ret = starpu_task_submit(taskC); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskD); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskG); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskE); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskF); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait(taskG); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); FPRINTF(stderr, "{ H, I } -> { J, K, L }\n"); struct starpu_task *taskH, *taskI, *taskJ, *taskK, *taskL; taskH = create_dummy_task(); taskI = create_dummy_task(); taskJ = create_dummy_task(); taskK = create_dummy_task(); taskL = create_dummy_task(); struct starpu_task *tasksHI[2] = {taskH, taskI}; starpu_task_declare_deps_array(taskJ, 2, tasksHI); starpu_task_declare_deps_array(taskK, 2, tasksHI); starpu_task_declare_deps_array(taskL, 2, tasksHI); ret = starpu_task_submit(taskH); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskI); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskJ); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskK); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(taskL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); struct starpu_task *tasksJKL[3] = {taskJ, taskK, taskL}; ret = starpu_task_wait_array(tasksJKL, 3); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_array"); starpu_task_wait_for_all(); /* Destroy all the tasks that were not detached */ starpu_task_destroy(taskA); starpu_task_destroy(taskC); starpu_task_destroy(taskD); starpu_task_destroy(taskE); starpu_task_destroy(taskF); starpu_task_destroy(taskH); starpu_task_destroy(taskI); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/main/wait_all_regenerable_tasks.c000066400000000000000000000055521507764646700236050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test that starpu_task_wait_for_all can work with a regenerating task */ #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 64; #else static unsigned ntasks = 1024; #endif static void callback(void *arg) { struct starpu_task *task = starpu_task_get_current(); unsigned *cnt = (unsigned *) arg; (*cnt)++; if (*cnt == ntasks) task->regenerate = 0; } static void parse_args(int argc, char **argv) { int c; while ((c = getopt(argc, argv, "i:")) != -1) switch(c) { case 'i': ntasks = atoi(optarg); break; } } #define K 128 int main(int argc, char **argv) { int ret; double timing; double start; double end; parse_args(argc, argv); ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_task task[K]; unsigned cnt[K]; int i; for (i = 0; i < K; i++) { starpu_task_init(&task[i]); cnt[i] = 0; task[i].cl = &starpu_codelet_nop; task[i].regenerate = 1; task[i].detach = 1; task[i].callback_func = callback; task[i].callback_arg = &cnt[i]; } FPRINTF(stderr, "#tasks : %d x %u tasks\n", K, ntasks); start = starpu_timing_now(); for (i = 0; i < K; i++) { ret = starpu_task_submit(&task[i]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); for (i = 0; i < K; i++) starpu_task_clean(&task[i]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); end = starpu_timing_now(); /* Check that all the tasks have been properly executed */ unsigned total_cnt = 0; for (i = 0; i < K; i++) total_cnt += cnt[i]; STARPU_ASSERT(total_cnt == K*ntasks); timing = end - start; FPRINTF(stderr, "Total: %f secs\n", timing/1000000); FPRINTF(stderr, "Per task: %f usecs\n", timing/(K*ntasks)); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/maxfpga/000077500000000000000000000000001507764646700165755ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/maxfpga/LMemLoopbackCpuCode.c000066400000000000000000000025111507764646700225100ustar00rootroot00000000000000/** * Document: MaxCompiler Tutorial (maxcompiler-tutorial.pdf) * Chapter: 13 Example: 2 Name: LMem Loopback * MaxFile name: LMemLoopback * Summary: * Adds two LMem input streams and writes the result to LMem. */ #include #include #include #include "LMemLoopback.h" #include int check(int size, int32_t *outData, int32_t *inA, int32_t *inB) { int status = 0; for (int i = 0; i < size; i++) { if (outData[i] != inA[i] + inB[i]) { fprintf(stderr, "[%d] Verification error, out: %u != expected: %u\n", i, outData[i], inA[i] + inB[i]); status = 1; } } return status; } int main() { const int size = 384; int sizeBytes = size * sizeof(int32_t); int32_t *inA = (int32_t*) malloc(sizeBytes); int32_t *inB = (int32_t*) malloc(sizeBytes); for (int i = 0; i < size; i++) { inA[i] = i; inB[i] = size - i; } printf("Loading DFE memory.\n"); LMemLoopback_writeLMem(size, 0, inA); LMemLoopback_writeLMem(size, size, inB); printf("Running DFE.\n"); LMemLoopback(size); printf("Reading DFE memory.\n"); int32_t *outData = (int32_t*) malloc(sizeBytes); LMemLoopback_readLMem(size, 2 * size, outData); int status = check(size, outData, inA, inB); if (status) printf("Test failed.\n"); else printf("Test passed OK!\n"); return status; } starpu-1.4.9+dfsg/tests/maxfpga/MyTasksManager.maxj000066400000000000000000000126771507764646700223610ustar00rootroot00000000000000package maxfpga; import com.maxeler.maxcompiler.v2.build.EngineParameters; //import com.maxeler.maxcompiler.v2.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v2.managers.custom.DFELink; import com.maxeler.maxcompiler.v2.managers.custom.blocks.KernelBlock; import com.maxeler.maxcompiler.v2.managers.custom.stdlib.LMemCommandGroup; import com.maxeler.maxcompiler.v2.managers.custom.stdlib.LMemInterface; import com.maxeler.maxcompiler.v2.managers.engine_interfaces.CPUTypes; import com.maxeler.maxcompiler.v2.managers.engine_interfaces.EngineInterface; import com.maxeler.maxcompiler.v2.managers.engine_interfaces.EngineInterface.Direction; import com.maxeler.maxcompiler.v2.managers.engine_interfaces.InterfaceParam; import com.maxeler.platform.max5.manager.MAX5CManager; public class MyTasksManager extends MAX5CManager { private static final CPUTypes TYPE = CPUTypes.INT32; public MyTasksManager(EngineParameters params) { super(params); KernelBlock kernel1 = addKernel(new Task1(makeKernelParameters("Task1"))); KernelBlock kernel2 = addKernel(new Task2(makeKernelParameters("Task2"))); KernelBlock kernel3 = addKernel(new Task3(makeKernelParameters("Task3"))); LMemInterface iface = addLMemInterface(); kernel1.getInput("inAT1") <== addStreamFromCPU("inAT1"); kernel1.getInput("inBT1") <== addStreamFromCPU("inBT1"); DFELink outCT1 = iface.addStreamToLMem("outCT1", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); outCT1 <== kernel1.getOutput("outCT1"); DFELink inAT2 = iface.addStreamFromLMem("inAT2", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); kernel2.getInput("inAT2") <== inAT2; DFELink inBT2 = iface.addStreamFromLMem("inBT2", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); kernel2.getInput("inBT2") <== inBT2; DFELink outCT2 = iface.addStreamToLMem("outCT2", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); outCT2 <== kernel2.getOutput("outCT2"); DFELink inAT3 = iface.addStreamFromLMem("inAT3", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); kernel3.getInput("inAT3") <== inAT3; DFELink inBT3 = iface.addStreamFromLMem("inBT3", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); kernel3.getInput("inBT3") <== inBT3; addStreamToCPU("outCT3") <== kernel3.getOutput("outCT3"); createSlicInterface(interfaceT1("interfaceT1")); createSlicInterface(interfaceT2("interfaceT2")); createSlicInterface(interfaceT3("interfaceT3")); } public static void main(String[] args) { MyTasksManager manager = new MyTasksManager(new EngineParameters(args)); manager.build(); } private static EngineInterface interfaceT1(String name) { EngineInterface ei = new EngineInterface(name); InterfaceParam ptrC1; InterfaceParam N; InterfaceParam sizeInBytes; N = ei.addParam("N", TYPE); sizeInBytes = N * TYPE.sizeInBytes(); ptrC1 = ei.addParam("ptrC1", TYPE); ei.setTicks("Task1", N); ei.setStream("inAT1", TYPE, sizeInBytes); ei.setStream("inBT1", TYPE, sizeInBytes); ei.setLMemLinear("outCT1", ptrC1, sizeInBytes); ei.ignoreAll(Direction.IN_OUT); ei.ignoreKernel("Task2"); ei.ignoreKernel("Task3"); return ei; } private static EngineInterface interfaceT2(String name) { EngineInterface ei = new EngineInterface(name); InterfaceParam ptrA2; InterfaceParam ptrB2; InterfaceParam ptrC2; InterfaceParam N; InterfaceParam sizeInBytes; N = ei.addParam("N", TYPE); sizeInBytes = N * TYPE.sizeInBytes(); ptrA2 = ei.addParam("ptrA2", TYPE); ptrB2 = ei.addParam("ptrB2", TYPE); ptrC2 = ei.addParam("ptrC2", TYPE); ei.setTicks("Task2", N); ei.setLMemLinear("inAT2", ptrA2, sizeInBytes); ei.setLMemLinear("inBT2", ptrB2, sizeInBytes); ei.setLMemLinear("outCT2", ptrC2, sizeInBytes); ei.ignoreAll(Direction.IN_OUT); ei.ignoreKernel("Task1"); ei.ignoreKernel("Task3"); return ei; } private static EngineInterface interfaceT3(String name) { EngineInterface ei = new EngineInterface(name); InterfaceParam ptrA3; InterfaceParam ptrB3; InterfaceParam N; InterfaceParam sizeInBytes; N = ei.addParam("N", TYPE); sizeInBytes = N * TYPE.sizeInBytes(); ptrA3 = ei.addParam("ptrA3", TYPE); ptrB3 = ei.addParam("ptrB3", TYPE); ei.setTicks("Task3", N); ei.setLMemLinear("inAT3", ptrA3, sizeInBytes); ei.setLMemLinear("inBT3", ptrB3, sizeInBytes); ei.setStream("outCT3", TYPE, sizeInBytes); ei.ignoreAll(Direction.IN_OUT); ei.ignoreKernel("Task1"); ei.ignoreKernel("Task2"); return ei; } } starpu-1.4.9+dfsg/tests/maxfpga/MyTasksMuxManager.maxj000066400000000000000000000101341507764646700230350ustar00rootroot00000000000000package maxfpga; import com.maxeler.maxcompiler.v2.build.EngineParameters; //import com.maxeler.maxcompiler.v2.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v2.managers.custom.DFELink; import com.maxeler.maxcompiler.v2.managers.custom.blocks.KernelBlock; import com.maxeler.maxcompiler.v2.managers.custom.blocks.Mux; import com.maxeler.maxcompiler.v2.managers.custom.blocks.Demux; import com.maxeler.maxcompiler.v2.managers.custom.stdlib.LMemCommandGroup; import com.maxeler.maxcompiler.v2.managers.custom.stdlib.LMemInterface; import com.maxeler.platform.max5.manager.MAX5CManager; public class MyTasksMuxManager extends MAX5CManager { public MyTasksMuxManager(EngineParameters params) { super(params); KernelBlock kernel1 = addKernel(new Task1(makeKernelParameters("Task1"))); KernelBlock kernel2 = addKernel(new Task2(makeKernelParameters("Task2"))); KernelBlock kernel3 = addKernel(new Task3(makeKernelParameters("Task3"))); LMemInterface iface = addLMemInterface(); Mux joinInAT1 = mux("joinInAT1"); Mux joinInBT1 = mux("joinInBT1"); joinInAT1.addInput("inCPU") <== addStreamFromCPU("inAT1CPU"); joinInAT1.addInput("inLMem") <== iface.addStreamFromLMem("inAT1LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); joinInBT1.addInput("inCPU") <== addStreamFromCPU("inBT1CPU"); joinInBT1.addInput("inLMem") <== iface.addStreamFromLMem("inBT1LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); kernel1.getInput("inAT1") <== joinInAT1.getOutput(); kernel1.getInput("inBT1") <== joinInBT1.getOutput(); Demux forkOutCT1 = demux("forkOutCT1"); DFELink outCT1CPU = forkOutCT1.addOutput("outCPU"); DFELink outCT1LMem = forkOutCT1.addOutput("outLMem"); forkOutCT1.getInput() <== kernel1.getOutput("outCT1"); addStreamToCPU("outCT1CPU") <== outCT1CPU; iface.addStreamToLMem("outCT1LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D) <== outCT1LMem; Mux joinInAT2 = mux("joinInAT2"); Mux joinInBT2 = mux("joinInBT2"); joinInAT2.addInput("inCPU") <== addStreamFromCPU("inAT2CPU"); joinInAT2.addInput("inLMem") <== iface.addStreamFromLMem("inAT2LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); joinInBT2.addInput("inCPU") <== addStreamFromCPU("inBT2CPU"); joinInBT2.addInput("inLMem") <== iface.addStreamFromLMem("inBT2LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); kernel2.getInput("inAT2") <== joinInAT2.getOutput(); kernel2.getInput("inBT2") <== joinInBT2.getOutput(); Demux forkOutCT2 = demux("forkOutCT2"); DFELink outCT2CPU = forkOutCT2.addOutput("outCPU"); DFELink outCT2LMem = forkOutCT2.addOutput("outLMem"); forkOutCT2.getInput() <== kernel2.getOutput("outCT2"); addStreamToCPU("outCT2CPU") <== outCT2CPU; iface.addStreamToLMem("outCT2LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D) <== outCT2LMem; Mux joinInAT3 = mux("joinInAT3"); Mux joinInBT3 = mux("joinInBT3"); joinInAT3.addInput("inCPU") <== addStreamFromCPU("inAT3CPU"); joinInAT3.addInput("inLMem") <== iface.addStreamFromLMem("inAT3LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); joinInBT3.addInput("inCPU") <== addStreamFromCPU("inBT3CPU"); joinInBT3.addInput("inLMem") <== iface.addStreamFromLMem("inBT3LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D); kernel3.getInput("inAT3") <== joinInAT3.getOutput(); kernel3.getInput("inBT3") <== joinInBT3.getOutput(); Demux forkOutCT3 = demux("forkOutCT3"); DFELink outCT3CPU = forkOutCT3.addOutput("outCPU"); DFELink outCT3LMem = forkOutCT3.addOutput("outLMem"); forkOutCT3.getInput() <== kernel3.getOutput("outCT3"); addStreamToCPU("outCT3CPU") <== outCT3CPU; iface.addStreamToLMem("outCT3LMem", LMemCommandGroup.MemoryAccessPattern.LINEAR_1D) <== outCT3LMem; } public static void main(String[] args) { MyTasksMuxManager manager = new MyTasksMuxManager(new EngineParameters(args)); manager.build(); } } starpu-1.4.9+dfsg/tests/maxfpga/README.txt000066400000000000000000000053171507764646700203010ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # ###################### ##### Examples ###### ###################### 4 examples are provided to show the use of the different SLiC interfaces: - max_fpga_basic_static.c lets SLiC initialize the maxeler stack itself. This is a very simple interface but does not allow for multiple fpga support. - max_fpga_advanced_static.c loads the maxeler design itself. This is a bit more complex to call, but allows for multiple fpga support. - max_fpga_dynamic.c achieves the same as max_fpga_advanced_static.c, but using the dynamic interface. - max_fpga_mux.c goes one step further by making input/output on the CPU or local memory at will. ###################### ##### Maxeler ###### ###################### $ export XILINXD_LICENSE_FILE=2100@jumax $ module load vivado maxcompiler $ module load devtoolset/8 The Makefiles then build the program automatically. They do the equivalent of the following, written here only for information: Building the JAVA program: (for kernel and Manager (.maxj)) $ cd starpu/tests/ $ maxjc -1.7 -cp $MAXCLASSPATH fpga Running the Java program to generate a DFE implementation (a .max file) that can be called from a StarPU/FPGA application and slic headers (.h) for simulation: $ java -XX:+UseSerialGC -Xmx2048m -cp $MAXCLASSPATH:. fpga.MyTasksManager DFEModel=MAIA maxFileName=MyTasks target=DFE_SIM $ cp MyTasks_MAX5C_DFE_SIM/results/*{.max,.h} fpga $ cd fpga Building the slic object file (simulation): $ sliccompile MyTasks.max Once built, to start the simulation: $ maxcompilersim -c LIMA -n $USER-MyTasks restart $ export LD_LIBRARY_PATH=$MAXELEROSDIR/lib:$LD_LIBRARY_PATH $ export SLIC_CONF="use_simulation=$USER-MyTasks" PS: To stop simulation $ maxcompilersim -c LIMA -n $USER-MyTasks stop ################################# ##### StarPU with Maxeler ###### ################################# $ ./autogen.sh $ ../configure --prefix=$PWD/../install $ make By default they are built for simulation (target DFE_SIM). To build for native execution, use instead: make MAX_TARGET=DFE To test the code (.c): $ ./tests/fpga/max_fpga starpu-1.4.9+dfsg/tests/maxfpga/StreamFMACpuCode.cpp000066400000000000000000000026011507764646700223220ustar00rootroot00000000000000#include #include #include #include #include #include "StreamFMA.h" #include "MaxSLiCInterface.h" int main() { const int size = 400; int sizeBytes = size * sizeof(int32_t); int32_t *a = (int32_t*) malloc(sizeBytes); int32_t *b = (int32_t*) malloc(sizeBytes); int32_t *c = (int32_t*) malloc(sizeBytes); // TODO Generate input data for(int i = 0; i < size; ++i) { a[i] = random() % 100; b[i] = random() % 100; } max_file_t *maxfile = StreamFMA_init(); max_engine_t *engine = max_load(maxfile, "*"); max_actions_t* act = max_actions_init(maxfile, NULL); max_set_ticks (act, "StreamFMAKernel", size); max_queue_input(act, "a", a, size * sizeof(int32_t)); max_queue_input(act, "b", b, size * sizeof(int32_t)); max_queue_output(act, "output", c, size * sizeof(int32_t)); max_run(engine, act); max_actions_free(act); max_unload(engine); int ret = 0; // TODO Use result data for(std::size_t i = 0; i < size; ++i) { int32_t ref =a[i] + b[i]; if (c[i] != ref) { std::cout << "Invalid Output at index " << i << ": " << std::endl; std::cout << " reference: " << ref << std::endl; std::cout << " value: " << c[i] << std::endl; ret = 1; break; } } if(0 == ret) { std::cout << "All " << size << " values calculated correctly on the DFE!" << std::endl; } std::cout << "Done." << std::endl; return ret; } starpu-1.4.9+dfsg/tests/maxfpga/Task1.maxj000066400000000000000000000012351507764646700204420ustar00rootroot00000000000000package maxfpga; import com.maxeler.maxcompiler.v2.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v2.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEType; import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEVar; class Task1 extends Kernel { private static final DFEType type = dfeInt(32); protected Task1(KernelParameters parameters) { super(parameters); DFEVar inAT1 = io.input("inAT1", type); DFEVar inBT1 = io.input("inBT1", type); DFEVar outCT1; outCT1 = inAT1+inBT1; io.output("outCT1", outCT1, type); } } starpu-1.4.9+dfsg/tests/maxfpga/Task2.maxj000066400000000000000000000012351507764646700204430ustar00rootroot00000000000000package maxfpga; import com.maxeler.maxcompiler.v2.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v2.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEType; import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEVar; class Task2 extends Kernel { private static final DFEType type = dfeInt(32); protected Task2(KernelParameters parameters) { super(parameters); DFEVar inAT2 = io.input("inAT2", type); DFEVar inBT2 = io.input("inBT2", type); DFEVar outCT2; outCT2 = inAT2*inBT2; io.output("outCT2", outCT2, type); } } starpu-1.4.9+dfsg/tests/maxfpga/Task3.maxj000066400000000000000000000012351507764646700204440ustar00rootroot00000000000000package maxfpga; import com.maxeler.maxcompiler.v2.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v2.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEType; import com.maxeler.maxcompiler.v2.kernelcompiler.types.base.DFEVar; class Task3 extends Kernel { private static final DFEType type = dfeInt(32); protected Task3(KernelParameters parameters) { super(parameters); DFEVar inAT3 = io.input("inAT3", type); DFEVar inBT3 = io.input("inBT3", type); DFEVar outCT3; outCT3 = inAT3+inBT3; io.output("outCT3", outCT3, type); } } starpu-1.4.9+dfsg/tests/maxfpga/max_fpga_advanced_static.c000066400000000000000000000165341507764646700237300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "../helper.h" /* This examples shows the case of determining statically whether data is in CPU * memory or DFE memory, and using the advanced Maxeler interface */ #include "MyTasks.h" #include #define SIZE (192/sizeof(int32_t)) void fpga_impl(void *buffers[], void *cl_arg) { (void)cl_arg; int32_t *ptrA = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); int size = STARPU_VECTOR_GET_NX(buffers[0]); // XXX: would rather use a scratch buffer size_t ptrCT1 = 0x00000000000000c0; size_t ptrAT2 = ptrCT1; size_t ptrBT2 = ptrCT1; size_t ptrCT2 = 0x0000000000000180; size_t ptrAT3 = ptrCT2; size_t ptrBT3 = ptrCT2; max_engine_t *engine = starpu_max_fpga_get_local_engine();; printf("Loading DFE memory.\n"); /* C = A+B */ MyTasks_interfaceT1_actions_t actT1 = { .param_N = size, .param_ptrC1 = ptrCT1, .instream_inAT1 = ptrA, .instream_inBT1 = ptrB, }; MyTasks_interfaceT1_run(engine, &actT1); printf("T1 finished\n"); /* C = A*B */ MyTasks_interfaceT2_actions_t actT2 = { .param_N = size, .param_ptrA2 = ptrAT2, .param_ptrB2 = ptrBT2, .param_ptrC2 = ptrCT2, }; MyTasks_interfaceT2_run(engine, &actT2); printf("T2 finished\n"); /* C = A+B */ MyTasks_interfaceT3_actions_t actT3 = { .param_N = size, .param_ptrA3 = ptrAT3, .param_ptrB3 = ptrBT3, .outstream_outCT3 = ptrC, }; MyTasks_interfaceT3_run(engine, &actT3); printf("T3 finished\n"); printf("Running DFE.\n"); } static struct starpu_codelet cl = { .max_fpga_funcs = {fpga_impl}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU}, }; void fpga_impl1(void *buffers[], void *cl_arg) { (void)cl_arg; int32_t *ptrA = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ int size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; printf("T1 with %p %p %zu\n", ptrA, ptrB, ptrC); /* C = A+B */ MyTasks_interfaceT1_actions_t act = { .param_N = size, .param_ptrC1 = ptrC, .instream_inAT1 = ptrA, .instream_inBT1 = ptrB, }; MyTasks_interfaceT1_run(engine, &act); printf("T1 finished\n"); } static struct starpu_codelet cl1 = { .max_fpga_funcs = {fpga_impl1}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL}, }; void fpga_impl2(void *buffers[], void *cl_arg) { (void)cl_arg; size_t ptrA = (size_t) STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */ size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ int size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; printf("T2 with %zu %zu %zu\n", ptrA, ptrB, ptrC); /* C = A*B */ MyTasks_interfaceT2_actions_t act = { .param_N = size, .param_ptrA2 = ptrA, .param_ptrB2 = ptrB, .param_ptrC2 = ptrC, }; MyTasks_interfaceT2_run(engine, &act); printf("T2 finished\n"); } static struct starpu_codelet cl2 = { .max_fpga_funcs = {fpga_impl2}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W} /* local by default */ }; void fpga_impl3(void *buffers[], void *cl_arg) { (void)cl_arg; size_t ptrA = (size_t) STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */ size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); int size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; printf("T3 with %zu %zu %p\n", ptrA, ptrB, ptrC); /* C = A+B */ MyTasks_interfaceT3_actions_t act = { .param_N = size, .param_ptrA3 = ptrA, .param_ptrB3 = ptrB, .outstream_outCT3 = ptrC, }; MyTasks_interfaceT3_run(engine, &act); printf("T3 finished\n"); } static struct starpu_codelet cl3 = { .max_fpga_funcs = {fpga_impl3}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_CPU}, }; int main(int argc, char **argv) { struct starpu_conf conf; starpu_data_handle_t handle_a, handle_b, handle_ct1, handle_ct2, handle_c; int ret; struct starpu_max_load load[2]; load[0].file = MyTasks_init(); load[0].engine_id_pattern = "*"; load[1].file = NULL; load[1].engine_id_pattern = NULL; starpu_conf_init(&conf); conf.sched_policy_name = "eager"; conf.calibrate = 0; conf.max_fpga_load = load; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Enable profiling */ starpu_profiling_status_set(STARPU_PROFILING_ENABLE); int32_t a[SIZE]; int32_t b[SIZE]; int32_t c[SIZE]; int i; for(i = 0; i < SIZE; ++i) { a[i] = random() % 100; b[i] = random() % 100; c[i] = 0; } starpu_vector_data_register(&handle_a, STARPU_MAIN_RAM, (uintptr_t) &a, SIZE, sizeof(a[0])); starpu_vector_data_register(&handle_b, STARPU_MAIN_RAM, (uintptr_t) &b, SIZE, sizeof(b[0])); starpu_vector_data_register(&handle_ct1, -1, 0, SIZE, sizeof(c[0])); starpu_vector_data_register(&handle_ct2, -1, 0, SIZE, sizeof(c[0])); starpu_vector_data_register(&handle_c, STARPU_MAIN_RAM, (uintptr_t) &c, SIZE, sizeof(c[0])); #if 0 ret = starpu_task_insert(&cl, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, handle_c, STARPU_TASK_SYNCHRONOUS, 1, 0); fprintf(stderr,"task submitted %d\n", ret); #else ret = starpu_task_insert(&cl1, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, handle_ct1, 0); fprintf(stderr,"task submitted %d\n", ret); ret = starpu_task_insert(&cl2, STARPU_R, handle_ct1, STARPU_R, handle_ct1, STARPU_W, handle_ct2, 0); fprintf(stderr,"task submitted %d\n", ret); ret = starpu_task_insert(&cl3, STARPU_R, handle_ct2, STARPU_R, handle_ct2, STARPU_W, handle_c, 0); fprintf(stderr,"task submitted %d\n", ret); #endif starpu_data_unregister(handle_a); starpu_data_unregister(handle_b); starpu_data_unregister(handle_c); ret = EXIT_SUCCESS; for (i = 0; i < SIZE; ++i) { int ct1 = a[i] + b[i]; int ct2 = ct1 * ct1; int ct3 = ct2 + ct2; if (c[i] != ct3) ret = EXIT_FAILURE; if (i < 10) { printf("%d == %d\n", c[i], ct3); if (c[i] != ct3) printf("OOOPS\n"); } } starpu_shutdown(); if (ret == EXIT_SUCCESS) printf("OK!\n"); return ret; } starpu-1.4.9+dfsg/tests/maxfpga/max_fpga_basic_static.c000066400000000000000000000142221507764646700232340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "../helper.h" /* This examples shows the case of determining statically whether data is in CPU * memory or DFE memory, and using the basic Maxeler interface */ #include "MyTasks.h" #include #define SIZE (192/sizeof(int32_t)) void fpga_impl(void *buffers[], void *cl_arg) { (void)cl_arg; int32_t *ptrA = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); int size = STARPU_VECTOR_GET_NX(buffers[0]); // XXX: would rather use a scratch buffer size_t ptrCT1 = 0x00000000000000c0; size_t ptrAT2 = ptrCT1; size_t ptrBT2 = ptrCT1; size_t ptrCT2 = 0x0000000000000180; size_t ptrAT3 = ptrCT2; size_t ptrBT3 = ptrCT2; printf("Loading DFE memory.\n"); /* C = A+B */ MyTasks_interfaceT1(size, ptrCT1, ptrA, ptrB); printf("T1 finished\n"); /* C = A*B */ MyTasks_interfaceT2(size, ptrAT2, ptrBT2, ptrCT2); printf("T2 finished\n"); /* C = A+B */ MyTasks_interfaceT3(size, ptrAT3, ptrBT3, ptrC); printf("T3 finished\n"); printf("Running DFE.\n"); } static struct starpu_codelet cl = { .max_fpga_funcs = {fpga_impl}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU}, }; void fpga_impl1(void *buffers[], void *cl_arg) { (void)cl_arg; int32_t *ptrA = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ int size = STARPU_VECTOR_GET_NX(buffers[0]); printf("T1 with %p %p %zu\n", ptrA, ptrB, ptrC); /* C = A+B */ MyTasks_interfaceT1(size, ptrC, ptrA, ptrB); printf("T1 finished\n"); } static struct starpu_codelet cl1 = { .max_fpga_funcs = {fpga_impl1}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL}, }; void fpga_impl2(void *buffers[], void *cl_arg) { (void)cl_arg; size_t ptrA = (size_t) STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */ size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ int size = STARPU_VECTOR_GET_NX(buffers[0]); printf("T2 with %zu %zu %zu\n", ptrA, ptrB, ptrC); /* C = A*B */ MyTasks_interfaceT2(size, ptrA, ptrB, ptrC); printf("T2 finished\n"); } static struct starpu_codelet cl2 = { .max_fpga_funcs = {fpga_impl2}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W} /* local by default */ }; void fpga_impl3(void *buffers[], void *cl_arg) { (void)cl_arg; size_t ptrA = (size_t) STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */ size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); int size = STARPU_VECTOR_GET_NX(buffers[0]); printf("T3 with %zu %zu %p\n", ptrA, ptrB, ptrC); /* C = A+B */ MyTasks_interfaceT3(size, ptrA, ptrB, ptrC); printf("T3 finished\n"); } static struct starpu_codelet cl3 = { .max_fpga_funcs = {fpga_impl3}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_CPU}, }; int main(int argc, char **argv) { struct starpu_conf conf; starpu_data_handle_t handle_a, handle_b, handle_ct1, handle_ct2, handle_c; int ret; starpu_conf_init(&conf); conf.sched_policy_name = "eager"; conf.calibrate = 0; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Enable profiling */ starpu_profiling_status_set(STARPU_PROFILING_ENABLE); int32_t a[SIZE]; int32_t b[SIZE]; int32_t c[SIZE]; int i; for(i = 0; i < SIZE; ++i) { a[i] = random() % 100; b[i] = random() % 100; c[i] = 0; } starpu_vector_data_register(&handle_a, STARPU_MAIN_RAM, (uintptr_t) &a, SIZE, sizeof(a[0])); starpu_vector_data_register(&handle_b, STARPU_MAIN_RAM, (uintptr_t) &b, SIZE, sizeof(b[0])); starpu_vector_data_register(&handle_ct1, -1, 0, SIZE, sizeof(c[0])); starpu_vector_data_register(&handle_ct2, -1, 0, SIZE, sizeof(c[0])); starpu_vector_data_register(&handle_c, STARPU_MAIN_RAM, (uintptr_t) &c, SIZE, sizeof(c[0])); #if 0 ret = starpu_task_insert(&cl, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, handle_c, STARPU_TASK_SYNCHRONOUS, 1, 0); fprintf(stderr,"task submitted %d\n", ret); #else ret = starpu_task_insert(&cl1, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, handle_ct1, 0); fprintf(stderr,"task submitted %d\n", ret); ret = starpu_task_insert(&cl2, STARPU_R, handle_ct1, STARPU_R, handle_ct1, STARPU_W, handle_ct2, 0); fprintf(stderr,"task submitted %d\n", ret); ret = starpu_task_insert(&cl3, STARPU_R, handle_ct2, STARPU_R, handle_ct2, STARPU_W, handle_c, 0); fprintf(stderr,"task submitted %d\n", ret); #endif starpu_data_unregister(handle_a); starpu_data_unregister(handle_b); starpu_data_unregister(handle_c); ret = EXIT_SUCCESS; for (i = 0; i < SIZE; ++i) { int ct1 = a[i] + b[i]; int ct2 = ct1 * ct1; int ct3 = ct2 + ct2; if (c[i] != ct3) ret = EXIT_FAILURE; if (i < 10) { printf("%d == %d\n", c[i], ct3); if (c[i] != ct3) printf("OOOPS\n"); } } starpu_shutdown(); if (ret == EXIT_SUCCESS) printf("OK!\n"); return ret; } starpu-1.4.9+dfsg/tests/maxfpga/max_fpga_dynamic.c000066400000000000000000000170401507764646700222310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "../helper.h" /* This examples shows the case of determining statically whether data is in CPU * memory or DFE memory, and using the dynamic Maxeler interface */ #include "MyTasks.h" #include #define SIZE (192/sizeof(int32_t)) static max_file_t *maxfile; void fpga_impl1(void *buffers[], void *cl_arg) { (void)cl_arg; int32_t *ptrA = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ int size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; printf("T1 with %p %p %zu\n", ptrA, ptrB, ptrC); /* C = A+B */ max_actions_t *acts = max_actions_init(maxfile, NULL); max_set_ticks(acts, "Task1", size); max_ignore_scalar(acts, "Task2", "run_cycle_count"); max_ignore_scalar(acts, "Task3", "run_cycle_count"); max_queue_input(acts, "inAT1", ptrA, size * sizeof(uint32_t)); max_queue_input(acts, "inBT1", ptrB, size * sizeof(uint32_t)); max_memctl_linear(acts, "MemoryControllerPro0", "outCT1", ptrC, size * sizeof(uint32_t)); max_ignore_memctl(acts, "MemoryControllerPro0", "inAT2"); max_ignore_memctl(acts, "MemoryControllerPro0", "inBT2"); max_ignore_memctl(acts, "MemoryControllerPro0", "outCT2"); max_ignore_memctl(acts, "MemoryControllerPro0", "inAT3"); max_ignore_memctl(acts, "MemoryControllerPro0", "inBT3"); max_ignore_stream(acts, "outCT3"); max_run(engine, acts); max_actions_free(acts); printf("T1 finished\n"); } static struct starpu_codelet cl1 = { .max_fpga_funcs = {fpga_impl1}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_CPU, STARPU_SPECIFIC_NODE_LOCAL}, }; void fpga_impl2(void *buffers[], void *cl_arg) { (void)cl_arg; size_t ptrA = (size_t) STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */ size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ size_t ptrC = (size_t) STARPU_VECTOR_GET_PTR(buffers[2]); /* FPGA */ int size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; printf("T2 with %zu %zu %zu\n", ptrA, ptrB, ptrC); /* C = A*B */ max_actions_t *acts = max_actions_init(maxfile, NULL); max_ignore_scalar(acts, "Task1", "run_cycle_count"); max_set_ticks(acts, "Task2", size); max_ignore_scalar(acts, "Task3", "run_cycle_count"); max_ignore_stream(acts, "inAT1"); max_ignore_stream(acts, "inBT1"); max_ignore_memctl(acts, "MemoryControllerPro0", "outCT1"); max_memctl_linear(acts, "MemoryControllerPro0", "inAT2", ptrA, size * sizeof(uint32_t)); max_memctl_linear(acts, "MemoryControllerPro0", "inBT2", ptrB, size * sizeof(uint32_t)); max_memctl_linear(acts, "MemoryControllerPro0", "outCT2", ptrC, size * sizeof(uint32_t)); max_ignore_memctl(acts, "MemoryControllerPro0", "inAT3"); max_ignore_memctl(acts, "MemoryControllerPro0", "inBT3"); max_ignore_stream(acts, "outCT3"); max_run(engine, acts); max_actions_free(acts); printf("T2 finished\n"); } static struct starpu_codelet cl2 = { .max_fpga_funcs = {fpga_impl2}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W} /* local by default */ }; void fpga_impl3(void *buffers[], void *cl_arg) { (void)cl_arg; size_t ptrA = (size_t) STARPU_VECTOR_GET_PTR(buffers[0]); /* FPGA */ size_t ptrB = (size_t) STARPU_VECTOR_GET_PTR(buffers[1]); /* FPGA */ int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); int size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; printf("T3 with %zu %zu %p\n", ptrA, ptrB, ptrC); /* C = A+B */ max_actions_t *acts = max_actions_init(maxfile, NULL); max_ignore_scalar(acts, "Task1", "run_cycle_count"); max_ignore_scalar(acts, "Task2", "run_cycle_count"); max_set_ticks(acts, "Task3", size); max_ignore_stream(acts, "inAT1"); max_ignore_stream(acts, "inBT1"); max_ignore_memctl(acts, "MemoryControllerPro0", "outCT1"); max_ignore_memctl(acts, "MemoryControllerPro0", "inAT2"); max_ignore_memctl(acts, "MemoryControllerPro0", "inBT2"); max_ignore_memctl(acts, "MemoryControllerPro0", "outCT2"); max_memctl_linear(acts, "MemoryControllerPro0", "inAT3", ptrA, size * sizeof(uint32_t)); max_memctl_linear(acts, "MemoryControllerPro0", "inBT3", ptrB, size * sizeof(uint32_t)); max_queue_output(acts, "outCT3", ptrC, size * sizeof(uint32_t)); max_run(engine, acts); max_actions_free(acts); printf("T3 finished\n"); } static struct starpu_codelet cl3 = { .max_fpga_funcs = {fpga_impl3}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_LOCAL, STARPU_SPECIFIC_NODE_CPU}, }; int main(int argc, char **argv) { struct starpu_conf conf; starpu_data_handle_t handle_a, handle_b, handle_ct1, handle_ct2, handle_c; int ret; maxfile = MyTasks_init(); struct starpu_max_load load[2]; load[0].file = maxfile; load[0].engine_id_pattern = "*"; load[1].file = NULL; load[1].engine_id_pattern = NULL; starpu_conf_init(&conf); conf.sched_policy_name = "eager"; conf.calibrate = 0; conf.max_fpga_load = load; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Enable profiling */ starpu_profiling_status_set(STARPU_PROFILING_ENABLE); int32_t a[SIZE]; int32_t b[SIZE]; int32_t c[SIZE]; int i; for(i = 0; i < SIZE; ++i) { a[i] = random() % 100; b[i] = random() % 100; c[i] = 0; } starpu_vector_data_register(&handle_a, STARPU_MAIN_RAM, (uintptr_t) &a, SIZE, sizeof(a[0])); starpu_vector_data_register(&handle_b, STARPU_MAIN_RAM, (uintptr_t) &b, SIZE, sizeof(b[0])); starpu_vector_data_register(&handle_ct1, -1, 0, SIZE, sizeof(c[0])); starpu_vector_data_register(&handle_ct2, -1, 0, SIZE, sizeof(c[0])); starpu_vector_data_register(&handle_c, STARPU_MAIN_RAM, (uintptr_t) &c, SIZE, sizeof(c[0])); ret = starpu_task_insert(&cl1, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, handle_ct1, 0); fprintf(stderr,"task submitted %d\n", ret); ret = starpu_task_insert(&cl2, STARPU_R, handle_ct1, STARPU_R, handle_ct1, STARPU_W, handle_ct2, 0); fprintf(stderr,"task submitted %d\n", ret); ret = starpu_task_insert(&cl3, STARPU_R, handle_ct2, STARPU_R, handle_ct2, STARPU_W, handle_c, 0); fprintf(stderr,"task submitted %d\n", ret); starpu_data_unregister(handle_a); starpu_data_unregister(handle_b); starpu_data_unregister(handle_c); ret = EXIT_SUCCESS; for (i = 0; i < SIZE; ++i) { int ct1 = a[i] + b[i]; int ct2 = ct1 * ct1; int ct3 = ct2 + ct2; if (c[i] != ct3) ret = EXIT_FAILURE; if (i < 10) { printf("%d == %d\n", c[i], ct3); if (c[i] != ct3) printf("OOOPS\n"); } } starpu_shutdown(); if (ret == EXIT_SUCCESS) printf("OK!\n"); return ret; } starpu-1.4.9+dfsg/tests/maxfpga/max_fpga_mux.c000066400000000000000000000223321507764646700214160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "../helper.h" /* This examples shows the case of letting the runtime determining whether data * should be in CPU memory or DFE memory, by integrating a multiplexer in a * design that can be toggled dynamically. */ #include "MyTasksMux.h" #include #define SIZE (192/sizeof(int32_t)) static max_file_t *maxfile; /* * Dynamically configure multiplexer and streaming from CPU or from LMem (ignoring the other) */ #define setupReadData(name) do { \ if (kind##name == STARPU_CPU_RAM) { \ max_route(acts, "joinIn"#name".inCPU", "joinIn"#name".join"); \ max_queue_input(acts, "in"#name"CPU", ptr##name, size * sizeof(uint32_t)); \ max_ignore_memctl(acts, "MemoryControllerPro0", "in"#name"LMem"); \ } else { \ max_route(acts, "joinIn"#name".inLMem", "joinIn"#name".join"); \ max_ignore_stream(acts, "in"#name"CPU"); \ max_memctl_linear(acts, "MemoryControllerPro0", "in"#name"LMem", (size_t) ptr##name, size * sizeof(int32_t)); \ } \ } while (0) /* * Ignore data from unused input */ #define ignoreReadData(name) do { \ max_route(acts, "joinIn"#name".inLMem", "joinIn"#name".join"); \ max_ignore_stream(acts, "in"#name"CPU"); \ max_ignore_memctl(acts, "MemoryControllerPro0", "in"#name"LMem"); \ } while (0) /* * Configure demultiplexer and streaming to CPU or to LMem (ignoring the other) */ #define setupWriteData(name) do { \ if (kind##name == STARPU_CPU_RAM) { \ max_route(acts, "forkOut"#name, "outCPU"); \ max_queue_output(acts, "out"#name"CPU", ptr##name, size * sizeof(uint32_t)); \ max_ignore_memctl(acts, "MemoryControllerPro0", "out"#name"LMem"); \ } else { \ max_route(acts, "forkOut"#name, "outLMem"); \ max_ignore_stream(acts, "out"#name"CPU"); \ max_memctl_linear(acts, "MemoryControllerPro0", "out"#name"LMem", (size_t) ptr##name, size * sizeof(uint32_t)); \ } \ } while (0) /* * Ignore data from unused output */ #define ignoreWriteData(name) do { \ max_route(acts, "forkOut"#name, "outLMem"); \ max_ignore_stream(acts, "out"#name"CPU"); \ max_ignore_memctl(acts, "MemoryControllerPro0", "out"#name"LMem"); \ } while (0) void fpga_impl1(void *buffers[], void *cl_arg) { (void)cl_arg; int32_t *ptrAT1 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); int32_t *ptrBT1 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); int32_t *ptrCT1 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); enum starpu_node_kind kindAT1 = starpu_node_get_kind(starpu_task_get_current_data_node(0)); enum starpu_node_kind kindBT1 = starpu_node_get_kind(starpu_task_get_current_data_node(1)); enum starpu_node_kind kindCT1 = starpu_node_get_kind(starpu_task_get_current_data_node(2)); int size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; printf("T1 with %p %p %p\n", ptrAT1, ptrBT1, ptrCT1); /* C = A+B */ max_actions_t *acts = max_actions_init(maxfile, NULL); max_set_ticks(acts, "Task1", size); max_ignore_scalar(acts, "Task2", "run_cycle_count"); max_ignore_scalar(acts, "Task3", "run_cycle_count"); setupReadData(AT1); setupReadData(BT1); setupWriteData(CT1); ignoreReadData(AT2); ignoreReadData(BT2); ignoreWriteData(CT2); ignoreReadData(AT3); ignoreReadData(BT3); ignoreWriteData(CT3); max_run(engine, acts); max_actions_free(acts); printf("T1 finished\n"); } static struct starpu_codelet cl1 = { .max_fpga_funcs = {fpga_impl1}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_LOCAL_OR_CPU, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU, STARPU_SPECIFIC_NODE_LOCAL}, }; void fpga_impl2(void *buffers[], void *cl_arg) { (void)cl_arg; int32_t *ptrAT2 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); int32_t *ptrBT2 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); int32_t *ptrCT2 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); enum starpu_node_kind kindAT2 = starpu_node_get_kind(starpu_task_get_current_data_node(0)); enum starpu_node_kind kindBT2 = starpu_node_get_kind(starpu_task_get_current_data_node(1)); enum starpu_node_kind kindCT2 = starpu_node_get_kind(starpu_task_get_current_data_node(2)); int size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; printf("T2 with %p %p %p\n", ptrAT2, ptrBT2, ptrCT2); /* C = A*B */ max_actions_t *acts = max_actions_init(maxfile, NULL); max_ignore_scalar(acts, "Task1", "run_cycle_count"); max_set_ticks(acts, "Task2", size); max_ignore_scalar(acts, "Task3", "run_cycle_count"); setupReadData(AT2); setupReadData(BT2); setupWriteData(CT2); ignoreReadData(AT1); ignoreReadData(BT1); ignoreWriteData(CT1); ignoreReadData(AT3); ignoreReadData(BT3); ignoreWriteData(CT3); max_run(engine, acts); max_actions_free(acts); printf("T2 finished\n"); } static struct starpu_codelet cl2 = { .max_fpga_funcs = {fpga_impl2}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_LOCAL_OR_CPU, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU, STARPU_SPECIFIC_NODE_LOCAL}, }; void fpga_impl3(void *buffers[], void *cl_arg) { (void)cl_arg; int32_t *ptrAT3 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); int32_t *ptrBT3 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); int32_t *ptrCT3 = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); enum starpu_node_kind kindAT3 = starpu_node_get_kind(starpu_task_get_current_data_node(0)); enum starpu_node_kind kindBT3 = starpu_node_get_kind(starpu_task_get_current_data_node(1)); enum starpu_node_kind kindCT3 = starpu_node_get_kind(starpu_task_get_current_data_node(2)); int size = STARPU_VECTOR_GET_NX(buffers[0]); max_engine_t *engine = starpu_max_fpga_get_local_engine();; printf("T3 with %p %p %p\n", ptrAT3, ptrBT3, ptrCT3); /* C = A+B */ max_actions_t *acts = max_actions_init(maxfile, NULL); max_ignore_scalar(acts, "Task1", "run_cycle_count"); max_ignore_scalar(acts, "Task2", "run_cycle_count"); max_set_ticks(acts, "Task3", size); setupReadData(AT3); setupReadData(BT3); setupWriteData(CT3); ignoreReadData(AT1); ignoreReadData(BT1); ignoreWriteData(CT1); ignoreReadData(AT2); ignoreReadData(BT2); ignoreWriteData(CT2); max_run(engine, acts); max_actions_free(acts); printf("T3 finished\n"); } static struct starpu_codelet cl3 = { .max_fpga_funcs = {fpga_impl3}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W}, .specific_nodes = 1, .nodes = {STARPU_SPECIFIC_NODE_LOCAL_OR_CPU, STARPU_SPECIFIC_NODE_LOCAL_OR_CPU, STARPU_SPECIFIC_NODE_CPU}, }; int main(int argc, char **argv) { struct starpu_conf conf; starpu_data_handle_t handle_a, handle_b, handle_ct1, handle_ct2, handle_c; int ret; maxfile = MyTasksMux_init(); struct starpu_max_load load[2]; load[0].file = maxfile; load[0].engine_id_pattern = "*"; load[1].file = NULL; load[1].engine_id_pattern = NULL; starpu_conf_init(&conf); conf.sched_policy_name = "eager"; conf.calibrate = 0; conf.max_fpga_load = load; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Enable profiling */ starpu_profiling_status_set(STARPU_PROFILING_ENABLE); int32_t a[SIZE]; int32_t b[SIZE]; int32_t c[SIZE]; int i; for(i = 0; i < SIZE; ++i) { a[i] = random() % 100; b[i] = random() % 100; c[i] = 0; } starpu_vector_data_register(&handle_a, STARPU_MAIN_RAM, (uintptr_t) &a, SIZE, sizeof(a[0])); starpu_vector_data_register(&handle_b, STARPU_MAIN_RAM, (uintptr_t) &b, SIZE, sizeof(b[0])); starpu_vector_data_register(&handle_ct1, -1, 0, SIZE, sizeof(c[0])); starpu_vector_data_register(&handle_ct2, -1, 0, SIZE, sizeof(c[0])); starpu_vector_data_register(&handle_c, STARPU_MAIN_RAM, (uintptr_t) &c, SIZE, sizeof(c[0])); ret = starpu_task_insert(&cl1, STARPU_R, handle_a, STARPU_R, handle_b, STARPU_W, handle_ct1, 0); fprintf(stderr,"task submitted %d\n", ret); ret = starpu_task_insert(&cl2, STARPU_R, handle_ct1, STARPU_R, handle_ct1, STARPU_W, handle_ct2, 0); fprintf(stderr,"task submitted %d\n", ret); ret = starpu_task_insert(&cl3, STARPU_R, handle_ct2, STARPU_R, handle_ct2, STARPU_W, handle_c, 0); fprintf(stderr,"task submitted %d\n", ret); starpu_data_unregister(handle_a); starpu_data_unregister(handle_b); starpu_data_unregister(handle_c); ret = EXIT_SUCCESS; for (i = 0; i < SIZE; ++i) { int ct1 = a[i] + b[i]; int ct2 = ct1 * ct1; int ct3 = ct2 + ct2; if (c[i] != ct3) ret = EXIT_FAILURE; if (i < 10) { printf("%d == %d\n", c[i], ct3); if (c[i] != ct3) printf("OOOPS\n"); } } starpu_shutdown(); if (ret == EXIT_SUCCESS) printf("OK!\n"); return ret; } starpu-1.4.9+dfsg/tests/memory/000077500000000000000000000000001507764646700164625ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/memory/memstress.gp000066400000000000000000000021211507764646700210300ustar00rootroot00000000000000#!/usr/bin/gnuplot -persist # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # set term postscript landscape color 22 set output "memstress.ps" set xlabel "Memory Pressure (MB)" set ylabel "execution time degradation (%)" set grid y set key left top box set datafile missing 'x' plot "timings/memstress.data" usi 1:(( 100*(($2 / 2130) - 1))) with linespoint title "matrix size : 4096" ,\ "timings/memstress.data" usi 1:(( 100*(($3 / 16420) - 1) )) with linespoint title "8192" starpu-1.4.9+dfsg/tests/memory/memstress.sh000077500000000000000000000033261507764646700210470ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DIR=$PWD ROOTDIR=$DIR/../.. TIMINGDIR=$DIR/timings/ mkdir -p $TIMINGDIR filename=$TIMINGDIR/memstress.data sizelist="4096 8192" stresslist="0 50 100 150 200 250 300 350 400 450 500 550 600 650 655 660 665 670 675" #stresslist="672" trace_stress() { memstress=$1 export STARPU_NCPUS=0 export STARPU_NCUDA=1 export STRESS_MEM=$memstress line="$memstress" for size in $sizelist do nblocks=$(($size / 1024)) echo "Computing size $size with $memstress MB of memory LESS" echo "$ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null" timing=`$MS_LAUNCHER $STARPU_LAUNCH $ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null` echo "size : $size memstress $memstress => $timing us" line="$line $timing" done echo "$line" >> $filename } cd $ROOTDIR make clean 1> /dev/null 2> /dev/null make examples STARPU_ATLAS=1 CUDA=1 CPUS=3 1> /dev/null #2> /dev/null cd $DIR echo "#memstress $sizelist " > $filename for memstress in $stresslist do trace_stress $memstress; done starpu-1.4.9+dfsg/tests/memory/memstress2.gp000066400000000000000000000017671507764646700211310ustar00rootroot00000000000000#!/usr/bin/gnuplot -persist # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # set term postscript landscape color 22 set output "memstress2.ps" set xlabel "Problem size" set ylabel "execution time" set logscale x set key left top set datafile missing 'x' plot "timings/memstress2.data" usi 1:2 with lines title "reference" ,\ "timings/memstress2.data" usi 1:3 with lines title "350 MB" starpu-1.4.9+dfsg/tests/memory/memstress2.sh000077500000000000000000000031341507764646700211260ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DIR=$PWD ROOTDIR=$DIR/../.. TIMINGDIR=$DIR/timings/ mkdir -p $TIMINGDIR filename=$TIMINGDIR/memstress2.data sizelist="512 1024 2048 4096 8192 16384" stresslist="0 350" #stresslist="672" trace_stress() { size=$1 line="$size" for stress in $stresslist do export STRESS_MEM=$stress nblocks=$(($size / 1024)) echo "Computing size $size with $stress MB of memory LESS" echo "$ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null" timing=`$MS_LAUNCHER $STARPU_LAUNCH $ROOTDIR/examples/mult/dw_mult -x $size -y $size -z $size -nblocks $nblocks 2>/dev/null` echo "size : $size memstress $stress => $timing us" line="$line $timing" done echo "$line" >> $filename } cd $ROOTDIR make clean 1> /dev/null 2> /dev/null make examples STARPU_ATLAS=1 CUDA=1 CPUS=0 1> /dev/null 2> /dev/null cd $DIR echo "#size $stresslist " > $filename for size in $sizelist do trace_stress $size; done starpu-1.4.9+dfsg/tests/microbenchs/000077500000000000000000000000001507764646700174465ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/microbenchs/async_tasks_data_overhead.sh000077500000000000000000000015511507764646700251770ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi ROOT=${0%.sh} ROOT=$(echo $ROOT | sed 's/tasks_data_overhead/tasks_overhead/') exec $MS_LAUNCHER $STARPU_LAUNCH $ROOT -b 1 "$@" starpu-1.4.9+dfsg/tests/microbenchs/async_tasks_overhead.c000066400000000000000000000133451507764646700240170ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Measure the cost of submitting asynchronous tasks */ starpu_data_handle_t data_handles[8]; float *buffers[8]; #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 128; #else static unsigned ntasks = 65536; #endif static unsigned nbuffers = 0; #define BUFFERSIZE 16 //static unsigned finished = 0; static double cumulated = 0.0; static double cumulated_push = 0.0; static double cumulated_pop = 0.0; void dummy_func(void *descr[], void *arg) { (void)descr; (void)arg; } static struct starpu_codelet dummy_codelet = { .cpu_funcs = {dummy_func}, .cuda_funcs = {dummy_func}, .opencl_funcs = {dummy_func}, .cpu_funcs_name = {"dummy_func"}, .model = NULL, .nbuffers = 0, .modes = {STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW} }; static void usage(char **argv) { fprintf(stderr, "Usage: %s [-i ntasks] [-p sched_policy] [-b nbuffers] [-h]\n", argv[0]); exit(EXIT_FAILURE); } static void parse_args(int argc, char **argv, struct starpu_conf *conf) { int c; while ((c = getopt(argc, argv, "i:b:p:h")) != -1) switch(c) { case 'i': ntasks = atoi(optarg); break; case 'b': nbuffers = atoi(optarg); dummy_codelet.nbuffers = nbuffers; break; case 'p': conf->sched_policy_name = optarg; break; case 'h': usage(argv); break; } } int main(int argc, char **argv) { int ret; unsigned i; double timing; double start; double end; struct starpu_conf conf; starpu_conf_init(&conf); conf.ncpus = 2; parse_args(argc, argv, &conf); ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned buffer; for (buffer = 0; buffer < nbuffers; buffer++) { starpu_malloc((void**)&buffers[buffer], BUFFERSIZE*sizeof(float)); starpu_vector_data_register(&data_handles[buffer], STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], BUFFERSIZE, sizeof(float)); } starpu_profiling_status_set(STARPU_PROFILING_ENABLE); fprintf(stderr, "#tasks : %u\n#buffers : %u\n", ntasks, nbuffers); /* Create an array of tasks */ struct starpu_task **tasks = (struct starpu_task **) malloc(ntasks*sizeof(struct starpu_task *)); for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &dummy_codelet; task->detach = 0; /* we have 8 buffers at most */ for (buffer = 0; buffer < nbuffers; buffer++) { task->handles[buffer] = data_handles[buffer]; } tasks[i] = task; } start = starpu_timing_now(); for (i = 0; i < ntasks; i++) { ret = starpu_task_submit(tasks[i]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); end = starpu_timing_now(); /* Read profiling feedback */ for (i = 0; i < ntasks; i++) { struct starpu_profiling_task_info *info; info = tasks[i]->profiling_info; double queued = starpu_timing_timespec_delay_us(&info->push_end_time, &info->pop_end_time); double length = starpu_timing_timespec_delay_us(&info->submit_time, &info->end_time); double push_duration = starpu_timing_timespec_delay_us(&info->push_start_time, &info->push_end_time); double pop_duration = starpu_timing_timespec_delay_us(&info->pop_start_time, &info->pop_end_time); starpu_task_destroy(tasks[i]); cumulated += (length - queued); cumulated_push += push_duration; cumulated_pop += pop_duration; } timing = end - start; fprintf(stderr, "Total: %f secs\n", timing/1000000); fprintf(stderr, "Per task: %f usecs\n", timing/ntasks); fprintf(stderr, "Per task (except scheduler): %f usecs\n", cumulated/ntasks); fprintf(stderr, "Per task (push): %f usecs\n", cumulated_push/ntasks); fprintf(stderr, "Per task (pop): %f usecs\n", cumulated_pop/ntasks); { char *output_dir = getenv("STARPU_BENCH_DIR"); char *bench_id = getenv("STARPU_BENCH_ID"); if (output_dir && bench_id) { char number[1+sizeof(nbuffers)*3+1]; const char *numberp; char file[1024]; FILE *f; if (nbuffers) { snprintf(number, sizeof(number), "_%u", nbuffers); numberp = number; } else numberp = ""; snprintf(file, sizeof(file), "%s/async_tasks_overhead_total%s.dat", output_dir, numberp); f = fopen(file, "a"); fprintf(f, "%s\t%f\n", bench_id, timing/1000000); fclose(f); snprintf(file, sizeof(file), "%s/async_tasks_overhead_per_task%s.dat", output_dir, numberp); f = fopen(file, "a"); fprintf(f, "%s\t%f\n", bench_id, timing/ntasks); fclose(f); } } for (buffer = 0; buffer < nbuffers; buffer++) { starpu_data_unregister(data_handles[buffer]); starpu_free_noflag((void*)buffers[buffer], BUFFERSIZE*sizeof(float)); } starpu_shutdown(); free(tasks); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); free(tasks); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/microbenchs/bandwidth.c000066400000000000000000000205721507764646700215640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Measure the memory bandwidth available to kernels depending on the number of * kernels and number of idle workers. */ #if defined(STARPU_QUICK_CHECK) || defined(STARPU_SANITIZE_LEAK) || defined(STARPU_SANITIZE_ADDRESS) || defined(STARPU_SANITIZE_UNDEFINED) static size_t size = 1024; #else /* Must be bigger than available cache size per core, 64MiB should be enough */ static size_t size = 64UL << 20; #endif static unsigned cpustep = 0; static unsigned noalone = 0; static unsigned iter = 30; static unsigned total_ncpus; static starpu_pthread_barrier_t barrier_begin, barrier_end; static float *result; static void **buffers; /* Indexed by logical core number */ static char padding1[STARPU_CACHELINE_SIZE]; static volatile char finished; static char padding2[STARPU_CACHELINE_SIZE]; static unsigned interleave(unsigned i); /* Initialize the buffer locally */ void initialize_buffer(void *foo) { (void) foo; unsigned id = starpu_worker_get_id(); #ifdef STARPU_HAVE_POSIX_MEMALIGN int ret = posix_memalign(&buffers[id], getpagesize(), 2*size); STARPU_ASSERT(ret == 0); #else buffers[id] = malloc(2*size); #endif memset(buffers[id], 0, 2*size); } /* Actual transfer codelet */ void bw_func(void *descr[], void *arg) { (void)descr; int id = (uintptr_t) arg; void *src = buffers[id]; void *dst = (void*) ((uintptr_t)src + size); unsigned i; double start, stop; STARPU_PTHREAD_BARRIER_WAIT(&barrier_begin); start = starpu_timing_now(); for (i = 0; i < iter; i++) { memcpy(dst, src, size); STARPU_SYNCHRONIZE(); } stop = starpu_timing_now(); STARPU_PTHREAD_BARRIER_WAIT(&barrier_end); finished = 1; result[id] = (size*iter) / (stop - start); } static struct starpu_codelet bw_codelet = { .cpu_funcs = {bw_func}, .model = NULL, .nbuffers = 0, }; /* Codelet that waits for completion while doing lots of cpu yields (nop). */ void nop_func(void *descr[], void *arg) { (void)descr; (void)arg; STARPU_PTHREAD_BARRIER_WAIT(&barrier_begin); while (!finished) { unsigned i; for (i = 0; i < 1000000; i++) STARPU_UYIELD(); STARPU_SYNCHRONIZE(); } } static struct starpu_codelet nop_codelet = { .cpu_funcs = {nop_func}, .model = NULL, .nbuffers = 0, }; /* Codelet that waits for completion while aggressively reading the finished variable. */ void sync_func(void *descr[], void *arg) { (void)descr; (void)arg; STARPU_PTHREAD_BARRIER_WAIT(&barrier_begin); while (!finished) { STARPU_VALGRIND_YIELD(); STARPU_SYNCHRONIZE(); } } static struct starpu_codelet sync_codelet = { .cpu_funcs = {sync_func}, .model = NULL, .nbuffers = 0, }; static void usage(char **argv) { fprintf(stderr, "Usage: %s [-n niter] [-s size (MB)] [-c cpustep] [-a]\n", argv[0]); fprintf(stderr, "\t-n niter\tNumber of iterations\n"); fprintf(stderr, "\t-s size\tBuffer size in MB\n"); fprintf(stderr, "\t-c cpustep\tCpu number increment\n"); fprintf(stderr, "\t-a Do not run the alone test\n"); exit(EXIT_FAILURE); } static void parse_args(int argc, char **argv) { int c; while ((c = getopt(argc, argv, "n:s:c:ah")) != -1) switch(c) { case 'n': iter = atoi(optarg); break; case 's': size = (long)atoi(optarg) << 20; break; case 'c': cpustep = atoi(optarg); break; case 'a': noalone = 1; break; case 'h': usage(argv); break; } } static unsigned interleave(unsigned i) { /* TODO: rather distribute over hierarchy */ if (total_ncpus > 1) return (i % (total_ncpus/2))*2 + i / (total_ncpus/2); else return 0; } enum sleep_type { PAUSE, NOP, SYNC, SCHED, }; static float bench(int *argc, char ***argv, unsigned nbusy, unsigned ncpus, int intl, enum sleep_type sleep) { int ret; unsigned i; struct starpu_conf conf; float bw; starpu_conf_init(&conf); conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.ncpus = ncpus; if (intl && sleep == PAUSE) { conf.use_explicit_workers_bindid = 1; for (i = 0; i < ncpus; i++) conf.workers_bindid[i] = interleave(i); } ret = starpu_initialize(&conf, argc, argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (sleep == PAUSE || sleep == SCHED) /* In these cases we don't have a task on each cpu */ STARPU_PTHREAD_BARRIER_INIT(&barrier_begin, NULL, nbusy); else STARPU_PTHREAD_BARRIER_INIT(&barrier_begin, NULL, ncpus); STARPU_PTHREAD_BARRIER_INIT(&barrier_end, NULL, nbusy); finished = 0; for (i = 0; i < ncpus; i++) result[i] = NAN; for (i = 0; i < nbusy; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &bw_codelet; if (intl) task->cl_arg = (void*) (uintptr_t) interleave(i); else task->cl_arg = (void*) (uintptr_t) i; task->execute_on_a_specific_worker = 1; if (intl && sleep != PAUSE) /* In the pause case we interleaved above */ task->workerid = interleave(i); else task->workerid = i; ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } if (sleep != PAUSE && sleep != SCHED) { /* Add waiting tasks */ for (; i < ncpus; i++) { struct starpu_task *task = starpu_task_create(); switch (sleep) { case NOP: task->cl = &nop_codelet; break; case SYNC: task->cl = &sync_codelet; break; default: STARPU_ASSERT(0); } task->execute_on_a_specific_worker = 1; task->workerid = interleave(i); ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } starpu_task_wait_for_all(); starpu_shutdown(); STARPU_PTHREAD_BARRIER_DESTROY(&barrier_begin); STARPU_PTHREAD_BARRIER_DESTROY(&barrier_end); for (bw = 0., i = 0; i < nbusy; i++) { if (intl) bw += result[interleave(i)]; else bw += result[i]; } return bw; } int main(int argc, char **argv) { int ret; unsigned n; struct starpu_conf conf; float alone, alone_int, alone_int_nop, alone_int_sync, sched, sched_int; parse_args(argc, argv); starpu_conf_init(&conf); conf.precedence_over_environment_variables = 1; starpu_conf_noworker(&conf); conf.ncpus = -1; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); total_ncpus = starpu_cpu_worker_get_count(); buffers = malloc(total_ncpus * sizeof(*buffers)); starpu_execute_on_each_worker_ex(initialize_buffer, NULL, STARPU_CPU, "initialize_buffer"); starpu_shutdown(); if (total_ncpus == 0) return STARPU_TEST_SKIPPED; result = malloc(total_ncpus * sizeof(result[0])); if (cpustep == 0) { #if defined(STARPU_QUICK_CHECK) || defined(STARPU_SANITIZE_LEAK) || defined(STARPU_SANITIZE_ADDRESS) cpustep = total_ncpus / 2; #elif defined(STARPU_LONG_CHECK) cpustep = 1; #else cpustep = total_ncpus / 8; #endif if (cpustep == 0) cpustep = 1; } printf("# nw\ta comp.\t+sched\teff%%\ta scat.\t+nop\t+sync\t+sched\teff%% vs nop\n"); for (n = cpustep; n <= total_ncpus; n += cpustep) { if (noalone) { alone = 0.; alone_int = 0.; alone_int_nop = 0.; alone_int_sync = 0.; } else { alone = bench(&argc, &argv, n, n, 0, PAUSE); alone_int = bench(&argc, &argv, n, n, 1, PAUSE); alone_int_nop = bench(&argc, &argv, n, total_ncpus, 1, NOP); alone_int_sync = bench(&argc, &argv, n, total_ncpus, 1, SYNC); } sched = bench(&argc, &argv, n, total_ncpus, 0, SCHED); sched_int = bench(&argc, &argv, n, total_ncpus, 1, SCHED); printf("%u\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", n, alone/1000, sched/1000, sched*100/alone, alone_int/1000, alone_int_nop/1000, alone_int_sync/1000, sched_int/1000, sched_int*100/alone_int_nop); fflush(stdout); } free(result); for (n = 0; n < total_ncpus; n++) free(buffers[n]); free(buffers); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/microbenchs/bandwidth_scheds.sh000077500000000000000000000050651507764646700233100ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi set -e DIR=$(dirname $0) if [ -n "$STARPU_SCHED" ] then SCHEDS=$STARPU_SCHED DEFAULT=$STARPU_SCHED else SCHEDS=`$DIR/../../tools/starpu_sched_display` DEFAULT=eager fi if [ -n "$STARPU_BENCH_DIR" ]; then cat > bandwidth.gp << EOF set term svg font ",12" size 1500,500 linewidth 0.5 set output "bandwidth.svg" set pointsize 0.3 EOF else fast="-n 3 -c 4" cat > bandwidth.gp << EOF set term postscript eps enhanced color font ",18" set output "bandwidth.eps" set size 2,1 EOF fi cat >> bandwidth.gp << EOF set key outside set ylabel "GB/s" set xlabel "ncores" plot \\ "bandwidth-$DEFAULT.dat" using 1:5 with lines title "alone interleave", \\ "bandwidth-$DEFAULT.dat" using 1:6 with lines title "nop", \\ "bandwidth-$DEFAULT.dat" using 1:7 with lines title "sync", \\ "bandwidth-$DEFAULT.dat" using 1:2 with lines title "alone contiguous", \\ EOF run() { sched=$1 type=$2 if [ "$sched" != eager -a "$sched" != "$SCHEDS" ]; then extra=-a else extra= fi STARPU_BACKOFF_MIN=0 STARPU_BACKOFF_MAX=0 STARPU_SCHED=$sched $STARPU_SUB_PARALLEL $MS_LAUNCHER $STARPU_LAUNCH $DIR/bandwidth $fast $extra "$@" | tee bandwidth-$sched.dat echo "\"bandwidth-$sched.dat\" using 1:3 with linespoints lt $type pt $type title \"$sched\", \\" >> bandwidth.gp echo "\"bandwidth-$sched.dat\" using 1:8 with linespoints lt $type pt $type notitle, \\" >> bandwidth.gp } if [ -n "$STARPU_SUB_PARALLEL" ] then type=1 for sched in $SCHEDS do run $sched $type & type=$((type+1)) done RESULT=0 while true do set +e wait -n RET=$? set -e if [ $RET = 127 ] ; then break ; fi if [ $RET != 0 -a $RET != 77 ] ; then RESULT=1 ; fi done exit $RESULT else type=1 for sched in $SCHEDS do run $sched $type type=$((type+1)) done fi if gnuplot bandwidth.gp ; then if [ -n "$STARPU_BENCH_DIR" ]; then cp bandwidth.svg $STARPU_BENCH_DIR/ fi fi starpu-1.4.9+dfsg/tests/microbenchs/display_structures_size.c000066400000000000000000000016611507764646700246200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include /* * Display the sizes of various StarPU data structures */ int main(int argc, char **argv) { (void)argc; (void)argv; _starpu_debug_display_structures_size(stderr); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/microbenchs/local_pingpong.c000066400000000000000000000057141507764646700226140ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include "../helper.h" /* * Trigger a ping-pong test between two CUDA GPUs */ static size_t vector_size = 1; #ifdef STARPU_QUICK_CHECK static int niter = 16; #else static int niter = 1000; #endif //static unsigned cnt; //static unsigned finished = 0; starpu_data_handle_t v_handle; static unsigned *v; static char worker_0_name[128]; static char worker_1_name[128]; static unsigned memory_node_0; static unsigned memory_node_1; double start; double end; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Create a piece of data */ ret = starpu_malloc((void **)&v, vector_size); STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, vector_size, 1); /* Find a pair of memory nodes */ if (starpu_cuda_worker_get_count() > 1) { /* Take the two devices that come first */ int nworkers = (int)starpu_worker_get_count(); unsigned found_node_0 = 0; int w; for (w = 0; w < nworkers; w++) { if (starpu_worker_get_type(w) == STARPU_CUDA_WORKER) { if (!found_node_0) { memory_node_0 = starpu_worker_get_memory_node(w); starpu_worker_get_name(w, worker_0_name, 128); found_node_0 = 1; } else { memory_node_1 = starpu_worker_get_memory_node(w); starpu_worker_get_name(w, worker_1_name, 128); break; } } } fprintf(stderr, "Ping-pong will be done between %s (node %u) and %s (node %u)\n", worker_0_name, memory_node_0, worker_1_name, memory_node_1); } // unsigned iter; /* warm up */ // unsigned nwarmupiter = 128; _starpu_benchmark_ping_pong(v_handle, memory_node_0, memory_node_1, 128); start = starpu_timing_now(); _starpu_benchmark_ping_pong(v_handle, memory_node_0, memory_node_1, niter); end = starpu_timing_now(); double timing = end - start; fprintf(stderr, "Took %f ms\n", timing/1000); fprintf(stderr, "Avg. transfer time : %f us\n", timing/(2*niter)); starpu_data_unregister(v_handle); starpu_free_noflag(v, vector_size); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/microbenchs/matrix_as_vector.c000066400000000000000000000167101507764646700231700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #ifdef STARPU_USE_CUDA # include #endif /* * Compare the efficiency of matrix and vector interfaces */ #ifdef STARPU_QUICK_CHECK #define LOOPS 5 #elif !defined(STARPU_LONG_CHECK) #define LOOPS 30 #else #define LOOPS 100 #endif void vector_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; STARPU_SKIP_IF_VALGRIND; float *matrix = (float *)STARPU_VECTOR_GET_PTR(descr[0]); int nx = STARPU_VECTOR_GET_NX(descr[0]); int i; float sum=0; for(i=0 ; i mean=%7f != %7f\n", nx, matrix[0], mean); ret = EXIT_FAILURE; } end: if (ret == -ENODEV) fprintf(stderr, "# Uh, ENODEV?!"); starpu_free_noflag(matrix, nx*sizeof(matrix[0])); starpu_task_wait_for_all(); return ret; } #define NX_MIN 1024 #ifdef STARPU_QUICK_CHECK #define NX_MAX 1024*32 #else #define NX_MAX 1024*1024 #endif static int check_size_on_device(uint32_t where, char *device_name) { int nx, ret; struct starpu_codelet vector_codelet; struct starpu_codelet matrix_codelet; fprintf(stderr, "# Device: %s\n", device_name); fprintf(stderr, "# nx vector_timing matrix_timing\n"); starpu_codelet_init(&vector_codelet); vector_codelet.modes[0] = STARPU_RW; vector_codelet.nbuffers = 1; if (where == STARPU_CPU) vector_codelet.cpu_funcs[0] = vector_cpu_func; #ifdef STARPU_USE_CUDA if (where == STARPU_CUDA) { vector_codelet.cuda_funcs[0] = vector_cuda_func; vector_codelet.cuda_flags[0] = STARPU_CUDA_ASYNC; } #endif // if (where == STARPU_OPENCL) vector_codelet.opencl_funcs[0] = vector_opencl_func; starpu_codelet_init(&matrix_codelet); matrix_codelet.modes[0] = STARPU_RW; matrix_codelet.nbuffers = 1; if (where == STARPU_CPU) matrix_codelet.cpu_funcs[0] = matrix_cpu_func; #ifdef STARPU_USE_CUDA if (where == STARPU_CUDA) { matrix_codelet.cuda_funcs[0] = matrix_cuda_func; matrix_codelet.cuda_flags[0] = STARPU_CUDA_ASYNC; } #endif // if (where == STARPU_OPENCL) matrix_codelet.opencl_funcs[0] = matrix_opencl_func; for(nx=NX_MIN ; nx<=NX_MAX ; nx*=2) { ret = check_size(nx, &vector_codelet, &matrix_codelet, device_name); if (ret != EXIT_SUCCESS) break; } return ret; } int main(void) { int ret; unsigned devices; #ifdef STARPU_USE_CUDA int cublas_version; #endif ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); devices = starpu_cpu_worker_get_count(); if (devices) { ret = check_size_on_device(STARPU_CPU, "STARPU_CPU"); if (ret) goto error; } #ifdef STARPU_USE_CUDA devices = starpu_cuda_worker_get_count(); if (devices) { cublasHandle_t handle; cublasCreate(&handle); cublasGetVersion(handle, &cublas_version); cublasDestroy(handle); if (cublas_version >= 7050) { starpu_cublas_init(); ret = check_size_on_device(STARPU_CUDA, "STARPU_CUDA"); if (ret) goto error; starpu_cublas_shutdown(); } } #endif #if 0 devices = starpu_opencl_worker_get_count(); if (devices) { ret = check_size_on_device(STARPU_OPENCL, "STARPU_OPENCL"); if (ret) goto error; } #endif error: if (ret == -ENODEV) ret=STARPU_TEST_SKIPPED; starpu_shutdown(); STARPU_RETURN(ret); } starpu-1.4.9+dfsg/tests/microbenchs/microbench.sh000077500000000000000000000052251507764646700221220ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # This provides a helper function to be used for microbenchs that should be run # under the various schedulers. # # The caller should fill either the XFAIL or XSUCCESS variable with the list of # schedulers which are supposed to fail or succeed, and then call test_scheds set -e # disable core generation ulimit -c 0 # Testing a specific scheduler if [ -n "$STARPU_SCHED" ] then SCHEDS=$STARPU_SCHED else SCHEDS=`$(dirname $0)/../../tools/starpu_sched_display` fi run() { sched=$1 set +e STARPU_SCHED=$sched $STARPU_SUB_PARALLEL $MS_LAUNCHER $STARPU_LAUNCH $(dirname $0)/$TEST "$@" ret=$? set -e if test $ret = 0 then ( echo PASS: STARPU_SCHED=$sched ./microbenchs/$TEST >&9 ) 2> /dev/null || true echo "SUCCESS: STARPU_SCHED=$sched ./microbenchs/$TEST" return 0 fi if test $ret = 77 then echo "SKIP: STARPU_SCHED=$sched ./microbenchs/$TEST" return 0 fi RESULT=0 if [ -n "$XSUCCESS" ] then # We have a list of schedulers that are expected to # succeed, others are allowed to fail case " $XSUCCESS " in *\ $sched\ *) echo "FAIL: STARPU_SCHED=$sched ./microbenchs/$TEST" | ( tee /dev/tty || true ) RESULT=1 ;; *) echo "XFAIL: STARPU_SCHED=$sched ./microbenchs/$TEST" ;; esac else # We have a list of schedulers that are expected to # fail, others are expected to succeed case " $XFAIL " in *\ $sched\ *) echo "XFAIL: STARPU_SCHED=$sched ./microbenchs/$TEST" ;; *) echo "FAIL: STARPU_SCHED=$sched ./microbenchs/$TEST" | ( tee /dev/tty || true ) RESULT=1 ;; esac fi return $RESULT } test_scheds() { TEST=$1 shift RESULT=0 if [ -n "$STARPU_SUB_PARALLEL" ] then for sched in $SCHEDS do run $sched & done while true do set +e wait -n RET=$? set -e if [ $RET = 127 ] ; then break ; fi if [ $RET != 0 -a $RET != 77 ] ; then RESULT=1 ; fi done else for sched in $SCHEDS do set +e run $sched RET=$? set -e if [ $RET != 0 -a $RET != 77 ] ; then RESULT=1 ; fi done fi exit $RESULT } starpu-1.4.9+dfsg/tests/microbenchs/parallel_dependent_homogeneous_tasks_data.c000066400000000000000000000120301507764646700302360ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Bérangère Subervie * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* Run a series of tasks with homogeneous execution time and share data to some extent */ #define TIME 0.010 #ifdef STARPU_QUICK_CHECK #define TASK_COEFFICIENT 5 #define DATA_COEFFICIENT 5.5 #define MARGIN 0.15 #else #define TASK_COEFFICIENT 10 #define DATA_COEFFICIENT 10.5 #define MARGIN 0.05 #endif #define SECONDS_SCALE_COEFFICIENT_TIMING_NOW 1000000 #define NB_FLOAT 4000000 void wait_homogeneous(void *descr[], void *_args) { (void)descr; (void)_args; starpu_sleep(TIME); } double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) { (void)t; (void)a; (void)i; return TIME * 1000000; } static struct starpu_perfmodel perf_model = { .type = STARPU_PER_ARCH, .arch_cost_function = cost_function, }; static struct starpu_codelet cl = { .cpu_funcs = { wait_homogeneous }, .cuda_funcs = { wait_homogeneous }, .opencl_funcs = { wait_homogeneous }, .cpu_funcs_name = { "wait_homogeneous" }, .nbuffers = 1, .modes = {STARPU_RW}, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .model = &perf_model, }; int main(int argc, char *argv[]) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned nb_tasks, nb_data, nb_workers; double begin_time, end_time, time_m, time_s, speed_up, expected_speed_up, percentage_expected_speed_up; bool check, check_sup; nb_workers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER) + starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) + starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); nb_tasks = nb_workers*TASK_COEFFICIENT*DATA_COEFFICIENT; nb_data = nb_workers*DATA_COEFFICIENT; /* We consider a vector of float that is initialized just as any of C * data */ float *vector[nb_data]; starpu_data_handle_t vector_handle[nb_data]; unsigned i,j; for (j = 0; j < nb_data; j++) { vector[j] = malloc(NB_FLOAT * sizeof(float)); #ifndef STARPU_SIMGRID for (i = 0; i < NB_FLOAT; i++) vector[j][i] = (i+1.0f); #endif /* Tell StaPU to associate the "vector" vector with the "vector_handle" * identifier. When a task needs to access a piece of data, it should * refer to the handle that is associated to it. * In the case of the "vector" data interface: * - the first argument of the registration method is a pointer to the * handle that should describe the data * - the second argument is the memory node where the data (ie. "vector") * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as * opposed to an address on a GPU for instance. * - the third argument is the address of the vector in RAM * - the fourth argument is the number of elements in the vector * - the fifth argument is the size of each element. */ starpu_vector_data_register(&vector_handle[j], STARPU_MAIN_RAM, (uintptr_t)vector[j], NB_FLOAT, sizeof(vector[0][0])); } begin_time = starpu_timing_now(); /*execution des tasks*/ for (i=0; i= ((1 - MARGIN) * expected_speed_up); check_sup = speed_up <= ((1 + MARGIN) * expected_speed_up); FPRINTF(stderr, "measured time = %f seconds\n", time_m); FPRINTF(stderr, "sequential time = %f seconds\n", time_s); FPRINTF(stderr, "speed up = %f\n", speed_up); FPRINTF(stderr, "number of workers = %u\n", nb_workers); FPRINTF(stderr, "number of tasks = %u\n", nb_tasks); FPRINTF(stderr, "expected speed up = %f\n", expected_speed_up); FPRINTF(stderr, "percentage of expected speed up %.2f%%\n", percentage_expected_speed_up); starpu_shutdown(); for (j = 0; j < nb_data; j++) free(vector[j]); //test reussi ou test echoue if (check && check_sup) { return EXIT_SUCCESS; } else { return EXIT_FAILURE; } } starpu-1.4.9+dfsg/tests/microbenchs/parallel_dependent_homogeneous_tasks_data.sh000077500000000000000000000016461507764646700304440ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi source $(dirname $0)/microbench.sh XSUCCESS="dmda dmdap dmdar dmdas dmdasd modular-dmda modular-dmdap modular-dmdar modular-dmdas" test_scheds parallel_dependent_homogeneous_tasks_data starpu-1.4.9+dfsg/tests/microbenchs/parallel_independent_heterogeneous_tasks.c000066400000000000000000000103371507764646700301300ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Bérangère Subervie * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* Run a series of independent tasks with heterogeneous execution time */ #define TIME 0.010 #ifdef STARPU_QUICK_CHECK #define TASK_COEFFICIENT 20 #define MARGIN 0.20 #else #define TASK_COEFFICIENT 100 #define MARGIN 0.10 #endif #define TIME_CUDA_COEFFICIENT 10 #define TIME_OPENCL_COEFFICIENT 5 #define SECONDS_SCALE_COEFFICIENT_TIMING_NOW 1000000 void wait_CPU(void *descr[], void *_args) { (void)descr; (void)_args; starpu_sleep(TIME); } void wait_CUDA(void *descr[], void *_args) { (void)descr; (void)_args; starpu_sleep(TIME/TIME_CUDA_COEFFICIENT); } void wait_OPENCL(void *descr[], void *_args) { (void)descr; (void)_args; starpu_sleep(TIME/TIME_OPENCL_COEFFICIENT); } double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) { (void) t; (void) i; STARPU_ASSERT(a->ndevices == 1); if (a->devices[0].type == STARPU_CPU_WORKER) { STARPU_ASSERT(a->devices[0].ncores == 1); return TIME * 1000000; } else if (a->devices[0].type == STARPU_CUDA_WORKER) { return TIME/TIME_CUDA_COEFFICIENT * 1000000; } else if (a->devices[0].type == STARPU_OPENCL_WORKER) { return TIME/TIME_OPENCL_COEFFICIENT * 1000000; } STARPU_ASSERT(0); return 0.0; } static struct starpu_perfmodel perf_model = { .type = STARPU_PER_ARCH, .arch_cost_function = cost_function, }; static struct starpu_codelet cl = { .cpu_funcs = { wait_CPU }, .cuda_funcs = { wait_CUDA }, .opencl_funcs = { wait_OPENCL }, .cpu_funcs_name = { "wait_CPU" }, .nbuffers = 0, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .model = &perf_model, }; int main(int argc, char *argv[]) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned nb_tasks, nb_workers_CPU, nb_workers_CUDA, nb_workers_OPENCL, i; double begin_time, end_time, time_m, time_s, speed_up, expected_speed_up, percentage_expected_speed_up; bool check, check_sup; nb_workers_CPU = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); nb_workers_CUDA = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER); nb_workers_OPENCL = starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); nb_tasks = (nb_workers_CPU + nb_workers_CUDA + nb_workers_OPENCL)*TASK_COEFFICIENT; begin_time = starpu_timing_now(); /*execution des tasks*/ for (i=0; i= (1 - MARGIN) * expected_speed_up; check_sup = speed_up <= (1 + MARGIN) * expected_speed_up; printf("measured time = %f seconds\nsequential time = %f seconds\nspeed up = %f\nnumber of workers CPU = %u\nnumber of workers CUDA = %u\nnumber of workers OPENCL = %u\nnumber of tasks = %u\nexpected speed up = %f\npercentage of expected speed up = %.2f%%\n", time_m, time_s, speed_up, nb_workers_CPU, nb_workers_CUDA, nb_workers_OPENCL, nb_tasks, expected_speed_up, percentage_expected_speed_up); starpu_shutdown(); //test reussi ou test echoue if (check && check_sup) { return EXIT_SUCCESS; } else { return EXIT_FAILURE; } } starpu-1.4.9+dfsg/tests/microbenchs/parallel_independent_heterogeneous_tasks.sh000077500000000000000000000022771507764646700303270ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi source $(dirname $0)/microbench.sh XFAIL="lws ws eager prio modular-prio modular-eager modular-eager-prio modular-eager-prefetching modular-prio-prefetching modular-random modular-random-prio modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-heteroprio modular-gemm random peager heteroprio graph_test" test_scheds parallel_independent_heterogeneous_tasks starpu-1.4.9+dfsg/tests/microbenchs/parallel_independent_heterogeneous_tasks_data.c000066400000000000000000000133611507764646700311210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Bérangère Subervie * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* Run a series of independent tasks with heterogeneous execution time and independent data */ #define TIME 0.010 #ifdef STARPU_QUICK_CHECK #define TASK_COEFFICIENT 20 #define MARGIN 0.20 #else #define TASK_COEFFICIENT 100 #define MARGIN 0.10 #endif #define TIME_CUDA_COEFFICIENT 10 #define TIME_OPENCL_COEFFICIENT 5 #define SECONDS_SCALE_COEFFICIENT_TIMING_NOW 1000000 #define NB_FLOAT 400000 void wait_CPU(void *descr[], void *_args) { (void)descr; (void)_args; starpu_sleep(TIME); } void wait_CUDA(void *descr[], void *_args) { (void)descr; (void)_args; starpu_sleep(TIME/TIME_CUDA_COEFFICIENT); } void wait_OPENCL(void *descr[], void *_args) { (void)descr; (void)_args; starpu_sleep(TIME/TIME_OPENCL_COEFFICIENT); } double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) { (void) t; (void) i; STARPU_ASSERT(a->ndevices == 1); if (a->devices[0].type == STARPU_CPU_WORKER) { STARPU_ASSERT(a->devices[0].ncores == 1); return TIME * 1000000; } else if (a->devices[0].type == STARPU_CUDA_WORKER) { return TIME/TIME_CUDA_COEFFICIENT * 1000000; } else if (a->devices[0].type == STARPU_OPENCL_WORKER) { return TIME/TIME_OPENCL_COEFFICIENT * 1000000; } STARPU_ASSERT(0); return 0.0; } static struct starpu_perfmodel perf_model = { .type = STARPU_PER_ARCH, .arch_cost_function = cost_function, }; static struct starpu_codelet cl = { .cpu_funcs = { wait_CPU }, .cuda_funcs = { wait_CUDA }, .opencl_funcs = { wait_OPENCL }, .cpu_funcs_name = { "wait_CPU" }, .nbuffers = 1, .modes = {STARPU_RW}, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .model = &perf_model, }; int main(int argc, char *argv[]) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned nb_tasks, nb_workers_CPU, nb_workers_CUDA, nb_workers_OPENCL; double begin_time, end_time, time_m, time_s, speed_up, expected_speed_up, percentage_expected_speed_up; bool check, check_sup; nb_workers_CPU = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); nb_workers_CUDA = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER); nb_workers_OPENCL = starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); nb_tasks = (nb_workers_CPU + nb_workers_CUDA + nb_workers_OPENCL)*TASK_COEFFICIENT; /* We consider a vector of float that is initialized just as any of C * data */ float *vector[nb_tasks]; starpu_data_handle_t vector_handle[nb_tasks]; unsigned i,j; for (j = 0; j < nb_tasks; j++) { starpu_malloc((void **)&vector[j], NB_FLOAT * sizeof(float)); #ifndef STARPU_SIMGRID for (i = 0; i < NB_FLOAT; i++) vector[j][i] = (i+1.0f); #endif /* Tell StaPU to associate the "vector" vector with the "vector_handle" * identifier. When a task needs to access a piece of data, it should * refer to the handle that is associated to it. * In the case of the "vector" data interface: * - the first argument of the registration method is a pointer to the * handle that should describe the data * - the second argument is the memory node where the data (ie. "vector") * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as * opposed to an address on a GPU for instance. * - the third argument is the address of the vector in RAM * - the fourth argument is the number of elements in the vector * - the fifth argument is the size of each element. */ starpu_vector_data_register(&vector_handle[j], STARPU_MAIN_RAM, (uintptr_t)vector[j], NB_FLOAT, sizeof(vector[0][0])); } begin_time = starpu_timing_now(); /*execution des tasks*/ for (i=0; i= ((1 - MARGIN) * expected_speed_up); check_sup = speed_up <= ((1 + MARGIN) * expected_speed_up); printf("measured time = %f seconds\nsequential time = %f seconds\nspeed up = %f\nnumber of workers CPU = %u\nnumber of workers CUDA = %u\nnumber of workers OPENCL = %u\nnumber of tasks = %u\nexpected speed up = %f\npercentage of expected speed up %.2f%%\n", time_m, time_s, speed_up, nb_workers_CPU, nb_workers_CUDA, nb_workers_OPENCL, nb_tasks, expected_speed_up, percentage_expected_speed_up); starpu_shutdown(); for (j = 0; j < nb_tasks; j++) starpu_free_noflag(vector[j], NB_FLOAT * sizeof(float)); //test reussi ou test echoue if (check && check_sup) { return EXIT_SUCCESS; } else { return EXIT_FAILURE; } } starpu-1.4.9+dfsg/tests/microbenchs/parallel_independent_heterogeneous_tasks_data.sh000077500000000000000000000016601507764646700313130ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi source $(dirname $0)/microbench.sh XSUCCESS="dmda dmdap dmdar dmdas dmdasd modular-dmda modular-dmdap modular-dmdar modular-dmdas pheft" test_scheds parallel_independent_heterogeneous_tasks_data starpu-1.4.9+dfsg/tests/microbenchs/parallel_independent_homogeneous_tasks.c000066400000000000000000000064421507764646700276060ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Bérangère Subervie * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* Run a series of independent tasks with homogeneous execution time */ #define TIME 0.010 #ifdef STARPU_QUICK_CHECK #define TASK_COEFFICIENT 20 #define MARGIN 0.15 #else #define TASK_COEFFICIENT 100 #define MARGIN 0.05 #endif #define SECONDS_SCALE_COEFFICIENT_TIMING_NOW 1000000 void wait_homogeneous(void *descr[], void *_args) { (void)descr; (void)_args; starpu_sleep(TIME); } double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) { (void)t; (void)a; (void)i; return TIME * 1000000; } static struct starpu_perfmodel perf_model = { .type = STARPU_PER_ARCH, .arch_cost_function = cost_function, }; static struct starpu_codelet cl = { .cpu_funcs = { wait_homogeneous }, .cuda_funcs = { wait_homogeneous }, .opencl_funcs = { wait_homogeneous }, .cpu_funcs_name = { "wait_homogeneous" }, .nbuffers = 0, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .model = &perf_model, }; int main(int argc, char *argv[]) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned nb_tasks, nb_workers, i; double begin_time, end_time, time_m, time_s, speed_up, expected_speed_up, percentage_expected_speed_up; bool check, check_sup; nb_workers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER) + starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) + starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); nb_tasks = nb_workers*TASK_COEFFICIENT; begin_time = starpu_timing_now(); /*execution des tasks*/ for (i=0; i= ((1 - MARGIN) * expected_speed_up); check_sup = speed_up <= ((1 + MARGIN) * expected_speed_up); printf("measured time = %f seconds\nsequential time = %f seconds\nspeed up = %f\nnumber of workers = %u\nnumber of tasks = %u\nexpected speed up = %f\npercentage of expected speed up = %.2f%%\n", time_m, time_s, speed_up, nb_workers, nb_tasks, expected_speed_up, percentage_expected_speed_up); starpu_shutdown(); //test reussi ou test echoue if (check && check_sup) { return EXIT_SUCCESS; } else { return EXIT_FAILURE; } } starpu-1.4.9+dfsg/tests/microbenchs/parallel_independent_homogeneous_tasks.sh000077500000000000000000000021751507764646700300000ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi source $(dirname $0)/microbench.sh XFAIL="modular-eager-prefetching modular-prio-prefetching modular-random modular-random-prio modular-random-prefetching modular-random-prio-prefetching modular-prandom modular-prandom-prio modular-ws modular-heft modular-heft-prio modular-heft2 modular-heteroprio modular-gemm random peager heteroprio graph_test" test_scheds parallel_independent_homogeneous_tasks starpu-1.4.9+dfsg/tests/microbenchs/parallel_independent_homogeneous_tasks_data.c000066400000000000000000000113671507764646700306010ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Bérangère Subervie * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* Run a series of independent tasks with homogeneous execution time and independent data */ #define TIME 0.010 #ifdef STARPU_QUICK_CHECK #define TASK_COEFFICIENT 20 #define MARGIN 0.20 #else #define TASK_COEFFICIENT 100 #define MARGIN 0.10 #endif #define SECONDS_SCALE_COEFFICIENT_TIMING_NOW 1000000 #define NB_FLOAT 4000000 void wait_homogeneous(void *descr[], void *_args) { (void)descr; (void)_args; starpu_sleep(TIME); } double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) { (void)t; (void)a; (void)i; return TIME * 1000000; } static struct starpu_perfmodel perf_model = { .type = STARPU_PER_ARCH, .arch_cost_function = cost_function, }; static struct starpu_codelet cl = { .cpu_funcs = { wait_homogeneous }, .cuda_funcs = { wait_homogeneous }, .opencl_funcs = { wait_homogeneous }, .cpu_funcs_name = { "wait_homogeneous" }, .nbuffers = 1, .modes = {STARPU_RW}, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .model = &perf_model, }; int main(int argc, char *argv[]) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned nb_tasks, nb_workers; double begin_time, end_time, time_m, time_s, speed_up, expected_speed_up, percentage_expected_speed_up; bool check, check_sup; nb_workers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER) + starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) + starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); nb_tasks = nb_workers*TASK_COEFFICIENT; /* We consider a vector of float that is initialized just as any of C * data */ float *vector[nb_tasks]; starpu_data_handle_t vector_handle[nb_tasks]; unsigned i,j; for (j = 0; j < nb_tasks; j++) { vector[j] = malloc(NB_FLOAT * sizeof(float)); #ifndef STARPU_SIMGRID for (i = 0; i < NB_FLOAT; i++) vector[j][i] = (i+1.0f); #endif /* Tell StaPU to associate the "vector" vector with the "vector_handle" * identifier. When a task needs to access a piece of data, it should * refer to the handle that is associated to it. * In the case of the "vector" data interface: * - the first argument of the registration method is a pointer to the * handle that should describe the data * - the second argument is the memory node where the data (ie. "vector") * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as * opposed to an address on a GPU for instance. * - the third argument is the address of the vector in RAM * - the fourth argument is the number of elements in the vector * - the fifth argument is the size of each element. */ starpu_vector_data_register(&vector_handle[j], STARPU_MAIN_RAM, (uintptr_t)vector[j], NB_FLOAT, sizeof(vector[0][0])); } begin_time = starpu_timing_now(); /*execution des tasks*/ for (i=0; i= ((1 - MARGIN) * expected_speed_up); check_sup = speed_up <= ((1 + MARGIN) * expected_speed_up); printf("measured time = %f seconds\nsequential time = %f seconds\nspeed up = %f\nnumber of workers = %u\nnumber of tasks = %u\nexpected speed up = %f\npercentage of expected speed up %.2f%%\n", time_m, time_s, speed_up, nb_workers, nb_tasks, expected_speed_up, percentage_expected_speed_up); starpu_shutdown(); for (j = 0; j < nb_tasks; j++) free(vector[j]); //test reussi ou test echoue if (check && check_sup) { return EXIT_SUCCESS; } else { return EXIT_FAILURE; } } starpu-1.4.9+dfsg/tests/microbenchs/parallel_independent_homogeneous_tasks_data.sh000077500000000000000000000016561507764646700307740ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi source $(dirname $0)/microbench.sh XSUCCESS="dmda dmdap dmdar dmdas dmdasd modular-dmda modular-dmdap modular-dmdar modular-dmdas pheft" test_scheds parallel_independent_homogeneous_tasks_data starpu-1.4.9+dfsg/tests/microbenchs/parallel_redux_heterogeneous_tasks_data.c000066400000000000000000000147021507764646700277530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Bérangère Subervie * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* Run a series of tasks with heterogeneous execution time and redux data */ #define TIME 0.010 #ifdef STARPU_QUICK_CHECK #define TASK_COEFFICIENT 20 #define MARGIN 0.20 #else #define TASK_COEFFICIENT 100 #define MARGIN 0.10 #endif #define TIME_CUDA_COEFFICIENT 10 #define TIME_OPENCL_COEFFICIENT 5 #define SECONDS_SCALE_COEFFICIENT_TIMING_NOW 1000000 #define NB_FLOAT 400000 void wait_CPU(void *descr[], void *_args) { (void)descr; (void)_args; starpu_sleep(TIME); } void wait_CUDA(void *descr[], void *_args) { (void)descr; (void)_args; starpu_sleep(TIME/TIME_CUDA_COEFFICIENT); } void wait_OPENCL(void *descr[], void *_args) { (void)descr; (void)_args; starpu_sleep(TIME/TIME_OPENCL_COEFFICIENT); } double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) { (void) t; (void) i; STARPU_ASSERT(a->ndevices == 1); if (a->devices[0].type == STARPU_CPU_WORKER) { STARPU_ASSERT(a->devices[0].ncores == 1); return TIME * 1000000; } else if (a->devices[0].type == STARPU_CUDA_WORKER) { return TIME/TIME_CUDA_COEFFICIENT * 1000000; } else if (a->devices[0].type == STARPU_OPENCL_WORKER) { return TIME/TIME_OPENCL_COEFFICIENT * 1000000; } STARPU_ASSERT(0); return 0.0; } static struct starpu_perfmodel perf_model = { .type = STARPU_PER_ARCH, .arch_cost_function = cost_function, }; static struct starpu_codelet cl = { .cpu_funcs = { wait_CPU }, .cuda_funcs = { wait_CUDA }, .opencl_funcs = { wait_OPENCL }, .cpu_funcs_name = { "wait_CPU" }, .nbuffers = 1, .modes = {STARPU_REDUX}, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .model = &perf_model, .name = "cl", }; static struct starpu_perfmodel perf_model_init = { .type = STARPU_PER_ARCH, .arch_cost_function = cost_function, }; static struct starpu_codelet cl_init = { .cpu_funcs = { wait_CPU }, .cuda_funcs = { wait_CUDA }, .opencl_funcs = { wait_OPENCL }, .cpu_funcs_name = { "wait_CPU" }, .nbuffers = 1, .modes = {STARPU_RW}, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .model = &perf_model_init, .name = "init", }; static struct starpu_perfmodel perf_model_redux = { .type = STARPU_PER_ARCH, .arch_cost_function = cost_function, }; static struct starpu_codelet cl_redux = { .cpu_funcs = { wait_CPU }, .cuda_funcs = { wait_CUDA }, .opencl_funcs = { wait_OPENCL }, .cpu_funcs_name = { "wait_CPU" }, .nbuffers = 2, .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .model = &perf_model_redux, .name = "redux", }; int main(int argc, char *argv[]) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned nb_tasks, nb_workers_CPU, nb_workers_CUDA, nb_workers_OPENCL; double begin_time, end_time, time_m, time_s, speed_up, expected_speed_up, percentage_expected_speed_up; bool check, check_sup; nb_workers_CPU = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); nb_workers_CUDA = starpu_worker_get_count_by_type(STARPU_CUDA_WORKER); nb_workers_OPENCL = starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); nb_tasks = (nb_workers_CPU + nb_workers_CUDA + nb_workers_OPENCL)*TASK_COEFFICIENT; /* We consider a vector of float that is initialized just as any of C * data */ float *vector; starpu_data_handle_t vector_handle; unsigned i; vector = calloc(NB_FLOAT, sizeof(float)); #ifndef STARPU_SIMGRID for (i = 0; i < NB_FLOAT; i++) vector[i] = (i+1.0f); #endif /* Tell StaPU to associate the "vector" vector with the "vector_handle" * identifier. When a task needs to access a piece of data, it should * refer to the handle that is associated to it. * In the case of the "vector" data interface: * - the first argument of the registration method is a pointer to the * handle that should describe the data * - the second argument is the memory node where the data (ie. "vector") * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as * opposed to an address on a GPU for instance. * - the third argument is the address of the vector in RAM * - the fourth argument is the number of elements in the vector * - the fifth argument is the size of each element. */ starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NB_FLOAT, sizeof(vector[0])); starpu_data_set_reduction_methods(vector_handle, &cl_redux, &cl_init); begin_time = starpu_timing_now(); /*execution des tasks*/ for (i=0; i= ((1 - MARGIN) * expected_speed_up); check_sup = speed_up <= ((1 + MARGIN) * expected_speed_up); printf("measured time = %f seconds\nsequential time = %f seconds\nspeed up = %f\nnumber of workers CPU = %u\nnumber of workers CUDA = %u\nnumber of workers OPENCL = %u\nnumber of tasks = %u\nexpected speed up = %f\npercentage of expected speed up %.2f%%\n", time_m, time_s, speed_up, nb_workers_CPU, nb_workers_CUDA, nb_workers_OPENCL, nb_tasks, expected_speed_up, percentage_expected_speed_up); starpu_shutdown(); free(vector); //test reussi ou test echoue if (check && check_sup) { return EXIT_SUCCESS; } else { return EXIT_FAILURE; } } starpu-1.4.9+dfsg/tests/microbenchs/parallel_redux_heterogeneous_tasks_data.sh000077500000000000000000000016601507764646700301450ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi source $(dirname $0)/microbench.sh XSUCCESS="dmda dmdap dmdar dmdas dmdasd modular-dmda modular-dmdap modular-dmdar modular-dmdas pheft" test_scheds parallel_independent_heterogeneous_tasks_data starpu-1.4.9+dfsg/tests/microbenchs/parallel_redux_homogeneous_tasks_data.c000066400000000000000000000130701507764646700274240ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2016-2016 Bérangère Subervie * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* Run a series of tasks with homogeneous execution time and redux data */ #define TIME 0.010 #ifdef STARPU_QUICK_CHECK #define TASK_COEFFICIENT 20 #define MARGIN 0.20 #else #define TASK_COEFFICIENT 100 #define MARGIN 0.10 #endif #define SECONDS_SCALE_COEFFICIENT_TIMING_NOW 1000000 #define NB_FLOAT 4000000 void wait_homogeneous(void *descr[], void *_args) { (void)descr; (void)_args; starpu_sleep(TIME); } double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) { (void)t; (void)a; (void)i; return TIME * 1000000; } static struct starpu_perfmodel perf_model = { .type = STARPU_PER_ARCH, .arch_cost_function = cost_function, }; static struct starpu_codelet cl = { .cpu_funcs = { wait_homogeneous }, .cuda_funcs = { wait_homogeneous }, .opencl_funcs = { wait_homogeneous }, .cpu_funcs_name = { "wait_homogeneous" }, .nbuffers = 1, .modes = {STARPU_REDUX}, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .model = &perf_model, .name = "cl", }; static struct starpu_perfmodel perf_model_init = { .type = STARPU_PER_ARCH, .arch_cost_function = cost_function, }; static struct starpu_codelet cl_init = { .cpu_funcs = { wait_homogeneous }, .cuda_funcs = { wait_homogeneous }, .opencl_funcs = { wait_homogeneous }, .cpu_funcs_name = { "wait_homogeneous" }, .nbuffers = 1, .modes = {STARPU_RW}, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .model = &perf_model_init, .name = "init", }; static struct starpu_perfmodel perf_model_redux = { .type = STARPU_PER_ARCH, .arch_cost_function = cost_function, }; static struct starpu_codelet cl_redux = { .cpu_funcs = { wait_homogeneous }, .cuda_funcs = { wait_homogeneous }, .opencl_funcs = { wait_homogeneous }, .cpu_funcs_name = { "wait_homogeneous" }, .nbuffers = 2, .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, .flags = STARPU_CODELET_SIMGRID_EXECUTE, .model = &perf_model_redux, .name = "redux", }; int main(int argc, char *argv[]) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned nb_tasks, nb_workers; double begin_time, end_time, time_m, time_s, speed_up, expected_speed_up, percentage_expected_speed_up; bool check, check_sup; nb_workers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER) + starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) + starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER); nb_tasks = nb_workers*TASK_COEFFICIENT; /* We consider a vector of float that is initialized just as any of C * data */ float *vector; starpu_data_handle_t vector_handle; unsigned i; vector = calloc(NB_FLOAT, sizeof(float)); #ifndef STARPU_SIMGRID for (i = 0; i < NB_FLOAT; i++) vector[i] = (i+1.0f); #endif /* Tell StaPU to associate the "vector" vector with the "vector_handle" * identifier. When a task needs to access a piece of data, it should * refer to the handle that is associated to it. * In the case of the "vector" data interface: * - the first argument of the registration method is a pointer to the * handle that should describe the data * - the second argument is the memory node where the data (ie. "vector") * resides initially: STARPU_MAIN_RAM stands for an address in main memory, as * opposed to an address on a GPU for instance. * - the third argument is the address of the vector in RAM * - the fourth argument is the number of elements in the vector * - the fifth argument is the size of each element. */ starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector, NB_FLOAT, sizeof(vector[0])); starpu_data_set_reduction_methods(vector_handle, &cl_redux, &cl_init); begin_time = starpu_timing_now(); /*execution des tasks*/ for (i=0; i= ((1 - MARGIN) * expected_speed_up); check_sup = speed_up <= ((1 + MARGIN) * expected_speed_up); printf("measured time = %f seconds\nsequential time = %f seconds\nspeed up = %f\nnumber of workers = %u\nnumber of tasks = %u\nexpected speed up = %f\npercentage of expected speed up %.2f%%\n", time_m, time_s, speed_up, nb_workers, nb_tasks, expected_speed_up, percentage_expected_speed_up); starpu_shutdown(); free(vector); //test reussi ou test echoue if (check && check_sup) { return EXIT_SUCCESS; } else { return EXIT_FAILURE; } } starpu-1.4.9+dfsg/tests/microbenchs/parallel_redux_homogeneous_tasks_data.sh000077500000000000000000000016561507764646700276260ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi source $(dirname $0)/microbench.sh XSUCCESS="dmda dmdap dmdar dmdas dmdasd modular-dmda modular-dmdap modular-dmdar modular-dmdas pheft" test_scheds parallel_independent_homogeneous_tasks_data starpu-1.4.9+dfsg/tests/microbenchs/prefetch_data_on_node.c000066400000000000000000000113061507764646700241050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Try calling starpu_data_prefetch_on_node before running a task there */ #ifdef STARPU_QUICK_CHECK #define N 10 #elif !defined(STARPU_LONG_CHECK) #define N 100 #else #define N 1000 #endif #define VECTORSIZE 1024 starpu_data_handle_t v_handle; static unsigned *v; static void callback(void *arg) { unsigned node = (unsigned)(uintptr_t) arg; starpu_data_prefetch_on_node(v_handle, node, 1); } void codelet_null(void *descr[], void *_args) { (void)descr; (void)_args; } static struct starpu_codelet cl_r = { .cpu_funcs = {codelet_null}, .cuda_funcs = {codelet_null}, .opencl_funcs = {codelet_null}, .cpu_funcs_name = {"codelet_null"}, .nbuffers = 1, .modes = {STARPU_R} }; static struct starpu_codelet cl_w = { .cpu_funcs = {codelet_null}, .cuda_funcs = {codelet_null}, .opencl_funcs = {codelet_null}, .cpu_funcs_name = {"codelet_null"}, .nbuffers = 1, .modes = {STARPU_W} }; static struct starpu_codelet cl_rw = { .cpu_funcs = {codelet_null}, .cuda_funcs = {codelet_null}, .opencl_funcs = {codelet_null}, .cpu_funcs_name = {"codelet_null"}, .nbuffers = 1, .modes = {STARPU_RW} }; static struct starpu_codelet *select_codelet_with_random_mode(void) { int r = rand(); switch (r % 3) { case 0: return &cl_r; case 1: return &cl_w; case 2: return &cl_rw; }; return &cl_rw; } int main(int argc, char **argv) { int ret; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.ncuda = -1; conf.nopencl = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned)); starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); unsigned nworker = starpu_worker_get_count(); unsigned iter, worker; for (iter = 0; iter < N; iter++) { for (worker = 0; worker < nworker; worker++) { /* synchronous prefetch */ unsigned node = starpu_worker_get_memory_node(worker); ret = starpu_data_prefetch_on_node(v_handle, node, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_prefetch_on_node"); /* execute a task */ struct starpu_task *task = starpu_task_create(); task->handles[0] = v_handle; task->cl = select_codelet_with_random_mode(); task->synchronous = 1; task->execute_on_a_specific_worker = 1; task->workerid = worker; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } for (iter = 0; iter < N; iter++) { for (worker = 0; worker < nworker; worker++) { /* asynchronous prefetch */ unsigned node = starpu_worker_get_memory_node(worker); ret = starpu_data_prefetch_on_node(v_handle, node, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_prefetch_on_node"); /* execute a task */ struct starpu_task *task = starpu_task_create(); task->handles[0] = v_handle; task->cl = select_codelet_with_random_mode(); task->callback_func = callback; task->callback_arg = (void*)(uintptr_t) starpu_worker_get_memory_node((worker+1)%nworker); task->execute_on_a_specific_worker = 1; task->workerid = worker; task->synchronous = 0; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/microbenchs/redundant_buffer.c000066400000000000000000000044471507764646700231400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" /* * Try passing the same buffer twice to the same task */ #define N 10000 #define VECTORSIZE 1024 starpu_data_handle_t v_handle; static unsigned *v; int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned)); starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); starpu_codelet_nop.nbuffers = 2; starpu_codelet_nop.modes[0] = STARPU_R; starpu_codelet_nop.modes[1] = STARPU_R; unsigned iter; for (iter = 0; iter < N; iter++) { struct starpu_task *task = starpu_task_create(); task->cl = &starpu_codelet_nop; task->handles[0] = v_handle; task->handles[1] = v_handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/microbenchs/starpu_check.sh000077500000000000000000000044411507764646700224630ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # TEST_DIR=$PWD ntests=0 nfailed=0 nsucess=0 print_summary() { if test $nfailed = 0; then echo "**** All tests are successful ****" else echo "$nfailed test(s) failed out of $ntests" fi } test_with_timeout() { timeout=$1 application=$2 ntests=$(($ntests + 1)) echo "$application" $MS_LAUNCHER $STARPU_LAUNCH $application > /dev/null 2> /dev/null & _pid_appli=$!; (sleep $timeout ; kill -9 $_pid_appli 2> /dev/null) & _pid_killer=$! wait $_pid_appli ret=$? kill $_pid_killer 2> /dev/null if test $ret = 0; then echo " SUCCESS" nsuccess=$(($nsuccess + 1)) else case $ret in 137) # sigkill echo " TEST TIMEOUT" ;; 139) echo " TEST FAILED: SIGSEV" ;; *) echo " TEST FAILED (ret = $ret)" esac nfailed=$(($nfailed + 1)) fi } echo echo "**********************" echo "TEST synchronous tasks" echo "**********************" echo test_with_timeout 10 "./sync_tasks_overhead -i 10000" 2> /dev/null echo echo "***********************" echo "TEST asynchronous tasks" echo "***********************" echo test_with_timeout 10 "./async_tasks_overhead -i 20000" 2> /dev/null echo echo "**************" echo "TEST increment" echo "**************" echo test_with_timeout 10 "../../examples/incrementer/incrementer" 2> /dev/null echo echo "**********" echo "TEST tag 1" echo "**********" echo test_with_timeout 60 "../../examples/tag_example/tag_example -iter 1000" 2> /dev/null echo echo "**********" echo "TEST tag 2" echo "**********" echo test_with_timeout 10 "../../examples/tag_example/tag_example2 -iter 100" 2> /dev/null echo echo "*******" echo "SUMMARY" echo "*******" echo print_summary starpu-1.4.9+dfsg/tests/microbenchs/sync_tasks_data_overhead.sh000077500000000000000000000015511507764646700250360ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi ROOT=${0%.sh} ROOT=$(echo $ROOT | sed 's/tasks_data_overhead/tasks_overhead/') exec $MS_LAUNCHER $STARPU_LAUNCH $ROOT -b 1 "$@" starpu-1.4.9+dfsg/tests/microbenchs/sync_tasks_overhead.c000066400000000000000000000112411507764646700236470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Measure the cost of submitting synchronous tasks */ starpu_data_handle_t data_handles[8]; float *buffers[8]; #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 128; #else static unsigned ntasks = 65536; #endif static unsigned nbuffers = 0; #define BUFFERSIZE 16 void dummy_func(void *descr[], void *arg) { (void)descr; (void)arg; } static struct starpu_codelet dummy_codelet = { .cpu_funcs = {dummy_func}, .cuda_funcs = {dummy_func}, .opencl_funcs = {dummy_func}, .cpu_funcs_name = {"dummy_func"}, .model = NULL, .nbuffers = 0, .modes = {STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW} }; static int inject_one_task(void) { int ret; struct starpu_task *task = starpu_task_create(); task->cl = &dummy_codelet; task->cl_arg = NULL; task->callback_func = NULL; task->synchronous = 1; ret = starpu_task_submit(task); return ret; } static void usage(char **argv) { fprintf(stderr, "Usage: %s [-i ntasks] [-p sched_policy] [-b nbuffers] [-h]\n", argv[0]); exit(EXIT_FAILURE); } static void parse_args(int argc, char **argv, struct starpu_conf *conf) { int c; while ((c = getopt(argc, argv, "i:b:p:h")) != -1) switch(c) { case 'i': ntasks = atoi(optarg); break; case 'b': nbuffers = atoi(optarg); dummy_codelet.nbuffers = nbuffers; break; case 'p': conf->sched_policy_name = optarg; break; case 'h': usage(argv); break; } } int main(int argc, char **argv) { int ret; unsigned i; double timing; double start; double end; struct starpu_conf conf; starpu_conf_init(&conf); conf.ncpus = 2; parse_args(argc, argv, &conf); ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned buffer; for (buffer = 0; buffer < nbuffers; buffer++) { starpu_malloc((void**)&buffers[buffer], BUFFERSIZE*sizeof(float)); memset(buffers[buffer], 0, BUFFERSIZE*sizeof(float)); starpu_vector_data_register(&data_handles[buffer], STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], BUFFERSIZE, sizeof(float)); } fprintf(stderr, "#tasks : %u\n#buffers : %u\n", ntasks, nbuffers); start = starpu_timing_now(); for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &dummy_codelet; task->synchronous = 1; /* we have 8 buffers at most */ for (buffer = 0; buffer < nbuffers; buffer++) { task->handles[buffer] = data_handles[buffer]; } ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } end = starpu_timing_now(); timing = end - start; fprintf(stderr, "Total: %f secs\n", timing/1000000); fprintf(stderr, "Per task: %f usecs\n", timing/ntasks); { char *output_dir = getenv("STARPU_BENCH_DIR"); char *bench_id = getenv("STARPU_BENCH_ID"); if (output_dir && bench_id) { char number[1+sizeof(nbuffers)*3+1]; const char *numberp; char file[1024]; FILE *f; if (nbuffers) { snprintf(number, sizeof(number), "_%u", nbuffers); numberp = number; } else numberp = ""; snprintf(file, sizeof(file), "%s/sync_tasks_overhead_total%s.dat", output_dir, numberp); f = fopen(file, "a"); fprintf(f, "%s\t%f\n", bench_id, timing/1000000); fclose(f); snprintf(file, sizeof(file), "%s/sync_tasks_overhead_per_task%s.dat", output_dir, numberp); f = fopen(file, "a"); fprintf(f, "%s\t%f\n", bench_id, timing/ntasks); fclose(f); } } for (buffer = 0; buffer < nbuffers; buffer++) { starpu_data_unregister(data_handles[buffer]); starpu_free_noflag((void*)buffers[buffer], BUFFERSIZE*sizeof(float)); } starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/microbenchs/tasks_data_overhead.sh000077500000000000000000000015511507764646700240020ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi ROOT=${0%.sh} ROOT=$(echo $ROOT | sed 's/tasks_data_overhead/tasks_overhead/') exec $MS_LAUNCHER $STARPU_LAUNCH $ROOT -b 1 "$@" starpu-1.4.9+dfsg/tests/microbenchs/tasks_overhead.c000066400000000000000000000155541507764646700226260ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Measure the submission time and execution time of asynchronous tasks */ starpu_data_handle_t data_handles[8]; float *buffers[8]; #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 128; #else static unsigned ntasks = 65536; #endif static unsigned nbuffers = 0; #define BUFFERSIZE 16 struct starpu_task *tasks; void dummy_func(void *descr[], void *arg) { (void)descr; (void)arg; } static struct starpu_codelet dummy_codelet = { .cpu_funcs = {dummy_func}, .cuda_funcs = {dummy_func}, .opencl_funcs = {dummy_func}, .cpu_funcs_name = {"dummy_func"}, .model = NULL, .nbuffers = 0, .modes = {STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW, STARPU_RW} }; static void usage(char **argv) { fprintf(stderr, "Usage: %s [-i ntasks] [-p sched_policy] [-b nbuffers] [-h]\n", argv[0]); exit(EXIT_FAILURE); } static void parse_args(int argc, char **argv, struct starpu_conf *conf) { int c; while ((c = getopt(argc, argv, "i:b:p:h")) != -1) switch(c) { case 'i': ntasks = atoi(optarg); break; case 'b': nbuffers = atoi(optarg); dummy_codelet.nbuffers = nbuffers; break; case 'p': conf->sched_policy_name = optarg; break; case 'h': usage(argv); break; } } int main(int argc, char **argv) { int ret; unsigned i; double timing_submit; double start_submit; double end_submit; double timing_exec; double start_exec; double end_exec; struct starpu_conf conf; starpu_conf_init(&conf); conf.ncpus = 2; parse_args(argc, argv, &conf); ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned buffer; for (buffer = 0; buffer < nbuffers; buffer++) { starpu_malloc((void**)&buffers[buffer], BUFFERSIZE*sizeof(float)); starpu_vector_data_register(&data_handles[buffer], STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], BUFFERSIZE, sizeof(float)); } fprintf(stderr, "#tasks : %u\n#buffers : %u\n", ntasks, nbuffers); /* submit tasks (but don't execute them yet !) */ tasks = (struct starpu_task *) calloc(1, ntasks*sizeof(struct starpu_task)); for (i = 0; i < ntasks; i++) { starpu_task_init(&tasks[i]); tasks[i].cl = &dummy_codelet; tasks[i].synchronous = 0; tasks[i].use_tag = 1; tasks[i].tag_id = (starpu_tag_t)i; /* we have 8 buffers at most */ for (buffer = 0; buffer < nbuffers; buffer++) { tasks[i].handles[buffer] = data_handles[buffer]; } } tasks[ntasks-1].detach = 0; start_submit = starpu_timing_now(); if (nbuffers) { /* Data dependency, just submit them all */ for (i = 0; i < ntasks; i++) { ret = starpu_task_submit(&tasks[i]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } else { /* No data dependency, we have to introduce dependencies by hand */ for (i = 1; i < ntasks; i++) { starpu_tag_declare_deps((starpu_tag_t)i, 1, (starpu_tag_t)(i-1)); ret = starpu_task_submit(&tasks[i]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } /* submit the first task */ ret = starpu_task_submit(&tasks[0]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } end_submit = starpu_timing_now(); /* wait for the execution of the tasks */ start_exec = starpu_timing_now(); ret = starpu_task_wait(&tasks[ntasks-1]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait"); end_exec = starpu_timing_now(); starpu_task_wait_for_all(); for (i = 0; i < ntasks; i++) starpu_task_clean(&tasks[i]); timing_submit = end_submit - start_submit; timing_exec = end_exec - start_exec; fprintf(stderr, "Total submit: %f secs\n", timing_submit/1000000); fprintf(stderr, "Per task submit: %f usecs\n", timing_submit/ntasks); fprintf(stderr, "\n"); fprintf(stderr, "Total execution: %f secs\n", timing_exec/1000000); fprintf(stderr, "Per task execution: %f usecs\n", timing_exec/ntasks); fprintf(stderr, "\n"); fprintf(stderr, "Total: %f secs\n", (timing_submit+timing_exec)/1000000); fprintf(stderr, "Per task: %f usecs\n", (timing_submit+timing_exec)/ntasks); { char *output_dir = getenv("STARPU_BENCH_DIR"); char *bench_id = getenv("STARPU_BENCH_ID"); if (output_dir && bench_id) { char number[1+sizeof(nbuffers)*3+1]; const char *numberp; char file[1024]; FILE *f; if (nbuffers) { snprintf(number, sizeof(number), "_%u", nbuffers); numberp = number; } else numberp = ""; snprintf(file, sizeof(file), "%s/tasks_overhead_total_submit%s.dat", output_dir, numberp); f = fopen(file, "a"); fprintf(f, "%s\t%f\n", bench_id, timing_submit/1000000); fclose(f); snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_submit%s.dat", output_dir, numberp); f = fopen(file, "a"); fprintf(f, "%s\t%f\n", bench_id, timing_submit/ntasks); fclose(f); snprintf(file, sizeof(file), "%s/tasks_overhead_total_execution%s.dat", output_dir, numberp); f = fopen(file, "a"); fprintf(f, "%s\t%f\n", bench_id, timing_exec/1000000); fclose(f); snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_execution%s.dat", output_dir, numberp); f = fopen(file, "a"); fprintf(f, "%s\t%f\n", bench_id, timing_exec/ntasks); fclose(f); snprintf(file, sizeof(file), "%s/tasks_overhead_total_submit_execution%s.dat", output_dir, numberp); f = fopen(file, "a"); fprintf(f, "%s\t%f\n", bench_id, (timing_submit+timing_exec)/1000000); fclose(f); snprintf(file, sizeof(file), "%s/tasks_overhead_per_task_submit_execution%s.dat", output_dir, numberp); f = fopen(file, "a"); fprintf(f, "%s\t%f\n", bench_id, (timing_submit+timing_exec)/ntasks); fclose(f); } } for (buffer = 0; buffer < nbuffers; buffer++) { starpu_data_unregister(data_handles[buffer]); starpu_free_noflag((void*)buffers[buffer], BUFFERSIZE*sizeof(float)); } starpu_shutdown(); free(tasks); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); free(tasks); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/microbenchs/tasks_size_overhead.c000066400000000000000000000216361507764646700236560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "../helper.h" /* * This benchmark creates a thousand tasks of the same (small) duration, with * various number of cpus and various durations. * * Use ./tasks_size_overhead.sh to generate a plot of the result. * * Thanks Martin Tillenius for the idea. */ #define START 4 #define STOP 4096 #ifdef STARPU_QUICK_CHECK #define FACTOR 64 #else #define FACTOR 2 #endif #ifdef STARPU_QUICK_CHECK static unsigned ntasks = 1; #elif !defined(STARPU_LONG_CHECK) static unsigned ntasks = 64; #else static unsigned ntasks = 256; #endif static unsigned nbuffers = 0; static unsigned total_nbuffers = 0; static unsigned mincpus = 1, maxcpus, cpustep; static unsigned mintime = START, maxtime = STOP, factortime = FACTOR; struct starpu_task *tasks; void func(void *descr[], void *arg) { (void)descr; unsigned n = (uintptr_t)arg; long usec = 0; double tv1 = starpu_timing_now(); do { double tv2 = starpu_timing_now(); usec = tv2 - tv1; } while (usec < (long) n); } double cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) { (void) t; (void) i; (void) a; unsigned n = (uintptr_t) t->cl_arg; return n; } static struct starpu_perfmodel perf_model = { .type = STARPU_PER_ARCH, .arch_cost_function = cost_function, }; static struct starpu_codelet codelet = { .cpu_funcs = {func}, .nbuffers = 0, .modes = {STARPU_R, STARPU_R, STARPU_R, STARPU_R, STARPU_R, STARPU_R, STARPU_R, STARPU_R}, .model = &perf_model, }; static void parse_args(int argc, char **argv) { int c; while ((c = getopt(argc, argv, "i:b:B:c:C:s:t:T:f:h")) != -1) switch(c) { case 'i': ntasks = atoi(optarg); break; case 'b': nbuffers = atoi(optarg); codelet.nbuffers = nbuffers; break; case 'B': total_nbuffers = atoi(optarg); break; case 'c': mincpus = atoi(optarg); break; case 'C': maxcpus = atoi(optarg); break; case 's': cpustep = atoi(optarg); break; case 't': mintime = atoi(optarg); break; case 'T': maxtime = atoi(optarg); break; case 'f': factortime = atoi(optarg); break; case 'h': fprintf(stderr, "\ Usage: %s [-h]\n\ [-i ntasks] [-b nbuffers] [-B total_nbuffers]\n\ [-c mincpus] [ -C maxcpus] [-s cpustep]\n\ [-t mintime] [-T maxtime] [-f factortime]\n\n", argv[0]); fprintf(stderr,"\ runs 'ntasks' tasks\n\ - using 'nbuffers' data each, randomly among 'total_nbuffers' choices,\n\ - with varying task durations, from 'mintime' to 'maxtime' (using 'factortime')\n\ - on varying numbers of cpus, from 'mincpus' to 'maxcpus' (using 'cpustep')\n\ \n\ currently selected parameters: %u tasks using %u buffers among %u, from %uus to %uus (factor %u), from %u cpus to %u cpus (step %u)\n\ ", ntasks, nbuffers, total_nbuffers, mintime, maxtime, factortime, mincpus, maxcpus, cpustep); exit(EXIT_SUCCESS); break; } } int main(int argc, char **argv) { int ret; unsigned i; unsigned size; unsigned ncpus; double timing; double start; double end; struct starpu_conf conf; unsigned buffer; char *starpu_sched = getenv("STARPU_SCHED"); if (getenv("STARPU_MICROBENCHS_DISABLED")) return STARPU_TEST_SKIPPED; /* Get number of CPUs */ starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; #ifdef STARPU_SIMGRID /* This will get serialized, avoid spending too much time on it. */ maxcpus = 2; #else ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); maxcpus = starpu_worker_get_count_by_type(STARPU_CPU_WORKER); starpu_shutdown(); #endif #ifdef STARPU_HAVE_UNSETENV /* That was useful to force the max number of cpus to use, but now we * want to make it vary */ unsetenv("STARPU_NCPUS"); unsetenv("STARPU_NCPU"); #endif cpustep = sqrt(maxcpus)/2; #ifdef STARPU_QUICK_CHECK cpustep *= 8; #endif if (STARPU_RUNNING_ON_VALGRIND) { factortime *= 4; cpustep *= 4; } if (cpustep >= maxcpus/2) cpustep = maxcpus/2; if (cpustep == 0) cpustep = 1; parse_args(argc, argv); float *buffers[total_nbuffers?total_nbuffers:1]; /* Allocate data */ for (buffer = 0; buffer < total_nbuffers; buffer++) buffers[buffer] = (float *) calloc(16, sizeof(float)); if (!starpu_getenv("STARPU_SINK")) tasks = (struct starpu_task *) calloc(1, ntasks*maxcpus*sizeof(struct starpu_task)); /* Emit headers and compute raw tasks speed */ FPRINTF(stdout, "# tasks : %u buffers : %u total_nbuffers : %u\n", ntasks, nbuffers, total_nbuffers); FPRINTF(stdout, "# ncpus\t"); for (size = mintime; size <= maxtime; size *= factortime) FPRINTF(stdout, "%u iters(us)\ttotal(s)\t", size); FPRINTF(stdout, "\n"); FPRINTF(stdout, "\"seq\"\t"); for (size = mintime; size <= maxtime; size *= factortime) { double dstart, dend; dstart = starpu_timing_now(); for (i = 0; i < ntasks; i++) func(NULL, (void*) (uintptr_t) size); dend = starpu_timing_now(); FPRINTF(stdout, "%.0f \t%f\t", (dend-dstart)/ntasks, (dend-dstart)/1000000); } FPRINTF(stdout, "\n"); fflush(stdout); starpu_data_handle_t data_handles[total_nbuffers?total_nbuffers:1]; if (nbuffers && !total_nbuffers) { fprintf(stderr,"can not have %u buffers with %u total buffers\n", nbuffers, total_nbuffers); goto error; } if (mincpus == 0) mincpus = 1; /* For each number of cpus, benchmark */ for (ncpus= mincpus; ncpus <= maxcpus; ncpus += cpustep) { FPRINTF(stdout, "%u\t", ncpus); fflush(stdout); conf.ncpus = ncpus; ret = starpu_init(&conf); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); for (buffer = 0; buffer < total_nbuffers; buffer++) starpu_vector_data_register(&data_handles[buffer], STARPU_MAIN_RAM, (uintptr_t)buffers[buffer], 16, sizeof(float)); for (size = mintime; size <= maxtime; size *= factortime) { /* submit tasks */ start = starpu_timing_now(); for (i = 0; i < ntasks * ncpus; i++) { starpu_data_handle_t *handles; starpu_task_init(&tasks[i]); tasks[i].callback_func = NULL; tasks[i].cl = &codelet; tasks[i].cl_arg = (void*) (uintptr_t) size; tasks[i].synchronous = 0; if (nbuffers > STARPU_NMAXBUFS) { tasks[i].dyn_handles = malloc(nbuffers * sizeof(*data_handles)); handles = tasks[i].dyn_handles; tasks[i].dyn_modes = malloc(nbuffers * sizeof(tasks[i].dyn_modes[0])); for (buffer = 0; buffer < nbuffers; buffer++) tasks[i].dyn_modes[buffer] = STARPU_R; } else handles = tasks[i].handles; if (nbuffers >= total_nbuffers) for (buffer = 0; buffer < nbuffers; buffer++) handles[buffer] = data_handles[buffer%total_nbuffers]; else for (buffer = 0; buffer < nbuffers; buffer++) handles[buffer] = data_handles[starpu_lrand48()%total_nbuffers]; ret = starpu_task_submit(&tasks[i]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); end = starpu_timing_now(); for (i = 0; i < ntasks * ncpus; i++) starpu_task_clean(&tasks[i]); timing = end - start; FPRINTF(stdout, "%u\t%f\t", size, timing/ncpus/1000000); fflush(stdout); { char *output_dir = getenv("STARPU_BENCH_DIR"); char *bench_id = getenv("STARPU_BENCH_ID"); char *sched = getenv("STARPU_SCHED"); if (output_dir && bench_id) { char file[1024]; FILE *f; snprintf(file, sizeof(file), "%s/tasks_size_overhead_total%s%s.dat", output_dir, sched?"_":"", sched?sched:""); f = fopen(file, "a"); fprintf(f, "%s\t%u\t%u\t%f\n", bench_id, ncpus, size, timing/1000000 /(ntasks*ncpus) *1000); fclose(f); } } } for (buffer = 0; buffer < total_nbuffers; buffer++) { starpu_data_unregister(data_handles[buffer]); } starpu_shutdown(); FPRINTF(stdout, "\n"); fflush(stdout); } free(tasks); for (buffer = 0; buffer < total_nbuffers; buffer++) free(buffers[buffer]); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ error: free(tasks); for (buffer = 0; buffer < total_nbuffers; buffer++) free(buffers[buffer]); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/microbenchs/tasks_size_overhead.gp000077500000000000000000000024071507764646700240400ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # OUTPUT=tasks_size_overhead.output VALS=$(sed -n -e '3p' < $OUTPUT) PLOTS="" for x in 1 2 3 4 5 6 7 8 9 10 11 do pos=$((2 * $x + 1)) double=$((2 * $x)) value=$(echo "$VALS" | cut -d ' ' -f $pos) if test -n "$value" then PLOTS=",\"$OUTPUT\" using 1:($value)/(\$$pos) with linespoints title columnheader($double) $PLOTS" fi done [ -n "$TERMINAL" ] || TERMINAL=eps [ -n "$OUTFILE" ] || OUTFILE=tasks_size_overhead.eps gnuplot << EOF set terminal $TERMINAL set output "$OUTFILE" set key top left set xlabel "number of cores" set ylabel "speedup" plot \ x title "linear" $PLOTS EOF starpu-1.4.9+dfsg/tests/microbenchs/tasks_size_overhead.sh000077500000000000000000000015371507764646700240470ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi ROOT=${0%.sh} $MS_LAUNCHER $STARPU_LAUNCH $ROOT "$@" > tasks_size_overhead.output $ROOT.gp gv tasks_size_overhead.eps starpu-1.4.9+dfsg/tests/microbenchs/tasks_size_overhead_sched.sh000077500000000000000000000025601507764646700252120ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi ROOT=${0%.sh} ROOT=${ROOT%_sched} unset STARPU_SSILENT $_MS_LAUNCHER $STARPU_LAUNCH $_STARPU_LAUNCH $ROOT "$@" > tasks_size_overhead.output ret=$? if test "$ret" = "0" && [ -z "$(echo $MAKEFLAGS | sed -ne 's/.*-j\([0-9]\+\).*/\1/p')" ] then # if the program was successful and we are not running in parallel, try to run gnuplot DIR= [ -z "$STARPU_BENCH_DIR" ] || DIR="$STARPU_BENCH_DIR/" export TERMINAL=png export OUTFILE=${DIR}tasks_size_overhead_${STARPU_SCHED}.png gnuplot_av=$(command -v gnuplot) if test -n "$gnuplot_av" then # If gnuplot is available, plot the result $ROOT.gp ret=$? fi fi exit $ret starpu-1.4.9+dfsg/tests/microbenchs/tasks_size_overhead_scheds.sh000077500000000000000000000017431507764646700253770ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi . $(dirname $0)/microbench.sh XFAIL="heteroprio" if [ -z "$STARPU_BENCH_DIR" ] then FAST="-i 8" fi _STARPU_LAUNCH="$STARPU_LAUNCH" unset STARPU_LAUNCH _MS_LAUNCHER="$MS_LAUNCHER" unset MS_LAUNCHER test_scheds tasks_size_overhead_sched.sh $FAST starpu-1.4.9+dfsg/tests/model-checking/000077500000000000000000000000001507764646700200235ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/model-checking/Makefile.am000066400000000000000000000041661507764646700220660ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk EXTRA_DIST = \ platform.xml \ prio_list.sh \ barrier.sh \ starpu-mc.sh.in AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_srcdir)/src -I$(top_builddir)/include -I$(top_srcdir)/include $(SIMGRID_CFLAGS) AM_LDFLAGS = -Wl,-znorelro -Wl,-znoseparate-code LIBS += $(SIMGRID_LDFLAGS) noinst_PROGRAMS = \ prio_list \ prio_list2 \ prio_list3 \ starpu_barrier TESTS = $(SHELL_TESTS) SHELL_TESTS = # takes 1s SHELL_TESTS += prio_list.sh # https://github.com/simgrid/simgrid/issues/166 #SHELL_TESTS += barrier.sh if !STARPU_QUICK_CHECK # takes 7m SHELL_TESTS += prio_list2.sh endif if STARPU_LONG_CHECK # takes 25m SHELL_TESTS += prio_list3.sh endif #MC_FLAGS=--cfg=model-check/reduction:none #MC_FLAGS+=--cfg=contexts/factory:ucontext #MC_FLAGS+=--cfg=model-check/sparse-checkpoint:yes #MC_FLAGS+=--cfg=model-check/visited:1000 # To record the failing trace #MC_FLAGS+=--cfg=model-check/record:1 ## And replay it without simgrid-mc #MC_FLAGS+=--cfg=model-check/reply:'1;3;4' # To see which simix calls are made #MC_FLAGS+=--log=simix_popping.thres:debug test: prio_list simgrid-mc ./prio_list platform.xml MAIN $(MC_FLAGS) debug: prio_list simgrid-mc ./prio_list platform.xml MAIN --log=mc_safety.thres:debug $(MC_FLAGS) test-barrier: starpu_barrier simgrid-mc ./starpu_barrier platform.xml MAIN $(MC_FLAGS) debug-barrier: starpu_barrier simgrid-mc ./starpu_barrier platform.xml MAIN --log=mc_safety.thres:debug $(MC_FLAGS) starpu-1.4.9+dfsg/tests/model-checking/Makefile.in000066400000000000000000001433031507764646700220740ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ noinst_PROGRAMS = prio_list$(EXEEXT) prio_list2$(EXEEXT) \ prio_list3$(EXEEXT) starpu_barrier$(EXEEXT) # https://github.com/simgrid/simgrid/issues/166 #SHELL_TESTS += barrier.sh # takes 7m @STARPU_QUICK_CHECK_FALSE@am__append_3 = prio_list2.sh # takes 25m @STARPU_LONG_CHECK_TRUE@am__append_4 = prio_list3.sh subdir = tests/model-checking ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = starpu-mc.sh CONFIG_CLEAN_VPATH_FILES = PROGRAMS = $(noinst_PROGRAMS) prio_list_SOURCES = prio_list.c prio_list_OBJECTS = prio_list.$(OBJEXT) prio_list_LDADD = $(LDADD) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = prio_list2_SOURCES = prio_list2.c prio_list2_OBJECTS = prio_list2.$(OBJEXT) prio_list2_LDADD = $(LDADD) prio_list3_SOURCES = prio_list3.c prio_list3_OBJECTS = prio_list3.$(OBJEXT) prio_list3_LDADD = $(LDADD) starpu_barrier_SOURCES = starpu_barrier.c starpu_barrier_OBJECTS = starpu_barrier.$(OBJEXT) starpu_barrier_LDADD = $(LDADD) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/prio_list.Po \ ./$(DEPDIR)/prio_list2.Po ./$(DEPDIR)/prio_list3.Po \ ./$(DEPDIR)/starpu_barrier.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = prio_list.c prio_list2.c prio_list3.c starpu_barrier.c DIST_SOURCES = prio_list.c prio_list2.c prio_list3.c starpu_barrier.c am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) AM_RECURSIVE_TARGETS = check recheck TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/starpu-mc.sh.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(SIMGRID_LDFLAGS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = LAUNCHER = AM_CFLAGS = $(GLOBAL_AM_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # EXTRA_DIST = \ platform.xml \ prio_list.sh \ barrier.sh \ starpu-mc.sh.in AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_srcdir)/src -I$(top_builddir)/include -I$(top_srcdir)/include $(SIMGRID_CFLAGS) AM_LDFLAGS = -Wl,-znorelro -Wl,-znoseparate-code TESTS = $(SHELL_TESTS) # takes 1s SHELL_TESTS = prio_list.sh $(am__append_3) $(am__append_4) all: all-am .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign tests/model-checking/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign tests/model-checking/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): starpu-mc.sh: $(top_builddir)/config.status $(srcdir)/starpu-mc.sh.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ clean-noinstPROGRAMS: @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list prio_list$(EXEEXT): $(prio_list_OBJECTS) $(prio_list_DEPENDENCIES) $(EXTRA_prio_list_DEPENDENCIES) @rm -f prio_list$(EXEEXT) $(AM_V_CCLD)$(LINK) $(prio_list_OBJECTS) $(prio_list_LDADD) $(LIBS) prio_list2$(EXEEXT): $(prio_list2_OBJECTS) $(prio_list2_DEPENDENCIES) $(EXTRA_prio_list2_DEPENDENCIES) @rm -f prio_list2$(EXEEXT) $(AM_V_CCLD)$(LINK) $(prio_list2_OBJECTS) $(prio_list2_LDADD) $(LIBS) prio_list3$(EXEEXT): $(prio_list3_OBJECTS) $(prio_list3_DEPENDENCIES) $(EXTRA_prio_list3_DEPENDENCIES) @rm -f prio_list3$(EXEEXT) $(AM_V_CCLD)$(LINK) $(prio_list3_OBJECTS) $(prio_list3_LDADD) $(LIBS) starpu_barrier$(EXEEXT): $(starpu_barrier_OBJECTS) $(starpu_barrier_DEPENDENCIES) $(EXTRA_starpu_barrier_DEPENDENCIES) @rm -f starpu_barrier$(EXEEXT) $(AM_V_CCLD)$(LINK) $(starpu_barrier_OBJECTS) $(starpu_barrier_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/prio_list.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/prio_list2.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/prio_list3.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_barrier.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-am TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-am CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-am cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? prio_list.sh.log: prio_list.sh @p='prio_list.sh'; \ b='prio_list.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) prio_list2.sh.log: prio_list2.sh @p='prio_list2.sh'; \ b='prio_list2.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) prio_list3.sh.log: prio_list3.sh @p='prio_list3.sh'; \ b='prio_list3.sh'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: check-am all-am: Makefile $(PROGRAMS) installdirs: install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ mostlyclean-am distclean: distclean-am -rm -f ./$(DEPDIR)/prio_list.Po -rm -f ./$(DEPDIR)/prio_list2.Po -rm -f ./$(DEPDIR)/prio_list3.Po -rm -f ./$(DEPDIR)/starpu_barrier.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f ./$(DEPDIR)/prio_list.Po -rm -f ./$(DEPDIR)/prio_list2.Po -rm -f ./$(DEPDIR)/prio_list3.Po -rm -f ./$(DEPDIR)/starpu_barrier.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: .MAKE: check-am install-am install-strip .PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ check-am clean clean-generic clean-libtool \ clean-noinstPROGRAMS cscopelist-am ctags ctags-am distclean \ distclean-compile distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ pdf pdf-am ps ps-am recheck tags tags-am uninstall \ uninstall-am .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 #MC_FLAGS=--cfg=model-check/reduction:none #MC_FLAGS+=--cfg=contexts/factory:ucontext #MC_FLAGS+=--cfg=model-check/sparse-checkpoint:yes #MC_FLAGS+=--cfg=model-check/visited:1000 # To record the failing trace #MC_FLAGS+=--cfg=model-check/record:1 #MC_FLAGS+=--cfg=model-check/reply:'1;3;4' # To see which simix calls are made #MC_FLAGS+=--log=simix_popping.thres:debug test: prio_list simgrid-mc ./prio_list platform.xml MAIN $(MC_FLAGS) debug: prio_list simgrid-mc ./prio_list platform.xml MAIN --log=mc_safety.thres:debug $(MC_FLAGS) test-barrier: starpu_barrier simgrid-mc ./starpu_barrier platform.xml MAIN $(MC_FLAGS) debug-barrier: starpu_barrier simgrid-mc ./starpu_barrier platform.xml MAIN --log=mc_safety.thres:debug $(MC_FLAGS) # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/tests/model-checking/barrier.sh000077500000000000000000000013441507764646700220120ustar00rootroot00000000000000#!/bin/bash -x # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # source $(dirname $0)/starpu-mc.sh test starpu_barrier starpu-1.4.9+dfsg/tests/model-checking/platform.xml000066400000000000000000000012631507764646700223730ustar00rootroot00000000000000 starpu-1.4.9+dfsg/tests/model-checking/prio_list.c000066400000000000000000000100001507764646700221620ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define _STARPU_MALLOC(p, s) do {p = malloc(s);} while (0) #define _STARPU_CALLOC(p, n, s) do {p = calloc(n, s);} while (0) #define _STARPU_MALLOC_CAST(p, s, t) do {p = (t) malloc(s);} while (0) #ifndef NOCONFIG #include #else #define _GNU_SOURCE 1 // Assuming recent simgrid #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef NLISTS #define NLISTS 1 #endif #ifndef NTHREADS #define NTHREADS 2 #endif #ifndef NELEMENTS #define NELEMENTS 6 #endif #ifndef NITERS #define NITERS 1 #endif // MC_ignore sg_mutex_t mutex[NLISTS]; LIST_TYPE(foo, unsigned prio; unsigned back; /* Push at back instead of front? */ ); PRIO_LIST_TYPE(foo, prio); struct foo_prio_list mylist[NLISTS]; void check_list_prio(struct foo_prio_list *list) { struct foo *cur; unsigned lastprio = UINT_MAX; unsigned back = 0; for (cur = foo_prio_list_begin(list); cur != foo_prio_list_end(list); cur = foo_prio_list_next(list, cur)) { if (cur->prio == lastprio) /* For same prio, back elements should never get before * front elements */ MC_assert(!(back && !cur->back)); else MC_assert(lastprio > cur->prio); lastprio = cur->prio; back = cur->back; } } void worker(int argc, char *argv[]) { unsigned myrank = atoi(argv[0]); unsigned i, n, l, iter; struct foo *elem; struct drand48_data buffer; long res; srand48_r(myrank, &buffer); l = myrank%NLISTS; for (iter = 0; iter < NITERS; iter++) { for (i = 0; i < NELEMENTS; i++) { elem = malloc(sizeof(*elem)); lrand48_r(&buffer, &res); elem->prio = res%10; lrand48_r(&buffer, &res); elem->back = res%2; sg_mutex_lock(mutex[l]); if (elem->back) foo_prio_list_push_back(&mylist[l], elem); else foo_prio_list_push_front(&mylist[l], elem); check_list_prio(&mylist[l]); sg_mutex_unlock(mutex[l]); } for (i = 0; i < NELEMENTS; i++) { lrand48_r(&buffer, &res); n = res%(NELEMENTS-i); sg_mutex_lock(mutex[l]); for (elem = foo_prio_list_begin(&mylist[l]); n--; elem = foo_prio_list_next(&mylist[l], elem)) ; foo_prio_list_erase(&mylist[l], elem); check_list_prio(&mylist[l]); sg_mutex_unlock(mutex[l]); } /* horrible way to wait for list getting empty */ sg_actor_sleep_for(1000); } } void master(int argc, char *argv[]) { } int main(int argc, char *argv[]) { unsigned l, i; if (argc < 3) { fprintf(stderr,"usage: %s platform.xml host\n", argv[0]); exit(EXIT_FAILURE); } printf("Running with:\n- %d threads\n- %d lists\n- %d elements\n- %d iterations\n", NTHREADS, NLISTS, NELEMENTS, NITERS); srand48(0); simgrid_init(&argc, argv); sg_cfg_set_int("contexts/stack-size", 128); simgrid_load_platform(argv[1]); for (l = 0; l < NLISTS; l++) { mutex[l] = sg_mutex_init(); foo_prio_list_init(&mylist[l]); } for (i = 0; i < NTHREADS; i++) { char *s; asprintf(&s, "%d\n", i); char **args = malloc(sizeof(char*)*2); args[0] = s; args[1] = NULL; sg_actor_create("test", sg_host_by_name(argv[2]), worker, 1, args); } simgrid_run(); return 0; } starpu-1.4.9+dfsg/tests/model-checking/prio_list.sh000077500000000000000000000013371507764646700223720ustar00rootroot00000000000000#!/bin/bash -x # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # source $(dirname $0)/starpu-mc.sh test prio_list starpu-1.4.9+dfsg/tests/model-checking/prio_list2.c000066400000000000000000000014171507764646700222600ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define NLISTS 2 #define NTHREADS 2 #define NELEMENTS 4 #define NITERS 1 #include "prio_list.c" starpu-1.4.9+dfsg/tests/model-checking/prio_list3.c000066400000000000000000000014171507764646700222610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define NLISTS 1 #define NTHREADS 3 #define NELEMENTS 4 #define NITERS 1 #include "prio_list.c" starpu-1.4.9+dfsg/tests/model-checking/starpu-mc.sh.in000077500000000000000000000022541507764646700227050ustar00rootroot00000000000000#!/bin/bash -x # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Test a model-checking program with simgrid model checker SIMGRID_MC=@SIMGRID_MC@ abs_top_srcdir=@abs_top_srcdir@ abs_builddir=@abs_builddir@ set -e [ -x "$SIMGRID_MC" ] || exit 77 #MC_FLAGS=--cfg=model-check/reduction:none # makes it much longer actually #MC_FLAGS+=--cfg=contexts/factory:ucontext #MC_FLAGS+=--cfg=model-check/sparse-checkpoint:yes #MC_FLAGS+=--cfg=model-check/visited:1000 test() { time $SIMGRID_MC $abs_builddir/$1 $abs_top_srcdir/tests/model-checking/platform.xml MAIN $MC_FLAGS } starpu-1.4.9+dfsg/tests/model-checking/starpu_barrier.c000066400000000000000000000071731507764646700232230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #define __COMMON_UTILS_H__ #define _STARPU_MALLOC(p, s) do {p = malloc(s);} while (0) #define _STARPU_CALLOC(p, n, s) do {p = calloc(n, s);} while (0) #define _STARPU_REALLOC(p, s) do {p = realloc(p, s);} while (0) #define STARPU_HG_DISABLE_CHECKING(v) ((void) 0) #define STARPU_HG_ENABLE_CHECKING(v) ((void) 0) #define ANNOTATE_HAPPENS_AFTER(v) ((void) 0) #define ANNOTATE_HAPPENS_BEFORE(v) ((void) 0) #define STARPU_DEBUG_PREFIX "[starpu]" #ifdef STARPU_VERBOSE # define _STARPU_DEBUG(fmt, ...) do { if (!_starpu_silent) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%s] " fmt ,__starpu_func__ ,## __VA_ARGS__); fflush(stderr); }} while(0) #else # define _STARPU_DEBUG(fmt, ...) do { } while (0) #endif #define STARPU_UYIELD() ((void)0) #ifndef NOCONFIG #include #else #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif // Assuming recent simgrid #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* common/thread.c references these, but doesn't need to have them working anyway */ starpu_pthread_mutex_t _starpu_simgrid_time_advance_mutex; starpu_pthread_cond_t _starpu_simgrid_time_advance_cond; void _starpu_simgrid_thread_start(int argc, char *argv[]) { } size_t _starpu_default_stack_size = 8192; void _starpu_simgrid_set_stack_size(size_t stack_size) { } starpu_sg_host_t _starpu_simgrid_get_host_by_name(const char *name) { return NULL; } static void _starpu_clock_gettime(struct timespec *ts) { double now = simgrid_get_clock(); ts->tv_sec = floor(now); ts->tv_nsec = floor((now - ts->tv_sec) * 1000000000); } void starpu_sleep(float nb_sec) { sg_actor_sleep_for(nb_sec); } #include #undef STARPU_DEBUG int starpu_worker_get_id(void) { return 0; } static inline unsigned _starpu_worker_mutex_is_sched_mutex(int workerid, starpu_pthread_mutex_t *mutex) { return 0; } #include #ifndef NTHREADS #define NTHREADS 2 #endif #ifndef NITERS #define NITERS 1 #endif struct _starpu_barrier barrier; void worker(int argc, char *argv[]) { unsigned iter; for (iter = 0; iter < NITERS; iter++) { MC_assert(barrier.count <= NTHREADS); _starpu_barrier_wait(&barrier); } } #undef main int main(int argc, char *argv[]) { unsigned i; if (argc < 3) { fprintf(stderr,"usage: %s platform.xml host\n", argv[0]); exit(EXIT_FAILURE); } srand48(0); simgrid_init(&argc, argv); sg_cfg_set_int("contexts/stack-size", 128); simgrid_load_platform(argv[1]); _starpu_barrier_init(&barrier, NTHREADS); for (i = 0; i < NTHREADS; i++) { char *s; asprintf(&s, "%d\n", i); char **args = malloc(sizeof(char*)*2); args[0] = s; args[1] = NULL; sg_actor_create("test", sg_host_by_name(argv[2]), worker, 1, args); } simgrid_run(); return 0; } starpu-1.4.9+dfsg/tests/openmp/000077500000000000000000000000001507764646700164505ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/openmp/api_01.c000066400000000000000000000111001507764646700176560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include #include /* * Check the OpenMP API getters return proper default results. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret; /* we clear the whole OMP environment for this test, to check the * default behaviour of API functions */ unsetenv("OMP_DYNAMIC"); unsetenv("OMP_NESTED"); unsetenv("OMP_SCHEDULE"); unsetenv("OMP_STACKSIZE"); unsetenv("OMP_WAIT_POLICY"); unsetenv("OMP_THREAD_LIMIT"); unsetenv("OMP_MAX_ACTIVE_LEVELS"); unsetenv("OMP_CANCELLATION"); unsetenv("OMP_DEFAULT_DEVICE"); unsetenv("OMP_MAX_TASK_PRIORITY"); unsetenv("OMP_PROC_BIND"); unsetenv("OMP_NUM_THREADS"); unsetenv("OMP_PLACES"); unsetenv("OMP_DISPLAY_ENV"); ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } #define check_omp_func(f,_tv) \ { \ const int v = (f()); \ const int tv = (_tv); \ printf(#f ": %d (should be %d)\n", v, tv); \ STARPU_ASSERT(v == tv); \ } const char * get_sched_name(int sched_value) { const char *sched_name = NULL; switch (sched_value) { case starpu_omp_sched_undefined: sched_name = ""; break; case starpu_omp_sched_static: sched_name = "static"; break; case starpu_omp_sched_dynamic: sched_name = "dynamic"; break; case starpu_omp_sched_guided: sched_name = "guided"; break; case starpu_omp_sched_auto: sched_name = "auto"; break; case starpu_omp_sched_runtime: sched_name = "runtime"; break; default: _STARPU_ERROR("invalid omp schedule value"); } return sched_name; } int main(void) { const int nb_cpus = starpu_cpu_worker_get_count(); check_omp_func(starpu_omp_get_num_threads, 1); check_omp_func(starpu_omp_get_thread_num, 0); /* since OMP_NUM_THREADS is cleared, starpu_omp_get_max_threads() should return nb_cpus */ check_omp_func(starpu_omp_get_max_threads, nb_cpus); check_omp_func(starpu_omp_get_num_procs, nb_cpus); check_omp_func(starpu_omp_in_parallel, 0); check_omp_func(starpu_omp_get_dynamic, 0); check_omp_func(starpu_omp_get_nested, 0); check_omp_func(starpu_omp_get_cancellation, 0); { const enum starpu_omp_sched_value target_kind = starpu_omp_sched_static; const int target_modifier = 0; enum starpu_omp_sched_value kind; int modifier; const char *sched_name; const char *target_sched_name; starpu_omp_get_schedule(&kind, &modifier); sched_name = get_sched_name(kind); target_sched_name = get_sched_name(target_kind); printf("starpu_omp_get_schedule: %s,%d (should be %s,%d)\n", sched_name, modifier, target_sched_name, target_modifier); STARPU_ASSERT(kind == target_kind && modifier == target_modifier); } check_omp_func(starpu_omp_get_thread_limit, nb_cpus); check_omp_func(starpu_omp_get_max_active_levels, 1); check_omp_func(starpu_omp_get_level, 0); { const int tv = 0; const int v = starpu_omp_get_ancestor_thread_num(0); printf("starpu_omp_get_ancestor_thread_num(0): %d (should be %d)\n", v, tv); STARPU_ASSERT(v == tv); } { const int tv = 1; const int v = starpu_omp_get_team_size(0); printf("starpu_omp_get_team_size(0): %d (should be %d)\n", v, tv); STARPU_ASSERT(v == tv); } check_omp_func(starpu_omp_get_active_level, 0); check_omp_func(starpu_omp_in_final, 0); check_omp_func(starpu_omp_get_proc_bind, starpu_omp_proc_bind_false); check_omp_func(starpu_omp_get_default_device, 0); /* TODO: support more than one device */ check_omp_func(starpu_omp_get_num_devices, 1); check_omp_func(starpu_omp_get_num_teams, 1); check_omp_func(starpu_omp_get_team_num, 0); check_omp_func(starpu_omp_is_initial_device, 1); check_omp_func(starpu_omp_get_initial_device, 0); check_omp_func(starpu_omp_get_max_task_priority, 0); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/array_slice_01.c000066400000000000000000000160231507764646700214130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Test recursive OpenMP tasks, data dependences, data slice dependences. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else #define NX 64 int global_vector[NX]; __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void task_region_h(void *buffers[], void *_args) { void **args = _args; struct starpu_vector_interface *_vector = buffers[0]; int nx = STARPU_VECTOR_GET_NX(_vector); int elemsize = STARPU_VECTOR_GET_ELEMSIZE(_vector); int slice_base = STARPU_VECTOR_GET_SLICE_BASE(_vector); int *v = (int *)STARPU_VECTOR_GET_PTR(_vector); int f = (int)(intptr_t)args[0]; int imin = (int)(intptr_t)args[1]; int imax = (int)(intptr_t)args[2]; int i; assert(elemsize == sizeof(v[0])); printf("depth 2 task, entry: vector ptr = %p, slice_base = %d, imin = %d, imax = %d\n", v, slice_base, imin, imax); for (i = imin; i < imax; i++) { assert(i-slice_base>=0); assert(i-slice_base= 2); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/cuda_task_01.c000066400000000000000000000135051507764646700210560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check executing a CUDA target task. */ #if !defined(STARPU_OPENMP) || !defined(STARPU_USE_CUDA) int main(void) { return STARPU_TEST_SKIPPED; } #else #define NX 64 int global_vector_1[NX]; int global_vector_2[NX]; __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void task_region_g(void *buffers[], void *args) { struct starpu_vector_interface *_vector_1 = buffers[0]; int nx1 = STARPU_VECTOR_GET_NX(_vector_1); int *v1 = (int *)STARPU_VECTOR_GET_PTR(_vector_1); struct starpu_vector_interface *_vector_2 = buffers[1]; int nx2 = STARPU_VECTOR_GET_NX(_vector_2); int *v2 = (int *)STARPU_VECTOR_GET_PTR(_vector_2); int f = (int)(intptr_t)args; STARPU_ASSERT(nx1 == nx2); printf("depth 1 task, entry: vector_1 ptr = %p\n", v1); printf("depth 1 task, entry: vector_2 ptr = %p\n", v2); printf("depth 1 task, entry: f = %d\n", f); fprintf(stderr, "cudaMemcpy: -->\n"); cudaMemcpyAsync(v2,v1,nx1*sizeof(*_vector_1), cudaMemcpyDeviceToDevice, starpu_cuda_get_local_stream()); fprintf(stderr, "cudaMemcpy: <--\n"); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } void master_g1(void *arg) { (void)arg; { starpu_data_handle_t region_vector_handle; int i; printf("master_g1: vector ptr = %p\n", global_vector_1); for (i = 0; i < NX; i++) { global_vector_1[i] = 1; } starpu_vector_data_register(®ion_vector_handle, STARPU_MAIN_RAM, (uintptr_t)global_vector_1, NX, sizeof(global_vector_1[0])); starpu_omp_handle_register(region_vector_handle); printf("master_g1: region_vector_handle = %p\n", region_vector_handle); } { starpu_data_handle_t region_vector_handle; int i; printf("master_g1: vector ptr = %p\n", global_vector_2); for (i = 0; i < NX; i++) { global_vector_2[i] = 0; } starpu_vector_data_register(®ion_vector_handle, STARPU_MAIN_RAM, (uintptr_t)global_vector_2, NX, sizeof(global_vector_2[0])); starpu_omp_handle_register(region_vector_handle); printf("master_g1: region_vector_handle = %p\n", region_vector_handle); } } void master_g2(void *arg) { (void)arg; starpu_data_handle_t region_vector_handles[2]; struct starpu_omp_task_region_attr attr; int i; region_vector_handles[0] = starpu_omp_data_lookup(global_vector_1); printf("master_g2: region_vector_handles[0] = %p\n", region_vector_handles[0]); region_vector_handles[1] = starpu_omp_data_lookup(global_vector_2); printf("master_g2: region_vector_handles[1] = %p\n", region_vector_handles[1]); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; #endif attr.cl.cpu_funcs[0] = NULL; attr.cl.cuda_funcs[0] = task_region_g; attr.cl.where = STARPU_CUDA; attr.cl.nbuffers = 2; attr.cl.modes[0] = STARPU_R; attr.cl.modes[1] = STARPU_W; attr.handles = region_vector_handles; attr.cl_arg_size = sizeof(void *); attr.cl_arg_free = 0; attr.if_clause = 1; attr.final_clause = 0; attr.untied_clause = 1; attr.mergeable_clause = 0; i = 0; attr.cl_arg = (void *)(intptr_t)i; starpu_omp_task_region(&attr); } void parallel_region_f(void *buffers[], void *args) { (void)buffers; (void)args; starpu_omp_master(master_g1, NULL); starpu_omp_barrier(); { starpu_data_handle_t region_vector_handle_1; region_vector_handle_1 = starpu_omp_data_lookup(global_vector_1); printf("parallel_region block 1: region_vector_handle_1 = %p\n", region_vector_handle_1); } { starpu_data_handle_t region_vector_handle_2; region_vector_handle_2 = starpu_omp_data_lookup(global_vector_2); printf("parallel_region block 1: region_vector_handle_2 = %p\n", region_vector_handle_2); } starpu_omp_barrier(); starpu_omp_master(master_g2, NULL); starpu_omp_barrier(); { starpu_data_handle_t region_vector_handle_1; region_vector_handle_1 = starpu_omp_data_lookup(global_vector_1); printf("parallel_region block 2: region_vector_handle_1 = %p\n", region_vector_handle_1); } { starpu_data_handle_t region_vector_handle_2; region_vector_handle_2 = starpu_omp_data_lookup(global_vector_2); printf("parallel_region block 2: region_vector_handle_2 = %p\n", region_vector_handle_2); } } int main(void) { struct starpu_omp_parallel_region_attr attr; if (starpu_cuda_worker_get_count() < 1) { return STARPU_TEST_SKIPPED; } memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); int i; for (i = 0; i < NX; i++) { if (global_vector_1[i] != global_vector_2[i]) { fprintf(stderr, "check failed: global_vector_1[%d] = %d, global_vector_2[%d] = %d\n", i, global_vector_1[i], i, global_vector_2[i]); return EXIT_FAILURE; } } return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/environment.c000066400000000000000000000032101507764646700211540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include #include /* * Check OpenMP environment variables are properly parsed. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else int main(void) { setenv("OMP_DYNAMIC","false", 1); setenv("OMP_NESTED","false", 1); setenv("OMP_SCHEDULE","auto", 1); setenv("OMP_STACKSIZE","2M", 1); setenv("OMP_WAIT_POLICY","passive", 1); setenv("OMP_THREAD_LIMIT","0", 1); setenv("OMP_MAX_ACTIVE_LEVELS","4", 1); setenv("OMP_CANCELLATION","false", 1); setenv("OMP_DEFAULT_DEVICE","0", 1); setenv("OMP_MAX_TASK_PRIORITY", "20", 1); setenv("OMP_PROC_BIND","spread, spread, close", 1); setenv("OMP_NUM_THREADS","4, 16, 2", 1); setenv("OMP_PLACES","{1,2,3,4},{5,6,7,8}", 1); setenv("OMP_DISPLAY_ENV","verbose", 1); int ret = starpu_omp_init(); if (ret == -EINVAL) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); starpu_omp_shutdown(); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/init_exit_01.c000066400000000000000000000020651507764646700211130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include /* * Check the starpu_omp_init/shutdown calls. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else int main(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); starpu_omp_shutdown(); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/init_exit_02.c000066400000000000000000000023421507764646700211120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include /* * Check the starpu_omp_init/shutdown calls when called from constructor/destructor. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } int main(void) { return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_01.c000066400000000000000000000034441507764646700207150ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP parallel region support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_parallel_region_attr attr; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_02.c000066400000000000000000000046571507764646700207250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the nested OpenMP parallel regions support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void parallel_region_2_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] parallel region 2: task thread = %d\n", (void *)tid, worker_id); } void parallel_region_1_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; struct starpu_omp_parallel_region_attr attr; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] parallel region 1: task thread = %d\n", (void *)tid, worker_id); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_2_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); } int main(void) { struct starpu_omp_parallel_region_attr attr; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_1_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_03.c000066400000000000000000000035341507764646700207170ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check running multiple OpenMP parallel regions one at a time. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_parallel_region_attr attr; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); starpu_omp_parallel_region(&attr); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_barrier_01.c000066400000000000000000000042341507764646700224210ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP parallel barrier support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- barrier 1\n", (void *)tid, worker_id); starpu_omp_barrier(); printf("[tid %p] task thread = %d -- barrier 2\n", (void *)tid, worker_id); starpu_omp_barrier(); printf("[tid %p] task thread = %d -- barrier 3\n", (void *)tid, worker_id); starpu_omp_barrier(); printf("[tid %p] task thread = %d -- barrier 4\n", (void *)tid, worker_id); starpu_omp_barrier(); } int main(void) { pthread_t tid; struct starpu_omp_parallel_region_attr attr; tid = pthread_self(); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_critical_01.c000066400000000000000000000046241507764646700225700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP critical support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void critical_g(void *arg) { (void) arg; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- critical\n", (void *)tid, worker_id); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); starpu_omp_critical(critical_g, NULL, NULL); starpu_omp_critical(critical_g, NULL, NULL); starpu_omp_critical(critical_g, NULL, NULL); starpu_omp_critical(critical_g, NULL, NULL); printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_parallel_region_attr attr; pthread_t tid; tid = pthread_self(); printf("
    \n"); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_critical_inline_01.c000066400000000000000000000052021507764646700241170ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the inline OpenMP critical support */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); starpu_omp_critical_inline_begin(NULL); printf("[tid %p] task thread = %d -- critical\n", (void *)tid, worker_id); starpu_omp_critical_inline_end(NULL); starpu_omp_critical_inline_begin(NULL); printf("[tid %p] task thread = %d -- critical\n", (void *)tid, worker_id); starpu_omp_critical_inline_end(NULL); starpu_omp_critical_inline_begin(NULL); printf("[tid %p] task thread = %d -- critical\n", (void *)tid, worker_id); starpu_omp_critical_inline_end(NULL); starpu_omp_critical_inline_begin(NULL); printf("[tid %p] task thread = %d -- critical\n", (void *)tid, worker_id); starpu_omp_critical_inline_end(NULL); printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_parallel_region_attr attr; pthread_t tid; tid = pthread_self(); printf("
    \n"); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_critical_named_01.c000066400000000000000000000051671507764646700237370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP named critical support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void critical_g(void *arg) { (void) arg; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- critical \"g\"\n", (void *)tid, worker_id); } void critical_h(void *arg) { (void) arg; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- critical \"h\"\n", (void *)tid, worker_id); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); starpu_omp_critical(critical_g, NULL, "g"); starpu_omp_critical(critical_h, NULL, "h"); starpu_omp_critical(critical_g, NULL, "g"); starpu_omp_critical(critical_h, NULL, "h"); printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_parallel_region_attr attr; pthread_t tid; tid = pthread_self(); printf("
    \n"); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_critical_named_inline_01.c000066400000000000000000000052311507764646700252650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the inline OpenMP named critical support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); starpu_omp_critical_inline_begin("g"); printf("[tid %p] task thread = %d -- critical \"g\"\n", (void *)tid, worker_id); starpu_omp_critical_inline_end("g"); starpu_omp_critical_inline_begin("h"); printf("[tid %p] task thread = %d -- critical \"h\"\n", (void *)tid, worker_id); starpu_omp_critical_inline_end("h"); starpu_omp_critical_inline_begin("g"); printf("[tid %p] task thread = %d -- critical \"g\"\n", (void *)tid, worker_id); starpu_omp_critical_inline_end("g"); starpu_omp_critical_inline_begin("h"); printf("[tid %p] task thread = %d -- critical \"h\"\n", (void *)tid, worker_id); starpu_omp_critical_inline_end("h"); printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_parallel_region_attr attr; pthread_t tid; tid = pthread_self(); printf("
    \n"); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_for_01.c000066400000000000000000000116101507764646700215550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP parallel for support, with multiple schedule and chunk settings. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else #define NB_ITERS 256 #define CHUNK 16 unsigned long long array[NB_ITERS]; __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void for_g(unsigned long long i, unsigned long long nb_i, void *arg) { int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d, for [%s] iterations first=%llu:nb=%llu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i); for (; nb_i > 0; i++, nb_i--) { array[i] = 1; } } void parallel_region_1_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"static chunk", NB_ITERS, CHUNK, starpu_omp_sched_static, 0, 0); } void parallel_region_2_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"static nochunk", NB_ITERS, 0, starpu_omp_sched_static, 0, 0); } void parallel_region_3_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"dynamic chunk", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 0, 0); } void parallel_region_4_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"dynamic nochunk", NB_ITERS, 0, starpu_omp_sched_dynamic, 0, 0); } void parallel_region_5_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 0, 0); } void parallel_region_6_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 0, 0); } static void clear_array(void) { memset(array, 0, NB_ITERS*sizeof(unsigned long long)); } static void check_array(void) { unsigned long long i; unsigned long long s = 0; for (i = 0; i < NB_ITERS; i++) { s += array[i]; } if (s != NB_ITERS) { printf("missing iterations\n"); exit(1); } } int main(void) { struct starpu_omp_parallel_region_attr attr; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.where = STARPU_CPU; attr.if_clause = 1; clear_array(); attr.cl.cpu_funcs[0] = parallel_region_1_f; starpu_omp_parallel_region(&attr); check_array(); clear_array(); attr.cl.cpu_funcs[0] = parallel_region_2_f; starpu_omp_parallel_region(&attr); check_array(); clear_array(); attr.cl.cpu_funcs[0] = parallel_region_3_f; starpu_omp_parallel_region(&attr); check_array(); clear_array(); attr.cl.cpu_funcs[0] = parallel_region_4_f; starpu_omp_parallel_region(&attr); check_array(); clear_array(); attr.cl.cpu_funcs[0] = parallel_region_5_f; starpu_omp_parallel_region(&attr); check_array(); clear_array(); attr.cl.cpu_funcs[0] = parallel_region_6_f; starpu_omp_parallel_region(&attr); check_array(); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_for_02.c000066400000000000000000000062061507764646700215630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check multiple OpenMP parallel for support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else #define NB_ITERS 4321 #define CHUNK 42 __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void for_g(unsigned long long i, unsigned long long nb_i, void *arg) { int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d, for [%s] iterations first=%llu:nb=%llu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i); for (; nb_i > 0; i++, nb_i--) { printf("[tid %p] task thread = %d, for [%s] iteration %llu\n", (void *)tid, worker_id, (const char *)arg, i); } } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"static chunk", NB_ITERS, CHUNK, starpu_omp_sched_static, 0, 1); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"static nochunk", NB_ITERS, 0, starpu_omp_sched_static, 0, 1); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"dynamic chunk", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 0, 1); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"dynamic nochunk", NB_ITERS, 0, starpu_omp_sched_dynamic, 0, 1); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"guided chunk", NB_ITERS, CHUNK, starpu_omp_sched_guided, 0, 1); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 0, 1); } int main(void) { struct starpu_omp_parallel_region_attr attr; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_for_ordered_01.c000066400000000000000000000124351507764646700232670ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP ordered parallel for support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else #define NB_ITERS 256 #define CHUNK 16 unsigned long long array[NB_ITERS]; __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } struct s_ordered_arg { const char *msg; unsigned long long i; }; void ordered_f(void *_arg) { struct s_ordered_arg *arg = _arg; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d, for [%s] iteration (ordered) %llu\n", (void *)tid, worker_id, arg->msg, arg->i); } void for_g(unsigned long long i, unsigned long long nb_i, void *arg) { int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d, for [%s] iterations first=%llu:nb=%llu\n", (void *)tid, worker_id, (const char *)arg, i, nb_i); for (; nb_i > 0; i++, nb_i--) { struct s_ordered_arg ordered_arg = { arg, i }; array[i] = 1; starpu_omp_ordered(ordered_f, &ordered_arg); } } void parallel_region_1_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"static chunk", NB_ITERS, CHUNK, starpu_omp_sched_static, 1, 0); } void parallel_region_2_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"static nochunk", NB_ITERS, 0, starpu_omp_sched_static, 1, 0); } void parallel_region_3_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"dynamic chunk", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 1, 0); } void parallel_region_4_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"dynamic nochunk", NB_ITERS, 0, starpu_omp_sched_dynamic, 1, 0); } void parallel_region_5_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 1, 0); } void parallel_region_6_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 1, 0); } static void clear_array(void) { memset(array, 0, NB_ITERS*sizeof(unsigned long long)); } static void check_array(void) { unsigned long long i; unsigned long long s = 0; for (i = 0; i < NB_ITERS; i++) { s += array[i]; } if (s != NB_ITERS) { printf("missing iterations\n"); exit(1); } } int main(void) { struct starpu_omp_parallel_region_attr attr; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.where = STARPU_CPU; attr.if_clause = 1; clear_array(); attr.cl.cpu_funcs[0] = parallel_region_1_f; starpu_omp_parallel_region(&attr); check_array(); clear_array(); attr.cl.cpu_funcs[0] = parallel_region_2_f; starpu_omp_parallel_region(&attr); check_array(); clear_array(); attr.cl.cpu_funcs[0] = parallel_region_3_f; starpu_omp_parallel_region(&attr); check_array(); clear_array(); attr.cl.cpu_funcs[0] = parallel_region_4_f; starpu_omp_parallel_region(&attr); check_array(); clear_array(); attr.cl.cpu_funcs[0] = parallel_region_5_f; starpu_omp_parallel_region(&attr); check_array(); clear_array(); attr.cl.cpu_funcs[0] = parallel_region_6_f; starpu_omp_parallel_region(&attr); check_array(); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_master_01.c000066400000000000000000000045461507764646700222740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP master support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void master_g(void *arg) { (void) arg; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- master\n", (void *)tid, worker_id); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); starpu_omp_master(master_g, NULL); starpu_omp_master(master_g, NULL); starpu_omp_master(master_g, NULL); starpu_omp_master(master_g, NULL); printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_parallel_region_attr attr; pthread_t tid; tid = pthread_self(); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_master_inline_01.c000066400000000000000000000046741507764646700236340ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP inline master support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); if (starpu_omp_master_inline()) printf("[tid %p] task thread = %d -- master\n", (void *)tid, worker_id); if (starpu_omp_master_inline()) printf("[tid %p] task thread = %d -- master\n", (void *)tid, worker_id); if (starpu_omp_master_inline()) printf("[tid %p] task thread = %d -- master\n", (void *)tid, worker_id); if (starpu_omp_master_inline()) printf("[tid %p] task thread = %d -- master\n", (void *)tid, worker_id); printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_parallel_region_attr attr; pthread_t tid; tid = pthread_self(); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_nested_lock_01.c000066400000000000000000000065431507764646700232720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP nested lock support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } starpu_omp_nest_lock_t omp_nest_lock; void locked_func_n2(void) { const int worker_id = starpu_worker_get_id(); const pthread_t tid = pthread_self(); printf("[tid %p] task thread = %d -- locked function n2\n", (void *)tid, worker_id); } void locked_func_n1(void) { const int worker_id = starpu_worker_get_id(); const pthread_t tid = pthread_self(); printf("[tid %p] task thread = %d -- locked function n1 -->\n", (void *)tid, worker_id); starpu_omp_set_nest_lock(&omp_nest_lock); locked_func_n2(); starpu_omp_unset_nest_lock(&omp_nest_lock); printf("[tid %p] task thread = %d -- locked function n1 <--\n", (void *)tid, worker_id); } void master_g1(void *arg) { (void)arg; starpu_omp_init_nest_lock(&omp_nest_lock); } void master_g2(void *arg) { (void)arg; starpu_omp_destroy_nest_lock(&omp_nest_lock); } void parallel_region_f(void *buffers[], void *args) { const int worker_id = starpu_worker_get_id(); const pthread_t tid = pthread_self(); (void) buffers; (void) args; printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); starpu_omp_master(master_g1, NULL); starpu_omp_barrier(); starpu_omp_set_nest_lock(&omp_nest_lock); locked_func_n1(); starpu_omp_unset_nest_lock(&omp_nest_lock); starpu_omp_set_nest_lock(&omp_nest_lock); locked_func_n1(); starpu_omp_unset_nest_lock(&omp_nest_lock); starpu_omp_set_nest_lock(&omp_nest_lock); locked_func_n1(); starpu_omp_unset_nest_lock(&omp_nest_lock); starpu_omp_set_nest_lock(&omp_nest_lock); locked_func_n1(); starpu_omp_unset_nest_lock(&omp_nest_lock); starpu_omp_barrier(); starpu_omp_master(master_g2, NULL); printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_parallel_region_attr attr; pthread_t tid; tid = pthread_self(); printf("
    \n"); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_sections_01.c000066400000000000000000000055511507764646700226250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP parallel sections support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void func(void *arg) { int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d, section [%s]\n", (void *)tid, worker_id, (const char *)arg); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; void (*section_f[4])(void *); void *section_args[4]; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); section_f[0] = func; section_f[1] = func; section_f[2] = func; section_f[3] = func; section_args[0] = (void *)"A"; section_args[1] = (void *)"B"; section_args[2] = (void *)"C"; section_args[3] = (void *)"D"; starpu_omp_sections(4, section_f, section_args, 0); section_args[0] = (void *)"E"; section_args[1] = (void *)"F"; section_args[2] = (void *)"G"; section_args[3] = (void *)"H"; starpu_omp_sections(4, section_f, section_args, 0); section_args[0] = (void *)"I"; section_args[1] = (void *)"J"; section_args[2] = (void *)"K"; section_args[3] = (void *)"L"; starpu_omp_sections(4, section_f, section_args, 0); section_args[0] = (void *)"M"; section_args[1] = (void *)"N"; section_args[2] = (void *)"O"; section_args[3] = (void *)"P"; starpu_omp_sections(4, section_f, section_args, 0); } int main(void) { struct starpu_omp_parallel_region_attr attr; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_sections_combined_01.c000066400000000000000000000054751507764646700244720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP combined parallel sections support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void func(unsigned long long section_num, void *arg) { int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d, section [%llu: %s]\n", (void *)tid, worker_id, section_num, (const char *)arg); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; void *section_args[4]; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d\n", (void *)tid, worker_id); section_args[0] = (void *)"A"; section_args[1] = (void *)"B"; section_args[2] = (void *)"C"; section_args[3] = (void *)"D"; starpu_omp_sections_combined(4, func, section_args, 0); section_args[0] = (void *)"E"; section_args[1] = (void *)"F"; section_args[2] = (void *)"G"; section_args[3] = (void *)"H"; starpu_omp_sections_combined(4, func, section_args, 0); section_args[0] = (void *)"I"; section_args[1] = (void *)"J"; section_args[2] = (void *)"K"; section_args[3] = (void *)"L"; starpu_omp_sections_combined(4, func, section_args, 0); section_args[0] = (void *)"M"; section_args[1] = (void *)"N"; section_args[2] = (void *)"O"; section_args[3] = (void *)"P"; starpu_omp_sections_combined(4, func, section_args, 0); } int main(void) { struct starpu_omp_parallel_region_attr attr; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_simple_lock_01.c000066400000000000000000000055201507764646700232730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP simple lock support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } starpu_omp_lock_t omp_lock; void locked_func(void) { const int worker_id = starpu_worker_get_id(); const pthread_t tid = pthread_self(); printf("[tid %p] task thread = %d -- locked function\n", (void *)tid, worker_id); } void master_g1(void *arg) { (void)arg; starpu_omp_init_lock(&omp_lock); } void master_g2(void *arg) { (void)arg; starpu_omp_destroy_lock(&omp_lock); } void parallel_region_f(void *buffers[], void *args) { const int worker_id = starpu_worker_get_id(); const pthread_t tid = pthread_self(); (void) buffers; (void) args; printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); starpu_omp_master(master_g1, NULL); starpu_omp_barrier(); starpu_omp_set_lock(&omp_lock); locked_func(); starpu_omp_unset_lock(&omp_lock); starpu_omp_set_lock(&omp_lock); locked_func(); starpu_omp_unset_lock(&omp_lock); starpu_omp_set_lock(&omp_lock); locked_func(); starpu_omp_unset_lock(&omp_lock); starpu_omp_set_lock(&omp_lock); locked_func(); starpu_omp_unset_lock(&omp_lock); starpu_omp_barrier(); starpu_omp_master(master_g2, NULL); printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_parallel_region_attr attr; pthread_t tid; tid = pthread_self(); printf("
    \n"); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_single_copyprivate_01.c000066400000000000000000000063211507764646700247000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP single with copyprivate support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void single_g(void *arg, void *_data, unsigned long long data_size) { (void) arg; int *data = _data; STARPU_ASSERT(data_size >= sizeof(*data)); int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); *data = worker_id; printf("[tid %p] task thread = %d -- single\n", (void *)tid, worker_id); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; int single_worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); starpu_omp_single_copyprivate(single_g, NULL, &single_worker_id, sizeof(single_worker_id)); printf("[tid %p] task thread = %d -- copyprivate: single_worker_id = %d\n", (void *)tid, worker_id, single_worker_id); starpu_omp_single_copyprivate(single_g, NULL, &single_worker_id, sizeof(single_worker_id)); printf("[tid %p] task thread = %d -- copyprivate: single_worker_id = %d\n", (void *)tid, worker_id, single_worker_id); starpu_omp_single_copyprivate(single_g, NULL, &single_worker_id, sizeof(single_worker_id)); printf("[tid %p] task thread = %d -- copyprivate: single_worker_id = %d\n", (void *)tid, worker_id, single_worker_id); starpu_omp_single_copyprivate(single_g, NULL, &single_worker_id, sizeof(single_worker_id)); printf("[tid %p] task thread = %d -- copyprivate: single_worker_id = %d\n", (void *)tid, worker_id, single_worker_id); printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_parallel_region_attr attr; pthread_t tid; tid = pthread_self(); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_single_copyprivate_inline_01.c000066400000000000000000000050571507764646700262430ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP inline single with copyprivate support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; int single_worker_id; int i; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); for (i=0; i<4; i++) { int *single_data; if ((single_data = starpu_omp_single_copyprivate_inline_begin(&single_worker_id)) == NULL) { printf("[tid %p] task thread = %d -- single\n", (void *)tid, worker_id); single_worker_id = worker_id; } else { memcpy(&single_worker_id, single_data, sizeof(single_worker_id)); } starpu_omp_single_copyprivate_inline_end(); printf("[tid %p] task thread = %d -- single_worker_id = %d\n", (void *)tid, worker_id, single_worker_id); } printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_parallel_region_attr attr; pthread_t tid; tid = pthread_self(); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_single_inline_01.c000066400000000000000000000060471507764646700236160ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP inline single support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); /* nowait = 0 */ if (starpu_omp_single_inline()) printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); starpu_omp_barrier(); if (starpu_omp_single_inline()) printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); starpu_omp_barrier(); if (starpu_omp_single_inline()) printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); starpu_omp_barrier(); if (starpu_omp_single_inline()) printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); starpu_omp_barrier(); /* nowait = 1 */ if (starpu_omp_single_inline()) printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); if (starpu_omp_single_inline()) printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); if (starpu_omp_single_inline()) printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); if (starpu_omp_single_inline()) printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_parallel_region_attr attr; pthread_t tid; tid = pthread_self(); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_single_nowait_01.c000066400000000000000000000046001507764646700236320ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP single nowait support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void single_g(void *arg) { (void) arg; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- single nowait\n", (void *)tid, worker_id); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); starpu_omp_single(single_g, NULL, 1); starpu_omp_single(single_g, NULL, 1); starpu_omp_single(single_g, NULL, 1); starpu_omp_single(single_g, NULL, 1); printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_parallel_region_attr attr; pthread_t tid; tid = pthread_self(); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/parallel_single_wait_01.c000066400000000000000000000045671507764646700233110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP single wait support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void single_g(void *arg) { (void) arg; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- single\n", (void *)tid, worker_id); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d -- parallel -->\n", (void *)tid, worker_id); starpu_omp_single(single_g, NULL, 0); starpu_omp_single(single_g, NULL, 0); starpu_omp_single(single_g, NULL, 0); starpu_omp_single(single_g, NULL, 0); printf("[tid %p] task thread = %d -- parallel <--\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_parallel_region_attr attr; pthread_t tid; tid = pthread_self(); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); starpu_omp_parallel_region(&attr); printf("
    \n"); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/task_01.c000066400000000000000000000051031507764646700200550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP task support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void task_region_g(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d: explicit task \"g\"\n", (void *)tid, worker_id); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; struct starpu_omp_task_region_attr attr; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d: implicit task \"f\"\n", (void *)tid, worker_id); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = task_region_g; attr.cl.where = STARPU_CPU; attr.if_clause = 1; attr.final_clause = 0; attr.untied_clause = 1; attr.mergeable_clause = 0; starpu_omp_task_region(&attr); starpu_omp_task_region(&attr); starpu_omp_task_region(&attr); starpu_omp_task_region(&attr); } int main(void) { struct starpu_omp_parallel_region_attr attr; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/task_02.c000066400000000000000000000131701507764646700200610ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP nested task support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else #define NX 64 int global_vector[NX]; __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void task_region_h(void *buffers[], void *args) { struct starpu_vector_interface *_vector = buffers[0]; int nx = STARPU_VECTOR_GET_NX(_vector); int *v = (int *)STARPU_VECTOR_GET_PTR(_vector); int f = (int)(intptr_t)args; int i; printf("depth 2 task, entry: vector ptr = %p\n", v); for (i = 0; i < nx; i++) { v[i] += f; } printf("depth 2 task ending\n"); } void task_region_g(void *buffers[], void *args) { struct starpu_vector_interface *_vector = buffers[0]; int nx = STARPU_VECTOR_GET_NX(_vector); int *v = (int *)STARPU_VECTOR_GET_PTR(_vector); int f = (int)(intptr_t)args; printf("depth 1 task, entry: vector ptr = %p\n", v); { starpu_data_handle_t task_vector_handle; int i; for (i = 0; i < nx; i++) { v[i] += f; } starpu_vector_data_register(&task_vector_handle, STARPU_MAIN_RAM, (uintptr_t)v, NX, sizeof(v[0])); starpu_omp_handle_register(task_vector_handle); printf("depth 1 task, block 1: task_vector_handle = %p\n", task_vector_handle); } { starpu_data_handle_t task_vector_handle; struct starpu_omp_task_region_attr attr; int i; task_vector_handle = starpu_omp_data_lookup(v); printf("depth 1 task, block 2: task_vector_handle = %p\n", task_vector_handle); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = task_region_h; attr.cl.where = STARPU_CPU; attr.cl.nbuffers = 1; attr.cl.modes[0] = STARPU_RW; attr.handles = &task_vector_handle; attr.cl_arg_size = sizeof(void *); attr.cl_arg_free = 0; attr.if_clause = 1; attr.final_clause = 0; attr.untied_clause = 1; attr.mergeable_clause = 0; i = 0; attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); } starpu_omp_taskwait(); } void master_g1(void *arg) { (void)arg; starpu_data_handle_t region_vector_handle; int i; printf("master_g1: vector ptr = %p\n", global_vector); for (i = 0; i < NX; i++) { global_vector[i] = 1; } starpu_vector_data_register(®ion_vector_handle, STARPU_MAIN_RAM, (uintptr_t)global_vector, NX, sizeof(global_vector[0])); starpu_omp_handle_register(region_vector_handle); printf("master_g1: region_vector_handle = %p\n", region_vector_handle); } void master_g2(void *arg) { (void)arg; starpu_data_handle_t region_vector_handle; struct starpu_omp_task_region_attr attr; int i; region_vector_handle = starpu_omp_data_lookup(global_vector); printf("master_g2: region_vector_handle = %p\n", region_vector_handle); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = task_region_g; attr.cl.where = STARPU_CPU; attr.cl.nbuffers = 1; attr.cl.modes[0] = STARPU_RW; attr.handles = ®ion_vector_handle; attr.cl_arg_size = sizeof(void *); attr.cl_arg_free = 0; attr.if_clause = 1; attr.final_clause = 0; attr.untied_clause = 1; attr.mergeable_clause = 0; i = 0; attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); } void parallel_region_f(void *buffers[], void *args) { (void)buffers; (void)args; starpu_omp_master(master_g1, NULL); starpu_omp_barrier(); { starpu_data_handle_t region_vector_handle; region_vector_handle = starpu_omp_data_lookup(global_vector); printf("parallel_region block 1: region_vector_handle = %p\n", region_vector_handle); } starpu_omp_barrier(); starpu_omp_master(master_g2, NULL); starpu_omp_barrier(); { starpu_data_handle_t region_vector_handle; region_vector_handle = starpu_omp_data_lookup(global_vector); printf("parallel_region block 2: region_vector_handle = %p\n", region_vector_handle); } } int main(void) { struct starpu_omp_parallel_region_attr attr; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/task_03.c000066400000000000000000000037401507764646700200640ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP orphaned task support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void task_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d: explicit task \"f\"\n", (void *)tid, worker_id); } int main(void) { struct starpu_omp_task_region_attr attr; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = task_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; attr.final_clause = 0; attr.untied_clause = 1; attr.mergeable_clause = 0; starpu_omp_task_region(&attr); starpu_omp_task_region(&attr); starpu_omp_task_region(&attr); starpu_omp_task_region(&attr); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/taskgroup_01.c000066400000000000000000000072061507764646700211400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP taskgroup support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void task_region_g(void *buffers[], void *args) { (void) buffers; int i = (int)(intptr_t) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d: explicit task \"g[%d]\"\n", (void *)tid, worker_id, i); } void taskgroup_f(void *arg) { struct starpu_omp_task_region_attr attr; int *p_i = (int *)arg; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = task_region_g; attr.cl.where = STARPU_CPU; attr.cl_arg_size = sizeof(void *); attr.cl_arg_free = 0; attr.if_clause = 1; attr.final_clause = 0; attr.untied_clause = 1; attr.mergeable_clause = 0; attr.cl_arg = (void *)(intptr_t)(*p_i)++; starpu_omp_task_region(&attr); attr.cl_arg = (void *)(intptr_t)(*p_i)++; starpu_omp_task_region(&attr); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; struct starpu_omp_task_region_attr attr; int i = 0; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d: implicit task \"f\"\n", (void *)tid, worker_id); starpu_omp_taskgroup(taskgroup_f, (void *)&i); printf("[tid %p] task thread = %d: implicit task \"f\": taskgroup\n", (void *)tid, worker_id); starpu_omp_taskgroup(taskgroup_f, (void *)&i); printf("[tid %p] task thread = %d: implicit task \"f\": taskgroup\n", (void *)tid, worker_id); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = task_region_g; attr.cl.where = STARPU_CPU; attr.cl_arg_size = sizeof(void *); attr.cl_arg_free = 0; attr.if_clause = 1; attr.final_clause = 0; attr.untied_clause = 1; attr.mergeable_clause = 0; attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); } int main(void) { struct starpu_omp_parallel_region_attr attr; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/taskgroup_02.c000066400000000000000000000073671507764646700211510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP multiple taskgroup support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void task_region_g(void *buffers[], void *args) { (void) buffers; int i = (int)(intptr_t) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d: explicit task \"g[%d]\"\n", (void *)tid, worker_id, i); } void taskgroup_f(void *arg) { struct starpu_omp_task_region_attr attr; int *p_i = (int *)arg; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = task_region_g; attr.cl.where = STARPU_CPU; attr.cl_arg_size = sizeof(void *); attr.cl_arg_free = 0; attr.if_clause = 1; attr.final_clause = 0; attr.untied_clause = 1; attr.mergeable_clause = 0; attr.cl_arg = (void *)(intptr_t)(*p_i)++; starpu_omp_task_region(&attr); attr.cl_arg = (void *)(intptr_t)(*p_i)++; starpu_omp_task_region(&attr); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; struct starpu_omp_task_region_attr attr; int i = 0; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d: implicit task \"f\"\n", (void *)tid, worker_id); starpu_omp_taskgroup_inline_begin(); taskgroup_f((void *)&i); starpu_omp_taskgroup_inline_end(); printf("[tid %p] task thread = %d: implicit task \"f\": taskgroup\n", (void *)tid, worker_id); starpu_omp_taskgroup_inline_begin(); taskgroup_f((void *)&i); starpu_omp_taskgroup_inline_end(); printf("[tid %p] task thread = %d: implicit task \"f\": taskgroup\n", (void *)tid, worker_id); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = task_region_g; attr.cl.where = STARPU_CPU; attr.cl_arg_size = sizeof(void *); attr.cl_arg_free = 0; attr.if_clause = 1; attr.final_clause = 0; attr.untied_clause = 1; attr.mergeable_clause = 0; attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); } int main(void) { struct starpu_omp_parallel_region_attr attr; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/taskloop.c000066400000000000000000000044701507764646700204550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2018-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Check the OpenMP orphaned task support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void taskloop_callback(unsigned long long begin_i, unsigned long long end_i) { int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf ("begin = %llu , end = %llu, %p\n", begin_i, end_i, (void *)starpu_task_get_current()); } void taskloop_callback_wrapper(void *buffers[], void *_args) { (void) buffers; struct starpu_omp_task_region_attr * args = _args; taskloop_callback(args->begin_i, args->end_i); } int main(void) { struct starpu_omp_task_region_attr attr; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = taskloop_callback_wrapper; attr.cl_arg = &attr; attr.cl.where = STARPU_CPU; attr.if_clause = 1; attr.final_clause = 0; attr.untied_clause = 1; attr.mergeable_clause = 0; attr.nogroup_clause = 0; attr.is_loop = 0; attr.collapse = 0; attr.num_tasks = 5; attr.nb_iterations = 400; attr.grainsize = 130; starpu_omp_taskloop_inline_begin(&attr); starpu_omp_taskloop_inline_end(&attr); return 0; } #endif starpu-1.4.9+dfsg/tests/openmp/taskwait_01.c000066400000000000000000000063051507764646700207470ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Check the OpenMP taskwait support. */ #if !defined(STARPU_OPENMP) int main(void) { return STARPU_TEST_SKIPPED; } #else __attribute__((constructor)) static void omp_constructor(void) { int ret = starpu_omp_init(); if (ret == -EINVAL) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_omp_init"); } __attribute__((destructor)) static void omp_destructor(void) { starpu_omp_shutdown(); } void task_region_g(void *buffers[], void *args) { (void) buffers; int i = (int)(intptr_t) args; int worker_id; pthread_t tid; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d: explicit task \"g[%d]\"\n", (void *)tid, worker_id, i); } void parallel_region_f(void *buffers[], void *args) { (void) buffers; (void) args; int worker_id; pthread_t tid; struct starpu_omp_task_region_attr attr; int i = 0; tid = pthread_self(); worker_id = starpu_worker_get_id(); printf("[tid %p] task thread = %d: implicit task \"f\"\n", (void *)tid, worker_id); memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = task_region_g; attr.cl.where = STARPU_CPU; attr.cl_arg_size = sizeof(void *); attr.cl_arg_free = 0; attr.if_clause = 1; attr.final_clause = 0; attr.untied_clause = 1; attr.mergeable_clause = 0; attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); starpu_omp_taskwait(); printf("[tid %p] task thread = %d: implicit task \"f\": taskwait\n", (void *)tid, worker_id); attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); starpu_omp_taskwait(); printf("[tid %p] task thread = %d: implicit task \"f\": taskwait\n", (void *)tid, worker_id); attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); attr.cl_arg = (void *)(intptr_t)i++; starpu_omp_task_region(&attr); } int main(void) { struct starpu_omp_parallel_region_attr attr; memset(&attr, 0, sizeof(attr)); #ifdef STARPU_SIMGRID attr.cl.model = &starpu_perfmodel_nop; #endif attr.cl.flags = STARPU_CODELET_SIMGRID_EXECUTE; attr.cl.cpu_funcs[0] = parallel_region_f; attr.cl.where = STARPU_CPU; attr.if_clause = 1; starpu_omp_parallel_region(&attr); return 0; } #endif starpu-1.4.9+dfsg/tests/overlap/000077500000000000000000000000001507764646700166225ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/overlap/gpu_concurrency.c000066400000000000000000000057371507764646700222070ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include "../helper.h" #include /* * Check that concurrency does happen when using multi-stream CUDA. */ #ifdef STARPU_QUICK_CHECK #define NITERS 100000 #else #define NITERS 1000000 #endif #define NTASKS 64 #define SYNC 16 #ifdef STARPU_USE_CUDA extern void long_kernel_cuda(unsigned long niters); void codelet_long_kernel_async(void *descr[], void *arg) { (void)descr; (void)arg; long_kernel_cuda(NITERS); } void codelet_long_kernel_sync(void *descr[], void *arg) { (void)descr; (void)arg; long_kernel_cuda(NITERS); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } static struct starpu_perfmodel model_async = { .type = STARPU_HISTORY_BASED, .symbol = "long_kernel_async", }; static struct starpu_perfmodel model_sync = { .type = STARPU_HISTORY_BASED, .symbol = "long_kernel_sync", }; static struct starpu_codelet cl_async = { .cuda_funcs = {codelet_long_kernel_async}, .cuda_flags = {STARPU_CUDA_ASYNC}, .nbuffers = 0, .model = &model_async, }; static struct starpu_codelet cl = { .cuda_funcs = {codelet_long_kernel_sync}, .nbuffers = 0, .model = &model_sync, }; #endif int main(int argc STARPU_ATTRIBUTE_UNUSED, char **argv STARPU_ATTRIBUTE_UNUSED) { #ifndef STARPU_USE_CUDA return STARPU_TEST_SKIPPED; #else setenv("STARPU_NWORKER_PER_CUDA", "4", 1); int ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_cuda_worker_get_count() == 0) { starpu_shutdown(); return STARPU_TEST_SKIPPED; } unsigned iter; for (iter = 0; iter < NTASKS; iter++) { struct starpu_task *task = starpu_task_create(); if (!(iter % SYNC)) /* Insert a synchronous task, just for fun */ task->cl = &cl; else task->cl = &cl_async; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_shutdown(); STARPU_RETURN(EXIT_SUCCESS); enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); STARPU_RETURN(STARPU_TEST_SKIPPED); #endif } starpu-1.4.9+dfsg/tests/overlap/long_kernel.cu000066400000000000000000000021451507764646700214540ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include extern "C" __global__ void long_kernel(unsigned long niters) { unsigned long i; for (i = 0; i < niters; i++) __syncthreads(); } extern "C" void long_kernel_cuda(unsigned long niters) { dim3 dimBlock(1,1); dim3 dimGrid(1,1); long_kernel<<>>(niters); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/tests/overlap/overlap.c000066400000000000000000000077661507764646700204560ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2010-2010 Mehdi Juhoor * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include "../helper.h" #include /* * Check that working on a partitioned vector gets overlapping of prefetches etc. */ #ifdef STARPU_QUICK_CHECK #define NTASKS 100 #else #define NTASKS 10000 #endif #define VECTORSIZE 1024 #define TASKDURATION 24U #define SYMBOL "sleep" static starpu_pthread_mutex_t mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; static starpu_pthread_cond_t cond = STARPU_PTHREAD_COND_INITIALIZER; static unsigned finished = 0; static unsigned cnt = NTASKS; static void callback(void *arg) { (void)arg; unsigned res = STARPU_ATOMIC_ADD(&cnt, -1); ANNOTATE_HAPPENS_BEFORE(&cnt); if (res == 0) { ANNOTATE_HAPPENS_AFTER(&cnt); STARPU_PTHREAD_MUTEX_LOCK(&mutex); finished = 1; STARPU_PTHREAD_COND_SIGNAL(&cond); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } } void codelet_sleep(void *descr[], void *_args) { (void)descr; (void)_args; STARPU_SKIP_IF_VALGRIND; starpu_usleep(TASKDURATION); } static struct starpu_perfmodel model = { .type = STARPU_HISTORY_BASED, .symbol = NULL /* to be defined later */ }; static struct starpu_codelet cl = { .cpu_funcs = {codelet_sleep}, .cuda_funcs = {codelet_sleep}, #ifndef STARPU_SIMGRID .opencl_funcs = {codelet_sleep}, #endif .cpu_funcs_name = {"codelet_sleep"}, .nbuffers = 1, .modes = {STARPU_R}, .model = &model }; static char symbolname[128]; int main(int argc, char **argv) { int ret; starpu_data_handle_t handle; float *buffer; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* create data */ starpu_malloc((void **)&buffer, NTASKS*VECTORSIZE*sizeof(char)); /* declare data to StarPU */ starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)buffer, NTASKS*VECTORSIZE, sizeof(char)); struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = NTASKS }; starpu_data_partition(handle, &f); snprintf(symbolname, sizeof(symbolname), "overlap_sleep_%d_%u", VECTORSIZE, TASKDURATION); model.symbol = symbolname; unsigned iter; for (iter = 0; iter < NTASKS; iter++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = starpu_data_get_sub_data(handle, 1, iter); task->callback_func = callback; task->callback_arg = NULL; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_do_schedule(); STARPU_PTHREAD_MUTEX_LOCK(&mutex); if (!finished) STARPU_PTHREAD_COND_WAIT(&cond, &mutex); STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unregister(handle); starpu_free_noflag(buffer, NTASKS*VECTORSIZE*sizeof(char)); starpu_shutdown(); STARPU_RETURN(EXIT_SUCCESS); enodev: starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unregister(handle); starpu_free_noflag(buffer, NTASKS*VECTORSIZE*sizeof(char)); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); STARPU_RETURN(STARPU_TEST_SKIPPED); } starpu-1.4.9+dfsg/tests/overlap/overlap.sh000077500000000000000000000103511507764646700206310ustar00rootroot00000000000000#!/bin/sh -x # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2018-2018 Federal University of Rio Grande do Sul (UFRGS) # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Test parsing of FxT traces if test -n "$STARPU_MICROBENCHS_DISABLED" ; then exit 77 ; fi # Testing another specific scheduler, no need to run this [ -z "$STARPU_SCHED" -o "$STARPU_SCHED" = dmdas ] || exit 77 # XXX: Also see examples/mult/sgemm.sh set -e PREFIX=$(dirname $0) rm -rf $PREFIX/overlap.traces mkdir -p $PREFIX/overlap.traces export STARPU_FXT_PREFIX=$PREFIX/overlap.traces STARPU_FXT_TRACE=1 STARPU_SCHED=dmdas $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/overlap if [ -x $PREFIX/../../tools/starpu_fxt_tool ]; then $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_plot -o $STARPU_FXT_PREFIX -s overlap_sleep_1024_24 -i $STARPU_FXT_PREFIX/prof_file_${USER}_0 [ -f $STARPU_FXT_PREFIX/starpu_overlap_sleep_1024_24.gp -a -f $STARPU_FXT_PREFIX/starpu_overlap_sleep_1024_24.data -a -f $STARPU_FXT_PREFIX/starpu_overlap_sleep_1024_24_avg.data ] # Generate paje, dag, data, etc. $STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_tool -d $STARPU_FXT_PREFIX -memory-states -label-deps -i $STARPU_FXT_PREFIX/prof_file_${USER}_0 $PREFIX/../../tools/starpu_paje_sort $STARPU_FXT_PREFIX/paje.trace ! type pj_dump || pj_dump -e 0 < $STARPU_FXT_PREFIX/paje.trace $PREFIX/../../tools/starpu_codelet_profile $STARPU_FXT_PREFIX/distrib.data overlap_sleep_1024_24 [ -f $STARPU_FXT_PREFIX/distrib.data.gp -a \( -f $STARPU_FXT_PREFIX/distrib.data.0 -o -f $STARPU_FXT_PREFIX/distrib.data.1 -o -f $STARPU_FXT_PREFIX/distrib.data.2 -o -f $STARPU_FXT_PREFIX/distrib.data.3 -o -f $STARPU_FXT_PREFIX/distrib.data.4 -o -f $STARPU_FXT_PREFIX/distrib.data.5 -o -f $STARPU_FXT_PREFIX/distrib.data.6 \) ] $STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_data_trace -d $STARPU_FXT_PREFIX $STARPU_FXT_PREFIX/prof_file_${USER}_0 overlap_sleep_1024_24 [ -f $STARPU_FXT_PREFIX/data_trace.gp ] $STARPU_LAUNCH $PREFIX/../../tools/starpu_fxt_stats -i $STARPU_FXT_PREFIX/prof_file_${USER}_0 $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/../../tools/starpu_tasks_rec_complete $STARPU_FXT_PREFIX/tasks.rec $STARPU_FXT_PREFIX/tasks2.rec python3 $PREFIX/../../tools/starpu_trace_state_stats.py $STARPU_FXT_PREFIX/trace.rec ! type gnuplot || ( $PREFIX/../../tools/starpu_workers_activity -d $STARPU_FXT_PREFIX $STARPU_FXT_PREFIX/activity.data && [ -f $STARPU_FXT_PREFIX/activity.eps ] ) # needs some R packages $PREFIX/../../tools/starpu_paje_draw_histogram $STARPU_FXT_PREFIX/paje.trace || true $PREFIX/../../tools/starpu_paje_state_stats $STARPU_FXT_PREFIX/paje.trace || true $PREFIX/../../tools/starpu_paje_summary $STARPU_FXT_PREFIX/paje.trace || true $PREFIX/../../tools/starpu_codelet_histo_profile $STARPU_FXT_PREFIX/distrib.data || true [ -f $STARPU_FXT_PREFIX/distrib.data.overlap_sleep_1024_24.0.a3d3725e.1024.pdf ] || true if [ -x $PREFIX/../../tools/starpu_replay ]; then $STARPU_LAUNCH $PREFIX/../../tools/starpu_replay $STARPU_FXT_PREFIX/tasks.rec fi [ ! -x $PREFIX/../../tools/starpu_perfmodel_recdump ] || $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_recdump $STARPU_FXT_PREFIX/tasks.rec -o $STARPU_FXT_PREFIX/perfs2.rec [ -f $STARPU_FXT_PREFIX/perfs2.rec ] fi [ ! -x $PREFIX/../../tools/starpu_perfmodel_display ] || $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_display -s overlap_sleep_1024_24 [ ! -x $PREFIX/../../tools/starpu_perfmodel_display ] || $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_display -x -s overlap_sleep_1024_24 [ ! -x $PREFIX/../../tools/starpu_perfmodel_recdump ] || $MS_LAUNCHER $STARPU_LAUNCH $PREFIX/../../tools/starpu_perfmodel_recdump -o $STARPU_FXT_PREFIX/perfs.rec [ -f $STARPU_FXT_PREFIX/perfs.rec ] starpu-1.4.9+dfsg/tests/parallel_tasks/000077500000000000000000000000001507764646700201535ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/parallel_tasks/combined_worker_assign_workerid.c000066400000000000000000000110171507764646700267420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Check that one create a combined worker by hand and run tasks on it. */ #ifndef STARPU_QUICK_CHECK #define N 1000 #else #define N 100 #endif #define VECTORSIZE 1024 static int combined_workerid; static int combined_ncpus; void codelet_null(void *descr[], void *_args) { (void)descr; (void)_args; STARPU_SKIP_IF_VALGRIND; STARPU_ASSERT(starpu_combined_worker_get_id() == combined_workerid); int worker_size = starpu_combined_worker_get_size(); STARPU_ASSERT(worker_size == combined_ncpus); starpu_usleep(1000./worker_size); #if 1 int id = starpu_worker_get_id(); int combined_id = starpu_combined_worker_get_id(); FPRINTF(stderr, "worker id %d - combined id %d - worker size %d\n", id, combined_id, worker_size); #endif } static struct starpu_codelet cl = { .type = STARPU_FORKJOIN, .max_parallelism = INT_MAX, .cpu_funcs = {codelet_null}, .cpu_funcs_name = {"codelet_null"}, .cuda_funcs = {codelet_null}, .opencl_funcs = {codelet_null}, .nbuffers = 1, .modes = {STARPU_R} }; int main(void) { starpu_data_handle_t v_handle; unsigned *v; int ret; struct starpu_conf conf; ret = starpu_conf_init(&conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_conf_init"); conf.sched_policy_name = "pheft"; conf.calibrate = 1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_sched_policy *policy = starpu_sched_get_sched_policy(); if (strcmp(conf.sched_policy_name, policy->policy_name)) goto shutdown; combined_ncpus = starpu_cpu_worker_get_count(); if (combined_ncpus < 4) goto shutdown; int *workerids = malloc(sizeof(int) * combined_ncpus); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, workerids, combined_ncpus); combined_ncpus /= 2; unsigned ctx_id = starpu_sched_ctx_get_context(); if (ctx_id == STARPU_NMAX_SCHED_CTXS) ctx_id = 0; combined_workerid = starpu_combined_worker_assign_workerid(combined_ncpus, workerids); free(workerids); struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(ctx_id); workers->add(workers, combined_workerid); starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned)); starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); /* Allow tasks only on this combined worker */ int nuint32 = (combined_workerid + 31) / 32; uint32_t *forced_workerids = malloc(sizeof(uint32_t) * nuint32); memset(forced_workerids, 0, sizeof(uint32_t) * nuint32); forced_workerids[combined_workerid / 32] |= 1U << (combined_workerid%32); unsigned iter; for (iter = 0; iter < N; iter++) { /* execute a task on that worker */ struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = v_handle; if (iter % 2) { task->workerids = forced_workerids; task->workerids_len = nuint32; } else { task->execute_on_a_specific_worker = 1; task->workerid = combined_workerid; } ret = starpu_task_submit(task); if (ret == -ENODEV) { task->destroy = 0; starpu_task_destroy(task); goto enodev; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); free(forced_workerids); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); free(forced_workerids); fprintf(stderr, "WARNING: No one can execute the task on workerid %u\n", combined_workerid); shutdown: /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/parallel_tasks/cuda_only.c000066400000000000000000000052371507764646700223030ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Check that pheft works with only GPUs */ void codelet_null(void *descr[], void *_args) { (void)descr; (void)_args; } struct starpu_perfmodel model = { .type = STARPU_HISTORY_BASED, .symbol = "test" }; static struct starpu_codelet cl = { .cuda_funcs = {codelet_null}, .model = &model, .nbuffers = 1, .modes = {STARPU_R} }; struct starpu_perfmodel model2 = { .type = STARPU_HISTORY_BASED, .symbol = "test2" }; static struct starpu_codelet cl2 = { .cuda_funcs = {codelet_null}, .model = &model2, .nbuffers = 1, .modes = {STARPU_W} }; int main(void) { int ret; starpu_data_handle_t handle; unsigned data; struct starpu_conf conf; starpu_conf_init(&conf); conf.sched_policy_name = "pheft"; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&data, sizeof(data)); unsigned iter; struct starpu_task *task; for (iter = 0; iter < 100; iter++) { task = starpu_task_create(); task->cl = &cl; task->handles[0] = handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); task = starpu_task_create(); task->cl = &cl2; task->handles[0] = handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(handle); starpu_shutdown(); STARPU_RETURN(EXIT_SUCCESS); enodev: task->destroy = 0; starpu_task_destroy(task); starpu_data_unregister(handle); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); STARPU_RETURN(STARPU_TEST_SKIPPED); } starpu-1.4.9+dfsg/tests/parallel_tasks/explicit_combined_worker.c000066400000000000000000000065111507764646700253740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Check that one can bind a parallel task on a parallel worker */ #ifndef STARPU_QUICK_CHECK #define N 1000 #else #define N 100 #endif #define VECTORSIZE 1024 void codelet_null(void *descr[], void *_args) { (void)descr; (void)_args; STARPU_SKIP_IF_VALGRIND; int worker_size = starpu_combined_worker_get_size(); STARPU_ASSERT(worker_size > 0); starpu_usleep(1000./worker_size); #if 1 int id = starpu_worker_get_id(); int combined_id = starpu_combined_worker_get_id(); FPRINTF(stderr, "worker id %d - combined id %d - worker size %d\n", id, combined_id, worker_size); #endif } static struct starpu_codelet cl = { .type = STARPU_FORKJOIN, .max_parallelism = INT_MAX, .cpu_funcs = {codelet_null}, .cpu_funcs_name = {"codelet_null"}, .cuda_funcs = {codelet_null}, .opencl_funcs = {codelet_null}, .nbuffers = 1, .modes = {STARPU_R} }; int main(void) { starpu_data_handle_t v_handle; unsigned *v; int ret; struct starpu_conf conf; ret = starpu_conf_init(&conf); STARPU_CHECK_RETURN_VALUE(ret, "starpu_conf_init"); conf.sched_policy_name = "pheft"; conf.calibrate = 1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned)); starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); unsigned nworker = starpu_worker_get_count() + starpu_combined_worker_get_count(); unsigned iter, worker; for (iter = 0; iter < N; iter++) { for (worker = 0; worker < nworker; worker++) { /* execute a task on that worker */ struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = v_handle; task->execute_on_a_specific_worker = 1; task->workerid = worker; ret = starpu_task_submit(task); if (ret == -ENODEV) { task->destroy = 0; starpu_task_destroy(task); goto enodev; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); fprintf(stderr, "WARNING: No one can execute the task on workerid %u\n", worker); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/parallel_tasks/parallel_kernels.c000066400000000000000000000064521507764646700236450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Submit dumb parallel forkjoin tasks */ #ifndef STARPU_QUICK_CHECK #define N 1000 #else #define N 10 #endif #define VECTORSIZE 1024 void codelet_null(void *descr[], void *_args) { (void)descr; (void)_args; STARPU_SKIP_IF_VALGRIND; int worker_size = starpu_combined_worker_get_size(); STARPU_ASSERT(worker_size > 0); starpu_usleep(1000./worker_size); #if 0 int id = starpu_worker_get_id(); int combined_id = starpu_combined_worker_get_id(); FPRINTF(stderr, "worker id %d - combined id %d - worker size %d\n", id, combined_id, worker_size); #endif } struct starpu_perfmodel model = { .type = STARPU_HISTORY_BASED, .symbol = "parallel_kernel_test" }; static struct starpu_codelet cl = { .type = STARPU_FORKJOIN, .max_parallelism = INT_MAX, .cpu_funcs = {codelet_null}, .cuda_funcs = {codelet_null}, .cpu_funcs_name = {"codelet_null"}, .opencl_funcs = {codelet_null}, .model = &model, .nbuffers = 1, .modes = {STARPU_R} }; int main(void) { int ret; starpu_data_handle_t v_handle; unsigned *v; struct starpu_conf conf; starpu_conf_init(&conf); conf.sched_policy_name = "pheft"; conf.calibrate = 1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned)); starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); unsigned nworker = starpu_worker_get_count() + starpu_combined_worker_get_count(); unsigned iter, worker, n; n = N; if (STARPU_RUNNING_ON_VALGRIND) { n /= 300; } for (iter = 0; iter < n; iter++) { for (worker = 0; worker < nworker; worker++) { /* execute a task on that worker */ struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = v_handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); starpu_shutdown(); STARPU_RETURN(EXIT_SUCCESS); enodev: starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); STARPU_RETURN(STARPU_TEST_SKIPPED); } starpu-1.4.9+dfsg/tests/parallel_tasks/parallel_kernels_spmd.c000066400000000000000000000065541507764646700246730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Submit dumb parallel spmd tasks */ #ifndef STARPU_QUICK_CHECK #define N 1000 #else #define N 100 #endif #define VECTORSIZE 1024 void codelet_null(void *descr[], void *_args) { (void)descr; (void)_args; STARPU_SKIP_IF_VALGRIND; int worker_size = starpu_combined_worker_get_size(); STARPU_ASSERT(worker_size > 0); starpu_usleep(1000./worker_size); #if 0 int id = starpu_worker_get_id(); int combined_id = starpu_combined_worker_get_id(); int rank = starpu_combined_worker_get_rank(); FPRINTF(stderr, "worker id %d - combined id %d - worker size %d - SPMD rank %d\n", id, combined_id, worker_size, rank); #endif } struct starpu_perfmodel model = { .type = STARPU_HISTORY_BASED, .symbol = "parallel_kernel_test_spmd" }; static struct starpu_codelet cl = { .type = STARPU_SPMD, .max_parallelism = INT_MAX, .cpu_funcs = {codelet_null}, .cpu_funcs_name = {"codelet_null"}, .cuda_funcs = {codelet_null}, .opencl_funcs = {codelet_null}, .model = &model, .nbuffers = 1, .modes = {STARPU_R} }; int main(void) { int ret; starpu_data_handle_t v_handle; unsigned *v; struct starpu_conf conf; starpu_conf_init(&conf); conf.sched_policy_name = "pheft"; conf.calibrate = 1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned)); starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); unsigned nworker = starpu_worker_get_count() + starpu_combined_worker_get_count(); unsigned iter, worker, n; n = N; if (STARPU_RUNNING_ON_VALGRIND) { n /= 300; } for (iter = 0; iter < n; iter++) { for (worker = 0; worker < nworker; worker++) { /* execute a task on that worker */ struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = v_handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); starpu_shutdown(); STARPU_RETURN(EXIT_SUCCESS); enodev: starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); STARPU_RETURN(STARPU_TEST_SKIPPED); } starpu-1.4.9+dfsg/tests/parallel_tasks/parallel_kernels_trivial.c000066400000000000000000000072171507764646700253770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Submit a simple testcase for parallel tasks. */ #define VECTORSIZE 1024 void codelet_null(void *descr[], void *_args) { (void)descr; (void)_args; STARPU_SKIP_IF_VALGRIND; int worker_size = starpu_combined_worker_get_size(); STARPU_ASSERT(worker_size > 0); starpu_usleep(1000./worker_size); #if 0 int id = starpu_worker_get_id(); int combined_id = starpu_combined_worker_get_id(); FPRINTF(stderr, "worker id %d - combined id %d - worker size %d\n", id, combined_id, worker_size); #endif } struct starpu_perfmodel model = { .type = STARPU_HISTORY_BASED, .symbol = "parallel_kernel_test" }; static struct starpu_codelet cl = { .type = STARPU_FORKJOIN, .max_parallelism = INT_MAX, .cpu_funcs = {codelet_null}, .cuda_funcs = {codelet_null}, .cpu_funcs_name = {"codelet_null"}, .opencl_funcs = {codelet_null}, .model = &model, .nbuffers = 1, .modes = {STARPU_R} }; static struct starpu_codelet cl_seq = { .cpu_funcs = {codelet_null}, .cuda_funcs = {codelet_null}, .cpu_funcs_name = {"codelet_null"}, .opencl_funcs = {codelet_null}, .model = &model, .nbuffers = 1, .modes = {STARPU_R} }; int main(void) { int ret; starpu_data_handle_t v_handle; unsigned *v; struct starpu_conf conf; starpu_conf_init(&conf); conf.ncpus = 2; conf.sched_policy_name = "pheft"; conf.calibrate = 1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned)); starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); /* First submit a sequential task */ ret = starpu_task_insert(&cl_seq, STARPU_R, v_handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Then a parallel task, which is not interesting to run in parallel when we have only two cpus */ ret = starpu_task_insert(&cl, STARPU_R, v_handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Then another parallel task, which is interesting to run in parallel since the two cpus are now finishing at the same time. */ ret = starpu_task_insert(&cl, STARPU_R, v_handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); starpu_shutdown(); STARPU_RETURN(EXIT_SUCCESS); enodev: starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); STARPU_RETURN(STARPU_TEST_SKIPPED); } starpu-1.4.9+dfsg/tests/parallel_tasks/spmd_peager.c000066400000000000000000000062151507764646700226110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Submit dumb parallel spmd tasks with the peager scheduler */ #ifndef STARPU_QUICK_CHECK #define N 1000 #else #define N 100 #endif #define VECTORSIZE 1024 starpu_data_handle_t v_handle; static unsigned *v; void codelet_null(void *descr[], void *_args) { (void)descr; (void)_args; STARPU_SKIP_IF_VALGRIND; int worker_size = starpu_combined_worker_get_size(); STARPU_ASSERT(worker_size > 0); // FPRINTF(stderr, "WORKERSIZE : %d\n", worker_size); starpu_usleep(1000./worker_size); #if 0 int id = starpu_worker_get_id(); int combined_id = starpu_combined_worker_get_id(); int rank = starpu_combined_worker_get_rank(); FPRINTF(stderr, "worker id %d - combined id %d - worker size %d - SPMD rank %d\n", id, combined_id, worker_size, rank); #endif } static struct starpu_codelet cl = { .type = STARPU_SPMD, .max_parallelism = INT_MAX, .cpu_funcs = {codelet_null}, .cpu_funcs_name = {"codelet_null"}, .cuda_funcs = {codelet_null}, .opencl_funcs = {codelet_null}, .nbuffers = 1, .modes = {STARPU_R} }; int main(void) { int ret; struct starpu_conf conf; starpu_conf_init(&conf); conf.sched_policy_name = "peager"; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned)); STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); starpu_vector_data_register(&v_handle, STARPU_MAIN_RAM, (uintptr_t)v, VECTORSIZE, sizeof(unsigned)); unsigned iter;//, worker; for (iter = 0; iter < N; iter++) { /* execute a task on that worker */ struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = v_handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_data_unregister(v_handle); starpu_free_noflag(v, VECTORSIZE*sizeof(unsigned)); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/perfmodels/000077500000000000000000000000001507764646700173125ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/perfmodels/feed.c000066400000000000000000000051571507764646700203710ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Test the starpu_perfmodel_update_history function */ static struct starpu_perfmodel model = { .type = STARPU_REGRESSION_BASED, .symbol = "feed" }; static struct starpu_perfmodel nl_model = { .type = STARPU_NL_REGRESSION_BASED, .symbol = "nlfeed" }; static struct starpu_codelet cl = { .model = &model, .nbuffers = 1, .modes = {STARPU_W} }; int main(void) { struct starpu_task task; int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); if (starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) < 2) { starpu_shutdown(); return STARPU_TEST_SKIPPED; } starpu_task_init(&task); task.cl = &cl; int size; for (size = 1024; size < 16777216; size *= 2) { float measured_fast, measured_slow; starpu_data_handle_t handle; starpu_vector_data_register(&handle, -1, 0, size, sizeof(float)); task.handles[0] = handle; /* Simulate Fast GPU. In real applications this would be * replaced by fetching from actual measurement */ measured_fast = 0.002+size*0.00000001; measured_slow = 0.001+size*0.0000001; struct starpu_perfmodel_arch arch; arch.ndevices = 1; arch.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device)); arch.devices[0].type = STARPU_CUDA_WORKER; arch.devices[0].ncores = 0; /* Simulate Fast GPU */ arch.devices[0].devid = 0; starpu_perfmodel_update_history(&model, &task, &arch, 0, 0, measured_fast); starpu_perfmodel_update_history(&nl_model, &task, &arch, 0, 0, measured_fast); /* Simulate Slow GPU */ arch.devices[0].devid = 1; starpu_perfmodel_update_history(&model, &task, &arch, 0, 0, measured_slow); starpu_perfmodel_update_history(&nl_model, &task, &arch, 0, 0, measured_slow); starpu_task_clean(&task); starpu_data_unregister(handle); } starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/perfmodels/memory.c000066400000000000000000000033601507764646700207700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* * Test providing the memory perfmodel function */ void func(void *descr[], void *arg) { (void)descr; (void)arg; } static struct starpu_perfmodel my_model = { .type = STARPU_HISTORY_BASED, .symbol = "my_model", }; static struct starpu_codelet my_codelet = { .cpu_funcs = {func}, .cpu_funcs_name = {"func"}, .model = &my_model }; double cuda_cost_function(struct starpu_task *t, struct starpu_perfmodel_arch *a, unsigned i) { (void) t; (void) a; return (double)i; } int main(void) { int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_perfmodel_init(&my_model); starpu_perfmodel_set_per_devices_cost_function(&my_model, 0, cuda_cost_function, STARPU_CUDA_WORKER, 0, 1, -1); ret = starpu_task_insert(&my_codelet, 0); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_wait_for_all(); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/perfmodels/non_linear_regression_based.c000066400000000000000000000065311507764646700252050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Benchmark memset with a non-linear regression */ #define START_LOG 0 #ifdef STARPU_QUICK_CHECK #define END_LOG 20 #else #define END_LOG 25 #endif #ifdef STARPU_USE_CUDA static void memset_cuda(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); cudaMemsetAsync(ptr, 42, n * sizeof(*ptr), starpu_cuda_get_local_stream()); } #endif void memset_cpu(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; int *ptr = (int *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); memset(ptr, 42, n * sizeof(*ptr)); } static struct starpu_perfmodel model = { .type = STARPU_NL_REGRESSION_BASED, .symbol = "non_linear_memset_regression_based" }; #ifdef STARPU_USE_OPENCL extern void memset_opencl(void *buffers[], void *args); #endif static struct starpu_codelet memset_cl = { #ifdef STARPU_USE_CUDA .cuda_funcs = {memset_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {memset_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs = {memset_cpu}, .cpu_funcs_name = {"memset_cpu"}, .model = &model, .nbuffers = 1, .modes = {STARPU_W} }; static void test_memset(int nelems) { starpu_data_handle_t handle; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int)); int nloops = 200; int loop; for (loop = 0; loop < nloops; loop++) { struct starpu_task *task = starpu_task_create(); task->cl = &memset_cl; task->handles[0] = handle; int ret = starpu_task_submit(task); if (ret == -ENODEV) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_data_unregister(handle); } #ifdef STARPU_USE_OPENCL struct starpu_opencl_program opencl_program; #endif int main(int argc, char **argv) { int ret; struct starpu_conf conf; starpu_conf_init(&conf); conf.sched_policy_name = "eager"; conf.calibrate = 2; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif int slog; for (slog = START_LOG; slog < END_LOG; slog++) { int size = 1 << slog; test_memset(size); } #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/perfmodels/opencl_memset.c000066400000000000000000000044031507764646700223110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include /* * Enqueue an OpenCL kernel which just does a memset */ extern struct starpu_opencl_program opencl_program; void _memset_opencl(void *buffers[], void *args, const char *name) { (void) args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); cl_mem val = (cl_mem)STARPU_VECTOR_GET_DEV_HANDLE(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_program, name, devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(val), &val); err|= clSetKernelArg(kernel, 1, sizeof(n), &n); if (err) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=n; size_t local; size_t s; cl_device_id device; starpu_opencl_get_device(devid, &device); err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); if (local > global) local=global; else global = (global + local-1) / local * local; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } void memset_opencl(void *buffers[], void *args, const char *kernel) { (void)kernel; _memset_opencl(buffers, args, "_memset_opencl"); } void memset0_opencl(void *buffers[], void *args, const char *kernel) { (void)kernel; _memset_opencl(buffers, args, "_memset0_opencl"); } starpu-1.4.9+dfsg/tests/perfmodels/opencl_memset_kernel.cl000066400000000000000000000017341507764646700240310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void _memset0_opencl(__global int *val, int nx) { const int i = get_global_id(0); if (i < nx) val[0] += i; } __kernel void _memset_opencl(__global int *val, int nx) { const int i = get_global_id(0); if (i < nx) val[i] = 42; } starpu-1.4.9+dfsg/tests/perfmodels/path.c000066400000000000000000000116711507764646700204200ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include #if !defined(STARPU_HAVE_UNSETENV) || !defined(STARPU_HAVE_SETENV) #warning unsetenv or setenv are not defined. Skipping test int main(void) { return STARPU_TEST_SKIPPED; } #else void *_set_sampling_dir(char *sampling_dir, size_t s) { char *tpath = starpu_getenv("TMPDIR"); if (!tpath) tpath = starpu_getenv("TEMP"); if (!tpath) tpath = starpu_getenv("TMP"); if (!tpath) tpath = "/tmp"; snprintf(sampling_dir, s, "%s/starpu_sampling_XXXXXX", tpath); return _starpu_mkdtemp(sampling_dir); } void randomstring(char *name, int nb) { int n; static char charset[] = "abcdefghijklmnopqrstuvwxyz"; for(n = 0 ;n < nb-1 ; n++) { int key = starpu_lrand48() % (int)(sizeof(charset) -1); name[n] = charset[key]; } name[nb-1]='\0'; } int do_test(const char *test_name, const char *bus_dir, const char *codelet_dir, const char *model_name) { int ret; char hostname[10]; struct starpu_conf conf; FPRINTF(stderr, "\nTesting %s with <%s> and <%s>\n", test_name, bus_dir, codelet_dir); starpu_srand48((long int)time(NULL)); randomstring(hostname, 10); setenv("STARPU_HOSTNAME", hostname, 1); starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); { char filename[1024]; struct stat statbuf; snprintf(filename, 1024, "%s/bus/%s.config", bus_dir, hostname); ret = stat(filename, &statbuf); if (ret != 0) { FPRINTF(stderr, "Performance model file <%s> for bus benchmarking is not available\n", filename); starpu_shutdown(); return 1; } else { FPRINTF(stderr, "Performance model file <%s> for bus benchmarking is valid\n", filename); } } // create performance model file for codelet char _codelet_dir[512]; snprintf(_codelet_dir, 512, "%s/codelets/%d", codelet_dir, _STARPU_PERFMODEL_VERSION); _starpu_mkpath_and_check(_codelet_dir, S_IRWXU); char codelet_model[1024]; snprintf(codelet_model, 1024, "%s/%s.%s", _codelet_dir, model_name, hostname); FILE *output = fopen(codelet_model, "w"); if (output == NULL) { FPRINTF(stderr, "Cannot create performance model file <%s> for codelet <%s>\n", codelet_model, model_name); starpu_shutdown(); return 1; } fprintf(output, "##################\n"); fprintf(output, "# Performance Model Version\n"); fprintf(output, "45\n"); fclose(output); char path[256]; starpu_perfmodel_get_model_path(model_name, path, 256); if (strlen(path) == 0) { FPRINTF(stderr, "Performance model file <%s> for codelet <%s> is not available\n", path, model_name); starpu_shutdown(); return 1; } else { if (strcmp(path, codelet_model) != 0) { FPRINTF(stderr, "Performance model file <%s> for codelet <%s> is not at expected location <%s>\n", path, model_name, codelet_model); starpu_shutdown(); return 1; } } FPRINTF(stderr, "Performance model file <%s> for codelet <%s> is valid\n", path, model_name); starpu_shutdown(); return 0; } int main(void) { char sampling_dir[256]; int global_ret = 0; int ret; unsetenv("STARPU_PERF_MODEL_DIR"); unsetenv("STARPU_PERF_MODEL_PATH"); _set_sampling_dir(sampling_dir, sizeof(sampling_dir)); { char perf_model_dir[512]; snprintf(perf_model_dir, 512, "%s/sampling", sampling_dir); setenv("STARPU_PERF_MODEL_DIR", perf_model_dir, 1); ret = do_test("STARPU_PERF_MODEL_DIR", perf_model_dir, perf_model_dir, "mymodel"); if (ret == STARPU_TEST_SKIPPED) return ret; global_ret += ret; unsetenv("STARPU_PERF_MODEL_DIR"); } char starpu_home[512]; { snprintf(starpu_home, 512, "%s/.starpu/sampling", sampling_dir); setenv("STARPU_HOME", sampling_dir, 1); ret = do_test("STARPU_HOME", starpu_home, starpu_home, "mymodel"); if (ret == STARPU_TEST_SKIPPED) return ret; global_ret += ret; } { char perf_model_path[512]; snprintf(perf_model_path, 512, "%s/sampling", sampling_dir); setenv("STARPU_PERF_MODEL_PATH", perf_model_path, 1); ret = do_test("STARPU_PERF_MODEL_PATH", starpu_home, perf_model_path, "mymodel2"); if (ret == STARPU_TEST_SKIPPED) return ret; global_ret += ret; } return global_ret; } #endif starpu-1.4.9+dfsg/tests/perfmodels/regression_based_check.c000066400000000000000000000143741507764646700241420ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include "../helper.h" /* * - Calibrate the linear model only for large sizes: STARTline 1048576 * - Separate the test_memset loop in two loops: * - linear: start from 1048576 * - non-linear: keep start at 1024 */ #define STARTlin 131072 #define START 1024 #ifdef STARPU_QUICK_CHECK #define END 1048576 #else #define END 16777216 #endif void memset_cpu(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); unsigned i; starpu_usleep(1000); for (i=0; icl = codelet; task->handles[0] = handle; int ret = starpu_task_submit(task); if (ret == -ENODEV) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_do_schedule(); starpu_data_unregister(handle); } static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *compar_task) { unsigned i; unsigned niter = 100; starpu_data_handle_t handle; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); struct starpu_task *tasks[niter]; for (i = 0; i < niter; i++) { struct starpu_task *task = starpu_task_create(); task->cl = codelet; task->handles[0] = handle; task->synchronous = 1; /* We will destroy the task structure by hand so that we can * query the profiling info before the task is destroyed. */ task->destroy = 0; tasks[i] = task; int ret = starpu_task_submit(task); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); exit(0); } } starpu_data_unregister(handle); starpu_task_wait_for_all(); double length_sum = 0.0; for (i = 0; i < niter; i++) { struct starpu_task *task = tasks[i]; struct starpu_profiling_task_info *info = task->profiling_info; /* How long was the task execution ? */ length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); /* We don't need the task structure anymore */ starpu_task_destroy(task); } /* Display the occupancy of all workers during the test */ unsigned worker; for (worker = 0; worker < starpu_worker_get_count(); worker++) { struct starpu_profiling_worker_info worker_info; int ret = starpu_profiling_worker_get_info(worker, &worker_info); STARPU_ASSERT(!ret); char workername[128]; starpu_worker_get_name(worker, workername, sizeof(workername)); unsigned nimpl; if (starpu_worker_get_type(worker)==STARPU_CPU_WORKER) { FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername); for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f\n", size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), ((length_sum)/niter)); } } } } int main(int argc, char **argv) { /* Enable profiling */ starpu_profiling_status_set(STARPU_PROFILING_ENABLE); struct starpu_conf conf; starpu_data_handle_t handle; int ret; starpu_conf_init(&conf); conf.sched_policy_name = "eager"; conf.calibrate = 2; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int size; for (size = STARTlin; size < END; size *= 2) { /* Use a linear regression */ test_memset(size, &memset_cl); } for (size = START; size < END; size *= 2) { /* Use a non-linear regression */ test_memset(size, &nl_memset_cl); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_shutdown(); /* Test Phase */ starpu_conf_init(&conf); conf.sched_policy_name = "eager"; conf.calibrate = 0; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Now create a dummy task just to estimate its duration according to the regression */ size = 1234567; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); struct starpu_task *task = starpu_task_create(); task->cl = &memset_cl; task->handles[0] = handle; task->destroy = 0; FPRINTF(stdout, "\n ////linear regression results////\n"); compare_performance(size, &memset_cl, task); task->cl = &nl_memset_cl; FPRINTF(stdout, "\n ////non linear regression results////\n"); compare_performance(size, &nl_memset_cl, task); starpu_task_destroy(task); starpu_data_unregister(handle); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/perfmodels/regression_based_energy.c000066400000000000000000000163301507764646700243500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* * A multi-implementation benchmark with dmda scheduler * we aim to test the energy model with the different size of gamma * for large size of gamma, dmda choose the second implementation which consumes less energy * otherwise, it choose the first implementation which minimizes the execution time */ #define STARTlin 131072 #define START 1024 #ifdef STARPU_QUICK_CHECK #define END 1048576 #else #define END 16777216 #endif // first implementation with an initial delay (100 us) void memset0_cpu(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); unsigned i; starpu_usleep(100); for (i=0; icl = codelet; task->handles[0] = handle; int ret = starpu_task_submit(task); if (ret == -ENODEV) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_do_schedule(); starpu_data_unregister(handle); } static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *compar_task) { unsigned i; unsigned niter = 100; starpu_data_handle_t handle; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); struct starpu_task *tasks[niter]; for (i = 0; i < niter; i++) { struct starpu_task *task = starpu_task_create(); task->cl = codelet; task->handles[0] = handle; task->synchronous = 1; /* We will destroy the task structure by hand so that we can * query the profiling info before the task is destroyed. */ task->destroy = 0; tasks[i] = task; int ret = starpu_task_submit(task); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); exit(0); } } starpu_data_unregister(handle); starpu_task_wait_for_all(); double length_sum = 0.0; for (i = 0; i < niter; i++) { struct starpu_task *task = tasks[i]; struct starpu_profiling_task_info *info = task->profiling_info; /* How long was the task execution ? */ length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); /* We don't need the task structure anymore */ starpu_task_destroy(task); } /* Display the occupancy of all workers during the test */ unsigned worker; for (worker = 0; worker < starpu_worker_get_count(); worker++) { struct starpu_profiling_worker_info worker_info; int ret = starpu_profiling_worker_get_info(worker, &worker_info); STARPU_ASSERT(!ret); char workername[128]; starpu_worker_get_name(worker, workername, sizeof(workername)); unsigned nimpl; if (starpu_worker_get_type(worker)==STARPU_CPU_WORKER) { FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername); for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f, Expected energy: %f\n", size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), ((length_sum)/niter), starpu_task_expected_energy(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl)); } } } } int main(int argc, char **argv) { /* Enable profiling */ starpu_profiling_status_set(STARPU_PROFILING_ENABLE); struct starpu_conf conf; starpu_data_handle_t handle; int ret; starpu_conf_init(&conf); conf.sched_policy_name = "dmda"; conf.calibrate = 2; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int size; for (size = STARTlin; size < END; size *= 2) { /* Use a linear regression */ test_memset(size, &memset_cl); } for (size = START; size < END; size *= 2) { /* Use a non-linear regression */ test_memset(size, &nl_memset_cl); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_shutdown(); /* Test Phase */ starpu_conf_init(&conf); conf.sched_policy_name = "dmda"; conf.calibrate = 0; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Now create a dummy task just to estimate its duration according to the regression */ size = 1234567; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); struct starpu_task *task = starpu_task_create(); task->handles[0] = handle; task->destroy = 0; FPRINTF(stdout, "\n ////non linear regression results////\n"); task->cl = &nl_memset_cl; compare_performance(size, &nl_memset_cl, task); starpu_task_destroy(task); starpu_data_unregister(handle); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/perfmodels/regression_based_gpu.c000066400000000000000000000241321507764646700236510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* * A multi-implementation benchmark with dmda scheduler * we aim to test OPENCL workers and calculate the estimated time for each type of worker (CPU or OPENCL or CUDA) * dmda choose OPENCL workers for lage size (variable size of compare_performance) size=1234567 * dmda choose CPU workers for small size (size=1234) */ #define STARTlin 131072 #define START 1024 #ifdef STARPU_QUICK_CHECK #define END 1048576 #else #define END 16777216 #endif #ifdef STARPU_USE_CUDA static void memset_cuda(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); cudaMemsetAsync(ptr, 42, n * sizeof(*ptr), starpu_cuda_get_local_stream()); } #endif #ifdef STARPU_USE_OPENCL extern void memset0_opencl(void *buffers[], void *args); extern void memset_opencl(void *buffers[], void *args); #endif void memset0_cpu(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); unsigned i; //starpu_usleep(100); for (i = 0; i < n; i++) ptr[0] += i; } void memset_cpu(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); //starpu_usleep(10); memset(ptr, 42, n * sizeof(*ptr)); } static struct starpu_perfmodel model = { .type = STARPU_REGRESSION_BASED, .symbol = "memset_regression_based" }; static struct starpu_perfmodel nl_model = { .type = STARPU_NL_REGRESSION_BASED, .symbol = "non_linear_memset_regression_based" }; static struct starpu_codelet memset_cl = { #ifdef STARPU_USE_CUDA .cuda_funcs = {memset_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {memset0_opencl, memset_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs = {memset0_cpu, memset_cpu}, .cpu_funcs_name = {"memset0_cpu", "memset_cpu"}, .model = &model, .nbuffers = 1, .modes = {STARPU_SCRATCH} }; static struct starpu_codelet nl_memset_cl = { #ifdef STARPU_USE_CUDA .cuda_funcs = {memset_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {memset0_opencl, memset_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs = {memset0_cpu, memset_cpu}, .cpu_funcs_name = {"memset0_cpu", "memset_cpu"}, .model = &nl_model, .nbuffers = 1, .modes = {STARPU_SCRATCH} }; static void test_memset(int nelems, struct starpu_codelet *codelet) { #ifdef STARPU_QUICK_CHECK int nloops = 10; #else int nloops = 100; #endif int loop; starpu_data_handle_t handle; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int)); for (loop = 0; loop < nloops; loop++) { struct starpu_task *task = starpu_task_create(); task->cl = codelet; task->handles[0] = handle; int ret = starpu_task_submit(task); if (ret == -ENODEV) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_do_schedule(); starpu_data_unregister(handle); } static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *compar_task) { unsigned i; #ifdef STARPU_QUICK_CHECK unsigned niter = 10; #else unsigned niter = 100; #endif starpu_data_handle_t handle; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); struct starpu_task *tasks[niter]; for (i = 0; i < niter; i++) { struct starpu_task *task = starpu_task_create(); task->cl = codelet; task->handles[0] = handle; task->synchronous = 1; /* We will destroy the task structure by hand so that we can * query the profiling info before the task is destroyed. */ task->destroy = 0; tasks[i] = task; int ret = starpu_task_submit(task); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); exit(0); } } starpu_data_unregister(handle); starpu_task_wait_for_all(); double length_cpu_sum = 0.0; double length_gpu_sum = 0.0; enum starpu_worker_archtype archi; for (i = 0; i < niter; i++) { struct starpu_task *task = tasks[i]; struct starpu_profiling_task_info *info = task->profiling_info; //archi=starpu_worker_get_type(0); archi=starpu_worker_get_type(info->workerid); switch (archi) { case STARPU_CPU_WORKER: FPRINTF(stdout, "cpuuu\n"); /* How long was the task execution ? */ length_cpu_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); break; case STARPU_OPENCL_WORKER: FPRINTF(stdout, "openclllllll\n"); /* How long was the task execution ? */ length_gpu_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); break; case STARPU_CUDA_WORKER: FPRINTF(stdout, "cudaaaaaa\n"); /* How long was the task execution ? */ length_gpu_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); break; default: FPRINTF(stdout, "unsupported!\n"); break; } /* We don't need the task structure anymore */ starpu_task_destroy(task); } unsigned worker; /* Display the occupancy of all workers during the test */ unsigned ncpus = starpu_cpu_worker_get_count(); unsigned ngpus = starpu_opencl_worker_get_count()+starpu_cuda_worker_get_count(); //unsigned ncpu= starpu_worker_get_count_by_type(STARPU_CPU_WORKER); FPRINTF(stderr, "ncpus %u \n", ncpus); FPRINTF(stderr, "ngpus %u \n", ngpus); for (worker= 0; worker< starpu_worker_get_count(); worker++) { struct starpu_profiling_worker_info worker_info; int ret = starpu_profiling_worker_get_info(worker, &worker_info); STARPU_ASSERT(!ret); char workername[128]; starpu_worker_get_name(worker, workername, sizeof(workername)); unsigned nimpl; FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername); for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { switch (starpu_worker_get_type(worker)) { case STARPU_CPU_WORKER: FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f \n", size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), ((length_cpu_sum)/niter)); break; case STARPU_OPENCL_WORKER: FPRINTF(stdout, "Expectedd time for %d on %s (impl %u): %f, Measuredd time: %f \n", size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), ((length_gpu_sum)/niter)); break; case STARPU_CUDA_WORKER: FPRINTF(stdout, "Expectedd time for %d on %s (impl %u): %f, Measuredd time: %f \n", size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), ((length_gpu_sum)/niter)); break; default: FPRINTF(stdout, "unsupported!\n"); break; } } } } #ifdef STARPU_USE_OPENCL struct starpu_opencl_program opencl_program; #endif int main(int argc, char **argv) { /* Enable profiling */ starpu_profiling_status_set(STARPU_PROFILING_ENABLE); struct starpu_conf conf; starpu_data_handle_t handle; int ret; starpu_conf_init(&conf); conf.sched_policy_name = "dmda"; conf.calibrate = 2; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif int size; for (size = STARTlin; size < END; size *= 2) { /* Use a linear regression */ test_memset(size, &memset_cl); } for (size = START*1.5; size < END; size *= 2) { /* Use a non-linear regression */ test_memset(size, &nl_memset_cl); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif starpu_shutdown(); /* Test Phase */ starpu_conf_init(&conf); conf.sched_policy_name = "dmda"; conf.calibrate = 0; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif /* Now create a dummy task just to estimate its duration according to the regression */ size = 1234567; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); struct starpu_task *task = starpu_task_create(); task->handles[0] = handle; task->destroy = 0; //FPRINTF(stdout, "\n ////linear regression results////\n"); //task->cl = &memset_cl; //compare_performance(size, &memset_cl, task); FPRINTF(stdout, "\n ////non linear regression results////\n"); task->cl = &nl_memset_cl; compare_performance(size, &nl_memset_cl, task); starpu_task_destroy(task); starpu_data_unregister(handle); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/perfmodels/regression_based_memset.c000066400000000000000000000231271507764646700243530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); return(retval); } /* * Benchmark memset with a linear and non-linear regression */ #define STARTlin 1024 #define START 1024 #ifdef STARPU_QUICK_CHECK #define END 1048576 #define NENERGY 3 #else #define END 16777216 #define NENERGY 100 #endif #ifdef STARPU_USE_CUDA static void memset_cuda(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); cudaMemsetAsync(ptr, 42, n * sizeof(*ptr), starpu_cuda_get_local_stream()); } #endif #ifdef STARPU_USE_OPENCL extern void memset_opencl(void *buffers[], void *args); #endif void memset0_cpu(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); unsigned i; for (i = 0; i < n; i++) ptr[i] = 42; } void memset_cpu(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); starpu_usleep(10); memset(ptr, 42, n * sizeof(*ptr)); } static struct starpu_perfmodel model = { .type = STARPU_REGRESSION_BASED, .symbol = "memset_regression_based" }; static struct starpu_perfmodel nl_model = { .type = STARPU_NL_REGRESSION_BASED, .symbol = "non_linear_memset_regression_based" }; static struct starpu_perfmodel energy_model = { .type = STARPU_REGRESSION_BASED, .symbol = "memset_regression_based_energy" }; static struct starpu_perfmodel nl_energy_model = { .type = STARPU_NL_REGRESSION_BASED, .symbol = "non_linear_memset_regression_based_energy" }; static struct starpu_codelet memset_cl = { #ifdef STARPU_USE_CUDA .cuda_funcs = {memset_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {memset_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs = {memset0_cpu, memset_cpu}, .cpu_funcs_name = {"memset0_cpu", "memset_cpu"}, .model = &model, .energy_model = &energy_model, .nbuffers = 1, .modes = {STARPU_SCRATCH} }; static struct starpu_codelet nl_memset_cl = { #ifdef STARPU_USE_CUDA .cuda_funcs = {memset_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {memset_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs = {memset0_cpu, memset_cpu}, .cpu_funcs_name = {"memset0_cpu", "memset_cpu"}, .model = &nl_model, .energy_model = &nl_energy_model, .nbuffers = 1, .modes = {STARPU_SCRATCH} }; static void test_memset(int nelems, struct starpu_codelet *codelet) { int nloops = 100; int loop; starpu_data_handle_t handle; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int)); for (loop = 0; loop < nloops; loop++) { struct starpu_task *task = starpu_task_create(); task->cl = codelet; task->handles[0] = handle; int ret = starpu_task_submit(task); if (ret == -ENODEV) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_do_schedule(); starpu_data_unregister(handle); } static int test_memset_energy(int nelems, int workerid, int where, enum starpu_worker_archtype archtype, int impl, struct starpu_codelet *codelet) { (void)impl; int nloops; int loop; starpu_data_handle_t handle; nloops = NENERGY; if (workerid == -1) nloops *= starpu_worker_get_count_by_type(archtype); starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int)); for (loop = 0; loop < nloops; loop++) { struct starpu_task *task = starpu_task_create(); task->cl = codelet; task->where = where; task->handles[0] = handle; task->flops = nelems; if (workerid != -1) { task->execute_on_a_specific_worker = 1; task->workerid = workerid; } int ret = starpu_task_submit(task); if (ret == -ENODEV) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_do_schedule(); starpu_data_unregister(handle); return nloops; } static int bench_energy(int workerid, int where, enum starpu_worker_archtype archtype, int impl, struct starpu_codelet *codelet) { int size; int retval; int ntasks; for (size = STARTlin; size < END; size *= 2) { starpu_data_handle_t handle; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); if ((retval = starpu_energy_start(workerid, archtype)) != 0) { starpu_data_unregister(handle); _STARPU_DISP("Energy measurement not supported for archtype %s\n", starpu_perfmodel_get_archtype_name(archtype)); return -1; } /* Use a linear regression */ ntasks = test_memset_energy(size, workerid, where, archtype, impl, codelet); struct starpu_task *task = starpu_task_create(); task->cl = codelet; task->handles[0] = handle; task->synchronous = 1; task->destroy = 0; task->flops = size; retval = starpu_energy_stop(codelet->energy_model, task, impl, ntasks, workerid, archtype); starpu_task_destroy (task); starpu_data_unregister(handle); if (retval != 0) ERROR_RETURN(retval); } return 0; } static void show_task_perfs(int size, struct starpu_task *task) { unsigned workerid; for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) { char name[32]; starpu_worker_get_name(workerid, name, sizeof(name)); unsigned nimpl; for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { FPRINTF(stdout, "Expected time for %d on %s (impl %u):\t%f\n", size, name, nimpl, starpu_task_expected_length(task, starpu_worker_get_perf_archtype(workerid, task->sched_ctx), nimpl)); } } } #ifdef STARPU_USE_OPENCL struct starpu_opencl_program opencl_program; #endif int main(int argc, char **argv) { struct starpu_conf conf; starpu_data_handle_t handle; int ret; unsigned i; starpu_conf_init(&conf); conf.sched_policy_name = "dmda"; conf.calibrate = 2; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif int size; for (size = STARTlin; size < END; size *= 2) { /* Use a linear regression */ test_memset(size, &memset_cl); } for (size = START; size < END; size *= 2) { /* Use a non-linear regression */ test_memset(size, &nl_memset_cl); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); /* Now create a dummy task just to estimate its duration according to the regression */ size = 12345; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); struct starpu_task *task = starpu_task_create(); task->cl = &memset_cl; task->handles[0] = handle; task->destroy = 0; show_task_perfs(size, task); task->cl = &nl_memset_cl; show_task_perfs(size, task); starpu_task_destroy(task); starpu_data_unregister(handle); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif starpu_shutdown(); starpu_conf_init(&conf); /* Use a scheduler which doesn't choose the implementation */ #ifdef STARPU_HAVE_UNSETENV unsetenv("STARPU_SCHED"); #endif conf.sched_policy_name = "eager"; conf.calibrate = 1; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/perfmodels/opencl_memset_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif if (starpu_cpu_worker_get_count() > 0) { memset_cl.cpu_funcs[1] = NULL; bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &memset_cl); #ifdef STARPU_HAVE_UNSETENV memset_cl.cpu_funcs[1] = memset_cpu; memset_cl.cpu_funcs[0] = NULL; bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &memset_cl); #endif nl_memset_cl.cpu_funcs[1] = NULL; bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 0, &nl_memset_cl); #ifdef STARPU_HAVE_UNSETENV nl_memset_cl.cpu_funcs[1] = memset_cpu; nl_memset_cl.cpu_funcs[0] = NULL; bench_energy(-1, STARPU_CPU, STARPU_CPU_WORKER, 1, &nl_memset_cl); #endif } for (i = 0; i < starpu_cuda_worker_get_count(); i++) { int workerid = starpu_worker_get_by_type(STARPU_CUDA_WORKER, i); bench_energy(workerid, STARPU_CUDA, STARPU_CUDA_WORKER, 0, &memset_cl); bench_energy(workerid, STARPU_CUDA, STARPU_CUDA_WORKER, 0, &nl_memset_cl); } #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/perfmodels/regression_based_multiimpl.c000066400000000000000000000161721507764646700250770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* * A multi-implementation benchmark with dmda scheduler * we aim to test the dmda behavior when we have two implementations * dmda choose the implementation which minimises the execution time */ #define STARTlin 131072 #define START 1024 #ifdef STARPU_QUICK_CHECK #define END 1048576 #else #define END 16777216 #endif // first implementation with an initial delay (100 us) void memset0_cpu(void *descr[], void *arg) { (void)arg; STARPU_SKIP_IF_VALGRIND; unsigned *ptr = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]); unsigned n = STARPU_VECTOR_GET_NX(descr[0]); unsigned i; starpu_usleep(100); for (i=0; icl = codelet; task->handles[0] = handle; int ret = starpu_task_submit(task); if (ret == -ENODEV) exit(STARPU_TEST_SKIPPED); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_do_schedule(); starpu_data_unregister(handle); } static void compare_performance(int size, struct starpu_codelet *codelet, struct starpu_task *compar_task) { unsigned i; unsigned niter = 100; starpu_data_handle_t handle; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); struct starpu_task *tasks[niter]; for (i = 0; i < niter; i++) { struct starpu_task *task = starpu_task_create(); task->cl = codelet; task->handles[0] = handle; task->synchronous = 1; /* We will destroy the task structure by hand so that we can * query the profiling info before the task is destroyed. */ task->destroy = 0; tasks[i] = task; int ret = starpu_task_submit(task); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); exit(0); } } starpu_data_unregister(handle); starpu_task_wait_for_all(); double length_sum = 0.0; for (i = 0; i < niter; i++) { struct starpu_task *task = tasks[i]; struct starpu_profiling_task_info *info = task->profiling_info; /* How long was the task execution ? */ length_sum += starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); /* We don't need the task structure anymore */ starpu_task_destroy(task); } /* Display the occupancy of all workers during the test */ unsigned worker; for (worker = 0; worker < starpu_worker_get_count(); worker++) { struct starpu_profiling_worker_info worker_info; int ret = starpu_profiling_worker_get_info(worker, &worker_info); STARPU_ASSERT(!ret); char workername[128]; starpu_worker_get_name(worker, workername, sizeof(workername)); unsigned nimpl; if (starpu_worker_get_type(worker)==STARPU_CPU_WORKER) { FPRINTF(stdout, "\n Worker :%s ::::::::::\n\n", workername); for (nimpl = 0; nimpl < STARPU_MAXIMPLEMENTATIONS; nimpl++) { FPRINTF(stdout, "Expected time for %d on %s (impl %u): %f, Measured time: %f, Expected energy: %f\n", size, workername, nimpl,starpu_task_expected_length(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl), ((length_sum)/niter), starpu_task_expected_energy(compar_task, starpu_worker_get_perf_archtype(worker, compar_task->sched_ctx), nimpl)); } } } } int main(int argc, char **argv) { /* Enable profiling */ starpu_profiling_status_set(STARPU_PROFILING_ENABLE); struct starpu_conf conf; starpu_data_handle_t handle; int ret; starpu_conf_init(&conf); conf.sched_policy_name = "dmda"; conf.calibrate = 2; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int size; #if 0 for (size = STARTlin; size < END; size *= 2) { /* Use a linear regression */ test_memset(size, &memset_cl); } #endif for (size = START; size < END; size *= 2) { /* Use a non-linear regression */ test_memset(size, &nl_memset_cl); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_shutdown(); /* Test Phase */ starpu_conf_init(&conf); conf.sched_policy_name = "dmda"; conf.calibrate = 0; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Now create a dummy task just to estimate its duration according to the regression */ size = 1234567; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, size, sizeof(int)); struct starpu_task *task = starpu_task_create(); task->handles[0] = handle; task->destroy = 0; FPRINTF(stdout, "\n ////non linear regression results////\n"); task->cl = &nl_memset_cl; compare_performance(size, &nl_memset_cl, task); starpu_task_destroy(task); starpu_data_unregister(handle); starpu_shutdown(); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/perfmodels/user_base.c000066400000000000000000000063651507764646700214400ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Test using a user-provided base for the perfmodel */ void func(void *descr[], void *arg) { (void)descr; (void)arg; starpu_usleep(1000); } size_t get_size_base(struct starpu_task *task, unsigned nimpl) { (void)task; (void)nimpl; return 3; }; uint32_t get_footprint(struct starpu_task *task) { uint32_t orig = starpu_task_data_footprint(task); return starpu_hash_crc32c_be(42, orig); }; static struct starpu_perfmodel rb_model = { .type = STARPU_REGRESSION_BASED, .symbol = "user_base_valid_model_regression_based", .size_base = get_size_base, }; static struct starpu_perfmodel nlrb_model = { .type = STARPU_NL_REGRESSION_BASED, .symbol = "user_base_valid_model_non_linear_regression_based", .size_base = get_size_base, }; static struct starpu_perfmodel hb_model = { .type = STARPU_HISTORY_BASED, .symbol = "user_base_valid_model_history_based", .size_base = get_size_base, }; static struct starpu_perfmodel hb_model_foot = { .type = STARPU_HISTORY_BASED, .symbol = "user_base_valid_model_history_based_footprint", .footprint = get_footprint, }; static struct starpu_codelet mycodelet = { .cuda_funcs = {func}, .opencl_funcs = {func}, .cpu_funcs = {func}, .cpu_funcs_name = {"func"}, .nbuffers = 1, .modes = {STARPU_W} }; static int submit(struct starpu_codelet *codelet, struct starpu_perfmodel *model) { int nloops = 123; int loop; starpu_data_handle_t handle; int ret; struct starpu_conf conf; starpu_conf_init(&conf); conf.sched_policy_name = "eager"; conf.calibrate = 1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); codelet->model = model; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, 100, sizeof(int)); for (loop = 0; loop < nloops; loop++) { ret = starpu_task_insert(codelet, STARPU_W, handle, 0); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_data_unregister(handle); starpu_shutdown(); return EXIT_SUCCESS; } int main(void) { int ret; /* Use a linear regression model */ ret = submit(&mycodelet, &rb_model); if (ret) return ret; /* Use a non-linear regression model */ ret = submit(&mycodelet, &nlrb_model); if (ret) return ret; /* Use a history model */ ret = submit(&mycodelet, &hb_model); if (ret) return ret; /* Use a history model with footprints*/ ret = submit(&mycodelet, &hb_model_foot); if (ret) return ret; return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/perfmodels/valid_model.c000066400000000000000000000112401507764646700217330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Check that measurements get recorded in the performance model */ void func(void *descr[], void *arg) { (void)descr; (void)arg; starpu_usleep(1000); } static struct starpu_perfmodel rb_model = { .type = STARPU_REGRESSION_BASED, .symbol = "valid_model_regression_based" }; static struct starpu_perfmodel nlrb_model = { .type = STARPU_NL_REGRESSION_BASED, .symbol = "valid_model_non_linear_regression_based" }; #if 0 static struct starpu_perfmodel hb_model = { .type = STARPU_HISTORY_BASED, .symbol = "valid_model_history_based" }; #endif static struct starpu_codelet mycodelet = { .cuda_funcs = {func}, .opencl_funcs = {func}, .cpu_funcs = {func}, .cpu_funcs_name = {"func"}, .nbuffers = 1, .modes = {STARPU_W} }; static int submit(struct starpu_codelet *codelet, struct starpu_perfmodel *model) { int nloops = 123; int loop; starpu_data_handle_t handle; struct starpu_perfmodel lmodel; int ret; int old_nsamples, new_nsamples; struct starpu_conf conf; starpu_conf_init(&conf); conf.sched_policy_name = "eager"; conf.calibrate = 1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); codelet->model = model; old_nsamples = 0; memset(&lmodel, 0, sizeof(struct starpu_perfmodel)); lmodel.type = model->type; ret = starpu_perfmodel_load_symbol(codelet->model->symbol, &lmodel); if (ret != 1) { int i, impl; for(i = 0; i < lmodel.state->ncombs; i++) { int comb = lmodel.state->combs[i]; for(impl = 0; impl < lmodel.state->nimpls[comb]; impl++) old_nsamples += lmodel.state->per_arch[comb][impl].regression.nsample; } } starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, 100, sizeof(int)); for (loop = 0; loop < nloops; loop++) { ret = starpu_task_insert(codelet, STARPU_W, handle, 0); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_data_unregister(handle); starpu_perfmodel_unload_model(&lmodel); starpu_shutdown(); // To force dumping perf models on disk // We need to call starpu_init again to initialise values used by perfmodels ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); char path[256]; starpu_perfmodel_get_model_path(codelet->model->symbol, path, 256); FPRINTF(stderr, "Perfmodel File <%s>\n", path); ret = starpu_perfmodel_load_file(path, &lmodel); if (ret == 1) { FPRINTF(stderr, "The performance model for the symbol <%s> could not be loaded\n", codelet->model->symbol); starpu_shutdown(); return 1; } else { int i; new_nsamples = 0; for(i = 0; i < lmodel.state->ncombs; i++) { int comb = lmodel.state->combs[i]; int impl; for(impl = 0; impl < lmodel.state->nimpls[comb]; impl++) new_nsamples += lmodel.state->per_arch[comb][impl].regression.nsample; } } ret = starpu_perfmodel_unload_model(&lmodel); starpu_shutdown(); if (ret == 1) { FPRINTF(stderr, "The performance model for the symbol <%s> could not be UNloaded\n", codelet->model->symbol); return 1; } if (old_nsamples + nloops == new_nsamples) { FPRINTF(stderr, "Sampling for <%s> OK %d + %d == %d\n", codelet->model->symbol, old_nsamples, nloops, new_nsamples); return EXIT_SUCCESS; } else { FPRINTF(stderr, "Sampling for <%s> failed %d + %d != %d\n", codelet->model->symbol, old_nsamples, nloops, new_nsamples); return EXIT_FAILURE; } } int main(void) { int ret; /* Use a linear regression model */ ret = submit(&mycodelet, &rb_model); if (ret) return ret; /* Use a non-linear regression model */ ret = submit(&mycodelet, &nlrb_model); if (ret) return ret; #ifdef STARPU_DEVEL # warning history based model cannot be validated with regression.nsample #endif #if 0 /* Use a history model */ ret = submit(&mycodelet, &hb_model); if (ret) return ret; #endif return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/perfmodels/value_nan.c000066400000000000000000000050621507764646700214310ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include #ifdef STARPU_HAVE_WINDOWS #include #include #endif /* * Test that _starpu_write_double and _starpu_read_double properly manipulate * NaN values */ #define STRING "booh" static int _check_number(double val, int checknan) { char *tmp = "starpu_XXXXXX"; char filename[100]; strcpy(filename, tmp); #ifdef STARPU_HAVE_WINDOWS _mktemp(filename); #else { int id = mkstemp(filename); /* fail */ if (id < 0) { FPRINTF(stderr, "Error when creating temp file\n"); return 1; } } #endif /* write the double value in the file followed by a predefined string */ FILE *f = fopen(filename, "w"); if (!f) { FPRINTF(stderr, "Error when opening file %s\n", filename); return 1; } // A double is written with the format %e ... _starpu_write_double(f, "%e", val); fprintf(f, " %s\n", STRING); fclose(f); /* read the double value and the string back from the file */ f = fopen(filename, "r"); if (!f) { FPRINTF(stderr, "Error when opening file %s\n", filename); return 1; } double lat; char str[10]; // ... but is read with the format %le int x = _starpu_read_double(f, "%le", &lat); int y = fscanf(f, " %9s", str); fclose(f); unlink(filename); /* check that what has been read is identical to what has been written */ int pass; pass = (x == 1) && (y == 1); pass = pass && strcmp(str, STRING) == 0; if (checknan) pass = pass && isnan(val) && isnan(lat); else pass = pass && (int)lat == (int)val; return pass?0:1; } int main(void) { int ret1, ret2; double nanvalue = nan(""); ret1 = _check_number(42.0, 0); FPRINTF(stderr, "%s when reading %e\n", ret1==0?"Success":"Error", 42.0); ret2 = _check_number(nanvalue, 1); FPRINTF(stderr, "%s when reading %e\n", ret2==0?"Success":"Error", nanvalue); return ret1+ret2; } starpu-1.4.9+dfsg/tests/regression/000077500000000000000000000000001507764646700173325ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/regression/profiles.build.only.in000066400000000000000000000017531507764646700235710ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Build configuration # Build configuration --enable-coverage # Build configuration --disable-opencl # Build configuration --disable-cuda # Build configuration --disable-cuda --disable-opencl # Build configuration --enable-cuda --disable-opencl # Build configuration --disable-cuda --enable-opencl # Build configuration --with-fxt starpu-1.4.9+dfsg/tests/regression/profiles.in000066400000000000000000000031331507764646700215050ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Build configuration --enable-coverage # Execution configuration # Build configuration --enable-debug --enable-verbose # Execution configuration STARPU_LOGFILENAME=/tmp/starpu_run.log # # Build configuration # Execution configuration STARPU_NCUDA=0 # Execution configuration STARPU_NCUDA=1 # Execution configuration STARPU_SCHED=ws # Execution configuration STARPU_SCHED=lws # Execution configuration STARPU_SCHED=prio # Execution configuration STARPU_SCHED=no-prio # Execution configuration STARPU_SCHED=dm # Execution configuration STARPU_SCHED=dmda # Execution configuration STARPU_SCHED=random # Execution configuration STARPU_SCHED=eager # Execution configuration STARPU_SCHED=dmda STARPU_SCHED_ALPHA=10 STARPU_SCHED_BETA=15 # Execution configuration STARPU_CALIBRATE=1 # Execution configuration STARPU_PREFETCH=1 # # Build configuration --disable-cuda # Execution configuration # # Build configuration --disable-opencl # Execution configuration starpu-1.4.9+dfsg/tests/regression/regression.sh.in000077500000000000000000000073571507764646700224720ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # WORKDIR=`mktemp -d` SRCDIR=@STARPU_SRC_DIR@ MAKE="${MAKE:-make -j3}" ################################################## # arg: returned status from the previous command check_exec() { PROFILE=$PROFILE_NUM if [ $SUBPROFILE_NUM -ne 0 ] ; then PROFILE="${PROFILE}.${SUBPROFILE_NUM}" fi if [ $1 -eq 0 ]; then echo "PASS: Profile $PROFILE" else echo "FAIL: Profile $PROFILE" if [ ${ABORT_ON_ERROR} -eq 1 ]; then echo "Aborting ..." exit 1; fi fi } do_build() { PROFILE_NUM=`expr ${PROFILE_NUM} + 1` echo ">>> Build configuration ${PROFILE_NUM}: <$@>" rm -rf ${WORKDIR}/build/* cd ${WORKDIR}/build ${SRCDIR}/configure "$@" > $WORKDIR/logs/profile.${PROFILE_NUM} 2>&1 cd - code_build=$? if [ $code_build -ne 0 ]; then check_exec $code_build else ${MAKE} -C ${WORKDIR}/build >> $WORKDIR/logs/profile.${PROFILE_NUM} 2>&1 code_build=$? check_exec $code_build fi } do_test() { SUBPROFILE_NUM=`expr ${SUBPROFILE_NUM} + 1` echo ">>>> Execution configuration ${PROFILE_NUM}.${SUBPROFILE_NUM} : <$@>" ( export $* ; ${MAKE} -C ${WORKDIR}/build check ) > $WORKDIR/logs/profile.${PROFILE_NUM}.${SUBPROFILE_NUM} 2>&1 code_check=$? check_exec $code_check if [ $code_check -ne 0 ] ; then grep FAIL: $WORKDIR/logs/profile.${PROFILE_NUM}.${SUBPROFILE_NUM} fi coverage=$(find ${WORKDIR}/build -name "*.gcda" 2>/dev/null) if [ -n "$coverage" ] ; then lcov -c -d ${WORKDIR}/build -o ${WORKDIR}/cov/profile_${PROFILE_NUM}.${SUBPROFILE_NUM}.lcov >> $WORKDIR/logs/profile.${PROFILE_NUM}.${SUBPROFILE_NUM} 2>&1 fi } ################################################## ABORT_ON_ERROR=0 while [ $# -ne 0 ]; do case $1 in --abort-on-error) ABORT_ON_ERROR=1 shift ;; --help) echo echo "Error. Syntax $0 [ --abort-on-error ] " echo exit 0 ;; *) break ;; esac done if [ -z "$1" ] ; then echo "Error. Syntax $0 [ --abort-on-error ] " exit 0 fi ################################################# ## Create and jump to the workdir mkdir ${WORKDIR}/build ; mkdir ${WORKDIR}/cov ; mkdir ${WORKDIR}/html ; mkdir ${WORKDIR}/logs PROFILE_NUM=0 code_build=1 for file in $* ; do ( while read line ; do if [ "$line" == "# Build configuration" ] ; then read line SUBPROFILE_NUM=0 do_build $line elif [ "$line" == "# Execution configuration" ] ; then read line if [ $code_build -eq 0 ] ; then do_test $line fi fi done ) < $file done echo $WORKDIR ### End of script coverage=$(ls ${WORKDIR}/cov/*.lcov 2>/dev/null) if [ -n "${coverage}" ] ; then genhtml --function-coverage --legend ${WORKDIR}/cov/*.lcov -o ${WORKDIR}/html -t "StarPU coverage test results" > ${WORKDIR}/logs/genhtml.log echo "The coverage report is located at : ${WORKDIR}/html" fi echo "Tests done" starpu-1.4.9+dfsg/tests/sched_ctx/000077500000000000000000000000001507764646700171165ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/sched_ctx/sched_ctx_hierarchy.c000066400000000000000000000126071507764646700232720ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" struct starpu_codelet mycodelet_bis; void func_cpu_bis(void *descr[], void *_args) { (void)descr; char msg; char worker_name[256]; int worker_id = starpu_worker_get_id_check(); int worker_id_expected; int ntasks; starpu_worker_get_name(worker_id, worker_name, 256); starpu_codelet_unpack_args(_args, &msg, &ntasks, &worker_id_expected); STARPU_ASSERT(worker_id == worker_id_expected); FPRINTF(stderr, "[msg '%c'] [worker id %d] [worker name %s] [tasks %d]\n", msg, worker_id, worker_name, ntasks); if (ntasks > 0) { int ret; int nntasks = ntasks - 1; ret = starpu_task_insert(&mycodelet_bis, STARPU_VALUE, &msg, sizeof(msg), STARPU_VALUE, &nntasks, sizeof(ntasks), STARPU_VALUE, &worker_id, sizeof(worker_id), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } } void func_cpu(void *descr[], void *_args) { (void)descr; char msg; char worker_name[256]; int worker_id = starpu_worker_get_id_check(); int worker_id_expected; int ntasks; unsigned sched_ctx_id; unsigned *sched_ctx_id_p; starpu_worker_get_name(worker_id, worker_name, 256); starpu_codelet_unpack_args(_args, &msg, &ntasks, &sched_ctx_id, &worker_id_expected, &sched_ctx_id_p); STARPU_ASSERT(worker_id == worker_id_expected); *sched_ctx_id_p = sched_ctx_id; starpu_sched_ctx_set_context(sched_ctx_id_p); FPRINTF(stderr, "[msg '%c'] [worker id %d] [worker name %s] [sched_ctx_id %u] [tasks %d] [buffer %p]\n", msg, worker_id, worker_name, sched_ctx_id, ntasks, sched_ctx_id_p); if (ntasks > 0) { int ret; int nntasks = ntasks - 1; ret = starpu_task_insert(&mycodelet_bis, STARPU_VALUE, &msg, sizeof(msg), STARPU_VALUE, &nntasks, sizeof(nntasks), STARPU_VALUE, &worker_id, sizeof(worker_id), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } } struct starpu_codelet mycodelet_bis = { .cpu_funcs = {func_cpu_bis}, .cpu_funcs_name = {"func_cpu_bis"}, }; struct starpu_codelet mycodelet = { .cpu_funcs = {func_cpu}, .cpu_funcs_name = {"func_cpu"}, }; int main(void) { int i, ret; int nprocs, nprocs_per_context=1; int procs[STARPU_NMAXWORKERS]; int ntasks=10; char msg[3] = "ab"; unsigned *buffer[2]; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); nprocs = starpu_cpu_worker_get_count(); if (nprocs < 2) goto enodev; nprocs_per_context = 1; FPRINTF(stderr, "# Workers = %d -> %d worker for each sched context\n", nprocs, nprocs_per_context); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, nprocs); unsigned sched_ctx_0 = starpu_sched_ctx_create(procs, nprocs_per_context, "ctx_0", 0); unsigned sched_ctx_1 = starpu_sched_ctx_create(&procs[nprocs_per_context], nprocs_per_context, "ctx_1", 0); if (!getenv("STARPU_SSILENT")) { char name0[256]; char name1[256]; starpu_worker_get_name(procs[0], name0, 256); starpu_worker_get_name(procs[1], name1, 256); FPRINTF(stderr, "Creating first sched_ctx with %d worker [id %d name %s]\n", nprocs_per_context, procs[0], name0); FPRINTF(stderr, "Creating second sched_ctx with %d worker [id %d name %s]\n", nprocs_per_context, procs[1], name1); starpu_sched_ctx_display_workers(sched_ctx_0, stderr); starpu_sched_ctx_display_workers(sched_ctx_1, stderr); } buffer[0] = malloc(sizeof(unsigned)); buffer[1] = malloc(sizeof(unsigned)); FPRINTF(stderr, "allocating %p and %p\n", buffer[0], buffer[1]); ret = starpu_task_insert(&mycodelet, STARPU_SCHED_CTX, sched_ctx_0, STARPU_VALUE, &msg[0], sizeof(msg[0]), STARPU_VALUE, &ntasks, sizeof(ntasks), STARPU_VALUE, &sched_ctx_0, sizeof(sched_ctx_0), STARPU_VALUE, &procs[0], sizeof(procs[0]), STARPU_VALUE, &buffer[0], sizeof(buffer[0]), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&mycodelet, STARPU_SCHED_CTX, sched_ctx_1, STARPU_VALUE, &msg[1], sizeof(msg[1]), STARPU_VALUE, &ntasks, sizeof(ntasks), STARPU_VALUE, &sched_ctx_1, sizeof(sched_ctx_1), STARPU_VALUE, &procs[1], sizeof(procs[1]), STARPU_VALUE, &buffer[1], sizeof(buffer[1]), 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); starpu_sched_ctx_delete(sched_ctx_0); starpu_sched_ctx_delete(sched_ctx_1); starpu_shutdown(); free(buffer[0]); free(buffer[1]); return 0; enodev: starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; } starpu-1.4.9+dfsg/tests/sched_ctx/sched_ctx_list.c000066400000000000000000000167061507764646700222730ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "../../src/core/sched_ctx_list.h" int main(void) { struct _starpu_sched_ctx_list *ctx_list = NULL, *found_list; struct _starpu_sched_ctx_elt *elt; struct _starpu_sched_ctx_list_iterator it; int ret=1, global=1; /* Check prio list addition */ ret &= (_starpu_sched_ctx_list_add_prio(&ctx_list, 50, 0) != NULL); ret &= (ctx_list->priority == 50); ret &= (_starpu_sched_ctx_list_add_prio(&ctx_list, 999, 2) != NULL); ret &= (ctx_list->priority == 999); ret &= (ctx_list->next->priority == 50); ret &= !_starpu_sched_ctx_list_add(&ctx_list, 1); ret &= (ctx_list->next->next->priority == 0); /* Check elements added */ ret &= (ctx_list->head->sched_ctx == 2); ret &= (ctx_list->next->head->sched_ctx == 0); ret &= (ctx_list->next->next->head->sched_ctx == 1); /* Check singleton status */ ret &= (ctx_list->next->head->prev->sched_ctx == 0); ret &= (ctx_list->next->head->next->sched_ctx == 0); global &= ret; STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_add"); /* Check addition to existing list */ ret = 1; _starpu_sched_ctx_elt_add(ctx_list->next, 3); ret &= (ctx_list->next->head->next->sched_ctx == 3); ret &= (ctx_list->next->head->prev->sched_ctx == 3); global &= ret; STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_elt_add"); /* Find element */ ret = 1; elt = _starpu_sched_ctx_elt_find(ctx_list, 3); ret &= (elt != NULL && elt->sched_ctx == 3); elt = _starpu_sched_ctx_elt_find(ctx_list, 5); ret &= (elt == NULL); global &= ret; STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_elt_find"); /* Find list */ ret = 1; found_list = _starpu_sched_ctx_list_find(ctx_list, 0); ret &= (found_list->priority == 0); ret &= (found_list->prev->priority == 50); found_list = _starpu_sched_ctx_list_find(ctx_list, 999); ret &= (found_list->priority==999); found_list = _starpu_sched_ctx_list_find(ctx_list, 42); ret &= (found_list == NULL); global &= ret; STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_find"); /* List exists */ ret = 1; ret &= _starpu_sched_ctx_list_exists(ctx_list, 999); ret &= _starpu_sched_ctx_list_exists(ctx_list, 50); ret &= _starpu_sched_ctx_list_exists(ctx_list, 0); ret &= !_starpu_sched_ctx_list_exists(ctx_list, 42); global &= ret; STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_exists"); /* Iterator */ ret = 1; ret &= !_starpu_sched_ctx_list_iterator_init(ctx_list, &it); ret &= _starpu_sched_ctx_list_iterator_has_next(&it); elt = _starpu_sched_ctx_list_iterator_get_next(&it); ret &= (elt->sched_ctx == 2); ret &= _starpu_sched_ctx_list_iterator_has_next(&it); elt = _starpu_sched_ctx_list_iterator_get_next(&it); ret &= (elt->sched_ctx == 0); ret &= _starpu_sched_ctx_list_iterator_has_next(&it); elt = _starpu_sched_ctx_list_iterator_get_next(&it); ret &= (elt->sched_ctx == 3); ret &= _starpu_sched_ctx_list_iterator_has_next(&it); elt = _starpu_sched_ctx_list_iterator_get_next(&it); ret &= (elt->sched_ctx == 1); ret &= !_starpu_sched_ctx_list_iterator_has_next(&it); global &= ret; STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_iterator"); /* Add element before head */ ret = 1; _starpu_sched_ctx_elt_add_before(ctx_list->next, 4); ret &= (ctx_list->next->head->prev->sched_ctx == 4); ret &= (ctx_list->next->head->next->next->sched_ctx == 4); global &= ret; STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_elt_add_before"); /* Let's move it */ ret = 1; ret &= !_starpu_sched_ctx_list_move(&ctx_list, 4, 1002); ret &= (ctx_list->priority == 1002); ret &= (ctx_list->head->sched_ctx == 4); ret &= (ctx_list->head->next->sched_ctx == 4); ret &= (ctx_list->next->next->head->prev->sched_ctx != 4); STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_move"); /* Let's remove it */ ret = 1; elt = _starpu_sched_ctx_elt_find(ctx_list, 4); _starpu_sched_ctx_list_remove_elt(&ctx_list, elt); //ret &= (elt == NULL); ret &= (_starpu_sched_ctx_elt_find(ctx_list, 4) == NULL); ret &= (ctx_list->next->head->next->sched_ctx == 3); ret &= (ctx_list->next->head->prev->sched_ctx == 3); global &= ret; STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_elt_remove"); /* Let's remove head of that same ctx */ ret = 1; ret &= !_starpu_sched_ctx_list_remove(&ctx_list, 0); ret &= (_starpu_sched_ctx_elt_find(ctx_list, 0) == NULL); ret &= (ctx_list->next->head->sched_ctx == 3); ret &= (ctx_list->next->head->next->sched_ctx == 3); ret &= (ctx_list->next->head->prev->sched_ctx == 3); global &= ret; STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_remove"); /* Remove the last one of this list, we get an empty ctx */ ret = 1; ret &= !_starpu_sched_ctx_list_remove(&ctx_list, 3); ret &= (_starpu_sched_ctx_elt_find(ctx_list, 3) == NULL); found_list = _starpu_sched_ctx_list_find(ctx_list, 50); ret &= (found_list == NULL && ctx_list->priority != 50); ret &= (ctx_list->next->priority == 0); global &= ret; STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_remove"); /* Add an element to a new prio then remove it to ensure prio list is cleaned correctly */ ret = 1; ret &= (_starpu_sched_ctx_list_add_prio(&ctx_list, 100000, 75) != NULL); ret &= (ctx_list->priority == 100000); ret &= (_starpu_sched_ctx_elt_find(ctx_list, 75) != NULL); ret &= (ctx_list->head->sched_ctx == 75); ret &= !_starpu_sched_ctx_list_remove(&ctx_list, 75); ret &= (_starpu_sched_ctx_elt_find(ctx_list, 75) == NULL); found_list = _starpu_sched_ctx_list_find(ctx_list, 100000); ret &= (found_list == NULL && ctx_list->priority != 100000); ret &= (ctx_list->priority == 999); global &= ret; STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_remove"); /* Delete this list, the function is internal only so we need to modify the list pointers too */ ret = 1; found_list = ctx_list->next; found_list->prev = ctx_list->prev; _starpu_sched_ctx_list_remove_all(ctx_list); ctx_list = found_list; found_list = _starpu_sched_ctx_list_find(ctx_list, 999); ret &= (found_list == NULL && ctx_list->priority != 999); ret &= (_starpu_sched_ctx_elt_find(ctx_list, 2) == NULL); ret &= (ctx_list->priority == 0); ret &= (ctx_list->head->sched_ctx == 1); //as before ret &= (ctx_list->head->next->sched_ctx == 1); ret &= (ctx_list->head->prev->sched_ctx == 1); global &= ret; STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_remove_all"); /* Let's add some things again then clean everything */ ret = 1; ret &= (_starpu_sched_ctx_list_add_prio(&ctx_list, 1000, 42) != NULL); ret &= (_starpu_sched_ctx_list_add_prio(&ctx_list, 1000, 43) != NULL); _starpu_sched_ctx_list_delete(&ctx_list); ret &= (ctx_list == NULL); global &= ret; STARPU_CHECK_RETURN_VALUE_IS(ret, 1, "_starpu_sched_ctx_list_delete"); STARPU_CHECK_RETURN_VALUE_IS(global, 1, "_starpu_sched_ctx_(list|elt) global status"); return 0; } starpu-1.4.9+dfsg/tests/sched_ctx/sched_ctx_policy_data.c000066400000000000000000000046161507764646700236050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" int main(void) { int ret; int nprocs; int *procs; unsigned sched_ctx; unsigned main_sched_ctx; int *ptr; int *main_ptr; struct starpu_conf conf; starpu_conf_init(&conf); starpu_conf_noworker(&conf); conf.ncpus = -1; conf.nmpi_ms = -1; conf.ntcpip_ms = -1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); nprocs = starpu_cpu_worker_get_count(); if (nprocs == 0) { starpu_shutdown(); return STARPU_TEST_SKIPPED; } procs = (int*)malloc(nprocs*sizeof(int)); starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, procs, nprocs); sched_ctx = starpu_sched_ctx_create(procs, nprocs, "my_context", 0); ptr = starpu_sched_ctx_get_policy_data(sched_ctx); STARPU_ASSERT_MSG(ptr == NULL, "The policy data for the sched ctx should be NULL\n"); starpu_sched_ctx_set_policy_data(sched_ctx, procs); ptr = starpu_sched_ctx_get_policy_data(sched_ctx); FPRINTF(stderr, "sched_ctx %u : data %p (procs %p)\n", sched_ctx, ptr, procs); STARPU_ASSERT_MSG(ptr == procs, "The policy data for the sched ctx is incorrect\n"); main_sched_ctx = starpu_sched_ctx_get_context(); main_ptr = starpu_sched_ctx_get_policy_data(main_sched_ctx); STARPU_ASSERT_MSG(main_ptr == NULL, "The policy data for the sched ctx should be NULL\n"); starpu_sched_ctx_set_policy_data(main_sched_ctx, procs); main_ptr = starpu_sched_ctx_get_policy_data(sched_ctx); FPRINTF(stderr, "sched_ctx %u : data %p (procs %p)\n", main_sched_ctx, main_ptr, procs); STARPU_ASSERT_MSG(main_ptr == procs, "The policy data for the sched ctx is incorrect\n"); starpu_sched_ctx_delete(sched_ctx); free(procs); starpu_shutdown(); return (ptr == procs) ? 0 : 1; } starpu-1.4.9+dfsg/tests/sched_policies/000077500000000000000000000000001507764646700201275ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/sched_policies/data_locality.c000066400000000000000000000132021507764646700231020ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" /* * Check that scheduling policies tend to put tasks on the worker which has a * copy of the data */ #define NTASKS 8 /* * It is very inefficient to keep moving data between memory nodes. This * test makes sure the scheduler will take account of the data locality * when scheduling tasks. * * Applies to : dmda, pheft. */ void dummy(void *buffers[], void *args) { (void) buffers; (void) args; } /* * Dummy cost function, used to make sure the scheduler does schedule the * task, instead of getting rid of it as soon as possible because it doesn't * know its expected length. */ static double cost_function(struct starpu_task *task, unsigned nimpl) { (void) task; (void) nimpl; return 1.0; } static struct starpu_perfmodel model = { .type = STARPU_COMMON, .cost_function = cost_function }; static struct starpu_codelet cl = { .cpu_funcs = { dummy }, .cuda_funcs = { dummy }, .opencl_funcs = { dummy }, .modes = { STARPU_RW }, .model = &model, .nbuffers = 1 }; static int var = 42; static starpu_data_handle_t rw_handle; static void init_data(void) { starpu_variable_data_register(&rw_handle, STARPU_MAIN_RAM, (uintptr_t) &var, sizeof(var)); } static void free_data(void) { starpu_data_unregister(rw_handle); } static int run(struct starpu_sched_policy *policy) { int ret; struct starpu_conf conf; starpu_conf_init(&conf); conf.sched_policy = policy; ret = starpu_init(&conf); if (ret == -ENODEV) { FPRINTF(stderr, "No device found\n"); return -ENODEV; } if (starpu_cpu_worker_get_count() == 0 || (starpu_cuda_worker_get_count() == 0 && starpu_opencl_worker_get_count() == 0)) goto enodev; starpu_profiling_status_set(1); init_data(); /* Send the handle to a GPU. */ cl.where = STARPU_CUDA | STARPU_OPENCL; struct starpu_task *tasks[NTASKS]; tasks[0] = starpu_task_create(); tasks[0]->cl = &cl; tasks[0]->synchronous = 1; tasks[0]->handles[0] = rw_handle; tasks[0]->destroy = 0; ret = starpu_task_submit(tasks[0]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); /* Now, run multiple tasks using this handle. */ cl.where |= STARPU_CPU; int i; for (i = 1; i < NTASKS; i++) { tasks[i] = starpu_task_create(); tasks[i]->cl = &cl; tasks[i]->handles[0] = rw_handle; tasks[i]->destroy = 0; ret = starpu_task_submit(tasks[i]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); /* All tasks should have been executed on the same GPU. */ ret = 0; unsigned nodeid = starpu_worker_get_memory_node(tasks[0]->profiling_info->workerid); for (i = 0; i < NTASKS; i++) { if (starpu_worker_get_memory_node(tasks[i]->profiling_info->workerid) != nodeid) { FPRINTF(stderr, "Error for task %d. Worker id %d node id %d different from expected node id %d\n", i, tasks[i]->profiling_info->workerid, starpu_worker_get_memory_node(tasks[i]->profiling_info->workerid), nodeid); ret = 1; break; } starpu_task_destroy(tasks[i]); } /* Clean everything up. */ for (; i < NTASKS; i++) starpu_task_destroy(tasks[i]); free_data(); starpu_shutdown(); return ret; enodev: FPRINTF(stderr, "No device found\n"); starpu_shutdown(); return -ENODEV; } /* XXX: Does this test apply to other schedulers ? */ //extern struct starpu_sched_policy _starpu_sched_ws_policy; //extern struct starpu_sched_policy _starpu_sched_prio_policy; //extern struct starpu_sched_policy _starpu_sched_random_policy; //extern struct starpu_sched_policy _starpu_sched_dm_policy; extern struct starpu_sched_policy _starpu_sched_dmda_policy; //extern struct starpu_sched_policy _starpu_sched_dmda_ready_policy; //extern struct starpu_sched_policy _starpu_sched_dmda_sorted_policy; //extern struct starpu_sched_policy _starpu_sched_eager_policy; extern struct starpu_sched_policy _starpu_sched_parallel_heft_policy; //extern struct starpu_sched_policy _starpu_sched_peager_policy; static struct starpu_sched_policy *policies[] = { //&_starpu_sched_ws_policy, //&_starpu_sched_prio_policy, //&_starpu_sched_dm_policy, &_starpu_sched_dmda_policy, //&_starpu_sched_dmda_ready_policy, //&_starpu_sched_dmda_sorted_policy, //&_starpu_sched_random_policy, //&_starpu_sched_eager_policy, &_starpu_sched_parallel_heft_policy, //&_starpu_sched_peager_policy }; int main(void) { int i; int n_policies = sizeof(policies)/sizeof(policies[0]); int global_ret = 0; char *sched = getenv("STARPU_SCHED"); for (i = 0; i < n_policies; ++i) { struct starpu_sched_policy *policy = policies[i]; if (sched && strcmp(sched, policy->policy_name)) /* Testing another specific scheduler, no need to run this */ continue; FPRINTF(stdout, "Running with policy %s.\n", policy->policy_name); int ret = run(policy); if (ret == -ENODEV && global_ret == 0) global_ret = STARPU_TEST_SKIPPED; if (ret == 1 && global_ret == 0) global_ret = ret; } return global_ret; } starpu-1.4.9+dfsg/tests/sched_policies/execute_all_tasks.c000066400000000000000000000046311507764646700237760ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * All tasks submitted by StarPU should be executed once. * Applies to: all schedulers. */ #define NTASKS 8 static int run(struct starpu_sched_policy *p) { int ret; struct starpu_conf conf; (void) starpu_conf_init(&conf); conf.sched_policy = p; ret = starpu_init(&conf); if (ret == -ENODEV) exit(STARPU_TEST_SKIPPED); struct starpu_task *tasks[NTASKS] = { NULL }; int i; for (i = 0; i < NTASKS; i++) { struct starpu_task *task = starpu_task_create(); tasks[i] = task; task->cl = &starpu_codelet_nop; task->synchronous = 1; task->destroy = 0; ret = starpu_task_submit(task); if (ret != 0) { FPRINTF(stderr,"task submission returned %d\n", ret); return 1; } } starpu_task_wait_for_all(); ret = 0; for (i = 0; i < NTASKS; i++) { struct _starpu_job *j = tasks[i]->starpu_private; if (j == NULL || j->terminated == 0) { FPRINTF(stderr, "Error with policy %s.\n", p->policy_name); ret = 1; break; } } for (i = 0; i < NTASKS; i++) { starpu_task_destroy(tasks[i]); } starpu_shutdown(); return ret; } int main(void) { struct starpu_sched_policy **policies; struct starpu_sched_policy **policy; char *sched = getenv("STARPU_SCHED"); policies = starpu_sched_get_predefined_policies(); for(policy=policies ; *policy!=NULL ; policy++) { if (sched && strcmp(sched, (*policy)->policy_name)) /* Testing another specific scheduler, no need to run this */ continue; FPRINTF(stderr, "Running with policy %s.\n", (*policy)->policy_name); int ret; ret = run(*policy); if (ret == 1) return EXIT_FAILURE; } return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/sched_policies/prio.c000066400000000000000000000061161507764646700212500ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Task1 must be executed before task0, even if task0 is submitted first. * Applies to : all schedulers. */ #ifdef STARPU_QUICK_CHECK #define NTASKS 10 #elif !defined(STARPU_LONG_CHECK) #define NTASKS 100 #else #define NTASKS 1000 #endif void funcA(void *buffers[], void *args) { (void) buffers; (void) args; FPRINTF(stdout,"A"); starpu_usleep(1000); } void funcB(void *buffers[], void *args) { (void) buffers; (void) args; FPRINTF(stdout,"B"); starpu_usleep(1000); } static int run(struct starpu_sched_policy *policy) { int ret; struct starpu_conf conf; int i; starpu_conf_init(&conf); conf.sched_policy = policy; ret = starpu_init(&conf); if (ret != 0) exit(STARPU_TEST_SKIPPED); starpu_profiling_status_set(1); struct starpu_codelet clA = { .cpu_funcs = {funcA}, .cpu_funcs_name = {"funcA"}, .opencl_funcs = {funcA}, .cuda_funcs = {funcA}, .max_fpga_funcs = {funcA}, .hip_funcs = {funcA}, .nbuffers = 0 }; struct starpu_codelet clB = { .cpu_funcs = {funcB}, .cpu_funcs_name = {"funcB"}, .opencl_funcs = {funcB}, .cuda_funcs = {funcB}, .max_fpga_funcs = {funcB}, .hip_funcs = {funcB}, .nbuffers = 0 }; starpu_srand48(0); for (i = 0; i < NTASKS; i++) { struct starpu_task *task = starpu_task_create(); if (((int)(starpu_drand48()*2))%2) { task->cl = &clA; task->priority=STARPU_MIN_PRIO; } else { task->cl = &clB; task->priority=STARPU_MAX_PRIO; } task->detach=1; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); FPRINTF(stdout,"\n"); starpu_shutdown(); return 0; enodev: starpu_shutdown(); return -ENODEV; } int main(void) { struct starpu_sched_policy **policies; struct starpu_sched_policy **policy; char *sched = getenv("STARPU_SCHED"); policies = starpu_sched_get_predefined_policies(); for(policy=policies ; *policy!=NULL ; policy++) { int ret; if (sched && strcmp(sched, (*policy)->policy_name)) /* Testing another specific scheduler, no need to run this */ continue; FPRINTF(stderr, "Running with policy %s.\n", (*policy)->policy_name); ret = run(*policy); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; if (ret == 1) return EXIT_FAILURE; } return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/sched_policies/simple_cpu_gpu_sched.c000066400000000000000000000167441507764646700244700ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include "../helper.h" #include /* * Schedulers that are aware of the expected task length provided by the * perfmodels must make sure that : * - cpu_task is cheduled on a CPU. * - gpu_task is scheduled on a GPU. * * Applies to : dmda and to what other schedulers ? */ void dummy(void *buffers[], void *args) { (void) buffers; (void) args; } /* * Fake cost functions. */ static double cpu_task_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void) task; (void) arch; (void) nimpl; return 1.0; } static double cpu_task_gpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void) task; (void) arch; (void) nimpl; return 10000000.0; } static double gpu_task_cpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void) task; (void) arch; (void) nimpl; return 10000000.0; } static double gpu_task_gpu(struct starpu_task *task, struct starpu_perfmodel_arch* arch, unsigned nimpl) { (void) task; (void) arch; (void) nimpl; return 1.0; } static struct starpu_perfmodel model_cpu_task = { .type = STARPU_PER_ARCH, .symbol = "model_cpu_task" }; static struct starpu_perfmodel model_gpu_task = { .type = STARPU_PER_ARCH, .symbol = "model_gpu_task" }; static void init_perfmodels_gpu(int gpu_type) { int nb_worker_gpu = starpu_worker_get_count_by_type(gpu_type); int *worker_gpu_ids = malloc(nb_worker_gpu * sizeof(int)); int worker_gpu; starpu_worker_get_ids_by_type(gpu_type, worker_gpu_ids, nb_worker_gpu); for(worker_gpu = 0 ; worker_gpu < nb_worker_gpu ; worker_gpu ++) { starpu_perfmodel_set_per_devices_cost_function(&model_cpu_task, 0, cpu_task_gpu, gpu_type, starpu_worker_get_devid(worker_gpu_ids[worker_gpu]), 1, -1); starpu_perfmodel_set_per_devices_cost_function(&model_gpu_task, 0, gpu_task_gpu, gpu_type, starpu_worker_get_devid(worker_gpu_ids[worker_gpu]), 1, -1); } free(worker_gpu_ids); } static void init_perfmodels(void) { starpu_perfmodel_init(&model_cpu_task); starpu_perfmodel_init(&model_gpu_task); starpu_perfmodel_set_per_devices_cost_function(&model_cpu_task, 0, cpu_task_cpu, STARPU_CPU_WORKER, 0, 1, -1); starpu_perfmodel_set_per_devices_cost_function(&model_gpu_task, 0, gpu_task_cpu, STARPU_CPU_WORKER, 0, 1, -1); // We need to set the cost function for each combination with a CUDA or a OpenCL worker init_perfmodels_gpu(STARPU_CUDA_WORKER); init_perfmodels_gpu(STARPU_OPENCL_WORKER); } /* * Dummy codelets. */ static struct starpu_codelet cpu_cl = { .cpu_funcs = { dummy }, .cuda_funcs = { dummy }, .opencl_funcs = { dummy }, .max_fpga_funcs = { dummy }, .nbuffers = 0, .model = &model_cpu_task }; static struct starpu_codelet gpu_cl = { .cpu_funcs = { dummy }, .cuda_funcs = { dummy }, .opencl_funcs = { dummy }, .max_fpga_funcs = { dummy }, .nbuffers = 0, .model = &model_gpu_task }; static int run(struct starpu_sched_policy *policy) { struct starpu_conf conf; starpu_conf_init(&conf); conf.sched_policy = policy; int ret = starpu_init(&conf); if (ret == -ENODEV) exit(STARPU_TEST_SKIPPED); /* At least 1 CPU and 1 GPU are needed. */ if (starpu_cpu_worker_get_count() == 0) { starpu_shutdown(); exit(STARPU_TEST_SKIPPED); } if (starpu_cuda_worker_get_count() == 0 && starpu_opencl_worker_get_count() == 0) { starpu_shutdown(); exit(STARPU_TEST_SKIPPED); } starpu_profiling_status_set(1); init_perfmodels(); struct starpu_task *cpu_task = starpu_task_create(); cpu_task->cl = &cpu_cl; cpu_task->destroy = 0; struct starpu_task *gpu_task = starpu_task_create(); gpu_task->cl = &gpu_cl; gpu_task->destroy = 0; ret = starpu_task_submit(cpu_task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(gpu_task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_wait_for_all(); enum starpu_worker_archtype cpu_task_worker, gpu_task_worker; cpu_task_worker = starpu_worker_get_type(cpu_task->profiling_info->workerid); gpu_task_worker = starpu_worker_get_type(gpu_task->profiling_info->workerid); if (cpu_task_worker != STARPU_CPU_WORKER || (gpu_task_worker != STARPU_CUDA_WORKER && gpu_task_worker != STARPU_OPENCL_WORKER)) { FPRINTF(stderr, "Tasks did not execute on expected worker\n"); if (cpu_task_worker != STARPU_CPU_WORKER) { FPRINTF(stderr, "The CPU task did not run on a CPU worker\n"); } if (gpu_task_worker != STARPU_CUDA_WORKER && gpu_task_worker != STARPU_OPENCL_WORKER) { FPRINTF(stderr, "The GPU task did not run on a Cuda or OpenCL worker\n"); } ret = 1; } else { FPRINTF(stderr, "Tasks DID execute on expected worker\n"); ret = 0; } starpu_task_destroy(cpu_task); starpu_task_destroy(gpu_task); starpu_shutdown(); return ret; } /* extern struct starpu_sched_policy _starpu_sched_ws_policy; extern struct starpu_sched_policy _starpu_sched_prio_policy; extern struct starpu_sched_policy _starpu_sched_random_policy; extern struct starpu_sched_policy _starpu_sched_dm_policy; extern struct starpu_sched_policy _starpu_sched_dmda_ready_policy; extern struct starpu_sched_policy _starpu_sched_dmda_sorted_policy; extern struct starpu_sched_policy _starpu_sched_eager_policy; extern struct starpu_sched_policy _starpu_sched_parallel_heft_policy; extern struct starpu_sched_policy _starpu_sched_peager_policy; */ extern struct starpu_sched_policy _starpu_sched_dmda_policy; /* XXX: what policies are we interested in ? */ static struct starpu_sched_policy *policies[] = { //&_starpu_sched_ws_policy, //&_starpu_sched_prio_policy, //&_starpu_sched_dm_policy, &_starpu_sched_dmda_policy, //&_starpu_sched_dmda_ready_policy, //&_starpu_sched_dmda_sorted_policy, //&_starpu_sched_random_policy, //&_starpu_sched_eager_policy, //&_starpu_sched_parallel_heft_policy, //&_starpu_sched_peager_policy }; int main(void) { #ifndef STARPU_HAVE_SETENV /* XXX: is this macro used by all the schedulers we are interested in ? */ #warning "setenv() is not available, skipping this test" return STARPU_TEST_SKIPPED; #else setenv("STARPU_SCHED_BETA", "0", 1); char *sched = getenv("STARPU_SCHED"); if (starpu_getenv_number_default("STARPU_NWORKER_PER_CUDA", 1) != 1) return STARPU_TEST_SKIPPED; int i; int n_policies = sizeof(policies)/sizeof(policies[0]); for (i = 0; i < n_policies; ++i) { struct starpu_sched_policy *policy = policies[i]; if (sched && strcmp(sched, policy->policy_name)) /* Testing another specific scheduler, no need to run this */ continue; FPRINTF(stdout, "Running with policy %s.\n", policy->policy_name); int ret; ret = run(policy); if (ret == 1) return EXIT_FAILURE; } return EXIT_SUCCESS; #endif } starpu-1.4.9+dfsg/tests/sched_policies/simple_deps.c000066400000000000000000000057221507764646700226050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Task1 must be executed before task0, even if task0 is submitted first. * Applies to : all schedulers. */ void dummy(void *buffers[], void *args) { (void) buffers; (void) args; starpu_usleep(10000); } static int run(struct starpu_sched_policy *policy) { int ret; struct starpu_conf conf; starpu_conf_init(&conf); conf.sched_policy = policy; ret = starpu_init(&conf); if (ret != 0) exit(STARPU_TEST_SKIPPED); starpu_profiling_status_set(1); struct starpu_codelet cl = { .cpu_funcs = {dummy}, .cpu_funcs_name = {"dummy"}, .opencl_funcs = {dummy}, .cuda_funcs = {dummy}, .max_fpga_funcs = {dummy}, .nbuffers = 0 }; struct starpu_task *task0 = starpu_task_create(); task0->cl = &cl; task0->destroy = 0; struct starpu_task *task1 = starpu_task_create(); task1->cl = &cl; task1->destroy = 0; starpu_task_declare_deps_array(task0, 1, &task1); ret = starpu_task_submit(task0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_task_submit(task1); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_wait_for_all(); double task1_end, task0_start; task1_end = starpu_timing_timespec_to_us(&task1->profiling_info->end_time); task0_start = starpu_timing_timespec_to_us(&task0->profiling_info->start_time); starpu_task_destroy(task0); starpu_task_destroy(task1); starpu_shutdown(); return !!(task1_end > task0_start); enodev: starpu_shutdown(); return -ENODEV; } int main(void) { struct starpu_sched_policy **policies; struct starpu_sched_policy **policy; char *sched = getenv("STARPU_SCHED"); policies = starpu_sched_get_predefined_policies(); for(policy=policies ; *policy!=NULL ; policy++) { int ret; if (sched && strcmp(sched, (*policy)->policy_name)) /* Testing another specific scheduler, no need to run this */ continue; if (!strcmp("heteroprio", (*policy)->policy_name)) /* https://gitlab.inria.fr/starpu/starpu/-/issues/18 */ continue; FPRINTF(stderr, "Running with policy %s.\n", (*policy)->policy_name); ret = run(*policy); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; if (ret == 1) return EXIT_FAILURE; } return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/sched_policies/workerids.c000066400000000000000000000063061507764646700223110ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Simon Archipoff * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include "../helper.h" /* * Check that the starpu_task::workerids field is respected by schedulers. */ #ifdef STARPU_QUICK_CHECK #define NTASKS 10 #elif !defined(STARPU_LONG_CHECK) #define NTASKS 100 #else #define NTASKS 1000 #endif void funcA(void *buffers[], void *args) { (void) buffers; (void) args; STARPU_ASSERT(starpu_worker_get_id() == 0); starpu_usleep(1000); } double cost_function(struct starpu_task *t STARPU_ATTRIBUTE_UNUSED, struct starpu_perfmodel_arch *a STARPU_ATTRIBUTE_UNUSED, unsigned i STARPU_ATTRIBUTE_UNUSED) { return 1000; } static struct starpu_perfmodel perf_model = { .type = STARPU_PER_ARCH, .arch_cost_function = cost_function, }; static struct starpu_codelet clA = { .cpu_funcs = {funcA}, .cpu_funcs_name = {"funcA"}, .opencl_funcs = {funcA}, .cuda_funcs = {funcA}, .hip_funcs = {funcA}, .max_fpga_funcs = {funcA}, .nbuffers = 0, .model = &perf_model, }; static int run(struct starpu_sched_policy *policy) { int ret; struct starpu_conf conf; int i; starpu_conf_init(&conf); conf.sched_policy = policy; ret = starpu_init(&conf); if (ret != 0) exit(STARPU_TEST_SKIPPED); uint32_t zeromask = 1; for (i = 0; i < NTASKS; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &clA; task->workerids = &zeromask; task->workerids_len = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); FPRINTF(stdout,"\n"); starpu_shutdown(); return 0; enodev: starpu_shutdown(); return -ENODEV; } int main(void) { struct starpu_sched_policy **policies; struct starpu_sched_policy **policy; char *sched = getenv("STARPU_SCHED"); policies = starpu_sched_get_predefined_policies(); for(policy=policies ; *policy!=NULL ; policy++) { int ret; if (strcmp((*policy)->policy_name, "lws") == 0 || strcmp((*policy)->policy_name, "ws") == 0 || strcmp((*policy)->policy_name, "heteroprio") == 0 || strcmp((*policy)->policy_name, "modular-gemm") == 0) #ifdef STARPU_DEVEL #warning FIXME performance for ws #endif continue; if (sched && strcmp(sched, (*policy)->policy_name)) /* Testing another specific scheduler, no need to run this */ continue; FPRINTF(stderr, "Running with policy %s.\n", (*policy)->policy_name); ret = run(*policy); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; if (ret == 1) return EXIT_FAILURE; } return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tests/variable/000077500000000000000000000000001507764646700167375ustar00rootroot00000000000000starpu-1.4.9+dfsg/tests/variable/increment.c000066400000000000000000000074711507764646700211000ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "increment.h" #ifdef STARPU_USE_CUDA extern void increment_cuda(void *descr[], void *_args); extern void redux_cuda(void *descr[], void *arg); extern void neutral_cuda(void *descr[], void *arg); #endif #ifdef STARPU_USE_HIP extern void increment_hip(void *descr[], void *_args); extern void redux_hip(void *descr[], void *arg); extern void neutral_hip(void *descr[], void *arg); #endif #ifdef STARPU_USE_OPENCL extern void increment_opencl(void *buffers[], void *args); extern void redux_opencl(void *descr[], void *arg); extern void neutral_opencl(void *descr[], void *arg); #endif void increment_cpu(void *descr[], void *arg) { (void)arg; unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); (*tokenptr)++; } struct starpu_codelet increment_cl = { .modes = {STARPU_RW}, .cpu_funcs = {increment_cpu}, #ifdef STARPU_USE_CUDA .cuda_funcs = {increment_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {increment_hip}, .hip_flags = {STARPU_HIP_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {increment_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs_name = {"increment_cpu"}, .nbuffers = 1, .flags = STARPU_CODELET_SIMGRID_EXECUTE, }; struct starpu_codelet increment_redux_cl = { .modes = {STARPU_REDUX}, .cpu_funcs = {increment_cpu}, #ifdef STARPU_USE_CUDA .cuda_funcs = {increment_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {increment_hip}, .hip_flags = {STARPU_HIP_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {increment_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs_name = {"increment_cpu"}, .nbuffers = 1, .flags = STARPU_CODELET_SIMGRID_EXECUTE, }; void redux_cpu(void *descr[], void *arg) { (void)arg; unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); unsigned *src = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); *dst = *dst + *src; } struct starpu_codelet redux_cl = { .modes = {STARPU_RW|STARPU_COMMUTE, STARPU_R}, .nbuffers = 2, #ifdef STARPU_USE_CUDA .cuda_funcs = {redux_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {redux_hip}, .hip_flags = {STARPU_HIP_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {redux_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs = {redux_cpu}, .cpu_funcs_name = {"redux_cpu"}, .flags = STARPU_CODELET_SIMGRID_EXECUTE, }; void neutral_cpu(void *descr[], void *arg) { (void)arg; unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); *dst = 0; } struct starpu_codelet neutral_cl = { #ifdef STARPU_USE_CUDA .cuda_funcs = {neutral_cuda}, .cuda_flags = {STARPU_CUDA_ASYNC}, #endif #ifdef STARPU_USE_HIP .hip_funcs = {neutral_hip}, .hip_flags = {STARPU_HIP_ASYNC}, #endif #ifdef STARPU_USE_OPENCL .opencl_funcs = {neutral_opencl}, .opencl_flags = {STARPU_OPENCL_ASYNC}, #endif .cpu_funcs = {neutral_cpu}, .cpu_funcs_name = {"neutral_cpu"}, .modes = {STARPU_W}, .nbuffers = 1, .flags = STARPU_CODELET_SIMGRID_EXECUTE, }; #ifndef STARPU_USE_OPENCL void increment_load_opencl() { } void increment_unload_opencl() { } #endif starpu-1.4.9+dfsg/tests/variable/increment.h000066400000000000000000000016601507764646700210770ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include extern struct starpu_codelet increment_cl; extern struct starpu_codelet increment_redux_cl; extern struct starpu_codelet redux_cl; extern struct starpu_codelet neutral_cl; void increment_load_opencl(); void increment_unload_opencl(); starpu-1.4.9+dfsg/tests/variable/increment_cuda.cu000066400000000000000000000037121507764646700222530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" static __global__ void _increment_cuda(unsigned *val) { val[0]++; } extern "C" void increment_cuda(void *descr[], void *cl_arg) { unsigned *val = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); STARPU_SKIP_IF_VALGRIND; _increment_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>>(val); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } static __global__ void _redux_cuda(unsigned *dst, unsigned *src) { dst[0] += src[0]; } extern "C" void redux_cuda(void *descr[], void *arg) { unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); unsigned *src = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); STARPU_SKIP_IF_VALGRIND; _redux_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>>(dst, src); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } static __global__ void _neutral_cuda(unsigned *dst) { dst[0] = 0; } extern "C" void neutral_cuda(void *descr[], void *arg) { unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); STARPU_SKIP_IF_VALGRIND; _neutral_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>>(dst); cudaError_t status = cudaGetLastError(); if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status); } starpu-1.4.9+dfsg/tests/variable/increment_hip.hip000066400000000000000000000040061507764646700222650ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" static __global__ void _increment_hip(unsigned *val) { val[0]++; } extern "C" void increment_hip(void *descr[], void *cl_arg) { (void)cl_arg; unsigned *val = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); hipLaunchKernelGGL(_increment_hip, 1, 1, 0, starpu_hip_get_local_stream(), val); hipError_t status = hipGetLastError(); if (status != hipSuccess) STARPU_HIP_REPORT_ERROR(status); } static __global__ void _redux_hip(unsigned *dst, unsigned *src) { dst[0] += src[0]; } extern "C" void redux_hip(void *descr[], void *cl_arg) { (void)cl_arg; STARPU_SKIP_IF_VALGRIND; unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); unsigned *src = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]); hipLaunchKernelGGL(_redux_hip, 1, 1, 0, starpu_hip_get_local_stream(), dst, src); hipError_t status = hipGetLastError(); if (status != hipSuccess) STARPU_HIP_REPORT_ERROR(status); } static __global__ void _neutral_hip(unsigned *dst) { dst[0] = 0; } extern "C" void neutral_hip(void *descr[], void *cl_arg) { (void)cl_arg; STARPU_SKIP_IF_VALGRIND; unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]); hipLaunchKernelGGL(_neutral_hip, 1, 1, 0, starpu_hip_get_local_stream(), dst); hipError_t status = hipGetLastError(); if (status != hipSuccess) STARPU_HIP_REPORT_ERROR(status); } starpu-1.4.9+dfsg/tests/variable/increment_opencl.c000066400000000000000000000106101507764646700224250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" /* * Queue an OpenCL kernel that just increments a variable */ struct starpu_opencl_program opencl_increment_program; struct starpu_opencl_program opencl_redux_program; struct starpu_opencl_program opencl_neutral_program; void increment_load_opencl() { int ret = starpu_opencl_load_opencl_from_file("tests/variable/increment_opencl_kernel.cl", &opencl_increment_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); ret = starpu_opencl_load_opencl_from_file("tests/variable/redux_opencl_kernel.cl", &opencl_redux_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); ret = starpu_opencl_load_opencl_from_file("tests/variable/neutral_opencl_kernel.cl", &opencl_neutral_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); } void increment_unload_opencl() { int ret = starpu_opencl_unload_opencl(&opencl_increment_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); ret = starpu_opencl_unload_opencl(&opencl_redux_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); ret = starpu_opencl_unload_opencl(&opencl_neutral_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); } void increment_opencl(void *buffers[], void *args) { (void) args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_mem val = (cl_mem)STARPU_VARIABLE_GET_PTR(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_increment_program, "_increment_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(val), &val); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=1; size_t local=1; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } void redux_opencl(void *buffers[], void *args) { (void) args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_mem dst = (cl_mem)STARPU_VARIABLE_GET_PTR(buffers[0]); cl_mem src = (cl_mem)STARPU_VARIABLE_GET_PTR(buffers[1]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_redux_program, "_redux_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(dst), &dst); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 1, sizeof(src), &src); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=1; size_t local=1; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } void neutral_opencl(void *buffers[], void *args) { (void) args; int id, devid; cl_int err; cl_kernel kernel; cl_command_queue queue; cl_mem dst = (cl_mem)STARPU_VARIABLE_GET_PTR(buffers[0]); id = starpu_worker_get_id_check(); devid = starpu_worker_get_devid(id); err = starpu_opencl_load_kernel(&kernel, &queue, &opencl_neutral_program, "_neutral_opencl", devid); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); err = clSetKernelArg(kernel, 0, sizeof(dst), &dst); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); { size_t global=1; size_t local=1; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err); } starpu_opencl_release_kernel(kernel); } starpu-1.4.9+dfsg/tests/variable/increment_opencl_kernel.cl000066400000000000000000000013661507764646700241510ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void _increment_opencl(__global unsigned *val) { val[0]++; } starpu-1.4.9+dfsg/tests/variable/neutral_opencl_kernel.cl000066400000000000000000000013661507764646700236370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void _neutral_opencl(__global unsigned *dst) { dst[0] = 0; } starpu-1.4.9+dfsg/tests/variable/redux_opencl_kernel.cl000066400000000000000000000014221507764646700233050ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ __kernel void _redux_opencl(__global unsigned *dst, __global unsigned *src) { dst[0] += src[0]; } starpu-1.4.9+dfsg/tools/000077500000000000000000000000001507764646700151505ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/Makefile.am000066400000000000000000000640001507764646700172040ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Thibaut Lambert # Copyright (C) 2013-2013 Joris Pablo # Copyright (C) 2017-2017 Erwan Leria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # include $(top_srcdir)/make/starpu-tests.mk include $(top_srcdir)/make/starpu-loader.mk SUBDIRS = AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/tools/ -I$(top_srcdir)/mpi/ -I$(top_builddir)/src -I$(top_srcdir)/src $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ LIBS += $(top_builddir)/src/@LIBSTARPU_LINK@ $(STARPU_EXPORTED_LIBS) bin_PROGRAMS = dist_bin_SCRIPTS = dist_pkgdata_DATA = gdbinit pkgdata_perfmodels_sampling_busdir = $(datarootdir)/starpu/perfmodels/sampling/bus pkgdata_perfmodels_sampling_codeletsdir = $(datarootdir)/starpu/perfmodels/sampling/codelets/45 dist_pkgdata_perfmodels_sampling_bus_DATA = \ perfmodels/sampling/bus/attila.affinity \ perfmodels/sampling/bus/attila.bandwidth \ perfmodels/sampling/bus/attila.config \ perfmodels/sampling/bus/attila.latency \ perfmodels/sampling/bus/attila.platform.xml \ perfmodels/sampling/bus/attila.platform.v4.xml \ perfmodels/sampling/bus/hannibal.affinity \ perfmodels/sampling/bus/hannibal.bandwidth \ perfmodels/sampling/bus/hannibal.config \ perfmodels/sampling/bus/hannibal.latency \ perfmodels/sampling/bus/hannibal.platform.xml \ perfmodels/sampling/bus/hannibal.platform.v4.xml \ perfmodels/sampling/bus/hannibal-pitch.affinity \ perfmodels/sampling/bus/hannibal-pitch.bandwidth \ perfmodels/sampling/bus/hannibal-pitch.config \ perfmodels/sampling/bus/hannibal-pitch.latency \ perfmodels/sampling/bus/hannibal-pitch.platform.xml \ perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml \ perfmodels/sampling/bus/idgraf.affinity \ perfmodels/sampling/bus/idgraf.bandwidth \ perfmodels/sampling/bus/idgraf.config \ perfmodels/sampling/bus/idgraf.latency \ perfmodels/sampling/bus/idgraf.platform.xml \ perfmodels/sampling/bus/idgraf.platform.v4.xml \ perfmodels/sampling/bus/mirage.affinity \ perfmodels/sampling/bus/mirage.bandwidth \ perfmodels/sampling/bus/mirage.config \ perfmodels/sampling/bus/mirage.latency \ perfmodels/sampling/bus/mirage.platform.xml \ perfmodels/sampling/bus/mirage.platform.v4.xml \ perfmodels/sampling/bus/sirocco.affinity \ perfmodels/sampling/bus/sirocco.bandwidth \ perfmodels/sampling/bus/sirocco.config \ perfmodels/sampling/bus/sirocco.latency \ perfmodels/sampling/bus/sirocco.platform.xml \ perfmodels/sampling/bus/sirocco.platform.v4.xml dist_pkgdata_perfmodels_sampling_codelets_DATA = \ perfmodels/sampling/codelets/45/chol_model_potrf.attila \ perfmodels/sampling/codelets/45/chol_model_trsm.attila \ perfmodels/sampling/codelets/45/chol_model_syrk.attila \ perfmodels/sampling/codelets/45/chol_model_gemm.attila \ perfmodels/sampling/codelets/45/cl_update.attila \ perfmodels/sampling/codelets/45/save_cl_bottom.attila \ perfmodels/sampling/codelets/45/save_cl_top.attila \ perfmodels/sampling/codelets/45/starpu_sgemm_gemm.attila \ perfmodels/sampling/codelets/45/starpu_dgemm_gemm.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.attila \ perfmodels/sampling/codelets/45/overlap_sleep_1024_24.attila \ \ perfmodels/sampling/codelets/45/chol_model_potrf.hannibal \ perfmodels/sampling/codelets/45/chol_model_trsm.hannibal \ perfmodels/sampling/codelets/45/chol_model_syrk.hannibal \ perfmodels/sampling/codelets/45/chol_model_gemm.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal \ \ perfmodels/sampling/codelets/45/chol_model_potrf.hannibal-pitch \ perfmodels/sampling/codelets/45/chol_model_trsm.hannibal-pitch \ perfmodels/sampling/codelets/45/chol_model_syrk.hannibal-pitch \ perfmodels/sampling/codelets/45/chol_model_gemm.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal-pitch \ \ perfmodels/sampling/codelets/45/chol_model_potrf.idgraf \ perfmodels/sampling/codelets/45/chol_model_trsm.idgraf \ perfmodels/sampling/codelets/45/chol_model_syrk.idgraf \ perfmodels/sampling/codelets/45/chol_model_gemm.idgraf \ perfmodels/sampling/codelets/45/cl_update.idgraf \ perfmodels/sampling/codelets/45/save_cl_bottom.idgraf \ perfmodels/sampling/codelets/45/save_cl_top.idgraf \ perfmodels/sampling/codelets/45/starpu_sgemm_gemm.idgraf \ perfmodels/sampling/codelets/45/starpu_dgemm_gemm.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.idgraf \ \ perfmodels/sampling/codelets/45/chol_model_potrf.mirage \ perfmodels/sampling/codelets/45/chol_model_trsm.mirage \ perfmodels/sampling/codelets/45/chol_model_syrk.mirage \ perfmodels/sampling/codelets/45/chol_model_gemm.mirage \ perfmodels/sampling/codelets/45/cl_update.mirage \ perfmodels/sampling/codelets/45/save_cl_bottom.mirage \ perfmodels/sampling/codelets/45/save_cl_top.mirage \ perfmodels/sampling/codelets/45/starpu_sgemm_gemm.mirage \ perfmodels/sampling/codelets/45/starpu_dgemm_gemm.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.mirage \ perfmodels/sampling/codelets/45/overlap_sleep_1024_24.mirage \ perfmodels/sampling/codelets/45/add_scal.mirage \ perfmodels/sampling/codelets/45/func.mirage \ perfmodels/sampling/codelets/45/log_arr.mirage \ perfmodels/sampling/codelets/45/log_list.mirage \ perfmodels/sampling/codelets/45/multi.mirage \ perfmodels/sampling/codelets/45/multi_2arr.mirage \ perfmodels/sampling/codelets/45/multi_list.mirage \ perfmodels/sampling/codelets/45/scal.mirage \ perfmodels/sampling/codelets/45/scal_arr.mirage \ perfmodels/sampling/codelets/45/sqrt.mirage \ \ perfmodels/sampling/codelets/45/chol_model_potrf.sirocco \ perfmodels/sampling/codelets/45/chol_model_trsm.sirocco \ perfmodels/sampling/codelets/45/chol_model_syrk.sirocco \ perfmodels/sampling/codelets/45/chol_model_gemm.sirocco \ perfmodels/sampling/codelets/45/cl_update.sirocco \ perfmodels/sampling/codelets/45/save_cl_bottom.sirocco \ perfmodels/sampling/codelets/45/save_cl_top.sirocco \ perfmodels/sampling/codelets/45/starpu_sgemm_gemm.sirocco \ perfmodels/sampling/codelets/45/starpu_dgemm_gemm.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.sirocco \ perfmodels/sampling/codelets/45/overlap_sleep_1024_24.sirocco \ \ perfmodels/sampling/codelets/45/null.idgraf \ perfmodels/sampling/codelets/45/null.sirocco EXTRA_DIST = \ dev/checker/rename.sed \ dev/checker/rename.sh \ dev/cppcheck/suppressions.txt \ dev/valgrind/blas.suppr \ dev/valgrind/nvidia.suppr \ dev/valgrind/bash.suppr \ dev/valgrind/fxt.suppr \ dev/valgrind/glpk.suppr \ dev/valgrind/hdf5.suppr \ dev/valgrind/hwloc.suppr \ dev/valgrind/libc.suppr \ dev/valgrind/libgomp.suppr \ dev/valgrind/libnuma.suppr \ dev/valgrind/madmpi.suppr \ dev/valgrind/opencl.suppr \ dev/valgrind/openmpi.suppr \ dev/valgrind/openmp.suppr \ dev/valgrind/p11-kit.suppr \ dev/valgrind/padico.suppr \ dev/valgrind/papi.suppr \ dev/valgrind/pthread.suppr \ dev/valgrind/starpu.suppr \ dev/valgrind/starpu_pw.suppr \ dev/valgrind/starpupy.suppr \ dev/valgrind/valgrind.suppr \ dev/valgrind/valgrind.sh \ dev/valgrind/valgrind_xml.sh \ dev/valgrind/helgrind.sh \ dev/tsan/starpu.suppr \ dev/lsan/suppressions \ perfmodels/README \ perfmodels/cluster.xml \ perfmodels/hostfile \ perfmodels/sampling/codelets/tmp/mlr_init.out \ msvc/starpu_clean.bat \ msvc/starpu_open.bat \ msvc/starpu_exec.bat \ msvc/starpu_var.bat \ msvc/starpu.sln \ msvc/starpu/starpu.vcxproj \ release/Makefile \ release/README.md \ patch-ayudame \ perfs/bench_sgemm.sh \ perfs/error_model.gp \ perfs/error_model.sh \ distrib/distrib.r \ distrib/distrib.sh \ starpu_msexec CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log figure/* mlr_* ##################################### # What to install and what to check # ##################################### STARPU_TOOLS = TESTS = $(STARPU_TOOLS) check_PROGRAMS = $(STARPU_TOOLS) if STARPU_USE_FXT bin_PROGRAMS += \ starpu_fxt_tool \ starpu_fxt_stats \ starpu_fxt_data_trace STARPU_TOOLS += \ starpu_fxt_tool \ starpu_fxt_stats \ starpu_fxt_data_trace starpu_fxt_tool_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS) starpu_fxt_tool_LDADD = $(FXT_LIBS) starpu_fxt_tool_LDFLAGS = $(FXT_LDFLAGS) starpu_fxt_stats_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS) starpu_fxt_stats_LDADD = $(FXT_LIBS) starpu_fxt_stats_LDFLAGS = $(FXT_LDFLAGS) starpu_fxt_data_trace_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS) starpu_fxt_data_trace_LDADD = $(FXT_LIBS) starpu_fxt_data_trace_LDFLAGS = $(FXT_LDFLAGS) endif bin_PROGRAMS += \ starpu_perfmodel_display \ starpu_perfmodel_plot \ starpu_calibrate_bus \ starpu_machine_display \ starpu_sched_display \ starpu_tasks_rec_complete \ starpu_lp2paje \ starpu_perfmodel_recdump if STARPU_SIMGRID bin_PROGRAMS += \ starpu_replay starpu_replay_SOURCES = \ starpu_replay.c \ starpu_replay_sched.c endif starpu_perfmodel_plot_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS) if STARPU_LONG_CHECK STARPU_TOOLS += \ starpu_calibrate_bus endif STARPU_TOOLS += \ starpu_machine_display \ starpu_sched_display if !STARPU_HAVE_WINDOWS STARPU_TOOLS += \ starpu_perfmodel_display \ starpu_perfmodel_plot endif dist_bin_SCRIPTS += \ starpu_workers_activity \ starpu_codelet_histo_profile \ starpu_codelet_profile \ starpu_env \ starpu_config \ starpu_mpi_comm_matrix.py \ starpu_fxt_number_events_to_names.py \ starpu_paje_draw_histogram \ starpu_paje_draw_histogram.R \ starpu_paje_summary \ starpu_paje_summary.Rmd \ starpu_mlr_analysis \ starpu_mlr_analysis.Rmd \ starpu_paje_state_stats \ starpu_paje_state_stats.R \ starpu_send_recv_data_use.py \ starpu_trace_state_stats.py if STARPU_USE_AYUDAME2 dist_bin_SCRIPTS += \ starpu_temanejo2.sh dist_pkgdata_DATA += \ ayudame.cfg endif if STARPU_HAVE_WINDOWS STARPU_MSVC_dir = $(bindir) nobase_STARPU_MSVC__DATA = \ msvc/starpu_clean.bat \ msvc/starpu_open.bat \ msvc/starpu_exec.bat \ msvc/starpu_var.bat \ msvc/starpu.sln \ msvc/starpu/starpu.vcxproj endif if STARPU_HAVE_HELP2MAN starpu_calibrate_bus.1: starpu_calibrate_bus$(EXEEXT) $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Force StarPU bus calibration" --output=$@ ./$< starpu_machine_display.1: starpu_machine_display$(EXEEXT) $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Display machine StarPU information" --output=$@ ./$< starpu_perfmodel_display.1: starpu_perfmodel_display$(EXEEXT) $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Display StarPU performance model" --output=$@ ./$< starpu_perfmodel_plot.1: starpu_perfmodel_plot$(EXEEXT) $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Plot StarPU performance model" --output=$@ ./$< starpu_tasks_rec_complete.1: starpu_tasks_rec_complete$(EXEEXT) $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Complete StarPU tasks.rec file" --output=$@ ./$< starpu_lp2paje.1: starpu_lp2paje$(EXEEXT) $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Convert lp StarPU schedule into Paje format" --output=$@ ./$< starpu_workers_activity.1: starpu_workers_activity @chmod +x $< $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Display StarPU workers activity" --output=$@ ./$< starpu_codelet_profile.1: starpu_codelet_profile @chmod +x $< $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU codelet profile" --output=$@ ./$< starpu_env.1: starpu_env @chmod +x $< $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Set StarPU environment variables" --output=$@ ./$< starpu_codelet_histo_profile.1: starpu_codelet_histo_profile @chmod +x $< $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU codelet histogram" --output=$@ ./$< starpu_mpi_comm_matrix.1: starpu_mpi_comm_matrix.py @chmod +x $< $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU MPI communications matrix" --output=$@ ./$< starpu_fxt_number_events_to_names.1: starpu_fxt_number_events_to_names.py @chmod +x $< $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Convert events in StarPU traces" --output=$@ ./$< starpu_paje_draw_histogram.1: starpu_paje_draw_histogram @chmod +x $< $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU trace histogram" --output=$@ ./$< starpu_paje_state_stats.1: starpu_paje_state_stats @chmod +x $< $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Print statistics from StarPU trace" --output=$@ ./$< starpu_config.1: starpu_config @chmod +x $< $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Display StarPU configuration" --output=$@ ./$< if STARPU_USE_FXT starpu_fxt_tool.1: starpu_fxt_tool$(EXEEXT) $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Convert raw StarPU FxT trace to various traces" --output=$@ ./$< starpu_fxt_stats.1: starpu_fxt_stats$(EXEEXT) $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Print statistics from raw StarPU FxT trace" --output=$@ ./$< starpu_fxt_data_trace.1: starpu_fxt_data_trace$(EXEEXT) $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Print data trace from raw StarPU FxT trace" --output=$@ ./$< endif if STARPU_USE_TCPIP_MASTER_SLAVE starpu_tcpipexec.1: starpu_tcpipexec @chmod +x $< $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Execute TCPIP application" --output=$@ ./$< endif dist_man1_MANS = \ starpu_calibrate_bus.1 \ starpu_machine_display.1 \ starpu_perfmodel_display.1 \ starpu_perfmodel_plot.1 \ starpu_tasks_rec_complete.1 \ starpu_lp2paje.1 \ starpu_workers_activity.1 \ starpu_codelet_profile.1 \ starpu_codelet_histo_profile.1 \ starpu_env.1 \ starpu_mpi_comm_matrix.1 \ starpu_fxt_number_events_to_names.1 \ starpu_paje_draw_histogram.1 \ starpu_paje_state_stats.1 \ starpu_config.1 if STARPU_USE_FXT dist_man1_MANS +=\ starpu_fxt_tool.1 \ starpu_fxt_stats.1 \ starpu_fxt_data_trace.1 endif if STARPU_USE_TCPIP_MASTER_SLAVE dist_man1_MANS +=\ starpu_tcpipexec.1 endif clean-local: $(RM) $(dist_man1_MANS) starpu_config.cfg endif if STARPU_SIMGRID dist_pkgdata_DATA += starpu_smpi.xslt dist_bin_SCRIPTS += starpu_smpirun endif if STARPU_USE_TCPIP_MASTER_SLAVE dist_bin_SCRIPTS += starpu_tcpipexec endif dist_pkgdata_DATA += starpu_config.cfg starpu_config.cfg: $(top_builddir)/src/common/config.h grep STARPU $< | grep '#' > $@ starpu-1.4.9+dfsg/tools/Makefile.in000066400000000000000000003610711507764646700172250ustar00rootroot00000000000000# Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@am__append_1 = --compiler-options -fno-strict-aliasing -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ $(STARPU_NVCC_H_CPPFLAGS) @STARPU_USE_HIP_TRUE@am__append_2 = -I$(top_builddir)/include -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/ noinst_PROGRAMS = $(am__EXEEXT_6) # Make tests run through mpiexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_3 = $(abs_top_srcdir)/tools/starpu_msexec @STARPU_USE_MPI_MASTER_SLAVE_TRUE@am__append_4 = $(MPI_RUN_ENV) STARPU_NMPIMSTHREADS=4 @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_5 = $(abs_top_srcdir)/tools/starpu_msexec # switch off local socket usage #MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 -nolocal @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_6 = STARPU_RESERVE_NCPU=2 @STARPU_HAVE_WINDOWS_FALSE@am__append_7 = loader bin_PROGRAMS = $(am__EXEEXT_1) starpu_perfmodel_display$(EXEEXT) \ starpu_perfmodel_plot$(EXEEXT) starpu_calibrate_bus$(EXEEXT) \ starpu_machine_display$(EXEEXT) starpu_sched_display$(EXEEXT) \ starpu_tasks_rec_complete$(EXEEXT) starpu_lp2paje$(EXEEXT) \ starpu_perfmodel_recdump$(EXEEXT) $(am__EXEEXT_2) TESTS = $(am__EXEEXT_5) check_PROGRAMS = $(am__EXEEXT_5) @STARPU_USE_FXT_TRUE@am__append_8 = \ @STARPU_USE_FXT_TRUE@ starpu_fxt_tool \ @STARPU_USE_FXT_TRUE@ starpu_fxt_stats \ @STARPU_USE_FXT_TRUE@ starpu_fxt_data_trace @STARPU_USE_FXT_TRUE@am__append_9 = \ @STARPU_USE_FXT_TRUE@ starpu_fxt_tool \ @STARPU_USE_FXT_TRUE@ starpu_fxt_stats \ @STARPU_USE_FXT_TRUE@ starpu_fxt_data_trace @STARPU_SIMGRID_TRUE@am__append_10 = \ @STARPU_SIMGRID_TRUE@ starpu_replay @STARPU_LONG_CHECK_TRUE@am__append_11 = \ @STARPU_LONG_CHECK_TRUE@ starpu_calibrate_bus @STARPU_HAVE_WINDOWS_FALSE@am__append_12 = \ @STARPU_HAVE_WINDOWS_FALSE@ starpu_perfmodel_display \ @STARPU_HAVE_WINDOWS_FALSE@ starpu_perfmodel_plot @STARPU_USE_AYUDAME2_TRUE@am__append_13 = \ @STARPU_USE_AYUDAME2_TRUE@ starpu_temanejo2.sh @STARPU_USE_AYUDAME2_TRUE@am__append_14 = \ @STARPU_USE_AYUDAME2_TRUE@ ayudame.cfg @STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@am__append_15 = \ @STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@ starpu_fxt_tool.1 \ @STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@ starpu_fxt_stats.1 \ @STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@ starpu_fxt_data_trace.1 @STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_16 = \ @STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@ starpu_tcpipexec.1 @STARPU_SIMGRID_TRUE@am__append_17 = starpu_smpi.xslt @STARPU_SIMGRID_TRUE@am__append_18 = starpu_smpirun @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@am__append_19 = starpu_tcpipexec subdir = tools ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/acinclude.m4 \ $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \ $(top_srcdir)/m4/ax_dlb_callback_arg.m4 \ $(top_srcdir)/m4/libs.m4 $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__dist_bin_SCRIPTS_DIST) \ $(am__dist_pkgdata_DATA_DIST) \ $(dist_pkgdata_perfmodels_sampling_bus_DATA) \ $(dist_pkgdata_perfmodels_sampling_codelets_DATA) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/common/config.h \ $(top_builddir)/src/common/config-src-build.h \ $(top_builddir)/include/starpu_config.h \ $(top_builddir)/starpurm/include/starpurm_config.h CONFIG_CLEAN_FILES = starpu_env starpu_codelet_profile \ starpu_codelet_histo_profile starpu_mpi_comm_matrix.py \ starpu_fxt_number_events_to_names.py starpu_workers_activity \ starpu_paje_draw_histogram starpu_paje_state_stats \ starpu_paje_summary starpu_config starpu_mlr_analysis \ starpu_paje_sort starpu_smpirun starpu_tcpipexec CONFIG_CLEAN_VPATH_FILES = @STARPU_USE_FXT_TRUE@am__EXEEXT_1 = starpu_fxt_tool$(EXEEXT) \ @STARPU_USE_FXT_TRUE@ starpu_fxt_stats$(EXEEXT) \ @STARPU_USE_FXT_TRUE@ starpu_fxt_data_trace$(EXEEXT) @STARPU_SIMGRID_TRUE@am__EXEEXT_2 = starpu_replay$(EXEEXT) am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)" \ "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(pkgdatadir)" \ "$(DESTDIR)$(pkgdata_perfmodels_sampling_busdir)" \ "$(DESTDIR)$(pkgdata_perfmodels_sampling_codeletsdir)" \ "$(DESTDIR)$(STARPU_MSVC_dir)" @STARPU_LONG_CHECK_TRUE@am__EXEEXT_3 = starpu_calibrate_bus$(EXEEXT) @STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_4 = \ @STARPU_HAVE_WINDOWS_FALSE@ starpu_perfmodel_display$(EXEEXT) \ @STARPU_HAVE_WINDOWS_FALSE@ starpu_perfmodel_plot$(EXEEXT) am__EXEEXT_5 = $(am__EXEEXT_1) $(am__EXEEXT_3) \ starpu_machine_display$(EXEEXT) starpu_sched_display$(EXEEXT) \ $(am__EXEEXT_4) @STARPU_HAVE_WINDOWS_FALSE@am__EXEEXT_6 = loader$(EXEEXT) PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS) loader_SOURCES = loader.c loader_OBJECTS = loader-loader.$(OBJEXT) loader_LDADD = $(LDADD) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = starpu_calibrate_bus_SOURCES = starpu_calibrate_bus.c starpu_calibrate_bus_OBJECTS = starpu_calibrate_bus.$(OBJEXT) starpu_calibrate_bus_LDADD = $(LDADD) starpu_fxt_data_trace_SOURCES = starpu_fxt_data_trace.c starpu_fxt_data_trace_OBJECTS = \ starpu_fxt_data_trace-starpu_fxt_data_trace.$(OBJEXT) am__DEPENDENCIES_1 = @STARPU_USE_FXT_TRUE@starpu_fxt_data_trace_DEPENDENCIES = \ @STARPU_USE_FXT_TRUE@ $(am__DEPENDENCIES_1) starpu_fxt_data_trace_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(AM_CFLAGS) $(CFLAGS) $(starpu_fxt_data_trace_LDFLAGS) \ $(LDFLAGS) -o $@ starpu_fxt_stats_SOURCES = starpu_fxt_stats.c starpu_fxt_stats_OBJECTS = \ starpu_fxt_stats-starpu_fxt_stats.$(OBJEXT) @STARPU_USE_FXT_TRUE@starpu_fxt_stats_DEPENDENCIES = \ @STARPU_USE_FXT_TRUE@ $(am__DEPENDENCIES_1) starpu_fxt_stats_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(AM_CFLAGS) $(CFLAGS) $(starpu_fxt_stats_LDFLAGS) $(LDFLAGS) \ -o $@ starpu_fxt_tool_SOURCES = starpu_fxt_tool.c starpu_fxt_tool_OBJECTS = starpu_fxt_tool-starpu_fxt_tool.$(OBJEXT) @STARPU_USE_FXT_TRUE@starpu_fxt_tool_DEPENDENCIES = \ @STARPU_USE_FXT_TRUE@ $(am__DEPENDENCIES_1) starpu_fxt_tool_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ $(AM_CFLAGS) $(CFLAGS) $(starpu_fxt_tool_LDFLAGS) $(LDFLAGS) \ -o $@ starpu_lp2paje_SOURCES = starpu_lp2paje.c starpu_lp2paje_OBJECTS = starpu_lp2paje.$(OBJEXT) starpu_lp2paje_LDADD = $(LDADD) starpu_machine_display_SOURCES = starpu_machine_display.c starpu_machine_display_OBJECTS = starpu_machine_display.$(OBJEXT) starpu_machine_display_LDADD = $(LDADD) starpu_perfmodel_display_SOURCES = starpu_perfmodel_display.c starpu_perfmodel_display_OBJECTS = starpu_perfmodel_display.$(OBJEXT) starpu_perfmodel_display_LDADD = $(LDADD) starpu_perfmodel_plot_SOURCES = starpu_perfmodel_plot.c starpu_perfmodel_plot_OBJECTS = \ starpu_perfmodel_plot-starpu_perfmodel_plot.$(OBJEXT) starpu_perfmodel_plot_LDADD = $(LDADD) starpu_perfmodel_recdump_SOURCES = starpu_perfmodel_recdump.c starpu_perfmodel_recdump_OBJECTS = starpu_perfmodel_recdump.$(OBJEXT) starpu_perfmodel_recdump_LDADD = $(LDADD) am__starpu_replay_SOURCES_DIST = starpu_replay.c starpu_replay_sched.c @STARPU_SIMGRID_TRUE@am_starpu_replay_OBJECTS = \ @STARPU_SIMGRID_TRUE@ starpu_replay.$(OBJEXT) \ @STARPU_SIMGRID_TRUE@ starpu_replay_sched.$(OBJEXT) starpu_replay_OBJECTS = $(am_starpu_replay_OBJECTS) starpu_replay_LDADD = $(LDADD) starpu_sched_display_SOURCES = starpu_sched_display.c starpu_sched_display_OBJECTS = starpu_sched_display.$(OBJEXT) starpu_sched_display_LDADD = $(LDADD) starpu_tasks_rec_complete_SOURCES = starpu_tasks_rec_complete.c starpu_tasks_rec_complete_OBJECTS = \ starpu_tasks_rec_complete.$(OBJEXT) starpu_tasks_rec_complete_LDADD = $(LDADD) am__dist_bin_SCRIPTS_DIST = starpu_workers_activity \ starpu_codelet_histo_profile starpu_codelet_profile starpu_env \ starpu_config starpu_mpi_comm_matrix.py \ starpu_fxt_number_events_to_names.py \ starpu_paje_draw_histogram starpu_paje_draw_histogram.R \ starpu_paje_summary starpu_paje_summary.Rmd \ starpu_mlr_analysis starpu_mlr_analysis.Rmd \ starpu_paje_state_stats starpu_paje_state_stats.R \ starpu_send_recv_data_use.py starpu_trace_state_stats.py \ starpu_temanejo2.sh starpu_smpirun starpu_tcpipexec am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } SCRIPTS = $(dist_bin_SCRIPTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src/common -I$(top_builddir)/include -I$(top_builddir)/starpurm/include depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = ./$(DEPDIR)/loader-loader.Po \ ./$(DEPDIR)/starpu_calibrate_bus.Po \ ./$(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Po \ ./$(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Po \ ./$(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Po \ ./$(DEPDIR)/starpu_lp2paje.Po \ ./$(DEPDIR)/starpu_machine_display.Po \ ./$(DEPDIR)/starpu_perfmodel_display.Po \ ./$(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Po \ ./$(DEPDIR)/starpu_perfmodel_recdump.Po \ ./$(DEPDIR)/starpu_replay.Po \ ./$(DEPDIR)/starpu_replay_sched.Po \ ./$(DEPDIR)/starpu_sched_display.Po \ ./$(DEPDIR)/starpu_tasks_rec_complete.Po am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ $(AM_CFLAGS) $(CFLAGS) AM_V_CC = $(am__v_CC_@AM_V@) am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = CCLD = $(CC) LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = loader.c starpu_calibrate_bus.c starpu_fxt_data_trace.c \ starpu_fxt_stats.c starpu_fxt_tool.c starpu_lp2paje.c \ starpu_machine_display.c starpu_perfmodel_display.c \ starpu_perfmodel_plot.c starpu_perfmodel_recdump.c \ $(starpu_replay_SOURCES) starpu_sched_display.c \ starpu_tasks_rec_complete.c DIST_SOURCES = loader.c starpu_calibrate_bus.c starpu_fxt_data_trace.c \ starpu_fxt_stats.c starpu_fxt_tool.c starpu_lp2paje.c \ starpu_machine_display.c starpu_perfmodel_display.c \ starpu_perfmodel_plot.c starpu_perfmodel_recdump.c \ $(am__starpu_replay_SOURCES_DIST) starpu_sched_display.c \ starpu_tasks_rec_complete.c RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac man1dir = $(mandir)/man1 NROFF = nroff MANS = $(dist_man1_MANS) am__dist_pkgdata_DATA_DIST = gdbinit ayudame.cfg starpu_smpi.xslt \ starpu_config.cfg DATA = $(dist_pkgdata_DATA) \ $(dist_pkgdata_perfmodels_sampling_bus_DATA) \ $(dist_pkgdata_perfmodels_sampling_codelets_DATA) \ $(nobase_STARPU_MSVC__DATA) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ check recheck distdir distdir-am am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` am__tty_colors_dummy = \ mgn= red= grn= lgn= blu= brg= std=; \ am__color_tests=no am__tty_colors = { \ $(am__tty_colors_dummy); \ if test "X$(AM_COLOR_TESTS)" = Xno; then \ am__color_tests=no; \ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ am__color_tests=yes; \ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ am__color_tests=yes; \ fi; \ if test $$am__color_tests = yes; then \ red=''; \ grn=''; \ lgn=''; \ blu=''; \ mgn=''; \ brg=''; \ std=''; \ fi; \ } am__recheck_rx = ^[ ]*:recheck:[ ]* am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* # A command that, given a newline-separated list of test names on the # standard input, print the name of the tests that are to be re-run # upon "make recheck". am__list_recheck_tests = $(AWK) '{ \ recheck = 1; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ { \ if ((getline line2 < ($$0 ".log")) < 0) \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ { \ recheck = 0; \ break; \ } \ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ { \ break; \ } \ }; \ if (recheck) \ print $$0; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # A command that, given a newline-separated list of test names on the # standard input, create the global log from their .trs and .log files. am__create_global_log = $(AWK) ' \ function fatal(msg) \ { \ print "fatal: making $@: " msg | "cat >&2"; \ exit 1; \ } \ function rst_section(header) \ { \ print header; \ len = length(header); \ for (i = 1; i <= len; i = i + 1) \ printf "="; \ printf "\n\n"; \ } \ { \ copy_in_global_log = 1; \ global_test_result = "RUN"; \ while ((rc = (getline line < ($$0 ".trs"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".trs"); \ if (line ~ /$(am__global_test_result_rx)/) \ { \ sub("$(am__global_test_result_rx)", "", line); \ sub("[ ]*$$", "", line); \ global_test_result = line; \ } \ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ copy_in_global_log = 0; \ }; \ if (copy_in_global_log) \ { \ rst_section(global_test_result ": " $$0); \ while ((rc = (getline line < ($$0 ".log"))) != 0) \ { \ if (rc < 0) \ fatal("failed to read from " $$0 ".log"); \ print line; \ }; \ printf "\n"; \ }; \ close ($$0 ".trs"); \ close ($$0 ".log"); \ }' # Restructured Text title. am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } # Solaris 10 'make', and several other traditional 'make' implementations, # pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it # by disabling -e (using the XSI extension "set +e") if it's set. am__sh_e_setup = case $$- in *e*) set +e;; esac # Default flags passed to test drivers. am__common_driver_flags = \ --color-tests "$$am__color_tests" \ --enable-hard-errors "$$am__enable_hard_errors" \ --expect-failure "$$am__expect_failure" # To be inserted before the command running the test. Creates the # directory for the log if needed. Stores in $dir the directory # containing $f, in $tst the test, in $log the log. Executes the # developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and # passes TESTS_ENVIRONMENT. Set up options for the wrapper that # will run the test scripts (or their associated LOG_COMPILER, if # thy have one). am__check_pre = \ $(am__sh_e_setup); \ $(am__vpath_adj_setup) $(am__vpath_adj) \ $(am__tty_colors); \ srcdir=$(srcdir); export srcdir; \ case "$@" in \ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ *) am__odir=.;; \ esac; \ test "x$$am__odir" = x"." || test -d "$$am__odir" \ || $(MKDIR_P) "$$am__odir" || exit $$?; \ if test -f "./$$f"; then dir=./; \ elif test -f "$$f"; then dir=; \ else dir="$(srcdir)/"; fi; \ tst=$$dir$$f; log='$@'; \ if test -n '$(DISABLE_HARD_ERRORS)'; then \ am__enable_hard_errors=no; \ else \ am__enable_hard_errors=yes; \ fi; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ am__expect_failure=yes;; \ *) \ am__expect_failure=no;; \ esac; \ $(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) # A shell command to get the names of the tests scripts with any registered # extension removed (i.e., equivalently, the names of the test logs, with # the '.log' extension removed). The result is saved in the shell variable # '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, # we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", # since that might cause problem with VPATH rewrites for suffix-less tests. # See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. am__set_TESTS_bases = \ bases='$(TEST_LOGS)'; \ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ bases=`echo $$bases` AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' RECHECK_LOGS = $(TEST_LOGS) TEST_SUITE_LOG = test-suite.log TEST_EXTENSIONS = @EXEEXT@ .test LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) am__set_b = \ case '$@' in \ */*) \ case '$*' in \ */*) b='$*';; \ *) b=`echo '$@' | sed 's/\.log$$//'`; \ esac;; \ *) \ b='$*';; \ esac am__test_logs1 = $(TESTS:=.log) am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) TEST_LOGS = $(am__test_logs2:.test.log=.log) TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build-aux/test-driver TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ $(TEST_LOG_FLAGS) DIST_SUBDIRS = $(SUBDIRS) am__DIST_COMMON = $(dist_man1_MANS) $(srcdir)/Makefile.in \ $(srcdir)/starpu_codelet_histo_profile.in \ $(srcdir)/starpu_codelet_profile.in $(srcdir)/starpu_config.in \ $(srcdir)/starpu_env.in \ $(srcdir)/starpu_fxt_number_events_to_names.py.in \ $(srcdir)/starpu_mlr_analysis.in \ $(srcdir)/starpu_mpi_comm_matrix.py.in \ $(srcdir)/starpu_paje_draw_histogram.in \ $(srcdir)/starpu_paje_sort.in \ $(srcdir)/starpu_paje_state_stats.in \ $(srcdir)/starpu_paje_summary.in $(srcdir)/starpu_smpirun.in \ $(srcdir)/starpu_tcpipexec.in \ $(srcdir)/starpu_workers_activity.in \ $(top_srcdir)/build-aux/depcomp \ $(top_srcdir)/build-aux/test-driver \ $(top_srcdir)/make/starpu-loader.mk \ $(top_srcdir)/make/starpu-tests.mk \ $(top_srcdir)/make/starpu.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" pkglibdir = @pkglibdir@ ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ APP_CFLAGS = @APP_CFLAGS@ APP_CXXFLAGS = @APP_CXXFLAGS@ APP_FCFLAGS = @APP_FCFLAGS@ APP_FFLAGS = @APP_FFLAGS@ AR = @AR@ AS = @AS@ ATLASDIR = @ATLASDIR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ BLAS_LIB = @BLAS_LIB@ BLAS_LIBS = @BLAS_LIBS@ BLAS_OPENBLAS_CFLAGS = @BLAS_OPENBLAS_CFLAGS@ BLAS_OPENBLAS_LIBS = @BLAS_OPENBLAS_LIBS@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CC_OR_MPICC = @CC_OR_MPICC@ CC_OR_NVCC = @CC_OR_NVCC@ CFLAGS = @CFLAGS@ COVERAGE = @COVERAGE@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DGELS_LIBS = @DGELS_LIBS@ DLB_CFLAGS = @DLB_CFLAGS@ DLB_LIBS = @DLB_LIBS@ DLLTOOL = @DLLTOOL@ DOC_GENERATE_LATEX = @DOC_GENERATE_LATEX@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ ECLIPSE = @ECLIPSE@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ F77 = @F77@ FC = @FC@ FCFLAGS = @FCFLAGS@ FFLAGS = @FFLAGS@ FFTWF_CFLAGS = @FFTWF_CFLAGS@ FFTWF_LIBS = @FFTWF_LIBS@ FFTWL_CFLAGS = @FFTWL_CFLAGS@ FFTWL_LIBS = @FFTWL_LIBS@ FFTW_CFLAGS = @FFTW_CFLAGS@ FFTW_LIBS = @FFTW_LIBS@ FGREP = @FGREP@ FILECMD = @FILECMD@ FXTDIR = @FXTDIR@ FXT_CFLAGS = @FXT_CFLAGS@ FXT_LDFLAGS = @FXT_LDFLAGS@ FXT_LIBS = @FXT_LIBS@ GDB = @GDB@ GLOBAL_AM_CFLAGS = @GLOBAL_AM_CFLAGS@ GLOBAL_AM_CXXFLAGS = @GLOBAL_AM_CXXFLAGS@ GLOBAL_AM_FCFLAGS = @GLOBAL_AM_FCFLAGS@ GLOBAL_AM_FFLAGS = @GLOBAL_AM_FFLAGS@ GOTODIR = @GOTODIR@ GREP = @GREP@ HAVE_CXX11 = @HAVE_CXX11@ HAVE_FFTWFL = @HAVE_FFTWFL@ HELP2MAN = @HELP2MAN@ HIPCC = @HIPCC@ HIPCCFLAGS = @HIPCCFLAGS@ $(am__append_2) HIPCONFIG = @HIPCONFIG@ HWLOC_CFLAGS = @HWLOC_CFLAGS@ HWLOC_LIBS = @HWLOC_LIBS@ HWLOC_REQUIRES = @HWLOC_REQUIRES@ ICC = @ICC@ ICC_ARGS = @ICC_ARGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ JULIA = @JULIA@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ $(top_builddir)/src/@LIBSTARPU_LINK@ \ $(STARPU_EXPORTED_LIBS) LIBSOCL_INTERFACE_AGE = @LIBSOCL_INTERFACE_AGE@ LIBSOCL_INTERFACE_CURRENT = @LIBSOCL_INTERFACE_CURRENT@ LIBSOCL_INTERFACE_REVISION = @LIBSOCL_INTERFACE_REVISION@ LIBSTARPUFFT_INTERFACE_AGE = @LIBSTARPUFFT_INTERFACE_AGE@ LIBSTARPUFFT_INTERFACE_CURRENT = @LIBSTARPUFFT_INTERFACE_CURRENT@ LIBSTARPUFFT_INTERFACE_REVISION = @LIBSTARPUFFT_INTERFACE_REVISION@ LIBSTARPUJULIA_INTERFACE_AGE = @LIBSTARPUJULIA_INTERFACE_AGE@ LIBSTARPUJULIA_INTERFACE_CURRENT = @LIBSTARPUJULIA_INTERFACE_CURRENT@ LIBSTARPUJULIA_INTERFACE_REVISION = @LIBSTARPUJULIA_INTERFACE_REVISION@ LIBSTARPUMPI_INTERFACE_AGE = @LIBSTARPUMPI_INTERFACE_AGE@ LIBSTARPUMPI_INTERFACE_CURRENT = @LIBSTARPUMPI_INTERFACE_CURRENT@ LIBSTARPUMPI_INTERFACE_REVISION = @LIBSTARPUMPI_INTERFACE_REVISION@ LIBSTARPURM_INTERFACE_AGE = @LIBSTARPURM_INTERFACE_AGE@ LIBSTARPURM_INTERFACE_CURRENT = @LIBSTARPURM_INTERFACE_CURRENT@ LIBSTARPURM_INTERFACE_REVISION = @LIBSTARPURM_INTERFACE_REVISION@ LIBSTARPU_INTERFACE_AGE = @LIBSTARPU_INTERFACE_AGE@ LIBSTARPU_INTERFACE_CURRENT = @LIBSTARPU_INTERFACE_CURRENT@ LIBSTARPU_INTERFACE_REVISION = @LIBSTARPU_INTERFACE_REVISION@ LIBSTARPU_LDFLAGS = @LIBSTARPU_LDFLAGS@ LIBSTARPU_LINK = @LIBSTARPU_LINK@ LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE = @LIBSTARPU_OPENMP_LLVM_INTERFACE_AGE@ LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT = @LIBSTARPU_OPENMP_LLVM_INTERFACE_CURRENT@ LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION = @LIBSTARPU_OPENMP_LLVM_INTERFACE_REVISION@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAGMA_CFLAGS = @MAGMA_CFLAGS@ MAGMA_LIBS = @MAGMA_LIBS@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ MPICC = @MPICC@ MPICC_LDFLAGS = @MPICC_LDFLAGS@ MPICXX = @MPICXX@ MPIEXEC = @MPIEXEC@ MPIEXEC_ARGS = @MPIEXEC_ARGS@ MPIFORT = @MPIFORT@ MPI_SYNC_CLOCKS_CFLAGS = @MPI_SYNC_CLOCKS_CFLAGS@ MPI_SYNC_CLOCKS_LIBS = @MPI_SYNC_CLOCKS_LIBS@ NM = @NM@ NMAD_CFLAGS = @NMAD_CFLAGS@ NMAD_LIBS = @NMAD_LIBS@ NMEDIT = @NMEDIT@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ $(am__append_1) NVCC_CC = @NVCC_CC@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENBLAS_CFLAGS = @OPENBLAS_CFLAGS@ OPENBLAS_LIBS = @OPENBLAS_LIBS@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PAPI_CFLAGS = @PAPI_CFLAGS@ PAPI_LIBS = @PAPI_LIBS@ PARALLEL = @PARALLEL@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ POTI_CFLAGS = @POTI_CFLAGS@ POTI_LIBS = @POTI_LIBS@ PROG_CLANG = @PROG_CLANG@ PROG_DATE = @PROG_DATE@ PROG_FIND = @PROG_FIND@ PROG_STAT = @PROG_STAT@ PYTHON = @PYTHON@ PYTHON_NUMPY_DIR = @PYTHON_NUMPY_DIR@ PYTHON_SETUP_OPTIONS = @PYTHON_SETUP_OPTIONS@ PYTHON_VERSION = @PYTHON_VERSION@ RANLIB = @RANLIB@ REALBASH = @REALBASH@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMGRID_CFLAGS = @SIMGRID_CFLAGS@ SIMGRID_LDFLAGS = @SIMGRID_LDFLAGS@ SIMGRID_LIBS = @SIMGRID_LIBS@ SIMGRID_MC = @SIMGRID_MC@ SLIC_CONFIG = @SLIC_CONFIG@ SOCL_OCL_LIB_OPENCL_DIR = @SOCL_OCL_LIB_OPENCL_DIR@ SOCL_VENDORS = @SOCL_VENDORS@ STARPUPY_EXTRA_LINK_ARGS = @STARPUPY_EXTRA_LINK_ARGS@ STARPU_BLAS_LDFLAGS = @STARPU_BLAS_LDFLAGS@ STARPU_BUILD_DIR = @STARPU_BUILD_DIR@ STARPU_CUDA_CPPFLAGS = @STARPU_CUDA_CPPFLAGS@ STARPU_CUDA_FORTRAN_LDFLAGS = @STARPU_CUDA_FORTRAN_LDFLAGS@ STARPU_CUDA_LDFLAGS = @STARPU_CUDA_LDFLAGS@ STARPU_CUFFT_LDFLAGS = @STARPU_CUFFT_LDFLAGS@ STARPU_CURAND_LDFLAGS = @STARPU_CURAND_LDFLAGS@ STARPU_EFFECTIVE_VERSION = @STARPU_EFFECTIVE_VERSION@ STARPU_EXPORTED_LIBS = @STARPU_EXPORTED_LIBS@ STARPU_EXPORT_DYNAMIC = @STARPU_EXPORT_DYNAMIC@ STARPU_FXT_EVENT_DEFINES = @STARPU_FXT_EVENT_DEFINES@ STARPU_GLPK_LDFLAGS = @STARPU_GLPK_LDFLAGS@ STARPU_HAVE_CXX11 = @STARPU_HAVE_CXX11@ STARPU_HAVE_F77_H = @STARPU_HAVE_F77_H@ STARPU_HAVE_FFTW = @STARPU_HAVE_FFTW@ STARPU_HAVE_FFTWF = @STARPU_HAVE_FFTWF@ STARPU_HAVE_HWLOC = @STARPU_HAVE_HWLOC@ STARPU_HAVE_MAGMA = @STARPU_HAVE_MAGMA@ STARPU_HDF5_LDFLAGS = @STARPU_HDF5_LDFLAGS@ STARPU_HIP_CPPFLAGS = @STARPU_HIP_CPPFLAGS@ STARPU_HIP_LDFLAGS = @STARPU_HIP_LDFLAGS@ STARPU_H_CPPFLAGS = @STARPU_H_CPPFLAGS@ STARPU_INCLUDE_PATH = @STARPU_INCLUDE_PATH@ STARPU_LAPACK_LDFLAGS = @STARPU_LAPACK_LDFLAGS@ STARPU_LEVELDB_LDFLAGS = @STARPU_LEVELDB_LDFLAGS@ STARPU_LIBLAPACK_LDFLAGS = @STARPU_LIBLAPACK_LDFLAGS@ STARPU_LIBNUMA_LDFLAGS = @STARPU_LIBNUMA_LDFLAGS@ STARPU_LIB_PATH = @STARPU_LIB_PATH@ STARPU_MAJOR_VERSION = @STARPU_MAJOR_VERSION@ STARPU_MINOR_VERSION = @STARPU_MINOR_VERSION@ STARPU_MODULE_LIBS = @STARPU_MODULE_LIBS@ STARPU_MS_LIB = @STARPU_MS_LIB@ STARPU_MS_LIB_ARCH = @STARPU_MS_LIB_ARCH@ STARPU_NVCC_H_CPPFLAGS = @STARPU_NVCC_H_CPPFLAGS@ STARPU_OPENBLAS = @STARPU_OPENBLAS@ STARPU_OPENBLAS_LDFLAGS = @STARPU_OPENBLAS_LDFLAGS@ STARPU_OPENCL_CPPFLAGS = @STARPU_OPENCL_CPPFLAGS@ STARPU_OPENCL_DATAdir = @STARPU_OPENCL_DATAdir@ STARPU_OPENCL_LDFLAGS = @STARPU_OPENCL_LDFLAGS@ STARPU_OPENGL_RENDER = @STARPU_OPENGL_RENDER@ STARPU_OPENGL_RENDER_LDFLAGS = @STARPU_OPENGL_RENDER_LDFLAGS@ STARPU_OPTION_LIBS = @STARPU_OPTION_LIBS@ STARPU_PERF_DEBUG = @STARPU_PERF_DEBUG@ STARPU_RELEASE_VERSION = @STARPU_RELEASE_VERSION@ STARPU_SC_HYPERVISOR = @STARPU_SC_HYPERVISOR@ STARPU_SC_HYPERVISOR_DEBUG = @STARPU_SC_HYPERVISOR_DEBUG@ STARPU_SRC_DIR = @STARPU_SRC_DIR@ STARPU_USE_CPU = @STARPU_USE_CPU@ STARPU_USE_CUDA = @STARPU_USE_CUDA@ STARPU_USE_FXT = @STARPU_USE_FXT@ STARPU_USE_HIPBLAS = @STARPU_USE_HIPBLAS@ STARPU_USE_MAX_FPGA = @STARPU_USE_MAX_FPGA@ STARPU_USE_OPENCL = @STARPU_USE_OPENCL@ STRIP = @STRIP@ VERSION = @VERSION@ XMKMF = @XMKMF@ X_CFLAGS = @X_CFLAGS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBS = @X_LIBS@ X_PRE_LIBS = @X_PRE_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_ct_F77 = @ac_ct_F77@ ac_ct_FC = @ac_ct_FC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ doxygencommand = @doxygencommand@ dvidir = @dvidir@ eclipsepath = @eclipsepath@ epstopdfcommand = @epstopdfcommand@ exec_prefix = @exec_prefix@ gitcommand = @gitcommand@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ hwloccalccommand = @hwloccalccommand@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ juliapath = @juliapath@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpicc_path = @mpicc_path@ mpicxx_path = @mpicxx_path@ mpiexec_path = @mpiexec_path@ mpifort_path = @mpifort_path@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ pdflatexcommand = @pdflatexcommand@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target = @target@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ LAUNCHER_ENV = $(am__append_4) $(am__append_6) LAUNCHER = $(am__append_3) $(am__append_5) AM_CFLAGS = $(GLOBAL_AM_CFLAGS) AM_CXXFLAGS = $(GLOBAL_AM_CXXFLAGS) AM_FFLAGS = $(GLOBAL_AM_FFLAGS) AM_FCFLAGS = $(GLOBAL_AM_FCFLAGS) @STARPU_USE_CUDA_TRUE@V_nvcc_ = $(V_nvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_CUDA_TRUE@V_nvcc_0 = @echo " NVCC " $@; @STARPU_USE_CUDA_TRUE@V_nvcc_1 = @STARPU_USE_CUDA_TRUE@V_nvcc = $(V_nvcc_$(V)) # Avoid using nvcc when making a coverity build, nvcc produces millions of # lines of code which we don't want to analyze. Instead, build dumb .o files # containing empty functions. @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_ = $(V_mynvcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_0 = @echo " myNVCC " $@; @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc_1 = @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@V_mynvcc = $(V_mynvcc_$(V)) @STARPU_USE_HIP_TRUE@V_hipcc_ = $(V_hipcc_$(AM_DEFAULT_VERBOSITY)) @STARPU_USE_HIP_TRUE@V_hipcc_0 = @echo " HIPCC " $@; @STARPU_USE_HIP_TRUE@V_hipcc_1 = @STARPU_USE_HIP_TRUE@V_hipcc = $(V_hipcc_$(V)) V_icc_ = $(V_icc_$(AM_DEFAULT_VERBOSITY)) V_icc_0 = @echo " ICC " $@; V_icc_1 = V_icc = $(V_icc_$(V)) V_ln_ = $(V_ln_$(AM_DEFAULT_VERBOSITY)) V_ln_0 = @echo " LN " $@; V_ln_1 = V_ln = $(V_ln_$(V)) V_help2man_ = $(V_help2man_$(AM_DEFAULT_VERBOSITY)) V_help2man_0 = @echo " HELP2MAN" $@; V_help2man_1 = V_help2man = $(V_help2man_$(V)) # These are always defined, both for starpu-mpi and for mpi-ms # For MPI tests we don't want to oversubscribe the system MPI_RUN_ENV = STARPU_WORKERS_GETBIND=0 STARPU_WORKERS_NOBIND=1 STARPU_NCPU=3 @STARPU_SIMGRID_FALSE@STARPU_MPIEXEC = $(MPIEXEC) $(MPIEXEC_ARGS) -np $(STARPU_MPI_NP) @STARPU_SIMGRID_TRUE@STARPU_MPIEXEC = $(abs_top_builddir)/tools/starpu_smpirun -np $(STARPU_MPI_NP) -platform $(abs_top_srcdir)/tools/perfmodels/cluster.xml -hostfile $(abs_top_srcdir)/tools/perfmodels/hostfile # When GNU parallel is available and -j is passed to make, run tests through # parallel, using a "starpu" semaphore. # Also make test shell scripts run its tests through parallel, using a # "substarpu" semaphore. This brings some overload, but only one level. @HAVE_PARALLEL_TRUE@STARPU_SUB_PARALLEL = $(shell echo $(MAKEFLAGS) | sed -ne 's/.*-j\([0-9]\+\).*/parallel --semaphore --id substarpu --fg --fg-exit -j \1/p') @STARPU_USE_MPI_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(STARPU_MPIEXEC) @STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@MS_LAUNCHER = $(abs_top_builddir)/tools/starpu_tcpipexec -np 2 -nobind -ncpus 1 @STARPU_HAVE_WINDOWS_FALSE@LOADER_BIN = $(LAUNCHER) $(LOADER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_TRUE@LOADER_BIN = $(LAUNCHER) $(EXTERNAL) @STARPU_HAVE_WINDOWS_FALSE@loader_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_builddir)/src/ @STARPU_HAVE_AM111_FALSE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN) @STARPU_HAVE_AM111_TRUE@TESTS_ENVIRONMENT = $(LAUNCHER_ENV) top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" @STARPU_HAVE_AM111_TRUE@LOG_COMPILER = $(LOADER_BIN) AM_TESTS_FD_REDIRECT = 9>&2 # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2013-2013 Thibaut Lambert # Copyright (C) 2013-2013 Joris Pablo # Copyright (C) 2017-2017 Erwan Leria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # SUBDIRS = AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/tools/ -I$(top_srcdir)/mpi/ -I$(top_builddir)/src -I$(top_srcdir)/src $(STARPU_H_CPPFLAGS) AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ dist_bin_SCRIPTS = starpu_workers_activity \ starpu_codelet_histo_profile starpu_codelet_profile starpu_env \ starpu_config starpu_mpi_comm_matrix.py \ starpu_fxt_number_events_to_names.py \ starpu_paje_draw_histogram starpu_paje_draw_histogram.R \ starpu_paje_summary starpu_paje_summary.Rmd \ starpu_mlr_analysis starpu_mlr_analysis.Rmd \ starpu_paje_state_stats starpu_paje_state_stats.R \ starpu_send_recv_data_use.py starpu_trace_state_stats.py \ $(am__append_13) $(am__append_18) $(am__append_19) dist_pkgdata_DATA = gdbinit $(am__append_14) $(am__append_17) \ starpu_config.cfg pkgdata_perfmodels_sampling_busdir = $(datarootdir)/starpu/perfmodels/sampling/bus pkgdata_perfmodels_sampling_codeletsdir = $(datarootdir)/starpu/perfmodels/sampling/codelets/45 dist_pkgdata_perfmodels_sampling_bus_DATA = \ perfmodels/sampling/bus/attila.affinity \ perfmodels/sampling/bus/attila.bandwidth \ perfmodels/sampling/bus/attila.config \ perfmodels/sampling/bus/attila.latency \ perfmodels/sampling/bus/attila.platform.xml \ perfmodels/sampling/bus/attila.platform.v4.xml \ perfmodels/sampling/bus/hannibal.affinity \ perfmodels/sampling/bus/hannibal.bandwidth \ perfmodels/sampling/bus/hannibal.config \ perfmodels/sampling/bus/hannibal.latency \ perfmodels/sampling/bus/hannibal.platform.xml \ perfmodels/sampling/bus/hannibal.platform.v4.xml \ perfmodels/sampling/bus/hannibal-pitch.affinity \ perfmodels/sampling/bus/hannibal-pitch.bandwidth \ perfmodels/sampling/bus/hannibal-pitch.config \ perfmodels/sampling/bus/hannibal-pitch.latency \ perfmodels/sampling/bus/hannibal-pitch.platform.xml \ perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml \ perfmodels/sampling/bus/idgraf.affinity \ perfmodels/sampling/bus/idgraf.bandwidth \ perfmodels/sampling/bus/idgraf.config \ perfmodels/sampling/bus/idgraf.latency \ perfmodels/sampling/bus/idgraf.platform.xml \ perfmodels/sampling/bus/idgraf.platform.v4.xml \ perfmodels/sampling/bus/mirage.affinity \ perfmodels/sampling/bus/mirage.bandwidth \ perfmodels/sampling/bus/mirage.config \ perfmodels/sampling/bus/mirage.latency \ perfmodels/sampling/bus/mirage.platform.xml \ perfmodels/sampling/bus/mirage.platform.v4.xml \ perfmodels/sampling/bus/sirocco.affinity \ perfmodels/sampling/bus/sirocco.bandwidth \ perfmodels/sampling/bus/sirocco.config \ perfmodels/sampling/bus/sirocco.latency \ perfmodels/sampling/bus/sirocco.platform.xml \ perfmodels/sampling/bus/sirocco.platform.v4.xml dist_pkgdata_perfmodels_sampling_codelets_DATA = \ perfmodels/sampling/codelets/45/chol_model_potrf.attila \ perfmodels/sampling/codelets/45/chol_model_trsm.attila \ perfmodels/sampling/codelets/45/chol_model_syrk.attila \ perfmodels/sampling/codelets/45/chol_model_gemm.attila \ perfmodels/sampling/codelets/45/cl_update.attila \ perfmodels/sampling/codelets/45/save_cl_bottom.attila \ perfmodels/sampling/codelets/45/save_cl_top.attila \ perfmodels/sampling/codelets/45/starpu_sgemm_gemm.attila \ perfmodels/sampling/codelets/45/starpu_dgemm_gemm.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.attila \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.attila \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.attila \ perfmodels/sampling/codelets/45/overlap_sleep_1024_24.attila \ \ perfmodels/sampling/codelets/45/chol_model_potrf.hannibal \ perfmodels/sampling/codelets/45/chol_model_trsm.hannibal \ perfmodels/sampling/codelets/45/chol_model_syrk.hannibal \ perfmodels/sampling/codelets/45/chol_model_gemm.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal \ \ perfmodels/sampling/codelets/45/chol_model_potrf.hannibal-pitch \ perfmodels/sampling/codelets/45/chol_model_trsm.hannibal-pitch \ perfmodels/sampling/codelets/45/chol_model_syrk.hannibal-pitch \ perfmodels/sampling/codelets/45/chol_model_gemm.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.hannibal-pitch \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal-pitch \ \ perfmodels/sampling/codelets/45/chol_model_potrf.idgraf \ perfmodels/sampling/codelets/45/chol_model_trsm.idgraf \ perfmodels/sampling/codelets/45/chol_model_syrk.idgraf \ perfmodels/sampling/codelets/45/chol_model_gemm.idgraf \ perfmodels/sampling/codelets/45/cl_update.idgraf \ perfmodels/sampling/codelets/45/save_cl_bottom.idgraf \ perfmodels/sampling/codelets/45/save_cl_top.idgraf \ perfmodels/sampling/codelets/45/starpu_sgemm_gemm.idgraf \ perfmodels/sampling/codelets/45/starpu_dgemm_gemm.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.idgraf \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.idgraf \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.idgraf \ \ perfmodels/sampling/codelets/45/chol_model_potrf.mirage \ perfmodels/sampling/codelets/45/chol_model_trsm.mirage \ perfmodels/sampling/codelets/45/chol_model_syrk.mirage \ perfmodels/sampling/codelets/45/chol_model_gemm.mirage \ perfmodels/sampling/codelets/45/cl_update.mirage \ perfmodels/sampling/codelets/45/save_cl_bottom.mirage \ perfmodels/sampling/codelets/45/save_cl_top.mirage \ perfmodels/sampling/codelets/45/starpu_sgemm_gemm.mirage \ perfmodels/sampling/codelets/45/starpu_dgemm_gemm.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.mirage \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.mirage \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.mirage \ perfmodels/sampling/codelets/45/overlap_sleep_1024_24.mirage \ perfmodels/sampling/codelets/45/add_scal.mirage \ perfmodels/sampling/codelets/45/func.mirage \ perfmodels/sampling/codelets/45/log_arr.mirage \ perfmodels/sampling/codelets/45/log_list.mirage \ perfmodels/sampling/codelets/45/multi.mirage \ perfmodels/sampling/codelets/45/multi_2arr.mirage \ perfmodels/sampling/codelets/45/multi_list.mirage \ perfmodels/sampling/codelets/45/scal.mirage \ perfmodels/sampling/codelets/45/scal_arr.mirage \ perfmodels/sampling/codelets/45/sqrt.mirage \ \ perfmodels/sampling/codelets/45/chol_model_potrf.sirocco \ perfmodels/sampling/codelets/45/chol_model_trsm.sirocco \ perfmodels/sampling/codelets/45/chol_model_syrk.sirocco \ perfmodels/sampling/codelets/45/chol_model_gemm.sirocco \ perfmodels/sampling/codelets/45/cl_update.sirocco \ perfmodels/sampling/codelets/45/save_cl_bottom.sirocco \ perfmodels/sampling/codelets/45/save_cl_top.sirocco \ perfmodels/sampling/codelets/45/starpu_sgemm_gemm.sirocco \ perfmodels/sampling/codelets/45/starpu_dgemm_gemm.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.sirocco \ perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.sirocco \ perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.sirocco \ perfmodels/sampling/codelets/45/overlap_sleep_1024_24.sirocco \ \ perfmodels/sampling/codelets/45/null.idgraf \ perfmodels/sampling/codelets/45/null.sirocco EXTRA_DIST = \ dev/checker/rename.sed \ dev/checker/rename.sh \ dev/cppcheck/suppressions.txt \ dev/valgrind/blas.suppr \ dev/valgrind/nvidia.suppr \ dev/valgrind/bash.suppr \ dev/valgrind/fxt.suppr \ dev/valgrind/glpk.suppr \ dev/valgrind/hdf5.suppr \ dev/valgrind/hwloc.suppr \ dev/valgrind/libc.suppr \ dev/valgrind/libgomp.suppr \ dev/valgrind/libnuma.suppr \ dev/valgrind/madmpi.suppr \ dev/valgrind/opencl.suppr \ dev/valgrind/openmpi.suppr \ dev/valgrind/openmp.suppr \ dev/valgrind/p11-kit.suppr \ dev/valgrind/padico.suppr \ dev/valgrind/papi.suppr \ dev/valgrind/pthread.suppr \ dev/valgrind/starpu.suppr \ dev/valgrind/starpu_pw.suppr \ dev/valgrind/starpupy.suppr \ dev/valgrind/valgrind.suppr \ dev/valgrind/valgrind.sh \ dev/valgrind/valgrind_xml.sh \ dev/valgrind/helgrind.sh \ dev/tsan/starpu.suppr \ dev/lsan/suppressions \ perfmodels/README \ perfmodels/cluster.xml \ perfmodels/hostfile \ perfmodels/sampling/codelets/tmp/mlr_init.out \ msvc/starpu_clean.bat \ msvc/starpu_open.bat \ msvc/starpu_exec.bat \ msvc/starpu_var.bat \ msvc/starpu.sln \ msvc/starpu/starpu.vcxproj \ release/Makefile \ release/README.md \ patch-ayudame \ perfs/bench_sgemm.sh \ perfs/error_model.gp \ perfs/error_model.sh \ distrib/distrib.r \ distrib/distrib.sh \ starpu_msexec CLEANFILES = *.gcno *.gcda *.linkinfo starpu_idle_microsec.log figure/* mlr_* ##################################### # What to install and what to check # ##################################### STARPU_TOOLS = $(am__append_9) $(am__append_11) starpu_machine_display \ starpu_sched_display $(am__append_12) @STARPU_USE_FXT_TRUE@starpu_fxt_tool_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS) @STARPU_USE_FXT_TRUE@starpu_fxt_tool_LDADD = $(FXT_LIBS) @STARPU_USE_FXT_TRUE@starpu_fxt_tool_LDFLAGS = $(FXT_LDFLAGS) @STARPU_USE_FXT_TRUE@starpu_fxt_stats_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS) @STARPU_USE_FXT_TRUE@starpu_fxt_stats_LDADD = $(FXT_LIBS) @STARPU_USE_FXT_TRUE@starpu_fxt_stats_LDFLAGS = $(FXT_LDFLAGS) @STARPU_USE_FXT_TRUE@starpu_fxt_data_trace_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS) @STARPU_USE_FXT_TRUE@starpu_fxt_data_trace_LDADD = $(FXT_LIBS) @STARPU_USE_FXT_TRUE@starpu_fxt_data_trace_LDFLAGS = $(FXT_LDFLAGS) @STARPU_SIMGRID_TRUE@starpu_replay_SOURCES = \ @STARPU_SIMGRID_TRUE@ starpu_replay.c \ @STARPU_SIMGRID_TRUE@ starpu_replay_sched.c starpu_perfmodel_plot_CPPFLAGS = $(AM_CPPFLAGS) $(FXT_CFLAGS) @STARPU_HAVE_WINDOWS_TRUE@STARPU_MSVC_dir = $(bindir) @STARPU_HAVE_WINDOWS_TRUE@nobase_STARPU_MSVC__DATA = \ @STARPU_HAVE_WINDOWS_TRUE@ msvc/starpu_clean.bat \ @STARPU_HAVE_WINDOWS_TRUE@ msvc/starpu_open.bat \ @STARPU_HAVE_WINDOWS_TRUE@ msvc/starpu_exec.bat \ @STARPU_HAVE_WINDOWS_TRUE@ msvc/starpu_var.bat \ @STARPU_HAVE_WINDOWS_TRUE@ msvc/starpu.sln \ @STARPU_HAVE_WINDOWS_TRUE@ msvc/starpu/starpu.vcxproj @STARPU_HAVE_HELP2MAN_TRUE@dist_man1_MANS = starpu_calibrate_bus.1 \ @STARPU_HAVE_HELP2MAN_TRUE@ starpu_machine_display.1 \ @STARPU_HAVE_HELP2MAN_TRUE@ starpu_perfmodel_display.1 \ @STARPU_HAVE_HELP2MAN_TRUE@ starpu_perfmodel_plot.1 \ @STARPU_HAVE_HELP2MAN_TRUE@ starpu_tasks_rec_complete.1 \ @STARPU_HAVE_HELP2MAN_TRUE@ starpu_lp2paje.1 \ @STARPU_HAVE_HELP2MAN_TRUE@ starpu_workers_activity.1 \ @STARPU_HAVE_HELP2MAN_TRUE@ starpu_codelet_profile.1 \ @STARPU_HAVE_HELP2MAN_TRUE@ starpu_codelet_histo_profile.1 \ @STARPU_HAVE_HELP2MAN_TRUE@ starpu_env.1 \ @STARPU_HAVE_HELP2MAN_TRUE@ starpu_mpi_comm_matrix.1 \ @STARPU_HAVE_HELP2MAN_TRUE@ starpu_fxt_number_events_to_names.1 \ @STARPU_HAVE_HELP2MAN_TRUE@ starpu_paje_draw_histogram.1 \ @STARPU_HAVE_HELP2MAN_TRUE@ starpu_paje_state_stats.1 \ @STARPU_HAVE_HELP2MAN_TRUE@ starpu_config.1 $(am__append_15) \ @STARPU_HAVE_HELP2MAN_TRUE@ $(am__append_16) all: all-recursive .SUFFIXES: .SUFFIXES: .c .cu .cubin .hip .lo .log .o .obj .test .test$(EXEEXT) .trs $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign tools/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign tools/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; $(top_srcdir)/make/starpu-tests.mk $(top_srcdir)/make/starpu.mk $(top_srcdir)/make/starpu-loader.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): starpu_env: $(top_builddir)/config.status $(srcdir)/starpu_env.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ starpu_codelet_profile: $(top_builddir)/config.status $(srcdir)/starpu_codelet_profile.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ starpu_codelet_histo_profile: $(top_builddir)/config.status $(srcdir)/starpu_codelet_histo_profile.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ starpu_mpi_comm_matrix.py: $(top_builddir)/config.status $(srcdir)/starpu_mpi_comm_matrix.py.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ starpu_fxt_number_events_to_names.py: $(top_builddir)/config.status $(srcdir)/starpu_fxt_number_events_to_names.py.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ starpu_workers_activity: $(top_builddir)/config.status $(srcdir)/starpu_workers_activity.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ starpu_paje_draw_histogram: $(top_builddir)/config.status $(srcdir)/starpu_paje_draw_histogram.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ starpu_paje_state_stats: $(top_builddir)/config.status $(srcdir)/starpu_paje_state_stats.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ starpu_paje_summary: $(top_builddir)/config.status $(srcdir)/starpu_paje_summary.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ starpu_config: $(top_builddir)/config.status $(srcdir)/starpu_config.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ starpu_mlr_analysis: $(top_builddir)/config.status $(srcdir)/starpu_mlr_analysis.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ starpu_paje_sort: $(top_builddir)/config.status $(srcdir)/starpu_paje_sort.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ starpu_smpirun: $(top_builddir)/config.status $(srcdir)/starpu_smpirun.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ starpu_tcpipexec: $(top_builddir)/config.status $(srcdir)/starpu_tcpipexec.in cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ install-binPROGRAMS: $(bin_PROGRAMS) @$(NORMAL_INSTALL) @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ fi; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ while read p p1; do if test -f $$p \ || test -f $$p1 \ ; then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' \ -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ } \ ; done uninstall-binPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' \ `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(bindir)" && rm -f $$files clean-binPROGRAMS: @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list clean-checkPROGRAMS: @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list clean-noinstPROGRAMS: @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list loader$(EXEEXT): $(loader_OBJECTS) $(loader_DEPENDENCIES) $(EXTRA_loader_DEPENDENCIES) @rm -f loader$(EXEEXT) $(AM_V_CCLD)$(LINK) $(loader_OBJECTS) $(loader_LDADD) $(LIBS) starpu_calibrate_bus$(EXEEXT): $(starpu_calibrate_bus_OBJECTS) $(starpu_calibrate_bus_DEPENDENCIES) $(EXTRA_starpu_calibrate_bus_DEPENDENCIES) @rm -f starpu_calibrate_bus$(EXEEXT) $(AM_V_CCLD)$(LINK) $(starpu_calibrate_bus_OBJECTS) $(starpu_calibrate_bus_LDADD) $(LIBS) starpu_fxt_data_trace$(EXEEXT): $(starpu_fxt_data_trace_OBJECTS) $(starpu_fxt_data_trace_DEPENDENCIES) $(EXTRA_starpu_fxt_data_trace_DEPENDENCIES) @rm -f starpu_fxt_data_trace$(EXEEXT) $(AM_V_CCLD)$(starpu_fxt_data_trace_LINK) $(starpu_fxt_data_trace_OBJECTS) $(starpu_fxt_data_trace_LDADD) $(LIBS) starpu_fxt_stats$(EXEEXT): $(starpu_fxt_stats_OBJECTS) $(starpu_fxt_stats_DEPENDENCIES) $(EXTRA_starpu_fxt_stats_DEPENDENCIES) @rm -f starpu_fxt_stats$(EXEEXT) $(AM_V_CCLD)$(starpu_fxt_stats_LINK) $(starpu_fxt_stats_OBJECTS) $(starpu_fxt_stats_LDADD) $(LIBS) starpu_fxt_tool$(EXEEXT): $(starpu_fxt_tool_OBJECTS) $(starpu_fxt_tool_DEPENDENCIES) $(EXTRA_starpu_fxt_tool_DEPENDENCIES) @rm -f starpu_fxt_tool$(EXEEXT) $(AM_V_CCLD)$(starpu_fxt_tool_LINK) $(starpu_fxt_tool_OBJECTS) $(starpu_fxt_tool_LDADD) $(LIBS) starpu_lp2paje$(EXEEXT): $(starpu_lp2paje_OBJECTS) $(starpu_lp2paje_DEPENDENCIES) $(EXTRA_starpu_lp2paje_DEPENDENCIES) @rm -f starpu_lp2paje$(EXEEXT) $(AM_V_CCLD)$(LINK) $(starpu_lp2paje_OBJECTS) $(starpu_lp2paje_LDADD) $(LIBS) starpu_machine_display$(EXEEXT): $(starpu_machine_display_OBJECTS) $(starpu_machine_display_DEPENDENCIES) $(EXTRA_starpu_machine_display_DEPENDENCIES) @rm -f starpu_machine_display$(EXEEXT) $(AM_V_CCLD)$(LINK) $(starpu_machine_display_OBJECTS) $(starpu_machine_display_LDADD) $(LIBS) starpu_perfmodel_display$(EXEEXT): $(starpu_perfmodel_display_OBJECTS) $(starpu_perfmodel_display_DEPENDENCIES) $(EXTRA_starpu_perfmodel_display_DEPENDENCIES) @rm -f starpu_perfmodel_display$(EXEEXT) $(AM_V_CCLD)$(LINK) $(starpu_perfmodel_display_OBJECTS) $(starpu_perfmodel_display_LDADD) $(LIBS) starpu_perfmodel_plot$(EXEEXT): $(starpu_perfmodel_plot_OBJECTS) $(starpu_perfmodel_plot_DEPENDENCIES) $(EXTRA_starpu_perfmodel_plot_DEPENDENCIES) @rm -f starpu_perfmodel_plot$(EXEEXT) $(AM_V_CCLD)$(LINK) $(starpu_perfmodel_plot_OBJECTS) $(starpu_perfmodel_plot_LDADD) $(LIBS) starpu_perfmodel_recdump$(EXEEXT): $(starpu_perfmodel_recdump_OBJECTS) $(starpu_perfmodel_recdump_DEPENDENCIES) $(EXTRA_starpu_perfmodel_recdump_DEPENDENCIES) @rm -f starpu_perfmodel_recdump$(EXEEXT) $(AM_V_CCLD)$(LINK) $(starpu_perfmodel_recdump_OBJECTS) $(starpu_perfmodel_recdump_LDADD) $(LIBS) starpu_replay$(EXEEXT): $(starpu_replay_OBJECTS) $(starpu_replay_DEPENDENCIES) $(EXTRA_starpu_replay_DEPENDENCIES) @rm -f starpu_replay$(EXEEXT) $(AM_V_CCLD)$(LINK) $(starpu_replay_OBJECTS) $(starpu_replay_LDADD) $(LIBS) starpu_sched_display$(EXEEXT): $(starpu_sched_display_OBJECTS) $(starpu_sched_display_DEPENDENCIES) $(EXTRA_starpu_sched_display_DEPENDENCIES) @rm -f starpu_sched_display$(EXEEXT) $(AM_V_CCLD)$(LINK) $(starpu_sched_display_OBJECTS) $(starpu_sched_display_LDADD) $(LIBS) starpu_tasks_rec_complete$(EXEEXT): $(starpu_tasks_rec_complete_OBJECTS) $(starpu_tasks_rec_complete_DEPENDENCIES) $(EXTRA_starpu_tasks_rec_complete_DEPENDENCIES) @rm -f starpu_tasks_rec_complete$(EXEEXT) $(AM_V_CCLD)$(LINK) $(starpu_tasks_rec_complete_OBJECTS) $(starpu_tasks_rec_complete_LDADD) $(LIBS) install-dist_binSCRIPTS: $(dist_bin_SCRIPTS) @$(NORMAL_INSTALL) @list='$(dist_bin_SCRIPTS)'; test -n "$(bindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n' \ -e 'h;s|.*|.|' \ -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) { files[d] = files[d] " " $$1; \ if (++n[d] == $(am__install_max)) { \ print "f", d, files[d]; n[d] = 0; files[d] = "" } } \ else { print "f", d "/" $$4, $$1 } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(bindir)$$dir'"; \ $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ } \ ; done uninstall-dist_binSCRIPTS: @$(NORMAL_UNINSTALL) @list='$(dist_bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 's,.*/,,;$(transform)'`; \ dir='$(DESTDIR)$(bindir)'; $(am__uninstall_files_from_dir) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loader-loader.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_calibrate_bus.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_lp2paje.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_machine_display.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_perfmodel_display.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_perfmodel_recdump.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_replay.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_replay_sched.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_sched_display.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/starpu_tasks_rec_complete.Po@am__quote@ # am--include-marker $(am__depfiles_remade): @$(MKDIR_P) $(@D) @echo '# dummy' >$@-t && $(am__mv) $@-t $@ am--depfiles: $(am__depfiles_remade) .c.o: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< .c.obj: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ @am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< loader-loader.o: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.o -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.o `test -f 'loader.c' || echo '$(srcdir)/'`loader.c loader-loader.obj: loader.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT loader-loader.obj -MD -MP -MF $(DEPDIR)/loader-loader.Tpo -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/loader-loader.Tpo $(DEPDIR)/loader-loader.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='loader.c' object='loader-loader.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(loader_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o loader-loader.obj `if test -f 'loader.c'; then $(CYGPATH_W) 'loader.c'; else $(CYGPATH_W) '$(srcdir)/loader.c'; fi` starpu_fxt_data_trace-starpu_fxt_data_trace.o: starpu_fxt_data_trace.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_data_trace_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_fxt_data_trace-starpu_fxt_data_trace.o -MD -MP -MF $(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Tpo -c -o starpu_fxt_data_trace-starpu_fxt_data_trace.o `test -f 'starpu_fxt_data_trace.c' || echo '$(srcdir)/'`starpu_fxt_data_trace.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Tpo $(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='starpu_fxt_data_trace.c' object='starpu_fxt_data_trace-starpu_fxt_data_trace.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_data_trace_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_fxt_data_trace-starpu_fxt_data_trace.o `test -f 'starpu_fxt_data_trace.c' || echo '$(srcdir)/'`starpu_fxt_data_trace.c starpu_fxt_data_trace-starpu_fxt_data_trace.obj: starpu_fxt_data_trace.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_data_trace_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_fxt_data_trace-starpu_fxt_data_trace.obj -MD -MP -MF $(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Tpo -c -o starpu_fxt_data_trace-starpu_fxt_data_trace.obj `if test -f 'starpu_fxt_data_trace.c'; then $(CYGPATH_W) 'starpu_fxt_data_trace.c'; else $(CYGPATH_W) '$(srcdir)/starpu_fxt_data_trace.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Tpo $(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='starpu_fxt_data_trace.c' object='starpu_fxt_data_trace-starpu_fxt_data_trace.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_data_trace_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_fxt_data_trace-starpu_fxt_data_trace.obj `if test -f 'starpu_fxt_data_trace.c'; then $(CYGPATH_W) 'starpu_fxt_data_trace.c'; else $(CYGPATH_W) '$(srcdir)/starpu_fxt_data_trace.c'; fi` starpu_fxt_stats-starpu_fxt_stats.o: starpu_fxt_stats.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_stats_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_fxt_stats-starpu_fxt_stats.o -MD -MP -MF $(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Tpo -c -o starpu_fxt_stats-starpu_fxt_stats.o `test -f 'starpu_fxt_stats.c' || echo '$(srcdir)/'`starpu_fxt_stats.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Tpo $(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='starpu_fxt_stats.c' object='starpu_fxt_stats-starpu_fxt_stats.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_stats_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_fxt_stats-starpu_fxt_stats.o `test -f 'starpu_fxt_stats.c' || echo '$(srcdir)/'`starpu_fxt_stats.c starpu_fxt_stats-starpu_fxt_stats.obj: starpu_fxt_stats.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_stats_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_fxt_stats-starpu_fxt_stats.obj -MD -MP -MF $(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Tpo -c -o starpu_fxt_stats-starpu_fxt_stats.obj `if test -f 'starpu_fxt_stats.c'; then $(CYGPATH_W) 'starpu_fxt_stats.c'; else $(CYGPATH_W) '$(srcdir)/starpu_fxt_stats.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Tpo $(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='starpu_fxt_stats.c' object='starpu_fxt_stats-starpu_fxt_stats.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_stats_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_fxt_stats-starpu_fxt_stats.obj `if test -f 'starpu_fxt_stats.c'; then $(CYGPATH_W) 'starpu_fxt_stats.c'; else $(CYGPATH_W) '$(srcdir)/starpu_fxt_stats.c'; fi` starpu_fxt_tool-starpu_fxt_tool.o: starpu_fxt_tool.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_tool_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_fxt_tool-starpu_fxt_tool.o -MD -MP -MF $(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Tpo -c -o starpu_fxt_tool-starpu_fxt_tool.o `test -f 'starpu_fxt_tool.c' || echo '$(srcdir)/'`starpu_fxt_tool.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Tpo $(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='starpu_fxt_tool.c' object='starpu_fxt_tool-starpu_fxt_tool.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_tool_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_fxt_tool-starpu_fxt_tool.o `test -f 'starpu_fxt_tool.c' || echo '$(srcdir)/'`starpu_fxt_tool.c starpu_fxt_tool-starpu_fxt_tool.obj: starpu_fxt_tool.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_tool_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_fxt_tool-starpu_fxt_tool.obj -MD -MP -MF $(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Tpo -c -o starpu_fxt_tool-starpu_fxt_tool.obj `if test -f 'starpu_fxt_tool.c'; then $(CYGPATH_W) 'starpu_fxt_tool.c'; else $(CYGPATH_W) '$(srcdir)/starpu_fxt_tool.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Tpo $(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='starpu_fxt_tool.c' object='starpu_fxt_tool-starpu_fxt_tool.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_fxt_tool_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_fxt_tool-starpu_fxt_tool.obj `if test -f 'starpu_fxt_tool.c'; then $(CYGPATH_W) 'starpu_fxt_tool.c'; else $(CYGPATH_W) '$(srcdir)/starpu_fxt_tool.c'; fi` starpu_perfmodel_plot-starpu_perfmodel_plot.o: starpu_perfmodel_plot.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_perfmodel_plot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_perfmodel_plot-starpu_perfmodel_plot.o -MD -MP -MF $(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Tpo -c -o starpu_perfmodel_plot-starpu_perfmodel_plot.o `test -f 'starpu_perfmodel_plot.c' || echo '$(srcdir)/'`starpu_perfmodel_plot.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Tpo $(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='starpu_perfmodel_plot.c' object='starpu_perfmodel_plot-starpu_perfmodel_plot.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_perfmodel_plot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_perfmodel_plot-starpu_perfmodel_plot.o `test -f 'starpu_perfmodel_plot.c' || echo '$(srcdir)/'`starpu_perfmodel_plot.c starpu_perfmodel_plot-starpu_perfmodel_plot.obj: starpu_perfmodel_plot.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_perfmodel_plot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT starpu_perfmodel_plot-starpu_perfmodel_plot.obj -MD -MP -MF $(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Tpo -c -o starpu_perfmodel_plot-starpu_perfmodel_plot.obj `if test -f 'starpu_perfmodel_plot.c'; then $(CYGPATH_W) 'starpu_perfmodel_plot.c'; else $(CYGPATH_W) '$(srcdir)/starpu_perfmodel_plot.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Tpo $(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='starpu_perfmodel_plot.c' object='starpu_perfmodel_plot-starpu_perfmodel_plot.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(starpu_perfmodel_plot_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o starpu_perfmodel_plot-starpu_perfmodel_plot.obj `if test -f 'starpu_perfmodel_plot.c'; then $(CYGPATH_W) 'starpu_perfmodel_plot.c'; else $(CYGPATH_W) '$(srcdir)/starpu_perfmodel_plot.c'; fi` mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-man1: $(dist_man1_MANS) @$(NORMAL_INSTALL) @list1='$(dist_man1_MANS)'; \ list2=''; \ test -n "$(man1dir)" \ && test -n "`echo $$list1$$list2`" \ || exit 0; \ echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \ $(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \ { for i in $$list1; do echo "$$i"; done; \ if test -n "$$list2"; then \ for i in $$list2; do echo "$$i"; done \ | sed -n '/\.1[a-z]*$$/p'; \ fi; \ } | while read p; do \ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; echo "$$p"; \ done | \ sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ sed 'N;N;s,\n, ,g' | { \ list=; while read file base inst; do \ if test "$$base" = "$$inst"; then list="$$list $$file"; else \ echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \ $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \ fi; \ done; \ for i in $$list; do echo "$$i"; done | $(am__base_list) | \ while read files; do \ test -z "$$files" || { \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \ done; } uninstall-man1: @$(NORMAL_UNINSTALL) @list='$(dist_man1_MANS)'; test -n "$(man1dir)" || exit 0; \ files=`{ for i in $$list; do echo "$$i"; done; \ } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir) install-dist_pkgdataDATA: $(dist_pkgdata_DATA) @$(NORMAL_INSTALL) @list='$(dist_pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgdatadir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgdatadir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdatadir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdatadir)" || exit $$?; \ done uninstall-dist_pkgdataDATA: @$(NORMAL_UNINSTALL) @list='$(dist_pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgdatadir)'; $(am__uninstall_files_from_dir) install-dist_pkgdata_perfmodels_sampling_busDATA: $(dist_pkgdata_perfmodels_sampling_bus_DATA) @$(NORMAL_INSTALL) @list='$(dist_pkgdata_perfmodels_sampling_bus_DATA)'; test -n "$(pkgdata_perfmodels_sampling_busdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgdata_perfmodels_sampling_busdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgdata_perfmodels_sampling_busdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdata_perfmodels_sampling_busdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdata_perfmodels_sampling_busdir)" || exit $$?; \ done uninstall-dist_pkgdata_perfmodels_sampling_busDATA: @$(NORMAL_UNINSTALL) @list='$(dist_pkgdata_perfmodels_sampling_bus_DATA)'; test -n "$(pkgdata_perfmodels_sampling_busdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgdata_perfmodels_sampling_busdir)'; $(am__uninstall_files_from_dir) install-dist_pkgdata_perfmodels_sampling_codeletsDATA: $(dist_pkgdata_perfmodels_sampling_codelets_DATA) @$(NORMAL_INSTALL) @list='$(dist_pkgdata_perfmodels_sampling_codelets_DATA)'; test -n "$(pkgdata_perfmodels_sampling_codeletsdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgdata_perfmodels_sampling_codeletsdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgdata_perfmodels_sampling_codeletsdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdata_perfmodels_sampling_codeletsdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdata_perfmodels_sampling_codeletsdir)" || exit $$?; \ done uninstall-dist_pkgdata_perfmodels_sampling_codeletsDATA: @$(NORMAL_UNINSTALL) @list='$(dist_pkgdata_perfmodels_sampling_codelets_DATA)'; test -n "$(pkgdata_perfmodels_sampling_codeletsdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgdata_perfmodels_sampling_codeletsdir)'; $(am__uninstall_files_from_dir) install-nobase_STARPU_MSVC_DATA: $(nobase_STARPU_MSVC__DATA) @$(NORMAL_INSTALL) @list='$(nobase_STARPU_MSVC__DATA)'; test -n "$(STARPU_MSVC_dir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_MSVC_dir)'"; \ $(MKDIR_P) "$(DESTDIR)$(STARPU_MSVC_dir)" || exit 1; \ fi; \ $(am__nobase_list) | while read dir files; do \ xfiles=; for file in $$files; do \ if test -f "$$file"; then xfiles="$$xfiles $$file"; \ else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ test -z "$$xfiles" || { \ test "x$$dir" = x. || { \ echo " $(MKDIR_P) '$(DESTDIR)$(STARPU_MSVC_dir)/$$dir'"; \ $(MKDIR_P) "$(DESTDIR)$(STARPU_MSVC_dir)/$$dir"; }; \ echo " $(INSTALL_DATA) $$xfiles '$(DESTDIR)$(STARPU_MSVC_dir)/$$dir'"; \ $(INSTALL_DATA) $$xfiles "$(DESTDIR)$(STARPU_MSVC_dir)/$$dir" || exit $$?; }; \ done uninstall-nobase_STARPU_MSVC_DATA: @$(NORMAL_UNINSTALL) @list='$(nobase_STARPU_MSVC__DATA)'; test -n "$(STARPU_MSVC_dir)" || list=; \ $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ dir='$(DESTDIR)$(STARPU_MSVC_dir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags # Recover from deleted '.trs' file; this should ensure that # "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create # both 'foo.log' and 'foo.trs'. Break the recipe in two subshells # to avoid problems with "make -n". .log.trs: rm -f $< $@ $(MAKE) $(AM_MAKEFLAGS) $< # Leading 'am--fnord' is there to ensure the list of targets does not # expand to empty, as could happen e.g. with make check TESTS=''. am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) am--force-recheck: @: $(TEST_SUITE_LOG): $(TEST_LOGS) @$(am__set_TESTS_bases); \ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ redo_bases=`for i in $$bases; do \ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ done`; \ if test -n "$$redo_bases"; then \ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ if $(am__make_dryrun); then :; else \ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ fi; \ fi; \ if test -n "$$am__remaking_logs"; then \ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ "recursion detected" >&2; \ elif test -n "$$redo_logs"; then \ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ fi; \ if $(am__make_dryrun); then :; else \ st=0; \ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ for i in $$redo_bases; do \ test -f $$i.trs && test -r $$i.trs \ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ test -f $$i.log && test -r $$i.log \ || { echo "$$errmsg $$i.log" >&2; st=1; }; \ done; \ test $$st -eq 0 || exit 1; \ fi @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ ws='[ ]'; \ results=`for b in $$bases; do echo $$b.trs; done`; \ test -n "$$results" || results=/dev/null; \ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ if test `expr $$fail + $$xpass + $$error` -eq 0; then \ success=true; \ else \ success=false; \ fi; \ br='==================='; br=$$br$$br$$br$$br; \ result_count () \ { \ if test x"$$1" = x"--maybe-color"; then \ maybe_colorize=yes; \ elif test x"$$1" = x"--no-color"; then \ maybe_colorize=no; \ else \ echo "$@: invalid 'result_count' usage" >&2; exit 4; \ fi; \ shift; \ desc=$$1 count=$$2; \ if test $$maybe_colorize = yes && test $$count -gt 0; then \ color_start=$$3 color_end=$$std; \ else \ color_start= color_end=; \ fi; \ echo "$${color_start}# $$desc $$count$${color_end}"; \ }; \ create_testsuite_report () \ { \ result_count $$1 "TOTAL:" $$all "$$brg"; \ result_count $$1 "PASS: " $$pass "$$grn"; \ result_count $$1 "SKIP: " $$skip "$$blu"; \ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ result_count $$1 "FAIL: " $$fail "$$red"; \ result_count $$1 "XPASS:" $$xpass "$$red"; \ result_count $$1 "ERROR:" $$error "$$mgn"; \ }; \ { \ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ $(am__rst_title); \ create_testsuite_report --no-color; \ echo; \ echo ".. contents:: :depth: 2"; \ echo; \ for b in $$bases; do echo $$b; done \ | $(am__create_global_log); \ } >$(TEST_SUITE_LOG).tmp || exit 1; \ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ if $$success; then \ col="$$grn"; \ else \ col="$$red"; \ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ fi; \ echo "$${col}$$br$${std}"; \ echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ echo "$${col}$$br$${std}"; \ create_testsuite_report --maybe-color; \ echo "$$col$$br$$std"; \ if $$success; then :; else \ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ if test -n "$(PACKAGE_BUGREPORT)"; then \ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ fi; \ echo "$$col$$br$$std"; \ fi; \ $$success || exit 1 check-TESTS: $(check_PROGRAMS) @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ log_list=`for i in $$bases; do echo $$i.log; done`; \ trs_list=`for i in $$bases; do echo $$i.trs; done`; \ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ exit $$?; recheck: all $(check_PROGRAMS) @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) @set +e; $(am__set_TESTS_bases); \ bases=`for i in $$bases; do echo $$i; done \ | $(am__list_recheck_tests)` || exit 1; \ log_list=`for i in $$bases; do echo $$i.log; done`; \ log_list=`echo $$log_list`; \ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? starpu_fxt_tool.log: starpu_fxt_tool$(EXEEXT) @p='starpu_fxt_tool$(EXEEXT)'; \ b='starpu_fxt_tool'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_fxt_stats.log: starpu_fxt_stats$(EXEEXT) @p='starpu_fxt_stats$(EXEEXT)'; \ b='starpu_fxt_stats'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_fxt_data_trace.log: starpu_fxt_data_trace$(EXEEXT) @p='starpu_fxt_data_trace$(EXEEXT)'; \ b='starpu_fxt_data_trace'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_calibrate_bus.log: starpu_calibrate_bus$(EXEEXT) @p='starpu_calibrate_bus$(EXEEXT)'; \ b='starpu_calibrate_bus'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_machine_display.log: starpu_machine_display$(EXEEXT) @p='starpu_machine_display$(EXEEXT)'; \ b='starpu_machine_display'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_sched_display.log: starpu_sched_display$(EXEEXT) @p='starpu_sched_display$(EXEEXT)'; \ b='starpu_sched_display'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_perfmodel_display.log: starpu_perfmodel_display$(EXEEXT) @p='starpu_perfmodel_display$(EXEEXT)'; \ b='starpu_perfmodel_display'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) starpu_perfmodel_plot.log: starpu_perfmodel_plot$(EXEEXT) @p='starpu_perfmodel_plot$(EXEEXT)'; \ b='starpu_perfmodel_plot'; \ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) .test.log: @p='$<'; \ $(am__set_b); \ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) @am__EXEEXT_TRUE@.test$(EXEEXT).log: @am__EXEEXT_TRUE@ @p='$<'; \ @am__EXEEXT_TRUE@ $(am__set_b); \ @am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ @am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ @am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ @am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: check-recursive all-am: Makefile $(PROGRAMS) $(SCRIPTS) $(MANS) $(DATA) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(pkgdatadir)" "$(DESTDIR)$(pkgdata_perfmodels_sampling_busdir)" "$(DESTDIR)$(pkgdata_perfmodels_sampling_codeletsdir)" "$(DESTDIR)$(STARPU_MSVC_dir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." @STARPU_HAVE_HELP2MAN_FALSE@clean-local: clean: clean-recursive clean-am: clean-binPROGRAMS clean-checkPROGRAMS clean-generic \ clean-libtool clean-local clean-noinstPROGRAMS mostlyclean-am distclean: distclean-recursive -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f ./$(DEPDIR)/starpu_calibrate_bus.Po -rm -f ./$(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Po -rm -f ./$(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Po -rm -f ./$(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Po -rm -f ./$(DEPDIR)/starpu_lp2paje.Po -rm -f ./$(DEPDIR)/starpu_machine_display.Po -rm -f ./$(DEPDIR)/starpu_perfmodel_display.Po -rm -f ./$(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Po -rm -f ./$(DEPDIR)/starpu_perfmodel_recdump.Po -rm -f ./$(DEPDIR)/starpu_replay.Po -rm -f ./$(DEPDIR)/starpu_replay_sched.Po -rm -f ./$(DEPDIR)/starpu_sched_display.Po -rm -f ./$(DEPDIR)/starpu_tasks_rec_complete.Po -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dist_pkgdataDATA \ install-dist_pkgdata_perfmodels_sampling_busDATA \ install-dist_pkgdata_perfmodels_sampling_codeletsDATA \ install-man install-nobase_STARPU_MSVC_DATA install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-binPROGRAMS install-dist_binSCRIPTS install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-man1 install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f ./$(DEPDIR)/loader-loader.Po -rm -f ./$(DEPDIR)/starpu_calibrate_bus.Po -rm -f ./$(DEPDIR)/starpu_fxt_data_trace-starpu_fxt_data_trace.Po -rm -f ./$(DEPDIR)/starpu_fxt_stats-starpu_fxt_stats.Po -rm -f ./$(DEPDIR)/starpu_fxt_tool-starpu_fxt_tool.Po -rm -f ./$(DEPDIR)/starpu_lp2paje.Po -rm -f ./$(DEPDIR)/starpu_machine_display.Po -rm -f ./$(DEPDIR)/starpu_perfmodel_display.Po -rm -f ./$(DEPDIR)/starpu_perfmodel_plot-starpu_perfmodel_plot.Po -rm -f ./$(DEPDIR)/starpu_perfmodel_recdump.Po -rm -f ./$(DEPDIR)/starpu_replay.Po -rm -f ./$(DEPDIR)/starpu_replay_sched.Po -rm -f ./$(DEPDIR)/starpu_sched_display.Po -rm -f ./$(DEPDIR)/starpu_tasks_rec_complete.Po -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-binPROGRAMS uninstall-dist_binSCRIPTS \ uninstall-dist_pkgdataDATA \ uninstall-dist_pkgdata_perfmodels_sampling_busDATA \ uninstall-dist_pkgdata_perfmodels_sampling_codeletsDATA \ uninstall-man uninstall-nobase_STARPU_MSVC_DATA uninstall-man: uninstall-man1 .MAKE: $(am__recursive_targets) check-am install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ am--depfiles check check-TESTS check-am clean \ clean-binPROGRAMS clean-checkPROGRAMS clean-generic \ clean-libtool clean-local clean-noinstPROGRAMS cscopelist-am \ ctags ctags-am distclean distclean-compile distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-binPROGRAMS \ install-data install-data-am install-dist_binSCRIPTS \ install-dist_pkgdataDATA \ install-dist_pkgdata_perfmodels_sampling_busDATA \ install-dist_pkgdata_perfmodels_sampling_codeletsDATA \ install-dvi install-dvi-am install-exec install-exec-am \ install-html install-html-am install-info install-info-am \ install-man install-man1 install-nobase_STARPU_MSVC_DATA \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ installdirs-am maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \ uninstall uninstall-am uninstall-binPROGRAMS \ uninstall-dist_binSCRIPTS uninstall-dist_pkgdataDATA \ uninstall-dist_pkgdata_perfmodels_sampling_busDATA \ uninstall-dist_pkgdata_perfmodels_sampling_codeletsDATA \ uninstall-man uninstall-man1 uninstall-nobase_STARPU_MSVC_DATA .PRECIOUS: Makefile @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ @$(MKDIR_P) `dirname $@` @STARPU_COVERITY_TRUE@@STARPU_USE_CUDA_TRUE@ $(V_mynvcc)grep 'extern *"C" *void *' $< | sed -ne 's/extern *"C" *void *\([a-zA-Z0-9_]*\) *(.*/void \1(void) {}/p' | $(CC) -x c - -o $@ -c @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.cubin: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) -cubin $< -o $@ $(NVCCFLAGS) @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@.cu.o: @STARPU_COVERITY_FALSE@@STARPU_USE_CUDA_TRUE@ $(V_nvcc) $(NVCC) $< -c -o $@ $(NVCCFLAGS) @STARPU_USE_HIP_TRUE@.hip.o: @STARPU_USE_HIP_TRUE@ $(V_hipcc) $(HIPCC) $< -c -o $@ $(HIPCCFLAGS) STARPU_MPI_NP ?= 4 showcheckfailed: @ for x in $(shell grep -l "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null) ; do cat $$x ; done @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheckfailed || RET=1 ; \ done ; \ exit $$RET showfailed: @! grep "^FAIL " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null 2>/dev/null @! grep -l " runtime error: " $(TEST_LOGS) /dev/null 2>/dev/null @RET=0 ; \ for i in $(SUBDIRS) ; do \ make -s -C $$i showfailed || RET=1 ; \ done ; \ exit $$RET showcheck: -cat $(TEST_LOGS) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_LOGS) /dev/null @! grep -q " runtime error: " $(TEST_LOGS) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showcheck || RET=1 ; \ done ; \ exit $$RET showsuite: -cat $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: AddressSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: ThreadSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "ERROR: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q "WARNING: LeakSanitizer: " $(TEST_SUITE_LOG) /dev/null @! grep -q " runtime error: " $(TEST_SUITE_LOG) /dev/null RET=0 ; \ for i in $(SUBDIRS) ; do \ make -C $$i showsuite || RET=1 ; \ done ; \ exit $$RET @STARPU_SIMGRID_TRUE@export STARPU_PERF_MODEL_DIR=$(abs_top_srcdir)/tools/perfmodels/sampling @STARPU_SIMGRID_TRUE@export STARPU_HOSTNAME=mirage @STARPU_SIMGRID_TRUE@export MALLOC_PERTURB_=0 @STARPU_SIMGRID_TRUE@env: @STARPU_SIMGRID_TRUE@ @echo export STARPU_PERF_MODEL_DIR=$(STARPU_PERF_MODEL_DIR) @STARPU_SIMGRID_TRUE@ @echo export STARPU_HOSTNAME=$(STARPU_HOSTNAME) @STARPU_SIMGRID_TRUE@ @echo export MALLOC_PERTURB_=$(MALLOC_PERTURB_) @STARPU_SIMGRID_TRUE@export STARPU_SIMGRID=1 @STARPU_QUICK_CHECK_TRUE@export STARPU_QUICK_CHECK=1 @STARPU_LONG_CHECK_TRUE@export STARPU_LONG_CHECK=1 # # Test loading goes through a lot of launchers: # # - $(LAUNCHER) is called first, to run the test through starpu_msexec, i.e. # either mpirun or starpu_tcpipexec # # - $(LOADER), i.e. tests/loader, is then called to implement timeout, running # gdb, etc. But if it detects that the test is a .sh script, it just executes # it # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # to run the program through e.g. valgrind.sh # # When the program is a shell script, additionally: # # - $(STARPU_SUB_PARALLEL) is called to control parallelism (see below) # # - $(MS_LAUNCHER) is called to run the test through starpu_msexec # # - $(STARPU_LAUNCH) was set by tests/loader to its own path, to run the program # through it. # # - $(STARPU_CHECK_LAUNCHER) $(STARPU_CHECK_LAUNCHER_ARGS) is called by loader # export LAUNCHER @HAVE_PARALLEL_TRUE@export STARPU_SUB_PARALLEL export MS_LAUNCHER LAUNCHER ?= MS_LAUNCHER ?= @STARPU_HAVE_WINDOWS_FALSE@LOADER ?= ./loader LSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/lsan/suppressions TSAN_OPTIONS ?= suppressions=$(abs_top_srcdir)/tools/dev/tsan/starpu.suppr export LSAN_OPTIONS export TSAN_OPTIONS @STARPU_HAVE_HELP2MAN_TRUE@starpu_calibrate_bus.1: starpu_calibrate_bus$(EXEEXT) @STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Force StarPU bus calibration" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@starpu_machine_display.1: starpu_machine_display$(EXEEXT) @STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Display machine StarPU information" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@starpu_perfmodel_display.1: starpu_perfmodel_display$(EXEEXT) @STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Display StarPU performance model" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@starpu_perfmodel_plot.1: starpu_perfmodel_plot$(EXEEXT) @STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Plot StarPU performance model" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@starpu_tasks_rec_complete.1: starpu_tasks_rec_complete$(EXEEXT) @STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Complete StarPU tasks.rec file" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@starpu_lp2paje.1: starpu_lp2paje$(EXEEXT) @STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Convert lp StarPU schedule into Paje format" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@starpu_workers_activity.1: starpu_workers_activity @STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< @STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Display StarPU workers activity" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@starpu_codelet_profile.1: starpu_codelet_profile @STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< @STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU codelet profile" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@starpu_env.1: starpu_env @STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< @STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Set StarPU environment variables" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@starpu_codelet_histo_profile.1: starpu_codelet_histo_profile @STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< @STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU codelet histogram" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@starpu_mpi_comm_matrix.1: starpu_mpi_comm_matrix.py @STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< @STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU MPI communications matrix" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@starpu_fxt_number_events_to_names.1: starpu_fxt_number_events_to_names.py @STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< @STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Convert events in StarPU traces" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@starpu_paje_draw_histogram.1: starpu_paje_draw_histogram @STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< @STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Draw StarPU trace histogram" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@starpu_paje_state_stats.1: starpu_paje_state_stats @STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< @STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Print statistics from StarPU trace" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@starpu_config.1: starpu_config @STARPU_HAVE_HELP2MAN_TRUE@ @chmod +x $< @STARPU_HAVE_HELP2MAN_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Display StarPU configuration" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@starpu_fxt_tool.1: starpu_fxt_tool$(EXEEXT) @STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Convert raw StarPU FxT trace to various traces" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@starpu_fxt_stats.1: starpu_fxt_stats$(EXEEXT) @STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Print statistics from raw StarPU FxT trace" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@starpu_fxt_data_trace.1: starpu_fxt_data_trace$(EXEEXT) @STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_FXT_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Print data trace from raw StarPU FxT trace" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@starpu_tcpipexec.1: starpu_tcpipexec @STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@ @chmod +x $< @STARPU_HAVE_HELP2MAN_TRUE@@STARPU_USE_TCPIP_MASTER_SLAVE_TRUE@ $(V_help2man) LC_ALL=C help2man --no-discard-stderr -N -n "Execute TCPIP application" --output=$@ ./$< @STARPU_HAVE_HELP2MAN_TRUE@clean-local: @STARPU_HAVE_HELP2MAN_TRUE@ $(RM) $(dist_man1_MANS) starpu_config.cfg starpu_config.cfg: $(top_builddir)/src/common/config.h grep STARPU $< | grep '#' > $@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: starpu-1.4.9+dfsg/tools/ayudame.cfg000066400000000000000000000022321507764646700172550ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # default configuration file for Ayudame2 connect { stdout_human = "true"; stdout_raw = "false"; temanejo1 = "false"; temanejo = "true"; dot = "false"; dot_filename = "ayudame.dot"; xml = "true"; xml_filename = "ayudame.xml"; ayu_port = "8888"; ayu_host = "localhost"; } logging { error = "true"; warning = "true"; info = "true"; verbosity_level = "3"; debug = "true"; } starpu-1.4.9+dfsg/tools/dev/000077500000000000000000000000001507764646700157265ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/dev/checker/000077500000000000000000000000001507764646700173325ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/dev/checker/rename.sed000066400000000000000000000241541507764646700213040ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # s/\bstruct starpu_per_arch_perfmodel\b/struct starpu_perfmodel_per_arch/g s/\bstruct starpu_regression_model\b/struct starpu_perfmodel_regression_model/g s/\bstruct starpu_history_table\b/struct starpu_perfmodel_history_table/g s/\bstruct starpu_history_entry\b/struct starpu_perfmodel_history_entry/g s/\bstruct starpu_history_list\b/struct starpu_perfmodel_history_list/g s/\bstarpu_list_models\b/starpu_perfmodel_list/g s/\bstruct starpu_model_list\b/struct _starpu_perfmodel_list/g s/\bstarpu_load_history_debug\b/starpu_perfmodel_load_symbol/g s/\bstarpu_access_mode\b/enum starpu_access_mode/g s/\bstruct starpu_codelet_t\b/struct starpu_codelet/g s/\bstarpu_codelet\b/struct starpu_codelet/g s/\bstarpu_codelet_t\b/struct starpu_codelet/g s/\bstarpu_data_handle\b/starpu_data_handle_t/g s/\bstarpu_block_interface_t\b/struct starpu_block_interface/g s/\bstarpu_matrix_interface_t\b/struct starpu_matrix_interface/g s/\bstarpu_vector_interface_t\b/struct starpu_vector_interface/g s/\bstarpu_variable_interface_t\b/struct starpu_variable_interface/g s/\bstarpu_csr_interface_t\b/struct starpu_csr_interface/g s/\bstarpu_bcsr_interface_t\b/struct starpu_bcsr_interface/g s/\bstarpu_multiformat_interface_t\b/struct starpu_multiformat_interface/g s/\bstarpu_machine_topology_s\b/starpu_machine_topology/g s/\bstarpu_htbl32_node_s\b/starpu_htbl32_node/g s/\bstarpu_history_list_t\b/starpu_history_list/g s/\bstarpu_buffer_descr_t\b/starpu_buffer_descr/g s/\bstarpu_history_entry_t\b/starpu_history_entry/g s/\bstarpu_history_list_t\b/starpu_history_list/g s/\bstarpu_model_list_t\b/starpu_model_list/g s/\bstarpu_regression_model_t\b/starpu_regression_model/g s/\bstarpu_per_arch_perfmodel_t\b/starpu_per_arch_perfmodel/g #s/\bstarpu_buffer_descr\b/struct starpu_buffer_descr/g s/\bstarpu_perfmodel_t\b/starpu_perfmodel/g s/\bstarpu_sched_policy_s\b/starpu_sched_policy/g s/\bstarpu_data_interface_ops_t\b/starpu_data_interface_ops/g s/\bstarpu_submit_task\b/starpu_task_submit/g s/\bstarpu_wait_task\b/starpu_task_wait/g s/\bstarpu_helper_init_cublas\b/starpu_helper_cublas_init/g s/\bstarpu_helper_shutdown_cublas\b/starpu_helper_cublas_shutdown/g s/\bstarpu_deregister_progression_hook\b/starpu_progression_hook_deregister/g s/\bstarpu_register_progression_hook\b/starpu_progression_hook_register/g s/\bstarpu_get_worker_id\b/starpu_worker_get_id/g s/\bstarpu_get_worker_devid\b/starpu_worker_get_devid/g s/\bstarpu_get_worker_memory_node\b/starpu_worker_get_memory_node/g s/\bstarpu_get_worker_name\b/starpu_worker_get_name/g s/\bstarpu_get_worker_type\b/starpu_worker_get_type/g s/\bstarpu_get_worker_count\b/starpu_worker_get_count/g s/\bstarpu_get_cpu_worker_count\b/starpu_cpu_worker_get_count/g s/\bstarpu_get_spu_worker_count\b/starpu_spu_worker_get_count/g s/\bstarpu_get_opencl_worker_count\b/starpu_opencl_worker_get_count/g s/\bstarpu_get_cuda_worker_count\b/starpu_cuda_worker_get_count/g s/\bstarpu_get_local_cuda_stream\b/starpu_cuda_get_local_stream/g s/\bstarpu_wait_all_tasks\b/starpu_task_wait_for_all/g s/\bstarpu_delete_data\b/starpu_data_unregister/g s/\bstarpu_malloc_pinned_if_possible\b/starpu_data_malloc_pinned_if_possible/g s/\bstarpu_free_pinned_if_possible\b/starpu_data_free_pinned_if_possible/g s/\bstarpu_sync_data_with_mem\b/starpu_data_acquire/g s/\bstarpu_data_sync_with_mem\b/starpu_data_acquire/g s/\bstarpu_sync_data_with_mem_non_blocking\b/starpu_data_acquire_cb/g s/\bstarpu_data_sync_with_mem_non_blocking\b/starpu_data_acquire_cb/g s/\bstarpu_release_data_from_mem\b/starpu_data_release/g s/\bstarpu_data_release_from_mem\b/starpu_data_release/g s/\bstarpu_advise_if_data_is_important\b/starpu_data_advise_as_important/g s/\bstarpu_request_data_allocation\b/starpu_data_request_allocation/g s/\bstarpu_prefetch_data_on_node\b/starpu_data_prefetch_on_node/g s/\bstarpu_get_sub_data\b/starpu_data_get_sub_data/g s/\bstarpu_partition_data\b/starpu_data_partition/g s/\bstarpu_unpartition_data\b/starpu_data_unpartition/g s/\bstarpu_map_filters\b/starpu_data_map_filters/g s/\bstarpu_test_if_data_is_allocated_on_node\b/starpu_data_test_if_allocated_on_node/g s/\bstarpu_get_block_elemsize\b/starpu_block_get_elemsize/g s/\bstarpu_get_block_local_ldy\b/starpu_block_get_local_ldy/g s/\bstarpu_get_block_local_ldz\b/starpu_block_get_local_ldz/g s/\bstarpu_get_block_local_ptr\b/starpu_block_get_local_ptr/g s/\bstarpu_get_block_nx\b/starpu_block_get_nx/g s/\bstarpu_get_block_ny\b/starpu_block_get_ny/g s/\bstarpu_get_block_nz\b/starpu_block_get_nz/g s/\bstarpu_register_block_data\b/starpu_block_data_register/g s/\bstarpu_get_bcsr_c\b/starpu_bcsr_get_c/g s/\bstarpu_get_bcsr_elemsize\b/starpu_bcsr_get_elemsize/g s/\bstarpu_get_bcsr_firstentry\b/starpu_bcsr_get_firstentry/g s/\bstarpu_get_bcsr_local_colind\b/starpu_bcsr_get_local_colind/g s/\bstarpu_get_bcsr_local_nzval\b/starpu_bcsr_get_local_nzval/g s/\bstarpu_get_bcsr_local_rowptr\b/starpu_bcsr_get_local_rowptr/g s/\bstarpu_get_bcsr_nnz\b/starpu_bcsr_get_nnz/g s/\bstarpu_get_bcsr_nrow\b/starpu_bcsr_get_nrow/g s/\bstarpu_get_bcsr_r\b/starpu_bcsr_get_r/g s/\bstarpu_register_bcsr_data\b/starpu_bcsr_data_register/g s/\bstarpu_get_csr_elemsize\b/starpu_csr_get_elemsize/g s/\bstarpu_get_csr_firstentry\b/starpu_csr_get_firstentry/g s/\bstarpu_get_csr_local_colind\b/starpu_csr_get_local_colind/g s/\bstarpu_get_csr_local_nzval\b/starpu_csr_get_local_nzval/g s/\bstarpu_get_csr_local_rowptr\b/starpu_csr_get_local_rowptr/g s/\bstarpu_get_csr_nnz\b/starpu_csr_get_nnz/g s/\bstarpu_get_csr_nrow\b/starpu_csr_get_nrow/g s/\bstarpu_register_csr_data\b/starpu_csr_data_register/g s/\bstarpu_get_matrix_elemsize\b/starpu_matrix_get_elemsize/g s/\bstarpu_get_matrix_local_ld\b/starpu_matrix_get_local_ld/g s/\bstarpu_get_matrix_local_ptr\b/starpu_matrix_get_local_ptr/g s/\bstarpu_get_matrix_nx\b/starpu_matrix_get_nx/g s/\bstarpu_get_matrix_ny\b/starpu_matrix_get_ny/g s/\bstarpu_register_matrix_data\b/starpu_matrix_data_register/g s/\bstarpu_divide_in_2_filter_func_vector\b/starpu_vector_divide_in_2_filter_func/g s/\bstarpu_register_vector_data\b/starpu_vector_data_register/g s/\bstarpu_get_vector_elemsize\b/starpu_vector_get_elemsize/g s/\bstarpu_get_vector_local_ptr\b/starpu_vector_get_local_ptr/g s/\bstarpu_get_vector_nx\b/starpu_vector_get_nx/g s/\bstarpu_data_set_wb_mask\b/starpu_data_set_wt_mask/g s/\bstarpu_list_filter_func_vector\b/starpu_vector_list_filter_func/g s/\bSTARPU_GET_MATRIX_PTR\b/STARPU_MATRIX_GET_PTR/g s/\bSTARPU_GET_MATRIX_NX\b/STARPU_MATRIX_GET_NX/g s/\bSTARPU_GET_MATRIX_NY\b/STARPU_MATRIX_GET_NY/g s/\bSTARPU_GET_MATRIX_LD\b/STARPU_MATRIX_GET_LD/g s/\bSTARPU_GET_MATRIX_ELEMSIZE\b/STARPU_MATRIX_GET_ELEMSIZE/g s/\bSTARPU_GET_BLOCK_PTR\b/STARPU_BLOCK_GET_PTR/g s/\bSTARPU_GET_BLOCK_NX\b/STARPU_BLOCK_GET_NX/g s/\bSTARPU_GET_BLOCK_NY\b/STARPU_BLOCK_GET_NY/g s/\bSTARPU_GET_BLOCK_NZ\b/STARPU_BLOCK_GET_NZ/g s/\bSTARPU_GET_BLOCK_LDY\b/STARPU_BLOCK_GET_LDY/g s/\bSTARPU_GET_BLOCK_LDZ\b/STARPU_BLOCK_GET_LDZ/g s/\bSTARPU_GET_BLOCK_ELEMSIZE\b/STARPU_BLOCK_GET_ELEMSIZE/g s/\bSTARPU_GET_VECTOR_PTR\b/STARPU_VECTOR_GET_PTR/g s/\bSTARPU_GET_VECTOR_NX\b/STARPU_VECTOR_GET_NX/g s/\bSTARPU_GET_VECTOR_ELEMSIZE\b/STARPU_VECTOR_GET_ELEMSIZE/g s/\bSTARPU_GET_VARIABLE_PTR\b/STARPU_VARIABLE_GET_PTR/g s/\bSTARPU_GET_VARIABLE_ELEMSIZE\b/STARPU_VARIABLE_GET_ELEMSIZE/g s/\bSTARPU_GET_CSR_NNZ\b/STARPU_CSR_GET_NNZ/g s/\bSTARPU_GET_CSR_NROW\b/STARPU_CSR_GET_NROW/g s/\bSTARPU_GET_CSR_NZVAL\b/STARPU_CSR_GET_NZVAL/g s/\bSTARPU_GET_CSR_COLIND\b/STARPU_CSR_GET_COLIND/g s/\bSTARPU_GET_CSR_ROWPTR\b/STARPU_CSR_GET_ROWPTR/g s/\bSTARPU_GET_CSR_FIRSTENTRY\b/STARPU_CSR_GET_FIRSTENTRY/g s/\bSTARPU_GET_CSR_ELEMSIZE\b/STARPU_CSR_GET_ELEMSIZE/g s/\bstarpu_print_bus_bandwidth\b/starpu_bus_print_bandwidth/g s/\bstarpu_get_handle_interface_id\b/starpu_handle_get_interface_id/g s/\bstarpu_get_current_task\b/starpu_task_get_current/g s/\bstarpu_pack_cl_args\b/starpu_codelet_pack_args/g s/\bstarpu_unpack_cl_args\b/starpu_codelet_unpack_args/g s/\bstarpu_task_deinit\b/starpu_task_clean/g s/\bstarpu_helper_cublas_init\b/starpu_cublas_init/g s/\bstarpu_helper_cublas_shutdown\b/starpu_cublas_shutdown/g s/\bstarpu_allocate_buffer_on_node\b/starpu_malloc_on_node/g s/\bstarpu_free_buffer_on_node\b/starpu_free_on_node/g s/\benum starpu_access_mode\b/enum starpu_data_access_mode/g s/\bstruct starpu_buffer_descr\b/struct starpu_data_descr/g s/\bstarpu_memory_display_stats\b/starpu_data_display_memory_stats/g s/\bstarpu_handle_to_pointer\b/starpu_data_handle_to_pointer/g s/\bstarpu_handle_get_local_ptr\b/starpu_data_get_local_ptr/g s/\bstarpu_crc32_be_n\b/starpu_hash_crc32c_be_n/g s/\bstarpu_crc32_be\b/starpu_hash_crc32c_be/g s/\bstarpu_crc32_string\b/starpu_hash_crc32c_string/g s/\benum starpu_perf_archtype\b/enum starpu_perfmodel_archtype/g s/\bstarpu_history_based_expected_perf\b/starpu_permodel_history_based_expected_perf/g s/\bstruct starpu_task_profiling_info\b/struct starpu_profiling_task_info/g s/\bstruct starpu_worker_profiling_info\b/struct starpu_profiling_worker_info/g s/\bstruct starpu_bus_profiling_info\b/struct starpu_profiling_bus_info/g s/\bstarpu_set_profiling_id\b/starpu_profiling_set_id/g s/\bstarpu_worker_get_profiling_info\b/starpu_profiling_worker_get_info/g s/\bstarpu_bus_profiling_helper_display_summary\b/starpu_profiling_bus_helper_display_summary/g s/\bstarpu_worker_profiling_helper_display_summary\b/starpu_profiling_worker_helper_display_summary/g s/\benum starpu_archtype\b/enum starpu_worker_archtype/g s/\bstarpu_handle_get_interface_id\b/starpu_data_get_interface_id/g s/\bstarpu_handle_get_size\b/starpu_data_get_size/g s/\bstarpu_handle_pack_data\b/starpu_data_pack/g s/\bstarpu_handle_unpack_data\b/starpu_data_unpack/g starpu-1.4.9+dfsg/tools/dev/checker/rename.sh000077500000000000000000000013561507764646700211450ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # find . -type f -not -name rename.sed |xargs sed -i -f $(dirname $0)/rename.sed starpu-1.4.9+dfsg/tools/dev/cppcheck/000077500000000000000000000000001507764646700175065ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/dev/cppcheck/suppressions.txt000066400000000000000000000132551507764646700230320ustar00rootroot00000000000000// StarPU --- Runtime system for heterogeneous multicore architectures. // // Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria // // StarPU is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License as published by // the Free Software Foundation; either version 2.1 of the License, or (at // your option) any later version. // // StarPU is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // See the GNU Lesser General Public License in COPYING.LGPL for more details. // knownConditionTrueFalse variableScope ConfigurationNotChecked shadowVariable unusedFunction unmatchedSuppression unusedStructMember missingInclude constArgument nullPointer nullPointerRedundantCheck cstyleCast constParameter unknownMacro *:build*/* *:starpufft/* *:min-dgels/* *:socl/src/CL/* *:tools/* // TODO. We should cppcheck the code *:sc_hypervisor/* varFuncNullUB:examples/sched_ctx/two_cpu_contexts.c:76 negativeIndex:examples/stencil/stencil-tasks.c constStatement:examples/stencil/* shiftTooManyBitsSigned:examples/pi/SobolQRNG/sobol_gold.c:88 unreadVariable:tests/openmp/* unusedLabel:tests/datawizard/gpu_register.c unusedLabel:tests/datawizard/gpu_ptr_register.c redundantAssignment:tests/datawizard/interfaces/test_interfaces.c:757 redundantAssignment:tests/datawizard/mpi_like_async.c:165 redundantAssignment:tests/datawizard/mpi_like_async.c:211 unusedPrivateFunction:tests/main/combined_workers/bfs/timer.h cstyleCast:tests/main/combined_workers/bfs/bfs.cpp redundantAssignment:tests/main/driver_api/init_run_deinit.c redundantAssignment:tests/main/driver_api/run_driver.c unreadVariable:tests/datawizard/variable_size.c uselessAssignmentPtrArg:mpi/src/starpu_mpi.c:171 unreadVariable:mpi/src/mpi/starpu_mpi_mpi.c:641 unusedVariable:mpi/src/mpi/starpu_mpi_mpi.c:715 unreadVariable:mpi/src/mpi/starpu_mpi_mpi.c:716 unreadVariable:mpi/src/mpi/starpu_mpi_mpi.c:704 unreadVariable:mpi/src/mpi/starpu_mpi_mpi.c:738 unreadVariable:mpi/src/load_balancer/policy/load_heat_propagation.c:597 signConversion:mpi/examples/matrix_decomposition/mpi_cholesky_codelets.c:195 nullPointer:src/common/utils.c:554 redundantAssignment:src/common/utils.c:547 nullPointerRedundantCheck:src/common/rbtree.c negativeIndex:include/starpu_bitmap.h:275 AssignmentIntegerToAddress:src/core/jobs.c:148 redundantAssignment:src/core/workers.c uselessAssignmentPtrArg:src/core/sched_ctx_list.c:144 wrongPrintfScanfArgNum:src/core/simgrid.c:1296 unreadVariable:src/core/task.c:657 unusedVariable:src/core/task.c:658 uninitvar:src/core/sched_ctx.c:387 invalidPointerCast:src/core/perfmodel/perfmodel_nan.c:74 unreadVariable:src/core/perfmodel/energy_model.c:179 unusedLabel:src/core/perfmodel/perfmodel_bus.c:757 unreadVariable:src/core/perfmodel/perfmodel_bus.c:1399 unreadVariable:src/core/perfmodel/perfmodel_bus.c:1739 unreadVariable:src/core/perfmodel/perfmodel_bus.c:1420 unreadVariable:src/core/perfmodel/perfmodel_bus.c:1761 unreadVariable:src/core/perfmodel/perfmodel_bus.c:1435 unreadVariable:src/core/perfmodel/perfmodel_bus.c:1775 unreadVariable:src/core/perfmodel/perfmodel_bus.c:1458 unreadVariable:src/core/perfmodel/perfmodel_bus.c:1799 unsignedPositive:src/core/perfmodel/perfmodel_bus.c:1390 unsignedPositive:src/core/perfmodel/perfmodel_bus.c:1730 unreadVariable:src/core/dependencies/tags.c:120 nullPointerRedundantCheck:src/datawizard/data_request.c:228 nullPointerRedundantCheck:src/datawizard/data_request.c:230 nullPointerRedundantCheck:src/datawizard/copy_driver.c:682 unreadVariable:src/datawizard/interfaces/* unreadVariable:src/drivers/driver_common/driver_common.c:493 redundantAssignment:src/drivers/hip/driver_hip.c:1188 unreadVariable:src/drivers/hip/driver_hip.c:346 unreadVariable:src/drivers/hip/driver_hip.c:343 unreadVariable:src/drivers/hip/driver_hip.c:1147 unreadVariable:src/drivers/hip/driver_hip.c:1095 unreadVariable:src/drivers/opencl/driver_opencl.c:767 redundantAssignment:src/drivers/opencl/driver_opencl.c:831 clarifyCondition:src/drivers/opencl/driver_opencl.c:945 unreadVariable:src/drivers/opencl/driver_opencl.c:1489 unreadVariable:src/drivers/opencl/driver_opencl.c:1406 redundantAssignment:src/drivers/opencl/driver_opencl.c:1534 clarifyCondition:src/drivers/cuda/driver_cuda.c:498 unreadVariable:src/drivers/cuda/driver_cuda.c:2151 unreadVariable:src/drivers/cuda/driver_cuda.c:2041 unreadVariable:src/drivers/cuda/driver_cuda.c:1696 unreadVariable:src/drivers/cuda/driver_cuda0.c:249 redundantAssignment:src/drivers/cuda/driver_cuda1.c:1236 unreadVariable:src/drivers/cuda/driver_cuda1.c:357 unreadVariable:src/drivers/cuda/driver_cuda1.c:1195 unreadVariable:src/drivers/cuda/driver_cuda1.c:1143 nullPointerRedundantCheck:src/sched_policies/fifo_queues.c:507 nullPointerRedundantCheck:src/sched_policies/deque_modeling_policy_data_aware.c:207 redundantAssignment:src/sched_policies/component_eager.c:109 varFuncNullUB:src/sched_policies/modular_heteroprio_heft.c:37 nullPointerRedundantCheck:src/sched_policies/prio_deque.c:176 negativeIndex:src/sched_policies/parallel_heft.c:478 sizeofDereferencedVoidPointer:src/util/fstarpu.c duplicateExpression:src/util/starpu_task_insert.c:52 pointerSize:socl/src/cl_getcontextinfo.c:33 unreadVariable:socl/src/gc.c:193 // the following warnings are weird and not understandable, let's ignore them ignoredReturnValue:socl/src/cl_createkernel.c:170 leakReturnValNotUsed:socl/src/cl_createkernel.c:170 ignoredReturnValue:socl/src/cl_createprogramwithsource.c:136 leakReturnValNotUsed:socl/src/cl_createprogramwithsource.c:136 varFuncNullUB:bubble/tests/basic/gemm_dag.c:213 varFuncNullUB:bubble/tests/basic/gemm_dag.c:208 starpu-1.4.9+dfsg/tools/dev/lsan/000077500000000000000000000000001507764646700166635ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/dev/lsan/suppressions000066400000000000000000000021711507764646700213640ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # libc leak:lt__malloc # openmpi leak:ompi_free_list_grow leak:opal_dss_unpack_value leak:opal_dss_copy_value leak:mca_bml_base_btl_array_reserve leak:opal_hash_table_init # hwloc leak:hwloc_topology_load leak:hwloc_topology_set_xml leak:hwloc_components_init leak:hwloc_plugins_init leak:hwloc_plugins_exit # papi leak:_pe_libpfm4_init leak:allocate_thread leak:pfmlib_build_fstr # starpupy leak:/numpy/random/ leak:_abc__abc_init leak:marshal_loads # leak:PyTuple_New starpu-1.4.9+dfsg/tools/dev/tsan/000077500000000000000000000000001507764646700166735ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/dev/tsan/starpu.suppr000066400000000000000000000066431507764646700213150ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Disabled checking race:^active_hook_cnt$ race:^worker_exp_start$ race:^worker_exp_end$ race:^worker_exp_len$ race:^ntasks$ race:^mc_cache_size$ race:^mc_nb$ race:^mc_clean_nb$ race:^prefetch_out_of_memory$ race:^data_requests$ race:^prefetch_requests$ race:^idle_requests$ race:^data_requests_npending$ race:^used_size$ race:^hit_cnt$ race:^miss_cnt$ race:^alloc_cache_hit_cnt$ race:^alloc_cnt$ race:^comm_amount$ race:^_starpu_valgrind_print_once$ race:^_starpu_silent$ race:^keys_initialized$ race:^tidying$ race:^reclaiming$ race:^_starpu_worker_drives_memory$ race:^starpu_memory_get_total$ race:^starpu_unistd_opened_files$ # don't care about cache hit stats race:^_starpu_msi_cache_hit$ race:^_starpu_msi_cache_miss$ # This is racy, but since we'll always put the same values, this is not a problem. race:^_starpu_codelet_check_deprecated_fields$ # This is racy, but we don't care, it's only a statistic race:^starpu_task_nsubmitted$ race:^starpu_task_nready$ race:^_starpu_bus_update_profiling_info$ race:^lws_select_victim$ race:^select_worker_round_robin$ # The config.running/pause_depth state is only protected by memory barriers race:^_starpu_machine_is_running$ race:^_starpu_kill_all_workers$ race:^starpu_pause$ race:^_starpu_may_pause$ race:^starpu_resume$ race:^is_running$ race:^_starpu_sink_deinit$ # worker_is_initialized is not actually racy since deinit happens only after main set running to 0 race:^_starpu_opencl_driver_deinit$ race:^_starpu_cuda_driver_deinit$ race:^_starpu_cpu_driver_deinit$ # The integer access is atomic, and we use the sched mutex to avoid missing wake ups race:^starpu_st_fifo_taskq_empty$ race:^push_task_eager_policy$ # These are just statistics race:^starpu_memory_get_available$ race:^_starpu_profiling$ race:^_starpu_history_based_job_expected_perf$ race:^compute_ntasks_end$ race:^compute_expected_end$ race:^compute_all_performance_predictions$ # There is actually no race with busy_count, see comment race:_starpu_data_unregister # ignore other libraries' races called_from_lib:^libmpi.so$ called_from_lib:^libhwloc*.so$ # see valgrind/starpu.suppr deadlock:starpu_pthread_mutex_lock_sched deadlock:_starpu_sched_component_lock_worker deadlock:_starpu_sched_component_worker_lock_scheduling deadlock:simple_worker_pull_task # the assert on lock_write_owner to check for recursive write lock is inherently racy, but fine # for the intended purpose race:^_starpu_sched_ctx_lock_write$ race:^_starpu_sched_ctx_lock_read$ # only protected by memory barrier race:^_starpu_keys_initialized$ # disable race detection on cg->ntags, see comment in code race:^_starpu_notify_cg$ # this does not need to be safe race:^evictable$ # don't care about data status query race, this is just a hint race:^starpu_data_query_status$ race:^starpu_data_query_status2$ starpu-1.4.9+dfsg/tools/dev/valgrind/000077500000000000000000000000001507764646700175345ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/dev/valgrind/bash.suppr000066400000000000000000000015071507764646700215470ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { Memcheck:Free ... obj:/bin/bash fun:execute_command_internal fun:execute_command fun:reader_loop fun:main } starpu-1.4.9+dfsg/tools/dev/valgrind/blas.suppr000066400000000000000000000013721507764646700215530ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { Ignore blas internal races Helgrind:Race ... fun:blas_memory_alloc ... } starpu-1.4.9+dfsg/tools/dev/valgrind/fxt.suppr000066400000000000000000000051071507764646700214330ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { Memcheck:Leak match-leak-kinds: reachable fun:calloc fun:fxt_setinfos fun:fut_setup } { Memcheck:Leak match-leak-kinds: reachable fun:realloc fun:fxt_get_cpu_info fun:fxt_setinfos fun:fut_setup } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:strdup fun:fut_set_filename } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:fxt_setinfos fun:fut_setup } { Memcheck:Cond fun:fxt_get_cpu_info fun:fxt_setinfos fun:fut_setup } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:realloc fun:fxt_next_ev } { Memcheck:Leak match-leak-kinds: possible fun:malloc fun:fxt_blockev_enter } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:strdup fun:fxt_fdopen } { Memcheck:Leak match-leak-kinds: reachable fun:realloc fun:fxt_next_ev } { Memcheck:Leak match-leak-kinds: reachable fun:calloc fun:fxt_load_time fun:fxt_fdopen } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:fkt_load_pids fun:fxt_fdopen } { Memcheck:Leak match-leak-kinds: reachable fun:calloc fun:fxt_fdopen } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:fdopen@@GLIBC_2.2.5 fun:fxt_fdopen } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:fut_setup fun:_starpu_fxt_init_profiling fun:starpu_initialize fun:main } starpu-1.4.9+dfsg/tools/dev/valgrind/glpk.suppr000066400000000000000000000014231507764646700215640ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { Memcheck:Leak match-leak-kinds: reachable ... fun:glp_init_env } starpu-1.4.9+dfsg/tools/dev/valgrind/hdf5.suppr000066400000000000000000000023331507764646700214560ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { thread-specific value Memcheck:Leak match-leak-kinds: reachable ... fun:H5E_get_stack ... } { thread-specific value Memcheck:Leak match-leak-kinds: reachable ... fun:H5E__get_stack ... } { thread-specific value Memcheck:Leak match-leak-kinds: reachable ... fun:H5TS_thread_id ... } { thread-specific value Memcheck:Leak match-leak-kinds: reachable ... fun:H5TS_cancel_count_inc ... } { thread-specific value Memcheck:Leak match-leak-kinds: reachable ... fun:H5CX_push ... } starpu-1.4.9+dfsg/tools/dev/valgrind/helgrind.sh000077500000000000000000000033301507764646700216660ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # EXEC=$(basename $0 .sh) DIRNAME=$(dirname $0) CLIMIT=$(ulimit -c) if [ "$CLIMIT" = unlimited ] then # valgrind cores are often *huge*, 100MB will already be quite big... ulimit -c 100000 fi if test "$EXEC" == "valgrind" then RUN="valgrind --track-origins=yes --show-reachable=yes --leak-check=full --errors-for-leak-kinds=all --show-leak-kinds=all --error-exitcode=42 $STARPU_VALGRIND_OPTIONS" elif test "$EXEC" == "valgrind_xml" then mkdir -p ${DIRNAME}/../../../valgrind XML_FILE=$(mktemp -p ${DIRNAME}/../../../valgrind starpu-valgrind_XXXXXXXXXX.xml) RUN="valgrind --track-origins=yes --show-reachable=yes --leak-check=full --errors-for-leak-kinds=all --show-leak-kinds=all --xml=yes --xml-file=${XML_FILE} $STARPU_VALGRIND_OPTIONS" else RUN="valgrind --tool=$EXEC --error-exitcode=42" fi SUPPRESSIONS=$(for f in $(dirname $0)/*.suppr /usr/share/hwloc/hwloc-valgrind.supp; do if test -f $f ; then echo "--suppressions=$f" ; fi ; done) exec $RUN --keep-debuginfo=yes --num-callers=42 --error-limit=no --gen-suppressions=all $SUPPRESSIONS $* starpu-1.4.9+dfsg/tools/dev/valgrind/hwloc.suppr000066400000000000000000000060541507764646700217500ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { Memcheck:Leak match-leak-kinds: reachable ... fun:hwloc_components_init } { Memcheck:Leak match-leak-kinds: indirect ... fun:hwloc_components_init } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:lt__malloc fun:lt__zalloc obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.0 fun:lt_dlopenadvise obj:* obj:* obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 fun:hwloc_topology_init } { Memcheck:Leak fun:malloc ... fun:tls_get_addr_tail ... fun:hwloc_backends_notify_new_object ... } { Memcheck:Leak fun:memalign fun:tls_get_addr_tail ... fun:hwloc_backends_notify_new_object ... } { Memcheck:Leak fun:malloc ... fun:hwloc_topology_set_xml ... } { Memcheck:Leak fun:calloc ... fun:hwloc_topology_set_xml ... } { Memcheck:Leak fun:realloc ... fun:hwloc_topology_set_xml ... } { Memcheck:Leak fun:malloc ... fun:hwloc_topology_load ... } { Memcheck:Leak fun:calloc ... fun:hwloc_topology_load ... } { Memcheck:Leak fun:realloc ... fun:hwloc_topology_load ... } { Memcheck:Leak match-leak-kinds: reachable fun:memalign fun:tls_get_addr_tail obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:clGetPlatformIDs obj:/usr/lib/x86_64-linux-gnu/hwloc/hwloc_opencl.so fun:hwloc_backends_notify_new_object obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.7.3 fun:hwloc_insert_pci_device_list obj:/usr/lib/x86_64-linux-gnu/hwloc/hwloc_pci.so fun:hwloc_topology_load fun:_starpu_init_topology fun:_starpu_topology_get_nhwcpu fun:check_bus_config_file fun:_starpu_load_bus_performance_files fun:starpu_initialize fun:starpu_init fun:omp_initial_thread_setup fun:omp_initial_region_setup fun:starpu_omp_init fun:omp_constructor fun:__libc_csu_init fun:(below main) } { Helgrind:Race fun:hwloc_linux_set_area_membind ... } starpu-1.4.9+dfsg/tools/dev/valgrind/libc.suppr000066400000000000000000000157021507764646700215450ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { Ignore libc aio-internal races Helgrind:Race ... fun:aio_read ... } { Ignore libc aio-internal cond oddity Helgrind:Misc ... fun:aio_read ... } { Ignore libc aio-internal races Helgrind:Race ... fun:aio_write ... } { Ignore libc aio-internal cond oddity Helgrind:Misc ... fun:aio_write ... } { Ignore libc aio-internal races Helgrind:Race ... fun:__aio_notify ... } { Ignore libc aio-internal races Helgrind:Race ... fun:aio_suspend ... } { Ignore libc aio errors Memcheck:Leak match-leak-kinds: reachable ... fun:__aio_enqueue_request ... } { Ignore libc aio-internal leak Memcheck:Leak match-leak-kinds: possible ... fun:__aio_create_helper_thread ... } { Ignore libc printf races Helgrind:Race ... fun:__vfprintf_internal ... } { Ignore libc printf races Helgrind:Race ... fun:_IO_file_xsputn ... } { Memcheck:Leak ... fun:dlopen@@GLIBC_2.2.5 ... } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:lt__malloc fun:lt__zalloc obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.1 ... } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.22 fun:call_init.part.0 fun:_dl_init obj:/lib/x86_64-linux-gnu/ld-2.21.so } { Memcheck:Leak match-leak-kinds: reachable fun:calloc fun:_dl_new_object fun:_dl_map_object_from_fd fun:_dl_map_object fun:openaux fun:_dl_catch_error fun:_dl_map_object_deps fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:_dl_new_object fun:_dl_map_object_from_fd fun:_dl_map_object fun:openaux fun:_dl_catch_error fun:_dl_map_object_deps fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:strdup fun:_dl_load_cache_lookup fun:_dl_map_object fun:openaux fun:_dl_catch_error fun:_dl_map_object_deps fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 fun:call_init.part.0 fun:call_init fun:_dl_init obj:/lib/x86_64-linux-gnu/ld-2.26.so } { Memcheck:Value8 obj:/lib/x86_64-linux-gnu/libc-2.28.so ... } { Memcheck:Cond obj:/lib/x86_64-linux-gnu/libc-2.28.so ... } { Memcheck:Cond obj:/lib/x86_64-linux-gnu/libc-2.28.so ... } { Memcheck:Value8 obj:/lib/x86_64-linux-gnu/ld-2.28.so obj:* obj:* obj:* } { Memcheck:Cond obj:/lib/x86_64-linux-gnu/ld-2.28.so obj:* } { Memcheck:Value8 obj:/lib/x86_64-linux-gnu/ld-2.28.so obj:* } { Memcheck:Param openat(filename) obj:/lib/x86_64-linux-gnu/libc-2.28.so obj:* } { Memcheck:Value8 obj:/lib/x86_64-linux-gnu/ld-2.28.so } { Memcheck:Cond obj:/lib/x86_64-linux-gnu/ld-2.28.so } { Memcheck:Param openat(filename) obj:/lib/x86_64-linux-gnu/ld-2.28.so ... } { Memcheck:Cond obj:/lib/x86_64-linux-gnu/libdl-2.28.so } { Memcheck:Param read(count) obj:/lib/x86_64-linux-gnu/libc-2.28.so } { Memcheck:Param read(buf) obj:/lib/x86_64-linux-gnu/libc-2.28.so } { Memcheck:Param lseek(offset) obj:/lib/x86_64-linux-gnu/libc-2.28.so obj:/lib/x86_64-linux-gnu/libc-2.28.so } { Memcheck:Cond obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.1 obj:* } { Memcheck:Value8 obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.1 obj:* } { Memcheck:Cond obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.1 } { Memcheck:Leak match-leak-kinds: reachable fun:malloc ... fun:_dl_close fun:_dl_catch_exception fun:_dl_catch_error fun:dlerror_run fun:free_mem fun:__libc_freeres fun:_vgnU_freeres fun:__run_exit_handlers fun:exit fun:(below main) } { Memcheck:Addr8 fun:strncmp fun:is_dst fun:_dl_dst_count fun:expand_dynamic_string_token fun:fillin_rpath.isra.0 ... } { Memcheck:Addr8 fun:strncmp fun:is_dst fun:_dl_dst_substitute fun:fillin_rpath.isra.0 ... } { Memcheck:Cond fun:free fun:free_res fun:__libc_freeres fun:_vgnU_freeres fun:__run_exit_handlers fun:exit fun:(below main) } { Memcheck:Free fun:free fun:free_res fun:__libc_freeres fun:_vgnU_freeres fun:__run_exit_handlers fun:exit fun:(below main) } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:malloc fun:_dl_close_worker fun:_dl_close fun:_dl_catch_exception fun:_dl_catch_error fun:_dlerror_run ... } { Memcheck:Leak match-leak-kinds: possible ... fun:_dl_allocate_tls fun:allocate_stack fun:pthread_create@@GLIBC_2.34 ... } { Memcheck:Leak match-leak-kinds: reachable ... fun:allocate_dtv_entry fun:allocate_and_init fun:tls_get_addr_tail fun:__tls_get_addr ... } starpu-1.4.9+dfsg/tools/dev/valgrind/libgomp.suppr000066400000000000000000000041131507764646700222570ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2015-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { gomp2 Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:gomp_malloc fun:gomp_init_num_threads fun:initialize_env fun:call_init.part.0 fun:call_init fun:_dl_init ... } { gomp3 Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.21 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 fun:call_init fun:call_init fun:_dl_init obj:/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 fun:call_init.part.0 fun:call_init fun:_dl_init obj:/usr/lib/x86_64-linux-gnu/ld-2.28.so } { Memcheck:Leak match-leak-kinds: reachable fun:calloc obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libgomp.so.1.0.0 fun:call_init fun:call_init fun:_dl_init obj:/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 } starpu-1.4.9+dfsg/tools/dev/valgrind/libnuma.suppr000066400000000000000000000020401507764646700222520ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { libnuma error Memcheck:Cond ... fun:numa_node_size64 ... } { libnuma error Memcheck:Value8 ... fun:numa_node_size64 ... } { Memcheck:Cond obj:/usr/lib/x86_64-linux-gnu/libnuma.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libnuma.so.1.0.0 obj:* obj:/usr/lib/x86_64-linux-gnu/libquadmath.so.0.0.0 obj:* } starpu-1.4.9+dfsg/tools/dev/valgrind/madmpi.suppr000066400000000000000000000027721507764646700221060ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { Memcheck:Leak match-leak-kinds: indirect fun:malloc fun:hwloc_bitmap_alloc fun:piom_topo_current_obj fun:piom_ltask_schedule fun:nm_schedule fun:nm_sr_progress fun:nm_sr_stest fun:MPI_Test } { Memcheck:Leak match-leak-kinds: definite ... fun:nm_schedule fun:nm_sr_progress fun:nm_sr_stest fun:MPI_Test } { Memcheck:Leak match-leak-kinds: indirect fun:malloc fun:hwloc_bitmap_alloc fun:piom_topo_current_obj fun:piom_ltask_schedule fun:nm_schedule fun:nm_sr_progress fun:nm_sr_rtest fun:MPI_Test } { Memcheck:Leak match-leak-kinds: definite ... fun:nm_schedule fun:nm_sr_progress fun:nm_sr_rtest fun:MPI_Test } starpu-1.4.9+dfsg/tools/dev/valgrind/nvidia.suppr000066400000000000000000000057041507764646700221070ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { Memcheck:Cond obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 ... obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 obj:/lib/x86_64-linux-gnu/ld-2.28.so } { Memcheck:Value8 obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 ... obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 obj:/lib/x86_64-linux-gnu/ld-2.28.so } { Memcheck:Param readlink(path) obj:/lib/x86_64-linux-gnu/libc-2.28.so obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 obj:/lib/x86_64-linux-gnu/ld-2.28.so } { Memcheck:Param lstat(file_name) obj:/lib/x86_64-linux-gnu/libc-2.28.so obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 obj:/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.390.87 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 obj:/lib/x86_64-linux-gnu/ld-2.28.so } { Memcheck:Value8 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 obj:/lib/x86_64-linux-gnu/ld-2.28.so } { Memcheck:Cond obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 obj:/lib/x86_64-linux-gnu/ld-2.28.so } { Memcheck:Value8 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 obj:/lib/x86_64-linux-gnu/ld-2.28.so } { Memcheck:Cond obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 obj:/lib/x86_64-linux-gnu/ld-2.28.so } starpu-1.4.9+dfsg/tools/dev/valgrind/opencl.suppr000066400000000000000000000535361507764646700221230ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: indirect fun:malloc fun:strdup obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: definite fun:malloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 } { Memcheck:Leak match-leak-kinds: possible fun:calloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 ... obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:__alloc_dir obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 ... } { Memcheck:Leak match-leak-kinds: definite fun:calloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:_dl_new_object fun:_dl_map_object_from_fd fun:_dl_map_object fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 } { Memcheck:Leak match-leak-kinds: reachable fun:realloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: possible fun:calloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: possible fun:calloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 } { Memcheck:Leak match-leak-kinds: possible fun:calloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: possible fun:calloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: possible fun:calloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:calloc fun:_dl_check_map_versions fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 } { Memcheck:Leak match-leak-kinds: reachable fun:calloc fun:_dl_new_object fun:_dl_map_object_from_fd fun:_dl_map_object fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 } { Memcheck:Leak match-leak-kinds: reachable fun:calloc fun:_dl_new_object fun:_dl_map_object_from_fd fun:_dl_map_object fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 } { Memcheck:Leak match-leak-kinds: reachable fun:calloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 } { Memcheck:Leak match-leak-kinds: reachable fun:calloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:calloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:calloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 ... obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 ... obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 ... obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 } { Memcheck:Leak match-leak-kinds: reachable fun:calloc fun:_dl_check_map_versions fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 } { Memcheck:Leak match-leak-kinds: reachable fun:calloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 ... obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:_dl_new_object fun:_dl_map_object_from_fd fun:_dl_map_object fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:strdup fun:_dl_load_cache_lookup fun:_dl_map_object fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 } { Memcheck:Leak match-leak-kinds: indirect fun:calloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 ... obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:strdup fun:_dl_load_cache_lookup fun:_dl_map_object fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 } { Memcheck:Leak match-leak-kinds: reachable fun:calloc obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:strdup obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 obj:/usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.331.49 } { Memcheck:Leak match-leak-kinds: reachable fun:calloc fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init ... } { Helgrind:UnlockUnlocked fun:mutex_unlock_WRK obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so fun:aclCompilerInit obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so fun:clIcdGetPlatformIDsKHR obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 ... } { Helgrind:PthAPIerror fun:mutex_unlock_WRK obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so fun:aclCompilerInit obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so fun:clIcdGetPlatformIDsKHR obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 ... } { Helgrind:Race ... fun:init_one_static_tls fun:__pthread_init_static_tls fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 obj:/usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 fun:call_init.part.0 fun:_dl_init fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 ... } { Memcheck:Cond ... obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so ... fun:call_init.part.0 fun:_dl_init fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 ... } { Memcheck:Value8 ... obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so ... fun:call_init.part.0 fun:_dl_init fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 ... } { Memcheck:Leak ... obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so ... fun:call_init.part.0 fun:_dl_init fun:dl_open_worker fun:_dl_catch_error fun:_dl_open fun:dlopen_doit fun:_dl_catch_error fun:_dlerror_run fun:dlopen@@GLIBC_2.2.5 ... } { Memcheck:Leak match-leak-kinds: reachable fun:calloc fun:_dlerror_run fun:dlclose obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so fun:__run_exit_handlers fun:exit ... } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libamdocl64.so ... } starpu-1.4.9+dfsg/tools/dev/valgrind/openmp.suppr000066400000000000000000000126041507764646700221300ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2017-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { Memcheck:Leak match-leak-kinds: reachable fun:realloc obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 fun:pci_device_get_device_name fun:hwloc_look_pci fun:hwloc_discover fun:hwloc_topology_load fun:_starpu_init_topology fun:_starpu_topology_get_nhwcpu fun:check_bus_config_file fun:_starpu_load_bus_performance_files fun:starpu_initialize fun:starpu_init fun:omp_initial_thread_setup fun:omp_initial_region_setup fun:starpu_omp_init fun:omp_constructor fun:__libc_csu_init fun:(below main) } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:strdup obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 fun:pci_device_get_device_name fun:hwloc_look_pci fun:hwloc_discover fun:hwloc_topology_load fun:_starpu_init_topology fun:_starpu_topology_get_nhwcpu fun:check_bus_config_file fun:_starpu_load_bus_performance_files fun:starpu_initialize fun:starpu_init fun:omp_initial_thread_setup fun:omp_initial_region_setup fun:starpu_omp_init fun:omp_constructor fun:__libc_csu_init fun:(below main) } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:strdup obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 fun:pci_device_get_device_name fun:hwloc_look_pci fun:hwloc_discover fun:hwloc_topology_load fun:_starpu_init_topology fun:_starpu_topology_get_nhwcpu fun:check_bus_config_file fun:_starpu_load_bus_performance_files fun:starpu_initialize fun:starpu_init fun:omp_initial_thread_setup fun:omp_initial_region_setup fun:starpu_omp_init fun:omp_constructor fun:__libc_csu_init fun:(below main) } { Memcheck:Leak match-leak-kinds: reachable fun:calloc obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 fun:hwloc_look_pci fun:hwloc_discover fun:hwloc_topology_load fun:_starpu_init_topology fun:_starpu_topology_get_nhwcpu fun:check_bus_config_file fun:_starpu_load_bus_performance_files fun:starpu_initialize fun:starpu_init fun:omp_initial_thread_setup fun:omp_initial_region_setup fun:starpu_omp_init fun:omp_constructor fun:__libc_csu_init fun:(below main) } { Memcheck:Leak match-leak-kinds: reachable fun:calloc obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 fun:hwloc_look_pci fun:hwloc_discover fun:hwloc_topology_load fun:_starpu_init_topology fun:_starpu_topology_get_nhwcpu fun:check_bus_config_file fun:_starpu_load_bus_performance_files fun:starpu_initialize fun:starpu_init fun:omp_initial_thread_setup fun:omp_initial_region_setup fun:starpu_omp_init fun:omp_constructor fun:__libc_csu_init fun:(below main) } { Memcheck:Leak match-leak-kinds: reachable fun:calloc obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 fun:hwloc_look_pci fun:hwloc_discover fun:hwloc_topology_load fun:_starpu_init_topology fun:_starpu_topology_get_nhwcpu fun:check_bus_config_file fun:_starpu_load_bus_performance_files fun:starpu_initialize fun:starpu_init fun:omp_initial_thread_setup fun:omp_initial_region_setup fun:starpu_omp_init fun:omp_constructor fun:__libc_csu_init fun:(below main) } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:strdup obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 obj:/usr/lib/x86_64-linux-gnu/libpciaccess.so.0.11.1 fun:hwloc_look_pci fun:hwloc_discover fun:hwloc_topology_load fun:_starpu_init_topology fun:_starpu_topology_get_nhwcpu fun:check_bus_config_file fun:_starpu_load_bus_performance_files fun:starpu_initialize fun:starpu_init fun:omp_initial_thread_setup fun:omp_initial_region_setup fun:starpu_omp_init fun:omp_constructor fun:__libc_csu_init fun:(below main) } { ignore GOMP barrier race Helgrind:Race ... fun:gomp_barrier_wait ... } { ignore GOMP barrier race Helgrind:Race ... fun:gomp_barrier_wait_start ... } { ignore GOMP barrier race Helgrind:Race ... fun:gomp_barrier_wait_end ... } { ignore GOMP barrier race Helgrind:Race ... fun:gomp_barrier_wait_final ... } starpu-1.4.9+dfsg/tools/dev/valgrind/openmpi.suppr000066400000000000000000000306141507764646700223020ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { suppr1 Memcheck:Param writev(vector[...]) ... fun:ompi_mpi_init ... } { suppr2 Memcheck:Addr4 ... fun:orte_init ... } { suppr3 Memcheck:Param sched_setaffinity(mask) ... fun:orte_init ... } { suppr4 Memcheck:Addr8 ... fun:orte_init } { suppr5 Memcheck:Leak ... fun:ompi_mpi_init } { suppr5 Helgrind:Race ... fun:ompi_mpi_init } { suppr6 Memcheck:Leak ... fun:mca_pml_base_open } { suppr7 Memcheck:Leak ... fun:orte_init } { suppr7 Memcheck:Leak ... fun:orte_progress_thread_engine } { suppr7 Helgrind:Race ... fun:orte_progress_thread_engine } { suppr7 Helgrind:Race ... fun:event_base_loop fun:progress_engine ... } { suppr8 Memcheck:Leak ... fun:orte_ess_base_app_setup } { suppr9 Memcheck:Leak ... fun:opal_paffinity_base_open } { suppr10 Memcheck:Leak ... fun:ompi_mpi_finalize } { suppr10 Helgrind:Race ... fun:ompi_mpi_finalize } { suppr10 Helgrind:Misc ... fun:ompi_mpi_finalize } { suppr10 Helgrind:PthAPIerror ... fun:ompi_mpi_finalize } { suppr11 Memcheck:Leak ... fun:mca_base_components_open } { suppr12 Memcheck:Param writev(vector[...]) ... fun:PMPI_Init_thread } { suppr13 Memcheck:Param writev(vector[...]) ... fun:PMPI_Init_thread } { suppr14 Memcheck:Param sched_setaffinity(mask) ... fun:PMPI_Init_thread } { suppr15 Memcheck:Leak fun:malloc fun:ompi_free_list_grow ... fun:opal_progress fun:ompi_request_default_test fun:PMPI_Test } { suppr15 Memcheck:Leak fun:malloc fun:opal_free_list_grow ... fun:opal_progress fun:ompi_request_default_test fun:PMPI_Test } { suppr16 Memcheck:Leak fun:malloc fun:ompi_ddt_set_args fun:PMPI_Type_vector } { suppr17 Memcheck:Leak fun:malloc fun:ompi_ddt_optimize_short.constprop.0 fun:ompi_ddt_commit fun:PMPI_Type_commit } { suppr18 Memcheck:Leak fun:calloc fun:ompi_ddt_create fun:ompi_ddt_create_vector fun:PMPI_Type_vector } { suppr19 Memcheck:Leak fun:malloc fun:ompi_ddt_create fun:ompi_ddt_create_vector fun:PMPI_Type_vector } { suppr20 Memcheck:Leak fun:malloc fun:ompi_free_list_grow ... fun:PMPI_Isend } { suppr20 Memcheck:Leak fun:malloc fun:opal_free_list_grow ... fun:PMPI_Isend } { suppr20 Memcheck:Leak fun:malloc fun:ompi_free_list_grow ... fun:PMPI_Barrier } { suppr20 Memcheck:Leak fun:malloc fun:opal_free_list_grow ... fun:PMPI_Barrier } { suppr21 Memcheck:Leak ... fun:hwloc_topology_set_xmlbuffer fun:opal_hwloc_unpack fun:opal_dss_unpack_buffer } { suppr22 Memcheck:Leak ... fun:hwloc_topology_set_xmlbuffer fun:opal_hwloc_unpack } { suppr23 Memcheck:Leak ... fun:hwloc_topology_load fun:opal_hwloc_unpack } { suppr24 Memcheck:Leak fun:malloc ... fun:xmlParseElement } { suppr25 Memcheck:Leak match-leak-kinds: indirect ... fun:ompi_datatype_commit fun:PMPI_Type_commit } { suppr26 Memcheck:Leak match-leak-kinds: definite ... fun:ompi_datatype_create_vector fun:PMPI_Type_vector } { suppr27 Memcheck:Leak match-leak-kinds: indirect ... fun:ompi_datatype_create_vector fun:PMPI_Type_vector } { suppr28 Memcheck:Leak match-leak-kinds: indirect fun:malloc fun:ompi_datatype_set_args fun:PMPI_Type_vector } { suppr29 Memcheck:Leak ... fun:PMPI_Comm_split fun:main } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:orte_grpcomm_base_update_modex_entries fun:orte_grpcomm_base_modex_unpack obj:* obj:* obj:* obj:* obj:* fun:opal_event_base_loop fun:opal_progress obj:* fun:ompi_modex_recv_key_value } { Memcheck:Leak match-leak-kinds: definite fun:malloc obj:/usr/lib/openmpi/lib/libmpi.so.1.0.8 fun:orte_grpcomm_base_update_modex_entries fun:orte_grpcomm_base_modex_unpack obj:* obj:* obj:* obj:* obj:* fun:opal_event_base_loop fun:opal_progress obj:* } { Memcheck:Leak match-leak-kinds: indirect fun:malloc fun:orte_grpcomm_base_update_modex_entries fun:orte_grpcomm_base_modex_unpack obj:* obj:* obj:* obj:* obj:* fun:opal_event_base_loop fun:opal_progress obj:* fun:ompi_modex_recv_key_value } { Memcheck:Leak match-leak-kinds: indirect fun:malloc obj:/usr/lib/openmpi/lib/libmpi.so.1.0.8 fun:orte_grpcomm_base_update_modex_entries fun:orte_grpcomm_base_modex_unpack obj:* obj:* obj:* obj:* obj:* fun:opal_event_base_loop fun:opal_progress obj:* } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:lt__malloc fun:lt__zalloc obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.0 fun:lt_dlopenadvise obj:/usr/lib/x86_64-linux-gnu/hwloc/hwloc_cuda.so obj:/usr/lib/x86_64-linux-gnu/hwloc/hwloc_cuda.so obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 fun:hwloc_topology_init fun:opal_hwloc_unpack fun:opal_dss_unpack_buffer fun:opal_dss_unpack } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:strdup obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.0 obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.0 fun:lt_dlforeachfile obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 fun:hwloc_topology_init fun:opal_hwloc_unpack fun:opal_dss_unpack_buffer fun:opal_dss_unpack } { Memcheck:Leak match-leak-kinds: reachable fun:malloc obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.0 obj:/usr/lib/x86_64-linux-gnu/libltdl.so.7.3.0 fun:lt_dlforeachfile obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 obj:/usr/lib/x86_64-linux-gnu/libhwloc.so.5.6.8 fun:hwloc_topology_init fun:opal_hwloc_unpack fun:opal_dss_unpack_buffer fun:opal_dss_unpack fun:orte_util_nidmap_init } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:opal_dss_unpack_string fun:opal_dss_unpack_buffer fun:opal_dss_unpack fun:orte_grpcomm_base_update_modex_entries fun:orte_grpcomm_base_modex_unpack obj:* obj:* obj:* obj:* obj:* fun:opal_event_base_loop } { Memcheck:Leak match-leak-kinds: indirect fun:malloc fun:strdup fun:orte_grpcomm_base_update_modex_entries fun:orte_grpcomm_base_modex_unpack obj:* obj:* obj:* obj:* obj:* fun:opal_event_base_loop fun:opal_progress obj:* } # the following suppression occurs on bertha with openmpi 1.10.2 # no idea why it is failing # Invalid read of size 8 # at 0x4C2E726: memcpy@@GLIBC_2.14 (vg_replace_strmem.c:1018) # by 0x77F31C0: opal_convertor_pack (in /usr/lib/openmpi/lib/libopen-pal.so.13.0.2) # by 0x105D8C41: mca_btl_vader_sendi (in /usr/lib/openmpi/lib/openmpi/mca_btl_vader.so) # by 0x10C16F05: mca_pml_ob1_send_inline (in /usr/lib/openmpi/lib/openmpi/mca_pml_ob1.so) # by 0x10C17662: mca_pml_ob1_isend (in /usr/lib/openmpi/lib/openmpi/mca_pml_ob1.so) # by 0x5B6471D: PMPI_Isend (in /usr/lib/openmpi/lib/libmpi.so.12.0.2) # by 0x4E3F08E: _starpu_mpi_isend_size_func (starpu_mpi.c:384) # by 0x4E4602D: _starpu_mpi_handle_ready_request (starpu_mpi.c:1132) # by 0x4E47C3C: _starpu_mpi_progress_thread_func (starpu_mpi.c:1342) # by 0x58E6283: start_thread (pthread_create.c:333) # by 0x60F5A4C: clone (in /lib/x86_64-linux-gnu/libc-2.21.so) # Address 0x1ab67c40 is 16 bytes inside a block of size 24 alloc'd # at 0x4C2BBD5: calloc (vg_replace_malloc.c:711) # by 0x4E3EDBA: _starpu_mpi_isend_size_func (starpu_mpi.c:369) # by 0x4E4602D: _starpu_mpi_handle_ready_request (starpu_mpi.c:1132) # by 0x4E47C3C: _starpu_mpi_progress_thread_func (starpu_mpi.c:1342) # by 0x58E6283: start_thread (pthread_create.c:333) # by 0x60F5A4C: clone (in /lib/x86_64-linux-gnu/libc-2.21.so) { Memcheck:Addr8 fun:memcpy@@GLIBC_2.14 fun:opal_convertor_pack fun:mca_btl_vader_sendi fun:mca_pml_ob1_send_inline fun:mca_pml_ob1_isend fun:PMPI_Isend fun:_starpu_mpi_isend_size_func fun:_starpu_mpi_handle_ready_request fun:_starpu_mpi_progress_thread_func fun:start_thread } { Memcheck:Addr2 fun:memcpy@@GLIBC_2.14 fun:opal_convertor_pack fun:mca_btl_vader_sendi fun:mca_pml_ob1_send_inline fun:mca_pml_ob1_isend fun:PMPI_Isend fun:_starpu_mpi_isend_data_func fun:_starpu_mpi_isend_size_func fun:_starpu_mpi_handle_ready_request fun:_starpu_mpi_progress_thread_func fun:start_thread } { Memcheck:Addr8 fun:memcpy@@GLIBC_2.14 fun:vader_prepare_src fun:mca_pml_ob1_send_request_start_prepare fun:mca_pml_ob1_isend fun:PMPI_Isend fun:_starpu_mpi_isend_data_func fun:_starpu_mpi_isend_size_func fun:_starpu_mpi_handle_ready_request fun:_starpu_mpi_progress_thread_func fun:start_thread } { Memcheck:Addr8 fun:memcpy@@GLIBC_2.14 fun:mca_btl_vader_sendi fun:mca_pml_ob1_send_inline fun:mca_pml_ob1_isend fun:PMPI_Isend fun:_starpu_mpi_isend_size_func fun:_starpu_mpi_handle_ready_request fun:_starpu_mpi_progress_thread_func fun:start_thread } { Memcheck:Addr8 fun:memcpy@@GLIBC_2.14 fun:opal_convertor_pack ... fun:PMPI_Isend fun:_starpu_mpi_isend_size_func fun:_starpu_mpi_handle_ready_request fun:_starpu_mpi_progress_thread_func fun:start_thread } { Memcheck:Addr2 fun:memcpy@@GLIBC_2.14 fun:opal_convertor_pack ... fun:PMPI_Isend fun:_starpu_mpi_isend_data_func fun:_starpu_mpi_isend_size_func fun:_starpu_mpi_handle_ready_request fun:_starpu_mpi_progress_thread_func fun:start_thread } { Memcheck:Addr8 fun:_starpu_mpi_isend_size_func fun:_starpu_mpi_handle_ready_request fun:_starpu_mpi_progress_thread_func fun:start_thread } # that happens at initialization { Memcheck:Param writev(vector[...]) fun:__writev fun:writev fun:send_msg fun:pmix_ptl_base_send_handler fun:event_persist_closure fun:event_process_active_single_queue fun:event_process_active fun:event_base_loop fun:progress_engine fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:fetch_nodeinfo fun:hash_fetch fun:_getnb_cbfunc fun:pmix_ptl_base_process_msg fun:event_process_active_single_queue fun:event_process_active fun:event_base_loop fun:progress_engine fun:start_thread fun:clone } { Helgrind:LockOrder ... fun:mca_bml_r2_add_procs ... } { Memcheck:Leak match-leak-kinds: indirect fun:calloc fun:ompi_group_allocate fun:ompi_group_incl_plist fun:PMPI_Group_incl fun:PMPI_Group_incl } { Memcheck:Leak match-leak-kinds: definite fun:calloc fun:UnknownInlinedFun fun:ompi_group_allocate_plist_w_procs fun:ompi_group_allocate fun:ompi_group_incl_plist fun:PMPI_Group_incl fun:PMPI_Group_incl } starpu-1.4.9+dfsg/tools/dev/valgrind/p11-kit.suppr000066400000000000000000000014141507764646700220150ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { Helgrind:Misc fun:mutex_destroy_WRK fun:p11_library_uninit } starpu-1.4.9+dfsg/tools/dev/valgrind/padico.suppr000066400000000000000000000344351507764646700220770ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { Memcheck:Param match-leak-kinds: reachable ... fun:padico_tasklet_worker.2516.2614 } { Memcheck:Leak match-leak-kinds: reachable ... fun:padico_tasklet_worker.2516.2614 } { Memcheck:Param ioctl(SIOCETHTOOL) ... fun:padico_tasklet_worker.2516.2614 } { Memcheck:Param write(buf) ... fun:nm_launcher_init } { Memcheck:Param match-leak-kinds: reachable ... fun:nm_launcher_init } { Memcheck:Leak match-leak-kinds: reachable ... fun:nm_launcher_init } { Memcheck:Leak match-leak-kinds: reachable ... fun:puk_xml_parse_file } { Memcheck:Leak match-leak-kinds: reachable ... fun:puk_xml_end_handler.3579 } { Memcheck:Leak match-leak-kinds: reachable ... fun:puk_xml_parse_buffer } { Memcheck:Leak match-leak-kinds: indirect ... fun:padico_tasklet_worker.2516.2614 } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:puk_mod_builtin_declare fun:call_init.part.0 fun:_dl_init ... } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:hwloc_bitmap_alloc fun:piom_topo_current_obj fun:__piom_ltask_timer_worker.3325 fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: reachable fun:malloc ... fun:_dl_close fun:_dl_catch_error fun:dlerror_run fun:free_mem fun:__libc_freeres fun:_vgnU_freeres fun:__run_exit_handlers fun:exit fun:(below main) } { Memcheck:Leak match-leak-kinds: indirect fun:malloc fun:strdup fun:topo_property_end_handler fun:puk_xml_end_handler.3579 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 fun:XML_ParseBuffer fun:puk_xml_parse_buffer fun:na_control_event_listener fun:padico_tasklet_flush fun:sysio_control_start fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:piom_topo_current_obj } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:hwloc_bitmap_alloc fun:piom_topo_current_obj fun:piom_ltask_schedule fun:__piom_ltask_idle_worker.3322 fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:padico_addrdb_publish_handler fun:puk_xml_start_handler.3574 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 fun:XML_ParseBuffer fun:puk_xml_parse_buffer fun:na_control_event_listener fun:padico_tasklet_worker.2516.2614 fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:_puk_component_instantiate fun:sysio_control_bootstrap_acceptor fun:sfplain_acceptor_finalizer fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:topo_property_end_handler fun:puk_xml_end_handler.3579 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 fun:XML_ParseBuffer fun:puk_xml_parse_buffer fun:sysio_control_start fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:topo_property_end_handler fun:puk_xml_end_handler.3579 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 fun:XML_ParseBuffer fun:puk_xml_parse_buffer fun:na_control_event_listener fun:padico_tasklet_worker.2516.2614 fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:sysio_control_instantiate fun:_puk_component_instantiate fun:sysio_control_bootstrap_acceptor fun:sfplain_acceptor_finalizer fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:padico_tasklet_schedule fun:sysio_control_read_callback fun:na_sysio_thread fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:padico_control_deliver_message fun:sysio_control_read_callback fun:na_sysio_thread fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:padico_tasklet_schedule fun:sysio_control_read_callback fun:na_sysio_thread fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:puk_iface_register fun:puk_component_declare2 fun:padico_module_init fun:unit_binary_load.5249.2496 fun:padico_puk_mod_load fun:_puk_component_resolve fun:padico_control_send_msg fun:padico_control_send_ext fun:control_router_event_listener fun:padico_tasklet_flush fun:control_router_event_listener fun:padico_tasklet_flush fun:sysio_control_start fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:realloc fun:na_control_event_listener fun:padico_tasklet_flush fun:sysio_control_start fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:strdup fun:puk_iface_register fun:puk_component_declare2 fun:padico_module_init fun:unit_binary_load.5249.2496 fun:padico_puk_mod_load fun:_puk_component_resolve fun:padico_control_send_msg fun:padico_control_send_ext fun:control_router_event_listener fun:padico_tasklet_flush fun:control_router_event_listener fun:padico_tasklet_flush fun:sysio_control_start fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:topo_property_end_handler fun:puk_xml_end_handler.3579 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 fun:XML_ParseBuffer fun:puk_xml_parse_buffer fun:na_control_event_listener fun:padico_tasklet_flush fun:padico_module_init fun:unit_binary_load.5249.2496 fun:padico_puk_mod_load fun:_puk_component_resolve fun:padico_control_send_msg fun:padico_control_send_ext fun:control_router_event_listener fun:padico_tasklet_worker.2516.2614 fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:padico_tm_tasks_process.2519.2436 fun:padico_tasklet_worker.2516.2614 fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:padico_addrdb_publish_handler fun:puk_xml_start_handler.3574 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 fun:XML_ParseBuffer fun:puk_xml_parse_buffer fun:na_control_event_listener fun:padico_tm_req_wait fun:padico_tm_mod_action_args fun:run_end_handler.2909.2464 fun:puk_xml_end_handler.3579 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 fun:XML_ParseBuffer fun:puk_xml_parse_buffer fun:na_control_event_listener fun:padico_tasklet_worker.2516.2614 fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:sysio_control_read_callback fun:na_sysio_thread fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:topo_property_end_handler fun:puk_xml_end_handler.3579 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 fun:XML_ParseBuffer fun:puk_xml_parse_buffer fun:na_control_event_listener fun:padico_tasklet_flush fun:padico_module_init fun:unit_binary_load.5249.2496 fun:padico_puk_mod_load fun:_puk_component_resolve fun:padico_control_send_msg fun:padico_control_send_ext fun:control_router_event_listener fun:padico_tasklet_worker.2516.2614 fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:padico_tm_tasks_process.2519.2436 fun:padico_tasklet_worker.2516.2614 fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:padico_addrdb_publish_handler fun:puk_xml_start_handler.3574 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 fun:XML_ParseBuffer fun:puk_xml_parse_buffer fun:na_control_event_listener fun:padico_tm_req_wait fun:padico_tm_mod_action_args fun:run_end_handler.2909.2464 fun:puk_xml_end_handler.3579 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 fun:XML_ParseBuffer fun:puk_xml_parse_buffer fun:na_control_event_listener fun:padico_tasklet_worker.2516.2614 fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:sysio_control_read_callback fun:na_sysio_thread fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:topo_property_end_handler fun:puk_xml_end_handler.3579 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 fun:XML_ParseBuffer fun:puk_xml_parse_buffer fun:na_control_event_listener fun:padico_tasklet_flush fun:padico_module_init fun:unit_binary_load.5249.2496 fun:padico_puk_mod_load fun:_puk_component_resolve fun:padico_control_send_msg fun:padico_control_send_ext fun:control_router_event_listener fun:padico_tasklet_worker.2516.2614 fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:padico_tm_tasks_process.2519.2436 fun:padico_tasklet_worker.2516.2614 fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:padico_addrdb_publish_handler fun:puk_xml_start_handler.3574 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 fun:XML_ParseBuffer fun:puk_xml_parse_buffer fun:na_control_event_listener fun:padico_tm_req_wait fun:padico_tm_mod_action_args fun:run_end_handler.2909.2464 fun:puk_xml_end_handler.3579 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 fun:XML_ParseBuffer fun:puk_xml_parse_buffer fun:na_control_event_listener fun:padico_tasklet_worker.2516.2614 fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:sysio_control_read_callback fun:na_sysio_thread fun:start_thread fun:clone } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:topo_property_end_handler fun:puk_xml_end_handler.3579 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 fun:XML_ParseBuffer fun:puk_xml_parse_buffer fun:na_control_event_listener fun:padico_tasklet_flush fun:router_update_end_handler fun:puk_xml_end_handler.3579 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 obj:/lib/x86_64-linux-gnu/libexpat.so.1.6.0 fun:XML_ParseBuffer fun:puk_xml_parse_buffer fun:na_control_event_listener fun:padico_tasklet_worker.2516.2614 fun:start_thread fun:clone } starpu-1.4.9+dfsg/tools/dev/valgrind/papi.suppr000066400000000000000000000026031507764646700215610ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { Memcheck:Leak fun:malloc ... fun:_pe_libpfm4_init ... } # This happens in multithreaded_init: papi does not support getting initialized in one thread and shut down in another thread. { Memcheck:Leak ... fun:allocate_thread ... } # This seems like a leak in papi { Memcheck:Leak match-leak-kinds: definite ... fun:pfmlib_build_fstr.part.0 fun:pfmlib_perf_event_encode fun:allocate_native_event fun:_pe_libpfm4_ntv_name_to_code fun:_papi_hwi_native_name_to_code fun:PAPI_event_name_to_code fun:PAPI_event_name_to_code fun:PAPI_add_named_event fun:add_event ... } starpu-1.4.9+dfsg/tools/dev/valgrind/pthread.suppr000066400000000000000000000020741507764646700222610ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { pthread_create Memcheck:Leak ... fun:pthread_create@@GLIBC_2.2.5 ... } { name Memcheck:Leak ... fun:pthread_cancel_init ... } { Helgrind 3.9 wrongly compares to-be-destroyed mutex with init value Helgrind:Race fun:my_memcmp fun:pthread_mutex_destroy ... } { Memcheck:Value8 obj:/lib/x86_64-linux-gnu/libpthread-2.28.so } starpu-1.4.9+dfsg/tools/dev/valgrind/starpu.suppr000066400000000000000000000104201507764646700221420ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2012-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { don't care about cache hit stats Helgrind:Race fun:_starpu_msi_cache_hit ... } { don't care about cache miss stats Helgrind:Race fun:_starpu_msi_cache_miss ... } { don't care about data status query race, this is just a hint Helgrind:Race fun:starpu_data_query_status ... } { don't care about data status query race, this is just a hint Helgrind:Race fun:starpu_data_query_status2 ... } { known race, but not problematic in practice, see comment in _starpu_tag_clear Helgrind:LockOrder ... fun:_starpu_tag_free fun:_starpu_htbl_clear_tags ... fun:_starpu_tag_clear fun:starpu_shutdown ... } { There is actually no race on current_mode, because the mode can not change unexpectedly, until _starpu_notify_data_dependencies() is called further down. Valgrind can not know about such software rwlock. Helgrind:Race fun:_starpu_release_data_on_node fun:_starpu_push_task_output ... } { We do not care about races on profiling statistics Helgrind:Race fun:_starpu_worker_get_status fun:_starpu_worker_reset_profiling_info_with_lock ... } { This is racy, but since we'll always put the same values, this is not a problem. Helgrind:Race fun:_starpu_codelet_check_deprecated_fields ... } { This is racy, but we don't care, it's only a statistic Helgrind:Race fun:starpu_task_nsubmitted ... } { This is racy, but we don't care, it's only a statistic Helgrind:Race fun:starpu_task_nready ... } { This is racy, but we don't care, it's only a statistic Helgrind:Race fun:_starpu_bus_update_profiling_info ... } { fscanf error Memcheck:Cond ... fun:fscanf fun:_starpu_load_bus_performance_files ... } { locking order. It's not a problem when it's a trylock... but helgrind doesn't handle that :/ https://bugs.kde.org/show_bug.cgi?id=243232 Helgrind:LockOrder fun:mutex_trylock_WRK ... } { locking order. It's not a problem when it's a trylock... but helgrind doesn't handle that :/ https://bugs.kde.org/show_bug.cgi?id=243232 Helgrind:LockOrder fun:pthread_rwlock_trywrlock_WRK ... } { locking order. It's not a problem when it's a trylock... but helgrind doesn't handle that :/ https://bugs.kde.org/show_bug.cgi?id=243232 Helgrind:LockOrder fun:pthread_rwlock_tryrdlock_WRK ... } { mc / handle locking order1 Helgrind:LockOrder ... fun:__starpu_spin_lock fun:try_to_free_mem_chunk ... } { mc / handle locking order1 Helgrind:LockOrder ... fun:__starpu_spin_lock fun:try_to_throw_mem_chunk ... } { mc / handle locking order2 Helgrind:LockOrder ... fun:__starpu_spin_lock fun:try_to_find_reusable_mem_chunk ... } { mc / handle locking order2 Helgrind:LockOrder ... fun:__starpu_spin_lock fun:try_to_find_reusable_mc ... } { mc / handle locking order3 Helgrind:LockOrder ... fun:__starpu_spin_lock fun:free_potentially_in_use_mc ... } { mc / handle locking order4 Helgrind:LockOrder ... fun:__starpu_spin_lock fun:free_potentially_in_use_mc ... } { mc / handle locking order5 Helgrind:LockOrder ... fun:__starpu_spin_lock fun:register_mem_chunk ... } { mc / handle locking order6 Helgrind:LockOrder ... fun:__starpu_spin_lock fun:_starpu_request_mem_chunk_removal ... } { mc / handle locking order7 Helgrind:LockOrder ... fun:__starpu_spin_lock fun:_starpu_allocate_interface ... } { mc / handle locking order8 Helgrind:LockOrder ... fun:__starpu_spin_lock fun:_starpu_memchunk_recently_used ... } starpu-1.4.9+dfsg/tools/dev/valgrind/starpu_pw.suppr000066400000000000000000000034141507764646700226550ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # the code needs to be fixed that these suppression are not needed { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:_starpu_parallel_worker_init fun:_starpu_parallel_worker fun:_starpu_parallel_worker_group fun:_starpu_parallel_worker_topology fun:_starpu_parallel_worker_config fun:_starpu_parallel_worker_init_varg fun:starpu_parallel_worker_init } { Memcheck:Leak match-leak-kinds: definite fun:calloc fun:_starpu_parallel_worker_init_varg fun:starpu_parallel_worker_init } { Memcheck:Leak match-leak-kinds: definite fun:calloc fun:_starpu_parallel_worker_init_varg fun:starpu_cluster_machine } { Memcheck:Leak match-leak-kinds: definite fun:malloc fun:_starpu_parallel_worker_init fun:_starpu_parallel_worker fun:_starpu_parallel_worker_group fun:_starpu_parallel_worker_topology fun:_starpu_parallel_worker_config fun:_starpu_parallel_worker_init_varg fun:starpu_cluster_machine } starpu-1.4.9+dfsg/tools/dev/valgrind/starpupy.suppr000066400000000000000000000027211507764646700225200ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { Memcheck:Leak fun:malloc ... obj:*/numpy/random/* ... } { Memcheck:Leak fun:malloc ... fun:_abc__abc_init.lto_priv.0 ... } { Memcheck:Leak fun:malloc ... fun:PyTuple_New ... fun:marshal_loads ... } { Memcheck:Leak fun:malloc ... fun:PyTuple_New ... fun:PyMarshal_ReadObjectFromString ... } { Memcheck:Param epoll_ctl(event) fun:epoll_ctl fun:pyepoll_internal_ctl.lto_priv.0 ... } { Memcheck:Addr8 fun:strncmp fun:is_dst ... fun:dlopen@@GLIBC_2.2.5 fun:_PyImport_FindSharedFuncptr ... } starpu-1.4.9+dfsg/tools/dev/valgrind/valgrind.sh000077500000000000000000000033301507764646700217000ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # EXEC=$(basename $0 .sh) DIRNAME=$(dirname $0) CLIMIT=$(ulimit -c) if [ "$CLIMIT" = unlimited ] then # valgrind cores are often *huge*, 100MB will already be quite big... ulimit -c 100000 fi if test "$EXEC" == "valgrind" then RUN="valgrind --track-origins=yes --show-reachable=yes --leak-check=full --errors-for-leak-kinds=all --show-leak-kinds=all --error-exitcode=42 $STARPU_VALGRIND_OPTIONS" elif test "$EXEC" == "valgrind_xml" then mkdir -p ${DIRNAME}/../../../valgrind XML_FILE=$(mktemp -p ${DIRNAME}/../../../valgrind starpu-valgrind_XXXXXXXXXX.xml) RUN="valgrind --track-origins=yes --show-reachable=yes --leak-check=full --errors-for-leak-kinds=all --show-leak-kinds=all --xml=yes --xml-file=${XML_FILE} $STARPU_VALGRIND_OPTIONS" else RUN="valgrind --tool=$EXEC --error-exitcode=42" fi SUPPRESSIONS=$(for f in $(dirname $0)/*.suppr /usr/share/hwloc/hwloc-valgrind.supp; do if test -f $f ; then echo "--suppressions=$f" ; fi ; done) exec $RUN --keep-debuginfo=yes --num-callers=42 --error-limit=no --gen-suppressions=all $SUPPRESSIONS $* starpu-1.4.9+dfsg/tools/dev/valgrind/valgrind.suppr000066400000000000000000000016511507764646700224400ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # { valgrind_suppr1 Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:ensure_sReq_space fun:add_shadow_Request fun:PMPI_Irecv } { _dl_init_suppr_01 Memcheck:Leak fun:malloc ... fun:call_init.part.0 fun:_dl_init ... } starpu-1.4.9+dfsg/tools/dev/valgrind/valgrind_xml.sh000077500000000000000000000033301507764646700225600ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # EXEC=$(basename $0 .sh) DIRNAME=$(dirname $0) CLIMIT=$(ulimit -c) if [ "$CLIMIT" = unlimited ] then # valgrind cores are often *huge*, 100MB will already be quite big... ulimit -c 100000 fi if test "$EXEC" == "valgrind" then RUN="valgrind --track-origins=yes --show-reachable=yes --leak-check=full --errors-for-leak-kinds=all --show-leak-kinds=all --error-exitcode=42 $STARPU_VALGRIND_OPTIONS" elif test "$EXEC" == "valgrind_xml" then mkdir -p ${DIRNAME}/../../../valgrind XML_FILE=$(mktemp -p ${DIRNAME}/../../../valgrind starpu-valgrind_XXXXXXXXXX.xml) RUN="valgrind --track-origins=yes --show-reachable=yes --leak-check=full --errors-for-leak-kinds=all --show-leak-kinds=all --xml=yes --xml-file=${XML_FILE} $STARPU_VALGRIND_OPTIONS" else RUN="valgrind --tool=$EXEC --error-exitcode=42" fi SUPPRESSIONS=$(for f in $(dirname $0)/*.suppr /usr/share/hwloc/hwloc-valgrind.supp; do if test -f $f ; then echo "--suppressions=$f" ; fi ; done) exec $RUN --keep-debuginfo=yes --num-callers=42 --error-limit=no --gen-suppressions=all $SUPPRESSIONS $* starpu-1.4.9+dfsg/tools/distrib/000077500000000000000000000000001507764646700166105ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/distrib/distrib.r000077500000000000000000000026401507764646700204400ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # t<-read.table("output.update_block"); plot.multi.dens <- function(s) { junk.x = NULL junk.y = NULL for(i in 1:length(s)) { junk.x = c(junk.x, density(s[[i]])$x) junk.y = c(junk.y, density(s[[i]])$y) } xr <- range(junk.x) yr <- range(junk.y) plot(density(s[[1]]), xlim = xr, ylim = yr, main="") for(i in 1:length(s)) { lines(density(s[[i]]), xlim = xr, ylim = yr, col = i) } } cpus <- seq(1, 8); per_cpu <- function(cpuid) { t[4][t[2] == cpuid]; } density_cpu <- function(cpuid) { density(per_cpu(cpuid)) } l <- list() leg <- c() for (cpu in 1:4) { l <- c(l, list(per_cpu(cpu))) leg <- c(leg, cpu) } library(Hmisc) plot.multi.dens( l); le <- largest.empty(per_cpu(0), per_cpu(1), 0.1, 0.1) legend(le,legend=leg, col=(1:8), lwd=2, lty = 1) starpu-1.4.9+dfsg/tools/distrib/distrib.sh000077500000000000000000000015031507764646700206060ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # filename=$1 symbol_list=`cut -f1 $filename|sort -u` for symbol in $symbol_list do echo $symbol grep "^$symbol" $filename > output.$symbol done starpu-1.4.9+dfsg/tools/gdbinit000066400000000000000000001140261507764646700165170ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # To set a breakpoint when starting gdb with option "-ex run", # here what you need to do: #set breakpoint pending on #break starpu_mpi.c:419 define starpu-print-job set language c set $job = (struct _starpu_job *)$arg0 printf "StarPU Job (%p)\n", $job if ($job != 0) printf "\ttask:\t\t\t\t<%p>\n", $job->task printf "\tsubmitted:\t\t\t<%d>\n", $job->submitted printf "\tterminated:\t\t\t<%d>\n", $job->terminated printf "\tjob_id:\t\t\t\t<%d>\n", $job->job_id if $job->task && $job->task->name printf "\tname:\t\t\t\t<%s>\n", $job->task->name end end end define starpu-print-task set language c set $task = (struct starpu_task *)$arg0 set $job = (struct _starpu_job *)$task->starpu_private set $status=0 printf "StarPU Task (%p)\n", $task if $task->name printf "\tname:\t\t\t\t<%s>\n", $task->name end printf "\tcodelet:\t\t\t<%p>\n", $task->cl if $task->cl && $task->cl->name printf "\tcodelet name:\t\t\t<%s>\n", $task->cl->name end set $nbuffers = $task->nbuffers if $task->cl && $task->cl->nbuffers != -1 set $nbuffers = $task->cl->nbuffers end printf "\tnbuffers:\t\t\t<%d>\n", $nbuffers printf "\tcallback:\t\t\t<%p>\n", $task->callback_func printf "\tcl_arg:\t\t\t\t<%p>\n", $task->cl_arg printf "\tsynchronous:\t\t\t<%d>\n", $task->synchronous printf "\texecute_on_a_specific_worker:\t<%d>\n", $task->execute_on_a_specific_worker printf "\tworkerid:\t\t\t<%d>\n", $task->workerid printf "\tpriority:\t\t\t<%d>\n", $task->priority printf "\tdetach:\t\t\t\t<%d>\n", $task->detach printf "\tdestroy:\t\t\t<%d>\n", $task->destroy printf "\tregenerate:\t\t\t<%d>\n", $task->regenerate printf "\tstatus:\t\t\t\t" if $task->status == 0 printf "STARPU_TASK_INIT" end if $task->status == 1 printf "STARPU_TASK_BLOCKED" end if $task->status == 2 printf "STARPU_TASK_READY" end if $task->status == 3 printf "STARPU_TASK_RUNNING" end if $task->status == 4 printf "STARPU_TASK_FINISHED" end if $task->status == 5 printf "STARPU_TASK_BLOCKED_ON_TAG" end if $task->status == 6 printf "STARPU_TASK_BLOCKED_ON_TASK" end if $task->status == 7 printf "STARPU_TASK_BLOCKED_ON_DATA" end if $task->status == 8 printf "STARPU_TASK_STOPPED" end printf "\n" printf "\tjob:\t\t\t\t<%p>\n", $job printf "\ttag_id:\t\t\t\t<%d>\n", $task->tag_id printf "\tndeps:\t\t\t\t<%u>\n", $job->job_successors->ndeps printf "\tndeps_remaining:\t\t<%u>\n", $job->job_successors->ndeps - $job->job_successors->ndeps_completed if _starpu_debug set $n = 0 while $n < $job->job_successors->ndeps if ! $job->job_successors->done[$n] set $cg = $job->job_successors->deps[$n] set $m = 0 printf "\t\tcg:\t\t\t<%u>\n", $cg->ndeps while $m < $cg->ndeps if ! $cg->done[$m] set $depj = (struct _starpu_job *) $cg->deps[$m] printf "\t\t\ttask %p\n", $depj->task end set $m = $m + 1 end end set $n = $n + 1 end end printf "\tndeps_completed:\t\t<%u>\n", $job->job_successors->ndeps_completed printf "\tnsuccs:\t\t\t\t<%u>\n", $job->job_successors->nsuccs printf "\tparent:\t\t\t\t<%lu>\n", $task->bubble_parent if $job starpu-print-job $job end end define starpu-print-task-and-successor set language c set $t = (struct starpu_task *) ($arg0) starpu-print-task $t set $j = (struct _starpu_job *) $t->starpu_private set $nsuccs = $j->job_successors.nsuccs set $i = 0 while $i < $nsuccs set $cg = $j->job_successors.succ[$i] if ($cg->cg_type == 1) # STARPU_CG_APPS printf "waited for by application" end if ($cg->cg_type == 2) # STARPU_CG_TAG printf "will produce tag %x\n", $cg->succ.tag end if ($cg->cg_type == 4) # STARPU_CG_TASK printf "dep of job %p task %p\n", $cg->succ.job, $cg->succ.job->task starpu-print-task $cg->succ.job->task end set $i = $i + 1 end end define starpu-print-tasks-treelist set $list = $arg0 set $task = $list->_head while $task != 0 starpu-print-task $task set $task = $task->_next end end define starpu-print-tasks-tree if $arg0 starpu-print-tasks-tree $arg0->children[0] set $stage = (struct _starpu_task_prio_list_stage *) $arg0 starpu-print-tasks-treelist (&($stage->list)) starpu-print-tasks-tree $arg0->children[1] end end define starpu-print-tasks-list if _starpu_debug starpu-print-tasks-treelist &$arg0.list else starpu-print-tasks-tree $arg0.tree.root end end define starpu-tasks-on-worker set language c set $worker=&_starpu_config->workers[$arg0] starpu-print-tasks-list $worker->local_tasks end define starpu-tasks-on-workers set language c set $num=0 while $num<_starpu_config->topology->nworkers printf "Worker %s\n", _starpu_config->workers[$num].name starpu-tasks-on-worker $num set $num = $num + 1 end end define starpu-workers set language c set $num=0 printf "[Id] Name Arch Mask Devid Bindid Workerid Isrunning Isinitialized Status\n" while $num<_starpu_config->topology->nworkers set $worker=&_starpu_config->workers[$num] printf "[%2d] %-40s %-4d %-4d %-5d %-6d %-8d %-9d %-13d", $num, $worker->name, $worker->arch, $worker->worker_mask, \ $worker->devid, $worker->bindid, $worker->workerid, $worker->worker_is_running, $worker->worker_is_initialized if $worker->status == STATUS_INVALID printf " INVALID" end if $worker->status == STATUS_UNKNOWN printf " UNKNOWN" end if $worker->status & STATUS_INITIALIZING printf " INITIALIZING" end if $worker->status & STATUS_EXECUTING printf " EXECUTING" end if $worker->status & STATUS_CALLBACK printf " CALLBACK" end if $worker->status & STATUS_SCHEDULING printf " SCHEDULING" end if $worker->status & STATUS_WAITING printf " WAITING" end if $worker->status & STATUS_SLEEPING printf " SLEEPING" end printf "\n" set $num = $num + 1 end end define starpu-print-tag set language c set $tag_struct = (struct _starpu_tag *)_gettag_struct($arg0) printf "tag %d ((struct _starpu_tag *) %p)\n", $arg0, $tag_struct printf "\tstate " if $tag_struct->state == STARPU_INVALID_STATE printf "STARPU_INVALID_STATE" end if $tag_struct->state == STARPU_ASSOCIATED printf "STARPU_ASSOCIATED" end if $tag_struct->state == STARPU_BLOCKED printf "STARPU_BLOCKED" end if $tag_struct->state == STARPU_READY printf "STARPU_READY" end if $tag_struct->state == STARPU_DONE printf "STARPU_DONE" end printf "\n" printf "\tdeps %d\n", $tag_struct->tag_successors.ndeps printf "\tcompleted %d\n", $tag_struct->tag_successors.ndeps_completed printf "\tndeps_remaining:\t\t<%u>\n", $tag_struct->tag_successors->ndeps - $tag_struct->tag_successors->ndeps_completed if _starpu_debug set $n = 0 while $n < $tag_struct->tag_successors->ndeps if ! $tag_struct->tag_successors->done[$n] set $cg = $tag_struct->tag_successors->deps[$n] set $m = 0 printf "\t\tcg:\t\t\t<%u>\n", $cg->ndeps while $m < $cg->ndeps if ! $cg->done[$m] set $dept = (starpu_tag_t) $cg->deps[$m] printf "\t\t\ttag %u\n", $dept end set $m = $m + 1 end end set $n = $n + 1 end end printf "\tndeps_completed:\t\t<%u>\n", $tag_struct->tag_successors->ndeps_completed printf "\tnsuccs:\t\t\t\t<%u>\n", $tag_struct->tag_successors->nsuccs end define starpu-tags set language c printf "tags htbl %p\n", tag_htbl set $tags = tag_htbl while $tags starpu-print-tag $tags->id set $tags = (struct _starpu_tag_table *) $tags.hh.next end end define starpu-tasks set language c set $num=0 set $nsubmitted=0 set $nready=0 while $num<_starpu_config->topology->nsched_ctxs set $nsubmitted = $nsubmitted + _starpu_config->sched_ctxs[$num]->tasks_barrier->barrier->reached_start set $nready = $nready + _starpu_config->sched_ctxs[$num]->ready_tasks_barrier->barrier->reached_start set $num = $num + 1 end printf "%d submitted tasks\n", $nsubmitted printf "%d ready tasks\n", $nready printf "Tasks being run:\n" set $n = 0 while $n < _starpu_config.topology.nworkers set $w = &_starpu_config.workers[$n] printf "worker %d %s:\n", $n, $w->short_name if $w->pipeline_length > 0 set $m = 0 while $m < $w->ntasks set $t = $w->current_tasks[($w->first_task + $m) % (sizeof($w->current_tasks)/sizeof($w->current_tasks[0]))] printf " task %p\n", $t set $m = $m + 1 end end set $task = $w->current_task if ($task) printf " task %p\n", $task end set $n = $n + 1 end if (tag_htbl) printf "TODO: tags\n" end print "TODO: complete\n" end define starpu-print-all-tasks set language c if ! _starpu_debug printf "you need to configure with --enable-debug to get starpu-print-all-tasks working\n" else set $l = all_jobs_list->next while $l != &all_jobs_list set $j = (struct _starpu_job*) (((unsigned long) $l) - ((unsigned long) &((struct _starpu_job *)0)->all_submitted)) printf "task %p\n", $j->task starpu-print-task $j->task set $l = $l->next end end end define starpu-all-tasks set language c if ! _starpu_debug printf "you need to configure with --enable-debug to get starpu-all-tasks working\n" else set $l = all_jobs_list->next while $l != &all_jobs_list set $j = (struct _starpu_job*) (((unsigned long) $l) - ((unsigned long) &((struct _starpu_job *)0)->all_submitted)) set $task = $j->task if $task->name printf "task %p %s\n", $task, $task->name else printf "task %p\n", $task end set $l = $l->next end end end define starpu printf "Here I am...\n" end define starpu-print-mode if ($arg0 & 1) printf "R" end if ($arg0 & 2) printf "W" end if ($arg0 & 4) printf " SCRATCH" end if ($arg0 & 8) printf " REDUX" end end define starpu-print-data set language c set $data = (starpu_data_handle_t) $arg0 printf "Data handle %p\n", $data if $data->ops->interfaceid == 0 printf "Matrix\n" end if $data->ops->interfaceid == 1 printf "Block\n" end if $data->ops->interfaceid == 2 printf "Vector\n" end if $data->ops->interfaceid == 3 printf "CSR\n" end if $data->ops->interfaceid == 4 printf "BCSR\n" end if $data->ops->interfaceid == 5 printf "Variable\n" end if $data->ops->interfaceid == 6 printf "Void\n" end if $data->ops->interfaceid == 7 printf "Multiformat\n" end if $data->ops->interfaceid == 8 printf "COO\n" end if $data->ops->interfaceid == 9 printf "Tensor\n" end if $data->ops->interfaceid > 9 printf "Interface id %d\n", $data->ops->interfaceid end printf "Home node %d\n", $data->home_node printf "RWlock refs %d\n", $data->refcnt printf "Busy count %d\n", $data->busy_count printf "Initialization submitted %d\n", $data->initialized printf "Current mode " starpu-print-mode $data->current_mode printf "\n" if $data->mpi_data printf "TAG %ld\n",((struct _starpu_mpi_data *) ($data->mpi_data))->node_tag.data_tag end if $data->current_mode & (4|8) set $n = 0 while $n < _starpu_config.topology.nworkers set $replicate = $data->per_worker[$n] printf "Worker %2d %10s:", $n, _starpu_config->workers[$n]->name if $replicate.state == 0 printf " OWNER" end if $replicate.state == 1 printf " SHARED" end if $replicate.state == 2 printf " INVALID" end if $replicate.initialized printf " initialized" end printf "\n" set $n = $n + 1 end else set $n = 0 while $n < _starpu_descr.nnodes set $replicate = &$data->per_node[$n] printf "Node %2d (%2d):", $n, $replicate->refcnt if $replicate.state == 0 printf " OWNER" end if $replicate.state == 1 printf " SHARED" end if $replicate.state == 2 printf " INVALID" end if $replicate.initialized printf " initialized" end printf "\n" set $m = 0 while $m < _starpu_descr.nnodes if $replicate->request[$m] printf " request %p from %d\n", $replicate->request[$m], $m end set $m = $m + 1 end set $n = $n + 1 end set $r = $data->write_invalidation_req if $r printf "w_req %p for %d\n", $r, $r->dst_replicate->memory_node end end printf "Post sync tasks\n" set $tasklist = $data->post_sync_tasks while $tasklist != 0x0 starpu-print-task $tasklist->task set $tasklist = $tasklist->next end if _starpu_debug printf "Requester tasks\n" set $requesterlist = $data->req_list.list._head while $requesterlist != 0x0 printf "mode: " starpu-print-mode $requesterlist->mode printf "\n" starpu-print-job $requesterlist->j set $requesterlist = $requesterlist->_next end printf "Arbitered requester tasks\n" set $requesterlist = $data->arbitered_req_list.list._head while $requesterlist != 0x0 printf "mode: " starpu-print-mode $requesterlist->mode printf "\n" starpu-print-job $requesterlist->j set $requesterlist = $requesterlist->_next end else printf "Printing requester tasks not supported yet without --enable-debug, sorry!\n" end if ($data->nchildren) printf "%d children\n", $data->nchildren end end define starpu-print-datas set $entry = registered_handles while $entry starpu-print-data $entry->handle printf "\n" set $entry = (struct handle_entry *) $entry.hh.next end end define starpu-print-datas-summary set language c set $entry = registered_handles set $data_n = 0 set $pw_data_n = 0 set $data_n_allocated = 0 set $replicate_n_owners = 0 set $replicate_n_shared = 0 set $replicate_n_invalid = 0 set $replicate_n_initialized = 0 set $replicate_n_allocated = 0 set $pw_replicate_n_owners = 0 set $pw_replicate_n_shared = 0 set $pw_replicate_n_invalid = 0 set $pw_replicate_n_initialized = 0 set $pw_replicate_n_allocated = 0 while $entry set $data = (starpu_data_handle_t) $entry->handle if $data->current_mode & (4|8) set $pw_data_n = $pw_data_n + 1 set $n = 0 while $n < _starpu_config.topology.nworkers set $replicate = $data->per_worker[$n] if $replicate.state == 0 set $pw_replicate_n_owners = $pw_replicate_n_owners + 1 end if $replicate.state == 1 set $pw_replicate_n_shared = $pw_replicate_n_shared + 1 end if $replicate.state == 2 set $pw_replicate_n_invalid = $pw_replicate_n_invalid + 1 end if $replicate.initialized set $pw_replicate_n_initialized = $pw_replicate_n_initialized + 1 end if $replicate.allocated set $pw_replicate_n_allocated = $pw_replicate_n_allocated + 1 end set $n = $n + 1 end else set $data_n = $data_n + 1 set $n = 0 while $n < _starpu_descr.nnodes set $replicate = &$data->per_node[$n] if $replicate.state == 0 set $replicate_n_owners = $replicate_n_owners + 1 end if $replicate.state == 1 set $replicate_n_shared = $replicate_n_shared + 1 end if $replicate.state == 2 set $replicate_n_invalid = $replicate_n_invalid + 1 end if $replicate.initialized set $replicate_n_initialized = $replicate_n_initialized + 1 end if $replicate.allocated set $replicate_n_allocated = $replicate_n_allocated + 1 set $data_allocated = 1 end set $n = $n + 1 end if $data_allocated set $data_n_allocated = $data_n_allocated + 1 end end set $entry = (struct handle_entry *) $entry.hh.next end printf "Number of handles: %d\n", $data_n printf "Number of allocated handles: %d\n", $data_n_allocated printf "Number of OWNER replicates: %d\n", $replicate_n_owners printf "Number of SHARED replicates: %d\n", $replicate_n_shared printf "Number of INVALID replicates: %d\n", $replicate_n_invalid printf "Number of initialized replicates: %d\n", $replicate_n_initialized printf "Number of allocated replicates: %d\n", $replicate_n_allocated printf "Number of per-worker handles: %d\n", $pw_data_n printf "Number of OWNER per-worker replicates: %d\n", $pw_replicate_n_owners printf "Number of SHARED per-worker replicates: %d\n", $pw_replicate_n_shared printf "Number of INVALID per-worker replicates: %d\n", $pw_replicate_n_invalid printf "Number of initialized per-worker replicates: %d\n", $pw_replicate_n_initialized printf "Number of allocated per-worker replicates: %d\n", $pw_replicate_n_allocated end define starpu-print-replicate set $repl = (struct _starpu_data_replicate*) $arg0 printf "Handle %p\n", $repl->handle printf "Interface %p\n", $repl->data_interface printf "Refcnt %u\n", $repl->refcnt printf "Node %u\n", $repl->memory_node if $repl->state == 0 printf "Owner\n" end if $repl->state == 1 printf "Shared\n" end if $repl->state == 2 printf "Invalid\n" end if $repl->relaxed_coherency printf " relaxed_coherency" end if $repl->initialized printf " initialized" end if $repl->allocated printf " allocated" end if $repl->automatically_allocated printf " automatically allocated" end printf "\n" printf "MC %p\n", $repl->mc end define starpu-print-request set $r = (struct _starpu_data_request *)$arg0 printf "Request %p\n", $r printf "Origin %s\n", $r->origin printf "Refcnt %d\n", $r->refcnt printf "Handle %p\n", $r->handle printf "src_replicate %p", $r->src_replicate if $r->src_replicate printf " (%d)", $r->src_replicate->memory_node end printf "\n" printf "dst_replicate %p", $r->dst_replicate if $r->dst_replicate printf " (%d)", $r->dst_replicate->memory_node end printf "\n" printf "handling_node %d\n", $r->handling_node if ($r->mode & 1) printf "R" end if ($r->mode & 2) printf "W" end if ($r->mode & 4) printf "S" end if ($r->mode & 8) printf "X" end printf "\n" printf "completed: %d\n", $r->completed printf "prefetch: %d\n", $r->prefetch printf "retval: %d\n", $r->retval printf "ndeps: %d\n", $r->ndeps printf "next_req_count: %d\n", $r->next_req_count set $c = 0 while $c < $r->next_req_count printf " %p\n", $r->next_req[$c] set $c = $c + 1 end printf "comid: %u\n", $r->com_id set $c = $r->callbacks while $c != 0 printf "callback: %p %p\n", $c->callback_func, $c->callback_arg set $c = $c->next end end define starpu-print-requests-treelist set $list = $arg0 set $request = $list->_head while $request != 0 printf " Request %p: handle %p ", $request, $request->handle starpu-print-mode $request->mode printf "\n" set $request = $request->_next end end define starpu-print-requests-tree if $arg0 starpu-print-requests-tree $arg0->children[0] set $stage = (struct _starpu_data_request_prio_list_stage *) $arg0 starpu-print-requests-treelist (&($stage->list)) starpu-print-requests-tree $arg0->children[1] end end define starpu-print-requests-list if _starpu_debug starpu-print-requests-treelist &$arg0.list else starpu-print-requests-tree $arg0.tree.root end end define starpu-print-requests set $node = 0 while $node < _starpu_descr.nnodes printf "Node %u:\n", $node set $node2 = 0 while $node2 < _starpu_descr.nnodes starpu-print-requests-list _starpu_config.nodes[$node].data_requests[$node2][0] starpu-print-requests-list _starpu_config.nodes[$node].data_requests[$node2][1] set $node2 = $node2 + 1 end set $node = $node + 1 end end define starpu-print-prequests set $node = 0 while $node < _starpu_descr.nnodes printf "Node %u:\n", $node set $node2 = 0 while $node2 < _starpu_descr.nnodes if _starpu_config.nodes[$node].data_requests_npending[$node2][0] printf "%u pending requests from %u\n", _starpu_config.nodes[$node].data_requests_npending[$node2][0], $node2 end starpu-print-requests-list _starpu_config.nodes[$node].data_requests_pending[$node2][0] if _starpu_config.nodes[$node].data_requests_npending[$node2][1] printf "%u pending requests to %u\n", _starpu_config.nodes[$node].data_requests_npending[$node2][1], $node2 end starpu-print-requests-list _starpu_config.nodes[$node].data_requests_pending[$node2][1] set $node2 = $node2 + 1 end set $node = $node + 1 end end define starpu-print-arch set $arch = (struct starpu_perfmodel_arch *)$arg0 set $device = 0 while $device < $arch->ndevices printf " Device type %d - devid: %d - ncores: %d\n", $arch->devices[$device].type, $arch->devices[$device].devid, $arch->devices[$device].ncores set $device = $device + 1 end end define starpu-print-archs set $comb = 0 while $comb < current_arch_comb printf "Combination %d with %d devices\n", $comb, arch_combs[$comb]->ndevices starpu-print-arch arch_combs[$comb] set $comb = $comb + 1 end end define starpu-print-frequests set $node = 0 while $node < _starpu_descr.nnodes printf "Node %u:\n", $node set $node2 = 0 while $node2 < _starpu_descr.nnodes starpu-print-requests-list _starpu_config.nodes[$node].prefetch_requests[$node2][0] starpu-print-requests-list _starpu_config.nodes[$node].prefetch_requests[$node2][1] set $node2 = $node2 + 1 end set $node = $node + 1 end end define starpu-print-irequests set $node = 0 while $node < _starpu_descr.nnodes printf "Node %u:\n", $node set $node2 = 0 while $node2 < _starpu_descr.nnodes starpu-print-requests-list _starpu_config.nodes[$node].idle_requests[$node2][0] starpu-print-requests-list _starpu_config.nodes[$node].idle_requests[$node2][1] set $node2 = $node2 + 1 end set $node = $node + 1 end end define starpu-memusage set scheduler-locking on set $node = 0 while $node < _starpu_descr.nnodes printf "\n\nNode %u (%d/MiB/%dMiB):\n", $node, _starpu_config.nodes[$node].used_size >> 20, _starpu_config.nodes[$node].global_size >> 20 set $total = 0 set $total_b = 0 set $wt = 0 set $wt_b = 0 set $home = 0 set $home_b = 0 set $ooc = 0 set $ooc_b = 0 set $prefetch = 0 set $prefetch_b = 0 set $redux = 0 set $redux_b = 0 set $relax = 0 set $relax_b = 0 set $noref = 0 set $noref_b = 0 set $normal = 0 set $normal_b = 0 set $owner = 0 set $owner_b = 0 set $shared = 0 set $shared_b = 0 set $invalid = 0 set $invalid_b = 0 set $nodataref = 0 set $nodataref_b = 0 set $nosubdataref = 0 set $nosubdataref_b = 0 set $reading = 0 set $reading_b = 0 set $writing = 0 set $writing_b = 0 set $overwriting = 0 set $overwriting_b = 0 set $mc = _starpu_config.nodes[$node].mc_list->_head set pagination off while $mc != 0 set $handle = $mc->data set $replicate = &$handle->per_node[$node] set $size = _starpu_data_get_size($handle) set $total = $total + 1 if $total % 100 == 0 printf "\rinspected %d data...", $total end set $total_b = $total_b + $size if $node < sizeof($handle->wt_mask) * 8 && $handle->wt_mask & (1 << $node) set $wt = $wt + 1 set $wt_b = $wt_b + $size end if $node == $handle->home_node set $home = $home + 1 set $home_b = $home_b + $size end if $handle->ooc set $ooc = $ooc + 1 set $ooc_b = $ooc_b + $size end if $replicate->nb_tasks_prefetch set $prefetch = $prefetch + 1 set $prefetch_b = $prefetch_b + $size end if $mc->relaxed_coherency == 2 set $redux = $redux + 1 set $redux_b = $redux_b + $size end if $mc->relaxed_coherency == 1 set $relax = $relax + 1 set $relax_b = $relax_b + $size if $mc->replicate if $mc->replicate->refcnt == 0 set $noref = $noref + 1 set $noref_b = $noref_b + $size end end end if $mc->relaxed_coherency == 0 set $normal = $normal + 1 set $normal_b = $normal_b + $size if $replicate->state == STARPU_OWNER set $owner = $owner + 1 set $owner_b = $owner_b + $size end if $replicate->state == STARPU_SHARED set $shared = $shared + 1 set $shared_b = $shared_b + $size end if $replicate->state == STARPU_INVALID set $invalid = $invalid + 1 set $invalid_b = $invalid_b + $size end if (may_free_subtree($handle,$node)) set $nosubdataref = $nosubdataref + 1 set $nosubdataref_b = $nosubdataref_b + $size end if $replicate->refcnt == 0 set $nodataref = $nodataref + 1 set $nodataref_b = $nodataref_b + $size else set $r = 0 set $w = 0 set $_node = 0 while $_node < _starpu_descr.nnodes set $_node = $_node + 1 if $_node != $node if $handle->per_node[$_node].request[$node] != 0 set $r = $r + 1 end if $handle->per_node[$node].request[$_node] != 0 set $w = $w + 1 end end end if $r != 0 set $reading = $reading + 1 set $reading_b = $reading_b + $size end if $w != 0 set $writing = $writing + 1 set $writing_b = $writing_b + $size end if $replicate->request[$node] != 0 set $overwriting = $overwriting + 1 set $overwriting_b = $overwriting_b + $size end end end set $mc = $mc->_next end set pagination on printf "\r \r" printf " Total used: %u, %uMiB\n", $total, $total_b / 1048576 printf " WT: %u, %uMiB\n", $wt, $wt_b / 1048576 printf " home: %u, %uMiB\n", $home, $home_b / 1048576 printf " OOC: %u, %uMiB\n", $ooc, $ooc_b / 1048576 printf " prefetch: %u, %uMiB\n", $prefetch, $prefetch_b / 1048576 printf " redux: %u, %uMiB\n", $redux, $redux_b / 1048576 printf " relax: %u, %uMiB\n", $relax, $relax_b / 1048576 printf " noref: %u, %uMiB\n", $noref, $noref_b / 1048576 printf " normal: %u, %uMiB\n", $normal, $normal_b / 1048576 printf " owner: %u, %uMiB\n", $owner, $owner_b / 1048576 printf " shared: %u, %uMiB\n", $shared, $shared_b / 1048576 printf " invalid: %u, %uMiB\n", $invalid, $invalid_b / 1048576 printf " nosubdataref: %u, %uMiB\n", $nosubdataref, $nosubdataref_b / 1048576 printf " nodataref: %u, %uMiB\n", $nodataref, $nodataref_b / 1048576 printf " reading: %u, %uMiB\n", $reading, $reading_b / 1048576 printf " writing: %u, %uMiB\n", $writing, $writing_b / 1048576 printf " overwriting: %u, %uMiB\n", $overwriting, $overwriting_b / 1048576 printf "\n cached: %u, %uMiB\n", _starpu_config.nodes[$node].mc_cache_nb, _starpu_config.nodes[$node].mc_cache_size / 1048576 set $node = $node + 1 end end define starpu-print-model set $model = (struct starpu_perfmodel *)$arg0 printf "Model %p type %d symbol ", $model, $model->type if $model->symbol printf "%s", $model->symbol else printf "NULL" end printf "\n" end define starpu-print-registered-models set $node = registered_models._head while $node starpu-print-model $node->model set $node = $node->_next end end define starpu-sched-data print _starpu_config.sched_ctxs[$arg0]->policy_data end define starpu-print-spaces set $j = 0 while $j < $arg0 printf " " set $j = $j + 1 end end define starpu-sched-print-component set $c = (struct starpu_sched_component *) $arg1 starpu-print-spaces $arg0 printf "%s %c %c (struct starpu_sched_component *) %p\n", $c->name, $c->properties & STARPU_SCHED_COMPONENT_HOMOGENEOUS ? 'o':'e', $c->properties & STARPU_SCHED_COMPONENT_SINGLE_MEMORY_NODE ? 's' : 'm', $c if $c->push_task == fifo_push_task set $f = &((struct _starpu_fifo_data *) $c->data)->fifo starpu-print-spaces $arg0 printf "%d tasks start %f len %f end %f processed %d\n", $f->ntasks, $f->exp_start, $f->exp_len, $f->exp_end, $f->nprocessed end if $c->push_task == prio_push_task set $q = &((struct _starpu_prio_data *) $c->data)->prio starpu-print-spaces $arg0 printf "%d tasks start %f len %f end %f processed %d\n", $q->ntasks, $q->exp_start, $q->exp_len, $q->exp_end, $q->nprocessed end if $c->push_task == simple_worker_push_task set $d = (struct _starpu_worker_component_data *) $c->data set $l = $d->list starpu-print-spaces $arg0 printf "%d tasks pipeline %f start %f len %f end %f\n", $l->ntasks, $l->pipeline_len, $l->exp_start, $l->exp_len, $l->exp_end end end define starpu-sched-print-recur-component starpu-sched-print-component $arg0 $arg1 set $i[$arg0] = 0 while $i[$arg0] < $arg1->nchildren starpu-sched-print-recur-component ($arg0+1) $arg1->children[$i[$arg0]] set $i[$arg0] = $i[$arg0] + 1 end end define starpu-sched-print-modular set $t = (struct starpu_sched_tree *) _starpu_config.sched_ctxs[$arg0]->policy_data set $i = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0 } starpu-sched-print-recur-component 0 $t->root end define starpu-sched-print-lws set $ws = (struct _starpu_work_stealing_data *) _starpu_config.sched_ctxs[$arg0]->policy_data set $num = 0 while $num < _starpu_config->topology->nworkers printf "Worker %s:", _starpu_config->workers[$num].name printf " %d tasks", $ws->per_worker[$num].queue.ntasks if $ws->per_worker[$num].notask printf " notask" end if $ws->per_worker[$num].running printf " running" end if $ws->per_worker[$num].busy printf " busy" end printf " lastpop %d", $ws->per_worker[$num].last_pop_worker printf "\n" set $num = $num + 1 end end define starpu-sched-print-dmda set $dt = (struct _starpu_dmda_data *) _starpu_config.sched_ctxs[$arg0]->policy_data set $num = 0 printf "alpha %f beta %f gamma %f power %fW\n", $dt->alpha, $dt->beta, $dt->_gamma, $dt->idle_power while $num < _starpu_config->topology->nworkers printf "Worker %s:", _starpu_config->workers[$num].name printf " %d tasks", $dt->queue_array[$num].ntasks printf " start %f", $dt->queue_array[$num].exp_start printf " len %f", $dt->queue_array[$num].exp_len printf " end %f", $dt->queue_array[$num].exp_end printf " piped %f", $dt->queue_array[$num].pipeline_len printf "\n" set $num = $num + 1 end end define starpu-mpi-print-request set $request = (struct _starpu_mpi_req *)$arg0 printf "Request (struct _starpu_mpi_req *) %p data %p tag %ld to MPI node %d type ", $request, $request->data_handle, $request->data_handle && $request->data_handle->mpi_data ? ((struct _starpu_mpi_data *) ($request->data_handle->mpi_data))->node_tag.data_tag : -1, $request->node_tag.node.rank, if $request->request_type == SEND_REQ printf "SEND_REQ" end if $request->request_type == RECV_REQ printf "RECV_REQ" end if $request->request_type == WAIT_REQ printf "WAIT_REQ" end if $request->request_type == TEST_REQ printf "TEST_REQ" end if $request->request_type == BARRIER_REQ printf "BARRIER_REQ" end if $request->request_type == PROBE_REQ printf "PROBE_REQ" end if $request->request_type == UNKNOWN_REQ printf "UNKNOWN_REQ" end printf " submitted %d completed %d posted %d detached %d\n", $request->submitted, $request->completed, $request->posted, $request->detached end define starpu-mpi-print-ready-recv-requests set $list = (struct _starpu_mpi_req_list) ready_recv_requests if $list set $request = $list.list._head while $request starpu-mpi-print-request $request set $request = $request->_next end else printf "No ready recv requests\n" end end define starpu-mpi-print-requests-list set $list = $arg0 set $request = $list._head while $request starpu-mpi-print-request $request set $request = $request->_next end end define starpu-mpi-print-requests-tree if $arg0 starpu-mpi-print-requests-tree $arg0->children[0] set $stage = (struct _starpu_mpi_req_prio_list_stage *) $arg0 starpu-mpi-print-requests-list (&($stage->list)) starpu-mpi-print-requests-tree $arg0->children[1] end end define starpu-mpi-print-ready-send-requests set $prio_list = (struct _starpu_mpi_req_prio_list) ready_send_requests if _starpu_debug if $prio_list starpu-mpi-print-requests-list $prio_list.list else printf "No ready send requests\n" end else if $prio_list.empty == 0 starpu-mpi-print-requests-tree $prio_list.tree.root else printf "No ready send requests\n" end end end define starpu-mpi-print-detached-requests set $list = (struct _starpu_mpi_req_list) detached_requests if $list set $request = $list._head while $request starpu-mpi-print-request $request set $request = $request->_next end else printf "No detached requests\n" end end define starpu-mpi-print-early-data set $hash = (struct _starpu_mpi_early_data_handle_hashlist *)_starpu_mpi_early_data_handle_hashmap if $hash while $hash printf "Communicator %p Rank %d Data_tag %d\n", $hash->node_tag->comm, $hash->node_tag->rank, $hash->node_tag->data_tag set $list = (struct _starpu_mpi_early_data_handle_list *) $hash->list if $list set $data = (struct _starpu_mpi_early_data_handle *)$list->_head while $data starpu-mpi-print-request $data->req set $data = $data->_next end end set $hash = (struct _starpu_mpi_early_data_handle_hashlist *) $hash->hh.next end else printf "No early data\n" end end define starpu-mpi-print-early-requests set $hash = (struct _starpu_mpi_early_request_hashlist *)_starpu_mpi_early_request_hash if $hash while $hash printf "Communicator %p Rank %d Data_tag %d\n", $hash->node_tag->comm, $hash->node_tag->rank, $hash->node_tag->data_tag set $list = (struct _starpu_mpi_req_list*) $hash->list if $list set $request = $list->_head while $request starpu-mpi-print-request $request set $request = $request->_next end end set $hash = (struct _starpu_mpi_early_request_hashlist *) $hash->hh.next end else printf "No early request\n" end end define starpu-mpi-print-sync-data set $hash = (struct _starpu_mpi_sync_data_handle_hashlist *)_starpu_mpi_sync_data_handle_hashmap if $hash while $hash printf "Communicator %p Rank %d Data_tag %d\n", $hash->node_tag->comm, $hash->node_tag->rank, $hash->node_tag->data_tag set $list = (struct _starpu_mpi_req_list *) $hash->list if $list set $request = $list->_head while $request starpu-mpi-print-request $request set $request = $request->_next end end set $hash = (struct _starpu_mpi_sync_data_handle_hashlist *) $hash->hh.next end else printf "No sync data\n" end end document starpu List of StarPU-specific gdb functions: starpu-workers prints a list of the StarPU workers starpu-tasks-on-workers prints a list of the tasks queued on workers starpu-tasks-on-worker prints a list of the tasks queued on the given worker starpu-print-job prints a StarPU job starpu-print-task prints a StarPU task starpu-print-all-tasks prints all StarPU tasks starpu-print-task-and-successor prints a StarPU task and its successors starpu-print-data prints a StarPU data handle starpu-print-datas prints all StarPU data handles starpu-print-datas-summary prints a summary of data handles starpu-print-replicate prints a StarPU data replicate starpu-print-request prints a StarPU data request starpu-print-prequests prints all pending StarPU data requests starpu-print-requests prints all queued StarPU data requests starpu-print-frequests prints all queued StarPU prefetch data requests starpu-print-irequests prints all queued StarPU idle data requests starpu-tasks prints a summary of the tasks flowing in StarPU starpu-all-tasks prints a list of all the tasks flowing in StarPU starpu-tags prints a list of the tags known to StarPU starpu-print-tag prints a given tag starpu-memusage prints the memory node usage starpu-print-archs prints all known arch combinations starpu-print-arch prints a given arch combination starpu-print-registered-models prints all registered performance models starpu-print-model prints a given performance model starpu-sched-data prints the data of the given scheduler starpu-sched-print-modular prints the hierarchy of modular scheduling components starpu-sched-print-lws prints the state of the lws scheduler starpu-sched-print-dmda prints the state of any of the dm* schedulers starpu-mpi-print-ready-recv-requests prints all MPI ready recv requests starpu-mpi-print-ready-send-requests prints all MPI ready send requests starpu-mpi-print-detached-requests prints all MPI detached requests starpu-mpi-print-early-data prints all MPI early received data starpu-mpi-print-early-requests prints all MPI early requests starpu-mpi-print-sync-data prints all MPI sync data end starpu-1.4.9+dfsg/tools/loader.c000066400000000000000000000274611507764646700165740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #include #include #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) #include #else #include #endif #ifdef STARPU_QUICK_CHECK /* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s add some extra times for tests which run with all schedulers */ #define DEFAULT_TIMEOUT 100 #elif !defined(STARPU_LONG_CHECK) /* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */ #define DEFAULT_TIMEOUT 300 #else /* Long checks can be very long */ #define DEFAULT_TIMEOUT 1000 #endif #define AUTOTEST_SKIPPED_TEST 77 static pid_t child_pid = 0; static int timeout; #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) static int mygettimeofday(struct timeval *tv, void *tz) { if (tv) { FILETIME ft; unsigned long long res; GetSystemTimeAsFileTime(&ft); /* 100-nanosecond intervals since January 1, 1601 */ res = ft.dwHighDateTime; res <<= 32; res |= ft.dwLowDateTime; res /= 10; /* Now we have microseconds */ res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL; /* Now we are based on epoch */ tv->tv_sec = res / 1000000ULL; tv->tv_usec = res % 1000000ULL; } } #else #define mygettimeofday(tv,tz) gettimeofday(tv,tz) #endif #ifdef STARPU_GDB_PATH static int try_launch_gdb(const char *exe, const char *core) { # define GDB_COMMANDS \ "-ex", "py-list", \ "-ex", "starpu-tasks", \ "-ex", "starpu-workers", \ "-ex", "starpu-print-datas-summary", \ "-ex", "starpu-memusage", \ "-ex", "starpu-print-archs", \ "-ex", "starpu-print-registered-models", \ "-ex", "bt full", \ "-ex", "py-bt", \ "-ex", "thread apply all bt full", \ "-ex", "thread apply all py-bt", \ int err; pid_t pid; struct stat st; const char *top_builddir; char *gdb; err = stat(core, &st); if (err != 0) { fprintf(stderr, "while looking for core file of %s: %s: %m\n", exe, core); return -1; } if (!(st.st_mode & S_IFREG)) { fprintf(stderr, "%s: not a regular file\n", core); return -1; } top_builddir = getenv("top_builddir"); pid = fork(); switch (pid) { case 0: /* kid */ if (top_builddir != NULL) { /* Run gdb with Libtool. */ gdb = alloca(strlen(top_builddir) + sizeof("/libtool") + 1); strcpy(gdb, top_builddir); strcat(gdb, "/libtool"); err = execl(gdb, "gdb", "--mode=execute", STARPU_GDB_PATH, "--batch", GDB_COMMANDS exe, core, NULL); } else { /* Run gdb directly */ gdb = STARPU_GDB_PATH; err = execl(gdb, "gdb", "--batch", GDB_COMMANDS exe, core, NULL); } if (err != 0) { fprintf(stderr, "while launching `%s': %m\n", gdb); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); break; case -1: fprintf(stderr, "fork: %m\n"); return -1; default: /* parent */ { pid_t who; int status; who = waitpid(pid, &status, 0); if (who != pid) fprintf(stderr, "while waiting for gdb " "process %d: %m\n", pid); } } return 0; # undef GDB_COMMANDS } #endif /* STARPU_GDB_PATH */ static void launch_gdb(const char *exe) { #ifdef STARPU_GDB_PATH char s[32]; snprintf(s, sizeof(s), "core.%d", child_pid); if (try_launch_gdb(exe, s) < 0) try_launch_gdb(exe, "core"); #endif /* STARPU_GDB_PATH */ } static char *test_name; static void test_cleaner(int sig) { pid_t child_gid; int status; (void) sig; // send signal to all loader family members fprintf(stderr, "[error] test %s has been blocked for %d seconds. Mark it as failed\n", test_name, timeout); child_gid = getpgid(child_pid); kill(-child_gid, SIGQUIT); waitpid(child_pid, &status, 0); launch_gdb(test_name); raise(SIGALRM); exit(EXIT_FAILURE); } static void forwardsig(int sig) { pid_t child_gid; child_gid = getpgid(child_pid); kill(-child_gid, sig); } static int _decode(char **src, char *motif, const char *value) { char *found; found = strstr(*src, motif); if (found == NULL) return 0; char *new_src = calloc(1, strlen(*src)-strlen(motif)+strlen(value)+1); strncpy(new_src, *src, found - *src); strcat(new_src, value); strcat(new_src, found+strlen(motif)); *src = new_src; return 1; } static void decode(char **src, char *motif, const char *value) { if (*src) { if (strstr(*src, motif) && value == NULL) { fprintf(stderr, "error: $%s undefined\n", motif); exit(EXIT_FAILURE); } int d = _decode(src, motif, value); while (d) d = _decode(src, motif, value); } } int main(int argc, char *argv[]) { int child_exit_status; char *test_args; char *launcher; char *launcher_args; char *libtool; char *cflags; const char *top_builddir = getenv("top_builddir"); struct sigaction sa; int ret; struct timeval start; struct timeval end; double timing; int x=1; int asan = 0, lsan = 0, tsan = 0, usan = 0; (void) argc; test_args = NULL; timeout = 0; launcher=getenv("STARPU_CHECK_LAUNCHER"); launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS"); cflags = getenv("CFLAGS"); if (cflags) { if (strstr(cflags, "-fsanitize=address")) asan = 1; if (strstr(cflags, "-fsanitize=leak")) lsan = 1; if (strstr(cflags, "-fsanitize=thread")) tsan = 1; if (strstr(cflags, "-fsanitize=undefined")) usan = 1; } if (argv[x] && strcmp(argv[x], "-t") == 0) { timeout = strtol(argv[x+1], NULL, 10); x += 2; } else if (getenv("STARPU_TIMEOUT_ENV")) { /* get user-defined iter_max value */ timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10); } else if (timeout <= 0) { timeout = DEFAULT_TIMEOUT; if ((launcher && strstr(launcher, "valgrind")) || (launcher && strstr(launcher, "helgrind")) || tsan) timeout *= 20; if (asan || usan || lsan || (launcher && strstr(launcher, "compute-sanitizer"))) timeout *= 5; if (timeout > 1750) timeout = 1750; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG timeout *= 20; #endif #endif #ifdef STARPU_USE_MPI_MASTER_SLAVE /* compare values between the 2 values of timeout */ if (getenv("MPIEXEC_TIMEOUT")) { int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10); if (mpiexec_timeout != timeout) fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout); } #endif if (argv[x] && strcmp(argv[x], "-p") == 0) { test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1); sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]); x += 3; } else { test_name = argv[x]; x += 1; } if (!test_name) { fprintf(stderr, "[error] Need name of program to start\n"); exit(EXIT_FAILURE); } size_t len = strlen(test_name); if (len >= 3 && test_name[len-3] == '.' && test_name[len-2] == 's' && test_name[len-1] == 'h') { /* This is a shell script, don't run ourself on bash, but make * the script call us for each program invocation */ char *launch = NULL; if (top_builddir == NULL) // this may fail if .libs is in the directory path setenv("STARPU_LAUNCH", argv[0], 1); else { launch = malloc(strlen(top_builddir) + strlen("/tests/loader") + 1); strcpy(launch, top_builddir); strcat(launch, "/tests/loader"); setenv("STARPU_LAUNCH", launch, 1); } execvp(test_name, argv+x-1); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); free(launch); exit(EXIT_FAILURE); } if (strstr(test_name, "spmv/dw_block_spmv")) { test_args = (char *) calloc(512, sizeof(char)); snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR); } else if (strstr(test_name, "starpu_perfmodel_display")) { if (x >= argc) test_args = strdup("-l"); } else if (strstr(test_name, "starpu_perfmodel_plot")) { if (x >= argc) test_args = strdup("-l"); } /* get launcher program */ if (launcher_args) launcher_args=strdup(launcher_args); if (top_builddir == NULL) { fprintf(stderr, "warning: $top_builddir undefined, " "so $STARPU_CHECK_LAUNCHER ignored\n"); launcher = NULL; launcher_args = NULL; libtool = NULL; } else { libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1); strcpy(libtool, top_builddir); strcat(libtool, "/libtool"); } if (launcher) { const char *top_srcdir = getenv("top_srcdir"); decode(&launcher, "@top_srcdir@", top_srcdir); decode(&launcher_args, "@top_srcdir@", top_srcdir); } setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1); /* set SIGALARM handler */ sa.sa_flags = SA_RESETHAND | SA_NODEFER; sigemptyset(&sa.sa_mask); sa.sa_handler = test_cleaner; if (-1 == sigaction(SIGALRM, &sa, NULL)) perror("sigaction"); signal(SIGINT, forwardsig); signal(SIGHUP, forwardsig); signal(SIGPIPE, forwardsig); signal(SIGTERM, forwardsig); child_pid = fork(); if (child_pid == 0) { char *launcher_argv[100]; int i=0; setpgid(0, 0); /* "Launchers" such as Valgrind need to be inserted * after the Libtool-generated wrapper scripts, hence * this special-case. */ if (launcher && top_builddir != NULL) { launcher_argv[i++] = libtool; launcher_argv[i++] = "--mode=execute"; launcher_argv[i++] = launcher; if (launcher_args) { launcher_argv[i++] = strtok(launcher_args, " "); while (launcher_argv[i-1]) { launcher_argv[i++] = strtok(NULL, " "); } } } launcher_argv[i++] = test_name; if (test_args) launcher_argv[i++] = test_args; else while (argv[x]) { launcher_argv[i++] = argv[x++]; } #ifdef STARPU_SIMGRID #ifdef STARPU_DEBUG launcher_argv[i++] = "--cfg=contexts/factory:thread"; #endif #endif launcher_argv[i++] = NULL; execvp(*launcher_argv, launcher_argv); fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name); exit(EXIT_FAILURE); } if (child_pid == -1) { fprintf(stderr, "[error] fork. test marked as failed\n"); exit(EXIT_FAILURE); } free(test_args); free(libtool); ret = EXIT_SUCCESS; gettimeofday(&start, NULL); alarm(timeout); if (child_pid == waitpid(child_pid, &child_exit_status, 0)) { if (WIFEXITED(child_exit_status)) { int status = WEXITSTATUS(child_exit_status); if (status == EXIT_SUCCESS) { alarm(0); } else { if (status != AUTOTEST_SKIPPED_TEST) fprintf(stdout, "`%s' exited with return code %d\n", test_name, status); ret = status; } } else if (WIFSIGNALED(child_exit_status)) { fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n", test_name, WTERMSIG(child_exit_status)); launch_gdb(test_name); ret = EXIT_FAILURE; } else { fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n", test_name); ret = EXIT_FAILURE; } } gettimeofday(&end, NULL); timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec)); fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name); return ret; } starpu-1.4.9+dfsg/tools/msvc/000077500000000000000000000000001507764646700161205ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/msvc/starpu.sln000077500000000000000000000015721507764646700201640ustar00rootroot00000000000000 Microsoft Visual Studio Solution File, Format Version 11.00 # Visual C++ Express 2010 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "starpu", "starpu\starpu.vcxproj", "{1A4DC8EB-1250-4DC5-B3CE-2E4BB4C51CA3}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 Release|Win32 = Release|Win32 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {1A4DC8EB-1250-4DC5-B3CE-2E4BB4C51CA3}.Debug|Win32.ActiveCfg = Debug|Win32 {1A4DC8EB-1250-4DC5-B3CE-2E4BB4C51CA3}.Debug|Win32.Build.0 = Debug|Win32 {1A4DC8EB-1250-4DC5-B3CE-2E4BB4C51CA3}.Release|Win32.ActiveCfg = Release|Win32 {1A4DC8EB-1250-4DC5-B3CE-2E4BB4C51CA3}.Release|Win32.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection EndGlobal starpu-1.4.9+dfsg/tools/msvc/starpu/000077500000000000000000000000001507764646700174365ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/msvc/starpu/starpu.vcxproj000077500000000000000000000102551507764646700223770ustar00rootroot00000000000000 Debug Win32 Release Win32 {1A4DC8EB-1250-4DC5-B3CE-2E4BB4C51CA3} Win32Proj starpu Application true Unicode Application false true Unicode true false NotUsing Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) $(STARPU_PATH)\include\starpu\1.3;%(AdditionalIncludeDirectories) Console true $(STARPU_PATH)\lib;%(AdditionalLibraryDirectories) libstarpu-1.3.lib;%(AdditionalDependencies) Level3 Use MaxSpeed true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) Console true true true starpu-1.4.9+dfsg/tools/msvc/starpu_clean.bat000066400000000000000000000016101507764646700212660ustar00rootroot00000000000000@ECHO OFF REM StarPU --- Runtime system for heterogeneous multicore architectures. REM REM Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria REM REM StarPU is free software; you can redistribute it and/or modify REM it under the terms of the GNU Lesser General Public License as published by REM the Free Software Foundation; either version 2.1 of the License, or (at REM your option) any later version. REM REM StarPU is distributed in the hope that it will be useful, but REM WITHOUT ANY WARRANTY; without even the implied warranty of REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. REM REM See the GNU Lesser General Public License in COPYING.LGPL for more details. REM TITLE MSVC StarPU Cleaning ECHO. ECHO MSVC StarPU Cleaning ECHO. FOR %%d in (debug starpu\debug ipch) DO IF EXIST %%d RMDIR /S /Q %%d FOR %%f in (starpu.sdf starpu.suo) DO IF EXIST %%f DEL %%f starpu-1.4.9+dfsg/tools/msvc/starpu_exec.bat000066400000000000000000000025231507764646700211340ustar00rootroot00000000000000@ECHO OFF REM StarPU --- Runtime system for heterogeneous multicore architectures. REM REM Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria REM REM StarPU is free software; you can redistribute it and/or modify REM it under the terms of the GNU Lesser General Public License as published by REM the Free Software Foundation; either version 2.1 of the License, or (at REM your option) any later version. REM REM StarPU is distributed in the hope that it will be useful, but REM WITHOUT ANY WARRANTY; without even the implied warranty of REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. REM REM See the GNU Lesser General Public License in COPYING.LGPL for more details. REM TITLE MSVC StarPU Execution ECHO. ECHO MSVC StarPU Execution IF "%1" == "" GOTO invalidparam IF NOT EXIST %1 GOTO invalidparam call .\starpu_var.bat mkdir starpu FOR %%F IN (%STARPU_PATH%\bin\*dll) DO COPY %%F starpu\%%~nF FOR %%F IN (%HWLOC%\bin\*dll) DO COPY %%F starpu set STARPU_OLDPATH=%PATH% call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x86 cl %1 %STARPU_CFLAGS% %STARPU_LDFLAGS% set PATH=starpu;c:\MinGW\bin;%PATH% .\%~n1.exe set PATH=%STARPU_OLDPATH% GOTO end :invalidparam ECHO. ECHO Syntax error. You need to give the name of a StarPU application EXIT /B 2 GOTO end :end starpu-1.4.9+dfsg/tools/msvc/starpu_open.bat000066400000000000000000000027641507764646700211600ustar00rootroot00000000000000@ECHO OFF REM StarPU --- Runtime system for heterogeneous multicore architectures. REM REM Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria REM REM StarPU is free software; you can redistribute it and/or modify REM it under the terms of the GNU Lesser General Public License as published by REM the Free Software Foundation; either version 2.1 of the License, or (at REM your option) any later version. REM REM StarPU is distributed in the hope that it will be useful, but REM WITHOUT ANY WARRANTY; without even the implied warranty of REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. REM REM See the GNU Lesser General Public License in COPYING.LGPL for more details. REM TITLE MSVC StarPU Execution ECHO. ECHO MSVC StarPU Execution IF NOT EXIST %STARPU_PATH%\AUTHORS GOTO starpunotfound ECHO. ECHO %STARPU_PATH% IF "%1" == "" GOTO invalidparam IF NOT EXIST %1 GOTO invalidparam COPY %1 starpu\starpu_appli.c FOR %%F IN (%STARPU_PATH%\bin\*dll) DO COPY %%F starpu\%%~nF FOR %%F IN (%STARPU_PATH%\bin\*dll) DO COPY %%F starpu COPY c:\MinGW\bin\pthreadGC2.dll starpu IF EXIST Debug RMDIR /S /Q Debug IF EXIST starpu\Debug RMDIR /S /Q starpu\Debug "C:\Program Files (x86)\Microsoft Visual Studio 10.0\Common7\IDE\VCExpress.exe" starpu.sln GOTO end :invalidparam ECHO. ECHO Syntax error. You need to give the name of a StarPU application GOTO end :starpunotfound ECHO. ECHO You need to set the variable STARPU_PATH to a valid StarPU installation directory GOTO end :end starpu-1.4.9+dfsg/tools/msvc/starpu_var.bat000066400000000000000000000023361507764646700210020ustar00rootroot00000000000000@ECHO OFF REM StarPU --- Runtime system for heterogeneous multicore architectures. REM REM Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria REM REM StarPU is free software; you can redistribute it and/or modify REM it under the terms of the GNU Lesser General Public License as published by REM the Free Software Foundation; either version 2.1 of the License, or (at REM your option) any later version. REM REM StarPU is distributed in the hope that it will be useful, but REM WITHOUT ANY WARRANTY; without even the implied warranty of REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. REM REM See the GNU Lesser General Public License in COPYING.LGPL for more details. REM TITLE MSVC StarPU Environment ECHO. ECHO MSVC StarPU Environment IF NOT EXIST %STARPU_PATH%\AUTHORS GOTO starpunotfound ECHO. ECHO Setting environment from %STARPU_PATH% set STARPU_LIBDIR=%STARPU_PATH%/lib set STARPU_INCLUDEDIR=%STARPU_PATH%/include set STARPU_CFLAGS=/I%STARPU_INCLUDEDIR%\starpu\1.4 /I%HWLOC%\include set STARPU_LDFLAGS=/link %STARPU_PATH%\lib\libstarpu-1.4.lib GOTO end :starpunotfound ECHO. ECHO You need to set the variable STARPU_PATH to a valid StarPU installation directory exit /B 1 GOTO end :end starpu-1.4.9+dfsg/tools/msvc/starpu_var.bat.in000066400000000000000000000024421507764646700214050ustar00rootroot00000000000000@ECHO OFF REM StarPU --- Runtime system for heterogeneous multicore architectures. REM REM Copyright (C) 2013-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria REM REM StarPU is free software; you can redistribute it and/or modify REM it under the terms of the GNU Lesser General Public License as published by REM the Free Software Foundation; either version 2.1 of the License, or (at REM your option) any later version. REM REM StarPU is distributed in the hope that it will be useful, but REM WITHOUT ANY WARRANTY; without even the implied warranty of REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. REM REM See the GNU Lesser General Public License in COPYING.LGPL for more details. REM TITLE MSVC StarPU Environment ECHO. ECHO MSVC StarPU Environment IF NOT EXIST %STARPU_PATH%\AUTHORS GOTO starpunotfound ECHO. ECHO Setting environment from %STARPU_PATH% set STARPU_LIBDIR=%STARPU_PATH%/lib set STARPU_INCLUDEDIR=%STARPU_PATH%/include set STARPU_CFLAGS=/I%STARPU_INCLUDEDIR%\starpu\@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ /I%HWLOC%\include set STARPU_LDFLAGS=/link %STARPU_PATH%\lib\libstarpu-@STARPU_EFFECTIVE_VERSION@.lib GOTO end :starpunotfound ECHO. ECHO You need to set the variable STARPU_PATH to a valid StarPU installation directory exit /B 1 GOTO end :end starpu-1.4.9+dfsg/tools/patch-ayudame000066400000000000000000000020721507764646700176160ustar00rootroot00000000000000--- Ayudame.h 2012-10-31 10:29:19.000000000 -0600 +++ Ayudame.h 2012-11-16 17:34:04.963495624 -0700 @@ -14,6 +14,7 @@ #include #include +#include //#define _REENTRANT // defined by default by gcc @@ -68,7 +69,7 @@ // --- -const size_t AYU_buf_size=8; // size of message buffer +#define AYU_buf_size ((size_t)8) // size of message buffer extern unsigned long AYU_n_threads; // current number of threads extern unsigned long AYU_max_threads; // maximum number of threads @@ -202,7 +203,9 @@ }; /******/ +#ifdef __cplusplus extern "C" { +#endif /****f* AYUDAME/AYU_event * NAME @@ -224,7 +227,7 @@ * void * SOURCE */ - void AYU_event(ayu_event_t event, const int64_t taskId, void *p) + void AYU_event(enum ayu_event_t event, const int64_t taskId, void *p) __attribute__ ((weak)); /******/ @@ -342,7 +345,9 @@ void AYU_registerTask(void*) __attribute__ ((weak)); /******/ +#ifdef __cplusplus } +#endif #ifndef AYU_MASTER_TASKID #define AYU_MASTER_TASKID 0 starpu-1.4.9+dfsg/tools/perfmodels/000077500000000000000000000000001507764646700173105ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/README000066400000000000000000000032701507764646700201720ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # This directory contains performance models for given architectures and examples. The architecture 'mirage' is composed of: - 2 Intel Xeon X5650 @2.67GHz, thus 12 CPU cores - MKL 11.1.3 - 3 NVidia GF100 Tesla M2070, thus 3 GPUs - CUDA 6.0 - Magma 1.6.0 The architecture 'attila' is composed of: - 2 Intel Xeon X5650 @2.67GHz, thus 12 CPU cores - OpenBlas 0.2.12-1 - 3 NVidia GF100 Tesla C2050, thus 3 GPUs - CUDA 6.0 The architecture 'idgraf' is composed of: - 2 Intel Xeon X5650 @2.67GHz, thus 12 CPU cores - MKL 11.1.0 - 8 NVIDIA GF100 Tesla C2050, thus 8 GPUs - CUDA 6.0 The architecture 'sirocco' is composed of: - 2 Intel Xeon E5-2680 @2.50GHz, thus 24 CPU cores - MKL 11.2 - 4 NVIDIA GK110BGL Tesla K40m, thus 4 GPUs - CUDA 6.5 To use performance models stored in this directory, one needs to set the environment variable 'STARPU_PERF_MODEL_DIR' to the location of the directory, e.g.: export STARPU_PERF_MODEL_DIR=.../tools/perfmodels/sampling and then select the desired architecture: export STARPU_HOSTNAME=mirage starpu-1.4.9+dfsg/tools/perfmodels/cluster.xml000066400000000000000000000005671507764646700215230ustar00rootroot00000000000000 starpu-1.4.9+dfsg/tools/perfmodels/hostfile000066400000000000000000000001441507764646700210470ustar00rootroot00000000000000mirage01.plafrim.cluster mirage02.plafrim.cluster mirage03.plafrim.cluster mirage04.plafrim.cluster starpu-1.4.9+dfsg/tools/perfmodels/sampling/000077500000000000000000000000001507764646700211225ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/000077500000000000000000000000001507764646700217135ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/attila.affinity000066400000000000000000000003631507764646700247260ustar00rootroot00000000000000# GPU CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 CPU8 CPU9 CPU10 CPU11 0 0 1 2 3 4 5 6 7 8 9 10 11 1 0 1 2 3 4 5 6 7 8 9 10 11 2 0 1 2 3 4 5 6 7 8 9 10 11 0 0 1 2 3 4 5 6 7 8 9 10 11 1 0 1 2 3 4 5 6 7 8 9 10 11 2 0 1 2 3 4 5 6 7 8 9 10 11 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/attila.bandwidth000066400000000000000000000111141507764646700250550ustar00rootroot00000000000000# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 to 16 to 17 to 18 to 19 to 20 to 21 to 22 to 23 to 24 to 25 to 26 to 27 to 28 to 29 to 30 to 31 0.000000 6008.942513 6015.054829 5191.265149 3549.286897 4151.586883 4150.380415 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 6526.606701 0.000000 5296.854371 3827.552062 2299.031785 2537.486754 2537.035995 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 6526.729932 5297.158728 0.000000 3836.700007 2299.047076 2537.505381 2537.054615 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 4533.335723 3081.579529 3082.603074 0.000000 1990.703988 2167.035675 2166.706914 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 3931.561374 2376.592429 2377.547977 2237.220832 0.000000 2019.289776 2019.004313 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 4431.559719 2550.546612 2551.647193 2390.711859 1970.828109 0.000000 2143.181889 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 4121.971523 2444.862313 2445.873558 2297.616593 1907.126408 2068.363099 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/attila.config000066400000000000000000000001761507764646700243640ustar00rootroot00000000000000# Current configuration 12 # Number of CPUs 3 # Number of CUDA devices 3 # Number of OpenCL devices 0 # Number of MIC devices starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/attila.latency000066400000000000000000000107641507764646700245620ustar00rootroot00000000000000# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 to 16 to 17 to 18 to 19 to 20 to 21 to 22 to 23 to 24 to 25 to 26 to 27 to 28 to 29 to 30 to 31 0.000000 9.500836 9.473047 10.237367 9.863812 9.678141 10.473812 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 10.286836 0.000000 14.883266 24.716781 20.150648 19.964977 20.760648 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 10.005258 14.426969 0.000000 24.677023 19.869070 19.683398 20.479070 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 11.149320 20.650156 20.622367 0.000000 21.013133 20.827461 21.623133 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 14.257031 23.757867 23.730078 24.494398 0.000000 23.935172 24.730844 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 13.983570 23.484406 23.456617 24.220938 23.847383 0.000000 24.457383 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 14.954109 24.454945 24.427156 25.191477 24.817922 24.632250 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/attila.platform.v4.xml000066400000000000000000000270071507764646700260740ustar00rootroot00000000000000 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/attila.platform.xml000066400000000000000000000265101507764646700255420ustar00rootroot00000000000000 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/hannibal-pitch.affinity000066400000000000000000000002411507764646700263240ustar00rootroot00000000000000# GPU CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 0 0 1 2 3 4 5 6 7 1 4 5 6 7 0 1 2 3 2 4 5 6 7 0 1 2 3 0 0 1 2 3 4 5 6 7 1 4 5 6 7 0 1 2 3 2 4 5 6 7 0 1 2 3 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/hannibal-pitch.bandwidth000066400000000000000000000027341507764646700264700ustar00rootroot00000000000000# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 0.000000 5988.779905 3149.675860 5988.971975 3975.378655 2636.838726 3992.447567 nan nan nan nan nan nan nan nan nan 3599.738919 0.000000 1679.850942 2248.345554 1889.122528 1521.977521 1892.968372 nan nan nan nan nan nan nan nan nan 3352.127736 2149.165370 0.000000 2149.190105 1818.623736 1475.884075 1822.187624 nan nan nan nan nan nan nan nan nan 3554.530216 2230.599117 1669.939421 0.000000 1876.596887 1513.836926 1880.391850 nan nan nan nan nan nan nan nan nan 2937.163572 1970.662958 1519.854976 1970.683755 0.000000 1389.455231 1692.226493 nan nan nan nan nan nan nan nan nan 2610.203571 1817.881699 1427.338068 1817.899396 1575.646193 0.000000 1578.320689 nan nan nan nan nan nan nan nan nan 2812.550617 1913.772761 1485.791058 1913.792375 1647.181820 1360.930908 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/hannibal-pitch.config000066400000000000000000000001431507764646700257610ustar00rootroot00000000000000# Current configuration 8 # Number of CPUs 3 # Number of CUDA devices 3 # Number of OpenCL devices starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/hannibal-pitch.latency000066400000000000000000000026101507764646700261540ustar00rootroot00000000000000# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 0.000000 12.460938 12.570312 12.468750 20.000000 20.328125 19.593750 nan nan nan nan nan nan nan nan nan 12.476562 0.000000 25.046875 24.945312 32.476562 32.804688 32.070312 nan nan nan nan nan nan nan nan nan 12.593750 25.054688 0.000000 25.062500 32.593750 32.921875 32.187500 nan nan nan nan nan nan nan nan nan 12.539062 25.000000 25.109375 0.000000 32.539062 32.867188 32.132812 nan nan nan nan nan nan nan nan nan 63.601562 76.062500 76.171875 76.070312 0.000000 83.929688 83.195312 nan nan nan nan nan nan nan nan nan 35.992188 48.453125 48.562500 48.460938 55.992188 0.000000 55.585938 nan nan nan nan nan nan nan nan nan 36.765625 49.226562 49.335938 49.234375 56.765625 57.093750 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/hannibal-pitch.platform.v4.xml000066400000000000000000000104231507764646700274710ustar00rootroot00000000000000 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/hannibal-pitch.platform.xml000066400000000000000000000103071507764646700271420ustar00rootroot00000000000000 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/hannibal.affinity000066400000000000000000000002411507764646700252170ustar00rootroot00000000000000# GPU CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 0 0 1 2 3 4 5 6 7 1 4 5 6 7 0 1 2 3 2 4 5 6 7 0 1 2 3 0 0 1 2 3 4 5 6 7 1 4 5 6 7 0 1 2 3 2 4 5 6 7 0 1 2 3 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/hannibal.bandwidth000066400000000000000000000027341507764646700253630ustar00rootroot00000000000000# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 0.000000 5988.779905 3149.675860 5988.971975 3975.378655 2636.838726 3992.447567 nan nan nan nan nan nan nan nan nan 3599.738919 0.000000 1679.850942 2248.345554 1889.122528 1521.977521 1892.968372 nan nan nan nan nan nan nan nan nan 3352.127736 2149.165370 0.000000 2149.190105 1818.623736 1475.884075 1822.187624 nan nan nan nan nan nan nan nan nan 3554.530216 2230.599117 1669.939421 0.000000 1876.596887 1513.836926 1880.391850 nan nan nan nan nan nan nan nan nan 2937.163572 1970.662958 1519.854976 1970.683755 0.000000 1389.455231 1692.226493 nan nan nan nan nan nan nan nan nan 2610.203571 1817.881699 1427.338068 1817.899396 1575.646193 0.000000 1578.320689 nan nan nan nan nan nan nan nan nan 2812.550617 1913.772761 1485.791058 1913.792375 1647.181820 1360.930908 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/hannibal.config000066400000000000000000000001431507764646700246540ustar00rootroot00000000000000# Current configuration 8 # Number of CPUs 3 # Number of CUDA devices 3 # Number of OpenCL devices starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/hannibal.latency000066400000000000000000000026101507764646700250470ustar00rootroot00000000000000# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 0.000000 12.460938 12.570312 12.468750 20.000000 20.328125 19.593750 nan nan nan nan nan nan nan nan nan 12.476562 0.000000 25.046875 24.945312 32.476562 32.804688 32.070312 nan nan nan nan nan nan nan nan nan 12.593750 25.054688 0.000000 25.062500 32.593750 32.921875 32.187500 nan nan nan nan nan nan nan nan nan 12.539062 25.000000 25.109375 0.000000 32.539062 32.867188 32.132812 nan nan nan nan nan nan nan nan nan 63.601562 76.062500 76.171875 76.070312 0.000000 83.929688 83.195312 nan nan nan nan nan nan nan nan nan 35.992188 48.453125 48.562500 48.460938 55.992188 0.000000 55.585938 nan nan nan nan nan nan nan nan nan 36.765625 49.226562 49.335938 49.234375 56.765625 57.093750 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/hannibal.platform.v4.xml000066400000000000000000000104271507764646700263700ustar00rootroot00000000000000 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/hannibal.platform.xml000066400000000000000000000103131507764646700260320ustar00rootroot00000000000000 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/idgraf.affinity000066400000000000000000000004551507764646700247060ustar00rootroot00000000000000# GPU CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 CPU8 CPU9 CPU10 CPU11 0 0 1 2 3 4 5 6 7 8 9 10 11 1 0 1 2 3 4 5 6 7 8 9 10 11 2 0 1 2 3 4 5 6 7 8 9 10 11 3 0 1 2 3 4 5 6 7 8 9 10 11 4 6 7 8 9 10 11 0 1 2 3 4 5 5 6 7 8 9 10 11 0 1 2 3 4 5 6 6 7 8 9 10 11 0 1 2 3 4 5 7 6 7 8 9 10 11 0 1 2 3 4 5 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/idgraf.bandwidth000066400000000000000000000033261507764646700250410ustar00rootroot00000000000000# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 0.000000 5985.150529 5982.867814 6004.601450 6002.864228 6017.068835 6017.279500 6014.519783 6025.792854 nan nan nan nan nan nan nan 6518.853316 0.000000 6634.627873 5290.661415 5290.656130 3070.759552 3077.363038 3081.101044 3071.316544 nan nan nan nan nan nan nan 6507.263283 6634.402756 0.000000 5290.497370 5290.566592 3070.780185 3077.514743 3081.028777 3071.425106 nan nan nan nan nan nan nan 6468.888505 5290.537572 5290.537715 0.000000 6634.508569 3071.001714 3077.753708 3081.299378 3071.319606 nan nan nan nan nan nan nan 6379.975977 5290.504747 5290.535878 6634.456149 0.000000 3070.823130 3077.536005 3081.267119 3071.334077 nan nan nan nan nan nan nan 6520.767791 3803.989690 3804.097536 3799.684659 3800.145340 0.000000 6635.277188 5293.782380 5293.769441 nan nan nan nan nan nan nan 6520.979807 3803.774735 3804.546566 3799.981880 3800.522991 6635.252627 0.000000 5293.483381 5293.507488 nan nan nan nan nan nan nan 6520.981045 3803.433709 3804.330189 3799.708364 3800.220748 5293.757566 5293.607121 0.000000 6635.079661 nan nan nan nan nan nan nan 6518.969813 3803.670471 3803.908300 3799.582824 3800.130361 5293.416171 5293.465355 6635.049331 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/idgraf.config000066400000000000000000000001761507764646700243420ustar00rootroot00000000000000# Current configuration 12 # Number of CPUs 8 # Number of CUDA devices 0 # Number of OpenCL devices 0 # Number of MIC devices starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/idgraf.latency000066400000000000000000000031061507764646700245300ustar00rootroot00000000000000# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 0.000000 11.018609 10.962211 10.954469 11.007844 11.005922 11.098250 11.109961 11.063805 nan nan nan nan nan nan nan 11.927148 0.000000 18.151891 18.879328 18.799250 28.088742 27.684023 27.682789 27.425391 nan nan nan nan nan nan nan 11.955969 18.258547 0.000000 18.841516 18.742984 27.801375 27.632695 27.804492 27.925742 nan nan nan nan nan nan nan 11.869312 18.796500 18.733070 0.000000 18.166664 27.588664 27.297711 27.356961 27.379555 nan nan nan nan nan nan nan 11.895391 18.823414 18.725125 18.160563 0.000000 27.561664 27.485000 27.345914 27.047594 nan nan nan nan nan nan nan 12.104258 27.886172 28.017508 27.861555 27.938109 0.000000 17.586773 18.179898 18.207852 nan nan nan nan nan nan nan 12.097562 28.033187 27.980211 27.902445 28.026523 17.499156 0.000000 18.177344 18.203297 nan nan nan nan nan nan nan 12.132922 27.677352 27.910406 27.592461 27.664289 18.172375 18.238891 0.000000 17.600648 nan nan nan nan nan nan nan 12.117094 23.135703 23.079305 23.071563 23.124938 23.123016 23.215344 23.227055 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/idgraf.platform.v4.xml000066400000000000000000002042751507764646700260560ustar00rootroot00000000000000 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/idgraf.platform.xml000066400000000000000000002032201507764646700255130ustar00rootroot00000000000000 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/mirage.affinity000066400000000000000000000003631507764646700247140ustar00rootroot00000000000000# GPU CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 CPU8 CPU9 CPU10 CPU11 0 0 1 2 3 4 5 6 7 8 9 10 11 1 6 7 8 9 10 11 0 1 2 3 4 5 2 6 7 8 9 10 11 0 1 2 3 4 5 0 0 1 2 3 4 5 6 7 8 9 10 11 1 6 7 8 9 10 11 0 1 2 3 4 5 2 6 7 8 9 10 11 0 1 2 3 4 5 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/mirage.bandwidth000066400000000000000000000111141507764646700250430ustar00rootroot00000000000000# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 to 16 to 17 to 18 to 19 to 20 to 21 to 22 to 23 to 24 to 25 to 26 to 27 to 28 to 29 to 30 to 31 0.000000 6030.996807 6011.099701 6023.264949 4533.752864 4530.361672 4457.700383 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 6517.591026 0.000000 3074.666060 3073.669260 2673.805763 2672.625905 2647.170533 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 6517.940403 3834.443072 0.000000 5296.205823 2673.864562 2672.684652 2647.228166 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 6517.196551 3831.214832 5296.294945 0.000000 2673.739370 2672.559571 2647.105457 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 4380.832064 2537.573804 2534.044575 2536.203977 0.000000 2227.171158 2209.465982 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 4385.089408 2539.001663 2535.468464 2537.630294 2229.091070 0.000000 2210.548390 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 4367.520334 2533.101675 2529.584866 2531.736671 2224.542196 2223.725456 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/mirage.config000066400000000000000000000001761507764646700243520ustar00rootroot00000000000000# Current configuration 12 # Number of CPUs 3 # Number of CUDA devices 3 # Number of OpenCL devices 0 # Number of MIC devices starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/mirage.latency000066400000000000000000000107651507764646700245510ustar00rootroot00000000000000# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 to 16 to 17 to 18 to 19 to 20 to 21 to 22 to 23 to 24 to 25 to 26 to 27 to 28 to 29 to 30 to 31 0.000000 9.836008 9.800445 9.824289 11.355898 11.248469 10.962445 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 10.565859 0.000000 24.149859 24.293578 21.921758 21.814328 21.528305 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 12.277023 25.184922 0.000000 17.178188 23.632922 23.525492 23.239469 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 10.580664 20.416672 20.381109 0.000000 21.936563 21.829133 21.543109 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 15.931570 25.767578 25.732016 25.755859 0.000000 27.180039 26.894016 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 15.837656 25.673664 25.638102 25.661945 27.193555 0.000000 26.800102 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 15.669383 25.505391 25.469828 25.493672 27.025281 26.917852 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/mirage.platform.v4.xml000066400000000000000000000270071507764646700260620ustar00rootroot00000000000000 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/mirage.platform.xml000066400000000000000000000265101507764646700255300ustar00rootroot00000000000000 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/sirocco.affinity000066400000000000000000000012251507764646700251070ustar00rootroot00000000000000# GPU CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 CPU8 CPU9 CPU10 CPU11 CPU12 CPU13 CPU14 CPU15 CPU16 CPU17 CPU18 CPU19 CPU20 CPU21 CPU22 CPU23 0 6 7 8 9 10 11 0 1 2 3 4 5 18 19 20 21 22 23 12 13 14 15 16 17 1 6 7 8 9 10 11 0 1 2 3 4 5 18 19 20 21 22 23 12 13 14 15 16 17 2 18 19 20 21 22 23 12 13 14 15 16 17 0 1 2 3 4 5 6 7 8 9 10 11 3 18 19 20 21 22 23 12 13 14 15 16 17 0 1 2 3 4 5 6 7 8 9 10 11 0 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 2 12 13 14 15 16 17 18 19 20 21 22 23 0 1 2 3 4 5 6 7 8 9 10 11 3 12 13 14 15 16 17 18 19 20 21 22 23 0 1 2 3 4 5 6 7 8 9 10 11 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/sirocco.bandwidth000066400000000000000000000035021507764646700252420ustar00rootroot00000000000000# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 0.000000e+00 1.051768e+04 1.051743e+04 1.051732e+04 1.051718e+04 7.997534e+03 7.978223e+03 8.025122e+03 8.002101e+03 nan nan nan nan nan nan nan 1.052170e+04 0.000000e+00 1.024409e+04 7.662719e+03 8.527736e+03 4.543798e+03 4.537558e+03 4.552690e+03 4.545272e+03 nan nan nan nan nan nan nan 1.052123e+04 1.024068e+04 0.000000e+00 7.630370e+03 8.542254e+03 4.543711e+03 4.537471e+03 4.552602e+03 4.545185e+03 nan nan nan nan nan nan nan 1.052183e+04 8.504225e+03 8.517476e+03 0.000000e+00 1.023200e+04 4.543822e+03 4.537582e+03 4.552715e+03 4.545296e+03 nan nan nan nan nan nan nan 1.052172e+04 8.496221e+03 8.514240e+03 1.024287e+04 0.000000e+00 4.543801e+03 4.537561e+03 4.552693e+03 4.545275e+03 nan nan nan nan nan nan nan 7.434276e+03 4.355589e+03 4.355546e+03 4.355527e+03 4.355503e+03 0.000000e+00 3.848326e+03 3.859204e+03 3.853873e+03 nan nan nan nan nan nan nan 7.232140e+03 4.285414e+03 4.285373e+03 4.285355e+03 4.285331e+03 3.797802e+03 0.000000e+00 3.804012e+03 3.798832e+03 nan nan nan nan nan nan nan 7.300126e+03 4.309194e+03 4.309152e+03 4.309134e+03 4.309110e+03 3.816466e+03 3.812063e+03 0.000000e+00 3.817506e+03 nan nan nan nan nan nan nan 7.333166e+03 4.320685e+03 4.320643e+03 4.320625e+03 4.320601e+03 3.825477e+03 3.821053e+03 3.831778e+03 0.000000e+00 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/sirocco.config000066400000000000000000000001761507764646700245470ustar00rootroot00000000000000# Current configuration 24 # Number of CPUs 4 # Number of CUDA devices 4 # Number of OpenCL devices 0 # Number of MIC devices starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/sirocco.latency000066400000000000000000000035021507764646700247350ustar00rootroot00000000000000# to 0 to 1 to 2 to 3 to 4 to 5 to 6 to 7 to 8 to 9 to 10 to 11 to 12 to 13 to 14 to 15 0.000000e+00 1.029027e+01 1.031898e+01 9.529422e+00 1.039846e+01 9.643953e+00 1.113670e+01 1.055939e+01 1.004796e+01 nan nan nan nan nan nan nan 1.085040e+01 0.000000e+00 1.152573e+01 2.350899e+01 2.337711e+01 2.049435e+01 2.198709e+01 2.140979e+01 2.089836e+01 nan nan nan nan nan nan nan 9.920578e+00 1.167180e+01 0.000000e+00 2.304539e+01 2.330630e+01 1.956453e+01 2.105727e+01 2.047997e+01 1.996854e+01 nan nan nan nan nan nan nan 1.093016e+01 2.380006e+01 2.358666e+01 0.000000e+00 1.101548e+01 2.057412e+01 2.206686e+01 2.148955e+01 2.097812e+01 nan nan nan nan nan nan nan 1.097311e+01 2.126338e+01 2.129209e+01 2.050253e+01 0.000000e+00 2.061706e+01 2.210980e+01 2.153250e+01 2.102107e+01 nan nan nan nan nan nan nan 1.162996e+01 2.192023e+01 2.194894e+01 2.115938e+01 2.202842e+01 0.000000e+00 2.276666e+01 2.218935e+01 2.167792e+01 nan nan nan nan nan nan nan 1.359506e+01 2.388534e+01 2.391404e+01 2.312448e+01 2.399352e+01 2.323902e+01 0.000000e+00 2.415445e+01 2.364302e+01 nan nan nan nan nan nan nan 1.245815e+01 2.274842e+01 2.277712e+01 2.198757e+01 2.285661e+01 2.210210e+01 2.359484e+01 0.000000e+00 2.250611e+01 nan nan nan nan nan nan nan 1.236026e+01 2.265053e+01 2.267923e+01 2.188968e+01 2.275872e+01 2.200421e+01 2.349695e+01 2.291965e+01 0.000000e+00 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/sirocco.platform.v4.xml000066400000000000000000000215511507764646700262550ustar00rootroot00000000000000 starpu-1.4.9+dfsg/tools/perfmodels/sampling/bus/sirocco.platform.xml000066400000000000000000000213101507764646700257160ustar00rootroot00000000000000 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/000077500000000000000000000000001507764646700227245ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/000077500000000000000000000000001507764646700231545ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/add_scal.mirage000066400000000000000000000016601507764646700260770ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 1 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_cores1_impl0 (Comb0) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us or J) dev (us or J) sum sum2 n 50d9324f 25 0.000000e+00 1.540344e+02 3.339691e+01 1.632765e+04 2.633248e+06 106 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_gemm.attila000066400000000000000000000100421507764646700274630ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 8.407559e+04 3.415249e+03 1.399859e+08 1.178881e+13 1665 f0ac7beb 4915200 5.242880e+08 2.610119e+04 1.422415e+03 4.251883e+07 1.113088e+12 1629 d46431bb 1228800 6.553600e+07 3.432588e+03 1.640071e+02 9.130685e+06 3.141343e+10 2660 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 2.795670e+03 5.624760e+01 1.818024e+07 5.084653e+10 6503 f0ac7beb 4915200 5.242880e+08 8.880682e+02 3.243424e+01 5.760010e+06 5.122105e+09 6486 d46431bb 1228800 6.553600e+07 2.022322e+02 1.071833e+01 1.116119e+06 2.263493e+08 5519 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 2.815870e+03 4.694553e+01 1.827781e+07 5.148226e+10 6491 f0ac7beb 4915200 5.242880e+08 8.961392e+02 3.565427e+01 5.741564e+06 5.153386e+09 6407 d46431bb 1228800 6.553600e+07 2.020566e+02 9.551669e+00 1.107876e+06 2.243540e+08 5483 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 2.810209e+03 3.946230e+01 1.806121e+07 5.076578e+10 6427 f0ac7beb 4915200 5.242880e+08 8.833768e+02 3.092949e+01 5.707497e+06 5.048051e+09 6461 d46431bb 1228800 6.553600e+07 1.637484e+02 6.969807e+00 1.084015e+06 1.778273e+08 6620 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_gemm.hannibal000066400000000000000000000040251507764646700277650ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 5.763709e+03 3.768350e+03 4.501024e+09 3.703209e+13 780925 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 5.889910e+03 4.485232e+03 4.352661e+09 4.050353e+13 739003 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 5.782569e+03 3.939612e+03 4.412291e+09 3.735706e+13 763033 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_gemm.hannibal-pitch000066400000000000000000000040251507764646700310720ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 5.763709e+03 3.768350e+03 4.501024e+09 3.703209e+13 780925 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 5.889910e+03 4.485232e+03 4.352661e+09 4.050353e+13 739003 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 5.782569e+03 3.939612e+03 4.412291e+09 3.735706e+13 763033 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_gemm.idgraf000066400000000000000000000217051507764646700274510ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 8.987520e+04 9.682708e+02 2.085105e+07 1.874210e+12 232 d46431bb 1228800 6.553600e+07 3.465410e+03 7.427679e+01 2.737674e+06 9.491521e+09 790 f0ac7beb 4915200 5.242880e+08 2.744657e+04 5.713498e+02 7.575252e+06 2.080048e+11 276 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 2.825219e+03 1.227364e+02 1.106356e+07 3.131597e+10 3916 d46431bb 1228800 6.553600e+07 2.060677e+02 2.909125e+01 5.497887e+05 1.155516e+08 2668 f0ac7beb 4915200 5.242880e+08 9.076961e+02 5.977819e+01 3.441076e+06 3.136998e+09 3791 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 2.823818e+03 1.560928e+02 1.063450e+07 3.012164e+10 3766 d46431bb 1228800 6.553600e+07 1.632767e+02 2.137437e+01 5.224853e+05 8.677162e+07 3200 f0ac7beb 4915200 5.242880e+08 9.226606e+02 6.080500e+01 3.410153e+06 3.160079e+09 3696 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 2.821988e+03 1.631366e+02 1.046111e+07 2.961977e+10 3707 d46431bb 1228800 6.553600e+07 1.661504e+02 2.199315e+01 5.172263e+05 8.744312e+07 3113 f0ac7beb 4915200 5.242880e+08 9.172785e+02 5.967474e+01 3.492079e+06 3.216766e+09 3807 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 2.817019e+03 1.506473e+02 1.119202e+07 3.161828e+10 3973 d46431bb 1228800 6.553600e+07 2.042642e+02 2.707555e+01 5.498791e+05 1.142941e+08 2692 f0ac7beb 4915200 5.242880e+08 9.044446e+02 5.780357e+01 3.477590e+06 3.158134e+09 3845 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 2.816710e+03 1.414989e+02 1.032888e+07 2.916687e+10 3667 d46431bb 1228800 6.553600e+07 1.639497e+02 2.257894e+01 4.474187e+05 7.474542e+07 2729 f0ac7beb 4915200 5.242880e+08 9.331501e+02 5.611510e+01 3.235231e+06 3.029874e+09 3467 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 2.815225e+03 1.445443e+02 1.009821e+07 2.850368e+10 3587 d46431bb 1228800 6.553600e+07 1.659035e+02 2.475202e+01 4.006569e+05 6.794997e+07 2415 f0ac7beb 4915200 5.242880e+08 9.137585e+02 6.301297e+01 3.125968e+06 2.869963e+09 3421 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 2.807699e+03 1.292512e+02 1.006279e+07 2.831317e+10 3584 d46431bb 1228800 6.553600e+07 1.680450e+02 2.634123e+01 3.922170e+05 6.752957e+07 2334 f0ac7beb 4915200 5.242880e+08 8.912551e+02 5.629783e+01 3.090873e+06 2.765747e+09 3468 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 1.769472e+09 2.827622e+03 1.304764e+02 1.027841e+07 2.912533e+10 3635 d46431bb 1228800 6.553600e+07 1.666216e+02 2.357918e+01 4.083895e+05 6.940921e+07 2451 f0ac7beb 4915200 5.242880e+08 9.077285e+02 5.688987e+01 3.089908e+06 2.815814e+09 3404 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_gemm.mirage000066400000000000000000000107241507764646700274600ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 6.553600e+07 3.262643e+03 7.810468e+01 1.532463e+07 5.002746e+10 4697 6e96d715 12288 6.553600e+04 3.262643e+00 7.810468e-02 1.532463e+04 5.002746e+6 4697 24c84a50 11059200 1.769472e+09 8.778020e+04 9.598441e+02 2.889724e+08 2.536909e+13 3292 f0ac7beb 4915200 5.242880e+08 2.647095e+04 4.073263e+02 4.762124e+07 1.260878e+12 1799 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 6.553600e+07 2.011248e+02 1.498037e+01 1.633334e+06 3.303264e+08 8121 6e96d715 12288 6.553600e+04 2.011248e-01 1.498037e-02 1.633334e+03 3.303264e+04 8121 24c84a50 11059200 1.769472e+09 2.805491e+03 6.931361e+01 3.683329e+07 1.033985e+11 13129 f0ac7beb 4915200 5.242880e+08 8.944354e+02 3.828761e+01 5.687714e+06 5.096615e+09 6359 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 6.553600e+07 2.004836e+02 1.337883e+01 1.620709e+06 3.263726e+08 8084 6e96d715 12288 6.553600e+04 2.004836e-01 1.337883e-02 1.620709e+03 3.263726e+04 8084 24c84a50 11059200 1.769472e+09 2.828590e+03 6.702999e+01 3.650578e+07 1.033178e+11 12906 f0ac7beb 4915200 5.242880e+08 9.090465e+02 3.898196e+01 5.691540e+06 5.183389e+09 6261 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 6.553600e+07 1.603761e+02 1.178063e+01 1.522450e+06 2.454821e+08 9493 6e96d715 12288 6.553600e+04 1.603761e-01 1.178063e-02 1.522450e+03 2.454821e+04 9493 24c84a50 11059200 1.769472e+09 2.828209e+03 7.003112e+01 3.675541e+07 1.040157e+11 12996 f0ac7beb 4915200 5.242880e+08 8.858930e+02 3.323656e+01 5.675030e+06 5.034546e+09 6406 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_gemm.sirocco000066400000000000000000000130761507764646700276600ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 5.971968e+09 2.632193e+03 3.963412e+02 2.414774e+07 6.500262e+10 9174 24c84a50 11059200 1.769472e+09 7.545142e+02 4.622075e+01 1.174100e+07 8.891991e+09 15561 f0ac7beb 4915200 5.242880e+08 2.651541e+02 2.896639e+01 2.197862e+06 5.897272e+08 8289 d46431bb 1228800 6.553600e+07 5.633559e+01 1.027680e+01 7.345034e+05 4.275566e+07 13038 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 5.971968e+09 2.577659e+03 3.627579e+02 2.443621e+07 6.423574e+10 9480 24c84a50 11059200 1.769472e+09 7.434516e+02 4.620554e+01 1.184541e+07 8.840509e+09 15933 f0ac7beb 4915200 5.242880e+08 2.633265e+02 2.997768e+01 2.185610e+06 5.829880e+08 8300 d46431bb 1228800 6.553600e+07 5.716015e+01 1.157773e+01 6.223597e+05 3.703364e+07 10888 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 5.971968e+09 2.575702e+03 3.709810e+02 2.479886e+07 6.519954e+10 9628 24c84a50 11059200 1.769472e+09 7.437036e+02 4.816754e+01 1.192752e+07 8.907749e+09 16038 f0ac7beb 4915200 5.242880e+08 2.593699e+02 2.791728e+01 2.302427e+06 6.040986e+08 8877 d46431bb 1228800 6.553600e+07 5.656092e+01 1.160148e+01 5.523739e+05 3.255722e+07 9766 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 5.971968e+09 2.620269e+03 3.685780e+02 2.431872e+07 6.498239e+10 9281 24c84a50 11059200 1.769472e+09 7.558763e+02 4.610795e+01 1.204791e+07 9.140616e+09 15939 f0ac7beb 4915200 5.242880e+08 2.625144e+02 2.860172e+01 2.207221e+06 5.863054e+08 8408 d46431bb 1228800 6.553600e+07 5.829194e+01 1.220705e+01 7.805874e+05 4.749737e+07 13391 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 5.971968e+09 1.120418e+05 2.694960e+04 3.755640e+08 4.451334e+13 3352 24c84a50 11059200 1.769472e+09 3.494026e+04 6.587010e+03 1.697048e+08 6.140270e+12 4857 f0ac7beb 4915200 5.242880e+08 1.147661e+04 2.242393e+03 3.799907e+07 4.527495e+11 3311 d46431bb 1228800 6.553600e+07 1.593513e+03 3.073908e+02 2.396962e+07 3.961722e+10 15042 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_potrf.attila000066400000000000000000000100121507764646700276650ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 7.593370e+04 4.030251e+03 2.353945e+06 1.792473e+11 31 afdd228b 1638400 8.758624e+07 2.346245e+04 1.988237e+03 1.032348e+06 2.439534e+10 44 cea37d6d 409600 1.097392e+07 3.401100e+03 3.819888e+02 1.564506e+05 5.388163e+08 46 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 4.425357e+04 2.763302e+03 7.523107e+05 3.342224e+10 17 afdd228b 1638400 8.758624e+07 2.425311e+04 2.094515e+03 2.910373e+05 7.111204e+09 12 cea37d6d 409600 1.097392e+07 1.130795e+04 5.745206e+02 1.130795e+05 1.281997e+09 10 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 4.555777e+04 5.907293e+03 7.744821e+05 3.587692e+10 17 afdd228b 1638400 8.758624e+07 2.509024e+04 3.469720e+03 2.509024e+05 6.415590e+09 10 cea37d6d 409600 1.097392e+07 1.082278e+04 2.477308e+02 1.082278e+05 1.171939e+09 10 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 4.828333e+04 3.379151e+03 5.311166e+05 2.576968e+10 11 afdd228b 1638400 8.758624e+07 2.737149e+04 3.217773e+03 2.737149e+05 7.595526e+09 10 cea37d6d 409600 1.097392e+07 1.143954e+04 1.654563e+02 1.143954e+05 1.308904e+09 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_potrf.hannibal000066400000000000000000000040231507764646700301700ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 1.701016e+05 7.229737e+03 4.082438e+06 6.956835e+11 24 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 1.188776e+05 9.331204e+02 2.113643e+08 2.512803e+13 1778 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 1.205438e+05 2.044578e+03 2.189075e+08 2.639552e+13 1816 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_potrf.hannibal-pitch000066400000000000000000000040231507764646700312750ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 1.701016e+05 7.229737e+03 4.082438e+06 6.956835e+11 24 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 1.188776e+05 9.331204e+02 2.113643e+08 2.512803e+13 1778 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 1.205438e+05 2.044578e+03 2.189075e+08 2.639552e+13 1816 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_potrf.idgraf000066400000000000000000000216221507764646700276540ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 2.069576e+04 7.440388e+01 4.346110e+05 8.994720e+09 21 cea37d6d 409600 1.097392e+07 1.068290e+03 2.074934e+01 3.098041e+04 3.310855e+07 29 afdd228b 1638400 8.758624e+07 6.632886e+03 6.634864e+01 1.392906e+05 9.239911e+08 21 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 5.998499e+04 6.600211e+03 5.998499e+05 3.641761e+10 10 cea37d6d 409600 1.097392e+07 1.500406e+04 5.740284e+02 1.500406e+05 2.254514e+09 10 afdd228b 1638400 8.758624e+07 3.368249e+04 5.947857e+03 3.368249e+05 1.169887e+10 10 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 5.745646e+04 7.363450e+03 5.745646e+05 3.355466e+10 10 cea37d6d 409600 1.097392e+07 1.559370e+04 1.137871e+03 1.559370e+05 2.444583e+09 10 afdd228b 1638400 8.758624e+07 3.216379e+04 4.954206e+03 3.216379e+05 1.059054e+10 10 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 5.761859e+04 8.603827e+03 5.761859e+05 3.393928e+10 10 cea37d6d 409600 1.097392e+07 1.498399e+04 7.885417e+02 1.498399e+05 2.251416e+09 10 afdd228b 1638400 8.758624e+07 3.033086e+04 2.968298e+03 3.033086e+05 9.287718e+09 10 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 5.207197e+04 2.780602e+03 5.207197e+05 2.719222e+10 10 cea37d6d 409600 1.097392e+07 1.609271e+04 2.681035e+03 1.609271e+05 2.661633e+09 10 afdd228b 1638400 8.758624e+07 3.107603e+04 1.620445e+03 3.107603e+05 9.683455e+09 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 5.062446e+04 2.429137e+03 5.062446e+05 2.568736e+10 10 cea37d6d 409600 1.097392e+07 1.506158e+04 8.561331e+02 1.506158e+05 2.275840e+09 10 afdd228b 1638400 8.758624e+07 3.034398e+04 4.027845e+03 3.034398e+05 9.369809e+09 10 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 5.174953e+04 6.183695e+03 5.174953e+05 2.716252e+10 10 cea37d6d 409600 1.097392e+07 1.518996e+04 9.403764e+02 1.518996e+05 2.316193e+09 10 afdd228b 1638400 8.758624e+07 3.100983e+04 5.124047e+03 3.100983e+05 9.878653e+09 10 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 5.475211e+04 6.512121e+03 5.475211e+05 3.040202e+10 10 cea37d6d 409600 1.097392e+07 1.504708e+04 5.064339e+02 1.504708e+05 2.266711e+09 10 afdd228b 1638400 8.758624e+07 2.918927e+04 4.035680e+03 2.918927e+05 8.683004e+09 10 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 1.097392e+07 1.467875e+04 2.659893e+02 1.467875e+05 2.155366e+09 10 afdd228b 1638400 8.758624e+07 3.186232e+04 5.396938e+03 3.186232e+05 1.044334e+10 10 617e5fe6 3686400 2.953730e+08 5.896762e+04 1.233845e+04 5.896762e+05 3.629418e+10 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_potrf.mirage000066400000000000000000000106631507764646700276670ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 1.097392e+07 9.221097e+02 4.521529e+01 7.930143e+04 7.330044e+07 86 e5a07e31 4096 1.097392e+04 9.221097e-01 4.521529e-02 7.930143e+01 7.330044e+03 86 617e5fe6 3686400 2.953730e+08 1.884969e+04 7.370619e+02 2.111165e+06 3.985565e+10 112 afdd228b 1638400 8.758624e+07 5.940712e+03 2.434960e+02 2.376285e+05 1.414054e+09 40 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 5.366740e+03 9.457424e+01 1.556354e+05 8.355143e+08 29 cea37d6d 409600 1.097392e+07 2.003232e+03 2.738589e+01 5.809372e+04 1.163969e+08 29 e5a07e31 4096 1.097392e+04 2.003232e+00 2.738589e-02 5.809372e+01 1.163969e+04 29 afdd228b 1638400 8.758624e+07 3.522008e+03 5.701992e+01 1.021382e+05 3.598259e+08 29 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 8.758624e+07 3.775579e+03 6.170997e+01 1.094918e+05 4.135053e+08 29 cea37d6d 409600 1.097392e+07 2.237869e+03 3.535637e+01 6.489820e+04 1.452699e+08 29 e5a07e31 4096 1.097392e+04 2.237869e+00 3.535637e-02 6.489820e+01 1.452699e+04 29 617e5fe6 3686400 2.953730e+08 5.557011e+03 6.001156e+01 1.611533e+05 8.956353e+08 29 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 2.953730e+08 5.500099e+03 6.565775e+01 1.595029e+05 8.774066e+08 29 cea37d6d 409600 1.097392e+07 2.220120e+03 2.867670e+01 6.438348e+04 1.429629e+08 29 e5a07e31 4096 1.097392e+04 2.220120e+00 2.867670e-02 6.438348e+01 1.429629e+04 29 afdd228b 1638400 8.758624e+07 3.742877e+03 5.898784e+01 1.085434e+05 4.063656e+08 29 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_potrf.sirocco000066400000000000000000000130171507764646700300600ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 9.963650e+08 1.842284e+04 2.612920e+03 1.271176e+06 2.388975e+10 69 617e5fe6 3686400 2.953730e+08 7.379027e+03 1.089221e+03 5.903221e+05 4.450915e+09 80 afdd228b 1638400 8.758624e+07 2.799281e+03 5.546704e+02 2.323403e+05 6.759213e+08 83 cea37d6d 409600 1.097392e+07 4.391691e+02 4.300491e+01 7.597626e+04 3.368638e+07 173 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 9.963650e+08 4.852293e+04 1.266847e+04 4.852293e+05 2.514965e+10 10 617e5fe6 3686400 2.953730e+08 7.699799e+03 1.513211e+03 9.239759e+04 7.389205e+08 12 afdd228b 1638400 8.758624e+07 5.010966e+03 7.544427e+02 5.010965e+04 2.567896e+08 10 cea37d6d 409600 1.097392e+07 3.343709e+03 3.943178e+02 3.343709e+04 1.133588e+08 10 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 9.963650e+08 3.188534e+04 1.123169e+04 3.188534e+05 1.142826e+10 10 617e5fe6 3686400 2.953730e+08 8.514691e+03 1.568765e+03 9.366160e+04 8.245709e+08 11 afdd228b 1638400 8.758624e+07 5.436465e+03 1.325711e+03 5.436465e+04 3.131266e+08 10 cea37d6d 409600 1.097392e+07 3.336739e+03 3.113015e+02 3.336739e+04 1.123074e+08 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 9.963650e+08 3.051372e+04 1.107281e+04 3.051372e+05 1.053694e+10 10 617e5fe6 3686400 2.953730e+08 8.456328e+03 1.411439e+03 1.014759e+05 8.820196e+08 12 afdd228b 1638400 8.758624e+07 4.996835e+03 9.396038e+02 4.996835e+04 2.585122e+08 10 cea37d6d 409600 1.097392e+07 3.060839e+03 4.968177e+01 3.060839e+04 9.371202e+07 10 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 9.963650e+08 1.332935e+04 3.063746e+03 2.132696e+05 2.992929e+09 16 617e5fe6 3686400 2.953730e+08 8.333388e+03 1.108400e+03 1.000007e+05 8.480868e+08 12 afdd228b 1638400 8.758624e+07 5.517925e+03 1.047059e+03 5.517925e+04 3.154382e+08 10 cea37d6d 409600 1.097392e+07 3.435367e+03 2.405829e+02 3.435367e+04 1.185962e+08 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_syrk.attila000066400000000000000000000100321507764646700275250ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 4.203779e+04 3.415249e+03 6.999295e+07 1.178881e+13 1665 d39bff17 3276800 2.625536e+08 1.305059e+04 1.422415e+03 2.125942e+07 1.113088e+12 1629 2c1922b7 819200 3.287040e+07 1.716294e+03 1.640071e+02 4.565342e+06 3.141343e+10 2660 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.397835e+03 5.624760e+01 9.090120e+06 5.084653e+10 6503 d39bff17 3276800 2.625536e+08 4.440341e+02 3.243424e+01 2.880005e+06 5.122105e+09 6486 2c1922b7 819200 3.287040e+07 1.011161e+02 1.071833e+01 5.580595e+05 2.263493e+08 5519 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.407935e+03 4.694553e+01 9.138905e+06 5.148226e+10 6491 d39bff17 3276800 2.625536e+08 4.480696e+02 3.565427e+01 2.870782e+06 5.153386e+09 6407 2c1922b7 819200 3.287040e+07 1.010283e+02 9.551669e+00 5.539380e+05 2.243540e+08 5483 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.405104e+03 3.946230e+01 9.030605e+06 5.076578e+10 6427 d39bff17 3276800 2.625536e+08 4.416884e+02 3.092949e+01 2.853748e+06 5.048051e+09 6461 2c1922b7 819200 3.287040e+07 8.187420e+01 6.969807e+00 5.420075e+05 1.778273e+08 6620 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_syrk.hannibal000066400000000000000000000040221507764646700300250ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 2.881854e+03 3.768350e+03 2.250512e+09 3.703209e+13 780925 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 2.944955e+03 4.485232e+03 2.176330e+09 4.050353e+13 739003 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 2.891285e+03 3.939612e+03 2.206146e+09 3.735706e+13 763033 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_syrk.hannibal-pitch000066400000000000000000000040221507764646700311320ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 2.881854e+03 3.768350e+03 2.250512e+09 3.703209e+13 780925 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 2.944955e+03 4.485232e+03 2.176330e+09 4.050353e+13 739003 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 2.891285e+03 3.939612e+03 2.206146e+09 3.735706e+13 763033 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_syrk.idgraf000066400000000000000000000216631507764646700275170ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 4.493760e+04 9.682708e+02 1.042552e+07 1.874210e+12 232 2c1922b7 819200 3.287040e+07 1.732705e+03 7.427679e+01 1.368837e+06 9.491521e+09 790 d39bff17 3276800 2.625536e+08 1.372328e+04 5.713498e+02 3.787626e+06 2.080048e+11 276 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.412610e+03 1.227364e+02 5.531780e+06 3.131597e+10 3916 2c1922b7 819200 3.287040e+07 1.030339e+02 2.909125e+01 2.748943e+05 1.155516e+08 2668 d39bff17 3276800 2.625536e+08 4.538481e+02 5.977819e+01 1.720538e+06 3.136998e+09 3791 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.411909e+03 1.560928e+02 5.317250e+06 3.012164e+10 3766 2c1922b7 819200 3.287040e+07 8.163835e+01 2.137437e+01 2.612426e+05 8.677162e+07 3200 d39bff17 3276800 2.625536e+08 4.613303e+02 6.080500e+01 1.705076e+06 3.160079e+09 3696 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.410994e+03 1.631366e+02 5.230555e+06 2.961977e+10 3707 2c1922b7 819200 3.287040e+07 8.307520e+01 2.199315e+01 2.586131e+05 8.744312e+07 3113 d39bff17 3276800 2.625536e+08 4.586393e+02 5.967474e+01 1.746040e+06 3.216766e+09 3807 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.408509e+03 1.506473e+02 5.596010e+06 3.161828e+10 3973 2c1922b7 819200 3.287040e+07 1.021321e+02 2.707555e+01 2.749395e+05 1.142941e+08 2692 d39bff17 3276800 2.625536e+08 4.522223e+02 5.780357e+01 1.738795e+06 3.158134e+09 3845 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.408355e+03 1.414989e+02 5.164440e+06 2.916687e+10 3667 2c1922b7 819200 3.287040e+07 8.197485e+01 2.257894e+01 2.237094e+05 7.474542e+07 2729 d39bff17 3276800 2.625536e+08 4.665750e+02 5.611510e+01 1.617616e+06 3.029874e+09 3467 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.407612e+03 1.445443e+02 5.049105e+06 2.850368e+10 3587 2c1922b7 819200 3.287040e+07 8.295175e+01 2.475202e+01 2.003285e+05 6.794997e+07 2415 d39bff17 3276800 2.625536e+08 4.568793e+02 6.301297e+01 1.562984e+06 2.869963e+09 3421 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.403850e+03 1.292512e+02 5.031395e+06 2.831317e+10 3584 2c1922b7 819200 3.287040e+07 8.402250e+01 2.634123e+01 1.961085e+05 6.752957e+07 2334 d39bff17 3276800 2.625536e+08 4.456275e+02 5.629783e+01 1.545436e+06 2.765747e+09 3468 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.413811e+03 1.304764e+02 5.139205e+06 2.912533e+10 3635 2c1922b7 819200 3.287040e+07 8.331080e+01 2.357918e+01 2.041948e+05 6.940921e+07 2451 d39bff17 3276800 2.625536e+08 4.538643e+02 5.688987e+01 1.544954e+06 2.815814e+09 3404 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_syrk.mirage000066400000000000000000000107101507764646700275160ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 3.287040e+07 1.631322e+03 7.810468e+01 7.662315e+06 5.002746e+10 4697 1827caf5 8192 3.287040e+04 1.631322e+00 7.810468e-02 7.662315e+03 5.002746e+6 4697 ff82dda0 7372800 8.856576e+08 4.389010e+04 9.598441e+02 1.444862e+08 2.536909e+13 3292 d39bff17 3276800 2.625536e+08 1.323548e+04 4.073263e+02 2.381062e+07 1.260878e+12 1799 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 3.287040e+07 1.005624e+02 1.498037e+01 8.166670e+05 3.303264e+08 8121 1827caf5 8192 3.287040e+04 1.005624e-01 1.498037e-02 8.166670e+02 3.303264e+04 8121 ff82dda0 7372800 8.856576e+08 1.402745e+03 6.931361e+01 1.841664e+07 1.033985e+11 13129 d39bff17 3276800 2.625536e+08 4.472177e+02 3.828761e+01 2.843857e+06 5.096615e+09 6359 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 3.287040e+07 1.002418e+02 1.337883e+01 8.103545e+05 3.263726e+08 8084 1827caf5 8192 3.287040e+04 1.002418e-01 1.337883e-02 8.103545e+02 3.263726e+04 8084 ff82dda0 7372800 8.856576e+08 1.414295e+03 6.702999e+01 1.825289e+07 1.033178e+11 12906 d39bff17 3276800 2.625536e+08 4.545233e+02 3.898196e+01 2.845770e+06 5.183389e+09 6261 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 3.287040e+07 8.018805e+01 1.178063e+01 7.612250e+05 2.454821e+08 9493 1827caf5 8192 3.287040e+04 8.018805e-02 1.178063e-02 7.612250e+02 2.454821e+04 9493 ff82dda0 7372800 8.856576e+08 1.414104e+03 7.003112e+01 1.837770e+07 1.040157e+11 12996 d39bff17 3276800 2.625536e+08 4.429465e+02 3.323656e+01 2.837515e+06 5.034546e+09 6406 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_syrk.sirocco000066400000000000000000000130641507764646700277200ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 2.988058e+09 1.316097e+03 3.963412e+02 1.207387e+07 6.500262e+10 9174 ff82dda0 7372800 8.856576e+08 3.772571e+02 4.622075e+01 5.870500e+06 8.891991e+09 15561 d39bff17 3276800 2.625536e+08 1.325771e+02 2.896639e+01 1.098931e+06 5.897272e+08 8289 2c1922b7 819200 3.287040e+07 2.816780e+01 1.027680e+01 3.672517e+05 4.275566e+07 13038 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 2.988058e+09 1.288830e+03 3.627579e+02 1.221810e+07 6.423574e+10 9480 ff82dda0 7372800 8.856576e+08 3.717258e+02 4.620554e+01 5.922705e+06 8.840509e+09 15933 d39bff17 3276800 2.625536e+08 1.316633e+02 2.997768e+01 1.092805e+06 5.829880e+08 8300 2c1922b7 819200 3.287040e+07 2.858008e+01 1.157773e+01 3.111798e+05 3.703364e+07 10888 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 2.988058e+09 1.287851e+03 3.709810e+02 1.239943e+07 6.519954e+10 9628 ff82dda0 7372800 8.856576e+08 3.718518e+02 4.816754e+01 5.963760e+06 8.907749e+09 16038 d39bff17 3276800 2.625536e+08 1.296849e+02 2.791728e+01 1.151214e+06 6.040986e+08 8877 2c1922b7 819200 3.287040e+07 2.828046e+01 1.160148e+01 2.761870e+05 3.255722e+07 9766 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 2.988058e+09 1.310134e+03 3.685780e+02 1.215936e+07 6.498239e+10 9281 ff82dda0 7372800 8.856576e+08 3.779382e+02 4.610795e+01 6.023955e+06 9.140616e+09 15939 d39bff17 3276800 2.625536e+08 1.312572e+02 2.860172e+01 1.103610e+06 5.863054e+08 8408 2c1922b7 819200 3.287040e+07 2.914597e+01 1.220705e+01 3.902937e+05 4.749737e+07 13391 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 2.988058e+09 5.602090e+04 2.694960e+04 1.877820e+08 4.451334e+13 3352 ff82dda0 7372800 8.856576e+08 1.747013e+04 6.587010e+03 8.485240e+07 6.140270e+12 4857 d39bff17 3276800 2.625536e+08 5.738305e+03 2.242393e+03 1.899954e+07 4.527495e+11 3311 2c1922b7 819200 3.287040e+07 7.967565e+02 3.073908e+02 1.198481e+07 3.961722e+10 15042 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_trsm.attila000066400000000000000000000100261507764646700275250ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 4.361594e+04 1.992990e+03 1.840593e+07 8.044680e+11 422 d39bff17 3276800 2.625536e+08 1.391260e+04 9.732436e+02 6.552836e+06 9.161314e+10 471 2c1922b7 819200 3.287040e+07 2.026126e+03 2.243730e+02 1.355478e+06 2.780050e+09 669 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 6.302421e+03 2.284702e+02 1.739468e+06 1.097727e+10 276 d39bff17 3276800 2.625536e+08 2.553136e+03 1.599096e+02 6.714747e+05 1.721091e+09 263 2c1922b7 819200 3.287040e+07 6.889531e+02 1.162392e+02 1.198778e+05 8.494121e+07 174 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 6.325409e+03 1.859324e+02 1.688884e+06 1.069211e+10 267 d39bff17 3276800 2.625536e+08 2.539349e+03 1.556256e+02 6.297585e+05 1.605183e+09 248 2c1922b7 819200 3.287040e+07 6.837878e+02 1.012279e+02 1.319710e+05 9.221787e+07 193 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 6.297128e+03 2.218514e+02 1.542796e+06 9.727245e+09 245 d39bff17 3276800 2.625536e+08 2.528040e+03 1.085312e+02 7.255475e+05 1.837594e+09 287 2c1922b7 819200 3.287040e+07 6.470080e+02 5.924722e+01 1.598110e+05 1.042660e+08 247 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_trsm.hannibal000066400000000000000000000040171507764646700300260ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.551780e+04 9.258624e+03 5.415867e+08 1.139602e+13 34901 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.787309e+04 1.121893e+04 5.782658e+08 1.440761e+13 32354 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.675795e+04 1.012077e+04 5.931309e+08 1.356507e+13 35394 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_trsm.hannibal-pitch000066400000000000000000000040171507764646700311330ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.551780e+04 9.258624e+03 5.415867e+08 1.139602e+13 34901 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.787309e+04 1.121893e+04 5.782658e+08 1.440761e+13 32354 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 1.675795e+04 1.012077e+04 5.931309e+08 1.356507e+13 35394 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_trsm.idgraf000066400000000000000000000216531507764646700275130ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 4.711469e+04 4.337925e+02 3.203799e+06 1.509588e+11 68 2c1922b7 819200 3.287040e+07 1.979166e+03 8.798869e+01 6.828124e+05 1.354070e+09 345 d39bff17 3276800 2.625536e+08 1.482664e+04 2.506296e+02 2.298130e+06 3.408328e+10 155 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 6.573848e+03 6.169449e+02 1.360787e+06 9.024393e+09 207 2c1922b7 819200 3.287040e+07 6.955196e+02 8.976154e+01 1.286711e+05 9.098386e+07 185 d39bff17 3276800 2.625536e+08 2.647434e+03 2.520462e+02 4.685958e+05 1.251821e+09 177 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 6.555664e+03 6.950469e+02 1.252132e+06 8.300825e+09 191 2c1922b7 819200 3.287040e+07 6.812499e+02 8.342802e+01 1.273937e+05 8.808853e+07 187 d39bff17 3276800 2.625536e+08 2.596800e+03 1.668067e+02 5.894736e+05 1.537061e+09 227 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 6.446442e+03 5.413553e+02 1.276395e+06 8.286236e+09 198 2c1922b7 819200 3.287040e+07 6.941204e+02 8.002896e+01 1.277182e+05 8.983023e+07 184 d39bff17 3276800 2.625536e+08 2.630763e+03 2.300111e+02 4.603835e+05 1.220418e+09 175 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 6.554622e+03 7.028631e+02 1.238824e+06 8.213390e+09 189 2c1922b7 819200 3.287040e+07 6.905951e+02 7.284704e+01 1.353566e+05 9.451674e+07 196 d39bff17 3276800 2.625536e+08 2.623425e+03 2.211699e+02 4.905805e+05 1.296149e+09 187 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 6.504271e+03 6.367049e+02 9.951534e+05 6.534773e+09 153 2c1922b7 819200 3.287040e+07 7.029111e+02 9.289767e+01 7.169693e+04 5.127683e+07 102 d39bff17 3276800 2.625536e+08 2.684586e+03 3.481310e+02 4.080571e+05 1.113886e+09 152 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 6.618862e+03 8.843940e+02 8.405955e+05 5.663119e+09 127 2c1922b7 819200 3.287040e+07 7.079333e+02 9.356613e+01 6.796160e+04 4.895273e+07 96 d39bff17 3276800 2.625536e+08 2.800887e+03 4.371231e+02 3.221020e+05 9.241450e+08 115 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 6.576395e+03 7.489644e+02 8.878133e+05 5.914339e+09 135 2c1922b7 819200 3.287040e+07 7.050156e+02 1.025857e+02 8.037177e+04 5.786307e+07 114 d39bff17 3276800 2.625536e+08 2.645162e+03 2.750078e+02 4.205807e+05 1.124529e+09 159 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 8.856576e+08 6.544427e+03 6.576164e+02 9.358531e+05 6.186464e+09 143 2c1922b7 819200 3.287040e+07 7.150712e+02 1.054194e+02 8.223319e+04 6.008061e+07 115 d39bff17 3276800 2.625536e+08 2.613530e+03 2.505172e+02 3.972565e+05 1.047781e+09 152 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_trsm.mirage000066400000000000000000000107051507764646700275170ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 3.287040e+07 1.868713e+03 9.872163e+01 2.535843e+06 4.751987e+09 1357 1827caf5 8192 3.287040e+04 1.868713e+00 9.872163e-02 2.535843e+03 4.751987e+05 1357 ff82dda0 7372800 8.856576e+08 4.564676e+04 7.031596e+02 5.687586e+07 2.596815e+12 1246 d39bff17 3276800 2.625536e+08 1.417018e+04 5.255501e+02 8.232874e+06 1.168218e+11 581 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 3.287040e+07 6.829247e+02 7.632991e+01 1.420483e+05 9.822018e+07 208 1827caf5 8192 3.287040e+04 6.829247e-01 7.632991e-02 1.420483e+02 9.822018e+03 208 ff82dda0 7372800 8.856576e+08 6.393638e+03 3.362739e+02 3.650767e+06 2.340625e+10 571 d39bff17 3276800 2.625536e+08 2.602204e+03 1.876810e+02 6.895839e+05 1.803772e+09 265 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 3.287040e+07 6.842707e+02 6.860830e+01 1.211159e+05 8.370922e+07 177 1827caf5 8192 3.287040e+04 6.842707e-01 6.860830e-02 1.211159e+02 8.370922e+03 177 ff82dda0 7372800 8.856576e+08 6.416842e+03 2.746569e+02 2.951747e+06 1.897560e+10 460 d39bff17 3276800 2.625536e+08 2.601751e+03 2.008905e+02 6.114115e+05 1.600224e+09 235 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 3.287040e+07 6.827700e+02 7.214633e+01 1.297263e+05 8.956219e+07 190 1827caf5 8192 3.287040e+04 6.827700e-01 7.214633e-02 1.297263e+02 8.956219e+03 190 ff82dda0 7372800 8.856576e+08 6.409443e+03 3.434222e+02 3.243178e+06 2.084664e+10 506 d39bff17 3276800 2.625536e+08 2.604891e+03 2.068197e+02 6.069396e+05 1.590978e+09 233 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/chol_model_trsm.sirocco000066400000000000000000000130441507764646700277130ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 2.988058e+09 6.085177e+04 1.761936e+04 4.551712e+07 3.002008e+12 748 ff82dda0 7372800 8.856576e+08 1.775772e+04 3.736007e+03 2.386637e+07 4.425714e+11 1344 d39bff17 3276800 2.625536e+08 5.276862e+03 9.789431e+02 7.070995e+06 3.859682e+10 1340 2c1922b7 819200 3.287040e+07 7.675336e+02 1.464194e+02 2.842177e+06 2.260854e+09 3703 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 2.988058e+09 5.422549e+03 1.109859e+03 2.917331e+06 1.648207e+10 538 ff82dda0 7372800 8.856576e+08 2.018325e+03 2.870643e+02 1.687320e+06 3.474450e+09 836 d39bff17 3276800 2.625536e+08 1.179394e+03 1.705358e+02 3.538181e+05 4.260157e+08 300 2c1922b7 819200 3.287040e+07 4.644748e+02 7.687001e+01 3.297771e+04 1.573685e+07 71 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 2.988058e+09 5.480822e+03 1.130650e+03 2.899355e+06 1.656711e+10 529 ff82dda0 7372800 8.856576e+08 2.005118e+03 2.787124e+02 1.836689e+06 3.753933e+09 916 d39bff17 3276800 2.625536e+08 1.227664e+03 1.874122e+02 2.970946e+05 3.732321e+08 242 2c1922b7 819200 3.287040e+07 4.209987e+02 9.547071e+01 6.441281e+04 2.851225e+07 153 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 2.988058e+09 5.510320e+03 1.138149e+03 2.992104e+06 1.719084e+10 543 ff82dda0 7372800 8.856576e+08 2.005118e+03 2.787124e+02 1.836689e+06 3.753933e+09 916 d39bff17 3276800 2.625536e+08 1.227664e+03 1.874122e+02 2.970946e+05 3.732321e+08 242 2c1922b7 819200 3.287040e+07 4.209987e+02 9.547071e+01 6.441281e+04 2.851225e+07 153 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 2.988058e+09 5.534879e+03 1.226333e+03 3.210230e+06 1.864049e+10 580 ff82dda0 7372800 8.856576e+08 2.051755e+03 2.742098e+02 1.811700e+06 3.783559e+09 883 d39bff17 3276800 2.625536e+08 1.153240e+03 1.913332e+02 3.194475e+05 3.785401e+08 277 2c1922b7 819200 3.287040e+07 4.950127e+02 6.747714e+01 5.445140e+04 2.745498e+07 110 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/cl_update.attila000066400000000000000000000100241507764646700263110ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 49ec0825 34613280 0.000000e+00 7.774706e+04 9.102018e+02 4.664824e+06 3.627260e+11 60 6d78e48f 4461600 0.000000e+00 9.929947e+03 1.596124e+02 1.797320e+06 1.785191e+10 181 8ec75d42 14753312 0.000000e+00 3.310870e+04 5.189822e+02 1.920304e+06 6.359440e+10 58 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 49ec0825 34613280 0.000000e+00 5.051327e+03 6.437605e+02 2.692357e+06 1.382086e+10 533 6d78e48f 4461600 0.000000e+00 1.009230e+03 9.724548e+01 3.835076e+05 3.906410e+08 380 8ec75d42 14753312 0.000000e+00 1.883088e+03 3.340290e+02 1.069594e+06 2.077513e+09 568 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 49ec0825 34613280 0.000000e+00 5.105276e+03 6.922431e+02 2.807902e+06 1.459867e+10 550 6d78e48f 4461600 0.000000e+00 1.012651e+03 9.766669e+01 3.686049e+05 3.767403e+08 364 8ec75d42 14753312 0.000000e+00 2.097710e+03 2.383227e+02 9.880215e+05 2.099334e+09 471 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 49ec0825 34613280 0.000000e+00 4.938766e+03 7.348710e+02 2.780525e+06 1.403640e+10 563 6d78e48f 4461600 0.000000e+00 1.022015e+03 1.088844e+02 3.740577e+05 3.866319e+08 366 8ec75d42 14753312 0.000000e+00 1.829845e+03 3.604651e+02 8.893047e+05 1.690438e+09 486 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/cl_update.idgraf000066400000000000000000000216551507764646700263030ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8ec75d42 14753312 0.000000e+00 1.774670e+03 3.622348e+02 1.348749e+06 2.493306e+09 760 6d78e48f 4461600 0.000000e+00 1.036351e+03 9.390524e+01 2.839601e+05 2.966985e+08 274 49ec0825 34613280 0.000000e+00 4.962997e+03 6.650844e+02 5.096998e+06 2.575067e+10 1027 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8ec75d42 14753312 0.000000e+00 1.813689e+03 3.729019e+02 1.331248e+06 2.516537e+09 734 6d78e48f 4461600 0.000000e+00 1.023951e+03 1.005326e+02 3.553110e+05 3.673281e+08 347 49ec0825 34613280 0.000000e+00 5.017264e+03 7.095917e+02 4.365019e+06 2.233852e+10 870 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8ec75d42 14753312 0.000000e+00 4.692078e+04 5.010795e+02 1.501465e+06 7.045793e+10 32 6d78e48f 4461600 0.000000e+00 1.405585e+04 1.896523e+02 7.590156e+05 1.067055e+10 54 49ec0825 34613280 0.000000e+00 1.108029e+05 1.348959e+03 3.545692e+06 3.929311e+11 32 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8ec75d42 14753312 0.000000e+00 1.859413e+03 3.366203e+02 1.309027e+06 2.513795e+09 704 6d78e48f 4461600 0.000000e+00 1.027564e+03 1.046018e+02 3.483442e+05 3.616551e+08 339 49ec0825 34613280 0.000000e+00 5.060000e+03 7.405627e+02 4.164380e+06 2.152312e+10 823 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8ec75d42 14753312 0.000000e+00 1.862800e+03 3.783314e+02 8.438483e+05 1.636760e+09 453 6d78e48f 4461600 0.000000e+00 9.567271e+02 3.148502e+01 5.606421e+05 5.369624e+08 586 49ec0825 34613280 0.000000e+00 4.965851e+03 6.509733e+02 5.810046e+06 2.934763e+10 1170 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8ec75d42 14753312 0.000000e+00 1.889366e+03 3.958521e+02 1.186522e+06 2.340180e+09 628 6d78e48f 4461600 0.000000e+00 1.028680e+03 8.044529e+01 2.880303e+05 2.981029e+08 280 49ec0825 34613280 0.000000e+00 5.035634e+03 7.113130e+02 4.899672e+06 2.516526e+10 973 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8ec75d42 14753312 0.000000e+00 1.814024e+03 3.173708e+02 1.186372e+06 2.217980e+09 654 6d78e48f 4461600 0.000000e+00 1.025445e+03 7.185494e+01 3.466003e+05 3.571646e+08 338 49ec0825 34613280 0.000000e+00 5.092715e+03 7.051028e+02 3.513973e+06 1.823871e+10 690 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8ec75d42 14753312 0.000000e+00 1.793350e+03 3.531620e+02 1.388053e+06 2.585801e+09 774 6d78e48f 4461600 0.000000e+00 1.033622e+03 1.055186e+02 3.783058e+05 3.951004e+08 366 49ec0825 34613280 0.000000e+00 4.986601e+03 7.025210e+02 5.345636e+06 2.718562e+10 1072 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8ec75d42 14753312 0.000000e+00 1.781570e+03 3.261441e+02 1.501864e+06 2.765346e+09 843 6d78e48f 4461600 0.000000e+00 1.022184e+03 1.018115e+02 3.751415e+05 3.872679e+08 367 49ec0825 34613280 0.000000e+00 5.102994e+03 7.050225e+02 4.327339e+06 2.250389e+10 848 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/cl_update.mirage000066400000000000000000000100311507764646700262750ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 6d78e48f 4461600 0.000000e+00 6.670318e+03 3.279077e+02 6.103341e+06 4.080961e+10 915 8ec75d42 14753312 0.000000e+00 2.178007e+04 1.559694e+03 1.008417e+07 2.207603e+11 463 49ec0825 34613280 0.000000e+00 5.101465e+04 2.613713e+03 2.443602e+07 1.249867e+12 479 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 6d78e48f 4461600 0.000000e+00 1.028619e+03 1.201323e+02 5.626547e+05 5.866515e+08 547 8ec75d42 14753312 0.000000e+00 1.871093e+03 3.437894e+02 1.981488e+06 3.832713e+09 1059 49ec0825 34613280 0.000000e+00 5.018828e+03 7.664203e+02 4.672528e+06 2.399748e+10 931 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 6d78e48f 4461600 0.000000e+00 1.024201e+03 1.096599e+02 6.452464e+05 6.684377e+08 630 8ec75d42 14753312 0.000000e+00 1.877457e+03 3.608958e+02 1.907496e+06 3.713572e+09 1016 49ec0825 34613280 0.000000e+00 5.018101e+03 7.255196e+02 5.314169e+06 2.722447e+10 1059 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 6d78e48f 4461600 0.000000e+00 1.010004e+03 1.090743e+02 5.383321e+05 5.500588e+08 533 8ec75d42 14753312 0.000000e+00 1.986058e+03 3.264552e+02 1.288952e+06 2.629100e+09 649 49ec0825 34613280 0.000000e+00 5.064765e+03 7.492118e+02 4.948276e+06 2.561026e+10 977 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/cl_update.sirocco000066400000000000000000000120161507764646700264770ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8ec75d42 14753312 0.000000e+00 1.292737e+03 8.111826e+01 1.783977e+06 2.315293e+09 1380 6d78e48f 4461600 0.000000e+00 7.254397e+02 8.693801e+01 2.717497e+06 1.999693e+09 3746 49ec0825 34613280 0.000000e+00 2.847204e+03 1.159244e+02 5.255939e+06 1.498954e+10 1846 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8ec75d42 14753312 0.000000e+00 2.010356e+04 1.739800e+03 4.201644e+06 8.510064e+10 209 6d78e48f 4461600 0.000000e+00 6.471465e+03 9.708551e+02 2.344612e+07 1.551456e+11 3623 49ec0825 34613280 0.000000e+00 4.705100e+04 5.067137e+03 1.383299e+07 6.584049e+11 294 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8ec75d42 14753312 0.000000e+00 1.333639e+03 8.095890e+01 1.871095e+06 2.504561e+09 1403 6d78e48f 4461600 0.000000e+00 7.466797e+02 9.599725e+01 2.594712e+06 1.969443e+09 3475 49ec0825 34613280 0.000000e+00 2.914989e+03 1.085303e+02 5.037101e+06 1.470345e+10 1728 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8ec75d42 14753312 0.000000e+00 1.319437e+03 8.470023e+01 1.921100e+06 2.545216e+09 1456 6d78e48f 4461600 0.000000e+00 7.342135e+02 9.435309e+01 2.608661e+06 1.946944e+09 3553 49ec0825 34613280 0.000000e+00 2.843027e+03 1.019747e+02 5.080489e+06 1.446255e+10 1787 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8ec75d42 14753312 0.000000e+00 1.324805e+03 7.460278e+01 1.748742e+06 2.324088e+09 1320 6d78e48f 4461600 0.000000e+00 7.321696e+02 8.668478e+01 2.571380e+06 1.909076e+09 3512 49ec0825 34613280 0.000000e+00 2.873920e+03 1.173279e+02 5.566783e+06 1.602515e+10 1937 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/func.mirage000066400000000000000000000020331507764646700252730ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 1 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_cores1_impl0 (Comb0) # number of entries 2 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us or J) dev (us or J) sum sum2 n 2669802c 1 0.000000e+00 2.118378e+02 3.509374e+01 7.202485e+03 1.567632e+06 34 4cd30058 2 0.000000e+00 3.688151e+02 7.632401e+01 1.143327e+04 4.397349e+06 31 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/log_arr.mirage000066400000000000000000000063661507764646700260020ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 1 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_cores1_impl0 (Comb0) # number of entries 23 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us or J) dev (us or J) sum sum2 n 06c91a73 225 0.000000e+00 2.857770e+02 5.674783e+01 1.428885e+04 4.244441e+06 50 bb714cfa 200 0.000000e+00 2.519470e+02 5.692391e+01 1.385708e+04 3.669469e+06 55 8c736af3 175 0.000000e+00 2.361612e+02 4.646855e+01 1.346119e+04 3.302092e+06 57 6aebe038 150 0.000000e+00 9.788850e+01 2.193830e+01 2.545101e+03 2.616496e+05 26 0d619432 125 0.000000e+00 1.261692e+02 2.840256e+01 6.939307e+03 9.198957e+05 55 5db8a67d 100 0.000000e+00 1.912077e+02 4.355993e+01 8.986764e+03 1.807520e+06 47 3575f01c 75 0.000000e+00 1.486505e+02 3.245366e+01 8.027128e+03 1.250112e+06 54 a1b2649e 50 0.000000e+00 1.601281e+02 2.952993e+01 8.486791e+03 1.405191e+06 53 50d9324f 25 0.000000e+00 5.679895e+01 1.315071e+01 1.135979e+03 6.798124e+04 20 8e55dcaa 22 0.000000e+00 1.354191e+02 2.833115e+01 1.218772e+03 1.722689e+05 9 a83c5c86 23 0.000000e+00 1.611343e+02 3.529197e+01 3.544955e+03 5.986154e+05 22 c286dcf2 20 0.000000e+00 1.669332e+02 3.870204e+01 8.179726e+03 1.438862e+06 49 7d495c6e 17 0.000000e+00 1.598129e+02 3.241270e+01 3.036446e+03 5.052245e+05 19 17f3dc1a 18 0.000000e+00 1.605213e+02 3.804432e+01 5.618246e+03 9.525062e+05 35 de8ceee5 15 0.000000e+00 8.465237e+01 1.742428e+01 5.079142e+03 4.481777e+05 60 b4366e91 12 0.000000e+00 1.584736e+02 3.363140e+01 3.169472e+03 5.248991e+05 20 925feebd 13 0.000000e+00 1.141695e+02 1.417130e+01 9.133560e+02 1.058840e+05 8 61436e79 10 0.000000e+00 1.475159e+02 3.333011e+01 8.555921e+03 1.326566e+06 58 f31c80c4 7 0.000000e+00 1.253070e+02 3.041314e+01 3.007369e+03 3.990435e+05 24 2d906e21 8 0.000000e+00 8.466750e+01 1.652881e+01 2.540025e+03 2.232536e+05 30 bfcf809c 5 0.000000e+00 8.161995e+01 1.779563e+01 3.264798e+03 2.791400e+05 40 4cd30058 2 0.000000e+00 1.732845e+02 3.197810e+01 4.332113e+03 7.762531e+05 25 6aba8074 3 0.000000e+00 1.697346e+02 2.859508e+01 3.564427e+03 6.221779e+05 21 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/log_list.mirage000066400000000000000000000063701507764646700261640ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 1 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_cores1_impl0 (Comb0) # number of entries 23 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us or J) dev (us or J) sum sum2 n 06c91a73 225 0.000000e+00 2.019197e+02 4.410081e+01 1.029790e+04 2.178539e+06 51 bb714cfa 200 0.000000e+00 2.154911e+02 4.319096e+01 1.077456e+04 2.415094e+06 50 8c736af3 175 0.000000e+00 1.504727e+02 3.433867e+01 7.674106e+03 1.214880e+06 51 6aebe038 150 0.000000e+00 1.811410e+02 3.948222e+01 9.238192e+03 1.752917e+06 51 0d619432 125 0.000000e+00 1.395840e+02 2.983957e+01 6.979199e+03 1.018704e+06 50 5db8a67d 100 0.000000e+00 1.437386e+02 3.298439e+01 8.624317e+03 1.304926e+06 60 3575f01c 75 0.000000e+00 1.142060e+02 2.420593e+01 5.824505e+03 6.950756e+05 51 a1b2649e 50 0.000000e+00 5.032226e+01 1.023252e+01 1.559990e+03 8.174806e+04 31 50d9324f 25 0.000000e+00 1.034515e+02 2.275354e+01 6.413990e+03 6.956354e+05 62 a83c5c86 23 0.000000e+00 1.001690e+02 2.097800e+01 1.402366e+03 1.466347e+05 14 8e55dcaa 22 0.000000e+00 1.677354e+02 3.762885e+01 3.690179e+03 6.501242e+05 22 c286dcf2 20 0.000000e+00 8.453644e+01 2.071754e+01 4.564968e+03 4.090839e+05 54 17f3dc1a 18 0.000000e+00 9.082029e+01 1.856443e+01 2.542968e+03 2.406029e+05 28 7d495c6e 17 0.000000e+00 1.579351e+02 3.138662e+01 3.632508e+03 5.963584e+05 23 de8ceee5 15 0.000000e+00 6.749869e+01 1.578830e+01 3.712428e+03 2.642939e+05 55 b4366e91 12 0.000000e+00 1.045285e+02 2.047968e+01 2.195099e+03 2.382582e+05 21 925feebd 13 0.000000e+00 5.843426e+01 1.319529e+01 1.577725e+03 9.689431e+04 27 61436e79 10 0.000000e+00 8.635067e+01 1.606126e+01 3.367676e+03 3.008617e+05 39 f31c80c4 7 0.000000e+00 8.667993e+01 1.877054e+01 2.340358e+03 2.123751e+05 27 2d906e21 8 0.000000e+00 9.561275e+01 1.242246e+01 1.147353e+03 1.115534e+05 12 bfcf809c 5 0.000000e+00 9.516219e+01 2.051119e+01 4.091974e+03 4.074917e+05 43 4cd30058 2 0.000000e+00 3.056131e+02 6.553774e+01 4.278584e+03 1.367724e+06 14 6aba8074 3 0.000000e+00 5.585311e+01 1.149143e+01 1.005356e+03 5.852921e+04 18 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/multi.mirage000066400000000000000000000016571507764646700255050ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 1 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_cores1_impl0 (Comb0) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us or J) dev (us or J) sum sum2 n 50d9324f 25 0.000000e+00 5.623175e+01 1.128390e+01 4.723467e+03 2.763042e+05 84 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/multi_2arr.mirage000066400000000000000000000020341507764646700264210ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 1 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_cores1_impl0 (Comb0) # number of entries 2 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us or J) dev (us or J) sum sum2 n bfcf809c 5 0.000000e+00 9.442641e+01 1.743873e+01 5.476732e+03 5.347865e+05 58 50d9324f 25 0.000000e+00 9.340223e+01 2.207403e+01 1.550477e+04 1.529066e+06 166 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/multi_list.mirage000066400000000000000000000016571507764646700265400ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 1 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_cores1_impl0 (Comb0) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us or J) dev (us or J) sum sum2 n 50d9324f 25 0.000000e+00 1.154125e+02 2.553199e+01 1.131042e+04 1.369248e+06 98 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/null.idgraf000066400000000000000000000001531507764646700253030ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 0 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/null.sirocco000066400000000000000000000001531507764646700255100ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 0 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/overlap_sleep_1024_24.attila000066400000000000000000000063371507764646700302000ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n a3d3725e 1024 0.000000e+00 8.457435e+01 8.456846e+01 6.080896e+05 5.145841e+07 7190 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n a3d3725e 1024 0.000000e+00 8.959785e+01 8.957735e+01 1.959505e+05 1.756781e+07 2187 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n a3d3725e 1024 0.000000e+00 9.097446e+01 8.933429e+01 2.547285e+03 2.318591e+05 28 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n a3d3725e 1024 0.000000e+00 9.082936e+01 9.075245e+01 5.368015e+04 4.878224e+06 591 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/overlap_sleep_1024_24.mirage000066400000000000000000000063371507764646700301660ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n a3d3725e 1024 0.000000e+00 8.457435e+01 8.456846e+01 6.080896e+05 5.145841e+07 7190 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n a3d3725e 1024 0.000000e+00 8.959785e+01 8.957735e+01 1.959505e+05 1.756781e+07 2187 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n a3d3725e 1024 0.000000e+00 9.097446e+01 8.933429e+01 2.547285e+03 2.318591e+05 28 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n a3d3725e 1024 0.000000e+00 9.082936e+01 9.075245e+01 5.368015e+04 4.878224e+06 591 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/overlap_sleep_1024_24.sirocco000066400000000000000000000077731507764646700303700ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n a3d3725e 1024 0.000000e+00 8.457435e+01 8.456846e+01 6.080896e+05 5.145841e+07 7190 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n a3d3725e 1024 0.000000e+00 8.959785e+01 8.957735e+01 1.959505e+05 1.756781e+07 2187 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n a3d3725e 1024 0.000000e+00 9.097446e+01 8.933429e+01 2.547285e+03 2.318591e+05 28 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n a3d3725e 1024 0.000000e+00 9.082936e+01 9.075245e+01 5.368015e+04 4.878224e+06 591 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3, MPI_MS - 5) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb4) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n a3d3725e 1024 0.000000e+00 9.097446e+01 8.933429e+01 2.547285e+03 2.318591e+05 28 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/save_cl_bottom.attila000066400000000000000000000100161507764646700273520ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f2ff9ae5 34480152 0.000000e+00 4.419323e+01 9.394629e+00 1.825180e+04 8.430572e+05 413 fb4b8624 4427800 0.000000e+00 1.267467e+01 2.411186e+00 5.754301e+03 7.557335e+04 454 4af260f6 14678040 0.000000e+00 2.442142e+01 5.135780e+00 1.394463e+04 3.556084e+05 571 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f2ff9ae5 34480152 0.000000e+00 3.910144e+01 6.371108e+00 1.329449e+03 5.336347e+04 34 fb4b8624 4427800 0.000000e+00 3.998483e+01 8.150933e+00 2.519044e+03 1.049091e+05 63 4af260f6 14678040 0.000000e+00 3.398450e+01 5.156207e+00 8.156280e+02 2.835679e+04 24 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f2ff9ae5 34480152 0.000000e+00 3.173112e+01 3.816078e+00 7.615470e+02 2.451424e+04 24 fb4b8624 4427800 0.000000e+00 2.860497e+01 5.248990e+00 1.029779e+03 3.044867e+04 36 4af260f6 14678040 0.000000e+00 3.652883e+01 8.229435e+00 1.716855e+03 6.589771e+04 47 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f2ff9ae5 34480152 0.000000e+00 3.719045e+01 7.851419e+00 2.566141e+03 9.968943e+04 69 fb4b8624 4427800 0.000000e+00 4.509905e+01 6.110617e+00 9.470800e+02 4.349654e+04 21 4af260f6 14678040 0.000000e+00 2.634116e+01 3.746211e+00 6.479926e+03 1.741412e+05 246 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/save_cl_bottom.idgraf000066400000000000000000000216551507764646700273430ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.985465e+01 6.158621e+00 3.552703e+03 1.105782e+05 119 fb4b8624 4427800 0.000000e+00 1.132689e+01 2.249702e+00 2.423954e+03 2.853894e+04 214 f2ff9ae5 34480152 0.000000e+00 5.622304e+01 1.121739e+01 9.276802e+03 5.423319e+05 165 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.632587e+01 4.244468e+00 1.174134e+04 3.171360e+05 446 fb4b8624 4427800 0.000000e+00 2.560067e+01 2.946464e+00 6.809779e+03 1.766442e+05 266 f2ff9ae5 34480152 0.000000e+00 2.687395e+01 3.041318e+00 1.378634e+04 3.752385e+05 513 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.682825e+01 3.688517e+00 9.309402e+03 2.544759e+05 347 fb4b8624 4427800 0.000000e+00 2.601287e+01 3.034296e+00 6.711320e+03 1.769561e+05 258 f2ff9ae5 34480152 0.000000e+00 2.650277e+01 3.250317e+00 1.327789e+04 3.571937e+05 501 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.844841e+01 4.987346e+00 1.118023e+04 3.278350e+05 393 fb4b8624 4427800 0.000000e+00 2.545228e+01 3.038424e+00 7.533874e+03 1.944869e+05 296 f2ff9ae5 34480152 0.000000e+00 2.671593e+01 2.977811e+00 9.510870e+03 2.572485e+05 356 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.683154e+01 3.643920e+00 1.145707e+04 3.130806e+05 427 fb4b8624 4427800 0.000000e+00 2.439165e+01 2.519213e+00 6.951620e+03 1.713702e+05 285 f2ff9ae5 34480152 0.000000e+00 2.686670e+01 3.337051e+00 1.332588e+04 3.635460e+05 496 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.743422e+01 5.313897e+00 8.504608e+03 2.420709e+05 310 fb4b8624 4427800 0.000000e+00 2.591823e+01 3.009457e+00 7.879143e+03 2.069667e+05 304 f2ff9ae5 34480152 0.000000e+00 2.671837e+01 2.963498e+00 1.282482e+04 3.468736e+05 480 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.572873e+01 2.716127e+00 1.232406e+04 3.206163e+05 479 fb4b8624 4427800 0.000000e+00 2.555023e+01 2.989409e+00 6.362008e+03 1.647760e+05 249 f2ff9ae5 34480152 0.000000e+00 2.648407e+01 2.593556e+00 1.504295e+04 4.022192e+05 568 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.535931e+01 2.801001e+00 7.303480e+03 1.874707e+05 288 fb4b8624 4427800 0.000000e+00 2.706983e+01 4.764143e+00 1.228970e+04 3.429847e+05 454 f2ff9ae5 34480152 0.000000e+00 2.645378e+01 3.704623e+00 1.251264e+04 3.374982e+05 473 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.565071e+01 2.881053e+00 1.064505e+04 2.764977e+05 415 fb4b8624 4427800 0.000000e+00 2.622930e+01 4.388391e+00 8.262230e+03 2.227788e+05 315 f2ff9ae5 34480152 0.000000e+00 2.628917e+01 2.974884e+00 1.204044e+04 3.205863e+05 458 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/save_cl_bottom.mirage000066400000000000000000000100171507764646700273410ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n fb4b8624 4427800 0.000000e+00 1.291871e+01 2.707056e+00 1.452063e+04 1.958246e+05 1124 4af260f6 14678040 0.000000e+00 2.737328e+01 5.740626e+00 2.241872e+04 6.406639e+05 819 f2ff9ae5 34480152 0.000000e+00 4.727593e+01 1.006422e+01 4.179193e+04 2.065291e+06 884 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n fb4b8624 4427800 0.000000e+00 4.529760e+01 1.106616e+01 3.940891e+03 1.891669e+05 87 4af260f6 14678040 0.000000e+00 5.186998e+01 9.163546e+00 2.386019e+03 1.276254e+05 46 f2ff9ae5 34480152 0.000000e+00 5.555996e+01 1.123960e+01 2.777998e+03 1.606619e+05 50 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n fb4b8624 4427800 0.000000e+00 4.174505e+01 7.717897e+00 1.711547e+03 7.389082e+04 41 4af260f6 14678040 0.000000e+00 4.338701e+01 9.950636e+00 8.677401e+03 3.962895e+05 200 f2ff9ae5 34480152 0.000000e+00 5.523574e+01 1.243105e+01 2.154194e+03 1.250152e+05 39 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n fb4b8624 4427800 0.000000e+00 5.145250e+01 1.035300e+01 1.955195e+03 1.046727e+05 38 4af260f6 14678040 0.000000e+00 4.717138e+01 8.365012e+00 2.217055e+03 1.078703e+05 47 f2ff9ae5 34480152 0.000000e+00 4.374296e+01 8.852326e+00 3.455694e+03 1.573530e+05 79 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/save_cl_bottom.sirocco000066400000000000000000000120031507764646700275330ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 3.447447e+01 7.398265e+00 3.237153e+04 1.167387e+06 939 fb4b8624 4427800 0.000000e+00 5.439097e+01 1.253425e+01 3.094846e+05 1.772711e+07 5690 f2ff9ae5 34480152 0.000000e+00 5.041329e+01 1.085485e+01 6.226042e+04 3.284270e+06 1235 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 3.176283e+01 6.812714e+00 1.673901e+04 5.561382e+05 527 fb4b8624 4427800 0.000000e+00 5.311651e+01 1.346481e+01 1.290731e+04 7.296474e+05 243 f2ff9ae5 34480152 0.000000e+00 4.192896e+01 9.759572e+00 3.207566e+04 1.417765e+06 765 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 3.786531e+01 9.328071e+00 1.991715e+04 7.999380e+05 526 fb4b8624 4427800 0.000000e+00 5.555598e+01 1.303330e+01 9.444517e+03 5.535768e+05 170 f2ff9ae5 34480152 0.000000e+00 4.359390e+01 1.022197e+01 2.218929e+04 1.020503e+06 509 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 4.035980e+01 9.947105e+00 1.989738e+04 8.518341e+05 493 fb4b8624 4427800 0.000000e+00 8.863692e+01 1.909792e+01 1.161144e+04 1.076982e+06 131 f2ff9ae5 34480152 0.000000e+00 3.838146e+01 9.359960e+00 2.890124e+04 1.175241e+06 753 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 3.505264e+01 8.845541e+00 1.945422e+04 7.253469e+05 555 fb4b8624 4427800 0.000000e+00 4.717545e+01 1.027132e+01 6.227160e+03 3.076951e+05 132 f2ff9ae5 34480152 0.000000e+00 3.110432e+01 6.170515e+00 2.370149e+04 7.662320e+05 762 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/save_cl_top.attila000066400000000000000000000100231507764646700266460ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f2ff9ae5 34480152 0.000000e+00 4.346555e+01 9.370422e+00 2.103733e+04 9.568966e+05 484 fb4b8624 4427800 0.000000e+00 1.080055e+01 2.408554e+00 6.631537e+03 7.518614e+04 614 4af260f6 14678040 0.000000e+00 2.045608e+01 4.186697e+00 1.294870e+04 2.759751e+05 633 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f2ff9ae5 34480152 0.000000e+00 3.184284e+01 5.707419e+00 4.840111e+03 1.590742e+05 152 fb4b8624 4427800 0.000000e+00 3.194475e+01 5.964283e+00 1.150011e+03 3.801743e+04 36 4af260f6 14678040 0.000000e+00 3.430576e+01 6.297323e+00 5.523228e+03 1.958632e+05 161 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f2ff9ae5 34480152 0.000000e+00 2.735681e+01 5.823931e+00 3.063963e+03 8.761910e+04 112 fb4b8624 4427800 0.000000e+00 3.161427e+01 5.733859e+00 2.212999e+03 7.226375e+04 70 4af260f6 14678040 0.000000e+00 3.666193e+01 6.692591e+00 6.819119e+03 2.583331e+05 186 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f2ff9ae5 34480152 0.000000e+00 3.900993e+01 8.465923e+00 7.489907e+03 3.059418e+05 192 fb4b8624 4427800 0.000000e+00 3.364966e+01 7.354940e+00 1.278687e+03 4.508300e+04 38 4af260f6 14678040 0.000000e+00 2.853135e+01 5.469952e+00 8.730594e+03 2.582513e+05 306 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/save_cl_top.idgraf000066400000000000000000000216551507764646700266410ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 3.246622e+01 6.891153e+00 3.928413e+03 1.332868e+05 121 fb4b8624 4427800 0.000000e+00 1.139753e+01 2.243693e+00 2.644226e+03 3.130556e+04 232 f2ff9ae5 34480152 0.000000e+00 5.591168e+01 1.328211e+01 1.241239e+04 7.331618e+05 222 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.604823e+01 3.286196e+00 1.130493e+04 2.991603e+05 434 fb4b8624 4427800 0.000000e+00 2.652276e+01 4.354433e+00 7.850738e+03 2.138358e+05 296 f2ff9ae5 34480152 0.000000e+00 2.714414e+01 3.836601e+00 1.555359e+04 4.306232e+05 573 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.599288e+01 3.808778e+00 1.115095e+04 2.960687e+05 429 fb4b8624 4427800 0.000000e+00 2.539365e+01 2.861737e+00 6.678529e+03 1.717461e+05 263 f2ff9ae5 34480152 0.000000e+00 2.629746e+01 2.517281e+00 1.159718e+04 3.077710e+05 441 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.561750e+01 2.633232e+00 1.155349e+04 2.990988e+05 451 fb4b8624 4427800 0.000000e+00 2.673210e+01 4.378492e+00 1.031859e+04 2.832378e+05 386 f2ff9ae5 34480152 0.000000e+00 2.631930e+01 2.903449e+00 1.339652e+04 3.568781e+05 509 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.607685e+01 3.121182e+00 7.979517e+03 2.110617e+05 306 fb4b8624 4427800 0.000000e+00 2.621449e+01 3.800716e+00 7.654632e+03 2.048804e+05 292 f2ff9ae5 34480152 0.000000e+00 2.661811e+01 2.706929e+00 1.810031e+04 4.867788e+05 680 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.759081e+01 5.463486e+00 5.435390e+03 1.558472e+05 197 fb4b8624 4427800 0.000000e+00 2.575898e+01 3.723342e+00 7.779212e+03 2.045713e+05 302 f2ff9ae5 34480152 0.000000e+00 2.684177e+01 3.098778e+00 1.181038e+04 3.212366e+05 440 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.613306e+01 2.901462e+00 1.100202e+04 2.910606e+05 421 fb4b8624 4427800 0.000000e+00 2.615768e+01 3.461177e+00 7.010257e+03 1.865826e+05 268 f2ff9ae5 34480152 0.000000e+00 2.749333e+01 3.923485e+00 1.492888e+04 4.188033e+05 543 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.628755e+01 3.829892e+00 1.025215e+04 2.752243e+05 390 fb4b8624 4427800 0.000000e+00 2.540957e+01 3.333356e+00 8.258109e+03 2.134461e+05 325 f2ff9ae5 34480152 0.000000e+00 2.728087e+01 3.903560e+00 1.404965e+04 3.911340e+05 515 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 2.705248e+01 4.049710e+00 1.163257e+04 3.217418e+05 430 fb4b8624 4427800 0.000000e+00 2.626990e+01 3.774104e+00 6.908983e+03 1.852444e+05 263 f2ff9ae5 34480152 0.000000e+00 2.670502e+01 3.597311e+00 1.303205e+04 3.543362e+05 488 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/save_cl_top.mirage000066400000000000000000000100261507764646700266370ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n fb4b8624 4427800 0.000000e+00 1.161593e+01 2.312881e+00 1.684310e+04 2.034051e+05 1450 4af260f6 14678040 0.000000e+00 2.793439e+01 6.208645e+00 2.807406e+04 8.229715e+05 1005 f2ff9ae5 34480152 0.000000e+00 5.388292e+01 1.191766e+01 4.930288e+04 2.786541e+06 915 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n fb4b8624 4427800 0.000000e+00 3.349897e+01 6.495369e+00 7.939257e+03 2.759560e+05 237 4af260f6 14678040 0.000000e+00 3.814493e+01 8.460348e+00 1.609716e+04 6.442306e+05 422 f2ff9ae5 34480152 0.000000e+00 3.894616e+01 8.022125e+00 1.339748e+04 5.439182e+05 344 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n fb4b8624 4427800 0.000000e+00 4.505725e+01 1.044174e+01 7.209160e+02 3.422697e+04 16 4af260f6 14678040 0.000000e+00 3.820932e+01 8.787776e+00 1.138638e+04 4.580788e+05 298 f2ff9ae5 34480152 0.000000e+00 4.714002e+01 1.060923e+01 1.343491e+04 6.654002e+05 285 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n fb4b8624 4427800 0.000000e+00 2.397521e+01 2.109607e+00 5.754050e+02 1.390227e+04 24 4af260f6 14678040 0.000000e+00 3.827520e+01 8.943097e+00 9.453975e+03 3.816076e+05 247 f2ff9ae5 34480152 0.000000e+00 5.567087e+01 1.159966e+01 8.127947e+03 4.721345e+05 146 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/save_cl_top.sirocco000066400000000000000000000120031507764646700270310ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 3.501046e+01 7.466097e+00 2.835847e+04 1.037995e+06 810 fb4b8624 4427800 0.000000e+00 2.773216e+01 6.482940e+00 1.680846e+05 4.916085e+06 6061 f2ff9ae5 34480152 0.000000e+00 5.337509e+01 1.160081e+01 6.591824e+04 3.684596e+06 1235 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 3.362211e+01 7.569501e+00 1.795421e+04 6.342550e+05 534 fb4b8624 4427800 0.000000e+00 3.231969e+01 5.862640e+00 1.877774e+04 6.268602e+05 581 f2ff9ae5 34480152 0.000000e+00 3.995777e+01 9.699452e+00 3.180638e+04 1.345799e+06 796 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 3.654429e+01 8.110997e+00 1.710273e+04 6.557958e+05 468 fb4b8624 4427800 0.000000e+00 3.606370e+01 8.402269e+00 2.171034e+04 8.254553e+05 602 f2ff9ae5 34480152 0.000000e+00 3.192218e+01 6.956874e+00 1.695068e+04 5.668019e+05 531 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 3.551068e+01 7.784366e+00 1.278384e+04 4.757777e+05 360 fb4b8624 4427800 0.000000e+00 3.460669e+01 7.284566e+00 1.996806e+04 7.216470e+05 577 f2ff9ae5 34480152 0.000000e+00 3.547098e+01 8.430109e+00 2.805755e+04 1.051443e+06 791 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 4af260f6 14678040 0.000000e+00 3.712899e+01 8.969310e+00 2.030956e+04 7.980787e+05 547 fb4b8624 4427800 0.000000e+00 3.619911e+01 8.162351e+00 2.287784e+04 8.702638e+05 632 f2ff9ae5 34480152 0.000000e+00 3.132201e+01 6.179930e+00 2.584066e+04 8.408892e+05 825 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/scal.mirage000066400000000000000000000016601507764646700252670ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 1 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_cores1_impl0 (Comb0) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us or J) dev (us or J) sum sum2 n 50d9324f 25 0.000000e+00 1.236737e+02 2.505671e+01 2.597148e+04 3.343836e+06 210 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/scal_arr.mirage000066400000000000000000000016601507764646700261330ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 1 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_cores1_impl0 (Comb0) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us or J) dev (us or J) sum sum2 n 50d9324f 25 0.000000e+00 1.147772e+02 2.771885e+01 1.296983e+04 1.575463e+06 113 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/sqrt.mirage000066400000000000000000000016601507764646700253360ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 1 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OpenCL - 2, FPGA - 4, MPI_MS - 5, TCPIP_MS - 6, HIP - 7) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_cores1_impl0 (Comb0) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us or J) dev (us or J) sum sum2 n 50d9324f 25 0.000000e+00 1.164091e+02 2.495470e+01 1.268860e+04 1.544947e+06 109 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.attila000066400000000000000000000100221507764646700300430ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 66355200 7.077888e+09 6.739553e+05 1.180373e+04 5.391643e+07 3.634841e+13 80 0b0b0ce8 7372800 2.621440e+08 2.919185e+04 3.106016e+03 2.335348e+06 6.894489e+10 80 4220e23d 29491200 2.097152e+09 2.058556e+05 4.934163e+03 1.646845e+07 3.392071e+12 80 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 66355200 7.077888e+09 2.313178e+04 3.749098e+01 2.451969e+06 5.671856e+10 106 0b0b0ce8 7372800 2.621440e+08 1.039822e+03 4.572723e+01 9.982293e+04 1.039988e+08 96 4220e23d 29491200 2.097152e+09 7.017044e+03 9.150160e+00 7.367896e+05 5.170094e+09 105 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 66355200 7.077888e+09 2.311999e+04 3.792305e+01 2.427599e+06 5.612623e+10 105 0b0b0ce8 7372800 2.621440e+08 1.036523e+03 7.589914e+00 8.810443e+04 9.132713e+07 85 4220e23d 29491200 2.097152e+09 7.017425e+03 3.431116e+01 7.298122e+05 5.121525e+09 104 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 66355200 7.077888e+09 2.312215e+04 3.532854e+01 2.427826e+06 5.613668e+10 105 0b0b0ce8 7372800 2.621440e+08 1.036998e+03 4.343669e+01 1.078478e+05 1.120342e+08 104 4220e23d 29491200 2.097152e+09 7.010229e+03 3.598880e+01 7.360740e+05 5.160183e+09 105 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.idgraf000066400000000000000000000216221507764646700300310ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 7372800 2.621440e+08 1.052061e+03 3.198115e+01 6.838395e+04 7.201055e+07 65 4220e23d 29491200 2.097152e+09 7.092203e+03 4.667104e+02 6.028372e+05 4.293959e+09 85 492beed5 66355200 7.077888e+09 2.348390e+04 1.879558e+03 2.230970e+06 5.272750e+10 95 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 7372800 2.621440e+08 1.052063e+03 4.974434e+01 7.680058e+04 8.097966e+07 73 4220e23d 29491200 2.097152e+09 7.169429e+03 6.510141e+02 6.165709e+05 4.456910e+09 86 492beed5 66355200 7.077888e+09 2.369721e+04 2.666656e+03 2.203840e+06 5.288620e+10 93 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 7372800 2.621440e+08 1.073351e+03 1.039589e+02 5.796097e+04 6.279609e+07 54 4220e23d 29491200 2.097152e+09 7.178253e+03 6.674450e+02 6.245080e+05 4.521634e+09 87 492beed5 66355200 7.077888e+09 2.322028e+04 3.606800e+02 2.252367e+06 5.231319e+10 97 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 7372800 2.621440e+08 1.047275e+03 4.810046e+01 6.074194e+04 6.374769e+07 58 4220e23d 29491200 2.097152e+09 7.215871e+03 7.571281e+02 6.277808e+05 4.579858e+09 87 492beed5 66355200 7.077888e+09 2.323291e+04 1.169036e+03 2.230359e+06 5.194892e+10 96 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 7372800 2.621440e+08 1.045464e+03 2.548321e+01 6.168239e+04 6.452506e+07 59 4220e23d 29491200 2.097152e+09 7.130284e+03 4.158059e+02 5.632924e+05 4.030093e+09 79 492beed5 66355200 7.077888e+09 2.322391e+04 7.530407e+02 2.090152e+06 4.859253e+10 90 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 7372800 2.621440e+08 1.058842e+03 8.984549e+01 6.353054e+04 6.775316e+07 60 4220e23d 29491200 2.097152e+09 7.197321e+03 6.902584e+02 6.549562e+05 4.757287e+09 91 492beed5 66355200 7.077888e+09 2.322727e+04 1.128695e+03 2.253045e+06 5.245566e+10 97 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 7372800 2.621440e+08 1.063382e+03 9.562944e+01 5.529587e+04 5.927619e+07 52 4220e23d 29491200 2.097152e+09 7.227464e+03 8.541890e+02 6.287894e+05 4.608031e+09 87 492beed5 66355200 7.077888e+09 2.322877e+04 9.079114e+02 2.253191e+06 5.241882e+10 97 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 7372800 2.621440e+08 1.057961e+03 6.059722e+01 5.289807e+04 5.614771e+07 50 4220e23d 29491200 2.097152e+09 7.169935e+03 6.166650e+02 5.592549e+05 4.039483e+09 78 492beed5 66355200 7.077888e+09 2.322622e+04 8.447450e+02 2.090360e+06 4.861539e+10 90 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 7372800 2.621440e+08 3.132122e+04 3.995607e+03 8.456730e+05 2.691857e+10 27 4220e23d 29491200 2.097152e+09 2.241875e+05 7.780157e+03 6.053063e+06 1.358656e+12 27 492beed5 66355200 7.077888e+09 7.222063e+05 7.344712e+03 1.661074e+07 1.199762e+13 23 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.mirage000066400000000000000000000100231507764646700300320ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 7372800 2.621440e+08 2.783376e+04 1.016266e+03 2.254534e+06 6.283582e+10 81 492beed5 66355200 7.077888e+09 7.068870e+05 1.582112e+04 5.725785e+07 4.049511e+13 81 4220e23d 29491200 2.097152e+09 2.135531e+05 4.787239e+03 1.729780e+07 3.695855e+12 81 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 7372800 2.621440e+08 1.040745e+03 1.710737e+01 1.040745e+05 1.083442e+08 100 492beed5 66355200 7.077888e+09 2.322675e+04 6.514638e+01 2.438809e+06 5.664606e+10 105 4220e23d 29491200 2.097152e+09 7.042883e+03 4.736092e+01 7.395027e+05 5.208467e+09 105 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 7372800 2.621440e+08 1.057967e+03 4.209841e+01 1.057967e+05 1.121067e+08 100 492beed5 66355200 7.077888e+09 2.322865e+04 8.861437e+01 2.439008e+06 5.665569e+10 105 4220e23d 29491200 2.097152e+09 7.053091e+03 5.410169e+01 7.405746e+05 5.223647e+09 105 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 7372800 2.621440e+08 1.050834e+03 7.708100e+01 1.019309e+05 1.076889e+08 97 492beed5 66355200 7.077888e+09 2.323864e+04 5.619683e+01 2.440057e+06 5.670394e+10 105 4220e23d 29491200 2.097152e+09 7.040571e+03 3.296604e+01 7.392600e+05 5.204926e+09 105 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dgemm_gemm.sirocco000066400000000000000000000130371507764646700302370ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 66355200 7.077888e+09 6.467396e+03 5.820387e+02 1.403425e+06 9.150018e+09 217 0b0b0ce8 7372800 2.621440e+08 2.828637e+02 4.132770e+01 2.376055e+04 6.864469e+06 84 4220e23d 29491200 2.097152e+09 2.091138e+03 2.430963e+02 3.764048e+05 7.977516e+08 180 87a7dc42 149299200 2.388787e+10 2.171545e+04 9.979353e+02 3.431041e+06 7.466394e+10 158 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 66355200 7.077888e+09 6.530201e+03 6.982602e+02 1.214617e+06 8.022384e+09 186 0b0b0ce8 7372800 2.621440e+08 2.596160e+02 3.720670e+01 2.907699e+04 7.703898e+06 112 4220e23d 29491200 2.097152e+09 2.068075e+03 2.561461e+02 4.156832e+05 8.728519e+08 201 87a7dc42 149299200 2.388787e+10 2.178854e+04 1.485331e+03 3.355435e+06 7.344977e+10 154 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 66355200 7.077888e+09 6.594324e+03 6.341124e+02 1.384808e+06 9.216313e+09 210 0b0b0ce8 7372800 2.621440e+08 2.592059e+02 3.728165e+01 2.773503e+04 7.337807e+06 107 4220e23d 29491200 2.097152e+09 2.149687e+03 2.853500e+02 3.847940e+05 8.417616e+08 179 87a7dc42 149299200 2.388787e+10 2.210351e+04 9.525598e+02 3.426044e+06 7.586825e+10 155 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 66355200 7.077888e+09 6.615698e+03 6.959563e+02 1.210673e+06 8.098082e+09 183 0b0b0ce8 7372800 2.621440e+08 2.665077e+02 3.721734e+01 3.278045e+04 8.906615e+06 123 4220e23d 29491200 2.097152e+09 2.090283e+03 2.730830e+02 4.285080e+05 9.109906e+08 205 87a7dc42 149299200 2.388787e+10 2.206407e+04 1.175820e+03 3.397867e+06 7.518367e+10 154 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 66355200 7.077888e+09 2.793361e+05 4.545353e+04 2.039154e+07 5.846913e+12 73 0b0b0ce8 7372800 2.621440e+08 1.003329e+04 9.763114e+02 3.471519e+06 3.516056e+10 346 4220e23d 29491200 2.097152e+09 8.266143e+04 1.577004e+04 6.860899e+06 5.877733e+11 83 87a7dc42 149299200 2.388787e+10 9.422627e+05 1.729617e+05 1.517043e+08 1.477617e+14 161 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.attila000066400000000000000000000100431507764646700314210ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.687897e+05 8.023245e+03 3.110794e+08 5.262564e+13 1843 f0ac7beb 9830400 0.000000e+00 5.125521e+04 2.656019e+03 7.375625e+07 3.790543e+12 1439 d46431bb 2457600 0.000000e+00 6.821106e+03 3.878220e+02 1.100926e+07 7.533811e+10 1614 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.852261e+03 2.067645e+02 2.783335e+07 1.630914e+11 4756 f0ac7beb 9830400 0.000000e+00 1.831142e+03 6.447275e+01 9.274735e+06 1.700441e+10 5065 d46431bb 2457600 0.000000e+00 2.556618e+02 1.838263e+01 1.252232e+06 3.218030e+08 4898 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.846675e+03 2.141855e+02 2.732736e+07 1.599886e+11 4674 f0ac7beb 9830400 0.000000e+00 1.834114e+03 5.380375e+01 9.566740e+06 1.756159e+10 5216 d46431bb 2457600 0.000000e+00 2.610266e+02 2.020042e+01 1.246402e+06 3.272925e+08 4775 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.840318e+03 1.756302e+02 2.921911e+07 1.708032e+11 5003 f0ac7beb 9830400 0.000000e+00 1.840833e+03 4.879997e+01 9.542881e+06 1.757920e+10 5184 d46431bb 2457600 0.000000e+00 2.617920e+02 1.853601e+01 1.281472e+06 3.371609e+08 4895 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.idgraf000066400000000000000000000217051507764646700314060ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.901996e+03 2.140574e+02 2.574451e+07 1.521439e+11 4362 f0ac7beb 9830400 0.000000e+00 1.855425e+03 1.035707e+02 7.464374e+06 1.389274e+10 4023 d46431bb 2457600 0.000000e+00 2.667843e+02 3.133790e+01 9.321442e+05 2.521128e+08 3494 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.924812e+03 2.030546e+02 2.715934e+07 1.611030e+11 4584 f0ac7beb 9830400 0.000000e+00 1.850857e+03 1.138114e+02 6.774137e+06 1.258537e+10 3660 d46431bb 2457600 0.000000e+00 2.743267e+02 3.237528e+01 7.903352e+05 2.198298e+08 2881 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.905959e+03 2.324106e+02 2.885061e+07 1.706544e+11 4885 f0ac7beb 9830400 0.000000e+00 1.844033e+03 9.904039e+01 7.516278e+06 1.390024e+10 4076 d46431bb 2457600 0.000000e+00 2.662813e+02 2.651200e+01 1.098144e+06 2.953140e+08 4124 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.905860e+03 2.088868e+02 2.619840e+07 1.549176e+11 4436 f0ac7beb 9830400 0.000000e+00 1.843182e+03 9.714398e+01 7.671323e+06 1.417892e+10 4162 d46431bb 2457600 0.000000e+00 2.666213e+02 3.154593e+01 1.003829e+06 2.713890e+08 3765 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.916743e+03 2.291447e+02 2.686793e+07 1.592091e+11 4541 f0ac7beb 9830400 0.000000e+00 1.837574e+03 9.255327e+01 7.197777e+06 1.326000e+10 3917 d46431bb 2457600 0.000000e+00 2.645367e+02 2.904285e+01 1.078252e+06 2.886751e+08 4076 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.776444e+05 1.603881e+03 1.085407e+08 1.928322e+13 611 f0ac7beb 9830400 0.000000e+00 5.438487e+04 1.553469e+03 1.598915e+07 8.702776e+11 294 d46431bb 2457600 0.000000e+00 6.892168e+03 1.879454e+02 4.218007e+06 2.909283e+10 612 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.908798e+03 2.191022e+02 2.432652e+07 1.439381e+11 4117 f0ac7beb 9830400 0.000000e+00 1.870298e+03 1.158137e+02 6.306645e+06 1.184053e+10 3372 d46431bb 2457600 0.000000e+00 2.622005e+02 3.221213e+01 7.630036e+05 2.030794e+08 2910 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c # not multiple-regression-base 0 nan nan nan # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.908890e+03 2.133901e+02 2.731089e+07 1.615875e+11 4622 f0ac7beb 9830400 0.000000e+00 1.853662e+03 1.234628e+02 6.493379e+06 1.208993e+10 3503 d46431bb 2457600 0.000000e+00 2.672063e+02 3.249622e+01 7.262666e+05 1.969332e+08 2718 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.908429e+03 2.149126e+02 2.371053e+07 1.402773e+11 4013 f0ac7beb 9830400 0.000000e+00 1.855601e+03 1.161756e+02 6.509447e+06 1.212628e+10 3508 d46431bb 2457600 0.000000e+00 2.697690e+02 3.186509e+01 7.013994e+05 1.918558e+08 2600 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.mirage000066400000000000000000000100421507764646700314060ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 5.319005e+04 1.072845e+03 1.074439e+08 5.717271e+12 2020 24c84a50 22118400 0.000000e+00 1.747556e+05 3.288616e+03 2.457064e+08 4.295378e+13 1406 d46431bb 2457600 0.000000e+00 6.731248e+03 2.017842e+02 1.758875e+07 1.185006e+11 2613 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 1.857771e+03 5.953793e+01 1.442559e+07 2.682697e+10 7765 24c84a50 22118400 0.000000e+00 5.825821e+03 1.536397e+02 3.023019e+07 1.762382e+11 5189 d46431bb 2457600 0.000000e+00 2.626388e+02 2.130047e+01 1.891262e+06 4.999858e+08 7201 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 1.841710e+03 6.898710e+01 1.448873e+07 2.672149e+10 7867 24c84a50 22118400 0.000000e+00 5.866678e+03 1.842980e+02 2.977339e+07 1.748433e+11 5075 d46431bb 2457600 0.000000e+00 2.614108e+02 2.029949e+01 1.936531e+06 5.092829e+08 7408 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 1.853277e+03 6.983878e+01 1.439996e+07 2.672502e+10 7770 24c84a50 22118400 0.000000e+00 5.858635e+03 1.761006e+02 3.008995e+07 1.764453e+11 5136 d46431bb 2457600 0.000000e+00 2.701366e+02 1.779276e+01 1.899060e+06 5.152311e+08 7030 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm.sirocco000066400000000000000000000130661507764646700316140ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.754881e+03 1.567907e+02 8.516439e+06 1.506464e+10 4853 d46431bb 2457600 0.000000e+00 9.227862e+01 1.339393e+01 6.585925e+05 6.205436e+07 7137 f0ac7beb 9830400 0.000000e+00 5.560171e+02 4.481480e+01 2.452035e+06 1.372230e+09 4410 8cfc3ba0 49766400 0.000000e+00 5.688840e+03 4.278238e+02 1.962081e+07 1.122509e+11 3449 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.725012e+03 1.556789e+02 8.775134e+06 1.526049e+10 5087 d46431bb 2457600 0.000000e+00 9.099306e+01 1.290433e+01 7.117477e+05 6.606663e+07 7822 f0ac7beb 9830400 0.000000e+00 5.497124e+02 4.364744e+01 2.308242e+06 1.276869e+09 4199 8cfc3ba0 49766400 0.000000e+00 5.591076e+03 4.188165e+02 1.997692e+07 1.123192e+11 3573 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.724207e+03 1.559700e+02 8.529651e+06 1.482723e+10 4947 d46431bb 2457600 0.000000e+00 9.395983e+01 1.410875e+01 5.884704e+05 5.653928e+07 6263 f0ac7beb 9830400 0.000000e+00 5.531811e+02 3.935565e+01 3.264875e+06 1.815209e+09 5902 8cfc3ba0 49766400 0.000000e+00 5.682607e+03 4.627422e+02 2.006529e+07 1.147792e+11 3531 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 6.659236e+04 1.142300e+04 1.507651e+08 1.033522e+13 2264 d46431bb 2457600 0.000000e+00 3.623237e+03 8.721045e+02 1.668138e+07 6.394225e+10 4604 f0ac7beb 9830400 0.000000e+00 2.355764e+04 4.984182e+03 4.405279e+07 1.084235e+12 1870 8cfc3ba0 49766400 0.000000e+00 2.164742e+05 3.673582e+04 3.325044e+08 7.405148e+13 1536 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.746747e+03 1.539679e+02 8.628932e+06 1.518967e+10 4940 d46431bb 2457600 0.000000e+00 9.539483e+01 1.447066e+01 7.032507e+05 6.863017e+07 7372 f0ac7beb 9830400 0.000000e+00 5.601014e+02 3.783630e+01 3.218342e+06 1.810824e+09 5746 8cfc3ba0 49766400 0.000000e+00 5.710157e+03 4.302245e+02 2.027106e+07 1.164080e+11 3550 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.attila000066400000000000000000000100431507764646700326050ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.687897e+05 8.023245e+03 3.110794e+08 5.262564e+13 1843 f0ac7beb 9830400 0.000000e+00 5.125521e+04 2.656019e+03 7.375625e+07 3.790543e+12 1439 d46431bb 2457600 0.000000e+00 6.821106e+03 3.878220e+02 1.100926e+07 7.533811e+10 1614 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.852261e+03 2.067645e+02 2.783335e+07 1.630914e+11 4756 f0ac7beb 9830400 0.000000e+00 1.831142e+03 6.447275e+01 9.274735e+06 1.700441e+10 5065 d46431bb 2457600 0.000000e+00 2.556618e+02 1.838263e+01 1.252232e+06 3.218030e+08 4898 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.846675e+03 2.141855e+02 2.732736e+07 1.599886e+11 4674 f0ac7beb 9830400 0.000000e+00 1.834114e+03 5.380375e+01 9.566740e+06 1.756159e+10 5216 d46431bb 2457600 0.000000e+00 2.610266e+02 2.020042e+01 1.246402e+06 3.272925e+08 4775 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.840318e+03 1.756302e+02 2.921911e+07 1.708032e+11 5003 f0ac7beb 9830400 0.000000e+00 1.840833e+03 4.879997e+01 9.542881e+06 1.757920e+10 5184 d46431bb 2457600 0.000000e+00 2.617920e+02 1.853601e+01 1.281472e+06 3.371609e+08 4895 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.idgraf000066400000000000000000000217051507764646700325720ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.901996e+03 2.140574e+02 2.574451e+07 1.521439e+11 4362 f0ac7beb 9830400 0.000000e+00 1.855425e+03 1.035707e+02 7.464374e+06 1.389274e+10 4023 d46431bb 2457600 0.000000e+00 2.667843e+02 3.133790e+01 9.321442e+05 2.521128e+08 3494 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.924812e+03 2.030546e+02 2.715934e+07 1.611030e+11 4584 f0ac7beb 9830400 0.000000e+00 1.850857e+03 1.138114e+02 6.774137e+06 1.258537e+10 3660 d46431bb 2457600 0.000000e+00 2.743267e+02 3.237528e+01 7.903352e+05 2.198298e+08 2881 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.905959e+03 2.324106e+02 2.885061e+07 1.706544e+11 4885 f0ac7beb 9830400 0.000000e+00 1.844033e+03 9.904039e+01 7.516278e+06 1.390024e+10 4076 d46431bb 2457600 0.000000e+00 2.662813e+02 2.651200e+01 1.098144e+06 2.953140e+08 4124 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.905860e+03 2.088868e+02 2.619840e+07 1.549176e+11 4436 f0ac7beb 9830400 0.000000e+00 1.843182e+03 9.714398e+01 7.671323e+06 1.417892e+10 4162 d46431bb 2457600 0.000000e+00 2.666213e+02 3.154593e+01 1.003829e+06 2.713890e+08 3765 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.916743e+03 2.291447e+02 2.686793e+07 1.592091e+11 4541 f0ac7beb 9830400 0.000000e+00 1.837574e+03 9.255327e+01 7.197777e+06 1.326000e+10 3917 d46431bb 2457600 0.000000e+00 2.645367e+02 2.904285e+01 1.078252e+06 2.886751e+08 4076 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.776444e+05 1.603881e+03 1.085407e+08 1.928322e+13 611 f0ac7beb 9830400 0.000000e+00 5.438487e+04 1.553469e+03 1.598915e+07 8.702776e+11 294 d46431bb 2457600 0.000000e+00 6.892168e+03 1.879454e+02 4.218007e+06 2.909283e+10 612 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.908798e+03 2.191022e+02 2.432652e+07 1.439381e+11 4117 f0ac7beb 9830400 0.000000e+00 1.870298e+03 1.158137e+02 6.306645e+06 1.184053e+10 3372 d46431bb 2457600 0.000000e+00 2.622005e+02 3.221213e+01 7.630036e+05 2.030794e+08 2910 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c # not multiple-regression-base 0 nan nan nan # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.908890e+03 2.133901e+02 2.731089e+07 1.615875e+11 4622 f0ac7beb 9830400 0.000000e+00 1.853662e+03 1.234628e+02 6.493379e+06 1.208993e+10 3503 d46431bb 2457600 0.000000e+00 2.672063e+02 3.249622e+01 7.262666e+05 1.969332e+08 2718 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.908429e+03 2.149126e+02 2.371053e+07 1.402773e+11 4013 f0ac7beb 9830400 0.000000e+00 1.855601e+03 1.161756e+02 6.509447e+06 1.212628e+10 3508 d46431bb 2457600 0.000000e+00 2.697690e+02 3.186509e+01 7.013994e+05 1.918558e+08 2600 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.mirage000066400000000000000000000100421507764646700325720ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 5.319005e+04 1.072845e+03 1.074439e+08 5.717271e+12 2020 24c84a50 22118400 0.000000e+00 1.747556e+05 3.288616e+03 2.457064e+08 4.295378e+13 1406 d46431bb 2457600 0.000000e+00 6.731248e+03 2.017842e+02 1.758875e+07 1.185006e+11 2613 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 1.857771e+03 5.953793e+01 1.442559e+07 2.682697e+10 7765 24c84a50 22118400 0.000000e+00 5.825821e+03 1.536397e+02 3.023019e+07 1.762382e+11 5189 d46431bb 2457600 0.000000e+00 2.626388e+02 2.130047e+01 1.891262e+06 4.999858e+08 7201 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 1.841710e+03 6.898710e+01 1.448873e+07 2.672149e+10 7867 24c84a50 22118400 0.000000e+00 5.866678e+03 1.842980e+02 2.977339e+07 1.748433e+11 5075 d46431bb 2457600 0.000000e+00 2.614108e+02 2.029949e+01 1.936531e+06 5.092829e+08 7408 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 1.853277e+03 6.983878e+01 1.439996e+07 2.672502e+10 7770 24c84a50 22118400 0.000000e+00 5.858635e+03 1.761006e+02 3.008995e+07 1.764453e+11 5136 d46431bb 2457600 0.000000e+00 2.701366e+02 1.779276e+01 1.899060e+06 5.152311e+08 7030 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_atlas.sirocco000066400000000000000000000130661507764646700330000ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.754881e+03 1.567907e+02 8.516439e+06 1.506464e+10 4853 d46431bb 2457600 0.000000e+00 9.227862e+01 1.339393e+01 6.585925e+05 6.205436e+07 7137 f0ac7beb 9830400 0.000000e+00 5.560171e+02 4.481480e+01 2.452035e+06 1.372230e+09 4410 8cfc3ba0 49766400 0.000000e+00 5.688840e+03 4.278238e+02 1.962081e+07 1.122509e+11 3449 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.725012e+03 1.556789e+02 8.775134e+06 1.526049e+10 5087 d46431bb 2457600 0.000000e+00 9.099306e+01 1.290433e+01 7.117477e+05 6.606663e+07 7822 f0ac7beb 9830400 0.000000e+00 5.497124e+02 4.364744e+01 2.308242e+06 1.276869e+09 4199 8cfc3ba0 49766400 0.000000e+00 5.591076e+03 4.188165e+02 1.997692e+07 1.123192e+11 3573 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.724207e+03 1.559700e+02 8.529651e+06 1.482723e+10 4947 d46431bb 2457600 0.000000e+00 9.395983e+01 1.410875e+01 5.884704e+05 5.653928e+07 6263 f0ac7beb 9830400 0.000000e+00 5.531811e+02 3.935565e+01 3.264875e+06 1.815209e+09 5902 8cfc3ba0 49766400 0.000000e+00 5.682607e+03 4.627422e+02 2.006529e+07 1.147792e+11 3531 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 6.659236e+04 1.142300e+04 1.507651e+08 1.033522e+13 2264 d46431bb 2457600 0.000000e+00 3.623237e+03 8.721045e+02 1.668138e+07 6.394225e+10 4604 f0ac7beb 9830400 0.000000e+00 2.355764e+04 4.984182e+03 4.405279e+07 1.084235e+12 1870 8cfc3ba0 49766400 0.000000e+00 2.164742e+05 3.673582e+04 3.325044e+08 7.405148e+13 1536 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.746747e+03 1.539679e+02 8.628932e+06 1.518967e+10 4940 d46431bb 2457600 0.000000e+00 9.539483e+01 1.447066e+01 7.032507e+05 6.863017e+07 7372 f0ac7beb 9830400 0.000000e+00 5.601014e+02 3.783630e+01 3.218342e+06 1.810824e+09 5746 8cfc3ba0 49766400 0.000000e+00 5.710157e+03 4.302245e+02 2.027106e+07 1.164080e+11 3550 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.attila000066400000000000000000000100431507764646700324510ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.687897e+05 8.023245e+03 3.110794e+08 5.262564e+13 1843 f0ac7beb 9830400 0.000000e+00 5.125521e+04 2.656019e+03 7.375625e+07 3.790543e+12 1439 d46431bb 2457600 0.000000e+00 6.821106e+03 3.878220e+02 1.100926e+07 7.533811e+10 1614 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.852261e+03 2.067645e+02 2.783335e+07 1.630914e+11 4756 f0ac7beb 9830400 0.000000e+00 1.831142e+03 6.447275e+01 9.274735e+06 1.700441e+10 5065 d46431bb 2457600 0.000000e+00 2.556618e+02 1.838263e+01 1.252232e+06 3.218030e+08 4898 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.846675e+03 2.141855e+02 2.732736e+07 1.599886e+11 4674 f0ac7beb 9830400 0.000000e+00 1.834114e+03 5.380375e+01 9.566740e+06 1.756159e+10 5216 d46431bb 2457600 0.000000e+00 2.610266e+02 2.020042e+01 1.246402e+06 3.272925e+08 4775 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.840318e+03 1.756302e+02 2.921911e+07 1.708032e+11 5003 f0ac7beb 9830400 0.000000e+00 1.840833e+03 4.879997e+01 9.542881e+06 1.757920e+10 5184 d46431bb 2457600 0.000000e+00 2.617920e+02 1.853601e+01 1.281472e+06 3.371609e+08 4895 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.idgraf000066400000000000000000000217051507764646700324360ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.901996e+03 2.140574e+02 2.574451e+07 1.521439e+11 4362 f0ac7beb 9830400 0.000000e+00 1.855425e+03 1.035707e+02 7.464374e+06 1.389274e+10 4023 d46431bb 2457600 0.000000e+00 2.667843e+02 3.133790e+01 9.321442e+05 2.521128e+08 3494 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.924812e+03 2.030546e+02 2.715934e+07 1.611030e+11 4584 f0ac7beb 9830400 0.000000e+00 1.850857e+03 1.138114e+02 6.774137e+06 1.258537e+10 3660 d46431bb 2457600 0.000000e+00 2.743267e+02 3.237528e+01 7.903352e+05 2.198298e+08 2881 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.905959e+03 2.324106e+02 2.885061e+07 1.706544e+11 4885 f0ac7beb 9830400 0.000000e+00 1.844033e+03 9.904039e+01 7.516278e+06 1.390024e+10 4076 d46431bb 2457600 0.000000e+00 2.662813e+02 2.651200e+01 1.098144e+06 2.953140e+08 4124 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.905860e+03 2.088868e+02 2.619840e+07 1.549176e+11 4436 f0ac7beb 9830400 0.000000e+00 1.843182e+03 9.714398e+01 7.671323e+06 1.417892e+10 4162 d46431bb 2457600 0.000000e+00 2.666213e+02 3.154593e+01 1.003829e+06 2.713890e+08 3765 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.916743e+03 2.291447e+02 2.686793e+07 1.592091e+11 4541 f0ac7beb 9830400 0.000000e+00 1.837574e+03 9.255327e+01 7.197777e+06 1.326000e+10 3917 d46431bb 2457600 0.000000e+00 2.645367e+02 2.904285e+01 1.078252e+06 2.886751e+08 4076 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.776444e+05 1.603881e+03 1.085407e+08 1.928322e+13 611 f0ac7beb 9830400 0.000000e+00 5.438487e+04 1.553469e+03 1.598915e+07 8.702776e+11 294 d46431bb 2457600 0.000000e+00 6.892168e+03 1.879454e+02 4.218007e+06 2.909283e+10 612 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.908798e+03 2.191022e+02 2.432652e+07 1.439381e+11 4117 f0ac7beb 9830400 0.000000e+00 1.870298e+03 1.158137e+02 6.306645e+06 1.184053e+10 3372 d46431bb 2457600 0.000000e+00 2.622005e+02 3.221213e+01 7.630036e+05 2.030794e+08 2910 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c # not multiple-regression-base 0 nan nan nan # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.908890e+03 2.133901e+02 2.731089e+07 1.615875e+11 4622 f0ac7beb 9830400 0.000000e+00 1.853662e+03 1.234628e+02 6.493379e+06 1.208993e+10 3503 d46431bb 2457600 0.000000e+00 2.672063e+02 3.249622e+01 7.262666e+05 1.969332e+08 2718 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.908429e+03 2.149126e+02 2.371053e+07 1.402773e+11 4013 f0ac7beb 9830400 0.000000e+00 1.855601e+03 1.161756e+02 6.509447e+06 1.212628e+10 3508 d46431bb 2457600 0.000000e+00 2.697690e+02 3.186509e+01 7.013994e+05 1.918558e+08 2600 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.mirage000066400000000000000000000100421507764646700324360ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 5.319005e+04 1.072845e+03 1.074439e+08 5.717271e+12 2020 24c84a50 22118400 0.000000e+00 1.747556e+05 3.288616e+03 2.457064e+08 4.295378e+13 1406 d46431bb 2457600 0.000000e+00 6.731248e+03 2.017842e+02 1.758875e+07 1.185006e+11 2613 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 1.857771e+03 5.953793e+01 1.442559e+07 2.682697e+10 7765 24c84a50 22118400 0.000000e+00 5.825821e+03 1.536397e+02 3.023019e+07 1.762382e+11 5189 d46431bb 2457600 0.000000e+00 2.626388e+02 2.130047e+01 1.891262e+06 4.999858e+08 7201 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 1.841710e+03 6.898710e+01 1.448873e+07 2.672149e+10 7867 24c84a50 22118400 0.000000e+00 5.866678e+03 1.842980e+02 2.977339e+07 1.748433e+11 5075 d46431bb 2457600 0.000000e+00 2.614108e+02 2.029949e+01 1.936531e+06 5.092829e+08 7408 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 1.853277e+03 6.983878e+01 1.439996e+07 2.672502e+10 7770 24c84a50 22118400 0.000000e+00 5.858635e+03 1.761006e+02 3.008995e+07 1.764453e+11 5136 d46431bb 2457600 0.000000e+00 2.701366e+02 1.779276e+01 1.899060e+06 5.152311e+08 7030 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_goto.sirocco000066400000000000000000000130661507764646700326440ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.754881e+03 1.567907e+02 8.516439e+06 1.506464e+10 4853 d46431bb 2457600 0.000000e+00 9.227862e+01 1.339393e+01 6.585925e+05 6.205436e+07 7137 f0ac7beb 9830400 0.000000e+00 5.560171e+02 4.481480e+01 2.452035e+06 1.372230e+09 4410 8cfc3ba0 49766400 0.000000e+00 5.688840e+03 4.278238e+02 1.962081e+07 1.122509e+11 3449 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.725012e+03 1.556789e+02 8.775134e+06 1.526049e+10 5087 d46431bb 2457600 0.000000e+00 9.099306e+01 1.290433e+01 7.117477e+05 6.606663e+07 7822 f0ac7beb 9830400 0.000000e+00 5.497124e+02 4.364744e+01 2.308242e+06 1.276869e+09 4199 8cfc3ba0 49766400 0.000000e+00 5.591076e+03 4.188165e+02 1.997692e+07 1.123192e+11 3573 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.724207e+03 1.559700e+02 8.529651e+06 1.482723e+10 4947 d46431bb 2457600 0.000000e+00 9.395983e+01 1.410875e+01 5.884704e+05 5.653928e+07 6263 f0ac7beb 9830400 0.000000e+00 5.531811e+02 3.935565e+01 3.264875e+06 1.815209e+09 5902 8cfc3ba0 49766400 0.000000e+00 5.682607e+03 4.627422e+02 2.006529e+07 1.147792e+11 3531 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 6.659236e+04 1.142300e+04 1.507651e+08 1.033522e+13 2264 d46431bb 2457600 0.000000e+00 3.623237e+03 8.721045e+02 1.668138e+07 6.394225e+10 4604 f0ac7beb 9830400 0.000000e+00 2.355764e+04 4.984182e+03 4.405279e+07 1.084235e+12 1870 8cfc3ba0 49766400 0.000000e+00 2.164742e+05 3.673582e+04 3.325044e+08 7.405148e+13 1536 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.746747e+03 1.539679e+02 8.628932e+06 1.518967e+10 4940 d46431bb 2457600 0.000000e+00 9.539483e+01 1.447066e+01 7.032507e+05 6.863017e+07 7372 f0ac7beb 9830400 0.000000e+00 5.601014e+02 3.783630e+01 3.218342e+06 1.810824e+09 5746 8cfc3ba0 49766400 0.000000e+00 5.710157e+03 4.302245e+02 2.027106e+07 1.164080e+11 3550 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.attila000066400000000000000000000100431507764646700333040ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.687897e+05 8.023245e+03 3.110794e+08 5.262564e+13 1843 f0ac7beb 9830400 0.000000e+00 5.125521e+04 2.656019e+03 7.375625e+07 3.790543e+12 1439 d46431bb 2457600 0.000000e+00 6.821106e+03 3.878220e+02 1.100926e+07 7.533811e+10 1614 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.852261e+03 2.067645e+02 2.783335e+07 1.630914e+11 4756 f0ac7beb 9830400 0.000000e+00 1.831142e+03 6.447275e+01 9.274735e+06 1.700441e+10 5065 d46431bb 2457600 0.000000e+00 2.556618e+02 1.838263e+01 1.252232e+06 3.218030e+08 4898 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.846675e+03 2.141855e+02 2.732736e+07 1.599886e+11 4674 f0ac7beb 9830400 0.000000e+00 1.834114e+03 5.380375e+01 9.566740e+06 1.756159e+10 5216 d46431bb 2457600 0.000000e+00 2.610266e+02 2.020042e+01 1.246402e+06 3.272925e+08 4775 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.840318e+03 1.756302e+02 2.921911e+07 1.708032e+11 5003 f0ac7beb 9830400 0.000000e+00 1.840833e+03 4.879997e+01 9.542881e+06 1.757920e+10 5184 d46431bb 2457600 0.000000e+00 2.617920e+02 1.853601e+01 1.281472e+06 3.371609e+08 4895 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.idgraf000066400000000000000000000217051507764646700332710ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.901996e+03 2.140574e+02 2.574451e+07 1.521439e+11 4362 f0ac7beb 9830400 0.000000e+00 1.855425e+03 1.035707e+02 7.464374e+06 1.389274e+10 4023 d46431bb 2457600 0.000000e+00 2.667843e+02 3.133790e+01 9.321442e+05 2.521128e+08 3494 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.924812e+03 2.030546e+02 2.715934e+07 1.611030e+11 4584 f0ac7beb 9830400 0.000000e+00 1.850857e+03 1.138114e+02 6.774137e+06 1.258537e+10 3660 d46431bb 2457600 0.000000e+00 2.743267e+02 3.237528e+01 7.903352e+05 2.198298e+08 2881 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.905959e+03 2.324106e+02 2.885061e+07 1.706544e+11 4885 f0ac7beb 9830400 0.000000e+00 1.844033e+03 9.904039e+01 7.516278e+06 1.390024e+10 4076 d46431bb 2457600 0.000000e+00 2.662813e+02 2.651200e+01 1.098144e+06 2.953140e+08 4124 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.905860e+03 2.088868e+02 2.619840e+07 1.549176e+11 4436 f0ac7beb 9830400 0.000000e+00 1.843182e+03 9.714398e+01 7.671323e+06 1.417892e+10 4162 d46431bb 2457600 0.000000e+00 2.666213e+02 3.154593e+01 1.003829e+06 2.713890e+08 3765 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.916743e+03 2.291447e+02 2.686793e+07 1.592091e+11 4541 f0ac7beb 9830400 0.000000e+00 1.837574e+03 9.255327e+01 7.197777e+06 1.326000e+10 3917 d46431bb 2457600 0.000000e+00 2.645367e+02 2.904285e+01 1.078252e+06 2.886751e+08 4076 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.776444e+05 1.603881e+03 1.085407e+08 1.928322e+13 611 f0ac7beb 9830400 0.000000e+00 5.438487e+04 1.553469e+03 1.598915e+07 8.702776e+11 294 d46431bb 2457600 0.000000e+00 6.892168e+03 1.879454e+02 4.218007e+06 2.909283e+10 612 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.908798e+03 2.191022e+02 2.432652e+07 1.439381e+11 4117 f0ac7beb 9830400 0.000000e+00 1.870298e+03 1.158137e+02 6.306645e+06 1.184053e+10 3372 d46431bb 2457600 0.000000e+00 2.622005e+02 3.221213e+01 7.630036e+05 2.030794e+08 2910 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c # not multiple-regression-base 0 nan nan nan # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.908890e+03 2.133901e+02 2.731089e+07 1.615875e+11 4622 f0ac7beb 9830400 0.000000e+00 1.853662e+03 1.234628e+02 6.493379e+06 1.208993e+10 3503 d46431bb 2457600 0.000000e+00 2.672063e+02 3.249622e+01 7.262666e+05 1.969332e+08 2718 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 5.908429e+03 2.149126e+02 2.371053e+07 1.402773e+11 4013 f0ac7beb 9830400 0.000000e+00 1.855601e+03 1.161756e+02 6.509447e+06 1.212628e+10 3508 d46431bb 2457600 0.000000e+00 2.697690e+02 3.186509e+01 7.013994e+05 1.918558e+08 2600 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.mirage000066400000000000000000000100421507764646700332710ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 5.319005e+04 1.072845e+03 1.074439e+08 5.717271e+12 2020 24c84a50 22118400 0.000000e+00 1.747556e+05 3.288616e+03 2.457064e+08 4.295378e+13 1406 d46431bb 2457600 0.000000e+00 6.731248e+03 2.017842e+02 1.758875e+07 1.185006e+11 2613 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 1.857771e+03 5.953793e+01 1.442559e+07 2.682697e+10 7765 24c84a50 22118400 0.000000e+00 5.825821e+03 1.536397e+02 3.023019e+07 1.762382e+11 5189 d46431bb 2457600 0.000000e+00 2.626388e+02 2.130047e+01 1.891262e+06 4.999858e+08 7201 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 1.841710e+03 6.898710e+01 1.448873e+07 2.672149e+10 7867 24c84a50 22118400 0.000000e+00 5.866678e+03 1.842980e+02 2.977339e+07 1.748433e+11 5075 d46431bb 2457600 0.000000e+00 2.614108e+02 2.029949e+01 1.936531e+06 5.092829e+08 7408 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 9830400 0.000000e+00 1.853277e+03 6.983878e+01 1.439996e+07 2.672502e+10 7770 24c84a50 22118400 0.000000e+00 5.858635e+03 1.761006e+02 3.008995e+07 1.764453e+11 5136 d46431bb 2457600 0.000000e+00 2.701366e+02 1.779276e+01 1.899060e+06 5.152311e+08 7030 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_gemm_openblas.sirocco000066400000000000000000000130661507764646700334770ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.754881e+03 1.567907e+02 8.516439e+06 1.506464e+10 4853 d46431bb 2457600 0.000000e+00 9.227862e+01 1.339393e+01 6.585925e+05 6.205436e+07 7137 f0ac7beb 9830400 0.000000e+00 5.560171e+02 4.481480e+01 2.452035e+06 1.372230e+09 4410 8cfc3ba0 49766400 0.000000e+00 5.688840e+03 4.278238e+02 1.962081e+07 1.122509e+11 3449 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.725012e+03 1.556789e+02 8.775134e+06 1.526049e+10 5087 d46431bb 2457600 0.000000e+00 9.099306e+01 1.290433e+01 7.117477e+05 6.606663e+07 7822 f0ac7beb 9830400 0.000000e+00 5.497124e+02 4.364744e+01 2.308242e+06 1.276869e+09 4199 8cfc3ba0 49766400 0.000000e+00 5.591076e+03 4.188165e+02 1.997692e+07 1.123192e+11 3573 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.724207e+03 1.559700e+02 8.529651e+06 1.482723e+10 4947 d46431bb 2457600 0.000000e+00 9.395983e+01 1.410875e+01 5.884704e+05 5.653928e+07 6263 f0ac7beb 9830400 0.000000e+00 5.531811e+02 3.935565e+01 3.264875e+06 1.815209e+09 5902 8cfc3ba0 49766400 0.000000e+00 5.682607e+03 4.627422e+02 2.006529e+07 1.147792e+11 3531 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 6.659236e+04 1.142300e+04 1.507651e+08 1.033522e+13 2264 d46431bb 2457600 0.000000e+00 3.623237e+03 8.721045e+02 1.668138e+07 6.394225e+10 4604 f0ac7beb 9830400 0.000000e+00 2.355764e+04 4.984182e+03 4.405279e+07 1.084235e+12 1870 8cfc3ba0 49766400 0.000000e+00 2.164742e+05 3.673582e+04 3.325044e+08 7.405148e+13 1536 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 22118400 0.000000e+00 1.746747e+03 1.539679e+02 8.628932e+06 1.518967e+10 4940 d46431bb 2457600 0.000000e+00 9.539483e+01 1.447066e+01 7.032507e+05 6.863017e+07 7372 f0ac7beb 9830400 0.000000e+00 5.601014e+02 3.783630e+01 3.218342e+06 1.810824e+09 5746 8cfc3ba0 49766400 0.000000e+00 5.710157e+03 4.302245e+02 2.027106e+07 1.164080e+11 3550 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.attila000066400000000000000000000100121507764646700315770ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 2.515766e+05 2.096151e+04 2.515766e+06 6.373017e+11 10 afdd228b 3276800 0.000000e+00 7.350482e+04 4.292777e+03 9.555626e+05 7.047802e+10 13 cea37d6d 819200 0.000000e+00 9.586125e+03 1.023620e+03 2.108948e+05 2.044715e+09 22 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 6.990473e+04 4.071360e+03 1.118476e+06 7.845196e+10 16 afdd228b 3276800 0.000000e+00 2.992444e+04 7.760944e+02 4.787910e+05 1.433719e+10 16 cea37d6d 819200 0.000000e+00 9.620220e+03 2.335102e+02 1.058224e+05 1.018635e+09 11 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 6.793522e+04 8.600858e+02 6.793522e+05 4.615934e+10 10 afdd228b 3276800 0.000000e+00 2.989699e+04 1.490344e+03 3.587638e+05 1.075261e+10 12 cea37d6d 819200 0.000000e+00 9.974140e+03 1.055336e+03 1.097155e+05 1.106569e+09 11 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 6.962168e+04 1.952172e+02 1.322812e+06 9.209711e+10 19 afdd228b 3276800 0.000000e+00 3.047853e+04 4.777511e+01 4.571780e+05 1.393415e+10 15 cea37d6d 819200 0.000000e+00 1.119488e+04 2.171263e+03 1.231437e+05 1.430437e+09 11 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.idgraf000066400000000000000000000216221507764646700315660ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 2.127055e+05 1.216918e+04 3.190582e+06 6.808756e+11 15 afdd228b 3276800 0.000000e+00 6.346686e+04 7.329654e+02 6.346686e+05 4.028580e+10 10 cea37d6d 819200 0.000000e+00 7.969263e+03 1.770463e+02 1.354775e+05 1.080188e+09 17 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.656100e+04 6.943816e+03 1.471537e+06 1.281974e+11 17 afdd228b 3276800 0.000000e+00 3.567215e+04 3.302464e+03 3.567215e+05 1.283409e+10 10 cea37d6d 819200 0.000000e+00 1.101988e+04 5.146633e+02 1.101988e+05 1.217027e+09 10 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.935885e+04 6.351673e+03 3.935885e+05 1.589463e+10 10 cea37d6d 819200 0.000000e+00 1.194615e+04 1.359754e+03 1.194615e+05 1.445595e+09 10 617e5fe6 7372800 0.000000e+00 8.781176e+04 9.198610e+03 1.317176e+06 1.169328e+11 15 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.754335e+04 8.654029e+03 1.575780e+06 1.392972e+11 18 afdd228b 3276800 0.000000e+00 3.542725e+04 1.501284e+03 3.542725e+05 1.257344e+10 10 cea37d6d 819200 0.000000e+00 1.193774e+04 1.685032e+03 1.193774e+05 1.453490e+09 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.763521e+04 5.876858e+03 9.639873e+05 8.485914e+10 11 afdd228b 3276800 0.000000e+00 3.909159e+04 6.650440e+03 4.300075e+05 1.729619e+10 11 cea37d6d 819200 0.000000e+00 1.211577e+04 1.649480e+03 1.211577e+05 1.495126e+09 10 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.616388e+04 4.981316e+03 1.550950e+06 1.340825e+11 18 afdd228b 3276800 0.000000e+00 3.647899e+04 2.965394e+03 4.377479e+05 1.607412e+10 12 cea37d6d 819200 0.000000e+00 1.073272e+04 1.010096e+02 1.073272e+05 1.152015e+09 10 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.786078e+04 7.200822e+03 1.317912e+06 1.165705e+11 15 afdd228b 3276800 0.000000e+00 3.795195e+04 3.399141e+03 3.795195e+05 1.451905e+10 10 cea37d6d 819200 0.000000e+00 1.163527e+04 1.023060e+03 1.163527e+05 1.364262e+09 10 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.814631e+04 6.725805e+03 1.498487e+06 1.328551e+11 17 cea37d6d 819200 0.000000e+00 1.170806e+04 1.094676e+03 1.170806e+05 1.382770e+09 10 afdd228b 3276800 0.000000e+00 4.283079e+04 7.621190e+03 4.283079e+05 1.892559e+10 10 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 9.172766e+04 1.075608e+04 1.375915e+06 1.279449e+11 15 cea37d6d 819200 0.000000e+00 1.117240e+04 8.447401e+02 1.117240e+05 1.255362e+09 10 afdd228b 3276800 0.000000e+00 3.472448e+04 1.278416e+03 3.819693e+05 1.328166e+10 11 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.mirage000066400000000000000000000100121507764646700315650ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 7.065042e+04 7.118479e+03 1.271707e+06 9.075877e+10 18 617e5fe6 7372800 0.000000e+00 2.321971e+05 3.386520e+04 2.554168e+06 6.056858e+11 11 cea37d6d 819200 0.000000e+00 8.644872e+03 1.175676e+03 4.581782e+05 4.034149e+09 53 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.469534e+04 1.243993e+03 9.714695e+05 3.374879e+10 28 617e5fe6 7372800 0.000000e+00 8.533007e+04 9.757927e+03 1.194621e+06 1.032701e+11 14 cea37d6d 819200 0.000000e+00 1.145973e+04 6.017234e+02 1.145973e+05 1.316874e+09 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.498011e+04 2.179531e+03 6.996022e+05 2.456717e+10 20 617e5fe6 7372800 0.000000e+00 8.442764e+04 6.770170e+03 1.350842e+06 1.147818e+11 16 cea37d6d 819200 0.000000e+00 1.080764e+04 2.151492e+02 1.188840e+05 1.285365e+09 11 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.506064e+04 2.956747e+03 6.310915e+05 2.228383e+10 18 617e5fe6 7372800 0.000000e+00 8.151052e+04 4.250155e+02 1.059637e+06 8.637388e+10 13 cea37d6d 819200 0.000000e+00 1.153062e+04 1.200141e+03 1.153062e+05 1.343956e+09 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf.sirocco000066400000000000000000000130161507764646700317710ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 1.778570e+05 1.735127e+04 1.778570e+06 3.193419e+11 10 cea37d6d 819200 0.000000e+00 5.904224e+03 6.575598e+02 5.668055e+05 3.388055e+09 96 afdd228b 3276800 0.000000e+00 4.953149e+04 6.709149e+03 6.439093e+05 3.247895e+10 13 25ebb669 16588800 0.000000e+00 7.801727e+05 1.214440e+05 7.801727e+06 6.234180e+12 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.552387e+04 5.714037e+03 1.054954e+06 5.919546e+10 19 cea37d6d 819200 0.000000e+00 9.707597e+03 9.439210e+02 9.707597e+04 9.512842e+08 10 afdd228b 3276800 0.000000e+00 2.633937e+04 3.608518e+03 3.950905e+05 1.060175e+10 15 25ebb669 16588800 0.000000e+00 1.397955e+05 9.676594e+03 1.537750e+06 2.160006e+11 11 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.675882e+04 6.232185e+03 1.248694e+06 7.172888e+10 22 cea37d6d 819200 0.000000e+00 9.541018e+03 9.285702e+02 9.541018e+04 9.189326e+08 10 afdd228b 3276800 0.000000e+00 2.651477e+04 2.554649e+03 3.181772e+05 8.514711e+09 12 25ebb669 16588800 0.000000e+00 1.382255e+05 7.304438e+03 1.382255e+06 1.915965e+11 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.624387e+04 5.549003e+03 8.436581e+05 4.791247e+10 15 cea37d6d 819200 0.000000e+00 9.661577e+03 7.114114e+02 9.661577e+04 9.385217e+08 10 afdd228b 3276800 0.000000e+00 2.574090e+04 2.071791e+03 5.148179e+05 1.333772e+10 20 25ebb669 16588800 0.000000e+00 1.361676e+05 1.958095e+03 1.770178e+06 2.410907e+11 13 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.895007e+04 8.369498e+03 7.663509e+05 4.608707e+10 13 cea37d6d 819200 0.000000e+00 9.910778e+03 1.200981e+03 9.910778e+04 9.966588e+08 10 afdd228b 3276800 0.000000e+00 2.572979e+04 2.095041e+03 5.917851e+05 1.532746e+10 23 25ebb669 16588800 0.000000e+00 1.422314e+05 1.728252e+04 1.422314e+06 2.052844e+11 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.attila000066400000000000000000000100121507764646700327630ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 2.515766e+05 2.096151e+04 2.515766e+06 6.373017e+11 10 afdd228b 3276800 0.000000e+00 7.350482e+04 4.292777e+03 9.555626e+05 7.047802e+10 13 cea37d6d 819200 0.000000e+00 9.586125e+03 1.023620e+03 2.108948e+05 2.044715e+09 22 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 6.990473e+04 4.071360e+03 1.118476e+06 7.845196e+10 16 afdd228b 3276800 0.000000e+00 2.992444e+04 7.760944e+02 4.787910e+05 1.433719e+10 16 cea37d6d 819200 0.000000e+00 9.620220e+03 2.335102e+02 1.058224e+05 1.018635e+09 11 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 6.793522e+04 8.600858e+02 6.793522e+05 4.615934e+10 10 afdd228b 3276800 0.000000e+00 2.989699e+04 1.490344e+03 3.587638e+05 1.075261e+10 12 cea37d6d 819200 0.000000e+00 9.974140e+03 1.055336e+03 1.097155e+05 1.106569e+09 11 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 6.962168e+04 1.952172e+02 1.322812e+06 9.209711e+10 19 afdd228b 3276800 0.000000e+00 3.047853e+04 4.777511e+01 4.571780e+05 1.393415e+10 15 cea37d6d 819200 0.000000e+00 1.119488e+04 2.171263e+03 1.231437e+05 1.430437e+09 11 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.idgraf000066400000000000000000000216221507764646700327520ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 2.127055e+05 1.216918e+04 3.190582e+06 6.808756e+11 15 afdd228b 3276800 0.000000e+00 6.346686e+04 7.329654e+02 6.346686e+05 4.028580e+10 10 cea37d6d 819200 0.000000e+00 7.969263e+03 1.770463e+02 1.354775e+05 1.080188e+09 17 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.656100e+04 6.943816e+03 1.471537e+06 1.281974e+11 17 afdd228b 3276800 0.000000e+00 3.567215e+04 3.302464e+03 3.567215e+05 1.283409e+10 10 cea37d6d 819200 0.000000e+00 1.101988e+04 5.146633e+02 1.101988e+05 1.217027e+09 10 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.935885e+04 6.351673e+03 3.935885e+05 1.589463e+10 10 cea37d6d 819200 0.000000e+00 1.194615e+04 1.359754e+03 1.194615e+05 1.445595e+09 10 617e5fe6 7372800 0.000000e+00 8.781176e+04 9.198610e+03 1.317176e+06 1.169328e+11 15 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.754335e+04 8.654029e+03 1.575780e+06 1.392972e+11 18 afdd228b 3276800 0.000000e+00 3.542725e+04 1.501284e+03 3.542725e+05 1.257344e+10 10 cea37d6d 819200 0.000000e+00 1.193774e+04 1.685032e+03 1.193774e+05 1.453490e+09 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.763521e+04 5.876858e+03 9.639873e+05 8.485914e+10 11 afdd228b 3276800 0.000000e+00 3.909159e+04 6.650440e+03 4.300075e+05 1.729619e+10 11 cea37d6d 819200 0.000000e+00 1.211577e+04 1.649480e+03 1.211577e+05 1.495126e+09 10 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.616388e+04 4.981316e+03 1.550950e+06 1.340825e+11 18 afdd228b 3276800 0.000000e+00 3.647899e+04 2.965394e+03 4.377479e+05 1.607412e+10 12 cea37d6d 819200 0.000000e+00 1.073272e+04 1.010096e+02 1.073272e+05 1.152015e+09 10 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.786078e+04 7.200822e+03 1.317912e+06 1.165705e+11 15 afdd228b 3276800 0.000000e+00 3.795195e+04 3.399141e+03 3.795195e+05 1.451905e+10 10 cea37d6d 819200 0.000000e+00 1.163527e+04 1.023060e+03 1.163527e+05 1.364262e+09 10 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.814631e+04 6.725805e+03 1.498487e+06 1.328551e+11 17 cea37d6d 819200 0.000000e+00 1.170806e+04 1.094676e+03 1.170806e+05 1.382770e+09 10 afdd228b 3276800 0.000000e+00 4.283079e+04 7.621190e+03 4.283079e+05 1.892559e+10 10 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 9.172766e+04 1.075608e+04 1.375915e+06 1.279449e+11 15 cea37d6d 819200 0.000000e+00 1.117240e+04 8.447401e+02 1.117240e+05 1.255362e+09 10 afdd228b 3276800 0.000000e+00 3.472448e+04 1.278416e+03 3.819693e+05 1.328166e+10 11 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.mirage000066400000000000000000000100121507764646700327510ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 7.065042e+04 7.118479e+03 1.271707e+06 9.075877e+10 18 617e5fe6 7372800 0.000000e+00 2.321971e+05 3.386520e+04 2.554168e+06 6.056858e+11 11 cea37d6d 819200 0.000000e+00 8.644872e+03 1.175676e+03 4.581782e+05 4.034149e+09 53 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.469534e+04 1.243993e+03 9.714695e+05 3.374879e+10 28 617e5fe6 7372800 0.000000e+00 8.533007e+04 9.757927e+03 1.194621e+06 1.032701e+11 14 cea37d6d 819200 0.000000e+00 1.145973e+04 6.017234e+02 1.145973e+05 1.316874e+09 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.498011e+04 2.179531e+03 6.996022e+05 2.456717e+10 20 617e5fe6 7372800 0.000000e+00 8.442764e+04 6.770170e+03 1.350842e+06 1.147818e+11 16 cea37d6d 819200 0.000000e+00 1.080764e+04 2.151492e+02 1.188840e+05 1.285365e+09 11 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.506064e+04 2.956747e+03 6.310915e+05 2.228383e+10 18 617e5fe6 7372800 0.000000e+00 8.151052e+04 4.250155e+02 1.059637e+06 8.637388e+10 13 cea37d6d 819200 0.000000e+00 1.153062e+04 1.200141e+03 1.153062e+05 1.343956e+09 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_atlas.sirocco000066400000000000000000000130161507764646700331550ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 1.778570e+05 1.735127e+04 1.778570e+06 3.193419e+11 10 cea37d6d 819200 0.000000e+00 5.904224e+03 6.575598e+02 5.668055e+05 3.388055e+09 96 afdd228b 3276800 0.000000e+00 4.953149e+04 6.709149e+03 6.439093e+05 3.247895e+10 13 25ebb669 16588800 0.000000e+00 7.801727e+05 1.214440e+05 7.801727e+06 6.234180e+12 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.552387e+04 5.714037e+03 1.054954e+06 5.919546e+10 19 cea37d6d 819200 0.000000e+00 9.707597e+03 9.439210e+02 9.707597e+04 9.512842e+08 10 afdd228b 3276800 0.000000e+00 2.633937e+04 3.608518e+03 3.950905e+05 1.060175e+10 15 25ebb669 16588800 0.000000e+00 1.397955e+05 9.676594e+03 1.537750e+06 2.160006e+11 11 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.675882e+04 6.232185e+03 1.248694e+06 7.172888e+10 22 cea37d6d 819200 0.000000e+00 9.541018e+03 9.285702e+02 9.541018e+04 9.189326e+08 10 afdd228b 3276800 0.000000e+00 2.651477e+04 2.554649e+03 3.181772e+05 8.514711e+09 12 25ebb669 16588800 0.000000e+00 1.382255e+05 7.304438e+03 1.382255e+06 1.915965e+11 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.624387e+04 5.549003e+03 8.436581e+05 4.791247e+10 15 cea37d6d 819200 0.000000e+00 9.661577e+03 7.114114e+02 9.661577e+04 9.385217e+08 10 afdd228b 3276800 0.000000e+00 2.574090e+04 2.071791e+03 5.148179e+05 1.333772e+10 20 25ebb669 16588800 0.000000e+00 1.361676e+05 1.958095e+03 1.770178e+06 2.410907e+11 13 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.895007e+04 8.369498e+03 7.663509e+05 4.608707e+10 13 cea37d6d 819200 0.000000e+00 9.910778e+03 1.200981e+03 9.910778e+04 9.966588e+08 10 afdd228b 3276800 0.000000e+00 2.572979e+04 2.095041e+03 5.917851e+05 1.532746e+10 23 25ebb669 16588800 0.000000e+00 1.422314e+05 1.728252e+04 1.422314e+06 2.052844e+11 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.attila000066400000000000000000000100121507764646700326270ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 2.515766e+05 2.096151e+04 2.515766e+06 6.373017e+11 10 afdd228b 3276800 0.000000e+00 7.350482e+04 4.292777e+03 9.555626e+05 7.047802e+10 13 cea37d6d 819200 0.000000e+00 9.586125e+03 1.023620e+03 2.108948e+05 2.044715e+09 22 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 6.990473e+04 4.071360e+03 1.118476e+06 7.845196e+10 16 afdd228b 3276800 0.000000e+00 2.992444e+04 7.760944e+02 4.787910e+05 1.433719e+10 16 cea37d6d 819200 0.000000e+00 9.620220e+03 2.335102e+02 1.058224e+05 1.018635e+09 11 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 6.793522e+04 8.600858e+02 6.793522e+05 4.615934e+10 10 afdd228b 3276800 0.000000e+00 2.989699e+04 1.490344e+03 3.587638e+05 1.075261e+10 12 cea37d6d 819200 0.000000e+00 9.974140e+03 1.055336e+03 1.097155e+05 1.106569e+09 11 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 6.962168e+04 1.952172e+02 1.322812e+06 9.209711e+10 19 afdd228b 3276800 0.000000e+00 3.047853e+04 4.777511e+01 4.571780e+05 1.393415e+10 15 cea37d6d 819200 0.000000e+00 1.119488e+04 2.171263e+03 1.231437e+05 1.430437e+09 11 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.idgraf000066400000000000000000000216221507764646700326160ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 2.127055e+05 1.216918e+04 3.190582e+06 6.808756e+11 15 afdd228b 3276800 0.000000e+00 6.346686e+04 7.329654e+02 6.346686e+05 4.028580e+10 10 cea37d6d 819200 0.000000e+00 7.969263e+03 1.770463e+02 1.354775e+05 1.080188e+09 17 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.656100e+04 6.943816e+03 1.471537e+06 1.281974e+11 17 afdd228b 3276800 0.000000e+00 3.567215e+04 3.302464e+03 3.567215e+05 1.283409e+10 10 cea37d6d 819200 0.000000e+00 1.101988e+04 5.146633e+02 1.101988e+05 1.217027e+09 10 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.935885e+04 6.351673e+03 3.935885e+05 1.589463e+10 10 cea37d6d 819200 0.000000e+00 1.194615e+04 1.359754e+03 1.194615e+05 1.445595e+09 10 617e5fe6 7372800 0.000000e+00 8.781176e+04 9.198610e+03 1.317176e+06 1.169328e+11 15 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.754335e+04 8.654029e+03 1.575780e+06 1.392972e+11 18 afdd228b 3276800 0.000000e+00 3.542725e+04 1.501284e+03 3.542725e+05 1.257344e+10 10 cea37d6d 819200 0.000000e+00 1.193774e+04 1.685032e+03 1.193774e+05 1.453490e+09 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.763521e+04 5.876858e+03 9.639873e+05 8.485914e+10 11 afdd228b 3276800 0.000000e+00 3.909159e+04 6.650440e+03 4.300075e+05 1.729619e+10 11 cea37d6d 819200 0.000000e+00 1.211577e+04 1.649480e+03 1.211577e+05 1.495126e+09 10 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.616388e+04 4.981316e+03 1.550950e+06 1.340825e+11 18 afdd228b 3276800 0.000000e+00 3.647899e+04 2.965394e+03 4.377479e+05 1.607412e+10 12 cea37d6d 819200 0.000000e+00 1.073272e+04 1.010096e+02 1.073272e+05 1.152015e+09 10 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.786078e+04 7.200822e+03 1.317912e+06 1.165705e+11 15 afdd228b 3276800 0.000000e+00 3.795195e+04 3.399141e+03 3.795195e+05 1.451905e+10 10 cea37d6d 819200 0.000000e+00 1.163527e+04 1.023060e+03 1.163527e+05 1.364262e+09 10 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.814631e+04 6.725805e+03 1.498487e+06 1.328551e+11 17 cea37d6d 819200 0.000000e+00 1.170806e+04 1.094676e+03 1.170806e+05 1.382770e+09 10 afdd228b 3276800 0.000000e+00 4.283079e+04 7.621190e+03 4.283079e+05 1.892559e+10 10 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 9.172766e+04 1.075608e+04 1.375915e+06 1.279449e+11 15 cea37d6d 819200 0.000000e+00 1.117240e+04 8.447401e+02 1.117240e+05 1.255362e+09 10 afdd228b 3276800 0.000000e+00 3.472448e+04 1.278416e+03 3.819693e+05 1.328166e+10 11 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.mirage000066400000000000000000000100121507764646700326150ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 7.065042e+04 7.118479e+03 1.271707e+06 9.075877e+10 18 617e5fe6 7372800 0.000000e+00 2.321971e+05 3.386520e+04 2.554168e+06 6.056858e+11 11 cea37d6d 819200 0.000000e+00 8.644872e+03 1.175676e+03 4.581782e+05 4.034149e+09 53 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.469534e+04 1.243993e+03 9.714695e+05 3.374879e+10 28 617e5fe6 7372800 0.000000e+00 8.533007e+04 9.757927e+03 1.194621e+06 1.032701e+11 14 cea37d6d 819200 0.000000e+00 1.145973e+04 6.017234e+02 1.145973e+05 1.316874e+09 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.498011e+04 2.179531e+03 6.996022e+05 2.456717e+10 20 617e5fe6 7372800 0.000000e+00 8.442764e+04 6.770170e+03 1.350842e+06 1.147818e+11 16 cea37d6d 819200 0.000000e+00 1.080764e+04 2.151492e+02 1.188840e+05 1.285365e+09 11 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.506064e+04 2.956747e+03 6.310915e+05 2.228383e+10 18 617e5fe6 7372800 0.000000e+00 8.151052e+04 4.250155e+02 1.059637e+06 8.637388e+10 13 cea37d6d 819200 0.000000e+00 1.153062e+04 1.200141e+03 1.153062e+05 1.343956e+09 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_goto.sirocco000066400000000000000000000130161507764646700330210ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 1.778570e+05 1.735127e+04 1.778570e+06 3.193419e+11 10 cea37d6d 819200 0.000000e+00 5.904224e+03 6.575598e+02 5.668055e+05 3.388055e+09 96 afdd228b 3276800 0.000000e+00 4.953149e+04 6.709149e+03 6.439093e+05 3.247895e+10 13 25ebb669 16588800 0.000000e+00 7.801727e+05 1.214440e+05 7.801727e+06 6.234180e+12 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.552387e+04 5.714037e+03 1.054954e+06 5.919546e+10 19 cea37d6d 819200 0.000000e+00 9.707597e+03 9.439210e+02 9.707597e+04 9.512842e+08 10 afdd228b 3276800 0.000000e+00 2.633937e+04 3.608518e+03 3.950905e+05 1.060175e+10 15 25ebb669 16588800 0.000000e+00 1.397955e+05 9.676594e+03 1.537750e+06 2.160006e+11 11 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.675882e+04 6.232185e+03 1.248694e+06 7.172888e+10 22 cea37d6d 819200 0.000000e+00 9.541018e+03 9.285702e+02 9.541018e+04 9.189326e+08 10 afdd228b 3276800 0.000000e+00 2.651477e+04 2.554649e+03 3.181772e+05 8.514711e+09 12 25ebb669 16588800 0.000000e+00 1.382255e+05 7.304438e+03 1.382255e+06 1.915965e+11 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.624387e+04 5.549003e+03 8.436581e+05 4.791247e+10 15 cea37d6d 819200 0.000000e+00 9.661577e+03 7.114114e+02 9.661577e+04 9.385217e+08 10 afdd228b 3276800 0.000000e+00 2.574090e+04 2.071791e+03 5.148179e+05 1.333772e+10 20 25ebb669 16588800 0.000000e+00 1.361676e+05 1.958095e+03 1.770178e+06 2.410907e+11 13 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.895007e+04 8.369498e+03 7.663509e+05 4.608707e+10 13 cea37d6d 819200 0.000000e+00 9.910778e+03 1.200981e+03 9.910778e+04 9.966588e+08 10 afdd228b 3276800 0.000000e+00 2.572979e+04 2.095041e+03 5.917851e+05 1.532746e+10 23 25ebb669 16588800 0.000000e+00 1.422314e+05 1.728252e+04 1.422314e+06 2.052844e+11 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.attila000066400000000000000000000100121507764646700334620ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 2.515766e+05 2.096151e+04 2.515766e+06 6.373017e+11 10 afdd228b 3276800 0.000000e+00 7.350482e+04 4.292777e+03 9.555626e+05 7.047802e+10 13 cea37d6d 819200 0.000000e+00 9.586125e+03 1.023620e+03 2.108948e+05 2.044715e+09 22 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 6.990473e+04 4.071360e+03 1.118476e+06 7.845196e+10 16 afdd228b 3276800 0.000000e+00 2.992444e+04 7.760944e+02 4.787910e+05 1.433719e+10 16 cea37d6d 819200 0.000000e+00 9.620220e+03 2.335102e+02 1.058224e+05 1.018635e+09 11 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 6.793522e+04 8.600858e+02 6.793522e+05 4.615934e+10 10 afdd228b 3276800 0.000000e+00 2.989699e+04 1.490344e+03 3.587638e+05 1.075261e+10 12 cea37d6d 819200 0.000000e+00 9.974140e+03 1.055336e+03 1.097155e+05 1.106569e+09 11 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 6.962168e+04 1.952172e+02 1.322812e+06 9.209711e+10 19 afdd228b 3276800 0.000000e+00 3.047853e+04 4.777511e+01 4.571780e+05 1.393415e+10 15 cea37d6d 819200 0.000000e+00 1.119488e+04 2.171263e+03 1.231437e+05 1.430437e+09 11 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.idgraf000066400000000000000000000216221507764646700334510ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 2.127055e+05 1.216918e+04 3.190582e+06 6.808756e+11 15 afdd228b 3276800 0.000000e+00 6.346686e+04 7.329654e+02 6.346686e+05 4.028580e+10 10 cea37d6d 819200 0.000000e+00 7.969263e+03 1.770463e+02 1.354775e+05 1.080188e+09 17 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.656100e+04 6.943816e+03 1.471537e+06 1.281974e+11 17 afdd228b 3276800 0.000000e+00 3.567215e+04 3.302464e+03 3.567215e+05 1.283409e+10 10 cea37d6d 819200 0.000000e+00 1.101988e+04 5.146633e+02 1.101988e+05 1.217027e+09 10 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.935885e+04 6.351673e+03 3.935885e+05 1.589463e+10 10 cea37d6d 819200 0.000000e+00 1.194615e+04 1.359754e+03 1.194615e+05 1.445595e+09 10 617e5fe6 7372800 0.000000e+00 8.781176e+04 9.198610e+03 1.317176e+06 1.169328e+11 15 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.754335e+04 8.654029e+03 1.575780e+06 1.392972e+11 18 afdd228b 3276800 0.000000e+00 3.542725e+04 1.501284e+03 3.542725e+05 1.257344e+10 10 cea37d6d 819200 0.000000e+00 1.193774e+04 1.685032e+03 1.193774e+05 1.453490e+09 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.763521e+04 5.876858e+03 9.639873e+05 8.485914e+10 11 afdd228b 3276800 0.000000e+00 3.909159e+04 6.650440e+03 4.300075e+05 1.729619e+10 11 cea37d6d 819200 0.000000e+00 1.211577e+04 1.649480e+03 1.211577e+05 1.495126e+09 10 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.616388e+04 4.981316e+03 1.550950e+06 1.340825e+11 18 afdd228b 3276800 0.000000e+00 3.647899e+04 2.965394e+03 4.377479e+05 1.607412e+10 12 cea37d6d 819200 0.000000e+00 1.073272e+04 1.010096e+02 1.073272e+05 1.152015e+09 10 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.786078e+04 7.200822e+03 1.317912e+06 1.165705e+11 15 afdd228b 3276800 0.000000e+00 3.795195e+04 3.399141e+03 3.795195e+05 1.451905e+10 10 cea37d6d 819200 0.000000e+00 1.163527e+04 1.023060e+03 1.163527e+05 1.364262e+09 10 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 8.814631e+04 6.725805e+03 1.498487e+06 1.328551e+11 17 cea37d6d 819200 0.000000e+00 1.170806e+04 1.094676e+03 1.170806e+05 1.382770e+09 10 afdd228b 3276800 0.000000e+00 4.283079e+04 7.621190e+03 4.283079e+05 1.892559e+10 10 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 9.172766e+04 1.075608e+04 1.375915e+06 1.279449e+11 15 cea37d6d 819200 0.000000e+00 1.117240e+04 8.447401e+02 1.117240e+05 1.255362e+09 10 afdd228b 3276800 0.000000e+00 3.472448e+04 1.278416e+03 3.819693e+05 1.328166e+10 11 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.mirage000066400000000000000000000100121507764646700334500ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 7.065042e+04 7.118479e+03 1.271707e+06 9.075877e+10 18 617e5fe6 7372800 0.000000e+00 2.321971e+05 3.386520e+04 2.554168e+06 6.056858e+11 11 cea37d6d 819200 0.000000e+00 8.644872e+03 1.175676e+03 4.581782e+05 4.034149e+09 53 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.469534e+04 1.243993e+03 9.714695e+05 3.374879e+10 28 617e5fe6 7372800 0.000000e+00 8.533007e+04 9.757927e+03 1.194621e+06 1.032701e+11 14 cea37d6d 819200 0.000000e+00 1.145973e+04 6.017234e+02 1.145973e+05 1.316874e+09 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.498011e+04 2.179531e+03 6.996022e+05 2.456717e+10 20 617e5fe6 7372800 0.000000e+00 8.442764e+04 6.770170e+03 1.350842e+06 1.147818e+11 16 cea37d6d 819200 0.000000e+00 1.080764e+04 2.151492e+02 1.188840e+05 1.285365e+09 11 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 3276800 0.000000e+00 3.506064e+04 2.956747e+03 6.310915e+05 2.228383e+10 18 617e5fe6 7372800 0.000000e+00 8.151052e+04 4.250155e+02 1.059637e+06 8.637388e+10 13 cea37d6d 819200 0.000000e+00 1.153062e+04 1.200141e+03 1.153062e+05 1.343956e+09 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_getrf_openblas.sirocco000066400000000000000000000130161507764646700336540ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 1.778570e+05 1.735127e+04 1.778570e+06 3.193419e+11 10 cea37d6d 819200 0.000000e+00 5.904224e+03 6.575598e+02 5.668055e+05 3.388055e+09 96 afdd228b 3276800 0.000000e+00 4.953149e+04 6.709149e+03 6.439093e+05 3.247895e+10 13 25ebb669 16588800 0.000000e+00 7.801727e+05 1.214440e+05 7.801727e+06 6.234180e+12 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.552387e+04 5.714037e+03 1.054954e+06 5.919546e+10 19 cea37d6d 819200 0.000000e+00 9.707597e+03 9.439210e+02 9.707597e+04 9.512842e+08 10 afdd228b 3276800 0.000000e+00 2.633937e+04 3.608518e+03 3.950905e+05 1.060175e+10 15 25ebb669 16588800 0.000000e+00 1.397955e+05 9.676594e+03 1.537750e+06 2.160006e+11 11 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.675882e+04 6.232185e+03 1.248694e+06 7.172888e+10 22 cea37d6d 819200 0.000000e+00 9.541018e+03 9.285702e+02 9.541018e+04 9.189326e+08 10 afdd228b 3276800 0.000000e+00 2.651477e+04 2.554649e+03 3.181772e+05 8.514711e+09 12 25ebb669 16588800 0.000000e+00 1.382255e+05 7.304438e+03 1.382255e+06 1.915965e+11 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.624387e+04 5.549003e+03 8.436581e+05 4.791247e+10 15 cea37d6d 819200 0.000000e+00 9.661577e+03 7.114114e+02 9.661577e+04 9.385217e+08 10 afdd228b 3276800 0.000000e+00 2.574090e+04 2.071791e+03 5.148179e+05 1.333772e+10 20 25ebb669 16588800 0.000000e+00 1.361676e+05 1.958095e+03 1.770178e+06 2.410907e+11 13 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 7372800 0.000000e+00 5.895007e+04 8.369498e+03 7.663509e+05 4.608707e+10 13 cea37d6d 819200 0.000000e+00 9.910778e+03 1.200981e+03 9.910778e+04 9.966588e+08 10 afdd228b 3276800 0.000000e+00 2.572979e+04 2.095041e+03 5.917851e+05 1.532746e+10 23 25ebb669 16588800 0.000000e+00 1.422314e+05 1.728252e+04 1.422314e+06 2.052844e+11 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.attila000066400000000000000000000100271507764646700321520ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 8.869540e+04 4.010843e+03 1.765039e+07 1.568709e+12 199 d39bff17 6553600 0.000000e+00 2.736718e+04 1.452565e+03 3.886139e+06 1.066523e+11 142 2c1922b7 1638400 0.000000e+00 4.006489e+03 3.502972e+02 8.493756e+05 3.429028e+09 212 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.250005e+03 1.530886e+03 8.555006e+05 6.478930e+09 118 d39bff17 6553600 0.000000e+00 2.060505e+03 3.149423e+02 4.265246e+05 8.993882e+08 207 2c1922b7 1638400 0.000000e+00 5.794447e+02 1.035504e+02 9.155226e+04 5.474365e+07 158 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.906255e+03 1.105050e+03 1.042844e+06 7.386541e+09 151 d39bff17 6553600 0.000000e+00 2.044032e+03 3.248232e+02 3.863220e+05 8.095958e+08 189 2c1922b7 1638400 0.000000e+00 6.103626e+02 1.085471e+02 1.062031e+05 6.687255e+07 174 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.859160e+03 1.144150e+03 1.008296e+06 7.108501e+09 147 d39bff17 6553600 0.000000e+00 2.022724e+03 3.006626e+02 4.308402e+05 8.907256e+08 213 2c1922b7 1638400 0.000000e+00 5.771721e+02 9.999833e+01 9.638774e+04 5.730226e+07 167 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.idgraf000066400000000000000000000216371507764646700321410ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.925318e+03 8.376976e+02 5.748014e+05 4.038926e+09 83 d39bff17 6553600 0.000000e+00 2.271937e+03 3.454949e+02 2.340095e+05 5.439496e+08 103 2c1922b7 1638400 0.000000e+00 7.049814e+02 1.197767e+02 1.254867e+05 9.101946e+07 178 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.291615e+03 1.041939e+03 4.593717e+05 3.417957e+09 63 d39bff17 6553600 0.000000e+00 2.282720e+03 4.096195e+02 3.903452e+05 9.197407e+08 171 2c1922b7 1638400 0.000000e+00 6.999720e+02 1.145665e+02 1.343946e+05 9.659256e+07 192 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.177388e+03 9.455873e+02 3.947563e+05 2.882497e+09 55 d39bff17 6553600 0.000000e+00 2.335362e+03 3.317057e+02 2.825788e+05 6.732374e+08 121 2c1922b7 1638400 0.000000e+00 7.266144e+02 9.381637e+01 4.432348e+04 3.274297e+07 61 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 9.210227e+04 5.563000e+02 1.252591e+07 1.153707e+12 136 d39bff17 6553600 0.000000e+00 2.809162e+04 4.267578e+02 1.573131e+06 4.420199e+10 56 2c1922b7 1638400 0.000000e+00 3.732094e+03 1.582101e+02 3.993341e+05 1.493031e+09 107 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.047943e+03 9.923280e+02 4.017327e+05 2.887518e+09 57 d39bff17 6553600 0.000000e+00 2.358363e+03 2.904964e+02 2.381946e+05 5.702726e+08 101 2c1922b7 1638400 0.000000e+00 7.376273e+02 1.192099e+02 4.425764e+04 3.349831e+07 60 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.125894e+03 1.170430e+03 6.769599e+05 4.954085e+09 95 d39bff17 6553600 0.000000e+00 2.913435e+03 7.837592e+02 2.651226e+05 8.283167e+08 91 2c1922b7 1638400 0.000000e+00 7.396845e+02 1.557697e+02 7.692719e+04 5.942533e+07 104 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.906666e+03 1.069281e+03 3.177066e+05 2.246888e+09 46 d39bff17 6553600 0.000000e+00 2.331985e+03 3.108312e+02 2.914982e+05 6.918465e+08 125 2c1922b7 1638400 0.000000e+00 7.036069e+02 1.117682e+02 5.277052e+04 3.806661e+07 75 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.634969e+03 1.278868e+03 4.122883e+05 3.236126e+09 54 d39bff17 6553600 0.000000e+00 2.361692e+03 2.763159e+02 1.747652e+05 4.183915e+08 74 2c1922b7 1638400 0.000000e+00 7.215132e+02 1.060983e+02 7.287283e+04 5.371565e+07 101 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.011366e+03 8.280915e+02 6.871138e+05 4.884809e+09 98 d39bff17 6553600 0.000000e+00 2.294721e+03 3.366230e+02 4.451759e+05 1.043537e+09 194 2c1922b7 1638400 0.000000e+00 6.840134e+02 1.166270e+02 1.114942e+05 7.848061e+07 163 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.mirage000066400000000000000000000100261507764646700321370ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.758103e+04 7.024890e+02 7.033162e+06 1.941076e+11 255 ff82dda0 14745600 0.000000e+00 9.143755e+04 1.725750e+03 1.234407e+07 1.129114e+12 135 2c1922b7 1638400 0.000000e+00 3.516018e+03 1.528455e+02 1.613852e+06 5.685057e+09 459 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.131008e+03 3.294125e+02 6.755295e+05 1.473957e+09 317 ff82dda0 14745600 0.000000e+00 7.209283e+03 1.090675e+03 1.564414e+06 1.153644e+10 217 2c1922b7 1638400 0.000000e+00 6.237527e+02 1.148972e+02 1.210080e+05 7.804013e+07 194 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.163459e+03 3.374464e+02 5.538454e+05 1.227372e+09 256 ff82dda0 14745600 0.000000e+00 6.895326e+03 1.111793e+03 1.234263e+06 8.731908e+09 179 2c1922b7 1638400 0.000000e+00 6.290993e+02 1.019490e+02 1.333690e+05 8.610581e+07 212 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.119354e+03 3.243594e+02 5.912998e+05 1.282527e+09 279 ff82dda0 14745600 0.000000e+00 6.998019e+03 1.239620e+03 1.070697e+06 7.727865e+09 153 2c1922b7 1638400 0.000000e+00 6.140937e+02 1.075567e+02 1.430838e+05 9.056234e+07 233 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll.sirocco000066400000000000000000000130401507764646700323330ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.402547e+04 6.005726e+03 7.111323e+06 2.495045e+11 209 2c1922b7 1638400 0.000000e+00 6.443940e+03 1.476966e+03 1.610985e+05 1.092645e+09 25 d39bff17 6553600 0.000000e+00 1.041247e+04 1.992240e+03 3.092503e+06 3.337940e+10 297 0e8bce2b 33177600 0.000000e+00 1.103734e+05 1.699353e+04 1.037510e+07 1.172281e+12 94 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.238292e+03 4.902889e+02 6.768030e+05 2.241926e+09 209 2c1922b7 1638400 0.000000e+00 5.889641e+02 1.063542e+02 1.272162e+05 7.736903e+07 216 d39bff17 6553600 0.000000e+00 1.349909e+03 1.936514e+02 2.942801e+05 4.054266e+08 218 0e8bce2b 33177600 0.000000e+00 7.038455e+03 8.353918e+02 1.182460e+06 8.439938e+09 168 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.179744e+03 4.016259e+02 6.804652e+05 2.198224e+09 214 2c1922b7 1638400 0.000000e+00 5.796961e+02 1.048897e+02 1.199971e+05 7.183924e+07 207 d39bff17 6553600 0.000000e+00 1.343917e+03 2.039127e+02 2.244341e+05 3.085646e+08 167 0e8bce2b 33177600 0.000000e+00 6.913467e+03 8.366528e+02 1.244424e+06 8.729283e+09 180 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.362936e+03 5.457359e+02 6.524096e+05 2.251791e+09 194 2c1922b7 1638400 0.000000e+00 5.405600e+02 9.344101e+01 1.513568e+05 8.426217e+07 280 d39bff17 6553600 0.000000e+00 1.275634e+03 1.830051e+02 2.270629e+05 2.956105e+08 178 0e8bce2b 33177600 0.000000e+00 6.852169e+03 8.897789e+02 8.291125e+05 5.777016e+09 121 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.306190e+03 4.921154e+02 7.009122e+05 2.368690e+09 212 2c1922b7 1638400 0.000000e+00 5.641572e+02 1.012475e+02 1.376544e+05 8.015997e+07 244 d39bff17 6553600 0.000000e+00 1.355727e+03 1.656730e+02 2.331851e+05 3.208564e+08 172 0e8bce2b 33177600 0.000000e+00 6.732998e+03 6.928655e+02 1.144610e+06 7.788266e+09 170 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.attila000066400000000000000000000100271507764646700333360ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 8.869540e+04 4.010843e+03 1.765039e+07 1.568709e+12 199 d39bff17 6553600 0.000000e+00 2.736718e+04 1.452565e+03 3.886139e+06 1.066523e+11 142 2c1922b7 1638400 0.000000e+00 4.006489e+03 3.502972e+02 8.493756e+05 3.429028e+09 212 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.250005e+03 1.530886e+03 8.555006e+05 6.478930e+09 118 d39bff17 6553600 0.000000e+00 2.060505e+03 3.149423e+02 4.265246e+05 8.993882e+08 207 2c1922b7 1638400 0.000000e+00 5.794447e+02 1.035504e+02 9.155226e+04 5.474365e+07 158 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.906255e+03 1.105050e+03 1.042844e+06 7.386541e+09 151 d39bff17 6553600 0.000000e+00 2.044032e+03 3.248232e+02 3.863220e+05 8.095958e+08 189 2c1922b7 1638400 0.000000e+00 6.103626e+02 1.085471e+02 1.062031e+05 6.687255e+07 174 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.859160e+03 1.144150e+03 1.008296e+06 7.108501e+09 147 d39bff17 6553600 0.000000e+00 2.022724e+03 3.006626e+02 4.308402e+05 8.907256e+08 213 2c1922b7 1638400 0.000000e+00 5.771721e+02 9.999833e+01 9.638774e+04 5.730226e+07 167 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.idgraf000066400000000000000000000216371507764646700333250ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.925318e+03 8.376976e+02 5.748014e+05 4.038926e+09 83 d39bff17 6553600 0.000000e+00 2.271937e+03 3.454949e+02 2.340095e+05 5.439496e+08 103 2c1922b7 1638400 0.000000e+00 7.049814e+02 1.197767e+02 1.254867e+05 9.101946e+07 178 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.291615e+03 1.041939e+03 4.593717e+05 3.417957e+09 63 d39bff17 6553600 0.000000e+00 2.282720e+03 4.096195e+02 3.903452e+05 9.197407e+08 171 2c1922b7 1638400 0.000000e+00 6.999720e+02 1.145665e+02 1.343946e+05 9.659256e+07 192 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.177388e+03 9.455873e+02 3.947563e+05 2.882497e+09 55 d39bff17 6553600 0.000000e+00 2.335362e+03 3.317057e+02 2.825788e+05 6.732374e+08 121 2c1922b7 1638400 0.000000e+00 7.266144e+02 9.381637e+01 4.432348e+04 3.274297e+07 61 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 9.210227e+04 5.563000e+02 1.252591e+07 1.153707e+12 136 d39bff17 6553600 0.000000e+00 2.809162e+04 4.267578e+02 1.573131e+06 4.420199e+10 56 2c1922b7 1638400 0.000000e+00 3.732094e+03 1.582101e+02 3.993341e+05 1.493031e+09 107 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.047943e+03 9.923280e+02 4.017327e+05 2.887518e+09 57 d39bff17 6553600 0.000000e+00 2.358363e+03 2.904964e+02 2.381946e+05 5.702726e+08 101 2c1922b7 1638400 0.000000e+00 7.376273e+02 1.192099e+02 4.425764e+04 3.349831e+07 60 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.125894e+03 1.170430e+03 6.769599e+05 4.954085e+09 95 d39bff17 6553600 0.000000e+00 2.913435e+03 7.837592e+02 2.651226e+05 8.283167e+08 91 2c1922b7 1638400 0.000000e+00 7.396845e+02 1.557697e+02 7.692719e+04 5.942533e+07 104 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.906666e+03 1.069281e+03 3.177066e+05 2.246888e+09 46 d39bff17 6553600 0.000000e+00 2.331985e+03 3.108312e+02 2.914982e+05 6.918465e+08 125 2c1922b7 1638400 0.000000e+00 7.036069e+02 1.117682e+02 5.277052e+04 3.806661e+07 75 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.634969e+03 1.278868e+03 4.122883e+05 3.236126e+09 54 d39bff17 6553600 0.000000e+00 2.361692e+03 2.763159e+02 1.747652e+05 4.183915e+08 74 2c1922b7 1638400 0.000000e+00 7.215132e+02 1.060983e+02 7.287283e+04 5.371565e+07 101 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.011366e+03 8.280915e+02 6.871138e+05 4.884809e+09 98 d39bff17 6553600 0.000000e+00 2.294721e+03 3.366230e+02 4.451759e+05 1.043537e+09 194 2c1922b7 1638400 0.000000e+00 6.840134e+02 1.166270e+02 1.114942e+05 7.848061e+07 163 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.mirage000066400000000000000000000100261507764646700333230ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.758103e+04 7.024890e+02 7.033162e+06 1.941076e+11 255 ff82dda0 14745600 0.000000e+00 9.143755e+04 1.725750e+03 1.234407e+07 1.129114e+12 135 2c1922b7 1638400 0.000000e+00 3.516018e+03 1.528455e+02 1.613852e+06 5.685057e+09 459 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.131008e+03 3.294125e+02 6.755295e+05 1.473957e+09 317 ff82dda0 14745600 0.000000e+00 7.209283e+03 1.090675e+03 1.564414e+06 1.153644e+10 217 2c1922b7 1638400 0.000000e+00 6.237527e+02 1.148972e+02 1.210080e+05 7.804013e+07 194 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.163459e+03 3.374464e+02 5.538454e+05 1.227372e+09 256 ff82dda0 14745600 0.000000e+00 6.895326e+03 1.111793e+03 1.234263e+06 8.731908e+09 179 2c1922b7 1638400 0.000000e+00 6.290993e+02 1.019490e+02 1.333690e+05 8.610581e+07 212 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.119354e+03 3.243594e+02 5.912998e+05 1.282527e+09 279 ff82dda0 14745600 0.000000e+00 6.998019e+03 1.239620e+03 1.070697e+06 7.727865e+09 153 2c1922b7 1638400 0.000000e+00 6.140937e+02 1.075567e+02 1.430838e+05 9.056234e+07 233 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_atlas.sirocco000066400000000000000000000130401507764646700335170ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.402547e+04 6.005726e+03 7.111323e+06 2.495045e+11 209 2c1922b7 1638400 0.000000e+00 6.443940e+03 1.476966e+03 1.610985e+05 1.092645e+09 25 d39bff17 6553600 0.000000e+00 1.041247e+04 1.992240e+03 3.092503e+06 3.337940e+10 297 0e8bce2b 33177600 0.000000e+00 1.103734e+05 1.699353e+04 1.037510e+07 1.172281e+12 94 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.238292e+03 4.902889e+02 6.768030e+05 2.241926e+09 209 2c1922b7 1638400 0.000000e+00 5.889641e+02 1.063542e+02 1.272162e+05 7.736903e+07 216 d39bff17 6553600 0.000000e+00 1.349909e+03 1.936514e+02 2.942801e+05 4.054266e+08 218 0e8bce2b 33177600 0.000000e+00 7.038455e+03 8.353918e+02 1.182460e+06 8.439938e+09 168 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.179744e+03 4.016259e+02 6.804652e+05 2.198224e+09 214 2c1922b7 1638400 0.000000e+00 5.796961e+02 1.048897e+02 1.199971e+05 7.183924e+07 207 d39bff17 6553600 0.000000e+00 1.343917e+03 2.039127e+02 2.244341e+05 3.085646e+08 167 0e8bce2b 33177600 0.000000e+00 6.913467e+03 8.366528e+02 1.244424e+06 8.729283e+09 180 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.362936e+03 5.457359e+02 6.524096e+05 2.251791e+09 194 2c1922b7 1638400 0.000000e+00 5.405600e+02 9.344101e+01 1.513568e+05 8.426217e+07 280 d39bff17 6553600 0.000000e+00 1.275634e+03 1.830051e+02 2.270629e+05 2.956105e+08 178 0e8bce2b 33177600 0.000000e+00 6.852169e+03 8.897789e+02 8.291125e+05 5.777016e+09 121 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.306190e+03 4.921154e+02 7.009122e+05 2.368690e+09 212 2c1922b7 1638400 0.000000e+00 5.641572e+02 1.012475e+02 1.376544e+05 8.015997e+07 244 d39bff17 6553600 0.000000e+00 1.355727e+03 1.656730e+02 2.331851e+05 3.208564e+08 172 0e8bce2b 33177600 0.000000e+00 6.732998e+03 6.928655e+02 1.144610e+06 7.788266e+09 170 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.attila000066400000000000000000000100271507764646700332020ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 8.869540e+04 4.010843e+03 1.765039e+07 1.568709e+12 199 d39bff17 6553600 0.000000e+00 2.736718e+04 1.452565e+03 3.886139e+06 1.066523e+11 142 2c1922b7 1638400 0.000000e+00 4.006489e+03 3.502972e+02 8.493756e+05 3.429028e+09 212 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.250005e+03 1.530886e+03 8.555006e+05 6.478930e+09 118 d39bff17 6553600 0.000000e+00 2.060505e+03 3.149423e+02 4.265246e+05 8.993882e+08 207 2c1922b7 1638400 0.000000e+00 5.794447e+02 1.035504e+02 9.155226e+04 5.474365e+07 158 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.906255e+03 1.105050e+03 1.042844e+06 7.386541e+09 151 d39bff17 6553600 0.000000e+00 2.044032e+03 3.248232e+02 3.863220e+05 8.095958e+08 189 2c1922b7 1638400 0.000000e+00 6.103626e+02 1.085471e+02 1.062031e+05 6.687255e+07 174 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.859160e+03 1.144150e+03 1.008296e+06 7.108501e+09 147 d39bff17 6553600 0.000000e+00 2.022724e+03 3.006626e+02 4.308402e+05 8.907256e+08 213 2c1922b7 1638400 0.000000e+00 5.771721e+02 9.999833e+01 9.638774e+04 5.730226e+07 167 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.idgraf000066400000000000000000000216371507764646700331710ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.925318e+03 8.376976e+02 5.748014e+05 4.038926e+09 83 d39bff17 6553600 0.000000e+00 2.271937e+03 3.454949e+02 2.340095e+05 5.439496e+08 103 2c1922b7 1638400 0.000000e+00 7.049814e+02 1.197767e+02 1.254867e+05 9.101946e+07 178 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.291615e+03 1.041939e+03 4.593717e+05 3.417957e+09 63 d39bff17 6553600 0.000000e+00 2.282720e+03 4.096195e+02 3.903452e+05 9.197407e+08 171 2c1922b7 1638400 0.000000e+00 6.999720e+02 1.145665e+02 1.343946e+05 9.659256e+07 192 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.177388e+03 9.455873e+02 3.947563e+05 2.882497e+09 55 d39bff17 6553600 0.000000e+00 2.335362e+03 3.317057e+02 2.825788e+05 6.732374e+08 121 2c1922b7 1638400 0.000000e+00 7.266144e+02 9.381637e+01 4.432348e+04 3.274297e+07 61 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 9.210227e+04 5.563000e+02 1.252591e+07 1.153707e+12 136 d39bff17 6553600 0.000000e+00 2.809162e+04 4.267578e+02 1.573131e+06 4.420199e+10 56 2c1922b7 1638400 0.000000e+00 3.732094e+03 1.582101e+02 3.993341e+05 1.493031e+09 107 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.047943e+03 9.923280e+02 4.017327e+05 2.887518e+09 57 d39bff17 6553600 0.000000e+00 2.358363e+03 2.904964e+02 2.381946e+05 5.702726e+08 101 2c1922b7 1638400 0.000000e+00 7.376273e+02 1.192099e+02 4.425764e+04 3.349831e+07 60 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.125894e+03 1.170430e+03 6.769599e+05 4.954085e+09 95 d39bff17 6553600 0.000000e+00 2.913435e+03 7.837592e+02 2.651226e+05 8.283167e+08 91 2c1922b7 1638400 0.000000e+00 7.396845e+02 1.557697e+02 7.692719e+04 5.942533e+07 104 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.906666e+03 1.069281e+03 3.177066e+05 2.246888e+09 46 d39bff17 6553600 0.000000e+00 2.331985e+03 3.108312e+02 2.914982e+05 6.918465e+08 125 2c1922b7 1638400 0.000000e+00 7.036069e+02 1.117682e+02 5.277052e+04 3.806661e+07 75 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.634969e+03 1.278868e+03 4.122883e+05 3.236126e+09 54 d39bff17 6553600 0.000000e+00 2.361692e+03 2.763159e+02 1.747652e+05 4.183915e+08 74 2c1922b7 1638400 0.000000e+00 7.215132e+02 1.060983e+02 7.287283e+04 5.371565e+07 101 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.011366e+03 8.280915e+02 6.871138e+05 4.884809e+09 98 d39bff17 6553600 0.000000e+00 2.294721e+03 3.366230e+02 4.451759e+05 1.043537e+09 194 2c1922b7 1638400 0.000000e+00 6.840134e+02 1.166270e+02 1.114942e+05 7.848061e+07 163 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.mirage000066400000000000000000000100261507764646700331670ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.758103e+04 7.024890e+02 7.033162e+06 1.941076e+11 255 ff82dda0 14745600 0.000000e+00 9.143755e+04 1.725750e+03 1.234407e+07 1.129114e+12 135 2c1922b7 1638400 0.000000e+00 3.516018e+03 1.528455e+02 1.613852e+06 5.685057e+09 459 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.131008e+03 3.294125e+02 6.755295e+05 1.473957e+09 317 ff82dda0 14745600 0.000000e+00 7.209283e+03 1.090675e+03 1.564414e+06 1.153644e+10 217 2c1922b7 1638400 0.000000e+00 6.237527e+02 1.148972e+02 1.210080e+05 7.804013e+07 194 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.163459e+03 3.374464e+02 5.538454e+05 1.227372e+09 256 ff82dda0 14745600 0.000000e+00 6.895326e+03 1.111793e+03 1.234263e+06 8.731908e+09 179 2c1922b7 1638400 0.000000e+00 6.290993e+02 1.019490e+02 1.333690e+05 8.610581e+07 212 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.119354e+03 3.243594e+02 5.912998e+05 1.282527e+09 279 ff82dda0 14745600 0.000000e+00 6.998019e+03 1.239620e+03 1.070697e+06 7.727865e+09 153 2c1922b7 1638400 0.000000e+00 6.140937e+02 1.075567e+02 1.430838e+05 9.056234e+07 233 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_goto.sirocco000066400000000000000000000130401507764646700333630ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.402547e+04 6.005726e+03 7.111323e+06 2.495045e+11 209 2c1922b7 1638400 0.000000e+00 6.443940e+03 1.476966e+03 1.610985e+05 1.092645e+09 25 d39bff17 6553600 0.000000e+00 1.041247e+04 1.992240e+03 3.092503e+06 3.337940e+10 297 0e8bce2b 33177600 0.000000e+00 1.103734e+05 1.699353e+04 1.037510e+07 1.172281e+12 94 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.238292e+03 4.902889e+02 6.768030e+05 2.241926e+09 209 2c1922b7 1638400 0.000000e+00 5.889641e+02 1.063542e+02 1.272162e+05 7.736903e+07 216 d39bff17 6553600 0.000000e+00 1.349909e+03 1.936514e+02 2.942801e+05 4.054266e+08 218 0e8bce2b 33177600 0.000000e+00 7.038455e+03 8.353918e+02 1.182460e+06 8.439938e+09 168 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.179744e+03 4.016259e+02 6.804652e+05 2.198224e+09 214 2c1922b7 1638400 0.000000e+00 5.796961e+02 1.048897e+02 1.199971e+05 7.183924e+07 207 d39bff17 6553600 0.000000e+00 1.343917e+03 2.039127e+02 2.244341e+05 3.085646e+08 167 0e8bce2b 33177600 0.000000e+00 6.913467e+03 8.366528e+02 1.244424e+06 8.729283e+09 180 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.362936e+03 5.457359e+02 6.524096e+05 2.251791e+09 194 2c1922b7 1638400 0.000000e+00 5.405600e+02 9.344101e+01 1.513568e+05 8.426217e+07 280 d39bff17 6553600 0.000000e+00 1.275634e+03 1.830051e+02 2.270629e+05 2.956105e+08 178 0e8bce2b 33177600 0.000000e+00 6.852169e+03 8.897789e+02 8.291125e+05 5.777016e+09 121 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.306190e+03 4.921154e+02 7.009122e+05 2.368690e+09 212 2c1922b7 1638400 0.000000e+00 5.641572e+02 1.012475e+02 1.376544e+05 8.015997e+07 244 d39bff17 6553600 0.000000e+00 1.355727e+03 1.656730e+02 2.331851e+05 3.208564e+08 172 0e8bce2b 33177600 0.000000e+00 6.732998e+03 6.928655e+02 1.144610e+06 7.788266e+09 170 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.attila000066400000000000000000000100271507764646700340350ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 8.869540e+04 4.010843e+03 1.765039e+07 1.568709e+12 199 d39bff17 6553600 0.000000e+00 2.736718e+04 1.452565e+03 3.886139e+06 1.066523e+11 142 2c1922b7 1638400 0.000000e+00 4.006489e+03 3.502972e+02 8.493756e+05 3.429028e+09 212 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.250005e+03 1.530886e+03 8.555006e+05 6.478930e+09 118 d39bff17 6553600 0.000000e+00 2.060505e+03 3.149423e+02 4.265246e+05 8.993882e+08 207 2c1922b7 1638400 0.000000e+00 5.794447e+02 1.035504e+02 9.155226e+04 5.474365e+07 158 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.906255e+03 1.105050e+03 1.042844e+06 7.386541e+09 151 d39bff17 6553600 0.000000e+00 2.044032e+03 3.248232e+02 3.863220e+05 8.095958e+08 189 2c1922b7 1638400 0.000000e+00 6.103626e+02 1.085471e+02 1.062031e+05 6.687255e+07 174 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.859160e+03 1.144150e+03 1.008296e+06 7.108501e+09 147 d39bff17 6553600 0.000000e+00 2.022724e+03 3.006626e+02 4.308402e+05 8.907256e+08 213 2c1922b7 1638400 0.000000e+00 5.771721e+02 9.999833e+01 9.638774e+04 5.730226e+07 167 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.idgraf000066400000000000000000000216371507764646700340240ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.925318e+03 8.376976e+02 5.748014e+05 4.038926e+09 83 d39bff17 6553600 0.000000e+00 2.271937e+03 3.454949e+02 2.340095e+05 5.439496e+08 103 2c1922b7 1638400 0.000000e+00 7.049814e+02 1.197767e+02 1.254867e+05 9.101946e+07 178 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.291615e+03 1.041939e+03 4.593717e+05 3.417957e+09 63 d39bff17 6553600 0.000000e+00 2.282720e+03 4.096195e+02 3.903452e+05 9.197407e+08 171 2c1922b7 1638400 0.000000e+00 6.999720e+02 1.145665e+02 1.343946e+05 9.659256e+07 192 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.177388e+03 9.455873e+02 3.947563e+05 2.882497e+09 55 d39bff17 6553600 0.000000e+00 2.335362e+03 3.317057e+02 2.825788e+05 6.732374e+08 121 2c1922b7 1638400 0.000000e+00 7.266144e+02 9.381637e+01 4.432348e+04 3.274297e+07 61 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 9.210227e+04 5.563000e+02 1.252591e+07 1.153707e+12 136 d39bff17 6553600 0.000000e+00 2.809162e+04 4.267578e+02 1.573131e+06 4.420199e+10 56 2c1922b7 1638400 0.000000e+00 3.732094e+03 1.582101e+02 3.993341e+05 1.493031e+09 107 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.047943e+03 9.923280e+02 4.017327e+05 2.887518e+09 57 d39bff17 6553600 0.000000e+00 2.358363e+03 2.904964e+02 2.381946e+05 5.702726e+08 101 2c1922b7 1638400 0.000000e+00 7.376273e+02 1.192099e+02 4.425764e+04 3.349831e+07 60 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.125894e+03 1.170430e+03 6.769599e+05 4.954085e+09 95 d39bff17 6553600 0.000000e+00 2.913435e+03 7.837592e+02 2.651226e+05 8.283167e+08 91 2c1922b7 1638400 0.000000e+00 7.396845e+02 1.557697e+02 7.692719e+04 5.942533e+07 104 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.906666e+03 1.069281e+03 3.177066e+05 2.246888e+09 46 d39bff17 6553600 0.000000e+00 2.331985e+03 3.108312e+02 2.914982e+05 6.918465e+08 125 2c1922b7 1638400 0.000000e+00 7.036069e+02 1.117682e+02 5.277052e+04 3.806661e+07 75 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.634969e+03 1.278868e+03 4.122883e+05 3.236126e+09 54 d39bff17 6553600 0.000000e+00 2.361692e+03 2.763159e+02 1.747652e+05 4.183915e+08 74 2c1922b7 1638400 0.000000e+00 7.215132e+02 1.060983e+02 7.287283e+04 5.371565e+07 101 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 7.011366e+03 8.280915e+02 6.871138e+05 4.884809e+09 98 d39bff17 6553600 0.000000e+00 2.294721e+03 3.366230e+02 4.451759e+05 1.043537e+09 194 2c1922b7 1638400 0.000000e+00 6.840134e+02 1.166270e+02 1.114942e+05 7.848061e+07 163 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.mirage000066400000000000000000000100261507764646700340220ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.758103e+04 7.024890e+02 7.033162e+06 1.941076e+11 255 ff82dda0 14745600 0.000000e+00 9.143755e+04 1.725750e+03 1.234407e+07 1.129114e+12 135 2c1922b7 1638400 0.000000e+00 3.516018e+03 1.528455e+02 1.613852e+06 5.685057e+09 459 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.131008e+03 3.294125e+02 6.755295e+05 1.473957e+09 317 ff82dda0 14745600 0.000000e+00 7.209283e+03 1.090675e+03 1.564414e+06 1.153644e+10 217 2c1922b7 1638400 0.000000e+00 6.237527e+02 1.148972e+02 1.210080e+05 7.804013e+07 194 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.163459e+03 3.374464e+02 5.538454e+05 1.227372e+09 256 ff82dda0 14745600 0.000000e+00 6.895326e+03 1.111793e+03 1.234263e+06 8.731908e+09 179 2c1922b7 1638400 0.000000e+00 6.290993e+02 1.019490e+02 1.333690e+05 8.610581e+07 212 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.119354e+03 3.243594e+02 5.912998e+05 1.282527e+09 279 ff82dda0 14745600 0.000000e+00 6.998019e+03 1.239620e+03 1.070697e+06 7.727865e+09 153 2c1922b7 1638400 0.000000e+00 6.140937e+02 1.075567e+02 1.430838e+05 9.056234e+07 233 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ll_openblas.sirocco000066400000000000000000000130401507764646700342160ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.402547e+04 6.005726e+03 7.111323e+06 2.495045e+11 209 2c1922b7 1638400 0.000000e+00 6.443940e+03 1.476966e+03 1.610985e+05 1.092645e+09 25 d39bff17 6553600 0.000000e+00 1.041247e+04 1.992240e+03 3.092503e+06 3.337940e+10 297 0e8bce2b 33177600 0.000000e+00 1.103734e+05 1.699353e+04 1.037510e+07 1.172281e+12 94 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.238292e+03 4.902889e+02 6.768030e+05 2.241926e+09 209 2c1922b7 1638400 0.000000e+00 5.889641e+02 1.063542e+02 1.272162e+05 7.736903e+07 216 d39bff17 6553600 0.000000e+00 1.349909e+03 1.936514e+02 2.942801e+05 4.054266e+08 218 0e8bce2b 33177600 0.000000e+00 7.038455e+03 8.353918e+02 1.182460e+06 8.439938e+09 168 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.179744e+03 4.016259e+02 6.804652e+05 2.198224e+09 214 2c1922b7 1638400 0.000000e+00 5.796961e+02 1.048897e+02 1.199971e+05 7.183924e+07 207 d39bff17 6553600 0.000000e+00 1.343917e+03 2.039127e+02 2.244341e+05 3.085646e+08 167 0e8bce2b 33177600 0.000000e+00 6.913467e+03 8.366528e+02 1.244424e+06 8.729283e+09 180 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.362936e+03 5.457359e+02 6.524096e+05 2.251791e+09 194 2c1922b7 1638400 0.000000e+00 5.405600e+02 9.344101e+01 1.513568e+05 8.426217e+07 280 d39bff17 6553600 0.000000e+00 1.275634e+03 1.830051e+02 2.270629e+05 2.956105e+08 178 0e8bce2b 33177600 0.000000e+00 6.852169e+03 8.897789e+02 8.291125e+05 5.777016e+09 121 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 3.306190e+03 4.921154e+02 7.009122e+05 2.368690e+09 212 2c1922b7 1638400 0.000000e+00 5.641572e+02 1.012475e+02 1.376544e+05 8.015997e+07 244 d39bff17 6553600 0.000000e+00 1.355727e+03 1.656730e+02 2.331851e+05 3.208564e+08 172 0e8bce2b 33177600 0.000000e+00 6.732998e+03 6.928655e+02 1.144610e+06 7.788266e+09 170 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.attila000066400000000000000000000100271507764646700321710ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 8.795690e+04 4.598673e+03 1.715160e+07 1.512725e+12 195 d39bff17 6553600 0.000000e+00 2.744119e+04 1.740624e+03 4.390591e+06 1.209678e+11 160 2c1922b7 1638400 0.000000e+00 4.091631e+03 5.062810e+02 7.283104e+05 3.025603e+09 178 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.847320e+03 1.168001e+03 8.285257e+05 5.838253e+09 121 d39bff17 6553600 0.000000e+00 2.123746e+03 3.153004e+02 4.226254e+05 9.173322e+08 199 2c1922b7 1638400 0.000000e+00 5.446913e+02 9.530021e+01 1.040360e+05 5.840221e+07 191 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.816836e+03 1.179433e+03 1.172496e+06 8.231973e+09 172 d39bff17 6553600 0.000000e+00 2.055421e+03 3.154533e+02 3.864192e+05 8.129623e+08 188 2c1922b7 1638400 0.000000e+00 6.121868e+02 1.314710e+02 9.550115e+04 6.116094e+07 156 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.940618e+03 1.147691e+03 1.006390e+06 7.175959e+09 145 d39bff17 6553600 0.000000e+00 2.093041e+03 3.377347e+02 3.851195e+05 8.270585e+08 184 2c1922b7 1638400 0.000000e+00 6.098259e+02 1.286153e+02 1.091588e+05 6.952888e+07 179 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.idgraf000066400000000000000000000216351507764646700321560ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.700359e+03 1.036459e+03 3.886208e+05 2.666205e+09 58 d39bff17 6553600 0.000000e+00 2.067623e+03 3.658691e+02 3.825102e+05 8.156510e+08 185 2c1922b7 1638400 0.000000e+00 6.344928e+02 1.313164e+02 1.091328e+05 7.220992e+07 172 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.634729e+03 1.380283e+03 4.777005e+05 3.306586e+09 72 d39bff17 6553600 0.000000e+00 2.102108e+03 3.770829e+02 2.690698e+05 5.838144e+08 128 2c1922b7 1638400 0.000000e+00 6.251127e+02 1.334964e+02 1.168961e+05 7.640580e+07 187 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 5.973111e+03 7.873858e+02 4.420102e+05 2.686054e+09 74 d39bff17 6553600 0.000000e+00 2.088129e+03 3.411148e+02 2.129891e+05 4.566174e+08 102 2c1922b7 1638400 0.000000e+00 5.816119e+02 1.098601e+02 6.165086e+04 3.713622e+07 106 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 5.813439e+03 5.835403e+02 2.441645e+05 1.433737e+09 42 d39bff17 6553600 0.000000e+00 2.170079e+03 5.032568e+02 7.161259e+04 1.637628e+08 33 2c1922b7 1638400 0.000000e+00 6.080488e+02 1.225789e+02 3.101049e+04 1.962219e+07 51 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 9.133611e+04 7.141260e+02 1.032098e+07 9.427358e+11 113 d39bff17 6553600 0.000000e+00 2.797330e+04 6.068477e+02 1.482585e+06 4.149232e+10 53 2c1922b7 1638400 0.000000e+00 3.803279e+03 2.345034e+02 3.308852e+05 1.263233e+09 87 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.609495e+03 1.035460e+03 4.296172e+05 2.909244e+09 65 d39bff17 6553600 0.000000e+00 2.129873e+03 3.868465e+02 3.407797e+05 7.497615e+08 160 2c1922b7 1638400 0.000000e+00 6.443548e+02 1.239934e+02 8.054435e+04 5.382094e+07 125 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 5.938773e+03 5.045720e+02 2.078570e+05 1.243326e+09 35 d39bff17 6553600 0.000000e+00 2.180034e+03 4.239424e+02 1.286220e+05 2.910041e+08 59 2c1922b7 1638400 0.000000e+00 5.996256e+02 1.220514e+02 5.816368e+04 3.632139e+07 97 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.467618e+03 9.651621e+02 2.910428e+05 1.924273e+09 45 d39bff17 6553600 0.000000e+00 2.057931e+03 3.333471e+02 1.872717e+05 3.955042e+08 91 2c1922b7 1638400 0.000000e+00 6.141799e+02 1.365857e+02 5.159111e+04 3.325329e+07 84 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.429538e+03 9.929716e+02 5.015040e+05 3.301346e+09 78 d39bff17 6553600 0.000000e+00 2.056349e+03 3.356881e+02 4.565094e+05 9.637588e+08 222 2c1922b7 1638400 0.000000e+00 6.374873e+02 1.360140e+02 9.498561e+04 6.330859e+07 149 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.mirage000066400000000000000000000100261507764646700321560ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.762618e+04 9.983022e+02 6.575032e+06 1.818802e+11 238 ff82dda0 14745600 0.000000e+00 9.068591e+04 1.985325e+03 1.324014e+07 1.201270e+12 146 2c1922b7 1638400 0.000000e+00 3.579286e+03 2.361748e+02 1.449611e+06 5.211162e+09 405 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.106536e+03 3.670134e+02 5.856171e+05 1.271070e+09 278 ff82dda0 14745600 0.000000e+00 7.299043e+03 1.073594e+03 1.284632e+06 9.579441e+09 176 2c1922b7 1638400 0.000000e+00 6.166979e+02 1.186269e+02 1.362902e+05 8.715990e+07 221 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.072463e+03 3.421274e+02 6.590431e+05 1.403064e+09 318 ff82dda0 14745600 0.000000e+00 6.791353e+03 1.183637e+03 1.195278e+06 8.364131e+09 176 2c1922b7 1638400 0.000000e+00 6.128580e+02 1.104622e+02 1.378931e+05 8.725430e+07 225 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.129169e+03 3.327197e+02 5.024838e+05 1.095999e+09 236 ff82dda0 14745600 0.000000e+00 6.700018e+03 1.125184e+03 1.139003e+06 7.846568e+09 170 2c1922b7 1638400 0.000000e+00 6.207819e+02 1.142635e+02 1.440214e+05 9.243491e+07 232 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru.sirocco000066400000000000000000000130351507764646700323560ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 4.935132e+04 1.056942e+04 1.875350e+06 9.679609e+10 38 2c1922b7 1638400 0.000000e+00 1.386830e+03 2.636504e+02 1.256468e+06 1.805486e+09 906 d39bff17 6553600 0.000000e+00 1.762919e+04 3.396463e+03 6.170218e+05 1.128135e+10 35 0e8bce2b 33177600 0.000000e+00 2.090375e+05 4.666688e+04 3.762675e+06 8.257406e+11 18 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.580226e+03 3.710017e+02 5.495882e+05 1.447380e+09 213 2c1922b7 1638400 0.000000e+00 3.441326e+02 6.695097e+01 4.267244e+04 1.524080e+07 124 d39bff17 6553600 0.000000e+00 9.089165e+02 1.570596e+02 1.590604e+05 1.488895e+08 175 0e8bce2b 33177600 0.000000e+00 5.659456e+03 7.119452e+02 1.058318e+06 6.084289e+09 187 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.485190e+03 3.288259e+02 6.188122e+05 1.564789e+09 249 2c1922b7 1638400 0.000000e+00 3.565977e+02 7.541526e+01 3.672956e+04 1.368348e+07 103 d39bff17 6553600 0.000000e+00 9.441529e+02 1.527244e+02 2.398148e+05 2.323464e+08 254 0e8bce2b 33177600 0.000000e+00 5.713813e+03 7.827526e+02 8.456444e+05 4.922534e+09 148 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.615067e+03 4.428563e+02 5.334737e+05 1.435078e+09 204 2c1922b7 1638400 0.000000e+00 3.667814e+02 6.561130e+01 3.227676e+04 1.221734e+07 88 d39bff17 6553600 0.000000e+00 9.018562e+02 1.587421e+02 1.470026e+05 1.366826e+08 163 0e8bce2b 33177600 0.000000e+00 5.604694e+03 6.527594e+02 1.132148e+06 6.431415e+09 202 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.480709e+03 3.749281e+02 4.514890e+05 1.145597e+09 182 2c1922b7 1638400 0.000000e+00 3.718262e+02 7.249781e+01 2.342505e+04 9.041172e+06 63 d39bff17 6553600 0.000000e+00 9.130900e+02 1.739240e+02 2.182285e+05 2.064919e+08 239 0e8bce2b 33177600 0.000000e+00 5.804941e+03 6.806052e+02 9.113758e+05 5.363209e+09 157 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.attila000066400000000000000000000100271507764646700333550ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 8.795690e+04 4.598673e+03 1.715160e+07 1.512725e+12 195 d39bff17 6553600 0.000000e+00 2.744119e+04 1.740624e+03 4.390591e+06 1.209678e+11 160 2c1922b7 1638400 0.000000e+00 4.091631e+03 5.062810e+02 7.283104e+05 3.025603e+09 178 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.847320e+03 1.168001e+03 8.285257e+05 5.838253e+09 121 d39bff17 6553600 0.000000e+00 2.123746e+03 3.153004e+02 4.226254e+05 9.173322e+08 199 2c1922b7 1638400 0.000000e+00 5.446913e+02 9.530021e+01 1.040360e+05 5.840221e+07 191 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.816836e+03 1.179433e+03 1.172496e+06 8.231973e+09 172 d39bff17 6553600 0.000000e+00 2.055421e+03 3.154533e+02 3.864192e+05 8.129623e+08 188 2c1922b7 1638400 0.000000e+00 6.121868e+02 1.314710e+02 9.550115e+04 6.116094e+07 156 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.940618e+03 1.147691e+03 1.006390e+06 7.175959e+09 145 d39bff17 6553600 0.000000e+00 2.093041e+03 3.377347e+02 3.851195e+05 8.270585e+08 184 2c1922b7 1638400 0.000000e+00 6.098259e+02 1.286153e+02 1.091588e+05 6.952888e+07 179 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.idgraf000066400000000000000000000216351507764646700333420ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.700359e+03 1.036459e+03 3.886208e+05 2.666205e+09 58 d39bff17 6553600 0.000000e+00 2.067623e+03 3.658691e+02 3.825102e+05 8.156510e+08 185 2c1922b7 1638400 0.000000e+00 6.344928e+02 1.313164e+02 1.091328e+05 7.220992e+07 172 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.634729e+03 1.380283e+03 4.777005e+05 3.306586e+09 72 d39bff17 6553600 0.000000e+00 2.102108e+03 3.770829e+02 2.690698e+05 5.838144e+08 128 2c1922b7 1638400 0.000000e+00 6.251127e+02 1.334964e+02 1.168961e+05 7.640580e+07 187 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 5.973111e+03 7.873858e+02 4.420102e+05 2.686054e+09 74 d39bff17 6553600 0.000000e+00 2.088129e+03 3.411148e+02 2.129891e+05 4.566174e+08 102 2c1922b7 1638400 0.000000e+00 5.816119e+02 1.098601e+02 6.165086e+04 3.713622e+07 106 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 5.813439e+03 5.835403e+02 2.441645e+05 1.433737e+09 42 d39bff17 6553600 0.000000e+00 2.170079e+03 5.032568e+02 7.161259e+04 1.637628e+08 33 2c1922b7 1638400 0.000000e+00 6.080488e+02 1.225789e+02 3.101049e+04 1.962219e+07 51 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 9.133611e+04 7.141260e+02 1.032098e+07 9.427358e+11 113 d39bff17 6553600 0.000000e+00 2.797330e+04 6.068477e+02 1.482585e+06 4.149232e+10 53 2c1922b7 1638400 0.000000e+00 3.803279e+03 2.345034e+02 3.308852e+05 1.263233e+09 87 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.609495e+03 1.035460e+03 4.296172e+05 2.909244e+09 65 d39bff17 6553600 0.000000e+00 2.129873e+03 3.868465e+02 3.407797e+05 7.497615e+08 160 2c1922b7 1638400 0.000000e+00 6.443548e+02 1.239934e+02 8.054435e+04 5.382094e+07 125 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 5.938773e+03 5.045720e+02 2.078570e+05 1.243326e+09 35 d39bff17 6553600 0.000000e+00 2.180034e+03 4.239424e+02 1.286220e+05 2.910041e+08 59 2c1922b7 1638400 0.000000e+00 5.996256e+02 1.220514e+02 5.816368e+04 3.632139e+07 97 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.467618e+03 9.651621e+02 2.910428e+05 1.924273e+09 45 d39bff17 6553600 0.000000e+00 2.057931e+03 3.333471e+02 1.872717e+05 3.955042e+08 91 2c1922b7 1638400 0.000000e+00 6.141799e+02 1.365857e+02 5.159111e+04 3.325329e+07 84 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.429538e+03 9.929716e+02 5.015040e+05 3.301346e+09 78 d39bff17 6553600 0.000000e+00 2.056349e+03 3.356881e+02 4.565094e+05 9.637588e+08 222 2c1922b7 1638400 0.000000e+00 6.374873e+02 1.360140e+02 9.498561e+04 6.330859e+07 149 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.mirage000066400000000000000000000100261507764646700333420ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.762618e+04 9.983022e+02 6.575032e+06 1.818802e+11 238 ff82dda0 14745600 0.000000e+00 9.068591e+04 1.985325e+03 1.324014e+07 1.201270e+12 146 2c1922b7 1638400 0.000000e+00 3.579286e+03 2.361748e+02 1.449611e+06 5.211162e+09 405 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.106536e+03 3.670134e+02 5.856171e+05 1.271070e+09 278 ff82dda0 14745600 0.000000e+00 7.299043e+03 1.073594e+03 1.284632e+06 9.579441e+09 176 2c1922b7 1638400 0.000000e+00 6.166979e+02 1.186269e+02 1.362902e+05 8.715990e+07 221 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.072463e+03 3.421274e+02 6.590431e+05 1.403064e+09 318 ff82dda0 14745600 0.000000e+00 6.791353e+03 1.183637e+03 1.195278e+06 8.364131e+09 176 2c1922b7 1638400 0.000000e+00 6.128580e+02 1.104622e+02 1.378931e+05 8.725430e+07 225 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.129169e+03 3.327197e+02 5.024838e+05 1.095999e+09 236 ff82dda0 14745600 0.000000e+00 6.700018e+03 1.125184e+03 1.139003e+06 7.846568e+09 170 2c1922b7 1638400 0.000000e+00 6.207819e+02 1.142635e+02 1.440214e+05 9.243491e+07 232 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_atlas.sirocco000066400000000000000000000130351507764646700335420ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 4.935132e+04 1.056942e+04 1.875350e+06 9.679609e+10 38 2c1922b7 1638400 0.000000e+00 1.386830e+03 2.636504e+02 1.256468e+06 1.805486e+09 906 d39bff17 6553600 0.000000e+00 1.762919e+04 3.396463e+03 6.170218e+05 1.128135e+10 35 0e8bce2b 33177600 0.000000e+00 2.090375e+05 4.666688e+04 3.762675e+06 8.257406e+11 18 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.580226e+03 3.710017e+02 5.495882e+05 1.447380e+09 213 2c1922b7 1638400 0.000000e+00 3.441326e+02 6.695097e+01 4.267244e+04 1.524080e+07 124 d39bff17 6553600 0.000000e+00 9.089165e+02 1.570596e+02 1.590604e+05 1.488895e+08 175 0e8bce2b 33177600 0.000000e+00 5.659456e+03 7.119452e+02 1.058318e+06 6.084289e+09 187 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.485190e+03 3.288259e+02 6.188122e+05 1.564789e+09 249 2c1922b7 1638400 0.000000e+00 3.565977e+02 7.541526e+01 3.672956e+04 1.368348e+07 103 d39bff17 6553600 0.000000e+00 9.441529e+02 1.527244e+02 2.398148e+05 2.323464e+08 254 0e8bce2b 33177600 0.000000e+00 5.713813e+03 7.827526e+02 8.456444e+05 4.922534e+09 148 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.615067e+03 4.428563e+02 5.334737e+05 1.435078e+09 204 2c1922b7 1638400 0.000000e+00 3.667814e+02 6.561130e+01 3.227676e+04 1.221734e+07 88 d39bff17 6553600 0.000000e+00 9.018562e+02 1.587421e+02 1.470026e+05 1.366826e+08 163 0e8bce2b 33177600 0.000000e+00 5.604694e+03 6.527594e+02 1.132148e+06 6.431415e+09 202 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.480709e+03 3.749281e+02 4.514890e+05 1.145597e+09 182 2c1922b7 1638400 0.000000e+00 3.718262e+02 7.249781e+01 2.342505e+04 9.041172e+06 63 d39bff17 6553600 0.000000e+00 9.130900e+02 1.739240e+02 2.182285e+05 2.064919e+08 239 0e8bce2b 33177600 0.000000e+00 5.804941e+03 6.806052e+02 9.113758e+05 5.363209e+09 157 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.attila000066400000000000000000000100271507764646700332210ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 8.795690e+04 4.598673e+03 1.715160e+07 1.512725e+12 195 d39bff17 6553600 0.000000e+00 2.744119e+04 1.740624e+03 4.390591e+06 1.209678e+11 160 2c1922b7 1638400 0.000000e+00 4.091631e+03 5.062810e+02 7.283104e+05 3.025603e+09 178 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.847320e+03 1.168001e+03 8.285257e+05 5.838253e+09 121 d39bff17 6553600 0.000000e+00 2.123746e+03 3.153004e+02 4.226254e+05 9.173322e+08 199 2c1922b7 1638400 0.000000e+00 5.446913e+02 9.530021e+01 1.040360e+05 5.840221e+07 191 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.816836e+03 1.179433e+03 1.172496e+06 8.231973e+09 172 d39bff17 6553600 0.000000e+00 2.055421e+03 3.154533e+02 3.864192e+05 8.129623e+08 188 2c1922b7 1638400 0.000000e+00 6.121868e+02 1.314710e+02 9.550115e+04 6.116094e+07 156 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.940618e+03 1.147691e+03 1.006390e+06 7.175959e+09 145 d39bff17 6553600 0.000000e+00 2.093041e+03 3.377347e+02 3.851195e+05 8.270585e+08 184 2c1922b7 1638400 0.000000e+00 6.098259e+02 1.286153e+02 1.091588e+05 6.952888e+07 179 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.idgraf000066400000000000000000000216351507764646700332060ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.700359e+03 1.036459e+03 3.886208e+05 2.666205e+09 58 d39bff17 6553600 0.000000e+00 2.067623e+03 3.658691e+02 3.825102e+05 8.156510e+08 185 2c1922b7 1638400 0.000000e+00 6.344928e+02 1.313164e+02 1.091328e+05 7.220992e+07 172 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.634729e+03 1.380283e+03 4.777005e+05 3.306586e+09 72 d39bff17 6553600 0.000000e+00 2.102108e+03 3.770829e+02 2.690698e+05 5.838144e+08 128 2c1922b7 1638400 0.000000e+00 6.251127e+02 1.334964e+02 1.168961e+05 7.640580e+07 187 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 5.973111e+03 7.873858e+02 4.420102e+05 2.686054e+09 74 d39bff17 6553600 0.000000e+00 2.088129e+03 3.411148e+02 2.129891e+05 4.566174e+08 102 2c1922b7 1638400 0.000000e+00 5.816119e+02 1.098601e+02 6.165086e+04 3.713622e+07 106 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 5.813439e+03 5.835403e+02 2.441645e+05 1.433737e+09 42 d39bff17 6553600 0.000000e+00 2.170079e+03 5.032568e+02 7.161259e+04 1.637628e+08 33 2c1922b7 1638400 0.000000e+00 6.080488e+02 1.225789e+02 3.101049e+04 1.962219e+07 51 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 9.133611e+04 7.141260e+02 1.032098e+07 9.427358e+11 113 d39bff17 6553600 0.000000e+00 2.797330e+04 6.068477e+02 1.482585e+06 4.149232e+10 53 2c1922b7 1638400 0.000000e+00 3.803279e+03 2.345034e+02 3.308852e+05 1.263233e+09 87 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.609495e+03 1.035460e+03 4.296172e+05 2.909244e+09 65 d39bff17 6553600 0.000000e+00 2.129873e+03 3.868465e+02 3.407797e+05 7.497615e+08 160 2c1922b7 1638400 0.000000e+00 6.443548e+02 1.239934e+02 8.054435e+04 5.382094e+07 125 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 5.938773e+03 5.045720e+02 2.078570e+05 1.243326e+09 35 d39bff17 6553600 0.000000e+00 2.180034e+03 4.239424e+02 1.286220e+05 2.910041e+08 59 2c1922b7 1638400 0.000000e+00 5.996256e+02 1.220514e+02 5.816368e+04 3.632139e+07 97 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.467618e+03 9.651621e+02 2.910428e+05 1.924273e+09 45 d39bff17 6553600 0.000000e+00 2.057931e+03 3.333471e+02 1.872717e+05 3.955042e+08 91 2c1922b7 1638400 0.000000e+00 6.141799e+02 1.365857e+02 5.159111e+04 3.325329e+07 84 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.429538e+03 9.929716e+02 5.015040e+05 3.301346e+09 78 d39bff17 6553600 0.000000e+00 2.056349e+03 3.356881e+02 4.565094e+05 9.637588e+08 222 2c1922b7 1638400 0.000000e+00 6.374873e+02 1.360140e+02 9.498561e+04 6.330859e+07 149 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.mirage000066400000000000000000000100261507764646700332060ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.762618e+04 9.983022e+02 6.575032e+06 1.818802e+11 238 ff82dda0 14745600 0.000000e+00 9.068591e+04 1.985325e+03 1.324014e+07 1.201270e+12 146 2c1922b7 1638400 0.000000e+00 3.579286e+03 2.361748e+02 1.449611e+06 5.211162e+09 405 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.106536e+03 3.670134e+02 5.856171e+05 1.271070e+09 278 ff82dda0 14745600 0.000000e+00 7.299043e+03 1.073594e+03 1.284632e+06 9.579441e+09 176 2c1922b7 1638400 0.000000e+00 6.166979e+02 1.186269e+02 1.362902e+05 8.715990e+07 221 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.072463e+03 3.421274e+02 6.590431e+05 1.403064e+09 318 ff82dda0 14745600 0.000000e+00 6.791353e+03 1.183637e+03 1.195278e+06 8.364131e+09 176 2c1922b7 1638400 0.000000e+00 6.128580e+02 1.104622e+02 1.378931e+05 8.725430e+07 225 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.129169e+03 3.327197e+02 5.024838e+05 1.095999e+09 236 ff82dda0 14745600 0.000000e+00 6.700018e+03 1.125184e+03 1.139003e+06 7.846568e+09 170 2c1922b7 1638400 0.000000e+00 6.207819e+02 1.142635e+02 1.440214e+05 9.243491e+07 232 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_goto.sirocco000066400000000000000000000130351507764646700334060ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 4.935132e+04 1.056942e+04 1.875350e+06 9.679609e+10 38 2c1922b7 1638400 0.000000e+00 1.386830e+03 2.636504e+02 1.256468e+06 1.805486e+09 906 d39bff17 6553600 0.000000e+00 1.762919e+04 3.396463e+03 6.170218e+05 1.128135e+10 35 0e8bce2b 33177600 0.000000e+00 2.090375e+05 4.666688e+04 3.762675e+06 8.257406e+11 18 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.580226e+03 3.710017e+02 5.495882e+05 1.447380e+09 213 2c1922b7 1638400 0.000000e+00 3.441326e+02 6.695097e+01 4.267244e+04 1.524080e+07 124 d39bff17 6553600 0.000000e+00 9.089165e+02 1.570596e+02 1.590604e+05 1.488895e+08 175 0e8bce2b 33177600 0.000000e+00 5.659456e+03 7.119452e+02 1.058318e+06 6.084289e+09 187 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.485190e+03 3.288259e+02 6.188122e+05 1.564789e+09 249 2c1922b7 1638400 0.000000e+00 3.565977e+02 7.541526e+01 3.672956e+04 1.368348e+07 103 d39bff17 6553600 0.000000e+00 9.441529e+02 1.527244e+02 2.398148e+05 2.323464e+08 254 0e8bce2b 33177600 0.000000e+00 5.713813e+03 7.827526e+02 8.456444e+05 4.922534e+09 148 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.615067e+03 4.428563e+02 5.334737e+05 1.435078e+09 204 2c1922b7 1638400 0.000000e+00 3.667814e+02 6.561130e+01 3.227676e+04 1.221734e+07 88 d39bff17 6553600 0.000000e+00 9.018562e+02 1.587421e+02 1.470026e+05 1.366826e+08 163 0e8bce2b 33177600 0.000000e+00 5.604694e+03 6.527594e+02 1.132148e+06 6.431415e+09 202 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.480709e+03 3.749281e+02 4.514890e+05 1.145597e+09 182 2c1922b7 1638400 0.000000e+00 3.718262e+02 7.249781e+01 2.342505e+04 9.041172e+06 63 d39bff17 6553600 0.000000e+00 9.130900e+02 1.739240e+02 2.182285e+05 2.064919e+08 239 0e8bce2b 33177600 0.000000e+00 5.804941e+03 6.806052e+02 9.113758e+05 5.363209e+09 157 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.attila000066400000000000000000000100271507764646700340540ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 8.795690e+04 4.598673e+03 1.715160e+07 1.512725e+12 195 d39bff17 6553600 0.000000e+00 2.744119e+04 1.740624e+03 4.390591e+06 1.209678e+11 160 2c1922b7 1638400 0.000000e+00 4.091631e+03 5.062810e+02 7.283104e+05 3.025603e+09 178 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.847320e+03 1.168001e+03 8.285257e+05 5.838253e+09 121 d39bff17 6553600 0.000000e+00 2.123746e+03 3.153004e+02 4.226254e+05 9.173322e+08 199 2c1922b7 1638400 0.000000e+00 5.446913e+02 9.530021e+01 1.040360e+05 5.840221e+07 191 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.816836e+03 1.179433e+03 1.172496e+06 8.231973e+09 172 d39bff17 6553600 0.000000e+00 2.055421e+03 3.154533e+02 3.864192e+05 8.129623e+08 188 2c1922b7 1638400 0.000000e+00 6.121868e+02 1.314710e+02 9.550115e+04 6.116094e+07 156 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.940618e+03 1.147691e+03 1.006390e+06 7.175959e+09 145 d39bff17 6553600 0.000000e+00 2.093041e+03 3.377347e+02 3.851195e+05 8.270585e+08 184 2c1922b7 1638400 0.000000e+00 6.098259e+02 1.286153e+02 1.091588e+05 6.952888e+07 179 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.idgraf000066400000000000000000000216351507764646700340410ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.700359e+03 1.036459e+03 3.886208e+05 2.666205e+09 58 d39bff17 6553600 0.000000e+00 2.067623e+03 3.658691e+02 3.825102e+05 8.156510e+08 185 2c1922b7 1638400 0.000000e+00 6.344928e+02 1.313164e+02 1.091328e+05 7.220992e+07 172 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.634729e+03 1.380283e+03 4.777005e+05 3.306586e+09 72 d39bff17 6553600 0.000000e+00 2.102108e+03 3.770829e+02 2.690698e+05 5.838144e+08 128 2c1922b7 1638400 0.000000e+00 6.251127e+02 1.334964e+02 1.168961e+05 7.640580e+07 187 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 5.973111e+03 7.873858e+02 4.420102e+05 2.686054e+09 74 d39bff17 6553600 0.000000e+00 2.088129e+03 3.411148e+02 2.129891e+05 4.566174e+08 102 2c1922b7 1638400 0.000000e+00 5.816119e+02 1.098601e+02 6.165086e+04 3.713622e+07 106 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 5.813439e+03 5.835403e+02 2.441645e+05 1.433737e+09 42 d39bff17 6553600 0.000000e+00 2.170079e+03 5.032568e+02 7.161259e+04 1.637628e+08 33 2c1922b7 1638400 0.000000e+00 6.080488e+02 1.225789e+02 3.101049e+04 1.962219e+07 51 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 9.133611e+04 7.141260e+02 1.032098e+07 9.427358e+11 113 d39bff17 6553600 0.000000e+00 2.797330e+04 6.068477e+02 1.482585e+06 4.149232e+10 53 2c1922b7 1638400 0.000000e+00 3.803279e+03 2.345034e+02 3.308852e+05 1.263233e+09 87 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.609495e+03 1.035460e+03 4.296172e+05 2.909244e+09 65 d39bff17 6553600 0.000000e+00 2.129873e+03 3.868465e+02 3.407797e+05 7.497615e+08 160 2c1922b7 1638400 0.000000e+00 6.443548e+02 1.239934e+02 8.054435e+04 5.382094e+07 125 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 5.938773e+03 5.045720e+02 2.078570e+05 1.243326e+09 35 d39bff17 6553600 0.000000e+00 2.180034e+03 4.239424e+02 1.286220e+05 2.910041e+08 59 2c1922b7 1638400 0.000000e+00 5.996256e+02 1.220514e+02 5.816368e+04 3.632139e+07 97 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.467618e+03 9.651621e+02 2.910428e+05 1.924273e+09 45 d39bff17 6553600 0.000000e+00 2.057931e+03 3.333471e+02 1.872717e+05 3.955042e+08 91 2c1922b7 1638400 0.000000e+00 6.141799e+02 1.365857e+02 5.159111e+04 3.325329e+07 84 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 6.429538e+03 9.929716e+02 5.015040e+05 3.301346e+09 78 d39bff17 6553600 0.000000e+00 2.056349e+03 3.356881e+02 4.565094e+05 9.637588e+08 222 2c1922b7 1638400 0.000000e+00 6.374873e+02 1.360140e+02 9.498561e+04 6.330859e+07 149 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.mirage000066400000000000000000000100261507764646700340410ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.762618e+04 9.983022e+02 6.575032e+06 1.818802e+11 238 ff82dda0 14745600 0.000000e+00 9.068591e+04 1.985325e+03 1.324014e+07 1.201270e+12 146 2c1922b7 1638400 0.000000e+00 3.579286e+03 2.361748e+02 1.449611e+06 5.211162e+09 405 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.106536e+03 3.670134e+02 5.856171e+05 1.271070e+09 278 ff82dda0 14745600 0.000000e+00 7.299043e+03 1.073594e+03 1.284632e+06 9.579441e+09 176 2c1922b7 1638400 0.000000e+00 6.166979e+02 1.186269e+02 1.362902e+05 8.715990e+07 221 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.072463e+03 3.421274e+02 6.590431e+05 1.403064e+09 318 ff82dda0 14745600 0.000000e+00 6.791353e+03 1.183637e+03 1.195278e+06 8.364131e+09 176 2c1922b7 1638400 0.000000e+00 6.128580e+02 1.104622e+02 1.378931e+05 8.725430e+07 225 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 6553600 0.000000e+00 2.129169e+03 3.327197e+02 5.024838e+05 1.095999e+09 236 ff82dda0 14745600 0.000000e+00 6.700018e+03 1.125184e+03 1.139003e+06 7.846568e+09 170 2c1922b7 1638400 0.000000e+00 6.207819e+02 1.142635e+02 1.440214e+05 9.243491e+07 232 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_dlu_lu_model_trsm_ru_openblas.sirocco000066400000000000000000000130351507764646700342410ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 4.935132e+04 1.056942e+04 1.875350e+06 9.679609e+10 38 2c1922b7 1638400 0.000000e+00 1.386830e+03 2.636504e+02 1.256468e+06 1.805486e+09 906 d39bff17 6553600 0.000000e+00 1.762919e+04 3.396463e+03 6.170218e+05 1.128135e+10 35 0e8bce2b 33177600 0.000000e+00 2.090375e+05 4.666688e+04 3.762675e+06 8.257406e+11 18 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.580226e+03 3.710017e+02 5.495882e+05 1.447380e+09 213 2c1922b7 1638400 0.000000e+00 3.441326e+02 6.695097e+01 4.267244e+04 1.524080e+07 124 d39bff17 6553600 0.000000e+00 9.089165e+02 1.570596e+02 1.590604e+05 1.488895e+08 175 0e8bce2b 33177600 0.000000e+00 5.659456e+03 7.119452e+02 1.058318e+06 6.084289e+09 187 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.485190e+03 3.288259e+02 6.188122e+05 1.564789e+09 249 2c1922b7 1638400 0.000000e+00 3.565977e+02 7.541526e+01 3.672956e+04 1.368348e+07 103 d39bff17 6553600 0.000000e+00 9.441529e+02 1.527244e+02 2.398148e+05 2.323464e+08 254 0e8bce2b 33177600 0.000000e+00 5.713813e+03 7.827526e+02 8.456444e+05 4.922534e+09 148 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.615067e+03 4.428563e+02 5.334737e+05 1.435078e+09 204 2c1922b7 1638400 0.000000e+00 3.667814e+02 6.561130e+01 3.227676e+04 1.221734e+07 88 d39bff17 6553600 0.000000e+00 9.018562e+02 1.587421e+02 1.470026e+05 1.366826e+08 163 0e8bce2b 33177600 0.000000e+00 5.604694e+03 6.527594e+02 1.132148e+06 6.431415e+09 202 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 14745600 0.000000e+00 2.480709e+03 3.749281e+02 4.514890e+05 1.145597e+09 182 2c1922b7 1638400 0.000000e+00 3.718262e+02 7.249781e+01 2.342505e+04 9.041172e+06 63 d39bff17 6553600 0.000000e+00 9.130900e+02 1.739240e+02 2.182285e+05 2.064919e+08 239 0e8bce2b 33177600 0.000000e+00 5.804941e+03 6.806052e+02 9.113758e+05 5.363209e+09 157 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.attila000066400000000000000000000142231507764646700300710ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 33177600 7.077888e+09 3.328725e+05 1.185902e+04 2.563119e+07 8.542747e+12 77 9c6670ef 29491200 7.077888e+09 3.328725e+05 1.185902e+04 2.563119e+07 8.542747e+12 77 c00cf6b7 29491200 7.077888e+09 3.328725e+05 1.185902e+04 2.563119e+07 8.542747e+12 77 78a2cc08 29491200 7.077888e+09 3.328725e+05 1.185902e+04 2.563119e+07 8.542747e+12 77 a7cdf15b 44236800 1.415578e+10 6.657450e+05 2.371804e+04 5.126238e+07 3.417099e+13 77 24c84a50 11059200 1.769472e+09 8.321812e+04 2.964755e+03 6.407798e+06 5.339217e+11 77 0b0b0ce8 3686400 2.621440e+08 1.421718e+04 3.409134e+02 9.098993e+05 1.294364e+10 64 4220e23d 14745600 2.097152e+09 1.008105e+05 2.361630e+03 8.064841e+06 8.134670e+11 80 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 33177600 7.077888e+09 1.123499e+04 6.785566e+01 1.190909e+06 1.338033e+10 106 9c6670ef 29491200 7.077888e+09 1.123499e+04 6.785566e+01 1.190909e+06 1.338033e+10 106 c00cf6b7 29491200 7.077888e+09 1.123499e+04 6.785566e+01 1.190909e+06 1.338033e+10 106 78a2cc08 29491200 7.077888e+09 1.123499e+04 6.785566e+01 1.190909e+06 1.338033e+10 106 a7cdf15b 44236800 1.415578e+10 2.246998e+04 1.357113e+02 2.381818e+06 5.352132e+10 106 24c84a50 11059200 1.769472e+09 2.808747e+03 1.696392e+01 2.977272e+05 8.362706e+08 106 0b0b0ce8 3686400 2.621440e+08 6.738679e+02 4.393713e+01 6.873452e+04 4.651489e+07 102 4220e23d 14745600 2.097152e+09 5.557425e+03 3.241733e+02 5.835297e+05 3.253957e+09 105 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 33177600 7.077888e+09 1.123077e+04 9.504466e+01 1.179231e+06 1.324463e+10 105 9c6670ef 29491200 7.077888e+09 1.123077e+04 9.504466e+01 1.179231e+06 1.324463e+10 105 c00cf6b7 29491200 7.077888e+09 1.123077e+04 9.504466e+01 1.179231e+06 1.324463e+10 105 78a2cc08 29491200 7.077888e+09 1.123077e+04 9.504466e+01 1.179231e+06 1.324463e+10 105 a7cdf15b 44236800 1.415578e+10 2.246154e+04 1.900893e+02 2.358462e+06 5.297852e+10 105 24c84a50 11059200 1.769472e+09 2.807693e+03 2.376116e+01 2.948078e+05 8.277894e+08 105 0b0b0ce8 3686400 2.621440e+08 6.672056e+02 3.376608e+01 6.805497e+04 4.552295e+07 102 4220e23d 14745600 2.097152e+09 5.553764e+03 3.500896e+02 5.831453e+05 3.251521e+09 105 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 33177600 7.077888e+09 1.124174e+04 2.629960e+01 1.180383e+06 1.326963e+10 105 9c6670ef 29491200 7.077888e+09 1.124174e+04 2.629960e+01 1.180383e+06 1.326963e+10 105 c00cf6b7 29491200 7.077888e+09 1.124174e+04 2.629960e+01 1.180383e+06 1.326963e+10 105 78a2cc08 29491200 7.077888e+09 1.124174e+04 2.629960e+01 1.180383e+06 1.326963e+10 105 a7cdf15b 44236800 1.415578e+10 2.248348e+04 5.259920e+01 2.360766e+06 5.307852e+10 105 24c84a50 11059200 1.769472e+09 2.810435e+03 6.574900e+00 2.950958e+05 8.293519e+08 105 0b0b0ce8 3686400 2.621440e+08 6.002221e+02 2.259043e+01 6.242310e+04 3.752080e+07 104 4220e23d 14745600 2.097152e+09 5.577722e+03 1.615194e+02 5.912385e+05 3.300529e+09 106 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.idgraf000066400000000000000000000332161507764646700300520ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 3686400 2.621440e+08 6.801013e+02 7.013561e+01 4.760709e+04 3.272198e+07 70 4220e23d 14745600 2.097152e+09 5.623635e+03 5.419920e+02 4.442672e+05 2.521603e+09 79 492beed5 33177600 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87 9c6670ef 29491200 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87 c00cf6b7 29491200 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87 78a2cc08 29491200 7.077888e+09 1.150361e+04 5.884814e+02 1.000814e+06 1.154310e+10 87 a7cdf15b 44236800 1.415578e+10 2.300722e+04 1.176963e+03 2.001628e+06 4.617240e+10 87 24c84a50 11059200 1.769472e+09 2.875903e+03 1.471204e+02 2.502035e+05 7.214438e+08 87 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb4) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 3686400 2.621440e+08 6.717051e+02 6.137607e+01 4.500424e+04 3.048197e+07 67 4220e23d 14745600 2.097152e+09 5.648275e+03 4.677390e+02 4.575103e+05 2.601865e+09 81 492beed5 33177600 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88 9c6670ef 29491200 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88 c00cf6b7 29491200 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88 78a2cc08 29491200 7.077888e+09 1.157020e+04 6.521027e+02 1.018178e+06 1.181795e+10 88 a7cdf15b 44236800 1.415578e+10 2.314040e+04 1.304205e+03 2.036356e+06 4.727180e+10 88 24c84a50 11059200 1.769472e+09 2.892550e+03 1.630257e+02 2.545445e+05 7.386219e+08 88 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb6) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 3686400 2.621440e+08 6.265559e+02 5.536840e+01 4.824481e+04 3.046412e+07 77 4220e23d 14745600 2.097152e+09 5.631203e+03 4.767455e+02 4.561275e+05 2.586957e+09 81 492beed5 33177600 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88 9c6670ef 29491200 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88 c00cf6b7 29491200 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88 78a2cc08 29491200 7.077888e+09 1.162826e+04 6.757302e+02 1.023286e+06 1.193922e+10 88 a7cdf15b 44236800 1.415578e+10 2.325652e+04 1.351460e+03 2.046572e+06 4.775688e+10 88 24c84a50 11059200 1.769472e+09 2.907065e+03 1.689325e+02 2.558215e+05 7.462012e+08 88 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb7) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 3686400 2.621440e+08 6.780899e+02 4.241206e+01 4.543202e+04 3.092751e+07 67 4220e23d 14745600 2.097152e+09 5.857201e+03 8.346836e+02 4.744333e+05 2.835284e+09 81 492beed5 33177600 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86 9c6670ef 29491200 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86 c00cf6b7 29491200 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86 78a2cc08 29491200 7.077888e+09 1.150498e+04 4.254093e+02 9.894285e+05 1.139892e+10 86 a7cdf15b 44236800 1.415578e+10 2.300996e+04 8.508186e+02 1.978857e+06 4.559568e+10 86 24c84a50 11059200 1.769472e+09 2.876245e+03 1.063523e+02 2.473571e+05 7.124325e+08 86 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 3686400 2.621440e+08 6.759139e+02 4.092799e+01 4.190666e+04 2.842915e+07 62 4220e23d 14745600 2.097152e+09 5.527477e+03 2.733928e+02 4.421982e+05 2.450220e+09 80 492beed5 33177600 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96 9c6670ef 29491200 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96 c00cf6b7 29491200 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96 78a2cc08 29491200 7.077888e+09 1.146770e+04 1.768909e+02 1.100899e+06 1.262778e+10 96 a7cdf15b 44236800 1.415578e+10 2.293540e+04 3.537818e+02 2.201798e+06 5.051112e+10 96 24c84a50 11059200 1.769472e+09 2.866925e+03 4.422272e+01 2.752248e+05 7.892362e+08 96 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb1) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 3686400 2.621440e+08 6.339465e+02 7.125158e+01 4.184047e+04 2.685969e+07 66 4220e23d 14745600 2.097152e+09 5.624130e+03 4.755864e+02 4.668028e+05 2.644133e+09 83 492beed5 33177600 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97 9c6670ef 29491200 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97 c00cf6b7 29491200 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97 78a2cc08 29491200 7.077888e+09 1.149102e+04 5.375188e+02 1.114629e+06 1.283625e+10 97 a7cdf15b 44236800 1.415578e+10 2.298204e+04 1.075038e+03 2.229258e+06 5.134500e+10 97 24c84a50 11059200 1.769472e+09 2.872755e+03 1.343797e+02 2.786572e+05 8.022656e+08 97 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb3) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 3686400 2.621440e+08 6.389750e+02 8.615382e+01 4.728415e+04 3.076266e+07 74 4220e23d 14745600 2.097152e+09 5.648331e+03 5.220897e+02 4.631632e+05 2.638450e+09 82 492beed5 33177600 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96 9c6670ef 29491200 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96 c00cf6b7 29491200 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96 78a2cc08 29491200 7.077888e+09 1.155069e+04 5.660846e+02 1.108866e+06 1.283893e+10 96 a7cdf15b 44236800 1.415578e+10 2.310138e+04 1.132169e+03 2.217732e+06 5.135572e+10 96 24c84a50 11059200 1.769472e+09 2.887673e+03 1.415212e+02 2.772165e+05 8.024331e+08 96 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb5) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 3686400 2.621440e+08 6.386625e+02 8.094896e+01 4.342905e+04 2.818209e+07 68 4220e23d 14745600 2.097152e+09 5.638657e+03 3.709019e+02 4.454539e+05 2.522630e+09 79 492beed5 33177600 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97 9c6670ef 29491200 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97 c00cf6b7 29491200 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97 78a2cc08 29491200 7.077888e+09 1.144012e+04 2.531108e+02 1.109691e+06 1.270122e+10 97 a7cdf15b 44236800 1.415578e+10 2.288024e+04 5.062216e+02 2.219382e+06 5.080488e+10 97 24c84a50 11059200 1.769472e+09 2.860030e+03 6.327770e+01 2.774228e+05 7.938262e+08 97 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 3686400 2.621440e+08 1.414338e+04 6.441210e+02 3.535844e+05 5.011251e+09 25 4220e23d 14745600 2.097152e+09 1.091117e+05 2.701159e+03 3.382462e+06 3.692924e+11 31 492beed5 33177600 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23 9c6670ef 29491200 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23 c00cf6b7 29491200 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23 78a2cc08 29491200 7.077888e+09 3.621356e+05 7.764608e+03 8.329119e+06 3.017657e+12 23 a7cdf15b 44236800 1.415578e+10 7.242712e+05 1.552922e+04 1.665824e+07 1.207063e+13 23 24c84a50 11059200 1.769472e+09 9.053390e+04 1.941152e+03 2.082280e+06 1.886036e+11 23 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.mirage000066400000000000000000000142231507764646700300570ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 3686400 2.621440e+08 1.352609e+04 3.616534e+02 1.082087e+06 1.464687e+10 80 492beed5 33177600 7.077888e+09 3.550396e+05 8.949994e+03 2.840317e+07 1.009066e+13 80 9c6670ef 29491200 7.077888e+09 3.550396e+05 8.949994e+03 2.840317e+07 1.009066e+13 80 c00cf6b7 29491200 7.077888e+09 3.550396e+05 8.949994e+03 2.840317e+07 1.009066e+13 80 78a2cc08 29491200 7.077888e+09 3.550396e+05 8.949994e+03 2.840317e+07 1.009066e+13 80 a7cdf15b 44236800 1.415578e+10 7.100792e+05 1.789999e+04 5.680634e+07 4.036264e+13 80 24c84a50 11059200 1.769472e+09 8.875990e+04 2.237499e+03 7.100792e+06 6.306662e+11 80 4220e23d 14745600 2.097152e+09 1.078112e+05 1.983800e+03 8.624897e+06 9.301755e+11 80 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 3686400 2.621440e+08 6.589631e+02 8.406511e+00 6.787320e+04 4.473321e+07 103 492beed5 33177600 7.077888e+09 1.151398e+04 9.050114e+01 1.220482e+06 1.405348e+10 106 9c6670ef 29491200 7.077888e+09 1.151398e+04 9.050114e+01 1.220482e+06 1.405348e+10 106 c00cf6b7 29491200 7.077888e+09 1.151398e+04 9.050114e+01 1.220482e+06 1.405348e+10 106 78a2cc08 29491200 7.077888e+09 1.151398e+04 9.050114e+01 1.220482e+06 1.405348e+10 106 a7cdf15b 44236800 1.415578e+10 2.302796e+04 1.810023e+02 2.440964e+06 5.621392e+10 106 24c84a50 11059200 1.769472e+09 2.878495e+03 2.262529e+01 3.051205e+05 8.783425e+08 106 4220e23d 14745600 2.097152e+09 5.574713e+03 3.353004e+02 5.909196e+05 3.306125e+09 106 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 3686400 2.621440e+08 6.663664e+02 8.616537e+01 6.796937e+04 4.604980e+07 102 492beed5 33177600 7.077888e+09 1.150036e+04 8.404527e+01 1.207538e+06 1.388786e+10 105 9c6670ef 29491200 7.077888e+09 1.150036e+04 8.404527e+01 1.207538e+06 1.388786e+10 105 c00cf6b7 29491200 7.077888e+09 1.150036e+04 8.404527e+01 1.207538e+06 1.388786e+10 105 78a2cc08 29491200 7.077888e+09 1.150036e+04 8.404527e+01 1.207538e+06 1.388786e+10 105 a7cdf15b 44236800 1.415578e+10 2.300072e+04 1.680905e+02 2.415076e+06 5.555144e+10 105 24c84a50 11059200 1.769472e+09 2.875090e+03 2.101132e+01 3.018845e+05 8.679912e+08 105 4220e23d 14745600 2.097152e+09 5.579034e+03 3.672012e+02 5.857985e+05 3.282348e+09 105 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb2) # number of entries 8 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0b0b0ce8 3686400 2.621440e+08 6.181769e+02 5.174143e+01 6.181769e+04 3.848198e+07 100 492beed5 33177600 7.077888e+09 1.148096e+04 7.289415e+01 1.205501e+06 1.384086e+10 105 9c6670ef 29491200 7.077888e+09 1.148096e+04 7.289415e+01 1.205501e+06 1.384086e+10 105 c00cf6b7 29491200 7.077888e+09 1.148096e+04 7.289415e+01 1.205501e+06 1.384086e+10 105 78a2cc08 29491200 7.077888e+09 1.148096e+04 7.289415e+01 1.205501e+06 1.384086e+10 105 a7cdf15b 44236800 1.415578e+10 2.296192e+04 1.457883e+02 2.411002e+06 5.536344e+10 105 24c84a50 11059200 1.769472e+09 2.870240e+03 1.822354e+01 3.013752e+05 8.650538e+08 105 4220e23d 14745600 2.097152e+09 5.580581e+03 3.970717e+02 5.859610e+05 3.286558e+09 105 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_sgemm_gemm.sirocco000066400000000000000000000203061507764646700302530ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 9 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 33177600 7.077888e+09 2.745578e+03 3.064191e+02 6.616844e+05 1.839335e+09 241 9c6670ef 29491200 7.077888e+09 2.745578e+03 3.064191e+02 6.616844e+05 1.839335e+09 241 c00cf6b7 29491200 7.077888e+09 2.745578e+03 3.064191e+02 6.616844e+05 1.839335e+09 241 78a2cc08 29491200 7.077888e+09 2.745578e+03 3.064191e+02 6.616844e+05 1.839335e+09 241 a7cdf15b 44236800 1.415578e+10 5.491156e+03 6.128382e+02 1.323369e+06 7.357340e+09 241 24c84a50 11059200 1.769472e+09 6.863945e+02 7.660478e+01 1.654211e+05 1.149584e+08 241 0b0b0ce8 3686400 2.621440e+08 1.582927e+02 3.333442e+01 3.434951e+04 5.678402e+06 217 4220e23d 14745600 2.097152e+09 8.206871e+02 1.017181e+02 1.148962e+05 9.574235e+07 140 87a7dc42 74649600 2.388787e+10 9.813897e+03 7.998509e+02 1.570224e+06 1.551237e+10 160 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb1) # number of entries 9 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 33177600 7.077888e+09 2.686428e+03 2.002215e+02 6.716071e+05 1.814247e+09 250 9c6670ef 29491200 7.077888e+09 2.686428e+03 2.002215e+02 6.716071e+05 1.814247e+09 250 c00cf6b7 29491200 7.077888e+09 2.686428e+03 2.002215e+02 6.716071e+05 1.814247e+09 250 78a2cc08 29491200 7.077888e+09 2.686428e+03 2.002215e+02 6.716071e+05 1.814247e+09 250 a7cdf15b 44236800 1.415578e+10 5.372856e+03 4.004430e+02 1.343214e+06 7.256988e+09 251 24c84a50 11059200 1.769472e+09 6.716070e+02 5.005537e+01 1.679018e+05 1.133904e+08 250 0b0b0ce8 3686400 2.621440e+08 1.630480e+02 3.438768e+01 3.097912e+04 5.275762e+06 190 4220e23d 14745600 2.097152e+09 8.448030e+02 7.773742e+01 2.433033e+05 2.072837e+08 288 87a7dc42 74649600 2.388787e+10 9.873153e+03 8.026227e+02 1.579704e+06 1.569974e+10 160 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb0) # number of entries 9 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 33177600 7.077888e+09 2.791098e+03 3.147711e+02 6.503258e+05 1.838209e+09 233 9c6670ef 29491200 7.077888e+09 2.791098e+03 3.147711e+02 6.503258e+05 1.838209e+09 233 c00cf6b7 29491200 7.077888e+09 2.791098e+03 3.147711e+02 6.503258e+05 1.838209e+09 233 78a2cc08 29491200 7.077888e+09 2.791098e+03 3.147711e+02 6.503258e+05 1.838209e+09 233 a7cdf15b 44236800 1.415578e+10 5.582196e+03 6.295422e+02 1.300652e+06 7.352836e+09 233 24c84a50 11059200 1.769472e+09 6.977745e+02 7.869277e+01 1.625815e+05 1.148881e+08 233 0b0b0ce8 3686400 2.621440e+08 1.624855e+02 3.298013e+01 2.940987e+04 4.975550e+06 181 4220e23d 14745600 2.097152e+09 8.152506e+02 1.017614e+02 1.173961e+05 9.719839e+07 144 87a7dc42 74649600 2.388787e+10 1.001360e+04 7.827579e+02 1.582149e+06 1.593981e+10 158 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 9 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 33177600 7.077888e+09 2.754203e+03 2.682327e+02 6.830422e+05 1.899080e+09 248 9c6670ef 29491200 7.077888e+09 2.754203e+03 2.682327e+02 6.830422e+05 1.899080e+09 248 c00cf6b7 29491200 7.077888e+09 2.754203e+03 2.682327e+02 6.830422e+05 1.899080e+09 248 78a2cc08 29491200 7.077888e+09 2.754203e+03 2.682327e+02 6.830422e+05 1.899080e+09 248 a7cdf15b 44236800 1.415578e+10 5.508406e+03 5.364654e+02 1.366084e+06 7.596320e+09 248 24c84a50 11059200 1.769472e+09 6.885507e+02 6.705818e+01 1.707605e+05 1.186925e+08 248 0b0b0ce8 3686400 2.621440e+08 1.622246e+02 3.553894e+01 3.714942e+04 6.315779e+06 229 4220e23d 14745600 2.097152e+09 8.611626e+02 9.290485e+01 2.411255e+05 2.100651e+08 280 87a7dc42 74649600 2.388787e+10 9.935915e+03 7.366769e+02 1.569875e+06 1.568389e+10 158 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 9 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 492beed5 33177600 7.077888e+09 1.712078e+05 4.163047e+04 2.773567e+07 5.029326e+12 162 9c6670ef 29491200 7.077888e+09 1.712078e+05 4.163047e+04 2.773567e+07 5.029326e+12 162 c00cf6b7 29491200 7.077888e+09 1.712078e+05 4.163047e+04 2.773567e+07 5.029326e+12 162 78a2cc08 29491200 7.077888e+09 1.712078e+05 4.163047e+04 2.773567e+07 5.029326e+12 162 a7cdf15b 44236800 1.415578e+10 3.424156e+05 8.326094e+04 5.547134e+07 2.011730e+13 162 24c84a50 11059200 1.769472e+09 4.280195e+04 1.040762e+04 6.933918e+06 3.143329e+11 162 0b0b0ce8 3686400 2.621440e+08 6.441655e+03 1.152866e+03 3.220827e+05 2.141201e+09 50 4220e23d 14745600 2.097152e+09 4.927734e+04 1.166029e+04 5.913281e+06 3.077063e+11 120 87a7dc42 74649600 2.388787e+10 5.091210e+05 1.022002e+05 6.974957e+07 3.694192e+13 137 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.attila000066400000000000000000000100431507764646700314400ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 2.587052e+04 1.487038e+03 5.386241e+07 1.398052e+12 2082 24c84a50 11059200 0.000000e+00 8.218890e+04 3.347888e+03 1.244340e+08 1.024406e+13 1514 d46431bb 1228800 0.000000e+00 3.265838e+03 1.561177e+02 8.347482e+06 2.732382e+10 2556 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 9.047163e+02 4.943457e+01 7.022408e+06 6.372255e+09 7762 24c84a50 11059200 0.000000e+00 2.963966e+03 7.453353e+01 1.530888e+07 4.540369e+10 5165 d46431bb 1228800 0.000000e+00 1.924610e+02 1.043827e+01 8.556817e+05 1.651698e+08 4446 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 8.810829e+02 4.167975e+01 6.874209e+06 6.070301e+09 7802 24c84a50 11059200 0.000000e+00 2.960803e+03 8.260112e+01 1.519780e+07 4.503271e+10 5133 d46431bb 1228800 0.000000e+00 1.894698e+02 9.561378e+00 8.340462e+05 1.584290e+08 4402 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 8.953024e+02 5.096374e+01 6.835634e+06 6.139790e+09 7635 24c84a50 11059200 0.000000e+00 2.963787e+03 5.048433e+01 1.524275e+07 4.518938e+10 5143 d46431bb 1228800 0.000000e+00 1.803248e+02 8.617192e+00 8.859357e+05 1.601210e+08 4913 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal000066400000000000000000000040251507764646700317410ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.hannibal-pitch000066400000000000000000000040251507764646700330460ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.idgraf000066400000000000000000000217061507764646700314260ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 3.393927e+03 8.566524e+01 3.533078e+06 1.199865e+10 1041 f0ac7beb 4915200 0.000000e+00 2.682238e+04 4.332821e+02 9.951104e+06 2.669820e+11 371 24c84a50 11059200 0.000000e+00 8.930213e+04 1.450773e+03 2.679064e+07 2.393092e+12 300 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.946363e+02 2.537099e+01 6.294539e+05 1.245963e+08 3234 f0ac7beb 4915200 0.000000e+00 9.257288e+02 6.590058e+01 3.791785e+06 3.527953e+09 4096 24c84a50 11059200 0.000000e+00 2.991139e+03 1.645886e+02 1.221282e+07 3.664085e+10 4083 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.954243e+02 2.831383e+01 5.661443e+05 1.129608e+08 2897 f0ac7beb 4915200 0.000000e+00 9.376794e+02 7.341921e+01 3.415966e+06 3.222718e+09 3643 24c84a50 11059200 0.000000e+00 2.995872e+03 1.614697e+02 1.133938e+07 3.407000e+10 3785 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.867261e+02 2.556099e+01 5.342234e+05 1.016227e+08 2861 f0ac7beb 4915200 0.000000e+00 8.996740e+02 6.639270e+01 3.427758e+06 3.100659e+09 3810 24c84a50 11059200 0.000000e+00 2.987519e+03 1.530428e+02 1.113747e+07 3.336072e+10 3728 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.927028e+02 2.478568e+01 6.783137e+05 1.328754e+08 3520 f0ac7beb 4915200 0.000000e+00 9.234475e+02 6.432680e+01 3.846159e+06 3.568960e+09 4165 24c84a50 11059200 0.000000e+00 2.982449e+03 1.542480e+02 1.210278e+07 3.619247e+10 4058 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.868734e+02 2.558187e+01 5.049318e+05 9.612659e+07 2702 f0ac7beb 4915200 0.000000e+00 9.407115e+02 6.874274e+01 3.317889e+06 3.137844e+09 3527 24c84a50 11059200 0.000000e+00 2.972987e+03 1.569773e+02 1.177600e+07 3.510750e+10 3961 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.924732e+02 2.459364e+01 6.245755e+05 1.221768e+08 3245 f0ac7beb 4915200 0.000000e+00 9.173887e+02 7.039530e+01 3.781476e+06 3.489510e+09 4122 24c84a50 11059200 0.000000e+00 3.001859e+03 1.612679e+02 1.156916e+07 3.482922e+10 3854 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.877972e+02 2.764994e+01 5.324050e+05 1.021516e+08 2835 f0ac7beb 4915200 0.000000e+00 9.245688e+02 6.946750e+01 3.363581e+06 3.127419e+09 3638 24c84a50 11059200 0.000000e+00 3.005524e+03 1.690713e+02 1.154422e+07 3.480621e+10 3841 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.865351e+02 2.381101e+01 6.651841e+05 1.261020e+08 3566 f0ac7beb 4915200 0.000000e+00 9.257403e+02 6.896157e+01 3.669635e+06 3.415980e+09 3964 24c84a50 11059200 0.000000e+00 3.007743e+03 1.477912e+02 1.238889e+07 3.735258e+10 4119 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.mirage000066400000000000000000000100421507764646700314250ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 2.645658e+04 4.968429e+02 5.820449e+07 1.540435e+12 2200 24c84a50 11059200 0.000000e+00 8.756135e+04 9.752924e+02 1.866808e+08 1.634805e+13 2132 d46431bb 1228800 0.000000e+00 3.234444e+03 8.877025e+01 1.325799e+07 4.291452e+10 4099 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 8.760921e+02 3.574580e+01 7.074444e+06 6.208182e+09 8075 24c84a50 11059200 0.000000e+00 2.988744e+03 8.136061e+01 2.363499e+07 7.069126e+10 7908 d46431bb 1228800 0.000000e+00 1.911930e+02 1.434147e+01 1.248108e+06 2.399722e+08 6528 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 9.198175e+02 4.677043e+01 6.931745e+06 6.392425e+09 7536 24c84a50 11059200 0.000000e+00 3.016176e+03 6.737054e+01 2.311597e+07 6.975663e+10 7664 d46431bb 1228800 0.000000e+00 1.910500e+02 1.400155e+01 1.317099e+06 2.529832e+08 6894 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 9.143628e+02 4.685332e+01 6.720566e+06 6.161171e+09 7350 24c84a50 11059200 0.000000e+00 3.002393e+03 6.861698e+01 2.339765e+07 7.028562e+10 7793 d46431bb 1228800 0.000000e+00 1.898967e+02 1.421585e+01 1.327568e+06 2.535136e+08 6991 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm.sirocco000066400000000000000000000130661507764646700316330ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 1.164877e+05 2.576301e+04 2.054842e+08 2.510721e+13 1764 f0ac7beb 4915200 0.000000e+00 1.087142e+04 2.109400e+03 2.505863e+07 2.826792e+11 2305 d46431bb 1228800 0.000000e+00 1.613402e+03 3.115535e+02 8.438094e+06 1.412169e+10 5230 24c84a50 11059200 0.000000e+00 3.517390e+04 7.045528e+03 6.925741e+07 2.533794e+12 1969 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.688252e+03 2.597845e+02 1.459721e+07 3.960743e+10 5430 f0ac7beb 4915200 0.000000e+00 2.657700e+02 2.996380e+01 1.356225e+06 3.650255e+08 5103 d46431bb 1228800 0.000000e+00 6.142508e+01 1.012391e+01 4.393736e+05 2.772170e+07 7153 24c84a50 11059200 0.000000e+00 7.851775e+02 4.684799e+01 4.315336e+06 3.400367e+09 5496 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.707789e+03 2.773178e+02 1.421860e+07 3.890480e+10 5251 f0ac7beb 4915200 0.000000e+00 2.693001e+02 2.710216e+01 1.308798e+06 3.560293e+08 4860 d46431bb 1228800 0.000000e+00 6.592485e+01 1.426453e+01 1.071279e+05 7.393038e+06 1625 24c84a50 11059200 0.000000e+00 7.926860e+02 4.760061e+01 4.363736e+06 3.471546e+09 5505 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.706383e+03 2.631153e+02 1.444938e+07 3.947516e+10 5339 f0ac7beb 4915200 0.000000e+00 2.686331e+02 2.912062e+01 1.401996e+06 3.810483e+08 5219 d46431bb 1228800 0.000000e+00 6.317490e+01 1.087216e+01 2.866877e+05 1.864788e+07 4538 24c84a50 11059200 0.000000e+00 7.922324e+02 5.091772e+01 4.156844e+06 3.306790e+09 5247 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.681149e+03 2.665822e+02 1.451306e+07 3.929636e+10 5413 f0ac7beb 4915200 0.000000e+00 2.642224e+02 2.666799e+01 1.450317e+06 3.871098e+08 5489 d46431bb 1228800 0.000000e+00 5.975719e+01 9.345113e+00 4.033610e+05 2.469321e+07 6750 24c84a50 11059200 0.000000e+00 7.867204e+02 4.699968e+01 4.148377e+06 3.275261e+09 5273 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.attila000066400000000000000000000100431507764646700326240ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 2.587052e+04 1.487038e+03 5.386241e+07 1.398052e+12 2082 24c84a50 11059200 0.000000e+00 8.218890e+04 3.347888e+03 1.244340e+08 1.024406e+13 1514 d46431bb 1228800 0.000000e+00 3.265838e+03 1.561177e+02 8.347482e+06 2.732382e+10 2556 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 9.047163e+02 4.943457e+01 7.022408e+06 6.372255e+09 7762 24c84a50 11059200 0.000000e+00 2.963966e+03 7.453353e+01 1.530888e+07 4.540369e+10 5165 d46431bb 1228800 0.000000e+00 1.924610e+02 1.043827e+01 8.556817e+05 1.651698e+08 4446 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 8.810829e+02 4.167975e+01 6.874209e+06 6.070301e+09 7802 24c84a50 11059200 0.000000e+00 2.960803e+03 8.260112e+01 1.519780e+07 4.503271e+10 5133 d46431bb 1228800 0.000000e+00 1.894698e+02 9.561378e+00 8.340462e+05 1.584290e+08 4402 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 8.953024e+02 5.096374e+01 6.835634e+06 6.139790e+09 7635 24c84a50 11059200 0.000000e+00 2.963787e+03 5.048433e+01 1.524275e+07 4.518938e+10 5143 d46431bb 1228800 0.000000e+00 1.803248e+02 8.617192e+00 8.859357e+05 1.601210e+08 4913 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.hannibal000066400000000000000000000040251507764646700331250ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 starpu_slu_lu_model_gemm_atlas.hannibal-pitch000066400000000000000000000040251507764646700341530ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.idgraf000066400000000000000000000217061507764646700326120ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 3.393927e+03 8.566524e+01 3.533078e+06 1.199865e+10 1041 f0ac7beb 4915200 0.000000e+00 2.682238e+04 4.332821e+02 9.951104e+06 2.669820e+11 371 24c84a50 11059200 0.000000e+00 8.930213e+04 1.450773e+03 2.679064e+07 2.393092e+12 300 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.946363e+02 2.537099e+01 6.294539e+05 1.245963e+08 3234 f0ac7beb 4915200 0.000000e+00 9.257288e+02 6.590058e+01 3.791785e+06 3.527953e+09 4096 24c84a50 11059200 0.000000e+00 2.991139e+03 1.645886e+02 1.221282e+07 3.664085e+10 4083 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.954243e+02 2.831383e+01 5.661443e+05 1.129608e+08 2897 f0ac7beb 4915200 0.000000e+00 9.376794e+02 7.341921e+01 3.415966e+06 3.222718e+09 3643 24c84a50 11059200 0.000000e+00 2.995872e+03 1.614697e+02 1.133938e+07 3.407000e+10 3785 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.867261e+02 2.556099e+01 5.342234e+05 1.016227e+08 2861 f0ac7beb 4915200 0.000000e+00 8.996740e+02 6.639270e+01 3.427758e+06 3.100659e+09 3810 24c84a50 11059200 0.000000e+00 2.987519e+03 1.530428e+02 1.113747e+07 3.336072e+10 3728 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.927028e+02 2.478568e+01 6.783137e+05 1.328754e+08 3520 f0ac7beb 4915200 0.000000e+00 9.234475e+02 6.432680e+01 3.846159e+06 3.568960e+09 4165 24c84a50 11059200 0.000000e+00 2.982449e+03 1.542480e+02 1.210278e+07 3.619247e+10 4058 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.868734e+02 2.558187e+01 5.049318e+05 9.612659e+07 2702 f0ac7beb 4915200 0.000000e+00 9.407115e+02 6.874274e+01 3.317889e+06 3.137844e+09 3527 24c84a50 11059200 0.000000e+00 2.972987e+03 1.569773e+02 1.177600e+07 3.510750e+10 3961 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.924732e+02 2.459364e+01 6.245755e+05 1.221768e+08 3245 f0ac7beb 4915200 0.000000e+00 9.173887e+02 7.039530e+01 3.781476e+06 3.489510e+09 4122 24c84a50 11059200 0.000000e+00 3.001859e+03 1.612679e+02 1.156916e+07 3.482922e+10 3854 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.877972e+02 2.764994e+01 5.324050e+05 1.021516e+08 2835 f0ac7beb 4915200 0.000000e+00 9.245688e+02 6.946750e+01 3.363581e+06 3.127419e+09 3638 24c84a50 11059200 0.000000e+00 3.005524e+03 1.690713e+02 1.154422e+07 3.480621e+10 3841 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.865351e+02 2.381101e+01 6.651841e+05 1.261020e+08 3566 f0ac7beb 4915200 0.000000e+00 9.257403e+02 6.896157e+01 3.669635e+06 3.415980e+09 3964 24c84a50 11059200 0.000000e+00 3.007743e+03 1.477912e+02 1.238889e+07 3.735258e+10 4119 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.mirage000066400000000000000000000100421507764646700326110ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 2.645658e+04 4.968429e+02 5.820449e+07 1.540435e+12 2200 24c84a50 11059200 0.000000e+00 8.756135e+04 9.752924e+02 1.866808e+08 1.634805e+13 2132 d46431bb 1228800 0.000000e+00 3.234444e+03 8.877025e+01 1.325799e+07 4.291452e+10 4099 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 8.760921e+02 3.574580e+01 7.074444e+06 6.208182e+09 8075 24c84a50 11059200 0.000000e+00 2.988744e+03 8.136061e+01 2.363499e+07 7.069126e+10 7908 d46431bb 1228800 0.000000e+00 1.911930e+02 1.434147e+01 1.248108e+06 2.399722e+08 6528 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 9.198175e+02 4.677043e+01 6.931745e+06 6.392425e+09 7536 24c84a50 11059200 0.000000e+00 3.016176e+03 6.737054e+01 2.311597e+07 6.975663e+10 7664 d46431bb 1228800 0.000000e+00 1.910500e+02 1.400155e+01 1.317099e+06 2.529832e+08 6894 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 9.143628e+02 4.685332e+01 6.720566e+06 6.161171e+09 7350 24c84a50 11059200 0.000000e+00 3.002393e+03 6.861698e+01 2.339765e+07 7.028562e+10 7793 d46431bb 1228800 0.000000e+00 1.898967e+02 1.421585e+01 1.327568e+06 2.535136e+08 6991 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_atlas.sirocco000066400000000000000000000130661507764646700330170ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 1.164877e+05 2.576301e+04 2.054842e+08 2.510721e+13 1764 f0ac7beb 4915200 0.000000e+00 1.087142e+04 2.109400e+03 2.505863e+07 2.826792e+11 2305 d46431bb 1228800 0.000000e+00 1.613402e+03 3.115535e+02 8.438094e+06 1.412169e+10 5230 24c84a50 11059200 0.000000e+00 3.517390e+04 7.045528e+03 6.925741e+07 2.533794e+12 1969 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.688252e+03 2.597845e+02 1.459721e+07 3.960743e+10 5430 f0ac7beb 4915200 0.000000e+00 2.657700e+02 2.996380e+01 1.356225e+06 3.650255e+08 5103 d46431bb 1228800 0.000000e+00 6.142508e+01 1.012391e+01 4.393736e+05 2.772170e+07 7153 24c84a50 11059200 0.000000e+00 7.851775e+02 4.684799e+01 4.315336e+06 3.400367e+09 5496 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.707789e+03 2.773178e+02 1.421860e+07 3.890480e+10 5251 f0ac7beb 4915200 0.000000e+00 2.693001e+02 2.710216e+01 1.308798e+06 3.560293e+08 4860 d46431bb 1228800 0.000000e+00 6.592485e+01 1.426453e+01 1.071279e+05 7.393038e+06 1625 24c84a50 11059200 0.000000e+00 7.926860e+02 4.760061e+01 4.363736e+06 3.471546e+09 5505 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.706383e+03 2.631153e+02 1.444938e+07 3.947516e+10 5339 f0ac7beb 4915200 0.000000e+00 2.686331e+02 2.912062e+01 1.401996e+06 3.810483e+08 5219 d46431bb 1228800 0.000000e+00 6.317490e+01 1.087216e+01 2.866877e+05 1.864788e+07 4538 24c84a50 11059200 0.000000e+00 7.922324e+02 5.091772e+01 4.156844e+06 3.306790e+09 5247 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.681149e+03 2.665822e+02 1.451306e+07 3.929636e+10 5413 f0ac7beb 4915200 0.000000e+00 2.642224e+02 2.666799e+01 1.450317e+06 3.871098e+08 5489 d46431bb 1228800 0.000000e+00 5.975719e+01 9.345113e+00 4.033610e+05 2.469321e+07 6750 24c84a50 11059200 0.000000e+00 7.867204e+02 4.699968e+01 4.148377e+06 3.275261e+09 5273 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.attila000066400000000000000000000100431507764646700324700ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 2.587052e+04 1.487038e+03 5.386241e+07 1.398052e+12 2082 24c84a50 11059200 0.000000e+00 8.218890e+04 3.347888e+03 1.244340e+08 1.024406e+13 1514 d46431bb 1228800 0.000000e+00 3.265838e+03 1.561177e+02 8.347482e+06 2.732382e+10 2556 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 9.047163e+02 4.943457e+01 7.022408e+06 6.372255e+09 7762 24c84a50 11059200 0.000000e+00 2.963966e+03 7.453353e+01 1.530888e+07 4.540369e+10 5165 d46431bb 1228800 0.000000e+00 1.924610e+02 1.043827e+01 8.556817e+05 1.651698e+08 4446 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 8.810829e+02 4.167975e+01 6.874209e+06 6.070301e+09 7802 24c84a50 11059200 0.000000e+00 2.960803e+03 8.260112e+01 1.519780e+07 4.503271e+10 5133 d46431bb 1228800 0.000000e+00 1.894698e+02 9.561378e+00 8.340462e+05 1.584290e+08 4402 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 8.953024e+02 5.096374e+01 6.835634e+06 6.139790e+09 7635 24c84a50 11059200 0.000000e+00 2.963787e+03 5.048433e+01 1.524275e+07 4.518938e+10 5143 d46431bb 1228800 0.000000e+00 1.803248e+02 8.617192e+00 8.859357e+05 1.601210e+08 4913 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal000066400000000000000000000040251507764646700327710ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.hannibal-pitch000066400000000000000000000040251507764646700340760ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.idgraf000066400000000000000000000217061507764646700324560ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 3.393927e+03 8.566524e+01 3.533078e+06 1.199865e+10 1041 f0ac7beb 4915200 0.000000e+00 2.682238e+04 4.332821e+02 9.951104e+06 2.669820e+11 371 24c84a50 11059200 0.000000e+00 8.930213e+04 1.450773e+03 2.679064e+07 2.393092e+12 300 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.946363e+02 2.537099e+01 6.294539e+05 1.245963e+08 3234 f0ac7beb 4915200 0.000000e+00 9.257288e+02 6.590058e+01 3.791785e+06 3.527953e+09 4096 24c84a50 11059200 0.000000e+00 2.991139e+03 1.645886e+02 1.221282e+07 3.664085e+10 4083 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.954243e+02 2.831383e+01 5.661443e+05 1.129608e+08 2897 f0ac7beb 4915200 0.000000e+00 9.376794e+02 7.341921e+01 3.415966e+06 3.222718e+09 3643 24c84a50 11059200 0.000000e+00 2.995872e+03 1.614697e+02 1.133938e+07 3.407000e+10 3785 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.867261e+02 2.556099e+01 5.342234e+05 1.016227e+08 2861 f0ac7beb 4915200 0.000000e+00 8.996740e+02 6.639270e+01 3.427758e+06 3.100659e+09 3810 24c84a50 11059200 0.000000e+00 2.987519e+03 1.530428e+02 1.113747e+07 3.336072e+10 3728 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.927028e+02 2.478568e+01 6.783137e+05 1.328754e+08 3520 f0ac7beb 4915200 0.000000e+00 9.234475e+02 6.432680e+01 3.846159e+06 3.568960e+09 4165 24c84a50 11059200 0.000000e+00 2.982449e+03 1.542480e+02 1.210278e+07 3.619247e+10 4058 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.868734e+02 2.558187e+01 5.049318e+05 9.612659e+07 2702 f0ac7beb 4915200 0.000000e+00 9.407115e+02 6.874274e+01 3.317889e+06 3.137844e+09 3527 24c84a50 11059200 0.000000e+00 2.972987e+03 1.569773e+02 1.177600e+07 3.510750e+10 3961 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.924732e+02 2.459364e+01 6.245755e+05 1.221768e+08 3245 f0ac7beb 4915200 0.000000e+00 9.173887e+02 7.039530e+01 3.781476e+06 3.489510e+09 4122 24c84a50 11059200 0.000000e+00 3.001859e+03 1.612679e+02 1.156916e+07 3.482922e+10 3854 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.877972e+02 2.764994e+01 5.324050e+05 1.021516e+08 2835 f0ac7beb 4915200 0.000000e+00 9.245688e+02 6.946750e+01 3.363581e+06 3.127419e+09 3638 24c84a50 11059200 0.000000e+00 3.005524e+03 1.690713e+02 1.154422e+07 3.480621e+10 3841 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.865351e+02 2.381101e+01 6.651841e+05 1.261020e+08 3566 f0ac7beb 4915200 0.000000e+00 9.257403e+02 6.896157e+01 3.669635e+06 3.415980e+09 3964 24c84a50 11059200 0.000000e+00 3.007743e+03 1.477912e+02 1.238889e+07 3.735258e+10 4119 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.mirage000066400000000000000000000100421507764646700324550ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 2.645658e+04 4.968429e+02 5.820449e+07 1.540435e+12 2200 24c84a50 11059200 0.000000e+00 8.756135e+04 9.752924e+02 1.866808e+08 1.634805e+13 2132 d46431bb 1228800 0.000000e+00 3.234444e+03 8.877025e+01 1.325799e+07 4.291452e+10 4099 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 8.760921e+02 3.574580e+01 7.074444e+06 6.208182e+09 8075 24c84a50 11059200 0.000000e+00 2.988744e+03 8.136061e+01 2.363499e+07 7.069126e+10 7908 d46431bb 1228800 0.000000e+00 1.911930e+02 1.434147e+01 1.248108e+06 2.399722e+08 6528 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 9.198175e+02 4.677043e+01 6.931745e+06 6.392425e+09 7536 24c84a50 11059200 0.000000e+00 3.016176e+03 6.737054e+01 2.311597e+07 6.975663e+10 7664 d46431bb 1228800 0.000000e+00 1.910500e+02 1.400155e+01 1.317099e+06 2.529832e+08 6894 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 9.143628e+02 4.685332e+01 6.720566e+06 6.161171e+09 7350 24c84a50 11059200 0.000000e+00 3.002393e+03 6.861698e+01 2.339765e+07 7.028562e+10 7793 d46431bb 1228800 0.000000e+00 1.898967e+02 1.421585e+01 1.327568e+06 2.535136e+08 6991 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_goto.sirocco000066400000000000000000000130661507764646700326630ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 1.164877e+05 2.576301e+04 2.054842e+08 2.510721e+13 1764 f0ac7beb 4915200 0.000000e+00 1.087142e+04 2.109400e+03 2.505863e+07 2.826792e+11 2305 d46431bb 1228800 0.000000e+00 1.613402e+03 3.115535e+02 8.438094e+06 1.412169e+10 5230 24c84a50 11059200 0.000000e+00 3.517390e+04 7.045528e+03 6.925741e+07 2.533794e+12 1969 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.688252e+03 2.597845e+02 1.459721e+07 3.960743e+10 5430 f0ac7beb 4915200 0.000000e+00 2.657700e+02 2.996380e+01 1.356225e+06 3.650255e+08 5103 d46431bb 1228800 0.000000e+00 6.142508e+01 1.012391e+01 4.393736e+05 2.772170e+07 7153 24c84a50 11059200 0.000000e+00 7.851775e+02 4.684799e+01 4.315336e+06 3.400367e+09 5496 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.707789e+03 2.773178e+02 1.421860e+07 3.890480e+10 5251 f0ac7beb 4915200 0.000000e+00 2.693001e+02 2.710216e+01 1.308798e+06 3.560293e+08 4860 d46431bb 1228800 0.000000e+00 6.592485e+01 1.426453e+01 1.071279e+05 7.393038e+06 1625 24c84a50 11059200 0.000000e+00 7.926860e+02 4.760061e+01 4.363736e+06 3.471546e+09 5505 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.706383e+03 2.631153e+02 1.444938e+07 3.947516e+10 5339 f0ac7beb 4915200 0.000000e+00 2.686331e+02 2.912062e+01 1.401996e+06 3.810483e+08 5219 d46431bb 1228800 0.000000e+00 6.317490e+01 1.087216e+01 2.866877e+05 1.864788e+07 4538 24c84a50 11059200 0.000000e+00 7.922324e+02 5.091772e+01 4.156844e+06 3.306790e+09 5247 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.681149e+03 2.665822e+02 1.451306e+07 3.929636e+10 5413 f0ac7beb 4915200 0.000000e+00 2.642224e+02 2.666799e+01 1.450317e+06 3.871098e+08 5489 d46431bb 1228800 0.000000e+00 5.975719e+01 9.345113e+00 4.033610e+05 2.469321e+07 6750 24c84a50 11059200 0.000000e+00 7.867204e+02 4.699968e+01 4.148377e+06 3.275261e+09 5273 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.attila000066400000000000000000000100431507764646700333230ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 2.587052e+04 1.487038e+03 5.386241e+07 1.398052e+12 2082 24c84a50 11059200 0.000000e+00 8.218890e+04 3.347888e+03 1.244340e+08 1.024406e+13 1514 d46431bb 1228800 0.000000e+00 3.265838e+03 1.561177e+02 8.347482e+06 2.732382e+10 2556 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 9.047163e+02 4.943457e+01 7.022408e+06 6.372255e+09 7762 24c84a50 11059200 0.000000e+00 2.963966e+03 7.453353e+01 1.530888e+07 4.540369e+10 5165 d46431bb 1228800 0.000000e+00 1.924610e+02 1.043827e+01 8.556817e+05 1.651698e+08 4446 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 8.810829e+02 4.167975e+01 6.874209e+06 6.070301e+09 7802 24c84a50 11059200 0.000000e+00 2.960803e+03 8.260112e+01 1.519780e+07 4.503271e+10 5133 d46431bb 1228800 0.000000e+00 1.894698e+02 9.561378e+00 8.340462e+05 1.584290e+08 4402 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 8.953024e+02 5.096374e+01 6.835634e+06 6.139790e+09 7635 24c84a50 11059200 0.000000e+00 2.963787e+03 5.048433e+01 1.524275e+07 4.518938e+10 5143 d46431bb 1228800 0.000000e+00 1.803248e+02 8.617192e+00 8.859357e+05 1.601210e+08 4913 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.hannibal000066400000000000000000000040251507764646700336240ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 starpu_slu_lu_model_gemm_openblas.hannibal-pitch000066400000000000000000000040251507764646700346520ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.116253e+03 1.361494e+03 9.170526e+08 5.024130e+12 179243 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.228920e+03 1.967478e+03 8.761527e+08 5.229949e+12 167559 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 24c84a50 11059200 0.000000e+00 5.131691e+03 1.494139e+03 8.920059e+08 4.965550e+12 173823 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.idgraf000066400000000000000000000217061507764646700333110ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 3.393927e+03 8.566524e+01 3.533078e+06 1.199865e+10 1041 f0ac7beb 4915200 0.000000e+00 2.682238e+04 4.332821e+02 9.951104e+06 2.669820e+11 371 24c84a50 11059200 0.000000e+00 8.930213e+04 1.450773e+03 2.679064e+07 2.393092e+12 300 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.946363e+02 2.537099e+01 6.294539e+05 1.245963e+08 3234 f0ac7beb 4915200 0.000000e+00 9.257288e+02 6.590058e+01 3.791785e+06 3.527953e+09 4096 24c84a50 11059200 0.000000e+00 2.991139e+03 1.645886e+02 1.221282e+07 3.664085e+10 4083 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.954243e+02 2.831383e+01 5.661443e+05 1.129608e+08 2897 f0ac7beb 4915200 0.000000e+00 9.376794e+02 7.341921e+01 3.415966e+06 3.222718e+09 3643 24c84a50 11059200 0.000000e+00 2.995872e+03 1.614697e+02 1.133938e+07 3.407000e+10 3785 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.867261e+02 2.556099e+01 5.342234e+05 1.016227e+08 2861 f0ac7beb 4915200 0.000000e+00 8.996740e+02 6.639270e+01 3.427758e+06 3.100659e+09 3810 24c84a50 11059200 0.000000e+00 2.987519e+03 1.530428e+02 1.113747e+07 3.336072e+10 3728 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.927028e+02 2.478568e+01 6.783137e+05 1.328754e+08 3520 f0ac7beb 4915200 0.000000e+00 9.234475e+02 6.432680e+01 3.846159e+06 3.568960e+09 4165 24c84a50 11059200 0.000000e+00 2.982449e+03 1.542480e+02 1.210278e+07 3.619247e+10 4058 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.868734e+02 2.558187e+01 5.049318e+05 9.612659e+07 2702 f0ac7beb 4915200 0.000000e+00 9.407115e+02 6.874274e+01 3.317889e+06 3.137844e+09 3527 24c84a50 11059200 0.000000e+00 2.972987e+03 1.569773e+02 1.177600e+07 3.510750e+10 3961 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.924732e+02 2.459364e+01 6.245755e+05 1.221768e+08 3245 f0ac7beb 4915200 0.000000e+00 9.173887e+02 7.039530e+01 3.781476e+06 3.489510e+09 4122 24c84a50 11059200 0.000000e+00 3.001859e+03 1.612679e+02 1.156916e+07 3.482922e+10 3854 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.877972e+02 2.764994e+01 5.324050e+05 1.021516e+08 2835 f0ac7beb 4915200 0.000000e+00 9.245688e+02 6.946750e+01 3.363581e+06 3.127419e+09 3638 24c84a50 11059200 0.000000e+00 3.005524e+03 1.690713e+02 1.154422e+07 3.480621e+10 3841 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d46431bb 1228800 0.000000e+00 1.865351e+02 2.381101e+01 6.651841e+05 1.261020e+08 3566 f0ac7beb 4915200 0.000000e+00 9.257403e+02 6.896157e+01 3.669635e+06 3.415980e+09 3964 24c84a50 11059200 0.000000e+00 3.007743e+03 1.477912e+02 1.238889e+07 3.735258e+10 4119 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.mirage000066400000000000000000000100421507764646700333100ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 2.645658e+04 4.968429e+02 5.820449e+07 1.540435e+12 2200 24c84a50 11059200 0.000000e+00 8.756135e+04 9.752924e+02 1.866808e+08 1.634805e+13 2132 d46431bb 1228800 0.000000e+00 3.234444e+03 8.877025e+01 1.325799e+07 4.291452e+10 4099 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 8.760921e+02 3.574580e+01 7.074444e+06 6.208182e+09 8075 24c84a50 11059200 0.000000e+00 2.988744e+03 8.136061e+01 2.363499e+07 7.069126e+10 7908 d46431bb 1228800 0.000000e+00 1.911930e+02 1.434147e+01 1.248108e+06 2.399722e+08 6528 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 9.198175e+02 4.677043e+01 6.931745e+06 6.392425e+09 7536 24c84a50 11059200 0.000000e+00 3.016176e+03 6.737054e+01 2.311597e+07 6.975663e+10 7664 d46431bb 1228800 0.000000e+00 1.910500e+02 1.400155e+01 1.317099e+06 2.529832e+08 6894 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n f0ac7beb 4915200 0.000000e+00 9.143628e+02 4.685332e+01 6.720566e+06 6.161171e+09 7350 24c84a50 11059200 0.000000e+00 3.002393e+03 6.861698e+01 2.339765e+07 7.028562e+10 7793 d46431bb 1228800 0.000000e+00 1.898967e+02 1.421585e+01 1.327568e+06 2.535136e+08 6991 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_gemm_openblas.sirocco000066400000000000000000000130661507764646700335160ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 1.164877e+05 2.576301e+04 2.054842e+08 2.510721e+13 1764 f0ac7beb 4915200 0.000000e+00 1.087142e+04 2.109400e+03 2.505863e+07 2.826792e+11 2305 d46431bb 1228800 0.000000e+00 1.613402e+03 3.115535e+02 8.438094e+06 1.412169e+10 5230 24c84a50 11059200 0.000000e+00 3.517390e+04 7.045528e+03 6.925741e+07 2.533794e+12 1969 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.688252e+03 2.597845e+02 1.459721e+07 3.960743e+10 5430 f0ac7beb 4915200 0.000000e+00 2.657700e+02 2.996380e+01 1.356225e+06 3.650255e+08 5103 d46431bb 1228800 0.000000e+00 6.142508e+01 1.012391e+01 4.393736e+05 2.772170e+07 7153 24c84a50 11059200 0.000000e+00 7.851775e+02 4.684799e+01 4.315336e+06 3.400367e+09 5496 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.707789e+03 2.773178e+02 1.421860e+07 3.890480e+10 5251 f0ac7beb 4915200 0.000000e+00 2.693001e+02 2.710216e+01 1.308798e+06 3.560293e+08 4860 d46431bb 1228800 0.000000e+00 6.592485e+01 1.426453e+01 1.071279e+05 7.393038e+06 1625 24c84a50 11059200 0.000000e+00 7.926860e+02 4.760061e+01 4.363736e+06 3.471546e+09 5505 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.706383e+03 2.631153e+02 1.444938e+07 3.947516e+10 5339 f0ac7beb 4915200 0.000000e+00 2.686331e+02 2.912062e+01 1.401996e+06 3.810483e+08 5219 d46431bb 1228800 0.000000e+00 6.317490e+01 1.087216e+01 2.866877e+05 1.864788e+07 4538 24c84a50 11059200 0.000000e+00 7.922324e+02 5.091772e+01 4.156844e+06 3.306790e+09 5247 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 8cfc3ba0 24883200 0.000000e+00 2.681149e+03 2.665822e+02 1.451306e+07 3.929636e+10 5413 f0ac7beb 4915200 0.000000e+00 2.642224e+02 2.666799e+01 1.450317e+06 3.871098e+08 5489 d46431bb 1228800 0.000000e+00 5.975719e+01 9.345113e+00 4.033610e+05 2.469321e+07 6750 24c84a50 11059200 0.000000e+00 7.867204e+02 4.699968e+01 4.148377e+06 3.275261e+09 5273 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.attila000066400000000000000000000100121507764646700316160ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 4.182946e+04 4.195402e+03 1.171225e+06 4.948453e+10 28 617e5fe6 3686400 0.000000e+00 1.431791e+05 1.961610e+04 1.431791e+06 2.088506e+11 10 cea37d6d 409600 0.000000e+00 4.839229e+03 3.061560e+02 1.258200e+05 6.113086e+08 26 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.565619e+04 2.729977e+03 4.618114e+05 1.198247e+10 18 617e5fe6 3686400 0.000000e+00 5.517976e+04 5.023576e+03 8.828762e+05 4.912068e+10 16 cea37d6d 409600 0.000000e+00 9.325377e+03 4.741281e+02 9.325377e+04 8.718745e+08 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.512124e+04 2.223761e+03 4.773036e+05 1.208442e+10 19 617e5fe6 3686400 0.000000e+00 5.116041e+04 1.272422e+03 7.674062e+05 3.928511e+10 15 cea37d6d 409600 0.000000e+00 9.353760e+03 7.152342e+02 9.353760e+04 8.800438e+08 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.814234e+04 3.880171e+03 5.065622e+05 1.452685e+10 18 617e5fe6 3686400 0.000000e+00 5.467956e+04 6.741916e+03 8.201934e+05 4.552961e+10 15 cea37d6d 409600 0.000000e+00 1.004502e+04 9.839619e+02 1.004502e+05 1.018706e+09 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal000066400000000000000000000040101507764646700321150ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.hannibal-pitch000066400000000000000000000040101507764646700332220ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.idgraf000066400000000000000000000216221507764646700316050ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 4.307978e+03 6.474305e+01 1.249314e+05 5.383232e+08 29 afdd228b 1638400 0.000000e+00 3.550524e+04 4.451382e+02 3.550524e+05 1.260821e+10 10 617e5fe6 3686400 0.000000e+00 1.169735e+05 9.368471e+02 1.169735e+06 1.368368e+11 10 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 1.140547e+04 1.799023e+03 1.140547e+05 1.333212e+09 10 afdd228b 1638400 0.000000e+00 2.728447e+04 8.307498e+02 2.728447e+05 7.451326e+09 10 617e5fe6 3686400 0.000000e+00 6.234962e+04 7.670296e+03 6.858458e+05 4.340939e+10 11 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 3.084154e+04 4.741973e+03 3.084154e+05 9.736872e+09 10 cea37d6d 409600 0.000000e+00 1.194801e+04 1.916839e+03 1.194801e+05 1.464291e+09 10 617e5fe6 3686400 0.000000e+00 6.590141e+04 1.170188e+04 6.590141e+05 4.479930e+10 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 7.169178e+04 1.134864e+04 7.886096e+05 5.795353e+10 11 cea37d6d 409600 0.000000e+00 1.144166e+04 1.161786e+03 1.144166e+05 1.322613e+09 10 afdd228b 1638400 0.000000e+00 2.872444e+04 2.010264e+03 3.159688e+05 9.120481e+09 11 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 1.150326e+04 1.434617e+03 1.150326e+05 1.343832e+09 10 afdd228b 1638400 0.000000e+00 3.088151e+04 4.858348e+03 3.088151e+05 9.772711e+09 10 617e5fe6 3686400 0.000000e+00 6.102500e+04 7.308309e+03 6.102500e+05 3.777463e+10 10 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 5.751510e+04 2.006299e+03 6.901812e+05 3.974415e+10 12 cea37d6d 409600 0.000000e+00 1.125363e+04 1.219431e+03 1.125363e+05 1.281312e+09 10 afdd228b 1638400 0.000000e+00 3.238968e+04 5.459084e+03 3.238968e+05 1.078893e+10 10 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.926764e+04 3.325362e+03 3.219440e+05 9.544181e+09 11 cea37d6d 409600 0.000000e+00 1.088648e+04 1.129883e+03 1.088648e+05 1.197920e+09 10 617e5fe6 3686400 0.000000e+00 6.506731e+04 1.183046e+04 8.458750e+05 5.685829e+10 13 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.775893e+04 1.476662e+03 3.331071e+05 9.272862e+09 12 cea37d6d 409600 0.000000e+00 1.026126e+04 8.160679e+01 1.026126e+05 1.053001e+09 10 617e5fe6 3686400 0.000000e+00 6.215917e+04 1.023772e+04 6.215917e+05 3.968573e+10 10 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 1.022286e+04 3.601879e+01 1.022286e+05 1.045081e+09 10 afdd228b 1638400 0.000000e+00 2.891317e+04 4.592264e+03 2.891317e+05 8.570604e+09 10 617e5fe6 3686400 0.000000e+00 5.724831e+04 3.045025e+03 7.442280e+05 4.272633e+10 13 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.mirage000066400000000000000000000100121507764646700316040ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 3.789658e+04 4.182352e+03 1.250587e+06 4.797021e+10 33 617e5fe6 3686400 0.000000e+00 1.286436e+05 1.271269e+04 2.958803e+06 3.843483e+11 23 cea37d6d 409600 0.000000e+00 4.236597e+03 2.366692e+02 2.372495e+05 1.008267e+09 56 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.864580e+04 3.233071e+03 5.156243e+05 1.495862e+10 18 617e5fe6 3686400 0.000000e+00 5.948740e+04 4.910517e+03 1.070773e+06 6.413154e+10 18 cea37d6d 409600 0.000000e+00 1.060245e+04 4.247968e+02 1.060245e+05 1.125924e+09 10 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 3.046163e+04 4.754796e+03 5.483094e+05 1.710934e+10 18 617e5fe6 3686400 0.000000e+00 5.865963e+04 4.672589e+03 1.349171e+06 7.964405e+10 23 cea37d6d 409600 0.000000e+00 1.042618e+04 1.817032e+02 1.042618e+05 1.087383e+09 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.939722e+04 4.040622e+03 4.409582e+05 1.320784e+10 15 617e5fe6 3686400 0.000000e+00 5.704610e+04 3.429433e+03 1.255014e+06 7.185241e+10 22 cea37d6d 409600 0.000000e+00 1.049902e+04 4.776188e+02 1.049902e+05 1.104575e+09 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf.sirocco000066400000000000000000000130161507764646700320100ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 4.111343e+05 7.639666e+04 4.111343e+06 1.748679e+12 10 afdd228b 1638400 0.000000e+00 2.923093e+04 1.278718e+03 5.553877e+05 1.626557e+10 19 cea37d6d 409600 0.000000e+00 4.037068e+03 3.335771e+02 2.906689e+05 1.181462e+09 72 617e5fe6 3686400 0.000000e+00 1.029624e+05 6.177928e+03 1.029624e+06 1.063943e+11 10 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 9.866251e+03 7.665217e+02 9.866251e+04 9.793047e+08 10 afdd228b 1638400 0.000000e+00 2.088164e+04 1.502169e+03 4.176328e+05 8.765989e+09 20 617e5fe6 3686400 0.000000e+00 4.153583e+04 9.473225e+02 9.968599e+05 4.142694e+10 24 25ebb669 8294400 0.000000e+00 9.378398e+04 2.901838e+03 1.594328e+06 1.496655e+11 17 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 9.434448e+04 6.197321e+03 2.075578e+06 1.966643e+11 22 afdd228b 1638400 0.000000e+00 2.242688e+04 2.707726e+03 3.139763e+05 7.144153e+09 14 cea37d6d 409600 0.000000e+00 9.238189e+03 1.713378e+02 9.238189e+04 8.537349e+08 10 617e5fe6 3686400 0.000000e+00 4.357190e+04 5.271768e+03 7.842942e+05 3.467343e+10 18 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 9.395404e+04 4.337001e+03 1.973035e+06 1.857696e+11 21 afdd228b 1638400 0.000000e+00 2.096495e+04 7.732458e+02 3.773690e+05 7.922284e+09 18 cea37d6d 409600 0.000000e+00 9.471831e+03 5.475075e+02 9.471831e+04 9.001535e+08 10 617e5fe6 3686400 0.000000e+00 4.647825e+04 9.283373e+03 5.577390e+05 2.695691e+10 12 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 9.896522e+04 1.438963e+04 1.187583e+06 1.200141e+11 12 afdd228b 1638400 0.000000e+00 2.172039e+04 1.567348e+03 2.823650e+05 6.165013e+09 13 cea37d6d 409600 0.000000e+00 9.338877e+03 3.249828e+02 9.338877e+04 8.732025e+08 10 617e5fe6 3686400 0.000000e+00 4.258012e+04 2.921691e+03 8.090223e+05 3.461046e+10 19 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.attila000066400000000000000000000100121507764646700330020ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 4.182946e+04 4.195402e+03 1.171225e+06 4.948453e+10 28 617e5fe6 3686400 0.000000e+00 1.431791e+05 1.961610e+04 1.431791e+06 2.088506e+11 10 cea37d6d 409600 0.000000e+00 4.839229e+03 3.061560e+02 1.258200e+05 6.113086e+08 26 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.565619e+04 2.729977e+03 4.618114e+05 1.198247e+10 18 617e5fe6 3686400 0.000000e+00 5.517976e+04 5.023576e+03 8.828762e+05 4.912068e+10 16 cea37d6d 409600 0.000000e+00 9.325377e+03 4.741281e+02 9.325377e+04 8.718745e+08 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.512124e+04 2.223761e+03 4.773036e+05 1.208442e+10 19 617e5fe6 3686400 0.000000e+00 5.116041e+04 1.272422e+03 7.674062e+05 3.928511e+10 15 cea37d6d 409600 0.000000e+00 9.353760e+03 7.152342e+02 9.353760e+04 8.800438e+08 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.814234e+04 3.880171e+03 5.065622e+05 1.452685e+10 18 617e5fe6 3686400 0.000000e+00 5.467956e+04 6.741916e+03 8.201934e+05 4.552961e+10 15 cea37d6d 409600 0.000000e+00 1.004502e+04 9.839619e+02 1.004502e+05 1.018706e+09 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.hannibal000066400000000000000000000040101507764646700333010ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 starpu_slu_lu_model_getrf_atlas.hannibal-pitch000066400000000000000000000040101507764646700343270ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.idgraf000066400000000000000000000216221507764646700327710ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 4.307978e+03 6.474305e+01 1.249314e+05 5.383232e+08 29 afdd228b 1638400 0.000000e+00 3.550524e+04 4.451382e+02 3.550524e+05 1.260821e+10 10 617e5fe6 3686400 0.000000e+00 1.169735e+05 9.368471e+02 1.169735e+06 1.368368e+11 10 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 1.140547e+04 1.799023e+03 1.140547e+05 1.333212e+09 10 afdd228b 1638400 0.000000e+00 2.728447e+04 8.307498e+02 2.728447e+05 7.451326e+09 10 617e5fe6 3686400 0.000000e+00 6.234962e+04 7.670296e+03 6.858458e+05 4.340939e+10 11 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 3.084154e+04 4.741973e+03 3.084154e+05 9.736872e+09 10 cea37d6d 409600 0.000000e+00 1.194801e+04 1.916839e+03 1.194801e+05 1.464291e+09 10 617e5fe6 3686400 0.000000e+00 6.590141e+04 1.170188e+04 6.590141e+05 4.479930e+10 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 7.169178e+04 1.134864e+04 7.886096e+05 5.795353e+10 11 cea37d6d 409600 0.000000e+00 1.144166e+04 1.161786e+03 1.144166e+05 1.322613e+09 10 afdd228b 1638400 0.000000e+00 2.872444e+04 2.010264e+03 3.159688e+05 9.120481e+09 11 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 1.150326e+04 1.434617e+03 1.150326e+05 1.343832e+09 10 afdd228b 1638400 0.000000e+00 3.088151e+04 4.858348e+03 3.088151e+05 9.772711e+09 10 617e5fe6 3686400 0.000000e+00 6.102500e+04 7.308309e+03 6.102500e+05 3.777463e+10 10 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 5.751510e+04 2.006299e+03 6.901812e+05 3.974415e+10 12 cea37d6d 409600 0.000000e+00 1.125363e+04 1.219431e+03 1.125363e+05 1.281312e+09 10 afdd228b 1638400 0.000000e+00 3.238968e+04 5.459084e+03 3.238968e+05 1.078893e+10 10 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.926764e+04 3.325362e+03 3.219440e+05 9.544181e+09 11 cea37d6d 409600 0.000000e+00 1.088648e+04 1.129883e+03 1.088648e+05 1.197920e+09 10 617e5fe6 3686400 0.000000e+00 6.506731e+04 1.183046e+04 8.458750e+05 5.685829e+10 13 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.775893e+04 1.476662e+03 3.331071e+05 9.272862e+09 12 cea37d6d 409600 0.000000e+00 1.026126e+04 8.160679e+01 1.026126e+05 1.053001e+09 10 617e5fe6 3686400 0.000000e+00 6.215917e+04 1.023772e+04 6.215917e+05 3.968573e+10 10 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 1.022286e+04 3.601879e+01 1.022286e+05 1.045081e+09 10 afdd228b 1638400 0.000000e+00 2.891317e+04 4.592264e+03 2.891317e+05 8.570604e+09 10 617e5fe6 3686400 0.000000e+00 5.724831e+04 3.045025e+03 7.442280e+05 4.272633e+10 13 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.mirage000066400000000000000000000100121507764646700327700ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 3.789658e+04 4.182352e+03 1.250587e+06 4.797021e+10 33 617e5fe6 3686400 0.000000e+00 1.286436e+05 1.271269e+04 2.958803e+06 3.843483e+11 23 cea37d6d 409600 0.000000e+00 4.236597e+03 2.366692e+02 2.372495e+05 1.008267e+09 56 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.864580e+04 3.233071e+03 5.156243e+05 1.495862e+10 18 617e5fe6 3686400 0.000000e+00 5.948740e+04 4.910517e+03 1.070773e+06 6.413154e+10 18 cea37d6d 409600 0.000000e+00 1.060245e+04 4.247968e+02 1.060245e+05 1.125924e+09 10 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 3.046163e+04 4.754796e+03 5.483094e+05 1.710934e+10 18 617e5fe6 3686400 0.000000e+00 5.865963e+04 4.672589e+03 1.349171e+06 7.964405e+10 23 cea37d6d 409600 0.000000e+00 1.042618e+04 1.817032e+02 1.042618e+05 1.087383e+09 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.939722e+04 4.040622e+03 4.409582e+05 1.320784e+10 15 617e5fe6 3686400 0.000000e+00 5.704610e+04 3.429433e+03 1.255014e+06 7.185241e+10 22 cea37d6d 409600 0.000000e+00 1.049902e+04 4.776188e+02 1.049902e+05 1.104575e+09 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_atlas.sirocco000066400000000000000000000130161507764646700331740ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 4.111343e+05 7.639666e+04 4.111343e+06 1.748679e+12 10 afdd228b 1638400 0.000000e+00 2.923093e+04 1.278718e+03 5.553877e+05 1.626557e+10 19 cea37d6d 409600 0.000000e+00 4.037068e+03 3.335771e+02 2.906689e+05 1.181462e+09 72 617e5fe6 3686400 0.000000e+00 1.029624e+05 6.177928e+03 1.029624e+06 1.063943e+11 10 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 9.866251e+03 7.665217e+02 9.866251e+04 9.793047e+08 10 afdd228b 1638400 0.000000e+00 2.088164e+04 1.502169e+03 4.176328e+05 8.765989e+09 20 617e5fe6 3686400 0.000000e+00 4.153583e+04 9.473225e+02 9.968599e+05 4.142694e+10 24 25ebb669 8294400 0.000000e+00 9.378398e+04 2.901838e+03 1.594328e+06 1.496655e+11 17 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 9.434448e+04 6.197321e+03 2.075578e+06 1.966643e+11 22 afdd228b 1638400 0.000000e+00 2.242688e+04 2.707726e+03 3.139763e+05 7.144153e+09 14 cea37d6d 409600 0.000000e+00 9.238189e+03 1.713378e+02 9.238189e+04 8.537349e+08 10 617e5fe6 3686400 0.000000e+00 4.357190e+04 5.271768e+03 7.842942e+05 3.467343e+10 18 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 9.395404e+04 4.337001e+03 1.973035e+06 1.857696e+11 21 afdd228b 1638400 0.000000e+00 2.096495e+04 7.732458e+02 3.773690e+05 7.922284e+09 18 cea37d6d 409600 0.000000e+00 9.471831e+03 5.475075e+02 9.471831e+04 9.001535e+08 10 617e5fe6 3686400 0.000000e+00 4.647825e+04 9.283373e+03 5.577390e+05 2.695691e+10 12 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 9.896522e+04 1.438963e+04 1.187583e+06 1.200141e+11 12 afdd228b 1638400 0.000000e+00 2.172039e+04 1.567348e+03 2.823650e+05 6.165013e+09 13 cea37d6d 409600 0.000000e+00 9.338877e+03 3.249828e+02 9.338877e+04 8.732025e+08 10 617e5fe6 3686400 0.000000e+00 4.258012e+04 2.921691e+03 8.090223e+05 3.461046e+10 19 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.attila000066400000000000000000000100121507764646700326460ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 4.182946e+04 4.195402e+03 1.171225e+06 4.948453e+10 28 617e5fe6 3686400 0.000000e+00 1.431791e+05 1.961610e+04 1.431791e+06 2.088506e+11 10 cea37d6d 409600 0.000000e+00 4.839229e+03 3.061560e+02 1.258200e+05 6.113086e+08 26 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.565619e+04 2.729977e+03 4.618114e+05 1.198247e+10 18 617e5fe6 3686400 0.000000e+00 5.517976e+04 5.023576e+03 8.828762e+05 4.912068e+10 16 cea37d6d 409600 0.000000e+00 9.325377e+03 4.741281e+02 9.325377e+04 8.718745e+08 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.512124e+04 2.223761e+03 4.773036e+05 1.208442e+10 19 617e5fe6 3686400 0.000000e+00 5.116041e+04 1.272422e+03 7.674062e+05 3.928511e+10 15 cea37d6d 409600 0.000000e+00 9.353760e+03 7.152342e+02 9.353760e+04 8.800438e+08 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.814234e+04 3.880171e+03 5.065622e+05 1.452685e+10 18 617e5fe6 3686400 0.000000e+00 5.467956e+04 6.741916e+03 8.201934e+05 4.552961e+10 15 cea37d6d 409600 0.000000e+00 1.004502e+04 9.839619e+02 1.004502e+05 1.018706e+09 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.hannibal000066400000000000000000000040101507764646700331450ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 starpu_slu_lu_model_getrf_goto.hannibal-pitch000066400000000000000000000040101507764646700341730ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.idgraf000066400000000000000000000216221507764646700326350ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 4.307978e+03 6.474305e+01 1.249314e+05 5.383232e+08 29 afdd228b 1638400 0.000000e+00 3.550524e+04 4.451382e+02 3.550524e+05 1.260821e+10 10 617e5fe6 3686400 0.000000e+00 1.169735e+05 9.368471e+02 1.169735e+06 1.368368e+11 10 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 1.140547e+04 1.799023e+03 1.140547e+05 1.333212e+09 10 afdd228b 1638400 0.000000e+00 2.728447e+04 8.307498e+02 2.728447e+05 7.451326e+09 10 617e5fe6 3686400 0.000000e+00 6.234962e+04 7.670296e+03 6.858458e+05 4.340939e+10 11 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 3.084154e+04 4.741973e+03 3.084154e+05 9.736872e+09 10 cea37d6d 409600 0.000000e+00 1.194801e+04 1.916839e+03 1.194801e+05 1.464291e+09 10 617e5fe6 3686400 0.000000e+00 6.590141e+04 1.170188e+04 6.590141e+05 4.479930e+10 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 7.169178e+04 1.134864e+04 7.886096e+05 5.795353e+10 11 cea37d6d 409600 0.000000e+00 1.144166e+04 1.161786e+03 1.144166e+05 1.322613e+09 10 afdd228b 1638400 0.000000e+00 2.872444e+04 2.010264e+03 3.159688e+05 9.120481e+09 11 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 1.150326e+04 1.434617e+03 1.150326e+05 1.343832e+09 10 afdd228b 1638400 0.000000e+00 3.088151e+04 4.858348e+03 3.088151e+05 9.772711e+09 10 617e5fe6 3686400 0.000000e+00 6.102500e+04 7.308309e+03 6.102500e+05 3.777463e+10 10 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 5.751510e+04 2.006299e+03 6.901812e+05 3.974415e+10 12 cea37d6d 409600 0.000000e+00 1.125363e+04 1.219431e+03 1.125363e+05 1.281312e+09 10 afdd228b 1638400 0.000000e+00 3.238968e+04 5.459084e+03 3.238968e+05 1.078893e+10 10 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.926764e+04 3.325362e+03 3.219440e+05 9.544181e+09 11 cea37d6d 409600 0.000000e+00 1.088648e+04 1.129883e+03 1.088648e+05 1.197920e+09 10 617e5fe6 3686400 0.000000e+00 6.506731e+04 1.183046e+04 8.458750e+05 5.685829e+10 13 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.775893e+04 1.476662e+03 3.331071e+05 9.272862e+09 12 cea37d6d 409600 0.000000e+00 1.026126e+04 8.160679e+01 1.026126e+05 1.053001e+09 10 617e5fe6 3686400 0.000000e+00 6.215917e+04 1.023772e+04 6.215917e+05 3.968573e+10 10 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 1.022286e+04 3.601879e+01 1.022286e+05 1.045081e+09 10 afdd228b 1638400 0.000000e+00 2.891317e+04 4.592264e+03 2.891317e+05 8.570604e+09 10 617e5fe6 3686400 0.000000e+00 5.724831e+04 3.045025e+03 7.442280e+05 4.272633e+10 13 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.mirage000066400000000000000000000100121507764646700326340ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 3.789658e+04 4.182352e+03 1.250587e+06 4.797021e+10 33 617e5fe6 3686400 0.000000e+00 1.286436e+05 1.271269e+04 2.958803e+06 3.843483e+11 23 cea37d6d 409600 0.000000e+00 4.236597e+03 2.366692e+02 2.372495e+05 1.008267e+09 56 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.864580e+04 3.233071e+03 5.156243e+05 1.495862e+10 18 617e5fe6 3686400 0.000000e+00 5.948740e+04 4.910517e+03 1.070773e+06 6.413154e+10 18 cea37d6d 409600 0.000000e+00 1.060245e+04 4.247968e+02 1.060245e+05 1.125924e+09 10 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 3.046163e+04 4.754796e+03 5.483094e+05 1.710934e+10 18 617e5fe6 3686400 0.000000e+00 5.865963e+04 4.672589e+03 1.349171e+06 7.964405e+10 23 cea37d6d 409600 0.000000e+00 1.042618e+04 1.817032e+02 1.042618e+05 1.087383e+09 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.939722e+04 4.040622e+03 4.409582e+05 1.320784e+10 15 617e5fe6 3686400 0.000000e+00 5.704610e+04 3.429433e+03 1.255014e+06 7.185241e+10 22 cea37d6d 409600 0.000000e+00 1.049902e+04 4.776188e+02 1.049902e+05 1.104575e+09 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_goto.sirocco000066400000000000000000000130161507764646700330400ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 4.111343e+05 7.639666e+04 4.111343e+06 1.748679e+12 10 afdd228b 1638400 0.000000e+00 2.923093e+04 1.278718e+03 5.553877e+05 1.626557e+10 19 cea37d6d 409600 0.000000e+00 4.037068e+03 3.335771e+02 2.906689e+05 1.181462e+09 72 617e5fe6 3686400 0.000000e+00 1.029624e+05 6.177928e+03 1.029624e+06 1.063943e+11 10 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 9.866251e+03 7.665217e+02 9.866251e+04 9.793047e+08 10 afdd228b 1638400 0.000000e+00 2.088164e+04 1.502169e+03 4.176328e+05 8.765989e+09 20 617e5fe6 3686400 0.000000e+00 4.153583e+04 9.473225e+02 9.968599e+05 4.142694e+10 24 25ebb669 8294400 0.000000e+00 9.378398e+04 2.901838e+03 1.594328e+06 1.496655e+11 17 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 9.434448e+04 6.197321e+03 2.075578e+06 1.966643e+11 22 afdd228b 1638400 0.000000e+00 2.242688e+04 2.707726e+03 3.139763e+05 7.144153e+09 14 cea37d6d 409600 0.000000e+00 9.238189e+03 1.713378e+02 9.238189e+04 8.537349e+08 10 617e5fe6 3686400 0.000000e+00 4.357190e+04 5.271768e+03 7.842942e+05 3.467343e+10 18 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 9.395404e+04 4.337001e+03 1.973035e+06 1.857696e+11 21 afdd228b 1638400 0.000000e+00 2.096495e+04 7.732458e+02 3.773690e+05 7.922284e+09 18 cea37d6d 409600 0.000000e+00 9.471831e+03 5.475075e+02 9.471831e+04 9.001535e+08 10 617e5fe6 3686400 0.000000e+00 4.647825e+04 9.283373e+03 5.577390e+05 2.695691e+10 12 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 9.896522e+04 1.438963e+04 1.187583e+06 1.200141e+11 12 afdd228b 1638400 0.000000e+00 2.172039e+04 1.567348e+03 2.823650e+05 6.165013e+09 13 cea37d6d 409600 0.000000e+00 9.338877e+03 3.249828e+02 9.338877e+04 8.732025e+08 10 617e5fe6 3686400 0.000000e+00 4.258012e+04 2.921691e+03 8.090223e+05 3.461046e+10 19 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.attila000066400000000000000000000100121507764646700335010ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 4.182946e+04 4.195402e+03 1.171225e+06 4.948453e+10 28 617e5fe6 3686400 0.000000e+00 1.431791e+05 1.961610e+04 1.431791e+06 2.088506e+11 10 cea37d6d 409600 0.000000e+00 4.839229e+03 3.061560e+02 1.258200e+05 6.113086e+08 26 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.565619e+04 2.729977e+03 4.618114e+05 1.198247e+10 18 617e5fe6 3686400 0.000000e+00 5.517976e+04 5.023576e+03 8.828762e+05 4.912068e+10 16 cea37d6d 409600 0.000000e+00 9.325377e+03 4.741281e+02 9.325377e+04 8.718745e+08 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.512124e+04 2.223761e+03 4.773036e+05 1.208442e+10 19 617e5fe6 3686400 0.000000e+00 5.116041e+04 1.272422e+03 7.674062e+05 3.928511e+10 15 cea37d6d 409600 0.000000e+00 9.353760e+03 7.152342e+02 9.353760e+04 8.800438e+08 10 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.814234e+04 3.880171e+03 5.065622e+05 1.452685e+10 18 617e5fe6 3686400 0.000000e+00 5.467956e+04 6.741916e+03 8.201934e+05 4.552961e+10 15 cea37d6d 409600 0.000000e+00 1.004502e+04 9.839619e+02 1.004502e+05 1.018706e+09 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.hannibal000066400000000000000000000040101507764646700340000ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 starpu_slu_lu_model_getrf_openblas.hannibal-pitch000066400000000000000000000040101507764646700350260ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 1.250229e+05 4.416720e+03 1.500275e+06 1.878028e+11 12 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.424585e+04 1.140908e+03 4.802014e+07 4.046239e+12 570 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 8.331807e+04 6.460292e+02 5.782274e+07 4.817969e+12 694 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.idgraf000066400000000000000000000216221507764646700334700ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 4.307978e+03 6.474305e+01 1.249314e+05 5.383232e+08 29 afdd228b 1638400 0.000000e+00 3.550524e+04 4.451382e+02 3.550524e+05 1.260821e+10 10 617e5fe6 3686400 0.000000e+00 1.169735e+05 9.368471e+02 1.169735e+06 1.368368e+11 10 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 1.140547e+04 1.799023e+03 1.140547e+05 1.333212e+09 10 afdd228b 1638400 0.000000e+00 2.728447e+04 8.307498e+02 2.728447e+05 7.451326e+09 10 617e5fe6 3686400 0.000000e+00 6.234962e+04 7.670296e+03 6.858458e+05 4.340939e+10 11 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 3.084154e+04 4.741973e+03 3.084154e+05 9.736872e+09 10 cea37d6d 409600 0.000000e+00 1.194801e+04 1.916839e+03 1.194801e+05 1.464291e+09 10 617e5fe6 3686400 0.000000e+00 6.590141e+04 1.170188e+04 6.590141e+05 4.479930e+10 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 7.169178e+04 1.134864e+04 7.886096e+05 5.795353e+10 11 cea37d6d 409600 0.000000e+00 1.144166e+04 1.161786e+03 1.144166e+05 1.322613e+09 10 afdd228b 1638400 0.000000e+00 2.872444e+04 2.010264e+03 3.159688e+05 9.120481e+09 11 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 1.150326e+04 1.434617e+03 1.150326e+05 1.343832e+09 10 afdd228b 1638400 0.000000e+00 3.088151e+04 4.858348e+03 3.088151e+05 9.772711e+09 10 617e5fe6 3686400 0.000000e+00 6.102500e+04 7.308309e+03 6.102500e+05 3.777463e+10 10 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 617e5fe6 3686400 0.000000e+00 5.751510e+04 2.006299e+03 6.901812e+05 3.974415e+10 12 cea37d6d 409600 0.000000e+00 1.125363e+04 1.219431e+03 1.125363e+05 1.281312e+09 10 afdd228b 1638400 0.000000e+00 3.238968e+04 5.459084e+03 3.238968e+05 1.078893e+10 10 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.926764e+04 3.325362e+03 3.219440e+05 9.544181e+09 11 cea37d6d 409600 0.000000e+00 1.088648e+04 1.129883e+03 1.088648e+05 1.197920e+09 10 617e5fe6 3686400 0.000000e+00 6.506731e+04 1.183046e+04 8.458750e+05 5.685829e+10 13 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.775893e+04 1.476662e+03 3.331071e+05 9.272862e+09 12 cea37d6d 409600 0.000000e+00 1.026126e+04 8.160679e+01 1.026126e+05 1.053001e+09 10 617e5fe6 3686400 0.000000e+00 6.215917e+04 1.023772e+04 6.215917e+05 3.968573e+10 10 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 1.022286e+04 3.601879e+01 1.022286e+05 1.045081e+09 10 afdd228b 1638400 0.000000e+00 2.891317e+04 4.592264e+03 2.891317e+05 8.570604e+09 10 617e5fe6 3686400 0.000000e+00 5.724831e+04 3.045025e+03 7.442280e+05 4.272633e+10 13 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.mirage000066400000000000000000000100121507764646700334670ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 3.789658e+04 4.182352e+03 1.250587e+06 4.797021e+10 33 617e5fe6 3686400 0.000000e+00 1.286436e+05 1.271269e+04 2.958803e+06 3.843483e+11 23 cea37d6d 409600 0.000000e+00 4.236597e+03 2.366692e+02 2.372495e+05 1.008267e+09 56 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.864580e+04 3.233071e+03 5.156243e+05 1.495862e+10 18 617e5fe6 3686400 0.000000e+00 5.948740e+04 4.910517e+03 1.070773e+06 6.413154e+10 18 cea37d6d 409600 0.000000e+00 1.060245e+04 4.247968e+02 1.060245e+05 1.125924e+09 10 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 3.046163e+04 4.754796e+03 5.483094e+05 1.710934e+10 18 617e5fe6 3686400 0.000000e+00 5.865963e+04 4.672589e+03 1.349171e+06 7.964405e+10 23 cea37d6d 409600 0.000000e+00 1.042618e+04 1.817032e+02 1.042618e+05 1.087383e+09 10 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n afdd228b 1638400 0.000000e+00 2.939722e+04 4.040622e+03 4.409582e+05 1.320784e+10 15 617e5fe6 3686400 0.000000e+00 5.704610e+04 3.429433e+03 1.255014e+06 7.185241e+10 22 cea37d6d 409600 0.000000e+00 1.049902e+04 4.776188e+02 1.049902e+05 1.104575e+09 10 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_getrf_openblas.sirocco000066400000000000000000000130161507764646700336730ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 4.111343e+05 7.639666e+04 4.111343e+06 1.748679e+12 10 afdd228b 1638400 0.000000e+00 2.923093e+04 1.278718e+03 5.553877e+05 1.626557e+10 19 cea37d6d 409600 0.000000e+00 4.037068e+03 3.335771e+02 2.906689e+05 1.181462e+09 72 617e5fe6 3686400 0.000000e+00 1.029624e+05 6.177928e+03 1.029624e+06 1.063943e+11 10 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n cea37d6d 409600 0.000000e+00 9.866251e+03 7.665217e+02 9.866251e+04 9.793047e+08 10 afdd228b 1638400 0.000000e+00 2.088164e+04 1.502169e+03 4.176328e+05 8.765989e+09 20 617e5fe6 3686400 0.000000e+00 4.153583e+04 9.473225e+02 9.968599e+05 4.142694e+10 24 25ebb669 8294400 0.000000e+00 9.378398e+04 2.901838e+03 1.594328e+06 1.496655e+11 17 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 9.434448e+04 6.197321e+03 2.075578e+06 1.966643e+11 22 afdd228b 1638400 0.000000e+00 2.242688e+04 2.707726e+03 3.139763e+05 7.144153e+09 14 cea37d6d 409600 0.000000e+00 9.238189e+03 1.713378e+02 9.238189e+04 8.537349e+08 10 617e5fe6 3686400 0.000000e+00 4.357190e+04 5.271768e+03 7.842942e+05 3.467343e+10 18 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 9.395404e+04 4.337001e+03 1.973035e+06 1.857696e+11 21 afdd228b 1638400 0.000000e+00 2.096495e+04 7.732458e+02 3.773690e+05 7.922284e+09 18 cea37d6d 409600 0.000000e+00 9.471831e+03 5.475075e+02 9.471831e+04 9.001535e+08 10 617e5fe6 3686400 0.000000e+00 4.647825e+04 9.283373e+03 5.577390e+05 2.695691e+10 12 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 25ebb669 8294400 0.000000e+00 9.896522e+04 1.438963e+04 1.187583e+06 1.200141e+11 12 afdd228b 1638400 0.000000e+00 2.172039e+04 1.567348e+03 2.823650e+05 6.165013e+09 13 cea37d6d 409600 0.000000e+00 9.338877e+03 3.249828e+02 9.338877e+04 8.732025e+08 10 617e5fe6 3686400 0.000000e+00 4.258012e+04 2.921691e+03 8.090223e+05 3.461046e+10 19 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.attila000066400000000000000000000100221507764646700321640ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.416946e+04 8.998511e+02 5.341885e+06 7.599687e+10 377 ff82dda0 7372800 0.000000e+00 4.394377e+04 1.700468e+03 1.138144e+07 5.008920e+11 259 2c1922b7 819200 0.000000e+00 1.978198e+03 1.079993e+02 7.517154e+05 1.491475e+09 380 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.676312e+03 2.039650e+02 4.549731e+05 1.224722e+09 170 ff82dda0 7372800 0.000000e+00 6.450199e+03 3.193507e+02 5.482669e+05 3.545099e+09 85 2c1922b7 819200 0.000000e+00 7.090855e+02 1.344985e+02 5.247233e+04 3.854602e+07 74 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.648361e+03 2.330106e+02 2.913197e+05 7.774920e+08 110 ff82dda0 7372800 0.000000e+00 3.907893e+03 1.767346e+02 3.790657e+05 1.484378e+09 97 2c1922b7 819200 0.000000e+00 5.977702e+02 1.137267e+02 6.695026e+04 4.146945e+07 112 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.649815e+03 2.112061e+02 4.054218e+05 1.081118e+09 153 ff82dda0 7372800 0.000000e+00 6.517136e+03 3.918474e+02 3.454082e+05 2.259210e+09 53 2c1922b7 819200 0.000000e+00 6.507707e+02 8.750699e+01 4.750626e+04 3.147468e+07 73 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal000066400000000000000000000040141507764646700324660ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.hannibal-pitch000066400000000000000000000040141507764646700335730ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.idgraf000066400000000000000000000216361507764646700321570ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 2.469013e+03 5.595193e+01 2.765294e+05 6.831054e+08 112 d39bff17 3276800 0.000000e+00 1.667528e+04 1.964808e+02 1.300672e+06 2.169208e+10 78 ff82dda0 7372800 0.000000e+00 5.216745e+04 4.664151e+02 3.443052e+06 1.796296e+11 66 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.490410e+02 1.344248e+02 7.415506e+04 5.733412e+07 99 d39bff17 3276800 0.000000e+00 2.737524e+03 2.974057e+02 3.942034e+05 1.091878e+09 144 ff82dda0 7372800 0.000000e+00 7.212728e+03 1.319942e+03 6.924219e+05 5.161506e+09 96 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.688939e+02 1.457751e+02 6.843156e+04 5.450789e+07 89 d39bff17 3276800 0.000000e+00 2.735563e+03 2.889694e+02 2.899697e+05 8.020820e+08 106 ff82dda0 7372800 0.000000e+00 6.820126e+03 9.314994e+02 7.638542e+05 5.306763e+09 112 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.150281e+02 1.235393e+02 6.363750e+04 4.686092e+07 89 d39bff17 3276800 0.000000e+00 2.835249e+03 4.125186e+02 1.899617e+05 5.499903e+08 67 ff82dda0 7372800 0.000000e+00 6.720945e+03 7.632032e+02 6.989783e+05 4.758372e+09 104 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.190609e+02 1.144317e+02 7.406327e+04 5.460474e+07 103 d39bff17 3276800 0.000000e+00 2.867186e+03 4.168496e+02 2.838514e+05 8.310575e+08 99 ff82dda0 7372800 0.000000e+00 6.809425e+03 9.031920e+02 6.400859e+05 4.435298e+09 94 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.136273e+02 1.258701e+02 7.350362e+04 5.408605e+07 103 d39bff17 3276800 0.000000e+00 2.942246e+03 4.585544e+02 1.706502e+05 5.142907e+08 58 ff82dda0 7372800 0.000000e+00 6.744194e+03 8.416374e+02 5.597681e+05 3.833978e+09 83 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.204798e+02 9.746533e+01 1.080720e+05 7.928859e+07 150 d39bff17 3276800 0.000000e+00 2.539831e+03 4.296517e+02 3.885942e+05 1.015208e+09 153 ff82dda0 7372800 0.000000e+00 7.293979e+03 1.385713e+03 6.929280e+05 5.236621e+09 95 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.460951e+02 1.203288e+02 7.386342e+04 5.654256e+07 99 d39bff17 3276800 0.000000e+00 2.972783e+03 5.066224e+02 2.259315e+05 6.911522e+08 76 ff82dda0 7372800 0.000000e+00 6.643349e+03 8.230064e+02 6.510482e+05 4.391520e+09 98 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.518059e+02 1.406096e+02 8.495406e+04 6.610309e+07 113 d39bff17 3276800 0.000000e+00 2.794983e+03 3.357608e+02 4.164524e+05 1.180775e+09 149 ff82dda0 7372800 0.000000e+00 6.735838e+03 7.525487e+02 6.803197e+05 4.639723e+09 101 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.mirage000066400000000000000000000100261507764646700321560ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.615269e+04 4.099119e+02 4.748890e+06 7.675673e+10 294 ff82dda0 7372800 0.000000e+00 5.118532e+04 6.422962e+02 1.530441e+07 7.834845e+11 299 2c1922b7 819200 0.000000e+00 2.296074e+03 7.445272e+01 1.021753e+06 2.348487e+09 445 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.740712e+03 2.663471e+02 5.755494e+05 1.592313e+09 210 ff82dda0 7372800 0.000000e+00 6.504044e+03 4.912781e+02 1.385361e+06 9.061859e+09 213 2c1922b7 819200 0.000000e+00 6.801212e+02 1.149855e+02 1.129001e+05 7.898057e+07 166 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.716114e+03 2.688407e+02 4.237138e+05 1.162130e+09 156 ff82dda0 7372800 0.000000e+00 6.512491e+03 5.367987e+02 8.270864e+05 5.422988e+09 127 2c1922b7 819200 0.000000e+00 7.284912e+02 1.021807e+02 1.049027e+05 7.792421e+07 144 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.294336e+03 5.071880e+02 4.909878e+05 1.181540e+09 214 ff82dda0 7372800 0.000000e+00 6.469485e+03 5.370376e+02 7.698688e+05 5.014976e+09 119 2c1922b7 819200 0.000000e+00 7.112055e+02 1.136474e+02 1.002800e+05 7.314078e+07 141 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll.sirocco000066400000000000000000000130411507764646700323530ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.637990e+04 1.036285e+04 8.118706e+06 4.731958e+11 144 d39bff17 3276800 0.000000e+00 5.106660e+03 6.848530e+02 2.134584e+06 1.109665e+10 418 2c1922b7 819200 0.000000e+00 4.245334e+03 7.020174e+02 6.368000e+04 2.777353e+08 15 ff82dda0 7372800 0.000000e+00 1.726784e+04 3.264426e+03 3.021872e+06 5.404608e+10 175 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.668892e+03 6.964909e+02 1.394547e+06 8.024874e+09 246 d39bff17 3276800 0.000000e+00 1.216432e+03 1.410794e+02 1.934127e+05 2.384382e+08 159 2c1922b7 819200 0.000000e+00 4.901281e+02 6.729653e+01 6.616730e+04 3.304185e+07 135 ff82dda0 7372800 0.000000e+00 2.106719e+03 2.638200e+02 5.646006e+05 1.208108e+09 268 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.677828e+03 6.218985e+02 1.311578e+06 7.536257e+09 231 d39bff17 3276800 0.000000e+00 1.199302e+03 1.658297e+02 1.774966e+05 2.169419e+08 148 2c1922b7 819200 0.000000e+00 4.968224e+02 7.860110e+01 5.415364e+04 2.757816e+07 109 ff82dda0 7372800 0.000000e+00 2.138085e+03 2.696288e+02 6.371492e+05 1.383944e+09 298 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.593766e+03 7.653530e+02 1.510317e+06 8.606516e+09 270 d39bff17 3276800 0.000000e+00 1.148300e+03 2.163448e+02 2.021009e+05 2.403102e+08 176 2c1922b7 819200 0.000000e+00 8.901347e+01 1.918734e+01 2.412265e+04 2.247011e+06 271 ff82dda0 7372800 0.000000e+00 2.196957e+03 3.265420e+02 4.349975e+05 9.767837e+08 198 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.652338e+03 6.245997e+02 1.520479e+06 8.699205e+09 269 d39bff17 3276800 0.000000e+00 1.203544e+03 1.679024e+02 2.286733e+05 2.805746e+08 190 2c1922b7 819200 0.000000e+00 4.930666e+02 7.623523e+01 7.642532e+04 3.858360e+07 155 ff82dda0 7372800 0.000000e+00 2.164310e+03 2.607466e+02 4.869698e+05 1.069251e+09 225 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.attila000066400000000000000000000100221507764646700333500ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.416946e+04 8.998511e+02 5.341885e+06 7.599687e+10 377 ff82dda0 7372800 0.000000e+00 4.394377e+04 1.700468e+03 1.138144e+07 5.008920e+11 259 2c1922b7 819200 0.000000e+00 1.978198e+03 1.079993e+02 7.517154e+05 1.491475e+09 380 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.676312e+03 2.039650e+02 4.549731e+05 1.224722e+09 170 ff82dda0 7372800 0.000000e+00 6.450199e+03 3.193507e+02 5.482669e+05 3.545099e+09 85 2c1922b7 819200 0.000000e+00 7.090855e+02 1.344985e+02 5.247233e+04 3.854602e+07 74 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.648361e+03 2.330106e+02 2.913197e+05 7.774920e+08 110 ff82dda0 7372800 0.000000e+00 3.907893e+03 1.767346e+02 3.790657e+05 1.484378e+09 97 2c1922b7 819200 0.000000e+00 5.977702e+02 1.137267e+02 6.695026e+04 4.146945e+07 112 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.649815e+03 2.112061e+02 4.054218e+05 1.081118e+09 153 ff82dda0 7372800 0.000000e+00 6.517136e+03 3.918474e+02 3.454082e+05 2.259210e+09 53 2c1922b7 819200 0.000000e+00 6.507707e+02 8.750699e+01 4.750626e+04 3.147468e+07 73 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.hannibal000066400000000000000000000040141507764646700336520ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 starpu_slu_lu_model_trsm_ll_atlas.hannibal-pitch000066400000000000000000000040141507764646700347000ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.idgraf000066400000000000000000000216361507764646700333430ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 2.469013e+03 5.595193e+01 2.765294e+05 6.831054e+08 112 d39bff17 3276800 0.000000e+00 1.667528e+04 1.964808e+02 1.300672e+06 2.169208e+10 78 ff82dda0 7372800 0.000000e+00 5.216745e+04 4.664151e+02 3.443052e+06 1.796296e+11 66 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.490410e+02 1.344248e+02 7.415506e+04 5.733412e+07 99 d39bff17 3276800 0.000000e+00 2.737524e+03 2.974057e+02 3.942034e+05 1.091878e+09 144 ff82dda0 7372800 0.000000e+00 7.212728e+03 1.319942e+03 6.924219e+05 5.161506e+09 96 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.688939e+02 1.457751e+02 6.843156e+04 5.450789e+07 89 d39bff17 3276800 0.000000e+00 2.735563e+03 2.889694e+02 2.899697e+05 8.020820e+08 106 ff82dda0 7372800 0.000000e+00 6.820126e+03 9.314994e+02 7.638542e+05 5.306763e+09 112 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.150281e+02 1.235393e+02 6.363750e+04 4.686092e+07 89 d39bff17 3276800 0.000000e+00 2.835249e+03 4.125186e+02 1.899617e+05 5.499903e+08 67 ff82dda0 7372800 0.000000e+00 6.720945e+03 7.632032e+02 6.989783e+05 4.758372e+09 104 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.190609e+02 1.144317e+02 7.406327e+04 5.460474e+07 103 d39bff17 3276800 0.000000e+00 2.867186e+03 4.168496e+02 2.838514e+05 8.310575e+08 99 ff82dda0 7372800 0.000000e+00 6.809425e+03 9.031920e+02 6.400859e+05 4.435298e+09 94 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.136273e+02 1.258701e+02 7.350362e+04 5.408605e+07 103 d39bff17 3276800 0.000000e+00 2.942246e+03 4.585544e+02 1.706502e+05 5.142907e+08 58 ff82dda0 7372800 0.000000e+00 6.744194e+03 8.416374e+02 5.597681e+05 3.833978e+09 83 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.204798e+02 9.746533e+01 1.080720e+05 7.928859e+07 150 d39bff17 3276800 0.000000e+00 2.539831e+03 4.296517e+02 3.885942e+05 1.015208e+09 153 ff82dda0 7372800 0.000000e+00 7.293979e+03 1.385713e+03 6.929280e+05 5.236621e+09 95 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.460951e+02 1.203288e+02 7.386342e+04 5.654256e+07 99 d39bff17 3276800 0.000000e+00 2.972783e+03 5.066224e+02 2.259315e+05 6.911522e+08 76 ff82dda0 7372800 0.000000e+00 6.643349e+03 8.230064e+02 6.510482e+05 4.391520e+09 98 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.518059e+02 1.406096e+02 8.495406e+04 6.610309e+07 113 d39bff17 3276800 0.000000e+00 2.794983e+03 3.357608e+02 4.164524e+05 1.180775e+09 149 ff82dda0 7372800 0.000000e+00 6.735838e+03 7.525487e+02 6.803197e+05 4.639723e+09 101 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.mirage000066400000000000000000000100261507764646700333420ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.615269e+04 4.099119e+02 4.748890e+06 7.675673e+10 294 ff82dda0 7372800 0.000000e+00 5.118532e+04 6.422962e+02 1.530441e+07 7.834845e+11 299 2c1922b7 819200 0.000000e+00 2.296074e+03 7.445272e+01 1.021753e+06 2.348487e+09 445 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.740712e+03 2.663471e+02 5.755494e+05 1.592313e+09 210 ff82dda0 7372800 0.000000e+00 6.504044e+03 4.912781e+02 1.385361e+06 9.061859e+09 213 2c1922b7 819200 0.000000e+00 6.801212e+02 1.149855e+02 1.129001e+05 7.898057e+07 166 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.716114e+03 2.688407e+02 4.237138e+05 1.162130e+09 156 ff82dda0 7372800 0.000000e+00 6.512491e+03 5.367987e+02 8.270864e+05 5.422988e+09 127 2c1922b7 819200 0.000000e+00 7.284912e+02 1.021807e+02 1.049027e+05 7.792421e+07 144 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.294336e+03 5.071880e+02 4.909878e+05 1.181540e+09 214 ff82dda0 7372800 0.000000e+00 6.469485e+03 5.370376e+02 7.698688e+05 5.014976e+09 119 2c1922b7 819200 0.000000e+00 7.112055e+02 1.136474e+02 1.002800e+05 7.314078e+07 141 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_atlas.sirocco000066400000000000000000000130411507764646700335370ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.637990e+04 1.036285e+04 8.118706e+06 4.731958e+11 144 d39bff17 3276800 0.000000e+00 5.106660e+03 6.848530e+02 2.134584e+06 1.109665e+10 418 2c1922b7 819200 0.000000e+00 4.245334e+03 7.020174e+02 6.368000e+04 2.777353e+08 15 ff82dda0 7372800 0.000000e+00 1.726784e+04 3.264426e+03 3.021872e+06 5.404608e+10 175 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.668892e+03 6.964909e+02 1.394547e+06 8.024874e+09 246 d39bff17 3276800 0.000000e+00 1.216432e+03 1.410794e+02 1.934127e+05 2.384382e+08 159 2c1922b7 819200 0.000000e+00 4.901281e+02 6.729653e+01 6.616730e+04 3.304185e+07 135 ff82dda0 7372800 0.000000e+00 2.106719e+03 2.638200e+02 5.646006e+05 1.208108e+09 268 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.677828e+03 6.218985e+02 1.311578e+06 7.536257e+09 231 d39bff17 3276800 0.000000e+00 1.199302e+03 1.658297e+02 1.774966e+05 2.169419e+08 148 2c1922b7 819200 0.000000e+00 4.968224e+02 7.860110e+01 5.415364e+04 2.757816e+07 109 ff82dda0 7372800 0.000000e+00 2.138085e+03 2.696288e+02 6.371492e+05 1.383944e+09 298 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.593766e+03 7.653530e+02 1.510317e+06 8.606516e+09 270 d39bff17 3276800 0.000000e+00 1.148300e+03 2.163448e+02 2.021009e+05 2.403102e+08 176 2c1922b7 819200 0.000000e+00 8.901347e+01 1.918734e+01 2.412265e+04 2.247011e+06 271 ff82dda0 7372800 0.000000e+00 2.196957e+03 3.265420e+02 4.349975e+05 9.767837e+08 198 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.652338e+03 6.245997e+02 1.520479e+06 8.699205e+09 269 d39bff17 3276800 0.000000e+00 1.203544e+03 1.679024e+02 2.286733e+05 2.805746e+08 190 2c1922b7 819200 0.000000e+00 4.930666e+02 7.623523e+01 7.642532e+04 3.858360e+07 155 ff82dda0 7372800 0.000000e+00 2.164310e+03 2.607466e+02 4.869698e+05 1.069251e+09 225 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.attila000066400000000000000000000100221507764646700332140ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.416946e+04 8.998511e+02 5.341885e+06 7.599687e+10 377 ff82dda0 7372800 0.000000e+00 4.394377e+04 1.700468e+03 1.138144e+07 5.008920e+11 259 2c1922b7 819200 0.000000e+00 1.978198e+03 1.079993e+02 7.517154e+05 1.491475e+09 380 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.676312e+03 2.039650e+02 4.549731e+05 1.224722e+09 170 ff82dda0 7372800 0.000000e+00 6.450199e+03 3.193507e+02 5.482669e+05 3.545099e+09 85 2c1922b7 819200 0.000000e+00 7.090855e+02 1.344985e+02 5.247233e+04 3.854602e+07 74 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.648361e+03 2.330106e+02 2.913197e+05 7.774920e+08 110 ff82dda0 7372800 0.000000e+00 3.907893e+03 1.767346e+02 3.790657e+05 1.484378e+09 97 2c1922b7 819200 0.000000e+00 5.977702e+02 1.137267e+02 6.695026e+04 4.146945e+07 112 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.649815e+03 2.112061e+02 4.054218e+05 1.081118e+09 153 ff82dda0 7372800 0.000000e+00 6.517136e+03 3.918474e+02 3.454082e+05 2.259210e+09 53 2c1922b7 819200 0.000000e+00 6.507707e+02 8.750699e+01 4.750626e+04 3.147468e+07 73 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.hannibal000066400000000000000000000040141507764646700335160ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 starpu_slu_lu_model_trsm_ll_goto.hannibal-pitch000066400000000000000000000040141507764646700345440ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.idgraf000066400000000000000000000216361507764646700332070ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 2.469013e+03 5.595193e+01 2.765294e+05 6.831054e+08 112 d39bff17 3276800 0.000000e+00 1.667528e+04 1.964808e+02 1.300672e+06 2.169208e+10 78 ff82dda0 7372800 0.000000e+00 5.216745e+04 4.664151e+02 3.443052e+06 1.796296e+11 66 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.490410e+02 1.344248e+02 7.415506e+04 5.733412e+07 99 d39bff17 3276800 0.000000e+00 2.737524e+03 2.974057e+02 3.942034e+05 1.091878e+09 144 ff82dda0 7372800 0.000000e+00 7.212728e+03 1.319942e+03 6.924219e+05 5.161506e+09 96 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.688939e+02 1.457751e+02 6.843156e+04 5.450789e+07 89 d39bff17 3276800 0.000000e+00 2.735563e+03 2.889694e+02 2.899697e+05 8.020820e+08 106 ff82dda0 7372800 0.000000e+00 6.820126e+03 9.314994e+02 7.638542e+05 5.306763e+09 112 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.150281e+02 1.235393e+02 6.363750e+04 4.686092e+07 89 d39bff17 3276800 0.000000e+00 2.835249e+03 4.125186e+02 1.899617e+05 5.499903e+08 67 ff82dda0 7372800 0.000000e+00 6.720945e+03 7.632032e+02 6.989783e+05 4.758372e+09 104 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.190609e+02 1.144317e+02 7.406327e+04 5.460474e+07 103 d39bff17 3276800 0.000000e+00 2.867186e+03 4.168496e+02 2.838514e+05 8.310575e+08 99 ff82dda0 7372800 0.000000e+00 6.809425e+03 9.031920e+02 6.400859e+05 4.435298e+09 94 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.136273e+02 1.258701e+02 7.350362e+04 5.408605e+07 103 d39bff17 3276800 0.000000e+00 2.942246e+03 4.585544e+02 1.706502e+05 5.142907e+08 58 ff82dda0 7372800 0.000000e+00 6.744194e+03 8.416374e+02 5.597681e+05 3.833978e+09 83 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.204798e+02 9.746533e+01 1.080720e+05 7.928859e+07 150 d39bff17 3276800 0.000000e+00 2.539831e+03 4.296517e+02 3.885942e+05 1.015208e+09 153 ff82dda0 7372800 0.000000e+00 7.293979e+03 1.385713e+03 6.929280e+05 5.236621e+09 95 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.460951e+02 1.203288e+02 7.386342e+04 5.654256e+07 99 d39bff17 3276800 0.000000e+00 2.972783e+03 5.066224e+02 2.259315e+05 6.911522e+08 76 ff82dda0 7372800 0.000000e+00 6.643349e+03 8.230064e+02 6.510482e+05 4.391520e+09 98 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.518059e+02 1.406096e+02 8.495406e+04 6.610309e+07 113 d39bff17 3276800 0.000000e+00 2.794983e+03 3.357608e+02 4.164524e+05 1.180775e+09 149 ff82dda0 7372800 0.000000e+00 6.735838e+03 7.525487e+02 6.803197e+05 4.639723e+09 101 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.mirage000066400000000000000000000100261507764646700332060ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.615269e+04 4.099119e+02 4.748890e+06 7.675673e+10 294 ff82dda0 7372800 0.000000e+00 5.118532e+04 6.422962e+02 1.530441e+07 7.834845e+11 299 2c1922b7 819200 0.000000e+00 2.296074e+03 7.445272e+01 1.021753e+06 2.348487e+09 445 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.740712e+03 2.663471e+02 5.755494e+05 1.592313e+09 210 ff82dda0 7372800 0.000000e+00 6.504044e+03 4.912781e+02 1.385361e+06 9.061859e+09 213 2c1922b7 819200 0.000000e+00 6.801212e+02 1.149855e+02 1.129001e+05 7.898057e+07 166 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.716114e+03 2.688407e+02 4.237138e+05 1.162130e+09 156 ff82dda0 7372800 0.000000e+00 6.512491e+03 5.367987e+02 8.270864e+05 5.422988e+09 127 2c1922b7 819200 0.000000e+00 7.284912e+02 1.021807e+02 1.049027e+05 7.792421e+07 144 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.294336e+03 5.071880e+02 4.909878e+05 1.181540e+09 214 ff82dda0 7372800 0.000000e+00 6.469485e+03 5.370376e+02 7.698688e+05 5.014976e+09 119 2c1922b7 819200 0.000000e+00 7.112055e+02 1.136474e+02 1.002800e+05 7.314078e+07 141 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_goto.sirocco000066400000000000000000000130411507764646700334030ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.637990e+04 1.036285e+04 8.118706e+06 4.731958e+11 144 d39bff17 3276800 0.000000e+00 5.106660e+03 6.848530e+02 2.134584e+06 1.109665e+10 418 2c1922b7 819200 0.000000e+00 4.245334e+03 7.020174e+02 6.368000e+04 2.777353e+08 15 ff82dda0 7372800 0.000000e+00 1.726784e+04 3.264426e+03 3.021872e+06 5.404608e+10 175 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.668892e+03 6.964909e+02 1.394547e+06 8.024874e+09 246 d39bff17 3276800 0.000000e+00 1.216432e+03 1.410794e+02 1.934127e+05 2.384382e+08 159 2c1922b7 819200 0.000000e+00 4.901281e+02 6.729653e+01 6.616730e+04 3.304185e+07 135 ff82dda0 7372800 0.000000e+00 2.106719e+03 2.638200e+02 5.646006e+05 1.208108e+09 268 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.677828e+03 6.218985e+02 1.311578e+06 7.536257e+09 231 d39bff17 3276800 0.000000e+00 1.199302e+03 1.658297e+02 1.774966e+05 2.169419e+08 148 2c1922b7 819200 0.000000e+00 4.968224e+02 7.860110e+01 5.415364e+04 2.757816e+07 109 ff82dda0 7372800 0.000000e+00 2.138085e+03 2.696288e+02 6.371492e+05 1.383944e+09 298 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.593766e+03 7.653530e+02 1.510317e+06 8.606516e+09 270 d39bff17 3276800 0.000000e+00 1.148300e+03 2.163448e+02 2.021009e+05 2.403102e+08 176 2c1922b7 819200 0.000000e+00 8.901347e+01 1.918734e+01 2.412265e+04 2.247011e+06 271 ff82dda0 7372800 0.000000e+00 2.196957e+03 3.265420e+02 4.349975e+05 9.767837e+08 198 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.652338e+03 6.245997e+02 1.520479e+06 8.699205e+09 269 d39bff17 3276800 0.000000e+00 1.203544e+03 1.679024e+02 2.286733e+05 2.805746e+08 190 2c1922b7 819200 0.000000e+00 4.930666e+02 7.623523e+01 7.642532e+04 3.858360e+07 155 ff82dda0 7372800 0.000000e+00 2.164310e+03 2.607466e+02 4.869698e+05 1.069251e+09 225 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.attila000066400000000000000000000100221507764646700340470ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.416946e+04 8.998511e+02 5.341885e+06 7.599687e+10 377 ff82dda0 7372800 0.000000e+00 4.394377e+04 1.700468e+03 1.138144e+07 5.008920e+11 259 2c1922b7 819200 0.000000e+00 1.978198e+03 1.079993e+02 7.517154e+05 1.491475e+09 380 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.676312e+03 2.039650e+02 4.549731e+05 1.224722e+09 170 ff82dda0 7372800 0.000000e+00 6.450199e+03 3.193507e+02 5.482669e+05 3.545099e+09 85 2c1922b7 819200 0.000000e+00 7.090855e+02 1.344985e+02 5.247233e+04 3.854602e+07 74 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.648361e+03 2.330106e+02 2.913197e+05 7.774920e+08 110 ff82dda0 7372800 0.000000e+00 3.907893e+03 1.767346e+02 3.790657e+05 1.484378e+09 97 2c1922b7 819200 0.000000e+00 5.977702e+02 1.137267e+02 6.695026e+04 4.146945e+07 112 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.649815e+03 2.112061e+02 4.054218e+05 1.081118e+09 153 ff82dda0 7372800 0.000000e+00 6.517136e+03 3.918474e+02 3.454082e+05 2.259210e+09 53 2c1922b7 819200 0.000000e+00 6.507707e+02 8.750699e+01 4.750626e+04 3.147468e+07 73 starpu_slu_lu_model_trsm_ll_openblas.hannibal000066400000000000000000000040141507764646700342720ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 starpu_slu_lu_model_trsm_ll_openblas.hannibal-pitch000066400000000000000000000040141507764646700353770ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.072902e+04 3.731292e+03 7.780684e+07 9.357572e+11 7252 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.250147e+04 5.489974e+03 7.944684e+07 1.184741e+12 6355 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.131230e+04 4.120480e+03 8.165221e+07 1.046224e+12 7218 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.idgraf000066400000000000000000000216361507764646700340420ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 2.469013e+03 5.595193e+01 2.765294e+05 6.831054e+08 112 d39bff17 3276800 0.000000e+00 1.667528e+04 1.964808e+02 1.300672e+06 2.169208e+10 78 ff82dda0 7372800 0.000000e+00 5.216745e+04 4.664151e+02 3.443052e+06 1.796296e+11 66 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.490410e+02 1.344248e+02 7.415506e+04 5.733412e+07 99 d39bff17 3276800 0.000000e+00 2.737524e+03 2.974057e+02 3.942034e+05 1.091878e+09 144 ff82dda0 7372800 0.000000e+00 7.212728e+03 1.319942e+03 6.924219e+05 5.161506e+09 96 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.688939e+02 1.457751e+02 6.843156e+04 5.450789e+07 89 d39bff17 3276800 0.000000e+00 2.735563e+03 2.889694e+02 2.899697e+05 8.020820e+08 106 ff82dda0 7372800 0.000000e+00 6.820126e+03 9.314994e+02 7.638542e+05 5.306763e+09 112 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.150281e+02 1.235393e+02 6.363750e+04 4.686092e+07 89 d39bff17 3276800 0.000000e+00 2.835249e+03 4.125186e+02 1.899617e+05 5.499903e+08 67 ff82dda0 7372800 0.000000e+00 6.720945e+03 7.632032e+02 6.989783e+05 4.758372e+09 104 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.190609e+02 1.144317e+02 7.406327e+04 5.460474e+07 103 d39bff17 3276800 0.000000e+00 2.867186e+03 4.168496e+02 2.838514e+05 8.310575e+08 99 ff82dda0 7372800 0.000000e+00 6.809425e+03 9.031920e+02 6.400859e+05 4.435298e+09 94 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.136273e+02 1.258701e+02 7.350362e+04 5.408605e+07 103 d39bff17 3276800 0.000000e+00 2.942246e+03 4.585544e+02 1.706502e+05 5.142907e+08 58 ff82dda0 7372800 0.000000e+00 6.744194e+03 8.416374e+02 5.597681e+05 3.833978e+09 83 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.204798e+02 9.746533e+01 1.080720e+05 7.928859e+07 150 d39bff17 3276800 0.000000e+00 2.539831e+03 4.296517e+02 3.885942e+05 1.015208e+09 153 ff82dda0 7372800 0.000000e+00 7.293979e+03 1.385713e+03 6.929280e+05 5.236621e+09 95 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.460951e+02 1.203288e+02 7.386342e+04 5.654256e+07 99 d39bff17 3276800 0.000000e+00 2.972783e+03 5.066224e+02 2.259315e+05 6.911522e+08 76 ff82dda0 7372800 0.000000e+00 6.643349e+03 8.230064e+02 6.510482e+05 4.391520e+09 98 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 7.518059e+02 1.406096e+02 8.495406e+04 6.610309e+07 113 d39bff17 3276800 0.000000e+00 2.794983e+03 3.357608e+02 4.164524e+05 1.180775e+09 149 ff82dda0 7372800 0.000000e+00 6.735838e+03 7.525487e+02 6.803197e+05 4.639723e+09 101 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.mirage000066400000000000000000000100261507764646700340410ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.615269e+04 4.099119e+02 4.748890e+06 7.675673e+10 294 ff82dda0 7372800 0.000000e+00 5.118532e+04 6.422962e+02 1.530441e+07 7.834845e+11 299 2c1922b7 819200 0.000000e+00 2.296074e+03 7.445272e+01 1.021753e+06 2.348487e+09 445 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.740712e+03 2.663471e+02 5.755494e+05 1.592313e+09 210 ff82dda0 7372800 0.000000e+00 6.504044e+03 4.912781e+02 1.385361e+06 9.061859e+09 213 2c1922b7 819200 0.000000e+00 6.801212e+02 1.149855e+02 1.129001e+05 7.898057e+07 166 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.716114e+03 2.688407e+02 4.237138e+05 1.162130e+09 156 ff82dda0 7372800 0.000000e+00 6.512491e+03 5.367987e+02 8.270864e+05 5.422988e+09 127 2c1922b7 819200 0.000000e+00 7.284912e+02 1.021807e+02 1.049027e+05 7.792421e+07 144 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.294336e+03 5.071880e+02 4.909878e+05 1.181540e+09 214 ff82dda0 7372800 0.000000e+00 6.469485e+03 5.370376e+02 7.698688e+05 5.014976e+09 119 2c1922b7 819200 0.000000e+00 7.112055e+02 1.136474e+02 1.002800e+05 7.314078e+07 141 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ll_openblas.sirocco000066400000000000000000000130411507764646700342360ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.637990e+04 1.036285e+04 8.118706e+06 4.731958e+11 144 d39bff17 3276800 0.000000e+00 5.106660e+03 6.848530e+02 2.134584e+06 1.109665e+10 418 2c1922b7 819200 0.000000e+00 4.245334e+03 7.020174e+02 6.368000e+04 2.777353e+08 15 ff82dda0 7372800 0.000000e+00 1.726784e+04 3.264426e+03 3.021872e+06 5.404608e+10 175 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.668892e+03 6.964909e+02 1.394547e+06 8.024874e+09 246 d39bff17 3276800 0.000000e+00 1.216432e+03 1.410794e+02 1.934127e+05 2.384382e+08 159 2c1922b7 819200 0.000000e+00 4.901281e+02 6.729653e+01 6.616730e+04 3.304185e+07 135 ff82dda0 7372800 0.000000e+00 2.106719e+03 2.638200e+02 5.646006e+05 1.208108e+09 268 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.677828e+03 6.218985e+02 1.311578e+06 7.536257e+09 231 d39bff17 3276800 0.000000e+00 1.199302e+03 1.658297e+02 1.774966e+05 2.169419e+08 148 2c1922b7 819200 0.000000e+00 4.968224e+02 7.860110e+01 5.415364e+04 2.757816e+07 109 ff82dda0 7372800 0.000000e+00 2.138085e+03 2.696288e+02 6.371492e+05 1.383944e+09 298 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.593766e+03 7.653530e+02 1.510317e+06 8.606516e+09 270 d39bff17 3276800 0.000000e+00 1.148300e+03 2.163448e+02 2.021009e+05 2.403102e+08 176 2c1922b7 819200 0.000000e+00 8.901347e+01 1.918734e+01 2.412265e+04 2.247011e+06 271 ff82dda0 7372800 0.000000e+00 2.196957e+03 3.265420e+02 4.349975e+05 9.767837e+08 198 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 5.652338e+03 6.245997e+02 1.520479e+06 8.699205e+09 269 d39bff17 3276800 0.000000e+00 1.203544e+03 1.679024e+02 2.286733e+05 2.805746e+08 190 2c1922b7 819200 0.000000e+00 4.930666e+02 7.623523e+01 7.642532e+04 3.858360e+07 155 ff82dda0 7372800 0.000000e+00 2.164310e+03 2.607466e+02 4.869698e+05 1.069251e+09 225 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.attila000066400000000000000000000100221507764646700322030ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.377909e+04 1.008911e+03 6.145473e+06 8.513300e+10 446 ff82dda0 7372800 0.000000e+00 4.298380e+04 1.919778e+03 1.177756e+07 5.072542e+11 274 2c1922b7 819200 0.000000e+00 1.936516e+03 1.503574e+02 4.725100e+05 9.205395e+08 244 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.656425e+03 2.270595e+02 3.320531e+05 8.885184e+08 125 ff82dda0 7372800 0.000000e+00 6.358340e+03 3.816293e+02 5.023088e+05 3.205356e+09 79 2c1922b7 819200 0.000000e+00 3.867923e+02 4.867053e+01 4.564149e+04 1.793330e+07 118 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.902887e+03 4.574719e+02 4.585957e+05 9.230924e+08 241 ff82dda0 7372800 0.000000e+00 3.810456e+03 1.334249e+02 3.353201e+05 1.279289e+09 88 2c1922b7 819200 0.000000e+00 3.835296e+02 4.543249e+01 2.262825e+04 8.800385e+06 59 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.657310e+03 2.918518e+02 3.162199e+05 8.504305e+08 119 ff82dda0 7372800 0.000000e+00 3.819809e+03 1.073068e+02 3.055848e+05 1.168197e+09 80 2c1922b7 819200 0.000000e+00 4.020211e+02 5.372009e+01 3.256371e+04 1.332505e+07 81 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal000066400000000000000000000040141507764646700325050ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 1.280490e+12 6572 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.hannibal-pitch000066400000000000000000000040141507764646700336120ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 1.280490e+12 6572 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.idgraf000066400000000000000000000216401507764646700321710ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 1.946809e+03 8.216247e+01 4.049363e+05 7.897378e+08 208 d39bff17 3276800 0.000000e+00 1.423970e+04 2.281585e+02 1.395491e+06 1.987647e+10 98 ff82dda0 7372800 0.000000e+00 4.640991e+04 5.437505e+02 4.919451e+06 2.283426e+11 106 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.484181e+02 9.807341e+01 3.004401e+04 1.411671e+07 67 d39bff17 3276800 0.000000e+00 1.658665e+03 2.005859e+02 2.388477e+05 4.019622e+08 144 ff82dda0 7372800 0.000000e+00 3.922083e+03 2.271290e+02 2.588575e+05 1.018665e+09 66 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 5.042679e+02 1.229686e+02 7.362312e+04 3.933348e+07 146 d39bff17 3276800 0.000000e+00 2.167031e+03 5.827483e+02 2.773800e+05 6.445595e+08 128 ff82dda0 7372800 0.000000e+00 4.035358e+03 4.245106e+02 4.035358e+05 1.646433e+09 100 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.873044e+02 1.174149e+02 2.631443e+04 1.356760e+07 54 d39bff17 3276800 0.000000e+00 1.705876e+03 1.721886e+02 1.808228e+05 3.116041e+08 106 ff82dda0 7372800 0.000000e+00 3.936492e+03 2.608005e+02 2.440625e+05 9.649671e+08 62 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 5.272057e+02 1.307030e+02 5.535660e+04 3.097805e+07 105 d39bff17 3276800 0.000000e+00 1.638590e+03 9.390080e+01 1.163399e+05 1.912593e+08 71 ff82dda0 7372800 0.000000e+00 4.055643e+03 3.711103e+02 4.177313e+05 1.708355e+09 103 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.303558e+02 7.490536e+01 2.840348e+04 1.259392e+07 66 d39bff17 3276800 0.000000e+00 1.669452e+03 1.444951e+02 1.419035e+05 2.386758e+08 85 ff82dda0 7372800 0.000000e+00 4.288060e+03 7.671104e+02 2.744359e+05 1.214459e+09 64 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.394264e+02 8.387153e+01 6.283798e+04 2.861859e+07 143 d39bff17 3276800 0.000000e+00 2.098818e+03 5.403136e+02 2.140795e+05 4.790917e+08 102 ff82dda0 7372800 0.000000e+00 4.766912e+03 1.123433e+03 7.579390e+05 3.813703e+09 159 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.660051e+02 1.021627e+02 6.477470e+04 3.163611e+07 139 d39bff17 3276800 0.000000e+00 2.103985e+03 5.293854e+02 2.377503e+05 5.318912e+08 113 ff82dda0 7372800 0.000000e+00 3.972257e+03 3.858968e+02 2.899747e+05 1.162725e+09 73 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 5.222345e+02 1.241013e+02 2.715620e+04 1.498276e+07 52 d39bff17 3276800 0.000000e+00 1.941135e+03 4.386059e+02 1.824667e+05 3.722759e+08 94 ff82dda0 7372800 0.000000e+00 4.892155e+03 1.147723e+03 2.397156e+05 1.237272e+09 49 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.mirage000066400000000000000000000100231507764646700321720ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.373117e+04 2.756172e+02 7.167670e+06 9.846014e+10 522 ff82dda0 7372800 0.000000e+00 4.545501e+04 7.462378e+02 1.750018e+07 7.956851e+11 385 2c1922b7 819200 0.000000e+00 1.798916e+03 8.480081e+01 1.219665e+06 2.198952e+09 678 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.883473e+03 4.679640e+02 2.537456e+05 7.509396e+08 88 ff82dda0 7372800 0.000000e+00 6.462089e+03 4.136967e+02 5.751259e+05 3.731746e+09 89 2c1922b7 819200 0.000000e+00 4.040830e+02 6.411732e+01 3.717564e+04 1.540026e+07 92 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.231007e+03 5.378925e+02 3.234960e+05 7.636746e+08 145 ff82dda0 7372800 0.000000e+00 3.904524e+03 2.515208e+02 4.021660e+05 1.576783e+09 103 2c1922b7 819200 0.000000e+00 5.307827e+02 1.276617e+02 5.467062e+04 3.069686e+07 103 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.665736e+03 1.392688e+02 2.881723e+05 4.833744e+08 173 ff82dda0 7372800 0.000000e+00 3.891632e+03 2.259287e+02 7.199519e+05 2.811230e+09 185 2c1922b7 819200 0.000000e+00 5.125766e+02 1.240167e+02 5.587085e+04 3.031453e+07 109 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru.sirocco000066400000000000000000000130371507764646700323770ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 8.483517e+04 1.709999e+04 1.781539e+06 1.572777e+11 21 d39bff17 3276800 0.000000e+00 8.986208e+03 1.629610e+03 1.797242e+05 1.668151e+09 20 2c1922b7 819200 0.000000e+00 3.523655e+03 5.077738e+02 5.990214e+04 2.154576e+08 17 ff82dda0 7372800 0.000000e+00 1.583302e+04 2.624137e+03 3.974089e+06 6.465024e+10 251 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.641113e+03 5.516013e+02 1.257742e+06 5.919777e+09 271 d39bff17 3276800 0.000000e+00 8.365056e+02 1.344660e+02 1.396964e+05 1.198764e+08 167 2c1922b7 819200 0.000000e+00 2.882912e+02 5.271451e+01 7.409085e+04 2.207390e+07 257 ff82dda0 7372800 0.000000e+00 1.570696e+03 2.281691e+02 4.115224e+05 6.600167e+08 262 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.556926e+03 5.099622e+02 1.048093e+06 4.835897e+09 230 d39bff17 3276800 0.000000e+00 7.019049e+02 1.632697e+02 1.109010e+05 8.205375e+07 158 2c1922b7 819200 0.000000e+00 9.967334e+01 2.197557e+01 2.372225e+04 2.479413e+06 238 ff82dda0 7372800 0.000000e+00 1.571709e+03 2.150516e+02 4.007858e+05 6.417117e+08 255 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.650733e+03 7.073225e+02 1.232444e+06 5.864350e+09 265 d39bff17 3276800 0.000000e+00 8.352707e+02 1.515223e+02 1.587014e+05 1.369209e+08 190 2c1922b7 819200 0.000000e+00 2.858293e+02 5.241353e+01 7.460146e+04 2.204030e+07 261 ff82dda0 7372800 0.000000e+00 1.569547e+03 2.419662e+02 2.589752e+05 4.161341e+08 165 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.671203e+03 5.859459e+02 1.331293e+06 6.316588e+09 285 d39bff17 3276800 0.000000e+00 8.453596e+02 1.395049e+02 1.420204e+05 1.233279e+08 168 2c1922b7 819200 0.000000e+00 2.930233e+02 5.590601e+01 5.362326e+04 1.628483e+07 183 ff82dda0 7372800 0.000000e+00 1.591448e+03 2.256700e+02 2.387172e+05 3.875451e+08 150 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.attila000066400000000000000000000100221507764646700333670ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.377909e+04 1.008911e+03 6.145473e+06 8.513300e+10 446 ff82dda0 7372800 0.000000e+00 4.298380e+04 1.919778e+03 1.177756e+07 5.072542e+11 274 2c1922b7 819200 0.000000e+00 1.936516e+03 1.503574e+02 4.725100e+05 9.205395e+08 244 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.656425e+03 2.270595e+02 3.320531e+05 8.885184e+08 125 ff82dda0 7372800 0.000000e+00 6.358340e+03 3.816293e+02 5.023088e+05 3.205356e+09 79 2c1922b7 819200 0.000000e+00 3.867923e+02 4.867053e+01 4.564149e+04 1.793330e+07 118 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.902887e+03 4.574719e+02 4.585957e+05 9.230924e+08 241 ff82dda0 7372800 0.000000e+00 3.810456e+03 1.334249e+02 3.353201e+05 1.279289e+09 88 2c1922b7 819200 0.000000e+00 3.835296e+02 4.543249e+01 2.262825e+04 8.800385e+06 59 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.657310e+03 2.918518e+02 3.162199e+05 8.504305e+08 119 ff82dda0 7372800 0.000000e+00 3.819809e+03 1.073068e+02 3.055848e+05 1.168197e+09 80 2c1922b7 819200 0.000000e+00 4.020211e+02 5.372009e+01 3.256371e+04 1.332505e+07 81 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.hannibal000066400000000000000000000040141507764646700336710ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 1.280490e+12 6572 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 starpu_slu_lu_model_trsm_ru_atlas.hannibal-pitch000066400000000000000000000040141507764646700347170ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 1.280490e+12 6572 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.idgraf000066400000000000000000000216401507764646700333550ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 1.946809e+03 8.216247e+01 4.049363e+05 7.897378e+08 208 d39bff17 3276800 0.000000e+00 1.423970e+04 2.281585e+02 1.395491e+06 1.987647e+10 98 ff82dda0 7372800 0.000000e+00 4.640991e+04 5.437505e+02 4.919451e+06 2.283426e+11 106 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.484181e+02 9.807341e+01 3.004401e+04 1.411671e+07 67 d39bff17 3276800 0.000000e+00 1.658665e+03 2.005859e+02 2.388477e+05 4.019622e+08 144 ff82dda0 7372800 0.000000e+00 3.922083e+03 2.271290e+02 2.588575e+05 1.018665e+09 66 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 5.042679e+02 1.229686e+02 7.362312e+04 3.933348e+07 146 d39bff17 3276800 0.000000e+00 2.167031e+03 5.827483e+02 2.773800e+05 6.445595e+08 128 ff82dda0 7372800 0.000000e+00 4.035358e+03 4.245106e+02 4.035358e+05 1.646433e+09 100 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.873044e+02 1.174149e+02 2.631443e+04 1.356760e+07 54 d39bff17 3276800 0.000000e+00 1.705876e+03 1.721886e+02 1.808228e+05 3.116041e+08 106 ff82dda0 7372800 0.000000e+00 3.936492e+03 2.608005e+02 2.440625e+05 9.649671e+08 62 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 5.272057e+02 1.307030e+02 5.535660e+04 3.097805e+07 105 d39bff17 3276800 0.000000e+00 1.638590e+03 9.390080e+01 1.163399e+05 1.912593e+08 71 ff82dda0 7372800 0.000000e+00 4.055643e+03 3.711103e+02 4.177313e+05 1.708355e+09 103 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.303558e+02 7.490536e+01 2.840348e+04 1.259392e+07 66 d39bff17 3276800 0.000000e+00 1.669452e+03 1.444951e+02 1.419035e+05 2.386758e+08 85 ff82dda0 7372800 0.000000e+00 4.288060e+03 7.671104e+02 2.744359e+05 1.214459e+09 64 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.394264e+02 8.387153e+01 6.283798e+04 2.861859e+07 143 d39bff17 3276800 0.000000e+00 2.098818e+03 5.403136e+02 2.140795e+05 4.790917e+08 102 ff82dda0 7372800 0.000000e+00 4.766912e+03 1.123433e+03 7.579390e+05 3.813703e+09 159 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.660051e+02 1.021627e+02 6.477470e+04 3.163611e+07 139 d39bff17 3276800 0.000000e+00 2.103985e+03 5.293854e+02 2.377503e+05 5.318912e+08 113 ff82dda0 7372800 0.000000e+00 3.972257e+03 3.858968e+02 2.899747e+05 1.162725e+09 73 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 5.222345e+02 1.241013e+02 2.715620e+04 1.498276e+07 52 d39bff17 3276800 0.000000e+00 1.941135e+03 4.386059e+02 1.824667e+05 3.722759e+08 94 ff82dda0 7372800 0.000000e+00 4.892155e+03 1.147723e+03 2.397156e+05 1.237272e+09 49 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.mirage000066400000000000000000000100231507764646700333560ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.373117e+04 2.756172e+02 7.167670e+06 9.846014e+10 522 ff82dda0 7372800 0.000000e+00 4.545501e+04 7.462378e+02 1.750018e+07 7.956851e+11 385 2c1922b7 819200 0.000000e+00 1.798916e+03 8.480081e+01 1.219665e+06 2.198952e+09 678 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.883473e+03 4.679640e+02 2.537456e+05 7.509396e+08 88 ff82dda0 7372800 0.000000e+00 6.462089e+03 4.136967e+02 5.751259e+05 3.731746e+09 89 2c1922b7 819200 0.000000e+00 4.040830e+02 6.411732e+01 3.717564e+04 1.540026e+07 92 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.231007e+03 5.378925e+02 3.234960e+05 7.636746e+08 145 ff82dda0 7372800 0.000000e+00 3.904524e+03 2.515208e+02 4.021660e+05 1.576783e+09 103 2c1922b7 819200 0.000000e+00 5.307827e+02 1.276617e+02 5.467062e+04 3.069686e+07 103 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.665736e+03 1.392688e+02 2.881723e+05 4.833744e+08 173 ff82dda0 7372800 0.000000e+00 3.891632e+03 2.259287e+02 7.199519e+05 2.811230e+09 185 2c1922b7 819200 0.000000e+00 5.125766e+02 1.240167e+02 5.587085e+04 3.031453e+07 109 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_atlas.sirocco000066400000000000000000000130371507764646700335630ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 8.483517e+04 1.709999e+04 1.781539e+06 1.572777e+11 21 d39bff17 3276800 0.000000e+00 8.986208e+03 1.629610e+03 1.797242e+05 1.668151e+09 20 2c1922b7 819200 0.000000e+00 3.523655e+03 5.077738e+02 5.990214e+04 2.154576e+08 17 ff82dda0 7372800 0.000000e+00 1.583302e+04 2.624137e+03 3.974089e+06 6.465024e+10 251 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.641113e+03 5.516013e+02 1.257742e+06 5.919777e+09 271 d39bff17 3276800 0.000000e+00 8.365056e+02 1.344660e+02 1.396964e+05 1.198764e+08 167 2c1922b7 819200 0.000000e+00 2.882912e+02 5.271451e+01 7.409085e+04 2.207390e+07 257 ff82dda0 7372800 0.000000e+00 1.570696e+03 2.281691e+02 4.115224e+05 6.600167e+08 262 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.556926e+03 5.099622e+02 1.048093e+06 4.835897e+09 230 d39bff17 3276800 0.000000e+00 7.019049e+02 1.632697e+02 1.109010e+05 8.205375e+07 158 2c1922b7 819200 0.000000e+00 9.967334e+01 2.197557e+01 2.372225e+04 2.479413e+06 238 ff82dda0 7372800 0.000000e+00 1.571709e+03 2.150516e+02 4.007858e+05 6.417117e+08 255 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.650733e+03 7.073225e+02 1.232444e+06 5.864350e+09 265 d39bff17 3276800 0.000000e+00 8.352707e+02 1.515223e+02 1.587014e+05 1.369209e+08 190 2c1922b7 819200 0.000000e+00 2.858293e+02 5.241353e+01 7.460146e+04 2.204030e+07 261 ff82dda0 7372800 0.000000e+00 1.569547e+03 2.419662e+02 2.589752e+05 4.161341e+08 165 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.671203e+03 5.859459e+02 1.331293e+06 6.316588e+09 285 d39bff17 3276800 0.000000e+00 8.453596e+02 1.395049e+02 1.420204e+05 1.233279e+08 168 2c1922b7 819200 0.000000e+00 2.930233e+02 5.590601e+01 5.362326e+04 1.628483e+07 183 ff82dda0 7372800 0.000000e+00 1.591448e+03 2.256700e+02 2.387172e+05 3.875451e+08 150 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.attila000066400000000000000000000100221507764646700332330ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.377909e+04 1.008911e+03 6.145473e+06 8.513300e+10 446 ff82dda0 7372800 0.000000e+00 4.298380e+04 1.919778e+03 1.177756e+07 5.072542e+11 274 2c1922b7 819200 0.000000e+00 1.936516e+03 1.503574e+02 4.725100e+05 9.205395e+08 244 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.656425e+03 2.270595e+02 3.320531e+05 8.885184e+08 125 ff82dda0 7372800 0.000000e+00 6.358340e+03 3.816293e+02 5.023088e+05 3.205356e+09 79 2c1922b7 819200 0.000000e+00 3.867923e+02 4.867053e+01 4.564149e+04 1.793330e+07 118 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.902887e+03 4.574719e+02 4.585957e+05 9.230924e+08 241 ff82dda0 7372800 0.000000e+00 3.810456e+03 1.334249e+02 3.353201e+05 1.279289e+09 88 2c1922b7 819200 0.000000e+00 3.835296e+02 4.543249e+01 2.262825e+04 8.800385e+06 59 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.657310e+03 2.918518e+02 3.162199e+05 8.504305e+08 119 ff82dda0 7372800 0.000000e+00 3.819809e+03 1.073068e+02 3.055848e+05 1.168197e+09 80 2c1922b7 819200 0.000000e+00 4.020211e+02 5.372009e+01 3.256371e+04 1.332505e+07 81 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.hannibal000066400000000000000000000040141507764646700335350ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 1.280490e+12 6572 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 starpu_slu_lu_model_trsm_ru_goto.hannibal-pitch000066400000000000000000000040141507764646700345630ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 1.280490e+12 6572 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.idgraf000066400000000000000000000216401507764646700332210ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 1.946809e+03 8.216247e+01 4.049363e+05 7.897378e+08 208 d39bff17 3276800 0.000000e+00 1.423970e+04 2.281585e+02 1.395491e+06 1.987647e+10 98 ff82dda0 7372800 0.000000e+00 4.640991e+04 5.437505e+02 4.919451e+06 2.283426e+11 106 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.484181e+02 9.807341e+01 3.004401e+04 1.411671e+07 67 d39bff17 3276800 0.000000e+00 1.658665e+03 2.005859e+02 2.388477e+05 4.019622e+08 144 ff82dda0 7372800 0.000000e+00 3.922083e+03 2.271290e+02 2.588575e+05 1.018665e+09 66 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 5.042679e+02 1.229686e+02 7.362312e+04 3.933348e+07 146 d39bff17 3276800 0.000000e+00 2.167031e+03 5.827483e+02 2.773800e+05 6.445595e+08 128 ff82dda0 7372800 0.000000e+00 4.035358e+03 4.245106e+02 4.035358e+05 1.646433e+09 100 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.873044e+02 1.174149e+02 2.631443e+04 1.356760e+07 54 d39bff17 3276800 0.000000e+00 1.705876e+03 1.721886e+02 1.808228e+05 3.116041e+08 106 ff82dda0 7372800 0.000000e+00 3.936492e+03 2.608005e+02 2.440625e+05 9.649671e+08 62 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 5.272057e+02 1.307030e+02 5.535660e+04 3.097805e+07 105 d39bff17 3276800 0.000000e+00 1.638590e+03 9.390080e+01 1.163399e+05 1.912593e+08 71 ff82dda0 7372800 0.000000e+00 4.055643e+03 3.711103e+02 4.177313e+05 1.708355e+09 103 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.303558e+02 7.490536e+01 2.840348e+04 1.259392e+07 66 d39bff17 3276800 0.000000e+00 1.669452e+03 1.444951e+02 1.419035e+05 2.386758e+08 85 ff82dda0 7372800 0.000000e+00 4.288060e+03 7.671104e+02 2.744359e+05 1.214459e+09 64 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.394264e+02 8.387153e+01 6.283798e+04 2.861859e+07 143 d39bff17 3276800 0.000000e+00 2.098818e+03 5.403136e+02 2.140795e+05 4.790917e+08 102 ff82dda0 7372800 0.000000e+00 4.766912e+03 1.123433e+03 7.579390e+05 3.813703e+09 159 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.660051e+02 1.021627e+02 6.477470e+04 3.163611e+07 139 d39bff17 3276800 0.000000e+00 2.103985e+03 5.293854e+02 2.377503e+05 5.318912e+08 113 ff82dda0 7372800 0.000000e+00 3.972257e+03 3.858968e+02 2.899747e+05 1.162725e+09 73 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 5.222345e+02 1.241013e+02 2.715620e+04 1.498276e+07 52 d39bff17 3276800 0.000000e+00 1.941135e+03 4.386059e+02 1.824667e+05 3.722759e+08 94 ff82dda0 7372800 0.000000e+00 4.892155e+03 1.147723e+03 2.397156e+05 1.237272e+09 49 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.mirage000066400000000000000000000100231507764646700332220ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.373117e+04 2.756172e+02 7.167670e+06 9.846014e+10 522 ff82dda0 7372800 0.000000e+00 4.545501e+04 7.462378e+02 1.750018e+07 7.956851e+11 385 2c1922b7 819200 0.000000e+00 1.798916e+03 8.480081e+01 1.219665e+06 2.198952e+09 678 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.883473e+03 4.679640e+02 2.537456e+05 7.509396e+08 88 ff82dda0 7372800 0.000000e+00 6.462089e+03 4.136967e+02 5.751259e+05 3.731746e+09 89 2c1922b7 819200 0.000000e+00 4.040830e+02 6.411732e+01 3.717564e+04 1.540026e+07 92 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.231007e+03 5.378925e+02 3.234960e+05 7.636746e+08 145 ff82dda0 7372800 0.000000e+00 3.904524e+03 2.515208e+02 4.021660e+05 1.576783e+09 103 2c1922b7 819200 0.000000e+00 5.307827e+02 1.276617e+02 5.467062e+04 3.069686e+07 103 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.665736e+03 1.392688e+02 2.881723e+05 4.833744e+08 173 ff82dda0 7372800 0.000000e+00 3.891632e+03 2.259287e+02 7.199519e+05 2.811230e+09 185 2c1922b7 819200 0.000000e+00 5.125766e+02 1.240167e+02 5.587085e+04 3.031453e+07 109 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_goto.sirocco000066400000000000000000000130371507764646700334270ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 8.483517e+04 1.709999e+04 1.781539e+06 1.572777e+11 21 d39bff17 3276800 0.000000e+00 8.986208e+03 1.629610e+03 1.797242e+05 1.668151e+09 20 2c1922b7 819200 0.000000e+00 3.523655e+03 5.077738e+02 5.990214e+04 2.154576e+08 17 ff82dda0 7372800 0.000000e+00 1.583302e+04 2.624137e+03 3.974089e+06 6.465024e+10 251 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.641113e+03 5.516013e+02 1.257742e+06 5.919777e+09 271 d39bff17 3276800 0.000000e+00 8.365056e+02 1.344660e+02 1.396964e+05 1.198764e+08 167 2c1922b7 819200 0.000000e+00 2.882912e+02 5.271451e+01 7.409085e+04 2.207390e+07 257 ff82dda0 7372800 0.000000e+00 1.570696e+03 2.281691e+02 4.115224e+05 6.600167e+08 262 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.556926e+03 5.099622e+02 1.048093e+06 4.835897e+09 230 d39bff17 3276800 0.000000e+00 7.019049e+02 1.632697e+02 1.109010e+05 8.205375e+07 158 2c1922b7 819200 0.000000e+00 9.967334e+01 2.197557e+01 2.372225e+04 2.479413e+06 238 ff82dda0 7372800 0.000000e+00 1.571709e+03 2.150516e+02 4.007858e+05 6.417117e+08 255 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.650733e+03 7.073225e+02 1.232444e+06 5.864350e+09 265 d39bff17 3276800 0.000000e+00 8.352707e+02 1.515223e+02 1.587014e+05 1.369209e+08 190 2c1922b7 819200 0.000000e+00 2.858293e+02 5.241353e+01 7.460146e+04 2.204030e+07 261 ff82dda0 7372800 0.000000e+00 1.569547e+03 2.419662e+02 2.589752e+05 4.161341e+08 165 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.671203e+03 5.859459e+02 1.331293e+06 6.316588e+09 285 d39bff17 3276800 0.000000e+00 8.453596e+02 1.395049e+02 1.420204e+05 1.233279e+08 168 2c1922b7 819200 0.000000e+00 2.930233e+02 5.590601e+01 5.362326e+04 1.628483e+07 183 ff82dda0 7372800 0.000000e+00 1.591448e+03 2.256700e+02 2.387172e+05 3.875451e+08 150 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.attila000066400000000000000000000100221507764646700340660ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.377909e+04 1.008911e+03 6.145473e+06 8.513300e+10 446 ff82dda0 7372800 0.000000e+00 4.298380e+04 1.919778e+03 1.177756e+07 5.072542e+11 274 2c1922b7 819200 0.000000e+00 1.936516e+03 1.503574e+02 4.725100e+05 9.205395e+08 244 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.656425e+03 2.270595e+02 3.320531e+05 8.885184e+08 125 ff82dda0 7372800 0.000000e+00 6.358340e+03 3.816293e+02 5.023088e+05 3.205356e+09 79 2c1922b7 819200 0.000000e+00 3.867923e+02 4.867053e+01 4.564149e+04 1.793330e+07 118 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.902887e+03 4.574719e+02 4.585957e+05 9.230924e+08 241 ff82dda0 7372800 0.000000e+00 3.810456e+03 1.334249e+02 3.353201e+05 1.279289e+09 88 2c1922b7 819200 0.000000e+00 3.835296e+02 4.543249e+01 2.262825e+04 8.800385e+06 59 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.657310e+03 2.918518e+02 3.162199e+05 8.504305e+08 119 ff82dda0 7372800 0.000000e+00 3.819809e+03 1.073068e+02 3.055848e+05 1.168197e+09 80 2c1922b7 819200 0.000000e+00 4.020211e+02 5.372009e+01 3.256371e+04 1.332505e+07 81 starpu_slu_lu_model_trsm_ru_openblas.hannibal000066400000000000000000000040141507764646700343110ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 1.280490e+12 6572 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 starpu_slu_lu_model_trsm_ru_openblas.hannibal-pitch000066400000000000000000000040141507764646700354160ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45################## # Performance Model Version 45 #################### # COMBs # number of combinations 3 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.103789e+04 3.664518e+03 7.889881e+07 9.668643e+11 7148 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.284524e+04 5.462619e+03 8.441889e+07 1.280490e+12 6572 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb3) # number of entries 1 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n ff82dda0 7372800 0.000000e+00 1.171798e+04 4.121992e+03 8.325626e+07 1.096315e+12 7105 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.idgraf000066400000000000000000000216401507764646700340540ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 9 #################### # COMB_8 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb8) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 1.946809e+03 8.216247e+01 4.049363e+05 7.897378e+08 208 d39bff17 3276800 0.000000e+00 1.423970e+04 2.281585e+02 1.395491e+06 1.987647e+10 98 ff82dda0 7372800 0.000000e+00 4.640991e+04 5.437505e+02 4.919451e+06 2.283426e+11 106 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 4 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda4_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.484181e+02 9.807341e+01 3.004401e+04 1.411671e+07 67 d39bff17 3276800 0.000000e+00 1.658665e+03 2.005859e+02 2.388477e+05 4.019622e+08 144 ff82dda0 7372800 0.000000e+00 3.922083e+03 2.271290e+02 2.588575e+05 1.018665e+09 66 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 5.042679e+02 1.229686e+02 7.362312e+04 3.933348e+07 146 d39bff17 3276800 0.000000e+00 2.167031e+03 5.827483e+02 2.773800e+05 6.445595e+08 128 ff82dda0 7372800 0.000000e+00 4.035358e+03 4.245106e+02 4.035358e+05 1.646433e+09 100 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 6 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda6_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.873044e+02 1.174149e+02 2.631443e+04 1.356760e+07 54 d39bff17 3276800 0.000000e+00 1.705876e+03 1.721886e+02 1.808228e+05 3.116041e+08 106 ff82dda0 7372800 0.000000e+00 3.936492e+03 2.608005e+02 2.440625e+05 9.649671e+08 62 #################### # COMB_7 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb7) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 5.272057e+02 1.307030e+02 5.535660e+04 3.097805e+07 105 d39bff17 3276800 0.000000e+00 1.638590e+03 9.390080e+01 1.163399e+05 1.912593e+08 71 ff82dda0 7372800 0.000000e+00 4.055643e+03 3.711103e+02 4.177313e+05 1.708355e+09 103 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 5 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda5_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.303558e+02 7.490536e+01 2.840348e+04 1.259392e+07 66 d39bff17 3276800 0.000000e+00 1.669452e+03 1.444951e+02 1.419035e+05 2.386758e+08 85 ff82dda0 7372800 0.000000e+00 4.288060e+03 7.671104e+02 2.744359e+05 1.214459e+09 64 #################### # COMB_5 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb5) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.394264e+02 8.387153e+01 6.283798e+04 2.861859e+07 143 d39bff17 3276800 0.000000e+00 2.098818e+03 5.403136e+02 2.140795e+05 4.790917e+08 102 ff82dda0 7372800 0.000000e+00 4.766912e+03 1.123433e+03 7.579390e+05 3.813703e+09 159 #################### # COMB_6 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb6) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 4.660051e+02 1.021627e+02 6.477470e+04 3.163611e+07 139 d39bff17 3276800 0.000000e+00 2.103985e+03 5.293854e+02 2.377503e+05 5.318912e+08 113 ff82dda0 7372800 0.000000e+00 3.972257e+03 3.858968e+02 2.899747e+05 1.162725e+09 73 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 7 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda7_impl0 (Comb4) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 2c1922b7 819200 0.000000e+00 5.222345e+02 1.241013e+02 2.715620e+04 1.498276e+07 52 d39bff17 3276800 0.000000e+00 1.941135e+03 4.386059e+02 1.824667e+05 3.722759e+08 94 ff82dda0 7372800 0.000000e+00 4.892155e+03 1.147723e+03 2.397156e+05 1.237272e+09 49 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.mirage000066400000000000000000000100231507764646700340550ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 4 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb3) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.373117e+04 2.756172e+02 7.167670e+06 9.846014e+10 522 ff82dda0 7372800 0.000000e+00 4.545501e+04 7.462378e+02 1.750018e+07 7.956851e+11 385 2c1922b7 819200 0.000000e+00 1.798916e+03 8.480081e+01 1.219665e+06 2.198952e+09 678 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb0) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.883473e+03 4.679640e+02 2.537456e+05 7.509396e+08 88 ff82dda0 7372800 0.000000e+00 6.462089e+03 4.136967e+02 5.751259e+05 3.731746e+09 89 2c1922b7 819200 0.000000e+00 4.040830e+02 6.411732e+01 3.717564e+04 1.540026e+07 92 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 2.231007e+03 5.378925e+02 3.234960e+05 7.636746e+08 145 ff82dda0 7372800 0.000000e+00 3.904524e+03 2.515208e+02 4.021660e+05 1.576783e+09 103 2c1922b7 819200 0.000000e+00 5.307827e+02 1.276617e+02 5.467062e+04 3.069686e+07 103 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb1) # number of entries 3 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n d39bff17 3276800 0.000000e+00 1.665736e+03 1.392688e+02 2.881723e+05 4.833744e+08 173 ff82dda0 7372800 0.000000e+00 3.891632e+03 2.259287e+02 7.199519e+05 2.811230e+09 185 2c1922b7 819200 0.000000e+00 5.125766e+02 1.240167e+02 5.587085e+04 3.031453e+07 109 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/45/starpu_slu_lu_model_trsm_ru_openblas.sirocco000066400000000000000000000130371507764646700342620ustar00rootroot00000000000000################## # Performance Model Version 45 #################### # COMBs # number of combinations 5 #################### # COMB_4 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 0 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cpu0_impl0 (Comb4) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 8.483517e+04 1.709999e+04 1.781539e+06 1.572777e+11 21 d39bff17 3276800 0.000000e+00 8.986208e+03 1.629610e+03 1.797242e+05 1.668151e+09 20 2c1922b7 819200 0.000000e+00 3.523655e+03 5.077738e+02 5.990214e+04 2.154576e+08 17 ff82dda0 7372800 0.000000e+00 1.583302e+04 2.624137e+03 3.974089e+06 6.465024e+10 251 #################### # COMB_1 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 0 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda0_impl0 (Comb1) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.641113e+03 5.516013e+02 1.257742e+06 5.919777e+09 271 d39bff17 3276800 0.000000e+00 8.365056e+02 1.344660e+02 1.396964e+05 1.198764e+08 167 2c1922b7 819200 0.000000e+00 2.882912e+02 5.271451e+01 7.409085e+04 2.207390e+07 257 ff82dda0 7372800 0.000000e+00 1.570696e+03 2.281691e+02 4.115224e+05 6.600167e+08 262 #################### # COMB_0 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 2 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda2_impl0 (Comb0) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.556926e+03 5.099622e+02 1.048093e+06 4.835897e+09 230 d39bff17 3276800 0.000000e+00 7.019049e+02 1.632697e+02 1.109010e+05 8.205375e+07 158 2c1922b7 819200 0.000000e+00 9.967334e+01 2.197557e+01 2.372225e+04 2.479413e+06 238 ff82dda0 7372800 0.000000e+00 1.571709e+03 2.150516e+02 4.007858e+05 6.417117e+08 255 #################### # COMB_3 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 3 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda3_impl0 (Comb3) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.650733e+03 7.073225e+02 1.232444e+06 5.864350e+09 265 d39bff17 3276800 0.000000e+00 8.352707e+02 1.515223e+02 1.587014e+05 1.369209e+08 190 2c1922b7 819200 0.000000e+00 2.858293e+02 5.241353e+01 7.460146e+04 2.204030e+07 261 ff82dda0 7372800 0.000000e+00 1.569547e+03 2.419662e+02 2.589752e+05 4.161341e+08 165 #################### # COMB_2 # number of types devices 1 #################### # DEV_0 # device type (CPU - 0, CUDA - 1, OPENCL - 2, MIC - 3) 1 #################### # DEV_0 # device id 1 #################### # DEV_0 # number of cores 1 ########## # number of implementations 1 ##### # Model for cuda1_impl0 (Comb2) # number of entries 4 # sumlnx sumlnx2 sumlny sumlnxlny alpha beta n minx maxx 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 nan nan 0 0 0 # a b c nan nan nan # not multiple-regression-base 0 # hash size flops mean (us) dev (us) sum sum2 n 0e8bce2b 16588800 0.000000e+00 4.671203e+03 5.859459e+02 1.331293e+06 6.316588e+09 285 d39bff17 3276800 0.000000e+00 8.453596e+02 1.395049e+02 1.420204e+05 1.233279e+08 168 2c1922b7 819200 0.000000e+00 2.930233e+02 5.590601e+01 5.362326e+04 1.628483e+07 183 ff82dda0 7372800 0.000000e+00 1.591448e+03 2.256700e+02 2.387172e+05 3.875451e+08 150 starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/tmp/000077500000000000000000000000001507764646700235245ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfmodels/sampling/codelets/tmp/mlr_init.out000066400000000000000000002073051507764646700261010ustar00rootroot00000000000000Duration, M, N, K 0.422000, 6.000000, 5.000000, 4.000000 0.474000, 6.000000, 5.000000, 4.000000 0.673000, 6.000000, 5.000000, 4.000000 0.482000, 6.000000, 5.000000, 4.000000 0.721000, 6.000000, 5.000000, 4.000000 0.441000, 6.000000, 5.000000, 4.000000 0.459000, 6.000000, 5.000000, 4.000000 0.694000, 6.000000, 5.000000, 4.000000 0.458000, 6.000000, 5.000000, 4.000000 0.468000, 6.000000, 5.000000, 4.000000 0.684000, 6.000000, 5.000000, 4.000000 0.470000, 6.000000, 5.000000, 4.000000 0.469000, 6.000000, 5.000000, 4.000000 0.667000, 6.000000, 5.000000, 4.000000 0.422000, 6.000000, 5.000000, 4.000000 0.432000, 6.000000, 5.000000, 4.000000 0.469000, 6.000000, 5.000000, 4.000000 0.691000, 6.000000, 5.000000, 4.000000 0.450000, 6.000000, 5.000000, 4.000000 0.629000, 6.000000, 5.000000, 4.000000 0.451000, 6.000000, 5.000000, 4.000000 0.505000, 6.000000, 5.000000, 4.000000 0.686000, 6.000000, 5.000000, 4.000000 0.435000, 6.000000, 5.000000, 4.000000 0.478000, 6.000000, 5.000000, 4.000000 0.686000, 6.000000, 5.000000, 4.000000 0.418000, 6.000000, 5.000000, 4.000000 0.462000, 6.000000, 5.000000, 4.000000 0.680000, 6.000000, 5.000000, 4.000000 0.523000, 6.000000, 5.000000, 4.000000 0.604000, 6.000000, 5.000000, 4.000000 0.485000, 6.000000, 5.000000, 4.000000 0.692000, 6.000000, 5.000000, 4.000000 0.422000, 6.000000, 5.000000, 4.000000 0.460000, 6.000000, 5.000000, 4.000000 0.685000, 6.000000, 5.000000, 4.000000 0.420000, 6.000000, 5.000000, 4.000000 0.453000, 6.000000, 5.000000, 4.000000 0.661000, 6.000000, 5.000000, 4.000000 0.495000, 6.000000, 5.000000, 4.000000 0.670000, 6.000000, 5.000000, 4.000000 0.427000, 6.000000, 5.000000, 4.000000 1.951000, 4.000000, 8.000000, 10.000000 2.683000, 4.000000, 8.000000, 10.000000 1.936000, 4.000000, 8.000000, 10.000000 1.949000, 4.000000, 8.000000, 10.000000 2.724000, 4.000000, 8.000000, 10.000000 1.906000, 4.000000, 8.000000, 10.000000 1.951000, 4.000000, 8.000000, 10.000000 2.631000, 4.000000, 8.000000, 10.000000 1.989000, 4.000000, 8.000000, 10.000000 2.679000, 4.000000, 8.000000, 10.000000 1.897000, 4.000000, 8.000000, 10.000000 1.951000, 4.000000, 8.000000, 10.000000 2.894000, 4.000000, 8.000000, 10.000000 1.903000, 4.000000, 8.000000, 10.000000 1.948000, 4.000000, 8.000000, 10.000000 2.666000, 4.000000, 8.000000, 10.000000 1.922000, 4.000000, 8.000000, 10.000000 1.951000, 4.000000, 8.000000, 10.000000 2.769000, 4.000000, 8.000000, 10.000000 1.937000, 4.000000, 8.000000, 10.000000 2.739000, 4.000000, 8.000000, 10.000000 1.912000, 4.000000, 8.000000, 10.000000 1.942000, 4.000000, 8.000000, 10.000000 2.694000, 4.000000, 8.000000, 10.000000 1.903000, 4.000000, 8.000000, 10.000000 1.956000, 4.000000, 8.000000, 10.000000 2.784000, 4.000000, 8.000000, 10.000000 1.894000, 4.000000, 8.000000, 10.000000 1.946000, 4.000000, 8.000000, 10.000000 2.704000, 4.000000, 8.000000, 10.000000 1.899000, 4.000000, 8.000000, 10.000000 1.923000, 4.000000, 8.000000, 10.000000 2.748000, 4.000000, 8.000000, 10.000000 1.903000, 4.000000, 8.000000, 10.000000 1.941000, 4.000000, 8.000000, 10.000000 2.818000, 4.000000, 8.000000, 10.000000 1.926000, 4.000000, 8.000000, 10.000000 1.927000, 4.000000, 8.000000, 10.000000 2.793000, 4.000000, 8.000000, 10.000000 1.914000, 4.000000, 8.000000, 10.000000 1.948000, 4.000000, 8.000000, 10.000000 2.701000, 4.000000, 8.000000, 10.000000 0.244000, 8.000000, 2.000000, 2.000000 0.269000, 8.000000, 2.000000, 2.000000 0.380000, 8.000000, 2.000000, 2.000000 0.229000, 8.000000, 2.000000, 2.000000 0.281000, 8.000000, 2.000000, 2.000000 0.395000, 8.000000, 2.000000, 2.000000 0.235000, 8.000000, 2.000000, 2.000000 0.275000, 8.000000, 2.000000, 2.000000 0.383000, 8.000000, 2.000000, 2.000000 0.268000, 8.000000, 2.000000, 2.000000 0.285000, 8.000000, 2.000000, 2.000000 0.345000, 8.000000, 2.000000, 2.000000 0.243000, 8.000000, 2.000000, 2.000000 0.268000, 8.000000, 2.000000, 2.000000 0.424000, 8.000000, 2.000000, 2.000000 0.242000, 8.000000, 2.000000, 2.000000 0.303000, 8.000000, 2.000000, 2.000000 0.404000, 8.000000, 2.000000, 2.000000 0.250000, 8.000000, 2.000000, 2.000000 0.277000, 8.000000, 2.000000, 2.000000 0.369000, 8.000000, 2.000000, 2.000000 0.253000, 8.000000, 2.000000, 2.000000 0.278000, 8.000000, 2.000000, 2.000000 0.441000, 8.000000, 2.000000, 2.000000 0.270000, 8.000000, 2.000000, 2.000000 0.246000, 8.000000, 2.000000, 2.000000 0.284000, 8.000000, 2.000000, 2.000000 0.400000, 8.000000, 2.000000, 2.000000 0.240000, 8.000000, 2.000000, 2.000000 0.252000, 8.000000, 2.000000, 2.000000 0.295000, 8.000000, 2.000000, 2.000000 0.395000, 8.000000, 2.000000, 2.000000 0.248000, 8.000000, 2.000000, 2.000000 0.324000, 8.000000, 2.000000, 2.000000 0.408000, 8.000000, 2.000000, 2.000000 0.312000, 8.000000, 2.000000, 2.000000 0.420000, 8.000000, 2.000000, 2.000000 0.240000, 8.000000, 2.000000, 2.000000 0.266000, 8.000000, 2.000000, 2.000000 0.404000, 8.000000, 2.000000, 2.000000 0.273000, 8.000000, 2.000000, 2.000000 0.334000, 8.000000, 2.000000, 2.000000 0.452000, 1.000000, 5.000000, 2.000000 0.423000, 1.000000, 5.000000, 2.000000 0.474000, 1.000000, 5.000000, 2.000000 0.268000, 1.000000, 5.000000, 2.000000 0.337000, 1.000000, 5.000000, 2.000000 0.420000, 1.000000, 5.000000, 2.000000 0.323000, 1.000000, 5.000000, 2.000000 0.301000, 1.000000, 5.000000, 2.000000 0.428000, 1.000000, 5.000000, 2.000000 0.318000, 1.000000, 5.000000, 2.000000 0.291000, 1.000000, 5.000000, 2.000000 0.436000, 1.000000, 5.000000, 2.000000 0.323000, 1.000000, 5.000000, 2.000000 0.351000, 1.000000, 5.000000, 2.000000 0.456000, 1.000000, 5.000000, 2.000000 0.309000, 1.000000, 5.000000, 2.000000 0.329000, 1.000000, 5.000000, 2.000000 0.514000, 1.000000, 5.000000, 2.000000 0.286000, 1.000000, 5.000000, 2.000000 0.358000, 1.000000, 5.000000, 2.000000 0.485000, 1.000000, 5.000000, 2.000000 0.261000, 1.000000, 5.000000, 2.000000 0.374000, 1.000000, 5.000000, 2.000000 0.470000, 1.000000, 5.000000, 2.000000 0.269000, 1.000000, 5.000000, 2.000000 0.347000, 1.000000, 5.000000, 2.000000 0.402000, 1.000000, 5.000000, 2.000000 0.274000, 1.000000, 5.000000, 2.000000 0.338000, 1.000000, 5.000000, 2.000000 0.510000, 1.000000, 5.000000, 2.000000 0.281000, 1.000000, 5.000000, 2.000000 0.331000, 1.000000, 5.000000, 2.000000 0.311000, 1.000000, 5.000000, 2.000000 0.509000, 1.000000, 5.000000, 2.000000 0.316000, 1.000000, 5.000000, 2.000000 0.342000, 1.000000, 5.000000, 2.000000 0.353000, 1.000000, 5.000000, 2.000000 0.448000, 1.000000, 5.000000, 2.000000 0.308000, 1.000000, 5.000000, 2.000000 0.349000, 1.000000, 5.000000, 2.000000 0.383000, 1.000000, 5.000000, 2.000000 0.320000, 1.000000, 5.000000, 2.000000 1.197000, 6.000000, 8.000000, 5.000000 1.719000, 6.000000, 8.000000, 5.000000 1.152000, 6.000000, 8.000000, 5.000000 1.214000, 6.000000, 8.000000, 5.000000 1.732000, 6.000000, 8.000000, 5.000000 1.175000, 6.000000, 8.000000, 5.000000 1.157000, 6.000000, 8.000000, 5.000000 1.739000, 6.000000, 8.000000, 5.000000 1.181000, 6.000000, 8.000000, 5.000000 1.872000, 6.000000, 8.000000, 5.000000 1.163000, 6.000000, 8.000000, 5.000000 1.217000, 6.000000, 8.000000, 5.000000 1.717000, 6.000000, 8.000000, 5.000000 1.130000, 6.000000, 8.000000, 5.000000 1.194000, 6.000000, 8.000000, 5.000000 1.723000, 6.000000, 8.000000, 5.000000 1.133000, 6.000000, 8.000000, 5.000000 1.177000, 6.000000, 8.000000, 5.000000 1.803000, 6.000000, 8.000000, 5.000000 1.140000, 6.000000, 8.000000, 5.000000 1.218000, 6.000000, 8.000000, 5.000000 1.742000, 6.000000, 8.000000, 5.000000 1.127000, 6.000000, 8.000000, 5.000000 1.185000, 6.000000, 8.000000, 5.000000 1.778000, 6.000000, 8.000000, 5.000000 1.132000, 6.000000, 8.000000, 5.000000 1.184000, 6.000000, 8.000000, 5.000000 1.781000, 6.000000, 8.000000, 5.000000 1.131000, 6.000000, 8.000000, 5.000000 1.210000, 6.000000, 8.000000, 5.000000 1.791000, 6.000000, 8.000000, 5.000000 1.179000, 6.000000, 8.000000, 5.000000 1.858000, 6.000000, 8.000000, 5.000000 1.121000, 6.000000, 8.000000, 5.000000 1.188000, 6.000000, 8.000000, 5.000000 1.742000, 6.000000, 8.000000, 5.000000 1.167000, 6.000000, 8.000000, 5.000000 1.198000, 6.000000, 8.000000, 5.000000 1.751000, 6.000000, 8.000000, 5.000000 1.127000, 6.000000, 8.000000, 5.000000 1.204000, 6.000000, 8.000000, 5.000000 1.583000, 6.000000, 8.000000, 5.000000 0.386000, 8.000000, 4.000000, 3.000000 0.428000, 8.000000, 4.000000, 3.000000 0.555000, 8.000000, 4.000000, 3.000000 0.369000, 8.000000, 4.000000, 3.000000 0.355000, 8.000000, 4.000000, 3.000000 0.378000, 8.000000, 4.000000, 3.000000 0.545000, 8.000000, 4.000000, 3.000000 0.389000, 8.000000, 4.000000, 3.000000 0.365000, 8.000000, 4.000000, 3.000000 0.402000, 8.000000, 4.000000, 3.000000 0.561000, 8.000000, 4.000000, 3.000000 0.339000, 8.000000, 4.000000, 3.000000 0.385000, 8.000000, 4.000000, 3.000000 0.582000, 8.000000, 4.000000, 3.000000 0.341000, 8.000000, 4.000000, 3.000000 0.417000, 8.000000, 4.000000, 3.000000 0.578000, 8.000000, 4.000000, 3.000000 0.341000, 8.000000, 4.000000, 3.000000 0.400000, 8.000000, 4.000000, 3.000000 0.614000, 8.000000, 4.000000, 3.000000 0.339000, 8.000000, 4.000000, 3.000000 0.416000, 8.000000, 4.000000, 3.000000 0.619000, 8.000000, 4.000000, 3.000000 0.335000, 8.000000, 4.000000, 3.000000 0.418000, 8.000000, 4.000000, 3.000000 0.576000, 8.000000, 4.000000, 3.000000 0.341000, 8.000000, 4.000000, 3.000000 0.404000, 8.000000, 4.000000, 3.000000 0.552000, 8.000000, 4.000000, 3.000000 0.343000, 8.000000, 4.000000, 3.000000 0.446000, 8.000000, 4.000000, 3.000000 0.529000, 8.000000, 4.000000, 3.000000 0.382000, 8.000000, 4.000000, 3.000000 0.389000, 8.000000, 4.000000, 3.000000 0.610000, 8.000000, 4.000000, 3.000000 0.345000, 8.000000, 4.000000, 3.000000 0.406000, 8.000000, 4.000000, 3.000000 0.560000, 8.000000, 4.000000, 3.000000 0.380000, 8.000000, 4.000000, 3.000000 0.364000, 8.000000, 4.000000, 3.000000 0.391000, 8.000000, 4.000000, 3.000000 0.387000, 8.000000, 4.000000, 3.000000 0.329000, 1.000000, 1.000000, 6.000000 0.244000, 1.000000, 1.000000, 6.000000 0.371000, 1.000000, 1.000000, 6.000000 0.193000, 1.000000, 1.000000, 6.000000 0.250000, 1.000000, 1.000000, 6.000000 0.293000, 1.000000, 1.000000, 6.000000 0.209000, 1.000000, 1.000000, 6.000000 0.243000, 1.000000, 1.000000, 6.000000 0.309000, 1.000000, 1.000000, 6.000000 0.219000, 1.000000, 1.000000, 6.000000 0.233000, 1.000000, 1.000000, 6.000000 0.355000, 1.000000, 1.000000, 6.000000 0.196000, 1.000000, 1.000000, 6.000000 0.235000, 1.000000, 1.000000, 6.000000 0.202000, 1.000000, 1.000000, 6.000000 0.241000, 1.000000, 1.000000, 6.000000 0.341000, 1.000000, 1.000000, 6.000000 0.174000, 1.000000, 1.000000, 6.000000 0.251000, 1.000000, 1.000000, 6.000000 0.322000, 1.000000, 1.000000, 6.000000 0.220000, 1.000000, 1.000000, 6.000000 0.224000, 1.000000, 1.000000, 6.000000 0.326000, 1.000000, 1.000000, 6.000000 0.171000, 1.000000, 1.000000, 6.000000 0.256000, 1.000000, 1.000000, 6.000000 0.310000, 1.000000, 1.000000, 6.000000 0.222000, 1.000000, 1.000000, 6.000000 0.241000, 1.000000, 1.000000, 6.000000 0.342000, 1.000000, 1.000000, 6.000000 0.181000, 1.000000, 1.000000, 6.000000 0.252000, 1.000000, 1.000000, 6.000000 0.364000, 1.000000, 1.000000, 6.000000 0.188000, 1.000000, 1.000000, 6.000000 0.245000, 1.000000, 1.000000, 6.000000 0.176000, 1.000000, 1.000000, 6.000000 0.245000, 1.000000, 1.000000, 6.000000 0.330000, 1.000000, 1.000000, 6.000000 0.239000, 1.000000, 1.000000, 6.000000 0.358000, 1.000000, 1.000000, 6.000000 0.198000, 1.000000, 1.000000, 6.000000 0.260000, 1.000000, 1.000000, 6.000000 0.322000, 1.000000, 1.000000, 6.000000 0.480000, 3.000000, 7.000000, 2.000000 0.512000, 3.000000, 7.000000, 2.000000 0.682000, 3.000000, 7.000000, 2.000000 0.440000, 3.000000, 7.000000, 2.000000 0.515000, 3.000000, 7.000000, 2.000000 0.711000, 3.000000, 7.000000, 2.000000 0.438000, 3.000000, 7.000000, 2.000000 0.514000, 3.000000, 7.000000, 2.000000 0.759000, 3.000000, 7.000000, 2.000000 0.439000, 3.000000, 7.000000, 2.000000 0.510000, 3.000000, 7.000000, 2.000000 0.719000, 3.000000, 7.000000, 2.000000 0.491000, 3.000000, 7.000000, 2.000000 0.530000, 3.000000, 7.000000, 2.000000 0.683000, 3.000000, 7.000000, 2.000000 0.440000, 3.000000, 7.000000, 2.000000 0.518000, 3.000000, 7.000000, 2.000000 0.711000, 3.000000, 7.000000, 2.000000 0.434000, 3.000000, 7.000000, 2.000000 0.546000, 3.000000, 7.000000, 2.000000 0.746000, 3.000000, 7.000000, 2.000000 0.439000, 3.000000, 7.000000, 2.000000 0.528000, 3.000000, 7.000000, 2.000000 0.708000, 3.000000, 7.000000, 2.000000 0.441000, 3.000000, 7.000000, 2.000000 0.506000, 3.000000, 7.000000, 2.000000 0.733000, 3.000000, 7.000000, 2.000000 0.444000, 3.000000, 7.000000, 2.000000 0.495000, 3.000000, 7.000000, 2.000000 0.727000, 3.000000, 7.000000, 2.000000 0.445000, 3.000000, 7.000000, 2.000000 0.490000, 3.000000, 7.000000, 2.000000 0.696000, 3.000000, 7.000000, 2.000000 0.444000, 3.000000, 7.000000, 2.000000 0.469000, 3.000000, 7.000000, 2.000000 0.488000, 3.000000, 7.000000, 2.000000 0.689000, 3.000000, 7.000000, 2.000000 0.482000, 3.000000, 7.000000, 2.000000 0.442000, 3.000000, 7.000000, 2.000000 0.521000, 3.000000, 7.000000, 2.000000 0.708000, 3.000000, 7.000000, 2.000000 0.437000, 3.000000, 7.000000, 2.000000 2.273000, 4.000000, 10.000000, 6.000000 3.163000, 4.000000, 10.000000, 6.000000 2.216000, 4.000000, 10.000000, 6.000000 2.265000, 4.000000, 10.000000, 6.000000 3.135000, 4.000000, 10.000000, 6.000000 2.283000, 4.000000, 10.000000, 6.000000 3.072000, 4.000000, 10.000000, 6.000000 2.236000, 4.000000, 10.000000, 6.000000 2.312000, 4.000000, 10.000000, 6.000000 3.145000, 4.000000, 10.000000, 6.000000 2.211000, 4.000000, 10.000000, 6.000000 2.221000, 4.000000, 10.000000, 6.000000 2.270000, 4.000000, 10.000000, 6.000000 3.336000, 4.000000, 10.000000, 6.000000 2.216000, 4.000000, 10.000000, 6.000000 2.281000, 4.000000, 10.000000, 6.000000 3.408000, 4.000000, 10.000000, 6.000000 2.236000, 4.000000, 10.000000, 6.000000 2.271000, 4.000000, 10.000000, 6.000000 3.160000, 4.000000, 10.000000, 6.000000 2.248000, 4.000000, 10.000000, 6.000000 2.247000, 4.000000, 10.000000, 6.000000 3.170000, 4.000000, 10.000000, 6.000000 2.195000, 4.000000, 10.000000, 6.000000 2.253000, 4.000000, 10.000000, 6.000000 3.419000, 4.000000, 10.000000, 6.000000 2.281000, 4.000000, 10.000000, 6.000000 2.265000, 4.000000, 10.000000, 6.000000 3.303000, 4.000000, 10.000000, 6.000000 2.251000, 4.000000, 10.000000, 6.000000 2.266000, 4.000000, 10.000000, 6.000000 3.209000, 4.000000, 10.000000, 6.000000 2.241000, 4.000000, 10.000000, 6.000000 2.281000, 4.000000, 10.000000, 6.000000 3.321000, 4.000000, 10.000000, 6.000000 2.263000, 4.000000, 10.000000, 6.000000 3.158000, 4.000000, 10.000000, 6.000000 2.234000, 4.000000, 10.000000, 6.000000 2.275000, 4.000000, 10.000000, 6.000000 3.164000, 4.000000, 10.000000, 6.000000 2.209000, 4.000000, 10.000000, 6.000000 2.213000, 4.000000, 10.000000, 6.000000 0.469000, 6.000000, 6.000000, 2.000000 0.673000, 6.000000, 6.000000, 2.000000 0.482000, 6.000000, 6.000000, 2.000000 0.668000, 6.000000, 6.000000, 2.000000 0.426000, 6.000000, 6.000000, 2.000000 0.411000, 6.000000, 6.000000, 2.000000 0.471000, 6.000000, 6.000000, 2.000000 0.653000, 6.000000, 6.000000, 2.000000 0.430000, 6.000000, 6.000000, 2.000000 0.487000, 6.000000, 6.000000, 2.000000 0.644000, 6.000000, 6.000000, 2.000000 0.419000, 6.000000, 6.000000, 2.000000 0.501000, 6.000000, 6.000000, 2.000000 0.664000, 6.000000, 6.000000, 2.000000 0.451000, 6.000000, 6.000000, 2.000000 0.485000, 6.000000, 6.000000, 2.000000 0.635000, 6.000000, 6.000000, 2.000000 0.414000, 6.000000, 6.000000, 2.000000 0.474000, 6.000000, 6.000000, 2.000000 0.624000, 6.000000, 6.000000, 2.000000 0.410000, 6.000000, 6.000000, 2.000000 0.514000, 6.000000, 6.000000, 2.000000 0.680000, 6.000000, 6.000000, 2.000000 0.416000, 6.000000, 6.000000, 2.000000 0.464000, 6.000000, 6.000000, 2.000000 0.644000, 6.000000, 6.000000, 2.000000 0.440000, 6.000000, 6.000000, 2.000000 0.471000, 6.000000, 6.000000, 2.000000 0.660000, 6.000000, 6.000000, 2.000000 0.411000, 6.000000, 6.000000, 2.000000 0.504000, 6.000000, 6.000000, 2.000000 0.655000, 6.000000, 6.000000, 2.000000 0.415000, 6.000000, 6.000000, 2.000000 0.462000, 6.000000, 6.000000, 2.000000 0.632000, 6.000000, 6.000000, 2.000000 0.409000, 6.000000, 6.000000, 2.000000 0.515000, 6.000000, 6.000000, 2.000000 0.664000, 6.000000, 6.000000, 2.000000 0.414000, 6.000000, 6.000000, 2.000000 0.481000, 6.000000, 6.000000, 2.000000 0.681000, 6.000000, 6.000000, 2.000000 0.418000, 6.000000, 6.000000, 2.000000 0.368000, 6.000000, 3.000000, 4.000000 0.418000, 6.000000, 3.000000, 4.000000 0.270000, 6.000000, 3.000000, 4.000000 0.352000, 6.000000, 3.000000, 4.000000 0.437000, 6.000000, 3.000000, 4.000000 0.276000, 6.000000, 3.000000, 4.000000 0.341000, 6.000000, 3.000000, 4.000000 0.441000, 6.000000, 3.000000, 4.000000 0.271000, 6.000000, 3.000000, 4.000000 0.348000, 6.000000, 3.000000, 4.000000 0.474000, 6.000000, 3.000000, 4.000000 0.309000, 6.000000, 3.000000, 4.000000 0.328000, 6.000000, 3.000000, 4.000000 0.441000, 6.000000, 3.000000, 4.000000 0.272000, 6.000000, 3.000000, 4.000000 0.362000, 6.000000, 3.000000, 4.000000 0.410000, 6.000000, 3.000000, 4.000000 0.302000, 6.000000, 3.000000, 4.000000 0.324000, 6.000000, 3.000000, 4.000000 0.267000, 6.000000, 3.000000, 4.000000 0.313000, 6.000000, 3.000000, 4.000000 0.497000, 6.000000, 3.000000, 4.000000 0.268000, 6.000000, 3.000000, 4.000000 0.376000, 6.000000, 3.000000, 4.000000 0.451000, 6.000000, 3.000000, 4.000000 0.271000, 6.000000, 3.000000, 4.000000 0.329000, 6.000000, 3.000000, 4.000000 0.447000, 6.000000, 3.000000, 4.000000 0.317000, 6.000000, 3.000000, 4.000000 0.271000, 6.000000, 3.000000, 4.000000 0.322000, 6.000000, 3.000000, 4.000000 0.475000, 6.000000, 3.000000, 4.000000 0.298000, 6.000000, 3.000000, 4.000000 0.280000, 6.000000, 3.000000, 4.000000 0.548000, 6.000000, 3.000000, 4.000000 0.262000, 6.000000, 3.000000, 4.000000 0.320000, 6.000000, 3.000000, 4.000000 0.441000, 6.000000, 3.000000, 4.000000 0.271000, 6.000000, 3.000000, 4.000000 0.331000, 6.000000, 3.000000, 4.000000 0.465000, 6.000000, 3.000000, 4.000000 0.269000, 6.000000, 3.000000, 4.000000 0.544000, 5.000000, 7.000000, 2.000000 0.757000, 5.000000, 7.000000, 2.000000 0.476000, 5.000000, 7.000000, 2.000000 0.545000, 5.000000, 7.000000, 2.000000 0.737000, 5.000000, 7.000000, 2.000000 0.477000, 5.000000, 7.000000, 2.000000 0.545000, 5.000000, 7.000000, 2.000000 0.845000, 5.000000, 7.000000, 2.000000 0.477000, 5.000000, 7.000000, 2.000000 0.547000, 5.000000, 7.000000, 2.000000 0.812000, 5.000000, 7.000000, 2.000000 0.529000, 5.000000, 7.000000, 2.000000 0.504000, 5.000000, 7.000000, 2.000000 0.506000, 5.000000, 7.000000, 2.000000 0.533000, 5.000000, 7.000000, 2.000000 0.657000, 5.000000, 7.000000, 2.000000 0.539000, 5.000000, 7.000000, 2.000000 0.765000, 5.000000, 7.000000, 2.000000 0.552000, 5.000000, 7.000000, 2.000000 0.789000, 5.000000, 7.000000, 2.000000 0.475000, 5.000000, 7.000000, 2.000000 0.556000, 5.000000, 7.000000, 2.000000 0.738000, 5.000000, 7.000000, 2.000000 0.512000, 5.000000, 7.000000, 2.000000 0.487000, 5.000000, 7.000000, 2.000000 0.546000, 5.000000, 7.000000, 2.000000 0.796000, 5.000000, 7.000000, 2.000000 0.484000, 5.000000, 7.000000, 2.000000 0.549000, 5.000000, 7.000000, 2.000000 0.766000, 5.000000, 7.000000, 2.000000 0.518000, 5.000000, 7.000000, 2.000000 0.521000, 5.000000, 7.000000, 2.000000 0.799000, 5.000000, 7.000000, 2.000000 0.486000, 5.000000, 7.000000, 2.000000 0.562000, 5.000000, 7.000000, 2.000000 0.813000, 5.000000, 7.000000, 2.000000 0.476000, 5.000000, 7.000000, 2.000000 0.532000, 5.000000, 7.000000, 2.000000 0.741000, 5.000000, 7.000000, 2.000000 0.509000, 5.000000, 7.000000, 2.000000 0.555000, 5.000000, 7.000000, 2.000000 0.780000, 5.000000, 7.000000, 2.000000 2.039000, 8.000000, 10.000000, 5.000000 2.102000, 8.000000, 10.000000, 5.000000 3.005000, 8.000000, 10.000000, 5.000000 2.058000, 8.000000, 10.000000, 5.000000 2.091000, 8.000000, 10.000000, 5.000000 2.928000, 8.000000, 10.000000, 5.000000 2.092000, 8.000000, 10.000000, 5.000000 2.067000, 8.000000, 10.000000, 5.000000 3.010000, 8.000000, 10.000000, 5.000000 2.026000, 8.000000, 10.000000, 5.000000 2.147000, 8.000000, 10.000000, 5.000000 2.996000, 8.000000, 10.000000, 5.000000 2.111000, 8.000000, 10.000000, 5.000000 2.902000, 8.000000, 10.000000, 5.000000 2.034000, 8.000000, 10.000000, 5.000000 2.079000, 8.000000, 10.000000, 5.000000 2.858000, 8.000000, 10.000000, 5.000000 2.060000, 8.000000, 10.000000, 5.000000 2.078000, 8.000000, 10.000000, 5.000000 2.292000, 8.000000, 10.000000, 5.000000 3.105000, 8.000000, 10.000000, 5.000000 2.028000, 8.000000, 10.000000, 5.000000 2.098000, 8.000000, 10.000000, 5.000000 3.042000, 8.000000, 10.000000, 5.000000 2.034000, 8.000000, 10.000000, 5.000000 2.112000, 8.000000, 10.000000, 5.000000 2.800000, 8.000000, 10.000000, 5.000000 2.037000, 8.000000, 10.000000, 5.000000 2.062000, 8.000000, 10.000000, 5.000000 2.866000, 8.000000, 10.000000, 5.000000 2.059000, 8.000000, 10.000000, 5.000000 2.061000, 8.000000, 10.000000, 5.000000 2.920000, 8.000000, 10.000000, 5.000000 2.041000, 8.000000, 10.000000, 5.000000 2.073000, 8.000000, 10.000000, 5.000000 2.959000, 8.000000, 10.000000, 5.000000 2.066000, 8.000000, 10.000000, 5.000000 2.063000, 8.000000, 10.000000, 5.000000 2.905000, 8.000000, 10.000000, 5.000000 2.028000, 8.000000, 10.000000, 5.000000 2.058000, 8.000000, 10.000000, 5.000000 2.989000, 8.000000, 10.000000, 5.000000 1.629000, 6.000000, 8.000000, 8.000000 1.675000, 6.000000, 8.000000, 8.000000 2.372000, 6.000000, 8.000000, 8.000000 1.627000, 6.000000, 8.000000, 8.000000 1.665000, 6.000000, 8.000000, 8.000000 2.568000, 6.000000, 8.000000, 8.000000 1.639000, 6.000000, 8.000000, 8.000000 1.684000, 6.000000, 8.000000, 8.000000 2.432000, 6.000000, 8.000000, 8.000000 1.659000, 6.000000, 8.000000, 8.000000 2.617000, 6.000000, 8.000000, 8.000000 2.096000, 6.000000, 8.000000, 8.000000 2.645000, 6.000000, 8.000000, 8.000000 2.291000, 6.000000, 8.000000, 8.000000 2.590000, 6.000000, 8.000000, 8.000000 8.075000, 6.000000, 8.000000, 8.000000 2.076000, 6.000000, 8.000000, 8.000000 1.842000, 6.000000, 8.000000, 8.000000 2.526000, 6.000000, 8.000000, 8.000000 2.011000, 6.000000, 8.000000, 8.000000 2.423000, 6.000000, 8.000000, 8.000000 1.651000, 6.000000, 8.000000, 8.000000 1.744000, 6.000000, 8.000000, 8.000000 2.338000, 6.000000, 8.000000, 8.000000 1.672000, 6.000000, 8.000000, 8.000000 1.669000, 6.000000, 8.000000, 8.000000 2.845000, 6.000000, 8.000000, 8.000000 1.731000, 6.000000, 8.000000, 8.000000 2.267000, 6.000000, 8.000000, 8.000000 1.654000, 6.000000, 8.000000, 8.000000 1.739000, 6.000000, 8.000000, 8.000000 2.585000, 6.000000, 8.000000, 8.000000 1.710000, 6.000000, 8.000000, 8.000000 1.763000, 6.000000, 8.000000, 8.000000 2.461000, 6.000000, 8.000000, 8.000000 1.681000, 6.000000, 8.000000, 8.000000 1.756000, 6.000000, 8.000000, 8.000000 2.449000, 6.000000, 8.000000, 8.000000 1.644000, 6.000000, 8.000000, 8.000000 2.466000, 6.000000, 8.000000, 8.000000 1.680000, 6.000000, 8.000000, 8.000000 1.866000, 5.000000, 10.000000, 3.000000 2.054000, 6.000000, 8.000000, 8.000000 2.002000, 5.000000, 10.000000, 3.000000 1.352000, 5.000000, 10.000000, 3.000000 1.569000, 5.000000, 10.000000, 3.000000 1.298000, 5.000000, 10.000000, 3.000000 2.725000, 5.000000, 10.000000, 3.000000 1.386000, 5.000000, 10.000000, 3.000000 2.134000, 5.000000, 10.000000, 3.000000 1.318000, 5.000000, 10.000000, 3.000000 1.829000, 5.000000, 10.000000, 3.000000 1.402000, 5.000000, 10.000000, 3.000000 1.981000, 5.000000, 10.000000, 3.000000 1.385000, 5.000000, 10.000000, 3.000000 1.273000, 5.000000, 10.000000, 3.000000 1.246000, 5.000000, 10.000000, 3.000000 1.942000, 5.000000, 10.000000, 3.000000 1.326000, 5.000000, 10.000000, 3.000000 1.370000, 5.000000, 10.000000, 3.000000 1.644000, 5.000000, 10.000000, 3.000000 1.717000, 5.000000, 10.000000, 3.000000 1.654000, 5.000000, 10.000000, 3.000000 1.672000, 5.000000, 10.000000, 3.000000 1.379000, 5.000000, 10.000000, 3.000000 1.848000, 5.000000, 10.000000, 3.000000 1.283000, 5.000000, 10.000000, 3.000000 1.391000, 5.000000, 10.000000, 3.000000 1.927000, 5.000000, 10.000000, 3.000000 1.671000, 5.000000, 10.000000, 3.000000 1.388000, 5.000000, 10.000000, 3.000000 1.627000, 5.000000, 10.000000, 3.000000 1.487000, 5.000000, 10.000000, 3.000000 1.626000, 5.000000, 10.000000, 3.000000 1.345000, 5.000000, 10.000000, 3.000000 2.669000, 2.000000, 8.000000, 9.000000 1.373000, 5.000000, 10.000000, 3.000000 2.108000, 2.000000, 8.000000, 9.000000 1.383000, 5.000000, 10.000000, 3.000000 2.529000, 2.000000, 8.000000, 9.000000 1.329000, 5.000000, 10.000000, 3.000000 2.625000, 2.000000, 8.000000, 9.000000 1.353000, 5.000000, 10.000000, 3.000000 2.606000, 2.000000, 8.000000, 9.000000 1.563000, 5.000000, 10.000000, 3.000000 3.374000, 2.000000, 8.000000, 9.000000 1.380000, 5.000000, 10.000000, 3.000000 1.445000, 5.000000, 10.000000, 3.000000 1.383000, 5.000000, 10.000000, 3.000000 1.317000, 5.000000, 10.000000, 3.000000 1.771000, 2.000000, 8.000000, 9.000000 1.765000, 2.000000, 8.000000, 9.000000 1.864000, 2.000000, 8.000000, 9.000000 1.880000, 2.000000, 8.000000, 9.000000 1.848000, 2.000000, 8.000000, 9.000000 1.708000, 2.000000, 8.000000, 9.000000 1.712000, 2.000000, 8.000000, 9.000000 1.773000, 2.000000, 8.000000, 9.000000 1.691000, 2.000000, 8.000000, 9.000000 1.845000, 2.000000, 8.000000, 9.000000 1.771000, 2.000000, 8.000000, 9.000000 1.782000, 2.000000, 8.000000, 9.000000 1.773000, 2.000000, 8.000000, 9.000000 1.786000, 2.000000, 8.000000, 9.000000 1.825000, 2.000000, 8.000000, 9.000000 1.801000, 2.000000, 8.000000, 9.000000 1.789000, 2.000000, 8.000000, 9.000000 1.793000, 2.000000, 8.000000, 9.000000 1.782000, 2.000000, 8.000000, 9.000000 2.515000, 2.000000, 8.000000, 9.000000 1.788000, 2.000000, 8.000000, 9.000000 2.425000, 2.000000, 8.000000, 9.000000 1.783000, 2.000000, 8.000000, 9.000000 2.576000, 2.000000, 8.000000, 9.000000 1.814000, 2.000000, 8.000000, 9.000000 2.500000, 2.000000, 8.000000, 9.000000 1.761000, 2.000000, 8.000000, 9.000000 2.568000, 2.000000, 8.000000, 9.000000 1.768000, 2.000000, 8.000000, 9.000000 2.505000, 2.000000, 8.000000, 9.000000 1.791000, 2.000000, 8.000000, 9.000000 2.602000, 2.000000, 8.000000, 9.000000 1.833000, 2.000000, 8.000000, 9.000000 2.475000, 2.000000, 8.000000, 9.000000 1.848000, 2.000000, 8.000000, 9.000000 2.618000, 2.000000, 8.000000, 9.000000 0.443000, 3.000000, 5.000000, 5.000000 0.452000, 3.000000, 5.000000, 5.000000 0.516000, 3.000000, 5.000000, 5.000000 1.051000, 3.000000, 5.000000, 5.000000 0.753000, 3.000000, 5.000000, 5.000000 0.745000, 3.000000, 5.000000, 5.000000 0.479000, 3.000000, 5.000000, 5.000000 0.742000, 3.000000, 5.000000, 5.000000 0.473000, 3.000000, 5.000000, 5.000000 0.624000, 3.000000, 5.000000, 5.000000 0.471000, 3.000000, 5.000000, 5.000000 0.498000, 3.000000, 5.000000, 5.000000 1.867000, 3.000000, 5.000000, 5.000000 0.519000, 3.000000, 5.000000, 5.000000 0.500000, 3.000000, 5.000000, 5.000000 0.431000, 3.000000, 5.000000, 5.000000 0.602000, 3.000000, 5.000000, 5.000000 0.577000, 3.000000, 5.000000, 5.000000 0.442000, 3.000000, 5.000000, 5.000000 0.519000, 3.000000, 5.000000, 5.000000 0.701000, 3.000000, 5.000000, 5.000000 0.479000, 3.000000, 5.000000, 5.000000 0.741000, 3.000000, 5.000000, 5.000000 0.538000, 3.000000, 5.000000, 5.000000 0.657000, 3.000000, 5.000000, 5.000000 0.526000, 3.000000, 5.000000, 5.000000 0.625000, 3.000000, 5.000000, 5.000000 0.505000, 3.000000, 5.000000, 5.000000 0.795000, 3.000000, 5.000000, 5.000000 0.444000, 3.000000, 5.000000, 5.000000 0.453000, 3.000000, 5.000000, 5.000000 0.488000, 3.000000, 5.000000, 5.000000 0.674000, 3.000000, 5.000000, 5.000000 0.430000, 3.000000, 5.000000, 5.000000 1.041000, 3.000000, 5.000000, 5.000000 0.637000, 3.000000, 5.000000, 5.000000 0.408000, 3.000000, 5.000000, 5.000000 0.467000, 3.000000, 5.000000, 5.000000 0.692000, 3.000000, 5.000000, 5.000000 0.555000, 3.000000, 5.000000, 5.000000 0.694000, 3.000000, 5.000000, 5.000000 0.460000, 3.000000, 5.000000, 5.000000 0.819000, 8.000000, 5.000000, 9.000000 0.854000, 8.000000, 5.000000, 9.000000 0.795000, 8.000000, 5.000000, 9.000000 1.014000, 8.000000, 5.000000, 9.000000 0.773000, 8.000000, 5.000000, 9.000000 0.963000, 8.000000, 5.000000, 9.000000 0.700000, 8.000000, 5.000000, 9.000000 0.715000, 8.000000, 5.000000, 9.000000 0.993000, 8.000000, 5.000000, 9.000000 0.679000, 8.000000, 5.000000, 9.000000 0.815000, 8.000000, 5.000000, 9.000000 1.104000, 8.000000, 5.000000, 9.000000 0.739000, 8.000000, 5.000000, 9.000000 1.126000, 8.000000, 5.000000, 9.000000 1.082000, 8.000000, 5.000000, 9.000000 0.729000, 8.000000, 5.000000, 9.000000 0.852000, 8.000000, 5.000000, 9.000000 1.015000, 8.000000, 5.000000, 9.000000 0.673000, 8.000000, 5.000000, 9.000000 0.745000, 8.000000, 5.000000, 9.000000 0.959000, 8.000000, 5.000000, 9.000000 0.722000, 8.000000, 5.000000, 9.000000 0.732000, 8.000000, 5.000000, 9.000000 1.037000, 8.000000, 5.000000, 9.000000 0.839000, 8.000000, 5.000000, 9.000000 1.040000, 8.000000, 5.000000, 9.000000 0.740000, 8.000000, 5.000000, 9.000000 0.748000, 8.000000, 5.000000, 9.000000 0.801000, 8.000000, 5.000000, 9.000000 0.980000, 8.000000, 5.000000, 9.000000 0.696000, 8.000000, 5.000000, 9.000000 0.739000, 8.000000, 5.000000, 9.000000 0.736000, 8.000000, 5.000000, 9.000000 1.078000, 8.000000, 5.000000, 9.000000 0.752000, 8.000000, 5.000000, 9.000000 1.116000, 8.000000, 5.000000, 9.000000 0.711000, 8.000000, 5.000000, 9.000000 0.789000, 8.000000, 5.000000, 9.000000 1.081000, 8.000000, 5.000000, 9.000000 0.688000, 8.000000, 5.000000, 9.000000 0.743000, 8.000000, 5.000000, 9.000000 0.913000, 8.000000, 5.000000, 9.000000 0.446000, 2.000000, 4.000000, 8.000000 0.669000, 2.000000, 4.000000, 8.000000 0.419000, 2.000000, 4.000000, 8.000000 0.697000, 2.000000, 4.000000, 8.000000 0.395000, 2.000000, 4.000000, 8.000000 0.713000, 2.000000, 4.000000, 8.000000 0.869000, 2.000000, 4.000000, 8.000000 0.476000, 2.000000, 4.000000, 8.000000 0.544000, 2.000000, 4.000000, 8.000000 0.577000, 2.000000, 4.000000, 8.000000 0.434000, 2.000000, 4.000000, 8.000000 0.498000, 2.000000, 4.000000, 8.000000 0.579000, 2.000000, 4.000000, 8.000000 0.383000, 2.000000, 4.000000, 8.000000 0.462000, 2.000000, 4.000000, 8.000000 0.388000, 2.000000, 4.000000, 8.000000 0.582000, 2.000000, 4.000000, 8.000000 0.373000, 2.000000, 4.000000, 8.000000 0.463000, 2.000000, 4.000000, 8.000000 0.564000, 2.000000, 4.000000, 8.000000 0.431000, 2.000000, 4.000000, 8.000000 0.447000, 2.000000, 4.000000, 8.000000 0.681000, 2.000000, 4.000000, 8.000000 0.390000, 2.000000, 4.000000, 8.000000 0.423000, 2.000000, 4.000000, 8.000000 0.696000, 2.000000, 4.000000, 8.000000 0.712000, 2.000000, 4.000000, 8.000000 0.421000, 2.000000, 4.000000, 8.000000 0.574000, 2.000000, 4.000000, 8.000000 0.596000, 2.000000, 4.000000, 8.000000 0.715000, 2.000000, 4.000000, 8.000000 0.565000, 2.000000, 4.000000, 8.000000 0.441000, 2.000000, 4.000000, 8.000000 0.562000, 2.000000, 4.000000, 8.000000 0.376000, 2.000000, 4.000000, 8.000000 0.400000, 2.000000, 4.000000, 8.000000 0.574000, 2.000000, 4.000000, 8.000000 0.433000, 2.000000, 4.000000, 8.000000 0.423000, 2.000000, 4.000000, 8.000000 0.697000, 2.000000, 4.000000, 8.000000 0.413000, 2.000000, 4.000000, 8.000000 0.371000, 2.000000, 4.000000, 8.000000 1.093000, 2.000000, 7.000000, 8.000000 1.085000, 2.000000, 7.000000, 8.000000 1.099000, 2.000000, 7.000000, 8.000000 1.746000, 2.000000, 7.000000, 8.000000 1.086000, 2.000000, 7.000000, 8.000000 1.123000, 2.000000, 7.000000, 8.000000 1.677000, 2.000000, 7.000000, 8.000000 1.089000, 2.000000, 7.000000, 8.000000 1.081000, 2.000000, 7.000000, 8.000000 1.078000, 2.000000, 7.000000, 8.000000 1.142000, 2.000000, 7.000000, 8.000000 1.621000, 2.000000, 7.000000, 8.000000 1.155000, 2.000000, 7.000000, 8.000000 1.696000, 2.000000, 7.000000, 8.000000 1.106000, 2.000000, 7.000000, 8.000000 1.415000, 2.000000, 7.000000, 8.000000 1.617000, 2.000000, 7.000000, 8.000000 1.512000, 2.000000, 7.000000, 8.000000 1.485000, 2.000000, 7.000000, 8.000000 1.483000, 2.000000, 7.000000, 8.000000 1.614000, 2.000000, 7.000000, 8.000000 1.354000, 2.000000, 7.000000, 8.000000 1.726000, 2.000000, 7.000000, 8.000000 1.400000, 2.000000, 7.000000, 8.000000 1.220000, 2.000000, 7.000000, 8.000000 1.757000, 2.000000, 7.000000, 8.000000 1.251000, 2.000000, 7.000000, 8.000000 1.289000, 2.000000, 7.000000, 8.000000 1.612000, 2.000000, 7.000000, 8.000000 1.190000, 2.000000, 7.000000, 8.000000 1.480000, 2.000000, 7.000000, 8.000000 1.597000, 2.000000, 7.000000, 8.000000 1.380000, 2.000000, 7.000000, 8.000000 1.567000, 2.000000, 7.000000, 8.000000 1.693000, 2.000000, 7.000000, 8.000000 1.563000, 2.000000, 7.000000, 8.000000 1.742000, 2.000000, 7.000000, 8.000000 1.662000, 2.000000, 7.000000, 8.000000 1.201000, 2.000000, 7.000000, 8.000000 1.433000, 2.000000, 7.000000, 8.000000 1.669000, 2.000000, 7.000000, 8.000000 1.411000, 2.000000, 7.000000, 8.000000 0.420000, 7.000000, 2.000000, 9.000000 0.436000, 7.000000, 2.000000, 9.000000 0.345000, 7.000000, 2.000000, 9.000000 0.546000, 7.000000, 2.000000, 9.000000 0.436000, 7.000000, 2.000000, 9.000000 0.462000, 7.000000, 2.000000, 9.000000 0.369000, 7.000000, 2.000000, 9.000000 0.442000, 7.000000, 2.000000, 9.000000 0.334000, 7.000000, 2.000000, 9.000000 0.450000, 7.000000, 2.000000, 9.000000 0.425000, 7.000000, 2.000000, 9.000000 0.463000, 7.000000, 2.000000, 9.000000 0.456000, 7.000000, 2.000000, 9.000000 0.494000, 7.000000, 2.000000, 9.000000 0.535000, 7.000000, 2.000000, 9.000000 0.406000, 7.000000, 2.000000, 9.000000 0.450000, 7.000000, 2.000000, 9.000000 0.458000, 7.000000, 2.000000, 9.000000 0.703000, 7.000000, 2.000000, 9.000000 0.453000, 7.000000, 2.000000, 9.000000 0.521000, 7.000000, 2.000000, 9.000000 0.568000, 7.000000, 2.000000, 9.000000 0.395000, 7.000000, 2.000000, 9.000000 0.511000, 7.000000, 2.000000, 9.000000 0.467000, 7.000000, 2.000000, 9.000000 0.493000, 7.000000, 2.000000, 9.000000 0.377000, 7.000000, 2.000000, 9.000000 0.427000, 7.000000, 2.000000, 9.000000 0.534000, 7.000000, 2.000000, 9.000000 0.409000, 7.000000, 2.000000, 9.000000 0.385000, 7.000000, 2.000000, 9.000000 0.511000, 7.000000, 2.000000, 9.000000 0.444000, 7.000000, 2.000000, 9.000000 0.491000, 7.000000, 2.000000, 9.000000 0.407000, 7.000000, 2.000000, 9.000000 0.360000, 7.000000, 2.000000, 9.000000 0.444000, 7.000000, 2.000000, 9.000000 0.501000, 7.000000, 2.000000, 9.000000 0.403000, 7.000000, 2.000000, 9.000000 0.428000, 7.000000, 2.000000, 9.000000 0.419000, 7.000000, 2.000000, 9.000000 0.342000, 7.000000, 2.000000, 9.000000 0.955000, 7.000000, 8.000000, 2.000000 1.092000, 7.000000, 8.000000, 2.000000 0.838000, 7.000000, 8.000000, 2.000000 0.957000, 7.000000, 8.000000, 2.000000 0.964000, 7.000000, 8.000000, 2.000000 1.008000, 7.000000, 8.000000, 2.000000 1.092000, 7.000000, 8.000000, 2.000000 0.967000, 7.000000, 8.000000, 2.000000 1.118000, 7.000000, 8.000000, 2.000000 1.030000, 7.000000, 8.000000, 2.000000 1.059000, 7.000000, 8.000000, 2.000000 1.173000, 7.000000, 8.000000, 2.000000 0.964000, 7.000000, 8.000000, 2.000000 1.232000, 7.000000, 8.000000, 2.000000 1.165000, 7.000000, 8.000000, 2.000000 1.073000, 7.000000, 8.000000, 2.000000 1.187000, 7.000000, 8.000000, 2.000000 1.045000, 7.000000, 8.000000, 2.000000 1.104000, 7.000000, 8.000000, 2.000000 1.196000, 7.000000, 8.000000, 2.000000 0.996000, 7.000000, 8.000000, 2.000000 1.128000, 7.000000, 8.000000, 2.000000 1.018000, 7.000000, 8.000000, 2.000000 1.007000, 7.000000, 8.000000, 2.000000 1.048000, 7.000000, 8.000000, 2.000000 0.911000, 7.000000, 8.000000, 2.000000 0.966000, 7.000000, 8.000000, 2.000000 1.066000, 7.000000, 8.000000, 2.000000 0.961000, 7.000000, 8.000000, 2.000000 1.014000, 7.000000, 8.000000, 2.000000 1.030000, 7.000000, 8.000000, 2.000000 1.008000, 7.000000, 8.000000, 2.000000 0.953000, 7.000000, 8.000000, 2.000000 0.885000, 7.000000, 8.000000, 2.000000 1.052000, 7.000000, 8.000000, 2.000000 1.050000, 7.000000, 8.000000, 2.000000 0.986000, 7.000000, 8.000000, 2.000000 0.988000, 7.000000, 8.000000, 2.000000 1.309000, 7.000000, 8.000000, 2.000000 1.384000, 7.000000, 8.000000, 2.000000 1.036000, 7.000000, 8.000000, 2.000000 1.032000, 7.000000, 8.000000, 2.000000 1.398000, 3.000000, 7.000000, 6.000000 1.322000, 3.000000, 7.000000, 6.000000 1.450000, 3.000000, 7.000000, 6.000000 1.361000, 3.000000, 7.000000, 6.000000 1.249000, 3.000000, 7.000000, 6.000000 1.399000, 3.000000, 7.000000, 6.000000 1.049000, 3.000000, 7.000000, 6.000000 1.308000, 3.000000, 7.000000, 6.000000 1.408000, 3.000000, 7.000000, 6.000000 1.226000, 3.000000, 7.000000, 6.000000 1.345000, 3.000000, 7.000000, 6.000000 1.387000, 3.000000, 7.000000, 6.000000 1.372000, 3.000000, 7.000000, 6.000000 1.408000, 3.000000, 7.000000, 6.000000 1.351000, 3.000000, 7.000000, 6.000000 1.330000, 3.000000, 7.000000, 6.000000 1.350000, 3.000000, 7.000000, 6.000000 1.221000, 3.000000, 7.000000, 6.000000 1.314000, 3.000000, 7.000000, 6.000000 1.285000, 3.000000, 7.000000, 6.000000 1.418000, 3.000000, 7.000000, 6.000000 1.576000, 3.000000, 7.000000, 6.000000 1.321000, 3.000000, 7.000000, 6.000000 1.422000, 3.000000, 7.000000, 6.000000 1.424000, 3.000000, 7.000000, 6.000000 1.245000, 3.000000, 7.000000, 6.000000 1.409000, 3.000000, 7.000000, 6.000000 1.401000, 3.000000, 7.000000, 6.000000 1.299000, 3.000000, 7.000000, 6.000000 1.473000, 3.000000, 7.000000, 6.000000 1.409000, 3.000000, 7.000000, 6.000000 1.212000, 3.000000, 7.000000, 6.000000 1.338000, 3.000000, 7.000000, 6.000000 1.372000, 3.000000, 7.000000, 6.000000 1.149000, 3.000000, 7.000000, 6.000000 1.430000, 3.000000, 7.000000, 6.000000 1.233000, 3.000000, 7.000000, 6.000000 1.377000, 3.000000, 7.000000, 6.000000 1.357000, 3.000000, 7.000000, 6.000000 1.348000, 3.000000, 7.000000, 6.000000 0.955000, 3.000000, 7.000000, 6.000000 1.098000, 3.000000, 7.000000, 6.000000 0.465000, 7.000000, 4.000000, 1.000000 0.386000, 7.000000, 4.000000, 1.000000 0.549000, 7.000000, 4.000000, 1.000000 0.466000, 7.000000, 4.000000, 1.000000 0.451000, 7.000000, 4.000000, 1.000000 0.455000, 7.000000, 4.000000, 1.000000 0.467000, 7.000000, 4.000000, 1.000000 0.533000, 7.000000, 4.000000, 1.000000 0.480000, 7.000000, 4.000000, 1.000000 0.449000, 7.000000, 4.000000, 1.000000 0.490000, 7.000000, 4.000000, 1.000000 0.468000, 7.000000, 4.000000, 1.000000 0.446000, 7.000000, 4.000000, 1.000000 0.514000, 7.000000, 4.000000, 1.000000 0.488000, 7.000000, 4.000000, 1.000000 0.464000, 7.000000, 4.000000, 1.000000 0.497000, 7.000000, 4.000000, 1.000000 0.480000, 7.000000, 4.000000, 1.000000 0.447000, 7.000000, 4.000000, 1.000000 0.521000, 7.000000, 4.000000, 1.000000 0.464000, 7.000000, 4.000000, 1.000000 0.496000, 7.000000, 4.000000, 1.000000 0.483000, 7.000000, 4.000000, 1.000000 0.429000, 7.000000, 4.000000, 1.000000 0.446000, 7.000000, 4.000000, 1.000000 0.449000, 7.000000, 4.000000, 1.000000 0.475000, 7.000000, 4.000000, 1.000000 0.470000, 7.000000, 4.000000, 1.000000 0.466000, 7.000000, 4.000000, 1.000000 0.436000, 7.000000, 4.000000, 1.000000 0.560000, 7.000000, 4.000000, 1.000000 0.428000, 7.000000, 4.000000, 1.000000 0.457000, 7.000000, 4.000000, 1.000000 0.443000, 7.000000, 4.000000, 1.000000 0.423000, 7.000000, 4.000000, 1.000000 0.453000, 7.000000, 4.000000, 1.000000 0.499000, 7.000000, 4.000000, 1.000000 0.491000, 7.000000, 4.000000, 1.000000 0.413000, 7.000000, 4.000000, 1.000000 0.433000, 7.000000, 4.000000, 1.000000 0.440000, 7.000000, 4.000000, 1.000000 0.447000, 7.000000, 4.000000, 1.000000 1.178000, 3.000000, 5.000000, 9.000000 1.125000, 3.000000, 5.000000, 9.000000 1.165000, 3.000000, 5.000000, 9.000000 1.082000, 3.000000, 5.000000, 9.000000 0.954000, 3.000000, 5.000000, 9.000000 0.876000, 3.000000, 5.000000, 9.000000 0.923000, 3.000000, 5.000000, 9.000000 1.087000, 3.000000, 5.000000, 9.000000 1.120000, 3.000000, 5.000000, 9.000000 1.068000, 3.000000, 5.000000, 9.000000 0.697000, 3.000000, 5.000000, 9.000000 1.114000, 3.000000, 5.000000, 9.000000 0.809000, 3.000000, 5.000000, 9.000000 1.085000, 3.000000, 5.000000, 9.000000 0.685000, 3.000000, 5.000000, 9.000000 0.814000, 3.000000, 5.000000, 9.000000 1.071000, 3.000000, 5.000000, 9.000000 0.717000, 3.000000, 5.000000, 9.000000 0.669000, 3.000000, 5.000000, 9.000000 0.720000, 3.000000, 5.000000, 9.000000 1.067000, 3.000000, 5.000000, 9.000000 1.208000, 3.000000, 5.000000, 9.000000 1.135000, 3.000000, 5.000000, 9.000000 1.076000, 3.000000, 5.000000, 9.000000 0.897000, 3.000000, 5.000000, 9.000000 1.085000, 3.000000, 5.000000, 9.000000 0.884000, 3.000000, 5.000000, 9.000000 1.121000, 3.000000, 5.000000, 9.000000 1.141000, 3.000000, 5.000000, 9.000000 1.092000, 3.000000, 5.000000, 9.000000 0.866000, 3.000000, 5.000000, 9.000000 0.746000, 3.000000, 5.000000, 9.000000 1.057000, 3.000000, 5.000000, 9.000000 1.127000, 3.000000, 5.000000, 9.000000 1.072000, 3.000000, 5.000000, 9.000000 0.816000, 3.000000, 5.000000, 9.000000 1.061000, 3.000000, 5.000000, 9.000000 1.050000, 3.000000, 5.000000, 9.000000 1.076000, 3.000000, 5.000000, 9.000000 1.100000, 3.000000, 5.000000, 9.000000 0.934000, 3.000000, 5.000000, 9.000000 0.914000, 3.000000, 5.000000, 9.000000 0.459000, 3.000000, 5.000000, 2.000000 0.520000, 3.000000, 5.000000, 2.000000 0.487000, 3.000000, 5.000000, 2.000000 0.495000, 3.000000, 5.000000, 2.000000 0.482000, 3.000000, 5.000000, 2.000000 0.503000, 3.000000, 5.000000, 2.000000 0.488000, 3.000000, 5.000000, 2.000000 0.562000, 3.000000, 5.000000, 2.000000 0.503000, 3.000000, 5.000000, 2.000000 0.496000, 3.000000, 5.000000, 2.000000 0.570000, 3.000000, 5.000000, 2.000000 0.502000, 3.000000, 5.000000, 2.000000 0.488000, 3.000000, 5.000000, 2.000000 0.608000, 3.000000, 5.000000, 2.000000 0.471000, 3.000000, 5.000000, 2.000000 0.602000, 3.000000, 5.000000, 2.000000 0.542000, 3.000000, 5.000000, 2.000000 0.484000, 3.000000, 5.000000, 2.000000 0.417000, 3.000000, 5.000000, 2.000000 0.506000, 3.000000, 5.000000, 2.000000 0.465000, 3.000000, 5.000000, 2.000000 0.591000, 3.000000, 5.000000, 2.000000 0.561000, 3.000000, 5.000000, 2.000000 0.515000, 3.000000, 5.000000, 2.000000 0.479000, 3.000000, 5.000000, 2.000000 0.505000, 3.000000, 5.000000, 2.000000 0.469000, 3.000000, 5.000000, 2.000000 0.347000, 3.000000, 5.000000, 2.000000 0.581000, 3.000000, 5.000000, 2.000000 0.457000, 3.000000, 5.000000, 2.000000 0.466000, 3.000000, 5.000000, 2.000000 0.542000, 3.000000, 5.000000, 2.000000 0.487000, 3.000000, 5.000000, 2.000000 0.366000, 3.000000, 5.000000, 2.000000 0.436000, 3.000000, 5.000000, 2.000000 0.482000, 3.000000, 5.000000, 2.000000 0.329000, 3.000000, 5.000000, 2.000000 0.441000, 3.000000, 5.000000, 2.000000 0.453000, 3.000000, 5.000000, 2.000000 0.410000, 3.000000, 5.000000, 2.000000 0.448000, 3.000000, 5.000000, 2.000000 0.442000, 3.000000, 5.000000, 2.000000 1.679000, 9.000000, 7.000000, 10.000000 2.330000, 9.000000, 7.000000, 10.000000 1.743000, 9.000000, 7.000000, 10.000000 1.731000, 9.000000, 7.000000, 10.000000 2.290000, 9.000000, 7.000000, 10.000000 1.796000, 9.000000, 7.000000, 10.000000 1.707000, 9.000000, 7.000000, 10.000000 2.153000, 9.000000, 7.000000, 10.000000 1.701000, 9.000000, 7.000000, 10.000000 2.221000, 9.000000, 7.000000, 10.000000 2.096000, 9.000000, 7.000000, 10.000000 2.336000, 9.000000, 7.000000, 10.000000 2.146000, 9.000000, 7.000000, 10.000000 1.731000, 9.000000, 7.000000, 10.000000 2.210000, 9.000000, 7.000000, 10.000000 1.650000, 9.000000, 7.000000, 10.000000 2.422000, 9.000000, 7.000000, 10.000000 1.920000, 9.000000, 7.000000, 10.000000 2.336000, 9.000000, 7.000000, 10.000000 2.095000, 9.000000, 7.000000, 10.000000 2.173000, 9.000000, 7.000000, 10.000000 2.138000, 9.000000, 7.000000, 10.000000 2.020000, 9.000000, 7.000000, 10.000000 1.945000, 9.000000, 7.000000, 10.000000 2.239000, 9.000000, 7.000000, 10.000000 1.865000, 9.000000, 7.000000, 10.000000 2.184000, 9.000000, 7.000000, 10.000000 2.116000, 9.000000, 7.000000, 10.000000 2.211000, 9.000000, 7.000000, 10.000000 1.847000, 9.000000, 7.000000, 10.000000 2.030000, 9.000000, 7.000000, 10.000000 2.346000, 9.000000, 7.000000, 10.000000 2.497000, 9.000000, 7.000000, 10.000000 2.510000, 9.000000, 7.000000, 10.000000 2.294000, 9.000000, 7.000000, 10.000000 2.353000, 9.000000, 7.000000, 10.000000 2.517000, 9.000000, 7.000000, 10.000000 2.307000, 9.000000, 7.000000, 10.000000 2.259000, 9.000000, 7.000000, 10.000000 2.089000, 9.000000, 7.000000, 10.000000 2.190000, 9.000000, 7.000000, 10.000000 1.871000, 9.000000, 7.000000, 10.000000 2.536000, 1.000000, 9.000000, 6.000000 1.880000, 1.000000, 9.000000, 6.000000 2.211000, 1.000000, 9.000000, 6.000000 1.821000, 1.000000, 9.000000, 6.000000 1.792000, 1.000000, 9.000000, 6.000000 2.414000, 1.000000, 9.000000, 6.000000 1.771000, 1.000000, 9.000000, 6.000000 1.792000, 1.000000, 9.000000, 6.000000 2.404000, 1.000000, 9.000000, 6.000000 1.795000, 1.000000, 9.000000, 6.000000 2.383000, 1.000000, 9.000000, 6.000000 1.917000, 1.000000, 9.000000, 6.000000 2.274000, 1.000000, 9.000000, 6.000000 2.344000, 1.000000, 9.000000, 6.000000 1.592000, 1.000000, 9.000000, 6.000000 1.628000, 1.000000, 9.000000, 6.000000 2.399000, 1.000000, 9.000000, 6.000000 1.657000, 1.000000, 9.000000, 6.000000 2.356000, 1.000000, 9.000000, 6.000000 1.657000, 1.000000, 9.000000, 6.000000 2.552000, 1.000000, 9.000000, 6.000000 1.636000, 1.000000, 9.000000, 6.000000 2.389000, 1.000000, 9.000000, 6.000000 1.618000, 1.000000, 9.000000, 6.000000 1.653000, 1.000000, 9.000000, 6.000000 2.476000, 1.000000, 9.000000, 6.000000 1.598000, 1.000000, 9.000000, 6.000000 1.647000, 1.000000, 9.000000, 6.000000 2.460000, 1.000000, 9.000000, 6.000000 1.593000, 1.000000, 9.000000, 6.000000 1.650000, 1.000000, 9.000000, 6.000000 2.284000, 1.000000, 9.000000, 6.000000 1.642000, 1.000000, 9.000000, 6.000000 1.621000, 1.000000, 9.000000, 6.000000 2.441000, 1.000000, 9.000000, 6.000000 1.637000, 1.000000, 9.000000, 6.000000 2.473000, 1.000000, 9.000000, 6.000000 1.602000, 1.000000, 9.000000, 6.000000 1.646000, 1.000000, 9.000000, 6.000000 2.284000, 1.000000, 9.000000, 6.000000 1.614000, 1.000000, 9.000000, 6.000000 1.620000, 1.000000, 9.000000, 6.000000 0.241000, 6.000000, 1.000000, 6.000000 0.372000, 6.000000, 1.000000, 6.000000 0.202000, 6.000000, 1.000000, 6.000000 0.222000, 6.000000, 1.000000, 6.000000 0.341000, 6.000000, 1.000000, 6.000000 0.170000, 6.000000, 1.000000, 6.000000 0.227000, 6.000000, 1.000000, 6.000000 0.338000, 6.000000, 1.000000, 6.000000 0.179000, 6.000000, 1.000000, 6.000000 0.227000, 6.000000, 1.000000, 6.000000 0.399000, 6.000000, 1.000000, 6.000000 0.189000, 6.000000, 1.000000, 6.000000 0.196000, 6.000000, 1.000000, 6.000000 0.191000, 6.000000, 1.000000, 6.000000 0.213000, 6.000000, 1.000000, 6.000000 0.282000, 6.000000, 1.000000, 6.000000 0.362000, 6.000000, 1.000000, 6.000000 0.350000, 6.000000, 1.000000, 6.000000 0.171000, 6.000000, 1.000000, 6.000000 0.234000, 6.000000, 1.000000, 6.000000 0.340000, 6.000000, 1.000000, 6.000000 0.209000, 6.000000, 1.000000, 6.000000 0.226000, 6.000000, 1.000000, 6.000000 0.340000, 6.000000, 1.000000, 6.000000 0.241000, 6.000000, 1.000000, 6.000000 0.313000, 6.000000, 1.000000, 6.000000 0.202000, 6.000000, 1.000000, 6.000000 0.235000, 6.000000, 1.000000, 6.000000 0.472000, 6.000000, 1.000000, 6.000000 0.170000, 6.000000, 1.000000, 6.000000 0.228000, 6.000000, 1.000000, 6.000000 0.357000, 6.000000, 1.000000, 6.000000 0.189000, 6.000000, 1.000000, 6.000000 0.189000, 6.000000, 1.000000, 6.000000 0.211000, 6.000000, 1.000000, 6.000000 0.189000, 6.000000, 1.000000, 6.000000 0.224000, 6.000000, 1.000000, 6.000000 0.321000, 6.000000, 1.000000, 6.000000 0.185000, 6.000000, 1.000000, 6.000000 0.226000, 6.000000, 1.000000, 6.000000 0.347000, 6.000000, 1.000000, 6.000000 0.217000, 6.000000, 1.000000, 6.000000 4.750000, 6.000000, 10.000000, 9.000000 3.281000, 6.000000, 10.000000, 9.000000 5.009000, 6.000000, 10.000000, 9.000000 3.320000, 6.000000, 10.000000, 9.000000 3.246000, 6.000000, 10.000000, 9.000000 5.181000, 6.000000, 10.000000, 9.000000 3.264000, 6.000000, 10.000000, 9.000000 4.548000, 6.000000, 10.000000, 9.000000 3.266000, 6.000000, 10.000000, 9.000000 4.732000, 6.000000, 10.000000, 9.000000 3.270000, 6.000000, 10.000000, 9.000000 5.188000, 6.000000, 10.000000, 9.000000 3.290000, 6.000000, 10.000000, 9.000000 4.882000, 6.000000, 10.000000, 9.000000 3.293000, 6.000000, 10.000000, 9.000000 5.173000, 6.000000, 10.000000, 9.000000 3.300000, 6.000000, 10.000000, 9.000000 3.258000, 6.000000, 10.000000, 9.000000 5.130000, 6.000000, 10.000000, 9.000000 3.247000, 6.000000, 10.000000, 9.000000 3.304000, 6.000000, 10.000000, 9.000000 4.678000, 6.000000, 10.000000, 9.000000 3.281000, 6.000000, 10.000000, 9.000000 4.766000, 6.000000, 10.000000, 9.000000 3.298000, 6.000000, 10.000000, 9.000000 3.287000, 6.000000, 10.000000, 9.000000 5.051000, 6.000000, 10.000000, 9.000000 3.280000, 6.000000, 10.000000, 9.000000 5.257000, 6.000000, 10.000000, 9.000000 3.289000, 6.000000, 10.000000, 9.000000 4.834000, 6.000000, 10.000000, 9.000000 3.299000, 6.000000, 10.000000, 9.000000 4.629000, 6.000000, 10.000000, 9.000000 3.265000, 6.000000, 10.000000, 9.000000 4.915000, 6.000000, 10.000000, 9.000000 3.256000, 6.000000, 10.000000, 9.000000 3.282000, 6.000000, 10.000000, 9.000000 5.059000, 6.000000, 10.000000, 9.000000 3.250000, 6.000000, 10.000000, 9.000000 3.281000, 6.000000, 10.000000, 9.000000 5.043000, 6.000000, 10.000000, 9.000000 3.236000, 6.000000, 10.000000, 9.000000 0.688000, 5.000000, 6.000000, 6.000000 1.074000, 5.000000, 6.000000, 6.000000 0.661000, 5.000000, 6.000000, 6.000000 0.691000, 5.000000, 6.000000, 6.000000 0.692000, 5.000000, 6.000000, 6.000000 1.025000, 5.000000, 6.000000, 6.000000 0.687000, 5.000000, 6.000000, 6.000000 0.646000, 5.000000, 6.000000, 6.000000 0.700000, 5.000000, 6.000000, 6.000000 1.062000, 5.000000, 6.000000, 6.000000 0.646000, 5.000000, 6.000000, 6.000000 0.714000, 5.000000, 6.000000, 6.000000 1.039000, 5.000000, 6.000000, 6.000000 0.692000, 5.000000, 6.000000, 6.000000 0.679000, 5.000000, 6.000000, 6.000000 0.688000, 5.000000, 6.000000, 6.000000 0.682000, 5.000000, 6.000000, 6.000000 0.706000, 5.000000, 6.000000, 6.000000 1.078000, 5.000000, 6.000000, 6.000000 0.647000, 5.000000, 6.000000, 6.000000 0.689000, 5.000000, 6.000000, 6.000000 1.067000, 5.000000, 6.000000, 6.000000 0.649000, 5.000000, 6.000000, 6.000000 0.711000, 5.000000, 6.000000, 6.000000 1.126000, 5.000000, 6.000000, 6.000000 0.682000, 5.000000, 6.000000, 6.000000 0.702000, 5.000000, 6.000000, 6.000000 1.054000, 5.000000, 6.000000, 6.000000 0.727000, 5.000000, 6.000000, 6.000000 0.973000, 5.000000, 6.000000, 6.000000 0.670000, 5.000000, 6.000000, 6.000000 0.661000, 5.000000, 6.000000, 6.000000 0.687000, 5.000000, 6.000000, 6.000000 1.057000, 5.000000, 6.000000, 6.000000 0.661000, 5.000000, 6.000000, 6.000000 0.694000, 5.000000, 6.000000, 6.000000 1.055000, 5.000000, 6.000000, 6.000000 0.674000, 5.000000, 6.000000, 6.000000 0.649000, 5.000000, 6.000000, 6.000000 0.646000, 5.000000, 6.000000, 6.000000 0.714000, 5.000000, 6.000000, 6.000000 1.044000, 5.000000, 6.000000, 6.000000 0.758000, 10.000000, 5.000000, 10.000000 0.811000, 10.000000, 5.000000, 10.000000 1.226000, 10.000000, 5.000000, 10.000000 0.750000, 10.000000, 5.000000, 10.000000 0.807000, 10.000000, 5.000000, 10.000000 1.208000, 10.000000, 5.000000, 10.000000 0.780000, 10.000000, 5.000000, 10.000000 0.805000, 10.000000, 5.000000, 10.000000 1.215000, 10.000000, 5.000000, 10.000000 0.748000, 10.000000, 5.000000, 10.000000 0.748000, 10.000000, 5.000000, 10.000000 0.795000, 10.000000, 5.000000, 10.000000 1.232000, 10.000000, 5.000000, 10.000000 0.750000, 10.000000, 5.000000, 10.000000 0.813000, 10.000000, 5.000000, 10.000000 1.232000, 10.000000, 5.000000, 10.000000 0.769000, 10.000000, 5.000000, 10.000000 0.797000, 10.000000, 5.000000, 10.000000 1.176000, 10.000000, 5.000000, 10.000000 0.766000, 10.000000, 5.000000, 10.000000 0.794000, 10.000000, 5.000000, 10.000000 1.220000, 10.000000, 5.000000, 10.000000 0.753000, 10.000000, 5.000000, 10.000000 0.764000, 10.000000, 5.000000, 10.000000 0.769000, 10.000000, 5.000000, 10.000000 0.804000, 10.000000, 5.000000, 10.000000 1.182000, 10.000000, 5.000000, 10.000000 0.770000, 10.000000, 5.000000, 10.000000 0.790000, 10.000000, 5.000000, 10.000000 0.770000, 10.000000, 5.000000, 10.000000 0.795000, 10.000000, 5.000000, 10.000000 1.129000, 10.000000, 5.000000, 10.000000 0.752000, 10.000000, 5.000000, 10.000000 0.748000, 10.000000, 5.000000, 10.000000 0.789000, 10.000000, 5.000000, 10.000000 1.198000, 10.000000, 5.000000, 10.000000 0.762000, 10.000000, 5.000000, 10.000000 0.795000, 10.000000, 5.000000, 10.000000 1.226000, 10.000000, 5.000000, 10.000000 0.803000, 10.000000, 5.000000, 10.000000 1.254000, 10.000000, 5.000000, 10.000000 0.766000, 10.000000, 5.000000, 10.000000 0.298000, 4.000000, 4.000000, 3.000000 0.455000, 4.000000, 4.000000, 3.000000 0.291000, 4.000000, 4.000000, 3.000000 0.311000, 4.000000, 4.000000, 3.000000 0.490000, 4.000000, 4.000000, 3.000000 0.314000, 4.000000, 4.000000, 3.000000 0.331000, 4.000000, 4.000000, 3.000000 0.480000, 4.000000, 4.000000, 3.000000 0.476000, 4.000000, 4.000000, 3.000000 0.432000, 4.000000, 4.000000, 3.000000 0.434000, 4.000000, 4.000000, 3.000000 0.439000, 4.000000, 4.000000, 3.000000 0.476000, 4.000000, 4.000000, 3.000000 0.509000, 4.000000, 4.000000, 3.000000 0.394000, 4.000000, 4.000000, 3.000000 0.441000, 4.000000, 4.000000, 3.000000 0.573000, 4.000000, 4.000000, 3.000000 0.445000, 4.000000, 4.000000, 3.000000 0.428000, 4.000000, 4.000000, 3.000000 0.450000, 4.000000, 4.000000, 3.000000 0.464000, 4.000000, 4.000000, 3.000000 0.487000, 4.000000, 4.000000, 3.000000 0.465000, 4.000000, 4.000000, 3.000000 0.384000, 4.000000, 4.000000, 3.000000 0.544000, 4.000000, 4.000000, 3.000000 0.470000, 4.000000, 4.000000, 3.000000 0.360000, 4.000000, 4.000000, 3.000000 0.521000, 4.000000, 4.000000, 3.000000 0.480000, 4.000000, 4.000000, 3.000000 0.358000, 4.000000, 4.000000, 3.000000 0.509000, 4.000000, 4.000000, 3.000000 0.377000, 4.000000, 4.000000, 3.000000 0.475000, 4.000000, 4.000000, 3.000000 0.529000, 4.000000, 4.000000, 3.000000 0.481000, 4.000000, 4.000000, 3.000000 0.340000, 4.000000, 4.000000, 3.000000 0.409000, 4.000000, 4.000000, 3.000000 0.455000, 4.000000, 4.000000, 3.000000 0.332000, 4.000000, 4.000000, 3.000000 0.477000, 4.000000, 4.000000, 3.000000 0.450000, 4.000000, 4.000000, 3.000000 0.438000, 4.000000, 4.000000, 3.000000 0.442000, 10.000000, 2.000000, 7.000000 0.461000, 10.000000, 2.000000, 7.000000 0.480000, 10.000000, 2.000000, 7.000000 0.536000, 10.000000, 2.000000, 7.000000 0.503000, 10.000000, 2.000000, 7.000000 0.537000, 10.000000, 2.000000, 7.000000 0.458000, 10.000000, 2.000000, 7.000000 0.515000, 10.000000, 2.000000, 7.000000 0.427000, 10.000000, 2.000000, 7.000000 0.504000, 10.000000, 2.000000, 7.000000 0.439000, 10.000000, 2.000000, 7.000000 0.482000, 10.000000, 2.000000, 7.000000 0.601000, 10.000000, 2.000000, 7.000000 0.457000, 10.000000, 2.000000, 7.000000 0.515000, 10.000000, 2.000000, 7.000000 0.455000, 10.000000, 2.000000, 7.000000 0.585000, 10.000000, 2.000000, 7.000000 0.451000, 10.000000, 2.000000, 7.000000 0.506000, 10.000000, 2.000000, 7.000000 0.437000, 10.000000, 2.000000, 7.000000 0.448000, 10.000000, 2.000000, 7.000000 0.439000, 10.000000, 2.000000, 7.000000 0.462000, 10.000000, 2.000000, 7.000000 0.426000, 10.000000, 2.000000, 7.000000 0.445000, 10.000000, 2.000000, 7.000000 0.543000, 10.000000, 2.000000, 7.000000 0.441000, 10.000000, 2.000000, 7.000000 0.490000, 10.000000, 2.000000, 7.000000 0.415000, 10.000000, 2.000000, 7.000000 0.519000, 10.000000, 2.000000, 7.000000 0.403000, 10.000000, 2.000000, 7.000000 0.453000, 10.000000, 2.000000, 7.000000 0.445000, 10.000000, 2.000000, 7.000000 0.531000, 10.000000, 2.000000, 7.000000 0.423000, 10.000000, 2.000000, 7.000000 0.461000, 10.000000, 2.000000, 7.000000 0.496000, 10.000000, 2.000000, 7.000000 0.444000, 10.000000, 2.000000, 7.000000 0.555000, 10.000000, 2.000000, 7.000000 0.411000, 10.000000, 2.000000, 7.000000 0.429000, 10.000000, 2.000000, 7.000000 0.572000, 10.000000, 2.000000, 7.000000 0.773000, 1.000000, 8.000000, 2.000000 0.698000, 1.000000, 8.000000, 2.000000 0.845000, 1.000000, 8.000000, 2.000000 0.787000, 1.000000, 8.000000, 2.000000 0.879000, 1.000000, 8.000000, 2.000000 0.862000, 1.000000, 8.000000, 2.000000 0.836000, 1.000000, 8.000000, 2.000000 0.763000, 1.000000, 8.000000, 2.000000 0.859000, 1.000000, 8.000000, 2.000000 0.792000, 1.000000, 8.000000, 2.000000 0.835000, 1.000000, 8.000000, 2.000000 0.860000, 1.000000, 8.000000, 2.000000 0.953000, 1.000000, 8.000000, 2.000000 0.760000, 1.000000, 8.000000, 2.000000 0.788000, 1.000000, 8.000000, 2.000000 0.863000, 1.000000, 8.000000, 2.000000 0.820000, 1.000000, 8.000000, 2.000000 0.858000, 1.000000, 8.000000, 2.000000 0.736000, 1.000000, 8.000000, 2.000000 0.894000, 1.000000, 8.000000, 2.000000 0.823000, 1.000000, 8.000000, 2.000000 0.721000, 1.000000, 8.000000, 2.000000 0.806000, 1.000000, 8.000000, 2.000000 0.852000, 1.000000, 8.000000, 2.000000 0.708000, 1.000000, 8.000000, 2.000000 0.748000, 1.000000, 8.000000, 2.000000 0.778000, 1.000000, 8.000000, 2.000000 0.866000, 1.000000, 8.000000, 2.000000 0.643000, 1.000000, 8.000000, 2.000000 0.786000, 1.000000, 8.000000, 2.000000 0.824000, 1.000000, 8.000000, 2.000000 0.640000, 1.000000, 8.000000, 2.000000 0.726000, 1.000000, 8.000000, 2.000000 0.840000, 1.000000, 8.000000, 2.000000 0.743000, 1.000000, 8.000000, 2.000000 0.845000, 1.000000, 8.000000, 2.000000 0.840000, 1.000000, 8.000000, 2.000000 0.810000, 1.000000, 8.000000, 2.000000 0.846000, 1.000000, 8.000000, 2.000000 0.710000, 1.000000, 8.000000, 2.000000 0.711000, 1.000000, 8.000000, 2.000000 0.876000, 1.000000, 8.000000, 2.000000 0.415000, 6.000000, 3.000000, 1.000000 0.392000, 6.000000, 3.000000, 1.000000 0.430000, 6.000000, 3.000000, 1.000000 0.283000, 6.000000, 3.000000, 1.000000 0.406000, 6.000000, 3.000000, 1.000000 0.238000, 6.000000, 3.000000, 1.000000 0.280000, 6.000000, 3.000000, 1.000000 0.388000, 6.000000, 3.000000, 1.000000 0.247000, 6.000000, 3.000000, 1.000000 0.283000, 6.000000, 3.000000, 1.000000 0.404000, 6.000000, 3.000000, 1.000000 0.266000, 6.000000, 3.000000, 1.000000 0.229000, 6.000000, 3.000000, 1.000000 0.264000, 6.000000, 3.000000, 1.000000 0.343000, 6.000000, 3.000000, 1.000000 0.287000, 6.000000, 3.000000, 1.000000 0.361000, 6.000000, 3.000000, 1.000000 0.237000, 6.000000, 3.000000, 1.000000 0.281000, 6.000000, 3.000000, 1.000000 0.404000, 6.000000, 3.000000, 1.000000 0.250000, 6.000000, 3.000000, 1.000000 0.277000, 6.000000, 3.000000, 1.000000 0.406000, 6.000000, 3.000000, 1.000000 0.208000, 6.000000, 3.000000, 1.000000 0.284000, 6.000000, 3.000000, 1.000000 0.407000, 6.000000, 3.000000, 1.000000 0.279000, 6.000000, 3.000000, 1.000000 0.253000, 6.000000, 3.000000, 1.000000 0.406000, 6.000000, 3.000000, 1.000000 0.279000, 6.000000, 3.000000, 1.000000 0.379000, 6.000000, 3.000000, 1.000000 0.292000, 6.000000, 3.000000, 1.000000 0.378000, 6.000000, 3.000000, 1.000000 0.279000, 6.000000, 3.000000, 1.000000 0.386000, 6.000000, 3.000000, 1.000000 0.293000, 6.000000, 3.000000, 1.000000 0.387000, 6.000000, 3.000000, 1.000000 0.250000, 6.000000, 3.000000, 1.000000 0.276000, 6.000000, 3.000000, 1.000000 0.424000, 6.000000, 3.000000, 1.000000 0.223000, 6.000000, 3.000000, 1.000000 0.262000, 6.000000, 3.000000, 1.000000 0.287000, 7.000000, 3.000000, 3.000000 0.316000, 7.000000, 3.000000, 3.000000 0.489000, 7.000000, 3.000000, 3.000000 0.281000, 7.000000, 3.000000, 3.000000 0.358000, 7.000000, 3.000000, 3.000000 0.456000, 7.000000, 3.000000, 3.000000 0.253000, 7.000000, 3.000000, 3.000000 0.295000, 7.000000, 3.000000, 3.000000 0.258000, 7.000000, 3.000000, 3.000000 0.302000, 7.000000, 3.000000, 3.000000 0.486000, 7.000000, 3.000000, 3.000000 0.330000, 7.000000, 3.000000, 3.000000 0.337000, 7.000000, 3.000000, 3.000000 0.461000, 7.000000, 3.000000, 3.000000 0.258000, 7.000000, 3.000000, 3.000000 0.307000, 7.000000, 3.000000, 3.000000 0.451000, 7.000000, 3.000000, 3.000000 0.282000, 7.000000, 3.000000, 3.000000 0.270000, 7.000000, 3.000000, 3.000000 0.315000, 7.000000, 3.000000, 3.000000 0.430000, 7.000000, 3.000000, 3.000000 0.256000, 7.000000, 3.000000, 3.000000 0.341000, 7.000000, 3.000000, 3.000000 0.473000, 7.000000, 3.000000, 3.000000 0.253000, 7.000000, 3.000000, 3.000000 0.323000, 7.000000, 3.000000, 3.000000 0.446000, 7.000000, 3.000000, 3.000000 0.257000, 7.000000, 3.000000, 3.000000 0.349000, 7.000000, 3.000000, 3.000000 0.425000, 7.000000, 3.000000, 3.000000 0.327000, 7.000000, 3.000000, 3.000000 0.322000, 7.000000, 3.000000, 3.000000 0.408000, 7.000000, 3.000000, 3.000000 0.322000, 7.000000, 3.000000, 3.000000 0.399000, 7.000000, 3.000000, 3.000000 0.271000, 7.000000, 3.000000, 3.000000 0.434000, 7.000000, 3.000000, 3.000000 0.276000, 7.000000, 3.000000, 3.000000 0.340000, 7.000000, 3.000000, 3.000000 0.411000, 7.000000, 3.000000, 3.000000 5.125000, 9.000000, 10.000000, 9.000000 0.307000, 7.000000, 3.000000, 3.000000 0.301000, 7.000000, 3.000000, 3.000000 5.368000, 9.000000, 10.000000, 9.000000 3.378000, 9.000000, 10.000000, 9.000000 3.406000, 9.000000, 10.000000, 9.000000 5.017000, 9.000000, 10.000000, 9.000000 3.472000, 9.000000, 10.000000, 9.000000 3.432000, 9.000000, 10.000000, 9.000000 5.309000, 9.000000, 10.000000, 9.000000 3.419000, 9.000000, 10.000000, 9.000000 5.282000, 9.000000, 10.000000, 9.000000 5.424000, 9.000000, 10.000000, 9.000000 3.427000, 9.000000, 10.000000, 9.000000 3.431000, 9.000000, 10.000000, 9.000000 5.356000, 9.000000, 10.000000, 9.000000 3.426000, 9.000000, 10.000000, 9.000000 4.954000, 9.000000, 10.000000, 9.000000 3.465000, 9.000000, 10.000000, 9.000000 5.028000, 9.000000, 10.000000, 9.000000 3.413000, 9.000000, 10.000000, 9.000000 3.448000, 9.000000, 10.000000, 9.000000 5.344000, 9.000000, 10.000000, 9.000000 3.424000, 9.000000, 10.000000, 9.000000 3.450000, 9.000000, 10.000000, 9.000000 5.401000, 9.000000, 10.000000, 9.000000 3.433000, 9.000000, 10.000000, 9.000000 5.119000, 9.000000, 10.000000, 9.000000 3.407000, 9.000000, 10.000000, 9.000000 3.418000, 9.000000, 10.000000, 9.000000 5.321000, 9.000000, 10.000000, 9.000000 3.431000, 9.000000, 10.000000, 9.000000 5.465000, 9.000000, 10.000000, 9.000000 3.433000, 9.000000, 10.000000, 9.000000 5.473000, 9.000000, 10.000000, 9.000000 3.419000, 9.000000, 10.000000, 9.000000 5.302000, 9.000000, 10.000000, 9.000000 3.435000, 9.000000, 10.000000, 9.000000 5.428000, 9.000000, 10.000000, 9.000000 3.422000, 9.000000, 10.000000, 9.000000 3.425000, 9.000000, 10.000000, 9.000000 5.529000, 9.000000, 10.000000, 9.000000 3.383000, 9.000000, 10.000000, 9.000000 3.436000, 9.000000, 10.000000, 9.000000 1.000000, 10.000000, 6.000000, 4.000000 0.657000, 10.000000, 6.000000, 4.000000 0.748000, 10.000000, 6.000000, 4.000000 1.059000, 10.000000, 6.000000, 4.000000 0.658000, 10.000000, 6.000000, 4.000000 0.718000, 10.000000, 6.000000, 4.000000 1.066000, 10.000000, 6.000000, 4.000000 0.660000, 10.000000, 6.000000, 4.000000 0.697000, 10.000000, 6.000000, 4.000000 1.040000, 10.000000, 6.000000, 4.000000 0.660000, 10.000000, 6.000000, 4.000000 0.755000, 10.000000, 6.000000, 4.000000 1.084000, 10.000000, 6.000000, 4.000000 0.655000, 10.000000, 6.000000, 4.000000 0.714000, 10.000000, 6.000000, 4.000000 1.024000, 10.000000, 6.000000, 4.000000 0.674000, 10.000000, 6.000000, 4.000000 0.772000, 10.000000, 6.000000, 4.000000 1.032000, 10.000000, 6.000000, 4.000000 0.677000, 10.000000, 6.000000, 4.000000 0.662000, 10.000000, 6.000000, 4.000000 0.736000, 10.000000, 6.000000, 4.000000 1.037000, 10.000000, 6.000000, 4.000000 0.693000, 10.000000, 6.000000, 4.000000 0.742000, 10.000000, 6.000000, 4.000000 1.064000, 10.000000, 6.000000, 4.000000 0.654000, 10.000000, 6.000000, 4.000000 0.662000, 10.000000, 6.000000, 4.000000 0.728000, 10.000000, 6.000000, 4.000000 1.067000, 10.000000, 6.000000, 4.000000 0.657000, 10.000000, 6.000000, 4.000000 0.724000, 10.000000, 6.000000, 4.000000 1.066000, 10.000000, 6.000000, 4.000000 0.651000, 10.000000, 6.000000, 4.000000 0.705000, 10.000000, 6.000000, 4.000000 0.985000, 10.000000, 6.000000, 4.000000 0.660000, 10.000000, 6.000000, 4.000000 0.712000, 10.000000, 6.000000, 4.000000 1.095000, 10.000000, 6.000000, 4.000000 0.663000, 10.000000, 6.000000, 4.000000 0.717000, 10.000000, 6.000000, 4.000000 1.080000, 10.000000, 6.000000, 4.000000 0.195000, 2.000000, 1.000000, 1.000000 0.230000, 2.000000, 1.000000, 1.000000 0.323000, 2.000000, 1.000000, 1.000000 0.181000, 2.000000, 1.000000, 1.000000 0.216000, 2.000000, 1.000000, 1.000000 0.328000, 2.000000, 1.000000, 1.000000 0.173000, 2.000000, 1.000000, 1.000000 0.230000, 2.000000, 1.000000, 1.000000 0.294000, 2.000000, 1.000000, 1.000000 0.200000, 2.000000, 1.000000, 1.000000 0.206000, 2.000000, 1.000000, 1.000000 0.295000, 2.000000, 1.000000, 1.000000 0.221000, 2.000000, 1.000000, 1.000000 0.272000, 2.000000, 1.000000, 1.000000 0.157000, 2.000000, 1.000000, 1.000000 0.240000, 2.000000, 1.000000, 1.000000 0.309000, 2.000000, 1.000000, 1.000000 0.226000, 2.000000, 1.000000, 1.000000 0.306000, 2.000000, 1.000000, 1.000000 0.217000, 2.000000, 1.000000, 1.000000 0.308000, 2.000000, 1.000000, 1.000000 0.200000, 2.000000, 1.000000, 1.000000 0.221000, 2.000000, 1.000000, 1.000000 0.353000, 2.000000, 1.000000, 1.000000 0.156000, 2.000000, 1.000000, 1.000000 0.220000, 2.000000, 1.000000, 1.000000 0.318000, 2.000000, 1.000000, 1.000000 0.217000, 2.000000, 1.000000, 1.000000 0.217000, 2.000000, 1.000000, 1.000000 0.302000, 2.000000, 1.000000, 1.000000 0.205000, 2.000000, 1.000000, 1.000000 0.220000, 2.000000, 1.000000, 1.000000 0.338000, 2.000000, 1.000000, 1.000000 0.241000, 2.000000, 1.000000, 1.000000 0.310000, 2.000000, 1.000000, 1.000000 0.195000, 2.000000, 1.000000, 1.000000 0.266000, 2.000000, 1.000000, 1.000000 0.357000, 2.000000, 1.000000, 1.000000 0.234000, 2.000000, 1.000000, 1.000000 0.335000, 2.000000, 1.000000, 1.000000 0.221000, 2.000000, 1.000000, 1.000000 0.239000, 2.000000, 1.000000, 1.000000 0.698000, 2.000000, 7.000000, 2.000000 0.492000, 2.000000, 7.000000, 2.000000 0.401000, 2.000000, 7.000000, 2.000000 0.458000, 2.000000, 7.000000, 2.000000 0.715000, 2.000000, 7.000000, 2.000000 0.403000, 2.000000, 7.000000, 2.000000 0.469000, 2.000000, 7.000000, 2.000000 0.703000, 2.000000, 7.000000, 2.000000 0.466000, 2.000000, 7.000000, 2.000000 0.721000, 2.000000, 7.000000, 2.000000 0.401000, 2.000000, 7.000000, 2.000000 0.482000, 2.000000, 7.000000, 2.000000 0.705000, 2.000000, 7.000000, 2.000000 0.404000, 2.000000, 7.000000, 2.000000 0.463000, 2.000000, 7.000000, 2.000000 0.706000, 2.000000, 7.000000, 2.000000 0.401000, 2.000000, 7.000000, 2.000000 0.456000, 2.000000, 7.000000, 2.000000 0.684000, 2.000000, 7.000000, 2.000000 0.400000, 2.000000, 7.000000, 2.000000 0.468000, 2.000000, 7.000000, 2.000000 0.659000, 2.000000, 7.000000, 2.000000 0.442000, 2.000000, 7.000000, 2.000000 0.419000, 2.000000, 7.000000, 2.000000 0.479000, 2.000000, 7.000000, 2.000000 0.448000, 2.000000, 7.000000, 2.000000 0.694000, 2.000000, 7.000000, 2.000000 0.472000, 2.000000, 7.000000, 2.000000 0.666000, 2.000000, 7.000000, 2.000000 0.440000, 2.000000, 7.000000, 2.000000 0.404000, 2.000000, 7.000000, 2.000000 0.471000, 2.000000, 7.000000, 2.000000 0.679000, 2.000000, 7.000000, 2.000000 0.429000, 2.000000, 7.000000, 2.000000 0.468000, 2.000000, 7.000000, 2.000000 0.707000, 2.000000, 7.000000, 2.000000 0.401000, 2.000000, 7.000000, 2.000000 0.493000, 2.000000, 7.000000, 2.000000 0.711000, 2.000000, 7.000000, 2.000000 0.423000, 2.000000, 7.000000, 2.000000 0.462000, 2.000000, 7.000000, 2.000000 0.655000, 2.000000, 7.000000, 2.000000 0.660000, 10.000000, 6.000000, 4.000000 0.734000, 10.000000, 6.000000, 4.000000 1.087000, 10.000000, 6.000000, 4.000000 0.664000, 10.000000, 6.000000, 4.000000 0.718000, 10.000000, 6.000000, 4.000000 1.057000, 10.000000, 6.000000, 4.000000 0.650000, 10.000000, 6.000000, 4.000000 0.723000, 10.000000, 6.000000, 4.000000 1.042000, 10.000000, 6.000000, 4.000000 0.679000, 10.000000, 6.000000, 4.000000 0.694000, 10.000000, 6.000000, 4.000000 1.084000, 10.000000, 6.000000, 4.000000 0.656000, 10.000000, 6.000000, 4.000000 0.652000, 10.000000, 6.000000, 4.000000 0.705000, 10.000000, 6.000000, 4.000000 1.108000, 10.000000, 6.000000, 4.000000 0.673000, 10.000000, 6.000000, 4.000000 0.710000, 10.000000, 6.000000, 4.000000 1.017000, 10.000000, 6.000000, 4.000000 0.676000, 10.000000, 6.000000, 4.000000 0.725000, 10.000000, 6.000000, 4.000000 1.117000, 10.000000, 6.000000, 4.000000 0.653000, 10.000000, 6.000000, 4.000000 0.723000, 10.000000, 6.000000, 4.000000 1.047000, 10.000000, 6.000000, 4.000000 0.680000, 10.000000, 6.000000, 4.000000 0.727000, 10.000000, 6.000000, 4.000000 1.086000, 10.000000, 6.000000, 4.000000 0.674000, 10.000000, 6.000000, 4.000000 0.770000, 10.000000, 6.000000, 4.000000 0.964000, 10.000000, 6.000000, 4.000000 0.696000, 10.000000, 6.000000, 4.000000 0.727000, 10.000000, 6.000000, 4.000000 1.025000, 10.000000, 6.000000, 4.000000 0.701000, 10.000000, 6.000000, 4.000000 0.685000, 10.000000, 6.000000, 4.000000 1.063000, 10.000000, 6.000000, 4.000000 0.716000, 10.000000, 6.000000, 4.000000 1.109000, 10.000000, 6.000000, 4.000000 0.771000, 10.000000, 6.000000, 4.000000 1.713000, 10.000000, 6.000000, 4.000000 3.440000, 10.000000, 6.000000, 4.000000starpu-1.4.9+dfsg/tools/perfs/000077500000000000000000000000001507764646700162675ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/perfs/bench_sgemm.sh000077500000000000000000000046201507764646700210770ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # DIR=$PWD ROOTDIR=$DIR/../.. BUILDDIR=$PWD/build/ INSTALLDIR=$PWD/local/ PERFDIR=$DIR/sampling/ # Testing another specific scheduler, no need to run this [ -z "$STARPU_SCHED" -o "$STARPU_SCHED" = dm ] || exit 77 make -C ../../ distclean mkdir -p $PERFDIR mkdir -p $BUILDDIR cd $BUILDDIR $DIR/../../configure -C --prefix=$INSTALLDIR --with-goto-dir=/home/gonnet/These/Libs/GotoBLAS/GotoBLAS/ --enable-verbose make -j 10 make install sizelist="16 32 48 64 96 128 192 256 384 512 1024 2048 4096" cpu_output=$DIR/output.cpu gpu_output=$DIR/output.gpu rm -f $cpu_output rm -f $gpu_output export STARPU_WORKERS_CPUID="2" export STARPU_CALIBRATE=1 export STARPU_SCHED="dm" # benchmark GotoBLAS for size in $sizelist; do niter=1000 if test $size -ge 512; then niter=20 fi if test $size -ge 2048; then niter=5 fi echo "GotoBLAS -> size $size niter $niter" timing=`STARPU_NCPUS=1 STARPU_NCUDA=0 $MS_LAUNCHER $STARPU_LAUNCH $INSTALLDIR/lib/starpu/examples/dw_mult_no_filters -x $size -y $size -z $size -nblocks 1 -iter $niter 2> /dev/null` echo "$size $timing $niter" >> $cpu_output done # benchmark CUBLAS for size in $sizelist; do niter=2500 if test $size -ge 512; then niter=250 fi if test $size -ge 2048; then niter=25 fi echo "CUBLAS -> size $size niter $niter" timing=`STARPU_NCPUS=0 STARPU_NCUDA=1 $MS_LAUNCHER $STARPU_LAUNCH $INSTALLDIR/lib/starpu/examples/dw_mult_no_filters -x $size -y $size -z $size -nblocks 1 -iter $niter 2 -pin 2> /dev/null` echo "$size $timing $niter" >> $gpu_output done gnuplot > /dev/null << EOF set term postscript eps enhanced color set output "bench_sgemm.eps" set logscale x set logscale y plot "$cpu_output" usi 1:(\$2/\$3) with linespoint, \ "$gpu_output" usi 1:(\$2/\$3) with linespoint EOF starpu-1.4.9+dfsg/tools/perfs/error_model.gp000077500000000000000000000024571507764646700211430ustar00rootroot00000000000000#!/usr/bin/gnuplot -persist # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # set term postscript eps enhanced color set output "model_error.eps" set yrange [0.05:100] set xrange [4:10000] set grid y set grid x set logscale y set logscale x #set title "Cholesky on Cell (PS3)" set xlabel "Number of samples" set ylabel "Prediction error" set grid set ytics (0.01, 0.1, 1, 5,10,25,50,100) set xtics (10, 100, 1000, 10000) set format y "%.1f %%" set format x "10^{%L}" set key title "Execution time Prediction Error (%)" set size 0.75 plot "gnuplot.data" usi 1:($2*100) with linespoint pt -1 lt 1 lw 3 title "CPUs" ,\ "gnuplot.data" usi 3:($4*100) with linespoint pt -1 lt 2 lw 3 title "GPU" starpu-1.4.9+dfsg/tools/perfs/error_model.sh000077500000000000000000000072731507764646700211500ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # for bc scale=8 #nblockslist="2 4 8 16 16 16 16 16 16 16 16 16 16 16 16" nblockslist="4 8 16 16 16 16 16 16 16 16 16" niter=5 #nblockslist="4 4" #niter=2 # Testing another specific scheduler, no need to run this [ -z "$STARPU_SCHED" -o "$STARPU_SCHED" = dm ] || exit 77 rm -f log echo "#iter cpu0 (#tasks0) cpu1 (#tasks1) cpu2 (#tasks2) gpu0 (#tasksgpu0) #totaltask gflops" > gnuplot.data i=0 for nblocks in $nblockslist do i=$(($i + 1)) sumcpu[$i]='0' ntaskcpu[$i]='0' sumcuda[$i]='0' ntaskcuda[$i]='0' cpu_ntasktotal[$i]='0' gpu_ntasktotal[$i]='0' sumgflops[$i]='0' done for iter in `seq 1 $niter` do cpu_taskcnt=0 gpu_taskcnt=0 i=0 rm -f ../../.sampling/* for nblocks in $nblockslist do i=$(($i + 1)) ntheta=$(($((32 * $nblocks)) + 2)) echo "ITER $iter -> I $i NBLOCKS $nblocks" STARPU_CALIBRATE=1 STARPU_SCHED="dm" $MS_LAUNCHER $STARPU_LAUNCH ../../examples/heat/heat -nblocks $nblocks -nthick 34 -ntheta $ntheta -pin 2> output.log.err > output.log gflops=`grep "Synthetic GFlops :" output.log.err| sed -e "s/Synthetic GFlops ://"` sumgflops[$i]=$(echo "${sumgflops[$i]} + $gflops"|bc -l) # retrieve ratio for cpu 0, 1 and 2 avgcpu0=`grep "MODEL ERROR: CPU 0" starpu.log | sed -e "s/^.*RATIO \(.*\) NTASKS\(.*\)$/\1/"` avgcpu1=`grep "MODEL ERROR: CPU 1" starpu.log | sed -e "s/^.*RATIO \(.*\) NTASKS\(.*\)$/\1/"` avgcpu2=`grep "MODEL ERROR: CPU 2" starpu.log | sed -e "s/^.*RATIO \(.*\) NTASKS\(.*\)$/\1/"` avgcuda0=`grep "MODEL ERROR: CUDA 0" starpu.log | sed -e "s/^.*RATIO \(.*\) NTASKS\(.*\)$/\1/"` ntaskcpu0=`grep "MODEL ERROR: CPU 0" starpu.log | sed -e "s/^.*RATIO \(.*\) NTASKS\(.*\)$/\2/"` ntaskcpu1=`grep "MODEL ERROR: CPU 1" starpu.log | sed -e "s/^.*RATIO \(.*\) NTASKS\(.*\)$/\2/"` ntaskcpu2=`grep "MODEL ERROR: CPU 2" starpu.log | sed -e "s/^.*RATIO \(.*\) NTASKS\(.*\)$/\2/"` ntaskcuda0=`grep "MODEL ERROR: CUDA 0" starpu.log | sed -e "s/^.*RATIO \(.*\) NTASKS\(.*\)$/\2/"` sumcpu[$i]=$(echo "${sumcpu[$i]} + ( $avgcpu0 * $ntaskcpu0 ) + ( $avgcpu1 * $ntaskcpu1 ) + ( $avgcpu2 * $ntaskcpu2 )"| bc -l) ntaskcpu[$i]=$(echo "${ntaskcpu[$i]} + $ntaskcpu0 + $ntaskcpu1 + $ntaskcpu2"|bc -l) sumcuda[$i]=$(echo "${sumcuda[$i]} + ( $avgcuda0 * $ntaskcuda0 )"| bc -l) ntaskcuda[$i]=$(echo "${ntaskcuda[$i]} + $ntaskcuda0"|bc -l) cpu_taskcnt=$(($cpu_taskcnt + $ntaskcpu0 + $ntaskcpu1 + $ntaskcpu2 )) gpu_taskcnt=$(($gpu_taskcnt + $ntaskcuda0)) cpu_ntasktotal[$i]=$( echo "$cpu_taskcnt + ${cpu_ntasktotal[$i]}" | bc -l) gpu_ntasktotal[$i]=$( echo "$gpu_taskcnt + ${gpu_ntasktotal[$i]}" | bc -l) done done i=0 echo "#ntaskscpu #avg. error cpu #ntaskgpu #avg. error gpu #avg. gflops" > gnuplot.data for nblocks in $nblockslist do i=$(($i + 1)) avggflops=$(echo "${sumgflops[$i]}/$niter"|bc -l) cpu_ntasks=$(echo "${cpu_ntasktotal[$i]}/$niter" | bc -l) gpu_ntasks=$(echo "${gpu_ntasktotal[$i]}/$niter" | bc -l) avgcpu=$(echo "${sumcpu[$i]}/${ntaskcpu[$i]}"|bc -l) avgcuda=$(echo "${sumcuda[$i]}/${ntaskcuda[$i]}"|bc -l) echo "$cpu_ntasks $avgcpu $gpu_ntasks $avgcuda $avggflops" >> gnuplot.data done ./error_model.gp starpu-1.4.9+dfsg/tools/release/000077500000000000000000000000001507764646700165705ustar00rootroot00000000000000starpu-1.4.9+dfsg/tools/release/Makefile000066400000000000000000000055531507764646700202400ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # STARPU ?= undefined EXAMPLE ?= undefined TARGETS = TARGETS += hello_world TARGETS += block TARGETS += mult TARGETS += variable TARGETS += incrementer ifeq ($(STARPU),undefined) all: @echo @echo "ERROR. You need to set the variable STARPU to the name of the pkg-config StarPU package" @echo clean:; rm -f $(TARGETS) *.o else ifeq ($(EXAMPLE),undefined) all: @echo @echo "ERROR. You need to set the variable EXAMPLE to the directory hosting the example sources" @echo clean:; rm -f $(TARGETS) *.o else CFLAGS += $$(pkg-config --cflags $(STARPU)) LDFLAGS += $$(pkg-config --libs $(STARPU)) HAS_CUDA = $(shell starpu_config -d | grep -c STARPU_USE_CUDA) NVCC ?= nvcc HAS_OPENCL = $(shell starpu_config -d | grep -c STARPU_USE_OPENCL) ifneq ($(strip $(HAS_CUDA)),0) LDFLAGS += -lcudart endif ifneq ($(strip $(HAS_OPENCL)),0) LDFLAGS += -lOpenCL endif %: %.o $(CC) $< $(LDFLAGS) -o $@ %.o: $(EXAMPLE)/basic_examples/%.cu $(NVCC) -std=c++11 $(CFLAGS) $< -c %.o: $(EXAMPLE)/basic_examples/%.c $(CC) $(CFLAGS) $< -c %.o: $(EXAMPLE)/incrementer/%.cu $(NVCC) -std=c++11 $(CFLAGS) $< -c %.o: $(EXAMPLE)/incrementer/%.c $(CC) $(CFLAGS) $< -c all: $(TARGETS) BLOCK_PREREQUISITES = block.o block_cpu.o ifneq ($(strip $(HAS_CUDA)),0) BLOCK_PREREQUISITES += block_cuda.o endif ifneq ($(strip $(HAS_OPENCL)),0) BLOCK_PREREQUISITES += block_opencl.o endif block: $(BLOCK_PREREQUISITES) $(CC) $^ $(LDFLAGS) -o $@ VARIABLE_PREREQUISITES = variable.o variable_kernels_cpu.o ifneq ($(strip $(HAS_CUDA)),) VARIABLE_PREREQUISITES += variable_kernels.o endif ifneq ($(strip $(HAS_OPENCL)),) VARIABLE_PREREQUISITES += variable_kernels_opencl.o endif variable: $(VARIABLE_PREREQUISITES) $(CC) $^ $(LDFLAGS) -o $@ INCREMENTER_PREREQUISITES = incrementer.o ifneq ($(strip $(HAS_CUDA)),) INCREMENTER_PREREQUISITES += incrementer_kernels.o endif ifneq ($(strip $(HAS_OPENCL)),) INCREMENTER_PREREQUISITES += incrementer_kernels_opencl.o endif incrementer: $(INCREMENTER_PREREQUISITES) $(CC) $^ $(LDFLAGS) -o $@ MULT_PREREQUISITES = mult.o ifneq ($(strip $(HAS_CUDA)),0) MULT_PREREQUISITES += mult_cuda.o endif mult: $(MULT_PREREQUISITES) $(CC) $^ $(LDFLAGS) -o $@ clean:; rm -f $(TARGETS) *.o endif endif starpu-1.4.9+dfsg/tools/release/README.md000066400000000000000000000025531507764646700200540ustar00rootroot00000000000000 The makefile in this directory should be used to test the compilation and execution of StarPU examples against an installed version of StarPU. For example, if StarPU is installed in ``` STARPU_INST=$HOME/softs/starpu-1.4 ``` and the source code of StarPU is in ``` STARPU_SRC=$HOME/src/starpu/master ``` one first need to call the following script ``` source $STARPU_INST/bin/starpu_env ``` and then call ``` make STARPU=starpu-1.4 EXAMPLE=$STARPU_SRC/examples ``` to produce the executables. Examples using an old StarPU API can also be tested, for example the branch 1.0 ``` make STARPU=starpu-1.0 EXAMPLE=$HOME/src/starpu/branches/starpu-1.0/examples/ ``` Note the variable STARPU is set to starpu-1.0 to use the 1.0 API. starpu-1.4.9+dfsg/tools/starpu_calibrate_bus.1000066400000000000000000000007651507764646700214370ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_CALIBRATE_BUS "1" "October 2025" "StarPU 1.4.9" "User Commands" .SH NAME starpu_calibrate_bus \- Force StarPU bus calibration .SH SYNOPSIS .B starpu_calibrate_bus [\fI\,OPTION\/\fR] .SH DESCRIPTION Force a bus calibration. .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR display this help and exit .TP \fB\-v\fR, \fB\-\-version\fR output version information and exit .SH "REPORTING BUGS" Report bugs to . starpu-1.4.9+dfsg/tools/starpu_calibrate_bus.c000066400000000000000000000036301507764646700215130ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #if defined(_WIN32) && !defined(__CYGWIN__) #include #endif #define PROGNAME "starpu_calibrate_bus" static void usage(void) { (void) fprintf(stdout, "Force a bus calibration.\n\ \n\ Usage: %s [OPTION]\n\ \n\ Options:\n\ -h, --help display this help and exit\n\ -v, --version output version information and exit\n\ \n\ Report bugs to <%s>.\n", PROGNAME, PACKAGE_BUGREPORT); } static void parse_args(int argc, char **argv) { if (argc == 1) return; if (argc > 2) { usage(); exit(EXIT_FAILURE); } if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0) { usage(); exit(EXIT_SUCCESS); } else if (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0) { fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); exit(EXIT_SUCCESS); } else { (void) fprintf(stderr, "Unknown arg %s\n", argv[1]); exit(EXIT_FAILURE); } } int main(int argc, char **argv) { int ret; struct starpu_conf conf; parse_args(argc, argv); starpu_conf_init(&conf); conf.bus_calibrate = 1; ret = starpu_init(&conf); if (ret == -ENODEV) return 77; if (ret != 0) return ret; starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/tools/starpu_codelet_histo_profile000077500000000000000000000042201507764646700230370ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROGNAME=$0 usage() { echo "Offline tool to draw codelet profile histogram over a traced execution" echo "" echo "Usage: $PROGNAME distrib.data" echo "" echo "Options:" echo " -h, --help display this help and exit" echo " -v, --version output version information and exit" echo "" echo "Report bugs to " exit 1 } if [ "$1" = "-v" ] || [ "$1" = "--version" ] ; then echo "$PROGNAME (StarPU) 1.4.9" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then usage fi create_histograms() { inputfile=$1 R --no-save > /dev/null << EOF handle_hash <- function (codelet, arch, hash) { mytable <- table mytable <- mytable[mytable[,1]==codelet,] mytable <- mytable[mytable[,2]==arch,] mytable <- mytable[mytable[,4]==hash,] val <- mytable[,5] # there is certainly a better way to do this ! size <- unique(mytable[,3]) pdf(paste("$inputfile", codelet, arch, hash, size, "pdf", sep=".")); try ( { h <- hist(val[val > quantile(val,0.01) & val starpu-1.4.9+dfsg/tools/starpu_codelet_histo_profile.in000077500000000000000000000042421507764646700234500ustar00rootroot00000000000000#!/bin/sh # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROGNAME=$0 usage() { echo "Offline tool to draw codelet profile histogram over a traced execution" echo "" echo "Usage: $PROGNAME distrib.data" echo "" echo "Options:" echo " -h, --help display this help and exit" echo " -v, --version output version information and exit" echo "" echo "Report bugs to <@PACKAGE_BUGREPORT@>" exit 1 } if [ "$1" = "-v" ] || [ "$1" = "--version" ] ; then echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then usage fi create_histograms() { inputfile=$1 R --no-save > /dev/null << EOF handle_hash <- function (codelet, arch, hash) { mytable <- table mytable <- mytable[mytable[,1]==codelet,] mytable <- mytable[mytable[,2]==arch,] mytable <- mytable[mytable[,4]==hash,] val <- mytable[,5] # there is certainly a better way to do this ! size <- unique(mytable[,3]) pdf(paste("$inputfile", codelet, arch, hash, size, "pdf", sep=".")); try ( { h <- hist(val[val > quantile(val,0.01) & val" exit 1 } if [ "$1" = "-v" ] || [ "$1" = "--version" ] ; then echo "$PROGNAME (StarPU) 1.4.9" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$2" = "" ] ; then usage fi inputfile=$1 codelet_name=$2 archlist=`< $inputfile grep "^$codelet_name " | cut -f 2 | sort | uniq | xargs` # extract subfiles from the history file for arch in $archlist do echo "Arch $arch" grep "^$codelet_name $arch" $inputfile > $inputfile.$arch done # create the gnuplot file gpfile=$inputfile.gp echo "#!/usr/bin/gnuplot -persist" > $gpfile echo "set term postscript eps enhanced color" >> $gpfile echo "set logscale x" >> $gpfile echo "set logscale y" >> $gpfile echo "set output \"$inputfile.eps\"" >> $gpfile echo "set key top left" >> $gpfile echo "set xlabel \"Total data size\"" >> $gpfile echo "set ylabel \"Execution time (ms)\"" >> $gpfile echo -n "plot " >> $gpfile first=1 for arch in $archlist do if [ $first = 0 ] then echo -n " , " >> $gpfile else first=0 fi echo -n " \"$inputfile.$arch\" using 3:5 title \"${codelet_name//_/\\\\_} arch $arch\"" >> $gpfile done starpu-1.4.9+dfsg/tools/starpu_codelet_profile.1000066400000000000000000000010561507764646700217710ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_CODELET_PROFILE "1" "October 2025" "StarPU 1.4.9" "User Commands" .SH NAME starpu_codelet_profile \- Draw StarPU codelet profile .SH SYNOPSIS .B starpu_codelet_profile \fI\,distrib.data codelet_name\/\fR .SH DESCRIPTION Offline tool to draw codelet profile over a traced execution .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR display this help and exit .TP \fB\-v\fR, \fB\-\-version\fR output version information and exit .SH "REPORTING BUGS" Report bugs to starpu-1.4.9+dfsg/tools/starpu_codelet_profile.in000077500000000000000000000042141507764646700222410ustar00rootroot00000000000000#!@REALBASH@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROGNAME=$0 usage() { echo "Offline tool to draw codelet profile over a traced execution" echo "" echo "Usage: $PROGNAME distrib.data codelet_name" echo "" echo "Options:" echo " -h, --help display this help and exit" echo " -v, --version output version information and exit" echo "" echo "Report bugs to <@PACKAGE_BUGREPORT@>" exit 1 } if [ "$1" = "-v" ] || [ "$1" = "--version" ] ; then echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$2" = "" ] ; then usage fi inputfile=$1 codelet_name=$2 archlist=`< $inputfile grep "^$codelet_name " | cut -f 2 | sort | uniq | xargs` # extract subfiles from the history file for arch in $archlist do echo "Arch $arch" grep "^$codelet_name $arch" $inputfile > $inputfile.$arch done # create the gnuplot file gpfile=$inputfile.gp echo "#!/usr/bin/gnuplot -persist" > $gpfile echo "set term postscript eps enhanced color" >> $gpfile echo "set logscale x" >> $gpfile echo "set logscale y" >> $gpfile echo "set output \"$inputfile.eps\"" >> $gpfile echo "set key top left" >> $gpfile echo "set xlabel \"Total data size\"" >> $gpfile echo "set ylabel \"Execution time (ms)\"" >> $gpfile echo -n "plot " >> $gpfile first=1 for arch in $archlist do if [ $first = 0 ] then echo -n " , " >> $gpfile else first=0 fi echo -n " \"$inputfile.$arch\" using 3:5 title \"${codelet_name//_/\\\\_} arch $arch\"" >> $gpfile done starpu-1.4.9+dfsg/tools/starpu_config000077500000000000000000000043421507764646700177440ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROGNAME=$0 set -e usage() { echo "Show the configuration used by StarPU." echo "" echo "Usage: $PROGNAME " echo "" echo "" echo " The starpu_config utility shows all the configuration parameters used when installing StarPU" echo "" echo "Options:" echo " -h, --help display this help and exit" echo " -v, --version output version information and exit" echo " -d only shows define parameters" echo " -u only shows undefined parameters" echo "" echo " if parameters are given, only configuration parameters with the given name are displayed" echo "" echo "Report bugs to " exit 0 } if [ "$1" = "-v" ] || [ "$1" = "--version" ] ; then echo "$PROGNAME (StarPU) 1.4.9" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] ; then usage fi prefix=$(realpath /usr/local) if test -d $prefix then starpu_datarootdir=$(realpath ${prefix}/share) config_file="$starpu_datarootdir/starpu/starpu_config.cfg" if test ! -f "$config_file" then config_file=$(realpath $(dirname $0))/starpu_config.cfg fi else config_file=$(realpath $(dirname $0))/starpu_config.cfg fi if test ! -f "$config_file" then echo "Configuration file unavailable" exit 1 fi echo "processing $config_file" if test "$1" == "-d" then grep 'define' $config_file elif test "$1" == "-u" then grep 'undef' $config_file elif test "$1" then for x in $* do grep $x $config_file done else sort $config_file fi starpu-1.4.9+dfsg/tools/starpu_config.1000066400000000000000000000014141507764646700200750ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_CONFIG "1" "October 2025" "StarPU 1.4.9" "User Commands" .SH NAME starpu_config \- Display StarPU configuration .SH SYNOPSIS .B starpu_config \fI\,\/\fR .SH DESCRIPTION Show the configuration used by StarPU. .IP The starpu_config utility shows all the configuration parameters used when installing StarPU .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR display this help and exit .TP \fB\-v\fR, \fB\-\-version\fR output version information and exit .TP \fB\-d\fR only shows define parameters .TP \fB\-u\fR only shows undefined parameters .IP if parameters are given, only configuration parameters with the given name are displayed .SH "REPORTING BUGS" Report bugs to starpu-1.4.9+dfsg/tools/starpu_config.cfg000066400000000000000000000157431507764646700205060ustar00rootroot00000000000000/* #undef STARPURM_DLB_VERBOSE */ /* #undef STARPURM_HAVE_DLB */ /* #undef STARPURM_HAVE_DLB_CALLBACK_ARG */ /* #undef STARPURM_STARPU_HAVE_WORKER_CALLBACKS */ /* #undef STARPURM_VERBOSE */ /* #undef STARPU_ARMPL */ /* #undef STARPU_ATLAS */ /* #undef STARPU_BUBBLE */ /* #undef STARPU_BUBBLE_VERBOSE */ /* #undef STARPU_BUILT_IN_MIN_DGELS */ /* #undef STARPU_COVERITY */ /* #undef STARPU_DATA_LOCALITY_ENFORCE */ #define STARPU_DEBUG 1 /* #undef STARPU_DEVEL */ /* #undef STARPU_DISABLE_ASYNCHRONOUS_COPY */ /* #undef STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY */ /* #undef STARPU_DISABLE_ASYNCHRONOUS_MAX_FPGA_COPY */ /* #undef STARPU_DISABLE_ASYNCHRONOUS_MPI_MS_COPY */ /* #undef STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY */ /* #undef STARPU_DISABLE_ASYNCHRONOUS_TCPIP_MS_COPY */ /* #undef STARPU_EXTRA_VERBOSE */ /* #undef STARPU_FXT_LOCK_TRACES */ #define STARPU_FXT_MAX_FILES 64 #define STARPU_GDB_PATH "/usr/bin/gdb" /* #undef STARPU_GOTO */ #define STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N 1 #define STARPU_HAVE_ATOMIC_COMPARE_EXCHANGE_N_8 1 #define STARPU_HAVE_ATOMIC_EXCHANGE_N 1 #define STARPU_HAVE_ATOMIC_EXCHANGE_N_8 1 #define STARPU_HAVE_ATOMIC_FETCH_ADD 1 #define STARPU_HAVE_ATOMIC_FETCH_ADD_8 1 #define STARPU_HAVE_ATOMIC_FETCH_OR 1 #define STARPU_HAVE_ATOMIC_FETCH_OR_8 1 #define STARPU_HAVE_ATOMIC_TEST_AND_SET 1 #define STARPU_HAVE_BLAS 1 #define STARPU_HAVE_BUSID 1 #define STARPU_HAVE_CBLAS_H 1 #define STARPU_HAVE_CUDA_CANMAPHOST 1 #define STARPU_HAVE_CUDA_MEMCPY_PEER 1 #define STARPU_HAVE_CUDA_MNGMEM 1 #define STARPU_HAVE_CUDA_PAGEABLEMEM 1 #define STARPU_HAVE_CUDA_POINTER_TYPE 1 #define STARPU_HAVE_CUDA_UNIFIEDADDR 1 #define STARPU_HAVE_CUFFTDOUBLECOMPLEX 1 #define STARPU_HAVE_CURAND 1 #define STARPU_HAVE_CXX11 1 /* #undef STARPU_HAVE_DARWIN */ #define STARPU_HAVE_DOMAINID 1 /* #undef STARPU_HAVE_F77_H */ #define STARPU_HAVE_FC 1 #define STARPU_HAVE_FFTW 1 #define STARPU_HAVE_FFTWF 1 #define STARPU_HAVE_FFTWL 1 #define STARPU_HAVE_GLPK_H 1 /* #undef STARPU_HAVE_HDF5 */ #define STARPU_HAVE_HELGRIND_H 1 /* #undef STARPU_HAVE_HIP_MEMCPY_PEER */ #define STARPU_HAVE_HWLOC 1 /* #undef STARPU_HAVE_ICC */ /* #undef STARPU_HAVE_LEVELDB */ #define STARPU_HAVE_LIBCUBLASLT 1 #define STARPU_HAVE_LIBCUSOLVER 1 #define STARPU_HAVE_LIBCUSPARSE 1 #define STARPU_HAVE_LIBNUMA 1 /* #undef STARPU_HAVE_MAGMA */ #define STARPU_HAVE_MALLOC_H 1 #define STARPU_HAVE_MEMALIGN 1 #define STARPU_HAVE_MEMCHECK_H 1 #define STARPU_HAVE_MPI_COMM_CREATE_GROUP 1 #define STARPU_HAVE_MPI_EXT 1 /* #undef STARPU_HAVE_MPI_SYNC_CLOCKS */ /* #undef STARPU_HAVE_MSG_MSG_H */ #define STARPU_HAVE_NEARBYINTF 1 #define STARPU_HAVE_NVML_H 1 #define STARPU_HAVE_POSIX_MEMALIGN 1 /* #undef STARPU_HAVE_POTI */ #define STARPU_HAVE_PROGRAM_INVOCATION_SHORT_NAME 1 #define STARPU_HAVE_PTHREAD_BARRIER 1 #define STARPU_HAVE_PTHREAD_SETNAME_NP 1 #define STARPU_HAVE_PTHREAD_SPIN_LOCK 1 #define STARPU_HAVE_RINTF 1 /* #undef STARPU_HAVE_S4U_ON_TIME_ADVANCE_CB */ #define STARPU_HAVE_SCHED_YIELD 1 #define STARPU_HAVE_SETENV 1 /* #undef STARPU_HAVE_SIMGRID_ACTOR_H */ /* #undef STARPU_HAVE_SIMGRID_BARRIER_H */ /* #undef STARPU_HAVE_SIMGRID_COND_H */ /* #undef STARPU_HAVE_SIMGRID_ENGINE_H */ /* #undef STARPU_HAVE_SIMGRID_HOST_H */ /* #undef STARPU_HAVE_SIMGRID_LINK_H */ /* #undef STARPU_HAVE_SIMGRID_MSG_H */ /* #undef STARPU_HAVE_SIMGRID_MUTEX_H */ /* #undef STARPU_HAVE_SIMGRID_SEMAPHORE_H */ /* #undef STARPU_HAVE_SIMGRID_SIMDAG_H */ /* #undef STARPU_HAVE_SIMGRID_VERSION_H */ /* #undef STARPU_HAVE_SIMGRID_ZONE_H */ /* #undef STARPU_HAVE_SMX_ACTOR_T */ #define STARPU_HAVE_STATEMENT_EXPRESSIONS 1 #define STARPU_HAVE_STRERROR_R 1 #define STARPU_HAVE_STRUCT_TIMESPEC 1 #define STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP 1 #define STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8 1 #define STARPU_HAVE_SYNC_FETCH_AND_ADD 1 #define STARPU_HAVE_SYNC_FETCH_AND_ADD_8 1 #define STARPU_HAVE_SYNC_FETCH_AND_OR 1 #define STARPU_HAVE_SYNC_FETCH_AND_OR_8 1 #define STARPU_HAVE_SYNC_LOCK_TEST_AND_SET 1 #define STARPU_HAVE_SYNC_SYNCHRONIZE 1 #define STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP 1 #define STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP_8 1 #define STARPU_HAVE_UNISTD_H 1 #define STARPU_HAVE_UNSETENV 1 #define STARPU_HAVE_VALGRIND_H 1 /* #undef STARPU_HAVE_WINDOWS */ #define STARPU_HAVE_X11 1 /* #undef STARPU_HAVE_XBT_BASE_H */ /* #undef STARPU_HAVE_XBT_CONFIG_H */ /* #undef STARPU_HAVE_XBT_SYNCHRO_H */ #define STARPU_HISTORYMAXERROR 50 #define STARPU_LINUX_SYS 1 /* #undef STARPU_LONG_CHECK */ #define STARPU_MAJOR_VERSION 1 #define STARPU_MAXCPUS 128 #define STARPU_MAXCUDADEVS 4 #define STARPU_MAXHIPDEVS 8 #define STARPU_MAXIMPLEMENTATIONS 4 #define STARPU_MAXMAXFPGADEVS 12 #define STARPU_MAXMPIDEVS 0 #define STARPU_MAXNODES 16 #define STARPU_MAXNUMANODES 2 #define STARPU_MAXOPENCLDEVS 8 #define STARPU_MAXTCPIPDEVS 0 /* #undef STARPU_MEMORY_STATS */ #define STARPU_MINOR_VERSION 4 /* #undef STARPU_MKL */ /* #undef STARPU_MLR_MODEL */ /* #undef STARPU_MODEL_DEBUG */ /* #undef STARPU_MPI_EXTRA_VERBOSE */ /* #undef STARPU_MPI_PEDANTIC_ISEND */ /* #undef STARPU_MPI_VERBOSE */ /* #undef STARPU_NATIVE_WINTHREADS */ /* #undef STARPU_NEW_CHECK */ #define STARPU_NMAXBUFS 8 #define STARPU_NMAXDEVS 8 #define STARPU_NMAXWORKERS 160 #define STARPU_NMAX_COMBINEDWORKERS 128 #define STARPU_NMAX_SCHED_CTXS 10 #define STARPU_NON_BLOCKING_DRIVERS 1 /* #undef STARPU_NO_ASSERT */ /* #undef STARPU_OPENBLAS */ /* #undef STARPU_OPENBSD_SYS */ /* #undef STARPU_OPENCL_SIMULATOR */ /* #undef STARPU_OPENGL_RENDER */ #define STARPU_OPENMP 1 /* #undef STARPU_OPENMP_LLVM */ /* #undef STARPU_PAPI */ #define STARPU_PARALLEL_WORKER 1 /* #undef STARPU_PERF_DEBUG */ /* #undef STARPU_PERF_MODEL_DIR */ #define STARPU_PROF_TOOL 1 #define STARPU_PTHREAD_COND_INITIALIZER_ZERO 1 #define STARPU_PTHREAD_MUTEX_INITIALIZER_ZERO 1 #define STARPU_PTHREAD_RWLOCK_INITIALIZER_ZERO 1 #define STARPU_PYTHON_HAVE_CLOUDPICKLE 1 #define STARPU_PYTHON_HAVE_JOBLIB 1 #define STARPU_PYTHON_HAVE_NUMPY 1 #define STARPU_QUICK_CHECK 1 #define STARPU_RELEASE_VERSION 9 /* #undef STARPU_SC_HYPERVISOR_DEBUG */ /* #undef STARPU_SIMGRID */ /* #undef STARPU_SIMGRID_HAVE_SIMGRID_INIT */ /* #undef STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT */ /* #undef STARPU_SIMGRID_MC */ #define STARPU_SPINLOCK_CHECK 1 /* #undef STARPU_STATIC_ONLY */ #define STARPU_SYSTEM_BLAS 1 #define STARPU_USE_ALLOCATION_CACHE 1 /* #undef STARPU_USE_AYUDAME1 */ /* #undef STARPU_USE_AYUDAME2 */ #define STARPU_USE_CPU 1 #define STARPU_USE_CUDA 1 /* #undef STARPU_USE_CUDA0 */ /* #undef STARPU_USE_CUDA1 */ #define STARPU_USE_CUDA_MAP 1 #define STARPU_USE_DRAND48 1 #define STARPU_USE_ERAND48_R 1 #define STARPU_USE_FXT 1 /* #undef STARPU_USE_HIP */ /* #undef STARPU_USE_HIPBLAS */ /* #undef STARPU_USE_MAX_FPGA */ /* #undef STARPU_USE_MP */ #define STARPU_USE_MPI 1 /* #undef STARPU_USE_MPI_FT */ /* #undef STARPU_USE_MPI_FT_STATS */ /* #undef STARPU_USE_MPI_MASTER_SLAVE */ #define STARPU_USE_MPI_MPI 1 /* #undef STARPU_USE_MPI_NMAD */ #define STARPU_USE_OPENCL 1 /* #undef STARPU_USE_SC_HYPERVISOR */ /* #undef STARPU_USE_TCPIP_MASTER_SLAVE */ /* #undef STARPU_VALGRIND_FULL */ #define STARPU_VERBOSE 1 /* #undef STARPU_WORKER_CALLBACKS */ starpu-1.4.9+dfsg/tools/starpu_config.in000066400000000000000000000043611507764646700203470ustar00rootroot00000000000000#!@REALBASH@ # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROGNAME=$0 set -e usage() { echo "Show the configuration used by StarPU." echo "" echo "Usage: $PROGNAME " echo "" echo "" echo " The starpu_config utility shows all the configuration parameters used when installing StarPU" echo "" echo "Options:" echo " -h, --help display this help and exit" echo " -v, --version output version information and exit" echo " -d only shows define parameters" echo " -u only shows undefined parameters" echo "" echo " if parameters are given, only configuration parameters with the given name are displayed" echo "" echo "Report bugs to <@PACKAGE_BUGREPORT@>" exit 0 } if [ "$1" = "-v" ] || [ "$1" = "--version" ] ; then echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] ; then usage fi prefix=$(realpath @prefix@) if test -d $prefix then starpu_datarootdir=$(realpath @datarootdir@) config_file="$starpu_datarootdir/starpu/starpu_config.cfg" if test ! -f "$config_file" then config_file=$(realpath $(dirname $0))/starpu_config.cfg fi else config_file=$(realpath $(dirname $0))/starpu_config.cfg fi if test ! -f "$config_file" then echo "Configuration file unavailable" exit 1 fi echo "processing $config_file" if test "$1" == "-d" then grep 'define' $config_file elif test "$1" == "-u" then grep 'undef' $config_file elif test "$1" then for x in $* do grep $x $config_file done else sort $config_file fi starpu-1.4.9+dfsg/tools/starpu_env000077500000000000000000000037341507764646700172730ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROGNAME=starpu_env usage() { echo "Tool to set StarPU environment variables" echo "" echo "Usage: source $PROGNAME" echo "" echo "" echo "Options:" echo " -h, --help display this help and exit" echo " -v, --version output version information and exit" echo "" echo "Report bugs to " } if [ "$1" = "-v" ] || [ "$1" = "--version" ] then echo "$PROGNAME (StarPU) 1.4.9" elif [ "$1" = "-h" ] || [ "$1" = "--help" ] then usage else prefix=$(realpath /usr/local) exec_prefix=$(realpath ${prefix}) starpu_bindir=$(realpath ${exec_prefix}/bin) starpu_libdir=$(realpath ${exec_prefix}/lib) starpu_datarootdir=$(realpath ${prefix}/share) if test -f $starpu_bindir/starpu_machine_display -a -f $starpu_libdir/pkgconfig/libstarpu.pc then echo "Setting StarPU environment for $prefix" export STARPU_ROOT=$prefix export PKG_CONFIG_PATH=$starpu_libdir/pkgconfig:$PKG_CONFIG_PATH export LD_LIBRARY_PATH=$starpu_libdir:$LD_LIBRARY_PATH export PATH=$starpu_bindir:$PATH export MANPATH=$starpu_datarootdir/man:$MANPATH if [ -n "python3" ] then for d in $starpu_libdir/python3*/site-packages ; do export PYTHONPATH=$d:$PYTHONPATH ; done fi else echo "[Error] $prefix is not a valid StarPU installation directory" fi fi starpu-1.4.9+dfsg/tools/starpu_env.1000066400000000000000000000007501507764646700174220ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_ENV "1" "October 2025" "StarPU 1.4.9" "User Commands" .SH NAME starpu_env \- Set StarPU environment variables .SH SYNOPSIS .B source \fI\,starpu_env\/\fR .SH DESCRIPTION Tool to set StarPU environment variables .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR display this help and exit .TP \fB\-v\fR, \fB\-\-version\fR output version information and exit .SH "REPORTING BUGS" Report bugs to starpu-1.4.9+dfsg/tools/starpu_env.in000077500000000000000000000037341507764646700177000ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # PROGNAME=starpu_env usage() { echo "Tool to set StarPU environment variables" echo "" echo "Usage: source $PROGNAME" echo "" echo "" echo "Options:" echo " -h, --help display this help and exit" echo " -v, --version output version information and exit" echo "" echo "Report bugs to <@PACKAGE_BUGREPORT@>" } if [ "$1" = "-v" ] || [ "$1" = "--version" ] then echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" elif [ "$1" = "-h" ] || [ "$1" = "--help" ] then usage else prefix=$(realpath @prefix@) exec_prefix=$(realpath @exec_prefix@) starpu_bindir=$(realpath @bindir@) starpu_libdir=$(realpath @libdir@) starpu_datarootdir=$(realpath @datarootdir@) if test -f $starpu_bindir/starpu_machine_display -a -f $starpu_libdir/pkgconfig/libstarpu.pc then echo "Setting StarPU environment for $prefix" export STARPU_ROOT=$prefix export PKG_CONFIG_PATH=$starpu_libdir/pkgconfig:$PKG_CONFIG_PATH export LD_LIBRARY_PATH=$starpu_libdir:$LD_LIBRARY_PATH export PATH=$starpu_bindir:$PATH export MANPATH=$starpu_datarootdir/man:$MANPATH if [ -n "@PYTHON@" ] then for d in $starpu_libdir/@PYTHON@*/site-packages ; do export PYTHONPATH=$d:$PYTHONPATH ; done fi else echo "[Error] $prefix is not a valid StarPU installation directory" fi fi starpu-1.4.9+dfsg/tools/starpu_fxt_data_trace.1000066400000000000000000000015141507764646700216010ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_FXT_DATA_TRACE "1" "October 2025" "StarPU 1.4.9" "User Commands" .SH NAME starpu_fxt_data_trace \- Print data trace from raw StarPU FxT trace .SH SYNOPSIS .B starpu_fxt_data_trace [ \fI\,options \/\fR] \fI\, \/\fR[\fI\, \/\fR.... \fI\,\/\fR] .SH DESCRIPTION Get statistics about tasks lengths and data size .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR display this help and exit .TP \fB\-v\fR, \fB\-\-version\fR output version information and exit .HP \fB\-d\fR directory where to save output files (by default current directory) .TP filename specify the FxT trace input file. .TP codeletX specify the codelet name to profile (by default, all codelets are profiled) .SH "REPORTING BUGS" Report bugs to . starpu-1.4.9+dfsg/tools/starpu_fxt_data_trace.c000066400000000000000000000116571507764646700216740ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013 Joris Pablo * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #define PROGNAME "starpu_fxt_data_trace" #define MAX_LINE_SIZE 100 static void usage() { fprintf(stderr, "Get statistics about tasks lengths and data size\n\n"); fprintf(stderr, "Usage: %s [ options ] [ .... ]\n", PROGNAME); fprintf(stderr, "\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, " -h, --help display this help and exit\n"); fprintf(stderr, " -v, --version output version information and exit\n\n"); fprintf(stderr, " -d directory where to save output files (by default current directory)\n"); fprintf(stderr, " filename specify the FxT trace input file.\n"); fprintf(stderr, " codeletX specify the codelet name to profile (by default, all codelets are profiled)\n"); fprintf(stderr, "Report bugs to <%s>.", PACKAGE_BUGREPORT); fprintf(stderr, "\n"); } static int parse_args(int argc, char **argv, int *pos, char **directory) { int i; if(argc < 2) { fprintf(stderr, "Incorrect usage, aborting\n"); usage(); return 77; } for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { usage(); exit(EXIT_FAILURE); } if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) { fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); exit(EXIT_FAILURE); } if (strcmp(argv[i], "-d") == 0) { free(*directory); *directory = strdup(argv[++i]); *pos += 2; continue; } } return 0; } static void write_gp(char *dir, int argc, char **argv) { char codelet_filename[256]; snprintf(codelet_filename, sizeof(codelet_filename), "%s/codelet_list", dir); FILE *codelet_list = fopen(codelet_filename, "r"); if(!codelet_list) { STARPU_ABORT_MSG("Failed to open '%s' (err %s)", codelet_filename, strerror(errno)); exit(-1); } char codelet_name[MAX_LINE_SIZE]; char file_name[256]; snprintf(file_name, sizeof(file_name), "%s/data_trace.gp", dir); FILE *plt = fopen(file_name, "w+"); if(!plt) { STARPU_ABORT_MSG("Failed to open '%s' (err %s)", file_name, strerror(errno)); exit(-1); } fprintf(plt, "#!/usr/bin/gnuplot -persist\n\n"); fprintf(plt, "set term postscript eps enhanced color\n"); fprintf(plt, "set output \"%s/data_trace.eps\"\n", dir); fprintf(plt, "set title \"Data trace\"\n"); fprintf(plt, "set logscale x\n"); fprintf(plt, "set logscale y\n"); fprintf(plt, "set xlabel \"data size (B)\"\n"); fprintf(plt, "set ylabel \"tasks size (ms)\"\n"); fprintf(plt, "plot "); int c_iter; char *v_iter; int begin = 1; while(fgets(codelet_name, MAX_LINE_SIZE, codelet_list) != NULL) { if(argc == 0) { if(begin) begin = 0; else fprintf(plt, ", "); } int size = strlen(codelet_name); if(size > 0) codelet_name[size-1] = '\0'; if(argc != 0) { for(c_iter = 0, v_iter = argv[c_iter]; c_iter < argc; c_iter++, v_iter = argv[c_iter]) { if(!strcmp(v_iter, codelet_name)) { if(begin) begin = 0; else fprintf(plt, ", "); fprintf(plt, "\"%s\" using 2:1 with dots lw 1 title \"%s\"", codelet_name, codelet_name); } } } else { fprintf(plt, "\"%s/%s\" using 2:1 with dots lw 1 title \"%s\"", dir, codelet_name, codelet_name); } } fprintf(plt, "\n"); if(fclose(codelet_list)) { perror("close failed :"); exit(-1); } if(fclose(plt)) { perror("close failed :"); exit(-1); } struct stat sb; int ret = stat(file_name, &sb); if (ret) { perror("stat"); STARPU_ABORT(); } /* Make the gnuplot script executable for the owner */ ret = chmod(file_name, sb.st_mode|S_IXUSR #ifdef S_IXGRP |S_IXGRP #endif #ifdef S_IXOTH |S_IXOTH #endif ); if (ret) { perror("chmod"); STARPU_ABORT(); } fprintf(stdout, "Gnuplot file <%s/data_trace.gp> has been successfully created.\n", dir); } int main(int argc, char **argv) { char *directory = strdup("."); int pos=0; int ret = parse_args(argc, argv, &pos, &directory); if (ret) { free(directory); return ret; } starpu_fxt_write_data_trace_in_dir(argv[1+pos], directory); write_gp(directory, argc - (2 + pos), argv + 2 + pos); starpu_perfmodel_free_sampling(); free(directory); return 0; } starpu-1.4.9+dfsg/tools/starpu_fxt_number_events_to_names.1000066400000000000000000000011271507764646700242530ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_FXT_NUMBER_EVENTS_TO_NAMES.PY "1" "October 2025" "StarPU 1.4.9" "User Commands" .SH NAME starpu_fxt_number_events_to_names.py \- Convert events in StarPU traces .SH SYNOPSIS .B starpu_fxt_number_events_to_names.py \fI\,\/\fR .SH DESCRIPTION Convert event keys in number_events.data to event names .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR display this help and exit .TP \fB\-v\fR, \fB\-\-version\fR output version information and exit .SH "REPORTING BUGS" Report bugs to starpu-1.4.9+dfsg/tools/starpu_fxt_number_events_to_names.py000077500000000000000000000237721507764646700245600ustar00rootroot00000000000000#!/usr/bin/env python3 # -*- coding: utf-8 -*- # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import sys """ Convert event keys into event names Running starpu_fxt_tool with the option -number-events produces a file number_events.data This file contains the number of events for each event type. Events are represented with their key. To convert event keys to event names, call starpu_fxt_number_events_to_names.py """ # STARPU_FXT_EVENT_DEFINES is generated by configure and is the output of # the following command: # grep -E "#define\s+_STARPU_(MPI_)?FUT_" src/common/fxt.h mpi/src/starpu_mpi_fxt.h | grep 0x | grep -v 0x1 |cut -d : -f 2 fxt_codes_raw = """ #define _STARPU_FUT_WORKER_INIT_START 0x5100 #define _STARPU_FUT_WORKER_INIT_END 0x5101 #define _STARPU_FUT_START_CODELET_BODY 0x5102 #define _STARPU_FUT_END_CODELET_BODY 0x5103 #define _STARPU_FUT_JOB_PUSH 0x5104 #define _STARPU_FUT_JOB_POP 0x5105 #define _STARPU_FUT_UPDATE_TASK_CNT 0x5106 #define _STARPU_FUT_START_FETCH_INPUT_ON_TID 0x5107 #define _STARPU_FUT_END_FETCH_INPUT_ON_TID 0x5108 #define _STARPU_FUT_START_PUSH_OUTPUT_ON_TID 0x5109 #define _STARPU_FUT_END_PUSH_OUTPUT_ON_TID 0x5110 #define _STARPU_FUT_TAG 0x5111 #define _STARPU_FUT_TAG_DEPS 0x5112 #define _STARPU_FUT_TASK_DEPS 0x5113 #define _STARPU_FUT_DATA_COPY 0x5114 #define _STARPU_FUT_WORK_STEALING 0x5115 #define _STARPU_FUT_WORKER_DEINIT_START 0x5116 #define _STARPU_FUT_WORKER_DEINIT_END 0x5117 #define _STARPU_FUT_WORKER_SLEEP_START 0x5118 #define _STARPU_FUT_WORKER_SLEEP_END 0x5119 #define _STARPU_FUT_TASK_SUBMIT 0x511a #define _STARPU_FUT_CODELET_DATA_HANDLE 0x511b #define _STARPU_FUT_MODEL_NAME 0x511c #define _STARPU_FUT_DATA_NAME 0x511d #define _STARPU_FUT_DATA_COORDINATES 0x511e #define _STARPU_FUT_HANDLE_DATA_UNREGISTER 0x511f #define _STARPU_FUT_CODELET_DATA_HANDLE_NUMA_ACCESS 0x5120 #define _STARPU_FUT_NEW_MEM_NODE 0x5122 #define _STARPU_FUT_START_CALLBACK 0x5123 #define _STARPU_FUT_END_CALLBACK 0x5124 #define _STARPU_FUT_TASK_DONE 0x5125 #define _STARPU_FUT_TAG_DONE 0x5126 #define _STARPU_FUT_START_ALLOC 0x5127 #define _STARPU_FUT_END_ALLOC 0x5128 #define _STARPU_FUT_START_ALLOC_REUSE 0x5129 #define _STARPU_FUT_END_ALLOC_REUSE 0x5130 #define _STARPU_FUT_USED_MEM 0x512a #define _STARPU_FUT_TASK_NAME 0x512b #define _STARPU_FUT_DATA_WONT_USE 0x512c #define _STARPU_FUT_TASK_COLOR 0x512d #define _STARPU_FUT_DATA_DOING_WONT_USE 0x512e #define _STARPU_FUT_TASK_LINE 0x512f #define _STARPU_FUT_START_MEMRECLAIM 0x5131 #define _STARPU_FUT_END_MEMRECLAIM 0x5132 #define _STARPU_FUT_START_DRIVER_COPY 0x5133 #define _STARPU_FUT_END_DRIVER_COPY 0x5134 #define _STARPU_FUT_START_DRIVER_COPY_ASYNC 0x5135 #define _STARPU_FUT_END_DRIVER_COPY_ASYNC 0x5136 #define _STARPU_FUT_START_PROGRESS_ON_TID 0x5137 #define _STARPU_FUT_END_PROGRESS_ON_TID 0x5138 #define _STARPU_FUT_USER_EVENT 0x5139 #define _STARPU_FUT_SET_PROFILING 0x513a #define _STARPU_FUT_TASK_WAIT_FOR_ALL 0x513b #define _STARPU_FUT_EVENT 0x513c #define _STARPU_FUT_THREAD_EVENT 0x513d #define _STARPU_FUT_CODELET_DETAILS 0x513e #define _STARPU_FUT_CODELET_DATA 0x513f #define _STARPU_FUT_LOCKING_MUTEX 0x5140 #define _STARPU_FUT_MUTEX_LOCKED 0x5141 #define _STARPU_FUT_UNLOCKING_MUTEX 0x5142 #define _STARPU_FUT_MUTEX_UNLOCKED 0x5143 #define _STARPU_FUT_TRYLOCK_MUTEX 0x5144 #define _STARPU_FUT_RDLOCKING_RWLOCK 0x5145 #define _STARPU_FUT_RWLOCK_RDLOCKED 0x5146 #define _STARPU_FUT_WRLOCKING_RWLOCK 0x5147 #define _STARPU_FUT_RWLOCK_WRLOCKED 0x5148 #define _STARPU_FUT_UNLOCKING_RWLOCK 0x5149 #define _STARPU_FUT_RWLOCK_UNLOCKED 0x514a #define _STARPU_FUT_LOCKING_SPINLOCK 0x514b #define _STARPU_FUT_SPINLOCK_LOCKED 0x514c #define _STARPU_FUT_UNLOCKING_SPINLOCK 0x514d #define _STARPU_FUT_SPINLOCK_UNLOCKED 0x514e #define _STARPU_FUT_TRYLOCK_SPINLOCK 0x514f #define _STARPU_FUT_COND_WAIT_BEGIN 0x5150 #define _STARPU_FUT_COND_WAIT_END 0x5151 #define _STARPU_FUT_MEMORY_FULL 0x5152 #define _STARPU_FUT_DATA_LOAD 0x5153 #define _STARPU_FUT_START_UNPARTITION_ON_TID 0x5154 #define _STARPU_FUT_END_UNPARTITION_ON_TID 0x5155 #define _STARPU_FUT_START_FREE 0x5156 #define _STARPU_FUT_END_FREE 0x5157 #define _STARPU_FUT_START_WRITEBACK 0x5158 #define _STARPU_FUT_END_WRITEBACK 0x5159 #define _STARPU_FUT_SCHED_COMPONENT_PUSH_PRIO 0x515a #define _STARPU_FUT_SCHED_COMPONENT_POP_PRIO 0x515b #define _STARPU_FUT_START_WRITEBACK_ASYNC 0x515c #define _STARPU_FUT_END_WRITEBACK_ASYNC 0x515d #define _STARPU_FUT_HYPERVISOR_BEGIN 0x5160 #define _STARPU_FUT_HYPERVISOR_END 0x5161 #define _STARPU_FUT_BARRIER_WAIT_BEGIN 0x5162 #define _STARPU_FUT_BARRIER_WAIT_END 0x5163 #define _STARPU_FUT_WORKER_SCHEDULING_START 0x5164 #define _STARPU_FUT_WORKER_SCHEDULING_END 0x5165 #define _STARPU_FUT_WORKER_SCHEDULING_PUSH 0x5166 #define _STARPU_FUT_WORKER_SCHEDULING_POP 0x5167 #define _STARPU_FUT_START_EXECUTING 0x5168 #define _STARPU_FUT_END_EXECUTING 0x5169 #define _STARPU_FUT_SCHED_COMPONENT_NEW 0x516a #define _STARPU_FUT_SCHED_COMPONENT_CONNECT 0x516b #define _STARPU_FUT_SCHED_COMPONENT_PUSH 0x516c #define _STARPU_FUT_SCHED_COMPONENT_PULL 0x516d #define _STARPU_FUT_TASK_SUBMIT_START 0x516e #define _STARPU_FUT_TASK_SUBMIT_END 0x516f #define _STARPU_FUT_TASK_BUILD_START 0x5170 #define _STARPU_FUT_TASK_BUILD_END 0x5171 #define _STARPU_FUT_TASK_MPI_DECODE_START 0x5172 #define _STARPU_FUT_TASK_MPI_DECODE_END 0x5173 #define _STARPU_FUT_TASK_MPI_PRE_START 0x5174 #define _STARPU_FUT_TASK_MPI_PRE_END 0x5175 #define _STARPU_FUT_TASK_MPI_POST_START 0x5176 #define _STARPU_FUT_TASK_MPI_POST_END 0x5177 #define _STARPU_FUT_TASK_WAIT_START 0x5178 #define _STARPU_FUT_TASK_WAIT_END 0x5179 #define _STARPU_FUT_TASK_WAIT_FOR_ALL_START 0x517a #define _STARPU_FUT_TASK_WAIT_FOR_ALL_END 0x517b #define _STARPU_FUT_HANDLE_DATA_REGISTER 0x517c #define _STARPU_FUT_START_FETCH_INPUT 0x517e #define _STARPU_FUT_END_FETCH_INPUT 0x517f #define _STARPU_FUT_TASK_THROTTLE_START 0x5180 #define _STARPU_FUT_TASK_THROTTLE_END 0x5181 #define _STARPU_FUT_DATA_STATE_INVALID 0x5182 #define _STARPU_FUT_DATA_STATE_OWNER 0x5183 #define _STARPU_FUT_DATA_STATE_SHARED 0x5184 #define _STARPU_FUT_DATA_REQUEST_CREATED 0x5185 #define _STARPU_FUT_PAPI_TASK_EVENT_VALUE 0x5186 #define _STARPU_FUT_TASK_EXCLUDE_FROM_DAG 0x5187 #define _STARPU_FUT_TASK_END_DEP 0x5188 #define _STARPU_FUT_TASK_BUBBLE 0x5189 #define _STARPU_FUT_START_PARALLEL_SYNC 0x518a #define _STARPU_FUT_END_PARALLEL_SYNC 0x518b #define _STARPU_MPI_FUT_START 0x5201 #define _STARPU_MPI_FUT_STOP 0x5202 #define _STARPU_MPI_FUT_BARRIER 0x5203 #define _STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN 0x5204 #define _STARPU_MPI_FUT_ISEND_SUBMIT_END 0x5205 #define _STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN 0x5206 #define _STARPU_MPI_FUT_IRECV_SUBMIT_END 0x5207 #define _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN 0x5208 #define _STARPU_MPI_FUT_ISEND_COMPLETE_END 0x5209 #define _STARPU_MPI_FUT_DATA_SET_RANK 0x521a #define _STARPU_MPI_FUT_IRECV_TERMINATED 0x521b #define _STARPU_MPI_FUT_ISEND_TERMINATED 0x521c #define _STARPU_MPI_FUT_TESTING_DETACHED_BEGIN 0x521d #define _STARPU_MPI_FUT_TESTING_DETACHED_END 0x521e #define _STARPU_MPI_FUT_TEST_BEGIN 0x521f #define _STARPU_MPI_FUT_TEST_END 0x5220 #define _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN 0x520a #define _STARPU_MPI_FUT_IRECV_COMPLETE_END 0x520b #define _STARPU_MPI_FUT_SLEEP_BEGIN 0x520c #define _STARPU_MPI_FUT_SLEEP_END 0x520d #define _STARPU_MPI_FUT_DTESTING_BEGIN 0x520e #define _STARPU_MPI_FUT_DTESTING_END 0x520f #define _STARPU_MPI_FUT_UTESTING_BEGIN 0x5210 #define _STARPU_MPI_FUT_UTESTING_END 0x5211 #define _STARPU_MPI_FUT_UWAIT_BEGIN 0x5212 #define _STARPU_MPI_FUT_UWAIT_END 0x5213 #define _STARPU_MPI_FUT_POLLING_BEGIN 0x5214 #define _STARPU_MPI_FUT_POLLING_END 0x5215 #define _STARPU_MPI_FUT_DRIVER_RUN_BEGIN 0x5216 #define _STARPU_MPI_FUT_DRIVER_RUN_END 0x5217 #define _STARPU_MPI_FUT_DATA_SET_TAG 0x5218 #define _STARPU_MPI_FUT_IRECV_NUMA_NODE 0x5219 #define _STARPU_MPI_FUT_ISEND_NUMA_NODE 0x5221 #define _STARPU_MPI_FUT_CHECKPOINT_BEGIN 0x5222 #define _STARPU_MPI_FUT_CHECKPOINT_END 0x5223 """ PROGNAME=sys.argv[0] number_events_path = None def usage(): print("Convert event keys in number_events.data to event names") print("") print("Usage: %s " % PROGNAME) print("") print("Options:") print(" -h, --help display this help and exit") print(" -v, --version output version information and exit") print("") print("Report bugs to ") sys.exit(1) if len(sys.argv) == 2: if sys.argv[1] == '-v' or sys.argv[1] == '--version': print("%s (StarPU) 1.4.9" % PROGNAME) sys.exit(0) elif sys.argv[1] == '-h' or sys.argv[1] == '--help': usage() else: number_events_path = sys.argv[1] else: usage() # Process fxt_code_raw content to ease the conversion: fxt_codes = dict() for line in fxt_codes_raw.split("\n"): elements = line.split() if len(elements) == 3: key = int(elements[2][2:], 16) assert key not in fxt_codes fxt_codes[key] = elements[1] # Convert content of the file: nb_events = 0 with open(number_events_path, 'r') as f: for line in f: elements = line.split() if len(elements) == 2: key = int(elements[0][2:], 16) nb = int(elements[1]) nb_events += nb if key in fxt_codes: print("%12d %s" % (nb, fxt_codes[key])) else: print("%12d %s" % (nb, elements[0])) print(" TOTAL: %d" % nb_events) starpu-1.4.9+dfsg/tools/starpu_fxt_number_events_to_names.py.in000066400000000000000000000054411507764646700251530ustar00rootroot00000000000000#!/usr/bin/env python3 # -*- coding: utf-8 -*- # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2020-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # import sys """ Convert event keys into event names Running starpu_fxt_tool with the option -number-events produces a file number_events.data This file contains the number of events for each event type. Events are represented with their key. To convert event keys to event names, call starpu_fxt_number_events_to_names.py """ # STARPU_FXT_EVENT_DEFINES is generated by configure and is the output of # the following command: # grep -E "#define\s+_STARPU_(MPI_)?FUT_" src/common/fxt.h mpi/src/starpu_mpi_fxt.h | grep 0x | grep -v 0x1 |cut -d : -f 2 fxt_codes_raw = """ @STARPU_FXT_EVENT_DEFINES@ """ PROGNAME=sys.argv[0] number_events_path = None def usage(): print("Convert event keys in number_events.data to event names") print("") print("Usage: %s " % PROGNAME) print("") print("Options:") print(" -h, --help display this help and exit") print(" -v, --version output version information and exit") print("") print("Report bugs to <@PACKAGE_BUGREPORT@>") sys.exit(1) if len(sys.argv) == 2: if sys.argv[1] == '-v' or sys.argv[1] == '--version': print("%s (@PACKAGE_NAME@) @PACKAGE_VERSION@" % PROGNAME) sys.exit(0) elif sys.argv[1] == '-h' or sys.argv[1] == '--help': usage() else: number_events_path = sys.argv[1] else: usage() # Process fxt_code_raw content to ease the conversion: fxt_codes = dict() for line in fxt_codes_raw.split("\n"): elements = line.split() if len(elements) == 3: key = int(elements[2][2:], 16) assert key not in fxt_codes fxt_codes[key] = elements[1] # Convert content of the file: nb_events = 0 with open(number_events_path, 'r') as f: for line in f: elements = line.split() if len(elements) == 2: key = int(elements[0][2:], 16) nb = int(elements[1]) nb_events += nb if key in fxt_codes: print("%12d %s" % (nb, fxt_codes[key])) else: print("%12d %s" % (nb, elements[0])) print(" TOTAL: %d" % nb_events) starpu-1.4.9+dfsg/tools/starpu_fxt_stats.1000066400000000000000000000012361507764646700206510ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_FXT_STAT "1" "October 2025" "StarPU 1.4.9" "User Commands" .SH NAME starpu_fxt_stat \- Print statistics from raw StarPU FxT trace .SH SYNOPSIS .B starpu_fxt_stat [ \fI\,options \/\fR] .SH DESCRIPTION Parse the log generated by FxT .SH OPTIONS .TP \fB\-i\fR specify the input file. .TP \fB\-o\fR specify the output file .TP \fB\-h\fR, \fB\-\-help\fR display this help and exit .TP \fB\-v\fR, \fB\-\-version\fR output version information and exit .SH "REPORTING BUGS" Report bugs to . open failed :: Bad address .PP open failed :: Bad address starpu-1.4.9+dfsg/tools/starpu_fxt_stats.c000066400000000000000000000106231507764646700207330ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ //#include "fxt_tool.h" #include #include #include #include #include #include #include #include #include #include #include static fxt_t fut; struct fxt_ev_64 ev; static uint64_t transfers[16][16]; #define PROGNAME "starpu_fxt_stat" static void usage() { fprintf(stderr, "Parse the log generated by FxT\n\n"); fprintf(stderr, "Usage: %s [ options ]\n", PROGNAME); fprintf(stderr, "\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, " -i specify the input file.\n"); fprintf(stderr, " -o specify the output file\n"); fprintf(stderr, " -h, --help display this help and exit\n"); fprintf(stderr, " -v, --version output version information and exit\n\n"); fprintf(stderr, "Report bugs to <%s>.", PACKAGE_BUGREPORT); fprintf(stderr, "\n"); } static int parse_args(int argc, char **argv, char **fin, char **fout) { int i; *fin = NULL; *fout = NULL; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-o") == 0) { *fout = argv[++i]; continue; } if (strcmp(argv[i], "-i") == 0) { *fin = argv[++i]; continue; } if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { usage(); return EXIT_SUCCESS; } if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) { fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); return EXIT_SUCCESS; } } if (!*fin) { fprintf(stderr, "Incorrect usage, aborting\n"); usage(); return 77; } return 0; } static void handle_data_copy(void) { unsigned src = ev.param[0]; unsigned dst = ev.param[1]; unsigned size = ev.param[2]; transfers[src][dst] += size; // printf("transfer %d -> %d : %d \n", src, dst, size); } /* * This program should be used to parse the log generated by FxT */ int main(int argc, char **argv) { char *fin, *fout; int ret; int fd_in; FILE *fd_out; ret = parse_args(argc, argv, &fin, &fout); if (ret) return ret; fd_in = open(fin, O_RDONLY); if (fd_in < 0) { perror("open failed :"); exit(-1); } fut = fxt_fdopen(fd_in); if (!fut) { perror("fxt_fdopen :"); exit(-1); } if (!fout) { fd_out = stdout; } else { fd_out = fopen(fout, "w"); if (fd_out == NULL) { perror("open failed :"); exit(-1); } } fxt_blockev_t block; block = fxt_blockev_enter(fut); unsigned njob = 0; unsigned nws = 0; double start_time = 10e30; double end_time = -10e30; while(1) { ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev); if (ret != FXT_EV_OK) { fprintf(stderr, "no more block ...\n"); break; } end_time = STARPU_MAX(end_time, ev.time); start_time = STARPU_MIN(start_time, ev.time); STARPU_ATTRIBUTE_UNUSED int nbparam = ev.nb_params; switch (ev.code) { case _STARPU_FUT_DATA_COPY: handle_data_copy(); break; case _STARPU_FUT_JOB_POP: njob++; break; case _STARPU_FUT_WORK_STEALING: nws++; break; default: break; } } #ifdef HAVE_FXT_BLOCKEV_LEAVE fxt_blockev_leave(block); #endif #ifdef HAVE_FXT_CLOSE fxt_close(fut); #else if (close(fd_in)) { perror("close failed :"); exit(-1); } #endif fprintf(fd_out, "Start : start time %e end time %e length %e\n", start_time, end_time, end_time - start_time); unsigned src, dst; for (src = 0; src < 16; src++) { for (dst = 0; dst < 16; dst++) { if (transfers[src][dst] != 0) { fprintf(fd_out, "%u -> %u \t %lu MB\n", src, dst, (unsigned long)(transfers[src][dst]/(1024*1024))); } } } fprintf(fd_out, "There was %u tasks and %u work stealing\n", njob, nws); if (fd_out != stdout) fclose(fd_out); return 0; } starpu-1.4.9+dfsg/tools/starpu_fxt_tool.1000066400000000000000000000027471507764646700205000ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_FXT_TOOL "1" "October 2025" "StarPU 1.4.9" "User Commands" .SH NAME starpu_fxt_tool \- Convert raw StarPU FxT trace to various traces .SH SYNOPSIS .B starpu_fxt_tool [ \fI\,options \/\fR] .SH DESCRIPTION Generate a trace in the Paje format .SH OPTIONS .TP \fB\-i\fR specify the input file[s]. Several files can be provided, or the option specified several times for MPI execution case .TP \fB\-o\fR specify the paje output filename .TP \fB\-d\fR specify the directory in which to save files .TP \fB\-c\fR use a different colour for every type of task .TP \fB\-no\-events\fR do not show events .TP \fB\-no\-counter\fR do not show scheduler counters .TP \fB\-no\-bus\fR do not show PCI bus transfers .TP \fB\-no\-flops\fR do not show flops .TP \fB\-no\-smooth\fR avoid smoothing values for gflops etc. .TP \fB\-no\-acquire\fR do not show application data acquisitions tasks in DAG .TP \fB\-label\-deps\fR add label on dependencies. .TP \fB\-memory\-states\fR show detailed memory states of handles .TP \fB\-internal\fR show StarPU\-internal tasks in DAG .TP \fB\-number\-events\fR generate a file counting FxT events by type .TP \fB\-use\-task\-color\fR propagate the specified task color to the contexts .TP \fB\-h\fR, \fB\-\-help\fR display this help and exit .TP \fB\-v\fR, \fB\-\-version\fR output version information and exit .SH "REPORTING BUGS" Report bugs to . starpu-1.4.9+dfsg/tools/starpu_fxt_tool.c000066400000000000000000000114271507764646700205550ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2008-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2020,2021 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This program should be used to parse the log generated by FxT */ #include #include #include #define PROGNAME "starpu_fxt_tool" static void usage() { fprintf(stderr, "Generate a trace in the Paje format\n\n"); fprintf(stderr, "Usage: %s [ options ]\n", PROGNAME); fprintf(stderr, "\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, " -i specify the input file[s]. Several files can be provided,\n"); fprintf(stderr, " or the option specified several times for MPI execution\n"); fprintf(stderr, " case\n"); fprintf(stderr, " -o specify the paje output filename\n"); fprintf(stderr, " -d specify the directory in which to save files\n"); fprintf(stderr, " -c use a different colour for every type of task\n"); fprintf(stderr, " -no-events do not show events\n"); fprintf(stderr, " -no-counter do not show scheduler counters\n"); fprintf(stderr, " -no-bus do not show PCI bus transfers\n"); fprintf(stderr, " -no-flops do not show flops\n"); fprintf(stderr, " -no-smooth avoid smoothing values for gflops etc.\n"); fprintf(stderr, " -no-acquire do not show application data acquisitions tasks in DAG\n"); fprintf(stderr, " -label-deps add label on dependencies.\n"); fprintf(stderr, " -memory-states show detailed memory states of handles\n"); fprintf(stderr, " -internal show StarPU-internal tasks in DAG\n"); fprintf(stderr, " -number-events generate a file counting FxT events by type\n"); fprintf(stderr, " -use-task-color propagate the specified task color to the contexts\n"); fprintf(stderr, " -h, --help display this help and exit\n"); fprintf(stderr, " -v, --version output version information and exit\n\n"); fprintf(stderr, "Report bugs to <%s>.", PACKAGE_BUGREPORT); fprintf(stderr, "\n"); } static struct starpu_fxt_options options; static int parse_args(int argc, char **argv) { /* Default options */ starpu_fxt_options_init(&options); /* We want to support arguments such as "fxt_tool -i trace_*" */ unsigned reading_input_filenames = 0; int i; for (i = 1; i < argc; i++) { int ret = _starpu_generate_paje_trace_read_option(argv[i], &options); if (ret == 0) { reading_input_filenames = 0; } else if (strcmp(argv[i], "-o") == 0) { free(options.out_paje_path); options.out_paje_path = strdup(argv[++i]); reading_input_filenames = 0; } else if (strcmp(argv[i], "-d") == 0) { options.dir = argv[++i]; reading_input_filenames = 0; } else if (strcmp(argv[i], "-i") == 0) { if (options.ninputfiles >= STARPU_FXT_MAX_FILES) { fprintf(stderr, "Error: The number of trace files is superior to STARPU_FXT_MAX_FILES (%d)\nPlease recompile StarPU with a bigger --enable-fxt-max-files\n", STARPU_FXT_MAX_FILES); return 7; } options.filenames[options.ninputfiles++] = argv[++i]; reading_input_filenames = 1; } else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { usage(); return 77; } else if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) { fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); return 77; } /* That's pretty dirty: if the reading_input_filenames flag is * set, and that the argument does not match an option, we * assume this may be another filename */ else if (reading_input_filenames) { if (options.ninputfiles >= STARPU_FXT_MAX_FILES) { fprintf(stderr, "Error: The number of trace files is superior to STARPU_FXT_MAX_FILES (%d)\nPlease recompile StarPU with a bigger --enable-fxt-max-files\n", STARPU_FXT_MAX_FILES); return 7; } options.filenames[options.ninputfiles++] = argv[i]; } } if (!options.ninputfiles) { fprintf(stderr, "Incorrect usage, aborting\n"); usage(); return 77; } return 0; } int main(int argc, char **argv) { int ret = parse_args(argc, argv); if (ret) { starpu_fxt_options_shutdown(&options); return ret; } starpu_fxt_generate_trace(&options); starpu_fxt_options_shutdown(&options); return 0; } starpu-1.4.9+dfsg/tools/starpu_lp2paje.1000066400000000000000000000006341507764646700201700ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_LP2PAJE "1" "October 2025" "StarPU 1.4.9" "User Commands" .SH NAME starpu_lp2paje \- Convert lp StarPU schedule into Paje format .SH SYNOPSIS .B lp_solve \fI\,file.lp | starpu_lp2paje > paje.trace\/\fR .SH DESCRIPTION Convert schedule optimized by lp into the Paje format .SH "REPORTING BUGS" Report bugs to . starpu-1.4.9+dfsg/tools/starpu_lp2paje.c000066400000000000000000000103041507764646700202450ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #define PROGNAME "starpu_lp2paje" struct task { double start; double stop; int num; int worker; }; int main(int argc, char *argv[]) { int nw, nt; double tmax; int i, w, ww, t, tt; int foo; double bar; if (argc != 1) { if (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0) { fprintf(stderr, "%s (%s) %s\n", PROGNAME, PACKAGE_NAME, PACKAGE_VERSION); exit(EXIT_SUCCESS); } fprintf(stderr, "Convert schedule optimized by lp into the Paje format\n\n"); fprintf(stderr, "Usage: lp_solve file.lp | %s > paje.trace\n", PROGNAME); fprintf(stderr, "Report bugs to <%s>.", PACKAGE_BUGREPORT); fprintf(stderr, "\n"); exit(EXIT_SUCCESS); } assert(scanf("Suboptimal solution\n") == 0); assert(scanf("\nValue of objective function: %lf\n", &tmax) == 1); assert(scanf("Actual values of the variables:\n") == 0); assert(scanf("tmax %lf\n", &tmax) == 1); assert(scanf("nt %d\n", &nt) == 1); assert(nt >= 0); assert(scanf("nw %d\n", &nw) == 1); assert(nw >= 0); printf( "%%EventDef PajeDefineContainerType 1\n" "%% Alias string\n" "%% ContainerType string\n" "%% Name string\n" "%%EndEventDef\n" "%%EventDef PajeCreateContainer 2\n" "%% Time date\n" "%% Alias string\n" "%% Type string\n" "%% Container string\n" "%% Name string\n" "%%EndEventDef\n" "%%EventDef PajeDefineStateType 3\n" "%% Alias string\n" "%% ContainerType string\n" "%% Name string\n" "%%EndEventDef\n" "%%EventDef PajeDestroyContainer 4\n" "%% Time date\n" "%% Name string\n" "%% Type string\n" "%%EndEventDef\n" "%%EventDef PajeDefineEntityValue 5\n" "%% Alias string\n" "%% EntityType string\n" "%% Name string\n" "%% Color color\n" "%%EndEventDef\n" "%%EventDef PajeSetState 6\n" "%% Time date\n" "%% Type string\n" "%% Container string\n" "%% Value string\n" "%%EndEventDef\n" "1 W 0 Worker\n" ); printf("3 S W \"Worker State\"\n"); for (t = 0; t < nt; t++) printf("5 R%d S Running_%d \"0.0 1.0 0.0\"\n", t, t); printf("5 F S Idle \"1.0 0.0 0.0\"\n"); for (i = 0; i < nw; i++) printf("2 0 W%d W 0 \"%d\"\n", i, i); for (w = 0; w < nw; w++) printf("4 %f W%d W\n", tmax, w); fprintf(stderr,"%d workers, %d tasks\n", nw, nt); { struct task task[nt]; memset(&task, 0, sizeof(task)); for (t = nt-1; t >= 0; t--) { assert(scanf("c%d %lf\n", &foo, &task[t].stop) == 2); } for (t = nt-1; t >= 0; t--) for (w = 0; w < nw; w++) { assert(scanf("t%dw%d %lf\n", &tt, &ww, &bar) == 3); assert(ww == w); if (bar > 0.5) { task[t].num = tt; task[t].worker = w; } } for (t = nt-1; t >= 0; t--) { assert(scanf("s%d %lf\n", &tt, &task[t].start) == 2); fprintf(stderr,"%d: task %d on %d: %f - %f\n", nt-1-t, tt, task[t].worker, task[t].start, task[t].stop); assert(tt == task[t].num); } for (t = 0; t < nt; t++) { printf("6 %f S W%d R%d\n", task[t].start, task[t].worker, t); printf("6 %f S W%d F\n", task[t].stop, task[t].worker); } for (t = 0; t < nt; t++) { int t2; for (t2 = 0; t2 < nt; t2++) { if (t != t2 && task[t].worker == task[t2].worker) { if (!(task[t].start >= task[t2].stop || task[t2].start >= task[t].stop)) { fprintf(stderr,"oops, %d and %d sharing worker %d !!\n", task[t].num, task[t2].num, task[t].worker); } } } } } return 0; } starpu-1.4.9+dfsg/tools/starpu_machine_display.1000066400000000000000000000017171507764646700217670ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_MACHINE_DISPLAY "1" "October 2025" "StarPU 1.4.9" "User Commands" .SH NAME starpu_machine_display \- Display machine StarPU information .SH SYNOPSIS .B starpu_machine_display [\fI\,OPTION\/\fR] .SH DESCRIPTION Show the processing units that StarPU can use, and the bandwidth and affinity measured between the memory nodes. .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR display this help and exit .TP \fB\-v\fR, \fB\-\-version\fR output version information and exit .TP \fB\-i\fR, \fB\-\-info\fR display the name of the files containing the information .TP \fB\-f\fR, \fB\-\-force\fR force bus sampling and show measures .HP \fB\-w\fR, \fB\-\-worker\fR only show workers of the given type .TP \fB\-c\fR, \fB\-\-count\fR only display the number of workers .TP \fB\-n\fR, \fB\-\-notopology\fR do not display the bandwidth and affinity .SH "REPORTING BUGS" Report bugs to . starpu-1.4.9+dfsg/tools/starpu_machine_display.c000066400000000000000000000145621507764646700220530ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #define PROGNAME "starpu_machine_display" static void usage() { fprintf(stderr, "Show the processing units that StarPU can use,\n"); fprintf(stderr, "and the bandwidth and affinity measured between the memory nodes.\n"); fprintf(stderr, "\n"); fprintf(stderr, "Usage: %s [OPTION]\n", PROGNAME); fprintf(stderr, "\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, "\t-h, --help display this help and exit\n"); fprintf(stderr, "\t-v, --version output version information and exit\n"); fprintf(stderr, "\t-i, --info display the name of the files containing the information\n"); fprintf(stderr, "\t-f, --force force bus sampling and show measures \n"); fprintf(stderr, "\t-w, --worker only show workers of the given type\n"); fprintf(stderr, "\t-c, --count only display the number of workers\n"); fprintf(stderr, "\t-n, --notopology do not display the bandwidth and affinity\n"); fprintf(stderr, "\n"); fprintf(stderr, "Report bugs to <%s>.\n", PACKAGE_BUGREPORT); } static void display_combined_worker(unsigned workerid) { int worker_size; int *combined_workerid; starpu_combined_worker_get_description(workerid, &worker_size, &combined_workerid); fprintf(stdout, "\t\t"); int i; for (i = 0; i < worker_size; i++) { char name[256]; starpu_worker_get_name(combined_workerid[i], name, 256); fprintf(stdout, "%s\t", name); } fprintf(stdout, "\n"); } static void display_all_combined_workers(void) { unsigned ncombined_workers = starpu_combined_worker_get_count(); if (ncombined_workers == 0) return; unsigned nworkers = starpu_worker_get_count(); fprintf(stdout, "\t%u Combined workers\n", ncombined_workers); unsigned i; for (i = 0; i < ncombined_workers; i++) display_combined_worker(nworkers + i); } static void parse_args(int argc, char **argv, int *force, int *info, int *count, int *topology, char **worker_type) { int i; if (argc == 1) return; for (i = 1; i < argc; i++) { if (strncmp(argv[i], "--force", 7) == 0 || strncmp(argv[i], "-f", 2) == 0) { *force = 1; } else if (strncmp(argv[i], "--info", 6) == 0 || strncmp(argv[i], "-i", 2) == 0) { *info = 1; } else if (strncmp(argv[i], "--help", 6) == 0 || strncmp(argv[i], "-h", 2) == 0) { usage(); exit(EXIT_FAILURE); } else if (strncmp(argv[i], "--version", 9) == 0 || strncmp(argv[i], "-v", 2) == 0) { fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); exit(EXIT_FAILURE); } else if (strncmp(argv[i], "--count", 7) == 0 || strncmp(argv[i], "-c", 2) == 0) { *count = 1; } else if (strncmp(argv[i], "--worker", 8) == 0 || strncmp(argv[i], "-w", 2) == 0) { *worker_type = strdup(argv[++i]); } else if (strncmp(argv[i], "--notopology", 12) == 0 || strncmp(argv[i], "-n", 2) == 0) { *topology = 0; } else { fprintf(stderr, "Unknown arg %s\n", argv[1]); usage(); exit(EXIT_FAILURE); } } } int main(int argc, char **argv) { int ret; int force = 0; int info = 0; int count = 0; int topology = 1; char *worker_type = NULL; struct starpu_conf conf; parse_args(argc, argv, &force, &info, &count, &topology, &worker_type); starpu_conf_init(&conf); if (force) conf.bus_calibrate = 1; /* Even if starpu_init returns -ENODEV, we should go on : we will just * print that we found no device. */ ret = starpu_init(&conf); if (ret != 0 && ret != -ENODEV) { return ret; } starpu_worker_wait_for_initialisation(); if (info) { starpu_bus_print_filenames(stdout); starpu_shutdown(); return 0; } char real_hostname[128]; char starpu_hostname[128]; gethostname(real_hostname, sizeof(real_hostname)); _starpu_gethostname(starpu_hostname, sizeof(starpu_hostname)); fprintf(stdout, "Real hostname: %s (StarPU hostname: %s)\n", real_hostname, starpu_hostname); const char *env[] = { "STARPU_NCPU", "STARPU_NCPUS", "STARPU_NCUDA", "STARPU_NHIP", "STARPU_NOPENCL", "STARPU_NMAX_FPGA", "STARPU_NMPI_MS", "STARPU_NTCPIP_MS", "STARPU_WORKERS_CPUID", "STARPU_WORKERS_COREID", "STARPU_NTHREADS_PER_CORE", "STARPU_RESERVE_NCPU", "STARPU_MAIN_THREAD_BIND", "STARPU_MAIN_THREAD_CPUID", "STARPU_MAIN_THREAD_COREID", "STARPU_WORKERS_CUDAID", "STARPU_CUDA_THREAD_PER_WORKER", "STARPU_CUDA_THREAD_PER_DEV", "STARPU_WORKERS_OPENCLID", "STARPU_WORKERS_MAX_FPGAID", "STARPU_MPI_MS_MULTIPLE_THREAD", "STARPU_NMPIMSTHREADS", "STARPU_TCPIP_MS_MULTIPLE_THREAD", "STARPU_NTCPIPMSTHREADS", "STARPU_MPI_HOSTNAMES", "STARPU_HOSTNAME", NULL }; int i; static int message=0; for (i = 0; env[i]; i++) { const char *e = getenv(env[i]); if (e) { if (!message) { fprintf(stdout, "Environment variables\n"); message=1; } fprintf(stdout, "\t%s=%s\n", env[i], e); } } if (message) fprintf(stdout,"\n"); void (*func)(FILE *output, enum starpu_worker_archtype type) = &starpu_worker_display_names; if (count == 1) func = &starpu_worker_display_count; enum starpu_worker_archtype type; if (worker_type) { type = starpu_worker_get_type_from_string(worker_type); if (type == STARPU_UNKNOWN_WORKER) fprintf(stderr, "Unknown worker type '%s'\n", worker_type); else func(stdout, type); } else { fprintf(stdout, "StarPU has found :\n"); for (type = 0; type < STARPU_NARCH; type++) func(stdout, type); display_all_combined_workers(); } if (ret != -ENODEV) { if (topology == 1) { fprintf(stdout, "\ntopology ... (hwloc logical indexes)\n"); starpu_topology_print(stdout); fprintf(stdout, "\nbandwidth (MB/s) and latency (us)...\n"); starpu_bus_print_bandwidth(stdout); } starpu_shutdown(); } return 0; } starpu-1.4.9+dfsg/tools/starpu_mlr_analysis000077500000000000000000000041521507764646700211730ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Script for giving statistical analysis of the paje trace set -e # fail fast # File names SOURCE_DIR=$(dirname $0) outputfile="mlr_analysis.html" analysis_script="$SOURCE_DIR/starpu_mlr_analysis.Rmd" # Command line arguments inputfile="" help_script() { cat << EOF Give an example of the trace analysis for computing multiple linear regression model Options: -h Show this message Examples: $0 .starpu/sampling/codelets/tmp/test_mlr.out Report bugs to EOF } if [ "$1" = "--version" ] ; then echo "$PROGNAME (StarPU) 1.4.9" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] ; then help_script exit 0 fi while getopts "h" opt; do case $opt in \?) echo "Invalid option: -$OPTARG" help_script exit 3 ;; esac done # Reading files that need to be analyzed shift $((OPTIND - 1)) inputfile=$1 # Error if there is more than one input file if [[ $# < 1 || $# > 1 ]]; then echo "Error!" help_script exit 2 fi if [ ! -s $inputfile ] then echo "Error: file $inputfile does not exist!" exit 5 fi ##################################### # Running analysis file to get actual results in="$(cd "$(dirname "$inputfile")"; pwd)/$(basename "$inputfile")" Rscript -e "library(knitr); input_trace = '$in' ; outputhtml='$outputfile';\ outputRmd = gsub('.html\$','.Rmd',outputhtml);\ knit('$analysis_script',output=outputRmd); knitr::knit2html(outputRmd)" starpu-1.4.9+dfsg/tools/starpu_mlr_analysis.Rmd000066400000000000000000000230111507764646700217040ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # ```{r Setup, echo=FALSE} opts_chunk$set(echo=FALSE) ``` ```{r Load_R_files_and_functions} print_codelet <- function(reg,codelet){ cat(paste("/* ############################################ */", "\n")) cat(paste("/*\t Automatically generated code */", "\n")) cat(paste("\t Check for potential errors and be sure parameter value are written in good order (alphabetical one by default)", "\n")) cat(paste("\t Adjusted R-squared: ", summary(reg)$adj.r.squared, "*/\n\n")) ncomb <- reg$rank - 1 cat(paste("\t ", codelet, ".model->ncombinations = ", ncomb, ";\n", sep="")) cat(paste("\t ", codelet, ".model->combinations = (unsigned **) malloc(", codelet, ".model->ncombinations*sizeof(unsigned *))", ";\n\n", sep="")) cat(paste("\t if (", codelet, ".model->combinations)", "\n", "\t {\n", sep="")) cat(paste("\t for (unsigned i = 0; i < ", codelet, ".model->ncombinations; i++)", "\n", "\t {\n", sep="")) cat(paste("\t ", codelet, ".model->combinations[i] = (unsigned *) malloc(", codelet, ".model->nparameters*sizeof(unsigned))", ";\n", "\t }\n", "\t }\n\n", sep="")) # Computing combinations df <- data.frame(attr(reg$terms, "factors")) df <- df/2 df$Params <- row.names(df) df <-df[c(2:nrow(df)),] i=1 options(warn=-1) for(i in (1:nrow(df))) { name <- df[i,]$Params if (grepl("I\\(*", name)) { exp <- as.numeric(gsub("(.*?)\\^(.*?)\\)", "\\2", name)) df[i,] <- as.numeric(df[i,]) * exp df[i,]$Params <- as.character(gsub("I\\((.*?)\\^(.*?)\\)", "\\1", name)) } } df <- aggregate(. ~ Params, transform(df, Params), sum) options(warn=0) i=1 j=1 for(j in (2:length(df))) { for(i in (1:nrow(df))) { cat(paste("\t ", codelet, ".model->combinations[", j-2, "][", i-1, "] = ", as.numeric(df[i,j]), ";\n", sep="")) } } cat(paste("/* ############################################ */", "\n")) } df<-read.csv(input_trace, header=TRUE) opts_chunk$set(echo=TRUE) ``` # Multiple Linear Regression Model Example ## Introduction This document demonstrates the type of the analysis needed to compute the multiple linear regression model of the task. It relies on the input data benchmarked by the StarPU (or any other tool, but following the same format). The input data used in this example is generated by the task "mlr_init", from the "examples/mlr/mlr.c". This document can be used as an template for the analysis of any other task. ### How to compile ./starpu_mlr_analysis .starpu/sampling/codelets/tmp/mlr_init.out ### Software dependencies In order to run the analysis you need to have R installed: sudo apt-get install r-base In order to compile this document, you need *knitr* (although you can perfectly only use the R code from this document without knitr). If you decided that you want to generate this document, then start R (e.g., from terminal) and install knitr package: R> install.packages("knitr") No additional R packages are needed. ## First glimpse at the data First, we show the relations between all parameters in a single plot. ```{r InitPlot} plot(df) ``` For this example, all three parameters M, N, K have some influence, but their relation is not easy to understand. In general, this type of plots can typically show if there are outliers. It can also show if there is a group of parameters which are mutually perfectly correlated, in which case only a one parameter from the group should be kept for the further analysis. Additionally, plot can show the parameters that have a constant value, and since these cannot have an influence on the model, they should also be ignored. However, making conclusions based solely on the visual analysis can be treacherous and it is better to rely on the statistical tools. The multiple linear regression methods used in the following sections will also be able to detect and ignore these irrelevant parameters. Therefore, this initial visual look should only be used to get a basic idea about the model, but all the parameters should be kept for now. ## Initial model At this point, an initial model is computed, using all the parameters, but not taking into account their exponents or the relations between them. ```{r Model1} model1 <- lm(data=df, Duration ~ M+N+K) summary(model1) ``` For each parameter and the constant in the first column, an estimation of the corresponding coefficient is provided along with the 95% confidence interval. If there are any parameters with NA value, which suggests that the parameters are correlated to another parameter or that their value is constant, these parameters should not be used in the following model computations. The stars in the last column indicate the significance of each parameter. However, having maximum three stars for each parameter does not necessarily mean that the model is perfect and we should always inspect the adjusted R^2 value (the closer it is to 1, the better the model is). To the users that are not common to the multiple linear regression analysis and R tools, we suggest to the R documentation. Some explanations are also provided in the following article https://hal.inria.fr/hal-01180272. In this example, all parameters M, N, K are very important. However, it is not clear if there are some relations between them or if some of these parameters should be used with an exponent. Moreover, adjusted R^2 value is not extremely high and we hope we can get a better one. Thus, we proceed to the more advanced analysis. ## Refining the model Now, we can seek for the relations between the parameters. Note that trying all the possible combinations for the cases with a huge number of parameters can be prohibitively long. Thus, it may be better to first get rid of the parameters which seem to have very small influence (typically the ones with no stars from the table in the previous section). ```{r Model2} model2 <- lm(data=df, Duration ~ M*N*K) summary(model2) ``` This model is more accurate, as the R^2 value increased. We can also try some of these parameters with the exponents. ```{r Model3} model3 <- lm(data=df, Duration ~ I(M^2)+I(M^3)+I(N^2)+I(N^3)+I(K^2)+I(K^3)) summary(model3) ``` It seems like some parameters are important. Now we combine these and try to find the optimal combination (here we go directly to the final solution, although this process typically takes several iterations of trying different combinations). ```{r Model4} model4 <- lm(data=df, Duration ~ I(M^2):N+I(N^3):K) summary(model4) ``` This seems to be the most accurate model, with a high R^2 value. We can proceed to its validation. ## Validation Once the model has been computed, we should validate it. Apart from the low adjusted R^2 value, the model weakness can also be observed even better when inspecting the residuals. The results on two following plots (and thus the accuracy of the model) will greatly depend on the measurements variability and the design of experiments. ```{r Validation} par(mfrow=c(1,2)) plot(model4, which=c(1:2)) ``` Generally speaking, if there are some structures on the left plot, this can indicate that there are certain phenomena not explained by the model. Many points on the same horizontal line represent repetitive occurrences of the task with the same parameter values, which is typical for a single experiment run with a homogeneous data. The fact that there is some variability is common, as executing exactly the same code on a real machine will always have slightly different duration. However, having a huge variability means that the benchmarks were very noisy, thus deriving an accurate models from them will be hard. Plot on the right may show that the residuals do not follow the normal distribution. Therefore, such model in overall would have a limited predictive power. If we are not satisfied with the accuracy of the observed models, we should go back to the previous section and try to find a better one. In some cases, the benchmarked data is just be too noisy or the choice of the parameters is not appropriate, and thus the experiments should be redesigned and rerun. When we are finally satisfied with the model accuracy, we should modify our task code, so that StarPU knows which parameters combinations are used in the model. ## Generating C code Depending on the way the task codelet is programmed, this section may be somehow useful. This is a simple helper to generate C code for the parameters combinations and it should be copied to the task description in the application. The function generating the code is not so robust, so make sure that the generated code correctly corresponds to computed model (e.g., parameters are considered in the alphabetical order). ```{r Code} print_codelet(model4, "mlr_cl") ``` ## Conclusion We have computed the model for our benchmarked data using multiple linear regression. After encoding this model into the task code, StarPU will be able to automatically compute the coefficients and use the model to predict task duration. starpu-1.4.9+dfsg/tools/starpu_mlr_analysis.in000066400000000000000000000041741507764646700216010ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Script for giving statistical analysis of the paje trace set -e # fail fast # File names SOURCE_DIR=$(dirname $0) outputfile="mlr_analysis.html" analysis_script="$SOURCE_DIR/starpu_mlr_analysis.Rmd" # Command line arguments inputfile="" help_script() { cat << EOF Give an example of the trace analysis for computing multiple linear regression model Options: -h Show this message Examples: $0 .starpu/sampling/codelets/tmp/test_mlr.out Report bugs to <@PACKAGE_BUGREPORT@> EOF } if [ "$1" = "--version" ] ; then echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] ; then help_script exit 0 fi while getopts "h" opt; do case $opt in \?) echo "Invalid option: -$OPTARG" help_script exit 3 ;; esac done # Reading files that need to be analyzed shift $((OPTIND - 1)) inputfile=$1 # Error if there is more than one input file if [[ $# < 1 || $# > 1 ]]; then echo "Error!" help_script exit 2 fi if [ ! -s $inputfile ] then echo "Error: file $inputfile does not exist!" exit 5 fi ##################################### # Running analysis file to get actual results in="$(cd "$(dirname "$inputfile")"; pwd)/$(basename "$inputfile")" Rscript -e "library(knitr); input_trace = '$in' ; outputhtml='$outputfile';\ outputRmd = gsub('.html\$','.Rmd',outputhtml);\ knit('$analysis_script',output=outputRmd); knitr::knit2html(outputRmd)" starpu-1.4.9+dfsg/tools/starpu_mpi_comm_matrix.1000066400000000000000000000011461507764646700220160ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_MPI_COMM_MATRIX.PY "1" "October 2025" "StarPU 1.4.9" "User Commands" .SH NAME starpu_mpi_comm_matrix.py \- Draw StarPU MPI communications matrix .SH SYNOPSIS .B starpu_mpi_comm_matrix.py \fI\,\/\fR .SH DESCRIPTION Offline tool to draw a communication matrix .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR display this help and exit .TP \fB\-v\fR, \fB\-\-version\fR output version information and exit .TP \fB\-png\fR produce plots in png format (default is pdf) .SH "REPORTING BUGS" Report bugs to starpu-1.4.9+dfsg/tools/starpu_mpi_comm_matrix.py000077500000000000000000000102311507764646700223040ustar00rootroot00000000000000#!/usr/bin/env python3 # coding=utf-8 # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # """ Plot statistics produced when running an application with STARPU_MPI_STATS=1 """ import sys import re import os PROGNAME=sys.argv[0] def usage(): print("Offline tool to draw a communication matrix") print("") print("Usage: %s " % PROGNAME) print("") print("Options:") print(" -h, --help display this help and exit") print(" -v, --version output version information and exit") print(" -png produce plots in png format (default is pdf)") print("") print("Report bugs to ") sys.exit(1) if len(sys.argv) >= 2: if sys.argv[1] == '-v' or sys.argv[1] == '--version': print("%s (StarPU) 1.4.9" % PROGNAME) sys.exit(0) if sys.argv[1] == '-h' or sys.argv[1] == '--help': usage() if len(sys.argv) == 1: usage() if len(sys.argv) >= 2 and sys.argv[1] == '-png': outputformat='png' outputext='png' outputfile=sys.argv[2] else: outputformat='pdf color' outputext='pdf' outputfile=sys.argv[1] # find the number of nodes nodes=0 file = open(outputfile, "r") for line in file.readlines(): if re.search('TOTAL', line): (node,stuff)=line.split(sep="[")[2].split("]") if int(node) > nodes: nodes=int(node) file.close() nodes=nodes+1 # extract volume of comm and bandwidth between all pair of nodes volumes = [[0 for _ in range(nodes)] for _ in range(nodes)] bandwidth = [[0 for _ in range(nodes)] for _ in range(nodes)] file = open(outputfile, "r") for line in file.readlines(): if re.search(r'\[starpu_comm_stats]', line) and not re.search('TOTAL', line) and re.search('MB/s', line): (head,volB,B,volMB,MB,bwB,B,bwMB,MB) = line.split() (src,dst)=head.split(sep="[")[2].split(sep="]")[0].split(sep=":") volumes[int(src)][int(dst)] = float(volB) bandwidth[int(src)][int(dst)] = float(bwB) file.close() def write_data(filename, nodes, data): ofile=open(filename, "w") for dst in range(nodes): for src in range(nodes): ofile.write("%f "% data[src][dst]) ofile.write("\n") ofile.close() def generate_gnuplot_script(filename, datafilename, outputfile, nodes): ofile=open(filename, "w") srctics="" dsttics="" for node in range(nodes-1): srctics += "\"src%d\" %d, " % (node, node) dsttics += "\"dst%d\" %d, " % (node, node) ofile.write("set term %s\n" % outputformat) ofile.write("set output \"%s.%s\"\n" % (outputfile, outputext)) ofile.write("set view map scale 1\nset style data lines\n") ofile.write("set palette gray\n") ofile.write("set xtics (%s\"src%d\" %d)\n" % (srctics, nodes-1, nodes-1)) ofile.write("set ytics (%s\"dst%d\" %d)\n" % (dsttics, nodes-1, nodes-1)) ofile.write("plot '%s' matrix with image\n" % datafilename) ofile.close() # generate gnuplot volume data and script file write_data(outputfile+"_volume.data", nodes, volumes) generate_gnuplot_script(outputfile+"_volume.gp", outputfile+"_volume.data", outputfile+"_volume_heatmap", nodes) os.system("gnuplot " + outputfile+"_volume.gp") print("Generated file \"%s.%s\"" % (outputfile+"_volume.data", outputext)) # generate gnuplot bandwidth data and script file write_data(outputfile+"_bw.data", nodes, bandwidth) generate_gnuplot_script(outputfile+"_bw.gp", outputfile+"_bw.data", outputfile+"_bw_heatmap", nodes) os.system("gnuplot " + outputfile+"_bw.gp") print("Generated file \"%s.%s\"" % (outputfile+"_bw.data", outputext)) starpu-1.4.9+dfsg/tools/starpu_mpi_comm_matrix.py.in000077500000000000000000000102531507764646700227150ustar00rootroot00000000000000#!/usr/bin/env python3 # coding=utf-8 # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # """ Plot statistics produced when running an application with STARPU_MPI_STATS=1 """ import sys import re import os PROGNAME=sys.argv[0] def usage(): print("Offline tool to draw a communication matrix") print("") print("Usage: %s " % PROGNAME) print("") print("Options:") print(" -h, --help display this help and exit") print(" -v, --version output version information and exit") print(" -png produce plots in png format (default is pdf)") print("") print("Report bugs to <@PACKAGE_BUGREPORT@>") sys.exit(1) if len(sys.argv) >= 2: if sys.argv[1] == '-v' or sys.argv[1] == '--version': print("%s (@PACKAGE_NAME@) @PACKAGE_VERSION@" % PROGNAME) sys.exit(0) if sys.argv[1] == '-h' or sys.argv[1] == '--help': usage() if len(sys.argv) == 1: usage() if len(sys.argv) >= 2 and sys.argv[1] == '-png': outputformat='png' outputext='png' outputfile=sys.argv[2] else: outputformat='pdf color' outputext='pdf' outputfile=sys.argv[1] # find the number of nodes nodes=0 file = open(outputfile, "r") for line in file.readlines(): if re.search('TOTAL', line): (node,stuff)=line.split(sep="[")[2].split("]") if int(node) > nodes: nodes=int(node) file.close() nodes=nodes+1 # extract volume of comm and bandwidth between all pair of nodes volumes = [[0 for _ in range(nodes)] for _ in range(nodes)] bandwidth = [[0 for _ in range(nodes)] for _ in range(nodes)] file = open(outputfile, "r") for line in file.readlines(): if re.search(r'\[starpu_comm_stats]', line) and not re.search('TOTAL', line) and re.search('MB/s', line): (head,volB,B,volMB,MB,bwB,B,bwMB,MB) = line.split() (src,dst)=head.split(sep="[")[2].split(sep="]")[0].split(sep=":") volumes[int(src)][int(dst)] = float(volB) bandwidth[int(src)][int(dst)] = float(bwB) file.close() def write_data(filename, nodes, data): ofile=open(filename, "w") for dst in range(nodes): for src in range(nodes): ofile.write("%f "% data[src][dst]) ofile.write("\n") ofile.close() def generate_gnuplot_script(filename, datafilename, outputfile, nodes): ofile=open(filename, "w") srctics="" dsttics="" for node in range(nodes-1): srctics += "\"src%d\" %d, " % (node, node) dsttics += "\"dst%d\" %d, " % (node, node) ofile.write("set term %s\n" % outputformat) ofile.write("set output \"%s.%s\"\n" % (outputfile, outputext)) ofile.write("set view map scale 1\nset style data lines\n") ofile.write("set palette gray\n") ofile.write("set xtics (%s\"src%d\" %d)\n" % (srctics, nodes-1, nodes-1)) ofile.write("set ytics (%s\"dst%d\" %d)\n" % (dsttics, nodes-1, nodes-1)) ofile.write("plot '%s' matrix with image\n" % datafilename) ofile.close() # generate gnuplot volume data and script file write_data(outputfile+"_volume.data", nodes, volumes) generate_gnuplot_script(outputfile+"_volume.gp", outputfile+"_volume.data", outputfile+"_volume_heatmap", nodes) os.system("gnuplot " + outputfile+"_volume.gp") print("Generated file \"%s.%s\"" % (outputfile+"_volume.data", outputext)) # generate gnuplot bandwidth data and script file write_data(outputfile+"_bw.data", nodes, bandwidth) generate_gnuplot_script(outputfile+"_bw.gp", outputfile+"_bw.data", outputfile+"_bw_heatmap", nodes) os.system("gnuplot " + outputfile+"_bw.gp") print("Generated file \"%s.%s\"" % (outputfile+"_bw.data", outputext)) starpu-1.4.9+dfsg/tools/starpu_msexec000077500000000000000000000016371507764646700177670ustar00rootroot00000000000000#! /bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Usually run program through $MS_LAUNCHER, unless it is a shell script, in # which case it is the shell script that will run the program through $MS_LAUNCHER case "$2" in *.sh) exec "$@" ;; *) exec $MS_LAUNCHER "$@" ;; esac starpu-1.4.9+dfsg/tools/starpu_paje_draw_histogram000077500000000000000000000070111507764646700225040ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2014-2014 Université Joseph Fourier # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Script for giving statistical analysis of the paje trace set -e # fail fast PROGNAME=$0 # File names r_script="$(dirname $(command -v $0))/starpu_paje_draw_histogram.R" r_input="" # Command line arguments range="0:-1" name="All" verbose=0 inputfiles="" help_script() { cat << EOF Give statistical analysis of the paje trace $0 [ options ] paje.trace [paje.trace2 ...] Options: -r To fix range x1:x2 ("-1" for infinity) -n To choose a certain state -v Print output to command line -h Show this message Examples: $0 -n chol_model_22 example.native.trace $0 -r 100:300 -n FetchingInput,Overhead -v example.native.trace example.simgrid.trace Report bugs to EOF } if [ "$1" = "--version" ] ; then echo "$PROGNAME (StarPU) 1.4.9" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then help_script exit 0 fi while getopts "r:n:vh" opt; do case $opt in r) range="$OPTARG" ;; n) name="$OPTARG" ;; v) verbose=1 ;; h) help_script exit 4 ;; \?) echo "Invalid option: -$OPTARG" help_script exit 3 ;; esac done # Reading files that need to be analyzed shift $((OPTIND - 1)) inputfiles=$@ if [[ $# < 1 ]]; then echo "Error!" help_script exit 2 fi # Getting range range1=$(eval echo $range | cut -d: -f1) range2=$(eval echo $range | cut -d: -f2) ##################################### # Transforming input files into .csv for file in $inputfiles; do if [ ! -s $file ] then echo "Error: file $file does not exist!" exit 5 fi dir=$(dirname $file) # Sorting traces grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' $file > $dir/start.trace grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' -v $file > $dir/end.trace sort -s -V --key=2,2 $dir/end.trace > $dir/endSorted.trace if grep -q start_profiling $dir/endSorted.trace then echo Using start_profiling/stop_profiling trace selection. sed -ne '/start_profiling/,/stop_profiling/p' < $dir/endSorted.trace > $dir/endSorted2.trace else cp $dir/endSorted.trace $dir/endSorted2.trace fi cat $dir/start.trace $dir/endSorted2.trace > $dir/outputSorted.trace # Transferring to .csv pj_dump -n $dir/outputSorted.trace > $file.csv perl -i -ne 'print if /^State/' $file.csv r_input=$(eval echo "$r_input $file.csv") # Cleanup: delete temporary files rm -f $dir/outputSorted.trace rm -f $dir/start.trace rm -f $dir/end.trace rm -f $dir/endSorted.trace rm -f $dir/endSorted2.trace done ##################################### # Running R file to get actual results Rscript $r_script $range1 $range2 $name $r_input # Directly opening .pdf result if [[ $verbose == 1 ]]; then evince Rplots.pdf fi starpu-1.4.9+dfsg/tools/starpu_paje_draw_histogram.1000066400000000000000000000014351507764646700226440ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_PAJE_DRAW_HISTOGRAM "1" "October 2025" "StarPU 1.4.9" "User Commands" .SH NAME starpu_paje_draw_histogram \- Draw StarPU trace histogram .SH DESCRIPTION Give statistical analysis of the paje trace .PP \&./starpu_paje_draw_histogram [ options ] paje.trace [paje.trace2 ...] .SH OPTIONS .TP \fB\-r\fR To fix range x1:x2 ("\-1" for infinity) .TP \fB\-n\fR To choose a certain state .TP \fB\-v\fR Print output to command line .TP \fB\-h\fR Show this message .SH EXAMPLES \&./starpu_paje_draw_histogram \-n chol_model_22 example.native.trace .PP \&./starpu_paje_draw_histogram \-r 100:300 \-n FetchingInput,Overhead \-v example.native.trace example.simgrid.trace .SH "REPORTING BUGS" Report bugs to starpu-1.4.9+dfsg/tools/starpu_paje_draw_histogram.R000077500000000000000000000112111507764646700227010ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2014-2014 Université Joseph Fourier # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # R script that is giving statistical analysis of the paje trace # Can be called from the command line with: # Rscript $this_script $range1 $range2 $name $outputfile $inputfiles # Package containing ddply function library(plyr) library(ggplot2) library(data.table) # Function for reading .csv file read_df <- function(file,range1,range2) { df<-read.csv(file, header=FALSE, strip.white=TRUE) names(df) <- c("Nature","ResourceId","Type","Start","End","Duration", "Depth", "Value") df = df[!(names(df) %in% c("Nature","Type", "Depth"))] df$Origin<-file # Changing names if needed: df$Value <- as.character(df$Value) df$Value <- ifelse(df$Value == "F", "Freeing", as.character(df$Value)) df$Value <- ifelse(df$Value == "A", "Allocating", as.character(df$Value)) df$Value <- ifelse(df$Value == "W", "WritingBack", as.character(df$Value)) df$Value <- ifelse(df$Value == "No", "Nothing", as.character(df$Value)) df$Value <- ifelse(df$Value == "I", "Initializing", as.character(df$Value)) df$Value <- ifelse(df$Value == "D", "Deinitializing", as.character(df$Value)) df$Value <- ifelse(df$Value == "Fi", "FetchingInput", as.character(df$Value)) df$Value <- ifelse(df$Value == "Po", "PushingOutput", as.character(df$Value)) df$Value <- ifelse(df$Value == "C", "Callback", as.character(df$Value)) df$Value <- ifelse(df$Value == "B", "Overhead", as.character(df$Value)) df$Value <- ifelse(df$Value == "Sl", "Sleeping", as.character(df$Value)) df$Value <- ifelse(df$Value == "P", "Progressing", as.character(df$Value)) df$Value <- ifelse(df$Value == "U", "Unpartitioning", as.character(df$Value)) df$Value <- ifelse(df$Value == "Ar", "AllocatingReuse", as.character(df$Value)) df$Value <- ifelse(df$Value == "R", "Reclaiming", as.character(df$Value)) df$Value <- ifelse(df$Value == "Co", "DriverCopy", as.character(df$Value)) df$Value <- ifelse(df$Value == "CoA", "DriverCopyAsync", as.character(df$Value)) # Considering only the states with a given name if (name != "All") df<-df[df$Value %in% name[[1]],] # Aligning to begin time from 0 m <- min(df$Start) df$Start <- df$Start - m df$End <- df$Start+df$Duration # Taking only the states inside a given range df <- df[df$Start>=range1 & df$End<=range2,] # Return data frame df } ######################################### ######################################### # Main ######################################### # Reading command line arguments args <- commandArgs(trailingOnly = TRUE) range1<-as.numeric(args[1]) if (range1==-1) range1<-Inf range2<-as.numeric(args[2]) if (range2==-1) range2<-Inf name<-strsplit(args[3], ",") # Reading first file filename<-args[4] df<-read_df(filename,range1,range2) i=5 while (i <= length(args)) { # Reading next input file filename<-args[i] dft<-read_df(filename,range1,range2) df<-rbindlist(list(df,dft)) i <- i+1 } # Error: if there is no results for a given range and state if (nrow(df)==0) stop("Result is empty!") # Plotting histograms plot <- ggplot(df, aes(x=Duration)) + geom_histogram(aes(y=..count.., fill=..count..),binwidth = diff(range(df$Duration))/30) plot <- plot + theme_bw() + scale_fill_gradient(high = "#132B43", low = "#56B1F7") + ggtitle("Histograms for state distribution") + ylab("Count") + xlab("Time [ms]") + theme(legend.position="none") + facet_grid(Origin~Value,scales = "free_y") # Adding text for total duration ad<-ggplot_build(plot)$data[[1]] al<-ggplot_build(plot)$panel$layout ad<-merge(ad,al) anno1 <- ddply(ad, .(ROW), summarise, x = max(x)*0.7, y = max(y)*0.9) anno1<-merge(anno1,al) anno2 <- ddply(df, .(Origin,Value), summarise, tot=as.integer(sum(Duration))) anno2$PANEL <- row.names(anno2) anno2$lab <- sprintf("Total duration: \n%ims",anno2$tot) anno <- merge(anno1,anno2) plot <- plot + geom_text(data = anno, aes(x=x, y=y, label=lab, colour="red")) # Printing plot plot # End write("Done producing a histogram plot. Open Rplots.pdf located in this folder to see the results", stdout()) starpu-1.4.9+dfsg/tools/starpu_paje_draw_histogram.in000077500000000000000000000070331507764646700231150ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2014-2014 Université Joseph Fourier # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Script for giving statistical analysis of the paje trace set -e # fail fast PROGNAME=$0 # File names r_script="$(dirname $(command -v $0))/starpu_paje_draw_histogram.R" r_input="" # Command line arguments range="0:-1" name="All" verbose=0 inputfiles="" help_script() { cat << EOF Give statistical analysis of the paje trace $0 [ options ] paje.trace [paje.trace2 ...] Options: -r To fix range x1:x2 ("-1" for infinity) -n To choose a certain state -v Print output to command line -h Show this message Examples: $0 -n chol_model_22 example.native.trace $0 -r 100:300 -n FetchingInput,Overhead -v example.native.trace example.simgrid.trace Report bugs to <@PACKAGE_BUGREPORT@> EOF } if [ "$1" = "--version" ] ; then echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then help_script exit 0 fi while getopts "r:n:vh" opt; do case $opt in r) range="$OPTARG" ;; n) name="$OPTARG" ;; v) verbose=1 ;; h) help_script exit 4 ;; \?) echo "Invalid option: -$OPTARG" help_script exit 3 ;; esac done # Reading files that need to be analyzed shift $((OPTIND - 1)) inputfiles=$@ if [[ $# < 1 ]]; then echo "Error!" help_script exit 2 fi # Getting range range1=$(eval echo $range | cut -d: -f1) range2=$(eval echo $range | cut -d: -f2) ##################################### # Transforming input files into .csv for file in $inputfiles; do if [ ! -s $file ] then echo "Error: file $file does not exist!" exit 5 fi dir=$(dirname $file) # Sorting traces grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' $file > $dir/start.trace grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' -v $file > $dir/end.trace sort -s -V --key=2,2 $dir/end.trace > $dir/endSorted.trace if grep -q start_profiling $dir/endSorted.trace then echo Using start_profiling/stop_profiling trace selection. sed -ne '/start_profiling/,/stop_profiling/p' < $dir/endSorted.trace > $dir/endSorted2.trace else cp $dir/endSorted.trace $dir/endSorted2.trace fi cat $dir/start.trace $dir/endSorted2.trace > $dir/outputSorted.trace # Transferring to .csv pj_dump -n $dir/outputSorted.trace > $file.csv perl -i -ne 'print if /^State/' $file.csv r_input=$(eval echo "$r_input $file.csv") # Cleanup: delete temporary files rm -f $dir/outputSorted.trace rm -f $dir/start.trace rm -f $dir/end.trace rm -f $dir/endSorted.trace rm -f $dir/endSorted2.trace done ##################################### # Running R file to get actual results Rscript $r_script $range1 $range2 $name $r_input # Directly opening .pdf result if [[ $verbose == 1 ]]; then evince Rplots.pdf fi starpu-1.4.9+dfsg/tools/starpu_paje_sort.in000077500000000000000000000054321507764646700210730ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2014-2014 Université Joseph Fourier # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Script for sorting paje traces set -e # fail fast inputfiles="" help_script() { cat << EOF Give statistical analysis of the paje trace $0 [ options ] paje.trace [paje.trace2 ...] Options: -h Show this message Examples: $0 example.trace Report bugs to <@PACKAGE_BUGREPORT@> EOF } if [ "$1" = "--version" ] ; then echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then help_script exit 0 fi while getopts "h" opt; do case $opt in h) help_script exit 4 ;; \?) echo "Invalid option: -$OPTARG" help_script exit 3 ;; esac done # Reading files that need to be analyzed shift $((OPTIND - 1)) inputfiles=$@ if [[ $# < 1 ]]; then echo "Error!" help_script exit 2 fi get_event_num() { grep "^%EventDef[ ]$2" $1 | sed -e "s/.*$2[ ]*//" } ##################################### # Transforming input files into .csv for file in $inputfiles; do if [ ! -s $file ] then echo "Error: file $file does not exist!" exit 5 fi dir=$(dirname $file) DefCont="$(get_event_num $file PajeDefineContainerType) " DefEvent="$(get_event_num $file PajeDefineEventType) " DefState="$(get_event_num $file PajeDefineStateType) " DefVar="$(get_event_num $file PajeDefineVariableType) " DefLink="$(get_event_num $file PajeDefineLinkType) " DefEnt="$(get_event_num $file PajeDefineEntityValue) " CreateCont="$(get_event_num $file PajeCreateContainer) " AddVar="$(get_event_num $file PajeAddVariable) " grepstr="^\\(%\\|$DefCont\\|$DefEvent\\|$DefState\\|$DefVar\\|$DefLink\\|$DefEnt\\|$CreateCont\\|$AddVar\\)" grepstr=${grepstr//[ ]/[ ]} # Sorting traces grep -e "$grepstr" $file > $dir/start.trace grep -e "$grepstr" -v $file > $dir/end.trace sort -s -V --key=2,2 $dir/end.trace > $dir/endSorted.trace cat $dir/start.trace $dir/endSorted.trace > $file # Cleanup: delete temporary files rm -f $dir/start.trace rm -f $dir/end.trace rm -f $dir/endSorted.trace done starpu-1.4.9+dfsg/tools/starpu_paje_state_stats000077500000000000000000000070501507764646700220330ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2014-2014 Université Joseph Fourier # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Script for giving statistical analysis of the paje trace set -e # fail fast # File names outputfile="starpu_paje_state_stats.csv" r_script="$(dirname $(command -v $0))/starpu_paje_state_stats.R" r_input="" # Command line arguments range="0:-1" name="All" verbose=0 inputfiles="" help_script() { cat << EOF Give statistical analysis of the paje trace $0 [ options ] paje.trace [paje.trace2 ...] Options: -r To fix range x1:x2 ("-1" for infinity) -n To choose a certain state -v Print output to command line -h Show this message Examples: $0 example.native.trace $0 -r 100:300 -n FetchingInput -v example.native.trace example.simgrid.trace Report bugs to EOF } if [ "$1" = "--version" ] ; then echo "$PROGNAME (StarPU) 1.4.9" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then help_script exit 0 fi while getopts "r:n:vh" opt; do case $opt in r) range="$OPTARG" ;; n) name="$OPTARG" ;; v) verbose=1 ;; h) help_script exit 4 ;; \?) echo "Invalid option: -$OPTARG" help_script exit 3 ;; esac done # Reading files that need to be analyzed shift $((OPTIND - 1)) inputfiles=$@ if [[ $# < 1 ]]; then echo "Error!" help_script exit 2 fi # Getting range range1=$(eval echo $range | cut -d: -f1) range2=$(eval echo $range | cut -d: -f2) ##################################### # Transforming input files into .csv for file in $inputfiles; do if [ ! -s $file ] then echo "Error: file $file does not exist!" exit 5 fi dir=$(dirname $file) # Sorting traces grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' $file > $dir/start.trace grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' -v $file > $dir/end.trace sort -s -V --key=2,2 $dir/end.trace > $dir/endSorted.trace if grep -q start_profiling $dir/endSorted.trace then echo Using start_profiling/stop_profiling trace selection. sed -ne '/start_profiling/,/stop_profiling/p' < $dir/endSorted.trace > $dir/endSorted2.trace else cp $dir/endSorted.trace $dir/endSorted2.trace fi cat $dir/start.trace $dir/endSorted2.trace > $dir/outputSorted.trace # Transferring to .csv pj_dump -n $dir/outputSorted.trace > $file.csv perl -i -ne 'print if /^State/' $file.csv r_input=$(eval echo "$r_input $file.csv") # Cleanup: delete temporary files rm -f $dir/outputSorted.trace rm -f $dir/start.trace rm -f $dir/end.trace rm -f $dir/endSorted.trace rm -f $dir/endSorted2.trace done ##################################### # Running R file to get actual results Rscript $r_script $range1 $range2 $name $outputfile $r_input # If verbose then write results to stdout if [[ $verbose == 1 ]]; then column -s, -t $outputfile fi starpu-1.4.9+dfsg/tools/starpu_paje_state_stats.1000066400000000000000000000014251507764646700221670ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_PAJE_STATE_STATS "1" "October 2025" "starpu_paje_state_stats (StarPU) 1.4.9" "User Commands" .SH NAME starpu_paje_state_stats \- Print statistics from StarPU trace .SH DESCRIPTION Give statistical analysis of the paje trace .PP \&./starpu_paje_state_stats [ options ] paje.trace [paje.trace2 ...] .SH OPTIONS .TP \fB\-r\fR To fix range x1:x2 ("\-1" for infinity) .TP \fB\-n\fR To choose a certain state .TP \fB\-v\fR Print output to command line .TP \fB\-h\fR Show this message .SH EXAMPLES \&./starpu_paje_state_stats example.native.trace .PP \&./starpu_paje_state_stats \-r 100:300 \-n FetchingInput \-v example.native.trace example.simgrid.trace .SH "REPORTING BUGS" Report bugs to starpu-1.4.9+dfsg/tools/starpu_paje_state_stats.R000077500000000000000000000107561507764646700222420ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2014-2014 Université Joseph Fourier # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # R script that is giving statistical analysis of the paje trace # Can be called from the command line with: # Rscript $this_script $range1 $range2 $name $outputfile $inputfiles # Package containing ddply function library(plyr) # Function for reading .csv file read_df <- function(file,range1,range2) { df<-read.csv(file, header=FALSE, strip.white=TRUE) names(df) <- c("Nature","ResourceId","Type","Start","End","Duration", "Depth", "Value") df = df[!(names(df) %in% c("Nature","Type", "Depth"))] # Changing names if needed: df$Value <- as.character(df$Value) df$Value <- ifelse(df$Value == "F", "Freeing", as.character(df$Value)) df$Value <- ifelse(df$Value == "A", "Allocating", as.character(df$Value)) df$Value <- ifelse(df$Value == "W", "WritingBack", as.character(df$Value)) df$Value <- ifelse(df$Value == "No", "Nothing", as.character(df$Value)) df$Value <- ifelse(df$Value == "I", "Initializing", as.character(df$Value)) df$Value <- ifelse(df$Value == "D", "Deinitializing", as.character(df$Value)) df$Value <- ifelse(df$Value == "Fi", "FetchingInput", as.character(df$Value)) df$Value <- ifelse(df$Value == "Po", "PushingOutput", as.character(df$Value)) df$Value <- ifelse(df$Value == "C", "Callback", as.character(df$Value)) df$Value <- ifelse(df$Value == "B", "Overhead", as.character(df$Value)) df$Value <- ifelse(df$Value == "Sl", "Sleeping", as.character(df$Value)) df$Value <- ifelse(df$Value == "P", "Progressing", as.character(df$Value)) df$Value <- ifelse(df$Value == "U", "Unpartitioning", as.character(df$Value)) df$Value <- ifelse(df$Value == "Ar", "AllocatingReuse", as.character(df$Value)) df$Value <- ifelse(df$Value == "R", "Reclaiming", as.character(df$Value)) df$Value <- ifelse(df$Value == "Co", "DriverCopy", as.character(df$Value)) df$Value <- ifelse(df$Value == "CoA", "DriverCopyAsync", as.character(df$Value)) df$Value <- ifelse(df$Value == "Su", "SubmittingTask", as.character(df$Value)) # Considering only the states with a given name if (name != "All") df<-df[df$Value %in% name[[1]],] # Aligning to begin time from 0 m <- min(df$Start) df$Start <- df$Start - m df$End <- df$Start+df$Duration # Taking only the states inside a given range df <- df[df$Start>=range1 & df$End<=range2,] # Return data frame df } ######################################### ######################################### # Main ######################################### # Reading command line arguments args <- commandArgs(trailingOnly = TRUE) range1<-as.numeric(args[1]) if (range1==-1) range1<-Inf range2<-as.numeric(args[2]) if (range2==-1) range2<-Inf name<-strsplit(args[3], ",") outputfile<-args[4] # Reading first file filename<-args[5] df<-read_df(filename,range1,range2) # Getting summary of the first file dfout<-ddply(df, c("Value"), summarize, Events_ = length(as.numeric(Duration)), Duration_ = sum(as.numeric(Duration))) names(dfout)<-c("Value",sprintf("Events_%s",filename),sprintf("Duration_%s",filename)) i=6 while (i <= length(args)) { # Reading next input file filename<-args[i] df<-read_df(filename,range1,range2) # Getting summary of the next file dp<-ddply(df, c("Value"), summarize, Events_ = length(as.numeric(Duration)), Duration_ = sum(as.numeric(Duration))) names(dp)<-c("Value",sprintf("Events_%s",filename),sprintf("Duration_%s",filename)) # Merging results into one single data frame if (nrow(dp)>0) { if (nrow(dfout)>0) dfout<-merge(dfout,dp, by = "Value", all=TRUE) else dfout<-dp } i <- i+1 } # Cosmetics: change NA to 0 dfout[is.na(dfout)] <- 0 # Error: if there is no results for a given range and state if (nrow(dfout)==0) stop("Result is empty!") # Write results into the new .csv file write.table(dfout, file=outputfile, row.names=FALSE, sep = ", ") starpu-1.4.9+dfsg/tools/starpu_paje_state_stats.in000077500000000000000000000070721507764646700224440ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2014-2014 Université Joseph Fourier # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Script for giving statistical analysis of the paje trace set -e # fail fast # File names outputfile="starpu_paje_state_stats.csv" r_script="$(dirname $(command -v $0))/starpu_paje_state_stats.R" r_input="" # Command line arguments range="0:-1" name="All" verbose=0 inputfiles="" help_script() { cat << EOF Give statistical analysis of the paje trace $0 [ options ] paje.trace [paje.trace2 ...] Options: -r To fix range x1:x2 ("-1" for infinity) -n To choose a certain state -v Print output to command line -h Show this message Examples: $0 example.native.trace $0 -r 100:300 -n FetchingInput -v example.native.trace example.simgrid.trace Report bugs to <@PACKAGE_BUGREPORT@> EOF } if [ "$1" = "--version" ] ; then echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then help_script exit 0 fi while getopts "r:n:vh" opt; do case $opt in r) range="$OPTARG" ;; n) name="$OPTARG" ;; v) verbose=1 ;; h) help_script exit 4 ;; \?) echo "Invalid option: -$OPTARG" help_script exit 3 ;; esac done # Reading files that need to be analyzed shift $((OPTIND - 1)) inputfiles=$@ if [[ $# < 1 ]]; then echo "Error!" help_script exit 2 fi # Getting range range1=$(eval echo $range | cut -d: -f1) range2=$(eval echo $range | cut -d: -f2) ##################################### # Transforming input files into .csv for file in $inputfiles; do if [ ! -s $file ] then echo "Error: file $file does not exist!" exit 5 fi dir=$(dirname $file) # Sorting traces grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' $file > $dir/start.trace grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\)\>\)\)' -v $file > $dir/end.trace sort -s -V --key=2,2 $dir/end.trace > $dir/endSorted.trace if grep -q start_profiling $dir/endSorted.trace then echo Using start_profiling/stop_profiling trace selection. sed -ne '/start_profiling/,/stop_profiling/p' < $dir/endSorted.trace > $dir/endSorted2.trace else cp $dir/endSorted.trace $dir/endSorted2.trace fi cat $dir/start.trace $dir/endSorted2.trace > $dir/outputSorted.trace # Transferring to .csv pj_dump -n $dir/outputSorted.trace > $file.csv perl -i -ne 'print if /^State/' $file.csv r_input=$(eval echo "$r_input $file.csv") # Cleanup: delete temporary files rm -f $dir/outputSorted.trace rm -f $dir/start.trace rm -f $dir/end.trace rm -f $dir/endSorted.trace rm -f $dir/endSorted2.trace done ##################################### # Running R file to get actual results Rscript $r_script $range1 $range2 $name $outputfile $r_input # If verbose then write results to stdout if [[ $verbose == 1 ]]; then column -s, -t $outputfile fi starpu-1.4.9+dfsg/tools/starpu_paje_summary000077500000000000000000000056421507764646700211770ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2014-2014 Université Joseph Fourier # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Script for giving statistical analysis of the paje trace set -e # fail fast # File names SOURCE_DIR=$(dirname $0) outputfile="summary.html" analysis_script="$SOURCE_DIR/starpu_paje_summary.Rmd" analysis_input="" # Command line arguments inputfiles="" help_script() { cat << EOF Give statistical analysis of the paje trace Options: -h Show this message Examples: $0 example.native.trace $0 example.native.trace example.simgrid.trace Report bugs to EOF } if [ "$1" = "--version" ] ; then echo "$PROGNAME (StarPU) 1.4.9" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then help_script exit 0 fi while getopts "h" opt; do case $opt in \?) echo "Invalid option: -$OPTARG" help_script exit 3 ;; esac done # Reading files that need to be analyzed shift $((OPTIND - 1)) inputfiles=$@ # Error if there is no input files specified if [[ $# < 1 ]]; then echo "Error!" help_script exit 2 fi ##################################### # Transforming input files into .csv for file in $inputfiles; do if [ ! -s $file ] then echo "Error: file $file does not exist!" exit 5 fi dir=$(dirname $file) # Sorting traces grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\|9\)\>\)\)' $file > $dir/start.trace grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\|9\|18\|19\)\>\)\)' -v $file > $dir/end.trace sort -s -V --key=2,2 $dir/end.trace > $dir/endSorted.trace cat $dir/start.trace $dir/endSorted.trace > $dir/outputSorted.trace # Transferring to .csv pj_dump -n $dir/outputSorted.trace > $file.csv perl -i -ne 'print if /^State/' $file.csv # Cleanup: delete temporary files rm -f $dir/outputSorted.trace rm -f $dir/start.trace rm -f $dir/end.trace rm -f $dir/endSorted.trace done analysis_input=`echo \"$inputfiles".csv\"" | sed 's/ */.csv", "/g'` ##################################### # Running analysis file to get actual results Rscript -e "library(knitr); input_traces = c($analysis_input) ; outputhtml='$outputfile';\ outputRmd = gsub('.html\$','.Rmd',outputhtml);\ knit('$analysis_script',output=outputRmd); knitr::knit2html(outputRmd)" starpu-1.4.9+dfsg/tools/starpu_paje_summary.Rmd000066400000000000000000000277641507764646700217260ustar00rootroot00000000000000# StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # ```{r Setup, echo=FALSE} opts_chunk$set(echo=FALSE) ``` ```{r Install_R_libraries} InstalledPackage <- function(package) { available <- suppressMessages(suppressWarnings(sapply(package, require, quietly = TRUE, character.only = TRUE, warn.conflicts = FALSE))) missing <- package[!available] if (length(missing) > 0) return(FALSE) return(TRUE) } CRANChoosen <- function() { return(getOption("repos")["CRAN"] != "@CRAN@") } UsePackage <- function(package, defaultCRANmirror = "http://cran.at.r-project.org") { if(!InstalledPackage(package)) { if(!CRANChoosen()) { chooseCRANmirror() if(!CRANChoosen()) { options(repos = c(CRAN = defaultCRANmirror)) } } suppressMessages(suppressWarnings(install.packages(package))) if(!InstalledPackage(package)) return(FALSE) } return(TRUE) } # Now install desired libraries libraries <- c("ggplot2", "plyr", "data.table", "RColorBrewer") for(libr in libraries) { if(!UsePackage(libr)) { stop("Error!", libr) } } ``` ```{r Load_R_files} # Load ggplot and plyr just for the following cases library(ggplot2) library(plyr) library(data.table) library(RColorBrewer) # Defining non-computation states: def_states<-c("Initializing","Deinitializing","Overhead","Nothing","Sleeping","Freeing","Allocating","WritingBack","FetchingInput","PushingOutput","Callback","Progressing","Unpartitioning","AllocatingReuse","Reclaiming","DriverCopy","DriverCopyAsync","Scheduling","Executing") # Function for reading .csv file read_df <- function(file,range1,range2) { df<-read.csv(file, header=FALSE, strip.white=TRUE) names(df) <- c("Nature","ResourceId","Type","Start","End","Duration", "Depth", "Value") df = df[!(names(df) %in% c("Nature","Type", "Depth"))] df$Origin<-as.factor(as.character(file)) # Changing names if needed: df$Value <- as.character(df$Value) df$Value <- ifelse(df$Value == "F", "Freeing", as.character(df$Value)) df$Value <- ifelse(df$Value == "A", "Allocating", as.character(df$Value)) df$Value <- ifelse(df$Value == "W", "WritingBack", as.character(df$Value)) df$Value <- ifelse(df$Value == "No", "Nothing", as.character(df$Value)) df$Value <- ifelse(df$Value == "I", "Initializing", as.character(df$Value)) df$Value <- ifelse(df$Value == "D", "Deinitializing", as.character(df$Value)) df$Value <- ifelse(df$Value == "Fi", "FetchingInput", as.character(df$Value)) df$Value <- ifelse(df$Value == "Po", "PushingOutput", as.character(df$Value)) df$Value <- ifelse(df$Value == "C", "Callback", as.character(df$Value)) df$Value <- ifelse(df$Value == "B", "Overhead", as.character(df$Value)) df$Value <- ifelse(df$Value == "Sc", "Scheduling", as.character(df$Value)) df$Value <- ifelse(df$Value == "E", "Executing", as.character(df$Value)) df$Value <- ifelse(df$Value == "Sl", "Sleeping", as.character(df$Value)) df$Value <- ifelse(df$Value == "P", "Progressing", as.character(df$Value)) df$Value <- ifelse(df$Value == "U", "Unpartitioning", as.character(df$Value)) df$Value <- ifelse(df$Value == "Ar", "AllocatingReuse", as.character(df$Value)) df$Value <- ifelse(df$Value == "R", "Reclaiming", as.character(df$Value)) df$Value <- ifelse(df$Value == "Co", "DriverCopy", as.character(df$Value)) df$Value <- ifelse(df$Value == "CoA", "DriverCopyAsync", as.character(df$Value)) # Small cleanup df$Start<-round(df$Start,digit=1) df$End<-round(df$End,digit=1) df$ResourceId<-as.factor(df$ResourceId) df$Value<-as.factor(df$Value) # Start from zero m <- min(df$Start) df$Start <- df$Start - m df$End <- df$Start+df$Duration # Return data frame df } ``` ```{r Load_traces} df<-data.frame() if( !exists("input_traces") ) input_traces<-c("example.native.trace.csv", "example.simgrid.trace.csv") for (i in 1:length(input_traces)){ dfs<-read_df(input_traces[i]) df<-rbindlist(list(df,dfs)) } # Color palettes colourCount = length(unique(df$Value)) getPalette = colorRampPalette(brewer.pal(9, "Set1")) # Order of Value so we can have good colors ker_states<-as.character(unique(df[!(df$Value %in% def_states),Value])) ordered_states<-append(sort(ker_states), def_states) df$Value <- factor(df$Value, levels=ordered_states) # Order of ResourceId so we can have y-axis df$ResourceId <- factor(df$ResourceId, levels=sort(as.character(unique(df$ResourceId)))) ``` # Introduction This document presents a basic analysis of multiple StarPU traces. First, paje *traces* will be transferred into *.csv* files and then we analyze them with **R**. This summary is a first step that should help researchers verify their hypothesis or find problematic areas that require more exhaustive investigation. Be cautious, as the following results are only a brief analysis of the traces and many important phenomena could still be hidden. Also, be very careful when comparing different states or traces. Even though some large discrepancies can be irrelevant, in other cases even the smallest differences can be essential in understanding what exactly happened during the StarPU execution. ### How to compile ./starpu_summary.sh example.native.trace example.simgrid.trace ### Software dependencies In order to run this analysis you need to have R installed: sudo apt-get install r-base Easiest way to transform *paje* traces generated by StarPU to *.csv* is to use *pjdump* program (), so we encourage users to install it. When R is installed, one will need to start R (e.g., from terminal) and install *knitr* package: R> install.packages("knitr") Additional R packages used in this analysis (*ggplot2, plyr, data.table, RColorBrewer*) will be installed automatically when the document is compiled for the first time. If there is any trouble, install them by hand directly from R (the same way as *knitr*) # Gantt Charts of the whole Trace First, we show a simple gantt chart of every trace. X-axis is a simple timeline of the execution, *Resources* on y-axis correspond to different CPUs/GPUs that were used and finally different colors represent different *States* of the application. This kind of figures can often point to the idle time or synchronization problems. Small disadvantage is that in most cases there are too many states, thus it is impossible to display them all on a single plot without aggregation. Therefore for any strange behavior at a certain part of the trace, we strongly suggest to zoom on the interval it occurred. ```{r Gantt1} ggplot(df,aes(x=Start,xend=End, y=factor(ResourceId), yend=factor(ResourceId),color=Value)) + theme_bw() + scale_color_manual(name="State",values=getPalette(colourCount)) + geom_segment(size=8) + ylab("Resource") + xlab("Time [ms]") + facet_wrap(~Origin,ncol=1,scale="free_y") ``` Second, we will concentrate only on computation kernel states, to get rid of visualization artifacts that can be introduced by other (sometimes irrelevant) states. Normally, this plot should not be too different from the previous one. ```{r Gantt2} # Select only computation kernels df1 <- df[!(df$Value %in% c("Initializing","Deinitializing","Overhead","Nothing","Sleeping","Freeing","Allocating","WritingBack","FetchingInput","PushingOutput","Callback","Progressing","Unpartitioning","AllocatingReuse","Reclaiming","DriverCopy","DriverCopyAsync","Scheduling","Executing")),] # Start from zero m <- min(df1$Start) df1$Start <- df1$Start - m df1$End <- df1$Start+df1$Duration # Plot ggplot(df1,aes(x=Start,xend=End, y=factor(ResourceId), yend=factor(ResourceId),color=Value)) + theme_bw() + scale_color_manual(name="State",values=getPalette(colourCount)) + geom_segment(size=8) + ylab("Resource") + xlab("Time [ms]") + facet_wrap(~Origin,ncol=1,scale="free_y") ``` # Table Summary Here we present how much time application spent in each state (OverallDuration), how many times it was in that state (Count), mean and median values of duration (Mean and Median), and finally what is a standard deviation (StandDev). General information provided by this table can sometimes give an idea to application experts which parts of code are not working as desired. Be aware that this kind of tables hide many important things, such as outliers, multiple modes, etc. ```{r Table} options(width=120) ddply(df,.(Value,Origin), summarize, OverallDuration=sum(Duration), Count=length(Duration), Mean=mean(Duration), Median=median(Duration), StandDev=sd(Duration)) ``` # State Duration during the Execution Time Now, we show how duration of each state was changing during the execution. This can display a general behavior of a state; show if there are outliers or multiple modes; are some events occurring in groups, etc. . It can also suggest a strange behavior of a state during a certain time interval, which should be later investigated more carefully. However, since each event is represented by a single point (and there is no "alpha" factor), those events that happen almost simultaneously are overplotted. Therefore density of events along execution time may not be easy to read. ```{r Dur} ggplot(df,aes(x=Start,y=Duration)) + geom_point(aes(color=Value)) + theme_bw() + scale_color_manual(name="State",values=getPalette(colourCount)) + ggtitle("State Duration during the Execution Time") + theme(legend.position="none") + ylab("Duration [ms]") + xlab("Time [ms]") + facet_grid(Value~Origin, scale="free_y") ``` # Distribution Histograms Finally, we show a distribution of *Duration* for each state in form of histograms. X-axis is partitioned into bins with equidistant time intervals in milliseconds, while y-axis represents the number of occurrences inside such intervals for a certain state. Note that for the first plot y-axis is not fixed, meaning that the scale changes from one row to another. This plot allows to not only to see what was the most frequent duration of a state, but also to compare duration between different states. ```{r Hist1} ggplot(df, aes(x=Duration)) + geom_histogram(aes(y=..count..,fill=factor(Value)),binwidth = diff(range(df$Duration))/30) + theme_bw() + scale_fill_manual(name="State",values=getPalette(colourCount)) + ggtitle("Histograms for State Distribution") + ylab("Count") + xlab("Duration [ms]") + theme(legend.position="none") + facet_grid(Value~Origin,scales = "free_y") ``` Similar to the previous figure, only now traces are showed vertically instead of horizontally. Note that for this plot x-axis is not fixed, meaning that the scale changes from one column to another. This plot allows to compare frequency of different states and in case of multiple traces to easily compare duration distribution for each state. ```{r Hist2} ggplot(df, aes(x=Duration)) + geom_histogram(aes(y=..count..,fill=factor(Value)),binwidth = diff(range(df$Duration))/30) + theme_bw() + scale_fill_manual(name="State",values=getPalette(colourCount)) + ggtitle("Histograms for State Distribution") + ylab("Count") + xlab("Duration [ms]") + theme(legend.position="none") + facet_grid(Origin~Value,scales = "free_x") ``` starpu-1.4.9+dfsg/tools/starpu_paje_summary.in000077500000000000000000000056641507764646700216100ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2014-2014 Université Joseph Fourier # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Script for giving statistical analysis of the paje trace set -e # fail fast # File names SOURCE_DIR=$(dirname $0) outputfile="summary.html" analysis_script="$SOURCE_DIR/starpu_paje_summary.Rmd" analysis_input="" # Command line arguments inputfiles="" help_script() { cat << EOF Give statistical analysis of the paje trace Options: -h Show this message Examples: $0 example.native.trace $0 example.native.trace example.simgrid.trace Report bugs to <@PACKAGE_BUGREPORT@> EOF } if [ "$1" = "--version" ] ; then echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then help_script exit 0 fi while getopts "h" opt; do case $opt in \?) echo "Invalid option: -$OPTARG" help_script exit 3 ;; esac done # Reading files that need to be analyzed shift $((OPTIND - 1)) inputfiles=$@ # Error if there is no input files specified if [[ $# < 1 ]]; then echo "Error!" help_script exit 2 fi ##################################### # Transforming input files into .csv for file in $inputfiles; do if [ ! -s $file ] then echo "Error: file $file does not exist!" exit 5 fi dir=$(dirname $file) # Sorting traces grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\|9\)\>\)\)' $file > $dir/start.trace grep -e '^\(\(%\)\|\(\(0\|1\|2\|3\|4\|5\|6\|7\|9\|18\|19\)\>\)\)' -v $file > $dir/end.trace sort -s -V --key=2,2 $dir/end.trace > $dir/endSorted.trace cat $dir/start.trace $dir/endSorted.trace > $dir/outputSorted.trace # Transferring to .csv pj_dump -n $dir/outputSorted.trace > $file.csv perl -i -ne 'print if /^State/' $file.csv # Cleanup: delete temporary files rm -f $dir/outputSorted.trace rm -f $dir/start.trace rm -f $dir/end.trace rm -f $dir/endSorted.trace done analysis_input=`echo \"$inputfiles".csv\"" | sed 's/ */.csv", "/g'` ##################################### # Running analysis file to get actual results Rscript -e "library(knitr); input_traces = c($analysis_input) ; outputhtml='$outputfile';\ outputRmd = gsub('.html\$','.Rmd',outputhtml);\ knit('$analysis_script',output=outputRmd); knitr::knit2html(outputRmd)" starpu-1.4.9+dfsg/tools/starpu_perfmodel_display.1000066400000000000000000000020101507764646700223230ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_PERFMODEL_DISPLAY "1" "October 2025" "StarPU 1.4.9" "User Commands" .SH NAME starpu_perfmodel_display \- Display StarPU performance model .SH SYNOPSIS .B starpu_perfmodel_display [ \fI\,options \/\fR] .SH DESCRIPTION Display a given perfmodel .PP One must specify either \fB\-l\fR or \fB\-s\fR. \fB\-x\fR can be used with \fB\-s\fR Options: .TP \fB\-l\fR display all available models .TP \fB\-s\fR specify the symbol .TP \fB\-x\fR display output in XML format .TP \fB\-p\fR specify the parameter (e.g. a, b, c, mean, stddev) .TP \fB\-a\fR specify the architecture (e.g. cpu, cpu:k, cuda) .TP \fB\-f\fR display the history\-based model for the specified footprint .TP \fB\-d\fR display the directory storing performance models .TP \fB\-h\fR, \fB\-\-help\fR display this help and exit .TP \fB\-v\fR, \fB\-\-version\fR output version information and exit .SH "REPORTING BUGS" Report bugs to . starpu-1.4.9+dfsg/tools/starpu_perfmodel_display.c000066400000000000000000000117351507764646700224230ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #if defined(_WIN32) && !defined(__CYGWIN__) #include #endif #define PROGNAME "starpu_perfmodel_display" /* XML format */ static int xml = 0; /* display all available models */ static int plist = 0; /* display directory */ static int pdirectory = 0; /* what kernel ? */ static char *psymbol = NULL; /* what parameter should be displayed ? (NULL = all) */ static char *pparameter = NULL; /* which architecture ? (NULL = all)*/ static char *parch = NULL; /* should we display a specific footprint ? */ static unsigned pdisplay_specific_footprint; static uint32_t pspecific_footprint; static void usage() { fprintf(stderr, "Display a given perfmodel\n\n"); fprintf(stderr, "Usage: %s [ options ]\n", PROGNAME); fprintf(stderr, "\n"); fprintf(stderr, "One must specify either -l or -s. -x can be used with -s\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, " -l display all available models\n"); fprintf(stderr, " -s specify the symbol\n"); fprintf(stderr, " -x display output in XML format\n"); fprintf(stderr, " -p specify the parameter (e.g. a, b, c, mean, stddev)\n"); fprintf(stderr, " -a specify the architecture (e.g. cpu, cpu:k, cuda)\n"); fprintf(stderr, " -f display the history-based model for the specified footprint\n"); fprintf(stderr, " -d display the directory storing performance models\n"); fprintf(stderr, " -h, --help display this help and exit\n"); fprintf(stderr, " -v, --version output version information and exit\n\n"); fprintf(stderr, "Report bugs to <%s>.", PACKAGE_BUGREPORT); fprintf(stderr, "\n"); } static void parse_args(int argc, char **argv) { int c; int res; static struct option long_options[] = { {"arch", required_argument, NULL, 'a'}, {"footprint", required_argument, NULL, 'f'}, {"help", no_argument, NULL, 'h'}, /* XXX Would be cleaner to set a flag */ {"list", no_argument, NULL, 'l'}, {"dir", no_argument, NULL, 'd'}, {"parameter", required_argument, NULL, 'p'}, {"symbol", required_argument, NULL, 's'}, {"version", no_argument, NULL, 'v'}, {0, 0, 0, 0} }; int option_index; while ((c = getopt_long(argc, argv, "dls:p:a:f:hx", long_options, &option_index)) != -1) { switch (c) { case 'l': /* list all models */ plist = 1; break; case 's': /* symbol */ psymbol = optarg; break; case 'p': /* parameter (eg. a, b, c, mean, stddev) */ pparameter = optarg; break; case 'a': /* architecture (cpu, cuda) */ parch = optarg; break; case 'f': /* footprint */ pdisplay_specific_footprint = 1; res = sscanf(optarg, "%08x", &pspecific_footprint); STARPU_ASSERT(res==1); break; case 'd': /* directory */ pdirectory = 1; break; case 'x': /* symbol */ xml = 1; break; case 'h': usage(); exit(EXIT_SUCCESS); case 'v': fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); exit(EXIT_SUCCESS); case '?': default: fprintf(stderr, "Unrecognized option: -%c\n", optopt); } } if (!psymbol && !plist && !pdirectory) { fprintf(stderr, "Incorrect usage, aborting\n"); usage(); exit(-1); } } int main(int argc, char **argv) { #if defined(_WIN32) && !defined(__CYGWIN__) WSADATA wsadata; WSAStartup(MAKEWORD(1,0), &wsadata); #endif parse_args(argc, argv); starpu_drivers_preinit(); starpu_perfmodel_initialize(); if (plist) { starpu_perfmodel_list(stdout); } else if (pdirectory) { starpu_perfmodel_directory(stdout); } else { struct starpu_perfmodel model = { .type = STARPU_PERFMODEL_INVALID }; int ret = starpu_perfmodel_load_symbol(psymbol, &model); if (ret == 1) { fprintf(stderr, "The performance model for the symbol <%s> could not be loaded\n", psymbol); return 1; } if (xml) { starpu_perfmodel_dump_xml(stdout, &model); } else { uint32_t *footprint = NULL; if (pdisplay_specific_footprint == 1) { footprint = &pspecific_footprint; } starpu_perfmodel_print_all(&model, parch, pparameter, footprint, stdout); } starpu_perfmodel_unload_model(&model); } starpu_perfmodel_free_sampling(); return 0; } starpu-1.4.9+dfsg/tools/starpu_perfmodel_plot.1000066400000000000000000000024561507764646700216520ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_PERFMODEL_PLOT "1" "October 2025" "StarPU 1.4.9" "User Commands" .SH NAME starpu_perfmodel_plot \- Plot StarPU performance model .SH SYNOPSIS .B starpu_perfmodel_plot [ \fI\,options \/\fR] .SH DESCRIPTION Draw a graph corresponding to the execution time of a given perfmodel .PP One must specify a symbol with the \fB\-s\fR option or use \fB\-l\fR or \fB\-d\fR Options: .TP \fB\-d\fR display the directory storing performance models .TP \fB\-l\fR display all available models .TP \fB\-s\fR specify the symbol .TP \fB\-e\fR display perfmodel as energy instead of time .TP \fB\-se\fR specify both a time symbol and an energy symbol .TP \fB\-f\fR draw GFlop/s instead of time .TP \fB\-i\fR input FxT files generated by StarPU .TP \fB\-lc\fR display all combinations of a given model .TP \fB\-c\fR specify the combination (use the option \fB\-lc\fR to list all combinations of a given model) .TP \fB\-o\fR specify directory in which to create output files (current directory by default) .TP \fB\-h\fR, \fB\-\-help\fR display this help and exit .TP \fB\-v\fR, \fB\-\-version\fR output version information and exit .SH "REPORTING BUGS" Report bugs to . starpu-1.4.9+dfsg/tools/starpu_perfmodel_plot.c000066400000000000000000000503221507764646700217270ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2013-2013 Thibaut Lambert * Copyright (C) 2011-2011 Télécom Sud Paris * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #ifdef STARPU_USE_FXT #include #endif #include #include #include // we need to browse the list associated to history-based models #if defined(_WIN32) && !defined(__CYGWIN__) #include #endif #define PROGNAME "starpu_perfmodel_plot" struct _perfmodel_plot_options { /* display all available models */ int list; /* display directory */ int directory; /* what kernel ? */ char *symbol; /* what energy model ? */ char *energy_symbol; /* which combination */ int comb_is_set; int comb; /* display all available combinations of a specific model */ int list_combs; int gflops; int energy; /* Unless a FxT file is specified, we just display the model */ int with_fxt_file; char avg_file_name[256]; #ifdef STARPU_USE_FXT struct starpu_fxt_codelet_event *dumped_codelets; struct starpu_fxt_options fxt_options; char data_file_name[256]; #endif }; static void usage() { fprintf(stderr, "Draw a graph corresponding to the execution time of a given perfmodel\n"); fprintf(stderr, "Usage: %s [ options ]\n", PROGNAME); fprintf(stderr, "\n"); fprintf(stderr, "One must specify a symbol with the -s option or use -l or -d\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, " -d display the directory storing performance models\n"); fprintf(stderr, " -l display all available models\n"); fprintf(stderr, " -s specify the symbol\n"); fprintf(stderr, " -e display perfmodel as energy instead of time\n"); fprintf(stderr, " -se \n"); fprintf(stderr, " specify both a time symbol and an energy symbol\n"); fprintf(stderr, " -f draw GFlop/s instead of time\n"); fprintf(stderr, " -i input FxT files generated by StarPU\n"); fprintf(stderr, " -lc display all combinations of a given model\n"); fprintf(stderr, " -c specify the combination (use the option -lc to list all combinations of a given model)\n"); fprintf(stderr, " -o specify directory in which to create output files (current directory by default)\n"); fprintf(stderr, " -h, --help display this help and exit\n"); fprintf(stderr, " -v, --version output version information and exit\n\n"); fprintf(stderr, "Report bugs to <%s>.", PACKAGE_BUGREPORT); fprintf(stderr, "\n"); } static void parse_args(int argc, char **argv, struct _perfmodel_plot_options *options, char **directory) { int correct_usage = 0; memset(options, 0, sizeof(struct _perfmodel_plot_options)); #ifdef STARPU_USE_FXT /* Default options */ starpu_fxt_options_init(&options->fxt_options); free(options->fxt_options.out_paje_path); options->fxt_options.out_paje_path = NULL; free(options->fxt_options.activity_path); options->fxt_options.activity_path = NULL; free(options->fxt_options.distrib_time_path); options->fxt_options.distrib_time_path = NULL; free(options->fxt_options.dag_path); options->fxt_options.dag_path = NULL; options->fxt_options.dumped_codelets = &options->dumped_codelets; #endif /* We want to support arguments such as "-i trace_*" */ unsigned reading_input_filenames = 0; int i; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "-s") == 0) { if (i >= argc-1) { fprintf(stderr,"-s requires an argument\n"); usage(); exit(EXIT_FAILURE); } options->symbol = argv[++i]; correct_usage = 1; continue; } if (strcmp(argv[i], "-se") == 0) { if (i >= argc-2) { fprintf(stderr,"-se requires two arguments\n"); usage(); exit(EXIT_FAILURE); } options->symbol = argv[++i]; options->energy_symbol = argv[++i]; correct_usage = 1; continue; } if (strcmp(argv[i], "-o") == 0) { free(*directory); *directory = strdup(argv[++i]); #ifdef STARPU_USE_FXT options->fxt_options.dir = strdup(*directory); #endif continue; } if (strcmp(argv[i], "-i") == 0) { if (i >= argc-1) { fprintf(stderr,"-i requires an argument\n"); usage(); exit(EXIT_FAILURE); } reading_input_filenames = 1; #ifdef STARPU_USE_FXT options->fxt_options.filenames[options->fxt_options.ninputfiles++] = argv[++i]; options->with_fxt_file = 1; #else fprintf(stderr, "Warning: FxT support was not enabled in StarPU: FxT traces will thus be ignored!\n"); #endif continue; } if (strcmp(argv[i], "-l") == 0) { options->list = 1; correct_usage = 1; continue; } if (strcmp(argv[i], "-lc") == 0) { options->list_combs = 1; continue; } if (strcmp(argv[i], "-f") == 0) { options->gflops = 1; continue; } if (strcmp(argv[i], "-e") == 0) { options->energy = 1; continue; } if (strcmp(argv[i], "-c") == 0) { if (i >= argc-1) { fprintf(stderr,"-c requires an argument\n"); usage(); exit(EXIT_FAILURE); } options->comb_is_set = 1; options->comb = atoi(argv[++i]); continue; } if (strcmp(argv[i], "-d") == 0) { options->directory = 1; correct_usage = 1; continue; } if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { usage(); exit(EXIT_SUCCESS); } if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) { fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); exit(EXIT_SUCCESS); } /* If the reading_input_filenames flag is set, and that the * argument does not match an option, we assume this may be * another filename */ if (reading_input_filenames) { #ifdef STARPU_USE_FXT options->fxt_options.filenames[options->fxt_options.ninputfiles++] = argv[i]; #endif continue; } } if (correct_usage == 0) { fprintf(stderr, "Incorrect usage, aborting\n"); usage(); exit(-1); } } static char *replace_char(char *str, char old, char new) { char *p = strdup(str); char *ptr = p; while (*ptr) { if (*ptr == old) *ptr = new; ptr ++; } return p; } static void print_comma(FILE *gnuplot_file, int *first) { if (*first) { *first = 0; } else { fprintf(gnuplot_file, ",\\\n\t"); } } static void display_perf_model(FILE *gnuplot_file, struct starpu_perfmodel_arch* arch, struct starpu_perfmodel_per_arch *arch_model, int impl, int *first, struct _perfmodel_plot_options *options) { char arch_name[256]; const char *factor; if (options->energy) factor = ""; else factor = "0.001 * "; starpu_perfmodel_get_arch_name(arch, arch_name, 256, impl); #ifdef STARPU_USE_FXT if (options->with_fxt_file && impl == 0) { if (options->gflops) { _STARPU_DISP("gflops unit selected, ignoring fxt trace\n"); } else { char *arch_name2 = replace_char(arch_name, '_', '-'); print_comma(gnuplot_file, first); fprintf(gnuplot_file, "\"< grep '^%s' %s\" using 3:4 title \"Profiling %s\"", arch_name, options->data_file_name, arch_name2); free(arch_name2); } } #endif /* Only display the regression model if we could actually build a model */ if (!options->gflops && arch_model->regression.valid && !arch_model->regression.nl_valid) { print_comma(gnuplot_file, first); fprintf(stderr, "\tLinear: y = alpha size ^ beta\n"); fprintf(stderr, "\t\talpha = %e\n", arch_model->regression.alpha * 0.001); fprintf(stderr, "\t\tbeta = %e\n", arch_model->regression.beta); fprintf(gnuplot_file, "%s%g * x ** %g title \"Linear Regression %s\"", factor, arch_model->regression.alpha, arch_model->regression.beta, arch_name); } if (!options->gflops && arch_model->regression.nl_valid) { print_comma(gnuplot_file, first); fprintf(stderr, "\tNon-Linear: y = a size ^b + c\n"); fprintf(stderr, "\t\ta = %e\n", arch_model->regression.a * 0.001); fprintf(stderr, "\t\tb = %e\n", arch_model->regression.b); fprintf(stderr, "\t\tc = %e\n", arch_model->regression.c * 0.001); fprintf(gnuplot_file, "%s%g * x ** %g + %s%g title \"Non-Linear Regression %s\"", factor, arch_model->regression.a, arch_model->regression.b, factor, arch_model->regression.c, arch_name); } } static void display_history_based_perf_models(FILE *gnuplot_file, struct starpu_perfmodel *model, struct starpu_perfmodel *energy_model, int *first, struct _perfmodel_plot_options *options) { FILE *datafile; struct starpu_perfmodel_history_list *ptr; char arch_name[32]; int col; unsigned long minimum = 0; datafile = fopen(options->avg_file_name, "w"); col = 2; int i; for(i = 0; i < model->state->ncombs; i++) { int comb = model->state->combs[i]; if (options->comb_is_set == 0 || options->comb == comb) { struct starpu_perfmodel_arch *arch; int impl; arch = starpu_perfmodel_arch_comb_fetch(comb); for(impl = 0; impl < model->state->nimpls[comb]; impl++) { struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][impl]; starpu_perfmodel_get_arch_name(arch, arch_name, 32, impl); if (arch_model->list) { char *arch_name2 = replace_char(arch_name, '_', '-'); print_comma(gnuplot_file, first); fprintf(gnuplot_file, "\"%s\" using 1:%d:%d with errorlines title \"Average %s\"", options->avg_file_name, col, col+1, arch_name2); col += 2; free(arch_name2); } } } } /* Dump entries in size order */ while (1) { unsigned long last = minimum; minimum = ULONG_MAX; /* Get the next minimum */ for(i = 0; i < model->state->ncombs; i++) { int comb = model->state->combs[i]; if (options->comb_is_set == 0 || options->comb == comb) { int impl; for(impl = 0; impl < model->state->nimpls[comb]; impl++) { struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][impl]; for (ptr = arch_model->list; ptr; ptr = ptr->next) { unsigned long size = ptr->entry->size; if (size > last && size < minimum) minimum = size; } } } } if (minimum == ULONG_MAX) break; fprintf(stderr, "%lu ", minimum); fprintf(datafile, "%-15lu ", minimum); /* Find that minimum */ for(i = 0; i < model->state->ncombs; i++) { int comb = model->state->combs[i]; if (options->comb_is_set == 0 || options->comb == comb) { int impl; for(impl = 0; impl < model->state->nimpls[comb]; impl++) { int found = 0; struct starpu_perfmodel_per_arch *arch_model = &model->state->per_arch[comb][impl]; for (ptr = arch_model->list; ptr; ptr = ptr->next) { struct starpu_perfmodel_history_entry *entry = ptr->entry; if (entry->size == minimum) { if (options->energy_symbol) { /* Look for the same in the energy model */ if (impl >= energy_model->state->nimpls[comb]) /* Doesn't have measurements for this impl */ break; struct starpu_perfmodel_per_arch *arch_model2 = &energy_model->state->per_arch[comb][impl]; struct starpu_perfmodel_history_list *ptr2; for (ptr2 = arch_model2->list; ptr2; ptr2 = ptr2->next) { struct starpu_perfmodel_history_entry *entry2 = ptr2->entry; if (entry2->size == minimum) { /* Found the same size, can print */ double rel_delta = sqrt( (entry2->deviation * entry2->deviation) / (entry2->mean * entry2->mean) + (entry->deviation * entry->deviation) / (entry->mean * entry->mean)); fprintf(datafile, "\t%-15le\t%-15le", entry2->mean / (entry->mean / 1000000), entry2->mean / (entry->mean / 1000000) * rel_delta); found = 1; break; } } } else { if (options->gflops) if (options->energy) fprintf(datafile, "\t%-15le\t%-15le", entry->flops / entry->mean / 1000000000, entry->flops * entry->deviation / (entry->mean * entry->mean) / 1000000000 ); else fprintf(datafile, "\t%-15le\t%-15le", entry->flops / (entry->mean * 1000), entry->flops * entry->deviation / (entry->mean * entry->mean * 1000) ); else if (options->energy) fprintf(datafile, "\t%-15le\t%-15le", entry->mean, entry->deviation); else fprintf(datafile, "\t%-15le\t%-15le", 0.001*entry->mean, 0.001*entry->deviation); found = 1; } break; } } if (!found && arch_model->list) /* No value for this arch. */ fprintf(datafile, "\t\"\"\t\"\""); } } } fprintf(datafile, "\n"); } fprintf(stderr, "\n"); fclose(datafile); } static void display_all_perf_models(FILE *gnuplot_file, struct starpu_perfmodel *model, int *first, struct _perfmodel_plot_options *options) { int i; for(i = 0; i < model->state->ncombs; i++) { int comb = model->state->combs[i]; if (options->comb_is_set == 0 || options->comb == comb) { struct starpu_perfmodel_arch *arch; int impl; arch = starpu_perfmodel_arch_comb_fetch(comb); for(impl = 0; impl < model->state->nimpls[comb]; impl++) { struct starpu_perfmodel_per_arch *archmodel = &model->state->per_arch[comb][impl]; display_perf_model(gnuplot_file, arch, archmodel, impl, first, options); } } } } #ifdef STARPU_USE_FXT static void dump_data_file(FILE *data_file, struct _perfmodel_plot_options *options) { int i; for (i = 0; i < options->fxt_options.dumped_codelets_count; i++) { /* Dump only if the codelet symbol matches user's request (with or without the machine name) */ char *tmp = strdup(options->symbol); char *dot = strchr(tmp, '.'); if (dot) tmp[strlen(tmp)-strlen(dot)] = '\0'; if ((strncmp(options->dumped_codelets[i].symbol, options->symbol, (FXT_MAX_PARAMS - 4)*sizeof(unsigned long)-1) == 0) || (strncmp(options->dumped_codelets[i].symbol, tmp, (FXT_MAX_PARAMS - 4)*sizeof(unsigned long)-1) == 0)) { char *archname = options->dumped_codelets[i].perfmodel_archname; size_t size = options->dumped_codelets[i].size; float time = options->dumped_codelets[i].time; fprintf(data_file, "%s %f %f\n", archname, (float)size, time); } free(tmp); } free(options->dumped_codelets); } #endif static void display_selected_models(FILE *gnuplot_file, struct starpu_perfmodel *model, struct starpu_perfmodel *energy_model, struct _perfmodel_plot_options *options) { char hostname[64]; char *symbol = replace_char(options->symbol, '_', '-'); _starpu_gethostname(hostname, sizeof(hostname)); fprintf(gnuplot_file, "#!/usr/bin/gnuplot -persist\n"); fprintf(gnuplot_file, "\n"); fprintf(gnuplot_file, "set term postscript eps enhanced color\n"); fprintf(gnuplot_file, "set output \"starpu_%s%s.eps\"\n", options->energy_symbol?"power_":options->gflops?"gflops_":"", options->symbol); fprintf(gnuplot_file, "set title \"Model for codelet %s on %s\"\n", symbol, hostname); fprintf(gnuplot_file, "set xlabel \"Total data size\"\n"); if (options->energy_symbol) fprintf(gnuplot_file, "set ylabel \"Power (W)\"\n"); else if (options->gflops) if (options->energy) fprintf(gnuplot_file, "set ylabel \"GFlop/s/W\"\n"); else fprintf(gnuplot_file, "set ylabel \"GFlop/s\"\n"); else if (options->energy) fprintf(gnuplot_file, "set ylabel \"Energy (J)\"\n"); else fprintf(gnuplot_file, "set ylabel \"Time (ms)\"\n"); fprintf(gnuplot_file, "\n"); fprintf(gnuplot_file, "set key top left\n"); fprintf(gnuplot_file, "set logscale x\n"); fprintf(gnuplot_file, "set logscale y\n"); fprintf(gnuplot_file, "\n"); /* If no input data is given to gnuplot, we at least need to specify an * arbitrary range. */ if (options->with_fxt_file == 0 || options->gflops) fprintf(gnuplot_file, "set xrange [1 < * < 10**9 : 1 < * < 10**9]\n\n"); int first = 1; fprintf(gnuplot_file, "plot\t"); /* display all or selected combinations */ if (!options->energy_symbol) display_all_perf_models(gnuplot_file, model, &first, options); display_history_based_perf_models(gnuplot_file, model, energy_model, &first, options); fprintf(gnuplot_file, "\nset term png\n"); fprintf(gnuplot_file, "set output \"starpu_%s%s.png\"\n", options->energy_symbol?"power_":options->gflops?"gflops_":"", options->symbol); fprintf(gnuplot_file, "replot\n"); free(symbol); } int main(int argc, char **argv) { int ret = 0; struct starpu_perfmodel model = { .type = STARPU_PERFMODEL_INVALID }; struct starpu_perfmodel energy_model = { .type = STARPU_PERFMODEL_INVALID }; char gnuplot_file_name[256]; struct _perfmodel_plot_options options; char *directory = strdup("./"); #if defined(_WIN32) && !defined(__CYGWIN__) WSADATA wsadata; WSAStartup(MAKEWORD(1,0), &wsadata); #endif parse_args(argc, argv, &options, &directory); starpu_drivers_preinit(); starpu_perfmodel_initialize(); if (options.directory) { starpu_perfmodel_directory(stdout); } else if (options.list) { ret = starpu_perfmodel_list(stdout); if (ret) { _STARPU_DISP("The performance model directory is invalid\n"); } } else { /* Load the performance model associated to the symbol */ ret = starpu_perfmodel_load_symbol(options.symbol, &model); if (options.energy_symbol) ret = starpu_perfmodel_load_symbol(options.energy_symbol, &energy_model); if (ret) { _STARPU_DISP("The performance model for the symbol <%s> could not be loaded\n", options.symbol); } else if (options.list_combs) { ret = starpu_perfmodel_list_combs(stdout, &model); if (ret) { fprintf(stderr, "Error when listing combinations for model <%s>\n", options.symbol); } } else { /* If some FxT input was specified, we put the points on the graph */ #ifdef STARPU_USE_FXT if (options.with_fxt_file) { starpu_fxt_generate_trace(&options.fxt_options); snprintf(options.data_file_name, sizeof(options.data_file_name), "%s/starpu_%s.data", directory, options.symbol); FILE *data_file = fopen(options.data_file_name, "w+"); STARPU_ASSERT(data_file); dump_data_file(data_file, &options); fclose(data_file); } #endif if (options.energy_symbol) { snprintf(gnuplot_file_name, sizeof(gnuplot_file_name), "%s/starpu_power_%s.gp", directory, options.symbol); snprintf(options.avg_file_name, sizeof(options.avg_file_name), "%s/starpu_power_%s_avg.data", directory, options.symbol); } else if (options.gflops) { snprintf(gnuplot_file_name, sizeof(gnuplot_file_name), "%s/starpu_gflops_%s.gp", directory, options.symbol); snprintf(options.avg_file_name, sizeof(options.avg_file_name), "%s/starpu_gflops_%s_avg.data", directory, options.symbol); } else { snprintf(gnuplot_file_name, sizeof(gnuplot_file_name), "%s/starpu_%s.gp", directory, options.symbol); snprintf(options.avg_file_name, sizeof(options.avg_file_name), "%s/starpu_%s_avg.data", directory, options.symbol); } FILE *gnuplot_file = fopen(gnuplot_file_name, "w+"); STARPU_ASSERT_MSG(gnuplot_file, "Cannot create file <%s>\n", gnuplot_file_name); display_selected_models(gnuplot_file, &model, &energy_model, &options); fprintf(gnuplot_file,"\n"); fclose(gnuplot_file); /* Retrieve the current mode of the gnuplot executable */ struct stat sb; ret = stat(gnuplot_file_name, &sb); if (ret) { perror("stat"); STARPU_ABORT(); } /* Make the gnuplot script executable */ ret = chmod(gnuplot_file_name, sb.st_mode|S_IXUSR #ifdef S_IXGRP |S_IXGRP #endif #ifdef S_IXOTH |S_IXOTH #endif ); if (ret) { perror("chmod"); STARPU_ABORT(); } _STARPU_DISP("Gnuplot file <%s> generated\n", gnuplot_file_name); } starpu_perfmodel_unload_model(&model); if (options.energy_symbol) starpu_perfmodel_unload_model(&energy_model); } starpu_perfmodel_free_sampling(); free(directory); #ifdef STARPU_USE_FXT free(options.fxt_options.dir); starpu_fxt_options_shutdown(&options.fxt_options); #endif return ret; } starpu-1.4.9+dfsg/tools/starpu_perfmodel_recdump.c000066400000000000000000000301461507764646700224120ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2011-2011 Télécom Sud Paris * Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #if !defined(_WIN32) || defined(__MINGW32__) || defined(__CYGWIN__) #include #include #endif #include #include #include #include #include #include #include #include // we need to browse the list associated to history-based models // just like in starpu_perfmodel_plot #include #define STRHEADCMP(s, head) strncmp(s, head, strlen(head)) #if defined(_WIN32) && !defined(__CYGWIN__) #include #endif #define PROGNAME "starpu_perfmodel_recdump" struct _footprint_list { struct _footprint_list* next; uint32_t footprint; }; struct _footprint_list* add_footprint(struct _footprint_list* list, uint32_t footprint) { struct _footprint_list * l = list; while(l) { if(l->footprint == footprint) break; l = l->next; } if(l) return list; else { struct _footprint_list *res; _STARPU_MALLOC(res, sizeof(struct _footprint_list)); res->footprint = footprint; res->next = list; return res; } } static struct model { UT_hash_handle hh; char *name; struct starpu_perfmodel model; struct _footprint_list* footprints; } *models; void get_comb_name(int comb, char* name, int name_size) { struct starpu_perfmodel_arch *arch_comb = starpu_perfmodel_arch_comb_fetch(comb); STARPU_ASSERT_MSG(arch_comb->ndevices == 1, "Cannot work with multi-device workers\n"); snprintf(name, name_size, "%s%d", starpu_perfmodel_get_archtype_name(arch_comb->devices[0].type), arch_comb->devices[0].devid); } void print_archs(FILE* output) { int nb_workers = 0; unsigned workerid, node, src, dst; int comb, old_comb = -1; fprintf(output, "%%rec: worker_count\n\n"); for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) { struct starpu_perfmodel_arch* arch = starpu_worker_get_perf_archtype(workerid, STARPU_NMAX_SCHED_CTXS); comb = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); if (comb < 0) continue; // Ignore architecture which is not present in any perfmodel if(comb != old_comb) { if(nb_workers > 0) { char name[32]; get_comb_name(old_comb, name, 32); fprintf(output, "Architecture: %s\n", name); fprintf(output, "NbWorkers: %d\n\n", nb_workers); } old_comb = comb; nb_workers = 1; } else { nb_workers += 1; } } if(nb_workers > 0) { char name[32]; get_comb_name(old_comb, name, 32); fprintf(output, "Architecture: %s\n", name); fprintf(output, "NbWorkers: %d\n\n", nb_workers); } fprintf(output, "%%rec: memory_workers\n\n"); for (node = 0; node < starpu_memory_nodes_get_count(); node++) { unsigned printed = 0; char name[32]; fprintf(output, "MemoryNode: %u\n", node); starpu_memory_node_get_name(node, name, sizeof(name)); fprintf(output, "Name: %s\n", name); fprintf(output, "Size: %ld\n", (long) starpu_memory_get_total(node)); for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) { if (starpu_worker_get_memory_node(workerid) == node) { if (!printed) { fprintf(output, "Workers:"); printed = 1; } fprintf(output, " %u", workerid); } } if (printed) fprintf(output, "\n"); fprintf(output, "\n"); } fprintf(output, "%%rec: memory_performance\n\n"); for (src = 0; src < starpu_memory_nodes_get_count(); src++) { for (dst = 0; dst < starpu_memory_nodes_get_count(); dst++) { if (src != dst) { fprintf(output, "MemoryNodeSrc: %u\n", src); fprintf(output, "MemoryNodeDst: %u\n", dst); fprintf(output, "Bandwidth: %f\n", starpu_transfer_bandwidth(src, dst)); fprintf(output, "Latency: %f\n", starpu_transfer_latency(src, dst)); fprintf(output, "\n"); } } } } /* output file name */ static char* poutput = NULL; static char* pinput = NULL; static void usage() { fprintf(stderr, "Dumps perfmodels to a rec file\n\n"); fprintf(stderr, "Usage: %s [ input-file ] [ -o output-file ]\n", PROGNAME); fprintf(stderr, "\n"); fprintf(stderr, "If input or output file names are not given, stdin and stdout are used."); fprintf(stderr, "\n"); fprintf(stderr, "Report bugs to <"PACKAGE_BUGREPORT">."); fprintf(stderr, "\n"); } static void print_entry(const char *name, const char *archname, FILE *output, struct starpu_perfmodel_history_entry *entry) { fprintf(output, "Model: %s\n", name); fprintf(output, "Architecture: %s\n", archname); fprintf(output, "Footprint: %08x\n", entry->footprint); fprintf(output, "Size: %lu\n", (unsigned long) entry->size); if (!isnan(entry->flops)) fprintf(output, "Flops: %-15e\n", entry->flops); fprintf(output, "Mean: %-15e\nStddev: %-15e\n", entry->mean, entry->deviation); fprintf(output, "Samples: %u\n", entry->nsample); fprintf(output, "\n"); } static void parse_args(int argc, char **argv) { int c; static struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"output", required_argument, NULL, 'o'}, {0, 0, 0, 0} }; int option_index; while ((c = getopt_long(argc, argv, "ho:", long_options, &option_index)) != -1) { switch (c) { case 'h': /* display help */ usage(); exit(EXIT_SUCCESS); break; case 'o': poutput = optarg; break; case '?': default: fprintf(stderr, "Unrecognized option: -%c\n", optopt); } } if(optind < argc) { pinput = argv[optind++]; if(optind < argc) { fprintf(stderr, "Unrecognized argument: %s\n", argv[optind]); exit(EXIT_FAILURE); } } } int main(int argc, char **argv) { #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__MINGW32__) WSADATA wsadata; WSAStartup(MAKEWORD(1,0), &wsadata); _STARPU_MSG("Listing perfmodels is not implemented on pure Windows yet\n"); return 1; #else FILE* output; parse_args(argc, argv); if(poutput != NULL) { output = fopen(poutput, "w+"); if (!output) { fprintf(stderr, "couldn't open %s for write: %s\n", poutput, strerror(errno)); exit(EXIT_FAILURE); } } else { output = stdout; } if (starpu_init(NULL) != 0) { fprintf(stderr, "StarPU initialization failure\n"); exit(EXIT_FAILURE); } starpu_pause(); if(pinput) { FILE* input = fopen(pinput, "r"); char s[1024], *c; struct model *model, *tmp=NULL; uint32_t footprint = 0; char *model_name = NULL; int ret; if (!input) { fprintf(stderr, "couldn't open %s for read: %s\n", pinput, strerror(errno)); exit(EXIT_FAILURE); } while (fgets(s, sizeof(s), input)) { if (strlen(s) == sizeof(s) - 1) { fprintf(stderr, "oops, very long line '%s', it's odd\n", s); exit(EXIT_FAILURE); } if (s[0] == '\n') { /* empty line, end of task */ if (model_name) { /* Try to get already-loaded model */ HASH_FIND_STR(models, model_name, model); if (model == NULL) { _STARPU_MALLOC(model, sizeof(*model)); model->name = model_name; model->footprints = NULL; memset(&model->model, 0, sizeof(model->model)); model->model.type = STARPU_PERFMODEL_INVALID; ret = starpu_perfmodel_load_symbol(model_name, &model->model); if (ret == 1) { fprintf(stderr, "The performance model for the symbol <%s> could not be loaded\n", model_name); exit(EXIT_FAILURE); } HASH_ADD_STR(models, name, model); } else { free(model_name); } model->footprints = add_footprint(model->footprints, footprint); model_name = NULL; } continue; } /* Get rec field name */ c = strchr(s, ':'); if (!c) { fprintf(stderr, "odd line '%s'\n", s); exit(EXIT_FAILURE); } if (!STRHEADCMP(s, "Footprint: ")) { footprint = strtoul(s + strlen("Footprint: "), NULL, 16); } else if (!STRHEADCMP(s, "Model: ")) { model_name = strdup(s + strlen("Model: ")); model_name[strlen(model_name) - 1] = '\0'; /* Drop '\n' */ } } /* All models loaded */ { print_archs(output); fprintf(output, "%%rec: timing\n\n"); int nb_combs = starpu_perfmodel_get_narch_combs(); HASH_ITER(hh, models, model, tmp) { struct _footprint_list* lf = model->footprints, *ltmp; int comb; while(lf) { for(comb = 0; comb < nb_combs; comb++) { char archname[32]; get_comb_name(comb, archname, 32); if(!model->model.state || model->model.state->nimpls[comb] == 0) { _STARPU_DISP("Symbol %s does not have any implementation on comb %d, not dumping\n", model->name, comb); continue; } if(model->model.state->nimpls[comb] > 1) _STARPU_DISP("Warning, more than one implementations in comb %d of symbol %s, using only the first one\n", comb, model->name); struct starpu_perfmodel_per_arch *arch_model = &model->model.state->per_arch[comb][0]; struct starpu_perfmodel_history_list *ptr; ptr = arch_model->list; if(!ptr) _STARPU_DISP("Implementation %d of symbol %s does not have history based model, not dumping\n", comb, model->name); else while(ptr) { struct starpu_perfmodel_history_entry *entry = ptr->entry; if(entry->footprint == lf->footprint) { print_entry(model->name, archname, output, entry); break; } ptr=ptr->next; } } ltmp = lf->next; free(lf); lf = ltmp; } starpu_perfmodel_unload_model(&model->model); free(model->name); HASH_DEL(models, model); free(model); } } fclose(input); } else { fprintf(output, "%%rec: timing\n\n"); char **paths; DIR *dp; struct dirent *ep; int i; paths = _starpu_get_perf_model_dirs_codelet(); for(i=0 ; paths[i] != NULL ; i++) { _STARPU_DISP("Processing directory %s\n", paths[i]); dp = opendir(paths[i]); if (dp != NULL) { while ((ep = readdir(dp))) { if (strcmp(ep->d_name, ".") && strcmp(ep->d_name, "..")) { int comb, nb_combs; char* symbol = strdup(ep->d_name); char *dot = strrchr(symbol, '.'); struct starpu_perfmodel model = {.type = STARPU_PERFMODEL_INVALID }; if(dot) *dot = '\0'; if (starpu_perfmodel_load_symbol(symbol, &model) != 0) { free(symbol); continue; } if(model.state == NULL) { free(symbol); continue; } _STARPU_DISP("Dumping %s\n", symbol); nb_combs = starpu_perfmodel_get_narch_combs(); for(comb = 0; comb < nb_combs; ++comb) { char name[32]; get_comb_name(comb, name, 32); if(!model.state || model.state->nimpls[comb] == 0) { _STARPU_DISP("Symbol %s does not have any implementation on comb %d, not dumping\n", symbol, comb); fprintf(output, "\n"); continue; } struct starpu_perfmodel_per_arch *arch_model = &model.state->per_arch[comb][0]; struct starpu_perfmodel_history_list *ptr; ptr = arch_model->list; if(!ptr) _STARPU_DISP("Symbol %s for comb %d does not have history based model, not dumping\n", symbol, comb); else { while(ptr) { print_entry(symbol, name, output, ptr->entry); ptr=ptr->next; } } } starpu_perfmodel_unload_model(&model); free(symbol); } } closedir(dp); } else { _STARPU_DISP("Could not open the perfmodel directory <%s>: %s\n", paths[i], strerror(errno)); } } print_archs(output); } starpu_resume(); starpu_shutdown(); fclose(output); return 0; #endif } starpu-1.4.9+dfsg/tools/starpu_replay.c000066400000000000000000000733751507764646700202250ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2017-2017 Erwan Leria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This reads a tasks.rec file and replays the recorded task graph. * Currently, this version is done to run with simgrid. * * For further information, contact erwan.leria@inria.fr */ #include #include #include #include #include #include #include #include #define REPLAY_NMAX_DEPENDENCIES 8 #define ARRAY_DUP(in, out, n) memcpy(out, in, n * sizeof(*out)) #define ARRAY_INIT(array, n) memset(array, 0, n * sizeof(*array)) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Declarations of global variables, structures, pointers, ... * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ static int static_workerid; /* TODO: move to core header while moving starpu_replay_sched to core */ extern void schedRecInit(const char * filename); extern void applySchedRec(struct starpu_task * starpu_task, long submit_order); /* Enum for normal and "wontuse" tasks */ enum task_type {NormalTask, WontUseTask}; typedef unsigned long jobid_t; enum task_type control; static char *name = NULL; static char *model = NULL; static jobid_t jobid; static jobid_t *dependson; static long submitorder = -1; static starpu_tag_t tag; static int workerid; static uint32_t footprint; static double flops, total_flops = 0.; static double startTime; //start time (The instant when the task starts) static double endTime; //end time (The instant when the task ends) static int iteration = -1; static starpu_data_handle_t handles[STARPU_NMAXBUFS]; static enum starpu_data_access_mode modes[STARPU_NMAXBUFS]; static char normal_reg_signal[STARPU_NMAXBUFS]; /* Use the following arrays when the number of data is greater than STARPU_NMAXBUFS */ starpu_data_handle_t * handles_ptr; enum starpu_data_access_mode * modes_ptr; size_t * sizes_set; static size_t dependson_size; static size_t ndependson; static unsigned nb_parameters = 0; /* Number of parameters */ static int alloc_mode; /* If alloc_mode value is 1, then the handles are stored in dyn_handles, else they are in handles */ static int priority = 0; char * reg_signal = NULL; /* The register signal (0 or 1 coded on 8 bit) is used to know which handle of the task has to be registered in StarPU (in fact to avoid handle twice)*/ /* Record all tasks, hashed by jobid. */ static struct task { struct starpu_rbtree_node node; UT_hash_handle hh; jobid_t jobid; int iteration; long submit_order; jobid_t *deps; size_t ndependson; struct starpu_task task; enum task_type type; int reg_signal; } *tasks; /* Record handles */ static struct handle { UT_hash_handle hh; starpu_data_handle_t mem_ptr; /* This value should be the registered handle */ starpu_data_handle_t handle; /* The key is the original value of the handle in the file */ } * handles_hash; /* Record models */ static struct perfmodel { UT_hash_handle hh; struct starpu_perfmodel perfmodel; char * model_name; } * model_hash; /* * Replay data interface * We don't care about many things anyway, essentially only sizes. */ struct replay_interface { enum starpu_data_interface_id id; starpu_data_handle_t orig_handle; size_t size; size_t alloc_size; size_t max_size; }; static struct starpu_data_interface_ops replay_interface_ops; static void register_replay(starpu_data_handle_t handle, int home_node, void *data_interface) { (void) home_node; struct replay_interface *replay_interface = data_interface; unsigned node; for (node = 0; node < STARPU_MAXNODES; node++) { struct replay_interface *local_interface = starpu_data_get_interface_on_node(handle, node); local_interface->id = replay_interface->id; local_interface->orig_handle = replay_interface->orig_handle; local_interface->size = replay_interface->size; local_interface->alloc_size = replay_interface->alloc_size; local_interface->max_size = replay_interface->max_size; } } static void replay_data_register(starpu_data_handle_t *handleptr, starpu_data_handle_t orig_handle, int home_node, size_t size, size_t alloc_size, size_t max_size) { struct replay_interface interface = { .id = replay_interface_ops.interfaceid, .orig_handle = orig_handle, .size = size, .alloc_size = alloc_size, .max_size = max_size, }; starpu_data_register(handleptr, home_node, &interface, &replay_interface_ops); } static size_t replay_get_size(starpu_data_handle_t handle) { struct replay_interface *interface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return interface->size; } static size_t replay_get_alloc_size(starpu_data_handle_t handle) { struct replay_interface *interface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return interface->alloc_size; } static size_t replay_get_max_size(starpu_data_handle_t handle) { struct replay_interface *interface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); return interface->max_size; } static uint32_t replay_footprint(starpu_data_handle_t handle) { return starpu_hash_crc32c_be(replay_get_size(handle), 0); } static int replay_compare(void *data_interface_a, void *data_interface_b) { struct replay_interface *replay_a = data_interface_a; struct replay_interface *replay_b = data_interface_b; /* Two variables are considered compatible if they have the same size */ return replay_a->size == replay_b->size; } static void display_replay(starpu_data_handle_t handle, FILE *f) { struct replay_interface *replay_interface = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM); fprintf(f, "%lu/%lu/%lu\t", (unsigned long) replay_interface->size, (unsigned long) replay_interface->alloc_size, (unsigned long) replay_interface->max_size); } static starpu_ssize_t describe_replay(void *data_interface, char *buf, size_t size) { struct replay_interface *replay_interface = data_interface; return snprintf(buf, size, "r%lu/%lu/%lu\t", (unsigned long) replay_interface->size, (unsigned long) replay_interface->alloc_size, (unsigned long) replay_interface->max_size); } static starpu_ssize_t allocate_replay_on_node(void *data_interface, unsigned dst_node) { struct replay_interface *replay_interface = data_interface; starpu_memory_allocate(dst_node, replay_interface->alloc_size, STARPU_MEMORY_OVERFLOW); return 0; } static void free_replay_on_node(void *data_interface, unsigned dst_node) { struct replay_interface *replay_interface = data_interface; starpu_memory_deallocate(dst_node, replay_interface->alloc_size); } static int replay_copy(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data) { (void) dst_interface; struct replay_interface *src = src_interface; /* We don't care about pointers */ return starpu_interface_copy(1, 0, src_node, 1, 0, dst_node, src->size, async_data); } static const struct starpu_data_copy_methods replay_copy_data_methods = { .any_to_any = replay_copy, }; static struct starpu_data_interface_ops replay_interface_ops = { .register_data_handle = register_replay, .allocate_data_on_node = allocate_replay_on_node, .free_data_on_node = free_replay_on_node, .copy_methods = &replay_copy_data_methods, .get_size = replay_get_size, .get_alloc_size = replay_get_alloc_size, .get_max_size = replay_get_max_size, .footprint = replay_footprint, .compare = replay_compare, .interfaceid = STARPU_UNKNOWN_INTERFACE_ID, .interface_size = sizeof(struct replay_interface), .display = display_replay, .pack_data = NULL, .peek_data = NULL, .unpack_data = NULL, .describe = describe_replay, /* We want to observe actual allocations/deallocations */ .dontcache = 1, }; /* [SUBMITORDER] The tree of the submit order */ static struct starpu_rbtree tree = STARPU_RBTREE_INITIALIZER; /* the cmp_fn arg for rb_tree_insert() */ unsigned int diff(struct starpu_rbtree_node * left_elm, struct starpu_rbtree_node * right_elm) { long oleft = ((struct task *) left_elm)->submit_order; long oright = ((struct task *) right_elm)->submit_order; if (oleft == -1 && oright == -1) { if (left_elm < right_elm) return -1; else return 1; } return oleft - oright; } /* Settings for the perfmodel */ struct task_arg { uint32_t footprint; unsigned narch; double perf[]; }; uint32_t get_footprint(struct starpu_task * task) { return ((struct task_arg*) (task->cl_arg))->footprint; } double arch_cost_function(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl) { int device = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); STARPU_ASSERT(device != -1); (void) nimpl; /* Then, get the pointer to the value of the expected time */ struct task_arg *arg = task->cl_arg; if (device < (int) arg->narch) { double val = arg->perf[device]; if (!(val == 0 || isnan(val))) return val; } fprintf(stderr, "[starpu] Error, expected_time is 0 or lower (replay.c line : %d)", __LINE__- 6); return 0.0; } /* End of settings */ static unsigned long nexecuted_tasks; void dumb_kernel(void *buffers[], void *args) { (void) buffers; (void) args; nexecuted_tasks++; if (!(nexecuted_tasks % 1000)) { fprintf(stderr, "\rExecuted task %lu...", nexecuted_tasks); fflush(stdout); } unsigned this_worker = starpu_worker_get_id_check(); struct starpu_perfmodel_arch *perf_arch = starpu_worker_get_perf_archtype(this_worker, STARPU_NMAX_SCHED_CTXS); struct starpu_task *task = starpu_task_get_current(); unsigned impl = starpu_task_get_implementation(task); double length = starpu_task_expected_length(task, perf_arch, impl); STARPU_ASSERT_MSG(!_STARPU_IS_ZERO(length) && !isnan(length), "Codelet %s does not have a perfmodel, or is not calibrated enough, please re-run in non-simgrid mode until it is calibrated", starpu_task_get_name(task)); starpu_sleep(length / 1000000); } /* [CODELET] Initialization of an unique codelet for all the tasks*/ static int can_execute(unsigned worker_id, struct starpu_task *task, unsigned nimpl) { struct starpu_perfmodel_arch * arch = starpu_worker_get_perf_archtype(worker_id, STARPU_NMAX_SCHED_CTXS); int device = starpu_perfmodel_arch_comb_get(arch->ndevices, arch->devices); if (device == -1) /* Doesn't exist yet, thus unknown, assuming it can not work there. */ return 0; (void) nimpl; /* Then, get the pointer to the value of the expected time */ struct task_arg *arg = task->cl_arg; if (device < (int) arg->narch) { double val = arg->perf[device]; if (!(val == 0 || isnan(val))) return 1; } return 0; } static struct starpu_perfmodel myperfmodel = { .type = STARPU_PER_ARCH, .arch_cost_function = arch_cost_function, .footprint = get_footprint, }; static struct starpu_codelet cl = { .cpu_funcs = { dumb_kernel }, .cpu_funcs_name = { "dumb_kernel" }, .cuda_funcs = { dumb_kernel }, .opencl_funcs = { dumb_kernel }, .nbuffers = STARPU_VARIABLE_NBUFFERS, .can_execute = can_execute, .model = &myperfmodel, .flags = STARPU_CODELET_SIMGRID_EXECUTE, }; /* * * * * * * * * * * * * * * * * * * Functions * * * * * * * * * * * * * * * * * * * */ /* The following function checks if the program has to use static or dynamic arrays*/ static int set_alloc_mode(int total_parameters) { return total_parameters <= STARPU_NMAXBUFS; } /* According to the allocation mode, modify handles_ptr and modes_ptr in static or dynamic */ static void arrays_managing(int mode) { if (mode) { handles_ptr = &handles[0]; modes_ptr = &modes[0]; reg_signal = &normal_reg_signal[0]; } else { _STARPU_MALLOC(handles_ptr, sizeof(*handles_ptr) * nb_parameters); _STARPU_MALLOC(modes_ptr, sizeof(*modes_ptr) * nb_parameters); _STARPU_CALLOC(reg_signal, nb_parameters, sizeof(char)); } } static unsigned count_number_tokens(const char* buffer, const char* delim) { char* dup = strdup(buffer); int result = 0; char* token = strtok(dup, delim); while(token != NULL) { ++result; token = strtok(NULL, delim); } free(dup); return result; } /* Check if a handle hasn't been registered yet */ static void variable_data_register_check(size_t * array_of_size, int nb_handles) { int h, i; starpu_data_handle_t orig_handles[nb_handles]; ARRAY_DUP(handles_ptr, orig_handles, nb_handles); for (h = 0 ; h < nb_handles ; h++) { if(reg_signal[h]) /* Get the register signal, if it's 1 do ... */ { struct handle * handles_cell; for (i = 0; i < h; i++) { /* Maybe we just registered it in this very h loop */ if (handles_ptr[h] == orig_handles[i]) { handles_ptr[h] = handles_ptr[i]; break; } } if (i == h) { _STARPU_MALLOC(handles_cell, sizeof(*handles_cell)); STARPU_ASSERT(handles_cell != NULL); handles_cell->handle = handles_ptr[h]; /* Get the hidden key (initial handle from the file) to store it as a key*/ replay_data_register(handles_ptr+h, handles_ptr[h], modes_ptr[h] & STARPU_R ? STARPU_MAIN_RAM : -1, array_of_size[h], array_of_size[h], array_of_size[h]); handles_cell->mem_ptr = handles_ptr[h]; /* Store the new value of the handle into the hash table */ HASH_ADD(hh, handles_hash, handle, sizeof(handles_ptr[h]), handles_cell); } } } } void reset(void) { control = NormalTask; if (name != NULL) { free(name); name = NULL; } if (model != NULL) { free(model); model = NULL; } if (sizes_set != NULL) { free(sizes_set); sizes_set = NULL; } if (reg_signal != NULL) { if (!alloc_mode) { free(reg_signal); reg_signal = NULL; } else { ARRAY_INIT(reg_signal, nb_parameters); } } jobid = 0; ndependson = 0; tag = -1; workerid = -1; footprint = 0; startTime = 0.0; endTime = 0.0; if (submitorder != -1) submitorder = -1; iteration = -1; nb_parameters = 0; alloc_mode = 1; } void fix_wontuse_handle(struct task * wontuseTask) { STARPU_ASSERT(wontuseTask); if (!wontuseTask->reg_signal) /* Data was already registered when we created this task, so it's already a handle */ return; struct handle *handle_tmp; /* Data was not registered when we created this task, so this is the application pointer, look it up now */ HASH_FIND(hh, handles_hash, &wontuseTask->task.handles[0], sizeof(wontuseTask->task.handles[0]), handle_tmp); if (handle_tmp) wontuseTask->task.handles[0] = handle_tmp->mem_ptr; else /* This data wasn't actually used, don't care about it */ wontuseTask->task.handles[0] = NULL; } /* Function that submits all the tasks (used when the program reaches EOF) */ int submit_tasks(void) { /* Add dependencies */ const struct starpu_rbtree * tmptree = &tree; struct starpu_rbtree_node * currentNode = starpu_rbtree_first(tmptree); long last_submitorder = 0; while (currentNode != NULL) { struct task * currentTask = (struct task *) currentNode; if (currentTask->type == NormalTask) { if (currentTask->submit_order != -1) { STARPU_ASSERT(currentTask->submit_order >= last_submitorder + 1); while (currentTask->submit_order > last_submitorder + 1) { /* Oops, some tasks were not submitted by original application, fake some */ struct starpu_task *task = starpu_task_create(); int ret; task->cl = NULL; task->name = "fake task for submit order"; ret = starpu_task_submit(task); STARPU_ASSERT(ret == 0); last_submitorder++; } } if (currentTask->ndependson > 0) { struct starpu_task * taskdeps[currentTask->ndependson]; unsigned i, j = 0; for (i = 0; i < currentTask->ndependson; i++) { struct task * taskdep; /* Get the ith jobid of deps_jobid */ HASH_FIND(hh, tasks, ¤tTask->deps[i], sizeof(jobid), taskdep); if(taskdep) { taskdeps[j] = &taskdep->task; j ++; } } starpu_task_declare_deps_array(¤tTask->task, j, taskdeps); } if (!(currentTask->iteration == -1)) starpu_iteration_push(currentTask->iteration); applySchedRec(¤tTask->task, currentTask->submit_order); if (currentTask->submit_order == -1) currentTask->task.no_submitorder = 1; int ret_val = starpu_task_submit(¤tTask->task); if (!(currentTask->iteration == -1)) starpu_iteration_pop(); if (ret_val != 0) { fprintf(stderr, "\nWhile submitting task %ld (%s): return %d\n", currentTask->submit_order, currentTask->task.name? currentTask->task.name : "unknown", ret_val); return -1; } //fprintf(stderr, "submitting task %s (%lu, %llu)\n", currentTask->task.name?currentTask->task.name:"anonymous", currentTask->jobid, (unsigned long long) currentTask->task.tag_id); if (!(currentTask->submit_order % 1000)) { fprintf(stderr, "\rSubmitted task order %ld...", currentTask->submit_order); fflush(stdout); } if (currentTask->submit_order != -1) last_submitorder++; } else { fix_wontuse_handle(currentTask); /* Add the handle in the wontuse task */ if (currentTask->task.handles[0]) { starpu_data_wont_use(currentTask->task.handles[0]); last_submitorder++; } } currentNode = starpu_rbtree_next(currentNode); } fprintf(stderr, " done.\n"); return 1; } /* * * * * * * * * * * * * * * */ /* * * * * * MAIN * * * * * * */ /* * * * * * * * * * * * * * */ static void usage(const char *program) { fprintf(stderr,"Usage: %s [--static-workerid] tasks.rec [sched.rec]\n", program); exit(EXIT_FAILURE); } int main(int argc, char **argv) { FILE *rec; char *s; const char *tasks_rec = NULL; const char *sched_rec = NULL; unsigned i; size_t s_allocated = 128; unsigned long nread_tasks = 0; /* FIXME: we do not support data with sequential consistency disabled */ _STARPU_MALLOC(s, s_allocated); dependson_size = REPLAY_NMAX_DEPENDENCIES; /* Change the value of REPLAY_NMAX_DEPENCIES to modify the number of dependencies */ _STARPU_MALLOC(dependson, dependson_size * sizeof (* dependson)); alloc_mode = 1; for (i = 1; i < (unsigned) argc; i++) { if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) { usage(argv[0]); } else if (!strcmp(argv[i], "--static-workerid")) { static_workerid = 1; } else { if (!tasks_rec) tasks_rec = argv[i]; else if (!sched_rec) sched_rec = argv[i]; else usage(argv[0]); } } if (!tasks_rec) usage(argv[0]); if (sched_rec) schedRecInit(sched_rec); rec = fopen(tasks_rec, "r"); if (!rec) { fprintf(stderr,"unable to open file %s: %s\n", tasks_rec, strerror(errno)); exit(EXIT_FAILURE); } int ret = starpu_init(NULL); if (ret == -ENODEV) goto enodev; /* Read line by line, and on empty line submit the task with the accumulated information */ reset(); double start = starpu_timing_now(); int linenum = 0; while(1) { char *ln; if (!fgets(s, s_allocated, rec)) { fprintf(stderr, " done.\n"); int submitted = submit_tasks(); if (submitted == -1) { goto enodev; } goto eof; } while (!(ln = strchr(s, '\n'))) { /* fprintf(stderr,"buffer size %d too small, doubling it\n", s_allocated); */ _STARPU_REALLOC(s, s_allocated * 2); if (!fgets(s + s_allocated-1, s_allocated+1, rec)) { fprintf(stderr, "\n"); int submitted = submit_tasks(); if (submitted == -1) { goto enodev; } goto eof; } s_allocated *= 2; } linenum++; if (ln == s) { /* Empty line, do task */ struct task * task; _STARPU_MALLOC(task, sizeof(*task)); starpu_task_init(&task->task); task->deps = NULL; task->submit_order = submitorder; starpu_rbtree_node_init(&task->node); starpu_rbtree_insert(&tree, &task->node, diff); task->jobid = jobid; task->iteration = iteration; if (name != NULL) task->task.name = strdup(name); task->type = control; if (control == NormalTask) { if (workerid >= 0) { task->task.priority = priority; task->task.cl = &cl; if (static_workerid) { task->task.workerid = workerid; task->task.execute_on_a_specific_worker = 1; } if (alloc_mode) { /* Duplicating the handles stored (and registered in the current context) into the task */ ARRAY_DUP(modes_ptr, task->task.modes, nb_parameters); ARRAY_DUP(modes_ptr, task->task.cl->modes, nb_parameters); variable_data_register_check(sizes_set, nb_parameters); ARRAY_DUP(handles_ptr, task->task.handles, nb_parameters); } else { task->task.dyn_modes = modes_ptr; _STARPU_MALLOC(task->task.cl->dyn_modes, (sizeof(*task->task.cl->dyn_modes) * nb_parameters)); ARRAY_DUP(modes_ptr, task->task.cl->dyn_modes, nb_parameters); variable_data_register_check(sizes_set, nb_parameters); task->task.dyn_handles = handles_ptr; } task->task.nbuffers = nb_parameters; struct perfmodel * realmodel; HASH_FIND_STR(model_hash, model, realmodel); if (realmodel == NULL) { int len = strlen(model); _STARPU_CALLOC(realmodel, 1, sizeof(struct perfmodel)); _STARPU_MALLOC(realmodel->model_name, sizeof(char) * (len+1)); realmodel->model_name = strcpy(realmodel->model_name, model); starpu_perfmodel_init(&realmodel->perfmodel); int error = starpu_perfmodel_load_symbol(model, &realmodel->perfmodel); if (!error) { HASH_ADD_STR(model_hash, model_name, realmodel); } else { fprintf(stderr, "[starpu][Warning] Error loading perfmodel symbol %s\n", model); fprintf(stderr, "[starpu][Warning] Taking only measurements from the given execution, and forcing execution on worker %d\n", workerid); starpu_perfmodel_unload_model(&realmodel->perfmodel); free(realmodel->model_name); free(realmodel); realmodel = NULL; } } struct starpu_perfmodel_arch *arch = starpu_worker_get_perf_archtype(workerid, 0); unsigned comb = starpu_perfmodel_arch_comb_add(arch->ndevices, arch->devices); unsigned narch = starpu_perfmodel_get_narch_combs(); struct task_arg *arg; _STARPU_MALLOC(arg, sizeof(struct task_arg) + sizeof(double) * narch); arg->footprint = footprint; arg->narch = narch; double * perfTime = arg->perf; if (realmodel == NULL) { /* Erf, do without perfmodel, for execution there */ task->task.workerid = workerid; task->task.execute_on_a_specific_worker = 1; for (i = 0; i < narch ; i++) { if (i == comb) perfTime[i] = endTime - startTime; else perfTime[i] = NAN; } } else { int one = 0; for (i = 0; i < narch ; i++) { arch = starpu_perfmodel_arch_comb_fetch(i); perfTime[i] = starpu_perfmodel_history_based_expected_perf(&realmodel->perfmodel, arch, footprint); if (!(perfTime[i] == 0 || isnan(perfTime[i]))) one = 1; } if (!one) { fprintf(stderr, "We do not have any performance measurement for symbol '%s' for footprint %x, we can not execute this", model, footprint); exit(EXIT_FAILURE); } } task->task.cl_arg = arg; task->task.flops = flops; total_flops += flops; } task->task.cl_arg_size = 0; task->task.tag_id = tag; task->task.use_tag = 1; task->ndependson = ndependson; if (ndependson > 0) { _STARPU_MALLOC(task->deps, ndependson * sizeof (* task->deps)); ARRAY_DUP(dependson, task->deps, ndependson); } } else { STARPU_ASSERT(nb_parameters == 1); task->reg_signal = reg_signal[0]; ARRAY_DUP(handles_ptr, task->task.handles, nb_parameters); } /* Add this task to task hash */ HASH_ADD(hh, tasks, jobid, sizeof(jobid), task); nread_tasks++; if (!(nread_tasks % 1000)) { fprintf(stderr, "\rRead task %lu...", nread_tasks); fflush(stdout); } reset(); } /* Record various information */ #define TEST(field) (!strncmp(s, field": ", strlen(field) + 2)) else if(TEST("Control")) { char * c = s+9; if(!strncmp(c, "WontUse", 7)) { control = WontUseTask; nb_parameters = 1; alloc_mode = set_alloc_mode(nb_parameters); arrays_managing(alloc_mode); } else control = NormalTask; } else if (TEST("Name")) { *ln = 0; name = strdup(s+6); } else if (TEST("Model")) { *ln = 0; model = strdup(s+7); } else if (TEST("JobId")) jobid = atol(s+7); else if(TEST("SubmitOrder")) submitorder = atoi(s+13); else if (TEST("DependsOn")) { char *c = s + 11; for (ndependson = 0; *c != '\n'; ndependson++) { if (ndependson >= dependson_size) { dependson_size *= 2; _STARPU_REALLOC(dependson, dependson_size * sizeof(*dependson)); } dependson[ndependson] = strtol(c, &c, 10); } } else if (TEST("Tag")) { tag = strtol(s+5, NULL, 16); } else if (TEST("WorkerId")) { workerid = atoi(s+10); } else if (TEST("Footprint")) { footprint = strtoul(s+11, NULL, 16); } else if (TEST("Parameters")) { /* Nothing to do */ } else if (TEST("Handles")) { *ln = 0; char *buffer = s + 9; const char *delim = " "; unsigned nb_parameters_line = count_number_tokens(buffer, delim); if(nb_parameters == 0) { nb_parameters = nb_parameters_line; arrays_managing(set_alloc_mode(nb_parameters)); } else STARPU_ASSERT(nb_parameters == nb_parameters_line); char* token = strtok(buffer, delim); for (i = 0 ; i < nb_parameters ; i++) { STARPU_ASSERT(token); struct handle *handles_cell; /* A cell of the hash table for the handles */ starpu_data_handle_t handle_value = (starpu_data_handle_t) strtol(token, NULL, 16); /* Get the ith handle on the line (in the file) */ HASH_FIND(hh, handles_hash, &handle_value, sizeof(handle_value), handles_cell); /* Find if the handle_value was already registered as a key in the hash table */ /* If it wasn't, then add it to the hash table */ if (handles_cell == NULL) { /* Hide the initial handle from the file into the handles array to find it when necessary */ handles_ptr[i] = handle_value; reg_signal[i] = 1; } else { handles_ptr[i] = handles_cell->mem_ptr; reg_signal[i] = 0; } token = strtok(NULL, delim); } } else if (TEST("Modes")) { *ln = 0; char * buffer = s + 7; unsigned mode_i = 0; const char * delim = " "; unsigned nb_parameters_line = count_number_tokens(buffer, delim); if(nb_parameters == 0) { nb_parameters = nb_parameters_line; arrays_managing(set_alloc_mode(nb_parameters)); } else STARPU_ASSERT(nb_parameters == nb_parameters_line); char* token = strtok(buffer, delim); while (token != NULL && mode_i < nb_parameters) { /* Subject to the names of starpu modes enumerator are not modified */ if (!strncmp(token, "RW", 2)) { *(modes_ptr+mode_i) = STARPU_RW; mode_i++; } else if (!strncmp(token, "R", 1)) { *(modes_ptr+mode_i) = STARPU_R; mode_i++; } else if (!strncmp(token, "W", 1)) { *(modes_ptr+mode_i) = STARPU_W; mode_i++; } /* Other cases produce a warning*/ else { fprintf(stderr, "[Warning] A mode is different from R/W (jobid task : %lu)", jobid); } token = strtok(NULL, delim); } } else if (TEST("Sizes")) { *ln = 0; char * buffer = s + 7; const char * delim = " "; unsigned nb_parameters_line = count_number_tokens(buffer, delim); unsigned k = 0; if(nb_parameters == 0) { nb_parameters = nb_parameters_line; arrays_managing(set_alloc_mode(nb_parameters)); } else STARPU_ASSERT(nb_parameters == nb_parameters_line); _STARPU_MALLOC(sizes_set, nb_parameters * sizeof(size_t)); char * token = strtok(buffer, delim); while (token != NULL && k < nb_parameters) { sizes_set[k] = strtol(token, NULL, 10); token = strtok(NULL, delim); k++; } } else if (TEST("StartTime")) { startTime = strtod(s+11, NULL); } else if (TEST("EndTime")) { endTime = strtod(s+9, NULL); } else if (TEST("GFlop")) { flops = 1000000000 * strtod(s+7, NULL); } else if (TEST("Iteration")) { iteration = (unsigned) strtol(s+11, NULL, 10); } else if (TEST("Priority")) { priority = strtol(s + 10, NULL, 10); } } eof: starpu_task_wait_for_all(); fprintf(stderr, " done.\n"); printf("%g ms", (starpu_timing_now() - start) / 1000.); if (total_flops != 0.) printf("\t%g GF/s", (total_flops / (starpu_timing_now() - start)) / 1000.); printf("\n"); /* FREE allocated memory */ free(dependson); free(s); /* End of FREE */ struct handle *handle=NULL, *handletmp=NULL; HASH_ITER(hh, handles_hash, handle, handletmp) { starpu_data_unregister(handle->mem_ptr); HASH_DEL(handles_hash, handle); free(handle); } struct perfmodel *model_s=NULL, *modeltmp=NULL; HASH_ITER(hh, model_hash, model_s, modeltmp) { starpu_perfmodel_unload_model(&model_s->perfmodel); HASH_DEL(model_hash, model_s); free(model_s->model_name); free(model_s); } struct task *task=NULL, *tasktmp=NULL; HASH_ITER(hh, tasks, task, tasktmp) { free(task->task.cl_arg); free((char*)task->task.name); if (task->task.dyn_handles != NULL) { free(task->task.dyn_handles); free(task->task.dyn_modes); } HASH_DEL(tasks, task); starpu_task_clean(&task->task); free(task->deps); starpu_rbtree_remove(&tree, &task->node); free(task); } starpu_shutdown(); return 0; enodev: starpu_shutdown(); return 77; } starpu-1.4.9+dfsg/tools/starpu_replay_sched.c000066400000000000000000000245601507764646700213630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * Copyright (C) 2017-2017 Erwan Leria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This reads a sched.rec file and mangles submitted tasks according to the hint * from that file. */ #include #include #include #include #include #include #include #include // // sched.rec files look like this: // // SubmitOrder: 1234 // Priority: 12 // SpecificWorker: 1 // Workers: 0 1 2 // DependsOn: 1235 // // Prefetch: 1234 // DependsOn: 1233 // MemoryNode: 1 // Parameters: 1 #define CPY(src, dst, n) memcpy(dst, src, n * sizeof(*dst)) #if 0 #define debug(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) #else #define debug(fmt, ...) (void)0 #endif static unsigned long submitorder; /* Also use as prefetchtag */ static int priority; static int eosw; static unsigned workerorder; static int memnode; /* FIXME: MAXs */ static uint32_t workers[STARPU_NMAXWORKERS/32]; static unsigned nworkers; static unsigned dependson[STARPU_NMAXBUFS]; static unsigned ndependson; static unsigned params[STARPU_NMAXBUFS]; static unsigned nparams; static enum sched_type { NormalTask, PrefetchTask, } sched_type; static struct starpu_codelet cl_prefetch = { .where = STARPU_NOWHERE, .nbuffers = 1, .modes = { STARPU_R }, }; static struct task { UT_hash_handle hh; unsigned long submitorder; int priority; int memnode; unsigned dependson[STARPU_NMAXBUFS]; unsigned ndependson; struct starpu_task *depends_tasks[STARPU_NMAXBUFS]; /* For real tasks */ int eosw; unsigned workerorder; uint32_t workers[STARPU_NMAXWORKERS/32]; unsigned nworkers; /* For prefetch tasks */ unsigned params[STARPU_NMAXBUFS]; unsigned nparams; struct starpu_task *pref_task; /* Actual prefetch task */ } *mangled_tasks, *prefetch_tasks; LIST_TYPE(dep, struct task *task; unsigned i; ); struct deps { UT_hash_handle hh; unsigned long submitorder; struct dep_list list; } *dependencies = NULL; static void reset(void) { submitorder = 0; priority = INT_MIN; eosw = -1; memset(&workers, 0, sizeof(workers)); nworkers = 0; ndependson = 0; sched_type = NormalTask; nparams = 0; memnode = -1; workerorder = 0; } /* TODO : respecter l'ordre de soumission des tâches SubmitOrder */ static void checkField(char * s) { /* Record various information */ #define TEST(field) (!strncmp(s, field": ", strlen(field) + 2)) if (TEST("SubmitOrder")) { s = s + strlen("SubmitOrder: "); submitorder = strtol(s, NULL, 10); } else if (TEST("Priority")) { s = s + strlen("Priority: "); priority = strtol(s, NULL, 10); } else if (TEST("SpecificWorker")) { s = s + strlen("SpecificWorker: "); eosw = strtol(s, NULL, 10); } else if (TEST("Workers")) { s = s + strlen("Workers: "); char * delim = " "; char * token = strtok(s, delim); int i = 0; while (token != NULL) { int k = strtol(token, NULL, 10); STARPU_ASSERT_MSG(k < STARPU_NMAXWORKERS, "%d is bigger than maximum %d\n", k, STARPU_NMAXWORKERS); workers[k/(sizeof(*workers)*8)] |= (1 << (k%(sizeof(*workers)*8))); i++; token = strtok(NULL, delim); } nworkers = i; } else if (TEST("DependsOn")) { /* NOTE : dependsons (in the sched.rec) should be the submit orders of the dependencies, otherwise it can occur an undefined behaviour (contrary to the tasks.rec where dependencies are jobids */ unsigned i = 0; char * delim = " "; char * token = strtok(s+strlen("DependsOn: "), delim); while (token != NULL) { dependson[i] = strtol(token, NULL, 10); i++; token = strtok(NULL, delim); } ndependson = i; } else if (TEST("Prefetch")) { s = s + strlen("Prefetch: "); submitorder = strtol(s, NULL, 10); sched_type = PrefetchTask; } else if (TEST("Parameters")) { s = s + strlen("Parameters: "); char * delim = " "; char * token = strtok(s, delim); int i = 0; while (token != NULL) { params[i] = strtol(token, NULL, 10); i++; token = strtok(NULL, delim); } nparams = i; } else if (TEST("MemoryNode")) { s = s + strlen("MemoryNode: "); memnode = strtol(s, NULL, 10); } else if (TEST("Workerorder")) { s = s + strlen("Workerorder: "); workerorder = strtol(s, NULL, 10); } } void schedRecInit(const char * filename) { FILE * f = fopen(filename, "r"); if(f == NULL) { fprintf(stderr,"unable to open file %s: %s\n", filename, strerror(errno)); return; } size_t lnsize = 128; char *s; _STARPU_MALLOC(s, sizeof(*s) * lnsize); int eof = 0; reset(); while(!eof && !feof(f)) { char *ln; /* Get the line */ if (!fgets(s, lnsize, f)) { eof = 1; } while (!(ln = strchr(s, '\n'))) { _STARPU_REALLOC(s, lnsize * 2); if (!fgets(s + lnsize-1, lnsize+1, f)) { eof = 1; break; } lnsize *= 2; } if ((ln == s || eof) && submitorder) { /* Empty line, doit */ struct task * task; unsigned i; _STARPU_MALLOC(task, sizeof(*task)); task->submitorder = submitorder; task->priority = priority; task->memnode = memnode; CPY(dependson, task->dependson, ndependson); task->ndependson = ndependson; /* Also record submitorder of tasks that this one will need to depend on */ for (i = 0; i < ndependson; i++) { struct dep *dep; struct starpu_task *starpu_task; _STARPU_MALLOC(dep, sizeof(*dep)); dep->task = task; dep->i = i; struct deps *deps; HASH_FIND(hh, dependencies, &task->dependson[i], sizeof(submitorder), deps); if (!deps) { /* No task depends on this one yet, add a cell for it */ _STARPU_MALLOC(deps, sizeof(*deps)); dep_list_init(&deps->list); deps->submitorder = task->dependson[i]; HASH_ADD(hh, dependencies, submitorder, sizeof(submitorder), deps); } dep_list_push_back(&deps->list, dep); /* Create the intermediate task */ starpu_task = dep->task->depends_tasks[i] = starpu_task_create(); starpu_task->cl = NULL; starpu_task->destroy = 0; starpu_task->no_submitorder = 1; } switch (sched_type) { case NormalTask: /* A new task to mangle, record what needs to be done */ task->eosw = eosw; task->workerorder = workerorder; CPY(workers, task->workers, STARPU_NMAXWORKERS/32); task->nworkers = nworkers; STARPU_ASSERT(nparams == 0); debug("adding mangled task %lu\n", submitorder); HASH_ADD(hh, mangled_tasks, submitorder, sizeof(submitorder), task); break; case PrefetchTask: STARPU_ASSERT(memnode >= 0); STARPU_ASSERT(eosw == -1); STARPU_ASSERT(workerorder == 0); STARPU_ASSERT(nworkers == 0); CPY(params, task->params, nparams); task->nparams = nparams; /* TODO: more params */ STARPU_ASSERT_MSG(nparams == 1, "only supports one parameter at a time"); debug("adding prefetch task for %lu\n", submitorder); HASH_ADD(hh, prefetch_tasks, submitorder, sizeof(submitorder), task); break; default: STARPU_ASSERT(0); break; } reset(); } else checkField(s); } fclose(f); free(s); } static void do_prefetch(void *arg) { unsigned node = (uintptr_t) arg; starpu_data_idle_prefetch_on_node(starpu_task_get_current()->handles[0], node, 1); } void applySchedRec(struct starpu_task *starpu_task, unsigned long submit_order) { struct task *task; struct deps *deps; int ret; HASH_FIND(hh, dependencies, &submit_order, sizeof(submit_order), deps); if (deps) { struct dep *dep; for (dep = dep_list_begin(&deps->list); dep != dep_list_end(&deps->list); dep = dep_list_next(dep)) { debug("task %lu is %d-th dep for %lu\n", submit_order, dep->i, dep->task->submitorder); /* Some task will depend on this one, make the dependency */ starpu_task_declare_deps_array(dep->task->depends_tasks[dep->i], 1, &starpu_task); ret = starpu_task_submit(dep->task->depends_tasks[dep->i]); STARPU_ASSERT(ret == 0); } } HASH_FIND(hh, prefetch_tasks, &submit_order, sizeof(submit_order), task); if (task) { /* We want to submit a prefetch for this task */ debug("task %lu has a prefetch for parameter %d to node %d\n", submit_order, task->params[0], task->memnode); struct starpu_task *pref_task; pref_task = task->pref_task = starpu_task_create(); pref_task->cl = &cl_prefetch; pref_task->destroy = 1; pref_task->no_submitorder = 1; pref_task->callback_arg = (void*)(uintptr_t) task->memnode; pref_task->callback_func = do_prefetch; /* TODO: more params */ pref_task->handles[0] = starpu_task->handles[task->params[0]]; /* Make it depend on intermediate tasks */ if (task->ndependson) { debug("%u dependencies\n", task->ndependson); starpu_task_declare_deps_array(pref_task, task->ndependson, task->depends_tasks); } ret = starpu_task_submit(pref_task); STARPU_ASSERT(ret == 0); } HASH_FIND(hh, mangled_tasks, &submit_order, sizeof(submit_order), task); if (task == NULL) /* Nothing to do for this */ return; debug("mangling task %lu\n", submit_order); if (task->eosw >= 0) { debug("execute on a specific worker %d\n", task->eosw); starpu_task->workerid = task->eosw; starpu_task->execute_on_a_specific_worker = 1; } if (task->workerorder > 0) { debug("workerorder %d\n", task->workerorder); starpu_task->workerorder = task->workerorder; } if (task->priority != INT_MIN) { debug("priority %d\n", task->priority); starpu_task->priority = task->priority; } if (task->nworkers) { debug("%u workers %x\n", task->nworkers, task->workers[0]); starpu_task->workerids_len = sizeof(task->workers) / sizeof(task->workers[0]); _STARPU_MALLOC(starpu_task->workerids, task->nworkers * sizeof(*starpu_task->workerids)); CPY(task->workers, starpu_task->workerids, STARPU_NMAXWORKERS/32); } if (task->ndependson) { debug("%u dependencies\n", task->ndependson); starpu_task_declare_deps_array(starpu_task, task->ndependson, task->depends_tasks); } /* And now, let it go! */ } starpu-1.4.9+dfsg/tools/starpu_sched_display.c000066400000000000000000000017751507764646700215370ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include int main(void) { struct starpu_sched_policy **policies; struct starpu_sched_policy **policy; policies = starpu_sched_get_predefined_policies(); for(policy=policies ; *policy!=NULL ; policy++) printf("%s\n", (*policy)->policy_name); return EXIT_SUCCESS; } starpu-1.4.9+dfsg/tools/starpu_send_recv_data_use.py000077500000000000000000000113671507764646700227500ustar00rootroot00000000000000#!/usr/bin/env python3 # coding=utf-8 # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2019-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # """ Offline tool to draw graph showing elapsed time between sent or received data and their use by tasks """ import sys PROGNAME = sys.argv[0] def usage(): print("Offline tool to draw graph showing elapsed time between sent or received data and their use by tasks") print("") print("Usage: %s " % PROGNAME) if len(sys.argv) != 2: usage() sys.exit(1) import re import numpy as np import matplotlib.pyplot as plt from matplotlib.gridspec import GridSpec import os def convert_rec_file(filename): lines = [] item = dict() with open(filename, "r") as f: for l in f.readlines(): if l == "\n": lines.append(item) item = dict() else: ls = l.split(":") key = ls[0].lower() value = ls[1].strip() if key in item: print("Warning: duplicated key '" + key + "'") else: if re.match(r'^\d+$', value) != None: item[key] = int(value) elif re.match(r'^\d+\.\d+$', value) != None: item[key] = float(value) else: item[key] = value return lines working_directory = sys.argv[1] comms = convert_rec_file(os.path.join(working_directory, "comms.rec")) tasks = [t for t in convert_rec_file(os.path.join(working_directory, "tasks.rec")) if "control" not in t and "starttime" in t] if len(tasks) == 0: print("There is no task using data after communication.") sys.exit(0) def plot_graph(comm_time_key, match, filename, title, xlabel): workers = dict() durations = [] min_time = 0. max_time = 0. for c in comms: t_matched = None for t in tasks: if match(t, c): t_matched = t break if t_matched is not None: worker = str(t_matched['mpirank']) + "-" + str(t_matched['workerid']) if worker not in workers: workers[worker] = [] eps = t["starttime"] - c[comm_time_key] assert eps > 0 durations.append(eps) workers[worker].append((c[comm_time_key], eps)) if min_time == 0 or c[comm_time_key] < min_time: min_time = c[comm_time_key] if max_time == 0 or c[comm_time_key] > max_time: max_time = c[comm_time_key] fig = plt.figure(constrained_layout=True) gs = GridSpec(2, 2, figure=fig) axs = [fig.add_subplot(gs[0, :-1]), fig.add_subplot(gs[1, :-1]), fig.add_subplot(gs[0:, -1])] i = 0 for y, x in workers.items(): # print(y, x) axs[0].broken_barh(x, [i*10, 8], facecolors=(0.1, 0.2, 0.5, 0.2)) i += 1 i = 0 for y, x in workers.items(): for xx in x: axs[1].broken_barh([xx], [i, 1]) i += 1 axs[0].set_yticks([i*10+4 for i in range(len(workers))]) axs[0].set_yticklabels(list(workers)) axs[0].set(xlabel="Time (ms) - Duration: " + str(max_time - min_time) + "ms", ylabel="Worker [mpi]-[*pu]", title=title) if len(durations) != 0: axs[2].hist(durations, bins=np.logspace(np.log10(1), np.log10(max(durations)), 50), rwidth=0.8) axs[2].set_xscale("log") axs[2].set(xlabel=xlabel, ylabel="Number of occurrences", title="Histogramm") fig.set_size_inches(15, 9) plt.savefig(os.path.join(working_directory, filename), dpi=100) plt.show() plot_graph("recvtime", lambda t, c: (t["mpirank"] == c["dst"] and t["starttime"] >= c["recvtime"] and str(c["recvhandle"]) in t["handles"]), "recv_use.png", "Elapsed time between recv and use (ms)", "Time between data reception and its use by a task") plot_graph("sendtime", lambda t, c: (t["mpirank"] == c["src"] and t["starttime"] >= c["sendtime"] and str(c["sendhandle"]) in t["handles"]), "send_use.png", "Elapsed time between send and use (ms)", "Time between data sending and its use by a task") starpu-1.4.9+dfsg/tools/starpu_smpi.xslt000066400000000000000000000044661507764646700204440ustar00rootroot00000000000000 - - - - - starpu-1.4.9+dfsg/tools/starpu_smpirun000077500000000000000000000115321507764646700201730ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Script for running starpu-mpi application in simgrid mode prefix=/usr/local SMPIRUN=/usr/bin/mpiexec STARPU_DATADIR=${prefix}/share STARPU_XSLTDIR=$STARPU_DATADIR/starpu SOURCE_DATADIR=/home/benchmarks/builds/t3_Pvo1xe/0/starpu/starpu/tools BUILDDIR=/home/benchmarks/builds/t3_Pvo1xe/0/starpu/starpu/tools SMPI_VERSION=$($SMPIRUN -version | grep " version " | sed -e 's/.* \([0-9]*\.[0-9]*\).*/\1/') SMPI_MAJOR=${SMPI_VERSION%.*} SMPI_MINOR=${SMPI_VERSION#*.} if [ "$SMPI_MAJOR" -ge 4 -o \( "$SMPI_MAJOR" = 3 -a "$SMPI_MINOR" -ge 13 \) ] then DTD=http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd V=4 VF=.v4 DASH=- else DTD=http://simgrid.gforge.inria.fr/simgrid.dtd V=3 VF="" DASH=_ fi EXTRA_OPT="" if [ "$SMPI_MAJOR" -ge 4 -o \( "$SMPI_MAJOR" = 3 -a "$SMPI_MINOR" -ge 16 \) ] then EXTRA_OPT+=" --cfg=smpi/privatization:yes" else EXTRA_OPT+=" --cfg=smpi/privatize${DASH}global${DASH}variables:yes" fi if [ -n "$TEST_LOGS" ] then # Testsuite, use our loader WRAPPER="-wrapper $BUILDDIR/../tests/loader" fi # When executed from source, take xslt from source [ "$0" -ef $BUILDDIR/starpu_smpirun ] && STARPU_XSLTDIR=$SOURCE_DATADIR MPI_PLATFORM="" MPI_HOSTFILE="" NP="" GDB="" HOSTFILE_PLATFORM_DETECT="" while true; do case "$1" in "-help"|"-h"|"--help") echo "$0 [OPTIONS] program" echo echo "Available options are": echo " -platform FILE specify the simgrid cluster file to be used" echo " -hostfile FILE specify the list of machines to be used": echo " -np N specify the number of nodes to run": echo " -hostfile-platform use performance models of each host specified in the" echo " hostfile" echo " -gdb run through gdb" exit 0 ;; "-platform") MPI_PLATFORM=$2 if [ ! -r "$MPI_PLATFORM" ]; then echo "$MPI_PLATFORM can't be read" exit 1 fi shift 2 ;; "-hostfile") MPI_HOSTFILE=$2 if [ ! -r "$MPI_HOSTFILE" ]; then echo "$MPI_HOSTFILE can't be read" exit 1 fi shift 2 ;; "-np") NP=$2 shift 2 ;; "-hostfile-platform") HOSTFILE_PLATFORM_DETECT=1 shift 1 ;; "-gdb") GDB="-gdb" shift 1 ;; *) break ;; esac done if [ -z "$MPI_PLATFORM" ] || [ -z "$MPI_HOSTFILE" ]; then echo "$0 -platform PLATFORM -hostfile HOSTFILE [ -np N ] [ -gdb ] [ ... ] program [ args ]" exit 2 fi PLATFORM=$(mktemp /tmp/StarPU-MPI-platform-XXXXXXXX.xml) [ -n "$STARPU_HOME" ] || STARPU_HOME=$HOME [ -n "$STARPU_PERF_MODEL_DIR" ] || STARPU_PERF_MODEL_DIR=$STARPU_HOME/.starpu/sampling [ -n "$STARPU_HOSTNAME" ] || STARPU_HOSTNAME=$(hostname) NODE_PLATFORM=$STARPU_PERF_MODEL_DIR/bus/${STARPU_HOSTNAME}.platform$VF.xml [ -n "$NP" ] || NP=$(grep -v "^$" $MPI_HOSTFILE | wc -l) if ! type xsltproc > /dev/null 2> /dev/null then echo xsltproc is needed for starpu simgrid mpi. exit 1 fi if [ -n "$HOSTFILE_PLATFORM_DETECT" ] then HOSTS=$(grep -v "^$" $MPI_HOSTFILE) export STARPU_MPI_HOSTNAMES=$(echo $HOSTS | tr -d '\011\012\015') fi ( cat << EOF EOF tail -n +3 $MPI_PLATFORM | grep -v ' EOF ) > $PLATFORM STACKSIZE=$(ulimit -s) [ "$STACKSIZE" != unlimited ] || STACKSIZE=8192 $SMPIRUN $WRAPPER $GDB -platform $PLATFORM -hostfile $MPI_HOSTFILE -np $NP "$@" $EXTRA_OPT --cfg=smpi/simulate${DASH}computation:no --cfg=contexts/stack${DASH}size:$STACKSIZE RET=$? rm -f $PLATFORM exit $RET starpu-1.4.9+dfsg/tools/starpu_smpirun.in000066400000000000000000000114001507764646700205670ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2014-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # Copyright (C) 2020-2020 Federal University of Rio Grande do Sul (UFRGS) # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # # Script for running starpu-mpi application in simgrid mode prefix=@prefix@ SMPIRUN=@mpiexec_path@ STARPU_DATADIR=@datarootdir@ STARPU_XSLTDIR=$STARPU_DATADIR/starpu SOURCE_DATADIR=@abs_srcdir@ BUILDDIR=@abs_builddir@ SMPI_VERSION=$($SMPIRUN -version | grep " version " | sed -e 's/.* \([0-9]*\.[0-9]*\).*/\1/') SMPI_MAJOR=${SMPI_VERSION%.*} SMPI_MINOR=${SMPI_VERSION#*.} if [ "$SMPI_MAJOR" -ge 4 -o \( "$SMPI_MAJOR" = 3 -a "$SMPI_MINOR" -ge 13 \) ] then DTD=http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd V=4 VF=.v4 DASH=- else DTD=http://simgrid.gforge.inria.fr/simgrid.dtd V=3 VF="" DASH=_ fi EXTRA_OPT="" if [ "$SMPI_MAJOR" -ge 4 -o \( "$SMPI_MAJOR" = 3 -a "$SMPI_MINOR" -ge 16 \) ] then EXTRA_OPT+=" --cfg=smpi/privatization:yes" else EXTRA_OPT+=" --cfg=smpi/privatize${DASH}global${DASH}variables:yes" fi if [ -n "$TEST_LOGS" ] then # Testsuite, use our loader WRAPPER="-wrapper $BUILDDIR/../tests/loader" fi # When executed from source, take xslt from source [ "$0" -ef $BUILDDIR/starpu_smpirun ] && STARPU_XSLTDIR=$SOURCE_DATADIR MPI_PLATFORM="" MPI_HOSTFILE="" NP="" GDB="" HOSTFILE_PLATFORM_DETECT="" while true; do case "$1" in "-help"|"-h"|"--help") echo "$0 [OPTIONS] program" echo echo "Available options are": echo " -platform FILE specify the simgrid cluster file to be used" echo " -hostfile FILE specify the list of machines to be used": echo " -np N specify the number of nodes to run": echo " -hostfile-platform use performance models of each host specified in the" echo " hostfile" echo " -gdb run through gdb" exit 0 ;; "-platform") MPI_PLATFORM=$2 if [ ! -r "$MPI_PLATFORM" ]; then echo "$MPI_PLATFORM can't be read" exit 1 fi shift 2 ;; "-hostfile") MPI_HOSTFILE=$2 if [ ! -r "$MPI_HOSTFILE" ]; then echo "$MPI_HOSTFILE can't be read" exit 1 fi shift 2 ;; "-np") NP=$2 shift 2 ;; "-hostfile-platform") HOSTFILE_PLATFORM_DETECT=1 shift 1 ;; "-gdb") GDB="-gdb" shift 1 ;; *) break ;; esac done if [ -z "$MPI_PLATFORM" ] || [ -z "$MPI_HOSTFILE" ]; then echo "$0 -platform PLATFORM -hostfile HOSTFILE [ -np N ] [ -gdb ] [ ... ] program [ args ]" exit 2 fi PLATFORM=$(mktemp /tmp/StarPU-MPI-platform-XXXXXXXX.xml) [ -n "$STARPU_HOME" ] || STARPU_HOME=$HOME [ -n "$STARPU_PERF_MODEL_DIR" ] || STARPU_PERF_MODEL_DIR=$STARPU_HOME/.starpu/sampling [ -n "$STARPU_HOSTNAME" ] || STARPU_HOSTNAME=$(hostname) NODE_PLATFORM=$STARPU_PERF_MODEL_DIR/bus/${STARPU_HOSTNAME}.platform$VF.xml [ -n "$NP" ] || NP=$(grep -v "^$" $MPI_HOSTFILE | wc -l) if ! type xsltproc > /dev/null 2> /dev/null then echo xsltproc is needed for starpu simgrid mpi. exit 1 fi if [ -n "$HOSTFILE_PLATFORM_DETECT" ] then HOSTS=$(grep -v "^$" $MPI_HOSTFILE) export STARPU_MPI_HOSTNAMES=$(echo $HOSTS | tr -d '\011\012\015') fi ( cat << EOF EOF tail -n +3 $MPI_PLATFORM | grep -v ' EOF ) > $PLATFORM STACKSIZE=$(ulimit -s) [ "$STACKSIZE" != unlimited ] || STACKSIZE=8192 $SMPIRUN $WRAPPER $GDB -platform $PLATFORM -hostfile $MPI_HOSTFILE -np $NP "$@" $EXTRA_OPT --cfg=smpi/simulate${DASH}computation:no --cfg=contexts/stack${DASH}size:$STACKSIZE RET=$? rm -f $PLATFORM exit $RET starpu-1.4.9+dfsg/tools/starpu_tasks_rec_complete.1000066400000000000000000000010471507764646700225000ustar00rootroot00000000000000.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.3. .TH STARPU_TASKS_REC_COMPLETE "1" "October 2025" "StarPU 1.4.9" "User Commands" .SH NAME starpu_tasks_rec_complete \- Complete StarPU tasks.rec file .SH SYNOPSIS .B starpu_tasks_rec_complete [\fI\,input-file \/\fR[\fI\,output-file\/\fR]] .SH DESCRIPTION Complete a tasks.rec file with additional information, notably estimated termination times. .PP If input or output file names are not given, stdin and stdout are used. .SH "REPORTING BUGS" Report bugs to . starpu-1.4.9+dfsg/tools/starpu_tasks_rec_complete.c000066400000000000000000000114521507764646700225630ustar00rootroot00000000000000/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include #include #include #include #include #include #include #include #define PROGNAME "starpu_tasks_rec_complete" /* * This program takes a tasks.rec file, and emits a tasks.rec file with * additional information, notably estimated termination times. */ static struct model { UT_hash_handle hh; char *name; struct starpu_perfmodel model; } *models; int main(int argc, char *argv[]) { FILE *input; FILE *output; char s[1024], *c; uint32_t footprint = 0; int already_there = 0; char *model_name = NULL; struct model *model, *tmp=NULL; int ret; if (argc >= 2) { if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { fprintf(stderr, "Complete a tasks.rec file with additional information, notably estimated termination times.\n"); fprintf(stderr, "\n"); fprintf(stderr, "Usage: %s [input-file [output-file]]\n", PROGNAME); fprintf(stderr, "\n"); fprintf(stderr, "If input or output file names are not given, stdin and stdout are used."); fprintf(stderr, "\n"); fprintf(stderr, "Report bugs to <%s>.\n", PACKAGE_BUGREPORT); exit(EXIT_SUCCESS); } else if (strncmp(argv[1], "--version", 9) == 0 || strncmp(argv[1], "-v", 2) == 0) { fputs(PROGNAME " (" PACKAGE_NAME ") " PACKAGE_VERSION "\n", stderr); exit(EXIT_FAILURE); } } #ifdef STARPU_HAVE_SETENV setenv("STARPU_FXT_TRACE", "0", 1); #endif if (starpu_init(NULL) != 0) { fprintf(stderr, "StarPU initialization failure\n"); exit(EXIT_FAILURE); } starpu_pause(); if (argc >= 2) { input = fopen(argv[1], "r"); if (!input) { fprintf(stderr, "couldn't open %s for read: %s\n", argv[1], strerror(errno)); exit(EXIT_FAILURE); } } else input = stdin; if (argc >= 3) { output = fopen(argv[2], "w+"); if (!output) { fprintf(stderr, "couldn't open %s for write: %s\n", argv[1], strerror(errno)); exit(EXIT_FAILURE); } } else output = stdout; while (fgets(s, sizeof(s), input)) { if (strlen(s) == sizeof(s) - 1) { fprintf(stderr, "oops, very long line '%s', it's odd\n", s); exit(EXIT_FAILURE); } if (s[0] == '\n') { /* empty line, end of task */ if (model_name) { if (already_there) { free(model_name); } else { /* Try to get already-loaded model */ HASH_FIND_STR(models, model_name, model); if (model == NULL) { _STARPU_MALLOC(model, sizeof(*model)); model->name = model_name; memset(&model->model, 0, sizeof(model->model)); model->model.type = STARPU_PERFMODEL_INVALID; ret = starpu_perfmodel_load_symbol(model_name, &model->model); if (ret == 1) { fprintf(stderr, "The performance model for the symbol <%s> could not be loaded\n", model_name); exit(EXIT_FAILURE); } HASH_ADD_STR(models, name, model); } else free(model_name); fprintf(output, "EstimatedTime: "); starpu_perfmodel_print_estimations(&model->model, footprint, output); fprintf(output, "\n"); } model_name = NULL; } already_there = 0; fprintf(output, "\n"); continue; } /* Get rec field name */ c = strchr(s, ':'); if (!c) { fprintf(stderr, "odd line '%s'\n", s); exit(EXIT_FAILURE); } #define STRHEADCMP(s, head) strncmp(s, head, strlen(head)) if (!STRHEADCMP(s, "Footprint: ")) { footprint = strtoul(s + strlen("Footprint: "), NULL, 16); } else if (!STRHEADCMP(s, "Model: ")) { model_name = strdup(s + strlen("Model: ")); model_name[strlen(model_name) - 1] = '\0'; /* Drop '\n' */ } else if (!STRHEADCMP(s, "EstimatedTime: ")) { already_there = 1; } fprintf(output, "%s", s); } if (fclose(input)) { fprintf(stderr, "couldn't close input: %s\n", strerror(errno)); exit(EXIT_FAILURE); } if (fclose(output)) { fprintf(stderr, "couldn't close output: %s\n", strerror(errno)); exit(EXIT_FAILURE); } starpu_resume(); HASH_ITER(hh, models, model, tmp) { HASH_DEL(models, model); starpu_perfmodel_unload_model(&model->model); free(model->name); free(model); } starpu_shutdown(); return 0; } starpu-1.4.9+dfsg/tools/starpu_tcpipexec000077500000000000000000000042601507764646700204620ustar00rootroot00000000000000#! /bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # set -e # fail fast PROGNAME=$0 help_script() { cat << EOF Execute a StarPU TCP IP application $0 [option ....] application Options: -np To set the number of workers -nobind -ncpus To set the number of threads to use on the TCP/IP Slave devices (environment variable STARPU_NTCPIPMSTHREADS) -nolocal -ex To specify an external launcher for the application -v Output version information and exit -h Display the help and exit Examples: $0 -np 2 -nobind -ncpus 1 myapp $0 -np 2 -nobind -ncpus 1 -ex 'xterm -e gdb' myapp Report bugs to EOF } if [ "$1" = "--version" ] ; then echo "$PROGNAME (StarPU) 1.4.9" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then help_script exit 0 fi NP="" EXECUTE="" while true; do case "$1" in "-np") NP=$2 shift 2 ;; "-nobind") export STARPU_WORKERS_NOBIND=1 shift ;; "-ncpus") export STARPU_NTCPIPMSTHREADS=$2 shift 2 ;; "-nolocal") export STARPU_TCPIP_USE_LOCAL_SOCKET=0 shift ;; "-ex") EXECUTE="$2" shift 2 ;; *) break ;; esac done trap 'kill -INT $CHILDPIDS' INT trap 'kill -QUIT $CHILDPIDS' QUIT export STARPU_TCPIP_MS_PORT=$((10000 + $$ % 20000)) #echo "STARPU_TCPIP_MS_SLAVES=$NP $@" STARPU_TCPIP_MS_SLAVES=$NP $EXECUTE "$@" & CHILDPIDS="$!" sleep 1 for i in $(seq 1 $NP): do STARPU_TCPIP_MS_SLAVES=$NP STARPU_TCPIP_MS_MASTER="127.0.0.1" $EXECUTE "$@" & CHILDPIDS="$CHILDPIDS $!" done wait %1 RET=$? wait exit $RET starpu-1.4.9+dfsg/tools/starpu_tcpipexec.in000077500000000000000000000043021507764646700210640ustar00rootroot00000000000000#! /bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2021-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # set -e # fail fast PROGNAME=$0 help_script() { cat << EOF Execute a StarPU TCP IP application $0 [option ....] application Options: -np To set the number of workers -nobind -ncpus To set the number of threads to use on the TCP/IP Slave devices (environment variable STARPU_NTCPIPMSTHREADS) -nolocal -ex To specify an external launcher for the application -v Output version information and exit -h Display the help and exit Examples: $0 -np 2 -nobind -ncpus 1 myapp $0 -np 2 -nobind -ncpus 1 -ex 'xterm -e gdb' myapp Report bugs to <@PACKAGE_BUGREPORT@> EOF } if [ "$1" = "--version" ] ; then echo "$PROGNAME (@PACKAGE_NAME@) @PACKAGE_VERSION@" exit 0 fi if [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "" ] ; then help_script exit 0 fi NP="" EXECUTE="" while true; do case "$1" in "-np") NP=$2 shift 2 ;; "-nobind") export STARPU_WORKERS_NOBIND=1 shift ;; "-ncpus") export STARPU_NTCPIPMSTHREADS=$2 shift 2 ;; "-nolocal") export STARPU_TCPIP_USE_LOCAL_SOCKET=0 shift ;; "-ex") EXECUTE="$2" shift 2 ;; *) break ;; esac done trap 'kill -INT $CHILDPIDS' INT trap 'kill -QUIT $CHILDPIDS' QUIT export STARPU_TCPIP_MS_PORT=$((10000 + $$ % 20000)) #echo "STARPU_TCPIP_MS_SLAVES=$NP $@" STARPU_TCPIP_MS_SLAVES=$NP $EXECUTE "$@" & CHILDPIDS="$!" sleep 1 for i in $(seq 1 $NP): do STARPU_TCPIP_MS_SLAVES=$NP STARPU_TCPIP_MS_MASTER="127.0.0.1" $EXECUTE "$@" & CHILDPIDS="$CHILDPIDS $!" done wait %1 RET=$? wait exit $RET starpu-1.4.9+dfsg/tools/starpu_temanejo2.sh000077500000000000000000000022211507764646700207660ustar00rootroot00000000000000#!/bin/bash # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2009-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # d=${AYUDAME2_INSTALL_DIR?} cmd=${1?"usage: $0 [args*]"} shift if test ! -r ayudame.cfg; then echo "warning: no 'ayudame.cfg' file found in current working directory, an example is available in /share/starpu/ayudame.cfg" fi PATH=$d/bin:$PATH LD_LIBRARY_PATH=$d/lib:$LD_LIBRARY_PATH PYTHONPATH=$d/lib/python2.7/site-packages:$PYTHONPATH export PATH LD_LIBRARY_PATH PYTHONPATH exec $d/bin/Temanejo2 -p 8888 -d 8889 -P $d/lib/libayudame.so -L $d/lib -A $cmd "$@" starpu-1.4.9+dfsg/tools/starpu_trace_state_stats.py000077500000000000000000000327711507764646700226510ustar00rootroot00000000000000#!/usr/bin/env python3 # coding=utf-8 # # StarPU --- Runtime system for heterogeneous multicore architectures. # # Copyright (C) 2016-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 2.1 of the License, or (at # your option) any later version. # # StarPU is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # See the GNU Lesser General Public License in COPYING.LGPL for more details. # """ This script parses the generated trace.rec file and reports statistics about the number of different events/tasks and their durations. The report is similar to the starpu_paje_state_stats.in script, except that this one doesn't need R and pj_dump (from the pajeng repository), and it is also much faster. """ import getopt import os import sys class Event(object): def __init__(self, type, name, category, start_time): self._type = type self._name = name self._category = category self._start_time = start_time class EventStats(object): def __init__(self, name, duration_time, category, count = 1): self._name = name self._duration_time = duration_time self._category = category self._count = count def aggregate(self, duration_time): self._duration_time += duration_time self._count += 1 def show(self): if not self._name == None and not self._category == None: print("\"" + self._name + "\"," + str(self._count) + ",\"" + self._category + "\"," + str(round(self._duration_time, 6))) class Worker(object): def __init__(self, id): self._id = id self._events = [] self._stats = [] self._stack = [] self._current_state = None def get_event_stats(self, name): for stat in self._stats: if stat._name == name: return stat return None def add_event(self, type, name, category, start_time): self._events.append(Event(type, name, category, start_time)) def add_event_to_stats(self, curr_event): if curr_event._type == "PushState": self._stack.append(curr_event) # Will look later to find a PopState event. return elif curr_event._type == "PopState": if len(self._stack) == 0: print("warning: PopState without a PushState, probably a trace with start/stop profiling") self._current_state = None return next_event = curr_event curr_event = self._stack.pop() elif curr_event._type == "SetState": if self._current_state == None: # First SetState event found self._current_state = curr_event return saved_state = curr_event next_event = curr_event curr_event = self._current_state self._current_state = saved_state else: sys.exit("ERROR: Invalid event type!") # Compute duration with the next event. a = curr_event._start_time b = next_event._start_time # Add the event to the list of stats. for i in range(len(self._stats)): if self._stats[i]._name == curr_event._name: self._stats[i].aggregate(b - a) return self._stats.append(EventStats(curr_event._name, b - a, curr_event._category)) def calc_stats(self, start_profiling_times, stop_profiling_times): num_events = len(self._events) use_start_stop = len(start_profiling_times) != 0 for i in range(0, num_events): event = self._events[i] if i > 0 and self._events[i-1]._name == "Deinitializing": # Drop all events after the Deinitializing event is found # because they do not make sense. break if not use_start_stop: self.add_event_to_stats(event) continue # Check if the event is in between start/stop profiling events for t in range(len(start_profiling_times)): if (event._start_time > start_profiling_times[t] and event._start_time < stop_profiling_times[t]): self.add_event_to_stats(event) break if not use_start_stop: return # Special case for SetState events which need a next one for computing # the duration. curr_event = self._events[-1] if curr_event._type == "SetState": for i in range(len(start_profiling_times)): if (curr_event._start_time > start_profiling_times[i] and curr_event._start_time < stop_profiling_times[i]): curr_event = Event(curr_event._type, curr_event._name, curr_event._category, stop_profiling_times[i]) self.add_event_to_stats(curr_event) def read_blocks(input_file): empty_lines = 0 first_line = 1 blocks = [] for line in open(input_file): if first_line: blocks.append([]) blocks[-1].append(line) first_line = 0 # Check for empty lines if not line or line[0] == '\n': # If 1st one: new block if empty_lines == 0: blocks.append([]) empty_lines += 1 else: # Non empty line: add line in current(last) block empty_lines = 0 blocks[-1].append(line) return blocks def read_field(field, index): return field[index+1:-1] def insert_worker_event(workers, prog_events, block): worker_id = -1 name = None start_time = 0.0 category = None for line in block: key = line[:2] value = read_field(line, 2) if key == "E:": # EventType event_type = value elif key == "C:": # Category category = value elif key == "W:": # WorkerId worker_id = int(value) elif key == "N:": # Name name = value elif key == "S:": # StartTime start_time = float(value) # Program events don't belong to workers, they are globals. if category == "Program": prog_events.append(Event(event_type, name, category, start_time)) return for worker in workers: if worker._id == worker_id: worker.add_event(event_type, name, category, start_time) return worker = Worker(worker_id) worker.add_event(event_type, name, category, start_time) workers.append(worker) def calc_times(stats): tr = 0.0 # Runtime tt = 0.0 # Task ti = 0.0 # Idle ts = 0.0 # Scheduling for stat in stats: if stat._category == None: continue if stat._category == "Runtime": if stat._name == "Scheduling": # Scheduling time is part of runtime but we want to have # it separately. ts += stat._duration_time else: tr += stat._duration_time elif stat._category == "Task": tt += stat._duration_time elif stat._category == "Other": ti += stat._duration_time else: print("WARNING: Unknown category '" + stat._category + "'!") return ti, tr, tt, ts def save_times(ti, tr, tt, ts): f = open("times.csv", "w+") f.write("\"Time\",\"Duration\"\n") f.write("\"Runtime\"," + str(tr) + "\n") f.write("\"Task\"," + str(tt) + "\n") f.write("\"Idle\"," + str(ti) + "\n") f.write("\"Scheduling\"," + str(ts) + "\n") f.close() def calc_et(tt_1, tt_p): """ Compute the task efficiency (et). This measures the exploitation of data locality. """ return tt_1 / tt_p def calc_es(tt_p, ts_p): """ Compute the scheduling efficiency (es). This measures time spent in the runtime scheduler. """ return tt_p / (tt_p + ts_p) def calc_er(tt_p, tr_p, ts_p): """ Compute the runtime efficiency (er). This measures how the runtime overhead affects performance.""" return (tt_p + ts_p) / (tt_p + tr_p + ts_p) def calc_ep(tt_p, tr_p, ti_p, ts_p): """ Compute the pipeline efficiency (et). This measures how much concurrency is available and how well it's exploited. """ return (tt_p + tr_p + ts_p) / (tt_p + tr_p + ti_p + ts_p) def calc_e(et, er, ep, es): """ Compute the parallel efficiency. """ return et * er * ep * es def save_efficiencies(e, ep, er, et, es): f = open("efficiencies.csv", "w+") f.write("\"Efficiency\",\"Value\"\n") f.write("\"Parallel\"," + str(e) + "\n") f.write("\"Task\"," + str(et) + "\n") f.write("\"Runtime\"," + str(er) + "\n") f.write("\"Scheduling\"," + str(es) + "\n") f.write("\"Pipeline\"," + str(ep) + "\n") f.close() def usage(): print("USAGE:") print("starpu_trace_state_stats.py [ -te -s=

  • QFL hݺu16l@ZZB!BDh?p@ DF7# 8tZ{<-G&ӿ,%M5*.'Whǰ[s^b܃KSqҫBs,4[?쳿?\Ҽ6AX8v(͘EU}`0( f4KA! hQ&աOWzu0a_~%:EQ8zh7&UfM̙֭37oٳgӷo_),w"Ѱ~}ǝ?~JŇ3'׫˽ 澎W(J;d2~&-[aokKjZ*w| z,,,Я/EQ||JV-W.NN<ۺ~A qNXkظXeZ9[\7pjʻmޖq`- [[P.9vwB;wºzi0^bEnݺٻw/jw]ve"B!BQӠ (`+<;ʸc +O `b4l]"p`W䘼sRN>+kdLʊի)82WKX+W.kwaǎ_ x[AR4:bvlkSlA(lo(FR~zFݻVYv-dƍ$%%Q~}-^bS!ēJʔwueԮUHdu(Ws4ss+(,<0zQJC޿kYT=QqVy?__!!ԫ[>z2iT:wHu}ܧQܸ{. fHhX- wMs,NNq<,疍-j֨aΎ[!i*Ƕ}*4ף͙ux>-b+Ai\4Ys<>b`)5%wIo1|dU':: *а~5ji>s>}/S۴rQANL?D3zmlA%_.6g>_'~=9sFy |ڃ6]Qә4eFQ8n_\ ZjYa޽ܻw۷oyɓ'}Æ 6:|Rn. !(ZofR<}{2W 7XYL(siܨa)JJ(amm7ؽg/-۸8;sƍ"'..vW;vF5ڴjS?IgP.geA|[;&F=(gow(&4n؏JMVٻr[?&&6T.vvj""#9o5kVz<\8<<///n߾M:u>i!B!BE\/{+rFQjHNS0/D0u*:.6S+NxkK %44#΢(P86֞1gT4*ϫkedg^,hZs\iӦ'3s7nҥKlb:u?)'hE`0pEy晲ٝMxmڴi]2ܸ\zB!DYo"~ g'R=^ۻIAa=FE//nܼ5kxƴ|zTT๔{¯\.ޓ{իsIF#mիW+<B!B!J9b+)y//_J ;3&TT>m>9/jE>F7TP K[ҫnW=vn~ }ɛQōv^uqL=&t_\ ޼yTc, !تUʅv:.,<k6[Vf(맟~Ν;ԯ_ǏszZF<88SF2B!B!DG̯Uy\T/:ؒ fۦ {.lHgƠ5Qiqx6,}>Cԡk-..|Y}FO_48DyM/ +++kN8oǓB!JJf-̈́qoxx\]]ٰa7nr=zGG,mz-(,B!B+o}͞:#GSū&C/%9)mO Vw[usޗ> ku[)/`Ͼ#q1 7'ŷӆb$[bF1cKZ1/WvmB5B!D|}}YjUYOC!B!P~۲Ji,aѹVJ*\nSmMKdg}3a̜9 i&XIX!B!B!BZ+ }>EQJuCi IDAT8? A/qn޸Al࢒BbSTT%ⱑB!B!c|p.$)ӨfԬQD*B!B* Z>RRB!B!_B2BKFFԩS F#&Dr>U>u;/<+j >6˗/"""h4 ؾ};zxgqvv.R )vx$܎ A_s3ݪf)̮_zg:^/YB!B!BKHX!DjWNjj*iii("\%ޣFZYY5TV`Noo{WWWRSS/HMMaÆlݺ&Mзo" )Vgu| .B1 Kg% (G}TvZ B!B!_ DGGRbŊԬY7=G?~~~4S!gDRt:L&saeeumVٶGoio,QE9X9h5q9 _3Z\{{\q/-@a_Rֵ1/nz@o߾=cϞ=ܽ{OOOOr>[fʔ) 0!CP\9/^\"~z>Gf9=z4M6eĈ_Z-eB!B!x+W_~Ia2l, =V@@!!!T^._Zfڴi1ѥKB_!2$7VN˲"}DDqqq4jNG\\&K.qpwwgcQyM;.TFH8z -;HCl\n ^SoYҁ |pZsTsVgԨQ8;;fwNٿ?/_رc%v gȑ@#6mBQF~]}%+B!B!Ŀc oٲA߽NW桳q !gOo"cРA̜9Ć ښgi @s+550<<p%WHh$H ҍHDb)WQ RU,\Q(QAP"WH !IH!mG`aM |?ϳ̙y]ynOwpJ*J ?^Aε .v 3[ŋ8ZloMvv2-Ow-͛TvB!B!B[n+WZ=b$*UrmBs3sȌW( """;v,s1_|S~}ZhA&M8sٵw&88͛Mu~~~ݻt~kԨA ᭷Bt:VZE`` ]t1ЪU+ލ5ҥKaB0kʵ.EјՏ9:uDzulKuЁ !B!B!J53OQQ9:ϋCKP jQٙ/Qy)zt҅ŋh矧e˖ܹJٳy9p隍7rA|||Xd #G$66GGGt:2FN̺>>  vYX111kĉBwFu֑C߾}'//V ,¡C0zN8A5*8pm|iȲRt~ҥK9t_~%~-ƍuЁ#F۸!B!B!DŪYgNom\\<Ao_n)0gű$$$`ggP*9ҬM֭IOO7-`S*TVRǺ~:7ӞB!DyjDGG0{lSy-gϞX9s`0sK_>iў3?&~.vۣͨ=w1^uv؁mڴ!22{}aoooj{yBBBe˖\z4T*W~lݺլ]YB!B!B<*,ADDD: vc[VTBKqYz5O<*z=[l)q_ބӱh$11єupp 77Tl: $55 pU !%5۾(Lo>iԨU`% mCZ_+<Gel֡`͚5V%PJEޝ ߻'\ϭh5hF_keffrUquu-TK\\իWɬ.??8ptt,q? 6sr]!D753+x4A!YՏCyU&JB!B!F[zp&ZuC.;cWXWQ{agPWԩ/^VZf| ժUSNܹ#G꽽Yl //gJ⧟~aÆ]ִ[Nnݺh4Μ9Rd3ƢfΜŋrss/g^c'0Y鍁K'Ѵݳ0y&!Dby3MhD޴ RBDPJEz=m#0Zd6իWdmK,ӧYp!_u,B!B!B}Xwngb8xTޙmi1eKq<%} r3 /qp7WoT6E4hC8u~~~lٲK!B!B!Ty<g*oRӝť 1 8kg1yB999,YO?Ԕpwwg̙DDD0| ݻ9s%ӺukիǪUx7M+V`Ȑ!y]$]E!E֯[:Fa}ۨ3QVhΝ؉^N{T՘]|ʍe1~A@f>? ;Fʵĝ=Xl`c4/=ij>ѕCWwR^&r M;P]VV^^^nnSIl6lqqqݻ+WҸqc4iR-ٱ5h {ǂ2k?֞/]Ib!DSg !lƏO*U'::0\\\;vNc}rh0r3 g?wJYAGbJ+#>{wG\AՓ;6[TX3~QF#2n`0F0zN[:-7qstٵq^ G;{r5z=ܫ̜lt=o4IqO±vg'K?~ϗXs=#/_|%<cM ]g秤oi|EٶmQlܸwy7x:uXOff&:OOOHGGG:^F)ׯ䄓SmF#W\˫B!BCwWa9f8|mΜ9Ջ &&>>ooox6IHH777gϞ,_vڱrJ&N,z{x'-рR**!DEB ͞ ۉ'pttf͚(waq6 _ͫi8x81{xχ O~BAݞ͈u]bi7ӣ)YI$FRA6- H,ZItzo9&Z=*g~_KWyT(קpst&&>fL /_CFNcz0c8Fw%|yz3-:d[8IRw1l8ؤ FKHxm( k'_iֽV1cw&,R{=MwǓVm (td/㗼%Q*$Hfr9(hT.߽6w;Kn}kF-ɅY>3¢se:)dҧ,6jOiVԯ?!3'Wzb/ k ke}ddQӇ=sגSR}ޛBtig>ᙰ.|<,w-$ڵݻSn]tڕD ڴiǏŋ(J>|8'N@PШQ#;'xxxp x NJLL O=yyydddPpg`;wg}ӧ˖-3s[nabcc1 DGGbj3fZjeB!&woԨtjGz֝=o/СCٳǪ~i=z4})vţ.=DXuXb;K9:{|\B,B<4 k֬K.E.5B\\=\DW49n;Npt9дG-_Zܝto=Fݧگe}5ɐM:~]D:.U͗Gfr&ϝYPMH#ŝ Bk$mCmyӦνbzS^ ؃5 r5tp'f/ 93Gd$&c4Ѭt76|^4Q$m%_Y7;0tdYJ?jOMn܀k=i)[_qMӦMQF9l߫f姟~b׮'ٺu+'!!`=̞oxiذaAvvtY` :M6%{9ڶm @```6oތE!B/?7/05jl0t*.\… ʶmF׮] mҤ }3+1b^x(H k4f͚Uhis2w\ K.=zڵk[Jbذa̟?QF>'_-ܬsliۻuZ=rWqoJrFNAe !UQ8Aɱ+R˱Tb>njye^t:VZeE($((l)ԇQG=g=dzöGmt>i&tTݝ %uyGZv&ٸ7Q[,Ըfa^W]I|77߰cH]9l9@*4]4n|5 \`vMםWRѸfpƾqHw/SYwOS -ܫCU۠[t%網_秤oM|( 8 Ϭ!C0w\n޼ɖ-[ 4-_0Iyʕ+4nxv]TRBmK۳ҤB!TC:[n;v찺>**BFbĈb4UVYĥgmy1o<ڶ<[?OO\fvYfn1Wِ!hl!ăcS'ϖD_֘8>~%_r,E's&I+xb''b5kHl뉎W^8Ws}&^SW8h=>ijs{FK4֒ B.XW'*dRUOS ( Rn|x۾ٹ:;7ѭiS] .wjZ OұKG2CwS']5fw̝7C7VS/8kWkqƖN oM}k O5 =Oo|~J;ۆ ҥKټy36ҥKy}333ٽ{ bժU:Uo^fWZxԩ_|ozСCFauB!B!w3}kh4 /Ɠ/m/ܖWx76Ykr< -wStc<\y.BTZhfϞm*oѢCѸqclllxƅk_4C;'=m֦aD{t}g1{6nmZoLZ/-LI,Qta_Y)tn؂^-̗mQ1z/2qvugڀѴ:>еHq17M5'}ߏم{ЪnjUfVb ~#ŝĴd&lgg(LAUOR2ә{0zJ9ۋ,4l~iøBZV&*;7Q?6[]lq%_|~J3ȫJVUY݊+>|8~~~x{{ɓMK.ٓ7|6mPjURRRKsuW}t:qqqxyyƥKpuu-ӵΝ^z\B!&/k.VPNXB{sStŵp:C1:Uv}8;yyKOgYzEzTVZvٖvwT3o6 #Uzػ9bxwRzUmowH2Ne`-G;{BոK䂻Kj:e J?’Oi~g www݋ޣ 轗}j(DFFJW!B!,VUTڲˌB^^68ӭԾWerW7lU mi g?WI O5\oT˒㵼牧[ L9U\ULAj6pS2_^!"toֶY?,+.ZMZְ*GwݻB!B!#,&ܝBr9*?~3ry29^n :zϸU0 x0ryr*0'4|-[zSn!P{~$ea*ZG%z& !xH_(;2_;)LwExߟB!B!xmUFv1MhLS&~`D/}GI5#w\jQ4 cl|\T~:Umqw,j|&C(껿5Mmhoe}r dB(P>BUb_E\=O!B!B<\% Elfz|oir%Q)P*]0k2!;)i\9Fniw5CJ~UTjegX!rodlB!B!BB!B!Lce^ZW7(zNУ?OIˎu[K@=yj-˶Wբ Fx4ױW4;eڄն]{xooHS~\֍BJ!1~xƏϳ>+͛76ٳnݺO3p@j֬+0oK16 ?fi̱֯m6  ظq#>, VO\\iiij֬IjլK!B!0`|ʔ)ر[YZW`090QuwUxk`:~|~<>MGwoSpS"%KOueרB3Z̿MH1fe2nJ$eX!ëRgggMejlIhJZmOO?ʝwtQf ^@egOjz 'WFwHU|Y֒߅/DPPg3"~zݢ###ٵkݻwnݺxyyM׮]ILL޾AM61~x.^Rd|TRTzɒ%Kppp>3VZ1vXB!B!*++ ]lxuK֭y͛MwyZ|tm/ c/F0&-1r)UgƵLUf|C'_!xUwv4hT֧Oٸq#7oޤCi8= C3x :SHQNmXFT#BE#h>GFFn:FŤI5j6l(Y3|}}_M[ RYϱnݺٳgIHH௿/>DEEo>B!B!*3w{{ppTA-_/S\x rF^/^G^^/Y'|PV%..lˮt:._NWys9Bl|ޠd (s0*BhXf ]t1 &''-KWhH'rmܼZx e;Z08v2SKǨt3BjiӦihԨ#G4qYprss駟صkM6;v'OrM\\\>l޼B!B!*+iBR4=L R)M/l[DNyZ#-kaVpFr|/kb˱+R栔c9ؕ|=x:)9?LU8)9O?\e0Xf ߬.##"z]裏8r-[dk&M4hɦKnELL%Rd;c[J{n7/=rrrj֟BrY233$44JeҥK\r4h`7B!B!B!*/֭8 ݅(U6Mⷨ02L`0x7:wUjZZj|I}7+_dIu:TZl,B2o>>  vi[ &&X^{58qb׽Kx{{ΩS̒B!Deu5iР0͛7ȨB!B!B2mbkހS^~3h3qٞCEPd3 tT*V}qwwT_uP4G^B!*5FÚ5kҥiV35kdtڕDpwwhB!B!B 3DDDڮY3IĻuoo:v[Lv`lm&Bo cHbbb[rssMɦ@RSSͭ`aYR!ģHӱj*ҥYݐ!Cزe }qҥ T!B!B!Dyj hK֢u{oRIA^7D}.]7n|r ;v,meѢE`ZlIdd$YYYXB``͚58::ҿBuktBIsu>oK˦Պk׮4i҄L²epvv.̙3f͚fVs%ݻ7CŢ'BTvFu֑C߾}'//iii N:ELL -ZB!B!Jϟg)Scǎ27~x_ !,Zښ/@U8xg(Q;a z]bdå33%yɳvʂ z899>lֶUV$$$G@@̘1T_F mf: .ӧOשShB!,VKtt4g6hтp;Zh4ҿ*$^!B!B_YgË]^^Znkft6ol:1{Z۷o[ne^!,N[##vF-WSQ^kmo ;i²/WEmmmm͒w%((ӧOpBkB!*;[[[̙SbN:ѼysrssDغ IDATB!B!= ⦁^UJ*^/s\~OOOL^_Nʕ+E>Ldɒ"HIIHu:TZUBXŪ=-q ΀=Re&+6_zuhUzgϞ[ 4 55LlB޽˭!a₯!B!BJK*He(o%w%U*V-1yd۷/ueĉL0&L@@@#F VU?8_ެ0[yLL M4aÆT^=z\bL$$$6mބL'^̦t6~2Stի˵O!B!B!B/[ BR(H*۳U*%jNbŊhʕ+,^{2uT hdԨQ >YfE.]t  tܙEhի'Oo)6͛7s>}:4%]i?}?B!B!B!?g*P)U:JZDRagSx 8::ӧ^^^Ӝ:u̔)SxlY?Ύ; ދӦM[n|fK/ @xx8sh4P(l/wB!B!BQi,|غ_JvũU+V࣏>bܸqlْӮ]"'&&鉣z35jhHII׷|||Le/!E'B!B!B!*w6`0${mJT[K?U6eKy 4Ay')) ]dtQVJVV鸻iB.BJ/99G{nN8N+&%%{r4MD)*ܹsٳgOE!>50rWOO+:BvիW[`G2ei9{+B>}xRSSիzbʕϝ;ǢEX|97o,T?~xzĉDB!${Jv`IJmfseر#F1 %$$ âE?$$3k,3g7%WZ_|auBQI !(?Nrr2۷o?&//T%>c8tϯ+7Ҽy_,]VZY}?0~g6Uy2ܟm~N=,IVÔCxyUǏm6fmڵkĉ1 V{g֖Ƨ&Wmzqi ٹWqwr 5TN3w$zuTΔ맺7U_fffl۶3sLZl䩸0`Z_ì&GeܹիN5WדU~hԨSKK'&L࣏>???>3پ>>>9rhB!D4j(2ЕQ+0OmӇ.\V;s>}8uAnݺqΞ=;666o߾=ɤR~JK?_oPrr-[Tf$ !)$,B**0~7j5<CR_|G+W9s&f"))STTDnn.3qD|'xnܸ?Κ5k 4֭[9{,:ZnV/r3Gr/^5,U#NF%u!KPT ēxUgr`Gg^## s a{֍ylm=xȻd:˚sht|N[Ʈ9Aeeh5}3~M_ȠHAqj’bN,ۂUR?|*E%\}@GMfCxcJnۀܥ+Iz§ݎttz muOK;ϖD[J2s 6P=gO`]xj!7-l2Fg;4/ޢLrj+xӨP߬_XRG78Xpz3?ìϘ4>/^ůwEUan~ztR6ˮ=zBڷo_)m+SV_g<4/֮2g@ x:?tV6 X4'*uue:tZAޛgh,ن[f<9k7Lo|[m$} >U~-H[͚iݤikI[:v#ӝ[?ɉtV6t:V~ˊɳߴedS@ҖqS \/*duJY ^gPW̤sW;Sm͈V~/ ǫ;$8*%q?3Dz7y|\Hw;?Uq1VbdlAfdUDVWߒ9Wd|^};MMSS#,%mM4 lfD+_nM_uOΖ-[0aӦMc„ lݺR.]o߾*jÇ9u锕h"%M,-]VVFYwdd$iii8887ߐwMI^Ѧ|@ǎ8s 4טo㸻s [7/.'B!BJf !0NGii)Z+?kZپ};~~~899 W3zh6mDHHDDD({-^t;wGzz:z` 7nڵʟ5JJJؽ{7.;(^x#GTfYo>}YǷ2%\bSss*Ӌs 8{:Pe h<g^#eg"ugCUѰC3D6|1G[^=6 GMXdy~=}N-|jXxӭ?Nq o`SߊԳя< |`V~Kժƌ=Mv}{ ~}NK,3ь;vK[׫r㪔hK}W}o63C?B(ՍglrWﶜJ7)Snk?cΝsߟǓ[)_Æ {dm?cƌQ0aK.wީUua%'O_SNxyy~Ŷgƌ 2// Jtt4....'B!BJB!oooz=VbϞ= 8 Tqq1ӦM^z1cưpBٱctX@IMMER1m427UҪklz=nn̬=7M땠2g'9q}|cˆ<8]ŷIGyiS^4WU^A<3o VRZV=ixPsss ڕڟczd_XÿFKd_kP}Z.gѾwνR~0#.W[ܮCgfNqiIL uMOu_P~텆J2X)v󾿮deeչ0^^իpU|IVXY_mlʯ<wֶv4&?t$B!BDB!߀Jq`kkʏG^^^uV[ƌپ}M4… նV RΦQF@x`J@V{߯2m֭FgÃ+WPZZZipmxG\:Ͼg}+y*fW$RaDVQVZ*-nđ\ϼFSo + fp6uXFZRUYf g/zN5p=3sJo ޸m«;Z=1}*iN.ѕ .Vۆqu(s( USw2>Sƿ6I dff*ںt7lP^z*暼rMLW0vgŊX '&M`kkKRR~~~8qڟB!BqB!ӧO+˗/s1%PRhӦ qqqt:]Frr2m۶ݾmcƌaժUDGG3zh師+̟?_Y://`QFn:?y7nLjjAʗ~GXzRƍ߿9c_/?VVV9rRW>'87iR$+b8}mݝz&ObٷJmG@׳yػ$ߍdMwCgNPP?ַLc jBjNA@ JEǏ]8C?v ?FSTZ?FԱ҈x5#k]Y ч~ftqW;G=M+s n/+u?l3@~N3X㑶YRƽ;!p7)s;L~ݿyƍ),,D3`נ'?3/^$&&LСC{ښJbذaX2.\w}GHHHϥW^<3.'B!BJf ! V˖-[~:קӻwo%X`%%%tIW+?L ֮]O?MÆ qvv&##^zI v3gu̖~饗ڵRG.] ~ɓ'|rJf%00?Mh %""2Cʙ,ޏRpiBi {''#I$gbD}c옶SvTJ^&BVmr5cH6..-7om;b"f=K#P)Z6x5tա㔴S}Wwoڳ?<ܪ-ݚ}1?Op'%^cz).^<4j5G v:Ms;L~jٿEDD{թ^^^zZh̙3_|~{nٳ2P&Cf͸x"}e&h"hԨׯ_駟fȐ!>Zn]rB!B!ĭTw B?CB>|;t2^kxoR|^zz=/_Ғ ԪE&~r5iڴiKjZΝ;ofm?[[:3$''3p@+\[W1ݱNVzV.:mM/m-*K4r^_y7ȸz [:-{9?hv txIfdeeѦMZ]>C[bbbsIMMȑ# <|䒒֭-\={ҳgZwߥs!B!{KKT*y89`Qcpbvq8::*ߍ)++_ՒFÆ +\6Z-4n7cmԦ!B͛ƆÇ˔)S ٶm̌ٳg?> Aӱd.]СC`A IDAT$$$䄧' }Z;''aÆѳgO,,,HLLE(_ZgϞMvv6-Zŋ={իW3j(6lC|ر~sc…oʱDJZZ~zzr^ڵS 0ٲe &ODDFbȑ\ppqss3i|jr)}_ݻwxטl FΝ 66cǎakkdƎ ҥKYn6ĉZW^yŤ󾓶moCJ=zgѣJ`^!B`0Ӕ?ZBV?FV^fu.K/n:dԨQ|L:$Nʌ3xGYv- 6 Z^穧2/ _|ѣGжm[ ͛)**o}m*+ӧOTBJB!WfV9B@@~~~s)ev䃠lݕ״Z-g&**GyD9vuy%o@@fܹꉌ$$$;wzj|||ˣM6|̛7OI7nRy #44oРA*0&:::VhB9R033C+ ٓ0|||h߾=K}||/<Jrss),,I9fff}ꉌdi4-m۶ܹsF׏k׮UZ.<<Sӧi߾=;vI&7xС~~~T 2 6I&j*B!BFFB(if07uVVV*['yxx͟Ǚ3g 4_)S @HHǏ7w!߃B!`)))={sHΝuVÇ9~8jΝ;yf&O 6l`t҅]vāL{Umݺwwj;99ѻwo8q">(ݻw L} <:,[oz֭[,\m۶OÆ bʔ)deei߾=zJ'|˜1c Xr%}YYYS.:t(zS~}vj xYXXhO:]MOzWZn1KKK,,,3g=3ߝ?^}kkkT*lyOď=M6Baa!ݻ(_{Νő^g,X@)aƍGvPwIII wV붷gرDEEFl *}hg !B!/ӕ{j4f/lu y5QFˢE !33`aFqU ^KFխm!풻B&880 سgϽ]uի?~<<#mjULeeekF\\/^͍3g\>22+++}n݊5۶m3O?ʔ)SXxq߿?-"""T Y/ݺuCqav+4hmJL7c51+nj+d0>*^x23VƖ3f QQQwAǎHMMER1m4Fȑ#YlCiiixzz露3)?ܬe˖UgzӗB!BffiԘk*j4_5f0855UYAΎ޽{SRR|hذ*K>>>nZya uֱz۪W%`!o@RѸq;_s=GPP|&ۻw/fͪU=t ԩS&/((`ݴhтRRRhӦ lllpsscɒ%TJ6oޜ>}o0Cb&3ĵZ34 AAAڵZ[[ӦM~gܬe˖jRRRc)))}51;wFpAڴiC۶mCѠAJsDFF?tRzJO{xxpJKK+兿?[neӦMl߾8n j *lXvvA@BM˚eff^ՅB! f}F?gUjij>{,۷ˋ:0n8>c{̙3ټy3VVV >Z͚5kX|9-[[Ynm &&۷VB/YZ!xN˖-QT\|cǎ)++T Ren4V$$$pQLjQKK;mt:,YBXX666ܸq-[Tڋ՘X,,,Xv-߿2d *h4̞=^{ɓ'W,--#""N:QXXHLLLl2 ~j[[[M̙3>|8;w={=}>{\tM61gΜ*v|3f ӧO'** '''ku~wRXXaaaj |NʢE}/W_}ʊ#GCUw̘1Zx{=師+̟??FC^^2ϣFbر<쳴iӆ2 oܸ1\t`f yV^ͲecƍL4c_ӽz..B!si4Pe+LWJYaӧ999\pV'3HnݺqΞ=;666ߣ '''ݲeKdjcC!n&3BVe˖-̛7~xyyѻwo%ɓ' o%//pgD$&&k_`mmMnݘ>}:#GO_RXXȢEL*ɣ>zJ2y'|KKKV\iRk֬?'''5kIe+ 0,]Ay+i&M^gϞ4nvir1GٙMңG&Ol4mgxxxШQ#6mʔ)S6mw'=̙CӦM0arw!**h4V9^!448:uD ֮]ˉ'hذ!xxxk.%=889sЭ[7|||pss#66֠.]/c|rbcci֬V뱉`FSRRjE,B!?E 2U͛ӪU+ou,,,BTw 0B!}w28dZժ g,Vϧ;;;~r\xQyt?kkk\]]Inܸ pqqt:RRRQFw999\r-Z% 8:v4mKKJZs䄃C?}___uB!B< k]$2r|%6$%%BpBBBt,YK2tP"""ؿ? 899 +;СCiѢOVaذa iѢ111J@wdggӢE .^ٳgYz5F"//aÆqa:t_~Ɏ;Lz .. o):t(iiiJykN 3`rrr g˖-DDD4>5#GrÉͤɩS ӓ۳~zvŋk>>ѦM>C͛7Nw޼yZ뱸ՠAx8{,-Z`tЁ&M(&NXy呐5P>LW_}sNGYd dݺu@ 'O1v~51TEޔW) 8::pD+vпZlɸq(,,W]V/1qoR^ZX9fi09'38ϛfط[Y›f4d}>}:/lݺӧR!Cac333<'''C&MXp!III3i$N<;|JӧOӾ}{:vH&M7n%%%ӡCpqqyI&jժFP!B!; !bbbXz5t\YYY\tm=3:uꄫ+۷oWmܸ#F(KSHOOĉ۱~ztRb)=Jaa!۷ntO ֮v誘ȽK:NˍKy k xib >jrߦOΖ-[0aӦMc„ lݺR.]o>cNNNݛ` F0Eff& 0yؿ{FDD쌳saLppp-))a߾}=o򊊊dff[/\`0۸I&[2>7;~8@3v~wϔ˄ "##t Wܹy?r233ѣ~~~fZnMZZiiiL8Q)?eHOOܹs8p+WիWb INN6x|l/ B! #..^wCq%BNVKj7+..fӦM =z46m"$$B"""ػwޭ[79vؐtGӧUNŲZ-юObɿx6S\Ig{K+??HٙHA;w.̝;ƏOnnn| 6$==^o0 =::e˖7[oѺuk"##%km6iذ!AAAL2,k}J| cƌ1cʕ|wdeeڵkk5 fСzOڵA"Y3+icS+ 177pMcLAAXXX0g{\]] fb;;v߿ο_uK.sNk,X@)aƍGvPIII wV󶷷gرDEE /9rԳG}>+DGG<\!B?S+**)]veƌ1{CPPн>%`!퍷7zUVgkZ֭[ɳWcƌa…c<<<ر>ydСC;Ҷ^Gޱ¶>*JW^T;*%ʊ^{^{BCC9s&_}IGFFbee5۶mcҤIJ~MpB/^ѣ%@7njՊ(8oexUuNڵV*?hР*upp@Rcr{7߸qCJGGG)..‚1c`ee'|b)?SrƮ/T*MfPBҪolznnndffV{UbjPZ!@ IDATB{(ߒFs-'4j4N_̮s7n ;;GGGŔU_\\Lff&nnnX޷[iZRSSiܸt7nLzߛȠqƘIGQNB!T*760t:6mڄÆ 3g֭lڴ`vߥKXjc̝;Jsssz}{e֬YUjGW\c[3n'ܮ];%\L%@l3:uʤ6 ؽ{7-Z %%ڴiSihXd *+V7oޜ>}o<0!jh4k׮ZkmmM6mM.s-[VIIIQTڣ1sh4<@6mr?St}پ};qqq+&&FSݒjuMT*] 5cS!B<={ZyAޛKxxx0dZjŋ/ԩSIJJbԩ3/ӸqcpuueAmkѼyszA\\m۶eƌx{{FiذCոͯJ֭֭...ٳN/xHX!xpi%pe;Hlٲ BII EEEw-g3f V"::ѣG+U* cǎ,][+++v]vǎK|||/cKCW+++9bpF1I,W(_1~&Jك+b8}䲦4Pӱxb򧪷lbX,,,Xv-˖-cٲe޽J5 gfѢE&-CliiP.,,$&&A`9R2\K$WiӦ'p!kooO߾}tdeei&x*v|3f ӧOWhx/M=7`J`` W~3kwԨQ[NO߸qcRSStq yV^ԽqF_sy^z)?h !Bq?ѠRPU^5Vh3}]grQxWX|9~~~,_4e+ǓErr2$&&n:NVw}G||< VKdd$+gΜa?^^j  5eh8@zz:ӦMc>!ăIB!Hղe͛oͿoݻ7PPBBΝ&<<*%ѩS'7owvvfܹt֍v1j(\1cǎRYnϢh 4u [wa(&ɞؑ>e C0UqppWWW YhIuFFF裏hc{FR̲ٓO>%+W45kꊓ̚5ˤ yᇫ\Ay+i&M^gϞ4nv49r4mڔ=z0ydk;>˗/ÃFѴiSLRi~SU0vXv-'NaÆ]`̙Cn͍X:tBhh(8;;mҬY3<==%,,mY!%%EB!% s34j5FeLQ浮ʊ"[._Jã29r׳i&܈33ee垊˓'OC?ֺ7Q1K8$$Ǐ֪_BNדBgHۇ'cNƫ_거4m|#ܬ~/._˗%sG 8dgOXrEk\^t?kkk\]]Inܸ pqqt:RRRQFw999\r-Z<]vl6mZ`ZsTRƜ?[[:yi|}}IJJۻ !B⢚WՙI4oeokNuV-E{8eR:6o|Cx衇Xx1=zC̚5QF3sGɉ' ޞ~GGG+|||xw !< !/68B!D fmmusqh۶-m۶m0j ?5}h>붶Ȩλu6\##&Oݫ{CiߟULLLlp| >ϸ8,Y"_!BqTYFFwgcL;;iR&MbҤI\~{qƑ P3tbb";wիXYYµk״) ~k/ZMFF^ mjF!! `!B{/!4HKkwީۛ &h ~K]O=O=]F!B21r}FF}Lnj:v숭-C ?GVP(prr"66VSȈ rrr"66#GWhh(!!!۳qFT*fnݺk.qmKyfJKK={cB %,X!M~X!B!O{SE, л튊 RRRP*cffV9YYYQW  99;Һuk:eee$''ꊥB=B!B!BqhHbܹ^8::w_,333K6NB!Z(,,___LLL].B!B! cԨQ6wB|+BP`mmɓ'djժ^B!B!0֮]!! B!ZBJ>ԩSׯ^B!B!B!?uL.BRUrMLLP.B!B!BqB!D HRRӧOʅB!B!B_dBтT*Q*R*B!B!BqBтyyy1j(͛GQQ׫\!B!B!EB!@PBnnnʅB!B!B$,B@EEE$$$Vٳ֫\!B!B!ɤB!)J; ={2dȐz !B!B!? `!W_eٲe̙3z'bll\r!B!BUbb"&Lxb8` ֽ5k0~x/_~W<cZ>=7oެuLqqq̙3ɉCi~>~8ڵ6oLhh(lذ^muޝ_ooo8 $?cѣGXѣ;v$00M;׵ `۶m޽~9sbŊZ>񘛛3rHj79.?}x7h/՝(/+ZN |6Oy4~j7kӷ lp7Br5 B!B!B!i*u'~5mX8\1~j֝opXV3k,f̘˗%22RRDlܹslڴ$N>MVVV]vqQ.^ʕ+9s&EEE:9s&YYYőJLL 7o`ʔ)>}Z /T4i[uL'Ol8w:tWJZ՞RYһ6m"88s璖YJ%{%::t.\ӷqϟ_uǽ6}UcYHpTgYadœ9w*>15@VUt#d6BnC`*Uŭ{If8ӗl&PR/}0HX!B!B!?'+lJ{f\\Ν^ʊŋXҒoN^^ 777:sURolϏSNemۆ#G֖`2|7̝;1]xjf:88`ooߠ5ǍǹsPue۷oO`` _~%Pvxx83g… >}z պb ;Mvu `׮]֮];crɉ;4s>Yfڎ]2\,'|z ?/K<j^)^,׿~%$$GqFnܸ$}4M9_IB!Bo'+]hpcii9ֱcwܙM63ZUFFFtЁ1j&L@HH'O$''2^3|(**⫯c&)))nݚX1 Uh ʨs Zr0$$ JŮ]eСceeE6uwwjuoiԩۓp/GRP(<^#ZMr[ӡSnajfŭokT())^7bna昕Cz2I !-Txx8O&''jYn;wl( j!c'hG#<һs 6l@~4Rs^є8p /ηxB!A0݅ĝB4Mggg]FqqXZZZL4ÇСC7nVyzenj58;;Wk'11;wr֮]oC=իZ_L޼ySg>1ZXXh]Wcڪknnndffj=z4lڴ3fh...={oX[[_q^OVVVRғNwr>=eŔ. ,mڞҒBec7 jDB!Z3f׿RɩSjvA^)Júv)2%}ɻ1|ep:7Sv>>x{{PTTG}TcTs-C L9444T3wƍT* R-###***4IXj͛kFjj*AAAuK.<,]rrح[7v@qqqף.iq tǚȍGJo~Zٖ-[puuƆ_mRxg033'V#44T> 9tzz3h ͛={Ϗ+q>>>ZAAA:M=UVպs]cƌ!,y6B!Bd0bvI|ܫ]ߏv-o{ٽ'ahXȈ駟鉗\]ٳ'3}t>3aF?,^ceeU-͛=z`ĉ?Z ʘ>}5Q(lݺxڵ+]tzḶ~mԩׯ>kҘǔ)S5sL"##0`;w722",,~777\\\jMJ{o4oVxJJJطo'ONvz(EEEsѮ]ZӖ-[jc}{tIJJà .pٳgaaahյblڴݮ!`޽[(Ǝ̙3u!K䯿}_,uq9|0?3| zcǎeر4|,^މq!B!Mi+_]scڳªJ]}[&oWZZJRRX[[ZjZR*`>>>^ر#[, 9s3//WWW;FnW^^yyycnn^HNNյ^I-]x?~<ӧO׫T,,,t.|'}*NNNgذax{{/ʕ+5K/4˗3jTgCӏgӦMڵQqGLLieiSg e9׮`e〩=es9Is!BR(//GTbaaw]6lX?+z?Ҏ$8tuДIZR}>NţP$=ə5 *Nwe꒖F۶mkL6СCkLm۶ބ_f̘1xxxܫ6""~{ nj IDATz3gm۶:u:u~~GIJJҔӇ7x1cƐoGEEgr9r%ccck-wDzz:jAB!B __z566֚EsttQgٮ]ظq#?_ޠj ///kjnOl7Ajjf/XxwVNBӏٳYF߰ʶơcFsBтyyyZ&44ȥKHII$%%KYYIIIcllܡTAZmjH]'R_S[o:vhTb)Uz{9 u֬Z oRֶKSQ({NV7iLPo&#FA9ƯX!B!D5Ε_C|riii 4H9/K,1x鳩ܾdƍՎ\ڱ ܍p>h$,B< ...P^^-?#ܼyYfM{/W_Vs=%Vv K-՗W^ܠF-ݡClllkϟ?O-#G$))uֱxbƌCHHÆ 3L׿oͷ~s_0x`Mb9..u.33WWW+B!k0]]]ILLYk54)$,B@EEEB //gjI\%::C1 I݁/о9R)HJ߹???,--9u?ڝ:u*SNm gy<3?_V;x`<<<nCOOr6ns=O?g ?ĉ>}ѣG_};v잟,B!01)S?KB!Z Rw}Gaa!ѳgO ܡ5 VLf1̞xU ڏ1AAAܹZʕ+\v ccclBn駟 CM{=pvv3f0f̘jֻ}Cٙ^{˗k[P111,]кukϒ%K8q"ZmX''۩|Ν\!B!B,W'Bđ(|zkwW1oeQz7nܠ[[[.%^9sK,ڴn $..ssOj5X~6VOWMڵkٺukah~p).u<>NGkUםuX~]o痍eZs{{^e+_l\w_͆?6͚~?/l/ ׏~1hР1zhF]j߼y=z4/{_^!Bŋ9p@sSyy9k֬a,_yy,Yb꺾L0AX^^ֱiӦ1zhl2FMHH⭋K/!ۡ/HTw1$B*<<ӧO?III|ݻjIqn^/9<F ѩGvoй?y=!f󄆆#ӧ^uO<Ʌ l(}o7D߄:s^]7>^ QZ yo€^CaLwؾ~? w\i `8~j^ 5.~?TRRBiii49r:ٳ7|#GS.\w7yQT@!BTTTiiiN6l ,,ٳgtW\dAˋ(//ԩSOS 8BlqrQWݙz:j5\*M{kF5'f`i݊Vʹ\98f`Ӷ5&Z+(*jL'k2uT̪jPT8;;k?M&j5iii8;;X';;֭[ӺuԤʐ'$p}ؑ_;Tnv _ƀ}jJ._gp>諱[ZA|X>J+g ;_W^WYYҦMN:iT???ZjpT7؀طoOY^(J~j_|ݻӿL~:s+lذAscpsscݛ+Wh@Ϟ=ի:t`锕i d 4Jէ^=m/ `-Լ2 RU&Mrvp76+5w" +psV|Pף}ܸtcӖ-[puuƆ_ݠm8q{{{޽{qwwȑ#d,Co̘1XsDӳB!DRɰOap}2:Xhnnn3.]hmmJ@@... >Μ9T.B@@۷g̙TTTm}tޝ_~///f'|qڵkWk!!!l޼P޽;:_qI^ #==dތU*Z/2TTThP:]zah u+\^ǣ*oë*S4axNy,7L -0YX;}}T(+udT&#µiSĉeOmzMٳg֭[0aFFsdddp9A_p0KTTT| B!tSU;qmc!ƎWDxΝcӦM$%%qi4ߨj&O$$$p9:t\i̙deeGjj*111l޼YCTk.=ŋYr%3gΤHS~ jw6mDpp0sz>-R޽{&== j^ko߾iӦM3+++T* 6+WRdѢEZT}PŒ3|2DFFjݻ)SpBONDDϯϯ^u GuuFVZNŔjo~9:chڌ >Gһwo<<́Y+V JBl8Ν;Ǒ#Gb̞=1OpN<|իW v]899Zc}wh2vtҗ<:" e9 gĤ7K#/cgU~OclbJߡٽ5՞r52| =;?׵ #3BˋQF1o\0<{g[Mb׾C~B׊8{kޥQ)4{* ˈWYt)}eܸq1{:8 h?:::jjcdfZB^׳'08I!xi"dgX6 Mz:n!jP*uwZj%2e {ƍѫW/Mܹs0a111RZZJyyy3F[MQeZG!B<]$8XqrFܹ36m"44gggY9%%֭[Ӿ}jj&L@HH'O$''2Q[FFFtЁ1nv:ǡP(=W_Rصk :th~[ ر4nԩ7 ^Jm}I|/j`fޚzp S3 -n-Z۵BYNIQn޸5&ȶ$BBE*wi( Zffӫ,(.`jիWu&Mjj*;;v`ǎZut&o?PpP(eggk%̴> 0(r?g~ѷ0aܻ=Vy_VCn-Ə~ၟ;v`۶mL2ESChh(QQQY+VV^755EV7@ff&B!B<Y*VfA!u,I&qarrr:t(ƍ*7o}Jbb";wȑ#]7xzHܵQ%=Rj $lqq=e՘[GMqq1+6mbƌ Ͽ=vVii-߿ǽ4,LRғNwr>jBkk{ˊ)-Xx=#cZYVk=%(4n| ÐozBCG^^gϞռ1ZZZJRRѥKfA}o?$b:`ڪ9J-@s~֍zoh9c=QԩSʎ?NIIel]vZ͍]qFbcc5o[CHÆ cݺuuVF9ʕ+۷Z5Reh!!Te2o&&3h+l|\POMoXM>kܘjҔƶ?eBCC`ɚ BٳڵXZZ/4C8vB!M0Oj|}*)x`kkː!C(++CVӥK}Q.]y8??|OhHMS*_}{lZ ]`x=!Uc(-1|| ?øm6>3\]] ođ#Gݻ7;wo׮+VуI&Uۃ)>3NJV⠾Ν;wjʿ?!Bq3R1$}UҮGIϷϽޓ0R4,ݑDϞ=ߟӧgi^ܺu+8::ҵkWtBJJ ̛7???zĉ?~G#F7/fXYU.oNN_~ mqL2RzfΜIdd$ z122",,L KIi###֯_ϧ~'^^^ 0Wf񔔔o߾ZI<[I8b 㗑T(KtP(xlbD&죙}< '7)]ݻ_o!pNׯֱK qxxǼEݸqlmm133*ST\~ ڴi꡴q yGS>_~+Wj^4h˗/gԨ8ϦMصk}⬏9Ҧκr ]SsN)G^B!Z0kk?Led}s`>>>,X?,[ll_:?5YZZj"..%K4[26;:z+̏>ָ8J]ۡȰ7←o۶m^baa1cXf ƍc4nߤcoh>k?!BqhlWcccٚGGGQz |}}u쳴!ڪi>ׯ׻=}577:$00RSS5K4Vo nt~s{rrrݞB!B!BBтyyy1j(͛GQQ&knn#<&&&qF9tP زe nO!B!)͛7#G4ŋsfƏJϟgɒ%k럘Ȅ /h6mGֺ˖-cф,޺j?**lذ0fϞMPP]sɒ%m H.DDDp%nܸANNXLL ;g}Vsٳyꩧ o] 5.]עJYcyiq!) ňZ.I !-PQQ j򭿳g 矨j***8s ;// !B!BIVUg(JJ l~nn.7n*//'99uFA@@z[5s_W-99W\!((?$''3qDvޭ)SgcOPPMJ (/-v|/=aDF~Jâ$BHTwo!C4uFEVVVbժU( {FV12B!B!0,ZH}2 W3i쌏4Hѣx{{ӿ4h0h uF߾}'>>mO=ر#y^jj*0|p\\\8s ݻw_ˋÇ쬙 ġC4??~v_HH7o&44WWW6lPw믿^maaabff:z(vvvz׏[j~>v;wfСxyy}vMـضmw~`Μ9X뤏{i<9M;±O Kugų~8*rR2kW+(y]SE$uod_`ˇ3ؾ>~K16vV!hlllxWYls᭷bĉkٱh"Ν… 3g귢t;!B!B Zź_6v,m~5N7: Att4,\PkOӧIJJ &L`["88aÆq233Q*,Zj5'Ol8w:t*_ߵkGŋ\3gRTT)ZfVٴi̝;4uT*ٻwW8ۗͺD:˺ #+-tͱSt鏩j}߬¥!Kt*U#>W*nKj5w|wǙd3/zD=$B[mkkFoꊫAf !B!B.=w '+ǞF=|ͬqq9j5IIIDEE1g222H;<*}w&2餐6!@::(RVD ຬ. + DAi H'dff2) F;w64W82226ӼysFG}NcL:LLL$>>z OOO̙c.ӧ7n{x{{+hѢ&.xIOOo7uej_ZAo5 gwu(E䜣{pR9cD&xt.}@.j:fl~$,B&1|Ξ=˾}P:!B!B`f{{75m_Z @JJ JSrws=pӫW/l733OOO5kf>N@NKKÃ͛۬h+Jjh[ׯq(Z￟?ƍa@gVV~~~޵h{oM${Sc4Q( Ktס$݆d"%i?.g5K^1Vjj/Tήc>`SB!$~B!Bq%XvI}#BX?-"##^T*Ybok׎>}4*ضsNlb0V˖-[8q:I9nnCIOx7B`ȸ$`rD zw( 鲩|4;84Q7}LbmgBt~ݿY;w:6QխȠ ___ڷoJӧO:Ãbi+C!B!'J@ΝNCM]Fֱ+Ο?OHH>?==Zmwgk ((( ""WךRYd Æ #%%-Zaq^UU)))h4=c)ڲ6QF1n8&O\s=u:gϞEP< 4(ضm/y~ooL+=+WqF֭[ר8QV7O jJ |FuB!ĕzj~c֭<øjk-UMN !B!B\mW+kw5tUjڷooŅv5+іq,]+VԻ\OWWWɖ#Fi&K4G$$$0|ps &4: W~<'Nॗ^oX p]'3[\hdBq]&ɼh_fVIii)Z\\\j՛6m5N!B!B!ĕ" `!兗׵C!B!B!U"{ !B!B!/k̙߿ϙ3;vzpn_c׏ Yl^nzsǮ`D$,B!B!Bޓ`/Ye&iwdgg7ݻwaAQQ6mr~}ЫW/ˆ搯WjxUvg$kyHX!B!B!׵>tiKld0]ZzO }뭖撛)--y6צ^'%%VkV}N`p[fS-8nWc4Zoh4PRA_p?jeŹm11ՔbWl=E9 dž6%T*mXZcwET96X!וPyno֡_?B!>ݗLv`R.%3HJk%3wFqAΝVu1x`Ҙ8q"IIIjXf QQQĉA˪U {^zz:ӧOR\\̶mۈC1fJ%&Ϣ^Gyy͛ǐ!CXre6RLAgӻ$O* Dd׷or!/]߾ϛ?@Ӻ5o>DD>лɖ/^`XBBZt /4߬ӞL&~_2xGNKk{h3kk % 0L5FMj۷/=FZ"B!n@dP ;q"߿Ja{?L@@cǎ%>>泑gٽ{73f ++4Ǝϯ{OSNq1y\]]Yp!k׮srr2{W_@l311xz)<==3g'L]18wg%Pc4M7ݳP=Ņ9O3P:LP{vlBDCbjEdNwٌtб̳o;d$+CqZcoE䜣{pR9cD&d2Q΅TI;.s%9z=FP^Z@Yq.;#7#:A}닆B! G5h4y9ry&ٞ={6lz`ʕjp;ߠ^+{Vuĵn_T*| O?4 , 22Ң^YY4o޼}@M"q…߿A%%%zlqxL&999FBBBlԩnnnGGGs{ûd2ŵoٲ=QUUEVV>>>4kf={FF!!!Vӧ[laፊU!Bk}qvM|9f !z IDATZMUUJ%SN߫W/Ic[233;<""NG~~iiixxxX-ͬZ+Zhacj5pw`n3c׳crxeE8:1Ll 쎫']sB+ 쏽g5jO1/h+)/ җ'K sP(iips8^adBq<9sqwU* O'''Mj#Jմjh"\E__9U5[_<~/ڢrv:aiЃ,w\*G)2OĚ[h',,N>m.Ã6m˒%KO<^^^lْ0ZnoV8lqq1*^zѯ_?ZhAhhE*.]paT*<QT~pp0 >-ZhHJJrh|/5j?5w\f͚ņ X~=1uV馛 @EA)Т_4nu|1URNLWԥW~ _v?}0,@*.bʰW'q4OP]]O?Ν;ya˙~o/2;wfŊ<oyپ};1iii楔M&ҩS'ΝKNNf2o0 :?ٳF-[f@}^OZZ;vdڴi:4;Fee%;w8gΝڵk6m?8ӦMcu~7N:Eff&be^oƍȠYf[ }t*6mmڴ!++Lxg-3 |;?:]vܹs$''{n|ؼy3ٹs'dggӷo^چ FVV;y$gQo2:u*3f 55׳k..ףGvdq !BqѤm%'ikq"=Zmg6F۶me޼yTU$F#{q(:vh~ŋ3fIӶmҫW/ϟOuu5EEEftt4QQQ,] -[19R?88"/{ղ<ʊspq 4"~O{Qf}$lK|B D 5it2sⳮ>Bh&A[;@3OHh˛-_A]khݾ/ ߀p|5}8k5M&SmRW}G_oa,-BF#zjue yyyb./(*+O>Y{gQ\մ5ЕVr㘌&U34ci =vLFoo"m}k oл[)=_h~ڞD UzbgNnNJzuV{97Z16lX'xիWqF***pqqĉ FPi&݋VmDhbҥ9seg- ҬY3.\HN:uj&MIȾk,^^4D^^7ofdffb21b/Bsy)pq7778x ݺuM6WsONPPaaadeeѳgO6o\JO?dzf*++_~gw}L^^۶mSNvT*XѣGj*Z-Ǐ #,X>/Yn]vr38wg_0M= W5 y&h}2=>i[&'&%i?&CH8n{?]ʇ/GeywN}W7:hHoɚfhӡ?U =MWּ0* e:8-vɐq 9 *{#?Wy@.\?پ5>|1oK h6I !7vѮ];L&>۷ogĈTWW3~xkIꫯo4iޞjZhA6KT:=fNo$o>B ztOJ2jiֺfRo@B8/*ϫYƷe"Z˗xb/^Ldd$7ou@Mjr2''򈉉!$$=9/gIG4YR4{ AGZh45OZsM͛7''r&B8n|),,dر2ay{^ ###C}vF7:HiQ6>qv]c[3rvopx{&xz쪦r/挚*]pw _k!&K@ ! P1hZuBCCwh\gҡuH>B yQ#w;5Ax(ԒCY~hCd7 zϹ\moD G?AɋO$= r>@ea;+ޭYn R~Y]' 66:g]w\\Ga̘1̜9 y38w+V0ys5馦=P|(++G}Qkh4$''_ԥ;2N:Α#Gٳ>YfG}eݻSryl}#7o-O?qrrG:Çر#nnntoo+2iӆ+W?ǽˌ3ر#CѫWKhpwwg۶m 2  |8q7ndĉJll,K.^߿[oVC5B!B222ln2k,Ν{#׃v7]JzVbzIcBq]2j5tܙ5֭[Ǿ}tqw5 [c;r&-Ԍ\+xh@d O,F'oܜ9|5u{fiv#sog~`ş'GzH ,c??csPs?JNNfٲeYoKΙ34{=ߏ>,C宻bݺunݚo,86m# yWkƶ?~<6lrDtt4mڴd2Ѻuk,c=mƶmh֬jʕL2`b'>c>CZlFÙ3g)_K+ׇwfhLFqx.SXyJ[!~ݿY;w:6QՎV.WZZVǼ/F#]8.W;f>W[TTOυeuϟ?eJZ-MM&!!n*/**Yfۗ;wrYZjU+^',,ۻP҈#HLLU] .PVVF-jUTTNdd$NNNW"rssiѢnn,K_.##pKDDD$>> 6_pȑZ>}ۓ@v70]^'%%K7="##A`ɉA1m4&Ol.۳g3gѣAB!ğN[Ysh6عӉtܽB G `!Fe7T*&r$߈끛o7c773/߳>N8a"h%۶m[eAAA[G]㋎G_~i,ky9???e (___mw'Z͝wK/رck-a;ܹsXCRyEo{swX9po$B!B$B!ewy'ׯ{ Ǐ'::n+6o޼khқoi;VRQmѢE6ˢK7F`` Wf͚5jՊ eٛL!B!~HX! Yl9wwwV^}B\|IeMݳgOVZ$m !B!ʓB!ǽzj !B!B!ɔ!B!B!B!AB! *// }fZ榛nťdN>󉍍mtB!B!B!#3Bի9vyyyl߾˗j?| >|zFtRüyh߾}B!B!B8I !7G})Sp]w裏9rl߾8FqrrÍӌ1>}B!B!̙Î;jgΜvQy_bw{d;v#BqR(߫T*L&PL^',,\NrrrԴB!B!Dc=ɬ;ۘ^̼!ٵ{n222jEEElڴW^| !_8XMՈz,#/J=AyIJjOgBؙ3g8{,)))ѽ{w(// RJKKgYYM!B!BׇI."62|lv$f1~yy9Y]Ѭ|BCC-|tdggCl+^^^ddd\\7oP,t: CmS]7.nF}5FFeyxxrv}mE1^(Nuј j* qI=xxtj_(gWKqlf"b?I !70Huu5zZhӓ-[i& BVV))):G5ڵtҦB!B!K&;_Kld0)EZ.-}9R}ɌݦϞ=UVh&..˗9pm6bbb3gV"00,Ǝˊ+prrGKHHH`ĉj5 p!ƌRd2q}YĉIJJBVǚ5k< <#̛7!CrZm$'?;&YcϦwI:#T3ɮoB^}7u7nk.|v=9}lFw-_@^iY/=eyl߰D>Tu>MxG>ЦC?9f!^kc훴hۃ=(Jt2n7m{ vh0/_.hn=jz<? %`"h;_Ha~:*gW^ )|46b]ֵMCB! ]vk1b&M~`͚54oޜXRSS׾}OԩZj!!B!/nljK} tclj'Yrxd^d21qD"""ŅI\\1#A $W7O0(}#%;-m 9F{6d25Y^GLR_-'i ֠tRQY^RGcc6æT:Q-gI:#]ù{h9j~{Djdb kݙ~#A_w>͞Mr۸`2}Du^N .hǵKi֎ӖP*9s~\q3^d`!/@PJ~=8<==7n?8&L1}tKII k׮mB!B!&j{q"߿JaV믿BAxx8ٳW_}%}SN9rO>? _q);3<+ .dڵF穧j>3ggϲ{nf̘AVViii;Z|F8fBPA_Ź?(O:ۼ% ݺŅ9O3K*=|pU^>bjEdNwٌN IDAT:б̳o;d$+CqZcoE䜣{pR9cD&d2Q΅TI;.s%9z=FP^Z@Yq.;#7#:A}닦%3BPEEDFFP((((ĉƚR$>>޼Ocjժ^o B!B!6AUV\_z={K/ѷo_od{ɓ 6 ___pBGff&4kvi/ֈt:dee燻ESRRP*f$Ջ",׷~Cx7 1s9=;6.yX;z}̱-ގ]V6:F_@a^qu`У,s >) <(/=t9{c//)E|˷9}5ڊbK/BzY|ߥ9(J~Uv4e{[o}Ѵ$,B܀z=k׮ ZMEE;wf:Ge۶mT*L&cƌ!88}_\RF!B!)ޅ'WgM]ץQGqq1/"cǎ%;;FCyy9gΜaÆ \pOϚZZZ^QTToMB/== Jj5ym ~uRW} ˶DۘATiul^^ShcvT.:ڊb>TWiVXO211_\>8ԼerV$eL&K psUe5K[S_I^~e諫P9_EFϯ+:;Z%B7O>$ ,`ƌ<3s==j 9s& .[nMw@@iiiMҖB!B!Ĥm%'ikq"=ZmgړnLJPUUdm۶ՋS]] @QQEEE(J yyy$$$f͚zo(:vȢEbŌ3RItt4QQQ,]Ynٲem2o<0ٳf,u&!!RO¯_<ʊspq 4"~LGa^ݟ+5 %fm}uG7+0jeeQJLF5 ¼4NY}o)|-LFB[Tm7v߀pGpxgM"hsxZB7 _ G~{:ڢ`M4?mxU[ԟB!n`^^^xyߤ6mcƌcMھB!B!z>y+nO՗]ˤmٳg=z4~~~xyyomޣ>#((`ضm:ub̙tԉ-[b27n%%TPO?eYJbbb]bGfժUhZƏmL> ʢgϞl޼j,u_`> /2ÇgݺuLɟ|l6},j/\\Ք0`晠 8+ZD`^o̷L&%'#@v)E9E\6sR9ی"Wz%r[_4$B!B!B! gl֬Y̝;*G$nû'.}664qB!n('BݦzCB!B!@pe]6Zy>Ayz0iV D B4kg޽>|Nw "kz۟wOOOg֬Y5W^yWoG]0B!B *$EC(NV{X5YB\nn.|6l8_~ߦEtB{ :wdz_mInpӧOG2g5kwޤmhZ 0B!B! !K@ !70ڵ+)))e[lG5 kơCӧϵ :䃓Bjz]5&;v ҿZeUUUdeeCfͬo2www2Pgr377<<%5j\ƎF3?!00Cɓh7ߟƒ%KHHH@0}th4h4|ӧܹ3]v%,,ɓ'ך=l0ytB $))1ӯ_?fΜItt4;wSNPZZJtt4k֬1?~<3f4lk>4 _YoM֯B!B!I !7RvmVшCaa5iLeF|;u`J8VǴ`i3!qF222h֭֬###˗ˇ FVVN"33'OZ$p7o̿ovIrr2:+"## x?0dffx=˽ |$&&yxWL}"*DTb'R-ERZ-EZtMh%VU(JjKD"LfY' Ѹ?js3S9xUxe={DF#zjNJtt4˖-#**Z3f W^'`0Z/""CU[B!B!Bmd h!ڴi=z(q_^ [lyxyy8p?±vsD}%RT<աÿɥhH n_uԉSNqrrrpqq!**TzjƍGHHHQvv%n#]NǮ]Xh3[2uTOSNVT~U*vrr2kKRRR"M?;x„ $$$УGo[e͚֭5B!B!dBQ %//۷+vvv$''SN(xTU(R|rLI3gΘS~}lGV*dի&o_o߾9r~aÆX,B!B!BVX!6mʔ)SLwSLJ<Ç1dffr9ZhQӡW.O˱vr쳝d~NHp;.=MO\{ħRPTm9}6mdVgȑZgE9rĬ񤤤]GX(:y͚5XuSO=e}Y|99k֬aҤI\tB!Bw%n}ٳZڮζGycwNVz";ͯpۖpmvBq߿?,Xw}-[ҼyR48y?N^ANb7~O3<ٕ&[p"[䷥;%>oooΝKN aȑ 8Ь΀3g:uYfsN:ڵcĈͲeLeK.eΝ4lؐ%ַsu`֗ˍUo /F`` ju^z%ONll,^^^xxxb J[sy\ke~ERRR9sfJra?3-[$33Ӭ|Μ9̙3?8T}!B!5k&eXt-@,{%!mܾ^իԭ[X٭`ߐGrr2+/,,$!!???lmm[՚ JOjj*~~~NMMb57$''+OiZ-BmP /{lL׍Bƒ7 f⅕MjgZmUnUѠ/$?7GgwK~4AO^NN.^/XogceeC1e\ ;rprP,կkQIX!T*k:{^^^eֱ&00}Ts1c 6mZ6v_B!BQ9&)UCX`]b3o$ժ;'c3Yk4C;5tǎcȐ!jEaرfaaa<5 iӦj*IJJbȑ|hт~~zj5|߿ľOΪU!11CrJOo9t萩ʀɩ68q"GLJ,vEHHQQQ5ׯh 3PcԨQ?<==YnAAA%RV{({9ЫW/bmĞ;LjR4z.>ɟqv&?7t{d2~8C;V~|O"i{.ރQ1gK %"WErR>ׯ 6GzXbUe;3NnѸyWzl:4}?~}M}*&fr_|Γe4˷!)q~!6hÏSу ؾ Pdv~6XaB'_J,/m.EB! 4Akz|B!B!*gϙEc={$U:( ?7|\ziVGc48{,_}(vzM6qQ|||Xl&L &&GGbO0EammMVV=Vb„ =3gr)Zl F9rd_EQ5j$''ckkKJJ VVVF|Iz!}Z-dڴi^B1n8ڷo޽{b7#GOY.]={6#G,uL/{h +}'c_;{gPr2&).ݞIH=FGpt@QUt; &_iֺOc|0&}g/e+{)c슢woQt/ B~>Y(.A{ rY?_DMZk%SV 9X!B!B!w×xh=gןb9|)m;wgϚ)rvvfwttDѰaPT4hΤILiyF#{-PN<ɷ~ڵk5ƓO>iڂ:33I&9h8k~OOO.\ӧycܹ_X|İ~}YcС=zc,_vݟ,5!hmcAgJOm0д%J*nV5]9KϚTvprΡo#REs/\c$ş+w {Ĵy($e=3*i/Ӿmh}1QPxғ侍-;?nj;;׮y夑t%yKdB!B!B!ju:Wjp$&&i:+~Fꫯ4i۷wޡK.:7Uԯ_bm)O<I߾}qwwGӑn3ezŋo 5.M\\NNN%͛37b @ՒZ|ƢV0aY;v$33Qc֯ W2Ӈ7gӇԩߔN}6ms\'׊}ϧrŽG;c0;gT*N.WQe=/; [M\`"/';mnf;':*_6k֮3\]ͮ[Z^& `!B!B!BܵkhQܛga*vzȠ-K2rHFIVVo6C%)*䄄ϊHzs%"##IOOٹ(GFFN֭ &""˗/;&HMM-`#77L݋z]|c0ؾ}b)Z]jIHt+O=6ECnd22EIYE!/' GgrXI&hkk V[Yc膓'ZM.B6ŗjֶQ12pܛˋx-Bq#::|.^ȅ j *!=m?tO,)f޽\vB!BajLVus&?{$1]C]Jϲ4k֌ -Z}WZ?>>8h|Nu>h4u=n[?:}4ܹ'''HLL$00Űyf^u?~B!Bܴbìæk G1UjWVrJ̪Uh41?111 <OOO\\\ç IDATHLLO>1;CW^j Xji7nɓ aÆ(°a6?tĈL>qƙm\JŚ5k;v,ԭ[vEhh(WfĈ[BBB/*4*k2qD|}}M[oߞ;vKyg͚3<ŋׯpM $ vV#/|ys?~ ( Zs7By"|g$Ξe؄w.@[:w_@qn[)X[ےC_2".s)c^6[?mH!/uOgۚy?3zI{K/;=|{$N^Q~/%WTݧ Vq!hܢv*TOQVX7f ବ,p۷o8T8;E1*$RqJm/s#7/4slB8uά̒K( ׳16p 3|W:}!)9:\(p5-/7r5zr핝hE!!=Oⱗ7?Y89T{*m XBe_oB{m5o]쾊ʚW%Kpaiڴ)f͢{^ӳwqFΝ˙3gJm?99'''J( W^˫B_!Bq;i5}>FM]|[V%&&\\\ʬk0CYf,\}}WgׯMƍK9-- ;X9--43">>|dggs5իk,o h$'+hU? z3mvn4h rpqm1?7Bm>~ž'(ɚ'ҩw8 ۑ[ll?Wd)kY)XؕEA_Hv5]}+;6t]<+%Q `!6;zIX277 6y㗒s'&YWztu/F1mnbT*Mș$ߚܤLEcec?#Ӣ:180 |C++zc]gP幒rGy*:-~7Ggc394:-YOx`^X{WEZN&uc`{}=GX6CLRFȉ%k@!$ NF/u ZC?ΔO0~<|!6v,0ܱ9G&:fL^6VILOaǕPڰ wB?kpr!=7Y?ǟ\ǰwfIG;tgs`{ˇ";?F2sf]_'$ ?DV~.=}8~}+2?e霈9GrVC~ [ɇ_h|e1c_~4i҄_~/'NW^$&&bo_v֭L<˗/VILLdT*BCCYnS}3k,fϞMTT}AѐϛO? ŋ6l裏rJ/KXX7n$&&ȉ' 2ՙ8q":tgм!B!keeEFʬP|}}-lӦM|/5hР*2%r-o ZG*ݫ~u]K>-J,KS؝|Jڦxns(ki{$BZ*''3eʔR-Mv5B=y NGǹmohl@Q0\8ZOD10F7|C7׮1՛kh2*9>q-3zՕ\!ڷu'^<'{4:yȿuhd 0:7kI|uSOo0p.}ZMϲx*=rb4k`4ThlmoF7Nf6wZUsh\ן} _a޺>+k>e D[ų)4eKf^ggَL0BS1aLG0F_>GVvh"`Rߡ)/Ms{\O7{1][_h|e1czpڴiCxx8&L`iӆ:ue 5k>|p7nl^lر̛7+;xWؿ?!!!hZ= @͹zj+LBXX˖-#33;l2Nzs VX?@ΝMO*""^/ `!B! `zp:[￿mW^k׮%3uTf̨-Q4퀓kNeK,  jhBQKmڴ=zS[jy#L[@{4C#7)K;N6n\UCFo}B7x2r&.~OfZiml9xf4kهNAhr pqp$*>X's46xLGL  d pUz?k`h)i<}W_(oӅ?|am{h !EgؖqItBv>ʢuwraLGzYʞӶ zBP{5Wsܹs e„ deeGڵk:t(DFFo_)))رÇP,XfW3n8BBBⷳ[صk-ݝ1cưuV0O>?1-B!-YZ[zutRe%m#- ]UbgWoFO2JODB!j˗/GN!55NGLL XYYtծlzSom3.cw-K׷8 }5 {wb2sx2rsKded|:i|-'}:cp::-,Р/V/^?q967g[>R~? INA>?Do_Z_5n|uT*/|+]eæ;9+EQu%'Ii5?Uuvȶ6h u"sCe/S[_i<<{#FJe}ѣYp!999l߾ кuQ:>>^0'^J˖-+ϭ?lKRRReUѤB!BX[KGT}_x/BZ777o@nn.yyyl߾Zn<)g{[8V0TƊ`EQJmGUS* CaQK_ܥ'O]!7)+iܝ\_/)/s"}=GVֹZ* ?܉'g)))S3¶6o&rh\ןІMxd7ft2:(–R=r>JWT*Y(&)9+4Whv(- "ߒsc5nܘP6nȆ =z߲sׯO\\\}?>>Edի%K:KB!B!ĝ 붅BZiӦL2{xxx0e틶5 h4 h4hښ Jv5QeCb+y]%}>~p^dD_g;ٿ`C#( Cqj^.Į9kkEhy2Ӈ$D- *Կfѽ{)ԡ7vVd* sE._H姓ZAFug^͓!yususFv˪=[8 w{Y͏-=ZeOLmٷ>,TZ0zh/_ζm5jz:uٳ'*Cѹ߿f#Gj*Ξ-:`0p'%%캝=z`Ŋ׬YC>},Cnxꩧ,O!B!VBq:<|y˴ij0KVy7O1dWG#y:Emhҿbv [;M@V\*ǗdᥨD͞-|&Zz*c`;b6gD>{(G?ǤCvug4)|@{+"^4EQh\ן3=?p$_®Gpvk߽Ո}xwФ|뛕}<*u'ŝČ e:gv@ۮNOCjv&cgg34|v Y i~-Ciڿ4kdfcVޭ<~vmySy73F/HhԨYW_}[.$&&2m4Ӗ `Μ9t ???RSS6m;v4Ѯ];FApp0gҤI,]ӰaCٳ''Ox .]"((S B!B6ar֔Bq'86f._2q߳Rg%zFomy%zعVd瑘GM"-'z P,'=)z #OS}>ujON`XvUe吜}>-V7MN gWۿr WJ{7(o~"vW$''s}vC^'66///ӹÖr ŋEӦ;Z!BKi5}>FM]|[BTAԩX!#n}g6xݖ]put^.nU>6 P䂻KVVkP|U޻AySy73pwwݽ3JPs1c$B!BQeB!צso~Բ ޹Weú+;cРA 4B!B!D- `!BQcVُO(LmkBmB!B!g= !Bk+RXzݨO!B!j`0m6]N#22T:IIIDFFXBQFJ!ʟ;‰j: !B!B 9|`|[ڜ;w.fժUfǎˑ#GeݺuEaԨQl޼kkpUqBq#::ZAANb={ST_M,kz::F׮]m!B!bY 9ky+ r͛7_GՖZ磏>ѣDLtt4nB$ `!KNNfŊDFF]9tׯ_gǎ,]NWCQViؽy3{޴-ܹtR(IDD֦B!BOq|"G'pu3%~{p|"9'o3bOKŧ~ /}xYr%W!nٓ@!EaƍnݚXɓ' @aa!'N ,,">Rn4}ݰ5Ѐ^[b0ZΝ;?ټy3*f"00mt:qssìaÆh4B!B!j?VAϿOd;|=1:U}Zyyqqq)ސ!Cٳ''NwޕO!n'Y,BbGח+ٙ;^|Sްiw>{S|޻o2t7G ]#Y;ƌL:H6n7|C8qf ܭ[Xl~O>2n8 ,'|rB!B!fh [ߒmG ?\lǠAhҤI[:;99a0h۶mBEB!D-+$IIIEw #dTWNu0ר7 hy)l'uBOѸO(mI*3X~= qƢڴN:lٲT͚5 >c}%11 .nƐ.;U3Y/V|U3}U筷bɒ%X}v{>fΜIBBBB&[@ !ԦMѣej]={ 8c$ZFu\h,闒d}hTGnR&v#mh6Eϝ;ΝKhh(&L ++T>j(֮]СC)(( 22};uĩS8s 999e ]taʲxgXx1< +&MrBQ$,BB/_&..N:Cjj*:@׳j*4h@Ϟ=k8Ze:ר8wĈRpww7= iР[6O4ݻw3x`\]]jZGnݪ>!B!. urzΝ;(iӦ3<G}<7|رco!B!jBؾ};}vñh4vZ2dH G|wPU>Y"$$7&447aFm*KIIa$%%Va9sX666((TJ(B!B D}/6: Cjir t4ߺ*n,_cǎduJJ"55___Μ9æM5j8::k.z*|nݺѸqcj[!B!IC4#'_C%CYx,;©${)v-""< >ÇW9!NB!A8q目k׎CTX+̮}\` 'Cc:OOh:M&G/HhԨ麷7sΥSN( ,v| cƌ!33 2m4:.]"((bB!B!GCVsP I!ZSڄYqB!ĝp~Cۘ]|[ԎCuU)ZM]GmFZZjx"DEEB!B!JX|O(5ifvs4oٶBQQ `!B^^^ܹs̘1CB!B!B$,BQK 4AtB!B!BdނB!B!B!B!JB!B!B!BZBB!B!B!DtP!!( `!Gaa!76]KIIիNpp0O!B!B!'-BZ.99+Viv=""ӧOݻh45B!B!B |BZLQ6nH֭5+Jh4xbN<P>%3BZѩS'bbbHMMEC@@VVV4mڔM( ˗/g߿B!B!(9] [pZڶVqQkPtLU`м /g5iyFtz#ښYbVjq"%׀-/}nqjtz|]1XiyU>n4&>CoqyߒmkJU-B*BQ %//۷nJߠR#55&B!B!(9)4x:Vq&A27Cp=[ ́Πp8FK؟S<=Ԟ6,ܒqN#kfs^ne _9!Di$,BB7Vp<Ȕ)S'!!@T*iii9s Y!B!Tcʼn]Vy!\\'OqxJ'*:4gdr4F=cdw6onɠ5&aχ0NͧcSEOk ʼ.L2|#<ʴ^n<V }T|ؕOkؘV _ff dž/)=[^Z_Wbo@L)q !nY,B:vHǎM2{lrrrh4ak[!B!BtJ"/]̺QUn;fVkPӃ qOy[s8FKƈ We~  FPPP-+|fqcl5'Wp-}\)5,=V*d_g+ h oeT*Pbq !nI !0\\\j: !B!Bru #);״ );.~w,Okk5uEҺnV.l^+o\0w5c`#?Fn6%׀ {) jEA!+xf55׀;kZ}Q+uJXzvbd&Z!B!B!wѝdIE'ed;c1( U*+ܱJm\ިl>c;~;3ťI)$kC6V*žW\I3SQFœ d83s-$H3BT/Y,B!B!BZ؂k3:N^sNF%&池ބ-vG⸚g<<˵,=*`ן8UdgY9[+: S|;'$f\kT  wv6*.%?fؾQ7`p/ qZΨ׹|Vѱ.5~hjׇr*=[:3&̙_5!ZB6aj:!p~Cۘ]|[ԎCuU)ZM]B!B!*G)Q4jxǺVϩWڝJI }le /vY?- ^!J7 uj`B!B!B!?CM${Kg$=kE}+Z9Y|%g ױ]BB!= ..Je…8pbm?tO,)f޽\Ve-_~TӧOgϞ=UH||_~%99ŷÛܕUcs%'<B!jdVXAdddM7om۶5F,_:X|yho>~"73Fߖka~4:J{'⟸ 4Z#m־,qivIBB\tɬN.]x1-_~^Z8'NFa<#NlٲW_}>}:tVZq >s5 /Э[7w'B!BqBQ)ƍiݺuklK?FQ^J~~3w Z+(Ǘdƌm WI@)#cP~(+> mgI d\0<[IE!)3 F#Z}a&'[Xz|XY& ˃&y(\M+9 9+r+k~q&4?Y#i|NL GMW7qspbל|MPz* 6M7|6mcojj2evɀhӦ ۷oGT7d֬YÀLN[nwޡ:a5((HF믿?ϡC%,,&M/gn{M߿.\`РAܾ}3338y$vvv$fΜ @&M4huyܸqt֍שB!Bq7Y,B< IJJ_~J=ܹ3lڴIvZ^~e*3ftYYY8::2{l%||<3gdǎ={lzµ}dffrJڵkGff&J 22ݻٻw/˗/7V%**KRXXHjjjA^]VSmW=JQQU mY9S!J'>w: f'2*iu V6 X: VV:t:w~]<5zzLo-Z]K_*\Ϣ[ 2Wn&sf%8)K_s d;21ڠ NG ,0oH]ME\ȹӜYC֗[Y\试:-:Π.Nk:NͦIʵ;7rg1S_<oWw.;o֞Y?k(.%`D.K~d}+~b,ރ\a=frR9\w<:MmS[c]Dͨadܬ秦752e ?#cƌaɌ3*vJRRR55Ü>},Z-S jh=߱dffVY~=Jpהھ+4駟ҩS'Ν;ٳgIJJR^&Mx=<}W͈#o +WϞ={B3p@>c5k5j~~~XXXЫW/.--e۶m,X{{{FI\\&M2;l0eUݻn+effhhܸq%IzuT[:o}3ɴOU[kDMGR/TܫWbb\mVl܄S./[Z|Ο9q¢[4"bz|zw.i#{RV|`n`+թƌxf &'JKٷJufϼȺZ=߶]s3ϫ5Ыi[×>k 6=0)tAޚ^=+m$='LJmOM_3c j53fߟW_}*\]]Ⱥ~#""*-bΜ9u>L~dҒSNo>:wW дiSz- wTg899չB!ģb? B!l !sΑA`` QZZJzz:^^^dddԘnnnCΝKaa![lC9ŋT*&OlPΠjfff+cM^巀 Unݺzշ;v)Is64g]s?Q[׭΁#̫X[W /S_ o6m*^!,.L|',`}2m{Һ񠾳ocsSsJ"♁(,͖ÿBV[_˩x\`P]9j-֫kϿo/#;}j5Ͻs;&SRVjR9SR}_Sץƨ^xx8T*+tmm S”JƾN +,]Fj粍MsWPP@rr2̙3[[:ԗB!BHX!x aggǖ-[y&nb˖-3cٰaJ;zM6]1FFFFmU qrrBRKf̀JsN>jӢ׮]*};Ϲr"Ei?}US" ʬFTf*eUmI>>uB!BqB!cm۶DFF*?{FVeWX͛73b庳3̚5K9wÇ_s Ƚ{߼ys.^h耊CTTRڵk߿}ȑ#9p@?ȑ#UJƭ"xu`V/M ma۶uplm% Uҟ^w/,  m~{?x$]ޣMidn jżl@- *^?q};LnҚ).+\e8B:*i^N%5H8G<3 |F\ws )f}T9?-R~BmDY۽ol~,7ϓ]JؠԽvw^1}(*W`ϙ3G۽{w̌3KXX񤤤(A+EEECAA:t>c̙JQzgΜĉ  4^{tZjmر#nnnJcV;.>:5W\L~3fNG=Xp! ,P6?5yW 믿*>[N2cl|dff~z)vvv/o6C?͛7ӤI׿9rd*((\Y]}7^OVVXYYiZJJJj?77kkk0Fff&Ơ=zLB!B!cG^B!c w'Vˍ7tg_PP'NLJ*VGJ)**B(5j}ꉍek4-O>dAJƍU-HLLT%&&jrWf„ ?'''fff? 2ϝL;={_`լJǤرcYtii#F}|Ǽ[9R V*//gݺu^?>|'N`ffF.]0a666YҵkWIIIa޽&սzj>jӢqwwFg!44cңG:Pfߟ"VZŴiӪ{!v͒%K#cBCCILLd񔖖ĬY l߾rnݺ1h ^Jvv6j;##E777L;8q2zh^+}t[[[RRR=z" wggg6mDXXk׮_VR3f ?ٳgh"9s&IIIQRR¾}h߾=DGG3c ?^otޝ˗s ~i/_ΤI|DXA=VVR2p|-Ϸo߮Pm~qpp,,,ʊe˖4iĉ5)STI4i/"iii,ZSVɣ뫭7""sRXXȖ-[SN\xJɓ RDfffŦEBvvv[.cdjPZ!B!LB!c ;;;l͛7u[la̘1W)S`kFHH{~ॗ^2Ν;i 9::nķof۶mq:t@LLA.\H||řԮ=}O?Eӑ÷~k4_h4DDD0e^ KSw?M8~A_ySO=СC?aP+++9RmX͛73b庳3̚5 V @AAIIIJÇ_+%k*gl7oޜ/rӇ(kҿN qo^=ztB!B!=BTVV7|ìYYr%}} ݵ"99Skkk2e Æ ~@V关3EEE̛7Ϥ<+yT* ՖyWd&rJ+h4X[[3m4V0`[nqW裏Pʏ6n8&MDPP͛7GVor_|GOOOzɄ ,Y5k'UDi|+b:wӓ1c(̙C\\annNxx[xx8{sδl mժU}:jՊ˗/NTTÇ^xÇӱcG֭[ǖ-[Lz`Ϟ=̝;s)ג:t(JYYYʸpaa! //`oΏ?HLLI6l̞=bE@@Wf۶m̟?zkRF<>ƍy9xϧB!B!/B!czh:u 6oތÇGR5LR.]zR-(..n_#GI&զZȿx'\0obW=]2Z]uVFqFf͚;cѣ 0@^^^ӉO>ʵ7o0g%oc̙JQzgΜĉ \mРAkӪU+mFǎqssS;q|p!r `̘1t:z… Y`ɫJpp0_5Pr)EEEh4ZF7}<`bcc泵'ÝǍ7l[\\KLL$44:V^̈́  {NNN&$--Myn4hAdٳbU*>>>&ϟ)/.]_~mۖ:7 h4777Ν @JJ 7SN;_~R6--:uꄛFԠ{=:vHqrryqƱbŊZfP!B!: !Bׯ_|INNO>a…DEEn;wٙM6)֮]/lu¥K8}4YYY}l3 PVVי6m:+W?idĈ|QTTDLL ;wTINNbccCJJ}iw޸Wvw&۶6nܸJZ?O~X׫-oaӔ ̵3\@ҙnO͠ }~̘1Z͌3W_%??J>WWW[o޼ŋ~z>CڵkGllerm6n܈?INN...Je˖aP駟ɡUVijСC޽MO)..6XZ66;۹r;ƍWN1XZZbaaqvv6Xml|˽޿_篦+WϞ={g|J5k0j(PMii)۶mS󶷷gȑ1i$Æ SzRg͛7\!B!. !sΑA`` QZZJzz:^^^J [naff?Gi_DDsΥ-[AN &?3tPlmm)))QJS^^~/ ۦTٿkLḻRX:u*SN%//py{ڏʊO>kkk6nȸqLbb"R~РA՞}VQTޝ*ǷnR|*JJJ())‚XlI_?S2~coM//^DR1yd2wY~S ^vpqq!;;J;Nm/B!BhB*++Ύ-[pMnݺŖ-[3f 6664nܸ`W͆  V]r+VQ?^%ҸqczqܹS L-::yxxp5ʪ]xTfc s Fc1dڼ}6۶m#,,3gСCbbb 666pBYt)ߕ-[V{nq*\B!B'g !mZMdd$Ѯ];ߏ^Gr1$""+VyfF\WT+GU IDAT,?num%wwwضm[9r$/?VVVUVb*X7 \Rv668PeNc+UGWDnݺ VZŋYx1|۶mjsssOμyLچҒ+ O>iR*~z9BHHH-+ ɓ'l2<3t۷/~):[^zju~4 L2W(=J篒ggg5kZv_jٻwA͛7ŋ\r຅}!**J{ڵ߿cy^z1z:+B!BGB?Prrr3gsARѷo߆= gϞ=tܙ-[*1c1|p\_|#Gܜp%YM~O(Tqoy'L D46EdX~Z1o<ꌍg\3ϠRHHH+%˗/7+WW_FښiӦTҀؿ?ݺuv{>Z+iƍcҤIѼysj5ߟ/#G舧'={d„ Fu~,Y͚5ӓ*[4GMu7d`ժUΜ9S*b!B!BRSS8p &(ٵk'N$998::=N… X[[ }u899yt:gΜF*~ڵkj ƍ鉥erΟ?FQծ .`kk[}>%%%mֹ~!BTR\@w;{*m 9J.[B!AJA'{fBߋ˙8fn\ѩN4jt·WC[^~_0XYYh/)// OOφN|7J8::l݇IRVn>4i''z͑JBe =/Pܫ{YZZhpppiW>|xB!O:~ j 7~uK!=sYV !ӣ~+?hZvxbo_C/B!BOJA}ۄB!j: qX kB!B!7B!Ywj5tB!B!y^!B!B!B!BAYYV%5^^^{!+)*c?ykgzOsA|;+^/6pOܹs "((2ݻw*VڵAuA8qD,-- ݽJ/2|.\@޽2eJ~,X@.]n!B! `!1KTT111ʵbve?郷q&|Hϧ+ J[40JY#3.iܼq^>bN:ݸg'OdŊt֭N;>#zְK=رc$$$ToܸqA>lܸ.]:ԳgOx t:kC!B!B!czh:u666?^>NύBTf*@e2Hזk)/ piɿrFf;ֵl,nb=\)@lSA[|윞e*WoĪ MStFM̍m Z@߶m5$P]^/ײ XӨqqtL篬kphfWm>׿9r$M4,RhтbSiTii).]Zm4^'33FcЇ͚5 '$$0tP,XPcG2c zU%͔_VKIIIbmm|ޤ:6ރ\y#3їئ7d6+gPϗ|y~3g :UWw^Lft mx2vhWLKyS6ߔ4j҈fގu+kK E%ޟmq pg*/ T^?זm:MyCwgOfoj|T*//gݺul߾>~{j];ôijy&xYh...\zP/_NӦM}0cƌ????khܸjСCF&MxLMIIl=={lƎ @ZZ/yyyܺu+WkJC a͚5Fƍ[n/ !B!#[@ !Bׯ_yO.]PՇex: o3#Rں>}{V։xfo+_;]gâmwGs Cz3:½ _ -0N)іW|+봦ݶzW<ٛ+.K, B:r0>E~M͍I ¹Yuo-{Y8w4fĚԷʛ2mtڊ[;։k݁&ܟM;hfrהkz KXO4jl+3r#bSѣG)**" JZ||<3gdǎ={lziL¥K8}4YYYqB!B!hX!xLҧO*ۈ-//3lذԳſo;LlqnOpD7 nrbYzӟ| {5e76'+-J><;c +vNO1+ <ÙC)+)ŽkOL2/^e{L\N.Kw:=vc933(/<;;=8|R>`Wnu`@yk'NNv)V51k+oMx~f\c׏o^4o`d?Vut/3,!xd7#Ч-ӞJYcJWu͚55 ???,,,渾INNbccCJJJ|Æ S* ##cǎuV||| 2Ξ=KDD +ݛV\O<ᵯbm|2l۶Mٞޞ#GǤI V7wruu%++ ^_ A7oɤ~ !B!HX!x ;w IOO'//RQۇ7 {7jF q+vbΞ}H<%Z- ƭ3N6zJ l툱47;.QЀskl, >^.cUptLJ(v[=vt3.WJl>'J+c}Tٿw6vzkJeZ[4zv>N\g.,& ( .Y:dK嘚wX]cdy[dYڪ]5\R]!PVPAPxC{>纮^A_sWy~>L&S1U2L:U6mҽ+777RJײ*'&裏4fqO4aݻW3'SJʒd233+խ]LCTZZ*www]VTPP .hڵƑDhӕgVyέRRRjGfd2]=Zl-\P$CLF:8TK۶m%IqO:ZPv|{P<JujWqxbWa4gQ' @...ڸqc51 2 ɑT]vOK H͝;WڱcvQe~iĉV7h }':|$_T||ڶm:\}mW۶m[*;;⺣h"Il2 4ȪyJҮ]j\[>~u޽3?7Yp֗zy}\:z?EZԹ힮`%HN$uIS]VZb\ܜF~:q M>Y-=-VW|L&}|8RCc[߫sŊfhyy;%U5c.j ?Eߎ.5?c޺sO_}c-xЮ5ҏf7d]jmIտ?yk7^򿩵.Z9Wϡ|A ҉Oe3ǵ|kر4=ܣ^QQQ K{irrro]5{lEEE[n7nջ,^X۶my 4ծǎ*9r|Iz֭4eܹS?f~={رc&ooo}w}W6lP0M6ͪyJRll|jrgؽ}Hrjسc"nvh:unRG' N 8.W_|5wi.O_jN^Nd}:m+@=xr;bTqkyYy(-.ә5d%&&ѱtIyyyk[̙3:s挂egWF+::Z'Oք eGUXXbuEEEJKKSvԼyf~rssSۭ[jڴiڿT\ThuPk'&*  ``;]Z\ugZޠA,njMD[:Uw^mݫ-squk+K]Zȵ+lھJmWouh1TNz$h|#44T=~۷RgMeݽdTnx >ܢNbbf͚U䥓S^չm\\ϟ_mr\=$ɡz "ujՖ|Gg=6\]C.4xVmU{Q7vVu駟w<{W-ܹF]1|||b }С~J`^))̚5r*b hTX^m`ʽ3>[ߕU[fggW[@a hkϢm}~W?9u|n)))*--UN,)%%E͚5Shh\]](B@C`06.++K-Rll[?Vnn4o] GGG#EFFi]+s}ѓO>Fd2)&&F=zɓ'KKKi&M8Q:ud;Cݻw+**s'd,)Sq}4Ǫs!+OZ8y K=#'9:[üy4~x5o޼gyƪ7MdӐ+//WyyjWbeee) @b+?..NmڴQv.%Iׯw]``X ۹s|}}oqܹs*++3_7 r>$8}z7|^sTBsyda ] {=M^s81iʕ1bDK* @nnnVAqql٢QFI7oC￯Twʔ)Zpc4S;vBMiӦ)44TݻwWDD$I /H IDAT_|a?vX=裕ik5n|#Gϛfks\H`eu]\\\$I.\)av=q$) &D~r$T+rOfkǤJr6>L&urvo= Xv8Bu޽R㕖1ch4Z5KgϞUҮ>==]FQ%''Wb V'o~d47hĈڵxbת}YYݫxedd\+V$?O<Ǐ>PBB~J4kgϞڲeK[@`VZhsr-[Tfyɓ'6[GomܴGu.zNKQ3ܤಭ:~nnV?Wf͚{>߿,bKg:;;駟ȑ#թS*Ϫ]f|||֭[)Scǎ4i/_-ZX{޽d.[ /hȑ:}6o\g1e}w*11QRŖ 5_)==]&Iؠ'N(%%EZRRRrrrTRR$ćzHNNN/taEFFٺnH<;$ hsTz2 _Ěd{5YVVV6\]]%Iϟ5o<ͭR~)44A {{{Is= ͛74i*h[1/V`JKKkJ t]V'OZl?6˗/W6m*FJ#"#'jhQ`Wٳ*--mU7orbIvu%rssSBB%I54hvZ-ZH3gȑ#5qDEGG7J?}ڹs.;;;+!!A=XS"C]gsj#""B...ڷoz`?^Ǐs{QFi5j>T_~ԩ>zDl=;;; :TCթS'hʔ)Zn]… /hϞ=:p oRM5ڵk9Y T07kƍrppdҨQa5 fr|nalq5vXVJKNҹsdooKK.Zn]P_]V6mTPP~X#GTرcܹ7ڴi^=˭n{׳>Q-ZPTTf͚1cnc5~ήTyYn7ndWÞ[qŵGթsU8:5Y*,,TV`Š ,)79[n.rl('&&jȐ!JLL\E&Iǎ|}}+=zTaaaJHHXA~uVM6M]=*K nsp:dyЉ ~kCpsuuU֭N^<|-+Iz/4ue0tM7U*׳fͺ&Wõ>8͟?/j `kmp}\+@Va0`#Hph@HIIQAAŵ0 999:|ԵkW9::^0"##%IںukuΜ9~X4zh/;ͭʲiӦ)55U;v:O?)$$DmڴS+IMMod_fjq[Ei&egg|-44ԜNNNG}]ٳڼy{15o޼BXBǏSXXNqqq NQQQzJZ|8p@ڰaZh!wwwedd(88j2ekN3gΔ_7_ݻ_ZB>}TY~zS#Fh[o]voQ8JJJ!wwwyzzZo^EEE'""BNNNU=3ն վ}je6lؠrpp7|_|Q_<|͞=[TVd2)==]ZEhTqqqgeeEjѢEuL&Ui q06h4*77WtIuEdooΝ;رcMuoϞ=jժEw 2_~Y>>>4h5a6X K.U@@d֬Yz'-YDoGeeeW@@|Isym;#///_3g$)!!A2e>裏>2=zw=z_&LPIIEK/oYÇ-L2E .lX06lz뭷K/iӦMϟWyyӧO]Νkh_rZn|mٲe=z*5x`eddȑ#JOOׯj+--Mcƌhl~/5k+MsM0Aݺu$9::VtUJJJqF͝;Wkz',3ƼMzU0YF>>>u#X 1> UΝuI1I*..SO={GEEEC=իW+??_~գGsԩS5zh߿_999*..ViiiFlKg;V:u`3qʪ|5+ݿ4թשּׁ,L&*33R+yܯ_?)!ĥd$QYYYjݺ$+SNPLLz衇eZp233ICd2YѬY3L&L& m]Za{͛7߮~LLLk6_)))5cggWK_0 R6m$U|.O__pmc06XIIIdWrruM7I t;v\JLLT׮]2C= j͚5z  rrr$U$/WZU}@@\\\qƫsC?~vU嫾_voܸqZx%UgQm۶JMMUvvuGGGEGGkѢE*^l duĉn40`JKKdQŊVxxΐ!C_JJJt-Xzcǎ_vmСfϞ(d2iذa;::㕛9shƌW^:uΝ;'{{{-]T]tѺul~M6{?(mV9991czmӳgO;VaaaW_}USN${վ}{i6mձ;vL;wl1޾tKdž[qŵGթuWG'+)//W^^F<==eoo_dҙ3g䤖-[ZCqQaoΜ93g(88bKJKKS``mۦ [>>>^ZrgM6L'Obk$''ͭNm=0%$$($$pZu)ډ ~kCY b`0*E///yyyչF7xC?Y~JKKӀCSsppPppp sD͚5/ `넗bbb~  ~o>|xS{I$FبX\ `0|Ed2)$$^ڱc S֫/@ݐFmڴIrss3_ 5'o߮oFrpps=W>uIƍӭު={֫?H`âԧO*#Ghڵ]vj׮t8f4JejѢEࠒpe_^7nTyy4NNN\$QC씘KOaaa:m۶ׯ|||u<o#;?CCCչsg9r߿Ο?K*55 .%ۿ/M},6m|}}%Iz7gEFF^ 3aFQ*//Tv)+I͚5S˖-u…~zmܸQ8p PeӧO+;;[]v `lСC%;;;%&&jҥSXXEb-_\X 46Gvvԇs:rE2-^X8p`S h@$A\J_R^^˗EFj 06XIII2Ld%&&ꦛn$L&}WxF)g`JKKdQŊVxx|Ϟ=W_}ܮgϞ$f@lR/d4){{{syk5a@egg'OOϦpq0`#H  )~=nl0pCMx>06*%%E /d.޽{e$22R۷ms~h0!84uqlڴIrss3_ `$XBnnnruu޽{aM>]NNN$ɤiyF.!Gch۫\]kS^Z#QQZn:ܬe0ĉz駭*gϞսޫ͛+::ZW=_U ._F鿱(""B6lP- Kl٢[o^KҔ)SԮ];͜9S~~~V7B}wS```zJZ|UM|\Ɋ+tq Ύ!И76,**J>u?zu}WYY'"_yVAmԳG(56Jbk]SɅjd&re%*8W/IMMUIIIe999:ŵÇ+&&FC͕/ jR%%%ŋUL&cTĉ*))}ݧoF׿OMJ7oƏ͛iz6lؠѣGo?Q(++NcH &h :{lc\z~L&eff*###"""ԦMJCCCT؍Z>L&*߿Nc_RRR'Oܹs5֫OMMv7o]~z 06h4*77dx/`I@pg~wiÚQmzqedw߭v) @6۷O]v?Au*{Uxx|}}ѣGWJ4פLyyyy_~Y>>>4h5aZԩ߽{շo_uUwy^yJ9Rn:$aׯ[o^zI6mT~1_^-RV[N>6~**3Vz} v:n$|S3G ~s~ddd(44T'NTvv}IRQQFӧOkzw)Si+..GCdhΝ;Yfz'FyeޥF꫚1cF~m/++SJJtɓ'[+Pݻw~=<fW_}ɓ'멧ɓcQg޽:re4[ܿ˿j*S_'OVNty{{E|FQ-҂ DyyyIG:qℎ?-[yf Ǻu /觟~Ǖ>}T6x`edd_U}EɤI&GUrrbbby*WS[li@85tPyyyNZt,}-//WiilN0ɽGGW^a6OζttsV;*/Te*7)쾊i;\izf#SIGWe[5t~*юwtS;ir Ҕ2)r=qBa߬'z7u1}JMMTqɓ'5fW]tl.c%%%iԩ9s˵zj-[LϯUK,$ƪm۶UiӦ{= YF۶mSQQQ$yرCSxxqpuuܹsu1}/ny IDAT~iiiRf,_ٳٳg+""B&MBR-$U$dz-kVSZnvءtL& 2-njcs|taܹSr:uqז-<&LnݺUQmСCϗ*իOIIaI[y۷qXc%06PuYGH($$D&I .ԦM4dtVх"V7tt=tm:fNnue3(^ig$I[K vyQ#*Xf.5BvV+١5Mvvvc/P\rVܾ555ʳR&Iر$N۷uEԪjREBRR%((H Q@@@o(vTٲ)I;vd0aQu>}r}cIMM`SO=eq&88>}Y;wNΝ;x@ ,õύHKK!M:U6mҽ+777RKgV_gwxP{l ή? ڶmIRx6:fJSY ئR#{`gu|F_EͿ$)7ɤ9ybrE\]%K=c̯R'ԩS,^7tޥ.\%|dRJJ<<_pv҅";UIqŘ-Z]PXEEy5sY@d<|պkSu>zNKTrXl$+$zb;ѐ*βsgڿxsޯ}m۶ZxBBBԮ];=zTV^mwi%𫯾dy{{k̘1W,TPP>3uUJOOԩSkqo>5JӦMӧu9mz322㏫EJHHm&g:~}oWUm!۷Ozh=?,[LO<5k>Lw},((H۶mӤIt)_^ӟd֭[kzG:_mwޭ.]IrsskտSN駟Zz>ƍGU.]d4k.yE7nԝwY  rrrCiժUzkվm۶ܹs[oiΝڱcJuwՠ[Wjɒ%*++pI[~W*((.^ݻ]t,崒ӲtH-[|]:6%oUkJS3^9S_>o}7CIRȰ[uk sv".S=󓷷2224cƌZ',YO>D۷שS4`Jp1uܹNm{?(n 3f̰H;::㕛9shƌW[gVTTd2~/^x@nnnѣ Tbcc5o<3޾tK$jسc"nvh:uGg^yyd4){{JuUTT$www󹬵U\TX8~/79[n.rlauۢ */{mk+++KgϞU۶m'''ͭmHjӦ9\yzzO>駟:TJ>}Zeee-ZC_.!C(11QWޞٳ*((Pv*]xQ rssvɪϟWFF|||[_.--Mڶm*='񊍍ʕ+o߾J=0%$$($$ĺҵ|ɓ!9sΜ9 h5ydM0\uVM6MshxEV9~8An vh»Pa!anV`쮘 puuUGO:yknZ[n*];tP,NNN58rU^fT׿54P=c_ ZZjVZUYRկ!:ussK9rx ^`4`*'&&j֬YAҷWXz233խ[7)>>^Ç*  ĺuVmȑ#Sc͛kر ^c=M˫gi5>|x"W^s=ztъ+_C+}i֬Y `&VZZZmYmV˹hŊM6>kW[P^zi ?Mghh<|l_FZXX CuKKKթSz{9͙3GGճ>z بM6);;[nnnkYYYZh<==Ν;W[nܹsVG>}T[n2=zɓ 2ѣG5d~ 8f4*w)___7ؘrp;fYlqFk0`,??_[lu@NNN H`*///)11QK.\UV)::Z... 2^||6oެ;wjܹ ':l ]ܹ9"I:qRRRԪU+%%%)''G%%%JJJhx۷oҥK:4<GĥdTqN֮]k*>>^.\ڵkUZZZLm۶ꫯ*lT\\tuAAJLL< I QHH~\\mۦӧ{:(''GTZZ%KL*..Vtt}l;;;LFP `lP˖- /(//OFQ~޽ջw[))) : kL6ɓ'믿VHH6mt4(eff*//OMPHF! -F¯_iχ: ٻ/Y"AѰ(JAVRbZmZ-E[mdׂ({-%A !!dd,?Fl̟?QXDD[~=se$%%չzw^ݻ.nf=/!kQ:GBwy$e}~6Os3i0/)dݯ?NjG#ؿ6o\eݻ9|0YYYX,?9fSb`X?3233 _u|DFFMVV!!!KNY,,X@bb"%%% [oE>}8vl޼ooJtRjGDDDDDDDDDFK@4Scƌ!88HZZK,!<<ʯG%p:ׂn%}^L%edw6z6j81on[cJN8Dr f=1AO%" fUZ"yʔ)l'dΝ˫(}S<֭[֭[=W^yR݉':n߾㸷7bۗ*fBCC̍"""Ih8|#[[fUyY@;qP^P^t #c=|ۏ+W2gΜ*ˢfq 738f̘m_|A`` vK-ZĴi'$$lx l^x1>:uԩS >;RGը%][6^l"""WCtؑ4"xnQkJQQ@T^SyKq\miii=4j_ Cun[^xSI9탫,ONNfϞ=.}YJKKСCsItRBrssСuj[\\Lvv6W#GCjj*ݺukEDDDDDLeun~(];F+,i=4XDD2ֻzͣ>ʎ;5K6ch۶m߮q堠 ,gW[???-OKKc̙J\AJHSO5uU9sfSФNΦCDDDDDDDDY~\Wi&f࿶CDDDDDDDDDIh`fĉ:`08^L&RSS)--%88(ܮv*irr6u(""""""""""W"""TRRyyy9EGG;жm[G`` mڴiEۛ" W/_'ɯ'5QwkDDDDDDDDDD<%EDD8\eٚ5kر#sӬN|ZG!kZk_pl d"77Jn8}4V6mc6 f#33`|||| v),""ҌY, h4:[VRSS/IYYu\D|,R?7/BcqtZͿ'$->ɲ`ؾL^K;w.[&??c2o:uԩS >C s= h"MFxx8!!!dggOyIGU6""\ʸ] IDAT"""""""r0ڲwzRn&W_cǎսUnQkJQQ@ܪ'uT^RKϧK.NKnlBCCk]ʹ:ҡCȑ#ĐJnu}~&P*F;;v$5VX""{viHse4 ^}RYppp??@@@@ڦ1sL%EDDDDDDDDcJwy'wygS!""""""""" P EDDDDDDDDDDDDD䚤H3H3=EDD'NPZZt,&&@EEGԦe˖tj(""""""""""L `f*))<Ǣ1 T?''ݻ+,"""""""""rSXDDc:^f?ү_42,""ҌY, Z5۷o>>>t&""""""""""Wf4cׯ/j2b^e;vЯ_? UPDDDDDDDDDD"""Ԙ1ch4ƒ%K'&&Ʃޙ3g8qbE*""""""""""EK@4S꣣Õm߾Hv""""""""""Ȕq1|9BJJ kDDDDDDDDDD),"" L&222l?~4vT/55B="LidXDD`Řf0L 6ݻ;۱c{ã"ƤH3ԪU+^xX,Vӛ :RiF#M\EXDDDDDDDDDDDDDPXDDDDDDDDDDDDDPXDDDDDDDDDDDDDPXDDDDDDDDDDDDDpoDDD8qNbbb0 י8qaJ0U0ҶKw?5{6b]?Hu !^w_<Ə8{׈'>>6 ݝ+Z:pfرDDD4uH7ޠ_~1C `f*))իWldPXXHFFs!'' #>+2{7u(Mb#}µFw#yur hS`d 0N.]Gnz"kZ۶mo&33}~ <3f`Z:i ix\VQn wOj_Zp77Zr=w?&5WUKlN1~-q|5=Wm &3%gƿ6?̙ԩS `"((N:Q^^mV… dggO```l6;px6mpwoY~=Ǐ7hP&\"""VcӧZiӦR+1U[5k"""͘b???F?qs ssskՔMxٗwAX l60`|\9ZZDD[~=se$%%97$>sJJJ2dHEp_[leif &GYnQt @}Q2?yv`?ӏ8{+ذZl0}'jb,"Zs(?'/vZlX6L/lg?}t8^ǝ |}ƥw_ΙB&>ȮuΔux` ۛt w[Fq9GL5c{\mj?>3m0C|t ϧ_S_@r:=o:Ŷs9ke%&=ܘh sK3/럚ݻ2z]lݺu |WS@vv6&++{9ձX,,XDJJJHKK#88~3|A?Ί+شif "##ٳg+~ݻ9pXV.](뭷ӧǎ#==͛7 ٶm[knٲ8ONdd$deeK/T\ټystRZzKDDDDDDD;GDDDj4f1dÉq1 QZZucYI9I(p!23HN'vhWűc~l<ą <:[Ù1o zĞmŔYc1`M9IɌWںrsO3dwq۴Aج6m:¶pt F7 "2i]'*ܒ]B9% xԜĬ+[zF$n9|j{5E.8cU& r^ F!H6 =@p;r!ڟ=eO\sk6!5ŻgXW|nLhŋ?KY`ҤI]7:f9GDD`XjU%}srrkiL&IKKk׮=ž}N ֱk}iDF@k ɯOvQZ<]ęB>{K:P[Xͧ@vz{۞p lGɳ|t'Uful[Va{8wmٲ[d'%4]fdӲl4eu:w~eˊ=?w},Jn~\liaM|l_WWhhAǕ_S_n$(܏qFα|&jү}}?+qѫW/|||o+{9p~RF"""/Ծ&3g{\ Ao`Zٺu+[n!CϡC5km۶tܹڵrRݻwϑ53fFܹs{01bf͚b75[;w8˷>'"""""""Wf4C,^ٌ&aÆ9fĹ3qDeL&AAA'?i/dpcd ! :ۍi>Ɂ5+=oB- mokhW|CpS h42a,}e ~7Ys6"6-?f`cr硹?aSG_@aG?zb6?Gv]Ȣ߭?d{ À1=ٿ6߯mPzj ?k~Mlg k$sVc\ nx^cYZ/>vƤIXr%7tSرcy爋m۶9s'x:xyySRXXkOg2 p ^fc0.?-ZyU*.E+*K)?w$.?ǹ2|<mھ&?W+Ͽo?3u f}qz ʯ޽QǕGZZG&-- /g6&0"??|td}]X,6lӧOs9rRSS֭[ gZyy9tOOFlBCCk]ʹ:ҡCG,<ٳ""""rm2չMT:wv:vH{kDDR `h4֚1N4\Zz·Iv AUx֑jעAm= \3 n_m7>5'|[[MO{W+Ͽא_QC[q&!?q+h}Qv4"wwwtqmv]h˶m8pwyS4fΜY䥷wސ3\@@@@n۶~uCODDDDDDD%EDDDÍF zq\be7ţoSЮ W|j>+9shӦ {Fczgrte?^voocnuܓK^~k@h{O;aWzGV~p״s6RS ~TY_x Lj#: 4XDDJJJb$'';l6|ƍ,Yv;Å 0;WT;-eoNvSzxI:\e+xtW59l|R/]Μ9L:6ӧOcZiӦlołT}fI6mK˖-iٲeuS`1vByj+l.hi\93! ҌhW. K &=Coꞟ+*.@n6_`ݯoAiZ}TU|q%'9/_8qWW7~|ZG`k5_%]pl zL&rss[+1U?@.~si|_Q8ZDDDDDDyJ4cB0/-QPPl]v ڷoOzzugJ =l^o>O=bt3$~-y|T:Ąp24/OG!])>Swqs7g9oV|޼*]ӝ6!HN7ȇL's0{63ag`'XC;Ef˗qJqƌ1qAIMMo""""x饗Ν;?~<J.]?r&L̙3;wqƱpBG":>>Gyıd>7sqM-7Kz_o@xb/I=|~!xz Eyƞ{M=Xa_jOSsT1 o_"B.sEz*vԒ"a<}HI`Ӻ,N'kù aX=[˧CXH^[/@y0T\!%0Y{yʯ쳽/WS1ٓ^-RjߋP; UFm̝;֭[رc7o-ZH||<]"ڵkGrr2k|Xh<@yL&o&L6}a0ի˖-# IHH`РAX V+)))DE] wu}Q x6v} IDATU%EDD3w\fϞMRRYAΝs+))X~䭌Ͼ #QӁذa878˫/;/*ЇIM ~B_lw2鷣 jbZ=UD.a݇j}MvZ3鷣>80鷣enk{RVVF޽+]ޢO>;vt6oތ7ݻw'33 Eff&fc㣏>bӦMNG4hYYY|wl۶y9ffłbqeq2t3(Ӟ{Eشڹ>̆, x{.) 6|{y6V[wZ0;v4{66&~?};7eac=a9L/@ڷ3ZaKe }~mWO?zRl_/~ bc.o/ϥJ}~wN騉?TSyOh_ <9r\?A}kP!deeqA{1޽jeKSʁtbvϭuⲞ͡Cؾ};}%22>ɓ۷ 6X.K~=;ԩSYz5կ\Ε֮^U_guK !a+L#Uq~~ '6ƬMx018^V7~hۻIԅAXssm;^Z@,XhwcGùhk,=vm}܋j{~u>5_{ajX ?n_ Ϟq=3V.CűgOII :S:jdd\ xǪÆ cku;>6/ ]~=;US_8f\;joߞgRQQQip@@$&&BBB Nh4V ̙3sss޽;`O  rssc$77)!)**r,qZZx=%u=.!WCs[,?$jX{p0j=m=XהЫ. ̥ڗnnLFu<|ؔsώ[,D{BҷO%~WQS5 cWOӯa'd76~x~:]Pyyy̟?gYVsll,|L4kײqF$""ƪU*-CONNUE{4C& /?NZZ]vu)((pzRSS߿{V7G^=b*૥;jN8^.qmd'7SzLJoRٮ](/ֽ{w*%ڶmɓ's:޾}{w@jj*_ˋaÆ`㏹u:vhs)֯__)!Cp .( g]v`%h~Qy+􂛆?\:3&]a4pi%gp㜿ۓuS!{]ټgUrU`>>[ v#}dmmϯ7⯍ۥﭖK{eteO^M}>_ 3gd|YHMF8^uƌȑ#;w.HXX#F`֬Y}Ջk˷:;wqoS|ȕH3TQQŋ5k/2 .dذa{aټKG?M>:c{3Y7 h=ύr2%/0מT٪c{.d߯"u͍I&rJe/uݛ:п:gҤI¼y{c̙3]2aGwy 6ЩS':vHLL ?|ƌ|tܙc:fr=zԥe+ ;bu<3S7|/[6m`dGI~C3tct֥q7 `}W}G퉿FO;c`p,<#w"]5>8- '3)u?< ߺnL| '..XJyȐ!xxxУGƎKVVgE8x о}{ߺ+WrV[T"""""" l~DDDر#iDFm-U/ֱZaX iZ+)):T^Rׂ%<>Ot۞|3d[4FMZZ^^^NedggZRU1dddбcJXy>ȃu]s= """.+VK= 88(Μ9ëʕW^Ʌ ؾ};wmoO>2l0ޑ',""6l@||< 7ofL: /=zt拋#..D;H""".j5\Cuu5UUUf3O+͍N8ݩmwi=qAeeef:Dzz:f(|}})//jHff&aaarԩvMEEEBDDDDDDDDDDZK;EDD\P]]%%%̛7cXضm` ~b{dVXu(,,lw<q """. ш;III9s^d⡇"%%1~xIKK#;;DDDDDDDDDDy* <<<0 EAA^^^  o܋-b̙[C;8O`d4IHH`l6, `0+سgVR=ʕW^󈈈 HHII_gXV"""8q}ʔ)k,ZZ=fhtEDD\TPP?0xzz_[n<Mn=b!77Cڠ5@XXXz)bbbXDDDDDDDDDD.EDDC̝;SN{n|||:$-"""#vHhPXDDDDDDDDDDDDDEhf2ʢPpsssWUUf]tEDD\Tii)+Vח=zApp0QQQ޽y'8ci/EDD\Ԇ '55Р}$%%q16nHG""".j5\Cuu5UUU ىHg`TVVlСCc6;׷N""".͛X,l۶3Τ ꫯh4NRRgΜDDDDDDDDDD3,""`0tu*""""""""""r,""₌F# ߿͆b!##xGBMM蚚L&SW,""""""""""Α믿ŋZDDD0qDG_|oz…DFFwE""""""""""TqQAAA<鉿x.NDDDDDDDDDD: """..44SDwEDDDDDDDDDDDDD\ """""""""""""".B` H1LdeeQQQAhh( P]]ͱc(//'$$8D҈ \Tůufjo 7 4~ȑsVc C~~>6m⥗^Ssx?yxNv=zG9x(8ys,sܹ6ϽvZ}YӦr1w} """"""""b""".jÆ Ǔ`h>{l"###}c'_[ج6.]`4ҭ{cyu̦:lkʲߧU9X,gѭ{^m16 XV~ڻ~Q|oݼ[,`0/e(7aw*Ηw$nnx=+c7} ybՈ 1:۾f?K77&猝wSL92`45_!зY٧>jjMUU3k Sk_WJRR"uSsڬ|i =Ԛ+w_}LL^<#N/~߭}`?.Tw2;8~,q.ETT3o8M=э_?눝wgc@R~w*;sN,X@YYܹ@yy̘1 III[99Ǐ[oJNʪUt:Ƌ/~;p?8s*馛pww' p77>++ɓ'SSSCYY111,\qAeeef:IJeXt)+WrWyM;:33wB~4'1(II {-a6Vc|o?׽0!wOrֵZXz}wܓeSRSZ-`H|}NXώEa/ MΪ ){j96+>&Eޫ8}m|ɫ~[{?vp/  _}u#5,} XՂfb8Mw`r>yf%'W63f`GWcdA侺ǎʦwљYȽɷpgٞY3>c{mm$, owFzz:aZYvgΤO>呛KXX .lU̙3#Gӧٻw/7mĂ O8y$ϟg̘1Nŷl̜9{wӧOzjo^/~{êUHHH ''iii >я-"""""""}""".~ nII jj*mFJJJ&L4ޮ0Uqja8^\}uT/ĦC~(nQ1_Փ;>zwsM{ߣȭ)|w}3{}Ǵj$>Ƒ'X]/Y٧٘kU561m3&-b*޾o`P}ק'GWo=%ϯ ϟiOw@J*Ed90(c %l:=;Ņl6 â_s*wִi;GSM6grssO¢E:dZnʒ%K bڴi|G<NXz5ӧOgРAxyy1n8ggg-[4hGvn;?ޱkBCC;dEDD\HxWc41$%%^l7 66I&k)cر_/77[WTOnu< ߎ23(-xrn{{?˶4b~ꨳxim^hBSm+(f#2R1-2(Ŏv{N tkvQ>^]R/nLhSwy>l=T&UPP`HΟ?th[K fln """.f VVZֻ IDATsx|~C#.Gwl&Pa0ً<5e !*Ηҭ{:3SY~, >;u|(n}ihy {aٓ4FsE ž}VVYwPΊlF1 ] *8 }^KW^{N!_0ZvLh6Hxcߟo~:;^111?lnDEElkvGGGsFZa/9>/((`~g6۷ogٲeVh""".h4lX,222wy*~P[[KMM=D Рm/X7s|AĄRr2+cѿ_:ڟ.kzGϩwVwz]t^@W9{>qzd5u->!mQ:zs%ElNݪةy<{!~+rWUfF&_5U=07ۺS:ziyD`ꈱ# ˛{M=`ҤIӧOϲpB.\<߅=ʻwD؀hFnK)C{bK?7bp*E6:~4''׌<:l7?;GRu=w ?;o!l$^aAݽ6tdRDDDZ=;HLZ/OVqoqq1jjQSVEŹ< i]ͩ/j:JWUQM)XFRFb4zvr'IeM5q2Ր]OߨXܚ8J%fy-As5fX8C|x^/HA£nOs;JKK)(( ..oon]fӧOhfN:E||ok;Hj[=Y7k_?:*-iC`Wܝ;@Nos ,GG_LJpl _/o͍QMLJG5;'ȯDPPwLwio+wwwۦ߿]g??A`놎nڜgɶ6mMK.gyɶn˘|ש,"""=/hy7dرtKǛ7o^m6-""""""""K`)H;"""""""">>TUU1x`vk4eTqQAAA<鉿- yqEjjj ӳC ٳKDDDDDDDDDDZGG@zo'<<Ê3gdݺuߟm۶uX\ivH5jϟ NGDDDDDDDDDE`]q*EDD\dر,,KΞ=ɓ'/sv?L&xSs:ˮo={8^o#Gdȑ;M";;xS_y-[Fjj*eSDDDDDDDD;EDD\Tii)+Vח=zApp0QQQrJo~Ep̚qq kZ!*ʳL2ѣG7n2((( &&rf3!!!^ٳ'555MR ϯžlo oS>|'|qƵi|KϿGbb"ǎSXDDDDDDD hdZkF۷H/s]s DG:چ} ɓs#ǎMg͚wGuSZn%=< 0`? )eefwL W^Q2nAw79 ܹ0x`(++sqҿ )--?2dL>Vx eDGGs9چ]wEBBd\x>vX֬YGuzSCAAr 111яc\QQwmdffjeڵ3gҧO%,, j̙3#Gӧٻwoݴi ,O>ɓ?1c8f38q\***Xtivb4ygwc֯_ONN[?;l6s&K7N-""""""""OG@:JJJ7oVUVm6RRR{h„ SV.`H elp i؋3ftgs,\2gڴi":SՎm/ٷ"CvO?~<111:#33۷o>,YhiӦ9GS(,,dӦMٳ\l6SLaѢEN9lݺՑWPPӦM㣏>p*ի>}: ˫1ӦMg̘ʕ+;^M=pn}ɬX=z0nܸN;ZDDDDDDDD ꫯh4b4IJJ"==/g2j(N:EQQ:uxܺ2Ncbntox߭l.ӀtiGgK;73ܒIIo`~|flnqѻw&--$zh 瞞L&1 /*W4PWwH2G(;[Z_P;˗c~|YɇIN[[.m߾e˖5:U5kf"))wyᄏ1_f6͆fk{p DEE7 -ٳ),tdwQQQc>؏qoho|gh"fΜO?ݖ4EDDDDDDD`d4IHH`l6, п̙?~<̙3CN]O> vdfV;v-g|jj6INq{kWղyͻ[Zߧ^ƾcx@_ܜ 8GkwՋV`ҤIWLZZɎxvsؼys;Y@qqϙƌ7;SUsH,""RRRg,^ĉ:NOYYYSRB??Q2`~"#weKI>}л^bc`c:|0.Wd0vl෧hoi}oOd<qq{>ܟnpގd4l-w_?ݻHll,_fڴix{{7ص\K/Ė-[ٳ'$&&2{lKIIa5ɖ-[۷/{&66c=h{yի)));5E~\'N]t&çۆy |gICIت8^>N-..VSMu0m>]Ch QW])rTVZj0Jvv }MB^pBC=й~_/ܦ񥥥)Ϝ9C@@mo69}4b\uƯ7kJ";;}v1}WpϞ=yW=~8deeѿ6/"""""TS1'ȢW^QQiH;dT;EDD\]hhh Ӡ8PFF|;Q[_@vY`̙[m۶V vTy|||۷|}}IHH;;"~S{ꩧ!&&ɓ=ʣ>⯈H'k""""u߶3Ϝi-!n okZ.gԨQ?2||;ȟy&Zqu5{7ngΝO<{z7r7^ƬDDDDDDDD~teG@wНlnN[@3Wlnh46{+/"""""KG@CjH{wΑ޵eEDDDDDDDEDDDDDDDDDDDDDD\0DVVgϞ^D fL&lΜa }4So())̝;#G:vmPtjҥqK~ nMhۼ5;l}8 Ý^{-[ƞ={8q"sKJDDDDDDD TqQX___zAFFDEEm6 p0` l֬/GbݳӑoYd ;wdɒ%$&&k[p7.|NpU]O¯*̂q0~nӧ 6Ç_DDDDDDDDZA`aIMMm;j(ƌs3Z6 ׷m{tWئ&:bbhmmn}6bBTg3m<7kŋ<< x{)(#<ܣAr fkǙ2e GnZ\(}`1C`Hף{BmMq[jڅ}WU@M5-~K{a6G\\;vL`N""".j5\Cuu5UUUX,bZ/s]s DG:چ} ɓs>vl:k8Mx.&O>L߾{>(wc$&f3feOIMr˫kӿ>b?ɇ)-u>RWW{ۊϟOOb٥m,|lMI1÷?_񧍅n>gS[+"""""""ri *++l6s!1DEEqw{i͛ٺu+VI&1q.̺smt NcU de߾K,Fzz'N ,]…=VbapW֭cn)l 23Æ}ڵsOSӟr2iicOli{,??7jjΫZmx{9|x8ƥšCÈڍ F#̜}xi_s.< /i14-[hۻKL,WTThۛ;!< bzCc3q4g kg_]Ldz/ N֬Gӯ ˩#_;pnMxcdIRl_߆wN--b1au0tl[K냖_oow|ר,""())a޼yXVVZŶmHII'44ѣGIKK{ =uW0}z$268yڴH||E3r9G`ڴDZ#FpTy{jСǧC5*C*8ruiy<"4~vy:6]`Ϟ!暰`ʔ-: \?~|EP֖ٷoK,iOtO=qK 'S/ c?~fqSோqk^]- o=9_`5Ln/}>iշOf+/ zZzYb=z`ܸqwY"""""""" W0jF#F$}Y0` ;ve 99&lK{:5(_<= L)o IDATsc))1s-/77=c۶Rn)7L&+uuv0k5Fv z11Tfa6ݻwFRRzjS K{xw϶G;1Y EKFBycρ Pߺ<;[S[3ƏIKKgϞ*w """. {FEG{qlM} +{z4,:B%Y˗*HN>Lrr0Vaa9ΟEd}#l~xcgpkl߾G@Ϛ5Yf;w<=^]!5!fm}~1@qApK"~"FA̗ ?IMߙ9cѢE̜9~DDDDDDDD:kKh$!!cٰX,ddddԩSݙ3g8z(ʴ;D>}x衇o䓙Y ݻ^ի󩩱b6HK'99ͮ]e;W:4oi}~z݁} pk "{ȑJ֯/vz|D&S[ر̩ӦM~_^zQTTԪ1- /O(Owc?x4?\y)k`ds '2_[,pxo>S`Lppsks;Y36fܸqy睭,"""""""ҁXDDE믳xbV+L8ob6d21aY_~~>ee ))̟ǨQË:~8#}wlЧ=h{h0[ر hoi}ov=9wmIXO>ϨQfձ_}3f{݄ygcٌF;[c  0~p{h=`4X <v}KMpmOX#'g;|(aˇw~-/$@36Au8 CǶ)5Y3l6[%8q`Owo 9"""nAbz}yhe#"""""""z mCDE;EDDطh>ѿm|`PXDDDDDDDDDDDDDE`.]EDD»D{| 4[<~TeWx>OG@ EDDDDDDDDDDDDD\.OdddWP]UŲRjM&l6kb F< ׯK6}O """""""""""".(5Uxxz ClX !,2Kt /o_0pssۛ**/\"8EDDDDDDDDDDD\ŲR<<=1tD1[ \,+sHTqa&nnnFm%""""""""""""ROaQ!6&SmG~G?$88nn$dzGV6j{ltet$׍^fߑkٰZXV۶s`f^仫NK`-.g$6fu77suVk^>2?_N[~`uM@@¤\֜DDDDDD~LZ8A\l  +++9s,qqi7;#FǗ7WiNϞ=9r$'?쳼ッKj Gn,!ZbvXxoӸVon]KΐDeZ)))#Z5Zsu 4Gr{l-q7.'6sx  ,""TWg"pLUY XPgŀfl5pˋlܙɸa߸&^mo|#~-7PErt :F@|eLDDDDDBI oOd&ׯ=77ooo}HttVSXX@||Oq?oѣFwn}:a{c'V⎻WEPOlf/WK,L}fjG޹%GC|4!`l:܌wy93nݯj\6EE@ޝD]ڡVszuU gz:Ƃr+ۏmF#aזt/'6 l|yW<ʯ.ǵn7Tüarrl{OAz!:'>>GųGE--݂\*|ǝz!؍O)})RO`H'N57(,*"G*++9~wq;32>lHY۷ Uɫr/~X{4]1~yG'gٸʊJ}el5`ʝW>O2N}S Ӈ`O?˸;ִߣuuudb4TOCrߓ1n&ƏxdlSSSa?H2{ֿ>Ǐ'667iz;®]aȐ.жsrL<`$l&%5H4;E2h4:xd2FtT$//222sX^Zz:Qs }YӑIHpAA7 MLXw VsjDzdXyvcM=*h4~-0Z緬4"#jz2vT$C /7k;ؽkժi?tϏ`eL&\B3rssIII!22\]TJJ 11$2[no߾,XO=4k֬]v^}*]AA(=yiUFѹ<ձ}[ J?jhkhիt}WoyC_I͢Ftqӯz̒I`07mcЇ1Kj,OiSYWA^߮<@~|=yze0i6ӕYC|-N/+]e?5  9;O"˲5,SrM :-T2>4Rbc\*yTd2X_GNN.9䑓j5>H[R>l;pFۙӻSv#n:'|K72ΛɎ`AA(s o*L4iTq ﮏ,$]M&2"Nƍ6̒İ!ʘ`v6CU:|YsRo;99%1?KhZBC޻TLt0u5wPUt8+Vf߁?ɡ[yylܼ(ү]}ּ:pxuAl\365EO]-H`w-NjVZͶ2au'OGۯw:?SdXT*5,өC;;KIMe;y0rrlHRYQOo &,?"}hҸ1)l۾5jp5__:*e-O>ԩPtX ܿD~9mm{Q]oWܾ_ѓ}?(:%{:V2ȲlsE;1&&1< hY'#%+Pc)7u:sIzj:`艠촿]%2۟dr٤:>z,l@o^gj|u;/[w:3O<3O<'}9zGDZfÙKY*'C^OoYJFl  Gn>>1x06lB:ҡM7\tx{toMr~Y8 ؁~O?IN;x7,ˌ0+\l,ח,kǧ&R3sb[Fe>̮/X+{cϿ`4h4N#w,P|e @=c}о͍a `{'E=kfOܭy!yiѢ%Igm6L0^O>C,2X~=~JTTG?`;v[ Ce.]^0Faz|||?a/0mmu+ӷo_.]Jv͵~WT˝!  7fK׿xb0XJddMh4F]Uػw/^{@CPjUF#3f&<ɤ{8P#Uo!GoAE'O'|ʑV/Jt&‘nFJxݾeA2(O]2f-!'= ?F 3N,IR.M㯃摜+/`4on]\_|3g2i(4 O=}4yl{et|?_/XCG=|%*W^&-=YiӲ9_̬,®`֡]6n[g01*P!s/PΪ FvCM !t[˰ y݄<3a},wM7ЫVS9|"9vHxxM5bïSFnW48{<'M ߟ>z3xw9)G1|!x o J$F="C1KfߦwL<|@u:=iN\, *J<θZ7Vn]EY2dYvkao5$swFEF:\֩ӧ9|0֯Ϗ{={2O_@TTzdg!2zVZEn݈$!! $&&s.Nʕ+We^z1bħYoO֭Y;uf Э[7  Ba-`kw(xoIb >C$77P~}Z*ԨQ,;Ɠ{sF>n6*p ._BLL ֲ$Ɍ/DIlZ&;~FD+vTwn7N=}羾p'A{ODYdA[jd#:ܜa-+zB4Q_}{ӖEX~l_o,n'NZmŖPѲ$>}[ddYtHvԡiGz{Vfiq1Q4x swkގJt BаQy*W&><L$ ^~8^]]˗/6itNԴtm޳h$33՘T*HKO/줫WQL25kk*Yj VA7WCz~fԬ^=]uj+KF!W2Ԫ.`2Kҹ.;'3X/ь$w˩,ˌ7.ro{d2c:0cΗ{IvGPP 7@]͟NhH]bntZ);|fIElx0d­4])KZ}ZS(F!((g8ݎܩuy\GɾvZ?Gw쭆dX.^~}ԕ*UDדf}o!?UR͙Ì3yiԨGyf.pyj5dyƍt,  2Ȳht8,^>/!$$NgJEhh(?Z @ڵCŸc..]B@@Fe;]ĘF`RN%'#8 70ݿ)^?n IDATk9Dv{V]z[*YH~mp(.X'lCV*)_Z??W}ucpOR cӖ4lPZr;%Yq#/̣r1}+ԐU,[ ؝|䒙ix=J;\}}&++I>'lˎlfʟl:ҕض};O=4k֬)ݚ5k۷/ |'AA$:#/^d vL&222)tpHH?\Jh(^dʔɀGgfϞ;˱cǨS *2Cvs%L ֵ!;7Aa$=?iee&Yx4^ DI+M*6ip@zl],C'q$T!":s>"?>>*|} (UZ®3K2s^KVt$'*5/aaaDsc粻,OM=k1o>H;%=V߰œϲJVٰߏ=~tM:s%rl_7LZX|ԮY9A/,9v<,\r?]ǽ,~˜GY;+,xcMrUJl$ߟs/u5C6,Y#ⵗ_T< TL?㏢Yru~W`}[|u>a~vЛ$fthx'f)d9קݭ"ƠR;5_2h4q%N3uXvNιtw~Z2ZͨQ#yATR~VKVv6ӧO美{]6ƍg1 &~=z</^De*VHhh(Zd_"25WwjI&|PLVE$K͝ѻw֬YCvغu+ݺu~tRuFjJ:  -fFtFT—߰ VkCرsh(?{ܪUK4iL~~6r%ԩcqʕY~j֤F e7{YAdv||:=}+6mAd%hPr%vz1; Bm;eIboөRl0A]ɿ>٢/{\7I5qu*/.ODvETwc.2VpùZϏ  vX'tF q>7uw9>ހ+V-70PJxYDuڙ+K|tPKYw"4w- , eZuX4Ǝ}kGV5RMY ZVqўkJwflݾU*LڛiIoa}2hyyIӦZYFw;+IWxBB  5= Ͻ '?ɇcP5 rY$Ǹ(rwAB8zt€Wޠ\XM |-׽r嬟_R1Wx6nނ`]V$=gST }w0&MSNff|V$Y%o<̩U"n⮻pgժ|3;OYZ;׮[PvmT*$9C5 J*|d&5k(Q̖wvj}ȲlJ$']GMNN?x6:]D \8 .ѢeSrxf5~13AxDp˿7bkw`&_iТF}hZ닏On^.eyu˔PAtwW`ex?t |aAZ z6'*$TPs5C-`*uy4hM_e Pس|x81ql>\ğPG<|W.;~ɵ-o13gx<[E9)hZC ^M&6&ZQC6//kDEF{;$I"9%LXCmd*O%P!<Ġ<޴"~/VeYg|_G22ˣB5Z\o4t(? jeegMY_gg4IT},Iܲ/౯Pwv^֍2.sGuJҡ7e4u{7,Cϟ`0RREA]xߵk6p&kegegD\\!_dy7s`` +V,mvzcug{vAA8|Ӷdq$$RRS=E^=m޻{9nӷOo\ ##7L jqqWp]wjv|1u[_zrnv~ob>~*/Ԯ]gA"&:GqkY&#! dffq)4m+5Y€Wͼ!0|`֫.?WT:.sg?[>cp7L&yTZ.O)6RmTTv ӳs'edO]N[9!o4F7iiL$rPI: M騤 iӇr NFg\=, `AAf׮Lb:o*-Z4~7߲rj"#"1*yfI,-'~ IU$*zQ>zBlGW-ٰ\oP7HI<u\rVШP!#,"$rUS7Drm `AA(5vjx'}˺h}UȲJUX:uhwC%Blyc(3_pbE^/AAAE6{Y=l)GTjvO͎]NW:{Q 6By,[G+e \O8-D  A{pN3$ȳBlɇou   ,9BfɌVW>1      ¿XHX8Fvj/eHHXx/KD      ¿X@`u&%$2f N@,AC@      E+O~^.ٙ:dI*Qy*???c e8,     -S\[F@`P==[*JlSZAAAAAA_Bt       K!A HI|C6f0 =    XAʀ;׺! ^    DbhAAAAAAA /^>CA\eM)=f 향$>gR ޕ*AQJ}:Qv륈`YԪYkYi>DrrM]Fagϝg{>;SϿZ^giiiP%/bYXo,շޡgGY)iܵoڶjIn]& /ʮ={t2?,_Aǟt:=?Zx$IR;1g. ~u{^Wn/<},`7Zhξ;:.mSjZoDݸCR;<=g͓sWcv o.չ5[xJyCInom[Ǣy޽}mI}[f[1o! s;!Z*.w\J/[ni6eL&ƈc9y4Η_㰼={1"NId銟ؓI<8~WO1n=̰#=~9+|t]h$>WR ', Hdh4"hpZv7\MN&&:d"5-(4AVV622SRӈ;HZz:f"#}w)*&$`\g$ȱcL=ʭx)3guvz:ߗh֔Æ0{<&1, N3Tv-$h5w6֩cG^z?,q>3?[qL&RRScHNI!<,LѺNH__?TLH~~ O5 eي)Ob"O?|2>w-Ͷ;ۻYK۝UG:NOFff㜣]mx$%9OUJcRz(l_q'qFPz89Y&_ڵk,{yse(QJa֓1f{^o<ѱ0glEv];䮀\8Mˁ㡣sgaqГk[pvN$(~=7?gs[ۆ(҆(ky,^Usʄq6e *E]覉u=~-YʹYW2hy_͞EjwZP};?VEgf̘3iN}yee)UM@ PFyo۰cǏ3nԧc$=)S6s6MmGGd>9/Ghh>ʕ*siѬ)6mF$F~]V/yk>9pIxgW䲎w<3ʖ$=3ngבo,$55(4 M˺xgL9*FWϞ㑣HO`஻0|+BRq}0,JCL>ZEvN#}HMǨTjdYsN,'6ZQ:?dtԮYvMnSZmjwVe@~t݇Q<\5i–߶ͳ*z̘ q1eϸjUsEw}:*U 8CɾR8>gu9v|gxN;X1f,ujd)jn#ƌ3ݼ;v᛹@df3`s a zm^~y /ӫ+Ul6#l6k^͞IxX_~-Sgbsޥ >>>2荷ewQ=dYfԸt/П|^yC=zaӯL5cөc>D6ml>sOlL ycQz569*ș:vǎ|1k/\REKkѦUK6,fdde,^INsNIM`0Wq_r]&{_{9ܖ\$|Hso F/f0]_ؽ aُ?a4h4ڱk7_ϙIxx8?ZȱYx~N׍$I|]k( 8վR4tU IDAT>gu!88nΎu8*9:$ׅss&,k֮'$8YINIQTݼuXLΖN<W0G>lߎ1|zqsTe䚭p<ʗJM&3Oك=-5ERYW@ $=p}IxX۶&l"liV*'Of݆lش9q=Ν?OD{Ix쑾ʉ+3O>o1 aG|hxaG""i޴ [Xg۸y ݻu7(cgrظ7 [? *2Ncq-)ݾp9 Y<߶!Im|zw{ݑd?[~agr|Q /?#DEEQ|ylڼs2c\,]NvN3M[ԩx6wg:J\pe:ě >nڬٱVUcRxP_I<\FJr]87~ ~۾̬,T*bce#J{}v0o~Rrf/k %,6Y8fWytuTQ*.Ytf"zuvp'}hq4w*wцPzl(kmǢ˕9rѭ:*/_,OE/__~ZΣ}z3lHrs`I|%e ] `AA(#j5?θJR*&|~WXr11Ň- V-j]'~~~&$IBVO9{M7&$8HVvHIM#4$N8?3TsRA&Ҽ>P/,[F֭Qx֥3Mm.AK}d:w:''HITSR WHff[JIIAӧcNVMJje_ $8>('sydymڴjoXk6{I]k W,6Y8fWytvTQ͛6q/WqܵZfזM..OWgwr}tq4OW|rhC(=66DYcQEJۜ`yr:??:wXgWZ}k+ˬ8yn%IQcdڤa3TT {)>2|T@Z56c\|Xj"K-Uq1>z3g1wd+l۱I3y;o{v-:<+ޑ U6gd";'sEy}:v1W܍g7x\[t{p뢹LUy!c4eV4&gp'{e{r+vXAz8J3ܻʕXi\qq,<:;w4*.J>>>Şs:f_ny$ۏ3J*~wA6xzўrPLKOg}f^{uvU^gͺuǕRFbɲ 9v8G|,d䐗hyJ7Md둱|_=ΦeX3IWv"7iboJs%M@ψ'A [tҙT]>㌙8yƌ)#??kDFZ5'Ky+TwϞ<*!#ӡmr 5ܭZbw0~Y}ϙ32؁}fA:whDѠRƳ?XG;ƗvmZ񧣘1sC6٫=ݻv_䞻ؼWHI^z?~܍,tҙ#G#cmyq|?C>AK\\9&c`0o^`QU!F|ʦ[Y*t gjކ0˹W(V\5%-=O~`cٺ3K=L\Xw¢%?Jb朹LddfҾmOyVhGK>DEFɓvgUsy}cΝֱ3ގվn|ڟJ"=8u\|!~Dhh17PTTxފ}.ÝxsXOc{ ??ݻ֣Vv茫k6{r,WJcsQnomḫܩ BG?qi<=NsPoͽ'm %UwnvQ"00Xy9Yޏ=ig\m_Z-Z@tC~>4n8)#G)4{TQGY͏+o={BvW(5i/OIi*÷ի0dH,>PV/fo? B1XAʢ|Xݟj>~yoʕ_ nJ/Vr4zt^%׮繹Rhm޺nwlnrv**7'N4Fr.\ض\mw3\/7?Ξ*+}QoC*.6wkLn?h-dm=>룞[EqՄ=mܯ_ml}_w^K=df]ow]}~sm뗬OrI=w1_ϿMp`:CQ8)>^N6 /d/}<2Rz~83u4l̋%$H1jc6ƍ~c댲SGL$N=$߬UjZ>iqi#I^~w˛>ofYi߂%ݺI'9bnw 6on:'ߺ[2.*Mlw.wU;VzX?4NXJ涏ۿVs׿ݱ ~}X}ko[&7ds>LI1Y_fw?_\3qd2F"(\wwr7?[O6}i޿_37Wz>Ym_ko\?p~O]_뎍5g2.q|Z "t6/d|[RH[41Ek'Ȋ}5.`|۷gOҊRFqj^ㄟd*/Ϟ~evU[KJM5b旳ۍ3.efy3l?+{y Y̑5b(.6NWJ}qOoQDisa8Ik'D}{۶Gh޽6.7ҲeI%Kڶ>f_{wcymz&>(;׸Lo7B&L0.le/h{^^4^|c3qU;VO~b^/.6g\2}[{s_]R>8'7~f>^V㧻;VI,o!~{r7?߲2[şݽZfo\}~>Z'X}t2~SZ*}҅ ~j7@4MƯkw0~?th떏3Nɇ޽$ͥ[6_zT)0x~ O%ܹ_KM ^u~V;&q/%#1~cp8n{3:)3[waHo?V_K>,}1BJ2~?iRuuuΗa8| F )؇7VX9>_o1F_|ywm샞0iq曥d?5F_Ϭ6F}#a.f~+Fڰ'$|3_O'Y۾xh38;`1>s挱^V{?/7FHFejXfiO3Ǘoy/ߪ+A{li'WMdWk1d'>Zu\HloG珌zGaj54kiRʼnݰ4]i +3?.>~o.x͘4HW>#cI~F]Td\Z+[Iыix1 R2}_jyz">C[_A./~U#w˻{}Zy8>q~ZVs̴5=fSo3yߊ\/ߪ+A{k|b_sϿV]W(t6Œ 㗾 /]_/uqϣG,w~ IDATOp>!׽ӧ[w Bw{BKۯ[7cј8m_?Hά,kA_od2:vLz?{;%%'m1.v9͝k&?L_kOf-ڿǏKW]\]|ǡIƉ74Gƈ$8܉hw{0?Z#$y6}OwYњ㣻z.~~Ýe*Wҿ,i󁧎XϞF7KV_?}7_rqdMf__w7wNjf}fh}?m_?,+k吏~Pf׮˯aJғ Dd|Y3NIƗuZo1ҪUzҶm#G{yҕ~OhicvEF'h6lh}loa[qJ[n1F?>YХ4dq k5FoܘTi/'4 |sq߮w1~3LJX_f;HǏ7ߔMs.o.(oa_Px,T|kW.]K-Z$Mn~Ŧ"%Ÿ {~mmul+3`\s 3g}9ucۿ Oݽ>1֖_'jefyWOZIvڵ1i 2ԩ͟@=v'ӟ{4I?qY̋lce (wO<\*>??eSށmo98n%&J61e=-=Lp_~وg dҼyifZ O]>kkc}VwO3s׿ƍ3bOkmŒ{ܽZoіYo~`vmmw]zeca\L=wO=;Z+{{- EGiڷjĘg,*2xmf|ɼ|ܒTmTSY6鯨;VZ8u%R/ Kk⋦rkW_U~wjkwҜ9Y=>H;fɓ*;~Xn cg@@e7N4 lݞ?hKlVnZ*O>\ ^V8~'H4sf뗷'ή?_g1]Z=.*1fvd> o `.' ಲ{qeЄhJJO$%.Gr׿|هЊ5._IuL"on~ێh/qMt?0SKmhWҚ5EM3FX˨gZ.:鮻V/]';}8 =x};_89m#_@G{Yָ?'бƌ^\FG7-uX`˩+/` ^$//OUUU0aB4áuiĉ ${$u] ߿_YYYӧN۩|޼y:| KsZJz N:׫X̙3խ[:FErrr4-//Oɲ_t+++Kiii:u۷oرc_]'NٳgyG͛7_c(((-ܢziAtR멧ҍ7$~@HEOaچ 4vXEDD40`ӴzJsQff6nܨj\iEGGO#Lp87oRSSm۶M۷oʕ+G*33·0^$**Jl l6M-_]]-[h֬YݻkNye˖iȑ5y``'N\" ILLlHh˖-ŪUTT׿*??_:tGb[jƍ죨TGŋ3ϸ7??_{n5t"tD%''p(;;[0ae%#g{1%%%iС ӭ0^$//O}4ákرc 4c ͘1CofzNDׯov=ӌ3#x&}Zt^{5=zaÆtn$"999JIIqdiϟWVV4uTw}ͮGU||6oF /hԩi&]{/WPP[nE=e:+xӧ*,,la;VM=z 4-))Iz饗Z\OLL=x+z^VR||VX4мy󔚚!1@gE/Xli6M#,,==z{ w]=zpgٲe9rntj];:,11Q6M|N>؆i_-[&IRdd|IIRii? hӧZ0^(11QSBBBpΝ;J9_{WjjRSSۼ|ttz{O=vچpBBS{_}U-YD999c+w\O86mvܩ+88Ufee)$$D>JJJvuEi8p***C`PXXjkkΗhEDD4LKNNp\~~ -ɚ1c:={j-;w\͝;2F#Eiƍ^>##C;vД)S!:vCLZtBZfMOOO`4o`$G="yyy҄ 9[N'NTxx$L{$鮻jx.INU\\k̙֭z͛ÇkȐ!Zteh'Oj׮]SBBSϜ9kرc6lOݻ_&W" ?oaZ^^UTT0g}(: #G*&&F~_k֭իW<>֮]իW.GyD!!! tj>C)>>^;vh3feo x. >}<†i6lرcdѣGkNӶmۦ۷kʕѣG4OLLۧn$%%T/R]v7PNN__$"QQQfkfٔhQF5< Tu iETTZ,wn׺u㏫\eee/t&$2 mٲU k޽=۪U4nܸf^.HUUUZzFkFSL! {e,álj„ B)))JKKsUkɓlYhhGQϟ?/ɸpQQvnV=z<~ S]]v-ͦnM]-ZIII?~.\v{Oj)^zIx+ @w}m@g`dd4V-_[[ٳgo߾Z|Gcۼy,Ylٚ5kZ8pz)??i?+ `Pbbӕ9͙3GeeezUQQ!I P``RSSj .ܹsPyyջwokZbnvM7dm 4^hڴiڹsƏ`˝;wNںu=mdee)$$D>JJJ8I_x y2Й1/Z*::Z{:竰rmbfW_oQϞ=5`:%F *))I-ƍ[|FFvء)SCtСCI% @'5kִytFA| `/*M0aкu4qDKtJ뮆璴w^ܹSǎStt_UN7o>!ChҥeN<]vN MϜ9kرc6lOݻ7ھ}z3f誫b0^$''G)))N򔜜,0RZZN4~O_{NcƌQyy<>ykL w}M:QF^P~~yٳ^ԩSwiӦMk_^&" >}<†i6lرcdѣGkN>s}Zbvܩ^iEGGO#HJJRii^zfqi׮]z71c4z^VR||VXqF/Xli6Mҥݻ֣qZfv֭[\*++k2O=.ݽ{wѣ΄{eeF[lQZZZlڴinݪCG?Gb[j,Ylٚ5ki"UUUizU]]1c/BCC%IOȑ#*--Ց#Gai+He,álj„ nRϟWHHcEEEg?G#2JHHff\z۷#m޼]/=p@S~~~ .~~a>}Z Ӯz-[zW0//`z'O?V]]$z5~xĕ?ه T^^J9B>}VX:n&IRttz{OT]]k:%JF/4m4ܹSǏWpp媪t})00PW]ubbb4e=cmdee)$$D>JJJŋ@.\(I Pff-Zk~jx.  SmmrssI%>G\ սX~~ =r8-_}ꫯ7ߨgϞ0`@g̘C5[W2F *))I-ƍ׏m.+IڱcLޡСC[Lve9\ @'5kִytFA| `/*M0aкu4qDKtJ뮆VeenVÇ5d-]ZӼSNi*..5sLui'Oj׮]SBBB:߯,Gwyz}`HNNRRR)99Yva畕4M:ٺ t-衇jR裏*>>^7olL4h.]|=Suنt)%%Ew_o߮c*77W&N<\HEOaچ 4vXEDD40`@CSjjj뉉Qttom۶iZrrssuQeff6'%%T/R?S3g233qFUWWkʕ+|j$"QQQfkfٔتz-[#Gnt5y``'N4LRXXXVWWk˖-5k${>}+ `LbbbCBF[liUȑ#ZxyǶj*7GQQQ޽{uX_W+22RjuD%''p(;;[0ay-X@}xl'OVdddeB)))JKKsJuu$cc=$ :a:\ S]]v-ͦnM]{O!ChӦMڷo*++i&]prlCvGkTWW+))IǏ… M/iIDATwoIҙ3gŋ jW1/﯄l6l6͟?UUUԓO>)I*--ǵ`_^}͛dɒf֬Yj۷/_ުGDD(((H1b$iϞ=iU=H<%$$^.!!iW_}UK,QNNGJMMUjjjw83g﫢B@I҅ t9UVVp\ݻ4k,eddh֬Y*..|7|#ΎK@M;wj p<ܹs֭[n'++K!!!zURR5UYY s=JJJa0^(,,LnUtt"""-;wΝdz~~ -Zzp9Orryõ{n8p@AAA ti1N(00PIIIZh6n322cM2k~~~kH% @'5kִytFA| `/r9}':uz)IoUXX[oT'N$uU[nmukm/րC$=c1cn˲>_?AAAA{m٪tzm:uJׯWqq3g[ni$"GѬYjРA,k޽xw7(!!Mޮ޽[o~iڵI֭[5vXJTUUv@r^[ ȑ#^{MuVիbӲeL-;a}gMggg+..tZb222TQQ{*44̙szMhՈ#TTT^{M]wTTTi/B*))Q]]}݆2/!!A%%%ڷognݻ[\K/믿^~mٲE=zdnORSSUTT[M]oƑ#Gxb=3n޽{uh$BgΜQyy˝Fw?~\ׯ׏cHv֯_hÆ 7o$)88XM-4irrr$I7|,Y"ݮ]viҤI??KvW_}$6q9gϞѣ&LBIqqq:p~mT޽UPP`jԣG}ڱcvp]w5e*~3y-X@}u9_EERRR4*v];:TZZ $IJOOWzz$#G}iHTC uQSO4IoT^^g꣏>ɓկ_?EDDtÆ kSԯ_?/ݻwo_'߃>JNNVPPuS뗤\NwܡSN{QFFvv暎- W駟*;;[?Oi&۷Oڴin&uMq䤤$?^ .lv0Hdd\^R[nr8r83]ޯ_?T d$U&QcccuY曚={>#mܸQ&Mjuz-߿I҉'jĈ7K.r8^kǵ|r=zahU,PFFrssyn_k7m WSUUH=䓒qq-X@ׯW>}T[[ٳgo߾Z|yhO\N[p3gh˖-NEFFgϞڰaCToY+VhwԩSMץKiѢE>}_E/n&.(1Bo$@۶mkxG?jz 8P[???ĉuZî]TUU%ɸqPPPC'ߝ4<qEGG+''G}itYY^yUTT4 ;HԞ={tUWiኊfs' @RSSգG-Yt/O>D Rttp^3խ[79R*..nA]+Vhźktw6$//u6R7nfϞÇ+,,L˖-3ג0-\PqqqUJJf̘Ѫz-k̘15n8:?+q7*22RW+H%$$ߊs)33S[nUxxBBB~#͎߮M8"ls1VU^^R]}ѣBlp!)$$uwv]V@@@k*((е^aeLÆ S.=ߙ3gTRR~RR?W<? vq`:`[Vw{ߵkWݽ{?y%%k/44R3((HAAAm^^j'| `př9sfΜaq`$GA| `$GA| `$GA| `$GA| `$GA| `$GA| `$GA| `$GA| `$GA| `$GA| `$GA| `$GA| `$GA| `$GA| `$GA| `$GA| `$GA|DWIqpE+!|_平`A| `$@WqL'tt z! :GtpoG ;{Ξ0\A$6W$e~Ew4_oW VHXxG b.HsrTWݹ8Ou}'Gq~ΘO5wt®_h71P/R̵׶Gp%\jz/7tCi~V}s3xKg8zXtt m>8R]~Ur\=ToTwsVIuvc9M1+I}HL׷Pq}g'tXL?*سWO]FUaΙLL P4ecΠn$b4*WQvB#!&?6XQb4 M ,4@@vNs"Rdf:2C[{w=猎8'7K"6׷_]_hG"яLN Q)H+W2TN $o,I{y'<R8*IhT}uvuv9ssU<״iٳgu]Ju+ue}s͵(PFXe"#cdò&)Z/+JJS~SIƆG+;' I_մ[i٨! 8E"aO675ihxX}u˗iK*(Z]*R1V(1ɻpL5d:)9$[ߢ$ /Uc2WKZ[w亞{ovtyuz^Izڵw]T֎g_|9_[k|]y~ǹB!B!Y[P[7?PzX\?b=wLX&\+Z%3pB& ظdc2f]R&Lx<cmJi5Zq\vo3kt\ZV숞f~E8۶3zpz =nRBʮ@#Ye]*R126"223uPFaI!Sep|t}G@m yllT"Q[nE|I_t̞H$R~uy\>-^rWKP%&7wI~Vr)ԹS;?5 _+7iMQɆODmQ% Z_L>,zB@}tBoPE&7wX[#%룓aeYŒU4RdW2T#ٰdt }!:ҾO>ucJ` XU떻 U4sHhpoҹy9! fƾ-0%gUs'fkiO}_|>i11LrLiuBrrraCff7IEDDDDdet6/c֮]@jڄa68{O+WbX|%##\6lȐ!CYl)0z=%82һzrbr+W0[`n(//MkZw¼mox'8唩{D<;Ԛ)L6^ݺvZc:nU:ta땘UWMN^M_DDDDDmypΝlܸ3<À~Afff;Fri64͈_TTСpa9au7lșgMjjeSXX@BB|m8=D,WLKoYYDDDDDZv[leAdeeޏ=8,"ƌM\\lh+Vp88ٳesq83ٳ!55Çs:]6,X2KϞ=(,,QTTDRRO޵9zHf44wxVXɺu)--%))QF{[X|ח#dڰa#˖-p0jH233Z?k~ ->svhh(ʬ~վ cPe֬pr-7i: n{ *-i[nw,QCČ3HOOg֬Yr 34ib #Zvǎ_>ʄgk$g$jvwy{宽z,ˊ]$|2 Njjg}3g 9GJJ*X|)0$vf =yo AUir`qZ_DDDDD ?s-qsq=wr1Gr~; !^xe3UWMN̲e9=zT2I|i&Nϣ>Ď;9w'pPXXDVV^?o8ï*z爋墋.%O~~QQQ<؃7'7O= Ujr#O3v, 4[9-ܣŲ,N=+W0x xe h̺w7x ~;@h8vhʊ֥͒% BWTTgZVC²ۼK~!XEDDDDD%@srrXns^( Y#j6Z[ò,qnjZFo޼Ku/0 ɫB~}vsʙGrٕwc|Ȃb:M,"""""L0 `Qdg/rdR @aa'N$%%T~JKK9x9%|j~曉wOsQGccgSRRW\5\CYY)Kdۅr2?\qM+dǜwޙȹkظq;%%Y[K=̟@ !FiiQS+<E$r*aٲ+,mnaj{II ~Ym9 ?~ o6֭N~ p^`V^ù^@FF&99;CIgq3gDJJiZ5omj~eW%޽{1sKV:kW3gD~HIInpN(;2sbv;"_YDDDDD]fѳgVX…(++sa޼y>@Ovrȡ#XW&LmFL2xG:t(O>Cv/3`@?N:BUF49s9#?>+V4|Lv -[Ò%y䑌7BFWʒ%OƲ|>JKKٶ-=2ك/⦛n ݻg=Za\~>M^^EX2ӓn?/mde3{Lt0n"cY747v<*]sw /nݺЯ_?"(5mչi۪/\,6%%%@ϳ^.\ʊ7 xl޼_W_ />gުӧ|3j%g@c(48գW9.LAEE9~B:eٟ}`ؑ?0*3拭HqOѳgN̙3ρʞLT>411l&LB:Ny9ݻ~.?V⣏>[ncԨdfVUkYe2Oy㍷=z47t#6m3`4C1{C=&-KttP,;SBߛL$-- 4k}AYm1<dɒ)))!##X"""""O ر}}t˲BӁf \s3f+ogY3\sݚonMpLNNY .b2229xgp]ۆ kߥb凼V"Kra4ͦWLY.cs7@I` omL0ӄO?-bĉI [,__r-ׯ/w} Mu z.2O$33~>UgŰaϯ|4&!!nmۜy晜xl7b…8唓kEеk&$i쪵\{&}Usouݬ_76i~i2waOtp,**bذdddË/>z<ы/>\DFF&Ç`mۆ۵Aر gWP^^e7݅4L>eSVe1 c˟>f*a @ii!>_9 8n <в.WNʊ|ARR`SPkO7M "A?^4bbbz(,eCqݬY]8CL{eqjP^ ## Ӂ[NTT,('gQ^%33AQNlCen~/O~ e}Ǔm̺^C`38s9BwfAܪnn%kn.3fo(++c_1bًի))C=yV %y>reh3T\XS-[M}3GfWFDDDDD)U#}jkTޙk`m$vj+46aagQ7hhmu!3@EL?kZ95n qmKDDDDD!o.=ztvwv(mg C+\"*F~64 G=?v1ǵ_0 Ái:1 g苫ecU=z32~-Ȟ,cS8j*Qܹ?INNP 0s!(t|Z K݀`\zώYDDDDDEE\١>FmODDDDD6۷`ѢӻC'T'&űS|yȮG1uTxXA4p q'gCHKYu=eY|\.;;^phFY Pݏ;xtGYtҬ5Ⱦ^FBB1^pmAL'8:(,ٛr*233;;Z `67oAgWSXDDDDDDDDDDDDDd/avv""""""""""""""6QK(,"""""""""""""PXDDDDDDDDDDDDDd/^B `믻.e_ظiS㥙v隕>V}{ Mt[~֭yqaX\m2;Kc"u [ƎݎvKY˯0B76k}=霹ҥ Lޮ]DEEӪm^XV‚u, %A\Tޓ2o`].V߮.4Hv6|8'1kb{Z~l!33#4m,^M0|>p]n_u?KRQQARRR?Ο&...?ٻϒϲ}vvv TTx iK~^~}͈ `]o~z_7/s>#;b|Wxir1xmVƞɧ%''=zPZZʪ5k8g?\| }%{x㘶ēO?%p50[n뮹C'Mʩۦ[3ߞv,\cǶk3%k~?]t8-f[hO߳wqMf~xX S%>e{>in-Vl`1m\ulZDzmxa /_֛[m5=\' fh˯ƻ|ȿ}aT&Fo~+_~!UxO2쏵ͭw_c>*tB""""""""""uE8(BX+ KjJJy^}U~+vɳ /~9&^[^`99rBᨵ|{8"tLf3t SO<̌^f _;z4N~v瓒[n6vLikӛ{cS.ކpCcs%}SCӋ˫mVu}ܒi NFe0@Fbۉ#c+{o{}BzB5)FIޮ]lڼ^={hI6ǏcQvv(hI6C۟(X}Wi`Gk50GZj.Wǵ?K.3O;c<";simlڴ(ݫgkիo4 lS3{x6rσO2nhn|O˲5=6rӌ/1tnM=;gLj]} }\x1M_²,ftݺu去e$%%)-~~L:\232-ƿ⫯0L5?]|!'>s\tgNNaiS<>9bxőgsx8.X]_mf}=}(oALLLsr5\{Mm[{Q{|,s=P$LS7^{Xχ˝#Z]pƎel۾Ԕ&%%5چ붃Ysϡ[-H z˻;u[quV7n`'.,!f` IDAT"%ANqcL3=Jqhq1As+qX6lwhH|0_̚/+3YY,Z^={_P@AaӮ| ;E=,nyq櫼GxÏw/w'mTs\x1ƍc7s0M7\_&/dC>d nC̓?i.}4Y?B9a|e8<\1*0Xl[o[p4}^sZM[0 t u'?%l/zGO'r&f3cki˲imL=.<<^/ӮViΉlCM3XQs~3O>NR.< <So8spг{Ҧ5͍C䠁q\f>nPqdpC9sx*'I\{BӠ'7BN[X6WxxwZ;m\^4ּm%51k?k7j/f'xI6# cr3AVZɟ.VL5Wc16Y5yԈx IJJbd8g8  :UMP^^^}s`o~sO=NRR~!3u66]DDDDDDDDD`g '~nN?3c?V\EaQQ)xBJ=lܴևz0pgY;OcN}azZ[6u?r19J:6dR+}v.W^b2))Ą;&/2IGáGOw{&>.ߏ5ʧ_"%%eWu۶N>5CͶHݙƛa7V$&? Y9cnk lݶs> (<Ԉ #PcL$GRHϙJB~$oXm͍)\~%g!?WCyƻuf]7g=ӗSZg;xXztkX|`p͍⦮Iӗ-&/oӦĂ +c^GɾSx<-r sNmggY""""""""""5ExQz;0 w]k!xjkqcƄ+7o zd5ymrrI'rI'bY6}wMOiY111$$T'Z2E\;snGߟy:y{yw>o6r+6ndر$).)iEkӶvť^L5?XxQ.b8F-ZNKMmVyuuDj)+/o.)sJzEEE,MzSG^3HpYsϡr=!'$c?a=c-o𗗷vIHqվ)K 8c\&SGW4 ډTηNG51RFAn^[neqv6'F gl^/ ~oscl8la𠁔SΜ\\.']v m~it+*(,*CtGMB|< Lz:N>eV|7/>z?4Jv'/?iMsOv+7UWMŋO9c0ffdPT\4p˶m*9~5Z-[D~tLFh5㍴ WiYݬs~b2 >n||U϶k-Lb|íWZ5mkAG+*-5qԈ]SfFݻuߐǠ1l~!^#GxM}GҞnj?J dQח|>jdr-X3k6jݐ#"""""""""Җ=sy'@e!{6-1 һW/z˯@E|m$~6z'4O?#3h@zE}xŗw% /¡&׷Ν 7j̞=3t^Zq`z`cOP^^myӑWwwjiHTWSmhO6>k9ZA^6;?  @z_wzCM2u~%ŭxkA!*ͣ_1gLm5oF`o00? F ˖lJFl3v,=199|=gcF0 Fh#k{~Dqq(})e3rvWDDDDDDDDDM0 f|#u/M=4rv1dJʚo||c> `JN6U*'w,RFN]-7WSN'+3LaoD\^מm(s#݇l iLhVnZܒsUqv1(x4 Ȭy\vf zpd=]6;,OEwAl3u;), h}Vm)U 5FVBKb"]33ٴe  i1MHsll,CLQq1;z_Κ͘NoGs%"*ͮ|nQ DDDDDDDDDDښQV걛^uJK۵z϶lo~۶Nllǖ]3'(RSC=jڙCTTT!Xw #=sRT\LQQ1]2k=:JKKޭ{=uo8I][7bwlC3:5 u^¥\ġ~WkzSǬ#_^'mxy%<^^zD?hOr.1m\fkoI${5!^:OL7;vILttF*"""""""""I&:?~3"7>oضmݻucͼru5>oY>*\ nC@˾lhC}Xjo 55ォUùʞUkp*+8h22;; ه^".o6sBoIH=JHml7,`YVg & è91omlڴ|wu޻DZ0Zz$^5+c0۶, CtjzgaKfFi* ,""""""""gvv""gY \DDDDDDDDd/mضM0PD`\6%EDժ&˲K(,"""m:EDDDDDDDD@;蚕a}=IQJˡգgItli'vh%e!^;N`z:a@d?n];doiKl ^yf ِgGquph7(,_sUY_o*E78k`9UkՅ9k*XcӤ;a[l `5ZxȠhRhq=Sd$8l(DDDDDDDDDl^+\n7QQѰ;obMc%$гO&Wq[%-[ #'jvC^/K.e-q>5]Ny<v؁2eIIIp8\ζm~?v+&&Tnw( w{NfM9DI (Vsad&:؜ >dpgߞka7_i=ryaH$' /sh40 ~?.>~`2Q nPȞQ~נ=Ä#-+l9_4o/,,F%@$'9 ߬`5lA&tOv鲎ɟeDDDDDDD$2{{OqcU4ap81).j' oww칚۾֯_ς (**j >>qѿwKclmk.y&O-$>>***"11N:İx<͛GNN111O]0a„Un2C]|"4=!F30H7Uj ϰ9LLtWRysyb9u'RsmY^diYhm.{)/sXǣFC ))M?NfٝX}ìi@|IR<I;y?+/VǼՉ?-dx7/|_+mfWEqX>̮uDgs/lZ~^o=/|҂XDLw, #{Eqa=ZXJNq>.9oGoi~ Ln?%6x"J媣p{MWś8Fa뚽n],5iMs|e>Uq; Ęd%:84A3loGV<GT~#X'(iml?W9`$%5~'mgZj5vCc7`S4"""""""m{WnxѶPxӦ{8ʿ58o6pɡ}j0 ,Uq0[ciL8<]jJqq V& F4rmp%v-qqq5k6mS^^ΠAdދΏa0`}mܸ]vX2Jzz:=z`ƍ &^/&))Cz^***иLӤ{KA >>W\\GLtTezw+̹d[~OVacv,"#!)C&qSH^.Nˤ3 qQ<] ȿٿ>NbA%Z!2\3aE|I - 93YvKzJta[M'&T%x42\bn+ɀ {8i_JxTL>:k8-r-q "=Aw'$sę ͘>QL9048ygZʣ{8qeG{[O?-`̘-ߚز7xZ1.;/Lm[{kmlyy}@u믿mڍeYU?CipeWtDH ^lˇa0wtsY lˏa1 """"""{b~7իPXXH||<fto_dW1v+1O=\u1q{j-]@p@gY`SϞDvC h0˴!.z~6v[Ba1b[^)lC[\#P[ygQ),*4`8V|pǩ)Ms:/%.Abt*~iQX-oo+=<~9xL\pc6`É >͟J9{\<_.Փ-'8(ٵ^Yk69tn*ymRo<m.z6(xG`k .Zp Itn$ܘ}ٵaaQS?@3oZ?^XCj|ir~>1tkc(++#66UhH[SA0s#U}Zצ[MճG{ 7c=p<3wL*?c5= ?|J|0|廰pEpG-YEbSuɷmvyFI\;{Wp׏{bzg$""""ҡ֭_[o Mx<,\e˖qԓ8p`^lK}7>{0Ɵ9IrO)]cx7NX62 IDATB'˷TyP2/: Zێ 7V~6j}WHHH;ͻ/┫ezlmd|R2_(< i%uC'H ! %y36EeVUA<2]4,jieN(ٟW3qm ˬZ47s7Ok0(o *sKz)䑉MHӦ5#6J6wVQes]6]ֲLҚx烏ؿǎoPu&ItUF5x*G|S,^^={4lgy/^ٻoװr}W:o_Гe{WwH LeoӞw~e߁z9[r3Z-Y3ՠx<FEI9;,kwY0 L`Rnmwf'|~IsiKš9IQTT7sr*}}x%g~>o~a0qG-'e]de"6&̞{NK[@w<+ )jo68o0aWg(f ^C ]rg=Q{C" (ҤjeuGHH &p_/4;;廓XC1u{f!6ewT=UI ;ZTxb&lXN潚yiI2ӽ{&+cbbN[%GѠ(rrpTrx1ht&bMUUadd^=j*I:=t:QTYABU$Aʕ+(tzܔWkX g[&kn$IrrVs*7^Ƭbc*JػwcJ2EGs+ )##>LaI)Yd!.F10f+5`@@|4n7]{IlU{nvꇣ=#Vlf)l޺ I$xWUQTDeKxn,{SE]} PY|{o<}SyN'6mv/A8u=whmM?_zh5]O)ߏf tFRUhTUeݺ?۷lᑭ،zlfA&6Y_=ݠ:\M0Y}=O.tAAAhZUKkGFF2r\oeKhVvV,42zy5OU5=S9A_<?i5c $ ǖ- 4)2Iԛ>`ټa%g(81( %%'z `WU׋A#IY (ʸՍIʱc q=oU%JUYUvW'sFvSD;[ q m$?' `kEQ~.1Cz5גܵ7rՍGZ5k/I&I1kڙ˅L ĢcyDgru%O¯K~ y8 {E* 1Ϙ8~(.QIhS[Vӕ±nk(,8OI֗ f㮪"w#x^`Ny//`起7m8Ư_i PRB_ݰ=:bp>z2?ͫ.uZ cCKJ?Xl䉓-$N74{KW}Kgu5wcRݱ/=ϔW MgFu8ԯmBnn;wjpR;&&>kLl)1-X2fU`1Xtz _m;?'1y@;ӂ  pڹsÇ5|2bomev;ӏFI%#&O%1]\þsٷ{=R&8">E&z'/u v!ooϒěMüY~oO 1}O8<IGCٽJ֦YmkѪ /Ӭ'kr5S('Ul Tp?K^^ݻwȑ#$%%ke}E?lr{p<ݞڟ:n=8_MݓoAέފ/,$(oh֓?d+985u_F_*cđ/C߰夦&s40INʡNpx =uSUΦMtzн{Wal3=z"Ҽ] >{|:G `XfwQeghL6bÃ{ݺvf߮!Va(LUE)L~jhdb~u9}N<&"gat2{""urv:h5=.|8߾zVGAQ  :bxJ߃O]1mc ƀ ds^ZF~+cI쒊-THhRVd{u;oFr$uRrz5T8kf0|=6y 뇣;)fW+Ǻ nl +kǻjMo GggVǾTR==jtZLf 6+(>gkIX폌tH:Mf8eչA b6!I2ȘtJ8s9xB~A1{7b"|qG`CxWnľm9wʍ\pfD#u;XbT%1]e& Wg^(p2kƣc#a<1NL>OCnq a+_^ R/!جcx.pZ'SwAi{*M?7P8&&;w6؈#2qqq-npJ`EQp8Y7Q]y.Fev xtި Iؾw~-3OLSb$|#8d$=xmપb4[iو ZxF_!"gFx]))qMMbd<ġݛ>m;43|Dբh5+߃!kPʷqa`5QY;љ\peXi  :>9Oy*Yj3+FՂa3,2ő} &W7"7?w(NE>r/:HԎ`.{iV㾼#< x'h l> N9e^\Q|:";JDh˜x%.)QUW^{ï&k~~駱Z\z|=R83ʚ3* ,x*:W=kظ]q[c0 TteU,=cދ)^tudkT]!}LxD$9ѧ7 )^c]@4p5C|tfH<p[ ʈ4 Ǒݿ]zrX/##zlV"2%77Ad-NW"22'yj>lX`x?Og*xѠ.qEOY5YQ1X|ږi$z]Y}}X!jS[A[%m'UZ>yy{):rxy[P  4VZͲ_ҥKHHoxwy$_q8* R2P^]?}Ğ}PTf̡a K?uqVshn'Wy:Q .A؛̉NmI2 b!-$MK*&t&x#""8pI111(tԜYzcLe:%oQRZ>EN|m^|HUU%0zh̶u7:Wx%ijp6b0%.\V'|U2acˁ\'vFR%Iqc#_ ``L~Z3( 8peb0Zԝrb-I*m 4?cbw3ǰEt82c򽇅@^j4ʰ4Kb6mD>}X~H1<~SMW#U'e ~_?]; N  g0p6lݻ>l(tRo߁Vwm.'Tҷ^od`x6\AHX<ȀRKrYPz|59ff>3VƎNJJ`n9U2W.dTmp3iS&rVW){ '?\^^Ξ={%]5 iiiL0rk3tPHe4 N+-+ޠ۸7f~XvcX.?wO `y^Bܵ;h.7nXOayG<~ h EQP|xZ\*JJ+@Ғ\UUU(YFqztNTpdUAtNV~6nJhp0aQ51`ͯ>P욿a^f?,݈IV@+TTU!zlx)).r#WUr$ {y1k_o24TMx*+52R\T+!y]=zrŜs>[Y."b>bd DӉNorU ?0oPBZ4:vфYլLZ$dQ$ӦBQ|jG_IRbFamڸ[pU.*xdM~ਪ !ƾbi>be V篽gbAuy@ř;QRY{f ްUUUwv[]WbHF^g'72`MĦU'xF6FÂOjK]X)|F]1C{ï[Duwi߿G4B f@]jժX, 4lӇPW.OT|3" $&Rgq&  1i|\vݻ뽯jx<,X+Lwxd>yykI;z=E4ny %( 6UkbcAlF]*:FvjroA>|wf`@ZҒ" |^d +hG2+KpgȐAeTV9qzT  x1ZC;\u^A1\~յ|bVZCrJ ɗPCDdXƝRT^{#~];֣7^G]Bb \<k[ܭʁ$IUڵIc}iLFqU6qU՘k$Lbt̶0*k5t-t 2n-tLKF.@F!(+ L한kk8]y_o ,&o <ͱyEQ)(0|;q,]NL<T6 2~#5$p˱gtp&zK4ug\>"z$} VKOΙ:o>FHx* v9O[r wAC_{e UU儇J nXZۿ 䙏 Kǻ{U)/Hx6 zL^zO]]" ֬#U-Me+0?!!!-N@&dYkˉ=}M\d@6>WyMĦ@Q`܀?qȭ$ixݯǀ{+NUoon K߿WMPD,y aRR"II 7 PU6ѷow:[푪K qQXVAIV  _SrR7t#K.`ZIH'o>cv-fo={n|o<~NjmjGѶy=?7)[p;:+AXh;zoS tp{NT AVu@V+P '-NL{<j_ڵ Ʉ7?IؘBCYz [&^/+W_~i+5G KxTڝ zjP dp?e ?r<3~.<۸ٸ f z$H6pǞF< ވ DȃAD[NEݛɸ$kv^}56LVݴiS@#dՠSl$FI]FAA &L{S>b>cL^1^KW!lj6θq-/j۶^i(F擭?Qcx[ɼU(J݈ǣ%/, $u`P⨩U=JՅ)BRȊ&>dCeN'~u`0T Չl1 \X>AAVs9giK0N=jHLhXÍf a(!A_lӡ@퉈>y$i G ^YQN̳!7'S.q*k4:Stޗ$ }k:Z]y4 GbY+\dzsxM/1Y ؁1E/~M'Qrꯟ柋AJ04i3.4GP`̙3={]:wQ硪`΍,wҚz=֭#s@}(Lǻ} `'<ȢXAAĵ\ǟ|JNN^U{QhQv 'jjn:cT$U'梴Xm6:=YM+jߛxL ͝+K]ˆtz$2's()upykoܧpݱ~;tPf>EQNR'V4l6v{IMb^ wt7^}fEן^o/~?ʧt5dRx9,˨'լ(nft: mYHuS݉z&bj_lݵnY q֕SݛuUG%S{w Y<[C]@}]YY=Ë/Hw^:w%,, F#Be%t{k;;%<@SK;=݀6ğkם4m:V@7Z̼;^տ0qP^V @B];`0+)--tҤ!8AbEƋ`twA:em;]ta 6l7ONNN~-nJׯ֭[^=۶ԣ,f+kN|;I3 v 77;6VEP^^bi<eeeopB`0`Xصk}+Ξ={hoBeErP2$T1ۍA5>细8ڟ.^a،7ĤO8Z^Gi bIٳgb׮]hDeZd;LMh( y$.hIR' 6ͯnZ[MpeXdD-Wz:4{N~3DM   '3Lz5sc?wCbb)eY&$hАO$8̤KйKl@֯8{rRpP1ܢPk!vӷ혘ӲvM>+))ŋazaNee% w{鈎f[񐓓C~~>c4^sYggHKKkQԌ W_B'Wzz=l6mVUdYFף* W{kH5瞉iq}-^gk3ht8_ XQJKK?ٳܹ3{nRSSk+EX8U9c{) ;b M柖6xPf+Dz}nA;Ow  d6+[m-قO=SeTް\4$ACR}ۀ6U-@qQ>EEgdc𿯹Ǘhdĉ|lݺVMx(‘#G(**bMV2?VVB,YB޽h| 8&Mď?ȏ?]hKD^>|xt:RRR)//onKKK"##||'oSU^"OάYڵ+.cO$ ؙ] bϙ    g7-x.4m߫x16c,-@EE r|oߞn-[۪ MI֭ݺu7tCÙ4i?37onߌF# wާ711뮻IX, &i׮f˖->|MVb6$U|'T{^>}:>,{g$&&RH B˝"6ɱ   <(B6jޖ*nؓ{h= .1!5|=f3oňnB544.Ѫ `I0h4ЩȲLpѽ$I לL:munn.<< ~H$I`$ ̙] bϙ    gc٨,/Cף5)(^.7 kЩAZv-ڰ 49W "|A!_a#%%X-oV|cp$;KHp0 hut/$맲/+R 8 Dl9cAAAy,q)TRXnGQZVT(Lf3:`mXǭbO ŸݢHv6bN[n~Ԏ8ܭ]zCNLz֬YӬqGEjȲLPPP]kGyy9;vo`5SO++`̨^*ˢ~baؗŏI O?_>6DGG5&s7lTwXZZ%%9ٿ& gq+  ™(֠fUA.0|'~ZVˈaCxOZbZ>mrd7]t򭨨w;l6ƌfh46:s~Ï '"<Eaaaaz5O k:QvGdDz8j8KJDS[Yo:|BhHHڟ i( X-&SPXWi6E^P'}NvWAAAADXASޭ+I|[_sǍEpӘre;yn|-QQ1vhnF3Y̠WϞl۾6:bZvWMw%0o޼*~n D-(pIii)fEEBCCyhЯp$;w{o~Khh(Geȑ<Ȳ>徇a x'ILH8i;O|YY<5{a6_>EMfA|"'Œ˚#7/Ǟx[ByE%yeR;S[q{_6kzQjս1Bl6TU%/?y_E=>^sQQQd\ӧOgƌL0%Kz)((O?eڴiz퓶{ٰq#Ӯ1_V#<, EQ5ôet#%RPP}&98C :NW}rfT al޺XNAAAAAwȧ;AA=6l JzZZF\..I9Viٱk?c<̘1|X:s9.1?pk`&N={jt[YNK.s~]ƹEyqذ'N$Qrrs5,lJIII &&!eYn2C~FjPS nLF_ `W;wb_YWXYz_?zeEatկAAAAA'*A?P Y@=fB\vYo׎=})ziu_mMQU{}Yݫy^n7vpYg|r^/b\|v-zZ-ehZ ףGw*U4 t .`X,zqHՊ㦴udg Iw֕ BN)Q d258o c myE&}o9eevIOK AAAAAߠݾ ?LQ<YY4tÅsÏ2qcь3 ~}}w Ngŵ?:|%~ap])If;t / 7C=ĨQj+_z%&N\󉉉!,,EUy9lzmJ27nfsu2<IGGG( Ι]oM:9∎°lܹFkoj 9pC47e6X{}GǨ鴵c2   BcR;{{iDAh;/ߗ{Ov)*mzܶ2t`L& F JpPP塪*11XV桪*$̲_~!s@N'~i$( %X-zgϞw?qqHđ#GdZ ˮ]5jEEE$''ӭK=NCQ6lhqci6ؗ~$s珏Gn<7{NG7n#ٛբ8ޭ+yVKyy1I:d0~CW IDAT vFrnzb6SXTTVڿK']DĢDXAAAhVDƴ;ap8-A\dYfxp&=|wt#qK/GUT.>l C."&MBdx__n_Nvر#aaal֭[^GW_ѯ_?{=:wLrr28N RWEETVVҡ}*}7oK,^z?ӧO^1>>LN%B!B!bHX!(%Fɓ'd2SdB`0ŋ|rs?}`gg%j\],B!B!B$BRt4jԈ$j5+++s&Zɹs8u$$$pulmmVnnnؠǴB!B!B;B!D)eUZXXRjh488~{]aTcx̡CΎ^s7n~u kkRꫯҿ:vX꾄EgjVBRBA``բר}(JF.8Bsq=OnF1xmkޯsN @N8}$K!BXV*;!B!Bi3+RӦ>Rc^_ bR{1 EQh޼9m۶)uo&}CyB<%J'G]ƮIg<ی;~. %!!?3f[o$`zQ>JPPW^F9IhѢ4͉鐐zMRRYA)ģNVs1 L*~wdbH&:Vÿ@ogr ?|HRiJp&%ʕ+i޼9})ogr-B!B[?c\Ϲxv{yW7NʷF~#Gзo_4 .aY["ɎL8A>|f͚'%%^J```˖4h>{ҽrEtޜm~x:빗bX[@"1l0f͚U5jFݝTn޼yDLL u S~qvvwTT =K*==<==[[$ܸq<)y}Ug1Uξu՟k7gK.(իWYr%...DFF2x`+aaa3G۷ٷo5gBBf͚,[?{q ZFQFŋIHH > .`ee#6lϯN-XldNyѿX*zÆ yꩧؼy3jV^MϞ=ܹs >[nB@@|ի֭h4n:jԨQ9u̓O>:vƍݻ7C)$$AqF Щ54jdN}5ZS?7LbS3ؼdZvj'Tl/@TT4~FQLF2BB!B!xUdRa>s3#6YKWRR_ <ߜR SN%p(fϞ3kdڵntA~ ΤW_%44ICNe{c'ƏgEKMHII!))d@fc\x +++Xj%u|} V۷ԩ+WdјL&QQQzI,ׯ_W^ߟٳ3 h42|pƎ˔)SHNNGdda4rHٿ?ٳg3rH=y8pӧzyꫯ iӦl޼sqsی ~~7\\\XlGʕ+~+°aԩ .$55>}0i$֬Yco۶m;v 777EsS^gߎ;r=󾬧oߞB۸-6*syAogbuYRRRh&zm۶h4r)vBFFF M& _y}d~zHedd?n]8?'''f~z_` /ТE vڅFùs3)l\}-[6 !Jq=]O+'[۷~:-;Fhh(믿2zh._̡Cؼyy:N8u AAA;ּq{9\믿 Fʕ+9+v;:=h'G\ox/׾prr`|4o&k׮q-Zč70L 03fk>&/ڷgUgϾ.BUǏ{зk#,tL:u_vvvԮ];;;L&< ϟGۓF\\ׯ_ںw,YB5 4o+ߨ(s$h=<<̯]Zf9jժ &uV&L`… 8MZˋ\"""LJTbbb˫!D&ZsjzOtci;fz-SekT {N7pm^%UfѢE4oޜ%Λ2nq}sn޺EjUxs^zL1Y¥KX xVǎ& L,{|h4{^.2ժV֖+Wg 2{֬߀N[wL&Sk=w}!թF_hټ9( !B!Ũ83d CyO}sxxxjxx8@y=/`iiEZZF_GV/hOBBQJN]x;wN\|||rU'.. 3d&DݻW`EXr%c֭XYYoժUTR˗/h4O?aoo_SH`` >,;wdDDD_L&"##s$xzzHBB90z}FqQϩ 4C ޟY˪wlV |Tj-5hg2`\}gb9F &={tq_7bߟ٠(F#QѸZr@cǿL󶴴4bcptr %''O;;TIz)q[8; 29Ng]CGs7ln26()QX[8NLl,}/u;Wcv΀`tSq*>]R$,<[bckCxDGxe%k⛷nF=x4sCWؿw7ӓiRHIIao٢X~~ܾڹ3ӧN1~;c~ܱ7WWbbx{7z}r~޽ˤvDƛT*L&{>8|>۶agGpc4tm۴f゚Qܽ{ߛAly,.*5zgdpi:w# RLl\ >ժTn|j55kT'EEK=ZAχVERIW!B!DII7رcz Dpp0Ç6ڵ+M6ʊ7or<+j5k֬aСlذd5j7|ol%=$%%?WgMJJ*vB)+N9gl1uG{nEa䩩L6;5cNEasᅴO~5&<ѭ[7֯+y2ッv9Oz <==q-[/oٲB<*tsvsfMkv.|ؐ|}gL:;wPV\5kyOhZ֬YS~}||سg SNwƍxxx`ggWsrU駹tڕ>k׮QZ5M yرm4iBhh(aaaXYY";' ;>R0 槑I$*{4d2iT&OˬńJ&32nJT xzm_4<Đ 3{k?vI8Vo[1 j|Z~,]kHOO`eŲşkO6?`ԵsgvEk楎omΞ?ϵkHJJё3g0o,--IKKc_~:vh}*xΛ z!9+W p`aaC:#: #b5 g/؉*ಸFt'ƋI v.^$**dRRR9xy_\]\2'7.>V(e>B!BߌiS+;Lی>}^[|_9pH1Wa2^6ge˖\%:^^ .rk߾OqC |9תU{ժUc۶s.7o".u}˖橞W^%مx U%~r<]b~XzAnܸ+WPV-BCC7oVήoY^zEn_jbQ_RǎegXR xR]ZM`Z+[?I|i__VI'/:̜9&M~:eyn0^x~4Q̘>k7l`NÙx(RunM&y\Fؐ3Pxyz޴wY~=?+/ذi^+q㰵Em _ܟn.E|ZŅ)5z\d%o&&(Үd2kӆF 3 gw[2)B!BT4 .f_?uy3jjժY`^o"Ylm- IDATr$оL,("%O8iذ!;gϞݝ~Jwx{{WBT*wk\xreUxor]gƻA橀Fz[[[lmm3k&<h٢9Ǘa?}{127fʱSݻTn$&&͊[ky'2u^7K>Ykysgtw7wkJ['yf,,,eaS=??TGkd|kצiYhْrqjTN><^=zsnB/^DVa4TnyV !B!Rػ+; !GR倩y@u&('~=Ge!(!EQHOOҥѫokTt"oյ\cw^^9'P7ob4pwGӕɘ֮#8d?|yc?&<[ַ's.Fňml2HLLD1-ȾSNWB!BDߌoۮ^ ͵MTBqq0蛑R,ESZ舓cj5^B{>#"kXz Ϝ^زXXXPFRObgg[T*lm+7!B!B!DB]gϝ+8;9E4n԰T}o772P!B!B!DEBY:2I˴?!B!B!KyB!B!B!BDŽ$B!B!B!1Hc@wu#B(dddS١!J(>&h;[t:-j<(B!RS[XTvHiiDߌH - _QihоB(qP%TN=կ^B!B$]GoCz)TB!B!B!BDž$B!B!B!1! `!B!B!B!xLh+;oeX:}%$B!B!B!rO_ Hj+UO6'i@qK9<7n(W_}ӱc< !B!B!B!ģ49 ٦ G3[ G%h"B!B!B!B+iPkxI\IN=Eaɯ}gdd'Zm𣢢puu-Q111XXX`kk[BT+Wr⏓$%%AMpvr*._$F&qvvOj9y4+u|k= pүwo7jX١CDd$F#USZS jդwb',< PQVMKԏB!B!Ѧ.IM*r.0k+wΓ?}_aWkioѺuk\\\6~:ڵAlْMr…"|96mJFV={$::'/Ҥ4}F"A""#پs'ʪ*;ߨٽw_j qwfN7e܏rW&u ?vJh4s.n/WEBmN>57pҥB $111&)}·fc gUD!B!B 7s|w3m)~ Sx{2,rS9eۚ_ 2n!_qqq&Nz-[Jg&Ҕ͛7bŊ6lÆ ˵}ɼ=3 0t Ms+p;wrQ>}:'Oĉ9ߏFaٌ9G'( 6N:pBRSSӧ&Mb͚5/;~kPUqTqq8::ٟh$*:W4My.z҈ >G4EɳHIIat|ڵ!w0_?Fk p6K`tZ-; t+* {{\痥8$KFFQѸ8;쳠Sy:ta<q7ouV%䭢(MLĦjݘX^x~t}BW."d2g%B!Bhۺd޴Tc=Q֏me~ǻ}hY$E99#UPKH&=-^ͼt_L,cirNٶ6~؍?K`.PT$˿H|4o>̙3sQ~ox%Kfk  =>0(qvv67gL&W^,]HL&&((1x"O&$$ ѣVB.Bi!e(%%n;o#q @oܼ[ArșӓiRHIIao٢ǷoۆvcT>!qkL:Z~Wyù_VO޴w 'DEɵAF Pzu~ܾ1Gvz| NWksCy7QTL&z|7q8x_I1m۴f゚Qܽ{ߛ#ٳ_(ϟSyKt1G }ű/8SOuƷVSFuB/\DQY8ܹCjj*^^mU9vXXXODxx8NNN \=<<=ŵB!B!DEЖۄ9\qI9_vO$ =0kqv`m^J9p)WպY%7~B$p^ I9ːJ2/oOzB$!$pa_ -C?ݤZ~k/ϳiiiF]ZfU1Z"!!pDD66688]YCjj*111%^OX!DŹˤѴqBMAXjf9S`ͷIFog_2o"6^Y:ĺU+prtDQlxm+h4Ka;oN@ޭqfL+^yu2=uiƴ h}䀾}ظ;s[vMwavVT Ϗ&55 FӀdkW,M[ﰴБ_9222̷MN-[RׯyIQr*#=`(pzXYYxϜ{7;wp0O=[f֭\rEQ8q~~~?~hۛ;1cO0vB!B!Di%e7nL,YfnHݽK 53Ӡ<&}RJ&*&/ ~|Z%zmkk}S,ȴ_]svGͦ0Ξ`"Y<=򕫄GD8̙43s+5yz@Zϗq sp~?ZSzz:VդIcsum^^xu|SIIIࡿJ󫃫 ֮qٓEa4/ٴimڴ!,, +++L&GfرҸqcڵkד! `!B!B<:Rh%pQEͪΜl9Sv6, `R&,4*-{/$Pdc<0qRWߺQL=t4-T|7pw%<[aЌ2?k@Ybyۃm⦃~ ˑ#7s&T97;ILl,`'%y zy)u_Hz$3pom62e2f?bbŊIvMmNH1Qȋ5%5CÁ*~fTuŘJ]⧕GJ?b~T*֭c̘1Edd$ڵj5k֬aСlذd5j7bS!éf{>2+YT?\Q,ʵ^nQ9`Lݘ};ْ8*mj⣏WX 4`@=uݻt(z||LQܸ}6fXxD-wMs,d&c9…N_5raS\_h߮mmu:;}Olmmj׮k[TTs9~۷/B!B!8XOEɗzP WO \[֖swfcԬ>ņ1T&}撚BdO7P!&&www4M-hy h&>mڷBQACWGfS& ksxߊbggǢE8y$M6-qEN3ʓ=DFcV Qfچ}ڵsÞ={s7n;;;cǎ8M4!44+z S gEkDZjcQ үoP-5+Rh߮-J;ԯF!--Aj>xxcm0W$ܾ-j[QQWxD}iz_822ooonܸAKB!B!DEQ:˲>ʰ3iVlR2L QO`d pT~3n8YFt_N(!ؿ4cdicǵ8Xd4hTNMƮCK,8A՚-[_= _ yٵ--w[Ѧ{kgg#[\YS !xzO/像̝wė3?U*ΞYԣg14_?1Jr|Uoo bbc=cbaPp` 99ڵiڸ1Ol}{9v85W_eǙ3rIt:<ߒMYG`~7~WtɽtGU~m/B!BQ5NwJշeZ˼̯i'Dw5pÔ {M欂/m}Ĺf9{%E]se@{994lQԣf.E]g`LΛTp"25 /)AWkO>Yf,[>*U05B(q97o^ݼu{}>޻w\.=>6.{U53Njj*7ogRe_kUkov]Cah:xh݊r];[ YAIII`kg.*ނ8qӻwo~i^z\m.]İa YfѺukrw|W:!B!/5%\puslz)46!*'ڦ-nD!Tg DzV&~ =K4e[oqoʀ^l˫/c]ϖz_]kVIokVCI]f1qTBjU/Yޞ6-|agic3mROuoaaJoZD$kd$* ~AW8Kuw \A8qo߮DKez)*BVhkk%~Kz#N:>ժ{fٽwTkXgN/\ j֨^X*]j.KtOvٳ'߇|}}9vFFCǎK.oQ8B!B!ʒ)=V% //@@…Q6&gtt|ggyu^ETT_֚g_̬vTx{^9L=g7:ygNekgڵXr%6\v ($,BR]gϝ+8;9E4n԰T}o772P֎;y&5ѣ={}sN#ɓSN%E)B!B)akUk3zWUlY&Lh_gr*,bњ4ZN9v|2-[όgDݺeCW1friNe-kWdxɒ%,Z VVV|?I !j֨Ϋlݒ>ib'JŅ׳ajԨ#GpppѦo߾B!B!m@,^qü mδI }a5~ _.ͳ]^߭v0 Wޠ/)r6uKnа%ϩ`q.7{atbƍ˛Z1/W^=""K՟$B!DYree!B!BQ,nį/2q&nȑV* \AK7h@-ˤg%}I̚5ƍqF|HǙC֯+֭ښ~.^BB!B!xLHX!Dj|}}IOO'##EQ$;ԩW76l[X7t>SiҤ [lE 8DԔ"Qꯪ_h;qXء*ptkjyUǷ~%--h<==+%{]VVEIB!B!\СCĠRjժq $ "BGC$ZFc2$++K CmVҪ69;88( (•+Wx̟ōAQ)K_pfAwWؿg< u=4v.Nu<]I1CQRݳl|Hwܙ {nn߾J*ܽ{v1uT #R /._nAAAܽ{q1cƌ<ۍ7-[2fsկZiB!B!xLThxŊp\?{ ilWPPaaaʅ PL>1c`zQq=$߂iZz}m:]m)n[nOӦMc28<'O$##^DI1;lēTmˡ9qF|TFH ѐ68x`NīYMz-xKێݧh|\`_å( O?DVw}رcqrrbݛVZo>.\/Rf000@^x ?#7nDQƍy]'}%+B!B!B7o(v_硷 1:M!`Š1l0f͚dbb0>|xQQQWzzwQU[S{%H4B*()WQ RU,\Q(QAP"WH H IH!$L i3!}y|ш^?Fypl;RtOܖce8|0_|| ǏuЁ#GyB!B!BU!Ofɚ:7Mzh z6OtkO;,%11;;;RdԨQfcZnMFFs@@OTRZ5J=׵kpww7Lk2 !A 9sL-Z <<^zrJΝ`חΝ;WH*/='|nv{:~J얦aD{xgq{5nϦyعs'NNNiӆH}Yߏi -[ʕ+RXf g۶mf:B!B!Pa """J@WN_iԭ]*[i*_k8VųfvJ" ^֭[K\711Ѵm4IJJ2%r5IKK#33777.\`UB!DIlmmֶ/;SL!-- [[[S5k3ߕ8/L5}o*S 0Lۍ7&##ì770k;w)))kVåB!B! )mi@w+w=?@ Q''59i~qszСC <Ç3o<֭KXX*@:{5gٲe\^PVrŊ :v@駟e;ӲeK"##&66«'B///:(9}4? !B!B!*ؚ/Vo$%.=?F~=sLn?[pvJB"~vͣf's޼y̛7{{{ҥ ǎv( ֮]رcRs˖-(v֍&M@rr2WYfѫW/6mڄ``ԨQ}ӧC;B!B0ڻw/ڟx ػw/iiiJe߲e M4)C?0 hт:y>BT*WT\ 00țߏ35:۲mVc:>]֪/cTԧ>|;{īm?,1k,\?sssz899G@@%>`ƍڵbBQDCF.;MPcVcgPa&/R!B!B>o.VPNXVH8tyثܾ&Uy;uŋUY_Mjԩv/dԨQ~ooo/_NcȐ!ݻRiذ!֭3U`ԩԭ[FÙ3gP*̘1cZ׬Yx"Y_nn.~~~|<3V_?}DP5+}0nd{&$(] `k5r{6FTP*Q(W@RQ^L߄w zgkk[lX8}4-⫯*B!B!BGɜ GK -jo;O'{O>>ٳgc1c ]cMƱc8u>>>̝;6m0i$VXaa:h$**#FȐ!CJkѴmۖL?666׺ pKl7H SA_s !^Z=$7p9z)neՠA>̩Sc֭UtXB!B!B!D $VǏF!##,4O>ԭ[P_vv6^^^nSI7oZÉc߾}ZƍӤIbaq]ֽw,X*c~ܕ$BT> `!B̈́ R P &,, ƍg3gC0HBRaoiRʌO;bkdŧ玍֜(F1 {xP?F-|)9:ZyҮ↣Y_v^|\=UVMt=o4I~ O±v}e^'K>ϕدs-3/_|%"cM g秤oi|Eپ};Qlڴ~_:uX5OVV:OOOLGGG>^F)qk׮䄓ScF# xyy:B!x~~X'x5o;w.{fĉf۬/>>lLq͢|}}ի+V]vZI^^88މGmcA`4TJ8JQ$,B<ggg;HH͚5PtlWppbwϙӋ0 N6| k$IJSgS3H:r wHUf ,oGY31<9s"y2s 3[fD-M_S͋jсN̉7d㡝&'b0~BՠūhX:-MkE8$k4/|!{0qz?/CF5pwSr_x'j)N.\G2]sW.̂Hf^.}[=oafͮLcry` 5FӰF]OfN6<};K|->%ݩDǞZf:[GcOqJݻѣu7AMnHJJ޾ ͛73a.^R$))#FP(hԨ~-wcy<<<~::ӦM#&&'|<233 (3g2fΝ;3|͛zaU:FJ 76N#9Qo;N1]Ҩ|L`4b8QhBFz&Ao3IރoeMTmD J:y7-?)>y-Mkfj]Lk| d7:{6_pw OO77SLco61WM >^.?R*4}6ff =l.Ao{kV;_7~96|{ vC(Xd'3 `;0|[v`۱?xO4Q6CNY|}^\:,&7o,ݶ{ fO1 xS}RyPlc|p/Oo|s]a"3OIJNc4k֌ѣG3j(Of_~!<<oTMPP۶m+8̜9>(x۶m㭷bϞ=4lFáC/qظq#ӧOɓ{ cҥdddкuk.]/||[mQQQt:I !B7C;t[L5MѿEMf޽VSOĘ1c۷ont0iWciaP,IzrKQ~ `!hXv-]t)Pjj*qqq<]d}y S hZzvO5#"kReu2W5\GűL%;EڡXu|F;{Gbc { m @eO#:L {5cOFxk.vY@JV:ێHLKh4ҳY;]woνiX$meۉC,|kܝ\Sl>, %_{5Le[7 e7iZr}n+k}~JWӧiԨF2[jȐ!]prssٽɔmƁHLL,8Ϟfw,m۶ 00\[l"B!? L:`j78eEh"۷ӭ[Bc4iB߾}oG` ٳg* 0zR3\k%ux?.QN zD]. !YQIc8)!l? A>69(9OM/l6nJN%Ie|tO'%.*پmog%*N%擕W5_Ncզ_$r!J>z~4|ʞ3wuM5>?%]k+GgoРAP(~f :yq nJ``Z|||'O6?(hܸڵknR ɅƖfIg!B!v*viuTTQQQGȑ#h4RVBOv>k͟?[4ea>k EU\(tB!ѣT*KT!'7 $dQAO(_B!Det#@ 3ɡ_~瓗V5^~{_+m{e1Ҹ&@'y]$|ßʹ-,5 W9vP0D Xջط'~{cm腿ȻDcqupB2O&U!>5,Q @B V.}׻G׳ff7mcϾ._IOaC;dٯ?>tajuKÖSY97s Н; @zϚZS|ʓM¬~Sq[r}%ﶡCl2l!CLt҅~`]ꬬ,s'Yz5N[9xUV%>>v;;;:uno'=tЁ#GZ}B!B!L*Jw~n˼EmZOw~>DJ'}64E !V%::9s[hAxxi4ncy~*o6=Z֓20=g~Fϙ&o'B{ ߽۠OʊiUT3[9i&#>zQONRz Sz13۫y{xf4mYL3 J>KMHjMfN6]dBV\IO%=; Rɚ]Xm,x/ K>?]m W^UVԪUˬoʕ1???IJJbʔ)˽z7ޠM6TZTLB֭MshтAQ^=gXd f͚dffҥK&L`{8KF.;MPcVcgPa&/Rqcdkg-IL :=Պ/mӒq)\d$)=W2MIڍ TXw_L øKuBcr4yħ^ bJi߫7y>]K gW翔rW2[v}%Ү\vիco_:N#..///Ӻָte:ܹsԫW˶B!Z}̅31ԪjvikܼBqb5_bNWb(f<[j֮g1?oM"%[N_j,[IlwG%:#9goAYl}HI !B!➙%ժ tZ[KvQ(K?t/uUm [{n*}TtB!B!x%=UҧwP^g٦l//c {FH!#]͍_\`F?b:FsaF_FH)u^ HgbR /^yҍ6ߓL?HC7DoCB!xrb-]'B!B!.fwUZhzN~8)mJؤh7M2n=t.}(jjڱM|\Tq:Umqw,}j|C(껿5Mm࿙hoe}s dJ h!xt[xI!I*U/k"'B!B!.M |}7}o`$_[Z;5Z#6*J%0fO&b4%"'ȍ;dU)psTZO\ !~8HlZѡ!B!B!Д+' J)=pqeQ*arTYR)zm+MWqU,"y|-KwғB!^U:77gϒ'Tw{%$$pelll ťBb-O;k+,k./z @Vwr[!B!BxxJ@ h wJYW9q:ί~:C_q21]ݰQ hhktĦhݾs\y&/?БMr7ԷjD+qRB(g\zm۶dM{/$##X{=+$Te5L=rfQ=q۷o'11Ο?_8Ŝ9s8p@)B!B!xe+s h^`dc F:nCft3cۼѷJ@=yjoSWբ Fxtױ<;gƄն]{ژN /5ۦ*P*B!Cc„ L0gy_~7nm߻w/ݻw穧bРAԬYCU`sFc2͠գh1꭯̱}vȅ شi< V͑O\\j֬IjլK!B!8piԩܹ &Xu)+0aШ;d@h>^Q<?j՜;_^78)vV}yegWB3*_&f*7%nJ3 u,UQ%Tbڶٙ7ojYIhJZmOʏc>]uQfGegOjv'WF홅wHUto^VɎoP(۫?f###ٽ{7=znݺxyyMnHJJ޾F͛73a.^Rdƌ|TR4zҥKqpp:;VZ1n8B!B!*+ ]tD}K֭yW-[L{yZiǎt/Q hzȗF#R1r)Mg,Uf|]'_!xUT ]z4h`j۷/cǎlڴ7nСC 4ҞG놡'x :SHѝő϶۰:=$F4ӅG|\7=z4'Oflܸ`f_~1o8p Je?ǺwNRRgϞ%11/>N!B!qo''S{|{O߼y/i֮ht:+KOjjř-UN˗*o.GQ9=c.7(Y^`hBAAenFPy߃BQ4 k׮K.fOCߧ)))dggckk`}ʢv׆ڐ#mƕ%\xrpOؗ;ȊO/^AvrSҪO>ONF5j/C aڵˏ?ݻMmڴyZ#-kaVp袆F}/kbl˶|jzqsPʶlsyd8lll0[PB!BGURL}j JJcÆ ///iN:ŁpvvfԩKF.;MPcVcgPbhdݺu\~CVRA1{lvJt\'''""",A[jJM '*(븑NO ~ j,ކW>##.ŵ&گ%7i#K dcBĽu{6-ewqbbbUR~}NȾsΑkVn1 !B!ZM[垕wFUDhz긹ѱcG>3Sye???bbbLCCC a…xbϻl2F'+V`0бcGV^FBfB!xd(Jˋb9}4sdB!B!DŲQP[eUVT(UYqѤI{9}׉ё~RYd u!88mZ}nݺѤIBBB:u*˗/__~96?vu8B{BeZX!B!{9E㶼z=/_FQF lmmK=FK@@@o%..իT9*ټֿCB!B!B!D%RĮT*jղ;;;իwOupp9(^o$9=B!B!B!B}܎OMO@4Rp%Cdgԩ׏N:UB!B!B!~(j*FQ'Q;6LsdϞ=4o^~k jm>!AٳdeeIhh(*l̥KHHH XƍB!B!Bˢ,N/~gJ5FC * Rob0Ϭ ѪuZ-TZ\NHH{{{K9^ӑDժUQ+B!D!ϧJ*pa~7^xߟ;7&&&]v1qDI !B!B!C 5I`Q-FƽZm̰/xƂu=sWV 4G_RIFFk֬gϞEckGҽ{wRSSMO?4JaXd 1i$ b:~"//~:w\B!*„ R PpՂ &,, Vˎ;9r$AAAF>c=J6m*8r!B!B!e,}ׯgժUI|jſtי<5scKл:~Cq}]FENNNFih4 2qqEN>͞={L7 f7o$&&2yd"#wc!JI4',q>ͽرh8UH?~<>>>< ve[ &&X^}U4iRǽx{{ΩS̒B!DeuURRRhР07̬B!B!B2-bkڀS_JvYI8losn"Fn,ԲE`V*TV$ڵkcgggj/g(He-`!FaڵtT35kd˖-t֍$pwwhB!B!B 3DDD:Y3ɿ»uo:v_:{Ld~_bMFbSRRLہ[RR!ģHӱzjҥYСCٺu+~-qҥ T!B!B!Dy_S:W7Eg 禮Iܵ9-[Xb;9~OxbS_pp0-[$22lbccy-z_B!`0vZ0`@~gggy&OLDDiiiUB!B!BQyY&I'=f36n>OF]P85~YҲib֭M4!$$S|r;k,Y&!!!w\t ðapqq !hdЯ_?Cwpzz:SNC-**d!B!B8piԩܹM0CdB*J@[H9<MԎv.^<;pL ?<̩m k^SGݺuc…QzuL}G1۪U+# GGGfΜiQ۷o7;ӧԩh!V%::9s[hAxx8Ǐ~CVc40`~~~B!B!Dq'*vEn//[W_5K]FF[l1݄o;vн{2/'j j4Tj{>>رLMt_ g6 qSy{+P^=,XPs !B!B!|٪T( JEAW0~WRb>|)V\I\\FXd cڴi <ѣ1bg&;;.]ͩ0  """ܹ3/FлwoL_]l\[lx{{3c "##M} !DiKػSx~L-B!B!B;5*n?@T*jJM%(KH^^6lo߾xyyXӧOs)83SN/rٳg9q;w,x/vvL>ݻW_=|_|oooÙ;w.FBQx!{B!B!B!Dbn'*%v6bVZ\?ӲeK,X@v'ի;bb"xxxjԨF!55__"1m;88^eB!B!B!ވPQ*Qo~VP+ mʖ>[[[BQʶ"B!*;ƞ={?tƤo>=F(EE7o{0ǿDjFS TqMESȮ]rJ} ./ӼSN5ۗ{)--޽{ӻwoVZUܹs,^+VpƍB&LwL4A+BHWQبn' JB$U lp||g ??s2`Sry|V.őB񈊊ĉc>#L.]⣏>"11Ç ۴i͛70_y\T̀ Ⱦ(*%MQ\H15K󗡷\ZtڢK]LPмi%&xQTquAa`@M~߯׼3fyV^ͣ>Z 7X#guƸRw}0OQi ťuOmJyq Zrq%==RRR Ջ_~NW8PSM<"^}U R;w`h?өS' ͛yON>}ؿm!BzLF j~[Y۾*5 4?;EN0a~Yزe VVV9ZڵkYb[ۛ={_VqF;<<$eee,[jyyyhZLWƙX[[cmmm4^'-- ''Jmo|ǏAӓq ZM*t]iRe5rSbioMZ5ʢWEד{ NGg󡴭Q-5ZD[JƵ+Y5¡m]+ KEb`Wp G#_ƱQ4>ٹװh'T^\ZBvuܝ\kB5>~+3nM_Ubcc;v`̚5֭[+y*111 4VWy&F0Ǚ7o}SzMw0~dee4tTƤIO())Ϗ/`ǎ3/!BQ33FJ| teJ)+5S[#''K.j4ܯ_?Μ9cPG\t*5;vHrr24lذ6 lw֭Ba !?^T*̔/ Z .Fm۶) |Mvfb$%%1p@py|ϧznݺœO>ڵk 49<:ڶmV믿rs /_5-ĩMNGȥuW!KQT ox]gzpgnf #s aMyl =xd9sl?}N[ƞ98Qeeh5;޳~M.^̐LAqj’bN-ߊU#R30|E%}`LLJ֖U,۱7;' [V ~RO#3u$|<6s<<[s,m)[e묥4,@3G3oymEX[Wk<g|AN nX;m> Ml!׏7ՙ1cgРAiӆ''J ƍe֬YҮ];mۦ_{ԩS;v,ӦMR'$!!lBBBhР#G?6)ݔ{uߋ/2d Pr)j?Ν#33^ʗp#--kҶm[HKKSSN% t.\CXjA?Xf +W$??*0wڰaݺuUeW?Naa!;v/)+o2pksx  9tV6 Z,Tft:=.~^-xr[E7n~ >n3[U;8Y$~K@oq) i8RVF.ntzVWN3LWfҹui僫;(6fTQ~؝/v.m jуߴYù"g |:<>SW-"8tvo5y|j2F3@ʪXX+ɜd|^ϖMMSS#.#mm4lvRF+_nM_uf̘֭[4iӧOgҤIl߾Rnݺq*jѣG9s 锕xb%M,-]VVFYwdd$iii888ߒwMI^ѦG}DΝ_9w4טoɓs [7ϵ.'B!BIf !NVKÆ Z-ܹ??? ~;v,7o&$$B"""\ݻ#==^YpR~ƍL0:O[fN%%%ݻ{{{ƏOTT/AѣG+M}_~ʬ;@P쩹yŹ=]vCKW nf ew"mtg#UѸS E1|1U^=6M GMX7tE~9{.|j\xӣ?p9 oaЊ5< |Pn#kc  K+y} q$K/~R~h&J%-TqUJ= {kBԑx)xAn7DY6w{gTΔPtT71yĉͭq=q)KO4e˖{ժ0^ҒӧO/ХKLnb[ 2sL WT%::ZB!B!$`!^իٷo¢<U\\iРFƍcѢEk.<<<ܹA23 VRZV=ixPsss Ƶڟf_X?FVKd_oP}ZӮfѱwνV~0#7V[ܭۗCn`fNqiIL uMOu_P~텆J2X)vdeeչ0^7넄pu~iV\Y_mlʯ<ֶv4&?t$B!BDB!JEӦM 3gl۶q)izvitlf͸tRm*$...T*iҤ P>+PL۾}wڵkVܲ;^> WNq9l'qE1JTwQUr *4s$+"73oШqcom簒9$O&(|AzTߗVTU}%:c%󟱸;p9Fu+y/w ̜ۂnb[[j6l?m1`%ɵ%9rj06>.588Aa*oj~0SƧB]oSצ5XL%@\[W\1ƍ+7hЀR\X) \+W@PPAAAwÔ맺]|||gnڴBZ-_~%~ zzzO?pebbbI{M9BQQP续T*Fʕ+)++ҥK|w\s=WrB!B!ĝdBQiZn͛7iذ!tؑ}*y_|… )))K.?P^{5}QZlin:}Y7n3+J#88sңGe+Bݕ:uFhh(L2+V0|pZhAnn.AgnFChh(<eaЀO3Obӽ9Oϧ9wp 6m؞3`u윺czVJj0r8*mq+Pk<>Gµ)пC7v3 tukGh|1 xs*{C#zB{G۴[3u/ h: ^OV5k/3`݋{W]j_?iѢ/_ :/^LPPM4͛< 6瑒B۶mk]N!B!.אBGH;cMƫm MʗOQQvvv4hРR^իXZZҨQZ~1[E4wi\)OAq9YnRwƭ|sܥ ڲ2.dgdcCazelZשà\?5ߏ?HXXx]v7oҼyJiҺuk4i)###*NKKÃ~ OOO53|Bדm>>L6ÇWgig}k[AW3Ks6/ZYcke]N6vwkuuXYXҶ~m6~O0hhģ{4sهAMs7L~jٿC|:qt++>{ulmm5BÆ 6lK,!$$gy ]RѦMW\IZZ2=993fHW!B!=!`!B13g|]3K:GZim`TڭB}??3f<.<Ԝؾ}{/_'x'x !B!B!٢g^s#'MS2B}??!B!B?\L!BqOi4F_݋aTO!BQ0j(W_}?0iBKf !B!B!$J.|ʴJݙ9sAN7n ::Z{ƾ} 2N!6 !ԕ+WHKK͛닙 HOOGz/SNh#332:uE;vI&tM9VTTĮ]077ytpUT*oJJJpӧO畘HQQݻwWz"## MivNNNӧquu%88'''ɓ`ccȑ#qtt4y|LOdd$YYYk׎VPjVŋ;w@RSS9vCE*_2hz聫A}-wݻc0{ڵ++B!BJFjANiԘi4]oݺEvv6wcb*jѸqJiV VKFFM65XC!n'w!ڲe pQbcc:u*}a:x ;vaÆ澀 IDAT1gΜ>"<<t:K.eٲe ><@BBNNNxzzk)'N0|pZjٳgsrr1b{‚DZjELLuΜ9dggӪU+._Yf cƌ!//#FpQ:u_ͮ]k<8-ZįKLLdᤥ)+СC%ϠA!00[aDDD0fFͥK'>>777Ƨ&gΜ!00OOO:vȆ ػw/K,L}fĈtڕxbcc9qK.e,[sQN:իyL:{iǎ[9r߫W/^x?B!*j52wW^yNff&cƌ?fڴi$%%1m4fΜ?κu눏gĈjz=yoM8|0'''GfXhIII3ydN>;|JٳgұcG:wLf͘0a%%%oҩS'pqq6l7n4:'OfFӅB!' s34j5FFQ4%lan^뺭(**b۶mIF'''sIΝ @FxWklgԩ8;;ɓ' ~1ŽCAf !XJJ ϟ… 8::ҵkݥFrQN|RXXȺu={vy8x +V0Z_DD8;;cr? fϞ= p,X`G*? 2Woھtlf͚)dɓ5+==zx駟ҵkWlmmIJJ~q ر.]Ν; `ӦM5JYVmҤIxyy{nyYlwf8p:P\\/6ۙ7o'NԷSUqݻwgժUKJ2֬Y7|CϞ=IMMaÆt֭-[jB!B{#ihifFh>cZlɺu裏2e <K,W^UȨQkl6lHII eee^! w!tjiذC/`ݻwJZ׮]?>Æ #++WZ֘~ >:,_owym,\;vOƍ bԩdeeر#zJg}Ƹq Xj}YYYS.>|8zҰaCwn xYXXhO:]mOzWZn1KKK,,,;w.=3߽O>ɗ_~Y}kkkT*lyOď;͛7Baa!߿(_{ő^g,\P)qF&L@PwIII {U붷gDEEFlƍ+}hg !B!. ӕ{j4f-lu y31cƐŋ !33`a&Mpu ^KF՝m!ݒBQy{{LXX۷Aw龺}I T N8t{Z6h*_7 ..˗/ƬYL.~!۷oښ;vIMMeԩ,Y8ŋAjj*CѬ=z8z({a2?PyU6upp@Ry&rE֭[ʱJGGGƍcСY2v~RXX}Yi/#GQ{'cK7(sR>}:Fb,_ࡈ4<==tNzܔ[n׺uji>}Kw !B! K334j5_53u eggG߾})))Q_4n`%ڶm|y:v숗:ub„ |Yfe9r$jkײb ZnRQ]@LL ;wzd h!*(( ==֭[Rz*'N @SVVFii ***BR)7W$$$pqJNjQKK?mt:.]JXX666ܺu[Vڋ՘X,,,Xn߾2l0 *h4̙37x)ST,--#"".]PXXHLLL|r ~j[[[OάY9r$]v}=磏>gϞ\r͛73w*v|7n3f ** '''ku~RXXaaai|Mŋ}Ϋʿo?VVV;vGyRƍc(]]] d|h4HLLTy3f Ǐ^]vo xӦMIMMʕ+0[XXcf/_N^^6mbɵjӧ^^^ts!B!L-]eRJ V~ҥKhZ<==iРApуK.qyݱ1o?99֭[< [U6lUBq;,BCZ[2|}]E߾}󻗞~isΥyL4I9{ep}h4BCC+J\\]te˖i֭ԩS4n___<<<سgܹsѣ>>>kPGn ggg_bhOOO|}} DDD0vX)))5"B!B'Q"vvv=nV+Y[[[jཀྵ|||6mw޵.ooo_ fff&-ng:t˗W3$+B!BzOB!ZѴm2jԨ?7ރ>?3g3f̨6''z#B!BHX!BTͶ3yB!B!B!D{ƃ>ݾB!B!_dJB!ğȩovB!B!!% Bzʕ+qM5iXXXș3g4h_-ZD޽ݻw} &d*]& ~iҤI]Xe߿N:F!..*tԉ-Z(ر&MЭ[7XQQvܜ͛ӡCz8իWQT8::Ҿ}{())!**cgg3gʢO>5Ibb"EEEt]9뉌$ 777999_VVFTTOՕ`L'O #G1E~~>deeѮ]; ,Q]57n\gVS!B!Bzj˖-bccѣGeԩXZZxbpqqݻ_| >777Ƨ&gΜ!00OOO:vȆ ػw/K,L}SWSfff&Miǎ[9r_S!B!Bz+* @ ;vL,Y%>*))!##;;;e`-ZPTTz> MJ#7;4 WOWZ};fffر 0k,pϞ=aРAqV˜9sSݼy{OСC7o^>DFFݻIJJ }͚5Gv㏙?>aFhhhNC _j {ҩS'5k{ՙ8q"_(|i<&޿›r5}jVUf_Iד#VVVieeeW[vv65b4 0B!B!D#~ !TEʱ//ڨQ#nݺ^IHH ':]yŅҺuk&L@aarZ-_5O>dWueS0]5/s:ӑm6 UR6aKLی3x饗`__~ 67ͥ'''嘙&CHdd|oߞ .3`nܸQiCٳG9gMcÆ L2 bRٴ4Ξ=\7j!CvΝ;?l0+VRcRn?cog%##Mv ƍ|'899ѯ_?5kƢEHJJݝɓ'siqww?Wʞ={;ҹsg5kƄ ())1?((7|N:燋K }UM<իW0B!B!xIX!RRRa͚58::ҵkfeeqڷoҥ ܹS9i&F,uDFFgΜ!==SNXƆ ֭[ه8~8tرRZsXڡbN#R.<}:&Mbu֍srro߾O?d0)2339|0dРADDD͛4""gggkckII y뭷WTTW^%33__:}%͚5Sɔɓ'j쟱3v>_&MLxx̟?~sΑI^#--kҶm[HKKWO:ӹpbժU,++c͚5\| ?۲e ?)(B!xˆ{B|8zҰaCwn`Vh moeon+-'\SPP%̝;WWW^Wʕ+޽8|8SZZzOh{VW]Y6DV){괵ט=CCCqwwGRU[YYooChh(fIGFFbeeŇ~5;v`J͛h",Yرc%0aiӆ (8dxUzNѣٳW*?dȐ*upp@Rcr{ߺuKJGGG)..‚qaeeg}f+?SrƮ/T*OnPB=S IDATҪolznnndffV{UbjPZ!B(ߒFs-'4j4^ήsn";;GGGŔU_\\Lff&nnnXַ;iZRSSiڴt6mJ ȠiӦIGQNB!T*6mj`tl޼+++F{woxyyټy+Wzj8%K7o!Zaٳ|fR]vwRgMCJ0*JgggBCC9sImw^ZjEJJ )))k׮2666tRT*+W4Hoٲ% xhee`0C^*Mh bϞ=ښvO?\v[FVKIIGoMcL׮]h4>|v=⍽UwqwwG׳sNWLL%+*?...T*!+;;jǔϧB!#3My(C[n52Ń{W`ذaiӆ_~iӦĴipwwW_iӦĉ ۷7ޠe˖Ջ8ڷo̙3/ M4!::Z)ӸqcGqۛ_~mңG\\\طo__QHX! 8{Lz*'NPfznJAAÆ x7իWرc*`' {UǏ'>>ʗǎ38^rb,7ү\M3G1>jrYSWEXd @S[n5yFdll,[˗|r~mKAAA9sxb!O (S gpp0~-ǎr%+^yyyJ8rSе|Gt:ؼy3O=Tk;>NNN73f(4TU0vquu%00 (?;f֯_\VVơC oڴ)\rฅ=k֬Q޴iW]>}B!x4hPTժ@+fj4j>qI֭[9~8YYYX???VXAZZĉ"99TY~RV#>> V%22_~sαpB&N|jXtt4"==ӧ3cƌZ~BQiZnyw?}ʗJHH… ^eP$448tB˖-̛7=zСCƌСC+O?eXZZ* Q4 f]wak.l Vs*;<ݛ6;~,; | xzzJaa!/6Hq4ro߾T*Y{駱dժU&vZK\]]qrrښٳgT A8|0>hc888(/%mɼKݛMݦ?~)ǎٙ͛ӫW/Lb4mgŊxxxФI7oԩS+-y\=l:c /֭ԩS4n___<<<سgܹsѣ>>>kPGn gggXXZh'}>+ȒqB!danFFѠѨ i4Jܼu[YYQTTĶm۸z** js16l͛qss#.. ԩS{*f,O2N?P>F,ᐐN>ܺuK.R1t`ccC&MKrrrvZ»ݸql7o^`Z .TR\x[[:yY|}}IJJۻ !B?⢚WӹIlc̯g^uV,E[8UR:lyXUadEA4qI]᪕Y}-5n[B}5nTR9 ʨx8#AkfsoXz5'Ngʕx ƍ;v,.\ ;;;ۇ=aZ[n9֭[7^z%^|EZjŶmׯPϐ!C+V`ȑ@ :tI> !j/&B!Dfmmuchٲ%-[l0ܹs?5}(>*붶jȨy\##O={CߟLLLs|u>X.\(_!B6TYFFwgcL;;iRƍcܸqܼy?QFP3tBB۷oXYY̍7) vg/ZիW5/ZXXhmSs0BI !B<`^{N^C~eތ3>Fcx=^_Mj|xS4B!B诙 %e{52 lfj\sCHIIAVӶm[lmm0`_~%jB111FFF111| :ƾBCC ޞףR4uuԉ;vOSXXXi[7R\\̌3# `!B!Q^Rh5vB!B1ejwoF f'9r$X[[sU ŋ3c >cy~GfϞڵCV3zhjkȐ!caaAzz:7n"~6l;wDR1uT4޽|I !jEB!pCzB!B!ZzndJ%xNFFyyyxxxՊ={HJJm۶4o\NII IIIbii8d`!B!B!B<ح-cccڷo98::ݗ:һM!$B&*++T󱳳Z !B!B6l! !MTxx8666X[[s)"##3g͚5UB!B!0UV5vBG$B&꥗^BPR9}4zUB!B!BO; !Tܭoju˅B!B!B<|dBф%$$HRRCr!B!B!,B4a*RJ%ƨT*ʅB!B!B<\$,B4a^^^ 6ٳgSPP޽{*B!B!BpBP(]r!B!B!I !MPAAjrrr8w*B!B!Bp2iBaxJ~|,,,(((k׮ 0VB!B!B!N2X!hlllx7Xx13g;cǎظVB!B!ƌy۷ mMii)+Wd,Yy.\hj{o@z{:6i$u/^ 1X51x^}U"Z;iTY~U~уMB!Dfmmfffu*B!B!xR}ARSS փfݺu1c K .Gk˗uYYYcDFF2rH^|E͹3f~3X51xYhQ\bT*eŅ$y#zB!B!B!x(ԪJnjT*IMMɩRW_}U۷[[J奥RŤHf> 픐>1Ftt4C ! @sJ%W^'牊b۶mz9Zu7]ܹsݻ7III;???v '$%%km4x?> ZcjEybjnu|TRq;/K Hf !B!B!⁧Rɠ&a`}6q:8~8۷gxyyuV^zyf͍#FСC^yNbsμkxyy1x`fk@֭:u*eee|G#GՎ)%%\\\}M6e9z(kc֬YՎҥK:tO>D3;{{:̙}رc=zƆUV2G$B&*++SNqAΞ=[훚\t>Fp#N-s6?!^!XC,׿~駟~znݺ }4 9>AB!B+W5 N6Vrm^z{{{,--5ڶm[e۳aBCCqvvo߾ZKjȈ6mpjj5cƌ!$$SNEII ׯ_ԙ3gaaa7h1[d7oN֭SD4%%%8BQ)97|Jbǎ2p@ҰE;źS\\Lvvvk74nIvCxƣRP(uOEAVw3iy9Y`nqڮ5eR nVj[Xcbj9fePޣLB!DΙ3gb޽RL֬Y!J*Oȏ! #Q… O,/**>GO?)))6[[[ @IIshhhfQT 0R[FFFi111|͞=7|jSx'Yh@w' SNرjGMVM߿?Ǐ:ԔdzrJ"##2eۛN:i5.))a UN{oŮ]XbEx?N,rON|V026)N)N?DSf ؿU1߾:g۵rî+ei1Gz,I !M &&&bǎё6m c}|3=ql?k.y|86n'IG_=ql~CB_?feid~#Rw}?_lӦMbcc[oUAqq1^̌g}~Rӧw_ZZc=FRRRׯgLJ]͛Kݺu 蠠 چ˫]ƹ#F8K !B!Dc2R187rY}7r~>#EFFF]?OOO4/꒘H׮]ߟɓ'_h}3df]++Jmyxx0{lҥ cǎeѕQRRɓתּB`HǎС {NvW^5YU8&L/Ri%SI>}h߾渑aaaORmRZ{._xؽ{7ǏNfZ gGڧPR.woV( (6~< LYLgBS#p6j'aN%l4:j, ʼn#{UB!&| 'sxH?ė?]Ɏʓ/ѵp$O>$3}tZjU۝ڴiS޷mۦo.xxxpEΞ=7?3aaaDGGkյblذݭ.(++c׮]Y2rHN i]* wѣfi'Nȑ#Gψ9r$#G$$$N[neN !B!Du_R\ ;hKǮ= VT5{bquuںں*JݵVaŊD۶mi޼yeddΙ999rq:u׸rrrssZXRRBRRJ:Wm?ѣF@"""HII,_3*88_h;w+WVX{cSZ8mh^B!0///PՄw^|2ݛD)))!11www;tKcP)k@ʑcjyQ6J5޳jZpV6g^^4oޜ˗ccSMri( Bsϩ:_C;;;\waȐ!U>4X{ !B!aÆոR>[{1,Yu<55~<_f…>}6}_ұe˖U:6wN}ǍГ$BGBŅlJKK_~ ??۷o/0mڴ&ic@jn&g̮V@crss딖tt6m!&&˗p>>>՞w :D~֬Y 1b!!! 4`3]@TT}B5JK̜9s+ ;wGndO!/:=g=m~<kwĉL8+ ^x /O?_]nueddij>˳>˵kX~=ӧO_ӳ퇆rIΜ9vc?RB!BqOc)П !MR~ ?? ڵ+ hI3S|W}a`ؘ o^)s5nܸ16mSN*~!8;;ϔ)S1bDz x{{ݾ!̛oɒ%Kh-Zľ}h޼9{f…;?f͚i6.]ST}vM.B!B!ʼn#{,!~8y ~ݵ]û^7U[nQTT-fffzQZ ɽ%-7Hy'GjlllpttT/111Z3:ٳB!Q\T9bhA{KtPa !7GB!Dfmm3;wK0w\룮 :tЙ… ?**>LB!B!Z2X! B!B!D9,MB!B!B!B&DB!B!B!BDHX!!r2?a!B!B!PB!,N:9{,JR<99jm:H`mk~7{N[6XC5X Vb ڇnXxh8]W߷CNƝc90gx_/ {.{^Jھz&mV1_|u=y W^Ջ~UY'''Ç3|*PfϞy7G}B!B<,X}; JKKYr%Gfɒ% .pBWMHH`̘1ZrrrسgֱI&1|pxbNHH⭉ꫯ!ሯH\WcDI !MTxx8gΜ!++{"M޽{ٵkpܾY/sb]6x=^sw~]'zE嵉… OΝѣVuO:ŋ+}_߅9}^]7.^ wQ\@w\)aDgغv\8I\IG8~@qW}of GUC***ap>=ZeB~gyNu7o7X߾}yWPT@!BIMMm0tZnaaa̘1… \^PP@iii幹DDDpenݺEVVXtt49_|Qs3x7ÿ_CߟE5|(--FRVY^\Or1GIc BK/P'T*1OW^:{o߾bǟ̴#uQFF-Z[0:A|==Kf4knu|5q==115:^,CYRVe6Z'bffVLVJYsi/+69VIMMٹ:4oޜ͛WY*5P< RcG?8\̛yb[Ր U\>|W}㷴9f|}}vt߿믿~;^--Z*k׮ @Yf:˪OW}ݻw3*!B!tQ+?)?dffboomqqqNjt,][U3vpqqLh B@@*J^3N=Ŷm/;;sssW:cKMMIkט;w.{&))cG׮]SSӓ$cOPP'!!AkL ccSL-zP*U󲱴jQLB!o J3|͕Y, ZM@bfu~eAkpڀU$_Hל[`?9|t:OglbS`ѠO4FǬ뫃=>^+(J;J_rΝ;ӻwoO6my&1112}tpuuՕui?~8nnnߟݻs5ڵ+ݺuM6L<My~;wfYߐ #a;Z'9T)ώu7C?q^j֔+_z^[7nLڴi[o'Obooݵk:t(L$EL c1b:t蠵IJJ 0x`\\\8{,Pd nݚSRVVwѹsg^{5|X'hժUqFBCCkjsμ[x{{ӻwoػwoF``&yzjױcǰ#??(K.mۖ@4{]i߾=ˋ[jÖ-[عs'?3gdҥ^'}%>glc:3SXl%Ϛ796&7K)}Ağsod]d'SغuV T0 I !MXBBwf͚5WZfgv fՏuu%h3 $jԨ\>#C&/6[uW u컈U KotwM]Z QU^[Uxߨnܺ^5sx9:ߨn-yc_̙3jގի֭/_ҥK<6l 113gΐYFV3~x$>>ӦMrNSN%##XRRRfƍz_SJ;vp1.]IJe˘:u*_W(U/ a5kqMm)JvETTiii̛7OuM8t={ԴLTTO6l`ĈXYYRfРA\vtJ%׊⻀L•+W!22RSw׮]L0y1yd"""3gNm~E򠍧$]< 2RI=I{ޘYVr\h !Rq(KM}νVw'xw{ 7r()Nʊ%B&LRQZZRB+AҲeKeӦM899ۈ?Fbt*ÁLGB$p2b%w>8W*-_۫=ڳ?87uЬc^߼lj_&]twlSS}7kF\\ǎ{xxxx*pY̓L.]4BҶ{ᣏʳvvvL8]v/׺֦]W.8|_Nfdzt[]-|>uZגYX@.ЭO䪻̶e*`ڥrs k]^A7~xl¨Q(,,dZ/IݛhΝ;ǭ[&&&#ɉ4ju%"""ppphȄB!"쏟 N6Vw$쏟}\ڴ[ӲeKt~\3Ǐ'66|9z(SNu50k,ͱ3fo~43gfVQX|9jqܻq֭ 믿>p~G.^ș3gطoP>uҥ7h^9z(VVV,X3f3fy.^ȩS㏹~˃6'''RRR`z Yf܎zw*'P,%>9d\fiML9p<;7Zލg^aYе_ٷZ][Q72X!h¼6lgϦ@kåZo) ów$v۔'@G:]BK{߻xEѳgOF3f̨ VZ:Hz%t]{Jp OWBUxS'3__>Z@KGmՅZ J36`HXoS;KM7&L`׮]ܺu~ 777u)5kcƌ!::l)--mĈu?*j!BG+W5 N6Vrmoߞ 63},Liݺuj5cƌ!$$SNEIIIz\###ڴiիus}naaAII eee5CPTz& oARclmm8p iiiXYYѢŝb)..&;;RW^KKKͱmjNs&N}@T*/֖OEAVw3iy9Y`nqoڮ5eR *tu-11F$,B< ...:?PVMCax74{^O+@legUGaT5|>S%777_3dggǿoRRR8p۶mc۶mZut&*?+PpP(effj%̴6/|#~g^0f&;O5r󾞩8~7soBVn0Ϗm۶e&L)"44rJ.][{Zh鸺K!BhBzXyuױLƍǑ#Gb5 (O޾}[) l߾Gj*~m{1?sWG;Kj^3P-,,Ԕ߽VMpss"),,gÆ L2EB~~>wSRR033ӹDZ37nЊ?5Ez\xƓ^@YZLZibOƧTXhnmOiI!EwofadlB3KJm5(eiwaMByܹs7FILLԔ_rX:th1? r3owǑ\E{66yGRgq 4?fܪuv6sm9ι z燥%OTv cر#666hŅJnnntؑyʗE4hk֬ //͛73tPMwww9׮]cݕbߟ.C ˓el|Uew~61)AakR <\zn"|w\?*n[_ET׷ &JDDǏW(( ͗,ΝcǎwuuҒ={ԩC8~8B!xM3oOhy|{: }x)))$''`kkˀ())AVӡC|I-Zy877\(++O>Aii)| >k׮eРAtر#o||&_|V,dӻ?ݟ,OV|-o>13xx&~fczollLPP۷oT7H׮]i۶-={dZuzIPPjՊ (O`YUVѡCFM߾}ω]vٳ5导 =۷gذaׯRUJkC <\> NpHI_}o lX`h޷!| _]?8lٲ/WWW*gsqQwN5[jҥKݻ7]taܸqE/'ҬY38j}vO!B<F>刺A\V8F\3HQtGbb"]v&O_|yrHǎСxxx0{lҥ cǎez_] ?,XkbeU{Gxx8ڵۛ^z:Է1a~:u*GYȈ068...&Xv-9xyyѧO:\߿_ xؽ{w_iq$_m;>݇r$NnشB`$Elho>7U[nQTT-fffZe*7oRVVF-0{^-:G]"=!x}TSo#]X\,//W@˖-n[T{+\r=s*3@||< qm;LYTuq3/<~~<&&GCrrr̼" IDATӒqaiFl߾N*bDGG댯B!UE5Wǥw:v9>]{*jԪJPRݽw(p3ϬQtÊ+ ))mҼysJJJHJJҬShk3zh&OW{)))XXX\*^޹bmm]xw4hűg-[Yz_~,Ya>~<6l`ǎ6ofabjN3K)Kɻq +L;ņ}yZ!h¬phdd3&F0w\JlmllULLLjLWYfiiyPX.\h's XO|chWM.O2?Tl(5nпne˖uz񢂅#F`ʕ5ISOϪBB!ã^]fkRgnGGG Gu-,,Yffff҆hq|G]VܼT@"""HII,_3Y-88_h;w+WVX:Ժ)-6`4n2X! $3k onV<ҠG!B!T[,\s.\Hpp0={o} !n2X!>0kfOH!B!?WWWtմlU{B$B!B!B!O4FB!B!B!B!M!B4QYYY,..&&&|Zl7uO?ѣڶmK۶mŋB!B!BF { !MI]B j(O0` )((0X&&&=!B!B!'3B&,!!DGܼyRItt4OFT̄ W͚5#?? !B!B!ГB!0JEii)JcccT*ܸq7x7|2[>/ʪw{B!B!B!jOB!DŰaØ={69O<FFFǕ+WcӦMԻ=!B!B4{l=Z,X}aRZZʕ+=z4K,/}^p BBcƌ:Þ={M4Çk]ŋ3|pBBB oM 5W_}G|E3U߼~_>FtIX!x( \\\SSS} eٲeL6ݻ}!B!BCڻw/u>Vdݺu1c K .l[ft%""˗/s-4Ǣdȑ⋚sg̘s=ofxkbhѢŨT*ˋ I>FtIX!h GVo;wWWW?DVSVVٳgqwww999iG!B!^*?CQ*$''TNPeffYuTWii)III9EGG3dܚq?(FwU]ϯ~RlIIIivAAA?!))cDzsNM'Z?{xxkm4xؿ? ݊ (-.t|=aD*VneJâ$B&HT?o'4u FFF˗/g( zzV12B!B!0,ZH}6 3Il4Hرcx{{ӻwo4h$''ӯ_?:uDϞ='..mO.]h۶-deex^JJ 0x`\\\8{,;w^ˋ쬙 Ç5?8qVZU_HH7n$44WWW֭[W:w[oUnaaabffի߿V\ǎΎZ]^zyfǏ} 8///nݪ)ӧ[laΝO̜9KV{ ܜCVeIG9Ow#غN{8.EiIyFV2}1fq$~f)oj:3w̴ld [׽N|*cogB&Ɔ7xŋ3sL3vX5u?>fb޼y̜9KKz[VVFZZB!B!JbpF]k ; xv^5' **4͛'ɓyIOO'991c0yŭR̠Av(Jϟ_yj@ff&?6m/رcǎq%-[ԩS)((ДW W^U3s+lذ`f͚EjjfR*ڵKigϞŋZq1++Z]ORJ?m4L•+W!22RSw׮]L0y1yd"""3gNIxz7;2I,78rs4bOGާ7fjv~,BȢ-TeRS_*s/ ݞfJʥ&B!0kkkJ8ɤL:)$$B $#"eEA ( OWłؕ]EA "]Jz2L33~'=;7sOQZWzgh4h4FI,B!Bq?Ǹ  ۓq۳>j:f0L;w={0}t233IMMe̘1:tZ=}4Ǐgga.\u֙z$%%w^^{5:<#~aF#v5h[ׯqlniӦ >?Nǚ5k2e P뙐@\\O?4̞=\޻wo6mDnW_eѢEv]nf33vV4oӝߪ}gN$ >Or k%yi\I[X]t}DHX!b޼y;wVw8B!B!K4'/ d_Jf۾r53ZMee%dJ%SL{gϞnFF4i|,<<Ng7M6YGјW*kQ_mP(5> | FM6À_L,Vk޼{___Ip#h4P(=.CH<Dr\\=m @Yq>.j\՗G|bW-/VYE\^]<}dU" `!B4 OOOI !B!BVqűR\>5 ~X|EDDz~HHⴴ4\\\ɫh(++$ȸd"33f͚I؊ s#{ҐjGXXYYYFAEE?+Wde]gf(((?==F=///:wFOvv6aaaKґq( m%X<@;ZZIO^8FB!B!B!M}'kƛYť=w^>۴iCLL sΥ:Qe4ٻwCGFFҡC2,^ѣGTNѴiӆ={2o<(,,H~\x>HС6me˖q5ѯ_?>lq3&L_f۶mL<\VEdd$K.Aš%K,sagY-ZILLLACڪm'N~@Y7emF޽iٲx]RdŊDDDжm[z]`ۮ]غu:7xZ-[ne„ 6d$r}!-W c瓜xUʥ0u߭P(4j6'~eSXT;4Q;őM]clgBx~=vZ;&֑uխsssIOO___ڵkJ3g8Ãpbi+C!B!7&JO6QΟI}n]FƱb.\@fiiijK?[O~~>ḺVŒ%K:t(4o*++INNFX[іq9c2iҤ:W8w///cDFFyK/ۗygf_\M6~Ң\Tή{bWQ\pO@]B!5k퍗۶mG V[c,ڷopX!B!ZV^k{oLVӮ];e...m۶^}^lcҥXzڼN >͛7f^?&>>aÆ?_9y$/r]êO@:i*F$3BkZ6L5F# L^^`ҤI#3B!B?? Izz:}Z6sL̙c9s0~xw~B9 `!&v){{JdZĉӢEkB!B!_FٳV˔J3_}kާCB!Mٳ;wdӸ ݺuH !B!BKiѧڒ9Bh ^FF<ڵB!B!BHX!mۖ;33fP^^Ύ;j9t[:D(B!B!1IX! P(gq`0믿\Z!B!B!ğ$BPyy9gΜd2ɓ'h41 tz)B!B!NB!MH׳n:JKKQՔөS'oQ_~SN8;;_HB!B!B4&I !7!ooo,X@II Z\\\jԛ:uuN!B!B!" `!&兗C!B!B!5"{ !B!B!/kƌ8pϞ=;wFpY^߈c7lYm~ǯbD$,B!B!B޾SpcߩFiwǎdee={nAaa!7ov~]гgO搯>x=WZxUzg8kyHX!B!B!7>ʝ tnKLD0[fo2/|}뭖䐓S())y6צ^'99VkV}N`p[*dQ-8nWa4Zoh4P\A_p?*E9m!1U`Wl=% 6KT*lXRmweT:6X!7SRqnޡ_?B!1۟DV` /'3;hr!'1Wq!̙V3h RSS0ajXv-τ FêU {^ZZӦMRTTۉÇ3zhJ%&x^{x{1ΝYre6Dw{$OK H`oq17߾[>DӪ+35o=Bx9Ȑ{/^dhB5@nY/.Ң\vl\Fvzjw*ue|%~j qI8#jJiݾ/BY=?vt')n/ ;Σy|y1bkA-(MeۗK(/-"(,jr2NσB taۗ/Q7C{Xl׷u-EHX!I撞Nii)k餦LTT^^^)v<9qiv0e::¸%oM6^x11ۼy3k׮%??K/],YB߾}۷o}]vѶm[5kfq|} 8>a/€3gNʯ|&O k3fFVX|5.QT*{ӹҥK֭vE!B܄v̺ }Bdr:zdV7o9s믿0i$z]prr^`ҤIAL"J>cT!: ҉Jm>G2'`#Q{k?{%*gWcM&[XLhNw *[ {7cĎ z> 6~4ߢ-ih[FM]BȮqRNj8;ZB!Mj͚5?~\v-ٻw/(,,ܹsݘL&Υe8~u=q.- dFң5-[Xw/(BJ">>R\xQFn:y N>[o*_l۶ 9{6mZٳgWSEE}< C QOү_?v}M㪋uf͚5 8ps>}O`4w(B!&tl'3!'kd<0f?9w={0}t233IMMe̘1:t4=}4ǏgՕ n:?'%%w^^{5\\\ ϯ6駟ӓٳgۮz .>`̈́~F[^ >gHr=՟STp )q>S$ j&D'F]H8sb'MJ|?im <}LF2;5^N~yz '3L\^L&yi\I[侳N1]\p )'i}8e%ЪC_rԵ,BܤqFW^yGgݻCҳgOV\ɡC9r$&ݿ k+:Χev"{w\e9E8{j|G<]q* c4DTRYY*ݢ .LHH]w]w?o]鄆Z!Ο?Oee%|n'-- +6W_}깎^׶mJogaDDDX+--MֹN$.\~ի^>.cd";;Hflر#nnn5GEEqQ{ûnd2ŵoѢ=QYYIff&>>>4if={zz:͚5SФ IDAT@޽غu+Æ kPB!BX+Cm Ǖ1j*++1 $''T*2eE={RXXhNےt:l?LMMjymmfffx_ѼysuSC#s9?4-Je\qmΰah޼9Dwe/#GZ3_sa̙lܸ 6'0`:۶m#22[nAQXXh.۷/}{b#Gha̘1h4x 333;h۶-:t`С :g}Ν;Ӿ}{ͯaJJ r z_~RTTTt5,Ǜo? 44GC&00!CIc?raaaۗ[nhѢ}vB!f LV(/n(/YZ>8h0 8puZBii{4\\\&N5 eeeVgff(((U:&G7Cm2eLlYs!J^3-'JY kE391ӗgtOY뵬L&JquU败cE(TN[.ߗ>MFFLZĘ,|B#;Z$,BΞ=֭[[n沑#Go>v7|CIIy`bJ6͂ 5mOİ(r 2y(|=bk&2t2 }A<`4X.TUȖ>ݕM"ꩧ~]v裖3|M^y:uĊ+xg,H9;;+c>RSSK)5{L& cǎ̙3lfΜi>`0- r^xYf.["X[zT:tԩSsh|?~ :udqp͙3u1uT|INʆ ,o>} zb?^M6N&MX~=V>uTZnMff&sYg0yw)))!!!x tIJJbϞ=Vg^7e]vDVV}iСdffSNעd21eONJJ 6l`\{ٳB!⒉Z3s0;Of"d;m6mܹsNFteee%/fvmڴgϞ̛7* ),,ͨ("##Yt),[cr~pp0vE_uZeEAHx4C,u 7n~MIN>G_Uɱ{-+0败gm}9F@EYu 73'w:ߛg m+&:F[7 9F#Ҫ] ak8qlmkmL&.ٌ6YZ!F{( rss)--\_XF[WW,9mss}V%jZMҶJ*8 LFQ~ }'&3Ɖ򷷪g=VGsm\(h?uoJ=1$f֝hzeE̿|=ضmwu?O>7Z,|SO=Ś5kشi常p:`Km4k֌wyB͛ٷoZQq=W_}$6Bδi "44Lz-[j=_Tzjƍڵk ::gDpp0l߾;mSTb FŪUj7'NkLԟ?>?0 Æ c5Ho\,6j/\\Ք晠]ǎ8+Gtgg^[oķɉ0LDuBvz¥pǽqR>z9wO?* {KyG}bp־=/ Dԕ}4GQ\(/-1E]m3v O/^]~?UQs$[p1;ɎKxxSVOHXwIX!mۖmb2رcÇ?qULW_}ŷ~ĉT-EkYT\ѷrfo$#i1JQzP^lMV( |[4U:ݭyIYn4-iҲ._zŋxb"""زe ZVDGGKtt4͚5`04xO+0sR%<iф'Vh&Ņxzrv0{7i=^C[^/8mLJW_He Uzy,<}rhb+(-EZcvL UMa^M9y1rK.凋=VBNB! !!!}cj넄?oߪrpvRCVjuwwJI[8a\ޣ0DQjn&_ˉ|{խ4hIII8qٳgsYx s=Mjcrss8q"Ǐo5Jh6$==gJJ y^RiN\Yھ# ŋVS7uItt9G_b1| 5 &b[֨k+˻KZZwfÆ 5Ghh(rvvd29ɚ|={xYp!aaaV۱_zh#3.\WVV9-B!ۛHg]9?m۶5'mՕveRw`|C-utR{>7wo|C Ыg|C^IBK-X#{ʹ:ךC.qqu7@c̵nGBѨ ""BA~~>'O$&&N渺r ^'>>ޜj* 9$&& CFYl\oBrGS{k2ML[|9NFn\T8n* ]9^rmFXH*'#MZ6تu&O=111o%c f. g߾}L2 .uVxwѴiSn6y/_;ve9Bpss}x{{_ٿu֬\NX~ON0 >|={^F;۷ogP(P(ɓ'ٴi&LpbbbXt):~5>|QB!fns3g2gΜk?(PZ+Zn:ğ$B^gݺuV)//SNu}~zߏNϏ{NӁ٤PVCF"|b1j9`@̽_oݣK۴эW' |;C.<)"VmQ,~3qTRR˖-3zXw٤sx2dsׯUVyVgƲyf{{1:tp^tܙr-f0ϝ;'|G}:]G4}Gĸqظqc0#hݺ5&VZ1%֟x oN&Ml~P_+Wd@ff&fr8Y'GѢE .\A1bDpY"##u;yիyYfY$]]]yw),,dɒ%̚5ֶXp!z"<<TnժU?ooot!C>qF%B!i4Ξ=kL\\Z.<8#e O5#wSB!ĵ=3 l_v\l/rZ->>>}Yd4)((r@q”\\Q72t%Z||.T/{<<}k.Ν;G˖-k$`^}{]>|8 VqEJKKi޼yr҈_XXHNN͛7ͭRI``y+ƾ}qűqF =Z3gЮ];i۶m$''os)gXDDD?H2 8991p@NʤIe{eƌ;vL>B!i+|NRb<-DY;&5VXB! ,B̼&vJDXs|=p{|||fs{ɓ'my{{[$ճd۴ic,((cm|QQQB!B!B!&! !7\)--חv՘)Bzz:j[n[PP%K8s #&&m !B!B!pB!nRk֬c/_V5O|p~m*++ҥKٻw/sΥ]v nO!B!B!$,Bܤq&O=?^ѣTUUcbcc1bӦMɉ#G43g0|pzOB!B!3={6;w}={{#FQt1-k_pqUH8JB!MJPX|R0L@2zPsyXXIII RB!B! T3?ngw*zdמ={HOO}yf5ƞ={_'4|$j7F"38\?3$ey+]kM>B!nbgϞܹs$''Gnpww 777JJJ())ip6B!B! _!9`5{ٙſFwkpeeeguEr x]ӑEPPP?@^^xyy8OדNpppKrrrhڴCت0 aOUB^yU7 叓١~U:E{T:GCb4(/-'c0ҩv~''g]Vc,+ݳͶ+ P{:$,BČF#UUUzj5FOOOZh͛> &V%&&'Ç=z4J<`vjj*&L 11Zk׮%22j,?cc1w\ʕ+kp$h#xzP^Z@D@m}m~!V]9yMFO~"CcҬyr3ͪ1uc2PP+c/˷Vhh6OQ{PVO}tlxaoѼMwP(iKc<ʗ-ԂT} *¢~!'4?|<(N}yi]qS{1侧 ؈v}[R4I !7mҶm[L&|;v`L8~kҴiSbbbHIIw_O?cǎlٲ B!B'.ǓӍ'cʕM&yIdĄ '''rss )StRT*EEE 4UV1eׇhd 4e˖1bfb՘L&NɓYhvmO4=zk.x4io6cS^yOBox੏quߗ ?qR>D4c4HN<ؿgL&SQu4$5|:-J'eE(J}44ƨC5tJ26|4c?e1O8G!׸JTήhL&~V;U|n~;^#}2UlhE?[JжRɑ]pw'{ ! .رcy'?~<4k֬}L6}Q\\̺u#l!B!Bbw̪ 9p6mjGP@RR{^ťzI@ر#GO??'((ԩ{}>}Ǐ… YnFx駁zfϞmnܹsٳӧIjj*cƌСCշ-T9bWrhˋA'6ouyIbBanQN^ IDAT.gtj\5nZw!~̉{6!+-1t7݇c2L>P{a~:1~Tt0s{1Lq1'n.j:vq.}ᔕSZC}IONPq `!&T^^NFF( 9y$111:T*3/P-[ӛ!B!Bhٲ%+W7GGӇT<<TUjі[O221]|~8TsVT'eL&JquU败K[S_I^~败*Q9_EFϯ-Z;Z%B7 ,`L>g}ϼG 7o3f`…tڵQ 55QB!B!85:d-|_;Of1{ Q{̟cп*++1Liӆ={2o<(,,R` 77x֮][IXh,^ѣGT*"22K+-[̢혘ΝKee%F{ڌS_ukQZKiQ.nGc0T =(ȭs%$'T/ 2_4QeFCqԡ>Bh:A[ʙj@3OHH[j-_pNd z^Kv}P(/SDZ_[>4m2g3;zB!M //V^SNe㏼ 4QB!B!_χ3ng$&jݠvϝ;ǨQˋLy}<AAAر#3f̠cǎhرc).ަ nWfܸq] T]bFbժUhZƍlj'mL6 BCC̤Glٲj,՟?>?0 Æ c5Ho\,6j/\\Ք晠]ǎ8+Gtgg^[oķɉ0LDuBvz¥pǽqR>z9wO?* {KyG}bp־=/ DԕkZwa;RP^ZcaElw'g/r.JZwS`Шl9^_I-|EdX<)+ɧih$wMzF,;zSٿ5C!Kui{L#שW7ucU/:m8Pո\èB!B($%ӲMűghߩ[cu RSS[,#??|quGjvv6Ŵnݺ#g*jt:Ν;Fqq1.\Yfx{׾j]ׅh(рoJU4f`o해h+JiX^Z@o||JXDwJ iEGhrvDA_Eq<qv_򒋸{nNCꋆ?vDf !!!B!Bɉ-[ڭAAA5J y+ڵ۶wu_ & nG?n7woע>1{6&uKBV-h<'x.qquS"EIX!B!B!*IOOo߾VfΜɜ9sqDFV%&͑z?m4Z-go[۱o[nF43!B!BQBq3Llذ.]lQuVwȑ#1 >|޽{_`ɨNQZ>A>8X)d2Ua2Yp!QVYYIff&>>>4i& pww(3 &7srrfDzz:5h(NGNN7k @tÖ=HӦMӤI֯_Ozz:˗/7:LN>MFFN?{w>p;} "*TZZK륯zzu_UnZ[[/EQbMD6}dfaus]&9993usyΝ;yطo$&&ҹsg5kF\\+W$0080a)SJ||<=zB6 l2p^^^ciz=Ν#!!ȺupqqaL:H/_NDD~a 0j(x1 %[n~B!B!B!D yfuVznJf*5qv{`|,"zl;z-MڕDrY)x4E!'1+;O4خJر#OٳdggBDD|Ռ3XF8:ݻwh"5j۶mcԩfu~i6ׯ5~СQQQ6mkOr _O !B!Bq `!z*111xzzEJJ :( vvv@y/CјTv*ٿF}ԤI:t(O&%%VKAA<.. (Xn@@@(www϶E+7nt֍;GitBӦMo!B!B!Y,B@crrreǎ?WWWHJJVZ@a~_RPJMNNfŊ$&&gϞ5^zĔڎZ.66T*IIIԩS(|=oOdm]}׿E޽9z(?#C XB!B!dBQ5i҄)StL=*z#G`4… 4o޼C]O0Vv\y}\GħRPTn9}Y6olVgZs=zԬNݺu%99캝ݺu#,, (k}eYb;_|k֬aҤI\rB!Bwj}ݻJڮʶGYcwOfZ;/wCۗfBq۷/yyy,X-ZЬYB4949&PxSǗgqV)+}*ۛsұcG>|87ӯ_?̙Cǎiڴ)ڵˬN۶m6lAAAx{{|rSҥKٵk 4ߟ &OlqW\U§Of̙_''':vȌ3xh4Uvcڵ|gѻw"B!B35-԰C3%222ؾ}}aj5|w۷ؾOΪU!!!rJ8|oF~ɩ6bcc8q"GLJLvMpp017nh 5PaĈ\x<==Y~=RZ^x^xУG"mD_8BJb$m,?_pv&/'nty|2;{5=5uN¿I;.ًQ1kzK 92[V d&'Cn] 6'}sTl\8 NnfҨ|xcJqKNJM%]7;ז7141 D_<Ɛ}UѴTQB\|?k=j+ks3Q%QEQirL.z:0d88qm}FMkXRƮ( ?l}A_կsp2 ='UO:<6]>_2k U /BVsbZ~"}zh4lܸTT*73i$j{Ⱦ}m/$$SNw߱vZ|}}M,3ϘgҤI)22ӓK.qx ;w.6lh49QQQ8pbbbbmHH.ك eᡶV6kE1}\qgƑz*چ]GqEQHO%-m)L:ТSүsYKnv*9I<$],LV !B!B!Vq] GBBfsx7lؐ>I&Ѯ];{=:wlwkZZMzHHH(Җ(<Ӝ?޽{N#--Tgʔ)t֍ŋbZ \=7>>ggg<hglb揩U {M0ms\'}ϧtC6;c0g9g[T*N.fW^=7+[M\ka$7; ;mnV;;*_7o֮_S]ͮ[Z~& `!B!B!BܳiuGּ69CZV:uꐞN~~>f[:g >L}]LbUE!!!:uiʕ+sa-&&tSVZĺuXb/rc#77" ֺu뒓CFF Xlmm.s>0 رtYU_f8:M.'~uo̵%Ukk[zwM^g'7 t4yY'XAlii \^!'< M~Nc¤gc,Iicwr@VS5wt&}kۿK1kۨ?meRfrZ!Yz^^/_ҥKO=?Uw%M԰xbtWo>_^a!B!6O=Howӟgضzҳ4M6%00Eߕ|%֏%&&(ܢGAӡ(V]biW_}hG)ҖZ`0 @DDׯ/Ro̙3X V7nL9s&@Coxw4hjhܸ1?A`49x`UvDDD:k~䁢s ǜ$l흨P&Ξ'ǔ>ig-/n/qM2wϺ5Ε еraq*5@~na6=9gFfZ)|Q#u4/s%)mω}lAωy03* w{qoڮה;Lm+kJ}k:Y,BpIIIaoƖ-[pppښW_};lb/8MysrN,Y]mh t*t݈ҷx6RgΜ!$$]v䄛 ܵl›oɉ'ZB!B[&?=^wtmPF>ڨRj8 IDATV\YjaÆ[?**鉋 |gfgу-[@bb"V2]F|r噟btNF] )3ツ Ov㉯S7̘1ӧO7n̯W 'OңG/Pm6&OիWQ$$$0vXOT*!!!_O>7xҘ5kg&""^zhϯpc„ \|!CBnn.O<+W4wބi&0?]K%b`Ĩ7սYp^m_=ɹ&#ᣟҪa M/<'21Δ,mVAVn/uhVye2g_?~k$Hخ8=:5mAl SOo0p%, JYiWaрQ1e056[`pX;C;@*|yhTۏo| _c|0~;,/@[s垟)$KFn6fe L7\SQ]gTǙ0EF\@VfHɤރYSV|obji~Y濼fƌzƏO֭?~<ƍcnݚZjuV 5k:tӨQ#bGf޼y|AwkƁFr15kF\\\+LBhh(˗/'##:|rNzk ԩIۭ[^/ `!B!gzp*_|mqqq<3uTf(-Q7ikeJaʗ- $-;lh.BQCm޼nݺW[j3u&[L[@{4E!'1+;O%n\eFn{\̷x&|&.u̴͟r1.MiT۲C8}1dHDlTzOwI-m񨮏cOخpex/kdpi ?W?~Ȼ(mݙ?~iy?gqqtv9ƢuwraTvYJqӶ4'F|+Tz6Wsܹs aܸqdffGڵk>%?1-B!-YJ[zurJem#-TbgWoFN2JODB!jWCǎ"%%NGTTXYYUwU_olG8S+E)U Oɪ*  /Mf^rz8} k)/o%qwraٿ^er2@ҩZ*tV?څ'gc").Y馟S2¶6o%3sshTۏttad׾2?Z(ֹRJWT*Ii(&))34W+oN()M<ߒrs5jԈ6mƍ9r ߺsիGLLL}?>>$ԩ&;KB!B! 붅BI&L2k׮xxx0e 5 h4 h4h Rv ?E"+y]&ߗ}W?/#op]XHyQuqz>-9k"YqupjMA@Xveef>u-R7NJ gWۿ|W {/(k~*<NW$%%vC^'::///ӹÖvA&;Z!BKi5y1]zZBTBX!5#nw}g߲6xֽ#::T\*}kim8XϿRw'˭Sԩ2)kd|{gtCn߿ .0c I !B!$,B!ԧu 'Gaee0SuOwǀ0`@u!B!B@B!L GO𽳟WbZ]tW>>!B!BrggB!/VV%,{QMB!Bt$<<`v}\~NGxx8)))fu Gߵx,B!j;ɕ; !B!B)|;]`ծ3|{ڜ;w.dժUfGѣGe?T( #F`˖-X[ˆB{$B@LL f9}4ܹsErl~nyo>U)44P~*o[!B!Un>N|J6yZYw.Vf͚ojK'p1SO?%22 !DUBQ%%%Fxxw}Çs vҥKteթۮ 6н{#9s]vO^^W\PYnȑ#U֦B!BOqb.&Wpu3%~ypby%'o2l0XlYuXl/W_eʕV_!dO!SM6ѪU+&O/{nW̟? &0a\\\7h {wĉٳgB;IV !5رc^zEn&lllpvv&77nW',-?s|?&߿Hyz8l`że=OWg̘ԩS gӦM|t'Oiݶm7|zE@@cƌ!??y8q"+V>!B!^..M{`K..c4nܸ-0 iӦR !ĝ" `!zƍ$''Ӽyٝ|B!B{٪O2sS a'hZߋU3d'iwywXd )))Ŗر~>3g_>B!D yfu놣cZ-k׮{fi|, Ⱦ^l;z-MڕDrY<֢ixklgQsÃs¸q41k2x` g;riΞ=Kvv6...DDDX<۷oB!B{Յufuzޮ@P}/\*#ᡇoeUҷBTI !5PAAnnnرrssٱcǏ ڵkqttdРAAVgb7jԈ6mƍ9r,99+VhZ}"qؠ( R*B!BO2s \t"u\9Uΐ'P˥kٲ%O<ׯ7]۶m6lٳk|L4={Rv*A!*CB!D ԤI4ibѣ:t)S 6ȑ#tXYYaccS-1W.W˙o SO0d@\y|97qVeU~#GdŊ?~%KT*T*)))rY6ö#ёݻwӣGw҅F_WYB!B!DuԹ)y:6Ƞ΁iZߋ?iNٻwok֭cݺuq:t(Ct BQ$,B܇ 8y$7]o۶-*LO̮}L@r'ylj=|ȟOӤ_*M6_~ӰaCuoooΝKǎGQ_~;;;>3FEFF .dڴifu\B```,B!B t8Zs` I!YWZYqB!pBZ]zYԎCUU!ZM=GMJjj*j|2AAADDDB!B![|O6njv 4kѦBQ O `!B{^^^… ̘1CB!B!B$,BQC 0TwB!B!BjdނB!B!B!B!IB!B!B!BBB!B!B!D_!!/ `!CAA52]KNN&.. Zi B!B!dBQ%%%Fxxuqٳg1B!B!BQdBQ)¦MhժfeQTF/^̩S HB!B!BTY,B`ǎחz)5B!B!BT1I !5Tvv6g0:L IDATϞ%ֹr ?3aaaxzzҦMB!B!D3m,5zݵ{XSߧpg6ȸw$Bjt GGF XYYa4bB!B!5 k /zqz]^M{nb%-W|x,ȡR} !'g !5իWcǎDEEN#** hҤ M4AQVX={۷o5G/B!BQ#szmV&%H,W5* 3y[+^jRsGj-lErm=cC󺶼C1;te`Z.V]KD>3zQÚtq~ +U!*IB!D TPP;v ''\vM T*u%%%:B!B!(ӑ9uvе:ez1ӳRIu =9bg¨(675eϙ@VI3bůyt`|և\}4z[+6*fƉkZ4wpƔtޙMx~U)Ƈ/HN5oI:ܜNzزh'F@l3lt#QZ4w @vH{ڰrK=; }ǝzq쪖y[-|%BQ\{ѣG9tSL //xPTrYBCC+d!B!BS똲6a-Y9 YϞIȜN[UohϨIdk{҃elOooM5'bÉ\Sl9ǻ31NͲQ qdħZZe~{}LFqeZ7F@v칐o\/>ĤKt jh-JüxmJ/ x;[c,mU|6һ̕r"5`k-B­{=`J0?iM@-J]A˃<,{5աy:rK1jUM_fW%[:n!ĝ#+B@СwWWWfϞMvv6777lmK"B!BQ]K%?k]Y*mgJj J3uzV₋Ku!B!BQuI1Hʡݻ5]:ylim7+\HF/g/k;m?đq37cVڔuW4cA ZŬrRs#hbN1VqYql-B!B!B{NX*ĬrX*ݵ*镆lX6OccvMoTy.][bRD%0‡mT<م5&U깖gtĹg;TזS`NU 5`V[QdB!B!B!i sdpj9yU?Ag8q6O$tG\ 'rY/3qɶ;rV*t[-nb-NhIȸ2רk,I lT\I*~(}o^<숝]4u+jkCC;*1fumĞogWx[u8sgT3EjcZ+Չ+CTwB!}6v6;{ B{"!B!BTVo=#hظٵ/O*RSȵ';5*x^>϶?, ^!JE]+ o#M% +D uDY,B!B!BH'-HZnIxw&Ot>KSŖ5e'K^+g3B@LL .5… 9x`u!>ٺoTa(6SY]uSľ}~/-ZįZvO޽{+YXNO<%K,.RSS߿?o)R~e>C+n7ydϋ/x7B!@R|;LbӋO !y$,BpIII^rq˖-[hӦMuQ)ϟgŊo{Grqˉ;Y1] )С-U޻ĥo)2}Hoe3gk.ʕ+fu:w̋/h W8'NFa<Ilݺ_^z>|-[rI BCC1K/ѥK7B!BqBQ)¦MhժUk,J>FQ+z`@Ֆ~RRQbӖ,Y¨Q-Q!+.tb02k P e'2cSf[~@z γ%GQ3RIHKFQ`4xN_@tR9o( )7jhHJ<*KZ)+B\j5?IiqXiSW^OX-rB/=?Pzg/3[ nK_CJW]v1tP"##ٲe C !>>ެNN-n*|3g3f >(AAA;($&&P禐1?s/ǏgժUhZK:M6߿؄B!B"g !5رcvDGG[\OqIzABBl۶ɓ'sUj5 ;?JEHHׯ'|oiii̚5ٳgA^h4dffy0aP!CHII!77'x+W%Z{Mhh(6m"** ɓ' +5r6]J䤃.q.n>/FRѸ_k @kh3I x$V6VLs ށuyz8_p"blWb2F_^]яa(0`ecţo Y%_{<JENO7LDl潀F%3/}7|z u`j|ݼHΠ_.,_lL}Mtl/Q#'!?m3`~>uV 0k1.ޝUUPApTLS)*%&^դk޲Ljٷr_4~Je~<(8*9?<}^Ó5y^k?} a2b. jۡd`"y-oL\)oDe¥kWpB *T4oJ`rԖV\{/bˣi2*9y7UX]{4jШLKndR0> }C/[޾I3;vџ1S]_-&{yҨaC^514:SLaΝߟ֭[}v4M|f͚5ߨz{GII ۷gƍ?aaa >7xssse_|CMpp05_/0*ݘ/\}6&&&pIllljwQQ ̜9F1p@kqڵ+ǏU9!B!nX!xB瓐@߾}8ԩl޼YvZ^}ULL3f<< kCO;K~-[t1R=;W{'*+>gq!ß 1""`'+wû32o=S>-j>voYFlgť%l;^#z $oL W#z ¬<^ {}O<k KJJ()jb8hldݨ}ns? n;ѱesq8*No6uj;;ZoNvd޸Z)ousQUΘPtT7!ju *Jo{ ՝}[GGG?geeչ0ߩSru~:K.AleU~员%sںc1#!B!&B!@%%%ذuVn޼ɭ[غu+cƌ1q {xxCDD7n$44TIsqqAӱyf+f5kFZZZmT qpp@RM&M sN>*""" +׮]*}/r"Ey4>X\]HeR #*e%ەҭّx7xʹr` bYt&8 ;J?6QJuVn pxYFu+yו? 7jА;nb߶OD{%%EX~Ir6 ͏ϽF=PvC7f~*ԥ?mW Tĵu?;;;+7jԈ\X1 }ڲtR.]ʡC $00W^y5k5IIIk'OY!B!,B<ڴiCXXӫW/j5aaaט eŊlٲÇ+ `֬Yʹyyyzg]6o'NEݻWMrE@oݻwoÕ׮]K~˸F16 ヅG7$'@VʃWűi,)[ݶg8,O7VJzXt:^Z@;mAI "kK?΁9zZ*T*9~<,Qvv5;PXRLiYwDOIswhoɉ\Cݵ?1g庣0yo ȷo3'=}\~CcְLxF;үCZA2c^sT7w:p@W]JKKo{ݝ~ ˗/{_ }g[[[J⥗^bҥ/BpppҳgOFUrB!B!$,B'FHH{SNhB/mժU>^I OOOӫK.兽=˗/WҖ,YB\\͛7///&N`\KY),~_yO!b=zy_dMasJ/1＀NcsJr lM_KCKwo3NaHVE?եU;B`G<B!B!x$,B!#:/QO!B!aOB!B!B!B!)X!ivF IDATHKKk7oJ%[>(ǎcΝtЁnݺajjʞ={̬Lh޼MhҤ ]tQuV6lz$$$pUT*vvv<ӨjO>piٳgJLLg}V鈊[n899)I_VVFtt4Nё 4s bcc_1F~~>QQQdeeѾ}{틉IՍ6oUn߾Mll,mڴ]v\pgŋ9r Rblقz͝;k= йsg+B!B!JB!.;;pj5or_ʕ+X[[+<==%|̞=`Z- .dѢE 2Hv Ch4o+Ǐ3dZlIJJRwNN/H˖-UӧO';;-[reRSS gذaK/qa:tٺu+vql{aܹ;wNȐ!CHOOWP߿?999}v~g"##H СCIKKc8p'''&O& www|}}Yz5۶mc5אl^z%:ẃ>X[[ 2b-Zķ~Ç:y$+V.xX6mć~H=z`=zT !B!BIX!xt:"""ر#ϟG=~"qqqyd٥L>hz\X=g%on4h3f̨TOTTĐ'yyyoߞ/3g*#GT9s&'N$$$7p@^uRSSiٲ%۶mC4kL7v*GǡCʕ+q2c j޽; .dJ:G& o(N:|kyg 2<>SlBFfĈ+//Reut:aaaVVVFQQQgggciiHOOGѽ{wbcc߿CB!B!xȫB!l߾}899TVVƍ7jg?'N___|||Wzzz?(CBB?~C[nn.h4Z U=QQQ 4 泶駟2_o߾ܸqҶuꊷ7ʵxcL0A/`TtRRRx011az1sg뭚UTxzzU~;v,K.2mk׎O?wy#F( _^ ΎBZtt4wåK۷/mڴ}r zK4 zYf̝;$\\\7nNklJJ tؑf͚1rH >CkJAfs8n8VXa0]!B!I +B'T~~> qر*Ʋm6Z-!)--Ü8q:w̺u0aVVVYХKIJJb޽Fսzj>*"""pqqF#((cӽ{ZPd׏VZŴiӪ{!v͒%K =CgYl߾R]2p@^Jff&^^^uj;-- @Ef͚)fĉ5QFW)Z᫯sX[[T)ѣwSN8::yfXv-1c &&̞=E̙3IHHۛ"@vHOO'""3fpJ} [n,_7nϲ|r&M)++#<<~ݻsE7nWO.] [҇B!B!D}B񄊊wޕa0p@4 &&&$''zj{1B>KjjֹsgfΜbΝnz2sUoulŋٰa1m۶%**J2&6mggg #++ '''%/:JŲe իc/deeϪUjAAA 2Nݻiܸ1>^BeAAvŶ 6T5lذv5͏!o333}oߎJlC_j5/"|M[$[ZZRm Ç'88"##ٹs'PwLL {!##NǀOk֬aȑx{{`ff\bmۦlmkkˈ#  :TYZ.gggU`eW !B!B<$,B<Ν;GZZ~~~Cqq1cjjm۶>}߹rF*GͬYxk5NN.,,,:u*SN%''{=~GGEEaaaDdӦM7Nɳk.ܘ;w.gJ_~9ZM֭ a*ϻ^???Z-&>>T*?p*ϾUըT:DߺuK۷+kC(**333BCC`ٲeF~8q"#Gdʔ)&M/LJJ -bԩt* eܹuV\]]ر#/^DR1yd2w^ietz/3899Y)oV*B!B!xIX!x`cc֭[y&nb֭3SSJej{\w^~'^yܹ󞶀=!!!FoA|mmFpp0gΜ}DFFprrb…İtR*-ZW^X }YХF055%00xڭ%۷~ӖɭZĄ3g 3g*[ҹsgLMMٿ?oߞZ~xgxꩧ_GEEc9w_>|޽ʵk()) """ظqht:7o6⽺-+Tupp@RM&M TϗL\\\jB!B!ē?B!ğP6m S~zZ&,, sssHMMU1.\ 99֭[s+V~k֮]˄ `jMF1&j?>@J՟qqqj*/^ŋ裏ضmo߮Ԕӧ3o|rёf͚EYYyyy$$$(y Ʒ~\VVVMrE\w̌޽{ԽvZW۩ծٳ'FuB!B! !B%%%|w̚5?+WһwoڵkW]{ :u*?1eJaaCO?ZGGG 7oQexVn?sT*bcc,kannˍjcʕ|78::hdڴiFпO׮]'V%mܸqL4 6mZ&&&趿+9=nnnу &_Yd 4i777*m\^{MouᅬcƌQ͙3hԔ%CNhѢ^ڪU8yrN+JzPP>~~~xzzD\\^]t!$$///~ɒ%ѼysbĉHΜ9S*b!B!BǝN,,!a8'/NzΥ$ѶvW3VKnn.eee*NQa}ǟInn./_{{N%Z .`iic֭[P9j9s+++e+-''k׮Ѳe2̀HNN6ݸql077^ZZh4ZTv׮]L8*OIIˋ$ڴiSB!xּ+ΞJEk#wΥ$ηB{xPB!LLLdugccM}w J+?6KKKTཱ<==y7kֶ4h@V?ww:ݻw//6xyrr2SLB!B!xB! mۖW_}!=yZߣbʔ)զ 7B!B!GB!Z՝lh㤾W !B!B'B!DdznkW !B!B',)B!xܰC+w7B!B!(Yn B QRR"y&mbjjZO_ݺuWcڵ^2w\dHq;hӦ M4`:wL@@@e;Ν;СݺuԔ={Ye:мysM6ѤIt\+,,d֭4l777IHHիT*xQM>}deeѳg$11B}YN#**nݺ䤴'FWYYќ:u GGGh4Fω'ʊ_~;;;DEEE۷EuukHu_M;;;j=O!B!B<9$,B<Ჳ GVo+oܸe˰iӦ;v Z]e``ݺu={zɓ'Ybgux`aoU5'W|/tU٣GC\\p%ZjѣǏѣήϙ={6hZ.\ȢE2d޽ChpwwVǏgȐ!lْ^z%133#11-[tONvv6-[ˤΰa㥗^t׳uV^سgsܹsʵD Bzz~FF2.ooo%O! ۷?iDFF2l0JZZg8995?59}4իٶmϯq|~թ =iӦM|}:[vM̙֭ bƌEpp0111$%%Ѯ];p<==ˣ}|̜9SI9rR̙38q"!!! 8_TZl mС͚5S;q|Gq!,--r `̘1_hZw… Y`ѣ oʿ[N:|kb 1I^^t:222B/jRG Fk)B!B'-B<ۇ^ @Ւs=GAAo߮N/;Zmy?~ѪU+FIAAA}uJ__|RZإ|`sg{"䜺|` n|`otߦL¤I$""^zU7x`֬Yw-774rA> D@@QQQY[[OWzN}ƍ- WWWWdtWf„ z=ʦ7&&& 8P?w2f~tY~wU* OOO?ϘK.ѷo_ڴiC ƍz|h4zEf͘;w.III0n8N: ...|Jٔ|}}ر#͚5cȑ|@h׮U=w7n+VaB!BB'!!}VJͥD>3.\Hxxcԩl޼YvZ^}Ue@.]ӧɓz{zjtROCc=z|}}+ygtA[ ^mܴڻqLn]8(<il?Eݷ)S?3f&O̘1cK.$$$]h4C'::WWNKK{ѣYf3?w:q@34^>?C72f<<,,nݺٻw/˗/gYY,]|^;[ݘiB!B!#LB!PQQQݻ6%%%\~iӦjYr%kV->JOpp0DFFsN%ϏD?N~~>VVV$%%ݗ{ꅋKiw`SmkÆ +7-́eq_^ey3[W~%btGnfLL"vs3F `ƌjf̘G&77R>ggg222tz[oٲŋa>cڶmKTTerM6mڄFVVNNNJ___e˖W_"??UVjĐ!Ct޽ƍ),,[ZVVu;۹b; VN1ۘcffqtt[mh|˽~~5篺+WÞ={{|J5k0rH033SIqq1۶mS󶵵eĈDGG3i$CUzvuuT–-[^y.B!BGB'йsHKKϏTrrr(..&55www%ӵkWLLL011LJ#Gs.44s璟֭[quucǎJ _2d){(--ouՕucT&*_mi5䐐\\\PTUnlL:SCHH?QGEEaaag}%6mbܸqJ]vܹs?>ÇWu~1rHj5[q*λ^???Z-&>>T*?p*ϾUըT*rrrnN[n)}vrMcEEEaffFhh(,[̨/>?c3~Cou//^DR1yd2w^~cdgg^vprr"33R;JM/B!BhB*))Ɔ[pMnݺ֭[3f VVV4lذ`+|||`ƍz\Š+T(Ǐ iذ!:8Cvܩ&apu\]]v%%%UBW*1Kb!J{{{BBBXdQm޾}m۶̙3gh߾=z`+++Xp!111,]Jz-@J՟qqqj*/^ŋ裏ضm[V2}t͛g6kN9WX~iW!(( 6p+Wl\񓗗M}jdee+Th eʔ)\z@,ߣ = }8::Y(++ //OaÆ*)w^6mŋru333zMxxRڵkׯ_^y={2jԨZ+B!BGB? 3gsARѧO= aϞ=tԉ-Z(1c~~~x{{3l0 T_}#FԔ%YM~/ɿT~o9'35ZH6dHX~ Z;0o<ꌊTsϡRk9˗/7+W7FҒiӦUBٿ?]vr{O>Z(iƍcҤIӴiSj511>_}G777z  ,YWWW4iaaa>^I OOOӫK.兽^ߖ,YB\\͛7///&NXqz>+9sUB!B!xީMzB!C{w\J2mժ3ڝ}YWҨQJ֤az*W^U}%''3`:Qk.&NHbbb󛛛˗޾zX=V˅ ѱ^p-pppiZΜ9Uõkhٲ(ƍdgg憹yRΟ?FQծ .`mm]5=)))xyyD6mj]B!x<ּ+ΞJEk#gΥ$ηB{x FQ!x\\NOI?0sZvAw>Ee>ޞRLLLpssʏ?ȸq*`nn}}T*jKKzCFpppT*4M_樍gQ}^gnnF1711Ύƍ򌭭m|︸0lذ:/B!Oe.sVo\B{p%벬B!Gqp]=+?`Z۶myWbo_}/B!ğB'ORA}ۄB!; IX [B!B!4B!OYwwB!B!Oy^!B!B!B!B'FII R)%{!+*(a[?Ҵ#<_cعo?O6n`܂'nܹotnݺywVN8A||<R)ŋ?>.\W^L2zZ7 ,swWB!BqdB&<={<կ{ҽ{w9vU7n[ 8aӦMty=zojXB!B!x8dBtDDDбcGΟ?\b便|'40:nd2Qac*^zYie%e^ilmZfV[6gvPV\lyJWoȢf+RpL mL)3o6i׵տ$PU?^/2XҠaqV4^ᅭR®Mejz>3b5jTE )aaa\o޼9 Ӡb.] j`>NGzz:Fכ4ireȐ!,X|GeƌٳR134eeeU~vv6XZZchwն#B!BGB'ؾ}prrY/|cǎaaaAZSϴ^ 9ZwH~V7ףRw6c0ύ׵)-)Ŗ_ ,>~e9OlM^Mi> )7M115Xk,>B)?hڑY_‰|N!/5>m oPVŴ Àoӛv29LTt>~Q v??يLKʫ;/&g \:s֝;e4M&֦1_oB4j@{N:SW?>N{i塕^ʽ-4|{?2e_q옽) PZZپ}{}/P\v{iӦ?dѢE899qUX|97Vҭ[7"""HMMEr!ڶm>N'v6ܻ_cگ+/bР)wOuGGRPPofΜɎ;8{,è#00K.qi2228y$_^2Yt)$''ht=s"""عs'vvvTEzz:j 6_uۮ];Yr%m۶%==t% Fnݻ˗5wҥ aݺuOB!B!ꅬB!PQQQݻ6w :!)/gӖ([ڻڕu^O}GqӆddWjә7rgv~y )BgRg8s"%E44rL%*mΕҙNr3r`%e YO9Ռti,iʁS)]EA̚3e#^}/iaeWW7 oq⭕#j;w< 7ѕЮnô?TQb[VY#G Yו?~|HJJoСVŮq1߿:{,kѫW/Yr%O=Æռm6e{j[[[FAtt4&M[ٙ t:]/mٲ%B!B#`! t9#55IMMS?Fݷocݰ DE]8ϕ#pp5|hůȿvfmuBW &Z(mFIkfåWy6ޝU]@YM@P@\4!uD+MK13mQj[f&jV55VRb ᆲnOYd9_ǃC/ns][c&ZWjg%I7WA:7Me*8s^_-ҝksM%egծw58v`>5=} ˌj|%ǟTH:0^xHcǎmݦO>D'NԞ={w^s2r.7߬,L&*33JjzX3 `lPYYݵvZIRaaΝ;kjhݻuw7gVڷ~2fiW{ڸ$N,KKr⎫yCS^W =~ӿj!XnYL ILp1qQaIMi2S(w֖eI7$>/W'tj֞1䞪L}WO}5Q˕ 21W5hj~>{H_Q/}3]@K^F/LCKj^߅q2~T:u ԙ3g=_)))ΣE2L5Ivv.\Ls&#} 'OTxxxcM~~Zj%I뮻4aٳ/۾.`0(++KڵT8!|Aub  M _O.'''sFu֭siէ~/axViN|n>*Irv'g2򕓖U2cc*в֗ADz)_-*.nN=Vj>c&]&_}Z3b; W{wW?.ߩ/Vީ^\{Nة-_;ȣ~٧Ӊ}Xv]*WX~~\yo]t뢱Ov/wS?7|"ZO7)ƾ7w{@<go:u[xbWa4gQ' @...ڸqc1 2 ɑT]nOK H͛7Oھ}o^mhҤIoȐ!Ot!I /xo^:ul̯.}JMMUvvuGGGEGGkѢEgjٲe2dH)I;wuoCHp_ճgjdj/us]Jڟ6-!I֯n#LGv;L9U7LCt|}Z{8[:V*LͫUHEG'o~Ε+r[8:hc"Ct_nTBx:_#};Դ{ZzgreooW[wWaݔ~$KcyEcT_j}C[]s|/ϡ|O}e3u׸qK=*___8::ӄ 䤷zN{{{kΜ9R=4~zJxbm۶Mnnnz5dȐjW=z`/5j(=䓺գGiʔ)ڱc~˶r{q),,Le6lؠ;*((Haaa6mZ)Izj,oWa/M7ErW[qŵGեk*utrfX RR\tUqgKKyxVoMs %S59_P⇕+կRY-\3Zέȗskj U|Tmj99/{[jƶMC_]O]c4̤ 򒝽A'd蕱1=ɼ|$&&jذaJLLcĉ׮ӧO ]>j4eoohM jԦڲVֶJԹڴs7A~j޽2W'־ի\¶:Xmϧ.r:u_c޿U[ݧr$IXK]~>BCCc_U;88(88qK^^^Zѣ#FXILLٳtrrj2ߠ;Scr\9$ɡ & j5x[W=vMh74֕xQv*;o ՍKSO5:5uUcƌi>>>Zb uI?sU#FfϞ]k9k[@p4Sa`0`WϽ1>[唗Xfgg4[@a hg^ol}>qpWQ`C8>ׁK.Ҕ-Z(44T!X ҢEkq}֭㏕$͟?_%H`L&bbbԫW*e[nСC5|p7N;vԎ;!JرCR {{{rptبmٲEӧOׁ9Rk֬QAA UPP{"X `lԪU- eggP-[TEE`ml :~RRRԦM%%%)''GJJJhTYY/_!Chر>^ΝڵkUVV|ܦ}jƨؠӶm4}tIu 0@JHHH=$988hر/Dmڴј1c;4@#:зo_ZxxBCC-[յX `cvvvj0Vb0`#H  u %%Eǎr=99Y۶mݻUZZ Yߑ5{3]-6faܹںukeo>+Q#G護'|&9w5hϛ7O?C AE)66?K*77Wvһ{'j=U$U~… u-T[^\\+?oݻG)22RVwhOO>***4ɤK'N0_/++ӦM4i$uE&Iovڥ rgXZigU[\VZrRVzZN.rtuW ׄ Բej˟~zg-4yd*--Uxx>c=Vǚ󫨨PEEW֒eee) @jvکC.[%Iׯםw``X ۱c|}}oq=77WAn}҇}$_YpO_2s\<ҫnr*9[d.ܗ8ײiagfizkre9r\R#GRtRM>())і-[4zhIR˖-5|p}UN2E .֜_zzs ƴ IDAT\M5m4gϞP~~$@/ǍG}J?M=W_}m/W>j(}ylmk `lTAAl٢;J$ܹs/$̮E__3$Dޠ;ߚO$iPމl }!zxܮ-I v\]u#e2}H={R6aiر2ҥKջwj.>==]FQ%''Wb Oj$h4oȑ#ս{w%%%i SrٳGPEEVX!Iruu'xBǎ|[UiU޽e˖&>FZJdZn;j͚5ە'Nã"Go[}ܴGtJޡYwTaґ{t`V ~nW?۠-Z4z>8p-l g:;;멧ҨQԥKjϪ]f|||p=[NSLQΝkjժU0a}۷e7|y5JN͛hoٲE}$)!!AeeeJLLTetBBB?c???d25tqM6JJJRNNJKKd^I_|:H9;kٴӒ$m%I;<:([&A}X3L >˫ WWWIٳgo>͟?_rssRw jc0doo/r e˖U?jhGS_-]k׮$ܹsZv&O,{{{nZm/_v5WMͿ$)/9GRe-?%GNѢ៏ ԙ3gTVVfU7ovbIqurssSBB%IU2dvZ-ZHfҨQ4i$EGG[mg :Tqqq/-4` 0@{G&Ms233_Ԉ06($$D!!!m6M>|-77WS||c5GM. 2DA>ُj=P:~FV=1"""⢽{O>Vw„ 0aB =Z ,ѣoV}Y P.]駟6"5|pMP=c_l2 MJٳg_+j\\y+qX `CH  6¡M#%%Ed0СCrrrRx$988h֭9}z!IҘ1c̿o-77j˦MTuYO?k׮A/'55U5p@͞=Il `lԦM-777Ps899Y}w3ghzԲe b ;vLCNQQ;)00:5&g̘X-_߿_ڰaZj%wwwedd(88}fʔ)Сf͚%???7襗^Ү]/p5! R~-[~z#Gh47Ν;u뭷^(Ȑ<==-:v:!''j˞~ۅj޽u"6l?O988o /VO߿_sрv.0LJOOW6mbQf4URRRYYYjժZjUcɤ4yyyUhn 3SEEr8qBݺu$۫k׮:zhsy۽{ڴic]z^ `M8QEEEVaҥ Y fϞ'xBђ%K4p@?rH(++KwOr~myyyiܹs%I Д)St!( @}#GԳgOK8qJKK-:t^|Ex :dQgʔ)Zp P_$aׯכo_|Q6m2_?{***x:uJmh]7tڶm|mٲe3f*5tPeddJOOofl &(--McǎhZ̞=[_}&O3fhɊT|ݻw_iiiuժUJKKkYS|n:=駟t1effW+--M~v4G1>}"##'N(..N|EFQ-҂ TPPDyyyYYb~\5|pyyyNZtrIR-w)<<\^^^M8^<Z|>)66V7o6GEEi߾}:x ꪄf~̙#OOO͙3Gzᇕ/I8pmwi.w>sM8Q=z$9::VtuJKKqF͛7O &hz',;ּMzu0YF>>> #` FWk׮:|$hƌ뮻T\\l{zjoU``ze.:uƌ})''G%%%*++kƈ™ƍS.]d0!Ljr/--MAAA ;++K&Ik̬Rrg0@ Vp $7779::*++Km۶TcbuEO>,;;[ .Tff9xAL&>Zh!$$pE㯫 +l/yf[֏qup]+%%~sAYYYj׮ʟ88g\X *))QRR9ٕDp $nݺiP^^ս{ j…Zfxu IUVUi mܸl &LΝ;jlW?~/^xIYi߾RSSmqQZh3-[!C;hҤInX KAeeeZd訒EGG+<<\gذaK馛,Qƍ_r-ԩ̙(d2Q&L<͝;W3gά}ɓ'+{{{-]TݺuӺu6Rw]wSTTڷo͜9S}5ݻƍ0I+SJ}]s=ر5x`M6ޱ=zT]vml1eׅ۷(,&kǏ$K%^/[B2}:&IOZn]J~wi>}Z[rUZZm6߿^+Wj޽ĉ2[\rsskP#G(,,L w{Jءu!# y4B¾]V]6e0}"YF믿Oӟ~JKKӠACssppPppp jpD͞=/ `kbbbwޱb4Ԉ#4bĈ׹=*Alg`RRRTXXhq-,,L畞.ɤF[oi4hNڨ Ci&egg|-44Ԝ_7Yzg5޽ޫ.]hջwF?ذ(ׯڲpEDDZvmС:t蠰0>|04F婢JZje1TZZj~ `lqFUTTh4hPTaA5|pyyyNZt֤:TڷoǧI-Q>>>>44T]vÇ|܁ٳZtRSS|<X uB2_ɓK/]c06DIII2Ld%&&n01*..VYY$X%%% jt?c06LK,QyyURRh:tHK,1]9sf6LWl1 `lP֭+??_FQW_}ժFǪ9(;;;yzzۻJ) P@@ " gQRR~9;;7w8p]b h`[n `$F46*%%Ed0$IEEE:|Ξ=6m(44T*JH`6mڤl|}}_U7nԟglٲB4 `lXTTWmٴi+I*++믿ݻw+22J"F婢Jم$hB[ֹsdx+c06lڸq***4x` 4zNRvvw~#X `l%;;;%&&jҥSXXE-_\X 46Gvvԇk׮:|Er-^XmϪO6Mw}|+W}4q9TJ7o~CsƦM-777P IҊ+&WWWٳG6l$I2L:vJ陧u|Z8K|}4}۵:e:n睫s֭3ϭ ~?~\O=Tmř3gt=e˖~… IooІ ԪU++##C-[onT4euAf͒_˛RQQ;)00J3˗_Ѹ꣹ޏYb;Cy+_~zG~9dEEEG5]?z!{zU^^{JLn7fQP`;vJ<Ϳfe\I|2]~|ڲRoڲ={ڈ#ÇWrҹsuO ]LIII:|&Ih59~JKKuoѿSϟ &e˖ ޿ 6h̘1:v옾TzzzzjRe"qĉ4h]~Y9s1.c>#ɤLedd{DDDڵkWzhhj]dRZZZ{߱cG7h JKKu Z矚Znz$i\ `lhT^^^W^Xz!ix>>rww%''7hٳGƌS%\r=jժt"͝;W/WWԾ}{ٳA7x$i׮]rpp#~mz?P/9Ag2d4cEDDh:uꔞx s{h^whfΜ)ѷz"x˕nݺir`***RϞ=-_ ٳW_iɚ1c&O:{Ç.Ѩ^{]AKU&OOO}JKK:O#:5=ɤ~X>͛7Wwڲe `l%;;;%&&jҥBeee*//9t:B]uٜ?ӗmηб URPTaRؽ+FӶQn)zf%SIGVсe[5Uoη7)[e,-W̻9.Dȿ7ѣG駟*55UR'Nرc'___uM պuzu۷+))ISNլYTQQիWkٲezwǒ%K$Ij߾}uڵk{OAk֬ѶmT\\lDiO IDAT#<۷k ?^>\]]5o<=zTKu_ZZԢE 7gyzzjΜ9?\e>VZILȾzW5NCdggkݺuھ}e24lذj;vy+ߋurrҡCctMҥK]W~|8qzQ9'GG 01BTTۧ@JHHR矒hÆ *߿߱l QHHL&.\M6iذarksosrk\z[tdc`gP=}$IgNK<;$ _|Z8:sٕ[%{tgfnnn3{aڅDʕ+r՞ZiiiΝ;KԱcG/:\\\ԇGQ2!s!( Q[KM_$xyzzJƍ yxxXi۶ůO\iTRSSe04c ս73(77Wwrssukrp:qiiiUV[ԩSi&s=rssSIIʪԫ_8?s1c hvv5o0Ծ}{H;,7T:: v[O6W`TZU&0H+7LOɑG&W$U]H:>:y 7`wa%srI2)l2":%/뤥,ƺE9g6/00PgΜ61u=z0'.mk???-[5a@@L&;m߾~*ukJzxxhJMM͛SetJIIm-d2d… e3gόφ?ydceff\VURRtuIAJLL/I:,ӧO$wnY:t|I犊vJv ^%cjBWԬe!Ѿ~TʎO3=IR_YT[(I~v!#nV]e`]|/Nmofޯ}ZxBBBԡC9rDV^mwa%+dy{{kر-TPP>3u]JOOԩSw^=ZӦMөSkMoFFqjJ [\Z; @ǎ￯mNc"""⢽{O>e˖'P-g;4i۶mzuI_^ӟ2n۶m5x` #{{{={V6ûvRnp5K.OZ?Ə &GUnd4sNyE7n^  rrr뫃jժUzԾ}Լyojǎھ}n*uwiխ@H`ʴdQ%%%Vxxʭw+YϟWϞ=?AReBn:rJiY:WTVN5 }&I5%Ƭ/$)u۫ B};mE_ηk˴>PiCXC֭W_im nzǏך5kzj=c֭Er'''\RO.IuY>@۷o[Թ]vKeO=f̘ӧkG2?{{{7NUҵ U.]d2ԹsgoO;ƍY 駟ꡇ3g9Yd}'رN.7P=c_ jڴi6mT[կu!ussC5j^uw}U^`4hРj'&&jMAMҷWXz233գG)>>^#Fwy/W `f֣GFZ۷lRƍShhhj+j^^^KNGQ%{k,ڵƌ1|||b }ԩ~*@={vuCXVr...ZbE5YO>ZxUG]zV6>7tبZ\ `nYYtqsss5w\9rD<"##'nH`6mڤlVIgeeiѢE_F;o(UQ]ÒǏ+%%EmڴQRRrrrTZZ$/hҥPN6@ݑ:q!,Ukjڵ׹svZ5)Sh۶m:{+k (==]:u`PrruKBBBbm۶iSNit?# *++Ӓ%KT^^.GGG(::ZM>L&S06uz畟/(OOOXo߾۷UVJJUg`)ooZ6yd} ѦMظV+RffޝGUd%}!@  " EV "T,Z*hkj)j#? ZJA$!My{3C7QQXDDD. ADDDDDDDDDGGK@4JOCDDDDDDDDDJJ4SǏ'55f\~-{WINN,N~5:ϚWYN_u6lPe'x/8q7;ٳg׹\DDDDDDDDDmXDDJJJ"//???DZh Kת0o<~^1X+ZByY `Ϟ=i&~:tOi߰0N:Uuˉ'0 k@Ӏ=I[t4-kME'򫭛CDDDٿ`{מNUfcʕlٲnݺJuн DN8ƍY|9˗/oEDDDDDDDD5ozEDDDFFccHKKk׮._z쉏~mat͞ś(w, 1ClO\3\?Ѿ&vﬧ8 oι#e}jcܱcG`?#0ͼ9QֱcGkN|83gt+\\\\^Uٹs't??*ŃkEDDDDDDDD2--"" UTTh"f3^^^L&J]*V1qDVX7X<'9Sۃ-{O&ʇ=߇;{s݈Xt7_#>\n|6u~hŊ̞=ʲh"##l\wu<ӎӧs뭷_H||⒅ 2uT !;;%-Z{GN8y$Æ oTȑ#DEE5j"""""""""ra捶>ȕe1=8;z8Ȩ%^[ZjRTTb!0077:T^RWZZZF"-- /ڗocyx"eۖTR*˓y衇ؽ{wKn9sR:tPܹs8q.]y\:t耷wMhh(ĐJn+d'SyYۤLshcGѽW KDD uNiF#.VEGG#}z͒ Zk{;֭[y뭷jo9((*|||.ـ??jҘ1c""""""""""""!j&߿%ꎾH3ǠA,[z5;vv|õ):Oy:(d`6U`Xuϓ?U1LQm6NjҦMz=bf3AAAֱldffO!"""""""""W/%EDD1Baa!~~~FqJjj*)+++~ri^T_AhL;MP?~鏄Dg;XӿbXz_1 tӇI+r/s̡u3fΝK-'665kPTTDvHNNcǎ1zhΝ;hu]… 曝Oc2x7fԩݻ@Ϟ=Yt) 8˗j%%%(G￟ԡEDDDDDDDDja\֭[ǜ9s5kIIIEEEfvo_WϟϹs0چ'oG0U൚?Mh6yxg=a}D>7#\~BB:t,8(7ڵjeɒI7|޽{sQٴi 0[V͛`ڴiDFFMVV!!!N- '11vdDDDDDDDD.,""L=`F#iii,^pbbbgyʂ HJJb̘1My\wK,ˎwSr:^-sHDv#))t݇q7qqq޽}QRR/NuLi=`222erQQQuj#"""""""""Wm}j7+!e&bzq:vpQݫiQ5/ֱZaX ͭzxzz[Le.cO~~>]tqZrUdggZR),,$77:]&&&TuV니ȵT^V6S5itշHݽS3EDD+H```hʂԟ~~~5(6--3f(+""""""""r SXDDDD;;: iM(""""""""""""""W%%EDDDDDDDDDDDDD %EDDDDDDDDDDDDD ,""L?~Rc111 ***8|p6-[cǎW*DidJ4SIII8`d999t] `k"""X\\ tחxl6?}^DDDDDDDDDDi`fbPXXj޽{SNW&0,4XDD[n_|VÇ3lذ*m߾}b0p""""""""""ҘiFMpp0F4/^Lxx8111NN>w}DŽ (Ri,ZZDD hSMTTTo۶mDFFpCFȏąd, )))۷ "ƦH3d2fp1ڵST, =zh0EDDDDDDDDDi`fEa6d21tPwTo &TDDDDDDDDDD"""PVx)**b[zӦMkDDDDDDDDDDrQXDD26u""""""""""ri`fB `fB `fB `f½㔖:`08^gffrq<<|xS"""""""" """TRRV"99e_ fϞMNNNF|}W<%v'jPlaW9k FaJ ͹'a8y߿-Y?lٲ2EִnM7Dff&{eݺuMR4hӧOj6u("""""""@,""Ҍ1hР*˒IHH`,\m۶1v+bYm`0ihp*-[Mp+,n$ grƻs 6(8UL`o7ʙEۣRyIY<}kz>>>:uܕ۬ߟjl6233 v6m޸n:ƍ믿ޠM&DDD8p!SNaZiӦMrWc6 NuwM7{9rd-"""""""W%EDD1Baa!~~~ v1AI+) (ʳ/{ApN_j8}ƣo WגvB"xS4~.JRۗ>{V<:hHScO4Zj߾lbn+2wԗ:Img+kZ \yv牴Sеo~O n[u2.]3;m"C؟oO-Fx`JL>ڞms; T!/s0Vgc>oqضjax󇁚f-[Ɔ f"003gO3\ /0gZnM~~>cƌaܹhQ'!!|r222ZΝ;;qssEDDD0tP{F/66> <<*'k?>>X֬YCQQڵ#99رc=sa4)++suױpBnfk>ӘL&x :u*{`0гgO.]J@@Kwwɇ~Xm< S|H3n:̙ìYHJJr*;v,_5III|甔0x&ᖽʌSݢvύvZ3#>(z3#yB_G7w7~mFpp06_<;v˗qFfsDFFfͪWf]v~Z,YQoһwo=Jzz:6muJܼy3qqqL6H"$$_|ѩ~uw~iӦjaɒ%|74kwhc4IKKcńc0ˣOOk~Dzr|NaQa%nC">?ŧKٟN쐮%کc}pyu85G3}d.]\=ڊ3`0G8r ȵenyJg$DC&֩Ymx=Ϗ+,w #H yyG;UHQ=i-%mr`ss9Y[{W1xggH])r*k Ϟ'qRC}ybZR 6)>?voVj[ W5?{%66//F8v;}())חJ&LX}?~{~z<3ә>1+{С7G};wO^^k׮e˖-deea5j/Kwp9ZzՄ֣EDDDDDDJRXDDDEEq!bbb?fĉDGq| Bm&+(9sv`C8?sҞ(ݱz?;VAY1m"Cjw+/ Fcΰyk <))tKq7t؟q^ynW6333X|A7n~~~L&****ҥKcDy!!7}]&L-{ǽˮ]oTWt9fOOOL&SO=EAAwuL4D݉W^R֬YÐ!C ]v`0xǜQ^KU/W1QXDDG=(//'</CɐΚTAN16 p&˞mhwa RSN@s(:]H+d״ d>xz{*C){ 綷I$\l_]?o7k Zű,9?[u/wYOU}9s}n۵kkl6[GǼyɡu۷ddUmlGɓtjRTTD˖-3f SLaҤI2cƌ:_ILL$%%?~<3qD֬YÆ #""l\Ғ?T>999DDDG{4C& GرcѵkWlb޽}Tf٫kٹ>4"A#}WKv;ط-|{m.tf!% Kc(-?⓿';==mO#ZUsc:\|t)Wf8Wmټ[xS%8]zfdl$q:w׾eݤ;w{߽, n\liaꝍ|vl_WWhhAǕ_S_n$(܏J|Fjҷ}߽ؿ+qѳgO|||o+ؿ~RF"""/Ծ&ӧ{\ cZٲe [lm{9s&m۶tܹܹrRݻwϑ5k1zhF9s{01|pfΜb75[;v8˷>'"""""""Wf4C,Zٌ&C:fĹ3a>S6oތd"((gMy(lqL .?TvCNtz7wǠHC79+>~u}t8GdrlIZp]0&G__s:"6.f`r9?cLJ\ @ϡoG?y|8| ԏ;/䡿O`VџVY=vsٷ0{7?mPzj ?kvύlw k'sVc\ nL{X/>vĉYb7xS٘1cxgm۶>}8xyy)SPXXȫ?^k!!!3 p ^fc0.?-ZyU*.E+*J)?{$.?٢2|<mھ&?W+Ͽo?ӄu f໽y|zo޹QǕGZZF"-- /g6&0"??|td}]X,:t(ӦM{u>|RSS֭[ gZyy9tOOFlBCCk]ʹ:ҡCG,5'|[[MO{W+Ͽא[_oC[q:?s+hyo4wwwtqmv]hժU֭[ٿ?wqS4f̘Q䥷wސ3\@@@@nݺzuCODDDDDDD%EDDDÍG zalbe7#o]Ю |J<ߡn$?hnEd:דO>x.K/U[(Kχd.]JΝo*wQ)),͘1rȵCK@\AWr j\ZeX-3 u\_mfseF=Hi hGK@\~Ms~qwr\I?FQl$_DD:~8Nbbb0..7{1233iѢ_=W:FoGiam#CzO"Hr=J||<%ٳgӦMvZ9N@y~,|>cG!3ڹ' '׽jWΆ60 ןÍCΩMǥL.@ȆؽۗÇ7u(""""""rhH3ĪUHNNv|l6G Xx1ܹ~7a w]u05ުbƎj_P؝trW?pͣuӷk{2ڵCrxpZ]{`Q_ ӱZߓ\DDDDDDi4hP$%%q}f;w$.. "m3 1WXqL{u\ul6(8UL`/c 39zyV/^8w7yKw%ٳg3eʔ*gkl6N:jM6N, &SFff&mڴNnn.-[e˖֩Nm׿ Auշ3Тrp6ʄ3]BGX,''ϛ: U}{~8~ujQUƕz~[g? ~n{`U_i|ڞty'0b2%""omϮ(..l6T9rM7n:Fltk),""ҌY, hGAAfv`0h߾=lxF_)-<v{ҧ>JL<:hHSS4~.JRۗ>{g|[Y ar:A>;q2 ҍG߾/0,[ 6Tcǎ1zhΝ;hOjj*v//KvqpwwϏPt8Ç?~%B򚪯} ~=ρ2ܕJ y~/9̓c}⯍̞j^V^ ϔ!ԭ5j^`Μ9nݚ|ƌܹsi>@㉍e͚5Ѯ];Xu]… fOc2x7fԩݻ@Ϟ=Yt)  IDATHBBdddd`ZIII!*;MߟxPDDDDDD0--""Ҍ[9s0k,/ :{XII EEEW<'naĽLߏ䖟 =YDD_Q_Zuu|yV>L}8f=oPK/b"v #ޔ9WKk֚IA՛.w[Sٳ2zUo&{ѣi&޽;,X(233t$_m6ӦM׿5}~!7nt:?c̝֭;Qn6f6[,,Ko,k,LPED/UeMe6$eA`ƋOۓwIY;سͭ`tǿ \?̇YVw3sCرf.2 Q7d:'Ҿ۟ \,k`oꐽ}݋}ꮿM tX o?+1ާ#'gZPM?3?ۣm6xOxrm%~Q[CHHH ;;CŁx݋l6k.Ovv6V%K.XغټyMFdd$dee‹/:ILL4H~شiSd曺w\u4XDD=z4FXx1ЪU+:uի1b|wN5w&cw&LPOGh+&6H *Lf<.H>:s'WZ^4f`gbv%;(+2w{ 39zHD't)Ӊҕ޷D9 zzxxT7d۶mӇZ'Nw^֯_@llcP/}_L@@SLaժU7q:[NzzU]^Z|ߟɃ[.&y/ӍWy|}r7۸>6}a GaN{Y]?r oZ9'3S؇bM5rq`q qpp7gK}f+{Aػ 's/%: :q裏sNcڵlٲ,l6F_ W sTի GF `f_FGGšC`ɬYK9vXS{Ew$vGi9uD{oIV^޿VK{6Vy1SO=EAAwuL4Dkq/77p3[v*lNZnMNNN}oI<4b裷_`Op c#grIGpk8b@I!+/{z,|><$/t鵮KCpc<mIp8hg?µ)(.w$h_痚zmv} ρAٗ~>~us*fۃ>HRRƍDE<==;9>>W^yBJKKYf C !44vڑ`ǜȅ/uUߏ \c\;t`VZ1~x?6m\鰮bl6{L}vn:c=.p̸wj߾=gΜ,ILL$%%h2ӧҽ{w6 :HnnSB)!RTTXⴶ7{5qeK{]C~?r!X~IԮaD{q{b)W]A!Aߟɵ/ܘFW3yy1bWX| ә eo_K$d룦ks O@Ǯ0ʏnpm4|0<7o999ϼ}鳬XJKKy8q"k֬aÆ HDD6+WVZj"""+""""""͋iL&_P;v4vSPPسuү_&J)-?⓿';==]{FTVWKvՒ+.p.K9m񇷷'cBוۃ=y}uamyT?<`*8m}x$}VGdw, ڞ_Co 5_7[-_/~Q|  OȾ}쳺 >F#qqqk=#F0g 0̙3׸ouvQ,ߦC `fE1sL^z%,XСC3:vͬYxO~Û0˯}t8GdohЂRsEe,~q%_\Ia={ ׳U\7w!cO&q%ImSnnnL8+VT*[h[W^tЁ~q;ׯ'N$&&Ν gtڕ;۬_N:ѱcGbbbx衇ӧOO>sΌ31{͕_ȑ#.-[^^`H8s7\/mؼn#:Bd L|lۥ . m gèx3s?bOG~0j"B`YQ-W_uC8 O Cnݥ>_7~0s+&#3r\Ucp"n\p3͋ Tϗ{9∍Z>=z`̘1dee9}.\NLL ۷YbsOM+""""""ϰsF[GDDJHٲ}=FdTRnQkJQQ@. /}Me.q58WRcB>tf!ݮKKKcԨQTV\\Lvv6.ZLFF;vt ;Ν;lj'ҥK6&&&TuVо|r]{>r]緘I:v؞,:5k+̷ub[WŐm_>KQ5ikRu#4%??|tRiKW4\:tPi$''C{*>EDDi&`*F;;z86VX""wj`h4V/W(@XǠޢyGؾ}{Y~~~cǎՖU맥1cƌ&M>\޽GV}WkNMf4]lgj=jפs5\-4{ olݺz+""""""G3EDD+9rf/`+.mpDDDDDDD ,"h(xz{7u""""""""""ݵ񝈈wQW_3ol @eV"FuicH^+"W*؂?i@V@(KY@HD6,,dd1d#8G&sNA#""""MRXDDDDDDDDDDDDDEhf2ΦPqsss_W\WZt)vb„ s= """.˗K^$88(N:ūW\sغu+woo߾1tP ֙',""֯_O||W\KJJ󈈈 HHMM_gѢEXV"""0a}ɼk,\: ;nPDDDDDDDDDD:qAVljjjn'00??nNDDDDDDDDDDvrf3 ##LTTwqݝtqA2w\͛ba˖-ݜt%EDD\W_}5Fwww9uT7g&""""""""""]I`SH`d4ILLd޽l6, ;X,jkkEb2+eݝtT^u-Zj%"" &8ڿK|M , 22~;N OOO8EuSv""""""""""Tqqݝ\$XDDDDDDDDDDDDDE,""""""""""""""Tq*N@DDDd";;JBCCILL 9BEE!!! 0&QϿNTp:x|:F7/z+Sko[nƏ߮#Fݝ۷R/~ '"""""""ʴXDDE /m6 O(,,t?lذ^z.;Ϻt[]f?t5.;cבvّ}vrss=5kxgٵkW/5ȑ#N :EDDDDDDDN;EDD\뉏'-- Ш}֬YDFFP__G>sNYm?hG ƆZ-M,Vc䋗Or[)G <|=Sl6ZD5zuzۃ-Γ[RH ^ *k3ܮ5fw(,?~ލߗo k6~Euf﬿wD/j[C&BbbbLHHH{Mmmmq[kFaa!~~~=y~ ׷Mm6DEE5om޺y 6 nY`_nogC'aUT-#q<ܘL ɼ[9%G[o*Lʕ+t:Ƌ/~;9w?8su*}馛pww' p;ggg3i$jkk)//'&& pw;~""".́Xt)K,aŊTWW7ٿ"vϹ7Rof>]f8"q;;)!wO亥w2?fjr su+VAogD͔2[-r~VpUDz}/_Ƕa/ ܕYyS$F&wzrWoP|Lj$n" |sXVVlyῬ%^4[,d|uc#Տaɻo8ƚ6kXmؿ3+8|g;g#"Տ9}lxܛr 'yٟ7?/ 36DŽ୿:_Ky ++|V+k֬0ۗ| cmߒٳg3b8y$wW^qz ?>~)Ǐٳ=ک63fprIVZ֭[[?p@rssYr%撛t7==aÆ5ёcEDDDDDDDXDDۏ---eܹXVV\ɖ-[HMMmd2'6jNl:H2?w'ᅫʳep~aM?j2F+{sGO4ynKi/X{5<-7Ó/OhC.@ߞ1m?21'pqTKvΉF~6ZF}]x k~Lwc 7bӿXpmʣ9gڄo؁W7Wqg;x 52OcX@QE)dN^I6CFלʭ5uTNÇs)**bÆ ڵ<'pN͛7xb:u*}C5c[ڟ~#22g:#777ZBf g IDATg0$"""""""C ãBj?}&\΃_(+;_]68c7T,"Omy㣬C(8pʳeߜ ?xO&e}\9[G]8Fh04n\ٿm"2^=t8MRAyqEY*WUC"8]tɶ`  EouמS f'8Һ)nl߻ΎowTLL}?lqG6Q۟oa!QQQ(ۖќ>}ɶGD Ŏ 8p`F]֭[YtimnEDDDDDDD/t 2$&&w^l6L{wޡ'Q[[8:=6ds{4*Gۋobﲍ]1/`Ml[FWf構9ƧXiv j>~5]^!mP0``0PNgݞO>]Om}fO?"媑(v>b6flS촱)ɇd>}`?zCY+Xc&]9U=02y9'qn&Occ[v7i{ĉyꩧX*WTTm۶͛\ǏgŊثWfҤIN瘖oAVVݻw;?66 lviwv|^ɡu}5⯈,""RSS)((`ѢE,Z 9'Oge,Xuֵ{s x;m)/zw| }>~"oQ>/mp:ވj+xGR__bC"1I#n K 63,a ?3o!lD^aA۹6dDRDDD]HJ൯o6qoII i֘jkQ[^MR< i]-,(jzNWUQ)9_N2b1{xW>r;sokTj). !*fnbda>h~ >~M-Ng˪SX~(Ӓ免2 ۻ:uwlɓ6f'NG:~~"""""6ښ69e6} hWG3pJKDD: gXDDյtfg;з7ܽ=K%*;˛wws#!*A~:: j1NBBB⻻ӿwt]DDDDDDDDEDDD2kKf{sۨ7-yf.b6""""""""rSXDDD{_>so6s7wfی<[DDDDDDDD\ """".ͭSpw_EDDDDDDDy2 """"""""""""""@DDąL&$44Dܾ ZTTDnn.); KKKy8z(saĈ)""""""""""QXDDE|r|}}ի5kߟi&fϞw]x1۷og$%%uRDDDDDDDDDDI*OZZQxjd222:cѣL* <<<@^(..n3gdǎTTT;; """.h4޽{lX,233Gb())СCtx>}t,"""""""""" HHMM_gѢEXV"""0afwyJ|||f\s5h4: """"""""""rq,""⢂x)))G[@@sgӧ;%qq Oxxxf̘ڵk߿?[l鴸"""""""""":N5rHΞ=Kyy9>>>ݝ """%;-""""""""""""""Tq*0ٶmX,&>}Ǐ_~rrL<1L9vw:]ii)?87|3vrO>qG#3fL]ENN<SL矿h.]4^y啋64Mw2/_/z"33`+,,dŊ9Kyzt.ŋ}v/^LRR۷sW;5k8~8)))jw3g$..Gy={^yofKZZCeذamniH`~zIKK`04frUWqɋ`7 /DHCPSc%<ܣK?x'gv7ESokK٠<Վ=ɓ5jT&Bbbbl6޽{S[[켭???ZF^^!!!6jvoy';vlƷ[_\\qqq$%%qEDDDDDDDqAVljjjnߞ={$::"g=^|1AtN{mذϹ/IL{4 _82{̘ V.t|}}GyQ͞211(,[Ńs:1̚u2x>Q^nvz}Nr{9r?c':zgC}}=M0`&99rG[~~>^{-/'%%Nѣ gG2z]fJN^H*+-,YӠj9XlX,6n 8؃k/'7wsfW#GbXOy\uUj8Ǐg۶+v 6ɏܶc\YYwmdeeje͚53fзo_#,, i̞=#Fɓ'ٽ{wݰaO?={ѣG;f3;v<*++YdIvb4{gwS֭[Gnn.]\?;l6fk7N-""""""""OG@zJKK;w.V+WeRSS{hMSV*dڴH elt ԩ؋ӧdŊ3,X"gڼS#:5m/kسp*ƪU6m ˫1SNܧOΊ+:Q=pn}))),_^z1v.;ZDDDDDDDD ꫯh4b4INN&##ӧO3rHN8Aqq1uuu8qxܺ3.kb-poxQAAid͙Ki[nɢGlY?[fa6w;wNrr2}iOPPsOOOL&999 z+JUaa!6Hk={ <>)((h{]έ,l2ݻ """"""""@`CŽzUUU|̘1% ^>]bz ž/\^nqegt4~k rgٲ~,[֏?$% ))zk[t&}] 3gd̙$'';pw:111l6>w{xx`ٰlM~7`(^vA5ќ>}>EE...nPt1.\Ȍ3xۓt,""₌F#ݻ͆b!33xٳƍ#88ٳgwjѩ;۷NQpx*~ ZU@mFzz))!xov(̙:6n<שyw4~kw K@S~4}h.>}P\\ܦ1L8zqomEE۶mk/&&___6odڽ?~<+Vp^z5&Mr:Ǵ4x wngժUb6IOO'%%ώ;8s 7ntzngt43())q9ӔcrwiniEDD\Tjj*,ZEa00aBw (//ozjj(1r~ KdN6m*m'!.Ml.VxXGۃF鳛Lƌ 7 #.nÆs ۙF#6_гgOw>^^^L:ooF[j饗شi{&>>$f͚t~̛7#G2`"##ٴiS> \vebZym> o6}!551cƴ񴪣Y؏ 7رc"dlVېc;wm#)yH׾:z%%%xzzߦ9ZciS ɓz|u啟1wn,^LU8FcԒsgۢ37> &= h&Bկ~/rƗQXXH\\\V?uklɓ6qW2w\Zk4Έ+w޼+\=z$߿ei_fӧ߀}u0DD|""".44Ӌg|GdHbo;+~K pcn+̘1kҿlAAA߿ˎ*owݝfc6Y%11ͭ逸"&&fO8|0>""""""""]L築]L||MeeeIII ڕrrL9:U˸qAfLW:ᆪQ^nvz}Nr{9r?c':zg-im>Ix&M:HBnM;LMjݺ0z~bbv1}a l: r2,vat04Η_h+̇a8_II* OLc^Q0m]h0 5 _MڧW_m8)01^} t>1Y?,www`EDDDDDDD.2qAf8@FFf(|}/l{۸q#7oj2qD&LЍYw 1ImA0se6Ȩرḹ?>%KrXj9[,6,Z ڵ3fL7m_IV֏0aX{rj}SW]ՃtǏ6JJ=VƎWpX6<8c3DEjf}ٰ~/ [zz:K.mw%&{+++n1AS3q8e kg__ `z/ N֢ Gӯ i_;pn |xcdIBl_cw--ba0dL[k_ooFw\jTqA2w\V++Wd˖-phäӳgFU 6-Agzy>=Y⌣|):5q8Qhkm}F={3dHG Jy3ndg_yޫ(O ]QaƆ ص*Ll0yr Z/7"ohhSkb֭ٳŋ7'^MǸqcC ?lׅSٵvmN?Ȟ֏/ LG|T' ?8_/_N^;v,]tH,""₼шh$99 Go.1 IDAT @bb"Gqpn{'"½\ZG]67gN,fn%R3epww"#lRM7df߼f61a0C5tsO.S9h=԰O7&\yH(>|8>!< ږgWkssƸqXl[`$,""`pg4:QtO׶اyqq}AAҦ#[/(ȝelY?>s/`׿ّDFw:T11^lW8v֭[;|̙39s&ɼ;}mΣ9=ceBP =l6{p G6,&2ΜnOiѷ>/o=<|*铴ۥY3.\Ȍ3x;LDDDDDDD􊈈@Fٻw/6 Bff&L&N8(Н:uÇӯ_LSۗziiFYYUݻ+YZ+fRRBmQ3ulxShg穭8З6m`(.WbݺGDx0qbO=uqqEmʝ?uTG[ӧmӚ>Z l[~1#wmn:Ns^bFOr>ip,w7*0}?F\hw?/:;6:?3:ߙ9hSƎ˝w""""""""H;EDD\Tjj*:-j o&f///L&ǏgݜuP^޸pʼyq^(.ᇣ><'!.ۍ}ckyshhogۛ3g0!np~քyxol60}azIX&~8=]h4:s-OLq=!( 7 Oes0a0b"|K_σ_^|~7 2ؾ0 mSڋ0dL{R:ߙ9fxZ±cHLLl[PNflVېc[)"""m$%iWGwS6""""""""mק߀_6DD.-,"";CmCDҢ;EDDDDDDDDDDDDD\ """""""""""""".B`;EDDWGww """""""""""T)T|iw|ullq*-"""""""""""ݦNeԙLl2xzy_]:EDDDDDDDDDDD\\qjk ٰX-CXdT%"hVS]EmM5^>w `0掗75TW]Di*exxzbbw <<=9_^v,"""""""""""L&ܺ,э:EKD;EDDDDDDDDDD"mL&?;>n6zOZB r,-IDۼ. 4#faZZojsɥ NK`KЦлw2VՍzffՍz5/OF\5?_N[~`2u@@\ԜDDDDDD~L:;F\l ګ8u4qqk7;ÇǗ7WF޽9t(u'?ӼKj j,&buZxo׸6oi]KODeZ)--#4Z.鹺ڹTk owC53XmVb h|c9iLōt~Tכ:8c5VV+b1`0[ <}zp6,obDvoSUE!~ɶ/>~C-7PErd% :FH|ELDDDDD\i)oOt6&/ў{7[>MSn$:W)**$>7~~=Ɵ򷿿ƨؽg>0=ض\^Xqݫ"6S-؁?ëwFHM CǏ~ρ>yZZWp0xp:{-n;.0=l\6ZE@ޝL]ځ6sx$ijt,=}c[6G߰kOc\V>~yW^@`K`;JJ)~o>ΙGd$fs>~^v;OX>KHpprK˙_Hd2jpC|_bw &O=1goz%O^^$\vw:|B0L|q6w#22Xa#3gyFk஻~Ell<7t+[YY[ofrv#+v/U!l //7O|a,ġ%iؽƗ mugOk^jMQkS=K) ^K␙d{?}3i.JeU%++ensocX== ^E"[%w2,J?#5U%ض0j?[L^u\ [lmDGLJx|W%]݂q߂̥% ܻg|FO}CB8}8NQq1ѽzQUU'Iv疑ɰW5۞Mf/E99_Dz3_! | w.βiUU6j;|f&aQ `>;Vߣa44OSrߓ1n&ƍxdl[R[[i?H1klֽ>ƍ#66?|}GQ%wH("*H J^QD"*H",RTқEB'G\rer|۝}:7;׭qp+ܹv|>R*0(g` +W6(o۾>}@^nN%  ;qUxTw ^h$;;|[;0  ˟z=qJ{eyF2tN_ )$f.Azj:Ye d'؎фؕEPg&zTd6X|+udx׷]HiZ|G!BƪqK{K1R͜9 %Aܼ;ģWru~Blo z=>.خqrLŚ5kh߾W{   'Ot8MjȺ>::uhǒRGVmm5\z$,jTIػo?7JlL ? K,$Xo zϬ<& }65e:~; 뛟o /_O^k9Lt%a$6x+ FrwYO }hHN%⓱Ȳl$^IA'8J&'QjR~K zK&Zu䒓KNN9_4:Vd * vg]X65k7殶s⏯9.Wt-٘hA2ο^]Ξ;oh|TAdZHKB{mtWdY&j20u6i̖߶a$ yPLĵL kߴ̚;JENn. ~))9.^B̰ޥbqAӼ=lݾހJbYj5AvNݻ+/g-DGG~ڶՁ̰ū @ \Lt?O/x k)o@˰6o!<,tڴj_EVrjŔ c?y>%|IN:'ǢRe, N_Jj*NɓgGrGʊ|9b‚h(i*p1/[nmv^rlع{y͚ù 6ksM2ym|u\mNMٲm;^u[ɼ<7w.&}f Шэ6iBrJ*۶F\v~πh2˖voi(- 2?^cO?ѿPVBm뚙kʝwzgW$e߂ ſNzJf3:Ym:x'Fdd4Z>ƃ>~:RUI;rS3绝kh=AwҵDf XޜL.AGYP3% h-MutZ_/Ny+<<|=|2Q=wtkN19r>jwŬTi4=A?*WwGa?]vm"-6Ջh^k>|||/ Jf)G$^zmjPA/>h1wCyZ}gL2jܤ)Ԭ^ǍFVh2>O5sB7i ?<_\Sٴx*%\/Z͜: Y \6_#9%W^|7o]܁_|3g2i(4 O>0}4ylGetr_-\CGtCG+/,˴mՂ]DfVaׇz}[Ю͚4fнe HϹvgU #;ve'+D58|!2O#u!OgX_ӶzUpi_D.p?A{;Fƍf?$Fv0%Ξ;Io| .^t]NJc u&IEQ"#GOPٯS&&{>̆ޣg^̗$k#@޽gȲL@@zVGDFFVٹsSNʕ+ȲL޽1SҬ˷m6,\-ݻwi޽{AAF0\Z] [ҡs1>824hЀVJbb"5j ++cǎcoq$dx͆_~D8W~n-K̨OD<ݟo㇑<-~aDbG~#mz=FԳ{m wAށOh=k,Kb3첣eE/Xd&onlyc[w5KZӆVxUsTgx,l@s}䮁5\~l˒,Ki@qT=mH՛\*m$.&/an-1W`AA(5.OEPǔ'SI$ca+4ԧ}@|ņ1.(N{6ccb0dffY߻}cJEtTiŖt*jS|^f rrs ]Z-+j ĕ+ǐ^g媟2}&5W瞡nʒQȕ =*D tǎIf:ՏGg4#=r*2#Mn0LdgXގ̘3z]{HPW󧥧bw ஶ_rr ߟYR~h+MkWdʲ}O@kxh 3N#wK6B>>6*P~\ڵky JEvv7n]ٷ?'00u-uج,?>B=7VKBTH{P1.kعKYv IDATxk1Q@*XξSH"8<6U-f.XA^z-?E뮽X>o| $?߀68vEP{}<`UTZYo+e Mr=Oӟޏ)IzqZ.",Z/oP-ICެ7 #zlA2)ǖ7c P~Squ(qh#@h3mzr2wwNbchX ,$_#'7 oJj*6]et"##$Iky%աm:mCnnKd,[Ԉ bz`2K|x/>jW/xʼ| %C:'sy&Lb& 6s_mjȈsr i-C_Lt4:ΦL+~$^d>M=%I 8} v]lQj,ɊK6B<*h+=KZj+hذiʗ/ONN.Ӭ_!4s{q&q fVMC۶o'b͚5ž[f coE/`AAI#9hJ|E. kd"##B OPT =pL2Pp~iysg9vuXAeYСCtОᆱD.,Cv>q8s&0(ij'3#mZ̤4oƼV hѮ"~%8^\I~4rww^}XFW3Iy(1 Ucϸ _ +Go̜̒ob}i*ߑd6cޣ\RFLL47q.ޤ^_SpAD/L)Z.z>*Zg~?PKҐ7j̕@Ud0筘D  AMÐR`/eQ:u$wm'@n ;OJX{>&-u6 bkmN f_%ڋ ]s^Y9r8<赛,\9[׮s{\.Dՙaƣڍe ԩ&9Y*SRc6KdffϹغmݨ б}[/*R*&xGiu~Wۯ`{|u>a~6Л$ftlt')d9קݭX"ǠR;5_2h4q%'NfꄱȝRR2OdD8_pXcܭHiWj5FQJez)Z-YL>{;u{vڌ7c`0gwK_xYX"n݊!25WwjiӦ|PLVE$K˖-ѧO_֬YCٺu+ݻw~l2wNjJuAA[f3=/5]aZ֦7''-c.^PߟVQp֭hڴ +oŋSMʕ+njլIR$ɬ0:uz R7i֒+HKBѐ]w5rJn/+Zt,6˒ľߦS`}}x^?lk9|!SWF*[`(*/ш1WPK9.0DViuX,Ւ7ldԂN* 7zh|Z= [e+XlZhnX9&S>5ng;t;ϣeҴ,[#M7ymM#7g*9,둇F?Cll,iiiՋ/Z믾䩧fSn]̞(ij<#DFF̕+W:e * JV*oA^r;PW\ƍY'qYʕ+ׯ˖-}9GAA.YQ4h4;ufw`4l6K{SnZ|K``qIZ1yy kFRI3TQ@TgOf[oRfRmZ߉,˖ѡ$<@rE|}hϿHH+#Edp-[5 WoQ0cX_Gw^:m\ln7h%z5v?G___|||Jri.Sz֫߀+/SBE; U- IByr CZ‚/f}N%TH02k8Z_7Tb0hР ﻷɍA4g;4,qb*|7y?gyn\7wǵ-o93gx<]ZC97)hZC ^M&6&ZQE6//kDEF{;$I"9%LXCmd*O%P!rid2Ѥq#{Pa)̞?BBCʢO^6={|@׷.o aÆhZ8~8իW[.Zk׮S'UիW)f,/,E!'ΡRKu8xT*by__ab[=(@ff'il[O x^2z)zgӣcʼx@egg d>Ԡ!6?/wT+e9>Ӻiח= ٣e$reVNgMкfZ0}Z<= ~TdjS:*)'u T Owb/JI,z BYtVDgVZ"(63 ouh]JUp;(LJ*UxlwoG m۴>  NUv'իyӗ͛oƑǨZbclR2:#GQFb{IFpq^"-Z4gؾcCӡՖpp}n$'Ҿm֭þ9z8= G,%:6d~7bz#,:'=]},7",#q/S:3|{Wb_ڌf5GM49ԫItT0z JŵLԣ3Xz4#>XAJ^'~M#8yA҉c#Zi^lsPR+v{6nʵkЮMo+K3S}& Ӭ3/a*[FLFg\n XAAٵk7SJ˖-}7j5 |p$5Kˉ_ĦH$Quot'3[OXJ%\=,GJOܞ'(oҐ} ʄF 5f _$ɗ!8$#o=A*| gOG 8f~ہU!˖*YVj`ܱ sA䍡|ÉۛZ^JQ   7C˖-l} {gxgJ9USjvrkJYʫd:Xi.f4zQCn PbgIgygؒ?+7[׏AAA?OY2ss̒֯ԗ%}bmAAAAAAp^"2FR_ `AAAAAA 1M!KHefz{KsY 'AAAAAA.W\33uȒTTj5~~~Dx˖%BqXAAAAA[\r:R=[*JSZAAAAAA_B4       K!A HI|C6f0 =    XAʀ;׺! ^    DbhAAAAAAA /<7|B[  '\xrm>HO%"*E)gj)wCx&MS^)s|;7mnEn2Ya#!<<АwSg$ByBC?ɄsIsp!N< Hd$44 JV9%]/w =տ3ghҸw9mCdddW.__ +\IJ?\l ƫe{{qwy%5cX|]N-f*}S#&EKgɲ㹒p} ƮTGgNJ;4M7h\]hnu:]Fhh(AA˲7G19[xh;yz.c@ svV4oյqN 8ʅގ::vI\=gUkgdOrks; u.u]9LgSƀAR^=ԅ(ΣOtZ8w<=YTve7 =cE񻚿_)v͌9sOۉWpY\˝A, eTNn.ƺ 8v8F}J8&NJsL2i3g|{$]M㑣8~~~0꓏\"?۟͛af$IbØ8e*Z`-zj5S?+ٵw/YY~o*Ͽ c>lڶj͛ uzƍwX~e"IIMC|8jq cj ]l_+~mt3͚cSsfMh̦r,0mvݡ b˙u.ll޺9ӧZ?;z8~+' i^ֻ/FAz?q7?\G;wqibeKĞ}7r FttfMaM3q͜ew_r+gQ_`0Pn}pe?^T*ԑfc@I\})gVaİiѬ)z?q?J,Ktܙ?Ȧ_V+[ӳǟk6 kִhۺ[uڝUstӗQgo֩ɽǝksܹC8;7:DYcaEJ꜅;CG2tȻ6~gxQ4Z )DmWn\98~gT(o*1WL/If$%Y!U& w`AA(CL&{g ڳѣ[&m[o>>2荷eztzȲ̨q~^z?/x{dæ_>k#ǎstˡ}m|,ؘ>.#ƌFjlmgϝ盹mrT3G\}:Ŭ9pJ-o?]G֭c6l{%p3ܽnpJj*r嬟ҕx5ٳo ޳*Fg$IOGyW0P&1}rit7шFQ׎]jLajF%sm$I㑣@}Iz=o3f۹ϙ3z֯"G;wgRxy}~\렒kJr]87gϲf:~;BeEe[wlhwu s\ζsaJJ7׫2 t>_|MzJd:kc̴N׷w/"#"PUz$$2DOPTU+&$Pbg9\v{@vmH<,[*ӧYa#6m&22GzΟ'l">)gϝY&c2x=RR y<dTP 4=؛˗ӮMjT4%wiyl۹ nв>v^Qw:''HITSR WHff[JIIAӧcNVMJje_ $8>('sydymڶngҢEs(I2ϥKbg6)AR<<_ J C@ BC0~^=]:wbM{NACnUL^|1s6F ܠ#ǎwdvuחķtr;%y-Uؾs'z%˖e~Jt:=$hi¦} j;eKWXwT K7 ѮM됈H@Nn.Gg؈=~p4WLVG׮Cdnpm*ULvL1|.]w/Sj"KUq1>z3g1wd+l۱I3y;o{v-:<+ޑ ԭS`2YrCvNG繢Y4fWytv,iU\* |||s>f߿ny$3J*~wA:xzўrPVLKOg}f^{쪼nǚu`?JՋ'2'L1,C^^)h4NG}Al8^e?`$]MEiѴu¿(Y 7A<#z BH]޵ )6|3q|OM7SF~F}&%5:j2 NrW@^xLvm-TIpw=jCː˺  ؁Μy_<6C*ե x5F#J4?h4oۆ?Ō):ӣ[~E{䵨?}e]p1pq~۱xnϐ?ۃ}WIX7 oΔ$xX.̛yv&~җx8>_xnxʷ:AAOr<1|JDT-H(*<,+Є5[mIRq8Ɨ<=9]Vtԑ{ݭxv+\D.uIFtp~e}n[OI1f{ܽ_ڗJOv̥}.kkaeصWYEY0g`Y0o-ۙM@J_Fz, IAAAAAnS)/A+II:AAAAA_@4 BPꝷ:AAAAA6wAAAAAAA%D`AA(g,-^~.\Uaڴ_/PV߼O__ر猫ˑUeKaχ&M373?%q:eFݻwp4]wA*(q孷W/hJ&MK^=z8^n?{J{l/z~sٻʼoDEAPR U6SGYb7V~n֖n[fwn[me.EjFmntg ĨM4(80?pf|jY}kzk6/#m>>+:vkkҾ>t>xG4@o6rԭ[+~~Ree˭z2[Mq8B#QӽsFO#}|f++b)2aw}w 0ܻ};'R +] 4n\?V+}QwǏ7Fr.Xв\mw3\/7?Ο˥}}^oC**2wk-avKoƣO3J|kJSZVZoeeUu3q> yig'_k/\;׷/9>掻V_>}i''~V>zC4=hFx)"B:{(7=WH?qBg$i?4KH~+cհa_}e# 7e&900AzyD/hYV1IO=հ|11ҨQF>ofYi%k]H]zEece{1j~u:oZ>(-_ޢԤ2#~ڌCyno|q1B8y]|}fx;ߑ3lw 6on8'sߺ[2.*z޹ݱ?0{F8a)>VnZ]vao:cr10%\}3ڿw?_\7qt}#av{}Z9ٿg>x4w_V㛫c-W߬9o \?p~O]_뎍5ʧO7.y|Z $t4/d|RH[ԏ6Ek' OH0oػ ji )=8qoqO2~F?nreUW~RSキzvqY<55e7}ܭҾ矗N2~h<>|Li`#"$Яm~&O>TzQ `O?w =dHZ8!j7\λJ۷l;8`ܭ}ԎSzmR_--[fZei`n{ԗWWKgs8YTv'[j8W2NXo=oyWwjOEE6ޱ璹cefow?H_VZjj}СƉ\3Y{}ZV/X_o$.Ot]{'L4KJ;0>vj9:>rj.)|?p=>w=Üb?.]rS+%hu3~]sW?.8'z44W^x~}xSS@'3rWog[xAc~f$^V6~e}o]4_qq|6鷿nJuw|sw|oVgfV]Z{fOk}>q׿$͸$]wq <ת?p" @G3ot_Kqr$(ȸW5dH˸k}_~Dq׮{?+~V\$sq9boj##=;wjEuD_\l<2L5 78zҏ,-\h|:i +3?.?~k.xe1ah)-5|Gh>}dA-+и+Vo'E/ߦ(H~$k-}}t:ycWih w[8>q>}Vc̴9=fS0y ]/YUW^Z3jW)t49 IDATt㗾 /:_tq_ɓOr>!׵gw Bw{BSۯKcQ8ms_?Hc/:ߴet{Ti-w?isqi4kܱCkMfMڿˏ'OJ\\]|ǡˏIŞƉ74Gƈ$8܉wk0?}#$y6}OwYќ㣻\ׇ;-}3Ugi󁧎XF7K6V_?}7_rqd'?WcW+r>Yi巷[ھr{wsSO}W) @G{wW 3~%{Ʉ_^~L2,'$պu͏vi* jhi6c=^я̵VN%-L1Fxb{Zb\=/:0.bsƾ4?_Og^ykIe\J2SO?ݱ4ц Ͳi%ץ׌/_noJuw|sw|4J:[>OW\w[7gd=j\rzzs `:2~;jtҘ1ԩc~PW˖¤ $ɼ+1d(2Me}c=;:L|wjW'=Q㮾`MM5NJ,Yb>6?5pk` ޽ƨC}pI+ndlΝ?X _N '_{Rnq ŸGc\_żV>fX_fGL2.q^?3NN.^l\N<زQH Í[QQq_b_elØc_W嗍x 0N*͞]_nXf[;Vg箾ק8~~V5f|uw~sof$?0juw|sw|lϪ>xoi$Ƹ/h0ej~۞$Yo`-:;NӾ9OFG+:|oc_{\TTc|?m|P~n% {}m Д/g?3h]e~hC+6ָ4-> xCںU;ZO 6?D~3 `U'?wZx#ЪBC5k;Ml,ݗQ_m34]v =lpv5Gz]$$lڴL|eZ|B@5#x FІLoۛ.ԉ]m3.` ^$77W7n\4áui $$s=uk8p@իnө|:r KrrquΜ9׫HԴiԥKv:FE4-77Wɲ_v؋/*33S .ԤI߾}F?~Ο?4Ϝ9s͛7^c߮GyitR驧7 ~@HELCnچ 4zhEDD4ȑׯӴzJ3gTFF6nܨJ\iEGGN#Lp8={RSSm۶M۷oʕ+Ǐ+##0^$**Jlul6MկԖ-[4}tIR׮]5eye˖i馛1y``SNexa$&&%le ࢢ"UWW+**JߕH>|#ZJcƌiQXXhjǎŋ3ϸ7//O]we5t*tnD%''p(++K7n?4xVz뭊l,442{1͟?_{v9_YYRRRpBQ1/ٳG6Mwy:u2ݳgOIҹs甛ŋٳ HlCvч~4h mڴIWyy6mڤK.WYY$;V ,Hp5`0^_ ll;w R~~ &Iڻwbbb<͛dɒF֬YZԓO>)I*..ɓ'5|_^zRuuf̘޽{k$BJKKSnnLӕӧH||MĕOHHpjϫ%K(;;[1x̙*))￯2IR@@W. %%%Rueddh֭ WHHBBB裏wh!0/j(::ZuõgLK.kǏ79_VV6lІ@F/iJNNnvEeffj…4iiii SJJxF3gk͞o/&MoV6m_/|#yQL2ERAAAݴ 6hъh0ȑ#կ_?iIII*..K/zbbb_s=?Zjb yfϞv:*x(fMlJLLl2Z#<֭vZ׮]խ[7y-[Ç릛njCglz衇TUU-[h…$iժUZdIek֬QddeOOrΞ=غi7x-[&IPdd|IIRqqN}ZwVMM9]V'NА!C4euڵ-^0^$;;[?曺iJNNVaaaݴ/*33S;vPTT2 Ç+&&F~h֭ѣGh+F< ?G*$$Dqqqſxb+""Be#c0^(,,LnQtt"""$u8n婠r-dfk_믿Vկ_:$F*))I-ƍ]?==];wĉ[!1x`pFj͚5-hނ#H  >{ErssUQQqMs8ZnƏpIRII~I=S\ۧ]vĉ( i=g֑#G4h -] Zڽ{jjjРܹsZvN8!Chʔ)ڵk]g}۷{:u暶 #"JIIqdi/^Tff.\I&9@_W_ӨQTZZ4Ϝ9s͛7^c0hP~a1B/k޽u/&MoV6m_/-^0^dʔ):t mذAGVDDDG~9MhŊڵkzjybbb:p#))Iz饗-7o{n֨Q_|E=sUV)>>^+VhDEE)66V6nfSbbety׮]U]]8RXXXev]֭ӼyTZZtr]vUnZ-^H0^&11Q6M=e-\Y˰lڴinݪG?Gb[j,Yhٚ5kiiիoRF7J?'бcT\\cǎ)==zW$&&*99YCYYY Ըq㚵 ݮr]xQ!!!N[om2[ŋn;г>^xn>???۷O'NP=<>4^&..N555ڳgl6N:1yd VII{9p8d}xB=$I={W۶md$SRRꭷ(?c0^_ ll;wnoQHl7onK@_ݻw_ݴ˟9rDgϞUlllݴoQ˖-^ %&&>s%$$wi}Ǫ${ErssUQQqMs8ZnƏpIRII~I=SJYYY*//w4}:r KRkw_^EEE8pM.]8si޽[555JHHh(33Szw߭={U VJJӴ\%''nMx233pBM4eo#<ҠlΜ9͛= K*//OO=nf?<--MaaaJII<Р5zh_Ռ0^dʔ):t mذAGVDDDG~5p84{l6EGG{.fضmo߮+W*''GǏWFFF]yRRK/5Z̙37R+WlDEE)66V6nfSbbbl2 >\7tClĈuշo_:unZTT[YY-[h钤]jʔ)N f$2u ͪ*mٲY cǎizg<۪U4f̘F^^^^ۧEEEVTT+//O:|p s{%&&*99YCYYY ԸqL4|z뭊l,44Y*++SJJ.\4*ؕJI\III]>}7Hxɓ'k׮];v;p222uV+$$D!!!zGTHH~a=zT!!!+O111`PXXΗhEDD4Z>k,͚5<Xz!rd󄇇kϞ=:x𠂂0b0P``h"mܸӵsNM8k}~~~H @5kִ~ZZx F  >0 p>M4Iݻw$}7*((wajǏ$uY[nmvkn/V~C$=:un6Yӟ ,;Μ9׫HԴiԥKV4xcǎio4`IRffҴo>Sxw_+!!EޮٳGeee~i޽A֭[5zhjTQQV[~~~nvEGG; Ç+&&F~h֭ѣGx:wN>`d=w嵊uK1Z]Б#GT^^^7LvGbRuu*++]SYY#Gp4Z3Jҙ3gZ~áǏѣM玙fC=rp84{l6(۶mo߮+W*''GǏWFFFt0G=]>|TZZ8p@Fҍ7ި?TUU ͛L'4w\OHHЯ~+=ZÆ S>}W_>STT|Gǎ3Uό|EFFW_}HEFF_wo֭ѨQ4rH={{էOM4IC C]x|„ ={v?|F8+""3j*EFF*((HO=TrWl2 >\7tS#F= T߾}uԩf-hO$BK,O?~Z~S̙35x`=zTEEE ӯk?Ə":tH;vвeL7nv`zVVL/Z+VPzzʴo>]̙35k,:tHov ͖6l knPaa 8+//OGUMM}ݺ2/!!AGUTT{:%v_z%xo_k˖-֭$sL-B{ェnP*~3;ŋgq;o^^ۧF/t9itɓ'~z?VQQ uwjYoUU6lؠٳgK ݔ &(;;[tmiɒ%ڽ{&LЬ{wIRTTܮȑ#/D[ݻ[n7n $yfz?Y={T~~KRnW_iΝ#zӧ"""TXXhzC iQԧO巵]v_'?JNNVPP*++u%S뗤y̙33gSzz:wv䘎% W駟*++K?Oi&߿_ڴinuEq䤤$;V ,hv0Hdd233]^RK.r8r83]ާOXd$U&Qcccuy曚1c>#mܸQ&Lh]%o߾SN=/..ְaLoVNp8]kɓ'|r?~n_jV,JOOWzzrrr}n_s7m WSQQH=䓒qI͟?_ׯW^T]]3fwZ|yhM\o߾۵pº;wN[lq/22Rݻw׆ ]NS۴bŊe;4i:uꤸ8-ZHSLw]⋺[<|wGEEiذaz7$Iڶm[ˈ׏~fVuD PttZw^3եK >\***A廊O+VŋuuK^^-:f͘1CCUXX-[fה0-X@qqqUJJNڬz-kԨQk5feV7oVddV^W^yEJHH0.\PFFnݪp($$D>GYqpUޢ#44lM^Viiuת[n Ç/oUPPh4(?p|]_V_IIJJJ4du;wNGU>}Zt)j+E0Upp[mѭ^~Ν]&vۧ_PKϠ O*3m4M6#H  >0#H  >0#H  >0#H  >0#H  >0#H  >0#H  >0#H  >0#H  >0#H  >0#H  >0#H  >0#H  >0#H  >0#H  >0#H  >0#H  >0ΒખC :. >0#HlN8axq0N/}wH:pN^Կ[%i/+ϛz[PB; WkTsI_j.|+ǹ9{[9*NJ |r^r+'|s >K\}7_oWCw2-?8jJKo=3ZkLC3WppznW:^~\zO/7tC 󬂃kG{-J cj}M8uo0.۱'v&Y T][hVCJkj^$4i/7Z`/6uU&Hٛ 8 '&&*N7 -)BEQ(yc2.EƟ ~g~R&+}\wF*}Nܿ^m44 כ.@ Y|1KCלHФ3 :ۗxܽ0XFԧMM9zupENiZSS7G zFԳ~;+iO,3UrUQT;TU=ӇzJ't5?F8>=Rf; D]U!eh!w])9rJ.@ i־otϨguoBp0, K0,D8pIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/eclipse_hello_run.png000066400000000000000000007067621507764646700255330ustar00rootroot00000000000000PNG  IHDR$#sBIT|d IDATxw|Uם{sCzBДŲbkYW+ kWݵH:J! f~\Rnxޙs>g|9c'ZeaY`Y`Q4+^\Ƭ6/P=59lUz_aT6٨WP|>AtLqeT[ېJYv;GH[Ѧ> CDWMWW,9nGd۶02cƷ XKVv!Ex}>ÝEѫOgN:e={wWKN `#f#$QPyH@<לV}zc~+(gY NAZ y}LZT *2N̥WFRrl$}92XAttL[Ѧ* \)\9|0A\_rU{>jG('3cu>4+>N (,.73 ϧsI_%EDDDDDDDDDDDD Ǒr}0|oէW_PcQ;p[v^Z1?]-_Ÿ-a[>0lƃ]Gl\to*,""""""""""""r[`1IImF.V3[ui_}Y>be:Ͼ^ÿ"}[ir kWos ; gzEDDDDDDDDDDDD\vCh1sX2'|Ru%z#>>%>>,pƎ ŧK)*(;뭔iQ:k_' bVTDDDDD%x^&LHHH/t[ӢZoa̘㎻<TرsZ*q'oQG!444* """""rhX833M6УG-Zl~)A׿uӯ_֬YMZZwfmۏիWѿ?"k0vIj_s] %Envi /^$**҂4?Ө͟HӒLy*RR9xWY0#a;ܻw/۶/fY>n҂6aup6 #ѯ_N:i  vm\|$$$byyDGGX]@h<=3abRjUѰ"""""ЦMٹs}&55>=}(QQX| K'22ka0?;w4M͛ngذ̙ 4_e%K)))!-;;w"// Oll,}[U֙>9s~0 ?8b0 >_mv[1mf;x#b$%%1{l>Cx`2II/"""""Rlf^lʥ^Frry.fHM=4?0)SܝwލiA׿]֬YHHHK/cƌw+O`}Y\݈ݙ_n " P;6 2*'A#CFJx!Jn_O`"| )r())eŸ'7:=zDx<no8:WaܸsHOZ_ƴi0zH23rH#//T \.77v;ݻwcM@pS:\s x^ؿ?P^xiFK/_~몿Hu'hVV7oˮ'S*%B-SkS]w[G\e`aaatޕKHrr W^y56IKFXXX0",g]|*.-5. Z=EDDDD$QFt #Ub*zI^^>G&>>7xfb.N}O?vg'44 #_SO=Ŵipi /b7RXXȄ ;())fŊ ""QRR '58̱Lp/㏿+._cOIJ,K&N[oǐ^ 0ŋĒ%F޽O(((SNa̘1T |$Ge*^/cǎ駟/gÆضmuYڭUu/^N8l͚|9]٭}iSzueZʵبZԶ^/>#1s,6oތs0 """""\n|>om_~)de'+^_t%̘6MmSUo}׿Bv1tڅ3+&!|Ա}r1m҈ua-r^Lܥ]t`3tpσWa9boJΝXv-K..E c=CFJN8DtLQQ.dElذQ}衇wrr'qn7>cڴ1M/w4N_|wy=8ܳ4/= .)3NeŬ]p_2qL ܹpVSOeĈ1th?bV(OVVƍ4}nٽ;N܃~Ytڹ-_fq7֭+999v:LJ +Ws&1gΏ8N=miAs]|5t{tWӡC V_ۻ?n+`VMb(,,^<II>""""""y<<>;3;x׹xԺZZEuҤrmpeeUGa':J2l` fl6[y&0\؊1bf}´i/ӹs'Əό3o䥸p ?4&&,,-OB:uƚ5kx78Sڵ 1 /x:4_՚Iii)>_Pk>`w߽l߾oχax\",,(Oy&0HLL0*}⦛n$$s:u0*T#$$s zʬr?8n;^ 2<[rܹs`weƍo>]ZgyۦUٳv[Qnm ༼|OQFaYV_DDDDD$`ƌo?PXXțo@^Ns9o*zY嘦ɤI_7$l23ki׮YIJJ*W^yma#!!>q{veEv_ArῨ#9ωȑgԨ̚ 碋.dd8p =P̙ÌUϰa*ժ|2 ӧs0idl6ǟŪUkXto>ZիWc6Cu]=N`߾G*hc葄3w\~z|ƦM%))͛77qTMJӛ}rpHk-[G+1nj̢:U}`͚5N[lڴ?1Ծ?BCC{ԩD/z:?Xc=Am.SNŲ|XB[<5-"""""G$ȱ1 ^>׼yMO4ruLhyrg+ORoW_K\\YY{yװફ%99._3pc}λ3cXGQl]Y ()@tLw=p9i=OfaYF0`~2Faxn:tHG&֝qbbή,))ɼs\s͟4i2!!RRy7\\z% 8t:{&aY_|1sYY;,]+7Jۧ0u꿘2I^|%"##!z{fȑq8B&}>>Y:Yiw}Ljh.mOAAX:v}Ə͝;'|mWsVsgnsܶj&44ƍ;[nHbb"EE,.WQ=刈ȑ?u:lٲ(?8a4x N?cg~~>"99,z z S4h0{&**:ln,}2yL}m-avD%b!lZ>\Jr0-63ͷOTTlH 66|^76T"77N7 ; ||>ee&.DqJfq:rsa1rxƩR/ɩ8v\RBC#0MYnR)))vbY!!\lӉ㢨h#emJQ~<WeV|նƏ++[SK5&& AAA>l5ss T^gm==$`xnCVE= sz=UeYuSVV{0ޢ_^Gf&zI\\\bu?%ky>'Ouua odZoosNl6lr(3zj*ڵ#9)E_ٷP"Ûvmp",bmGXXXИsbKڼe+;v$%%?m=,_A0߈n~eI8!Mn_տVQVVFlll /tY'_8DDDDDDDDDTi罙/YJY _nUdeeXY\ W?Ojxg:7t3+Wf|gۛCɴW^_>߯^˯?+{Ɲ}9>miwz5k5[9tS;ԎWo[c/6i\eַm~q.[V3i{c-M.z 9'usL{=vzUg҃8vwb+w?Ԙ#l TWl-8t6<bY󑕝MB||ywvk|ɳ2~uGO/ p>YY$%&/?/?v{[rv)}2秹٤$eV[7mdxz:F 2o~p: Tߺi}<6mݺoSOs}Ե6tlb۵ ʫkDj1SWlÆ3>}5UcocS8k[[Էz 5F02_!m҂9'֧%_!;ҹVd0zed0p@GMhh(P{+ƺ+4{5z!1!i&݁ƛé7&?; %&&]:7ؠ۰}aò`U_9ǧ>ÖxgxiνG/*.4ML~m7!vӯ__Ng)'oįoo>ˮcFgc&ﻇȿ`uRT\ SGZZMJrrO~1~Gl6gkܳ_|K\l,98Ó Xߏ><>oڦCj8uq.>K/ 2u\^~k׭疿> <<1~oy}leri1O[^mDK`ʯ4X~=Ͼ.W)=HMo Ç4-v$!>>`umc)ǵm[~N ±RSRhʲ+ҹsseM7އ3X<˫޾ꋱ?f˛y'}O??{>^y&o`5x2j4ðQXTĤ{tx rp Ǐafȣ=Ӟ}0xwȣ6ŦE]Ɋ ^yM{G4d66 IDAT˲xhy]Kii)7j\?]GN;eLĖv]:dÈmӴɤn íϽ{>#J=|>?738|> Fjj _}1!!0XO*n>c"#޽[`ۉ c?c)*.f[7r3,|8`}`ymjxu<{؀駜s/\_} KVnӦir\}\.&vGSWy-ن;f):Ul3 .i/ۮ&ϼort!`^Pyҹmcǵ踭m[[sb0_.j7t``9g|EdЀd^/>qWWr+;'<{L^^,^$gѣXtY@B)JKKo91o/g36[""""""""""z0tU{`=d\s&;'̽Y| ]&^PXTW\T ٲuSxx8^|QŸ̽{yg9s;v` $$kt۠=dZ_ob266V'oMȌ?'ETdd#6nog5kص{w $m^yY qnjs5..#G߯ʇn>ڲu+vK/ 44/8?i6T1L)cƿy ʓO<-[`Y/nKg_|[o7oxw٠F}۬Pskצ⸭o۶Ήj>d0Vdϻ2h tؑu~c՚z.74;'ҽ[w~]}qsYtiy˖1<}hl 7EaQQ}cρ{ݍ;lLbyFF-"""""""""RY=m3dfbL*=*=bİaGLtL5 xbs qp޹p޹`|zj*njIqI DGLxIIי{ 'B sx'`GL{wʷ,x۶1rpz<[@s敷ispλUWWƞge_dȠUyÉ *hCu3%u.&&:{T\P</i6X=k,tؑȪ+lP>k1}>@m]pa[>|hlC]Xg X"E.=O’x Ktvɾ :0|vs7w?n909)0l$'&lqT|wINJ4M~*l- r7tF1 ;;wҧw/JKK),*oYل8hw&rhh(.ۿ=*OMNJ7[k8":*I%W^ŸsjT;wbOgfeQx SW}>MJLwCm܆ yUWWFbOt2k:lMIN&boÝw7ۯGUg;vN1ZRSR:gV76\}fVV}=eV!q[׶MMMaϞ*do>5s:Ƿd:vhs~";;޽z0x@>s\.C4z}Ӟ S>cDPzug~a679ݜ{,}9U~#"""""""""ҜZ~Nh{qOy*ӴXYx5k|t҅]:{PVgչ8llY}7xzE]Hލz@]tN,|XŐA[<^92X`Ǟ瓔Hv>ՇgW+>`{TS3<=;~-nSǜ̦͛k]ן.#<$#30~}qqqx ?oݺ0鞻g҃|?{.iisup52o,gf˴x{xG;O#UIur. ~]wЁǍkXX9KJoTT}Z6o {cĪ> t <Y#/]HjJ2gˌ?@ۯ%P0L] YO:Amsqcf*5Ǯ{UG~A{jzSqN\tY-!}`>WUbbh;߷0lu~Ӟ#""{T H90||?{Z9ؿmsud?EDDDDDDDDDjBKٷĄ϶li]wDDD]r7+P=*ۛEhhh!X=/{23INJ_@Ԕ*ρ')n.tб֞u7y[ 3ϼM.%P0L]7N mCm6|qupU^>kkc7v<4$C\`ρ\ynQ#'3Z1R9ҴJH51u_VhibtЁmw0y8mB#O?&))m"\""""""""""-ņ#/xݻnz6oJBBP^qYV`ـq4Ԏ-).*$$44 |4eٹm t֣yCʡp_PWĔM @$ƶ""Q(Nrj*lقmѸbbb:t(۷O+L<ڗi*Fll,IIIZ, þ}())ix '!!O]QA>E 6v#NqaEy(+&"""""""-poJ9r2w *-[d[82bĈQ%JKK/l :[ž}裏3f G}t/[ qùKLLLeXhYYYX<ՕϨQVA;97/m]}9YƲl8 ܮ-+UރEE4}-_ p1ǐҢqYŞ={8c1bD믄0f̘z{6Krr2?:u":::rk׮eɒ%\|$&&hLq͞=3grUW l&6ltrVA</fڵ >t#U߾++)&4,ʷvj%PnWMDDDDDDDZp}ߖ*sDbܥy8c[ 94۾_KZ% СCt[oEϞ=eӦM3Dlss:ٓK}vWc˲Xx1zjm/+¦Mݻw.\ܪ_˒%K(++nNҷNfOA{6`6DC_e<=MVYRw7=.&:)oͲ鼥9Cc-7E Pl6Lp֡ܮ6}I gөE?=ycN#i)+ $DC>~a*ZV;[uWнgzl=gӉ:w*{ڢeէ6J Gl ٸi55kYeQZZJ޽b[u Ϡ ~^nmm_۶m}̬DNضm[ `E\\\t: |EYYatؑ `χf#**Ucnvp:]7^|b5_ڹlۿ4Iclfy$EnͼMxg34+`,veg2{qQǶqU߲)?LA7 ^W0<=.$wk0߻y. ?1L#-k߳9WI4E˪MsmυslI.9{]$2txNhSҝ|ѹ[<]9.9Yp|'aRK.I8";VZXlѼw1a y^P7%ELzeջlx51esҚ۾MqKaEV䤗~OƓj]k4M,fո.jKj\""""""aoE>}5r$w%={}v+ǵX9uy)-* GE>^ey$Zt8=g賉L;g?٦I~cٴiGuTˮ_#FxXz5 nz޽{ҥ Vq6۷oA]*))b :Xd^۾knu,༊d}[8k*S[ܬ2 #`\7ZzTܼmU,<ՆXͧx[emƥ2k {rh:%1ٝSpⳟ \<6sOA}4cGeoGd]DNjapvL*2=sK x9eT4$ڇ$.7ܦEbc1}l>ߧ '|<3n7/1Ysv֢Xs ;s 眤X˧?rEI9¹L&So XI6rtӑ-yc[% lo]BXD{aٷui]]k}OU$m{87%؂H[<|yozPRRBDDDQܧvg>wq ! ߒ]5 vM׋K.n7p:xɯzʘf0b0bbbp|&>\r GF#gʕ8ղ?j .2t"[X=-ޱG~|P{O}c:$VyfΜO+**bҥ^ƏW^KKݳ,o1'y$F6IҢb~`Yv*4hM>^=9|X'+uKvPͿ]'TxH \z:\m,Æ ۿCYj%gժUeY 0 0˖p` ST\5~Yxqsݪk]'13>fI`뭸̏7M =;U42!΋(Aq߾/ _çO;7!ƳpddV̫X17*YYY 0Ξmk1Mx땔)u)+uSZ?=$-%6ϲ(slU#ĉ3@2M\.eaذLl6g{zZ^7 L^론vc1#Rˣg[a˦ٵ' lq Iu'*i=W-s ~ifLIidI$2wd_^>vNXx$;w",4䁳wo~3΢{Axv| sS1đ 5z}-?|ӆ%b`Ufpvn,ʿ&0ucH]}i {)Q!AI |srvI|ر=_Jrf.f}X۴H7=xtGB)%~\a9S>x2λmMb}iyWׅp'Q.3ɝn#dQR^[['P)s#IlG޿ytOK[T69@Y|΢I಼lx2| Ngol}lϩ|1dʄ W 5Mc0J#r'ׅ#&.1ۛhTOg#s/(n[DD={jZl6G`ײ0- Qcmw,X7nN`s۷gʕeYtСkܹsض};GnssXr% !..I?fhKeeY,]IEL0'!Ą$*Y>=g䗹INR޽ѶHXh?{%'%%q0s,֭[Ϗ?xhJi߫! ntwm#~Ur'\4 91W}Dl]>~a*S~?Gޫ2FPPO~~^?l([:2f=Zr- IDAT6=obՊuJn `4˫Y* ຆ,χtbɞ^iY=Oppn% ,d96xm_|2]zwQI8g7z|vDDhʊK1tL#5?{1sgN~y dդ4tvp1]IJaGrtgd0v5bˆ;>:}gf$=zI],@(bzwzF;^bR_|rHC1Kww4<kkUrQk%@hFfm>Grx 3Fs5| unr6֟=~4ͫi>cӽn,'>G %%RR⠤o?V1Ŭ{jv۫' y6\ͧid(_ۛѽڢC@I:Ω %ZnȥS):B.X- kzu;yFuc'˻VVU0hڴ) ?5Ѯm[ZlQ8p{8Obb"& S.]?i&zbTBCCIII?tPvɁ sE23GES@QUT%ZO&Mg:ľ-ط{M]JxLyxջxw2kxWޙiE'K+hLAO8DaAaAؽ,g/b2tyF9ggL41~ZphhiWpѣt҅LZjU%*#))+no7p{^<o]W8_֣o|WQ|#nVTNqQ1gn{݉]^=%&zQvМZ,]vZs<M[8t2 toY17o%4,.]:u`0Ymt֝p{*w5^x뿼{z!=g;ЂCIcL:w];8jѣGM:;v+*e%8*izc2T|V-_nӷw*gOq+DE,#FTt4Vy3wM̎;1i9Gضs!0[IJfמ \nm9vx?7#26&}6ʊrӯ?>?κ^_y G}*eQ>y/ѝw27Ohx%DE*/>߯P,Ѽ8@Ue 5^~}Ǭ&\^ t;%Q|Ƈ@w׿]+'ҏbֲ/؞.LhИr7 "^qyMfv n7EQ߷r)>.M3*.D/ WP-JX 仧ǴfoWM֢;q~(;IJ7׈KiBTb޸!]=y{ukc^n ?~m۲n:ꪊ篼j_vYhQaСITT_}m۶e$7/`ƍUÆU?qvv6;vd5Z[yIOލkIMK#I脄 ҫW'ǴYMF#ȂlF 6т--,M_D+c* 7&OdXrB!BҔH!"?G"™[ :a836@XRX?U|sw7$Oçõûqs=``>?omUf20M섇e~n&g IdW}MɿX;r\Uj @ttv[0bۉMhFx^{=xlz:rٻe51ct/_~Z˥Æq`f:MZo?1A4 >e?HPd";-I+7iw`qOv&ˣ1{̜kyx;׫GAcÔ(@|ߍ|+4vrfJvهaUhc"qҤIS,Wf*-ڤޯ vb ZlYȦC:i|s#>>dx jH>kjbojFxd 6-D y5O%6bs1eeNN'N2276 IC.\/MQy)s~&{o"c7~wf_սz< L}pVL>)vYȡ];+U|U@^]i<9VszͿw.ɖ[i֬))yiV"ǃi5.RJKKY~=m۶sεZWQQ6l{h[wheNtj T]x0 5k~&s nhS* N{2\A%y*oǨ= B!įHDD员xP CBB$@^K9;<Ad' #At ]|`/ǕC11"Y?1[I̐1M |FIeDE'~ZL 4@ ӫMJt ʕt:,r8uՅWwm 2ST ,q^/'FQvlX3_b<ܳ4MJ}R,ϼ!&<81k*^g!#iA- j":;aqY]Wgl=c9 rq]ShdZѧOo̙)7NJ֡l߹ `ųB30i`ߗ߀5wL&!64~b5B(qcg:f[!A3&wۯ tևkS#yw8|5"d^O7  g_~CjK׹9Y}pzv=>vwbmȾn$Km۵/@fuyouŧ@BB5.ʢ\־]GqQvzZftK'G>{Ϙ~׾P(-^JI%f_u_{>c1nzocԿV;YɤA\稯|k֬F9[,?4!R xto/CZ{@ -b@_R0B1adf]Pލ6A4 ac_^Vߨ 8O>l Ġ'P<obֲ?q xxT8ˈHZ;W_c`6+|o[diy}| 3~|[!XBZZ0g0O4|sLo쿅)ǸYGƐԙ ZPt.׻b8!> CG9q1d6k?"ƙuFfnɉ^F*F>1+޺m3mbs|uN޽VےҳjbJ~!,Mp}dmz15ީMo=ٌm۶k k@'ksVtjl#߈Nk;@x<'yϖkW^^IɾG!B_dV^%K\nѢEꝩ.w{,ڙL O#?9=j8d}ycgKMϖI\-݀yٗh Napʹj}z7ՠFwi^G^sS |kIN>{.1118pJY7Һ(/f ՙlY8bnﯔ9hi5ցSAm>|8uw Sgj~IZnÍ ` * R:2o~ݎfxJ؟qдiS$$˸n'h`RRf?E˸RRY=Ry8t]'FRR͢B77~ w-ESU|AIZbN 1/b9zRu`TJ%t|㶣ݻW"6l/-zQucTNe+9?w?:±@.Ǵ5~xn].-$wy`5n%8JoHgأO"*7_|ƒ#pd~躏EK()vl==Γ 3)y푩jWcڇQ$bXm w|Z ,a43ckQamӉ̅)%Wptoqu\c-IL8"coCkƔxEa s&[5Ll?G tXeyL([d%p<#bbBN|w@cPLf3-;vt,[I?W]5ܤ;6sfbTZ.13:thϺuҥ 11羘z}w9UUq\57''-[JddԊC]3ikEr~~~8qjB.TM(**q7o&-5Ϥ>cROI_M'UVYûs>kShKq!B!]>}ظqweARPXȢEرc'&=z4H<^߻R fĤ+Lt]G5vQPX z)++C|aUtB\hTCGWLth 6mFdx8QqMij gD8&m{(J^ӣI5()uu;),*YTȉm ?ۃZVAQ]1+p{9(uy)-uv\\>b#O && ͛&Rj2pbL˅bvXamB~f֠`ܮ5eiIiaEB 7-m֨J3{C6Ko1k: KO{OtsWeGv:ډqKϖ5>1nƧwɯ^ewѣt! FwU׬^|]ݢ O.xbZ,,0 zTXwX stB"fM!iqAA?AJTTi6d́p{ qu?CR^Ӱ7 !B¸|냚 U++㞪2ś[p%ޚ 1) AiGh۞nyVq뤾6䑗S5kz 3U/«P'y]ޱc =|0\*.]:}*?fݻuMСT:&:aY)q  #:2Z-?sn:[Zྩ(?3r}9,6A۱tB- -e!Xӧ{iqn, 2.A>} >B"ʫ(6+4d&?fϴnӆ2a `ӖUqt]'y;އ,_;bz( IDATXxܵ>g zֽ)υ.;c}((+s( Wv+>~3$%7C =N˸p%gKx|Z0MSj_|]$zInц7m寠: hq:>a T$3ch'*u G46{VbnϿnZŹdf{:N?Nvݻ/1̣ \r*2'pJ|zkQkQv) mII'c O N/ x] =n'Y* S8aY|vCz}-͋륣G_@Q(c5bfp 2҆{7.8Z%ѪURϕooϴ:74tⷜǫgIu[TBtŌ+B!שuV}T-Ź)(( $$-[3'}9|8{6w>v=FL}꣊oMȓE*Dž){ $ūN|r>-`pb7%.G. (sG۶m-:blrS3 /Q7%)4S^ǻv"88]e@ DEFx*۶mH<|>V\I^P+(uZ(C!:2_LXH- p*& gFc:f7mӑǟxP1NjfU[*5ՌTf2{p%kږ?iP0 !#dfj{$$$#-;WHdLF>{bXD4ï s4TUb1ӥ{n fU#Gi&M}Tt ^~",,.z<1CqŕW*)(Z?ع1t+ن\t2sstFǙ&7dsĦp54hl#z$-%LJa3ufMYMDԮ+n#`D'G5 -_͛0V>eMОQ^1r)jX;q.nClL i=Xn=]vm_˓7ogZj$UNK7֗=ۘ^ʸ&C'dR +^B!0ztMpyy|lƏG1+w!B,\ͯDLx}` MeζHSN./- pTw `nw c Vwg0j_Xwq֒S+JTb88}rlU3w Yz;UvXCC+[BYk+=|z[ѡVLHp6"g)qIyWV7siė`?aD6̠KGt&R/qqq\3[_CIDƜ_+C7>_NVT!B!Dl6̿rrsO8aB' Kw Wu^r%QS\T@˖ڙIHH0!8J)*)ѽe]kIL{:E3A1AŇp̴hup:vȆ $7nXe#GTizz+W۰aC8m (tְ ^]˯~Ѽ5ܿعsg_i۶mϙL&4Mּ^/EEE4mZ}jnk.j7 z]8Cj&TUEun)u(ix#FArrr*ϥ|LO?x_ U9.dfʕǓҠ׆ao>2220`v>9މ_3l_s}v[7bU>^.Χw!W1\׺B!BK?5Jc+Uf;F^^A/WM Z>mF\\yM Nff&yyy7dܹLhe…у3eܼysz{ޅ(tޝ!CTlM6Z&ӳgOV+2GN_d{r zh鼬ߧ:C~% !B!|]2@II %%%!j5mڔ;3[}^йsg:w\eDGGsO?e|KKPPG7));zLEncZ:O&M0`[n Rl6~p9މ_3l_s}v1q>5Hax[c؄B!Bq>] ٮT yR2q-5!vl?% B!B!D͔}{li2e y"B_@[gݍ4/ʨK;cjF[_8kVXInnaathߞ@V_<ȭwu^V]U~ 6 ^G5j 5guĤ(XM&L&& iXM&,sħ0P_C}nYly{&j2VIUU:t(;okS\?/j]5ޅNoY0sps'No߾C1fZc!B!B_|xwuDEKTd{[v-WAuRSS0yj q1Av'0ĞJѦLLڴiCaa!6m'ӦB%3K봌2dd )eiii^|=zrfcy'%Krucq88JK-F!IUٵh )]ok ۮWC+_-wx[ ~Em^byѣ*O=D_?{/r]55tè9=V;( pB.Xt)iiig|Cl6,XP|^h#G8g^B!B!Dcm|'.7V_8N'&Ί+=z4/DNp(CpuVL;(hyu!B\.tMY//v+Ԥn੧9s:.nVq\=z8} 77DL^fff}?@ll97~ܸ(CVVX,sΟj=raZSua,Zn>x[oQ^E¤j($ U3dWZOtbYU3Z/8.Æݕytow]J@9%KhO '+=sp~Co>/:]_gÑ#Gkx4 Çs矉Z@N,nVZ+/xnذǍGUUt]g77$Hbڵ`"##yܮ]off& W^fpM2k;ňg*Y1eعI&s1N'{f;oWz/g{喙 4l6-ZСC9r$@IiѢEl2840fMŤfIS1)Yqz >|/^44l/ѣ}:>nn7˅tl9R5@tT<ޒ~5k0m4N's%pAƏOzz:DEEGRo9z(N}<bʔ)^X ҥ!!B!Dֶ ؒ2w.Vr8̚./BE8@xx8O>$'MbƌCR̙=~`mذW&MO23]?S4[4M=ǔ)wd3e˗1c_+-f~#4mڔ7^)wNk׮HO+y,jK,&&&Yf1پmix]י8qC r1v<׿ޭX_eKCuꪳ^s&ϫ:t(ϕ߭43^_󉮟Mz"O5T ni7̟ z uo@XX/lYt1۽xtG#9y ^U/w믿fO3m4֯_ ի/F4f̘ Xz9:ƍK.ᥗ^r1j(|A>7 ӲeK;b㧝O`s{,XpxB!BP^L«YexjVem۶ I;`@|>7mK/V0 ^"?V\Ӧq ןq}^JoeEDGG3c_c  vv!.ct@ + 2y>৅ )*,牉 IDAT!&&דk%%űfL6ϊ+1;wl޲i?=~{Ν;q;wd |{z]߿W0sL9a\s5L DGGW].7'_uULp^{|wu|wTɓ'3?lٲӪw֭[[gaZyGzfz8SN5zO_sx 4>x#GV9)9xBҙ52kttMg4N\hK}œfa|GW&=7̄Ջ;,,SN^'gN}/M;w[q;v,0 tRx 0 cOsڹTvb,Z🗞x .r'{eٲe̛7^* [ݹB!B!h<3Ǿ{?\FX%'N{.ĴҒӞOwMk?adddOigB%κoi>_QvՌp <&J6m 0 n6vLgvC6 $%%q-TL;r?Nxxxm*~?p r]wWYW^(((<a:]Sꫯqգڵi7mr2*M4!ȑ"$NDDDŴ-rͭ^M4{!YN2rڻx|Wǻzz:;݇:xm_>BKo$pRRK^9ySS*GGTO7pנ488ۍ###UUPJ>qu\ BQU*\z } 8{dev/իl+qqxxpF\oC~XYJe1`2jmL3(0NǦ?SV^x?aO?w`۶m<#&$$$m0&/0], \233lJ omO~ƉVFWe{qb]e;ӍMQ= uJesՃ*+88NǶmL^.V`` 999dddqe6kU |B!B!9$d\V+.tECB7oN@@om۶&֮[7۷^zW;55d}s#_V- ]Qʽ1cg8ZInVrbRP*tp127mڄΝ;3u44 dffquu5qpgСZL74);v ''k׮駟ۥKxM K|E?y_KrVQ#+`|ҢE }B>7'|ܛWeTaÆٳwwwM&6lx^P  p'_< h# :%/ޣIteƟ.ϭ`Ԇpekg%_sbcoz͏?q5?X SӦN-ZTj5$-"##VV[sv!DU/5>_Ǟu++@ƍ?~]v~ JVvPw.Ӡa#-I̊?~O<$l߯XΤI/иIcBC=f4 |jww2i 4h؈@n޼Iٴic噿`,0n_P(, 8pggg~رlݲeCnQ\IJL믿2`.ITݷK=z ֯GVӪeK|*[oJ%'/*// q/mA PPT}V*qw*j-=HoiWג)b(3lX\!}[>|!*/\ɓС N&=PM^綧w! =t VbܸqDBB;wfW*<3YZM֭Ytq;vQ(\ѣGOzHII7{OsrB!B!K6\8=Eom)gmF?,zI/=(((`L4ѸoMclٲ^Ϙѣ9zqW__` 0ի*={B:8Qhs+1cI=&75u=T6LVV67*kzz:iiԯb2we/AwVv6ԫW/OO T ڵwgҿ{߻šR*4=Q77ϰaÍ7\Iz2ﰸzt~OXQ!B!5)ۜ?}8;(yM!G\)C@ !B!B!B!߅cM BG||<f͚5 4oh(|qpq\a6A". N˩kgi7^7o%E`תj8'Jxզ~e۩P9xe.-;MǶFxpwzZ]w %Z Ʈ+~ZRXǁoܦ򍌿DZ-Ҹs9j[X{~U}~+B )9[CMG$xW֓7طx"uk8 !>N6mt۶m3Yf0w"(QHnn.pF"7nӫs iz|M4b? ҁWO揟e,ƪYo {Iד1HoУjMGr+77gnY HϾ.Nw~9ރaݞ`t/Qƿ P零Nw{BihtZAddG].Q逓yYxyC]Faǀ, Jm֔\֔ߒؔ D=3purũ;?a~ۺ\vAY{mKUB[[P|2pu/nZmzY_=/sY5*{|lB!߅RGAr;@zbjӠ%B:ת2U&%3۫c`۾<آwo~_{0._'$P73GLEe+E]pcǯbո8:s+7r.aAxr2X+bSP(ԯˏO4KRҘc 4( >n.*nJ+aф ג3YǨ]noMoM-\KNf^mGGvdl$>e!l 4tjҞΟ 9?w56o&~KNß?ET脯/-C_Ú-g4W?-Y[Ŷ9!Hw,^Bq`Z>2kE]^~ݻy+رcBqG$4 "_[zBzu_npden'ҧUOWYΠO4oۗһՃtnb<*Z1w'm݋i؃'v cWN=7TmΏF<Ҿq^g/,)i&K:~*|0f&2B_ˏç/Ywh#GZU'wЯ4dGCxX_OЎ]-noMoM-ymK[o&7̓BMr4ar ûvvo\"GYbRbˬo&~s7``o!wL|M-}h)s5[:>Osk꟥ͱp&koRN07m߶^_lX*??{_&}k-ǖ/B!UU2t,>֣ a%#Bz=f_Z-Lr -55Jo/Be-BZF^7G; (04-J̹kI$f$s!?YE|Z|uY`Sj{Rn|/{/GY5=:}OɇZnkf'㘶jG`ھgpqrщIʰX 7>W&K͙siՓ[eӮQ\;guNN$Jb4:o?Jۺ{+UGe瓗dž ӧXFETT* ___֬YCXXU1_pQFD~~>ݺucԭ[r;A! PϻWjYfV.^ex#5qQRˊ<޼ ='vL'g<';"[͋XG^yj2߸HP94h{8{Fղ.n76rӚ%j6Wn)=ܒ#ޤWm~>+mGnzHzx:*YRP[דdڞ>VdAϣGa{.|k%)7HHKiX*3/j{EF^fu͝W?l=~پ-?K]rӸd÷W9>履~B?{H˾E!e.~b\Uך[=TZB!7jឞt LJٜ[T^`smmsZPxO.,iŎ/q~oeI/xšL|8#xWO ΔDo,[#Gd^M97&B 5!g 9_غu+GN:;L<'O0f:w޽{qpp`ٌ3#GM9r$}a/?X^! o@d4.fb2n_צ,=l{XeB(k:ˉ Vp#5̴=U/n6[ݑS)NB,xlrK\]臞̵sd޿l q/陕m>|8nX^o4wI/ 7DZzҤIԩS1 DGG~&L@BB >Gjip%Ξ=pqqaڴi_y]!7NG~~>MQ/| or(rDE$; IDATq`R8zn0'c[PP)4Ǒi{˾x_K(ryB*حT(qP@b1clTO`KZG]ǾgfͥfU*t:bbbP*;d]al4.O||<&AjB!ITT+V0~1c( ukɕ$%]Jà:4_ƛoばzX?,fEtUC:F iu7{X^MF<8mz`X'ruj:4nk8Ow) ,i`hލaPǁo|?Lr rQ*p:AL>٪'=2[fRycZ= }uk!#'[СRӗ=hmϟWS+PӲ~ ޞ3:XS£/l"ӦAKߖ[iM5'G mYB^Az>mal>thܖ!]gYxxaP4 hlĖkYbՖ˚̥oR|, =B!5yZ8~. ڜ8l==-oP Bx<1T`0P0(ĹC& $(gw9}_3vZk^h!'`5Y!0 W],^sؔͮTr8hkgpu/īZSjY͙;w.C ʕ+hy椧]fڝ:u_g-ŋs-j27B;pB|wIZ0.e[ 8$gWSx=<=ʳRCqfsϡwٍض2 dRuBҲөRqo~[GV^u|ך_|hs9UȻ+0B=2O?dK/mt֍)SPXX4L^VmF˖-5k̙3CJBa=v6 [O>nR߲ w6B\>*r.~qɅTx{k=]c`{mݾvs-:N΍x:у7w|,Q8TRϺR4}2 NBo4JR1%uE(*סeVSk,\.p:#'2wrսJ$pq2<8(sR%>{_gB!B;\EbܼJҍNc5/݅ }wv?\Rv!DU~CM=MݻCCQVx<)*GS.k^ íNl+ǎgm6gv蒬m-Vu`iTS7hN|4&=o\NfLtn_zNAK VbܸqDBB;wfW*<3YZM֭YB!(EKJr 'g={\T /'#p0(~^uxwm(.M{]<~S B{Qk˻>4m%ZW=܏1# ;Oo nCf"0uj=&B!B1>.e/S`[>B Ʌl/~uZ.*H2kN3Fp}B7|"'BGw,t{ /a6nShիGMiޑN667к),N|[)wT@+?_f쇞9=z= ""ŋ{Wgb s///,Xӧi׮7Y\Q~u|~AM7_JawGtX~HM6`sR6hЀ;w͛7 NǏlmɍ7PT B!>j:Qk 0Ao}.GRڋ]ܻsЫq? =:Z&s[h6F}d7L- U'B!ž,g@{Zr x8:cȘeƄ7dEd&z=:sFGzhS6QR1irqPYPHGՌl2"""L vN:sM V\Yhz J.$!^wX!B!u%~j NC%6)zZٿbW+=.ge~?9ߕoƯt'W^jÓM=+/}K}SG',`c*G+LJg&F0Y>)))1zMi?p96j yF_#?O~<{ޭ977Kh߾=K,yttk#0Wx!B!B!B!ڵfS˜׶!U.woI~ 6gmWqoYn7Dz rCk4P/>FйI]>TF"lN,>Oz}q<Ѹ )j'$a_s{GDDj?8y$ 7n-& !)Z͙3gؿ?ϟGUKgb"IΨB!B!PP4t{򆆶[\e5*_& i&1lo\M[Σdj5tgkV8/#e]uBBe\c߈RT,[b6' B!}$%%}v.\Haaaw5ǣϓ}|M!IUB!B!8ߊsSo̘3[셞y<e~\%|IWc9{9'&ռ63_$775 ^[MR6mΜȜ}Μ47W-bOOͱclJJB!Ľkĉh8y$ݺu_=pP*9ubTAWI~B!B!4d`;.i="LgOqpqQjq- 6Ȧle/K[`Ѽsm]U 䄇U3<™QGIrB!B!8~(5_-mH< `[]UW#pxNtWڝvM{璟}'p6m5kmڴaڵ|G6' B! ))ZjUey,ھ^Ǡ= h=yy*S!B!Bܿ^x0 {1V-/avwh:z9Kv?1,iB!sZ} ѽS7?G=HnB!B!MwMPe~֬a͚%-iB!cZ˗B߾}4/O;}Zv W*S!B!B!7,Bܧz=V͍CV[-/!B!B!(k:!BT?cFЄB!B!B|RSSQ(T*BBB CX[ĉ[nUBQu4 'O`;sutԉÇTXB!B!B!lT ˖-#""۩8z*8{jlTBrڴiܸqP BT;0n8ٽ{7?pB!̙3B!B!Bag~z 7EcrfTRq9YfEC`^5jɺF!>>@+K\\uյ !B!B!B!DI>Y3ߍAsu Sɸ ?ϓ\4( FɄ LzBҲeK:uDv툊2v4k֌~֭[իa| ԩcAٓ0NwB!B!B!Bb52pEzg8Oa/.?G7Gj矧nݺ?^޽{-u2e *?]!B!B!B!zf\N\gVpI:_MoIsv[K||<...ԪUӧOT*;v:]v%##8sppqR$((y%''퍋񻀀cB!BTNnDt(B!B!UGȑ#-A[rǶMG;f(ZIq(@ z G6Px{뇃t:mfv^x  Ɔ\JZ6.OII1;$$4233UW^PB!RRR#''oooZhcjp&&Y=K-pwUp#-[9Yi֦*[ f[Dfmmj^sp=ߺ[(`i= ݋Ѐ8;:%;{Ayׯ|M!ǯ#v=  )#[thԂBWQ_4.~tRgGZ>MP*+>VYߨk8(hZ~enXS?c/c]|Z'.-xZ^4֕X3RԸ%JҪo΅d|#= s}|\ެLַc[OE!B!D6ho7\f t~a,qww7GKįxhrt瓙ѣG1bǎcܹҭ[7LBaa!P@&|嗤ߢի-[dӦMjϿOfܹ3'O&''h>c+BaWٳ{n>S,I78}[k OOz|{,ޱdv;p+PAVxIJ7LX2p=%_NyPcٿ w)?x};܍+-.݌S -_ooČT>A~aAgi^dEiivyjw^8s(_iUVsYs~<+ 2K⸒xD(iYw#?ͼ_cԂr-9n4sׯ,~Ҹs|{2󲙶SƏʲLr̻^nWc|3Yp YwmtJcM^8;Kn_;kUޖ?.5.9?}Ŵ՟aU|,߷E;V㏡}CۘbəiglmGKl9>q_B!S=+ Hf9Q֫-\xnL1sia`iΝ;sJHH}ԩS4n\B`ժU7Ν;wftOvPT$&&|r<MnU#,߻ QY'FDU/ۻ܂|z~ggFnvnfrr)02 )YrtJ=]mZ8(\Gr+7^>&=6>6鎓鯁V!M8~]*؃N_QM!#La?ҹ_GAϢiX&[Ngxk Yoz1 [6[Oﷺ,m#}[u?Y[f\jtz=^*r)1 Bo0PۣV/+JR~ܵ=k5/>Cެ~&cV2β-ktZҲ3pwQYU%k_)YD_yP=X @s N<$:~_q^/+k KR)|/>2 _CΌz gN&x_fư.y[~Od>l>= v)fk;T{B!W\7`ڢ^yL޽+ĉg4Bjmh/PϞ;Doġ-6ٳǪ|4hΝ;͛uaQdd$~!111ԯ_w;u҅xbbb͍3fJ'|BfO!Eɇ GGG3pz}| ߼AFz¤d#n\Z̼lԚzO}—/Wϡ o\\%񕏍 n^OC=w<ƒp $ R0|LBcك,m-yju~M⿔ÛPD]/_7l^wp{ovm @MO>T]UĤ$0eŇj5yW{(ue>u>O"W[<{G\q2ij9{1 nm8}]X\F5P(j©kn%>KΦ_0GY~f拏יOoV|y^R 5;9?}Et (h'fL~|M!J/̱*>kk3'*SWRZkd IDAT o7OWlMnn&#XYڿݞY{h;>^dЭY[&?.^\:!  yw{(}[u`EJퟻYszClNKRFuK`csx-SRet}~hB!Je8eK׮]2e O?tddduV缼<4Mܽ{7?pBjo(mHz[P:P*Q(|U@P<1T^u0e{7JEe\nڵkI&DFF2oEZ4%93<6ᦥ z s7~{`bSohscP*LX2N}hX7u~ȓ|k=MmNդ8 43i-ؐTbMIE-%z?/_]6ݍv2?}Ob/ԩ{#_&Чqw5cMCyب!moT*9t4o&!<J MɭLT./?%z5B!8:5* J% Z8ݒ鍄zPDi"(rՀW+WDD"`B 5$ͶK,f7y?Cvg93;Fm;&NIHH$''{1aRSS0aSLaРA̛7FZb0fxG=Pejj*G&++ ?>юܺu+'O2/^L/]`A^׶m[x㍋] !WVZѪU+, saڵ T׿]OtJ DRM[]AisvWRh_a!^_׮]GNM. m|S|BMIZGzeoWp F#KvS_W.Wɿ^׏//'/@vn9k94s]SGx`5Qk\_TڿP Ћ/7H 4 JU_^#GϨ>(./ٯߧx{wWڮ{~a]j^](,+E0ox 7P%('ܹ:|~ժ3I-;nzP ~3Vx}Ǽ+^cW>")/BxFJBVYsoThj۷yq1°X,:u YfqFNJrr2`%{_إi4mٛfFE9s&zn'_8,ʕ+ٲe QQQ vn]R@Jnq>!g.I{T*`!b4FjJ m|wfkN_s2hr60tLK}Wzsk{ZO:iv#U>W׷Q_j}d6r",& iYA΍΋ N3WM&J*ʨ0TBIEe WC86Y*^5,:;mrh7:[$<0M2wrv?8탙Ǩ<;j8V8><-_DDZrzdro}h 0[8:_*Jv=}vޓ4s H9n֑qgF?##/FY*/XQSz~KMqZ殾7*1[,|qz{j c!Xg_N]RQfU޳f3F%eg9S`43{Ob]h(t~۷԰1m;b?U\}>ֵ{r~< !B[wsml{u[_>7RUWQCOM6e޼y̙3 ҧO6npL"""ѨQ#gddDxYv7n^w ~u-b!B!@F(++SNWK]%(Bp@ E% macxz{9s2tmږ[Oèwbр]kJthavӭ*-)ZݛhϛcԈx121ݛCDҿ]Oe]{F2j5`ojUV٘CFU vYxuoڏ'`orی(3Sj5{6o[ ySrCxz{m7>& 7Eetw_)>e)qm|S[ҏc-.I+~X`Mܱذ(rK Ҥ-Nת|ήOʧR2~^.?nFCF-Zg/! Jh΋wj5٩j>u y$''Laa!ӧOgȑ>}݆ Oyy9֥!M\\%%%fɓF !Lܷ?BBy7i6v߽vk$qϿsuQ\\LEEukpGE\ ؞U߶p{ݚܠ'&4Ʊ&O0{ݺsl6Vyl r C8Xgߟ`ï׺s o/'Tc}GoX,de`Fy%+k[fX*80xKIEEĄF(՗[\@.kS){|&bC/OWM O_TVBay 1^Jr hp ;gNfqϬلWIߧ>z_LzBo,(w;};ShҸϙ^\N zGV>_oV'ObFZѣGJbذat֍_|]ߡCN}Y8p {졤nݺ1qDnf3;vn୷ޢ[o.\i۴i믿έ Çiٲ%m=+{IݵMB!dDGG}h H]PGud]l>;s, Dz3ݪ~4jȈILuM=awۗ*UW#BiW/_%_T(+@Rp \lVV @swe0{XY0z oynTxo[Yv:g(5NЖE$Ye nXۮ^[m?)!qt|JNѶ b*AYI7c?|13߽V=b|5[C֐h,竩5hѳ$ˬs^:!+g}zr~7~IbT,"w/蝽n(84jQڮ΄HdojϞ $F'.A_cwzg+M'DdbNqB!GA j5g֠Vո?tzz::uytܙ{>tГO>… tvmj>cf͚E-hժ{vZ/`$&&Gyy93gδzj~GE!B!t\Ws~YD]hآJICh2x0<;z@BdWÆI'^X, W]?A!s>}GA['Sߟ j+t ;VB "N3{>w>5b |ҥc@Ls@+o ^jrp}9Bx<x!nm28qFƍ+zP^דNBBxZ?K(g_tk y'BN=Boغu+111{7 "bx' kB!B!Bql&6ԘLۿ»Bңa2[0\Wm6fZT@|Ή~,QJt kiKu"hG4YmPN9їw~)p6_fB!7~z|A৅°.02z Ҷ_B!B!Jqٌ40krUW~+&JeX;\z iXٛߛQI4v$:XCYsUhW"T&P KB aٲe\wut ߘ~7'~GvK-B!B!W(XKhN\}k.2Vc s킸J唘Wێ `0Y(%x]d"GEupY!Gr zEzz:gΜt7nFzoדpT*A&,B!B!Wg hm\-Vj)bwvOWd<605VAD_Eǟ5chT'u*<[ȀPgWٜ!)@hh(VbժU۷RVZ`׼$؂B!B!B!l01-<~z S <Ϊ֐iθ9ҴiS͛ǻ˃>Hy7ӧOgffNרQ#DGG~:HCqeB!B!B!.iI.l{}j4ڳS?k5hώ[#99d >}:#Gv 4lؐ| M]W!l!B\RwE9q%%%vmJu,X">6=el#Jh~H} E% 6'8 ȖCҤ-?N&[-}] j Щq}Cs'.7"28AޯGKJ&m.J_XN&zqM/pɼ[B!Z*MU#~WWV['ObШQ#BCCׯ~!JEll,۴iC֭y7yg)++c̙<ϟ^gܸq-2c%B!jڵX 61GN3T*R|Wؼ8|:ˋgN}6fmYć?Ҕbw3_nL_ 㯂meΚEnSi4`0Vl.F' !ezbqVʭ'ק7w8HNQGi_Y]Jק`Xx~,N*Ե??Bqjή|vFs_Jpzz::uytܙ{>O>… tvmj>cf͚E-hժ{nzj~GB\d h! ֫W/kx[ʑ}mMv2wFF|DcMfw=nf2(gtd$@X,r k{H>7O~]?.>C+BdPhFk:ɘ~ÝLvTR}9&j3J_ZD~~+}Uybp8uW~"C<A]ۿPi{r}*, * |JiuV q}:_J?wQQg@-~%חLFև"t5,O7(B!OZ [|f&~w]{9s'N`4iܸ1vzEbk[v,}E%5ϻ71E˹`9p\bJ*z^M9S__)(-oV_۫ytݶ}Lq-쓨P<&no;~rx),+ܠO.<1~|4Zs?FSo'+쟿`2 =?JesiߌNQuqIos:Τywl­ڦ|>§쯣}odts_YCp޿ ̣L[@tvxr}*?*쟿h6$:UR>OogW>xڿ\#wj>_x`~ _Ыe'MCbK}=J?B!޲!FiSm۶T"!%6,B!PfլỲ߿O 1M r -6a05m! :5n/fX~tk֎v-lD!m;ƿR#ۓ{(~T <2UV=1Y,/0[]OeژP]0` ˊ* ؛{油-ۿݲ 3W[{jꨉ a^:49Y  xskzg3oRa;?窄L>J*x!e~Gf?^#an>(m5{`2 'F"ka+빦ն3X;\k?t{dww4$`0ٟnLf3N`ބWQT<09nIum?no8Y79:V\JO*C#j<>:_nnk_GWI<_i8IRr9qS]]_Ku_szGJtHsx łZr7B!B >}v/~݋bٓ\?n4i֭dB!.5Æ _^஻b͚5߿fzA:$r霕~dFĖ{xld>v-jt~ɬ_v=hmۖ㡏_"yV[Oqpigc2hVbo56j7u;`4Xwmd}{`H>4IG&VI]߱>j}hМܸԥH:M]YL- u}:2C/}j4 ҏ-vmq}7O; ?2`\_u=QaDf=L_2ooWB!B\ϸ{Ay~&ND:kN_zrf\}^K zG!2mM6nݚEUk2oӶC}|g }쟿;&(:~ql?}t%/рd_|Ditoގ!HBD-Pu\ô6 g~ROAָ> ?(J{{?:$\v%կbixPy%T͡ZņTQ^Y{Ը5D'ǝښ׵W~de ҫ)ʧyL#ە_F֥ J < ro ]o]?T?a!zzo/h)v?ؽU[VQYRa2\튳B!BCQP[5fXsW,/`Fc1St/NgOrKZ`0A\\Buu)jٵo4$..m=B!.^JAO߾'K{_RRcb~ض%S޳}}4;Mh:2jl \-oٌ(񛬿8O,x-sUݽ^p4'պnJ8_.JEAi1gGVGӘ++Xsdl?6qǗXc=vFc0+(# '0maggeŶ Z~i1MkꐒQsU~ ME_PZdw_GYxڿ/7櫯=d;ލQ7+?B!B!.~w|g5YThIGwA}5~6IUhuO 뺿34jK3eZjiذ!+Wtll,7n޶mQQQv7n5\C֭6maaaIII|幧۷oϴihݺ5z"::k*j!RIOOrǏF˖-/rjgXj TT=5汉vE]GA*5PVSl_Lh{O 7ؾ'?ȱ f3OsU7Itm c9=qȭ<\JLf?Hm۔_GD]g~ ' qOu*oo%B!B\n aU:xѪPsfsy<6'}_ /w]F-[֭[fٌ;ttGFۗ:͗db<L6r b^u|UWX-[gl߾i[!" 0ꪫ.v-O>Z-* nqM߮'0 fLKĄFaBVZ'ϋS~ROĖn-)Zݛhϛcz&VXOlX%tiupfxmGx1BuAmSM6ǝ3'SZQNצmug2i GkMa}2hVAlXtFo-=@oMjՉQdpe%ܞt=m_gR2~^.?nFCFxt\q?@8Fo, qь3̶MuV>Oo%kxڿXɀ=mSŸ'O8[ g!B!jU;|p#GtOHk D,h6m7n'O bcc:t(ݺucĉ}DEEtR @JJ  ={еkWJJJNr}QRRP#6m0ydƍ={֭ʄBRwmiK)NJ?㲘f 1LѸ7T_QrܒBuA]p KU Yg/K שX)(#H|>nowFI rטBFq>rgl«$F6M$|I7C֝kYU*)#>A7M.8CH@PQTCi\=߮(!풥1Vnog m;v{4\B\7w'=<2 lҗ5ͷֲ|pn@ZM||<]%;;0[aCDG[() JL&,ⲤV }4ӥ*?@!z ̨U*{qawa _s۝QR0Wogs_:w}[%eG9k?_8F!>"m})5jMھʥp} ]<_οVqϺ Ž?B!B Q5j f~}~CfQڸ:O>r_? Lۀ=c911\ ;|uB!nݹ#6؁/3Σ;5yѕ@?B!Bqq3_}bib p(5Ժ,|ɏ t9sO?l6ӯ_Zm׮˖-3gڶjՊݻ3ydJJJHOOVT/!B!.50 CF *BMrB!BK;_cY>+eDl̙2̌\#|P6?~I{x4Ν;Ӻuk&MLPP|,\&Mкuk7?~ r7swZB!B!B!C=͛/v1(ES@6p"L7.E|d9&P*09,&&j* 47|cǎѨQ#ϭobo=رc$$$xl7n?l{;ЪU+饥ٽnѢEa!B!B!.>Y߽nz[zɔ)S۽֮]ː!C.v1(`wo"}3Trd.?ڿwwXEiжm[Eu[h4oޜ4z-,X(]!BדJII nFSy:FBd,au:~_sC^.w}ia z ҫUg:7isrY.㫍+*~ ZOsRT^ ix]WU_ҷMW_iAi\>n.v\_!; @RQ412FV_uNl""" 1L5>}ϖ|F'O sy瑙I\\Z Jfa6 dD5~K0*ؾ:{72oߞ\-[FQQVoZB!ĥwyדźuήJ: @J>Sa$#/t* &c6rqN垾y:)ʻh]l.F' !eGdis yY-s~"^.c?%}/2}9O/| rlʜ5U,jsq־$ԑ:?g">k ww8+-O(pbo߿UB P 5V]ĉILL[ne˖<L0T&L@BBw&M"..!CРAƎk$n߾=iڴ)}aʹoߞ)SЪU+HÆ YrX6nh{m6{֭nݚ^zڵkT!O<8X6adf zxryi)qdeШ n).%}*$ ?O,x!m;9X,f>zEϖ{N_Z~%$O"/f>).WNl;JĖ5tEQQQ%K>|8$&&:=cǎFII 111l޼?~<11> 7gy~C?LmGkb  !.m2!Bq*,,h4k.v܉haÆu]৅&u 0O{3co\6lIt>Ҷ1o+nd6cbd6s MxJc ?no8Y79:6nm;~р7Lųn)0\3˷ w?Gs2ht:e'/QRQn~ίy)i_W;xݛ'P}4xd;#z rYؘP]0`[_>LG|Gt}>yxF F#3=wx;xz5M>}/}Zy?VԯׯpTXZ6b͞?lk\j !줾umU_F~4mڔyҽ{wx S;3d¨$/~YV_RVOffeRL$k !g `!B\ TL:z TkŌw3 cx9Q:7iSoՇ;_mWuM<ҳNqSh@ Jv˙|rٲ#[#z؎ma g)_]xQaDf=^`pigc2h?x &#ۏrKܱ7n7TGk塏_⩯fO.ڹsAI\_gsվJwTv-jt~˩LY>~SXkSouo$C0K?tCm>Zmq=GeH>>uRSkYc=F5{7+E!$=O/s?' >u |&''õ^ȑfQÇټy33fgYfv7ȰlX̤aCAYrrrj/Jc B!Ϻi=Pըj:vΝ;-Ϫ5i[O􌗊h1]ϵj[/(J{K/:$F ZߝՅ7RYVԨR0M3R}+J>onZ0G JX,v&WR^R_'8 ɈhGepk|m.ÝoWS{iJ1%su~mU*Qg0ӷMW0 M&LҩA$Dĸ=t/,0R]9E4iqԯׯrV+HI2 5Bq)j4UU_]ZoF)w^ŹŜ_c=Lgm|wt/0t!2Ps,:@QV-YϪ\_ ˊoʹ~kkՂkcKk_sƏ5_CտKoL6~s3C{cDZ}dݿB!nkN^Jϟ?ϸqˋL>#_|ٳgl2FɦM;w.QQQ( 'N؆ Ftt4nnndeezj<='=^ϬY8|ܝ;wRRR"BB!D3V_EQt:'Nm۶UZzStbAOKN x #< İbZn&'/1oMv],,'i4U~ſ?Fzi<\\͞srt$:3r}wvfݓQuPVYQc7_R ?;;:z_ϝ}`Yi)(צg3վƷ|~ Sj퇋 Rj9'RBc~vY~CT/-Ee7~oio{Ti5:v%Ύ= {XS?SoLtALOY BہOW\QCנTPUQNyi %%8:X>q=ݻٰaL<ӧOSVVƦM8{,6m"))O?7IMMeܸqb 6SNe233;va]w˗ٹs'klٲ;vDQy;BfkƲdz= 2Ӫל_RHIy)jIhКӟ7Ҏ!2×x c^&/<,ZQRxe{|&17%[>agq$s(-3ߙtےBBN=>yeLzw!jׅ}In5f̿:|_?e̼glޗ~\[cۢ17˷?Q^UImVv&`g6<6~&gM3վƷ|w/zyIOK:_N,=m2 {L/agsm[0PLpЯSO |VVCKh`lZao /)-@&nj2kcm9a!#7rB!fNj#Vڵ蜠 ,F.]LN!T㕮={7vB!Dxv5-)n},*f9쌗oRYQn< M׮/o :2 r iS=߸-( M읟=/,'_w/]MwRBZX|^Q.ng{oSkm|k/*+Zy lfKs}-*/ *ͳQ^[ӿƏ5L}w\SYg7coS_1E,=/cwN?ʊrH>O^5]83pBLB>}$JI<*w !͝}>h:92;7~; aoϞ{bݗ|qݲ/{\VhSZXL9om}oV=Z\_<Ĭځ~ߡq[ӿƏ-xSw?7)؅9􋈪L!B*˗/o͈<X!B!o(-`c89| ZD!Bq{;B!B(PcȩB!?B!B!B!OB&B!B!B!OBB!!FÙ3gjmm۶vJ0^6-wh;۴r.q56G:K8ASDkhs*ko'Z=vfRH{z5[|֣Mtx7.hroo_ϝ$V-Eer:3 Esgv^7{SpP;1n૸&m:5v*B!BaL !PEEqqq5eeeѵkWMNCPquv՝Z]ƁSljݻs^/[c}l?Aamn.Mfx㡝|˷ Eѳ9{YTҭCFScoZͺHyUEUZ N[|c7vX? lM>>e <2nήfn߱/Ww;v͔ q[T'NoH~:OlӞ3܁(ulAinN.:YֹtCTw7htZ pqHi?c?Scw^^uO]sּ>ãPc[\B!–dX!8qwwwcRH//;3_Ӭsr3yTi5V3<ѽ;q ~Die9-Zc/@^q!K6tTƫfu\2v .5̂\E's^'eWZQN ovqnF~6~ʊ)T2 yA׾+),-⿿ԥ7"6 zǻFȩ†?ӛui?kj?}qw5,_ļ r:ߩ }j<\uccߕtwo5'N~nOܺ=lQ?cߪ׿g+Y-Y.ΎN:;>geU*gThPTTj5~j~s}ə\++1\@?57Sn?lf\)*/eL8ڬLS㳱u:k-B! B!_wvﺏ~\N;&U8v۬%B;ĥ&oFǝɺDSQT<9y1}oW/ӟ`񦏉ݻGN5X7{]7=Rٖ =4:s*.NNfŷ&?owOzo_w Ont X?~5]C;G)(c']^wXu)s35ohnokM'g5xgL UZ |٤aLNGQsM?zǟ(U3p>;JMe}ãbҭ:sgsݞO?cߪ׿gLVmDuSN'2۝ ZDj@d9r.s) oE?fJSc V 3ejy~SƧoegP!R^B4={^z WY`cǎeVm˲F;Ñ#G`ɒ%vorJ/_nL@~~>Ǐg M>zah_|dZlIllM5Vy;w.N^w|LxnUkW393FLzg4;!B4<˲gZjyf4<]ʿ&հ\W'gځ+H^QEWůNZT^Qj tz]cta|ܽLƷE~~d!'x6Mf+d^NK…y/ ЙD_m:qgcd㡝:NgDK\îp<wWr3kՐ3W}go]|v @PC3}՗ޚpנl_3ǰ}Pr\1oΎN\uW[)eywۓok:q=>O `al|ڻC_EWv>{^Bx ٱc粲24M޷o6)Yjk֬aL4\pMifo.\\övbܸqݻm)Ott4/6BD*%\:-̨i;Bfȑ#t___ƹL!h嗁998y{9EWQTW5naQa-[1FiXkə)gʀVVk>]_OQXZ(6ӛ%.ߘ6X2ʫ*8|=FEXνpqCՠ՞@oHU7 K1ӻƹOC[YCj3Ɵ9WqSVY'#U[ş2~KP\Qx8kOF-VޣKvL4Ƣsa뛹Eu޹ gl|%׷Rf{^BQV=Q׿ IDATMՒAppD|ǵ>>>k4._LHHε߬,puumx%PVVF^^!!!8ዤhD ƈ#,>WՒYgPw=9|0[l8^^^...xyb`]}]ظr O='--~(z @ǎ;vHZZŹ4iٳ5deE888RDֱzҢ<=oX B!1NGBB<-yPT -᝺7T]2:?w/t˖#{uFRQXZWDai-N5>/(}bZYa~e^4t۪Vw+^L%/Aaĥ&/_[tuf^2؝{i2[z#>ǣVQӧ߯_?׿2ujc,ϟի %++ɓ'н{wF7|Z/QF{^2330a+W>/?x G&==z딞O<Ç ڵkٳ=z188opQFA^^^}Q6l؀Zfk<&޽;cǎeݺuTTTPVVƦM2dzYQF&{=6nȾ} y9r <==IIIaԩdggSQQA~ _W_111]ÇĶm6ƣHՔ}ǚ5kxW7-yY3xZ}Ip.eTwN);3'g7 r/kÛj п,ͣ7+o,Qc#i~X:ԀBdϡ6gBь֭[cb^]آZxxӫ]>y3z}7K+IxiZ)!8 ^3:3}wvkJEЎۿ~Qnߍ ~{XsaTUw!'gNd `@o?BqLN^:con4t۪t1onYi9꿦ޣ?3I ϦOgQ^+< W'lu뢲AYU%eTjj;Eώ5ku:J*ʨTPRQfb]ORRQVb}1q?i9 2,?cӒr}U!( W'gf]o!BԤW|+$/L\&$`s(c=̙3x")))ڵ1Z~&99XΟ?ORR,ZƱ[nȑ#;wŋ3k,?k,IMM%==DV^ iHJJ2, wtN<䯢(L:rrr8s ɴnڬZm_%(66)S0g222 K*Kղ}v>˗ygYpYO>LÇ9}XƎ'z)S0x`\BVVZz_۷ogڴi<̘1;v0o}Ԙ >R羈dg0a[]'g7EaKiţ/֢zݍ(m"Ό %ٲ̓,B4c+={ɩS1KdH;tˌ_{'0۸, ='ŅLw_mS_dFZpN!aʺBJ+ծ ckQTN\$<5:E'{6;}ܽ>A'A>-QPߩEgB,-OP7sT*{D>Ί/)ZΤ hϛU?s]MC3TTo CŇ?Kvj`;>}[_R]j[uTc~/^X4^--V$ʺ ??<[; rgk3;ضṷ 3:䑻1^\++~%M35>-ח[j!b:ngnB!,LJw"ޞLʚ3G6'55d:' ,`uNEE7of̘1V9s={6?_ 8**xRSS)))!((C1k,|||2e 1}|sx *++Yn6m$%%w^W^y#F_V׼g؈aȑ>}cǎl2^jvij &==|3>Ls/m"p.{VÙ{9( ?[ }wPM ҹs O=co5mB!nJYiIGG(/_STTqyr)RjɯL{LVARTVbqٶpXwE .g}ʫ*KWNנ:r)RԹc5 ymr)J7_S_RQVZz{iuZ%#?64V-/ʤwy o!V6ll(şGT)GgbA)ꅃ߾Pz`EojwVZlYc[\\aw_|ayڵJ~'''%&&F3싌T֭[Wheʕĉnݺ)=+{2qDù RZZJ߾}MiϞ=5#S9+3_qqe…5*+22Rټy3g(hL֣}W_}UcۦM0E)_{nӳ.\P%;;[QcPE)**R;Vc[PP?y׮]J֭5(|JddٹѹQzrg(uI?wLYq5(R>@QEVNc8~ՒjycߟIxEB!Dꎧ{oM~nx=&׿AeۂπwkW'gpwuj?c5) 4ZS)pq۔| M~= wǧmq}wiيw %5|U n[! !p0@VQ چ4VZQPP@yy9nni2223yd&O̵k̈́ 2|2IVjsY[^gu.]DAA;.]n:VXs=gNgFy{rssMƬ5eGXXXvxx'駟e̙GCBB())___uo"::zY)';;t{}珓zl'^\MU9%VJvpݧVY^~TVTT}wxiQϨnB!Bb"3a !nˤQ=@!7-f4돧UTTO?´ .sDFFtiҳeeenȦs%|||4hUUU5sb ҹ}zA*KV )))_qs;L4d"""۷//B ʱ[nlݺra5ewM|||s:u*ovb̙}t֍_TUUdƏ_kd|l4y7ߔw]%a_{NQC8sLjf2 e-zYO.j-|x_PVSIfץ9 `!B!B!B4ijٽ&񉿑QP{9BFA!{MFjtZfʕ|tؑN:SϟgϞtЁhf̘G}TcjذaDGGɂ XrߛuЁsE=xᇙ8qb&MDUU3f-*SND׮]0L\_gݺuI~LƬ5eǴiqެYصk111kΰ]VfmFXX!!!F'- /sN4TTTsNNZ19Oq/F>%7VRT "iz#.N[rT*C-X,ּa]ͪKsJ>tٻB!ģ\coI tcQ9.&*+ʛDB!B!0[ ?q8*+ʍ]tUcۅ3ܪy7󄆆eXNǥKjmgggþΝ;o2bhӦ FΦ:yf~~>ӭ[7ꕟO~~>m۶HKK#44ԬIgclQV]3f 'Ndƌ[Kd̒q]pp0&22SNg/^lXzyKe֭lڴɪ6$%( `!B!B!Bl1[tbֱ5ĬT羭[g1|p'8Gggg:u/lQV}Xt)+WfD6v%B!D3KprrsfikXFŕm^;Nv]Oj>xFe%B!ѣ Yyׯ}K5gdd0p`=3,\9Z^n~mSgڶxZ۞z[Ֆ-[) BLO?ѧO/MFd\]pu'~uõ9:p8{7bkShpDtPy5l$=ǗB!B4˗/iyk֬s{hh(gϞs_]H߬9ZSqk+R!h1b?&M"<<#G-^d^C._޽rrp:OC9rj[rFbWAì.v6ЯSO>(.N&ӿ29W\Ȓ͟p>'*:Ctu7h9}" W7 Cgxg笝 Jeu|^\ MjJO-1_B!B!dX!hƌÎ;(..bƍgx^wMDp*Fq7;2yn0۰A5 qwކw_M,|QJ*ʘ|>^ף(zCzEtm O=#9׮dVLq6kjof Ox@{V ިUGŰt맵6g.b*O~*{NbL!5ʉ g׉ffwvQ:P̧~?2ߜi~,<7}L-<=rᘆNšӉ ='2۝N"Z janB!B!' !͘J"77\kOĄix!(M+>^>:{N2ܝiI;՝Q1:hqCeX 9w/xT .N89826 ?kW'gځ+!%-#+ ?3ln|ck Wb~={Q+* *}#2~Tza=,ytz!~f?זK!B!B؟,B4Ck2i$:wݸq#۶mcڴivҾC%* vc ӹ|5_w/z@aip| Oo\8~cnzxSPRdq,kNhxmCܺXνpqCՠ̎omStJ?|Uc{@nޣ?_}OYeGΞ ǏVmmʀ)./@qE)Cfvr| !B!B `!vx6iHH{A :ə=>2 rjJ/_zҪ'~%eݴ:s&Ȝe7&|£ݡ* ,֒Zr0Aob;ϿO I 9d:XQ(577? L>! /Aaĥ&/_[tuf^2؝{i27[/!B!BaB ‰'jԘnF$Oᾞ ۝̶V!pLOKN :m(sN:HVNcWأ^].q*Qm-VO!B!BOB!!GGG~a6mϏzS3*2>~X`yeLzw!jׅ}5w8 xSrhĔ( !~LU~CeƇ/ĽgsVF~I!%奨jv&]@kN؟S,:݋dުzZXёgMSRQƜa3}fg6n6K|eI~q!gd1CŇ?KvjPcw&`DZ}$;»h[5QZYnyB!B!|Jמ;!YHIf|z=8;;eTV,s:&g]1م9| M׮/> >@(TKĥӼm5K@_WTVµZ5|sK㏳3or u2t}_B!BƷqO? ˿|<.Qjlp&gm;7v*h4y9BDDK,{~+W|rg}Ϟ=K/Ć  9~8C5l>}:/"ɴlْXks1w\"""ri.׮frhg-άyHI<*K@ !͙ZpoK\QG _z'\ kl_S=6kN|cR23G<9Ip7aM~y(pqMV ƗB!B47###ӨӪUXf gfҤI$… v_(++C" IDATԻ;vp5lKLL`׮]7pٳyٽ{5V^Fh4zWp̯0GB!mac89| v:k bF6Z۽B!BzE_kZeߑ燏Oeee㍩J ժw^|ՓÆ cĈj̬3O>|-[:ޔ<\\\jי[FF5_rz?LTT={V?cǎtp|ǎIKK8Wsػ>&Mb={F졢'\j8x_k(-ݳ]s5!B9FL!ǯSi'=D0龝+B!oObqZ4LS cرDDD׿ޘJOOgĈ0tPBBB8qPdoHH#F 00Yf,o,FyԩCUV;>ٿGҲeK}QV^͊+ eժUfս{w^~e"##߿?fc͚550y{qwȑ#RRR@JJ у6m0j(rss ׏s||<ڵ{SNl޼ٰ/&&kw߱m6|I^ydT>֬YSoi[[ؼjam9Xxe r/GXV/ζ/^oVLҍs4_=ͫǹہzs!B!B!B!hO1q囄Aw—+䓄u NNN&66ϓDvv6-@QNJ@@9999sdZn Y&55tYzEMjlݺ#Gp9/^̬Y(++3WFyZmK2e̙CFFacSeiZoÇ|2>, .4qqqP֔)S8|0OرcD3e̕+WB2zC{3grERRRصk۷3m4}Yf̘;7o9]dV>}o߾z˾@#uLv) /EqrvCQv~vQ*0Ν;G\\oՀW|嗬] :dQ|S1̙C@@PlZ^/:[c޼y'L@rr21 dԨQ|@R֭c֬Y>}$^}UW^yo0IzT' ,0쏉a֭ooo/_ο/KSOpp0)L|:y&UO8ΜK;0? 9sTΧuy\͹HAprvg_M؆L !ŋٿ? TUUݒiޒXB!B!10{]'/f6vˊ+hժ 0,|%<<< u(CNˮad Tqj !B!e@Jjl**a@ʝUV?##q^^^DGG]/K4dggdoNwTrqRxx*kZ.jG\}jGeE Z͍K6dX!h4 ?3'O'G-fd^C._-B!B!\Ōf$pVQ 돧0-ftLOOҥK0h P /FBj5:0I-o,u+V0Yg4hݺuc֭6%+T=nkԩSy뭷صk3g4싌[neXd ǏG=չsg"##Yt)eeeVE]oΛoYTxz_<+ jG:E X#2zao0ruZg= cߖar|Jn6.adX!h jn|s-椘0:F\j/O!B!jٽ&񉿑QP{9BFA!{MFjtٳ':t ::3fGRPT|W:u vJDD.]C̝;(z?ĉ-o,uÆ #::H,Xʕ+^~_gݺuI~֖eӦM~yfb׮]Ю];vZ͚5kضmaaaVլ\?;ҩS'bbb _عscB}***عs'SN˧8{s;s uō7T*M|SYbNEdUg9*!pwywk{మfE4*xҵgC!hR.oI tcQ9.nVQRRo??Wzʬ2*+-ס+z"Z?5QwνT!B!BXf-8~F[xD[™Tn<^?VҶm[kO~~>m۶ƳL)**Cuibtܙ7|#FF6mq^UUiiixflCآj̘1L83fXT^zz:nnnu.G?P*?88^{ Sسg/6,ƥ$űB!ĭIxx8;v`ذadffbN ^n:2*z )B!B޺888Ը[./,(( Q_ ~>o.]LNl-ʪK.eʕgɳl]\\m5;vnX?'%%#G2eՓ`sٝ33?ʷZmcgnA,~ܪ֤&mο.]K`7xznv7V~i1nWV.b-s  BS>}:735fbǎV#;;_~yi̙zz:mEEEl߾޶'2|z/2|pyg3{lΝ;UlOȾxjziq?jF5XAǼyHJJbڵ7_tŋ͡CtMVyHBay <ǯٿ"=7sW2=Voiǘɫά٧WmX+nSV37i1Ww\FPz[{\V̳;3kgOS_[o?kҥ /BW^h{muy  4:t@||۾ͦKXj]QF5r5^ %J/L& JTn>so&翬ܝvf^%eM&Ud}-@o&)^o_{T4h:|{RW`4c~؋O_SCIe> >RⳗIeJ7 W4hz-;R?GCo70 t5ֿl.)W{ϻľ#jנ~4SAf1n L.sXrrrh޼97vҨ Q(x{[cMUUΦy渻[AǏ3p@ c8RRR8pׯopyO>X,mr O=={$##xx:w @۶mi۶ymےJ73g^/* 7~#a_@eY!*&_A?)__T555ddd0`\\\!==:KiDpx$[F᣼'9<ջjTj,}lzV eU$̜ᏠW! !gONdP?o)htԵ^>y5Uj~ 3^zWFC.ѳPZɱ3i1.r9^ %~:)_JW>ԾDɶo1hμ5_O/}Qa`Dn<9AI+,Wwq!22d 4oKߤuHOR&۟+Of_Y^R?(WҪrZztRqdڧuHO7nւxOsFz~H[ׇy^5Q1[:Alߎ^`jWV.K+o+}TДOAAh2!m -gϥzG5g֠Ip'3g*֭[Gd„ T* `ժUɵVjj*&L //FCbb"˗/'88q/_fԩ8p`JKKپ};:ucǎ :k"Q| VbgXXk׮w>|!CPXXhGyիW#Yj=ݼ:vȑ#IJJ~XbÆ 3w^ᇬY]v;7&++ Je=yg0aɌ=\dbĉ{ʕ+4h'N~`ΫS< b ^} ?gS{~ݐvbzΧbcgq+L~kܕdmTU`=Wk?I|HklY:䀉8B]b hAA *++h4KJJ yyyRRRde~S&so׻8>g#T9*e;s*&ɜ29kf/ƚ Y3{a/9KWtzLF~8Y'sn}bGʗ`}p.R7шqFќ-9!!k,SŲ%=3dSY;g&?%mo+~lthÚ9X5}Ng]u7Ɋga͜T봬ڻ)sFz~H[ׇo 7lOJCD,oGO[|W3Y3{!*o{5;34SAf1|v$K&ߏ}$ߏK#Iyf8@vv63f̨&I޽;dffr3i$i6?~<ʕ+RSS̙3mg20as9RRRj_߸q#̟?)SPUUeN݉d.˖-c<dee>WMM 6m~ؽ{7 Əρ8{e˖1rHT*h4G>>̛7ir˗74-H;Zlsb@]EQE}!Ս'p!uw))Lq%[;;νnnBÈ)A/H]S2c ٱcy{Sff52;{RZUk?mOI:q44}bPSc;<:>1soTJ/"BGˬL3%o}/Օߪ(TrODf2|묫j_~^>W6:ˊd,M>k8{?wz+0LΟʇ緤̩klAvߢ+v@j'7ANnCGXo|Y_Sk  ͶR3Ԧ IDAT"m QXyםYT0 ddd vZm&;;J!jZ]O833/// GD\o \Nxx899eUkg/LvC#<Œ3x7ظq#}@3''<=kѢg???yڎ˺p;c49}Ul_g2iqWx޲eE+QY/CMU)J/zyUPzvHiD  P(7!owgha';|=߱n]&7 Օ|x=qw1?q_[laԲՎ`c F# 7u<<=|2g\ζCw]⿴ PRYNn)[ka\ϕesRCBSU/V}ky0hAJ[b/~Gy PW]aO]YΏsm[{;~mϏ/UcoANÖc׃'8rtG]dd2Y^=og{[ϷQ>8~4SAf՜ܲ `ܲ zG5o2#""0 lٲܹ͛7Zm>˸HkgK$[WS=?AAnzǪcUPhyt@(ݻwK:>&&84:zѣG#[G vj5jڼϧ~Jqq1_|F}ƍfѢE :G]wErrrcܘ0a۶mc洆gll,111TUU9lڴ~jOrr2wuKgqd*rW\>.i~ADqxJ{[|/(Y @^}IEMA?4+ﯽ̜9aÆtRϟN[ntVU׮l)*]<1z4oELL0 fb߿o =uvj(/Zcd4HħQT\{iλ`뉽l>0 *tm^r 2F<ܤoбEgo?@TPs&,~Ds`ל6 |/эҨykg~o*ՌML|T;IHۊf;IhjjȄ eמ߀5̩klH}~4HQTCːp &#m_SR+ļuP≁06qt{lߎ{{Ϸ._vq-n8V|~  ,rǻ=HW -gձTzG5gԗ3%Xr%SN%44OѼe'|Fd "[Oɝ mIѳ-#?jyc?SSmǻ)˰`,XC;nu=A/!aZEןB#uIhP> Sc2(**A*iRSMQ_O J+uM&ԅx+ꭷ,jyz7j*R@NIaA6=@GwJuB}oHub-j[*4UTVΐ.Sa^xpZQZ<7~4C}cA)˷'(Iy/Od`:) Ú֛;zZM-K9Ln]> eJu"̝ 2 ZhWZ: """R#F`رL4A5=Z-.\ ""ooi)kѯ_?bbb۷og橗/}96Yl7ndݺuS\xxڿO 5zJ MqskJ=~XA2L/:}}XL{g3gc>"""njM/mϿaSO=u3{VWAD   Դ*   t[L4Ct      7V)%^AAAAAAAy ŸXUU٘L&ڵkp< azI>fOQ:FӻYhzb[q2,UۄF?ԬWZ̻mX 2R(,/Aב]'śXƙ''35OO+Y+鹙>szV)mUue9玳pr{aڧ"=7m'%Wi+;RՒʗb~s>5]]YܤS Xks,7rooXrV37i1n?1Lu*5Ւ^;>Τѣ7XLۛvij1D9 ʊj63 Ͽ#^;!5M)?=YEGr;n\FCHuAAAn-1XA:t@||^8}jpuqՍ25.ܰ=%+mߋ5?N&cZ7g셼0Q"X3{!kf/\n&˞90l?ߜthÚ9X5}Ng]tшNF\۲`v·_Ov](y'/:Zmo3^uM&n,>oa& fwX3g!:-nvJ3L,9#j?8~䲥tdE|Yĥ+l:[ &#?ɬXxzIJF۰l3]棻;ݿd셨^1^(:AAAD  7ľaMh|.-c%"yBd_oK((+A.:VjTQBיN*w1osuqi>ڛv֡|m'd_0A|ғ1]rl<7 ѪCRxC&0(tgoy9SvfvX6ko`4p6'Λup|O;jZg2SOǃZR o?} 3ϞPN^(qsqeHs_~Y1ޭC#VrRȶd>ۍƒw|=%Kߑkx|t5OŭAAA?&1  7ŵ50(uz5h9}N-QW1"BΧ-I_u0柵z?dߩ:V=iGpsgx+{(u~#0Ok y<< &YTʖRGIo +v=ة'nҝ=Rڧ1kkƷhNJ?PZ@I:G]oTOn.课,d2cNWP\Q괲m/+F&x7׉}P|G$7|wkK#=c29?d2 Fy$^|r|;A*)/hivKD_`!eYK#W;ϻLvq>~tǭAA*OɓILLlfbȑﷃt^~eV^-i1QZm_0d?%g'Ix&pAjcUs, {R\Qʯ):-e2Pծ]Gfx\]'tP;eM5zG.ҷ}y g;@c[tN)췿y*e\RYN`iۻ?ԕ|x=ܱ|1?\ԙK [׏;tJd]7ht)  _ɞ$@SՃwp_2dHߵkwqoj͛/zs=ߘٵt=6qw5I(fv !\:Ephڲ)AO`0hk/h4hZLFs2FќFdj MF] MF<<Ҫ=koh2QRQCw]hܜ=Wץu:!8|n.fڶ~뙥y{#ɈhKҞM$FsG8V?@4eŨ+t+]ZÑ@T?O6q}B}8uEj-L_|֪=KhÉ߭;SO?N0SbwTS~>]{޶S`n \LmP eps9<'9{LV۹ZZU{^.g'hbYۻ~֗pB*;WAl#m]Z;7;2jjj̤ 7:BjlYSSCFFj ZzJPVNSYoFh9@YI.KxIQRQ_/O[e8RGCru>ðPC:g52u}y6㨮TS]\kK=ub  IW_ e̙o7ai  Y3flzf0g7V,d;Oiە Ϗx ɿP12kZ'(wC.^d@S kQo{:V:kl4oyC~t_!f˷g'y+`^`1H/Ndnb-Cn32a9}LA_H[eqzZGi㼵3F7_OEj&7vwV>|:>[{BL&`a\]u$ MO |<^d<7Q&}ȏG~Ņ-rӛ}ƓAo&Ll׹A]?1[@_p霥w  wB mP_ҏcj;Ϙm*̙3FCUU֭ڿ2330aiii(JXj111NMMe„ hHLLd۞:L:Lii)۷oSNvLNNfrL&'NxE IDATwCcSO=Ejj*O==dٲe7qf?I2voi~FDUE m;{Q\o??}FDn9$}Qs&#gȃsi֢ɂs|<ҷTEeAN[ɈGre8Z=?̱QzRY^D}?rqX>EtSv#j*4ZDDXx1Zc-ЖdmTU`=WCgٲu1]l[˸)Pz3W <0cɦoA*aZEvfCj5VS}[f2LdayB< _e1?/S>#S0]FzTP@]nMV?3qT뵄6I93 Tomog)<%-g}ӥQyغAAZzcGB<ϖcɴVosgh~g>BS;΀H?~\L+Rd۶m7rHL_k׮{oqF8@BB3gwH׮]߿?-B2|pBBBXb:L&KTTK.ݝ\\\󳙧dsΌ1yQQQHMMQ1ۿK.K/_f+kd⻥ʀѳR]Y\.GXu,ʻp rtJ#:j +8&O/)6qׯxx2\,}1Zۏ$ug =??^~ "Lx:-c{ulEJ 2y|n3e n֖^CG&sxJ.9׫ {]w.RS@  H%Ɉ {-Gw  Ws /j瑇;AaM өG5sڭ:v|.r"Újc k٦|k_AA?鹬O`Ǚ\vʭ3؟pO>$AAA3L&.\`׮]L62333f ngϞĉ( ΝڵkˬXryv|;̙3+R5k9h\pxn 5:.ދ d2sǪ-]ŖҒ+\BaӐ~vQzPzKcڊ+9{Ν܁ʟgpymaLFr2NH%bWeQwj;]\H{Rwc2()Lq%[۹殤s.+•K0"*Jiׇv/4ZAA$ӥ#)hߤcE_h,AAjG1+Jt: r9SL=PNckQTEEEj),,$$$qxyyYLgNNxzz[|RCcr c9ؿގ3Osl+{bh2ӷ))$(ix{uW"^?AAAAAP6 ƎS 3q*Q -ylQ&11{Nhdݒ!..yx뭷=zNhz /^@VVKLL .UUU,Z1I?,,Tmq!Ze1|=h A&J 2mEƙuw}kFIJ% Cɵ Ie429&ڒLΝi7M§nd2ҼeG1Zc+vHB8#hohݾ72 H#8g2]"bٻ3s&+RRϷ`,     m󼔴߼y&-W&rJNJhh(н{w~'rVXqXjtԉK-oaĉFAA۷o'>>fr%K0j(/_Faܸq~WWw*JgQ6cF쁡*}'NjjtW,6i~FGێwSQ'uBX ^>T&i.zd)ǒM:q!  *:޶'%A(<";;DvUWWsY 66uj%ףg #[IYEydd12?/os3U:w'.wS(UGCW`zW !s'\t?/zFwSF%B m:qu+%\))[؛v@o_b2ou$?ED`-."$nړu kA,mdDy, wW7s̳Vo64]%-".rچ[۫Ie2dĆX6aڥd*w!]g|l{ͅ,_J :"{[^#_Sx*<}>J/֮o{ Vh^ܟ/|j8p][3rQ.%eot$>)lK)_v]cr  UHGT78cɴVosg+3q5kO)|2JkQTTDTT ^&oP.h4PCToں4Ueh 'MQǪ*|#07sУDM\77wzH[Wꦰ8EFOYT>),'VQU^w O8 Mb  Y۷R+/r Jpp0bݽIs>2mB#I/OK{oY} b#x8w. < O:C"jGiU9sөE;\]9{7ysEV _GiU][̛gPTfnbfg8WOm.b~s>+]`ޮ,cnb5oɧ@S޿eO3GH:+(U89\^]򡰬 ޝBj;Zv&s.79yyTnAZ<WLƼw:M_˻6ŦhiަtW;@Mhؼ~ʗ;7з}&U{7 &)9kkK;+-bَ |:_|$_Rغtjԕe4\MNIϏ€=oRO{KIY;Rr $=OH|K)u%  R8i'  < ۷wCcr l9pxxX^؎S*hUY5 u6tA:, R:t >>gϲe˖ҧONhhm^wȑ#$&&6I}ϧ|F\rc1:GL{$G׭/#"j?Y{`˝jp}sR]Yޠo" <󚂲bd_a$촁 zl1{O?oc e}/u΄>5(6[K\d[ܹ>PÒk:`hKIeJ7Yb2((+Se-Elxk}x4GU5֮_gװtikVJJVݶ}G00L<Ki~^/w9bOcZJe|['AAAӧŴgy9s,Fܢ݉OFTݒ:Ic":AOf_777T*MVqrR/|>,">*{+R(/0ceY /V! ۶ľױxmT{Ҏ1j 澮w!bkT_ÓF:iʘw %'<^#JʩkӕG<[=:Da#`2׉ raghTQ^_\.<<%;#z 5_sYڋ\.}J a=OߧnwX?c{|rYiN[sKN^FGҥI^]w@Ш}ftoj^YՃїJ#i9MŎĄG})&W}W2o3򏜫0U{A!B-0PiDQu:!w!&g4Yt)v\& lZA M úGgL͈vtbv\3M|: *T<7!t R/,$mWkU;\ӈniىmQakӍf cg)_gL_AAGpP0է:G1ѼO4[-;s' TnVv<0 U tlz1>gSL{k-?O]Yg?⟾Qb2oNN W7_x]]_{:u ;OD[dQ|*ߟ⏫>wwU7ϛB!Bq#`!&fXXh5buZץwЛ8T*KtF3@߯i;?'41 ,Kw;;%(0T*ez˙>tHcrݳ#}?A|{=!<{PȞys\:ԫ6RnvvhP"yT*VnK&5Hk0w]GvqM=mm6#,8gI4IOaE)&Ct= FRQlΗ:(uaJEvq}[vq;su}yRL$4mK M#_o˯ix^զ(mV V Zsu|S]R_!B!-`!eZ1 Ug F#b z=#Gd29/WKLj~_n})}:u'r2fO޽ȫI<;|SϾ308Yftdl=u-bb/+=iԀ*M=6~̿Zl8?ArqPǛJP` ]}[7MJPwu]_t &#?KEr#Nffm9wv}+_5~?_hԀii>Bz pu}^m5S{a/%>_!B!B! *==ŋۿ9s&ӧc6ٷ7xÞgϞ3Z[=&6qNl|w c=o?{+c}ZuXhu]A>BƎuw'z.Ooח@tx$SZ8,ngѷ\~;YpC_?.I^_k>`{ϣ3TҽY;Fe˯Twu߭~aoHL_AMyy=k=yeɿϧ1kۍzt_]_xۿkSm4+~?<_^o*[)R7Qi2ұQ+&ǡWh'O7ߛźrM/B!B\TG*q!7cҬU[mCמ>VaՈPy]Q &#yeEFDuWZDxH_ˈV iK t7=˦(dJBTl3pw) y8 !B(YcGhRiA3'Ӑq!>;Wf !B\)48&I uMlµᚉ #CVh\g _*㒮rD !B!%`!B!B!BZ6c lrU-4e3ycZ)_qʜzX,ۍF#gϞ`0: pb!33m {Fu9MKr1tm36m6+%X-f뱘htW?1Z-f*JZL.tBEi6kgmĠ/ltpJEivTJԕzޞ ;yB!B!B![+v&@d^4u1}[Ըӧh"RRRe|;wiӦ?DYY7nSN̘1EDNNcƌaT:;ĉ`0ЧO{=Z(L4ɡ쬬,&NHzz:aaadz IDAT|rڴi4wz);SO= /w… ;ul_@dt"zm -; OQ\O?|uHiޝ;e=Nֽ8yh 6]Qq rN?1Жy{O#,<Q}/?8wCځ EĠ(E 9j6G4nՓGR1 [`?'m+ڞiJIA?~3Fm  ?k Tj@M;~Uv>?~=s]X!B! Lד(n!ϣj]v3HIH&!*B!B!oߖ# .N<l9[GpB233EQȊ0qD4iB~~>xLoM`` eee 4E1ewWfc„ 4{Ȉ#>}:K,AQNC=ĬYj <ءɓ'ӫW/nJ@@o'Ov1;vW^a.Tw{%mLz?FdOun=HNlV20}HxdTU}&7 *FԗRWZvY1~}Zɠ_"vmΤf_Ǧ qFWܴ]Q wbK5ǐ/wimxUPoSakFM}ZޭKی}}͹5'K@ !7]v1k,-[Ɗ+/[CQPP͛je"WN 5(%&>!B!BvLHnտKd``ʕRhԨOfD||<;w|,]Fݻ}]'NСC/ $$^{+V`HKKѣ 223f`۶m``PV3wbЗJEdL2;28T*ۼe%p(=:jCXD !aQ^_4(p7q#= zϾmsb#'Wq:E);C 0O$vEg1jp?(8.o/}a*ЖӼCϧW_5]2X!AoߞΝ;s ֮][-駟i׿ŁӧOēz(&@fg>ۼ0^S' !B!Q`` 媬VےcCskRk͚5c…<Ջ׏,"""W^E}b2(..~wudggI\\}[&M0C||<ƍZ6#wޔ5shwlY]ؗ9v%"Q|QQX՛dѨeBB#Z-+ 1T*"Uև" obƠ/CWQLHXA!Wwy໢$Jͦ:IiѠ%4-R DQ7{gܭC9x6xx ~e#=(B!BZqk[ؓ[z֡dЮl7;&Sz٥:_?SVVٳ3f (,,6y)VZEqq1UodeeQRRShРZRbcΝ;Gpp0ԯ_*++ /_}lڵk{S~e[uHN1tᗖV%뭁X,F}1IM r烌fcYqgvȫ_\u1Tjݶe]E芻GDc6Ub4h}@Bchb1 _C6ƈɳ{c~/΅YZ!:u3o<ѣd2~:n1]^i>!B!B8W#s:zA-5-Grikr$Q=G9wYYY@ d2( Zw޼KfJKK)--EVcZ)((رc,_ѦM:tY0L[=ZM۶miӦ o6z{ϡ>} /`2Uo2O9v6= 9?ڲeFРI'V3\OIAkBfZr7@ҖcVM7 Z9FfRW5@[R#[=h3_hд6ׄ[mV {ZNvPT&6"6!;XV:ҖkV gS~oϷB!nb6 ٌb!,, VguMp/Qa,شuXYg !B!1;X4,܌ FE||ў߽k~_(6e(6+Q5hZ(/#2:ήW21TVS:Q-lk|4J=(%2Ayi+ڮ-+ 0(V DF'?QxC'ϱ{eB!kР3&B!B!V@@͚5s'!!j5 VpU@F\Bvܖ@}RKu@ v`jcxdD\W`P0qIyFWܵ=2&~A.$8$ܧ\_ BqZfc *z=ٴlJEQQGO>8j!B!B߿Ӵgy*G$MZBDtݞh Y99izk1&n2,BܠYx3gh>}:+Vj CӥK p #B!B!񤤤p)i.-~ }FEF';MS?*2,Bܠڷo[o4-::W^y 111No#B!B!(1:q;Bq"**Z!B!B!\p!B!B!B!AB!B!B!B dX!B!B!B!n2,B!B!B!7kB!^';;EQhݺ|YYYfZhQ1L#%!X;JnLl_F]ڴmա7ؑ]͒ңEG*GHIHiRJ.ɧ{v~>ynM\Jtn_\)= Z&7]ko@ZGSΞR YDExGk.}CBڵO{mu>8Wt؈hBtx==3?_N!<$$@ 9Է3-Ip ZFԏKxğߛ|O_mˊYҥI[:ԯnTw%!*;;> !B!nN'B!uvb֬Y,[+V̗ϼyXjUs:G)(`6]^ϳ Tn'/d29Bi=}=/߹{6Rk?o_>p?wߏ.u3o1y6epd1cZ[ߺzQW 7t2tgfZ`OߑSYxh|:%r{W+ETWaϿc|ƒCz,wo|{=r?׿|O_m^&MV.Fu*,r}ru}{B!BܜdBqj߾=;wĉ]iEQ֭uO,ںlJ&Nx}6+6~=gahY4+l|IwǦ0y<1[,ICdXM|7kG?Ts3f]Mڧ( %rlBBdL)SWSP^Btx$A^87[-UVmvnrl"&٫rw~Lp 6}˻?AnqW*M fq5o|anE0ϭ/ߟQ[O]U=>O\Τ1ib5^Р` +J۪ {SV'k~3iDpۛ;B!čflm`1M׮aSZZʚ5kzfd۶mѣVʺZ~}<<񵍽{^Ix}3YKIQՈzaԒu=BRV^翙;2,Bܤ***ضmO>$>Y nѪ~SfZnѿmwFGҥI^]w@Ш ٫0oOz7-gyJwiRylOM+XNbSoVxl_xd7wg hm]䂿5=Y<Ӫ~t@ՠHp`}O?b}[tlԒbB.7SV.^-:2a`y7}˝W;e|l(͞nSlخdYxf6{b|&I|:vСCYbjR sʇ3fh"a̘1̟?> viϿk.Ν;GD_;wiӦ?DYY7nSN;v'`O>9YYYL8tˆgiY(n?S;vz^x;3 V+#3m79wNؾH?DZvmß> ~Ҽ;w<>z&{qlx__~Eb+_-+`;FXx &_~pZ1X3AWQD4r*i~#Q Z}ƭzܶWmO4 zm VMqET=}K*5Ц*;MI9B HlbX\wu,oF2,B܄Μ9CVV}%##BL&4i҄υ;um33p&N_œbO_^J⃵_:tqƙre{1_V f@tYZޑ=ʹu}JvZ3\} RǟDA܈\rkX=_o믍*w_o۝03Zx:JZt. %^W76"b]YVP^B MZ+6@ϝT*ezDx〱Nr\7M&-믦Ju(pEFS~ UjRڬ~Q+ &򌗸˱WoO?_BŖͭ IDAT# .yb#Cr$0uTz!f͚Vey, 6[jGe…dff(?!իٳgIII̙3)S!)SoHYY bѢEL2wK6=(UFhI(?0ԕV]omE߻V`2H?rۆ3iEpi~BBܷ7mW_E]?a?}1dˠ(l]tB;ld/;akؚQSFVwR6|6c{i8{s.n2,B܄f3111]VNcڵL:N/41Q ؛l>Ug8}toT*Pk6VIɠS4אyNm،P9nviVح6!A3K؛E܈8q,/.{yLoOo ߻/|`8_l\a((<%ѥi[iAZH%JW\^-ݟ<-\_BhpMr$$}Zu0^Z"8']-_kF FRQ LR]9q՗0E8\B*h[~?퉻B!čb\rkɱ5_69--G{n"##1c> V\}GBB5rg>裼lݺ,Ν;JZZZFݻ2e 111L09s'PZZW_}Ů]ܶl߾+VؗOZZb˖-kqwxb mƧ~JNN0f^{5 ILLt}=o4-0(ę;iڦD$y,-#K{;e˯ۗT451Aӌl$<2si`Wms==gEyseܠ0z?[V@y.=>Phޡ??o}9f|:B!~V+jMhu80I ؑ}#4Чvץ[w"'ť`&5 "$|wu\t|U*.ΰwgϳ#}|I1&&#NPTQJ#l8 لfen8O|wtͺ;̯zf>[mkk>CX9@=fC@Tffm9JAfd15]`ї_5K_޿]Hצmaom8^-:]ը^t$K,A׳xb:wl JVVN͛Mdd$qqW5iҤ FB#33Z͔)Sxxyݻ75MD%3xM ߞȷBQED{7 -'(8nlZNQ+j`tY"c.oSTDD%([ڮ+/"(8LFj1cЗ(&$,+&!\9]QJfwY -M߾&-FC|3B! *==ŋۿ9s&ᵰ/ IQq+hݠ)=Qwg }+S74hƠL|o~7y**w&ER:j5YRCޞ}ZwUXZsR_]~G]O=o?{+c}F{&ʒ3Oc0׶X+u{LTn |dz[k/_߮+_ӇOe0*ެ#{c-~J Me}?@ƭ|.ܿ>?=?.qB߂Cʲ Luخ5.~PYYIXXÒΌ?SVVٳ3f lE!''W+ԩSZb"#в())֭ڵcٲe̝;{cRRRtNg6hVKii)UzΝ#88D#%%ڵk{S~: U :mufKw1[X ˯&Il2eJWXVÙ]<W| jՕ_("<2c]qx̦JmT/XHhx Q Z,fW]\R6ƈɳ{cZzoB!RNwORZġ+g rR]<:%b4j^TWd^P,VZm6%8_)֖ըnokPutN[?* fQ4 juLUrK ,բ/U:ß9CWѠdg;G[m6%RX^r "?|^k6X-JVd6Iu}!BJwO>۸ >٠JDD=O=ŋ+(YYYٳgik֬QşmڴQڴi)( ,Pje:uJQJFF(rQ%**J3fC .T4t:mlJ޽)S(&S/%%%JIIbZ:(?(b4aÆ)ƍxl6o߾#<F>۶msz?l0_wۦ=ޟ9M(W*Jߟ=21r|5qg(v8l?T~cEQl2*Ge_:7sU6~bThP .'Ң?Uʊ/((gF)kmGc{* z@1芛l6e量);UEQהu߼MG_6qNgi/{ʦ˶lJN{_}~`=B!oFDhanD^o1QĄGh_JE8EyJT~mt|ߟ#;`Ci6OtXauC::+9CWhpM8MST4Jp5C&PxۛBq#,|Wo>n+9V?>FbѢE ƍÇ`ԨQENN|;dNvJXX,Z>J-Z's4mEQ;v,ƍnj33JbҥL6 Fc_޺W^[i,<_bС|Ntm\\|9(CU1`o{ͫohܲ'C'qed~u2w( mEK a/a|GS+eلFz&1ve?~X Тmz?E7$%1E,bmtM49e&ob1iJ]UŠQ3XLlb1Ѳe^Ql^o>=tEk؆{'"=7:J.=uB!M4kaCеOx"!B!BԌ+H]ΜL>`4 %%(YVX,4i҄`{Z۶m?wMff&7&"yyyӢE !Bjj*:t]EEEѤIBBBΝ;GXXXeQ^^΅ _>NE3/|X ꀚY-+*+G@`Pƨז`6ꉎo+_~0CҨeO*JsGP{i+ڮ-+ 0(V DF'EQxCV"ϝTҟ~dX!zQ֭[;eeejkΫ`f) $Dź͗YlMK* !B!B܌eG:Z jBԮ] #00RA7o+m۶yhiDjQBC q( c~۽2,B!B!>`!վ}{:w̉'Xv<}_~W%GYu5j5;L71 pUM!B!B!n2,Bܠbbb`!&~z6n܈fc 4d26+ûFMjúF:\wbB!B!B !7ÇZ&--/dڵkW'Mp/Qa,شuXY-ߒeL!IB!B!Bq#uBTRR}mҦMN8QgEE0CoCRLyMgT;,!B!B! `!B\6פLr@`B!B!B `!eZ1 f FHFFpYhժ57mSy,!B!B|oΪUZ׬YÅ 0LZB^SBoe;h0T}׿'4χ~HXh'O棏>|w~-MB!nRj8/@:{4NMB!B!iOz{?{eq;C %"RJ_i+RQ׶⋢bcAFD@E)"`DP`L% Lyx``Hib9~&sD~u_OwtZ}z0ye?d3gU0g&L/s=Ǽy-""ˣ ^Uw"""""""'5e/d`"__g㶰2wrr_fԨQ5 " 0/]2zhw^DD~OZ,"""""""""""jmBhO7\VmB|k>}pM7[o ­ZDD~/*ȟg1CނIhgѤNr3}t|MΜ9Sdիꫯ9s&< SD,"""""""""""ZN)tԪ[/wwСC^yBm37x'x~BH:XDD&!!Fƍ Cff&|2)+6`u+M>cO8M2ff͚Ef͈G-"RT6o̊+R ...?baݺu93)EDDn@vm899B-ΙHy,""rtS H%eX͵oK^^NNNm6l6}Qn뙲N@DDD~$22zԩ1qDzŧ~ӱZԨQ.]\tEDDDDDDDDD,""RI3}3qDΞ=>>>`v""""""""""{PXDDpS 3EDDDDDDDDDDDDD* EDDDDDDDDDDDDD* EDDDDDDDDDDD*ȤIX~}߿ɓ'a99rA_?>|\snZ> +T EDD-윷z!"""""""ΦTc&O̭吖ƪU[l3>>l6Oppp!C0qD9BFJ@Oyj},>899_n.ji.)VTR7ofڴi,Z%K8qF_Ͷm5#I1J"{Ls(iH;5O!&N.ΤshNrΞ/W8kC+w@%X1֯A%2]pPyWyIWDDDDDD"ʲQt~>ݍe, FDD>,7H^62 IDATo^z۷ѣGӼysj׮̈́ 'MDHH={F1R!m۶|_xh׮[޾`n{y̙t!ǭ[Off&111lْ͛SNnRRR۩_>wy'7ouN=X`Uߧ~e/`"^E>̖?務Y$>yu wr.$|7o޼j\)=EDD*p^xz]LJc#Gf{qtm8>" ɣb9pqqqٳ;ijZ{lBBB&Lp8wƍnc˖-:tݟ~)}GΝ9uIIIf&NXd6#G2|pbcc!**кuk6lgs֫[lj!6><==W^|wLg`5[|2'7ݬ F%Ǖu^:x#9ho-t$x=̆y!6Z^rܖO/ e٨9pެ9uS,4>i ދ끣7O7i)Dr?Nc_ ԂjGckm24w K>3z;t 5l)\16_BbÀ[{۝w#΀_0ky$VL\DDDDDDn@ !-  כ!-Y=n8V @^0Lk|nqТE v… _-6>J{KܖF}|gE1bb޽ջ/"K,q(<_tyfҤIw{h}} ;RAxGΝ-["RTmƭފdv"x1y94ނ-2+u:4띇RJ36 喑]J`KF2k5{cA7ףGs@rJTjb5_fjbGfRk WOwz [G6h 20qzE,=[,Ώ5 q`|sXș89 B xxi3/@ xmlq>clr{nn {EYtUvhXhn^ȸ>t ~~j_.ȦD{`_o6&^eD],xxx:Ãll6 ⡇b׮]Ϲs犍h^JFL&=XV/_wy' x{{ u%//3g;11jժiVNBVkmjxٟa˽e\˵HevN8j "gԯ{9z7~tmOniЭ9FqTjq}c?8w$]##/-{#}˞'jXʹ؋{Q]c浽L6v~Nlp;!ȏB9>3sq(657B+g{{mVڰ'y卛-K> ~] _0^_aKqwC1㹇8u}O ?EDDDDDDC22Itp-[ƹs68y$E[GIpwތ;~O?Çۋ!!!dff?qqq9/Y&PJXu|dBCC|VfD+EDDnp[naÆ_!olW󣟰XJݷ &'nw+vx{Ux +%zƷq֯Оw/7c=_Ԯq^Ll<يh3& ÿΝ6zl6㘀 <#Vyŗ߻~־wǐ Żc־X/^\:ub\]]y׉b05kƴig 0'e&Mƌ7yw ۾};:ujsCrHM9YaDEDDn`;wr뭷;ֹ#I`ZɈ?K@Ojgv>Wgpx̵kº6.D}a)k99]È05a&x'cZ _ףeL8rGÆ 7n-Zy <+aXz5yy1bh߾=׷_wrrb|wBNNNE݋͛ٳiԨ7}{ǭrss鄃s9QN9g>^Rm-""RiY, (((țL&}bbbX,4k֬\s Z<7z\th;+i[8ҽxo>[5ځ'X4`m 39,rO7ǦYݗf[;ԣlEWbřH53:R=!q~us)a?W xo?1pqph a8xSbl|.]78'Fh m:(5܍_}/  8pߞ soxnfb,ktsKTn7'ϰ(u6NNtxnqYbcQY>)+GPo֪FiCY,>m v~{C<Ų/&&ƽ[]$&}< \4 !)Qh\px=eza|zBc1⦧Q94lf5߽O]aȸ¹M6K'ɥ#""""""R?jTq͘1y]Jsf;M6-~_/"d޽~s}kٳ75qNl]8U*22͹.ys HMfU}K7fr:Vܴ,z}Ns.je@J%sYF!-*Xߊ?jYDDDDDDdrsJ=&zvpBPAD!fWiw_^x{ǫ^5SV޾ƣ]^R|EDDDDDDDDs """"""""""""""H%H%^4DDDDDDDDDD*%띀~IHHfѸqB?y8qg䛋lXH=LA*fX[inr_Ofrz}Ϝ'/#\\d۴LN4_b)xkARc+|z{~,DQ SDDDDDDDDD."""_...8;;_;;;Rm>IJlaNɜ;'&|3Gl|}"oݕ8f^B-l= ήtyi__rJP=ǘwU1;Įבw޶7Ҩh޷|B)ϫ9|3m'^nטym_nOډ nw?QY(~G~f.nߴ5BB}[!6,yIqFtޅzypKiF8q&ڥq.WPN`^HWr`|0{9,?He"""brss1L;SNfbbb ¥1h$Cq=ӡ Wʳoᜓ `?߷q[UsD @,06zNnj^ާzlxcSK@dύ"+Uprqf?R~ w<@N&Te_h_;|c?cϕ=rr;1)Om?\c53n^W1{>.[j|4vn6KZ]ml{s3Kw\|Ac^1܈TM^$!_Whq6FTc;>͌tI]m˝M% ҍwo|CDDgojpmޝ4kٺTq=TH>VT)ؼܜ ˣ4̹ŦS*?39rՂsnZys Y9OHR2 %8˷,[[w Bj\[^nNDN_>~39١"""72'''ZxR=,Lcm߫>^5Q|5eW\>"""""""""tH%H%^4DDDDDDDDDD*%띀~IHHfѸqBSJ"""pss+U L&2^޾nʗn뢃˷Mu/?+M`P6"Rx :6w&"""""""""VTR7ofڴi,Z%Kj駟IMMeǎ̞=ryHELtt<!mRrSɄ3)1ZZ`ʝm:PA V+`Э;ٹw""""""""" """Txx8/{.VPPu:t({f8;;cǎry|]4Ժ'P[D>?y8qg䛋lXH=LAvZX[inr_Ofrz}Ϝ'/#\\d۴LN4_b/*ȇZ6$AnvL8R$?Ϙ+(DDDDDDDDD2ZDDj[jj*fZj`2 ѣk׮sm>IJlaNɜ;'&|3Gl|}"oݕ8f^B-l= ήtyi__rJP=ǘw?~+CvV hac|v&|ec#h#OV]q0aXQ~y<W"""""""""b `7cժUo#11'N_xU1=~/ˑջ k}tkNn>7ޣݧ =fsG:2F}˞'j򼚓`7vb/NEv{{HvxC5r}gQlMk_! o,*^}=Aط[AhCvv^j IDATpp +ww`hxKo 7ŏO-]x=*d0yx: >'@wƟ"""""""""R2*ܠ իYx15jԠm۶Ɩ)WC&#,_]JcX,9q}`r2fžbȈ?WR2gl\~ fЖwo2f+|QgN}0e8okyn8k\PEvmW 3%|.~[_89 """7(ooohpBj֬Yx$j%#,*"M;ZHKfR Wg,LX g;yV7͈7  /on:=7ȱiUMCЦQ7 S@NֹǥNNEϥW/IG~+oqԔ˞ ®nK[TWN>= f/7"""b!77jK^ޥhjj* FGGC֭4נœ09h= ?o_QlENj&uF¹16Efgl/tSim6 [Z׷vG>7 |0.}N.xGv[˚I9 1K(Ȳsprq.QNNp=oa~fl why*+5B )(^d2v>[^|bB®MS釢cl'nsۏahlgQWDDDDDDDDXDD:x SN%(('g֮] 6\H 39]"8f7uQMq:F֪v9ME3x?ov1=GVXGhJhTVO#bh{quP>ǼOq=ݰ%Ol{|GL&nUw0y\"BSp[q\px=eqF/Bw^1$4 aӗxFw_ׂoUhձg|z} xhrLѻoz!""rCٳ75qN,[w*ɡZj{ay9Gis HMfU}K7fr:Vܴ,pY9OHR2 %SƣN#cqi%~w7dYpl8gw.g)i @u?=;XDDFハϵϰ3qpzXHzW}jU8ekZDDDDDDDDDڴXDDDDx띁T$T$H%áCȠZj4iggg>gΜxxxuVDDDDDDDDDD*VTRHNNf͚5̞=|{{ll,IHH`۶m{""""""""""ף"""Ըq _;wҶm[~ZnM߾}X,l߾_ϴEDDDDDDDDDXDDXpuuۛ,f3'NYf8;;Ƒ#GK""""""""""R1T$''BDDXV&99???RSSs""""""""""R*Tryyy,\]WfXrJmۆ+z*""""""""""3EDD*1 k׮Q0annn_~]DDDDDDDDDDXDDZ,\OOO 닻;Oӧ NيHEPXDDl,YlG~~>`2h֬+V48`?#XDDDDDDDDDDH%TPPΝ;x[nͽ w+B~~>Z"<<+""""""""""C`J͍ӧۛsY<<<ߋ """70Dկw""""""""""RAtH%H%H%f ;筽n7̛7/m+n|A`J*''={aX,dggsa:t2,ں)_|ޯe#++޽{P۔)S3fL^ݻo߾ Wj*z!SGxWٸqϜ:uI&~RǻR\\Ǐo߾曥n=={g}V}ܸqݛ'xͫ4ZڶmK۶mرcQf̘O?tKp ^#((@mڵkyGpss`ͬX*Us=0fq<>deAd2nyigubX Zܴ2QPPʕ+IKK+֦MW^Wc2pqqa޽?~zBU;wFΝqvvsؿ?|O>ݻ-Z~~~$&&ҨQ#6lZFM:u4in=rJlBhhh &l2.\Uq--ѣٳs:tرcٻw/NNHqH%5n8ƍ?~<ϟgΝp^xz]hfmK:GК uCkt_f9g]'VOfrz!99>|UWRZVILLjfާO.]=SƒXCAAǎ#;;(Ǐ'??XW^yLq/ZoؿQZŠA8z(+V`$$$ꗙӧ4|.]дiRgddpܹbgdHJJ"11Ǵhт5kޤI֭[ܿ?f_软Wj*s RSSW?..\?+O """TPP+de]ZQ燗WcO$jiX}<<` *4 OƁ/7P~~m^x*S3;|30Ay9F%Ǖ^ë~yXMٳf͚L2PQpqq?Eڵ VZT^Ç ~;;vN:k׮Oc/4h>~֬YoՕ(l駟ˋ ϫZ]\\hٲ%;vŅQFŅΝ;8~pp0u宻N:Ԯ]._ҷo_ɓ?~<˖-cҥDFFrw:􉊊",,t½cǎ|׏='O۷/k{Ԯ]a+k'&&ҽ{w7nLfٳg={7-[$<<@066ڵkGNUeGE,YfwIZ Q G4jԈ|>wAhh(;v$""oL6{Gӱ ^ r[H[ùeDb_*H5#w^5ZWf&pt[p?))'|OOOz-lR;C5>%n9r$ 6$11WԩSX,|̙3s9s&r ǏѣlذROXf ׿9z(IIItСHa={$11ۿ?}Cm6#F`رƲtR~"Wnݚ 6TX"""""""Xp!]vuX\iY4vwg {qSlJw*4֜Q{;ñahwcgI=L뇻nR/lVž/6rϻ*ݚӠ[sύ)-DOn<%Lۉh;5f^Jlܸ|&Nĉi׮m۶uӭ[7uܹs9uTqӉO>KΡY&&UVi&rssKTz'Yh˗/';;777~7߿?/&::cZ1bD߃H-[FHHH}ʓ5_7|pRaƌ9r?8{[I/>>\]]?/"UV_E1 aÆW9~ӧjHIIa͚5$$$`ٸ{|/?ÃuVZjEÆ K2|>,X>H͍{rwSNy[k׎={oqy|||)ԯɓ'ٷoQQQwǎ'88=ϢSXDD3̟?PvZqUP[zFU ]o26ʼnu]& j`r2_/͇(ӽ|R2hĩ_F s.ׯ_5jӧOg4jԈ5kРA{P{Qnݺpj׮]ld7oNJJ ͛7f͚X,rss}Fŋ/#-hyd?%?g?UV`Ȑ!Ԯ]ɄC5j816m_~ ŹxjRRU{Z%PΝ;Wda\ž+]nޝU]_AdqD%ҩqILSG V[,ӚM9RSN62f.9$*. / `֋(g|0~84ibbhڵkϦM**奅 ܹsڶmV^]tm۶ٳg,bw5##C/5o<͚5KS6RR~[SRSS nPkW@X,Zj 5~xHߖd2YB2 rqqQ|z=7uUAQ%A]nnc͜-;Ct*)TZ;˫bQ-*#r/㿖$uo$IEY}yx/,'G_/ywheZ;CNNNZh:te˖X*?ÇW۶m+<<\]t$DJJ~]ӧ6ethhpϜ9-Z&rϞ=SNiҥ)S^$M<ٮ5;v8#G*22Riiiʲ9t }'nܵqu&7778p@vMkZO=_hֺv駟~ң>kӦMo~S/lRz?YUlll޿nK."﯎;?~;V&Lдit-LsM7oրj=``Pffua]VƍU6m(<<\|={h׮]{+ݷo_] @c`!ɤ>}Zsٳ5{l]ѣ={[jZpп7oBII9xLIIj߶ W!#8PcJF~RYzwY.r\3$˳"ZEj5h۶z-]xQoյkW,EFF*22Һ G$8qBcǎUXX&O Gzڽ{ϟ/__ ;ǎ1cz'oJ|P۷o7߬Jw]M6m}ݫI&W^[_xfM>wIrǯ9::j̘1ZfMc\omcǎ h/rO?СZuȑ#jժBCC_M޽n&[Æ %uN,_СCꫯo߾ K\\\E&L>ZݢE ͚5K}UXXƎkgl2Oԋ/V9Y։end,]j8!WN!6eG[{5k;x6cqQ>!O75v(.1X}kn\S``,999:m!ΖNmݺU'OTN4buGlKRtt3gwީ3̙3jݺ<==?--Mj۶mI}_ 2D rqxՐom\xQ馛*ܹs ?;;[馛jW\K~Iڵi͚5ot xuپk(--ӧ[Quu]pAAAArpQw}L'ZvءHy|Td,.O܁} ӦTb3!>v?G@˫_zĊun//_͛7WͫmN:i=zTo?CO=MÇ&*WU;s홿: ѓO>{ilC#Jܪ]p9<==+M_ҴiS1BӨQ*apB%%%{BBf̘qŒҵ~899)((芌[i66lؠTi݊m޽[1_j0ejҤƌqnI:uڴicSVe#Fh59_W /!|}}}>j>BR5{뭷 ѣ/{???XB+WTs ;g̘qph"ƕPZZZe1r иq44> {pco?64:7TQQ;\($$D,͝;Wz饗~cj4Rvܩ4EEEi*))u}]͟?_;v /P{$d2i޼yȭ5dq%u[ IDATFRrWլY3ԺL&3h$)##C]vS=zGFh4j劈[ڊ… gu]2HЈjٲe TDD/RݺuS.k,@ݐ2Z|4rH5uTOժU.{<HY,ZJ1bJJJT\\,TѡCeff^895tL&H̙c-ݻFUcpppb1uCFI&zw\9Zhg^hPLܹl ^WQӦM:WD:p44$p䟻tsC" @#UTTXm߾]qqq*++?۷СC*--mHܹscǎ+2׿.MIQ_藿)?~:pׯNFwߕw^m޼YO<4i"IZb<==ᡟYњ>}\]]8s-^X?m£6fSQqvMwީiӦrpVX'NhРAv AF*22R$ɤy)&&Fᒤw2 $٬{O_}M0WAz]ݥ:ʾsRXwq$iӦM.mVsε?A~~~8p4qDٴ4hx zҥtQI~k׮0`z L:U/؇074eddk׮6ǏצMdW^ a8x𠊊Խ{wQ yJs\7 J&}П}:C4I=Gإͯ~m#l\? p[6l3~~jBn.JڝXe{+GEE^֭[u ;<1h رcJNN֑#G駟ڴ)++Ӓ%KpB)!!AX,zG5m49sFWֶm*7zŊڹsg >@#g4|rEDD ʏ~6L*--UӦMe6pbgggrϦ0>(c^N,fB߮DeLS lQ↟u$I7 ӾOw>f;TVRg*١ YK_iZRrr,.쯾J'NTXXXy..߿M߾}Ç+//OUX|gСC$_~γqF[`R-[LP߹sg :T*,,Ԗ-[ U1!#\Z[t"*VBMtA}sKI ~**X$O-/))Iڵqz5zh*33SFQ&B edmѢE_!!!5{Fl6krssȑ#mk0ԦMeff^讌@]xQ&.ubSNDT`jKG}-rf]n.5C/slݿԶm[={yeX*IFF/^T.ÇWvra6m$I))) $?^]tu vY,ZJ1bJJJT\\lYXXDk… :|5w֭t u-;CtM`=Y;˫bQ{nƧ>SnEuS~rIQ>־OU/Is/*`989j)ffE=E1۷ңǎe˖)..NR}wi 777m޼ٮu1 2 ÇvZoӦ5|fڵKvm5ydP @#d2#I3gw5jJKKj*iӦ*,,T_B3f֬YnB}>:$$\5GO/$uK3NT"،uAt0E5V_ZkHE֬YBСCꫯo߾>n8]\\E&LPvvΝg}uiѢf͚}]vX,6lX۲e#S=z+-|q56c;ҥ{B|~ud{?ꑃ1vڴ^Sqq7o&MX\Toqԧ 2D rq}&C.njnw<з;v(22Rrp R>}Z;\pA.\PPPPפL5eM8ZPǫs6 1'>viSv*1A<<)R xBGS$IGӛNco%I+G7jy_~THHwnݺ)''G\~̘16mz]4,4B&I1119sX{QF5TXWLqV\bSvAj{٧s;ĦXuڳ^z饗?]}Ռ3kΝruu=~n畕%GGG}嗺[eӮ>tޫ_:޿z`n]㸸6ϰX\tMј$&&*44T6;cq}Sh6eD Z3f\@hk5|v| HF04 @#UTTcǎ)77W>>> ٳϷ*pCn_!OftϰR24/t&f!غu:w֭[7t(,vHڹs ZemذA;vX, -:nm5c1֧T"憕O]MF=7|u$EGG+99Y:~UaݺuիU. @#)IdҼypk};$岘-K(v7TRI2sc圻 W/7x4+2SreY+/9yX,\lVkvk2_RjR 5wk&fΓt!]jS_\< +@2TbQt4{M#I9TUխOM^|`bFSsWy}?x:QFOU.=*S6DGGn:zd8*--O''Gnnn6ueee2Վ.wwwWb())I$h.%%Y͚5SAAM2eggS 'j;%Ijߖo:vV>,+G'NC{5/IRchq4Sy}n7ƪe@=q`R_UIwiħ$siTf*ys|VLyǧUh,|Jݚ)I K)W!cc$a"}+7Յl _"M~IR;iu25YfY1 kMvԤѴ<)>a`IғE?\s2AވTgQrun"Iڰ'E.z[A)34oЙDdPrzc"y{b~^~pf>8Vx2sUP\ܥO&NεZ|uf#멡c5g6^v;S>~g|k>)ds4YGS_S6̘1C۶mթS'$hJIIkyyÆ ԩSࠔM4I`PnݴrJyyyG7ސ.^_~Y3gT||b( |gc=&IJLLC=LҥKդI_ k:yfbbblm3uTGӦMպT^P+iiiP׮]m7mڤ>@ol@ՏPT~xCnۦֵ I?Iz>NVi~b̬c[  `z<aݭk.3\ji;ywvNORqvA}\=:Ԣu:ZmSkKJZQK|]m+iF%-hMJҠ}i[%#IVQf6kIZ+[] S%Iee:p꘎/\OW~q[ڷ\&l3VVl[˾޶Q CS>Y?^-<4{bk׾^o-щO*tgHZEs9Lɟ~Ӌc(jUק&OIK7;\qxL?JPܟϢdXb{Ti}j(iFy7?g_m|ՙ1cVZ)Sgє)Szu={T˖-~zkZGb˔)SԱcG(99Y-Zٳ?*Jnݪ'N(55z:B.]K*88XIIIJJJ&%i WrrN>ݻwkѢE_V%Kh…SBB5}Ɋ+sZ @e @#g4|rEDD +%$$/TVڀ^cNNFTtϛcG@{wh>lUz**խ5~+߭m$=%y=ٞDGShObz7bO'WT n?wBO.M݄t0-Vy®82WC|{FWE%FٳU,$ef)k׻_(B,So3km{)].M*=Ƹ2%&m>GJ wݧ SCڴn}.ׄ|{:\~YKMOuoO|U5k5k,uM>rrrƍ5j(i͚5ڶdFFk.%''?d~_+M8Qaaa񻸨D7o%I^^^0a6lؠzʦ?;CXa7ϯP4bZlaSIرcmxȟhc ooJo/I$&]{o}yi$eiԻ/* W*'GZ|m9O~w\~:j$ 2 O'vZ}W[7TҲ2}ukkZ?%JgejӁ=_ M?i\}v^_znna~cҲ+ I>vfŹKK6}kmkv?ÕTUYQ]2~x-^X7nԸq-[TDD^uyP IDATKszرZl/+ݻmoӦΝ; r}Zdu쯿Zɓ'+4B&I1119sX{QFd2oJKK"Ѩ[]ti/ikREH[}6m駣kΣ:)VT!'g>Ku>7T9g3Qݵ@{6~\V['=.wP7P%I3I=>hZxziwdk-Tom͏bQVzֺկߘȻ6@=?O֦gkҟ_WG^JгYګ^}hΜ6>~ֳ]k\IZ0mxgO|Eݦ~ot>+SYrtpЗ[7ƂZQ]5S_.3fyG:t5i$jJ-ZPJJ}YCի}M6Գ>o:F޽5f1s?.IZ`xo^999Pddpqu $I,]j8!WN!6eG[{5kˎl6+''GeeeG%TSYrvwgON~Z̥el[ƥ&eɐGkox(%+C~u:JB^.e+u sshup&Vu.3MAx.SzEZM*ԗtz|=˻8/Ϧu{-i}.GmޟJWJOOM7$WW :}|}}̙3SD*>>^;>k^"اn=mN%&<\cC2nrmvo_sY-\=^/psqUpv5~M=QeZyv~ZP\ڼ?56URAAAu] 1c_pHF{Q[_Ze]p}ǀuMhχc>|xC>> T4W~~|}}\f(=zNAߺu:w֭v_nU%INNNڱcG `0Իᆱ;w*--MQQQZ`JJJ}vTzzzF\?QR?rh%''PǏSYb̙]vۘvHEFF_d24o}hڴiv8p474eddk׮*66V~{=-YD i݅N{2W\jVLSQAF}-Ijv8Yv$|,B?^z3fhժU2eyM2EW.gOlRׯ5zh9886h رcJNN֑#G駟ڽ+VΝ;q`0has$$eeei̙2Ztl٢C6duva0$Zy*mTF϶(\$ɻCK`fxT:S!ff͒f͚nݺGUNN~ܸqZ|F"YF۶mW:|xaƍƁ4bZla-wqq$GrrrRnt̙ pl*z~r1cƨcǎ2 ֏?^6lP^^;G\GVll233e4z{_!!!@K2Z|4rH:9;;`04Pt&CzX,:Viyǎխ[7^Z~Əoŋjݥ} q8;;bb;h,VZB1B%%%*..(uppPpp+Ţ2:tHڵk릤}Dk'*ʗ$k=G[k߽ 7)qxbmܸQƍ  efI}a]BiUѿM<^p`0dRLL$iΜ9޽{kԨQC?;#٬-[{ix/WqV\bSvAnM駣kΣ:)VT!=jXKcƌ?>}C-Zh֬Y۷ڵk'ŢaÆU⢿/0a5w\=6m?zw`K^ 7~#ct˭ŵit5iDv3k(,.\ 98PKbbBCCΝ;_>q)[OS "׆F!?|}}/HHHЌ3H705|@lA504$`";vLQHH%I&I]vW;T@=! @#_~~~ڻw6oެ'xBM4Qqqva>55U]t! 14RTw޼yQxx<<<4m4kR͙3Gzjp;h.%%Y͚5SAAAm:$777o*EHpHKKSFFvZi޽{իW/ >3Z|"""lv_ӧOgϞ >+--ղeJٳG;vUPHHf-_\nnn9rdm^z]W `!ŢUVP#FPIIe2lǫLrKE ON &I1119sX{QFY߻ww.gg#jҤyM2*DZ8 HF04$  @#UTTXm߾]qqq*++&))I;w޽{Q!̝;W;vh0}~]CQsizj3eggɓzڀ_֭[^z5tȑ#Zxcwg]ou@d ?k+2vcp-ODFSI +q5⟺Mznx+:=<(IVrr uq6wy~if߾}.;ΩSX=>믤"_^hSsNz뭊ѧ~p۴ygԿm۶j ATddz!=SSLL~ǎ4h>3F۷מ={0#77W/^b())Id4?==]նX,:w\s\i&L&MTlQndbT7Tj4RVs(쎯Ti2V\g{毌bQj\̐RfKMU/)5tzsk\f  e$@b(B״>sQխOM^|` |zD2J5T}O'jtOXotS6DGGk:q֭[zH6mIҦM|׹5qDs= KUb(55U)))?tM[))Shٲed4gٴ Qv +5kftrr{GGG99]?hJII$iÆ ԩSࠔM4I`PnݴrJyyyY裏o[/^/3g*>>^Tqqrrr I={{1I>CTAA~-]&:h k:yfbbb|U֩T|MǼ>Փ(|yr~[Z}Z{֤Q`0ОzdK3Ze l;!GgGEz_-Mj5PO'W}]i$\Zv{~359q^i`0Ĩ#Rsf?wRg?r 0u$i؀$IXHJ}u!/[C{ע߫if?aZGLMbV_+m;~aiӁ]*)5G`z=y4u${eyX~5Xruv>WŜLЀ7"Y\߷ R䢷uj:9r1CIAukI+_+/w8_7V,.'k惓k>y/)37[E]Zkj\ɗ[7hwqQV?þ:lʦ"Bs] fYZt˽\L0A6k"WeP6YxrDQ_/^s{<3w9?~R/Bg)͟.8xJakncK4%DLG|ʹ~~TͿ5dܹ8z(ѥK:tbXިQcػw燙3gb„ YfAOOOyȑHMME~~>^{ |Z|7{ >eee@yy9.]F]YYd,ZÇ#11=O6 {ӛԎq\LDDt]6rH?~QRRb׫W/XYYµ;wbرШם)SoFNN,,,!ԏâEp\z߿v W777H$lٲݺuD"D"0sL"''ׯ_ӧqF8kjjyf_%%%̬7 ۷ǧޟlzK,PS-ś>#xifPf5<k?LW@.F+WSպdRBGW Nk- \:*::fffسg$U{ѳgOz*/5nسgagg7n4mڵ 'Nhr;""""""ǽ|Hmª`D(((@ii)tttub„ Dpp0%XPP8:u 9996l,Y"߱c&N *$%%aʕSSS!&&gV;n8aU{{{8pq$H$bhkk[^YTk Uo ," zȎKC2z1!Ԏ .XZ5FʚrUup ^N.p>Uiׯͫ()#}dܺTo\!;{ [Š7愽x&šʄ!29}JD~G((s'qjȹ[^?q4cgTUj$]+054B9{+mh~T0ЭMriQK{~ħ… aff 'OFQQR=3PMȮ^K.mR͡{Uzzz|2~w j߿X}}}5j=.>lr;""""""1LDDԆIRl۶ WWW#22!!!pqqZ>P,[ %%%8x ѳgm֭[D3gBGޕχ\.WH[[[#77WnΝܿ\.T*mVlUaLAH^K¼ 1̜tmr%wgjm)Ƽ%^?,~h= zCf}ըQ~M:WԷ2sl&f{YM0,FOڭo>߯Thc'~̓WǮͺw~ѽ/#X#] ϓzt;d-mTVWN{~ħY;;;D"4tmc~kv_M{U7:Ȩ+..FZZ tR7y,/.DDDDDDadƌPVTT :tӧO?0[3<==o>ݻW!mgg\\1kkk7o6x z$DG&_q Y,=zk֬l߾}*W?Cuu*NoQ.p zPnEpCI"F'4DMxU)ٚ#/JsrbJbjhozGLDB@xߟHkD]P| ? 65\yPB{a6I=,K6vNӇ:pPf'}~re[s+,=T͏Y_t,&_݄Ӡ*~uNsWih_cv8P%77WH7UAAb1|QkjjcHMME@@7z[[[###nnnK. _"""""""j < ؽ{70j(TUUB𦦦?+322(446mBll,&L \?/^,;Y\\pm6=/xp-DP]Ayf;wbС-20n<==a``+Ž2~9JI #UʆYsjNW!K*wrƬDڶ#82I "[g^Buw!4A-q0W!Q " P/޼ߏ}:;Ģ Қl?}2G8sO6ЗaSĞ=  IDATV&x # =DrƟ7~@o^P{+W5?: }>E|oٳgQQQwccc#0f_555y&~77y, I܎q\LDDUWW#55_}|| ---7{ɓ'QYY sss;Bn!!!xлwotIl֭x7acc ܾ} ‚ зo_txwѧO@Θ1n:=;vDQQFMMM ** /xy8&FX!ib>s+xRpe\OCנ^2Lsb*ي3~J}81{OEkϡht; 1#|]>7o&n06±SIpl9Viƫp' e!D҅a~jx?_wdmPbL #Sܾ_wGLΙ ÂקGЧkFME#PTV r'/}wܹ_#1pwB''?/Ym]\\ \'''|B;#!C )) fffk4ޘбcGܹsƈ#Ծۣo&Fqdgg[nMnGDDDDDD8QwkADDv(n yB*{4]=G&1TWeEy,=xpppw RTׯC, f6ō7`llܬBff& L*U}hμY642i mUom,ƃ0jo]}X5k+ݻ%E[CCԴtz |< C|ʫ*+p0CSVO݃e{i(Kkjp=6F&0k3Lo܁a>'?;pA߽{PZZ 2ܺu ;wS ۰w+iD{{{?~U\.Gvv6?==QQQ݅!++ @׮;9*+ʛ&| \={)\#+3=2r0_Fg&505YΝ;7GGf}\\\0k,9sFiL ڽY7tӆEO060lv{PzftSC#&:ovy< uiwi]_077y_000x[|o1UqA__FŠ+ "]tQ~H$m?zm2 ݦ5=kL=Ǵyϋ>>"""""""g<s+ڨr\r077 4[iw H${ihѣ=z8urssmӣGtQ{h߾=|||k8x @[[P'99wޅH$9w333@UUbbb0x`\< 0q}rDGG%ALL ._ +++A,=?鈏^{5=?())Att4!C@C )o}] @{իǭ[py1"QUUUE߾}aee߲e??&lʕkBDDDDDDDԪ&""j/_kkkXZZ̙3HJJ[o%$y:'O 8rf͚$S_#""dXjV^ѣG#** 'OB,{ ‹/bprrBVVwaa!ƌ???"-- NNNG~~>p\v 7oQ\\1cܹsѣqرFv),[ p--- GD"# <<<pII QXX:twFTTZcܸqy&"""kkk1W\?۷#)) +Vht|꾾c̘1FJJ !!k֬ŋallXjWƶmp9.]M6Sk-iOq٧1}t\pAH1LDDFP]]+V 55Ç1i$8;;C.oٳgѷoVTVV"??vvvj&J1|`РAµRҥK1b.\Ott4 a5h͛7pww7|E 'N]h>|8z-\v NNN$BS;.P\\T 7?|.\?2 êUrJNcӐɓ'۶mP uTO׷1gΜK/p_}>=Xd bcc_",,_(..T*VW?N.#''000P(AeeePxTC"@,+ܣ_~x;$"""""""6_'""j꒿@ڵÇ߇T*s"zjxyyEEEjWয়~ꇄ`$"C, ״`jjڤ~1b#::Ze=ccctׯ_WYgȐ!xҶaoo$&& v۷onj3jH$ȑ#>|B„hTY:077GEEp-&&d۷ocȐ!ڵ+(B,cŲeôipe6++ ^^^ٳ'lmm1qDTUU)O>=z􀛛,--FŽ;TiӰi&DDDDDDDDmW塠ݻwaÇ6IZRRj1>)Ts!==Ʈ]0c aǎ6l||| >}Zoߎ5k[o>5^,_FPPNW^yk \9sCEyy9n݊>޺8y$֭[(XXXjǡJPP1}tTUU!99/Vs!HR޽{c{.rssڬ{߼yV <*==OZʤIзo_k066FFFR P^^///z8`Ν;1vXa+)SqqqիqqqXhJ777H$۷ .ŋb9s&|}}qF/&ڸJDFF_Xܮ];txWpm\~+R7aaaBrO>vPEaԨQѣGUe*mHll,֮]={?Gn-lܘ666̙3 r"6l@hhB7no<`֭j[AAA=z4r9N< }}}GNEEB򲼼\FFFͺwݶ5mmmUʠ]]] p!XYY)V5bff#G~wdCCCD"+%F&?:Ou&LHQQQ8z(ڭp)@.cذaXd~ǎ8q"<<<΍BRR])q +b j,'"""""""zQ1LDDԆIRl۶ W( O? qF+E2M`(%'OŋW^ijS\9 0o<̛7 ~_~E000V"b6mPرcpppe˰b L0AHZСC1qDK.BnT]O߾}!p9$&&bذaJ^ٷfffD^\8~{YYRBQ.BCCa`` 65'bܹJegk,^S#744˖-CII <{{{p-D"̙3Gͣ_H$J+Օ\ekkk*ܹs}_uDDDDDDDD/2&(LH`̘1Jڵ믿.?z 8}4~W7ѣO,,,eeeHJJBpp0RHVB\\֯_w}W(ԩJ^/B455D$&&*ܷ1pwwǛerΝlml~T&Μ9???#++q^z ڵÇGGGȑ#o0a³ioo{Zi3<==o>ݻWaur98r{C[nШ7Akii H|3(??_!!\GK ;;&mNDDDDDDD֨^rwFYYF*TTTZs}!ᗞ VOݦMp|عs'f̘!$SzId2XB8cÇؽ{+&[bڵXv->3$%%L&ϟ˗ vxhuaϞ=8<H)..̙ 6ٳB 111j_ L_0fad[y᧟~!틹sbܸqx&1033#P^^˗6:: 455k/2D"mo@OO7nT[l?+++b⣏>Rm@9s{͛7aii9dΆSμ݃ HİaÐǏ{===rTׯC,̬߸qj{1#--󫳲ꊌ tڵE)pp포L0@D|H;3ʌ`ddaeĤPICCCifhhWWfxꉽ5y.̚5K8ϸLMM]]GKK ;wnv|n{i]/P;w.DDDDDDD1LDDDD Uu cǎ}Ѵ߳ߢ=/Ν`W_}ECDDDDDDDz&"""5tՖ/_kߟ&A/YMkQ%DDDD/K{N#uKRkADDDDDDDDD).7 ""j HPZZ SSS*4,,,˗ݻCWWm}jWc;vUcYlTvxO^ GA׮]Ѿ}{+WGz___hjjԩSͭM=бcG}UTTmmm888Cdܽ{"޽;UUUabbr 0`F$-- ӧpM.#::(XMM bbbpeXYY!((bXIOOG||;wڵkؼy3Əb3ΝC=?3<֗N:e˖?aѐH$srrq.))A`` Ca݈Rk~0~x77oDDDRRR`mm4ʕ+#}v$%%aŊO׷! ~{=Mǧ~g>zQ0QoC$p;5>>>>>9r$jjjzj_~SUUU۷oDX=Xcǎh_",,Le"~f6GѭhgmMՓU@ZY yq%$$_~,^~BىG``p]*bAkK u}}}1b,\P)h#..pssS(߼y3\\\P\\ www|7XhP>qDE!<<!!!M >o]'''@RRz[[[ԩS|g(..Fjj* Ԯԛ> . L~aժUXrPiɓm۶]|e:ƧU_c%cC* 'ˑsss(Ԡahh(<#!H IDDDDDDDmMDDF%~҂\.P|~:֭[%' sssnLL !&;?SXZZbСܹ3&N ^R?3FT5u=>/A=KKK$}} IDATGM6 6mjdy0Qxl޼2 &&&HOOG^^LLLpVyz+++8p@sN;V4 oƕ+WK.)$XSOCu\pR*5`heY=+xeRn= ÂblHXy8 p$85=c;w.vލ)S`Μ92e ۧT b1^~e!""Ǐ\9sCE`` TMMMɓ'<4** hR ת*$''+%:O?Tܽ{puumֽo޼ mmmSg~Ƨj|OÓ~Ư˔)S۷o#''aѢE8r^\D-[[nH$H$:u~̙ENN_ӧOcƍ q`X~=JJJ vڅ'N3DDDDDDDDDDmL&Cuu5R)jR b5550a"##rDEEѣBy߾}/FFFh{8vvv=iHݶJeN6n܈~ yyy())֭[4 ѣ!qIO> u***V֭@Qv6F[[[i;G2AWW`:tVVV +U<z5RPP8:uJx %Kw؁'+ܘ*$%% y",, 111={Bq [=+Y'66VDDDDDDDb ڵ+v \M66lȩĜ9sQhh(-[c >|G1*++Q]]"BR%j.]c}4Dٿ2iU䐐A$ջ 0o<̛7 ~_~EGGGk֬b6mPرcpppe˰b L0A:t(&N333t |۷/d2Ν;D 6L= "jQu )%UQYYJ"44ذaZk)^?u3~UoC/n݂H$œ9s<DR~uC.+|Ju=>}&""""""DDD":tLE~~>&^_ľ}w^}شirss$ŋ!ڐ*WǩrQ!1}\8{{{ܻwB|R"1=In*BX `ݺujݳ IIIΩvwwGTTBXj~zByN=@*D@@pǏnWs@vv6llln͏*ę3gwwwdee59'SWco狝r98rC[rШiii H|o@ :ZZ ;uޟDDDDDDDDDDmPYYdݻwqE!)SNA&D[3'M6!66&LD"/^Tغ Ԥ!%%u ?^zJlH@ʆ/{jodk.=>YnU2 +V@II ڕwV{EdBBtuuuV]k׮g}$)|r!Ӄppyy9~ ž={py(m\S\\,͙36lٳg=C_ L}Z:֭[(((PAaB;wСC<ޟ0`&M~0Q$J{n,Z_|+8;;_ 6 eeeXd +xyyͭ~r!!!8uzN: -,,pB?~|7oҲs$ɐ ###aVXX{ɩM& zzzJRׯ_X,n7nظYm{feeڵk'"""SeE=.| \={)\#+3=2 `""FFF*E"s{Zb3:...5kpXv争 LLLqTPZյYm544zyM޷'}ֻ:wݶgff&Ν/QDDDг^4+?WY֭[7;FZ{|}""""2&"j{ n%=־?Q[0QYj~퉈ڨH$J+ʐ\]RDDDDDDDDDDR&""jvcccܹsHHH̙38y$}}}hiia1=)&ڨ~" ɰj*?777xzzʕ+8x`kJDDDDDDDDDD- `""6.[rpĤ5"""""""""" `""6,;;׮]annoo"LjHRhjjB&vHDDDDDDDDDD1LDDԆuAAAGYY>!S0_H$BPXXڡS0QTVV,rݻwqE ujjjPQQj@EE*++[%^""""""""""jZhR-?Ɣ)Sߢ nnnwyEӒdd +SYY| O>#00Ƀ%"""""""6+ڨG_"ZZZµ/hkk]vx3%YQ|5[}*9ֹ+دPY\.gZ|v ^v R?3FT}vq HNNƘ1c:::>|8N6 6mj=Zr|999׿'''\~]6~~~ 鉢"@II \\\O? CBB0}t~.]C5|ԨQرcG#"""""""&ڰlc077wPPPݻ?[o o> H+_|&zN_O!i?;!oga`aoTkn… (//RYXX$ ƍ&mG"4>''555Ǟ={;;;ܸqC]pĉ&<ȑ#ѽ{w\v ۶mCΝj/Jq9ېdصk;vٳqUlܸXfR?5~u 99ODDDDDDDD/>nMDDԆd2TWWC*B__޳C+++ U/^S!9_zSY->3? ɑsy NUǦGGobOZcM4/""""""""j[/@$C(,,d2DFF@825}:ӧOooe˖qg}ƻ[!Cҥ ׿q988p뭷r뭷rIyf͚ņ ,_;wg;v,{6vK]]ڵ˖,"""dbڵFII zbСTVVg.\h;L8EbnNNmf.|>{%cqӋw6zsniY ;7@]饗3 3'MT5\Ƞs,]Z[hol̥HXDD'))\Xt)eeeun;WXsٽ &c"7O_^/cu,_뮻)S߽{5P!NfOG=Mc 0a0睺rp/#o~Xj\?MYp!?C}JKKyg9rd}.v}<C ok~ ?lHGDD3g5g6Yx1U]]/xSen̗&bSgm쳸{lwdSGU*LU&`%K>}:, Nl6l{lWU]49fX0'''<<$gϞ塇b…M6={ʲ%f 8z(<'00|ƌ[o[T7~xZfkL"""""""W=--""ҊqFVX_o/.uwܛ.kt^LhnuW U&e5 l9>| FIGugF1jdF灹z\eBGc>. IDATx뚹!Lh } }KJJ^zU}ѣ>|-[ؒUݺuh4rJvhh4ڒ3gp1jΞ=X2339v۷o筷޲L&檪*4SY o:ƚXŜ(ko9 f`1MarN/O~=YTU&Y}.41L<5}Ү];>s۱>I&`sF"++L"{Bj Кۼ/}~Sϣp[wx9h׾zzoG/\N>C&NHii)֭ILL$99"<==IIIikDff&_~%-H `V,221cGII jmKXצ-.(ܥm.u-ӿ&N0Lu3''.hnXݐKhZRCn:&w̛ 9Y+p:-<1Zzz oXX aT?#4{x}矖n M_|AQQ}aaa~3i$ˣff`Ȑ!DEE]\*EDDۓנvigX{δV^no*sXXOZ˖-cٲeٳQF1j([@ڵ //}NNݺua@NN v Bԗrpt@?~nL,VB΃?V?tt4VRs۫k-'C?'|[.]'|Mfkedgg~&=mڴbZ}{9dggb󋈈\-T,"" fK瓜Lhhhڥy)?G7u8N1!qK+&~lSEu'v"z;ֶ{nʬu ///쯭}dddkw<,,nݺ;¶ml... >+VpY>Fin;ɓlܸZ!C0cƌ݆:1&U?vrVY>miqaͮ🌦?2 h5զ9Ǝ?m4/_Η_~ɝwi;n00 $''~j燆͛4SصkC ￈B"""dbڵFII zbСuj$#7>5N펝.@:sݘ*{3801îЈ:Ȕ)SXn k[j;vɓ1c߿?SL!:: rc0Xb&L`ʕiӆAٝ3a:vHaa!7x#qqq~_Wl޼___\-Pzz:]v=᩻Wvu9Q~ZQ 6wХj M'a${ޯ!0'ORPP#W{lذI2e 뮣SNm۶e 8p,K=~7`9s^znuKYnK,9DDDDDDD&vYq\RwZ &{խN(++ggz_LyYiڕu%owyچ[%55[nT\\\Ξ=KVV{lđ#G6yǏ __jm%%%dddeuFtt4)))DFF:em!;G}88pWkE-R_h 005꣹???|"""p0m6 x֭[ǚ5kj\%`֭őXc|-'"""r*/+9":ݱi( "reHIܭ%EDDZ3OOOjM^]v~/@TT> ;wETTTNNNDFF֚R5%ڵk`Mnϟ?ŒO.npxxeUwt'Op\.ZQݔolP͍/W-[Ǝ;1lk?"""""""*EDD.]ܜT\weuunmst8"""""""*EDZXDDD9᾿aUTH+%EDDZ\F#S8qJt^y~FqDDDDDDDDDDT,""JǓDnn. ,]jrrrXb֭k|6mc׮]KDDDDDDDDDDF"""Ԝ9s0 f/^̾}X,|'Ӈcǎ5j:СC߿SH+u>{{''',];vHHHHDEEE'""""""""""u `V,==#Gp1ׯ-[0{ldsR\\d㉈HݩXDD3TVVb2pttl6֯_qwwo9GśoLnnn-""""""""""H+ɘ1c=ʉ'ȑ#QQQ#GjԜÆ ٳ^ #--""r0 oߞ<*++/Ŝ;w̜9GG3g6E""""""""""RJB%%%dff` ??dbcckepddٶmgn7z?%EDDZ!ڵk)..͍zСC}nłv%$$0bN>;vGdx"""""JB^^^<QVV7ε=oUU4z,i>}חB})((`͌7CRXDD3ϰb :vh&+OAA֭[)//oDDDDD>4#GለEݛv?rBQHSP4kC: ;;֯_?nf^'"""" `vڴi+s/~i%H+hqr'NP\\lwNtt4r*""""""""""MD `V*>>///<==ٻw/6mbٶvNTT""""""""""`JRs̱%sf3/f߾} 8AT""""""""""ĴH+ua%` bק3g`6/wx"""""""""" T,""Ҋs;k߸q#7ol6s72bĈTDDDDDDDDDD"""lɄ]ﭷފ?zjnEDDDDDDDDD1H+ɘ1c[[@@_ڵ+?cK*""""""""""M@ ``}|2XDDDDsϏ/+"" b ??dBCC(//ȑ#Ǐʵ^b1哐ݱǓDIII E%"""""MA{B&kR\\%%%ՋCPYYɪU0LP^^֭[ G.""""C>}̙3 2'''(((`͌7CRXDD'2qvv_s5<RUU/-\N= ֭[)//oDDDDDi<==>ݛwy3g䄃\s5tM-4""""""""W@~`61 X,V_%EDDDDDDD2t""""" Z:iJٳrsK!/="""""""(,""Jw^l1LPUUKxf ET+烱//ST~zƎkڿK`_~%3f`ܸq<-K/֭[e~N!5ӧ0a >GGY|9kJJJ"&&M6ၷ7YYYDDDeרf͚E;w.AAAnoN|l߾j<֭?qGK=Ç5jT }GRRHKRXDD3g-Qk6Yx1#66Vz3uj ]·;:BII9G38y׹BxRcwfzneigKqriد0f, 1IQYYIFFAAA۵UTTpIڴiCmǍǸq3gzb`}oF#!!!Md9z(L: κ 00ggjm%K0}ϭz6mğgOygx'%)))]v 0dȐ={Ʉ_s#©S0AM.ž}.vyJ>, ;RVVVy/TQQAVVbh$88'n`ƍ=QH"""ԅə&BSRR:t(؝z4r讝puwÌnDwDB,|NSc+mUr'nsY;^5FR[u.y^%̚zߏM6JXX!!!mۖ4[dÃ.]K/dk7otؑ:w޽{Gm.6aa!NNN\ <:о}{ݻ7wɉ{キqj*>|x "<<ѣGӡCBCC9tP>k֬n;KxϟC=ĺuOXjÆ i&vJ=1bgΜ <>d׷v0qDBCCRT{VVկ{5n~QF?޽{ӭ[7lѣdȐ!PXXXZꫯϰa >,9.JKK\߽{7aaa <=zp7 /Tg4L `V,==7b lKb2HLLW^aŬX–>tAm)3UU jˡ(/"sG:Gど5w3rsR>Oy`F2vޘvgaüwi¨݅q{5o<*++ꫯo=۾W_eՋ+W3%ڴiŋIHH`ժU8qzqb[,رc111̟?SNCί|}t^p!~]RL&N8Aݙ9s&u󒒒(--W^v Yv-3gGa̙|'v}ˏ?Hff&UUUfOq_ш/1FKֹ}̙t҅,233i۶-=]|UUUXe˖QTTDjj*O>=zÇe˖|[Bkx>6lO?7|ÇfРA #Ս5,wA~m>{Ǐ'ָ|ٲeK-""""""" %EDDZ1Lee%& 777[|If3+W$!!1cƐ..:t;vзo_tqUkx>}.zi&ZnHrr2EEExzzR_M'ؿ?6mKyG*Ki^~= O8ҥK{yy`Æ t&}||jLN:u={KϞ= {r^bfKB^^MGcxyy`ۿ \`[SL!44] m׮NW7 <񚞛I{?NAA'N7,[ƽ`[/0*ߛOBB&Lˋr.T@@@X۶mk_U`0о}{kf6mj99q4T:CΉ9)\dMpxpu~B<\}<.|po )ȯ#FpaܹsIOOMHH 77iӦg}f ]s5;wr39?`[iY;~]/,,ӧOטҟziK\nnAAAvݔahh(?~qj}kKl2222oO-qݒ~GHH'Nmڴb;ɚٲe /2 , ,,qjzh#+맽Oyطy IDATǞ0zߓyJ SNbp{h"+N8ѣGyg6Gtt4`O7`͚5ub󟗕Ŝ9sxIIIZÇy7k\ڷ;~]/&&www[%.>JKK1L=ζmkrlCk׎ogyƶٳgWݻmݺu˫Y Œ3}/:u*n߾ݮOhh(l޼^c  @ɬ_oߞX-Zl}&vjҥEDDDDDDaiL&k׮駟f…oK. :g̘1:uʶ$``Ĉ5apSlw:haOҏ9t iMk: K0Nmf.>|>{%;^@~Y >[AQufǍ/Lt1zU>ԩSٳ'3f **nIܹs2{lvw~#G׿5[ls$&&X7uTLBrr2> ?|ݛ۷h"*}Qf3g&00;~]ё)SnݺǸK.Q^^O?G}DN3fL{6Կ/\UU#Çg̙u][Gbbb˯ط蘾vǎ<ȕ!%q7Mn\1<==/ح)Ӝ||>Oܜ퍷puk׮5]gk}$''e `kkl2>_LST_xٹsg`[ ???jlswwi >>>%˫Ƥynnn?_~'֮}ٲeFJ?- WDDDDdggӳgOo΁7n]۷k)+""""""rPXDDDٳֶ'\|gggLBTTE5ۣ>!H_Yz.zq%^xZۺvʤI=G@@SN|w*ϟyDDDDDDDi(,"""*++kmK;-6\|Zۚ{{M24?%EDDD棟o9|H+hmbU;ÃF[PPK/DZZ?8ODDDDDDDDDDN `V*>>///<==ٻw/6mbٸRVV֭[gggӭ[F'-Z֭[YhэKDDDDDDDDDDG `VjΜ9 f3/f߾}}gk2Xp!kiiir-p KDDDDDDDDDDG{R瓿wrrbwӱcF[YYi[jZDDDDDDDDDD./uVDDKOOȑ#;v ??Z+|wI~ U\\kSH+f6d2l'//cǎѷoFu-[Ǝ;6lXQXDDd̘1QRRBBBB>;vK.4jիWCN54"""W@˳;^UUŞ={j]>f͚Ŷm8{,k׮mx""""""""""RJB%%%aX'99P~)))TUUѽ{&SN""""""""""ry8t"""L&k׮777JJJիCsNzE6mln[YDDDDDDDDDD./%EDDZ!///|I(++ggjfΜsmۖ'N4""""""""""riZZDD$ os9s&1$$$\yEDDDDDDDDDD"""Hvv6t8""""""""""W%EDDYx{{t""""""""""W--"""""""""""""J(,"""gCDDDDDDDDDEh hV*77Hqq1>>>DGGdOhĉiӆ(<==[(ڦrdwM-2FF/2Ǐgذa̟?EZxyyqwt("""""""""-B"""T||g͚EYYs[om85Oׯ_ڔQ^^aURs`0`6Yx1#66&G_/vmb17 ̾dŒ| W!seJ,UϟĂ 2dH ׷- nVUUufNNxxxb`46GcChh{>ȕD"""ԅ7v GGGkGGjKDw;M}٬wow( CwT?{bwʇ_v0qDBCCym>,9.JKKxWgذaK/Bhh(fСCom;7--^zѧOBBB뮻ԨQOwt֍:TkGTTz"&&B)SdnիWk7k,/_d󊈈\iiٸq#+Vnnm۶~JQQQU=Wfs fyt=ˮ77ЮgF2.#c{~e_ш/1FKGEVV?#Lvv6 [nFV\I׮]1F^g&66L;y뭷bbŊ,["RSS5^db޽8p,f3xzzCqaz-RRRxWdnӧc4.n\[Ng S;33oH[5a@7p@INNOOORRRl>wu={R|EE7ofѢE0}t z!'On ,,).f%z9bkׯO?4ǏԩS|Mt]|\yEDDDDDDDD.Ub38JJJHHH?#G2ydfΜ}Y Gܼ\08l{M/}3i$ˣJ[h$<>>W|=Én8.fȐ!DEE"""""""""*EDDۓgx-,, [۳} `p|^M!77˗mK&''‰'.:C`0!88&/LגSNuޟ'۷oOFFv]\\>|8+Vٳ|9ޱ23fKY|9;w>'==iiiL&k׮777JJJիCU'O?￧???&M‘7LYA1ŭ;v]Ax`Ew811}>m۶,Xbaرv}ƌSO=mڿm}ϔ)Sl'/\_ &бcG wtڵW[]bb"?8_5 83yd;\]]=8y$8::zjwΆ 5\I tկ*$ӵLC5[QVV7f38;;YJX瓟ODD5/b28v{Ǐՠs҈&%%zX=K_ȕط蘾vǎ<ȕ!%q*EDDZ3OOϋ&v]/yh >755XroƍǸqZ-}}"""""""""Ck/""""""""""""""""""""""""""""irss1Ctt4NN?~ш=zٹ:y<^p-J2NxIu[:$;| t(""""""""W,UR$%%KBBK.W_zj ؽ{7:-qitwӝof5OYe 9zy*˙;nԬsGRR6m"33/k ~)s?;<*{&R)EP"VT+?ص` vEH[& {IL?B̄H>ѐ{={sDDC `=-!C{l6{F;bӦIiI^ qZna8Us$k^18lN?zF0 ega~|VvR9[e?G2ShVo)$=א*0 3ˏE^CofL![m8^ U<^uGZoT̝n|YbIII۷E1l0RRRb/I__&u~w~oiwfopxxq忆sţ*ҏrjl2QTles fx7b+9Կ?G̬s¢f@LO|\}\3n$f?pyL7HMYOmkԜ9BH@ W<6gd~$}|?ρ0̤fs˛O=0Ѧa3Iyk,5sydȭ<:VϞdfS`)N=~zu~٪oxӷ-#xneʯxl44c/)'>2/ps~*;I9LJ!]z}Ttݍ2&L`׏f͚w͛ꪫHMMoqqf3r-L&ڴi×_~Ixxz~xGxGپ};}bCKĉ~ٳaÆAAA]wӧO/35\C.]?>tyf7oswЩS'ƌy)F˙6mKKvGNNNY=1[{VM׸b[_DoI67^c:Հat8&C-kiwǑZʺW׶!MO Ò]~Ux/hר9.b{ Ydi$O_y|C/!yWrv]I`ߝGʌeH>_ɴ21Xη;e),%R+?qkw:p2m9}ĆEx׶*̦_WG=&u:c)3/w_ˏ<9s ƾr\ޢgItIlMK9׬߽}~rckI/yp/v٤X0fWu~㯑<} !}/q%n|0asa=Ko߾=,^U̙$%%a6FI&Btt4'N<e<䓬Z}q1.d VZi޼9$''ǏK.pA֯_ԩS˞iӦ1eعs+]j֬Y[""""""""R99Nl6v'O4lؐ%KF/""""""""QXDDb]Nm6c{F*a,~ \pJ IDATĄF`2HɢnD4P+'~wwN?OJդIڴi7oF:%/^|ڔˮ9|p}'-34--LBDDDDDDDϧ5EDDCٳǕd֭eF8q5%mؾ};;vxk²?fk഑!%ɫ)ٳʼn} [ ״*kSvn џeɦٽk{lX$[_S_Lq[X߈l;(YveگL[>K{iTQ3S+5j(},Yȑ#]cccݻ7O=GɺԹYsj#F'm۶֯__zqlW^L6̙3۷н{wnV#.""r̙3|(,,m۶Ug˖-|x{{cߩSN-F}f׿q%[ki}Z#voY։XzG,?KFt;?s8MSW0w0{5ӕ4Ѭ lݑN۱i+_ї Qw3DDt}VŀuGӺ 4;0hR> Uv\xm!\4|8> :uQFe>#nԩCtt4<) ?N׮]]%<tFǎ>|8-[cر; < C޽7nǰw^7o^S """""""iۯVm;v"""[6ѨY2v/l䭟[X,[nyQQOMjb)$ | Y|+<Ah|S-6R7К=?H'&4ZSf吙MӺ MMs?nѡru.X( di*JLe䑖E1u9;LK%*$P?~Ѐj{. w;IKK㢋.rr;CZ}C˖-پ};[ZDDDDSVKlu#-۴/( "rnؾeFBBB v_X aZqU']}n^O?40jvkVF?QU  !<{ˋu~:P9?U?3>w^PFoӦM~տwɄ 3T_nN>;{zeuN8ߏOΎA1hРCDDDDDDDJH&to}tm5;Mwm8ߏODDDDDDDDZ?Wm:ߏODDDDDDDDZ̵HM1w=[a >|F&Ml`׮]s%WK֜iێ>UHLLnꭗ[.]ڵkkm9hy.--iӦ`2:ěoIJJ ?3.ŵeͩwYlU}WNͳصhoX ٻwo(ϬYx駟jM9(,""r3 Ӯ]ʖ/_Nǎ;q8lܸY?ϭ+NΑLӶ;mVq+V ))}h" FJJGmsAN8qZYÆ 8.h h؆ N:L+uχ]Pn[/c|9 C_d2l@{nX[O0իWӯ_?5kw}GTT7o檫"55ƍǁ0<3kőɀ:u*;N:1fFXf W_}ie8NضmǏ',,ܑG^Ao%(6 g9#xv'938v = E{Vm@{[0*0asa=痴׾=,^U̙$%%a6:v5אݻIIIaǎ|Yfn:F.\H^ EDDDDDDDDC ` T1c?<ŴoߞVZvXb9㦕VM븝n}Z#voyԩ5rm'k׮4h08pir7ͤIxٻw/͛7EDDDDDDDDŴ׍Fj; -hԬ;~ŗv?0 233'88أ}CJdffIӦM1gZlˌ)R>~H6l;g'3o٤"""2Dtttm!'EEEUwɄ ),"""r4h 0DDDDDDDDDy>9O(,"""""""""""""r"""ÇchҤI텅`%EDDsiiiL6|AE7=X-F)""""""""""5A `a̟?vq2eZM6޽KN""""""""""RylÆ ZYXXAAAY9OfEDDDDDDDDDD%EDDS .W^v(""""""""""r(,""r:p&22Aqq1pvx""""""""""'DDDl6Xt)tRFW-G(""""""""""%EDDC$&&_~=?Ǐwms8l6l6ɄYWDDDDDDDDDDj"""]v駟8q"qqq< %EDD.;wsejՊ^x"?"""R#X~vCMΝPDDO"""R#6lܤ䯈9naCDD%EDDFtooM."""""""r.S+ԈXXa\Д9lucm """""""Rm۷hzis"""eH5j֢C?"""""""""""""" %EDDDDDDDDDDDDDJ'H-;ggm """""""""" %EDD1uv"~,FZv?٩"""""""r֥K"""""""""""""" %EDDDDDDDDDDDDDZDDDDDDDDDDDjlj0.EdSlb3jd2GHX8A=%:%EDDDDDDDDDDDsǏb)*0άAtvJ_"r:M-"""""""""""r+*,RT^^g0KQ!Eg/)""""""""""""籼l||}1D2L&||}>}H9[xyyi{(.D|ZXDDDDDDDDDDDHHgZmiРl|t1$ M"88VbK `sЊiذ NÄa. do]ڵ (пÖ ̯n:L&H@hȅj-f޽\P 8t0 .jɧӹs7q4lؐ[g ߆pa>uR8 hy\eֽLYo1ƒ~jgSկNȮOVct:9q"7h]8eQ`S6&₣8 ʺ&>Hjwۻ߾}_%.I `sf%h.2bsp:fan؝&vȣEK=ҵ!بyV.Li\yrv,}KB`e|&~ϽI3niNhTW։|lYz }CfM])),X5|r5D||=IOOA3htڍ~n&**oo֬YU'x Z8?$HtE"ιˮUg{=㊨ &oK|!G W=K+$Υe noŨzTgl񨟳pϑg+ih^o?0@xds~sa63yYu=k]MڻWݞ\o@ `sa2'Ș!9 ۦ a?O\v{.A$4n+Moݾ\hpQgG>WҲEr˧Ln]:Ӷ%g_; ᢄIx=}S=|Vs8MGzd%Ѧ n%KQGlt4 7Tj랬~x7Q~?';TZo۶m[#ƍѻwoj*j{G8p =wPDDDDŁ(*Kƍ ;}Ԯ~%!!cZY|a،T_@\\dr}tz93fL$!c9Y/g>3ewq8ჹ?_`!/c;|eǻθ@Ф:Ͼ8wʪ"ϓ<.ՆRZRL+&nKwm=\y~q=XQ_N|&=}N__"GEoJ5)͒́\d`'wtl7jY?_|ʺ/>2)|9F_w&  n6|: |N?=n՛9{f"?oyX0n"2LHn_i;3> գ=s4ڱtЮ߷mYӦd55 f$D6h}q+niԟe ذiY48ԯ7uwB nS0`}*=j8d#k$qOC$ L+.wmXte,{,..fܸ,\={PKr}l2#j?]+0(k` sq%WYÐ!C),, ˎ]iܸ1~~ϪSXXȮ={Ա})*`v///bccYl9il 111`q:W ʴiSλhР!f3yL>=zrii$]i4 N _yb<9n$sK|_u;_nŶ²y[эp =fqjX۟ZLqqqWb&=qf(/FT!/t/Z2xΨ?þcܰ9~PV[NGK#N9<הZ.,h/WlydCq=MZi7g1RϽ?/՗"""縜VҤaH>3Y׭i&7Aff&~~ۖnѣԭ[YZfUqM&0Xr%III|٧xM,^^zt9&dw]aM{rvseOf+L&fbbby&]_xmG^t=G٧3躿l2N'$6kN}XgV[bcbyp}L9fN7Xme2a8Ob)y | xF),RXdDv>Vޙp8bXl7rn.i_7IK@Z^OaΤh뱠] +.Qf~,<^@wڋ)yť_P@~~!,|N[&M&[-}14oفK{ǮumzdrE9_r1;4G~> b r\u3/ma;FdDwNuUq8</ODvŴlќG|TUoΔi`2/( V&sZz:Ͽ*GS%4$'y%;GNYf-Vk1& s.b3__s+(Nݹg|A5fխDscD[2+wӫ $(wwFff/ʄl63w"V]/િsyif}<Jؽg/O?&0w2:|뗞7f݄_X+F u|StիDٶc3gJWuyr\U>?S9e*)SG->04u U];G.~mXV\ڦl+RS?m'R& о;HKKbЩS'>>h:tA2X,2kQr^za>|&?~aL~_Gnc=~BbJ dʕՋٳg3`+1 IDATWwʕVe99-..dY7ԫWCrWDGGu-Gzz:;z~gϘ1?> 6`Bo@.#-=k֒ȉ'p0JF%B$ahPY:* )vQu}@B+ .SH^1|a͏Sz\7]w=ֳᓛ ſՙu:XV (?`ٰ쮯v1^| 鹅dqo/~ +#tFXA}q"iRA|2s2^|_u}1,'g}5}RiG#n׈֊~u!ٿ{6dk|^Z]g#)[J#EDD.PS# =$ƎÒo˖DJewꝉvmZolTzyz\c8saAs߸l۾ICǩiJ Mi7nW_&,4Of>7^ԃm(**[nt:[ٳ/VCC3Jnge^({y}1~\;vO?ca)))nN:teŗ^;ds琔ٳիQsΩ 09Y%˿D !>Ó1?, ndiѢyX~=}1燏7fΔK.)(0#,Y+wފ+<鑡khpBq60Gql̕7MB+]uą8)Β>_ormC'qI|3<.Ե 俓9{}_HDqr^z#Nי2MNs!d"r^ɳA{rVIﭢȂbbRdPdÔn aC*LV'juxou6Ŷe,8 QnΤg쎚X\9-DZ]S$?+m;m1N'~EW~__PHZz:wh'"7TgN|q~߶GfVa|vrAsMi3L_s6iLq?DΝ !UYk C&H'-!yor<1m[ {^'Pd)""<;w>ѩCoky柏:ۓ_ ߟշ 9%z[uM{v"|+"Cq8ၤ%kL\*̬Js\ꏟ >ڔ~6sѽ[Wf+ hz?={uV/[ <o|tcITT_y0j`B@dd$RopQ `L/233]G|٧ 0L`evFNtZJ=өsz{ԩS ٸq#mڴ! ƍseǎ3;8 Իod)yfعkG%{ԗt"pdonxT8 0"J1y-=N8 檤[ 7 ^Y C&=c L\Q_џ|5?OWZFaqV&3R71w Ur:99s:\?6,IH>>^\QBTd8hFDv ˧Q5J `sPՇ@.&oXb;4p󖣴[zni{׭{4Hddf_fؘl699uWcOc2"3+봾?l,eD \?&|}} -{p>Xԉ܅_ҼY3v jVZԋ`ٯG;JLJ@_}qs}:w>[aaGOɸX+LDx/(Jffk ?NXÆ yTy]>:ts>K׮]nԡÇ1͌{W]v9&EDDDDp`6:aH#̜%!!!xyyI:L&BCCp-[V&\4Ɔur%ϰܹ#s3tp: }WP~p"˛?#%(l'sO$c+$X$%o}: O>jasxRtqgfNs&?؄ U S EEaߏtߥ/[  |KlM;})o~~\ִ(^G3xx7صHk_T]DX@U}}ÿO 3IZM&m)vܧB9b󣮟K7HF 'F֕iY*.6/# "#)X/( 8 $i8adfeqZ18N^Ӯԕ=se2뫹<#IBrV>M䱙2ӥY}z(֯gެ]S:*Qxweo,wu ?2"|LkQb})?=!\#N5/n\}BՍ߯}6*#i}\\#(pgSh٪5 ٢E 9m۶:u%?W::&\4\&O~F_/$ݱzn&/^|ZŋIJJ>(`9tDT3N'''/ r%v;لEᐐ-XdTc믿>)]WCٱcZ$A h3:6O 3phoq!ΦG+<+Z1KӵgG%8d]\w Z,x@x5_jƯ'~C"hee hx?<$`SXzi!et8pQn6&,,hN|UV卵ٶu ".w_]f oo/G|Ygdקz~6ĵ>$RNpc 07)_M|RXDDte=)> g`[Sri||)M{0zFRyb:|{mvWtR&!xrӇ`[ GҲy"S?߅7amN.ƨ 6&V|dSkǼy&#SWlg,r4'?GmLf'999sV^Kݫ_Ιǖqc$ԏ?sq ñZYȵUׯzL13oo/4&N}ݾ \#]]wC+.y&<\񯠎Slvռڳ7^z>ܹF˝}"#9tHm٪ >HPf6y3nx6lM7݄/yy\է;ve˖L"/<ż+\w ֖NNN0  +.gAbfݴi:uc=믿/N~ZOn]+cȐ,^^zrJ 6{l @FzZ$""""ÁO5o/dݩj`bw wJ_87D`@ %kASgnSe}w󆶼1w+fS3{ Lc:VHV5xYR0;s`|%~τ"""砾>9#^xa'-$ǽ6:?ʉBDW*L&{A}eVYZL -Smsu"G527fب[\SE7o֔I~;S~`c@ڱ>?!*I(Mə|v7TJޥ%z\ލ2cm.:qqUo2p=<{m~rWvL&?4I_#['''G `w`'}h"i巽yϱ}]M|ms9̚=___bi׶5S|u\qWvyr\^#O1l(|ssYeuvԤK3vALu_ ~0O#66Lo3Gছn9s(**⒋/ٻ( nz$ JAEP,(`H, X@T4j޳eΜ4fgg9SΔU+WGnjAZPzuaEh4h46M[``ʔQACԭ[qqqԱ#nM6lcѣ[GƏ?޽{#;z!"""`VEv&??m섂8;;C!++ F G֭иQ#|;xzz6VjL޹k7-[FAZZ"_Gf -͛ZFQz~=[߉l0HO FB|4̄ Μ= cpYINAшݻ2o(>لeF!kX{@. 5SQ9z-iX+򜗽mӶbcQn]sO]thB= `ښ3'I{:f5Wy::g0衁B˂s h (tі)S<4mhվ/HѰw"ÿv]\RAvįxc/1A:^w'wG\71GheSWWWǭ[ :F)b[z$$&B!0 @Q[J_N¾$xtǠX9 0ǏÀOIMEvv6֩cA??_xzX\32xl>/< z28`;n݌B_õrz$\r-T6Bӈ-ծJNE^0eR˧J>tswu7k1sM^ S]:QQQ(,9lGڵUCդw3{zz矷|a9%gKm;ٮ\.s{lkL%&%a/Q 9|{w##o`w鉜5IMM@li&zZn WWW !!_,^hoݺ@;d`ǎpu~sh-[!2"hGGi^|z=:]>q) "3r0x1hyoƜpr! M3**<:'?7<hr n;*#Q-4 Vjv0N5>Z5@Td I<~~\]Q/$Xxz:~hZؕӳz6_VKFi|+WBc%5efƯރ;|86VR=].{HmQ* uzг'LDDDD6iM|ԗq!"5j l4W^ aѬYR^ <<ݺuŚ#GЦMյۻYY @֭p꯿v1~~] y0^gggA](8XVx\ޏ_|"1wt*?/{٣|"Grjv_ tZ\صW6jϷ: ht!7_h*mwo\x-[7D@cq+9hROfHIIzy^eX;.ƿO! h{,Vd;b`"""";vX Kna|m݆Z>>;=xᦚ5˥}_륻Uz41 HICOWRoGP+Dv伬QSsmgŗqyHh5pfC BgpQbkswXv'` 9N. :|RDbbbꬁ T +;ب~g4Ix蕊ʉDDDDDBpUy]zmWYS`@n^.7*yY:Ew3>yFJ}w PY6p_] :NNe-V""m""""""""""";X(/0| FMrY`""""""""""";g5{x /7:]!Yyp0!Ӌqk7lD:;dzg{UqeTjzyY-D͚5oK.̙؉kׯsǎfî]?"55l`“a IDAT㔗x9{pqqq]Oe+柶_W-*%Wed6M?Ww"qdOy1]qT;`kimߧ*KQ+|%Uiً8"^U/@ٔoEcJ\r._ͨ(J@aajB4=*[M<Ʊ'p)ơv@sM?x]V ??R5]/] aqMժt:b|rY)) ApPb~ɿBDd$j֬ի[H>/rlMDHy0Q4)wronn u###3OtPDe+Тy3Mdʣ/t"oOSp1~gϝ7oڤ RvϞCrmc(jsp꯿ѫGw<Ctao[jKnXʛ=1SQ}Kb̶n;(9Om}x*KErmUjr[j:*,v\5,_ۊKMðc6M<*[K*y|;`p=^?QcH;wa³#EDDFk32+ goRli_Tt)ŧ4?/[#`' O[wF(?t[R>/rlMDT&""n%$@-((^gdGJJ*pN[ 3WXXd򅋋{ fWΘi&&%NNNf |6jtHHLoZ]r!8]9#"qE|:M)3e\^e2.r sgX+,b}% ~L k&"iipuq)uŹ%AfXgϝǺDHp.wТY3’+)r-"o~ab|HFF9#cێ>t"1) ~~6EyHMK+Uζ֦TYpP.O2VhLpu^;5吋[-u"%%^^^^,ZLrڴEId*lIBb"k,vzfo2Sr>%qt媈ؖ V:i)Jm){ڳUQkm=#Rl6ښ?m/^2~}I{~ 7ww>צIvєۯW@&1|vϙ nf<5 <8@g ԓjJ-O>=ŋ?k5N)ŧt41q“8z8Z-?|Zoo@zZ\>o˱5;*̬,?'vQvm|p1""oEdJש#}-J(˫7j"^fg{nػjOqIg`!xL~i<~x{wٿyG{:YcFbR2fNVkr\t o͜ VؿY?ltCΝ޽֥Ix11׍}YV|Tά-?mjbwaobۖ!WK ?vm/]iӱ{ۯW̄񥦭q_?߮߀d]:w®={1 V%WXKJ1*)G֭0;钛-7l֑ -77cxGw)/CƍOcQq 7z}8!hddfb>%l3n%$O?DžFfV-5(9}rVjDq+-Ⱥ`2'w 4-?g'>Rl3=fYZ}YMs #m xerKXInX[^mr[ʾl*exw64- =뇟~{wl ._,ZܮKe%){xxJn=%w,B.%WUiJܶS)Jm6ʞ}/[reZ4hEE-F[TFlk4 o,sԴJu2є[Q_]n٬NN_e9^x֪e4rOi֩c1Wj<^cXZ.J=&"LDDTĩs8v$ڵm!`wTc׷qܹ~V-cEjGzrUGWzt:N7֭Y5j`0e$3}؄5aԯW:Gt:;q_\5曵Xl֭Y2p ^)S_ǎ`v-`?àxaDWͯ$xawX>|̾'3~fMy&֮^iY[fS+VfT4H܅zt7Z`Rً,#\vG| 4~W9&.x߲ys8ub|E8e.)(G7?@{k/;X2}w߼ϛL`v@8#55ZtW=EYaZE]јliօ-o'''T^L1rm#'",kq["6Hl߹ 5W`@Bbд,m0J$7[VIi+)^Cԓ㐗ӦJmKc펱)}x֩SOJn=%w,B.UŃҾܶS)rɶQ{ٵ`%WEmmɽ~HX˖o6HE+ E:{~WE(.{g:XX[S98x05ݫ/OipuuZ\fc?߶mGvN6> z5re_2>-_Yc'N"-\rLffXZ.JĎDDDŖ_ÐշжMq=)W|qLlĤ$J@p7j0=5jwFc|,lHp0#"y1 5k0&իw?j^7o""2O{ၱ.5ځcWXpvv›˜'BpOHLo-n2p^6|z5>>>صX=`Ƞ][*Gd?I|;77wa[dcVD5QQz:&=%t`q!%995K??]/֩#<lj,[܂ VWmIW^CDdع{O1E0l =^x518x)ƻ%?jh9˵Y"LXi1 "-f{"Ͳ!n\ yC 0 @xZ"aK<ֈ3ul*"21qxb]'-g$}Kdkoܼ!+{"$۪X'McVʣҶSX ut:KtW*%~PXK4J۲ R]y싋 E:)R+[K*y|kJݹ n<:j{FV7n(t:X.ȱ'Á]۱w6ln-]E_,͡D=喯,z!;'鸝bVXZ)JĎLDDTihZhLdxh4Z|f߷nPRaZVm2 j1sq]:wFQXP #1)^5̮pbng''@c= `z,GF ~B=ѬIxD 8 ,sLġPZ5Ӿ<y#TVn٢wTgff0 ^Ƙ7];HKKzV?q ;?=iuc='؉~Z $<ܚ[ pwoGkrVϬO$2.bm1"-.%j'-q47u~ǟ-[4/>hִ)qNNNh`.۷S/Yo|LlvRNN =ӒCGb6PP""pxgUTAA!سڶ-[;wu3 HLN2;٥4ܚ"5- .{ڋQ"LXcYl6u(]Dr]27A}߈ヌn,Qk%KX*'uXb̦̬,ccr3GXOx{\ǢS"۪X'McVʣ^o%|)T.KvBز +{W%Ϸg\hDe$D;& MDDT899]Y}Я/vً!#S[76Z+QPP@:9 EZ-zRRS7wغCz!Xi3 77~b6eV㉧'ƍx͘d;s;kK/yy5-̜> |%ggg/~N}7kI6GyyyO6-'nF}/~3rsay7G]t .\^H=4j߬[@ ]~zi|V_xڂ_n3Ƒ֮aa㻦E)/^LJs;ӧM{=~))wK'/߹ o'<ܚ nKWBaLddf^rVR8:&uk3۬A)%ss+!GIOYhB ˭dLr%KӶg[PDn+3~};^H0Zh%V ''1qÏ.351Q#r+X4wJ*ɒ1+Qn۩6rRK NNNpe?A.Wrl= ͟Rlxz󑙕5/~UV9TOd~J"#3yy0@e5r8qܙYYn&4kkz,QslMDwLDDTIyzzb@ˡj71},Pqu4g&7{T³/@3_pqqFzٳ<,[?|̖ 4ytho>$ħ[o0 2p.]e1>s6ggԮhS>\(G`5}yyyhܨf͘nSFU`Դ4<> nNZq˶Чw/G@QNm<>Q@3ɷow6r^i5; sGV-bhҸ])GCG*c9Ii]5>k GPN\K&66g /DbR26 *1ydyu[PDn+3o`?JùIuo#!?k-gjbciwTr%NV:Y2f{;t@} /u X..5'=.;Yq*1ONE:#.>pw.*-=iiS;ЬS$r.(} nm)}Y"&k, j+Iph[-Rnz=o݂N^-7k7l]gDake_cpXHrY^^^(oyǖ^mk-W뱚6RnbˁRm;*/[i{lY7l]_MɽGgYߖmwR\jM`eˣ5֎o$&%B!0 NNNڱ ''fWC۷___s壈=&;[ĕpv&un܌*:;ƒ+1rPUt(>SdddQÆxaDq^ W5E'{an55iW2UʖG:\cd?uqf6lڌ9A 7%*ѱV:Y1v*ۺkIyWeEeǔ\qrҎWeUؚ*Fĕp8sskW2DDDw[q3.vRx׬鐻XΝUaؤW׷:{8jX|wUvmr4^*{W*ReMEţf1[h t.xg{ǿ\Xb,{]WSmݵʫ墲cJVM9NiǫxlMD#v&"9: IDAT"*OJĕpTΗe+:""""""""""hԨqE@DDDDDDDDDDU\C2JN&L>]N1y24j,YRz"+j]#G.>9J˚[̿? 'ٱq)qpw..8X{}@qT(y5`0woEQ u?~Z"M |E-?/k]eMi(RQymߪjD~TTڵ[3VYۗ|| +CDD2oNBBl6`iSlhy"rs7_$?&5 4R_ּtG̿3ǧ|;2?j| @vU.o3g__~}_G@ǎև?Ξ223-GHJ-?/k]eMi(vfھU5J/E 2W*UtRrr""*DDDYv{ *t^_1yy֧5kJ[ @tQi>,3S_"-Z)N]~o)?GUs,bˏR B~@v6`#iW`bb+L_ihjդR[?EWM|rEn[Fww)sfQƗ[?i?X˟#ʯ, {(OGJ/o%"}/\jRuKW J;76mn݀= -M&M\$}x9s:MKBCޓjJKҰN7o~AopE_/̜Yzx#[ڷ:ɋ_i|(_$jo E\\yI[Ke*>9#F Joޔ돚i=䡇Wd$/JUR\77f sR~JK)"chxAR|-ϞqqVҨpPz x⿏~}}^crW~\"{qǏNXbQ;URVvȯjoeM.[t.f ǎ_d/O寶i_0n.#C.\WZ?մ"ӖMK6;헚+ҾɵE.-ӷrw)OT\ݶ4| b0QUxt!= d[(_F:A-{CC˗;8.^,}TVVN\K's瀫W33?/PrtZ:_ԸqȐ>jyB~.HˡK_$?Jɟc )IcttĈ'K1H'D?-[(vD7{N: ٴ 8v̾<]"ݹ[n5 Gi_Z%ZȾXARR*?qlpg0H_xAzL O۫K%?.,r{nw5ݻFnJ֏)Scb<)s@,?j/RjSJnCŧKe?@˖҉_~)gP|jJW~*/JY3Sϴo#m#_i囜 +uЛ+m__M},"vDo S?jJ_~taNB #wwS7mnݤ:'|QC)G>|qyd:_4Gu\^~͟ZnF+WJWR8C̳ys`:n%+:pr*?jQQJwHU=zXΝ?\7 isǎRaR:d P>/O-ŲctуH~_|.>ػ~TuJש#U.ZjWZ?ՖYa$^:^6n.%{)_BԾ |A"W5W}SjMY>uEOY(կ|`^д QU]DDDU@Jt*#=%EX,-[)ߟJ÷m:HپJ[t|hJnML_篴}-RV'"_ʣݥLDDTx{K+VHWJLZ@D齟gr'\?'%su5?`KKJwkQ"?;*]a>4od-4:?xP-SVމ->i'=H~OuD|j>*MsC7Qjoj-%2vQ'OOoGqOY .>cpgپJRhJnMLUۻ|{Utgrrq|SG_UCDt;ӧڪtlɓ uJW(%F:AHW[Æ R jGqq;n_~G% Ng{l|#۸"@>ݥEJOޓZ-Шfͤ*vT)+uJ'31'NH,iܸ_I{W?kDk/@N~] `>_H|#VԳ'SbUDA ~GcJapA[^G{kwszwzTKGMT*_GħGC͞_DOM_nTSS_&I9ڷOz\(-=Dsq_F_Jl__M}fz| :{QG\W_R]_N;6Vz$)SG\b&""jN:}{^=S'`t$]߲tUU~~YImy%ѤQ"u4fW^k<رwww,n,_.}6`v4hܤqǍNJ,Z$߆ 66/Jwl)Aɓj4o. \#Gm['eO޺%]HKۼYzGcnoH4}gJi䨩_E >$Dz[o[h H-{WwRϞ ֭-&ƶ6NyGaRYD\-i@:ڲ%0yrp"6>5>ʭjoE˧S_nTS$jO~u$֯a]liak|~~R_0}U[7,ZݾTۻ|c%J_~-[&U+E2EOQ+U 6j߱ ""+?M[}q%ҁѣAXT<)9YפI髨t̘!ʒRKΖnpr*=ߤ;WzU9JjzJ?b[~U_N'ս޽mS>KH}[z\Cm|RVz+Wsaa7T6@dT~>>#ǍEĦvJ6>>]?T%2ˣ,KRС6"X_hufY/Wt:iR|qҥEO+Vt!"?`""$\Q',uk4^zImPzD[IwSR_w;whm+=s\i6O*:Oe J݉QKDDt ;v>ʔ/eHwt0z.yև5omt|شI: ݰ!p;СݙFt'`o)a/U+:""$x0Q9Lևi|;ʓ3wl6""""""""""""";oC!""Dg\~}F׮]8r+;[_`ԩCL6 C A޽e~w/^^^0aB6[RRR{nĠaÆ:t(\\\,"""""""""Gc0Q%#F "" 4l߾K.Exx46mڄk׮!44Ԯᕝ={a۶m3gN>]jؑ#GСCn 772~XX뛭[ 4@֭ѢE Y>9իY,DDDDDDDDDG@UQ}v4h ) /,U1`@TT߉_4^aaCb-I!//O7yyy`8ZGdd$RRR욿`@||}:ׯ'Nf͚wH'''c(v""""""""ʎDDDU̷~/vhٲ%BBBw^߸a7nݱh"˖-Þ={РAԯ_-[4WDϞ=֭[c1Aůhzj,XM6#PO#'""""""""S:CСC+: """""""""";&"""""""""""""CwU"ϟGnn.tb`0`֭ڵ+ɘ0a`E\۷f͚xGPF 'OFTT5j%KRYwqdggo߾RRR{nĠaÆ:t(\\\*,F"""""""""wU"'Nرc;;<Bw999ؾ};fϞرcСΜ9 ]vEffoN={СCe0 ЧOf7hK, 0sLtܹTDDDDDDDDDDd;*A!22ׯ_7~o>tAAA~߮];ԩS컙3gbĉXnߏ<|fiѢׯ_6 !`0`7n\aGűc_̙3Ǻu* J"""""""""DDDHHHڶm{ۻw/,4~^^>#F\]]1h UVB֭ѱcRڴic၀$%%gxDDDDDDDDDDU;*;lqat:ƅ 7n8$ףSNEGG M#.. ,y{cȑjC'"""""""""+8WtDDDdn>|8 ?tEhܼ<ҝӦMðaиqcj  MW^3PV-edd`ر=5P IDAT{]DDDDDDDDDDd&""du^gb޽8p ZMv58<,X4xyy9$6BD8~85jQPP`]^^ frHDDDDDDDDDDwLDDT899!44{޽{ꫯ ///UV/EСCXhaջ"88o6 !!1cvލ5kBa̘1UKMDDDDDDDDDt`0Q%4x`,]ϟGhhx#F+0bڵk׸q0n8 5[+WbѢE8q'"99[lAFF '"""""""=:^3dB !PO5R凴46zjTxc-TQ05bz@[C@@.dٿ?Sc2Z+kMlf7t/4qD޽[#GTXXX>s*//WTTu=(--*UY555֭[)*ժ~KNx9+۷onu1CM章55}t!!!2 +΍tBEiiiZ`6nؤd2馛nj6e˖i׮]7n.p N(<<\999mjނ#G iΝ իURRn+//ɓ5ydM֩Ї~u5>3fɓ5sL@ ٳGK.kˎՆ aeйEvءFmyyyJOOp]xQk׮ܹs5a„F㳲 M65kƌ͛7{ `ѢE0a>3mڴI7|>&:o]=5:+`HJJN8wۆ &Fmiii*--Ւ%K.>񊋋\/jzrJ3F+Vh40 ͘1CR#tVxX 2D6fٔڪ5"""Fy j4f4hFqNͿ fP}}l٢svtY+Wj(&&kKzTTTRiٲe"-\P;wԻ+ ˤ*==]a(77WE]$iر y[dґ#GTRRF79sqի@W-2IIIr\ڿl6&M$;d!OK\xQ7oz-_^zG%I}rssտmڴIGUmm6mڤyh Srrl6l6y䑎.mSty 26|p-_\TWW=R={V?֭[={zH7RSS<%''jnCCjjjT[[+0TYY)???Leffy~\\BCCjٲZz;NNNnt/^;vv ~'j9rZ5wڵZzuYV%%%]J['00PZ`맾}J?|G>x9+۷onKOOa_9/??_-5yd~;Pnό3t)_K.GXEE+˥F}-… ZzJJJt7*%%E+1u%۫~77j;#˗gUXXhIRaabccsN***=gϞ4h㕛@mݺU!!!1Gիj*8pjrYYY9sV, _'Oj̘1 WBB>˗kذa8-&))):q m6lPBB;C6 %i۶mھ}^u۷On4&>>^qqqW --MZdIWΜ9JJJ޽{ohǎ9`Hll "nlJMMmvX,ݻx-vEvءFmyyyJOOp]xQk׮ܹs5a„F^;kذal4f֬Y3f6o|2 M6Iɓ'5x`-ZHzguawE4a}gڴinf}kERRRt 6lؠEGG7?tPEEE5j۳g>XBwV}}VZhL||A\AZZJKKdɒf̙$ݻWovءaÆ_|E͟?_/V\1chŊת|jxX 2D6fٔ5fsr:=bcclК5k4gUVVɘFPPPU: IMMf4^&))I.KfӤIֹ%&NEiǎ*//=Rar8xBHH$~W۶mt) мy[oiգ>Ύx???%''ffGiZ&IÇѣG=R͛-`L&w_:uJϟא!CmÇ+ JMM￯(99***~z\.Iұc{iȑ+33S{ioCC*++U[[+0TYY*IXB.KѣGKw}Wd۵zF0tex'j9rZ}qiWYvV|A9sFVUIII СCVttL&z)IR```O}UUU:*/!yqS\\%]ST\\JĸE*((x-.0.O&'O'O6];,Ҵ`mܸI?w6JҲe˴k.7jf lUN(<<\999mjނ#Gx<)11f֬YQF)22RT^^Otw_Ynnjkk5~F3fЩSԿ-]*MΝ;uׯu֭ј ݻW.KM?kתgϞwkU>x5vEvءFmyyyJOOp]xQk׮ܹs5a„f:tnv=CMf͚1ch͞=tR'mݦjwVV"""iӦ5}v%$$h߾}o~QF5]0^$%%E'NPAAmÆ JHHPtttCUTTTv04c eff6>񊋋\᭰m6m߾]ۧbeggTZZ%K4;'ԩS7n_VW#jȐ!l6ͦV|r 4H#Ft6x`kŢ޽{fvmٲESL$(%%ѿtexTwY__-[*.**… _㵭\Rzk?^/??_G]wբOTll}+&&F'Ol{/@cJOOa͕bQbbbϜ9S?zƎf[VUU2224wFntiٳc0^&))I.KfӤId6OG}\_6mѣGU[[M6ݵ!OkviȑzꩧZ}Z||Mtv/4qD޽[#GTXXXG1555֭[)*ժ~=fڵZzuYV%%%{9+**J{^x9+۷ol5}&*((hw"0rLzzWu1C.:-v Y,iڸqc/[LvҸqBuWdM7D _`:py~VVx v A >|0`#G A >|0`#G A >|0`#G A >|0`#G A >|0`#G A >|0`#G A >|0`#G A >|0`#G A >|$uХ ښj'M3O55Y?֟׭Icn_VXXo̯͟eO^YXfY}k+WEh?7oC+[ֳ{Ԯ={rJ5ǚsfTao~⮿}s}?z9t%NSwo^yYa={7Ԓ_/SkUpwW)4$Dan)~S5ch[\lY'2Eg/Ν[KEc\zY'u0I.R)$ahP&w.=/l1<]ƈ 34?Æ{`DGߏ`!I& 4=ieJu1uZoۨ^$P1SKM*Q/I kgsr!˥QQczk?C|ThnC^UU>}$I՟?{d2t!md2%ݯSn2z Er|LfK[WB"{Vt(mG:{L7t$)aPߚ?nk\kK_Kڼu>`,K_K!^0tLMor=g6tWZk*,t-ԗ!O񷫺FonZ|>]qyVk0+8oIiAt~72t!m2dg$GE¿/s!˯>d$s=6[w!e6_z,aP}I>9tX ֪{YuIfY.S*/m[y9@.\ ?_e 6ɸؘh 5+jhpH\.CyN4UYYw>Y)IkNG;tk~_sJJKU\R*I ]Ç U!0|>yFoPUuv2TU]Ꚛˎ߾c^uKxrw՘')88eYiP;&d?XLS`zdH LVߥ>woL;Х?/֞=z(*2Rj_1{?Po]%M+-MT>}dcFn1Bߟ#B?]_d2yϽtu:[V%/,h2-g2e7-}YajoϬ'~2[nkz}Ρ8sHszUppΖkΣdK㶜O-\R%(H[f:|Dmݪ=dM՜'*eʷ_}Dj[zx]RC].SA_wL2-SoU^~K'G*:Z&E&s7Цښ=-rEevsTSS2nX]q͚Z+"<\ݻpDK ݮҳ TDx{gx[䳿\.KJt:'uk'oGqINR^gtBѥ"w1$Sr٥|n'9@ˮrF2H2Jf? `Zj1!;Mwɠגj3c8:>;N|e**ʮ<<}n=^/*Ev{vYKdVK_{C.@I2K6TU}Q' K{k$ϟۺ;NߍeB[@ A >6'ӭIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/eclipse_hello_svg_graph.png000066400000000000000000007060021507764646700266720ustar00rootroot00000000000000PNG  IHDR$#sBIT|d IDATxgUoU0E!ŰbĈGq]\WFsT#$aȨ4 09uwbfd~⢻ҹϩS}ש2m6ض ͫ_v(  è?L䵉aPuyơdd ] ˲K| Jsh{U'EDDDDDDDDDDDDȷcNLh0Y.$qML<4˼.W|0p-.!6MbhaXY.kF[eG+ ,""""""""""""rs8LGslJr%%5 CIaӟ.I{h41؀:Ke٥^rl,xemIM%n-""""""""""""r4q8NC6%u xy ``<.hIfY0}dWN7vJ6ˌJ]NhV{}p[DMW,9mgd'cO)H7:,V%5-|>!!nZӭ{;N=?]i(-ޜ"""""""""""""G/0p5/x: o\m+N+?1eem&SuSJ_ޚ,|^R6%o B8~\vՙG/QCW `ʕDDD6wͪMK%K'%kJsůc8♶;Lvv ܦ d3M` i!do"""""""""""""G\:[|=Ϸʒ姗nc]^Y9XCQx mt4u\gzzZFKV\W `#…Km0Mp O:y0}RBv"ōm T,Xf+ӧ%8Ѓ9z9̙3eN;2Kp@- 0oDGGVa[ N'ud4W(e͵,?Sҧ_+*"""""^/ƍrKS;FUߺ_+((믿cS{=4syL fĈTVzvɴl0 !!!cNPPPHEDDDDet.¦Mҥ3Z0 ;[O=xc ՛kйs'HMMcmً5kVӻw/*0v)K0wP&Z0u\" ,Y&<<ԷH,Xȋ/ѣx?MNNNi = -33M6,u+G^=^~xƌYaL3޽{ٶm\2À3>pЈ֝iՎp6M3gffЫWoN=u8ׇ>֭۸ˈŲl22^aNv ADuĸquU9>G}Jmڴ;wѽ1$&&'% <<+WIYn=Mvm,p88psLLL4}tmXtyyyt܉vڒɢE$**ݏceԟ9s~4MN:D<֭g-ŀ+ .橧/9h/>0$5u/|0Ν;\yO;їˊzD iv`СXl%Drj\.+Wk׮ddd2l0嗹{0cƧ2f&O~:-"gv܅|``y>,:gy/2 }=ٌ7\VL&4yy@qRsqd>lW^y 'p m3k,Əϻ~aǓW^/[wq,Y8.M9>s8yG>|8GFF}||N2%߽^/#F`ʔ)\ql;۶!;@eV}rjA|| K,rɧv*G~~?h{v_yۑs~pb6nC} qSXOaaq^zꫯaY^rsիn_~];sѷoOJF0رcx93Xd ֭4̜9o'#PqΝre2gq&//zQTʕIMMeȑX"rssٽ;m)=~tЮ\/o7|3>_;v ==L֭[SmqqQnĪUq̙3N:Mf㱬C4W?]mk֭[йsg(n*mɱhߪXl+ЋguhꟗHy磰;GӸ㎻xݷ+xXж[IuĊ|pRuݫN頠 'uvx4B]m acřlӬ䋭H2ckkזQF1m4{$\DppH܃ 򒛛 @HH?Iff*m'!N';vdڵی}:端>cѢذa3gd„0 j-"??/_fPPf}3HJJgz|>LǜS1iYɲ /d&Y bݸ喛qk۶iYd. En?/$$2^\\ܹbVdzuV i׮]@ڻ#Gҳgw¹(Jڦuڵ~[Rn} LNCbv+~  Ν;3m{@E%y7֭[S.m޼?jeY˲8ĜFJJ*W_}-Z --˲HHH䪫911ѕs{$f+5/yDpwz"""""r:t03f|΂ 8Xt)ӇL Bppsaڴkf|jWH>ԩS1L8 05\V^˲eӧ={vgլY2p8{aذL'|2oAA~:TÆ !$$y1estҁ+֛'M6E\\ 7o~ /)zuօyմl?2srW5탵kג3_hMDD7۶K8IM韰l2ꯌ=LZ%0y<3+qwкu>O<,@(!CtUl<|jZ nXmQ~ҢE{=taaQiSojnC͛W3/yyxEATT"`s@J헝n""b0M| , %$$<222n7pb03N 't:(,'((򑙙ꏹ"?{m+B_)$'g۲bDDDt2˯_F]̕W^W^鏿9)$++SD 7㫬u+޽k{~[zdgz˶m,ˇ[DAA*@/"""""RڛoK^=*Q8昮lٲ^1˂jxb풵Ir]mq7oߖ[=߿;vѧO?V\N cݶ /ڑG?/rt`^| 3((ĦK`N < mWʈԤE埥ZWضmK¥jT(4Va.whEѠe999$$-`ixy^T]ԦroTO>m6LasK撑ITT Wؿ `ns>)\,^ a80M'qlu^ՆAvqB Q,/fE)3X*Wr;o=9R-XC˖QQƶ0.Ù.$?l"3# ʍϱ=5E"""""".((E2{"""""rٳ'Wбcp(\2!cR$fE)(blllLa:zLι`COIl\檃,_rlpxy)K%;3:H;PHQ-)Vkf9dffj""" iphU&l `:Daȑ$##7ܡjN4K;#""""""""""""""G lDDDDDDDDDDDDDDhB `#""""""""""""""G%EDDDDDDDDDDDDDJ!{τJ^ 6ASvђ8.lf/ jApppPsbcڼe+;v$!!?m=XL61kĊׅvݿ.W2-Y&,,5_8DDDDDDDDDW_/ȇ[tx>r+W&55т $?ANNNgpb_w8~sS[Yz /A8<+|W7𲱼ƛoy޹1z.^}M~2'<(k׭orj;܎e+VoF~cΨK.ݩk PeԶ<˖//eY}y˖z_> e}nիoC8g~0G`o)0*}YTȁlUÒWS\>Դ4b+̻˼x9duy$eVUnAII N;dIVZO}ZSW֦w[14fϝnjϿ䲋TLuu̞;^|οǿ pLn;wabgy&|9?~UUuL4f 䘩Kmu6˯PXTDaA!?Iջ4`e{O 1ѕUm.jjCQ>eԶ͡91s'g7xKLHUb"W}?p|'\s,_LҀC_5Xgqui__fw?ǟ~민XTmρc3yAvNﻷ"""""""""" )ɼV^4j$>۲\g⣏Lg⣏//RV&'[;]Nz) IDAT lǟ✳M|n쨥Z3O 0(iw}g ?>fȠDhe$矷ނ/=#66R#|>*-[YOpddd`ţb(**O?#<,N:X?cM7b|\ƍFm<#ַ}hnS˲iar Oq#*V]]:t^x2f~-'x!kZO[fqUWRXX;*+c1PML WW61pxeZṗ^_w}+3oڴ4^nQ\]qUevVvܖoۦV91s7Zп+V&3X2~}Ix^|>6)SU]5}tNЯ?4 XtqqH^ Æd@D)˼91DEEٗ_1'i5X""""""""""%z0YTTgU'~vCU:m "(;vd=xP?48k4%-=)~.'>.˲eTbcbΦujZZF4 .y>S^|;wn瓝oi\NZ9(("622*0*=>5>.223+TrNDx8ᅲK[wż_/9NIM%`⧺B}ScC6mhqyeTW aي[=/F&ǓENnݻkU^k1c-Ν5 ?fJbBULڇKԴ,ˮ1=Pf }*?nkI)3mOJVX:'/s|cJMV4g9[Wg_~Eaa!Iy5}ҟϿcpt׫WTs`dmۤK/sAHCj_۵mC=xWxGTYM ZSTv>ӡ};# n+&%[mk222d;^tFvܩ#oxyrIۗvԑ PXXȇRmڶWkl%T֭Zq9#oթ|4,22زu?Ξ_]}> ͛¸Ƨ_|E~AZMugq:M%-㜳Ϫ<+k>(u@+~݇j:fj*?:WS:Զf=ꫦ}\Yuٯ8䓘ͷܵ (G8w(}NڟÀ~t,_>Z53ݟ[gq y@MMey La$ϴ7Qs_$+|mۧS6 I#O<9.$..}<;=d3eS JJ➻{o3Ʀ͛_^ΣO=3l zf&?Ξ/J˖Qx<y:v`}rćq ҹ3}7^w-߿`!msވYn}eyMFF/N~.d+++j]~֭pHoNLf)'Ws}+ݧ5yS8kWN:2IZsH~}iݪU<:} 7b>py_c@ ˔YC:ܘQ>\ӹ.P}++5cFd=\~jIfVt„{vkRrN\ly!?2 JH`ΝYcFŁPz۝̬,>zP~=M6 ro~"""""""""" ͱ<#6&³-aڴe(,,dojAGX75JoxٓB|\\($3+,Z%&y,4>K 776T9RV6oNԵ> ~WRrTW~]T>ܪW\s=7^-tb5 :6</){SjѢ̳hJ]K,>\U xUW3:x{RRHHH $8 #M$VCO^E?nHN?ݻwӦukm?b}pÚ;4iYY8g.S^|CbBBs$r+I CDDDDDDDDD4-K6nХSGo-[fOvrx;GRW0qС5w"""""""""r`#D] J!9B84w """"""""""""""-EcYVs """"""""""",#Βaۺ}t hi͑5 =\siiidggxtkh99;i{RRhaQ$ Ɂ4 4j&!8BO(>ƌÇ~H`!+"""""@dge?=zJL$84x#[rX krNW3FR>ګ$AXxm-,ke]δS"""p:JH۱OrsqaoSm[].5GaA! H%ڿbhMI@]XXիٲe ^Q㊌dj?-TRRRx^ **8zI7;+lo>-BBB)n{.iӦb&8ZQߓ}7,y)9c s2&(8r vENVf#);ٿt᫶k˖-,]F f0 uxmf߾}| >=zTYhWff&\pDFF/VmaŤhWPP@tt4=z ** S>رW0uTB8 q\JQE KpwnDDDDDDD$0G}驸F=f_zj` rr<]Zb .'!!Qm={_r '0x*p\ >w#y^i۶-.n:.]%\BlllTٳOq8ضegnܸE]Tfucx<,Y7Ҿ}{x믿f„ \~[αH`y765-[/EC 9<ڿ222_K$ кukڷoϻK׮]eӦM >DlCstڕe˖}vzUa۶Yd gqF3_gӦMt֭$,ˢЯ_&M\.zܹs|>>?DEEqm)Oa) |ٓf宍e;W:W~e2e;ڳ 9rҕ9XNS|{rq9MYVe=ttAQf2SluSؒW~DEU%msZac^gc)ؿ=x pGּ`=?޴|g/Mf-r?oS:aX;:uY,b˲|<3sM7qǿ^mO}7uh,YLY7m`Y-$2Ui9DreloJKaܧqIsֱ'4se-زz1g86^W2ELu;6w@߿C#r)lb?=׿tXnc?558q}<{TIwky`/!Ώb@Nb*?O/~Y ў-)N:ĶӾ; o惷~G6tކ-ؽGF4I܄{puOpDxݘطuY]^S}N2Olw;1K0HۦɸW<̺?t}c#44^ۨJCSÁWͩYaZvmk=Z!)_n&CU|lNkO˓o OCQ>,~Y|7\A8Q#kv-h\?!nR -2`@X=-ٱGD#gϻ6qHڼe |2B-[ƚ5k=j䡻6f?rS2y9+y桩3oyx<"[TX &~];%~$@hHq xNfL'J'*aO9 Ju@y<mpWZUHഴ4YF{-.dOv*ɿ}B߁p6u_ŷ0*MVvk01 ˲ ‚| 媼ON~K. .`̙s? p8';e/E^n;6@}ފ揌DIc^-<|n̜7Ɂ"%݊[bLڵ2x軗ysgiwϜLv62 ɷ2hK1oңSw/egZ!:o?k6|7]Ѵw5h(999׏W'`Y9L{ya_omCBw?ĄiHYYYon8䓈%33c |d$p]O~ ?Y;41Ma4tZK'HLlOhH XٸfK[^oϫX^_ZfS'@OU4,%!WЂį' Et|*IU|~`QzUtJMM㎫v$pbbbZ, >vEQ_D~AtWBٛKg0pإ~]mSPXae]I>,Z˶1 l|> 0qqcĬ-b_-z%E ؄G'r͐'y=la2&Ν;Zի̳/WEgr2شi3kOKat:uJ8Rvd_F&ApHڵ%8(vo㛟~ጳϥSV.cYVQ> &ƹ97_M>@Y@Vxo.xƝ27}c_7\nAay[f07L Nypb6vtn i wƝI%_n|ӱٔY8K0ibNo=X-wfE>"hCPp0"r3hEPP0oi:1v,3_;)ևy7&6pp:,]yՌ}uʪ)CmA޿yӡ H:ucD;/@i)p.Nd&QIvY{ 럿ǵ?[.^\xwoeܧߵo-ҏMyP-C RNmoNj݆ԽiM+~=WT'Pv0r] IDAT aCaJ6mc1QY}jY^};e84x߾ln߮m9SNw/דK_iVh/psk\{5}o{puݓ;ӿo+DDb:,ٶMBBmlr&2f8n"D q&<]<=G>bd!qǚ """""Mbe^EcɌ_?ѣ=~qc>e&.iR襏U||Y S{4t5*KȇiSg{Bn'@Vv&^0 ֬t^r-3^<.g)wYr!^|ز,22*=**L.W۶v1  X͈dq}o&޶;ZoS} <-v툧 GD"w6 gDyJeA JԽ1 :c)gܫ?/fqՍ0AW1I^~})oLa sOIٻ()[B ( ;,?lwwzz*v޻bH*$@zپ3?d7 y^dʳӞy!xEx%%3owtRX `OE9oL{ [:V̛zWQ}--Z%>:r;a,mZSyo)i#ZuVUѦeZMaז ~N, ƍN`EQ:$ |SXh97/|oXO0{yWDse7ү}8y:% Q\=DI ʎxxƫ<=/'8%M 9 3W% du8N 8śoj[!%b A[2=vkv2u۠ؽ#m^MZOs-,!5:z\CjL&3)2Oboٺ~)횾H׷tBIϷz7;uOǙԱ+vSaRnI:`@DIVy^uuJ2R]Xh1 -"--1bo `PpCۥEWP.Z=N f|ISh5:/iע2a2zfsRqa/s+{$řZ{NLq"!kDDjyy{XaClJt ?ׯ/ѵ<0}i߅(hêmpZ56E#ڪ 'ɲRz!B! ֯_=kױ}{<}EoGES@QUT΄vedNAelٸ'.9ENo/jQ tE>ƛzMp xK諟Z3 2#{1y[leװqUGF. ߏmKYZ3?[w~ɜsرrvԲ-Qm;sq]%ӳ{2V\E7@Ll<=zt#g&A9٫7qκUj*|<{yBՑCln}]CQq 0]T"̛aq2x`ߣSHLJS^H7Ĥ$lwʺm%\rŵ9,]](uvFFVF6m!;RaAR2Hv81>֯ wi> %&Нdep0|pF͜9ޭ$_&| yWY.\~5`Mx Ï?Iݲ_go&1A,Ԅ}YԁC2GkM9 n:m*s-:fM(@.w[~U;49ƌ٪[Bqb 2K. o挹>@ /ť(z<=_| fӽ$֯33O{6eNhf!;=Hd7pބcћ+ lUAIq1Vf}X졇d2^MQ2J3GK]=[y)co)by\.רuo`'SW^%Go^;].#;;bbb;wFp uff73S+6pDx 0C{k.LJ4aNc.Gؿ̤pDŀNIIduHLlTB;KUf|xy:@]k<^WޠF7H_ &QA|n3)* bjV >xǭOWW]9\ÛB 4=M01QL7\n_麎Ţpͪ[9s)ڵ?1`tu_:ַ+(G7X\jga$%%tD(*NVtJmۢ-ފb|hcf{'% --g]}=ɒ~|=$GWNDrvlyoд#o-âٰi ~MHKN뮠ED٭ 3Siݍ;76l`y:fbn[qw7^5c _|.4F?pU`mSO={\X. v(*@Iugc*.2ɏgƢw-ك ;&b8R;J,cphmO ;w߲OyEZ{ˊiI'#5ٙP.$ٓ$$.msH֧ C70 ‡o0LҒ)*6i& BE6~v''^=&UU9~aq<T 'ނs[*Jg5: O%':Y?[$խbɒ*$%s6t݂iWyY،,zgnNjb5@x Yo 9[l^~]{qx2 #MQ 334;wo_ip$zYpYYYNII>lC'1?xIxK7[8Sx PuX%|*} ,GnyCSLHJmɏ)3._YjV|p%VLCX u^Fcz}>}IMM4M-Z!ۏjcl8r&]YZU@?'0m6R)TVܡ?QB!B22mɨ$t$Dz} (XY7qM;wcOȦ nWsҨ!t`-|H_&Z9,̙"Q"~8zAᗓAzvEa-p\B t֯__MUU̬W+W1n-`v-'_ @'DxCU(e xzS޸yYtJ_Ͼгk6M%)'m4;_:II< hdfhlIu'w{]as5W;oeР΍+yO9粫ٹ561xxhi?WeW\FzJek8I҈[P vSQu8bzl_N t5&;+w/)%UEE $:(s&>j:}ǐAac߻;[7m o^rv`\^e.]GOwDV;:v>fݵ&*=SL駟W^~V\AΝQUV# enx= ^lھh{<3k!'t鈆TT3܇VoE|9맮2)cWPP}YP-GULWn uq1?0&}VRBɷkSkm#5 gLK+s{.<wQ>-2= K\ K~t6yj+a'``QC\y%yWXp2N {;L[ sšO;Zs0|'A啹.m~ l+涓kqQ3{NN9=bQp _:nb4܁ .9qdffa&tRϧ3vPDKrxM7wH,YJ۸ARr {rC0o4 Cv_o>w䗒y$kٷm=bwAVFzn ȼ_棪*-Z#//CBdiNp Ha .$%%Vj&y333>jIhWⰛ(Ӵv`&J4B9Fp԰ReV>?s]躾:4U?pB%%'a'gMdB!Sl.\ϳҬYhӦu/`ڡ+dL\Fҥdcz~L;m8W_ >[==Ma~Yu^֡0}Lpi|[dH ^Iv4ЭMNNf5 !6 l?Oy6;NncW(.)uu Wj|:ihӞcYxظf"nX/n `PrYgRRO3#bbTu2 e˖HOgk(* .1]NF5ѬQח~֝9IDNruxSr]KVVǸ//WnEA?N* JWi$f0hPvCUl(ڵb=~L#Ȍ'coT VNҫkny_t`:VU 5*111جGHZtvm<3tܙ`0ڵkܩ UIb9[Vof|D;iON͠E| 1-n۩iR ] }h xa4LʽZ%9#6GN L3 RiKn띇]a6P1JH1Ԭ7٥!qlT|;H0'+.5D̑[qb2녜|XQQ%;ozLKX?Xt}* E/n3 #Ȭ)/e Zρ~LLjAy}'_@\!gRrs Y0! Lֲr6fw *Al-{xm( {6:bJJ]8{,[ӉfcA_!>s`XHh3NÜWoe$&Q߾[l-,4iCEEE\~}t4 *J ,XWݺ+`Sa}P/ŅB!hv blܸQ#GBqI fbuN>}$H}Y8JY~.񉭉Ki"UBNq_va_.AJr4'}KIii B:oh\ aquv?7T.sYY6mt4ll`VVO8U8ZyHUU4M6}pP0gy{#7^qOÀfʨ9+n_ 'ѥ[:vꂦn\X#OdG+os߭W 00xZ|7%@Iq1@ۍ~P54Ϣ 6PMCҹ[sbo$őڒVɨ\s/mf㏢jr<~dtդE硤VI"<>?ߏ(ʊX`.,Ǐ'.JvpEp{;#&Ex JǾ}(.-#i#'~×_ -lAiQnD!**Ն%hޜ],]CF,\.6{>~&KKK[:u*;vq&:vHB|b*Y6m''/# 7^g:{f8l6l_BخmpOIctESHi#.OE1QVa 7ֻU'o[D i܆x~ӷM/l^+gRPc@N@c}z}5_L#NS\\̜0s㰓iw4PU(aԎw>=3KH3'_amĴrb%椈P`0]Nl-"6.-,6UUu^x05Yo=g7fhՙ 7CZi>l6غu+.ܜBӜruw'}<F ӿ-fℓ^Gluu 3@r>w߽A_CFv(}ۢ|ma-#{m|Z~o#mB IDATa9 ?F/~dX˨.DL`ԅj.mswQo!t;ሖP-;pZr: :{VZE}ILLhY VO4`)guo>:MӴ !Bbcc93裏ٸq#7nw] |\p_UWZQO8@!>-mx*ZbEb얪a{tHLtL4gw2[6p᥃$ RXGLQٍjPc*zҹN]#FT%uvEFFϧo߾5 WYCOlܸ04.]L&5 FKll覗a+,&%)Jkr8n_KѺ=_9/ qb>'̊|~3~3;2癘Q$Y5fΚcGa4Le2|PR*^mSأ, A!weO,ZvӻkO.|6X*^}HI[Saّ^ysyj,V+ CSNc@ʊ +ϼD[)EuafκfN?=[˭w %wE8|GK,sѕ(B]'ǑժD K˱\{=];o)W_w=ۡhV.&b/נ[,t=ڑo+mv\sMh{Ѫ àcԩt #hefڵoOJio=Ƅ13=>?0+j$V9ć_ @Ŭ'z2΄R3 ( E,30Vg-o |[&mpk{8RkJTk vyHj?"R.TW[UQ]FR? ;Q{T?T 8b׫ O]r''ޫaZѻ>j~8`ē4 ]ܳ/?~%66ixTUSv6y}p 8 RXXȔ)Sx'ޣ^۶*qqUI`ksux|L?N:܈5|?uԂFK|@+m_?=Ar wzwL4o>c}ǁ@]yX6>?ť:)؝n#S@0M7=woGn$:(t|*|>&tlm$볲W>fsqGPp<>ctMp:~Z>[&iH?Ԩ(^GFs#=};Zg^pθOC 91԰m܀6딆4s؝[V-[MptmM[%00q=[3W&sgʕ`^e}b[x aSz)7EQjT+ϳжmmݺC;#w =jun\/N/6HRFP%7Knm*+Wlos Y霑]WZB!S&K.7z"zΟ|[jh^߇UyA|;1sf`M>M~d}tJo\,w?YaF=y Lm6u4j^3j0}BÅjjYjNLjK+_W/=6J`%srr(**fPəG[57\o?ocḺHJXb3 5 QvmbRP?.Iz={&$9~ڴ@jbٻnWsuGZ\NsT4}rZj]QtRm -Nt.| q1P5{.U`={0e}Q_~aǎк5qqv6zY> FthŸ^ϼ}7ky _?U%>`DAg9h5];^ۺX+ -f1 /~5{NAHgIypL*+kSL|ndD;ՃT^\P}$R+yHk۪o%wE9FL^x;Yu1ܤߊR#o}c  zb6;vU@rr9eʇiՒwV( LO <ӵK(Ol-7]rE5TZmFZapǂNcwx&[a7gUکORE&|؜wҘV+K.e$%P$P4̡)=R,B!D-bccKyhJHs$'ĢFw`gM_zza=t+kM2LTYSlnى$':z(BQ]yxURf-\VZU0CT 111\/IeA Ä9/>GÄiO1G>;}V\7L{goM}oU%k93>UC'OD&Ъ;#}3>eՖ5}k)_:II6:i6|vK,=dƍXdP8iltMxOl;pWoA-Vf6mj ?\>)7`0X xI>XV˞9ݍr;4@5pOm!B!jp88y/(Ï>⒋/nDz}ojW⩇oGêN"q4 JKh& vETT42J+0i@,K߿DLˮU eDBZwemz]veٲ5˗/1Nnnnᚦ1z-z%pe˖ՈV {5 Ҁ734E|˺uŸˣCMu4M d1JKKi bmSN 6Я_7rشikNa|*GQC*kzSH߃srr;xG6|83fDNN[wЩu;bB筋𱯬$%:>'Q t΅FMXk&o{%IaIݰ{(Glէychb)|@zOc&D3o;dt!B!1pp%ǟ0lYH=z%+QUU%>hhɏ߯e^/q :/]f4WE/weؼ4"ryyd7wpxx>|8ւ ?>]wvF\9ÇcZg  77O? /Zڹs'>ƍ#;;^GS'ا~JZZ~:i('uIKKSNMz4Mlի9S`ZlXVXxq+ON|\,-Z 66-hNroX>Nḇ~k]!B!D=\elr5(ҩIu½+T~u~|Go6j0 vMaa!'O>bu̜9>}|>2339묳 mEQݻ7F:d=ʁb},X&m&[}*-//_o4~xk+roX>NḇB!B!p}ow8 }h(Au:K(//Q"e˖\uU^FMh*B޽{~kYgŌ3XjUf:t(}9YYY\q5H( NF³*A(%$( -Z`ذa^]v5Ii8 rO>jW_k!**;fJ+Et:HIIJ {"%X$˱ !B!nIɩھծ5x~4M>?i:MjF BJn_6bDuW=giğ(v4MJP5ZLG( 匩zNZZZRRRrqLC=)̙3 {q3|0lB-RoUU5(;11X,x[B497,Y_g'EXM!B!uGCEY)Mte ~gL ѱG)m3V4 I44Ur8 >`FnEUqܤO>tw … 9b޼yɴi/еKZh@Uժy)i躎jE!B!\ !B!-uF)/- / è_QjD9$Sb@\Ah.իcݮ A40LSynv|>nAqq1g ;w?3$==ߏ8N,Kd(U?B!B#׺B!BcI]QcTXf[2*:]Go:ۓO>ߎ{0 }Y `„ ٳriZՏ5B!B!B!B4I`m´)OViԩ|x^xP2dÆ 0 6l`ƌ8N4M]T !B!B!X' `!DniӦ׿c=n}t3;kT?4ayyy09k!B!B!BIXuUWrUmV[feI˖-(..cРAUU .d$&&v{(*. % ,B!B!BcԤ?oyy9SN%666nݚdv;VYfriswpgTUk 06YYx柫dB!B!Bqlҷn\1$TU.[nD#]}{2arrvcXX,躎(`aÆ3vXʕ+ ~ofaۧ7?d #!.عk9 !B!Dm!ıMCwB4בޢQ( EEELF]häI(%5>U5=ôixxҥ _=~:n0Xh o嗦 aڗSp978!B!Dro$:HAQaZUWq) jV%pUU{eRQUfϞٳ9r$Z||AΝf'}kSRR ^;B!B!Bq Xpas (,P5(h5\СCݻ7{d]'% | ;%B!B!Bk$,$\B4EQ4LUULd9̚94QͶgd!B!B!I !pN֖LW_ٜqe+B!B!BI !PIm^B!B!B!$jB!B!B!zQB!B!B!BHX!B!B!B! h!8o(!B!Bqt֚i?;GhȄ/ կ\K~Zwǖ؊.>~i~e>ҥsg"Y| 7t3^p>{n̘18zLOf"&(P@EAQTWRD( pLjiA5UW4+LT ]24|}xղ58x:%{<8$%!B!6۟:ư~]d .0 ˰aC=O!ğq?Q؛^cXgv_JW84>vM)))aŊsv뭑Pe?35r߿c۶de1_~ 7S43:'|\u =ggq&. WEy(DQ[rE%.L|eLlj(@LzCbosXp8طoMl{˪`f:hۮ{=f|j{B!BYmÂoquy<.r~&Mi^dž?~ ;{ĕ9} o0s>{.W_} ]x!iiix^Cjja\. IDATdddTUM]{n}r?yr{''' VQ/((jصklڦi̚5>z /ѣGHGcG@p XAQ@UBc=#@ 0}4'SJr2e唕iMCSi*ح$uOfu_}~=sܫюK .!B!{ k t㾊ϥ)uNWޥ˄S56N"7z4am6 ~RN?k.-^LJJ2%%|tV:s=m3Oswq˖-磪*a0xa4rsعs'^v6l$**|dwݽ{7C gfҤI}ι+/Яƍgګ0nܸZv:.2݋aKU~0 de9{m#׺|Y=z4~!'N:(͚5 ӏCۧ\-ItЁ޽{c۳˗/AǎWgãd7CBB/"" (P )A)"HT(*R"N@j*$$m#dɒ|'3e{ʹs,s8aD6_ω'g}RNPdWƋ/Y3լYƍh g}ѹa6+H͘H>d[;oǡT͚a6پc;1wZ*nߙqsu+T^~zJ6KIIn&Q(Hv.Ç0vxHIIaŊ<ѮP^AץݣvB!BpxTm V۲7,|}JIIaѢ5kսggg>:Jj,gZld?׃𰫌~mBCCی?۳p79{ >W^~ȼw޽[7~YTȽ!bMmʅ-p*eƍV^:`j5t~ ~a6y}@*Wh4bcc-I,aaa|wGMȷ]2h@=RSSyG~B&Mغe *ϧMc!sGǵ{n?޻RbEϛ!4l֭ؐPc)k4}g',ZorM&Ocizzzw}~͛ٳ{^^^L&J1ׯ:Ha{W@lL h4=<%]o=ul&---v Vg˰aѣ{ER=ټ~NCz6mތ]ݼsNj~j-Ʉh `ff0)f4lЀaJƍiܸ1QQQؠ_>..@ޟ+J{{[RRos$h}А.]zNƎˈ#ظq#& 'NPCؼy3~)*vjB$Чݳ ?ZsgKo3{0Ƽfg8gL1˗uRR\s\{B!BN"4Skۊ|?y 'I圙JSYW{BPEobHƿNҭc}n~;zOkǀHqrtl6ӷ_?BCҩS8;;͛I0/oR e[A^j} *X^_zRСoZլY3ݱC|_Ӱa+I+J|}}Q.22{\\\,*UGg9_E>uu:[^~eRI*U, F7NOUaԩTR4xr2k֮e<3hZlmmy睷9tPڵ.]T*;}ڷs=G\\^^^|GL&ƍO@@~)߿?~GRaR߲hLH^x3g?Xwڕ˗cޭnݺ̚5 w޴iݺX$Ξ={xcGL&NΌ7=Q#>48p'N d/<oi&.$CL2W=z4 xoqM>\?܎''3IK'-*C.eGGGΝ=G!((Gn%aog֭[ܹ3}mdh4\~;;;bbbɷ(]FEk?7??&3=jk#==X^^.B!Bd)hfYJXl"?>AK9SX6"X `}%uH(GbHRsSRO\p'"<{‰ d&D}Jf,g^zeV\N+]~ܸqDn'.^hioEF#sNތF#Cyٲu+[fmVcyo& *w۶`ݺԩk%4L1gxa @q@R Cj'g\R*gNjj( |*py ogFJj*z_T+ݨĐLj^ {]_A< !B!a`6Cd|j*8bUtK)80oÚ5kiԨվ_VŅƍ2å;66֪|~3_GIȽ!DY(_q϶l?&9]?sLkS*Hll gBCYzei֬&LLӚ@BBU+W``O'))vk֬A&M;vII\t/ʪqƓ9db}^ė7 ?oCZ%|{nLR/'L&[O5SS~@zz:gy*1zY%رc[ւ/ЫXm/Ԫg$iz˶zbl޼EgkYϏ&Mek׎:ҰaC\\?onԭS۷SfM Eϟ#uNgGtt4IPPG"::*VDT_%BFm/qsueرv̶Mce~ڵ+!!!DDDޭZj?RN*UW_\QI B~߸$ϩvZ( VV(1VPĄxlU`3Mr^SOX|lqv %j#vnUw zAc<]\\\xcь=ڵk¯رciԨ۴fdK'Sh3(p(qdLoQ/lNƉУ 㥤׈\3?~X0< !B!aag;xq6SvʉONܩ!3G7>6ml{vfΘ N:jjj*_=ת-oooBCCKXKr_Eu!BGO4zȕ*/CsjZ 2Ǜ7Rʘfyo%O1xT71lՓ4,aȐ@?~5vGPz5׿s|iw?0|*W/QQQ4m҄yp 7m$whO-l}29;Rɏ?|O~Yf ԯWo3S^RRR, g_>%%Od>@ǹXz5={Q^^^L&͟oj69pv:;ˈ4ޒQhP*O3T2J&3;ƖyyK`?nO]pI._$%'3pB_(\{B!Bmڨ8j[6Q`d21|H ~^gҤI ~gʇLzifL&CAÈ#=g:ub8dU^{wXS@;sjkZ3v?ŬY;[TV5֪U_r nPeǏ9~VT7XԬY#[B 89:GΚsYp0W^{;=wYmРǏ#<<Nc 禰ǔ[( Za(]|:v|ʒ4$&&Xց CRѥK*Ud 8+w1c.V}Jx'EPR):s\]-eN ߯ 3k,*TɓUjVX~[$' {#IVzrE='9b;e}!˭ͬɎrr/_&<6ل>7]`ղQa2Z mڶ#).Ζ任SNeĉ(JFIxx8ݲFJ~΅>:'}P!`T`2)0a%V[/f XիW#%9kס^~yد=B!B!Dqo6یoH}pACm?˥K1TR:ѴiS.w+WR/vvvL8m6 S}WɏqN+UnnnZkʥ˗Vgϝe9| 4t:nOZ~ْYUǔ]m^qr#._AcJr߿)}FFFJFqXIPT4mڔHUN^5]F͚5?͕g< 빕|}@4 !|A:r0IIILׯ/9[xx8w1c:n ||ca @>}غu <ף #GV6޳:u-ƍw}kXn;ӷo_lj'r|6w:BGwFY<숿qWWk\'Rبm0ԭ[~`D طhlu:tܼy?#JOIq;ɛSV׌paQRQµG!B!xXp6zXk{خRzjh4VW{BPp&uȑ#9}ooo֯y3TX۹s':9KlL NN\x'h4h ,۹~:8q#FGq_Yf fV^{$ٳ;w.Cɞ=ywOzz:t nܸF`00dñ=RNgDze3f +W-9VˀX`|[yœm۶j/z0`Je3;眎ٰa<]-{N Ĵi;n7pqueÆ =w) IA\M\Qktٳ''55V˴iXfeTF'`ʔ)B^Ozz:+TaQtFf̘ܞ=3!0gS-B!Bq0U{ l_! !#Aqas՚x]:gMTTb׏OH ** *FDDn۲FmzFj `0Pj(гgZtBCCiؠ #"h4sEZ-fʕ+DXX8GGTR`?.] @*UHII!** OOIըh_NjUqt[aݲbMNNƆqҡCGobUɓYnUVҥKY(s[^ό3ؿ?7ܙRf|# jgg ɓ'NEaSj+bBC IDAT!B!B!ȟ$HJS!B!B!3Z!B!B!B!B!B!B!B$B!B!B!!Hhx,~66 eڇh Viks=B!B!ģDm!խ(=.VgwOQRns:Hmu"Ϳlfܸq^{ Z J&GgyBǠOK5=zЪU+Bu/kB!Bq?ӧ΅s.hZ3B'.L !ᒑѣGloڴ)z*ӾJ%fLB!B!B;I !!FÔ)S ]y4o޼TիҖB!B!BQwB!x 4kRV-o^!B!B!#G376Ԕs@eooOrrry!CC-[MBB"m,B!B!d|_sbxNqf Iܘ9n<6l/vo6;vxnݺCҪUA眝h.!B!B!|pHuBq'|Ven3۷ӹsbDZk.4i{1b$>>Cȑ#; !B!B!BܥL'@2L*&TfɘL&}9.rH|}}Qsu{ll,ZGGb+b fϞݻIJJ*x8::ҲeKFoy#D87B!B!BP& `Y3#[V[2n@'F}w3 "--֮]Kz**gϞEʕ+]pIӧOꫯr5hѢK,ӳx'Aܗ<<<B۵iҷA* ӁٺM# >wqg$%%'ڵJ;gx͚5,^8}O>9}ĸQ+ `̻=aotd#˷͛7s<<<裏 ѣߟf͚sNT*ߟ&dO>oߞٳg֭GfٲeB!G9xzzr!/|M˺۷o'&&'''K>|.\;S&6G32 (iV[nM`` 'N@TQ;w+V… %ZC!B!Q.JWX gW6v{s%j?KJd0vҷı7SHFct[Tϙitk!--wٓvf}є{?3?~<^^^L2ӧOSnbgW^y&Jfg<Y#z2 >K|SLl6s%v󉌌l6ӫW/&NHllN^Ν;lj'رcZ'ҹsg~"ߠB!Æ  MѣG-#IZlI֭+21sLkItUlmksyHMGZWVZuVy\˘f"""pssβJ*0rNdd$θYl6U Y}||r]C!B!~Q]kR ۻh )VϏW6= x}\xxՐN\r#,,$p%;\ QH`=)9KB,oOzl$!$pA_A ~OslA>N#==˗Q* 8Ъ|͉/0Սʕ+뉍-zB!DyJ@rH||/oooڵ+| :Rsδhт~/b28z(kȑ#JNϏ'|'Z3x`qK%n!B!BH15 ,7wq#t)ql\ˑ둜u W\C357ݟNf3 Bl8y9.Ӣz22}qliy]S@O;fBo:W{ .'ܝ轗,>>,'kܘ1uʔWЍI *`׮]4h^Kdܹ3.Ѷm[-Zȑ#-eF#~-WUV0 8@>CBCCiذak,X `!B!B:b# (.o()Kײ3^lhU 3ȟghQx}faMe2+k'U/t}ἲv+_TϚy 0ŋ[]t#G`ۻO?77WzEzz:@i `CrW> C42rMGw?ęE?S͚5iѢcƌa޼yh4L&{j׮MzBW*,[7/̞=ȱ!%KCV<==-:PvmΝ;Wa*9o?`S\B g3pB'7HgrL".U2U^>"é\r}СCy饗 !66^OFFFr5jȱry͒Ҷm[J)j!B!B3<~4yd/u9g ?]8K]GGNߺE.>08d܁<)s[APQ+iFĦq2 = %29`YgfϙÊ+9sؒǖTTGƗQˍ7cRˍ/n+>R,?YX^^5 ~9 .$==ǏB.//m;ǫ;tWư|v+Z4ɳP5j԰ZremFbb"QQQdawoԨeg!:˱/0녿?7n ###(MB.l ¶C̥*fN%L&\;myd钕HN˳ltt4~~~V*VիWٜcpAbbbXp!іQ'O5Vz @dd$~~~DEEQnB B!BQnVJi Z*M&* &Q*DYUm{fV0jZovRΞ}!LPqynb$upʯjeξ&pyPՖfӦM| =X+o߽cP 4߲s>lNJ_'''oQeMu(BTT+[.G2 e_ /z}խ d&Hn&qطVme_՞j@ɽkϬ ͆=mn֭c̙9wڕӲeK|}}eV `V˼y۷/L:ѣGx^<<<8q"-[rʘfuV`,YB>}prrq<ӹ]vB!B!(K~ C ѹ.[)ui7o_Ǒ M9Y~ K/`_DZuа-jW`T5k";NfԴ4I#Om}b,qY'n $D]6mTYz!Bt:UkZz|(k-Օ8DBBFWWWTM OK-8J[hh(]t!44%:W"MOF+ g\޽aÆ˗/kcO\\qqqԨQS~FT*O>$ }ϟN:>}ZjVB!B!ʈ>-u.=뽍 Q~zVٿf5-zH%ͣ܅k1KlL&oʑ}׋W.r6:p3C1#&*zQz3o0{PFC1jZofKȎso72eOH߬Qv 0Iy| _{{|}|9SR_:Z#BqR*`0b:D6m\ߥgu%MjjԨQ{lڴh4h8uݻw*JPP$B!BQVpqxv׹%K; 0m5DHXCP(TjNpHMd>W$pYf7xxx \,Myjx%} }M{7os|Nc…:t( KB!x3CUPPP;yrkԮ]^z}xzzb V\IժUٻwow#),B!Bq|1.l+\hy2i4 F5\ ?oZ.o֨x^U`|3=.I^S?pt. m{fVys6|Æ/kJ .s:u^$,B;vl;ݚ5kƒ%KJ-!B!B{e/W~;W 3 }:U7k[Y*$pzM[]m7-[ϹJf%Z 石w^]Ɩ-[;w.|ڵk׮c_/GʚoTyn&۾ GJ9{'-- ^_a!B!B!ʉB!RÆ  MѣGiѢ7orBQ? /LBXΨ4_LF FS\~??r9=S!B!BqxF޽~uֱuVΜ9TÆ ceBQ:666888 dӴk׎TRRR+R|mی Cp>1MbFpva>Qfg >QfyiM6 65jDÆ IHH֭[rJKW^yK燓&LȳYpa+B!B!/.zt79bh];_ۚ8q"aaaԬY^ٳgQ*|G }ۗ}2x`FcVX`(B!B!qOk֬(rТ<>h{b|-F,.V}afVXW_*W7322FSXֶuB,_:XM pMƎdctڵ<-jPc/ؖk@_4ǾNbX,Ux|3$Eߖj>?z6+r }$Л7oŋ-4i|@=vO$7oތ=W!B!Bqo)׬YŅO܆8UF6Km$^{8~RQ(Ӈ@Lb~UڴiCzhڴ)=gϞk.jժEǎa͖}*T`Ϟ=G}ʕi۶-kf„ 9~!"?%KO,۵Z-?8JZMÆ rJyzOht( ڿ&CMkFA[8p <ԩST.mP.} !B!B!=(#NQ0]F`cW1Z9S0X1uЁsjߟf͚sNT*j 6pA<==o8p /^l5 F#*L0T:wl/B˱矷舍 ?)#54tܙzj^|2G!B!tvB, a0N֌^#v.h4V?%@rr2/^d׮]իWի$66RgС) dbΝui.^Ș1ct[Ŏ]!lf͚5УGIKKLT*]6l6c4\r9G^5kFTT7oDRtRիǖ-[}!B!B!ă}O>0 [i3tܞ2} _́ˊҥKر#* ???F#GFDDX^f"##-\NGjjeLL嵿?qqq$$$ BFc}n\\\=z4qqqh4l"'?w>2ʷ{,]xZ^M/XQF[?~<ǏvybbbxC ,SB!B!r_pZftƯ?v{v*۫Ikyb<4ݻ7bԩԬY-Z0fӁuwm… q#sZ~DvWׯ 55ٳSV-5kFPPIII\xYf !N>LBCC VZdB!B!BwG% a4s"6E] d:&]aL&VbPR%̙3t.Bp޽;ݻw`B!B!Dw&666N:aooݻ[nT*͛7cX۷DӦMi۶m) t:Ԯ]Rkڿ?>w^hذaP%%}GU nz!=$P zB"{.P (卟"T *E)RbA+@ ! l}ִ}]93g9%)[kOŹ?OiX:[nE_5cj`Phյ^ey0q7caVhR\~___J*<++hʕ+IYNNPh? ,`ƍ۷bB8}r1ckg_a:;멈C!B!ţ*?\>g#p8Fʎ+ЬZهnUV\r+_f e˖UV۷oÇ===Yl=z ;;rAzJ_~VZOZjELL +WFVsyJ%SNeȑf5}t\Ν;MʲoO>MP9ͪӒqkև-,x,"N|3-MMSغ RB/ ZufKy\@RMcooOj-)p(d|w%B!B!B<$ U1 ;C aƌW^S2p@lmm3i$N:Źs`֬Y4i҄cDz|rc3 1tP8p`qӴiSbcc3矱[n ]^#M=P*ςk5b%xtЖ0aYy8x})OT}ZլYǏs9|||ؾ};AAAO:,!B!B!⩲$$gTɇl\3:\wGC~駟ꫯ?~|L/^_|aL2}t ܹsMQ( 0C1k,pA7nLjXj+4h666^y4l5(KS;;k QTڄgը0}V_V5;B}^ZdBGOmqx%6ժUcܹO: !ϱ,.\@zz:TZոOFŋyqtt|;T!5>F)'CWL nXդ봬рtH&G*U%B!x+V6|t,:ٱN߿SmllLJ3g2bFa\gϒMݢE t:N]lӦ -BVgYx8&&ݻw3o޼Hq L:  3SJq|jm_«!x[$\^W\U} !?eUB,3gƍرEIsA~Ǐ?~d o>\~N<㓭A)ֵ#Q3 :5hHę3gصkqqqdffrҥ͛ii&B!ȥ6]y_ x"#ZMjj+---O]Rre>a R+ ¤\ѡUk0Es;Wl-Aё+[TX~ 7nuD0?^Z)[I88T~bo&삃IYFv&Y9jJVzzo0S؋Ĵ[8hWϽ)xr&Ĵ<-B&"Ezy3S_~vŇ~7ofʔ)L8J*YNzz:Zww㏋2NZ.Dqtt,` 66<}!B! z=%xW YfѥKƌcrg0111xzzĘ)H\\7;wf4k֌+W2vl{> {XY۠i(BB}( *wπ_ U4HHH%eT*B~g`[ǻ?o9Q,ku7!:~Z~FZEh>Yju:}8Lu6J5'"bh7m 9j23iG1m-f5vT:7laos?I~b㱽D%ġ7 x-e*_!G^@O}nU1Kfcgm'yoQ ow`gG֓YWoFP+}٫Qv|l\/0>[32ҙkz 3k|.^Fʝ,5jɲ1ccemeLs33xsf2U2O&-3^?_sƧ}4HR=WP>f~ s+Lhh(CT\ݻwm۶cg֭[ ʕ+(J:t(gϞEPPvm{\]]|q-&NȤI]vdgg_Oƈ#x"}!99;wЭ[7-[fV bƍDEE '00Xgȑ4j!B!PIaGhUӇԌ{[yjg^zf͚G^:eʔ~^=Ӈjpss+իW/2Gpp0o߾0hc^/ !Vf=8BOh7nD͚5-?~8 43Y?+i J;O˼ka\$v6]׊0\$:ԥÂ$7z=vL4|2zW1rkLVext/~_=^ΞlM-7mNutz=KwmwҴjbo|Z?\WR*i5 o\Ŵou ztzYVxz7yѷi[KF;>! gKX Sگ87*_ &cϙ=>&J-;iPX۝5>Eܲ[vbuɹ~DT tav0}o/j|{ | ?OOj)l͍0hZ_> >=zP~}ٲe {`ڵ'8vؑ!L6 r{ǎ8pZjV9v!Nll,7nd_y=z4AAA,^T7nŋyNҥK駟hڴ)111ۛ4 CJX!B:l3 +o<MJRrrr* Q՜?O>ǏsAԩ1nݺ=W/(Wh=s98tDB,B[ko]U>wI`[k|1OVg15::3e' ?^,IUlfB1O򷤼s()=H΀j{X,Ԩf z4nE)G 6OJyRxܿ7/7n޲F 0襎^Yl?u{!wiܘ( }3?3coޠN*;1Үs]IHna\&Ǭ{Qߒ ׯ~~~( Y bܾ}۷o\r-&&&qLEll,u1;%&&~xt$$$[Ԟ&B!:ڻwaaa9̰aÈ`0Pb9=$!lceF{?!v';—zP+Tf=l8A-;<1 l.^_ݛkI *B 1n@nRt>p ߜ8)l-(fS@@kfƍlذd+??l1Oe˖ڵ—,S*~r_?IV\o8!B!GTBg;mdkrt޷{r(2w)RGm'1Eyowz)2yǯڻs.`WkAckmC XsӮn(90y6:{ Ē%Kضm4M6L2.w_t8p?Yj[8zIeʔ!&&$󶶶jՊK^v-ڵZha,N!B!A#诌ޓ惨LXo&ut3gޒz4_UuB!@̙36lH޽ǏN:X[[?Kڭ78pqgղ4ɤN9 bT\';]>n[F/fyʛLLڵdN.ņ#Pf-,߽ e~+ֵҥr +U_vT{+g4>442^$2@WY,zc=fR.efЦ֋tk=dR2Q)޷/EfQXE0S?ׯhԨ+V4)[bCOOO?~qΝ;{ѤIʔ)Crr2Ǐq66lH~VuԨQ,Z=zPBhӦ !!!åK ,!B!B8 ð_}tE@ڏ=߲ܹɕLfiؓƾŊcU7e|B¬@UM]I@`uڱ/ɰETL2`hG)?b[i:J-xicmԫI8a[d'=)Ixr+R7oqv*|Q*,[L=Wq';r^y&&|QJsĴ[&OV#:1gܜJYդ딲w,ֵOa)je|HMM%11ragg\qaK\zRJڋ/RZ5"""RxY !Ba)uv\>g4(B<'Z}8UL09٫lO/'I:tnjɧ Jr2sZxBQ4;\YNNZYX_Rž`- [;˖茫sV*|~y/b_4(j|9QgWWW\]ߣr}}ʗ/?22PI !B!=3 `^-L(Q-lFƷuFZ?2.SJŇ=)nZk =K{nq֬8+zZ1;JlM)d-]Jn(kMJ'[%c$s)Qo}!B'CNqYee+зiTxO<]vk׮O: !B!B<Sדoda_gc;/O"5S/b|[|nD0jUz onn _TdJΉh5JF~ oLS_|Ý4-D3Dgޤa!Bf}^ ,S*Kv'y?!B!Bly4=`td=>YK htEO'zwF,^Bu-NYO){"QW-nE%ok @oӺ=}fE]D IDATn7%SOz-B燕JU{?B!B!ϖgfpN.MUO)Lkd_KZ1`RT tq3W\ζJcҸ ^*2s ξ_zeKϰB!dʠȗt(B!B!B=C `%|Z}R+tJ% i5WRvJK |7nc-ZDNN|LBBdy1 -ڽ~d o>igΜ`׮]őɥKG~˜9s&G)6B!B!ϟg&Ө) [v @MXߗʸe1;ſ%0y-|\TlC/#2z%mJ'Ҍu^Y%;coL;&èҬ)MRzB!BHH}ۄ}۷oLDDm6,N!B!xZYrV*(J*e,ޜdzj*HHH|g3ƌ;C۶mYb'NgϞ(J C ) Ǝ8 P*zj:vX`ǎ#44l233iݺuW{f;ݾx!!V˪UM6uѯ_?V O?įʠATm){J*%vss_~P(puu54ٳgsmoߎ?35={УGJ*Z6Y\,n!B!"?6* RUޟfRbRR)<|9VXAtt4 bccXhbҤI@pp0Ceƌddd|V͛7s1Xx1Ç'** |ضmGӓSJxx+wxfB!ez=֭={\2eHLLg3*o oPV-c2AԮ]7n:D{RRK,̝;ɓ''kkk C'B!B>ndooONNN{ !}:ƍ͛TT){ !C-x"III%B!B4"GwoƯ{ l18&&@rpqqᥗ^믿`0P(!""Xjժ2owgTw̝T(QYN:Pn]^{5K 'N$,, zRdٲe,ZJ*QJ6mjqm۶nݺ2a-[;wd˖-)(yPN'Bq$+W59wb$-jξ$*uvSB!B!Ge5Gƣ3tìz&mNǵkj/_"QDEEW볲\r8:: !'=R2xgoI"B!B!BHqRTTXѢklmmVCkoom!D~%u: )O: !B!B!B!xf=|AVJ<JR7-BK &н{wZjT'B!B!B!Y +W2tbuOQ(O6}v*́hРDєX{B!㖕Ņ HOOݝURL\zXYY{!B!B!xz^~=IGc!|PǯEAOzz=_N)<{Ej4(SL|H'vvv&/^o}VK||ÇY`}`<6 hZt:7ʕ+DFFrcz֭[9z(qqq7ВX!(I899qRRRj-[B?/_~" !B!B!(%$pMG)+]xgvH29j(x۷[ ""(y;vl׍=OOOz͹sLB!ƍ$%%QfM a۷oDB!B!BQ29{Nxe2ΣIDZBs~??ϧCn8''N'{ !xj֭[G6mP۶mm۶V!B!B!ð8s9` PdUHز]C^j;``]5 ؽ  Lۓe={DB!B!(o<54߲{KJƍywLʶmیǙ&[jϞ=o߾ ! `KElP`ClMTVv+ˍ`M=?Ҙ ޞjժU$(d|wf+Bw+V ::WWW ,ZC1i$` 88C2c 222hӦIZ^^gnݚ Vҥ Ǐg͚5Ƶm6='SN%44ԸݗB$=kt³FGѴB!B!B!ElPqoRe,RR)QTZ݂(dggau놇֏ܹs9r'''&L믿o .pݛ{/LSFŋ/ܹsi֬Yqwwx\rnnns˗GVwyyy'''N'x !")B!B!BT >7kT +ey;e'}q|͛ݻ =~mulqtMtB.]Xre/pB/_۷󔇄ХKƎ8B!x.YYaRb]:7J3cbbv...K`0!""Xjժ2o<233YpaRF f̘@NNf͢gϞUXtű !DA$,B<8s IIIٳ>lcիWψ|<~7oAO:d5jd{ A|D||? B=OdkrPk{q?rt5j&tD/$9s]vGff&.]2ӬY3Ǝ^:\#G$;; &ЩS',l”)Sh׮IÇ[.|7aRgܸqhт?ΰB!+V*TdϪP*ب,OGEEQN[._~q݉'zBTl2-ZDJR M6-}Rɚ5k_L2dee$wɖ-[,!(,-BipѮ{aSZjMi)yx[ LE&I\g맰77ڵ?+++6o̔)S8q"*U2ֹΝt 33=̊r&OL-U^w(`ƍ|}~]6vv?矓CoMfVZ?B!DѬTJ]ut:jٲ%\v VKM3lْ .\ӤI]FTT~~~8;;Rs u!22,z B&Ǖ*U2HBsHX!xN=BfAM6mPTrҥg2N۶m7~0uVBBBr JxٳgQ(Ԯ]޸|nbĉL4ڵkGvv6iii0m4F.٧Osݺucٲe&o۷oOPP7n$** ^Oxx8eZ-?C˼^uCqz n<}9A| ;g/p*KLRzd$2*k!g?3 ӭ]ӟ7G(5~:C[Zw-G?݊NCe[)p IDAT~4ìr5:>K:BAVX1Q65iѶS`Jxpv*`qn?m AUj^i`j?Pb '>7*_MY2;k[>>{ !;֓YWoFP+}٫Qv|l\|%̭t&Ƥ^ׯg$r';nZlXY5>EYo+ ܟƘ7o0WfäefP݋5k&IbZ 6 jg#OAon| etЁʕ+{n<<<޽;k֬)Vx׮]L8FC5ذak=z4`̘1? ֭$&&һwolllի}Y~/իWԩ(Jqqq)j5ƆN:oYȑ#iԨoE !B[lRXERZ5B,-B<.]Ν;Yt)p9nܸ )))O8_>&%]} & xdڴi;vg߾}\|5]µzƲl2%66֘=z4AAAѣGYxI:KW_qm"##Mիiذa_,zSNYNeo/ڻV9:,F@ц 0\$tX0z#guzZIg|;ot t[&-!;_Qb Woכ87diu]ecX˶l19о^/77m4C׳t_n;@?᜛@tyoj:I[:ά{BU]r۴-J#}RL~Sگ7c)D4Zx6AUjv xzǧ([v"v6^m]>Kk:N]g?avQST[@m99.fLBѯ  fܸqq<6lȁihN… š3gL՚,-7mDll,nnnwqQW_Àl"+ Z"WR\Js~ejvˮfj}y5)Y ^ d u`~a`Qc5Yjx9ߒw IM}߿K.~ܹsݻ*_}*ŷ~ˉ'7n8_~1B!Bq;Y,B4b唖hTh033c۶mk'''??hFņ hŜvABBhZBBBX`R~;;wfv%%%ڵ{{{ƌCll,/Nޑ#G* _~ʪ@TY 8 ߡ]vC W}i72#6;Hu6gtlۧs?3٘0k :{O$]8/ɩk=y9餍7XYl!_4†;8%D{ 8E_~9߹ ?=}*67kR6)єدzmo0rhNޚN7X6ogTΐTi>ssq~z|wѣG+[ߏ??{Ϩ:J-,,8}4+;wUV_y,%ӧOgذajժ}v\\\.'B!BNB!D#닯/ZUV{nBBB07B3uT4i?\= ?\@IMMER1uT2UҪ]klZ-nn̬37 jwfP㉙mf{ql3;CKt.͌]޺J,Wo >4Su{ [+kKK)-n?j\Ⱥv1o 7? ^n<ܲbkܬtִYtj[_xy3#.W[ܩ[CnbjFqiA JumߘPދJ=r;9Ss]]]ʪs]03fpUùz*O?4+VԴ66ﻼ<ָso:B!B!j#`!/@RAnn.>;;^yԪU+زe 7ofJ'Zm۶]1۬Y3.^Xc&&&H\\\PTdggӴiSb>o W2$p={GUeoŕ+W(-- Er_H4<+k L?aDe"UtJM3G~F5r2}}mXlV0O< =xHT֝wUqߒx8~urNޟ[] 715 7-Cr$y/DItrxX]s6͏C:8AajohnCR]o7@L%@lwwwW7i҄R b6|Wkooϊ+Xb&88`zZo֬$''Ӯ];N<B!B!ꉜ,B4B={V v\|Ǐ+BJEIHHk׮q):tАݾcGfժUl߾QF)]]] 7T9222kr AjjN*~XzR_~e\cƌ!11ڇ_8zhA ujP%dӬ"xi<Wqv۶t,?gU;DBղq$}ǰkΆzkй"WKl-1U<]H PR'ۮ)+cO?CIqiʾSI\KF?oe}3HjHPGxs ܼMs"mx8:s&<֡ 6+u{v7z w2?wOMw:B/),,D_0hПAKa9tEEE@řﶶ߬R>|8+V/wnXs=gt9!B!vX!h4 6mƍXZZRPP@N۷'$$cйsge҃*""_G}-Z褭Yg}wwwW_U̙3=z(_}UuѵkW"""weҤI,_07o "**0jy3t"Xgft:X'Oǧ{s::_Ns.. δ 1؞Ӈ5l"{VIZ(/rhe<c6#CsW.]ɥǮ ںRES=sGLǬqi5D^4VK+wO2$̮c <Vɣm:­NښWo>n6d\!sfCfS19<]ͻƫCGϛBf407QF=}y.]<&&)^-QSk;aUtt4|AѪU+Z--[d̙J+€صkݻ ^?y\t3d_h4mڔ7n2l0ǑB۶m.'B!BNuh]. !/!9-n Sjk\Ҡ|aggG&MkZ._=Q}(.*4k׮w[j4.\rn1~wlmmT^8u!!!:uʙE q*fz:iؚȺN f6rl:`n[ﯼd\֡N[^οknꅉʸt_{Y1Swn^%OAqYnZVw|_ۥ)fU4ee\?KZZשjٿ(0ʕ+ܸqoo*iҺukwi+ھkGFF...n%LJftoj`kk['N ::]Ξ=?>>u.{/1e111deeѾ}{ I ՚g[]v;w RSS9z(C A2۷ӣG\]]u[p!{wFߧK.5tWB!w* ɟy99`6T:MqttT~SVVV%F!-- ww*i+W FCFF:׆1u!ĭA!h6n܈-6669rx&O?֭[Ԕٳg7pK2|)//gɒ%|ᇄ8|0NNN+ǏF˖-9{Rwnn.ÇwޘD˖-S~Ξ=lZlɥK8>>Ǐckk?ϒ%K3f ~!k׮ȑ#:u'x5k֐111A3׽{w^yF@6l7n[ﯷL6JBq; !K/ 8=zݻЮ];8s挲:1(..&;;OOOe M0{lbccyǔk7nSvޝ!C0w*Ύ;HNNVVVZz5~~~Ѿ}{>c͛;Vw޼yDEEq,7nܨF\yll,ٙziӦqjg![BFJV\1ݻ7QQQѩS'~:PϏJ^x{ҷׯSXXr{{{ꉉaȐ!7-:t… z 0k׮Uٶ.ر#;wTܹPXn&MyOT6--g ӟ[2?:wΪYJA˄ XbEiF]v,Xӧ3f% ZIW_)T8::RTT\GEFF ח̵kt׿Yf,\d<==8q"OOOO>sٳgԩ?0͚5cر?i׮...UÆ czpĉZJoB!dnfZZBV+ij6733n+++ؼyrtNĉ̙3z^{v&O38qB4: X!hRRR8<.\ё.]4tFÑ#G8q&&&t҅72i$lllX~=!!!tڕ;ẃ {ݺu|GզmٲOO;99ѷo_BCC0aO<={4jrff&dff͚UmÇ~/_hqvv&77~Ν;y())a޽:y~4Ge\|LŋtV7kL~;CV'NW_{9zQ%ZO>K.ڒ\%رcҩS';wՕm۶_~Ɉ#mƏOVرc< ?`ǎ̛7{ұcG_زe sU6ydwʕ+vݺucʕJ2V^7|CϞ=IMMR]޸q#!B!KFM3UWM&ӧEYK2i$y/^L^͟Qe#ooZ۹&gKKKJJJ(++3C B!D#V^^Nii)KKK1c(?u畴.]0o< FVV{쩲E>uUoMoβeoywh۶-111ʖɵٺu+ɓMөS'Z-*O?ѣGԱrJ;{P]VeXZZҭ[7XXX`nn9spuuYo|o|ծ޷FRakk[%0 (&UsGQذa͞={wABBhZBBBX`R~;;`nn\vڥlmooϘ1c 9RY\Jb,B!B_` j?~6Uc` <"##,Zp233thڴ)W^PUet]ކB)VB!ғ IDAT1___BCCݻw7t[TnҤIqHOO3jVET03f !!K.̙3 .}[lښ[IMMe,^ȢE&55!ChGsvIHHH棏>RMPTu^\8yrJ@ёb%ѣ2dHs>EEE駟V/O訬̽FMll,|wxyyJbԩ1#Gl2"Әj:73)lU֭ksϷB!B SSL&+&&M0UhRRvvvۗ.K~~~mV@a kײz;W%`!/@RQ/[>^|EILLo1ܞ={5kVLDDgΜ1(AAve˖B퉎gccK,ARU9/E׏+o]!h ^! V fΝFkmmMٷoenպukLLLHIIQT 6?tZh߾=:tS_#Cy*w{yyqJKK۪U+زe 6lYVe۶m$$$(8%OM[nW2116@J";;[T۶fxzzB!`f?Vbʄ&jϟSNjՊ@Ǝ'|9s&7nʊ'|>3/_N֭Uv*c۶mwTKB!iݺ5*˗/sqw)++T RmnUVqA>̱c2d(njSYd QQQpM6mT,V}177g͚51ϰa(((_V3{lf̘IVk׎h:wLaa!qqqF eٲe$%%\%-MLL`ԩ̜9'|.]PXXݻ :CޞtRzINN6l`Μ97v~=z4ӦM#66'''_}"**6mϔ)SXhrsXX_R. +++=#}hժUn.B!7L&>>4iD'{ŋ9h1cGL6#GRTTtO7h"O(_}RtVa駟‚+Wg}_|+NNNX[[3k,V4hGَһヒP&N/L޽;v'|ѣGqvvۛ^z1i$˗EӦMfL:է~Zgu9sfʵ{XZ&""J$$$йsgZhfN<;xyysN%=449sУGpss#>>^]?:s|ri޼9>>>eDGG3j()))"B!Bܟ 6Qj-ZЦMoMrBT'&juB!Ѣ=E팪ܢ)**0*.*4׹t钲J~S^^5 ҇7orE\\\rRRRiӦw\r-[>SN©Sڵkdgg퍅EtFÅ prrwlmmT&**jϯ>{,$''ktB!B4FEF9w:ڿmHA!Id h!1;Q#vvv5t7211^߿NeMLLz`~ Ϗ)SpAzmty{{jWwW2555h[u}|||\,[/TM&_!B!B4zB!5zwmۖ#FԿ߽nZbڴi5:Cޣ!B!B4 !BtՖ_C/B!B! !B=g !B!BE!B<@N~{ßjn!B!B!S@!hrrrHKKƍﯳҰ3gΐ#~~~q޽;Pnо,\޽{ӻw*itx ~'|}}iڴi.]Tmٳ@wZ&!!jҼys֭[iڴ)]vU`ff7;vԩg޽\|J#:tbcc߿?vvv9s,S$%%QTTDnݔkZwt/++#66ӧOJhh(NNNω'Ɔ'|GGGĐE0`EuM3էׯݍMYmO!B!B4B!7bkk G!>>ɓ'caaEpssŅk.^|E4i=o7nܹs7h?N<ɪUx׫MxV6u_{[Ũǎ# x###֭[+yz /cǪ]tRϟOxx8,Y?0ٿ? ^u%@xqhٲ%gϞUessshٲ%qqqJ@wdggӲeK.]Yz51|p9B`` _}A7$$$pB~7ZRRaaa)+رgРA?M6h~dȑ\x瓘AS3g:ubݺuڵŋ:>C_ߚVw7mݺzCݕk| !B!qBHKT*%vQekTTnnnxb>7F%%%ddd`ggԼysg3f@|ؿ'ZC_{j2ok&//FV%==GGGt(..l6prriCS!B!-B4R755E**P-C=͛7iÇqtt CyyEŅҺukƎKaaaCuZoUҢ'mHq%d ztL"oFwfM#0l}6m/2lٲ?ׯJaÆ~zkׯ_'''嚩)!CDLL|tЁ .3`]VecǎܹSsNBCC cݺuL4I'bPٴ4Ξ=oLLL4ıc(,,SNU:͠ՎjVkʹ~1^<|xitt48;;}BCCkII {x뭷Gll,/_&33:}E͚5SΐՉ'j퟾ wϟ!/ǏUVddd3WرyO?q9233իڵ#-->mےFZZ&LPO<ݻ΅ 8p+WgYYWfŊs).6n/b4 !B!XTT !}JB!rJKKh4XZZ*nU\\̆  Y5j6l <%`!V˪Uؽ{7!!!JFaڵxyy~5zh.\H~~>=^^^ښ[2qD%?7 .dŌ5J Z 8c@6mPVqNуr9Ν; R~՞}J"77nU8yUʵ͏>S\\9GʊO?Ԡ} oM/T*NS֛BҪ]olZnnndffV{թРB! 8F}-/'L&՜]o޼Ivv6)׋)++Lܰoh4(--%==4iRFFFJGQAB!T*:r6l؀ÇoՏVZ-[ذa꾜VZ޽{Yx1s˫J0 VktvϞ=̚5ڇ1gzyyqʕz LNeRN;vTT8;;3g j]vѲeKRRRHII}U͍%KRXbNz-ׯ:C+++FnZMpp0;w4_kkkڷoϾ} .s֭[cbbBJJr-%%͏>]tAVsAڷo { UkVe۶m$$$(8%OM[rW211J";?deggWcCm1)B!T]0#{ j05F[xyy1l0ڴi+)SHNNfʔ)xzzskApp07N'HܡCf̘A-ի tЁӧ?NӦMپ}R]wCj=_m۶v] ! !PAAgϞU /_ʊ4V˦M(((`ذaPTTtѣYj۷ogԨQuJ:[Vʊ]v1cHLLok`eeѣGu,&x? y^i樔="ۏ\PUVxb󁊻7mdxYf ˖-cٲeڵ*j5gfѢEmClaaAv픀raa!qqqF9CCCo9zhpUn\SҦNʧ~ʡC>z=gҥņ xꩧo8991zhMPZ:UJPPoӧ-!B!'MjT*&&@ϛ+W&415>q ֬Y9vYYY,_vڱ|rҔhƍGVVN"55$֮]ԩhHLL$##>}h_ܹs,Xq)k5M6}v8@zz:SNeڴiF_8IX!h4 6mb޼yUVn\pw}3jܹ3-ZP;;;3w\zAǎdȐ!Uʛ'0f,,,ZMDDDEWo]jZM~FsO\ǧ{&ae8|G ::Z'8Vo\]]),,dѢEO3 \]]qrrښYfTҠA8x >hc888(%mĉرO>ѣ8;;M^4iiӦx{{3y*[4Mu`͚5>>e}>+ȖqB!dnfZZ}hV+A`s33붲͛7seT*^^^5 ѣ[ 6FBBNɓ'+;TX4i...<󔗗O?[ۨ\%Ή'h/!D:q4QۮSB񗐜tmte)ZmgT=mOQQvvvr̭ \|Y2~t)BBB8uAO~gHJJv~_ΥKpvvuP^^5 ҇7orE\\\rRRRiӦw\r-[6@~]Fvv6՞hpNNN5nm<{,$''ktB!T\T@;w:ڿm4Taxs.spظq#K.СC<#,^^zȬY*V9'O=?#|{󉈈P=üKL0ggglB޽|x ]VmXp!aaa@ mڴQ^B.9- B4b6664t7NNN8995t7jǔ)S8xAq-[7ngg=LLL׬SY_N?ծdjjJ֭?:y)M&_!Bq+V6cgS50[#22H_΢E'33C͕+Wx衇x"W^ɧR]zxiZ222x---ux!0B!3ӧOo.Ic=kwћֶm[Fq{Sz| _mj|:CE: IDATޣ!Ba< SSJ*8(71S^mRSSjx{{cggG߾}OjT*INNV򛘘PVVFNN=ej*{9Myyr\W퉉aV9jڵ=F!_B!AMg߯[ !B! fjʴ?[Yq&JE#666ddd'(+xgΜ?ϒ%K4h~-QQQмysZ-O=yyyO%]VYEo֭[)//gܸq8p@)Ǎ7$,0,BC A<X!B!zYà|g]wYY/^DC&Mj-E^^Z23{pX[[)))… xzzbeee8 `!B!B!B<5ZEFqss,--6I&]BNB!D#CZZ7nLMM NB!B!#44OOφ/B+B4R7n9B||<'O t!B!BQ?>/DB!D#K/R(//gɒ%=zݻ.B!B!SB!x Uw+VkpB!B!BB!Ο?υ pttK.F !B!B!x `!+//FZܨt!B!B! !/DEEQPPݻJB!B!B`BR 77NB!B!B! B!Ξ=V?~OOO҅B!B!B}:<4mTg5^{5<<<ՕqQVV'|BϞ=u߿GGGn޼YRSS ÃGwwwۧ:_ƍ$''Hǎ&$$%uM={anw} /ܹsk'cO177g_^o/J ӏuN;g )-)jEd2MbRxӪW9{FvY6Nී_B!B!Bq_+זFl!L(>>=z`YIxBoYcJ'?/]{,L8ݽajfFkpޟ+v2N "G[_Zh8#<>HSuI !B!B!>|{dk 5޽ u_ی#&&7xkkk-[f%%%%޽< :u-ZPZȑ#z߿?gΜaܹGGG?@.㛟_͢EjӥK࣏>Ύupp޾A16%KhgO64M2Ɲ;wfĉlٲ\Z;$$.^ٳgywYVb׮]$j{m >P-6lorɉTc-kOOW&+T$K#/2 )>cfA빩\˾ѳ05` v?BFpi=ʹsj|R3%%K.`t'ai;^9]e׿[ZBB17o͛GknМ_~=jB!^pJ6[֚cW2fFFXZZju`ݻD`` Ό1BgcR]t!#z3f0|N>MNNeee\vM[gɒ%STTĶmt5$%%+++:wlN]cl Uh ʨu Zrpl۶ ZMhh(۷g̘1cmmMussjڰ^s+w&ٛ4ZBhX'> 㘙[ѥ[?najf]g*Tݨ֭0\{̺Cz3I !mTHHgϞ%''Çjٸq#{m(VBBXOH[΅  䡇[^RRBiii G+ޜ:u/CjiJ5EsĈKzB!B72 te3 6~:ciii53k,~7rrr3f ӦM)OO̭F!##ggj$&&w^?Ά x뭷xtW^X_L޺uKo.1ZXX\;ƴU8\]]96yd9tAAA̛7Otqq|mTq\׆ uW}Mʪ6۽>y<3ĝ>GbecOyY1%߅7r026ejmYSZRL{VHB!D /0oZ]CTZZJjjZBm㏌$%%AÇ R!B!QsO3$pfA!_eI n OOO֯_@QQ3zhtCx7oތZf222B嫯Vożɐ!CXb@w& kO>P\\\McڪmF"**JSSSfϞͺug޼y2OOO]ָ5kObdT=]VFCvZwx5j+*F&x%i뽌 Yc3>v73 bǟN+GŬ u_HR_3eʔje۷oGTbkk˛o6@ii)G'̌~Z X>r|o 99N9ŋŀ߿?7nT.tMtn}}}&j{k֬qʧNJpp,m.B!HaAf]_Nv=oA0R4,addĦMѣpIJJb퍿?0ƍOOO-[ƦM֖;/׏3g2}j|}})++_g^C ;v >>GGGzMϞ=u8){u놧'COCVm3g?Cgt]{Ȉ`{\]]qqq1)]FC9r񔔔pfϞmNvzW ^Oʩ/xӿy& \@Ǽ1a yq"5u};S2dEEEꭳ`:uTӝ}v٣o>IIIsEΝ;'#88hL>:w06nѣGy'>U7ȈK/i:y$'N~㧟~O?%22ڍss''` *߽{7˖-sb\!B!jRZR.n jMmqSii)III(Jlllj[?JMg0///֮]˄ HNNk׮XYY^VV띹R$**>}k\yyy憹yc,++#99RYsM-}2e ӧO߿^baaw?k.cǎx<իK/9+W2iRgC'((PFY7r015mu+T\w-6$&B!ZJޡ;'Ҏ'pӖOzR>eزIhNsnG6\ɻsV]^6iiit`ƌc0۱cZϯz‚W^ySwиd<sZo;wĉ|2))) 6$rss)++#)) 777[;&U@:0R`́.R^T'GzuiTb9h4 NUOskֶ֬5\ 5h| 633ӻo͸q 룵6B!BQ7&MuꫯxXr4F_|˗7y鳹ܹdՎ^ڱ%i?C$ `!>P(pqq!77rڷo?@aa!n~gis `._FFJ.t =K-Օ+׮]Ig/ZK.Kޕˍ_p//k)Ǐ'))~7l2N;vlt}0a|7zUjF&-\[233Q*2W!B!R6lhW*$&&-ӷccB,I !mPQQBA^^ϟZ53Jdd$ǎcɒ%rŔ-?ӹ+91_gѣMO̙3gggx V\ouCEGGb ~g6l˗/g̙믴kN[ǿj* S[޽{ir!B!BѶ(bDizq!t嗢8={׫vuwMJJJh߾=fff6%un%s[K,:X5KqqqL88k?i4ѱZyBBz"66VgnDD/&::ZB!B4Ғzs)>VwgBClI#! ɓ2;7fKxyy7[ P(ٳ/T&/_~W&?[>H>SI !B!B `! tB!B!D%,mB!B!B!B6DB!B!B!BFHX!r*G9a!B!B!RB!ڨN>ѣG9w*J<%%XFJѶId=kNv}SwMnĿvZ"""- 6cǎfn8xh8C{!/h?B!ڨΞ=KNN'%%%ÇFDDGsF102V4yuAgGX<}_[^/\@`` =P;\OEq6T}o)_>}^[7>^||.o񷏧^yc`j_ؽn\8 |O/ ;Kxu6D5.CTRRBiii4!!!|?~`bo֩i|K.eԨQ؈#x饗PB!Ga_… m>/_ޤEEE,g\|7o= @xx8O<_.\?S?oHSۛ+V45(//EV,/-.$%c!y ( vJա nbiݡZhz2X!hL) LLL𭨨 ??_fd5#?dFF{f)jM}H=?5.E}_w?xyɼU,G_c>^+~=7P>RR/Ԟ+FbРA\zU ҥ iGd?]V.7''Wo?Iu;:W&9ʤ)TΎ}AU%8Y[>r^x?ܼ|c۷T*7lҶO:N7,, 777ƏO=bhM:` , 0g !B6AQ7>>髏0ӧ7)\/2L:={lmʄ pqqGŅsK0a:wL@@>+<NNN;vLɓ'ԩS͟?[s\[[}7ӓaÆÇ4`&NM^'5J''N`ggGaa!Py?M~ڵ+'N$''G[С:DEEѽ{wƌw֖ >;wwϲjժS}M177g5%'gg/uNgiUk^Iϗ"nx﷭]/pk#;"?/_%p¯cMCB!Dȁظq#Ֆ9p?xwt>{r#]%+㑧AB(=0[|?*",]9i<MǰB]Q}nukWoң3# "Tߕ{;# wcyPSUΞ=Kqq;}' 8˗/s%=}wޤi&<==IKK#--MFg#99j.YCNrr2:OƪT****7ݵTE@~0%xtFO./G+ OW&742i鐝^g`s ne3}da%΃Q\6:4~ΨL.ѭSߧΝKZZ3glЗ=54h;wf߾}c;v`ƌUM0 .^Hzz:.\hj|>>>=z`yHH*_!B5O0}ZX:!옾i-O48 CPPIII={,7ٳg@vv6 ХKrNdeeGjj*lݺ^ևJ"44'Np%V^M@@EEE;h4T ϏE֖J",,HYt-???"##u bԩX[[Vcر\zLT*/NyIiǟՠ6---)))aL2; K`׮]ڙw~f߿?QQQQXX#Ǐ' E.\[oő#G8qbK, 6mk֬A:?.cܹsg&NȖ-[(--%$$o/rY~grVU0a۶m>[%..?5˖-c…@{/ri>C]dn?eVvtCpOs?جK#/2Ocf7hVT߻ʵ+L{L,0)~qh,BaL4ŋSTT3A OOOEӳwnMwOzNgN ޻9 #v_uE/eīXM .*ـNF9Hf%vUJp&Q?> p-^{^[9ZttVCh4RitGߏ^j-3gaaaܼyWWW-_h3f ::\JKK)//oň3*F˫:B!vJ6[֚cW2f "00gggF]9%%+++:w\O&''z'k꣊Ry\###tBFFw޻[XXPVVFEEEP( ϟ϶mPՄҾ}{ƌ@zz:tp{X777JKKͭ~FFXZZjuUo;;;;w. MQz+㉏>F!98fVt[yY`nqhcי U9%Ecu6~"ۺ<$,B ...z?PV㓆],yz-NQ?FA\vMobΎߤ/g٣SHogUR*o. αlN\Uץ`l #'T.Ou_P,z3Y_;ײQ?^s#e_7n7~\wwwϞ={عs'sіѣGYnVյkFjLJeB!n.d變YP7=ìYa̘1L6 L޺uK)ݻǏaz-xz殩*鷗h4ddd T&aw)[_iqV{{s!7o~ŅBobffwcggg_ZZ}lllnOVVv&{CtxUy)Ig;}R5cƞbJKn o`dlB;ڲUyحïw4^! ***"!!A{ӑO-r qqqٳbgdp2.нLվ#Giq9GB8EQA uV{;8Kb߿?9sZɓ'))wbb;6RSS@һwo6o @llik\iرlܸvuܴ\zTPw5jϯ24Iӷw2\WƷI Z'Wp +/bԱ4ԟQ , \{#`!ols!003{lqBB~ryBCCT* )DEE1j(}B!ĽoI|u&V,(3^l 55ڷoѣ)++CгgO Š+瓟{Xz_SU37oތZfG{P\\7|YƴU8FETT9̞=uμyeӇj͚5aaa]`4\SG ^M0#KxzӖurž'@]/_@z]G%gUy)ѿXDHX!hT*v⭷>࣏>]{sP^^ζmxyٴicǎwޭyiXY0t^эb#?2[դ?Ф݃i;N+~ccc|}}ٻwom۶Ȁڵ+>>>Lr|ua鎤$ ;kܱc8::һwozIJJ ,^ӯ_?fΜM}T7nxzzl26mڄu!!!t OOOڠضjǜ9s(-ݯ' pscddDppvFFFlڴ>=zp ;r񔔔p￲I%hl*T|9( ~b"#?y'W)bDizq!t嗢8=ǼEݼyڷoNZƍTTTСCW%u~Wt\yuki餴{/..'NYAA888бczRHJJͭZU\Ξ9UHMMG~=$$$ЫW/bccrc]w~2fz._b  ?| VjNsßYkWoˣG Z?-- WWW;]޽{ŋ_kB!W%ŵWKzhXv-&L 99]bees^YYڕ)2tLW{XXX]*VS*ԩ}'''}]Ǝ'@B!B!)Jնl ZO!ĽOB!B!B!Ĥ1ѧ y!B!B!B!D!!mTNNiiibggG^=YZZJll,tOOOyYhQS A!B!B!D=IX!hBBBƆӧOΒ%Kh׮?Ν;GpvvnPO=̚5ӔB!B!BQB!ڨ^xBZ?̙3 :ƬYk׮tڕ^zqEI !B!B!D+=B6ΤBFT&ccc=z45Y&&&5Y{B!B!B!Nf !mXbb"III$''cooq*hΜ9Jٙ9s`ii٨>۵kGaaaS/B!B!dBцjQTV(//TTTpF9aߒB!B!Bԝ$B6ÃI&xb ^sssz!011\r}3oNjjjB!B!hN/ >ٲen:Oʕ+[ .|&k럘Ȍ3tqAcO?4'Oֹ'OfMomj<_IHHh/H|`k-Q% `!>P(pqq!77LMMu n*WgoeРAM޾B!B!DS:|0 >ѣ^_… m>/_ݶ)iW'??pen޼INNXtt4</ўpB?O?5Yix{{bŊ奨*Ņ$ނ]B!ڠ"h4@SϟGT`dd'; Ν;[kvB!B!HQWi**T* l~nn.7oެyEEE*//'99uhƍDŽ ׹]?}㈉!22ӧ;VC/G[rr2%%%:ǯ^/INNf̙|w=zУGk]4x|}}9rJ (/-v|씗puם0VWp3? UӼE%I !mJb׮][||G3zhmI&Ś5kXf ~}k4#B!B!Zfo|8c?}WaO7>I'NӓaÆF %%#GҧO|||&>>mM~ڵ+'N$''RSS0a...<#p9+<NNN;vLɓ'ԩS͟?[R/S[}74xjGpp0'N O>QFu (,,:t(;vE3f ޽[[6|pvw}ϳ>˪UjNq7ܜl0Do{RilZ=d| !.b놧~*o2 go6/yU? /j06Q?Bk;0sLuxYhK.gҲQVTTCc B!B֨x*֢``Mkx*IIzz:K.ٓߟ|LRRR1cu[ϏcrU233QT5h={6dgg@LL ]t* ĉ\tիW@Q IDATQje ̭-"--MqmmT*^-???"##xN\SNںNSRV?3̛7+WKxxnXXsaҥ~,YRum<>>>=z`YIxBoYcJ'?/]{,L8ݽajfFkpޟ+v2N "G[_Zh8#<>HSu6B!0OQӱcGllloRDT6Lb!B!B;_8Zdkwo趗,Y:m4bbbh4$%%qQ}Y222HIIaڴi8qZ۽x"gϞw*gaZ]viz\t>#:k,ZH… Q9rנ1 ]ƑΝ;3qDl@ii)!!!qqqo`mmͲe˴Ç'44ckkˆ xmqrr"55`>c'z,Ӈӕ U9 ~kx˺̃c`lbϘ$F<@Pzn*ײ0x,L,0)B!Ċ+HJJ~¢B!B!DsJ6[֚cW2YXXPVVFEE39s&gȐ!nzz:tA{͍)))XYYѹsguJFFFt҅_0tjB?>۶mCVJ3f P뙑Ϊw]vN{έL\ޙdo wxj5 c8h4cfnEnUߣ6vPSRtZ[n^Ss1`SB!$~B!BlFYPs,n.֧R~Ǐѣ]\\(,,IbffV㞼J[n՘$NOF!##ggg2 [\\-˞Æ4Jffαɓ'S\\̡C b޼yڤe}3ׯ_׉?--Z=<Ɠk!T奤'!> ?+{ˊ)-- ,WkƞҒBTec jU/FB!B!B!>j|u&9'5[={dС+U&j5u:ӓ>}h-++c͚5<Nٓ!Cb Ir56oތZfӇP9Vm5jQQQ:瘚2{l֭[Gxx8Ӗzzyy(**jԵ',,k,ŨQ _;~es:⿤^:8m]'W;qN*Nwֱ]'W:*9sKmw"j& `!B!B!BՌF,4oN|>if,4 #E; ;wʕ+8::ҷo_:umdDpp0=)P(رc8::һwozIJJθqӓe˖i&+} [nxzz2t_ƴU8̙?@iiy3|pw=^iddĦMѣ >W#Gp )))̞=`xռנ^:k/l;޿YP0nk$GùM?BO,\w@'ڻNcSĜ0B! 'K8=Ƽ]K-䐖Faa!vvv IHHvnnnuNq!B!TZR\{?35jǚ+G\zggglmm}~jj*5.O^^yyyan^_k׮e„ $''ӵkWt+++#99RglC4E[0eOګ,--%)) RMwrrweرxzzYzv#GrJ&MjLOPP|. o`bjN;'r _Ss)FĤB!D N>Mxx8K,]vT[(33޽{9,B!BZ*٫mUjǎر2 zꥷ mٴiS۫4777x 8q"'55UD-[kt<ϟgݺu A;Թ)6c4',Bhkj>CFwJ|245B!B!Ľ^ܖ1rHe/"˗/7xǧBdBцU%mbbF[ܹsXZZҭ[N!B!T*ILL[fdTL 6xB{$B6,11$g`O DӽkG-N#==Ng#^ThĀgwomQ!eޡ7gO`40tؾe.egUgДkr3pu󦺪QyZ~Dұquv38wiվ) Q(Ti5|Z Gk?5EU*4Eu5E1gpu / ?|"(nC~v;| Tθz2tsbk)FBT~~>h4|||ر#*333IOOё(<==Sz~=y 6`˫jv,il²eL?+DGG_njo֭[B:ukvsxW߿?og:t@˖-͎q}1hРƤm믿NZZdܹ * y7nKf̘AFFm۶eҥ,^G}bbbPT߿A^+/gϞq;!Bq}2* 9M޺u+$ ̝;L2޽{gx饗2e ]>Yd UUU99szjF&NHxx8yyy899CmFN?̢Eh4uVohlg%66j rϓzyi$g0d#$'=>zVOCZcF_!m7+z] ߭~[?ȮMo. yJ67,EhFO]B/qboتok!'K@ !7kr ٵkK.5[g|s9|MnF#2rw8ee`4-k6!z`'u 5:|?2⓮j.R(T*(,,)5ŋ=z46lp)-[ƭzUڿZ׉'رcYYYTTTl*oMV%..ᄏWSee%}?{l ޽{i^ q]^z}ε֯_?z) NE!B܄$H*r}2߾6I@r' cǒPȹs۷ӧMzz:cǎСCԿ3g8q/,\ 69%%[o@`` ~~~DBB=řnhz .5A*GgjΟ mE)(xx۞! ֑yRRt i 1C$ Wwo]јPp!3_wKNIK}nNF`4N=aW{qa&=h2*Gz ȹF 2Fk\vK.p!$zMIm;'/tu#3BԓO>i`0op1L3l0ʕ+9tFh4)P@9SQQ2.\Wy^ .8;[,Vϭs1 dggT* F,\MM fVV]]ͅ ptt$$$t{{'kTnćځLBCC-ϟX>FAPP ߛoɓ-kuرCR7ϳ`"""i4***hѢEc@@… 0`@KKKtYq7{F# Zliu׮]qqqs<**cǎUwF,̮}֭mejj=[ff&-[m>|xrB!’`g\j3p\+zRSSQ*<#fCqqiؚ,<<<qwwX^_uWj}Ck`/`36{RZvGM[elMqN.\Ft"z^|fK}|LP@yF ^^Z+ήc>-jVP^vgWOTK_9]VB_en:*7/ /Gf !7+g.PrJ2`P?|6U##%.ntlCHlX EU,{m0[6mq9~9{XxMa*45}ޡür:%;v@VFhh(={Txgpww]vꫯʟ~i<==iݺ5mۖ_~yXc+~II *>}пZjEHHY|JE8z(*G}>3T*rLxx8ÇUVjN>mW~zFevp͝;Yfyf6mg}رHt)..6ߟ5k֘~9sͣFBVرcQZc~TWR՘iJQ:pqӏ*]M]/ݽ1 ӗ}y{vշBqKNNf,_???zi*5jڵo2Ӭi @j\8ûpP8edJU(\|Uհ(Ng[$~u]؟K{ o z%49l{Sݜ2~ijjj˞={x ~oݺucŊ}?>555S\\\oQQQDFFxb***XdIdO`m^3߾u4%hJprq'$<~,u(?f~-IM>GWS/ !HOV`׾jLhp %FڢtΞSo|o)|BZwO@~-9 zGm~( |Ws,bVqVGMmF.%Xͥ:YZ! jjjt[FPFT^X\Ngg} Go.qrhRvs?`3J3\.q`49/f?w[;hȇ;(Pd~$:bC̜{P7zc^^|E7Z}7o6[~kײe***prrɓ lْB֭[G6@ܣ>ʁx7ԩO>dd$''a*utt4;G.\/ .k׮<#ufN4 wwwv@W^iPg۶m8p,F##F_Sw|L]\\8}4[n]vW=o{ ի2e ѵ}rvwcۗǏsI?==_;vKyb`=V[B!B4CX+g0{56IB_|i "44lzͶm=_Tzj&Lu먬$::?޸k֬ᡇ"((`ٹs']v٦RdŊ=UVj0akdO co0|p6nXӜ? P(k'Nήr'M3Ao0]p,+ZE IDATEb[n1o?`41̤Kq qb>y=7?*ˋpv7Fcr|Hֽ7O HN`OR9Q)bgptrw>_&=t;Uql]/N.zEcXܽ)/+Eh$NyJ.z dX!uЁ:`4YlvbĈ_0ajIꫯo4i^h˵ߢu,)-õ1rv/J eqJQ{PYlo}kJ> 5U8Yޏi|Z/ۦ.]G}W^yW^ymF۶mGDGGOtt4-[D7yO+يIXXiP%<\oS-5IdddP(={qKϟOQQcǎ|>^̬39=ڵѣGEUUi,yyO_e !B\& nwU|""" gǎrZl}J9bv֭IIIdddjm۶%>>B ٮ6Kzz:ΝCVɛo>W9sfzL-f)+hAeV>iJFZQHJs @̭wЫ/ ~rt7%[}3{ʱ6C.qrv'@m6kKC  !YUTTEDD BNΝ;9r}777vɝwiw BAAAAAA !7!Ndž h4RQQAnݸہY^ǏgƍTUUǸq!1IN%-3J-. U'JھS7T.[W]ڦȞ yvB[ں`b5ly#zjc,Ybe$n\\|G8p0tP~6nH۶m{,⋍e֭|̜9Ν;=l+%ݻw=zc<6أ?&L͛ Í}#**va4i۶- \^b⮻bΝZV\?Lpp0dgg3g>3>Znͅ W[\NU0 ^pwww˔jZe˛FZvs___Ǟ={8wmڴ3N#44ۺP҈#HJJU}e/hhժU 222/..&//VZRw )[JKK&00д233 #>>:IBB7of;vgϞcǎ$&&ҡCuN7HMMRuiϰFqpp`РAL:)Sό38~|P%BRl9)-~! BqcH<~Tf !73OOOJ@s]|qi%x{{ێbu{6ɓ'yyy ڊd۷oo,((m~C׿(fΜÇ5 z_b[gǴyCyyyՕ_cY> 33[B!~W-o!B!BLB!B!B!B<..BܤDCǎKKK#33WWWt邓SꫯrYϟOLLLB!B!Ba,Bܤ֮]ˉ'g׮],]Vk*NQQGޣq/^7o;vlr{B!B!B!'B!M''Dq1jjjصk9iӦѣGٳ1n oo&'B!BGݻidƍgw둣(Ͷu/]?~Gd?q3`!&P(̾WTFvfNGhh<,,&ǭԴB!B!DSşbx`'*zdӾ}̼1ٺuc>}/]Z6}Wq-rQTUjH?{i')/-Z^kBqKNNܹsGϞ=pss(++krLFcjS!B!9QRs6[?I9}L&_^^N^^~~~W4߫!((ɟXPPP3utdff\쒼<ZhaW.WUUjÖj-Ÿzn:n`0Xn`У)]qt5Uh+JpGt7FSrjჃ>: qG|v~G]-X^Zն+ˋpu+k}-DBq3 Ԡpuu`0A֭ٺu+wy'٤cm$$$w^:ŋ B!B!s 9Zb"I-֚woñb6ؾ9sXjjbccYt)L6HII ;w$::8VZE`` ٌ;+VPODsb$&&2qDrssjĘb9r1cƠT*1+~~~[Hت?sL9s&;dʕuHM:@AN Mc9}G<=w`r1?߾Oۖn{ Cw'CoΞ؍h`踿} ]HV>7g)g%f&MuU9 9oc?MyY!:g}q( ZoߥU^$GPRpiվ7+_fw姳WŐս3ŋPF" ]y5 2P9:֮h2,B:t@0,[]v1b&M?uhѢ1115:?N׮]iӦMsuA!B!rO\Wjf O4z8!!+W7%F#'N$<<<7 >#,^JEII fժU<#vǷ`02x`,YBUU#GdΜ9^ԩSyYh;ì)Sлwoك/SLU#>>g}X}*=OC/楑zS]?c>O:/ݛ߯brJTζhFж?u5|yoXF#6Ed3aj+ܰ=u1 {Ǎy`-c=h<X!P(PPpyxfϞ̓>Haa!-[ltiӦOii)6lhB!B!8æ#=>So_r 9mj),,DP@JJ 筷ɩvI@ڵ+ǎ?/ ((4(gΜĉ 8;;pB6l؀` )){98SΝc߾}L>l;v,25A*GgjΟ mE)(xxf[G$V(6]BZFL7-k峛+G6\HrݸyTo:4SSOؕ%^\Iayz ʑ^'r.q?F .}G'WvҢ \H;I^#(/+DSG<]{~EB!M,"""P(rIbbbLuFT`Z§ڴiӠ7B!B! m]aM6\<ݛ_~}sF#ǏԩS 6 xbۊb TުU+(:3Cqq1fZ1|c\N-KiځC5-sl5y8:ؽ?ms4ٶe 艳zʲz}B3CWB\qv0i^W8zrt"eE(J[fjpq2;yBqtlذF+t֍oTܹJhd̘179%eB!B!Üyvu XsyK?Mj?66XJJJx;v,999j)((3͛x"j5(!!!h4ɉZlIQQԶ^~0__.W*lAD_϶/j Q w 5ZAK.fs>gۗ8:վyh+56&F#eyGklӏJڥ/XwO?t5ը~˯m)LחKx-DB!Mˋg} 0}t^xƏoڣ`РA̟?3fpBnf@zzz%B!B1ip;wfZvatoHӧϧbQ*zHLLdݺu o+Fdd$;wfѢETWW+0fJ%QQQDFFxbvE%Küy󨮮`0~W?88D}J;s|-4%hJprq'$<~LGQ~+'5vm]M5d* !HOV`NjjJ=E=>$4S/1 Row0ZstO@AuBO@>j[*Y(~amȅW[4B!nbxzZߤƘ:u*cƌ䣏>bھB!B!|])<0ip&{9Fdggh]f =AAAΝ;ڵ+3f̠k׮n<@ii6U(6c^ &n:*++?jgXѣGj*Z-&L_5_0m4 %;;޽{m6W7i'/4O-&)+-aꉓ+e>IL[g%VۭwLAFz %73RGk~ܰO^Ïb{5]<ј;:u'0Ү*}z'P1d3Y6hyp ؾe݃NWPkyS 5骉2W7vm~O^˟BZFrbh8E±#Nz^<B?Gi>IԠv]\3FṾWWWaVB!B!W礜NyAדN#<hLnX*G'|[Y,껇wTV٭A /NB!B!B!Lol֬Y̝;g$nnݫhJ zˢG~c B!n*6[ ީ!B!BVINNXeWuW|bLIԒ;A!HOO'%%9z(UUU!ly?##Yf1j(|ǟ;ç~zB!B!̨T*_2,CteA"BX|97o6;ҥKüTWW_,OHvةQח,ro6:ijqw7Kz٬mlZM0B!B!!K@ !71Ȧr IDATMѣfe۷oW^5 ^o͑#GۮOd'O[$ o2U`ĉ,\)&;;ooo|}}-o47772^_f^^[c4߿N"//Z}]:}gyL!B!B!n$2X!:t BCC͎t:RSSܹ3DFFZݏ`*{o?Ox RrU{k֍}U|qv5jj<ƎZ橧2$00C)S4kwߟʫ @bb"jiӦqij5j?sϞ=KnѣL2ΌaÆ /н{w:uD`` Oߟ3fEnڵ+%%%źuL'Lƒ?Z֛߭6m˖-kB!B!BhdX!Io>:e IHH 77oooC#zb/ -1Xk(I/ s=<#Et+/ vG`sv߲e qF233Yt|ذadggs8um?={HII!''~ЩS'233Ybdffɣ>j:' &&,RSS9x }d^g|}E/@vv6kի5k)))|G$&&dɒf 0yd233?~EiFmZp8ナѽ5}rqNy$[l1˪UHHHj=xY2227;̠AX|9P5k:ths0`_|^}-[ÇÚ5kxINNfUB!B!7n縸8v,m7g[G}}Nl{[?"IB?#FPQQ/[oEnԩNQEog,,vK?ʲ~D'Vpm͒_@@ .o߾DGGȑ#s=~8gݺuӷo_Νjwިj }j5Æ SjO!B!ĵ*Yy생ZO|7Dqq1[n5\QQaSS۷fiZOCاOƤf&%aUk9b\oU~vIK gȧ_B!ğ@>}ӧ1f͚Eaa!...xxx\K?m֙_>-wrekR.fދ/ٳ),,$""sx< qqqo:˗/7Rǎ9}4iiixyyY<ٳgg6Yr?Cݺuجkơ) !B!^(9Dݟ>gtdff\/)//'//???ԐEHHNNN6cWUUrqqi|',:D쌧gl]KLۋZ*z]mRSWO\M   z4%{r+ mE n(hJz] "<|pPY,Q^V?J K888j1<~^uIeygG>vbNB?1BA@@NOuT*s;w.:thtMq_ !B!h ?S%&"+Iuoñb6ؾ#G3f JC=dVSO=ĉ3gVBVCll,K.K. > 6T*)..?gĈcűj*fرX}>s~'S{ - i8x sNILLdĉj1`HOOgĉ>}WWWXnsU̙$&&2sL͛ǝwʕ+봑tz d1r؏xxP)""zob~:{}-G?>Nxޜ=qc:}oVN"Дkr3pu󦺪QyZ~Dұquv38M*ԇ߾KHI؏BJӪ}oV>ZcA)OgǗR)B!(,!?{yYgADvkdrtՓ'@m%]?#B!{{7m?!B!dUxj&xdNF#SNfѢEh4::@BB+W$55Fr:-[p!;:b}w7I @%Tқ( %P.xU+ 'E \(MEHDD ]5lYXvC5y)v.}?oyӿʦ1Vz~f~ ӹ?jl_&=Ҳ715i"14| {7GSU}S^,B!B!BKD)өe+^JqΝ̙3c>}z)))a˖-dffP(03ydj{N޽{+l/44'N_a||| ,0zh999|7L<1믿ݝ .p)z-lll3g7oFU;ϓBBBÆ enʴ6҄ ZgPR .^նXA- Eusq:ް 6vϾS1z'ZN?~эsՎ6 o^#)HUcL"e> K+tC|z3JJe}+k;w|v^5]=M6)Ϥ 7v&-|<fX!B!B!4_?rd8RRRpww7Z#TZ~||GL<{N:ܺVTGJJJz=O?4gϞo߾R2ԙ:u*ݻwg|WVW&!! MNN7gRZZJFFFqJ%&L0j]v;j5K'gp`l]_#:h2Φ]vz=';=:j(-ίv .7) <)*WTU0/+k;l ל\jԔRV6w3}B[7jǿ1%;]7LB!B!B!x`Mׂ1_r n|c緇qu!;;btȨQ5j 6ԛ zRRRSNv.]Ddd$YYY8:%6iٲ%M4aƍ\W^y1SXXHFFFkݺu)(( ''Wײ^bb"xzzV;hZ WKu߬a4lUI!~ eUKKk4R%E\PJ()ʫ8X bscJߘڷXYiT9(Kgb^+SQ)-)(ښڻNiI K+?0j[1h;&Z!x$$$WzQQ/^… !*!>˶o=?0*qWǐY{rڵB! Dr:z_N2M=&zgU7nLHH-++HBBPEs׮]QT7V]ra5kttڵ\[JVKzz:lڴ\)S믓ȑ#SÆ i׮3gDVeG£>;Jb<(jaÆoߞߨT*t:kTWח*t)?7Pp,MցjkbNvzBՓ{r\E\ߘѫi˖PZR`RfǨPi).,Kf'ptۈՙ#; +^GzU;T5vWܽ9,j8o tBP?'gmQ۾9ж^3R]}S_NV !\ZZVS۶mKKK^{ouA#}k]~Wptɺ;vm OZZU{E>/x2}pnws:uPvڅ...|bضmo6Gg} !B!nZ5 jOXjWTzjڵk)))aȑ֏gСDJJ ~zEّڵk +|oĔ)S ^zzN^Y#Gd7hk( ֯_3<{nBCCYn#GdӦMӬY3>sCPa&Ma{ cYxXx1/NZy.=@.y٩\?;'m(ϤV4{"?x$ FYἵ9~+iܲדG͋X(). bcXm5Ѷ\}=AM*-jɦObiiMQA6aqcLcqY|.FO=:?ʟ*1t:;hT?֍Wni5D~wGAa~&~! 7Xo])Μ8o޿*BځVʂICM:Wӯ1^BAoޑx}U)EOQk(oo\Af~taŔ`g}}@F͈8 :-YO_ m=fx *C}())!77;'pENFF 2իW Էo_ڷoODDt:;FHHΤIh۶-?I"B!slllhҤIu-,,_~uLn ˶nʚ5kݻ_OP}̇Y\sCT{(-,q𫲎sW&1;· LceWYXZU 6f%rͭBQKԩS+ݶ6~Kj ~ Oʼ=BG}V{k} :.p~-x2Χiu4:7-mmDZ6%9_~@!|=쌞$Ci@Vrhߖxu8l(QD>) uVcծ-|7=:6nNbuCOr.-BYYVӢ&UxH :v2c/O>< _g>|VA 33SVqgꊅoԌSCNa>fgEf^8ʤnm>?:]rN8[J%g1&}uS][_q=~b:7ia~uOUoj|U1cpZjExx8&L`Сj ooooΰaX~=#F0|r><< b< soALL ͚5ÇдiS\i7h$,B! 6mD ܖDΝ+_d K`88_] ~²f힠Ea$,BR[n{E:Eߑ-I%h IDAT{_(HRIhU̾-5չթ0:r&Nu?iN|U_QQQвeY:11_6O'%%ѼysUZZZsvm>>>\e[&B!RRkVv4,%mg;,-IuݽHH5J(PP(8kr7ۻ V{wзeCYW+;wZv?úg߳~tgNfs 9e/9w{Y͏5kͪ^/>-ڛ=MaSn cʕܹ1c{{{ӳgO|Mڲs򈉉Q]3g-j:tȨu뒘HzzuwΪU m_>}=.]Ϛ}B!B!ĭ#B!C|WΝӦMQ\Vu~ðBϳ^`TΜ;Wlg[]kYhֆAmmܔ_ٕ9#&azA5EpL^'ןW34hߚSqstjd>}6|>~Fe_4KwBo<\INgڠ1sf0Y{ôchjg3t =?ܢz6{)F=<3\  рDmTus;Ly~ #GW^m۶ԯ_ߨ/`IJJ ӦM3lCR^=rssٳ'SL1{ .]"$$S B!B̉#[8BBɣo ijV;6vw2)-)~ UInײr߽FgVz.:gʷ6֔ɹS7l[%3?:(mbhf{SXR#^q:XT)'-7G`ke]\r%-'nj5jt6$ ؓGeB!j[{l]Z>']-m {Wwwޡ{8UaocK_m_W'\*- N@S{0nf|puuյ3Xܹs̘1CB!B!mB!תc|Ҳzثa=jĽ1x`|B!B!D- `!BQϼX{g?52n}? !B!Bs8B!ܳD}|B!BQDFFjܹk׮R$##èNjj*h4{BTGޑB!DrC[~!B!B 9##gsu#6̙СCYvgyC`mmͦM7zƌöm۰ WI !⌮sIbbb8sLO]y>w^çӾ}{:w|B!B!.v'[O~Q) 6gnӦMy)--βe8|!Q !ĝ$ `!KKKcժUDFF]w9pׯ_'::O>Jus>DnMkt/s0o;jN`׮]$''STTĥKjGE6nȼy8xkS!B!8|>Sem _?O'O!muF ˗/˗/_f߾}k^gg+wI BbzZlɕ+WʦLj{cǎѾ}3tj&fゅ?tj-R5z~waiiɶmx75k&RHII777zQRRbv\B!B!Dm0=/ظS^$_|!ukԾRd޼yL8'Ta'|o={2i$z]nB!D-va|||+Wv# `ee#2;*rrk9W}|o<2Io=-G3 -濖{&?c ^|E"##૯[n;v www; D+K6xyyѧO7nfäIXr !B!C8';k%xH]m1x`6lXhZZn}[ !" `!'&&ƤO^~t{{lLg},.*XMZ0=#wG ĞYZ?g̘͛ _&<#fΜIrrm)wl-BR[n{WY 6гgOU7 ьhƑ"Zvul.r=%1VߛGAjOp@+}ܬ̙s!44 &k(3f 6l`ذaɾ} :tɓ>}|5{v܉ !B!s ]~>Bmn]ҩS'ϟ_,77{ŋso1yd~W!$I !˗IHHCǓJ">>@,,,h4]zylP(u̍3{G? WWWCyXX .$??(hٲ|ٳCLii)j8trB!B! 7?qwo߿Z`;vD]6mM4`ٲe<|W<3wo!$,BBj((((ñ@ӱay'sRP].+++z=zBQB!B';DāpN!ub/ iE 2M6ر͛7si5OOO>c&OL޽c1!BQ 5jԈF~>tgԩ@YsJ++Prꫲ픋 6g?wqޑ 5z^4߲Lʕ+9rK,1\W(( 222lݺ1c=wW'ҥ AAA|w,n!B!~zScTD8ϓB;Oqz¼#*/qF6nhy啫7bFq1!ĝ$ `!!V9v3\oӦ Æ _aXIv?LYet}MN7ݙGH111ݻ:.]dժUӺu{B!B!D3 m̺%zܳ,Yi\pMck2n!! `!ںu+ݻw޾::ZFNw#B!B!}lX}˅,G[%m ?_楞6wk&x&vէbrBQ ]|:t@||<T* FѨQ#z=+WdϞ=>G/B!BQ)wkwmK/'%:ڲc(b[[(pTYC1>Rk ۷PZ4ˊZYfjDS> %x;YYX~OB%㨬V4nk  oB&I !ZŅ( (,,$**pCBAݺuȸ !B!BTu<ve:ezq|m%7>ͯJK:6d2qG:bdB mu<=E:,\1zB{>amJ8z~q Đt֑x~m!GZlRSӒwRdoo&HWa MXh;:@hTZ=Kb+licJ~df?W^˥ݖm+'$BZ::u*EEE$''B 33ӧOӾ}B!B!D^U62u*Fhˁ0|B>G8 lTж-cW_cn,ɧ{xg{6l棅\԰dKwՁc=j϶eOK¸NLXNN:3 |@+6ԏ>SKpwKV g߅Tz ;G\-?҃WFDv\GZa5lVt 4lθߋʡ5_ BԜ,B<4 7o7`޼yѵkB!B!DMuvّ-n{BKVX^[!!SC}/+.icMYZ󳮲=ieɍ\ZCB.20(*ϣv#{[Vꪴz>57zZˊ5l8\P^fgro|&Iq !Y,B<ڵkGv ?;;;3{l)))kPB!B!_M@kΎl:{mLjUiR;=-9_J~+ ζU.}+ J%huG/YN1 +ѥ-zeE*!a p-W{%9тb%j+roKUB=B!bNNN8990B!B!).yyt {bw[-\HU-upUip4^y^Jz6 SEͦhVd4$}-PkϬfhqSbcTS֊w+}=\+[tw8,IB!B!B!/@6%5lԼ6%{^JWJ}OKz76O$hRF'Ltp]*4%djOW޹CV /K*L^p5S3V Fu,WI]kN$Vg`[ws 2 mwB!B!B!k|}|k-N).M!i?=#)[棅·k8dۨEfw"ͱRRrnl?N&vX)~ȩ}!#<Kb |TЮ UgӺtod˗k<[=P2#ŕ0;ҮgN7m~!B<bO~F.__~6"+ȋ/Ȑ!CXdwSff& bРA|/^ȇ~Ț5k/ޔ)S4h/ҽW!B ]԰tw}rI̮89,BQ˥j*"##kT.nڶm[8{,+Wm۶foy6ێޅ~GawARFދ'}_JԥLƀ֝j_8uv"99".]dTSNKt`!))4i%%%L>]~7}v|McTvZhch߾=Fu^~et¾}eB!B!RB!j1^ODD-[QymGVVz(**V(, ١ IDAT,^'11>%K0vXǥӓI~J6 0ҩhJ'r3)7,)ZKvuEٜ+I$%+:u4j]PXzČ++()"=/B_דYq@Zn՜!VTաxSU$f\ T+}=}2S*k.FA\\۶mc$''ر#?PPP@ZZZ?uƍG4i~Cz=TJ:g?!< p~6=ؐ4l1 [1:r&', 5#qXXY0%x忖~,XDxV{gSiz}Z=;γu_cDQi JbU)g?ތ#%Rr &3P޸ŃŠ)G߹/оQ3"B|j2:cKHY`0?8Je6Z=_ gjӵ/\ K&Lvl)|U~P*d3~[q" kȦ/βyk *cS2gML2r(,)fHn~ -L|wZF^Q/ż;ND?@nQ~^s__S槪̟αsf3W=Y:U2c G~hذ!?3=[~vŬYP111ݻF'Zۛ۷_#FTs'<< RRRHNNӓsGGGow^HMMS-\6mJRRW&$$$ _SҾ}{r bŊFqjZVZܹs&a믿M6~S(..yʢ_Zb޹<>̻#YjbO},uVDžbDrbOj]VN3{JCˢ8x>1d?pJr MCbqJ Ҧ HZ/I_=V$iNCro|ͅOy4g.OF}hu:Vɯ!sSYIr.-$( JYpFEiM[vqcõv2c/?O$+s74ԏ>o_wVN-[re∉1|:7ϙ3gիM qF8`}B!B!_ `!ںu+ݻw޾F7cƌaÆ 6b"## g-INNFӿϟoݺu7f͚`ccC.]L[R{n-Z+cǎeǎFu~iV&߭[7ê2%|CRRXYYUX^[DS4ܚB7mՏ.EV4xmG؎P(&pꫲjtLel9rЪAc|+t N#';{b{So: 1*maXk sGӬ8*3k6D3?(Vyx/ =/q/I,Nd\A4 ʚ.MM^Qa-Yc `8pQݪv6{]F=;ʘ2?74ꞟߜ*3gܘ3gL0r|}} #8,, ,!`ڨ S~P_[[[Ο?ÇiժAAA&X;;;OTqٹs'^^^f'B8B d$B$$$СC@RO`` U[XX!-,, OTTQ(FܚTMJJpe)LZnppz4Mb_։ˉY[p$W?GyIغ:K)3OvA>}y):5QiYeϿ#YnUsnҊRʤLjuOUoN|qs+{F? h{::F_~mÔ7Tw̙dgg3l0=z4˗/Ҳ?ʞ#/(!lmi$nae _B5$CX2J\s&!Z}T6?^nenu|xLll,M6ٳ4n!B!w,BB5bԩnݺԩS*,,+WsNƌcMϞ=y7 Nu9j(֮<+fHdf b Q%5Q55T Zj DgTQUmh,:E[KD fA{dr&7 !}? |9>sӧ"|X/@^:i_@'d^:&q<<~et}zV6܃^}Ji;1!<̕|U$ۂE~'{^3пswtܝu"NSXklY̝˓`NYctnIǙO9c4*&Lդ46?'a擄,y@#߽ShH;INtp{kK9uF׉xy4 ɿ/Nii\=A|: |/|=:0k3!GaS)::ZSNxzzRVVoI{饗ؾ};:t $$j2P{]>s\]]ˋ3vX˿Ӷm[|||xg?~|ޠU"B!B)N?߯!B.$Ce!/ҩ[zcaٺ) e%D;ꫠ\vŋ(J;q%lmmT~HMMeѤba`--ͫbnmck*u7qeic ۦnʯquhRŅй'&G2w4FVi ^n:ynr9zbj`)*ULn biJ'WPЦ{^~ * k|Øۿ?f"997n͛xyyݾ}˗/ӹs{@])**ʕ+8;;]J:++ OOO~'qw~BVO>Mtt4۶mӻ CZZеkB!hҒz96E6$ #B!~_׿,Tٹs/"III:,g7nf8ukwOokeu+mkmuXYX?6[L735s[ܶeuOcs5}dʕ 8::+++uຍQ{mlmm5B֭?~<˖-cĉJW(t`իW2g !B!hB!yW ;jpptW uso3hh-9s4whJ_reO>$O>dB!B!$`!B!s~`I.ZzB!B!7 !B3G>(ZzB!B!o$B!B!B!"3B߁L***ԩֱ7ojE%W'Ow^z155-ӻwoڷoyǎmۖkk.O}Bё={@yy9 >;;;Ν;ǵk2dHJNNG}TsLVquu՜NJR*bcc9{,...T*ӧO O?4F1ڵkу#F`bR>emѵkWwK8C&L@tt4 GET + :u &бcG4uSOtؑ8M@w޼yұcG^JFFQTTSO=űcݻ7۶mc׮]߿ξ8pKrͱd&L@VVٚ~iŌ5<سg_5FOtt4aaaL4L/^Çquu5j|r9ߟM6Ȳe쟱א\z)Ç磏>ԩS?+V~ƍ9vV]gΜaݺuZˎ;x뭷8r= 8qBB!B!=Bтjӧ/^I`aPYYxxx<0J͛Gll,Æ {i8cDz`zbbb8q"w&%%E3Fdd$>>>ѣGpBMԩS5.\Yf3/dddбcGݻ7|?~qQ~:`k,XT* Ċ+X|&O]S{ 6nT?[gϞc^ߺdee#|}neөرc>}֭[9s&666l޼ѣGӿHIIFսi&>#iQQQxxxZ^Tc?O Pg L6kO>~akkKJJN'())_x߾}qqqgĉlٲgyFԩw#/?`,\}GYY{deeł 8uN"""8p k׮G}k2{lM*"##پ}; ˴nZ޺u+!B!B `!aذa:˰3f JRSSٴinnn =5}GФ׏ 2~x]޽{ ݆j0{ܹ+W7߰duFLLfر^zFpp0\v WWWMj5 5k0e:֮]w}ǵk(..fÆ F!!!L0Z/B֭yG򔖖j/KJJiйk677377Yn1XZZbaaٳg...Z3 8880n8>3K$[[[P( e'O/`ĉ޽{ꥸẃFV3zh}]M͛73uT\r5uNllV`ҤI͞:uiڨ;w=\!B!a%`!pA^^ddd퍩VLJnݺqܹ:|ʭZ >s,Z'x^TZ3;ʊ__8::RVVFYYL2+++֬YcTҬY:u*sI={6O?4iii|ᇼ:yjzLҥK)..f׮]xzzҧO._B_*sKYYY:3ZzՕ;w.CalPZ!B!a&`!Ύ]vpMnݺŮ]>}::e{X/!88}v?Un޽ZnNNN۷ILLdĉУG666b vիIСCթf&{AWVV=CԔ`HHH:o]ѣ?SLܹ3&&&@zzu!Ԕ$ѣiiingSxGhӦ ?~GRSSp?8'Oֺ7===q:;uD^o՚Z78㽶%k :;;P(ͥm۶@uP΀p 3GWB!B!hiZozBߡ] :""" M0ҥKҥKfnn:Oٲe 3gSÇ~ T*-[Fqq1P=S믿6zd||<lذ+WrJ~m}N~SSS͛o2Ėtޝhz⸸8ziTj7pqukHi^~e֬YÑ#G4m5 >?Jŵk/ ;>J)S0g45Yf}vż⋼899#0akիVVV?~\oSLaݺuܹɓ'kĢE}i򄅅qF~UUU:{lk׎˗/su 6HM[laȑ>\,!C0mڴz+B!B0B;TQQ΢EXd ׯgذat޽vO$''_bmmM@@saҤIޗ6l߾qqqߨ111<Z3{1 qqqz3\\\P*X[[3w\5jIII 0@k9O^4i3f`Ү];ؽ{O8~8NNNxyy1x`fΜi0}gժUxzzҶm[Y5g}VkvbcZ)@BCC9p}CZi6l̙3}===IHHФ0|Օx:Ohh(899iU}x{{ˬY=6Z뻥9X!B!a8}nB񻐒|]Jn ZXnt[T*TUUwYڔ4I;~O z*NNN8995wstT*.]5...҆[nsHRf)ᦖǍ7رCHLMMeѤ KKKJ.^R_t [[ݿ?f"99YiiiB׮]]B!-QYiݫ"ې8B<RB!I,찳kfdbb3~׷AeMLLy`A LJ_|Qq}]̌Ν;7} .{AV\ip?T̙#_!B!BxB!Zdnݺ34>ڤ=(̙SkO>ɓO>yZ#B!B|$,B!jU^f[>L}~!B!BѲHX!Ba>}~!B!BѲȔ!Bșort}bs7C!B!BB!@ff&tIxYY)))ܼyRIn055mV6ճ1߬mYt)h/w2ڵ+m۶I[|9#((Hoٓ'Ow^z155-ӻwoڷoyǎmۖkk.O}Bё={@yy9 >;;;Ν;ǵk2dHcLii)>Z&&&9JVUUUrY\\\ AT=>O&..~icõkѣ#F,Q][s} U[_0B!B!D!`!%22^y֬Yڵɓ888 lݺܬ8s ֭ӺVwjH'lTxWdz=qz">>kkkr ;w|WWWƧ.Ν#((oooٴi,[{}kS~333_w/رz#Gܓk{>B!BѲHX!hj5QQQӇ/jܹoooP(+W`gg=X}6S~NVOODu^Χu튏033cǎ,Z7xC+>>ѣGpBMԩS5.\YfZﱸۘ1c_BFF;v 11޽{oMQQGׯƍkƂ ?JbРAX˗k5>y bƍ@wٳg꟱׷._]]_]̾Z&;;GGGҪ(++\5sdeeT*aB!B!D#~ !-ءCpuu BT*RRRx())¦qQx{{RU;z-9r$;wfԩ4Wd۶m7N'-ռmm۟;MCY?w gcY/'m0͙3ٳgMTT9C7~x6oެuJ昙Fcǎ%((lmmٳKw1b:7'~~~$$$h%$$bt6mb̙Z=gggfeeoLLL3fV{dZb >>>Fs g˕+W1b]vGSPPUϿ/J%CݝK3fٳxxx~)?}ݝSR^^Upp0o&{{8;;=w1c֭cB!BBBo>FVXXHee%|GXȇ6ܷo_\\\5Ƕl3}43g~`„ RVV 7Zj( (LUא=CCC@P]Pxy#447xʨ`eeG}5;v`ƌ<ˋKl2&OFԩSqppK.n@3nJرc$$$0zhcƌѻ {Xuԉ^zŷ~5[MԩS:sssjqݻW[TTwƍTTT蝅X >5&\3ѐM'''BCCYjQ}6L8tzAttVWWWVXݻYz54:tлoq,\z?X} 155%88[kkkzO?Ę1c.WsΘ:{5>SSS GջeZ78㿶%k}6Q(Ҷm[:(|g@Y3B!B!OB!Z] :"""ĄnݺZ'OMo)Sn:vɓ5 SNi-][+++up>clW^XYY.U5^Oɯ74?e0~wGM٤q|XgJbٲe3UkgDcaa Xr%+W&11Qզ̛7ߨe---޽f_ᒒٳQ7| IY"STTI{Yf GѴ=t>|81*k׮_?AoRdʔ)̙3|͵4C70BPP- "vظqf?媪*4dMVzB!B`B;µkxxP( >(8p}ҡCq''',X@@@~~~1vX|'ciiipVbjjJhh&Yכ|7+fER|zߚd]߳t݇i|UZ۷oooo\\\())7Θx LMM5{1 qqqz3\\\P*X[[3w\5jIII 0@O^4i3f`Ү];ؽ'pqb̜9`ϪUm۶xyyqm{뿡篆 6pӓMzHH' \]]ת닓VVZE||<۷___f͚U~z>k9X!B!>5w;B߅#t袽,녴T:u^z,,emiժ~u)+-ivk}2zhRSSڟA~f͚Err-,,ի899 -JҥKX[[,mu8;;{T*h njyyyܸq;@acrssR'/T*5jǥKmPٺϴ4|}}IIIk׮_!BjjgiiR4711ё֭򌽽}|xxx֠B!ée~ͿwgBkWeBq?=3a,DC,Y`Znxgck^s/B!B'%-o6!B!Pm{ݿ>B!B!DK#`!B!jSs/B!B+B!B!B!BBB!@ff&t tY[[}' +uvaOԙ?yYn;W˽lCgk0cܒ[t)]f@l߫5ӧO%!!!xxx|2˖-ҥK :9s4CKf׏nB!B!If !-\nn.DGGk~IJJjƖ{;V,%$pҬ*8c2+P`bf•9c7 n7-yOOnF9su1`zۺu+G-k^dРAdeeqI1cg̘1Mچ;vЯ_&N楗^BRݳs!B!B!D VO>\xQsƆ^xAsee%= .jb& ڠ0QhWUVQY^e0QYQE`bSnaecu+6kE8蜿)TUq;64I/οE+VXM(VfL ۘҺʖ-$om}ʍ"l֘ck_EY%7nNo>pZjlo߾=tӠr\jP*Zm9޶m&W8.. &|Z8q 0d4cgh|(++X[[cmmm04hPQFz!B!B<$,B`777N<۷omkj)?gRx&Vc۟qv$yY>yK*5_چB~CUal[$QYQ=Sk9 Ey7x f&S>=abjcUZrd̞oŅEKnh$&OK?Uo_" PŪ/ g<׾<& OKi?lIvR1|˩9_K?/_4v)oץVfqMq려Sk{OvK4s(*˫4CprdFee%۶mcϞ=z7|nܸoܹsxFx뭷quu%??֮]xD IDATK֭5y8p QQQdddR8z(ݺuȑ#?SSSZnÆ cM>???6nHEE溁qqQrss8q"Z駟34)))9R 5KO/^4?ǭ[7nׯ 6~5Ə͛ g̘^ B!B!ăGB!ZbLj#̛D~P(~mn*U'^|'?IsJQt\r>c~ljf3?ō|agPRsh`E`JiNd͕P/jJTUV|U󫪌 3>ni8UyyyLzsdwy7ٶt7Z3in0Ndk]QVɪ~IQ-S‰l>[cT*oץSUUmم}(qo6w:<-܍s26f<`43ڞ'NݻwpB~GΟ?ONN6oʕ+;wlΜ9ç~HV^Mqq1(Jj5=/.]"**{RYYdstt$##7|SozLL YYY8887ߐ ӿƷ{dee~zuFVVYYY/@DD$;;/rA֮]kݩ۷8lݺB!B!DB!D ðat[^^/^dҤIeNIq)g~>O?{f['{ (&ohH&;6ִ̙_2(]UA̺3MҺϼ67r k>u]ƿFrV%v6}}xvSrM}c5yfNz9n9uؐoҤI===ɓC`:u^ƿFeEfSȥWQnw[s9.mV1++K2sL~&L-eeeTTTܹα@I'>S&M?Nʱc8~&ZטEV9JNNN^}w'C׿FSB!BqHX!h***c׮]ܼy[nk.O WUUqQƎۜm>ݸO_#$~~evW ߸ͣO1cTUkwAO0Yw5 ]mco)WqͲzŖ9ΩvkuP2T!j!V64Cu~V(ouW2홓6 ZV37Fc _{Rm,xkGǭ7wf,Z{5\u 4;NcOOOnܸw[www233k퇹9jQC_κu1O:7ifߗvpW^{F6X[[Bxx8>,̙3Ϙ511;&( rssi۶-P3 \C)''z;!B!BB!DԵkW"""4C@DD|)))TUUѣGflmضt7GvU>q>xW ޵z[/sjo:mƕB Y_*+݆b_,Wsu:rܲ:j7qabcӐ!=Qᓿ~/g=P㗨|$k<2v㉉ {<̾G]Zuw煽 D%s4 O^w'hLLbN3,c_$if[1׿ajf-f#B>߭kT{u煽 ;Et| +++?S>,,7rtuUUUS+^ú+O3[ Kظp]]궰jŬNΥ [/_ղ8LM8`LO݃\6ˎO~_yoƎ>\n'?{vE׻{Қtͅ;wĤaVUUajjʰaØ>}:SNդKJJ ]vwݥdeeEV1K.akk۠g֬Y$''m_cO!BFiMg6o;v}u>1_rj_:vm.`3ڿ|1_$))@t333:wvFTTolkƒNPXfΜ9 !B!YZ!d hJe0MP0Z޿TVVL311irB!BƓ%%B!wE{W33/B!B!mB!B!B!B :B;IEE:u:Eff&`ccL-B!B!BdBItt222iV !B!B!h B!Z0ZMTT}Iۿ?3PڷoϡCB!B!B!BСCfffgSSSdw!B!B!xoyB}ɓ'uǍΝ;<QT)Ҕi.9R9ԨeS)fciM {fʪʪpS'@YEvn7ϵo:umPnn򔛛"8kתrww`PFF԰aC\-p@'OTRR6m;wNJHHbQQQVZ[cƌɓ奯C `PQQ<==qFmܸQx6nܨ"egg@~~~>ZRzzz=F )($$>::ZvҴi$I...ruu?}Xqqqv a07!3F_~~Gf5mTG5@@Ϟ=ճgOpL&effaÆjҤI=E-$999ǧP;H  08tĉ2剉ڵk߯zp@aiӟK Ν;˭{wlٲkQcǎ7:>zP׿`}u26 `\zz.](oV+VPffۧ{O_(hIUZdnr d6qT?]j裏ԫW/z/|^5NКoȜo?aiIkQzgIt Zf^nŊhԳ>[ ٬;vhȑ jȐ!oʴ2e,YR9jRSSS۷•GSNdR.]ԹsgeggKrsse2~رzGˌSꫯ^qՏ1B}V?[@;v讻*S.Ix] PJ{ݢ{ޜ3KEZs:AoIX,ꫯ4|puQ O\:pbcc^ZԤI}g1cN8>@qqqz7ˌS__Yݻw׎;l|vZߖƍm۶ڰa?*--MNW=DZ;n}dRv on-rSvSfYu.E̹W?WԐwǯk[=>8%%E>>>jРA'r|||򙴍5ғO>#F(((ܳj7l __ߚ\E6mҔ)SԾ}{M4IVG?~yٳluv{91BgϞި߱c֭['ISQQ%nw~*Z/[\ `ɓ'M*!!AΝSaalz!9^zQw{I9/In\dp2ȫ3/}7ukV^ar_ѤIIRNNbbbpBeggh4i۷o_LZj %^kuo?аa2 I&)55UWXXXcWE]<_p,S7n$ŋڸq&O,ggg5nXϪUԲe N[7$e%TDN:'7/5pwkkpt*۷]$EFFV:֭[h4*..NᒤÇ_7λ[ ڸq.]9shĈ8q_k+]׿jРAnץ}o߾xw}8qb̙3g/*DhڵKӦMeffSNNNU\\O^9^! U̲Լc2bzA+{>rMtY:xzQk?^Ǐv#Gj9rRSS_kٲeeWAAAOjq999iȐ!2dN>?XSLѦM*},YD?߯CiСڵk+k޽d1PܤbbbuVjjȑꄋ[^3G?MP26ꃵ:Ǝ2 =z̔VX:hӦMCE^{5 4H-[T^^~a1LǏ+44lR</&&FO=;yxx(""Bs՘1c?ֶ޼yW>Z}TT.\Xp0k r[}M!.fbc 8nsVonnմiSTsaZZJ̐]=dx 9&֤I5o޼Wo%%d2i駟l2 [MJs^kzh$pEVčb08V! 08ԍ$ٕ`0ޟ;wNG:v(WWk&z$hΝ9~aIѣmwyGFܺS*99Y۷o]8{e˖5zו~iܹu2H࠶mۦ F[d%GcǎpႶo߮ӧaÆMm:q Ta|[N (MAAAYf)**JVr|RΝeyxxSiii XW2eiFs̑_W__Ծ}ju\zB޽{[yfu]ÇbѢEw^q8ʛGaa)ooom۪RtYnnnod+olٲE?⢯J?~ZO:tHS߾}]]fZMݮbl6_qtyxxã6VU))))3P8fXb:uJ:t$9;;+44TǏ0oxWӦMׯW``޿w߭`M0AÊ+/Ѩg}ƽlܹ1c˗_~6t5J9s2wޑ֭[k8kʔ):r﯏>رcҥu֭[k„ *,,РAzԵkWWGk3e-Y PU$p`7o֢E /h۶mS:{<==Y޸nV5o\֭\RGS[ RZZ IDAT=T>|.YSǏWJJƌ#Rk^6w\}>^+VT\\,IjР֭[pIfjժU5j۷#""_~EjҤ1⪙7o5o `lVBB-ٕxr-$:h*))QVVձc Ciɒ%ڰa|A[``йs$&/׮][ݵuksm?~[_roܸqO+h6ZRrr222]]]տ-]TR+Ww]X'VP[X***U\\,WWWf_6+*,,ԭjW;vx ~j׮Yf7o"""(ժC?~4|͞=RsCOVffb uA6m/{yEDDUV:wfϞ={t]cǎUXX$_c=&Izt}m۶5u*zqj1k r[}M!.fb>c ZխU۔(;;[Ervv.jrssSƍ RqWϟl%we( @vR``Zn]Қ5kt*_*su)-Dj=v*k\_>'ĕ p} `UXA͚5FG>>>ߨQ#1BF?UxbhՎ>U(88sVo||ΝK0 Gb4aÆiذanrU{p]" 08A%%%)//Ϯ,,,LҥKJMMjUHHH{7{n 0@=XT `Զm۔!h+3L?J5h?O Ҹqtm{5Pu$p`ݻwuܹ=7x6mڨM6 ѣGI@= `bQVVJJJyzzãtqqQaaa :V6oެ[DԀ|N772g 8!CGNNN׊+称:wРAzժU+Wu:Wl SzɤP=zׯrrrb %''|_dp]{W4ydd>X 2JHHj$%&&*>>^rbQAA$I25 8c0H˗/Wqq\]]e6տ9rD˗/饗ԢE ͞=Fs[k`0qz甝-"ooo9;;۵ ׫ZZ,׷VTspPNNNVf$?/ku>,V1{EEEJHHХKʭZJNNVIIIɓ'UXX?Oꫯ+Tk+]… 5~x5lذZse=Z'NW_}_e)==ZsH &h r}NN.\p9.?cYV9sFiiiWoDΝղe2&IW]χjUJJJ{߶m[nݺZs_VXXSN)33ONNp7;Cy 0b(++doW^^z9i2MBͽJ$7F m'sU'Ɗ]2NÈgx*N9Z/z8AkF!sN}#qyo{>-նT~XVs=jӦu[ձcGSXysE2=ztDo<<<$///͟?_-S}Q6mԪU+8pE]v$۷O...zG*=z)IZ|\\\ԿJ m֬YÇەܹs5c EEE)22R˗/W~lٲEر ,[]>}rJӧkܹÇ5j(m|4u] Q4h %iРAzԵkW5LLLTǎ}uή5j<|||ԯ_?nݺGMw߭`M0Avm߷oԧOuQ<#FgU}klP5$p`7o֢E /h۶me?͛7kҥjڴm #gү -e^-HYq*o$]:=զIn^*6io(TMwk3&:=G9Q=UbOXҦ'1A)Ǫu?d24qDeddߗ$hȑ:{>S{֔)S5f:ux M4IyJ;եK}zm :*Ţ={sΚ;wΞ=3f[,ʼ-K5{lI獾v ī_\\$uA'OW:|uŮFxΝ/B'O֬Y4ydEFFڵ9p=TY,kv]vRRR/R)))z+]?yd)--Mj֬^z%,.]ŋ+77W$[֭N<'Nhǎr9i&=su 9sF{)kРAJKK=G}dצjդI裏*11Qھ}{+w;vZ| 2d|||xXB~~~v۾HjԨ-t>B\]?Svټߝmjlwv҉-dW7?ZbU؟JW>̄]s%V[@?ܩ!mg'E M/Kaz;Wf+}*ߓ&M7'(99YRNҘ1c-ZCoƍWyڽ{ciΜ9*))rJc咤(jժ6-[Կo mذAvRAAA$y޽[ .Txx*ѤI-X@Ǐ*w})))Q oo޼yּyԹsgM4 هzHJ-ҫZy###C6mݻ*ժ1cl[_7779rD{ѭު:4auԩ\]շoߚ;/\5iDqqqeڕOJJ?-[H*ʻO>g{,ynv$pPL&ѣv jjɒ%ڶm,cF*X2f\q2妑؆:-V'6d$%I'*ǣ*dVw+^Ōҭږ^wU6FQNNNc/P\f֬߾599ܳR"%%EԾ}{IڶmQ.]{09*&d.'Us߿FזTTٲ-I;ve0eצyv>{~}]INN`ЬY{n㩧RffFL=Zx\\ݸџ2+kc=m۶hlVQQQv}/Y۟9͚5+w*-I89Uc`0UV:w$)}K>Ά2g޾e1L#dЉ1:"Bد4alTX:jUv9yy\5"I͚H5Q| W~u3gӧ^r/^,w[n-I3VU1u̙ ۤ,纼E-Sgt\_@@.\PnbFy:udK^FFݿl6lhwݵ5jպu{nkeVŋ۷+22F>Zn+mРVkZdvء_]S@@@ϴ_φ?}ts9sƖuH>UfYj׮ x@t%*88XAϟ/^zI۷L9rJ-M rVN oJ9={MoءKsοW};5d^P$^4풤}Z7o֟Zy8pyrvvVNNbbb*}ԡC)<<\FNVWAAAOF>ƍGUdXw^nݪ?`0`0ܹsjѢ~]V>`jJz҂ h"ٳGw֝wY޽{kujP>8"-_\ruulV.t/ByyyjԨ.].]҄:xY%b~<mf nӢ[+qaI)I.n 4z}[Z7CIRt童 <}=uXOS?vֿڿ|SS7n/B'Omk04tPvƍӆ ~zM>]:tKWT5kдi$ICsQRR>޽[z饗tUڷo|||[o|I͚5KӦMӸq|W5vXEEEIK7gdRPPVڷou 3gꮻ֭[]'5k4͞=ɺ˗?V۶mui 0Om8~BCCF~>^=30{lk5{쫎ݬY3͛7O jO?< Ѩnݺ.QQQZpaUg=B\>dWvXBë4[3xDٲX,s6*((\2W*J̐]=ܷ ̹ rJOOׅ ԪU+Z?11QFm@jٲ-,y{{wvڕI={VŶ-k[m_V||x^}{߻ޟ .(//Omڴ)Sw%%''+88ڐtiFnnnUꛓ4ڶh׮] ,***Jk֬?v]X%]GqqN:%j7"ϟ|#bYɓ5a[Ν;5uTT{|>sA~8W63!.f+pTNNNWM4iDM4F v_7/yU=1S͛7Wl2eKF]2+ --Z[^jg24}tOZ{?M6UӦM˭sww򒗗W+~hQF1b^u5֋/VJJ Pnx͝;Β|(88N)7q_ׯי3gԩS'EGG+66VÆ kwo IDAT}/ `z֩S F+oذƎ+tvu5Ow+>K6LRWºP=sjv~(sxP9$YQQQuY-իW_?Sa]m}C~ij0@=Y7|XࠒgW&Pnۢ"xL͟?_ǎSO=^zxL@Am۶M22T&K[O>>rrrR||VX!???۹k׮U^+jڳg,XP+c-pPrr*Qo2GJN<$5mT :w R~GXB;wVvj:GdTzN6nܨ7*66V/^ƍUTTTL]v)''G_|Em f]v2 JLLT||xIRHHBBBlk.M6skNΝ8# ***U\\,WWWf_u>Vk(0qz甝-"ooo9;;WؾgϞٳgݬY3%%%X `լY+&kɓ_*$$D۶mfX jYDDΜ9l5jԨ `P'<==;48Uphpk}" JJJR\\jVF6|ܹNWlSxѬCkӟٕ/X@~m}̙%''kƌ>|.\Xz76Am۶M22$PÇd=u2~AromJut~d]+ݻ}Q:tHNN۱cn7eiFs̑_H"""Իwj߈.\ǫaÆ1=[ <õ>VaߢBsZXwq$iͺ{mK.yUԡC4o<۷Z999*..VӦM+j*%%E>>>rww+?{JJJԲe-p`EYYY*))VXk֬c}a+0Y>޶Jz/|^5NКoȜokbe{{ۤkaG58b}gmٲEر ,[]>}rJӧkܹÇ5j(k̙OKK]wݥuA _ ^xA]vUxx|}}uIRbb:v쨈W[Vvvvk2e,YRP3$p`7o֢E /h۶mU:tHҥ]y=]epvM1Ks1C" eР7VInm}ׄk7'ʣJ, 3Y25pwՠ7&(%X1v];v(:zRSSeXkꊋE~ڵJIIK߮tɓ4Yfz饗X,Zt/^\G[o[n:yN8;vͭ5^Z?C@Ͱ4jȐ!񑓓b ),,R77hH[ɜo~Ī?,2Ϊcw)bνXulr$Nδ?ig,5^}#BagˍOZeH~衇!4!h"ꫵro$##C6mݻm dWvXBxuukt6%%%ΖbT5Jqkշo ۣ} .ʜ[ rwܩS*&&-/\{̩{9.\HN(,,$))ŋӡCcݺuҥKRRR ++ &pYF#8p???-[__+Bnn.3g$-- @Yj >O?LV+v"::QfСs=M}R""""W 6pB}YKKK1ͤ+׿%Kp6eHWc:fZVJO!oWexKBq+wucv,7rtr!rrr8pol6{nOnn.V+/$_}Uȱc8z(7oaÆm۶Zmnٲxf͚ETTܹskԷX,,YEQ^^NFFAAA5\o+fS&L ((HFF+V ,,X(..'jtRIJJjț8v%姊Ӂ_5EGV2e'#DE^ G֧k b&^ۤIMMe߾}Czzz:3fp̴6l2OOO>^z &0n8.\Ȉ# eر̙3@YY7onR `|]w5<2h H;T]]1xxx`2=z4}qIJJbٲe̟?Jhh(cƌièݷƱMKg;8AnHw zy<fon=,_~ݻs)ƌS#GSO= l2fΜIXX2{&/|t\9 0IHOI^15;ATtzΨgFXVJKKX,RgBiR J)),,={XrYeeeR)))!??nݺ٤s>Lll,ݻY틈TUsLw3\SwjH{e4 h^sR[PPP__GQWDDDDDDDD*p-p-u"""""""""M_PDDDDDDDDDDDDDDHJJXDD:q5b0õر#+DieJS8`ӧ""""""""""W1%EDDڱxFQ븏sl^`3<ieXDDX,`Zݻw<%"""؆ ꫯZ;1cYo <p#֤H;5a0dddb ˆQ̙3?~ɓ'Q""""""""""ZH;hO}LL :tV(/w""""""""""ʔ8 ba׮] < "֦H;d2fEFFzQ/==B߾}"LieXDDff<<<0L=>}Ԩ}v [E*""""""""""I `vۛgyR, Ԫ7k֬6NDDDDDDDDDD.%EDD)H@@@[!""""""""""i'i'i'i'\:4N8AEEEc lN8111\0 *y>sPnt\S7dӇ;ѥw p˿lHZ_$!!>|8\'IIIDDDuH_f;C `v*995k֐c)))[ɂ kÈ//^zCb0@ܨ|q=o\m 8Ÿw b8q'-^\\\<{3;/D' Kf8οߟ+9=YO1o3f>\j|ſ݊lma8?`40|VG"6ʘgwc?!Hw/u'r3ߘJxT0Sc+f#rSϧX_l}?1 O%dxO1p|f@f3~!7n3ƿo<PTT?O)O;길nwfۿSsW`jCxx1D& jb";qo)+_8ja1glV3aZ1a|Vng3.,)aع> >|q=ܘD"Ըvk{fL3c{sxϝ4V}fn瞡$>|rϧP_lQ|ttי&V۬f3wfbZYrW_e;vGyf<==6l۶m-[`֬YDEEKNN̝;FbC aʕ+o-ikW4iЄ  h4+ #66Q`0PPP@EEWޏU(Ds8 g`QFb 15oپzr7rjkL-ū-'B>' m:‘]'6qh?~ M}:6l`ڴi5D.]7SNu*3f8\6l2OOO)ScO}_bmrLgy#5o-KgҵD++nEg;lզ3ãokp?GY`?rPQ|0ߖ`߷dzcq^?ܫh ?Gdsu/ſEkc`l-=xz{`0{xu}ͥPgJq^)YOHn(?, ξ?l6[1fgg93{$''3qD|}}1LTWWת׳gZ?[o̽L<n~;3ݻwgG2ՙ/^xcQ\\Nqq1ӦMcѢE)))u1j(BBBҥ v`0h/""ֽw5!""""""SO{RUUEXXXΝ\fj3;6<̽u˟g?Ww:^|'BsUǙG׮])**s.]pĉ VoGNطo_Ⱥ)ܹ3Sӧ14;Č36m%%%<#M>,ZEk.ILLdҤIQQQ;Ô)SXn7ntrfzZK:XcѤ'"""""""W,""L&233 ,222ի`O6xxxLOO|zv>>~d!|m:@^E.Jyk_{iOtd߬YIbg[ [>Prk']lZbu5kp7C}i*6xZ.+eF \ ϰ\&`chiAǙPj$0̗[0cMNsp7;_i䧼G8߿?^^^ٳSNwe~5,K/DDDW_}U  gΜ?wni'/je֭lݺκ#G䮻jR|7x#o6`Μ9߿Ν;ӣGsNK___Gh4K/Ą 7n .tرc3g}ܔ5o}v,\~,""UWW|rf3L&FɓOزe &@~_q͗1i2`zx:CgcGwdT/]ߓ#Xv}y'*G׳1t lۿCٕc張\;_=np sKz"[+.K?j/8>Cwӆpxٜ^YߟYG ط0iޟйgCuz-?8sǵlh떤pt`\-n.<ci9a y0e>3eIII|XݻwW[UUt wwV\BBB]ʹ>%%%ӭ[7<=%>RSS댿'""""W&SUe9z0m( "reHOݩ"""hl4Ac4huOrFuuxzۃp?k7Wẑ:[㉗OI!3s__5ECǙXk_oB9S_3xo^cyܬ jњ5k#..m۶~nu222xGl[?/97m۶uKODDDDDDD.%EDDDZͅk%fܭ- < p¥ϧ.=Z(9ngcصYzG[Şzˢ[eV\ɪUѣ~m/r-r#<`ODDDDDD%EDD.˹v%.}Z  &e}i1f2EDDDDH%EDDD?ދ{Ƹ9DDDDDDDD.oDDDDhחCDDDDDDDDDN:H;u ***j`lVVtЁ~~lu6a}TT9*ת=RIZ9/HBB $ wqhmY<>ۏW~~ #nO/c opӆvq)M/55e $.xQ}3\;^7fm|H 7_f;C@3EDDکd֬YCJJfsoܸ+VP\\Ν;y8w\FrߗVڃ+ٹF[{\z_٬sx<ɇ,w&o0tЦ ܞLwСlN5+7./8 /۴ku%<ڼ+]S?/W ])ݺuy(9O/G/v#FࡇjOri?4XDDgĈWWW]wETT6oܹ6u.\m :lP|P Eyexxٱ̻8 ꈫK9Kw& 0cƌ:gkl6N>j%<>|vFYYf . 뮻 6pM-n-""""""W<%EDD1BII (..l6ӥK ]vѣWmRQr_{9|.*M̎ };ct1rtI|:t di~#{Qvrqq5#wo?ho9wuw%<*)G DƃUج؋Fצ6yf?7ֺ,&Lٳg1TVVrHOOoR""";w.oرc'N___BBBٳfҤI9s[oK: %x<==Y`S\xj&J'zY>s0{/I=VRpsbke0q{BWapSV+| zD7%}! l-[ ?8&W^y\fΜIZZj*/ DΧ~Jff&V]v}avm{&n=](""""""i6l… ygINNv??+w+//Z~_70NF `ʓ7qï`Æb";qo)+_su~}/g wV |]au=q?o_ރ=iQ}`c=a#_=~3Za e-}~#9d? }_* `QXδd鱃dOkJy~h( Uy,N87~~(>;sD||<۷r|||HO9@f̘rհa̼`C UFĉyٹs' ~znJNN63o޼ZqN<ݵkF]KP]f\ii.%nLL :tXOκuXj >  s$v{:G7{oIOo Fc_~[f=obMƢEpum{@1өS6ƱN:h͹$z?`Op 0"Zݢ|{R/B 8}`|(/epe5٩}_UgߜeÃBu\[=w/| 7LkkාAY1<|i0g}ƞ_.oe?Bq}9빿mO [.G P_j{/L8___L&5_rwwyC ̟?***XnF"$$.]k. ?\s_Jb/_qȑ#,"""? ͤI?/wXUq^6=RcU3|vJQQյfh"-ZĮ]HLL$1136u&BCC8s||ϯvww)--u,qXm-,ž@]%u]Ώ!gB~s[0$j8{w)ⶍbSPBC3~.ʷ/ܚF{MOQ|lʻgG5-{t"{BD7Nj5%~gP?^K!,`d787~~~= x7s|۷Ie }>QQQ;Ô)SXn7n$!>@"""l^֒?~򈈈s/XDD2Ldff:~AEFFzr)..vٺ~2dH{_$h=wͳ񌊩oV;8[Vx]r;:tI///SlΝTUcӧ;wɓ8޵kWo @zz:=%KPVVύ78ԩSlذV#Gr]w5 .0Ϲ?jEV"KnW׎\3&a4}%~ďϒ8uXa]ɺڇGajSw5B;g?<`S|>Hi1 &ۏ>Y瓵=1..~Z.vu۔ӻ_>G1|1  Μ}M }>Fy饗0aƍc…a ;9s8U/++kp/رYm+"""""""""Puu5˗/gΜ9<,]ѣG;ft2w\>#~_ֆQ_z]c87o~>uS-dլ|{S FX IDATg&=ܛ{ZM5)~Lg}VltԉЭ[7 7\ΐ!C2e xbXd ,W^L4ɑ|8^/{DFF}(衇ѣIIIkδ#G8luk Qaps,fwx  |3/a\$D”p=¨$8saik\ Xٓ`Јq=})pK,kz9 > w8xt*.rb苮kIΘ|-]ϿKt>~gz'7LyzOW/$[Ss>_y駉'..SlmLc#G͍}DNNNe˖qˆk׮M }wqGm+""""""ϰ[:ԝSرDE7m-UֱZbXiZ?(//&TUTWU<9xx6^r|kw}ٞiOXa{t6Eϯ[Oݦ(\Y.kHkߐŦ_6K iKaa!ٳ֖ h3@II t֭III#55_i&s`zPADʐS{WF$|||L].B#:f`fk6z˼nv<#m|{v뻸6=w=v_hjY%.kHkߐPyuoKPPPg%߼m6כ>EDDDDDhet9g_jW sU,{3® Gu8"""""""Wi4XDDD~=ݸ&u""""""""""շ񝈈I `vBK@c&t*** "::G3g8x f+uVƌýወH),""NEΝIKK# px7ׯEEElڴxwwf_(N2dHkގ8A `vjڵDFF2uT C 60dnV, .dǎ\wuj[nt֍X:H"""j%==믿JΞ=[l6sq 9rmrܹ_GDDDDDDDDDDN3EDDڡRf3ٳLxx8ӧOˋ2V+~~~߿`lq۞TTT]HSiH;T]] @qq1O<O=d>͍իW}vܰX,-n;11_O>_ODDDDDDDDDDH;СC1ҿjL&~aroQFQVVƊ+8yd'""""""""""SXDD Pg/` ??=o~xz-͛ǹs4h}iq6 Q1i JS̞=BQ ۻmZ,rrrZe)ii:Mi炂j%3 JwΜ9DDDQc\>JHxd˖-tСI"""*ZcfJZZDD3LSQQAPPѸ8Ϟ=KNN6޽{a""""""""""iJJJxsΤ@xx8[l/COq""""""""""RJSk׮%22Sb0jӇs!֭[Hk"""j%==믿JΞ=[;vlDDDDDDDDDDR `vLjj*{l6jDDDDDDDDDD `vbx z), m\JJC :ш+'++#KI `v777 C[""""""""""""""h$::۷cٰX,cXr,]UUdjEDDDDDDDDDu"""ri$%%l2ϟj%441c8<ΝKN={v[+""""""""""@ `vߟٳgSXX;>>>5(:i炂:LH;H;H;H;ȥc2HOO qqqCQVVF`` 1112Z ?]H֥ N>(7|3GnÇՕ&_i,X@xx8wqSi4XDD*))a…l޼ӧO_(_·~ӧY~=Ν1}=#/v^vڪ/ob#/ɵ6338tPZlIW\ /֭[U~ٽ{7r~k_|}iH;vZ"##:u*V}GN楗^b׮]|a(-`4X]kJOO&`Pv ayy4l6NaZ կϙ-*˛o.'/eUg޵/-G:z~.eglڙƿPx;d2ODDD2f35wޝzXytؑ;6=x~x~99դl6 ͽX,LF!"""""""" SXDDZ<<{&{B@@JAei1 ҢH֪HJ `(X)ů;ٕeIB6HB,m@0~^ו+9g<眓OII @d/͚5}]ƽN\Bu:c8b>a٘L&']Ý?@ƞލŒ͍o2ŖR&%e{}rCMB~q!q0m}[xn|^R9SGR/y_HR6^ѶuHa82b1to[(Y&Mux&kI/ۯ+ l9n~7O6={0gfϞ͂ (..~ff&\uUus՟jcWӸv0WYTV0nM{sÜ__`8}@nxiZ;px%W?#kkF,Ai^OK6 |?˙< R~Iϒۚԅraz*!i^wp`̽) }@x3iIG~LڛSXZv\,Vge 8:oo}Aߎ]]ϤW.9 [}UΕ_Fr_ؐ\Mm8NDP㫮;߿t˗/mۖt҈`ڴi_'ҫW/8~8[nen_j< ׯѣdddзo_9~8K.eÆ wյܹ3,\RSSIMMu;dzQW} `&jun˔)Sp8,\kגTb/.*.,[7wg~:M=xyY7^C[4F[Ot}ۙ'lQ~{__/o`\sEG6U 9~oO@?RNP7}kˈǘ7V t Vȴ;8tgt]❫>}߹ܺzmzq)!p#CI9ɾGX\ķs \.vÖ3k25wט1c\+{ɱc';;UVeҜ߲2֬YìY a̘1|}\;} Z5S ҇ω"""M3Ixua61$&&\fcŴlْ׫ᯌgŽs8C6vzEʑbk8zߐ ߆m-,`'wV};@XX ~mOE_[dp(ygk?*ɤke0 C.$ӢC;*wvx=d7g]?uS?pH:VVVddd#55]VZVS,"##˕W_g6[K""""""""TJ4AxyyUHu8ߟn}t%JTv+yi%[^/s~EaU1+?d6a:< #s 3hּsI*!̽)`شI 7wd4v\dgbۓGl%+$X~Q!!a+6,٧+- d2=?1PKrqQʧ_qmi]/O n}9_^^^Qa5GFF:oV1113)[""""""""?:XDD 2$$$}v no>gߧ[n2JKK][GŪ?wݵ5@$X`39cjßv;~P\8G31o5X}1 bl5S`<2yl_?K",S9&LL&r~xۓGYm}tgZ˰,Y)úvGưNE7=0[G[} 8xUw|犋Xs^ pnUݹf)Qa r-w.vccJ/kn+yg۝*;w7\Y8U0p@,Xl2Gŋٿ?v[e˖tܙz 6o`w}-8unر/%EDDDDDDH `&*))Lf̘30L 4pk.? /iӘ6m+Vsgf91>u":rg?Fìr;~?ߌ0X9q!͢+hNF|8u< u+t}t4&HvGruzo Ӄ#; "(GK)w0ŧ*uG\qʹ0㷎uMi4}%m'{ݕԽSO)wۉ׬޳UzVZ&D6|ÇӧOz"""M %%믿+qRd2aj6V""""""""""R{묈HfcϞ=$''cوwzGرc?~0w^ 뽍ԍV4AV\LԩS]\=Ájfsgܹl۶k""""""""""R7J4A>>>\wuf<==ILLĉuЁ$x+$koaɒ%$&&ҦMz_DDDDDDDDDDF `&(00//rVd2ѢE rrr ؼy3Ν߯s;%EDD LBB۷o0 v;#>>b>a9sozݦMz%EDDDDDDDDD<{"""rq$%%h"f̘ **A`x),,Ϗbv__~f+,"""""""""""""MTHH<gΜۛ@WYPPSLRn~#""8yd"""M\xxx`_СC; IDAT֮]`qEDDDDDDDDDfZ,""" wddd_cGDDDDDDDDDE `({""""""""""8ZDDDDDDDDDDDDDPXDDDDDDDDDDDDDPXDD X,޽7nɓ'9z%/өSz#F|ˋ/6p.\|Invlz3֭s^zѫW/WԩS|8}bE\\\sa +|֭)--ߚ" V0  ߿Byaa!%%%DFF)~MO?MԾ_ZjEVԩRXDDDDDDDi h&p_OII ŕ۶m^6W_M#<|3$ 3gt豋{9HB6:wС{)(ev~,[yҤ|]ҭ[7bcc;v,eeeꫯ΀e̙=zp=@Ν:t(~l2ϓ&Ms#FGVV#G$..ɓ'_~<U^wg4,%EDD|l6{aΜ9̞= KqF ҈#tV:3g9z'*\ 9#Gz֛B;g*Wpv]+V\Ejj/BC+IM+s;f{w!_Kzzo˳ݞ/ѭ[3'GdƫuW%KУGJRSkwqaa!%''~q8,_U6~xڶmKzz:iiiDDD0mڴZ]'ҫW/8~8[nyVga=z *l$''s(,,dg^ݕYb+v|ݻ5<___ 4>Ejkf։podV!!~u6a\XYx%6FOn;bxz|al.oaɒ%$&&ҦMJ넄{{{cX8u&~\ҵaDGG&##tڵ:QQQǑdff~QUܛ8?,0w\,YB֭iJ4Axyya2UܳZmy?~|Lpdiu99r aooV-noƯi~!!̝۞s۳kW!ÆeذP݆ 3gNe}[ &0ay{kls^\\arjW{yyaaTޯ<22DVV1113)pMbcc9ydu/lٝS.!Zmܫ*.ߝL>se""""""""tHd6IHH`ng߾}СC&N0`L8ANm۶Vř_8ݺ\:KfRZf3X$a\elޜe|w}4; (-uٟ Vsk;vTUUmڴ!''Vmsαqrg͚5ƩLJ`We˖1tP8zh/^[n-WgҥbXd Æ sdzyfN>_|vo|wp3߿?w}wQXDDJJJ"333f0c L& ja]tWx>))S[ѻn:vNt7^[Nv~\qVZ܂?U6yr,wC6[IJG~?^v&_ӵZBt{6$ٌa5WEwѼys:uD˖-/7`̘1VX\]믿իiݺ5tԉx%%%1uTzMǎfk׎+-[p8x]e'OޣM6$%%ѯ_\7;vfsշGS\L;]78DDD~RM&tU_?9soookGM,%f38~p/BC/$z'SdȐPjSmqSJiVFCįj~IOExW.?xyyoԩ}^^YYYjꢬV?qAAA֩fㄇqW3e BQQZжSNѮ] {Cįj~\ܺukϟύ7XÇԩ)))tСNHòԺу)mCyCʞZ,""ԅ7xDv~煇{U7Q yб%Ə|@XvmۇСCUy|||k׮&IHHhgp7DG\\\; 8p{L_L7J||ݒ?ʲF밚޽{A~~>~~??|e 57pCg^/'xzooe"""Pcl}\lϴ5MTs$ l*fsg6EDDDD""MOʞZ,"""R_gKhO}Ծg :Jo9 8󱭡5w6 _uaB(-Ⱥůɡpнtk_UZjEVԩRXDDDDDDD.k:XDD r8pSRRBqqqv;yyy8K7uۺmP,+& ;+à  8wd7 Ounc\K_>0v 3/HJCB~rم͓F8g'L?U_.OOOjyaEDDDDDDD.1ilٳdl6111w²/5kp8Ŵi]oZ*""_+ko]b6C;Y[اC˶UΟޘOЪ&B^|gbytOky ,8Wvfs#wfÃӜv[kbÎ^_7^Wv|_׷""""""""%EDD @nn.SLppB֮]KRR7x#f8%Kh޼ysOK;6.]{+l;>wΆjjYlFZÀØ>$PsxU&yݚٰa۶mc֬Y։mSy\V8's>>L@wsaÀ_IFW=klY0X6|~0b39 9tǀo[{ .$/U]po~5Æ c޼yhтy%EDD gl6c6ILL$99Ulj;Cl85B׮ͪuH/23/^CB.mbL=DKrsm\wܹt C]ǭFjl6u9) &XyXz riΏ+؊a@۶~~xxx?! `2቏-/""""""""""""MXIq^ޘ"& /ooJ /y_"R9%EDDDDDDDDDDD0ՊEaf^Dr:XDDDDDDDDDDD9s | 2 Ç e\KW١Ç/ ᖛFЬQƘut)U15pw3>?bËaPET0p88<==rdKۗ\||{XJ\6M.8 6V66|˗k:ϻ-0>:6}'%HWVVƱۢŜJMe\,ucЫW_aЪe+;[nɩS'ycR@Ǟ+-Ydַ6X^S>UU{S?!M 6/Ann쎪U _}]lg,9FJz/}~֭#" xJ_q` 6mDii)m۶eȐ!5~G[na='546:^^ķ%YݷL޸k bA+p:#O>]E󣨨5k3ֺ^ u IDAT=-Q|uX#(,,dW\f7cwl3ƴ;{9Ҏu/ğ; )yZQj)OF\GNwr?3j՗ᠰBJ }~Ge}y7ayǧ_R%Y\Aia&%>Q8/G },J.k{y7??"cٳ)qY'g[d{y08s߭.cEܽ}:7F1Db9eЫjN9U߷_ u|y`yяq>QwR|5ǭ#7x+ߖrK `\Wh:DPo2m'/e 5q}> xyjVvrΜ%,4///酟_Pٳg ӳ_/\Gfr q+(, gZ,矣Yx@Q: ȸ!aٮl9jw6yVܼ|BBz6d{Ιg f5wMRƹBCp}tarm-|6wxU>boc=j^)Wr5鿋XVG&Jv9!o$Nٽ{7۷w=cǎ usrr!00X6tZhQ" #88ֱ*++ ( u5j.Fr%rU4Nv\}p0Lb6bDDDV̌ zp[Șo#ncժUU-_]Y~#q-k;8ٰY}?WP̋S07H&ߚ3RUoyQ\bBn^!/N\;SZj)w~}>[U_x_? r p1>|[*׌_dь^f>,"߽{ԟrr?),,b X^86L~me1xHԝ]vDեe!UQXDD2{Օ?qϠ@셅xnw;s%?v0&4$I=InWa8Ak埣ZFy pA,ZlHn]+3; ^~x{{,'Ll&=$]Ӎ _o ĴL~Ύ{(,,dد{6-X6_HDxy\߷{l}C:Pj$T69Mv?oMr67>=cl6ټeۅO!:rgɢy[m{#G0L _OU9+pQ)..gҺUKg|’2"'yOW0L\߷)5j3qe.lQeo.Tj\Ŀ^]Is͡fcOҫGw6| eeV,eQ\kP7ż咿=z\DzJJ wuҫW//^Ldd$W]ur ˗/b> r~GXx1qqqddd0zh^ybɓ'뮻8x ~~~;P5gݺu 8w}$Wwݺu&""""tvɲwֱ۫9@ ͛G3h'DDD111ƑŐ_.Hv|Gn#Gٱk7\Ӎ3|V:$$`;5Gw Jۆtq& gq~UL#'H\ز #/g~ܼZs~^QqﷲmX|\ձR0}xXVlVju=^Uu s̜1_c7s6,O=tsXa%V\嚑IX/ͼZO͛h4oN}2|?yCu=<1JOϲֹkHu1'DMQh^>cޥ[~ oK=O+EDD~J][< '{t;u"t 3NrMBLز}'47w楙'B╝ݿj򷙳Ym>ks fN{}|m./Nk @\N{w3'qq8Y7_ ٳ罳=ҙU{N;V. xgdQ+3.<<<(*.fjF~=h᭷v4?ܤYv׷a1x~(JKK:W߆a_3 Y/NTdKӓs\Ow /p{N8 g@V~!N\#+Yj5V//jGWu*0 fUy޻Auٹ3;^ksɼ8oysR^ Pܷo%%%ׯrptw2h ̙bᦛnGaҥsEħ~֭[c׮]߿EqqBBB0 TcرckY~= ;[V{V\Z;p@\eW\""""d۝ 2[ǿ4D?{Unz!z;"** "( *ې^IQ@ZI۶vٽey'ܹ3s̙;1Gb 8vZ"#= @&ظiߋFQQ<<Ư)Ehc; ](..AQq1K/?wWt'Ǐ&TH-ѻ D;E> vl| 0=xVy,g[ՠ ` ڹB}_lA~f. ^p. /&Co0f(Bď:p]BcS,v2w\MLJ[x|&E[1f:u0 [~KNNIE+Tdfe1{X'qϝ:vϞ;Uթ: @?KP ߳.eb{0\}طWۖwԯ1KLDqq p ~~xL<Z>7(|b"nE|cFg] IKW⍗_0kn RT(_@_ ;tz:wsd\t?mn.gu$Q$PƮ]̟?C uLfBpp0 22㽼Q\\[b BDDȑ#/"##1|ⷅ__1bŽAAaoWS!@WFw*=:˯Gڵ<:u ڵ5jX4k ˗.a|XEAgPQmp8 bg~#) Dde`Rv%.FLYc 8=L~t~0^?IkLC#Jw<M Vxcx@ᩯpop>ƾ—ǡwϱ tQowk-~9<,ZMEƲwOg jB`FPlPv`"9ɝyf۱1"`{NYף3kb2߫0AATA K:@.T uC 8:30$kSϞE-O-ՆZƠ}:Ceh=#3 X V<ӽA~T*U&Դ4jXM4[ww2wDSZ!xaؾ{/V~-6iNF:pSaw6:mWQc@ۺ(a03ưOۡF ohu:탯>4'O7ڴj!ff6jX1˿txzxؽ?W z }X :Nvx:,.8iqssC oo޼%hGr%%r!ՏXz-?nđs:8 A@@QGII Mwekذ!֬Y+V駟FNtRCrFn޼ ZSZ<ҥ '  Qz=cjva9e8>!? pqqtVT=͚6q05rŋApmhݺ5>{eV-[(Md  쫏ADtN!95 HNI=maB<0~=O#ϞNmqNW ZEt 2186"=1EEF(oǷqq1S;!Ĭȸ骂:5lzetz^x.jtmOvŘ2#)9'=Me:9-=; ;=p?>c@^@ݝOgXb\tw:^MʂVqM; #K@҉Psבhߦ5jAr`Y.Q-l9'u$[jplٲwl: ..'\z!H;~/VŸvCS'957+rh/éo $kcǗo(;..*~{C/g2=svh ^jnnCHH0Jqq9B6oEshui񾣨*kEd3SرnjF}+H-щMC@O8K{& *HN}=.&Q^z/Jwi4ԬtP/"[~݊iSNOmC-ǟз7j ml ]͛6w6S ._V-˖/=#!A-[`-lyi0L/=o Ob߹8ITU ̓'pq߮WlۉQ0'$auxh8hm37O#: zxzƘ5 yI~_9>[D R7 hjHHLo;w,ymv$']R)~`7 eHzZ%K'DÆ 1uT#77˗/ѹsgn .G}Fŋcرc ՃzիW1͛ iӦڵ+̙UVǎ<1b;   z=OqssAᥦo~^:wž^^سw&u:܏b::Dnbqǟih{7``{"5(*ҠEE2s8:vԤHѦUKDוWέ_m,0f9 ss4h'5:`ĸ(ole*%][̬|<9j@}aأ3}/{oLm;RO x{yӓ]rb=ѩs\pmڶ}_ s#[`ՎkP}v_TXX[zxB~N2Ԇ|'@mȅNL0Uh  Bڮ!+XŋAVv~i7ܼb:vߋ=T*Y,S9v% ԋ̰xᄊmWހ;gL;#{lxg,4m{M|)XǨ/OOdfeS@s(%[`^puJ_<X5ߙ;Nh(s^ChH0]~}7ЦUK*ROaCGjЫ{7ܺgr?ǟ)ӟA?rrs7PF=Iz3vF/]]= 4.nwvv!88ڴƊvh-Bv$']RHg_~_wYJ8%3`ᚿ%u$U*;nX?0T*^uh"=pf'A;]oj9Y Fݠ,///xx.../Wd\"MvHJLD0I ѻm-m WX !n>7Nx]֔-m1T0@Tjnj'Y IDATAeY֗*)֪oAKNaSݱϟAphg%( z=9qG 믷P}ؘݸq6pww{FAJZB)#[XTlXi,zԖǢqF6=^w;Uc1'fƔ&b`>99(,,BڡuZkZ$"80^^w_^^>rP+8X֪ڍ,[:z}f0ܟ;|+-Ʈ[ wّt9GJՈXH #% S>W#k|5&p-h4ԯ_4dN\\dߵqmt:$ԩSwC۴i7q>sH  yW]hZoauj;OFF&[5}|Q#ZyFQQ5͛5u8>oGr27n؛hݺ5ݑ|"Y᧦V-yW/_w)VW݆J劢4kR1Gʊa3!099z::ws+c\<5{~]N7#.#0S]îv~_t:~Z-y"u*4.{8jS1`Φ?`pť70Z}O_7 h w1uc>jC>TbieBe(PK0Mo*^8&LydIOIAQɺXBPjj DNw}t.k 7*]ڄYG:>1 P?..7ׯ/!n4lЩaJ5bobo߾t3AA!Hаgf<+WAzWԋp54iܨ#xxzB7 //nnnr2:wꈵ6hӦ ^v2@='WL -7Ù WaF puuEp` zۂ%Z`7M,.gOo }߸v\O vUrV;OJhjD7䃒b-T*2sKb`^kip@; ovZn䫗ڝ?,'*?2 wbeu(ѻGrЫJfyh JQ3ڌ[c&*LAAm_Z`ҹmMع{0WQJvkVч!0X8 6 u{> -(o k=ݨە vf\PbSE~*?+ZKT*1t `e'|k(_DOHvAD%ӸySy@ʡ$@C݅do%K=+[Jdw@iAFq$AAAQtbלGG\U,{OASD߫ P:S c+2^u[*MAD$߽|\yrzQQ}Pe@AAA1ak] Dgdfܹ󈋋GDD޿|*.\ЕO Ԩm3,) KʘA%)6& #OL?TxtN cǝ$l jiY"ٳHIM)8̉'q1O~Μ17o |`: eNR|j,[? YOzCFƯmtɓߧƎmzj[ 0eړ8z8q 3n6W\\2Tjd? ryVWb}am/άlygץƌA^p?qEۦcؽw/"njAYT8эLR婬[TG {Ùubmo6syNݐhΪʳ-mHzʠ5?hl'3110t8Λ[(?7nق%-٭8<)KʘA·& j@rJ ZJff 00^GrJ BM+6u:dY Ƙ 7xnii 7FLjHIMEPPP+W^D=x{_ /bGKe#΄,!:itJU_`Ou!bv\<ꉬT%%2Ά;jx^lΰ!1OOO :D\:ii vhjJj*%u = >s?nXzQ/[;v$Zl??,[$?FnDGc/av-9C0y};Əc=1=K{u]qq1اz^bi|ԋGbR1c0$0"TVwutݻ?AI;(%Br BMQQ2 aқ=*aѺ:l { [Qe}Yl݋ʱ !VG^i#fjDMvDBHm\ / *a-OuCT5=c_.\r06۷lфyP0o@[u&lކߣyӦ&/_/$''ѣXJAI}!x~aIOL_rp4LAU|~OܳQ.㓥KV_!ccX|v~ Ixm[y<=<닥A}~IѫGwٷzK|>zbQRRZ՟} 7A丱xN'M2u\M~}zs %v# iiiؿ?9jtD]̅Zc #?`ӏ?o1l(wjbמٽEZHT<41cWyj.(3{i6gXWg];%W[2 /ot y8~4Gp5<=2a ;~˖.1=>t~ u{bSbT-_aӖd#&6{(hЮM,zmHPT6d0Ξ;o׬5)r9{~ j5{g+xiPT`aİ/i;姟+s,&ru˖8Z<9ړ۞=Hutر{7BkBzF&,ovU2 !U{kP*9b z@Ie]Y#f.3u-+r,Gw҅në.6i.X}(vOfMnJO؏QHNT~Bn}GsR޼s.`,,%_~ۆ'< w77Ӥ5:4 u])}^Ĥ$Y$`= & dΞbAAT QgO‚|ܵZ-;x0K>ϱ;wBp&L~ٷٴ3e3^coc?>>:r3a"K`1 lңSXNN.c|0Vcnfvg11۹{E~eee1[Wl#SLW]g:1X^^ua0XCg`1VPP&?ҫEIIװq&~%-cQ.Kxj[q,<|?%$&ֳc#Y~AZglNFF&е;yyo/0-W8{wKX%oۜd֮cgv#:"5ֳynlILF4%-c1VRRf̚^]y͘i3sO֮cgVRR"Iёȱ,331O ʊcy٨qW~c=Ԍ2nNCٿN>"kױ3yӮc\f s 6dhkd#Sئb1œlbzs{}[L0d9919)V>cɓlvuhbaJIN!VVco}debu3A)e^Hnk{ks܈fg\^%eKo2 !G[a3&^f哣uiu)큔 [Lbu7<нVՕX9;KmuIk} J'/orTrtϘxDiOH~9mݕwVT>DUӣ5r}N#˖`Nf&@s9}0~GA]ʖƀa#ߢ}vعS1\89{c"5- Iؿ.FE!;;##[лg@2Z/"CLlݸ'M?``c Ьi\v } .8؛Sx{{aÓ˄SvmL{qy#S`̄Ie7TQ(E}zƒF_꯿G/xa{gnؾs~<1f(;r*4xr6ΦF*QOO@,8()@ٳ%m[oS㧞?Kq5EŒ0nl1}OE|B4IE`>sΞ; gc+Xi3rsL|%^л\~11:x`Ԉع{z{bzv4s ]͔G)TJ<”*CJY#R˼\7h48psrRP;4TrXRC:m?7ǟؒOicA8:  .j*&&&ARwߵx޶Mk:r[dtxgzv0{Zjbbo{VENnHKO/<==Mnuj l*}Ȓ*QwM 6>hެ<xpH,c<;i<|>5|@$֫RhN\yfu LP}[R*cjZLXV4 sda wwwիפZ&7ZZ!!HKK+3+5k4!Pq79%EPO:d:LbU ;wBƑo^h>$''˦8Ժ!Ks#GQXT!wJXx>5ѣL W<<^El`07Fvv6Vi>mA L׋'aO1 MADah;q;wƪ_m6>lw>@e />]5kZmF}lCqIRk6 7:())_1ZĤ$޻En^ ϗ,:9[;4x'hw0t =v G^={؍:Ͷݳ'Y:;wbZҕYYY(D``pF4Z4o~\\\S)Je AaaLS]ڽ]:uZ,WjjwʛڡA~~>| 'ȒQÆU ~Ô?8x0H!a >r#:'~VhZٻoѦ[Ķ;,&,PTT2\s8>0c1-"G>{RoE@Y{kk <CB~~>[^ܷ|e#aeѺl#큭|UΜ%5!l8CbRN !VG4YL춝R՞]֓r9'% UR Q}T>DUӣ5r}Nm 8̩\i "zºΰ!/w">>_7) T*#$}p :  ...գ;,ZaCk^ 6ǎ0h#ED]6XShZ*< FT* yg4&6nt4_ ǚuXiE+>_=؛71vv>\|Y<˯PRRwz wEjZZ>tO#Gܤ՚ƍ II㑿5ku" IDAT?|lܲ0 X~e鎺tOPN{(qFIo׬E}WU h4Ũ(}c>.^gϖ%[3ضc' ;~zhݪ>ZEOH %وZ ƆM[Mro FV-[HSQu>\>̟gy}+8|odfv Y=EDmϠyyysQulʊ\!T3Zk{ӵnSR ChuZ07k#iÌ8ZVG`Ǯ8X^9S"blqt%K՝t[mZf1= J'/1T*\\\R^Po= ?Qb?BH՟r 1ʻaOއjzuLc0AmY,QÇcΝȿSwO?-[(c [K1<JOV<שFl!>c O43IѽɿeDɘAA; RGDjZ.>>,|k׭GnɇKdB,X CuVX٧"Dx8&F#Nc4?Zo s^]QRR!ҝC ӟ8rP*ÇO>FwwwT*AyN/66nnnЯ=u{߬^eCk+Ͷ3j$&>2;tWH^yxp0ÃFE~XbT:xӞxv3gWapssC:'K4?yzF<l EF!j5/|s_}GII 4iwޚ/5kݚpwGhh(:w7Cxݺڹ&>(<<=yzr@ wBDxx#mY,?1ヨhn޺_ۆ_erװlŧ0n? >!ሽyko.@^댷yyhҸ1f?3SR׮@F L+OE}r|rbbcqy\Ln*K%e:l 5-)Օ5Ζ:dE\vU+Tkʯ&9B6X6ǙИATٗ={o.O(ڵ+}#mx6q3@PP>MWUqݥ眰p(8[MEoy#V>(mڷJ_y|)g_uE/7oV\|MUI_e/չP#Ae ` ʴkxz}_n0*^W??>eƀx>Qm閟'jCBHٳݭ+@xxӏ!5h;bO\@˺I/k5n.mKҿa?JO̿1:uJygӹ3w|޵+9c2G~}b@a!PsGH>,7c@BOuّXCSS5\ڧU"LRG!#J7GqFOM: )c%s}*JԟFWHK[R~!~~CAi?p,~@yq)#b%߿U~ 2AQݸu h [n.իpoK/Xt4{Od3d|UVW[ǎԩ@m`={7/S~=7{Ϟ-$1bG,~)S^G0܀ KAdmw32}:W.yy\-[JM̘8#T?HA̾/>~mɳ_1N߫С$=l15.W^(Q}KwPjׯx￟}1>` HӏW|b-.J۷FH۹Nz<8Ç_JڗBSi+?/B(k4R?g p/V>RSNpy)}&~)_)PhDMio wFGi)fbu:m[>v,?6\>ߪ`?A]h  +Vgl,? ȑ]Z|ϓO2w];8._.;@_ |r|Uٳ~>G~u:r<,/G2Oh۟NǏ0XRKяXR$}|ݻرӦs ;H_pZp/4ΰGO?c|qŒ| NW6͛c|ǸfM`~vt4|Phrⱅ~)ٗXI+u%K#c}L~L K~,5ǏK#g:no_t43GwC}̚w'$p<%K_ٷJ@|(m! -[s瀭[KO}+J_i)VD͚I= ƍm3/+ا bR ՏFߔOjp@(~gO~b/f_>椦;wZ|BS%߿U~ AQkoXnŋ|fM>Icĉ@w{x}?8p$zO)))@d$z5_}'ca;'́~㊏\\JmY$.\;JgOa1l<'|7TA} xңM;͏vT>o۶AϞƌfL}ow}m;aQheٽ/z%3탣:S2NR#1bSigYLwHϞ'^6m "%RG1"_j*_{Uob9BoJ'%|ܫ_}i4SiS~T {& ܹ@Vfe'Ryi௿/|&Ma~ZHWzˁbwSV LƏ/}zyÝq?]MUMs Q'vma=&-wX3u*[|eKq%T?)a^Ԫ%VJ [ ={l~޽|IH,0ľтB( _}AQs]4(ΐP_gZ]NipGbʧ/"8SbBB/²ib)EC|(Uo9BoJ'%|ܫ_}5R^'Rl'g|*" & ;_|W;j5ZLN.^h]--;l99 ^O31͍?ctT/W/Zd<v-y^Ju]H{ rݘR>)AN(C G7(mߔ">%%ſPT}S 'oo>iW9Ǐ+-?]Ko~}Ƙ1SO?ğKi_دX&V?#$}R¯lk@jO8ٗqgFF|SgAܣAQ8}V*Y0| %s}|6l2tj~}II"grӏRh/MJwU ߟ.5rj5Ш?Y3 `gOy'C~L] _~oƍ|EΏR{(/q#PT돵kC-1""x}Ri]dM^OHKѷt)0|8'eUCcأz{E}ӿ?߹777-}UbbX(%TMG!/OOn?NLGB#?uVe! ` n[WǶoԫtiNǎ||˖|U_|m[>gW M>+nװ!0bsww;uMUoO?bÃ}Q>(|t6lO5lW[7f p2Ӳ%A%4o\]K'e?B(~<.=W;sv[;u^~i~g#~B}I-|D?W-݅ʏ|*\[>~ɽ7ۅԫoݚo 81x -x^tRCH+Wry4-[c 1+Zǟ2Fΰʴ .XTAqOpi4lgС}߼i~?W&&#9)#4iRv*(Tk &MQQmxݳg˺(џǯիWm1RJUHz=}iD(aC}df*mJ逘^wzx{:ttqCUCnܰanEȦ4~U*RQ?*-BTWR_|*?OZ%߿RI-~9~ÁQRI=VCu+"l_ؿ}(]Rim?Aw!gN ` H$Ogd LRAJ8C1{6mؐfݞ~|ݻZm9yvr&Ɏѣ%- AT#Od]ž]y垝@1Z(C[8?Ѓ%?/k9iP Hy=/uyG ;#y#v";>J+6a #/Ύ;2bĈbz*]tQ$ٰaCn$u]~dƌ9O:=zh?a„\2zj|#|V7o޼l۶-W\qEs￟3gf4hPtp,ڑgܸq-/^cǦ??v1cF&NѣGgذaYpa墋.ʖ-[Z3jԨ̞=ȝ,]4_~y/h8p`|,Y$sO>OU?&vd̘1YbE/_Ogذak瞛h{r7fڴiygАG}ŘȜD+L0!Ǐ߫?G}4 .̺u2mڴP%G Ȁ2t7קU2gΜ\{IΝ;g̘1-kz 2$\p^}sNtIyw?Y`hgjkk۝;wfΜ9W^ 0 ?OdɒTUU孷j{\xZUk]6&M׾]dI-[O}S[:q.h6cǎMX̼yR^^#FjnCCC=;gNkn?\^ziקOVqmN޽?t͛3nܸL8Ů``hgFݻwgѢEUW]ݣG$ɦMxL4)|Az&466>l͛SO=5fkm۶e֬YٵkW󸆆e{ۤvt!555O}}}VϭLϞ=tҜ}IW^y%nfϞɓ'o{;vHUUU$I֯-^ IDAT_wy'w}wfΜN8!MMMӻwǏ?555-;N&O'ٳŏ OdI.]8x4CW^ye^z >ϦUڶnݚiӦeܹׯ_***RQQ/}KG4c7MMMpTWW_~YhQ^3kޒ%K|6Pz뭹[ܽ{ţX`8.w_y晽 B8}I2uԼ⋹ˎt|cP>}2}C?eʔ6`!(`!vdyZ3a„\}չZ_΃>~Yn~͛7/O?GXM 3nܸm/رcܶ}̘1#'Nѣ[2eJq妛nqn5*gn8?Go̚5+gyf~_5nҥ/~#X%vd̘1YbE/_Ogذak瞛h8Nuuu~oF!=XFGyŘb &dGFcڑdСonOmmAѷo#Q^ڵk:wΝ;k׮-Oo}+_vڬ_>k׮ԩS׮]I&^O~6x v6cǎMX̼yR^^#F$^z~C>}zBe˖N[nwߝ޽{vvfȑٽ{w-Z\uU)+kd466۷oϸq=?< __$I}̛7/zjf͚^{-۶mˬYk׮#yj%`hg:t萚ק>wq.ٳ+W|C6y衇$;vHUUU$I֯_wy'w}wfΜN84 ͔)Sx]ve֭ٶm[b6nܘ:G]3~C_]]=z'?I444'lkjjZwL<9?y@ W^ye^z >Gpf-{Y`Avޝ}ӦMy'oOϘ1cҹs%0#7ln[xqƎUV5m߾=3f /Xc2dH~̝;7ݻwos'?~OL2%v[***R^^⼒[oeԨQӧO ͠Ary}x4#cƌɊ+|涧~:Æ Kee^=߿Es=>>h.\ueڴi- <8G$.ׯ<]weȑY`Ag_@;2` :4mmsNtIyw۴1`wVccczu]ٸqc6lp8mwܙ9sT-Y$˖-˧>6˅^>ʹ-aVʎ;gȐ!93re ~;رcS,3o޼gĈ͛3nܸL8Ůq饗j}}ic޾߾}{=^jUsW_z6=>`hgFݻwgѢEUW]#!uuu>|x6X,qw$ӡCt%gsϵUv@;ӡCԤ>;j~SSS;?p6{L[v@;27Eŋ3v4666m߾=3fĉ3z?ꫯF;lܸŘo=Fٳǔ)Sҷoߌ7.7t^oV9YdIo6JsߟѣG緿mf͚3<3կ>ShЎ3&+V˛۞~ 6,{?sӿm/r~GK/;wo1f>2'quuuY~}x}u]9rd,X?~;o~7c=QFGhЎ 0 CM}}}s[}}}jkk[FYYY;wNSSSy8 }1O=Tlܸ16lkL׮][<sڵX֦>7|svܙ9sdĉF}}}f͚sN6ɓ7}TUUVʎ;G?Qry__ӧO$ɷ|_ڵk~]6SN= 03;vlb͛1hll̶m۲}TTTxK>=۷oO=VJccc|_ONnR(+W>СC?"8 +K/eիWر#7tS<e];V{pf̘r-YfM***2rI&eҥLeee B$I.]2mڴw}4hPN>l޼9N]hh훦[paSYYd#W\ueƍj~dɒ,_ncǦX,SNɯ֭[:o>gv1xf̜93WΠAr5פSN-Ƽ{Y`Avޝo~3fNȧ?*]ڑgܸq-/^cǦm1cF&NѣGsK/Žn5*gnh̒%Kr=Dl?eʔ7ƍM7ݴ>Æ … }/]tQ30#cƌɊ+|涧~:Æ Kee^=b &dGU>@&vd:thS[[{P͛3nܸL8ŮАd;3uuu9Ӛwv@;3r޽;-J}}}ꪔg}6˩Yf^˶m2k֬ڵk+ill`444.ÇϽy=zHlڴ)/ΤIgϞu|Re03:tHMMMS__;swؑ|+_I_>N̜93'pa6{L|xzui3[nʹi2w/ȗ13fHEEEn喬Y&9rdseÆ ߿~; P߾}tq .Luuu*++뭷[oݫ}ɒ%Y|aywbcƎcE_OϞ=ӯ_.e0SWW/O2 DJDx0#[n/~=:ݺuKY|yVqE%I:v옹st{wO4)>#]Z;W_O~Jշ37p;Ƽym۶̜93WΠAr5פSNGhk`hG֮]k6of$1cFLe˖jy7RSSsHֿhѢl޼Mk?_j,XWܹs3lذ6=hǎҥ[ҥS]]3dȐ <8wי;wnw~jpڴiS{ス;P_>[n=wb+Wf۶mmۤ?ԔА+WX,ׯ5vܙ+V?Ŭ[.k֬o}ҚuZoyNb1&L{G… nݺL6G1k֬/%gyf lܸo~;/~*++>;wlSO=5gޫ_rV_SS˿̰argO̫ڪ^~ 0 FgڵK.MUUUyWSUU|{k1nܹ}xbo1zkVXG 4gUVn(VʪUrM7_2K,ɚ5k{?n;WSS5k^ի+hѢ矟7|3oF̙]&ikMzdYjU>Ϥi5֮]I&k_.Y$˖-˧>VGڡM6eƍٸqcݝNfΜ[V^UV媫̙3;wO? &$IzB%\'I>Ofill̂ r%gJ 0 }=W\_$IڼGi֭[v#FdI92z~ӣG,]UO]W_͋/Ɯvi=;Ƈv[Cm޼9ƍĉ[ .ĉ3p$ɔ)S2eʔ${Bo69S,ӯ_~eݺu%ooqlٲ%oK/4'xb*++jժVRIr'X֫W;wn~_p[\[n%;6={LCCCvժ']wݕ?g?g3uto…P_k|Xf޼y?̚5+ZmۖYf/NNyr]]]{)`8TUUX,fƌHN:X,X,P(OLPӿ${BֆC͖-[ _}-;$K.s=5F/| u;䓳rʼ;5o B B}${ߧzjX`Avؑd;{nm?̟?s]w: ' 6v6o޼P+ iIDATc1+x: 0 -t%3~t5'Onu;/2pTWW笳j~gok5*:uʐ!CR[[իW7 m?BGy$&MgOzy^믿>guV훇zs[so߾{3r :4ƍW_}P?L~rySNɅ^b]é5>O*?xo*555plݺ5ӦMܹsӯ_TTT"_җd}(]p 2-^Ez7f9Sҵk׶*[o={>766f婮N.]o~:+K.͙goù~6lȆ r駧M6e͚59Qɑaڢ~(E `8V+z:bWWW;v`wٲe79קO >{왞={0mQ?*0pܹkr52ڜw0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@0@$K/8uqm9GJ@a-ţ]#JDJ8m`C}{.88潻nUZhG}G lْ-[62H{t<bY]p9N1ݘޙbд# e)vB:vK:'eZra-C ](Bub)ݐM)6]o4_郎tiں!V<ݻLI)u]8mߺ96w#~)PlLi{;=:wNaM^'ع1sЩS;}㡋IZW ]2+^_z#|)yoiGᅵn9~Ag/дc ;|E˦Mvq;S,NvzXhLДbaw )ʒLY{w`סctZ~#~)P(K:c'i;IR֩2=wkS:BVǼA]p9N,ŲN)uM!i|oI:)rZ eݓbcvw8!+M gX5)8uZn~#|)B:vKեRpB eIY &:'{浂8ِێvq;:%SZB~B:&BkV=T\B)I=[h0p{wʼң]p9NaO\#'IS eMR3:N=R(tn;{2w IY$ ΏlihhH's'eBYZ7%ajl'͑9sOĄH BP( BP(ZqwA BP( BPocH  BP( BP>u|P( BP( BqP1l9eP( BP( B?BP( BP( CTPBP( BP(P( BP( ⠢BP( BP(%>( BP( B8(AP( BP( AE BP( BP(*.@c]+Y spP2BE,\A.ط]8B BP(Cˑ%>ľ'7k$s wP2?qףLY](Fe#;TQcBP( s.Mq~u7pCI{yU<"z O.ɫvtz2|P( BP(?6C|Y) Y[%Tb+Ԗ/Rp7vzu=߬jb- 0+l;~E~_EpW0b{NK]kSߗ2;Ц;|k!ylBo?>a "~<ȇ`o\ܩ T \9lk9۞2ȧ?@P( B?!ige MQ:<›+?uU,Vk-N?: #픑񣞤:+p=EU!LN\H$\]J|3{ys̙jBƏyQ aԝ5D79O`D{p7BP( BPr,9&T. it^>çrt>Zg #9,^}\Gl^ǟzpi=cD9*:Nfm~y#, B#eȅtFO}6@9/c7Cqt>xk_J[UqQi٧Q BP( BpIg]{N95?`s$|dOpcwciWxz `y u|c%v#7sgPbW2ybGp6J, ڴDS( BP( q̙wX S7f'gOBL3而o,<,ض:BP( BgXc i?/fM7Sҫ[grtIsdH'nr>q׵72@>s7 ϝO^Ny}q^-{Cn9MOέ%;=9ms猦]S@7F1l[w7Ǐg.%}o:;BgדZ-~nc]wS'129ra݃v1gBmד>c?;Ufet؁vwu]˩aՔGSһm:ЮH&\t;=m;n3O8}zѶ]z3˸_SXUӌ"f7]x"m:/<}pO{qօ܂9a秷8BBAx+s>|V.;$ KvkO~A ?g{,,;J#+ BP(9GeX3ySѤbroY7x@A-QJۗ6al=Z3>e& 93;a~ʓml%[_cEmûicl_=ɯ-Ͽ}\E)'5lXZAF1rj^r,j3+g=[_DFTe>wlRkظ+6.ƛ#o :597Wū+4,yB,]GiWqBֹ'Nѧ~ 2bVuK`+~oa>6m<̊&hM1k|_OrO]P( BPu⃽u:.>-uz*&9"yǼBcEL<||A ;;#-ShdNS?|<4ڷd`lOo01cAE ͟Z9ŵ5>sYbI>=خ-Aͽ^@ӷDضn%VAd|)=O)%+ݷr]-/su]to$Rw.@epF(N՛l6%";Iې΢[T_Ӟ傂;ܙ1'e`gjٸ|=miӲ&G~zͩr[&T16M+:{4Sxh?LN܎?Pe>ΪJ |nm-›&/OǀxcUl]KpŁYK+H!g\thz'#eŽo+H-$t)CNEdMcΖZxPL;BP( G%>8ż{Axg^S Dx5\wݜY|&\}p3jxr?Vb+ܣN\$ Zr u׻2vT>[ 䬑 WM|?S^KL;>f/<}72 Px cg5ի=#Ǹ ug.z_?pRv_'.?Mvs'4D|_~6M0ڝ?z˺_q\EE˾/{>ȓifswЛ^䍻ǐ]\w̛`[?k&Ha; n?$9s%i J>ݼUh"Ng=kɻ\qpIp&~Y;~xwi;D#'?{G9S,]/=ev>w5y./CP( Bce踻o{֖^u?4|;n~{JآObWk.};^,p*k`nIR;Ss[69{8ni`1k.)̜]oNgi/Y[hv828$,ȨIIٯ3ӯƔqkMrH/ x?Otyg Z6gv@2ⶻXlsV{}l=xw5\ᾛF 3@.M➛'.,v3gmMoW#.<hZA2*j]h[C+šoxh7qe+k*ƈBP( B(Z0%9MHk5?9'p1^`vsX1yBK焟L"E3{ȉIŪRE)xz3vD6f͡n#0c@b3#c!6Sy+90^Wpש#s*gsFۦbtʐ4 pXmJl.a Zqw|z` ޸Uk5s| y=wyCAd7p")5k75lO]a?ghs<𞈌tHIp<,Y!?{D3_0+k?$үBP( hCn^{Z$ JWSMirz},W^gVY!?{Gk;q]uf0>Q~ BP(G) 8Wr!x*-^ZcorO5UXl5f- @/ؤPwq6hL > JQ=649D ʜiS#AK -,jv8p/O{KLuY2Z?"YIYeK/#_>啴j~hI Id4BLJJ#nr(iZsˀ_u\J"[BޠӸdR">5fZ˹7iWuhJv9%$Eu BP(CQpR|{zsq>&hdv?4E\zz{ޞ[F~o 6`GJ֨F gbYiK1' c.*qD2'Cb^&5vQ53/&vh%|MQRH="~uB{t!4 )k:+ز5N/~}$2~Cп.cz֝͐BHV-~1a-/p}<]5"ʆRY[L{̿K^~y^x[b/z]!=w5O<'1L0~O( BP( G RJ !FpƎlyG"c/>ίuV1;dRk;_[z8Xd7Xi ϪzHΉcS@OE+)f:ˬ X5imɌ=u<4B eyv|L24b;DTM cJdehqj ьM40v]&:]KGbkefwるG kfkP+Ap2[:]O;\G?fʧk0'O~pF9:>2?^ iiL_a?2 8_؏CdsqλOx3ݬ/Xt?N cz%EqA'FDndkMcocC?667! v) BP(~YH"9-nwx]ʕ$"{t)o٬`.^7Fs(GƐ;g֪TA`?2'8vX|<3oaУWM[a>t5w|&2LEesf{BWrR?|&W*/eIr= ^\^ӴA`q<.媑I$'WXd'<+6nDQb gcNLg~鮲Es_g }φG<BP( ŏ#k>>Wb.:K.F]0YeS:UFN<]IOp6,w ӹwWDƉ\qN[>|n vI%Gp@9"Zzgp\4Hy)eCOFzrIi6Rh'pI.uw'h?m*^zX&CDo.>KzO_sț8N;m3IJY>#r 6lj1Ͼz*gӆ̐ի NM6PmK~^{*BP( ^8E6g\`l_/`;}}7_W> lɛ~siy`!Ϩ/k0aϸ7u]2$ Ax9Sˮ!grfyd"4 8>7Fds䙄kT1J}k˾mW:iKE}N]V:-d+ kױ>j)#/5ӪdX=}= KNwʑ{Zo<9._k+Ax /w2Z̚r&xЕ>F2Id=M%0}Oyxn?;1'Vgrs;pg۟Ul*!*=$dӾ[/83paRWWȫ}˼)1kי0Drg0&W8:Lt1ǝO cڴJ挵:]o버7?$dwWs6BP( BODe#,jBP('?aVXo7HS BP(GGVIBP( BP(s(AP( BP( AE BP( BP(*-r) U8umU-vmjWզ#'@kBP( ы|P( BP( BqPQ. BP( BPTBP( BP(pAP( BP( 0" * BP( BP( ñ]BP( BP(è BP( BP(*J|P( BP( BqPQBP( BP( BP( BPT P. BP( BP< =( BP( B8J{P( BP( Bq0Q1 BP( BPTP( BP( b(FJI,#ŶD#A4MIILL"8Eg՚_۲:t0^poA7ʴ.]F4ÑHt{!," $''"z* E3RdsBP(CT!P="o^$ @`AY4SvE!5o9]ILL:Ĉ1[>~Ow5qXGХMA8|LϘG$SЦ N$'+ ]?4CRJjCQ|I88!E#\rYK4c"@MM% HMHHL$%5X$:uh=EWեhk74r_Ln^a!jj87p)%RJG97b9P^V9s(.ځkJbb"``0@׮?` /''+n];*z21KOfUAEy o%5yu iK?S2cjEӎa=.mOnjƦwUr1|!r)x @(q`Y 3d^Aj^kAnff?SL2R| B8P6!*E/'Zp퓩#WmىһwK𤞊0 m5t8@#V?~ҍrdv%yQY]/}FIMS4pcI_,೹:Y<§| ۵u+#0p ۷o!#E 8;Ru+=z#jrGc1V݀#᳏?ƶMI'L$+3$ @ FB!κuܩ LvFG׏K=[e1nf害;3{g#9)YxڊD~(O3uG?FwHfɖ('>/k>)<<}l˺2`33ׄxtY l)&X㸂t$oDJ@q?#wH!W!Big]$&0-)%>~u $߀N-ok{Ìƈnۈq%D?Zj:zb2,NĴ,/Jj2rͤpG%[v@.xBP(LT8"ZAt]`%5U$PUSMz^^+ i$!h 6 MtO zf$iJ ?b["Y;Ws (=\p|qCz?+O= qxgB^d6MN,um;}<!ZO,mSZRB$q.%јiڇ]KUe8W(fM0o\L~}n;1'&F^~>aȶm̛Ӿb |7L?VZK=[Ǒ|C }k;e^m'.bކ Jk \e/&P0MDw7zrl=_Iyr$`ކ^Rm qy6?Cd i:5ae9؎m9X8Xcۮ8aۚm;8Ė9[]!c&Ѕ@ MMhh.iiFˀ^(,*!59m%5XMnmXf'x{0H𚥈uIHJ$5-Zl N ]l>񒜔@vF*D)f"42R;^P(#o}KD;S{XIimHͺe $A2 R!ݕu)^{xE`M'AS~6%7Aoʊj!~XS1:0OgF?+No⽯Ƨs5?;}H=:&q׽_v ժu4@Mw0*4$e 8/[iZVZgANHMDxU[KΝcѢ|t?)'N8.E?z1yr"rrR[">U&~}_.lq\<&Mʶm@lrZ;|:3wc\ܖtO n<_ny\3ZGBq`h ?'^h-WTl `;2vcw4lB@As]NA\M04 GGtqDyH)uYB!v*9I~_~> "!Rg'TzmE+Ꚇa$%J'dR; c{q%mgP(=~⃴ʿD z>c^ORyl_c`_iY ! 3ɸ`Ev_e!ҎHlGԧز 6`"Fȟ{y/?=bH+;w/h`_%Q:;M|+ۑIJkLqgҾ}GS]fqusg2A:H9b8))|?{&#Ges;f⏪(yH f~\7%zRcZn|e+%5<=CǶ+ztYm !4zt| ۑ<&X^ˤ1wE;b--QzWXP_3oc/iK=#]ר*ڂ ĵ{@`AbT+wt \~ 3PB 5AU_C7 ؎C8CCbx ZG2nݎnxY8>7v:RW v 5`Bް6nлOkjXj%ITUՐy8ɋarEU&^`1mށt.I#(/}/R0e<9P&=c;377EBb-Q{Z KB Nۍb\M'd ?(5-l B6/# Y8;s79?|v3'bf/O{{ݹ6>Ws :;Jw"2%+7GX6"@؄ h>': iGFKuuh[ Y+E4MHR( žqv-F#wno]a144U%NY>l^;uu›\a ņ HA'.>].vd "!g4mm5Z" .yr?ƛgGE}kΙ4$|799iδE<̙F%&2dp~pms[y2n W;$☍\.Ő#D{m[[niێtOġnpm/l*t㩴kWfrzLSKxJL!=㻱6%NԢ6m7 ZfWvf a;ڧ?+$͉[Ld+23g$Q4cK&#!) ?X:nʢQTcqkY%ߞKGM*B.& {?}De)n)%Vۏ+돢WmEDPtzA0pАhؚKHjChAu GuݭC~"t+ZF^-I6̥*$ GL{( x6bbx%lt|3Fs7bT_I^ oJ6ª[+DAlJ|Qx![Gȸ8"SAA$dwS忠4e 9ymMUa(g7u[j*#8nxO'sx ՙӎɖ*LУ悫~O?/@jPQݖ>TnhNc GB'm%nFrr 7gIbQNktÇ53p4d'5#Uˊn~%[HG4rGucGs[)H⎻]8dA|w C3pάj!qꉋ-AYV$LψYօMo-TBY%#q-%5bxi>bK3(d-[ѥa>|~$&$ݲ|"Ak[&!/ OPBPH0A%R_8:$G B1plL"MrzmP7)aD u[te(Dr\ K@:~\Cmps+ARS0FF:q+ LP0|P/ /~W߻3ț8^t6zwa˜!l^e;h KrS1GM]Nx4 Xv!Ķd<8n<߹֬@Zz&w"'/Ĥ"BfuC\c7*]U{,8z:3-Ų%-ɥT- Cc Ҏg}@s]LGKynF%$=ؔMmT4-j l4ֹN45L ۣɽ0&7%36QVkqX%ӑ{&`'otoFoe,'rY9;(CWh9<.X篑T _q2H #a#(=D- aL*) رy ^%1Grr2id}"~kj yHecmX@,2GMU1  ǀql4ijAd)* `*&8&BF!m:ecGj4)$g.d/ҫwx5]vC\|Ag= <ǂD@d/7],ć:"&3tn9xîՆe+031/eKIIm(ܬt%r{f*\u qdz׳ `sQ91N 7HGڞ31d`׷neV:h(N|ZpvxuA7[9eYF$%%xqMČ7\w1h@0Mx6aP7?No٥Çcӎ21k;adxx0/8sˈD$$>cIʾHHIQ B?)>Q)[EKV݉Y3HtY:^MME!f'n:JX) v]ŕXUpOa@%׻&N3Z<ư=D!|=D*X@V4~plY9r{!;Q)/-gio!C09$ A0KNQY5_,gsVѫ[zwqWf&Ƴ2};L8T.8,ztl߿Rb۶+<4By4,vpSߐGBR4M}x\ÃtlTZ[r×ҨzNL[y!GgЀjcd{ÑTl MB2#+2 Dq)nDĢQBTQ^\LͶ̈́*±dkIr0TTz>۩Z2ψcBPsԉz.Mch 8ۡ9HX ?ہ\Jӝ%k&8[ ČqF3/~ǎBd N{z]2 ܘr׺BZ%$xS$+@vNn또gPD5jJxW=E0m jCϣOq jBQ1JFr1HNO1};;n,{:u!?/ult7Sč`n(X`uE^_" I) \.oaM|) ƖNHLh 9o 5A *12S';,HЎ,UHe)3IEbZ̘{}D#Luy%mq\P85}Kq jjD2QU!o&U۞(㕐}3Ts''=_2 UC:aVV<+>@ aФn[;~RLB_3&`ԤOݘdQ\񡩄H> ҁڧ\TqQm@W ^H8cw7}7W HSG&1!qneBC` uXMeuJZIiŷ Vjv58nƒ!%^a;$.Vz7 p-atplښrA:B!|>?߲LM,qɺ9)l/55l􆣋! . { ;/LN#&ap Rl1-y~N ^MuF7{en HHP¶Rڈ$gb@ ȴEi7#%Ǧ<"IJK]N0`h>ط܄6:2(Vgpqi\9a4dz5tܠ;|If{0+ѩxm|yq hk$v41ϰc877>'nnbEiY6 V 𐝙O?`7$.:{"ǍIAA^]Ӹqp?6e!eD5UBblˢ)n[nvE; ۶BеKG*[GS疒֭[2x!MPd"~@JB꾊urM$fB:DRrb+Yso~Ip$o0hHDjkF${B:i;H S(:Qu}z-܋@mqqıoHrCr@(#j98BYfBrf#F$x6~^Œ1;%C?Yav5/ٔ{]byfrV)׎Iu 𗿤ˠ` g1fp ƎeؕW$Mrˎ똗Ģ"2v ¶!BKL+bx+S[V72_47wL+A)wpL^Z;ǁ{GϪ[c_iŢMo1mx6O,GțYN|\SE.ct'q6ԽA'!TMF4'@܌QN2#n`nǑXp,ێυftc=p]@ډ`qc6> {]ρsaR~ %]zcA!#+J9mcİaDI5nvfk;&Ưْ}iٮpM361n2ɞ/}Fthu JCL^aѶ"~Fc:[˖,HСsC= rHMN >TDc1`-BIcthx^ҳrн;5TlYERp; I~bGQ0M /ZM-8mѵ`a"]0?}_?v299Ǵ @=:ƾs= ˌ6zDd3?E9Os0g]Nd 70B `q ҎkCN@h9O*n!GTx&`ҥQG2y? oJ~m-sRJ  _Q {2IC^7 UCí F`ِ|"d)yޔ6MqHA1+x2/BӑBOg3/qu,**SS6|m? %;3 vfƏyef8 P.LIJJҘ@uy)hFuMltMP2^/&M!u ~r)NbR"'r2[mA~ڟKHL$77['=P&!HL~܉WC'kJ._Ԇ&i8,X =X $/ΫojM>3i$4 )50u}+&H72)r3N.E3ki$MiJ"X۴-GWTAAlXYmSd7alssky6*)dNvZncKYwi?QqӖ$tdt`^7l+nd2ރXAJɪgַ/-[86QO-}F+nए>Ÿ}`A;4C0*yt޲NJI4# I:ۣ׾ddӦs'uHtvhhiX kc(fImdu^RS456~+7˫G1 >ywtt>߷Q2-k "*s'2`[4kK ȂwA8!w*{㕐*k trFRpV$sr:y) 9XD#E$XR"|CGHW|pd -~pX%^ ̪1)GDܹ^ztFvi)!VE& _O糎OYz= {ֳ6;lQClڼ O鎏8b`HJql{H?MfrQ)67:g K0녬^I̗IC9p7cY"S.eBW43syu\sJ7hXrK}7y?f*jHO;wQi73p9k6m2HL@~qcwqBv Ŋ΋'߱dcV9z2rҙ\r奜%шl{ӆwQg tݭ򦁜Rw7w>g ? gϫu쨌 dH>{rz({855hNxt#ޑ#= ߢl{ b٠45$'!B@9P/KYPܙ6.~to˧v]l- yC{""H~{x%$st9@uu i8k~r3ؼa#[7j08{*I]pGUl޸Hn)D>>=BPY^JFr&w7ae8Q Bu4Mi:>aqFiKozֵ Y 1{&?d6l 0 txt0D utC G6UPS CC?sX)%|)mS(:8@l?mSYQ-XOKKpFமcjnMNJWii B3e7S1l\y5h 4zΦd[ +|>ʒKytuZj_ˑ<=vF 0íoÂ֩ {.*\@96 b_D{ ĎZDJj'ssOs̖x籗b@F͛W=G;F'0&Jsd873=q򭫨 Lnx)cQUÔQ;fOq}/U;Tɸ:-V,`QZu[Tn^%ՒY}6cW`xp6r.gŬ:[O7u8j +UWiE N-̢/^fїo9'.{|fw˸YdvJmj*le֜؀>v_{xl>v JX;cW^g~36V oq)+H EkyLy ~-:)>w_pZ=}+.DUHZRԉNb[k,0pr.Fg4O^8b1"HOL*b]O3ZK/5JhR|HpdQ*)4ėK4Q< gΦ* >pԔTZ}"4 c5ް+Νg3f}!`yGJt]'%%پ*ƍ 2bPOr[M)l] ?nh}Ӵߺ;&!!a*q, ׋i>pr(Pv +ncv-U¨y)ލ]z2s; 9 ^/ZK#l1`-TA)}FnN&y7j=C䉙AUK̑K!O,˦j q83DI<-v_hx'ۅ@88:@Zt5āf`{:uLK¢;LzkF%sݘ$=6!xrn6hT:6Bn F.?ڠu*eWIcf-7:2\0h5EEt{:wL4ڱXZ" Rݍ»&}R 8U'ر(?~= Mwdf[l3-T:T|>_ !0^RS@('%Ńck`"lOZ$x#_~xybBK?ǓޢnbރpCJlt:ƒ/rE?E]n"M$}oztjCvJUMOxy숆X=~&ϜRˠIۙixUl]I!o$NՃNmrH#_ygD%?m{װa|:#|Oz?;ǎe[# 4c/|ɲZw-a&O欵Qcq^5ێNex\e-W0oцh{D/OkpAqљ# kf2o秝{`elv6̏q7e /AYW4`.Gۑ{ 똂Yikk xٕ^^G&gq~Hƣل+62Onc-Nr a3q_],~޴hsGBE Nu].hAjQބf7\p;"\]O.va\%.T=U=:¤ܧ#i^8vltMw |vSn M-G8 km`ݺ 8Ȩi^D"a6K=))!*%D O )}נKA:7x=cz0wΎ퐐 InI }炵eB4 k  B0,3gq:/jy'HᎳؼCTC;Z[k+si)%H7]'x/]] \+qo1*q"!H6I{ eI e|6f,[5EaŋK[nݝJv,n%50Cr/R*!x>ܔa/S+t>ڵD[I]&YZKȿQ}I}skS>“vpϋ Cj(*`u&Q&҈&di5M յz~)9u|K۶"+!5CuUࠋwߝ=3kXܭ= ?9+ı%rӿe_7#2vnf<3.kOpҷF%NSsys/*׾괟T#OpQs֗D*'93i/o/&*F>]՗ws|2>]w\^< 7ԧߡ.ac{6rp͝º%d9s`Mh^01Wq1)拏 R_N*Hي)#٬{NS#9Wxcɮ?Ut\u -g׾91cx-ʝ2s°&533'.^6;A4%;ܭJK[CY1N3g|Dze˹KYf#WjU=[uR: ŴFc*LRI"k!Ve"Y^Y, ``+&VTM  ,8'ks[!VĪ*w7rٳGI+,D8Nٓ/ڔ_5ܓ?_E_hۉ dwҿZ3`xo&dl-PjQb `3KLu)p]-"kf.hQ̞4 R"Ѷ6g):0fS=8fꄈJwaGCEF0B8䮀ZvމoM}vkv.uDc`ZuP^e6Q 0HF6۶yݯy٭#ϾuFc膁Z`ǃEu+Vo 3ۻWmF8m7>4tmݗAD"IS'c}ctàgJ Hl:q`핝 Я;=f:]Ürd@gHР ʋ4>A(h-d%pHժyyޭ :Mlؼݗ%=1a r{GxX ^]M$t$ŰI5lR $Amd$6f6Z2|&"j"&N8b"5~Mݪ&uG*]rsKͯKˮ?Oר~]YP'kTS7Ɏu&YNv㶊YMj86)MW!I<r{ }N .^~Ғtt04ᦓ-_[N9:ut^f&]CS'jJ:0SUsXU+nd6D"E|̮ HuLXy-VYPMQ̳l{ =tvv*Bo,5:&5CX۶\KV?maEؚ0mؑG[l8.ozK_N"G{(r9L;מ[ӎ y;ysJ(+itwޖQ4ԯd_=! ݹHMW2!)+-D8NA .Pu|7[6vIB[2ٵ w3cF Ll_gG`XG0>#}\)ngIOCAdG9c\붶K>`VϦW >adS>6nt?Pb9ac;ca->}݌}^=U6~b~@k ;-)4[jh40R ߃SK92#ލJuFk8HFm?eaۏ[$-ǗUEu3IH5`BʠstE· ox<>Zc7ܹ9u.ҋIM)Ue!N]&S g[|_K)3<Iɬ\P X౧n=ekXq wbP^,\~z~_J G:G @ԕ gcmM0ŅtD´,AW۹zF\>?9Δ)`&`ɩTT*u>PaHJJO>櫯sg1d``B I^5 ә't؎k~3V^?1EnƍiL8X st{/o^`/|!*z$) C u4 R}ܠR ~H@Є.3K$}/e<ln[> ] I|wtk %{|H)ݟk_hY$4I >TC|3f^&dJfJ 0fmMx^ZYa1C[U1<04<ڰC8OS wA\n(@@CI =>N cdsPκqBhhNN'8;6DB ͝['HG4_htNQ C ٯ1r9Yl_Q \>IJs14Atz4>`SiFXy}b$biI|pdž?;1~ܙXH NӾ^gh'sh'^|/O"p3m}Jʝnrĩ X5Q_!ROuL|5J&p!h$ l@_?R288AJ+*IH"%!!oRWmP$F X-0~tC@Z?|2-*{Yc̽խ2@“<D%N{ܾOnztжSNI;9ԲJ%?'>F:T8r}[>{ۍ+_BϔC/PQ^AK=\'O\mpʧlj:h1Om6P=c(ᔗR ǢIK$kXQ"vKOfb~/XrrWI*1EWsb%kt#A6R^]UHtDpvzS'>88F[<'ޡSzOMOӡ" uڎ+SfΗؿɉ^WiZqA[4܀ MRw7usMұ-Ce0 >FߵzHOO([,5]'$1(8hb k\0 xڣnlnX I0Z!KH1v5M=XK\]NxhpJ]{07YfL?а;lki~~ Es1bbTQa IIJLq[nY$|mwGd&sfm<ݯ "$zϺN@4Dyke?lm}<:,5H=ĬisHqy\[|3[{}3MH}'yKsKO 0-xOyȌC\W? /n{O^/ 1Lw+-”WD0GYkN=/8xUoˀz2tNI K=(1A!:=1 F1ƒ HJCO ߆nTPP9t}v:OAA>^MZ —1THʂ`+84~0B˦x݃٦xEl+@h]N#a%CD ª+Skp=,:{l"99ą͍Po c x ))ɄBnևv VMdcv:wh=Yt{]|}F:;!HN xF{m(w Y ײ,D|8FEEy}6 !qM̚3­ N$ TcSiPkj>ųo-ߎ/H_$c9#x9.n MĄݗRRSS4p/{  )7'»X4~kqW k ^"2}u , A_#'HNI ڤHG–k&x`h.i`k6lt;}wg!@w'AA:Cs|p5dԮb6G4ɶ]/:Y=~*b8v}㏧i5@IT5DĴV #<l<$1 %xI EIܺo6ť;=Ԕ z"q#"xtݵ5LV"?痓K˰s8ӲrҠЖTR{&-3}~LBty \ۃ!o<8;m+v2{0 JtP:@GqX-m)-*-вސuCxɶ#ݝG}# {ѹ^ɝ{rkBUp;,^ȶu0p@Jgs˭}*l }ȋoa8f|l]*n)ӆ| obcJgW+w:FNIK?^7XYNt۫\2pz~_KI䕎C4[}8Ǔ?Y8GO8ڨ/`1bx:Bp !y%'sjƱaٿx=$M}8cSq9s7+Qꖺ<"McDQ,LN2JL$y..n ";; c3l ~/.UxbvcYXxo4o,7=BU"aINذE!m¨񱊋KIF][[KII U'&YqSS?c&+7ºa{ mU +<|딃)t/eĈ '33 : dΜ٬X33nXb(Pg{UkpR~ogs&^dLjm'װ` !쐋EVEg&.t䅨i&8iRcw}V1r'E$B31e6Lc0V]XFre%(J1 => 7Qc6*i&&\Ld%_쐋A>1`p}lXg*8 ~ob @ #kIDAT%%̣l^%D[;ïSx^yOZ^O}X,NyU586|jWϽ0͙9h6p.iL4%Qo;'Q00mj 9U\8`35}x,6p>ǭ1Әl9'~8|wr|͑L<j>|>1W1'ףs `)?_o ]/r3;< ʹA b|۝^ "p78uN⵼3l98>Xk$&D=ڧ>'J-5,C:sd-}4%cY /M0"0/˚.X}qll &Cee% /# sr'~iӦ1 8q?/qr5vS;>:rPL7ɛI uoq8ED? g!nBȃPR91]sܶɷk3eX<^7m#K p+\].WCP5++Gr8bKuC>,x,Mۈ&cզ]ǩ w SMVjVu+b$vu`L 3v49Y~b555D"0vx@Adff0j(nO#uօ=<8ãϬ$qY󙛛7j:W!qD\8D>yCOm#Ő΂SdDU޻nz'ߑV w`n9:Y49z<䐃Z+7ÀLr'B*?l}pAP yXvh*{%?cCI.2~Y1j|/[ Xυаu7!-68d!`_`i0 z0L:E<85>\Ny5~~w\ /T=rٴe'iɐŐFEF(#q,vbVfNCހl^O~uI<gێ" dq|a?Ԇ#]nD8c!k$z-A2C븒z|͏#77x0i}l,˅rzhyb ` @(Jvh]iS o"hff]-UeG5vVmW :zfR&߲[Y!cy0~1cF10okW0 jkk2d0le3]ksۮ&빧|)n dGͣj݌4 Zd-2U?Qwo$z* V] f-DD+wuCڬ'ypܺ^VAIfNA ʢ85D]da$gǝx _\~Gղ0MN/eSq YpD_ ݑ .?b2>*-,{__͂޺ۆ>\pN9%*ԟC Jaۉ'c1X '#f8N,C,STAr~ɒkxqvr_ۅ9i6aq9CݿGdq'ryl*wmJ֯89Ic0HL`ct:ygNc]1wVup1U-k1ә5 {~i:@=L'nN}^&c,[l;(,.gl&+N-/%d'&cu.Ⱦ3&f~;c'1hKèm7E4%AXϖ})}09я~XiD"\ɁeL2,6l@yy9yycIvFXVd1L];)# #K=Vviz4lH*'Qnxh^zk3|iB0L&OD4eǶ|Ё|">l%|2ML[>IS=sVll/jlYUAf8erTF$4\aó, *J#9Cu&s0y,P7dj9)=o$~W L]@]IЗX"+h2(fbd H,Sbi0dЛlZ /`=:ɕHϞBzTt;r4IXbո,x/1 YZl#,f]b 6`(ƨ wQTXX7ކM=6BA7*,Ho/iZFmiv6{:/Cs?g6qyX|$\zc-f.G]{=S>T&'ŊY F0N(Cr' 1**bg `┯Y[3s`r.ElU[`Dz` ];-=/?zi yopaGq `Wy'2\Nkyss8g Zxf3g{Z,Kh/7'R;g2NednWWwt|#wk*7r" mvbzSm+?7;w̾{e=?p3^au3m0G0v5r+5"bUTSYTpm-ϘXs몦Qq9O&wN-Qcǧ0eҿKJѴU kat8ș{.Ŏş>.79^N"Ó | 3غ euq濯Bqù{bպM ˪rxN˘qOWg!πd̞2ek6SV^I,@ԅ1N4zϑ1\@abQ X@|KrX=Y> >#THnYN3`d8ӯ'2|H>cK%Ju_YMz>ƍL<ӕ8b MdԆülVodƤ = l>pu qb8X9t 'ߟyټjN #F"#=˳XWe-2{2$rma'VFoqK.ZwLv?::烙Hfz>$F_cXx Fg<2BNp LF=zJd$CdLZ\..޿_ZzEapE|1a] ؆A0İ#Jb]k\Cp8Ѻ a{(os{d dǾ|Xbn*n;OÆ'*jj(G(.&nW۵eT.|n _C+NS۰kx8 qG 3cCM# t)Bٯ8xaeb=_/wϼ e3dlp|Dft )Ⱦ?y{(Ͽ[Kx|~By :#F3nwnF^<xwtAO'sg7Fq_bWA~8m8-5rOKV~{ /q V4ar 6g\1ʋw PRRJ{?(|X4qK]WPQUZ5-3?q zCKuYmLp%M6p6U䒛DJPwf;4&}`@*>OʻjK'o͎A,& H1vY ǡG_Ҫ0F e冭|z#4xhpz3w 0}dz6lڒ\Z?xئM1`@Fj>hL͛0uƎæ͛deCc|qVVM?L$n3+ŧ[f2Ig3a+c܉217*bs7 zM&|%80(`]|yLϷMVTfWa3gLkׄaxЕ,Z+N<e e2 ԡcvtKbeŔ,×=Km0aˆD=IFNWQQZj#DϽ^5#L\mv[8Fd6 {3McfȜVO1a$&֛p.63fy_]xm7T򨨩 <ǵ2GѣK6gfkLM=F$z;4F7sl*%fwShnB$wprN>a>k~m 2w2s(a5 )4DwG͖NIv^..$ T ۬,K2`ד@uLvE3Zv1fLz{?A3[Ob@bu/cu!B0ysV ͉xI'ٓ;`؉e9[&{zֵq1<7s&PSS?f8gXU1zH;C0`}m29eYضMuM 5O۬pÎFOf@ HeU%H{20ArA}1ډ0y8^{Cz``ҫSs&c`0~sM\VZaѬ[M$1xKyZ6lF2E4A1gwipͽ+bfBڒ(.2pY6. vb΄AOJ#2̈́\u>Ae!sa4z^IDxqFc]1xbu .ʏbsEfbMobrɠ`lmU6ߞ&>׶?I@ލNj 3;vrefvO:x4Cg[6yLӎA``,~'#09̌&3D{`,u?X /qMQO֬:kHoCΆ gcP}8rp\8GX\y2֠9f YYY}ATs;ӕXq$rCF5uCN4 Ө{m_}@6X%CeRic`_fK!cFZL;[vԇ.ep^655aVoa8u$j9q[&cb |,]'TT`ēoxF 4`93n#tz*qIbk1+sq`?Qd[?.rDҝtw~"L9&Y~>c]i ÀoM}uP#&W@ PNJ ys> h-|ѴGhLnI"ȄXs #'3e`"Ldalغ4  ˦53qxM΄amcS߃?}>-7b&I3khl~eՇ nqK}:MSUU5X\L}d +VFbT L߱ Eqy?xBb.*Hq&w$ &/j*؎ nHfbIY2pDO#1b'HvLL]7d=f38t(%֑'L/б`xܱx0ad aieias,jq(aghL'o`|~5\}Wfб\21bv6|8~rrI|o5sٲ."~Б0" !tvbٰyhw0|ȌĤO\)cڒլ^fVT3v vq.&Ⱦ$P=9x66,b-N˲5[<]gژ! a]a0l0ͯ}6Z_?Cc7j55l޼#Fe&%l8fKzP^ &!aaҶ>ap$?om%Q}=~ s HNiY $.npL0lC0fmc}gOرe#fb1Gg:\gCϔtg msۗCU!a(D"@hz0\[?;;TZ'2Gu1X~& 9ř\/e`&>7^M[Dgdf`7opJ` ~8~0oi=`;qL'FI|pn:K6ܗ$h8NCo}Nr2Ӻ Y&q0i  W\>Fr=mǰΟ#vnCuw0_%A.<. e%Rs$8;c ΣMc$YD#qvmư;y],""i(+ڑǡ͛P]SC^n. ku}AQFjw^1%0lqQ76Z7+cB<abc9CS^0+7ײéd&|crn86C{D6wڝ2A5o/'9::!)F F;I'KJfȰaYcFcNkx^>?H~?HFTWa`3|PnF03ǁHu9Ç r}))/.l>]nя|߇w@6 ŷrruDjfp8>\*elYD I9S3TOۍ+9G4Mz[;෋vVHkm>D0G©c ~.ItpWmv xSNM$l䴑~j>1$W6ʯ_~s7¬jcǚ|捱ш1(W18,_i:{b_Jui bc8dP+9"&V}K}cE0mؼe1ph D #էNJHϱ~v!0 |>DNNNbV4d8r dO_nָ'D|p` 4(,̖s2Bzn^ (kohxK4{z,\.[v1sHQ]飘2:)Cj4?v,5r>̌ 2q1v0 Ғ">7Cd<.7C<ʰCUUD~֔UəN2UXEn^\Vl6f$&B Ec2qepΑpY^e10/h8 vD{a_{)]J#C`jťs 9Ld10J%%0X*+2`Jq HΥ ^,P9l_2/o3|Ԭ4Ȏ4mlh0=_e:{Ld`n1Ǥ,jxfYm;)+ !XWm%A#FYQ16ld/6k90t8r <섓I,}!FO>bʿ38QrVB&7U05+FN7׏/{(Q " qiMDDGڅa {vmsd IW7C08$'Ɖ@e)TOh5<Ыoe|//|N&n2n1-Q6UVsjNuRTNycYXlq|.  0x|èA.VmaFAt h3O웉 Yq0 |>/S'ɉyvW 3 Gl^'`\\n\7.t$""=#m|!|5եf >Zޝxa'rv$R:H~"""""""o)|R """"""")ADDDDDDDRJ჈I)hMI|R """"""")zz """""""ҏU%ADDDDDDDR vDDDDDDDӜ"""""""R DDDDDDD$>HJ)|R """"""")ADDDDDDDRJ჈I)"""""""R DDDDDDD$>HJ)|R """"""")ADDDDDDDRJ჈I)"""""""R[r)?g/yxw,昋y} "rø~~'\3 yW!NFWW#<@O ¥Ib0g,*cyqmbsA3͢6v+g"o)GZ{{\~y& QDDD0{}d=?Eg\ceS95+pе/k8S.9d[5?exL-}?FrHz>moq=x8u` Ҳ?f(P;3Yd()S "}X};۸m͡ƶ韘횉H|O[C߮߻߾c1\ߓ]EEeË^۱75Խtޏ9^xe%uKy -/§W1}inٻl{[w烈H)QH|3| ̀vvvTv]DDZfO~:ֲl~+@N~>ַ۝W"1rOrqesӭ{fkabu>E,Ձ3c'9<OnoV`]]e`RԎkȚxi a ƒA|)Q|+V$oؐ=z>H [j3w nb1׮dm?&f_ĵgVk9\{ |j+%U: =3{xp8gO#k1x~ k? d_&>H-_Һ̞ގ4ONv߲ٹd1>HUi Q!fy}^vD$}YL:x&v#'>oǠ\} sUIx \ %6&]7ݭC<IƌgL;bdfYwTPVސ8şIP#cSYv{z}UDl f3<&,0>ȗmK""""e1j\ponYs w idggu1q?50~19=?8G19}M݇͜ڵӮR^{(ћpsSu\1՛ _>HﳷcP9<&N>'&'|呗XDDD nW'Զ=sڞQF6Gv~8MKOـMU+߮5{^?[5oo.61O?Y&W5ޠ74;m~7G}uNNN}kETP "}ͦnQxri1d͂'ƪ{oٲޮH:ٱuGք)LhuɐooƮ0 s2){+~|g\ϹasnO'3cwssZynWp;U$/?&& 7^GD~pe[{t DqӋkk/rWG)/ &`xl|Tgщ>!~YEDDDGze$'/5ݮ\\Td.Ξ0c2eS[WW6H"\%:`X8?sJ~ $I|ޗ7_ㅅG"@wV& .y9rdJd0k晇Fp|q;Ws-Yğy?`yK9|pW=0.yY_x?^&vZSDά#r HbڞVKn]7O;O?şb;mHNxlk4Hs/ߗ}_0;c7WQd4n6O:y0 7N=|e?d{~Q7Y[8w › 3ײ򗯾/` &7I=a572̝l-D{lUs 8ջ8ms8y{ʭ}sFe`%soo_-.PO9'2+.e;|G఩xT֭DX}=ij;bk[AnlrL+c$dv "_ڳWxj?~g!. 7rY~v9ܱ{xWsa`?Z?q٣ pj> nf~k~t _yb&-Wn|m-) b2,iM?⎏[?S[1EW7OffM#[yOqϲuoJа .K{Kx ?K|m%Dڼ~%O9ثbVɍ4wݿyMİcTZ'?^x f-+p{yЇ) G)g1Z>p]B~|b`7̙#z51.9lO}O7QVS yo\~Lmq?rJxgw=<*+vvKO?}&7ϲ9֞R/FIh[rW\q)s+HQRC/Z| ppqppH$$._?n>8~ 0Mh$ܡF.DС{=f˖| P]U_U뺷woV߽{[}vDK/#pe?Oc>v!""""""]yUWʢ"=,d?\KD'mkmmHD5Һ6;tqY~8ˌ&P5Wkm5i ඳ]Vqk1c||e|WaD49ƠC G0M\y0E;/:\_ ;#΍ޗ˯暸ݕi0n_/r72Y׼ٶo-/r+ì7 תfbӴm;U4mx\S{.M۶SM{tjۮMk#>~w&ߜTh7ǜfWRnZ?~'@j~\=ˡ4 S>vpl>ffOŭk9>SFv9uE1>ʢ5]+ڶKM 8yS*(J ␯/GxNj{ڵmfm"i?} 8V[]vֶ]{o^m[6Em;>Ю .=JMQkM`S'7OöR}ӬmN&ŸP-Q>>Nb< ۶KMm{Jȍ4c|uA+Savֶ]yonm[6tlWYNa_G ,oM>| nS-0u ôlzӄ"=(txr 8|xG5rQi#Oq(MRɷ-۝q=p^Q/~L/is^ p=  V>8`YL:ض]om7q1SQo׼fe#wvֶq8wAf>' [4۶+M{kۺM+\<'wVS[U͹T䆆Ӯm-{ɝ]/B}L 9/_̃oQ8N$Vض]o&>:ֶ0 ,vS:mWnm܄s3x?%U86mە[vEm}@n `OO=##AҮm{8jr =F.Dza2~B" ߨqm&23>y8 ׎q3if(o>[]YYۥ핚:K]7ocK6H7)HӰ cJUS/fύ"XC_1<})}w2jNf^%G#Ȭ=-}/#dJf$Ӥ0,oQ如6ԿmV}ӿm'|/?W38{K J3m}ӷmG}3j_YͯoR=һm;^kοHodoЮr5zg*9]̘ 8UwQ?n"v@nÜW/kRG=Dzdz@[.stlaR}Gj)28,}w7۶#m\tkNHöL Ol* > X,8'ԏl<AS\x\muL9#9^hv'H7xis.&VhGJMy S_?0 'M'Om۞mş5lQ%\0;yy>'mmkZvU#a*{|ӵm;[_үm;]4lۮM+SZb\7`ag?hշM7^rqJ^)iض]oRyӰm;]ڷncca`!8TXAZmںyuiEu41)N?}3gz+|)͊6y.ƻ`iK]ӧ$Ɓ}c{몁y%6}U}psC?߭UA[Syv'05&-^dK8*J|k 0ag< ۶Km c 08ƶ٧gt wүmV?O_hێحi;Cu}qJldY |VY&&pI˶ݭ3 CK}8(i$S 'A.a#h$-ְ [lp2o|Xc6omc>δ{&_/xYx_hA;/lWܝC`k|O׶T}ӭmM\?n↟zka1NCYyڵmWfmD**ھUG艹h}ɋtk.7ڶMJ]ӭ]G,Q .sf&˼2ƎdVmB]W"8pun7ބFH}m\.c10L.o7ࣅ6KMƎ0r|No/9yQ"]^>v^&- Ϫ8jUo״l76cߡ?hq~1'Ⱥ&߇]viֶUO1sN&z+{Y/cLVĵ%OR}ӬmZ޴jۮ5ڵNyG } r7?1F' BZm u[(|I(9.9xm3ÙesW,,y Wl8Ø.A8߼2.s4Jn~\xuA dFL>g6},'4=/:W;/|W,xPZwz74* ۶M]zounݝ2}k1őgeﻃLr]#Nö|}ӯm$ ۶ҭmX޴jۮ5ڵί7d-B.peXrv&@*ޯ7ҵmw()3<SollMV c|xC)߹!Z߽7 84?n߀V>r`盕|U|Tۥ߽÷۷yr|6NM`feL~- gUvӶ90(w]Pʕ4'!ڶMmo?\K»⛆mە[viֶ]kkA332Ŵ1LVjy Yγۚfm۸#;߲ϗ1Cð Ӝitۜ/~YAGwzOS"ihѢZ46 x('r#qz=&5"<"Y! 5q?sìM%-33ԾxQ;yya0L3ZPۭdLԶ]omCy{8jMB!ٲ*ʫTq%siYSvֶ3}Lc^VߞPkۮ7ڶkMJ]ӭ]T'c d ̨Q^͇-C^m۸?f.,ňQ{a`ǻgB|%]xFOמADDDDDDDRJH]qť]>MH-\x6t:|((QPOA7. L  ((PPPKAA UTRPPNAA)%QPl`[((LAF SP5`%)(e|JAR >N-`l> t鸅 "HP """"""")A>t in""""""M)|R  """""@H7Ѽ""""""-S " """""ADDDDDDDRJH I1"ݨI'ADDDDDv DDDDDDD$>…׫>)|f-"""""7S b DDDDDdogns:_oy`Nk[oOwq fQPw83:L9 f.tەޡ}C 88888$svǒ'rciD#n_hPԍ/[7!""""{.<#4B$Ś_DDDDD6 DR@拈4P b{=C+t{-ڪo~+eLrgkuIE;C}:sO"""""7S ҃ADDDDDF DRn """""S bPBDDDDD DDDDDDD$>z9 > tAAw z(^H,r7BSA{2b_QALvĵ|^{ U<|$ADDDDD zibYe`F+4leXi }\JH@BDDDDD3Wo`od6쾁S;p.sLr?|F`qмD""""""z>z8W DzB>!CK> DDDDD?S  { DHA)hca`f6oٽqQRN B4,i7x`);~WSP0]ϔ`V\{"""""wh_@ eY)+88~scRVZ…q+CK_o{.m =f @DZm'?t ! hm> B჈|E{Ig D LH0BDDDDDҝV"""""_)|""""""HHADDDDD "}H{e"=yPADDDDD;"= >1 DDDDDQ td"""""n>HJ)|uZŰ """""DHP """""LHрA "}&BH/Ӽ9H_ASo """"">9""""")|2J 9鯷<@AA'-ݷǻ8((Hrޒ"y """""Qk{_h1nj!986 ta])(^g;oI'DDDDDo3z=B.DPK!&(DH$""""">JH_ASO>1 ZGA DDDDDDD$>Б """""M჈>ADDDDD""i^(ޤADDDDDDDRJH=4񤈈5 D DDDDD$(|I1ǃ  DDDDDDD$>1흐Sm"H)|ÚZ^SDDDDDґ~ {?HHԅ ^E~Ll\H<@@At'l[.D """""W)|ً)ADDDDDDDRJHhz"""""j D("""""_h D[[h BMI=Q Z $Ȟ4 I1 Cs?HoP """"""")AloODDDDD- DXg """""4Hdx'T""""")| 1J9DDDDD$54B0T N D1"""""(|I3(F ;)|H_Ak+P ZBzh9*GgKu҅-Zz_]k{oG(|VyOZL$écǯM70h_f*tDz#ط 8poW+%zҘ{w[v"""""ҶO_t]a:70Z4iq IJ7@R++DDDDDDRI჈  V"ػoTqDZqGkeNoW)cHj7B*^t.H(|ً"k>Bw 9 ("{""""">H """""MH?p@DDDDD"i;}> """""I჈~'z&Tw i"@o )DDDDDdO>W """""AH?"""""(|.S!"""""mQ Ot6PI5"iy[+˞EDDDDDZA,\x}<W\乛{OwdCDDDDD%v!ҏ6;DhXtI)"i;{#gi7""""" DT """"" D^{ """""Q DҜ>E+P!"""""AJ8`ADDDDDI)"{TR=$DDDDD> DDDDD/S OD CDDDDDC჈Ht"{! """""ғ>- z!"""""}i DR,((|R uWB{Wi~DE.H.[*OkeloZEDDDD$}(|& *ڱU]g,\x}oW\[yZ=uf;4Bd/"""""қ>3 4]H9;v9NAdB)9~WSP0.-qePj| Vܹqmqpѿ}ik;nm?6m]mPUUIee%UU+7hJ_}g?p ny qլ}{!|3MK_ŐaߞjCUUe#xu'X͏ټUUUkz+}Jsעn߆[r߭|UU;;wX͏\B$Mๅsխ*luk0o}kp̙w_֧WAee%GTUU+M_󤺺1a{ԵGZviN|u+-}UU%T\7ᅳ57=b=j*++>>侺ϝ=v%oCWbbO^{i׫߳۞iqiw_KV?g~ۤ8w&nz7~M۾}u7?*9∺U4#ڭz&=&UVVpGQYY^m}j|_eW/OVUMlV֊7~7sop*ʻ[|mV;fmO.䋿>۱R@͉OUU h޶G1WTT0wVΏ-֭W1'v;W7^ 8꧛lp[پyʓukomkuk5 ULՓTT7ydMOVxM:TSnC '6+1nyrϹo!=廕,4߾n?ʒe,I|~<ڹϋJݾ(ċ!^v]{z>HJ)|K5lBC/DDDDD$>HJii"vH7=Nzud3\2!^ܵ5lZz?sv ZSDDDDD|,,H73<,[D[(2>XCQ!"""""AAFpT9oTX>P """""Mt;+$88‡H,[s>D""""" bFkĉFv[BI 9q#%V4.f6[7Ua Z {dY&86n;j^"""""' 9l2́Z⫶YUoR8 """""MtwVap\f'o$_>: /.GD5ȋ`nV&c>ǚ8mz"]hٍ^Hw1J uxW?9OޛF;)n4Opۈa`'oN fQPw83((uq|xYv?ud||y㘛󭷹oԒR"Y~r,&|S=k{|g"N%s?~-t?#yۏv}efMhy8]1V?YP0?)upwv[I3:E=f @>8m;88drXWqE>ӹ9*3QPLk #]@f~8sx ebY&NX*o9}8h=SwR4o'9x}/hm8e|" 8 Vv9 z!"""""tl5m>ō+4tG~%aATY?8t dϷ~,;wQ1l2IsuSHWtCaR'c.DDDDDDDdwݰڅ͒7wP5^"""""Ғv-[h!uv """""U!NMmoWCD """""\7h~IiU"ү7tpAa4Ad/P@DDDDDzC&^C±_*z[;^C,v-˯{ ^r ձU_…Ív?DD,r7Bu =3b_SLvĵ|^{-<|{O*"""""""zaXe-c$c?{Өn=Ϩ{n#-ҏtׄzYH @a;W%$ٴKnuMkr_-V^_x E2[/S^m#Lr?B ^B=D4BDDDDD:C჈ """"" DC4BDDDDD:J^H$"""""""R^jSDk/ݞtZSDDDDdyO:HSmÏ_n`9p.̌#aU&e雇3opÑ.[uz5+(F+/ (LҰdXe& ۴a`Y&eb}ADv`BDDDDD]_$|g *8܁fwc6]UPXN{SP03YPJs#zQP͖Gk1cN'}Ꝣz_*zVILEEMo(KoE%"""""")Azcۏ~Z>{r P iI=O^tF<}mlDiCtIP.~|`M76Q}I:\s ɼqSZVS!"""""-qv2393uK1rj""""""{#|0ݓ٘eIpjiԫADDDDD:K=9)X8\#^^܀[k)/%ځt3 ێlJ#yۏv}efe雇3opÑ~b[| O]LFK,ivE('}gMvn*ŋc fq_{f`po=Ft/.lorJGM20Z$ma`Y&eb{DDDDDD:C.YnK"""""">U3K`q"ьS;8~~QdO3BDDDDD:C=DirJiL჈4@DDDDDI)"!"""""RG჈I)" AD:L>HG(|R """"""")ADZ]C&4BDDDDD>H(TR """"""")ADRN$DDDDDn DDDDDDD$>HcA=DDDDD=>HJ)|^"""""{/"""""""R.-ت.z|{rOT^"`{)uY==."""""{7 R """"""")aDAA`'oN{SP03YPJUu+YwvYN+_;u{8n(穪2yK|_]]ɯϱVGv:oױ et蹻5:gw?)O?yQC".z>iL """""""R DDDDDDD$44sqlg$N_s'=ַ1r&qG&9ly/琾>C=>|weӼ"""""Y M=:^71WsHHǭ?dd.$Z]+4餈tI!s~.eQK"]$I9"""""ҜIùCUo ayK$""""""Aფ'Kn=1>2y[)|vI 7?m%Ena"""""Q ۹'ȭ _/|j{DPHc {r-Grw߁Qmǿ{ zҤ"b׆vDA "]zUzTBn?.$.yHevfllbC<#0/Og@n,rÀ;{K3q;Yw'`>ǧ5 Z;cIgoo 泂#HvD8EDD, y<\>{ߝחڦ7u&zuS[īQgv|L|P&tDDDDDDnXj 'jX #I8NaXV q|Rɜ7d_䄱OY:R2}I`F1N'Q{.Wi"""""rPq+DDDDDDDĭ|RAD*y) "٦d"""""""V >[) "n3DDDDDnn >H( """""QADDDDDDDJq+D$W """""""V >HUGq+D$רIq+D%ԪADDDDD HcVVNYIDATRSɗnҺ|q!CFx: yu!"""""k"""""""V >[ I&((p$)0פT7!ܒ~N M'$v:W/z xweg0uur]ģifͣ$Vuru7f:r:v粳N7!`4Z>[) """""""n|dAqw/&&[-jOn <)b=Wq$)˩܂qk ?>~{;ui3d_01mќIl`|߽ޗ2!h6?ľP(]Ow.B/颋( 8 J哒[XGÉOӓN^7cسqccxzfft7m#0ك;BW1?ƞ ?s#""""""' hޯu} rޛ}uFЕܨ q=Rӹ V{8M'IPqK24l{DU"OԄwZ/u6~e'l9_i >Y+ lk-H *r{){~nИ gDZeq əj`^bT_yaYn-3;za[:p|Fپ5h='yzT , & tvrT5/0hV#(?OejX #I8NaXV }lCDDDDDmڅv6ob0۸p3JzΒK}|̉=@O/pdz63|4>r$בtOVG1Ng9Q{n<;僈"""""""V >[) "nN'EDDDDnn >[) """""""n"+Kq+DDDDDDDĭ|RADDDDDDDJ5tRDDDD"""""""V >[) "J>||tDnz/"""""6< (RSCyT fr?yu 2"ח)"""""rk"""""""V >[  *83ߤgvXvI?z^5rY;uBmP$'rT A|Mdl 'K9<ބW:„MW/IT/rtgs$/ezIʞr9֋zҚ/X^Zʥϴn^z{5[$KLϖ6NIr Owy'I,i&Nc1O,$yLn^q)L({:es%Nl\ul[6ԣܞc%9)˖rXuVݑyR[͓lxmtE}Cb9SKؿ>r%+YP/f&O.7lnL ^+[\}7uZ~YfP7evNZ굴ovx _5ST&uP\*-+Cʴ{eG=M-DDDDDDDĭ|RADDDDDDDJq+DDDDDDDĭ|RADDDDDDDJq+<; ">/ߵMeS7u&zuS[īQgv|L|P&tDDDDDDD ,Vi`14&qN:a`ZZ-X DDDDDDD|R0bt/0͵7̣܄DDDDDD,dZ>[) """""""n.DDDDDDD.]ȑi6y~3o^3/4[Ku@4oՆ+t$Ⴤ ayuF mq.ɷ"""""""7gT!pYΌͬ W)~OmXF]NPP=ܖ~N u; K$-AA:ޒ$m!vfNzY\NvImY"C L7 aWڸŝMC8<|Yfcq6^Mo]< 5]gq_'Z_K/Iw6H6Hk= k0Ob6X^T!~Φo؟2QN[*NΓZimNœϔYJ$pm*eHMJ/'L)c˧'Aj*z"+-)zk]Yn%;dwVx@PХ 0)RY!߈pZ̺Hk)efvǥ,6H;\MqLL󵌛\Zs3/qeeHqc~"a7zg14s:nٝz؆7n_~J >܀< ^P ~]Eԗ#텷7#X>烈 l?$םADDDDDDD=:ˡoFN (9q=awM:AA p[9I'(!/ ꝥ7oLL,̄/Z$JkQfSM,[\Kʩ"""""""V >[) """""""n"""""""V >[) """""""n3 """""""`$?sb&j:7ItJ?DDDDDDDDRk"""""""V >[) """""""n"""""""V >[) """""""n"""""""V >[) """""""n v_WOgED䦤zta"#/qOgE,`A|}[TADDnH}O؅3ΆMKX,VoH:lZGw$6m#TR%) """"""銌D;;y:)KRa#-Y\) """"""ҫ5] . •-\I<=H P@U\iEy3H؃;rڼiK{-m7kˊHb.rΞ9MBrmy "HflDg}nveatDDatxT7hؘE ~s0;q=SA%y:?"׹\ Xr9[tD/tE$fx: e˝A>lΐH,{Z%KSD%|3$.u9:PL\ rt}smթrG?s:xE\ԏG~D᪋F7OA8g|@5ܼvz:CyH kf- Wi7ܹtAofQu.2#XԭZ=a}DWuȭ@~R܃u#s~cFqx8Г6 kS\y@ixgOK~_zp4.ك2<# Y|} rg7L$a٘(A:f?rwl1kg>Izk͍Fo}mqkͪ,]2Ѡ}SF9U^ زovAdάϰ_oiGҺJrU׭`LEƮ;C\~;rxf<͟K.s-yo 6(Hb>IF;u8zOcdt/nZ7ܮ+rݜ&rU4 _ӻ>͛Ͱ+/0Wv˜رqo-cq8Ҹ66#6ӽyD%9Tc-,^V)szq˳9i300[g18翕&}cMw<`^|pៗ9gq e7 ?ѽ[!.ҡm ~^PGaPvL 5|>$u/#wQJx7[?,L wE[TSk'|<_\J7lNtJaKyט{^>> gp{?88ˬݥ+bL aQ󉟙975&zZ{Esh6ܔfqbaڐ82cO'u'P2Nq0rOFN|>=c|j5GIo(x*RNt9EKQff];QR74ו7ۅ{~)1Q R5W+V$ QY&EL럳*x(}ݛ꾩On LޢVM" Hɵy nZl7zŽi+?2j!q&a3Q/Hx/=d58y OEzsiy |~T؍ bػRzugK7-5Z`lW5yy cf bh(ЕW_iBiw`>ޑ}x8yd/ f^d3 bhB3(+*D[?2kf%JBqU 8|3f&S\fQTej7Lx9x1s)$A@*i֑zENN_ʝ W8%VFmxtH V;\̚]8w JTx'ռ _ ;8mlf=3 aPIޔzR,B2g`u L1ا_n6 z5}멱=j,EylVk5ٳp6oe}?r3ˆnPлY)ogυ3v sWH}P;h{ |=Sk2D_1N]$nHi*VMbf${f?j!N`X"oK½UvX>s1`7%NPr|Ǹ.>9ܖoeسsaT~y (XJkѰMO=ۆ+v^ӂd ?}]e͂X5VtK] 7󑜳*Tpn^V?x{dM1kG y 9W2އԽܒFtekra}>wb1lEÁ?d+ܠc;i\=~vkaObeb; ʵ[xo5_>򩓘2Vo=pրT]ĠnROFRݱ?z |&O)? ?;s?4MVN^~`4oi{yշ<JY!<GyBp%Mxw"`)z7?YŢT(X.836'niNdMıuL ֧r.BbR`<[uVl ,,M[՚,.1CY@z ԥN͖<իQï3 O>pl'Kd)/~zb r03&d!w.OƯP iOńݑ_9wsؿ c$Mcd=evlX~zs~֝j\ƱymVj]ڹr+s &L&κ?3qkޱp`\l_S_ah+L~~MƝIͿhNc͌}5^hzD~6bOFq`<lϟ>Ώ?KbQGaZZ~KrHv̛8iU|"'ɥ?)z͎5Lݳ壿a@ё#Sx-b#8{pĜ&o>X2 e77l'Si ؿ3wݷIb# xO$؋l]%3ϙMs9in͛?fERl:0s:/Q/]ٺ|o'MJɯsMcxļeqbփ"y۟4#iQc#_cZE4MVQ+I}K<9vj2+~c4=h(O2,Mb/Gc7d" | oKٱd*W̴ޢU )S=x"|yւAh} %" 83l|؟5坅2 v=_c6L |KNέS0>qa8к#qr|1PR3 Ջ\<9#%vO|,޴[KҰk'T*A!/UYsq6N/BX^4{S<8E'VOZYRIC:V۪TZb 9#$M0苖)R695J&--dƓ8Xs c t(|7Mi[1<ؙfKDRs3TO1+׀NmSB3t>=;i^pxcF0 4Z|\=lZK[5r,9G:$&|OJ^^#l799l;`P2mAz)c#vlXufȰxcdpoX[ʼ7JXqV/+V| ,Y >ѥ\o Tߘ;jWr]Xu]XTMjT\B19fH-プ~ܗ5z+v-Gؼh>^qic;k><_4Eg:7F ޻ wqޞo#p{7=4ZQ=cWhGgj;|~FNͩU.~aٻ~ K!v?M_E{SӽN{;аl>KȦY*I([PfyUᡯ1EeF[ee6 {_n%;ڝ}2Gdۢ9,}|ؓXM:ilÛSrjv|'K.co#xePmx69ԅʱc/krCVfN<{:9+w2>xgƙU3bh*_+Hcei$2|!mP_,(XȔwp>>3 ,ѸcRB^9e>S%;`h|1I\ h;tnehLNs7r!&ŎO<. h.G'@meY\pQ=FBhg~0 PJ+VNo=M9~g1 DWM婾~^9pҧ@wI3{XC<7v'^X 71cxm7KK >-TDf<,>ybDSj~Х;įvP93 0C3&sF*)~(HQ198338iP=*' o^86]޿9?[$W="E3LFcK̼pM5-nЅdßM5b)efԿY=)wf WxĀpߐO1dn~CiXw@%=z- Q|1iZ>+{ e{R?X.`gy<&i`%8cMc8}J@ ^OqŶ~c/s7K)zw iɆzYo,r7jŰL'^h)코raQ<{w{Vʺ-^Ӈ{[NHuX(Kf6́?cbwNaA|P?J{⛔VQ&>۝fCݒ9mO2y63c~NeqsWiIv}7D0 _N )g{'=;f G=>1r'ya,[#8 鴔o;& 88|ycv?~}:}{?>#fX^v+?jYLN^!`6Lmj$W D9/Q+]K_1}+6; yvcyS|^z7ku嫯NDxDyx! >ƫƗ#{:,r(>|߃Ƿ>%AzcUi$asQ%ٟ>Ο3?c<ãtZC,3\\ynhώϗs! ovZ=~h-+6!D.̿`Ҥ#ܒ8}.]')s7G3ĕF&/VXG:VlcU/$`{Ӽ*{'x&1V>:~Q~l3[?ig.9Dl3pgR2USۉ N ˙$E.ܕm !Wn˜{&!!.k U+HH2U;̾oӦVa°fx̥j\y"/xi&2)qKxZZuWFפ{(fر+,kH>5?w}r6y$e?¶X4{'>JxHFh|VCB6*<k9+#]~+ğ_M `?͔p"i63v:.`k~L~c`ͳ#?{I .&lC&?k!'<XjS &1~kR; W >wP \Cc9x^/ &a8_3hԼN-scY?>u捪~EjծZ—gm F>Zߙ4!v䭕kP%i||һU[ҵl\wyUOJ~Q={ _3e ۷qQN[yE+'[V LbNmdgV< Et&]t`-Օ\1PŝeŋDfz]9h!m&XsOiD+hШ^3syl`x>4Ȱ_-dA4zwь'><^02E}ӤU}| C!n1뙻4 M5n5BA=dyuXJQ7`?=Qg1al6K`⾎^=ymcJ@>&S^x0^XXzxZOҫxעe5uW&TBy+`\%G|3ZTtkIMY|>ڝh,7m[vuF X(yTf {vuc՝؍Yt.~&Lw-XGy\$@؍YpՅgNR 9αpYzE6o;YrRgF쉼e}^O3C&snb/_ts~l&L񹍆u1GQ\H&MH'9ԃؼf1&]e8ʭi'S/on`p{q(»m[M3 `Z5Y{[Wqeb8k1F` O|m?fɤӢzI)R1=K'Qq}6s/X n+n_ë$OS1,96,Ctg?ng;揉Xv; ERF Q! qxLvޝ zQg4]$~l޶ٹWE7,@=;9O֊4i<4mT릣}fLYY$,,%5`'[ɦ]9EW%+Vqz6sU2Ka"G!W͞HgEx%qN&`gڒ;W;j |hS\33n9w L4pg҃pߨ納Cն.<5 zwiJoٲ!(\|dzapd&&,iUVʔ/ ]W?ߧ-b>eEJWZ9G&6-뻐4J[8K.v8[4_g5 ۻv phFعppxעUb|,^R}Ktx 77 5ٽ0! O4͇6d:k-t,KY/LlaD'pgfF~kAxDy-/We3o dayLi5fI;i/M]WۮH#(Q(.a7 7LI" ~&ΦFy\>֮ *TiJҩW*fvr4H"86.]dV͋9ٲ-!XXӮ"W-es Z7&dS̃|KrauO0 a`o!#QH/`8l z+\t>3*x߸#~ȝ0_94qq , n^`ɇӸ@/w¥XlX"f X3 !Y`D8ˌgږ{f$/\LRt'Q?NFKfSLt\}bO6x[&.3\'r(ldٶ3e%?_Osu#3>#`sDDۈ|.֮Ŝ4$3< `䩽+Bed3.N[(UD¹2ۋ9ladt/J"Й]R\9.^H? epe(I\r3|KؗbcsӽmvfF %ޛguj0B 6,cě~^j~a8]S?doyF-n/aluyqk 74 #8gVUv]YsO9ԕckC;,HZaճex9Kd/ EΚ4o yN KN=sXf>3kh؝r5|ۧ}.lN(4Jr` :˯2F~e.Y,j~1`h._DŽ3ylk#4)b2 ]:Ƕ0Ti-˰\-Rٴ:7L3`?ySv %JH3|Smx'9޲(EZ{ ?ٯoyJ@GD~3a1e>^죌%&|}h|`;뭼zm#S0v6mϙ (Nn$_<8}ޑΫvΝ98#00H2kԺ+`~&VJu?u\=̤Yrc!-‰jޜ q]Z6ݞ̎1bfvz9y \(EV9;6*Ђ;K7xsq[fzqSFF™5+, &:(ybe,ϑŊgt5ݳ@Û,?ΡS ̂5y]ofÌHw(ecS18;S|r~YCڌ&È'd,.ZyiByW'+xbh`ф9w<,'\+^\X]Gsh1Z$9I{g8s)!T_4ّi|ҷϓYHfC^z=Tᩛ^;G}·!5gSi8]\?ܔٽfa͚OsO]t _9Nw8a÷}f֌H/N-{ uQ}{e2ljI ;qw wϼfpMgၤb{pUe\-$_RYc$"<g5`{IܶMlNa|\X zSx V޴.~keffs8d^)ُ~iʧ>868^5]õt/>Ě&11pgdtzkUϲVI56ۖMBx"*J_׻dynI1~Yڧ+.abPe:f7Lo&4QLܖi~cχRz1.Ǚr>lׅ>Jv/GD1Q<ޥ*tmڵn0& )Dkj6TbXolym?OjR+pvى.RH|i|8v8B)8;#w6ϴp/?rs?>>f`N q7p+L0 кd>#(,QN: f9ߖoy綾Lq}874u*бN –-[s֭[QhYuB5vŽmO ܘYwD.ސFQr`#Zf83cK4`uҦ/L8 nݯ܋Fm1q] m+tSl'r?p/IBFOq^sw2Rv)Al^5E4"2f}8/^8N}|Xй!yUws53{%.7/2Yi(w:Ƕ{w33./-q5hNt*(d@iIռ!CH58KbM6D0'왬^|8H!.Þ /vHo^{;/6 f'8K9*Oz¢yliW6zK/V|˫a4hw'NxP;BF14#>%Sp6VZ§wdҶe@~/g@-n?&ʻ^oj~o^0Soj !" ^o5͏m(0&`Ǟ]0s,7q$N~P<`)NzQ5*5mvΞvذ%41+U-$Gگe͆aäMHhL,.]HУG:4W y3p@KV"L߫Vu0M)|m&Ǟ#!i j?sعNG/]~rQ{NJ&坡Sjѹ7,kXJ^cܿY<nO! `?ş{8BY FlH "?.ݥnnhr ׎f_t% ?aZ ꃑ Jي"ԭp}&%!2.|;Jvz`29?eb+Ő|W //53а$5jz1HѢϣܒpUk`Q6=_GVegsI B- +ɳ>=M?~?L\F~%Sw)[Л8uw݂h]=`\;Sb7YVM:5m7KD|+u`SeusK?Fhھ5 ?D΍0gg TϧXj7ivlAJG,gٵq2w QC?d旘y:CjxՊ@u̙M0(#SNh;(ő؏w.`qjkma-s_ivι/Ӷ\zӘ kќ\tȍ_H_@ëQ.g1th^Eq~[mG6<ϔ O;RjibHeLiwelWJ3-"fA@tH6Zヘ϶l%6n'#ž_&aضb1wx[feٱggGiҤVEJs2,6ƘruhqUpaqLzVU 1%=GVsѹkHmHÚ,=& 'kva)4ס zL4u; PGӻ> Rf< 1(mޘ#ƀ;%$-{hKkܢGfg4q3yJ~lَu*Q"GT'ng&Ssy*F6nG(`=ǢՂM_a#)/8TySޝۍVgiwˊo>xY٫2^:_*{Dqff*ě+`&E<_ ZBs9d-vk;e>ɶ%sY'xw⦁ẅ́diyrrǙ7LE~9<3Z(ܰϴ/%6~f1&Xg_Ρٷwu8f,iҬzZtᡖcK:ox?gBZinz[^{w͡':P~LdpA@ט03 ؏F~p(ޠ}_u/3Vn;>9aҴ}ɺGxEeلoX6!R3t=igDYGQkg5  18"fQ֤LRsK?= sc]IY"!@K+矃:Gȍ[z}ûR{>x aEKKLEķ m?#["l6vC/bVBq۹?w.fRÇ8hv?1_X͔ëZVEi^39`O׎4>G΂# g_UgŇoT-K9mY@1 Y;#kSxyjɳ6p,:IlAO:N1,4ߏ}vz(;͊p:>'$XJn[e&JFoxs_yEvyɖwӏZ Ÿͭ@my2u[6x%bMyJ-y}B1'v"Wn'^Ć5۱CϠT2 qz/|3KqL4ϭ~q/݅/w\&"F IroQK&-3lc/1WT^ ;%^Yƶ/;P櫄Lդ j:N >xbضYs|Vv?ƙ(bl>E)]:CzGԟ {'3i 6kdRX)*V-ׁO5F~aqᅯ~ +I2eP25[7r6MROL_`GU%P&9+ش(C/c'Xi֬MVO?Wt`2a 6 l K”,WAN]ڛ1KZȟ%s @jցyGZÝ} V䇯2mvU5y{U&! f;sL]=v`#_/UrSZ=}Τ#,X.i)Y*5CZKz~φr)/Qq[?$z7s&[ѭ}:N 4iww ;^C`s-38cx< PԨӔN>I{nIV|zlYUSp!p`Zpߧ/,b(X s/ ff>` #:ނw/]֦avӥ=J~oJUw"b1hR-[ Ѵmi|IR9L]N0zb:ʹH~Es[߽k^bf MQSQ LYNܺg#5|[פ5]WV/`sYi?'B!qn=1O(Ԁ',{.K7DH ^KRvsޛ{4DlCT5EeĨYANٰ+Bi K ڽ;]fӘb;OW2U} 6teO/[r;o\~..Ƕp'ݼv'v wr¶50ۚ$uCe;8nRTNAxa`P;Xs* k2TkЉO?ϳF0Yc|:_%h|q ,ow3_ɚ4o x<Fy68NMh(CcwةSli-.۷K*=]d>s}O؅3!r*o}9M`WmZVŭwƁO Ϧ 6naF7bTx-^ƒt6$̡Q&.Ib0o\:%+0"٬1(H{YUV,]P,x@+v$]޴NŪߪỎҗ$浏]v\ʫ-t} i3̝XB]>c'7jUPjOgC$.EyS ]8mçg"9bbɕzr}H}>8NgƆXL ʝYdO9^)M֤j@ yuc{3jc^T*OTU^8W}6cu(yRIf!kᣈHk+;.p~D?U?njDDDDD]^lD/Og$5>-$#<[) """""""n"""""""V >[) """""""ny?N%tEXtdate:create2021-06-02T09:03:07+00:00ͣ'%tEXtdate:modify2021-06-02T09:03:07+00:00ِtEXtSoftwaregnome-screenshot>IENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/eclipse_install_cdt.png000066400000000000000000003262211507764646700260300ustar00rootroot00000000000000PNG  IHDRjYsBITO IDATxwE🪞K HYŌf5<<ϻ(fEO<3Q)(*HUdv lzea=|>LOuSOU=TuU5>PeO_VWVA/:CI`  rY`aAAAsK3TVu\)3!-"X((%//?P   Ņ-c+_DN:}mmm$(8=eC~GơAAAi欬RUUkn0BPo'[?kI5Id}ݷ& S˖-\q>|o^ju^WZ٩S"(--ۼ=zZWYY2קʭu/jU5̼^u."1MsEP(==bϏ?][{EK*\TɏTUUɯ%%ǟֻCBRu-??_0''[o9 A_ av޽iO1!0s;úyGLz{9j ={z7!i /4>W@8?7RM3G?Z0[n[zMuuuN+G 4=ܕCi4W_}mbt233b]ӡC)J6lظm]iٲD|8p@VVUTT 4PĿa5kѻwvc0 y}bs/ԩcvmyYfƍsss _T_l)JyLʤItP(ΌD꛷PP!TJXTT矿;3xGF#i&kM6_||sF,flyXbEիWӷ'̘:LpY~~}{W^٫W5yv7ߘ͂XAi0l̬zCr_/B!;`Կ=?[Eћom(Z|q4ѱcG_Kss[WlO>dKQeŊ~#G}No~WSS`|Bǟz9`ƭra@n]WZ=~>ܴi^>|33/*jdwCfvILOKKر]t%%_~!$''toLԩCZZ眃\⡬s.'t}M>׮]3u322^z޽{/Z(7L(-IFFXaW\ycnEܬoo^|EFFo3ź gqqѰaC`ɒ唆WX /_ٹsÇ͟?_+,,9;'>S[X,֩w}w8hxi^8唓}9J?qWSS뙗.1xٳoW_}ccWXVxҸkj ҹ[ŇSN9']p*ڏb1ӌ:wgnܸ1ʈF#c >bH_^Y\\RZ[\4ck ^=z_5 -rڷ?jƌ.Bػwόu)??O!9'rn bջEjͤ4$B! qe + G4+V|ǏǧD";v._|C )0g$RFL)0aB4Z{'/Zh͚57t}(z^Ν;uָ}{dɢWDl߾]vm׬YdҚ &,\S0bĈX,*5jWU_>~p ("CD~={6ybz//>p-Z^^!m۶p8}nE ضmۧ箻S.uЩSLϴi;╕fU+*A~qDh]6Mn-o5_c nlB/n159ϋ k ԨF3kvg!5eݦCgxQ!FrZA&-- :ut@)-,,h^L2Vy|ŠjIgXmmiTWW@8ܹY/Xu}sR޽M7 mێ!C͜iϷkϞ1cƜ9GD=zt]rU##Fb׮]u Zmk 2s_ߧOp8x⬬>}V >4--_̘Fyޕ+W44lq\RJO~{$Bg]r%K5 #{2uuu7ӎ >ԭ(?tzL=OdLJғs|I˿,..[b}oI 0"he09Z\a+`O'16i~`Νw9߹sJIMM-ۮꚼ/"!pLpK/# fdg-†zݜ+6@0^,OEp"j@vN{.}A,Z*y≇~  :8lY䓏<=7233ۜ\$ eggqǽ /3_xep0/O>=S0M3~( !aÆS {>REZn>cs/tG.+t iZRRwR0())tonϼn{w,YrWsxUnEncӌu_Ϗ?XV9 +-KƏ?7jU#&A~2B6mp8ܣ#ӧoqqIYY髯,ViW^yuqqI߾vޡI;Z\2v-'_\S[PBJf}@MMBc|۹YY  y}}}aaa(F뫪/))1 b7"ϻwo:P(k 1szz E Bhnn TWG"@(/%<ӌTb)xW(5 (5+L3W^^R[+.npcfEE8)яܖ8;PT{bsΘE*A~#={vL}8yy ے;מ׏;nܞb$555Я߀+uT|طo֭|Ү]@76O:|Zq1H+24bfmm}y]};<į۶;!!Ӷc}'F"ŭE :rJKbPK!/*<TyL%-f/A?}mۄBR]]]^^"333EGG\_XR]e 1Ec( PQbmW> yn. w51''57$IcNF{U^{fhPAhժe]]]zzZs ry}%ϗ~mQn(L)0|Mgvq HSgp# 8brdA#;w-]Ls"{67 l1sisYCQX0   nc|  4,? |}}RkʴNve#X;l:?AA9TTT|ݪl_,I|} V 4FЋ   |u%%%-H󓚯   ;Dх 7  rd/GAA#|0nUFAA# GAA#AA}}AA92A_AAALGAA#=/]ri/6P(ԪeK̳sSӖ-%%@@ m'4 +V۵mKW](==-## <6m+Vegg7.kOhѢ a޽p8#anRb 3U6nڼu۶bu˖hۦ5"o-*, U+V͕W,Z 233lt \‹;wlժ:bذX_14LZУ{45LeUշ Ϫ<\_t 7:eWǍ9]\o5"6Urק6Q#Fdee~/ʌ_l$$K-:x3KKK۵m[YU?ꫯoot)|]bWWW.]{MJM?>|7|_}z+Wq}͞ W^xo*&??Su(, O=Uq9& وM,-++W/[h4Fׂڿ˧|R0tFۗ F &(-+mB3NόnhCĕʪ'ȲiKK +* PÔ٫]l(t};4Y4+*B] ~j?$bޗ랳WR\Tݻe붣ڵW :d_|En0x3i'pJbe{);A9hLw 7]x޹|v}һ&ݿep8}w?]Siv?.J 0Fۓ~_nRֻ3?<%¬Y7ìoZ?y!sv){_4ncYYYJ/~O`U;l'}t[c׬ͭ~ǎ ֬[sS#o`~(0wUɐ{פDzq?j)eswi&{uuIҫg(--+).>tp׭nxBFF]-KJZltڵݷtÛ{ee+T◴_۴'Wgߟǟ?g͞222>wݤ"&\uͰ!C%;"G  !%_^Vzx49c`N~GӞB)}ӞkXWxW,.s~Cqiڛ~w|j.9.PBo?Ychsƌ è['Ɯ~I'ur&Ï:nDzz*cI}渫.n7 c8i 8?M<}z:xPnOdm7 ^`Rkb1-^x镧 7L|׆akZ!f~}wMh?N|{_R&o$D"~Vffǎ,y8pYLfƒW׀-_q-_ѯO{yߤX,fuk}U29ϤZc&U=_;T^^h⢢e+1|آ%K8թvjkkTZi_}K?fMzޜ7 AAl߱swߝ}渲={v.}5kוWT4NH4ZYU%UWUO[lڼILO_|9;vxmfvѝ9|gZr9@4/Ɲqv͛y@8sǩф}ᇮv~lg}pğ@;w\rooQɶœ3^ 'F?rMF ?s 7]tUO?7ܯ)eؾ}ۏ6+  ^se1Gr~ܾcɫgHn׿9[^6bJ1?3]{͗_ٿ>oyvVPXP4N&m0839L>|'6399 +**v.MOK\3-% h1egٶM)CiٞlGoٲ$VTTR1%嗕yޞ޽=8|k`~e{l۾}يg=l^=$z,bRo(JIqaaٞ=AAGijEE)>,oB TV g3!Mƀ}ק+۶orן~O>gWii7|C>d{iNIIqqŁUbfwێIT3r(tK)9wSyvmݻum~dqQQm]]yEEuвd@?R‚ʪ*ݥe`S?52=? /]~3/GDF>J@Iqq֭ż=t y_z<~-. LsGwoOy=&!{y=r  ]Zڶգ_1bFFgi?G{ͷwfzX駟ʍΪ_)`> 2zIxsqj׶{O?;voثĩft[^^ֻ3;|E/⩴kۦgL}>(Fw0N;^y}#GRteO&K[:1KЩc|&_c@~3zO^b#u}[z՚7`ݼ_K.-嗃 [o7zORoJ9}40 s !;s ѭS=N/7ߖ_ģxnI$96ntxńK&?7>dC1;qtp۴n}^-[r'?FwW\{]~}[+P|<߲xܘ13z;qZFD>F#Qj? IDATᶎg&9g|=Rr>;uO̦MXK//Yz5W?qcN_fAf`~k[*)ٲm[ދ=xѭkŁh|=q6nw ļoSZx𰻴4q.뮽Z}hl]EE?3a8?[E_yu6d]JJJ! /Lffơ`C(T4̝cǎ6[e7ߺ玉It)Ɵx-rrZ4٠q/:p/nݾ1]0~j     G&ܝv5  4-vZ`AAi2gn;={6 6e82Xw"  (T7͒.  \AA92A_AAAL,_0H$B9BѨa?iAA9 $By41`(h1P(iƢ #X4   b3DMuKlaq"Sv;?z;m^jYi.b'rVv`k~Xlw~QI0 AAAϽ]}0 ӪUNvvvvN]m yp}]-P(HAI2 ӲrKAAAb 2tץbvj! Z'j۱Uێ@OAAA{^66{J.[kTݹs-*nܢ!  g3>߯gS/c@-ݽeܹ) -&  H7a˦s+^Uٿ-Qѧ8׵o>}͗.j_zuyy#׍7~wcǎ PWW7gΜO<1''3wǏ?qAA#}"íMmJ7Quٹa_KǎCd߽͝dz<|ŋO8qǎ#<ϟ?_xϿ袋ꫯxH ugs;ϰAA~Zp{Eߒg%@MT}}8͛wu͛7oȑw%\FcآE^+}:bO?tҥyyy_~ѣ'O /={u4QwN8jqQ7AA&+Sc܊ZWڲ']4&&!~rԨQ999;w[[[K>|ƍꫩSرs~yq{),,lذa3g B &׭[w͝;wqikR`رs̹K.s̚5kBPD%rF_:"Z|AA9Q'oF '{sVnr9f=Я}q:WE>{k-v_}KoQ|ԓT^^UG}q޼y^{-ͫ:thzz͛)W_}eС۽e˖b5۷gee ڷo___gLޞVPP JKKI%QAAi5<3"oٮY[{u-nDҬlvuwپ}{~`ԨQӦM۶mk9srssdh۶muu6޺u몪rq֭[CPz*J< {RIAA7XV7?ݺڽǼ]Ur(8,ݾ6g7l-uNxGa9oYpt2lذnM?C.]zGQ(/////ڵkϞ=~D">9#$%D%cǎ>ܹs333333  ȑʟpH1x"9tq/[qM~ ɦ%κ>ҧUǴb-:|ѐaǹc=z޽{G%o 7x㧟~*))իWaaI!3fX~}III=teJ?vڵnݺvʔ))4D%555N8A   ŋW9p۹3  t޼yE-2 oܷ-]{s6O֯*: [r;wj5w޽{o>w֭[jJ{_jH  ^sqUK/MvNNm̢O}"u}u5g^recIp ԍvD޼z&  $F9x[/?4hF?)ҫ:pxJ+۹/֭Ǚ_RAA4X?1ϿtԨϟoٻ5W_ߺMgAA`}}mNK[{Ň"fAA9RIC   ۨXvƤ   H"om;#  HSb7y@AA룳   Gro.:  rD|ona%]onzZ$u=w6   Hxon@'ݘ<<1"㟹ϸE\w ! 19看->մbqY"I87liHPHU;QH)Uu%{*0L(a;.2SLQMXRQ£s45J`8Ab꘵ `p$S;焊ĕIpףH,##z0܁iٿB)RjvJ)0skH͠(ǫU>!s'_J]c0ڞ{MqNߡgo%jc 纭ay8BQ c\5FIfZK_{5C!HX<_2~ιu|,!L>aCn<i51^~z}vS{sb́Qܞ%)S8 abQg x1Mj[Ur;= bވ ιii2kkEXCB ->3n6|]V[|^idϋP;Y:DE~2nTseTwٻP'Xm2=9WjW<6ݦ)5z0F֗x𲁀'f 5eUKB ;*5KaKL`I; [uw,d.HZ}Su&ɅYc`ͩ[kr: zCsMID"ɔT!x JƘmH87 A87I)n&C͈ A&̞YPzie,m  4&p BA?r=8\ xy[eJ nSE\tv X3/ιwW6wk;q8eX&p6>duUw7&SuZ5M,+Xkl'H4ii(~.[LD\6^В׵;z;Ji,`h69ZS-" ĭY܄Bc"{ 'ل|%Y0Z'S_kaXw$2܏J-X,.iܺrZ6n %quEIxj86 s9}5j_S!i uϟ,!!^ ߲QESgP'*qnJEKWnpJ,#EAHԘ)d5 v_rD‹"F9Mo&0iD VIRij DY7͒h\UV۳`w>Epsy*PHhOd U;Z4>!)XCGJ.n꛴Yk>Zq9Z< NQ$bRʈ:,ԏsH~0{i^3#ym 'd8b7pb :/B.W+sר%lP-J_ ==7B$ ag&&BM-S+#wjN8B noVl-iFmWkΫE֬iQ+Xb>:Hc{/2o [[x܇Z$tj^˕v^Au\ף{hgNdlUWjx'y~Au#pnrY7h\^XdqXc@I  b3M8r%nocP(~6W&ڵxT̀}6W0qz ޢVw 8-MG_D"(ι9jn^˂Dd8I.bX+E#*-( MnK/~[KH[Ji%DulķKRkvZj >U+U%U߳JB ι{󳿩SYt4ibY!P"إ^~Պ>=0;a {}5hy%^W8pUi?'%{z*!@a`0ȁ&7"򐎬?';CLT 9䔨:[$S&D3~fGMh3TLA7!Vzf Gi&)jWd<ĵ$Z YSU^Ə#vFĝԄ)~G=ic7sLjS9s$I+`S7s̩C;  ; IDAT q`b080 i? 殀!H$,J}V0 3L" l% zt?8K3cRj-"4|cBԞ{F%W~!e^ue G6g}0sEPF'!&i9J<lb;IyK9rrkX ~(7L;MBSLfb1qi# J3Bx[CDR(;G yʈ2 #& P 9CVNwppݥ4Ci jl+Y;%8D,ӗe Ts!QS ݎ+r9ĬYxnȿ%5D`:e߹e(![ҡ-|@<4Cj۽k"GkZ-ԮbX@ !W1T]ĉ69S-{!!pnZcf+J@#buڽ 3 sιeBR=.*5~N웄v57VWK) L(!DG,0jr<]_4GCpF)OՄP0L4B9,d "48q% QY]Io!R1&߯x/nw D+5Ɋ+$+d8+:Zv{mpm 228!+{U6_eH8,51aD8!sFk.QqT_T^o<_:1ڢaus @**wni= ǡp#z[ 괖+SRk=-yX*V\^.k7޳4$D篞2%UɐzI3!D+inܩ QSP iD d}cī5_ aF[ꝔgH)y'ۚIjZZ.dN֤TJݵkn[[/F@4=A sITv.~[5W;ˮ !@}aܿ\U3uނ秹/ϪKxɜU}'PrI^-9ݙɉzEŭ+; hJMLZ-S5*J հ1;~\zhG+Ʉk/&֌I-Azva57j<GO!#jqTb#&d<q>)9_=dVێւd?ǯ鹵%v|=FmPZh ~0B*4dLS} 8u)!{_oBϫ˛5ԝ-1 bIs`YO $]8\G}IW5dq'}a}8ra|&"'lOjLgׯ'pk)ބ<>U`\֊Udx|g~µ}nƳKSk)'# 'H|O%u;\Q+ ǭfO3!x(E;}gރR9ҺLy1ņi&IS }nϔ5 j&ow0+!Dlv| zFD/\+S/-LMIdoWcH!6P[)}5>tᕨZj> 5Ȉsك*'W1%FY#:Io"vAos JqM#%, >6P$?c=ir-,%xe_$?x_w:"x鮗p4q%ltj6̶c}w;Bt-!M=#^Q~J_efq=w\vҞNWC8 ZdK Px ~h|Ų#$IxWCqÓN|Mu7uv?^~\xo%Am€1%$H'*{{Xōƿ\ÃOM&?NoW9y&ڈ E-2 MC%֗Rj&WnMs$9W_O[.kGsgi;[v]wzy.x)*_2Ȇ-]Ӛ=!1G߬rc a\v"ZR.@:y;WzHqQ朙1+!*y"RH56uVUnc7Rqހ3I•];M'92YU絪WR4ԅ? ]dίTު3;2ѤVb܄%)IrB:~wI3tn j*6!P@￰`8h(޳d[1!ΨAy4F0i<'͘>s FsX1n8@ %$ ro$`0VyXv0d33rۖK`ƽ:Bh>P.[Ҁa8(ɣx(Lbx bpV@ Ð(<3S͜s{ۧtl~5c0 SjUs / zNr '%XܚȀf+2 8;ZvR콤6╇د %Hܣb-*Ȗvψ5+UL`ѭ̠A2;)̜5ue%rgƹC !brڹCNpvwccTW'BK9]zс&/e }rPfѮnbw>J. VSIt8bRݍ@lGo>{:. %B NhsWiѪg=jߦ![YgSHIFx%9n% &--4ؾjYT WXv~(#<5Bm}#cT^=̹y X3kAĺ6HI\mt}2XH"DgJ}/T@x FnYG?=3$W2U%8vJt<{ CzR7ET}T,@( Y@PRHHo$y'OsevvvvfvwkR8w٪zUTa{55GYك {<# bcqR#DOi朇ׅgeU)[Ga[aF5P*LQ.(bsz6'bbFĒ xaFn4ǚ<,4b~=+ &$` iX%%;G ৕ha1%B{UuܖgjH>ƘEE*B- z1D7U\җ4$򍕀zCI+ Ҩ*Eu͌ X瓿#IAF5* D-U֢JuliSVky>(!Ƕ)[->}e]tcC{}Gm*"j>$ 06u`&P9Iv XE I]mAU;ŪwTۨb]4!;qh6HQ`\5Y/T3 sa 1Կv:ey`K~ilz- ρVumFgKr@`PWVa6ҰUL()s]NT>$?hHD%+Epd]ZugUG?7B!&GwX+-`Z񃴅 Ha,V+eG&R5TӄJ>`1*2sq( DѢA\$"b*}G׸GgVF\\^;Gk>/V_W3l@W}IEAÐ-WM¥S {kS1iHwGEUy頒KgdlՏ #Y"yzYXMPv,mtC䠰b@+LJѝie8XxArKAX&*fhq W_MZ~Ա#@#d%DH[M1q'UewUFQ/ 5zd_"If'~Ɍ~Զ+5 MV*F lEf=%?_6z_ut*RoUJ54Sak9~`np jLw]kұ&#4zC lZ`!18 Ic @V@O-)PvHkV&rIEO+Utct4]9@[)A8!A,Z83~q*p5[܅˜e} Dv8~TNSOZb9:j@at_Fv9s=Nt/;?Cꘒ!ٌdܞZ\CYJb#eiVjHZj4 -UN!lR j Uv:瀩mUU!ڸR0[0F_gf}cltX\'*/)Rʋ(,עtfwv_vFf%NtjzS1~}zF0i< Q&dzE*/o.tXՔiPE IDAT#OdW_TTrbsjܴ^˖M}<\KKrrb{LrY+xzz ?H˺}atHCǃR NNN&q21QY4 n9io\KFyC֍6 qq񬬼l~4=-;9~ii@/;tjRsQaiff֕yy4(Sqfcc5Gr?rE#4l" s~+bAQ X*+@:jLX!lnКmF PF  ڦ]!/n6dW~d7HT~5A'e\W_5v'?nsϳVbi 0Ht<0ĩ7E/?̩vOְX;WKwieP fIRjiIYX&Olڼ-?;x5wl..5tQNN=} _r8o0yuv%¥ 1~'宬JMCcA$~'JqȜJ)$I YHPR,z`,رAPY ,p8 j rՊdZ9N CARsV]Kk*"2I'-א_ȞZ@屒Ug-HBNJJda9ʈҚCЪK^XBJIa8T*ߖ&ǐtp`Ǒ}9rȖ# =0x [-2!N,oۂ$cKU""ըrNzc`HKH BгTV9g'LEwq:P;V>yYYCLGqbAGGXϋyUf\>!0ʒL 冞H䞆!e4ءJz_&N"$_%!D ]lZLiY5\VY"+2vTDITW:~ x9Hq!2P` DiD^zg =cg+b?`b\hrvrrAYV,Y12ZKBp b5^ pL: 1QT|u{fǜD:EHff$Ǻ!V _tP!ʄF []I;IYm;@_̩C:`:s:6V VPȶ}=Ř-WW*U gO_QC!ԺMOxCW'P;8?/+} 7 xxӯdCv9Y`ڄ;qM]KMl[k̕f`Fi_{fHSP ^wߞd+^7YcA8.[v_}îa4J c*iJa5~!{VU ; qUJPQQj59Zc,` B@ ږwR2ci4UO}hATne`zݺnae-^ }Cb${:2UXug$=Oi[ kz ֔@Rb,pՠò;\ɭ\ +iijujrU_䳶|#CAyJZdRt&?DI["Pъ1$ @2"F 'p8RU)9f$YUUhŰ-ekŨHdԃZŠaecµ+hT)qv8QESvtS_m葿u2&ԭGU-{;JK][6~mn`+!D$QІ&=$=T1J[bY9 M@|ԒVEp Ђ1V{گOFC;9H?(U[U0qD/Gt y[A6y@䜑JU3b79Z(kOES$`\jmO _5&#<ܸo2:5cϙͺ3*W?x1UOڰ'lCR&lnK/xyEO9y ZVhҴ~]6飨Y{b(M&~AM!U[4%6&)m%ӮS%ߪb֞=pR N K;4lioO:E-@Li-Wl'95JL}zllFo%)U@LOi8FWf- ݉4$ !e39qqHZ\ft"SPK $vջ&`jOX)&f}d[ XU.ؘ bMUU?̮-DG>(K*CuUK ^O"8'aãY*[U߫yö~EnzJ7VjR!cT4&fa05T1 X{8X `=_Wrqq-\4̌%wn3/璗sNCEͫjVm Fa9 "Qd12B@%Tjп⽑ÉS$O¡U$ ŨApDSJZ'f-XBSfDVxVIʇ|ƻ&1Ї6/+9r1 QV\?8a5jQZkO* V~*Ѩk/|lE @KM.A8drDfAXfi3)2T]gFln3^(fT$qDkX, F|{K~< JP߫$ɘJru* Wunnt mP(@fVI^]ʾ/#w"j 7iVnť,~}Zzx}t8l[}Ѧy3Wt}+_~Gqܘ 'SV>IJ;o~KWUGJuq͗XE-ؑ{vѻ8Ǿ ?IsZLJ])ƘBS_E }ZrW?☁֦yF w(b" U3\J، F?SrUwf3_!AЈ9;mvV}b>zmԋ|tm^-tkYU0ݧ'Oz{EtYBU}Ii˜dP^'=fϟs᯿ >$kT$BUx]AI2iɈȶqO6m%qjTPv%}G<;[ڱ8NV㷺J$ߞJd/R'>|-0Ƙ)cD6sjEH#<">x">2-ц0T֮NcRu?\__a2)Jr} -8 VV)$ Db8SS"7ҳG$.TBCÁU:Xa= f+MVz_t}jKOSQ{RqھMgݻ-w捝0lúm`Ftv/[ 2Tod9. FIKwXb%X_wDGP%2kcT(l2l_g/p\r=wwJ3职(ȫ&9nltWWC^ Zl)**{W?RRRLJ<ǫ߶ ! .|r;RqQT)7"1NX)F'rJt M{nbmKc#wEl)..wѥ 1:YRRӂ O=foz ]EMX0P&;4tLK:rL0 V #JsG2NB9VD  _)ayiupZVXI_2`+O{F3{^Mf LzYU6 >yh`MZXL"?=M}]T12YamO=|yorZдqx@@o֔u3Eay&E0#Q'%eťlOW|;S?şU׮$~pHwnnA֢\MU F&on@ëщ1 ϧmm/oLϐO c\;$N^@u` V#YE;8Sz"7Q?W%Sf\r\l2!wX;|}J\Uf R@ެoO *6Vn/*t.FĭdzYT^LMD+?vK@0Tl#*mp]( ! 3dXiV): !@Nt;K|,Kɖ3ͮY)eWVe"C z/[Dџ~Fc1n"ȏJZC @j2{s\('֯M>HM{y]w/=O"cܤiW1|r}#1HϢk*fnب[ANule4Ei֕#KLN$ ]LJJLGt}6Ũ:ui7Nrcc++A ! ` `%z|13yL PF1p8+ʽD=H@5,>!z޵_Ӕ5CԺwaVM]KKi od'40sSn*\&F2*I"~U)YtSi &mD%:B[5%8sR@o 9R{l %gwa@N󋴈:wew[d!!VENN0Ir`ɀ9 ǚҢu -ywMK1ɮq熏9߬]5mk Rbz 붩jgnPXXd4{ab VY8`6|VqA"$ Ί'HMIB{KnMA )[R0"Z7ܸqܐz+_|&&ܵb8~hMO}qѽJ:vij8G L &J)OL=VA%a,*ݖ xp YblU(ǒֆ^$OJ;I ,f\{1&_$V(;Ò7m2llL$4 B6B< 8Y@!T|lS". qV, ,..Vc, Jb1'޴u`xyZV+ƀ9N}JDnJH7$jsivmkXjdH8hl Qb[:,w ,?8Nn; CY[A{g\MF@?ʮe1YOr]Ze{N%jД]"n3!pp==}{wt4HgG25}xH(1}G{q}^^^|+69F||ʊJyu5k˴ZlFEE%$P ÍXB[f6cɦ$oRegoɱBZ vl;?;unsLװQĄ۹={ޔn;y$n޴3VA5/|9k{7W'۹09,UVs'G,).MaRȪݵȅKнF {\{Kvx4`5kn;yF'lڸ#A + 6i_Z̛)iÎ[qIw)4iQ̍[U2sqC}|dfyxsܣWOqqiLto% g; f%3"utusM{qp_۶ZrQ_|b޽"_|%}uuz pOO{{w&2wځCOԶ R@s'wُ6&q] I]hp`⒒ϿzwM(coUf &G,))s|}MXmڅ'EUxـPMNڲG^nALtuffdoI밦CyGK=ؘ3Vj2-Byy$m~WffC[D*))[zk^1mzձz݌M޺yR]ظV0՜ lI3c)u{ڪjN]Z{y{߼ӏn'_ѣWgΜ=[ώ{vl) IDAT=5}]z݊˖I?YЫo'KZiъbƌo`x͚~23]ǵCVݮ4 V3TVZ\[Qc~U&]1^T8bԬ"|0y 5Tx&6aW&;>h?饎Ȓ)6^UWm`9_Շ*LH϶ h!«陓=ySNNgiS# lҘ$ZO$ 7v_ٳwn5k|r-x{ƌesf,O9ck^hsd ` mqւ/:qçӟmMλw 5=$C6霙ݸg6BK^ڲi*`m6,,*9xNN^pHfǎ kӬW.vEK7!tȹ-AQaɡ'r3SQ'.%|e&u...ݼSW7>|\8Ekֳw}f#9㹅stذE>V?qN{wM8fhGq1IVV;_^_6jv贷[?rus9㇙>#]%>%œm?~0s鲗~ 5[86& ҩUdT$X5:VځXU}m@_`MyyVvV8 # L-X\VBiQU-<&p+ɨKU[ngaI,RUFꤝE%in^f$E6u蘘f͚UVÿGDfM$DGi XU)\Cu>z88VƂ yIgGU~/?[O:06~Lfi&5xX{5k@Ua΃4ǩW}]qq)yiu!@& WZZLuFtp yc6QVTTތM )whQS_(FSoz퍙)sO;s `~Ѱ~ii}|^[3ģ̧O^ص%o̚2~!Y9kي1ߩK^}"n'k7h4~-F' $Y89 w򄅉~.=q<7:t Ij]Ňj$W/lּahƭÚ̘vr&?~܎}_-zS:uҒ7Ӆ0WO|̜jWb:bVѵ_ŨB xdoݪRgӰ^'Rr_@ hLbqfcKЫBWCU4b;a0Ž Ma$WMy6!dR)I VVC],NgT{83aiA/=rp905bXH!+-CCcbb={ֺukg @&VYP5@&>111??m6Z [ >#^pW7ֽ״Zo7_CtД?ߵw1 ]F^(EJ/tt~R~ms1\c*baa =WYi>}'|u k۴CV0 }E͚Veը3 EJVwsWģR㸫c,KDd5hnXЕ1sRGm Snggy,^ *$3>~}zG}M|;am[xy<~$|(,J7=5qvVI`1[b#v$[q\9,>wn޼Ԧmt/[I|l@A=>z/г-1 ];xKfܹ ]0M[ݺﶮ{dfdE_7?mn ‘O!iɻ$FbDvAAfV@__oG 0@d߀gg[,ؤv^yQŽ䠝B$WNN.̞7e)Z7~}úmF'"7'3y^zΣ[wZ5 V￝baP{!~;x򷃧{w@׮%dz }C[5>{jIq'8!ix!GPV& !4+={ 4G3wsxg~~+:ui㙟WA~-%z 4;,YEu17{w%mպ' \,..sv~ #'ϛ+Zq;r/,(&=v ʄl Q/.v7@bKv6Uu$kz/1{hRQrpщpq A^AAPZeu}(>Jɛ~6rc @ B "*F.tHct}&hc٧n`U"z  alz碢mOsu葿 Ddq֭'0-[55zȴrt{ FZ#Z"(!y^Fm>Ҳ<%˗򥘹 ^ lQFf = `?OOwOOpKJ֞xG%+=xFZD++L"ngjqY+01$"q1 X&1~> ٢?x8hLdfd@Z?nع|앫c.FR#?C[l`?Q" K){3616&ERFn-,JJ1gZ1kZG2=PJ@ҿBd2xa=7vvvѶg·^} NBIx1w҅](t jzB4/U~Yñ Ir cXwLz5X#Z7/*G7=PFS0bJD QGrɊ1"ē$~.+S3GReJR* N_tRIVѯ\zj֕c |Dve:xx v̬shm.Jķj_\\y޽rz_5j8;;7l[ۑָi} x߹`[ﭚ@gh?XPiIYIIe$ 7M9Msev ϧyy>РQKgʔ0S yUFM߼27߿67O2;vp Lyy$`_}>߶m[qɠ 2L)XX͑\ Jjzsɨs:t hwvÂ񷋋K#U6,$@#ǝ]\j@y:+-(,<++.z S b-ѺR}1LIK=Nv>E7;hFҬ}I|5۴LX3܅ W}UƖZNm(@XoA{ ͕le)c' 'bKn|rGm' C PMt!RrHn8<6++AlV+1.+#HE'>Hofϟ8iU|SҠa>OjEEe>] oҋQ0g/MOrK|0Ϩ2J< &1ӳ+{|Iq~E ߯敨h%ɞ=_Ykft;n~7 L-صȽ;i#^ݵ{۾`]ݰl}H`0 $x=uyU4z FnY!JF_p#(cV !fu}`li6Y`c @Jě19_i9!clH ;j`6'D1s*EdدwP?jW0s -~tŠ2 H~}Cۻ+H%N7cšGODSlZvTfaS>yLqQ!?Jg+CF'8շn2v2\DdCN>(KhF9''IfE@o޸q8:9@!+bH-I& .8S1Ur fF-% TsW>CǣZlIkW:v8ԹK[AȞ~rQ׵BJIqg.PVZU!JKK%*\ǟ%9]Y%Yj;8i5- L<UAW)!#4gKk8d\}b)m=v㇣_-8wpsC=z[,H&@qgd`1XJU|$ |I01Sc[0?J1ݠe.ѣW'wWvlqtt;aHvV9Z2H=u`X/]x /'EPV:{pvmSf?ϱyu;Tէ_Z*55Wnn!<RpVi\pZ)eþ 5KO^a1xYLUs+;+waY>_d*0Hu}ޤSPV֥$ Ƭ]T8P? e@3g ,J< qJF|IDB1 ժSIE`HJ]_eWϧ@ޏǴlU ֢4%Qveg }}8^q% UcL2ı -韠Z4%i.jef<値?@ޜ;sXCI;@ 洴'$ގ ;q&qOj4{ 篱zrX%$dNVȤF1$UQ,2ҳڵo٪u3._E]ݻodڶTTTEutWld`ssJ6K6_w owj\aa6fQF0RӴә79VEtzjQ]B 99y e' D|R/ 4,ǙxΉC&۹yuyӶx{{̩yO`rs 5ǞW/$kWbHAM,>xL4S72ҳ$m6[_v-l7aX o/'xr@Lb@ J2iϘ ւA=pO 4<4lTc~:HwPHR3nq>dӍ+9PTT萜x(q\-2̝b]utC;3&3WsZ100!q=s-nzAAqHH;= 0&_z!ts9Eupw[AP:kxFψ eu=| w CB=<܊K^O/"ii@&0X0@%oEG'摣6lʇz q(mlyҒIxO'8c h0SEuD5c,ȮkNfo6=9 * $Fb7<Τ14ҘB H[I%1tTI x 0O@z+#Ht[yAX, bpL& pD.r8Ld=,rF - s CG~A< *7.,#V96TS.6Aj i*ԴA쟓?{Woj1yBh׎?hћcOC=} ~y&UIS[BMd-&&k4'BN[LZ2}prr 'iz ѿ&MTĄK IDAT:O߭%?=N8=pp/~—X{F¨[/BBjkvN:6=!c&LzvCZ>}mmxn^VR\}WO\'!xj=r 7B%Ӡ=)x֫IhuaA7T=}dz{MfzbI5lD/ZC[6J[pGX '/QBevn^[UژP/(PyYTb6+b-}N+FjoHژ@Zg٪-Dy$KuVqȮɅ Gp#HViۻѧ7PG&NQVV gD?"!Nv-7Ą ota;FHh0dĨ~+w"0o<++?{j\]kwRQEEŨn|gfltR/}zD6w=FYi(cVN8ݮCw}4;l٭3^I9G)+gO_֣[[mqn3jL?wW~1=]ۮp_?fm?s\-!%)[5s ԴY߼sWNF# ^寧usdK6vSW޺ot.mN"5阌2Hڄ~qlT7qo#0FtlH. -`1WFIc.]o4v?I=8l#'y]z&`TY@9ަsURvf~:lX՛;/!_Ii(HRqz/zq>Ǐ^$#۽AÐf #۸ cQ}p72s**nn۴GN?n+ՊT͞/`ʄELޣ} olq9BvV .x߀ܯ(Os_8{ԑ/,,,r)fnn1ƹ-5wR]^ѯ-v݌KҚIgN]q̜_0'7wrv}omծzmY1J!&bL s= |$!00{ﶄ-Y$K][{{ڝ9sY]!;̙3?x_^s-oik瞯}黏>,"~Cyy_q={?~_;-OKz{uӻ5 x[)NYdo׮=#֝ms' Z9C]o]:5?o_mrLT\d&{G>.8.z㍞_~O|_C_xeWo׵k:nuWXxtUמ=LoO>y,:;} nM/v_~O _X]j",9/:yʴ}}=7]gn\Wwܻ~F,IzU^z>/c>Ӯ yǟ6/M_- ZZr+WO} oq?|݈==GvGNso[E6r'ok"J{ֲFRtmMݱ= Ͻ KN_t9Krأ~ ~;> ab S+Iwysmsx==~tȐ-4٥bӷӲVG'>GsktFa/%7>z~eq\]~_o>GB'UQ}p !I8Bqi]R" V+CST_uA8Q:ج )Ӌ7*T,>‘ûG"lLLXr9s9:W)A%{0Z/C:xU~cTG|sCu{BvX=@ef'mlAXw^$}GK=yCSj[U$bzaΆv8aF5Uo̟+[Ru)IOPiIyIʿm1>zpp^._*.4A+^\ϯ||)?9e};w{m+7[Y(SvK떜8ąvl߳y}uB9u;bρ 0)ѯUs,߷S|LYyiUj:7t<~%cBY%%6Dpg(FA_Ͽ<;O~kG3'GT⥵QCNuk7[I^DiŋV7[ߗ]"HD|Ib=|ë[^]I%DO^ܳ!ޙ,-[£?#E Z8C:%7GͅGqS4G=uCs{͗_cҲ~T9S7ps1]/O RH!,ҖO䷱Q_yIu YBuW* 9BP6#C+ &r?XCmo.tKDJUj3x~`1ヂ5f59ĶTFEQf|i0/BdD@@yDfrsU>E |LyGj̃Z͠ҙ*pt*T޺!"Mr 0 J2@@0>vRS$1ʻ%$}$vxeq UVvDp 617`3̅j'Lפ*Ԛ/PI`pQ !([&dHjOq(Ay`[_an؈zoM "f,p$y DB/ U W 8=RS*['_ZǷ,b]нmFh A*IӐz\A@tsDL*{M򕤀ʕ"{(bdU #T׿__U+zaDBHy~1sh]c?8YD"`TazAB%z;9Np'=:Uc L#zȴ9Z٪X´QWSPUÅ@y!*I!^>UĊʂP_} Y "t/rT?dhBpRDdq.bu#)" Qbi~BQ?ObZm-|R4FX՟;9k5  DК _Bp^VY ]ZBgk󽿟6ǷnWu7LWu}D>1_ I//ʇhqQnªV*gd]5,`q A%2'3P*iwa/ [ ŮM |Q'wf#CQ $g%9 μ9!*5 A2 DoA?'ݗC8.CxzI6 }b ː"r9G\DgɀuNfjWxlt!Eʒ jtI $҃ĕ%L儦Psyr 2PWo* AB >,nz!ݸHw%9N]o=ډl}ƒ?|Jw@U{&6dSvL~\T\v}/o]x'KJbF!9DpG[U"QY+LjMCpmSQ嫇P\XB.82AJYd3GpGaIX꜑ct~;r74q8)\D[T@PeO ] 3QjT $s8dcxgIgho|$` "lܨ zQk߼†>*s Z7\&b.yE C aLP@BKHCȣ( s0tVrC ;W-.7iKh)I YƃA BrSÉ i3iV2@_ADgwhzX8L^r֋Ke~J@z˷Mh.}tq CU4;q}LT*~y}O~c{{.;%?]w~׵_r呈7n?г|ɩX9 "a1f?l6)UM`"g}>G  p"tyg#reY, ].3h1L#"2@t1H8I||C0DC@RuNr"o ȁ%5壒 Z 完 HcIvqQY \8K(2eN8Jwonb>LOڃֿ 3mZS~L bK[q.X(Ĕذ#tED[8VStg5^5!W`@jpqLjt@O7(D簐^n2ERLJ/5O0k^[15dG'^kRIBSFW?]kD@"ȕ}uvLHʁk_z.o/}?/k{?T*msL4vLMP۸aڵ=4,l> Riz€} j4ԇ$߳YpMYqJ+ WC&hKӈhۂ1΋eXNUbl# J$'_LRb0޲qB̈B%"bȤ/ ӛHtz_&܍/rpdnO@X(a1n >[i>suGVHtx+"p6Y IDATA7o%"DT;a`l׃uOJtz apS6 ) !o#"\$B\ V=AXMQrT>@JB˪d]Um[\IY>Ց=KnNDqWDBTf,`ݟѐ__Ka׹B?qItɈ%Ap D@*/xc|s)I{FBBVz8P'p iG"2baU ~|6A!yBPWCTMJsG5O+&VDtGf T}Gc?XB!JHQTLDްD{MdҞ!vQXy -YDUW 䖘;$iQdP{OL?d]y{!j\s*fM!}aʿNo跻)/%HHi\sa62!).Jʈ^G]Aa˛<)˱#iwhzW\IcI} H$O69+)/2䲧J|Ɍ n@@uG´]ZU^ +'>^bĥ:OtwakQ|F,Ă?0$C.[_CRkbA-ln!2. ,$Cdu7|*X_U ,8$h:XnBrܡVr/'#WX SnZsQ2  dmP6.U[zK4\$LCI+"(x\.bmsjbDVB0BG7m#zY'k^LLBETb9/F01wqUc+bwڪ5犪 p>c4c倈+]iHJ/jVFtWk*4"* 2ĭT>07΅*Bp/3H]A0֫@yء% mXK(IW}ͦ$|,Ǔ;%t!{AgUT5jR8QUFz1R%mzSyGӋ&Cؠj6!ksv/%MH@LJΩvU,̲Zdx>yl˅<<}[9CPћRcPstUeycrs;I\9R>fIsTsU"ԅξbHs?E&ӶŽmrlh y鵠6"0'2O106168/;_U)70 o~?w^{65zȱzZD Qs>~?}h&oUi"24g3ɃJNG#g$ DwajVڞY5DsF!#%SU#!GF!\"pK.C#6L8fN!nоlKg޶f/2A22BnJJg5gߠެ\wyoY/K_WvxomF.:O@q&ٰB^zBڟ)uUDUMC0Juđˀ Cn,ܔ#ZgTvJOb}3)RT _Om3ά.'(cn:VbDC)nXU-DBJa䉄n)9&k̗!p6[BFR=@ 7صbUd\ rCH p Š4Ktv}B}OhADhM*xЊ4пn&='R7:6 6Y"GG^T1 ?)v0[! z|-ȏkmT}ADi#ضB0Msy171t!mΥ>}NW}j:ip(3Qj$,|$ M$ S1XjU=EMm 8~%o5;s`z8CyU\LBL q!w<%B["i 37h,Z l/y-C"U6:P/a\RS:a2"G?4aR%=qeҵ}[(s\\?ZP2}zAքȣ8 6q2}@pVfӯ)8r銞}? >ڣ^wA Ĝ z#\ Ֆ2Nwf򴽨+)bBGjHO7/ȴA=.~Z!n$PmCS|#""G6V=3gwVf.Mnr4j<{8FR84rPuFr}Y Iqʞ9D(ēT'v r zK}s9N;gg=_9O)u)=XWW+~Dg۾_m' 2iif'8O*©AYY|OHUM%TOɌ{!Ϊ)_XI+3hY'j In~ J^!1L1ݦ$@OP&+Wp1"DT_@fVuZ'#DTCQmyi!3yn>% ϓ8Bq)GQyjZLwug8/@9K4I['VI`oS6Z@;yTCJPk_k?\_UvޚJՖ NY dq3D]xpkXNB/SAE֯+2r) qf\*](S[9|NϸqerJBWզ#'K@ɩԝ E!RvtLk 'e0G"ԻS|3zM^t -y FC&kY,;vԇoSO,?eSOm_/~SW_w=w? !8\G }waH W9Dxv?b8ƃCԕfg=uP*ּNY6cִ-twBbEILǗ.<%םBѨ+k+̟?sϝ0aBoxZZZC(4(@СJ/{eV{#+rw||ٿvV5sz9B8'.Q| ׇtK>W5CU\|՜ /sl,>%KO^~x JOl n_uuS/|i5l?|R޵5! Vt_+ZDRm!cx9ɫUttM5uԳ>'JA&y{?;kgV\zŹ? f'w|T*{wߧWA{!uоnn.5 9<9;'JaDmhnsZ]Zhad6A.ž4bpAL͈cZV^s=`'{?}6A"եǐ-B:ӊj$jLcY[?Z!M65<n|xžUڑ#MM58̙'W'KR}[JBD}\?x"R^DVc|CDhr*pMMM~$w;zb)߹>)۹k>|ߟ/b?z#cXAss3ۊx,㏂:{N*;hBhU}{n V})dԵ7_w=%7u#za+bNW-BN¿\8hx[ҷ{^)p&xo_~ l /q/a~u!~ U~kՙLͰ&@:?}o~"D7'kf@G_>E':kSk``p". WrD kضoqƦi\.`qT"J,u\D(gV`+;Q[ۀRT׷0f r^u|_{6t`P*;q޼9554,_";qzؾ}./.6yD6ix .zgAT*o}m6oP3\{?L8@ө 4^8өH/`_tp*ЉVzV9K5|v.xt`X֤\#vj`[khh-}e"Gr=MMLr.P͹\mQ %ݗ-Z0PřAiӤ鶶y֭߶mtҴytKBZZR P_.bw}f4ciL/ @ij]xQ[10082^@Do45Ljm\.w56tObq@ju׏`d \_ߡQJkŁzlF*B[\ҨrOԘ1hQTJzImmszcQ  =a~uewC͡W/f=D;J\QT} |P%0u|ƺ7+'zz깿Kw@"Fr{@ cY7_8.ҋ[m.v/W`,0BpAe!lS|sp<  㗖3#=r|9 IDATi/8/Zh NZ̚5gҕמFqSl````````08>~ ̴O5p`ɾKf?qbd0&} 700000000x (N EQQKؑ'>d `%[JY NdQ٦M"CeA: 700000000x?>;(1@,pO0A^/Q* [ )* $_ĖKo````````pH}&ٺk0\ F|8A=Ri%ǍӟzI)lv#F;fLMMM"ڰq={J N[8yto5<3 ""I@߇m ?x3 jΝBaA[9䟢i'zs̞5K/aƧ}ֲqcڶ <Ў;,^|ҥc70000008၈8xt4B @$,w pԲwa0[}ضm[XVevP !-p QWW܋guuTd-s! Qg/[9_`̓N*y_9g_|*"ι"Q6,J#c @$8 R HR|#1$"D"CDpb. fwvɓs]]]7o^p!< .8<عk׽_WWw_;(Jrg,]ʼdڶ큁3{sڽ,̙sYg{w /]:+Wzy1G͟O4kzB|cy.Y=^MMUsd``````p*DёK=8 A"B$ F(( xpunsfr/X!~7or9Xutvٳ-:̆|~ʕk֮8a3@H@ "W-\յ}ǎyfIml6;{֬iӦf˫VRǒJFo{G5~O6u*?{o.?~S7wtiSgƏ3z4 Q(ѣnj4y$"ڻob=DT,OƎϦ,/0""2Dƀ!2,_10e11+,RZ2drOܓoQpB*466J~mmmXl| ڞ_s&y\0aBMRgP(ĒKKK \.\8wzf9R]]700gގ׷m۲ukWw .dt2Ɔ$e5y{ٳgܽg} )7#c (!\@D@pt82}@p^ f$7h'O?l(R|ϫ6y8o/m۶|=׷omllߟJ;wBNS/@D}P_mhh/hamۦOV__?kƽmض]__BX,J"xwЦLٟN8&^qO4)a6ձ920000008jI<א49{GA_Uaڽd [lm;J]~饲mfv2eJ&9cƦ͛[SS#LRBc ߱sgՈxYzm]JH:mԆ?yYbq}R+C566;n޽PɌlmmmizhڴi7n)'tH |0a„sN<`ĉtz+G=ؒI#cDBJ%wHRq5)oiPD@Bt:6Ic` 'Myu{wvN2mڼΙ#e3g̘>mڶ۟y&xikk[qc>,sΑT[!N}&"N^(Yvܩ[[[/8<믽gݳw{q}vTO= uL\嚚`K&a .~gwTAp{Hq|;oHvD\E(bYmnd mD}7K]AYa.PbDkQ]o````````pt 8 yֹU ȟ!‘ W:Gm!؜\p};v4EãBݏK%~npN,8$p7No````````fgAHXFI+\_쥒B&儃7¥P~"pL`ho````````p4ADBTlGC+r} nVxB`Z72} È>y~[S:?o````````uWbƀNY4=V-WD~_3)\nGs h̑e!b]@AH@Dsb=` @$pBiW'x6 ȥmwٽ?([o8N<,_i{> as X/Ko````````p$ Obݽ6a4.+jSq| {jg}]F7P69\5 \M.d RD 70000000082ekXQONeDE@Xh16aDCQsF=}޲봶 e ˂)-(J@y.B ך'>o````````p6/ҶLCa(c QH |cX@ض/c!󁁁z˲ܔ|>djjj ?BX_(XUN !lCCC0IQ !r*˃MUh!#J_"*ȗ6:ÏJ߃MJ; }e˗O<.y~ٲ\.7s̋.C? sf:sw}6gg{~wuw?]rE;OifK/4qb,;gt͛mm۾'OQKR^3Lccc|Is7u`Y%K~"E .v…a֌VlBc E0$кZA WeSP~pTq ;_5u4D&)Jv@dg{{{=823#Z!L3ԏjaBSɣ=cc3?;` .X@D6n|gOV.ɧRƏW*u?MӃo~GMR[[9/x @ss^vݺyfرgy bJl.NRSSP(Ȕ744 6XT*%5}Cc E0$ZA ×!T(&d````p|DolR-&Q}!RwK&2B(Ca _?e–ֺ4)Jǎc3};uErt"xY{߇ąoqϽBOOϸc^}\A464!rFIe{z{_{}#tBt6g9gJUW?&MGCCC\.Jb,9T$!*JnB&,* ŗh5! vR1pn%ރ}}VSC n`J*uLZAm&B𶙭xٵ5bIە/\=,]V}~KwАƍ{5GK≟qGC}}>jkkO[~e͚iS^y*#F,Z7]sS̜9T*"{3隚\.w j/~6obQ.>%X'UUC=XwG lMnB;v, D6W;*c1TP(풔1ql0v铊e'w>3c O}{GD""@}&g_z@?/ 8^v眎?"j;`W6@Ջ~Ns۶.| @kkFml6J544RIArP( { ryDssQSSm=atdY+v`a 9*ZA H=Mข Pӓ3&nljQzwn4i¹3ƴ61?{ǎi֌9("C@9_߽b0@\q}"s}ANȽ^'锈hRk?|߽OԤ]R$d(,RTKpVGd2 U^ڠ"67z$UM -#}C{VР?~b200008ހ2 T[[c1@m}MuMM?9%! OizXa锕M} td3@;O7j-&.vn=c=CRM:M;oѾٖ-۲ı$$d_hIۅ{oy6<׾CR  @}LgıUef?&B%q"gy<::sbtt uzeĬϰ O,t:|*LH! d} k2/X]y$^+gerV._ZՂ4gU ,%%)о7VxNP)e y93O8P/1-5O> YCI=P4qB.S0Ē Xq8aO{@ЯTvN_7v#. }biKT' r*EɟOR(YJw݂!StZ;yY^?vIN']C/iF<C0:3;KC@ 3 2GO7pzQžKxI+;[1zVu#BplĠ죧 3vr;!Gy">}YY4#FbX#bHV߹E-=Qx-r"֨HA8}? ##/aXe &-7%Y?1cJmlʸ,G'yn IDATB @$J-ddPQkJu!Ex]\@^0⪛,'.VxVYdmy̬LqLqL{l~ OK, #H. 1$^?,i xF.cslf@@ *c4Rf&3 +aYVHU$1efs^13C+ -9+*%+KLZ TwBn2,dWԶ>6< D@&cH!Ŝ0 ˰/pqMyԳ?J&z1}xbeX 0Dry2Y9dD2axA/ qA`ڠ>;~ЈQ2)(8a^t3+q\rAlO CtV ^07BG%(I!9춬$q )nw4:z5Z]Rcnj({L>XzE)`uJPԎH>4!H>4!H~٬͡G]}}:n@s&z+rԩQرc]}7xѣ^O01հi fY6SjpmmO?5V8p b{[[ۿ>6@Ng]}} UӱuǎķD,z-Vk[!w֭w~ԩ۶\k6oݱIxY sknj_n8zh[[[?֭[c O낅?q:V_Ǜ>]niaxc^o0/M҅N՝D?["еaדxۮP꫇~ѣw_yyyo ~߬^:|{mm޻xرc77Gn޶:Vp>'Xo"GۼmЖ۶:r$Gx읝o^^\Ւi_/:8s˔ycyA#ۯq%~E`)2dHjjjc㚚Bl6'Xڒ8- u8jϳ>;q-Q\pt&p .NS3˞h,tmXD=HE-qE&΁^{駟~g_z%K]vnٲ'eԩ /|_=1r>dHqyEESw"l.WV8xƏ8].w^>d7@"zAoXRSR<8~ܩgFNDMK9l6۶]=qYzZ&[߷OjfLo޶`02y22'O-}1?xlyߧP(*O9{Ǣo0tȐl}ӦM85oa2s̕+WcñZbuԍ7cݞvitmm 1߼rs**[L6r 2N|UUjv:u 0Duʷmٶ]R4s=p?9];w;2\R͚>-4@ wb-4j5gguC!㸽{F?bLD~d2裏nذoV*))yY]fͪU֬Y#s=g67nܨP(:::d /\__`0mu )qw~Ӄ˗.]|<4:Mg}UTL7Nܾkށƍڸ1T>3p@aEe)#rBaB X4a8e~o6VTV XDD 4DQ }Fo6m۹{wɴx|aJOر{ϝ%n"mrr,sYP5773 el=jdQ#FՅ}IqF!"m[{_}=dHD'"ZmLKdq> MDt){VUD??f79}iiiD4cƌjܡ.qwɒ% ӅyP:qdG%Cꪫ~a"h4X?))Gyd<#G8e˖:t92~Df"****//7mڔ.^KKK.]bXf̘QVVfΟ?_ZZ_JPhLIInT~ǎva1cG*Rzz:666>DRP ,Qp9 Q:̦X^޸Ss7 v۟'O߿?$N|8?~„ G]CjNKKo~SSS3i$:;;0\SS0/Bh˰ajh4]'\Q7N2wڵk?<#GGCe0|GAyl2B5pjGQnnn WNK,Dms0XeRSS7KJ_]g?[reoqOgg8/^19lOD**u:]EUyJy 5~D-zJeOV{q ҒQ`׮]/rcđĆLp8hZ 8oZ00a€6n8|p*j>Ν;n*Z,Ჲx뭷B17fvܹsΝt:gyoDT|]mvvn _1X5cH WVVVNs"ފm?Dz,VgG4?.>ϲb6h4UUU&JLֿD^ ~ ?q_aKEeκB!'"k{Xu: K9u: 3A"0;'ʲĠ`ζ6Ҏ{x&zdsfT(7oo9vQNĩxflΟ+<܄ddd>3q/A=wDT]]#S^/~D{nMLucƌZ۷o?~<0ƍCx.+V [oj#4rذ1FݖH ̼mSLpMMMMMM=ySO/*^Z7㞬h@ŋ7@f_I))&G8Yv:=+3#$OA,֋#87{^WNfsyEDJ:;YD0̌inݹ&ҍiƩeٙӧܳMϙ)VP2L>OQnNNuMm^n?b=RVZ%>}zǎ+V,r'|򫯾|s̩^7͛}>_QQs='DՖvq7oޜx֏Us>}-[0k,Dtdzda^|ŗ^zi---%%%Yz??o޼ ׿5θrnןIIIj<2[#/҂ ov1o5'ƮVTT,7lȐ!Q'˖ވz,Ne֭[vZFzWXq=Dӈ>{fh4;w$xꩧϟP(rrr&$ݘQ״JS^QYؿN)àWVU :t)n8{G1 u;56^Ńyܓd2){a_I)?ߏ?􃋊NgN{߾~jndZ0wy]1H̕7L5٭$.O$][\.L~M/.?S<|ty=򗿜4iRSSSvvvG>|cpBVVVבHժR5\8- rZ[[M&-nvXҼlX/{Ν;~)DWNYH͉X2QWtj9C'\ sㅃ\]TE-7qSFƻ1x@k•?Uv~-@vk[v^y嵟JRT*.\DO8YwRT\҂EJ2 LD9==]\%BtQ 6k7,uU8{ w>ܺukccc^^^]]|kǮu_];g!6'cQD}o\ѩIřG A2,-oXۥJt5g`Ov-F bAևd:ujho q3)l7^' 8}tuuuFF 6ni}toWh4lZfY5@ևdd7A Ս-]RS'D\ M҄ M҄bYVS@|{VWE+Frv&]*u*v%==:*AQ;ꦆJ5X8KvCxkomN5Fޮ.՘%ꨛ>$JSRj+86H]NGcCސ#zCQAG%(~Gp/-i JXaHDuneYZ+32UjԛS,:*A>\V m:J0@ Y@ Y@ Y@ Y@ Y@ Y@nD`kostڂ@\HIMK5f0 Ya^$ IDATo|z eUjM1]'- Y{;~_ E]04;MOu8lVDzADjkcFfӝpGf뇠=\iElwN{~:A?eYސ׷nwnNw9@AzBuj-#+ɒҤ22nHwB-|^FKv+z.Nz݊[<;q,+q Y@ KpBzzzqq-ZDD6mwqGj 8ԩS ,]44':;;/ZhD+%Kt}{'N~{d~~Jc˖j5}?$hF ZпeAw-II`S:|>֭[Vkvv6;vlС A(//j;_T(ׯ]b0{Y~#<;eYZ={l"شiSfQjAO~ARRR/^ggg={'?Iԗ@ ?>wyy`%"ZXej,|X؈JWTg}Y?T*СC;`{?3FѣwQWWQ__pB!Bqq;nXuttĩvرaA9rڵ+VP*T ?ߡ?Nk]NL&WTOKMMxy2.k}-j,7rͶCL喈ӍƱF}6=˾ ~0nܸ'OE)))a  |2^nS`ƍwOlܲu@AAvVf^nZڹ{wɴx|aJOر{ϝE}Ю={BŲ2M~{YZrS#tUUDmh<U@O)Slذa {dӦM#F )((xwr;qwuח_~믋zrssxkoUTH 555,Ɋkkk? t2xC흝& ̉s%NZF ! F| @DFy6geut-Jd#?? 1c"J̞=f>>>tzJE],^8xi38Qڈ:H zPi^oeu͠b^yޔy𑩓',+ŜXeRSR2MedeŅ}JV]P[_/ۦW_y+rA˿̮ruJѵW{I0LuM͡#j q,:erZZ͜>m}Hպnɴ`*3cڴ|#FyΛT>|Ͼ}'Oꓗ7{P%d0UUñxwl=BYYYZZԩS#H JuǢAs\r\ՄV *ۡ9Knn~A2 |>tb1gsҮm?gv׍  =ҥQD4gN'cd1  Ee,u M҄ MCPd@/ :B)i@P 46H2=Rr@ԴTczveygYF'A äңP5K7$!=R5nEw#՘s\ː>@ou5.G'nNOhlR:}ӝ01fd{G{ q_IJJ̖X'd}FK/BTݣl``qɄm&ܬ>RNv+ܩd7 H>4a@v:v׋uxD:(x<I]G2|c maYXy5 v:~?mӦ' Qq:`7# 0#xj}g5PX8ctahWVUq 18y2䖩Stn޺fjjiiiI8<&;+˘v|△ **bY6߯_Ee%/sgϮ9W^nwQQѭSѿ?lc֭n g͜Qz 8.̟7//77y=pŐo8a;J E3ŃO4eY߿/ΕLD ?eT*ՖmŤ fLnC5mٶ-+3sw0 sر-۷߽tiz)...;}ZN…)'Q߁ ߽lViko?{|PT/AJM[2e qz޽sf߽tZ>|],YpcX>e:.5%EF7ljt*E]]XsOMM-=q"b{FFFk[[yEEEeVXR(_?"*2RSSRS;:::;;,!C\.,,,lnn:f7x@d SFF߁JJ ) jjjħacL&7vluMMh5aCj"*,,lov:z؁GΜ;[zWl\w{waZ]Ydjjk8|m۫jcw}¸ 5i _=|7o۷JxD&"\dry0t8 l۱#=;+F߯srs# #uw J1fvۉSvڕ9yҤ!=˥P(ā"2y^F#Z.x ((7┹ꚝ{R( z^RT*÷G-!V`0-2D<a_WP2xРW@ֿrsrrǏG t,>^ETO =Cl6t8.;0|EE~XiM~C%uZm I>,b@4\z.vLRfMJDtng@Aڵw_KkZ ={f;p#9o%tqQ~nO벯 YF4q}>z EGGGEUU}*-5՜oʲ WMA#_?\}| z^T䔝>Q ---h__J%Πs8Q ժPͱ~3)s|a س1cř?;;{ٳC?Nqc_}ZI.`cbvZy@&O8y-~yG?r8 l/R"2edgLi¸YY3jee.\.??¹JNWQUED<{ކ ϕ0dAХ˿u0e2 b0\KUVU0 x^?gNq oOڲ}ǃpH2 `0K"5J|eu9+&n4eWT"p p[Z[:~"JMIt9zE^_'k?/IKrlU>\BR>0Wt-xJ\&d}iB&d}iB&d}išý~,<>Hs =J]:! @fݐTczҝz N7q9:yOvsz ]NGcCސEM sxzAZMv[{0SLPsami0fd{G{ q_IJJ̖X'dqr㝗`0jmj6es#9}k6xFD%&p'.  v[);'~'"\n6mۿ>\SJ[_=wS;PULDʩS"G!{jhhxgo̾ঀ$\6rE_bȐ!/#8%33S&o "صj5LJ2N1QHIIc~=6MTjX/Zy~?e-˾0NkBx8knnȈS蝐/]>} WnG3?]8< |Mz_}ձcbyT*UJJ/OD˗/:uƍysss #w}#_}Æ mmmsy'# /_|6mhOMMʕ+~aVZ5@Gv̙zeYA.\Y_ :Tg~3uY΃*`}֬Y;?hժU%%%o6˲k֬Yj՚5kݻw==JV;vvaڞʟDT*y۷G,`!1N0VϗWR(Dd4SRRlX]]]uu?LD`\_"MZE}7|OdJxojjb^6l#xVV1~rss߭]CII?>rDҢj v=|O$u&XeVF??k[ZZRRRjl6ǩz!d})(M;>wtzNִD܉y pz/ܹsΝt:gyoIUnp;Z "55J+gJ]L䅱xl6[ǡD ;;;}>J"37);Gτoux }7E^-}6쭷 D|iiU Zzѣ _~.{n VQSSx֙ cu8#WSx:DW_[. C֗_9_}[g|6 aq77yx0/b?̙b!>ҥK|UV=3cYv{Yx… }>O:;_{fY%jgպ<>skl!\Vɤ鮽@zB' >@zB' >@zB' >@zB' >@zB'z+>[W @ʕ- =!HOr}\ =!HOr}\ =!HOr}\ =!HOr}\ =!HOr}\ =!HOr}\ =!HOr}\ =!HOr}\ =!HOHc+]HK^JW!eF: HX' >@zB' >@zB)3>1hZEQ RPu O/!ey6oXQ^&ױwoXwm8Nx<>1a^*%u4\?Jl7 iZ^R\B4ju8²h4r<ݻ0S)J2U 4\&&'[^r}}\s^.U*}jFcCchhljnoٷwocsKoZx<=@o?~WUUͽ}:lvl_ 2ʷutu,+e͇:HQTk[ٺڽsוl.e۷nɨ@ p}eo=O!sGO_ӟR8voo,#BAafaaa|b5EES>?z,ivvnh4SS"ifYvphBt:25>p^=\T*gfg)ׇ^6Z˩gkR8 fkg۷nɱv-Kc>le+]A-t:|6NȳG(4!nEl%6[ ᙳp8ŹnMQ*\eet+WrNg=#Kkh+Zc6z!d W _UQczĶ(\p<}ZR) dz{H@2yU?`hZ]B%/ O|.;6ݽ! 22vGG?>{iq\^/[ԐKPPF1 ቾ2zD" AHOr}\ =!HOr}\ =!HOr}Dt҇F[*|r}\ =!HOr}\ =!HOr}\ =!HOr}\ =+]W y}\ =!HOr}\ =!HOr}\ =!HOr}\ =!HOr}\ =!HOJW }{V @ʕ- =!HOr}ksS)sahUEݶxw2773WhZ{2ܶxw2I=sޜk#4=2>rG47iax“oJ}u26>KiӬHs8kko?ulKTDx1ޔX #Ĥc'߸v{ӹpY镼Cwa\e;m=^K-+~1 !WGvwt:;f޿'&BR{}p0R"}0 q8Awoo_@Eyym2.)CoJ$:yx/_\G|+{$\P%CqnX/跲rހKHf&D"ge4_~a/?p}VL ߀xףhe[ּ|~\P(RJ8%|Z-)2z^F#ŋaϧR*JȾRī6n[}B0Zt9ks]*"8ǣje d~HtEɠKgA#ޢ"Ro8<J@m('h&K&7Bat%z|#qژt[Vre7<2RXP~9k]17<ϫ>3??&[.,0́{[!tmeY6';jz`pj&ojO)ήG=^h7+Z]o(1?mrj| 'GCcSs˅|˾{5v9W;iJu߾(ұ-j؊(*xa{u:-v=$95='| /Gx߿oPDq8QD x>8DQd>yƊET1hע'&[E{jC 2=b~ ‚OD zF>):-*|.MOϜpZGȍ~\]oCG?G)"oaㅊ9vR>A"atIifx#<c!_^Z\jm-/- V:n~쭿뮈MrS%6[nN5ߪVr:V.66|Gx?}6ߒsve?8u|sjK^ޙs>OVcE pj?tM m:Bq$"!{GTJٺضesNGUUVl޾uKn",++=ԼeӦR)|q,^mN+<׋(,1`]"'~LQNxnA8&L6\QUg7%uuwBB{;m{UEEcs˼)ѴZm0w2LRB-MM[L&;`dgӷ^$AUYYLӳ~m/\ҫ8?xeΡg  8%6[x3O}Q F{JKT;wXrx".z8=Dz?D G!8N4#8F`$R ģ]HPyѰB;"DEⵑxmca ϲۻkgeEEϊw)|o{93?a4LB8vuMja%/qۇ !bkkGp8N/v݄RiXvѪd[q[{Ѳ҄'4 )HD4fEO4>!G'|^-WK}""^#N鴑O:h4Bi>IC ģ]Ii]odB; ruv`0lT}B02~ab2`G"$*iln.+)w 74\mo/Zw8kh+ K8fC!#cz=mMQ0Y\̰ӧU*Bxj+.$QHy@.voX|\z﩮ν[c~z'4Ӊ#tE}޾~emkf愗g))UZm]V8ҏfRvh}#Jd숄OEI<'̘mo %U lygJ"pcM/6'y(+.8scen7-ZSn[.8~W#MO"]0f-^o 4dd,Kaywe"7tOU%'}RP KS=ò.KӉ|H, a)s/,'˲ ..Vg.=DJ h(=^x]J>A$D͌FywB^x{=5RT*74-VM?$IQTyIIĖ rpC$"FFzz)I ! B=5FگD(*-ZmIHҔoO噋 x? 2ҏxO%sr0[)]m] /ṮK'āH1(2@Xzzqzba~azCjR2`4dk$gOSϴ*{nfztxah2Mb`B'L p O=Y앮N5< >@zB' >@zBpy~nf圗2FӪln[~L6^a'f9nف5!Is>k!׻ Ռel|v(|/Rn>v=ѱsu:zCRkM'tucū3~e1?'o‡(SaE}BK\m^qܹYXp k,a9n=厎ߞF|6uvum_?%yտۿi9!֙Y鑿x치/|~aPkB`$ǝrIO'_|͹fLɡ5 +%QaXnXó2[Z/?C ߆xǭhr9!2B!tNbgo],q|#hG{3'T x}>RT*8a|~V>eY׫QI4ժl4=33Ϙ3'-՘p2Z.oqF~&r!?߲o^}\s^.U*}jFvua}aY9r,/ߵc;!_yMx2DcsKx:=ԉ IDAT"9qݵ5LMOժ@ dL-j؊(Jzgf><}(KK;KG!#fM695hU^n%/Rk‚ٹ97W^}BȥWlڸ륟}lH~LS3~[&>8?~[YxW'x!?_?~937"joOuq>K't2/ˣN/<}Uw.;Y3ւ,ŷ^10P+dF/[mJPW?(冹c_C*(o)kwf= Tu/w vh ! y9׏~ wᝉ=e>/FmB!$ήbM,4dRK奥BIihj }"s,/ ##>h8;w-y;ogYS7쯩&r'?;|HVwuԓOD$w{GsuufRvÇ7FRy'm\gcUJo*Q@'zJ%63j++7n5<\m>hM/^y×[lܰae;:+|c'Fˏ焐=\.wo{?f[{|ȁ.eY!kyy!Г2?#Q >?:z̈m?zk4Ǔg6PWe'7hMP5WB^i;BX?z6NSo'[ٿ(5hjUEJ$P,/' p8hfYvphXXأztݔJeI򈽜΅۷ ~j6 d5Mxs8\ h*aySCCR6@z۲i+[7ݽAX=WuBkmh'?XHٿ>hP%=}}pOgefIVsuN'EQ7*-QK\7 =$Z1`<3su7xlPSqܷU[u0C-{d׍B˾{ydXI''<;yQ }a)X2cLL6k|S;*++N`XI8ѸVVuB{{&ƝuwݾuK.rj!VT38<F <BAndu,EtڏltZ# .E39@ TUBBr9ðz}=::ϟ6o7fVV;n2eE}.[}YcT*9JehVyhhT mٴqjz\ֳmx+~[ >!DR q2/wCC;m`й |7_*+ <_WW y.f\ǟE٫c^qzLşezgC?s|E-XVPAx Kݧ~PC[h"L6uu<pA(޳ɺ !D|m1w4Zy*BJRT'C Zm0LV!4-gُww%GxY!Dq+kEl%6[ ᙳ<u[e4]==Fz*02t <{BAB\? Q<}>oCyHGD^-7'ǚo9utyY)!d o[߿e&}߸Q[w _ON.|Rwu_?7_ |xmߖkU2BȨclzЛ:Ϩx-Nc^ ~VuIxFWj? pxFÂL_??!a&L6Y>^[!w_ @XóYYQ:F7(ھuK{GGGW7˲\Oh Sf+m㮴]]SZXn!0,wWO'|^nH1*.';BhĤȾn&(JKnnhī[UEe˥nض&f?d/~\@ kj}~!p j`6Z.՛}(ī 6oW_۰~nگwtnt29bKL!'&'7^t D,nQY7Ko]&kʍ5/3dyBǓ vH-[gg! uWKL/75>|sa}QGm+o61;'xmo¼3o]6yaﮝ2᱈b\tImK?E>uv`0lT}ҖMN|uKKC{Y-7--{{lXj{{zWSW^ؘ8  OT*Bxvxu+iln.+)wyu/F}?`TUQczĶ(&ԨW^m׮yE ޶lwtO`~}ڵ;Zxg レЗ~#/ko{.cm0dhک?oF|*v}yrp[E~/^zɬ/T﯎JFq~;t{e6m -IlqݨTw_۹  fkx[Zۮ^qI!ܨQBZ䩅lygJ"˲ oz>h tWE<|@YqqGN̍n; j5,\.Vh[6[V˥Ap}w'_Ӆ{@0hȈw]bگu ~`虘}])fn5*I1|q,;1Uʄ»|A՟ 0l ϨЩn99<޿}r9`922o5)mH1-> {)JR;up1h\KJ"dHwX[F?)Ɍ!pIP(w(*C_t Y͘Ÿh4 Bet. ׮S7EGz ,)dJn_Lo9?LRV KϱOT~ 1snMfĸp[нn>-샆 7rM&̐áh?';ĴREl_Π^IwG \?LfGٌi!ӴާlGɄ_2ƘEh w)rԣ(*˜eN40@j \ =!HOr}\ =!HO~8z'?:895&el||s~ٖ)xCfWل\?Y4д"zOkշ}w|rZ~ޛyp=ضre*6+a8K"HOt ;sca ϝa/?p}VL ߀xףhe[V|~\P(.:^x%&aϧR*Jxs|K5_8IJh4r<%K^Q Ź=V+ɤ8DQd>yƙYr!?RXP ި~ZxlEE{v =qXaAA}edϜwJo_hPV@ pOMxχ{񻪪J !niZ~,)^.[Q M᝹ox.V(O(Ғ⎮ґx.EJbN=$;5='| ըU 7[]X0`dX"utܸ+˅P{`)YMrS%6[nN5ߪVr:V.66|Gx?}6ߒsve?8u|sjK^ޙs>OVcE p1+2XU^ M, t:ݢƳhLپ~]ф_ϺRB\.deEqk}>DY"*+ʅe<ϏML?\ܱ}CV k?(T Q)=옟_p-.B-UUt%wj#,;0`g)B΅$UE[aAErB`0 #j,׿dʊʊrzz>:XXPm68xǣPС5?zeY߯Q-ѱ1c5rn|ĈUk++y[ge2{ut֟?m6oߺ%7'GP:u1_`W*ZVxrQmߟiutZ>i:IP%7Rz)2}Vt%w99J75oٴixdDTF:mr?TgZ.W>1\0lՎK\EQTUEEcs˼)ѴZm0 LV!&-Y&s[ Ū,SqnY۔l%6[ ᙳ(saaJKu%+ꂂѱҒxJ\OU2;k {8s`̼|qܕkV -yn>4dX!K^[<$ltwɌv]n7!DTZrs9MPFp|"}1`c9M-E<ϏOLFl#GX>/7O8u07_@ $WFEt!EzgM-Afaa굎E+1ʉ+L&+-+'%T;^,Y(ۛ/eeU{WffkO3 e2l#P(̫~_X"o HmkJJbsuiJP(<];*t W /(CFFUEśǎzۚ` 3j~ʫb7eX|\z﩮Nr<u/F}?`ɔDݙR b=|=~ګ:5JFh"o-yz$?D-KAΎB*Jׄt]ڼzn/x^|?m"c=UܘaYMiV#i)Ixwe_z#x@0hX߂pxn߲j\.NM~߯BEϰmTD@z گu ~R*.JT$pNB /|羦T*J&I掂GӴğe`h\ityH.HOo5??^O(*֥W;F_(NPDV_b(Z.όoQTDmTD@J .CFc~kTr$*.JT$^?^0q!n0`4dk`çk8Rl2L~hd$ ȍ΅ky3{}p0w|bD?N@ 0r6Kir8a8KmpR"RH:~2 ޾b烢 "$KđJIm{{*ˋmkaT~#K-/O|GL>_.)9x=ZV&8߆eY׫hry Jǹ=V+žVR|Z\a>JT*3{wJ"ftn.U 1ۣV 8mIh:٥&`vnΚo) dg/LhȆF\^Hx,fgo.s4s _v##ojsca:vJ !f*8][zYξzZ&qX~ ˲ w%/p4465\Ϸۻ_V{<[QQBnwG Sj*>]=W2^)^~ߓCyyZhB:84D(*SS]hBgN9M'XMv7L IDATg;rZRݷh4FT_ym׎턐Xz̘qkHx߿o_ B⍸P|iIqGW|HD5BMiQѡMs!"AB˱* {\+vوP^7p=D1ksڼilx]mÄM]==_~)B70衇㍬ĶzRkkyii󲡩f"s0,j]OͷܾeN:ܲ:V.66|Gv1>9yu%6t֭2rVd2(*zpHqE7x~xdÇjuWwOmGO=Mӵ}TVRu`y :DW\]]|E9=}OIDYϏ&1f8EO?|X<3<ϟ9W[YQqed̕$ƉiQD&vȾ{eLTض21YYQx\ni M, ~BNdi}_X)Z/Ki8XM9u1_`W*~Bi:1,NqNIQNx㭼G"wP}/=ܰh5FrBH."uN8l%6[ ᙳ<ň $-}Яt:!΅DE̶HNJbb/J$rpу""&Rr|KzlZ.EwYv{wYQ^^uo#, \2 W!]ihՐZEyBp#!h0d/^ @ b߈ƌ#)3shd°lĝzz =px:奥Sӡ˛o=^=iŚ=O:ϲlmܜx"}レ_(l6?8\'OVT 5:Rb.mDwϾ_N(-mv- "q:I*щ(sdS;psP":\PW~?bZ-$7o.Pʰ˭Ջow{ 7 }o,.,ށ∳n17\.2Bn\N'.ACFF?:;Yq .N ű:}fr:]K, Gy]zFEbDh4MQTށGJEd_GQz2:( ;S)%NIB샃!#1?ڵ{.O,d) R&vLz[\ْځKAIyץ+SOo0NO w0q!$d2`0x WFFRt-y3ʞdFd3L*m؝lGAB2ƘrTȈi;rԣ(*˜e^g' >@N:mR"y?? !8[&,'N8.''f^^ /0??;l&0 K/uuuqWUUo|{^h_ao}[f^***tѺ? `ƍ}}}n[~}:>~_}oB4HH !MMMsss?_L6*'?Z!鱱m۶?chPd2׷m~g,q^|lBڵk'&&bnc2_~`0|*RFFB֯_wAe~ǟ73>{F#@HAJ@,Ƭ bP`Aa Bpc0. BČ$tFh3}UuWe?Lhuu֛Yo[Ofz]}*% ]{"r׏ͭԒ~_9r4l6ݽl={w}?z衛o+ߥя~t!IRaDQԪzFiT*ǗmI.^s=S+5ۻw޽{׾~>>D|%w. c'͖o'IRX,X,z#r9,qᇵpCk]nL&#H*ڿev,ZGGd:~$/}K7D"?_s5;浹[?~}}3f+ٰag?ٿۿ{'&&Tnx+_򞞞G~s{G YMM``Z:ʋo}bض]_|@Lj`s"_~oeYiָ?'OWo}Dk]eK>М@غu뷿*DYhNd}9DYhNd}9dջo݅\׵ǞW>xiz|eyk ql.m._=zum~|>*~4ݯ Y".YhNd}9JN&@M%FϜRO̐ nTW%b*t*4 VHekx@M%FΞ:j[+ Ťݒ|YO~-sT{/2WPTJ S/?8y̫?s|7-o E^J KTt:cbEE۲}OX\JJdR8vd*dҲܵw*_ڊ]ߏPSӓc#5x >PSt*Njjݽ5x >PS]j@͉4'>Мd2EszdrǶ.M"_Hi>k9rL4ݯ 1"Ի.3ȑGw7/뺎㺮n8wP0M4M0JzB_ZZT*-.4Z򟢵hwsyJ>pߠZzKk]]h÷D/+4 S)Cn,˰jJY)QeV e2aXPhD!JhE+Dk)Gj@⾈VJU=2Yw]`eV)$K9ϧ}QJ糾T~|ݶDD)ZU}/kJA_r-J'78Di^_hq/w e}e je-բoRm%ľx}"7o*)e}o}]+|-/ _ioR&n]xff_JZ+o^u"\ Re*ylֿ輾ka^ӿO{)%| YJeQ*7fy'sk^WS>WJ.3s7(^WZf-J/RQ/u+>PfŹ+g;J-)y)ždBKj-9_|j>V̫ѹ5) \aJw?k.SK~)~)/˗o/ڗ[.__ 4V*W eaf-ߐ2^t`X+U9JV>2Uu~弾苝d(0%eթƯX4ʅ"ܬX߰BPa`_qhʾ^voh(Qe82 e( nfբȾ|%_e-mnTKiy?'>H7tC+W/DYhNd}9DYhNd}9DYhNd}9DYhNd}9DYhNd}9DYhNd}9DYhNd}9DYhNd}9Y@w¥#_ޞzwk.w.5<@s"͉4'>М@s"͉4'>М@s"͉4'>М@s-JLs9uݗ1 # #BFFV+aVʱjd}@L&s3]zјa\5Ic|SyKGfD29ѵ!\cͤ''l[Gײmدs* 0VW"UW=d׆^bGizgE#Ɏ 24;W+Jد YP\.ջK$粌R,]BH"ѕƊjfB O=9ڱ^yHbr&eFGkt󦞷ɶ׻w\׽+ew$#Hyda/ZJcLuk'~`{kXqٹO*{έ}&Y}ϾʱO}=;D$/ML|V_`;o߾~[_4$~Ϟ|"'Sx}q]1 w]}{zzqZ{W¿@_rkՁmljG>;y}@#"Th}~^:uvLDvݧܺQDy??x: _SϾttM}]ZkׇDdǖ{Kwg-}|^ɡQuWo"O ׵r.\ƎɊt*i:vi>|ڙgmǩ#> #5xP~Cy?\vb|&Zm﯆@~M޾|u;nn׶hylһ' '\Q.u>\µ3Zyb|4ImyKF m.akCut-~h$24<3kߝ->XgGǚ>I]}X^MCYN_ܡui~e{E[rzxzw]f;}']hdvE#HXGD\9uHjnֶ],RjZu1-k]?LLZZWc"bZZT>/__d^ql(Zճ3L867 m۹|~l.'"`pF[ڼ7J֎mWw6au]DZM4 +b5BA9W:~zŢm_}sogY,ga:ɹL.\6 C׉sӦiv1oQ"b#ùLƴLq7o 8|ލ0-ٙɶEwȹB> efo3^Eӆal4>rϏ G=}*7JDbxK[$ƭض}ȑwYtϿ`0lyMkOdp`ӉSDZmot:}g&`0P('~s`vn4@ g҄ :ki'Ec܌vݾp$,m۱җ 岙Vjk\~U,xU7U6S"Mn&i3t: #ֶMWfe3Squ.x<޹W)'ffsC>oۮ=J)DZ+],Q IDAT׬#TgwE2rn( mڼUDM%GϝزzzGϟ lݑ-[wHwl6]J=msӉv<km C5z8=޻s0TP/~;wlW>x]`ŗ^z>D#ѿ=ar97vuv޽NK>x]|k$I'Kwg:JlnrM!޺ca"bN&Y~ERcp>+/%lÖ;Lwr, +޹n'ɉʅji밋],Fc-lH,om?7tjrbkܶ`(u~GJOl*MMM?qĩSPh|ba0ڽ+ Kx۽ @ L޵3ɤ̓D.s[ceiWk[iZ"φWJ9#"W动5y%gNϔN6سt ʔf$H_ h0 8"R, X}fzk׳sCZN- JgKށLk[D4RS U D$I{}O>933ۿ/+V,X庮T:eY֢d*xB57R xl4=BݽpQ%~M𖝿Į-遏{7Vw 5"4xk{& h<۶jo.v&O\fwO7nTj]T GBTb[%ggc-T2DJZb!9;37;:NuK:N֖H$|ԩEw͝:{>Y"J?S.H$bv.Huw_+P,38?|f=QSYcj>7^ç&7R":f:z\f΁z5T$غsOmnv^_Wf3pXD(WRs3^K? 'Fqk+exg 9O*sl&Hi۴pzeל$GΜ8V7nݹkCoc}QJGFX,z :ωS/xNP( -xwWs/jG/+],z5aY%x3ׯp(|3+ :l<_(> ~30m6..{3J{3ܖݽ"[;Μ8H4RŽFI@0]`cYϱP`K&ܺC)d2!SH4ַirx[GZmۺUD<\([ڵ; \k׏~h4*Zl,.}dr˓ B|}}o|H8d:;;磑H%b& >ߥ|Y5],kZֲ"w"15->ʠnt{g W"Ӳ "bh _KK|>Sjx }/ջzƮ'"\C8y\vջhTdyrv?wù\j w-;>KCp(pMv5".'2"\}\^qD)(ɐTC(ias]0 Ff)od.gbX5&zu+ eҩzb}ɤ`YP6}ֳl&X_-~W_`XZ㣎Ի#8񱖶vFf,}*12V\ǙJ4VW._5Y %Jw/#B>w]z#X~dRX4G"x"H49DrTb<[qد<ݯVJgTXkWHk{?;?|%Wڻ˱,:cѵc~fZ~#KS8]?WFzU7a"~ٕM^{pY,wUׯZ/&֐R^~BTV7B>YJøzw]{G#jKm6F*5 ˬ)5 K)ST `m(Hũ_2*M7ԕx[/&֐>-/7@8$F#X ^~cB~)ɅV; ]>~8ߤE֯+~2Ţ\I&S"^~eoO *x/Od2zd~'"?D9dIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/eclipse_install_pde.png000066400000000000000000003754231507764646700260360ustar00rootroot00000000000000PNG  IHDR0~ 1sBIT|d IDATxwUsn{) !`+(&+1(?5bhb,(Xb )f`~w;P}朽kbY{D"H$~T9:t1{,ѫW/>lNdD"ٵk7j1N;v;p̛7_zfYpYgZ %D"}f$vZ,b޼sΌ} G$D"9Uhd:bfFɧ.^Ϟ=K :EiIw@>j\:ܚVbb#HLlƗH$躎2ЧOo.aܲnC񷔩|viR[AuNΠgґH$ h iDQQQMTGQK׷QZRd/_{o7a(~7D"H$C�`BfU7Δ-=y ֺ[,Z7ᄑC˹}:gtD"H¡~s05`_un߰nm̻=:f<D"Hu9~mKuxmr^0 nvD"OiڱQQQI||a5E`*wJk7]᦮{ĖwmC >[Fӏs8L4n‚MG˾{6m Сܮ;BZ4M'&&:MN]%Sa5S\\̞={%v+7nyaYWb˖ۏ-[6CrrO}زe3}!22"REZR[ $1¸mV1H$yѽd&M@svmV#2vލ9Ѣ7H=wN׮9<}[dwmh}ۣԩCvvxTG+0 MKJJطo?_~%ooE!%sOgs?Pʫw>}ӗ7x ~}/ #Dՙ%K^׿NBB0rZ^^ƒ%K||\ 900A+~k ^ߣ XfV3s&%IOO 77=70n88 *jbߤUUeԩ~qm6fμO?ͫJ׮\tѯ076}]M4 \.JLn }e,X!][~a(],[!6fsrՆ>8V|>>_pӇgypjBj^tbB]4g'uJ#Zwqc*L@_⺮y),,7p`Ѣʫ 4 j~pz= g6[PU^@mۭ;0O>̠A]BƟ)@Qޜɬ%%&Mw ~=u&0 <r$v,e͚oپ};˖-d!zsssKBeJbb" 媫&q& Axͷ2dr-:jMuu)B4-Rl6֭[Y%.Waɬ 0FהLCH}IvqU"9Y;&77%K^?._[nmm)5>Lqq)Ӧ]GLL b))eVHHw(LFbR5,u#&V;LA','9=?Ittd1rp_}=8]da&nFڲwȑCY >l6n+߿?{`lٲP5Lv;{!66˧~ƥ: C`~!CΨ8sx^y#N!$JTTs!'O /䫯V/ʘL&r G(7| | > Dݍ1M\x/Z \xD~{)}oIaqd} X,f.|u(..7B.2@oNՍM8w]֬ii);tqU"9WRV}l6?Ts6TZU6Q]]MHNNR-zN,ZӦ]Krr  (eٳğtg |P9 'F[cpQUKC aEQøK ϫHN7<b7x|`2. = bE ]0|9XVvAee9&SpXsvWc2Yz |>8PZC>[Vuu-Q]]z^/X\*~/Frr*f σ0tKQYY*j"**U5rU[btƠSKJ >tv &&%D"HFll,CauZӖ90D"HN(ѣ svI9$H$ς:oEU˾7; 4`` lf11KgL^itK%.FX:0D" x=ܼ+4]G*b#ڡh#lTp >pKp ™=R5ܔh2"rXN>ҁH$g7#LTVDd-f_ {)R%J o@b!zHSkFb1ҁH$g@t}$[T?:G;OAz'cpᏱϒn5f(-ukMuTybA!D"H$'Ōs9}W'_vNhJ] D"HNv ~&x ;6EF3rՄ1Mw xIXv`nVnz)S믇UƧ~Jyy9>U̙3y'Bpe{n?(/"/_ΟgS"D"H~D4+0Lf4،VR~!fQ(!N!aÆ1qu߾}ɣi$&&fڼy3xL8>(..jd:yOII jk_~e͛Gtt4qyi MYYjm6ֶZo@-ٱ~k{D"8fAuVTPU!PF0a;u3Qrgp]~塴}qp8&..kٲ Dvv6˗/ 55]wEvv6999$''+ȭJtt4٤?*sϞ=L0IΝ{AӴVuҥ uuuK<(BNN1yǍGdd$YYYw(O>?jժFy̘1L~(** K6ow/7ql¨< aR @x-l/8ˑ:;lܸ1t=k,.R<cƌa0m4jjjh,Mu0]?xlݺO>믿s=OХK^n76l@׃cpn1cPXXȔ)S0a<tr=5\c=ƓO>O8q"^}5ʛȠx{:K/>s3Π8P … 4h~)IIIa1i$9p `Ȑ!t9,;v[ӧ{H$IǡuKh~Vl> !]ï8@HU1)P1i$xl&w;gNgL .CFBĉVhs!8+?*_|YYY3sAu:wLQQNhrQ^^N>}2e qqq|Gbd+`<쳭~FFFrcqUW">?1yy>3 tܙ͛7_j-ZtLsa۶m5?_ڵk9p\p>`vڬOkxm/H$㧰fg1f >R#1?ݤ}Am@FP]wXWŁ7]!x/o71{*ЉKߎY"H$mXm>cݞR ' cat4ExkQ (ͻB ̸p^q;UסXT֒_Zҵ{{3Ir8@':6;0cgddPVVFaaa0q0 HOOl6oOqF{=x ΝNzz:Ѭ_^$vʄ 裏CKΎdٲeڵLv܉`,_m<;}t^yf̘ALL ]vYHph0w4ٱ=~;D" w䫝wApoatJ39W*dI̦wU:  |$;t::9})n_-Vjւ Vd%EqNL>+r]&Ӈŋ3a2228x wӧOrqa.\ئ:ϟf۶m8p%\B^^=>/ɨ( HKKСC̞=)SPYYիw ^x&WXIyy9/"|gm`ҤI{\{\uU[\tEa y)++cԨQL6U;GoG^"H$͓_Z[ջu/ Y%.ʙ=R43≴[qZ;@mnU+Q#QLk.04Ec05>0 0 bdMTrvƧx(i-Jό8g9)ݶhSVVFYYُEDDDb nV/_Μ9s:uj~… )--nOEuu5|M/L,X簬Xowywy$n喰OII!%%K.'G1sLFĉ۬iӨo+we„ a1k,֯_ƍYp!~ӜۣOk﷣l/H$qYۊt4@5,V )1:'E2qP'K')ځŬb2)} @quFpuQs(Z: M`RmKHpo$`ff!=Jf_O@km IDATwUzGn~*|nO)YӸFF87{,? @\\(immk$&&R^^ҥK8q"{!##FJJJ(//'::&qKJJJCAˁ4/&\}Z{'Ds7'vQ Pqv❌„BwG.U|bv-°DC kT+;ѭQ>Ԛc(E~XVf3 Ʉbj1cXTXKXJ7uu~ D𘃙1-ڣ2h={<2V+zj2 \J,. s c(3%a @54@u4M'`wu]t Ҫaj+g2wp涻#ҁH$4'>S?ibʯ(,wtV fP\}EQLX*u5ܟ:7Egb+in5"x.w/0Y44]zЁwf4Ctf.fWzO,"ڷɩt`$D" ןC| [ool`]( f?-{!JC/ 1(f >;uUhM'gNHahhnhzpwg{X0( 6q69)-mjD"H$3L;//²oLz`13*kTQ^UG34 3H㻽El &9uHRWDPVkO)"p  .>ߏ:~Љt̪?ަ,\b5aVU>LFvD"H$?!՝ ,YfK_$5ɅC3k:u^eUոn CiQ ޓĘh*jSTʁl|g= V |JpgvMuPEQBǬz>dtbۉMN!rQyLWGM!D"H~bAݥTx݌DeM^OFOC1`pj2Uun'@@#kz[Dr9* P9qUbRsn!S0oE/6VtCG5Ge먫aϴ1O#H$Ogo!ow)!vZbd662,+nɑ$GXHh~kB<^\~?>ߋ 1%+>.yI?"l&\<ѨN1 4ܥPDȈ~io \x+up/5cG.HTDv7'4ס`KJ'"#∎Èu:ݮTPtk&nU/xX\ݳ}IFygpD-[tRV\#**rv =͹YJ6ũ*ϪUX~=۷oAdd Z ERR)]j6l[nN㏉ ݻw3c &O|\9ILL |z_NUt"V:!tt 0q7x stZ*G%=>n d%-3RTp,} j:˗LtSHLllot%Z߃!tE+D0+&}*+6`,a`ժoy5jWf޽,X??W\ɁGÇwm)yn7@EkMEAGaD3aDUEqM5;Mxm֓C\%MXdЉ!=uq#T{ѰQH)( O'3>9S0O>²:~>f{vs0a„c򖖖V٭wݔTzrQZZJ||<111ǤaَdSŤ`瑷|W^yӧ3}tRSPI4 IOOf .lM}?q:۶wL:5dUў7j0(FNRZmƙ={(E;V$$$nI,^X>|Q9#nviB|Pq!CW^y%aѣGZ|>!\!Āk֬[l1112T^AAB]Em&qƉS6+kBBXrezݺu"&&&tݔzBZj-slm91w\$QSS#~..]_KmDϞ=E׮]Ųe˄BL>]u]MG[ny"\.W(Z>_~6R[[+ &"""ԭ-=]7~ZjMq=Ɇ%|2iqճbԟs^ oXPl( JĆ}w{ ŷ ź"o~v~f>v>3_ݙ/v勼%6o])S|/1W[ Eޮ"oWX]/vvow{ CbkaRX)[$>ndׅm3tB)a@D\Ⱦag#hWfϞ%kG}_W7Ljj*?{eʕ\QPPe]F^^eeeǔZ={j*}P7))du:x^.]Jyy9ЩS'v}fcΜ9FKnjp馛B.["hQ5:RqF^}U^{5RRRXfMزK/Ĉ#իwy'?x7t)))&[xO,m/Buƣ>zLZkm,99>}b o?`Ŋ;6,9۞g̘z1 /2~d{ؽ{wuGֿ}vnw @dd$wqGzumESx?x?-ؙ5/'rʌa5Pi:kvwr+XRҕݵ53%=(j2b'47Q&z>4xŬZ%81TT >c2Y8mT;5>*ew͍lWn(G3yW໿Ośc\ǵj2c f̘arUdddPTTwd{hpҋt:wܢM:ʶ 4Ýq<oֹ0#2{*`ڽ*a_i-^ |<:|ŗ:5]A>t͇j1a*EX tL!,\,+h[\IEUmߢb6zk)LEf*+{a.sVAό8odH}`TUn;`׮] 6 ]O'-7 EYY1OCqWrWR]]7.2IOOP݅X&`cx<[=uOS-OG<-ݻy-m.((Cdi ++s=t%hMB:ɶg}6Fb޼yƎ˭J `ܸq׏^{5FhΎj -8=VGfff}T8[K~J<O&(۞ Nf{̅Cph6k<염I.UM]PDlw@Dg8,z4`*1{j0t6\:$w>-(َpoqU;";nR銅"-3*)ν[Ǹno_3h uƈ#;@Cjժ&k-n>|8g-e ՟ʶmۚbbb8!ѣ} N}xx&O{ W8Zx;ZUUu=n۶7xMu.Z  &[ұ- ۶C=3< s9;0vXEa̘1<#-qtT{~B_|E n2okuGӿ޳gOzw >h:ʶ'ݞyD1UR^Q %H1TT= Lz Jk1HĦ{75ex JT2֢el{-~5< X9ܬ~E\T*vk X\c׬8q80oDFF̞= һwoE^cзo_;wn嵖_Q,YŽ;HIIwt-.^uN'^z1ݻХKo~~,mUe,[N:ޤSr$<:у#F4JoM>Mq<[w<]to׏ɓ's嗷˗vzlIǶp*l䢋.Ƣ6lycǎ/َjƍcу; [>*tUUyyꩧڵ+ݻw3lQdp?7t\"f]`mDBT᫜Q) |~J0coPfϞ%;kvBN'iiiLcp!Ҏs///V8])((@4󡰰VOfffҖ:$}Nd'Bjjjҥ) e7Gsm%;4OͶ'O]={d?|:wLDDDRZRc޽dffOv|*sG}Nfn b&fdB聐(_U?Bxa«1@7;CX hVԘ-|nMj:1LCPI& ,FБQ1[bZYUߞazL:.}++0?{cq 7BB a׀d"''tfY QٶB?fݏQ#tX0T 6& 8e zI]=uUt ⅎ/MQi*r1摇9J$%\pAgH$+.tIB5sn`ACC {6]BF!DG9"H$'pbJ8v"@(?IFppp,{#H$O[U[Uh´&*͗T"(G<)#0D"HSÊ$%-hnxJpՑُ|.x;𨊯H!PHRDPi `CAPQT?Pg4]I%BrɒMdC>drfsJ*k(Ti)زe [la׮]6ۤ4iM߿?)))q1[L:={^Ѱ` ̙3cn~.ٺMa|ril7BTPbT(T%4qH*mB"_g/ IDAT/5U2`fϞ/_|kFxx8ϷlUSN|vmS#k̙=z;wpB[țkYf O?4>Y~͏"jl1=XآmzcL~K࿋DCDP4.zJ*E!$J$4B[ P(Q(K*7*C79r$;wСC,[YfkZ-RSSIMM5f~~>X,j=Qlhw$##èt{cXe/ӧ0ũSׯm۶HNKT6KL򾸸xnXyZڤc`^5 qqq2["ceqWT֖XSm1+cU뎠nT1JQpӐQ@iw7Fa*c(K)AB JlBII_Pi\Fv6ͪwzbcchٲ%ӧO'""Ν;s!ҥ -ZcǎmۖK.a⋄1x`7n?oֳL23eBCC;vȑ#4mڔ{???v-?3%lْ3fФI:uD 8s ǏUV /ϟ?O۶miժקg+Yw}gYرcYjK.%44˗%X"dgg4XWUM:u*ƍgb2V4ڦc %M4wce K<:oeͱTaN9ZGO )Jun- !SjE[/E7{en(dclOiOMT퍱uVhԨPm6;Frr2]veDEEquxG=z4ΝcʕpiRRRxWL>ŋӼys/^իWYrlǎ>|$^x^~eYecՋ{EZ-+Ǐݛ={rwӧϹx"z#GңG]jYjǼkZon4/V\ȑ#8q"W^婧2KΊV˝jLI)LqHIIŋ$&&r)VZe4 阩tO9r+W{1n8 ̖YR$FkeuRKͱFl07,#AݧHSCJ\KV 0,5dn]\ʠWE (J ;@y3|խ8n APP`uCU6`V\Itt4͚5cڴi,Zɓ'˻ű~&L@rr2 :#G EEEl޼  ae,aWСC9w$Sl r;:N4I>j}( &L֤IΟ?˗9}4o&PٳٴiKݒ|j^9+K1%۩Sҥ ?<᚛anݚ'Ozj֮]K@@@ɫe˳tsKj'O?^g޽U.s1GR*kse-#AGJJrN;%ث$*`Wb1\!"Ey9dfXP2RR^Yr J%':T(l>Pi5*vHHw\\Jqԩ4jԈ+Wh"&NHTT ,sDDDTWΨjt:I,=碬{''rۊ;99QPP@RRnnn </w4wͣP(Xv-вeK|}};w.P2M6DFFҶm[F͒%KP(>3ƫʺupqqlՅRd͚5l۶06n5-5rV7_ktzPtH&MD֭iժÇgذa6M 37wm۶iӦL:e˖fo6֭A4mڔhMyK˦6ǚ< ?ى>̪c|fR8i)T11G<dž0RW~֒"qr.N2[ri;TN.5\y\wʄ8ᏊbZ1KW-rߞvjJNf{v~KgwzCԅH(/WGz o0 #ȫG-)ki@ KiEP/iΉ4KǣKDG7*&dk24tF7T¯q2ؓO$-( sFtjN " |Q)„R#g{&iɸt~<˖#dC#)rÇ hHJ5J|%93kcU/nb)€?D {էO0f ifevIBSSUB!ۅx&tj@KC0@ Q)s;ujHLJ?cP(`D<ՀU5-n9 +5%@  xM=iLV]xyZg`qa@ ̡8WlP!@`&€@P@ :0`@ 9ZFDž  O@ !*Q6֤_+1t???xIKKСCk֬L+l߾״!_ϋzՃЫEFf&']v&&<@ttt5^c5-@  ]vfM0:N/jZ @ mZL@Pg! b$u&h2tP)'(jZ:@`##5H<Q_}lN>(|Ar1@ Xm={RPP@XX;w&((ȖͤI3f V?u 9 >~R6=D4>buLm-i,%((&U[q'[Μ k$_Ēx7-+Bʍws%/*'ot)FQ%ڿҵY=1q<}BhӺA=4խ'^/ ա+: pwTGݷZ/Z2`{=}Lzضmsٳ,vM~ŲJ~~>/_扑#ԓ6:|f2x=lznqyz8F?>%gIje+v횁9:F b[ά)ŲiՊgW]_jC-' .po:PJ(]iHeh~¡ɓ(+ G2ڻg<9f4#ʗ),uo)NcM %u)00@fw5`  x ~'v>;;T7\~(+V%11` ήdLѧwo޻oPF4zyy |}PTLtgh[zlzv+sg`~_998bK]Ւ/Umh4ddf元_NGJj*~ۛ }6e SySV^*EeVnV}t*yZYu:iiU),NGʽKP%iQ(Dta(΀J %KrIVXFhH|?;;gn}^}egz6e{{;>_qWӦLɰ̭Ǖ,a Sc],~+4TxǸtx{{~z6m ԩSYj~~~$''3tP-[&+{˖-0`k׮EӱvZ6l9|0~~~ӪU+9/LQQӪLi٢9is=%7CBB"xxΛo^? T[s/I^~>?^HdÆ&>6Iu̯vQVS\TwަC|"bE| ؁gjqzڷGHvoobyc;dffViѢ9s^!cc$ٽwJyy5{t eܷhݪ%/]i-f'iލYӧT*/B3ٴktkMڳ/HvE~ڴgggvYR}~LǟGFf&}z石**өHvSg SipzR$A}J$iQ2QWZ$PDBף b6m/h`@IףkSnnux  %>}ظy mɬ0˖bN=,=bJ/L)},6`ݻ7<>(;wo߾32zhػw/*w}ѣGspI߸q#''={j*ƍ|o۶cǎNGiii;8|0̙3_~Ǐ[!j،+ xy4)_| J߮%HoK``;nގlJIPǡ,b oXsV-WgHeqZJw *`zߞKv4 /^%˴|<==O̝>?''Gt:Rs%$Jߟ&=J"/?/A;!V~^^Yq)XB=շwo>KR?,m;vҭKgΛetHz=@eVlްw77$I"5-ͬ<0Fe:UY\nLa3ߏg?Iaa!S^gC'Z]&@ DdwJzgĠViբʖ)ݺ}IwxAż0m{Y˖bN=(=q)}ގO0۷ow%>>g}@̙@LL g„ $''СC9rnݚ'Ozj֮]K@@2cˁ裏nB???t+Nӓīs%cG`ooϓ`}Ð_j B/I8u*YE6?_]x{{qy7CJeEdžt:j|%}qyyyrOپ_wfЀܼTVSzHZEP ]ZCE:eM\攟iOH 6.O<3#?bU(5EuTv(Q95CcRn< (*ޘz<{[CGLzaQt"**ظ8GpttdЇ su̪ejyy򕟗`R/L叹mnc&Nĉ,]I&1`P*roJ):u";;΅ ׯj233 ܇MHH\RZFә=faii`!6[gqu %s/0pߢy3򸞒US~K+zPR5-zqethߎŕА\]KƿSqvv]vF!''GNT*vI5mZ7 Yc$ڛoUcSOҮMpyc 4$7f?0G4)=3Qt*8(Ȣ-?kӞNN@+7 '.φ(#PP(ud]Bu׻`_0K ӔT9{tJٳo?wRt"**̬l `.z`i[vwǎF Scn+TyR3<ԩS￉BӱsNc7[n%33777@2pP :(==`ʖ;憻o>GGaC~S޲EEd䔫BafɁFkPXXȍS(Pxz-!< p]Z AQADx|}}6i @=yxpDSee)撗/Olsu\S^[ڏoǴ4-ZdoAVӮ];7nLtt4ӦMC.yz8PRY2K2?WgƍԩӧOG%+|AA<0?KMXh-7%_hoGԙ3Тy3h%KCM5o/bmaiрWJ^ ߬^C] CܒiێHzvmZ0Dqq1Xa|J^GͲ]mK㲳}u9+?37oLq{҃T+joF01LuU-?Si_a^W*nJGC "(P(Q(!8=GٻR&MlڲUnoz=׮W2ݲa4).Bv.R]XQ !Cә ScWPFT~zM?ŨT*+7/YvZƏO@@!!!$''ɤIhݺ5 4@$ Fnnnq* ;FE@@[=G&C IDATcO0y xa|IKϠ]MYp _͜wߣCC&;;}! +k>dͺ swyJR*5{zvȆ =c8::ɛ͔&<9^"IϹÃ"!ѫ{7 5h~jڶ!)Yez|^3>+T\dJ)JӮT*xm+L5;AVs_\*} <Ӊ"2/"]2EYa7*ϿZƇ>LJ7nbrqEe fI F`?k+].uX0M}Q:ԋǔ (OI1cNl߾Æ_XXHbb"...͕7*0 ;%%\"##^ AFFrעlڸvXgd#))KNn.99Ĕ_kؾ}GғcGGa򄧉;k7}c)F˵3̬,  К4ZWY*ʛRz 4NUےnyZYh4$%'W;-{x汁dd|fW3EJj*xSϣ|x'Ya#O?ȟ-uz` E)叩VJ`[**ΎZ?ٙ&&v(g 2|,fK\]]*5>Jʭ[Z}XvrQsWT7*"=P*V}Gw7\^h.~yZYQ%YS9p yVT{LHp0 Y9_8[Nc\/͟ l8QP{x`Dc)..&77`?jȈ\x8||d9L/#/LT"Ubyk8Q5-4g&ִ?a%k>nª@ XɈ@ A " @ uCH-ooa5-M4=/TB&& gдi;!Kcl?@ M+1$!V! M$IB$r3ILA~zFeRT7 ZXG]י- @ $I\@NVa2zLʝDq#'8 / ZD]IHJ%7+pyz3N%';!~ǀ$ؓsݨYW ?wZ0;*2-deAmX#n J/\:wW7||kZ,MdeImzb(.3*{дek.={=tNρYūK5-@`z tߍEPk55/k@ZJrM"(C]יkGѠjK\VERbݮ}9n?.9޴uai9uUMl?S5{ʱyxi1*ĽW.״2u)#A$Ts=$3 nmГǟxhݲEa8pt<<}puuH&Aqf6mb]f<8N<}݇S2x`za^Ϟ=W|?//~ ooouf&Mb̘1DGG[RgJ9s H郳s5z07z,B䙳@-F*bARr2!]:STTFZy78ǟVtxi߮CLHp0׮i:~O>テΞ=D7nLvv6'N୷bƌs%J%saa~TT0`j/.^f̘/0aӡC ٳٷo .䥗^YI&qws!eݽ{7oN6m< JΨjz)6oL^_>?3< {I&&-e[PPFQ ?{ѡ};9}?'G=2Y54h@Ynu:ii;Bӑ5b֟+S_G4h_OqqgQ#Gwoeر;V)\$֭[ǘ1cpqq5jNP;x.غu+>,+V(2ݻ70+WgϞ-..d4r*#55rq"??T zƱμkݻ'Nиqcc U+-T\z__ /==GGGݍ>/_~2>,%3f|2N:իBQ(_ #''{{\\Wf˨$wGGْyp#zr<豿.\gxaڽ =6S }hC0m:1l3,Y7|xرRTT_Exx8]viӦ̚5 O[;?mҪU+ׯOIKKh;ٍ)JV1QQ1OMqa!bpv{β۸N'xy,s;[B2`?>BtH^v֬\o۷GYɢ$|{.Οyz<| ;}^z?;kٺu+vvv4jԨRwzٳǜ9sޫ,QQդ$6i3x} 7kתU+V0zh8p +Vr QFrJkFUtܸqpE9uVkZmƱcHNNk׮ý˱cDEEquxG=z4/y,^WrJΝ;ʕ+Ӥ+T9"4$īWyz/,c,&L?WѠ+-#2ZgJΪ15oŔ>?MA˗9}4o&̞=M6(\꟭-:#WxcE {|ų/MCB"j"QUٷ7X5STYs`_JE4h/[JΔ+e1xzz(?+ۣ^^9.e1ШQ#Ynwc-FgL>7TJ>{Aٵg? dg쌓|47j [lٙPt:;w4PۿF#gС\~ݚV ^^^,]\B&>[Kϕ+4%CKNCg7oNhh(ׯ]vFݘ+)}dddw\rE~L^^ٲD -ci pKlImҙqH\\pwuV^RPXȍ7䲿}yZj,4g巫(**bun+m3!ח?3'Nb9}z̑^KЍN9ˆ#8z(7pwRvvMUgؐ!D4'.>c{OH ~ja˖-t:6le?j*<=zT/RDɓ"ϟ? ɉ-[FˣqDGG3m4j5P:p/00˿IHHJ{uZѕۋe/J\}U(J}-Zė_~)]nn.o62KWn/[SФIx#&&>HߴiSZh!bj͛ǐ!C0%CY553v$IS&~O5%SG4Z I7?$$BBl7߭EBBwkٽ9X,w0J9b3hPxxmZߜ=wj8[hޥO^`ggżoѠWxydeeLϟ3<xzzr Zjen…xyy\{챙1lFnpo1| sra9zL!TGyĠ JV~DDDCUɉ;݌,22I&ѺukZj6la;88~zBBB۷/]xhٲ%̝;Wꫯn:\\\xᇉM6DFFҶm[F͒%K*)J7FT> xx y>Rd͚5l۶0),,O>1;]d(A+zu[b3!ǡ<7wzo0t(<8MUH_f(X~;jACe jIbMȬ5%??w_@l\AA̞1//wy 6?n6eն;EJJ DFFek\]FPP9d:Z-88TOHXnjT!u]g<Z7zwl8Lӑw#W X#DTI 1\<{] YpGxdS<}k|3Jp]\;&M[€cooO`p(5-Pu./Ly@ ua@ sF AC0@ €@P@ :0`@ 9,%@ 8, \ ,$CH@ €@P@ ;Njj*۶ 55*€@p F Go)€@P@ :0`@ 9#aN'--{R*tP( //O %,, G~qqq6;]oWs״8Uƚ2zf͛7㉑#jX"눋GբP(lذsFC|B...*5&KMWFP;L-erJ%FG³ShҸQnՑqcFӶuk/eÚoiڤIٖ| tU?`;ٰޜ56 )Ӻ$Is'uրlk7}5-77xﻘ<9f4/<;ZtZRUVjSj'€xxcogONn.ZSaZ*WXXȞ}w MG>|\OSװ4%2<8hll۱ @^h]l[YmA#43t yՙOٶ}Ǎ5pOءW^eM߸ ^{ >UմimՋ#R4?п__|}} KG9u 7nÃV-Z(J% +;g'gNǶ;Fmɡm.Ξ;' ߭]ĩS$]M"7/RIhH=wew/^?x89m;p.PXXH?ݺv!~ e`}N9=:qOt'jIzgΞ m _yeMIvNm۴{׊+ӽG5cHHС]rv^b/0!߬,<|͚= AAAT%Iȱc9{l]\ e4~q| ֍z6g4=sBPLٙ^=s?HMM3Ȇ 9J%ko@׳vj]f y{{{/jMtΝ?_.~ݝϗ~  >= IDATb5o5(ڽ݀${+W}_})IX|X.ޅ,{KY| ПkטTbbc Ϙ8)yz||qppclݶ ''' 0L]sS_ygΛr-v4 T_xD~~7NuoJGP?,TE9_F Jŀ~}Yn=Yq0]; _~EQjϘ:;\ ٺqEɖ0Ɔ5R&XXS ADB#dee_߁O9k+3g5lcUSpq&:ʲ~8_%@{/sf╗Ҫer)櫨FJL 3̰!˂?݄gfW|)**bAtl^3h@3(,bbcQ(<>QޜE|Wڳljo[.<8h >>5髯dKء=zsI5a*,Æ2c˜9whZ#~S77oo8ӯ J R~s|߷Oe婱cXb,I罕ŜaΛO-_ }XX⌗qgư聩<4J%jZ/}#/ϨҗI)>KTdnظ8pIZjE^F'oZf;ska9BFf&><5v 7nŋِL^%++cǏanٛ/]0hۦ C? [Wd4jYnXs Μ`DZVy&ʲ*>5N> @24dX&[~ƒa/{ЯO+^ywWӦ4n\a}%:ӭKg<==+jsrs)((Ef͸39:_jxÝ '=sH#}~~fYY=3FU^ez!S''':sNz\c$&]2h0e2͛7;:8VAڒ/ҮkzP(HDMCt)=B)EB2wh֌?;HLLۛWt 40+m&_JÈ89?{gD}߅RN ȡ7 """ /("r#rV)+i4mܝiId7mZ+33`0 33ŝ@?J'^fL~,GJ]XX4=}]fvugش+Y&_Ç⧭[QZǞ².RRply-.6/Gmw㣪}FoL /B&a岥)ՑgeWxpZ܏fMz֭]y&h߶mdG"8=,l8CQ֩qfx̛aF``q-|ŗ-co7nij ht6KʇL&Wmj/S֏;(6~q[{՟aڳ9?;vx/EQܽׯCTb%xU+4nԐEwd2w"H$Xp=Wu T~q ADFF !>W\._le+NcnyH2.6AAAU -5D"b{0HLH`ҭTVPP$&$`8߹{Nc Tr9 #:wb{  d?䳟7m X C?Ec}`4iS{ &Ozݥ˗].[xJض?t0^7ع{O-'=xnp8y W^;;@Td$:>G;AA(ho1~&lێ!OBNٙuU4ĉxs'@~¹s\ƭ=,<;l(Νr'~\+24n+׮a}iVθ(|4 MmYSTig5=j7ԩɴiS~ vރԴ4ٓGx{N̬l 44 Q)*B΁rhĝwؘ ᥥzdfg!2%AVv6tR":*aIo5yYr kHW[ՙ?wu {n4`dNUJKPPZGXX(bcb\n;>|罡:cO{] clݲ}\zE KHT?(^;nr#5n6/b 'W9_"T.\uU48^q#O}8}&5[u_+_yooܼ7;v@EǛ iL~sƌzm@ej!G 둘t2oiQ W`@ >v:l۔6߷/k4yiM`޽Acs#!pq*-X\r29S|ڮ6VPPÙ?!d TaW`uu/;wbPTزm;3?BCBp&G޽h\ٖ`@# e^\-{߸Z=,C=CV 'q#T̋2/-7ǥ˗]M@dLp&giڐ}1B0hpܹ{!!ٽL"24EE| e:dǎl?Olw?l—[@~_}-ǐA<U!˼ϛ %kX~v'+]į;wca9΄a^iڔGݻii: .ZFVVp(y۲ tXcL&ôwAA5*< ΤhoRv7y7"" v:իeΩT51T'mo6l`1|OpFP`j,:6D"M PР~}m~&<Ʉy -Ѹ{#\_aOB ބNCPP͟M~čDpq\xǾ vfSʼh2c{SQ'1g LUۮӵ8w>!{Ն2ܖ A6ۻp՟o7a?VƵ[RV~pЛ7y+ۂ17}8ثm&]{c'BM{FS\XhOUۮӵ6BQ5Ji xBP`j%A$a*ue^kVX\ZMϚ !8uL}RKJš+Ǜ-4$KiSHܸqߞ hѬՅ \+V§k?#{v5̬,0˓\^`[ @֏.]5ܱk7SO+}Sqn4Νw^\+9vރo ]i)ŢysAQrrs Cْ7y+JٌА4o r ?_cM+0ݺ>ub2%Őd ޴]mHƆ?]rEkx+0xv$&$ `2y*h*ط7sDEF"..YYY?h݋3 _ȱif˽Lx~CY-Zt:?oص _o؀4X$ƥAII (us7ݧ:w0k|0s?ppdϕxޓz uՆt/eP-hVNZ#-[`H[z7ot_}I;w{ /(˗)*H$B}}u M܈H FI 'OFII ''eKѷw)30`A!]\{v!]qv6mh뮀@CM:L6'1s=HMK=y\FII bc0AD o})Gu'dBNn.JKG\lOe|v57lٶ6xuӻ=c1`ѢeRmǛC]ߪQ+ɼ6*2;㫶tja I@@@@@@CP`8F@@@@@@CP`8F@@@@@@CP`8F@@@@@@CP`8F@@@@@@CP`8jLg008϶ʫ:5S1Y}ǵX8ǷOw w@R# }0pWOלs}qKq+u(.8uEK>KW|)ޯt8ngd`w_PPm?8wۗNI cMKU#8qng0>zEKu3gp=aoܬ=qI*!8ah49p?ڧӨϮ{]IǕBNg䓿'6| "Ţ]J ΤW:o9{<5}g -DžDcOT:f/oDJH1{K=rDe~/P*TZW]DŽ01|ήjۗd2{Q^?C *}[k|srŧFLfM7 ͝^~k wbkЦ5*OMs~OuQc Pϥ@p^&rD8+:]]sTN.O>ް5&yxs.4< 1B@DD8- T*DGEA*VBzGyy \&s>DcdBAҷCddۼVQF=ŧ0Ͽ2yya8tR]\k rz#cU3g kOik4He2%ծxR&\),|{v7iF>}Wٮ*7K/c'+a4`0HiP`ڇ3q] \˴ B$Abԡ]{Ɗ%hV\koLG޽iywb͐#SOb\Ǚ+>Ǝ]Jn]b֌+\SB$~}z;d ؽ11/PW6 r e+>ڥCw{0X<.qc9~E0  LUUm{g(B=pf=.[>{U5}RYˈD34n԰Bsŷlcn _wB޽XЛ8uŦ -[mǡ!0 Ŧ6cú' 8))nዧrz̬jZ|ƽ";'+;&X4(@"A*C$ⶻq<\Ntr9f3ANN.(Zà IDAT9sl9[- *kԅjkm_<멄qh -fT*Ϗ'>M0wBh oO.c{*l>hղ%~?v?6&w9{8څoHiC@`` A``x\pva칬N7` SA&!8(׮]5խL*u;1.}B^v\y%խûV{';u;hVرk?(|.%Nl]J ڵmfĽLxQײ#H`q2L}Hǵ̪o=+weǐT*#$$㷲Tv۽fܽAFF!Z#(224͠ČB\R*7y;'T.iWl`Nx|1lٶˎ`֏=2nÆ p-m͚a'+QZGVv66lE`maP!^=|;VC[{J%O"(( fe[D"= le L&U) oT)X,X,f}00L())˗1qĉwu[1dU5uѲEs|j5;ˠjj]7FC';\򦬭 BPmhW|cnlݾm><wĽ,]>_hѼ9b=JZ|ǽsmoovr$4l_eo֣k.>ӕFf`\ gyղmH֣B`lyȖ0"u&:"B2hFpaF>?W8%ꆢ(,?={[UhެX}ۦ`Q/(ٳμxdq(C0?\t _pf͛gzF\l oխSR mH$'tENT̚1oMy;vhDq-^YxkT 0 y{n۴OEX,Z[ER:n) ovĺoc7!)m $cSVf c6(¢ys1},<ݳ"#.>¿~_甕hѬ4EE-ImЮU9qJH4e>Ç!+;C?php&X0gqM{T%-޹xJIFX8o?шFb/NLMWԤwރԴ4ٓGxΕά?zJe ,\ L fk2M4SfC`6g#!!>St:a`09ut|=sr ?f9qXwņ?cW_T(PP'1ɄƸ])a*iqq}(Jt:DFF:Vi_s۹|rWj)*BQQ쑼ɫ[ZV>f* sUMo[Xr+>˫[ZV>HR$խ2O9Xմ{Wx޾L2϶ƍx]zRbʋ HDm y3Re%$XTDe@փ 5D"(g/>Xms;2J qpmDEFbͪxe _kvu+ Mq]Z.ٶC۔lۿS S ĚMA,aqZ>78Ńiz 'Io4"n_ sm[njđEh ʋX,Xl_Vv 5Ǘ~Pb#bqxy+#^Fs^1-A*)/6K=4;[{/"UqakZP=HJ uضJnW3J FqvH9L#/OWN DTa^y^W`엝*U@N*UK6;o/;,]1*wi?ob7=8~ky^mR9v8+:|z.8QvmϦ&σL,!n?XjRQ"S4*< c cSpl32pB,$Wp6uޕTx P)gB~AOp: e (/Nk\z ߚ c3DzKNĽTϙxVW/x=YD w)v|RqG,bSZ,-}[SvėKHR4M…p]y|5.y1k>t!5w>zuO畯qʷ\ v4vܕk9%Hp^B7 YC]{{eÖvR- lNyb#T*yQ|a#G!ّo&Lz!G`k0 u(F}Ou3Lk⥸{~1{<]qyqgzٰ+>F}t9{Cbxj0Ͽ4_rbl9Y|z7ߙ~CҰhr6,Wcg116rܽ6- 1 ^3[n̐#|*uLמS+WT{^C53lb}g+=`ƎU|\ݵsy|9.D7#e+_[ƽLL~M|w7N):<oѲh۱3k^vpV._%rS^S':9TWRefbG/aB%jSLuPO Hz1-dD2ߍF 1D7HZ#h D.%:WBrs$+ܻ!oɍU|YI[AΞ%ԩLr=r=CzO IڈdBF#?a" 6΀ԡdQB_~E=7mCG2<;ysn N%jB?{"zBȵ7b!jɰFmpa$p[Cbs >vdl_]GFym=gyyyÙdZM4EE. :ugγ/Х+[W֮#b0 Ɠvr[>oYUg>;| !x<þ<1.]͆HOv!Ϥʗ^|)2z5ym{oLF B=F!dƬ9՟B*+*Su?/)7<\yySoƾq1lR'+SN&5~"q3sQZj^oDb5ڵwXgb?zңE]K dgaϮ޽woīzҋ8p?CSYݺvŭ۷y͕_:d0íG4 !H?k5kҸ^]{`ϾE'aiiwJ>& q"Pz:x0T**i\|BZ\3qLy-xxxǃ\N degc̨X=Ў>3oϸsng_cjOmχ0zSO!MJEwpK>o=yDD86Pj1,5'ONOG۲Z&odv/3\pz7P#ۨqCQ : `]ߖmSh*E C.|dTȗ q<֝T< x$d2A)BD,<, 0`bBWg=D"Af|Bvtx=B`ٓc9er&&$`G3q&]-5ۓ&V 0srNQ-pW|Buwu-B58伯*̛U*8_juܶ=jb\6dBeǢ;y O?? ٗ;\f"v&UXņL&99Z8}BR=Z-{ҨBT*EXXf3 yy*3!|C~s+Q`,գ;z莒[-&O 66 `ͪNxO^F4粸]B.`4bcQTTs-DGEFr2/!i?wmpGuK>S8_Tڦ`X,h.5DZ=㑖-~s[y*Se |lF c}JEeqsRsHۖ+3}mWZa@-Uu )^6h} zo֣k.^ZjOTd$nedΣ2lٶˎ`֏=0 5 [R\֏? B#-Z`Unap T.ZhWl VVuYwSW4lF|&6^R7keDiYذ{kz\n{jOmp/]uv^^/}5NmpyBāC.%E!un#yXqn+-O^#5b `11,G8:-8t…\3/w*0Jew p9z =G}a00+cFchI{yTVmS0l( H%+>Ƭ# u!H1ώĔi3]degcQ;hcmyU鞽1d<ս\eZ\)¢ysq=<ݳ~CҐP,u_=C1 u(ixⲅ 3{㍷A>Y5H$¬ӱifCGx*W{xj{=|1.O<^jw{l`4X;tX$waB|\kgN*?QQ ̛+EEEsAb`09ut|:ul6#'711$8a 2U7& Y9w+?p>s|ڞ51.כv_rRٱ_>g|׃̖m3i9[lA>+c0Zg-ZvxM&falu~Fƶ\hk3-!]eGx307hX:xfDu r ' `wrD"$&$xKq?,4aTHRKJrF:uq ̪\ՁL&XVO==jb\z/zq]8Uc _>g|扗b#{addczQ<8ڣP@2SoѹSG]HUQÆ(**G@aͪxsfTА*-a3Y !(-8lbސAMIw(3/;ۮ8B`2j Szӧ5'4:-@-L4 G<~{N##%.תx}i+ }ī՚NvdUN\9-GsX/*2l.KOԸS|dvbtGٻK++γ/ĥ2cS4g\q唖a4Z]Yq~ڲ9 *.uAT'_;^S2ޟ1ߞ/^\0ؼgu߬F jň`bal_p,C9qLyH%({Ŧ^e9 ǾcO ++ o} {xqy(n?h5_`޽^~x`Y8D~A{nێyCt^y#^a M d?⬈8 !w"UTjV).6:;#U_g⧍T.{+L& Ԉ>jMwx P"4$uO FLyMP(á6Չփ;&}3>x5oLX,TdYr˪vv'+ٌưNT|c2.,DLtG(6QQQlf՜M߃f?2/QQHݗ._gΨ5ވ׶QIyiR<FJW h @DԭθRr4F~~B7oNqM;vFxx8T*uY3~@*Lܹ{~r9BCBd|$׫+|ر{7bcb_F1m;.󺝑ig@d#-Z`OY>;v}A3 ϟ6?^ĕ<`RKh߮8^ÇOkogxs'F#(_^6(.*Bxٝ|^UZ-,vރB$խ% `ŪU8}&"^qc_ܹ{ ,t0 Tz❷bڳ7 5i,+yyHLH%p)0 \#G`+"8(h%dg6=#'7X8w~I:Τe.k7^2^y%^ <Ը#;jC" SC,@ T 00/(2Wo븕"'Gu1rx_<Ԥ$1ʕ.%g%\Vxfz~~zqF#8Qt`$bW']1x1 IDAT?^|lh_D"oטllfg۷]vR= "ukL}g2L&x,Y1εL4;6GdDDⵥNq&z 01գx4M^ii'Nm@HH0>9 /x}f}8231xس:<'YXh -[ѿo9z 20űhat4<Ͽ AhH^~uT*j:cEQ0URP"E,fR%T*74@eԣda2BPXX z<;/bIpx&SJJLq\篪QT=w3̞ZENN.(9sl( Iu`G3q&]-5ۓ&VgHT*;LY'd2A)bLbcku31!w=T_oLVAe0 |ʞ L`d0 SYgb='.]^^bC5L?aC &ɫz>0 $:[Djd ~Yh+ؽRa z̥JTbNRzF &4Mq:xcl͏Acz6~i5mG[5wyBlk]YL&ẍ(1 GlL v?8YY8t~=8w<ccc0 ֬Z>!ztGQRRuppߞ 񢣢QZVeR(J 1t_gNɷ@UoU.'Y+S<'n?Xm店ɆGېvZ@ݷ6/flVvjj8;Ӕ]`@vJWhJc@C" $4~2 " qO\E>Lf3"L T@.ҥEN[Z)@"lmߪ1PмY3|zgxVv6ޚ2!! @^~>rD",7Sߟ}eY5j3?|\p)K:h/^3- ߡO㥋=֩SႫduUvkb8{<Ξ;H={;<5ae!~aCE^_. @M:L6'a=HMK=yg.AYXLO&D"ButP +S 82x# ƚu`0R#uö0xnUq2bB!r.DR?x8:wuh,3 @d0[ EHyyZ!*2(ۢM3eKPz 7WŌ j5t:$&VڿN*?QQ nwAӴK\P*'c{*S}Q?\k]WUAWZ|y_`0 W@pp0"#"j. PlV>6xuӻ=u{Ѣe5o #_smBa41Jiƹ0@d\{9J().DaFD< eXh ^,ZM&HF@&"/Ѐp?4 P9DDd8B"`4ӐH`%Ě\.Fv&UydY(&i) "~~rDG^V1ʌD"D"'^"X !+[>Hv0s 0i<-8| N~ " OG J0<C[F XЪyRXLh  E\?(f Ma @[bQQp0 Z )HdRg.Q.( PԸi (8 Z?oC^*L,Ci[~> 0,8~:m !Xh z :HXE"ܺ~($F@*& H$zf4A_2+ M*0'C;"M4,& !KDO(mM\I۠͜ 4Z粛M&3FU!b`h#hƄBxxJ=  H!K@V%15L$"`bvzgʋ@SFRiͣlEr?(_ EQOIF|c!#_QXlX;C[qqxaP X,fB:s$>x9oII!$'ǓpB)Z]6@ O &/OdA.PRRLRY:ukѸi1LlٚCGV*"??`Je4g&RM^^jш5Vt"tf#Gr~g<Ν{ʺؿ+׮]FQ\\ 26T*9nnJ@P4xL&CBaRTTHNdDnCqDEu!++[a4H^^.:tӛSy,c5xzzӻ\ą t%l N4׮s |9~JJٸW 1 ?m4JJ),,7|rssHKK…xc$2;Ӽy;d2琚z""k:22R >[7kAuhW5￧o߾iӦVHLL777v8o]G!!!([NY:v.xxxZuE]Y}QUS@ i ^\F@;X,r<=}dr4aXz ==USk{wwƍN)(( # EETy]r">\A톛ҡ]הsѩSZcc͚5dggLQQ)))=Ç;7#=z4 +'33[jԀYf ڵc̘1u֖m;}vmu(#U+j>_+AUәh ^NDVAVV&WJh@њfBDqGTӕnDQQAt`\AII1))d2|}|ul\BaAR)whc]c2vzqqq|7Ou_b|רjz-si6qNSPPPʨόF#yyyxyyRΙ%aGAAJR2';;e=UՇUJ +P*KppKFAgTE.WСCwo_dž ?ӱcj gQ5'st%f r k'iiɥuہ'KB SТEOw~~!xzjssyTA Z LFqq1ӦMs۷sJiXd oSOHy:tbڵkN2hٴiS^{5w΁0 z|I"##o… xzzR\\s=Gxx8 ,']v\t?w}W*>XjqqqxzzK=xᇑ\r+W/СCL•+Wꫯ`0ЦMMԞ.]~, >(uW6ҕ@ ܪ4JP;׮Q\l\FVduVLyوZ͝+W iSTj)~ A@@ξFb)Zrf3ʕ\Bv]hӦ39 [RѷҕweT*h§^pq^x<==ٹs'˗/W_w/мysbccvx8z(rJEEEcFYHHH`ݺuRB#\l6s ϟ'֭_b_oRrY)ucRQj<\.?8ĉIJJ{!::Z*/$22|Ȓ%K駟>}ԞǏ`]N_>tUW@p(A fͼHLd2ݢrY@nؼy7oLflnMDDWixGT1băX, = 77ws :] 2t`-2r mKc\dnnۀ{8P+8P?p@֮]K||<;ww2qD9x ͫ{rJJJb„ RX=T:N2`F#EEERm cM&xРAgUWՇ5ѕ@ 45Jjcy*ņ-lid2bI^{j0TX,hҠ שnTnX,l3{`.|}]Ib霛ͽ6ot֞+ח\xYf7""##߿?Z 9ʒRD>C) VTF, YYYjj\vE}fXXt)iiit ֺPaaaehpwwc4)((UaMt%M}ng߁ݫF-1hқB%X0rLfrmFĶu6భkl/LjjS>xsrr͕~hтP8@ll,C<)UСC Iμ * ^/}v6>رc{WiŊv^AQ\\,p(J;#x&Fpp01F8,{LF6KF*Q&e /qSа7^CVɦT*hoo+yكb 4h4WYe(..… |\x.hUD֭Yzb+ <<Ǐ`0ضmCL&+ =W\vyIKK>Ɔ voڴݻWj8qM6U@phAiJڷ'!!B}R:ma34bnL2T-s~FFB!m[? icx7QT1m4{ЫW/VZE>}\ޭM6i&i!(.\H``][uVcf͢C_&裏l2^x)((gYfİd;b_~\p,((AbgϞHyVX-[ܹ33f`|Ҭf͚#Tڎx.^(|!7;,X`q~ƍ;5p\))1Ia^:n !/_e=%_p>dvGj sE* 233w0R Yp! , ,,NQ(,,.HVVmY^^%%%UKG٨T*i}N.]\._ 6ϫV1R.ŋk DFsb׮I/ay,6ƋH)k\/VF}{?||dvfG/X>-1C4^z:GP9K,!::Z/@4zƀF$"///әZzMs._f)8l)koU+_jC|CpA+dɒH`c-@ 4J ٌdd2)#<\CV|k@p12l^?e.6jjULz eZ12w%ʰ-& kc20 F#F̄Da!0eҴhk~>Th\"*?eA2rT*#zٺRDRIq5T$z4Je=6v\. IDAT7nF60Lҡ!9e& rp1VP2L 5`i='">x@ 8lsr jB$da61X,-m Jf 8QTRuP +97LoyGr\zk04c{YklLRB+ ivj+   66d20zʮ[S֐)k5`/k4Toll,׮]+vn:u@--Z ***[NY Gr$$$/O?ժ=EC7{lƎ˝wYu5fNݍG-NRR [裏9ۇd++;44]vImژ={6 >`O.m録uoݺPR]^ΪjՊ;(-ZoAYԀqΌW.Ό~CTҾ}Ju]8qN'?yWכo4bGeԩ\xSNe4SLwޤq%|AL"7Y}țo)**r9뉋#55g}9sԉ|ɓ'իWɓ4k֬ʺm]Y]'Odٲe$%%q1ә7o~4iqqq;wN:l2Ǝ+T6C(}>}:9sd=7|Tu}ݤI  Qd21ydf̘9s ;wU *3`V-[F߾}ر#s壏>"((<^^^uxN#''Gz:{N<… d$vɌ3|2.]bܸq۷.P'|'0رO=17n'ObZDbcc׿% k_e婪.wwwJJJXz5d2Zh߃9r$_~ՖS3uƑ#Go a޽.믳{nV^]nSHJJb\FZB/@ 8Ac`3d{1i߾=ގ;'55Fc1l0b`._?ұ-[J/\\.wxӇ!ռys\.Yf\|FRFVEc2j-ߥK0.mTUWYl}O>${waN6m>,k֬ǧ=ڴiSbX0aOfʪ2oY~g;իWEHo]A" VZU{:~СCor0)..FJ͛7d2qJRSS˗/rl6SXXȵkbBj+m8q"'N$77~qƑ4mLL 3f`۶m,[SV:DYYZb)mFF>!!~,ixҥKdgg]v׿uѮ];Edff+܉5.O z!&d\ĉٿ?/KW~)''iAw3u>C|DD}eܹzjڕK+f3 r9EV󥩾.UՕ̥KaРA WVTLLr aaav9:t` > .вeK U峑Iff&Z e2tFVZUZ]*[s7on3d#==<ڵkRc믿cǎE 4emWbԨiJty,^h۩ :)ff ^D Gcݸ9s0i$@[[A}i+ x[Acǎ;-@ h8@ ." @ Ma@ hrF AC0@ €@@ &0`@ 49u!;łb!7'`6FAPIPh8޾d/}V <{݀It,Z[d4EҬeZ0 gu)@ ú^{/7;,"tb6@WRU62^Duƹ/ FE5Jա@ pD<z5`2үڼ%F޴bh4"m֒5VXY/G ޢ= @x=1LVe" @&TFZ/G#nkF]1Vӗ@ 4+ց@9\Y'>qoS+ }&7^4!657G%!!hCHi餥FhHH^G1LtԉȈzԲrvSPPHH(Gbcc cǎVGe̞=crw:\rrrصk.\`0Ю];F+gرlr9cƌ;IVVAAA 6pmˣm۶tfaqq17obЬY3zӧOtR}Xzz:Gb0|pqa2dNe?7fΜԩS۷?<3g$"n<\<|¢"Bi߶mU5|s1Ld:whĤ$~fS>f͝WWԇʑGݽ饗XnO>OʳsNRRR*\Égaٌ3N:qUe뮻y}DFFc1ofϞMPPPuϙ3^z} cĉMݜJHHd߫?h4r%FùBΟ?On霮x"%%5[rM6|G_Rvӧob_$++Gyo4}AA#Gd۶mmۖ;vкukiO|8'ر-[VX+9r$jZ:֦M|}}+ٳghHMM}JO{yt‚ O>nݚǏӵkW7oγ>+?u=zk׮lْ#GaW^Eie2۷k[p=ԙGRpRo1'2hH+zx*&=ˆعkXbK'>O~&qO}j3ٻo?˖`а<6ss0lXMz2Ͽ|VLiS=$ܵ{deg*KpZOOOgw';+ZͶ;X0n>HHLd=.f g~~"hd+œ9s8t'Odٲe\p___,4 'k.,XĉOλヒR$77!C70}*e>(֭ BzΑ#G6lXn߾%Kxb:u4g}ƌ3x'Yl-[䣏>rH|Ŷmd0dU$%%o>.\ZFP_swTFezc4ٲe ϏGy!CotRܹ33g$22I&1d>Ct:111LFFz,n}8{,L2Y/g|w?={f-~!0i$.^ȺuHJJ>L0!ڳgOزe cƌRe2YG ٳgÇ_Bd8`w͍"RSSVZvt2c6+>j=?h]I !b@n6.23Oyn s?+~X 0tRA]I <&<<_xO_~]˗/_L5iO=$, %Et֍:!FaȝwҬY3J_@NN.ЕX۶`0^o+uIrJ(1BaK!8q"'N$77~qI3;r{[:!!~,<==kvvvvWwf̘1dffpBx 4֭ H:OҙffϞM֭i߾dئGw˗/SXX(,^cJF̚5kjNnѢm}v<]Su^[)(( ''GINNFVAQSk]\E./ urW^fἹ?wOzwLxҼ\Ξ}GV9W߶G$lz} aa!N A/ _oB}F̥֮_[9RZj4ڴiChzGG;I3BP.di֘ BKނPN:%L&)ԩS\[~=/0=Cff&ر2p@!O?o^֮]+l5rkү_?"""$̙3Y|9 ,SNvKYYlF֭[ȹ~wpczX,2e ˖-}'smUHmLO{msμ뀵oˆIMe:pwqG np<, Μa΂]K!  ۸w> VC('7~O>`߲:-O`!-|҅*ϷhѼbq,^{ٺ};* ^?w[h"de]u;:.]T*1l,;DE1h JJ^ERRwxyyqe,Y"x'x1b 3gΤ[nn]˨}믿JCylӁֵCR9@ 47@MDBky7 @NBU}&7^4![g@p!uIo% @PԫFP`2|lqui>@ pA}?OՀq𤤨w4,y{j+@ i{{cPϓz5Bʐ1Mdg^# 5fՅ@ pD<z5`<}%qrsܮb2"q<}|qYgu)@ ú~c`ršBAZER.&I2L(J}h _Zq6Fե@ pD<z5`T**愆WC:CS[ AC0@ €@@ &0`@ 49#! @ M:YuQ@ [ZQ'L]"@*bI AC0@ €@@ &0`@ 49#! @ Ma@ hrF AC0@ €@@ &0`@ 49#ɡlldd\#7/-Z@VTֲr\<*p~VKhH ´2 J'~~d2/+9x=wV[A!CMX:hJ}]՜_׬va܀|t)~Y]W,'*2ҥgj؍DEۗgyȈjSѭKۨ32;RRRc#&h,n4&6nUiۦMT]nu]UEelݪ%/:le@~7u0`nAu۵CJ"7/H>z_V~OhHHϝL/:vgu*wCҐA}CP39r(,M4{VcP.{tN(Μ=W_ÂysCzA\ 78K!b!n~;N~~>tܙ>F.wB@!>'nW3+5b8[mSrQչ8INNZwwѥs'ڷk'3')..&<,aCڕU_Q\\̜^dמ= <>}C_EEJJ ?~l6;ҵKg:DE9_U5`u"?ccA垻`2}Z6oag0]`æҽkW  ѕ fHǎ}p%= B#|u6b2\kd2g.N>Mqq1! c ZKW̞q\!}i۶-1#GT* חv0t]i36SgHv|趢2TGU]GӇ~}XU]MQÇ͙gY~ܪqS@0Mk׮1krE/q{Z]{\ggtc|1wo?*7sn)osǎ|cKPLv/쮅 ieתZfѯo벪)v*((C ߿B5%,y/.bH&_?pd ߟW^ZļgӵKg0HLJb9>rxkxylTr).3gW1 8t?&B٬iUˬH\+1#Gh})RFa.W^\Dd22!^^0|Nc< X &Mx //+WVS}mRQ#yt=p}ST%CutZz3莁M@$cMtuߔt!M$8|h}TBLuibR[.|Ó&֪ůFNHr={ػoYY)]n^EEtԑ!I9z8F VkGs!z=o`U)ˬ"1oo/5ʼA׃x*RQVDիx{;,+bNJj*O`wyo׎g>e6++o={tW);oA^2n U_Qkeл}6m9X,lW=w˫V^|B"'Nu <Xc!oeߙxn][ {cX0o.F۶c08B,o҄).*Cy@\Me2xyySWxq\&'}.s7]*J||jF^আ0`np}):u(}֨X.^$קO'ءsǎ@rr2ݻuOϪ_VKBCBx|4[8s)%%yF͍̿ːz?_i*ߊIUNm.ñT}HʶFVvwݱGNҀp2 BAa̓=:%vfyysjs%\۹lۦ5nnn옷LYF.ӥs'>5U*hn kҦ_Y\޸"+:ur? jKWR޳laDEE:}ؔ}5Tf3:V XVm{ Ŧ-[9p .^̹sУ{w;٢{FQM|6nnU-_}w^NTԿaQULn:td4c|\G<==!vڷzKT4k3}TZճ'[ŋ~=fU֭Zq[.WW7NBVu8f:N55]*1 Ε+i$${ХS'J%Fo`ćhX]͛5CRըoqPXX(&j4Zh`jFŽz=g`XH:( L&*gn'Oڗ( 0^ӧIMMe媟9pRuwIqֿNֹ(**jFAAۆ$eҵmOOO ظy {pSbko/u+ܱc˿>7;vJƕY啥lmΌGjYqf T-iSe{\7e^V~o**ք>i,]^IK Z1 ko9>|~uF3/}MNn.OHN)**\|<n\#F&~#-=l&?4777rrr}tؑVZɁǔLo'n0ݺt!'7s \JN?|Lfj9pW^^^3<:rXgSUF= ̸$;'!OƇYYCrxfn\Gu1GO_*J2kR^Y=/]b̸i,cN\Ԗ>uyE<=٣;^ytZjty5F&U9;)! &ߞ_-_NVVBQ3ժT*^{[w a'j"#sYN>- 䙿>)9oWp.>s {Q*xyyƀ~}0n\N*Yx8flԀxgH;WQkϏ!ISJ 0DWkggjFصKg-;zT}ѽ;O=9_~5ʂv&D AcrKi`vm d?oYzt8[~ƏޝPP5T &<,̜];t\x___KOq-b|}} v:l6Jaa>64wSCVqi觮ȸFNn.m۴pSaa!Ʈ/m¿ Opi׍D]_+)d2qR2%%%;]ߙf>71h(]fgg3^Ur&Q.ŋLV'ceh4kVt\.eu)Z3_?=qXw75eCP\z}C/5A0A c6me777YWݺv(r gfK d65#` ),,(LVBvM=ٵ$$&ZuV7N7:;#h ܌ a|Fqss4<;#4.7i&E0@ bdo9w.J?G ၹۿu7~j]q#Sgt|=Hװ@ ၹHOKѣfC#3D| %5%r9͛5cAZ=36SgHyvPĔhh6ݽ'NR\\LxXÆ[8b8INNZwwѥs'ڷkR=@ '€i"|ŗm>~3vy{̨\IKo'>~^ei_/8պU+~[c@PТysW"#&55?7YY,~}~_ݟ111k9p1F$—OHD0jтϖ.C,z:wZg<ãI(I"pmt&U@"JGEA HA@R- M)*HN91dLLs^kϬm=.;_W `@ *SC7z\Ohذ))DĐÞ"7'X :vR޽c19{FÞW(}zF994ov?]t6R9@ T!|z>|=ZdzVvwer:VL BBhӺ5~m`7; f<5dp5p="}Z4g1xyz-pb9gggV-}M$[ŁC9s,7nϫWg<4n԰k|߷OtN׺'%'7~89:2}"/]ZD}w.ODQ""iܨr@ ShҸ* YYv:_XdZM߿:u<=^G\|xoSv"oѼY80em [ {kۧ񳵾ikVg5izrԺXk Y ):<*BNw`D܎*$1h?[zˡugIKICѢT)DAQdtI0:wkIv-h4 IzgO>68e7/dZ^OBb"A888HOIIj}z_`}`!+k^AlڷxX:(xs\\\xw"cS{ZZNθLt$$&$(򙳫VAe{氧 c-6eͮdNvKao-Y|*nEZעjiڂ^'>! &KKJj*^^xxxRY}+C*ׁQ׾C. IՄ_ۈM@EA(N'_p+yҩCZ= W'OgB$Eol6~<2GˈÍßk?xȤ֐$>zr|FcleV}>Ԙϥ˗y)ڳ77Wm9oBF-Zx<|}}zvhߎJ"33%б}{czN vGRXZlɟ~&#=!d֫ӭgͮK "*%˖{khQ͛k c [ll-tО}e-^UZj?Ѯ{c&MsδkӖ„&SOi i3fRP~O)"zZ*ۻ SݳgZBWsuf?[K/NEOZBRR"CSQ_Dvnd3 jNo'2"cp_[K`4FQdRS2ص[7?͸S8lٰ{w̿5ƴ7-E9r`vݙ3oy9v %Q ZAA 2|!Ob߮oٹ8w0)|ǟh+n4 `6}zܼux{ڥ3nnV/,̚&Z>zZ]Dϣǎ/7Lyis,"//0RL IDATϘSw&~86[3iѼ9f֯mۉq&,YѰ~}ޘ5C{w[t^Ξ?ǟ~f| 6aM'O|o=Mv~]͚o`oP݃:h~Ν`6O[ڪ=do[Py 1___~>u)"2l+gGTY϶T<΁_ߍf׳8YBnӓ_r21Ə}~MFZ(jض{&>6{\(?5֍HE!&6s.0'IHLN\wʥ˗I3f䘱 zj8YˬKGDF˨V1|1]Qz=zꐺ9ܼl<ܱHC@Xh(aaDFE,_yUђi|eeeI7W5cСc]Y`rT4}p/Zo1_mՎz-DݸIDdnj+ώxژn\\\h4|iHDHpE} c? hyA*=Re|:>yF\*NEQě}ȵnKCpqVjȲw$ _-e|uV 8wwwB^*RRS4}P޵gs,@Rqxtg=EBbxzx_Wf`~,[^xOAG۶ $jתF!--8?dLWTd_|KP˳Yk6Z\c K66GpPvgukkqJ7 ՙ,+VZk֓=m!1)///\\\z ˢysٲu+,EfLq*]Pv jƁѝ~ݙqd$»︂>:ƆuPWbiQVu;K( 88Yx<==pڵBlڧV7~NMM5 &==,ゴXc}0o]O֭PT6٦sǎ,Xgΰknk^Kffx2!!^Q [}aJvn$3ծEe5$8oPJrJmվz-A~~>j0l(zݳ{ ++dp`kי5}J.+R{j1; _A#I Jv;,gʃld;*򜗿1L+$o,TBCyE ]V f_hذǎ ??ͅ~ѼY3V\bbcٴ+c$I888Pge|,.]7ҕ+L}e68::D^lr?:̀~}iڶ^x 4_j|F=Xݷ 0] -<"a"^hLGdffZ]T%ۂv-/*ˮ<Wn+Y,˞jK=ꅇS/< 6-օ=).>;qNkxYvY T~Y TOCW_M1,loE|Fd}W$eΝ8cmѣ'|yw dxEV.];qtӇ^}*e{ yC'oӃ۲ƍfP~|4=@s3쵭Jb 9~$=>}cӊ\ 1Cbsq=Gݏ,ю,mܼ/Y1gc[ծEe޽񧟋,kKuSeyג=mz-T*5BCy)&6F 190g,#>̚>ˊ^@9s2{ \^%.ػwC/'xmr+\IpOF/+HN.8 a5(zJHH4mϿqopXWnnݲU[UgFQd7(58>'nFvDzZ6E5cG%=Q#+̬LNd>l@ysߴ+oG~3gi߶m++e-?//Ngl Tv򖯢#jZ4ofuuqيeu2-YD.KC+Z$ANAҞZ!~lj>\qI>>M5//Դ4KS`;b8T FCrr ~~vj4fYzM+KsZZNfϏt$$&`י ann.)x{yC/Xaڠ|||J3[)moa}R)gBb~jOyUQ={xK`VTϊ/oYH:0Q(7Ѓ!<;zO7G</se|}泏>q 4:g߁eeQvmf97oV򥋩#۳___y[75JŒe+bɲڶyEr'.bָL1J(+v lʡ`gt2Ͽs1(a>mx{G#nje/Lbs|M7=je}cwǟزa=>>>|g|o Gӆ{zu5n ovлgOό?fZqץ%bo6ۗlyTD0fYOrT($Pn&?D3 ɌQN.@IP8b4wz䍥$:&~cՁea˂Q ܅ ̙=DEn]uCƍܸq\xA\XyZ"M""ʳ#f%6Kص{'Fa^Tݼu׮:C(ggg&ËxlE'4}™sl׆...h4?z:wۻLi6+ ""eHHjÇւvQuPk1-fSC{[7>߰EQ.ZOlfsf$33 4-۶<׬aNK}W;/E1NKd1VϒD.IDuQIgDߺCNf*I"#+e[Z;puu8Oh|$Ib¢_jќ,y7Avm@#ʚZĤDZw%l*^zy?!ZkiKHL//OjתF!--h§T*ILL9ZEeV-YJf͘6e0͡6HIM`ۺ”gƚ}(M~VjȲlwy]C0yuvGƍ1hQk`lyYÜn۷7ۗJ,+vh_ڳ 9msmN,IT#8y͠!!fU:0( z888R%͵2!^e>^OOWފȱqx2 =b^<@FFՆC߹cG,^3gص{75rh|䒙i>..'''||%$$+BbR"6\ك={_2}l~8d~u, j5yT`2. Ko k)NtL~SUF-zhz}4_mǷ<t7m5X\_*̞gAYŀ}ǰ֩,Zzmԅ>4,Z _-z8::dMuPhтw׬eά899!2~zIeԴT\CkƘG?QQ6 pǟvcG*!Spꅇaf?<6ծyw/6|_ϞuQyhؠWNAt{"Cv|ˀbwe?lU0.>EQat|(HT"ʰ54l؀cǏӮͣ?<,͚5̜>6mfZ5٧8,2hyaCлVh4E~œn {e$݉e+֞%JEF$y{sΧkg}hKIJo3t0ԙHfXZBeؐ }zkFQztFVv11eKظi3ڵce8{\1)T*ߚ+3^={ϧW\a ߏa5d7\ 1ZMRr27oil+3f养 II̙5+T~asK6xt8"+ ߏ.]t ^L>} aЀ|y1ݚ Z%٧8,2xy^={bj}bvP|}Qk0ە?ؗyVX+eeڳĖ~WSfΜ̞=W ǐC8 i[]#qs|֟붛BR ʑى$(2Z}ҠѲujNzz:BB̡#;;Ĥ$JONI!;;uX\(fkOdQt=c’eq%ԗ_C޾C"-l^eYN\zZ!!VϚ,直V;YB9zY,ا07-,g \3zF6h`5/[u/(/KU1o&7LJBR{(6t~"^'G7ݨ*F'_e2 mV7>/َgܹdӫLJY3jڤ̶ks2TZln`[8OLl,uu7oav_mUt\8z;mڔ? cN7[ {X~QXҿ,ϮN.u 4PeFJhX-ۨ ۭUjΞL'=%ߛl^]6Fn ..NSCZrSEv:[dD | 5<.MZQNW֭2˲/*rTF#+ ߹Lxk?>b6+pYT**P=p`Pd=!lۥ"xejUPiPӦLj1Yʻ XYe~QqTЌ, B-g~o#|]Hʼls̐$܎3Q@%9;տei-xNNxy_\@ rTZsJvmKz FCrr ~~͜tNGbb32PvǔQz ,rʤV1.+Lnn.)x{y۶Kv$%׍59Jt: (q.^'>!ӡ0Jk#km0˥<{kK?2׮ ,ٳj$%%d<&11 wgh4RRS xM"F{hZSR }We2,@`@]l8^=Y0R.w|5U([h_#NV^V^ c SUz9E6[TĀTnQZG8pȓ0xtО}e-^đn^|}}ILLnݘcǟLi8QQm#_?MN$NIbkٲu+'t ~YNN\ߜˍ7qQb 7׵KgC~ yyy^֭Zd "Xl9ᄋmeѼ,wKpPI)ك3^-ɸbj63'c^}Xcp+ɋpAN(ހMthߎJ"33%#^͘JBQ;,46fX䘱>'z*uyג4׮mgyddff}ZKjZNzJŤ'2anܼ+9} ,D޽xWCjZǍe]'$ɇSvm._ϧN!2jgG<ͤ'=3ϒys'cc{}r\]]Yh!]tf¤O IDATYvî={yax?&&UJP/m1;^/unބ'o̝^dKnEG3l`׫ǛТ|:T*9ezc~'~-ǟ~_Ƭ׸o=={{ؿ[<==Qb;[dMƁ) *\AAW̎f‹/,z*cj];wVp_ ''/];TY{ao4k_Kc]+b՞NGeov}'Nr+:c<ޭ+8G|JO_"11/:oǎ]wIHJkzʲ̧Ef4iJ&O=ؾ_n؀A*NA^?4 >oشO?kX|]tCvܹGtL yIrssm]poR5pnA8>IBb"wx[W.]LZZ1O 1?ޭƳFHKOG$'Z-٬,wX6S02x@dE̹sDݸIDdnj͕gG<]^{mdk([+m\ŞŸѣx 4gg'>CV,]ZFenpILL[; V%!1HOŦ{IO .>3CHZٽ;۾ạɂo1wa0>!,F?;Z|a9:w[vEsT5awUpL յͼ2s\Njrɩ$I+:ҳbp p@5q7ERzrrʍȜ"vqbꌧa@{EQx7Avm@ӒagX#% Q;;;XV;w$ n2[^5ZYM5ש]EeV-YJf͘6eԈXۢBằ&GXhrۤyy$$F|}}2۫WnS|^.R~\|<xF75č{`^~5J.2Frré,򿻌^VO}@>g_[^.=?;7|m8dpXHHdYG^^ٹ h I$^<.ZV9l全;ΉYOoHp0dee1.988Yxǖ»={лgXKϜU-̜jȎT]7ت,z"OB߿LE!1) ??Z PdzZ6*M۰GҔgkk kH/du(s30pZxZZxzzr+&ژP4= wwwˢZHM+ޢJrKq8J9ݑ9 U_Hַ.h"=3Tt<IJr@PPk̊%9Qh ԡ$I⩧gāb I,f򊌊C07k֐KLl,|UBCyE ]V vyeϝ8<<0éΆMÖF k}-a͞osyӱ}tLyuoņ3jڴnmqzcpe~>\ ׾o挬!Oj<Ǭ8t?-UZ$ʡLl6ۆ.S:N֠M '7Ixw}a(2C dF\#Iu#+Chݺ 2Ov(B6^r濽} rR'IӵWÔDR"͛5ckmc̨gY6nLvx2c&^^ĜYv>{\0+k2b2^KL(xr刉-޶ 1ZMRr270kx%??^={pz`[2e#{چ5coyoPT /aȠA6}.o\Xt_ T*iah|!K#G03bS"oy.#͜9]={.ݻ!C ,rGvAtW;>(a!u]5_3ĔD1N#+_lʡ`ɧƼ]lLSRΦn:VF9z+S$&%`WxzS7RdHz=T-h jܾsࠠ"J+GY$ØKthזw\LFCLmpws+ildIXJc_SVٳCRR!!!fϻ1G^^wϯЕEvA͠}60q$A^adwVVL8(Gq+ `Ͳd(zLzy2Z9/xrUo[Q4_d4(aI(KoY^<غMQSU-FMZh~\: yhΊ_r$Ӌ{CI FУO^k6+DDFk:阮VUBYԱC]HZ jphS4?/G:K5?GSlGIfZ 6i{)XG!1z|/1mCPxejU Ns`$F8si᧥xg9$ѷqr{a-%ޝbc:;>Ҥ@PNTK&P>?# cנ+3ňarX8 HOIF"20˝&ӹ#m/5 V@ :0wqh1#>hS[ {GV[yq5; d(H+~^'^ #0J8-@ 4c UhGTQRQ2ѧ\EI@qWW l#-!V,+`QCq` !"B(2^ ([%"@ iH Qm:@ 4@ Ai@ p`@ 8#X믿m۶ԯ_\򋈈 ++~Duݶm<M41*WtjժW 4@P>rU5kVnڵd?<<ܪsu E=h4Dxk.RSS "''K^7lؐT@ (zѱlU裏Zh4deeJezvqĈv˪h WWfffi*EWVN/]vEFb@PL5l۶ Ie4oޜǏsif̘a622>%KXQ1Ejj*7oƍxxx+BڵK\|rvJ֭Xh͛7%dǩS!##VZ1l0RRR/ wwwƍGpppiLfM( IIIp`)/^d֬Yxxxp 6m… iݺ5| 111ԭ['OҪU+ٳg񳃃{FQ֯_?˖-ё,-eYFQ"w&''/oꊢ(ݸq#L6 Jٸq#3gδN2oJ"((@P=);v^e]+[ɓrY:vh1?NGNN񕛛 @RR 28ၻ{euvvFr$ ___cׯ_cǎC=č7L.̵W~/_… o6lQV@ \L5@$|||HOOsά^s)ԩc1GӦMMIIIٹTOڼys{99W_}Exx8O>$ 6$99I/U^=rssmv2U@&Mh߾=R:%@PL5T(m™3g8y$ݻw/u9Ż$kVhժwy___Ea&~~~b˷@ 1TM9y$(Bƍ;w6nnٲt:E^,Hzرcq{tAzYeMMM%%%WWW7nNCQ_>۷oG'""˜K8p@S #F`)M6eɒ%899ѣQݲeKmF6mlyta>\SLiӦ; 60k,bԩԩSL޺uu iii6.;v,7of֬Yip&O k׸y{S^@PL5d޼y b L {ȲLTTtIx駙8q"K.-u9-Z`ƌ4i҄ݻSV-!!!NN3g>>>mW_)Μ9w}k׎@9bQ3gP~}tB&MرcG[nѱcG7oNVx衇믿XfMP OLJ, ::>#͛WyCTnsQկ% իkצ{Ԯ]/-+Bhh(qL2fLѢE fΜIСEmA805;wHF,^׭[7.]D~~Ǐgג%KޯصkO&""%K0frrr^((z#F0qD?9q1‘u:{ԩS2uT^}U*رc5j7oʕ+:t5=[&..[nSOsQSNqU6l`xxx-<>i޼9/^$77;Z0ג0bIHHڵk\|l NS67W/_fÆ DFFkf\t޽3gpm:vhQ~{ۤ@ wL5eÆ mۖ￟3ff-  '--JOO?o,g„ Ȳ̱cʕ+DFF2c \]]z7ǤI8C (btt:z?˗/oQ$@cdd$'N`ĉܾ}[n1dN>MRRAAAӇ7[mݺI...r6V~ɓ'Yjqj%00???2`cǎ$''#I6e 4iDEEYڤ@ g!TSvʸqQFxyyY'66Z]dB=ZSn]{JE:u}}cXc\X < wuuEѠټyѱppp0xOXX7PT%6mڐF@@Gfԩ,Z]vm5ӴiӘ3gL0kײerttt4SZtueZcÆ Xf ?<[ft\w.Q_Ejp`)vouݼy3ݻw+۷:?'&&߇Lzzѡ(\FbȑaTrss[cbbԭ[^)L~8q"?6l`ԨQ͡VLJ-[дiS&Lw|w=!=$!! ҋ"*T(EzQHAD)" H;'@K5!] !:_>!;ͼ۷oҺukY~= 6,R5V^Ozz:F&Ce0`RRR;w.{&::ڨ^!!!KF7O b"%%'N0`N<ɜ9s{t)999\D;z=[.4m:uغu+,ZH>WF 4i˜1cPT`bWP`ii=SԬY?YŒWիӬY3>#4 s?|ʊ_~ygl^ :)ST*;w./fŊr̘1cǎ]ocu^:M6eر\;!GFFr]\\\hݺ5IU ϘI@A809s`gg ={uK7|}oP/H Y&FbժU'Θ1 j֬)eÆ ܹs___u+") J%VbҥTV5jТE B`ݺuܹsoooBCCleȐ!ٳ-Z3&L믿xYz5gё~I؅a ~k׮Mڵ^x!S~}BBBhР˖-CPܶϘI@bRb})1񾔑*/R.>( ԬYSڼy!I*hZ-]vMJOO7vR֭KC|deeIaaaRjj4)))իWR/oߖ_.Rgҵkפ,eVËGimaO I^c/+cǎ' c/J )ٳ~\u,)VyI0ߣm%[ ~%Ib̘1 8ƍ+Wwz*0T(۔umIO |*VZ̛7Te JҶyi'1W AC80@ *@P@ ]" IDAT p`@ T8#!@ @ pFv$!I)ɉE'#]^+"xZI~]q(6+M@Pq?,}܁w7ԤD|Z :DDB90JS@ D$G&%)@]ݱ(oJNGjrFJK/ b:0)I &%R#>^FuVYY>(8:9S3/]7wb*6+E@PRZI66*ݨQǕ\f2@hڜ>VEŧr q1+V@ (H\|:$I׿l'edwpϼ^$LW[FyYi/dpprO~RCHz]`(J01۬  9vcs ~R3j+f.Ql@P1@ pfDo>@ (B*6h!]Q*T8Ν?͛7[ @ ʌ'z):&6+obR7o#IM4.p>!1hټ)~Ҝ:s7obooO /v]̢CHJ 9JNJI>|wwwjժUfeŨQx饗x ^9ro&;;:u#"߻wKhh(/IN޼y'ڵO>w^RSSZ*033ݻw#I+WI&ʾr V/ ""Ǐs=\\\h׮UTɗի߿VKiԨQ111?IСCu+M;T i֬YGͰaè^cLP8u TZHHy3QS≎ ,z`ʬϘ=Ae78~4W_7 yV˸S9g#8u,ǒU8w<-3&Mbڵ"iӦ/f9tQQQ???vϨQ޽;k&66VN?RF  ĨQh׮ڵ#33H .F|Ǽa9͈#ٳ'+W ##]ңGFAcƌ|ᇬYW믿RfM6m$_f 5ĉ /0s||}}0`oѶ+M;_5o> iРcǎ-k/'LbKUqhEYNѿIHL,{yQTjwp]yݻ;z$ WU,;S'r~,^\>܍◟~`t&Mjrfi4®\ѣ\qCu-ͬ+)Zwj KOO'""|jujܹCVV^`}c[ne׮]|7ܹ#G+k޼y|'^e˖|r͛^'11/Ҝ&̇1;/ #;;;whc$$$>>>4 t28p@ U=zcb }dg :Eӡ.p ++?x@v<].w}GލB!R5Dލ29J\99~_xuV&,^|-7n">>$}N ^^T,YY,11?ï~SܼyrbRNUL eĉԬY͛ɾ}#G$ ^zի3b{=x_r"С^^^ 2ĬM 973f{F3<СCiժo̙3ddd+G.]LQC5l777֭ UtЁRzu8@``Ak׮SNX[[L쌍[[[(9?Y0fGsţ2~xjԨAӦM ŋԭ[>C9}XX 4nݺҩS'3ԏĉ ۷/E=hֺ-V|C޻/tC/ZLxO?:ԋw_6078vZ7དiӮ#{GЙ_~U>5'GJ"1))S^=*X[[S GGGZh^ TK|B/^.hw/]櫯W`eiRdҸitÒTԹyGmRĤ$~߾QͭLjرǏi3f gΜˬY۷o$I0ҥK9rǏg!CciiIJJ m۶C 1KQA}vLRhoܹsk'''۷V:o߾MVVV> /_5o6k֬axij֬޽{͓)ÇӫWO8;{.gϞeժU 2|pe,[r̡(;_牴m֤KըSRo//F7/OOYjmѶMQ!-[Pt^EѠPҢe Cdacc-yZdgaWu1ug2)e܉k?P>w\|9KdeeyfP()^z;wuͱcLXy&MFЩS'ӧM4Aiӆ6mڰn:9_Q}YP`ooOZZW\!++ "wA RJFǧqNGff&iii$&&#S*(J^ʵkj&+X~QÆ cޥK oYzujԨAXXׯ_… L:ȉL4_5_hP?. cy|||4nA._Ĕ0i8bcP>")) __.89jђ(ODser*www靯?n M\\mڴwܼy-[p1 &OLժUͲyQѣlْ>Q ,Yӧ9|o.޾};_}|Sѡܿ|ehт-[rDIJJ"...ߒ&MV0N0ŎJ"99Y>u>'(SLLLKA.vGXRY3ؿ{+Z€~}j|}oF=JdT^^c'o*{!$yxG7jPYj__o4hISRg!Ii*tzIOظ72Y:֭Zakk.:}.9ϦLLGYYԽ{LƍiҸqyt %:/HyU [FaaarzRN'W~R+oǎ̙3`ۓ=UVjJBɧ&Mbɒ% 8+Wbmm-O,_>͛79OÆ ~`Ԯ]GGBuٺu+vvvtU@(>,Nt:v%h0o<6l3fmV^=O=3LFFCCrrɫ~QRj֬I:u5k\ٳgӳgOt)}Y Ƹ| ƎgöCV߇Cvy}.y^rJ l_sĉ_b63/yDȈEo(,Z\޹glmmٽ5&֐wk = ?̛-Oxԯ?7oprrD1{tlmlWj$&%z}NZ/0 T͚nӚEJpzAJprr,[LT7zӱcG6mİaèWAAAHD>}H51i'Noã.7wbBBBkfҥ4ldhqe~WooLo ֧]vt֍{ҥKmۆCt e˖jlll >(݉y0 9?5YF^yrJ>9T*R/JRdڵߟדIݺuYz2SVVwO?-AXpe<$b⸏\q߶nC1iXZhGBAtD1vhiq.`ǎ~ęcxOg"y>s+WAzpႼCZht"eXY[cccDzZΏBΞxZ-~~deٓJfV&>dtƭkafS^Li֮ƆwɁƯ7ҹsi99s?˨p eƍitC5W3g9yQQFl"HDVF:N(OYZaogMVFzA!LnݰAc`m۶T bRXXXlVooo9z837k׮²e ]RY䎺?ϷLr= [R,Eqة0|K.1o޼V%;;LDz58;9✳I$*[^ SNlڴ  y<-(Q1jRprrcǎ89:0DPn(A9r4UsMWx.ڬq\ѠA$:pFxSf͚|珽\aϟ_*Hz$I'zbafB#VVV><DΝQ3Ѩo9Qgagks>+hتbStիZ}R9@ %4NG駟cNNNv~xr4!gL.3C@dAW;/@ ʊ#G0`aaWCRjeZ}*z ڵk4iJr%bccIOO';;qbCF~*z ࿊^gժUL6 Rɀx<)'jI A+Wh",--ׯ?.\$>>qbԁQZX}Hq~t:}7W6+@ (BlIG='F_|_5}&;1zL'e;8>CD'+CCxwT@ UxA" ;/ܹsYz5666@FFFN*-L'e{WaΒHJa(6+ @PwObD{ɓJ^%ٳgaڵ/^ >>̜]?hZbGLGg]vDt:RvGW]-<ڬ4AA]prqc$Fh'꓋^/tL6۷LNp#^JPPիWի$&&>'F80@  IRQNGjj*GƍH^/3'F80@  XFדرc{.jLJ7n#@ t$&&2n8}APPr\*_i@P888y(Ѩ?~<ͣM6߿V#...X[[e@ ϣΊ5'Oh{ݻwciiR#1%É@P$$勲T*֬YcrCJJ Tt@ F۷/6m0WwΎI&amm-GWr_P(///* #Hƍǜ9s޽;C?СCi޼9T\WWW:*J,--++RI ! @ 0ȬY3g'N7fŊhZ:vRݝ@BBBVիW'$$ pqq)/ @ 7o'N$;;6mJ۶m塠Hlق%dggcii lmm(݄#  hX`6l`ҤIV%}Wz:vȃPThZJ%GTT*塥"@ 2 ŋ3rH4 -ӼysZl)J~:{D233ɺ B&@ b >X`TVZjqA&L/ |r:tHMMEєٛA80@ [oEpqqjժTZ@TBrr2m۶iӦrerΥGG\ ^(^OxDx( k%$}L:ڵk|(DFC ,'NJJ PV-,-ͻ}syTRNceeEݺu@Tboo?Lquu%99wD`` 8::m's׫VqǕJ%-5ߣFjEeYY27&9 4S'R׮Fә'914ΎM3dk4W$('N#>y=w3;Q֞KBIni#Iw #3va&N@`\IUXޢ* 7nbꌙ\ySpx&?^=.+pR%7Ns5IͿ1"233cAA̟BBLo'7bccB {05jB\\zϳi:|6?x؏ۻWu{J6϶* shT )'wcc8w<*Uzs%Way*$$$&pU*2 >>ߖ/Αt:8Hؕ+dffEg[Q%0LNGLJ($q).^Lrr2vT%Nmi*6.^h(AAF6˾\DFb_YWz=uxfH$8Hpss+V'OrEpvvn:4}p/X( bbPZйcܻKpuu[xzz+Ә= gѣ\tL|}i dØ}ݥSGڷ /ҼiS7k cbرs))4_6a/l۹@6 ѣ]*=twnckNu)DciaAN}.tz><Ӥq6X{JameMqێHHWL mj`u1~سΠx7因NAGDILl,AU)땫cոe6LI\\7믾bn6&2%(|0U^qLΎkÑ 66yp/r fIٵaP*l٘7o2e̜dmccmMHժL1 u7Y~Cc.k^6;o||<#|尰 oZyXkNYYY޽;pPN?7+Z0`hnܼ/͢2kTo*쓷.'Nb˶mҼYSޕxaD%qA=+WӨvEv`)WV.+] Q{݊`^PΞ;ĩ0ciҸ@ޢ|AΪԮ ӧ<}ӧL6Z9Sqvgy8;[d._@:CA-4G[80 7BT**SAHJJb"!>>OUаT"񄉄GDP(x?ܸy+Fjk)B٩{׮sv!:%j);0FlllұCy*gn,k222ٸ7فٽO 'Ldz8dff2a4l]N|BU8nQQQDܰx6² *y1#J%F>DΝ mLSq._ͳ;f4 קglڲ{άg2`Jj*ԩ]mZK[CJ6^^ih߁TfΙW7iJ:tάރ}8N !1${ܼu/йcm3h4oaTs3קݜw=ٖ-8n,BBxsp b!;prr ϳmNسcF!I /`0fn~FvGFbgk_Rh|@,RҌR[YYZZ*EԘOѧvyw-+;v$9%GFבsKŖ;_6++W&)) /u/ݍ?Zʵ,+?xG'/ 77@>W$y(!OAB80O83Nq-tRg^j֬Ag)U7G?^sBE cϟ:}wrucbt<ԯOj_Y7iԨ4E}xQQQQvVU{{WƍTMӖjkkk X߳'o:gxZW(rKJHH(T\JN6$Jn߹ö;s*Uxab  Ygq6n,;AAԯ[Tt7R'ש]XVNljSt`T DTy-E9b"vyqwI7yVOaK͑i ujՒgwŽ]?uI[ѭKgO~Êr#r]:}vҮsWy")ޗ\aƻ0<`C6w>^2?vmyĶ;w'|p˶9s):o=Ғ#>`̇#O_0fXkN "RR> rۯ]stA>oRzȉ8tCGл英^rwSї[svrU=y;["e8Ӹ\;w}9sm ?0lD_s ,:oW庹r~:ܽK}\ُ ':88ȳI[tы:jQJ :}ȉTZ&6.geJҤvrr>C6ji.^.3ƃ_ķk'9%Pv-222~] _,`Ӗ-DĐ3|r~ڒlnnn6e}BPSR~&w##Yty?9}ɉ>={/E^'7jؐC=eIJN.$;=Ug̸ 89:ҰA}1`w̒ehݺeV-[(j5jrAC2/,Rw&2 zUkM3ppp=NVdHÆ1;dgg3T'TK]DD`G0qrr"6.ΣZo^N>55Wʵk6Jxm<<<:RETr7qS6o11"Iϗi*UrCVs9^F%77,,beeŬiSs7Rv^^|-~+缉pF~>nnn\qϽV-ZT{>n&02{BۇV-r&'gff".4)vjP>y[ޜ/%5&snnn&OmڤH y*+! 8aԫ(ꅆ>|m.^LZZyYfI;֫ʒ_Off&7o,UKV.]RnK%G1vhiq.`ǎ3t3Ӷn7tܹIVV^EЙQ222###WW*{Mɥ˗ٷ{W#IQPT8kfRF|(r!j5o/M^'=3puuã\v6=&5-*U.ۍ'9%AFXvz\\~~^`/E>ZMLli4l\w=JԽ.5'ƍt.$җN`ΜbIPXXX<)ΜboogҮ}{  EM[2JTX|}|1)ɫNͥ˗9q$jpiz\η* zRx@лGVA ()8;3k @ (qcIOO HڵĞ)' Pr6|W@ *@P@  p`@ T8#!@ @ pT}`ܹÞ={>t@ kP={Уo7~[{^}[5@  >OIUp#B" 9ON8A[Ra98ӴIf͙K.]W7mӆƍ~"啦Γ U^4o vvvFA=rm]\ʲK'QQ|t).Pk4s.>Y ݍ8{j7}Fz ʎ ޽ ueH-{;^|dž֭[zu?_q{{y[wo6iSiѬc̹sԮT"[-5y)_C``iii\voݿOe?|Bh\t:I..m\; z}(_D_(3Ti>w/:/i0),:MWv*C^{}lLxsfΞ˩h.(;*sys`>6ҷ~bߧNZ|{t鉅E9Ck*TV%6.O}~bbcquu:ɟL(4}Q$1) gg YُҩC{.vdtU~ڹ|8Lurr2V_g|^LmCFCBB"+_.:<=<~>g5z}yYu:1\|SRs[ny]?8'IE^o@Q;u}7={͗˖s6!߻._bPk]PvTFӐF,ѪUVܹt2_,ZZ!++Ҥq#DG3~n߹ .̛)AU*Wlќ?ڋR$--fNUC'̛CraǎCP I]:u_6rd_2d(/O۶iͮ{ ϼ/x{yHјBu3W16i^Q*nOLBb"jz|:}*nnn&s }܈g(9Szȳ-[z~:~~f9sϹgsŢ4k 3N1ύ+h,JIڹS>7?/`|}cD0m֧\tJnfRW )>P.Eas(Cr11L/SA%Oq`Bs#V nݺGyT>7k׬Օ+aŬL:ڵn(JYNgՅt?p7776n̔3ٱe3KHhu: gnIݳo ZF{x|9Zh}nevvm '''$I"&6`#8gϲ|7w+++7qM5bh4>5y 2{ 6|[7F8 T*o;wУ[W M>̺ǽR%z=o {>g>1ΟgؾX?/J$IL1]waX|S祰:k\_$O/wʊ$"0Cܻ>lnhXV*7nBVӰA}yRk]P2**$kkk(x/_}U}{ ?ϭp$I"=Μ;G=At4/>ߖK/\\)?zIԙ3fu+<{ lllxbF_B, 6Ė߷ʐ۟/ɸF{"hIKK?*ʬƸs.oݷr[o ϽF?mXz5^;n+UŋJAܣc'S!p+(2 Ɍ9_Θ|cM\;R׾nѥUpqykb>3~Z١3d<:&h-.(OsI}]n'#%)L-~<8iNbtڵ|bB׳8998oI38LgccCZ-ϻ7)))T*y]d=sѱ};T*ǎg;K$(|}J==<$--Matt VVV=_adT_bo1>B}⒖Ajj*jZ^wH˜|6ԡ8v,q+?33dVst1ԇ[okkkprrbԧ{ =T͚E(.(OJbؑ/n}d^ܘ~Zo>}غuky@PX%@Xs̷qoS߷K4jذд!!U9p j,Hڵh1Dݻk*v}cbx #nH6!F fP@BVe%߮{Riӆ ^ORrSoq9xs+" R*UXÏ@Ϊؘ|cMv4p nh~ѹMŐ.EѽKW@T2Yg6n,LOOgŪopreIMMeȐ!ܴe@dj9Zʹihڴ)f΍7P(,:`S6_/`f4˯`ccC|BN&??{{#G!$Kݺreٟ1m֧Щ>>ލXw{{bjx IDAT8.gSykXfeIŜ6 _^2~~HDGnP9s2U͛%*ST2mDF~1o߁ZCvܸyˤ:oj̱1ǵE9f2q4v與;I|lɑC]Rd.֫Os( /]^ɼd2fRb})1񾔑*/R.>(=>C%H IF>I t&Ғ%K[n=+*J}[RTE޻׾RVVt+3ɥkڤ1~ʥBʨIyl`*˝woy0*7l <,>p1"ǒ%KHܚ]!0X:|'-[iP=g6}!ܵn.'_'55B!DիqLQQ @V;vL:~ۻwgҥKN<ţ>ڵk"8!ӬZ^}c 4ߧO>)Ocw^o+*5FECs]{{>~sV/^+??BVtVUV*z]ԸYQ??nr`ȑYCs!.ܼ\<4̘1ѣG_bRvN6?[wa.%]Feێ:lG H^w9JԀ`ffenʬ>gyQg4N>m;ҥs c4IJNVD9+/ZV2m;/͞7wcYl gʽzRS 5bW_}.V~eǎO511L>Դ4 znlӆsf]5vHv̎h4dgg`ub7޵ [nh2h\væ͛])i݋iSXsQ9UU~}jZ~ٽn ٘,Zo.m;kUi 1w#* オ_}ϿFVf&C):\fe^ݺ.=.Q}W,6?yi~JcƜٶc'mۙxwoms1^s)z=,YouypXFa 2gvQ( #|rUU9={fBBBHNN殨(fϘnyt^LFא򵵮SkOu˪uam]`k{gOm۴v輭v5m$QF3W<=TydR?R}wUUUffŒYj;{k]ƞ?EqJhϲm/wR:lyqQ#i̚;O}ͥ6Z:ew@C9GW<; ue}v7hEv'ƫOlt=Sբ"UUU5;;[C,,3gڼlu˪[V]\q]+c{׮{+zr伭,gGqm`ZnY'ہQQs/,\,gcxlh|}}xhHq<|!LBb"wEcdddB.ymc_V:}'ƍ#{t ;v.U8l+0*G|09jÇ !0 &/qKeݟ-ϩSe [n#,4^Cֺp-V\q]˻"\OU=Bj۶-ـENwޖ$$:WNP/\jmZCpp0 Ϣ`gOgoutpԍDדiyv\rMFcP-9/Cծa^ݺ̝5_} iӪ~RZRZz;D_i7RrY//ڼt^8^f%KeTu!Wt L&SY򬌤$(&m]Q%GUU‹DǜKNc(2Xfn.5d|6go™{\yʻ^:}j/&M8OfN&lYM9deeQXXN̍Sծ] Ļ˖;knݘ3!{cՑK'|-'&^B|<))UUINI&<<܉Jόܷw/ENN+V}ʳMc-WUvm233ɱ4wG#+.2+Uq\t:uKm~;9[ΖEJbYӓa姟ہ߹=N_:? iִ)+Wunko5ݦ1TnHtˣ%r2ӨaCZjko.%//x>[S٘r,Gˬ8.͚5e}yȒ5ΖoEΙ2Lp1{%?Τ',^/);;r?fmp9Ed2a!a.oW\ٟ^ۻv{+W']0xyyQhQTE{$''[oq׳|rd i4fϘWk M7h^Q͟GBB"=3՛?vo~o,5y/履}w<7er:uQra,~c _Qr  ga[*뵅 HHH~xj[#y!~ܺݺ3ϗ/G9Zf%UqxlN{!øP"a[sV\EǮէ/̘E@?߬͛ZnuWয়q8͊o g 9!bugTۺ앯_u^ۻv{+W']BpڪU+H6j-[XݎdRW^o^uW/O)Sr:uУG*g&]6i䧟yBb{եӺukY.Rĉء#=@dž x衇ur!BL5.ڵ+{zlxZ/zgao7|K-x>B!52ҥ aaL*OHHo'njLѣG0rDEEE|gLy~KS qLf}S\i+:n3yrNdbͺo-X s(DLFlfs`ǎ|<9a"F<<}:-$bbb8z(… .'gn#!ҷ]5z˶;]*;'M*{\qǟVj6~ũr[~^YĎ]-55-֩S'{Xx1<07Fףh0Lb5۷~VVΝ#66x.^Hbb"EEEٹ[YfMu'O涎ҹUedd Vh$%"Kdoii>IMM#4,]m Og-򑟟OZz:Aic+%OJNVDW}w))0ZSTTDRr2VGgEPVAAch:ϲ앙 RRR]ejy^OZz:""li2HHL$<3sF.DMTcnݺt2kUqi߾=zo?:r֮]˖͛j׮Mk7BCCLJUiӦU%͘2vDQغm;Ϡ%:&igF^ύm0ά+]e6&ϥ}YnBb"^ɹXu:Yp>57̈́]QQ̞1FÂEr6&Kt[GΚIT~dOz;zu`ԮU4ŴS<߳Gw~ܶB,^Dp &O}8l#FҵKgv܅F!;;sЭKr74${ex &;'YΚuZ=foyiÇy%Vyj=MVv67o!== xX g1e?0LhZۇ)>>,2={tg܄h}u(Bt56iٲ%\`Jݹ1 (ظiGoV+˪Uiw-=͛鉪9(EKeݜ>sůZ] Y3 )]=?%:z+My^S%,kQh4U^4kmZ'F0}V\}줧 ''G63p@^|>ro^mFJNRTb*kylt4}?n@@@,[y.N_~eڛKOQUY/ //313ٻ/V}BHHk]9szߢ"F>Hӑ<*cUU ։d϶jz̜)_k;r~ZwO喙ՊFL&߬]ǀ{fO?sBK箨صwMP` g"Զ-ݺtfMl=I))l)`>N&>^A֭h٢9Ǟ@KՋn]:|J-RY*߯sݏ,\*={tk$$$r!.ƍqM7r]˖;w!j[gh2z)Z-#)hw붭ݷ/ ۹{8q"ZOOfϜ1ciٲ%& ^`UU)((`_u;\wuռW=SĸqyqaǮݥCZFt:8.>3d IHL䮨=v Zh?NÖ-lٺPJSɼ^LHoooz=;%#3EQl HG b+*ѨJ̹XFؘШvgAWjއIUV\%B\Y<ϡG:c.lYVyeyYU3^]-W.@әO9~77`c2sJRrrݷ\Bff.bȠI;`d̜nn}))F?e[엦sG۝λ5U),,ēz jբAd$FV˾}㏬]-[_rJ.?OTT?0AAFEFTh$))W֭[__]kSRr2>vFFȴ]˹.^L@Qf\mZCPP}Ecѥs'1,3VFɼG;k& ҦU+&?II:+6t:ed|YyQ/5 ""HNNgEP,11ۍԫ[Ke|U]~d`ޗu-ˋB HJ6P =))Z%.)~4X-?׉.jf S%Ԃr19w!j= >4:xyy/ +?~ǓKᱱDQQepbIVV{ǭ?r}1k,DSDxyydgg[njXҹkd2tvO;wXf9MbGӕ퐞n%+y{ѷw/rrrXS}nnj qqvlKDx8YYYZFyXRҕZUU%9%K\_{ӑ<*ڵkOFF{k򵶾gi啙#CU)[sQZ]caqWz,>1<"矖jb06f]]>B2 Vk` o@Uc IDATCsuױxb&{fy???ezM(Vd2cf̜A\|~!j ^7jHM`'l>^;Tρ pc6ty)ġGs9L&3?11lݶ68Sf͚w>\_U*.h\ߟ[݂P#yqF4nԈ}i{X~Mc2z-ax:[eܰAڶi͒eo{a:;;9S֯Y^;6u68պuji|r/_m:|׭Keɲ9zϒeoUUɻ5I #((y6pZw?pW^yHZZ亖탡`+\ ?Xn,~UzTmuh4,2Ͻ0۶SXXH͘3JmWQ͟y٧ ?%֭Z[hP>Ç fQԍ3\6<s-`gӵsgg3ϲ租PM* GU8q<39%)%?oFHmh4̞1gM?l>{qYu؁Dӑ9-5@Ұu<Ƀ2VWeӗ03\u9Ǽ"秭2u>yy 8С܅8x0FЯOou?zw9 ݻٹ{7`~3j*ɯ.DM<ܳiS]o- : !ߣ]Xv-w[¢\k+9WSa!BkƶB!(֭][yB! 4 `:th!BB!,n0:!BspOvq(Q+!Bq(F!5!H/$!Bj` F!5!8F`BQsH/$!B$B#B!܎>>6?(,Yo^dÆ ?q_-ˑAjj*@ll,S2x`nwdYMxlch4&=3cǏY_a=7`L4T>;oԩSxxx3zr+8S{w&..wy^gy~:tgRj{ZLBQ]wFɆ6w62|p6l!CX e鸮uSPPCc+U.p\o[o]je󟜚''r|,̚1^z[ly/@퉍%:& 77g&Mv JxzzRv.]Jh4S'2}^%;S۶me֜YmŋddfZ~N94|||mxΜ=CFFRBQR< o +>Y߮#(0 t:TSR^g>sym۶Y~K2=kw 99R'l_8 -kn1 k"VXIz0L{1?Κk1~>cY3sA{W_쭥|gyyy=zG,{IOOb6]!p*oSdl߱0 ӬYr|9{ݠAC{}{]|輽.@Nٿ7y'yglYILkPy%ΖMKI)B@`Umf;SWz5Sd4Z%~EΝ9|01DGGcv >C/ՈX!jVUf60dPѓѣZo]{+ݓ  ֯_o=_ZAC3z,qqq.)&P+hLqu6-N:w}Ǟ={8v/^`0h]6]wݺuvmU!:Tys@ch2i()=hgB!Uɯ"xm{{3o#< 4<0M/MX4GS)η*V O3KϬt+*.. &i&3f ˖-㣏>]vUC!Ahţe?(2xt>=xr{2l0-a>@,N7-/(;:t;wf$k3.BTJ0YC#B\K:Sy9mµdķ'jPU-"5zC.! @Q]i_bXd -,xzE FeXXЩ55/R¡Fۖs9q@XkyҤIlܸUU 3Oɯ @Z֭ud2ɘ1cسg#ƪ=(V{䁤ϙE)KLOG/|΍' _](^Z?(T 1!t\>h43W#(~dM0ۮ%3rxhsˌ#Gm/6]Vס𠎟/֛# gر>2e )))lْ'NX TʢB44p BTS/d38ZO/O?Lj9yh ("@K(E}Z]U)7ߨϿ8кߢ)Z-0:D,_7߰j*~eB\(,\Տ%$fvv$"4(jvx퉮FDʥ2Sߐ 4:KFrH@A~#)ce$%AdF)eW2 QFa\)r2HOm[E4 ^Y;VINf-܄j2BaA59E? ȩC`pE^V>#$*-ڻ訪<ߪJUBBX"&ȦK7j}f<=xL88h==j=..!;!,^ޛ?BJ*@RE ~;޻Ww}h";7O*|ƒGKs#HagP'֖fr|h^"4 ߏN#+'"?/2W :L$\N{:V]N  ^"@4 ]iNEV_/ 3U#l[α'hiIM0bx//dlan#O)TUY}H;XDUE<R"ї0`zR-L4N!ٚ;(+?_GslG"-!%r/.a `Ue[ؾ0wFxil>K-L̝}7td]3>YB!D k;ӏ?1>\{QN[s5WrL()dY'!.~* y ޭG~ %~Ȫ{ tҬXh1CwժmWbHd".a `|>?r=-^H!UUYaGX8w/#R<2lbM0qkÑ=!BD0[bԈlSe`T{nEFZ2 lܾpF}UG'euȢ444R][Kum-Grƍ465 >[CC#Z[/oڲCڳ]9Zs]I]w!E`/eMx~~w={"yC̙秮DKKusI ]{+>y\3$Ϝ\R-A[:?bcMr Ub@܁ )) UUiii!99E ;VgxݶsYYLtՀK=IKK m;wP9f28~ڵ̞5%}u62iD233ؼuϿ -ݿ1cF_<\(vNX7<_90Kh8gGXr} ʹ S< GEp3N=^/Ռ5*FQ.f?nf3z}*Nr'...^Q4+߹̎]UXn7111 .m"027b}|wɱ*:^ȃ)R%3)‰RS{<.nwKNZ}qκfYiC-؝nLƸ^﹯9΋>$7;-=VV{8(륪457sy0g`0} oڲCFN'# F0gC߯[Ocs3&7HJJV$je53I᣿~ʈ<**P瑓yqW`tQX8k6q6yVrm9̝=;E#懲\qYIiNr\.sfc2fRV6lLCob\׋٧d╝w657@AsS.:HL?epi/=tR./)aTa!w>T-[{WKٔG+x[4@e.TUh]+=zL}^)766UUihl"9)‘?v..fڔ)z^/KCk`Xxz\n7n_hvo[Έn'=5%'9\QAEe&fihhdԩQH|||--'˯dܐ^h`N:!6EQ %!}V~GRR'4a111LrU55hdbذdq8x=JƎرc?N- sh:=V/n]0s>Vt)[K˯&55Cvcq:̞9ZTM?c3`$px=sˢ8r*fϚimk [@Y .*MP%lvן:} eaiɵQY{1!ߞjqna~VV# dGՎ^7&-yl޺믻VJnnqqvXL&SPVj S3M1ЫjeYmHFz:ӦL&+33z+:y|ƍ Zp:1 AbAQn7&\zU%'FNUk֎"+μ6`aNM؄b9 owjeIX|>AKZ]|>|D(t۳K>qOgK )Pu{Wup5%2*}zfy|qPcev`uS‘EeƍØbpEw$3#QE [&%&rh-mm xru݌i8\Qβ+a|bcpE#)*ew^[/Yd_7PYYdefkOizٌLخcaͨJ-Ԕ\.7Udfdz::WVR^Q륨p$ A"%??̌ V]ݱz6leg@4AB33q\-+#UU/a{n&&EXBFSEl}ߍ1͡c;W,(>ys~ʨ"WTh]iLyEejDv΁UU&%%Ζm'[׋t>t^,?Xo[_?bv{8lf)hNo0L<uII Ʈ{1KK)1"dٲp8TWאc;.ҽz>ں};})Lz^O%&#>/߇OVihqsV宧vٲdNnnf^zp98vnۆ*!})u! ",-0fS,pXXPc2o\V?>f 7.$>%%F3ڹ~_}VԡV >>dW^17t^rüy}@ON㺹sXj5UU(~?)))̙5-`0 ȵ٨"7'tSTXU& ɓO?-waaIH%q]ƉƎ >K#BiH~n^[UE3>X'fތ+x5۸kCO]\>1\.^Dk6nۍbN׾]VaQOϯӟ~_bcbBv]Hvmc{VwE$qqFz}ز^Mӈ7iol5ͱi:scsǃٺ1'Xw^Mttimk%;;CUt]nO4h3dyGЁ^p.Bo'nO;KާQ?xF/V:¥}>>IVKgvm )LNG\.CT|w`t:ih^\NkO6a `~uBVn(wyL() ?'~._Eͱ{LN4B\z"xM 9Hw`m!D]3WaCټc[p{}l 7Əa0D$KB1D#bтW0wzEUeB!kE!"߅$BkₒF! B!:#Lף* 8/ʥDU@r~!u*r}Z#dN~qhd6KGcdtsq;{HTdU?" ꏢ(Q? HIϐrDtB1%X1'X)?֓(r ](?Ŋ)!R\G_d iMfZ+A`2ɴb2)'ԱILԌ, qq47RAْHNHS]TxK0bX;[DdIGXbccI&=3BgG&u*:HB!0B!:!"H#B#B!0B!:!"H#B#B!0B!:!"H#B#B!N,@GՂ~#BRG8jN͐B!DoN'+W@i4GB!/5WjIENDB`starpu-1.4.9+dfsg/doc/doxygen/chapters/images/eclipse_installer.png000066400000000000000000002471341507764646700255320ustar00rootroot00000000000000PNG  IHDRvCKIDATxwul9$AA̤HJ ےϖ-t>ۿ;ߝlɒdDI)RasɱGvHT?;3կ^z8p|8888,B(>2&/!Zd5 :ZR^ڹ\.~) h>ƗR  'E(*ʪ_Cm#ȁ#_!CX6U!1%3e1/@h͋AYnDIJE?+7!s+P4"t}2u@ Ў(kBY^;nP6?o=Hf >Յ !scP}ɛLEuӲP4]bW֌[EZ7[@'c2r9wk@}'Ǿh#/zgQ:oFYN"4?JȑO$MFr5zz_@&/D"ENÝY~ɛ QΒ. b( qD"p(=o@րP=qD͈(zP@QEB{j%R%!f:Dm[&Tr9w2PʚDoPb#Mlj |L|z1X?4lȢES9w"g-ak?MC)d@!\!d~t/xa}Pی7v ڮ˰6 ː3r隣hwPUU1ME-3J!C `hyc=eRN=޾v,{C2}9OQ߈6#)T:oDF0|7@N?y#Aiنu Du^%orM^4tG;88?fP4q;wSӔ@t4/Sj4ֳ4b;Ng:4X#X;9CQ͇ukRk`@s#Wri,0/֖??N,* uv9f)%Bt]Gn ql9ƱCq3DVgb򄧊8~P咙d5#_Ma{'HPpڿ 8"S~S.J-^/](' _!@mֳ4rvd`) і(&6^t/22)B) S(S;`v?ȟ}G.`3~硹1bg:^j'SCf{]NZ 18];QU; 5pc#H}i.7c h/LYN!|e'|q^ z v6.2A,¼ ]ִG!NY_ѐ7o1˵h>Zg \Qiۼe AzaF~x~¾&X`__q Bj͆ ֮1ߖ]']4gOj`~&rUYA)(Ha`D `M=wE8o.1?s]lݶήqBʥ {weϮ#$g(Bk[`ŪE-BC?ȆMogc?mk1  ~[VU= g39>iZ^:[|J:iiJa!nmj@?;; MĎOCyb0͏lނNh;Gau?2"ý#?DT>p мNp4&qD%ԃдFÓ`UO=x5EciĎOBuȖG#Y {_x΂(#%Yj Q)abbyd~9-"`7 {y/H$RD"$Nj|\}$g|?E\%5L&OwO BY+>_òopoapuf)ʯx<.}>dzzh4ۭ劼=?|R{_½%3#S9z?|W\p̊mc 7~,1R(' }B]H_3dG;>ۑ/ F~xy+lB >*NȍՅאՅ v }-8gً/ ?{1J!An3dU`C߁V{r]^: p$ *˄@(%{ȍ 1,=PֹD6v!?t'oc5[㹃|O7l/~G|k?yׯNG:591a4]c$ӴT5aZe~7GBǎO|^{[u|?"/_WYMS)J#yh;OߢY)%>ٌ9B,Hh^'|_6qE 7a ;AZX-L7y;*O~Ljg?D|ϥڱK{kmf?׈af,v FC,F߷˴تIC< lh<符!x~vte2RŧH:^㏃eaEWBӖ%`BI4->6mҽ}mrEw~C? z{e;ve\y:~mĂ[Rݴ7 -Iwo+_qzKzmw4]Ų$BUH)Io=*|ŕW׿L:|<~i`)5 `E |l[x`I'lؚ"N ;WZ_V W{1 6+qмu.{P\:B獈#GFVn,:85RJ.rsi |?JsH '%`j2Y+(Jف"k{ؿaxÝ׊a坥k&BzMQZ9B6]O84={W!A5EAQF>8̆M[־`ehE&5녁KҲp mNv3\y7J;glV1y;gXwR 170̟)5Q ;A*l~v-3@fğ!w Wٙf#sz~H! KjB ֝"JI2*.tUbYbY"NF$HvZJz @JR( .[Tu4%E{'xGyipM[?7UShi[{\Q>o `rbHJHLN d& }h[װiJʥ2c3<׾xm\:M|Co,ٝ[YU ԍ u6!GĎO 1?EL9 v)%-19CöoePnkr<^7 M /%-^x o{>[#T2t|&ZC{Y8;?ā}^zZٳ(ǎKt ̼ |< ~3{lJqL* \ci a{V(`<fl#Jff/_{"DYO|,F(;wd2Iss3ccctww#P((J]H~4Mcjjd2ɚ5k0M:;;IlٲR6l`޽̐QU+VsNRڊ"^gpH)ٸq#6l8sl 9'nYƦ(|οӷRlE'nJJ*eϮ#nC?ۮYϣg?=hh0024źg%`|?MoX/ȤW0ϱvC?M-Q<7io1FG7Y_5O[|B߲\nJJr6G[Kz؉-c:j{ʸöv@%HDVq|r;iGF1xͧ*c{, &EyvTd[JB!q-u@ GUUB\Ab1uKRQ*,od 0Yi|>n`0raF}vzzJ%^o}'bxNp9+?eٲe4.oI>ÒVBbC{~:m3HqwO/ X,7X&-5w菉s(ɃwI @)[:{/| &,{z!~ֲ @ȮN)O^? >[ka";GDf_sϵ_R033a'w^/_^`Çikkcjj UUiiiannٺ]}-4M$Pl6K{{;bq1/'ec/_~A:7/gntM/Qȗp]Y]wU׬qlj#I>[D"_t a˷5FT!pմ7xكo9 MQpu_OCcMSsKT&ɣ 6-x娚be+X`unC?wFضۅWV A&Za! Z.Oqܼ?%15˥}˥eQ\A:Y' m䅋煢=%laO ߩ/ޜiYU""\4\C^2.EΞh4J,p7_^Blq&,_oֶp_J QsN)3p6 G;888=ʅ#^e8U#^e(lKj`^k;8Հ<f>(.5]GU_n'aՊ"^,ߎWë G;888p/!eL&)DQuNA>v,Jv^d2~oNZ7YҜTa/ek.vͱc8~8B;vP.1Mw>op10 DZc.[c,ziooriRJj}jJZ% ֿH) L&(۶mvS*0ML&a=zUUrQ(=/". ۍC!T*kJ)%'ى/SWpppx044?ϝw-_'?I~a-[vѮuނ=pA6mD>g~t]ےlBASSk׮EeY L&YP\.ǁuǃ>|EQBl޼G2>>k\?OUUn7.C{|A~?ٳVXj$I2 rk&---ZnW/glld2Iww7<3D":::hnnfݺuPwpp(&SSSJD"4-ywIBEkpZv(r|$IR###(B4Tp/w 7iR)\.at]GUUZZZpKs^b:Yr\^aH)fMsS+U|t+Ex\)MtcS #~ǥ"H~U-wGW"\.K*2MrG;888p2*2*zDmV",K2qtppp8.`rTU|*ǥ bKoӒT -&%e88888=L{] NCDQq銭g^WLlJ$&sppp8.`TASE[OE l8ȓp qB47HH-A@ptRJ,BQ R^i5su[zSKF@ J!RŋECi(B gEJwxȅڂ|AY T^Pyṳ2L%j\*Mm e]A(f .NKglSgr$ilC( SrK1EJ2sZ{K\n.}QBO=֬`eOlI%S9;ZϻCJIbrl* n7^vQA>gcе}~vpD\4{{UHi/6\x=*F e.OCGyB鍷tB x!^Ú-ynPvr+j8ذ@̻˯yއ R)W|\{^gwRkq[_*wǞaU.{r>pp~>;>\n Z.8ϋ?1Mg~}FT+Ut7_kuu=V/y)ms.xgBÔs秹 y<(rQ.x{sB0>4 9H0b5LNrqrn}?%/r*ML%WQ5b3;x{B4=IS'iw0@K{ V[3h/7@Cs큤ix|^z$xsG8]z8r0}~t˸kx⡟ӷj7R?=\skx[edE9yaCi4]BԓDiIZk08>B!NP 0wfM0qGh]}"lpt-$r;ބF5 Gؚi7vEe!|4w ,}>ں۹L&g/aT*TZ6w28py߀88/%5MB ͭ\{n7?ሇPS7z|n|FY19nYX"ժ%%Bhґ44LS,zt]&50Am ?躎Xe==O^ދelj i, kJOw.Z7f&dU?hhn βzZ±"*4-?tD㇎jzVϛ'߿6|cae{^[Χ^@J{AШ N?E(H)$L Me춓֎VB0wֻ^?/ /pdcěO$c<ßKs:Ya5.VGwV*l\cߡN,b˶ц2fee|XUܸ{?aۜ4pm7_ 5;c>L1j|?+ܹ֭hc\:˺7t/-?;z|6G D5%<ΕsRB$kK)*AZۂ$r`T-4]0LaP-)  !-*r B &)!4fճ/moe?=cph~Z:PU@8Hߪeic^!oǃfbdD\+].,7ʞ$9$ l 4]GQ0:8 x++֯b&mgSQ[׋|9ua&6. Υ)%vǟOqy^xmݎBYPh~TUAs>0オTl'9bn;CT 4ɧ= ,@UJJ)_R40&UfyZ̐Ne9~dd40+e!-)[V(U4ݯ$]y82 ׾(rV/ eQ̌Vfl߾biy;߇75;-V':BrN]w]94M S&B*Ҳ5BB*a299LarpRPT sj25 iP֒TE3`BB%C_{" K"k׮czzUUٹs'PTס 3n mmᗇs=.FrYaz4Kl+0-H&昘=4W6v"Ul%l:DjjmG;@{4JT(VUwr]!N_~ :P(F Ri|vٙ~Q9gH %%YTUƏ`i.4_G2Y}~_JFYīh;pu\ f%f feuE%!@ׄ!Ë"MgxxP(D&Y>b]Pc_Y/ii4w>0[e\Q-Qbxn;ͫz7PsU #Díx qn?vøאXFEV'0 R;r-B4<7>ð^ќRgETHs8W$li@S4#E*|jT),1xn̊ Aj0,9 v4RۣN M!&d|xACQc4xce$QO'PVP ^fgM‹Q]S̟Ri::87S*@ZKs/J{"O!K Mt3|$H ['Y%jBW\TQO>-Dw,jTe7 ((k;&#Or+pQ|~9P?j3sxpnJ@d. c$5ef6ɓS3fѪ*v) 4 YŰ!& GH& /4u.ò%M1ҒJ&~YUЫWӽ88,4v aJtM8_I&M(&E#fAZ']d"?XJ0(UKvq<}jH %묄X1c}r7BI05~i @wy EMտU=3cY:_|ǖ'>sbGSqf;Өj.ʥT.Pu|nq6w4MӗAgl]} 8)-\.ٻ r}-uw5;uYӣÍ<<{b˲{in.q6=eOR1A7i%!N Ժ0tEw6&=7IgQ uFkhY!k}gT{.ΒQ|0oh2O߶'D]hCrE #ejH]L!dvzZұdbhMw!- .-(Ѿ/!ȧHLٻ(X .q˲NRC]5$sY7PZE2ˡDQ\_;B5^me.?_ v Yz4,T1̳ (C$B63i %,,L}E6=K>i߲'~+ҤTvK%\, JPlMJ\ ] 4G>K*9QX 0*T\&iVe1X.F.JTrb!kre2X0ff<BMϒ$ԉR.JNK`Y&JB.]y)*}jJ)OznR1r1WL+Q=[ეY4iP0zVJyL !=u柳QF%J`FL6C,ro6ݲL7H9svZsQ-RSW%"l~ j'TJvysXQ(RHiQ*WT)k ed32IR-T92DZ[&HM-J|v&FD>&=7aT0 Jֶb4ɦgɤgN7la$57I>"93J(bvhd3S2I6%giMNpxabMܶR3}OaϐzM'(3Te9$SsT  Ysdb#P*T"mJ+1`llx;m(lct@5B<FBhCB͎ ih"53NznTE"V:z06E6>6t\v7urڏ$n 2|t7__ LSkGwQ)0*m]0"iOѳ4?NJ4273F$B& P֏XekylPb>\.7eҿj+ervTUZ)տ!L#Ms%^_lYJRB.]hjHͨ{ҍg5*cJy*\!sS }+.grlƖ^8;h\A(ȉ=_BKy<^?@B>M-$(sNS,d7uڱAwl 4lf!2iFC\N0'oNHN"`8N$VئƎBuTŲ[?Jb8`B>Cx?<Өгl33͌h^u,(pcCȤfCYH|MsmhQ[!줻Gq{u$MQq3(Qտ#;Afn(J+Z;wڱ^w[A\dz~^kl'VR R44wT 7ٷ~QaZ{!._ KvA.$19D%mNa-A:Wұ 4k&(naH#o!bCp_ W21T1r|/BQTob`S@t,ˤoL aT+#= 㼌mŲIbrVJ)孴{V2YOC:?1,2RfYĚkO[L%+`*k{gJ˪dӳlm]!ȡR)>/ALR.P5o0*"Zz)3LO##o;6ڹU(H)#Dus3c,AHTU'nxcijfګQdb͊uWmlgjH]RR,f]Z:V0n m^_k&kfj(iP*f[GCSWVmh#iRȥTB1ފ#MM؂XN1^afxS?;=/khGwyiTILҽl3˷ry!@(FbrEQɦg(N: 01r] ,`r}Y*Ji_ek$k&19H*9A:5M(@e61Bѻb RRgk[EXJt\f! +]MSkߢi~g-sFwox!T+%"eb:{ٻlzrmұP_9Y,폺&N #=7gSifax㋴e٦V(FRBZ&s yr("؎gnfMӤZ. FV+ Q*B 7C.N1TUc庫i\qb))X!bLQȥ4з 2JP֞Yp^01S& tn’p,woGEdH43jFNʳ<˺o 6@&g0<.Z4M6Lè"x}!B&M](AR,DL07;NCS E9|inJ2_ Eu!VJ;pB(]BLoMwrVJCx<~2sj:\oq\Ŷ EPxhO!FJP__aj(IBU4|0nL$0YvI)-4*nO36t)-G|è42ttB(.=kS4L7~j(=>4ͅ'JP#,ZzZ?F0P[Ymη2M2n]w/J>'zm>..~no IU5|m _ GDTk5mGw6=¥E(H Lۏ4tӿj4 |6ճX(* iX]& ^MBZ)5JT=:zrt7_Ul@i!b/+y- <^([L0oK)m5h!":Fޒ\u+~O۲UsOP}n?>‘&Cx둈K8҄U-D\DQ4"rYʥ7Xag_.цvG"DXak03>|6 X0IL 7 7<MR.26߶v X@8DrfcdRvUU SG)2?cbȦfA}P?yϳ.0 ^+(x!ͥԕdӳ^U$&Ǐryq2IPm S5Zi's|yf'h\S#$g0ƆP.kTj[vugEQhX~&FSȧLkk][C(n$J09vtraW=Ў'N$9M6K19:@.4(-G>LΖN!bBU!vTba)1<+[@)x+ Z#D-H6"ز`dk몪 q<!p}&<>(pL*A>SEWXA[J\h\I0JNQ.fxA<_EQxQTUO„"hlzPXC^ЮsĀHiOR$h /QZڗˋ"&&m"܍"8ۋ5bIBFQT ۋ-ۍ$^Sk/p]ֵwOèkvPQ7LOM8D6@=4]ۮ_d 8N5[ `^644bT+Bq{|2{pB1Lry#" s)J,+ H''miin_fk.$J@(v{I''I&Fil-qrDu7>i!{@6q.ȦQT>pc$PP f?;EQq}HiNNF)L ogZ Ě:j,1 Lj[5΢^=pihr{ɤx}!kUY׏QP*t7]} P( !Z{QT Uq>sSs"GCrW@tRIP<npQ%=7B qTMGt@6Ab l